Warmup Train [1][0/3239]	Time 51.902 (51.902)	Data 20.464 (20.464)	Loss 6.9597 (6.9597)	Top-1 acc 0.000 (0.000)	Top-5 acc 0.000 (0.000)	lr 0.05000
Warmup Train [1][10/3239]	Time 0.170 (5.459)	Data 0.001 (1.862)	Loss 6.8368 (6.8854)	Top-1 acc 0.000 (0.036)	Top-5 acc 0.000 (0.497)	lr 0.05000
Warmup Train [1][20/3239]	Time 0.292 (3.038)	Data 0.001 (0.977)	Loss 6.8015 (6.8352)	Top-1 acc 0.000 (0.130)	Top-5 acc 0.000 (0.707)	lr 0.05000
Warmup Train [1][30/3239]	Time 0.217 (2.151)	Data 0.001 (0.662)	Loss 6.7494 (6.8105)	Top-1 acc 0.000 (0.126)	Top-5 acc 0.781 (0.718)	lr 0.05000
Warmup Train [1][40/3239]	Time 0.145 (1.678)	Data 0.002 (0.501)	Loss 6.6977 (6.7904)	Top-1 acc 0.000 (0.200)	Top-5 acc 0.781 (0.753)	lr 0.05000
Warmup Train [1][50/3239]	Time 0.232 (1.391)	Data 0.001 (0.404)	Loss 6.7076 (6.7735)	Top-1 acc 0.000 (0.184)	Top-5 acc 0.391 (0.743)	lr 0.05000
Warmup Train [1][60/3239]	Time 0.199 (1.198)	Data 0.001 (0.338)	Loss 6.7164 (6.7626)	Top-1 acc 0.391 (0.179)	Top-5 acc 1.172 (0.756)	lr 0.05000
Warmup Train [1][70/3239]	Time 0.207 (1.060)	Data 0.001 (0.291)	Loss 6.7111 (6.7562)	Top-1 acc 0.000 (0.171)	Top-5 acc 0.391 (0.748)	lr 0.05000
Warmup Train [1][80/3239]	Time 0.222 (0.955)	Data 0.002 (0.255)	Loss 6.6764 (6.7501)	Top-1 acc 0.391 (0.174)	Top-5 acc 0.781 (0.776)	lr 0.05000
Warmup Train [1][90/3239]	Time 0.289 (0.890)	Data 0.001 (0.242)	Loss 6.7218 (6.7448)	Top-1 acc 0.000 (0.159)	Top-5 acc 0.000 (0.747)	lr 0.05000
Warmup Train [1][100/3239]	Time 0.178 (0.832)	Data 0.001 (0.229)	Loss 6.7009 (6.7405)	Top-1 acc 1.172 (0.166)	Top-5 acc 1.562 (0.750)	lr 0.05000
Warmup Train [1][110/3239]	Time 1.425 (0.827)	Data 1.241 (0.259)	Loss 6.7185 (6.7364)	Top-1 acc 0.000 (0.165)	Top-5 acc 0.391 (0.767)	lr 0.05000
Warmup Train [1][120/3239]	Time 0.216 (0.791)	Data 0.001 (0.246)	Loss 6.6768 (6.7334)	Top-1 acc 0.391 (0.171)	Top-5 acc 1.172 (0.794)	lr 0.05000
Warmup Train [1][130/3239]	Time 0.260 (0.767)	Data 0.002 (0.248)	Loss 6.7025 (6.7300)	Top-1 acc 0.000 (0.164)	Top-5 acc 0.781 (0.808)	lr 0.05000
Warmup Train [1][140/3239]	Time 0.255 (0.750)	Data 0.001 (0.252)	Loss 6.7211 (6.7278)	Top-1 acc 0.000 (0.169)	Top-5 acc 0.781 (0.837)	lr 0.05000
Warmup Train [1][150/3239]	Time 0.255 (0.752)	Data 0.001 (0.271)	Loss 6.6736 (6.7243)	Top-1 acc 0.000 (0.171)	Top-5 acc 0.781 (0.843)	lr 0.05000
Warmup Train [1][160/3239]	Time 0.229 (0.726)	Data 0.003 (0.262)	Loss 6.7074 (6.7218)	Top-1 acc 0.781 (0.167)	Top-5 acc 1.562 (0.837)	lr 0.05000
Warmup Train [1][170/3239]	Time 0.206 (0.734)	Data 0.002 (0.284)	Loss 6.6725 (6.7197)	Top-1 acc 0.391 (0.171)	Top-5 acc 1.172 (0.845)	lr 0.05000
Warmup Train [1][180/3239]	Time 0.210 (0.711)	Data 0.001 (0.274)	Loss 6.6799 (6.7179)	Top-1 acc 0.000 (0.170)	Top-5 acc 0.781 (0.848)	lr 0.05000
Warmup Train [1][190/3239]	Time 0.256 (0.689)	Data 0.001 (0.264)	Loss 6.6596 (6.7157)	Top-1 acc 0.000 (0.170)	Top-5 acc 1.172 (0.849)	lr 0.05000
Warmup Train [1][200/3239]	Time 2.817 (0.716)	Data 2.675 (0.302)	Loss 6.6708 (6.7146)	Top-1 acc 0.391 (0.177)	Top-5 acc 1.562 (0.857)	lr 0.05000
Warmup Train [1][210/3239]	Time 0.252 (0.693)	Data 0.001 (0.288)	Loss 6.6905 (6.7124)	Top-1 acc 0.000 (0.178)	Top-5 acc 1.953 (0.876)	lr 0.05000
Warmup Train [1][220/3239]	Time 0.201 (0.674)	Data 0.001 (0.278)	Loss 6.6272 (6.7106)	Top-1 acc 0.000 (0.179)	Top-5 acc 0.781 (0.882)	lr 0.05000
Warmup Train [1][230/3239]	Time 0.259 (0.676)	Data 0.001 (0.287)	Loss 6.6928 (6.7091)	Top-1 acc 0.000 (0.176)	Top-5 acc 0.781 (0.876)	lr 0.05000
Warmup Train [1][240/3239]	Time 0.254 (0.666)	Data 0.001 (0.285)	Loss 6.7000 (6.7077)	Top-1 acc 0.781 (0.183)	Top-5 acc 1.953 (0.880)	lr 0.05000
Warmup Train [1][250/3239]	Time 0.190 (0.659)	Data 0.002 (0.285)	Loss 6.6835 (6.7066)	Top-1 acc 0.391 (0.181)	Top-5 acc 1.953 (0.884)	lr 0.05000
Warmup Train [1][260/3239]	Time 0.196 (0.670)	Data 0.001 (0.303)	Loss 6.6703 (6.7057)	Top-1 acc 0.000 (0.180)	Top-5 acc 0.781 (0.889)	lr 0.05000
Warmup Train [1][270/3239]	Time 0.244 (0.665)	Data 0.001 (0.304)	Loss 6.6940 (6.7044)	Top-1 acc 0.000 (0.179)	Top-5 acc 0.391 (0.891)	lr 0.05000
Warmup Train [1][280/3239]	Time 0.226 (0.650)	Data 0.001 (0.293)	Loss 6.6803 (6.7036)	Top-1 acc 0.000 (0.181)	Top-5 acc 0.781 (0.892)	lr 0.05000
Warmup Train [1][290/3239]	Time 0.233 (0.635)	Data 0.001 (0.283)	Loss 6.6543 (6.7020)	Top-1 acc 0.000 (0.184)	Top-5 acc 0.391 (0.894)	lr 0.05000
Warmup Train [1][300/3239]	Time 0.292 (0.652)	Data 0.001 (0.304)	Loss 6.6525 (6.7009)	Top-1 acc 0.391 (0.183)	Top-5 acc 1.562 (0.894)	lr 0.05000
Warmup Train [1][310/3239]	Time 0.206 (0.639)	Data 0.002 (0.296)	Loss 6.6475 (6.7001)	Top-1 acc 0.000 (0.186)	Top-5 acc 0.781 (0.909)	lr 0.05000
Warmup Train [1][320/3239]	Time 0.215 (0.627)	Data 0.001 (0.286)	Loss 6.6821 (6.6993)	Top-1 acc 0.391 (0.187)	Top-5 acc 0.781 (0.911)	lr 0.05000
Warmup Train [1][330/3239]	Time 0.231 (0.642)	Data 0.001 (0.306)	Loss 6.6552 (6.6982)	Top-1 acc 0.391 (0.185)	Top-5 acc 1.562 (0.916)	lr 0.05000
Warmup Train [1][340/3239]	Time 0.225 (0.631)	Data 0.001 (0.298)	Loss 6.6755 (6.6973)	Top-1 acc 0.391 (0.194)	Top-5 acc 0.781 (0.928)	lr 0.05000
Warmup Train [1][350/3239]	Time 0.283 (0.620)	Data 0.001 (0.290)	Loss 6.6309 (6.6965)	Top-1 acc 0.391 (0.199)	Top-5 acc 0.391 (0.935)	lr 0.05000
Warmup Train [1][360/3239]	Time 0.553 (0.628)	Data 0.306 (0.301)	Loss 6.6216 (6.6957)	Top-1 acc 0.000 (0.201)	Top-5 acc 1.172 (0.942)	lr 0.05000
Warmup Train [1][370/3239]	Time 0.272 (0.617)	Data 0.002 (0.293)	Loss 6.6488 (6.6945)	Top-1 acc 0.000 (0.198)	Top-5 acc 0.391 (0.949)	lr 0.05000
Warmup Train [1][380/3239]	Time 0.223 (0.611)	Data 0.001 (0.290)	Loss 6.6516 (6.6939)	Top-1 acc 0.781 (0.200)	Top-5 acc 1.562 (0.950)	lr 0.05000
Warmup Train [1][390/3239]	Time 0.156 (0.616)	Data 0.001 (0.298)	Loss 6.6593 (6.6933)	Top-1 acc 0.000 (0.202)	Top-5 acc 0.781 (0.955)	lr 0.05000
Warmup Train [1][400/3239]	Time 0.217 (0.612)	Data 0.001 (0.297)	Loss 6.6468 (6.6927)	Top-1 acc 0.000 (0.202)	Top-5 acc 1.953 (0.957)	lr 0.05000
Warmup Train [1][410/3239]	Time 0.235 (0.608)	Data 0.001 (0.295)	Loss 6.7078 (6.6922)	Top-1 acc 0.000 (0.203)	Top-5 acc 1.172 (0.956)	lr 0.05000
Warmup Train [1][420/3239]	Time 0.207 (0.606)	Data 0.002 (0.295)	Loss 6.6592 (6.6915)	Top-1 acc 0.000 (0.204)	Top-5 acc 0.391 (0.963)	lr 0.05000
Warmup Train [1][430/3239]	Time 0.201 (0.608)	Data 0.001 (0.298)	Loss 6.7148 (6.6909)	Top-1 acc 0.000 (0.203)	Top-5 acc 0.000 (0.972)	lr 0.05000
Warmup Train [1][440/3239]	Time 0.246 (0.599)	Data 0.001 (0.292)	Loss 6.7257 (6.6902)	Top-1 acc 0.000 (0.204)	Top-5 acc 0.391 (0.976)	lr 0.05000
Warmup Train [1][450/3239]	Time 0.148 (0.598)	Data 0.001 (0.293)	Loss 6.6641 (6.6895)	Top-1 acc 0.000 (0.205)	Top-5 acc 0.781 (0.976)	lr 0.05000
Warmup Train [1][460/3239]	Time 0.215 (0.608)	Data 0.001 (0.305)	Loss 6.6730 (6.6893)	Top-1 acc 1.172 (0.206)	Top-5 acc 2.734 (0.977)	lr 0.05000
Warmup Train [1][470/3239]	Time 0.187 (0.600)	Data 0.002 (0.298)	Loss 6.6417 (6.6885)	Top-1 acc 0.000 (0.206)	Top-5 acc 0.781 (0.984)	lr 0.05000
Warmup Train [1][480/3239]	Time 0.988 (0.594)	Data 0.681 (0.294)	Loss 6.6520 (6.6880)	Top-1 acc 0.000 (0.209)	Top-5 acc 1.172 (0.983)	lr 0.05000
Warmup Train [1][490/3239]	Time 0.177 (0.599)	Data 0.002 (0.301)	Loss 6.6524 (6.6875)	Top-1 acc 0.000 (0.211)	Top-5 acc 1.562 (0.994)	lr 0.05000
Warmup Train [1][500/3239]	Time 0.180 (0.593)	Data 0.001 (0.297)	Loss 6.6616 (6.6868)	Top-1 acc 0.000 (0.211)	Top-5 acc 0.391 (0.998)	lr 0.05000
Warmup Train [1][510/3239]	Time 0.231 (0.591)	Data 0.001 (0.296)	Loss 6.6954 (6.6862)	Top-1 acc 0.391 (0.212)	Top-5 acc 0.391 (0.999)	lr 0.05000
Warmup Train [1][520/3239]	Time 4.602 (0.598)	Data 4.406 (0.305)	Loss 6.6475 (6.6857)	Top-1 acc 0.391 (0.214)	Top-5 acc 1.953 (1.008)	lr 0.05000
Warmup Train [1][530/3239]	Time 1.278 (0.593)	Data 0.001 (0.299)	Loss 6.6986 (6.6850)	Top-1 acc 0.000 (0.214)	Top-5 acc 0.000 (1.012)	lr 0.05000
Warmup Train [1][540/3239]	Time 0.207 (0.586)	Data 0.039 (0.294)	Loss 6.6884 (6.6847)	Top-1 acc 0.391 (0.215)	Top-5 acc 0.781 (1.012)	lr 0.05000
Warmup Train [1][550/3239]	Time 0.163 (0.587)	Data 0.001 (0.296)	Loss 6.6668 (6.6842)	Top-1 acc 0.000 (0.213)	Top-5 acc 0.391 (1.010)	lr 0.05000
Warmup Train [1][560/3239]	Time 0.179 (0.587)	Data 0.002 (0.298)	Loss 6.6333 (6.6835)	Top-1 acc 0.391 (0.214)	Top-5 acc 2.344 (1.015)	lr 0.05000
Warmup Train [1][570/3239]	Time 0.142 (0.581)	Data 0.001 (0.294)	Loss 6.6450 (6.6831)	Top-1 acc 0.391 (0.214)	Top-5 acc 1.172 (1.012)	lr 0.05000
Warmup Train [1][580/3239]	Time 0.208 (0.586)	Data 0.001 (0.300)	Loss 6.6250 (6.6826)	Top-1 acc 0.000 (0.214)	Top-5 acc 0.391 (1.013)	lr 0.05000
Warmup Train [1][590/3239]	Time 0.314 (0.584)	Data 0.001 (0.299)	Loss 6.6730 (6.6824)	Top-1 acc 0.000 (0.213)	Top-5 acc 0.781 (1.013)	lr 0.05000
Warmup Train [1][600/3239]	Time 0.210 (0.578)	Data 0.001 (0.294)	Loss 6.6838 (6.6822)	Top-1 acc 0.391 (0.214)	Top-5 acc 1.172 (1.012)	lr 0.05000
Warmup Train [1][610/3239]	Time 0.244 (0.579)	Data 0.001 (0.296)	Loss 6.5950 (6.6820)	Top-1 acc 0.391 (0.212)	Top-5 acc 1.172 (1.008)	lr 0.05000
Warmup Train [1][620/3239]	Time 0.219 (0.583)	Data 0.001 (0.301)	Loss 6.6679 (6.6816)	Top-1 acc 0.391 (0.210)	Top-5 acc 1.953 (1.009)	lr 0.05000
Warmup Train [1][630/3239]	Time 0.268 (0.577)	Data 0.001 (0.297)	Loss 6.6468 (6.6811)	Top-1 acc 0.000 (0.210)	Top-5 acc 0.781 (1.011)	lr 0.05000
Warmup Train [1][640/3239]	Time 2.378 (0.579)	Data 2.222 (0.300)	Loss 6.6873 (6.6807)	Top-1 acc 0.391 (0.211)	Top-5 acc 0.781 (1.013)	lr 0.05000
Warmup Train [1][650/3239]	Time 0.267 (0.577)	Data 0.001 (0.299)	Loss 6.5925 (6.6800)	Top-1 acc 0.391 (0.212)	Top-5 acc 1.562 (1.016)	lr 0.05000
Warmup Train [1][660/3239]	Time 0.218 (0.574)	Data 0.001 (0.297)	Loss 6.6482 (6.6796)	Top-1 acc 0.391 (0.212)	Top-5 acc 2.344 (1.021)	lr 0.05000
Warmup Train [1][670/3239]	Time 0.202 (0.574)	Data 0.001 (0.297)	Loss 6.6309 (6.6794)	Top-1 acc 0.000 (0.211)	Top-5 acc 0.781 (1.019)	lr 0.05000
Warmup Train [1][680/3239]	Time 2.926 (0.578)	Data 2.661 (0.302)	Loss 6.6624 (6.6789)	Top-1 acc 0.000 (0.212)	Top-5 acc 1.172 (1.019)	lr 0.05000
Warmup Train [1][690/3239]	Time 0.344 (0.573)	Data 0.002 (0.297)	Loss 6.5973 (6.6785)	Top-1 acc 0.391 (0.213)	Top-5 acc 1.172 (1.019)	lr 0.05000
Warmup Train [1][700/3239]	Time 0.249 (0.571)	Data 0.001 (0.296)	Loss 6.6263 (6.6783)	Top-1 acc 0.391 (0.212)	Top-5 acc 2.344 (1.024)	lr 0.05000
Warmup Train [1][710/3239]	Time 0.160 (0.571)	Data 0.001 (0.297)	Loss 6.6955 (6.6781)	Top-1 acc 0.391 (0.213)	Top-5 acc 0.391 (1.024)	lr 0.05000
Warmup Train [1][720/3239]	Time 0.240 (0.569)	Data 0.002 (0.296)	Loss 6.6620 (6.6778)	Top-1 acc 0.000 (0.213)	Top-5 acc 2.734 (1.028)	lr 0.05000
Warmup Train [1][730/3239]	Time 0.250 (0.568)	Data 0.002 (0.295)	Loss 6.6138 (6.6772)	Top-1 acc 0.000 (0.213)	Top-5 acc 0.391 (1.029)	lr 0.05000
Warmup Train [1][740/3239]	Time 0.181 (0.569)	Data 0.001 (0.297)	Loss 6.6071 (6.6770)	Top-1 acc 0.391 (0.215)	Top-5 acc 2.344 (1.033)	lr 0.05000
Warmup Train [1][750/3239]	Time 0.198 (0.567)	Data 0.001 (0.296)	Loss 6.7016 (6.6767)	Top-1 acc 0.000 (0.217)	Top-5 acc 0.391 (1.033)	lr 0.05000
Warmup Train [1][760/3239]	Time 0.189 (0.567)	Data 0.002 (0.296)	Loss 6.6713 (6.6764)	Top-1 acc 0.000 (0.217)	Top-5 acc 0.781 (1.035)	lr 0.05000
Warmup Train [1][770/3239]	Time 0.242 (0.567)	Data 0.002 (0.297)	Loss 6.6656 (6.6762)	Top-1 acc 0.000 (0.218)	Top-5 acc 0.391 (1.039)	lr 0.05000
Warmup Train [1][780/3239]	Time 0.214 (0.563)	Data 0.002 (0.293)	Loss 6.6578 (6.6757)	Top-1 acc 0.391 (0.220)	Top-5 acc 1.172 (1.042)	lr 0.05000
Warmup Train [1][790/3239]	Time 0.218 (0.564)	Data 0.002 (0.296)	Loss 6.6166 (6.6753)	Top-1 acc 0.391 (0.221)	Top-5 acc 1.562 (1.042)	lr 0.05000
Warmup Train [1][800/3239]	Time 4.247 (0.566)	Data 3.986 (0.298)	Loss 6.6171 (6.6751)	Top-1 acc 0.000 (0.220)	Top-5 acc 1.562 (1.041)	lr 0.05000
Warmup Train [1][810/3239]	Time 0.183 (0.564)	Data 0.001 (0.296)	Loss 6.6325 (6.6747)	Top-1 acc 0.000 (0.222)	Top-5 acc 0.781 (1.046)	lr 0.05000
Warmup Train [1][820/3239]	Time 0.198 (0.565)	Data 0.002 (0.298)	Loss 6.6461 (6.6745)	Top-1 acc 0.000 (0.221)	Top-5 acc 1.172 (1.045)	lr 0.05000
Warmup Train [1][830/3239]	Time 0.226 (0.561)	Data 0.002 (0.294)	Loss 6.6433 (6.6743)	Top-1 acc 0.000 (0.220)	Top-5 acc 1.562 (1.044)	lr 0.04999
Warmup Train [1][840/3239]	Time 0.841 (0.560)	Data 0.605 (0.294)	Loss 6.6695 (6.6740)	Top-1 acc 0.000 (0.220)	Top-5 acc 1.172 (1.047)	lr 0.04999
Warmup Train [1][850/3239]	Time 0.239 (0.556)	Data 0.002 (0.291)	Loss 6.6934 (6.6735)	Top-1 acc 0.000 (0.220)	Top-5 acc 0.391 (1.049)	lr 0.04999
Warmup Train [1][860/3239]	Time 0.230 (0.562)	Data 0.001 (0.297)	Loss 6.6810 (6.6734)	Top-1 acc 0.000 (0.219)	Top-5 acc 0.391 (1.048)	lr 0.04999
Warmup Train [1][870/3239]	Time 0.235 (0.558)	Data 0.026 (0.294)	Loss 6.6570 (6.6731)	Top-1 acc 0.781 (0.220)	Top-5 acc 1.172 (1.049)	lr 0.04999
Warmup Train [1][880/3239]	Time 0.173 (0.555)	Data 0.002 (0.292)	Loss 6.6141 (6.6729)	Top-1 acc 0.391 (0.220)	Top-5 acc 1.172 (1.050)	lr 0.04999
Warmup Train [1][890/3239]	Time 0.269 (0.557)	Data 0.002 (0.294)	Loss 6.6580 (6.6727)	Top-1 acc 0.000 (0.223)	Top-5 acc 0.781 (1.053)	lr 0.04999
Warmup Train [1][900/3239]	Time 0.265 (0.556)	Data 0.001 (0.293)	Loss 6.6227 (6.6723)	Top-1 acc 0.000 (0.225)	Top-5 acc 1.172 (1.055)	lr 0.04999
Warmup Train [1][910/3239]	Time 0.147 (0.556)	Data 0.001 (0.293)	Loss 6.6228 (6.6720)	Top-1 acc 1.562 (0.225)	Top-5 acc 2.734 (1.060)	lr 0.04999
Warmup Train [1][920/3239]	Time 0.224 (0.558)	Data 0.001 (0.296)	Loss 6.6597 (6.6716)	Top-1 acc 0.000 (0.224)	Top-5 acc 0.000 (1.057)	lr 0.04999
Warmup Train [1][930/3239]	Time 0.166 (0.555)	Data 0.003 (0.294)	Loss 6.5819 (6.6714)	Top-1 acc 0.000 (0.223)	Top-5 acc 1.953 (1.058)	lr 0.04999
Warmup Train [1][940/3239]	Time 0.256 (0.556)	Data 0.002 (0.294)	Loss 6.6903 (6.6710)	Top-1 acc 0.000 (0.223)	Top-5 acc 1.172 (1.059)	lr 0.04999
Warmup Train [1][950/3239]	Time 0.221 (0.556)	Data 0.002 (0.295)	Loss 6.6633 (6.6708)	Top-1 acc 0.000 (0.225)	Top-5 acc 1.172 (1.060)	lr 0.04999
Warmup Train [1][960/3239]	Time 1.361 (0.554)	Data 1.082 (0.293)	Loss 6.6495 (6.6704)	Top-1 acc 0.000 (0.224)	Top-5 acc 0.781 (1.064)	lr 0.04999
Warmup Train [1][970/3239]	Time 0.247 (0.558)	Data 0.001 (0.298)	Loss 6.6099 (6.6701)	Top-1 acc 0.391 (0.226)	Top-5 acc 1.562 (1.068)	lr 0.04999
Warmup Train [1][980/3239]	Time 0.192 (0.558)	Data 0.001 (0.298)	Loss 6.6745 (6.6699)	Top-1 acc 0.781 (0.227)	Top-5 acc 2.344 (1.072)	lr 0.04999
Warmup Train [1][990/3239]	Time 0.215 (0.554)	Data 0.002 (0.295)	Loss 6.6237 (6.6697)	Top-1 acc 0.391 (0.226)	Top-5 acc 1.172 (1.069)	lr 0.04999
Warmup Train [1][1000/3239]	Time 3.823 (0.556)	Data 3.654 (0.297)	Loss 6.6671 (6.6694)	Top-1 acc 0.391 (0.229)	Top-5 acc 1.172 (1.072)	lr 0.04999
Warmup Train [1][1010/3239]	Time 0.212 (0.553)	Data 0.001 (0.294)	Loss 6.6224 (6.6691)	Top-1 acc 0.000 (0.229)	Top-5 acc 0.000 (1.073)	lr 0.04999
Warmup Train [1][1020/3239]	Time 0.219 (0.553)	Data 0.001 (0.295)	Loss 6.6154 (6.6688)	Top-1 acc 0.000 (0.231)	Top-5 acc 0.781 (1.073)	lr 0.04999
Warmup Train [1][1030/3239]	Time 0.226 (0.550)	Data 0.001 (0.292)	Loss 6.6451 (6.6686)	Top-1 acc 0.000 (0.229)	Top-5 acc 0.391 (1.071)	lr 0.04999
Warmup Train [1][1040/3239]	Time 0.249 (0.553)	Data 0.001 (0.296)	Loss 6.6594 (6.6684)	Top-1 acc 0.000 (0.229)	Top-5 acc 0.781 (1.072)	lr 0.04999
Warmup Train [1][1050/3239]	Time 0.211 (0.553)	Data 0.001 (0.296)	Loss 6.6731 (6.6681)	Top-1 acc 0.781 (0.230)	Top-5 acc 1.562 (1.075)	lr 0.04999
Warmup Train [1][1060/3239]	Time 0.175 (0.551)	Data 0.002 (0.294)	Loss 6.6377 (6.6677)	Top-1 acc 0.000 (0.232)	Top-5 acc 1.562 (1.084)	lr 0.04999
Warmup Train [1][1070/3239]	Time 0.202 (0.552)	Data 0.001 (0.295)	Loss 6.6545 (6.6675)	Top-1 acc 0.391 (0.232)	Top-5 acc 1.172 (1.083)	lr 0.04999
Warmup Train [1][1080/3239]	Time 0.208 (0.550)	Data 0.001 (0.294)	Loss 6.6367 (6.6673)	Top-1 acc 0.391 (0.232)	Top-5 acc 1.562 (1.084)	lr 0.04999
Warmup Train [1][1090/3239]	Time 0.165 (0.549)	Data 0.002 (0.293)	Loss 6.6288 (6.6671)	Top-1 acc 0.781 (0.233)	Top-5 acc 1.562 (1.085)	lr 0.04999
Warmup Train [1][1100/3239]	Time 0.229 (0.551)	Data 0.002 (0.295)	Loss 6.6213 (6.6670)	Top-1 acc 0.000 (0.232)	Top-5 acc 0.391 (1.083)	lr 0.04999
Warmup Train [1][1110/3239]	Time 0.348 (0.549)	Data 0.001 (0.294)	Loss 6.6290 (6.6667)	Top-1 acc 0.781 (0.232)	Top-5 acc 1.562 (1.084)	lr 0.04999
Warmup Train [1][1120/3239]	Time 3.237 (0.549)	Data 2.981 (0.294)	Loss 6.7008 (6.6664)	Top-1 acc 0.000 (0.231)	Top-5 acc 0.391 (1.083)	lr 0.04999
Warmup Train [1][1130/3239]	Time 0.265 (0.548)	Data 0.001 (0.293)	Loss 6.6866 (6.6661)	Top-1 acc 0.391 (0.232)	Top-5 acc 1.172 (1.084)	lr 0.04999
Warmup Train [1][1140/3239]	Time 0.178 (0.547)	Data 0.002 (0.293)	Loss 6.6877 (6.6661)	Top-1 acc 0.000 (0.231)	Top-5 acc 0.781 (1.084)	lr 0.04999
Warmup Train [1][1150/3239]	Time 0.195 (0.545)	Data 0.001 (0.290)	Loss 6.6371 (6.6658)	Top-1 acc 0.391 (0.231)	Top-5 acc 0.781 (1.082)	lr 0.04999
Warmup Train [1][1160/3239]	Time 4.198 (0.549)	Data 3.992 (0.295)	Loss 6.6413 (6.6655)	Top-1 acc 0.000 (0.230)	Top-5 acc 0.391 (1.079)	lr 0.04999
Warmup Train [1][1170/3239]	Time 0.214 (0.546)	Data 0.001 (0.292)	Loss 6.5717 (6.6651)	Top-1 acc 0.000 (0.231)	Top-5 acc 1.172 (1.081)	lr 0.04999
Warmup Train [1][1180/3239]	Time 0.264 (0.545)	Data 0.001 (0.292)	Loss 6.6870 (6.6650)	Top-1 acc 0.391 (0.230)	Top-5 acc 0.391 (1.080)	lr 0.04999
Warmup Train [1][1190/3239]	Time 0.220 (0.545)	Data 0.006 (0.292)	Loss 6.6201 (6.6647)	Top-1 acc 0.781 (0.230)	Top-5 acc 2.344 (1.080)	lr 0.04999
Warmup Train [1][1200/3239]	Time 0.324 (0.544)	Data 0.001 (0.292)	Loss 6.6629 (6.6644)	Top-1 acc 0.391 (0.230)	Top-5 acc 1.172 (1.081)	lr 0.04999
Warmup Train [1][1210/3239]	Time 0.266 (0.544)	Data 0.001 (0.291)	Loss 6.6100 (6.6640)	Top-1 acc 0.391 (0.231)	Top-5 acc 1.172 (1.082)	lr 0.04999
Warmup Train [1][1220/3239]	Time 0.330 (0.545)	Data 0.002 (0.293)	Loss 6.6421 (6.6638)	Top-1 acc 0.000 (0.231)	Top-5 acc 0.781 (1.085)	lr 0.04999
Warmup Train [1][1230/3239]	Time 0.959 (0.544)	Data 0.706 (0.292)	Loss 6.6056 (6.6635)	Top-1 acc 0.391 (0.232)	Top-5 acc 0.781 (1.086)	lr 0.04999
Warmup Train [1][1240/3239]	Time 0.302 (0.544)	Data 0.001 (0.292)	Loss 6.5868 (6.6632)	Top-1 acc 0.000 (0.231)	Top-5 acc 0.781 (1.085)	lr 0.04999
Warmup Train [1][1250/3239]	Time 0.197 (0.543)	Data 0.001 (0.292)	Loss 6.6302 (6.6630)	Top-1 acc 0.391 (0.232)	Top-5 acc 1.562 (1.087)	lr 0.04999
Warmup Train [1][1260/3239]	Time 0.169 (0.542)	Data 0.001 (0.291)	Loss 6.5942 (6.6627)	Top-1 acc 0.000 (0.233)	Top-5 acc 1.562 (1.088)	lr 0.04999
Warmup Train [1][1270/3239]	Time 0.220 (0.542)	Data 0.002 (0.291)	Loss 6.6574 (6.6625)	Top-1 acc 0.000 (0.233)	Top-5 acc 0.391 (1.090)	lr 0.04999
Warmup Train [1][1280/3239]	Time 1.872 (0.541)	Data 1.662 (0.290)	Loss 6.6538 (6.6622)	Top-1 acc 0.000 (0.233)	Top-5 acc 0.781 (1.090)	lr 0.04999
Warmup Train [1][1290/3239]	Time 0.199 (0.543)	Data 0.002 (0.293)	Loss 6.5631 (6.6620)	Top-1 acc 0.391 (0.232)	Top-5 acc 1.953 (1.090)	lr 0.04999
Warmup Train [1][1300/3239]	Time 0.193 (0.542)	Data 0.001 (0.292)	Loss 6.6073 (6.6617)	Top-1 acc 0.781 (0.234)	Top-5 acc 3.125 (1.095)	lr 0.04999
Warmup Train [1][1310/3239]	Time 0.195 (0.540)	Data 0.001 (0.290)	Loss 6.6630 (6.6616)	Top-1 acc 0.781 (0.234)	Top-5 acc 1.562 (1.096)	lr 0.04999
Warmup Train [1][1320/3239]	Time 1.481 (0.540)	Data 1.156 (0.291)	Loss 6.6455 (6.6613)	Top-1 acc 0.000 (0.234)	Top-5 acc 0.781 (1.095)	lr 0.04999
Warmup Train [1][1330/3239]	Time 0.144 (0.541)	Data 0.001 (0.292)	Loss 6.6345 (6.6610)	Top-1 acc 0.391 (0.236)	Top-5 acc 1.172 (1.100)	lr 0.04999
Warmup Train [1][1340/3239]	Time 0.233 (0.539)	Data 0.001 (0.290)	Loss 6.6069 (6.6607)	Top-1 acc 0.000 (0.236)	Top-5 acc 0.391 (1.101)	lr 0.04999
Warmup Train [1][1350/3239]	Time 0.186 (0.540)	Data 0.001 (0.292)	Loss 6.6384 (6.6606)	Top-1 acc 0.391 (0.237)	Top-5 acc 2.734 (1.100)	lr 0.04999
Warmup Train [1][1360/3239]	Time 0.176 (0.542)	Data 0.001 (0.293)	Loss 6.6451 (6.6604)	Top-1 acc 0.000 (0.236)	Top-5 acc 1.172 (1.102)	lr 0.04999
Warmup Train [1][1370/3239]	Time 0.200 (0.541)	Data 0.002 (0.293)	Loss 6.6509 (6.6603)	Top-1 acc 0.000 (0.235)	Top-5 acc 0.391 (1.101)	lr 0.04999
Warmup Train [1][1380/3239]	Time 0.203 (0.541)	Data 0.001 (0.293)	Loss 6.6196 (6.6600)	Top-1 acc 0.391 (0.236)	Top-5 acc 1.953 (1.104)	lr 0.04999
Warmup Train [1][1390/3239]	Time 3.401 (0.541)	Data 3.096 (0.293)	Loss 6.6334 (6.6599)	Top-1 acc 0.781 (0.236)	Top-5 acc 1.172 (1.105)	lr 0.04999
Warmup Train [1][1400/3239]	Time 0.284 (0.541)	Data 0.002 (0.293)	Loss 6.6745 (6.6597)	Top-1 acc 0.391 (0.237)	Top-5 acc 2.344 (1.109)	lr 0.04999
Warmup Train [1][1410/3239]	Time 0.277 (0.541)	Data 0.001 (0.293)	Loss 6.6720 (6.6594)	Top-1 acc 0.391 (0.236)	Top-5 acc 0.781 (1.108)	lr 0.04999
Warmup Train [1][1420/3239]	Time 0.215 (0.539)	Data 0.001 (0.291)	Loss 6.6218 (6.6592)	Top-1 acc 0.781 (0.237)	Top-5 acc 1.562 (1.111)	lr 0.04999
Warmup Train [1][1430/3239]	Time 0.200 (0.540)	Data 0.001 (0.293)	Loss 6.6394 (6.6591)	Top-1 acc 0.391 (0.237)	Top-5 acc 0.781 (1.112)	lr 0.04998
Warmup Train [1][1440/3239]	Time 1.975 (0.539)	Data 1.788 (0.292)	Loss 6.6382 (6.6588)	Top-1 acc 0.391 (0.238)	Top-5 acc 1.953 (1.113)	lr 0.04998
Warmup Train [1][1450/3239]	Time 0.221 (0.539)	Data 0.001 (0.292)	Loss 6.6124 (6.6586)	Top-1 acc 0.000 (0.237)	Top-5 acc 1.562 (1.114)	lr 0.04998
Warmup Train [1][1460/3239]	Time 0.162 (0.540)	Data 0.001 (0.293)	Loss 6.6316 (6.6583)	Top-1 acc 0.000 (0.237)	Top-5 acc 0.781 (1.113)	lr 0.04998
Warmup Train [1][1470/3239]	Time 0.233 (0.538)	Data 0.002 (0.291)	Loss 6.6015 (6.6581)	Top-1 acc 0.391 (0.236)	Top-5 acc 1.172 (1.115)	lr 0.04998
Warmup Train [1][1480/3239]	Time 3.954 (0.540)	Data 3.667 (0.294)	Loss 6.6155 (6.6579)	Top-1 acc 0.391 (0.238)	Top-5 acc 1.562 (1.116)	lr 0.04998
Warmup Train [1][1490/3239]	Time 0.258 (0.538)	Data 0.001 (0.292)	Loss 6.6808 (6.6576)	Top-1 acc 0.000 (0.238)	Top-5 acc 0.000 (1.117)	lr 0.04998
Warmup Train [1][1500/3239]	Time 0.195 (0.537)	Data 0.001 (0.291)	Loss 6.6007 (6.6574)	Top-1 acc 0.000 (0.238)	Top-5 acc 1.172 (1.116)	lr 0.04998
Warmup Train [1][1510/3239]	Time 0.248 (0.535)	Data 0.001 (0.290)	Loss 6.6229 (6.6572)	Top-1 acc 0.000 (0.238)	Top-5 acc 0.391 (1.118)	lr 0.04998
Warmup Train [1][1520/3239]	Time 0.216 (0.536)	Data 0.001 (0.291)	Loss 6.6165 (6.6569)	Top-1 acc 0.000 (0.238)	Top-5 acc 0.781 (1.116)	lr 0.04998
Warmup Train [1][1530/3239]	Time 0.306 (0.536)	Data 0.002 (0.291)	Loss 6.6456 (6.6567)	Top-1 acc 0.000 (0.237)	Top-5 acc 0.000 (1.117)	lr 0.04998
Warmup Train [1][1540/3239]	Time 0.201 (0.535)	Data 0.001 (0.290)	Loss 6.6266 (6.6565)	Top-1 acc 0.000 (0.237)	Top-5 acc 1.172 (1.117)	lr 0.04998
Warmup Train [1][1550/3239]	Time 0.229 (0.538)	Data 0.002 (0.292)	Loss 6.6303 (6.6562)	Top-1 acc 0.391 (0.237)	Top-5 acc 0.781 (1.120)	lr 0.04998
Warmup Train [1][1560/3239]	Time 0.236 (0.536)	Data 0.001 (0.291)	Loss 6.6061 (6.6560)	Top-1 acc 0.781 (0.237)	Top-5 acc 1.953 (1.118)	lr 0.04998
Warmup Train [1][1570/3239]	Time 0.213 (0.534)	Data 0.001 (0.289)	Loss 6.5931 (6.6558)	Top-1 acc 0.000 (0.237)	Top-5 acc 0.391 (1.120)	lr 0.04998
Warmup Train [1][1580/3239]	Time 0.217 (0.536)	Data 0.001 (0.291)	Loss 6.6065 (6.6556)	Top-1 acc 0.000 (0.238)	Top-5 acc 1.172 (1.122)	lr 0.04998
Warmup Train [1][1590/3239]	Time 0.156 (0.536)	Data 0.001 (0.291)	Loss 6.6557 (6.6554)	Top-1 acc 0.391 (0.238)	Top-5 acc 1.953 (1.125)	lr 0.04998
Warmup Train [1][1600/3239]	Time 0.217 (0.534)	Data 0.001 (0.289)	Loss 6.6070 (6.6551)	Top-1 acc 0.000 (0.237)	Top-5 acc 0.781 (1.126)	lr 0.04998
Warmup Train [1][1610/3239]	Time 0.238 (0.536)	Data 0.002 (0.292)	Loss 6.6171 (6.6549)	Top-1 acc 0.391 (0.238)	Top-5 acc 1.562 (1.129)	lr 0.04998
Warmup Train [1][1620/3239]	Time 0.211 (0.536)	Data 0.001 (0.292)	Loss 6.5906 (6.6547)	Top-1 acc 0.000 (0.237)	Top-5 acc 1.172 (1.131)	lr 0.04998
Warmup Train [1][1630/3239]	Time 0.290 (0.534)	Data 0.002 (0.290)	Loss 6.6382 (6.6544)	Top-1 acc 0.000 (0.238)	Top-5 acc 0.781 (1.134)	lr 0.04998
Warmup Train [1][1640/3239]	Time 2.443 (0.536)	Data 2.246 (0.293)	Loss 6.6006 (6.6543)	Top-1 acc 0.781 (0.240)	Top-5 acc 1.172 (1.137)	lr 0.04998
Warmup Train [1][1650/3239]	Time 0.224 (0.537)	Data 0.001 (0.293)	Loss 6.6020 (6.6540)	Top-1 acc 0.000 (0.240)	Top-5 acc 1.172 (1.138)	lr 0.04998
Warmup Train [1][1660/3239]	Time 0.201 (0.535)	Data 0.001 (0.291)	Loss 6.5788 (6.6538)	Top-1 acc 0.781 (0.241)	Top-5 acc 1.172 (1.141)	lr 0.04998
Warmup Train [1][1670/3239]	Time 0.230 (0.533)	Data 0.001 (0.290)	Loss 6.5828 (6.6535)	Top-1 acc 0.781 (0.242)	Top-5 acc 2.344 (1.145)	lr 0.04998
Warmup Train [1][1680/3239]	Time 0.166 (0.536)	Data 0.001 (0.293)	Loss 6.5834 (6.6533)	Top-1 acc 0.000 (0.242)	Top-5 acc 1.172 (1.145)	lr 0.04998
Warmup Train [1][1690/3239]	Time 0.223 (0.535)	Data 0.001 (0.292)	Loss 6.6375 (6.6530)	Top-1 acc 0.000 (0.244)	Top-5 acc 0.000 (1.146)	lr 0.04998
Warmup Train [1][1700/3239]	Time 0.213 (0.533)	Data 0.001 (0.290)	Loss 6.6000 (6.6528)	Top-1 acc 0.391 (0.243)	Top-5 acc 1.172 (1.146)	lr 0.04998
Warmup Train [1][1710/3239]	Time 2.255 (0.537)	Data 2.005 (0.294)	Loss 6.6166 (6.6525)	Top-1 acc 0.000 (0.243)	Top-5 acc 0.391 (1.146)	lr 0.04998
Warmup Train [1][1720/3239]	Time 0.257 (0.535)	Data 0.001 (0.292)	Loss 6.6261 (6.6523)	Top-1 acc 1.172 (0.244)	Top-5 acc 1.953 (1.149)	lr 0.04998
Warmup Train [1][1730/3239]	Time 0.337 (0.533)	Data 0.001 (0.290)	Loss 6.6177 (6.6520)	Top-1 acc 0.781 (0.245)	Top-5 acc 1.172 (1.151)	lr 0.04998
Warmup Train [1][1740/3239]	Time 0.177 (0.535)	Data 0.001 (0.292)	Loss 6.5923 (6.6518)	Top-1 acc 0.391 (0.244)	Top-5 acc 0.781 (1.151)	lr 0.04998
Warmup Train [1][1750/3239]	Time 0.229 (0.534)	Data 0.003 (0.292)	Loss 6.6130 (6.6516)	Top-1 acc 0.391 (0.245)	Top-5 acc 0.391 (1.151)	lr 0.04998
Warmup Train [1][1760/3239]	Time 0.212 (0.532)	Data 0.001 (0.290)	Loss 6.6237 (6.6514)	Top-1 acc 0.000 (0.244)	Top-5 acc 0.391 (1.149)	lr 0.04998
Warmup Train [1][1770/3239]	Time 0.208 (0.535)	Data 0.002 (0.293)	Loss 6.6337 (6.6512)	Top-1 acc 0.391 (0.244)	Top-5 acc 0.781 (1.150)	lr 0.04998
Warmup Train [1][1780/3239]	Time 0.191 (0.534)	Data 0.001 (0.292)	Loss 6.6179 (6.6509)	Top-1 acc 0.000 (0.245)	Top-5 acc 1.172 (1.152)	lr 0.04998
Warmup Train [1][1790/3239]	Time 0.198 (0.532)	Data 0.001 (0.291)	Loss 6.6204 (6.6507)	Top-1 acc 0.391 (0.244)	Top-5 acc 1.172 (1.153)	lr 0.04998
Warmup Train [1][1800/3239]	Time 5.185 (0.534)	Data 4.973 (0.293)	Loss 6.5915 (6.6504)	Top-1 acc 0.391 (0.245)	Top-5 acc 1.172 (1.154)	lr 0.04998
Warmup Train [1][1810/3239]	Time 0.223 (0.533)	Data 0.001 (0.292)	Loss 6.5830 (6.6503)	Top-1 acc 0.391 (0.245)	Top-5 acc 1.562 (1.154)	lr 0.04998
Warmup Train [1][1820/3239]	Time 0.201 (0.532)	Data 0.002 (0.290)	Loss 6.6061 (6.6500)	Top-1 acc 0.391 (0.244)	Top-5 acc 0.781 (1.156)	lr 0.04998
Warmup Train [1][1830/3239]	Time 0.167 (0.532)	Data 0.001 (0.291)	Loss 6.6104 (6.6499)	Top-1 acc 0.000 (0.244)	Top-5 acc 1.172 (1.156)	lr 0.04998
Warmup Train [1][1840/3239]	Time 0.227 (0.535)	Data 0.001 (0.294)	Loss 6.5985 (6.6497)	Top-1 acc 0.000 (0.243)	Top-5 acc 0.781 (1.157)	lr 0.04998
Warmup Train [1][1850/3239]	Time 0.150 (0.533)	Data 0.001 (0.292)	Loss 6.6337 (6.6494)	Top-1 acc 0.391 (0.243)	Top-5 acc 1.172 (1.156)	lr 0.04997
Warmup Train [1][1860/3239]	Time 0.215 (0.531)	Data 0.001 (0.291)	Loss 6.5969 (6.6492)	Top-1 acc 0.391 (0.244)	Top-5 acc 0.781 (1.156)	lr 0.04997
Warmup Train [1][1870/3239]	Time 3.215 (0.534)	Data 3.043 (0.294)	Loss 6.5777 (6.6490)	Top-1 acc 0.000 (0.244)	Top-5 acc 2.734 (1.157)	lr 0.04997
Warmup Train [1][1880/3239]	Time 0.203 (0.533)	Data 0.001 (0.292)	Loss 6.6143 (6.6488)	Top-1 acc 0.000 (0.244)	Top-5 acc 1.172 (1.158)	lr 0.04997
Warmup Train [1][1890/3239]	Time 0.225 (0.531)	Data 0.001 (0.291)	Loss 6.6059 (6.6485)	Top-1 acc 0.000 (0.245)	Top-5 acc 0.781 (1.161)	lr 0.04997
Warmup Train [1][1900/3239]	Time 0.210 (0.533)	Data 0.001 (0.292)	Loss 6.6083 (6.6483)	Top-1 acc 0.391 (0.246)	Top-5 acc 2.344 (1.164)	lr 0.04997
Warmup Train [1][1910/3239]	Time 0.129 (0.533)	Data 0.001 (0.293)	Loss 6.5885 (6.6481)	Top-1 acc 0.000 (0.246)	Top-5 acc 0.781 (1.163)	lr 0.04997
Warmup Train [1][1920/3239]	Time 0.281 (0.532)	Data 0.001 (0.292)	Loss 6.5940 (6.6479)	Top-1 acc 0.391 (0.247)	Top-5 acc 1.562 (1.164)	lr 0.04997
Warmup Train [1][1930/3239]	Time 0.384 (0.533)	Data 0.002 (0.293)	Loss 6.5623 (6.6476)	Top-1 acc 0.000 (0.247)	Top-5 acc 1.562 (1.165)	lr 0.04997
Warmup Train [1][1940/3239]	Time 0.202 (0.532)	Data 0.002 (0.292)	Loss 6.6038 (6.6474)	Top-1 acc 0.391 (0.247)	Top-5 acc 1.172 (1.165)	lr 0.04997
Warmup Train [1][1950/3239]	Time 0.254 (0.530)	Data 0.001 (0.290)	Loss 6.5953 (6.6473)	Top-1 acc 0.000 (0.246)	Top-5 acc 1.562 (1.165)	lr 0.04997
Warmup Train [1][1960/3239]	Time 3.219 (0.533)	Data 2.963 (0.293)	Loss 6.6135 (6.6471)	Top-1 acc 0.391 (0.246)	Top-5 acc 3.125 (1.166)	lr 0.04997
Warmup Train [1][1970/3239]	Time 0.207 (0.532)	Data 0.001 (0.293)	Loss 6.5815 (6.6469)	Top-1 acc 0.391 (0.246)	Top-5 acc 1.172 (1.165)	lr 0.04997
Warmup Train [1][1980/3239]	Time 0.153 (0.531)	Data 0.001 (0.291)	Loss 6.6000 (6.6466)	Top-1 acc 1.172 (0.246)	Top-5 acc 1.562 (1.167)	lr 0.04997
Warmup Train [1][1990/3239]	Time 0.247 (0.530)	Data 0.002 (0.290)	Loss 6.6225 (6.6464)	Top-1 acc 0.000 (0.246)	Top-5 acc 1.172 (1.166)	lr 0.04997
Warmup Train [1][2000/3239]	Time 0.223 (0.533)	Data 0.002 (0.293)	Loss 6.6450 (6.6462)	Top-1 acc 0.000 (0.246)	Top-5 acc 0.781 (1.167)	lr 0.04997
Warmup Train [1][2010/3239]	Time 0.177 (0.531)	Data 0.001 (0.292)	Loss 6.5948 (6.6460)	Top-1 acc 0.000 (0.246)	Top-5 acc 1.172 (1.169)	lr 0.04997
Warmup Train [1][2020/3239]	Time 0.329 (0.530)	Data 0.001 (0.291)	Loss 6.5862 (6.6458)	Top-1 acc 0.000 (0.246)	Top-5 acc 1.562 (1.169)	lr 0.04997
Warmup Train [1][2030/3239]	Time 0.836 (0.531)	Data 0.694 (0.292)	Loss 6.5857 (6.6456)	Top-1 acc 0.000 (0.247)	Top-5 acc 1.562 (1.168)	lr 0.04997
Warmup Train [1][2040/3239]	Time 0.244 (0.530)	Data 0.001 (0.291)	Loss 6.5896 (6.6454)	Top-1 acc 0.000 (0.246)	Top-5 acc 3.125 (1.170)	lr 0.04997
Warmup Train [1][2050/3239]	Time 0.254 (0.528)	Data 0.001 (0.289)	Loss 6.6433 (6.6452)	Top-1 acc 0.391 (0.247)	Top-5 acc 1.562 (1.171)	lr 0.04997
Warmup Train [1][2060/3239]	Time 0.147 (0.532)	Data 0.001 (0.294)	Loss 6.5966 (6.6450)	Top-1 acc 0.000 (0.247)	Top-5 acc 0.781 (1.171)	lr 0.04997
Warmup Train [1][2070/3239]	Time 0.216 (0.531)	Data 0.001 (0.292)	Loss 6.6269 (6.6448)	Top-1 acc 0.000 (0.248)	Top-5 acc 0.781 (1.172)	lr 0.04997
Warmup Train [1][2080/3239]	Time 0.227 (0.529)	Data 0.002 (0.291)	Loss 6.5962 (6.6446)	Top-1 acc 0.391 (0.249)	Top-5 acc 1.562 (1.174)	lr 0.04997
Warmup Train [1][2090/3239]	Time 0.193 (0.533)	Data 0.002 (0.294)	Loss 6.5740 (6.6443)	Top-1 acc 0.000 (0.249)	Top-5 acc 1.562 (1.176)	lr 0.04997
Warmup Train [1][2100/3239]	Time 0.215 (0.531)	Data 0.002 (0.293)	Loss 6.6129 (6.6442)	Top-1 acc 0.000 (0.250)	Top-5 acc 0.781 (1.177)	lr 0.04997
Warmup Train [1][2110/3239]	Time 0.245 (0.530)	Data 0.002 (0.291)	Loss 6.5869 (6.6440)	Top-1 acc 0.391 (0.249)	Top-5 acc 1.172 (1.178)	lr 0.04997
Warmup Train [1][2120/3239]	Time 9.666 (0.533)	Data 9.327 (0.294)	Loss 6.5930 (6.6438)	Top-1 acc 0.000 (0.250)	Top-5 acc 1.172 (1.182)	lr 0.04997
Warmup Train [1][2130/3239]	Time 0.191 (0.531)	Data 0.001 (0.293)	Loss 6.5684 (6.6435)	Top-1 acc 0.391 (0.250)	Top-5 acc 1.562 (1.183)	lr 0.04997
Warmup Train [1][2140/3239]	Time 0.226 (0.530)	Data 0.001 (0.292)	Loss 6.6247 (6.6433)	Top-1 acc 0.000 (0.250)	Top-5 acc 0.781 (1.184)	lr 0.04997
Warmup Train [1][2150/3239]	Time 0.252 (0.529)	Data 0.001 (0.290)	Loss 6.5881 (6.6431)	Top-1 acc 0.391 (0.250)	Top-5 acc 0.781 (1.184)	lr 0.04997
Warmup Train [1][2160/3239]	Time 0.197 (0.530)	Data 0.001 (0.292)	Loss 6.5859 (6.6429)	Top-1 acc 0.000 (0.251)	Top-5 acc 1.953 (1.185)	lr 0.04997
Warmup Train [1][2170/3239]	Time 0.268 (0.529)	Data 0.002 (0.291)	Loss 6.6242 (6.6427)	Top-1 acc 0.391 (0.251)	Top-5 acc 1.562 (1.185)	lr 0.04997
Warmup Train [1][2180/3239]	Time 0.229 (0.528)	Data 0.001 (0.290)	Loss 6.6380 (6.6425)	Top-1 acc 0.391 (0.252)	Top-5 acc 2.344 (1.188)	lr 0.04997
Warmup Train [1][2190/3239]	Time 0.944 (0.531)	Data 0.705 (0.293)	Loss 6.6489 (6.6423)	Top-1 acc 0.000 (0.253)	Top-5 acc 2.344 (1.190)	lr 0.04996
Warmup Train [1][2200/3239]	Time 0.205 (0.529)	Data 0.001 (0.291)	Loss 6.6012 (6.6420)	Top-1 acc 0.781 (0.253)	Top-5 acc 0.781 (1.191)	lr 0.04996
Warmup Train [1][2210/3239]	Time 0.220 (0.528)	Data 0.001 (0.290)	Loss 6.5746 (6.6418)	Top-1 acc 0.391 (0.254)	Top-5 acc 1.562 (1.193)	lr 0.04996
Warmup Train [1][2220/3239]	Time 0.370 (0.530)	Data 0.001 (0.292)	Loss 6.6449 (6.6416)	Top-1 acc 0.391 (0.254)	Top-5 acc 0.391 (1.194)	lr 0.04996
Warmup Train [1][2230/3239]	Time 0.285 (0.529)	Data 0.002 (0.291)	Loss 6.6133 (6.6414)	Top-1 acc 0.391 (0.255)	Top-5 acc 0.781 (1.195)	lr 0.04996
Warmup Train [1][2240/3239]	Time 0.299 (0.527)	Data 0.001 (0.290)	Loss 6.5484 (6.6411)	Top-1 acc 0.391 (0.255)	Top-5 acc 1.562 (1.198)	lr 0.04996
Warmup Train [1][2250/3239]	Time 0.224 (0.531)	Data 0.002 (0.293)	Loss 6.5762 (6.6409)	Top-1 acc 0.000 (0.255)	Top-5 acc 0.391 (1.200)	lr 0.04996
Warmup Train [1][2260/3239]	Time 0.217 (0.530)	Data 0.001 (0.292)	Loss 6.5746 (6.6407)	Top-1 acc 0.000 (0.255)	Top-5 acc 1.953 (1.201)	lr 0.04996
Warmup Train [1][2270/3239]	Time 0.167 (0.528)	Data 0.001 (0.291)	Loss 6.6070 (6.6406)	Top-1 acc 0.000 (0.255)	Top-5 acc 0.391 (1.200)	lr 0.04996
Warmup Train [1][2280/3239]	Time 8.237 (0.531)	Data 8.030 (0.293)	Loss 6.5819 (6.6404)	Top-1 acc 0.000 (0.255)	Top-5 acc 1.172 (1.201)	lr 0.04996
Warmup Train [1][2290/3239]	Time 0.157 (0.529)	Data 0.001 (0.292)	Loss 6.5942 (6.6401)	Top-1 acc 0.391 (0.255)	Top-5 acc 1.953 (1.202)	lr 0.04996
Warmup Train [1][2300/3239]	Time 0.205 (0.528)	Data 0.001 (0.291)	Loss 6.5839 (6.6399)	Top-1 acc 0.781 (0.256)	Top-5 acc 1.953 (1.203)	lr 0.04996
Warmup Train [1][2310/3239]	Time 0.131 (0.526)	Data 0.001 (0.289)	Loss 6.6017 (6.6397)	Top-1 acc 0.781 (0.257)	Top-5 acc 1.172 (1.204)	lr 0.04996
Warmup Train [1][2320/3239]	Time 0.181 (0.530)	Data 0.001 (0.293)	Loss 6.5774 (6.6394)	Top-1 acc 0.000 (0.257)	Top-5 acc 1.953 (1.206)	lr 0.04996
Warmup Train [1][2330/3239]	Time 0.384 (0.529)	Data 0.001 (0.292)	Loss 6.5765 (6.6392)	Top-1 acc 0.000 (0.256)	Top-5 acc 0.781 (1.205)	lr 0.04996
Warmup Train [1][2340/3239]	Time 0.330 (0.527)	Data 0.002 (0.290)	Loss 6.5804 (6.6391)	Top-1 acc 0.000 (0.257)	Top-5 acc 1.562 (1.205)	lr 0.04996
Warmup Train [1][2350/3239]	Time 0.223 (0.530)	Data 0.001 (0.293)	Loss 6.5719 (6.6389)	Top-1 acc 0.000 (0.257)	Top-5 acc 2.344 (1.207)	lr 0.04996
Warmup Train [1][2360/3239]	Time 0.225 (0.528)	Data 0.001 (0.291)	Loss 6.5856 (6.6387)	Top-1 acc 0.781 (0.257)	Top-5 acc 1.172 (1.207)	lr 0.04996
Warmup Train [1][2370/3239]	Time 0.217 (0.527)	Data 0.041 (0.290)	Loss 6.6057 (6.6385)	Top-1 acc 0.391 (0.258)	Top-5 acc 0.391 (1.208)	lr 0.04996
Warmup Train [1][2380/3239]	Time 0.250 (0.530)	Data 0.001 (0.293)	Loss 6.5492 (6.6383)	Top-1 acc 0.391 (0.258)	Top-5 acc 1.562 (1.210)	lr 0.04996
Warmup Train [1][2390/3239]	Time 0.234 (0.528)	Data 0.002 (0.292)	Loss 6.5691 (6.6381)	Top-1 acc 0.391 (0.258)	Top-5 acc 1.562 (1.211)	lr 0.04996
Warmup Train [1][2400/3239]	Time 0.242 (0.527)	Data 0.001 (0.291)	Loss 6.5812 (6.6379)	Top-1 acc 0.781 (0.258)	Top-5 acc 1.953 (1.213)	lr 0.04996
Warmup Train [1][2410/3239]	Time 0.157 (0.528)	Data 0.001 (0.292)	Loss 6.5579 (6.6377)	Top-1 acc 1.562 (0.259)	Top-5 acc 2.734 (1.215)	lr 0.04996
Warmup Train [1][2420/3239]	Time 0.169 (0.527)	Data 0.001 (0.291)	Loss 6.5484 (6.6374)	Top-1 acc 0.391 (0.259)	Top-5 acc 2.734 (1.216)	lr 0.04996
Warmup Train [1][2430/3239]	Time 0.152 (0.527)	Data 0.001 (0.291)	Loss 6.5542 (6.6372)	Top-1 acc 0.000 (0.259)	Top-5 acc 1.562 (1.217)	lr 0.04996
Warmup Train [1][2440/3239]	Time 6.033 (0.529)	Data 5.733 (0.293)	Loss 6.5401 (6.6370)	Top-1 acc 0.000 (0.259)	Top-5 acc 1.562 (1.218)	lr 0.04996
Warmup Train [1][2450/3239]	Time 0.165 (0.528)	Data 0.001 (0.292)	Loss 6.6319 (6.6368)	Top-1 acc 0.391 (0.260)	Top-5 acc 0.781 (1.219)	lr 0.04996
Warmup Train [1][2460/3239]	Time 0.208 (0.528)	Data 0.001 (0.292)	Loss 6.5669 (6.6366)	Top-1 acc 0.781 (0.260)	Top-5 acc 2.344 (1.222)	lr 0.04996
Warmup Train [1][2470/3239]	Time 0.257 (0.527)	Data 0.001 (0.291)	Loss 6.5622 (6.6364)	Top-1 acc 0.000 (0.260)	Top-5 acc 1.172 (1.222)	lr 0.04996
Warmup Train [1][2480/3239]	Time 0.217 (0.529)	Data 0.001 (0.293)	Loss 6.5764 (6.6362)	Top-1 acc 0.781 (0.261)	Top-5 acc 1.172 (1.223)	lr 0.04995
Warmup Train [1][2490/3239]	Time 0.142 (0.528)	Data 0.001 (0.292)	Loss 6.6310 (6.6360)	Top-1 acc 0.000 (0.261)	Top-5 acc 1.172 (1.224)	lr 0.04995
Warmup Train [1][2500/3239]	Time 0.196 (0.527)	Data 0.001 (0.291)	Loss 6.5817 (6.6358)	Top-1 acc 0.391 (0.262)	Top-5 acc 1.172 (1.225)	lr 0.04995
Warmup Train [1][2510/3239]	Time 0.213 (0.529)	Data 0.001 (0.293)	Loss 6.6384 (6.6356)	Top-1 acc 0.000 (0.262)	Top-5 acc 1.562 (1.228)	lr 0.04995
Warmup Train [1][2520/3239]	Time 0.241 (0.527)	Data 0.002 (0.292)	Loss 6.5816 (6.6354)	Top-1 acc 0.391 (0.263)	Top-5 acc 3.125 (1.230)	lr 0.04995
Warmup Train [1][2530/3239]	Time 0.214 (0.526)	Data 0.001 (0.291)	Loss 6.5277 (6.6351)	Top-1 acc 1.172 (0.264)	Top-5 acc 3.516 (1.233)	lr 0.04995
Warmup Train [1][2540/3239]	Time 0.197 (0.529)	Data 0.001 (0.293)	Loss 6.6054 (6.6350)	Top-1 acc 1.562 (0.265)	Top-5 acc 2.344 (1.234)	lr 0.04995
Warmup Train [1][2550/3239]	Time 0.228 (0.527)	Data 0.001 (0.292)	Loss 6.6037 (6.6348)	Top-1 acc 0.391 (0.265)	Top-5 acc 0.781 (1.235)	lr 0.04995
Warmup Train [1][2560/3239]	Time 0.241 (0.526)	Data 0.001 (0.291)	Loss 6.5943 (6.6345)	Top-1 acc 0.391 (0.265)	Top-5 acc 1.172 (1.235)	lr 0.04995
Warmup Train [1][2570/3239]	Time 0.279 (0.529)	Data 0.002 (0.294)	Loss 6.5987 (6.6344)	Top-1 acc 0.000 (0.265)	Top-5 acc 0.781 (1.236)	lr 0.04995
Warmup Train [1][2580/3239]	Time 0.264 (0.528)	Data 0.002 (0.293)	Loss 6.5977 (6.6342)	Top-1 acc 0.391 (0.265)	Top-5 acc 1.953 (1.237)	lr 0.04995
Warmup Train [1][2590/3239]	Time 0.188 (0.527)	Data 0.002 (0.292)	Loss 6.5715 (6.6340)	Top-1 acc 0.781 (0.265)	Top-5 acc 3.516 (1.239)	lr 0.04995
Warmup Train [1][2600/3239]	Time 9.951 (0.529)	Data 9.763 (0.294)	Loss 6.5831 (6.6338)	Top-1 acc 0.000 (0.266)	Top-5 acc 3.125 (1.240)	lr 0.04995
Warmup Train [1][2610/3239]	Time 0.222 (0.528)	Data 0.001 (0.293)	Loss 6.5910 (6.6336)	Top-1 acc 0.391 (0.267)	Top-5 acc 1.172 (1.240)	lr 0.04995
Warmup Train [1][2620/3239]	Time 0.197 (0.527)	Data 0.002 (0.292)	Loss 6.5759 (6.6334)	Top-1 acc 0.781 (0.266)	Top-5 acc 1.953 (1.242)	lr 0.04995
Warmup Train [1][2630/3239]	Time 0.210 (0.526)	Data 0.001 (0.291)	Loss 6.5610 (6.6332)	Top-1 acc 0.000 (0.266)	Top-5 acc 1.953 (1.242)	lr 0.04995
Warmup Train [1][2640/3239]	Time 0.210 (0.527)	Data 0.001 (0.292)	Loss 6.5824 (6.6330)	Top-1 acc 1.172 (0.266)	Top-5 acc 2.344 (1.243)	lr 0.04995
Warmup Train [1][2650/3239]	Time 0.262 (0.526)	Data 0.002 (0.291)	Loss 6.5653 (6.6329)	Top-1 acc 0.000 (0.267)	Top-5 acc 3.516 (1.245)	lr 0.04995
Warmup Train [1][2660/3239]	Time 0.220 (0.525)	Data 0.001 (0.290)	Loss 6.5730 (6.6327)	Top-1 acc 0.391 (0.267)	Top-5 acc 1.562 (1.247)	lr 0.04995
Warmup Train [1][2670/3239]	Time 0.206 (0.528)	Data 0.001 (0.293)	Loss 6.5542 (6.6325)	Top-1 acc 0.391 (0.268)	Top-5 acc 1.562 (1.248)	lr 0.04995
Warmup Train [1][2680/3239]	Time 0.338 (0.527)	Data 0.001 (0.292)	Loss 6.6066 (6.6323)	Top-1 acc 0.000 (0.268)	Top-5 acc 0.391 (1.248)	lr 0.04995
Warmup Train [1][2690/3239]	Time 0.235 (0.526)	Data 0.001 (0.291)	Loss 6.5871 (6.6321)	Top-1 acc 0.781 (0.268)	Top-5 acc 1.953 (1.250)	lr 0.04995
Warmup Train [1][2700/3239]	Time 0.194 (0.527)	Data 0.001 (0.293)	Loss 6.5646 (6.6318)	Top-1 acc 0.391 (0.269)	Top-5 acc 0.781 (1.252)	lr 0.04995
Warmup Train [1][2710/3239]	Time 0.226 (0.526)	Data 0.001 (0.291)	Loss 6.5728 (6.6316)	Top-1 acc 0.000 (0.268)	Top-5 acc 1.172 (1.253)	lr 0.04995
Warmup Train [1][2720/3239]	Time 0.222 (0.525)	Data 0.001 (0.290)	Loss 6.5916 (6.6315)	Top-1 acc 0.781 (0.269)	Top-5 acc 2.734 (1.254)	lr 0.04995
Warmup Train [1][2730/3239]	Time 0.266 (0.527)	Data 0.001 (0.293)	Loss 6.5633 (6.6313)	Top-1 acc 0.000 (0.269)	Top-5 acc 1.953 (1.254)	lr 0.04995
Warmup Train [1][2740/3239]	Time 0.254 (0.526)	Data 0.001 (0.292)	Loss 6.5522 (6.6311)	Top-1 acc 0.391 (0.269)	Top-5 acc 0.781 (1.255)	lr 0.04994
Warmup Train [1][2750/3239]	Time 0.216 (0.526)	Data 0.002 (0.292)	Loss 6.5759 (6.6310)	Top-1 acc 0.000 (0.269)	Top-5 acc 1.562 (1.255)	lr 0.04994
Warmup Train [1][2760/3239]	Time 7.122 (0.528)	Data 6.885 (0.293)	Loss 6.5564 (6.6308)	Top-1 acc 0.391 (0.270)	Top-5 acc 1.172 (1.256)	lr 0.04994
Warmup Train [1][2770/3239]	Time 0.217 (0.526)	Data 0.002 (0.292)	Loss 6.5552 (6.6306)	Top-1 acc 0.000 (0.270)	Top-5 acc 0.781 (1.258)	lr 0.04994
Warmup Train [1][2780/3239]	Time 0.162 (0.526)	Data 0.001 (0.292)	Loss 6.5883 (6.6304)	Top-1 acc 0.000 (0.270)	Top-5 acc 2.344 (1.258)	lr 0.04994
Warmup Train [1][2790/3239]	Time 0.272 (0.525)	Data 0.001 (0.291)	Loss 6.5645 (6.6302)	Top-1 acc 0.391 (0.269)	Top-5 acc 0.781 (1.258)	lr 0.04994
Warmup Train [1][2800/3239]	Time 0.220 (0.527)	Data 0.001 (0.293)	Loss 6.5765 (6.6300)	Top-1 acc 0.391 (0.269)	Top-5 acc 1.562 (1.259)	lr 0.04994
Warmup Train [1][2810/3239]	Time 0.182 (0.526)	Data 0.001 (0.292)	Loss 6.5723 (6.6298)	Top-1 acc 0.781 (0.270)	Top-5 acc 2.344 (1.261)	lr 0.04994
Warmup Train [1][2820/3239]	Time 0.211 (0.525)	Data 0.001 (0.291)	Loss 6.5538 (6.6296)	Top-1 acc 0.391 (0.270)	Top-5 acc 1.953 (1.261)	lr 0.04994
Warmup Train [1][2830/3239]	Time 0.166 (0.527)	Data 0.002 (0.293)	Loss 6.6007 (6.6294)	Top-1 acc 0.781 (0.271)	Top-5 acc 2.344 (1.263)	lr 0.04994
Warmup Train [1][2840/3239]	Time 0.144 (0.526)	Data 0.002 (0.292)	Loss 6.6068 (6.6292)	Top-1 acc 0.000 (0.271)	Top-5 acc 0.781 (1.264)	lr 0.04994
Warmup Train [1][2850/3239]	Time 0.200 (0.525)	Data 0.001 (0.291)	Loss 6.6203 (6.6291)	Top-1 acc 0.391 (0.272)	Top-5 acc 2.344 (1.265)	lr 0.04994
Warmup Train [1][2860/3239]	Time 0.175 (0.527)	Data 0.001 (0.294)	Loss 6.5914 (6.6289)	Top-1 acc 0.000 (0.271)	Top-5 acc 3.125 (1.266)	lr 0.04994
Warmup Train [1][2870/3239]	Time 0.217 (0.526)	Data 0.001 (0.293)	Loss 6.5646 (6.6286)	Top-1 acc 0.000 (0.271)	Top-5 acc 0.781 (1.267)	lr 0.04994
Warmup Train [1][2880/3239]	Time 0.265 (0.525)	Data 0.002 (0.292)	Loss 6.6017 (6.6285)	Top-1 acc 0.000 (0.272)	Top-5 acc 0.391 (1.268)	lr 0.04994
Warmup Train [1][2890/3239]	Time 0.138 (0.527)	Data 0.001 (0.294)	Loss 6.5326 (6.6283)	Top-1 acc 0.781 (0.272)	Top-5 acc 2.734 (1.270)	lr 0.04994
Warmup Train [1][2900/3239]	Time 0.202 (0.526)	Data 0.001 (0.293)	Loss 6.5698 (6.6281)	Top-1 acc 0.000 (0.273)	Top-5 acc 0.781 (1.271)	lr 0.04994
Warmup Train [1][2910/3239]	Time 0.219 (0.525)	Data 0.002 (0.292)	Loss 6.5532 (6.6279)	Top-1 acc 0.000 (0.272)	Top-5 acc 1.953 (1.271)	lr 0.04994
Warmup Train [1][2920/3239]	Time 7.262 (0.528)	Data 6.987 (0.295)	Loss 6.5778 (6.6277)	Top-1 acc 0.781 (0.273)	Top-5 acc 2.344 (1.272)	lr 0.04994
Warmup Train [1][2930/3239]	Time 0.314 (0.527)	Data 0.002 (0.294)	Loss 6.5502 (6.6275)	Top-1 acc 0.000 (0.272)	Top-5 acc 0.000 (1.272)	lr 0.04994
Warmup Train [1][2940/3239]	Time 0.265 (0.525)	Data 0.001 (0.293)	Loss 6.5456 (6.6274)	Top-1 acc 0.781 (0.272)	Top-5 acc 1.953 (1.273)	lr 0.04994
Warmup Train [1][2950/3239]	Time 0.212 (0.524)	Data 0.001 (0.292)	Loss 6.6076 (6.6272)	Top-1 acc 0.000 (0.272)	Top-5 acc 0.000 (1.272)	lr 0.04994
Warmup Train [1][2960/3239]	Time 0.203 (0.527)	Data 0.002 (0.294)	Loss 6.5523 (6.6270)	Top-1 acc 0.391 (0.272)	Top-5 acc 2.734 (1.273)	lr 0.04994
Warmup Train [1][2970/3239]	Time 0.211 (0.526)	Data 0.001 (0.293)	Loss 6.5504 (6.6268)	Top-1 acc 1.562 (0.273)	Top-5 acc 2.734 (1.275)	lr 0.04994
Warmup Train [1][2980/3239]	Time 0.204 (0.525)	Data 0.001 (0.292)	Loss 6.5604 (6.6266)	Top-1 acc 0.000 (0.273)	Top-5 acc 1.562 (1.276)	lr 0.04993
Warmup Train [1][2990/3239]	Time 0.222 (0.527)	Data 0.001 (0.294)	Loss 6.5806 (6.6264)	Top-1 acc 0.391 (0.274)	Top-5 acc 0.781 (1.278)	lr 0.04993
Warmup Train [1][3000/3239]	Time 0.187 (0.526)	Data 0.001 (0.293)	Loss 6.5436 (6.6262)	Top-1 acc 0.781 (0.274)	Top-5 acc 2.344 (1.279)	lr 0.04993
Warmup Train [1][3010/3239]	Time 0.197 (0.525)	Data 0.001 (0.292)	Loss 6.5797 (6.6260)	Top-1 acc 0.000 (0.273)	Top-5 acc 0.000 (1.278)	lr 0.04993
Warmup Train [1][3020/3239]	Time 0.196 (0.527)	Data 0.001 (0.294)	Loss 6.5180 (6.6259)	Top-1 acc 0.781 (0.273)	Top-5 acc 2.734 (1.278)	lr 0.04993
Warmup Train [1][3030/3239]	Time 0.230 (0.526)	Data 0.001 (0.293)	Loss 6.5182 (6.6257)	Top-1 acc 0.391 (0.274)	Top-5 acc 1.172 (1.279)	lr 0.04993
Warmup Train [1][3040/3239]	Time 0.309 (0.525)	Data 0.001 (0.293)	Loss 6.5529 (6.6255)	Top-1 acc 0.000 (0.274)	Top-5 acc 1.172 (1.280)	lr 0.04993
Warmup Train [1][3050/3239]	Time 0.182 (0.527)	Data 0.002 (0.294)	Loss 6.5464 (6.6253)	Top-1 acc 1.172 (0.274)	Top-5 acc 5.078 (1.281)	lr 0.04993
Warmup Train [1][3060/3239]	Time 0.139 (0.526)	Data 0.001 (0.293)	Loss 6.5633 (6.6251)	Top-1 acc 0.000 (0.273)	Top-5 acc 1.562 (1.282)	lr 0.04993
Warmup Train [1][3070/3239]	Time 0.217 (0.525)	Data 0.001 (0.292)	Loss 6.5736 (6.6249)	Top-1 acc 0.391 (0.274)	Top-5 acc 2.344 (1.284)	lr 0.04993
Warmup Train [1][3080/3239]	Time 9.134 (0.527)	Data 8.923 (0.294)	Loss 6.5347 (6.6247)	Top-1 acc 0.000 (0.274)	Top-5 acc 1.562 (1.284)	lr 0.04993
Warmup Train [1][3090/3239]	Time 0.175 (0.526)	Data 0.002 (0.293)	Loss 6.5992 (6.6246)	Top-1 acc 0.000 (0.274)	Top-5 acc 1.172 (1.287)	lr 0.04993
Warmup Train [1][3100/3239]	Time 0.238 (0.525)	Data 0.001 (0.292)	Loss 6.5441 (6.6243)	Top-1 acc 0.000 (0.273)	Top-5 acc 1.562 (1.288)	lr 0.04993
Warmup Train [1][3110/3239]	Time 0.161 (0.524)	Data 0.001 (0.291)	Loss 6.5805 (6.6242)	Top-1 acc 0.000 (0.273)	Top-5 acc 0.391 (1.288)	lr 0.04993
Warmup Train [1][3120/3239]	Time 0.216 (0.525)	Data 0.001 (0.293)	Loss 6.5391 (6.6240)	Top-1 acc 0.000 (0.273)	Top-5 acc 1.172 (1.288)	lr 0.04993
Warmup Train [1][3130/3239]	Time 0.260 (0.524)	Data 0.002 (0.292)	Loss 6.5870 (6.6238)	Top-1 acc 0.391 (0.273)	Top-5 acc 0.781 (1.289)	lr 0.04993
Warmup Train [1][3140/3239]	Time 0.146 (0.523)	Data 0.002 (0.291)	Loss 6.5354 (6.6236)	Top-1 acc 0.781 (0.274)	Top-5 acc 2.734 (1.291)	lr 0.04993
Warmup Train [1][3150/3239]	Time 0.211 (0.526)	Data 0.001 (0.293)	Loss 6.4996 (6.6233)	Top-1 acc 0.000 (0.274)	Top-5 acc 0.781 (1.293)	lr 0.04993
Warmup Train [1][3160/3239]	Time 0.344 (0.525)	Data 0.001 (0.293)	Loss 6.5514 (6.6231)	Top-1 acc 0.391 (0.274)	Top-5 acc 2.734 (1.294)	lr 0.04993
Warmup Train [1][3170/3239]	Time 0.213 (0.524)	Data 0.001 (0.292)	Loss 6.5309 (6.6229)	Top-1 acc 0.000 (0.274)	Top-5 acc 0.391 (1.294)	lr 0.04993
Warmup Train [1][3180/3239]	Time 0.219 (0.525)	Data 0.000 (0.293)	Loss 6.5824 (6.6227)	Top-1 acc 0.391 (0.274)	Top-5 acc 2.734 (1.295)	lr 0.04993
Warmup Train [1][3190/3239]	Time 0.209 (0.524)	Data 0.000 (0.292)	Loss 6.5597 (6.6226)	Top-1 acc 0.000 (0.275)	Top-5 acc 0.391 (1.296)	lr 0.04993
Warmup Train [1][3200/3239]	Time 0.244 (0.523)	Data 0.000 (0.291)	Loss 6.5954 (6.6224)	Top-1 acc 0.000 (0.275)	Top-5 acc 1.172 (1.297)	lr 0.04992
Warmup Train [1][3210/3239]	Time 0.194 (0.524)	Data 0.000 (0.292)	Loss 6.5642 (6.6223)	Top-1 acc 0.391 (0.275)	Top-5 acc 1.953 (1.298)	lr 0.04992
Warmup Train [1][3220/3239]	Time 0.179 (0.523)	Data 0.000 (0.291)	Loss 6.5806 (6.6221)	Top-1 acc 0.000 (0.275)	Top-5 acc 0.391 (1.299)	lr 0.04992
Warmup Train [1][3230/3239]	Time 0.179 (0.522)	Data 0.000 (0.290)	Loss 6.5856 (6.6220)	Top-1 acc 0.000 (0.275)	Top-5 acc 1.562 (1.299)	lr 0.04992
Warmup Train [1][3239/3239]	Time 1.001 (0.521)	Data 0.000 (0.289)	Loss 6.5838 (6.6218)	Top-1 acc 0.000 (0.275)	Top-5 acc 2.469 (1.300)	lr 0.04992
==========Warmup Valid [1/40]	loss 6.459	top-1 acc 0.383	top-5 acc 1.674	Train top-1 0.275	top-5 1.300	flops: 442.4M
Warmup Train [2][0/3239]	Time 10.220 (10.220)	Data 8.824 (8.824)	Loss 6.5720 (6.5720)	Top-1 acc 0.391 (0.391)	Top-5 acc 1.562 (1.562)	lr 0.04992
Warmup Train [2][10/3239]	Time 0.188 (1.200)	Data 0.002 (0.806)	Loss 6.5629 (6.5684)	Top-1 acc 0.781 (0.426)	Top-5 acc 2.344 (1.349)	lr 0.04992
Warmup Train [2][20/3239]	Time 0.219 (0.732)	Data 0.002 (0.423)	Loss 6.5620 (6.5661)	Top-1 acc 0.000 (0.335)	Top-5 acc 1.562 (1.469)	lr 0.04992
Warmup Train [2][30/3239]	Time 0.252 (0.568)	Data 0.001 (0.287)	Loss 6.5855 (6.5652)	Top-1 acc 0.000 (0.353)	Top-5 acc 0.781 (1.399)	lr 0.04992
Warmup Train [2][40/3239]	Time 0.195 (0.483)	Data 0.001 (0.218)	Loss 6.5577 (6.5640)	Top-1 acc 0.000 (0.333)	Top-5 acc 1.953 (1.429)	lr 0.04992
Warmup Train [2][50/3239]	Time 0.292 (0.432)	Data 0.001 (0.176)	Loss 6.5267 (6.5631)	Top-1 acc 1.172 (0.368)	Top-5 acc 1.562 (1.425)	lr 0.04992
Warmup Train [2][60/3239]	Time 0.263 (0.398)	Data 0.001 (0.147)	Loss 6.5842 (6.5616)	Top-1 acc 0.391 (0.371)	Top-5 acc 1.562 (1.396)	lr 0.04992
Warmup Train [2][70/3239]	Time 0.276 (0.373)	Data 0.001 (0.127)	Loss 6.5859 (6.5621)	Top-1 acc 1.172 (0.363)	Top-5 acc 2.344 (1.403)	lr 0.04992
Warmup Train [2][80/3239]	Time 0.228 (0.352)	Data 0.001 (0.112)	Loss 6.5487 (6.5617)	Top-1 acc 0.391 (0.352)	Top-5 acc 2.344 (1.500)	lr 0.04992
Warmup Train [2][90/3239]	Time 0.237 (0.339)	Data 0.002 (0.100)	Loss 6.5767 (6.5614)	Top-1 acc 0.391 (0.361)	Top-5 acc 1.172 (1.494)	lr 0.04992
Warmup Train [2][100/3239]	Time 0.167 (0.326)	Data 0.001 (0.090)	Loss 6.5462 (6.5608)	Top-1 acc 0.781 (0.383)	Top-5 acc 1.562 (1.555)	lr 0.04992
Warmup Train [2][110/3239]	Time 0.198 (0.317)	Data 0.001 (0.083)	Loss 6.5643 (6.5611)	Top-1 acc 0.781 (0.380)	Top-5 acc 2.344 (1.555)	lr 0.04992
Warmup Train [2][120/3239]	Time 0.187 (0.308)	Data 0.002 (0.076)	Loss 6.5178 (6.5608)	Top-1 acc 1.172 (0.394)	Top-5 acc 1.562 (1.553)	lr 0.04992
Warmup Train [2][130/3239]	Time 0.210 (0.302)	Data 0.001 (0.071)	Loss 6.5731 (6.5598)	Top-1 acc 1.172 (0.391)	Top-5 acc 1.562 (1.565)	lr 0.04992
Warmup Train [2][140/3239]	Time 0.176 (0.296)	Data 0.001 (0.066)	Loss 6.5314 (6.5594)	Top-1 acc 0.000 (0.393)	Top-5 acc 1.953 (1.565)	lr 0.04992
Warmup Train [2][150/3239]	Time 0.163 (0.291)	Data 0.002 (0.062)	Loss 6.5237 (6.5591)	Top-1 acc 0.391 (0.404)	Top-5 acc 1.953 (1.573)	lr 0.04992
Warmup Train [2][160/3239]	Time 0.215 (0.287)	Data 0.002 (0.058)	Loss 6.5274 (6.5582)	Top-1 acc 0.391 (0.405)	Top-5 acc 1.172 (1.599)	lr 0.04992
Warmup Train [2][170/3239]	Time 0.388 (0.285)	Data 0.001 (0.055)	Loss 6.5001 (6.5583)	Top-1 acc 0.391 (0.397)	Top-5 acc 1.172 (1.597)	lr 0.04991
Warmup Train [2][180/3239]	Time 0.191 (0.281)	Data 0.002 (0.052)	Loss 6.5840 (6.5585)	Top-1 acc 0.391 (0.393)	Top-5 acc 1.953 (1.597)	lr 0.04991
Warmup Train [2][190/3239]	Time 0.258 (0.279)	Data 0.001 (0.050)	Loss 6.5375 (6.5577)	Top-1 acc 0.781 (0.395)	Top-5 acc 1.953 (1.626)	lr 0.04991
Warmup Train [2][200/3239]	Time 0.227 (0.275)	Data 0.001 (0.047)	Loss 6.5741 (6.5583)	Top-1 acc 0.000 (0.395)	Top-5 acc 0.391 (1.615)	lr 0.04991
Warmup Train [2][210/3239]	Time 0.190 (0.272)	Data 0.001 (0.045)	Loss 6.5348 (6.5586)	Top-1 acc 0.000 (0.385)	Top-5 acc 1.562 (1.629)	lr 0.04991
Warmup Train [2][220/3239]	Time 0.273 (0.270)	Data 0.001 (0.043)	Loss 6.5864 (6.5587)	Top-1 acc 0.781 (0.392)	Top-5 acc 2.734 (1.658)	lr 0.04991
Warmup Train [2][230/3239]	Time 0.225 (0.267)	Data 0.001 (0.041)	Loss 6.5484 (6.5588)	Top-1 acc 0.781 (0.386)	Top-5 acc 1.562 (1.644)	lr 0.04991
Warmup Train [2][240/3239]	Time 0.183 (0.265)	Data 0.001 (0.040)	Loss 6.5303 (6.5586)	Top-1 acc 0.781 (0.384)	Top-5 acc 1.562 (1.650)	lr 0.04991
Warmup Train [2][250/3239]	Time 0.249 (0.264)	Data 0.001 (0.038)	Loss 6.5405 (6.5580)	Top-1 acc 0.000 (0.383)	Top-5 acc 1.953 (1.642)	lr 0.04991
Warmup Train [2][260/3239]	Time 0.184 (0.261)	Data 0.001 (0.037)	Loss 6.5517 (6.5581)	Top-1 acc 1.172 (0.382)	Top-5 acc 1.562 (1.643)	lr 0.04991
Warmup Train [2][270/3239]	Time 0.179 (0.260)	Data 0.002 (0.036)	Loss 6.5045 (6.5579)	Top-1 acc 0.000 (0.386)	Top-5 acc 1.562 (1.649)	lr 0.04991
Warmup Train [2][280/3239]	Time 0.152 (0.258)	Data 0.001 (0.035)	Loss 6.5358 (6.5578)	Top-1 acc 0.000 (0.381)	Top-5 acc 0.391 (1.633)	lr 0.04991
Warmup Train [2][290/3239]	Time 0.242 (0.257)	Data 0.001 (0.034)	Loss 6.5585 (6.5578)	Top-1 acc 0.000 (0.387)	Top-5 acc 1.172 (1.634)	lr 0.04991
Warmup Train [2][300/3239]	Time 0.193 (0.256)	Data 0.001 (0.033)	Loss 6.5013 (6.5573)	Top-1 acc 0.391 (0.389)	Top-5 acc 1.562 (1.652)	lr 0.04991
Warmup Train [2][310/3239]	Time 0.191 (0.255)	Data 0.001 (0.032)	Loss 6.5484 (6.5574)	Top-1 acc 0.391 (0.383)	Top-5 acc 1.172 (1.652)	lr 0.04991
Warmup Train [2][320/3239]	Time 0.205 (0.254)	Data 0.001 (0.031)	Loss 6.5834 (6.5573)	Top-1 acc 0.000 (0.381)	Top-5 acc 2.344 (1.664)	lr 0.04991
Warmup Train [2][330/3239]	Time 0.161 (0.252)	Data 0.002 (0.030)	Loss 6.5662 (6.5567)	Top-1 acc 0.781 (0.384)	Top-5 acc 1.953 (1.678)	lr 0.04991
Warmup Train [2][340/3239]	Time 0.265 (0.251)	Data 0.001 (0.029)	Loss 6.5603 (6.5564)	Top-1 acc 0.391 (0.385)	Top-5 acc 1.953 (1.676)	lr 0.04991
Warmup Train [2][350/3239]	Time 0.229 (0.250)	Data 0.001 (0.028)	Loss 6.5656 (6.5563)	Top-1 acc 0.391 (0.383)	Top-5 acc 3.125 (1.678)	lr 0.04991
Warmup Train [2][360/3239]	Time 0.224 (0.249)	Data 0.001 (0.028)	Loss 6.5434 (6.5564)	Top-1 acc 0.000 (0.382)	Top-5 acc 1.562 (1.679)	lr 0.04990
Warmup Train [2][370/3239]	Time 0.184 (0.249)	Data 0.002 (0.027)	Loss 6.5390 (6.5562)	Top-1 acc 0.391 (0.379)	Top-5 acc 1.562 (1.673)	lr 0.04990
Warmup Train [2][380/3239]	Time 0.200 (0.248)	Data 0.001 (0.027)	Loss 6.5412 (6.5563)	Top-1 acc 0.391 (0.382)	Top-5 acc 1.953 (1.672)	lr 0.04990
Warmup Train [2][390/3239]	Time 0.270 (0.247)	Data 0.001 (0.026)	Loss 6.5273 (6.5559)	Top-1 acc 1.172 (0.384)	Top-5 acc 3.125 (1.669)	lr 0.04990
Warmup Train [2][400/3239]	Time 0.181 (0.247)	Data 0.001 (0.025)	Loss 6.5523 (6.5557)	Top-1 acc 0.781 (0.394)	Top-5 acc 3.125 (1.686)	lr 0.04990
Warmup Train [2][410/3239]	Time 0.145 (0.246)	Data 0.001 (0.025)	Loss 6.5464 (6.5559)	Top-1 acc 0.781 (0.395)	Top-5 acc 1.953 (1.688)	lr 0.04990
Warmup Train [2][420/3239]	Time 0.225 (0.245)	Data 0.001 (0.024)	Loss 6.5887 (6.5559)	Top-1 acc 0.391 (0.394)	Top-5 acc 0.391 (1.689)	lr 0.04990
Warmup Train [2][430/3239]	Time 0.209 (0.244)	Data 0.001 (0.024)	Loss 6.5211 (6.5555)	Top-1 acc 1.562 (0.399)	Top-5 acc 3.125 (1.693)	lr 0.04990
Warmup Train [2][440/3239]	Time 0.195 (0.244)	Data 0.001 (0.023)	Loss 6.5744 (6.5554)	Top-1 acc 0.781 (0.399)	Top-5 acc 1.953 (1.699)	lr 0.04990
Warmup Train [2][450/3239]	Time 0.213 (0.243)	Data 0.001 (0.023)	Loss 6.4779 (6.5553)	Top-1 acc 0.781 (0.398)	Top-5 acc 1.953 (1.699)	lr 0.04990
Warmup Train [2][460/3239]	Time 0.210 (0.243)	Data 0.001 (0.022)	Loss 6.5359 (6.5552)	Top-1 acc 0.391 (0.396)	Top-5 acc 1.953 (1.695)	lr 0.04990
Warmup Train [2][470/3239]	Time 0.240 (0.242)	Data 0.001 (0.022)	Loss 6.5211 (6.5552)	Top-1 acc 0.391 (0.396)	Top-5 acc 1.562 (1.692)	lr 0.04990
Warmup Train [2][480/3239]	Time 0.132 (0.241)	Data 0.001 (0.021)	Loss 6.5297 (6.5551)	Top-1 acc 0.000 (0.394)	Top-5 acc 1.953 (1.689)	lr 0.04990
Warmup Train [2][490/3239]	Time 0.198 (0.241)	Data 0.001 (0.021)	Loss 6.5515 (6.5549)	Top-1 acc 0.781 (0.395)	Top-5 acc 1.172 (1.694)	lr 0.04990
Warmup Train [2][500/3239]	Time 0.325 (0.240)	Data 0.001 (0.021)	Loss 6.5938 (6.5545)	Top-1 acc 0.391 (0.397)	Top-5 acc 1.562 (1.699)	lr 0.04990
Warmup Train [2][510/3239]	Time 0.275 (0.240)	Data 0.001 (0.020)	Loss 6.5218 (6.5543)	Top-1 acc 0.391 (0.398)	Top-5 acc 1.562 (1.700)	lr 0.04990
Warmup Train [2][520/3239]	Time 0.230 (0.240)	Data 0.001 (0.020)	Loss 6.5432 (6.5541)	Top-1 acc 0.000 (0.400)	Top-5 acc 2.734 (1.703)	lr 0.04990
Warmup Train [2][530/3239]	Time 0.196 (0.240)	Data 0.001 (0.020)	Loss 6.5567 (6.5541)	Top-1 acc 0.000 (0.401)	Top-5 acc 1.562 (1.711)	lr 0.04990
Warmup Train [2][540/3239]	Time 0.211 (0.239)	Data 0.001 (0.019)	Loss 6.5942 (6.5540)	Top-1 acc 0.391 (0.399)	Top-5 acc 1.172 (1.718)	lr 0.04990
Warmup Train [2][550/3239]	Time 0.227 (0.239)	Data 0.001 (0.019)	Loss 6.5289 (6.5537)	Top-1 acc 1.172 (0.397)	Top-5 acc 2.734 (1.718)	lr 0.04989
Warmup Train [2][560/3239]	Time 0.207 (0.239)	Data 0.001 (0.019)	Loss 6.4888 (6.5535)	Top-1 acc 0.391 (0.393)	Top-5 acc 3.906 (1.721)	lr 0.04989
Warmup Train [2][570/3239]	Time 0.234 (0.239)	Data 0.001 (0.019)	Loss 6.5387 (6.5531)	Top-1 acc 1.172 (0.393)	Top-5 acc 4.688 (1.724)	lr 0.04989
Warmup Train [2][580/3239]	Time 0.195 (0.238)	Data 0.001 (0.018)	Loss 6.5034 (6.5528)	Top-1 acc 1.172 (0.393)	Top-5 acc 3.906 (1.731)	lr 0.04989
Warmup Train [2][590/3239]	Time 0.283 (0.238)	Data 0.001 (0.018)	Loss 6.5613 (6.5528)	Top-1 acc 0.000 (0.392)	Top-5 acc 1.953 (1.732)	lr 0.04989
Warmup Train [2][600/3239]	Time 0.179 (0.238)	Data 0.001 (0.018)	Loss 6.5305 (6.5527)	Top-1 acc 0.391 (0.391)	Top-5 acc 3.125 (1.735)	lr 0.04989
Warmup Train [2][610/3239]	Time 0.151 (0.238)	Data 0.001 (0.018)	Loss 6.5122 (6.5522)	Top-1 acc 0.391 (0.393)	Top-5 acc 2.344 (1.738)	lr 0.04989
Warmup Train [2][620/3239]	Time 0.185 (0.237)	Data 0.001 (0.017)	Loss 6.5515 (6.5521)	Top-1 acc 0.391 (0.391)	Top-5 acc 1.562 (1.734)	lr 0.04989
Warmup Train [2][630/3239]	Time 0.189 (0.237)	Data 0.001 (0.017)	Loss 6.5366 (6.5518)	Top-1 acc 1.172 (0.391)	Top-5 acc 2.734 (1.736)	lr 0.04989
Warmup Train [2][640/3239]	Time 0.218 (0.237)	Data 0.001 (0.017)	Loss 6.5181 (6.5514)	Top-1 acc 0.000 (0.389)	Top-5 acc 2.344 (1.743)	lr 0.04989
Warmup Train [2][650/3239]	Time 0.190 (0.236)	Data 0.001 (0.017)	Loss 6.5106 (6.5510)	Top-1 acc 0.391 (0.388)	Top-5 acc 3.516 (1.745)	lr 0.04989
Warmup Train [2][660/3239]	Time 0.321 (0.236)	Data 0.001 (0.017)	Loss 6.5728 (6.5508)	Top-1 acc 0.000 (0.388)	Top-5 acc 0.391 (1.743)	lr 0.04989
Warmup Train [2][670/3239]	Time 0.141 (0.236)	Data 0.001 (0.016)	Loss 6.5738 (6.5508)	Top-1 acc 0.391 (0.391)	Top-5 acc 1.172 (1.741)	lr 0.04989
Warmup Train [2][680/3239]	Time 0.227 (0.235)	Data 0.001 (0.016)	Loss 6.5571 (6.5505)	Top-1 acc 0.391 (0.389)	Top-5 acc 0.781 (1.741)	lr 0.04989
Warmup Train [2][690/3239]	Time 0.201 (0.235)	Data 0.001 (0.016)	Loss 6.4816 (6.5501)	Top-1 acc 0.391 (0.392)	Top-5 acc 3.516 (1.746)	lr 0.04989
Warmup Train [2][700/3239]	Time 0.182 (0.235)	Data 0.001 (0.016)	Loss 6.5011 (6.5498)	Top-1 acc 1.562 (0.394)	Top-5 acc 3.125 (1.749)	lr 0.04989
Warmup Train [2][710/3239]	Time 0.192 (0.235)	Data 0.001 (0.016)	Loss 6.5037 (6.5494)	Top-1 acc 0.391 (0.393)	Top-5 acc 1.953 (1.753)	lr 0.04989
Warmup Train [2][720/3239]	Time 0.193 (0.235)	Data 0.001 (0.015)	Loss 6.5157 (6.5492)	Top-1 acc 0.781 (0.392)	Top-5 acc 1.953 (1.752)	lr 0.04988
Warmup Train [2][730/3239]	Time 0.221 (0.234)	Data 0.001 (0.015)	Loss 6.4987 (6.5490)	Top-1 acc 0.391 (0.393)	Top-5 acc 1.562 (1.757)	lr 0.04988
Warmup Train [2][740/3239]	Time 0.230 (0.234)	Data 0.001 (0.015)	Loss 6.4993 (6.5487)	Top-1 acc 0.391 (0.393)	Top-5 acc 1.953 (1.759)	lr 0.04988
Warmup Train [2][750/3239]	Time 0.164 (0.234)	Data 0.001 (0.015)	Loss 6.5585 (6.5485)	Top-1 acc 0.000 (0.393)	Top-5 acc 0.781 (1.761)	lr 0.04988
Warmup Train [2][760/3239]	Time 0.216 (0.234)	Data 0.001 (0.015)	Loss 6.5376 (6.5484)	Top-1 acc 0.000 (0.395)	Top-5 acc 0.781 (1.768)	lr 0.04988
Warmup Train [2][770/3239]	Time 0.188 (0.234)	Data 0.001 (0.015)	Loss 6.5611 (6.5479)	Top-1 acc 0.781 (0.397)	Top-5 acc 1.562 (1.773)	lr 0.04988
Warmup Train [2][780/3239]	Time 0.155 (0.234)	Data 0.001 (0.015)	Loss 6.5175 (6.5475)	Top-1 acc 0.000 (0.397)	Top-5 acc 0.781 (1.775)	lr 0.04988
Warmup Train [2][790/3239]	Time 0.237 (0.233)	Data 0.001 (0.014)	Loss 6.5276 (6.5474)	Top-1 acc 0.781 (0.398)	Top-5 acc 2.344 (1.779)	lr 0.04988
Warmup Train [2][800/3239]	Time 0.358 (0.233)	Data 0.003 (0.014)	Loss 6.5679 (6.5472)	Top-1 acc 0.000 (0.397)	Top-5 acc 1.172 (1.780)	lr 0.04988
Warmup Train [2][810/3239]	Time 0.341 (0.233)	Data 0.001 (0.014)	Loss 6.5365 (6.5471)	Top-1 acc 0.391 (0.395)	Top-5 acc 1.172 (1.782)	lr 0.04988
Warmup Train [2][820/3239]	Time 0.182 (0.233)	Data 0.001 (0.014)	Loss 6.5358 (6.5469)	Top-1 acc 0.391 (0.393)	Top-5 acc 0.391 (1.775)	lr 0.04988
Warmup Train [2][830/3239]	Time 0.196 (0.233)	Data 0.001 (0.014)	Loss 6.5914 (6.5467)	Top-1 acc 0.391 (0.395)	Top-5 acc 1.172 (1.776)	lr 0.04988
Warmup Train [2][840/3239]	Time 0.229 (0.232)	Data 0.001 (0.014)	Loss 6.5657 (6.5467)	Top-1 acc 0.391 (0.394)	Top-5 acc 1.953 (1.779)	lr 0.04988
Warmup Train [2][850/3239]	Time 0.226 (0.232)	Data 0.001 (0.014)	Loss 6.5552 (6.5463)	Top-1 acc 0.391 (0.392)	Top-5 acc 1.562 (1.779)	lr 0.04988
Warmup Train [2][860/3239]	Time 0.220 (0.232)	Data 0.001 (0.013)	Loss 6.4497 (6.5460)	Top-1 acc 0.781 (0.393)	Top-5 acc 3.125 (1.779)	lr 0.04988
Warmup Train [2][870/3239]	Time 0.205 (0.232)	Data 0.001 (0.013)	Loss 6.5044 (6.5458)	Top-1 acc 0.391 (0.392)	Top-5 acc 1.562 (1.779)	lr 0.04988
Warmup Train [2][880/3239]	Time 0.274 (0.232)	Data 0.001 (0.013)	Loss 6.5260 (6.5455)	Top-1 acc 0.391 (0.392)	Top-5 acc 1.953 (1.783)	lr 0.04988
Warmup Train [2][890/3239]	Time 0.243 (0.232)	Data 0.003 (0.013)	Loss 6.4931 (6.5453)	Top-1 acc 0.000 (0.394)	Top-5 acc 1.953 (1.784)	lr 0.04987
Warmup Train [2][900/3239]	Time 0.237 (0.232)	Data 0.001 (0.013)	Loss 6.4584 (6.5452)	Top-1 acc 0.781 (0.394)	Top-5 acc 5.469 (1.791)	lr 0.04987
Warmup Train [2][910/3239]	Time 0.322 (0.232)	Data 0.002 (0.013)	Loss 6.4866 (6.5450)	Top-1 acc 0.391 (0.394)	Top-5 acc 2.344 (1.793)	lr 0.04987
Warmup Train [2][920/3239]	Time 0.142 (0.232)	Data 0.001 (0.013)	Loss 6.5662 (6.5447)	Top-1 acc 0.781 (0.395)	Top-5 acc 1.562 (1.796)	lr 0.04987
Warmup Train [2][930/3239]	Time 0.232 (0.232)	Data 0.001 (0.013)	Loss 6.6180 (6.5447)	Top-1 acc 0.391 (0.394)	Top-5 acc 0.781 (1.795)	lr 0.04987
Warmup Train [2][940/3239]	Time 0.167 (0.231)	Data 0.001 (0.012)	Loss 6.5536 (6.5447)	Top-1 acc 0.000 (0.394)	Top-5 acc 0.781 (1.793)	lr 0.04987
Warmup Train [2][950/3239]	Time 0.188 (0.231)	Data 0.001 (0.012)	Loss 6.5486 (6.5447)	Top-1 acc 0.391 (0.394)	Top-5 acc 2.344 (1.799)	lr 0.04987
Warmup Train [2][960/3239]	Time 0.215 (0.231)	Data 0.001 (0.012)	Loss 6.5082 (6.5445)	Top-1 acc 0.391 (0.393)	Top-5 acc 1.562 (1.799)	lr 0.04987
Warmup Train [2][970/3239]	Time 0.187 (0.231)	Data 0.001 (0.012)	Loss 6.5581 (6.5443)	Top-1 acc 0.391 (0.392)	Top-5 acc 0.781 (1.797)	lr 0.04987
Warmup Train [2][980/3239]	Time 0.193 (0.231)	Data 0.001 (0.012)	Loss 6.5093 (6.5441)	Top-1 acc 0.391 (0.392)	Top-5 acc 3.125 (1.799)	lr 0.04987
Warmup Train [2][990/3239]	Time 0.232 (0.230)	Data 0.001 (0.012)	Loss 6.5425 (6.5439)	Top-1 acc 0.000 (0.393)	Top-5 acc 0.781 (1.799)	lr 0.04987
Warmup Train [2][1000/3239]	Time 0.247 (0.230)	Data 0.001 (0.012)	Loss 6.5332 (6.5437)	Top-1 acc 0.000 (0.394)	Top-5 acc 2.734 (1.806)	lr 0.04987
Warmup Train [2][1010/3239]	Time 0.196 (0.230)	Data 0.001 (0.012)	Loss 6.5535 (6.5434)	Top-1 acc 0.000 (0.398)	Top-5 acc 1.562 (1.813)	lr 0.04987
Warmup Train [2][1020/3239]	Time 0.325 (0.230)	Data 0.001 (0.012)	Loss 6.5499 (6.5431)	Top-1 acc 0.391 (0.401)	Top-5 acc 2.344 (1.820)	lr 0.04987
Warmup Train [2][1030/3239]	Time 0.174 (0.230)	Data 0.001 (0.012)	Loss 6.4705 (6.5428)	Top-1 acc 0.781 (0.400)	Top-5 acc 2.734 (1.826)	lr 0.04987
Warmup Train [2][1040/3239]	Time 0.176 (0.230)	Data 0.002 (0.012)	Loss 6.4671 (6.5424)	Top-1 acc 0.391 (0.400)	Top-5 acc 3.125 (1.832)	lr 0.04987
Warmup Train [2][1050/3239]	Time 0.187 (0.230)	Data 0.001 (0.012)	Loss 6.5021 (6.5421)	Top-1 acc 1.562 (0.404)	Top-5 acc 3.516 (1.839)	lr 0.04986
Warmup Train [2][1060/3239]	Time 0.193 (0.230)	Data 0.001 (0.011)	Loss 6.4784 (6.5420)	Top-1 acc 1.562 (0.404)	Top-5 acc 3.516 (1.839)	lr 0.04986
Warmup Train [2][1070/3239]	Time 0.217 (0.230)	Data 0.001 (0.011)	Loss 6.6283 (6.5419)	Top-1 acc 0.391 (0.404)	Top-5 acc 1.172 (1.840)	lr 0.04986
Warmup Train [2][1080/3239]	Time 0.269 (0.230)	Data 0.001 (0.011)	Loss 6.5302 (6.5415)	Top-1 acc 0.391 (0.404)	Top-5 acc 1.172 (1.846)	lr 0.04986
Warmup Train [2][1090/3239]	Time 0.166 (0.230)	Data 0.001 (0.011)	Loss 6.4809 (6.5412)	Top-1 acc 1.172 (0.406)	Top-5 acc 3.516 (1.853)	lr 0.04986
Warmup Train [2][1100/3239]	Time 0.206 (0.230)	Data 0.001 (0.011)	Loss 6.4930 (6.5409)	Top-1 acc 0.391 (0.407)	Top-5 acc 3.906 (1.856)	lr 0.04986
Warmup Train [2][1110/3239]	Time 0.260 (0.229)	Data 0.001 (0.011)	Loss 6.5248 (6.5405)	Top-1 acc 0.000 (0.408)	Top-5 acc 1.953 (1.865)	lr 0.04986
Warmup Train [2][1120/3239]	Time 0.389 (0.230)	Data 0.002 (0.011)	Loss 6.4993 (6.5402)	Top-1 acc 1.172 (0.410)	Top-5 acc 3.906 (1.870)	lr 0.04986
Warmup Train [2][1130/3239]	Time 0.261 (0.230)	Data 0.002 (0.011)	Loss 6.5948 (6.5401)	Top-1 acc 1.172 (0.411)	Top-5 acc 1.953 (1.871)	lr 0.04986
Warmup Train [2][1140/3239]	Time 0.255 (0.229)	Data 0.001 (0.011)	Loss 6.5147 (6.5399)	Top-1 acc 0.781 (0.411)	Top-5 acc 3.125 (1.877)	lr 0.04986
Warmup Train [2][1150/3239]	Time 0.140 (0.229)	Data 0.001 (0.011)	Loss 6.5200 (6.5395)	Top-1 acc 0.781 (0.412)	Top-5 acc 2.734 (1.883)	lr 0.04986
Warmup Train [2][1160/3239]	Time 0.150 (0.229)	Data 0.001 (0.011)	Loss 6.5374 (6.5394)	Top-1 acc 0.391 (0.413)	Top-5 acc 1.953 (1.885)	lr 0.04986
Warmup Train [2][1170/3239]	Time 0.269 (0.229)	Data 0.001 (0.011)	Loss 6.5347 (6.5393)	Top-1 acc 0.000 (0.413)	Top-5 acc 1.953 (1.884)	lr 0.04986
Warmup Train [2][1180/3239]	Time 0.169 (0.229)	Data 0.001 (0.011)	Loss 6.5307 (6.5391)	Top-1 acc 1.562 (0.414)	Top-5 acc 2.734 (1.885)	lr 0.04986
Warmup Train [2][1190/3239]	Time 0.169 (0.229)	Data 0.001 (0.011)	Loss 6.5080 (6.5390)	Top-1 acc 0.781 (0.415)	Top-5 acc 1.562 (1.885)	lr 0.04986
Warmup Train [2][1200/3239]	Time 0.189 (0.229)	Data 0.001 (0.011)	Loss 6.4803 (6.5386)	Top-1 acc 0.781 (0.415)	Top-5 acc 2.344 (1.890)	lr 0.04986
Warmup Train [2][1210/3239]	Time 0.245 (0.229)	Data 0.002 (0.010)	Loss 6.4730 (6.5384)	Top-1 acc 0.781 (0.414)	Top-5 acc 3.125 (1.890)	lr 0.04985
Warmup Train [2][1220/3239]	Time 0.199 (0.229)	Data 0.001 (0.010)	Loss 6.5287 (6.5383)	Top-1 acc 0.781 (0.416)	Top-5 acc 2.344 (1.893)	lr 0.04985
Warmup Train [2][1230/3239]	Time 0.259 (0.229)	Data 0.001 (0.010)	Loss 6.5478 (6.5380)	Top-1 acc 0.000 (0.415)	Top-5 acc 2.344 (1.898)	lr 0.04985
Warmup Train [2][1240/3239]	Time 0.326 (0.229)	Data 0.001 (0.010)	Loss 6.5851 (6.5379)	Top-1 acc 0.391 (0.416)	Top-5 acc 1.562 (1.898)	lr 0.04985
Warmup Train [2][1250/3239]	Time 0.242 (0.229)	Data 0.001 (0.010)	Loss 6.5249 (6.5378)	Top-1 acc 0.781 (0.416)	Top-5 acc 2.734 (1.902)	lr 0.04985
Warmup Train [2][1260/3239]	Time 0.212 (0.229)	Data 0.001 (0.010)	Loss 6.5393 (6.5375)	Top-1 acc 0.000 (0.416)	Top-5 acc 2.344 (1.900)	lr 0.04985
Warmup Train [2][1270/3239]	Time 0.191 (0.229)	Data 0.001 (0.010)	Loss 6.5562 (6.5373)	Top-1 acc 0.391 (0.416)	Top-5 acc 1.953 (1.902)	lr 0.04985
Warmup Train [2][1280/3239]	Time 0.240 (0.229)	Data 0.001 (0.010)	Loss 6.4229 (6.5371)	Top-1 acc 0.781 (0.417)	Top-5 acc 2.734 (1.902)	lr 0.04985
Warmup Train [2][1290/3239]	Time 0.198 (0.228)	Data 0.001 (0.010)	Loss 6.4607 (6.5369)	Top-1 acc 0.781 (0.416)	Top-5 acc 2.734 (1.904)	lr 0.04985
Warmup Train [2][1300/3239]	Time 0.236 (0.228)	Data 0.001 (0.010)	Loss 6.5073 (6.5366)	Top-1 acc 0.391 (0.417)	Top-5 acc 1.172 (1.907)	lr 0.04985
Warmup Train [2][1310/3239]	Time 0.246 (0.228)	Data 0.002 (0.010)	Loss 6.4580 (6.5362)	Top-1 acc 0.391 (0.416)	Top-5 acc 1.562 (1.909)	lr 0.04985
Warmup Train [2][1320/3239]	Time 0.262 (0.228)	Data 0.001 (0.010)	Loss 6.5523 (6.5360)	Top-1 acc 0.391 (0.418)	Top-5 acc 1.953 (1.911)	lr 0.04985
Warmup Train [2][1330/3239]	Time 0.273 (0.228)	Data 0.001 (0.010)	Loss 6.4759 (6.5357)	Top-1 acc 0.391 (0.420)	Top-5 acc 1.953 (1.911)	lr 0.04985
Warmup Train [2][1340/3239]	Time 0.235 (0.228)	Data 0.001 (0.010)	Loss 6.4899 (6.5355)	Top-1 acc 0.000 (0.420)	Top-5 acc 1.562 (1.912)	lr 0.04985
Warmup Train [2][1350/3239]	Time 0.133 (0.228)	Data 0.001 (0.010)	Loss 6.5133 (6.5354)	Top-1 acc 0.391 (0.423)	Top-5 acc 2.344 (1.913)	lr 0.04985
Warmup Train [2][1360/3239]	Time 0.207 (0.228)	Data 0.001 (0.010)	Loss 6.4763 (6.5352)	Top-1 acc 0.391 (0.423)	Top-5 acc 1.562 (1.914)	lr 0.04984
Warmup Train [2][1370/3239]	Time 0.169 (0.228)	Data 0.001 (0.010)	Loss 6.5195 (6.5350)	Top-1 acc 0.391 (0.424)	Top-5 acc 1.562 (1.918)	lr 0.04984
Warmup Train [2][1380/3239]	Time 0.163 (0.228)	Data 0.001 (0.010)	Loss 6.5092 (6.5348)	Top-1 acc 0.781 (0.423)	Top-5 acc 2.734 (1.915)	lr 0.04984
Warmup Train [2][1390/3239]	Time 0.244 (0.228)	Data 0.001 (0.010)	Loss 6.5258 (6.5345)	Top-1 acc 0.000 (0.422)	Top-5 acc 1.172 (1.917)	lr 0.04984
Warmup Train [2][1400/3239]	Time 0.190 (0.228)	Data 0.001 (0.010)	Loss 6.5503 (6.5344)	Top-1 acc 0.000 (0.423)	Top-5 acc 0.781 (1.920)	lr 0.04984
Warmup Train [2][1410/3239]	Time 0.206 (0.228)	Data 0.001 (0.010)	Loss 6.4889 (6.5342)	Top-1 acc 1.172 (0.424)	Top-5 acc 3.906 (1.925)	lr 0.04984
Warmup Train [2][1420/3239]	Time 0.148 (0.228)	Data 0.001 (0.009)	Loss 6.5001 (6.5341)	Top-1 acc 0.000 (0.424)	Top-5 acc 3.516 (1.928)	lr 0.04984
Warmup Train [2][1430/3239]	Time 0.191 (0.228)	Data 0.001 (0.009)	Loss 6.4731 (6.5339)	Top-1 acc 0.781 (0.424)	Top-5 acc 2.734 (1.929)	lr 0.04984
Warmup Train [2][1440/3239]	Time 0.202 (0.228)	Data 0.001 (0.009)	Loss 6.4955 (6.5336)	Top-1 acc 0.000 (0.426)	Top-5 acc 2.734 (1.935)	lr 0.04984
Warmup Train [2][1450/3239]	Time 0.297 (0.228)	Data 0.001 (0.009)	Loss 6.4801 (6.5334)	Top-1 acc 0.391 (0.427)	Top-5 acc 1.953 (1.936)	lr 0.04984
Warmup Train [2][1460/3239]	Time 0.209 (0.227)	Data 0.001 (0.009)	Loss 6.4701 (6.5331)	Top-1 acc 0.000 (0.428)	Top-5 acc 0.781 (1.939)	lr 0.04984
Warmup Train [2][1470/3239]	Time 0.200 (0.227)	Data 0.001 (0.009)	Loss 6.4455 (6.5328)	Top-1 acc 0.391 (0.429)	Top-5 acc 4.688 (1.941)	lr 0.04984
Warmup Train [2][1480/3239]	Time 0.136 (0.227)	Data 0.001 (0.009)	Loss 6.5485 (6.5326)	Top-1 acc 0.000 (0.429)	Top-5 acc 2.344 (1.945)	lr 0.04984
Warmup Train [2][1490/3239]	Time 0.195 (0.227)	Data 0.001 (0.009)	Loss 6.4949 (6.5324)	Top-1 acc 0.391 (0.428)	Top-5 acc 2.344 (1.943)	lr 0.04984
Warmup Train [2][1500/3239]	Time 0.242 (0.227)	Data 0.002 (0.009)	Loss 6.4813 (6.5321)	Top-1 acc 0.000 (0.429)	Top-5 acc 1.562 (1.945)	lr 0.04984
Warmup Train [2][1510/3239]	Time 0.252 (0.227)	Data 0.001 (0.009)	Loss 6.5130 (6.5319)	Top-1 acc 0.391 (0.429)	Top-5 acc 2.344 (1.947)	lr 0.04983
Warmup Train [2][1520/3239]	Time 0.164 (0.227)	Data 0.001 (0.009)	Loss 6.4907 (6.5317)	Top-1 acc 0.781 (0.430)	Top-5 acc 1.562 (1.949)	lr 0.04983
Warmup Train [2][1530/3239]	Time 0.139 (0.227)	Data 0.001 (0.009)	Loss 6.4789 (6.5314)	Top-1 acc 0.391 (0.432)	Top-5 acc 2.344 (1.953)	lr 0.04983
Warmup Train [2][1540/3239]	Time 0.186 (0.227)	Data 0.001 (0.009)	Loss 6.4437 (6.5312)	Top-1 acc 0.000 (0.432)	Top-5 acc 2.734 (1.954)	lr 0.04983
Warmup Train [2][1550/3239]	Time 0.221 (0.227)	Data 0.001 (0.009)	Loss 6.5506 (6.5312)	Top-1 acc 0.000 (0.431)	Top-5 acc 1.562 (1.955)	lr 0.04983
Warmup Train [2][1560/3239]	Time 0.302 (0.227)	Data 0.001 (0.009)	Loss 6.5032 (6.5309)	Top-1 acc 0.781 (0.432)	Top-5 acc 1.172 (1.959)	lr 0.04983
Warmup Train [2][1570/3239]	Time 0.214 (0.227)	Data 0.001 (0.009)	Loss 6.5103 (6.5307)	Top-1 acc 0.391 (0.433)	Top-5 acc 2.734 (1.959)	lr 0.04983
Warmup Train [2][1580/3239]	Time 0.171 (0.227)	Data 0.001 (0.009)	Loss 6.5109 (6.5305)	Top-1 acc 0.391 (0.434)	Top-5 acc 2.734 (1.958)	lr 0.04983
Warmup Train [2][1590/3239]	Time 0.180 (0.227)	Data 0.001 (0.009)	Loss 6.5096 (6.5301)	Top-1 acc 0.000 (0.433)	Top-5 acc 2.734 (1.962)	lr 0.04983
Warmup Train [2][1600/3239]	Time 0.228 (0.227)	Data 0.001 (0.009)	Loss 6.5209 (6.5299)	Top-1 acc 0.000 (0.433)	Top-5 acc 1.562 (1.964)	lr 0.04983
Warmup Train [2][1610/3239]	Time 0.269 (0.227)	Data 0.002 (0.009)	Loss 6.5313 (6.5297)	Top-1 acc 0.391 (0.435)	Top-5 acc 0.781 (1.965)	lr 0.04983
Warmup Train [2][1620/3239]	Time 0.220 (0.227)	Data 0.001 (0.009)	Loss 6.4640 (6.5294)	Top-1 acc 1.172 (0.435)	Top-5 acc 4.688 (1.964)	lr 0.04983
Warmup Train [2][1630/3239]	Time 0.218 (0.227)	Data 0.001 (0.009)	Loss 6.4943 (6.5292)	Top-1 acc 0.391 (0.436)	Top-5 acc 1.953 (1.967)	lr 0.04983
Warmup Train [2][1640/3239]	Time 0.244 (0.227)	Data 0.001 (0.009)	Loss 6.4897 (6.5290)	Top-1 acc 1.172 (0.437)	Top-5 acc 3.516 (1.969)	lr 0.04983
Warmup Train [2][1650/3239]	Time 0.217 (0.227)	Data 0.002 (0.009)	Loss 6.5010 (6.5288)	Top-1 acc 0.781 (0.437)	Top-5 acc 2.734 (1.971)	lr 0.04982
Warmup Train [2][1660/3239]	Time 0.311 (0.227)	Data 0.001 (0.009)	Loss 6.5452 (6.5285)	Top-1 acc 0.000 (0.439)	Top-5 acc 2.344 (1.975)	lr 0.04982
Warmup Train [2][1670/3239]	Time 0.190 (0.227)	Data 0.001 (0.009)	Loss 6.4988 (6.5284)	Top-1 acc 0.781 (0.440)	Top-5 acc 1.562 (1.977)	lr 0.04982
Warmup Train [2][1680/3239]	Time 0.247 (0.227)	Data 0.001 (0.009)	Loss 6.4613 (6.5282)	Top-1 acc 0.781 (0.441)	Top-5 acc 2.734 (1.978)	lr 0.04982
Warmup Train [2][1690/3239]	Time 0.291 (0.227)	Data 0.001 (0.008)	Loss 6.4753 (6.5279)	Top-1 acc 0.781 (0.442)	Top-5 acc 1.953 (1.980)	lr 0.04982
Warmup Train [2][1700/3239]	Time 0.269 (0.227)	Data 0.001 (0.008)	Loss 6.5110 (6.5278)	Top-1 acc 0.391 (0.442)	Top-5 acc 2.344 (1.982)	lr 0.04982
Warmup Train [2][1710/3239]	Time 0.196 (0.227)	Data 0.001 (0.008)	Loss 6.5003 (6.5276)	Top-1 acc 0.000 (0.442)	Top-5 acc 1.953 (1.980)	lr 0.04982
Warmup Train [2][1720/3239]	Time 0.182 (0.227)	Data 0.001 (0.008)	Loss 6.5365 (6.5274)	Top-1 acc 0.391 (0.442)	Top-5 acc 1.562 (1.985)	lr 0.04982
Warmup Train [2][1730/3239]	Time 0.248 (0.227)	Data 0.001 (0.008)	Loss 6.5112 (6.5271)	Top-1 acc 0.391 (0.443)	Top-5 acc 1.562 (1.986)	lr 0.04982
Warmup Train [2][1740/3239]	Time 0.266 (0.226)	Data 0.001 (0.008)	Loss 6.4877 (6.5270)	Top-1 acc 0.781 (0.444)	Top-5 acc 3.125 (1.986)	lr 0.04982
Warmup Train [2][1750/3239]	Time 0.274 (0.226)	Data 0.001 (0.008)	Loss 6.4788 (6.5267)	Top-1 acc 0.391 (0.446)	Top-5 acc 2.344 (1.989)	lr 0.04982
Warmup Train [2][1760/3239]	Time 0.163 (0.226)	Data 0.001 (0.008)	Loss 6.4728 (6.5265)	Top-1 acc 0.391 (0.446)	Top-5 acc 1.562 (1.994)	lr 0.04982
Warmup Train [2][1770/3239]	Time 0.197 (0.226)	Data 0.001 (0.008)	Loss 6.4249 (6.5262)	Top-1 acc 1.172 (0.446)	Top-5 acc 2.344 (1.997)	lr 0.04982
Warmup Train [2][1780/3239]	Time 0.159 (0.226)	Data 0.001 (0.008)	Loss 6.4592 (6.5259)	Top-1 acc 0.391 (0.448)	Top-5 acc 2.734 (2.002)	lr 0.04982
Warmup Train [2][1790/3239]	Time 0.203 (0.226)	Data 0.001 (0.008)	Loss 6.5171 (6.5256)	Top-1 acc 0.781 (0.450)	Top-5 acc 2.344 (2.006)	lr 0.04981
Warmup Train [2][1800/3239]	Time 0.223 (0.226)	Data 0.001 (0.008)	Loss 6.4932 (6.5254)	Top-1 acc 0.000 (0.449)	Top-5 acc 1.953 (2.008)	lr 0.04981
Warmup Train [2][1810/3239]	Time 0.203 (0.226)	Data 0.001 (0.008)	Loss 6.4775 (6.5251)	Top-1 acc 1.953 (0.452)	Top-5 acc 3.516 (2.012)	lr 0.04981
Warmup Train [2][1820/3239]	Time 0.201 (0.226)	Data 0.001 (0.008)	Loss 6.4274 (6.5249)	Top-1 acc 0.781 (0.453)	Top-5 acc 2.344 (2.016)	lr 0.04981
Warmup Train [2][1830/3239]	Time 0.222 (0.226)	Data 0.001 (0.008)	Loss 6.6078 (6.5248)	Top-1 acc 0.000 (0.453)	Top-5 acc 1.562 (2.018)	lr 0.04981
Warmup Train [2][1840/3239]	Time 0.277 (0.226)	Data 0.001 (0.008)	Loss 6.4869 (6.5246)	Top-1 acc 0.781 (0.453)	Top-5 acc 3.516 (2.018)	lr 0.04981
Warmup Train [2][1850/3239]	Time 0.159 (0.226)	Data 0.001 (0.008)	Loss 6.4967 (6.5244)	Top-1 acc 0.781 (0.452)	Top-5 acc 1.953 (2.019)	lr 0.04981
Warmup Train [2][1860/3239]	Time 0.315 (0.226)	Data 0.001 (0.008)	Loss 6.4502 (6.5242)	Top-1 acc 1.172 (0.454)	Top-5 acc 2.344 (2.023)	lr 0.04981
Warmup Train [2][1870/3239]	Time 0.142 (0.226)	Data 0.001 (0.008)	Loss 6.4112 (6.5240)	Top-1 acc 0.000 (0.454)	Top-5 acc 1.953 (2.023)	lr 0.04981
Warmup Train [2][1880/3239]	Time 0.224 (0.226)	Data 0.001 (0.008)	Loss 6.4344 (6.5238)	Top-1 acc 1.172 (0.456)	Top-5 acc 2.344 (2.029)	lr 0.04981
Warmup Train [2][1890/3239]	Time 0.248 (0.226)	Data 0.001 (0.008)	Loss 6.5131 (6.5237)	Top-1 acc 0.000 (0.455)	Top-5 acc 1.172 (2.026)	lr 0.04981
Warmup Train [2][1900/3239]	Time 0.148 (0.226)	Data 0.001 (0.008)	Loss 6.5209 (6.5234)	Top-1 acc 0.781 (0.457)	Top-5 acc 3.125 (2.029)	lr 0.04981
Warmup Train [2][1910/3239]	Time 0.166 (0.226)	Data 0.001 (0.008)	Loss 6.4368 (6.5231)	Top-1 acc 0.781 (0.458)	Top-5 acc 2.734 (2.033)	lr 0.04981
Warmup Train [2][1920/3239]	Time 0.194 (0.226)	Data 0.001 (0.008)	Loss 6.4588 (6.5229)	Top-1 acc 1.562 (0.459)	Top-5 acc 2.734 (2.035)	lr 0.04980
Warmup Train [2][1930/3239]	Time 0.242 (0.226)	Data 0.001 (0.008)	Loss 6.5367 (6.5228)	Top-1 acc 0.391 (0.459)	Top-5 acc 0.781 (2.034)	lr 0.04980
Warmup Train [2][1940/3239]	Time 0.246 (0.226)	Data 0.001 (0.008)	Loss 6.4677 (6.5225)	Top-1 acc 1.172 (0.460)	Top-5 acc 2.734 (2.036)	lr 0.04980
Warmup Train [2][1950/3239]	Time 0.178 (0.226)	Data 0.001 (0.008)	Loss 6.4995 (6.5224)	Top-1 acc 1.562 (0.461)	Top-5 acc 3.516 (2.039)	lr 0.04980
Warmup Train [2][1960/3239]	Time 0.223 (0.226)	Data 0.001 (0.008)	Loss 6.5106 (6.5223)	Top-1 acc 0.000 (0.462)	Top-5 acc 1.172 (2.040)	lr 0.04980
Warmup Train [2][1970/3239]	Time 0.341 (0.226)	Data 0.001 (0.008)	Loss 6.4404 (6.5221)	Top-1 acc 0.391 (0.462)	Top-5 acc 1.562 (2.040)	lr 0.04980
Warmup Train [2][1980/3239]	Time 0.202 (0.226)	Data 0.001 (0.008)	Loss 6.4938 (6.5218)	Top-1 acc 0.391 (0.463)	Top-5 acc 1.562 (2.045)	lr 0.04980
Warmup Train [2][1990/3239]	Time 0.240 (0.226)	Data 0.001 (0.008)	Loss 6.5122 (6.5216)	Top-1 acc 0.000 (0.464)	Top-5 acc 2.734 (2.048)	lr 0.04980
Warmup Train [2][2000/3239]	Time 0.202 (0.226)	Data 0.001 (0.008)	Loss 6.4622 (6.5214)	Top-1 acc 1.172 (0.465)	Top-5 acc 1.953 (2.049)	lr 0.04980
Warmup Train [2][2010/3239]	Time 0.240 (0.226)	Data 0.001 (0.008)	Loss 6.4323 (6.5211)	Top-1 acc 1.172 (0.466)	Top-5 acc 2.734 (2.049)	lr 0.04980
Warmup Train [2][2020/3239]	Time 0.166 (0.226)	Data 0.001 (0.008)	Loss 6.4423 (6.5209)	Top-1 acc 0.781 (0.467)	Top-5 acc 2.734 (2.051)	lr 0.04980
Warmup Train [2][2030/3239]	Time 0.188 (0.225)	Data 0.001 (0.008)	Loss 6.4793 (6.5207)	Top-1 acc 1.562 (0.467)	Top-5 acc 2.734 (2.050)	lr 0.04980
Warmup Train [2][2040/3239]	Time 0.179 (0.225)	Data 0.002 (0.008)	Loss 6.4953 (6.5205)	Top-1 acc 0.391 (0.468)	Top-5 acc 1.953 (2.055)	lr 0.04980
Warmup Train [2][2050/3239]	Time 0.200 (0.225)	Data 0.001 (0.008)	Loss 6.5096 (6.5203)	Top-1 acc 0.781 (0.469)	Top-5 acc 3.516 (2.058)	lr 0.04979
Warmup Train [2][2060/3239]	Time 0.281 (0.225)	Data 0.001 (0.008)	Loss 6.5290 (6.5200)	Top-1 acc 0.000 (0.468)	Top-5 acc 1.172 (2.062)	lr 0.04979
Warmup Train [2][2070/3239]	Time 0.129 (0.225)	Data 0.001 (0.008)	Loss 6.4555 (6.5198)	Top-1 acc 0.391 (0.468)	Top-5 acc 2.734 (2.064)	lr 0.04979
Warmup Train [2][2080/3239]	Time 0.309 (0.225)	Data 0.001 (0.008)	Loss 6.4904 (6.5196)	Top-1 acc 0.000 (0.467)	Top-5 acc 2.734 (2.064)	lr 0.04979
Warmup Train [2][2090/3239]	Time 0.236 (0.225)	Data 0.001 (0.007)	Loss 6.4655 (6.5194)	Top-1 acc 0.781 (0.466)	Top-5 acc 4.688 (2.065)	lr 0.04979
Warmup Train [2][2100/3239]	Time 0.212 (0.225)	Data 0.001 (0.007)	Loss 6.5178 (6.5192)	Top-1 acc 1.562 (0.468)	Top-5 acc 2.344 (2.069)	lr 0.04979
Warmup Train [2][2110/3239]	Time 0.244 (0.225)	Data 0.001 (0.007)	Loss 6.4810 (6.5190)	Top-1 acc 0.000 (0.467)	Top-5 acc 1.953 (2.071)	lr 0.04979
Warmup Train [2][2120/3239]	Time 0.205 (0.225)	Data 0.001 (0.007)	Loss 6.5847 (6.5189)	Top-1 acc 0.391 (0.467)	Top-5 acc 1.953 (2.075)	lr 0.04979
Warmup Train [2][2130/3239]	Time 0.192 (0.225)	Data 0.001 (0.007)	Loss 6.5113 (6.5188)	Top-1 acc 0.781 (0.468)	Top-5 acc 3.516 (2.080)	lr 0.04979
Warmup Train [2][2140/3239]	Time 0.226 (0.225)	Data 0.001 (0.007)	Loss 6.5604 (6.5186)	Top-1 acc 0.000 (0.468)	Top-5 acc 1.172 (2.084)	lr 0.04979
Warmup Train [2][2150/3239]	Time 0.206 (0.225)	Data 0.001 (0.007)	Loss 6.5395 (6.5184)	Top-1 acc 0.000 (0.468)	Top-5 acc 1.953 (2.085)	lr 0.04979
Warmup Train [2][2160/3239]	Time 0.192 (0.225)	Data 0.001 (0.007)	Loss 6.4431 (6.5181)	Top-1 acc 0.000 (0.468)	Top-5 acc 5.078 (2.090)	lr 0.04979
Warmup Train [2][2170/3239]	Time 0.311 (0.225)	Data 0.002 (0.007)	Loss 6.4320 (6.5178)	Top-1 acc 0.781 (0.469)	Top-5 acc 2.734 (2.093)	lr 0.04979
Warmup Train [2][2180/3239]	Time 0.245 (0.225)	Data 0.001 (0.007)	Loss 6.4861 (6.5177)	Top-1 acc 0.000 (0.467)	Top-5 acc 1.953 (2.096)	lr 0.04978
Warmup Train [2][2190/3239]	Time 0.334 (0.225)	Data 0.001 (0.007)	Loss 6.4616 (6.5175)	Top-1 acc 0.781 (0.468)	Top-5 acc 2.344 (2.098)	lr 0.04978
Warmup Train [2][2200/3239]	Time 0.224 (0.225)	Data 0.001 (0.007)	Loss 6.5457 (6.5174)	Top-1 acc 0.781 (0.469)	Top-5 acc 1.953 (2.101)	lr 0.04978
Warmup Train [2][2210/3239]	Time 0.159 (0.225)	Data 0.001 (0.007)	Loss 6.4324 (6.5171)	Top-1 acc 0.781 (0.469)	Top-5 acc 2.344 (2.102)	lr 0.04978
Warmup Train [2][2220/3239]	Time 0.279 (0.225)	Data 0.001 (0.007)	Loss 6.4408 (6.5170)	Top-1 acc 0.000 (0.469)	Top-5 acc 1.172 (2.102)	lr 0.04978
Warmup Train [2][2230/3239]	Time 0.205 (0.225)	Data 0.002 (0.007)	Loss 6.5008 (6.5168)	Top-1 acc 0.000 (0.470)	Top-5 acc 1.953 (2.103)	lr 0.04978
Warmup Train [2][2240/3239]	Time 0.189 (0.225)	Data 0.001 (0.007)	Loss 6.4565 (6.5166)	Top-1 acc 0.391 (0.470)	Top-5 acc 1.562 (2.105)	lr 0.04978
Warmup Train [2][2250/3239]	Time 0.173 (0.225)	Data 0.001 (0.007)	Loss 6.5382 (6.5165)	Top-1 acc 0.781 (0.470)	Top-5 acc 2.734 (2.105)	lr 0.04978
Warmup Train [2][2260/3239]	Time 0.215 (0.225)	Data 0.002 (0.007)	Loss 6.4673 (6.5162)	Top-1 acc 0.391 (0.470)	Top-5 acc 2.734 (2.108)	lr 0.04978
Warmup Train [2][2270/3239]	Time 0.209 (0.225)	Data 0.001 (0.007)	Loss 6.4747 (6.5161)	Top-1 acc 0.000 (0.471)	Top-5 acc 2.344 (2.111)	lr 0.04978
Warmup Train [2][2280/3239]	Time 0.149 (0.225)	Data 0.001 (0.007)	Loss 6.4463 (6.5159)	Top-1 acc 1.172 (0.471)	Top-5 acc 5.078 (2.112)	lr 0.04978
Warmup Train [2][2290/3239]	Time 0.233 (0.225)	Data 0.001 (0.007)	Loss 6.4643 (6.5157)	Top-1 acc 0.391 (0.472)	Top-5 acc 2.734 (2.114)	lr 0.04978
Warmup Train [2][2300/3239]	Time 0.342 (0.225)	Data 0.001 (0.007)	Loss 6.5203 (6.5155)	Top-1 acc 0.391 (0.472)	Top-5 acc 2.344 (2.113)	lr 0.04977
Warmup Train [2][2310/3239]	Time 0.131 (0.225)	Data 0.001 (0.007)	Loss 6.4167 (6.5152)	Top-1 acc 1.172 (0.473)	Top-5 acc 2.344 (2.116)	lr 0.04977
Warmup Train [2][2320/3239]	Time 0.223 (0.225)	Data 0.001 (0.007)	Loss 6.4601 (6.5151)	Top-1 acc 1.172 (0.474)	Top-5 acc 3.906 (2.117)	lr 0.04977
Warmup Train [2][2330/3239]	Time 0.181 (0.225)	Data 0.001 (0.007)	Loss 6.4921 (6.5148)	Top-1 acc 0.781 (0.475)	Top-5 acc 2.734 (2.122)	lr 0.04977
Warmup Train [2][2340/3239]	Time 0.180 (0.225)	Data 0.001 (0.007)	Loss 6.4391 (6.5146)	Top-1 acc 0.391 (0.475)	Top-5 acc 3.516 (2.124)	lr 0.04977
Warmup Train [2][2350/3239]	Time 0.224 (0.225)	Data 0.001 (0.007)	Loss 6.4184 (6.5144)	Top-1 acc 1.562 (0.477)	Top-5 acc 3.516 (2.126)	lr 0.04977
Warmup Train [2][2360/3239]	Time 0.248 (0.225)	Data 0.001 (0.007)	Loss 6.4371 (6.5142)	Top-1 acc 0.781 (0.477)	Top-5 acc 3.906 (2.128)	lr 0.04977
Warmup Train [2][2370/3239]	Time 0.213 (0.225)	Data 0.001 (0.007)	Loss 6.4278 (6.5139)	Top-1 acc 1.172 (0.478)	Top-5 acc 2.734 (2.130)	lr 0.04977
Warmup Train [2][2380/3239]	Time 0.142 (0.225)	Data 0.001 (0.007)	Loss 6.4903 (6.5138)	Top-1 acc 0.781 (0.478)	Top-5 acc 1.953 (2.131)	lr 0.04977
Warmup Train [2][2390/3239]	Time 0.233 (0.225)	Data 0.001 (0.007)	Loss 6.4413 (6.5136)	Top-1 acc 0.391 (0.479)	Top-5 acc 1.562 (2.132)	lr 0.04977
Warmup Train [2][2400/3239]	Time 0.201 (0.225)	Data 0.001 (0.007)	Loss 6.4712 (6.5134)	Top-1 acc 1.172 (0.480)	Top-5 acc 2.734 (2.134)	lr 0.04977
Warmup Train [2][2410/3239]	Time 0.207 (0.224)	Data 0.001 (0.007)	Loss 6.4685 (6.5133)	Top-1 acc 0.391 (0.482)	Top-5 acc 2.344 (2.139)	lr 0.04977
Warmup Train [2][2420/3239]	Time 0.277 (0.225)	Data 0.001 (0.007)	Loss 6.4821 (6.5131)	Top-1 acc 0.391 (0.483)	Top-5 acc 1.172 (2.140)	lr 0.04977
Warmup Train [2][2430/3239]	Time 0.181 (0.225)	Data 0.002 (0.007)	Loss 6.4267 (6.5129)	Top-1 acc 0.781 (0.483)	Top-5 acc 3.125 (2.143)	lr 0.04976
Warmup Train [2][2440/3239]	Time 0.222 (0.225)	Data 0.001 (0.007)	Loss 6.5006 (6.5127)	Top-1 acc 0.781 (0.483)	Top-5 acc 1.953 (2.143)	lr 0.04976
Warmup Train [2][2450/3239]	Time 0.222 (0.225)	Data 0.001 (0.007)	Loss 6.4638 (6.5125)	Top-1 acc 0.781 (0.484)	Top-5 acc 2.734 (2.146)	lr 0.04976
Warmup Train [2][2460/3239]	Time 0.265 (0.225)	Data 0.001 (0.007)	Loss 6.4406 (6.5123)	Top-1 acc 0.391 (0.484)	Top-5 acc 1.562 (2.150)	lr 0.04976
Warmup Train [2][2470/3239]	Time 0.201 (0.224)	Data 0.001 (0.007)	Loss 6.4139 (6.5120)	Top-1 acc 0.391 (0.485)	Top-5 acc 3.125 (2.154)	lr 0.04976
Warmup Train [2][2480/3239]	Time 0.221 (0.224)	Data 0.001 (0.007)	Loss 6.5285 (6.5119)	Top-1 acc 0.781 (0.487)	Top-5 acc 1.953 (2.158)	lr 0.04976
Warmup Train [2][2490/3239]	Time 0.211 (0.224)	Data 0.001 (0.007)	Loss 6.4157 (6.5117)	Top-1 acc 1.172 (0.487)	Top-5 acc 2.344 (2.160)	lr 0.04976
Warmup Train [2][2500/3239]	Time 0.253 (0.224)	Data 0.001 (0.007)	Loss 6.4724 (6.5116)	Top-1 acc 0.000 (0.486)	Top-5 acc 1.562 (2.160)	lr 0.04976
Warmup Train [2][2510/3239]	Time 0.124 (0.224)	Data 0.001 (0.007)	Loss 6.4499 (6.5114)	Top-1 acc 0.391 (0.487)	Top-5 acc 2.734 (2.160)	lr 0.04976
Warmup Train [2][2520/3239]	Time 0.304 (0.224)	Data 0.001 (0.007)	Loss 6.5088 (6.5112)	Top-1 acc 0.781 (0.488)	Top-5 acc 2.344 (2.162)	lr 0.04976
Warmup Train [2][2530/3239]	Time 0.154 (0.224)	Data 0.001 (0.007)	Loss 6.4905 (6.5111)	Top-1 acc 0.000 (0.489)	Top-5 acc 3.516 (2.163)	lr 0.04976
Warmup Train [2][2540/3239]	Time 0.168 (0.224)	Data 0.001 (0.007)	Loss 6.5225 (6.5109)	Top-1 acc 0.781 (0.490)	Top-5 acc 2.734 (2.166)	lr 0.04976
Warmup Train [2][2550/3239]	Time 0.137 (0.224)	Data 0.002 (0.007)	Loss 6.4984 (6.5107)	Top-1 acc 0.781 (0.492)	Top-5 acc 3.906 (2.171)	lr 0.04975
Warmup Train [2][2560/3239]	Time 0.240 (0.224)	Data 0.001 (0.007)	Loss 6.5348 (6.5106)	Top-1 acc 0.000 (0.492)	Top-5 acc 1.953 (2.172)	lr 0.04975
Warmup Train [2][2570/3239]	Time 0.238 (0.224)	Data 0.001 (0.007)	Loss 6.4319 (6.5103)	Top-1 acc 1.172 (0.493)	Top-5 acc 2.734 (2.175)	lr 0.04975
Warmup Train [2][2580/3239]	Time 0.201 (0.224)	Data 0.001 (0.007)	Loss 6.4493 (6.5101)	Top-1 acc 0.781 (0.494)	Top-5 acc 4.297 (2.179)	lr 0.04975
Warmup Train [2][2590/3239]	Time 0.224 (0.224)	Data 0.001 (0.007)	Loss 6.4322 (6.5098)	Top-1 acc 0.391 (0.495)	Top-5 acc 4.297 (2.182)	lr 0.04975
Warmup Train [2][2600/3239]	Time 0.195 (0.224)	Data 0.002 (0.007)	Loss 6.4669 (6.5096)	Top-1 acc 0.000 (0.495)	Top-5 acc 2.344 (2.184)	lr 0.04975
Warmup Train [2][2610/3239]	Time 0.201 (0.224)	Data 0.001 (0.007)	Loss 6.4493 (6.5094)	Top-1 acc 0.000 (0.495)	Top-5 acc 1.953 (2.186)	lr 0.04975
Warmup Train [2][2620/3239]	Time 0.337 (0.224)	Data 0.001 (0.007)	Loss 6.4717 (6.5093)	Top-1 acc 0.391 (0.496)	Top-5 acc 1.562 (2.186)	lr 0.04975
Warmup Train [2][2630/3239]	Time 0.197 (0.224)	Data 0.001 (0.007)	Loss 6.4764 (6.5091)	Top-1 acc 0.391 (0.497)	Top-5 acc 1.562 (2.188)	lr 0.04975
Warmup Train [2][2640/3239]	Time 0.138 (0.224)	Data 0.001 (0.007)	Loss 6.4823 (6.5089)	Top-1 acc 0.000 (0.496)	Top-5 acc 0.391 (2.189)	lr 0.04975
Warmup Train [2][2650/3239]	Time 0.259 (0.224)	Data 0.001 (0.007)	Loss 6.4807 (6.5087)	Top-1 acc 0.000 (0.498)	Top-5 acc 1.562 (2.191)	lr 0.04975
Warmup Train [2][2660/3239]	Time 0.177 (0.224)	Data 0.001 (0.007)	Loss 6.5081 (6.5086)	Top-1 acc 0.781 (0.497)	Top-5 acc 2.734 (2.194)	lr 0.04974
Warmup Train [2][2670/3239]	Time 0.297 (0.224)	Data 0.003 (0.007)	Loss 6.4960 (6.5085)	Top-1 acc 0.391 (0.497)	Top-5 acc 1.562 (2.194)	lr 0.04974
Warmup Train [2][2680/3239]	Time 0.206 (0.224)	Data 0.001 (0.007)	Loss 6.4244 (6.5083)	Top-1 acc 0.781 (0.497)	Top-5 acc 3.906 (2.196)	lr 0.04974
Warmup Train [2][2690/3239]	Time 0.212 (0.224)	Data 0.001 (0.006)	Loss 6.4218 (6.5081)	Top-1 acc 0.391 (0.497)	Top-5 acc 4.688 (2.199)	lr 0.04974
Warmup Train [2][2700/3239]	Time 0.284 (0.224)	Data 0.001 (0.006)	Loss 6.4386 (6.5080)	Top-1 acc 1.172 (0.497)	Top-5 acc 4.297 (2.201)	lr 0.04974
Warmup Train [2][2710/3239]	Time 0.224 (0.224)	Data 0.001 (0.006)	Loss 6.4785 (6.5078)	Top-1 acc 0.391 (0.498)	Top-5 acc 2.734 (2.204)	lr 0.04974
Warmup Train [2][2720/3239]	Time 0.204 (0.224)	Data 0.001 (0.006)	Loss 6.4651 (6.5076)	Top-1 acc 0.781 (0.500)	Top-5 acc 5.078 (2.207)	lr 0.04974
Warmup Train [2][2730/3239]	Time 0.185 (0.224)	Data 0.002 (0.006)	Loss 6.4972 (6.5074)	Top-1 acc 0.391 (0.500)	Top-5 acc 1.953 (2.209)	lr 0.04974
Warmup Train [2][2740/3239]	Time 0.272 (0.224)	Data 0.001 (0.006)	Loss 6.3674 (6.5072)	Top-1 acc 1.172 (0.500)	Top-5 acc 5.469 (2.212)	lr 0.04974
Warmup Train [2][2750/3239]	Time 0.152 (0.224)	Data 0.001 (0.006)	Loss 6.4859 (6.5070)	Top-1 acc 0.781 (0.500)	Top-5 acc 1.953 (2.212)	lr 0.04974
Warmup Train [2][2760/3239]	Time 0.253 (0.224)	Data 0.001 (0.006)	Loss 6.4608 (6.5068)	Top-1 acc 0.781 (0.502)	Top-5 acc 1.953 (2.216)	lr 0.04974
Warmup Train [2][2770/3239]	Time 0.231 (0.224)	Data 0.001 (0.006)	Loss 6.4167 (6.5066)	Top-1 acc 2.344 (0.504)	Top-5 acc 4.688 (2.219)	lr 0.04974
Warmup Train [2][2780/3239]	Time 0.179 (0.224)	Data 0.001 (0.006)	Loss 6.5301 (6.5065)	Top-1 acc 0.391 (0.504)	Top-5 acc 2.344 (2.221)	lr 0.04973
Warmup Train [2][2790/3239]	Time 0.222 (0.224)	Data 0.001 (0.006)	Loss 6.4623 (6.5063)	Top-1 acc 0.000 (0.503)	Top-5 acc 2.344 (2.222)	lr 0.04973
Warmup Train [2][2800/3239]	Time 0.234 (0.224)	Data 0.001 (0.006)	Loss 6.4372 (6.5061)	Top-1 acc 0.391 (0.503)	Top-5 acc 2.734 (2.224)	lr 0.04973
Warmup Train [2][2810/3239]	Time 0.184 (0.224)	Data 0.001 (0.006)	Loss 6.4821 (6.5060)	Top-1 acc 0.391 (0.502)	Top-5 acc 2.734 (2.225)	lr 0.04973
Warmup Train [2][2820/3239]	Time 0.256 (0.224)	Data 0.001 (0.006)	Loss 6.4415 (6.5057)	Top-1 acc 0.000 (0.504)	Top-5 acc 1.172 (2.229)	lr 0.04973
Warmup Train [2][2830/3239]	Time 0.196 (0.224)	Data 0.001 (0.006)	Loss 6.4934 (6.5055)	Top-1 acc 0.000 (0.504)	Top-5 acc 0.781 (2.233)	lr 0.04973
Warmup Train [2][2840/3239]	Time 0.197 (0.224)	Data 0.001 (0.006)	Loss 6.4333 (6.5054)	Top-1 acc 1.562 (0.505)	Top-5 acc 3.516 (2.235)	lr 0.04973
Warmup Train [2][2850/3239]	Time 0.307 (0.224)	Data 0.001 (0.006)	Loss 6.4513 (6.5051)	Top-1 acc 0.000 (0.505)	Top-5 acc 1.953 (2.238)	lr 0.04973
Warmup Train [2][2860/3239]	Time 0.289 (0.224)	Data 0.001 (0.006)	Loss 6.4756 (6.5050)	Top-1 acc 0.391 (0.505)	Top-5 acc 4.297 (2.238)	lr 0.04973
Warmup Train [2][2870/3239]	Time 0.212 (0.224)	Data 0.001 (0.006)	Loss 6.4895 (6.5049)	Top-1 acc 0.781 (0.506)	Top-5 acc 1.953 (2.239)	lr 0.04973
Warmup Train [2][2880/3239]	Time 0.187 (0.224)	Data 0.001 (0.006)	Loss 6.4105 (6.5047)	Top-1 acc 0.391 (0.506)	Top-5 acc 3.125 (2.240)	lr 0.04973
Warmup Train [2][2890/3239]	Time 0.216 (0.224)	Data 0.001 (0.006)	Loss 6.4004 (6.5045)	Top-1 acc 0.391 (0.506)	Top-5 acc 1.953 (2.241)	lr 0.04972
Warmup Train [2][2900/3239]	Time 0.162 (0.224)	Data 0.001 (0.006)	Loss 6.4494 (6.5044)	Top-1 acc 0.781 (0.507)	Top-5 acc 2.734 (2.242)	lr 0.04972
Warmup Train [2][2910/3239]	Time 0.130 (0.224)	Data 0.002 (0.006)	Loss 6.4565 (6.5043)	Top-1 acc 0.000 (0.507)	Top-5 acc 1.953 (2.244)	lr 0.04972
Warmup Train [2][2920/3239]	Time 0.144 (0.224)	Data 0.001 (0.006)	Loss 6.4352 (6.5040)	Top-1 acc 0.781 (0.507)	Top-5 acc 3.125 (2.246)	lr 0.04972
Warmup Train [2][2930/3239]	Time 0.200 (0.224)	Data 0.001 (0.006)	Loss 6.4626 (6.5038)	Top-1 acc 0.781 (0.507)	Top-5 acc 2.734 (2.248)	lr 0.04972
Warmup Train [2][2940/3239]	Time 0.237 (0.224)	Data 0.001 (0.006)	Loss 6.3948 (6.5037)	Top-1 acc 1.562 (0.509)	Top-5 acc 3.516 (2.250)	lr 0.04972
Warmup Train [2][2950/3239]	Time 0.225 (0.224)	Data 0.001 (0.006)	Loss 6.4642 (6.5035)	Top-1 acc 0.391 (0.509)	Top-5 acc 3.125 (2.252)	lr 0.04972
Warmup Train [2][2960/3239]	Time 0.283 (0.224)	Data 0.001 (0.006)	Loss 6.4877 (6.5033)	Top-1 acc 0.391 (0.509)	Top-5 acc 2.344 (2.254)	lr 0.04972
Warmup Train [2][2970/3239]	Time 0.218 (0.224)	Data 0.001 (0.006)	Loss 6.5015 (6.5031)	Top-1 acc 0.391 (0.509)	Top-5 acc 2.734 (2.256)	lr 0.04972
Warmup Train [2][2980/3239]	Time 0.385 (0.224)	Data 0.001 (0.006)	Loss 6.4227 (6.5030)	Top-1 acc 0.391 (0.509)	Top-5 acc 3.516 (2.257)	lr 0.04972
Warmup Train [2][2990/3239]	Time 0.225 (0.224)	Data 0.001 (0.006)	Loss 6.4467 (6.5028)	Top-1 acc 0.391 (0.510)	Top-5 acc 2.344 (2.259)	lr 0.04972
Warmup Train [2][3000/3239]	Time 0.183 (0.224)	Data 0.001 (0.006)	Loss 6.4368 (6.5026)	Top-1 acc 1.172 (0.511)	Top-5 acc 2.344 (2.260)	lr 0.04971
Warmup Train [2][3010/3239]	Time 0.219 (0.224)	Data 0.001 (0.006)	Loss 6.4620 (6.5023)	Top-1 acc 0.000 (0.511)	Top-5 acc 2.344 (2.262)	lr 0.04971
Warmup Train [2][3020/3239]	Time 0.196 (0.224)	Data 0.001 (0.006)	Loss 6.4140 (6.5021)	Top-1 acc 0.781 (0.513)	Top-5 acc 2.344 (2.265)	lr 0.04971
Warmup Train [2][3030/3239]	Time 0.206 (0.224)	Data 0.001 (0.006)	Loss 6.4540 (6.5019)	Top-1 acc 1.562 (0.513)	Top-5 acc 2.734 (2.265)	lr 0.04971
Warmup Train [2][3040/3239]	Time 0.193 (0.224)	Data 0.001 (0.006)	Loss 6.4620 (6.5017)	Top-1 acc 1.172 (0.514)	Top-5 acc 1.953 (2.267)	lr 0.04971
Warmup Train [2][3050/3239]	Time 0.271 (0.224)	Data 0.001 (0.006)	Loss 6.4034 (6.5015)	Top-1 acc 1.953 (0.515)	Top-5 acc 4.688 (2.270)	lr 0.04971
Warmup Train [2][3060/3239]	Time 0.216 (0.224)	Data 0.001 (0.006)	Loss 6.4904 (6.5013)	Top-1 acc 0.000 (0.515)	Top-5 acc 2.734 (2.272)	lr 0.04971
Warmup Train [2][3070/3239]	Time 0.215 (0.224)	Data 0.001 (0.006)	Loss 6.4758 (6.5012)	Top-1 acc 0.000 (0.515)	Top-5 acc 1.562 (2.272)	lr 0.04971
Warmup Train [2][3080/3239]	Time 0.256 (0.224)	Data 0.001 (0.006)	Loss 6.4177 (6.5010)	Top-1 acc 1.172 (0.515)	Top-5 acc 3.516 (2.275)	lr 0.04971
Warmup Train [2][3090/3239]	Time 0.200 (0.224)	Data 0.001 (0.006)	Loss 6.4912 (6.5008)	Top-1 acc 0.781 (0.515)	Top-5 acc 1.953 (2.277)	lr 0.04971
Warmup Train [2][3100/3239]	Time 0.186 (0.224)	Data 0.001 (0.006)	Loss 6.4622 (6.5006)	Top-1 acc 0.000 (0.516)	Top-5 acc 3.125 (2.278)	lr 0.04971
Warmup Train [2][3110/3239]	Time 0.227 (0.224)	Data 0.001 (0.006)	Loss 6.4595 (6.5004)	Top-1 acc 0.391 (0.517)	Top-5 acc 3.125 (2.281)	lr 0.04970
Warmup Train [2][3120/3239]	Time 0.201 (0.223)	Data 0.001 (0.006)	Loss 6.4209 (6.5003)	Top-1 acc 0.391 (0.517)	Top-5 acc 1.953 (2.282)	lr 0.04970
Warmup Train [2][3130/3239]	Time 0.194 (0.223)	Data 0.001 (0.006)	Loss 6.5426 (6.5002)	Top-1 acc 0.391 (0.517)	Top-5 acc 0.781 (2.282)	lr 0.04970
Warmup Train [2][3140/3239]	Time 0.137 (0.223)	Data 0.001 (0.006)	Loss 6.4604 (6.5001)	Top-1 acc 0.781 (0.517)	Top-5 acc 4.688 (2.284)	lr 0.04970
Warmup Train [2][3150/3239]	Time 0.203 (0.223)	Data 0.001 (0.006)	Loss 6.4193 (6.4999)	Top-1 acc 0.391 (0.518)	Top-5 acc 2.734 (2.287)	lr 0.04970
Warmup Train [2][3160/3239]	Time 0.196 (0.223)	Data 0.001 (0.006)	Loss 6.4189 (6.4997)	Top-1 acc 0.391 (0.519)	Top-5 acc 2.734 (2.290)	lr 0.04970
Warmup Train [2][3170/3239]	Time 0.293 (0.223)	Data 0.001 (0.006)	Loss 6.4625 (6.4995)	Top-1 acc 0.000 (0.519)	Top-5 acc 4.688 (2.294)	lr 0.04970
Warmup Train [2][3180/3239]	Time 0.146 (0.223)	Data 0.000 (0.006)	Loss 6.4260 (6.4993)	Top-1 acc 1.562 (0.520)	Top-5 acc 2.734 (2.295)	lr 0.04970
Warmup Train [2][3190/3239]	Time 0.207 (0.223)	Data 0.000 (0.006)	Loss 6.4451 (6.4991)	Top-1 acc 0.000 (0.520)	Top-5 acc 3.906 (2.297)	lr 0.04970
Warmup Train [2][3200/3239]	Time 0.136 (0.223)	Data 0.000 (0.006)	Loss 6.4455 (6.4989)	Top-1 acc 0.000 (0.520)	Top-5 acc 2.344 (2.299)	lr 0.04970
Warmup Train [2][3210/3239]	Time 0.175 (0.223)	Data 0.000 (0.006)	Loss 6.3751 (6.4987)	Top-1 acc 0.391 (0.520)	Top-5 acc 2.734 (2.302)	lr 0.04970
Warmup Train [2][3220/3239]	Time 0.186 (0.223)	Data 0.000 (0.006)	Loss 6.4313 (6.4985)	Top-1 acc 0.781 (0.521)	Top-5 acc 4.688 (2.304)	lr 0.04969
Warmup Train [2][3230/3239]	Time 0.235 (0.223)	Data 0.000 (0.006)	Loss 6.4611 (6.4982)	Top-1 acc 1.172 (0.521)	Top-5 acc 4.688 (2.307)	lr 0.04969
Warmup Train [2][3239/3239]	Time 0.485 (0.223)	Data 0.000 (0.006)	Loss 6.4857 (6.4981)	Top-1 acc 0.000 (0.522)	Top-5 acc 0.000 (2.309)	lr 0.04969
==========Warmup Valid [2/40]	loss 6.269	top-1 acc 0.783	top-5 acc 3.326	Train top-1 0.522	top-5 2.309	flops: 442.4M
Warmup Train [3][0/3239]	Time 10.768 (10.768)	Data 9.432 (9.432)	Loss 6.4223 (6.4223)	Top-1 acc 0.000 (0.000)	Top-5 acc 2.344 (2.344)	lr 0.04969
Warmup Train [3][10/3239]	Time 0.313 (1.257)	Data 0.002 (0.859)	Loss 6.4318 (6.4426)	Top-1 acc 0.391 (0.568)	Top-5 acc 2.344 (2.734)	lr 0.04969
Warmup Train [3][20/3239]	Time 0.269 (0.794)	Data 0.001 (0.451)	Loss 6.4306 (6.4432)	Top-1 acc 0.781 (0.781)	Top-5 acc 3.125 (2.734)	lr 0.04969
Warmup Train [3][30/3239]	Time 0.215 (0.605)	Data 0.001 (0.307)	Loss 6.4084 (6.4330)	Top-1 acc 0.391 (0.832)	Top-5 acc 2.344 (2.961)	lr 0.04969
Warmup Train [3][40/3239]	Time 0.242 (0.522)	Data 0.001 (0.234)	Loss 6.4693 (6.4366)	Top-1 acc 1.172 (0.762)	Top-5 acc 3.125 (2.820)	lr 0.04969
Warmup Train [3][50/3239]	Time 0.227 (0.468)	Data 0.001 (0.188)	Loss 6.3745 (6.4374)	Top-1 acc 1.172 (0.774)	Top-5 acc 2.734 (2.796)	lr 0.04969
Warmup Train [3][60/3239]	Time 0.269 (0.429)	Data 0.001 (0.157)	Loss 6.4471 (6.4336)	Top-1 acc 0.391 (0.724)	Top-5 acc 1.953 (2.888)	lr 0.04969
Warmup Train [3][70/3239]	Time 0.197 (0.399)	Data 0.001 (0.136)	Loss 6.4616 (6.4355)	Top-1 acc 0.391 (0.688)	Top-5 acc 2.734 (2.855)	lr 0.04969
Warmup Train [3][80/3239]	Time 0.230 (0.378)	Data 0.001 (0.119)	Loss 6.4164 (6.4358)	Top-1 acc 1.953 (0.685)	Top-5 acc 5.859 (2.918)	lr 0.04968
Warmup Train [3][90/3239]	Time 0.220 (0.361)	Data 0.001 (0.106)	Loss 6.3780 (6.4338)	Top-1 acc 1.172 (0.704)	Top-5 acc 3.906 (2.966)	lr 0.04968
Warmup Train [3][100/3239]	Time 0.325 (0.348)	Data 0.001 (0.096)	Loss 6.3981 (6.4338)	Top-1 acc 1.172 (0.708)	Top-5 acc 5.078 (3.017)	lr 0.04968
Warmup Train [3][110/3239]	Time 0.175 (0.336)	Data 0.001 (0.088)	Loss 6.4119 (6.4336)	Top-1 acc 0.781 (0.690)	Top-5 acc 4.297 (3.065)	lr 0.04968
Warmup Train [3][120/3239]	Time 0.179 (0.327)	Data 0.002 (0.081)	Loss 6.4873 (6.4347)	Top-1 acc 0.781 (0.684)	Top-5 acc 1.953 (2.999)	lr 0.04968
Warmup Train [3][130/3239]	Time 0.212 (0.319)	Data 0.002 (0.075)	Loss 6.4727 (6.4346)	Top-1 acc 1.172 (0.665)	Top-5 acc 3.906 (3.012)	lr 0.04968
Warmup Train [3][140/3239]	Time 0.195 (0.312)	Data 0.001 (0.070)	Loss 6.4896 (6.4343)	Top-1 acc 1.172 (0.668)	Top-5 acc 2.734 (3.039)	lr 0.04968
Warmup Train [3][150/3239]	Time 0.233 (0.306)	Data 0.001 (0.065)	Loss 6.3955 (6.4341)	Top-1 acc 0.781 (0.678)	Top-5 acc 3.906 (3.034)	lr 0.04968
Warmup Train [3][160/3239]	Time 0.262 (0.301)	Data 0.001 (0.061)	Loss 6.3811 (6.4346)	Top-1 acc 1.172 (0.704)	Top-5 acc 5.469 (3.084)	lr 0.04968
Warmup Train [3][170/3239]	Time 0.223 (0.296)	Data 0.001 (0.058)	Loss 6.4455 (6.4346)	Top-1 acc 0.391 (0.699)	Top-5 acc 3.125 (3.095)	lr 0.04968
Warmup Train [3][180/3239]	Time 0.206 (0.291)	Data 0.001 (0.055)	Loss 6.4177 (6.4355)	Top-1 acc 0.391 (0.704)	Top-5 acc 2.344 (3.099)	lr 0.04967
Warmup Train [3][190/3239]	Time 0.228 (0.288)	Data 0.002 (0.052)	Loss 6.4476 (6.4363)	Top-1 acc 0.781 (0.708)	Top-5 acc 2.344 (3.094)	lr 0.04967
Warmup Train [3][200/3239]	Time 0.209 (0.285)	Data 0.001 (0.050)	Loss 6.3855 (6.4351)	Top-1 acc 0.781 (0.723)	Top-5 acc 3.125 (3.092)	lr 0.04967
Warmup Train [3][210/3239]	Time 0.168 (0.282)	Data 0.001 (0.047)	Loss 6.4201 (6.4342)	Top-1 acc 0.781 (0.720)	Top-5 acc 2.344 (3.086)	lr 0.04967
Warmup Train [3][220/3239]	Time 0.256 (0.279)	Data 0.001 (0.046)	Loss 6.4484 (6.4343)	Top-1 acc 0.781 (0.716)	Top-5 acc 3.516 (3.068)	lr 0.04967
Warmup Train [3][230/3239]	Time 0.200 (0.277)	Data 0.001 (0.044)	Loss 6.3599 (6.4335)	Top-1 acc 1.172 (0.707)	Top-5 acc 3.125 (3.046)	lr 0.04967
Warmup Train [3][240/3239]	Time 0.179 (0.274)	Data 0.001 (0.042)	Loss 6.4342 (6.4328)	Top-1 acc 0.781 (0.705)	Top-5 acc 3.125 (3.046)	lr 0.04967
Warmup Train [3][250/3239]	Time 0.191 (0.272)	Data 0.002 (0.040)	Loss 6.3679 (6.4327)	Top-1 acc 0.781 (0.696)	Top-5 acc 3.906 (3.035)	lr 0.04967
Warmup Train [3][260/3239]	Time 0.150 (0.269)	Data 0.001 (0.039)	Loss 6.4983 (6.4336)	Top-1 acc 0.391 (0.682)	Top-5 acc 1.953 (3.010)	lr 0.04967
Warmup Train [3][270/3239]	Time 0.269 (0.268)	Data 0.001 (0.038)	Loss 6.4176 (6.4339)	Top-1 acc 0.781 (0.677)	Top-5 acc 2.344 (2.985)	lr 0.04967
Warmup Train [3][280/3239]	Time 0.246 (0.266)	Data 0.002 (0.036)	Loss 6.4477 (6.4331)	Top-1 acc 0.781 (0.677)	Top-5 acc 2.344 (2.987)	lr 0.04967
Warmup Train [3][290/3239]	Time 0.247 (0.265)	Data 0.001 (0.035)	Loss 6.4372 (6.4338)	Top-1 acc 1.953 (0.675)	Top-5 acc 3.516 (2.973)	lr 0.04966
Warmup Train [3][300/3239]	Time 0.270 (0.263)	Data 0.001 (0.034)	Loss 6.4195 (6.4339)	Top-1 acc 0.000 (0.679)	Top-5 acc 1.953 (2.967)	lr 0.04966
Warmup Train [3][310/3239]	Time 0.244 (0.262)	Data 0.001 (0.033)	Loss 6.4455 (6.4329)	Top-1 acc 0.391 (0.673)	Top-5 acc 3.125 (2.968)	lr 0.04966
Warmup Train [3][320/3239]	Time 0.256 (0.260)	Data 0.001 (0.032)	Loss 6.4295 (6.4324)	Top-1 acc 0.391 (0.677)	Top-5 acc 2.344 (2.974)	lr 0.04966
Warmup Train [3][330/3239]	Time 0.200 (0.259)	Data 0.001 (0.031)	Loss 6.4521 (6.4323)	Top-1 acc 0.781 (0.681)	Top-5 acc 1.562 (2.981)	lr 0.04966
Warmup Train [3][340/3239]	Time 0.283 (0.258)	Data 0.001 (0.030)	Loss 6.4443 (6.4323)	Top-1 acc 0.000 (0.677)	Top-5 acc 1.562 (2.981)	lr 0.04966
Warmup Train [3][350/3239]	Time 0.254 (0.256)	Data 0.001 (0.030)	Loss 6.4269 (6.4323)	Top-1 acc 0.000 (0.677)	Top-5 acc 2.734 (2.978)	lr 0.04966
Warmup Train [3][360/3239]	Time 0.209 (0.255)	Data 0.001 (0.029)	Loss 6.3614 (6.4316)	Top-1 acc 1.562 (0.683)	Top-5 acc 5.469 (2.990)	lr 0.04966
Warmup Train [3][370/3239]	Time 0.180 (0.254)	Data 0.001 (0.028)	Loss 6.4165 (6.4315)	Top-1 acc 0.000 (0.680)	Top-5 acc 3.125 (2.989)	lr 0.04966
Warmup Train [3][380/3239]	Time 0.147 (0.253)	Data 0.001 (0.027)	Loss 6.4000 (6.4310)	Top-1 acc 0.000 (0.680)	Top-5 acc 3.125 (2.999)	lr 0.04966
Warmup Train [3][390/3239]	Time 0.251 (0.253)	Data 0.001 (0.027)	Loss 6.4024 (6.4313)	Top-1 acc 1.953 (0.683)	Top-5 acc 3.906 (3.004)	lr 0.04965
Warmup Train [3][400/3239]	Time 0.225 (0.252)	Data 0.001 (0.026)	Loss 6.4084 (6.4313)	Top-1 acc 0.781 (0.678)	Top-5 acc 2.344 (3.010)	lr 0.04965
Warmup Train [3][410/3239]	Time 0.127 (0.251)	Data 0.001 (0.026)	Loss 6.3408 (6.4305)	Top-1 acc 1.172 (0.682)	Top-5 acc 5.078 (3.023)	lr 0.04965
Warmup Train [3][420/3239]	Time 0.411 (0.251)	Data 0.001 (0.025)	Loss 6.4831 (6.4306)	Top-1 acc 1.172 (0.682)	Top-5 acc 3.516 (3.016)	lr 0.04965
Warmup Train [3][430/3239]	Time 0.198 (0.250)	Data 0.001 (0.025)	Loss 6.4012 (6.4305)	Top-1 acc 1.172 (0.683)	Top-5 acc 5.078 (3.024)	lr 0.04965
Warmup Train [3][440/3239]	Time 0.185 (0.250)	Data 0.002 (0.024)	Loss 6.5071 (6.4303)	Top-1 acc 0.000 (0.684)	Top-5 acc 2.344 (3.023)	lr 0.04965
Warmup Train [3][450/3239]	Time 0.217 (0.249)	Data 0.002 (0.024)	Loss 6.3586 (6.4300)	Top-1 acc 1.562 (0.683)	Top-5 acc 4.688 (3.036)	lr 0.04965
Warmup Train [3][460/3239]	Time 0.208 (0.249)	Data 0.002 (0.023)	Loss 6.3715 (6.4294)	Top-1 acc 1.562 (0.680)	Top-5 acc 4.297 (3.040)	lr 0.04965
Warmup Train [3][470/3239]	Time 0.223 (0.248)	Data 0.001 (0.023)	Loss 6.4448 (6.4290)	Top-1 acc 1.562 (0.688)	Top-5 acc 3.906 (3.051)	lr 0.04965
Warmup Train [3][480/3239]	Time 0.243 (0.248)	Data 0.001 (0.022)	Loss 6.4466 (6.4288)	Top-1 acc 0.000 (0.694)	Top-5 acc 3.125 (3.056)	lr 0.04965
Warmup Train [3][490/3239]	Time 0.227 (0.247)	Data 0.002 (0.022)	Loss 6.4582 (6.4285)	Top-1 acc 0.781 (0.696)	Top-5 acc 2.734 (3.051)	lr 0.04964
Warmup Train [3][500/3239]	Time 0.161 (0.246)	Data 0.001 (0.022)	Loss 6.4645 (6.4283)	Top-1 acc 0.781 (0.695)	Top-5 acc 3.516 (3.052)	lr 0.04964
Warmup Train [3][510/3239]	Time 0.272 (0.246)	Data 0.001 (0.021)	Loss 6.4733 (6.4283)	Top-1 acc 0.391 (0.699)	Top-5 acc 2.344 (3.051)	lr 0.04964
Warmup Train [3][520/3239]	Time 0.314 (0.246)	Data 0.001 (0.021)	Loss 6.4643 (6.4281)	Top-1 acc 0.000 (0.696)	Top-5 acc 3.516 (3.055)	lr 0.04964
Warmup Train [3][530/3239]	Time 0.172 (0.245)	Data 0.001 (0.021)	Loss 6.4006 (6.4282)	Top-1 acc 0.781 (0.697)	Top-5 acc 3.516 (3.055)	lr 0.04964
Warmup Train [3][540/3239]	Time 0.306 (0.244)	Data 0.002 (0.020)	Loss 6.4436 (6.4281)	Top-1 acc 1.172 (0.697)	Top-5 acc 3.516 (3.060)	lr 0.04964
Warmup Train [3][550/3239]	Time 0.175 (0.244)	Data 0.001 (0.020)	Loss 6.4823 (6.4278)	Top-1 acc 1.172 (0.698)	Top-5 acc 3.125 (3.059)	lr 0.04964
Warmup Train [3][560/3239]	Time 0.191 (0.244)	Data 0.001 (0.020)	Loss 6.4048 (6.4271)	Top-1 acc 0.000 (0.699)	Top-5 acc 1.172 (3.064)	lr 0.04964
Warmup Train [3][570/3239]	Time 0.188 (0.243)	Data 0.001 (0.019)	Loss 6.3969 (6.4268)	Top-1 acc 0.781 (0.703)	Top-5 acc 3.516 (3.063)	lr 0.04964
Warmup Train [3][580/3239]	Time 0.207 (0.243)	Data 0.001 (0.019)	Loss 6.4423 (6.4269)	Top-1 acc 1.172 (0.702)	Top-5 acc 2.344 (3.068)	lr 0.04963
Warmup Train [3][590/3239]	Time 0.194 (0.243)	Data 0.001 (0.019)	Loss 6.3928 (6.4266)	Top-1 acc 0.391 (0.703)	Top-5 acc 4.297 (3.065)	lr 0.04963
Warmup Train [3][600/3239]	Time 0.272 (0.243)	Data 0.001 (0.019)	Loss 6.4271 (6.4267)	Top-1 acc 0.391 (0.701)	Top-5 acc 2.344 (3.060)	lr 0.04963
Warmup Train [3][610/3239]	Time 0.308 (0.244)	Data 0.003 (0.018)	Loss 6.4169 (6.4269)	Top-1 acc 0.391 (0.702)	Top-5 acc 3.906 (3.064)	lr 0.04963
Warmup Train [3][620/3239]	Time 0.205 (0.244)	Data 0.001 (0.018)	Loss 6.3852 (6.4265)	Top-1 acc 1.172 (0.702)	Top-5 acc 5.078 (3.072)	lr 0.04963
Warmup Train [3][630/3239]	Time 0.372 (0.244)	Data 0.001 (0.018)	Loss 6.4021 (6.4262)	Top-1 acc 0.391 (0.701)	Top-5 acc 3.125 (3.071)	lr 0.04963
Warmup Train [3][640/3239]	Time 0.196 (0.244)	Data 0.001 (0.018)	Loss 6.4126 (6.4258)	Top-1 acc 0.391 (0.706)	Top-5 acc 2.344 (3.077)	lr 0.04963
Warmup Train [3][650/3239]	Time 0.195 (0.244)	Data 0.001 (0.017)	Loss 6.4058 (6.4255)	Top-1 acc 2.734 (0.709)	Top-5 acc 5.859 (3.081)	lr 0.04963
Warmup Train [3][660/3239]	Time 0.194 (0.243)	Data 0.001 (0.017)	Loss 6.3177 (6.4252)	Top-1 acc 1.172 (0.710)	Top-5 acc 4.688 (3.086)	lr 0.04963
Warmup Train [3][670/3239]	Time 0.221 (0.243)	Data 0.001 (0.017)	Loss 6.4592 (6.4252)	Top-1 acc 0.391 (0.707)	Top-5 acc 2.734 (3.087)	lr 0.04963
Warmup Train [3][680/3239]	Time 0.231 (0.243)	Data 0.001 (0.017)	Loss 6.4420 (6.4253)	Top-1 acc 1.172 (0.708)	Top-5 acc 3.125 (3.084)	lr 0.04962
Warmup Train [3][690/3239]	Time 0.175 (0.243)	Data 0.001 (0.016)	Loss 6.4227 (6.4255)	Top-1 acc 0.781 (0.708)	Top-5 acc 3.125 (3.073)	lr 0.04962
Warmup Train [3][700/3239]	Time 0.243 (0.243)	Data 0.001 (0.016)	Loss 6.4121 (6.4254)	Top-1 acc 0.391 (0.711)	Top-5 acc 3.516 (3.076)	lr 0.04962
Warmup Train [3][710/3239]	Time 0.249 (0.242)	Data 0.002 (0.016)	Loss 6.4470 (6.4254)	Top-1 acc 1.172 (0.710)	Top-5 acc 3.125 (3.072)	lr 0.04962
Warmup Train [3][720/3239]	Time 0.344 (0.243)	Data 0.001 (0.016)	Loss 6.4782 (6.4252)	Top-1 acc 0.000 (0.710)	Top-5 acc 2.344 (3.077)	lr 0.04962
Warmup Train [3][730/3239]	Time 0.211 (0.242)	Data 0.001 (0.016)	Loss 6.4083 (6.4249)	Top-1 acc 0.391 (0.709)	Top-5 acc 3.125 (3.080)	lr 0.04962
Warmup Train [3][740/3239]	Time 0.221 (0.242)	Data 0.001 (0.015)	Loss 6.3817 (6.4247)	Top-1 acc 0.781 (0.712)	Top-5 acc 3.125 (3.089)	lr 0.04962
Warmup Train [3][750/3239]	Time 0.249 (0.242)	Data 0.001 (0.015)	Loss 6.3403 (6.4245)	Top-1 acc 1.172 (0.717)	Top-5 acc 2.734 (3.093)	lr 0.04962
Warmup Train [3][760/3239]	Time 0.191 (0.242)	Data 0.001 (0.015)	Loss 6.3626 (6.4241)	Top-1 acc 0.391 (0.715)	Top-5 acc 3.906 (3.093)	lr 0.04962
Warmup Train [3][770/3239]	Time 0.185 (0.242)	Data 0.001 (0.015)	Loss 6.3934 (6.4240)	Top-1 acc 1.172 (0.717)	Top-5 acc 4.297 (3.104)	lr 0.04961
Warmup Train [3][780/3239]	Time 0.287 (0.242)	Data 0.002 (0.015)	Loss 6.3683 (6.4234)	Top-1 acc 1.172 (0.721)	Top-5 acc 5.078 (3.115)	lr 0.04961
Warmup Train [3][790/3239]	Time 0.289 (0.242)	Data 0.001 (0.015)	Loss 6.4006 (6.4230)	Top-1 acc 1.172 (0.720)	Top-5 acc 3.125 (3.113)	lr 0.04961
Warmup Train [3][800/3239]	Time 0.276 (0.242)	Data 0.001 (0.014)	Loss 6.4498 (6.4228)	Top-1 acc 0.391 (0.723)	Top-5 acc 2.344 (3.120)	lr 0.04961
Warmup Train [3][810/3239]	Time 0.223 (0.242)	Data 0.002 (0.014)	Loss 6.4070 (6.4225)	Top-1 acc 0.391 (0.725)	Top-5 acc 2.734 (3.125)	lr 0.04961
Warmup Train [3][820/3239]	Time 0.297 (0.242)	Data 0.001 (0.014)	Loss 6.4325 (6.4222)	Top-1 acc 0.781 (0.725)	Top-5 acc 3.125 (3.125)	lr 0.04961
Warmup Train [3][830/3239]	Time 0.298 (0.242)	Data 0.002 (0.014)	Loss 6.4600 (6.4222)	Top-1 acc 0.000 (0.722)	Top-5 acc 2.734 (3.121)	lr 0.04961
Warmup Train [3][840/3239]	Time 0.186 (0.242)	Data 0.001 (0.014)	Loss 6.3814 (6.4220)	Top-1 acc 1.953 (0.724)	Top-5 acc 4.688 (3.127)	lr 0.04961
Warmup Train [3][850/3239]	Time 0.219 (0.241)	Data 0.001 (0.014)	Loss 6.4166 (6.4218)	Top-1 acc 1.562 (0.724)	Top-5 acc 3.906 (3.127)	lr 0.04961
Warmup Train [3][860/3239]	Time 0.203 (0.241)	Data 0.001 (0.014)	Loss 6.4088 (6.4216)	Top-1 acc 0.391 (0.725)	Top-5 acc 3.906 (3.134)	lr 0.04961
Warmup Train [3][870/3239]	Time 0.218 (0.241)	Data 0.001 (0.014)	Loss 6.4290 (6.4215)	Top-1 acc 0.781 (0.723)	Top-5 acc 2.344 (3.127)	lr 0.04960
Warmup Train [3][880/3239]	Time 0.282 (0.241)	Data 0.002 (0.013)	Loss 6.3986 (6.4214)	Top-1 acc 0.781 (0.724)	Top-5 acc 1.953 (3.133)	lr 0.04960
Warmup Train [3][890/3239]	Time 0.294 (0.241)	Data 0.002 (0.013)	Loss 6.3814 (6.4213)	Top-1 acc 1.172 (0.722)	Top-5 acc 3.516 (3.134)	lr 0.04960
Warmup Train [3][900/3239]	Time 0.208 (0.240)	Data 0.001 (0.013)	Loss 6.3762 (6.4207)	Top-1 acc 1.562 (0.721)	Top-5 acc 3.516 (3.136)	lr 0.04960
Warmup Train [3][910/3239]	Time 0.201 (0.240)	Data 0.001 (0.013)	Loss 6.4690 (6.4207)	Top-1 acc 0.781 (0.721)	Top-5 acc 3.125 (3.142)	lr 0.04960
Warmup Train [3][920/3239]	Time 0.208 (0.240)	Data 0.002 (0.013)	Loss 6.3949 (6.4204)	Top-1 acc 0.391 (0.721)	Top-5 acc 3.125 (3.142)	lr 0.04960
Warmup Train [3][930/3239]	Time 0.284 (0.240)	Data 0.001 (0.013)	Loss 6.3638 (6.4202)	Top-1 acc 0.391 (0.723)	Top-5 acc 2.344 (3.144)	lr 0.04960
Warmup Train [3][940/3239]	Time 0.174 (0.240)	Data 0.001 (0.013)	Loss 6.4012 (6.4200)	Top-1 acc 0.391 (0.723)	Top-5 acc 4.688 (3.146)	lr 0.04960
Warmup Train [3][950/3239]	Time 0.213 (0.239)	Data 0.001 (0.013)	Loss 6.2902 (6.4198)	Top-1 acc 1.172 (0.721)	Top-5 acc 7.031 (3.145)	lr 0.04960
Warmup Train [3][960/3239]	Time 0.127 (0.239)	Data 0.001 (0.013)	Loss 6.4421 (6.4196)	Top-1 acc 0.781 (0.722)	Top-5 acc 2.734 (3.149)	lr 0.04959
Warmup Train [3][970/3239]	Time 0.198 (0.239)	Data 0.001 (0.012)	Loss 6.4280 (6.4193)	Top-1 acc 1.562 (0.723)	Top-5 acc 3.906 (3.151)	lr 0.04959
Warmup Train [3][980/3239]	Time 0.215 (0.239)	Data 0.001 (0.012)	Loss 6.3455 (6.4189)	Top-1 acc 0.000 (0.725)	Top-5 acc 3.516 (3.158)	lr 0.04959
Warmup Train [3][990/3239]	Time 0.191 (0.239)	Data 0.001 (0.012)	Loss 6.3566 (6.4189)	Top-1 acc 0.781 (0.724)	Top-5 acc 3.906 (3.160)	lr 0.04959
Warmup Train [3][1000/3239]	Time 0.278 (0.239)	Data 0.001 (0.012)	Loss 6.3387 (6.4185)	Top-1 acc 0.391 (0.722)	Top-5 acc 2.344 (3.159)	lr 0.04959
Warmup Train [3][1010/3239]	Time 0.242 (0.238)	Data 0.001 (0.012)	Loss 6.3769 (6.4183)	Top-1 acc 0.391 (0.722)	Top-5 acc 1.562 (3.157)	lr 0.04959
Warmup Train [3][1020/3239]	Time 0.194 (0.238)	Data 0.001 (0.012)	Loss 6.3692 (6.4180)	Top-1 acc 0.781 (0.723)	Top-5 acc 1.172 (3.156)	lr 0.04959
Warmup Train [3][1030/3239]	Time 0.236 (0.238)	Data 0.001 (0.012)	Loss 6.4356 (6.4178)	Top-1 acc 1.172 (0.727)	Top-5 acc 3.125 (3.160)	lr 0.04959
Warmup Train [3][1040/3239]	Time 0.330 (0.238)	Data 0.001 (0.012)	Loss 6.3746 (6.4174)	Top-1 acc 1.953 (0.728)	Top-5 acc 6.641 (3.166)	lr 0.04959
Warmup Train [3][1050/3239]	Time 0.181 (0.238)	Data 0.001 (0.012)	Loss 6.4126 (6.4171)	Top-1 acc 1.172 (0.730)	Top-5 acc 3.125 (3.169)	lr 0.04958
Warmup Train [3][1060/3239]	Time 0.229 (0.238)	Data 0.001 (0.012)	Loss 6.4231 (6.4170)	Top-1 acc 0.781 (0.729)	Top-5 acc 2.734 (3.170)	lr 0.04958
Warmup Train [3][1070/3239]	Time 0.192 (0.237)	Data 0.001 (0.012)	Loss 6.4211 (6.4165)	Top-1 acc 1.172 (0.730)	Top-5 acc 2.734 (3.179)	lr 0.04958
Warmup Train [3][1080/3239]	Time 0.191 (0.237)	Data 0.001 (0.012)	Loss 6.4673 (6.4164)	Top-1 acc 0.781 (0.730)	Top-5 acc 1.953 (3.182)	lr 0.04958
Warmup Train [3][1090/3239]	Time 0.158 (0.237)	Data 0.001 (0.011)	Loss 6.3745 (6.4160)	Top-1 acc 0.391 (0.731)	Top-5 acc 3.516 (3.185)	lr 0.04958
Warmup Train [3][1100/3239]	Time 0.297 (0.237)	Data 0.001 (0.011)	Loss 6.4115 (6.4158)	Top-1 acc 0.781 (0.732)	Top-5 acc 2.344 (3.186)	lr 0.04958
Warmup Train [3][1110/3239]	Time 0.275 (0.237)	Data 0.001 (0.011)	Loss 6.3538 (6.4154)	Top-1 acc 1.172 (0.736)	Top-5 acc 4.297 (3.194)	lr 0.04958
Warmup Train [3][1120/3239]	Time 0.202 (0.237)	Data 0.001 (0.011)	Loss 6.3182 (6.4150)	Top-1 acc 0.391 (0.736)	Top-5 acc 2.734 (3.196)	lr 0.04958
Warmup Train [3][1130/3239]	Time 0.187 (0.236)	Data 0.001 (0.011)	Loss 6.3758 (6.4146)	Top-1 acc 0.781 (0.738)	Top-5 acc 3.125 (3.204)	lr 0.04958
Warmup Train [3][1140/3239]	Time 0.232 (0.236)	Data 0.002 (0.011)	Loss 6.3308 (6.4142)	Top-1 acc 0.781 (0.740)	Top-5 acc 2.734 (3.207)	lr 0.04957
Warmup Train [3][1150/3239]	Time 0.360 (0.236)	Data 0.008 (0.011)	Loss 6.4712 (6.4138)	Top-1 acc 0.000 (0.738)	Top-5 acc 2.344 (3.203)	lr 0.04957
Warmup Train [3][1160/3239]	Time 0.364 (0.236)	Data 0.002 (0.011)	Loss 6.4666 (6.4136)	Top-1 acc 1.172 (0.739)	Top-5 acc 3.516 (3.203)	lr 0.04957
Warmup Train [3][1170/3239]	Time 0.258 (0.236)	Data 0.001 (0.011)	Loss 6.4376 (6.4132)	Top-1 acc 0.781 (0.738)	Top-5 acc 3.125 (3.204)	lr 0.04957
Warmup Train [3][1180/3239]	Time 0.183 (0.236)	Data 0.001 (0.011)	Loss 6.4415 (6.4132)	Top-1 acc 1.562 (0.738)	Top-5 acc 3.906 (3.206)	lr 0.04957
Warmup Train [3][1190/3239]	Time 0.287 (0.236)	Data 0.001 (0.011)	Loss 6.4151 (6.4129)	Top-1 acc 0.391 (0.741)	Top-5 acc 4.297 (3.211)	lr 0.04957
Warmup Train [3][1200/3239]	Time 0.141 (0.236)	Data 0.001 (0.011)	Loss 6.3487 (6.4127)	Top-1 acc 1.172 (0.743)	Top-5 acc 3.516 (3.213)	lr 0.04957
Warmup Train [3][1210/3239]	Time 0.153 (0.236)	Data 0.001 (0.011)	Loss 6.3342 (6.4123)	Top-1 acc 0.781 (0.743)	Top-5 acc 5.078 (3.218)	lr 0.04957
Warmup Train [3][1220/3239]	Time 0.142 (0.236)	Data 0.002 (0.011)	Loss 6.3665 (6.4119)	Top-1 acc 1.172 (0.745)	Top-5 acc 6.250 (3.228)	lr 0.04957
Warmup Train [3][1230/3239]	Time 0.194 (0.236)	Data 0.002 (0.011)	Loss 6.4547 (6.4116)	Top-1 acc 0.781 (0.747)	Top-5 acc 4.688 (3.233)	lr 0.04956
Warmup Train [3][1240/3239]	Time 0.175 (0.235)	Data 0.001 (0.011)	Loss 6.4296 (6.4116)	Top-1 acc 0.000 (0.749)	Top-5 acc 1.562 (3.231)	lr 0.04956
Warmup Train [3][1250/3239]	Time 0.266 (0.235)	Data 0.001 (0.011)	Loss 6.3914 (6.4114)	Top-1 acc 0.391 (0.750)	Top-5 acc 3.906 (3.232)	lr 0.04956
Warmup Train [3][1260/3239]	Time 0.320 (0.235)	Data 0.002 (0.011)	Loss 6.3482 (6.4111)	Top-1 acc 1.172 (0.750)	Top-5 acc 5.078 (3.235)	lr 0.04956
Warmup Train [3][1270/3239]	Time 0.147 (0.235)	Data 0.001 (0.010)	Loss 6.3620 (6.4109)	Top-1 acc 0.391 (0.748)	Top-5 acc 2.734 (3.233)	lr 0.04956
Warmup Train [3][1280/3239]	Time 0.234 (0.235)	Data 0.001 (0.010)	Loss 6.4586 (6.4106)	Top-1 acc 1.172 (0.750)	Top-5 acc 2.734 (3.237)	lr 0.04956
Warmup Train [3][1290/3239]	Time 0.219 (0.235)	Data 0.001 (0.010)	Loss 6.3313 (6.4104)	Top-1 acc 2.344 (0.751)	Top-5 acc 7.031 (3.238)	lr 0.04956
Warmup Train [3][1300/3239]	Time 0.237 (0.235)	Data 0.001 (0.010)	Loss 6.4484 (6.4101)	Top-1 acc 0.781 (0.752)	Top-5 acc 1.172 (3.237)	lr 0.04956
Warmup Train [3][1310/3239]	Time 0.221 (0.235)	Data 0.001 (0.010)	Loss 6.4528 (6.4100)	Top-1 acc 0.781 (0.753)	Top-5 acc 2.734 (3.237)	lr 0.04956
Warmup Train [3][1320/3239]	Time 0.182 (0.235)	Data 0.001 (0.010)	Loss 6.3592 (6.4098)	Top-1 acc 0.781 (0.753)	Top-5 acc 3.516 (3.236)	lr 0.04955
Warmup Train [3][1330/3239]	Time 0.243 (0.235)	Data 0.001 (0.010)	Loss 6.3636 (6.4096)	Top-1 acc 1.172 (0.754)	Top-5 acc 4.688 (3.243)	lr 0.04955
Warmup Train [3][1340/3239]	Time 0.181 (0.235)	Data 0.001 (0.010)	Loss 6.3368 (6.4093)	Top-1 acc 0.781 (0.754)	Top-5 acc 3.906 (3.248)	lr 0.04955
Warmup Train [3][1350/3239]	Time 0.385 (0.235)	Data 0.001 (0.010)	Loss 6.3523 (6.4090)	Top-1 acc 0.781 (0.754)	Top-5 acc 2.344 (3.247)	lr 0.04955
Warmup Train [3][1360/3239]	Time 0.276 (0.235)	Data 0.001 (0.010)	Loss 6.3585 (6.4087)	Top-1 acc 0.781 (0.754)	Top-5 acc 3.516 (3.248)	lr 0.04955
Warmup Train [3][1370/3239]	Time 0.251 (0.234)	Data 0.001 (0.010)	Loss 6.4475 (6.4085)	Top-1 acc 0.000 (0.755)	Top-5 acc 2.344 (3.249)	lr 0.04955
Warmup Train [3][1380/3239]	Time 0.169 (0.234)	Data 0.001 (0.010)	Loss 6.3801 (6.4083)	Top-1 acc 0.000 (0.754)	Top-5 acc 3.516 (3.247)	lr 0.04955
Warmup Train [3][1390/3239]	Time 0.244 (0.234)	Data 0.001 (0.010)	Loss 6.4288 (6.4083)	Top-1 acc 0.391 (0.754)	Top-5 acc 3.125 (3.247)	lr 0.04955
Warmup Train [3][1400/3239]	Time 0.251 (0.234)	Data 0.001 (0.010)	Loss 6.3255 (6.4080)	Top-1 acc 1.953 (0.756)	Top-5 acc 5.859 (3.253)	lr 0.04955
Warmup Train [3][1410/3239]	Time 0.179 (0.234)	Data 0.001 (0.010)	Loss 6.3987 (6.4079)	Top-1 acc 0.391 (0.756)	Top-5 acc 3.516 (3.254)	lr 0.04954
Warmup Train [3][1420/3239]	Time 0.255 (0.234)	Data 0.032 (0.010)	Loss 6.2983 (6.4076)	Top-1 acc 2.344 (0.755)	Top-5 acc 4.688 (3.256)	lr 0.04954
Warmup Train [3][1430/3239]	Time 0.224 (0.234)	Data 0.001 (0.010)	Loss 6.3900 (6.4076)	Top-1 acc 0.781 (0.757)	Top-5 acc 2.734 (3.258)	lr 0.04954
Warmup Train [3][1440/3239]	Time 0.202 (0.234)	Data 0.001 (0.009)	Loss 6.3631 (6.4073)	Top-1 acc 0.000 (0.758)	Top-5 acc 1.953 (3.257)	lr 0.04954
Warmup Train [3][1450/3239]	Time 0.260 (0.234)	Data 0.001 (0.009)	Loss 6.3098 (6.4071)	Top-1 acc 1.562 (0.760)	Top-5 acc 4.688 (3.260)	lr 0.04954
Warmup Train [3][1460/3239]	Time 0.286 (0.234)	Data 0.001 (0.009)	Loss 6.4713 (6.4070)	Top-1 acc 1.172 (0.761)	Top-5 acc 3.125 (3.261)	lr 0.04954
Warmup Train [3][1470/3239]	Time 0.222 (0.234)	Data 0.001 (0.009)	Loss 6.3888 (6.4068)	Top-1 acc 1.172 (0.762)	Top-5 acc 3.516 (3.262)	lr 0.04954
Warmup Train [3][1480/3239]	Time 0.191 (0.233)	Data 0.001 (0.009)	Loss 6.3066 (6.4065)	Top-1 acc 0.000 (0.762)	Top-5 acc 3.516 (3.262)	lr 0.04954
Warmup Train [3][1490/3239]	Time 0.190 (0.233)	Data 0.001 (0.009)	Loss 6.3498 (6.4063)	Top-1 acc 1.172 (0.763)	Top-5 acc 3.516 (3.264)	lr 0.04953
Warmup Train [3][1500/3239]	Time 0.214 (0.233)	Data 0.001 (0.009)	Loss 6.4386 (6.4061)	Top-1 acc 1.172 (0.765)	Top-5 acc 3.516 (3.268)	lr 0.04953
Warmup Train [3][1510/3239]	Time 0.268 (0.233)	Data 0.001 (0.009)	Loss 6.3300 (6.4057)	Top-1 acc 1.172 (0.768)	Top-5 acc 2.734 (3.273)	lr 0.04953
Warmup Train [3][1520/3239]	Time 0.199 (0.233)	Data 0.001 (0.009)	Loss 6.4443 (6.4055)	Top-1 acc 1.172 (0.768)	Top-5 acc 3.906 (3.274)	lr 0.04953
Warmup Train [3][1530/3239]	Time 0.164 (0.233)	Data 0.001 (0.009)	Loss 6.3941 (6.4053)	Top-1 acc 0.000 (0.767)	Top-5 acc 3.906 (3.273)	lr 0.04953
Warmup Train [3][1540/3239]	Time 0.220 (0.233)	Data 0.001 (0.009)	Loss 6.3948 (6.4050)	Top-1 acc 0.391 (0.767)	Top-5 acc 1.172 (3.276)	lr 0.04953
Warmup Train [3][1550/3239]	Time 0.236 (0.233)	Data 0.001 (0.009)	Loss 6.3620 (6.4047)	Top-1 acc 0.781 (0.768)	Top-5 acc 3.516 (3.281)	lr 0.04953
Warmup Train [3][1560/3239]	Time 0.264 (0.233)	Data 0.001 (0.009)	Loss 6.4104 (6.4046)	Top-1 acc 0.781 (0.767)	Top-5 acc 3.125 (3.284)	lr 0.04953
Warmup Train [3][1570/3239]	Time 0.211 (0.233)	Data 0.001 (0.009)	Loss 6.3239 (6.4044)	Top-1 acc 0.781 (0.769)	Top-5 acc 4.297 (3.286)	lr 0.04953
Warmup Train [3][1580/3239]	Time 0.247 (0.233)	Data 0.001 (0.009)	Loss 6.3997 (6.4041)	Top-1 acc 0.391 (0.771)	Top-5 acc 3.906 (3.292)	lr 0.04952
Warmup Train [3][1590/3239]	Time 0.285 (0.233)	Data 0.001 (0.009)	Loss 6.3337 (6.4038)	Top-1 acc 2.344 (0.775)	Top-5 acc 5.078 (3.294)	lr 0.04952
Warmup Train [3][1600/3239]	Time 0.214 (0.232)	Data 0.002 (0.009)	Loss 6.3265 (6.4036)	Top-1 acc 0.781 (0.776)	Top-5 acc 1.953 (3.295)	lr 0.04952
Warmup Train [3][1610/3239]	Time 0.227 (0.232)	Data 0.001 (0.009)	Loss 6.3801 (6.4033)	Top-1 acc 0.391 (0.776)	Top-5 acc 3.125 (3.301)	lr 0.04952
Warmup Train [3][1620/3239]	Time 0.266 (0.232)	Data 0.001 (0.009)	Loss 6.3793 (6.4030)	Top-1 acc 1.562 (0.777)	Top-5 acc 5.469 (3.303)	lr 0.04952
Warmup Train [3][1630/3239]	Time 0.145 (0.232)	Data 0.001 (0.009)	Loss 6.3692 (6.4030)	Top-1 acc 0.391 (0.774)	Top-5 acc 3.516 (3.299)	lr 0.04952
Warmup Train [3][1640/3239]	Time 0.192 (0.232)	Data 0.001 (0.009)	Loss 6.4002 (6.4029)	Top-1 acc 0.781 (0.774)	Top-5 acc 2.344 (3.300)	lr 0.04952
Warmup Train [3][1650/3239]	Time 0.249 (0.232)	Data 0.027 (0.009)	Loss 6.3662 (6.4027)	Top-1 acc 0.781 (0.774)	Top-5 acc 3.906 (3.302)	lr 0.04952
Warmup Train [3][1660/3239]	Time 0.221 (0.232)	Data 0.001 (0.009)	Loss 6.3745 (6.4025)	Top-1 acc 0.781 (0.775)	Top-5 acc 4.688 (3.310)	lr 0.04951
Warmup Train [3][1670/3239]	Time 0.202 (0.232)	Data 0.001 (0.009)	Loss 6.3722 (6.4022)	Top-1 acc 0.391 (0.775)	Top-5 acc 3.125 (3.310)	lr 0.04951
Warmup Train [3][1680/3239]	Time 0.197 (0.232)	Data 0.001 (0.009)	Loss 6.4307 (6.4021)	Top-1 acc 0.391 (0.775)	Top-5 acc 2.344 (3.311)	lr 0.04951
Warmup Train [3][1690/3239]	Time 0.276 (0.232)	Data 0.001 (0.009)	Loss 6.3495 (6.4019)	Top-1 acc 0.391 (0.775)	Top-5 acc 4.297 (3.317)	lr 0.04951
Warmup Train [3][1700/3239]	Time 0.238 (0.231)	Data 0.001 (0.008)	Loss 6.3626 (6.4017)	Top-1 acc 1.953 (0.777)	Top-5 acc 4.297 (3.315)	lr 0.04951
Warmup Train [3][1710/3239]	Time 0.252 (0.231)	Data 0.001 (0.008)	Loss 6.3290 (6.4014)	Top-1 acc 1.172 (0.778)	Top-5 acc 5.469 (3.321)	lr 0.04951
Warmup Train [3][1720/3239]	Time 0.157 (0.231)	Data 0.001 (0.008)	Loss 6.3098 (6.4010)	Top-1 acc 0.781 (0.779)	Top-5 acc 3.906 (3.322)	lr 0.04951
Warmup Train [3][1730/3239]	Time 0.194 (0.231)	Data 0.001 (0.008)	Loss 6.3644 (6.4008)	Top-1 acc 0.781 (0.781)	Top-5 acc 2.734 (3.324)	lr 0.04951
Warmup Train [3][1740/3239]	Time 0.261 (0.231)	Data 0.002 (0.008)	Loss 6.3835 (6.4005)	Top-1 acc 0.781 (0.781)	Top-5 acc 1.953 (3.323)	lr 0.04951
Warmup Train [3][1750/3239]	Time 0.207 (0.231)	Data 0.001 (0.008)	Loss 6.3441 (6.4004)	Top-1 acc 0.391 (0.781)	Top-5 acc 2.734 (3.321)	lr 0.04950
Warmup Train [3][1760/3239]	Time 0.194 (0.231)	Data 0.001 (0.008)	Loss 6.4064 (6.4002)	Top-1 acc 1.172 (0.783)	Top-5 acc 2.344 (3.323)	lr 0.04950
Warmup Train [3][1770/3239]	Time 0.205 (0.231)	Data 0.002 (0.008)	Loss 6.3648 (6.4000)	Top-1 acc 1.172 (0.784)	Top-5 acc 5.469 (3.327)	lr 0.04950
Warmup Train [3][1780/3239]	Time 0.173 (0.231)	Data 0.002 (0.008)	Loss 6.4019 (6.3998)	Top-1 acc 0.391 (0.786)	Top-5 acc 2.734 (3.331)	lr 0.04950
Warmup Train [3][1790/3239]	Time 0.217 (0.231)	Data 0.001 (0.008)	Loss 6.3192 (6.3993)	Top-1 acc 1.562 (0.788)	Top-5 acc 4.688 (3.335)	lr 0.04950
Warmup Train [3][1800/3239]	Time 0.280 (0.231)	Data 0.002 (0.008)	Loss 6.3549 (6.3991)	Top-1 acc 1.172 (0.786)	Top-5 acc 3.906 (3.336)	lr 0.04950
Warmup Train [3][1810/3239]	Time 0.170 (0.231)	Data 0.001 (0.008)	Loss 6.4127 (6.3989)	Top-1 acc 1.172 (0.788)	Top-5 acc 4.688 (3.341)	lr 0.04950
Warmup Train [3][1820/3239]	Time 0.132 (0.231)	Data 0.002 (0.008)	Loss 6.2619 (6.3986)	Top-1 acc 0.781 (0.788)	Top-5 acc 3.516 (3.343)	lr 0.04950
Warmup Train [3][1830/3239]	Time 0.221 (0.231)	Data 0.001 (0.008)	Loss 6.3494 (6.3983)	Top-1 acc 1.562 (0.791)	Top-5 acc 5.859 (3.348)	lr 0.04949
Warmup Train [3][1840/3239]	Time 0.224 (0.231)	Data 0.001 (0.008)	Loss 6.3492 (6.3980)	Top-1 acc 0.391 (0.791)	Top-5 acc 2.344 (3.350)	lr 0.04949
Warmup Train [3][1850/3239]	Time 0.213 (0.231)	Data 0.001 (0.008)	Loss 6.3084 (6.3976)	Top-1 acc 1.172 (0.792)	Top-5 acc 5.469 (3.354)	lr 0.04949
Warmup Train [3][1860/3239]	Time 0.252 (0.231)	Data 0.001 (0.008)	Loss 6.3278 (6.3974)	Top-1 acc 1.172 (0.794)	Top-5 acc 3.906 (3.357)	lr 0.04949
Warmup Train [3][1870/3239]	Time 0.374 (0.231)	Data 0.001 (0.008)	Loss 6.3469 (6.3972)	Top-1 acc 0.391 (0.794)	Top-5 acc 3.125 (3.359)	lr 0.04949
Warmup Train [3][1880/3239]	Time 0.211 (0.231)	Data 0.001 (0.008)	Loss 6.3891 (6.3969)	Top-1 acc 0.781 (0.794)	Top-5 acc 3.125 (3.359)	lr 0.04949
Warmup Train [3][1890/3239]	Time 0.189 (0.231)	Data 0.001 (0.008)	Loss 6.3092 (6.3967)	Top-1 acc 0.000 (0.793)	Top-5 acc 4.297 (3.360)	lr 0.04949
Warmup Train [3][1900/3239]	Time 0.206 (0.231)	Data 0.001 (0.008)	Loss 6.3565 (6.3964)	Top-1 acc 1.172 (0.795)	Top-5 acc 4.297 (3.364)	lr 0.04949
Warmup Train [3][1910/3239]	Time 0.235 (0.231)	Data 0.002 (0.008)	Loss 6.3371 (6.3961)	Top-1 acc 0.000 (0.795)	Top-5 acc 1.562 (3.365)	lr 0.04948
Warmup Train [3][1920/3239]	Time 0.152 (0.231)	Data 0.001 (0.008)	Loss 6.4125 (6.3958)	Top-1 acc 0.781 (0.796)	Top-5 acc 3.906 (3.370)	lr 0.04948
Warmup Train [3][1930/3239]	Time 0.224 (0.230)	Data 0.001 (0.008)	Loss 6.3848 (6.3957)	Top-1 acc 0.391 (0.795)	Top-5 acc 5.078 (3.373)	lr 0.04948
Warmup Train [3][1940/3239]	Time 0.222 (0.230)	Data 0.001 (0.008)	Loss 6.2702 (6.3954)	Top-1 acc 0.781 (0.795)	Top-5 acc 6.250 (3.376)	lr 0.04948
Warmup Train [3][1950/3239]	Time 0.175 (0.230)	Data 0.001 (0.008)	Loss 6.3388 (6.3952)	Top-1 acc 1.172 (0.797)	Top-5 acc 2.734 (3.380)	lr 0.04948
Warmup Train [3][1960/3239]	Time 0.254 (0.230)	Data 0.001 (0.008)	Loss 6.3579 (6.3948)	Top-1 acc 0.781 (0.800)	Top-5 acc 5.469 (3.389)	lr 0.04948
Warmup Train [3][1970/3239]	Time 0.325 (0.230)	Data 0.001 (0.008)	Loss 6.2666 (6.3945)	Top-1 acc 1.953 (0.799)	Top-5 acc 5.859 (3.392)	lr 0.04948
Warmup Train [3][1980/3239]	Time 0.135 (0.230)	Data 0.001 (0.008)	Loss 6.3053 (6.3942)	Top-1 acc 1.172 (0.801)	Top-5 acc 3.516 (3.393)	lr 0.04948
Warmup Train [3][1990/3239]	Time 0.173 (0.230)	Data 0.001 (0.008)	Loss 6.3893 (6.3940)	Top-1 acc 0.391 (0.801)	Top-5 acc 0.781 (3.393)	lr 0.04947
Warmup Train [3][2000/3239]	Time 0.181 (0.230)	Data 0.001 (0.008)	Loss 6.2984 (6.3937)	Top-1 acc 0.781 (0.802)	Top-5 acc 4.297 (3.395)	lr 0.04947
Warmup Train [3][2010/3239]	Time 0.224 (0.230)	Data 0.001 (0.008)	Loss 6.4222 (6.3934)	Top-1 acc 0.391 (0.802)	Top-5 acc 2.734 (3.397)	lr 0.04947
Warmup Train [3][2020/3239]	Time 0.266 (0.230)	Data 0.001 (0.008)	Loss 6.3417 (6.3932)	Top-1 acc 1.562 (0.803)	Top-5 acc 3.516 (3.396)	lr 0.04947
Warmup Train [3][2030/3239]	Time 0.165 (0.230)	Data 0.001 (0.008)	Loss 6.3139 (6.3930)	Top-1 acc 0.781 (0.804)	Top-5 acc 4.688 (3.398)	lr 0.04947
Warmup Train [3][2040/3239]	Time 0.178 (0.230)	Data 0.001 (0.008)	Loss 6.3655 (6.3927)	Top-1 acc 0.781 (0.804)	Top-5 acc 3.906 (3.401)	lr 0.04947
Warmup Train [3][2050/3239]	Time 0.243 (0.230)	Data 0.001 (0.008)	Loss 6.3786 (6.3926)	Top-1 acc 0.000 (0.805)	Top-5 acc 1.953 (3.403)	lr 0.04947
Warmup Train [3][2060/3239]	Time 0.205 (0.230)	Data 0.001 (0.008)	Loss 6.3348 (6.3924)	Top-1 acc 1.172 (0.805)	Top-5 acc 3.125 (3.404)	lr 0.04947
Warmup Train [3][2070/3239]	Time 0.329 (0.230)	Data 0.001 (0.008)	Loss 6.2852 (6.3921)	Top-1 acc 1.172 (0.804)	Top-5 acc 6.250 (3.405)	lr 0.04946
Warmup Train [3][2080/3239]	Time 0.247 (0.230)	Data 0.001 (0.008)	Loss 6.3735 (6.3919)	Top-1 acc 1.172 (0.805)	Top-5 acc 3.125 (3.407)	lr 0.04946
Warmup Train [3][2090/3239]	Time 0.223 (0.230)	Data 0.001 (0.008)	Loss 6.3517 (6.3917)	Top-1 acc 0.781 (0.807)	Top-5 acc 3.516 (3.411)	lr 0.04946
Warmup Train [3][2100/3239]	Time 0.185 (0.230)	Data 0.002 (0.008)	Loss 6.3453 (6.3914)	Top-1 acc 0.000 (0.807)	Top-5 acc 3.516 (3.413)	lr 0.04946
Warmup Train [3][2110/3239]	Time 0.223 (0.230)	Data 0.001 (0.007)	Loss 6.2819 (6.3910)	Top-1 acc 1.953 (0.807)	Top-5 acc 6.641 (3.418)	lr 0.04946
Warmup Train [3][2120/3239]	Time 0.226 (0.230)	Data 0.001 (0.007)	Loss 6.3236 (6.3908)	Top-1 acc 1.953 (0.807)	Top-5 acc 3.516 (3.420)	lr 0.04946
Warmup Train [3][2130/3239]	Time 0.205 (0.230)	Data 0.001 (0.007)	Loss 6.3796 (6.3905)	Top-1 acc 0.000 (0.807)	Top-5 acc 3.516 (3.422)	lr 0.04946
Warmup Train [3][2140/3239]	Time 0.201 (0.230)	Data 0.001 (0.007)	Loss 6.3704 (6.3902)	Top-1 acc 1.172 (0.809)	Top-5 acc 2.344 (3.426)	lr 0.04946
Warmup Train [3][2150/3239]	Time 0.226 (0.230)	Data 0.001 (0.007)	Loss 6.2946 (6.3899)	Top-1 acc 0.781 (0.808)	Top-5 acc 3.906 (3.427)	lr 0.04945
Warmup Train [3][2160/3239]	Time 0.228 (0.230)	Data 0.002 (0.007)	Loss 6.3318 (6.3897)	Top-1 acc 0.391 (0.808)	Top-5 acc 2.344 (3.429)	lr 0.04945
Warmup Train [3][2170/3239]	Time 0.278 (0.230)	Data 0.001 (0.007)	Loss 6.2818 (6.3894)	Top-1 acc 1.172 (0.809)	Top-5 acc 5.469 (3.435)	lr 0.04945
Warmup Train [3][2180/3239]	Time 0.283 (0.230)	Data 0.001 (0.007)	Loss 6.2939 (6.3892)	Top-1 acc 1.172 (0.811)	Top-5 acc 6.250 (3.437)	lr 0.04945
Warmup Train [3][2190/3239]	Time 0.221 (0.230)	Data 0.001 (0.007)	Loss 6.3571 (6.3889)	Top-1 acc 0.781 (0.812)	Top-5 acc 3.516 (3.440)	lr 0.04945
Warmup Train [3][2200/3239]	Time 0.183 (0.229)	Data 0.001 (0.007)	Loss 6.2974 (6.3886)	Top-1 acc 0.391 (0.811)	Top-5 acc 3.906 (3.443)	lr 0.04945
Warmup Train [3][2210/3239]	Time 0.177 (0.229)	Data 0.001 (0.007)	Loss 6.3320 (6.3883)	Top-1 acc 3.125 (0.812)	Top-5 acc 7.031 (3.444)	lr 0.04945
Warmup Train [3][2220/3239]	Time 0.266 (0.229)	Data 0.045 (0.007)	Loss 6.3062 (6.3880)	Top-1 acc 0.391 (0.813)	Top-5 acc 5.469 (3.450)	lr 0.04945
Warmup Train [3][2230/3239]	Time 0.274 (0.229)	Data 0.001 (0.007)	Loss 6.2859 (6.3877)	Top-1 acc 1.953 (0.813)	Top-5 acc 5.078 (3.453)	lr 0.04944
Warmup Train [3][2240/3239]	Time 0.180 (0.229)	Data 0.002 (0.007)	Loss 6.3232 (6.3874)	Top-1 acc 1.562 (0.815)	Top-5 acc 4.688 (3.456)	lr 0.04944
Warmup Train [3][2250/3239]	Time 0.262 (0.229)	Data 0.002 (0.007)	Loss 6.3273 (6.3871)	Top-1 acc 0.391 (0.816)	Top-5 acc 3.516 (3.459)	lr 0.04944
Warmup Train [3][2260/3239]	Time 0.199 (0.229)	Data 0.001 (0.007)	Loss 6.3097 (6.3869)	Top-1 acc 0.781 (0.815)	Top-5 acc 3.516 (3.461)	lr 0.04944
Warmup Train [3][2270/3239]	Time 0.338 (0.229)	Data 0.001 (0.007)	Loss 6.3883 (6.3867)	Top-1 acc 0.391 (0.816)	Top-5 acc 3.125 (3.465)	lr 0.04944
Warmup Train [3][2280/3239]	Time 0.187 (0.229)	Data 0.001 (0.007)	Loss 6.2680 (6.3863)	Top-1 acc 0.391 (0.816)	Top-5 acc 3.906 (3.466)	lr 0.04944
Warmup Train [3][2290/3239]	Time 0.213 (0.229)	Data 0.025 (0.007)	Loss 6.2841 (6.3860)	Top-1 acc 1.953 (0.818)	Top-5 acc 4.297 (3.468)	lr 0.04944
Warmup Train [3][2300/3239]	Time 0.202 (0.229)	Data 0.002 (0.007)	Loss 6.2692 (6.3857)	Top-1 acc 0.781 (0.817)	Top-5 acc 3.516 (3.470)	lr 0.04944
Warmup Train [3][2310/3239]	Time 0.311 (0.229)	Data 0.003 (0.007)	Loss 6.3340 (6.3855)	Top-1 acc 0.781 (0.816)	Top-5 acc 1.953 (3.471)	lr 0.04943
Warmup Train [3][2320/3239]	Time 0.268 (0.229)	Data 0.001 (0.007)	Loss 6.3107 (6.3852)	Top-1 acc 0.781 (0.816)	Top-5 acc 2.734 (3.470)	lr 0.04943
Warmup Train [3][2330/3239]	Time 0.234 (0.229)	Data 0.001 (0.007)	Loss 6.3461 (6.3850)	Top-1 acc 0.781 (0.816)	Top-5 acc 4.297 (3.470)	lr 0.04943
Warmup Train [3][2340/3239]	Time 0.204 (0.229)	Data 0.001 (0.007)	Loss 6.3092 (6.3847)	Top-1 acc 0.781 (0.816)	Top-5 acc 5.078 (3.474)	lr 0.04943
Warmup Train [3][2350/3239]	Time 0.228 (0.229)	Data 0.001 (0.007)	Loss 6.3025 (6.3843)	Top-1 acc 0.781 (0.817)	Top-5 acc 5.078 (3.477)	lr 0.04943
Warmup Train [3][2360/3239]	Time 0.233 (0.229)	Data 0.001 (0.007)	Loss 6.2656 (6.3841)	Top-1 acc 0.000 (0.817)	Top-5 acc 4.688 (3.479)	lr 0.04943
Warmup Train [3][2370/3239]	Time 0.206 (0.229)	Data 0.001 (0.007)	Loss 6.3135 (6.3839)	Top-1 acc 1.953 (0.818)	Top-5 acc 4.688 (3.480)	lr 0.04943
Warmup Train [3][2380/3239]	Time 0.222 (0.229)	Data 0.001 (0.007)	Loss 6.2804 (6.3836)	Top-1 acc 0.000 (0.817)	Top-5 acc 6.250 (3.483)	lr 0.04943
Warmup Train [3][2390/3239]	Time 0.221 (0.229)	Data 0.001 (0.007)	Loss 6.2999 (6.3833)	Top-1 acc 1.172 (0.818)	Top-5 acc 5.469 (3.486)	lr 0.04942
Warmup Train [3][2400/3239]	Time 0.240 (0.229)	Data 0.001 (0.007)	Loss 6.2231 (6.3829)	Top-1 acc 1.953 (0.821)	Top-5 acc 5.859 (3.489)	lr 0.04942
Warmup Train [3][2410/3239]	Time 0.214 (0.229)	Data 0.001 (0.007)	Loss 6.4735 (6.3827)	Top-1 acc 0.000 (0.821)	Top-5 acc 1.562 (3.491)	lr 0.04942
Warmup Train [3][2420/3239]	Time 0.217 (0.229)	Data 0.001 (0.007)	Loss 6.2614 (6.3825)	Top-1 acc 0.781 (0.821)	Top-5 acc 3.125 (3.491)	lr 0.04942
Warmup Train [3][2430/3239]	Time 0.192 (0.229)	Data 0.002 (0.007)	Loss 6.3148 (6.3822)	Top-1 acc 0.781 (0.824)	Top-5 acc 2.734 (3.497)	lr 0.04942
Warmup Train [3][2440/3239]	Time 0.205 (0.229)	Data 0.001 (0.007)	Loss 6.3541 (6.3819)	Top-1 acc 1.953 (0.826)	Top-5 acc 4.688 (3.501)	lr 0.04942
Warmup Train [3][2450/3239]	Time 0.183 (0.229)	Data 0.001 (0.007)	Loss 6.3944 (6.3817)	Top-1 acc 2.344 (0.826)	Top-5 acc 3.906 (3.505)	lr 0.04942
Warmup Train [3][2460/3239]	Time 0.291 (0.229)	Data 0.001 (0.007)	Loss 6.2204 (6.3813)	Top-1 acc 0.391 (0.825)	Top-5 acc 4.688 (3.507)	lr 0.04942
Warmup Train [3][2470/3239]	Time 0.222 (0.229)	Data 0.001 (0.007)	Loss 6.2606 (6.3811)	Top-1 acc 0.781 (0.825)	Top-5 acc 3.906 (3.506)	lr 0.04941
Warmup Train [3][2480/3239]	Time 0.235 (0.229)	Data 0.002 (0.007)	Loss 6.2799 (6.3808)	Top-1 acc 0.391 (0.826)	Top-5 acc 3.906 (3.511)	lr 0.04941
Warmup Train [3][2490/3239]	Time 0.138 (0.229)	Data 0.001 (0.007)	Loss 6.2830 (6.3805)	Top-1 acc 1.172 (0.826)	Top-5 acc 5.469 (3.515)	lr 0.04941
Warmup Train [3][2500/3239]	Time 0.158 (0.229)	Data 0.001 (0.007)	Loss 6.2808 (6.3802)	Top-1 acc 1.172 (0.827)	Top-5 acc 4.297 (3.519)	lr 0.04941
Warmup Train [3][2510/3239]	Time 0.203 (0.229)	Data 0.002 (0.007)	Loss 6.3090 (6.3800)	Top-1 acc 0.391 (0.828)	Top-5 acc 5.078 (3.523)	lr 0.04941
Warmup Train [3][2520/3239]	Time 0.163 (0.228)	Data 0.001 (0.007)	Loss 6.2197 (6.3797)	Top-1 acc 2.344 (0.829)	Top-5 acc 6.250 (3.528)	lr 0.04941
Warmup Train [3][2530/3239]	Time 0.230 (0.228)	Data 0.002 (0.007)	Loss 6.3032 (6.3794)	Top-1 acc 1.172 (0.829)	Top-5 acc 6.250 (3.528)	lr 0.04941
Warmup Train [3][2540/3239]	Time 0.209 (0.228)	Data 0.001 (0.007)	Loss 6.2569 (6.3792)	Top-1 acc 0.391 (0.830)	Top-5 acc 4.297 (3.532)	lr 0.04940
Warmup Train [3][2550/3239]	Time 0.178 (0.228)	Data 0.001 (0.007)	Loss 6.2691 (6.3789)	Top-1 acc 2.344 (0.831)	Top-5 acc 6.641 (3.535)	lr 0.04940
Warmup Train [3][2560/3239]	Time 0.260 (0.228)	Data 0.001 (0.007)	Loss 6.3297 (6.3787)	Top-1 acc 0.391 (0.830)	Top-5 acc 4.297 (3.534)	lr 0.04940
Warmup Train [3][2570/3239]	Time 0.283 (0.228)	Data 0.001 (0.007)	Loss 6.3062 (6.3784)	Top-1 acc 1.953 (0.831)	Top-5 acc 4.688 (3.536)	lr 0.04940
Warmup Train [3][2580/3239]	Time 0.158 (0.228)	Data 0.001 (0.007)	Loss 6.2585 (6.3781)	Top-1 acc 0.781 (0.831)	Top-5 acc 3.906 (3.539)	lr 0.04940
Warmup Train [3][2590/3239]	Time 0.231 (0.228)	Data 0.001 (0.007)	Loss 6.2989 (6.3778)	Top-1 acc 0.781 (0.832)	Top-5 acc 4.297 (3.541)	lr 0.04940
Warmup Train [3][2600/3239]	Time 0.276 (0.228)	Data 0.001 (0.007)	Loss 6.3183 (6.3775)	Top-1 acc 0.391 (0.832)	Top-5 acc 4.297 (3.544)	lr 0.04940
Warmup Train [3][2610/3239]	Time 0.232 (0.228)	Data 0.001 (0.007)	Loss 6.2717 (6.3772)	Top-1 acc 1.562 (0.833)	Top-5 acc 6.641 (3.548)	lr 0.04940
Warmup Train [3][2620/3239]	Time 0.227 (0.228)	Data 0.001 (0.007)	Loss 6.2782 (6.3768)	Top-1 acc 0.781 (0.833)	Top-5 acc 3.516 (3.551)	lr 0.04939
Warmup Train [3][2630/3239]	Time 0.187 (0.228)	Data 0.001 (0.007)	Loss 6.3252 (6.3766)	Top-1 acc 0.000 (0.832)	Top-5 acc 1.953 (3.553)	lr 0.04939
Warmup Train [3][2640/3239]	Time 0.276 (0.228)	Data 0.001 (0.007)	Loss 6.2747 (6.3763)	Top-1 acc 0.781 (0.833)	Top-5 acc 2.734 (3.554)	lr 0.04939
Warmup Train [3][2650/3239]	Time 0.216 (0.228)	Data 0.001 (0.006)	Loss 6.3178 (6.3759)	Top-1 acc 0.781 (0.833)	Top-5 acc 3.906 (3.559)	lr 0.04939
Warmup Train [3][2660/3239]	Time 0.223 (0.228)	Data 0.001 (0.006)	Loss 6.2728 (6.3756)	Top-1 acc 1.562 (0.833)	Top-5 acc 3.516 (3.563)	lr 0.04939
Warmup Train [3][2670/3239]	Time 0.338 (0.228)	Data 0.001 (0.006)	Loss 6.3455 (6.3753)	Top-1 acc 1.953 (0.834)	Top-5 acc 4.688 (3.567)	lr 0.04939
Warmup Train [3][2680/3239]	Time 0.247 (0.228)	Data 0.002 (0.006)	Loss 6.2418 (6.3750)	Top-1 acc 2.344 (0.834)	Top-5 acc 6.641 (3.571)	lr 0.04939
Warmup Train [3][2690/3239]	Time 0.125 (0.228)	Data 0.001 (0.006)	Loss 6.2792 (6.3748)	Top-1 acc 2.344 (0.835)	Top-5 acc 5.859 (3.571)	lr 0.04938
Warmup Train [3][2700/3239]	Time 0.206 (0.228)	Data 0.001 (0.006)	Loss 6.3716 (6.3746)	Top-1 acc 0.781 (0.835)	Top-5 acc 2.734 (3.574)	lr 0.04938
Warmup Train [3][2710/3239]	Time 0.181 (0.228)	Data 0.002 (0.006)	Loss 6.2939 (6.3743)	Top-1 acc 0.000 (0.835)	Top-5 acc 3.125 (3.577)	lr 0.04938
Warmup Train [3][2720/3239]	Time 0.192 (0.228)	Data 0.001 (0.006)	Loss 6.2722 (6.3741)	Top-1 acc 1.172 (0.836)	Top-5 acc 3.125 (3.579)	lr 0.04938
Warmup Train [3][2730/3239]	Time 0.164 (0.228)	Data 0.001 (0.006)	Loss 6.3514 (6.3738)	Top-1 acc 0.391 (0.837)	Top-5 acc 2.734 (3.582)	lr 0.04938
Warmup Train [3][2740/3239]	Time 0.132 (0.228)	Data 0.001 (0.006)	Loss 6.3722 (6.3736)	Top-1 acc 0.391 (0.837)	Top-5 acc 3.906 (3.584)	lr 0.04938
Warmup Train [3][2750/3239]	Time 0.209 (0.228)	Data 0.002 (0.006)	Loss 6.2197 (6.3733)	Top-1 acc 1.172 (0.838)	Top-5 acc 4.297 (3.589)	lr 0.04938
Warmup Train [3][2760/3239]	Time 0.187 (0.228)	Data 0.001 (0.006)	Loss 6.3211 (6.3730)	Top-1 acc 0.781 (0.838)	Top-5 acc 3.125 (3.592)	lr 0.04938
Warmup Train [3][2770/3239]	Time 0.327 (0.228)	Data 0.001 (0.006)	Loss 6.2782 (6.3727)	Top-1 acc 0.000 (0.837)	Top-5 acc 3.516 (3.593)	lr 0.04937
Warmup Train [3][2780/3239]	Time 0.171 (0.228)	Data 0.001 (0.006)	Loss 6.3189 (6.3725)	Top-1 acc 1.562 (0.838)	Top-5 acc 3.906 (3.595)	lr 0.04937
Warmup Train [3][2790/3239]	Time 0.202 (0.227)	Data 0.002 (0.006)	Loss 6.3022 (6.3722)	Top-1 acc 1.172 (0.839)	Top-5 acc 3.906 (3.598)	lr 0.04937
Warmup Train [3][2800/3239]	Time 0.214 (0.227)	Data 0.001 (0.006)	Loss 6.2479 (6.3719)	Top-1 acc 0.781 (0.840)	Top-5 acc 5.469 (3.601)	lr 0.04937
Warmup Train [3][2810/3239]	Time 0.252 (0.227)	Data 0.001 (0.006)	Loss 6.2579 (6.3717)	Top-1 acc 1.172 (0.840)	Top-5 acc 4.297 (3.600)	lr 0.04937
Warmup Train [3][2820/3239]	Time 0.264 (0.227)	Data 0.001 (0.006)	Loss 6.3033 (6.3714)	Top-1 acc 0.781 (0.841)	Top-5 acc 4.688 (3.603)	lr 0.04937
Warmup Train [3][2830/3239]	Time 0.193 (0.227)	Data 0.002 (0.006)	Loss 6.2855 (6.3712)	Top-1 acc 0.781 (0.842)	Top-5 acc 7.031 (3.607)	lr 0.04937
Warmup Train [3][2840/3239]	Time 0.204 (0.227)	Data 0.001 (0.006)	Loss 6.2734 (6.3708)	Top-1 acc 1.562 (0.844)	Top-5 acc 3.906 (3.610)	lr 0.04936
Warmup Train [3][2850/3239]	Time 0.280 (0.227)	Data 0.001 (0.006)	Loss 6.2428 (6.3706)	Top-1 acc 1.562 (0.844)	Top-5 acc 5.859 (3.612)	lr 0.04936
Warmup Train [3][2860/3239]	Time 0.214 (0.227)	Data 0.001 (0.006)	Loss 6.3406 (6.3703)	Top-1 acc 0.000 (0.845)	Top-5 acc 2.734 (3.614)	lr 0.04936
Warmup Train [3][2870/3239]	Time 0.204 (0.227)	Data 0.001 (0.006)	Loss 6.2770 (6.3700)	Top-1 acc 1.172 (0.846)	Top-5 acc 3.125 (3.616)	lr 0.04936
Warmup Train [3][2880/3239]	Time 0.191 (0.227)	Data 0.001 (0.006)	Loss 6.3591 (6.3697)	Top-1 acc 1.562 (0.847)	Top-5 acc 3.125 (3.619)	lr 0.04936
Warmup Train [3][2890/3239]	Time 0.318 (0.227)	Data 0.002 (0.006)	Loss 6.3510 (6.3695)	Top-1 acc 0.391 (0.849)	Top-5 acc 3.125 (3.622)	lr 0.04936
Warmup Train [3][2900/3239]	Time 0.200 (0.227)	Data 0.001 (0.006)	Loss 6.3648 (6.3692)	Top-1 acc 0.781 (0.849)	Top-5 acc 3.125 (3.626)	lr 0.04936
Warmup Train [3][2910/3239]	Time 0.267 (0.227)	Data 0.002 (0.006)	Loss 6.3001 (6.3690)	Top-1 acc 0.000 (0.851)	Top-5 acc 3.125 (3.631)	lr 0.04936
Warmup Train [3][2920/3239]	Time 0.182 (0.227)	Data 0.002 (0.006)	Loss 6.1918 (6.3687)	Top-1 acc 1.172 (0.851)	Top-5 acc 3.125 (3.630)	lr 0.04935
Warmup Train [3][2930/3239]	Time 0.206 (0.227)	Data 0.001 (0.006)	Loss 6.1578 (6.3684)	Top-1 acc 2.734 (0.853)	Top-5 acc 6.250 (3.634)	lr 0.04935
Warmup Train [3][2940/3239]	Time 0.117 (0.227)	Data 0.001 (0.006)	Loss 6.2871 (6.3682)	Top-1 acc 1.562 (0.853)	Top-5 acc 3.125 (3.633)	lr 0.04935
Warmup Train [3][2950/3239]	Time 0.222 (0.227)	Data 0.001 (0.006)	Loss 6.3575 (6.3679)	Top-1 acc 0.781 (0.854)	Top-5 acc 3.516 (3.637)	lr 0.04935
Warmup Train [3][2960/3239]	Time 0.229 (0.227)	Data 0.002 (0.006)	Loss 6.3070 (6.3676)	Top-1 acc 0.391 (0.854)	Top-5 acc 4.297 (3.640)	lr 0.04935
Warmup Train [3][2970/3239]	Time 0.208 (0.227)	Data 0.001 (0.006)	Loss 6.2851 (6.3674)	Top-1 acc 1.953 (0.854)	Top-5 acc 7.031 (3.642)	lr 0.04935
Warmup Train [3][2980/3239]	Time 0.172 (0.227)	Data 0.001 (0.006)	Loss 6.2913 (6.3672)	Top-1 acc 0.391 (0.855)	Top-5 acc 2.344 (3.643)	lr 0.04935
Warmup Train [3][2990/3239]	Time 0.209 (0.227)	Data 0.002 (0.006)	Loss 6.3445 (6.3668)	Top-1 acc 0.781 (0.855)	Top-5 acc 3.125 (3.646)	lr 0.04934
Warmup Train [3][3000/3239]	Time 0.301 (0.227)	Data 0.002 (0.006)	Loss 6.2175 (6.3665)	Top-1 acc 0.781 (0.855)	Top-5 acc 3.516 (3.649)	lr 0.04934
Warmup Train [3][3010/3239]	Time 0.220 (0.227)	Data 0.001 (0.006)	Loss 6.2681 (6.3662)	Top-1 acc 0.781 (0.856)	Top-5 acc 3.125 (3.651)	lr 0.04934
Warmup Train [3][3020/3239]	Time 0.208 (0.227)	Data 0.001 (0.006)	Loss 6.3060 (6.3660)	Top-1 acc 1.172 (0.858)	Top-5 acc 5.078 (3.653)	lr 0.04934
Warmup Train [3][3030/3239]	Time 0.198 (0.227)	Data 0.001 (0.006)	Loss 6.2988 (6.3657)	Top-1 acc 1.562 (0.858)	Top-5 acc 2.344 (3.654)	lr 0.04934
Warmup Train [3][3040/3239]	Time 0.203 (0.227)	Data 0.001 (0.006)	Loss 6.3057 (6.3655)	Top-1 acc 2.344 (0.858)	Top-5 acc 5.859 (3.655)	lr 0.04934
Warmup Train [3][3050/3239]	Time 0.180 (0.227)	Data 0.001 (0.006)	Loss 6.2898 (6.3653)	Top-1 acc 1.172 (0.858)	Top-5 acc 5.078 (3.656)	lr 0.04934
Warmup Train [3][3060/3239]	Time 0.207 (0.227)	Data 0.001 (0.006)	Loss 6.3063 (6.3650)	Top-1 acc 1.953 (0.859)	Top-5 acc 5.859 (3.659)	lr 0.04933
Warmup Train [3][3070/3239]	Time 0.206 (0.227)	Data 0.001 (0.006)	Loss 6.3269 (6.3648)	Top-1 acc 1.172 (0.860)	Top-5 acc 1.953 (3.661)	lr 0.04933
Warmup Train [3][3080/3239]	Time 0.219 (0.227)	Data 0.001 (0.006)	Loss 6.2799 (6.3645)	Top-1 acc 1.172 (0.860)	Top-5 acc 4.688 (3.663)	lr 0.04933
Warmup Train [3][3090/3239]	Time 0.135 (0.227)	Data 0.001 (0.006)	Loss 6.2884 (6.3642)	Top-1 acc 1.562 (0.861)	Top-5 acc 4.297 (3.667)	lr 0.04933
Warmup Train [3][3100/3239]	Time 0.317 (0.227)	Data 0.001 (0.006)	Loss 6.3080 (6.3640)	Top-1 acc 0.781 (0.861)	Top-5 acc 4.688 (3.667)	lr 0.04933
Warmup Train [3][3110/3239]	Time 0.206 (0.227)	Data 0.002 (0.006)	Loss 6.2961 (6.3637)	Top-1 acc 0.391 (0.861)	Top-5 acc 1.562 (3.669)	lr 0.04933
Warmup Train [3][3120/3239]	Time 0.218 (0.227)	Data 0.001 (0.006)	Loss 6.3150 (6.3634)	Top-1 acc 0.000 (0.863)	Top-5 acc 3.516 (3.673)	lr 0.04933
Warmup Train [3][3130/3239]	Time 0.234 (0.227)	Data 0.001 (0.006)	Loss 6.2223 (6.3631)	Top-1 acc 0.781 (0.864)	Top-5 acc 5.859 (3.679)	lr 0.04932
Warmup Train [3][3140/3239]	Time 0.146 (0.226)	Data 0.001 (0.006)	Loss 6.2247 (6.3628)	Top-1 acc 0.781 (0.865)	Top-5 acc 3.516 (3.683)	lr 0.04932
Warmup Train [3][3150/3239]	Time 0.196 (0.226)	Data 0.001 (0.006)	Loss 6.2402 (6.3625)	Top-1 acc 0.391 (0.865)	Top-5 acc 5.469 (3.689)	lr 0.04932
Warmup Train [3][3160/3239]	Time 0.212 (0.226)	Data 0.001 (0.006)	Loss 6.3493 (6.3623)	Top-1 acc 1.953 (0.866)	Top-5 acc 5.469 (3.690)	lr 0.04932
Warmup Train [3][3170/3239]	Time 0.247 (0.226)	Data 0.001 (0.006)	Loss 6.3327 (6.3621)	Top-1 acc 1.172 (0.867)	Top-5 acc 5.469 (3.692)	lr 0.04932
Warmup Train [3][3180/3239]	Time 0.166 (0.226)	Data 0.000 (0.006)	Loss 6.2693 (6.3618)	Top-1 acc 1.562 (0.868)	Top-5 acc 6.641 (3.696)	lr 0.04932
Warmup Train [3][3190/3239]	Time 0.198 (0.226)	Data 0.000 (0.006)	Loss 6.3088 (6.3616)	Top-1 acc 0.391 (0.869)	Top-5 acc 4.297 (3.699)	lr 0.04932
Warmup Train [3][3200/3239]	Time 0.223 (0.226)	Data 0.000 (0.006)	Loss 6.2414 (6.3612)	Top-1 acc 1.562 (0.871)	Top-5 acc 5.078 (3.704)	lr 0.04931
Warmup Train [3][3210/3239]	Time 0.255 (0.226)	Data 0.000 (0.006)	Loss 6.2186 (6.3609)	Top-1 acc 1.172 (0.871)	Top-5 acc 7.422 (3.706)	lr 0.04931
Warmup Train [3][3220/3239]	Time 0.192 (0.226)	Data 0.000 (0.006)	Loss 6.1720 (6.3606)	Top-1 acc 1.953 (0.871)	Top-5 acc 6.250 (3.708)	lr 0.04931
Warmup Train [3][3230/3239]	Time 0.187 (0.226)	Data 0.000 (0.006)	Loss 6.2882 (6.3604)	Top-1 acc 0.000 (0.873)	Top-5 acc 0.781 (3.709)	lr 0.04931
Warmup Train [3][3239/3239]	Time 0.166 (0.226)	Data 0.000 (0.006)	Loss 6.2816 (6.3602)	Top-1 acc 0.000 (0.873)	Top-5 acc 6.173 (3.712)	lr 0.04931
==========Warmup Valid [3/40]	loss 6.024	top-1 acc 1.395	top-5 acc 5.576	Train top-1 0.873	top-5 3.712	flops: 442.4M
Warmup Train [4][0/3239]	Time 10.016 (10.016)	Data 9.647 (9.647)	Loss 6.2767 (6.2767)	Top-1 acc 1.562 (1.562)	Top-5 acc 5.078 (5.078)	lr 0.04931
Warmup Train [4][10/3239]	Time 0.347 (1.422)	Data 0.002 (1.039)	Loss 6.1793 (6.2632)	Top-1 acc 2.734 (1.030)	Top-5 acc 8.203 (4.794)	lr 0.04931
Warmup Train [4][20/3239]	Time 0.353 (0.902)	Data 0.002 (0.545)	Loss 6.2755 (6.2637)	Top-1 acc 0.391 (0.986)	Top-5 acc 3.125 (4.613)	lr 0.04931
Warmup Train [4][30/3239]	Time 0.234 (0.691)	Data 0.001 (0.370)	Loss 6.3800 (6.2717)	Top-1 acc 0.391 (0.983)	Top-5 acc 4.688 (4.637)	lr 0.04930
Warmup Train [4][40/3239]	Time 0.230 (0.581)	Data 0.027 (0.281)	Loss 6.1754 (6.2693)	Top-1 acc 1.172 (1.019)	Top-5 acc 5.859 (4.621)	lr 0.04930
Warmup Train [4][50/3239]	Time 0.170 (0.512)	Data 0.001 (0.227)	Loss 6.2888 (6.2665)	Top-1 acc 1.172 (0.957)	Top-5 acc 2.734 (4.665)	lr 0.04930
Warmup Train [4][60/3239]	Time 0.196 (0.467)	Data 0.001 (0.190)	Loss 6.2609 (6.2619)	Top-1 acc 0.391 (1.037)	Top-5 acc 4.688 (4.707)	lr 0.04930
Warmup Train [4][70/3239]	Time 0.217 (0.432)	Data 0.001 (0.164)	Loss 6.2894 (6.2625)	Top-1 acc 0.781 (1.018)	Top-5 acc 6.641 (4.638)	lr 0.04930
Warmup Train [4][80/3239]	Time 0.223 (0.408)	Data 0.001 (0.144)	Loss 6.3513 (6.2621)	Top-1 acc 0.781 (1.056)	Top-5 acc 4.297 (4.707)	lr 0.04930
Warmup Train [4][90/3239]	Time 0.211 (0.386)	Data 0.001 (0.128)	Loss 6.3068 (6.2622)	Top-1 acc 1.172 (1.035)	Top-5 acc 3.906 (4.636)	lr 0.04930
Warmup Train [4][100/3239]	Time 0.255 (0.371)	Data 0.001 (0.116)	Loss 6.3067 (6.2616)	Top-1 acc 0.781 (1.025)	Top-5 acc 2.344 (4.575)	lr 0.04930
Warmup Train [4][110/3239]	Time 0.244 (0.359)	Data 0.001 (0.105)	Loss 6.2605 (6.2622)	Top-1 acc 0.781 (1.049)	Top-5 acc 3.125 (4.575)	lr 0.04929
Warmup Train [4][120/3239]	Time 0.340 (0.349)	Data 0.001 (0.097)	Loss 6.2993 (6.2614)	Top-1 acc 0.781 (1.046)	Top-5 acc 3.906 (4.597)	lr 0.04929
Warmup Train [4][130/3239]	Time 0.224 (0.340)	Data 0.001 (0.090)	Loss 6.2167 (6.2616)	Top-1 acc 0.781 (1.065)	Top-5 acc 4.297 (4.598)	lr 0.04929
Warmup Train [4][140/3239]	Time 0.159 (0.331)	Data 0.001 (0.083)	Loss 6.3145 (6.2633)	Top-1 acc 0.781 (1.061)	Top-5 acc 4.297 (4.613)	lr 0.04929
Warmup Train [4][150/3239]	Time 0.218 (0.325)	Data 0.001 (0.078)	Loss 6.1926 (6.2629)	Top-1 acc 1.953 (1.061)	Top-5 acc 6.250 (4.646)	lr 0.04929
Warmup Train [4][160/3239]	Time 0.167 (0.319)	Data 0.001 (0.074)	Loss 6.2337 (6.2629)	Top-1 acc 1.562 (1.075)	Top-5 acc 3.906 (4.607)	lr 0.04929
Warmup Train [4][170/3239]	Time 0.212 (0.314)	Data 0.001 (0.070)	Loss 6.2805 (6.2609)	Top-1 acc 0.781 (1.081)	Top-5 acc 5.078 (4.630)	lr 0.04928
Warmup Train [4][180/3239]	Time 0.229 (0.309)	Data 0.001 (0.066)	Loss 6.2594 (6.2596)	Top-1 acc 1.953 (1.111)	Top-5 acc 3.125 (4.664)	lr 0.04928
Warmup Train [4][190/3239]	Time 0.213 (0.305)	Data 0.001 (0.063)	Loss 6.2725 (6.2593)	Top-1 acc 0.000 (1.094)	Top-5 acc 3.516 (4.645)	lr 0.04928
Warmup Train [4][200/3239]	Time 0.190 (0.301)	Data 0.001 (0.060)	Loss 6.3829 (6.2616)	Top-1 acc 0.000 (1.082)	Top-5 acc 3.906 (4.598)	lr 0.04928
Warmup Train [4][210/3239]	Time 0.238 (0.297)	Data 0.001 (0.057)	Loss 6.2052 (6.2602)	Top-1 acc 1.953 (1.092)	Top-5 acc 4.688 (4.613)	lr 0.04928
Warmup Train [4][220/3239]	Time 0.320 (0.295)	Data 0.001 (0.055)	Loss 6.1437 (6.2595)	Top-1 acc 1.562 (1.094)	Top-5 acc 8.594 (4.642)	lr 0.04928
Warmup Train [4][230/3239]	Time 0.354 (0.292)	Data 0.001 (0.052)	Loss 6.2736 (6.2592)	Top-1 acc 0.391 (1.091)	Top-5 acc 5.469 (4.633)	lr 0.04928
Warmup Train [4][240/3239]	Time 0.202 (0.289)	Data 0.001 (0.050)	Loss 6.3146 (6.2587)	Top-1 acc 1.953 (1.083)	Top-5 acc 6.250 (4.652)	lr 0.04927
Warmup Train [4][250/3239]	Time 0.165 (0.286)	Data 0.001 (0.048)	Loss 6.2799 (6.2589)	Top-1 acc 0.781 (1.086)	Top-5 acc 4.297 (4.649)	lr 0.04927
Warmup Train [4][260/3239]	Time 0.223 (0.284)	Data 0.001 (0.047)	Loss 6.2324 (6.2589)	Top-1 acc 0.781 (1.085)	Top-5 acc 7.422 (4.653)	lr 0.04927
Warmup Train [4][270/3239]	Time 0.251 (0.283)	Data 0.004 (0.045)	Loss 6.2487 (6.2584)	Top-1 acc 1.172 (1.085)	Top-5 acc 6.250 (4.644)	lr 0.04927
Warmup Train [4][280/3239]	Time 0.180 (0.281)	Data 0.001 (0.043)	Loss 6.2734 (6.2583)	Top-1 acc 0.781 (1.091)	Top-5 acc 5.078 (4.649)	lr 0.04927
Warmup Train [4][290/3239]	Time 0.293 (0.279)	Data 0.001 (0.042)	Loss 6.2374 (6.2578)	Top-1 acc 1.172 (1.115)	Top-5 acc 5.859 (4.677)	lr 0.04927
Warmup Train [4][300/3239]	Time 0.219 (0.277)	Data 0.001 (0.041)	Loss 6.2522 (6.2578)	Top-1 acc 0.781 (1.110)	Top-5 acc 2.734 (4.663)	lr 0.04927
Warmup Train [4][310/3239]	Time 0.216 (0.276)	Data 0.001 (0.040)	Loss 6.2902 (6.2570)	Top-1 acc 1.562 (1.117)	Top-5 acc 4.297 (4.682)	lr 0.04926
Warmup Train [4][320/3239]	Time 0.387 (0.275)	Data 0.001 (0.038)	Loss 6.2383 (6.2567)	Top-1 acc 0.781 (1.111)	Top-5 acc 4.688 (4.677)	lr 0.04926
Warmup Train [4][330/3239]	Time 0.231 (0.273)	Data 0.001 (0.037)	Loss 6.2708 (6.2565)	Top-1 acc 1.172 (1.114)	Top-5 acc 4.688 (4.675)	lr 0.04926
Warmup Train [4][340/3239]	Time 0.257 (0.272)	Data 0.001 (0.036)	Loss 6.2183 (6.2561)	Top-1 acc 0.781 (1.107)	Top-5 acc 4.297 (4.665)	lr 0.04926
Warmup Train [4][350/3239]	Time 0.265 (0.271)	Data 0.001 (0.035)	Loss 6.3188 (6.2569)	Top-1 acc 1.953 (1.116)	Top-5 acc 3.516 (4.674)	lr 0.04926
Warmup Train [4][360/3239]	Time 0.197 (0.270)	Data 0.001 (0.034)	Loss 6.2204 (6.2565)	Top-1 acc 1.172 (1.113)	Top-5 acc 5.859 (4.673)	lr 0.04926
Warmup Train [4][370/3239]	Time 0.192 (0.268)	Data 0.001 (0.034)	Loss 6.2048 (6.2562)	Top-1 acc 0.781 (1.108)	Top-5 acc 4.688 (4.673)	lr 0.04926
Warmup Train [4][380/3239]	Time 0.213 (0.267)	Data 0.001 (0.033)	Loss 6.2368 (6.2560)	Top-1 acc 0.781 (1.105)	Top-5 acc 4.297 (4.670)	lr 0.04925
Warmup Train [4][390/3239]	Time 0.243 (0.266)	Data 0.001 (0.032)	Loss 6.2253 (6.2561)	Top-1 acc 1.953 (1.108)	Top-5 acc 6.641 (4.676)	lr 0.04925
Warmup Train [4][400/3239]	Time 0.270 (0.265)	Data 0.001 (0.031)	Loss 6.3106 (6.2557)	Top-1 acc 0.781 (1.113)	Top-5 acc 4.297 (4.676)	lr 0.04925
Warmup Train [4][410/3239]	Time 0.257 (0.264)	Data 0.001 (0.031)	Loss 6.2511 (6.2552)	Top-1 acc 1.953 (1.116)	Top-5 acc 4.297 (4.677)	lr 0.04925
Warmup Train [4][420/3239]	Time 0.236 (0.263)	Data 0.003 (0.030)	Loss 6.1911 (6.2546)	Top-1 acc 1.562 (1.127)	Top-5 acc 7.812 (4.706)	lr 0.04925
Warmup Train [4][430/3239]	Time 0.337 (0.262)	Data 0.001 (0.029)	Loss 6.1752 (6.2543)	Top-1 acc 1.562 (1.128)	Top-5 acc 6.250 (4.719)	lr 0.04925
Warmup Train [4][440/3239]	Time 0.211 (0.262)	Data 0.001 (0.029)	Loss 6.3039 (6.2545)	Top-1 acc 1.172 (1.120)	Top-5 acc 3.516 (4.710)	lr 0.04925
Warmup Train [4][450/3239]	Time 0.274 (0.261)	Data 0.001 (0.028)	Loss 6.1379 (6.2536)	Top-1 acc 2.344 (1.120)	Top-5 acc 6.641 (4.713)	lr 0.04924
Warmup Train [4][460/3239]	Time 0.228 (0.260)	Data 0.001 (0.028)	Loss 6.2671 (6.2536)	Top-1 acc 0.000 (1.113)	Top-5 acc 2.734 (4.699)	lr 0.04924
Warmup Train [4][470/3239]	Time 0.192 (0.259)	Data 0.002 (0.027)	Loss 6.2327 (6.2531)	Top-1 acc 0.781 (1.115)	Top-5 acc 5.078 (4.709)	lr 0.04924
Warmup Train [4][480/3239]	Time 0.317 (0.259)	Data 0.003 (0.027)	Loss 6.2171 (6.2531)	Top-1 acc 0.391 (1.114)	Top-5 acc 7.031 (4.717)	lr 0.04924
Warmup Train [4][490/3239]	Time 0.198 (0.258)	Data 0.002 (0.026)	Loss 6.2062 (6.2523)	Top-1 acc 0.781 (1.112)	Top-5 acc 4.688 (4.718)	lr 0.04924
Warmup Train [4][500/3239]	Time 0.264 (0.258)	Data 0.002 (0.026)	Loss 6.2013 (6.2525)	Top-1 acc 0.391 (1.114)	Top-5 acc 2.734 (4.709)	lr 0.04924
Warmup Train [4][510/3239]	Time 0.208 (0.257)	Data 0.001 (0.025)	Loss 6.2981 (6.2526)	Top-1 acc 1.562 (1.115)	Top-5 acc 4.688 (4.710)	lr 0.04924
Warmup Train [4][520/3239]	Time 0.245 (0.257)	Data 0.001 (0.025)	Loss 6.2781 (6.2525)	Top-1 acc 2.344 (1.117)	Top-5 acc 7.031 (4.711)	lr 0.04923
Warmup Train [4][530/3239]	Time 0.203 (0.256)	Data 0.001 (0.024)	Loss 6.1842 (6.2519)	Top-1 acc 3.125 (1.121)	Top-5 acc 6.250 (4.718)	lr 0.04923
Warmup Train [4][540/3239]	Time 0.396 (0.256)	Data 0.001 (0.024)	Loss 6.1591 (6.2518)	Top-1 acc 1.172 (1.123)	Top-5 acc 5.469 (4.716)	lr 0.04923
Warmup Train [4][550/3239]	Time 0.164 (0.256)	Data 0.001 (0.023)	Loss 6.1898 (6.2513)	Top-1 acc 2.344 (1.131)	Top-5 acc 7.812 (4.722)	lr 0.04923
Warmup Train [4][560/3239]	Time 0.261 (0.255)	Data 0.002 (0.023)	Loss 6.2790 (6.2516)	Top-1 acc 0.781 (1.131)	Top-5 acc 4.297 (4.712)	lr 0.04923
Warmup Train [4][570/3239]	Time 0.298 (0.255)	Data 0.002 (0.023)	Loss 6.2427 (6.2510)	Top-1 acc 0.781 (1.129)	Top-5 acc 4.297 (4.720)	lr 0.04923
Warmup Train [4][580/3239]	Time 0.185 (0.254)	Data 0.001 (0.022)	Loss 6.2547 (6.2509)	Top-1 acc 1.562 (1.140)	Top-5 acc 4.297 (4.731)	lr 0.04922
Warmup Train [4][590/3239]	Time 0.152 (0.253)	Data 0.001 (0.022)	Loss 6.2381 (6.2508)	Top-1 acc 1.172 (1.141)	Top-5 acc 7.031 (4.740)	lr 0.04922
Warmup Train [4][600/3239]	Time 0.234 (0.253)	Data 0.001 (0.022)	Loss 6.2470 (6.2504)	Top-1 acc 1.562 (1.141)	Top-5 acc 6.641 (4.752)	lr 0.04922
Warmup Train [4][610/3239]	Time 0.253 (0.252)	Data 0.001 (0.022)	Loss 6.2699 (6.2503)	Top-1 acc 1.172 (1.141)	Top-5 acc 3.125 (4.749)	lr 0.04922
Warmup Train [4][620/3239]	Time 0.254 (0.252)	Data 0.002 (0.021)	Loss 6.2251 (6.2499)	Top-1 acc 1.562 (1.144)	Top-5 acc 5.078 (4.766)	lr 0.04922
Warmup Train [4][630/3239]	Time 0.142 (0.251)	Data 0.001 (0.021)	Loss 6.2152 (6.2502)	Top-1 acc 1.172 (1.142)	Top-5 acc 5.859 (4.768)	lr 0.04922
Warmup Train [4][640/3239]	Time 0.182 (0.251)	Data 0.001 (0.021)	Loss 6.2460 (6.2497)	Top-1 acc 1.172 (1.145)	Top-5 acc 3.906 (4.775)	lr 0.04922
Warmup Train [4][650/3239]	Time 0.266 (0.251)	Data 0.002 (0.020)	Loss 6.2808 (6.2494)	Top-1 acc 1.953 (1.144)	Top-5 acc 5.469 (4.790)	lr 0.04921
Warmup Train [4][660/3239]	Time 0.286 (0.250)	Data 0.001 (0.020)	Loss 6.1827 (6.2493)	Top-1 acc 1.172 (1.144)	Top-5 acc 5.859 (4.789)	lr 0.04921
Warmup Train [4][670/3239]	Time 0.192 (0.250)	Data 0.001 (0.020)	Loss 6.2094 (6.2487)	Top-1 acc 1.953 (1.142)	Top-5 acc 5.469 (4.805)	lr 0.04921
Warmup Train [4][680/3239]	Time 0.153 (0.250)	Data 0.001 (0.020)	Loss 6.2148 (6.2482)	Top-1 acc 1.562 (1.141)	Top-5 acc 5.859 (4.807)	lr 0.04921
Warmup Train [4][690/3239]	Time 0.205 (0.249)	Data 0.001 (0.019)	Loss 6.2467 (6.2474)	Top-1 acc 2.344 (1.143)	Top-5 acc 5.469 (4.806)	lr 0.04921
Warmup Train [4][700/3239]	Time 0.187 (0.249)	Data 0.001 (0.019)	Loss 6.1414 (6.2469)	Top-1 acc 1.562 (1.145)	Top-5 acc 6.250 (4.817)	lr 0.04921
Warmup Train [4][710/3239]	Time 0.230 (0.249)	Data 0.001 (0.019)	Loss 6.2300 (6.2465)	Top-1 acc 0.781 (1.139)	Top-5 acc 4.688 (4.823)	lr 0.04921
Warmup Train [4][720/3239]	Time 0.269 (0.248)	Data 0.002 (0.019)	Loss 6.1906 (6.2460)	Top-1 acc 1.953 (1.141)	Top-5 acc 6.250 (4.825)	lr 0.04920
Warmup Train [4][730/3239]	Time 0.259 (0.248)	Data 0.001 (0.018)	Loss 6.1931 (6.2457)	Top-1 acc 1.172 (1.141)	Top-5 acc 5.469 (4.821)	lr 0.04920
Warmup Train [4][740/3239]	Time 0.237 (0.247)	Data 0.001 (0.018)	Loss 6.3502 (6.2453)	Top-1 acc 1.172 (1.143)	Top-5 acc 4.688 (4.822)	lr 0.04920
Warmup Train [4][750/3239]	Time 0.254 (0.247)	Data 0.001 (0.018)	Loss 6.3227 (6.2454)	Top-1 acc 0.781 (1.143)	Top-5 acc 2.734 (4.820)	lr 0.04920
Warmup Train [4][760/3239]	Time 0.330 (0.247)	Data 0.002 (0.018)	Loss 6.2140 (6.2453)	Top-1 acc 2.344 (1.145)	Top-5 acc 4.688 (4.817)	lr 0.04920
Warmup Train [4][770/3239]	Time 0.248 (0.247)	Data 0.001 (0.018)	Loss 6.2389 (6.2449)	Top-1 acc 1.953 (1.148)	Top-5 acc 5.078 (4.828)	lr 0.04920
Warmup Train [4][780/3239]	Time 0.178 (0.247)	Data 0.001 (0.017)	Loss 6.2850 (6.2447)	Top-1 acc 1.172 (1.150)	Top-5 acc 3.516 (4.829)	lr 0.04919
Warmup Train [4][790/3239]	Time 0.151 (0.246)	Data 0.001 (0.017)	Loss 6.1149 (6.2444)	Top-1 acc 2.734 (1.152)	Top-5 acc 7.422 (4.830)	lr 0.04919
Warmup Train [4][800/3239]	Time 0.347 (0.246)	Data 0.001 (0.017)	Loss 6.1351 (6.2438)	Top-1 acc 1.562 (1.160)	Top-5 acc 5.859 (4.843)	lr 0.04919
Warmup Train [4][810/3239]	Time 0.156 (0.246)	Data 0.002 (0.017)	Loss 6.2955 (6.2434)	Top-1 acc 0.781 (1.158)	Top-5 acc 5.078 (4.847)	lr 0.04919
Warmup Train [4][820/3239]	Time 0.190 (0.246)	Data 0.001 (0.017)	Loss 6.2171 (6.2432)	Top-1 acc 0.000 (1.158)	Top-5 acc 5.859 (4.846)	lr 0.04919
Warmup Train [4][830/3239]	Time 0.218 (0.245)	Data 0.001 (0.017)	Loss 6.2041 (6.2429)	Top-1 acc 1.953 (1.160)	Top-5 acc 5.859 (4.846)	lr 0.04919
Warmup Train [4][840/3239]	Time 0.226 (0.245)	Data 0.002 (0.016)	Loss 6.1676 (6.2424)	Top-1 acc 0.781 (1.158)	Top-5 acc 3.906 (4.846)	lr 0.04919
Warmup Train [4][850/3239]	Time 0.245 (0.245)	Data 0.001 (0.016)	Loss 6.2021 (6.2421)	Top-1 acc 0.781 (1.159)	Top-5 acc 4.297 (4.851)	lr 0.04918
Warmup Train [4][860/3239]	Time 0.214 (0.245)	Data 0.001 (0.016)	Loss 6.2095 (6.2419)	Top-1 acc 1.562 (1.157)	Top-5 acc 3.906 (4.844)	lr 0.04918
Warmup Train [4][870/3239]	Time 0.441 (0.245)	Data 0.001 (0.016)	Loss 6.1693 (6.2417)	Top-1 acc 1.562 (1.161)	Top-5 acc 5.859 (4.853)	lr 0.04918
Warmup Train [4][880/3239]	Time 0.162 (0.245)	Data 0.001 (0.016)	Loss 6.2509 (6.2415)	Top-1 acc 0.781 (1.158)	Top-5 acc 2.734 (4.849)	lr 0.04918
Warmup Train [4][890/3239]	Time 0.220 (0.245)	Data 0.001 (0.016)	Loss 6.2370 (6.2411)	Top-1 acc 1.172 (1.162)	Top-5 acc 5.859 (4.860)	lr 0.04918
Warmup Train [4][900/3239]	Time 0.195 (0.244)	Data 0.001 (0.016)	Loss 6.2127 (6.2411)	Top-1 acc 1.953 (1.162)	Top-5 acc 7.422 (4.855)	lr 0.04918
Warmup Train [4][910/3239]	Time 0.244 (0.244)	Data 0.002 (0.015)	Loss 6.2748 (6.2409)	Top-1 acc 0.781 (1.163)	Top-5 acc 4.688 (4.865)	lr 0.04917
Warmup Train [4][920/3239]	Time 0.224 (0.244)	Data 0.001 (0.015)	Loss 6.2481 (6.2405)	Top-1 acc 1.172 (1.162)	Top-5 acc 5.469 (4.860)	lr 0.04917
Warmup Train [4][930/3239]	Time 0.223 (0.244)	Data 0.001 (0.015)	Loss 6.1951 (6.2402)	Top-1 acc 0.391 (1.163)	Top-5 acc 5.078 (4.863)	lr 0.04917
Warmup Train [4][940/3239]	Time 0.187 (0.243)	Data 0.001 (0.015)	Loss 6.1336 (6.2397)	Top-1 acc 0.391 (1.166)	Top-5 acc 4.688 (4.868)	lr 0.04917
Warmup Train [4][950/3239]	Time 0.203 (0.243)	Data 0.002 (0.015)	Loss 6.1835 (6.2396)	Top-1 acc 1.953 (1.164)	Top-5 acc 7.812 (4.873)	lr 0.04917
Warmup Train [4][960/3239]	Time 0.202 (0.243)	Data 0.001 (0.015)	Loss 6.2494 (6.2393)	Top-1 acc 1.172 (1.164)	Top-5 acc 3.125 (4.869)	lr 0.04917
Warmup Train [4][970/3239]	Time 0.245 (0.243)	Data 0.002 (0.015)	Loss 6.1258 (6.2390)	Top-1 acc 1.562 (1.164)	Top-5 acc 7.031 (4.871)	lr 0.04917
Warmup Train [4][980/3239]	Time 0.346 (0.243)	Data 0.001 (0.014)	Loss 6.2333 (6.2389)	Top-1 acc 1.953 (1.168)	Top-5 acc 4.688 (4.874)	lr 0.04916
Warmup Train [4][990/3239]	Time 0.183 (0.243)	Data 0.001 (0.014)	Loss 6.1821 (6.2387)	Top-1 acc 1.953 (1.167)	Top-5 acc 6.641 (4.878)	lr 0.04916
Warmup Train [4][1000/3239]	Time 0.227 (0.243)	Data 0.001 (0.014)	Loss 6.2418 (6.2381)	Top-1 acc 0.781 (1.169)	Top-5 acc 5.469 (4.886)	lr 0.04916
Warmup Train [4][1010/3239]	Time 0.233 (0.242)	Data 0.001 (0.014)	Loss 6.2810 (6.2380)	Top-1 acc 1.172 (1.168)	Top-5 acc 4.688 (4.888)	lr 0.04916
Warmup Train [4][1020/3239]	Time 0.172 (0.242)	Data 0.001 (0.014)	Loss 6.2137 (6.2378)	Top-1 acc 1.562 (1.172)	Top-5 acc 5.078 (4.892)	lr 0.04916
Warmup Train [4][1030/3239]	Time 0.199 (0.242)	Data 0.001 (0.014)	Loss 6.2162 (6.2374)	Top-1 acc 1.562 (1.175)	Top-5 acc 5.469 (4.899)	lr 0.04916
Warmup Train [4][1040/3239]	Time 0.203 (0.242)	Data 0.001 (0.014)	Loss 6.2186 (6.2370)	Top-1 acc 1.562 (1.174)	Top-5 acc 6.641 (4.905)	lr 0.04915
Warmup Train [4][1050/3239]	Time 0.218 (0.242)	Data 0.001 (0.014)	Loss 6.2469 (6.2370)	Top-1 acc 0.391 (1.169)	Top-5 acc 5.078 (4.901)	lr 0.04915
Warmup Train [4][1060/3239]	Time 0.270 (0.242)	Data 0.001 (0.014)	Loss 6.1552 (6.2366)	Top-1 acc 0.781 (1.170)	Top-5 acc 6.250 (4.900)	lr 0.04915
Warmup Train [4][1070/3239]	Time 0.242 (0.242)	Data 0.001 (0.013)	Loss 6.1817 (6.2362)	Top-1 acc 1.562 (1.170)	Top-5 acc 8.203 (4.910)	lr 0.04915
Warmup Train [4][1080/3239]	Time 0.244 (0.241)	Data 0.001 (0.013)	Loss 6.1989 (6.2359)	Top-1 acc 0.781 (1.171)	Top-5 acc 3.516 (4.910)	lr 0.04915
Warmup Train [4][1090/3239]	Time 0.287 (0.241)	Data 0.001 (0.013)	Loss 6.1563 (6.2356)	Top-1 acc 1.172 (1.170)	Top-5 acc 5.469 (4.907)	lr 0.04915
Warmup Train [4][1100/3239]	Time 0.182 (0.241)	Data 0.001 (0.013)	Loss 6.1873 (6.2353)	Top-1 acc 1.953 (1.173)	Top-5 acc 4.688 (4.913)	lr 0.04915
Warmup Train [4][1110/3239]	Time 0.265 (0.241)	Data 0.001 (0.013)	Loss 6.2567 (6.2351)	Top-1 acc 0.391 (1.174)	Top-5 acc 2.734 (4.917)	lr 0.04914
Warmup Train [4][1120/3239]	Time 0.188 (0.241)	Data 0.001 (0.013)	Loss 6.1381 (6.2348)	Top-1 acc 0.781 (1.175)	Top-5 acc 5.078 (4.918)	lr 0.04914
Warmup Train [4][1130/3239]	Time 0.341 (0.241)	Data 0.001 (0.013)	Loss 6.1533 (6.2345)	Top-1 acc 1.953 (1.176)	Top-5 acc 6.641 (4.924)	lr 0.04914
Warmup Train [4][1140/3239]	Time 0.153 (0.241)	Data 0.001 (0.013)	Loss 6.1297 (6.2339)	Top-1 acc 2.344 (1.178)	Top-5 acc 6.641 (4.931)	lr 0.04914
Warmup Train [4][1150/3239]	Time 0.201 (0.241)	Data 0.001 (0.013)	Loss 6.1595 (6.2335)	Top-1 acc 1.562 (1.184)	Top-5 acc 5.859 (4.938)	lr 0.04914
Warmup Train [4][1160/3239]	Time 0.278 (0.241)	Data 0.001 (0.013)	Loss 6.1818 (6.2330)	Top-1 acc 1.172 (1.185)	Top-5 acc 5.469 (4.941)	lr 0.04914
Warmup Train [4][1170/3239]	Time 0.198 (0.241)	Data 0.001 (0.013)	Loss 6.2308 (6.2329)	Top-1 acc 1.172 (1.185)	Top-5 acc 3.906 (4.936)	lr 0.04913
Warmup Train [4][1180/3239]	Time 0.347 (0.241)	Data 0.001 (0.013)	Loss 6.2120 (6.2325)	Top-1 acc 0.781 (1.185)	Top-5 acc 3.906 (4.941)	lr 0.04913
Warmup Train [4][1190/3239]	Time 0.246 (0.241)	Data 0.001 (0.012)	Loss 6.2739 (6.2323)	Top-1 acc 1.953 (1.186)	Top-5 acc 4.297 (4.939)	lr 0.04913
Warmup Train [4][1200/3239]	Time 0.228 (0.241)	Data 0.001 (0.012)	Loss 6.0401 (6.2319)	Top-1 acc 1.172 (1.184)	Top-5 acc 6.641 (4.940)	lr 0.04913
Warmup Train [4][1210/3239]	Time 0.222 (0.241)	Data 0.001 (0.012)	Loss 6.3195 (6.2319)	Top-1 acc 0.781 (1.185)	Top-5 acc 4.688 (4.944)	lr 0.04913
Warmup Train [4][1220/3239]	Time 0.170 (0.241)	Data 0.001 (0.012)	Loss 6.1741 (6.2316)	Top-1 acc 1.172 (1.186)	Top-5 acc 5.859 (4.939)	lr 0.04913
Warmup Train [4][1230/3239]	Time 0.248 (0.240)	Data 0.001 (0.012)	Loss 6.1637 (6.2314)	Top-1 acc 1.172 (1.184)	Top-5 acc 5.859 (4.940)	lr 0.04912
Warmup Train [4][1240/3239]	Time 0.254 (0.240)	Data 0.001 (0.012)	Loss 6.1572 (6.2310)	Top-1 acc 0.781 (1.184)	Top-5 acc 5.469 (4.946)	lr 0.04912
Warmup Train [4][1250/3239]	Time 0.206 (0.240)	Data 0.001 (0.012)	Loss 6.1071 (6.2305)	Top-1 acc 1.953 (1.186)	Top-5 acc 5.078 (4.949)	lr 0.04912
Warmup Train [4][1260/3239]	Time 0.212 (0.240)	Data 0.001 (0.012)	Loss 6.1398 (6.2301)	Top-1 acc 1.172 (1.186)	Top-5 acc 5.469 (4.957)	lr 0.04912
Warmup Train [4][1270/3239]	Time 0.199 (0.240)	Data 0.001 (0.012)	Loss 6.1297 (6.2300)	Top-1 acc 2.344 (1.189)	Top-5 acc 7.422 (4.960)	lr 0.04912
Warmup Train [4][1280/3239]	Time 0.232 (0.240)	Data 0.001 (0.012)	Loss 6.2682 (6.2297)	Top-1 acc 0.391 (1.190)	Top-5 acc 5.078 (4.966)	lr 0.04912
Warmup Train [4][1290/3239]	Time 0.303 (0.240)	Data 0.001 (0.012)	Loss 6.1894 (6.2296)	Top-1 acc 1.562 (1.190)	Top-5 acc 4.297 (4.965)	lr 0.04911
Warmup Train [4][1300/3239]	Time 0.351 (0.240)	Data 0.001 (0.012)	Loss 6.1743 (6.2293)	Top-1 acc 0.781 (1.190)	Top-5 acc 3.906 (4.968)	lr 0.04911
Warmup Train [4][1310/3239]	Time 0.222 (0.240)	Data 0.001 (0.012)	Loss 6.1873 (6.2288)	Top-1 acc 1.172 (1.194)	Top-5 acc 4.297 (4.980)	lr 0.04911
Warmup Train [4][1320/3239]	Time 0.213 (0.240)	Data 0.001 (0.011)	Loss 6.0969 (6.2283)	Top-1 acc 2.344 (1.196)	Top-5 acc 7.031 (4.987)	lr 0.04911
Warmup Train [4][1330/3239]	Time 0.212 (0.240)	Data 0.001 (0.011)	Loss 6.1884 (6.2278)	Top-1 acc 1.172 (1.198)	Top-5 acc 3.125 (4.995)	lr 0.04911
Warmup Train [4][1340/3239]	Time 0.293 (0.240)	Data 0.001 (0.011)	Loss 6.2607 (6.2274)	Top-1 acc 0.781 (1.200)	Top-5 acc 5.078 (5.006)	lr 0.04911
Warmup Train [4][1350/3239]	Time 0.237 (0.240)	Data 0.001 (0.011)	Loss 6.1910 (6.2271)	Top-1 acc 1.172 (1.200)	Top-5 acc 4.688 (5.009)	lr 0.04911
Warmup Train [4][1360/3239]	Time 0.241 (0.239)	Data 0.002 (0.011)	Loss 6.1700 (6.2267)	Top-1 acc 0.391 (1.201)	Top-5 acc 5.078 (5.013)	lr 0.04910
Warmup Train [4][1370/3239]	Time 0.194 (0.239)	Data 0.001 (0.011)	Loss 6.1755 (6.2262)	Top-1 acc 1.172 (1.204)	Top-5 acc 5.469 (5.018)	lr 0.04910
Warmup Train [4][1380/3239]	Time 0.247 (0.239)	Data 0.001 (0.011)	Loss 6.2543 (6.2260)	Top-1 acc 0.781 (1.204)	Top-5 acc 4.297 (5.024)	lr 0.04910
Warmup Train [4][1390/3239]	Time 0.230 (0.239)	Data 0.001 (0.011)	Loss 6.2471 (6.2257)	Top-1 acc 1.172 (1.202)	Top-5 acc 5.859 (5.026)	lr 0.04910
Warmup Train [4][1400/3239]	Time 0.176 (0.239)	Data 0.002 (0.011)	Loss 6.2898 (6.2256)	Top-1 acc 0.391 (1.203)	Top-5 acc 3.516 (5.031)	lr 0.04910
Warmup Train [4][1410/3239]	Time 0.410 (0.239)	Data 0.002 (0.011)	Loss 6.2386 (6.2253)	Top-1 acc 0.781 (1.205)	Top-5 acc 5.078 (5.031)	lr 0.04910
Warmup Train [4][1420/3239]	Time 0.199 (0.239)	Data 0.001 (0.011)	Loss 6.2031 (6.2252)	Top-1 acc 1.562 (1.205)	Top-5 acc 6.250 (5.039)	lr 0.04909
Warmup Train [4][1430/3239]	Time 0.244 (0.239)	Data 0.001 (0.011)	Loss 6.1966 (6.2247)	Top-1 acc 1.172 (1.206)	Top-5 acc 5.078 (5.046)	lr 0.04909
Warmup Train [4][1440/3239]	Time 0.232 (0.239)	Data 0.001 (0.011)	Loss 6.1311 (6.2244)	Top-1 acc 1.172 (1.207)	Top-5 acc 5.859 (5.055)	lr 0.04909
Warmup Train [4][1450/3239]	Time 0.209 (0.239)	Data 0.001 (0.011)	Loss 6.1408 (6.2240)	Top-1 acc 0.781 (1.209)	Top-5 acc 5.078 (5.059)	lr 0.04909
Warmup Train [4][1460/3239]	Time 0.194 (0.239)	Data 0.001 (0.011)	Loss 6.1555 (6.2236)	Top-1 acc 1.172 (1.211)	Top-5 acc 8.203 (5.069)	lr 0.04909
Warmup Train [4][1470/3239]	Time 0.281 (0.239)	Data 0.001 (0.011)	Loss 6.1342 (6.2231)	Top-1 acc 0.391 (1.211)	Top-5 acc 4.297 (5.075)	lr 0.04909
Warmup Train [4][1480/3239]	Time 0.210 (0.239)	Data 0.033 (0.010)	Loss 6.2243 (6.2230)	Top-1 acc 2.734 (1.211)	Top-5 acc 6.250 (5.074)	lr 0.04908
Warmup Train [4][1490/3239]	Time 0.198 (0.238)	Data 0.001 (0.010)	Loss 6.2189 (6.2229)	Top-1 acc 1.562 (1.214)	Top-5 acc 4.688 (5.076)	lr 0.04908
Warmup Train [4][1500/3239]	Time 0.213 (0.239)	Data 0.001 (0.010)	Loss 6.0880 (6.2226)	Top-1 acc 1.562 (1.216)	Top-5 acc 8.984 (5.080)	lr 0.04908
Warmup Train [4][1510/3239]	Time 0.298 (0.239)	Data 0.001 (0.010)	Loss 6.0753 (6.2223)	Top-1 acc 2.734 (1.216)	Top-5 acc 6.641 (5.078)	lr 0.04908
Warmup Train [4][1520/3239]	Time 0.237 (0.238)	Data 0.001 (0.010)	Loss 6.2501 (6.2221)	Top-1 acc 1.172 (1.218)	Top-5 acc 7.422 (5.079)	lr 0.04908
Warmup Train [4][1530/3239]	Time 0.223 (0.238)	Data 0.001 (0.010)	Loss 6.1724 (6.2217)	Top-1 acc 3.516 (1.222)	Top-5 acc 9.375 (5.088)	lr 0.04908
Warmup Train [4][1540/3239]	Time 0.159 (0.238)	Data 0.001 (0.010)	Loss 6.1863 (6.2214)	Top-1 acc 0.781 (1.227)	Top-5 acc 5.859 (5.098)	lr 0.04907
Warmup Train [4][1550/3239]	Time 0.237 (0.238)	Data 0.001 (0.010)	Loss 6.1438 (6.2212)	Top-1 acc 1.562 (1.229)	Top-5 acc 6.641 (5.101)	lr 0.04907
Warmup Train [4][1560/3239]	Time 0.190 (0.238)	Data 0.001 (0.010)	Loss 6.1313 (6.2207)	Top-1 acc 1.953 (1.231)	Top-5 acc 5.859 (5.105)	lr 0.04907
Warmup Train [4][1570/3239]	Time 0.187 (0.238)	Data 0.001 (0.010)	Loss 6.1048 (6.2203)	Top-1 acc 1.953 (1.232)	Top-5 acc 7.422 (5.110)	lr 0.04907
Warmup Train [4][1580/3239]	Time 0.251 (0.238)	Data 0.001 (0.010)	Loss 6.1205 (6.2200)	Top-1 acc 0.781 (1.233)	Top-5 acc 7.422 (5.115)	lr 0.04907
Warmup Train [4][1590/3239]	Time 0.204 (0.238)	Data 0.001 (0.010)	Loss 6.2144 (6.2198)	Top-1 acc 0.781 (1.232)	Top-5 acc 5.469 (5.115)	lr 0.04907
Warmup Train [4][1600/3239]	Time 0.179 (0.238)	Data 0.001 (0.010)	Loss 6.1758 (6.2195)	Top-1 acc 1.172 (1.233)	Top-5 acc 5.078 (5.123)	lr 0.04906
Warmup Train [4][1610/3239]	Time 0.358 (0.238)	Data 0.001 (0.010)	Loss 6.1090 (6.2191)	Top-1 acc 1.172 (1.234)	Top-5 acc 5.859 (5.126)	lr 0.04906
Warmup Train [4][1620/3239]	Time 0.188 (0.238)	Data 0.001 (0.010)	Loss 6.1567 (6.2188)	Top-1 acc 2.344 (1.234)	Top-5 acc 5.469 (5.129)	lr 0.04906
Warmup Train [4][1630/3239]	Time 0.189 (0.238)	Data 0.001 (0.010)	Loss 6.1096 (6.2185)	Top-1 acc 0.391 (1.235)	Top-5 acc 3.516 (5.129)	lr 0.04906
Warmup Train [4][1640/3239]	Time 0.210 (0.238)	Data 0.001 (0.010)	Loss 6.1583 (6.2183)	Top-1 acc 0.391 (1.234)	Top-5 acc 3.906 (5.131)	lr 0.04906
Warmup Train [4][1650/3239]	Time 0.255 (0.238)	Data 0.001 (0.010)	Loss 6.1869 (6.2180)	Top-1 acc 1.172 (1.236)	Top-5 acc 6.641 (5.138)	lr 0.04906
Warmup Train [4][1660/3239]	Time 0.208 (0.238)	Data 0.002 (0.010)	Loss 6.1045 (6.2176)	Top-1 acc 2.734 (1.237)	Top-5 acc 6.641 (5.142)	lr 0.04905
Warmup Train [4][1670/3239]	Time 0.240 (0.238)	Data 0.001 (0.010)	Loss 6.1727 (6.2173)	Top-1 acc 1.562 (1.238)	Top-5 acc 4.688 (5.147)	lr 0.04905
Warmup Train [4][1680/3239]	Time 0.292 (0.238)	Data 0.001 (0.010)	Loss 6.1587 (6.2170)	Top-1 acc 0.391 (1.240)	Top-5 acc 4.297 (5.151)	lr 0.04905
Warmup Train [4][1690/3239]	Time 0.242 (0.238)	Data 0.001 (0.010)	Loss 6.0919 (6.2166)	Top-1 acc 1.562 (1.240)	Top-5 acc 6.250 (5.152)	lr 0.04905
Warmup Train [4][1700/3239]	Time 0.233 (0.238)	Data 0.001 (0.009)	Loss 6.1875 (6.2163)	Top-1 acc 1.562 (1.243)	Top-5 acc 6.250 (5.159)	lr 0.04905
Warmup Train [4][1710/3239]	Time 0.347 (0.238)	Data 0.001 (0.009)	Loss 6.2459 (6.2161)	Top-1 acc 1.172 (1.243)	Top-5 acc 5.859 (5.164)	lr 0.04905
Warmup Train [4][1720/3239]	Time 0.271 (0.238)	Data 0.001 (0.009)	Loss 6.1179 (6.2157)	Top-1 acc 0.391 (1.243)	Top-5 acc 5.078 (5.169)	lr 0.04904
Warmup Train [4][1730/3239]	Time 0.218 (0.238)	Data 0.001 (0.009)	Loss 6.1170 (6.2153)	Top-1 acc 1.562 (1.246)	Top-5 acc 5.859 (5.178)	lr 0.04904
Warmup Train [4][1740/3239]	Time 0.226 (0.237)	Data 0.001 (0.009)	Loss 6.2376 (6.2151)	Top-1 acc 0.781 (1.247)	Top-5 acc 3.906 (5.181)	lr 0.04904
Warmup Train [4][1750/3239]	Time 0.218 (0.237)	Data 0.001 (0.009)	Loss 6.1628 (6.2147)	Top-1 acc 0.781 (1.249)	Top-5 acc 8.984 (5.194)	lr 0.04904
Warmup Train [4][1760/3239]	Time 0.185 (0.237)	Data 0.002 (0.009)	Loss 6.1531 (6.2143)	Top-1 acc 1.953 (1.251)	Top-5 acc 7.031 (5.200)	lr 0.04904
Warmup Train [4][1770/3239]	Time 0.189 (0.237)	Data 0.001 (0.009)	Loss 6.1247 (6.2140)	Top-1 acc 1.953 (1.253)	Top-5 acc 6.641 (5.204)	lr 0.04904
Warmup Train [4][1780/3239]	Time 0.274 (0.237)	Data 0.003 (0.009)	Loss 6.1394 (6.2135)	Top-1 acc 2.344 (1.256)	Top-5 acc 7.031 (5.211)	lr 0.04903
Warmup Train [4][1790/3239]	Time 0.247 (0.237)	Data 0.002 (0.009)	Loss 6.2343 (6.2132)	Top-1 acc 0.781 (1.258)	Top-5 acc 3.125 (5.215)	lr 0.04903
Warmup Train [4][1800/3239]	Time 0.237 (0.237)	Data 0.001 (0.009)	Loss 6.1396 (6.2129)	Top-1 acc 1.953 (1.260)	Top-5 acc 5.078 (5.220)	lr 0.04903
Warmup Train [4][1810/3239]	Time 0.283 (0.237)	Data 0.001 (0.009)	Loss 6.1221 (6.2126)	Top-1 acc 1.562 (1.262)	Top-5 acc 5.859 (5.227)	lr 0.04903
Warmup Train [4][1820/3239]	Time 0.327 (0.237)	Data 0.001 (0.009)	Loss 6.1261 (6.2122)	Top-1 acc 0.391 (1.264)	Top-5 acc 6.250 (5.232)	lr 0.04903
Warmup Train [4][1830/3239]	Time 0.214 (0.237)	Data 0.001 (0.009)	Loss 6.0670 (6.2119)	Top-1 acc 3.125 (1.265)	Top-5 acc 7.422 (5.235)	lr 0.04903
Warmup Train [4][1840/3239]	Time 0.291 (0.237)	Data 0.001 (0.009)	Loss 6.1503 (6.2115)	Top-1 acc 2.344 (1.267)	Top-5 acc 7.422 (5.240)	lr 0.04902
Warmup Train [4][1850/3239]	Time 0.238 (0.237)	Data 0.001 (0.009)	Loss 6.1387 (6.2112)	Top-1 acc 0.781 (1.269)	Top-5 acc 6.641 (5.244)	lr 0.04902
Warmup Train [4][1860/3239]	Time 0.299 (0.237)	Data 0.001 (0.009)	Loss 6.2562 (6.2109)	Top-1 acc 2.734 (1.271)	Top-5 acc 6.641 (5.249)	lr 0.04902
Warmup Train [4][1870/3239]	Time 0.229 (0.237)	Data 0.001 (0.009)	Loss 6.1813 (6.2104)	Top-1 acc 1.562 (1.273)	Top-5 acc 5.469 (5.257)	lr 0.04902
Warmup Train [4][1880/3239]	Time 0.238 (0.237)	Data 0.001 (0.009)	Loss 6.1290 (6.2100)	Top-1 acc 1.562 (1.274)	Top-5 acc 5.078 (5.263)	lr 0.04902
Warmup Train [4][1890/3239]	Time 0.236 (0.237)	Data 0.002 (0.009)	Loss 6.1429 (6.2098)	Top-1 acc 1.953 (1.275)	Top-5 acc 4.688 (5.265)	lr 0.04902
Warmup Train [4][1900/3239]	Time 0.204 (0.237)	Data 0.001 (0.009)	Loss 6.1316 (6.2094)	Top-1 acc 2.734 (1.277)	Top-5 acc 5.469 (5.269)	lr 0.04901
Warmup Train [4][1910/3239]	Time 0.195 (0.237)	Data 0.002 (0.009)	Loss 6.1780 (6.2091)	Top-1 acc 0.391 (1.280)	Top-5 acc 3.516 (5.271)	lr 0.04901
Warmup Train [4][1920/3239]	Time 0.360 (0.237)	Data 0.001 (0.009)	Loss 6.2103 (6.2088)	Top-1 acc 0.391 (1.281)	Top-5 acc 3.906 (5.274)	lr 0.04901
Warmup Train [4][1930/3239]	Time 0.182 (0.237)	Data 0.001 (0.009)	Loss 6.2379 (6.2085)	Top-1 acc 1.953 (1.285)	Top-5 acc 4.688 (5.280)	lr 0.04901
Warmup Train [4][1940/3239]	Time 0.232 (0.237)	Data 0.001 (0.009)	Loss 6.1225 (6.2081)	Top-1 acc 2.344 (1.288)	Top-5 acc 5.859 (5.287)	lr 0.04901
Warmup Train [4][1950/3239]	Time 0.179 (0.237)	Data 0.001 (0.009)	Loss 6.2033 (6.2078)	Top-1 acc 1.172 (1.291)	Top-5 acc 5.859 (5.293)	lr 0.04901
Warmup Train [4][1960/3239]	Time 0.136 (0.237)	Data 0.001 (0.009)	Loss 6.1861 (6.2074)	Top-1 acc 1.953 (1.294)	Top-5 acc 5.859 (5.299)	lr 0.04900
Warmup Train [4][1970/3239]	Time 0.285 (0.237)	Data 0.001 (0.008)	Loss 6.1077 (6.2071)	Top-1 acc 1.562 (1.296)	Top-5 acc 8.594 (5.309)	lr 0.04900
Warmup Train [4][1980/3239]	Time 0.217 (0.237)	Data 0.001 (0.008)	Loss 6.0822 (6.2068)	Top-1 acc 1.562 (1.297)	Top-5 acc 7.422 (5.316)	lr 0.04900
Warmup Train [4][1990/3239]	Time 0.146 (0.237)	Data 0.002 (0.008)	Loss 6.0467 (6.2063)	Top-1 acc 1.172 (1.298)	Top-5 acc 6.641 (5.322)	lr 0.04900
Warmup Train [4][2000/3239]	Time 0.266 (0.236)	Data 0.001 (0.008)	Loss 6.2453 (6.2060)	Top-1 acc 0.781 (1.298)	Top-5 acc 3.516 (5.324)	lr 0.04900
Warmup Train [4][2010/3239]	Time 0.153 (0.236)	Data 0.001 (0.008)	Loss 6.0245 (6.2055)	Top-1 acc 1.562 (1.299)	Top-5 acc 5.859 (5.330)	lr 0.04900
Warmup Train [4][2020/3239]	Time 0.194 (0.236)	Data 0.001 (0.008)	Loss 6.1792 (6.2053)	Top-1 acc 3.125 (1.301)	Top-5 acc 7.422 (5.338)	lr 0.04899
Warmup Train [4][2030/3239]	Time 0.313 (0.236)	Data 0.002 (0.008)	Loss 6.1386 (6.2051)	Top-1 acc 3.516 (1.303)	Top-5 acc 8.594 (5.342)	lr 0.04899
Warmup Train [4][2040/3239]	Time 0.231 (0.236)	Data 0.001 (0.008)	Loss 6.1360 (6.2049)	Top-1 acc 0.391 (1.305)	Top-5 acc 5.078 (5.346)	lr 0.04899
Warmup Train [4][2050/3239]	Time 0.249 (0.236)	Data 0.001 (0.008)	Loss 6.2112 (6.2046)	Top-1 acc 1.172 (1.304)	Top-5 acc 4.688 (5.347)	lr 0.04899
Warmup Train [4][2060/3239]	Time 0.248 (0.236)	Data 0.001 (0.008)	Loss 6.0806 (6.2042)	Top-1 acc 1.172 (1.304)	Top-5 acc 7.031 (5.351)	lr 0.04899
Warmup Train [4][2070/3239]	Time 0.210 (0.236)	Data 0.002 (0.008)	Loss 6.1325 (6.2039)	Top-1 acc 2.344 (1.307)	Top-5 acc 7.031 (5.360)	lr 0.04899
Warmup Train [4][2080/3239]	Time 0.150 (0.236)	Data 0.001 (0.008)	Loss 6.0631 (6.2035)	Top-1 acc 1.562 (1.308)	Top-5 acc 7.422 (5.368)	lr 0.04898
Warmup Train [4][2090/3239]	Time 0.225 (0.236)	Data 0.001 (0.008)	Loss 6.2107 (6.2032)	Top-1 acc 1.172 (1.309)	Top-5 acc 6.250 (5.370)	lr 0.04898
Warmup Train [4][2100/3239]	Time 0.210 (0.236)	Data 0.001 (0.008)	Loss 6.3325 (6.2030)	Top-1 acc 1.172 (1.311)	Top-5 acc 6.250 (5.377)	lr 0.04898
Warmup Train [4][2110/3239]	Time 0.264 (0.236)	Data 0.001 (0.008)	Loss 6.1405 (6.2027)	Top-1 acc 2.344 (1.311)	Top-5 acc 7.031 (5.382)	lr 0.04898
Warmup Train [4][2120/3239]	Time 0.224 (0.236)	Data 0.001 (0.008)	Loss 6.1098 (6.2023)	Top-1 acc 0.391 (1.312)	Top-5 acc 2.734 (5.384)	lr 0.04898
Warmup Train [4][2130/3239]	Time 0.179 (0.236)	Data 0.001 (0.008)	Loss 6.1610 (6.2019)	Top-1 acc 1.172 (1.313)	Top-5 acc 5.078 (5.386)	lr 0.04898
Warmup Train [4][2140/3239]	Time 0.345 (0.236)	Data 0.001 (0.008)	Loss 6.1886 (6.2018)	Top-1 acc 2.344 (1.315)	Top-5 acc 6.250 (5.389)	lr 0.04897
Warmup Train [4][2150/3239]	Time 0.196 (0.236)	Data 0.001 (0.008)	Loss 6.1590 (6.2015)	Top-1 acc 2.344 (1.319)	Top-5 acc 8.984 (5.395)	lr 0.04897
Warmup Train [4][2160/3239]	Time 0.181 (0.236)	Data 0.001 (0.008)	Loss 6.1876 (6.2012)	Top-1 acc 1.172 (1.321)	Top-5 acc 7.031 (5.403)	lr 0.04897
Warmup Train [4][2170/3239]	Time 0.232 (0.236)	Data 0.001 (0.008)	Loss 6.1342 (6.2007)	Top-1 acc 1.953 (1.321)	Top-5 acc 5.078 (5.407)	lr 0.04897
Warmup Train [4][2180/3239]	Time 0.285 (0.236)	Data 0.002 (0.008)	Loss 6.0499 (6.2004)	Top-1 acc 2.734 (1.323)	Top-5 acc 5.859 (5.409)	lr 0.04897
Warmup Train [4][2190/3239]	Time 0.210 (0.236)	Data 0.001 (0.008)	Loss 6.1307 (6.2001)	Top-1 acc 2.734 (1.325)	Top-5 acc 8.594 (5.413)	lr 0.04897
Warmup Train [4][2200/3239]	Time 0.241 (0.236)	Data 0.001 (0.008)	Loss 6.0932 (6.1997)	Top-1 acc 2.734 (1.327)	Top-5 acc 7.031 (5.420)	lr 0.04896
Warmup Train [4][2210/3239]	Time 0.271 (0.236)	Data 0.001 (0.008)	Loss 6.1514 (6.1994)	Top-1 acc 0.781 (1.329)	Top-5 acc 4.297 (5.428)	lr 0.04896
Warmup Train [4][2220/3239]	Time 0.151 (0.236)	Data 0.001 (0.008)	Loss 6.1209 (6.1991)	Top-1 acc 1.953 (1.331)	Top-5 acc 7.422 (5.431)	lr 0.04896
Warmup Train [4][2230/3239]	Time 0.200 (0.236)	Data 0.001 (0.008)	Loss 6.1058 (6.1989)	Top-1 acc 1.953 (1.331)	Top-5 acc 7.031 (5.437)	lr 0.04896
Warmup Train [4][2240/3239]	Time 0.341 (0.236)	Data 0.001 (0.008)	Loss 6.0972 (6.1986)	Top-1 acc 3.906 (1.333)	Top-5 acc 8.984 (5.439)	lr 0.04896
Warmup Train [4][2250/3239]	Time 0.218 (0.236)	Data 0.001 (0.008)	Loss 5.9677 (6.1984)	Top-1 acc 1.172 (1.335)	Top-5 acc 7.422 (5.442)	lr 0.04895
Warmup Train [4][2260/3239]	Time 0.198 (0.236)	Data 0.001 (0.008)	Loss 6.1155 (6.1979)	Top-1 acc 3.516 (1.337)	Top-5 acc 8.594 (5.452)	lr 0.04895
Warmup Train [4][2270/3239]	Time 0.307 (0.236)	Data 0.001 (0.008)	Loss 6.0662 (6.1975)	Top-1 acc 0.781 (1.339)	Top-5 acc 5.859 (5.458)	lr 0.04895
Warmup Train [4][2280/3239]	Time 0.162 (0.236)	Data 0.001 (0.008)	Loss 6.1364 (6.1973)	Top-1 acc 1.172 (1.341)	Top-5 acc 5.859 (5.460)	lr 0.04895
Warmup Train [4][2290/3239]	Time 0.214 (0.236)	Data 0.001 (0.008)	Loss 6.1046 (6.1969)	Top-1 acc 1.953 (1.342)	Top-5 acc 5.859 (5.465)	lr 0.04895
Warmup Train [4][2300/3239]	Time 0.243 (0.236)	Data 0.001 (0.008)	Loss 6.0250 (6.1965)	Top-1 acc 1.562 (1.342)	Top-5 acc 7.031 (5.470)	lr 0.04895
Warmup Train [4][2310/3239]	Time 0.237 (0.236)	Data 0.030 (0.008)	Loss 6.1855 (6.1961)	Top-1 acc 0.000 (1.343)	Top-5 acc 5.469 (5.475)	lr 0.04894
Warmup Train [4][2320/3239]	Time 0.319 (0.236)	Data 0.001 (0.008)	Loss 6.1277 (6.1958)	Top-1 acc 1.172 (1.344)	Top-5 acc 6.641 (5.480)	lr 0.04894
Warmup Train [4][2330/3239]	Time 0.247 (0.236)	Data 0.002 (0.008)	Loss 6.1103 (6.1954)	Top-1 acc 2.734 (1.345)	Top-5 acc 8.594 (5.485)	lr 0.04894
Warmup Train [4][2340/3239]	Time 0.313 (0.236)	Data 0.001 (0.008)	Loss 6.2016 (6.1952)	Top-1 acc 1.953 (1.348)	Top-5 acc 5.469 (5.488)	lr 0.04894
Warmup Train [4][2350/3239]	Time 0.221 (0.236)	Data 0.001 (0.008)	Loss 6.0591 (6.1949)	Top-1 acc 1.562 (1.349)	Top-5 acc 6.641 (5.492)	lr 0.04894
Warmup Train [4][2360/3239]	Time 0.250 (0.236)	Data 0.002 (0.008)	Loss 6.1039 (6.1946)	Top-1 acc 0.391 (1.348)	Top-5 acc 3.125 (5.494)	lr 0.04894
Warmup Train [4][2370/3239]	Time 0.183 (0.236)	Data 0.001 (0.008)	Loss 6.1098 (6.1944)	Top-1 acc 2.344 (1.349)	Top-5 acc 8.594 (5.499)	lr 0.04893
Warmup Train [4][2380/3239]	Time 0.201 (0.236)	Data 0.001 (0.008)	Loss 6.1972 (6.1941)	Top-1 acc 0.391 (1.349)	Top-5 acc 4.297 (5.499)	lr 0.04893
Warmup Train [4][2390/3239]	Time 0.249 (0.236)	Data 0.001 (0.008)	Loss 6.1315 (6.1937)	Top-1 acc 1.953 (1.352)	Top-5 acc 7.031 (5.506)	lr 0.04893
Warmup Train [4][2400/3239]	Time 0.207 (0.236)	Data 0.001 (0.008)	Loss 6.0880 (6.1934)	Top-1 acc 3.125 (1.353)	Top-5 acc 8.594 (5.510)	lr 0.04893
Warmup Train [4][2410/3239]	Time 0.191 (0.236)	Data 0.002 (0.008)	Loss 6.0885 (6.1930)	Top-1 acc 2.344 (1.353)	Top-5 acc 5.469 (5.513)	lr 0.04893
Warmup Train [4][2420/3239]	Time 0.207 (0.236)	Data 0.001 (0.008)	Loss 6.0782 (6.1927)	Top-1 acc 2.344 (1.355)	Top-5 acc 9.375 (5.518)	lr 0.04893
Warmup Train [4][2430/3239]	Time 0.271 (0.236)	Data 0.002 (0.007)	Loss 6.0436 (6.1923)	Top-1 acc 2.734 (1.357)	Top-5 acc 7.031 (5.523)	lr 0.04892
Warmup Train [4][2440/3239]	Time 0.365 (0.236)	Data 0.002 (0.007)	Loss 6.0958 (6.1921)	Top-1 acc 1.172 (1.357)	Top-5 acc 7.422 (5.527)	lr 0.04892
Warmup Train [4][2450/3239]	Time 0.258 (0.236)	Data 0.002 (0.007)	Loss 6.0972 (6.1917)	Top-1 acc 1.172 (1.357)	Top-5 acc 7.422 (5.529)	lr 0.04892
Warmup Train [4][2460/3239]	Time 0.195 (0.236)	Data 0.001 (0.007)	Loss 6.1253 (6.1914)	Top-1 acc 1.562 (1.359)	Top-5 acc 8.594 (5.537)	lr 0.04892
Warmup Train [4][2470/3239]	Time 0.170 (0.236)	Data 0.001 (0.007)	Loss 6.1240 (6.1911)	Top-1 acc 1.562 (1.361)	Top-5 acc 5.078 (5.542)	lr 0.04892
Warmup Train [4][2480/3239]	Time 0.211 (0.236)	Data 0.001 (0.007)	Loss 6.1575 (6.1908)	Top-1 acc 1.172 (1.361)	Top-5 acc 4.688 (5.545)	lr 0.04891
Warmup Train [4][2490/3239]	Time 0.253 (0.236)	Data 0.001 (0.007)	Loss 6.0844 (6.1903)	Top-1 acc 0.391 (1.363)	Top-5 acc 7.812 (5.549)	lr 0.04891
Warmup Train [4][2500/3239]	Time 0.242 (0.236)	Data 0.001 (0.007)	Loss 6.0717 (6.1901)	Top-1 acc 1.953 (1.362)	Top-5 acc 7.422 (5.551)	lr 0.04891
Warmup Train [4][2510/3239]	Time 0.203 (0.236)	Data 0.001 (0.007)	Loss 6.1067 (6.1898)	Top-1 acc 3.906 (1.366)	Top-5 acc 9.766 (5.558)	lr 0.04891
Warmup Train [4][2520/3239]	Time 0.244 (0.236)	Data 0.001 (0.007)	Loss 6.1462 (6.1895)	Top-1 acc 3.125 (1.368)	Top-5 acc 8.203 (5.564)	lr 0.04891
Warmup Train [4][2530/3239]	Time 0.230 (0.236)	Data 0.001 (0.007)	Loss 6.1191 (6.1892)	Top-1 acc 0.781 (1.368)	Top-5 acc 3.125 (5.567)	lr 0.04891
Warmup Train [4][2540/3239]	Time 0.277 (0.236)	Data 0.002 (0.007)	Loss 6.1386 (6.1890)	Top-1 acc 0.781 (1.368)	Top-5 acc 5.469 (5.567)	lr 0.04890
Warmup Train [4][2550/3239]	Time 0.221 (0.236)	Data 0.001 (0.007)	Loss 6.0529 (6.1886)	Top-1 acc 1.562 (1.369)	Top-5 acc 7.422 (5.572)	lr 0.04890
Warmup Train [4][2560/3239]	Time 0.213 (0.236)	Data 0.001 (0.007)	Loss 6.1100 (6.1882)	Top-1 acc 2.344 (1.372)	Top-5 acc 6.250 (5.577)	lr 0.04890
Warmup Train [4][2570/3239]	Time 0.155 (0.235)	Data 0.001 (0.007)	Loss 6.0578 (6.1879)	Top-1 acc 3.906 (1.373)	Top-5 acc 9.375 (5.582)	lr 0.04890
Warmup Train [4][2580/3239]	Time 0.199 (0.235)	Data 0.001 (0.007)	Loss 6.0845 (6.1875)	Top-1 acc 2.344 (1.375)	Top-5 acc 5.469 (5.586)	lr 0.04890
Warmup Train [4][2590/3239]	Time 0.129 (0.235)	Data 0.001 (0.007)	Loss 6.0475 (6.1871)	Top-1 acc 2.344 (1.377)	Top-5 acc 8.984 (5.595)	lr 0.04890
Warmup Train [4][2600/3239]	Time 0.211 (0.235)	Data 0.001 (0.007)	Loss 6.0087 (6.1867)	Top-1 acc 2.344 (1.380)	Top-5 acc 7.031 (5.600)	lr 0.04889
Warmup Train [4][2610/3239]	Time 0.196 (0.235)	Data 0.001 (0.007)	Loss 6.0869 (6.1863)	Top-1 acc 2.734 (1.380)	Top-5 acc 8.984 (5.606)	lr 0.04889
Warmup Train [4][2620/3239]	Time 0.306 (0.235)	Data 0.001 (0.007)	Loss 6.0939 (6.1860)	Top-1 acc 1.953 (1.382)	Top-5 acc 6.641 (5.611)	lr 0.04889
Warmup Train [4][2630/3239]	Time 0.136 (0.235)	Data 0.001 (0.007)	Loss 6.1883 (6.1856)	Top-1 acc 0.781 (1.383)	Top-5 acc 6.250 (5.616)	lr 0.04889
Warmup Train [4][2640/3239]	Time 0.330 (0.235)	Data 0.002 (0.007)	Loss 6.1598 (6.1854)	Top-1 acc 2.344 (1.385)	Top-5 acc 5.078 (5.621)	lr 0.04889
Warmup Train [4][2650/3239]	Time 0.218 (0.235)	Data 0.001 (0.007)	Loss 6.1485 (6.1850)	Top-1 acc 0.781 (1.386)	Top-5 acc 3.906 (5.623)	lr 0.04888
Warmup Train [4][2660/3239]	Time 0.182 (0.235)	Data 0.001 (0.007)	Loss 6.0804 (6.1849)	Top-1 acc 3.906 (1.388)	Top-5 acc 10.938 (5.627)	lr 0.04888
Warmup Train [4][2670/3239]	Time 0.226 (0.235)	Data 0.001 (0.007)	Loss 6.0958 (6.1845)	Top-1 acc 0.781 (1.388)	Top-5 acc 5.859 (5.630)	lr 0.04888
Warmup Train [4][2680/3239]	Time 0.256 (0.235)	Data 0.002 (0.007)	Loss 6.0636 (6.1841)	Top-1 acc 0.781 (1.390)	Top-5 acc 6.641 (5.636)	lr 0.04888
Warmup Train [4][2690/3239]	Time 0.250 (0.235)	Data 0.001 (0.007)	Loss 6.0206 (6.1837)	Top-1 acc 3.125 (1.393)	Top-5 acc 10.156 (5.642)	lr 0.04888
Warmup Train [4][2700/3239]	Time 0.256 (0.235)	Data 0.002 (0.007)	Loss 5.9826 (6.1833)	Top-1 acc 1.953 (1.395)	Top-5 acc 9.375 (5.647)	lr 0.04888
Warmup Train [4][2710/3239]	Time 0.235 (0.235)	Data 0.001 (0.007)	Loss 6.0735 (6.1830)	Top-1 acc 1.953 (1.397)	Top-5 acc 8.594 (5.652)	lr 0.04887
Warmup Train [4][2720/3239]	Time 0.185 (0.235)	Data 0.001 (0.007)	Loss 6.0595 (6.1826)	Top-1 acc 1.172 (1.399)	Top-5 acc 6.641 (5.657)	lr 0.04887
Warmup Train [4][2730/3239]	Time 0.194 (0.235)	Data 0.001 (0.007)	Loss 6.0680 (6.1823)	Top-1 acc 0.000 (1.399)	Top-5 acc 4.688 (5.658)	lr 0.04887
Warmup Train [4][2740/3239]	Time 0.226 (0.235)	Data 0.002 (0.007)	Loss 6.0946 (6.1819)	Top-1 acc 2.734 (1.401)	Top-5 acc 7.031 (5.662)	lr 0.04887
Warmup Train [4][2750/3239]	Time 0.223 (0.235)	Data 0.001 (0.007)	Loss 6.0373 (6.1817)	Top-1 acc 1.562 (1.402)	Top-5 acc 6.641 (5.665)	lr 0.04887
Warmup Train [4][2760/3239]	Time 0.171 (0.235)	Data 0.003 (0.007)	Loss 6.1603 (6.1814)	Top-1 acc 1.562 (1.404)	Top-5 acc 5.469 (5.669)	lr 0.04886
Warmup Train [4][2770/3239]	Time 0.202 (0.235)	Data 0.001 (0.007)	Loss 6.1321 (6.1811)	Top-1 acc 1.562 (1.403)	Top-5 acc 3.906 (5.674)	lr 0.04886
Warmup Train [4][2780/3239]	Time 0.204 (0.235)	Data 0.001 (0.007)	Loss 6.1552 (6.1809)	Top-1 acc 0.781 (1.405)	Top-5 acc 5.469 (5.678)	lr 0.04886
Warmup Train [4][2790/3239]	Time 0.216 (0.235)	Data 0.002 (0.007)	Loss 6.0111 (6.1805)	Top-1 acc 3.125 (1.408)	Top-5 acc 8.984 (5.685)	lr 0.04886
Warmup Train [4][2800/3239]	Time 0.213 (0.235)	Data 0.002 (0.007)	Loss 6.0144 (6.1803)	Top-1 acc 2.734 (1.409)	Top-5 acc 9.766 (5.685)	lr 0.04886
Warmup Train [4][2810/3239]	Time 0.182 (0.235)	Data 0.001 (0.007)	Loss 6.0548 (6.1798)	Top-1 acc 2.344 (1.412)	Top-5 acc 6.641 (5.693)	lr 0.04886
Warmup Train [4][2820/3239]	Time 0.208 (0.235)	Data 0.001 (0.007)	Loss 6.1249 (6.1795)	Top-1 acc 1.172 (1.412)	Top-5 acc 5.078 (5.695)	lr 0.04885
Warmup Train [4][2830/3239]	Time 0.350 (0.235)	Data 0.002 (0.007)	Loss 6.1680 (6.1793)	Top-1 acc 1.562 (1.412)	Top-5 acc 7.031 (5.698)	lr 0.04885
Warmup Train [4][2840/3239]	Time 0.200 (0.235)	Data 0.001 (0.007)	Loss 6.1090 (6.1790)	Top-1 acc 1.953 (1.414)	Top-5 acc 6.250 (5.704)	lr 0.04885
Warmup Train [4][2850/3239]	Time 0.348 (0.235)	Data 0.002 (0.007)	Loss 6.0625 (6.1787)	Top-1 acc 0.781 (1.415)	Top-5 acc 5.469 (5.710)	lr 0.04885
Warmup Train [4][2860/3239]	Time 0.191 (0.235)	Data 0.001 (0.007)	Loss 6.1577 (6.1784)	Top-1 acc 0.391 (1.416)	Top-5 acc 6.641 (5.713)	lr 0.04885
Warmup Train [4][2870/3239]	Time 0.172 (0.235)	Data 0.001 (0.007)	Loss 6.0767 (6.1782)	Top-1 acc 1.562 (1.418)	Top-5 acc 5.859 (5.715)	lr 0.04884
Warmup Train [4][2880/3239]	Time 0.239 (0.235)	Data 0.001 (0.007)	Loss 5.9682 (6.1778)	Top-1 acc 1.953 (1.421)	Top-5 acc 8.203 (5.723)	lr 0.04884
Warmup Train [4][2890/3239]	Time 0.226 (0.235)	Data 0.002 (0.007)	Loss 6.1390 (6.1776)	Top-1 acc 1.172 (1.423)	Top-5 acc 7.422 (5.728)	lr 0.04884
Warmup Train [4][2900/3239]	Time 0.238 (0.235)	Data 0.001 (0.007)	Loss 6.0534 (6.1772)	Top-1 acc 1.172 (1.425)	Top-5 acc 10.156 (5.734)	lr 0.04884
Warmup Train [4][2910/3239]	Time 0.163 (0.235)	Data 0.001 (0.007)	Loss 6.0284 (6.1769)	Top-1 acc 1.562 (1.426)	Top-5 acc 8.594 (5.738)	lr 0.04884
Warmup Train [4][2920/3239]	Time 0.221 (0.235)	Data 0.001 (0.007)	Loss 6.1093 (6.1766)	Top-1 acc 3.125 (1.429)	Top-5 acc 8.984 (5.743)	lr 0.04884
Warmup Train [4][2930/3239]	Time 0.266 (0.235)	Data 0.001 (0.007)	Loss 6.0536 (6.1763)	Top-1 acc 0.781 (1.429)	Top-5 acc 7.031 (5.747)	lr 0.04883
Warmup Train [4][2940/3239]	Time 0.166 (0.234)	Data 0.001 (0.007)	Loss 6.1324 (6.1761)	Top-1 acc 1.953 (1.430)	Top-5 acc 5.078 (5.750)	lr 0.04883
Warmup Train [4][2950/3239]	Time 0.199 (0.234)	Data 0.001 (0.007)	Loss 6.1757 (6.1758)	Top-1 acc 1.953 (1.431)	Top-5 acc 7.422 (5.755)	lr 0.04883
Warmup Train [4][2960/3239]	Time 0.206 (0.234)	Data 0.001 (0.007)	Loss 6.1810 (6.1755)	Top-1 acc 1.562 (1.433)	Top-5 acc 4.297 (5.759)	lr 0.04883
Warmup Train [4][2970/3239]	Time 0.358 (0.235)	Data 0.001 (0.007)	Loss 6.0594 (6.1752)	Top-1 acc 2.344 (1.437)	Top-5 acc 8.594 (5.767)	lr 0.04883
Warmup Train [4][2980/3239]	Time 0.200 (0.235)	Data 0.001 (0.007)	Loss 6.1467 (6.1749)	Top-1 acc 1.562 (1.437)	Top-5 acc 8.203 (5.770)	lr 0.04882
Warmup Train [4][2990/3239]	Time 0.246 (0.235)	Data 0.001 (0.007)	Loss 6.0679 (6.1746)	Top-1 acc 1.172 (1.438)	Top-5 acc 7.031 (5.774)	lr 0.04882
Warmup Train [4][3000/3239]	Time 0.161 (0.234)	Data 0.001 (0.007)	Loss 6.0422 (6.1743)	Top-1 acc 1.562 (1.438)	Top-5 acc 6.641 (5.779)	lr 0.04882
Warmup Train [4][3010/3239]	Time 0.260 (0.234)	Data 0.001 (0.007)	Loss 6.0594 (6.1740)	Top-1 acc 1.953 (1.441)	Top-5 acc 7.812 (5.784)	lr 0.04882
Warmup Train [4][3020/3239]	Time 0.212 (0.234)	Data 0.001 (0.007)	Loss 6.2012 (6.1736)	Top-1 acc 1.953 (1.442)	Top-5 acc 7.422 (5.789)	lr 0.04882
Warmup Train [4][3030/3239]	Time 0.233 (0.234)	Data 0.001 (0.007)	Loss 6.0387 (6.1733)	Top-1 acc 3.906 (1.443)	Top-5 acc 8.984 (5.793)	lr 0.04882
Warmup Train [4][3040/3239]	Time 0.203 (0.234)	Data 0.002 (0.007)	Loss 6.0507 (6.1730)	Top-1 acc 1.172 (1.443)	Top-5 acc 7.031 (5.797)	lr 0.04881
Warmup Train [4][3050/3239]	Time 0.194 (0.234)	Data 0.001 (0.007)	Loss 6.0379 (6.1727)	Top-1 acc 1.172 (1.445)	Top-5 acc 8.594 (5.800)	lr 0.04881
Warmup Train [4][3060/3239]	Time 0.381 (0.234)	Data 0.001 (0.007)	Loss 6.0949 (6.1725)	Top-1 acc 0.781 (1.445)	Top-5 acc 4.688 (5.801)	lr 0.04881
Warmup Train [4][3070/3239]	Time 0.166 (0.234)	Data 0.001 (0.007)	Loss 6.0874 (6.1721)	Top-1 acc 1.172 (1.445)	Top-5 acc 8.594 (5.806)	lr 0.04881
Warmup Train [4][3080/3239]	Time 0.173 (0.234)	Data 0.001 (0.007)	Loss 6.0747 (6.1718)	Top-1 acc 2.734 (1.447)	Top-5 acc 5.859 (5.808)	lr 0.04881
Warmup Train [4][3090/3239]	Time 0.228 (0.234)	Data 0.001 (0.007)	Loss 6.1737 (6.1715)	Top-1 acc 3.125 (1.450)	Top-5 acc 7.031 (5.815)	lr 0.04880
Warmup Train [4][3100/3239]	Time 0.191 (0.234)	Data 0.002 (0.006)	Loss 6.0633 (6.1711)	Top-1 acc 0.781 (1.452)	Top-5 acc 6.641 (5.823)	lr 0.04880
Warmup Train [4][3110/3239]	Time 0.218 (0.234)	Data 0.001 (0.006)	Loss 6.0674 (6.1707)	Top-1 acc 1.562 (1.454)	Top-5 acc 5.469 (5.827)	lr 0.04880
Warmup Train [4][3120/3239]	Time 0.184 (0.234)	Data 0.002 (0.006)	Loss 6.1226 (6.1705)	Top-1 acc 1.172 (1.454)	Top-5 acc 4.688 (5.832)	lr 0.04880
Warmup Train [4][3130/3239]	Time 0.270 (0.234)	Data 0.002 (0.006)	Loss 6.0430 (6.1701)	Top-1 acc 2.344 (1.456)	Top-5 acc 7.812 (5.836)	lr 0.04880
Warmup Train [4][3140/3239]	Time 0.250 (0.234)	Data 0.001 (0.006)	Loss 5.9468 (6.1698)	Top-1 acc 2.344 (1.457)	Top-5 acc 10.547 (5.839)	lr 0.04880
Warmup Train [4][3150/3239]	Time 0.289 (0.234)	Data 0.001 (0.006)	Loss 6.0486 (6.1695)	Top-1 acc 1.562 (1.457)	Top-5 acc 7.422 (5.843)	lr 0.04879
Warmup Train [4][3160/3239]	Time 0.234 (0.234)	Data 0.001 (0.006)	Loss 6.0741 (6.1692)	Top-1 acc 1.562 (1.458)	Top-5 acc 7.031 (5.844)	lr 0.04879
Warmup Train [4][3170/3239]	Time 0.415 (0.234)	Data 0.001 (0.006)	Loss 6.0629 (6.1690)	Top-1 acc 1.172 (1.459)	Top-5 acc 7.031 (5.848)	lr 0.04879
Warmup Train [4][3180/3239]	Time 0.210 (0.234)	Data 0.000 (0.006)	Loss 5.9606 (6.1686)	Top-1 acc 2.344 (1.460)	Top-5 acc 8.984 (5.855)	lr 0.04879
Warmup Train [4][3190/3239]	Time 0.175 (0.234)	Data 0.000 (0.006)	Loss 6.0494 (6.1683)	Top-1 acc 1.172 (1.462)	Top-5 acc 8.984 (5.861)	lr 0.04879
Warmup Train [4][3200/3239]	Time 0.150 (0.234)	Data 0.000 (0.006)	Loss 6.0493 (6.1680)	Top-1 acc 2.734 (1.464)	Top-5 acc 8.594 (5.865)	lr 0.04878
Warmup Train [4][3210/3239]	Time 0.247 (0.234)	Data 0.000 (0.006)	Loss 6.0688 (6.1677)	Top-1 acc 1.953 (1.466)	Top-5 acc 6.250 (5.871)	lr 0.04878
Warmup Train [4][3220/3239]	Time 0.145 (0.234)	Data 0.000 (0.006)	Loss 6.1338 (6.1673)	Top-1 acc 0.781 (1.467)	Top-5 acc 7.812 (5.878)	lr 0.04878
Warmup Train [4][3230/3239]	Time 0.185 (0.234)	Data 0.000 (0.006)	Loss 6.0428 (6.1670)	Top-1 acc 1.953 (1.468)	Top-5 acc 8.594 (5.883)	lr 0.04878
Warmup Train [4][3239/3239]	Time 0.557 (0.234)	Data 0.000 (0.006)	Loss 6.0511 (6.1667)	Top-1 acc 3.704 (1.471)	Top-5 acc 7.407 (5.888)	lr 0.04878
==========Warmup Valid [4/40]	loss 5.775	top-1 acc 2.356	top-5 acc 8.522	Train top-1 1.471	top-5 5.888	flops: 442.4M
Warmup Train [5][0/3239]	Time 13.166 (13.166)	Data 11.299 (11.299)	Loss 6.0268 (6.0268)	Top-1 acc 2.344 (2.344)	Top-5 acc 9.375 (9.375)	lr 0.04878
Warmup Train [5][10/3239]	Time 0.239 (1.477)	Data 0.002 (1.062)	Loss 6.0285 (6.0771)	Top-1 acc 0.781 (2.060)	Top-5 acc 8.594 (7.599)	lr 0.04877
Warmup Train [5][20/3239]	Time 0.231 (0.906)	Data 0.001 (0.558)	Loss 5.9736 (6.0625)	Top-1 acc 0.781 (1.767)	Top-5 acc 7.422 (7.292)	lr 0.04877
Warmup Train [5][30/3239]	Time 0.166 (0.689)	Data 0.001 (0.378)	Loss 6.0699 (6.0684)	Top-1 acc 4.688 (1.815)	Top-5 acc 12.500 (7.245)	lr 0.04877
Warmup Train [5][40/3239]	Time 0.280 (0.583)	Data 0.001 (0.288)	Loss 6.0607 (6.0628)	Top-1 acc 2.344 (1.934)	Top-5 acc 9.375 (7.536)	lr 0.04877
Warmup Train [5][50/3239]	Time 0.181 (0.513)	Data 0.002 (0.232)	Loss 6.0873 (6.0631)	Top-1 acc 2.734 (1.945)	Top-5 acc 7.812 (7.506)	lr 0.04877
Warmup Train [5][60/3239]	Time 0.211 (0.466)	Data 0.001 (0.195)	Loss 5.9800 (6.0603)	Top-1 acc 1.172 (1.921)	Top-5 acc 8.984 (7.563)	lr 0.04877
Warmup Train [5][70/3239]	Time 0.391 (0.433)	Data 0.001 (0.168)	Loss 6.0983 (6.0582)	Top-1 acc 2.344 (1.953)	Top-5 acc 6.250 (7.598)	lr 0.04876
Warmup Train [5][80/3239]	Time 0.311 (0.407)	Data 0.001 (0.147)	Loss 6.0243 (6.0567)	Top-1 acc 3.125 (1.958)	Top-5 acc 9.375 (7.620)	lr 0.04876
Warmup Train [5][90/3239]	Time 0.236 (0.388)	Data 0.001 (0.131)	Loss 5.9814 (6.0561)	Top-1 acc 2.734 (1.957)	Top-5 acc 10.156 (7.568)	lr 0.04876
Warmup Train [5][100/3239]	Time 0.228 (0.374)	Data 0.001 (0.119)	Loss 6.0326 (6.0514)	Top-1 acc 2.344 (2.007)	Top-5 acc 8.594 (7.627)	lr 0.04876
Warmup Train [5][110/3239]	Time 0.211 (0.361)	Data 0.001 (0.108)	Loss 6.1694 (6.0567)	Top-1 acc 1.953 (1.964)	Top-5 acc 5.859 (7.524)	lr 0.04876
Warmup Train [5][120/3239]	Time 0.220 (0.351)	Data 0.001 (0.100)	Loss 6.0122 (6.0561)	Top-1 acc 0.781 (1.950)	Top-5 acc 8.203 (7.554)	lr 0.04875
Warmup Train [5][130/3239]	Time 0.245 (0.342)	Data 0.002 (0.092)	Loss 6.1943 (6.0604)	Top-1 acc 1.172 (1.941)	Top-5 acc 7.031 (7.458)	lr 0.04875
Warmup Train [5][140/3239]	Time 0.160 (0.333)	Data 0.001 (0.086)	Loss 6.0568 (6.0603)	Top-1 acc 1.953 (1.937)	Top-5 acc 11.719 (7.488)	lr 0.04875
Warmup Train [5][150/3239]	Time 0.211 (0.326)	Data 0.001 (0.081)	Loss 6.1109 (6.0601)	Top-1 acc 1.562 (1.940)	Top-5 acc 7.031 (7.499)	lr 0.04875
Warmup Train [5][160/3239]	Time 0.211 (0.320)	Data 0.001 (0.076)	Loss 6.0887 (6.0590)	Top-1 acc 1.172 (1.941)	Top-5 acc 6.250 (7.531)	lr 0.04875
Warmup Train [5][170/3239]	Time 0.293 (0.317)	Data 0.002 (0.072)	Loss 6.0346 (6.0587)	Top-1 acc 2.344 (1.962)	Top-5 acc 8.594 (7.557)	lr 0.04874
Warmup Train [5][180/3239]	Time 0.210 (0.312)	Data 0.001 (0.068)	Loss 6.0850 (6.0592)	Top-1 acc 1.562 (1.951)	Top-5 acc 9.375 (7.592)	lr 0.04874
Warmup Train [5][190/3239]	Time 0.190 (0.307)	Data 0.001 (0.064)	Loss 5.9882 (6.0588)	Top-1 acc 3.516 (1.965)	Top-5 acc 11.719 (7.622)	lr 0.04874
Warmup Train [5][200/3239]	Time 0.215 (0.304)	Data 0.001 (0.061)	Loss 6.0335 (6.0578)	Top-1 acc 1.953 (1.963)	Top-5 acc 8.203 (7.626)	lr 0.04874
Warmup Train [5][210/3239]	Time 0.230 (0.301)	Data 0.001 (0.059)	Loss 6.0613 (6.0582)	Top-1 acc 2.344 (1.959)	Top-5 acc 8.984 (7.618)	lr 0.04874
Warmup Train [5][220/3239]	Time 0.263 (0.298)	Data 0.001 (0.056)	Loss 6.1056 (6.0589)	Top-1 acc 1.953 (1.941)	Top-5 acc 4.297 (7.597)	lr 0.04873
Warmup Train [5][230/3239]	Time 0.210 (0.295)	Data 0.001 (0.054)	Loss 6.1011 (6.0597)	Top-1 acc 2.344 (1.963)	Top-5 acc 8.203 (7.611)	lr 0.04873
Warmup Train [5][240/3239]	Time 0.249 (0.292)	Data 0.001 (0.052)	Loss 5.9596 (6.0603)	Top-1 acc 1.953 (1.961)	Top-5 acc 7.422 (7.615)	lr 0.04873
Warmup Train [5][250/3239]	Time 0.216 (0.289)	Data 0.001 (0.050)	Loss 5.9438 (6.0592)	Top-1 acc 2.344 (1.967)	Top-5 acc 9.766 (7.612)	lr 0.04873
Warmup Train [5][260/3239]	Time 0.206 (0.287)	Data 0.001 (0.048)	Loss 6.1377 (6.0588)	Top-1 acc 1.172 (1.968)	Top-5 acc 7.422 (7.598)	lr 0.04873
Warmup Train [5][270/3239]	Time 0.302 (0.285)	Data 0.002 (0.046)	Loss 6.0811 (6.0588)	Top-1 acc 1.172 (1.963)	Top-5 acc 7.812 (7.572)	lr 0.04873
Warmup Train [5][280/3239]	Time 0.328 (0.283)	Data 0.002 (0.044)	Loss 5.9475 (6.0591)	Top-1 acc 3.906 (1.966)	Top-5 acc 10.547 (7.580)	lr 0.04872
Warmup Train [5][290/3239]	Time 0.211 (0.282)	Data 0.002 (0.043)	Loss 6.0143 (6.0584)	Top-1 acc 2.344 (1.958)	Top-5 acc 8.203 (7.599)	lr 0.04872
Warmup Train [5][300/3239]	Time 0.192 (0.280)	Data 0.002 (0.042)	Loss 6.1046 (6.0582)	Top-1 acc 1.562 (1.951)	Top-5 acc 5.859 (7.604)	lr 0.04872
Warmup Train [5][310/3239]	Time 0.185 (0.278)	Data 0.002 (0.040)	Loss 6.1358 (6.0580)	Top-1 acc 1.953 (1.963)	Top-5 acc 6.641 (7.637)	lr 0.04872
Warmup Train [5][320/3239]	Time 0.234 (0.276)	Data 0.001 (0.039)	Loss 5.9662 (6.0583)	Top-1 acc 0.781 (1.968)	Top-5 acc 11.719 (7.640)	lr 0.04872
Warmup Train [5][330/3239]	Time 0.155 (0.274)	Data 0.002 (0.038)	Loss 6.0645 (6.0586)	Top-1 acc 1.953 (1.977)	Top-5 acc 7.031 (7.650)	lr 0.04871
Warmup Train [5][340/3239]	Time 0.240 (0.272)	Data 0.001 (0.037)	Loss 6.1129 (6.0586)	Top-1 acc 2.734 (1.979)	Top-5 acc 8.984 (7.657)	lr 0.04871
Warmup Train [5][350/3239]	Time 0.179 (0.271)	Data 0.002 (0.036)	Loss 6.0328 (6.0583)	Top-1 acc 1.562 (1.982)	Top-5 acc 8.203 (7.660)	lr 0.04871
Warmup Train [5][360/3239]	Time 0.257 (0.269)	Data 0.001 (0.035)	Loss 6.1115 (6.0578)	Top-1 acc 1.562 (1.985)	Top-5 acc 7.812 (7.653)	lr 0.04871
Warmup Train [5][370/3239]	Time 0.174 (0.268)	Data 0.002 (0.035)	Loss 5.9893 (6.0569)	Top-1 acc 1.562 (1.985)	Top-5 acc 8.594 (7.669)	lr 0.04871
Warmup Train [5][380/3239]	Time 0.200 (0.267)	Data 0.001 (0.034)	Loss 6.0545 (6.0570)	Top-1 acc 1.172 (1.987)	Top-5 acc 6.250 (7.666)	lr 0.04870
Warmup Train [5][390/3239]	Time 0.254 (0.266)	Data 0.001 (0.033)	Loss 6.1183 (6.0569)	Top-1 acc 0.000 (1.973)	Top-5 acc 4.297 (7.647)	lr 0.04870
Warmup Train [5][400/3239]	Time 0.342 (0.265)	Data 0.001 (0.032)	Loss 6.0344 (6.0563)	Top-1 acc 1.953 (1.978)	Top-5 acc 7.031 (7.649)	lr 0.04870
Warmup Train [5][410/3239]	Time 0.240 (0.264)	Data 0.001 (0.031)	Loss 6.0098 (6.0556)	Top-1 acc 1.172 (1.976)	Top-5 acc 8.594 (7.637)	lr 0.04870
Warmup Train [5][420/3239]	Time 0.149 (0.263)	Data 0.001 (0.031)	Loss 6.0399 (6.0551)	Top-1 acc 2.344 (1.980)	Top-5 acc 8.594 (7.642)	lr 0.04870
Warmup Train [5][430/3239]	Time 0.195 (0.262)	Data 0.001 (0.030)	Loss 6.0850 (6.0546)	Top-1 acc 2.344 (1.986)	Top-5 acc 6.641 (7.658)	lr 0.04869
Warmup Train [5][440/3239]	Time 0.233 (0.262)	Data 0.001 (0.029)	Loss 6.1010 (6.0534)	Top-1 acc 2.344 (1.995)	Top-5 acc 7.031 (7.681)	lr 0.04869
Warmup Train [5][450/3239]	Time 0.212 (0.261)	Data 0.001 (0.029)	Loss 5.9956 (6.0533)	Top-1 acc 1.953 (1.992)	Top-5 acc 7.812 (7.677)	lr 0.04869
Warmup Train [5][460/3239]	Time 0.184 (0.260)	Data 0.001 (0.028)	Loss 6.0986 (6.0533)	Top-1 acc 1.172 (1.992)	Top-5 acc 8.594 (7.674)	lr 0.04869
Warmup Train [5][470/3239]	Time 0.287 (0.260)	Data 0.002 (0.028)	Loss 6.0632 (6.0530)	Top-1 acc 1.172 (1.998)	Top-5 acc 6.250 (7.672)	lr 0.04869
Warmup Train [5][480/3239]	Time 0.199 (0.259)	Data 0.001 (0.027)	Loss 6.0175 (6.0521)	Top-1 acc 1.172 (1.992)	Top-5 acc 8.594 (7.684)	lr 0.04868
Warmup Train [5][490/3239]	Time 0.205 (0.258)	Data 0.001 (0.027)	Loss 5.9931 (6.0516)	Top-1 acc 1.172 (1.998)	Top-5 acc 6.250 (7.679)	lr 0.04868
Warmup Train [5][500/3239]	Time 0.231 (0.257)	Data 0.026 (0.026)	Loss 6.0437 (6.0514)	Top-1 acc 3.516 (2.006)	Top-5 acc 8.594 (7.690)	lr 0.04868
Warmup Train [5][510/3239]	Time 0.283 (0.257)	Data 0.001 (0.026)	Loss 5.9795 (6.0508)	Top-1 acc 2.344 (2.017)	Top-5 acc 6.641 (7.699)	lr 0.04868
Warmup Train [5][520/3239]	Time 0.207 (0.256)	Data 0.001 (0.026)	Loss 6.0993 (6.0506)	Top-1 acc 2.344 (2.019)	Top-5 acc 8.203 (7.706)	lr 0.04868
Warmup Train [5][530/3239]	Time 0.161 (0.255)	Data 0.001 (0.025)	Loss 6.0681 (6.0507)	Top-1 acc 1.172 (2.015)	Top-5 acc 4.688 (7.698)	lr 0.04868
Warmup Train [5][540/3239]	Time 0.227 (0.255)	Data 0.001 (0.025)	Loss 5.9280 (6.0499)	Top-1 acc 2.344 (2.017)	Top-5 acc 8.203 (7.707)	lr 0.04867
Warmup Train [5][550/3239]	Time 0.244 (0.254)	Data 0.001 (0.024)	Loss 5.9721 (6.0500)	Top-1 acc 3.516 (2.023)	Top-5 acc 10.938 (7.717)	lr 0.04867
Warmup Train [5][560/3239]	Time 0.256 (0.254)	Data 0.001 (0.024)	Loss 6.0044 (6.0499)	Top-1 acc 1.953 (2.028)	Top-5 acc 6.641 (7.709)	lr 0.04867
Warmup Train [5][570/3239]	Time 0.143 (0.254)	Data 0.001 (0.024)	Loss 6.0621 (6.0492)	Top-1 acc 1.953 (2.039)	Top-5 acc 8.594 (7.724)	lr 0.04867
Warmup Train [5][580/3239]	Time 0.215 (0.254)	Data 0.001 (0.023)	Loss 5.9577 (6.0488)	Top-1 acc 1.172 (2.032)	Top-5 acc 8.203 (7.708)	lr 0.04867
Warmup Train [5][590/3239]	Time 0.259 (0.253)	Data 0.001 (0.023)	Loss 5.9550 (6.0489)	Top-1 acc 2.344 (2.032)	Top-5 acc 9.375 (7.715)	lr 0.04866
Warmup Train [5][600/3239]	Time 0.283 (0.252)	Data 0.001 (0.023)	Loss 6.0518 (6.0485)	Top-1 acc 1.172 (2.023)	Top-5 acc 7.031 (7.721)	lr 0.04866
Warmup Train [5][610/3239]	Time 0.154 (0.252)	Data 0.001 (0.022)	Loss 5.8389 (6.0477)	Top-1 acc 3.125 (2.022)	Top-5 acc 11.328 (7.742)	lr 0.04866
Warmup Train [5][620/3239]	Time 0.264 (0.252)	Data 0.001 (0.022)	Loss 6.0011 (6.0473)	Top-1 acc 1.953 (2.021)	Top-5 acc 8.594 (7.755)	lr 0.04866
Warmup Train [5][630/3239]	Time 0.203 (0.251)	Data 0.001 (0.022)	Loss 5.9157 (6.0467)	Top-1 acc 5.859 (2.032)	Top-5 acc 12.109 (7.778)	lr 0.04866
Warmup Train [5][640/3239]	Time 0.131 (0.250)	Data 0.001 (0.021)	Loss 5.9903 (6.0469)	Top-1 acc 1.172 (2.033)	Top-5 acc 5.859 (7.783)	lr 0.04865
Warmup Train [5][650/3239]	Time 0.184 (0.250)	Data 0.001 (0.021)	Loss 6.0191 (6.0467)	Top-1 acc 2.344 (2.042)	Top-5 acc 7.031 (7.786)	lr 0.04865
Warmup Train [5][660/3239]	Time 0.222 (0.250)	Data 0.001 (0.021)	Loss 6.0296 (6.0468)	Top-1 acc 3.125 (2.041)	Top-5 acc 7.812 (7.771)	lr 0.04865
Warmup Train [5][670/3239]	Time 0.265 (0.249)	Data 0.001 (0.021)	Loss 5.9688 (6.0460)	Top-1 acc 1.953 (2.046)	Top-5 acc 8.984 (7.787)	lr 0.04865
Warmup Train [5][680/3239]	Time 0.211 (0.249)	Data 0.001 (0.020)	Loss 6.1304 (6.0460)	Top-1 acc 0.391 (2.042)	Top-5 acc 5.078 (7.779)	lr 0.04865
Warmup Train [5][690/3239]	Time 0.202 (0.248)	Data 0.001 (0.020)	Loss 5.9076 (6.0455)	Top-1 acc 3.125 (2.046)	Top-5 acc 11.719 (7.780)	lr 0.04864
Warmup Train [5][700/3239]	Time 0.199 (0.248)	Data 0.001 (0.020)	Loss 5.9740 (6.0450)	Top-1 acc 2.734 (2.047)	Top-5 acc 10.938 (7.790)	lr 0.04864
Warmup Train [5][710/3239]	Time 0.197 (0.247)	Data 0.001 (0.020)	Loss 5.9818 (6.0445)	Top-1 acc 1.562 (2.048)	Top-5 acc 9.375 (7.803)	lr 0.04864
Warmup Train [5][720/3239]	Time 0.240 (0.247)	Data 0.001 (0.019)	Loss 6.0472 (6.0443)	Top-1 acc 3.906 (2.047)	Top-5 acc 7.422 (7.794)	lr 0.04864
Warmup Train [5][730/3239]	Time 0.290 (0.247)	Data 0.001 (0.019)	Loss 6.0297 (6.0436)	Top-1 acc 1.953 (2.055)	Top-5 acc 7.812 (7.810)	lr 0.04864
Warmup Train [5][740/3239]	Time 0.214 (0.246)	Data 0.001 (0.019)	Loss 6.0395 (6.0432)	Top-1 acc 1.953 (2.061)	Top-5 acc 5.469 (7.814)	lr 0.04863
Warmup Train [5][750/3239]	Time 0.232 (0.246)	Data 0.002 (0.019)	Loss 6.0003 (6.0428)	Top-1 acc 1.562 (2.066)	Top-5 acc 8.594 (7.835)	lr 0.04863
Warmup Train [5][760/3239]	Time 0.191 (0.246)	Data 0.001 (0.018)	Loss 6.0646 (6.0430)	Top-1 acc 1.562 (2.069)	Top-5 acc 9.375 (7.844)	lr 0.04863
Warmup Train [5][770/3239]	Time 0.242 (0.246)	Data 0.001 (0.018)	Loss 6.0003 (6.0429)	Top-1 acc 1.953 (2.068)	Top-5 acc 9.766 (7.856)	lr 0.04863
Warmup Train [5][780/3239]	Time 0.246 (0.246)	Data 0.002 (0.018)	Loss 5.9898 (6.0428)	Top-1 acc 3.125 (2.070)	Top-5 acc 11.719 (7.864)	lr 0.04863
Warmup Train [5][790/3239]	Time 0.231 (0.246)	Data 0.001 (0.018)	Loss 5.9130 (6.0425)	Top-1 acc 2.734 (2.070)	Top-5 acc 10.938 (7.871)	lr 0.04862
Warmup Train [5][800/3239]	Time 0.198 (0.245)	Data 0.001 (0.018)	Loss 5.9261 (6.0419)	Top-1 acc 4.297 (2.069)	Top-5 acc 9.766 (7.886)	lr 0.04862
Warmup Train [5][810/3239]	Time 0.251 (0.245)	Data 0.003 (0.017)	Loss 6.0620 (6.0417)	Top-1 acc 2.734 (2.065)	Top-5 acc 7.812 (7.883)	lr 0.04862
Warmup Train [5][820/3239]	Time 0.141 (0.245)	Data 0.001 (0.017)	Loss 6.0130 (6.0412)	Top-1 acc 2.734 (2.073)	Top-5 acc 7.031 (7.890)	lr 0.04862
Warmup Train [5][830/3239]	Time 0.333 (0.245)	Data 0.002 (0.017)	Loss 6.0078 (6.0411)	Top-1 acc 1.562 (2.069)	Top-5 acc 8.203 (7.890)	lr 0.04862
Warmup Train [5][840/3239]	Time 0.151 (0.244)	Data 0.002 (0.017)	Loss 5.9566 (6.0405)	Top-1 acc 3.125 (2.079)	Top-5 acc 10.156 (7.892)	lr 0.04861
Warmup Train [5][850/3239]	Time 0.224 (0.244)	Data 0.001 (0.017)	Loss 6.1145 (6.0407)	Top-1 acc 1.562 (2.074)	Top-5 acc 6.250 (7.890)	lr 0.04861
Warmup Train [5][860/3239]	Time 0.150 (0.244)	Data 0.001 (0.017)	Loss 6.0248 (6.0408)	Top-1 acc 1.953 (2.073)	Top-5 acc 9.375 (7.891)	lr 0.04861
Warmup Train [5][870/3239]	Time 0.259 (0.243)	Data 0.001 (0.017)	Loss 6.0594 (6.0402)	Top-1 acc 0.391 (2.076)	Top-5 acc 6.250 (7.905)	lr 0.04861
Warmup Train [5][880/3239]	Time 0.241 (0.243)	Data 0.001 (0.016)	Loss 6.0276 (6.0401)	Top-1 acc 3.125 (2.080)	Top-5 acc 8.594 (7.906)	lr 0.04861
Warmup Train [5][890/3239]	Time 0.216 (0.243)	Data 0.001 (0.016)	Loss 6.0194 (6.0396)	Top-1 acc 1.953 (2.079)	Top-5 acc 6.250 (7.909)	lr 0.04860
Warmup Train [5][900/3239]	Time 0.164 (0.243)	Data 0.001 (0.016)	Loss 6.0806 (6.0395)	Top-1 acc 1.953 (2.078)	Top-5 acc 7.422 (7.913)	lr 0.04860
Warmup Train [5][910/3239]	Time 0.149 (0.242)	Data 0.002 (0.016)	Loss 6.0325 (6.0390)	Top-1 acc 3.516 (2.076)	Top-5 acc 10.156 (7.914)	lr 0.04860
Warmup Train [5][920/3239]	Time 0.217 (0.242)	Data 0.001 (0.016)	Loss 5.9902 (6.0382)	Top-1 acc 3.125 (2.084)	Top-5 acc 8.203 (7.935)	lr 0.04860
Warmup Train [5][930/3239]	Time 0.206 (0.242)	Data 0.001 (0.016)	Loss 5.9283 (6.0377)	Top-1 acc 2.344 (2.085)	Top-5 acc 12.500 (7.940)	lr 0.04860
Warmup Train [5][940/3239]	Time 0.285 (0.242)	Data 0.001 (0.016)	Loss 6.0310 (6.0372)	Top-1 acc 1.562 (2.084)	Top-5 acc 6.641 (7.942)	lr 0.04859
Warmup Train [5][950/3239]	Time 0.139 (0.241)	Data 0.001 (0.015)	Loss 6.0071 (6.0366)	Top-1 acc 2.344 (2.090)	Top-5 acc 7.812 (7.953)	lr 0.04859
Warmup Train [5][960/3239]	Time 0.203 (0.241)	Data 0.001 (0.015)	Loss 5.9365 (6.0364)	Top-1 acc 3.125 (2.092)	Top-5 acc 11.328 (7.957)	lr 0.04859
Warmup Train [5][970/3239]	Time 0.254 (0.241)	Data 0.001 (0.015)	Loss 6.0040 (6.0364)	Top-1 acc 2.344 (2.093)	Top-5 acc 10.547 (7.959)	lr 0.04859
Warmup Train [5][980/3239]	Time 0.196 (0.241)	Data 0.001 (0.015)	Loss 6.0522 (6.0360)	Top-1 acc 2.344 (2.094)	Top-5 acc 7.422 (7.962)	lr 0.04859
Warmup Train [5][990/3239]	Time 0.214 (0.241)	Data 0.001 (0.015)	Loss 6.0612 (6.0360)	Top-1 acc 1.953 (2.092)	Top-5 acc 7.812 (7.965)	lr 0.04858
Warmup Train [5][1000/3239]	Time 0.216 (0.240)	Data 0.001 (0.015)	Loss 5.9622 (6.0356)	Top-1 acc 2.344 (2.095)	Top-5 acc 8.594 (7.977)	lr 0.04858
Warmup Train [5][1010/3239]	Time 0.214 (0.240)	Data 0.001 (0.015)	Loss 5.9748 (6.0351)	Top-1 acc 2.734 (2.098)	Top-5 acc 8.203 (7.984)	lr 0.04858
Warmup Train [5][1020/3239]	Time 0.247 (0.240)	Data 0.001 (0.015)	Loss 6.0702 (6.0348)	Top-1 acc 2.344 (2.099)	Top-5 acc 7.031 (7.989)	lr 0.04858
Warmup Train [5][1030/3239]	Time 0.252 (0.240)	Data 0.001 (0.014)	Loss 5.9609 (6.0344)	Top-1 acc 1.953 (2.103)	Top-5 acc 8.203 (8.000)	lr 0.04858
Warmup Train [5][1040/3239]	Time 0.297 (0.240)	Data 0.001 (0.014)	Loss 6.0624 (6.0341)	Top-1 acc 1.953 (2.104)	Top-5 acc 10.156 (8.008)	lr 0.04857
Warmup Train [5][1050/3239]	Time 0.266 (0.240)	Data 0.001 (0.014)	Loss 6.0843 (6.0341)	Top-1 acc 1.172 (2.107)	Top-5 acc 6.641 (8.009)	lr 0.04857
Warmup Train [5][1060/3239]	Time 0.204 (0.239)	Data 0.001 (0.014)	Loss 5.9853 (6.0336)	Top-1 acc 3.906 (2.112)	Top-5 acc 13.672 (8.021)	lr 0.04857
Warmup Train [5][1070/3239]	Time 0.202 (0.239)	Data 0.001 (0.014)	Loss 5.9775 (6.0334)	Top-1 acc 2.344 (2.114)	Top-5 acc 8.984 (8.028)	lr 0.04857
Warmup Train [5][1080/3239]	Time 0.207 (0.239)	Data 0.001 (0.014)	Loss 5.9892 (6.0332)	Top-1 acc 1.562 (2.112)	Top-5 acc 8.594 (8.024)	lr 0.04857
Warmup Train [5][1090/3239]	Time 0.193 (0.239)	Data 0.002 (0.014)	Loss 5.9734 (6.0328)	Top-1 acc 2.344 (2.112)	Top-5 acc 9.375 (8.029)	lr 0.04856
Warmup Train [5][1100/3239]	Time 0.205 (0.238)	Data 0.001 (0.014)	Loss 5.9453 (6.0324)	Top-1 acc 3.125 (2.115)	Top-5 acc 8.594 (8.042)	lr 0.04856
Warmup Train [5][1110/3239]	Time 0.242 (0.238)	Data 0.001 (0.014)	Loss 5.9593 (6.0320)	Top-1 acc 1.562 (2.118)	Top-5 acc 10.547 (8.049)	lr 0.04856
Warmup Train [5][1120/3239]	Time 0.239 (0.238)	Data 0.001 (0.013)	Loss 6.1232 (6.0320)	Top-1 acc 1.562 (2.117)	Top-5 acc 7.422 (8.052)	lr 0.04856
Warmup Train [5][1130/3239]	Time 0.244 (0.238)	Data 0.001 (0.013)	Loss 5.9587 (6.0317)	Top-1 acc 3.516 (2.123)	Top-5 acc 8.984 (8.060)	lr 0.04856
Warmup Train [5][1140/3239]	Time 0.153 (0.238)	Data 0.001 (0.013)	Loss 5.9657 (6.0314)	Top-1 acc 1.562 (2.125)	Top-5 acc 8.203 (8.066)	lr 0.04855
Warmup Train [5][1150/3239]	Time 0.334 (0.238)	Data 0.001 (0.013)	Loss 6.0149 (6.0312)	Top-1 acc 1.953 (2.128)	Top-5 acc 7.812 (8.066)	lr 0.04855
Warmup Train [5][1160/3239]	Time 0.200 (0.238)	Data 0.001 (0.013)	Loss 6.0394 (6.0313)	Top-1 acc 1.172 (2.128)	Top-5 acc 10.156 (8.065)	lr 0.04855
Warmup Train [5][1170/3239]	Time 0.137 (0.238)	Data 0.001 (0.013)	Loss 5.9871 (6.0311)	Top-1 acc 2.734 (2.129)	Top-5 acc 8.203 (8.065)	lr 0.04855
Warmup Train [5][1180/3239]	Time 0.231 (0.237)	Data 0.001 (0.013)	Loss 6.0043 (6.0309)	Top-1 acc 2.734 (2.128)	Top-5 acc 8.203 (8.064)	lr 0.04855
Warmup Train [5][1190/3239]	Time 0.180 (0.237)	Data 0.001 (0.013)	Loss 5.9426 (6.0304)	Top-1 acc 1.562 (2.129)	Top-5 acc 7.422 (8.077)	lr 0.04854
Warmup Train [5][1200/3239]	Time 0.201 (0.237)	Data 0.003 (0.013)	Loss 6.0372 (6.0302)	Top-1 acc 3.516 (2.130)	Top-5 acc 7.812 (8.080)	lr 0.04854
Warmup Train [5][1210/3239]	Time 0.208 (0.237)	Data 0.002 (0.013)	Loss 5.9785 (6.0301)	Top-1 acc 2.344 (2.132)	Top-5 acc 7.812 (8.078)	lr 0.04854
Warmup Train [5][1220/3239]	Time 0.226 (0.237)	Data 0.001 (0.013)	Loss 6.0138 (6.0300)	Top-1 acc 1.953 (2.134)	Top-5 acc 8.984 (8.083)	lr 0.04854
Warmup Train [5][1230/3239]	Time 0.181 (0.237)	Data 0.001 (0.012)	Loss 5.9325 (6.0300)	Top-1 acc 2.734 (2.135)	Top-5 acc 9.375 (8.083)	lr 0.04854
Warmup Train [5][1240/3239]	Time 0.248 (0.237)	Data 0.003 (0.012)	Loss 6.1126 (6.0300)	Top-1 acc 2.344 (2.137)	Top-5 acc 10.156 (8.087)	lr 0.04853
Warmup Train [5][1250/3239]	Time 0.331 (0.237)	Data 0.001 (0.012)	Loss 6.0497 (6.0297)	Top-1 acc 1.562 (2.138)	Top-5 acc 7.422 (8.093)	lr 0.04853
Warmup Train [5][1260/3239]	Time 0.269 (0.236)	Data 0.001 (0.012)	Loss 6.0039 (6.0294)	Top-1 acc 2.344 (2.139)	Top-5 acc 9.766 (8.100)	lr 0.04853
Warmup Train [5][1270/3239]	Time 0.189 (0.236)	Data 0.001 (0.012)	Loss 6.0118 (6.0294)	Top-1 acc 3.516 (2.143)	Top-5 acc 9.375 (8.102)	lr 0.04853
Warmup Train [5][1280/3239]	Time 0.179 (0.236)	Data 0.001 (0.012)	Loss 5.9583 (6.0289)	Top-1 acc 1.562 (2.147)	Top-5 acc 8.594 (8.115)	lr 0.04853
Warmup Train [5][1290/3239]	Time 0.282 (0.236)	Data 0.001 (0.012)	Loss 6.0006 (6.0286)	Top-1 acc 3.906 (2.152)	Top-5 acc 10.547 (8.131)	lr 0.04852
Warmup Train [5][1300/3239]	Time 0.190 (0.236)	Data 0.001 (0.012)	Loss 5.9142 (6.0284)	Top-1 acc 3.516 (2.154)	Top-5 acc 10.547 (8.136)	lr 0.04852
Warmup Train [5][1310/3239]	Time 0.222 (0.236)	Data 0.001 (0.012)	Loss 6.0347 (6.0281)	Top-1 acc 2.344 (2.159)	Top-5 acc 6.641 (8.141)	lr 0.04852
Warmup Train [5][1320/3239]	Time 0.222 (0.236)	Data 0.001 (0.012)	Loss 6.0001 (6.0276)	Top-1 acc 1.562 (2.164)	Top-5 acc 5.469 (8.150)	lr 0.04852
Warmup Train [5][1330/3239]	Time 0.256 (0.236)	Data 0.001 (0.012)	Loss 6.0446 (6.0271)	Top-1 acc 2.734 (2.172)	Top-5 acc 8.594 (8.161)	lr 0.04852
Warmup Train [5][1340/3239]	Time 0.211 (0.236)	Data 0.002 (0.012)	Loss 5.9637 (6.0265)	Top-1 acc 1.562 (2.179)	Top-5 acc 8.594 (8.173)	lr 0.04851
Warmup Train [5][1350/3239]	Time 0.239 (0.236)	Data 0.001 (0.012)	Loss 5.8769 (6.0262)	Top-1 acc 4.297 (2.181)	Top-5 acc 13.672 (8.175)	lr 0.04851
Warmup Train [5][1360/3239]	Time 0.250 (0.236)	Data 0.001 (0.012)	Loss 5.8842 (6.0257)	Top-1 acc 2.734 (2.183)	Top-5 acc 8.594 (8.181)	lr 0.04851
Warmup Train [5][1370/3239]	Time 0.155 (0.235)	Data 0.001 (0.012)	Loss 5.9030 (6.0256)	Top-1 acc 3.516 (2.184)	Top-5 acc 11.719 (8.182)	lr 0.04851
Warmup Train [5][1380/3239]	Time 0.146 (0.235)	Data 0.002 (0.011)	Loss 6.0104 (6.0254)	Top-1 acc 1.172 (2.185)	Top-5 acc 6.641 (8.186)	lr 0.04850
Warmup Train [5][1390/3239]	Time 0.268 (0.235)	Data 0.001 (0.011)	Loss 5.9824 (6.0253)	Top-1 acc 3.125 (2.187)	Top-5 acc 10.156 (8.189)	lr 0.04850
Warmup Train [5][1400/3239]	Time 0.192 (0.235)	Data 0.001 (0.011)	Loss 6.1443 (6.0252)	Top-1 acc 0.781 (2.184)	Top-5 acc 3.906 (8.194)	lr 0.04850
Warmup Train [5][1410/3239]	Time 0.210 (0.235)	Data 0.001 (0.011)	Loss 5.8648 (6.0249)	Top-1 acc 4.297 (2.190)	Top-5 acc 11.719 (8.201)	lr 0.04850
Warmup Train [5][1420/3239]	Time 0.148 (0.235)	Data 0.001 (0.011)	Loss 6.0741 (6.0246)	Top-1 acc 1.562 (2.191)	Top-5 acc 6.641 (8.204)	lr 0.04850
Warmup Train [5][1430/3239]	Time 0.172 (0.234)	Data 0.001 (0.011)	Loss 6.0459 (6.0243)	Top-1 acc 1.172 (2.193)	Top-5 acc 7.422 (8.208)	lr 0.04849
Warmup Train [5][1440/3239]	Time 0.203 (0.234)	Data 0.001 (0.011)	Loss 6.0123 (6.0238)	Top-1 acc 1.562 (2.196)	Top-5 acc 8.594 (8.217)	lr 0.04849
Warmup Train [5][1450/3239]	Time 0.181 (0.234)	Data 0.001 (0.011)	Loss 5.9887 (6.0234)	Top-1 acc 1.562 (2.201)	Top-5 acc 8.984 (8.234)	lr 0.04849
Warmup Train [5][1460/3239]	Time 0.210 (0.234)	Data 0.001 (0.011)	Loss 6.0106 (6.0230)	Top-1 acc 2.344 (2.201)	Top-5 acc 8.203 (8.237)	lr 0.04849
Warmup Train [5][1470/3239]	Time 0.292 (0.234)	Data 0.002 (0.011)	Loss 5.9032 (6.0226)	Top-1 acc 1.172 (2.203)	Top-5 acc 9.766 (8.244)	lr 0.04849
Warmup Train [5][1480/3239]	Time 0.317 (0.234)	Data 0.001 (0.011)	Loss 5.9491 (6.0222)	Top-1 acc 4.297 (2.208)	Top-5 acc 10.938 (8.252)	lr 0.04848
Warmup Train [5][1490/3239]	Time 0.163 (0.234)	Data 0.002 (0.011)	Loss 5.8846 (6.0218)	Top-1 acc 3.125 (2.212)	Top-5 acc 7.031 (8.260)	lr 0.04848
Warmup Train [5][1500/3239]	Time 0.218 (0.234)	Data 0.001 (0.011)	Loss 6.0734 (6.0217)	Top-1 acc 1.172 (2.212)	Top-5 acc 5.469 (8.261)	lr 0.04848
Warmup Train [5][1510/3239]	Time 0.194 (0.234)	Data 0.001 (0.011)	Loss 6.0177 (6.0213)	Top-1 acc 1.562 (2.213)	Top-5 acc 8.203 (8.268)	lr 0.04848
Warmup Train [5][1520/3239]	Time 0.210 (0.234)	Data 0.001 (0.011)	Loss 5.9325 (6.0208)	Top-1 acc 0.781 (2.215)	Top-5 acc 8.203 (8.275)	lr 0.04848
Warmup Train [5][1530/3239]	Time 0.217 (0.234)	Data 0.001 (0.011)	Loss 5.9577 (6.0204)	Top-1 acc 2.734 (2.219)	Top-5 acc 8.984 (8.280)	lr 0.04847
Warmup Train [5][1540/3239]	Time 0.215 (0.233)	Data 0.001 (0.011)	Loss 5.8433 (6.0201)	Top-1 acc 2.734 (2.220)	Top-5 acc 14.062 (8.288)	lr 0.04847
Warmup Train [5][1550/3239]	Time 0.218 (0.233)	Data 0.001 (0.011)	Loss 5.8965 (6.0196)	Top-1 acc 3.125 (2.222)	Top-5 acc 10.547 (8.296)	lr 0.04847
Warmup Train [5][1560/3239]	Time 0.240 (0.233)	Data 0.001 (0.011)	Loss 5.8980 (6.0192)	Top-1 acc 3.906 (2.223)	Top-5 acc 12.109 (8.300)	lr 0.04847
Warmup Train [5][1570/3239]	Time 0.140 (0.233)	Data 0.002 (0.010)	Loss 5.9371 (6.0189)	Top-1 acc 2.734 (2.223)	Top-5 acc 8.594 (8.303)	lr 0.04847
Warmup Train [5][1580/3239]	Time 0.199 (0.233)	Data 0.001 (0.010)	Loss 6.0329 (6.0186)	Top-1 acc 2.734 (2.226)	Top-5 acc 8.203 (8.316)	lr 0.04846
Warmup Train [5][1590/3239]	Time 0.329 (0.233)	Data 0.001 (0.010)	Loss 5.9192 (6.0183)	Top-1 acc 5.078 (2.233)	Top-5 acc 11.328 (8.324)	lr 0.04846
Warmup Train [5][1600/3239]	Time 0.190 (0.233)	Data 0.001 (0.010)	Loss 5.9671 (6.0180)	Top-1 acc 2.734 (2.233)	Top-5 acc 10.156 (8.333)	lr 0.04846
Warmup Train [5][1610/3239]	Time 0.241 (0.233)	Data 0.001 (0.010)	Loss 6.0231 (6.0177)	Top-1 acc 3.906 (2.236)	Top-5 acc 8.594 (8.338)	lr 0.04846
Warmup Train [5][1620/3239]	Time 0.217 (0.233)	Data 0.001 (0.010)	Loss 5.8444 (6.0175)	Top-1 acc 1.953 (2.235)	Top-5 acc 9.375 (8.342)	lr 0.04845
Warmup Train [5][1630/3239]	Time 0.286 (0.233)	Data 0.001 (0.010)	Loss 5.9435 (6.0173)	Top-1 acc 3.125 (2.234)	Top-5 acc 9.375 (8.344)	lr 0.04845
Warmup Train [5][1640/3239]	Time 0.256 (0.233)	Data 0.001 (0.010)	Loss 5.9720 (6.0171)	Top-1 acc 2.734 (2.236)	Top-5 acc 9.766 (8.352)	lr 0.04845
Warmup Train [5][1650/3239]	Time 0.228 (0.233)	Data 0.003 (0.010)	Loss 5.9630 (6.0168)	Top-1 acc 1.562 (2.237)	Top-5 acc 7.812 (8.355)	lr 0.04845
Warmup Train [5][1660/3239]	Time 0.200 (0.232)	Data 0.001 (0.010)	Loss 5.9059 (6.0165)	Top-1 acc 3.516 (2.241)	Top-5 acc 6.641 (8.364)	lr 0.04845
Warmup Train [5][1670/3239]	Time 0.210 (0.232)	Data 0.001 (0.010)	Loss 6.0418 (6.0162)	Top-1 acc 3.516 (2.244)	Top-5 acc 8.984 (8.364)	lr 0.04844
Warmup Train [5][1680/3239]	Time 0.172 (0.232)	Data 0.001 (0.010)	Loss 6.0220 (6.0160)	Top-1 acc 3.125 (2.246)	Top-5 acc 8.594 (8.368)	lr 0.04844
Warmup Train [5][1690/3239]	Time 0.355 (0.232)	Data 0.002 (0.010)	Loss 5.9326 (6.0157)	Top-1 acc 3.906 (2.249)	Top-5 acc 11.328 (8.377)	lr 0.04844
Warmup Train [5][1700/3239]	Time 0.175 (0.232)	Data 0.001 (0.010)	Loss 6.0071 (6.0155)	Top-1 acc 3.125 (2.250)	Top-5 acc 6.641 (8.379)	lr 0.04844
Warmup Train [5][1710/3239]	Time 0.286 (0.232)	Data 0.001 (0.010)	Loss 5.9667 (6.0151)	Top-1 acc 3.125 (2.252)	Top-5 acc 9.375 (8.388)	lr 0.04844
Warmup Train [5][1720/3239]	Time 0.198 (0.232)	Data 0.001 (0.010)	Loss 5.8637 (6.0147)	Top-1 acc 2.734 (2.254)	Top-5 acc 14.844 (8.402)	lr 0.04843
Warmup Train [5][1730/3239]	Time 0.191 (0.232)	Data 0.001 (0.010)	Loss 5.9169 (6.0142)	Top-1 acc 3.125 (2.258)	Top-5 acc 10.547 (8.414)	lr 0.04843
Warmup Train [5][1740/3239]	Time 0.235 (0.232)	Data 0.001 (0.010)	Loss 5.9126 (6.0139)	Top-1 acc 3.516 (2.262)	Top-5 acc 9.766 (8.418)	lr 0.04843
Warmup Train [5][1750/3239]	Time 0.272 (0.232)	Data 0.001 (0.010)	Loss 6.0095 (6.0138)	Top-1 acc 2.734 (2.260)	Top-5 acc 8.594 (8.420)	lr 0.04843
Warmup Train [5][1760/3239]	Time 0.219 (0.232)	Data 0.001 (0.010)	Loss 5.9100 (6.0134)	Top-1 acc 1.953 (2.262)	Top-5 acc 10.156 (8.426)	lr 0.04843
Warmup Train [5][1770/3239]	Time 0.197 (0.232)	Data 0.001 (0.010)	Loss 6.0460 (6.0130)	Top-1 acc 1.562 (2.265)	Top-5 acc 6.250 (8.436)	lr 0.04842
Warmup Train [5][1780/3239]	Time 0.238 (0.232)	Data 0.001 (0.010)	Loss 5.9293 (6.0127)	Top-1 acc 3.516 (2.269)	Top-5 acc 9.375 (8.443)	lr 0.04842
Warmup Train [5][1790/3239]	Time 0.165 (0.232)	Data 0.001 (0.010)	Loss 6.0153 (6.0124)	Top-1 acc 1.953 (2.272)	Top-5 acc 8.984 (8.452)	lr 0.04842
Warmup Train [5][1800/3239]	Time 0.271 (0.232)	Data 0.001 (0.009)	Loss 5.9389 (6.0122)	Top-1 acc 3.906 (2.274)	Top-5 acc 10.938 (8.451)	lr 0.04842
Warmup Train [5][1810/3239]	Time 0.172 (0.232)	Data 0.001 (0.009)	Loss 6.0997 (6.0120)	Top-1 acc 1.562 (2.276)	Top-5 acc 5.469 (8.456)	lr 0.04841
Warmup Train [5][1820/3239]	Time 0.186 (0.231)	Data 0.002 (0.009)	Loss 6.0352 (6.0116)	Top-1 acc 1.953 (2.280)	Top-5 acc 7.812 (8.459)	lr 0.04841
Warmup Train [5][1830/3239]	Time 0.142 (0.231)	Data 0.001 (0.009)	Loss 6.0589 (6.0113)	Top-1 acc 2.344 (2.283)	Top-5 acc 7.422 (8.469)	lr 0.04841
Warmup Train [5][1840/3239]	Time 0.185 (0.231)	Data 0.001 (0.009)	Loss 5.9384 (6.0109)	Top-1 acc 1.953 (2.284)	Top-5 acc 10.156 (8.476)	lr 0.04841
Warmup Train [5][1850/3239]	Time 0.182 (0.231)	Data 0.001 (0.009)	Loss 5.9441 (6.0107)	Top-1 acc 3.125 (2.287)	Top-5 acc 8.203 (8.485)	lr 0.04841
Warmup Train [5][1860/3239]	Time 0.189 (0.231)	Data 0.001 (0.009)	Loss 5.8786 (6.0103)	Top-1 acc 2.734 (2.293)	Top-5 acc 9.766 (8.493)	lr 0.04840
Warmup Train [5][1870/3239]	Time 0.204 (0.231)	Data 0.001 (0.009)	Loss 5.9277 (6.0101)	Top-1 acc 2.344 (2.293)	Top-5 acc 8.203 (8.493)	lr 0.04840
Warmup Train [5][1880/3239]	Time 0.173 (0.231)	Data 0.002 (0.009)	Loss 5.9682 (6.0099)	Top-1 acc 2.734 (2.298)	Top-5 acc 9.766 (8.501)	lr 0.04840
Warmup Train [5][1890/3239]	Time 0.203 (0.231)	Data 0.002 (0.009)	Loss 5.9103 (6.0095)	Top-1 acc 3.125 (2.299)	Top-5 acc 10.156 (8.511)	lr 0.04840
Warmup Train [5][1900/3239]	Time 0.309 (0.231)	Data 0.001 (0.009)	Loss 5.8205 (6.0092)	Top-1 acc 4.297 (2.302)	Top-5 acc 10.547 (8.515)	lr 0.04840
Warmup Train [5][1910/3239]	Time 0.263 (0.231)	Data 0.001 (0.009)	Loss 6.0252 (6.0090)	Top-1 acc 3.906 (2.302)	Top-5 acc 9.375 (8.522)	lr 0.04839
Warmup Train [5][1920/3239]	Time 0.267 (0.231)	Data 0.001 (0.009)	Loss 5.9775 (6.0087)	Top-1 acc 1.953 (2.305)	Top-5 acc 8.203 (8.527)	lr 0.04839
Warmup Train [5][1930/3239]	Time 0.191 (0.231)	Data 0.001 (0.009)	Loss 5.9421 (6.0085)	Top-1 acc 3.906 (2.308)	Top-5 acc 11.328 (8.534)	lr 0.04839
Warmup Train [5][1940/3239]	Time 0.142 (0.231)	Data 0.001 (0.009)	Loss 6.0330 (6.0082)	Top-1 acc 2.734 (2.308)	Top-5 acc 9.375 (8.543)	lr 0.04839
Warmup Train [5][1950/3239]	Time 0.185 (0.231)	Data 0.002 (0.009)	Loss 6.0048 (6.0079)	Top-1 acc 2.734 (2.308)	Top-5 acc 7.422 (8.548)	lr 0.04838
Warmup Train [5][1960/3239]	Time 0.217 (0.231)	Data 0.001 (0.009)	Loss 5.9230 (6.0074)	Top-1 acc 3.516 (2.312)	Top-5 acc 10.547 (8.555)	lr 0.04838
Warmup Train [5][1970/3239]	Time 0.162 (0.231)	Data 0.002 (0.009)	Loss 6.0060 (6.0073)	Top-1 acc 1.562 (2.312)	Top-5 acc 9.375 (8.563)	lr 0.04838
Warmup Train [5][1980/3239]	Time 0.233 (0.231)	Data 0.001 (0.009)	Loss 5.8611 (6.0070)	Top-1 acc 3.125 (2.313)	Top-5 acc 11.719 (8.567)	lr 0.04838
Warmup Train [5][1990/3239]	Time 0.170 (0.231)	Data 0.001 (0.009)	Loss 5.9584 (6.0066)	Top-1 acc 2.734 (2.315)	Top-5 acc 10.938 (8.579)	lr 0.04838
Warmup Train [5][2000/3239]	Time 0.287 (0.231)	Data 0.001 (0.009)	Loss 5.9829 (6.0065)	Top-1 acc 1.953 (2.316)	Top-5 acc 8.203 (8.579)	lr 0.04837
Warmup Train [5][2010/3239]	Time 0.306 (0.231)	Data 0.001 (0.009)	Loss 5.8397 (6.0062)	Top-1 acc 3.125 (2.318)	Top-5 acc 12.109 (8.587)	lr 0.04837
Warmup Train [5][2020/3239]	Time 0.214 (0.230)	Data 0.001 (0.009)	Loss 5.9816 (6.0058)	Top-1 acc 2.734 (2.321)	Top-5 acc 9.766 (8.596)	lr 0.04837
Warmup Train [5][2030/3239]	Time 0.224 (0.230)	Data 0.002 (0.009)	Loss 5.8795 (6.0055)	Top-1 acc 3.906 (2.322)	Top-5 acc 10.547 (8.597)	lr 0.04837
Warmup Train [5][2040/3239]	Time 0.181 (0.230)	Data 0.001 (0.009)	Loss 5.9454 (6.0053)	Top-1 acc 3.516 (2.322)	Top-5 acc 10.547 (8.599)	lr 0.04837
Warmup Train [5][2050/3239]	Time 0.202 (0.230)	Data 0.001 (0.009)	Loss 5.8912 (6.0048)	Top-1 acc 3.516 (2.327)	Top-5 acc 12.500 (8.611)	lr 0.04836
Warmup Train [5][2060/3239]	Time 0.182 (0.230)	Data 0.001 (0.009)	Loss 6.1078 (6.0044)	Top-1 acc 0.781 (2.327)	Top-5 acc 8.203 (8.616)	lr 0.04836
Warmup Train [5][2070/3239]	Time 0.192 (0.230)	Data 0.001 (0.009)	Loss 6.0120 (6.0042)	Top-1 acc 1.953 (2.331)	Top-5 acc 7.422 (8.620)	lr 0.04836
Warmup Train [5][2080/3239]	Time 0.228 (0.230)	Data 0.002 (0.009)	Loss 5.9919 (6.0041)	Top-1 acc 1.562 (2.332)	Top-5 acc 7.812 (8.617)	lr 0.04836
Warmup Train [5][2090/3239]	Time 0.162 (0.230)	Data 0.002 (0.009)	Loss 5.9547 (6.0037)	Top-1 acc 1.953 (2.336)	Top-5 acc 8.594 (8.625)	lr 0.04835
Warmup Train [5][2100/3239]	Time 0.329 (0.230)	Data 0.001 (0.009)	Loss 5.8857 (6.0033)	Top-1 acc 4.297 (2.339)	Top-5 acc 14.062 (8.632)	lr 0.04835
Warmup Train [5][2110/3239]	Time 0.263 (0.230)	Data 0.001 (0.009)	Loss 6.0519 (6.0031)	Top-1 acc 1.172 (2.340)	Top-5 acc 8.203 (8.640)	lr 0.04835
Warmup Train [5][2120/3239]	Time 0.179 (0.230)	Data 0.001 (0.009)	Loss 5.9894 (6.0027)	Top-1 acc 3.125 (2.342)	Top-5 acc 8.203 (8.643)	lr 0.04835
Warmup Train [5][2130/3239]	Time 0.179 (0.230)	Data 0.001 (0.009)	Loss 5.9801 (6.0024)	Top-1 acc 2.734 (2.344)	Top-5 acc 10.156 (8.650)	lr 0.04835
Warmup Train [5][2140/3239]	Time 0.201 (0.230)	Data 0.001 (0.009)	Loss 5.9332 (6.0022)	Top-1 acc 3.125 (2.346)	Top-5 acc 11.328 (8.652)	lr 0.04834
Warmup Train [5][2150/3239]	Time 0.237 (0.230)	Data 0.001 (0.009)	Loss 5.9131 (6.0021)	Top-1 acc 4.297 (2.348)	Top-5 acc 10.547 (8.658)	lr 0.04834
Warmup Train [5][2160/3239]	Time 0.142 (0.230)	Data 0.002 (0.009)	Loss 6.0641 (6.0019)	Top-1 acc 3.516 (2.351)	Top-5 acc 8.984 (8.661)	lr 0.04834
Warmup Train [5][2170/3239]	Time 0.226 (0.230)	Data 0.001 (0.009)	Loss 5.9546 (6.0016)	Top-1 acc 2.344 (2.355)	Top-5 acc 11.328 (8.670)	lr 0.04834
Warmup Train [5][2180/3239]	Time 0.242 (0.230)	Data 0.001 (0.009)	Loss 5.9002 (6.0013)	Top-1 acc 4.297 (2.359)	Top-5 acc 10.156 (8.674)	lr 0.04834
Warmup Train [5][2190/3239]	Time 0.185 (0.230)	Data 0.001 (0.009)	Loss 6.0698 (6.0010)	Top-1 acc 0.781 (2.361)	Top-5 acc 5.469 (8.676)	lr 0.04833
Warmup Train [5][2200/3239]	Time 0.225 (0.230)	Data 0.002 (0.008)	Loss 5.8978 (6.0006)	Top-1 acc 1.562 (2.361)	Top-5 acc 8.984 (8.684)	lr 0.04833
Warmup Train [5][2210/3239]	Time 0.238 (0.230)	Data 0.001 (0.008)	Loss 6.0343 (6.0004)	Top-1 acc 1.172 (2.363)	Top-5 acc 7.422 (8.690)	lr 0.04833
Warmup Train [5][2220/3239]	Time 0.289 (0.229)	Data 0.001 (0.008)	Loss 5.9184 (6.0001)	Top-1 acc 3.125 (2.367)	Top-5 acc 9.375 (8.700)	lr 0.04833
Warmup Train [5][2230/3239]	Time 0.200 (0.229)	Data 0.001 (0.008)	Loss 5.9460 (5.9997)	Top-1 acc 1.953 (2.369)	Top-5 acc 11.328 (8.708)	lr 0.04832
Warmup Train [5][2240/3239]	Time 0.245 (0.229)	Data 0.001 (0.008)	Loss 5.9422 (5.9995)	Top-1 acc 2.344 (2.370)	Top-5 acc 12.500 (8.711)	lr 0.04832
Warmup Train [5][2250/3239]	Time 0.197 (0.229)	Data 0.001 (0.008)	Loss 5.9038 (5.9992)	Top-1 acc 5.078 (2.375)	Top-5 acc 11.719 (8.722)	lr 0.04832
Warmup Train [5][2260/3239]	Time 0.183 (0.229)	Data 0.001 (0.008)	Loss 5.9356 (5.9987)	Top-1 acc 2.734 (2.378)	Top-5 acc 9.766 (8.729)	lr 0.04832
Warmup Train [5][2270/3239]	Time 0.186 (0.229)	Data 0.001 (0.008)	Loss 5.8088 (5.9984)	Top-1 acc 3.516 (2.382)	Top-5 acc 12.109 (8.737)	lr 0.04832
Warmup Train [5][2280/3239]	Time 0.130 (0.229)	Data 0.003 (0.008)	Loss 5.9569 (5.9981)	Top-1 acc 1.953 (2.385)	Top-5 acc 7.812 (8.744)	lr 0.04831
Warmup Train [5][2290/3239]	Time 0.163 (0.229)	Data 0.001 (0.008)	Loss 5.9966 (5.9977)	Top-1 acc 4.688 (2.388)	Top-5 acc 11.328 (8.752)	lr 0.04831
Warmup Train [5][2300/3239]	Time 0.210 (0.229)	Data 0.001 (0.008)	Loss 5.8906 (5.9974)	Top-1 acc 3.906 (2.388)	Top-5 acc 12.109 (8.757)	lr 0.04831
Warmup Train [5][2310/3239]	Time 0.204 (0.229)	Data 0.001 (0.008)	Loss 5.8738 (5.9972)	Top-1 acc 2.344 (2.390)	Top-5 acc 10.156 (8.761)	lr 0.04831
Warmup Train [5][2320/3239]	Time 0.235 (0.229)	Data 0.001 (0.008)	Loss 5.9908 (5.9970)	Top-1 acc 3.516 (2.394)	Top-5 acc 9.766 (8.767)	lr 0.04830
Warmup Train [5][2330/3239]	Time 0.194 (0.229)	Data 0.001 (0.008)	Loss 5.9560 (5.9968)	Top-1 acc 3.125 (2.395)	Top-5 acc 8.594 (8.770)	lr 0.04830
Warmup Train [5][2340/3239]	Time 0.279 (0.229)	Data 0.001 (0.008)	Loss 5.9427 (5.9966)	Top-1 acc 2.734 (2.398)	Top-5 acc 6.641 (8.773)	lr 0.04830
Warmup Train [5][2350/3239]	Time 0.275 (0.229)	Data 0.002 (0.008)	Loss 6.0022 (5.9963)	Top-1 acc 1.953 (2.398)	Top-5 acc 8.203 (8.782)	lr 0.04830
Warmup Train [5][2360/3239]	Time 0.234 (0.229)	Data 0.001 (0.008)	Loss 5.9063 (5.9959)	Top-1 acc 3.125 (2.402)	Top-5 acc 10.938 (8.791)	lr 0.04830
Warmup Train [5][2370/3239]	Time 0.211 (0.229)	Data 0.024 (0.008)	Loss 5.9366 (5.9957)	Top-1 acc 3.125 (2.405)	Top-5 acc 11.719 (8.797)	lr 0.04829
Warmup Train [5][2380/3239]	Time 0.255 (0.229)	Data 0.001 (0.008)	Loss 5.8483 (5.9952)	Top-1 acc 2.344 (2.408)	Top-5 acc 11.328 (8.807)	lr 0.04829
Warmup Train [5][2390/3239]	Time 0.225 (0.229)	Data 0.001 (0.008)	Loss 6.0844 (5.9950)	Top-1 acc 3.516 (2.410)	Top-5 acc 8.203 (8.813)	lr 0.04829
Warmup Train [5][2400/3239]	Time 0.218 (0.229)	Data 0.001 (0.008)	Loss 5.9130 (5.9948)	Top-1 acc 0.781 (2.412)	Top-5 acc 8.594 (8.818)	lr 0.04829
Warmup Train [5][2410/3239]	Time 0.200 (0.229)	Data 0.001 (0.008)	Loss 5.8466 (5.9944)	Top-1 acc 3.125 (2.415)	Top-5 acc 10.547 (8.828)	lr 0.04828
Warmup Train [5][2420/3239]	Time 0.183 (0.229)	Data 0.001 (0.008)	Loss 5.8803 (5.9941)	Top-1 acc 4.297 (2.417)	Top-5 acc 11.719 (8.833)	lr 0.04828
Warmup Train [5][2430/3239]	Time 0.203 (0.229)	Data 0.001 (0.008)	Loss 6.0536 (5.9940)	Top-1 acc 1.953 (2.419)	Top-5 acc 7.422 (8.837)	lr 0.04828
Warmup Train [5][2440/3239]	Time 0.240 (0.229)	Data 0.001 (0.008)	Loss 6.0575 (5.9936)	Top-1 acc 2.734 (2.422)	Top-5 acc 7.812 (8.842)	lr 0.04828
Warmup Train [5][2450/3239]	Time 0.255 (0.229)	Data 0.002 (0.008)	Loss 5.9991 (5.9933)	Top-1 acc 1.953 (2.424)	Top-5 acc 8.203 (8.848)	lr 0.04828
Warmup Train [5][2460/3239]	Time 0.295 (0.229)	Data 0.002 (0.008)	Loss 5.9192 (5.9931)	Top-1 acc 2.344 (2.425)	Top-5 acc 10.547 (8.853)	lr 0.04827
Warmup Train [5][2470/3239]	Time 0.192 (0.229)	Data 0.001 (0.008)	Loss 5.8141 (5.9928)	Top-1 acc 3.516 (2.427)	Top-5 acc 12.500 (8.861)	lr 0.04827
Warmup Train [5][2480/3239]	Time 0.217 (0.228)	Data 0.001 (0.008)	Loss 5.9582 (5.9926)	Top-1 acc 3.125 (2.428)	Top-5 acc 12.891 (8.868)	lr 0.04827
Warmup Train [5][2490/3239]	Time 0.216 (0.228)	Data 0.001 (0.008)	Loss 5.7891 (5.9922)	Top-1 acc 4.688 (2.431)	Top-5 acc 16.406 (8.874)	lr 0.04827
Warmup Train [5][2500/3239]	Time 0.192 (0.228)	Data 0.001 (0.008)	Loss 5.9134 (5.9919)	Top-1 acc 1.172 (2.433)	Top-5 acc 8.594 (8.879)	lr 0.04826
Warmup Train [5][2510/3239]	Time 0.257 (0.228)	Data 0.002 (0.008)	Loss 5.8687 (5.9917)	Top-1 acc 3.906 (2.434)	Top-5 acc 16.016 (8.885)	lr 0.04826
Warmup Train [5][2520/3239]	Time 0.264 (0.228)	Data 0.001 (0.008)	Loss 5.9254 (5.9914)	Top-1 acc 3.516 (2.439)	Top-5 acc 8.984 (8.892)	lr 0.04826
Warmup Train [5][2530/3239]	Time 0.206 (0.228)	Data 0.001 (0.008)	Loss 5.8552 (5.9911)	Top-1 acc 2.734 (2.439)	Top-5 acc 9.766 (8.894)	lr 0.04826
Warmup Train [5][2540/3239]	Time 0.215 (0.228)	Data 0.001 (0.008)	Loss 5.9001 (5.9908)	Top-1 acc 5.469 (2.442)	Top-5 acc 13.281 (8.900)	lr 0.04826
Warmup Train [5][2550/3239]	Time 0.202 (0.228)	Data 0.001 (0.008)	Loss 6.0591 (5.9905)	Top-1 acc 0.781 (2.444)	Top-5 acc 6.641 (8.908)	lr 0.04825
Warmup Train [5][2560/3239]	Time 0.258 (0.228)	Data 0.001 (0.008)	Loss 5.9666 (5.9902)	Top-1 acc 1.953 (2.445)	Top-5 acc 9.766 (8.915)	lr 0.04825
Warmup Train [5][2570/3239]	Time 0.421 (0.228)	Data 0.001 (0.008)	Loss 5.8644 (5.9899)	Top-1 acc 2.344 (2.445)	Top-5 acc 9.375 (8.917)	lr 0.04825
Warmup Train [5][2580/3239]	Time 0.161 (0.228)	Data 0.002 (0.008)	Loss 5.8998 (5.9895)	Top-1 acc 3.125 (2.448)	Top-5 acc 11.719 (8.926)	lr 0.04825
Warmup Train [5][2590/3239]	Time 0.205 (0.228)	Data 0.001 (0.008)	Loss 5.9436 (5.9892)	Top-1 acc 3.125 (2.451)	Top-5 acc 10.938 (8.931)	lr 0.04824
Warmup Train [5][2600/3239]	Time 0.221 (0.228)	Data 0.001 (0.008)	Loss 5.8521 (5.9889)	Top-1 acc 2.734 (2.454)	Top-5 acc 10.938 (8.936)	lr 0.04824
Warmup Train [5][2610/3239]	Time 0.245 (0.228)	Data 0.001 (0.008)	Loss 5.9186 (5.9886)	Top-1 acc 1.562 (2.455)	Top-5 acc 5.859 (8.937)	lr 0.04824
Warmup Train [5][2620/3239]	Time 0.235 (0.228)	Data 0.002 (0.008)	Loss 5.8649 (5.9883)	Top-1 acc 3.906 (2.456)	Top-5 acc 12.500 (8.941)	lr 0.04824
Warmup Train [5][2630/3239]	Time 0.264 (0.228)	Data 0.001 (0.008)	Loss 6.0263 (5.9880)	Top-1 acc 3.516 (2.460)	Top-5 acc 9.375 (8.948)	lr 0.04824
Warmup Train [5][2640/3239]	Time 0.233 (0.228)	Data 0.001 (0.008)	Loss 5.8601 (5.9878)	Top-1 acc 3.906 (2.461)	Top-5 acc 13.281 (8.949)	lr 0.04823
Warmup Train [5][2650/3239]	Time 0.336 (0.228)	Data 0.002 (0.008)	Loss 5.9496 (5.9876)	Top-1 acc 1.562 (2.463)	Top-5 acc 7.812 (8.950)	lr 0.04823
Warmup Train [5][2660/3239]	Time 0.256 (0.228)	Data 0.002 (0.008)	Loss 5.9536 (5.9873)	Top-1 acc 3.125 (2.464)	Top-5 acc 13.281 (8.959)	lr 0.04823
Warmup Train [5][2670/3239]	Time 0.238 (0.228)	Data 0.001 (0.008)	Loss 5.8958 (5.9870)	Top-1 acc 1.953 (2.466)	Top-5 acc 9.766 (8.966)	lr 0.04823
Warmup Train [5][2680/3239]	Time 0.172 (0.228)	Data 0.001 (0.008)	Loss 5.9258 (5.9866)	Top-1 acc 2.734 (2.468)	Top-5 acc 8.594 (8.971)	lr 0.04822
Warmup Train [5][2690/3239]	Time 0.227 (0.228)	Data 0.001 (0.008)	Loss 5.9420 (5.9865)	Top-1 acc 3.125 (2.468)	Top-5 acc 10.156 (8.977)	lr 0.04822
Warmup Train [5][2700/3239]	Time 0.214 (0.228)	Data 0.001 (0.007)	Loss 5.9670 (5.9862)	Top-1 acc 3.125 (2.471)	Top-5 acc 10.156 (8.982)	lr 0.04822
Warmup Train [5][2710/3239]	Time 0.188 (0.228)	Data 0.002 (0.007)	Loss 5.9668 (5.9859)	Top-1 acc 2.734 (2.476)	Top-5 acc 9.766 (8.991)	lr 0.04822
Warmup Train [5][2720/3239]	Time 0.215 (0.228)	Data 0.001 (0.007)	Loss 5.8548 (5.9858)	Top-1 acc 1.953 (2.475)	Top-5 acc 11.328 (8.992)	lr 0.04822
Warmup Train [5][2730/3239]	Time 0.223 (0.228)	Data 0.002 (0.007)	Loss 5.8479 (5.9855)	Top-1 acc 4.688 (2.479)	Top-5 acc 13.672 (9.001)	lr 0.04821
Warmup Train [5][2740/3239]	Time 0.142 (0.228)	Data 0.001 (0.007)	Loss 5.9395 (5.9852)	Top-1 acc 4.688 (2.484)	Top-5 acc 11.328 (9.008)	lr 0.04821
Warmup Train [5][2750/3239]	Time 0.148 (0.228)	Data 0.001 (0.007)	Loss 5.8694 (5.9850)	Top-1 acc 5.469 (2.486)	Top-5 acc 14.062 (9.011)	lr 0.04821
Warmup Train [5][2760/3239]	Time 0.286 (0.228)	Data 0.001 (0.007)	Loss 5.8894 (5.9848)	Top-1 acc 3.125 (2.489)	Top-5 acc 10.938 (9.016)	lr 0.04821
Warmup Train [5][2770/3239]	Time 0.248 (0.228)	Data 0.001 (0.007)	Loss 5.7632 (5.9844)	Top-1 acc 4.688 (2.490)	Top-5 acc 9.766 (9.018)	lr 0.04820
Warmup Train [5][2780/3239]	Time 0.205 (0.228)	Data 0.001 (0.007)	Loss 5.8783 (5.9840)	Top-1 acc 2.344 (2.492)	Top-5 acc 8.984 (9.024)	lr 0.04820
Warmup Train [5][2790/3239]	Time 0.214 (0.228)	Data 0.001 (0.007)	Loss 5.9522 (5.9838)	Top-1 acc 2.734 (2.494)	Top-5 acc 8.594 (9.027)	lr 0.04820
Warmup Train [5][2800/3239]	Time 0.171 (0.228)	Data 0.001 (0.007)	Loss 5.8368 (5.9833)	Top-1 acc 3.516 (2.497)	Top-5 acc 8.984 (9.036)	lr 0.04820
Warmup Train [5][2810/3239]	Time 0.196 (0.228)	Data 0.001 (0.007)	Loss 5.9293 (5.9830)	Top-1 acc 3.125 (2.498)	Top-5 acc 8.984 (9.041)	lr 0.04820
Warmup Train [5][2820/3239]	Time 0.252 (0.228)	Data 0.001 (0.007)	Loss 5.9534 (5.9827)	Top-1 acc 1.953 (2.501)	Top-5 acc 10.547 (9.051)	lr 0.04819
Warmup Train [5][2830/3239]	Time 0.221 (0.228)	Data 0.002 (0.007)	Loss 5.8825 (5.9824)	Top-1 acc 3.516 (2.503)	Top-5 acc 10.938 (9.055)	lr 0.04819
Warmup Train [5][2840/3239]	Time 0.307 (0.228)	Data 0.002 (0.007)	Loss 5.9025 (5.9822)	Top-1 acc 1.562 (2.505)	Top-5 acc 9.375 (9.059)	lr 0.04819
Warmup Train [5][2850/3239]	Time 0.204 (0.228)	Data 0.001 (0.007)	Loss 5.8911 (5.9820)	Top-1 acc 3.516 (2.507)	Top-5 acc 9.766 (9.066)	lr 0.04819
Warmup Train [5][2860/3239]	Time 0.323 (0.228)	Data 0.001 (0.007)	Loss 5.9175 (5.9816)	Top-1 acc 3.906 (2.510)	Top-5 acc 11.328 (9.073)	lr 0.04818
Warmup Train [5][2870/3239]	Time 0.249 (0.227)	Data 0.001 (0.007)	Loss 5.9254 (5.9814)	Top-1 acc 1.562 (2.511)	Top-5 acc 10.938 (9.077)	lr 0.04818
Warmup Train [5][2880/3239]	Time 0.220 (0.227)	Data 0.001 (0.007)	Loss 5.8521 (5.9810)	Top-1 acc 2.734 (2.513)	Top-5 acc 8.984 (9.085)	lr 0.04818
Warmup Train [5][2890/3239]	Time 0.220 (0.227)	Data 0.001 (0.007)	Loss 5.9583 (5.9807)	Top-1 acc 1.172 (2.514)	Top-5 acc 7.812 (9.090)	lr 0.04818
Warmup Train [5][2900/3239]	Time 0.202 (0.227)	Data 0.001 (0.007)	Loss 5.9191 (5.9804)	Top-1 acc 2.344 (2.516)	Top-5 acc 8.984 (9.094)	lr 0.04818
Warmup Train [5][2910/3239]	Time 0.211 (0.227)	Data 0.001 (0.007)	Loss 5.9005 (5.9801)	Top-1 acc 3.125 (2.518)	Top-5 acc 8.203 (9.100)	lr 0.04817
Warmup Train [5][2920/3239]	Time 0.230 (0.227)	Data 0.002 (0.007)	Loss 5.9235 (5.9798)	Top-1 acc 3.125 (2.519)	Top-5 acc 11.328 (9.105)	lr 0.04817
Warmup Train [5][2930/3239]	Time 0.156 (0.227)	Data 0.001 (0.007)	Loss 5.8974 (5.9795)	Top-1 acc 2.344 (2.522)	Top-5 acc 8.984 (9.111)	lr 0.04817
Warmup Train [5][2940/3239]	Time 0.180 (0.227)	Data 0.001 (0.007)	Loss 5.7248 (5.9791)	Top-1 acc 4.688 (2.525)	Top-5 acc 17.578 (9.122)	lr 0.04817
Warmup Train [5][2950/3239]	Time 0.195 (0.227)	Data 0.001 (0.007)	Loss 5.8180 (5.9788)	Top-1 acc 3.516 (2.529)	Top-5 acc 10.547 (9.127)	lr 0.04816
Warmup Train [5][2960/3239]	Time 0.192 (0.227)	Data 0.001 (0.007)	Loss 5.8759 (5.9785)	Top-1 acc 5.078 (2.530)	Top-5 acc 10.938 (9.131)	lr 0.04816
Warmup Train [5][2970/3239]	Time 0.279 (0.227)	Data 0.001 (0.007)	Loss 5.9617 (5.9784)	Top-1 acc 3.906 (2.532)	Top-5 acc 8.984 (9.135)	lr 0.04816
Warmup Train [5][2980/3239]	Time 0.373 (0.227)	Data 0.001 (0.007)	Loss 5.9027 (5.9781)	Top-1 acc 3.906 (2.535)	Top-5 acc 9.766 (9.142)	lr 0.04816
Warmup Train [5][2990/3239]	Time 0.155 (0.227)	Data 0.001 (0.007)	Loss 5.8146 (5.9777)	Top-1 acc 2.734 (2.536)	Top-5 acc 11.328 (9.145)	lr 0.04815
Warmup Train [5][3000/3239]	Time 0.241 (0.227)	Data 0.001 (0.007)	Loss 5.9334 (5.9775)	Top-1 acc 2.344 (2.539)	Top-5 acc 10.547 (9.149)	lr 0.04815
Warmup Train [5][3010/3239]	Time 0.238 (0.227)	Data 0.001 (0.007)	Loss 5.8638 (5.9772)	Top-1 acc 4.297 (2.542)	Top-5 acc 12.500 (9.157)	lr 0.04815
Warmup Train [5][3020/3239]	Time 0.233 (0.227)	Data 0.001 (0.007)	Loss 5.9678 (5.9770)	Top-1 acc 3.516 (2.545)	Top-5 acc 8.984 (9.160)	lr 0.04815
Warmup Train [5][3030/3239]	Time 0.168 (0.227)	Data 0.001 (0.007)	Loss 6.0509 (5.9769)	Top-1 acc 2.344 (2.547)	Top-5 acc 6.250 (9.162)	lr 0.04815
Warmup Train [5][3040/3239]	Time 0.171 (0.227)	Data 0.001 (0.007)	Loss 5.8211 (5.9765)	Top-1 acc 1.953 (2.549)	Top-5 acc 9.375 (9.167)	lr 0.04814
Warmup Train [5][3050/3239]	Time 0.188 (0.227)	Data 0.001 (0.007)	Loss 5.8810 (5.9762)	Top-1 acc 1.953 (2.551)	Top-5 acc 10.547 (9.172)	lr 0.04814
Warmup Train [5][3060/3239]	Time 0.213 (0.227)	Data 0.001 (0.007)	Loss 5.9055 (5.9759)	Top-1 acc 2.734 (2.552)	Top-5 acc 7.031 (9.176)	lr 0.04814
Warmup Train [5][3070/3239]	Time 0.233 (0.227)	Data 0.001 (0.007)	Loss 5.9209 (5.9755)	Top-1 acc 1.953 (2.555)	Top-5 acc 8.203 (9.182)	lr 0.04814
Warmup Train [5][3080/3239]	Time 0.314 (0.227)	Data 0.001 (0.007)	Loss 5.9023 (5.9753)	Top-1 acc 4.297 (2.556)	Top-5 acc 12.109 (9.188)	lr 0.04813
Warmup Train [5][3090/3239]	Time 0.312 (0.227)	Data 0.001 (0.007)	Loss 5.8637 (5.9750)	Top-1 acc 1.953 (2.558)	Top-5 acc 10.938 (9.193)	lr 0.04813
Warmup Train [5][3100/3239]	Time 0.196 (0.227)	Data 0.001 (0.007)	Loss 5.7833 (5.9746)	Top-1 acc 4.297 (2.561)	Top-5 acc 11.719 (9.201)	lr 0.04813
Warmup Train [5][3110/3239]	Time 0.242 (0.227)	Data 0.001 (0.007)	Loss 5.9518 (5.9743)	Top-1 acc 2.734 (2.565)	Top-5 acc 10.938 (9.212)	lr 0.04813
Warmup Train [5][3120/3239]	Time 0.207 (0.227)	Data 0.001 (0.007)	Loss 5.8928 (5.9740)	Top-1 acc 3.516 (2.568)	Top-5 acc 9.375 (9.219)	lr 0.04812
Warmup Train [5][3130/3239]	Time 0.212 (0.227)	Data 0.001 (0.007)	Loss 5.9004 (5.9737)	Top-1 acc 1.172 (2.571)	Top-5 acc 9.766 (9.226)	lr 0.04812
Warmup Train [5][3140/3239]	Time 0.279 (0.227)	Data 0.002 (0.007)	Loss 5.9193 (5.9735)	Top-1 acc 2.734 (2.573)	Top-5 acc 11.719 (9.232)	lr 0.04812
Warmup Train [5][3150/3239]	Time 0.208 (0.227)	Data 0.001 (0.007)	Loss 5.8794 (5.9732)	Top-1 acc 2.734 (2.576)	Top-5 acc 10.156 (9.239)	lr 0.04812
Warmup Train [5][3160/3239]	Time 0.203 (0.227)	Data 0.001 (0.007)	Loss 5.9434 (5.9729)	Top-1 acc 2.344 (2.579)	Top-5 acc 14.062 (9.247)	lr 0.04812
Warmup Train [5][3170/3239]	Time 0.254 (0.227)	Data 0.002 (0.007)	Loss 5.8863 (5.9726)	Top-1 acc 3.516 (2.583)	Top-5 acc 9.766 (9.256)	lr 0.04811
Warmup Train [5][3180/3239]	Time 0.324 (0.227)	Data 0.000 (0.007)	Loss 5.9584 (5.9723)	Top-1 acc 2.344 (2.586)	Top-5 acc 7.812 (9.262)	lr 0.04811
Warmup Train [5][3190/3239]	Time 0.210 (0.227)	Data 0.000 (0.007)	Loss 5.8794 (5.9719)	Top-1 acc 3.906 (2.589)	Top-5 acc 14.453 (9.269)	lr 0.04811
Warmup Train [5][3200/3239]	Time 0.193 (0.227)	Data 0.000 (0.007)	Loss 5.8006 (5.9716)	Top-1 acc 4.688 (2.590)	Top-5 acc 10.547 (9.274)	lr 0.04811
Warmup Train [5][3210/3239]	Time 0.131 (0.227)	Data 0.000 (0.007)	Loss 5.7903 (5.9714)	Top-1 acc 2.734 (2.593)	Top-5 acc 14.453 (9.281)	lr 0.04810
Warmup Train [5][3220/3239]	Time 0.182 (0.227)	Data 0.000 (0.007)	Loss 5.8803 (5.9711)	Top-1 acc 4.688 (2.596)	Top-5 acc 11.328 (9.289)	lr 0.04810
Warmup Train [5][3230/3239]	Time 0.183 (0.226)	Data 0.000 (0.007)	Loss 5.7129 (5.9708)	Top-1 acc 3.516 (2.598)	Top-5 acc 14.453 (9.297)	lr 0.04810
Warmup Train [5][3239/3239]	Time 0.138 (0.226)	Data 0.000 (0.007)	Loss 6.1467 (5.9707)	Top-1 acc 0.000 (2.599)	Top-5 acc 6.173 (9.301)	lr 0.04810
==========Warmup Valid [5/40]	loss 5.490	top-1 acc 3.988	top-5 acc 13.359	Train top-1 2.599	top-5 9.301	flops: 442.4M
Warmup Train [6][0/3239]	Time 15.651 (15.651)	Data 11.880 (11.880)	Loss 5.8880 (5.8880)	Top-1 acc 2.344 (2.344)	Top-5 acc 9.766 (9.766)	lr 0.04810
Warmup Train [6][10/3239]	Time 0.290 (1.759)	Data 0.001 (1.154)	Loss 5.8758 (5.8808)	Top-1 acc 3.125 (3.267)	Top-5 acc 10.938 (11.328)	lr 0.04809
Warmup Train [6][20/3239]	Time 0.260 (1.054)	Data 0.002 (0.606)	Loss 5.7821 (5.8578)	Top-1 acc 3.125 (3.181)	Top-5 acc 14.062 (11.440)	lr 0.04809
Warmup Train [6][30/3239]	Time 0.213 (0.786)	Data 0.001 (0.411)	Loss 5.9849 (5.8644)	Top-1 acc 1.172 (3.112)	Top-5 acc 5.859 (11.076)	lr 0.04809
Warmup Train [6][40/3239]	Time 0.156 (0.658)	Data 0.001 (0.312)	Loss 5.9696 (5.8775)	Top-1 acc 2.344 (3.058)	Top-5 acc 15.234 (11.233)	lr 0.04809
Warmup Train [6][50/3239]	Time 0.201 (0.570)	Data 0.001 (0.252)	Loss 5.9037 (5.8837)	Top-1 acc 2.344 (3.018)	Top-5 acc 12.891 (11.091)	lr 0.04809
Warmup Train [6][60/3239]	Time 0.351 (0.515)	Data 0.001 (0.211)	Loss 5.8246 (5.8797)	Top-1 acc 5.469 (3.151)	Top-5 acc 12.109 (11.238)	lr 0.04808
Warmup Train [6][70/3239]	Time 0.285 (0.473)	Data 0.001 (0.182)	Loss 5.9667 (5.8789)	Top-1 acc 2.344 (3.169)	Top-5 acc 7.422 (11.334)	lr 0.04808
Warmup Train [6][80/3239]	Time 0.185 (0.443)	Data 0.001 (0.159)	Loss 5.8513 (5.8756)	Top-1 acc 3.516 (3.212)	Top-5 acc 12.891 (11.352)	lr 0.04808
Warmup Train [6][90/3239]	Time 0.242 (0.419)	Data 0.002 (0.142)	Loss 5.9291 (5.8757)	Top-1 acc 2.344 (3.262)	Top-5 acc 10.156 (11.328)	lr 0.04808
Warmup Train [6][100/3239]	Time 0.241 (0.399)	Data 0.001 (0.128)	Loss 5.8697 (5.8733)	Top-1 acc 2.734 (3.245)	Top-5 acc 12.500 (11.270)	lr 0.04807
Warmup Train [6][110/3239]	Time 0.164 (0.385)	Data 0.002 (0.117)	Loss 5.9155 (5.8727)	Top-1 acc 1.172 (3.231)	Top-5 acc 8.594 (11.240)	lr 0.04807
Warmup Train [6][120/3239]	Time 0.175 (0.372)	Data 0.001 (0.108)	Loss 6.0056 (5.8742)	Top-1 acc 4.297 (3.238)	Top-5 acc 10.156 (11.238)	lr 0.04807
Warmup Train [6][130/3239]	Time 0.190 (0.360)	Data 0.001 (0.100)	Loss 5.8134 (5.8720)	Top-1 acc 3.125 (3.262)	Top-5 acc 12.891 (11.328)	lr 0.04807
Warmup Train [6][140/3239]	Time 0.195 (0.350)	Data 0.001 (0.093)	Loss 5.8709 (5.8730)	Top-1 acc 2.734 (3.236)	Top-5 acc 9.766 (11.275)	lr 0.04806
Warmup Train [6][150/3239]	Time 0.238 (0.342)	Data 0.001 (0.087)	Loss 5.8964 (5.8735)	Top-1 acc 3.125 (3.239)	Top-5 acc 9.375 (11.222)	lr 0.04806
Warmup Train [6][160/3239]	Time 0.123 (0.333)	Data 0.002 (0.082)	Loss 5.9698 (5.8747)	Top-1 acc 1.562 (3.251)	Top-5 acc 8.594 (11.197)	lr 0.04806
Warmup Train [6][170/3239]	Time 0.342 (0.328)	Data 0.001 (0.078)	Loss 5.8891 (5.8751)	Top-1 acc 2.344 (3.260)	Top-5 acc 10.156 (11.175)	lr 0.04806
Warmup Train [6][180/3239]	Time 0.216 (0.321)	Data 0.001 (0.073)	Loss 5.8926 (5.8731)	Top-1 acc 3.125 (3.278)	Top-5 acc 8.594 (11.201)	lr 0.04806
Warmup Train [6][190/3239]	Time 0.256 (0.317)	Data 0.001 (0.070)	Loss 5.8830 (5.8724)	Top-1 acc 3.906 (3.266)	Top-5 acc 11.328 (11.173)	lr 0.04805
Warmup Train [6][200/3239]	Time 0.181 (0.312)	Data 0.001 (0.066)	Loss 5.7875 (5.8724)	Top-1 acc 3.516 (3.263)	Top-5 acc 13.281 (11.186)	lr 0.04805
Warmup Train [6][210/3239]	Time 0.199 (0.307)	Data 0.001 (0.063)	Loss 5.8446 (5.8723)	Top-1 acc 2.344 (3.260)	Top-5 acc 14.453 (11.187)	lr 0.04805
Warmup Train [6][220/3239]	Time 0.217 (0.303)	Data 0.001 (0.061)	Loss 5.9776 (5.8720)	Top-1 acc 2.344 (3.284)	Top-5 acc 7.422 (11.220)	lr 0.04805
Warmup Train [6][230/3239]	Time 0.209 (0.299)	Data 0.001 (0.058)	Loss 5.8931 (5.8727)	Top-1 acc 3.516 (3.282)	Top-5 acc 8.594 (11.247)	lr 0.04804
Warmup Train [6][240/3239]	Time 0.201 (0.295)	Data 0.001 (0.056)	Loss 5.8817 (5.8711)	Top-1 acc 1.562 (3.281)	Top-5 acc 8.594 (11.242)	lr 0.04804
Warmup Train [6][250/3239]	Time 0.146 (0.292)	Data 0.001 (0.054)	Loss 5.8858 (5.8711)	Top-1 acc 3.906 (3.274)	Top-5 acc 13.281 (11.247)	lr 0.04804
Warmup Train [6][260/3239]	Time 0.179 (0.289)	Data 0.002 (0.052)	Loss 5.8473 (5.8715)	Top-1 acc 3.906 (3.266)	Top-5 acc 12.500 (11.240)	lr 0.04804
Warmup Train [6][270/3239]	Time 0.206 (0.286)	Data 0.001 (0.050)	Loss 5.9215 (5.8715)	Top-1 acc 3.125 (3.256)	Top-5 acc 10.156 (11.210)	lr 0.04803
Warmup Train [6][280/3239]	Time 0.200 (0.284)	Data 0.001 (0.048)	Loss 5.8165 (5.8698)	Top-1 acc 6.250 (3.261)	Top-5 acc 11.328 (11.217)	lr 0.04803
Warmup Train [6][290/3239]	Time 0.141 (0.282)	Data 0.001 (0.047)	Loss 5.8557 (5.8690)	Top-1 acc 5.469 (3.290)	Top-5 acc 16.797 (11.262)	lr 0.04803
Warmup Train [6][300/3239]	Time 0.181 (0.279)	Data 0.001 (0.045)	Loss 5.9179 (5.8687)	Top-1 acc 1.562 (3.266)	Top-5 acc 7.812 (11.244)	lr 0.04803
Warmup Train [6][310/3239]	Time 0.221 (0.277)	Data 0.001 (0.044)	Loss 5.9303 (5.8693)	Top-1 acc 3.906 (3.266)	Top-5 acc 9.766 (11.219)	lr 0.04802
Warmup Train [6][320/3239]	Time 0.212 (0.275)	Data 0.001 (0.043)	Loss 5.8023 (5.8689)	Top-1 acc 5.859 (3.277)	Top-5 acc 13.672 (11.200)	lr 0.04802
Warmup Train [6][330/3239]	Time 0.197 (0.274)	Data 0.001 (0.042)	Loss 5.8748 (5.8694)	Top-1 acc 3.125 (3.261)	Top-5 acc 11.328 (11.176)	lr 0.04802
Warmup Train [6][340/3239]	Time 0.177 (0.273)	Data 0.002 (0.040)	Loss 5.8121 (5.8694)	Top-1 acc 3.906 (3.270)	Top-5 acc 13.672 (11.180)	lr 0.04802
Warmup Train [6][350/3239]	Time 0.215 (0.272)	Data 0.001 (0.039)	Loss 5.8006 (5.8687)	Top-1 acc 3.516 (3.276)	Top-5 acc 12.500 (11.190)	lr 0.04801
Warmup Train [6][360/3239]	Time 0.162 (0.270)	Data 0.002 (0.038)	Loss 5.9046 (5.8686)	Top-1 acc 5.078 (3.267)	Top-5 acc 12.500 (11.187)	lr 0.04801
Warmup Train [6][370/3239]	Time 0.258 (0.270)	Data 0.001 (0.038)	Loss 5.8699 (5.8683)	Top-1 acc 3.906 (3.276)	Top-5 acc 13.672 (11.213)	lr 0.04801
Warmup Train [6][380/3239]	Time 0.280 (0.268)	Data 0.001 (0.037)	Loss 5.8347 (5.8688)	Top-1 acc 3.516 (3.279)	Top-5 acc 13.281 (11.227)	lr 0.04801
Warmup Train [6][390/3239]	Time 0.257 (0.268)	Data 0.001 (0.036)	Loss 5.8095 (5.8691)	Top-1 acc 4.297 (3.284)	Top-5 acc 12.891 (11.227)	lr 0.04801
Warmup Train [6][400/3239]	Time 0.265 (0.267)	Data 0.001 (0.035)	Loss 5.8997 (5.8696)	Top-1 acc 2.344 (3.287)	Top-5 acc 10.938 (11.205)	lr 0.04800
Warmup Train [6][410/3239]	Time 0.193 (0.267)	Data 0.001 (0.034)	Loss 5.8013 (5.8696)	Top-1 acc 5.859 (3.291)	Top-5 acc 13.281 (11.206)	lr 0.04800
Warmup Train [6][420/3239]	Time 0.223 (0.266)	Data 0.001 (0.033)	Loss 5.8849 (5.8700)	Top-1 acc 2.344 (3.299)	Top-5 acc 10.938 (11.193)	lr 0.04800
Warmup Train [6][430/3239]	Time 0.214 (0.266)	Data 0.002 (0.033)	Loss 5.8884 (5.8701)	Top-1 acc 2.734 (3.305)	Top-5 acc 10.547 (11.216)	lr 0.04800
Warmup Train [6][440/3239]	Time 0.236 (0.265)	Data 0.001 (0.032)	Loss 5.8756 (5.8689)	Top-1 acc 4.688 (3.316)	Top-5 acc 12.109 (11.236)	lr 0.04799
Warmup Train [6][450/3239]	Time 0.148 (0.264)	Data 0.001 (0.031)	Loss 5.8390 (5.8693)	Top-1 acc 3.125 (3.319)	Top-5 acc 10.938 (11.233)	lr 0.04799
Warmup Train [6][460/3239]	Time 0.127 (0.263)	Data 0.002 (0.031)	Loss 5.9225 (5.8694)	Top-1 acc 4.688 (3.311)	Top-5 acc 10.547 (11.221)	lr 0.04799
Warmup Train [6][470/3239]	Time 0.187 (0.261)	Data 0.001 (0.030)	Loss 5.8416 (5.8694)	Top-1 acc 4.688 (3.304)	Top-5 acc 13.281 (11.210)	lr 0.04799
Warmup Train [6][480/3239]	Time 0.141 (0.260)	Data 0.001 (0.030)	Loss 5.8959 (5.8699)	Top-1 acc 5.078 (3.311)	Top-5 acc 10.156 (11.205)	lr 0.04798
Warmup Train [6][490/3239]	Time 0.312 (0.260)	Data 0.001 (0.029)	Loss 5.8928 (5.8693)	Top-1 acc 3.125 (3.320)	Top-5 acc 6.641 (11.227)	lr 0.04798
Warmup Train [6][500/3239]	Time 0.228 (0.258)	Data 0.001 (0.029)	Loss 5.7401 (5.8689)	Top-1 acc 1.562 (3.316)	Top-5 acc 11.719 (11.255)	lr 0.04798
Warmup Train [6][510/3239]	Time 0.245 (0.258)	Data 0.001 (0.028)	Loss 5.8790 (5.8686)	Top-1 acc 2.734 (3.324)	Top-5 acc 12.109 (11.275)	lr 0.04798
Warmup Train [6][520/3239]	Time 0.260 (0.257)	Data 0.002 (0.028)	Loss 5.9436 (5.8687)	Top-1 acc 2.734 (3.312)	Top-5 acc 10.938 (11.273)	lr 0.04797
Warmup Train [6][530/3239]	Time 0.274 (0.257)	Data 0.002 (0.027)	Loss 5.8742 (5.8683)	Top-1 acc 2.734 (3.318)	Top-5 acc 10.938 (11.291)	lr 0.04797
Warmup Train [6][540/3239]	Time 0.188 (0.257)	Data 0.001 (0.027)	Loss 5.8255 (5.8682)	Top-1 acc 2.734 (3.313)	Top-5 acc 9.766 (11.286)	lr 0.04797
Warmup Train [6][550/3239]	Time 0.178 (0.256)	Data 0.002 (0.026)	Loss 5.7569 (5.8673)	Top-1 acc 4.297 (3.321)	Top-5 acc 13.281 (11.308)	lr 0.04797
Warmup Train [6][560/3239]	Time 0.188 (0.256)	Data 0.001 (0.026)	Loss 5.8922 (5.8677)	Top-1 acc 3.516 (3.322)	Top-5 acc 8.984 (11.309)	lr 0.04796
Warmup Train [6][570/3239]	Time 0.358 (0.257)	Data 0.002 (0.026)	Loss 5.8095 (5.8678)	Top-1 acc 4.688 (3.326)	Top-5 acc 15.234 (11.310)	lr 0.04796
Warmup Train [6][580/3239]	Time 0.158 (0.256)	Data 0.001 (0.025)	Loss 5.8948 (5.8679)	Top-1 acc 6.250 (3.337)	Top-5 acc 12.500 (11.325)	lr 0.04796
Warmup Train [6][590/3239]	Time 0.274 (0.255)	Data 0.001 (0.025)	Loss 5.9917 (5.8678)	Top-1 acc 1.953 (3.338)	Top-5 acc 7.031 (11.314)	lr 0.04796
Warmup Train [6][600/3239]	Time 0.323 (0.255)	Data 0.001 (0.024)	Loss 5.8687 (5.8681)	Top-1 acc 2.734 (3.331)	Top-5 acc 10.156 (11.311)	lr 0.04796
Warmup Train [6][610/3239]	Time 0.260 (0.255)	Data 0.001 (0.024)	Loss 5.7871 (5.8675)	Top-1 acc 5.078 (3.339)	Top-5 acc 13.281 (11.315)	lr 0.04795
Warmup Train [6][620/3239]	Time 0.292 (0.254)	Data 0.001 (0.024)	Loss 5.8124 (5.8673)	Top-1 acc 5.078 (3.348)	Top-5 acc 12.500 (11.316)	lr 0.04795
Warmup Train [6][630/3239]	Time 0.199 (0.254)	Data 0.001 (0.023)	Loss 5.9250 (5.8669)	Top-1 acc 0.781 (3.355)	Top-5 acc 8.594 (11.306)	lr 0.04795
Warmup Train [6][640/3239]	Time 0.184 (0.254)	Data 0.001 (0.023)	Loss 5.7581 (5.8666)	Top-1 acc 3.125 (3.346)	Top-5 acc 13.672 (11.306)	lr 0.04795
Warmup Train [6][650/3239]	Time 0.190 (0.254)	Data 0.001 (0.023)	Loss 5.7531 (5.8660)	Top-1 acc 2.734 (3.345)	Top-5 acc 11.719 (11.311)	lr 0.04794
Warmup Train [6][660/3239]	Time 0.242 (0.253)	Data 0.001 (0.022)	Loss 5.9452 (5.8654)	Top-1 acc 3.906 (3.358)	Top-5 acc 11.328 (11.333)	lr 0.04794
Warmup Train [6][670/3239]	Time 0.225 (0.253)	Data 0.001 (0.022)	Loss 5.9095 (5.8647)	Top-1 acc 3.125 (3.360)	Top-5 acc 11.328 (11.344)	lr 0.04794
Warmup Train [6][680/3239]	Time 0.249 (0.252)	Data 0.001 (0.022)	Loss 5.7888 (5.8643)	Top-1 acc 5.469 (3.364)	Top-5 acc 15.234 (11.355)	lr 0.04794
Warmup Train [6][690/3239]	Time 0.301 (0.252)	Data 0.002 (0.021)	Loss 5.8395 (5.8640)	Top-1 acc 3.906 (3.369)	Top-5 acc 11.328 (11.368)	lr 0.04793
Warmup Train [6][700/3239]	Time 0.313 (0.252)	Data 0.002 (0.021)	Loss 5.8019 (5.8636)	Top-1 acc 2.344 (3.367)	Top-5 acc 12.500 (11.379)	lr 0.04793
Warmup Train [6][710/3239]	Time 0.194 (0.251)	Data 0.001 (0.021)	Loss 5.7881 (5.8635)	Top-1 acc 3.125 (3.377)	Top-5 acc 12.500 (11.390)	lr 0.04793
Warmup Train [6][720/3239]	Time 0.179 (0.251)	Data 0.001 (0.021)	Loss 5.8339 (5.8631)	Top-1 acc 3.906 (3.381)	Top-5 acc 10.156 (11.388)	lr 0.04793
Warmup Train [6][730/3239]	Time 0.232 (0.251)	Data 0.002 (0.020)	Loss 5.8532 (5.8623)	Top-1 acc 2.344 (3.383)	Top-5 acc 10.547 (11.407)	lr 0.04792
Warmup Train [6][740/3239]	Time 0.198 (0.250)	Data 0.001 (0.020)	Loss 5.8766 (5.8624)	Top-1 acc 4.297 (3.389)	Top-5 acc 12.891 (11.421)	lr 0.04792
Warmup Train [6][750/3239]	Time 0.172 (0.250)	Data 0.001 (0.020)	Loss 5.7906 (5.8617)	Top-1 acc 6.250 (3.398)	Top-5 acc 13.281 (11.443)	lr 0.04792
Warmup Train [6][760/3239]	Time 0.208 (0.249)	Data 0.001 (0.020)	Loss 5.8195 (5.8615)	Top-1 acc 5.469 (3.405)	Top-5 acc 12.109 (11.454)	lr 0.04792
Warmup Train [6][770/3239]	Time 0.250 (0.249)	Data 0.001 (0.019)	Loss 5.9088 (5.8609)	Top-1 acc 1.562 (3.410)	Top-5 acc 7.812 (11.463)	lr 0.04791
Warmup Train [6][780/3239]	Time 0.263 (0.249)	Data 0.001 (0.019)	Loss 5.8517 (5.8602)	Top-1 acc 2.344 (3.412)	Top-5 acc 11.328 (11.471)	lr 0.04791
Warmup Train [6][790/3239]	Time 0.281 (0.248)	Data 0.001 (0.019)	Loss 5.8143 (5.8593)	Top-1 acc 4.297 (3.417)	Top-5 acc 12.500 (11.481)	lr 0.04791
Warmup Train [6][800/3239]	Time 0.185 (0.248)	Data 0.001 (0.019)	Loss 5.7853 (5.8588)	Top-1 acc 4.297 (3.422)	Top-5 acc 10.547 (11.494)	lr 0.04791
Warmup Train [6][810/3239]	Time 0.231 (0.248)	Data 0.001 (0.019)	Loss 5.8057 (5.8580)	Top-1 acc 3.906 (3.424)	Top-5 acc 10.938 (11.501)	lr 0.04790
Warmup Train [6][820/3239]	Time 0.165 (0.247)	Data 0.001 (0.018)	Loss 5.8291 (5.8579)	Top-1 acc 4.297 (3.434)	Top-5 acc 12.500 (11.514)	lr 0.04790
Warmup Train [6][830/3239]	Time 0.248 (0.247)	Data 0.001 (0.018)	Loss 6.0011 (5.8578)	Top-1 acc 1.953 (3.437)	Top-5 acc 11.719 (11.522)	lr 0.04790
Warmup Train [6][840/3239]	Time 0.260 (0.247)	Data 0.001 (0.018)	Loss 5.7760 (5.8571)	Top-1 acc 2.344 (3.440)	Top-5 acc 12.891 (11.539)	lr 0.04790
Warmup Train [6][850/3239]	Time 0.254 (0.246)	Data 0.001 (0.018)	Loss 5.8343 (5.8565)	Top-1 acc 3.906 (3.445)	Top-5 acc 11.328 (11.543)	lr 0.04789
Warmup Train [6][860/3239]	Time 0.236 (0.246)	Data 0.002 (0.018)	Loss 5.8506 (5.8564)	Top-1 acc 3.516 (3.448)	Top-5 acc 12.500 (11.543)	lr 0.04789
Warmup Train [6][870/3239]	Time 0.211 (0.246)	Data 0.001 (0.018)	Loss 5.8145 (5.8564)	Top-1 acc 1.953 (3.450)	Top-5 acc 9.375 (11.536)	lr 0.04789
Warmup Train [6][880/3239]	Time 0.212 (0.245)	Data 0.001 (0.017)	Loss 5.8167 (5.8560)	Top-1 acc 2.734 (3.449)	Top-5 acc 12.500 (11.546)	lr 0.04789
Warmup Train [6][890/3239]	Time 0.211 (0.245)	Data 0.001 (0.017)	Loss 5.8206 (5.8559)	Top-1 acc 3.516 (3.448)	Top-5 acc 14.453 (11.544)	lr 0.04789
Warmup Train [6][900/3239]	Time 0.355 (0.245)	Data 0.001 (0.017)	Loss 5.8728 (5.8558)	Top-1 acc 2.344 (3.447)	Top-5 acc 11.328 (11.555)	lr 0.04788
Warmup Train [6][910/3239]	Time 0.225 (0.245)	Data 0.001 (0.017)	Loss 5.7854 (5.8554)	Top-1 acc 5.078 (3.450)	Top-5 acc 13.281 (11.565)	lr 0.04788
Warmup Train [6][920/3239]	Time 0.180 (0.244)	Data 0.002 (0.017)	Loss 5.7247 (5.8552)	Top-1 acc 5.469 (3.451)	Top-5 acc 13.672 (11.561)	lr 0.04788
Warmup Train [6][930/3239]	Time 0.202 (0.244)	Data 0.002 (0.017)	Loss 5.8569 (5.8549)	Top-1 acc 5.859 (3.459)	Top-5 acc 12.500 (11.571)	lr 0.04788
Warmup Train [6][940/3239]	Time 0.213 (0.244)	Data 0.001 (0.016)	Loss 5.6260 (5.8548)	Top-1 acc 4.688 (3.458)	Top-5 acc 16.016 (11.574)	lr 0.04787
Warmup Train [6][950/3239]	Time 0.287 (0.243)	Data 0.003 (0.016)	Loss 5.8063 (5.8549)	Top-1 acc 5.859 (3.461)	Top-5 acc 11.328 (11.581)	lr 0.04787
Warmup Train [6][960/3239]	Time 0.230 (0.243)	Data 0.001 (0.016)	Loss 5.9183 (5.8548)	Top-1 acc 3.906 (3.464)	Top-5 acc 8.984 (11.579)	lr 0.04787
Warmup Train [6][970/3239]	Time 0.214 (0.243)	Data 0.001 (0.016)	Loss 5.8308 (5.8547)	Top-1 acc 7.031 (3.473)	Top-5 acc 16.406 (11.588)	lr 0.04787
Warmup Train [6][980/3239]	Time 0.231 (0.243)	Data 0.001 (0.016)	Loss 5.9347 (5.8545)	Top-1 acc 3.906 (3.471)	Top-5 acc 14.062 (11.591)	lr 0.04786
Warmup Train [6][990/3239]	Time 0.240 (0.243)	Data 0.001 (0.016)	Loss 5.9235 (5.8545)	Top-1 acc 2.734 (3.472)	Top-5 acc 11.328 (11.590)	lr 0.04786
Warmup Train [6][1000/3239]	Time 0.202 (0.242)	Data 0.001 (0.016)	Loss 5.8052 (5.8542)	Top-1 acc 3.125 (3.473)	Top-5 acc 14.062 (11.587)	lr 0.04786
Warmup Train [6][1010/3239]	Time 0.271 (0.242)	Data 0.001 (0.015)	Loss 5.8231 (5.8538)	Top-1 acc 3.516 (3.470)	Top-5 acc 13.672 (11.591)	lr 0.04786
Warmup Train [6][1020/3239]	Time 0.344 (0.242)	Data 0.001 (0.015)	Loss 5.7767 (5.8533)	Top-1 acc 3.516 (3.471)	Top-5 acc 11.328 (11.597)	lr 0.04785
Warmup Train [6][1030/3239]	Time 0.166 (0.242)	Data 0.001 (0.015)	Loss 5.7817 (5.8529)	Top-1 acc 4.297 (3.478)	Top-5 acc 12.109 (11.605)	lr 0.04785
Warmup Train [6][1040/3239]	Time 0.263 (0.242)	Data 0.002 (0.015)	Loss 5.7773 (5.8525)	Top-1 acc 3.906 (3.483)	Top-5 acc 15.234 (11.609)	lr 0.04785
Warmup Train [6][1050/3239]	Time 0.198 (0.242)	Data 0.002 (0.015)	Loss 5.8547 (5.8527)	Top-1 acc 5.078 (3.481)	Top-5 acc 13.281 (11.604)	lr 0.04785
Warmup Train [6][1060/3239]	Time 0.156 (0.241)	Data 0.001 (0.015)	Loss 5.9246 (5.8520)	Top-1 acc 2.344 (3.484)	Top-5 acc 10.938 (11.611)	lr 0.04784
Warmup Train [6][1070/3239]	Time 0.223 (0.241)	Data 0.002 (0.015)	Loss 5.9270 (5.8519)	Top-1 acc 1.172 (3.489)	Top-5 acc 10.156 (11.621)	lr 0.04784
Warmup Train [6][1080/3239]	Time 0.245 (0.241)	Data 0.001 (0.015)	Loss 5.7924 (5.8518)	Top-1 acc 3.516 (3.487)	Top-5 acc 14.453 (11.618)	lr 0.04784
Warmup Train [6][1090/3239]	Time 0.232 (0.241)	Data 0.001 (0.015)	Loss 5.8850 (5.8515)	Top-1 acc 3.125 (3.491)	Top-5 acc 12.500 (11.634)	lr 0.04784
Warmup Train [6][1100/3239]	Time 0.188 (0.240)	Data 0.001 (0.014)	Loss 5.7996 (5.8510)	Top-1 acc 3.906 (3.499)	Top-5 acc 11.328 (11.638)	lr 0.04783
Warmup Train [6][1110/3239]	Time 0.302 (0.240)	Data 0.001 (0.014)	Loss 5.8099 (5.8505)	Top-1 acc 3.516 (3.504)	Top-5 acc 11.719 (11.647)	lr 0.04783
Warmup Train [6][1120/3239]	Time 0.227 (0.240)	Data 0.001 (0.014)	Loss 5.8029 (5.8500)	Top-1 acc 4.297 (3.510)	Top-5 acc 15.234 (11.664)	lr 0.04783
Warmup Train [6][1130/3239]	Time 0.219 (0.240)	Data 0.001 (0.014)	Loss 5.7762 (5.8498)	Top-1 acc 3.906 (3.507)	Top-5 acc 10.547 (11.660)	lr 0.04783
Warmup Train [6][1140/3239]	Time 0.228 (0.240)	Data 0.001 (0.014)	Loss 5.7949 (5.8496)	Top-1 acc 5.078 (3.513)	Top-5 acc 13.281 (11.660)	lr 0.04782
Warmup Train [6][1150/3239]	Time 0.207 (0.240)	Data 0.002 (0.014)	Loss 5.8172 (5.8495)	Top-1 acc 3.906 (3.514)	Top-5 acc 14.062 (11.670)	lr 0.04782
Warmup Train [6][1160/3239]	Time 0.188 (0.239)	Data 0.002 (0.014)	Loss 5.7508 (5.8492)	Top-1 acc 5.078 (3.518)	Top-5 acc 14.062 (11.672)	lr 0.04782
Warmup Train [6][1170/3239]	Time 0.221 (0.239)	Data 0.002 (0.014)	Loss 5.6939 (5.8490)	Top-1 acc 5.469 (3.517)	Top-5 acc 17.578 (11.676)	lr 0.04782
Warmup Train [6][1180/3239]	Time 0.194 (0.239)	Data 0.001 (0.014)	Loss 5.8706 (5.8490)	Top-1 acc 3.906 (3.521)	Top-5 acc 12.891 (11.679)	lr 0.04781
Warmup Train [6][1190/3239]	Time 0.164 (0.239)	Data 0.001 (0.014)	Loss 5.8711 (5.8490)	Top-1 acc 2.344 (3.519)	Top-5 acc 8.984 (11.677)	lr 0.04781
Warmup Train [6][1200/3239]	Time 0.192 (0.239)	Data 0.001 (0.014)	Loss 5.8803 (5.8486)	Top-1 acc 3.906 (3.522)	Top-5 acc 7.422 (11.680)	lr 0.04781
Warmup Train [6][1210/3239]	Time 0.190 (0.239)	Data 0.001 (0.013)	Loss 5.8742 (5.8485)	Top-1 acc 1.953 (3.523)	Top-5 acc 10.156 (11.690)	lr 0.04781
Warmup Train [6][1220/3239]	Time 0.229 (0.239)	Data 0.001 (0.013)	Loss 5.7550 (5.8484)	Top-1 acc 3.516 (3.526)	Top-5 acc 12.891 (11.691)	lr 0.04780
Warmup Train [6][1230/3239]	Time 0.258 (0.239)	Data 0.002 (0.013)	Loss 5.8322 (5.8483)	Top-1 acc 5.469 (3.527)	Top-5 acc 13.672 (11.686)	lr 0.04780
Warmup Train [6][1240/3239]	Time 0.186 (0.239)	Data 0.002 (0.013)	Loss 5.8345 (5.8481)	Top-1 acc 4.688 (3.528)	Top-5 acc 11.719 (11.689)	lr 0.04780
Warmup Train [6][1250/3239]	Time 0.212 (0.239)	Data 0.001 (0.013)	Loss 5.7858 (5.8480)	Top-1 acc 3.906 (3.523)	Top-5 acc 11.719 (11.682)	lr 0.04780
Warmup Train [6][1260/3239]	Time 0.247 (0.239)	Data 0.001 (0.013)	Loss 5.7339 (5.8477)	Top-1 acc 4.688 (3.528)	Top-5 acc 14.453 (11.693)	lr 0.04779
Warmup Train [6][1270/3239]	Time 0.231 (0.239)	Data 0.001 (0.013)	Loss 5.8097 (5.8477)	Top-1 acc 3.516 (3.523)	Top-5 acc 12.109 (11.688)	lr 0.04779
Warmup Train [6][1280/3239]	Time 0.153 (0.238)	Data 0.001 (0.013)	Loss 5.9274 (5.8477)	Top-1 acc 2.734 (3.523)	Top-5 acc 10.938 (11.696)	lr 0.04779
Warmup Train [6][1290/3239]	Time 0.198 (0.238)	Data 0.001 (0.013)	Loss 5.7036 (5.8474)	Top-1 acc 5.469 (3.526)	Top-5 acc 15.625 (11.708)	lr 0.04779
Warmup Train [6][1300/3239]	Time 0.224 (0.238)	Data 0.001 (0.013)	Loss 5.8719 (5.8469)	Top-1 acc 3.516 (3.528)	Top-5 acc 13.281 (11.718)	lr 0.04778
Warmup Train [6][1310/3239]	Time 0.148 (0.238)	Data 0.001 (0.013)	Loss 5.9118 (5.8467)	Top-1 acc 2.344 (3.529)	Top-5 acc 9.766 (11.724)	lr 0.04778
Warmup Train [6][1320/3239]	Time 0.222 (0.238)	Data 0.001 (0.013)	Loss 5.8959 (5.8464)	Top-1 acc 2.734 (3.529)	Top-5 acc 7.422 (11.721)	lr 0.04778
Warmup Train [6][1330/3239]	Time 0.197 (0.238)	Data 0.001 (0.013)	Loss 5.7675 (5.8458)	Top-1 acc 4.688 (3.535)	Top-5 acc 13.672 (11.733)	lr 0.04778
Warmup Train [6][1340/3239]	Time 0.305 (0.238)	Data 0.001 (0.013)	Loss 5.8424 (5.8454)	Top-1 acc 2.734 (3.536)	Top-5 acc 12.109 (11.745)	lr 0.04777
Warmup Train [6][1350/3239]	Time 0.196 (0.238)	Data 0.001 (0.013)	Loss 5.6663 (5.8451)	Top-1 acc 3.125 (3.541)	Top-5 acc 14.453 (11.754)	lr 0.04777
Warmup Train [6][1360/3239]	Time 0.203 (0.238)	Data 0.001 (0.012)	Loss 5.8012 (5.8447)	Top-1 acc 7.422 (3.551)	Top-5 acc 13.672 (11.763)	lr 0.04777
Warmup Train [6][1370/3239]	Time 0.207 (0.237)	Data 0.001 (0.012)	Loss 5.7506 (5.8444)	Top-1 acc 6.641 (3.553)	Top-5 acc 15.625 (11.766)	lr 0.04777
Warmup Train [6][1380/3239]	Time 0.219 (0.237)	Data 0.001 (0.012)	Loss 5.8016 (5.8442)	Top-1 acc 4.688 (3.553)	Top-5 acc 14.844 (11.771)	lr 0.04776
Warmup Train [6][1390/3239]	Time 0.233 (0.237)	Data 0.001 (0.012)	Loss 5.8779 (5.8438)	Top-1 acc 3.516 (3.559)	Top-5 acc 13.281 (11.784)	lr 0.04776
Warmup Train [6][1400/3239]	Time 0.150 (0.237)	Data 0.002 (0.012)	Loss 5.8035 (5.8437)	Top-1 acc 3.125 (3.555)	Top-5 acc 11.328 (11.787)	lr 0.04776
Warmup Train [6][1410/3239]	Time 0.241 (0.237)	Data 0.002 (0.012)	Loss 5.8651 (5.8437)	Top-1 acc 3.906 (3.557)	Top-5 acc 11.719 (11.793)	lr 0.04776
Warmup Train [6][1420/3239]	Time 0.198 (0.237)	Data 0.002 (0.012)	Loss 5.8312 (5.8434)	Top-1 acc 2.734 (3.562)	Top-5 acc 11.328 (11.804)	lr 0.04775
Warmup Train [6][1430/3239]	Time 0.211 (0.237)	Data 0.001 (0.012)	Loss 5.7607 (5.8432)	Top-1 acc 5.078 (3.567)	Top-5 acc 13.281 (11.809)	lr 0.04775
Warmup Train [6][1440/3239]	Time 0.272 (0.237)	Data 0.001 (0.012)	Loss 5.8764 (5.8430)	Top-1 acc 2.734 (3.567)	Top-5 acc 10.156 (11.813)	lr 0.04775
Warmup Train [6][1450/3239]	Time 0.188 (0.237)	Data 0.002 (0.012)	Loss 5.8651 (5.8427)	Top-1 acc 3.906 (3.571)	Top-5 acc 10.156 (11.814)	lr 0.04775
Warmup Train [6][1460/3239]	Time 0.193 (0.237)	Data 0.001 (0.012)	Loss 5.7831 (5.8424)	Top-1 acc 2.734 (3.573)	Top-5 acc 10.547 (11.821)	lr 0.04774
Warmup Train [6][1470/3239]	Time 0.128 (0.236)	Data 0.001 (0.012)	Loss 5.7078 (5.8421)	Top-1 acc 2.734 (3.579)	Top-5 acc 10.938 (11.831)	lr 0.04774
Warmup Train [6][1480/3239]	Time 0.246 (0.236)	Data 0.001 (0.012)	Loss 5.6653 (5.8415)	Top-1 acc 3.516 (3.584)	Top-5 acc 14.844 (11.851)	lr 0.04774
Warmup Train [6][1490/3239]	Time 0.207 (0.236)	Data 0.002 (0.012)	Loss 5.8292 (5.8413)	Top-1 acc 4.688 (3.590)	Top-5 acc 13.281 (11.855)	lr 0.04774
Warmup Train [6][1500/3239]	Time 0.243 (0.236)	Data 0.001 (0.012)	Loss 5.8885 (5.8409)	Top-1 acc 2.734 (3.593)	Top-5 acc 11.328 (11.862)	lr 0.04773
Warmup Train [6][1510/3239]	Time 0.215 (0.236)	Data 0.001 (0.012)	Loss 5.7333 (5.8404)	Top-1 acc 4.297 (3.598)	Top-5 acc 15.625 (11.873)	lr 0.04773
Warmup Train [6][1520/3239]	Time 0.232 (0.236)	Data 0.003 (0.012)	Loss 5.8815 (5.8401)	Top-1 acc 3.906 (3.600)	Top-5 acc 15.234 (11.878)	lr 0.04773
Warmup Train [6][1530/3239]	Time 0.186 (0.236)	Data 0.001 (0.011)	Loss 5.7961 (5.8399)	Top-1 acc 4.688 (3.601)	Top-5 acc 11.328 (11.882)	lr 0.04773
Warmup Train [6][1540/3239]	Time 0.306 (0.236)	Data 0.002 (0.011)	Loss 5.7279 (5.8394)	Top-1 acc 5.469 (3.606)	Top-5 acc 14.453 (11.897)	lr 0.04772
Warmup Train [6][1550/3239]	Time 0.203 (0.236)	Data 0.001 (0.011)	Loss 5.8671 (5.8394)	Top-1 acc 4.297 (3.612)	Top-5 acc 13.672 (11.899)	lr 0.04772
Warmup Train [6][1560/3239]	Time 0.246 (0.236)	Data 0.002 (0.011)	Loss 5.9048 (5.8394)	Top-1 acc 2.344 (3.611)	Top-5 acc 9.766 (11.899)	lr 0.04772
Warmup Train [6][1570/3239]	Time 0.203 (0.236)	Data 0.001 (0.011)	Loss 6.0498 (5.8393)	Top-1 acc 1.953 (3.617)	Top-5 acc 9.375 (11.911)	lr 0.04772
Warmup Train [6][1580/3239]	Time 0.238 (0.235)	Data 0.001 (0.011)	Loss 5.8343 (5.8391)	Top-1 acc 4.688 (3.619)	Top-5 acc 12.500 (11.918)	lr 0.04771
Warmup Train [6][1590/3239]	Time 0.228 (0.235)	Data 0.001 (0.011)	Loss 5.8329 (5.8386)	Top-1 acc 3.906 (3.625)	Top-5 acc 11.719 (11.928)	lr 0.04771
Warmup Train [6][1600/3239]	Time 0.119 (0.235)	Data 0.001 (0.011)	Loss 5.7751 (5.8385)	Top-1 acc 5.078 (3.629)	Top-5 acc 12.109 (11.927)	lr 0.04771
Warmup Train [6][1610/3239]	Time 0.191 (0.235)	Data 0.001 (0.011)	Loss 5.7190 (5.8382)	Top-1 acc 5.469 (3.632)	Top-5 acc 16.406 (11.936)	lr 0.04771
Warmup Train [6][1620/3239]	Time 0.180 (0.235)	Data 0.001 (0.011)	Loss 5.9045 (5.8381)	Top-1 acc 3.906 (3.631)	Top-5 acc 13.672 (11.943)	lr 0.04770
Warmup Train [6][1630/3239]	Time 0.237 (0.235)	Data 0.001 (0.011)	Loss 5.7139 (5.8378)	Top-1 acc 4.297 (3.633)	Top-5 acc 12.109 (11.949)	lr 0.04770
Warmup Train [6][1640/3239]	Time 0.213 (0.235)	Data 0.001 (0.011)	Loss 5.8021 (5.8374)	Top-1 acc 5.469 (3.633)	Top-5 acc 12.500 (11.952)	lr 0.04770
Warmup Train [6][1650/3239]	Time 0.301 (0.235)	Data 0.001 (0.011)	Loss 5.8108 (5.8375)	Top-1 acc 1.953 (3.632)	Top-5 acc 12.891 (11.949)	lr 0.04770
Warmup Train [6][1660/3239]	Time 0.218 (0.235)	Data 0.001 (0.011)	Loss 5.7612 (5.8372)	Top-1 acc 2.734 (3.634)	Top-5 acc 12.500 (11.955)	lr 0.04769
Warmup Train [6][1670/3239]	Time 0.167 (0.235)	Data 0.001 (0.011)	Loss 5.6241 (5.8371)	Top-1 acc 5.078 (3.633)	Top-5 acc 14.453 (11.955)	lr 0.04769
Warmup Train [6][1680/3239]	Time 0.224 (0.235)	Data 0.001 (0.011)	Loss 5.7605 (5.8368)	Top-1 acc 4.688 (3.633)	Top-5 acc 16.016 (11.963)	lr 0.04769
Warmup Train [6][1690/3239]	Time 0.268 (0.235)	Data 0.002 (0.011)	Loss 5.7653 (5.8366)	Top-1 acc 6.250 (3.637)	Top-5 acc 17.578 (11.971)	lr 0.04769
Warmup Train [6][1700/3239]	Time 0.208 (0.235)	Data 0.001 (0.011)	Loss 5.7657 (5.8364)	Top-1 acc 3.906 (3.643)	Top-5 acc 13.281 (11.976)	lr 0.04768
Warmup Train [6][1710/3239]	Time 0.238 (0.235)	Data 0.001 (0.011)	Loss 5.7643 (5.8359)	Top-1 acc 2.734 (3.648)	Top-5 acc 9.766 (11.985)	lr 0.04768
Warmup Train [6][1720/3239]	Time 0.226 (0.235)	Data 0.001 (0.011)	Loss 5.7493 (5.8356)	Top-1 acc 5.469 (3.650)	Top-5 acc 16.406 (11.993)	lr 0.04768
Warmup Train [6][1730/3239]	Time 0.185 (0.235)	Data 0.001 (0.010)	Loss 5.6981 (5.8353)	Top-1 acc 6.250 (3.648)	Top-5 acc 16.016 (11.994)	lr 0.04768
Warmup Train [6][1740/3239]	Time 0.293 (0.234)	Data 0.001 (0.010)	Loss 5.8440 (5.8351)	Top-1 acc 2.344 (3.650)	Top-5 acc 12.109 (11.997)	lr 0.04767
Warmup Train [6][1750/3239]	Time 0.224 (0.234)	Data 0.001 (0.010)	Loss 5.8090 (5.8350)	Top-1 acc 3.906 (3.650)	Top-5 acc 16.406 (11.999)	lr 0.04767
Warmup Train [6][1760/3239]	Time 0.149 (0.234)	Data 0.002 (0.010)	Loss 5.8209 (5.8347)	Top-1 acc 4.688 (3.652)	Top-5 acc 9.766 (12.003)	lr 0.04767
Warmup Train [6][1770/3239]	Time 0.215 (0.234)	Data 0.002 (0.010)	Loss 5.9101 (5.8347)	Top-1 acc 4.297 (3.652)	Top-5 acc 12.109 (12.000)	lr 0.04767
Warmup Train [6][1780/3239]	Time 0.187 (0.234)	Data 0.001 (0.010)	Loss 5.7791 (5.8344)	Top-1 acc 3.516 (3.656)	Top-5 acc 9.766 (12.003)	lr 0.04766
Warmup Train [6][1790/3239]	Time 0.254 (0.234)	Data 0.001 (0.010)	Loss 5.7637 (5.8343)	Top-1 acc 4.297 (3.660)	Top-5 acc 11.328 (12.007)	lr 0.04766
Warmup Train [6][1800/3239]	Time 0.287 (0.234)	Data 0.001 (0.010)	Loss 5.8151 (5.8342)	Top-1 acc 3.906 (3.659)	Top-5 acc 12.891 (12.008)	lr 0.04766
Warmup Train [6][1810/3239]	Time 0.165 (0.234)	Data 0.001 (0.010)	Loss 5.9542 (5.8340)	Top-1 acc 2.344 (3.663)	Top-5 acc 8.984 (12.012)	lr 0.04766
Warmup Train [6][1820/3239]	Time 0.208 (0.234)	Data 0.001 (0.010)	Loss 5.7860 (5.8339)	Top-1 acc 5.859 (3.667)	Top-5 acc 14.844 (12.016)	lr 0.04765
Warmup Train [6][1830/3239]	Time 0.258 (0.234)	Data 0.001 (0.010)	Loss 5.7555 (5.8337)	Top-1 acc 3.125 (3.670)	Top-5 acc 12.891 (12.023)	lr 0.04765
Warmup Train [6][1840/3239]	Time 0.196 (0.234)	Data 0.002 (0.010)	Loss 5.8744 (5.8336)	Top-1 acc 2.734 (3.672)	Top-5 acc 10.938 (12.025)	lr 0.04765
Warmup Train [6][1850/3239]	Time 0.281 (0.234)	Data 0.002 (0.010)	Loss 5.7345 (5.8333)	Top-1 acc 5.469 (3.674)	Top-5 acc 16.016 (12.029)	lr 0.04764
Warmup Train [6][1860/3239]	Time 0.225 (0.234)	Data 0.001 (0.010)	Loss 5.6338 (5.8329)	Top-1 acc 7.812 (3.680)	Top-5 acc 17.578 (12.038)	lr 0.04764
Warmup Train [6][1870/3239]	Time 0.147 (0.234)	Data 0.001 (0.010)	Loss 5.9154 (5.8327)	Top-1 acc 2.734 (3.685)	Top-5 acc 10.938 (12.050)	lr 0.04764
Warmup Train [6][1880/3239]	Time 0.196 (0.234)	Data 0.001 (0.010)	Loss 5.7600 (5.8323)	Top-1 acc 2.734 (3.688)	Top-5 acc 13.281 (12.059)	lr 0.04764
Warmup Train [6][1890/3239]	Time 0.167 (0.233)	Data 0.001 (0.010)	Loss 5.9912 (5.8321)	Top-1 acc 3.516 (3.686)	Top-5 acc 8.984 (12.061)	lr 0.04763
Warmup Train [6][1900/3239]	Time 0.205 (0.233)	Data 0.001 (0.010)	Loss 5.7139 (5.8318)	Top-1 acc 3.906 (3.688)	Top-5 acc 15.234 (12.065)	lr 0.04763
Warmup Train [6][1910/3239]	Time 0.252 (0.233)	Data 0.001 (0.010)	Loss 5.7698 (5.8316)	Top-1 acc 4.297 (3.689)	Top-5 acc 12.500 (12.065)	lr 0.04763
Warmup Train [6][1920/3239]	Time 0.248 (0.233)	Data 0.001 (0.010)	Loss 5.8251 (5.8314)	Top-1 acc 3.125 (3.692)	Top-5 acc 10.938 (12.072)	lr 0.04763
Warmup Train [6][1930/3239]	Time 0.217 (0.233)	Data 0.001 (0.010)	Loss 5.7120 (5.8310)	Top-1 acc 5.469 (3.697)	Top-5 acc 15.625 (12.081)	lr 0.04762
Warmup Train [6][1940/3239]	Time 0.200 (0.233)	Data 0.002 (0.010)	Loss 5.8032 (5.8307)	Top-1 acc 5.469 (3.700)	Top-5 acc 16.016 (12.091)	lr 0.04762
Warmup Train [6][1950/3239]	Time 0.182 (0.233)	Data 0.001 (0.010)	Loss 5.7930 (5.8305)	Top-1 acc 3.125 (3.701)	Top-5 acc 12.891 (12.094)	lr 0.04762
Warmup Train [6][1960/3239]	Time 0.273 (0.233)	Data 0.001 (0.010)	Loss 5.8196 (5.8305)	Top-1 acc 3.906 (3.705)	Top-5 acc 13.281 (12.099)	lr 0.04762
Warmup Train [6][1970/3239]	Time 0.206 (0.233)	Data 0.002 (0.010)	Loss 5.9449 (5.8302)	Top-1 acc 2.734 (3.708)	Top-5 acc 10.547 (12.106)	lr 0.04761
Warmup Train [6][1980/3239]	Time 0.248 (0.233)	Data 0.001 (0.010)	Loss 5.8145 (5.8299)	Top-1 acc 1.562 (3.711)	Top-5 acc 10.156 (12.107)	lr 0.04761
Warmup Train [6][1990/3239]	Time 0.199 (0.233)	Data 0.001 (0.010)	Loss 5.7440 (5.8296)	Top-1 acc 2.734 (3.712)	Top-5 acc 13.281 (12.116)	lr 0.04761
Warmup Train [6][2000/3239]	Time 0.198 (0.233)	Data 0.001 (0.009)	Loss 5.7060 (5.8293)	Top-1 acc 2.734 (3.716)	Top-5 acc 14.062 (12.119)	lr 0.04761
Warmup Train [6][2010/3239]	Time 0.224 (0.233)	Data 0.001 (0.009)	Loss 5.7639 (5.8289)	Top-1 acc 2.344 (3.719)	Top-5 acc 10.547 (12.130)	lr 0.04760
Warmup Train [6][2020/3239]	Time 0.252 (0.233)	Data 0.001 (0.009)	Loss 5.7203 (5.8288)	Top-1 acc 5.078 (3.722)	Top-5 acc 15.625 (12.135)	lr 0.04760
Warmup Train [6][2030/3239]	Time 0.235 (0.233)	Data 0.001 (0.009)	Loss 5.6798 (5.8285)	Top-1 acc 4.297 (3.722)	Top-5 acc 16.406 (12.139)	lr 0.04760
Warmup Train [6][2040/3239]	Time 0.185 (0.233)	Data 0.002 (0.009)	Loss 5.8106 (5.8285)	Top-1 acc 5.469 (3.724)	Top-5 acc 13.672 (12.144)	lr 0.04760
Warmup Train [6][2050/3239]	Time 0.225 (0.233)	Data 0.001 (0.009)	Loss 5.7770 (5.8280)	Top-1 acc 6.641 (3.728)	Top-5 acc 14.453 (12.158)	lr 0.04759
Warmup Train [6][2060/3239]	Time 0.199 (0.233)	Data 0.001 (0.009)	Loss 5.7948 (5.8280)	Top-1 acc 3.516 (3.730)	Top-5 acc 11.719 (12.159)	lr 0.04759
Warmup Train [6][2070/3239]	Time 0.264 (0.233)	Data 0.001 (0.009)	Loss 5.8504 (5.8277)	Top-1 acc 5.469 (3.731)	Top-5 acc 10.547 (12.163)	lr 0.04759
Warmup Train [6][2080/3239]	Time 0.187 (0.233)	Data 0.001 (0.009)	Loss 5.8036 (5.8274)	Top-1 acc 4.297 (3.731)	Top-5 acc 12.109 (12.169)	lr 0.04759
Warmup Train [6][2090/3239]	Time 0.163 (0.232)	Data 0.001 (0.009)	Loss 5.8368 (5.8271)	Top-1 acc 3.906 (3.733)	Top-5 acc 11.719 (12.173)	lr 0.04758
Warmup Train [6][2100/3239]	Time 0.236 (0.232)	Data 0.001 (0.009)	Loss 5.8073 (5.8268)	Top-1 acc 5.469 (3.740)	Top-5 acc 14.844 (12.183)	lr 0.04758
Warmup Train [6][2110/3239]	Time 0.182 (0.232)	Data 0.002 (0.009)	Loss 5.6920 (5.8264)	Top-1 acc 3.516 (3.741)	Top-5 acc 16.016 (12.190)	lr 0.04758
Warmup Train [6][2120/3239]	Time 0.255 (0.232)	Data 0.001 (0.009)	Loss 5.8226 (5.8260)	Top-1 acc 3.906 (3.744)	Top-5 acc 12.500 (12.202)	lr 0.04758
Warmup Train [6][2130/3239]	Time 0.256 (0.232)	Data 0.001 (0.009)	Loss 5.7460 (5.8257)	Top-1 acc 4.688 (3.743)	Top-5 acc 14.062 (12.204)	lr 0.04757
Warmup Train [6][2140/3239]	Time 0.241 (0.232)	Data 0.002 (0.009)	Loss 5.8931 (5.8255)	Top-1 acc 2.734 (3.748)	Top-5 acc 11.719 (12.212)	lr 0.04757
Warmup Train [6][2150/3239]	Time 0.278 (0.232)	Data 0.001 (0.009)	Loss 5.8246 (5.8252)	Top-1 acc 2.734 (3.751)	Top-5 acc 14.062 (12.219)	lr 0.04757
Warmup Train [6][2160/3239]	Time 0.194 (0.232)	Data 0.001 (0.009)	Loss 5.7709 (5.8251)	Top-1 acc 5.859 (3.751)	Top-5 acc 14.453 (12.219)	lr 0.04756
Warmup Train [6][2170/3239]	Time 0.315 (0.232)	Data 0.001 (0.009)	Loss 5.8290 (5.8247)	Top-1 acc 4.297 (3.755)	Top-5 acc 11.719 (12.224)	lr 0.04756
Warmup Train [6][2180/3239]	Time 0.187 (0.232)	Data 0.001 (0.009)	Loss 5.7059 (5.8244)	Top-1 acc 2.344 (3.755)	Top-5 acc 9.375 (12.233)	lr 0.04756
Warmup Train [6][2190/3239]	Time 0.219 (0.232)	Data 0.001 (0.009)	Loss 5.7894 (5.8242)	Top-1 acc 4.688 (3.757)	Top-5 acc 14.844 (12.239)	lr 0.04756
Warmup Train [6][2200/3239]	Time 0.276 (0.232)	Data 0.001 (0.009)	Loss 5.7955 (5.8239)	Top-1 acc 4.297 (3.763)	Top-5 acc 12.891 (12.248)	lr 0.04755
Warmup Train [6][2210/3239]	Time 0.204 (0.232)	Data 0.001 (0.009)	Loss 5.6759 (5.8237)	Top-1 acc 3.906 (3.763)	Top-5 acc 16.797 (12.251)	lr 0.04755
Warmup Train [6][2220/3239]	Time 0.212 (0.232)	Data 0.001 (0.009)	Loss 5.7334 (5.8235)	Top-1 acc 6.250 (3.765)	Top-5 acc 17.188 (12.255)	lr 0.04755
Warmup Train [6][2230/3239]	Time 0.181 (0.232)	Data 0.001 (0.009)	Loss 5.7519 (5.8232)	Top-1 acc 4.297 (3.765)	Top-5 acc 12.891 (12.262)	lr 0.04755
Warmup Train [6][2240/3239]	Time 0.202 (0.232)	Data 0.002 (0.009)	Loss 5.9286 (5.8231)	Top-1 acc 2.344 (3.768)	Top-5 acc 8.594 (12.263)	lr 0.04754
Warmup Train [6][2250/3239]	Time 0.207 (0.232)	Data 0.001 (0.009)	Loss 5.7269 (5.8228)	Top-1 acc 4.297 (3.772)	Top-5 acc 14.844 (12.273)	lr 0.04754
Warmup Train [6][2260/3239]	Time 0.213 (0.232)	Data 0.001 (0.009)	Loss 5.8228 (5.8226)	Top-1 acc 5.859 (3.777)	Top-5 acc 17.578 (12.284)	lr 0.04754
Warmup Train [6][2270/3239]	Time 0.176 (0.232)	Data 0.002 (0.009)	Loss 5.7617 (5.8222)	Top-1 acc 5.859 (3.783)	Top-5 acc 19.141 (12.298)	lr 0.04754
Warmup Train [6][2280/3239]	Time 0.368 (0.232)	Data 0.001 (0.009)	Loss 5.6707 (5.8220)	Top-1 acc 5.078 (3.787)	Top-5 acc 15.625 (12.307)	lr 0.04753
Warmup Train [6][2290/3239]	Time 0.187 (0.232)	Data 0.002 (0.009)	Loss 5.8633 (5.8218)	Top-1 acc 4.688 (3.789)	Top-5 acc 12.500 (12.312)	lr 0.04753
Warmup Train [6][2300/3239]	Time 0.150 (0.232)	Data 0.001 (0.009)	Loss 5.8311 (5.8216)	Top-1 acc 1.953 (3.791)	Top-5 acc 11.328 (12.319)	lr 0.04753
Warmup Train [6][2310/3239]	Time 0.299 (0.232)	Data 0.001 (0.009)	Loss 5.8685 (5.8214)	Top-1 acc 3.906 (3.795)	Top-5 acc 11.719 (12.323)	lr 0.04753
Warmup Train [6][2320/3239]	Time 0.191 (0.232)	Data 0.002 (0.009)	Loss 5.7541 (5.8212)	Top-1 acc 2.734 (3.797)	Top-5 acc 12.500 (12.326)	lr 0.04752
Warmup Train [6][2330/3239]	Time 0.382 (0.232)	Data 0.001 (0.009)	Loss 5.6962 (5.8209)	Top-1 acc 3.516 (3.797)	Top-5 acc 12.891 (12.330)	lr 0.04752
Warmup Train [6][2340/3239]	Time 0.216 (0.232)	Data 0.001 (0.009)	Loss 5.7450 (5.8207)	Top-1 acc 2.734 (3.796)	Top-5 acc 12.891 (12.332)	lr 0.04752
Warmup Train [6][2350/3239]	Time 0.276 (0.232)	Data 0.001 (0.009)	Loss 5.8769 (5.8204)	Top-1 acc 2.344 (3.798)	Top-5 acc 10.547 (12.332)	lr 0.04751
Warmup Train [6][2360/3239]	Time 0.309 (0.232)	Data 0.001 (0.009)	Loss 5.8270 (5.8202)	Top-1 acc 3.906 (3.801)	Top-5 acc 10.938 (12.334)	lr 0.04751
Warmup Train [6][2370/3239]	Time 0.259 (0.232)	Data 0.001 (0.008)	Loss 5.6175 (5.8199)	Top-1 acc 5.078 (3.805)	Top-5 acc 16.797 (12.343)	lr 0.04751
Warmup Train [6][2380/3239]	Time 0.321 (0.232)	Data 0.001 (0.008)	Loss 5.7510 (5.8196)	Top-1 acc 4.688 (3.806)	Top-5 acc 11.719 (12.349)	lr 0.04751
Warmup Train [6][2390/3239]	Time 0.202 (0.232)	Data 0.001 (0.008)	Loss 5.6544 (5.8193)	Top-1 acc 3.125 (3.805)	Top-5 acc 17.188 (12.353)	lr 0.04750
Warmup Train [6][2400/3239]	Time 0.160 (0.232)	Data 0.001 (0.008)	Loss 5.8058 (5.8190)	Top-1 acc 4.297 (3.810)	Top-5 acc 12.109 (12.362)	lr 0.04750
Warmup Train [6][2410/3239]	Time 0.192 (0.232)	Data 0.002 (0.008)	Loss 5.7731 (5.8188)	Top-1 acc 2.734 (3.813)	Top-5 acc 10.938 (12.366)	lr 0.04750
Warmup Train [6][2420/3239]	Time 0.232 (0.232)	Data 0.031 (0.008)	Loss 5.6928 (5.8187)	Top-1 acc 6.641 (3.814)	Top-5 acc 15.234 (12.371)	lr 0.04750
Warmup Train [6][2430/3239]	Time 0.229 (0.232)	Data 0.001 (0.008)	Loss 5.7379 (5.8184)	Top-1 acc 3.906 (3.814)	Top-5 acc 14.062 (12.376)	lr 0.04749
Warmup Train [6][2440/3239]	Time 0.186 (0.231)	Data 0.001 (0.008)	Loss 5.8354 (5.8182)	Top-1 acc 2.344 (3.818)	Top-5 acc 9.766 (12.382)	lr 0.04749
Warmup Train [6][2450/3239]	Time 0.244 (0.231)	Data 0.001 (0.008)	Loss 5.7515 (5.8178)	Top-1 acc 5.469 (3.821)	Top-5 acc 15.625 (12.390)	lr 0.04749
Warmup Train [6][2460/3239]	Time 0.223 (0.231)	Data 0.001 (0.008)	Loss 5.6330 (5.8177)	Top-1 acc 7.422 (3.823)	Top-5 acc 16.016 (12.397)	lr 0.04749
Warmup Train [6][2470/3239]	Time 0.192 (0.231)	Data 0.001 (0.008)	Loss 5.6689 (5.8173)	Top-1 acc 5.859 (3.827)	Top-5 acc 13.672 (12.399)	lr 0.04748
Warmup Train [6][2480/3239]	Time 0.215 (0.231)	Data 0.001 (0.008)	Loss 5.8063 (5.8172)	Top-1 acc 2.734 (3.826)	Top-5 acc 13.281 (12.400)	lr 0.04748
Warmup Train [6][2490/3239]	Time 0.305 (0.231)	Data 0.001 (0.008)	Loss 5.8691 (5.8170)	Top-1 acc 2.734 (3.828)	Top-5 acc 10.938 (12.404)	lr 0.04748
Warmup Train [6][2500/3239]	Time 0.180 (0.231)	Data 0.001 (0.008)	Loss 5.7622 (5.8167)	Top-1 acc 5.859 (3.833)	Top-5 acc 13.672 (12.408)	lr 0.04748
Warmup Train [6][2510/3239]	Time 0.202 (0.231)	Data 0.001 (0.008)	Loss 5.7214 (5.8166)	Top-1 acc 4.297 (3.833)	Top-5 acc 13.281 (12.407)	lr 0.04747
Warmup Train [6][2520/3239]	Time 0.255 (0.231)	Data 0.001 (0.008)	Loss 5.8363 (5.8164)	Top-1 acc 1.953 (3.833)	Top-5 acc 12.109 (12.413)	lr 0.04747
Warmup Train [6][2530/3239]	Time 0.201 (0.231)	Data 0.001 (0.008)	Loss 5.6153 (5.8161)	Top-1 acc 3.516 (3.833)	Top-5 acc 15.625 (12.418)	lr 0.04747
Warmup Train [6][2540/3239]	Time 0.229 (0.231)	Data 0.001 (0.008)	Loss 5.8686 (5.8160)	Top-1 acc 3.125 (3.834)	Top-5 acc 12.500 (12.423)	lr 0.04746
Warmup Train [6][2550/3239]	Time 0.172 (0.231)	Data 0.002 (0.008)	Loss 5.7771 (5.8158)	Top-1 acc 4.688 (3.837)	Top-5 acc 12.500 (12.426)	lr 0.04746
Warmup Train [6][2560/3239]	Time 0.237 (0.231)	Data 0.001 (0.008)	Loss 5.7693 (5.8155)	Top-1 acc 3.906 (3.840)	Top-5 acc 15.234 (12.432)	lr 0.04746
Warmup Train [6][2570/3239]	Time 0.153 (0.231)	Data 0.002 (0.008)	Loss 5.7414 (5.8152)	Top-1 acc 5.078 (3.844)	Top-5 acc 11.719 (12.439)	lr 0.04746
Warmup Train [6][2580/3239]	Time 0.179 (0.231)	Data 0.001 (0.008)	Loss 5.6919 (5.8149)	Top-1 acc 6.641 (3.846)	Top-5 acc 16.016 (12.446)	lr 0.04745
Warmup Train [6][2590/3239]	Time 0.242 (0.231)	Data 0.001 (0.008)	Loss 5.8447 (5.8146)	Top-1 acc 3.906 (3.848)	Top-5 acc 12.500 (12.452)	lr 0.04745
Warmup Train [6][2600/3239]	Time 0.366 (0.231)	Data 0.001 (0.008)	Loss 5.7719 (5.8143)	Top-1 acc 3.906 (3.851)	Top-5 acc 16.797 (12.457)	lr 0.04745
Warmup Train [6][2610/3239]	Time 0.228 (0.231)	Data 0.001 (0.008)	Loss 5.7913 (5.8141)	Top-1 acc 3.125 (3.852)	Top-5 acc 13.281 (12.459)	lr 0.04745
Warmup Train [6][2620/3239]	Time 0.155 (0.231)	Data 0.001 (0.008)	Loss 5.7584 (5.8138)	Top-1 acc 3.906 (3.856)	Top-5 acc 14.453 (12.467)	lr 0.04744
Warmup Train [6][2630/3239]	Time 0.249 (0.231)	Data 0.001 (0.008)	Loss 5.7134 (5.8133)	Top-1 acc 3.906 (3.859)	Top-5 acc 13.281 (12.474)	lr 0.04744
Warmup Train [6][2640/3239]	Time 0.215 (0.231)	Data 0.002 (0.008)	Loss 5.6833 (5.8130)	Top-1 acc 4.688 (3.864)	Top-5 acc 16.797 (12.485)	lr 0.04744
Warmup Train [6][2650/3239]	Time 0.249 (0.231)	Data 0.002 (0.008)	Loss 5.6653 (5.8127)	Top-1 acc 7.422 (3.869)	Top-5 acc 18.359 (12.495)	lr 0.04744
Warmup Train [6][2660/3239]	Time 0.233 (0.231)	Data 0.001 (0.008)	Loss 5.7210 (5.8125)	Top-1 acc 7.031 (3.873)	Top-5 acc 12.500 (12.500)	lr 0.04743
Warmup Train [6][2670/3239]	Time 0.268 (0.231)	Data 0.001 (0.008)	Loss 5.8160 (5.8124)	Top-1 acc 5.469 (3.872)	Top-5 acc 14.453 (12.499)	lr 0.04743
Warmup Train [6][2680/3239]	Time 0.155 (0.231)	Data 0.002 (0.008)	Loss 5.6702 (5.8121)	Top-1 acc 5.078 (3.873)	Top-5 acc 13.281 (12.504)	lr 0.04743
Warmup Train [6][2690/3239]	Time 0.168 (0.231)	Data 0.001 (0.008)	Loss 5.8081 (5.8118)	Top-1 acc 3.516 (3.873)	Top-5 acc 13.672 (12.507)	lr 0.04742
Warmup Train [6][2700/3239]	Time 0.331 (0.231)	Data 0.002 (0.008)	Loss 5.7655 (5.8116)	Top-1 acc 3.906 (3.876)	Top-5 acc 13.281 (12.510)	lr 0.04742
Warmup Train [6][2710/3239]	Time 0.208 (0.231)	Data 0.001 (0.008)	Loss 5.6833 (5.8115)	Top-1 acc 3.516 (3.880)	Top-5 acc 18.750 (12.518)	lr 0.04742
Warmup Train [6][2720/3239]	Time 0.200 (0.231)	Data 0.002 (0.008)	Loss 5.7168 (5.8113)	Top-1 acc 2.734 (3.881)	Top-5 acc 12.109 (12.523)	lr 0.04742
Warmup Train [6][2730/3239]	Time 0.170 (0.231)	Data 0.001 (0.008)	Loss 5.7305 (5.8110)	Top-1 acc 2.734 (3.883)	Top-5 acc 14.453 (12.529)	lr 0.04741
Warmup Train [6][2740/3239]	Time 0.209 (0.231)	Data 0.001 (0.008)	Loss 5.6213 (5.8108)	Top-1 acc 4.297 (3.884)	Top-5 acc 15.625 (12.532)	lr 0.04741
Warmup Train [6][2750/3239]	Time 0.275 (0.231)	Data 0.001 (0.008)	Loss 5.8284 (5.8106)	Top-1 acc 2.734 (3.888)	Top-5 acc 11.719 (12.537)	lr 0.04741
Warmup Train [6][2760/3239]	Time 0.188 (0.231)	Data 0.024 (0.008)	Loss 5.7690 (5.8105)	Top-1 acc 2.344 (3.892)	Top-5 acc 14.062 (12.541)	lr 0.04741
Warmup Train [6][2770/3239]	Time 0.213 (0.231)	Data 0.001 (0.008)	Loss 5.7864 (5.8101)	Top-1 acc 3.906 (3.893)	Top-5 acc 12.500 (12.549)	lr 0.04740
Warmup Train [6][2780/3239]	Time 0.247 (0.231)	Data 0.001 (0.008)	Loss 5.6696 (5.8099)	Top-1 acc 5.859 (3.896)	Top-5 acc 18.359 (12.550)	lr 0.04740
Warmup Train [6][2790/3239]	Time 0.189 (0.231)	Data 0.001 (0.008)	Loss 5.5611 (5.8096)	Top-1 acc 7.422 (3.899)	Top-5 acc 20.312 (12.558)	lr 0.04740
Warmup Train [6][2800/3239]	Time 0.176 (0.231)	Data 0.001 (0.008)	Loss 5.6701 (5.8094)	Top-1 acc 5.469 (3.901)	Top-5 acc 14.453 (12.561)	lr 0.04739
Warmup Train [6][2810/3239]	Time 0.242 (0.231)	Data 0.001 (0.008)	Loss 5.7168 (5.8092)	Top-1 acc 5.859 (3.903)	Top-5 acc 15.625 (12.566)	lr 0.04739
Warmup Train [6][2820/3239]	Time 0.190 (0.231)	Data 0.001 (0.008)	Loss 5.8106 (5.8090)	Top-1 acc 1.953 (3.903)	Top-5 acc 12.109 (12.572)	lr 0.04739
Warmup Train [6][2830/3239]	Time 0.271 (0.231)	Data 0.001 (0.008)	Loss 5.7974 (5.8089)	Top-1 acc 3.516 (3.905)	Top-5 acc 14.453 (12.577)	lr 0.04739
Warmup Train [6][2840/3239]	Time 0.142 (0.231)	Data 0.001 (0.008)	Loss 5.7587 (5.8087)	Top-1 acc 3.125 (3.907)	Top-5 acc 12.109 (12.583)	lr 0.04738
Warmup Train [6][2850/3239]	Time 0.177 (0.231)	Data 0.002 (0.008)	Loss 5.6355 (5.8084)	Top-1 acc 5.859 (3.909)	Top-5 acc 16.016 (12.589)	lr 0.04738
Warmup Train [6][2860/3239]	Time 0.247 (0.231)	Data 0.001 (0.008)	Loss 5.7822 (5.8081)	Top-1 acc 6.641 (3.912)	Top-5 acc 15.625 (12.599)	lr 0.04738
Warmup Train [6][2870/3239]	Time 0.268 (0.231)	Data 0.001 (0.008)	Loss 5.6440 (5.8078)	Top-1 acc 4.297 (3.915)	Top-5 acc 14.062 (12.605)	lr 0.04738
Warmup Train [6][2880/3239]	Time 0.258 (0.231)	Data 0.001 (0.008)	Loss 5.8001 (5.8075)	Top-1 acc 5.469 (3.917)	Top-5 acc 15.625 (12.611)	lr 0.04737
Warmup Train [6][2890/3239]	Time 0.172 (0.231)	Data 0.001 (0.008)	Loss 5.6514 (5.8073)	Top-1 acc 3.516 (3.915)	Top-5 acc 17.578 (12.617)	lr 0.04737
Warmup Train [6][2900/3239]	Time 0.223 (0.230)	Data 0.001 (0.008)	Loss 5.7557 (5.8070)	Top-1 acc 1.562 (3.917)	Top-5 acc 13.281 (12.623)	lr 0.04737
Warmup Train [6][2910/3239]	Time 0.182 (0.230)	Data 0.001 (0.008)	Loss 5.7557 (5.8067)	Top-1 acc 3.906 (3.918)	Top-5 acc 12.500 (12.628)	lr 0.04737
Warmup Train [6][2920/3239]	Time 0.196 (0.230)	Data 0.001 (0.007)	Loss 5.8224 (5.8066)	Top-1 acc 4.297 (3.919)	Top-5 acc 14.844 (12.634)	lr 0.04736
Warmup Train [6][2930/3239]	Time 0.208 (0.230)	Data 0.001 (0.007)	Loss 5.6867 (5.8065)	Top-1 acc 4.297 (3.921)	Top-5 acc 13.281 (12.637)	lr 0.04736
Warmup Train [6][2940/3239]	Time 0.397 (0.231)	Data 0.001 (0.007)	Loss 5.7114 (5.8061)	Top-1 acc 3.906 (3.923)	Top-5 acc 16.797 (12.648)	lr 0.04736
Warmup Train [6][2950/3239]	Time 0.226 (0.230)	Data 0.001 (0.007)	Loss 5.7985 (5.8059)	Top-1 acc 4.688 (3.924)	Top-5 acc 13.672 (12.652)	lr 0.04735
Warmup Train [6][2960/3239]	Time 0.233 (0.230)	Data 0.001 (0.007)	Loss 5.6982 (5.8056)	Top-1 acc 3.906 (3.925)	Top-5 acc 14.453 (12.660)	lr 0.04735
Warmup Train [6][2970/3239]	Time 0.196 (0.230)	Data 0.001 (0.007)	Loss 5.6578 (5.8053)	Top-1 acc 6.250 (3.929)	Top-5 acc 17.188 (12.664)	lr 0.04735
Warmup Train [6][2980/3239]	Time 0.199 (0.230)	Data 0.001 (0.007)	Loss 5.8345 (5.8050)	Top-1 acc 2.734 (3.932)	Top-5 acc 12.891 (12.673)	lr 0.04735
Warmup Train [6][2990/3239]	Time 0.223 (0.230)	Data 0.001 (0.007)	Loss 5.6448 (5.8046)	Top-1 acc 3.906 (3.935)	Top-5 acc 14.453 (12.682)	lr 0.04734
Warmup Train [6][3000/3239]	Time 0.186 (0.230)	Data 0.001 (0.007)	Loss 5.6777 (5.8044)	Top-1 acc 4.688 (3.935)	Top-5 acc 17.969 (12.683)	lr 0.04734
Warmup Train [6][3010/3239]	Time 0.193 (0.230)	Data 0.001 (0.007)	Loss 5.7075 (5.8043)	Top-1 acc 6.641 (3.937)	Top-5 acc 16.016 (12.685)	lr 0.04734
Warmup Train [6][3020/3239]	Time 0.203 (0.230)	Data 0.001 (0.007)	Loss 5.7143 (5.8040)	Top-1 acc 6.641 (3.942)	Top-5 acc 18.359 (12.695)	lr 0.04734
Warmup Train [6][3030/3239]	Time 0.218 (0.230)	Data 0.001 (0.007)	Loss 5.7755 (5.8037)	Top-1 acc 3.125 (3.944)	Top-5 acc 10.156 (12.700)	lr 0.04733
Warmup Train [6][3040/3239]	Time 0.305 (0.230)	Data 0.001 (0.007)	Loss 5.6565 (5.8034)	Top-1 acc 6.641 (3.948)	Top-5 acc 15.234 (12.706)	lr 0.04733
Warmup Train [6][3050/3239]	Time 0.205 (0.230)	Data 0.001 (0.007)	Loss 5.7696 (5.8033)	Top-1 acc 2.734 (3.947)	Top-5 acc 12.109 (12.708)	lr 0.04733
Warmup Train [6][3060/3239]	Time 0.282 (0.230)	Data 0.002 (0.007)	Loss 5.6250 (5.8030)	Top-1 acc 5.078 (3.951)	Top-5 acc 17.188 (12.715)	lr 0.04732
Warmup Train [6][3070/3239]	Time 0.160 (0.230)	Data 0.002 (0.007)	Loss 5.8225 (5.8026)	Top-1 acc 4.297 (3.953)	Top-5 acc 12.109 (12.720)	lr 0.04732
Warmup Train [6][3080/3239]	Time 0.199 (0.230)	Data 0.001 (0.007)	Loss 5.8603 (5.8024)	Top-1 acc 3.516 (3.956)	Top-5 acc 10.938 (12.727)	lr 0.04732
Warmup Train [6][3090/3239]	Time 0.237 (0.230)	Data 0.001 (0.007)	Loss 5.7432 (5.8021)	Top-1 acc 3.125 (3.958)	Top-5 acc 13.281 (12.734)	lr 0.04732
Warmup Train [6][3100/3239]	Time 0.257 (0.230)	Data 0.001 (0.007)	Loss 5.7450 (5.8019)	Top-1 acc 4.297 (3.960)	Top-5 acc 16.797 (12.738)	lr 0.04731
Warmup Train [6][3110/3239]	Time 0.210 (0.230)	Data 0.001 (0.007)	Loss 5.8119 (5.8017)	Top-1 acc 1.562 (3.963)	Top-5 acc 12.109 (12.746)	lr 0.04731
Warmup Train [6][3120/3239]	Time 0.180 (0.230)	Data 0.001 (0.007)	Loss 5.7768 (5.8015)	Top-1 acc 5.859 (3.965)	Top-5 acc 14.844 (12.749)	lr 0.04731
Warmup Train [6][3130/3239]	Time 0.198 (0.230)	Data 0.002 (0.007)	Loss 5.7157 (5.8012)	Top-1 acc 6.250 (3.965)	Top-5 acc 11.719 (12.755)	lr 0.04731
Warmup Train [6][3140/3239]	Time 0.238 (0.230)	Data 0.001 (0.007)	Loss 5.6660 (5.8009)	Top-1 acc 5.469 (3.970)	Top-5 acc 13.281 (12.766)	lr 0.04730
Warmup Train [6][3150/3239]	Time 0.223 (0.230)	Data 0.001 (0.007)	Loss 5.6464 (5.8007)	Top-1 acc 4.688 (3.973)	Top-5 acc 16.797 (12.771)	lr 0.04730
Warmup Train [6][3160/3239]	Time 0.241 (0.230)	Data 0.001 (0.007)	Loss 5.6908 (5.8005)	Top-1 acc 3.906 (3.977)	Top-5 acc 16.406 (12.776)	lr 0.04730
Warmup Train [6][3170/3239]	Time 0.236 (0.230)	Data 0.001 (0.007)	Loss 5.7802 (5.8003)	Top-1 acc 3.516 (3.981)	Top-5 acc 13.672 (12.785)	lr 0.04729
Warmup Train [6][3180/3239]	Time 0.228 (0.230)	Data 0.000 (0.007)	Loss 5.6384 (5.8001)	Top-1 acc 6.250 (3.984)	Top-5 acc 18.359 (12.793)	lr 0.04729
Warmup Train [6][3190/3239]	Time 0.173 (0.230)	Data 0.000 (0.007)	Loss 5.6359 (5.7998)	Top-1 acc 5.078 (3.986)	Top-5 acc 17.578 (12.801)	lr 0.04729
Warmup Train [6][3200/3239]	Time 0.235 (0.230)	Data 0.000 (0.007)	Loss 5.7375 (5.7995)	Top-1 acc 4.688 (3.988)	Top-5 acc 12.109 (12.804)	lr 0.04729
Warmup Train [6][3210/3239]	Time 0.202 (0.230)	Data 0.000 (0.007)	Loss 5.8047 (5.7994)	Top-1 acc 4.297 (3.991)	Top-5 acc 12.500 (12.807)	lr 0.04728
Warmup Train [6][3220/3239]	Time 0.141 (0.230)	Data 0.000 (0.007)	Loss 5.7879 (5.7994)	Top-1 acc 4.297 (3.991)	Top-5 acc 14.062 (12.810)	lr 0.04728
Warmup Train [6][3230/3239]	Time 0.199 (0.230)	Data 0.000 (0.007)	Loss 5.7002 (5.7991)	Top-1 acc 6.250 (3.995)	Top-5 acc 17.188 (12.817)	lr 0.04728
Warmup Train [6][3239/3239]	Time 0.172 (0.230)	Data 0.000 (0.007)	Loss 5.5931 (5.7990)	Top-1 acc 9.877 (3.997)	Top-5 acc 23.457 (12.823)	lr 0.04728
==========Warmup Valid [6/40]	loss 5.247	top-1 acc 5.964	top-5 acc 17.953	Train top-1 3.997	top-5 12.823	flops: 442.4M
Warmup Train [7][0/3239]	Time 13.133 (13.133)	Data 11.607 (11.607)	Loss 5.7373 (5.7373)	Top-1 acc 4.688 (4.688)	Top-5 acc 14.453 (14.453)	lr 0.04728
Warmup Train [7][10/3239]	Time 0.269 (1.519)	Data 0.002 (1.079)	Loss 5.7273 (5.7068)	Top-1 acc 3.516 (5.114)	Top-5 acc 13.281 (16.193)	lr 0.04727
Warmup Train [7][20/3239]	Time 0.252 (0.932)	Data 0.002 (0.567)	Loss 5.8073 (5.7054)	Top-1 acc 5.469 (5.301)	Top-5 acc 10.547 (15.495)	lr 0.04727
Warmup Train [7][30/3239]	Time 0.285 (0.706)	Data 0.001 (0.384)	Loss 5.6315 (5.7033)	Top-1 acc 8.203 (5.318)	Top-5 acc 17.188 (15.323)	lr 0.04727
Warmup Train [7][40/3239]	Time 0.445 (0.599)	Data 0.002 (0.291)	Loss 5.7502 (5.7084)	Top-1 acc 2.734 (5.059)	Top-5 acc 12.109 (15.196)	lr 0.04726
Warmup Train [7][50/3239]	Time 0.294 (0.528)	Data 0.002 (0.234)	Loss 5.6854 (5.7105)	Top-1 acc 4.688 (5.017)	Top-5 acc 16.016 (14.982)	lr 0.04726
Warmup Train [7][60/3239]	Time 0.272 (0.482)	Data 0.001 (0.196)	Loss 5.7701 (5.7137)	Top-1 acc 5.078 (5.078)	Top-5 acc 14.062 (15.017)	lr 0.04726
Warmup Train [7][70/3239]	Time 0.250 (0.448)	Data 0.001 (0.169)	Loss 5.7172 (5.7135)	Top-1 acc 4.688 (5.051)	Top-5 acc 16.016 (14.910)	lr 0.04726
Warmup Train [7][80/3239]	Time 0.184 (0.420)	Data 0.001 (0.149)	Loss 5.7191 (5.7145)	Top-1 acc 4.297 (5.006)	Top-5 acc 15.234 (14.926)	lr 0.04725
Warmup Train [7][90/3239]	Time 0.243 (0.400)	Data 0.002 (0.133)	Loss 5.6658 (5.7149)	Top-1 acc 5.078 (4.936)	Top-5 acc 14.844 (14.792)	lr 0.04725
Warmup Train [7][100/3239]	Time 0.225 (0.384)	Data 0.001 (0.120)	Loss 5.7274 (5.7170)	Top-1 acc 3.906 (4.935)	Top-5 acc 14.062 (14.824)	lr 0.04725
Warmup Train [7][110/3239]	Time 0.240 (0.369)	Data 0.001 (0.109)	Loss 5.6762 (5.7175)	Top-1 acc 4.688 (4.895)	Top-5 acc 15.234 (14.816)	lr 0.04724
Warmup Train [7][120/3239]	Time 0.184 (0.357)	Data 0.002 (0.100)	Loss 5.5943 (5.7186)	Top-1 acc 5.469 (4.823)	Top-5 acc 14.453 (14.679)	lr 0.04724
Warmup Train [7][130/3239]	Time 0.299 (0.349)	Data 0.001 (0.093)	Loss 5.7782 (5.7199)	Top-1 acc 5.859 (4.831)	Top-5 acc 14.062 (14.638)	lr 0.04724
Warmup Train [7][140/3239]	Time 0.311 (0.340)	Data 0.002 (0.087)	Loss 5.7781 (5.7194)	Top-1 acc 5.078 (4.826)	Top-5 acc 15.234 (14.672)	lr 0.04724
Warmup Train [7][150/3239]	Time 0.205 (0.332)	Data 0.001 (0.081)	Loss 5.6407 (5.7184)	Top-1 acc 4.688 (4.788)	Top-5 acc 17.969 (14.670)	lr 0.04723
Warmup Train [7][160/3239]	Time 0.192 (0.326)	Data 0.001 (0.076)	Loss 5.7228 (5.7188)	Top-1 acc 5.469 (4.821)	Top-5 acc 15.625 (14.708)	lr 0.04723
Warmup Train [7][170/3239]	Time 0.199 (0.321)	Data 0.001 (0.072)	Loss 5.6861 (5.7178)	Top-1 acc 3.906 (4.870)	Top-5 acc 16.797 (14.775)	lr 0.04723
Warmup Train [7][180/3239]	Time 0.208 (0.315)	Data 0.001 (0.068)	Loss 5.5503 (5.7162)	Top-1 acc 5.859 (4.882)	Top-5 acc 17.578 (14.811)	lr 0.04723
Warmup Train [7][190/3239]	Time 0.184 (0.310)	Data 0.001 (0.064)	Loss 5.7377 (5.7166)	Top-1 acc 3.906 (4.890)	Top-5 acc 13.281 (14.801)	lr 0.04722
Warmup Train [7][200/3239]	Time 0.191 (0.306)	Data 0.002 (0.061)	Loss 5.7945 (5.7171)	Top-1 acc 3.516 (4.894)	Top-5 acc 11.719 (14.824)	lr 0.04722
Warmup Train [7][210/3239]	Time 0.204 (0.301)	Data 0.001 (0.059)	Loss 5.7087 (5.7165)	Top-1 acc 6.250 (4.869)	Top-5 acc 17.578 (14.807)	lr 0.04722
Warmup Train [7][220/3239]	Time 0.246 (0.298)	Data 0.001 (0.056)	Loss 5.7957 (5.7172)	Top-1 acc 4.688 (4.862)	Top-5 acc 13.672 (14.792)	lr 0.04721
Warmup Train [7][230/3239]	Time 0.370 (0.295)	Data 0.001 (0.054)	Loss 5.5983 (5.7142)	Top-1 acc 6.250 (4.884)	Top-5 acc 18.359 (14.849)	lr 0.04721
Warmup Train [7][240/3239]	Time 0.255 (0.292)	Data 0.001 (0.052)	Loss 5.6765 (5.7134)	Top-1 acc 5.859 (4.861)	Top-5 acc 15.234 (14.836)	lr 0.04721
Warmup Train [7][250/3239]	Time 0.262 (0.290)	Data 0.001 (0.050)	Loss 5.7399 (5.7129)	Top-1 acc 5.078 (4.862)	Top-5 acc 11.719 (14.838)	lr 0.04721
Warmup Train [7][260/3239]	Time 0.162 (0.287)	Data 0.002 (0.048)	Loss 5.6128 (5.7122)	Top-1 acc 7.031 (4.842)	Top-5 acc 16.797 (14.848)	lr 0.04720
Warmup Train [7][270/3239]	Time 0.149 (0.285)	Data 0.001 (0.046)	Loss 5.8343 (5.7125)	Top-1 acc 5.859 (4.835)	Top-5 acc 14.453 (14.821)	lr 0.04720
Warmup Train [7][280/3239]	Time 0.181 (0.283)	Data 0.001 (0.045)	Loss 5.6871 (5.7131)	Top-1 acc 7.422 (4.820)	Top-5 acc 16.406 (14.781)	lr 0.04720
Warmup Train [7][290/3239]	Time 0.259 (0.281)	Data 0.001 (0.043)	Loss 5.7180 (5.7134)	Top-1 acc 3.906 (4.814)	Top-5 acc 16.016 (14.758)	lr 0.04719
Warmup Train [7][300/3239]	Time 0.233 (0.279)	Data 0.002 (0.042)	Loss 5.7470 (5.7134)	Top-1 acc 4.297 (4.800)	Top-5 acc 11.328 (14.730)	lr 0.04719
Warmup Train [7][310/3239]	Time 0.222 (0.277)	Data 0.001 (0.040)	Loss 5.6278 (5.7130)	Top-1 acc 5.859 (4.812)	Top-5 acc 14.453 (14.741)	lr 0.04719
Warmup Train [7][320/3239]	Time 0.225 (0.275)	Data 0.001 (0.039)	Loss 5.6597 (5.7123)	Top-1 acc 3.906 (4.809)	Top-5 acc 14.062 (14.759)	lr 0.04719
Warmup Train [7][330/3239]	Time 0.184 (0.274)	Data 0.001 (0.038)	Loss 5.7262 (5.7124)	Top-1 acc 1.953 (4.814)	Top-5 acc 10.156 (14.725)	lr 0.04718
Warmup Train [7][340/3239]	Time 0.330 (0.272)	Data 0.001 (0.037)	Loss 5.7283 (5.7118)	Top-1 acc 5.469 (4.838)	Top-5 acc 16.406 (14.737)	lr 0.04718
Warmup Train [7][350/3239]	Time 0.261 (0.271)	Data 0.001 (0.036)	Loss 5.6346 (5.7112)	Top-1 acc 4.297 (4.836)	Top-5 acc 19.531 (14.724)	lr 0.04718
Warmup Train [7][360/3239]	Time 0.268 (0.270)	Data 0.001 (0.035)	Loss 5.6562 (5.7114)	Top-1 acc 4.297 (4.827)	Top-5 acc 14.844 (14.715)	lr 0.04718
Warmup Train [7][370/3239]	Time 0.240 (0.269)	Data 0.001 (0.034)	Loss 5.6896 (5.7112)	Top-1 acc 3.516 (4.839)	Top-5 acc 12.891 (14.728)	lr 0.04717
Warmup Train [7][380/3239]	Time 0.204 (0.268)	Data 0.001 (0.034)	Loss 5.6757 (5.7111)	Top-1 acc 3.906 (4.834)	Top-5 acc 14.453 (14.739)	lr 0.04717
Warmup Train [7][390/3239]	Time 0.243 (0.266)	Data 0.001 (0.033)	Loss 5.7351 (5.7114)	Top-1 acc 4.297 (4.838)	Top-5 acc 12.891 (14.743)	lr 0.04717
Warmup Train [7][400/3239]	Time 0.259 (0.265)	Data 0.001 (0.032)	Loss 5.6755 (5.7107)	Top-1 acc 5.078 (4.834)	Top-5 acc 17.188 (14.741)	lr 0.04716
Warmup Train [7][410/3239]	Time 0.232 (0.263)	Data 0.001 (0.031)	Loss 5.6548 (5.7101)	Top-1 acc 6.250 (4.834)	Top-5 acc 17.188 (14.765)	lr 0.04716
Warmup Train [7][420/3239]	Time 0.217 (0.262)	Data 0.001 (0.031)	Loss 5.6997 (5.7098)	Top-1 acc 4.688 (4.842)	Top-5 acc 13.281 (14.785)	lr 0.04716
Warmup Train [7][430/3239]	Time 0.291 (0.261)	Data 0.001 (0.030)	Loss 5.7715 (5.7096)	Top-1 acc 3.906 (4.844)	Top-5 acc 12.500 (14.778)	lr 0.04716
Warmup Train [7][440/3239]	Time 0.224 (0.261)	Data 0.001 (0.029)	Loss 5.7179 (5.7090)	Top-1 acc 5.469 (4.839)	Top-5 acc 14.844 (14.799)	lr 0.04715
Warmup Train [7][450/3239]	Time 0.138 (0.260)	Data 0.001 (0.029)	Loss 5.6593 (5.7083)	Top-1 acc 5.078 (4.839)	Top-5 acc 14.062 (14.814)	lr 0.04715
Warmup Train [7][460/3239]	Time 0.333 (0.259)	Data 0.001 (0.028)	Loss 5.7259 (5.7081)	Top-1 acc 5.078 (4.831)	Top-5 acc 17.188 (14.817)	lr 0.04715
Warmup Train [7][470/3239]	Time 0.198 (0.258)	Data 0.001 (0.028)	Loss 5.7270 (5.7082)	Top-1 acc 3.125 (4.827)	Top-5 acc 10.156 (14.813)	lr 0.04714
Warmup Train [7][480/3239]	Time 0.272 (0.258)	Data 0.001 (0.027)	Loss 5.6604 (5.7071)	Top-1 acc 5.078 (4.826)	Top-5 acc 17.188 (14.842)	lr 0.04714
Warmup Train [7][490/3239]	Time 0.218 (0.257)	Data 0.001 (0.027)	Loss 5.5777 (5.7067)	Top-1 acc 6.250 (4.834)	Top-5 acc 17.188 (14.855)	lr 0.04714
Warmup Train [7][500/3239]	Time 0.213 (0.257)	Data 0.001 (0.026)	Loss 5.7173 (5.7063)	Top-1 acc 5.469 (4.836)	Top-5 acc 15.234 (14.855)	lr 0.04714
Warmup Train [7][510/3239]	Time 0.160 (0.256)	Data 0.001 (0.026)	Loss 5.7107 (5.7057)	Top-1 acc 5.469 (4.834)	Top-5 acc 15.625 (14.867)	lr 0.04713
Warmup Train [7][520/3239]	Time 0.243 (0.255)	Data 0.001 (0.025)	Loss 5.7837 (5.7057)	Top-1 acc 4.297 (4.829)	Top-5 acc 12.109 (14.853)	lr 0.04713
Warmup Train [7][530/3239]	Time 0.167 (0.254)	Data 0.002 (0.025)	Loss 5.7700 (5.7065)	Top-1 acc 4.297 (4.818)	Top-5 acc 17.969 (14.847)	lr 0.04713
Warmup Train [7][540/3239]	Time 0.290 (0.254)	Data 0.001 (0.024)	Loss 5.7901 (5.7062)	Top-1 acc 4.297 (4.820)	Top-5 acc 11.719 (14.846)	lr 0.04712
Warmup Train [7][550/3239]	Time 0.199 (0.253)	Data 0.001 (0.024)	Loss 5.6490 (5.7062)	Top-1 acc 5.469 (4.808)	Top-5 acc 14.453 (14.837)	lr 0.04712
Warmup Train [7][560/3239]	Time 0.280 (0.253)	Data 0.001 (0.024)	Loss 5.6180 (5.7062)	Top-1 acc 7.422 (4.810)	Top-5 acc 17.969 (14.833)	lr 0.04712
Warmup Train [7][570/3239]	Time 0.205 (0.252)	Data 0.001 (0.023)	Loss 5.7361 (5.7059)	Top-1 acc 5.078 (4.809)	Top-5 acc 16.016 (14.846)	lr 0.04712
Warmup Train [7][580/3239]	Time 0.212 (0.252)	Data 0.001 (0.023)	Loss 5.6245 (5.7057)	Top-1 acc 4.688 (4.805)	Top-5 acc 15.234 (14.842)	lr 0.04711
Warmup Train [7][590/3239]	Time 0.232 (0.251)	Data 0.001 (0.023)	Loss 5.7023 (5.7057)	Top-1 acc 5.859 (4.795)	Top-5 acc 13.281 (14.831)	lr 0.04711
Warmup Train [7][600/3239]	Time 0.210 (0.251)	Data 0.001 (0.022)	Loss 5.7675 (5.7062)	Top-1 acc 4.297 (4.795)	Top-5 acc 12.500 (14.828)	lr 0.04711
Warmup Train [7][610/3239]	Time 0.250 (0.250)	Data 0.001 (0.022)	Loss 5.6847 (5.7058)	Top-1 acc 5.859 (4.789)	Top-5 acc 12.500 (14.819)	lr 0.04710
Warmup Train [7][620/3239]	Time 0.190 (0.250)	Data 0.001 (0.022)	Loss 5.7378 (5.7052)	Top-1 acc 3.125 (4.790)	Top-5 acc 14.453 (14.835)	lr 0.04710
Warmup Train [7][630/3239]	Time 0.198 (0.250)	Data 0.001 (0.021)	Loss 5.7757 (5.7050)	Top-1 acc 4.297 (4.795)	Top-5 acc 14.062 (14.844)	lr 0.04710
Warmup Train [7][640/3239]	Time 0.144 (0.249)	Data 0.001 (0.021)	Loss 5.7676 (5.7053)	Top-1 acc 4.688 (4.794)	Top-5 acc 13.672 (14.843)	lr 0.04710
Warmup Train [7][650/3239]	Time 0.254 (0.249)	Data 0.001 (0.021)	Loss 5.7709 (5.7054)	Top-1 acc 3.125 (4.786)	Top-5 acc 10.156 (14.839)	lr 0.04709
Warmup Train [7][660/3239]	Time 0.294 (0.248)	Data 0.002 (0.021)	Loss 5.5841 (5.7054)	Top-1 acc 6.250 (4.789)	Top-5 acc 18.359 (14.841)	lr 0.04709
Warmup Train [7][670/3239]	Time 0.182 (0.248)	Data 0.001 (0.020)	Loss 5.6359 (5.7050)	Top-1 acc 6.250 (4.789)	Top-5 acc 16.797 (14.860)	lr 0.04709
Warmup Train [7][680/3239]	Time 0.219 (0.248)	Data 0.001 (0.020)	Loss 5.6312 (5.7052)	Top-1 acc 8.203 (4.793)	Top-5 acc 17.188 (14.860)	lr 0.04709
Warmup Train [7][690/3239]	Time 0.191 (0.247)	Data 0.002 (0.020)	Loss 5.6965 (5.7042)	Top-1 acc 3.516 (4.802)	Top-5 acc 13.672 (14.884)	lr 0.04708
Warmup Train [7][700/3239]	Time 0.230 (0.247)	Data 0.001 (0.020)	Loss 5.6867 (5.7033)	Top-1 acc 5.469 (4.810)	Top-5 acc 17.188 (14.905)	lr 0.04708
Warmup Train [7][710/3239]	Time 0.231 (0.246)	Data 0.001 (0.019)	Loss 5.5119 (5.7028)	Top-1 acc 8.594 (4.819)	Top-5 acc 18.750 (14.913)	lr 0.04708
Warmup Train [7][720/3239]	Time 0.246 (0.246)	Data 0.001 (0.019)	Loss 5.7532 (5.7031)	Top-1 acc 5.469 (4.820)	Top-5 acc 13.672 (14.904)	lr 0.04707
Warmup Train [7][730/3239]	Time 0.149 (0.246)	Data 0.001 (0.019)	Loss 5.7883 (5.7033)	Top-1 acc 4.297 (4.836)	Top-5 acc 13.281 (14.911)	lr 0.04707
Warmup Train [7][740/3239]	Time 0.204 (0.246)	Data 0.001 (0.019)	Loss 5.8447 (5.7030)	Top-1 acc 3.906 (4.846)	Top-5 acc 12.891 (14.913)	lr 0.04707
Warmup Train [7][750/3239]	Time 0.226 (0.245)	Data 0.001 (0.019)	Loss 5.5675 (5.7023)	Top-1 acc 7.031 (4.854)	Top-5 acc 18.750 (14.930)	lr 0.04707
Warmup Train [7][760/3239]	Time 0.308 (0.245)	Data 0.001 (0.018)	Loss 5.8036 (5.7027)	Top-1 acc 2.734 (4.848)	Top-5 acc 10.938 (14.909)	lr 0.04706
Warmup Train [7][770/3239]	Time 0.181 (0.245)	Data 0.001 (0.018)	Loss 5.7121 (5.7018)	Top-1 acc 5.469 (4.848)	Top-5 acc 14.844 (14.921)	lr 0.04706
Warmup Train [7][780/3239]	Time 0.173 (0.244)	Data 0.001 (0.018)	Loss 5.6595 (5.7014)	Top-1 acc 4.688 (4.852)	Top-5 acc 14.062 (14.925)	lr 0.04706
Warmup Train [7][790/3239]	Time 0.180 (0.244)	Data 0.001 (0.018)	Loss 5.6834 (5.7009)	Top-1 acc 3.906 (4.850)	Top-5 acc 15.234 (14.924)	lr 0.04705
Warmup Train [7][800/3239]	Time 0.233 (0.244)	Data 0.001 (0.018)	Loss 5.5767 (5.7003)	Top-1 acc 3.906 (4.853)	Top-5 acc 15.625 (14.932)	lr 0.04705
Warmup Train [7][810/3239]	Time 0.173 (0.243)	Data 0.001 (0.017)	Loss 5.7097 (5.6997)	Top-1 acc 4.297 (4.859)	Top-5 acc 13.281 (14.936)	lr 0.04705
Warmup Train [7][820/3239]	Time 0.296 (0.243)	Data 0.001 (0.017)	Loss 5.6221 (5.6995)	Top-1 acc 5.859 (4.865)	Top-5 acc 14.453 (14.940)	lr 0.04705
Warmup Train [7][830/3239]	Time 0.222 (0.243)	Data 0.002 (0.017)	Loss 5.6672 (5.6992)	Top-1 acc 3.906 (4.871)	Top-5 acc 12.500 (14.939)	lr 0.04704
Warmup Train [7][840/3239]	Time 0.157 (0.242)	Data 0.001 (0.017)	Loss 5.6228 (5.6989)	Top-1 acc 4.688 (4.875)	Top-5 acc 16.016 (14.945)	lr 0.04704
Warmup Train [7][850/3239]	Time 0.165 (0.242)	Data 0.001 (0.017)	Loss 5.6985 (5.6987)	Top-1 acc 4.688 (4.870)	Top-5 acc 15.234 (14.954)	lr 0.04704
Warmup Train [7][860/3239]	Time 0.231 (0.242)	Data 0.001 (0.017)	Loss 5.7213 (5.6985)	Top-1 acc 4.688 (4.871)	Top-5 acc 13.281 (14.955)	lr 0.04703
Warmup Train [7][870/3239]	Time 0.272 (0.242)	Data 0.001 (0.016)	Loss 5.6991 (5.6976)	Top-1 acc 5.078 (4.879)	Top-5 acc 14.844 (14.980)	lr 0.04703
Warmup Train [7][880/3239]	Time 0.135 (0.241)	Data 0.001 (0.016)	Loss 5.5819 (5.6976)	Top-1 acc 5.078 (4.887)	Top-5 acc 15.625 (14.987)	lr 0.04703
Warmup Train [7][890/3239]	Time 0.257 (0.241)	Data 0.001 (0.016)	Loss 5.5806 (5.6975)	Top-1 acc 6.250 (4.887)	Top-5 acc 17.969 (14.979)	lr 0.04703
Warmup Train [7][900/3239]	Time 0.238 (0.241)	Data 0.001 (0.016)	Loss 5.5795 (5.6972)	Top-1 acc 7.031 (4.887)	Top-5 acc 17.188 (14.990)	lr 0.04702
Warmup Train [7][910/3239]	Time 0.161 (0.241)	Data 0.001 (0.016)	Loss 5.6064 (5.6966)	Top-1 acc 7.812 (4.904)	Top-5 acc 15.625 (15.005)	lr 0.04702
Warmup Train [7][920/3239]	Time 0.182 (0.241)	Data 0.001 (0.016)	Loss 5.7144 (5.6961)	Top-1 acc 5.469 (4.909)	Top-5 acc 14.062 (15.021)	lr 0.04702
Warmup Train [7][930/3239]	Time 0.211 (0.241)	Data 0.001 (0.016)	Loss 5.6600 (5.6955)	Top-1 acc 3.906 (4.913)	Top-5 acc 14.453 (15.034)	lr 0.04701
Warmup Train [7][940/3239]	Time 0.236 (0.240)	Data 0.001 (0.015)	Loss 5.6242 (5.6952)	Top-1 acc 5.078 (4.919)	Top-5 acc 15.625 (15.043)	lr 0.04701
Warmup Train [7][950/3239]	Time 0.130 (0.240)	Data 0.001 (0.015)	Loss 5.7768 (5.6952)	Top-1 acc 3.516 (4.917)	Top-5 acc 13.672 (15.047)	lr 0.04701
Warmup Train [7][960/3239]	Time 0.202 (0.240)	Data 0.001 (0.015)	Loss 5.7182 (5.6955)	Top-1 acc 5.469 (4.913)	Top-5 acc 16.016 (15.048)	lr 0.04701
Warmup Train [7][970/3239]	Time 0.291 (0.240)	Data 0.001 (0.015)	Loss 5.7369 (5.6955)	Top-1 acc 4.297 (4.910)	Top-5 acc 17.578 (15.042)	lr 0.04700
Warmup Train [7][980/3239]	Time 0.282 (0.239)	Data 0.002 (0.015)	Loss 5.7013 (5.6956)	Top-1 acc 5.859 (4.911)	Top-5 acc 16.016 (15.038)	lr 0.04700
Warmup Train [7][990/3239]	Time 0.177 (0.239)	Data 0.001 (0.015)	Loss 5.5148 (5.6954)	Top-1 acc 7.031 (4.914)	Top-5 acc 19.141 (15.039)	lr 0.04700
Warmup Train [7][1000/3239]	Time 0.207 (0.239)	Data 0.001 (0.015)	Loss 5.7824 (5.6954)	Top-1 acc 3.516 (4.912)	Top-5 acc 12.500 (15.036)	lr 0.04699
Warmup Train [7][1010/3239]	Time 0.202 (0.239)	Data 0.002 (0.015)	Loss 5.7015 (5.6951)	Top-1 acc 5.078 (4.910)	Top-5 acc 11.328 (15.038)	lr 0.04699
Warmup Train [7][1020/3239]	Time 0.213 (0.239)	Data 0.001 (0.015)	Loss 5.7687 (5.6948)	Top-1 acc 6.641 (4.916)	Top-5 acc 16.016 (15.046)	lr 0.04699
Warmup Train [7][1030/3239]	Time 0.333 (0.239)	Data 0.001 (0.014)	Loss 5.6788 (5.6942)	Top-1 acc 5.469 (4.914)	Top-5 acc 15.625 (15.058)	lr 0.04698
Warmup Train [7][1040/3239]	Time 0.243 (0.239)	Data 0.001 (0.014)	Loss 5.6066 (5.6938)	Top-1 acc 8.203 (4.919)	Top-5 acc 17.188 (15.073)	lr 0.04698
Warmup Train [7][1050/3239]	Time 0.237 (0.238)	Data 0.001 (0.014)	Loss 5.6850 (5.6932)	Top-1 acc 5.859 (4.928)	Top-5 acc 14.453 (15.088)	lr 0.04698
Warmup Train [7][1060/3239]	Time 0.257 (0.238)	Data 0.010 (0.014)	Loss 5.6382 (5.6928)	Top-1 acc 5.078 (4.925)	Top-5 acc 17.578 (15.093)	lr 0.04698
Warmup Train [7][1070/3239]	Time 0.247 (0.238)	Data 0.001 (0.014)	Loss 5.7488 (5.6925)	Top-1 acc 6.250 (4.930)	Top-5 acc 17.578 (15.106)	lr 0.04697
Warmup Train [7][1080/3239]	Time 0.239 (0.238)	Data 0.001 (0.014)	Loss 5.6947 (5.6922)	Top-1 acc 3.906 (4.932)	Top-5 acc 13.672 (15.101)	lr 0.04697
Warmup Train [7][1090/3239]	Time 0.325 (0.238)	Data 0.001 (0.014)	Loss 5.7435 (5.6922)	Top-1 acc 3.516 (4.930)	Top-5 acc 14.453 (15.104)	lr 0.04697
Warmup Train [7][1100/3239]	Time 0.187 (0.238)	Data 0.001 (0.014)	Loss 5.6326 (5.6922)	Top-1 acc 6.641 (4.930)	Top-5 acc 17.188 (15.100)	lr 0.04696
Warmup Train [7][1110/3239]	Time 0.246 (0.237)	Data 0.001 (0.014)	Loss 5.7832 (5.6920)	Top-1 acc 3.516 (4.936)	Top-5 acc 13.672 (15.102)	lr 0.04696
Warmup Train [7][1120/3239]	Time 0.231 (0.237)	Data 0.002 (0.013)	Loss 5.6079 (5.6915)	Top-1 acc 7.422 (4.940)	Top-5 acc 17.969 (15.112)	lr 0.04696
Warmup Train [7][1130/3239]	Time 0.206 (0.237)	Data 0.002 (0.013)	Loss 5.5807 (5.6911)	Top-1 acc 6.641 (4.943)	Top-5 acc 17.969 (15.120)	lr 0.04696
Warmup Train [7][1140/3239]	Time 0.286 (0.237)	Data 0.001 (0.013)	Loss 5.6585 (5.6912)	Top-1 acc 3.516 (4.940)	Top-5 acc 16.797 (15.120)	lr 0.04695
Warmup Train [7][1150/3239]	Time 0.251 (0.237)	Data 0.001 (0.013)	Loss 5.6377 (5.6908)	Top-1 acc 6.250 (4.944)	Top-5 acc 17.188 (15.131)	lr 0.04695
Warmup Train [7][1160/3239]	Time 0.178 (0.237)	Data 0.001 (0.013)	Loss 5.6337 (5.6902)	Top-1 acc 2.344 (4.952)	Top-5 acc 14.062 (15.150)	lr 0.04695
Warmup Train [7][1170/3239]	Time 0.228 (0.237)	Data 0.001 (0.013)	Loss 5.6045 (5.6901)	Top-1 acc 6.250 (4.950)	Top-5 acc 18.750 (15.149)	lr 0.04694
Warmup Train [7][1180/3239]	Time 0.296 (0.236)	Data 0.001 (0.013)	Loss 5.7720 (5.6901)	Top-1 acc 4.297 (4.946)	Top-5 acc 13.672 (15.148)	lr 0.04694
Warmup Train [7][1190/3239]	Time 0.244 (0.236)	Data 0.001 (0.013)	Loss 5.6865 (5.6900)	Top-1 acc 3.125 (4.945)	Top-5 acc 14.844 (15.153)	lr 0.04694
Warmup Train [7][1200/3239]	Time 0.302 (0.236)	Data 0.001 (0.013)	Loss 5.6629 (5.6899)	Top-1 acc 6.641 (4.944)	Top-5 acc 17.969 (15.154)	lr 0.04694
Warmup Train [7][1210/3239]	Time 0.229 (0.236)	Data 0.002 (0.013)	Loss 5.6205 (5.6896)	Top-1 acc 8.594 (4.951)	Top-5 acc 16.797 (15.162)	lr 0.04693
Warmup Train [7][1220/3239]	Time 0.205 (0.236)	Data 0.001 (0.013)	Loss 5.8212 (5.6891)	Top-1 acc 2.344 (4.956)	Top-5 acc 12.109 (15.164)	lr 0.04693
Warmup Train [7][1230/3239]	Time 0.175 (0.236)	Data 0.001 (0.012)	Loss 5.7006 (5.6891)	Top-1 acc 5.469 (4.951)	Top-5 acc 17.578 (15.167)	lr 0.04693
Warmup Train [7][1240/3239]	Time 0.260 (0.236)	Data 0.002 (0.012)	Loss 5.6808 (5.6888)	Top-1 acc 7.031 (4.960)	Top-5 acc 17.578 (15.178)	lr 0.04692
Warmup Train [7][1250/3239]	Time 0.143 (0.236)	Data 0.001 (0.012)	Loss 5.6806 (5.6885)	Top-1 acc 6.641 (4.960)	Top-5 acc 17.188 (15.181)	lr 0.04692
Warmup Train [7][1260/3239]	Time 0.226 (0.236)	Data 0.002 (0.012)	Loss 5.6651 (5.6881)	Top-1 acc 3.125 (4.959)	Top-5 acc 13.281 (15.191)	lr 0.04692
Warmup Train [7][1270/3239]	Time 0.270 (0.235)	Data 0.001 (0.012)	Loss 5.7176 (5.6878)	Top-1 acc 4.297 (4.966)	Top-5 acc 15.234 (15.205)	lr 0.04692
Warmup Train [7][1280/3239]	Time 0.203 (0.235)	Data 0.001 (0.012)	Loss 5.5580 (5.6876)	Top-1 acc 7.031 (4.968)	Top-5 acc 19.531 (15.207)	lr 0.04691
Warmup Train [7][1290/3239]	Time 0.269 (0.235)	Data 0.001 (0.012)	Loss 5.5643 (5.6873)	Top-1 acc 4.688 (4.970)	Top-5 acc 21.875 (15.215)	lr 0.04691
Warmup Train [7][1300/3239]	Time 0.414 (0.235)	Data 0.001 (0.012)	Loss 5.6860 (5.6873)	Top-1 acc 3.906 (4.968)	Top-5 acc 11.328 (15.203)	lr 0.04691
Warmup Train [7][1310/3239]	Time 0.199 (0.235)	Data 0.001 (0.012)	Loss 5.7995 (5.6871)	Top-1 acc 1.562 (4.965)	Top-5 acc 10.547 (15.206)	lr 0.04690
Warmup Train [7][1320/3239]	Time 0.210 (0.235)	Data 0.001 (0.012)	Loss 5.5451 (5.6867)	Top-1 acc 5.469 (4.971)	Top-5 acc 15.234 (15.213)	lr 0.04690
Warmup Train [7][1330/3239]	Time 0.264 (0.235)	Data 0.001 (0.012)	Loss 5.6383 (5.6865)	Top-1 acc 3.125 (4.972)	Top-5 acc 11.328 (15.216)	lr 0.04690
Warmup Train [7][1340/3239]	Time 0.250 (0.235)	Data 0.001 (0.012)	Loss 5.5714 (5.6862)	Top-1 acc 5.078 (4.976)	Top-5 acc 15.234 (15.224)	lr 0.04689
Warmup Train [7][1350/3239]	Time 0.223 (0.235)	Data 0.001 (0.012)	Loss 5.7528 (5.6859)	Top-1 acc 5.859 (4.982)	Top-5 acc 13.281 (15.225)	lr 0.04689
Warmup Train [7][1360/3239]	Time 0.223 (0.235)	Data 0.002 (0.011)	Loss 5.6870 (5.6854)	Top-1 acc 3.906 (4.990)	Top-5 acc 16.016 (15.240)	lr 0.04689
Warmup Train [7][1370/3239]	Time 0.217 (0.235)	Data 0.001 (0.011)	Loss 5.6311 (5.6853)	Top-1 acc 7.031 (4.991)	Top-5 acc 16.797 (15.243)	lr 0.04689
Warmup Train [7][1380/3239]	Time 0.233 (0.235)	Data 0.001 (0.011)	Loss 5.7557 (5.6853)	Top-1 acc 2.734 (4.989)	Top-5 acc 11.719 (15.247)	lr 0.04688
Warmup Train [7][1390/3239]	Time 0.203 (0.235)	Data 0.001 (0.011)	Loss 5.6163 (5.6852)	Top-1 acc 4.297 (4.997)	Top-5 acc 16.797 (15.257)	lr 0.04688
Warmup Train [7][1400/3239]	Time 0.416 (0.235)	Data 0.002 (0.011)	Loss 5.5346 (5.6846)	Top-1 acc 5.078 (4.996)	Top-5 acc 17.578 (15.264)	lr 0.04688
Warmup Train [7][1410/3239]	Time 0.209 (0.235)	Data 0.003 (0.011)	Loss 5.7310 (5.6844)	Top-1 acc 3.906 (4.996)	Top-5 acc 13.672 (15.266)	lr 0.04687
Warmup Train [7][1420/3239]	Time 0.320 (0.235)	Data 0.002 (0.011)	Loss 5.6993 (5.6843)	Top-1 acc 4.688 (4.998)	Top-5 acc 13.672 (15.265)	lr 0.04687
Warmup Train [7][1430/3239]	Time 0.174 (0.234)	Data 0.001 (0.011)	Loss 5.7050 (5.6835)	Top-1 acc 3.516 (4.999)	Top-5 acc 15.234 (15.275)	lr 0.04687
Warmup Train [7][1440/3239]	Time 0.186 (0.234)	Data 0.001 (0.011)	Loss 5.6433 (5.6832)	Top-1 acc 5.859 (5.002)	Top-5 acc 18.750 (15.280)	lr 0.04687
Warmup Train [7][1450/3239]	Time 0.231 (0.234)	Data 0.001 (0.011)	Loss 5.6490 (5.6828)	Top-1 acc 6.250 (5.006)	Top-5 acc 17.188 (15.294)	lr 0.04686
Warmup Train [7][1460/3239]	Time 0.291 (0.234)	Data 0.002 (0.011)	Loss 5.6307 (5.6826)	Top-1 acc 5.078 (5.008)	Top-5 acc 13.281 (15.292)	lr 0.04686
Warmup Train [7][1470/3239]	Time 0.195 (0.234)	Data 0.001 (0.011)	Loss 5.6142 (5.6824)	Top-1 acc 4.688 (5.009)	Top-5 acc 16.797 (15.294)	lr 0.04686
Warmup Train [7][1480/3239]	Time 0.281 (0.234)	Data 0.001 (0.011)	Loss 5.6469 (5.6822)	Top-1 acc 4.688 (5.008)	Top-5 acc 16.797 (15.301)	lr 0.04685
Warmup Train [7][1490/3239]	Time 0.195 (0.234)	Data 0.001 (0.011)	Loss 5.6436 (5.6819)	Top-1 acc 5.859 (5.017)	Top-5 acc 17.578 (15.318)	lr 0.04685
Warmup Train [7][1500/3239]	Time 0.304 (0.234)	Data 0.002 (0.011)	Loss 5.7491 (5.6816)	Top-1 acc 3.125 (5.018)	Top-5 acc 11.719 (15.318)	lr 0.04685
Warmup Train [7][1510/3239]	Time 0.177 (0.234)	Data 0.001 (0.010)	Loss 5.6172 (5.6813)	Top-1 acc 3.516 (5.023)	Top-5 acc 10.938 (15.323)	lr 0.04684
Warmup Train [7][1520/3239]	Time 0.195 (0.234)	Data 0.001 (0.010)	Loss 5.7012 (5.6812)	Top-1 acc 6.250 (5.025)	Top-5 acc 13.672 (15.323)	lr 0.04684
Warmup Train [7][1530/3239]	Time 0.206 (0.234)	Data 0.002 (0.010)	Loss 5.6546 (5.6810)	Top-1 acc 7.422 (5.033)	Top-5 acc 16.797 (15.334)	lr 0.04684
Warmup Train [7][1540/3239]	Time 0.208 (0.234)	Data 0.001 (0.010)	Loss 5.6955 (5.6809)	Top-1 acc 5.469 (5.035)	Top-5 acc 14.453 (15.345)	lr 0.04684
Warmup Train [7][1550/3239]	Time 0.205 (0.234)	Data 0.001 (0.010)	Loss 5.6569 (5.6804)	Top-1 acc 3.125 (5.041)	Top-5 acc 12.500 (15.348)	lr 0.04683
Warmup Train [7][1560/3239]	Time 0.263 (0.234)	Data 0.001 (0.010)	Loss 5.5993 (5.6799)	Top-1 acc 5.859 (5.045)	Top-5 acc 17.969 (15.353)	lr 0.04683
Warmup Train [7][1570/3239]	Time 0.185 (0.234)	Data 0.002 (0.010)	Loss 5.6571 (5.6798)	Top-1 acc 7.031 (5.047)	Top-5 acc 16.016 (15.358)	lr 0.04683
Warmup Train [7][1580/3239]	Time 0.184 (0.234)	Data 0.001 (0.010)	Loss 5.7589 (5.6796)	Top-1 acc 4.688 (5.050)	Top-5 acc 16.406 (15.362)	lr 0.04682
Warmup Train [7][1590/3239]	Time 0.212 (0.234)	Data 0.002 (0.010)	Loss 5.7069 (5.6795)	Top-1 acc 2.734 (5.052)	Top-5 acc 10.156 (15.363)	lr 0.04682
Warmup Train [7][1600/3239]	Time 0.261 (0.234)	Data 0.001 (0.010)	Loss 5.7098 (5.6793)	Top-1 acc 4.297 (5.051)	Top-5 acc 14.062 (15.364)	lr 0.04682
Warmup Train [7][1610/3239]	Time 0.394 (0.234)	Data 0.001 (0.010)	Loss 5.6262 (5.6790)	Top-1 acc 4.297 (5.052)	Top-5 acc 17.188 (15.369)	lr 0.04682
Warmup Train [7][1620/3239]	Time 0.288 (0.234)	Data 0.002 (0.010)	Loss 5.5900 (5.6786)	Top-1 acc 4.297 (5.052)	Top-5 acc 15.625 (15.376)	lr 0.04681
Warmup Train [7][1630/3239]	Time 0.238 (0.233)	Data 0.001 (0.010)	Loss 5.6845 (5.6786)	Top-1 acc 6.250 (5.054)	Top-5 acc 14.062 (15.373)	lr 0.04681
Warmup Train [7][1640/3239]	Time 0.205 (0.233)	Data 0.002 (0.010)	Loss 5.5443 (5.6782)	Top-1 acc 5.078 (5.056)	Top-5 acc 21.094 (15.383)	lr 0.04681
Warmup Train [7][1650/3239]	Time 0.155 (0.233)	Data 0.001 (0.010)	Loss 5.6369 (5.6781)	Top-1 acc 5.469 (5.057)	Top-5 acc 18.359 (15.385)	lr 0.04680
Warmup Train [7][1660/3239]	Time 0.288 (0.233)	Data 0.002 (0.010)	Loss 5.7041 (5.6778)	Top-1 acc 4.297 (5.059)	Top-5 acc 17.188 (15.391)	lr 0.04680
Warmup Train [7][1670/3239]	Time 0.267 (0.233)	Data 0.002 (0.010)	Loss 5.7304 (5.6778)	Top-1 acc 6.641 (5.056)	Top-5 acc 16.797 (15.397)	lr 0.04680
Warmup Train [7][1680/3239]	Time 0.206 (0.233)	Data 0.001 (0.010)	Loss 5.7053 (5.6774)	Top-1 acc 4.297 (5.057)	Top-5 acc 14.062 (15.407)	lr 0.04679
Warmup Train [7][1690/3239]	Time 0.235 (0.233)	Data 0.001 (0.010)	Loss 5.5819 (5.6771)	Top-1 acc 6.641 (5.061)	Top-5 acc 14.844 (15.411)	lr 0.04679
Warmup Train [7][1700/3239]	Time 0.178 (0.233)	Data 0.001 (0.010)	Loss 5.7464 (5.6772)	Top-1 acc 3.906 (5.060)	Top-5 acc 14.062 (15.408)	lr 0.04679
Warmup Train [7][1710/3239]	Time 0.299 (0.233)	Data 0.001 (0.010)	Loss 5.6699 (5.6771)	Top-1 acc 4.297 (5.058)	Top-5 acc 12.891 (15.406)	lr 0.04679
Warmup Train [7][1720/3239]	Time 0.289 (0.233)	Data 0.002 (0.010)	Loss 5.6793 (5.6767)	Top-1 acc 5.078 (5.060)	Top-5 acc 12.109 (15.412)	lr 0.04678
Warmup Train [7][1730/3239]	Time 0.258 (0.233)	Data 0.002 (0.010)	Loss 5.5961 (5.6765)	Top-1 acc 4.297 (5.057)	Top-5 acc 17.188 (15.416)	lr 0.04678
Warmup Train [7][1740/3239]	Time 0.174 (0.233)	Data 0.001 (0.010)	Loss 5.6971 (5.6763)	Top-1 acc 2.344 (5.055)	Top-5 acc 12.891 (15.419)	lr 0.04678
Warmup Train [7][1750/3239]	Time 0.200 (0.233)	Data 0.001 (0.010)	Loss 5.6208 (5.6762)	Top-1 acc 6.250 (5.059)	Top-5 acc 14.844 (15.420)	lr 0.04677
Warmup Train [7][1760/3239]	Time 0.211 (0.233)	Data 0.001 (0.009)	Loss 5.5894 (5.6759)	Top-1 acc 5.469 (5.057)	Top-5 acc 14.844 (15.417)	lr 0.04677
Warmup Train [7][1770/3239]	Time 0.213 (0.233)	Data 0.001 (0.009)	Loss 5.6576 (5.6757)	Top-1 acc 6.641 (5.059)	Top-5 acc 19.531 (15.425)	lr 0.04677
Warmup Train [7][1780/3239]	Time 0.172 (0.233)	Data 0.001 (0.009)	Loss 5.6570 (5.6754)	Top-1 acc 5.078 (5.061)	Top-5 acc 19.141 (15.432)	lr 0.04676
Warmup Train [7][1790/3239]	Time 0.147 (0.233)	Data 0.001 (0.009)	Loss 5.7242 (5.6752)	Top-1 acc 3.906 (5.066)	Top-5 acc 9.375 (15.440)	lr 0.04676
Warmup Train [7][1800/3239]	Time 0.232 (0.233)	Data 0.001 (0.009)	Loss 5.6115 (5.6749)	Top-1 acc 7.812 (5.069)	Top-5 acc 16.797 (15.446)	lr 0.04676
Warmup Train [7][1810/3239]	Time 0.219 (0.232)	Data 0.001 (0.009)	Loss 5.6471 (5.6746)	Top-1 acc 5.078 (5.069)	Top-5 acc 15.234 (15.454)	lr 0.04676
Warmup Train [7][1820/3239]	Time 0.236 (0.232)	Data 0.001 (0.009)	Loss 5.6667 (5.6741)	Top-1 acc 5.859 (5.075)	Top-5 acc 14.062 (15.463)	lr 0.04675
Warmup Train [7][1830/3239]	Time 0.336 (0.232)	Data 0.001 (0.009)	Loss 5.6936 (5.6738)	Top-1 acc 6.641 (5.081)	Top-5 acc 16.016 (15.473)	lr 0.04675
Warmup Train [7][1840/3239]	Time 0.194 (0.232)	Data 0.001 (0.009)	Loss 5.6540 (5.6735)	Top-1 acc 4.297 (5.080)	Top-5 acc 14.453 (15.477)	lr 0.04675
Warmup Train [7][1850/3239]	Time 0.224 (0.232)	Data 0.001 (0.009)	Loss 5.6582 (5.6733)	Top-1 acc 6.250 (5.086)	Top-5 acc 14.844 (15.476)	lr 0.04674
Warmup Train [7][1860/3239]	Time 0.263 (0.232)	Data 0.001 (0.009)	Loss 5.5585 (5.6732)	Top-1 acc 5.469 (5.091)	Top-5 acc 18.359 (15.482)	lr 0.04674
Warmup Train [7][1870/3239]	Time 0.210 (0.232)	Data 0.001 (0.009)	Loss 5.5565 (5.6731)	Top-1 acc 7.422 (5.091)	Top-5 acc 18.359 (15.487)	lr 0.04674
Warmup Train [7][1880/3239]	Time 0.221 (0.232)	Data 0.001 (0.009)	Loss 5.7723 (5.6729)	Top-1 acc 5.859 (5.096)	Top-5 acc 11.719 (15.487)	lr 0.04673
Warmup Train [7][1890/3239]	Time 0.216 (0.232)	Data 0.001 (0.009)	Loss 5.6476 (5.6728)	Top-1 acc 4.297 (5.097)	Top-5 acc 13.281 (15.489)	lr 0.04673
Warmup Train [7][1900/3239]	Time 0.196 (0.232)	Data 0.001 (0.009)	Loss 5.5395 (5.6724)	Top-1 acc 3.906 (5.101)	Top-5 acc 19.531 (15.500)	lr 0.04673
Warmup Train [7][1910/3239]	Time 0.205 (0.232)	Data 0.001 (0.009)	Loss 5.6804 (5.6722)	Top-1 acc 5.469 (5.103)	Top-5 acc 17.969 (15.508)	lr 0.04673
Warmup Train [7][1920/3239]	Time 0.252 (0.232)	Data 0.001 (0.009)	Loss 5.6192 (5.6720)	Top-1 acc 4.688 (5.106)	Top-5 acc 16.797 (15.511)	lr 0.04672
Warmup Train [7][1930/3239]	Time 0.299 (0.232)	Data 0.001 (0.009)	Loss 5.5428 (5.6718)	Top-1 acc 7.422 (5.107)	Top-5 acc 19.141 (15.515)	lr 0.04672
Warmup Train [7][1940/3239]	Time 0.205 (0.232)	Data 0.001 (0.009)	Loss 5.5776 (5.6717)	Top-1 acc 6.250 (5.110)	Top-5 acc 18.750 (15.522)	lr 0.04672
Warmup Train [7][1950/3239]	Time 0.221 (0.231)	Data 0.001 (0.009)	Loss 5.6504 (5.6716)	Top-1 acc 5.469 (5.113)	Top-5 acc 17.578 (15.524)	lr 0.04671
Warmup Train [7][1960/3239]	Time 0.128 (0.231)	Data 0.001 (0.009)	Loss 5.5808 (5.6713)	Top-1 acc 6.250 (5.113)	Top-5 acc 18.359 (15.532)	lr 0.04671
Warmup Train [7][1970/3239]	Time 0.227 (0.231)	Data 0.001 (0.009)	Loss 5.8077 (5.6714)	Top-1 acc 4.297 (5.111)	Top-5 acc 12.109 (15.530)	lr 0.04671
Warmup Train [7][1980/3239]	Time 0.125 (0.231)	Data 0.001 (0.009)	Loss 5.7013 (5.6713)	Top-1 acc 3.906 (5.109)	Top-5 acc 13.672 (15.532)	lr 0.04670
Warmup Train [7][1990/3239]	Time 0.168 (0.231)	Data 0.001 (0.009)	Loss 5.6394 (5.6711)	Top-1 acc 7.812 (5.113)	Top-5 acc 15.234 (15.535)	lr 0.04670
Warmup Train [7][2000/3239]	Time 0.193 (0.231)	Data 0.001 (0.009)	Loss 5.7492 (5.6710)	Top-1 acc 5.469 (5.113)	Top-5 acc 11.328 (15.539)	lr 0.04670
Warmup Train [7][2010/3239]	Time 0.193 (0.231)	Data 0.001 (0.009)	Loss 5.8254 (5.6710)	Top-1 acc 2.734 (5.115)	Top-5 acc 10.156 (15.542)	lr 0.04670
Warmup Train [7][2020/3239]	Time 0.299 (0.231)	Data 0.001 (0.009)	Loss 5.6146 (5.6709)	Top-1 acc 5.469 (5.117)	Top-5 acc 14.453 (15.543)	lr 0.04669
Warmup Train [7][2030/3239]	Time 0.350 (0.231)	Data 0.001 (0.009)	Loss 5.5014 (5.6706)	Top-1 acc 6.250 (5.116)	Top-5 acc 17.188 (15.546)	lr 0.04669
Warmup Train [7][2040/3239]	Time 0.211 (0.231)	Data 0.001 (0.009)	Loss 5.7276 (5.6704)	Top-1 acc 5.078 (5.120)	Top-5 acc 14.844 (15.554)	lr 0.04669
Warmup Train [7][2050/3239]	Time 0.184 (0.231)	Data 0.002 (0.009)	Loss 5.6361 (5.6702)	Top-1 acc 4.688 (5.121)	Top-5 acc 15.625 (15.559)	lr 0.04668
Warmup Train [7][2060/3239]	Time 0.161 (0.231)	Data 0.001 (0.009)	Loss 5.6910 (5.6700)	Top-1 acc 4.688 (5.122)	Top-5 acc 17.188 (15.563)	lr 0.04668
Warmup Train [7][2070/3239]	Time 0.152 (0.231)	Data 0.001 (0.009)	Loss 5.5526 (5.6696)	Top-1 acc 6.641 (5.125)	Top-5 acc 16.016 (15.568)	lr 0.04668
Warmup Train [7][2080/3239]	Time 0.184 (0.231)	Data 0.002 (0.009)	Loss 5.5595 (5.6693)	Top-1 acc 8.203 (5.130)	Top-5 acc 19.922 (15.576)	lr 0.04667
Warmup Train [7][2090/3239]	Time 0.126 (0.231)	Data 0.002 (0.008)	Loss 5.7246 (5.6692)	Top-1 acc 4.688 (5.131)	Top-5 acc 12.500 (15.577)	lr 0.04667
Warmup Train [7][2100/3239]	Time 0.285 (0.231)	Data 0.001 (0.008)	Loss 5.5480 (5.6691)	Top-1 acc 4.297 (5.128)	Top-5 acc 17.188 (15.574)	lr 0.04667
Warmup Train [7][2110/3239]	Time 0.217 (0.231)	Data 0.002 (0.008)	Loss 5.6955 (5.6687)	Top-1 acc 4.297 (5.131)	Top-5 acc 14.062 (15.581)	lr 0.04667
Warmup Train [7][2120/3239]	Time 0.312 (0.231)	Data 0.001 (0.008)	Loss 5.5574 (5.6684)	Top-1 acc 4.688 (5.134)	Top-5 acc 16.797 (15.589)	lr 0.04666
Warmup Train [7][2130/3239]	Time 0.325 (0.231)	Data 0.001 (0.008)	Loss 5.5904 (5.6680)	Top-1 acc 7.812 (5.140)	Top-5 acc 18.750 (15.601)	lr 0.04666
Warmup Train [7][2140/3239]	Time 0.196 (0.231)	Data 0.001 (0.008)	Loss 5.5051 (5.6678)	Top-1 acc 6.250 (5.142)	Top-5 acc 23.047 (15.608)	lr 0.04666
Warmup Train [7][2150/3239]	Time 0.261 (0.231)	Data 0.001 (0.008)	Loss 5.5476 (5.6676)	Top-1 acc 4.297 (5.143)	Top-5 acc 15.234 (15.610)	lr 0.04665
Warmup Train [7][2160/3239]	Time 0.247 (0.231)	Data 0.001 (0.008)	Loss 5.6539 (5.6673)	Top-1 acc 6.641 (5.150)	Top-5 acc 17.578 (15.621)	lr 0.04665
Warmup Train [7][2170/3239]	Time 0.155 (0.231)	Data 0.001 (0.008)	Loss 5.6585 (5.6672)	Top-1 acc 5.078 (5.154)	Top-5 acc 14.453 (15.623)	lr 0.04665
Warmup Train [7][2180/3239]	Time 0.250 (0.231)	Data 0.001 (0.008)	Loss 5.6424 (5.6671)	Top-1 acc 5.859 (5.155)	Top-5 acc 17.578 (15.627)	lr 0.04664
Warmup Train [7][2190/3239]	Time 0.185 (0.231)	Data 0.001 (0.008)	Loss 5.5122 (5.6668)	Top-1 acc 8.594 (5.157)	Top-5 acc 24.219 (15.634)	lr 0.04664
Warmup Train [7][2200/3239]	Time 0.203 (0.231)	Data 0.001 (0.008)	Loss 5.6418 (5.6665)	Top-1 acc 4.688 (5.161)	Top-5 acc 18.750 (15.643)	lr 0.04664
Warmup Train [7][2210/3239]	Time 0.232 (0.231)	Data 0.001 (0.008)	Loss 5.6204 (5.6662)	Top-1 acc 7.031 (5.162)	Top-5 acc 19.141 (15.646)	lr 0.04664
Warmup Train [7][2220/3239]	Time 0.292 (0.231)	Data 0.001 (0.008)	Loss 5.7409 (5.6660)	Top-1 acc 5.859 (5.167)	Top-5 acc 14.062 (15.649)	lr 0.04663
Warmup Train [7][2230/3239]	Time 0.281 (0.231)	Data 0.001 (0.008)	Loss 5.6782 (5.6659)	Top-1 acc 5.859 (5.169)	Top-5 acc 15.625 (15.651)	lr 0.04663
Warmup Train [7][2240/3239]	Time 0.230 (0.231)	Data 0.001 (0.008)	Loss 5.6179 (5.6658)	Top-1 acc 7.031 (5.170)	Top-5 acc 17.188 (15.649)	lr 0.04663
Warmup Train [7][2250/3239]	Time 0.182 (0.231)	Data 0.001 (0.008)	Loss 5.6201 (5.6657)	Top-1 acc 6.641 (5.173)	Top-5 acc 16.406 (15.653)	lr 0.04662
Warmup Train [7][2260/3239]	Time 0.194 (0.231)	Data 0.002 (0.008)	Loss 5.4825 (5.6654)	Top-1 acc 3.906 (5.174)	Top-5 acc 16.016 (15.654)	lr 0.04662
Warmup Train [7][2270/3239]	Time 0.221 (0.231)	Data 0.001 (0.008)	Loss 5.5531 (5.6650)	Top-1 acc 6.641 (5.179)	Top-5 acc 21.484 (15.663)	lr 0.04662
Warmup Train [7][2280/3239]	Time 0.237 (0.231)	Data 0.001 (0.008)	Loss 5.6191 (5.6648)	Top-1 acc 5.469 (5.182)	Top-5 acc 15.234 (15.667)	lr 0.04661
Warmup Train [7][2290/3239]	Time 0.176 (0.231)	Data 0.001 (0.008)	Loss 5.5797 (5.6647)	Top-1 acc 5.859 (5.182)	Top-5 acc 16.797 (15.670)	lr 0.04661
Warmup Train [7][2300/3239]	Time 0.259 (0.231)	Data 0.002 (0.008)	Loss 5.5689 (5.6642)	Top-1 acc 5.859 (5.189)	Top-5 acc 16.406 (15.684)	lr 0.04661
Warmup Train [7][2310/3239]	Time 0.254 (0.231)	Data 0.001 (0.008)	Loss 5.5740 (5.6638)	Top-1 acc 5.859 (5.194)	Top-5 acc 15.625 (15.691)	lr 0.04661
Warmup Train [7][2320/3239]	Time 0.197 (0.231)	Data 0.001 (0.008)	Loss 5.6041 (5.6635)	Top-1 acc 7.812 (5.195)	Top-5 acc 19.141 (15.697)	lr 0.04660
Warmup Train [7][2330/3239]	Time 0.263 (0.231)	Data 0.001 (0.008)	Loss 5.5992 (5.6632)	Top-1 acc 6.641 (5.199)	Top-5 acc 16.797 (15.702)	lr 0.04660
Warmup Train [7][2340/3239]	Time 0.364 (0.231)	Data 0.001 (0.008)	Loss 5.6486 (5.6630)	Top-1 acc 5.469 (5.200)	Top-5 acc 15.625 (15.703)	lr 0.04660
Warmup Train [7][2350/3239]	Time 0.162 (0.231)	Data 0.001 (0.008)	Loss 5.5473 (5.6629)	Top-1 acc 5.469 (5.199)	Top-5 acc 19.531 (15.704)	lr 0.04659
Warmup Train [7][2360/3239]	Time 0.213 (0.231)	Data 0.001 (0.008)	Loss 5.5517 (5.6627)	Top-1 acc 7.031 (5.201)	Top-5 acc 16.406 (15.703)	lr 0.04659
Warmup Train [7][2370/3239]	Time 0.152 (0.231)	Data 0.001 (0.008)	Loss 5.4964 (5.6624)	Top-1 acc 7.422 (5.204)	Top-5 acc 16.016 (15.704)	lr 0.04659
Warmup Train [7][2380/3239]	Time 0.251 (0.231)	Data 0.001 (0.008)	Loss 5.7379 (5.6622)	Top-1 acc 6.641 (5.208)	Top-5 acc 13.672 (15.711)	lr 0.04658
Warmup Train [7][2390/3239]	Time 0.301 (0.231)	Data 0.001 (0.008)	Loss 5.5356 (5.6619)	Top-1 acc 6.250 (5.213)	Top-5 acc 19.141 (15.719)	lr 0.04658
Warmup Train [7][2400/3239]	Time 0.143 (0.231)	Data 0.001 (0.008)	Loss 5.6261 (5.6618)	Top-1 acc 6.250 (5.215)	Top-5 acc 14.453 (15.723)	lr 0.04658
Warmup Train [7][2410/3239]	Time 0.212 (0.231)	Data 0.001 (0.008)	Loss 5.6626 (5.6616)	Top-1 acc 7.031 (5.219)	Top-5 acc 14.062 (15.728)	lr 0.04657
Warmup Train [7][2420/3239]	Time 0.190 (0.231)	Data 0.002 (0.008)	Loss 5.5431 (5.6614)	Top-1 acc 7.031 (5.221)	Top-5 acc 17.578 (15.732)	lr 0.04657
Warmup Train [7][2430/3239]	Time 0.185 (0.231)	Data 0.001 (0.008)	Loss 5.5965 (5.6614)	Top-1 acc 6.250 (5.222)	Top-5 acc 17.578 (15.730)	lr 0.04657
Warmup Train [7][2440/3239]	Time 0.158 (0.231)	Data 0.001 (0.008)	Loss 5.6677 (5.6614)	Top-1 acc 7.031 (5.222)	Top-5 acc 18.750 (15.729)	lr 0.04657
Warmup Train [7][2450/3239]	Time 0.154 (0.231)	Data 0.001 (0.008)	Loss 5.5921 (5.6612)	Top-1 acc 3.516 (5.224)	Top-5 acc 14.062 (15.730)	lr 0.04656
Warmup Train [7][2460/3239]	Time 0.243 (0.231)	Data 0.001 (0.008)	Loss 5.4076 (5.6606)	Top-1 acc 7.422 (5.228)	Top-5 acc 24.219 (15.742)	lr 0.04656
Warmup Train [7][2470/3239]	Time 0.290 (0.230)	Data 0.001 (0.008)	Loss 5.5465 (5.6605)	Top-1 acc 5.078 (5.229)	Top-5 acc 18.359 (15.750)	lr 0.04656
Warmup Train [7][2480/3239]	Time 0.296 (0.230)	Data 0.001 (0.008)	Loss 5.5787 (5.6601)	Top-1 acc 5.859 (5.231)	Top-5 acc 18.750 (15.756)	lr 0.04655
Warmup Train [7][2490/3239]	Time 0.178 (0.230)	Data 0.001 (0.008)	Loss 5.5544 (5.6599)	Top-1 acc 7.812 (5.231)	Top-5 acc 17.969 (15.761)	lr 0.04655
Warmup Train [7][2500/3239]	Time 0.233 (0.230)	Data 0.001 (0.008)	Loss 5.5397 (5.6597)	Top-1 acc 6.250 (5.231)	Top-5 acc 16.406 (15.763)	lr 0.04655
Warmup Train [7][2510/3239]	Time 0.153 (0.230)	Data 0.001 (0.008)	Loss 5.6410 (5.6593)	Top-1 acc 5.859 (5.237)	Top-5 acc 14.844 (15.768)	lr 0.04654
Warmup Train [7][2520/3239]	Time 0.204 (0.230)	Data 0.002 (0.008)	Loss 5.7056 (5.6591)	Top-1 acc 1.953 (5.238)	Top-5 acc 14.844 (15.775)	lr 0.04654
Warmup Train [7][2530/3239]	Time 0.287 (0.230)	Data 0.001 (0.008)	Loss 5.6918 (5.6590)	Top-1 acc 5.469 (5.236)	Top-5 acc 14.453 (15.777)	lr 0.04654
Warmup Train [7][2540/3239]	Time 0.256 (0.230)	Data 0.001 (0.008)	Loss 5.6509 (5.6588)	Top-1 acc 5.078 (5.237)	Top-5 acc 16.406 (15.779)	lr 0.04653
Warmup Train [7][2550/3239]	Time 0.179 (0.230)	Data 0.001 (0.008)	Loss 5.6600 (5.6587)	Top-1 acc 6.250 (5.238)	Top-5 acc 14.453 (15.780)	lr 0.04653
Warmup Train [7][2560/3239]	Time 0.208 (0.230)	Data 0.001 (0.008)	Loss 5.4715 (5.6585)	Top-1 acc 5.078 (5.238)	Top-5 acc 18.359 (15.788)	lr 0.04653
Warmup Train [7][2570/3239]	Time 0.242 (0.230)	Data 0.002 (0.008)	Loss 5.5767 (5.6582)	Top-1 acc 4.688 (5.239)	Top-5 acc 17.578 (15.792)	lr 0.04653
Warmup Train [7][2580/3239]	Time 0.278 (0.230)	Data 0.001 (0.008)	Loss 5.5619 (5.6578)	Top-1 acc 6.641 (5.244)	Top-5 acc 16.797 (15.800)	lr 0.04652
Warmup Train [7][2590/3239]	Time 0.288 (0.230)	Data 0.001 (0.007)	Loss 5.6118 (5.6576)	Top-1 acc 4.688 (5.250)	Top-5 acc 12.891 (15.805)	lr 0.04652
Warmup Train [7][2600/3239]	Time 0.165 (0.230)	Data 0.001 (0.007)	Loss 5.5576 (5.6574)	Top-1 acc 6.641 (5.249)	Top-5 acc 17.188 (15.811)	lr 0.04652
Warmup Train [7][2610/3239]	Time 0.252 (0.230)	Data 0.001 (0.007)	Loss 5.6716 (5.6572)	Top-1 acc 5.078 (5.250)	Top-5 acc 14.453 (15.811)	lr 0.04651
Warmup Train [7][2620/3239]	Time 0.147 (0.230)	Data 0.001 (0.007)	Loss 5.6370 (5.6571)	Top-1 acc 3.906 (5.250)	Top-5 acc 16.406 (15.815)	lr 0.04651
Warmup Train [7][2630/3239]	Time 0.231 (0.230)	Data 0.001 (0.007)	Loss 5.4719 (5.6568)	Top-1 acc 5.078 (5.249)	Top-5 acc 20.312 (15.819)	lr 0.04651
Warmup Train [7][2640/3239]	Time 0.289 (0.230)	Data 0.001 (0.007)	Loss 5.5817 (5.6566)	Top-1 acc 3.906 (5.251)	Top-5 acc 15.234 (15.821)	lr 0.04650
Warmup Train [7][2650/3239]	Time 0.219 (0.230)	Data 0.001 (0.007)	Loss 5.5575 (5.6564)	Top-1 acc 5.859 (5.251)	Top-5 acc 17.188 (15.829)	lr 0.04650
Warmup Train [7][2660/3239]	Time 0.206 (0.230)	Data 0.001 (0.007)	Loss 5.5758 (5.6561)	Top-1 acc 6.641 (5.252)	Top-5 acc 21.094 (15.834)	lr 0.04650
Warmup Train [7][2670/3239]	Time 0.198 (0.230)	Data 0.001 (0.007)	Loss 5.6349 (5.6558)	Top-1 acc 5.469 (5.253)	Top-5 acc 18.359 (15.838)	lr 0.04649
Warmup Train [7][2680/3239]	Time 0.237 (0.230)	Data 0.001 (0.007)	Loss 5.4829 (5.6557)	Top-1 acc 7.422 (5.253)	Top-5 acc 18.359 (15.840)	lr 0.04649
Warmup Train [7][2690/3239]	Time 0.183 (0.230)	Data 0.001 (0.007)	Loss 5.6252 (5.6555)	Top-1 acc 5.078 (5.256)	Top-5 acc 13.672 (15.842)	lr 0.04649
Warmup Train [7][2700/3239]	Time 0.225 (0.230)	Data 0.001 (0.007)	Loss 5.6301 (5.6553)	Top-1 acc 5.859 (5.257)	Top-5 acc 14.453 (15.842)	lr 0.04649
Warmup Train [7][2710/3239]	Time 0.179 (0.230)	Data 0.001 (0.007)	Loss 5.6009 (5.6551)	Top-1 acc 5.078 (5.259)	Top-5 acc 16.406 (15.845)	lr 0.04648
Warmup Train [7][2720/3239]	Time 0.180 (0.230)	Data 0.001 (0.007)	Loss 5.7108 (5.6550)	Top-1 acc 5.859 (5.261)	Top-5 acc 16.406 (15.849)	lr 0.04648
Warmup Train [7][2730/3239]	Time 0.201 (0.230)	Data 0.002 (0.007)	Loss 5.6064 (5.6548)	Top-1 acc 6.641 (5.262)	Top-5 acc 19.141 (15.853)	lr 0.04648
Warmup Train [7][2740/3239]	Time 0.349 (0.230)	Data 0.002 (0.007)	Loss 5.7384 (5.6547)	Top-1 acc 3.516 (5.263)	Top-5 acc 11.719 (15.852)	lr 0.04647
Warmup Train [7][2750/3239]	Time 0.227 (0.230)	Data 0.001 (0.007)	Loss 5.5473 (5.6546)	Top-1 acc 7.812 (5.264)	Top-5 acc 19.531 (15.853)	lr 0.04647
Warmup Train [7][2760/3239]	Time 0.185 (0.230)	Data 0.001 (0.007)	Loss 5.5441 (5.6543)	Top-1 acc 4.688 (5.267)	Top-5 acc 15.625 (15.857)	lr 0.04647
Warmup Train [7][2770/3239]	Time 0.205 (0.229)	Data 0.001 (0.007)	Loss 5.6059 (5.6539)	Top-1 acc 4.688 (5.271)	Top-5 acc 16.406 (15.864)	lr 0.04646
Warmup Train [7][2780/3239]	Time 0.241 (0.230)	Data 0.001 (0.007)	Loss 5.5095 (5.6535)	Top-1 acc 7.422 (5.272)	Top-5 acc 17.969 (15.865)	lr 0.04646
Warmup Train [7][2790/3239]	Time 0.159 (0.229)	Data 0.001 (0.007)	Loss 5.6627 (5.6534)	Top-1 acc 3.125 (5.271)	Top-5 acc 13.672 (15.865)	lr 0.04646
Warmup Train [7][2800/3239]	Time 0.208 (0.229)	Data 0.001 (0.007)	Loss 5.6623 (5.6532)	Top-1 acc 5.469 (5.272)	Top-5 acc 16.016 (15.872)	lr 0.04645
Warmup Train [7][2810/3239]	Time 0.180 (0.229)	Data 0.001 (0.007)	Loss 5.4309 (5.6528)	Top-1 acc 7.422 (5.278)	Top-5 acc 22.266 (15.882)	lr 0.04645
Warmup Train [7][2820/3239]	Time 0.267 (0.229)	Data 0.001 (0.007)	Loss 5.5696 (5.6527)	Top-1 acc 4.688 (5.280)	Top-5 acc 14.844 (15.886)	lr 0.04645
Warmup Train [7][2830/3239]	Time 0.193 (0.229)	Data 0.001 (0.007)	Loss 5.6353 (5.6525)	Top-1 acc 5.078 (5.282)	Top-5 acc 12.500 (15.888)	lr 0.04644
Warmup Train [7][2840/3239]	Time 0.338 (0.229)	Data 0.001 (0.007)	Loss 5.5405 (5.6523)	Top-1 acc 5.078 (5.280)	Top-5 acc 16.406 (15.892)	lr 0.04644
Warmup Train [7][2850/3239]	Time 0.163 (0.229)	Data 0.001 (0.007)	Loss 5.4983 (5.6521)	Top-1 acc 7.031 (5.282)	Top-5 acc 18.359 (15.895)	lr 0.04644
Warmup Train [7][2860/3239]	Time 0.233 (0.229)	Data 0.002 (0.007)	Loss 5.6826 (5.6518)	Top-1 acc 3.906 (5.284)	Top-5 acc 17.188 (15.903)	lr 0.04644
Warmup Train [7][2870/3239]	Time 0.256 (0.229)	Data 0.001 (0.007)	Loss 5.6593 (5.6518)	Top-1 acc 3.125 (5.285)	Top-5 acc 15.234 (15.907)	lr 0.04643
Warmup Train [7][2880/3239]	Time 0.261 (0.229)	Data 0.001 (0.007)	Loss 5.6115 (5.6516)	Top-1 acc 7.031 (5.287)	Top-5 acc 17.188 (15.909)	lr 0.04643
Warmup Train [7][2890/3239]	Time 0.212 (0.229)	Data 0.001 (0.007)	Loss 5.6193 (5.6514)	Top-1 acc 5.859 (5.287)	Top-5 acc 16.016 (15.911)	lr 0.04643
Warmup Train [7][2900/3239]	Time 0.196 (0.229)	Data 0.001 (0.007)	Loss 5.6595 (5.6511)	Top-1 acc 6.250 (5.292)	Top-5 acc 20.312 (15.922)	lr 0.04642
Warmup Train [7][2910/3239]	Time 0.168 (0.229)	Data 0.003 (0.007)	Loss 5.5887 (5.6508)	Top-1 acc 5.469 (5.295)	Top-5 acc 17.969 (15.930)	lr 0.04642
Warmup Train [7][2920/3239]	Time 0.274 (0.229)	Data 0.001 (0.007)	Loss 5.6295 (5.6506)	Top-1 acc 5.859 (5.298)	Top-5 acc 16.016 (15.934)	lr 0.04642
Warmup Train [7][2930/3239]	Time 0.233 (0.229)	Data 0.001 (0.007)	Loss 5.6474 (5.6504)	Top-1 acc 5.078 (5.300)	Top-5 acc 17.188 (15.942)	lr 0.04641
Warmup Train [7][2940/3239]	Time 0.324 (0.229)	Data 0.001 (0.007)	Loss 5.5415 (5.6503)	Top-1 acc 5.469 (5.302)	Top-5 acc 17.578 (15.939)	lr 0.04641
Warmup Train [7][2950/3239]	Time 0.189 (0.229)	Data 0.001 (0.007)	Loss 5.5412 (5.6500)	Top-1 acc 2.734 (5.303)	Top-5 acc 17.969 (15.946)	lr 0.04641
Warmup Train [7][2960/3239]	Time 0.220 (0.229)	Data 0.001 (0.007)	Loss 5.7041 (5.6498)	Top-1 acc 4.688 (5.305)	Top-5 acc 15.625 (15.950)	lr 0.04640
Warmup Train [7][2970/3239]	Time 0.177 (0.229)	Data 0.001 (0.007)	Loss 5.5576 (5.6496)	Top-1 acc 4.688 (5.308)	Top-5 acc 18.750 (15.954)	lr 0.04640
Warmup Train [7][2980/3239]	Time 0.234 (0.229)	Data 0.001 (0.007)	Loss 5.7505 (5.6495)	Top-1 acc 2.734 (5.310)	Top-5 acc 10.938 (15.956)	lr 0.04640
Warmup Train [7][2990/3239]	Time 0.224 (0.229)	Data 0.027 (0.007)	Loss 5.5485 (5.6491)	Top-1 acc 5.469 (5.313)	Top-5 acc 17.188 (15.966)	lr 0.04639
Warmup Train [7][3000/3239]	Time 0.183 (0.229)	Data 0.001 (0.007)	Loss 5.4829 (5.6489)	Top-1 acc 7.812 (5.318)	Top-5 acc 18.750 (15.971)	lr 0.04639
Warmup Train [7][3010/3239]	Time 0.202 (0.229)	Data 0.001 (0.007)	Loss 5.6422 (5.6486)	Top-1 acc 6.250 (5.320)	Top-5 acc 17.969 (15.977)	lr 0.04639
Warmup Train [7][3020/3239]	Time 0.267 (0.229)	Data 0.002 (0.007)	Loss 5.6307 (5.6485)	Top-1 acc 7.031 (5.322)	Top-5 acc 19.141 (15.981)	lr 0.04639
Warmup Train [7][3030/3239]	Time 0.277 (0.229)	Data 0.002 (0.007)	Loss 5.4927 (5.6483)	Top-1 acc 5.859 (5.323)	Top-5 acc 18.359 (15.983)	lr 0.04638
Warmup Train [7][3040/3239]	Time 0.189 (0.229)	Data 0.001 (0.007)	Loss 5.6428 (5.6481)	Top-1 acc 4.297 (5.325)	Top-5 acc 18.750 (15.985)	lr 0.04638
Warmup Train [7][3050/3239]	Time 0.244 (0.229)	Data 0.001 (0.007)	Loss 5.5776 (5.6479)	Top-1 acc 4.297 (5.325)	Top-5 acc 14.844 (15.987)	lr 0.04638
Warmup Train [7][3060/3239]	Time 0.207 (0.229)	Data 0.001 (0.007)	Loss 5.5213 (5.6477)	Top-1 acc 6.641 (5.327)	Top-5 acc 19.141 (15.991)	lr 0.04637
Warmup Train [7][3070/3239]	Time 0.149 (0.229)	Data 0.001 (0.007)	Loss 5.6206 (5.6473)	Top-1 acc 7.422 (5.333)	Top-5 acc 19.922 (15.999)	lr 0.04637
Warmup Train [7][3080/3239]	Time 0.244 (0.229)	Data 0.002 (0.007)	Loss 5.5548 (5.6470)	Top-1 acc 5.078 (5.335)	Top-5 acc 17.188 (16.004)	lr 0.04637
Warmup Train [7][3090/3239]	Time 0.217 (0.229)	Data 0.002 (0.007)	Loss 5.7006 (5.6468)	Top-1 acc 4.688 (5.338)	Top-5 acc 16.016 (16.008)	lr 0.04636
Warmup Train [7][3100/3239]	Time 0.193 (0.229)	Data 0.001 (0.007)	Loss 5.6643 (5.6466)	Top-1 acc 5.469 (5.341)	Top-5 acc 16.797 (16.011)	lr 0.04636
Warmup Train [7][3110/3239]	Time 0.176 (0.229)	Data 0.001 (0.007)	Loss 5.5629 (5.6464)	Top-1 acc 6.250 (5.345)	Top-5 acc 17.188 (16.020)	lr 0.04636
Warmup Train [7][3120/3239]	Time 0.388 (0.229)	Data 0.001 (0.007)	Loss 5.7061 (5.6463)	Top-1 acc 7.031 (5.346)	Top-5 acc 16.016 (16.022)	lr 0.04635
Warmup Train [7][3130/3239]	Time 0.155 (0.229)	Data 0.001 (0.007)	Loss 5.6114 (5.6461)	Top-1 acc 5.859 (5.346)	Top-5 acc 16.797 (16.025)	lr 0.04635
Warmup Train [7][3140/3239]	Time 0.197 (0.229)	Data 0.001 (0.007)	Loss 5.5497 (5.6461)	Top-1 acc 6.641 (5.348)	Top-5 acc 17.969 (16.029)	lr 0.04635
Warmup Train [7][3150/3239]	Time 0.259 (0.229)	Data 0.001 (0.007)	Loss 5.5891 (5.6461)	Top-1 acc 6.641 (5.350)	Top-5 acc 16.406 (16.027)	lr 0.04634
Warmup Train [7][3160/3239]	Time 0.259 (0.229)	Data 0.001 (0.007)	Loss 5.6590 (5.6457)	Top-1 acc 6.641 (5.353)	Top-5 acc 15.234 (16.033)	lr 0.04634
Warmup Train [7][3170/3239]	Time 0.229 (0.229)	Data 0.002 (0.007)	Loss 5.6148 (5.6455)	Top-1 acc 5.469 (5.356)	Top-5 acc 18.750 (16.041)	lr 0.04634
Warmup Train [7][3180/3239]	Time 0.224 (0.229)	Data 0.000 (0.007)	Loss 5.5189 (5.6452)	Top-1 acc 8.203 (5.362)	Top-5 acc 21.875 (16.048)	lr 0.04633
Warmup Train [7][3190/3239]	Time 0.230 (0.229)	Data 0.000 (0.007)	Loss 5.5112 (5.6450)	Top-1 acc 4.688 (5.363)	Top-5 acc 16.797 (16.054)	lr 0.04633
Warmup Train [7][3200/3239]	Time 0.190 (0.229)	Data 0.000 (0.007)	Loss 5.6023 (5.6447)	Top-1 acc 7.031 (5.367)	Top-5 acc 17.188 (16.059)	lr 0.04633
Warmup Train [7][3210/3239]	Time 0.193 (0.228)	Data 0.000 (0.007)	Loss 5.5012 (5.6444)	Top-1 acc 8.594 (5.370)	Top-5 acc 20.703 (16.068)	lr 0.04633
Warmup Train [7][3220/3239]	Time 0.351 (0.228)	Data 0.000 (0.007)	Loss 5.5223 (5.6442)	Top-1 acc 5.859 (5.370)	Top-5 acc 21.484 (16.072)	lr 0.04632
Warmup Train [7][3230/3239]	Time 0.238 (0.228)	Data 0.000 (0.006)	Loss 5.5045 (5.6441)	Top-1 acc 4.688 (5.371)	Top-5 acc 15.234 (16.072)	lr 0.04632
Warmup Train [7][3239/3239]	Time 0.136 (0.228)	Data 0.000 (0.006)	Loss 5.8338 (5.6439)	Top-1 acc 4.938 (5.372)	Top-5 acc 14.815 (16.076)	lr 0.04632
==========Warmup Valid [7/40]	loss 5.004	top-1 acc 8.160	top-5 acc 22.579	Train top-1 5.372	top-5 16.076	flops: 442.4M
Warmup Train [8][0/3239]	Time 12.410 (12.410)	Data 11.362 (11.362)	Loss 5.5099 (5.5099)	Top-1 acc 4.688 (4.688)	Top-5 acc 16.797 (16.797)	lr 0.04632
Warmup Train [8][10/3239]	Time 0.241 (1.457)	Data 0.002 (1.064)	Loss 5.4882 (5.5675)	Top-1 acc 5.078 (5.469)	Top-5 acc 19.531 (17.116)	lr 0.04631
Warmup Train [8][20/3239]	Time 0.381 (0.895)	Data 0.002 (0.560)	Loss 5.5984 (5.5809)	Top-1 acc 4.297 (6.008)	Top-5 acc 14.062 (17.076)	lr 0.04631
Warmup Train [8][30/3239]	Time 0.262 (0.683)	Data 0.001 (0.380)	Loss 5.6867 (5.5830)	Top-1 acc 8.594 (6.162)	Top-5 acc 17.969 (17.339)	lr 0.04631
Warmup Train [8][40/3239]	Time 0.218 (0.576)	Data 0.001 (0.288)	Loss 5.4448 (5.5776)	Top-1 acc 10.156 (6.098)	Top-5 acc 20.312 (17.454)	lr 0.04630
Warmup Train [8][50/3239]	Time 0.262 (0.507)	Data 0.001 (0.232)	Loss 5.5903 (5.5699)	Top-1 acc 5.078 (6.097)	Top-5 acc 19.531 (17.670)	lr 0.04630
Warmup Train [8][60/3239]	Time 0.233 (0.460)	Data 0.001 (0.194)	Loss 5.5509 (5.5745)	Top-1 acc 3.125 (6.039)	Top-5 acc 17.188 (17.610)	lr 0.04630
Warmup Train [8][70/3239]	Time 0.270 (0.433)	Data 0.003 (0.167)	Loss 5.5173 (5.5721)	Top-1 acc 7.031 (6.052)	Top-5 acc 20.703 (17.479)	lr 0.04629
Warmup Train [8][80/3239]	Time 0.198 (0.406)	Data 0.001 (0.147)	Loss 5.5627 (5.5725)	Top-1 acc 4.297 (6.154)	Top-5 acc 16.406 (17.540)	lr 0.04629
Warmup Train [8][90/3239]	Time 0.143 (0.387)	Data 0.001 (0.131)	Loss 5.5081 (5.5702)	Top-1 acc 5.078 (6.147)	Top-5 acc 15.625 (17.501)	lr 0.04629
Warmup Train [8][100/3239]	Time 0.240 (0.371)	Data 0.002 (0.118)	Loss 5.5541 (5.5707)	Top-1 acc 7.031 (6.130)	Top-5 acc 16.406 (17.443)	lr 0.04628
Warmup Train [8][110/3239]	Time 0.370 (0.358)	Data 0.001 (0.108)	Loss 5.4614 (5.5684)	Top-1 acc 5.859 (6.141)	Top-5 acc 19.141 (17.480)	lr 0.04628
Warmup Train [8][120/3239]	Time 0.193 (0.346)	Data 0.002 (0.099)	Loss 5.5804 (5.5653)	Top-1 acc 6.641 (6.160)	Top-5 acc 19.141 (17.572)	lr 0.04628
Warmup Train [8][130/3239]	Time 0.239 (0.337)	Data 0.001 (0.091)	Loss 5.5390 (5.5663)	Top-1 acc 7.422 (6.140)	Top-5 acc 18.750 (17.536)	lr 0.04627
Warmup Train [8][140/3239]	Time 0.195 (0.328)	Data 0.001 (0.085)	Loss 5.4331 (5.5660)	Top-1 acc 7.812 (6.142)	Top-5 acc 19.922 (17.603)	lr 0.04627
Warmup Train [8][150/3239]	Time 0.192 (0.320)	Data 0.002 (0.080)	Loss 5.4921 (5.5679)	Top-1 acc 6.641 (6.097)	Top-5 acc 18.750 (17.542)	lr 0.04627
Warmup Train [8][160/3239]	Time 0.266 (0.314)	Data 0.001 (0.075)	Loss 5.6343 (5.5712)	Top-1 acc 4.688 (6.100)	Top-5 acc 16.016 (17.476)	lr 0.04627
Warmup Train [8][170/3239]	Time 0.295 (0.310)	Data 0.002 (0.071)	Loss 5.5824 (5.5748)	Top-1 acc 7.031 (6.106)	Top-5 acc 18.359 (17.423)	lr 0.04626
Warmup Train [8][180/3239]	Time 0.203 (0.305)	Data 0.001 (0.067)	Loss 5.6339 (5.5748)	Top-1 acc 5.469 (6.133)	Top-5 acc 16.797 (17.444)	lr 0.04626
Warmup Train [8][190/3239]	Time 0.200 (0.301)	Data 0.001 (0.063)	Loss 5.6758 (5.5744)	Top-1 acc 5.078 (6.090)	Top-5 acc 15.234 (17.435)	lr 0.04626
Warmup Train [8][200/3239]	Time 0.270 (0.297)	Data 0.001 (0.060)	Loss 5.6098 (5.5745)	Top-1 acc 5.859 (6.098)	Top-5 acc 16.797 (17.405)	lr 0.04625
Warmup Train [8][210/3239]	Time 0.321 (0.293)	Data 0.001 (0.058)	Loss 5.5574 (5.5732)	Top-1 acc 7.031 (6.117)	Top-5 acc 18.359 (17.486)	lr 0.04625
Warmup Train [8][220/3239]	Time 0.185 (0.290)	Data 0.001 (0.055)	Loss 5.5818 (5.5727)	Top-1 acc 6.250 (6.103)	Top-5 acc 18.750 (17.490)	lr 0.04625
Warmup Train [8][230/3239]	Time 0.221 (0.287)	Data 0.001 (0.053)	Loss 5.4828 (5.5720)	Top-1 acc 5.469 (6.098)	Top-5 acc 19.141 (17.516)	lr 0.04624
Warmup Train [8][240/3239]	Time 0.221 (0.284)	Data 0.002 (0.051)	Loss 5.6528 (5.5716)	Top-1 acc 4.297 (6.086)	Top-5 acc 14.062 (17.512)	lr 0.04624
Warmup Train [8][250/3239]	Time 0.183 (0.281)	Data 0.001 (0.049)	Loss 5.6002 (5.5722)	Top-1 acc 7.812 (6.107)	Top-5 acc 14.453 (17.507)	lr 0.04624
Warmup Train [8][260/3239]	Time 0.175 (0.279)	Data 0.001 (0.047)	Loss 5.6136 (5.5715)	Top-1 acc 4.688 (6.124)	Top-5 acc 18.750 (17.518)	lr 0.04623
Warmup Train [8][270/3239]	Time 0.197 (0.277)	Data 0.001 (0.046)	Loss 5.6320 (5.5705)	Top-1 acc 2.734 (6.099)	Top-5 acc 15.625 (17.529)	lr 0.04623
Warmup Train [8][280/3239]	Time 0.205 (0.275)	Data 0.001 (0.044)	Loss 5.4728 (5.5701)	Top-1 acc 5.469 (6.107)	Top-5 acc 20.312 (17.545)	lr 0.04623
Warmup Train [8][290/3239]	Time 0.203 (0.273)	Data 0.001 (0.043)	Loss 5.6113 (5.5705)	Top-1 acc 5.859 (6.105)	Top-5 acc 15.234 (17.526)	lr 0.04622
Warmup Train [8][300/3239]	Time 0.195 (0.271)	Data 0.001 (0.041)	Loss 5.5484 (5.5707)	Top-1 acc 4.688 (6.092)	Top-5 acc 19.531 (17.520)	lr 0.04622
Warmup Train [8][310/3239]	Time 0.148 (0.269)	Data 0.001 (0.040)	Loss 5.5500 (5.5707)	Top-1 acc 6.250 (6.083)	Top-5 acc 17.188 (17.507)	lr 0.04622
Warmup Train [8][320/3239]	Time 0.292 (0.268)	Data 0.001 (0.039)	Loss 5.5120 (5.5705)	Top-1 acc 8.594 (6.088)	Top-5 acc 19.141 (17.525)	lr 0.04621
Warmup Train [8][330/3239]	Time 0.208 (0.266)	Data 0.002 (0.038)	Loss 5.6903 (5.5711)	Top-1 acc 4.297 (6.080)	Top-5 acc 13.672 (17.499)	lr 0.04621
Warmup Train [8][340/3239]	Time 0.228 (0.265)	Data 0.001 (0.037)	Loss 5.6005 (5.5714)	Top-1 acc 4.688 (6.087)	Top-5 acc 15.625 (17.474)	lr 0.04621
Warmup Train [8][350/3239]	Time 0.229 (0.264)	Data 0.001 (0.036)	Loss 5.5371 (5.5719)	Top-1 acc 6.641 (6.084)	Top-5 acc 19.531 (17.475)	lr 0.04620
Warmup Train [8][360/3239]	Time 0.229 (0.262)	Data 0.001 (0.035)	Loss 5.5183 (5.5712)	Top-1 acc 6.641 (6.102)	Top-5 acc 19.531 (17.493)	lr 0.04620
Warmup Train [8][370/3239]	Time 0.238 (0.262)	Data 0.001 (0.034)	Loss 5.6348 (5.5701)	Top-1 acc 5.078 (6.111)	Top-5 acc 12.500 (17.510)	lr 0.04620
Warmup Train [8][380/3239]	Time 0.218 (0.260)	Data 0.001 (0.033)	Loss 5.5659 (5.5700)	Top-1 acc 5.859 (6.133)	Top-5 acc 20.312 (17.531)	lr 0.04619
Warmup Train [8][390/3239]	Time 0.235 (0.259)	Data 0.001 (0.032)	Loss 5.4996 (5.5702)	Top-1 acc 7.422 (6.137)	Top-5 acc 19.922 (17.535)	lr 0.04619
Warmup Train [8][400/3239]	Time 0.188 (0.259)	Data 0.002 (0.032)	Loss 5.5302 (5.5704)	Top-1 acc 6.250 (6.120)	Top-5 acc 17.578 (17.524)	lr 0.04619
Warmup Train [8][410/3239]	Time 0.187 (0.258)	Data 0.001 (0.031)	Loss 5.5323 (5.5699)	Top-1 acc 6.641 (6.132)	Top-5 acc 19.141 (17.535)	lr 0.04619
Warmup Train [8][420/3239]	Time 0.283 (0.257)	Data 0.001 (0.030)	Loss 5.4943 (5.5693)	Top-1 acc 8.203 (6.155)	Top-5 acc 21.484 (17.557)	lr 0.04618
Warmup Train [8][430/3239]	Time 0.191 (0.257)	Data 0.001 (0.030)	Loss 5.4744 (5.5691)	Top-1 acc 4.297 (6.159)	Top-5 acc 17.969 (17.564)	lr 0.04618
Warmup Train [8][440/3239]	Time 0.198 (0.256)	Data 0.001 (0.029)	Loss 5.5476 (5.5684)	Top-1 acc 6.641 (6.155)	Top-5 acc 19.922 (17.554)	lr 0.04618
Warmup Train [8][450/3239]	Time 0.178 (0.255)	Data 0.001 (0.029)	Loss 5.5755 (5.5691)	Top-1 acc 6.641 (6.137)	Top-5 acc 17.969 (17.550)	lr 0.04617
Warmup Train [8][460/3239]	Time 0.254 (0.255)	Data 0.001 (0.028)	Loss 5.4428 (5.5683)	Top-1 acc 8.203 (6.141)	Top-5 acc 19.922 (17.579)	lr 0.04617
Warmup Train [8][470/3239]	Time 0.216 (0.254)	Data 0.001 (0.028)	Loss 5.6022 (5.5683)	Top-1 acc 5.078 (6.140)	Top-5 acc 15.625 (17.567)	lr 0.04617
Warmup Train [8][480/3239]	Time 0.157 (0.253)	Data 0.001 (0.027)	Loss 5.5430 (5.5678)	Top-1 acc 3.906 (6.137)	Top-5 acc 18.750 (17.572)	lr 0.04616
Warmup Train [8][490/3239]	Time 0.236 (0.253)	Data 0.001 (0.027)	Loss 5.5572 (5.5676)	Top-1 acc 5.078 (6.143)	Top-5 acc 18.750 (17.573)	lr 0.04616
Warmup Train [8][500/3239]	Time 0.192 (0.252)	Data 0.001 (0.026)	Loss 5.5828 (5.5674)	Top-1 acc 7.031 (6.154)	Top-5 acc 18.359 (17.581)	lr 0.04616
Warmup Train [8][510/3239]	Time 0.288 (0.251)	Data 0.001 (0.026)	Loss 5.6627 (5.5670)	Top-1 acc 3.125 (6.167)	Top-5 acc 14.062 (17.596)	lr 0.04615
Warmup Train [8][520/3239]	Time 0.275 (0.250)	Data 0.001 (0.025)	Loss 5.5472 (5.5669)	Top-1 acc 3.516 (6.162)	Top-5 acc 16.016 (17.593)	lr 0.04615
Warmup Train [8][530/3239]	Time 0.169 (0.250)	Data 0.001 (0.025)	Loss 5.4653 (5.5664)	Top-1 acc 8.594 (6.170)	Top-5 acc 19.141 (17.610)	lr 0.04615
Warmup Train [8][540/3239]	Time 0.229 (0.249)	Data 0.001 (0.025)	Loss 5.4897 (5.5661)	Top-1 acc 5.469 (6.164)	Top-5 acc 20.312 (17.613)	lr 0.04614
Warmup Train [8][550/3239]	Time 0.211 (0.248)	Data 0.001 (0.024)	Loss 5.6357 (5.5659)	Top-1 acc 5.859 (6.160)	Top-5 acc 13.281 (17.618)	lr 0.04614
Warmup Train [8][560/3239]	Time 0.241 (0.248)	Data 0.002 (0.024)	Loss 5.6501 (5.5662)	Top-1 acc 5.469 (6.153)	Top-5 acc 17.188 (17.627)	lr 0.04614
Warmup Train [8][570/3239]	Time 0.136 (0.247)	Data 0.001 (0.023)	Loss 5.6540 (5.5654)	Top-1 acc 3.516 (6.174)	Top-5 acc 16.406 (17.638)	lr 0.04613
Warmup Train [8][580/3239]	Time 0.149 (0.247)	Data 0.001 (0.023)	Loss 5.4411 (5.5653)	Top-1 acc 5.859 (6.165)	Top-5 acc 16.797 (17.631)	lr 0.04613
Warmup Train [8][590/3239]	Time 0.209 (0.246)	Data 0.002 (0.023)	Loss 5.5328 (5.5659)	Top-1 acc 4.688 (6.154)	Top-5 acc 16.016 (17.626)	lr 0.04613
Warmup Train [8][600/3239]	Time 0.244 (0.246)	Data 0.001 (0.023)	Loss 5.5206 (5.5659)	Top-1 acc 7.812 (6.158)	Top-5 acc 19.141 (17.644)	lr 0.04612
Warmup Train [8][610/3239]	Time 0.152 (0.246)	Data 0.001 (0.022)	Loss 5.5729 (5.5655)	Top-1 acc 5.859 (6.155)	Top-5 acc 19.531 (17.669)	lr 0.04612
Warmup Train [8][620/3239]	Time 0.148 (0.245)	Data 0.001 (0.022)	Loss 5.6049 (5.5650)	Top-1 acc 6.250 (6.169)	Top-5 acc 16.406 (17.678)	lr 0.04612
Warmup Train [8][630/3239]	Time 0.338 (0.245)	Data 0.001 (0.022)	Loss 5.5517 (5.5649)	Top-1 acc 7.422 (6.174)	Top-5 acc 19.141 (17.682)	lr 0.04611
Warmup Train [8][640/3239]	Time 0.205 (0.245)	Data 0.001 (0.021)	Loss 5.5644 (5.5643)	Top-1 acc 5.469 (6.175)	Top-5 acc 17.969 (17.688)	lr 0.04611
Warmup Train [8][650/3239]	Time 0.233 (0.245)	Data 0.001 (0.021)	Loss 5.5541 (5.5638)	Top-1 acc 5.859 (6.170)	Top-5 acc 19.141 (17.701)	lr 0.04611
Warmup Train [8][660/3239]	Time 0.214 (0.244)	Data 0.001 (0.021)	Loss 5.4880 (5.5633)	Top-1 acc 5.078 (6.183)	Top-5 acc 19.922 (17.712)	lr 0.04610
Warmup Train [8][670/3239]	Time 0.203 (0.244)	Data 0.001 (0.020)	Loss 5.5574 (5.5636)	Top-1 acc 4.297 (6.181)	Top-5 acc 15.234 (17.704)	lr 0.04610
Warmup Train [8][680/3239]	Time 0.236 (0.244)	Data 0.001 (0.020)	Loss 5.4042 (5.5631)	Top-1 acc 7.812 (6.192)	Top-5 acc 19.922 (17.708)	lr 0.04610
Warmup Train [8][690/3239]	Time 0.225 (0.243)	Data 0.001 (0.020)	Loss 5.4654 (5.5628)	Top-1 acc 8.594 (6.190)	Top-5 acc 21.094 (17.709)	lr 0.04609
Warmup Train [8][700/3239]	Time 0.224 (0.243)	Data 0.002 (0.020)	Loss 5.5964 (5.5630)	Top-1 acc 6.641 (6.186)	Top-5 acc 14.453 (17.696)	lr 0.04609
Warmup Train [8][710/3239]	Time 0.236 (0.243)	Data 0.002 (0.019)	Loss 5.4937 (5.5630)	Top-1 acc 5.859 (6.191)	Top-5 acc 19.141 (17.698)	lr 0.04609
Warmup Train [8][720/3239]	Time 0.241 (0.242)	Data 0.001 (0.019)	Loss 5.5446 (5.5626)	Top-1 acc 3.516 (6.194)	Top-5 acc 16.406 (17.701)	lr 0.04608
Warmup Train [8][730/3239]	Time 0.350 (0.242)	Data 0.001 (0.019)	Loss 5.4812 (5.5622)	Top-1 acc 6.250 (6.204)	Top-5 acc 19.141 (17.731)	lr 0.04608
Warmup Train [8][740/3239]	Time 0.448 (0.242)	Data 0.001 (0.019)	Loss 5.5910 (5.5616)	Top-1 acc 4.297 (6.212)	Top-5 acc 16.016 (17.751)	lr 0.04608
Warmup Train [8][750/3239]	Time 0.208 (0.242)	Data 0.001 (0.018)	Loss 5.6748 (5.5612)	Top-1 acc 4.688 (6.222)	Top-5 acc 16.016 (17.759)	lr 0.04608
Warmup Train [8][760/3239]	Time 0.180 (0.241)	Data 0.001 (0.018)	Loss 5.5030 (5.5610)	Top-1 acc 5.859 (6.222)	Top-5 acc 19.531 (17.763)	lr 0.04607
Warmup Train [8][770/3239]	Time 0.191 (0.241)	Data 0.001 (0.018)	Loss 5.5345 (5.5612)	Top-1 acc 7.031 (6.226)	Top-5 acc 20.312 (17.769)	lr 0.04607
Warmup Train [8][780/3239]	Time 0.144 (0.240)	Data 0.001 (0.018)	Loss 5.7688 (5.5611)	Top-1 acc 5.078 (6.234)	Top-5 acc 15.625 (17.784)	lr 0.04607
Warmup Train [8][790/3239]	Time 0.215 (0.240)	Data 0.001 (0.018)	Loss 5.5629 (5.5613)	Top-1 acc 5.078 (6.233)	Top-5 acc 19.531 (17.784)	lr 0.04606
Warmup Train [8][800/3239]	Time 0.207 (0.240)	Data 0.001 (0.017)	Loss 5.4266 (5.5607)	Top-1 acc 7.031 (6.234)	Top-5 acc 23.828 (17.796)	lr 0.04606
Warmup Train [8][810/3239]	Time 0.277 (0.240)	Data 0.001 (0.017)	Loss 5.6375 (5.5608)	Top-1 acc 3.125 (6.232)	Top-5 acc 13.672 (17.789)	lr 0.04606
Warmup Train [8][820/3239]	Time 0.175 (0.240)	Data 0.001 (0.017)	Loss 5.4579 (5.5597)	Top-1 acc 5.859 (6.255)	Top-5 acc 18.359 (17.828)	lr 0.04605
Warmup Train [8][830/3239]	Time 0.188 (0.239)	Data 0.001 (0.017)	Loss 5.4836 (5.5594)	Top-1 acc 7.031 (6.259)	Top-5 acc 20.312 (17.837)	lr 0.04605
Warmup Train [8][840/3239]	Time 0.339 (0.239)	Data 0.001 (0.017)	Loss 5.4295 (5.5590)	Top-1 acc 4.688 (6.257)	Top-5 acc 19.922 (17.840)	lr 0.04605
Warmup Train [8][850/3239]	Time 0.128 (0.239)	Data 0.001 (0.017)	Loss 5.5326 (5.5582)	Top-1 acc 5.469 (6.263)	Top-5 acc 17.578 (17.859)	lr 0.04604
Warmup Train [8][860/3239]	Time 0.238 (0.239)	Data 0.002 (0.017)	Loss 5.5751 (5.5577)	Top-1 acc 5.469 (6.270)	Top-5 acc 16.406 (17.872)	lr 0.04604
Warmup Train [8][870/3239]	Time 0.159 (0.239)	Data 0.001 (0.016)	Loss 5.5139 (5.5574)	Top-1 acc 5.859 (6.271)	Top-5 acc 17.188 (17.871)	lr 0.04604
Warmup Train [8][880/3239]	Time 0.159 (0.238)	Data 0.001 (0.016)	Loss 5.6499 (5.5572)	Top-1 acc 5.859 (6.275)	Top-5 acc 17.578 (17.878)	lr 0.04603
Warmup Train [8][890/3239]	Time 0.198 (0.238)	Data 0.001 (0.016)	Loss 5.4460 (5.5564)	Top-1 acc 8.594 (6.284)	Top-5 acc 22.656 (17.899)	lr 0.04603
Warmup Train [8][900/3239]	Time 0.150 (0.238)	Data 0.001 (0.016)	Loss 5.5138 (5.5561)	Top-1 acc 6.250 (6.294)	Top-5 acc 19.531 (17.907)	lr 0.04603
Warmup Train [8][910/3239]	Time 0.223 (0.238)	Data 0.001 (0.016)	Loss 5.4595 (5.5555)	Top-1 acc 6.641 (6.299)	Top-5 acc 19.141 (17.917)	lr 0.04602
Warmup Train [8][920/3239]	Time 0.277 (0.238)	Data 0.001 (0.016)	Loss 5.5990 (5.5554)	Top-1 acc 5.469 (6.302)	Top-5 acc 18.750 (17.921)	lr 0.04602
Warmup Train [8][930/3239]	Time 0.219 (0.238)	Data 0.001 (0.016)	Loss 5.4891 (5.5549)	Top-1 acc 8.203 (6.305)	Top-5 acc 21.094 (17.939)	lr 0.04602
Warmup Train [8][940/3239]	Time 0.346 (0.238)	Data 0.002 (0.016)	Loss 5.4990 (5.5547)	Top-1 acc 6.250 (6.304)	Top-5 acc 20.312 (17.950)	lr 0.04601
Warmup Train [8][950/3239]	Time 0.272 (0.238)	Data 0.001 (0.015)	Loss 5.5939 (5.5542)	Top-1 acc 8.203 (6.310)	Top-5 acc 17.969 (17.965)	lr 0.04601
Warmup Train [8][960/3239]	Time 0.281 (0.238)	Data 0.002 (0.015)	Loss 5.4523 (5.5544)	Top-1 acc 8.594 (6.309)	Top-5 acc 20.703 (17.963)	lr 0.04601
Warmup Train [8][970/3239]	Time 0.217 (0.237)	Data 0.001 (0.015)	Loss 5.4707 (5.5544)	Top-1 acc 5.469 (6.308)	Top-5 acc 21.484 (17.966)	lr 0.04600
Warmup Train [8][980/3239]	Time 0.223 (0.237)	Data 0.001 (0.015)	Loss 5.6219 (5.5544)	Top-1 acc 5.859 (6.310)	Top-5 acc 13.281 (17.960)	lr 0.04600
Warmup Train [8][990/3239]	Time 0.313 (0.237)	Data 0.001 (0.015)	Loss 5.4569 (5.5545)	Top-1 acc 6.641 (6.309)	Top-5 acc 22.266 (17.964)	lr 0.04600
Warmup Train [8][1000/3239]	Time 0.223 (0.237)	Data 0.001 (0.015)	Loss 5.4532 (5.5543)	Top-1 acc 6.250 (6.312)	Top-5 acc 19.141 (17.961)	lr 0.04599
Warmup Train [8][1010/3239]	Time 0.234 (0.237)	Data 0.002 (0.015)	Loss 5.5677 (5.5541)	Top-1 acc 5.859 (6.311)	Top-5 acc 21.094 (17.968)	lr 0.04599
Warmup Train [8][1020/3239]	Time 0.160 (0.237)	Data 0.001 (0.015)	Loss 5.5212 (5.5538)	Top-1 acc 5.859 (6.310)	Top-5 acc 19.922 (17.978)	lr 0.04599
Warmup Train [8][1030/3239]	Time 0.245 (0.237)	Data 0.001 (0.014)	Loss 5.3676 (5.5535)	Top-1 acc 8.594 (6.309)	Top-5 acc 21.875 (17.980)	lr 0.04598
Warmup Train [8][1040/3239]	Time 0.254 (0.237)	Data 0.001 (0.014)	Loss 5.5034 (5.5539)	Top-1 acc 4.688 (6.300)	Top-5 acc 20.312 (17.973)	lr 0.04598
Warmup Train [8][1050/3239]	Time 0.373 (0.237)	Data 0.001 (0.014)	Loss 5.4910 (5.5540)	Top-1 acc 4.297 (6.300)	Top-5 acc 17.578 (17.974)	lr 0.04598
Warmup Train [8][1060/3239]	Time 0.229 (0.237)	Data 0.002 (0.014)	Loss 5.5768 (5.5537)	Top-1 acc 5.078 (6.307)	Top-5 acc 14.844 (17.983)	lr 0.04597
Warmup Train [8][1070/3239]	Time 0.260 (0.236)	Data 0.001 (0.014)	Loss 5.4004 (5.5533)	Top-1 acc 5.859 (6.308)	Top-5 acc 20.703 (17.994)	lr 0.04597
Warmup Train [8][1080/3239]	Time 0.218 (0.236)	Data 0.001 (0.014)	Loss 5.5222 (5.5527)	Top-1 acc 8.984 (6.312)	Top-5 acc 21.484 (18.007)	lr 0.04597
Warmup Train [8][1090/3239]	Time 0.138 (0.236)	Data 0.001 (0.014)	Loss 5.7175 (5.5527)	Top-1 acc 4.688 (6.311)	Top-5 acc 15.234 (18.003)	lr 0.04596
Warmup Train [8][1100/3239]	Time 0.241 (0.236)	Data 0.001 (0.014)	Loss 5.4881 (5.5524)	Top-1 acc 9.375 (6.319)	Top-5 acc 19.531 (18.005)	lr 0.04596
Warmup Train [8][1110/3239]	Time 0.225 (0.236)	Data 0.001 (0.014)	Loss 5.6118 (5.5524)	Top-1 acc 5.078 (6.316)	Top-5 acc 17.969 (18.010)	lr 0.04596
Warmup Train [8][1120/3239]	Time 0.166 (0.236)	Data 0.001 (0.013)	Loss 5.5788 (5.5520)	Top-1 acc 5.078 (6.313)	Top-5 acc 17.188 (18.020)	lr 0.04595
Warmup Train [8][1130/3239]	Time 0.197 (0.236)	Data 0.001 (0.013)	Loss 5.7043 (5.5524)	Top-1 acc 6.250 (6.316)	Top-5 acc 17.188 (18.024)	lr 0.04595
Warmup Train [8][1140/3239]	Time 0.170 (0.236)	Data 0.002 (0.013)	Loss 5.4342 (5.5520)	Top-1 acc 6.641 (6.323)	Top-5 acc 22.656 (18.039)	lr 0.04595
Warmup Train [8][1150/3239]	Time 0.398 (0.236)	Data 0.001 (0.013)	Loss 5.6958 (5.5519)	Top-1 acc 5.469 (6.334)	Top-5 acc 15.234 (18.042)	lr 0.04594
Warmup Train [8][1160/3239]	Time 0.190 (0.235)	Data 0.001 (0.013)	Loss 5.6202 (5.5520)	Top-1 acc 4.688 (6.325)	Top-5 acc 14.062 (18.036)	lr 0.04594
Warmup Train [8][1170/3239]	Time 0.171 (0.235)	Data 0.001 (0.013)	Loss 5.5361 (5.5520)	Top-1 acc 6.641 (6.332)	Top-5 acc 19.141 (18.034)	lr 0.04594
Warmup Train [8][1180/3239]	Time 0.181 (0.235)	Data 0.001 (0.013)	Loss 5.5049 (5.5520)	Top-1 acc 7.031 (6.327)	Top-5 acc 21.484 (18.033)	lr 0.04593
Warmup Train [8][1190/3239]	Time 0.266 (0.235)	Data 0.001 (0.013)	Loss 5.5131 (5.5517)	Top-1 acc 7.422 (6.329)	Top-5 acc 19.141 (18.035)	lr 0.04593
Warmup Train [8][1200/3239]	Time 0.185 (0.235)	Data 0.001 (0.013)	Loss 5.4147 (5.5514)	Top-1 acc 4.297 (6.338)	Top-5 acc 19.531 (18.046)	lr 0.04593
Warmup Train [8][1210/3239]	Time 0.147 (0.235)	Data 0.001 (0.013)	Loss 5.6581 (5.5514)	Top-1 acc 5.859 (6.338)	Top-5 acc 15.625 (18.045)	lr 0.04592
Warmup Train [8][1220/3239]	Time 0.236 (0.235)	Data 0.001 (0.013)	Loss 5.5609 (5.5512)	Top-1 acc 3.906 (6.334)	Top-5 acc 17.969 (18.046)	lr 0.04592
Warmup Train [8][1230/3239]	Time 0.270 (0.235)	Data 0.001 (0.013)	Loss 5.3930 (5.5511)	Top-1 acc 9.375 (6.330)	Top-5 acc 22.266 (18.047)	lr 0.04592
Warmup Train [8][1240/3239]	Time 0.389 (0.235)	Data 0.001 (0.013)	Loss 5.3753 (5.5508)	Top-1 acc 12.891 (6.335)	Top-5 acc 24.219 (18.059)	lr 0.04591
Warmup Train [8][1250/3239]	Time 0.275 (0.235)	Data 0.002 (0.012)	Loss 5.5263 (5.5508)	Top-1 acc 5.078 (6.336)	Top-5 acc 17.969 (18.055)	lr 0.04591
Warmup Train [8][1260/3239]	Time 0.161 (0.235)	Data 0.003 (0.012)	Loss 5.5053 (5.5503)	Top-1 acc 8.594 (6.344)	Top-5 acc 18.359 (18.057)	lr 0.04591
Warmup Train [8][1270/3239]	Time 0.162 (0.234)	Data 0.001 (0.012)	Loss 5.6806 (5.5503)	Top-1 acc 5.078 (6.343)	Top-5 acc 16.797 (18.056)	lr 0.04590
Warmup Train [8][1280/3239]	Time 0.239 (0.234)	Data 0.001 (0.012)	Loss 5.2355 (5.5498)	Top-1 acc 10.547 (6.355)	Top-5 acc 26.562 (18.069)	lr 0.04590
Warmup Train [8][1290/3239]	Time 0.227 (0.234)	Data 0.001 (0.012)	Loss 5.3539 (5.5494)	Top-1 acc 8.203 (6.365)	Top-5 acc 21.875 (18.085)	lr 0.04590
Warmup Train [8][1300/3239]	Time 0.212 (0.234)	Data 0.001 (0.012)	Loss 5.5346 (5.5493)	Top-1 acc 7.031 (6.370)	Top-5 acc 17.188 (18.089)	lr 0.04589
Warmup Train [8][1310/3239]	Time 0.246 (0.234)	Data 0.001 (0.012)	Loss 5.5548 (5.5493)	Top-1 acc 4.297 (6.369)	Top-5 acc 17.578 (18.089)	lr 0.04589
Warmup Train [8][1320/3239]	Time 0.186 (0.234)	Data 0.002 (0.012)	Loss 5.6224 (5.5492)	Top-1 acc 6.250 (6.371)	Top-5 acc 15.625 (18.086)	lr 0.04589
Warmup Train [8][1330/3239]	Time 0.299 (0.234)	Data 0.001 (0.012)	Loss 5.4224 (5.5489)	Top-1 acc 7.812 (6.372)	Top-5 acc 17.969 (18.090)	lr 0.04588
Warmup Train [8][1340/3239]	Time 0.324 (0.234)	Data 0.003 (0.012)	Loss 5.5314 (5.5488)	Top-1 acc 7.031 (6.371)	Top-5 acc 17.969 (18.098)	lr 0.04588
Warmup Train [8][1350/3239]	Time 0.275 (0.234)	Data 0.001 (0.012)	Loss 5.3942 (5.5484)	Top-1 acc 6.250 (6.371)	Top-5 acc 18.750 (18.105)	lr 0.04588
Warmup Train [8][1360/3239]	Time 0.253 (0.234)	Data 0.002 (0.012)	Loss 5.5595 (5.5481)	Top-1 acc 4.297 (6.371)	Top-5 acc 16.797 (18.103)	lr 0.04587
Warmup Train [8][1370/3239]	Time 0.229 (0.234)	Data 0.001 (0.012)	Loss 5.5957 (5.5480)	Top-1 acc 5.078 (6.376)	Top-5 acc 16.406 (18.109)	lr 0.04587
Warmup Train [8][1380/3239]	Time 0.153 (0.234)	Data 0.001 (0.011)	Loss 5.5136 (5.5476)	Top-1 acc 5.078 (6.383)	Top-5 acc 20.312 (18.120)	lr 0.04587
Warmup Train [8][1390/3239]	Time 0.176 (0.234)	Data 0.001 (0.011)	Loss 5.5945 (5.5477)	Top-1 acc 5.859 (6.383)	Top-5 acc 15.625 (18.121)	lr 0.04586
Warmup Train [8][1400/3239]	Time 0.182 (0.234)	Data 0.001 (0.011)	Loss 5.5764 (5.5474)	Top-1 acc 7.422 (6.386)	Top-5 acc 19.531 (18.129)	lr 0.04586
Warmup Train [8][1410/3239]	Time 0.144 (0.234)	Data 0.001 (0.011)	Loss 5.4764 (5.5469)	Top-1 acc 5.078 (6.386)	Top-5 acc 18.750 (18.138)	lr 0.04586
Warmup Train [8][1420/3239]	Time 0.223 (0.234)	Data 0.001 (0.011)	Loss 5.4214 (5.5466)	Top-1 acc 7.031 (6.384)	Top-5 acc 20.312 (18.150)	lr 0.04585
Warmup Train [8][1430/3239]	Time 0.210 (0.234)	Data 0.002 (0.011)	Loss 5.6130 (5.5465)	Top-1 acc 6.641 (6.388)	Top-5 acc 17.578 (18.151)	lr 0.04585
Warmup Train [8][1440/3239]	Time 0.266 (0.234)	Data 0.002 (0.011)	Loss 5.5333 (5.5464)	Top-1 acc 6.250 (6.392)	Top-5 acc 17.578 (18.155)	lr 0.04585
Warmup Train [8][1450/3239]	Time 0.281 (0.234)	Data 0.002 (0.011)	Loss 5.5318 (5.5460)	Top-1 acc 6.641 (6.400)	Top-5 acc 20.312 (18.173)	lr 0.04584
Warmup Train [8][1460/3239]	Time 0.191 (0.234)	Data 0.001 (0.011)	Loss 5.6708 (5.5457)	Top-1 acc 5.078 (6.398)	Top-5 acc 13.672 (18.183)	lr 0.04584
Warmup Train [8][1470/3239]	Time 0.217 (0.234)	Data 0.001 (0.011)	Loss 5.4542 (5.5454)	Top-1 acc 7.422 (6.401)	Top-5 acc 21.094 (18.188)	lr 0.04584
Warmup Train [8][1480/3239]	Time 0.249 (0.234)	Data 0.001 (0.011)	Loss 5.5744 (5.5455)	Top-1 acc 4.297 (6.399)	Top-5 acc 20.703 (18.192)	lr 0.04583
Warmup Train [8][1490/3239]	Time 0.244 (0.234)	Data 0.001 (0.011)	Loss 5.4769 (5.5454)	Top-1 acc 7.031 (6.400)	Top-5 acc 20.312 (18.190)	lr 0.04583
Warmup Train [8][1500/3239]	Time 0.287 (0.234)	Data 0.001 (0.011)	Loss 5.4558 (5.5451)	Top-1 acc 9.375 (6.400)	Top-5 acc 21.094 (18.195)	lr 0.04583
Warmup Train [8][1510/3239]	Time 0.213 (0.233)	Data 0.001 (0.011)	Loss 5.4848 (5.5448)	Top-1 acc 7.031 (6.406)	Top-5 acc 18.359 (18.207)	lr 0.04582
Warmup Train [8][1520/3239]	Time 0.331 (0.233)	Data 0.001 (0.011)	Loss 5.5227 (5.5449)	Top-1 acc 7.422 (6.404)	Top-5 acc 18.750 (18.208)	lr 0.04582
Warmup Train [8][1530/3239]	Time 0.144 (0.233)	Data 0.001 (0.011)	Loss 5.3002 (5.5446)	Top-1 acc 8.594 (6.411)	Top-5 acc 23.828 (18.214)	lr 0.04582
Warmup Train [8][1540/3239]	Time 0.277 (0.233)	Data 0.001 (0.011)	Loss 5.5436 (5.5445)	Top-1 acc 7.812 (6.413)	Top-5 acc 18.750 (18.216)	lr 0.04581
Warmup Train [8][1550/3239]	Time 0.177 (0.233)	Data 0.001 (0.011)	Loss 5.5511 (5.5445)	Top-1 acc 6.250 (6.418)	Top-5 acc 18.359 (18.219)	lr 0.04581
Warmup Train [8][1560/3239]	Time 0.142 (0.233)	Data 0.001 (0.011)	Loss 5.5310 (5.5441)	Top-1 acc 7.812 (6.419)	Top-5 acc 21.875 (18.228)	lr 0.04581
Warmup Train [8][1570/3239]	Time 0.266 (0.233)	Data 0.001 (0.011)	Loss 5.5047 (5.5438)	Top-1 acc 5.859 (6.420)	Top-5 acc 21.094 (18.232)	lr 0.04580
Warmup Train [8][1580/3239]	Time 0.196 (0.233)	Data 0.002 (0.011)	Loss 5.6336 (5.5437)	Top-1 acc 4.297 (6.417)	Top-5 acc 16.016 (18.231)	lr 0.04580
Warmup Train [8][1590/3239]	Time 0.219 (0.233)	Data 0.002 (0.010)	Loss 5.4972 (5.5435)	Top-1 acc 6.641 (6.420)	Top-5 acc 21.875 (18.241)	lr 0.04580
Warmup Train [8][1600/3239]	Time 0.179 (0.233)	Data 0.001 (0.010)	Loss 5.4708 (5.5433)	Top-1 acc 6.250 (6.422)	Top-5 acc 16.797 (18.241)	lr 0.04579
Warmup Train [8][1610/3239]	Time 0.265 (0.233)	Data 0.002 (0.010)	Loss 5.5087 (5.5433)	Top-1 acc 6.250 (6.420)	Top-5 acc 18.750 (18.242)	lr 0.04579
Warmup Train [8][1620/3239]	Time 0.167 (0.233)	Data 0.001 (0.010)	Loss 5.6267 (5.5430)	Top-1 acc 3.906 (6.419)	Top-5 acc 14.844 (18.245)	lr 0.04579
Warmup Train [8][1630/3239]	Time 0.215 (0.233)	Data 0.001 (0.010)	Loss 5.5683 (5.5429)	Top-1 acc 5.469 (6.426)	Top-5 acc 16.016 (18.252)	lr 0.04578
Warmup Train [8][1640/3239]	Time 0.304 (0.233)	Data 0.001 (0.010)	Loss 5.3866 (5.5427)	Top-1 acc 7.422 (6.430)	Top-5 acc 21.094 (18.253)	lr 0.04578
Warmup Train [8][1650/3239]	Time 0.305 (0.233)	Data 0.001 (0.010)	Loss 5.4323 (5.5427)	Top-1 acc 10.156 (6.431)	Top-5 acc 18.750 (18.249)	lr 0.04578
Warmup Train [8][1660/3239]	Time 0.184 (0.233)	Data 0.002 (0.010)	Loss 5.3144 (5.5426)	Top-1 acc 8.594 (6.432)	Top-5 acc 26.172 (18.259)	lr 0.04577
Warmup Train [8][1670/3239]	Time 0.209 (0.233)	Data 0.002 (0.010)	Loss 5.4948 (5.5424)	Top-1 acc 4.688 (6.434)	Top-5 acc 19.531 (18.266)	lr 0.04577
Warmup Train [8][1680/3239]	Time 0.223 (0.233)	Data 0.002 (0.010)	Loss 5.5580 (5.5423)	Top-1 acc 6.250 (6.432)	Top-5 acc 17.969 (18.267)	lr 0.04577
Warmup Train [8][1690/3239]	Time 0.196 (0.233)	Data 0.001 (0.010)	Loss 5.5453 (5.5420)	Top-1 acc 5.078 (6.434)	Top-5 acc 15.234 (18.275)	lr 0.04576
Warmup Train [8][1700/3239]	Time 0.222 (0.233)	Data 0.001 (0.010)	Loss 5.5198 (5.5418)	Top-1 acc 5.469 (6.431)	Top-5 acc 15.625 (18.282)	lr 0.04576
Warmup Train [8][1710/3239]	Time 0.201 (0.232)	Data 0.001 (0.010)	Loss 5.4171 (5.5419)	Top-1 acc 8.203 (6.434)	Top-5 acc 22.266 (18.284)	lr 0.04576
Warmup Train [8][1720/3239]	Time 0.192 (0.232)	Data 0.001 (0.010)	Loss 5.4768 (5.5420)	Top-1 acc 7.812 (6.434)	Top-5 acc 21.484 (18.287)	lr 0.04575
Warmup Train [8][1730/3239]	Time 0.180 (0.232)	Data 0.001 (0.010)	Loss 5.4692 (5.5416)	Top-1 acc 2.734 (6.431)	Top-5 acc 14.844 (18.285)	lr 0.04575
Warmup Train [8][1740/3239]	Time 0.207 (0.232)	Data 0.001 (0.010)	Loss 5.5526 (5.5414)	Top-1 acc 6.641 (6.434)	Top-5 acc 20.703 (18.295)	lr 0.04575
Warmup Train [8][1750/3239]	Time 0.244 (0.232)	Data 0.001 (0.010)	Loss 5.4976 (5.5413)	Top-1 acc 4.297 (6.432)	Top-5 acc 19.531 (18.298)	lr 0.04574
Warmup Train [8][1760/3239]	Time 0.256 (0.232)	Data 0.001 (0.010)	Loss 5.4477 (5.5413)	Top-1 acc 8.594 (6.431)	Top-5 acc 20.703 (18.297)	lr 0.04574
Warmup Train [8][1770/3239]	Time 0.342 (0.232)	Data 0.002 (0.010)	Loss 5.5860 (5.5411)	Top-1 acc 7.031 (6.438)	Top-5 acc 17.188 (18.309)	lr 0.04574
Warmup Train [8][1780/3239]	Time 0.191 (0.232)	Data 0.001 (0.010)	Loss 5.4615 (5.5409)	Top-1 acc 7.422 (6.443)	Top-5 acc 23.047 (18.315)	lr 0.04573
Warmup Train [8][1790/3239]	Time 0.186 (0.232)	Data 0.001 (0.010)	Loss 5.4239 (5.5406)	Top-1 acc 10.547 (6.443)	Top-5 acc 21.875 (18.322)	lr 0.04573
Warmup Train [8][1800/3239]	Time 0.220 (0.232)	Data 0.001 (0.010)	Loss 5.5010 (5.5404)	Top-1 acc 7.812 (6.447)	Top-5 acc 19.531 (18.325)	lr 0.04573
Warmup Train [8][1810/3239]	Time 0.213 (0.232)	Data 0.001 (0.010)	Loss 5.4816 (5.5404)	Top-1 acc 9.766 (6.453)	Top-5 acc 21.875 (18.326)	lr 0.04572
Warmup Train [8][1820/3239]	Time 0.241 (0.232)	Data 0.001 (0.010)	Loss 5.4976 (5.5402)	Top-1 acc 7.812 (6.458)	Top-5 acc 21.484 (18.331)	lr 0.04572
Warmup Train [8][1830/3239]	Time 0.253 (0.232)	Data 0.002 (0.009)	Loss 5.5008 (5.5399)	Top-1 acc 7.422 (6.461)	Top-5 acc 17.188 (18.334)	lr 0.04572
Warmup Train [8][1840/3239]	Time 0.190 (0.232)	Data 0.001 (0.009)	Loss 5.5325 (5.5396)	Top-1 acc 6.250 (6.470)	Top-5 acc 19.531 (18.344)	lr 0.04571
Warmup Train [8][1850/3239]	Time 0.235 (0.232)	Data 0.001 (0.009)	Loss 5.4797 (5.5393)	Top-1 acc 7.422 (6.471)	Top-5 acc 22.266 (18.347)	lr 0.04571
Warmup Train [8][1860/3239]	Time 0.200 (0.232)	Data 0.001 (0.009)	Loss 5.4077 (5.5393)	Top-1 acc 7.812 (6.472)	Top-5 acc 20.703 (18.348)	lr 0.04571
Warmup Train [8][1870/3239]	Time 0.268 (0.232)	Data 0.001 (0.009)	Loss 5.6364 (5.5392)	Top-1 acc 6.641 (6.472)	Top-5 acc 19.922 (18.348)	lr 0.04570
Warmup Train [8][1880/3239]	Time 0.302 (0.232)	Data 0.001 (0.009)	Loss 5.6535 (5.5387)	Top-1 acc 5.078 (6.477)	Top-5 acc 14.062 (18.359)	lr 0.04570
Warmup Train [8][1890/3239]	Time 0.227 (0.232)	Data 0.001 (0.009)	Loss 5.4783 (5.5387)	Top-1 acc 8.203 (6.479)	Top-5 acc 20.312 (18.358)	lr 0.04570
Warmup Train [8][1900/3239]	Time 0.172 (0.232)	Data 0.001 (0.009)	Loss 5.5819 (5.5387)	Top-1 acc 5.859 (6.480)	Top-5 acc 17.969 (18.361)	lr 0.04569
Warmup Train [8][1910/3239]	Time 0.193 (0.232)	Data 0.001 (0.009)	Loss 5.5362 (5.5386)	Top-1 acc 7.031 (6.480)	Top-5 acc 21.875 (18.364)	lr 0.04569
Warmup Train [8][1920/3239]	Time 0.225 (0.232)	Data 0.001 (0.009)	Loss 5.4015 (5.5383)	Top-1 acc 7.812 (6.481)	Top-5 acc 20.703 (18.367)	lr 0.04569
Warmup Train [8][1930/3239]	Time 0.164 (0.232)	Data 0.001 (0.009)	Loss 5.4794 (5.5379)	Top-1 acc 5.469 (6.488)	Top-5 acc 16.797 (18.371)	lr 0.04568
Warmup Train [8][1940/3239]	Time 0.160 (0.232)	Data 0.001 (0.009)	Loss 5.5810 (5.5378)	Top-1 acc 3.906 (6.488)	Top-5 acc 14.844 (18.373)	lr 0.04568
Warmup Train [8][1950/3239]	Time 0.184 (0.232)	Data 0.001 (0.009)	Loss 5.4882 (5.5379)	Top-1 acc 9.766 (6.492)	Top-5 acc 22.266 (18.375)	lr 0.04567
Warmup Train [8][1960/3239]	Time 0.216 (0.232)	Data 0.001 (0.009)	Loss 5.6030 (5.5377)	Top-1 acc 4.688 (6.493)	Top-5 acc 17.578 (18.379)	lr 0.04567
Warmup Train [8][1970/3239]	Time 0.380 (0.232)	Data 0.001 (0.009)	Loss 5.4530 (5.5373)	Top-1 acc 7.031 (6.495)	Top-5 acc 18.359 (18.385)	lr 0.04567
Warmup Train [8][1980/3239]	Time 0.203 (0.232)	Data 0.001 (0.009)	Loss 5.6085 (5.5370)	Top-1 acc 7.422 (6.496)	Top-5 acc 16.406 (18.388)	lr 0.04566
Warmup Train [8][1990/3239]	Time 0.252 (0.232)	Data 0.001 (0.009)	Loss 5.4811 (5.5369)	Top-1 acc 7.422 (6.497)	Top-5 acc 16.406 (18.393)	lr 0.04566
Warmup Train [8][2000/3239]	Time 0.237 (0.232)	Data 0.002 (0.009)	Loss 5.4411 (5.5369)	Top-1 acc 8.594 (6.496)	Top-5 acc 23.438 (18.396)	lr 0.04566
Warmup Train [8][2010/3239]	Time 0.251 (0.232)	Data 0.001 (0.009)	Loss 5.4149 (5.5367)	Top-1 acc 7.812 (6.501)	Top-5 acc 23.438 (18.405)	lr 0.04565
Warmup Train [8][2020/3239]	Time 0.291 (0.232)	Data 0.001 (0.009)	Loss 5.4678 (5.5366)	Top-1 acc 7.812 (6.505)	Top-5 acc 17.578 (18.404)	lr 0.04565
Warmup Train [8][2030/3239]	Time 0.209 (0.232)	Data 0.001 (0.009)	Loss 5.4486 (5.5364)	Top-1 acc 8.594 (6.507)	Top-5 acc 22.266 (18.410)	lr 0.04565
Warmup Train [8][2040/3239]	Time 0.253 (0.232)	Data 0.002 (0.009)	Loss 5.4821 (5.5364)	Top-1 acc 8.594 (6.507)	Top-5 acc 22.656 (18.413)	lr 0.04564
Warmup Train [8][2050/3239]	Time 0.195 (0.232)	Data 0.001 (0.009)	Loss 5.3830 (5.5363)	Top-1 acc 5.859 (6.506)	Top-5 acc 20.703 (18.410)	lr 0.04564
Warmup Train [8][2060/3239]	Time 0.190 (0.232)	Data 0.001 (0.009)	Loss 5.5233 (5.5360)	Top-1 acc 5.469 (6.507)	Top-5 acc 19.531 (18.418)	lr 0.04564
Warmup Train [8][2070/3239]	Time 0.218 (0.232)	Data 0.001 (0.009)	Loss 5.4540 (5.5358)	Top-1 acc 5.469 (6.511)	Top-5 acc 17.188 (18.424)	lr 0.04563
Warmup Train [8][2080/3239]	Time 0.311 (0.232)	Data 0.001 (0.009)	Loss 5.4682 (5.5357)	Top-1 acc 5.859 (6.512)	Top-5 acc 19.141 (18.419)	lr 0.04563
Warmup Train [8][2090/3239]	Time 0.258 (0.231)	Data 0.032 (0.009)	Loss 5.4969 (5.5355)	Top-1 acc 6.641 (6.513)	Top-5 acc 15.234 (18.424)	lr 0.04563
Warmup Train [8][2100/3239]	Time 0.268 (0.231)	Data 0.001 (0.009)	Loss 5.4251 (5.5352)	Top-1 acc 9.375 (6.518)	Top-5 acc 22.656 (18.433)	lr 0.04562
Warmup Train [8][2110/3239]	Time 0.187 (0.231)	Data 0.001 (0.009)	Loss 5.5686 (5.5350)	Top-1 acc 6.641 (6.519)	Top-5 acc 19.531 (18.437)	lr 0.04562
Warmup Train [8][2120/3239]	Time 0.201 (0.231)	Data 0.001 (0.009)	Loss 5.5136 (5.5348)	Top-1 acc 7.031 (6.523)	Top-5 acc 19.531 (18.446)	lr 0.04562
Warmup Train [8][2130/3239]	Time 0.259 (0.231)	Data 0.001 (0.009)	Loss 5.6477 (5.5348)	Top-1 acc 5.078 (6.526)	Top-5 acc 15.234 (18.454)	lr 0.04561
Warmup Train [8][2140/3239]	Time 0.250 (0.231)	Data 0.001 (0.009)	Loss 5.4615 (5.5348)	Top-1 acc 4.688 (6.528)	Top-5 acc 17.188 (18.455)	lr 0.04561
Warmup Train [8][2150/3239]	Time 0.241 (0.231)	Data 0.001 (0.008)	Loss 5.5457 (5.5349)	Top-1 acc 6.250 (6.526)	Top-5 acc 17.188 (18.450)	lr 0.04561
Warmup Train [8][2160/3239]	Time 0.215 (0.231)	Data 0.001 (0.008)	Loss 5.5240 (5.5347)	Top-1 acc 6.250 (6.528)	Top-5 acc 14.453 (18.451)	lr 0.04560
Warmup Train [8][2170/3239]	Time 0.318 (0.231)	Data 0.001 (0.008)	Loss 5.4844 (5.5346)	Top-1 acc 7.812 (6.529)	Top-5 acc 18.750 (18.456)	lr 0.04560
Warmup Train [8][2180/3239]	Time 0.327 (0.231)	Data 0.001 (0.008)	Loss 5.5293 (5.5346)	Top-1 acc 5.859 (6.528)	Top-5 acc 15.625 (18.454)	lr 0.04560
Warmup Train [8][2190/3239]	Time 0.212 (0.231)	Data 0.001 (0.008)	Loss 5.4340 (5.5344)	Top-1 acc 7.422 (6.532)	Top-5 acc 23.438 (18.462)	lr 0.04559
Warmup Train [8][2200/3239]	Time 0.284 (0.231)	Data 0.002 (0.008)	Loss 5.4364 (5.5342)	Top-1 acc 8.594 (6.535)	Top-5 acc 19.922 (18.464)	lr 0.04559
Warmup Train [8][2210/3239]	Time 0.189 (0.231)	Data 0.001 (0.008)	Loss 5.5937 (5.5339)	Top-1 acc 5.078 (6.540)	Top-5 acc 15.625 (18.470)	lr 0.04559
Warmup Train [8][2220/3239]	Time 0.258 (0.231)	Data 0.001 (0.008)	Loss 5.5748 (5.5339)	Top-1 acc 5.078 (6.543)	Top-5 acc 13.281 (18.471)	lr 0.04558
Warmup Train [8][2230/3239]	Time 0.233 (0.231)	Data 0.001 (0.008)	Loss 5.5408 (5.5337)	Top-1 acc 5.469 (6.541)	Top-5 acc 15.625 (18.472)	lr 0.04558
Warmup Train [8][2240/3239]	Time 0.223 (0.231)	Data 0.001 (0.008)	Loss 5.4637 (5.5334)	Top-1 acc 7.812 (6.543)	Top-5 acc 23.828 (18.476)	lr 0.04558
Warmup Train [8][2250/3239]	Time 0.237 (0.231)	Data 0.001 (0.008)	Loss 5.5755 (5.5333)	Top-1 acc 3.906 (6.544)	Top-5 acc 15.625 (18.478)	lr 0.04557
Warmup Train [8][2260/3239]	Time 0.248 (0.231)	Data 0.001 (0.008)	Loss 5.5061 (5.5332)	Top-1 acc 7.812 (6.544)	Top-5 acc 19.531 (18.484)	lr 0.04557
Warmup Train [8][2270/3239]	Time 0.198 (0.231)	Data 0.002 (0.008)	Loss 5.4226 (5.5330)	Top-1 acc 6.250 (6.547)	Top-5 acc 21.094 (18.488)	lr 0.04557
Warmup Train [8][2280/3239]	Time 0.365 (0.231)	Data 0.001 (0.008)	Loss 5.6098 (5.5329)	Top-1 acc 4.297 (6.545)	Top-5 acc 17.578 (18.487)	lr 0.04556
Warmup Train [8][2290/3239]	Time 0.191 (0.231)	Data 0.001 (0.008)	Loss 5.4042 (5.5325)	Top-1 acc 9.766 (6.551)	Top-5 acc 22.266 (18.501)	lr 0.04556
Warmup Train [8][2300/3239]	Time 0.227 (0.231)	Data 0.001 (0.008)	Loss 5.3744 (5.5324)	Top-1 acc 7.422 (6.552)	Top-5 acc 20.312 (18.503)	lr 0.04555
Warmup Train [8][2310/3239]	Time 0.173 (0.231)	Data 0.001 (0.008)	Loss 5.5453 (5.5324)	Top-1 acc 5.469 (6.552)	Top-5 acc 17.188 (18.504)	lr 0.04555
Warmup Train [8][2320/3239]	Time 0.291 (0.231)	Data 0.001 (0.008)	Loss 5.5166 (5.5324)	Top-1 acc 8.203 (6.552)	Top-5 acc 19.531 (18.503)	lr 0.04555
Warmup Train [8][2330/3239]	Time 0.245 (0.231)	Data 0.001 (0.008)	Loss 5.4470 (5.5324)	Top-1 acc 5.859 (6.551)	Top-5 acc 20.312 (18.504)	lr 0.04554
Warmup Train [8][2340/3239]	Time 0.251 (0.231)	Data 0.001 (0.008)	Loss 5.5045 (5.5322)	Top-1 acc 7.031 (6.549)	Top-5 acc 17.969 (18.509)	lr 0.04554
Warmup Train [8][2350/3239]	Time 0.193 (0.231)	Data 0.001 (0.008)	Loss 5.4432 (5.5319)	Top-1 acc 7.812 (6.554)	Top-5 acc 20.312 (18.516)	lr 0.04554
Warmup Train [8][2360/3239]	Time 0.141 (0.231)	Data 0.001 (0.008)	Loss 5.4712 (5.5316)	Top-1 acc 7.031 (6.555)	Top-5 acc 23.047 (18.522)	lr 0.04553
Warmup Train [8][2370/3239]	Time 0.129 (0.231)	Data 0.001 (0.008)	Loss 5.4946 (5.5316)	Top-1 acc 8.203 (6.559)	Top-5 acc 19.141 (18.522)	lr 0.04553
Warmup Train [8][2380/3239]	Time 0.296 (0.231)	Data 0.001 (0.008)	Loss 5.4621 (5.5313)	Top-1 acc 5.469 (6.561)	Top-5 acc 19.922 (18.530)	lr 0.04553
Warmup Train [8][2390/3239]	Time 0.231 (0.231)	Data 0.003 (0.008)	Loss 5.5357 (5.5311)	Top-1 acc 7.422 (6.564)	Top-5 acc 18.359 (18.534)	lr 0.04552
Warmup Train [8][2400/3239]	Time 0.175 (0.231)	Data 0.001 (0.008)	Loss 5.4697 (5.5310)	Top-1 acc 8.594 (6.567)	Top-5 acc 21.094 (18.537)	lr 0.04552
Warmup Train [8][2410/3239]	Time 0.213 (0.231)	Data 0.001 (0.008)	Loss 5.5441 (5.5310)	Top-1 acc 7.812 (6.568)	Top-5 acc 18.750 (18.538)	lr 0.04552
Warmup Train [8][2420/3239]	Time 0.205 (0.231)	Data 0.001 (0.008)	Loss 5.4616 (5.5308)	Top-1 acc 7.422 (6.572)	Top-5 acc 19.531 (18.543)	lr 0.04551
Warmup Train [8][2430/3239]	Time 0.180 (0.231)	Data 0.001 (0.008)	Loss 5.3564 (5.5306)	Top-1 acc 7.031 (6.572)	Top-5 acc 17.969 (18.544)	lr 0.04551
Warmup Train [8][2440/3239]	Time 0.207 (0.231)	Data 0.001 (0.008)	Loss 5.5960 (5.5304)	Top-1 acc 4.297 (6.573)	Top-5 acc 16.797 (18.548)	lr 0.04551
Warmup Train [8][2450/3239]	Time 0.153 (0.231)	Data 0.001 (0.008)	Loss 5.4584 (5.5303)	Top-1 acc 8.203 (6.576)	Top-5 acc 19.531 (18.550)	lr 0.04550
Warmup Train [8][2460/3239]	Time 0.221 (0.231)	Data 0.002 (0.008)	Loss 5.4228 (5.5300)	Top-1 acc 9.766 (6.579)	Top-5 acc 22.266 (18.555)	lr 0.04550
Warmup Train [8][2470/3239]	Time 0.139 (0.231)	Data 0.001 (0.008)	Loss 5.4502 (5.5298)	Top-1 acc 6.641 (6.582)	Top-5 acc 21.875 (18.561)	lr 0.04550
Warmup Train [8][2480/3239]	Time 0.266 (0.231)	Data 0.002 (0.008)	Loss 5.5207 (5.5296)	Top-1 acc 5.469 (6.583)	Top-5 acc 17.188 (18.563)	lr 0.04549
Warmup Train [8][2490/3239]	Time 0.219 (0.231)	Data 0.002 (0.008)	Loss 5.4518 (5.5295)	Top-1 acc 8.984 (6.585)	Top-5 acc 21.875 (18.564)	lr 0.04549
Warmup Train [8][2500/3239]	Time 0.217 (0.231)	Data 0.001 (0.008)	Loss 5.4071 (5.5295)	Top-1 acc 6.641 (6.584)	Top-5 acc 21.875 (18.568)	lr 0.04549
Warmup Train [8][2510/3239]	Time 0.213 (0.231)	Data 0.001 (0.008)	Loss 5.5424 (5.5296)	Top-1 acc 5.469 (6.584)	Top-5 acc 18.359 (18.567)	lr 0.04548
Warmup Train [8][2520/3239]	Time 0.179 (0.231)	Data 0.001 (0.008)	Loss 5.3872 (5.5292)	Top-1 acc 6.641 (6.585)	Top-5 acc 20.703 (18.574)	lr 0.04548
Warmup Train [8][2530/3239]	Time 0.268 (0.231)	Data 0.001 (0.008)	Loss 5.4510 (5.5291)	Top-1 acc 6.250 (6.584)	Top-5 acc 19.141 (18.576)	lr 0.04548
Warmup Train [8][2540/3239]	Time 0.225 (0.231)	Data 0.002 (0.008)	Loss 5.5399 (5.5289)	Top-1 acc 4.688 (6.587)	Top-5 acc 17.969 (18.581)	lr 0.04547
Warmup Train [8][2550/3239]	Time 0.255 (0.231)	Data 0.001 (0.008)	Loss 5.5013 (5.5288)	Top-1 acc 5.078 (6.589)	Top-5 acc 17.969 (18.581)	lr 0.04547
Warmup Train [8][2560/3239]	Time 0.188 (0.231)	Data 0.001 (0.008)	Loss 5.3980 (5.5286)	Top-1 acc 8.984 (6.592)	Top-5 acc 23.828 (18.593)	lr 0.04546
Warmup Train [8][2570/3239]	Time 0.239 (0.231)	Data 0.001 (0.008)	Loss 5.6049 (5.5284)	Top-1 acc 7.031 (6.595)	Top-5 acc 16.406 (18.601)	lr 0.04546
Warmup Train [8][2580/3239]	Time 0.176 (0.230)	Data 0.001 (0.008)	Loss 5.3784 (5.5283)	Top-1 acc 9.766 (6.596)	Top-5 acc 21.875 (18.605)	lr 0.04546
Warmup Train [8][2590/3239]	Time 0.189 (0.230)	Data 0.001 (0.008)	Loss 5.3935 (5.5281)	Top-1 acc 8.203 (6.595)	Top-5 acc 22.266 (18.605)	lr 0.04545
Warmup Train [8][2600/3239]	Time 0.322 (0.230)	Data 0.002 (0.008)	Loss 5.3988 (5.5281)	Top-1 acc 7.812 (6.596)	Top-5 acc 21.875 (18.609)	lr 0.04545
Warmup Train [8][2610/3239]	Time 0.177 (0.230)	Data 0.001 (0.008)	Loss 5.4692 (5.5279)	Top-1 acc 8.203 (6.598)	Top-5 acc 20.312 (18.611)	lr 0.04545
Warmup Train [8][2620/3239]	Time 0.207 (0.230)	Data 0.001 (0.008)	Loss 5.4981 (5.5276)	Top-1 acc 7.422 (6.602)	Top-5 acc 19.922 (18.616)	lr 0.04544
Warmup Train [8][2630/3239]	Time 0.283 (0.230)	Data 0.001 (0.008)	Loss 5.4460 (5.5274)	Top-1 acc 7.812 (6.606)	Top-5 acc 23.047 (18.619)	lr 0.04544
Warmup Train [8][2640/3239]	Time 0.229 (0.230)	Data 0.001 (0.008)	Loss 5.5197 (5.5272)	Top-1 acc 5.078 (6.605)	Top-5 acc 17.578 (18.619)	lr 0.04544
Warmup Train [8][2650/3239]	Time 0.188 (0.230)	Data 0.002 (0.008)	Loss 5.3202 (5.5269)	Top-1 acc 6.641 (6.607)	Top-5 acc 25.000 (18.626)	lr 0.04543
Warmup Train [8][2660/3239]	Time 0.243 (0.230)	Data 0.001 (0.007)	Loss 5.5170 (5.5269)	Top-1 acc 7.812 (6.607)	Top-5 acc 19.141 (18.628)	lr 0.04543
Warmup Train [8][2670/3239]	Time 0.227 (0.230)	Data 0.002 (0.007)	Loss 5.5384 (5.5269)	Top-1 acc 3.516 (6.605)	Top-5 acc 18.359 (18.626)	lr 0.04543
Warmup Train [8][2680/3239]	Time 0.231 (0.230)	Data 0.001 (0.007)	Loss 5.4064 (5.5269)	Top-1 acc 7.031 (6.605)	Top-5 acc 19.922 (18.626)	lr 0.04542
Warmup Train [8][2690/3239]	Time 0.200 (0.230)	Data 0.001 (0.007)	Loss 5.4165 (5.5266)	Top-1 acc 8.203 (6.609)	Top-5 acc 23.047 (18.630)	lr 0.04542
Warmup Train [8][2700/3239]	Time 0.423 (0.230)	Data 0.002 (0.007)	Loss 5.4842 (5.5265)	Top-1 acc 7.031 (6.612)	Top-5 acc 22.266 (18.638)	lr 0.04542
Warmup Train [8][2710/3239]	Time 0.254 (0.230)	Data 0.001 (0.007)	Loss 5.5869 (5.5264)	Top-1 acc 6.250 (6.611)	Top-5 acc 21.094 (18.639)	lr 0.04541
Warmup Train [8][2720/3239]	Time 0.236 (0.230)	Data 0.001 (0.007)	Loss 5.4825 (5.5261)	Top-1 acc 6.250 (6.612)	Top-5 acc 19.531 (18.645)	lr 0.04541
Warmup Train [8][2730/3239]	Time 0.236 (0.230)	Data 0.037 (0.007)	Loss 5.5018 (5.5259)	Top-1 acc 7.422 (6.615)	Top-5 acc 19.922 (18.651)	lr 0.04541
Warmup Train [8][2740/3239]	Time 0.203 (0.230)	Data 0.001 (0.007)	Loss 5.5646 (5.5257)	Top-1 acc 3.906 (6.616)	Top-5 acc 16.016 (18.656)	lr 0.04540
Warmup Train [8][2750/3239]	Time 0.220 (0.230)	Data 0.002 (0.007)	Loss 5.6260 (5.5258)	Top-1 acc 5.078 (6.612)	Top-5 acc 16.797 (18.655)	lr 0.04540
Warmup Train [8][2760/3239]	Time 0.205 (0.230)	Data 0.001 (0.007)	Loss 5.5106 (5.5256)	Top-1 acc 7.031 (6.615)	Top-5 acc 17.578 (18.657)	lr 0.04540
Warmup Train [8][2770/3239]	Time 0.171 (0.230)	Data 0.002 (0.007)	Loss 5.4430 (5.5254)	Top-1 acc 8.203 (6.617)	Top-5 acc 24.219 (18.662)	lr 0.04539
Warmup Train [8][2780/3239]	Time 0.252 (0.230)	Data 0.001 (0.007)	Loss 5.3601 (5.5251)	Top-1 acc 6.641 (6.621)	Top-5 acc 21.094 (18.670)	lr 0.04539
Warmup Train [8][2790/3239]	Time 0.235 (0.230)	Data 0.001 (0.007)	Loss 5.5033 (5.5248)	Top-1 acc 8.203 (6.624)	Top-5 acc 18.359 (18.675)	lr 0.04538
Warmup Train [8][2800/3239]	Time 0.273 (0.230)	Data 0.001 (0.007)	Loss 5.5022 (5.5246)	Top-1 acc 4.297 (6.628)	Top-5 acc 21.484 (18.681)	lr 0.04538
Warmup Train [8][2810/3239]	Time 0.216 (0.230)	Data 0.001 (0.007)	Loss 5.4155 (5.5244)	Top-1 acc 7.031 (6.630)	Top-5 acc 19.141 (18.687)	lr 0.04538
Warmup Train [8][2820/3239]	Time 0.242 (0.230)	Data 0.001 (0.007)	Loss 5.5284 (5.5242)	Top-1 acc 7.031 (6.632)	Top-5 acc 17.578 (18.694)	lr 0.04537
Warmup Train [8][2830/3239]	Time 0.254 (0.230)	Data 0.001 (0.007)	Loss 5.3873 (5.5240)	Top-1 acc 8.203 (6.635)	Top-5 acc 20.703 (18.696)	lr 0.04537
Warmup Train [8][2840/3239]	Time 0.212 (0.230)	Data 0.001 (0.007)	Loss 5.4840 (5.5238)	Top-1 acc 7.812 (6.637)	Top-5 acc 17.969 (18.701)	lr 0.04537
Warmup Train [8][2850/3239]	Time 0.234 (0.230)	Data 0.001 (0.007)	Loss 5.4180 (5.5237)	Top-1 acc 11.328 (6.638)	Top-5 acc 25.781 (18.707)	lr 0.04536
Warmup Train [8][2860/3239]	Time 0.263 (0.230)	Data 0.001 (0.007)	Loss 5.4343 (5.5235)	Top-1 acc 6.250 (6.640)	Top-5 acc 22.266 (18.709)	lr 0.04536
Warmup Train [8][2870/3239]	Time 0.238 (0.230)	Data 0.001 (0.007)	Loss 5.4460 (5.5232)	Top-1 acc 7.422 (6.645)	Top-5 acc 19.141 (18.717)	lr 0.04536
Warmup Train [8][2880/3239]	Time 0.239 (0.230)	Data 0.001 (0.007)	Loss 5.4059 (5.5230)	Top-1 acc 8.984 (6.648)	Top-5 acc 19.922 (18.720)	lr 0.04535
Warmup Train [8][2890/3239]	Time 0.232 (0.230)	Data 0.001 (0.007)	Loss 5.3623 (5.5228)	Top-1 acc 11.719 (6.651)	Top-5 acc 23.438 (18.729)	lr 0.04535
Warmup Train [8][2900/3239]	Time 0.232 (0.230)	Data 0.001 (0.007)	Loss 5.5480 (5.5225)	Top-1 acc 4.688 (6.655)	Top-5 acc 16.406 (18.733)	lr 0.04535
Warmup Train [8][2910/3239]	Time 0.194 (0.230)	Data 0.002 (0.007)	Loss 5.4396 (5.5224)	Top-1 acc 7.031 (6.651)	Top-5 acc 19.922 (18.733)	lr 0.04534
Warmup Train [8][2920/3239]	Time 0.222 (0.230)	Data 0.001 (0.007)	Loss 5.4968 (5.5223)	Top-1 acc 5.078 (6.653)	Top-5 acc 18.359 (18.735)	lr 0.04534
Warmup Train [8][2930/3239]	Time 0.216 (0.230)	Data 0.001 (0.007)	Loss 5.3997 (5.5220)	Top-1 acc 6.641 (6.654)	Top-5 acc 20.312 (18.738)	lr 0.04534
Warmup Train [8][2940/3239]	Time 0.215 (0.230)	Data 0.001 (0.007)	Loss 5.3930 (5.5218)	Top-1 acc 6.250 (6.656)	Top-5 acc 21.094 (18.746)	lr 0.04533
Warmup Train [8][2950/3239]	Time 0.283 (0.230)	Data 0.001 (0.007)	Loss 5.5316 (5.5216)	Top-1 acc 9.375 (6.662)	Top-5 acc 19.922 (18.754)	lr 0.04533
Warmup Train [8][2960/3239]	Time 0.251 (0.230)	Data 0.001 (0.007)	Loss 5.5671 (5.5215)	Top-1 acc 5.469 (6.663)	Top-5 acc 14.844 (18.755)	lr 0.04532
Warmup Train [8][2970/3239]	Time 0.236 (0.230)	Data 0.001 (0.007)	Loss 5.5451 (5.5212)	Top-1 acc 5.078 (6.665)	Top-5 acc 16.406 (18.756)	lr 0.04532
Warmup Train [8][2980/3239]	Time 0.232 (0.230)	Data 0.001 (0.007)	Loss 5.4702 (5.5211)	Top-1 acc 6.250 (6.669)	Top-5 acc 19.531 (18.760)	lr 0.04532
Warmup Train [8][2990/3239]	Time 0.177 (0.230)	Data 0.002 (0.007)	Loss 5.5001 (5.5207)	Top-1 acc 7.422 (6.672)	Top-5 acc 19.531 (18.767)	lr 0.04531
Warmup Train [8][3000/3239]	Time 0.194 (0.230)	Data 0.001 (0.007)	Loss 5.5028 (5.5206)	Top-1 acc 6.250 (6.673)	Top-5 acc 16.406 (18.771)	lr 0.04531
Warmup Train [8][3010/3239]	Time 0.344 (0.230)	Data 0.001 (0.007)	Loss 5.4749 (5.5204)	Top-1 acc 5.078 (6.674)	Top-5 acc 18.750 (18.770)	lr 0.04531
Warmup Train [8][3020/3239]	Time 0.192 (0.230)	Data 0.001 (0.007)	Loss 5.4625 (5.5202)	Top-1 acc 8.594 (6.678)	Top-5 acc 20.703 (18.772)	lr 0.04530
Warmup Train [8][3030/3239]	Time 0.238 (0.230)	Data 0.002 (0.007)	Loss 5.5407 (5.5200)	Top-1 acc 5.859 (6.679)	Top-5 acc 16.016 (18.774)	lr 0.04530
Warmup Train [8][3040/3239]	Time 0.164 (0.230)	Data 0.001 (0.007)	Loss 5.5210 (5.5199)	Top-1 acc 5.078 (6.683)	Top-5 acc 17.578 (18.780)	lr 0.04530
Warmup Train [8][3050/3239]	Time 0.250 (0.230)	Data 0.001 (0.007)	Loss 5.5188 (5.5196)	Top-1 acc 7.812 (6.685)	Top-5 acc 19.141 (18.787)	lr 0.04529
Warmup Train [8][3060/3239]	Time 0.203 (0.230)	Data 0.001 (0.007)	Loss 5.4655 (5.5195)	Top-1 acc 8.594 (6.684)	Top-5 acc 19.531 (18.788)	lr 0.04529
Warmup Train [8][3070/3239]	Time 0.233 (0.230)	Data 0.001 (0.007)	Loss 5.6232 (5.5194)	Top-1 acc 4.688 (6.686)	Top-5 acc 16.016 (18.789)	lr 0.04529
Warmup Train [8][3080/3239]	Time 0.258 (0.230)	Data 0.001 (0.007)	Loss 5.4953 (5.5194)	Top-1 acc 7.812 (6.689)	Top-5 acc 21.484 (18.790)	lr 0.04528
Warmup Train [8][3090/3239]	Time 0.163 (0.230)	Data 0.001 (0.007)	Loss 5.3344 (5.5192)	Top-1 acc 7.812 (6.690)	Top-5 acc 24.609 (18.794)	lr 0.04528
Warmup Train [8][3100/3239]	Time 0.169 (0.230)	Data 0.001 (0.007)	Loss 5.5663 (5.5190)	Top-1 acc 5.469 (6.690)	Top-5 acc 16.016 (18.797)	lr 0.04528
Warmup Train [8][3110/3239]	Time 0.197 (0.230)	Data 0.001 (0.007)	Loss 5.3311 (5.5187)	Top-1 acc 10.547 (6.695)	Top-5 acc 23.828 (18.807)	lr 0.04527
Warmup Train [8][3120/3239]	Time 0.333 (0.230)	Data 0.001 (0.007)	Loss 5.4285 (5.5186)	Top-1 acc 5.859 (6.695)	Top-5 acc 21.484 (18.807)	lr 0.04527
Warmup Train [8][3130/3239]	Time 0.178 (0.230)	Data 0.001 (0.007)	Loss 5.4096 (5.5186)	Top-1 acc 7.031 (6.693)	Top-5 acc 19.141 (18.806)	lr 0.04526
Warmup Train [8][3140/3239]	Time 0.287 (0.230)	Data 0.001 (0.007)	Loss 5.4633 (5.5184)	Top-1 acc 5.078 (6.697)	Top-5 acc 16.797 (18.811)	lr 0.04526
Warmup Train [8][3150/3239]	Time 0.198 (0.230)	Data 0.002 (0.007)	Loss 5.3714 (5.5181)	Top-1 acc 9.766 (6.699)	Top-5 acc 25.000 (18.815)	lr 0.04526
Warmup Train [8][3160/3239]	Time 0.212 (0.230)	Data 0.001 (0.007)	Loss 5.6616 (5.5178)	Top-1 acc 4.688 (6.698)	Top-5 acc 16.406 (18.817)	lr 0.04525
Warmup Train [8][3170/3239]	Time 0.234 (0.230)	Data 0.001 (0.007)	Loss 5.4530 (5.5176)	Top-1 acc 6.250 (6.700)	Top-5 acc 21.094 (18.822)	lr 0.04525
Warmup Train [8][3180/3239]	Time 0.152 (0.230)	Data 0.000 (0.007)	Loss 5.4384 (5.5175)	Top-1 acc 8.594 (6.705)	Top-5 acc 17.578 (18.825)	lr 0.04525
Warmup Train [8][3190/3239]	Time 0.188 (0.230)	Data 0.000 (0.007)	Loss 5.6065 (5.5175)	Top-1 acc 5.078 (6.706)	Top-5 acc 13.672 (18.826)	lr 0.04524
Warmup Train [8][3200/3239]	Time 0.185 (0.229)	Data 0.000 (0.007)	Loss 5.4190 (5.5173)	Top-1 acc 7.812 (6.707)	Top-5 acc 22.266 (18.830)	lr 0.04524
Warmup Train [8][3210/3239]	Time 0.149 (0.229)	Data 0.000 (0.007)	Loss 5.3988 (5.5172)	Top-1 acc 5.859 (6.706)	Top-5 acc 19.531 (18.831)	lr 0.04524
Warmup Train [8][3220/3239]	Time 0.284 (0.229)	Data 0.000 (0.007)	Loss 5.4953 (5.5171)	Top-1 acc 4.688 (6.703)	Top-5 acc 19.141 (18.833)	lr 0.04523
Warmup Train [8][3230/3239]	Time 0.182 (0.229)	Data 0.000 (0.007)	Loss 5.4475 (5.5170)	Top-1 acc 6.641 (6.703)	Top-5 acc 20.703 (18.833)	lr 0.04523
Warmup Train [8][3239/3239]	Time 0.247 (0.229)	Data 0.000 (0.007)	Loss 5.7273 (5.5169)	Top-1 acc 3.704 (6.705)	Top-5 acc 11.111 (18.835)	lr 0.04523
==========Warmup Valid [8/40]	loss 4.842	top-1 acc 10.101	top-5 acc 25.745	Train top-1 6.705	top-5 18.835	flops: 442.4M
Warmup Train [9][0/3239]	Time 14.545 (14.545)	Data 13.933 (13.933)	Loss 5.4634 (5.4634)	Top-1 acc 8.594 (8.594)	Top-5 acc 18.750 (18.750)	lr 0.04523
Warmup Train [9][10/3239]	Time 0.298 (1.658)	Data 0.001 (1.269)	Loss 5.3916 (5.5025)	Top-1 acc 8.984 (7.067)	Top-5 acc 23.047 (19.531)	lr 0.04522
Warmup Train [9][20/3239]	Time 0.342 (1.010)	Data 0.001 (0.665)	Loss 5.4937 (5.4779)	Top-1 acc 4.688 (7.273)	Top-5 acc 19.141 (19.699)	lr 0.04522
Warmup Train [9][30/3239]	Time 0.214 (0.754)	Data 0.002 (0.452)	Loss 5.3513 (5.4651)	Top-1 acc 6.641 (7.233)	Top-5 acc 21.875 (19.846)	lr 0.04521
Warmup Train [9][40/3239]	Time 0.203 (0.626)	Data 0.001 (0.342)	Loss 5.6501 (5.4748)	Top-1 acc 7.031 (7.212)	Top-5 acc 14.453 (19.684)	lr 0.04521
Warmup Train [9][50/3239]	Time 0.286 (0.547)	Data 0.001 (0.275)	Loss 5.5822 (5.4779)	Top-1 acc 5.078 (7.207)	Top-5 acc 13.281 (19.577)	lr 0.04521
Warmup Train [9][60/3239]	Time 0.190 (0.494)	Data 0.002 (0.231)	Loss 5.4386 (5.4844)	Top-1 acc 8.203 (7.179)	Top-5 acc 19.141 (19.243)	lr 0.04520
Warmup Train [9][70/3239]	Time 0.186 (0.456)	Data 0.002 (0.198)	Loss 5.4370 (5.4836)	Top-1 acc 8.984 (7.235)	Top-5 acc 21.484 (19.339)	lr 0.04520
Warmup Train [9][80/3239]	Time 0.219 (0.429)	Data 0.001 (0.174)	Loss 5.2785 (5.4793)	Top-1 acc 10.547 (7.258)	Top-5 acc 23.047 (19.473)	lr 0.04520
Warmup Train [9][90/3239]	Time 0.157 (0.408)	Data 0.001 (0.155)	Loss 5.4509 (5.4727)	Top-1 acc 9.375 (7.302)	Top-5 acc 20.312 (19.617)	lr 0.04519
Warmup Train [9][100/3239]	Time 0.230 (0.391)	Data 0.001 (0.141)	Loss 5.3801 (5.4728)	Top-1 acc 7.812 (7.306)	Top-5 acc 19.531 (19.601)	lr 0.04519
Warmup Train [9][110/3239]	Time 0.174 (0.377)	Data 0.001 (0.128)	Loss 5.2509 (5.4689)	Top-1 acc 10.938 (7.341)	Top-5 acc 26.953 (19.665)	lr 0.04519
Warmup Train [9][120/3239]	Time 0.301 (0.367)	Data 0.001 (0.118)	Loss 5.5129 (5.4675)	Top-1 acc 3.906 (7.328)	Top-5 acc 14.453 (19.683)	lr 0.04518
Warmup Train [9][130/3239]	Time 0.234 (0.355)	Data 0.001 (0.110)	Loss 5.3140 (5.4634)	Top-1 acc 10.547 (7.413)	Top-5 acc 24.219 (19.844)	lr 0.04518
Warmup Train [9][140/3239]	Time 0.204 (0.345)	Data 0.003 (0.102)	Loss 5.4365 (5.4641)	Top-1 acc 4.688 (7.344)	Top-5 acc 21.484 (19.792)	lr 0.04518
Warmup Train [9][150/3239]	Time 0.169 (0.337)	Data 0.001 (0.095)	Loss 5.4557 (5.4619)	Top-1 acc 5.469 (7.334)	Top-5 acc 18.359 (19.847)	lr 0.04517
Warmup Train [9][160/3239]	Time 0.174 (0.328)	Data 0.002 (0.090)	Loss 5.4595 (5.4585)	Top-1 acc 7.031 (7.402)	Top-5 acc 20.703 (19.953)	lr 0.04517
Warmup Train [9][170/3239]	Time 0.252 (0.321)	Data 0.028 (0.085)	Loss 5.2870 (5.4573)	Top-1 acc 9.766 (7.440)	Top-5 acc 23.828 (19.954)	lr 0.04516
Warmup Train [9][180/3239]	Time 0.250 (0.315)	Data 0.001 (0.080)	Loss 5.4314 (5.4594)	Top-1 acc 5.078 (7.400)	Top-5 acc 19.531 (19.922)	lr 0.04516
Warmup Train [9][190/3239]	Time 0.185 (0.309)	Data 0.001 (0.076)	Loss 5.5800 (5.4603)	Top-1 acc 4.688 (7.371)	Top-5 acc 18.750 (19.920)	lr 0.04516
Warmup Train [9][200/3239]	Time 0.221 (0.305)	Data 0.001 (0.072)	Loss 5.4526 (5.4602)	Top-1 acc 6.250 (7.350)	Top-5 acc 22.656 (19.934)	lr 0.04515
Warmup Train [9][210/3239]	Time 0.240 (0.301)	Data 0.001 (0.069)	Loss 5.5537 (5.4584)	Top-1 acc 7.031 (7.364)	Top-5 acc 21.094 (19.987)	lr 0.04515
Warmup Train [9][220/3239]	Time 0.401 (0.299)	Data 0.001 (0.066)	Loss 5.5082 (5.4600)	Top-1 acc 5.078 (7.346)	Top-5 acc 19.922 (19.989)	lr 0.04515
Warmup Train [9][230/3239]	Time 0.212 (0.297)	Data 0.001 (0.063)	Loss 5.5442 (5.4618)	Top-1 acc 7.422 (7.310)	Top-5 acc 22.656 (19.981)	lr 0.04514
Warmup Train [9][240/3239]	Time 0.226 (0.296)	Data 0.001 (0.061)	Loss 5.5150 (5.4618)	Top-1 acc 6.641 (7.289)	Top-5 acc 18.359 (19.969)	lr 0.04514
Warmup Train [9][250/3239]	Time 0.361 (0.297)	Data 0.002 (0.058)	Loss 5.4014 (5.4610)	Top-1 acc 8.203 (7.283)	Top-5 acc 19.141 (19.983)	lr 0.04514
Warmup Train [9][260/3239]	Time 0.273 (0.295)	Data 0.001 (0.056)	Loss 5.5348 (5.4618)	Top-1 acc 8.594 (7.286)	Top-5 acc 17.188 (19.965)	lr 0.04513
Warmup Train [9][270/3239]	Time 0.231 (0.293)	Data 0.003 (0.054)	Loss 5.3194 (5.4622)	Top-1 acc 5.078 (7.250)	Top-5 acc 16.797 (19.932)	lr 0.04513
Warmup Train [9][280/3239]	Time 0.271 (0.290)	Data 0.001 (0.053)	Loss 5.3967 (5.4593)	Top-1 acc 9.375 (7.301)	Top-5 acc 23.828 (20.018)	lr 0.04513
Warmup Train [9][290/3239]	Time 0.315 (0.290)	Data 0.001 (0.051)	Loss 5.3055 (5.4596)	Top-1 acc 9.766 (7.327)	Top-5 acc 24.609 (20.027)	lr 0.04512
Warmup Train [9][300/3239]	Time 0.187 (0.287)	Data 0.001 (0.049)	Loss 5.5396 (5.4597)	Top-1 acc 6.641 (7.321)	Top-5 acc 19.531 (20.033)	lr 0.04512
Warmup Train [9][310/3239]	Time 0.189 (0.285)	Data 0.001 (0.048)	Loss 5.4388 (5.4598)	Top-1 acc 7.812 (7.301)	Top-5 acc 23.438 (20.020)	lr 0.04511
Warmup Train [9][320/3239]	Time 0.350 (0.284)	Data 0.001 (0.046)	Loss 5.4811 (5.4602)	Top-1 acc 4.297 (7.287)	Top-5 acc 15.625 (20.006)	lr 0.04511
Warmup Train [9][330/3239]	Time 0.220 (0.281)	Data 0.001 (0.045)	Loss 5.3827 (5.4593)	Top-1 acc 8.203 (7.291)	Top-5 acc 19.922 (20.022)	lr 0.04511
Warmup Train [9][340/3239]	Time 0.193 (0.280)	Data 0.001 (0.044)	Loss 5.5281 (5.4576)	Top-1 acc 8.203 (7.313)	Top-5 acc 19.141 (20.067)	lr 0.04510
Warmup Train [9][350/3239]	Time 0.228 (0.278)	Data 0.001 (0.043)	Loss 5.4103 (5.4569)	Top-1 acc 7.031 (7.315)	Top-5 acc 20.703 (20.098)	lr 0.04510
Warmup Train [9][360/3239]	Time 0.226 (0.277)	Data 0.001 (0.042)	Loss 5.4427 (5.4561)	Top-1 acc 7.031 (7.313)	Top-5 acc 21.484 (20.092)	lr 0.04510
Warmup Train [9][370/3239]	Time 0.173 (0.275)	Data 0.002 (0.041)	Loss 5.4039 (5.4557)	Top-1 acc 5.859 (7.309)	Top-5 acc 21.094 (20.107)	lr 0.04509
Warmup Train [9][380/3239]	Time 0.206 (0.274)	Data 0.001 (0.040)	Loss 5.4245 (5.4558)	Top-1 acc 6.641 (7.307)	Top-5 acc 20.703 (20.121)	lr 0.04509
Warmup Train [9][390/3239]	Time 0.258 (0.273)	Data 0.002 (0.039)	Loss 5.5902 (5.4559)	Top-1 acc 8.984 (7.337)	Top-5 acc 21.484 (20.144)	lr 0.04509
Warmup Train [9][400/3239]	Time 0.224 (0.272)	Data 0.001 (0.038)	Loss 5.4119 (5.4549)	Top-1 acc 6.250 (7.348)	Top-5 acc 23.438 (20.162)	lr 0.04508
Warmup Train [9][410/3239]	Time 0.202 (0.271)	Data 0.002 (0.037)	Loss 5.4274 (5.4549)	Top-1 acc 9.766 (7.336)	Top-5 acc 19.531 (20.169)	lr 0.04508
Warmup Train [9][420/3239]	Time 0.221 (0.270)	Data 0.001 (0.036)	Loss 5.4013 (5.4535)	Top-1 acc 8.594 (7.366)	Top-5 acc 19.922 (20.204)	lr 0.04507
Warmup Train [9][430/3239]	Time 0.252 (0.269)	Data 0.001 (0.035)	Loss 5.4639 (5.4530)	Top-1 acc 7.031 (7.379)	Top-5 acc 19.531 (20.215)	lr 0.04507
Warmup Train [9][440/3239]	Time 0.290 (0.268)	Data 0.002 (0.035)	Loss 5.3462 (5.4528)	Top-1 acc 10.547 (7.394)	Top-5 acc 20.703 (20.226)	lr 0.04507
Warmup Train [9][450/3239]	Time 0.222 (0.268)	Data 0.001 (0.034)	Loss 5.3868 (5.4530)	Top-1 acc 8.984 (7.393)	Top-5 acc 22.266 (20.218)	lr 0.04506
Warmup Train [9][460/3239]	Time 0.229 (0.267)	Data 0.003 (0.033)	Loss 5.4901 (5.4535)	Top-1 acc 7.422 (7.385)	Top-5 acc 21.875 (20.202)	lr 0.04506
Warmup Train [9][470/3239]	Time 0.223 (0.266)	Data 0.001 (0.033)	Loss 5.4116 (5.4527)	Top-1 acc 10.938 (7.398)	Top-5 acc 21.484 (20.228)	lr 0.04506
Warmup Train [9][480/3239]	Time 0.166 (0.266)	Data 0.001 (0.032)	Loss 5.4420 (5.4526)	Top-1 acc 6.250 (7.422)	Top-5 acc 21.484 (20.268)	lr 0.04505
Warmup Train [9][490/3239]	Time 0.324 (0.266)	Data 0.003 (0.031)	Loss 5.6158 (5.4532)	Top-1 acc 5.469 (7.419)	Top-5 acc 16.406 (20.246)	lr 0.04505
Warmup Train [9][500/3239]	Time 0.216 (0.265)	Data 0.001 (0.031)	Loss 5.4296 (5.4530)	Top-1 acc 7.812 (7.416)	Top-5 acc 19.922 (20.256)	lr 0.04505
Warmup Train [9][510/3239]	Time 0.272 (0.265)	Data 0.001 (0.030)	Loss 5.5123 (5.4530)	Top-1 acc 7.031 (7.417)	Top-5 acc 18.359 (20.266)	lr 0.04504
Warmup Train [9][520/3239]	Time 0.247 (0.264)	Data 0.001 (0.030)	Loss 5.5228 (5.4524)	Top-1 acc 4.688 (7.417)	Top-5 acc 19.922 (20.272)	lr 0.04504
Warmup Train [9][530/3239]	Time 0.223 (0.263)	Data 0.002 (0.029)	Loss 5.4417 (5.4524)	Top-1 acc 9.766 (7.420)	Top-5 acc 21.484 (20.284)	lr 0.04503
Warmup Train [9][540/3239]	Time 0.346 (0.263)	Data 0.001 (0.029)	Loss 5.4322 (5.4522)	Top-1 acc 7.422 (7.402)	Top-5 acc 18.359 (20.288)	lr 0.04503
Warmup Train [9][550/3239]	Time 0.372 (0.262)	Data 0.001 (0.028)	Loss 5.4196 (5.4517)	Top-1 acc 8.984 (7.419)	Top-5 acc 19.531 (20.291)	lr 0.04503
Warmup Train [9][560/3239]	Time 0.247 (0.262)	Data 0.029 (0.028)	Loss 5.4844 (5.4515)	Top-1 acc 6.250 (7.409)	Top-5 acc 19.531 (20.286)	lr 0.04502
Warmup Train [9][570/3239]	Time 0.195 (0.261)	Data 0.001 (0.027)	Loss 5.3789 (5.4511)	Top-1 acc 7.422 (7.416)	Top-5 acc 20.703 (20.298)	lr 0.04502
Warmup Train [9][580/3239]	Time 0.195 (0.260)	Data 0.001 (0.027)	Loss 5.3365 (5.4501)	Top-1 acc 8.203 (7.433)	Top-5 acc 24.219 (20.315)	lr 0.04502
Warmup Train [9][590/3239]	Time 0.232 (0.259)	Data 0.001 (0.026)	Loss 5.4021 (5.4493)	Top-1 acc 4.688 (7.442)	Top-5 acc 18.750 (20.341)	lr 0.04501
Warmup Train [9][600/3239]	Time 0.199 (0.259)	Data 0.001 (0.026)	Loss 5.4864 (5.4491)	Top-1 acc 7.422 (7.441)	Top-5 acc 18.359 (20.351)	lr 0.04501
Warmup Train [9][610/3239]	Time 0.210 (0.258)	Data 0.001 (0.026)	Loss 5.4576 (5.4487)	Top-1 acc 10.938 (7.446)	Top-5 acc 18.750 (20.363)	lr 0.04501
Warmup Train [9][620/3239]	Time 0.244 (0.258)	Data 0.001 (0.025)	Loss 5.4543 (5.4485)	Top-1 acc 7.422 (7.443)	Top-5 acc 21.094 (20.358)	lr 0.04500
Warmup Train [9][630/3239]	Time 0.165 (0.257)	Data 0.001 (0.025)	Loss 5.3458 (5.4485)	Top-1 acc 8.594 (7.447)	Top-5 acc 20.703 (20.354)	lr 0.04500
Warmup Train [9][640/3239]	Time 0.224 (0.257)	Data 0.001 (0.025)	Loss 5.4008 (5.4486)	Top-1 acc 8.203 (7.441)	Top-5 acc 23.047 (20.346)	lr 0.04500
Warmup Train [9][650/3239]	Time 0.172 (0.256)	Data 0.001 (0.024)	Loss 5.4885 (5.4482)	Top-1 acc 5.859 (7.458)	Top-5 acc 18.750 (20.359)	lr 0.04499
Warmup Train [9][660/3239]	Time 0.314 (0.256)	Data 0.001 (0.024)	Loss 5.4055 (5.4476)	Top-1 acc 8.594 (7.475)	Top-5 acc 25.391 (20.380)	lr 0.04499
Warmup Train [9][670/3239]	Time 0.161 (0.255)	Data 0.001 (0.024)	Loss 5.5483 (5.4476)	Top-1 acc 5.859 (7.470)	Top-5 acc 18.359 (20.384)	lr 0.04498
Warmup Train [9][680/3239]	Time 0.192 (0.255)	Data 0.001 (0.023)	Loss 5.4653 (5.4475)	Top-1 acc 7.812 (7.485)	Top-5 acc 17.969 (20.386)	lr 0.04498
Warmup Train [9][690/3239]	Time 0.208 (0.255)	Data 0.001 (0.023)	Loss 5.4742 (5.4466)	Top-1 acc 6.641 (7.499)	Top-5 acc 18.359 (20.398)	lr 0.04498
Warmup Train [9][700/3239]	Time 0.213 (0.254)	Data 0.001 (0.023)	Loss 5.4127 (5.4465)	Top-1 acc 7.031 (7.489)	Top-5 acc 20.703 (20.400)	lr 0.04497
Warmup Train [9][710/3239]	Time 0.147 (0.254)	Data 0.002 (0.023)	Loss 5.3977 (5.4465)	Top-1 acc 8.594 (7.494)	Top-5 acc 23.047 (20.409)	lr 0.04497
Warmup Train [9][720/3239]	Time 0.203 (0.253)	Data 0.001 (0.022)	Loss 5.2672 (5.4461)	Top-1 acc 7.422 (7.491)	Top-5 acc 23.828 (20.430)	lr 0.04497
Warmup Train [9][730/3239]	Time 0.206 (0.253)	Data 0.001 (0.022)	Loss 5.5819 (5.4458)	Top-1 acc 6.250 (7.496)	Top-5 acc 17.188 (20.444)	lr 0.04496
Warmup Train [9][740/3239]	Time 0.217 (0.253)	Data 0.001 (0.022)	Loss 5.4842 (5.4456)	Top-1 acc 5.469 (7.509)	Top-5 acc 18.359 (20.454)	lr 0.04496
Warmup Train [9][750/3239]	Time 0.156 (0.252)	Data 0.001 (0.021)	Loss 5.3220 (5.4455)	Top-1 acc 6.641 (7.505)	Top-5 acc 23.828 (20.454)	lr 0.04495
Warmup Train [9][760/3239]	Time 0.304 (0.252)	Data 0.001 (0.021)	Loss 5.4862 (5.4460)	Top-1 acc 6.250 (7.491)	Top-5 acc 19.531 (20.440)	lr 0.04495
Warmup Train [9][770/3239]	Time 0.276 (0.251)	Data 0.001 (0.021)	Loss 5.4524 (5.4461)	Top-1 acc 5.078 (7.491)	Top-5 acc 22.266 (20.446)	lr 0.04495
Warmup Train [9][780/3239]	Time 0.204 (0.251)	Data 0.002 (0.021)	Loss 5.5568 (5.4458)	Top-1 acc 6.250 (7.489)	Top-5 acc 18.750 (20.454)	lr 0.04494
Warmup Train [9][790/3239]	Time 0.219 (0.251)	Data 0.001 (0.021)	Loss 5.4767 (5.4456)	Top-1 acc 7.422 (7.486)	Top-5 acc 23.047 (20.461)	lr 0.04494
Warmup Train [9][800/3239]	Time 0.232 (0.251)	Data 0.001 (0.020)	Loss 5.5273 (5.4452)	Top-1 acc 5.469 (7.490)	Top-5 acc 15.234 (20.458)	lr 0.04494
Warmup Train [9][810/3239]	Time 0.229 (0.250)	Data 0.001 (0.020)	Loss 5.4400 (5.4454)	Top-1 acc 7.031 (7.484)	Top-5 acc 19.141 (20.453)	lr 0.04493
Warmup Train [9][820/3239]	Time 0.185 (0.250)	Data 0.001 (0.020)	Loss 5.3661 (5.4458)	Top-1 acc 13.281 (7.477)	Top-5 acc 21.094 (20.438)	lr 0.04493
Warmup Train [9][830/3239]	Time 0.245 (0.249)	Data 0.001 (0.020)	Loss 5.3770 (5.4458)	Top-1 acc 7.422 (7.487)	Top-5 acc 27.344 (20.453)	lr 0.04493
Warmup Train [9][840/3239]	Time 0.210 (0.249)	Data 0.001 (0.020)	Loss 5.3473 (5.4455)	Top-1 acc 7.812 (7.493)	Top-5 acc 21.094 (20.464)	lr 0.04492
Warmup Train [9][850/3239]	Time 0.195 (0.249)	Data 0.001 (0.019)	Loss 5.4383 (5.4456)	Top-1 acc 7.031 (7.487)	Top-5 acc 19.922 (20.457)	lr 0.04492
Warmup Train [9][860/3239]	Time 0.280 (0.248)	Data 0.001 (0.019)	Loss 5.4362 (5.4459)	Top-1 acc 8.984 (7.473)	Top-5 acc 21.875 (20.445)	lr 0.04491
Warmup Train [9][870/3239]	Time 0.360 (0.248)	Data 0.001 (0.019)	Loss 5.4205 (5.4458)	Top-1 acc 6.641 (7.462)	Top-5 acc 21.094 (20.440)	lr 0.04491
Warmup Train [9][880/3239]	Time 0.284 (0.248)	Data 0.001 (0.019)	Loss 5.5073 (5.4455)	Top-1 acc 7.422 (7.464)	Top-5 acc 19.141 (20.447)	lr 0.04491
Warmup Train [9][890/3239]	Time 0.220 (0.248)	Data 0.001 (0.019)	Loss 5.3425 (5.4459)	Top-1 acc 8.984 (7.463)	Top-5 acc 23.047 (20.437)	lr 0.04490
Warmup Train [9][900/3239]	Time 0.212 (0.247)	Data 0.001 (0.019)	Loss 5.4524 (5.4459)	Top-1 acc 8.984 (7.463)	Top-5 acc 18.359 (20.430)	lr 0.04490
Warmup Train [9][910/3239]	Time 0.196 (0.247)	Data 0.001 (0.018)	Loss 5.5518 (5.4457)	Top-1 acc 6.250 (7.471)	Top-5 acc 16.797 (20.440)	lr 0.04490
Warmup Train [9][920/3239]	Time 0.194 (0.247)	Data 0.001 (0.018)	Loss 5.4921 (5.4454)	Top-1 acc 7.031 (7.470)	Top-5 acc 19.922 (20.442)	lr 0.04489
Warmup Train [9][930/3239]	Time 0.204 (0.247)	Data 0.001 (0.018)	Loss 5.4307 (5.4455)	Top-1 acc 8.594 (7.473)	Top-5 acc 24.609 (20.442)	lr 0.04489
Warmup Train [9][940/3239]	Time 0.238 (0.246)	Data 0.001 (0.018)	Loss 5.4163 (5.4449)	Top-1 acc 6.641 (7.476)	Top-5 acc 22.656 (20.451)	lr 0.04489
Warmup Train [9][950/3239]	Time 0.192 (0.246)	Data 0.001 (0.018)	Loss 5.4782 (5.4450)	Top-1 acc 7.031 (7.468)	Top-5 acc 19.141 (20.456)	lr 0.04488
Warmup Train [9][960/3239]	Time 0.194 (0.246)	Data 0.001 (0.018)	Loss 5.3552 (5.4448)	Top-1 acc 9.766 (7.474)	Top-5 acc 22.656 (20.464)	lr 0.04488
Warmup Train [9][970/3239]	Time 0.234 (0.246)	Data 0.001 (0.018)	Loss 5.3496 (5.4442)	Top-1 acc 9.766 (7.478)	Top-5 acc 24.609 (20.481)	lr 0.04487
Warmup Train [9][980/3239]	Time 0.221 (0.246)	Data 0.001 (0.017)	Loss 5.4434 (5.4439)	Top-1 acc 7.031 (7.479)	Top-5 acc 18.359 (20.485)	lr 0.04487
Warmup Train [9][990/3239]	Time 0.339 (0.245)	Data 0.001 (0.017)	Loss 5.4190 (5.4438)	Top-1 acc 8.594 (7.479)	Top-5 acc 23.828 (20.492)	lr 0.04487
Warmup Train [9][1000/3239]	Time 0.317 (0.245)	Data 0.002 (0.017)	Loss 5.4161 (5.4436)	Top-1 acc 8.984 (7.485)	Top-5 acc 20.312 (20.501)	lr 0.04486
Warmup Train [9][1010/3239]	Time 0.147 (0.245)	Data 0.001 (0.017)	Loss 5.4885 (5.4435)	Top-1 acc 7.812 (7.486)	Top-5 acc 22.656 (20.505)	lr 0.04486
Warmup Train [9][1020/3239]	Time 0.179 (0.244)	Data 0.001 (0.017)	Loss 5.2736 (5.4436)	Top-1 acc 8.984 (7.485)	Top-5 acc 25.391 (20.508)	lr 0.04486
Warmup Train [9][1030/3239]	Time 0.164 (0.244)	Data 0.001 (0.017)	Loss 5.3476 (5.4434)	Top-1 acc 8.203 (7.489)	Top-5 acc 23.828 (20.511)	lr 0.04485
Warmup Train [9][1040/3239]	Time 0.232 (0.244)	Data 0.001 (0.017)	Loss 5.5210 (5.4430)	Top-1 acc 7.422 (7.491)	Top-5 acc 17.578 (20.514)	lr 0.04485
Warmup Train [9][1050/3239]	Time 0.220 (0.244)	Data 0.001 (0.016)	Loss 5.3826 (5.4427)	Top-1 acc 10.547 (7.499)	Top-5 acc 22.266 (20.521)	lr 0.04484
Warmup Train [9][1060/3239]	Time 0.290 (0.244)	Data 0.005 (0.016)	Loss 5.4632 (5.4425)	Top-1 acc 8.203 (7.498)	Top-5 acc 23.438 (20.524)	lr 0.04484
Warmup Train [9][1070/3239]	Time 0.225 (0.243)	Data 0.001 (0.016)	Loss 5.5655 (5.4425)	Top-1 acc 8.203 (7.502)	Top-5 acc 18.359 (20.519)	lr 0.04484
Warmup Train [9][1080/3239]	Time 0.197 (0.243)	Data 0.001 (0.016)	Loss 5.3965 (5.4422)	Top-1 acc 6.641 (7.492)	Top-5 acc 19.922 (20.521)	lr 0.04483
Warmup Train [9][1090/3239]	Time 0.193 (0.243)	Data 0.001 (0.016)	Loss 5.4522 (5.4421)	Top-1 acc 7.812 (7.492)	Top-5 acc 19.922 (20.520)	lr 0.04483
Warmup Train [9][1100/3239]	Time 0.253 (0.243)	Data 0.002 (0.016)	Loss 5.4302 (5.4418)	Top-1 acc 7.422 (7.497)	Top-5 acc 21.094 (20.529)	lr 0.04483
Warmup Train [9][1110/3239]	Time 0.334 (0.243)	Data 0.001 (0.016)	Loss 5.3204 (5.4415)	Top-1 acc 8.203 (7.502)	Top-5 acc 21.875 (20.539)	lr 0.04482
Warmup Train [9][1120/3239]	Time 0.282 (0.243)	Data 0.001 (0.016)	Loss 5.3420 (5.4414)	Top-1 acc 7.422 (7.506)	Top-5 acc 21.875 (20.544)	lr 0.04482
Warmup Train [9][1130/3239]	Time 0.150 (0.243)	Data 0.001 (0.015)	Loss 5.4503 (5.4411)	Top-1 acc 8.984 (7.510)	Top-5 acc 26.953 (20.557)	lr 0.04482
Warmup Train [9][1140/3239]	Time 0.235 (0.242)	Data 0.001 (0.015)	Loss 5.3124 (5.4409)	Top-1 acc 7.812 (7.519)	Top-5 acc 25.000 (20.570)	lr 0.04481
Warmup Train [9][1150/3239]	Time 0.185 (0.242)	Data 0.001 (0.015)	Loss 5.5196 (5.4409)	Top-1 acc 5.078 (7.516)	Top-5 acc 19.141 (20.566)	lr 0.04481
Warmup Train [9][1160/3239]	Time 0.207 (0.242)	Data 0.001 (0.015)	Loss 5.3343 (5.4409)	Top-1 acc 8.594 (7.511)	Top-5 acc 24.609 (20.567)	lr 0.04480
Warmup Train [9][1170/3239]	Time 0.195 (0.242)	Data 0.001 (0.015)	Loss 5.4881 (5.4409)	Top-1 acc 5.859 (7.509)	Top-5 acc 19.922 (20.571)	lr 0.04480
Warmup Train [9][1180/3239]	Time 0.222 (0.242)	Data 0.002 (0.015)	Loss 5.1876 (5.4406)	Top-1 acc 9.766 (7.507)	Top-5 acc 27.734 (20.582)	lr 0.04480
Warmup Train [9][1190/3239]	Time 0.162 (0.242)	Data 0.001 (0.015)	Loss 5.4943 (5.4405)	Top-1 acc 9.375 (7.512)	Top-5 acc 21.094 (20.580)	lr 0.04479
Warmup Train [9][1200/3239]	Time 0.223 (0.241)	Data 0.025 (0.015)	Loss 5.3835 (5.4402)	Top-1 acc 8.203 (7.521)	Top-5 acc 22.656 (20.585)	lr 0.04479
Warmup Train [9][1210/3239]	Time 0.376 (0.241)	Data 0.002 (0.015)	Loss 5.6153 (5.4409)	Top-1 acc 7.031 (7.512)	Top-5 acc 16.406 (20.571)	lr 0.04479
Warmup Train [9][1220/3239]	Time 0.238 (0.241)	Data 0.001 (0.015)	Loss 5.3436 (5.4406)	Top-1 acc 7.031 (7.506)	Top-5 acc 21.094 (20.570)	lr 0.04478
Warmup Train [9][1230/3239]	Time 0.223 (0.241)	Data 0.001 (0.014)	Loss 5.5419 (5.4404)	Top-1 acc 5.859 (7.503)	Top-5 acc 20.312 (20.571)	lr 0.04478
Warmup Train [9][1240/3239]	Time 0.235 (0.241)	Data 0.001 (0.014)	Loss 5.6180 (5.4406)	Top-1 acc 4.688 (7.502)	Top-5 acc 17.188 (20.571)	lr 0.04477
Warmup Train [9][1250/3239]	Time 0.238 (0.241)	Data 0.002 (0.014)	Loss 5.4387 (5.4406)	Top-1 acc 6.250 (7.502)	Top-5 acc 19.141 (20.571)	lr 0.04477
Warmup Train [9][1260/3239]	Time 0.233 (0.241)	Data 0.001 (0.014)	Loss 5.5205 (5.4403)	Top-1 acc 7.422 (7.505)	Top-5 acc 20.703 (20.577)	lr 0.04477
Warmup Train [9][1270/3239]	Time 0.262 (0.240)	Data 0.001 (0.014)	Loss 5.3445 (5.4400)	Top-1 acc 10.938 (7.516)	Top-5 acc 23.438 (20.583)	lr 0.04476
Warmup Train [9][1280/3239]	Time 0.197 (0.240)	Data 0.002 (0.014)	Loss 5.4646 (5.4399)	Top-1 acc 5.859 (7.514)	Top-5 acc 20.703 (20.581)	lr 0.04476
Warmup Train [9][1290/3239]	Time 0.216 (0.240)	Data 0.001 (0.014)	Loss 5.3921 (5.4397)	Top-1 acc 7.031 (7.518)	Top-5 acc 21.484 (20.588)	lr 0.04476
Warmup Train [9][1300/3239]	Time 0.191 (0.240)	Data 0.001 (0.014)	Loss 5.3511 (5.4398)	Top-1 acc 7.031 (7.514)	Top-5 acc 22.266 (20.584)	lr 0.04475
Warmup Train [9][1310/3239]	Time 0.291 (0.240)	Data 0.001 (0.014)	Loss 5.4461 (5.4397)	Top-1 acc 8.203 (7.515)	Top-5 acc 19.141 (20.586)	lr 0.04475
Warmup Train [9][1320/3239]	Time 0.182 (0.240)	Data 0.001 (0.014)	Loss 5.4377 (5.4396)	Top-1 acc 4.688 (7.515)	Top-5 acc 20.312 (20.584)	lr 0.04474
Warmup Train [9][1330/3239]	Time 0.253 (0.240)	Data 0.001 (0.014)	Loss 5.4140 (5.4400)	Top-1 acc 5.859 (7.509)	Top-5 acc 18.359 (20.577)	lr 0.04474
Warmup Train [9][1340/3239]	Time 0.250 (0.240)	Data 0.001 (0.014)	Loss 5.3018 (5.4396)	Top-1 acc 10.156 (7.515)	Top-5 acc 24.219 (20.589)	lr 0.04474
Warmup Train [9][1350/3239]	Time 0.194 (0.240)	Data 0.001 (0.013)	Loss 5.4291 (5.4392)	Top-1 acc 5.469 (7.519)	Top-5 acc 20.312 (20.599)	lr 0.04473
Warmup Train [9][1360/3239]	Time 0.251 (0.240)	Data 0.001 (0.013)	Loss 5.4230 (5.4396)	Top-1 acc 8.984 (7.516)	Top-5 acc 21.094 (20.589)	lr 0.04473
Warmup Train [9][1370/3239]	Time 0.249 (0.239)	Data 0.001 (0.013)	Loss 5.4710 (5.4396)	Top-1 acc 6.641 (7.512)	Top-5 acc 19.922 (20.588)	lr 0.04473
Warmup Train [9][1380/3239]	Time 0.266 (0.239)	Data 0.001 (0.013)	Loss 5.5198 (5.4394)	Top-1 acc 6.250 (7.517)	Top-5 acc 17.578 (20.596)	lr 0.04472
Warmup Train [9][1390/3239]	Time 0.183 (0.239)	Data 0.001 (0.013)	Loss 5.4864 (5.4391)	Top-1 acc 8.203 (7.521)	Top-5 acc 23.438 (20.614)	lr 0.04472
Warmup Train [9][1400/3239]	Time 0.221 (0.239)	Data 0.001 (0.013)	Loss 5.4627 (5.4394)	Top-1 acc 6.641 (7.516)	Top-5 acc 18.359 (20.604)	lr 0.04472
Warmup Train [9][1410/3239]	Time 0.305 (0.239)	Data 0.001 (0.013)	Loss 5.5541 (5.4391)	Top-1 acc 5.859 (7.526)	Top-5 acc 17.578 (20.616)	lr 0.04471
Warmup Train [9][1420/3239]	Time 0.343 (0.239)	Data 0.001 (0.013)	Loss 5.5602 (5.4391)	Top-1 acc 6.250 (7.530)	Top-5 acc 16.016 (20.619)	lr 0.04471
Warmup Train [9][1430/3239]	Time 0.193 (0.239)	Data 0.001 (0.013)	Loss 5.3658 (5.4387)	Top-1 acc 7.031 (7.536)	Top-5 acc 25.391 (20.626)	lr 0.04470
Warmup Train [9][1440/3239]	Time 0.170 (0.239)	Data 0.002 (0.013)	Loss 5.4866 (5.4387)	Top-1 acc 6.250 (7.533)	Top-5 acc 18.750 (20.630)	lr 0.04470
Warmup Train [9][1450/3239]	Time 0.178 (0.239)	Data 0.001 (0.013)	Loss 5.5807 (5.4389)	Top-1 acc 3.906 (7.528)	Top-5 acc 16.797 (20.617)	lr 0.04470
Warmup Train [9][1460/3239]	Time 0.201 (0.239)	Data 0.001 (0.013)	Loss 5.3910 (5.4387)	Top-1 acc 7.422 (7.526)	Top-5 acc 23.828 (20.627)	lr 0.04469
Warmup Train [9][1470/3239]	Time 0.197 (0.239)	Data 0.001 (0.013)	Loss 5.3472 (5.4388)	Top-1 acc 9.375 (7.529)	Top-5 acc 21.875 (20.622)	lr 0.04469
Warmup Train [9][1480/3239]	Time 0.227 (0.238)	Data 0.001 (0.013)	Loss 5.3457 (5.4384)	Top-1 acc 5.469 (7.532)	Top-5 acc 21.484 (20.625)	lr 0.04469
Warmup Train [9][1490/3239]	Time 0.265 (0.239)	Data 0.001 (0.013)	Loss 5.3644 (5.4383)	Top-1 acc 7.422 (7.533)	Top-5 acc 23.047 (20.627)	lr 0.04468
Warmup Train [9][1500/3239]	Time 0.213 (0.238)	Data 0.001 (0.012)	Loss 5.4179 (5.4382)	Top-1 acc 7.031 (7.538)	Top-5 acc 17.188 (20.629)	lr 0.04468
Warmup Train [9][1510/3239]	Time 0.152 (0.238)	Data 0.003 (0.012)	Loss 5.5039 (5.4381)	Top-1 acc 5.078 (7.540)	Top-5 acc 19.141 (20.634)	lr 0.04467
Warmup Train [9][1520/3239]	Time 0.158 (0.238)	Data 0.001 (0.012)	Loss 5.3255 (5.4381)	Top-1 acc 8.984 (7.541)	Top-5 acc 21.094 (20.639)	lr 0.04467
Warmup Train [9][1530/3239]	Time 0.383 (0.238)	Data 0.001 (0.012)	Loss 5.3231 (5.4381)	Top-1 acc 9.766 (7.546)	Top-5 acc 21.484 (20.642)	lr 0.04467
Warmup Train [9][1540/3239]	Time 0.190 (0.238)	Data 0.001 (0.012)	Loss 5.4081 (5.4380)	Top-1 acc 7.812 (7.553)	Top-5 acc 20.703 (20.646)	lr 0.04466
Warmup Train [9][1550/3239]	Time 0.138 (0.238)	Data 0.002 (0.012)	Loss 5.3898 (5.4377)	Top-1 acc 8.203 (7.563)	Top-5 acc 22.656 (20.650)	lr 0.04466
Warmup Train [9][1560/3239]	Time 0.228 (0.238)	Data 0.002 (0.012)	Loss 5.5191 (5.4377)	Top-1 acc 5.859 (7.561)	Top-5 acc 17.969 (20.646)	lr 0.04466
Warmup Train [9][1570/3239]	Time 0.147 (0.238)	Data 0.001 (0.012)	Loss 5.5783 (5.4376)	Top-1 acc 5.078 (7.561)	Top-5 acc 20.312 (20.654)	lr 0.04465
Warmup Train [9][1580/3239]	Time 0.191 (0.238)	Data 0.001 (0.012)	Loss 5.3910 (5.4372)	Top-1 acc 7.812 (7.568)	Top-5 acc 21.094 (20.665)	lr 0.04465
Warmup Train [9][1590/3239]	Time 0.201 (0.238)	Data 0.002 (0.012)	Loss 5.3325 (5.4371)	Top-1 acc 5.469 (7.567)	Top-5 acc 22.656 (20.671)	lr 0.04464
Warmup Train [9][1600/3239]	Time 0.249 (0.237)	Data 0.001 (0.012)	Loss 5.2745 (5.4368)	Top-1 acc 10.156 (7.578)	Top-5 acc 20.703 (20.680)	lr 0.04464
Warmup Train [9][1610/3239]	Time 0.246 (0.237)	Data 0.001 (0.012)	Loss 5.2972 (5.4364)	Top-1 acc 6.250 (7.577)	Top-5 acc 25.781 (20.691)	lr 0.04464
Warmup Train [9][1620/3239]	Time 0.219 (0.237)	Data 0.001 (0.012)	Loss 5.4521 (5.4362)	Top-1 acc 7.422 (7.576)	Top-5 acc 20.312 (20.694)	lr 0.04463
Warmup Train [9][1630/3239]	Time 0.244 (0.237)	Data 0.001 (0.012)	Loss 5.3291 (5.4359)	Top-1 acc 7.812 (7.576)	Top-5 acc 23.438 (20.700)	lr 0.04463
Warmup Train [9][1640/3239]	Time 0.303 (0.237)	Data 0.001 (0.012)	Loss 5.6461 (5.4359)	Top-1 acc 5.859 (7.579)	Top-5 acc 17.578 (20.703)	lr 0.04463
Warmup Train [9][1650/3239]	Time 0.169 (0.237)	Data 0.001 (0.012)	Loss 5.4449 (5.4359)	Top-1 acc 7.422 (7.581)	Top-5 acc 20.703 (20.705)	lr 0.04462
Warmup Train [9][1660/3239]	Time 0.225 (0.237)	Data 0.001 (0.012)	Loss 5.3082 (5.4358)	Top-1 acc 9.766 (7.580)	Top-5 acc 22.266 (20.705)	lr 0.04462
Warmup Train [9][1670/3239]	Time 0.210 (0.237)	Data 0.001 (0.012)	Loss 5.4328 (5.4357)	Top-1 acc 8.203 (7.583)	Top-5 acc 22.266 (20.713)	lr 0.04461
Warmup Train [9][1680/3239]	Time 0.291 (0.237)	Data 0.001 (0.012)	Loss 5.4272 (5.4355)	Top-1 acc 6.250 (7.585)	Top-5 acc 17.969 (20.716)	lr 0.04461
Warmup Train [9][1690/3239]	Time 0.249 (0.237)	Data 0.001 (0.011)	Loss 5.4930 (5.4356)	Top-1 acc 7.422 (7.585)	Top-5 acc 18.750 (20.711)	lr 0.04461
Warmup Train [9][1700/3239]	Time 0.185 (0.237)	Data 0.001 (0.011)	Loss 5.2636 (5.4352)	Top-1 acc 9.766 (7.591)	Top-5 acc 23.438 (20.723)	lr 0.04460
Warmup Train [9][1710/3239]	Time 0.150 (0.237)	Data 0.001 (0.011)	Loss 5.2705 (5.4351)	Top-1 acc 8.984 (7.591)	Top-5 acc 23.047 (20.724)	lr 0.04460
Warmup Train [9][1720/3239]	Time 0.221 (0.237)	Data 0.001 (0.011)	Loss 5.2671 (5.4346)	Top-1 acc 6.250 (7.597)	Top-5 acc 25.781 (20.739)	lr 0.04460
Warmup Train [9][1730/3239]	Time 0.235 (0.237)	Data 0.001 (0.011)	Loss 5.3770 (5.4345)	Top-1 acc 7.812 (7.598)	Top-5 acc 23.438 (20.743)	lr 0.04459
Warmup Train [9][1740/3239]	Time 0.388 (0.237)	Data 0.002 (0.011)	Loss 5.4329 (5.4342)	Top-1 acc 7.422 (7.600)	Top-5 acc 19.922 (20.754)	lr 0.04459
Warmup Train [9][1750/3239]	Time 0.204 (0.237)	Data 0.001 (0.011)	Loss 5.2830 (5.4337)	Top-1 acc 10.156 (7.602)	Top-5 acc 22.656 (20.762)	lr 0.04458
Warmup Train [9][1760/3239]	Time 0.258 (0.237)	Data 0.001 (0.011)	Loss 5.3708 (5.4336)	Top-1 acc 8.594 (7.604)	Top-5 acc 20.312 (20.764)	lr 0.04458
Warmup Train [9][1770/3239]	Time 0.206 (0.236)	Data 0.001 (0.011)	Loss 5.3124 (5.4334)	Top-1 acc 9.375 (7.607)	Top-5 acc 24.609 (20.769)	lr 0.04458
Warmup Train [9][1780/3239]	Time 0.237 (0.236)	Data 0.001 (0.011)	Loss 5.3360 (5.4331)	Top-1 acc 6.250 (7.612)	Top-5 acc 22.266 (20.779)	lr 0.04457
Warmup Train [9][1790/3239]	Time 0.155 (0.236)	Data 0.001 (0.011)	Loss 5.5886 (5.4330)	Top-1 acc 4.688 (7.611)	Top-5 acc 16.797 (20.781)	lr 0.04457
Warmup Train [9][1800/3239]	Time 0.183 (0.236)	Data 0.001 (0.011)	Loss 5.2944 (5.4327)	Top-1 acc 10.156 (7.617)	Top-5 acc 26.172 (20.789)	lr 0.04457
Warmup Train [9][1810/3239]	Time 0.191 (0.236)	Data 0.001 (0.011)	Loss 5.4236 (5.4326)	Top-1 acc 8.984 (7.624)	Top-5 acc 19.531 (20.796)	lr 0.04456
Warmup Train [9][1820/3239]	Time 0.176 (0.236)	Data 0.001 (0.011)	Loss 5.3983 (5.4323)	Top-1 acc 7.812 (7.626)	Top-5 acc 22.266 (20.802)	lr 0.04456
Warmup Train [9][1830/3239]	Time 0.159 (0.236)	Data 0.001 (0.011)	Loss 5.3936 (5.4320)	Top-1 acc 7.422 (7.628)	Top-5 acc 25.391 (20.809)	lr 0.04455
Warmup Train [9][1840/3239]	Time 0.233 (0.236)	Data 0.002 (0.011)	Loss 5.4831 (5.4319)	Top-1 acc 7.031 (7.630)	Top-5 acc 19.922 (20.813)	lr 0.04455
Warmup Train [9][1850/3239]	Time 0.327 (0.236)	Data 0.001 (0.011)	Loss 5.5226 (5.4319)	Top-1 acc 8.984 (7.631)	Top-5 acc 20.312 (20.812)	lr 0.04455
Warmup Train [9][1860/3239]	Time 0.339 (0.236)	Data 0.002 (0.011)	Loss 5.3974 (5.4315)	Top-1 acc 8.594 (7.638)	Top-5 acc 21.094 (20.825)	lr 0.04454
Warmup Train [9][1870/3239]	Time 0.160 (0.236)	Data 0.001 (0.011)	Loss 5.1687 (5.4311)	Top-1 acc 9.766 (7.644)	Top-5 acc 25.781 (20.828)	lr 0.04454
Warmup Train [9][1880/3239]	Time 0.213 (0.236)	Data 0.001 (0.011)	Loss 5.4180 (5.4309)	Top-1 acc 7.812 (7.647)	Top-5 acc 21.484 (20.836)	lr 0.04453
Warmup Train [9][1890/3239]	Time 0.220 (0.236)	Data 0.001 (0.011)	Loss 5.1906 (5.4304)	Top-1 acc 9.766 (7.652)	Top-5 acc 29.688 (20.851)	lr 0.04453
Warmup Train [9][1900/3239]	Time 0.222 (0.236)	Data 0.001 (0.011)	Loss 5.4742 (5.4304)	Top-1 acc 5.859 (7.647)	Top-5 acc 19.531 (20.852)	lr 0.04453
Warmup Train [9][1910/3239]	Time 0.234 (0.236)	Data 0.001 (0.011)	Loss 5.4449 (5.4303)	Top-1 acc 8.203 (7.647)	Top-5 acc 22.656 (20.855)	lr 0.04452
Warmup Train [9][1920/3239]	Time 0.157 (0.236)	Data 0.001 (0.011)	Loss 5.5188 (5.4303)	Top-1 acc 7.422 (7.649)	Top-5 acc 20.703 (20.855)	lr 0.04452
Warmup Train [9][1930/3239]	Time 0.252 (0.236)	Data 0.001 (0.011)	Loss 5.2814 (5.4301)	Top-1 acc 11.328 (7.654)	Top-5 acc 24.219 (20.862)	lr 0.04452
Warmup Train [9][1940/3239]	Time 0.229 (0.236)	Data 0.001 (0.010)	Loss 5.3655 (5.4299)	Top-1 acc 6.250 (7.653)	Top-5 acc 17.969 (20.864)	lr 0.04451
Warmup Train [9][1950/3239]	Time 0.359 (0.236)	Data 0.001 (0.010)	Loss 5.3227 (5.4299)	Top-1 acc 7.031 (7.652)	Top-5 acc 21.484 (20.863)	lr 0.04451
Warmup Train [9][1960/3239]	Time 0.217 (0.236)	Data 0.001 (0.010)	Loss 5.3885 (5.4299)	Top-1 acc 10.547 (7.655)	Top-5 acc 23.047 (20.867)	lr 0.04450
Warmup Train [9][1970/3239]	Time 0.220 (0.235)	Data 0.001 (0.010)	Loss 5.3953 (5.4296)	Top-1 acc 8.203 (7.660)	Top-5 acc 20.312 (20.869)	lr 0.04450
Warmup Train [9][1980/3239]	Time 0.226 (0.235)	Data 0.001 (0.010)	Loss 5.4105 (5.4295)	Top-1 acc 7.031 (7.661)	Top-5 acc 22.266 (20.870)	lr 0.04450
Warmup Train [9][1990/3239]	Time 0.244 (0.235)	Data 0.001 (0.010)	Loss 5.3963 (5.4292)	Top-1 acc 7.031 (7.661)	Top-5 acc 18.359 (20.875)	lr 0.04449
Warmup Train [9][2000/3239]	Time 0.241 (0.235)	Data 0.001 (0.010)	Loss 5.5434 (5.4291)	Top-1 acc 7.031 (7.658)	Top-5 acc 16.406 (20.876)	lr 0.04449
Warmup Train [9][2010/3239]	Time 0.237 (0.235)	Data 0.001 (0.010)	Loss 5.4730 (5.4289)	Top-1 acc 6.250 (7.662)	Top-5 acc 21.094 (20.884)	lr 0.04449
Warmup Train [9][2020/3239]	Time 0.178 (0.235)	Data 0.001 (0.010)	Loss 5.4971 (5.4290)	Top-1 acc 8.984 (7.662)	Top-5 acc 19.141 (20.883)	lr 0.04448
Warmup Train [9][2030/3239]	Time 0.268 (0.235)	Data 0.001 (0.010)	Loss 5.5573 (5.4288)	Top-1 acc 4.688 (7.664)	Top-5 acc 17.969 (20.884)	lr 0.04448
Warmup Train [9][2040/3239]	Time 0.172 (0.235)	Data 0.001 (0.010)	Loss 5.1910 (5.4287)	Top-1 acc 9.766 (7.662)	Top-5 acc 22.656 (20.888)	lr 0.04447
Warmup Train [9][2050/3239]	Time 0.148 (0.235)	Data 0.001 (0.010)	Loss 5.4490 (5.4284)	Top-1 acc 7.422 (7.659)	Top-5 acc 20.703 (20.888)	lr 0.04447
Warmup Train [9][2060/3239]	Time 0.364 (0.235)	Data 0.001 (0.010)	Loss 5.4314 (5.4281)	Top-1 acc 10.938 (7.665)	Top-5 acc 23.047 (20.899)	lr 0.04447
Warmup Train [9][2070/3239]	Time 0.291 (0.235)	Data 0.001 (0.010)	Loss 5.5105 (5.4280)	Top-1 acc 7.422 (7.669)	Top-5 acc 17.578 (20.901)	lr 0.04446
Warmup Train [9][2080/3239]	Time 0.231 (0.235)	Data 0.001 (0.010)	Loss 5.4605 (5.4280)	Top-1 acc 5.078 (7.674)	Top-5 acc 17.969 (20.901)	lr 0.04446
Warmup Train [9][2090/3239]	Time 0.215 (0.235)	Data 0.001 (0.010)	Loss 5.2474 (5.4278)	Top-1 acc 6.250 (7.676)	Top-5 acc 24.219 (20.900)	lr 0.04446
Warmup Train [9][2100/3239]	Time 0.248 (0.235)	Data 0.001 (0.010)	Loss 5.3686 (5.4275)	Top-1 acc 6.641 (7.676)	Top-5 acc 18.359 (20.902)	lr 0.04445
Warmup Train [9][2110/3239]	Time 0.243 (0.235)	Data 0.001 (0.010)	Loss 5.5303 (5.4274)	Top-1 acc 6.641 (7.679)	Top-5 acc 18.750 (20.909)	lr 0.04445
Warmup Train [9][2120/3239]	Time 0.214 (0.235)	Data 0.001 (0.010)	Loss 5.2896 (5.4273)	Top-1 acc 9.375 (7.680)	Top-5 acc 23.438 (20.908)	lr 0.04444
Warmup Train [9][2130/3239]	Time 0.226 (0.235)	Data 0.001 (0.010)	Loss 5.3466 (5.4274)	Top-1 acc 8.594 (7.681)	Top-5 acc 21.875 (20.909)	lr 0.04444
Warmup Train [9][2140/3239]	Time 0.221 (0.235)	Data 0.001 (0.010)	Loss 5.1941 (5.4272)	Top-1 acc 10.156 (7.684)	Top-5 acc 28.125 (20.912)	lr 0.04444
Warmup Train [9][2150/3239]	Time 0.217 (0.234)	Data 0.001 (0.010)	Loss 5.4813 (5.4270)	Top-1 acc 10.547 (7.692)	Top-5 acc 22.656 (20.926)	lr 0.04443
Warmup Train [9][2160/3239]	Time 0.235 (0.234)	Data 0.001 (0.010)	Loss 5.4454 (5.4266)	Top-1 acc 6.641 (7.694)	Top-5 acc 20.703 (20.933)	lr 0.04443
Warmup Train [9][2170/3239]	Time 0.304 (0.234)	Data 0.001 (0.010)	Loss 5.3755 (5.4264)	Top-1 acc 7.422 (7.694)	Top-5 acc 22.656 (20.932)	lr 0.04442
Warmup Train [9][2180/3239]	Time 0.262 (0.234)	Data 0.001 (0.010)	Loss 5.4817 (5.4262)	Top-1 acc 6.641 (7.698)	Top-5 acc 19.141 (20.937)	lr 0.04442
Warmup Train [9][2190/3239]	Time 0.221 (0.234)	Data 0.001 (0.010)	Loss 5.3300 (5.4260)	Top-1 acc 7.422 (7.699)	Top-5 acc 21.484 (20.938)	lr 0.04442
Warmup Train [9][2200/3239]	Time 0.209 (0.234)	Data 0.001 (0.010)	Loss 5.2758 (5.4256)	Top-1 acc 10.156 (7.707)	Top-5 acc 25.391 (20.946)	lr 0.04441
Warmup Train [9][2210/3239]	Time 0.178 (0.234)	Data 0.001 (0.010)	Loss 5.2392 (5.4253)	Top-1 acc 9.766 (7.711)	Top-5 acc 26.172 (20.956)	lr 0.04441
Warmup Train [9][2220/3239]	Time 0.177 (0.234)	Data 0.001 (0.010)	Loss 5.5196 (5.4252)	Top-1 acc 6.641 (7.706)	Top-5 acc 17.188 (20.955)	lr 0.04441
Warmup Train [9][2230/3239]	Time 0.245 (0.234)	Data 0.001 (0.010)	Loss 5.4382 (5.4252)	Top-1 acc 5.078 (7.705)	Top-5 acc 22.656 (20.960)	lr 0.04440
Warmup Train [9][2240/3239]	Time 0.199 (0.234)	Data 0.001 (0.009)	Loss 5.3279 (5.4252)	Top-1 acc 8.203 (7.705)	Top-5 acc 21.094 (20.957)	lr 0.04440
Warmup Train [9][2250/3239]	Time 0.200 (0.234)	Data 0.001 (0.009)	Loss 5.5817 (5.4252)	Top-1 acc 2.734 (7.706)	Top-5 acc 14.453 (20.960)	lr 0.04439
Warmup Train [9][2260/3239]	Time 0.224 (0.234)	Data 0.001 (0.009)	Loss 5.3730 (5.4249)	Top-1 acc 7.422 (7.710)	Top-5 acc 22.656 (20.965)	lr 0.04439
Warmup Train [9][2270/3239]	Time 0.215 (0.234)	Data 0.002 (0.009)	Loss 5.4256 (5.4250)	Top-1 acc 8.203 (7.707)	Top-5 acc 18.750 (20.964)	lr 0.04439
Warmup Train [9][2280/3239]	Time 0.281 (0.234)	Data 0.001 (0.009)	Loss 5.5321 (5.4249)	Top-1 acc 7.031 (7.710)	Top-5 acc 20.703 (20.968)	lr 0.04438
Warmup Train [9][2290/3239]	Time 0.196 (0.234)	Data 0.001 (0.009)	Loss 5.4117 (5.4247)	Top-1 acc 7.422 (7.712)	Top-5 acc 22.656 (20.972)	lr 0.04438
Warmup Train [9][2300/3239]	Time 0.180 (0.234)	Data 0.003 (0.009)	Loss 5.3411 (5.4245)	Top-1 acc 10.156 (7.715)	Top-5 acc 24.219 (20.979)	lr 0.04438
Warmup Train [9][2310/3239]	Time 0.185 (0.234)	Data 0.001 (0.009)	Loss 5.3054 (5.4243)	Top-1 acc 10.938 (7.714)	Top-5 acc 25.781 (20.981)	lr 0.04437
Warmup Train [9][2320/3239]	Time 0.256 (0.234)	Data 0.001 (0.009)	Loss 5.3591 (5.4241)	Top-1 acc 8.203 (7.720)	Top-5 acc 24.219 (20.987)	lr 0.04437
Warmup Train [9][2330/3239]	Time 0.162 (0.233)	Data 0.001 (0.009)	Loss 5.2348 (5.4239)	Top-1 acc 12.109 (7.723)	Top-5 acc 25.781 (20.992)	lr 0.04436
Warmup Train [9][2340/3239]	Time 0.175 (0.233)	Data 0.001 (0.009)	Loss 5.3533 (5.4237)	Top-1 acc 8.984 (7.724)	Top-5 acc 22.656 (20.989)	lr 0.04436
Warmup Train [9][2350/3239]	Time 0.249 (0.233)	Data 0.002 (0.009)	Loss 5.4077 (5.4236)	Top-1 acc 7.812 (7.724)	Top-5 acc 21.094 (20.992)	lr 0.04436
Warmup Train [9][2360/3239]	Time 0.204 (0.233)	Data 0.001 (0.009)	Loss 5.3088 (5.4235)	Top-1 acc 5.859 (7.724)	Top-5 acc 20.703 (20.992)	lr 0.04435
Warmup Train [9][2370/3239]	Time 0.254 (0.233)	Data 0.027 (0.009)	Loss 5.3658 (5.4232)	Top-1 acc 6.250 (7.727)	Top-5 acc 17.188 (20.991)	lr 0.04435
Warmup Train [9][2380/3239]	Time 0.334 (0.233)	Data 0.001 (0.009)	Loss 5.3658 (5.4231)	Top-1 acc 9.375 (7.729)	Top-5 acc 21.875 (20.991)	lr 0.04434
Warmup Train [9][2390/3239]	Time 0.210 (0.233)	Data 0.001 (0.009)	Loss 5.4324 (5.4230)	Top-1 acc 7.812 (7.731)	Top-5 acc 21.094 (20.997)	lr 0.04434
Warmup Train [9][2400/3239]	Time 0.268 (0.233)	Data 0.002 (0.009)	Loss 5.3289 (5.4228)	Top-1 acc 8.594 (7.733)	Top-5 acc 22.656 (21.006)	lr 0.04434
Warmup Train [9][2410/3239]	Time 0.226 (0.233)	Data 0.001 (0.009)	Loss 5.3817 (5.4227)	Top-1 acc 8.203 (7.734)	Top-5 acc 18.359 (21.003)	lr 0.04433
Warmup Train [9][2420/3239]	Time 0.222 (0.233)	Data 0.001 (0.009)	Loss 5.3542 (5.4225)	Top-1 acc 8.984 (7.738)	Top-5 acc 21.094 (21.011)	lr 0.04433
Warmup Train [9][2430/3239]	Time 0.165 (0.233)	Data 0.001 (0.009)	Loss 5.4827 (5.4223)	Top-1 acc 7.031 (7.739)	Top-5 acc 20.703 (21.008)	lr 0.04433
Warmup Train [9][2440/3239]	Time 0.233 (0.233)	Data 0.001 (0.009)	Loss 5.2879 (5.4221)	Top-1 acc 10.547 (7.743)	Top-5 acc 25.391 (21.008)	lr 0.04432
Warmup Train [9][2450/3239]	Time 0.209 (0.233)	Data 0.001 (0.009)	Loss 5.2655 (5.4219)	Top-1 acc 9.375 (7.744)	Top-5 acc 25.781 (21.014)	lr 0.04432
Warmup Train [9][2460/3239]	Time 0.247 (0.233)	Data 0.001 (0.009)	Loss 5.3094 (5.4217)	Top-1 acc 7.031 (7.747)	Top-5 acc 22.266 (21.020)	lr 0.04431
Warmup Train [9][2470/3239]	Time 0.268 (0.233)	Data 0.002 (0.009)	Loss 5.3617 (5.4215)	Top-1 acc 7.031 (7.748)	Top-5 acc 26.562 (21.020)	lr 0.04431
Warmup Train [9][2480/3239]	Time 0.275 (0.233)	Data 0.001 (0.009)	Loss 5.4157 (5.4213)	Top-1 acc 7.031 (7.751)	Top-5 acc 19.922 (21.025)	lr 0.04431
Warmup Train [9][2490/3239]	Time 0.229 (0.233)	Data 0.001 (0.009)	Loss 5.3817 (5.4211)	Top-1 acc 7.812 (7.754)	Top-5 acc 23.047 (21.033)	lr 0.04430
Warmup Train [9][2500/3239]	Time 0.269 (0.233)	Data 0.001 (0.009)	Loss 5.2955 (5.4209)	Top-1 acc 10.156 (7.755)	Top-5 acc 26.562 (21.038)	lr 0.04430
Warmup Train [9][2510/3239]	Time 0.234 (0.233)	Data 0.001 (0.009)	Loss 5.3721 (5.4208)	Top-1 acc 10.547 (7.756)	Top-5 acc 25.000 (21.041)	lr 0.04429
Warmup Train [9][2520/3239]	Time 0.268 (0.233)	Data 0.001 (0.009)	Loss 5.3204 (5.4207)	Top-1 acc 8.203 (7.759)	Top-5 acc 23.828 (21.042)	lr 0.04429
Warmup Train [9][2530/3239]	Time 0.199 (0.233)	Data 0.001 (0.009)	Loss 5.4061 (5.4206)	Top-1 acc 8.984 (7.760)	Top-5 acc 20.703 (21.045)	lr 0.04429
Warmup Train [9][2540/3239]	Time 0.196 (0.233)	Data 0.001 (0.009)	Loss 5.3420 (5.4205)	Top-1 acc 9.766 (7.763)	Top-5 acc 20.703 (21.047)	lr 0.04428
Warmup Train [9][2550/3239]	Time 0.215 (0.233)	Data 0.001 (0.009)	Loss 5.3616 (5.4204)	Top-1 acc 7.422 (7.763)	Top-5 acc 21.094 (21.048)	lr 0.04428
Warmup Train [9][2560/3239]	Time 0.234 (0.233)	Data 0.001 (0.009)	Loss 5.3970 (5.4202)	Top-1 acc 7.812 (7.767)	Top-5 acc 21.875 (21.051)	lr 0.04428
Warmup Train [9][2570/3239]	Time 0.229 (0.233)	Data 0.001 (0.009)	Loss 5.3825 (5.4200)	Top-1 acc 9.375 (7.772)	Top-5 acc 24.609 (21.062)	lr 0.04427
Warmup Train [9][2580/3239]	Time 0.383 (0.233)	Data 0.001 (0.009)	Loss 5.3243 (5.4198)	Top-1 acc 8.203 (7.775)	Top-5 acc 23.047 (21.068)	lr 0.04427
Warmup Train [9][2590/3239]	Time 0.219 (0.233)	Data 0.001 (0.009)	Loss 5.4211 (5.4195)	Top-1 acc 8.984 (7.777)	Top-5 acc 22.656 (21.074)	lr 0.04426
Warmup Train [9][2600/3239]	Time 0.234 (0.233)	Data 0.001 (0.009)	Loss 5.4178 (5.4195)	Top-1 acc 6.641 (7.776)	Top-5 acc 21.484 (21.072)	lr 0.04426
Warmup Train [9][2610/3239]	Time 0.219 (0.233)	Data 0.001 (0.009)	Loss 5.2107 (5.4194)	Top-1 acc 10.938 (7.779)	Top-5 acc 27.344 (21.080)	lr 0.04426
Warmup Train [9][2620/3239]	Time 0.192 (0.232)	Data 0.001 (0.009)	Loss 5.3919 (5.4192)	Top-1 acc 7.812 (7.782)	Top-5 acc 21.094 (21.082)	lr 0.04425
Warmup Train [9][2630/3239]	Time 0.216 (0.233)	Data 0.001 (0.008)	Loss 5.3751 (5.4190)	Top-1 acc 7.031 (7.788)	Top-5 acc 22.656 (21.087)	lr 0.04425
Warmup Train [9][2640/3239]	Time 0.207 (0.232)	Data 0.002 (0.008)	Loss 5.4037 (5.4188)	Top-1 acc 8.984 (7.792)	Top-5 acc 19.922 (21.094)	lr 0.04424
Warmup Train [9][2650/3239]	Time 0.143 (0.232)	Data 0.001 (0.008)	Loss 5.4314 (5.4185)	Top-1 acc 7.422 (7.797)	Top-5 acc 21.484 (21.103)	lr 0.04424
Warmup Train [9][2660/3239]	Time 0.275 (0.232)	Data 0.001 (0.008)	Loss 5.3465 (5.4184)	Top-1 acc 7.031 (7.797)	Top-5 acc 23.828 (21.105)	lr 0.04424
Warmup Train [9][2670/3239]	Time 0.251 (0.232)	Data 0.001 (0.008)	Loss 5.3428 (5.4182)	Top-1 acc 8.984 (7.795)	Top-5 acc 23.047 (21.104)	lr 0.04423
Warmup Train [9][2680/3239]	Time 0.201 (0.232)	Data 0.001 (0.008)	Loss 5.4697 (5.4180)	Top-1 acc 6.250 (7.797)	Top-5 acc 22.656 (21.108)	lr 0.04423
Warmup Train [9][2690/3239]	Time 0.300 (0.232)	Data 0.001 (0.008)	Loss 5.3968 (5.4180)	Top-1 acc 8.984 (7.798)	Top-5 acc 19.141 (21.106)	lr 0.04422
Warmup Train [9][2700/3239]	Time 0.359 (0.232)	Data 0.001 (0.008)	Loss 5.4394 (5.4177)	Top-1 acc 7.812 (7.799)	Top-5 acc 21.484 (21.112)	lr 0.04422
Warmup Train [9][2710/3239]	Time 0.200 (0.232)	Data 0.001 (0.008)	Loss 5.4064 (5.4175)	Top-1 acc 4.688 (7.801)	Top-5 acc 17.969 (21.118)	lr 0.04422
Warmup Train [9][2720/3239]	Time 0.176 (0.232)	Data 0.001 (0.008)	Loss 5.3536 (5.4174)	Top-1 acc 9.766 (7.804)	Top-5 acc 25.391 (21.124)	lr 0.04421
Warmup Train [9][2730/3239]	Time 0.230 (0.232)	Data 0.002 (0.008)	Loss 5.4346 (5.4173)	Top-1 acc 6.641 (7.807)	Top-5 acc 23.047 (21.131)	lr 0.04421
Warmup Train [9][2740/3239]	Time 0.212 (0.232)	Data 0.001 (0.008)	Loss 5.2996 (5.4170)	Top-1 acc 7.422 (7.811)	Top-5 acc 23.828 (21.136)	lr 0.04421
Warmup Train [9][2750/3239]	Time 0.166 (0.232)	Data 0.001 (0.008)	Loss 5.5704 (5.4170)	Top-1 acc 7.422 (7.810)	Top-5 acc 21.484 (21.138)	lr 0.04420
Warmup Train [9][2760/3239]	Time 0.245 (0.232)	Data 0.025 (0.008)	Loss 5.4390 (5.4168)	Top-1 acc 5.078 (7.810)	Top-5 acc 20.703 (21.142)	lr 0.04420
Warmup Train [9][2770/3239]	Time 0.227 (0.232)	Data 0.001 (0.008)	Loss 5.3300 (5.4166)	Top-1 acc 12.109 (7.815)	Top-5 acc 23.438 (21.150)	lr 0.04419
Warmup Train [9][2780/3239]	Time 0.234 (0.232)	Data 0.001 (0.008)	Loss 5.2253 (5.4163)	Top-1 acc 11.328 (7.816)	Top-5 acc 22.656 (21.150)	lr 0.04419
Warmup Train [9][2790/3239]	Time 0.351 (0.232)	Data 0.001 (0.008)	Loss 5.2844 (5.4162)	Top-1 acc 12.109 (7.821)	Top-5 acc 29.688 (21.159)	lr 0.04419
Warmup Train [9][2800/3239]	Time 0.162 (0.232)	Data 0.001 (0.008)	Loss 5.3676 (5.4159)	Top-1 acc 7.812 (7.822)	Top-5 acc 22.656 (21.161)	lr 0.04418
Warmup Train [9][2810/3239]	Time 0.208 (0.232)	Data 0.001 (0.008)	Loss 5.3914 (5.4157)	Top-1 acc 6.641 (7.824)	Top-5 acc 22.656 (21.164)	lr 0.04418
Warmup Train [9][2820/3239]	Time 0.211 (0.232)	Data 0.001 (0.008)	Loss 5.3979 (5.4156)	Top-1 acc 8.203 (7.826)	Top-5 acc 24.609 (21.170)	lr 0.04417
Warmup Train [9][2830/3239]	Time 0.245 (0.232)	Data 0.001 (0.008)	Loss 5.4342 (5.4155)	Top-1 acc 8.203 (7.827)	Top-5 acc 21.094 (21.168)	lr 0.04417
Warmup Train [9][2840/3239]	Time 0.144 (0.232)	Data 0.001 (0.008)	Loss 5.2954 (5.4153)	Top-1 acc 5.859 (7.827)	Top-5 acc 18.750 (21.169)	lr 0.04417
Warmup Train [9][2850/3239]	Time 0.289 (0.232)	Data 0.001 (0.008)	Loss 5.4249 (5.4152)	Top-1 acc 9.375 (7.826)	Top-5 acc 23.828 (21.172)	lr 0.04416
Warmup Train [9][2860/3239]	Time 0.190 (0.232)	Data 0.001 (0.008)	Loss 5.3506 (5.4151)	Top-1 acc 10.938 (7.829)	Top-5 acc 26.562 (21.177)	lr 0.04416
Warmup Train [9][2870/3239]	Time 0.148 (0.232)	Data 0.001 (0.008)	Loss 5.4442 (5.4150)	Top-1 acc 7.812 (7.831)	Top-5 acc 19.141 (21.181)	lr 0.04416
Warmup Train [9][2880/3239]	Time 0.132 (0.232)	Data 0.001 (0.008)	Loss 5.2104 (5.4148)	Top-1 acc 8.594 (7.832)	Top-5 acc 26.172 (21.185)	lr 0.04415
Warmup Train [9][2890/3239]	Time 0.184 (0.232)	Data 0.001 (0.008)	Loss 5.3526 (5.4146)	Top-1 acc 7.812 (7.834)	Top-5 acc 23.828 (21.192)	lr 0.04415
Warmup Train [9][2900/3239]	Time 0.170 (0.232)	Data 0.001 (0.008)	Loss 5.3730 (5.4145)	Top-1 acc 10.156 (7.836)	Top-5 acc 22.266 (21.198)	lr 0.04414
Warmup Train [9][2910/3239]	Time 0.215 (0.232)	Data 0.001 (0.008)	Loss 5.4372 (5.4144)	Top-1 acc 9.375 (7.836)	Top-5 acc 22.266 (21.200)	lr 0.04414
Warmup Train [9][2920/3239]	Time 0.404 (0.232)	Data 0.001 (0.008)	Loss 5.4463 (5.4145)	Top-1 acc 5.469 (7.835)	Top-5 acc 19.531 (21.198)	lr 0.04414
Warmup Train [9][2930/3239]	Time 0.183 (0.232)	Data 0.001 (0.008)	Loss 5.4374 (5.4145)	Top-1 acc 8.203 (7.838)	Top-5 acc 20.703 (21.200)	lr 0.04413
Warmup Train [9][2940/3239]	Time 0.271 (0.232)	Data 0.001 (0.008)	Loss 5.2956 (5.4144)	Top-1 acc 8.984 (7.840)	Top-5 acc 24.219 (21.203)	lr 0.04413
Warmup Train [9][2950/3239]	Time 0.221 (0.232)	Data 0.001 (0.008)	Loss 5.3911 (5.4142)	Top-1 acc 7.812 (7.842)	Top-5 acc 17.969 (21.208)	lr 0.04412
Warmup Train [9][2960/3239]	Time 0.138 (0.232)	Data 0.001 (0.008)	Loss 5.4628 (5.4141)	Top-1 acc 10.938 (7.844)	Top-5 acc 18.750 (21.210)	lr 0.04412
Warmup Train [9][2970/3239]	Time 0.151 (0.232)	Data 0.001 (0.008)	Loss 5.2897 (5.4140)	Top-1 acc 8.594 (7.848)	Top-5 acc 25.000 (21.215)	lr 0.04412
Warmup Train [9][2980/3239]	Time 0.207 (0.232)	Data 0.001 (0.008)	Loss 5.4124 (5.4138)	Top-1 acc 7.031 (7.849)	Top-5 acc 21.484 (21.218)	lr 0.04411
Warmup Train [9][2990/3239]	Time 0.223 (0.232)	Data 0.002 (0.008)	Loss 5.4507 (5.4136)	Top-1 acc 8.594 (7.851)	Top-5 acc 20.312 (21.222)	lr 0.04411
Warmup Train [9][3000/3239]	Time 0.228 (0.232)	Data 0.001 (0.008)	Loss 5.2318 (5.4134)	Top-1 acc 10.547 (7.852)	Top-5 acc 26.562 (21.226)	lr 0.04410
Warmup Train [9][3010/3239]	Time 0.182 (0.231)	Data 0.001 (0.008)	Loss 5.3194 (5.4132)	Top-1 acc 9.766 (7.855)	Top-5 acc 22.656 (21.227)	lr 0.04410
Warmup Train [9][3020/3239]	Time 0.305 (0.232)	Data 0.001 (0.008)	Loss 5.4269 (5.4130)	Top-1 acc 7.812 (7.856)	Top-5 acc 20.703 (21.227)	lr 0.04410
Warmup Train [9][3030/3239]	Time 0.174 (0.231)	Data 0.001 (0.008)	Loss 5.3026 (5.4130)	Top-1 acc 9.766 (7.856)	Top-5 acc 26.953 (21.229)	lr 0.04409
Warmup Train [9][3040/3239]	Time 0.287 (0.231)	Data 0.002 (0.008)	Loss 5.4326 (5.4129)	Top-1 acc 6.250 (7.859)	Top-5 acc 21.484 (21.236)	lr 0.04409
Warmup Train [9][3050/3239]	Time 0.193 (0.231)	Data 0.001 (0.008)	Loss 5.4198 (5.4128)	Top-1 acc 7.422 (7.862)	Top-5 acc 20.312 (21.239)	lr 0.04408
Warmup Train [9][3060/3239]	Time 0.203 (0.231)	Data 0.001 (0.008)	Loss 5.2201 (5.4127)	Top-1 acc 11.719 (7.864)	Top-5 acc 26.172 (21.242)	lr 0.04408
Warmup Train [9][3070/3239]	Time 0.156 (0.231)	Data 0.001 (0.008)	Loss 5.2773 (5.4126)	Top-1 acc 9.375 (7.866)	Top-5 acc 23.828 (21.245)	lr 0.04408
Warmup Train [9][3080/3239]	Time 0.206 (0.231)	Data 0.001 (0.008)	Loss 5.4130 (5.4125)	Top-1 acc 9.766 (7.865)	Top-5 acc 22.266 (21.248)	lr 0.04407
Warmup Train [9][3090/3239]	Time 0.179 (0.231)	Data 0.001 (0.008)	Loss 5.4175 (5.4124)	Top-1 acc 7.812 (7.865)	Top-5 acc 19.531 (21.249)	lr 0.04407
Warmup Train [9][3100/3239]	Time 0.167 (0.231)	Data 0.001 (0.008)	Loss 5.3457 (5.4122)	Top-1 acc 8.594 (7.866)	Top-5 acc 21.484 (21.255)	lr 0.04407
Warmup Train [9][3110/3239]	Time 0.214 (0.231)	Data 0.002 (0.008)	Loss 5.3413 (5.4120)	Top-1 acc 10.156 (7.869)	Top-5 acc 21.875 (21.262)	lr 0.04406
Warmup Train [9][3120/3239]	Time 0.297 (0.231)	Data 0.001 (0.008)	Loss 5.3775 (5.4119)	Top-1 acc 8.984 (7.869)	Top-5 acc 19.531 (21.265)	lr 0.04406
Warmup Train [9][3130/3239]	Time 0.206 (0.231)	Data 0.001 (0.008)	Loss 5.4264 (5.4119)	Top-1 acc 7.031 (7.867)	Top-5 acc 20.312 (21.261)	lr 0.04405
Warmup Train [9][3140/3239]	Time 0.211 (0.231)	Data 0.002 (0.008)	Loss 5.5777 (5.4119)	Top-1 acc 5.078 (7.867)	Top-5 acc 19.531 (21.259)	lr 0.04405
Warmup Train [9][3150/3239]	Time 0.187 (0.231)	Data 0.001 (0.008)	Loss 5.3883 (5.4116)	Top-1 acc 12.109 (7.872)	Top-5 acc 24.219 (21.267)	lr 0.04405
Warmup Train [9][3160/3239]	Time 0.209 (0.231)	Data 0.001 (0.008)	Loss 5.3876 (5.4115)	Top-1 acc 10.938 (7.876)	Top-5 acc 23.047 (21.268)	lr 0.04404
Warmup Train [9][3170/3239]	Time 0.171 (0.231)	Data 0.001 (0.008)	Loss 5.5681 (5.4113)	Top-1 acc 5.469 (7.877)	Top-5 acc 17.188 (21.271)	lr 0.04404
Warmup Train [9][3180/3239]	Time 0.225 (0.231)	Data 0.000 (0.008)	Loss 5.2683 (5.4111)	Top-1 acc 9.375 (7.879)	Top-5 acc 23.047 (21.275)	lr 0.04403
Warmup Train [9][3190/3239]	Time 0.208 (0.231)	Data 0.000 (0.008)	Loss 5.3933 (5.4109)	Top-1 acc 9.375 (7.883)	Top-5 acc 21.484 (21.277)	lr 0.04403
Warmup Train [9][3200/3239]	Time 0.191 (0.231)	Data 0.000 (0.008)	Loss 5.5005 (5.4108)	Top-1 acc 6.641 (7.883)	Top-5 acc 16.406 (21.277)	lr 0.04403
Warmup Train [9][3210/3239]	Time 0.172 (0.231)	Data 0.000 (0.008)	Loss 5.3792 (5.4105)	Top-1 acc 4.297 (7.885)	Top-5 acc 22.656 (21.281)	lr 0.04402
Warmup Train [9][3220/3239]	Time 0.346 (0.231)	Data 0.000 (0.008)	Loss 5.3163 (5.4105)	Top-1 acc 10.547 (7.885)	Top-5 acc 24.219 (21.285)	lr 0.04402
Warmup Train [9][3230/3239]	Time 0.202 (0.231)	Data 0.000 (0.008)	Loss 5.2819 (5.4102)	Top-1 acc 8.203 (7.888)	Top-5 acc 26.953 (21.293)	lr 0.04401
Warmup Train [9][3239/3239]	Time 0.174 (0.231)	Data 0.000 (0.008)	Loss 5.0800 (5.4102)	Top-1 acc 12.346 (7.888)	Top-5 acc 33.333 (21.292)	lr 0.04401
==========Warmup Valid [9/40]	loss 4.697	top-1 acc 11.504	top-5 acc 28.721	Train top-1 7.888	top-5 21.292	flops: 442.4M
Warmup Train [10][0/3239]	Time 17.559 (17.559)	Data 16.865 (16.865)	Loss 5.3161 (5.3161)	Top-1 acc 8.203 (8.203)	Top-5 acc 19.531 (19.531)	lr 0.04401
Warmup Train [10][10/3239]	Time 0.282 (2.015)	Data 0.002 (1.646)	Loss 5.2604 (5.3203)	Top-1 acc 9.375 (9.126)	Top-5 acc 23.438 (22.798)	lr 0.04401
Warmup Train [10][20/3239]	Time 0.309 (1.188)	Data 0.001 (0.863)	Loss 5.3442 (5.3352)	Top-1 acc 8.594 (8.984)	Top-5 acc 22.656 (23.140)	lr 0.04400
Warmup Train [10][30/3239]	Time 0.230 (0.882)	Data 0.001 (0.585)	Loss 5.4165 (5.3455)	Top-1 acc 6.641 (8.695)	Top-5 acc 19.141 (22.631)	lr 0.04400
Warmup Train [10][40/3239]	Time 0.245 (0.724)	Data 0.001 (0.443)	Loss 5.2733 (5.3287)	Top-1 acc 8.984 (8.699)	Top-5 acc 23.438 (22.856)	lr 0.04399
Warmup Train [10][50/3239]	Time 0.194 (0.626)	Data 0.001 (0.356)	Loss 5.2005 (5.3385)	Top-1 acc 7.812 (8.464)	Top-5 acc 24.609 (22.358)	lr 0.04399
Warmup Train [10][60/3239]	Time 0.252 (0.562)	Data 0.001 (0.298)	Loss 5.4307 (5.3388)	Top-1 acc 8.984 (8.389)	Top-5 acc 19.141 (22.483)	lr 0.04399
Warmup Train [10][70/3239]	Time 0.285 (0.520)	Data 0.002 (0.256)	Loss 5.3526 (5.3419)	Top-1 acc 8.984 (8.352)	Top-5 acc 23.828 (22.557)	lr 0.04398
Warmup Train [10][80/3239]	Time 0.260 (0.485)	Data 0.001 (0.225)	Loss 5.3726 (5.3423)	Top-1 acc 8.594 (8.401)	Top-5 acc 23.828 (22.512)	lr 0.04398
Warmup Train [10][90/3239]	Time 0.309 (0.457)	Data 0.001 (0.200)	Loss 5.3861 (5.3438)	Top-1 acc 10.547 (8.495)	Top-5 acc 20.312 (22.510)	lr 0.04397
Warmup Train [10][100/3239]	Time 0.192 (0.435)	Data 0.003 (0.181)	Loss 5.3628 (5.3474)	Top-1 acc 7.031 (8.474)	Top-5 acc 21.875 (22.447)	lr 0.04397
Warmup Train [10][110/3239]	Time 0.236 (0.417)	Data 0.002 (0.165)	Loss 5.4310 (5.3487)	Top-1 acc 6.250 (8.449)	Top-5 acc 18.750 (22.420)	lr 0.04397
Warmup Train [10][120/3239]	Time 0.313 (0.401)	Data 0.001 (0.152)	Loss 5.4193 (5.3508)	Top-1 acc 5.469 (8.452)	Top-5 acc 20.312 (22.317)	lr 0.04396
Warmup Train [10][130/3239]	Time 0.184 (0.387)	Data 0.001 (0.141)	Loss 5.1945 (5.3494)	Top-1 acc 10.156 (8.471)	Top-5 acc 26.172 (22.325)	lr 0.04396
Warmup Train [10][140/3239]	Time 0.208 (0.375)	Data 0.001 (0.131)	Loss 5.2249 (5.3482)	Top-1 acc 10.938 (8.591)	Top-5 acc 30.469 (22.462)	lr 0.04395
Warmup Train [10][150/3239]	Time 0.166 (0.365)	Data 0.001 (0.123)	Loss 5.2838 (5.3501)	Top-1 acc 6.641 (8.529)	Top-5 acc 23.438 (22.447)	lr 0.04395
Warmup Train [10][160/3239]	Time 0.179 (0.355)	Data 0.001 (0.115)	Loss 5.2361 (5.3520)	Top-1 acc 9.375 (8.489)	Top-5 acc 21.094 (22.382)	lr 0.04395
Warmup Train [10][170/3239]	Time 0.228 (0.348)	Data 0.001 (0.109)	Loss 5.3152 (5.3561)	Top-1 acc 9.375 (8.489)	Top-5 acc 25.000 (22.316)	lr 0.04394
Warmup Train [10][180/3239]	Time 0.131 (0.340)	Data 0.001 (0.103)	Loss 5.4178 (5.3546)	Top-1 acc 6.250 (8.538)	Top-5 acc 17.188 (22.406)	lr 0.04394
Warmup Train [10][190/3239]	Time 0.213 (0.334)	Data 0.001 (0.098)	Loss 5.3100 (5.3546)	Top-1 acc 12.500 (8.588)	Top-5 acc 25.781 (22.386)	lr 0.04394
Warmup Train [10][200/3239]	Time 0.258 (0.329)	Data 0.001 (0.093)	Loss 5.3565 (5.3562)	Top-1 acc 10.938 (8.607)	Top-5 acc 21.484 (22.365)	lr 0.04393
Warmup Train [10][210/3239]	Time 0.285 (0.324)	Data 0.001 (0.089)	Loss 5.4331 (5.3564)	Top-1 acc 9.766 (8.623)	Top-5 acc 21.875 (22.375)	lr 0.04393
Warmup Train [10][220/3239]	Time 0.184 (0.319)	Data 0.001 (0.085)	Loss 5.4351 (5.3582)	Top-1 acc 5.859 (8.596)	Top-5 acc 23.047 (22.363)	lr 0.04392
Warmup Train [10][230/3239]	Time 0.323 (0.315)	Data 0.001 (0.081)	Loss 5.3246 (5.3542)	Top-1 acc 8.203 (8.619)	Top-5 acc 27.734 (22.450)	lr 0.04392
Warmup Train [10][240/3239]	Time 0.268 (0.312)	Data 0.001 (0.078)	Loss 5.1472 (5.3532)	Top-1 acc 8.984 (8.650)	Top-5 acc 27.344 (22.510)	lr 0.04392
Warmup Train [10][250/3239]	Time 0.235 (0.308)	Data 0.001 (0.075)	Loss 5.2807 (5.3510)	Top-1 acc 8.594 (8.672)	Top-5 acc 25.781 (22.544)	lr 0.04391
Warmup Train [10][260/3239]	Time 0.139 (0.305)	Data 0.001 (0.072)	Loss 5.3840 (5.3505)	Top-1 acc 8.984 (8.633)	Top-5 acc 23.047 (22.516)	lr 0.04391
Warmup Train [10][270/3239]	Time 0.200 (0.301)	Data 0.001 (0.070)	Loss 5.3065 (5.3503)	Top-1 acc 9.766 (8.651)	Top-5 acc 24.609 (22.525)	lr 0.04390
Warmup Train [10][280/3239]	Time 0.219 (0.299)	Data 0.001 (0.068)	Loss 5.5524 (5.3504)	Top-1 acc 7.812 (8.635)	Top-5 acc 21.094 (22.531)	lr 0.04390
Warmup Train [10][290/3239]	Time 0.188 (0.296)	Data 0.001 (0.065)	Loss 5.3699 (5.3512)	Top-1 acc 10.156 (8.625)	Top-5 acc 22.656 (22.503)	lr 0.04390
Warmup Train [10][300/3239]	Time 0.149 (0.293)	Data 0.001 (0.063)	Loss 5.3896 (5.3506)	Top-1 acc 9.375 (8.611)	Top-5 acc 23.438 (22.484)	lr 0.04389
Warmup Train [10][310/3239]	Time 0.236 (0.291)	Data 0.002 (0.061)	Loss 5.1744 (5.3486)	Top-1 acc 9.375 (8.619)	Top-5 acc 24.609 (22.516)	lr 0.04389
Warmup Train [10][320/3239]	Time 0.213 (0.289)	Data 0.001 (0.059)	Loss 5.4035 (5.3493)	Top-1 acc 8.203 (8.612)	Top-5 acc 23.047 (22.487)	lr 0.04388
Warmup Train [10][330/3239]	Time 0.329 (0.287)	Data 0.001 (0.058)	Loss 5.2983 (5.3480)	Top-1 acc 6.250 (8.606)	Top-5 acc 22.656 (22.511)	lr 0.04388
Warmup Train [10][340/3239]	Time 0.247 (0.285)	Data 0.001 (0.056)	Loss 5.2142 (5.3481)	Top-1 acc 10.156 (8.606)	Top-5 acc 25.000 (22.515)	lr 0.04388
Warmup Train [10][350/3239]	Time 0.211 (0.284)	Data 0.001 (0.055)	Loss 5.4214 (5.3484)	Top-1 acc 6.641 (8.614)	Top-5 acc 21.875 (22.522)	lr 0.04387
Warmup Train [10][360/3239]	Time 0.168 (0.281)	Data 0.001 (0.053)	Loss 5.5313 (5.3487)	Top-1 acc 5.859 (8.595)	Top-5 acc 15.625 (22.518)	lr 0.04387
Warmup Train [10][370/3239]	Time 0.155 (0.280)	Data 0.001 (0.052)	Loss 5.4219 (5.3483)	Top-1 acc 8.594 (8.597)	Top-5 acc 25.781 (22.538)	lr 0.04386
Warmup Train [10][380/3239]	Time 0.230 (0.278)	Data 0.002 (0.051)	Loss 5.4276 (5.3493)	Top-1 acc 5.078 (8.576)	Top-5 acc 22.266 (22.526)	lr 0.04386
Warmup Train [10][390/3239]	Time 0.172 (0.277)	Data 0.001 (0.049)	Loss 5.3837 (5.3504)	Top-1 acc 4.688 (8.565)	Top-5 acc 18.750 (22.493)	lr 0.04386
Warmup Train [10][400/3239]	Time 0.148 (0.276)	Data 0.002 (0.048)	Loss 5.2927 (5.3498)	Top-1 acc 9.766 (8.590)	Top-5 acc 21.484 (22.485)	lr 0.04385
Warmup Train [10][410/3239]	Time 0.231 (0.274)	Data 0.002 (0.047)	Loss 5.3043 (5.3486)	Top-1 acc 7.812 (8.597)	Top-5 acc 21.094 (22.534)	lr 0.04385
Warmup Train [10][420/3239]	Time 0.249 (0.273)	Data 0.002 (0.046)	Loss 5.3048 (5.3476)	Top-1 acc 10.156 (8.588)	Top-5 acc 23.047 (22.555)	lr 0.04384
Warmup Train [10][430/3239]	Time 0.234 (0.272)	Data 0.002 (0.045)	Loss 5.3388 (5.3486)	Top-1 acc 8.984 (8.586)	Top-5 acc 21.094 (22.550)	lr 0.04384
Warmup Train [10][440/3239]	Time 0.290 (0.271)	Data 0.002 (0.044)	Loss 5.2902 (5.3480)	Top-1 acc 5.859 (8.580)	Top-5 acc 20.703 (22.552)	lr 0.04384
Warmup Train [10][450/3239]	Time 0.258 (0.270)	Data 0.024 (0.043)	Loss 5.4158 (5.3477)	Top-1 acc 7.031 (8.589)	Top-5 acc 23.438 (22.572)	lr 0.04383
Warmup Train [10][460/3239]	Time 0.244 (0.269)	Data 0.001 (0.042)	Loss 5.4018 (5.3485)	Top-1 acc 5.859 (8.596)	Top-5 acc 20.312 (22.539)	lr 0.04383
Warmup Train [10][470/3239]	Time 0.190 (0.268)	Data 0.001 (0.042)	Loss 5.4470 (5.3489)	Top-1 acc 7.812 (8.581)	Top-5 acc 20.312 (22.509)	lr 0.04382
Warmup Train [10][480/3239]	Time 0.236 (0.267)	Data 0.030 (0.041)	Loss 5.3365 (5.3480)	Top-1 acc 6.250 (8.592)	Top-5 acc 21.094 (22.559)	lr 0.04382
Warmup Train [10][490/3239]	Time 0.187 (0.266)	Data 0.001 (0.040)	Loss 5.5416 (5.3480)	Top-1 acc 10.547 (8.599)	Top-5 acc 22.266 (22.552)	lr 0.04382
Warmup Train [10][500/3239]	Time 0.191 (0.265)	Data 0.001 (0.039)	Loss 5.3178 (5.3486)	Top-1 acc 9.766 (8.599)	Top-5 acc 23.047 (22.545)	lr 0.04381
Warmup Train [10][510/3239]	Time 0.240 (0.265)	Data 0.001 (0.039)	Loss 5.3724 (5.3485)	Top-1 acc 7.422 (8.584)	Top-5 acc 21.484 (22.549)	lr 0.04381
Warmup Train [10][520/3239]	Time 0.218 (0.264)	Data 0.001 (0.038)	Loss 5.3607 (5.3481)	Top-1 acc 7.422 (8.599)	Top-5 acc 23.438 (22.582)	lr 0.04380
Warmup Train [10][530/3239]	Time 0.219 (0.262)	Data 0.001 (0.037)	Loss 5.4314 (5.3487)	Top-1 acc 7.031 (8.593)	Top-5 acc 21.094 (22.578)	lr 0.04380
Warmup Train [10][540/3239]	Time 0.297 (0.262)	Data 0.001 (0.037)	Loss 5.1622 (5.3469)	Top-1 acc 10.938 (8.595)	Top-5 acc 25.781 (22.630)	lr 0.04380
Warmup Train [10][550/3239]	Time 0.239 (0.261)	Data 0.001 (0.036)	Loss 5.2693 (5.3466)	Top-1 acc 7.422 (8.588)	Top-5 acc 25.000 (22.645)	lr 0.04379
Warmup Train [10][560/3239]	Time 0.326 (0.261)	Data 0.001 (0.036)	Loss 5.3884 (5.3457)	Top-1 acc 8.594 (8.591)	Top-5 acc 21.875 (22.678)	lr 0.04379
Warmup Train [10][570/3239]	Time 0.204 (0.260)	Data 0.001 (0.035)	Loss 5.4477 (5.3456)	Top-1 acc 8.594 (8.590)	Top-5 acc 23.828 (22.675)	lr 0.04378
Warmup Train [10][580/3239]	Time 0.224 (0.259)	Data 0.001 (0.034)	Loss 5.4169 (5.3455)	Top-1 acc 10.547 (8.592)	Top-5 acc 21.484 (22.697)	lr 0.04378
Warmup Train [10][590/3239]	Time 0.141 (0.259)	Data 0.001 (0.034)	Loss 5.3892 (5.3452)	Top-1 acc 11.719 (8.598)	Top-5 acc 21.875 (22.690)	lr 0.04378
Warmup Train [10][600/3239]	Time 0.174 (0.258)	Data 0.001 (0.033)	Loss 5.2673 (5.3441)	Top-1 acc 11.328 (8.615)	Top-5 acc 24.609 (22.723)	lr 0.04377
Warmup Train [10][610/3239]	Time 0.147 (0.258)	Data 0.001 (0.033)	Loss 5.3560 (5.3441)	Top-1 acc 7.812 (8.630)	Top-5 acc 23.047 (22.743)	lr 0.04377
Warmup Train [10][620/3239]	Time 0.239 (0.257)	Data 0.001 (0.033)	Loss 5.3135 (5.3442)	Top-1 acc 6.641 (8.612)	Top-5 acc 26.953 (22.741)	lr 0.04376
Warmup Train [10][630/3239]	Time 0.224 (0.257)	Data 0.002 (0.032)	Loss 5.5856 (5.3446)	Top-1 acc 8.203 (8.613)	Top-5 acc 16.016 (22.724)	lr 0.04376
Warmup Train [10][640/3239]	Time 0.169 (0.256)	Data 0.002 (0.032)	Loss 5.3572 (5.3445)	Top-1 acc 7.031 (8.610)	Top-5 acc 24.219 (22.736)	lr 0.04376
Warmup Train [10][650/3239]	Time 0.228 (0.255)	Data 0.002 (0.031)	Loss 5.1354 (5.3444)	Top-1 acc 13.281 (8.611)	Top-5 acc 26.953 (22.743)	lr 0.04375
Warmup Train [10][660/3239]	Time 0.286 (0.255)	Data 0.001 (0.031)	Loss 5.1469 (5.3440)	Top-1 acc 10.156 (8.610)	Top-5 acc 24.609 (22.753)	lr 0.04375
Warmup Train [10][670/3239]	Time 0.349 (0.254)	Data 0.001 (0.030)	Loss 5.3326 (5.3444)	Top-1 acc 8.594 (8.601)	Top-5 acc 21.875 (22.751)	lr 0.04374
Warmup Train [10][680/3239]	Time 0.235 (0.254)	Data 0.002 (0.030)	Loss 5.2670 (5.3445)	Top-1 acc 12.109 (8.594)	Top-5 acc 26.172 (22.738)	lr 0.04374
Warmup Train [10][690/3239]	Time 0.210 (0.253)	Data 0.002 (0.029)	Loss 5.3834 (5.3440)	Top-1 acc 7.812 (8.597)	Top-5 acc 17.969 (22.737)	lr 0.04374
Warmup Train [10][700/3239]	Time 0.189 (0.253)	Data 0.001 (0.029)	Loss 5.3373 (5.3437)	Top-1 acc 6.641 (8.604)	Top-5 acc 19.141 (22.740)	lr 0.04373
Warmup Train [10][710/3239]	Time 0.208 (0.253)	Data 0.002 (0.029)	Loss 5.2558 (5.3437)	Top-1 acc 12.891 (8.611)	Top-5 acc 26.562 (22.748)	lr 0.04373
Warmup Train [10][720/3239]	Time 0.253 (0.252)	Data 0.001 (0.028)	Loss 5.2647 (5.3432)	Top-1 acc 9.375 (8.632)	Top-5 acc 25.000 (22.760)	lr 0.04372
Warmup Train [10][730/3239]	Time 0.143 (0.252)	Data 0.001 (0.028)	Loss 5.5098 (5.3434)	Top-1 acc 6.250 (8.626)	Top-5 acc 19.141 (22.761)	lr 0.04372
Warmup Train [10][740/3239]	Time 0.191 (0.251)	Data 0.001 (0.028)	Loss 5.3029 (5.3431)	Top-1 acc 10.156 (8.634)	Top-5 acc 22.656 (22.771)	lr 0.04372
Warmup Train [10][750/3239]	Time 0.231 (0.251)	Data 0.001 (0.027)	Loss 5.2669 (5.3428)	Top-1 acc 12.109 (8.649)	Top-5 acc 25.781 (22.783)	lr 0.04371
Warmup Train [10][760/3239]	Time 0.188 (0.250)	Data 0.001 (0.027)	Loss 5.2422 (5.3423)	Top-1 acc 10.938 (8.659)	Top-5 acc 24.609 (22.797)	lr 0.04371
Warmup Train [10][770/3239]	Time 0.353 (0.250)	Data 0.001 (0.027)	Loss 5.3113 (5.3426)	Top-1 acc 7.812 (8.659)	Top-5 acc 22.266 (22.798)	lr 0.04370
Warmup Train [10][780/3239]	Time 0.194 (0.250)	Data 0.001 (0.026)	Loss 5.5613 (5.3429)	Top-1 acc 7.031 (8.650)	Top-5 acc 16.797 (22.793)	lr 0.04370
Warmup Train [10][790/3239]	Time 0.204 (0.249)	Data 0.001 (0.026)	Loss 5.3259 (5.3430)	Top-1 acc 8.984 (8.641)	Top-5 acc 21.094 (22.794)	lr 0.04370
Warmup Train [10][800/3239]	Time 0.203 (0.249)	Data 0.001 (0.026)	Loss 5.3810 (5.3429)	Top-1 acc 5.469 (8.639)	Top-5 acc 19.141 (22.791)	lr 0.04369
Warmup Train [10][810/3239]	Time 0.233 (0.248)	Data 0.001 (0.025)	Loss 5.3012 (5.3431)	Top-1 acc 7.812 (8.652)	Top-5 acc 23.438 (22.800)	lr 0.04369
Warmup Train [10][820/3239]	Time 0.183 (0.248)	Data 0.001 (0.025)	Loss 5.1220 (5.3427)	Top-1 acc 7.422 (8.657)	Top-5 acc 27.734 (22.813)	lr 0.04368
Warmup Train [10][830/3239]	Time 0.176 (0.248)	Data 0.001 (0.025)	Loss 5.2219 (5.3424)	Top-1 acc 9.766 (8.663)	Top-5 acc 27.344 (22.819)	lr 0.04368
Warmup Train [10][840/3239]	Time 0.228 (0.248)	Data 0.001 (0.025)	Loss 5.3658 (5.3423)	Top-1 acc 8.984 (8.663)	Top-5 acc 23.828 (22.824)	lr 0.04368
Warmup Train [10][850/3239]	Time 0.181 (0.247)	Data 0.001 (0.024)	Loss 5.4241 (5.3424)	Top-1 acc 8.594 (8.667)	Top-5 acc 21.875 (22.827)	lr 0.04367
Warmup Train [10][860/3239]	Time 0.326 (0.247)	Data 0.001 (0.024)	Loss 5.3511 (5.3426)	Top-1 acc 8.984 (8.670)	Top-5 acc 22.266 (22.830)	lr 0.04367
Warmup Train [10][870/3239]	Time 0.205 (0.247)	Data 0.001 (0.024)	Loss 5.2784 (5.3425)	Top-1 acc 8.594 (8.679)	Top-5 acc 22.266 (22.826)	lr 0.04366
Warmup Train [10][880/3239]	Time 0.297 (0.247)	Data 0.001 (0.024)	Loss 5.3480 (5.3422)	Top-1 acc 5.469 (8.684)	Top-5 acc 18.359 (22.828)	lr 0.04366
Warmup Train [10][890/3239]	Time 0.212 (0.246)	Data 0.001 (0.023)	Loss 5.3552 (5.3422)	Top-1 acc 8.203 (8.686)	Top-5 acc 20.312 (22.825)	lr 0.04366
Warmup Train [10][900/3239]	Time 0.269 (0.246)	Data 0.001 (0.023)	Loss 5.2824 (5.3420)	Top-1 acc 8.984 (8.683)	Top-5 acc 24.219 (22.823)	lr 0.04365
Warmup Train [10][910/3239]	Time 0.289 (0.246)	Data 0.002 (0.023)	Loss 5.3323 (5.3416)	Top-1 acc 8.594 (8.688)	Top-5 acc 23.438 (22.831)	lr 0.04365
Warmup Train [10][920/3239]	Time 0.208 (0.246)	Data 0.001 (0.023)	Loss 5.3358 (5.3418)	Top-1 acc 6.641 (8.681)	Top-5 acc 23.047 (22.830)	lr 0.04364
Warmup Train [10][930/3239]	Time 0.223 (0.245)	Data 0.001 (0.023)	Loss 5.4717 (5.3414)	Top-1 acc 8.203 (8.684)	Top-5 acc 19.922 (22.836)	lr 0.04364
Warmup Train [10][940/3239]	Time 0.201 (0.245)	Data 0.002 (0.022)	Loss 5.4138 (5.3417)	Top-1 acc 8.984 (8.683)	Top-5 acc 21.484 (22.837)	lr 0.04364
Warmup Train [10][950/3239]	Time 0.305 (0.245)	Data 0.001 (0.022)	Loss 5.3925 (5.3417)	Top-1 acc 8.594 (8.681)	Top-5 acc 23.828 (22.846)	lr 0.04363
Warmup Train [10][960/3239]	Time 0.234 (0.245)	Data 0.001 (0.022)	Loss 5.2056 (5.3409)	Top-1 acc 12.109 (8.697)	Top-5 acc 25.391 (22.857)	lr 0.04363
Warmup Train [10][970/3239]	Time 0.141 (0.244)	Data 0.001 (0.022)	Loss 5.1115 (5.3404)	Top-1 acc 8.984 (8.702)	Top-5 acc 29.297 (22.874)	lr 0.04362
Warmup Train [10][980/3239]	Time 0.191 (0.244)	Data 0.001 (0.022)	Loss 5.3067 (5.3401)	Top-1 acc 9.766 (8.703)	Top-5 acc 24.219 (22.881)	lr 0.04362
Warmup Train [10][990/3239]	Time 0.233 (0.244)	Data 0.001 (0.021)	Loss 5.3388 (5.3400)	Top-1 acc 8.594 (8.701)	Top-5 acc 23.828 (22.886)	lr 0.04362
Warmup Train [10][1000/3239]	Time 0.152 (0.243)	Data 0.001 (0.021)	Loss 5.3622 (5.3397)	Top-1 acc 8.984 (8.714)	Top-5 acc 23.828 (22.901)	lr 0.04361
Warmup Train [10][1010/3239]	Time 0.242 (0.243)	Data 0.001 (0.021)	Loss 5.3825 (5.3394)	Top-1 acc 11.328 (8.720)	Top-5 acc 26.172 (22.917)	lr 0.04361
Warmup Train [10][1020/3239]	Time 0.199 (0.243)	Data 0.001 (0.021)	Loss 5.4084 (5.3388)	Top-1 acc 8.984 (8.725)	Top-5 acc 23.047 (22.923)	lr 0.04360
Warmup Train [10][1030/3239]	Time 0.206 (0.243)	Data 0.001 (0.021)	Loss 5.2317 (5.3385)	Top-1 acc 9.766 (8.737)	Top-5 acc 25.391 (22.940)	lr 0.04360
Warmup Train [10][1040/3239]	Time 0.248 (0.243)	Data 0.001 (0.020)	Loss 5.3112 (5.3380)	Top-1 acc 8.984 (8.742)	Top-5 acc 23.047 (22.946)	lr 0.04359
Warmup Train [10][1050/3239]	Time 0.232 (0.243)	Data 0.001 (0.020)	Loss 5.3525 (5.3379)	Top-1 acc 8.984 (8.744)	Top-5 acc 21.094 (22.955)	lr 0.04359
Warmup Train [10][1060/3239]	Time 0.202 (0.243)	Data 0.001 (0.020)	Loss 5.2868 (5.3373)	Top-1 acc 9.766 (8.749)	Top-5 acc 23.828 (22.973)	lr 0.04359
Warmup Train [10][1070/3239]	Time 0.233 (0.242)	Data 0.001 (0.020)	Loss 5.3454 (5.3371)	Top-1 acc 9.375 (8.747)	Top-5 acc 21.875 (22.973)	lr 0.04358
Warmup Train [10][1080/3239]	Time 0.203 (0.242)	Data 0.001 (0.020)	Loss 5.3992 (5.3366)	Top-1 acc 7.422 (8.752)	Top-5 acc 21.094 (22.993)	lr 0.04358
Warmup Train [10][1090/3239]	Time 0.295 (0.242)	Data 0.001 (0.020)	Loss 5.3783 (5.3365)	Top-1 acc 7.031 (8.761)	Top-5 acc 20.312 (23.002)	lr 0.04357
Warmup Train [10][1100/3239]	Time 0.248 (0.242)	Data 0.001 (0.020)	Loss 5.2931 (5.3364)	Top-1 acc 13.281 (8.770)	Top-5 acc 27.344 (23.016)	lr 0.04357
Warmup Train [10][1110/3239]	Time 0.211 (0.242)	Data 0.002 (0.019)	Loss 5.2857 (5.3361)	Top-1 acc 7.031 (8.770)	Top-5 acc 24.609 (23.022)	lr 0.04357
Warmup Train [10][1120/3239]	Time 0.253 (0.242)	Data 0.001 (0.019)	Loss 5.3236 (5.3357)	Top-1 acc 9.766 (8.779)	Top-5 acc 26.172 (23.025)	lr 0.04356
Warmup Train [10][1130/3239]	Time 0.170 (0.242)	Data 0.001 (0.019)	Loss 5.2479 (5.3356)	Top-1 acc 8.984 (8.792)	Top-5 acc 26.562 (23.040)	lr 0.04356
Warmup Train [10][1140/3239]	Time 0.271 (0.242)	Data 0.001 (0.019)	Loss 5.2483 (5.3355)	Top-1 acc 10.156 (8.797)	Top-5 acc 26.953 (23.039)	lr 0.04355
Warmup Train [10][1150/3239]	Time 0.187 (0.241)	Data 0.001 (0.019)	Loss 5.2851 (5.3354)	Top-1 acc 7.812 (8.795)	Top-5 acc 25.391 (23.043)	lr 0.04355
Warmup Train [10][1160/3239]	Time 0.226 (0.241)	Data 0.001 (0.019)	Loss 5.2275 (5.3350)	Top-1 acc 10.156 (8.800)	Top-5 acc 25.391 (23.045)	lr 0.04355
Warmup Train [10][1170/3239]	Time 0.158 (0.241)	Data 0.001 (0.019)	Loss 5.4710 (5.3349)	Top-1 acc 7.031 (8.804)	Top-5 acc 18.359 (23.049)	lr 0.04354
Warmup Train [10][1180/3239]	Time 0.165 (0.241)	Data 0.001 (0.018)	Loss 5.3910 (5.3353)	Top-1 acc 6.641 (8.800)	Top-5 acc 19.531 (23.032)	lr 0.04354
Warmup Train [10][1190/3239]	Time 0.209 (0.241)	Data 0.001 (0.018)	Loss 5.3368 (5.3350)	Top-1 acc 9.766 (8.808)	Top-5 acc 21.484 (23.032)	lr 0.04353
Warmup Train [10][1200/3239]	Time 0.209 (0.241)	Data 0.001 (0.018)	Loss 5.3194 (5.3350)	Top-1 acc 9.766 (8.821)	Top-5 acc 22.656 (23.045)	lr 0.04353
Warmup Train [10][1210/3239]	Time 0.166 (0.240)	Data 0.001 (0.018)	Loss 5.2252 (5.3346)	Top-1 acc 12.109 (8.831)	Top-5 acc 23.828 (23.060)	lr 0.04353
Warmup Train [10][1220/3239]	Time 0.190 (0.240)	Data 0.001 (0.018)	Loss 5.3321 (5.3342)	Top-1 acc 10.547 (8.827)	Top-5 acc 24.609 (23.064)	lr 0.04352
Warmup Train [10][1230/3239]	Time 0.227 (0.240)	Data 0.001 (0.018)	Loss 5.3737 (5.3340)	Top-1 acc 6.641 (8.836)	Top-5 acc 21.484 (23.075)	lr 0.04352
Warmup Train [10][1240/3239]	Time 0.243 (0.240)	Data 0.001 (0.018)	Loss 5.3080 (5.3335)	Top-1 acc 8.984 (8.846)	Top-5 acc 22.656 (23.089)	lr 0.04351
Warmup Train [10][1250/3239]	Time 0.289 (0.240)	Data 0.002 (0.018)	Loss 5.2748 (5.3331)	Top-1 acc 10.156 (8.853)	Top-5 acc 23.438 (23.095)	lr 0.04351
Warmup Train [10][1260/3239]	Time 0.255 (0.240)	Data 0.001 (0.017)	Loss 5.3435 (5.3327)	Top-1 acc 8.984 (8.858)	Top-5 acc 23.438 (23.101)	lr 0.04351
Warmup Train [10][1270/3239]	Time 0.210 (0.240)	Data 0.001 (0.017)	Loss 5.3269 (5.3328)	Top-1 acc 8.594 (8.859)	Top-5 acc 21.484 (23.099)	lr 0.04350
Warmup Train [10][1280/3239]	Time 0.214 (0.240)	Data 0.001 (0.017)	Loss 5.2057 (5.3324)	Top-1 acc 9.375 (8.864)	Top-5 acc 28.125 (23.117)	lr 0.04350
Warmup Train [10][1290/3239]	Time 0.252 (0.240)	Data 0.002 (0.017)	Loss 5.2227 (5.3324)	Top-1 acc 13.672 (8.862)	Top-5 acc 27.344 (23.118)	lr 0.04349
Warmup Train [10][1300/3239]	Time 0.180 (0.240)	Data 0.001 (0.017)	Loss 5.3662 (5.3323)	Top-1 acc 10.547 (8.871)	Top-5 acc 23.047 (23.121)	lr 0.04349
Warmup Train [10][1310/3239]	Time 0.224 (0.239)	Data 0.001 (0.017)	Loss 5.3877 (5.3323)	Top-1 acc 8.594 (8.872)	Top-5 acc 20.703 (23.112)	lr 0.04349
Warmup Train [10][1320/3239]	Time 0.182 (0.239)	Data 0.001 (0.017)	Loss 5.4260 (5.3321)	Top-1 acc 8.203 (8.879)	Top-5 acc 23.828 (23.121)	lr 0.04348
Warmup Train [10][1330/3239]	Time 0.218 (0.239)	Data 0.001 (0.017)	Loss 5.2451 (5.3324)	Top-1 acc 7.031 (8.873)	Top-5 acc 24.609 (23.109)	lr 0.04348
Warmup Train [10][1340/3239]	Time 0.223 (0.239)	Data 0.001 (0.017)	Loss 5.2213 (5.3323)	Top-1 acc 11.328 (8.877)	Top-5 acc 26.172 (23.116)	lr 0.04347
Warmup Train [10][1350/3239]	Time 0.161 (0.239)	Data 0.001 (0.016)	Loss 5.3183 (5.3320)	Top-1 acc 7.031 (8.879)	Top-5 acc 21.094 (23.123)	lr 0.04347
Warmup Train [10][1360/3239]	Time 0.223 (0.239)	Data 0.001 (0.016)	Loss 5.3767 (5.3321)	Top-1 acc 10.156 (8.880)	Top-5 acc 25.391 (23.120)	lr 0.04346
Warmup Train [10][1370/3239]	Time 0.312 (0.239)	Data 0.001 (0.016)	Loss 5.2061 (5.3320)	Top-1 acc 11.719 (8.882)	Top-5 acc 26.562 (23.122)	lr 0.04346
Warmup Train [10][1380/3239]	Time 0.185 (0.239)	Data 0.001 (0.016)	Loss 5.1902 (5.3317)	Top-1 acc 8.594 (8.885)	Top-5 acc 21.484 (23.123)	lr 0.04346
Warmup Train [10][1390/3239]	Time 0.240 (0.239)	Data 0.001 (0.016)	Loss 5.3360 (5.3313)	Top-1 acc 10.156 (8.891)	Top-5 acc 23.438 (23.132)	lr 0.04345
Warmup Train [10][1400/3239]	Time 0.149 (0.238)	Data 0.001 (0.016)	Loss 5.4093 (5.3313)	Top-1 acc 8.984 (8.893)	Top-5 acc 23.828 (23.141)	lr 0.04345
Warmup Train [10][1410/3239]	Time 0.211 (0.238)	Data 0.001 (0.016)	Loss 5.2066 (5.3312)	Top-1 acc 11.719 (8.897)	Top-5 acc 30.469 (23.143)	lr 0.04344
Warmup Train [10][1420/3239]	Time 0.203 (0.238)	Data 0.001 (0.016)	Loss 5.3366 (5.3310)	Top-1 acc 7.812 (8.897)	Top-5 acc 23.438 (23.138)	lr 0.04344
Warmup Train [10][1430/3239]	Time 0.254 (0.238)	Data 0.001 (0.016)	Loss 5.2853 (5.3315)	Top-1 acc 7.812 (8.893)	Top-5 acc 21.094 (23.127)	lr 0.04344
Warmup Train [10][1440/3239]	Time 0.194 (0.238)	Data 0.001 (0.016)	Loss 5.2399 (5.3312)	Top-1 acc 12.500 (8.898)	Top-5 acc 24.609 (23.133)	lr 0.04343
Warmup Train [10][1450/3239]	Time 0.205 (0.238)	Data 0.003 (0.016)	Loss 5.4416 (5.3312)	Top-1 acc 9.375 (8.900)	Top-5 acc 23.438 (23.134)	lr 0.04343
Warmup Train [10][1460/3239]	Time 0.198 (0.238)	Data 0.001 (0.016)	Loss 5.4632 (5.3311)	Top-1 acc 6.641 (8.905)	Top-5 acc 21.484 (23.140)	lr 0.04342
Warmup Train [10][1470/3239]	Time 0.232 (0.238)	Data 0.001 (0.015)	Loss 5.4460 (5.3310)	Top-1 acc 8.203 (8.908)	Top-5 acc 17.578 (23.143)	lr 0.04342
Warmup Train [10][1480/3239]	Time 0.309 (0.238)	Data 0.001 (0.015)	Loss 5.3040 (5.3310)	Top-1 acc 9.766 (8.912)	Top-5 acc 21.875 (23.146)	lr 0.04342
Warmup Train [10][1490/3239]	Time 0.192 (0.238)	Data 0.002 (0.015)	Loss 5.1914 (5.3308)	Top-1 acc 8.203 (8.914)	Top-5 acc 24.219 (23.146)	lr 0.04341
Warmup Train [10][1500/3239]	Time 0.223 (0.238)	Data 0.001 (0.015)	Loss 5.3090 (5.3307)	Top-1 acc 9.766 (8.916)	Top-5 acc 24.219 (23.147)	lr 0.04341
Warmup Train [10][1510/3239]	Time 0.229 (0.238)	Data 0.001 (0.015)	Loss 5.2238 (5.3307)	Top-1 acc 12.500 (8.917)	Top-5 acc 26.953 (23.142)	lr 0.04340
Warmup Train [10][1520/3239]	Time 0.145 (0.237)	Data 0.001 (0.015)	Loss 5.2088 (5.3302)	Top-1 acc 7.812 (8.913)	Top-5 acc 26.172 (23.148)	lr 0.04340
Warmup Train [10][1530/3239]	Time 0.137 (0.237)	Data 0.001 (0.015)	Loss 5.2931 (5.3301)	Top-1 acc 11.719 (8.918)	Top-5 acc 27.734 (23.158)	lr 0.04340
Warmup Train [10][1540/3239]	Time 0.236 (0.237)	Data 0.002 (0.015)	Loss 5.1198 (5.3299)	Top-1 acc 8.594 (8.921)	Top-5 acc 27.734 (23.167)	lr 0.04339
Warmup Train [10][1550/3239]	Time 0.175 (0.237)	Data 0.001 (0.015)	Loss 5.3011 (5.3300)	Top-1 acc 8.594 (8.918)	Top-5 acc 23.438 (23.159)	lr 0.04339
Warmup Train [10][1560/3239]	Time 0.187 (0.237)	Data 0.001 (0.015)	Loss 5.3231 (5.3297)	Top-1 acc 8.984 (8.918)	Top-5 acc 23.828 (23.164)	lr 0.04338
Warmup Train [10][1570/3239]	Time 0.243 (0.237)	Data 0.001 (0.015)	Loss 5.1293 (5.3294)	Top-1 acc 11.719 (8.923)	Top-5 acc 26.953 (23.171)	lr 0.04338
Warmup Train [10][1580/3239]	Time 0.321 (0.237)	Data 0.002 (0.015)	Loss 5.3353 (5.3293)	Top-1 acc 10.156 (8.923)	Top-5 acc 21.875 (23.170)	lr 0.04337
Warmup Train [10][1590/3239]	Time 0.208 (0.237)	Data 0.001 (0.015)	Loss 5.4180 (5.3293)	Top-1 acc 5.859 (8.920)	Top-5 acc 18.359 (23.166)	lr 0.04337
Warmup Train [10][1600/3239]	Time 0.148 (0.237)	Data 0.001 (0.014)	Loss 5.3598 (5.3289)	Top-1 acc 9.766 (8.923)	Top-5 acc 23.438 (23.174)	lr 0.04337
Warmup Train [10][1610/3239]	Time 0.214 (0.237)	Data 0.001 (0.014)	Loss 5.3967 (5.3287)	Top-1 acc 9.766 (8.926)	Top-5 acc 25.391 (23.176)	lr 0.04336
Warmup Train [10][1620/3239]	Time 0.207 (0.237)	Data 0.001 (0.014)	Loss 5.4163 (5.3286)	Top-1 acc 8.594 (8.931)	Top-5 acc 21.875 (23.181)	lr 0.04336
Warmup Train [10][1630/3239]	Time 0.224 (0.237)	Data 0.001 (0.014)	Loss 5.4017 (5.3284)	Top-1 acc 10.156 (8.929)	Top-5 acc 23.438 (23.184)	lr 0.04335
Warmup Train [10][1640/3239]	Time 0.208 (0.237)	Data 0.001 (0.014)	Loss 5.2083 (5.3285)	Top-1 acc 11.328 (8.927)	Top-5 acc 24.219 (23.177)	lr 0.04335
Warmup Train [10][1650/3239]	Time 0.226 (0.237)	Data 0.001 (0.014)	Loss 5.3524 (5.3283)	Top-1 acc 10.156 (8.931)	Top-5 acc 23.438 (23.189)	lr 0.04335
Warmup Train [10][1660/3239]	Time 0.166 (0.236)	Data 0.001 (0.014)	Loss 5.3517 (5.3285)	Top-1 acc 7.422 (8.931)	Top-5 acc 21.875 (23.184)	lr 0.04334
Warmup Train [10][1670/3239]	Time 0.258 (0.236)	Data 0.001 (0.014)	Loss 5.1884 (5.3286)	Top-1 acc 12.891 (8.929)	Top-5 acc 26.172 (23.183)	lr 0.04334
Warmup Train [10][1680/3239]	Time 0.251 (0.236)	Data 0.001 (0.014)	Loss 5.2087 (5.3284)	Top-1 acc 14.062 (8.933)	Top-5 acc 25.391 (23.189)	lr 0.04333
Warmup Train [10][1690/3239]	Time 0.333 (0.236)	Data 0.001 (0.014)	Loss 5.3554 (5.3284)	Top-1 acc 7.422 (8.936)	Top-5 acc 23.438 (23.191)	lr 0.04333
Warmup Train [10][1700/3239]	Time 0.216 (0.236)	Data 0.001 (0.014)	Loss 5.3875 (5.3285)	Top-1 acc 6.641 (8.936)	Top-5 acc 24.609 (23.192)	lr 0.04333
Warmup Train [10][1710/3239]	Time 0.264 (0.236)	Data 0.001 (0.014)	Loss 5.2239 (5.3283)	Top-1 acc 12.891 (8.943)	Top-5 acc 27.734 (23.203)	lr 0.04332
Warmup Train [10][1720/3239]	Time 0.236 (0.236)	Data 0.001 (0.014)	Loss 5.1725 (5.3280)	Top-1 acc 8.203 (8.948)	Top-5 acc 26.562 (23.212)	lr 0.04332
Warmup Train [10][1730/3239]	Time 0.281 (0.236)	Data 0.001 (0.014)	Loss 5.2216 (5.3280)	Top-1 acc 9.375 (8.950)	Top-5 acc 25.781 (23.213)	lr 0.04331
Warmup Train [10][1740/3239]	Time 0.218 (0.236)	Data 0.001 (0.014)	Loss 5.3640 (5.3278)	Top-1 acc 8.984 (8.957)	Top-5 acc 23.438 (23.220)	lr 0.04331
Warmup Train [10][1750/3239]	Time 0.272 (0.236)	Data 0.001 (0.013)	Loss 5.3767 (5.3277)	Top-1 acc 9.375 (8.963)	Top-5 acc 22.266 (23.229)	lr 0.04330
Warmup Train [10][1760/3239]	Time 0.260 (0.236)	Data 0.025 (0.013)	Loss 5.3110 (5.3276)	Top-1 acc 11.328 (8.968)	Top-5 acc 25.000 (23.238)	lr 0.04330
Warmup Train [10][1770/3239]	Time 0.209 (0.236)	Data 0.001 (0.013)	Loss 5.5039 (5.3276)	Top-1 acc 7.422 (8.974)	Top-5 acc 21.875 (23.242)	lr 0.04330
Warmup Train [10][1780/3239]	Time 0.146 (0.236)	Data 0.001 (0.013)	Loss 5.3514 (5.3276)	Top-1 acc 8.594 (8.977)	Top-5 acc 22.656 (23.241)	lr 0.04329
Warmup Train [10][1790/3239]	Time 0.256 (0.236)	Data 0.001 (0.013)	Loss 5.1807 (5.3274)	Top-1 acc 10.156 (8.980)	Top-5 acc 26.172 (23.248)	lr 0.04329
Warmup Train [10][1800/3239]	Time 0.326 (0.236)	Data 0.001 (0.013)	Loss 5.4559 (5.3275)	Top-1 acc 6.250 (8.975)	Top-5 acc 21.094 (23.247)	lr 0.04328
Warmup Train [10][1810/3239]	Time 0.233 (0.236)	Data 0.001 (0.013)	Loss 5.3266 (5.3276)	Top-1 acc 8.594 (8.970)	Top-5 acc 20.703 (23.241)	lr 0.04328
Warmup Train [10][1820/3239]	Time 0.174 (0.236)	Data 0.001 (0.013)	Loss 5.2308 (5.3276)	Top-1 acc 12.109 (8.971)	Top-5 acc 25.781 (23.236)	lr 0.04328
Warmup Train [10][1830/3239]	Time 0.188 (0.236)	Data 0.001 (0.013)	Loss 5.4180 (5.3278)	Top-1 acc 7.812 (8.968)	Top-5 acc 18.750 (23.235)	lr 0.04327
Warmup Train [10][1840/3239]	Time 0.296 (0.236)	Data 0.002 (0.013)	Loss 5.4890 (5.3278)	Top-1 acc 10.156 (8.971)	Top-5 acc 19.531 (23.234)	lr 0.04327
Warmup Train [10][1850/3239]	Time 0.210 (0.236)	Data 0.001 (0.013)	Loss 5.4542 (5.3275)	Top-1 acc 6.641 (8.973)	Top-5 acc 24.609 (23.239)	lr 0.04326
Warmup Train [10][1860/3239]	Time 0.207 (0.236)	Data 0.001 (0.013)	Loss 5.4228 (5.3274)	Top-1 acc 8.984 (8.978)	Top-5 acc 19.922 (23.244)	lr 0.04326
Warmup Train [10][1870/3239]	Time 0.310 (0.236)	Data 0.001 (0.013)	Loss 5.3727 (5.3276)	Top-1 acc 8.594 (8.978)	Top-5 acc 24.219 (23.243)	lr 0.04325
Warmup Train [10][1880/3239]	Time 0.202 (0.236)	Data 0.001 (0.013)	Loss 5.3302 (5.3274)	Top-1 acc 7.812 (8.982)	Top-5 acc 20.703 (23.248)	lr 0.04325
Warmup Train [10][1890/3239]	Time 0.182 (0.236)	Data 0.001 (0.013)	Loss 5.4912 (5.3275)	Top-1 acc 7.031 (8.982)	Top-5 acc 19.531 (23.246)	lr 0.04325
Warmup Train [10][1900/3239]	Time 0.245 (0.236)	Data 0.001 (0.013)	Loss 5.2550 (5.3274)	Top-1 acc 9.766 (8.981)	Top-5 acc 27.734 (23.243)	lr 0.04324
Warmup Train [10][1910/3239]	Time 0.264 (0.236)	Data 0.001 (0.013)	Loss 5.1177 (5.3268)	Top-1 acc 10.938 (8.989)	Top-5 acc 30.078 (23.260)	lr 0.04324
Warmup Train [10][1920/3239]	Time 0.214 (0.236)	Data 0.001 (0.013)	Loss 5.0962 (5.3264)	Top-1 acc 14.453 (8.996)	Top-5 acc 33.594 (23.273)	lr 0.04323
Warmup Train [10][1930/3239]	Time 0.232 (0.236)	Data 0.001 (0.012)	Loss 5.2032 (5.3263)	Top-1 acc 9.375 (8.995)	Top-5 acc 25.000 (23.272)	lr 0.04323
Warmup Train [10][1940/3239]	Time 0.284 (0.236)	Data 0.001 (0.012)	Loss 5.2133 (5.3262)	Top-1 acc 9.766 (8.998)	Top-5 acc 27.344 (23.280)	lr 0.04323
Warmup Train [10][1950/3239]	Time 0.149 (0.236)	Data 0.002 (0.012)	Loss 5.2424 (5.3258)	Top-1 acc 10.547 (9.001)	Top-5 acc 28.906 (23.292)	lr 0.04322
Warmup Train [10][1960/3239]	Time 0.267 (0.236)	Data 0.001 (0.012)	Loss 5.2618 (5.3256)	Top-1 acc 10.156 (9.004)	Top-5 acc 22.656 (23.293)	lr 0.04322
Warmup Train [10][1970/3239]	Time 0.195 (0.236)	Data 0.002 (0.012)	Loss 5.2830 (5.3254)	Top-1 acc 10.938 (9.003)	Top-5 acc 25.000 (23.299)	lr 0.04321
Warmup Train [10][1980/3239]	Time 0.218 (0.236)	Data 0.002 (0.012)	Loss 5.1904 (5.3253)	Top-1 acc 11.719 (9.005)	Top-5 acc 27.734 (23.300)	lr 0.04321
Warmup Train [10][1990/3239]	Time 0.195 (0.236)	Data 0.001 (0.012)	Loss 5.2824 (5.3252)	Top-1 acc 9.375 (9.006)	Top-5 acc 21.484 (23.300)	lr 0.04321
Warmup Train [10][2000/3239]	Time 0.284 (0.236)	Data 0.001 (0.012)	Loss 5.2902 (5.3249)	Top-1 acc 7.031 (9.010)	Top-5 acc 21.875 (23.309)	lr 0.04320
Warmup Train [10][2010/3239]	Time 0.161 (0.236)	Data 0.001 (0.012)	Loss 5.1872 (5.3247)	Top-1 acc 12.500 (9.012)	Top-5 acc 29.688 (23.315)	lr 0.04320
Warmup Train [10][2020/3239]	Time 0.144 (0.236)	Data 0.001 (0.012)	Loss 5.5272 (5.3248)	Top-1 acc 5.859 (9.009)	Top-5 acc 17.969 (23.308)	lr 0.04319
Warmup Train [10][2030/3239]	Time 0.187 (0.236)	Data 0.001 (0.012)	Loss 5.3435 (5.3245)	Top-1 acc 7.812 (9.010)	Top-5 acc 23.438 (23.314)	lr 0.04319
Warmup Train [10][2040/3239]	Time 0.280 (0.236)	Data 0.001 (0.012)	Loss 5.1942 (5.3244)	Top-1 acc 10.547 (9.015)	Top-5 acc 25.391 (23.315)	lr 0.04318
Warmup Train [10][2050/3239]	Time 0.201 (0.236)	Data 0.002 (0.012)	Loss 5.2056 (5.3242)	Top-1 acc 10.938 (9.017)	Top-5 acc 26.953 (23.317)	lr 0.04318
Warmup Train [10][2060/3239]	Time 0.133 (0.236)	Data 0.001 (0.012)	Loss 5.2448 (5.3242)	Top-1 acc 8.984 (9.016)	Top-5 acc 24.609 (23.313)	lr 0.04318
Warmup Train [10][2070/3239]	Time 0.230 (0.235)	Data 0.001 (0.012)	Loss 5.3098 (5.3239)	Top-1 acc 9.766 (9.015)	Top-5 acc 23.047 (23.318)	lr 0.04317
Warmup Train [10][2080/3239]	Time 0.194 (0.236)	Data 0.001 (0.012)	Loss 5.3662 (5.3241)	Top-1 acc 8.203 (9.013)	Top-5 acc 21.875 (23.312)	lr 0.04317
Warmup Train [10][2090/3239]	Time 0.239 (0.236)	Data 0.001 (0.012)	Loss 5.2168 (5.3238)	Top-1 acc 9.375 (9.014)	Top-5 acc 26.172 (23.317)	lr 0.04316
Warmup Train [10][2100/3239]	Time 0.346 (0.236)	Data 0.002 (0.012)	Loss 5.3967 (5.3239)	Top-1 acc 7.812 (9.012)	Top-5 acc 21.484 (23.316)	lr 0.04316
Warmup Train [10][2110/3239]	Time 0.270 (0.236)	Data 0.002 (0.012)	Loss 5.3351 (5.3237)	Top-1 acc 8.203 (9.015)	Top-5 acc 24.219 (23.322)	lr 0.04316
Warmup Train [10][2120/3239]	Time 0.162 (0.236)	Data 0.001 (0.012)	Loss 5.3308 (5.3236)	Top-1 acc 10.156 (9.017)	Top-5 acc 19.531 (23.325)	lr 0.04315
Warmup Train [10][2130/3239]	Time 0.274 (0.236)	Data 0.002 (0.012)	Loss 5.1585 (5.3234)	Top-1 acc 9.375 (9.022)	Top-5 acc 27.344 (23.330)	lr 0.04315
Warmup Train [10][2140/3239]	Time 0.276 (0.236)	Data 0.001 (0.012)	Loss 5.1749 (5.3232)	Top-1 acc 9.766 (9.023)	Top-5 acc 26.562 (23.342)	lr 0.04314
Warmup Train [10][2150/3239]	Time 0.208 (0.236)	Data 0.001 (0.012)	Loss 5.0790 (5.3228)	Top-1 acc 13.281 (9.032)	Top-5 acc 29.297 (23.351)	lr 0.04314
Warmup Train [10][2160/3239]	Time 0.213 (0.235)	Data 0.001 (0.012)	Loss 5.2987 (5.3227)	Top-1 acc 7.812 (9.030)	Top-5 acc 22.266 (23.357)	lr 0.04313
Warmup Train [10][2170/3239]	Time 0.167 (0.235)	Data 0.002 (0.011)	Loss 5.2420 (5.3225)	Top-1 acc 9.766 (9.030)	Top-5 acc 24.219 (23.360)	lr 0.04313
Warmup Train [10][2180/3239]	Time 0.235 (0.235)	Data 0.002 (0.011)	Loss 5.3713 (5.3220)	Top-1 acc 7.812 (9.035)	Top-5 acc 21.094 (23.373)	lr 0.04313
Warmup Train [10][2190/3239]	Time 0.217 (0.235)	Data 0.001 (0.011)	Loss 5.3046 (5.3216)	Top-1 acc 9.375 (9.041)	Top-5 acc 24.219 (23.382)	lr 0.04312
Warmup Train [10][2200/3239]	Time 0.287 (0.236)	Data 0.001 (0.011)	Loss 5.4322 (5.3216)	Top-1 acc 7.422 (9.041)	Top-5 acc 21.484 (23.384)	lr 0.04312
Warmup Train [10][2210/3239]	Time 0.244 (0.236)	Data 0.001 (0.011)	Loss 5.3097 (5.3214)	Top-1 acc 8.203 (9.039)	Top-5 acc 23.047 (23.387)	lr 0.04311
Warmup Train [10][2220/3239]	Time 0.226 (0.236)	Data 0.001 (0.011)	Loss 5.2202 (5.3212)	Top-1 acc 10.938 (9.039)	Top-5 acc 26.562 (23.386)	lr 0.04311
Warmup Train [10][2230/3239]	Time 0.230 (0.235)	Data 0.001 (0.011)	Loss 5.3953 (5.3213)	Top-1 acc 7.422 (9.038)	Top-5 acc 21.484 (23.386)	lr 0.04311
Warmup Train [10][2240/3239]	Time 0.211 (0.235)	Data 0.001 (0.011)	Loss 5.3602 (5.3212)	Top-1 acc 10.156 (9.040)	Top-5 acc 22.266 (23.388)	lr 0.04310
Warmup Train [10][2250/3239]	Time 0.197 (0.235)	Data 0.001 (0.011)	Loss 5.1525 (5.3210)	Top-1 acc 12.500 (9.040)	Top-5 acc 28.125 (23.392)	lr 0.04310
Warmup Train [10][2260/3239]	Time 0.150 (0.235)	Data 0.001 (0.011)	Loss 5.2910 (5.3208)	Top-1 acc 5.859 (9.040)	Top-5 acc 21.094 (23.395)	lr 0.04309
Warmup Train [10][2270/3239]	Time 0.258 (0.235)	Data 0.001 (0.011)	Loss 5.2046 (5.3207)	Top-1 acc 8.203 (9.041)	Top-5 acc 25.781 (23.397)	lr 0.04309
Warmup Train [10][2280/3239]	Time 0.221 (0.235)	Data 0.001 (0.011)	Loss 5.1976 (5.3207)	Top-1 acc 8.984 (9.044)	Top-5 acc 27.344 (23.396)	lr 0.04308
Warmup Train [10][2290/3239]	Time 0.331 (0.235)	Data 0.001 (0.011)	Loss 5.3401 (5.3208)	Top-1 acc 9.766 (9.045)	Top-5 acc 25.000 (23.395)	lr 0.04308
Warmup Train [10][2300/3239]	Time 0.213 (0.235)	Data 0.001 (0.011)	Loss 5.1954 (5.3209)	Top-1 acc 11.328 (9.044)	Top-5 acc 24.219 (23.393)	lr 0.04308
Warmup Train [10][2310/3239]	Time 0.166 (0.235)	Data 0.001 (0.011)	Loss 5.3096 (5.3207)	Top-1 acc 12.891 (9.046)	Top-5 acc 28.125 (23.400)	lr 0.04307
Warmup Train [10][2320/3239]	Time 0.215 (0.235)	Data 0.001 (0.011)	Loss 5.2719 (5.3204)	Top-1 acc 9.766 (9.047)	Top-5 acc 27.734 (23.407)	lr 0.04307
Warmup Train [10][2330/3239]	Time 0.164 (0.235)	Data 0.001 (0.011)	Loss 5.4505 (5.3204)	Top-1 acc 8.203 (9.047)	Top-5 acc 19.141 (23.408)	lr 0.04306
Warmup Train [10][2340/3239]	Time 0.189 (0.235)	Data 0.001 (0.011)	Loss 5.4043 (5.3203)	Top-1 acc 9.375 (9.047)	Top-5 acc 19.922 (23.410)	lr 0.04306
Warmup Train [10][2350/3239]	Time 0.172 (0.235)	Data 0.001 (0.011)	Loss 5.1587 (5.3201)	Top-1 acc 15.625 (9.053)	Top-5 acc 30.469 (23.416)	lr 0.04305
Warmup Train [10][2360/3239]	Time 0.177 (0.235)	Data 0.001 (0.011)	Loss 5.2736 (5.3197)	Top-1 acc 7.812 (9.058)	Top-5 acc 23.438 (23.422)	lr 0.04305
Warmup Train [10][2370/3239]	Time 0.286 (0.235)	Data 0.002 (0.011)	Loss 5.2583 (5.3195)	Top-1 acc 8.594 (9.060)	Top-5 acc 23.047 (23.425)	lr 0.04305
Warmup Train [10][2380/3239]	Time 0.247 (0.235)	Data 0.001 (0.011)	Loss 5.3186 (5.3194)	Top-1 acc 8.594 (9.062)	Top-5 acc 24.609 (23.432)	lr 0.04304
Warmup Train [10][2390/3239]	Time 0.381 (0.235)	Data 0.002 (0.011)	Loss 5.3185 (5.3193)	Top-1 acc 8.594 (9.067)	Top-5 acc 23.438 (23.436)	lr 0.04304
Warmup Train [10][2400/3239]	Time 0.203 (0.235)	Data 0.001 (0.011)	Loss 5.2909 (5.3192)	Top-1 acc 11.719 (9.072)	Top-5 acc 23.828 (23.436)	lr 0.04303
Warmup Train [10][2410/3239]	Time 0.230 (0.235)	Data 0.002 (0.011)	Loss 5.2354 (5.3191)	Top-1 acc 12.109 (9.071)	Top-5 acc 26.953 (23.438)	lr 0.04303
Warmup Train [10][2420/3239]	Time 0.215 (0.235)	Data 0.001 (0.011)	Loss 5.3834 (5.3190)	Top-1 acc 6.250 (9.065)	Top-5 acc 19.141 (23.438)	lr 0.04303
Warmup Train [10][2430/3239]	Time 0.185 (0.235)	Data 0.002 (0.011)	Loss 5.2793 (5.3188)	Top-1 acc 8.984 (9.065)	Top-5 acc 23.438 (23.443)	lr 0.04302
Warmup Train [10][2440/3239]	Time 0.188 (0.235)	Data 0.001 (0.011)	Loss 5.2385 (5.3188)	Top-1 acc 12.109 (9.066)	Top-5 acc 25.000 (23.446)	lr 0.04302
Warmup Train [10][2450/3239]	Time 0.190 (0.235)	Data 0.001 (0.011)	Loss 5.3326 (5.3189)	Top-1 acc 7.812 (9.064)	Top-5 acc 22.656 (23.446)	lr 0.04301
Warmup Train [10][2460/3239]	Time 0.209 (0.235)	Data 0.001 (0.010)	Loss 5.1447 (5.3190)	Top-1 acc 11.719 (9.062)	Top-5 acc 27.734 (23.442)	lr 0.04301
Warmup Train [10][2470/3239]	Time 0.224 (0.235)	Data 0.001 (0.010)	Loss 5.1989 (5.3187)	Top-1 acc 12.109 (9.064)	Top-5 acc 28.125 (23.449)	lr 0.04300
Warmup Train [10][2480/3239]	Time 0.229 (0.235)	Data 0.004 (0.010)	Loss 5.1803 (5.3186)	Top-1 acc 10.547 (9.068)	Top-5 acc 29.297 (23.454)	lr 0.04300
Warmup Train [10][2490/3239]	Time 0.330 (0.235)	Data 0.001 (0.010)	Loss 5.1927 (5.3182)	Top-1 acc 13.281 (9.076)	Top-5 acc 25.781 (23.463)	lr 0.04300
Warmup Train [10][2500/3239]	Time 0.235 (0.235)	Data 0.002 (0.010)	Loss 5.3753 (5.3184)	Top-1 acc 5.859 (9.074)	Top-5 acc 19.531 (23.465)	lr 0.04299
Warmup Train [10][2510/3239]	Time 0.205 (0.234)	Data 0.001 (0.010)	Loss 5.3741 (5.3180)	Top-1 acc 8.594 (9.080)	Top-5 acc 20.312 (23.472)	lr 0.04299
Warmup Train [10][2520/3239]	Time 0.174 (0.234)	Data 0.001 (0.010)	Loss 5.2764 (5.3179)	Top-1 acc 10.547 (9.082)	Top-5 acc 26.172 (23.472)	lr 0.04298
Warmup Train [10][2530/3239]	Time 0.247 (0.234)	Data 0.001 (0.010)	Loss 5.1616 (5.3176)	Top-1 acc 10.156 (9.085)	Top-5 acc 24.219 (23.476)	lr 0.04298
Warmup Train [10][2540/3239]	Time 0.130 (0.234)	Data 0.001 (0.010)	Loss 5.1782 (5.3175)	Top-1 acc 10.156 (9.086)	Top-5 acc 28.516 (23.481)	lr 0.04298
Warmup Train [10][2550/3239]	Time 0.130 (0.234)	Data 0.001 (0.010)	Loss 5.3742 (5.3174)	Top-1 acc 6.250 (9.086)	Top-5 acc 19.531 (23.479)	lr 0.04297
Warmup Train [10][2560/3239]	Time 0.206 (0.234)	Data 0.002 (0.010)	Loss 5.3926 (5.3173)	Top-1 acc 7.031 (9.089)	Top-5 acc 22.266 (23.487)	lr 0.04297
Warmup Train [10][2570/3239]	Time 0.205 (0.234)	Data 0.001 (0.010)	Loss 5.2215 (5.3171)	Top-1 acc 12.109 (9.094)	Top-5 acc 27.734 (23.491)	lr 0.04296
Warmup Train [10][2580/3239]	Time 0.311 (0.234)	Data 0.001 (0.010)	Loss 5.2228 (5.3169)	Top-1 acc 10.547 (9.098)	Top-5 acc 24.609 (23.498)	lr 0.04296
Warmup Train [10][2590/3239]	Time 0.224 (0.234)	Data 0.001 (0.010)	Loss 5.3196 (5.3168)	Top-1 acc 6.250 (9.097)	Top-5 acc 20.312 (23.499)	lr 0.04295
Warmup Train [10][2600/3239]	Time 0.228 (0.234)	Data 0.001 (0.010)	Loss 5.3121 (5.3168)	Top-1 acc 6.250 (9.097)	Top-5 acc 21.094 (23.500)	lr 0.04295
Warmup Train [10][2610/3239]	Time 0.232 (0.234)	Data 0.001 (0.010)	Loss 5.2395 (5.3166)	Top-1 acc 10.547 (9.103)	Top-5 acc 25.391 (23.505)	lr 0.04295
Warmup Train [10][2620/3239]	Time 0.270 (0.234)	Data 0.001 (0.010)	Loss 5.3517 (5.3163)	Top-1 acc 10.547 (9.109)	Top-5 acc 21.094 (23.509)	lr 0.04294
Warmup Train [10][2630/3239]	Time 0.200 (0.234)	Data 0.001 (0.010)	Loss 5.2511 (5.3162)	Top-1 acc 12.109 (9.115)	Top-5 acc 24.609 (23.513)	lr 0.04294
Warmup Train [10][2640/3239]	Time 0.326 (0.234)	Data 0.001 (0.010)	Loss 5.3044 (5.3159)	Top-1 acc 6.641 (9.112)	Top-5 acc 26.562 (23.514)	lr 0.04293
Warmup Train [10][2650/3239]	Time 0.262 (0.234)	Data 0.001 (0.010)	Loss 5.4124 (5.3157)	Top-1 acc 8.203 (9.118)	Top-5 acc 22.266 (23.521)	lr 0.04293
Warmup Train [10][2660/3239]	Time 0.242 (0.234)	Data 0.002 (0.010)	Loss 5.3458 (5.3156)	Top-1 acc 10.938 (9.119)	Top-5 acc 24.609 (23.525)	lr 0.04292
Warmup Train [10][2670/3239]	Time 0.238 (0.234)	Data 0.001 (0.010)	Loss 5.3155 (5.3153)	Top-1 acc 11.719 (9.124)	Top-5 acc 24.609 (23.533)	lr 0.04292
Warmup Train [10][2680/3239]	Time 0.283 (0.234)	Data 0.001 (0.010)	Loss 5.2723 (5.3154)	Top-1 acc 8.984 (9.123)	Top-5 acc 25.000 (23.533)	lr 0.04292
Warmup Train [10][2690/3239]	Time 0.295 (0.234)	Data 0.001 (0.010)	Loss 5.2281 (5.3152)	Top-1 acc 8.594 (9.127)	Top-5 acc 22.266 (23.535)	lr 0.04291
Warmup Train [10][2700/3239]	Time 0.227 (0.234)	Data 0.001 (0.010)	Loss 5.4165 (5.3150)	Top-1 acc 6.250 (9.129)	Top-5 acc 23.438 (23.545)	lr 0.04291
Warmup Train [10][2710/3239]	Time 0.183 (0.234)	Data 0.001 (0.010)	Loss 5.4270 (5.3150)	Top-1 acc 10.938 (9.131)	Top-5 acc 22.266 (23.543)	lr 0.04290
Warmup Train [10][2720/3239]	Time 0.225 (0.234)	Data 0.001 (0.010)	Loss 5.2594 (5.3150)	Top-1 acc 9.375 (9.132)	Top-5 acc 22.266 (23.546)	lr 0.04290
Warmup Train [10][2730/3239]	Time 0.170 (0.234)	Data 0.001 (0.010)	Loss 5.1299 (5.3148)	Top-1 acc 9.766 (9.133)	Top-5 acc 25.391 (23.553)	lr 0.04289
Warmup Train [10][2740/3239]	Time 0.196 (0.234)	Data 0.001 (0.010)	Loss 5.3221 (5.3147)	Top-1 acc 9.766 (9.136)	Top-5 acc 23.047 (23.557)	lr 0.04289
Warmup Train [10][2750/3239]	Time 0.171 (0.234)	Data 0.001 (0.010)	Loss 5.2348 (5.3146)	Top-1 acc 7.812 (9.137)	Top-5 acc 26.172 (23.559)	lr 0.04289
Warmup Train [10][2760/3239]	Time 0.171 (0.234)	Data 0.002 (0.010)	Loss 5.1807 (5.3145)	Top-1 acc 11.719 (9.140)	Top-5 acc 26.562 (23.564)	lr 0.04288
Warmup Train [10][2770/3239]	Time 0.219 (0.234)	Data 0.001 (0.010)	Loss 5.4420 (5.3145)	Top-1 acc 7.422 (9.141)	Top-5 acc 19.922 (23.568)	lr 0.04288
Warmup Train [10][2780/3239]	Time 0.203 (0.234)	Data 0.002 (0.010)	Loss 5.2588 (5.3143)	Top-1 acc 10.938 (9.143)	Top-5 acc 25.391 (23.574)	lr 0.04287
Warmup Train [10][2790/3239]	Time 0.213 (0.234)	Data 0.001 (0.010)	Loss 5.3018 (5.3141)	Top-1 acc 9.375 (9.146)	Top-5 acc 25.391 (23.579)	lr 0.04287
Warmup Train [10][2800/3239]	Time 0.364 (0.234)	Data 0.001 (0.010)	Loss 5.3354 (5.3139)	Top-1 acc 6.641 (9.145)	Top-5 acc 23.047 (23.583)	lr 0.04287
Warmup Train [10][2810/3239]	Time 0.331 (0.234)	Data 0.001 (0.010)	Loss 5.3086 (5.3139)	Top-1 acc 8.203 (9.145)	Top-5 acc 25.781 (23.585)	lr 0.04286
Warmup Train [10][2820/3239]	Time 0.212 (0.234)	Data 0.001 (0.010)	Loss 5.1746 (5.3137)	Top-1 acc 12.891 (9.150)	Top-5 acc 27.344 (23.590)	lr 0.04286
Warmup Train [10][2830/3239]	Time 0.217 (0.234)	Data 0.001 (0.010)	Loss 5.3808 (5.3136)	Top-1 acc 8.594 (9.153)	Top-5 acc 20.703 (23.593)	lr 0.04285
Warmup Train [10][2840/3239]	Time 0.288 (0.234)	Data 0.001 (0.010)	Loss 5.3264 (5.3133)	Top-1 acc 12.500 (9.157)	Top-5 acc 22.656 (23.598)	lr 0.04285
Warmup Train [10][2850/3239]	Time 0.220 (0.233)	Data 0.001 (0.010)	Loss 5.1273 (5.3130)	Top-1 acc 12.109 (9.159)	Top-5 acc 28.125 (23.606)	lr 0.04284
Warmup Train [10][2860/3239]	Time 0.205 (0.234)	Data 0.001 (0.009)	Loss 5.0179 (5.3126)	Top-1 acc 9.375 (9.162)	Top-5 acc 32.031 (23.619)	lr 0.04284
Warmup Train [10][2870/3239]	Time 0.211 (0.234)	Data 0.001 (0.009)	Loss 5.2768 (5.3124)	Top-1 acc 12.109 (9.167)	Top-5 acc 25.391 (23.625)	lr 0.04284
Warmup Train [10][2880/3239]	Time 0.193 (0.233)	Data 0.001 (0.009)	Loss 5.3690 (5.3123)	Top-1 acc 10.156 (9.170)	Top-5 acc 22.266 (23.629)	lr 0.04283
Warmup Train [10][2890/3239]	Time 0.325 (0.234)	Data 0.001 (0.009)	Loss 5.3363 (5.3122)	Top-1 acc 7.031 (9.171)	Top-5 acc 23.828 (23.632)	lr 0.04283
Warmup Train [10][2900/3239]	Time 0.313 (0.234)	Data 0.001 (0.009)	Loss 5.2722 (5.3120)	Top-1 acc 10.938 (9.175)	Top-5 acc 23.828 (23.634)	lr 0.04282
Warmup Train [10][2910/3239]	Time 0.139 (0.233)	Data 0.002 (0.009)	Loss 5.4410 (5.3118)	Top-1 acc 7.031 (9.181)	Top-5 acc 20.703 (23.638)	lr 0.04282
Warmup Train [10][2920/3239]	Time 0.231 (0.233)	Data 0.001 (0.009)	Loss 5.4429 (5.3117)	Top-1 acc 7.031 (9.181)	Top-5 acc 21.875 (23.640)	lr 0.04281
Warmup Train [10][2930/3239]	Time 0.201 (0.233)	Data 0.001 (0.009)	Loss 5.2266 (5.3116)	Top-1 acc 11.328 (9.181)	Top-5 acc 26.953 (23.643)	lr 0.04281
Warmup Train [10][2940/3239]	Time 0.201 (0.233)	Data 0.001 (0.009)	Loss 5.3037 (5.3115)	Top-1 acc 8.984 (9.181)	Top-5 acc 20.703 (23.642)	lr 0.04281
Warmup Train [10][2950/3239]	Time 0.244 (0.233)	Data 0.001 (0.009)	Loss 5.2924 (5.3113)	Top-1 acc 13.281 (9.185)	Top-5 acc 26.172 (23.647)	lr 0.04280
Warmup Train [10][2960/3239]	Time 0.282 (0.233)	Data 0.002 (0.009)	Loss 5.3715 (5.3112)	Top-1 acc 8.203 (9.185)	Top-5 acc 22.656 (23.646)	lr 0.04280
Warmup Train [10][2970/3239]	Time 0.228 (0.233)	Data 0.002 (0.009)	Loss 5.1563 (5.3110)	Top-1 acc 11.328 (9.189)	Top-5 acc 25.000 (23.653)	lr 0.04279
Warmup Train [10][2980/3239]	Time 0.144 (0.233)	Data 0.001 (0.009)	Loss 5.2230 (5.3110)	Top-1 acc 8.984 (9.191)	Top-5 acc 26.172 (23.657)	lr 0.04279
Warmup Train [10][2990/3239]	Time 0.315 (0.233)	Data 0.001 (0.009)	Loss 5.4625 (5.3108)	Top-1 acc 7.422 (9.192)	Top-5 acc 21.875 (23.661)	lr 0.04278
Warmup Train [10][3000/3239]	Time 0.344 (0.233)	Data 0.001 (0.009)	Loss 5.3817 (5.3108)	Top-1 acc 7.812 (9.194)	Top-5 acc 19.141 (23.661)	lr 0.04278
Warmup Train [10][3010/3239]	Time 0.182 (0.233)	Data 0.001 (0.009)	Loss 5.2133 (5.3106)	Top-1 acc 10.156 (9.200)	Top-5 acc 26.953 (23.667)	lr 0.04278
Warmup Train [10][3020/3239]	Time 0.133 (0.233)	Data 0.001 (0.009)	Loss 5.2634 (5.3106)	Top-1 acc 8.594 (9.199)	Top-5 acc 27.344 (23.668)	lr 0.04277
Warmup Train [10][3030/3239]	Time 0.202 (0.233)	Data 0.001 (0.009)	Loss 5.2531 (5.3103)	Top-1 acc 10.547 (9.200)	Top-5 acc 26.953 (23.677)	lr 0.04277
Warmup Train [10][3040/3239]	Time 0.219 (0.233)	Data 0.001 (0.009)	Loss 5.3579 (5.3102)	Top-1 acc 9.375 (9.204)	Top-5 acc 25.000 (23.677)	lr 0.04276
Warmup Train [10][3050/3239]	Time 0.241 (0.233)	Data 0.001 (0.009)	Loss 5.3610 (5.3099)	Top-1 acc 8.203 (9.210)	Top-5 acc 19.531 (23.686)	lr 0.04276
Warmup Train [10][3060/3239]	Time 0.161 (0.233)	Data 0.001 (0.009)	Loss 5.2711 (5.3097)	Top-1 acc 10.547 (9.213)	Top-5 acc 23.438 (23.688)	lr 0.04275
Warmup Train [10][3070/3239]	Time 0.174 (0.233)	Data 0.001 (0.009)	Loss 5.2298 (5.3094)	Top-1 acc 8.594 (9.214)	Top-5 acc 26.562 (23.693)	lr 0.04275
Warmup Train [10][3080/3239]	Time 0.252 (0.233)	Data 0.001 (0.009)	Loss 5.2518 (5.3093)	Top-1 acc 9.375 (9.214)	Top-5 acc 23.047 (23.694)	lr 0.04275
Warmup Train [10][3090/3239]	Time 0.190 (0.233)	Data 0.002 (0.009)	Loss 5.3017 (5.3093)	Top-1 acc 9.766 (9.216)	Top-5 acc 21.875 (23.696)	lr 0.04274
Warmup Train [10][3100/3239]	Time 0.335 (0.233)	Data 0.002 (0.009)	Loss 5.2133 (5.3093)	Top-1 acc 11.719 (9.217)	Top-5 acc 28.516 (23.698)	lr 0.04274
Warmup Train [10][3110/3239]	Time 0.174 (0.233)	Data 0.001 (0.009)	Loss 5.3539 (5.3093)	Top-1 acc 11.719 (9.217)	Top-5 acc 23.047 (23.698)	lr 0.04273
Warmup Train [10][3120/3239]	Time 0.235 (0.233)	Data 0.002 (0.009)	Loss 5.2748 (5.3090)	Top-1 acc 9.375 (9.218)	Top-5 acc 26.172 (23.700)	lr 0.04273
Warmup Train [10][3130/3239]	Time 0.196 (0.233)	Data 0.001 (0.009)	Loss 5.2402 (5.3089)	Top-1 acc 10.156 (9.220)	Top-5 acc 23.438 (23.706)	lr 0.04272
Warmup Train [10][3140/3239]	Time 0.136 (0.233)	Data 0.001 (0.009)	Loss 5.1512 (5.3087)	Top-1 acc 11.719 (9.223)	Top-5 acc 24.609 (23.709)	lr 0.04272
Warmup Train [10][3150/3239]	Time 0.209 (0.233)	Data 0.001 (0.009)	Loss 5.1464 (5.3085)	Top-1 acc 10.156 (9.226)	Top-5 acc 26.953 (23.714)	lr 0.04272
Warmup Train [10][3160/3239]	Time 0.181 (0.233)	Data 0.001 (0.009)	Loss 5.3513 (5.3085)	Top-1 acc 10.547 (9.227)	Top-5 acc 24.609 (23.719)	lr 0.04271
Warmup Train [10][3170/3239]	Time 0.216 (0.233)	Data 0.001 (0.009)	Loss 5.2331 (5.3083)	Top-1 acc 8.984 (9.231)	Top-5 acc 21.875 (23.721)	lr 0.04271
Warmup Train [10][3180/3239]	Time 0.236 (0.233)	Data 0.000 (0.009)	Loss 5.2548 (5.3082)	Top-1 acc 11.328 (9.232)	Top-5 acc 25.000 (23.723)	lr 0.04270
Warmup Train [10][3190/3239]	Time 0.189 (0.233)	Data 0.000 (0.009)	Loss 5.1909 (5.3078)	Top-1 acc 10.156 (9.235)	Top-5 acc 26.562 (23.731)	lr 0.04270
Warmup Train [10][3200/3239]	Time 0.208 (0.233)	Data 0.000 (0.009)	Loss 5.4849 (5.3079)	Top-1 acc 12.109 (9.234)	Top-5 acc 23.047 (23.728)	lr 0.04269
Warmup Train [10][3210/3239]	Time 0.194 (0.233)	Data 0.000 (0.009)	Loss 5.3964 (5.3076)	Top-1 acc 8.594 (9.239)	Top-5 acc 22.656 (23.735)	lr 0.04269
Warmup Train [10][3220/3239]	Time 0.304 (0.232)	Data 0.000 (0.009)	Loss 5.3479 (5.3075)	Top-1 acc 7.422 (9.238)	Top-5 acc 26.172 (23.735)	lr 0.04269
Warmup Train [10][3230/3239]	Time 0.208 (0.232)	Data 0.000 (0.009)	Loss 5.2117 (5.3072)	Top-1 acc 8.594 (9.245)	Top-5 acc 26.562 (23.747)	lr 0.04268
Warmup Train [10][3239/3239]	Time 0.147 (0.232)	Data 0.000 (0.009)	Loss 5.0347 (5.3070)	Top-1 acc 13.580 (9.249)	Top-5 acc 28.395 (23.753)	lr 0.04268
==========Warmup Valid [10/40]	loss 4.565	top-1 acc 12.923	top-5 acc 31.243	Train top-1 9.249	top-5 23.753	flops: 442.4M
Warmup Train [11][0/3239]	Time 14.791 (14.791)	Data 10.331 (10.331)	Loss 5.2635 (5.2635)	Top-1 acc 13.672 (13.672)	Top-5 acc 23.438 (23.438)	lr 0.04268
Warmup Train [11][10/3239]	Time 0.272 (1.749)	Data 0.002 (1.090)	Loss 5.1522 (5.2618)	Top-1 acc 11.719 (9.979)	Top-5 acc 28.125 (24.503)	lr 0.04267
Warmup Train [11][20/3239]	Time 0.271 (1.030)	Data 0.001 (0.572)	Loss 5.1988 (5.2560)	Top-1 acc 12.500 (9.914)	Top-5 acc 25.391 (24.665)	lr 0.04267
Warmup Train [11][30/3239]	Time 0.263 (0.777)	Data 0.002 (0.390)	Loss 5.2743 (5.2671)	Top-1 acc 10.156 (9.929)	Top-5 acc 23.047 (24.294)	lr 0.04266
Warmup Train [11][40/3239]	Time 0.243 (0.643)	Data 0.001 (0.295)	Loss 5.3072 (5.2737)	Top-1 acc 10.156 (9.804)	Top-5 acc 25.781 (24.390)	lr 0.04266
Warmup Train [11][50/3239]	Time 0.228 (0.560)	Data 0.001 (0.238)	Loss 5.1881 (5.2708)	Top-1 acc 8.203 (9.773)	Top-5 acc 25.000 (24.395)	lr 0.04266
Warmup Train [11][60/3239]	Time 0.265 (0.506)	Data 0.001 (0.199)	Loss 5.2935 (5.2610)	Top-1 acc 7.812 (9.817)	Top-5 acc 27.344 (24.571)	lr 0.04265
Warmup Train [11][70/3239]	Time 0.231 (0.466)	Data 0.001 (0.171)	Loss 5.2614 (5.2541)	Top-1 acc 11.719 (9.947)	Top-5 acc 22.656 (24.840)	lr 0.04265
Warmup Train [11][80/3239]	Time 0.222 (0.435)	Data 0.002 (0.150)	Loss 5.1960 (5.2484)	Top-1 acc 8.984 (9.949)	Top-5 acc 28.125 (24.894)	lr 0.04264
Warmup Train [11][90/3239]	Time 0.202 (0.411)	Data 0.001 (0.134)	Loss 5.2421 (5.2466)	Top-1 acc 11.719 (10.083)	Top-5 acc 28.516 (24.914)	lr 0.04264
Warmup Train [11][100/3239]	Time 0.198 (0.394)	Data 0.001 (0.121)	Loss 5.2544 (5.2456)	Top-1 acc 10.547 (10.025)	Top-5 acc 22.266 (24.988)	lr 0.04263
Warmup Train [11][110/3239]	Time 0.167 (0.379)	Data 0.001 (0.111)	Loss 5.2104 (5.2442)	Top-1 acc 8.203 (10.001)	Top-5 acc 25.781 (25.025)	lr 0.04263
Warmup Train [11][120/3239]	Time 0.287 (0.367)	Data 0.001 (0.102)	Loss 5.5010 (5.2456)	Top-1 acc 5.078 (9.998)	Top-5 acc 22.656 (24.990)	lr 0.04263
Warmup Train [11][130/3239]	Time 0.213 (0.356)	Data 0.001 (0.094)	Loss 5.1896 (5.2452)	Top-1 acc 10.938 (9.983)	Top-5 acc 23.828 (24.976)	lr 0.04262
Warmup Train [11][140/3239]	Time 0.135 (0.345)	Data 0.001 (0.088)	Loss 5.2616 (5.2452)	Top-1 acc 10.938 (9.976)	Top-5 acc 26.562 (25.014)	lr 0.04262
Warmup Train [11][150/3239]	Time 0.201 (0.337)	Data 0.002 (0.082)	Loss 5.1936 (5.2484)	Top-1 acc 11.719 (9.973)	Top-5 acc 24.219 (24.961)	lr 0.04261
Warmup Train [11][160/3239]	Time 0.156 (0.329)	Data 0.001 (0.078)	Loss 5.2323 (5.2513)	Top-1 acc 7.812 (9.945)	Top-5 acc 23.438 (24.869)	lr 0.04261
Warmup Train [11][170/3239]	Time 0.200 (0.322)	Data 0.001 (0.073)	Loss 5.3879 (5.2511)	Top-1 acc 9.375 (9.967)	Top-5 acc 21.875 (24.890)	lr 0.04260
Warmup Train [11][180/3239]	Time 0.300 (0.318)	Data 0.001 (0.070)	Loss 5.1623 (5.2512)	Top-1 acc 11.328 (9.971)	Top-5 acc 30.078 (24.912)	lr 0.04260
Warmup Train [11][190/3239]	Time 0.241 (0.312)	Data 0.002 (0.066)	Loss 5.3041 (5.2525)	Top-1 acc 6.641 (9.919)	Top-5 acc 22.656 (24.871)	lr 0.04260
Warmup Train [11][200/3239]	Time 0.201 (0.307)	Data 0.002 (0.063)	Loss 5.1399 (5.2494)	Top-1 acc 10.156 (9.968)	Top-5 acc 25.781 (24.936)	lr 0.04259
Warmup Train [11][210/3239]	Time 0.195 (0.304)	Data 0.001 (0.060)	Loss 5.1948 (5.2487)	Top-1 acc 12.500 (9.975)	Top-5 acc 26.172 (24.970)	lr 0.04259
Warmup Train [11][220/3239]	Time 0.195 (0.299)	Data 0.002 (0.057)	Loss 5.3815 (5.2492)	Top-1 acc 11.328 (9.988)	Top-5 acc 23.828 (24.943)	lr 0.04258
Warmup Train [11][230/3239]	Time 0.182 (0.296)	Data 0.001 (0.055)	Loss 5.3509 (5.2497)	Top-1 acc 9.766 (9.974)	Top-5 acc 24.609 (24.966)	lr 0.04258
Warmup Train [11][240/3239]	Time 0.307 (0.293)	Data 0.001 (0.053)	Loss 5.3590 (5.2503)	Top-1 acc 10.547 (9.960)	Top-5 acc 22.656 (24.937)	lr 0.04257
Warmup Train [11][250/3239]	Time 0.228 (0.290)	Data 0.001 (0.051)	Loss 5.1476 (5.2498)	Top-1 acc 10.156 (9.962)	Top-5 acc 25.391 (24.947)	lr 0.04257
Warmup Train [11][260/3239]	Time 0.210 (0.287)	Data 0.001 (0.049)	Loss 5.1501 (5.2487)	Top-1 acc 12.109 (9.968)	Top-5 acc 26.172 (24.985)	lr 0.04257
Warmup Train [11][270/3239]	Time 0.193 (0.285)	Data 0.001 (0.047)	Loss 5.2468 (5.2485)	Top-1 acc 11.719 (9.990)	Top-5 acc 23.438 (24.981)	lr 0.04256
Warmup Train [11][280/3239]	Time 0.139 (0.282)	Data 0.001 (0.046)	Loss 5.2963 (5.2476)	Top-1 acc 6.641 (10.019)	Top-5 acc 23.438 (25.035)	lr 0.04256
Warmup Train [11][290/3239]	Time 0.177 (0.281)	Data 0.001 (0.044)	Loss 5.3223 (5.2483)	Top-1 acc 6.250 (9.988)	Top-5 acc 22.656 (25.030)	lr 0.04255
Warmup Train [11][300/3239]	Time 0.181 (0.278)	Data 0.001 (0.043)	Loss 5.1017 (5.2494)	Top-1 acc 9.375 (9.965)	Top-5 acc 23.828 (24.988)	lr 0.04255
Warmup Train [11][310/3239]	Time 0.129 (0.276)	Data 0.001 (0.042)	Loss 5.2556 (5.2501)	Top-1 acc 8.984 (9.930)	Top-5 acc 26.562 (24.984)	lr 0.04254
Warmup Train [11][320/3239]	Time 0.157 (0.275)	Data 0.001 (0.041)	Loss 5.2265 (5.2498)	Top-1 acc 7.031 (9.919)	Top-5 acc 25.391 (24.972)	lr 0.04254
Warmup Train [11][330/3239]	Time 0.221 (0.273)	Data 0.001 (0.040)	Loss 5.2941 (5.2501)	Top-1 acc 9.766 (9.937)	Top-5 acc 23.047 (24.967)	lr 0.04254
Warmup Train [11][340/3239]	Time 0.164 (0.272)	Data 0.001 (0.038)	Loss 5.2696 (5.2506)	Top-1 acc 9.375 (9.918)	Top-5 acc 27.344 (24.960)	lr 0.04253
Warmup Train [11][350/3239]	Time 0.260 (0.270)	Data 0.001 (0.037)	Loss 5.3444 (5.2513)	Top-1 acc 10.156 (9.904)	Top-5 acc 22.266 (24.898)	lr 0.04253
Warmup Train [11][360/3239]	Time 0.282 (0.270)	Data 0.001 (0.036)	Loss 5.2150 (5.2507)	Top-1 acc 9.766 (9.892)	Top-5 acc 23.438 (24.911)	lr 0.04252
Warmup Train [11][370/3239]	Time 0.180 (0.268)	Data 0.001 (0.036)	Loss 5.2825 (5.2502)	Top-1 acc 10.547 (9.889)	Top-5 acc 26.172 (24.924)	lr 0.04252
Warmup Train [11][380/3239]	Time 0.221 (0.267)	Data 0.001 (0.035)	Loss 5.1998 (5.2498)	Top-1 acc 10.156 (9.921)	Top-5 acc 23.438 (24.948)	lr 0.04251
Warmup Train [11][390/3239]	Time 0.178 (0.265)	Data 0.001 (0.034)	Loss 5.1982 (5.2501)	Top-1 acc 9.375 (9.903)	Top-5 acc 27.734 (24.923)	lr 0.04251
Warmup Train [11][400/3239]	Time 0.160 (0.264)	Data 0.002 (0.033)	Loss 5.2744 (5.2502)	Top-1 acc 8.984 (9.892)	Top-5 acc 21.094 (24.934)	lr 0.04251
Warmup Train [11][410/3239]	Time 0.190 (0.263)	Data 0.001 (0.033)	Loss 5.1650 (5.2498)	Top-1 acc 9.766 (9.899)	Top-5 acc 25.000 (24.932)	lr 0.04250
Warmup Train [11][420/3239]	Time 0.166 (0.262)	Data 0.001 (0.032)	Loss 5.4455 (5.2516)	Top-1 acc 9.766 (9.896)	Top-5 acc 23.438 (24.911)	lr 0.04250
Warmup Train [11][430/3239]	Time 0.231 (0.261)	Data 0.001 (0.031)	Loss 5.0857 (5.2504)	Top-1 acc 12.109 (9.912)	Top-5 acc 28.906 (24.941)	lr 0.04249
Warmup Train [11][440/3239]	Time 0.184 (0.260)	Data 0.001 (0.030)	Loss 5.1922 (5.2503)	Top-1 acc 10.156 (9.900)	Top-5 acc 23.047 (24.936)	lr 0.04249
Warmup Train [11][450/3239]	Time 0.188 (0.259)	Data 0.001 (0.030)	Loss 5.2935 (5.2500)	Top-1 acc 10.547 (9.919)	Top-5 acc 25.391 (24.948)	lr 0.04248
Warmup Train [11][460/3239]	Time 0.297 (0.259)	Data 0.001 (0.029)	Loss 5.3688 (5.2495)	Top-1 acc 8.203 (9.922)	Top-5 acc 25.000 (24.978)	lr 0.04248
Warmup Train [11][470/3239]	Time 0.145 (0.258)	Data 0.003 (0.029)	Loss 5.3294 (5.2492)	Top-1 acc 8.984 (9.933)	Top-5 acc 23.828 (24.987)	lr 0.04248
Warmup Train [11][480/3239]	Time 0.226 (0.257)	Data 0.001 (0.028)	Loss 5.3712 (5.2500)	Top-1 acc 8.203 (9.909)	Top-5 acc 23.047 (24.957)	lr 0.04247
Warmup Train [11][490/3239]	Time 0.228 (0.256)	Data 0.002 (0.028)	Loss 5.2435 (5.2503)	Top-1 acc 9.766 (9.896)	Top-5 acc 25.391 (24.957)	lr 0.04247
Warmup Train [11][500/3239]	Time 0.209 (0.256)	Data 0.001 (0.027)	Loss 5.1250 (5.2494)	Top-1 acc 12.109 (9.897)	Top-5 acc 27.344 (24.963)	lr 0.04246
Warmup Train [11][510/3239]	Time 0.287 (0.255)	Data 0.001 (0.027)	Loss 5.1432 (5.2494)	Top-1 acc 10.547 (9.928)	Top-5 acc 28.125 (24.963)	lr 0.04246
Warmup Train [11][520/3239]	Time 0.204 (0.254)	Data 0.001 (0.026)	Loss 5.2360 (5.2494)	Top-1 acc 12.109 (9.927)	Top-5 acc 25.000 (24.938)	lr 0.04245
Warmup Train [11][530/3239]	Time 0.178 (0.253)	Data 0.001 (0.026)	Loss 5.1022 (5.2492)	Top-1 acc 10.938 (9.922)	Top-5 acc 27.344 (24.935)	lr 0.04245
Warmup Train [11][540/3239]	Time 0.301 (0.253)	Data 0.001 (0.025)	Loss 5.3301 (5.2492)	Top-1 acc 8.984 (9.927)	Top-5 acc 22.266 (24.946)	lr 0.04244
Warmup Train [11][550/3239]	Time 0.263 (0.252)	Data 0.001 (0.025)	Loss 5.0898 (5.2493)	Top-1 acc 12.500 (9.925)	Top-5 acc 30.078 (24.932)	lr 0.04244
Warmup Train [11][560/3239]	Time 0.324 (0.252)	Data 0.001 (0.025)	Loss 5.1938 (5.2488)	Top-1 acc 10.156 (9.926)	Top-5 acc 26.953 (24.942)	lr 0.04244
Warmup Train [11][570/3239]	Time 0.281 (0.251)	Data 0.001 (0.024)	Loss 5.1697 (5.2478)	Top-1 acc 7.812 (9.939)	Top-5 acc 27.344 (24.971)	lr 0.04243
Warmup Train [11][580/3239]	Time 0.231 (0.251)	Data 0.001 (0.024)	Loss 5.2301 (5.2482)	Top-1 acc 14.062 (9.941)	Top-5 acc 27.734 (24.966)	lr 0.04243
Warmup Train [11][590/3239]	Time 0.244 (0.250)	Data 0.001 (0.024)	Loss 5.1937 (5.2486)	Top-1 acc 12.500 (9.930)	Top-5 acc 25.000 (24.948)	lr 0.04242
Warmup Train [11][600/3239]	Time 0.318 (0.250)	Data 0.001 (0.023)	Loss 5.0979 (5.2484)	Top-1 acc 10.547 (9.931)	Top-5 acc 30.469 (24.962)	lr 0.04242
Warmup Train [11][610/3239]	Time 0.232 (0.249)	Data 0.001 (0.023)	Loss 5.2091 (5.2481)	Top-1 acc 11.719 (9.922)	Top-5 acc 27.344 (24.970)	lr 0.04241
Warmup Train [11][620/3239]	Time 0.220 (0.249)	Data 0.001 (0.023)	Loss 5.3739 (5.2482)	Top-1 acc 8.203 (9.920)	Top-5 acc 21.875 (24.962)	lr 0.04241
Warmup Train [11][630/3239]	Time 0.220 (0.248)	Data 0.002 (0.022)	Loss 5.1583 (5.2488)	Top-1 acc 10.156 (9.917)	Top-5 acc 29.297 (24.951)	lr 0.04241
Warmup Train [11][640/3239]	Time 0.214 (0.248)	Data 0.001 (0.022)	Loss 5.1219 (5.2481)	Top-1 acc 13.281 (9.928)	Top-5 acc 31.641 (24.976)	lr 0.04240
Warmup Train [11][650/3239]	Time 0.159 (0.248)	Data 0.001 (0.022)	Loss 5.4048 (5.2480)	Top-1 acc 7.422 (9.924)	Top-5 acc 22.266 (24.984)	lr 0.04240
Warmup Train [11][660/3239]	Time 0.191 (0.247)	Data 0.001 (0.021)	Loss 5.2083 (5.2476)	Top-1 acc 8.594 (9.916)	Top-5 acc 24.219 (24.977)	lr 0.04239
Warmup Train [11][670/3239]	Time 0.270 (0.247)	Data 0.001 (0.021)	Loss 5.2595 (5.2485)	Top-1 acc 10.938 (9.904)	Top-5 acc 27.344 (24.956)	lr 0.04239
Warmup Train [11][680/3239]	Time 0.394 (0.247)	Data 0.001 (0.021)	Loss 5.1695 (5.2486)	Top-1 acc 8.594 (9.912)	Top-5 acc 25.000 (24.971)	lr 0.04238
Warmup Train [11][690/3239]	Time 0.226 (0.246)	Data 0.001 (0.020)	Loss 5.2120 (5.2482)	Top-1 acc 10.156 (9.918)	Top-5 acc 24.219 (24.984)	lr 0.04238
Warmup Train [11][700/3239]	Time 0.231 (0.246)	Data 0.001 (0.020)	Loss 5.2594 (5.2480)	Top-1 acc 9.375 (9.913)	Top-5 acc 24.219 (24.979)	lr 0.04238
Warmup Train [11][710/3239]	Time 0.226 (0.246)	Data 0.002 (0.020)	Loss 5.2655 (5.2477)	Top-1 acc 10.938 (9.917)	Top-5 acc 26.172 (24.985)	lr 0.04237
Warmup Train [11][720/3239]	Time 0.146 (0.246)	Data 0.002 (0.020)	Loss 5.2095 (5.2478)	Top-1 acc 11.719 (9.918)	Top-5 acc 26.172 (24.983)	lr 0.04237
Warmup Train [11][730/3239]	Time 0.226 (0.246)	Data 0.002 (0.020)	Loss 5.1400 (5.2473)	Top-1 acc 12.109 (9.942)	Top-5 acc 27.734 (24.998)	lr 0.04236
Warmup Train [11][740/3239]	Time 0.201 (0.245)	Data 0.001 (0.019)	Loss 5.1777 (5.2472)	Top-1 acc 10.938 (9.945)	Top-5 acc 25.781 (25.012)	lr 0.04236
Warmup Train [11][750/3239]	Time 0.214 (0.245)	Data 0.002 (0.019)	Loss 5.1797 (5.2472)	Top-1 acc 11.328 (9.942)	Top-5 acc 29.688 (25.017)	lr 0.04235
Warmup Train [11][760/3239]	Time 0.130 (0.245)	Data 0.001 (0.019)	Loss 5.4054 (5.2467)	Top-1 acc 7.812 (9.948)	Top-5 acc 18.359 (25.038)	lr 0.04235
Warmup Train [11][770/3239]	Time 0.138 (0.244)	Data 0.001 (0.019)	Loss 5.2459 (5.2461)	Top-1 acc 8.984 (9.957)	Top-5 acc 25.000 (25.057)	lr 0.04234
Warmup Train [11][780/3239]	Time 0.159 (0.244)	Data 0.001 (0.019)	Loss 5.2402 (5.2460)	Top-1 acc 10.156 (9.958)	Top-5 acc 27.344 (25.064)	lr 0.04234
Warmup Train [11][790/3239]	Time 0.353 (0.244)	Data 0.001 (0.019)	Loss 5.1540 (5.2455)	Top-1 acc 8.984 (9.960)	Top-5 acc 21.875 (25.080)	lr 0.04234
Warmup Train [11][800/3239]	Time 0.256 (0.244)	Data 0.024 (0.018)	Loss 5.2653 (5.2453)	Top-1 acc 9.766 (9.960)	Top-5 acc 26.172 (25.087)	lr 0.04233
Warmup Train [11][810/3239]	Time 0.189 (0.243)	Data 0.002 (0.018)	Loss 5.3450 (5.2454)	Top-1 acc 7.031 (9.954)	Top-5 acc 21.094 (25.084)	lr 0.04233
Warmup Train [11][820/3239]	Time 0.210 (0.243)	Data 0.001 (0.018)	Loss 5.2169 (5.2459)	Top-1 acc 8.203 (9.947)	Top-5 acc 26.172 (25.081)	lr 0.04232
Warmup Train [11][830/3239]	Time 0.193 (0.243)	Data 0.001 (0.018)	Loss 5.1825 (5.2460)	Top-1 acc 7.422 (9.942)	Top-5 acc 25.781 (25.074)	lr 0.04232
Warmup Train [11][840/3239]	Time 0.287 (0.243)	Data 0.002 (0.018)	Loss 5.1569 (5.2456)	Top-1 acc 10.547 (9.944)	Top-5 acc 28.906 (25.102)	lr 0.04231
Warmup Train [11][850/3239]	Time 0.178 (0.242)	Data 0.001 (0.017)	Loss 5.1623 (5.2449)	Top-1 acc 9.375 (9.954)	Top-5 acc 26.562 (25.129)	lr 0.04231
Warmup Train [11][860/3239]	Time 0.202 (0.242)	Data 0.002 (0.017)	Loss 5.2658 (5.2444)	Top-1 acc 10.938 (9.958)	Top-5 acc 27.344 (25.134)	lr 0.04231
Warmup Train [11][870/3239]	Time 0.240 (0.242)	Data 0.002 (0.017)	Loss 5.2541 (5.2438)	Top-1 acc 8.984 (9.968)	Top-5 acc 24.609 (25.151)	lr 0.04230
Warmup Train [11][880/3239]	Time 0.169 (0.242)	Data 0.001 (0.017)	Loss 5.1385 (5.2430)	Top-1 acc 10.156 (9.976)	Top-5 acc 28.906 (25.179)	lr 0.04230
Warmup Train [11][890/3239]	Time 0.437 (0.241)	Data 0.001 (0.017)	Loss 5.0785 (5.2428)	Top-1 acc 11.719 (9.982)	Top-5 acc 26.953 (25.174)	lr 0.04229
Warmup Train [11][900/3239]	Time 0.334 (0.241)	Data 0.001 (0.017)	Loss 5.3333 (5.2429)	Top-1 acc 6.641 (9.978)	Top-5 acc 19.922 (25.174)	lr 0.04229
Warmup Train [11][910/3239]	Time 0.172 (0.241)	Data 0.002 (0.016)	Loss 5.1120 (5.2425)	Top-1 acc 10.938 (9.983)	Top-5 acc 29.297 (25.175)	lr 0.04228
Warmup Train [11][920/3239]	Time 0.285 (0.241)	Data 0.001 (0.016)	Loss 5.2952 (5.2423)	Top-1 acc 10.156 (9.987)	Top-5 acc 26.172 (25.180)	lr 0.04228
Warmup Train [11][930/3239]	Time 0.243 (0.241)	Data 0.001 (0.016)	Loss 5.2466 (5.2421)	Top-1 acc 10.938 (9.986)	Top-5 acc 26.562 (25.183)	lr 0.04227
Warmup Train [11][940/3239]	Time 0.221 (0.240)	Data 0.001 (0.016)	Loss 5.1118 (5.2419)	Top-1 acc 12.891 (9.991)	Top-5 acc 27.344 (25.193)	lr 0.04227
Warmup Train [11][950/3239]	Time 0.209 (0.240)	Data 0.001 (0.016)	Loss 5.2636 (5.2419)	Top-1 acc 8.594 (9.983)	Top-5 acc 24.609 (25.186)	lr 0.04227
Warmup Train [11][960/3239]	Time 0.263 (0.240)	Data 0.001 (0.016)	Loss 5.3370 (5.2420)	Top-1 acc 7.422 (9.997)	Top-5 acc 22.266 (25.191)	lr 0.04226
Warmup Train [11][970/3239]	Time 0.181 (0.240)	Data 0.001 (0.016)	Loss 5.1514 (5.2419)	Top-1 acc 11.328 (9.994)	Top-5 acc 31.250 (25.202)	lr 0.04226
Warmup Train [11][980/3239]	Time 0.258 (0.240)	Data 0.001 (0.015)	Loss 5.0758 (5.2417)	Top-1 acc 9.766 (9.994)	Top-5 acc 30.469 (25.207)	lr 0.04225
Warmup Train [11][990/3239]	Time 0.299 (0.240)	Data 0.001 (0.015)	Loss 5.1818 (5.2419)	Top-1 acc 11.328 (9.996)	Top-5 acc 26.172 (25.206)	lr 0.04225
Warmup Train [11][1000/3239]	Time 0.293 (0.240)	Data 0.002 (0.015)	Loss 5.2330 (5.2421)	Top-1 acc 10.156 (9.992)	Top-5 acc 24.219 (25.200)	lr 0.04224
Warmup Train [11][1010/3239]	Time 0.166 (0.239)	Data 0.001 (0.015)	Loss 5.2857 (5.2421)	Top-1 acc 9.375 (9.997)	Top-5 acc 21.094 (25.207)	lr 0.04224
Warmup Train [11][1020/3239]	Time 0.246 (0.239)	Data 0.001 (0.015)	Loss 5.3347 (5.2419)	Top-1 acc 6.641 (10.004)	Top-5 acc 21.484 (25.206)	lr 0.04224
Warmup Train [11][1030/3239]	Time 0.179 (0.239)	Data 0.001 (0.015)	Loss 5.2351 (5.2419)	Top-1 acc 9.766 (10.001)	Top-5 acc 25.000 (25.201)	lr 0.04223
Warmup Train [11][1040/3239]	Time 0.191 (0.239)	Data 0.002 (0.015)	Loss 5.1457 (5.2414)	Top-1 acc 7.812 (10.001)	Top-5 acc 25.391 (25.206)	lr 0.04223
Warmup Train [11][1050/3239]	Time 0.192 (0.238)	Data 0.001 (0.015)	Loss 5.2115 (5.2414)	Top-1 acc 10.938 (10.000)	Top-5 acc 24.609 (25.200)	lr 0.04222
Warmup Train [11][1060/3239]	Time 0.209 (0.238)	Data 0.001 (0.015)	Loss 5.2841 (5.2409)	Top-1 acc 10.547 (10.011)	Top-5 acc 25.781 (25.206)	lr 0.04222
Warmup Train [11][1070/3239]	Time 0.221 (0.238)	Data 0.001 (0.014)	Loss 5.1620 (5.2406)	Top-1 acc 10.938 (10.018)	Top-5 acc 29.297 (25.226)	lr 0.04221
Warmup Train [11][1080/3239]	Time 0.167 (0.238)	Data 0.001 (0.014)	Loss 5.2051 (5.2404)	Top-1 acc 12.891 (10.020)	Top-5 acc 29.688 (25.238)	lr 0.04221
Warmup Train [11][1090/3239]	Time 0.167 (0.238)	Data 0.002 (0.014)	Loss 5.3120 (5.2404)	Top-1 acc 9.375 (10.020)	Top-5 acc 25.391 (25.232)	lr 0.04220
Warmup Train [11][1100/3239]	Time 0.133 (0.238)	Data 0.001 (0.014)	Loss 5.3247 (5.2403)	Top-1 acc 8.203 (10.016)	Top-5 acc 22.656 (25.231)	lr 0.04220
Warmup Train [11][1110/3239]	Time 0.182 (0.238)	Data 0.001 (0.014)	Loss 5.3661 (5.2407)	Top-1 acc 7.422 (10.012)	Top-5 acc 19.922 (25.207)	lr 0.04220
Warmup Train [11][1120/3239]	Time 0.347 (0.238)	Data 0.001 (0.014)	Loss 5.1833 (5.2403)	Top-1 acc 11.719 (10.018)	Top-5 acc 27.344 (25.211)	lr 0.04219
Warmup Train [11][1130/3239]	Time 0.196 (0.237)	Data 0.002 (0.014)	Loss 5.2829 (5.2406)	Top-1 acc 9.766 (10.011)	Top-5 acc 26.172 (25.212)	lr 0.04219
Warmup Train [11][1140/3239]	Time 0.198 (0.237)	Data 0.001 (0.014)	Loss 5.2839 (5.2411)	Top-1 acc 9.375 (10.004)	Top-5 acc 27.734 (25.204)	lr 0.04218
Warmup Train [11][1150/3239]	Time 0.187 (0.237)	Data 0.001 (0.014)	Loss 5.1636 (5.2411)	Top-1 acc 12.109 (9.999)	Top-5 acc 25.781 (25.205)	lr 0.04218
Warmup Train [11][1160/3239]	Time 0.220 (0.237)	Data 0.002 (0.014)	Loss 5.1444 (5.2411)	Top-1 acc 11.719 (10.009)	Top-5 acc 26.953 (25.214)	lr 0.04217
Warmup Train [11][1170/3239]	Time 0.128 (0.237)	Data 0.001 (0.014)	Loss 5.1682 (5.2411)	Top-1 acc 12.109 (10.010)	Top-5 acc 26.562 (25.216)	lr 0.04217
Warmup Train [11][1180/3239]	Time 0.189 (0.237)	Data 0.001 (0.014)	Loss 5.1495 (5.2406)	Top-1 acc 10.938 (10.022)	Top-5 acc 28.516 (25.232)	lr 0.04216
Warmup Train [11][1190/3239]	Time 0.216 (0.237)	Data 0.002 (0.013)	Loss 5.1866 (5.2403)	Top-1 acc 13.672 (10.026)	Top-5 acc 29.297 (25.247)	lr 0.04216
Warmup Train [11][1200/3239]	Time 0.208 (0.236)	Data 0.001 (0.013)	Loss 5.3268 (5.2402)	Top-1 acc 10.547 (10.028)	Top-5 acc 23.047 (25.245)	lr 0.04216
Warmup Train [11][1210/3239]	Time 0.178 (0.236)	Data 0.001 (0.013)	Loss 5.1906 (5.2400)	Top-1 acc 10.156 (10.035)	Top-5 acc 23.828 (25.253)	lr 0.04215
Warmup Train [11][1220/3239]	Time 0.140 (0.236)	Data 0.001 (0.013)	Loss 5.0463 (5.2399)	Top-1 acc 11.719 (10.032)	Top-5 acc 27.734 (25.255)	lr 0.04215
Warmup Train [11][1230/3239]	Time 0.164 (0.236)	Data 0.002 (0.013)	Loss 5.3062 (5.2399)	Top-1 acc 10.547 (10.044)	Top-5 acc 23.047 (25.262)	lr 0.04214
Warmup Train [11][1240/3239]	Time 0.431 (0.236)	Data 0.001 (0.013)	Loss 5.0860 (5.2396)	Top-1 acc 17.969 (10.043)	Top-5 acc 32.812 (25.273)	lr 0.04214
Warmup Train [11][1250/3239]	Time 0.172 (0.236)	Data 0.001 (0.013)	Loss 5.3808 (5.2396)	Top-1 acc 9.375 (10.042)	Top-5 acc 22.266 (25.270)	lr 0.04213
Warmup Train [11][1260/3239]	Time 0.197 (0.236)	Data 0.001 (0.013)	Loss 5.2645 (5.2400)	Top-1 acc 10.547 (10.036)	Top-5 acc 25.000 (25.265)	lr 0.04213
Warmup Train [11][1270/3239]	Time 0.152 (0.235)	Data 0.001 (0.013)	Loss 5.2292 (5.2397)	Top-1 acc 9.375 (10.035)	Top-5 acc 23.438 (25.272)	lr 0.04213
Warmup Train [11][1280/3239]	Time 0.206 (0.235)	Data 0.001 (0.013)	Loss 5.3276 (5.2396)	Top-1 acc 8.203 (10.038)	Top-5 acc 21.094 (25.269)	lr 0.04212
Warmup Train [11][1290/3239]	Time 0.244 (0.235)	Data 0.001 (0.013)	Loss 5.1417 (5.2392)	Top-1 acc 10.938 (10.037)	Top-5 acc 26.562 (25.275)	lr 0.04212
Warmup Train [11][1300/3239]	Time 0.180 (0.235)	Data 0.001 (0.013)	Loss 5.1584 (5.2393)	Top-1 acc 12.891 (10.033)	Top-5 acc 28.125 (25.277)	lr 0.04211
Warmup Train [11][1310/3239]	Time 0.294 (0.235)	Data 0.002 (0.012)	Loss 5.1767 (5.2393)	Top-1 acc 8.594 (10.034)	Top-5 acc 23.438 (25.271)	lr 0.04211
Warmup Train [11][1320/3239]	Time 0.184 (0.235)	Data 0.001 (0.012)	Loss 5.2707 (5.2387)	Top-1 acc 8.984 (10.042)	Top-5 acc 27.344 (25.289)	lr 0.04210
Warmup Train [11][1330/3239]	Time 0.194 (0.235)	Data 0.001 (0.012)	Loss 5.1616 (5.2386)	Top-1 acc 11.719 (10.049)	Top-5 acc 25.781 (25.293)	lr 0.04210
Warmup Train [11][1340/3239]	Time 0.339 (0.235)	Data 0.001 (0.012)	Loss 5.1550 (5.2383)	Top-1 acc 11.328 (10.053)	Top-5 acc 27.734 (25.294)	lr 0.04209
Warmup Train [11][1350/3239]	Time 0.223 (0.235)	Data 0.001 (0.012)	Loss 5.3377 (5.2382)	Top-1 acc 8.984 (10.055)	Top-5 acc 25.391 (25.300)	lr 0.04209
Warmup Train [11][1360/3239]	Time 0.208 (0.235)	Data 0.001 (0.012)	Loss 5.3710 (5.2381)	Top-1 acc 9.375 (10.053)	Top-5 acc 22.656 (25.303)	lr 0.04209
Warmup Train [11][1370/3239]	Time 0.178 (0.234)	Data 0.001 (0.012)	Loss 5.3211 (5.2386)	Top-1 acc 7.812 (10.043)	Top-5 acc 23.438 (25.291)	lr 0.04208
Warmup Train [11][1380/3239]	Time 0.191 (0.234)	Data 0.001 (0.012)	Loss 5.2867 (5.2386)	Top-1 acc 10.547 (10.045)	Top-5 acc 24.609 (25.300)	lr 0.04208
Warmup Train [11][1390/3239]	Time 0.185 (0.234)	Data 0.001 (0.012)	Loss 5.4585 (5.2390)	Top-1 acc 6.250 (10.041)	Top-5 acc 18.750 (25.281)	lr 0.04207
Warmup Train [11][1400/3239]	Time 0.263 (0.234)	Data 0.001 (0.012)	Loss 5.1653 (5.2389)	Top-1 acc 10.938 (10.041)	Top-5 acc 27.344 (25.283)	lr 0.04207
Warmup Train [11][1410/3239]	Time 0.238 (0.234)	Data 0.001 (0.012)	Loss 5.2335 (5.2389)	Top-1 acc 12.500 (10.045)	Top-5 acc 26.172 (25.295)	lr 0.04206
Warmup Train [11][1420/3239]	Time 0.169 (0.234)	Data 0.001 (0.012)	Loss 5.2574 (5.2390)	Top-1 acc 10.547 (10.045)	Top-5 acc 25.000 (25.299)	lr 0.04206
Warmup Train [11][1430/3239]	Time 0.250 (0.234)	Data 0.001 (0.012)	Loss 5.1631 (5.2387)	Top-1 acc 10.938 (10.047)	Top-5 acc 25.391 (25.304)	lr 0.04205
Warmup Train [11][1440/3239]	Time 0.306 (0.234)	Data 0.002 (0.012)	Loss 5.1317 (5.2388)	Top-1 acc 15.234 (10.045)	Top-5 acc 32.812 (25.299)	lr 0.04205
Warmup Train [11][1450/3239]	Time 0.144 (0.234)	Data 0.001 (0.012)	Loss 5.1643 (5.2384)	Top-1 acc 11.328 (10.060)	Top-5 acc 26.172 (25.307)	lr 0.04205
Warmup Train [11][1460/3239]	Time 0.199 (0.234)	Data 0.001 (0.012)	Loss 5.2156 (5.2380)	Top-1 acc 10.938 (10.059)	Top-5 acc 27.734 (25.309)	lr 0.04204
Warmup Train [11][1470/3239]	Time 0.201 (0.234)	Data 0.001 (0.011)	Loss 5.1318 (5.2381)	Top-1 acc 11.328 (10.053)	Top-5 acc 26.562 (25.304)	lr 0.04204
Warmup Train [11][1480/3239]	Time 0.181 (0.234)	Data 0.001 (0.011)	Loss 4.9810 (5.2379)	Top-1 acc 10.547 (10.056)	Top-5 acc 31.641 (25.309)	lr 0.04203
Warmup Train [11][1490/3239]	Time 0.204 (0.233)	Data 0.002 (0.011)	Loss 5.2745 (5.2376)	Top-1 acc 8.594 (10.059)	Top-5 acc 23.438 (25.313)	lr 0.04203
Warmup Train [11][1500/3239]	Time 0.212 (0.233)	Data 0.001 (0.011)	Loss 5.1053 (5.2372)	Top-1 acc 12.109 (10.068)	Top-5 acc 28.516 (25.323)	lr 0.04202
Warmup Train [11][1510/3239]	Time 0.243 (0.233)	Data 0.001 (0.011)	Loss 5.1779 (5.2373)	Top-1 acc 10.156 (10.066)	Top-5 acc 28.516 (25.323)	lr 0.04202
Warmup Train [11][1520/3239]	Time 0.157 (0.233)	Data 0.001 (0.011)	Loss 5.3674 (5.2373)	Top-1 acc 11.719 (10.068)	Top-5 acc 23.438 (25.325)	lr 0.04201
Warmup Train [11][1530/3239]	Time 0.220 (0.233)	Data 0.001 (0.011)	Loss 5.3259 (5.2371)	Top-1 acc 8.594 (10.074)	Top-5 acc 22.656 (25.329)	lr 0.04201
Warmup Train [11][1540/3239]	Time 0.199 (0.233)	Data 0.001 (0.011)	Loss 5.2505 (5.2368)	Top-1 acc 7.031 (10.077)	Top-5 acc 22.266 (25.326)	lr 0.04201
Warmup Train [11][1550/3239]	Time 0.329 (0.233)	Data 0.001 (0.011)	Loss 5.2489 (5.2367)	Top-1 acc 8.594 (10.077)	Top-5 acc 22.266 (25.320)	lr 0.04200
Warmup Train [11][1560/3239]	Time 0.139 (0.233)	Data 0.001 (0.011)	Loss 5.2852 (5.2367)	Top-1 acc 10.547 (10.077)	Top-5 acc 25.000 (25.321)	lr 0.04200
Warmup Train [11][1570/3239]	Time 0.125 (0.233)	Data 0.001 (0.011)	Loss 5.4175 (5.2366)	Top-1 acc 8.203 (10.087)	Top-5 acc 20.703 (25.323)	lr 0.04199
Warmup Train [11][1580/3239]	Time 0.279 (0.233)	Data 0.001 (0.011)	Loss 5.1970 (5.2369)	Top-1 acc 9.766 (10.087)	Top-5 acc 23.438 (25.325)	lr 0.04199
Warmup Train [11][1590/3239]	Time 0.156 (0.232)	Data 0.001 (0.011)	Loss 5.1194 (5.2365)	Top-1 acc 10.938 (10.094)	Top-5 acc 25.000 (25.336)	lr 0.04198
Warmup Train [11][1600/3239]	Time 0.191 (0.232)	Data 0.001 (0.011)	Loss 5.2065 (5.2363)	Top-1 acc 11.328 (10.097)	Top-5 acc 27.344 (25.337)	lr 0.04198
Warmup Train [11][1610/3239]	Time 0.190 (0.232)	Data 0.002 (0.011)	Loss 5.0534 (5.2362)	Top-1 acc 13.672 (10.095)	Top-5 acc 30.859 (25.339)	lr 0.04197
Warmup Train [11][1620/3239]	Time 0.220 (0.232)	Data 0.002 (0.011)	Loss 5.1033 (5.2360)	Top-1 acc 10.547 (10.092)	Top-5 acc 27.344 (25.344)	lr 0.04197
Warmup Train [11][1630/3239]	Time 0.245 (0.232)	Data 0.002 (0.011)	Loss 5.1796 (5.2358)	Top-1 acc 12.891 (10.094)	Top-5 acc 26.562 (25.346)	lr 0.04197
Warmup Train [11][1640/3239]	Time 0.245 (0.232)	Data 0.001 (0.011)	Loss 5.2787 (5.2359)	Top-1 acc 10.547 (10.093)	Top-5 acc 23.047 (25.343)	lr 0.04196
Warmup Train [11][1650/3239]	Time 0.240 (0.232)	Data 0.002 (0.011)	Loss 5.1471 (5.2353)	Top-1 acc 12.891 (10.109)	Top-5 acc 28.906 (25.364)	lr 0.04196
Warmup Train [11][1660/3239]	Time 0.282 (0.232)	Data 0.002 (0.011)	Loss 5.3122 (5.2352)	Top-1 acc 11.719 (10.115)	Top-5 acc 23.047 (25.369)	lr 0.04195
Warmup Train [11][1670/3239]	Time 0.252 (0.232)	Data 0.001 (0.011)	Loss 5.1756 (5.2352)	Top-1 acc 9.375 (10.116)	Top-5 acc 27.734 (25.370)	lr 0.04195
Warmup Train [11][1680/3239]	Time 0.184 (0.232)	Data 0.001 (0.010)	Loss 5.2905 (5.2347)	Top-1 acc 7.812 (10.120)	Top-5 acc 25.781 (25.385)	lr 0.04194
Warmup Train [11][1690/3239]	Time 0.246 (0.232)	Data 0.001 (0.010)	Loss 5.1910 (5.2346)	Top-1 acc 11.719 (10.121)	Top-5 acc 23.828 (25.387)	lr 0.04194
Warmup Train [11][1700/3239]	Time 0.258 (0.232)	Data 0.002 (0.010)	Loss 5.2913 (5.2345)	Top-1 acc 9.375 (10.123)	Top-5 acc 23.438 (25.383)	lr 0.04193
Warmup Train [11][1710/3239]	Time 0.191 (0.232)	Data 0.001 (0.010)	Loss 5.1645 (5.2343)	Top-1 acc 12.891 (10.121)	Top-5 acc 30.859 (25.389)	lr 0.04193
Warmup Train [11][1720/3239]	Time 0.222 (0.232)	Data 0.002 (0.010)	Loss 5.2928 (5.2341)	Top-1 acc 11.328 (10.129)	Top-5 acc 24.609 (25.396)	lr 0.04193
Warmup Train [11][1730/3239]	Time 0.177 (0.232)	Data 0.001 (0.010)	Loss 5.2880 (5.2337)	Top-1 acc 8.203 (10.131)	Top-5 acc 25.000 (25.402)	lr 0.04192
Warmup Train [11][1740/3239]	Time 0.226 (0.232)	Data 0.001 (0.010)	Loss 5.2511 (5.2335)	Top-1 acc 7.812 (10.133)	Top-5 acc 25.391 (25.410)	lr 0.04192
Warmup Train [11][1750/3239]	Time 0.195 (0.231)	Data 0.002 (0.010)	Loss 5.2078 (5.2333)	Top-1 acc 10.156 (10.136)	Top-5 acc 26.172 (25.413)	lr 0.04191
Warmup Train [11][1760/3239]	Time 0.268 (0.231)	Data 0.001 (0.010)	Loss 5.2934 (5.2332)	Top-1 acc 8.203 (10.132)	Top-5 acc 21.875 (25.417)	lr 0.04191
Warmup Train [11][1770/3239]	Time 0.335 (0.231)	Data 0.002 (0.010)	Loss 5.2357 (5.2331)	Top-1 acc 9.766 (10.132)	Top-5 acc 24.219 (25.415)	lr 0.04190
Warmup Train [11][1780/3239]	Time 0.287 (0.231)	Data 0.001 (0.010)	Loss 5.5014 (5.2332)	Top-1 acc 8.594 (10.131)	Top-5 acc 22.656 (25.415)	lr 0.04190
Warmup Train [11][1790/3239]	Time 0.168 (0.231)	Data 0.001 (0.010)	Loss 5.2631 (5.2329)	Top-1 acc 7.422 (10.136)	Top-5 acc 23.828 (25.428)	lr 0.04189
Warmup Train [11][1800/3239]	Time 0.144 (0.231)	Data 0.001 (0.010)	Loss 5.2871 (5.2327)	Top-1 acc 10.938 (10.137)	Top-5 acc 24.609 (25.438)	lr 0.04189
Warmup Train [11][1810/3239]	Time 0.265 (0.231)	Data 0.001 (0.010)	Loss 5.1778 (5.2326)	Top-1 acc 11.719 (10.137)	Top-5 acc 25.781 (25.444)	lr 0.04189
Warmup Train [11][1820/3239]	Time 0.177 (0.231)	Data 0.001 (0.010)	Loss 5.1920 (5.2322)	Top-1 acc 10.156 (10.146)	Top-5 acc 22.656 (25.455)	lr 0.04188
Warmup Train [11][1830/3239]	Time 0.195 (0.231)	Data 0.001 (0.010)	Loss 5.2077 (5.2318)	Top-1 acc 14.062 (10.153)	Top-5 acc 28.516 (25.463)	lr 0.04188
Warmup Train [11][1840/3239]	Time 0.154 (0.231)	Data 0.001 (0.010)	Loss 5.1804 (5.2317)	Top-1 acc 8.984 (10.156)	Top-5 acc 25.000 (25.470)	lr 0.04187
Warmup Train [11][1850/3239]	Time 0.262 (0.231)	Data 0.001 (0.010)	Loss 5.2713 (5.2318)	Top-1 acc 9.375 (10.152)	Top-5 acc 26.953 (25.475)	lr 0.04187
Warmup Train [11][1860/3239]	Time 0.216 (0.231)	Data 0.001 (0.010)	Loss 5.1856 (5.2316)	Top-1 acc 10.156 (10.156)	Top-5 acc 25.781 (25.474)	lr 0.04186
Warmup Train [11][1870/3239]	Time 0.305 (0.231)	Data 0.001 (0.010)	Loss 5.4721 (5.2315)	Top-1 acc 8.594 (10.158)	Top-5 acc 21.875 (25.475)	lr 0.04186
Warmup Train [11][1880/3239]	Time 0.318 (0.231)	Data 0.001 (0.010)	Loss 5.1468 (5.2312)	Top-1 acc 14.453 (10.166)	Top-5 acc 31.250 (25.482)	lr 0.04185
Warmup Train [11][1890/3239]	Time 0.221 (0.231)	Data 0.001 (0.010)	Loss 5.0988 (5.2311)	Top-1 acc 11.719 (10.168)	Top-5 acc 26.562 (25.487)	lr 0.04185
Warmup Train [11][1900/3239]	Time 0.215 (0.230)	Data 0.001 (0.010)	Loss 5.1555 (5.2310)	Top-1 acc 10.938 (10.168)	Top-5 acc 26.172 (25.490)	lr 0.04185
Warmup Train [11][1910/3239]	Time 0.179 (0.230)	Data 0.001 (0.010)	Loss 5.2496 (5.2310)	Top-1 acc 9.766 (10.171)	Top-5 acc 29.688 (25.489)	lr 0.04184
Warmup Train [11][1920/3239]	Time 0.233 (0.230)	Data 0.002 (0.010)	Loss 5.4803 (5.2312)	Top-1 acc 7.812 (10.168)	Top-5 acc 21.484 (25.487)	lr 0.04184
Warmup Train [11][1930/3239]	Time 0.192 (0.230)	Data 0.001 (0.010)	Loss 5.3664 (5.2316)	Top-1 acc 11.328 (10.167)	Top-5 acc 22.266 (25.482)	lr 0.04183
Warmup Train [11][1940/3239]	Time 0.298 (0.230)	Data 0.002 (0.009)	Loss 5.1946 (5.2314)	Top-1 acc 8.984 (10.169)	Top-5 acc 26.172 (25.491)	lr 0.04183
Warmup Train [11][1950/3239]	Time 0.193 (0.230)	Data 0.001 (0.009)	Loss 5.2189 (5.2313)	Top-1 acc 11.719 (10.174)	Top-5 acc 27.344 (25.495)	lr 0.04182
Warmup Train [11][1960/3239]	Time 0.182 (0.230)	Data 0.001 (0.009)	Loss 5.2164 (5.2313)	Top-1 acc 11.719 (10.174)	Top-5 acc 28.125 (25.499)	lr 0.04182
Warmup Train [11][1970/3239]	Time 0.181 (0.230)	Data 0.001 (0.009)	Loss 5.1304 (5.2311)	Top-1 acc 8.594 (10.174)	Top-5 acc 27.344 (25.504)	lr 0.04181
Warmup Train [11][1980/3239]	Time 0.195 (0.230)	Data 0.001 (0.009)	Loss 5.1519 (5.2307)	Top-1 acc 12.891 (10.181)	Top-5 acc 28.906 (25.515)	lr 0.04181
Warmup Train [11][1990/3239]	Time 0.232 (0.230)	Data 0.001 (0.009)	Loss 5.2362 (5.2307)	Top-1 acc 11.328 (10.185)	Top-5 acc 22.656 (25.513)	lr 0.04180
Warmup Train [11][2000/3239]	Time 0.199 (0.230)	Data 0.001 (0.009)	Loss 5.1294 (5.2305)	Top-1 acc 9.375 (10.185)	Top-5 acc 28.125 (25.519)	lr 0.04180
Warmup Train [11][2010/3239]	Time 0.227 (0.230)	Data 0.001 (0.009)	Loss 5.3221 (5.2303)	Top-1 acc 9.375 (10.186)	Top-5 acc 21.094 (25.518)	lr 0.04180
Warmup Train [11][2020/3239]	Time 0.271 (0.230)	Data 0.001 (0.009)	Loss 5.3751 (5.2304)	Top-1 acc 7.031 (10.182)	Top-5 acc 25.391 (25.519)	lr 0.04179
Warmup Train [11][2030/3239]	Time 0.186 (0.230)	Data 0.001 (0.009)	Loss 5.0953 (5.2303)	Top-1 acc 11.328 (10.187)	Top-5 acc 29.297 (25.523)	lr 0.04179
Warmup Train [11][2040/3239]	Time 0.225 (0.230)	Data 0.001 (0.009)	Loss 5.0190 (5.2301)	Top-1 acc 15.625 (10.192)	Top-5 acc 30.469 (25.527)	lr 0.04178
Warmup Train [11][2050/3239]	Time 0.200 (0.230)	Data 0.001 (0.009)	Loss 5.2483 (5.2300)	Top-1 acc 8.203 (10.193)	Top-5 acc 23.828 (25.528)	lr 0.04178
Warmup Train [11][2060/3239]	Time 0.140 (0.230)	Data 0.003 (0.009)	Loss 5.1873 (5.2300)	Top-1 acc 12.500 (10.194)	Top-5 acc 26.953 (25.527)	lr 0.04177
Warmup Train [11][2070/3239]	Time 0.181 (0.230)	Data 0.001 (0.009)	Loss 4.9828 (5.2299)	Top-1 acc 14.062 (10.197)	Top-5 acc 30.469 (25.530)	lr 0.04177
Warmup Train [11][2080/3239]	Time 0.181 (0.230)	Data 0.001 (0.009)	Loss 5.1663 (5.2297)	Top-1 acc 10.938 (10.201)	Top-5 acc 26.953 (25.534)	lr 0.04176
Warmup Train [11][2090/3239]	Time 0.198 (0.230)	Data 0.002 (0.009)	Loss 5.2328 (5.2298)	Top-1 acc 10.547 (10.206)	Top-5 acc 25.781 (25.539)	lr 0.04176
Warmup Train [11][2100/3239]	Time 0.281 (0.230)	Data 0.001 (0.009)	Loss 5.1287 (5.2293)	Top-1 acc 10.156 (10.213)	Top-5 acc 28.516 (25.549)	lr 0.04176
Warmup Train [11][2110/3239]	Time 0.188 (0.230)	Data 0.002 (0.009)	Loss 5.1023 (5.2291)	Top-1 acc 12.891 (10.215)	Top-5 acc 30.469 (25.554)	lr 0.04175
Warmup Train [11][2120/3239]	Time 0.173 (0.229)	Data 0.001 (0.009)	Loss 5.1035 (5.2289)	Top-1 acc 13.672 (10.216)	Top-5 acc 26.562 (25.559)	lr 0.04175
Warmup Train [11][2130/3239]	Time 0.176 (0.229)	Data 0.001 (0.009)	Loss 5.2536 (5.2288)	Top-1 acc 13.281 (10.219)	Top-5 acc 28.516 (25.563)	lr 0.04174
Warmup Train [11][2140/3239]	Time 0.245 (0.229)	Data 0.001 (0.009)	Loss 5.1680 (5.2287)	Top-1 acc 10.938 (10.221)	Top-5 acc 28.125 (25.568)	lr 0.04174
Warmup Train [11][2150/3239]	Time 0.235 (0.229)	Data 0.001 (0.009)	Loss 5.0928 (5.2285)	Top-1 acc 10.938 (10.223)	Top-5 acc 26.172 (25.573)	lr 0.04173
Warmup Train [11][2160/3239]	Time 0.174 (0.229)	Data 0.001 (0.009)	Loss 5.2109 (5.2282)	Top-1 acc 12.500 (10.227)	Top-5 acc 26.562 (25.580)	lr 0.04173
Warmup Train [11][2170/3239]	Time 0.218 (0.229)	Data 0.002 (0.009)	Loss 5.3988 (5.2280)	Top-1 acc 6.641 (10.228)	Top-5 acc 23.047 (25.583)	lr 0.04172
Warmup Train [11][2180/3239]	Time 0.192 (0.229)	Data 0.001 (0.009)	Loss 5.2034 (5.2279)	Top-1 acc 7.422 (10.230)	Top-5 acc 25.391 (25.589)	lr 0.04172
Warmup Train [11][2190/3239]	Time 0.247 (0.229)	Data 0.002 (0.009)	Loss 5.3843 (5.2279)	Top-1 acc 8.203 (10.229)	Top-5 acc 19.922 (25.588)	lr 0.04171
Warmup Train [11][2200/3239]	Time 0.305 (0.229)	Data 0.001 (0.009)	Loss 5.2291 (5.2277)	Top-1 acc 10.938 (10.235)	Top-5 acc 26.953 (25.591)	lr 0.04171
Warmup Train [11][2210/3239]	Time 0.339 (0.229)	Data 0.001 (0.009)	Loss 5.0626 (5.2276)	Top-1 acc 8.203 (10.234)	Top-5 acc 28.516 (25.599)	lr 0.04171
Warmup Train [11][2220/3239]	Time 0.125 (0.229)	Data 0.001 (0.009)	Loss 5.2904 (5.2276)	Top-1 acc 8.594 (10.234)	Top-5 acc 22.266 (25.599)	lr 0.04170
Warmup Train [11][2230/3239]	Time 0.169 (0.229)	Data 0.001 (0.009)	Loss 5.0970 (5.2274)	Top-1 acc 14.062 (10.239)	Top-5 acc 28.516 (25.606)	lr 0.04170
Warmup Train [11][2240/3239]	Time 0.252 (0.229)	Data 0.001 (0.009)	Loss 5.0776 (5.2271)	Top-1 acc 14.062 (10.243)	Top-5 acc 29.297 (25.613)	lr 0.04169
Warmup Train [11][2250/3239]	Time 0.180 (0.229)	Data 0.001 (0.009)	Loss 4.9459 (5.2270)	Top-1 acc 14.844 (10.248)	Top-5 acc 35.156 (25.619)	lr 0.04169
Warmup Train [11][2260/3239]	Time 0.265 (0.229)	Data 0.001 (0.009)	Loss 5.0642 (5.2269)	Top-1 acc 10.938 (10.246)	Top-5 acc 28.125 (25.622)	lr 0.04168
Warmup Train [11][2270/3239]	Time 0.224 (0.229)	Data 0.002 (0.009)	Loss 5.1743 (5.2267)	Top-1 acc 9.766 (10.248)	Top-5 acc 25.391 (25.629)	lr 0.04168
Warmup Train [11][2280/3239]	Time 0.192 (0.229)	Data 0.001 (0.009)	Loss 5.1791 (5.2267)	Top-1 acc 8.594 (10.245)	Top-5 acc 25.391 (25.629)	lr 0.04167
Warmup Train [11][2290/3239]	Time 0.234 (0.229)	Data 0.001 (0.009)	Loss 5.3111 (5.2265)	Top-1 acc 10.547 (10.253)	Top-5 acc 19.922 (25.640)	lr 0.04167
Warmup Train [11][2300/3239]	Time 0.237 (0.229)	Data 0.002 (0.008)	Loss 5.1987 (5.2264)	Top-1 acc 10.156 (10.255)	Top-5 acc 23.828 (25.641)	lr 0.04167
Warmup Train [11][2310/3239]	Time 0.240 (0.229)	Data 0.001 (0.008)	Loss 5.2504 (5.2263)	Top-1 acc 10.156 (10.253)	Top-5 acc 26.562 (25.639)	lr 0.04166
Warmup Train [11][2320/3239]	Time 0.149 (0.229)	Data 0.001 (0.008)	Loss 5.3028 (5.2261)	Top-1 acc 9.766 (10.254)	Top-5 acc 24.609 (25.647)	lr 0.04166
Warmup Train [11][2330/3239]	Time 0.220 (0.229)	Data 0.001 (0.008)	Loss 5.3295 (5.2260)	Top-1 acc 9.375 (10.252)	Top-5 acc 24.609 (25.651)	lr 0.04165
Warmup Train [11][2340/3239]	Time 0.146 (0.229)	Data 0.001 (0.008)	Loss 5.1169 (5.2260)	Top-1 acc 12.109 (10.252)	Top-5 acc 26.953 (25.656)	lr 0.04165
Warmup Train [11][2350/3239]	Time 0.227 (0.229)	Data 0.001 (0.008)	Loss 5.0527 (5.2257)	Top-1 acc 10.938 (10.255)	Top-5 acc 31.641 (25.664)	lr 0.04164
Warmup Train [11][2360/3239]	Time 0.136 (0.229)	Data 0.001 (0.008)	Loss 5.2547 (5.2255)	Top-1 acc 9.375 (10.258)	Top-5 acc 26.172 (25.666)	lr 0.04164
Warmup Train [11][2370/3239]	Time 0.291 (0.229)	Data 0.001 (0.008)	Loss 5.2765 (5.2254)	Top-1 acc 11.328 (10.261)	Top-5 acc 24.609 (25.666)	lr 0.04163
Warmup Train [11][2380/3239]	Time 0.189 (0.228)	Data 0.001 (0.008)	Loss 5.2325 (5.2254)	Top-1 acc 10.547 (10.264)	Top-5 acc 23.828 (25.667)	lr 0.04163
Warmup Train [11][2390/3239]	Time 0.199 (0.228)	Data 0.001 (0.008)	Loss 5.1075 (5.2252)	Top-1 acc 10.938 (10.272)	Top-5 acc 26.953 (25.674)	lr 0.04162
Warmup Train [11][2400/3239]	Time 0.159 (0.228)	Data 0.001 (0.008)	Loss 5.1942 (5.2251)	Top-1 acc 9.375 (10.275)	Top-5 acc 22.266 (25.678)	lr 0.04162
Warmup Train [11][2410/3239]	Time 0.360 (0.228)	Data 0.001 (0.008)	Loss 5.3260 (5.2250)	Top-1 acc 8.984 (10.274)	Top-5 acc 22.656 (25.680)	lr 0.04162
Warmup Train [11][2420/3239]	Time 0.383 (0.228)	Data 0.001 (0.008)	Loss 5.2395 (5.2248)	Top-1 acc 9.375 (10.276)	Top-5 acc 25.391 (25.690)	lr 0.04161
Warmup Train [11][2430/3239]	Time 0.246 (0.228)	Data 0.001 (0.008)	Loss 5.1381 (5.2248)	Top-1 acc 15.234 (10.277)	Top-5 acc 31.250 (25.691)	lr 0.04161
Warmup Train [11][2440/3239]	Time 0.190 (0.228)	Data 0.002 (0.008)	Loss 5.1245 (5.2245)	Top-1 acc 12.109 (10.281)	Top-5 acc 25.781 (25.695)	lr 0.04160
Warmup Train [11][2450/3239]	Time 0.200 (0.228)	Data 0.001 (0.008)	Loss 5.0859 (5.2242)	Top-1 acc 12.891 (10.284)	Top-5 acc 28.125 (25.702)	lr 0.04160
Warmup Train [11][2460/3239]	Time 0.238 (0.228)	Data 0.001 (0.008)	Loss 5.1641 (5.2239)	Top-1 acc 9.766 (10.287)	Top-5 acc 27.344 (25.704)	lr 0.04159
Warmup Train [11][2470/3239]	Time 0.190 (0.228)	Data 0.002 (0.008)	Loss 5.3051 (5.2240)	Top-1 acc 6.250 (10.286)	Top-5 acc 23.438 (25.703)	lr 0.04159
Warmup Train [11][2480/3239]	Time 0.211 (0.228)	Data 0.001 (0.008)	Loss 5.2188 (5.2238)	Top-1 acc 9.375 (10.287)	Top-5 acc 26.562 (25.710)	lr 0.04158
Warmup Train [11][2490/3239]	Time 0.182 (0.228)	Data 0.001 (0.008)	Loss 5.1141 (5.2234)	Top-1 acc 10.156 (10.293)	Top-5 acc 28.906 (25.721)	lr 0.04158
Warmup Train [11][2500/3239]	Time 0.219 (0.228)	Data 0.001 (0.008)	Loss 5.1889 (5.2233)	Top-1 acc 11.719 (10.296)	Top-5 acc 26.172 (25.725)	lr 0.04157
Warmup Train [11][2510/3239]	Time 0.270 (0.228)	Data 0.001 (0.008)	Loss 5.1859 (5.2231)	Top-1 acc 12.500 (10.301)	Top-5 acc 28.906 (25.735)	lr 0.04157
Warmup Train [11][2520/3239]	Time 0.216 (0.228)	Data 0.022 (0.008)	Loss 5.1303 (5.2228)	Top-1 acc 13.281 (10.304)	Top-5 acc 30.469 (25.736)	lr 0.04157
Warmup Train [11][2530/3239]	Time 0.182 (0.228)	Data 0.002 (0.008)	Loss 5.3679 (5.2228)	Top-1 acc 12.500 (10.304)	Top-5 acc 23.828 (25.734)	lr 0.04156
Warmup Train [11][2540/3239]	Time 0.265 (0.228)	Data 0.001 (0.008)	Loss 5.1079 (5.2226)	Top-1 acc 13.672 (10.306)	Top-5 acc 29.688 (25.740)	lr 0.04156
Warmup Train [11][2550/3239]	Time 0.285 (0.228)	Data 0.001 (0.008)	Loss 5.3840 (5.2225)	Top-1 acc 12.109 (10.309)	Top-5 acc 26.172 (25.745)	lr 0.04155
Warmup Train [11][2560/3239]	Time 0.251 (0.228)	Data 0.001 (0.008)	Loss 5.0496 (5.2222)	Top-1 acc 12.500 (10.313)	Top-5 acc 29.297 (25.746)	lr 0.04155
Warmup Train [11][2570/3239]	Time 0.163 (0.228)	Data 0.001 (0.008)	Loss 5.2640 (5.2222)	Top-1 acc 10.156 (10.312)	Top-5 acc 26.172 (25.746)	lr 0.04154
Warmup Train [11][2580/3239]	Time 0.224 (0.228)	Data 0.001 (0.008)	Loss 5.1344 (5.2217)	Top-1 acc 11.719 (10.316)	Top-5 acc 26.562 (25.759)	lr 0.04154
Warmup Train [11][2590/3239]	Time 0.208 (0.228)	Data 0.002 (0.008)	Loss 5.0890 (5.2217)	Top-1 acc 11.328 (10.313)	Top-5 acc 26.953 (25.760)	lr 0.04153
Warmup Train [11][2600/3239]	Time 0.279 (0.228)	Data 0.001 (0.008)	Loss 5.2468 (5.2218)	Top-1 acc 11.719 (10.313)	Top-5 acc 23.438 (25.757)	lr 0.04153
Warmup Train [11][2610/3239]	Time 0.349 (0.228)	Data 0.001 (0.008)	Loss 5.1698 (5.2217)	Top-1 acc 13.672 (10.316)	Top-5 acc 28.906 (25.761)	lr 0.04152
Warmup Train [11][2620/3239]	Time 0.219 (0.228)	Data 0.001 (0.008)	Loss 5.1681 (5.2216)	Top-1 acc 14.062 (10.318)	Top-5 acc 27.344 (25.762)	lr 0.04152
Warmup Train [11][2630/3239]	Time 0.236 (0.228)	Data 0.002 (0.008)	Loss 5.0774 (5.2215)	Top-1 acc 10.938 (10.317)	Top-5 acc 33.594 (25.765)	lr 0.04152
Warmup Train [11][2640/3239]	Time 0.226 (0.228)	Data 0.001 (0.008)	Loss 5.1416 (5.2215)	Top-1 acc 11.719 (10.317)	Top-5 acc 32.812 (25.767)	lr 0.04151
Warmup Train [11][2650/3239]	Time 0.188 (0.228)	Data 0.001 (0.008)	Loss 5.2270 (5.2215)	Top-1 acc 13.672 (10.319)	Top-5 acc 28.516 (25.769)	lr 0.04151
Warmup Train [11][2660/3239]	Time 0.201 (0.228)	Data 0.001 (0.008)	Loss 5.1970 (5.2215)	Top-1 acc 12.891 (10.323)	Top-5 acc 30.469 (25.772)	lr 0.04150
Warmup Train [11][2670/3239]	Time 0.189 (0.228)	Data 0.001 (0.008)	Loss 5.0703 (5.2213)	Top-1 acc 13.672 (10.324)	Top-5 acc 30.469 (25.776)	lr 0.04150
Warmup Train [11][2680/3239]	Time 0.216 (0.228)	Data 0.001 (0.008)	Loss 5.1994 (5.2211)	Top-1 acc 9.766 (10.328)	Top-5 acc 24.219 (25.781)	lr 0.04149
Warmup Train [11][2690/3239]	Time 0.175 (0.228)	Data 0.001 (0.008)	Loss 5.0893 (5.2210)	Top-1 acc 12.891 (10.329)	Top-5 acc 26.172 (25.782)	lr 0.04149
Warmup Train [11][2700/3239]	Time 0.223 (0.228)	Data 0.001 (0.008)	Loss 5.2122 (5.2210)	Top-1 acc 14.844 (10.330)	Top-5 acc 27.344 (25.785)	lr 0.04148
Warmup Train [11][2710/3239]	Time 0.136 (0.228)	Data 0.001 (0.008)	Loss 5.3205 (5.2208)	Top-1 acc 8.594 (10.332)	Top-5 acc 22.656 (25.791)	lr 0.04148
Warmup Train [11][2720/3239]	Time 0.326 (0.228)	Data 0.001 (0.008)	Loss 5.1647 (5.2207)	Top-1 acc 8.594 (10.334)	Top-5 acc 26.172 (25.792)	lr 0.04147
Warmup Train [11][2730/3239]	Time 0.233 (0.227)	Data 0.001 (0.008)	Loss 5.1828 (5.2205)	Top-1 acc 10.156 (10.334)	Top-5 acc 24.609 (25.796)	lr 0.04147
Warmup Train [11][2740/3239]	Time 0.216 (0.227)	Data 0.001 (0.008)	Loss 5.1924 (5.2203)	Top-1 acc 9.766 (10.340)	Top-5 acc 26.953 (25.804)	lr 0.04147
Warmup Train [11][2750/3239]	Time 0.151 (0.227)	Data 0.001 (0.008)	Loss 5.0625 (5.2205)	Top-1 acc 14.062 (10.337)	Top-5 acc 30.078 (25.801)	lr 0.04146
Warmup Train [11][2760/3239]	Time 0.164 (0.227)	Data 0.001 (0.008)	Loss 5.1517 (5.2204)	Top-1 acc 13.672 (10.341)	Top-5 acc 26.562 (25.804)	lr 0.04146
Warmup Train [11][2770/3239]	Time 0.224 (0.227)	Data 0.001 (0.008)	Loss 5.2372 (5.2202)	Top-1 acc 11.719 (10.345)	Top-5 acc 23.828 (25.813)	lr 0.04145
Warmup Train [11][2780/3239]	Time 0.189 (0.227)	Data 0.001 (0.008)	Loss 5.2099 (5.2201)	Top-1 acc 9.375 (10.346)	Top-5 acc 22.656 (25.813)	lr 0.04145
Warmup Train [11][2790/3239]	Time 0.178 (0.227)	Data 0.001 (0.008)	Loss 5.1985 (5.2198)	Top-1 acc 12.109 (10.348)	Top-5 acc 22.656 (25.817)	lr 0.04144
Warmup Train [11][2800/3239]	Time 0.222 (0.227)	Data 0.001 (0.008)	Loss 5.1807 (5.2195)	Top-1 acc 9.375 (10.353)	Top-5 acc 24.219 (25.820)	lr 0.04144
Warmup Train [11][2810/3239]	Time 0.221 (0.227)	Data 0.001 (0.008)	Loss 5.2786 (5.2194)	Top-1 acc 9.375 (10.354)	Top-5 acc 23.438 (25.821)	lr 0.04143
Warmup Train [11][2820/3239]	Time 0.268 (0.227)	Data 0.001 (0.008)	Loss 5.1411 (5.2194)	Top-1 acc 10.156 (10.352)	Top-5 acc 28.516 (25.820)	lr 0.04143
Warmup Train [11][2830/3239]	Time 0.182 (0.227)	Data 0.001 (0.007)	Loss 5.1649 (5.2191)	Top-1 acc 10.156 (10.359)	Top-5 acc 28.125 (25.827)	lr 0.04142
Warmup Train [11][2840/3239]	Time 0.323 (0.227)	Data 0.001 (0.007)	Loss 5.1652 (5.2187)	Top-1 acc 12.109 (10.361)	Top-5 acc 24.609 (25.832)	lr 0.04142
Warmup Train [11][2850/3239]	Time 0.222 (0.227)	Data 0.001 (0.007)	Loss 5.0099 (5.2185)	Top-1 acc 12.500 (10.363)	Top-5 acc 29.688 (25.842)	lr 0.04142
Warmup Train [11][2860/3239]	Time 0.134 (0.227)	Data 0.001 (0.007)	Loss 5.3408 (5.2186)	Top-1 acc 7.031 (10.361)	Top-5 acc 24.609 (25.843)	lr 0.04141
Warmup Train [11][2870/3239]	Time 0.225 (0.227)	Data 0.001 (0.007)	Loss 5.1387 (5.2183)	Top-1 acc 9.766 (10.364)	Top-5 acc 28.516 (25.848)	lr 0.04141
Warmup Train [11][2880/3239]	Time 0.278 (0.227)	Data 0.001 (0.007)	Loss 5.2469 (5.2184)	Top-1 acc 8.203 (10.367)	Top-5 acc 22.266 (25.851)	lr 0.04140
Warmup Train [11][2890/3239]	Time 0.243 (0.227)	Data 0.002 (0.007)	Loss 5.1856 (5.2182)	Top-1 acc 13.672 (10.369)	Top-5 acc 29.297 (25.853)	lr 0.04140
Warmup Train [11][2900/3239]	Time 0.290 (0.227)	Data 0.002 (0.007)	Loss 5.2378 (5.2179)	Top-1 acc 10.156 (10.373)	Top-5 acc 26.172 (25.859)	lr 0.04139
Warmup Train [11][2910/3239]	Time 0.200 (0.227)	Data 0.001 (0.007)	Loss 5.0679 (5.2178)	Top-1 acc 12.500 (10.375)	Top-5 acc 30.078 (25.863)	lr 0.04139
Warmup Train [11][2920/3239]	Time 0.202 (0.227)	Data 0.001 (0.007)	Loss 5.1090 (5.2176)	Top-1 acc 12.500 (10.380)	Top-5 acc 28.906 (25.869)	lr 0.04138
Warmup Train [11][2930/3239]	Time 0.220 (0.227)	Data 0.001 (0.007)	Loss 5.3234 (5.2174)	Top-1 acc 8.984 (10.379)	Top-5 acc 24.219 (25.873)	lr 0.04138
Warmup Train [11][2940/3239]	Time 0.153 (0.227)	Data 0.001 (0.007)	Loss 5.1262 (5.2173)	Top-1 acc 11.328 (10.383)	Top-5 acc 24.609 (25.875)	lr 0.04137
Warmup Train [11][2950/3239]	Time 0.358 (0.227)	Data 0.001 (0.007)	Loss 5.1730 (5.2171)	Top-1 acc 10.156 (10.388)	Top-5 acc 26.953 (25.883)	lr 0.04137
Warmup Train [11][2960/3239]	Time 0.284 (0.227)	Data 0.001 (0.007)	Loss 5.1581 (5.2169)	Top-1 acc 10.547 (10.392)	Top-5 acc 25.000 (25.889)	lr 0.04136
Warmup Train [11][2970/3239]	Time 0.166 (0.227)	Data 0.001 (0.007)	Loss 5.2358 (5.2168)	Top-1 acc 7.812 (10.395)	Top-5 acc 22.656 (25.893)	lr 0.04136
Warmup Train [11][2980/3239]	Time 0.205 (0.227)	Data 0.002 (0.007)	Loss 5.2162 (5.2165)	Top-1 acc 10.156 (10.398)	Top-5 acc 25.781 (25.899)	lr 0.04136
Warmup Train [11][2990/3239]	Time 0.158 (0.227)	Data 0.001 (0.007)	Loss 5.1756 (5.2163)	Top-1 acc 12.891 (10.403)	Top-5 acc 29.297 (25.908)	lr 0.04135
Warmup Train [11][3000/3239]	Time 0.241 (0.227)	Data 0.001 (0.007)	Loss 5.1913 (5.2162)	Top-1 acc 12.500 (10.404)	Top-5 acc 28.125 (25.907)	lr 0.04135
Warmup Train [11][3010/3239]	Time 0.140 (0.227)	Data 0.001 (0.007)	Loss 5.1379 (5.2161)	Top-1 acc 12.109 (10.406)	Top-5 acc 28.906 (25.911)	lr 0.04134
Warmup Train [11][3020/3239]	Time 0.194 (0.227)	Data 0.001 (0.007)	Loss 5.2031 (5.2159)	Top-1 acc 10.547 (10.406)	Top-5 acc 22.656 (25.914)	lr 0.04134
Warmup Train [11][3030/3239]	Time 0.173 (0.226)	Data 0.001 (0.007)	Loss 5.3561 (5.2158)	Top-1 acc 6.641 (10.404)	Top-5 acc 23.047 (25.915)	lr 0.04133
Warmup Train [11][3040/3239]	Time 0.219 (0.227)	Data 0.002 (0.007)	Loss 4.9422 (5.2157)	Top-1 acc 17.578 (10.408)	Top-5 acc 32.031 (25.917)	lr 0.04133
Warmup Train [11][3050/3239]	Time 0.232 (0.226)	Data 0.001 (0.007)	Loss 5.2260 (5.2156)	Top-1 acc 12.109 (10.411)	Top-5 acc 24.609 (25.920)	lr 0.04132
Warmup Train [11][3060/3239]	Time 0.318 (0.226)	Data 0.001 (0.007)	Loss 5.0826 (5.2154)	Top-1 acc 10.547 (10.413)	Top-5 acc 28.125 (25.922)	lr 0.04132
Warmup Train [11][3070/3239]	Time 0.187 (0.226)	Data 0.001 (0.007)	Loss 5.2185 (5.2151)	Top-1 acc 10.938 (10.415)	Top-5 acc 28.516 (25.928)	lr 0.04131
Warmup Train [11][3080/3239]	Time 0.232 (0.226)	Data 0.001 (0.007)	Loss 5.2339 (5.2150)	Top-1 acc 14.062 (10.419)	Top-5 acc 23.828 (25.933)	lr 0.04131
Warmup Train [11][3090/3239]	Time 0.296 (0.226)	Data 0.001 (0.007)	Loss 5.2779 (5.2148)	Top-1 acc 12.109 (10.423)	Top-5 acc 26.172 (25.935)	lr 0.04131
Warmup Train [11][3100/3239]	Time 0.273 (0.226)	Data 0.002 (0.007)	Loss 5.0146 (5.2143)	Top-1 acc 10.156 (10.427)	Top-5 acc 27.734 (25.946)	lr 0.04130
Warmup Train [11][3110/3239]	Time 0.218 (0.226)	Data 0.001 (0.007)	Loss 5.0697 (5.2140)	Top-1 acc 13.281 (10.432)	Top-5 acc 30.078 (25.951)	lr 0.04130
Warmup Train [11][3120/3239]	Time 0.175 (0.226)	Data 0.001 (0.007)	Loss 5.1751 (5.2138)	Top-1 acc 12.109 (10.439)	Top-5 acc 25.781 (25.958)	lr 0.04129
Warmup Train [11][3130/3239]	Time 0.245 (0.226)	Data 0.001 (0.007)	Loss 4.9787 (5.2136)	Top-1 acc 16.016 (10.443)	Top-5 acc 28.125 (25.963)	lr 0.04129
Warmup Train [11][3140/3239]	Time 0.197 (0.226)	Data 0.001 (0.007)	Loss 5.1823 (5.2133)	Top-1 acc 9.766 (10.446)	Top-5 acc 29.297 (25.969)	lr 0.04128
Warmup Train [11][3150/3239]	Time 0.162 (0.226)	Data 0.001 (0.007)	Loss 5.0751 (5.2132)	Top-1 acc 14.453 (10.449)	Top-5 acc 29.688 (25.974)	lr 0.04128
Warmup Train [11][3160/3239]	Time 0.151 (0.226)	Data 0.001 (0.007)	Loss 5.2379 (5.2130)	Top-1 acc 8.984 (10.454)	Top-5 acc 24.219 (25.978)	lr 0.04127
Warmup Train [11][3170/3239]	Time 0.248 (0.226)	Data 0.002 (0.007)	Loss 5.1025 (5.2127)	Top-1 acc 12.109 (10.459)	Top-5 acc 32.031 (25.984)	lr 0.04127
Warmup Train [11][3180/3239]	Time 0.208 (0.226)	Data 0.000 (0.007)	Loss 5.2061 (5.2126)	Top-1 acc 8.594 (10.457)	Top-5 acc 31.641 (25.988)	lr 0.04126
Warmup Train [11][3190/3239]	Time 0.217 (0.226)	Data 0.000 (0.007)	Loss 5.1859 (5.2125)	Top-1 acc 8.984 (10.461)	Top-5 acc 25.000 (25.993)	lr 0.04126
Warmup Train [11][3200/3239]	Time 0.192 (0.226)	Data 0.000 (0.007)	Loss 5.1623 (5.2121)	Top-1 acc 12.109 (10.465)	Top-5 acc 30.469 (26.005)	lr 0.04125
Warmup Train [11][3210/3239]	Time 0.208 (0.226)	Data 0.000 (0.007)	Loss 5.0858 (5.2120)	Top-1 acc 12.500 (10.468)	Top-5 acc 28.906 (26.008)	lr 0.04125
Warmup Train [11][3220/3239]	Time 0.200 (0.226)	Data 0.000 (0.007)	Loss 5.0144 (5.2118)	Top-1 acc 14.453 (10.470)	Top-5 acc 32.031 (26.012)	lr 0.04125
Warmup Train [11][3230/3239]	Time 0.158 (0.226)	Data 0.000 (0.007)	Loss 5.1853 (5.2115)	Top-1 acc 12.500 (10.471)	Top-5 acc 26.562 (26.017)	lr 0.04124
Warmup Train [11][3239/3239]	Time 0.139 (0.226)	Data 0.000 (0.007)	Loss 5.4128 (5.2114)	Top-1 acc 8.642 (10.472)	Top-5 acc 19.753 (26.020)	lr 0.04124
==========Warmup Valid [11/40]	loss 4.413	top-1 acc 15.433	top-5 acc 34.813	Train top-1 10.472	top-5 26.020	flops: 442.4M
Warmup Train [12][0/3239]	Time 16.814 (16.814)	Data 15.385 (15.385)	Loss 5.1497 (5.1497)	Top-1 acc 11.328 (11.328)	Top-5 acc 28.516 (28.516)	lr 0.04124
Warmup Train [12][10/3239]	Time 0.221 (1.806)	Data 0.005 (1.463)	Loss 5.1299 (5.1886)	Top-1 acc 12.109 (10.547)	Top-5 acc 28.125 (26.527)	lr 0.04123
Warmup Train [12][20/3239]	Time 0.149 (1.048)	Data 0.001 (0.767)	Loss 5.1735 (5.1913)	Top-1 acc 9.766 (10.528)	Top-5 acc 23.438 (26.302)	lr 0.04123
Warmup Train [12][30/3239]	Time 0.325 (0.790)	Data 0.002 (0.522)	Loss 5.0881 (5.1778)	Top-1 acc 11.719 (10.723)	Top-5 acc 29.297 (26.852)	lr 0.04122
Warmup Train [12][40/3239]	Time 0.241 (0.653)	Data 0.001 (0.395)	Loss 5.1907 (5.1764)	Top-1 acc 12.500 (10.833)	Top-5 acc 26.172 (26.839)	lr 0.04122
Warmup Train [12][50/3239]	Time 0.190 (0.570)	Data 0.001 (0.318)	Loss 5.2878 (5.1692)	Top-1 acc 8.203 (10.960)	Top-5 acc 24.219 (27.091)	lr 0.04121
Warmup Train [12][60/3239]	Time 0.259 (0.516)	Data 0.001 (0.266)	Loss 5.1424 (5.1683)	Top-1 acc 10.547 (11.187)	Top-5 acc 25.391 (27.126)	lr 0.04121
Warmup Train [12][70/3239]	Time 0.312 (0.475)	Data 0.001 (0.229)	Loss 5.1943 (5.1700)	Top-1 acc 10.547 (11.207)	Top-5 acc 27.344 (27.162)	lr 0.04120
Warmup Train [12][80/3239]	Time 0.203 (0.446)	Data 0.001 (0.201)	Loss 5.0294 (5.1709)	Top-1 acc 16.016 (11.179)	Top-5 acc 30.859 (27.214)	lr 0.04120
Warmup Train [12][90/3239]	Time 0.192 (0.423)	Data 0.001 (0.179)	Loss 5.2487 (5.1669)	Top-1 acc 11.328 (11.251)	Top-5 acc 27.344 (27.374)	lr 0.04119
Warmup Train [12][100/3239]	Time 0.158 (0.404)	Data 0.001 (0.162)	Loss 5.2009 (5.1679)	Top-1 acc 10.156 (11.216)	Top-5 acc 26.953 (27.290)	lr 0.04119
Warmup Train [12][110/3239]	Time 0.265 (0.389)	Data 0.001 (0.148)	Loss 5.1444 (5.1656)	Top-1 acc 11.719 (11.120)	Top-5 acc 30.859 (27.263)	lr 0.04119
Warmup Train [12][120/3239]	Time 0.166 (0.375)	Data 0.001 (0.136)	Loss 5.1037 (5.1609)	Top-1 acc 12.891 (11.125)	Top-5 acc 31.250 (27.421)	lr 0.04118
Warmup Train [12][130/3239]	Time 0.170 (0.362)	Data 0.002 (0.126)	Loss 5.1788 (5.1592)	Top-1 acc 10.156 (11.122)	Top-5 acc 24.219 (27.406)	lr 0.04118
Warmup Train [12][140/3239]	Time 0.199 (0.352)	Data 0.001 (0.117)	Loss 5.0766 (5.1581)	Top-1 acc 9.766 (11.143)	Top-5 acc 28.516 (27.416)	lr 0.04117
Warmup Train [12][150/3239]	Time 0.221 (0.342)	Data 0.001 (0.109)	Loss 4.9957 (5.1544)	Top-1 acc 14.453 (11.194)	Top-5 acc 30.469 (27.460)	lr 0.04117
Warmup Train [12][160/3239]	Time 0.209 (0.334)	Data 0.001 (0.103)	Loss 5.2626 (5.1559)	Top-1 acc 11.719 (11.209)	Top-5 acc 27.344 (27.383)	lr 0.04116
Warmup Train [12][170/3239]	Time 0.321 (0.327)	Data 0.002 (0.097)	Loss 5.0379 (5.1557)	Top-1 acc 10.156 (11.234)	Top-5 acc 32.031 (27.376)	lr 0.04116
Warmup Train [12][180/3239]	Time 0.239 (0.321)	Data 0.001 (0.092)	Loss 4.9959 (5.1552)	Top-1 acc 13.281 (11.220)	Top-5 acc 33.984 (27.387)	lr 0.04115
Warmup Train [12][190/3239]	Time 0.236 (0.316)	Data 0.002 (0.087)	Loss 5.2616 (5.1558)	Top-1 acc 11.719 (11.261)	Top-5 acc 25.391 (27.434)	lr 0.04115
Warmup Train [12][200/3239]	Time 0.234 (0.313)	Data 0.001 (0.083)	Loss 4.9048 (5.1524)	Top-1 acc 17.188 (11.283)	Top-5 acc 32.422 (27.490)	lr 0.04114
Warmup Train [12][210/3239]	Time 0.191 (0.310)	Data 0.001 (0.079)	Loss 5.2963 (5.1509)	Top-1 acc 7.422 (11.321)	Top-5 acc 21.875 (27.566)	lr 0.04114
Warmup Train [12][220/3239]	Time 0.239 (0.307)	Data 0.002 (0.076)	Loss 5.3207 (5.1513)	Top-1 acc 12.500 (11.330)	Top-5 acc 26.172 (27.607)	lr 0.04113
Warmup Train [12][230/3239]	Time 0.226 (0.304)	Data 0.001 (0.073)	Loss 5.1877 (5.1512)	Top-1 acc 10.547 (11.362)	Top-5 acc 27.734 (27.636)	lr 0.04113
Warmup Train [12][240/3239]	Time 0.275 (0.301)	Data 0.001 (0.070)	Loss 5.1674 (5.1506)	Top-1 acc 12.891 (11.398)	Top-5 acc 28.125 (27.639)	lr 0.04113
Warmup Train [12][250/3239]	Time 0.241 (0.299)	Data 0.002 (0.067)	Loss 5.1456 (5.1518)	Top-1 acc 10.547 (11.389)	Top-5 acc 25.781 (27.613)	lr 0.04112
Warmup Train [12][260/3239]	Time 0.290 (0.297)	Data 0.001 (0.065)	Loss 5.0986 (5.1512)	Top-1 acc 14.062 (11.352)	Top-5 acc 28.516 (27.624)	lr 0.04112
Warmup Train [12][270/3239]	Time 0.271 (0.295)	Data 0.002 (0.062)	Loss 5.1271 (5.1528)	Top-1 acc 17.188 (11.348)	Top-5 acc 28.516 (27.586)	lr 0.04111
Warmup Train [12][280/3239]	Time 0.202 (0.294)	Data 0.001 (0.060)	Loss 5.1336 (5.1525)	Top-1 acc 12.891 (11.373)	Top-5 acc 27.344 (27.605)	lr 0.04111
Warmup Train [12][290/3239]	Time 0.160 (0.292)	Data 0.002 (0.058)	Loss 5.1159 (5.1521)	Top-1 acc 13.672 (11.414)	Top-5 acc 30.469 (27.670)	lr 0.04110
Warmup Train [12][300/3239]	Time 0.283 (0.291)	Data 0.001 (0.057)	Loss 5.1567 (5.1521)	Top-1 acc 12.891 (11.416)	Top-5 acc 27.734 (27.645)	lr 0.04110
Warmup Train [12][310/3239]	Time 0.158 (0.289)	Data 0.001 (0.055)	Loss 5.0311 (5.1531)	Top-1 acc 10.156 (11.425)	Top-5 acc 27.344 (27.613)	lr 0.04109
Warmup Train [12][320/3239]	Time 0.246 (0.287)	Data 0.002 (0.053)	Loss 5.2505 (5.1534)	Top-1 acc 6.641 (11.390)	Top-5 acc 25.000 (27.569)	lr 0.04109
Warmup Train [12][330/3239]	Time 0.222 (0.285)	Data 0.001 (0.052)	Loss 5.1416 (5.1549)	Top-1 acc 10.938 (11.372)	Top-5 acc 28.516 (27.538)	lr 0.04108
Warmup Train [12][340/3239]	Time 0.260 (0.283)	Data 0.002 (0.050)	Loss 5.1762 (5.1544)	Top-1 acc 8.594 (11.332)	Top-5 acc 25.000 (27.556)	lr 0.04108
Warmup Train [12][350/3239]	Time 0.404 (0.283)	Data 0.002 (0.049)	Loss 5.0579 (5.1531)	Top-1 acc 11.719 (11.382)	Top-5 acc 28.516 (27.616)	lr 0.04107
Warmup Train [12][360/3239]	Time 0.218 (0.282)	Data 0.001 (0.048)	Loss 5.1172 (5.1515)	Top-1 acc 10.156 (11.404)	Top-5 acc 27.344 (27.642)	lr 0.04107
Warmup Train [12][370/3239]	Time 0.178 (0.280)	Data 0.001 (0.046)	Loss 5.1928 (5.1514)	Top-1 acc 8.594 (11.396)	Top-5 acc 24.219 (27.606)	lr 0.04107
Warmup Train [12][380/3239]	Time 0.182 (0.278)	Data 0.001 (0.045)	Loss 5.0895 (5.1501)	Top-1 acc 10.156 (11.404)	Top-5 acc 28.906 (27.623)	lr 0.04106
Warmup Train [12][390/3239]	Time 0.250 (0.277)	Data 0.001 (0.044)	Loss 5.0860 (5.1496)	Top-1 acc 14.453 (11.423)	Top-5 acc 26.562 (27.642)	lr 0.04106
Warmup Train [12][400/3239]	Time 0.269 (0.276)	Data 0.003 (0.043)	Loss 5.2238 (5.1499)	Top-1 acc 11.328 (11.402)	Top-5 acc 23.047 (27.598)	lr 0.04105
Warmup Train [12][410/3239]	Time 0.190 (0.274)	Data 0.001 (0.042)	Loss 5.1036 (5.1499)	Top-1 acc 9.766 (11.404)	Top-5 acc 28.125 (27.582)	lr 0.04105
Warmup Train [12][420/3239]	Time 0.234 (0.273)	Data 0.001 (0.041)	Loss 5.0594 (5.1509)	Top-1 acc 14.453 (11.408)	Top-5 acc 32.422 (27.587)	lr 0.04104
Warmup Train [12][430/3239]	Time 0.198 (0.272)	Data 0.001 (0.040)	Loss 5.0071 (5.1508)	Top-1 acc 12.891 (11.379)	Top-5 acc 28.125 (27.573)	lr 0.04104
Warmup Train [12][440/3239]	Time 0.271 (0.271)	Data 0.001 (0.040)	Loss 5.2037 (5.1513)	Top-1 acc 7.812 (11.373)	Top-5 acc 28.516 (27.565)	lr 0.04103
Warmup Train [12][450/3239]	Time 0.252 (0.270)	Data 0.001 (0.039)	Loss 5.0362 (5.1506)	Top-1 acc 19.531 (11.395)	Top-5 acc 29.297 (27.565)	lr 0.04103
Warmup Train [12][460/3239]	Time 0.221 (0.269)	Data 0.001 (0.038)	Loss 5.1546 (5.1509)	Top-1 acc 10.156 (11.387)	Top-5 acc 23.438 (27.561)	lr 0.04102
Warmup Train [12][470/3239]	Time 0.215 (0.268)	Data 0.002 (0.037)	Loss 5.2753 (5.1503)	Top-1 acc 9.375 (11.404)	Top-5 acc 22.266 (27.569)	lr 0.04102
Warmup Train [12][480/3239]	Time 0.216 (0.267)	Data 0.001 (0.037)	Loss 5.1269 (5.1500)	Top-1 acc 11.328 (11.403)	Top-5 acc 29.688 (27.601)	lr 0.04101
Warmup Train [12][490/3239]	Time 0.238 (0.266)	Data 0.001 (0.036)	Loss 5.1538 (5.1487)	Top-1 acc 14.844 (11.421)	Top-5 acc 32.031 (27.645)	lr 0.04101
Warmup Train [12][500/3239]	Time 0.237 (0.265)	Data 0.002 (0.035)	Loss 5.0392 (5.1479)	Top-1 acc 10.938 (11.441)	Top-5 acc 29.688 (27.669)	lr 0.04100
Warmup Train [12][510/3239]	Time 0.164 (0.264)	Data 0.002 (0.035)	Loss 5.1146 (5.1483)	Top-1 acc 10.938 (11.434)	Top-5 acc 29.297 (27.637)	lr 0.04100
Warmup Train [12][520/3239]	Time 0.159 (0.263)	Data 0.001 (0.034)	Loss 5.1943 (5.1481)	Top-1 acc 11.719 (11.442)	Top-5 acc 26.953 (27.643)	lr 0.04100
Warmup Train [12][530/3239]	Time 0.173 (0.263)	Data 0.001 (0.034)	Loss 5.0961 (5.1472)	Top-1 acc 13.672 (11.444)	Top-5 acc 28.516 (27.651)	lr 0.04099
Warmup Train [12][540/3239]	Time 0.195 (0.262)	Data 0.002 (0.033)	Loss 5.1715 (5.1470)	Top-1 acc 10.938 (11.442)	Top-5 acc 26.172 (27.651)	lr 0.04099
Warmup Train [12][550/3239]	Time 0.324 (0.261)	Data 0.001 (0.032)	Loss 4.9928 (5.1464)	Top-1 acc 10.156 (11.445)	Top-5 acc 27.344 (27.643)	lr 0.04098
Warmup Train [12][560/3239]	Time 0.372 (0.261)	Data 0.001 (0.032)	Loss 5.4211 (5.1472)	Top-1 acc 10.547 (11.442)	Top-5 acc 18.750 (27.607)	lr 0.04098
Warmup Train [12][570/3239]	Time 0.199 (0.260)	Data 0.001 (0.031)	Loss 5.1254 (5.1477)	Top-1 acc 10.938 (11.430)	Top-5 acc 28.516 (27.593)	lr 0.04097
Warmup Train [12][580/3239]	Time 0.161 (0.259)	Data 0.001 (0.031)	Loss 5.1956 (5.1476)	Top-1 acc 10.547 (11.429)	Top-5 acc 22.656 (27.588)	lr 0.04097
Warmup Train [12][590/3239]	Time 0.201 (0.258)	Data 0.001 (0.030)	Loss 5.1177 (5.1467)	Top-1 acc 9.375 (11.447)	Top-5 acc 26.172 (27.619)	lr 0.04096
Warmup Train [12][600/3239]	Time 0.223 (0.258)	Data 0.002 (0.030)	Loss 5.1907 (5.1465)	Top-1 acc 11.328 (11.440)	Top-5 acc 29.297 (27.627)	lr 0.04096
Warmup Train [12][610/3239]	Time 0.229 (0.257)	Data 0.001 (0.030)	Loss 4.9602 (5.1460)	Top-1 acc 12.891 (11.449)	Top-5 acc 30.469 (27.638)	lr 0.04095
Warmup Train [12][620/3239]	Time 0.287 (0.256)	Data 0.002 (0.029)	Loss 5.0099 (5.1455)	Top-1 acc 11.328 (11.456)	Top-5 acc 26.562 (27.654)	lr 0.04095
Warmup Train [12][630/3239]	Time 0.233 (0.256)	Data 0.001 (0.029)	Loss 5.1295 (5.1456)	Top-1 acc 13.672 (11.443)	Top-5 acc 30.078 (27.646)	lr 0.04094
Warmup Train [12][640/3239]	Time 0.201 (0.255)	Data 0.001 (0.028)	Loss 4.9950 (5.1452)	Top-1 acc 10.547 (11.456)	Top-5 acc 29.297 (27.657)	lr 0.04094
Warmup Train [12][650/3239]	Time 0.287 (0.255)	Data 0.001 (0.028)	Loss 5.1590 (5.1446)	Top-1 acc 10.938 (11.462)	Top-5 acc 29.688 (27.664)	lr 0.04093
Warmup Train [12][660/3239]	Time 0.293 (0.254)	Data 0.001 (0.027)	Loss 4.9698 (5.1448)	Top-1 acc 16.016 (11.462)	Top-5 acc 34.766 (27.678)	lr 0.04093
Warmup Train [12][670/3239]	Time 0.233 (0.254)	Data 0.001 (0.027)	Loss 5.0949 (5.1441)	Top-1 acc 13.281 (11.468)	Top-5 acc 28.516 (27.691)	lr 0.04093
Warmup Train [12][680/3239]	Time 0.229 (0.254)	Data 0.001 (0.027)	Loss 5.2479 (5.1437)	Top-1 acc 10.547 (11.482)	Top-5 acc 26.953 (27.706)	lr 0.04092
Warmup Train [12][690/3239]	Time 0.238 (0.253)	Data 0.001 (0.027)	Loss 5.2788 (5.1442)	Top-1 acc 10.938 (11.473)	Top-5 acc 26.562 (27.690)	lr 0.04092
Warmup Train [12][700/3239]	Time 0.166 (0.253)	Data 0.002 (0.026)	Loss 5.1323 (5.1442)	Top-1 acc 11.719 (11.472)	Top-5 acc 28.906 (27.688)	lr 0.04091
Warmup Train [12][710/3239]	Time 0.193 (0.252)	Data 0.001 (0.026)	Loss 4.9952 (5.1441)	Top-1 acc 14.844 (11.475)	Top-5 acc 34.766 (27.701)	lr 0.04091
Warmup Train [12][720/3239]	Time 0.247 (0.252)	Data 0.001 (0.026)	Loss 5.0535 (5.1441)	Top-1 acc 14.062 (11.483)	Top-5 acc 30.078 (27.705)	lr 0.04090
Warmup Train [12][730/3239]	Time 0.200 (0.251)	Data 0.001 (0.025)	Loss 5.0361 (5.1434)	Top-1 acc 10.938 (11.494)	Top-5 acc 29.297 (27.718)	lr 0.04090
Warmup Train [12][740/3239]	Time 0.198 (0.251)	Data 0.001 (0.025)	Loss 5.0941 (5.1434)	Top-1 acc 11.328 (11.506)	Top-5 acc 32.422 (27.724)	lr 0.04089
Warmup Train [12][750/3239]	Time 0.329 (0.250)	Data 0.001 (0.025)	Loss 5.3062 (5.1433)	Top-1 acc 9.375 (11.520)	Top-5 acc 22.656 (27.722)	lr 0.04089
Warmup Train [12][760/3239]	Time 0.418 (0.250)	Data 0.001 (0.024)	Loss 5.1891 (5.1433)	Top-1 acc 9.766 (11.517)	Top-5 acc 25.000 (27.716)	lr 0.04088
Warmup Train [12][770/3239]	Time 0.192 (0.250)	Data 0.001 (0.024)	Loss 5.2567 (5.1438)	Top-1 acc 11.719 (11.519)	Top-5 acc 22.656 (27.696)	lr 0.04088
Warmup Train [12][780/3239]	Time 0.188 (0.249)	Data 0.001 (0.024)	Loss 5.1313 (5.1437)	Top-1 acc 13.672 (11.523)	Top-5 acc 29.688 (27.702)	lr 0.04087
Warmup Train [12][790/3239]	Time 0.232 (0.249)	Data 0.002 (0.023)	Loss 5.2025 (5.1429)	Top-1 acc 8.984 (11.523)	Top-5 acc 26.172 (27.707)	lr 0.04087
Warmup Train [12][800/3239]	Time 0.128 (0.249)	Data 0.001 (0.023)	Loss 5.2440 (5.1426)	Top-1 acc 10.938 (11.529)	Top-5 acc 23.047 (27.717)	lr 0.04086
Warmup Train [12][810/3239]	Time 0.235 (0.248)	Data 0.031 (0.023)	Loss 5.1053 (5.1424)	Top-1 acc 14.062 (11.539)	Top-5 acc 28.906 (27.723)	lr 0.04086
Warmup Train [12][820/3239]	Time 0.251 (0.248)	Data 0.001 (0.023)	Loss 5.0709 (5.1423)	Top-1 acc 16.406 (11.537)	Top-5 acc 32.422 (27.722)	lr 0.04086
Warmup Train [12][830/3239]	Time 0.200 (0.248)	Data 0.028 (0.023)	Loss 5.0510 (5.1419)	Top-1 acc 13.672 (11.532)	Top-5 acc 34.375 (27.738)	lr 0.04085
Warmup Train [12][840/3239]	Time 0.189 (0.247)	Data 0.001 (0.022)	Loss 5.2102 (5.1421)	Top-1 acc 10.547 (11.532)	Top-5 acc 25.391 (27.721)	lr 0.04085
Warmup Train [12][850/3239]	Time 0.231 (0.247)	Data 0.001 (0.022)	Loss 5.0630 (5.1414)	Top-1 acc 14.844 (11.545)	Top-5 acc 32.031 (27.744)	lr 0.04084
Warmup Train [12][860/3239]	Time 0.178 (0.247)	Data 0.001 (0.022)	Loss 5.0266 (5.1411)	Top-1 acc 11.719 (11.556)	Top-5 acc 31.641 (27.755)	lr 0.04084
Warmup Train [12][870/3239]	Time 0.234 (0.246)	Data 0.001 (0.022)	Loss 5.1771 (5.1409)	Top-1 acc 10.938 (11.556)	Top-5 acc 28.125 (27.755)	lr 0.04083
Warmup Train [12][880/3239]	Time 0.232 (0.246)	Data 0.001 (0.022)	Loss 5.1717 (5.1405)	Top-1 acc 10.938 (11.545)	Top-5 acc 26.172 (27.755)	lr 0.04083
Warmup Train [12][890/3239]	Time 0.240 (0.246)	Data 0.002 (0.021)	Loss 5.0295 (5.1407)	Top-1 acc 11.328 (11.543)	Top-5 acc 28.516 (27.767)	lr 0.04082
Warmup Train [12][900/3239]	Time 0.144 (0.245)	Data 0.001 (0.021)	Loss 5.1112 (5.1401)	Top-1 acc 12.500 (11.545)	Top-5 acc 26.562 (27.773)	lr 0.04082
Warmup Train [12][910/3239]	Time 0.339 (0.245)	Data 0.003 (0.021)	Loss 5.1668 (5.1398)	Top-1 acc 10.938 (11.537)	Top-5 acc 26.172 (27.777)	lr 0.04081
Warmup Train [12][920/3239]	Time 0.229 (0.245)	Data 0.001 (0.021)	Loss 4.9942 (5.1395)	Top-1 acc 12.500 (11.541)	Top-5 acc 32.422 (27.784)	lr 0.04081
Warmup Train [12][930/3239]	Time 0.290 (0.245)	Data 0.002 (0.020)	Loss 5.2138 (5.1393)	Top-1 acc 11.719 (11.545)	Top-5 acc 28.906 (27.783)	lr 0.04080
Warmup Train [12][940/3239]	Time 0.238 (0.244)	Data 0.001 (0.020)	Loss 5.2000 (5.1391)	Top-1 acc 14.062 (11.552)	Top-5 acc 29.297 (27.796)	lr 0.04080
Warmup Train [12][950/3239]	Time 0.291 (0.244)	Data 0.001 (0.020)	Loss 5.0246 (5.1391)	Top-1 acc 12.109 (11.540)	Top-5 acc 30.469 (27.776)	lr 0.04079
Warmup Train [12][960/3239]	Time 0.218 (0.244)	Data 0.001 (0.020)	Loss 5.2467 (5.1390)	Top-1 acc 9.375 (11.542)	Top-5 acc 24.609 (27.773)	lr 0.04079
Warmup Train [12][970/3239]	Time 0.170 (0.244)	Data 0.001 (0.020)	Loss 5.1023 (5.1390)	Top-1 acc 12.109 (11.548)	Top-5 acc 28.516 (27.778)	lr 0.04078
Warmup Train [12][980/3239]	Time 0.214 (0.243)	Data 0.001 (0.020)	Loss 5.2013 (5.1384)	Top-1 acc 8.984 (11.554)	Top-5 acc 26.172 (27.788)	lr 0.04078
Warmup Train [12][990/3239]	Time 0.143 (0.243)	Data 0.001 (0.019)	Loss 5.2915 (5.1381)	Top-1 acc 8.203 (11.551)	Top-5 acc 22.656 (27.784)	lr 0.04078
Warmup Train [12][1000/3239]	Time 0.203 (0.243)	Data 0.001 (0.019)	Loss 5.1969 (5.1380)	Top-1 acc 11.719 (11.558)	Top-5 acc 26.953 (27.796)	lr 0.04077
Warmup Train [12][1010/3239]	Time 0.215 (0.243)	Data 0.001 (0.019)	Loss 5.1341 (5.1380)	Top-1 acc 8.594 (11.559)	Top-5 acc 29.297 (27.793)	lr 0.04077
Warmup Train [12][1020/3239]	Time 0.165 (0.243)	Data 0.001 (0.019)	Loss 5.2088 (5.1382)	Top-1 acc 9.375 (11.550)	Top-5 acc 23.828 (27.789)	lr 0.04076
Warmup Train [12][1030/3239]	Time 0.141 (0.242)	Data 0.001 (0.019)	Loss 5.1811 (5.1375)	Top-1 acc 10.938 (11.556)	Top-5 acc 27.734 (27.795)	lr 0.04076
Warmup Train [12][1040/3239]	Time 0.171 (0.242)	Data 0.001 (0.019)	Loss 5.1887 (5.1380)	Top-1 acc 9.766 (11.548)	Top-5 acc 25.000 (27.788)	lr 0.04075
Warmup Train [12][1050/3239]	Time 0.284 (0.242)	Data 0.002 (0.018)	Loss 5.0122 (5.1381)	Top-1 acc 12.891 (11.551)	Top-5 acc 34.375 (27.792)	lr 0.04075
Warmup Train [12][1060/3239]	Time 0.233 (0.242)	Data 0.001 (0.018)	Loss 5.1755 (5.1387)	Top-1 acc 11.719 (11.543)	Top-5 acc 29.297 (27.782)	lr 0.04074
Warmup Train [12][1070/3239]	Time 0.250 (0.241)	Data 0.002 (0.018)	Loss 5.1444 (5.1386)	Top-1 acc 11.328 (11.536)	Top-5 acc 27.344 (27.791)	lr 0.04074
Warmup Train [12][1080/3239]	Time 0.227 (0.241)	Data 0.001 (0.018)	Loss 5.1836 (5.1388)	Top-1 acc 9.375 (11.535)	Top-5 acc 26.172 (27.776)	lr 0.04073
Warmup Train [12][1090/3239]	Time 0.236 (0.241)	Data 0.001 (0.018)	Loss 5.2457 (5.1387)	Top-1 acc 12.109 (11.537)	Top-5 acc 25.781 (27.779)	lr 0.04073
Warmup Train [12][1100/3239]	Time 0.175 (0.241)	Data 0.001 (0.018)	Loss 5.1113 (5.1385)	Top-1 acc 10.938 (11.545)	Top-5 acc 27.734 (27.790)	lr 0.04072
Warmup Train [12][1110/3239]	Time 0.268 (0.241)	Data 0.001 (0.018)	Loss 5.1499 (5.1383)	Top-1 acc 14.453 (11.554)	Top-5 acc 31.250 (27.808)	lr 0.04072
Warmup Train [12][1120/3239]	Time 0.287 (0.241)	Data 0.001 (0.017)	Loss 5.0416 (5.1379)	Top-1 acc 14.062 (11.556)	Top-5 acc 32.031 (27.815)	lr 0.04071
Warmup Train [12][1130/3239]	Time 0.305 (0.240)	Data 0.001 (0.017)	Loss 5.1242 (5.1378)	Top-1 acc 14.062 (11.554)	Top-5 acc 26.953 (27.817)	lr 0.04071
Warmup Train [12][1140/3239]	Time 0.191 (0.240)	Data 0.001 (0.017)	Loss 5.0707 (5.1373)	Top-1 acc 11.328 (11.563)	Top-5 acc 27.734 (27.828)	lr 0.04070
Warmup Train [12][1150/3239]	Time 0.173 (0.240)	Data 0.001 (0.017)	Loss 5.1491 (5.1373)	Top-1 acc 10.547 (11.560)	Top-5 acc 26.953 (27.829)	lr 0.04070
Warmup Train [12][1160/3239]	Time 0.320 (0.240)	Data 0.001 (0.017)	Loss 5.1945 (5.1372)	Top-1 acc 10.938 (11.566)	Top-5 acc 26.172 (27.827)	lr 0.04070
Warmup Train [12][1170/3239]	Time 0.238 (0.240)	Data 0.001 (0.017)	Loss 5.0940 (5.1370)	Top-1 acc 12.109 (11.568)	Top-5 acc 30.078 (27.833)	lr 0.04069
Warmup Train [12][1180/3239]	Time 0.219 (0.240)	Data 0.001 (0.017)	Loss 5.1271 (5.1369)	Top-1 acc 14.453 (11.568)	Top-5 acc 28.125 (27.835)	lr 0.04069
Warmup Train [12][1190/3239]	Time 0.194 (0.240)	Data 0.002 (0.017)	Loss 5.1250 (5.1367)	Top-1 acc 10.156 (11.577)	Top-5 acc 29.297 (27.853)	lr 0.04068
Warmup Train [12][1200/3239]	Time 0.194 (0.239)	Data 0.001 (0.017)	Loss 5.2018 (5.1367)	Top-1 acc 14.844 (11.579)	Top-5 acc 28.125 (27.851)	lr 0.04068
Warmup Train [12][1210/3239]	Time 0.193 (0.239)	Data 0.001 (0.016)	Loss 5.1748 (5.1365)	Top-1 acc 8.984 (11.584)	Top-5 acc 28.125 (27.858)	lr 0.04067
Warmup Train [12][1220/3239]	Time 0.201 (0.239)	Data 0.001 (0.016)	Loss 5.0232 (5.1359)	Top-1 acc 13.672 (11.591)	Top-5 acc 32.031 (27.880)	lr 0.04067
Warmup Train [12][1230/3239]	Time 0.226 (0.239)	Data 0.001 (0.016)	Loss 5.1059 (5.1355)	Top-1 acc 11.719 (11.595)	Top-5 acc 29.688 (27.890)	lr 0.04066
Warmup Train [12][1240/3239]	Time 0.196 (0.239)	Data 0.002 (0.016)	Loss 5.1534 (5.1350)	Top-1 acc 13.281 (11.605)	Top-5 acc 28.125 (27.909)	lr 0.04066
Warmup Train [12][1250/3239]	Time 0.197 (0.238)	Data 0.001 (0.016)	Loss 5.2109 (5.1349)	Top-1 acc 14.453 (11.608)	Top-5 acc 30.078 (27.909)	lr 0.04065
Warmup Train [12][1260/3239]	Time 0.312 (0.238)	Data 0.001 (0.016)	Loss 5.1962 (5.1349)	Top-1 acc 12.891 (11.607)	Top-5 acc 28.906 (27.914)	lr 0.04065
Warmup Train [12][1270/3239]	Time 0.267 (0.238)	Data 0.001 (0.016)	Loss 5.0617 (5.1347)	Top-1 acc 11.328 (11.614)	Top-5 acc 28.906 (27.922)	lr 0.04064
Warmup Train [12][1280/3239]	Time 0.189 (0.238)	Data 0.001 (0.016)	Loss 5.0716 (5.1346)	Top-1 acc 14.453 (11.614)	Top-5 acc 30.859 (27.927)	lr 0.04064
Warmup Train [12][1290/3239]	Time 0.209 (0.238)	Data 0.002 (0.016)	Loss 5.1791 (5.1351)	Top-1 acc 10.156 (11.610)	Top-5 acc 27.734 (27.909)	lr 0.04063
Warmup Train [12][1300/3239]	Time 0.183 (0.238)	Data 0.001 (0.015)	Loss 5.1396 (5.1349)	Top-1 acc 11.328 (11.611)	Top-5 acc 26.953 (27.912)	lr 0.04063
Warmup Train [12][1310/3239]	Time 0.231 (0.238)	Data 0.001 (0.015)	Loss 5.0602 (5.1349)	Top-1 acc 13.672 (11.621)	Top-5 acc 29.688 (27.914)	lr 0.04062
Warmup Train [12][1320/3239]	Time 0.276 (0.238)	Data 0.001 (0.015)	Loss 5.0648 (5.1349)	Top-1 acc 13.672 (11.626)	Top-5 acc 28.906 (27.915)	lr 0.04062
Warmup Train [12][1330/3239]	Time 0.183 (0.237)	Data 0.001 (0.015)	Loss 4.9867 (5.1347)	Top-1 acc 12.891 (11.621)	Top-5 acc 33.594 (27.913)	lr 0.04061
Warmup Train [12][1340/3239]	Time 0.205 (0.237)	Data 0.001 (0.015)	Loss 5.0872 (5.1344)	Top-1 acc 14.844 (11.630)	Top-5 acc 32.422 (27.925)	lr 0.04061
Warmup Train [12][1350/3239]	Time 0.198 (0.237)	Data 0.002 (0.015)	Loss 5.1774 (5.1343)	Top-1 acc 10.156 (11.627)	Top-5 acc 27.344 (27.927)	lr 0.04061
Warmup Train [12][1360/3239]	Time 0.301 (0.237)	Data 0.001 (0.015)	Loss 5.1828 (5.1342)	Top-1 acc 13.281 (11.627)	Top-5 acc 30.469 (27.934)	lr 0.04060
Warmup Train [12][1370/3239]	Time 0.260 (0.237)	Data 0.001 (0.015)	Loss 5.1880 (5.1341)	Top-1 acc 8.984 (11.628)	Top-5 acc 27.344 (27.932)	lr 0.04060
Warmup Train [12][1380/3239]	Time 0.220 (0.237)	Data 0.001 (0.015)	Loss 5.1500 (5.1339)	Top-1 acc 10.156 (11.631)	Top-5 acc 26.172 (27.934)	lr 0.04059
Warmup Train [12][1390/3239]	Time 0.184 (0.237)	Data 0.001 (0.015)	Loss 5.1512 (5.1338)	Top-1 acc 13.281 (11.633)	Top-5 acc 29.688 (27.935)	lr 0.04059
Warmup Train [12][1400/3239]	Time 0.158 (0.237)	Data 0.001 (0.015)	Loss 5.2760 (5.1340)	Top-1 acc 11.328 (11.627)	Top-5 acc 27.344 (27.931)	lr 0.04058
Warmup Train [12][1410/3239]	Time 0.259 (0.237)	Data 0.001 (0.015)	Loss 5.1947 (5.1338)	Top-1 acc 8.984 (11.630)	Top-5 acc 26.953 (27.932)	lr 0.04058
Warmup Train [12][1420/3239]	Time 0.147 (0.236)	Data 0.001 (0.014)	Loss 5.0776 (5.1337)	Top-1 acc 11.328 (11.629)	Top-5 acc 26.562 (27.930)	lr 0.04057
Warmup Train [12][1430/3239]	Time 0.200 (0.236)	Data 0.002 (0.014)	Loss 5.0544 (5.1337)	Top-1 acc 10.938 (11.628)	Top-5 acc 31.250 (27.932)	lr 0.04057
Warmup Train [12][1440/3239]	Time 0.186 (0.236)	Data 0.001 (0.014)	Loss 5.1652 (5.1335)	Top-1 acc 10.938 (11.630)	Top-5 acc 27.734 (27.936)	lr 0.04056
Warmup Train [12][1450/3239]	Time 0.222 (0.236)	Data 0.001 (0.014)	Loss 5.1665 (5.1334)	Top-1 acc 9.766 (11.636)	Top-5 acc 25.391 (27.940)	lr 0.04056
Warmup Train [12][1460/3239]	Time 0.304 (0.236)	Data 0.002 (0.014)	Loss 5.1737 (5.1333)	Top-1 acc 11.719 (11.641)	Top-5 acc 30.859 (27.949)	lr 0.04055
Warmup Train [12][1470/3239]	Time 0.243 (0.236)	Data 0.001 (0.014)	Loss 5.0498 (5.1331)	Top-1 acc 11.328 (11.643)	Top-5 acc 30.469 (27.951)	lr 0.04055
Warmup Train [12][1480/3239]	Time 0.167 (0.236)	Data 0.001 (0.014)	Loss 5.1516 (5.1333)	Top-1 acc 8.984 (11.632)	Top-5 acc 24.609 (27.941)	lr 0.04054
Warmup Train [12][1490/3239]	Time 0.204 (0.236)	Data 0.033 (0.014)	Loss 5.2189 (5.1333)	Top-1 acc 13.281 (11.637)	Top-5 acc 25.781 (27.942)	lr 0.04054
Warmup Train [12][1500/3239]	Time 0.169 (0.236)	Data 0.001 (0.014)	Loss 5.0460 (5.1331)	Top-1 acc 12.891 (11.636)	Top-5 acc 29.688 (27.946)	lr 0.04053
Warmup Train [12][1510/3239]	Time 0.188 (0.236)	Data 0.001 (0.014)	Loss 4.9952 (5.1329)	Top-1 acc 16.797 (11.635)	Top-5 acc 35.938 (27.950)	lr 0.04053
Warmup Train [12][1520/3239]	Time 0.223 (0.235)	Data 0.001 (0.014)	Loss 4.9906 (5.1329)	Top-1 acc 13.672 (11.635)	Top-5 acc 30.078 (27.957)	lr 0.04052
Warmup Train [12][1530/3239]	Time 0.245 (0.235)	Data 0.001 (0.014)	Loss 4.9535 (5.1327)	Top-1 acc 14.062 (11.633)	Top-5 acc 35.156 (27.966)	lr 0.04052
Warmup Train [12][1540/3239]	Time 0.135 (0.235)	Data 0.001 (0.014)	Loss 5.0260 (5.1322)	Top-1 acc 12.500 (11.637)	Top-5 acc 29.688 (27.979)	lr 0.04052
Warmup Train [12][1550/3239]	Time 0.397 (0.235)	Data 0.002 (0.014)	Loss 5.1528 (5.1323)	Top-1 acc 12.500 (11.640)	Top-5 acc 27.344 (27.977)	lr 0.04051
Warmup Train [12][1560/3239]	Time 0.276 (0.235)	Data 0.001 (0.014)	Loss 5.1856 (5.1323)	Top-1 acc 12.109 (11.636)	Top-5 acc 26.172 (27.978)	lr 0.04051
Warmup Train [12][1570/3239]	Time 0.173 (0.235)	Data 0.001 (0.013)	Loss 5.0746 (5.1321)	Top-1 acc 12.109 (11.645)	Top-5 acc 25.391 (27.983)	lr 0.04050
Warmup Train [12][1580/3239]	Time 0.194 (0.235)	Data 0.002 (0.013)	Loss 5.1262 (5.1320)	Top-1 acc 9.766 (11.644)	Top-5 acc 29.297 (27.980)	lr 0.04050
Warmup Train [12][1590/3239]	Time 0.215 (0.235)	Data 0.001 (0.013)	Loss 5.1897 (5.1321)	Top-1 acc 9.766 (11.641)	Top-5 acc 24.609 (27.973)	lr 0.04049
Warmup Train [12][1600/3239]	Time 0.230 (0.235)	Data 0.001 (0.013)	Loss 5.2262 (5.1319)	Top-1 acc 11.328 (11.642)	Top-5 acc 27.734 (27.974)	lr 0.04049
Warmup Train [12][1610/3239]	Time 0.226 (0.235)	Data 0.001 (0.013)	Loss 4.9852 (5.1320)	Top-1 acc 14.453 (11.639)	Top-5 acc 32.031 (27.971)	lr 0.04048
Warmup Train [12][1620/3239]	Time 0.211 (0.235)	Data 0.001 (0.013)	Loss 5.0457 (5.1322)	Top-1 acc 11.719 (11.635)	Top-5 acc 31.250 (27.972)	lr 0.04048
Warmup Train [12][1630/3239]	Time 0.249 (0.235)	Data 0.001 (0.013)	Loss 5.1132 (5.1324)	Top-1 acc 10.156 (11.634)	Top-5 acc 26.172 (27.970)	lr 0.04047
Warmup Train [12][1640/3239]	Time 0.252 (0.235)	Data 0.001 (0.013)	Loss 5.2508 (5.1326)	Top-1 acc 12.109 (11.632)	Top-5 acc 25.391 (27.961)	lr 0.04047
Warmup Train [12][1650/3239]	Time 0.223 (0.235)	Data 0.002 (0.013)	Loss 5.0313 (5.1325)	Top-1 acc 12.891 (11.633)	Top-5 acc 29.297 (27.960)	lr 0.04046
Warmup Train [12][1660/3239]	Time 0.323 (0.235)	Data 0.002 (0.013)	Loss 5.1654 (5.1326)	Top-1 acc 10.156 (11.634)	Top-5 acc 28.516 (27.963)	lr 0.04046
Warmup Train [12][1670/3239]	Time 0.200 (0.235)	Data 0.001 (0.013)	Loss 5.2056 (5.1326)	Top-1 acc 12.500 (11.635)	Top-5 acc 24.219 (27.966)	lr 0.04045
Warmup Train [12][1680/3239]	Time 0.156 (0.235)	Data 0.002 (0.013)	Loss 4.9214 (5.1323)	Top-1 acc 16.016 (11.641)	Top-5 acc 34.375 (27.970)	lr 0.04045
Warmup Train [12][1690/3239]	Time 0.196 (0.235)	Data 0.001 (0.013)	Loss 5.0956 (5.1321)	Top-1 acc 12.109 (11.647)	Top-5 acc 28.516 (27.973)	lr 0.04044
Warmup Train [12][1700/3239]	Time 0.242 (0.235)	Data 0.001 (0.013)	Loss 5.0988 (5.1318)	Top-1 acc 10.156 (11.654)	Top-5 acc 28.516 (27.983)	lr 0.04044
Warmup Train [12][1710/3239]	Time 0.248 (0.235)	Data 0.001 (0.013)	Loss 5.1276 (5.1319)	Top-1 acc 13.281 (11.653)	Top-5 acc 32.031 (27.984)	lr 0.04043
Warmup Train [12][1720/3239]	Time 0.187 (0.235)	Data 0.001 (0.013)	Loss 5.0662 (5.1318)	Top-1 acc 10.547 (11.655)	Top-5 acc 30.469 (27.986)	lr 0.04043
Warmup Train [12][1730/3239]	Time 0.301 (0.235)	Data 0.001 (0.012)	Loss 5.0336 (5.1313)	Top-1 acc 14.062 (11.660)	Top-5 acc 32.031 (27.995)	lr 0.04042
Warmup Train [12][1740/3239]	Time 0.262 (0.235)	Data 0.001 (0.012)	Loss 5.4420 (5.1314)	Top-1 acc 7.422 (11.656)	Top-5 acc 22.656 (27.994)	lr 0.04042
Warmup Train [12][1750/3239]	Time 0.252 (0.235)	Data 0.001 (0.012)	Loss 5.0482 (5.1312)	Top-1 acc 15.625 (11.656)	Top-5 acc 32.812 (27.996)	lr 0.04042
Warmup Train [12][1760/3239]	Time 0.213 (0.234)	Data 0.001 (0.012)	Loss 5.0654 (5.1312)	Top-1 acc 12.500 (11.658)	Top-5 acc 28.516 (27.995)	lr 0.04041
Warmup Train [12][1770/3239]	Time 0.266 (0.234)	Data 0.001 (0.012)	Loss 5.0507 (5.1310)	Top-1 acc 13.672 (11.656)	Top-5 acc 31.250 (27.998)	lr 0.04041
Warmup Train [12][1780/3239]	Time 0.231 (0.234)	Data 0.001 (0.012)	Loss 5.1805 (5.1309)	Top-1 acc 11.328 (11.654)	Top-5 acc 26.953 (27.998)	lr 0.04040
Warmup Train [12][1790/3239]	Time 0.231 (0.234)	Data 0.001 (0.012)	Loss 5.1459 (5.1307)	Top-1 acc 10.547 (11.658)	Top-5 acc 28.516 (28.000)	lr 0.04040
Warmup Train [12][1800/3239]	Time 0.165 (0.234)	Data 0.002 (0.012)	Loss 5.0064 (5.1303)	Top-1 acc 11.328 (11.661)	Top-5 acc 28.125 (28.009)	lr 0.04039
Warmup Train [12][1810/3239]	Time 0.188 (0.234)	Data 0.001 (0.012)	Loss 5.1009 (5.1300)	Top-1 acc 11.719 (11.662)	Top-5 acc 29.297 (28.022)	lr 0.04039
Warmup Train [12][1820/3239]	Time 0.223 (0.234)	Data 0.002 (0.012)	Loss 4.9851 (5.1299)	Top-1 acc 14.062 (11.664)	Top-5 acc 31.641 (28.023)	lr 0.04038
Warmup Train [12][1830/3239]	Time 0.221 (0.234)	Data 0.001 (0.012)	Loss 5.1499 (5.1295)	Top-1 acc 12.109 (11.676)	Top-5 acc 33.203 (28.037)	lr 0.04038
Warmup Train [12][1840/3239]	Time 0.156 (0.234)	Data 0.001 (0.012)	Loss 5.1526 (5.1295)	Top-1 acc 8.594 (11.671)	Top-5 acc 26.172 (28.036)	lr 0.04037
Warmup Train [12][1850/3239]	Time 0.208 (0.234)	Data 0.001 (0.012)	Loss 5.0613 (5.1292)	Top-1 acc 10.938 (11.672)	Top-5 acc 30.469 (28.046)	lr 0.04037
Warmup Train [12][1860/3239]	Time 0.242 (0.234)	Data 0.001 (0.012)	Loss 5.1898 (5.1292)	Top-1 acc 10.938 (11.672)	Top-5 acc 25.781 (28.047)	lr 0.04036
Warmup Train [12][1870/3239]	Time 0.263 (0.234)	Data 0.001 (0.012)	Loss 5.0588 (5.1291)	Top-1 acc 13.672 (11.672)	Top-5 acc 29.297 (28.046)	lr 0.04036
Warmup Train [12][1880/3239]	Time 0.295 (0.234)	Data 0.001 (0.012)	Loss 5.0320 (5.1290)	Top-1 acc 12.891 (11.674)	Top-5 acc 30.078 (28.054)	lr 0.04035
Warmup Train [12][1890/3239]	Time 0.237 (0.234)	Data 0.001 (0.012)	Loss 5.1641 (5.1286)	Top-1 acc 8.203 (11.680)	Top-5 acc 26.172 (28.062)	lr 0.04035
Warmup Train [12][1900/3239]	Time 0.241 (0.234)	Data 0.001 (0.012)	Loss 5.0780 (5.1283)	Top-1 acc 10.547 (11.686)	Top-5 acc 27.734 (28.066)	lr 0.04034
Warmup Train [12][1910/3239]	Time 0.231 (0.234)	Data 0.002 (0.012)	Loss 5.0993 (5.1279)	Top-1 acc 10.938 (11.692)	Top-5 acc 30.078 (28.086)	lr 0.04034
Warmup Train [12][1920/3239]	Time 0.242 (0.234)	Data 0.002 (0.012)	Loss 5.1813 (5.1278)	Top-1 acc 11.328 (11.694)	Top-5 acc 25.391 (28.090)	lr 0.04033
Warmup Train [12][1930/3239]	Time 0.131 (0.233)	Data 0.001 (0.012)	Loss 5.2737 (5.1278)	Top-1 acc 10.547 (11.696)	Top-5 acc 25.391 (28.089)	lr 0.04033
Warmup Train [12][1940/3239]	Time 0.189 (0.233)	Data 0.001 (0.012)	Loss 5.1407 (5.1275)	Top-1 acc 12.109 (11.703)	Top-5 acc 27.734 (28.098)	lr 0.04032
Warmup Train [12][1950/3239]	Time 0.160 (0.233)	Data 0.001 (0.012)	Loss 5.1015 (5.1273)	Top-1 acc 13.672 (11.704)	Top-5 acc 32.422 (28.102)	lr 0.04032
Warmup Train [12][1960/3239]	Time 0.271 (0.233)	Data 0.001 (0.011)	Loss 5.1879 (5.1271)	Top-1 acc 14.062 (11.710)	Top-5 acc 25.391 (28.106)	lr 0.04032
Warmup Train [12][1970/3239]	Time 0.202 (0.233)	Data 0.001 (0.011)	Loss 5.0861 (5.1270)	Top-1 acc 13.281 (11.711)	Top-5 acc 31.641 (28.109)	lr 0.04031
Warmup Train [12][1980/3239]	Time 0.288 (0.233)	Data 0.002 (0.011)	Loss 5.0786 (5.1269)	Top-1 acc 14.453 (11.713)	Top-5 acc 28.906 (28.112)	lr 0.04031
Warmup Train [12][1990/3239]	Time 0.245 (0.233)	Data 0.001 (0.011)	Loss 5.1764 (5.1271)	Top-1 acc 10.938 (11.708)	Top-5 acc 25.000 (28.105)	lr 0.04030
Warmup Train [12][2000/3239]	Time 0.181 (0.233)	Data 0.001 (0.011)	Loss 4.9646 (5.1266)	Top-1 acc 13.672 (11.717)	Top-5 acc 32.812 (28.119)	lr 0.04030
Warmup Train [12][2010/3239]	Time 0.190 (0.233)	Data 0.001 (0.011)	Loss 5.1829 (5.1265)	Top-1 acc 10.938 (11.716)	Top-5 acc 26.172 (28.122)	lr 0.04029
Warmup Train [12][2020/3239]	Time 0.177 (0.233)	Data 0.001 (0.011)	Loss 5.2975 (5.1264)	Top-1 acc 6.641 (11.715)	Top-5 acc 20.312 (28.121)	lr 0.04029
Warmup Train [12][2030/3239]	Time 0.204 (0.233)	Data 0.002 (0.011)	Loss 4.9238 (5.1260)	Top-1 acc 13.672 (11.720)	Top-5 acc 32.812 (28.132)	lr 0.04028
Warmup Train [12][2040/3239]	Time 0.239 (0.233)	Data 0.001 (0.011)	Loss 5.1789 (5.1260)	Top-1 acc 9.375 (11.722)	Top-5 acc 27.344 (28.130)	lr 0.04028
Warmup Train [12][2050/3239]	Time 0.177 (0.233)	Data 0.001 (0.011)	Loss 5.0799 (5.1257)	Top-1 acc 10.547 (11.726)	Top-5 acc 26.953 (28.136)	lr 0.04027
Warmup Train [12][2060/3239]	Time 0.193 (0.233)	Data 0.001 (0.011)	Loss 5.0173 (5.1253)	Top-1 acc 11.328 (11.730)	Top-5 acc 29.688 (28.141)	lr 0.04027
Warmup Train [12][2070/3239]	Time 0.255 (0.233)	Data 0.001 (0.011)	Loss 4.9844 (5.1251)	Top-1 acc 14.062 (11.732)	Top-5 acc 32.812 (28.140)	lr 0.04026
Warmup Train [12][2080/3239]	Time 0.196 (0.233)	Data 0.001 (0.011)	Loss 4.8912 (5.1249)	Top-1 acc 16.797 (11.740)	Top-5 acc 34.375 (28.146)	lr 0.04026
Warmup Train [12][2090/3239]	Time 0.314 (0.233)	Data 0.001 (0.011)	Loss 4.9816 (5.1247)	Top-1 acc 15.234 (11.742)	Top-5 acc 29.297 (28.149)	lr 0.04025
Warmup Train [12][2100/3239]	Time 0.291 (0.233)	Data 0.002 (0.011)	Loss 5.1447 (5.1246)	Top-1 acc 9.375 (11.741)	Top-5 acc 25.391 (28.146)	lr 0.04025
Warmup Train [12][2110/3239]	Time 0.233 (0.233)	Data 0.001 (0.011)	Loss 5.2430 (5.1245)	Top-1 acc 11.719 (11.747)	Top-5 acc 26.562 (28.149)	lr 0.04024
Warmup Train [12][2120/3239]	Time 0.211 (0.233)	Data 0.001 (0.011)	Loss 4.9454 (5.1242)	Top-1 acc 14.453 (11.756)	Top-5 acc 33.984 (28.159)	lr 0.04024
Warmup Train [12][2130/3239]	Time 0.270 (0.233)	Data 0.001 (0.011)	Loss 5.1307 (5.1239)	Top-1 acc 10.938 (11.757)	Top-5 acc 28.906 (28.169)	lr 0.04023
Warmup Train [12][2140/3239]	Time 0.239 (0.233)	Data 0.001 (0.011)	Loss 5.0122 (5.1236)	Top-1 acc 13.672 (11.762)	Top-5 acc 28.125 (28.177)	lr 0.04023
Warmup Train [12][2150/3239]	Time 0.222 (0.232)	Data 0.001 (0.011)	Loss 5.1309 (5.1234)	Top-1 acc 13.672 (11.767)	Top-5 acc 30.078 (28.185)	lr 0.04022
Warmup Train [12][2160/3239]	Time 0.185 (0.232)	Data 0.002 (0.011)	Loss 5.0776 (5.1231)	Top-1 acc 12.500 (11.770)	Top-5 acc 28.906 (28.192)	lr 0.04022
Warmup Train [12][2170/3239]	Time 0.259 (0.232)	Data 0.001 (0.011)	Loss 4.9738 (5.1229)	Top-1 acc 13.281 (11.766)	Top-5 acc 32.422 (28.196)	lr 0.04021
Warmup Train [12][2180/3239]	Time 0.138 (0.232)	Data 0.001 (0.011)	Loss 5.1267 (5.1227)	Top-1 acc 9.766 (11.768)	Top-5 acc 26.953 (28.200)	lr 0.04021
Warmup Train [12][2190/3239]	Time 0.317 (0.232)	Data 0.001 (0.011)	Loss 5.0574 (5.1225)	Top-1 acc 10.547 (11.772)	Top-5 acc 26.953 (28.206)	lr 0.04020
Warmup Train [12][2200/3239]	Time 0.233 (0.232)	Data 0.002 (0.011)	Loss 5.2384 (5.1224)	Top-1 acc 10.156 (11.775)	Top-5 acc 26.172 (28.210)	lr 0.04020
Warmup Train [12][2210/3239]	Time 0.206 (0.232)	Data 0.001 (0.011)	Loss 5.1263 (5.1222)	Top-1 acc 9.375 (11.774)	Top-5 acc 28.125 (28.212)	lr 0.04019
Warmup Train [12][2220/3239]	Time 0.165 (0.232)	Data 0.001 (0.010)	Loss 5.2025 (5.1224)	Top-1 acc 12.891 (11.768)	Top-5 acc 28.516 (28.206)	lr 0.04019
Warmup Train [12][2230/3239]	Time 0.283 (0.232)	Data 0.001 (0.010)	Loss 5.0537 (5.1223)	Top-1 acc 12.109 (11.770)	Top-5 acc 31.641 (28.212)	lr 0.04019
Warmup Train [12][2240/3239]	Time 0.194 (0.232)	Data 0.001 (0.010)	Loss 5.0840 (5.1221)	Top-1 acc 14.062 (11.776)	Top-5 acc 29.688 (28.219)	lr 0.04018
Warmup Train [12][2250/3239]	Time 0.196 (0.232)	Data 0.001 (0.010)	Loss 5.0848 (5.1217)	Top-1 acc 10.547 (11.778)	Top-5 acc 28.125 (28.228)	lr 0.04018
Warmup Train [12][2260/3239]	Time 0.246 (0.232)	Data 0.001 (0.010)	Loss 5.0902 (5.1217)	Top-1 acc 13.281 (11.780)	Top-5 acc 29.688 (28.226)	lr 0.04017
Warmup Train [12][2270/3239]	Time 0.171 (0.232)	Data 0.002 (0.010)	Loss 5.0613 (5.1217)	Top-1 acc 12.500 (11.776)	Top-5 acc 25.781 (28.220)	lr 0.04017
Warmup Train [12][2280/3239]	Time 0.151 (0.232)	Data 0.001 (0.010)	Loss 5.2425 (5.1217)	Top-1 acc 9.766 (11.778)	Top-5 acc 22.656 (28.219)	lr 0.04016
Warmup Train [12][2290/3239]	Time 0.233 (0.232)	Data 0.002 (0.010)	Loss 4.9686 (5.1214)	Top-1 acc 14.844 (11.779)	Top-5 acc 33.984 (28.224)	lr 0.04016
Warmup Train [12][2300/3239]	Time 0.328 (0.232)	Data 0.001 (0.010)	Loss 5.2372 (5.1212)	Top-1 acc 10.156 (11.788)	Top-5 acc 25.781 (28.233)	lr 0.04015
Warmup Train [12][2310/3239]	Time 0.200 (0.232)	Data 0.001 (0.010)	Loss 5.1067 (5.1208)	Top-1 acc 11.719 (11.793)	Top-5 acc 28.906 (28.242)	lr 0.04015
Warmup Train [12][2320/3239]	Time 0.215 (0.232)	Data 0.001 (0.010)	Loss 4.9618 (5.1206)	Top-1 acc 16.797 (11.799)	Top-5 acc 32.812 (28.250)	lr 0.04014
Warmup Train [12][2330/3239]	Time 0.151 (0.232)	Data 0.001 (0.010)	Loss 4.9911 (5.1205)	Top-1 acc 14.062 (11.801)	Top-5 acc 28.516 (28.255)	lr 0.04014
Warmup Train [12][2340/3239]	Time 0.239 (0.232)	Data 0.001 (0.010)	Loss 5.1207 (5.1201)	Top-1 acc 8.594 (11.803)	Top-5 acc 25.391 (28.262)	lr 0.04013
Warmup Train [12][2350/3239]	Time 0.201 (0.232)	Data 0.001 (0.010)	Loss 5.2458 (5.1201)	Top-1 acc 12.109 (11.802)	Top-5 acc 30.859 (28.267)	lr 0.04013
Warmup Train [12][2360/3239]	Time 0.227 (0.232)	Data 0.001 (0.010)	Loss 4.9017 (5.1198)	Top-1 acc 14.062 (11.808)	Top-5 acc 32.812 (28.275)	lr 0.04012
Warmup Train [12][2370/3239]	Time 0.147 (0.232)	Data 0.001 (0.010)	Loss 5.2691 (5.1199)	Top-1 acc 9.375 (11.807)	Top-5 acc 28.906 (28.274)	lr 0.04012
Warmup Train [12][2380/3239]	Time 0.245 (0.232)	Data 0.001 (0.010)	Loss 5.1109 (5.1197)	Top-1 acc 10.547 (11.813)	Top-5 acc 27.734 (28.284)	lr 0.04011
Warmup Train [12][2390/3239]	Time 0.252 (0.232)	Data 0.001 (0.010)	Loss 5.1656 (5.1197)	Top-1 acc 12.109 (11.813)	Top-5 acc 27.344 (28.288)	lr 0.04011
Warmup Train [12][2400/3239]	Time 0.181 (0.232)	Data 0.001 (0.010)	Loss 5.0384 (5.1195)	Top-1 acc 11.719 (11.814)	Top-5 acc 28.906 (28.292)	lr 0.04010
Warmup Train [12][2410/3239]	Time 0.412 (0.232)	Data 0.001 (0.010)	Loss 5.1306 (5.1194)	Top-1 acc 11.719 (11.816)	Top-5 acc 26.953 (28.294)	lr 0.04010
Warmup Train [12][2420/3239]	Time 0.221 (0.232)	Data 0.001 (0.010)	Loss 5.0048 (5.1191)	Top-1 acc 14.062 (11.822)	Top-5 acc 30.078 (28.300)	lr 0.04009
Warmup Train [12][2430/3239]	Time 0.160 (0.231)	Data 0.001 (0.010)	Loss 5.1338 (5.1190)	Top-1 acc 13.281 (11.826)	Top-5 acc 29.688 (28.303)	lr 0.04009
Warmup Train [12][2440/3239]	Time 0.186 (0.231)	Data 0.001 (0.010)	Loss 5.0161 (5.1189)	Top-1 acc 14.844 (11.830)	Top-5 acc 32.812 (28.310)	lr 0.04008
Warmup Train [12][2450/3239]	Time 0.219 (0.231)	Data 0.001 (0.010)	Loss 4.9253 (5.1185)	Top-1 acc 17.188 (11.839)	Top-5 acc 32.812 (28.317)	lr 0.04008
Warmup Train [12][2460/3239]	Time 0.210 (0.231)	Data 0.001 (0.010)	Loss 5.1169 (5.1184)	Top-1 acc 10.547 (11.842)	Top-5 acc 22.656 (28.319)	lr 0.04007
Warmup Train [12][2470/3239]	Time 0.225 (0.231)	Data 0.001 (0.010)	Loss 5.1877 (5.1182)	Top-1 acc 11.719 (11.843)	Top-5 acc 27.734 (28.325)	lr 0.04007
Warmup Train [12][2480/3239]	Time 0.161 (0.231)	Data 0.001 (0.010)	Loss 5.0763 (5.1180)	Top-1 acc 11.719 (11.846)	Top-5 acc 28.906 (28.329)	lr 0.04006
Warmup Train [12][2490/3239]	Time 0.211 (0.231)	Data 0.002 (0.010)	Loss 5.0266 (5.1180)	Top-1 acc 14.844 (11.847)	Top-5 acc 30.859 (28.327)	lr 0.04006
Warmup Train [12][2500/3239]	Time 0.273 (0.231)	Data 0.001 (0.010)	Loss 5.0028 (5.1179)	Top-1 acc 11.719 (11.849)	Top-5 acc 30.078 (28.326)	lr 0.04006
Warmup Train [12][2510/3239]	Time 0.193 (0.231)	Data 0.001 (0.010)	Loss 5.0467 (5.1177)	Top-1 acc 13.672 (11.854)	Top-5 acc 33.984 (28.331)	lr 0.04005
Warmup Train [12][2520/3239]	Time 0.317 (0.231)	Data 0.001 (0.010)	Loss 5.0600 (5.1176)	Top-1 acc 10.938 (11.853)	Top-5 acc 33.984 (28.332)	lr 0.04005
Warmup Train [12][2530/3239]	Time 0.323 (0.231)	Data 0.001 (0.010)	Loss 5.1174 (5.1174)	Top-1 acc 14.062 (11.857)	Top-5 acc 31.250 (28.340)	lr 0.04004
Warmup Train [12][2540/3239]	Time 0.262 (0.231)	Data 0.001 (0.010)	Loss 5.1775 (5.1172)	Top-1 acc 10.547 (11.856)	Top-5 acc 25.391 (28.344)	lr 0.04004
Warmup Train [12][2550/3239]	Time 0.263 (0.231)	Data 0.001 (0.010)	Loss 4.9975 (5.1172)	Top-1 acc 15.234 (11.859)	Top-5 acc 30.859 (28.342)	lr 0.04003
Warmup Train [12][2560/3239]	Time 0.133 (0.231)	Data 0.002 (0.010)	Loss 5.2580 (5.1171)	Top-1 acc 9.375 (11.861)	Top-5 acc 24.609 (28.342)	lr 0.04003
Warmup Train [12][2570/3239]	Time 0.158 (0.231)	Data 0.001 (0.010)	Loss 5.2088 (5.1167)	Top-1 acc 11.328 (11.864)	Top-5 acc 23.828 (28.351)	lr 0.04002
Warmup Train [12][2580/3239]	Time 0.203 (0.231)	Data 0.001 (0.010)	Loss 4.8902 (5.1164)	Top-1 acc 15.234 (11.869)	Top-5 acc 35.547 (28.360)	lr 0.04002
Warmup Train [12][2590/3239]	Time 0.191 (0.231)	Data 0.001 (0.010)	Loss 5.1293 (5.1164)	Top-1 acc 15.625 (11.873)	Top-5 acc 30.469 (28.362)	lr 0.04001
Warmup Train [12][2600/3239]	Time 0.176 (0.231)	Data 0.001 (0.010)	Loss 4.8640 (5.1162)	Top-1 acc 17.969 (11.877)	Top-5 acc 37.500 (28.371)	lr 0.04001
Warmup Train [12][2610/3239]	Time 0.178 (0.231)	Data 0.001 (0.010)	Loss 5.0677 (5.1158)	Top-1 acc 14.453 (11.883)	Top-5 acc 32.031 (28.375)	lr 0.04000
Warmup Train [12][2620/3239]	Time 0.141 (0.231)	Data 0.001 (0.010)	Loss 5.1776 (5.1155)	Top-1 acc 14.062 (11.886)	Top-5 acc 27.344 (28.384)	lr 0.04000
Warmup Train [12][2630/3239]	Time 0.171 (0.231)	Data 0.001 (0.010)	Loss 5.1316 (5.1154)	Top-1 acc 12.500 (11.888)	Top-5 acc 31.641 (28.390)	lr 0.03999
Warmup Train [12][2640/3239]	Time 0.220 (0.231)	Data 0.001 (0.009)	Loss 5.2148 (5.1154)	Top-1 acc 12.109 (11.887)	Top-5 acc 26.953 (28.388)	lr 0.03999
Warmup Train [12][2650/3239]	Time 0.177 (0.231)	Data 0.001 (0.009)	Loss 5.0454 (5.1154)	Top-1 acc 11.719 (11.884)	Top-5 acc 30.469 (28.385)	lr 0.03998
Warmup Train [12][2660/3239]	Time 0.224 (0.231)	Data 0.002 (0.009)	Loss 4.9539 (5.1152)	Top-1 acc 13.672 (11.886)	Top-5 acc 31.250 (28.388)	lr 0.03998
Warmup Train [12][2670/3239]	Time 0.197 (0.231)	Data 0.001 (0.009)	Loss 4.9860 (5.1151)	Top-1 acc 13.672 (11.888)	Top-5 acc 32.031 (28.388)	lr 0.03997
Warmup Train [12][2680/3239]	Time 0.175 (0.231)	Data 0.002 (0.009)	Loss 5.1316 (5.1150)	Top-1 acc 8.984 (11.890)	Top-5 acc 30.859 (28.392)	lr 0.03997
Warmup Train [12][2690/3239]	Time 0.195 (0.231)	Data 0.001 (0.009)	Loss 4.8781 (5.1146)	Top-1 acc 16.016 (11.894)	Top-5 acc 35.938 (28.400)	lr 0.03996
Warmup Train [12][2700/3239]	Time 0.336 (0.231)	Data 0.001 (0.009)	Loss 5.0304 (5.1144)	Top-1 acc 11.328 (11.895)	Top-5 acc 30.078 (28.403)	lr 0.03996
Warmup Train [12][2710/3239]	Time 0.231 (0.231)	Data 0.001 (0.009)	Loss 4.8827 (5.1142)	Top-1 acc 16.016 (11.902)	Top-5 acc 34.375 (28.414)	lr 0.03995
Warmup Train [12][2720/3239]	Time 0.309 (0.231)	Data 0.001 (0.009)	Loss 5.0839 (5.1139)	Top-1 acc 13.672 (11.908)	Top-5 acc 31.641 (28.424)	lr 0.03995
Warmup Train [12][2730/3239]	Time 0.317 (0.231)	Data 0.001 (0.009)	Loss 5.0654 (5.1136)	Top-1 acc 11.719 (11.911)	Top-5 acc 30.469 (28.426)	lr 0.03994
Warmup Train [12][2740/3239]	Time 0.192 (0.231)	Data 0.002 (0.009)	Loss 5.1439 (5.1136)	Top-1 acc 12.109 (11.910)	Top-5 acc 26.562 (28.425)	lr 0.03994
Warmup Train [12][2750/3239]	Time 0.213 (0.231)	Data 0.002 (0.009)	Loss 4.9235 (5.1134)	Top-1 acc 16.406 (11.915)	Top-5 acc 31.250 (28.433)	lr 0.03993
Warmup Train [12][2760/3239]	Time 0.198 (0.231)	Data 0.001 (0.009)	Loss 4.9652 (5.1132)	Top-1 acc 11.719 (11.917)	Top-5 acc 29.688 (28.438)	lr 0.03993
Warmup Train [12][2770/3239]	Time 0.181 (0.231)	Data 0.002 (0.009)	Loss 4.9981 (5.1130)	Top-1 acc 10.156 (11.923)	Top-5 acc 32.812 (28.450)	lr 0.03992
Warmup Train [12][2780/3239]	Time 0.172 (0.231)	Data 0.002 (0.009)	Loss 5.1722 (5.1128)	Top-1 acc 10.938 (11.926)	Top-5 acc 25.781 (28.456)	lr 0.03992
Warmup Train [12][2790/3239]	Time 0.165 (0.230)	Data 0.001 (0.009)	Loss 5.2742 (5.1129)	Top-1 acc 8.203 (11.921)	Top-5 acc 25.781 (28.453)	lr 0.03991
Warmup Train [12][2800/3239]	Time 0.238 (0.231)	Data 0.001 (0.009)	Loss 5.0916 (5.1128)	Top-1 acc 14.453 (11.925)	Top-5 acc 31.250 (28.462)	lr 0.03991
Warmup Train [12][2810/3239]	Time 0.182 (0.231)	Data 0.001 (0.009)	Loss 5.2101 (5.1126)	Top-1 acc 11.719 (11.926)	Top-5 acc 28.516 (28.469)	lr 0.03990
Warmup Train [12][2820/3239]	Time 0.229 (0.230)	Data 0.001 (0.009)	Loss 5.0111 (5.1125)	Top-1 acc 14.453 (11.927)	Top-5 acc 32.422 (28.471)	lr 0.03990
Warmup Train [12][2830/3239]	Time 0.212 (0.230)	Data 0.002 (0.009)	Loss 5.0875 (5.1123)	Top-1 acc 12.500 (11.933)	Top-5 acc 29.688 (28.480)	lr 0.03989
Warmup Train [12][2840/3239]	Time 0.177 (0.230)	Data 0.001 (0.009)	Loss 5.1078 (5.1122)	Top-1 acc 10.547 (11.933)	Top-5 acc 29.688 (28.480)	lr 0.03989
Warmup Train [12][2850/3239]	Time 0.331 (0.230)	Data 0.001 (0.009)	Loss 4.9615 (5.1119)	Top-1 acc 16.016 (11.936)	Top-5 acc 34.766 (28.488)	lr 0.03989
Warmup Train [12][2860/3239]	Time 0.184 (0.230)	Data 0.001 (0.009)	Loss 5.1514 (5.1119)	Top-1 acc 10.938 (11.936)	Top-5 acc 28.516 (28.490)	lr 0.03988
Warmup Train [12][2870/3239]	Time 0.286 (0.230)	Data 0.001 (0.009)	Loss 4.9961 (5.1117)	Top-1 acc 10.938 (11.935)	Top-5 acc 26.953 (28.490)	lr 0.03988
Warmup Train [12][2880/3239]	Time 0.189 (0.230)	Data 0.001 (0.009)	Loss 5.1978 (5.1117)	Top-1 acc 11.719 (11.937)	Top-5 acc 29.297 (28.491)	lr 0.03987
Warmup Train [12][2890/3239]	Time 0.290 (0.230)	Data 0.001 (0.009)	Loss 5.0565 (5.1115)	Top-1 acc 8.594 (11.937)	Top-5 acc 28.516 (28.493)	lr 0.03987
Warmup Train [12][2900/3239]	Time 0.185 (0.230)	Data 0.001 (0.009)	Loss 5.1456 (5.1113)	Top-1 acc 10.938 (11.937)	Top-5 acc 28.516 (28.496)	lr 0.03986
Warmup Train [12][2910/3239]	Time 0.184 (0.230)	Data 0.001 (0.009)	Loss 5.2270 (5.1111)	Top-1 acc 9.375 (11.939)	Top-5 acc 25.391 (28.501)	lr 0.03986
Warmup Train [12][2920/3239]	Time 0.223 (0.230)	Data 0.001 (0.009)	Loss 4.9950 (5.1109)	Top-1 acc 13.281 (11.944)	Top-5 acc 30.078 (28.505)	lr 0.03985
Warmup Train [12][2930/3239]	Time 0.182 (0.230)	Data 0.001 (0.009)	Loss 4.8439 (5.1108)	Top-1 acc 16.406 (11.945)	Top-5 acc 31.641 (28.506)	lr 0.03985
Warmup Train [12][2940/3239]	Time 0.276 (0.230)	Data 0.001 (0.009)	Loss 5.0798 (5.1105)	Top-1 acc 10.547 (11.947)	Top-5 acc 30.469 (28.513)	lr 0.03984
Warmup Train [12][2950/3239]	Time 0.214 (0.230)	Data 0.001 (0.009)	Loss 5.2138 (5.1105)	Top-1 acc 11.719 (11.946)	Top-5 acc 24.219 (28.514)	lr 0.03984
Warmup Train [12][2960/3239]	Time 0.188 (0.230)	Data 0.001 (0.009)	Loss 4.8568 (5.1104)	Top-1 acc 11.719 (11.946)	Top-5 acc 37.500 (28.516)	lr 0.03983
Warmup Train [12][2970/3239]	Time 0.286 (0.230)	Data 0.002 (0.009)	Loss 5.1385 (5.1103)	Top-1 acc 14.062 (11.949)	Top-5 acc 29.297 (28.517)	lr 0.03983
Warmup Train [12][2980/3239]	Time 0.197 (0.230)	Data 0.001 (0.009)	Loss 5.3844 (5.1101)	Top-1 acc 8.203 (11.954)	Top-5 acc 19.922 (28.522)	lr 0.03982
Warmup Train [12][2990/3239]	Time 0.155 (0.230)	Data 0.001 (0.009)	Loss 5.2718 (5.1101)	Top-1 acc 9.766 (11.954)	Top-5 acc 26.172 (28.520)	lr 0.03982
Warmup Train [12][3000/3239]	Time 0.226 (0.230)	Data 0.001 (0.009)	Loss 5.0751 (5.1100)	Top-1 acc 10.156 (11.956)	Top-5 acc 25.000 (28.525)	lr 0.03981
Warmup Train [12][3010/3239]	Time 0.255 (0.230)	Data 0.002 (0.009)	Loss 5.0058 (5.1099)	Top-1 acc 14.062 (11.953)	Top-5 acc 29.297 (28.524)	lr 0.03981
Warmup Train [12][3020/3239]	Time 0.194 (0.230)	Data 0.001 (0.009)	Loss 5.0542 (5.1097)	Top-1 acc 11.328 (11.956)	Top-5 acc 31.641 (28.529)	lr 0.03980
Warmup Train [12][3030/3239]	Time 0.208 (0.230)	Data 0.001 (0.009)	Loss 5.0774 (5.1095)	Top-1 acc 13.281 (11.960)	Top-5 acc 27.734 (28.532)	lr 0.03980
Warmup Train [12][3040/3239]	Time 0.210 (0.230)	Data 0.001 (0.009)	Loss 5.0962 (5.1093)	Top-1 acc 12.500 (11.960)	Top-5 acc 28.125 (28.539)	lr 0.03979
Warmup Train [12][3050/3239]	Time 0.212 (0.230)	Data 0.001 (0.009)	Loss 5.0379 (5.1093)	Top-1 acc 13.672 (11.962)	Top-5 acc 32.031 (28.539)	lr 0.03979
Warmup Train [12][3060/3239]	Time 0.247 (0.230)	Data 0.001 (0.009)	Loss 5.0766 (5.1091)	Top-1 acc 13.281 (11.966)	Top-5 acc 32.031 (28.545)	lr 0.03978
Warmup Train [12][3070/3239]	Time 0.169 (0.230)	Data 0.001 (0.009)	Loss 5.2836 (5.1091)	Top-1 acc 8.984 (11.967)	Top-5 acc 22.656 (28.545)	lr 0.03978
Warmup Train [12][3080/3239]	Time 0.288 (0.230)	Data 0.001 (0.009)	Loss 5.0468 (5.1089)	Top-1 acc 11.719 (11.968)	Top-5 acc 31.250 (28.550)	lr 0.03977
Warmup Train [12][3090/3239]	Time 0.207 (0.230)	Data 0.002 (0.009)	Loss 5.0513 (5.1087)	Top-1 acc 12.109 (11.970)	Top-5 acc 29.297 (28.554)	lr 0.03977
Warmup Train [12][3100/3239]	Time 0.202 (0.230)	Data 0.001 (0.009)	Loss 5.0960 (5.1086)	Top-1 acc 10.938 (11.971)	Top-5 acc 26.172 (28.558)	lr 0.03976
Warmup Train [12][3110/3239]	Time 0.172 (0.230)	Data 0.001 (0.008)	Loss 5.1326 (5.1086)	Top-1 acc 14.062 (11.969)	Top-5 acc 30.859 (28.559)	lr 0.03976
Warmup Train [12][3120/3239]	Time 0.310 (0.230)	Data 0.001 (0.008)	Loss 4.8975 (5.1085)	Top-1 acc 17.188 (11.973)	Top-5 acc 34.766 (28.561)	lr 0.03975
Warmup Train [12][3130/3239]	Time 0.210 (0.230)	Data 0.002 (0.008)	Loss 5.0352 (5.1083)	Top-1 acc 14.844 (11.976)	Top-5 acc 33.984 (28.565)	lr 0.03975
Warmup Train [12][3140/3239]	Time 0.257 (0.230)	Data 0.001 (0.008)	Loss 4.9174 (5.1080)	Top-1 acc 15.625 (11.976)	Top-5 acc 36.328 (28.571)	lr 0.03974
Warmup Train [12][3150/3239]	Time 0.236 (0.230)	Data 0.001 (0.008)	Loss 5.0613 (5.1080)	Top-1 acc 13.672 (11.975)	Top-5 acc 27.344 (28.574)	lr 0.03974
Warmup Train [12][3160/3239]	Time 0.168 (0.230)	Data 0.001 (0.008)	Loss 4.9579 (5.1077)	Top-1 acc 12.500 (11.977)	Top-5 acc 34.766 (28.580)	lr 0.03973
Warmup Train [12][3170/3239]	Time 0.172 (0.230)	Data 0.001 (0.008)	Loss 5.0043 (5.1076)	Top-1 acc 9.375 (11.975)	Top-5 acc 32.031 (28.582)	lr 0.03973
Warmup Train [12][3180/3239]	Time 0.260 (0.230)	Data 0.000 (0.008)	Loss 5.2619 (5.1075)	Top-1 acc 10.156 (11.975)	Top-5 acc 23.828 (28.583)	lr 0.03972
Warmup Train [12][3190/3239]	Time 0.280 (0.230)	Data 0.000 (0.008)	Loss 5.1953 (5.1073)	Top-1 acc 14.453 (11.983)	Top-5 acc 30.469 (28.590)	lr 0.03972
Warmup Train [12][3200/3239]	Time 0.235 (0.230)	Data 0.000 (0.008)	Loss 5.0428 (5.1071)	Top-1 acc 13.672 (11.986)	Top-5 acc 32.422 (28.592)	lr 0.03971
Warmup Train [12][3210/3239]	Time 0.230 (0.230)	Data 0.000 (0.008)	Loss 5.0517 (5.1069)	Top-1 acc 12.500 (11.989)	Top-5 acc 30.078 (28.595)	lr 0.03971
Warmup Train [12][3220/3239]	Time 0.183 (0.230)	Data 0.000 (0.008)	Loss 5.0154 (5.1068)	Top-1 acc 12.109 (11.989)	Top-5 acc 29.297 (28.596)	lr 0.03970
Warmup Train [12][3230/3239]	Time 0.194 (0.229)	Data 0.000 (0.008)	Loss 5.0007 (5.1068)	Top-1 acc 13.672 (11.991)	Top-5 acc 29.297 (28.598)	lr 0.03970
Warmup Train [12][3239/3239]	Time 0.178 (0.229)	Data 0.000 (0.008)	Loss 5.0634 (5.1066)	Top-1 acc 12.346 (11.993)	Top-5 acc 24.691 (28.598)	lr 0.03970
==========Warmup Valid [12/40]	loss 4.268	top-1 acc 17.478	top-5 acc 37.685	Train top-1 11.993	top-5 28.598	flops: 442.4M
Warmup Train [13][0/3239]	Time 14.268 (14.268)	Data 12.811 (12.811)	Loss 4.9723 (4.9723)	Top-1 acc 13.672 (13.672)	Top-5 acc 30.078 (30.078)	lr 0.03969
Warmup Train [13][10/3239]	Time 0.302 (1.595)	Data 0.002 (1.170)	Loss 5.0053 (5.0483)	Top-1 acc 12.109 (12.536)	Top-5 acc 30.078 (29.830)	lr 0.03969
Warmup Train [13][20/3239]	Time 0.191 (0.948)	Data 0.002 (0.614)	Loss 5.2193 (5.0463)	Top-1 acc 8.203 (12.519)	Top-5 acc 25.000 (29.855)	lr 0.03968
Warmup Train [13][30/3239]	Time 0.215 (0.713)	Data 0.001 (0.416)	Loss 5.0435 (5.0659)	Top-1 acc 10.156 (12.475)	Top-5 acc 26.172 (29.020)	lr 0.03968
Warmup Train [13][40/3239]	Time 0.214 (0.593)	Data 0.001 (0.315)	Loss 5.0453 (5.0789)	Top-1 acc 9.766 (12.119)	Top-5 acc 26.562 (28.620)	lr 0.03968
Warmup Train [13][50/3239]	Time 0.204 (0.522)	Data 0.002 (0.254)	Loss 5.1201 (5.0683)	Top-1 acc 11.719 (12.354)	Top-5 acc 30.078 (28.845)	lr 0.03967
Warmup Train [13][60/3239]	Time 0.213 (0.475)	Data 0.002 (0.214)	Loss 5.0664 (5.0660)	Top-1 acc 11.328 (12.391)	Top-5 acc 26.172 (28.957)	lr 0.03967
Warmup Train [13][70/3239]	Time 0.168 (0.437)	Data 0.001 (0.184)	Loss 5.0212 (5.0586)	Top-1 acc 11.328 (12.478)	Top-5 acc 28.125 (29.104)	lr 0.03966
Warmup Train [13][80/3239]	Time 0.253 (0.411)	Data 0.001 (0.162)	Loss 4.9784 (5.0545)	Top-1 acc 11.719 (12.432)	Top-5 acc 32.422 (29.331)	lr 0.03966
Warmup Train [13][90/3239]	Time 0.267 (0.392)	Data 0.001 (0.145)	Loss 5.0636 (5.0573)	Top-1 acc 14.062 (12.427)	Top-5 acc 30.859 (29.426)	lr 0.03965
Warmup Train [13][100/3239]	Time 0.195 (0.375)	Data 0.001 (0.131)	Loss 5.1337 (5.0565)	Top-1 acc 13.281 (12.461)	Top-5 acc 28.125 (29.459)	lr 0.03965
Warmup Train [13][110/3239]	Time 0.188 (0.362)	Data 0.001 (0.119)	Loss 5.2303 (5.0536)	Top-1 acc 10.938 (12.549)	Top-5 acc 26.172 (29.603)	lr 0.03964
Warmup Train [13][120/3239]	Time 0.230 (0.350)	Data 0.001 (0.110)	Loss 5.0406 (5.0507)	Top-1 acc 11.719 (12.555)	Top-5 acc 30.859 (29.681)	lr 0.03964
Warmup Train [13][130/3239]	Time 0.140 (0.340)	Data 0.001 (0.102)	Loss 4.9755 (5.0520)	Top-1 acc 15.625 (12.586)	Top-5 acc 34.766 (29.732)	lr 0.03963
Warmup Train [13][140/3239]	Time 0.197 (0.332)	Data 0.002 (0.095)	Loss 4.9362 (5.0489)	Top-1 acc 10.156 (12.641)	Top-5 acc 32.812 (29.754)	lr 0.03963
Warmup Train [13][150/3239]	Time 0.200 (0.325)	Data 0.002 (0.089)	Loss 5.0484 (5.0481)	Top-1 acc 14.844 (12.678)	Top-5 acc 29.688 (29.778)	lr 0.03962
Warmup Train [13][160/3239]	Time 0.206 (0.319)	Data 0.002 (0.084)	Loss 4.9873 (5.0456)	Top-1 acc 12.891 (12.747)	Top-5 acc 29.688 (29.850)	lr 0.03962
Warmup Train [13][170/3239]	Time 0.192 (0.313)	Data 0.001 (0.079)	Loss 5.2346 (5.0467)	Top-1 acc 15.234 (12.740)	Top-5 acc 26.562 (29.829)	lr 0.03961
Warmup Train [13][180/3239]	Time 0.219 (0.309)	Data 0.001 (0.075)	Loss 5.1044 (5.0486)	Top-1 acc 14.453 (12.768)	Top-5 acc 31.250 (29.864)	lr 0.03961
Warmup Train [13][190/3239]	Time 0.333 (0.305)	Data 0.002 (0.071)	Loss 4.9031 (5.0473)	Top-1 acc 15.234 (12.770)	Top-5 acc 29.297 (29.929)	lr 0.03960
Warmup Train [13][200/3239]	Time 0.156 (0.301)	Data 0.001 (0.068)	Loss 5.1050 (5.0482)	Top-1 acc 12.891 (12.782)	Top-5 acc 26.172 (29.925)	lr 0.03960
Warmup Train [13][210/3239]	Time 0.240 (0.297)	Data 0.002 (0.065)	Loss 5.0166 (5.0469)	Top-1 acc 13.672 (12.789)	Top-5 acc 30.469 (29.934)	lr 0.03959
Warmup Train [13][220/3239]	Time 0.201 (0.294)	Data 0.002 (0.062)	Loss 4.8109 (5.0470)	Top-1 acc 16.797 (12.845)	Top-5 acc 33.203 (29.931)	lr 0.03959
Warmup Train [13][230/3239]	Time 0.135 (0.290)	Data 0.001 (0.060)	Loss 5.0762 (5.0476)	Top-1 acc 10.938 (12.843)	Top-5 acc 29.688 (29.904)	lr 0.03958
Warmup Train [13][240/3239]	Time 0.166 (0.288)	Data 0.002 (0.058)	Loss 4.9944 (5.0484)	Top-1 acc 14.062 (12.847)	Top-5 acc 33.203 (29.934)	lr 0.03958
Warmup Train [13][250/3239]	Time 0.177 (0.285)	Data 0.001 (0.055)	Loss 5.1217 (5.0481)	Top-1 acc 12.109 (12.824)	Top-5 acc 27.344 (29.932)	lr 0.03957
Warmup Train [13][260/3239]	Time 0.199 (0.283)	Data 0.001 (0.053)	Loss 4.8740 (5.0483)	Top-1 acc 15.234 (12.819)	Top-5 acc 34.766 (29.927)	lr 0.03957
Warmup Train [13][270/3239]	Time 0.264 (0.281)	Data 0.001 (0.052)	Loss 5.1076 (5.0485)	Top-1 acc 9.766 (12.830)	Top-5 acc 26.953 (29.908)	lr 0.03956
Warmup Train [13][280/3239]	Time 0.212 (0.278)	Data 0.001 (0.050)	Loss 5.1121 (5.0495)	Top-1 acc 13.672 (12.827)	Top-5 acc 30.469 (29.882)	lr 0.03956
Warmup Train [13][290/3239]	Time 0.282 (0.277)	Data 0.001 (0.048)	Loss 5.3049 (5.0499)	Top-1 acc 11.719 (12.810)	Top-5 acc 28.906 (29.893)	lr 0.03955
Warmup Train [13][300/3239]	Time 0.283 (0.275)	Data 0.001 (0.047)	Loss 4.9798 (5.0512)	Top-1 acc 14.844 (12.797)	Top-5 acc 30.469 (29.863)	lr 0.03955
Warmup Train [13][310/3239]	Time 0.180 (0.274)	Data 0.001 (0.045)	Loss 4.9329 (5.0494)	Top-1 acc 11.719 (12.810)	Top-5 acc 32.422 (29.900)	lr 0.03954
Warmup Train [13][320/3239]	Time 0.217 (0.272)	Data 0.001 (0.044)	Loss 4.8546 (5.0465)	Top-1 acc 15.625 (12.837)	Top-5 acc 36.328 (29.991)	lr 0.03954
Warmup Train [13][330/3239]	Time 0.193 (0.271)	Data 0.001 (0.043)	Loss 5.0261 (5.0460)	Top-1 acc 10.156 (12.829)	Top-5 acc 25.000 (29.961)	lr 0.03953
Warmup Train [13][340/3239]	Time 0.205 (0.269)	Data 0.001 (0.042)	Loss 5.0620 (5.0456)	Top-1 acc 11.328 (12.816)	Top-5 acc 29.688 (29.936)	lr 0.03953
Warmup Train [13][350/3239]	Time 0.221 (0.268)	Data 0.001 (0.040)	Loss 5.0494 (5.0444)	Top-1 acc 14.453 (12.841)	Top-5 acc 28.516 (29.966)	lr 0.03952
Warmup Train [13][360/3239]	Time 0.165 (0.266)	Data 0.001 (0.039)	Loss 5.0343 (5.0440)	Top-1 acc 11.328 (12.835)	Top-5 acc 26.953 (29.968)	lr 0.03952
Warmup Train [13][370/3239]	Time 0.160 (0.265)	Data 0.001 (0.038)	Loss 4.9756 (5.0433)	Top-1 acc 14.844 (12.851)	Top-5 acc 33.203 (29.983)	lr 0.03951
Warmup Train [13][380/3239]	Time 0.223 (0.263)	Data 0.001 (0.038)	Loss 5.1860 (5.0433)	Top-1 acc 9.766 (12.827)	Top-5 acc 27.344 (30.016)	lr 0.03951
Warmup Train [13][390/3239]	Time 0.164 (0.262)	Data 0.001 (0.037)	Loss 4.9848 (5.0433)	Top-1 acc 12.891 (12.807)	Top-5 acc 34.375 (30.028)	lr 0.03950
Warmup Train [13][400/3239]	Time 0.250 (0.261)	Data 0.001 (0.036)	Loss 5.2181 (5.0443)	Top-1 acc 8.984 (12.789)	Top-5 acc 26.172 (29.985)	lr 0.03950
Warmup Train [13][410/3239]	Time 0.136 (0.260)	Data 0.001 (0.035)	Loss 5.0803 (5.0454)	Top-1 acc 10.938 (12.776)	Top-5 acc 28.906 (29.975)	lr 0.03949
Warmup Train [13][420/3239]	Time 0.263 (0.260)	Data 0.002 (0.035)	Loss 5.0234 (5.0455)	Top-1 acc 8.984 (12.764)	Top-5 acc 28.125 (29.956)	lr 0.03949
Warmup Train [13][430/3239]	Time 0.237 (0.259)	Data 0.001 (0.034)	Loss 5.1950 (5.0461)	Top-1 acc 8.984 (12.766)	Top-5 acc 25.781 (29.955)	lr 0.03948
Warmup Train [13][440/3239]	Time 0.224 (0.258)	Data 0.001 (0.033)	Loss 4.8368 (5.0448)	Top-1 acc 12.891 (12.768)	Top-5 acc 38.281 (29.979)	lr 0.03948
Warmup Train [13][450/3239]	Time 0.237 (0.257)	Data 0.001 (0.032)	Loss 5.1119 (5.0446)	Top-1 acc 12.891 (12.780)	Top-5 acc 32.812 (30.008)	lr 0.03947
Warmup Train [13][460/3239]	Time 0.195 (0.256)	Data 0.001 (0.032)	Loss 5.1270 (5.0441)	Top-1 acc 10.547 (12.792)	Top-5 acc 29.688 (30.030)	lr 0.03947
Warmup Train [13][470/3239]	Time 0.188 (0.256)	Data 0.001 (0.031)	Loss 5.1271 (5.0440)	Top-1 acc 9.766 (12.814)	Top-5 acc 28.125 (30.017)	lr 0.03946
Warmup Train [13][480/3239]	Time 0.270 (0.255)	Data 0.001 (0.031)	Loss 5.1583 (5.0433)	Top-1 acc 11.328 (12.823)	Top-5 acc 26.562 (30.037)	lr 0.03946
Warmup Train [13][490/3239]	Time 0.254 (0.255)	Data 0.002 (0.030)	Loss 4.9486 (5.0428)	Top-1 acc 12.109 (12.815)	Top-5 acc 31.250 (30.037)	lr 0.03945
Warmup Train [13][500/3239]	Time 0.326 (0.254)	Data 0.002 (0.029)	Loss 4.9962 (5.0433)	Top-1 acc 12.109 (12.818)	Top-5 acc 32.422 (30.027)	lr 0.03945
Warmup Train [13][510/3239]	Time 0.322 (0.254)	Data 0.001 (0.029)	Loss 4.8800 (5.0436)	Top-1 acc 14.453 (12.825)	Top-5 acc 33.594 (30.038)	lr 0.03944
Warmup Train [13][520/3239]	Time 0.164 (0.253)	Data 0.001 (0.028)	Loss 5.0155 (5.0440)	Top-1 acc 11.719 (12.815)	Top-5 acc 34.375 (30.036)	lr 0.03944
Warmup Train [13][530/3239]	Time 0.229 (0.253)	Data 0.001 (0.028)	Loss 5.0393 (5.0443)	Top-1 acc 11.328 (12.810)	Top-5 acc 32.031 (30.033)	lr 0.03943
Warmup Train [13][540/3239]	Time 0.227 (0.252)	Data 0.001 (0.027)	Loss 4.9775 (5.0452)	Top-1 acc 15.234 (12.802)	Top-5 acc 30.859 (30.014)	lr 0.03943
Warmup Train [13][550/3239]	Time 0.235 (0.252)	Data 0.001 (0.027)	Loss 4.9527 (5.0446)	Top-1 acc 15.625 (12.815)	Top-5 acc 29.688 (30.038)	lr 0.03942
Warmup Train [13][560/3239]	Time 0.239 (0.251)	Data 0.001 (0.027)	Loss 4.8524 (5.0442)	Top-1 acc 14.844 (12.822)	Top-5 acc 36.328 (30.071)	lr 0.03942
Warmup Train [13][570/3239]	Time 0.237 (0.251)	Data 0.001 (0.026)	Loss 5.0798 (5.0443)	Top-1 acc 12.109 (12.826)	Top-5 acc 31.641 (30.074)	lr 0.03941
Warmup Train [13][580/3239]	Time 0.235 (0.250)	Data 0.001 (0.026)	Loss 4.9396 (5.0435)	Top-1 acc 15.625 (12.832)	Top-5 acc 35.547 (30.069)	lr 0.03941
Warmup Train [13][590/3239]	Time 0.234 (0.250)	Data 0.002 (0.025)	Loss 5.0314 (5.0425)	Top-1 acc 16.016 (12.859)	Top-5 acc 32.422 (30.112)	lr 0.03940
Warmup Train [13][600/3239]	Time 0.306 (0.250)	Data 0.001 (0.025)	Loss 5.1624 (5.0433)	Top-1 acc 10.938 (12.849)	Top-5 acc 23.828 (30.089)	lr 0.03940
Warmup Train [13][610/3239]	Time 0.254 (0.249)	Data 0.001 (0.025)	Loss 5.1134 (5.0440)	Top-1 acc 13.672 (12.848)	Top-5 acc 28.125 (30.074)	lr 0.03939
Warmup Train [13][620/3239]	Time 0.246 (0.249)	Data 0.001 (0.024)	Loss 5.0106 (5.0439)	Top-1 acc 12.891 (12.845)	Top-5 acc 30.859 (30.060)	lr 0.03939
Warmup Train [13][630/3239]	Time 0.245 (0.249)	Data 0.001 (0.024)	Loss 4.9563 (5.0434)	Top-1 acc 15.625 (12.855)	Top-5 acc 32.812 (30.072)	lr 0.03938
Warmup Train [13][640/3239]	Time 0.270 (0.248)	Data 0.001 (0.023)	Loss 5.1030 (5.0438)	Top-1 acc 15.234 (12.857)	Top-5 acc 27.734 (30.061)	lr 0.03938
Warmup Train [13][650/3239]	Time 0.250 (0.248)	Data 0.002 (0.023)	Loss 5.2719 (5.0441)	Top-1 acc 8.594 (12.861)	Top-5 acc 23.828 (30.055)	lr 0.03937
Warmup Train [13][660/3239]	Time 0.219 (0.247)	Data 0.002 (0.023)	Loss 5.0143 (5.0432)	Top-1 acc 16.406 (12.876)	Top-5 acc 32.422 (30.073)	lr 0.03937
Warmup Train [13][670/3239]	Time 0.307 (0.247)	Data 0.001 (0.022)	Loss 4.9551 (5.0433)	Top-1 acc 14.453 (12.886)	Top-5 acc 28.906 (30.066)	lr 0.03936
Warmup Train [13][680/3239]	Time 0.254 (0.247)	Data 0.001 (0.022)	Loss 5.1820 (5.0438)	Top-1 acc 7.812 (12.889)	Top-5 acc 26.172 (30.046)	lr 0.03936
Warmup Train [13][690/3239]	Time 0.235 (0.247)	Data 0.002 (0.022)	Loss 5.0202 (5.0435)	Top-1 acc 12.109 (12.889)	Top-5 acc 30.078 (30.050)	lr 0.03935
Warmup Train [13][700/3239]	Time 0.326 (0.246)	Data 0.002 (0.022)	Loss 5.0337 (5.0432)	Top-1 acc 9.766 (12.872)	Top-5 acc 32.031 (30.050)	lr 0.03935
Warmup Train [13][710/3239]	Time 0.198 (0.246)	Data 0.001 (0.021)	Loss 4.9453 (5.0425)	Top-1 acc 14.844 (12.890)	Top-5 acc 32.422 (30.070)	lr 0.03934
Warmup Train [13][720/3239]	Time 0.193 (0.246)	Data 0.001 (0.021)	Loss 5.0445 (5.0419)	Top-1 acc 13.281 (12.891)	Top-5 acc 29.688 (30.073)	lr 0.03934
Warmup Train [13][730/3239]	Time 0.209 (0.245)	Data 0.001 (0.021)	Loss 5.0419 (5.0430)	Top-1 acc 13.672 (12.868)	Top-5 acc 32.812 (30.052)	lr 0.03933
Warmup Train [13][740/3239]	Time 0.223 (0.245)	Data 0.001 (0.021)	Loss 5.1081 (5.0433)	Top-1 acc 10.156 (12.857)	Top-5 acc 30.078 (30.045)	lr 0.03933
Warmup Train [13][750/3239]	Time 0.208 (0.244)	Data 0.001 (0.020)	Loss 5.0016 (5.0430)	Top-1 acc 12.891 (12.852)	Top-5 acc 30.859 (30.048)	lr 0.03932
Warmup Train [13][760/3239]	Time 0.214 (0.244)	Data 0.001 (0.020)	Loss 4.9054 (5.0426)	Top-1 acc 12.891 (12.861)	Top-5 acc 31.250 (30.063)	lr 0.03932
Warmup Train [13][770/3239]	Time 0.179 (0.244)	Data 0.001 (0.020)	Loss 5.1811 (5.0436)	Top-1 acc 9.766 (12.843)	Top-5 acc 25.781 (30.041)	lr 0.03931
Warmup Train [13][780/3239]	Time 0.215 (0.244)	Data 0.001 (0.020)	Loss 5.0423 (5.0436)	Top-1 acc 15.234 (12.843)	Top-5 acc 28.906 (30.026)	lr 0.03931
Warmup Train [13][790/3239]	Time 0.249 (0.243)	Data 0.001 (0.019)	Loss 4.8988 (5.0429)	Top-1 acc 17.188 (12.861)	Top-5 acc 34.375 (30.039)	lr 0.03930
Warmup Train [13][800/3239]	Time 0.283 (0.243)	Data 0.001 (0.019)	Loss 4.9801 (5.0430)	Top-1 acc 13.672 (12.858)	Top-5 acc 31.250 (30.039)	lr 0.03930
Warmup Train [13][810/3239]	Time 0.175 (0.243)	Data 0.002 (0.019)	Loss 5.0195 (5.0428)	Top-1 acc 14.844 (12.867)	Top-5 acc 28.125 (30.045)	lr 0.03929
Warmup Train [13][820/3239]	Time 0.181 (0.243)	Data 0.001 (0.019)	Loss 5.0873 (5.0425)	Top-1 acc 10.156 (12.883)	Top-5 acc 28.125 (30.047)	lr 0.03929
Warmup Train [13][830/3239]	Time 0.123 (0.242)	Data 0.001 (0.019)	Loss 5.1920 (5.0430)	Top-1 acc 9.766 (12.884)	Top-5 acc 26.172 (30.039)	lr 0.03928
Warmup Train [13][840/3239]	Time 0.195 (0.242)	Data 0.001 (0.019)	Loss 4.9064 (5.0434)	Top-1 acc 17.188 (12.892)	Top-5 acc 34.766 (30.029)	lr 0.03928
Warmup Train [13][850/3239]	Time 0.212 (0.242)	Data 0.001 (0.018)	Loss 4.8892 (5.0425)	Top-1 acc 13.281 (12.901)	Top-5 acc 30.469 (30.050)	lr 0.03927
Warmup Train [13][860/3239]	Time 0.205 (0.242)	Data 0.001 (0.018)	Loss 4.8504 (5.0422)	Top-1 acc 12.891 (12.894)	Top-5 acc 32.812 (30.054)	lr 0.03927
Warmup Train [13][870/3239]	Time 0.212 (0.242)	Data 0.001 (0.018)	Loss 5.0679 (5.0418)	Top-1 acc 11.719 (12.897)	Top-5 acc 31.641 (30.065)	lr 0.03926
Warmup Train [13][880/3239]	Time 0.230 (0.241)	Data 0.001 (0.018)	Loss 5.0680 (5.0413)	Top-1 acc 12.109 (12.891)	Top-5 acc 28.516 (30.072)	lr 0.03926
Warmup Train [13][890/3239]	Time 0.262 (0.241)	Data 0.001 (0.018)	Loss 5.0932 (5.0413)	Top-1 acc 12.891 (12.888)	Top-5 acc 29.688 (30.078)	lr 0.03925
Warmup Train [13][900/3239]	Time 0.167 (0.241)	Data 0.002 (0.017)	Loss 5.1591 (5.0420)	Top-1 acc 14.844 (12.886)	Top-5 acc 33.203 (30.075)	lr 0.03925
Warmup Train [13][910/3239]	Time 0.131 (0.241)	Data 0.002 (0.017)	Loss 5.1784 (5.0422)	Top-1 acc 17.188 (12.898)	Top-5 acc 30.469 (30.083)	lr 0.03924
Warmup Train [13][920/3239]	Time 0.249 (0.240)	Data 0.001 (0.017)	Loss 4.9739 (5.0420)	Top-1 acc 10.938 (12.902)	Top-5 acc 31.641 (30.097)	lr 0.03924
Warmup Train [13][930/3239]	Time 0.217 (0.240)	Data 0.001 (0.017)	Loss 5.0689 (5.0416)	Top-1 acc 11.719 (12.901)	Top-5 acc 27.344 (30.092)	lr 0.03923
Warmup Train [13][940/3239]	Time 0.178 (0.240)	Data 0.001 (0.017)	Loss 4.8499 (5.0415)	Top-1 acc 15.234 (12.903)	Top-5 acc 31.641 (30.098)	lr 0.03923
Warmup Train [13][950/3239]	Time 0.253 (0.240)	Data 0.001 (0.017)	Loss 4.9684 (5.0417)	Top-1 acc 16.797 (12.910)	Top-5 acc 32.422 (30.108)	lr 0.03923
Warmup Train [13][960/3239]	Time 0.219 (0.239)	Data 0.001 (0.017)	Loss 5.0696 (5.0417)	Top-1 acc 13.281 (12.899)	Top-5 acc 30.469 (30.107)	lr 0.03922
Warmup Train [13][970/3239]	Time 0.212 (0.239)	Data 0.001 (0.016)	Loss 4.8610 (5.0413)	Top-1 acc 15.234 (12.900)	Top-5 acc 33.594 (30.105)	lr 0.03922
Warmup Train [13][980/3239]	Time 0.125 (0.239)	Data 0.001 (0.016)	Loss 5.1510 (5.0409)	Top-1 acc 10.156 (12.897)	Top-5 acc 26.953 (30.110)	lr 0.03921
Warmup Train [13][990/3239]	Time 0.197 (0.239)	Data 0.001 (0.016)	Loss 5.1908 (5.0409)	Top-1 acc 14.062 (12.898)	Top-5 acc 27.734 (30.123)	lr 0.03921
Warmup Train [13][1000/3239]	Time 0.199 (0.239)	Data 0.001 (0.016)	Loss 5.1576 (5.0406)	Top-1 acc 11.328 (12.910)	Top-5 acc 27.734 (30.133)	lr 0.03920
Warmup Train [13][1010/3239]	Time 0.255 (0.239)	Data 0.001 (0.016)	Loss 4.9381 (5.0400)	Top-1 acc 16.016 (12.927)	Top-5 acc 36.328 (30.155)	lr 0.03920
Warmup Train [13][1020/3239]	Time 0.355 (0.238)	Data 0.001 (0.016)	Loss 5.0583 (5.0402)	Top-1 acc 10.547 (12.926)	Top-5 acc 27.344 (30.157)	lr 0.03919
Warmup Train [13][1030/3239]	Time 0.198 (0.238)	Data 0.002 (0.016)	Loss 4.9851 (5.0401)	Top-1 acc 13.281 (12.924)	Top-5 acc 28.516 (30.158)	lr 0.03919
Warmup Train [13][1040/3239]	Time 0.254 (0.238)	Data 0.001 (0.016)	Loss 4.9414 (5.0400)	Top-1 acc 17.188 (12.927)	Top-5 acc 33.984 (30.167)	lr 0.03918
Warmup Train [13][1050/3239]	Time 0.209 (0.238)	Data 0.001 (0.015)	Loss 5.0635 (5.0401)	Top-1 acc 11.719 (12.923)	Top-5 acc 25.781 (30.164)	lr 0.03918
Warmup Train [13][1060/3239]	Time 0.256 (0.238)	Data 0.001 (0.015)	Loss 4.9561 (5.0397)	Top-1 acc 12.109 (12.931)	Top-5 acc 30.469 (30.169)	lr 0.03917
Warmup Train [13][1070/3239]	Time 0.207 (0.237)	Data 0.001 (0.015)	Loss 4.9746 (5.0395)	Top-1 acc 11.328 (12.928)	Top-5 acc 30.859 (30.169)	lr 0.03917
Warmup Train [13][1080/3239]	Time 0.201 (0.237)	Data 0.001 (0.015)	Loss 5.1215 (5.0397)	Top-1 acc 8.594 (12.924)	Top-5 acc 30.078 (30.170)	lr 0.03916
Warmup Train [13][1090/3239]	Time 0.216 (0.237)	Data 0.002 (0.015)	Loss 5.0666 (5.0396)	Top-1 acc 12.500 (12.924)	Top-5 acc 29.688 (30.173)	lr 0.03916
Warmup Train [13][1100/3239]	Time 0.193 (0.237)	Data 0.001 (0.015)	Loss 5.2579 (5.0391)	Top-1 acc 8.984 (12.925)	Top-5 acc 25.000 (30.185)	lr 0.03915
Warmup Train [13][1110/3239]	Time 0.195 (0.237)	Data 0.001 (0.015)	Loss 5.1758 (5.0390)	Top-1 acc 9.766 (12.928)	Top-5 acc 29.297 (30.185)	lr 0.03915
Warmup Train [13][1120/3239]	Time 0.277 (0.237)	Data 0.001 (0.015)	Loss 5.0409 (5.0390)	Top-1 acc 13.281 (12.923)	Top-5 acc 30.469 (30.182)	lr 0.03914
Warmup Train [13][1130/3239]	Time 0.262 (0.237)	Data 0.001 (0.015)	Loss 5.1202 (5.0388)	Top-1 acc 14.062 (12.933)	Top-5 acc 28.516 (30.197)	lr 0.03914
Warmup Train [13][1140/3239]	Time 0.194 (0.236)	Data 0.001 (0.014)	Loss 4.9910 (5.0389)	Top-1 acc 13.672 (12.933)	Top-5 acc 33.203 (30.203)	lr 0.03913
Warmup Train [13][1150/3239]	Time 0.265 (0.236)	Data 0.001 (0.014)	Loss 5.0089 (5.0388)	Top-1 acc 13.672 (12.938)	Top-5 acc 31.641 (30.204)	lr 0.03913
Warmup Train [13][1160/3239]	Time 0.213 (0.236)	Data 0.001 (0.014)	Loss 4.8682 (5.0386)	Top-1 acc 14.844 (12.943)	Top-5 acc 33.203 (30.211)	lr 0.03912
Warmup Train [13][1170/3239]	Time 0.297 (0.236)	Data 0.001 (0.014)	Loss 5.0537 (5.0384)	Top-1 acc 10.938 (12.946)	Top-5 acc 30.859 (30.211)	lr 0.03912
Warmup Train [13][1180/3239]	Time 0.209 (0.236)	Data 0.001 (0.014)	Loss 4.9345 (5.0375)	Top-1 acc 10.938 (12.951)	Top-5 acc 31.641 (30.230)	lr 0.03911
Warmup Train [13][1190/3239]	Time 0.280 (0.236)	Data 0.001 (0.014)	Loss 5.0973 (5.0374)	Top-1 acc 14.844 (12.955)	Top-5 acc 29.297 (30.231)	lr 0.03911
Warmup Train [13][1200/3239]	Time 0.205 (0.236)	Data 0.001 (0.014)	Loss 5.1166 (5.0370)	Top-1 acc 13.281 (12.954)	Top-5 acc 32.422 (30.245)	lr 0.03910
Warmup Train [13][1210/3239]	Time 0.203 (0.235)	Data 0.001 (0.014)	Loss 4.8013 (5.0369)	Top-1 acc 15.625 (12.955)	Top-5 acc 32.031 (30.239)	lr 0.03910
Warmup Train [13][1220/3239]	Time 0.241 (0.235)	Data 0.001 (0.014)	Loss 5.0757 (5.0367)	Top-1 acc 13.281 (12.965)	Top-5 acc 30.469 (30.255)	lr 0.03909
Warmup Train [13][1230/3239]	Time 0.214 (0.235)	Data 0.002 (0.014)	Loss 5.0379 (5.0367)	Top-1 acc 16.406 (12.966)	Top-5 acc 29.688 (30.255)	lr 0.03909
Warmup Train [13][1240/3239]	Time 0.266 (0.235)	Data 0.001 (0.013)	Loss 4.9861 (5.0365)	Top-1 acc 16.406 (12.969)	Top-5 acc 32.422 (30.259)	lr 0.03908
Warmup Train [13][1250/3239]	Time 0.231 (0.235)	Data 0.001 (0.013)	Loss 5.0327 (5.0368)	Top-1 acc 11.328 (12.961)	Top-5 acc 28.125 (30.252)	lr 0.03908
Warmup Train [13][1260/3239]	Time 0.215 (0.235)	Data 0.001 (0.013)	Loss 5.0694 (5.0367)	Top-1 acc 10.156 (12.961)	Top-5 acc 28.516 (30.258)	lr 0.03907
Warmup Train [13][1270/3239]	Time 0.176 (0.235)	Data 0.001 (0.013)	Loss 5.1091 (5.0365)	Top-1 acc 12.891 (12.972)	Top-5 acc 27.734 (30.267)	lr 0.03907
Warmup Train [13][1280/3239]	Time 0.210 (0.235)	Data 0.001 (0.013)	Loss 4.9851 (5.0365)	Top-1 acc 13.281 (12.975)	Top-5 acc 33.594 (30.272)	lr 0.03906
Warmup Train [13][1290/3239]	Time 0.245 (0.235)	Data 0.001 (0.013)	Loss 5.0489 (5.0368)	Top-1 acc 13.672 (12.974)	Top-5 acc 31.641 (30.266)	lr 0.03906
Warmup Train [13][1300/3239]	Time 0.260 (0.235)	Data 0.001 (0.013)	Loss 5.0084 (5.0367)	Top-1 acc 16.406 (12.979)	Top-5 acc 32.422 (30.269)	lr 0.03905
Warmup Train [13][1310/3239]	Time 0.226 (0.235)	Data 0.004 (0.013)	Loss 4.9688 (5.0368)	Top-1 acc 14.062 (12.976)	Top-5 acc 32.031 (30.267)	lr 0.03905
Warmup Train [13][1320/3239]	Time 0.148 (0.235)	Data 0.001 (0.013)	Loss 4.9385 (5.0367)	Top-1 acc 12.891 (12.977)	Top-5 acc 32.422 (30.270)	lr 0.03904
Warmup Train [13][1330/3239]	Time 0.290 (0.235)	Data 0.001 (0.013)	Loss 5.1234 (5.0368)	Top-1 acc 14.062 (12.973)	Top-5 acc 30.078 (30.273)	lr 0.03904
Warmup Train [13][1340/3239]	Time 0.349 (0.235)	Data 0.002 (0.013)	Loss 5.0910 (5.0369)	Top-1 acc 12.500 (12.969)	Top-5 acc 33.984 (30.274)	lr 0.03903
Warmup Train [13][1350/3239]	Time 0.192 (0.235)	Data 0.001 (0.013)	Loss 5.1718 (5.0371)	Top-1 acc 8.203 (12.965)	Top-5 acc 25.000 (30.271)	lr 0.03903
Warmup Train [13][1360/3239]	Time 0.218 (0.234)	Data 0.001 (0.013)	Loss 4.9860 (5.0368)	Top-1 acc 13.672 (12.964)	Top-5 acc 33.984 (30.273)	lr 0.03902
Warmup Train [13][1370/3239]	Time 0.247 (0.234)	Data 0.001 (0.012)	Loss 5.1259 (5.0370)	Top-1 acc 10.547 (12.966)	Top-5 acc 26.562 (30.264)	lr 0.03901
Warmup Train [13][1380/3239]	Time 0.186 (0.234)	Data 0.001 (0.012)	Loss 4.9390 (5.0370)	Top-1 acc 12.109 (12.967)	Top-5 acc 28.516 (30.262)	lr 0.03901
Warmup Train [13][1390/3239]	Time 0.224 (0.234)	Data 0.001 (0.012)	Loss 5.1776 (5.0373)	Top-1 acc 10.938 (12.957)	Top-5 acc 26.562 (30.254)	lr 0.03900
Warmup Train [13][1400/3239]	Time 0.195 (0.234)	Data 0.001 (0.012)	Loss 4.9759 (5.0372)	Top-1 acc 12.891 (12.966)	Top-5 acc 30.469 (30.253)	lr 0.03900
Warmup Train [13][1410/3239]	Time 0.217 (0.234)	Data 0.001 (0.012)	Loss 5.0074 (5.0371)	Top-1 acc 11.328 (12.963)	Top-5 acc 31.250 (30.256)	lr 0.03899
Warmup Train [13][1420/3239]	Time 0.209 (0.234)	Data 0.001 (0.012)	Loss 4.9345 (5.0370)	Top-1 acc 14.844 (12.966)	Top-5 acc 32.812 (30.252)	lr 0.03899
Warmup Train [13][1430/3239]	Time 0.225 (0.234)	Data 0.001 (0.012)	Loss 4.9163 (5.0368)	Top-1 acc 12.109 (12.964)	Top-5 acc 34.375 (30.256)	lr 0.03898
Warmup Train [13][1440/3239]	Time 0.157 (0.234)	Data 0.001 (0.012)	Loss 5.0962 (5.0365)	Top-1 acc 10.156 (12.966)	Top-5 acc 31.250 (30.262)	lr 0.03898
Warmup Train [13][1450/3239]	Time 0.209 (0.234)	Data 0.002 (0.012)	Loss 4.9444 (5.0365)	Top-1 acc 14.844 (12.974)	Top-5 acc 32.031 (30.264)	lr 0.03897
Warmup Train [13][1460/3239]	Time 0.227 (0.234)	Data 0.001 (0.012)	Loss 4.8787 (5.0363)	Top-1 acc 14.062 (12.974)	Top-5 acc 32.812 (30.270)	lr 0.03897
Warmup Train [13][1470/3239]	Time 0.180 (0.234)	Data 0.001 (0.012)	Loss 4.8909 (5.0358)	Top-1 acc 12.891 (12.973)	Top-5 acc 33.203 (30.276)	lr 0.03896
Warmup Train [13][1480/3239]	Time 0.249 (0.233)	Data 0.001 (0.012)	Loss 5.1762 (5.0360)	Top-1 acc 12.500 (12.975)	Top-5 acc 28.516 (30.277)	lr 0.03896
Warmup Train [13][1490/3239]	Time 0.227 (0.233)	Data 0.002 (0.012)	Loss 5.0233 (5.0357)	Top-1 acc 12.891 (12.980)	Top-5 acc 32.422 (30.288)	lr 0.03895
Warmup Train [13][1500/3239]	Time 0.215 (0.233)	Data 0.001 (0.012)	Loss 5.1663 (5.0356)	Top-1 acc 12.891 (12.982)	Top-5 acc 26.953 (30.290)	lr 0.03895
Warmup Train [13][1510/3239]	Time 0.217 (0.233)	Data 0.001 (0.012)	Loss 5.0667 (5.0358)	Top-1 acc 11.719 (12.982)	Top-5 acc 32.812 (30.292)	lr 0.03894
Warmup Train [13][1520/3239]	Time 0.199 (0.233)	Data 0.001 (0.012)	Loss 5.1415 (5.0359)	Top-1 acc 14.844 (12.985)	Top-5 acc 28.125 (30.293)	lr 0.03894
Warmup Train [13][1530/3239]	Time 0.254 (0.233)	Data 0.001 (0.011)	Loss 5.0484 (5.0354)	Top-1 acc 15.625 (12.994)	Top-5 acc 33.594 (30.304)	lr 0.03893
Warmup Train [13][1540/3239]	Time 0.178 (0.233)	Data 0.002 (0.011)	Loss 4.9869 (5.0350)	Top-1 acc 13.672 (13.001)	Top-5 acc 34.766 (30.320)	lr 0.03893
Warmup Train [13][1550/3239]	Time 0.210 (0.233)	Data 0.001 (0.011)	Loss 4.9818 (5.0345)	Top-1 acc 12.500 (13.009)	Top-5 acc 30.469 (30.328)	lr 0.03892
Warmup Train [13][1560/3239]	Time 0.380 (0.233)	Data 0.002 (0.011)	Loss 4.8342 (5.0342)	Top-1 acc 16.016 (13.013)	Top-5 acc 33.984 (30.326)	lr 0.03892
Warmup Train [13][1570/3239]	Time 0.229 (0.233)	Data 0.001 (0.011)	Loss 5.0321 (5.0344)	Top-1 acc 12.109 (13.008)	Top-5 acc 31.641 (30.323)	lr 0.03891
Warmup Train [13][1580/3239]	Time 0.149 (0.233)	Data 0.001 (0.011)	Loss 5.0318 (5.0343)	Top-1 acc 10.938 (13.005)	Top-5 acc 29.297 (30.327)	lr 0.03891
Warmup Train [13][1590/3239]	Time 0.232 (0.233)	Data 0.001 (0.011)	Loss 5.0402 (5.0341)	Top-1 acc 12.500 (13.003)	Top-5 acc 30.859 (30.325)	lr 0.03890
Warmup Train [13][1600/3239]	Time 0.246 (0.233)	Data 0.001 (0.011)	Loss 5.1617 (5.0340)	Top-1 acc 9.766 (13.007)	Top-5 acc 27.344 (30.331)	lr 0.03890
Warmup Train [13][1610/3239]	Time 0.155 (0.233)	Data 0.002 (0.011)	Loss 4.9887 (5.0336)	Top-1 acc 12.500 (13.009)	Top-5 acc 30.078 (30.343)	lr 0.03889
Warmup Train [13][1620/3239]	Time 0.251 (0.233)	Data 0.001 (0.011)	Loss 5.0702 (5.0336)	Top-1 acc 12.891 (13.013)	Top-5 acc 28.125 (30.347)	lr 0.03889
Warmup Train [13][1630/3239]	Time 0.267 (0.233)	Data 0.001 (0.011)	Loss 5.1507 (5.0336)	Top-1 acc 9.375 (13.009)	Top-5 acc 25.391 (30.338)	lr 0.03888
Warmup Train [13][1640/3239]	Time 0.349 (0.233)	Data 0.002 (0.011)	Loss 4.9209 (5.0335)	Top-1 acc 14.844 (13.005)	Top-5 acc 32.422 (30.343)	lr 0.03888
Warmup Train [13][1650/3239]	Time 0.240 (0.233)	Data 0.001 (0.011)	Loss 5.1024 (5.0331)	Top-1 acc 16.406 (13.011)	Top-5 acc 32.422 (30.353)	lr 0.03887
Warmup Train [13][1660/3239]	Time 0.375 (0.233)	Data 0.002 (0.011)	Loss 5.0209 (5.0331)	Top-1 acc 12.500 (13.012)	Top-5 acc 30.859 (30.350)	lr 0.03887
Warmup Train [13][1670/3239]	Time 0.208 (0.233)	Data 0.001 (0.011)	Loss 5.0744 (5.0331)	Top-1 acc 10.547 (13.012)	Top-5 acc 28.125 (30.345)	lr 0.03886
Warmup Train [13][1680/3239]	Time 0.318 (0.233)	Data 0.002 (0.011)	Loss 5.0044 (5.0326)	Top-1 acc 14.062 (13.022)	Top-5 acc 30.469 (30.361)	lr 0.03886
Warmup Train [13][1690/3239]	Time 0.331 (0.233)	Data 0.002 (0.011)	Loss 4.9751 (5.0326)	Top-1 acc 12.891 (13.017)	Top-5 acc 32.422 (30.362)	lr 0.03885
Warmup Train [13][1700/3239]	Time 0.253 (0.234)	Data 0.001 (0.011)	Loss 5.0191 (5.0325)	Top-1 acc 14.062 (13.019)	Top-5 acc 33.203 (30.367)	lr 0.03885
Warmup Train [13][1710/3239]	Time 0.238 (0.234)	Data 0.002 (0.011)	Loss 4.9243 (5.0322)	Top-1 acc 15.625 (13.022)	Top-5 acc 32.812 (30.371)	lr 0.03884
Warmup Train [13][1720/3239]	Time 0.249 (0.234)	Data 0.001 (0.011)	Loss 5.0469 (5.0319)	Top-1 acc 14.844 (13.025)	Top-5 acc 29.688 (30.380)	lr 0.03884
Warmup Train [13][1730/3239]	Time 0.267 (0.234)	Data 0.001 (0.010)	Loss 5.0025 (5.0318)	Top-1 acc 12.500 (13.028)	Top-5 acc 25.391 (30.379)	lr 0.03883
Warmup Train [13][1740/3239]	Time 0.199 (0.234)	Data 0.004 (0.010)	Loss 4.9767 (5.0315)	Top-1 acc 15.234 (13.035)	Top-5 acc 34.766 (30.393)	lr 0.03883
Warmup Train [13][1750/3239]	Time 0.168 (0.234)	Data 0.001 (0.010)	Loss 4.9879 (5.0314)	Top-1 acc 17.188 (13.037)	Top-5 acc 33.594 (30.397)	lr 0.03882
Warmup Train [13][1760/3239]	Time 0.322 (0.234)	Data 0.001 (0.010)	Loss 4.9678 (5.0315)	Top-1 acc 16.016 (13.035)	Top-5 acc 32.812 (30.392)	lr 0.03882
Warmup Train [13][1770/3239]	Time 0.149 (0.234)	Data 0.001 (0.010)	Loss 4.9638 (5.0312)	Top-1 acc 16.016 (13.040)	Top-5 acc 32.422 (30.398)	lr 0.03881
Warmup Train [13][1780/3239]	Time 0.192 (0.234)	Data 0.001 (0.010)	Loss 4.9825 (5.0311)	Top-1 acc 10.938 (13.037)	Top-5 acc 30.078 (30.394)	lr 0.03881
Warmup Train [13][1790/3239]	Time 0.241 (0.234)	Data 0.001 (0.010)	Loss 4.9561 (5.0310)	Top-1 acc 16.797 (13.042)	Top-5 acc 34.375 (30.397)	lr 0.03880
Warmup Train [13][1800/3239]	Time 0.305 (0.234)	Data 0.003 (0.010)	Loss 4.8782 (5.0306)	Top-1 acc 16.016 (13.044)	Top-5 acc 38.672 (30.406)	lr 0.03880
Warmup Train [13][1810/3239]	Time 0.212 (0.234)	Data 0.002 (0.010)	Loss 5.1345 (5.0303)	Top-1 acc 11.719 (13.046)	Top-5 acc 29.688 (30.412)	lr 0.03879
Warmup Train [13][1820/3239]	Time 0.219 (0.234)	Data 0.002 (0.010)	Loss 4.9734 (5.0302)	Top-1 acc 11.719 (13.049)	Top-5 acc 30.078 (30.415)	lr 0.03879
Warmup Train [13][1830/3239]	Time 0.206 (0.234)	Data 0.001 (0.010)	Loss 5.0231 (5.0299)	Top-1 acc 14.844 (13.046)	Top-5 acc 33.594 (30.425)	lr 0.03878
Warmup Train [13][1840/3239]	Time 0.252 (0.234)	Data 0.001 (0.010)	Loss 4.9915 (5.0298)	Top-1 acc 13.672 (13.051)	Top-5 acc 34.375 (30.430)	lr 0.03878
Warmup Train [13][1850/3239]	Time 0.165 (0.234)	Data 0.001 (0.010)	Loss 5.0391 (5.0296)	Top-1 acc 12.500 (13.052)	Top-5 acc 30.469 (30.432)	lr 0.03877
Warmup Train [13][1860/3239]	Time 0.456 (0.235)	Data 0.002 (0.010)	Loss 5.1250 (5.0295)	Top-1 acc 11.719 (13.049)	Top-5 acc 29.688 (30.430)	lr 0.03877
Warmup Train [13][1870/3239]	Time 0.259 (0.235)	Data 0.002 (0.010)	Loss 5.1758 (5.0295)	Top-1 acc 10.938 (13.051)	Top-5 acc 25.000 (30.435)	lr 0.03876
Warmup Train [13][1880/3239]	Time 0.282 (0.235)	Data 0.002 (0.010)	Loss 4.9528 (5.0293)	Top-1 acc 13.672 (13.052)	Top-5 acc 28.516 (30.439)	lr 0.03876
Warmup Train [13][1890/3239]	Time 0.203 (0.235)	Data 0.002 (0.010)	Loss 5.0102 (5.0294)	Top-1 acc 13.672 (13.046)	Top-5 acc 30.469 (30.432)	lr 0.03875
Warmup Train [13][1900/3239]	Time 0.209 (0.235)	Data 0.001 (0.010)	Loss 4.9766 (5.0291)	Top-1 acc 14.062 (13.047)	Top-5 acc 31.250 (30.438)	lr 0.03875
Warmup Train [13][1910/3239]	Time 0.180 (0.235)	Data 0.001 (0.010)	Loss 5.0770 (5.0289)	Top-1 acc 12.891 (13.050)	Top-5 acc 30.469 (30.442)	lr 0.03874
Warmup Train [13][1920/3239]	Time 0.154 (0.235)	Data 0.002 (0.010)	Loss 4.9766 (5.0288)	Top-1 acc 13.672 (13.058)	Top-5 acc 32.812 (30.452)	lr 0.03874
Warmup Train [13][1930/3239]	Time 0.204 (0.235)	Data 0.002 (0.010)	Loss 4.9806 (5.0286)	Top-1 acc 11.328 (13.063)	Top-5 acc 31.641 (30.453)	lr 0.03873
Warmup Train [13][1940/3239]	Time 0.211 (0.235)	Data 0.001 (0.010)	Loss 5.0098 (5.0284)	Top-1 acc 12.891 (13.066)	Top-5 acc 33.594 (30.456)	lr 0.03873
Warmup Train [13][1950/3239]	Time 0.396 (0.235)	Data 0.001 (0.010)	Loss 4.8062 (5.0282)	Top-1 acc 16.406 (13.068)	Top-5 acc 32.031 (30.458)	lr 0.03872
Warmup Train [13][1960/3239]	Time 0.279 (0.235)	Data 0.043 (0.010)	Loss 5.0264 (5.0280)	Top-1 acc 12.891 (13.077)	Top-5 acc 29.297 (30.462)	lr 0.03872
Warmup Train [13][1970/3239]	Time 0.188 (0.236)	Data 0.001 (0.010)	Loss 5.0177 (5.0278)	Top-1 acc 11.328 (13.085)	Top-5 acc 30.078 (30.468)	lr 0.03871
Warmup Train [13][1980/3239]	Time 0.299 (0.236)	Data 0.002 (0.010)	Loss 4.9635 (5.0278)	Top-1 acc 12.109 (13.085)	Top-5 acc 28.125 (30.468)	lr 0.03871
Warmup Train [13][1990/3239]	Time 0.189 (0.235)	Data 0.001 (0.010)	Loss 4.9201 (5.0273)	Top-1 acc 15.234 (13.092)	Top-5 acc 35.156 (30.481)	lr 0.03870
Warmup Train [13][2000/3239]	Time 0.213 (0.235)	Data 0.001 (0.010)	Loss 4.9710 (5.0270)	Top-1 acc 12.109 (13.100)	Top-5 acc 31.641 (30.489)	lr 0.03870
Warmup Train [13][2010/3239]	Time 0.188 (0.235)	Data 0.001 (0.010)	Loss 4.6858 (5.0266)	Top-1 acc 17.188 (13.108)	Top-5 acc 37.500 (30.501)	lr 0.03869
Warmup Train [13][2020/3239]	Time 0.190 (0.235)	Data 0.001 (0.009)	Loss 5.0380 (5.0266)	Top-1 acc 11.328 (13.112)	Top-5 acc 28.516 (30.499)	lr 0.03869
Warmup Train [13][2030/3239]	Time 0.150 (0.235)	Data 0.001 (0.009)	Loss 5.0577 (5.0263)	Top-1 acc 14.453 (13.117)	Top-5 acc 26.562 (30.507)	lr 0.03868
Warmup Train [13][2040/3239]	Time 0.231 (0.235)	Data 0.001 (0.009)	Loss 4.8701 (5.0259)	Top-1 acc 12.500 (13.118)	Top-5 acc 31.641 (30.513)	lr 0.03868
Warmup Train [13][2050/3239]	Time 0.342 (0.235)	Data 0.001 (0.009)	Loss 4.9359 (5.0255)	Top-1 acc 13.281 (13.126)	Top-5 acc 33.203 (30.530)	lr 0.03867
Warmup Train [13][2060/3239]	Time 0.242 (0.235)	Data 0.001 (0.009)	Loss 5.0647 (5.0257)	Top-1 acc 9.766 (13.126)	Top-5 acc 28.516 (30.529)	lr 0.03867
Warmup Train [13][2070/3239]	Time 0.211 (0.235)	Data 0.001 (0.009)	Loss 5.2394 (5.0259)	Top-1 acc 8.984 (13.121)	Top-5 acc 23.828 (30.524)	lr 0.03866
Warmup Train [13][2080/3239]	Time 0.245 (0.235)	Data 0.001 (0.009)	Loss 4.9784 (5.0260)	Top-1 acc 15.234 (13.123)	Top-5 acc 31.641 (30.521)	lr 0.03866
Warmup Train [13][2090/3239]	Time 0.288 (0.235)	Data 0.001 (0.009)	Loss 5.0523 (5.0256)	Top-1 acc 12.500 (13.121)	Top-5 acc 30.859 (30.528)	lr 0.03865
Warmup Train [13][2100/3239]	Time 0.194 (0.235)	Data 0.002 (0.009)	Loss 4.8693 (5.0254)	Top-1 acc 15.234 (13.125)	Top-5 acc 30.469 (30.533)	lr 0.03865
Warmup Train [13][2110/3239]	Time 0.264 (0.235)	Data 0.001 (0.009)	Loss 4.8857 (5.0253)	Top-1 acc 12.891 (13.128)	Top-5 acc 34.375 (30.540)	lr 0.03864
Warmup Train [13][2120/3239]	Time 0.219 (0.235)	Data 0.001 (0.009)	Loss 5.0290 (5.0249)	Top-1 acc 11.719 (13.135)	Top-5 acc 26.953 (30.544)	lr 0.03864
Warmup Train [13][2130/3239]	Time 0.176 (0.235)	Data 0.001 (0.009)	Loss 5.1309 (5.0246)	Top-1 acc 10.547 (13.141)	Top-5 acc 26.953 (30.547)	lr 0.03863
Warmup Train [13][2140/3239]	Time 0.402 (0.235)	Data 0.001 (0.009)	Loss 4.9942 (5.0244)	Top-1 acc 11.719 (13.142)	Top-5 acc 32.031 (30.556)	lr 0.03863
Warmup Train [13][2150/3239]	Time 0.332 (0.235)	Data 0.001 (0.009)	Loss 4.9837 (5.0242)	Top-1 acc 12.500 (13.147)	Top-5 acc 30.078 (30.563)	lr 0.03862
Warmup Train [13][2160/3239]	Time 0.175 (0.235)	Data 0.001 (0.009)	Loss 5.0450 (5.0240)	Top-1 acc 10.156 (13.147)	Top-5 acc 29.688 (30.565)	lr 0.03862
Warmup Train [13][2170/3239]	Time 0.228 (0.235)	Data 0.001 (0.009)	Loss 5.0230 (5.0241)	Top-1 acc 13.672 (13.150)	Top-5 acc 29.297 (30.563)	lr 0.03861
Warmup Train [13][2180/3239]	Time 0.204 (0.235)	Data 0.002 (0.009)	Loss 5.0745 (5.0240)	Top-1 acc 12.500 (13.151)	Top-5 acc 29.297 (30.568)	lr 0.03861
Warmup Train [13][2190/3239]	Time 0.195 (0.235)	Data 0.001 (0.009)	Loss 4.9454 (5.0236)	Top-1 acc 15.234 (13.158)	Top-5 acc 33.203 (30.578)	lr 0.03860
Warmup Train [13][2200/3239]	Time 0.164 (0.235)	Data 0.001 (0.009)	Loss 5.1175 (5.0238)	Top-1 acc 11.328 (13.153)	Top-5 acc 27.344 (30.570)	lr 0.03860
Warmup Train [13][2210/3239]	Time 0.255 (0.235)	Data 0.001 (0.009)	Loss 4.9157 (5.0238)	Top-1 acc 15.625 (13.149)	Top-5 acc 37.500 (30.577)	lr 0.03859
Warmup Train [13][2220/3239]	Time 0.218 (0.235)	Data 0.001 (0.009)	Loss 4.9253 (5.0236)	Top-1 acc 14.453 (13.155)	Top-5 acc 30.859 (30.577)	lr 0.03859
Warmup Train [13][2230/3239]	Time 0.196 (0.235)	Data 0.002 (0.009)	Loss 5.1397 (5.0235)	Top-1 acc 13.672 (13.163)	Top-5 acc 28.516 (30.580)	lr 0.03858
Warmup Train [13][2240/3239]	Time 0.280 (0.235)	Data 0.001 (0.009)	Loss 5.0216 (5.0235)	Top-1 acc 13.281 (13.168)	Top-5 acc 29.297 (30.586)	lr 0.03858
Warmup Train [13][2250/3239]	Time 0.322 (0.235)	Data 0.001 (0.009)	Loss 5.0630 (5.0234)	Top-1 acc 12.891 (13.170)	Top-5 acc 25.391 (30.583)	lr 0.03857
Warmup Train [13][2260/3239]	Time 0.216 (0.235)	Data 0.002 (0.009)	Loss 4.9906 (5.0234)	Top-1 acc 13.672 (13.166)	Top-5 acc 30.859 (30.581)	lr 0.03857
Warmup Train [13][2270/3239]	Time 0.199 (0.235)	Data 0.001 (0.009)	Loss 4.8263 (5.0230)	Top-1 acc 16.797 (13.172)	Top-5 acc 34.375 (30.588)	lr 0.03856
Warmup Train [13][2280/3239]	Time 0.229 (0.235)	Data 0.001 (0.009)	Loss 4.9287 (5.0228)	Top-1 acc 10.156 (13.179)	Top-5 acc 34.766 (30.596)	lr 0.03855
Warmup Train [13][2290/3239]	Time 0.249 (0.235)	Data 0.001 (0.009)	Loss 4.8344 (5.0227)	Top-1 acc 17.578 (13.177)	Top-5 acc 36.719 (30.600)	lr 0.03855
Warmup Train [13][2300/3239]	Time 0.225 (0.235)	Data 0.001 (0.009)	Loss 5.0192 (5.0224)	Top-1 acc 12.500 (13.179)	Top-5 acc 33.203 (30.605)	lr 0.03854
Warmup Train [13][2310/3239]	Time 0.209 (0.235)	Data 0.001 (0.009)	Loss 4.9661 (5.0225)	Top-1 acc 13.281 (13.175)	Top-5 acc 32.422 (30.607)	lr 0.03854
Warmup Train [13][2320/3239]	Time 0.208 (0.235)	Data 0.001 (0.009)	Loss 5.0577 (5.0223)	Top-1 acc 13.281 (13.181)	Top-5 acc 28.906 (30.614)	lr 0.03853
Warmup Train [13][2330/3239]	Time 0.199 (0.235)	Data 0.001 (0.009)	Loss 4.8781 (5.0222)	Top-1 acc 16.016 (13.181)	Top-5 acc 36.328 (30.617)	lr 0.03853
Warmup Train [13][2340/3239]	Time 0.264 (0.235)	Data 0.001 (0.009)	Loss 5.1361 (5.0222)	Top-1 acc 14.062 (13.182)	Top-5 acc 32.422 (30.619)	lr 0.03852
Warmup Train [13][2350/3239]	Time 0.201 (0.235)	Data 0.003 (0.009)	Loss 4.8637 (5.0220)	Top-1 acc 15.625 (13.187)	Top-5 acc 32.422 (30.619)	lr 0.03852
Warmup Train [13][2360/3239]	Time 0.355 (0.235)	Data 0.001 (0.009)	Loss 5.1076 (5.0220)	Top-1 acc 10.938 (13.187)	Top-5 acc 27.734 (30.614)	lr 0.03851
Warmup Train [13][2370/3239]	Time 0.240 (0.235)	Data 0.001 (0.009)	Loss 4.9377 (5.0220)	Top-1 acc 14.844 (13.189)	Top-5 acc 31.641 (30.614)	lr 0.03851
Warmup Train [13][2380/3239]	Time 0.197 (0.235)	Data 0.001 (0.009)	Loss 5.0911 (5.0218)	Top-1 acc 12.500 (13.191)	Top-5 acc 27.344 (30.621)	lr 0.03850
Warmup Train [13][2390/3239]	Time 0.239 (0.235)	Data 0.001 (0.009)	Loss 5.0533 (5.0218)	Top-1 acc 12.500 (13.188)	Top-5 acc 27.734 (30.618)	lr 0.03850
Warmup Train [13][2400/3239]	Time 0.166 (0.235)	Data 0.002 (0.008)	Loss 4.8064 (5.0215)	Top-1 acc 17.578 (13.195)	Top-5 acc 32.422 (30.628)	lr 0.03849
Warmup Train [13][2410/3239]	Time 0.196 (0.234)	Data 0.002 (0.008)	Loss 4.9719 (5.0213)	Top-1 acc 14.844 (13.196)	Top-5 acc 33.984 (30.634)	lr 0.03849
Warmup Train [13][2420/3239]	Time 0.259 (0.234)	Data 0.001 (0.008)	Loss 4.8999 (5.0211)	Top-1 acc 13.672 (13.200)	Top-5 acc 34.375 (30.641)	lr 0.03848
Warmup Train [13][2430/3239]	Time 0.241 (0.234)	Data 0.001 (0.008)	Loss 4.9342 (5.0209)	Top-1 acc 15.234 (13.205)	Top-5 acc 34.375 (30.650)	lr 0.03848
Warmup Train [13][2440/3239]	Time 0.225 (0.234)	Data 0.001 (0.008)	Loss 4.8282 (5.0205)	Top-1 acc 16.406 (13.209)	Top-5 acc 34.375 (30.655)	lr 0.03847
Warmup Train [13][2450/3239]	Time 0.220 (0.234)	Data 0.001 (0.008)	Loss 4.9473 (5.0204)	Top-1 acc 16.406 (13.210)	Top-5 acc 29.688 (30.656)	lr 0.03847
Warmup Train [13][2460/3239]	Time 0.218 (0.234)	Data 0.001 (0.008)	Loss 5.1188 (5.0201)	Top-1 acc 12.891 (13.211)	Top-5 acc 29.297 (30.662)	lr 0.03846
Warmup Train [13][2470/3239]	Time 0.230 (0.234)	Data 0.001 (0.008)	Loss 4.8891 (5.0201)	Top-1 acc 14.844 (13.213)	Top-5 acc 33.203 (30.663)	lr 0.03846
Warmup Train [13][2480/3239]	Time 0.226 (0.234)	Data 0.001 (0.008)	Loss 4.8654 (5.0199)	Top-1 acc 14.844 (13.216)	Top-5 acc 34.375 (30.670)	lr 0.03845
Warmup Train [13][2490/3239]	Time 0.261 (0.234)	Data 0.001 (0.008)	Loss 4.9911 (5.0198)	Top-1 acc 12.109 (13.217)	Top-5 acc 27.734 (30.673)	lr 0.03845
Warmup Train [13][2500/3239]	Time 0.205 (0.234)	Data 0.002 (0.008)	Loss 5.0008 (5.0197)	Top-1 acc 14.453 (13.220)	Top-5 acc 30.469 (30.676)	lr 0.03844
Warmup Train [13][2510/3239]	Time 0.219 (0.234)	Data 0.001 (0.008)	Loss 5.0272 (5.0194)	Top-1 acc 12.500 (13.226)	Top-5 acc 32.812 (30.689)	lr 0.03844
Warmup Train [13][2520/3239]	Time 0.179 (0.234)	Data 0.001 (0.008)	Loss 4.9404 (5.0192)	Top-1 acc 12.891 (13.232)	Top-5 acc 30.469 (30.696)	lr 0.03843
Warmup Train [13][2530/3239]	Time 0.213 (0.234)	Data 0.002 (0.008)	Loss 4.9592 (5.0191)	Top-1 acc 12.500 (13.233)	Top-5 acc 29.297 (30.695)	lr 0.03843
Warmup Train [13][2540/3239]	Time 0.198 (0.234)	Data 0.002 (0.008)	Loss 5.0129 (5.0189)	Top-1 acc 12.500 (13.237)	Top-5 acc 29.688 (30.701)	lr 0.03842
Warmup Train [13][2550/3239]	Time 0.224 (0.234)	Data 0.002 (0.008)	Loss 4.8983 (5.0186)	Top-1 acc 12.109 (13.242)	Top-5 acc 35.547 (30.711)	lr 0.03842
Warmup Train [13][2560/3239]	Time 0.336 (0.234)	Data 0.001 (0.008)	Loss 4.8681 (5.0186)	Top-1 acc 16.797 (13.244)	Top-5 acc 35.156 (30.715)	lr 0.03841
Warmup Train [13][2570/3239]	Time 0.348 (0.234)	Data 0.001 (0.008)	Loss 5.1453 (5.0187)	Top-1 acc 11.719 (13.245)	Top-5 acc 28.516 (30.717)	lr 0.03841
Warmup Train [13][2580/3239]	Time 0.212 (0.234)	Data 0.001 (0.008)	Loss 4.8404 (5.0184)	Top-1 acc 13.672 (13.247)	Top-5 acc 36.719 (30.724)	lr 0.03840
Warmup Train [13][2590/3239]	Time 0.311 (0.234)	Data 0.001 (0.008)	Loss 5.0435 (5.0184)	Top-1 acc 12.109 (13.244)	Top-5 acc 27.734 (30.721)	lr 0.03840
Warmup Train [13][2600/3239]	Time 0.252 (0.234)	Data 0.001 (0.008)	Loss 5.0450 (5.0182)	Top-1 acc 10.547 (13.248)	Top-5 acc 29.688 (30.725)	lr 0.03839
Warmup Train [13][2610/3239]	Time 0.189 (0.234)	Data 0.001 (0.008)	Loss 4.8852 (5.0181)	Top-1 acc 12.891 (13.245)	Top-5 acc 32.031 (30.727)	lr 0.03839
Warmup Train [13][2620/3239]	Time 0.266 (0.234)	Data 0.002 (0.008)	Loss 5.1196 (5.0181)	Top-1 acc 10.938 (13.245)	Top-5 acc 27.734 (30.727)	lr 0.03838
Warmup Train [13][2630/3239]	Time 0.262 (0.234)	Data 0.001 (0.008)	Loss 4.9710 (5.0179)	Top-1 acc 16.406 (13.252)	Top-5 acc 35.156 (30.730)	lr 0.03838
Warmup Train [13][2640/3239]	Time 0.269 (0.234)	Data 0.001 (0.008)	Loss 5.0448 (5.0178)	Top-1 acc 9.766 (13.253)	Top-5 acc 32.812 (30.736)	lr 0.03837
Warmup Train [13][2650/3239]	Time 0.199 (0.234)	Data 0.001 (0.008)	Loss 4.8377 (5.0177)	Top-1 acc 14.453 (13.253)	Top-5 acc 35.156 (30.735)	lr 0.03837
Warmup Train [13][2660/3239]	Time 0.297 (0.234)	Data 0.001 (0.008)	Loss 5.1839 (5.0174)	Top-1 acc 10.938 (13.254)	Top-5 acc 30.078 (30.742)	lr 0.03836
Warmup Train [13][2670/3239]	Time 0.409 (0.234)	Data 0.001 (0.008)	Loss 5.0091 (5.0173)	Top-1 acc 13.281 (13.255)	Top-5 acc 27.734 (30.742)	lr 0.03836
Warmup Train [13][2680/3239]	Time 0.234 (0.234)	Data 0.002 (0.008)	Loss 4.8957 (5.0172)	Top-1 acc 16.016 (13.256)	Top-5 acc 33.984 (30.742)	lr 0.03835
Warmup Train [13][2690/3239]	Time 0.274 (0.234)	Data 0.001 (0.008)	Loss 5.0248 (5.0171)	Top-1 acc 9.766 (13.258)	Top-5 acc 32.812 (30.740)	lr 0.03835
Warmup Train [13][2700/3239]	Time 0.220 (0.234)	Data 0.001 (0.008)	Loss 4.8408 (5.0169)	Top-1 acc 13.672 (13.261)	Top-5 acc 35.156 (30.748)	lr 0.03834
Warmup Train [13][2710/3239]	Time 0.200 (0.234)	Data 0.001 (0.008)	Loss 4.8643 (5.0168)	Top-1 acc 12.109 (13.261)	Top-5 acc 30.859 (30.748)	lr 0.03834
Warmup Train [13][2720/3239]	Time 0.232 (0.234)	Data 0.001 (0.008)	Loss 5.1458 (5.0167)	Top-1 acc 13.281 (13.258)	Top-5 acc 30.078 (30.749)	lr 0.03833
Warmup Train [13][2730/3239]	Time 0.205 (0.234)	Data 0.001 (0.008)	Loss 5.0762 (5.0167)	Top-1 acc 11.328 (13.257)	Top-5 acc 30.078 (30.748)	lr 0.03832
Warmup Train [13][2740/3239]	Time 0.202 (0.234)	Data 0.001 (0.008)	Loss 5.1426 (5.0166)	Top-1 acc 12.109 (13.259)	Top-5 acc 28.906 (30.757)	lr 0.03832
Warmup Train [13][2750/3239]	Time 0.242 (0.234)	Data 0.001 (0.008)	Loss 5.0664 (5.0162)	Top-1 acc 12.109 (13.264)	Top-5 acc 27.734 (30.768)	lr 0.03831
Warmup Train [13][2760/3239]	Time 0.299 (0.234)	Data 0.002 (0.008)	Loss 4.9815 (5.0161)	Top-1 acc 14.453 (13.267)	Top-5 acc 32.031 (30.773)	lr 0.03831
Warmup Train [13][2770/3239]	Time 0.329 (0.234)	Data 0.001 (0.008)	Loss 4.9207 (5.0158)	Top-1 acc 16.016 (13.272)	Top-5 acc 31.250 (30.779)	lr 0.03830
Warmup Train [13][2780/3239]	Time 0.181 (0.234)	Data 0.001 (0.008)	Loss 5.1263 (5.0156)	Top-1 acc 11.719 (13.275)	Top-5 acc 32.031 (30.782)	lr 0.03830
Warmup Train [13][2790/3239]	Time 0.142 (0.233)	Data 0.001 (0.008)	Loss 4.9886 (5.0155)	Top-1 acc 14.062 (13.277)	Top-5 acc 32.812 (30.784)	lr 0.03829
Warmup Train [13][2800/3239]	Time 0.237 (0.233)	Data 0.001 (0.008)	Loss 4.9148 (5.0153)	Top-1 acc 13.281 (13.279)	Top-5 acc 32.031 (30.787)	lr 0.03829
Warmup Train [13][2810/3239]	Time 0.180 (0.233)	Data 0.001 (0.008)	Loss 5.1821 (5.0152)	Top-1 acc 14.453 (13.278)	Top-5 acc 28.516 (30.789)	lr 0.03828
Warmup Train [13][2820/3239]	Time 0.130 (0.233)	Data 0.002 (0.008)	Loss 4.8198 (5.0148)	Top-1 acc 17.188 (13.282)	Top-5 acc 37.109 (30.798)	lr 0.03828
Warmup Train [13][2830/3239]	Time 0.266 (0.233)	Data 0.002 (0.008)	Loss 4.8960 (5.0146)	Top-1 acc 13.281 (13.287)	Top-5 acc 31.641 (30.802)	lr 0.03827
Warmup Train [13][2840/3239]	Time 0.245 (0.233)	Data 0.002 (0.008)	Loss 4.9652 (5.0145)	Top-1 acc 17.188 (13.290)	Top-5 acc 34.766 (30.803)	lr 0.03827
Warmup Train [13][2850/3239]	Time 0.205 (0.233)	Data 0.001 (0.008)	Loss 4.8875 (5.0144)	Top-1 acc 17.578 (13.291)	Top-5 acc 32.812 (30.806)	lr 0.03826
Warmup Train [13][2860/3239]	Time 0.215 (0.233)	Data 0.001 (0.007)	Loss 4.9843 (5.0144)	Top-1 acc 14.844 (13.294)	Top-5 acc 29.297 (30.806)	lr 0.03826
Warmup Train [13][2870/3239]	Time 0.277 (0.233)	Data 0.001 (0.007)	Loss 5.1128 (5.0142)	Top-1 acc 10.938 (13.294)	Top-5 acc 28.906 (30.808)	lr 0.03825
Warmup Train [13][2880/3239]	Time 0.376 (0.233)	Data 0.001 (0.007)	Loss 4.8742 (5.0140)	Top-1 acc 12.109 (13.295)	Top-5 acc 29.688 (30.813)	lr 0.03825
Warmup Train [13][2890/3239]	Time 0.222 (0.233)	Data 0.002 (0.007)	Loss 4.8387 (5.0138)	Top-1 acc 14.453 (13.295)	Top-5 acc 36.719 (30.816)	lr 0.03824
Warmup Train [13][2900/3239]	Time 0.235 (0.233)	Data 0.001 (0.007)	Loss 4.8132 (5.0138)	Top-1 acc 13.672 (13.295)	Top-5 acc 37.500 (30.820)	lr 0.03824
Warmup Train [13][2910/3239]	Time 0.242 (0.233)	Data 0.001 (0.007)	Loss 5.0646 (5.0137)	Top-1 acc 15.625 (13.295)	Top-5 acc 30.078 (30.822)	lr 0.03823
Warmup Train [13][2920/3239]	Time 0.246 (0.233)	Data 0.001 (0.007)	Loss 4.9315 (5.0137)	Top-1 acc 14.062 (13.295)	Top-5 acc 28.516 (30.821)	lr 0.03823
Warmup Train [13][2930/3239]	Time 0.165 (0.233)	Data 0.001 (0.007)	Loss 4.9228 (5.0135)	Top-1 acc 18.750 (13.299)	Top-5 acc 32.422 (30.825)	lr 0.03822
Warmup Train [13][2940/3239]	Time 0.151 (0.233)	Data 0.001 (0.007)	Loss 4.9375 (5.0132)	Top-1 acc 15.625 (13.303)	Top-5 acc 35.547 (30.831)	lr 0.03822
Warmup Train [13][2950/3239]	Time 0.208 (0.233)	Data 0.001 (0.007)	Loss 4.9296 (5.0129)	Top-1 acc 14.062 (13.309)	Top-5 acc 33.594 (30.840)	lr 0.03821
Warmup Train [13][2960/3239]	Time 0.197 (0.233)	Data 0.002 (0.007)	Loss 4.7427 (5.0127)	Top-1 acc 18.750 (13.313)	Top-5 acc 36.328 (30.845)	lr 0.03821
Warmup Train [13][2970/3239]	Time 0.266 (0.233)	Data 0.001 (0.007)	Loss 4.9860 (5.0126)	Top-1 acc 13.672 (13.316)	Top-5 acc 31.641 (30.849)	lr 0.03820
Warmup Train [13][2980/3239]	Time 0.196 (0.233)	Data 0.001 (0.007)	Loss 4.9256 (5.0126)	Top-1 acc 14.844 (13.316)	Top-5 acc 30.469 (30.851)	lr 0.03820
Warmup Train [13][2990/3239]	Time 0.283 (0.233)	Data 0.002 (0.007)	Loss 5.0456 (5.0127)	Top-1 acc 12.500 (13.316)	Top-5 acc 29.297 (30.850)	lr 0.03819
Warmup Train [13][3000/3239]	Time 0.241 (0.233)	Data 0.001 (0.007)	Loss 4.9602 (5.0129)	Top-1 acc 12.500 (13.312)	Top-5 acc 30.078 (30.844)	lr 0.03819
Warmup Train [13][3010/3239]	Time 0.195 (0.233)	Data 0.001 (0.007)	Loss 5.1205 (5.0127)	Top-1 acc 11.719 (13.314)	Top-5 acc 30.469 (30.849)	lr 0.03818
Warmup Train [13][3020/3239]	Time 0.191 (0.233)	Data 0.001 (0.007)	Loss 5.0384 (5.0125)	Top-1 acc 14.062 (13.317)	Top-5 acc 32.812 (30.858)	lr 0.03818
Warmup Train [13][3030/3239]	Time 0.204 (0.233)	Data 0.001 (0.007)	Loss 4.9391 (5.0123)	Top-1 acc 14.453 (13.320)	Top-5 acc 31.641 (30.862)	lr 0.03817
Warmup Train [13][3040/3239]	Time 0.261 (0.233)	Data 0.001 (0.007)	Loss 4.9801 (5.0123)	Top-1 acc 13.281 (13.321)	Top-5 acc 32.031 (30.861)	lr 0.03817
Warmup Train [13][3050/3239]	Time 0.154 (0.232)	Data 0.002 (0.007)	Loss 4.8246 (5.0121)	Top-1 acc 15.234 (13.324)	Top-5 acc 37.500 (30.869)	lr 0.03816
Warmup Train [13][3060/3239]	Time 0.141 (0.232)	Data 0.001 (0.007)	Loss 4.8177 (5.0120)	Top-1 acc 17.578 (13.326)	Top-5 acc 32.031 (30.869)	lr 0.03816
Warmup Train [13][3070/3239]	Time 0.257 (0.232)	Data 0.001 (0.007)	Loss 5.1014 (5.0119)	Top-1 acc 9.375 (13.325)	Top-5 acc 31.250 (30.870)	lr 0.03815
Warmup Train [13][3080/3239]	Time 0.134 (0.232)	Data 0.001 (0.007)	Loss 5.0072 (5.0117)	Top-1 acc 11.328 (13.328)	Top-5 acc 29.297 (30.872)	lr 0.03815
Warmup Train [13][3090/3239]	Time 0.314 (0.232)	Data 0.001 (0.007)	Loss 5.2329 (5.0117)	Top-1 acc 10.547 (13.328)	Top-5 acc 26.953 (30.875)	lr 0.03814
Warmup Train [13][3100/3239]	Time 0.216 (0.232)	Data 0.002 (0.007)	Loss 4.7527 (5.0116)	Top-1 acc 17.969 (13.334)	Top-5 acc 36.719 (30.877)	lr 0.03813
Warmup Train [13][3110/3239]	Time 0.194 (0.232)	Data 0.001 (0.007)	Loss 4.9864 (5.0114)	Top-1 acc 13.281 (13.336)	Top-5 acc 32.422 (30.879)	lr 0.03813
Warmup Train [13][3120/3239]	Time 0.299 (0.232)	Data 0.001 (0.007)	Loss 4.9644 (5.0113)	Top-1 acc 12.500 (13.339)	Top-5 acc 32.422 (30.879)	lr 0.03812
Warmup Train [13][3130/3239]	Time 0.186 (0.232)	Data 0.002 (0.007)	Loss 5.0620 (5.0111)	Top-1 acc 12.109 (13.339)	Top-5 acc 27.734 (30.883)	lr 0.03812
Warmup Train [13][3140/3239]	Time 0.213 (0.232)	Data 0.001 (0.007)	Loss 4.9724 (5.0113)	Top-1 acc 10.938 (13.338)	Top-5 acc 29.297 (30.877)	lr 0.03811
Warmup Train [13][3150/3239]	Time 0.204 (0.232)	Data 0.002 (0.007)	Loss 5.0884 (5.0112)	Top-1 acc 10.547 (13.339)	Top-5 acc 27.344 (30.877)	lr 0.03811
Warmup Train [13][3160/3239]	Time 0.249 (0.232)	Data 0.001 (0.007)	Loss 4.9728 (5.0109)	Top-1 acc 12.891 (13.344)	Top-5 acc 30.469 (30.884)	lr 0.03810
Warmup Train [13][3170/3239]	Time 0.180 (0.232)	Data 0.001 (0.007)	Loss 4.8455 (5.0107)	Top-1 acc 15.234 (13.343)	Top-5 acc 34.766 (30.891)	lr 0.03810
Warmup Train [13][3180/3239]	Time 0.187 (0.232)	Data 0.000 (0.007)	Loss 4.8091 (5.0104)	Top-1 acc 14.844 (13.345)	Top-5 acc 35.547 (30.896)	lr 0.03809
Warmup Train [13][3190/3239]	Time 0.222 (0.232)	Data 0.000 (0.007)	Loss 4.8691 (5.0102)	Top-1 acc 12.109 (13.350)	Top-5 acc 33.594 (30.902)	lr 0.03809
Warmup Train [13][3200/3239]	Time 0.208 (0.232)	Data 0.000 (0.007)	Loss 4.9420 (5.0102)	Top-1 acc 15.625 (13.352)	Top-5 acc 35.547 (30.904)	lr 0.03808
Warmup Train [13][3210/3239]	Time 0.136 (0.232)	Data 0.000 (0.007)	Loss 4.9250 (5.0100)	Top-1 acc 16.797 (13.357)	Top-5 acc 32.812 (30.909)	lr 0.03808
Warmup Train [13][3220/3239]	Time 0.163 (0.232)	Data 0.000 (0.007)	Loss 4.8785 (5.0099)	Top-1 acc 13.281 (13.357)	Top-5 acc 30.078 (30.909)	lr 0.03807
Warmup Train [13][3230/3239]	Time 0.148 (0.232)	Data 0.000 (0.007)	Loss 5.1418 (5.0097)	Top-1 acc 14.062 (13.362)	Top-5 acc 31.250 (30.915)	lr 0.03807
Warmup Train [13][3239/3239]	Time 0.148 (0.232)	Data 0.000 (0.007)	Loss 4.7522 (5.0096)	Top-1 acc 13.580 (13.364)	Top-5 acc 43.210 (30.918)	lr 0.03806
==========Warmup Valid [13/40]	loss 4.159	top-1 acc 18.938	top-5 acc 40.074	Train top-1 13.364	top-5 30.918	flops: 442.4M
Warmup Train [14][0/3239]	Time 13.134 (13.134)	Data 10.664 (10.664)	Loss 5.2131 (5.2131)	Top-1 acc 14.453 (14.453)	Top-5 acc 27.734 (27.734)	lr 0.03806
Warmup Train [14][10/3239]	Time 0.282 (1.521)	Data 0.002 (0.987)	Loss 5.0234 (4.9701)	Top-1 acc 12.891 (14.986)	Top-5 acc 31.641 (31.854)	lr 0.03806
Warmup Train [14][20/3239]	Time 0.260 (0.927)	Data 0.001 (0.518)	Loss 4.9246 (4.9409)	Top-1 acc 15.625 (14.844)	Top-5 acc 37.109 (32.738)	lr 0.03805
Warmup Train [14][30/3239]	Time 0.184 (0.696)	Data 0.001 (0.352)	Loss 4.7862 (4.9416)	Top-1 acc 15.625 (14.529)	Top-5 acc 37.891 (32.523)	lr 0.03805
Warmup Train [14][40/3239]	Time 0.242 (0.584)	Data 0.001 (0.266)	Loss 4.9944 (4.9358)	Top-1 acc 11.719 (14.320)	Top-5 acc 26.562 (32.508)	lr 0.03804
Warmup Train [14][50/3239]	Time 0.304 (0.515)	Data 0.001 (0.215)	Loss 4.9621 (4.9460)	Top-1 acc 15.625 (14.338)	Top-5 acc 30.859 (32.246)	lr 0.03804
Warmup Train [14][60/3239]	Time 0.193 (0.472)	Data 0.001 (0.180)	Loss 4.9657 (4.9425)	Top-1 acc 13.672 (14.421)	Top-5 acc 30.469 (32.415)	lr 0.03803
Warmup Train [14][70/3239]	Time 0.220 (0.436)	Data 0.001 (0.155)	Loss 4.9800 (4.9344)	Top-1 acc 14.844 (14.563)	Top-5 acc 32.422 (32.686)	lr 0.03803
Warmup Train [14][80/3239]	Time 0.299 (0.413)	Data 0.001 (0.136)	Loss 5.0474 (4.9362)	Top-1 acc 14.844 (14.579)	Top-5 acc 32.031 (32.697)	lr 0.03802
Warmup Train [14][90/3239]	Time 0.214 (0.392)	Data 0.001 (0.121)	Loss 4.9931 (4.9325)	Top-1 acc 14.062 (14.590)	Top-5 acc 32.031 (32.778)	lr 0.03802
Warmup Train [14][100/3239]	Time 0.223 (0.376)	Data 0.001 (0.109)	Loss 4.9816 (4.9348)	Top-1 acc 11.719 (14.546)	Top-5 acc 28.906 (32.666)	lr 0.03801
Warmup Train [14][110/3239]	Time 0.179 (0.362)	Data 0.001 (0.100)	Loss 4.9974 (4.9376)	Top-1 acc 14.844 (14.436)	Top-5 acc 27.734 (32.626)	lr 0.03801
Warmup Train [14][120/3239]	Time 0.205 (0.351)	Data 0.001 (0.092)	Loss 4.9794 (4.9431)	Top-1 acc 14.062 (14.379)	Top-5 acc 29.688 (32.477)	lr 0.03800
Warmup Train [14][130/3239]	Time 0.137 (0.340)	Data 0.001 (0.085)	Loss 4.9776 (4.9464)	Top-1 acc 17.969 (14.385)	Top-5 acc 31.641 (32.318)	lr 0.03800
Warmup Train [14][140/3239]	Time 0.175 (0.332)	Data 0.001 (0.079)	Loss 4.9891 (4.9460)	Top-1 acc 13.672 (14.425)	Top-5 acc 32.422 (32.400)	lr 0.03799
Warmup Train [14][150/3239]	Time 0.257 (0.324)	Data 0.001 (0.074)	Loss 4.9269 (4.9456)	Top-1 acc 12.891 (14.419)	Top-5 acc 31.641 (32.386)	lr 0.03798
Warmup Train [14][160/3239]	Time 0.204 (0.317)	Data 0.001 (0.070)	Loss 4.8860 (4.9451)	Top-1 acc 16.406 (14.468)	Top-5 acc 32.031 (32.410)	lr 0.03798
Warmup Train [14][170/3239]	Time 0.183 (0.312)	Data 0.001 (0.066)	Loss 4.9637 (4.9464)	Top-1 acc 12.891 (14.396)	Top-5 acc 30.469 (32.349)	lr 0.03797
Warmup Train [14][180/3239]	Time 0.235 (0.307)	Data 0.002 (0.063)	Loss 4.9049 (4.9502)	Top-1 acc 16.406 (14.347)	Top-5 acc 30.469 (32.223)	lr 0.03797
Warmup Train [14][190/3239]	Time 0.185 (0.303)	Data 0.001 (0.059)	Loss 4.9221 (4.9523)	Top-1 acc 14.453 (14.296)	Top-5 acc 34.375 (32.230)	lr 0.03796
Warmup Train [14][200/3239]	Time 0.280 (0.299)	Data 0.001 (0.057)	Loss 4.7783 (4.9496)	Top-1 acc 14.062 (14.294)	Top-5 acc 35.938 (32.251)	lr 0.03796
Warmup Train [14][210/3239]	Time 0.156 (0.295)	Data 0.002 (0.054)	Loss 4.9082 (4.9465)	Top-1 acc 17.188 (14.320)	Top-5 acc 32.031 (32.277)	lr 0.03795
Warmup Train [14][220/3239]	Time 0.259 (0.292)	Data 0.001 (0.052)	Loss 5.0335 (4.9463)	Top-1 acc 14.844 (14.367)	Top-5 acc 28.125 (32.302)	lr 0.03795
Warmup Train [14][230/3239]	Time 0.302 (0.289)	Data 0.001 (0.050)	Loss 4.9761 (4.9460)	Top-1 acc 16.406 (14.330)	Top-5 acc 33.203 (32.356)	lr 0.03794
Warmup Train [14][240/3239]	Time 0.197 (0.286)	Data 0.002 (0.048)	Loss 5.0060 (4.9466)	Top-1 acc 14.062 (14.349)	Top-5 acc 31.250 (32.321)	lr 0.03794
Warmup Train [14][250/3239]	Time 0.213 (0.283)	Data 0.001 (0.046)	Loss 4.8350 (4.9445)	Top-1 acc 16.406 (14.383)	Top-5 acc 35.547 (32.434)	lr 0.03793
Warmup Train [14][260/3239]	Time 0.188 (0.281)	Data 0.001 (0.044)	Loss 5.0320 (4.9434)	Top-1 acc 12.109 (14.399)	Top-5 acc 27.734 (32.450)	lr 0.03793
Warmup Train [14][270/3239]	Time 0.287 (0.279)	Data 0.001 (0.043)	Loss 5.0564 (4.9434)	Top-1 acc 13.281 (14.368)	Top-5 acc 32.422 (32.459)	lr 0.03792
Warmup Train [14][280/3239]	Time 0.192 (0.276)	Data 0.001 (0.041)	Loss 4.7727 (4.9442)	Top-1 acc 17.969 (14.324)	Top-5 acc 36.328 (32.384)	lr 0.03792
Warmup Train [14][290/3239]	Time 0.226 (0.274)	Data 0.037 (0.040)	Loss 4.8837 (4.9436)	Top-1 acc 14.844 (14.316)	Top-5 acc 30.469 (32.352)	lr 0.03791
Warmup Train [14][300/3239]	Time 0.321 (0.273)	Data 0.001 (0.039)	Loss 4.9003 (4.9454)	Top-1 acc 15.234 (14.283)	Top-5 acc 30.078 (32.278)	lr 0.03791
Warmup Train [14][310/3239]	Time 0.192 (0.271)	Data 0.001 (0.037)	Loss 4.9013 (4.9461)	Top-1 acc 15.625 (14.299)	Top-5 acc 32.812 (32.264)	lr 0.03790
Warmup Train [14][320/3239]	Time 0.209 (0.269)	Data 0.001 (0.037)	Loss 5.1643 (4.9461)	Top-1 acc 9.766 (14.299)	Top-5 acc 28.516 (32.264)	lr 0.03790
Warmup Train [14][330/3239]	Time 0.249 (0.268)	Data 0.001 (0.036)	Loss 4.9225 (4.9451)	Top-1 acc 14.453 (14.352)	Top-5 acc 34.375 (32.299)	lr 0.03789
Warmup Train [14][340/3239]	Time 0.268 (0.267)	Data 0.001 (0.035)	Loss 4.7631 (4.9452)	Top-1 acc 14.062 (14.326)	Top-5 acc 36.328 (32.308)	lr 0.03789
Warmup Train [14][350/3239]	Time 0.147 (0.265)	Data 0.002 (0.034)	Loss 5.0285 (4.9453)	Top-1 acc 14.453 (14.343)	Top-5 acc 32.031 (32.334)	lr 0.03788
Warmup Train [14][360/3239]	Time 0.224 (0.264)	Data 0.001 (0.033)	Loss 4.8894 (4.9454)	Top-1 acc 14.844 (14.340)	Top-5 acc 33.984 (32.339)	lr 0.03788
Warmup Train [14][370/3239]	Time 0.251 (0.263)	Data 0.002 (0.032)	Loss 5.0855 (4.9457)	Top-1 acc 13.672 (14.352)	Top-5 acc 28.125 (32.286)	lr 0.03787
Warmup Train [14][380/3239]	Time 0.204 (0.262)	Data 0.001 (0.032)	Loss 4.8553 (4.9442)	Top-1 acc 14.844 (14.366)	Top-5 acc 35.938 (32.336)	lr 0.03787
Warmup Train [14][390/3239]	Time 0.163 (0.261)	Data 0.001 (0.031)	Loss 4.9833 (4.9435)	Top-1 acc 10.547 (14.369)	Top-5 acc 32.031 (32.351)	lr 0.03786
Warmup Train [14][400/3239]	Time 0.331 (0.260)	Data 0.001 (0.030)	Loss 4.8994 (4.9425)	Top-1 acc 11.719 (14.378)	Top-5 acc 33.594 (32.375)	lr 0.03786
Warmup Train [14][410/3239]	Time 0.240 (0.259)	Data 0.001 (0.030)	Loss 5.0156 (4.9421)	Top-1 acc 14.062 (14.376)	Top-5 acc 30.078 (32.377)	lr 0.03785
Warmup Train [14][420/3239]	Time 0.187 (0.258)	Data 0.001 (0.029)	Loss 5.0304 (4.9421)	Top-1 acc 15.234 (14.367)	Top-5 acc 32.422 (32.389)	lr 0.03784
Warmup Train [14][430/3239]	Time 0.242 (0.258)	Data 0.002 (0.028)	Loss 4.9735 (4.9429)	Top-1 acc 13.672 (14.386)	Top-5 acc 31.250 (32.395)	lr 0.03784
Warmup Train [14][440/3239]	Time 0.212 (0.257)	Data 0.001 (0.028)	Loss 4.9481 (4.9441)	Top-1 acc 12.109 (14.364)	Top-5 acc 32.422 (32.355)	lr 0.03783
Warmup Train [14][450/3239]	Time 0.287 (0.256)	Data 0.001 (0.027)	Loss 5.0203 (4.9442)	Top-1 acc 13.281 (14.352)	Top-5 acc 30.078 (32.346)	lr 0.03783
Warmup Train [14][460/3239]	Time 0.237 (0.255)	Data 0.002 (0.027)	Loss 5.0806 (4.9451)	Top-1 acc 12.891 (14.335)	Top-5 acc 28.906 (32.333)	lr 0.03782
Warmup Train [14][470/3239]	Time 0.182 (0.254)	Data 0.001 (0.026)	Loss 4.8565 (4.9446)	Top-1 acc 16.406 (14.374)	Top-5 acc 36.719 (32.351)	lr 0.03782
Warmup Train [14][480/3239]	Time 0.233 (0.253)	Data 0.002 (0.026)	Loss 4.7869 (4.9451)	Top-1 acc 18.750 (14.375)	Top-5 acc 35.547 (32.368)	lr 0.03781
Warmup Train [14][490/3239]	Time 0.185 (0.253)	Data 0.001 (0.025)	Loss 4.8766 (4.9444)	Top-1 acc 17.969 (14.389)	Top-5 acc 35.547 (32.395)	lr 0.03781
Warmup Train [14][500/3239]	Time 0.300 (0.252)	Data 0.001 (0.025)	Loss 5.0253 (4.9450)	Top-1 acc 11.719 (14.371)	Top-5 acc 32.031 (32.376)	lr 0.03780
Warmup Train [14][510/3239]	Time 0.242 (0.252)	Data 0.001 (0.024)	Loss 5.0184 (4.9458)	Top-1 acc 14.844 (14.371)	Top-5 acc 31.641 (32.371)	lr 0.03780
Warmup Train [14][520/3239]	Time 0.196 (0.251)	Data 0.001 (0.024)	Loss 4.9896 (4.9469)	Top-1 acc 14.453 (14.354)	Top-5 acc 28.516 (32.356)	lr 0.03779
Warmup Train [14][530/3239]	Time 0.279 (0.251)	Data 0.001 (0.024)	Loss 4.8301 (4.9466)	Top-1 acc 16.406 (14.356)	Top-5 acc 34.375 (32.363)	lr 0.03779
Warmup Train [14][540/3239]	Time 0.190 (0.250)	Data 0.001 (0.023)	Loss 4.9322 (4.9461)	Top-1 acc 16.406 (14.380)	Top-5 acc 32.031 (32.399)	lr 0.03778
Warmup Train [14][550/3239]	Time 0.195 (0.250)	Data 0.001 (0.023)	Loss 5.0766 (4.9466)	Top-1 acc 10.547 (14.378)	Top-5 acc 27.344 (32.397)	lr 0.03778
Warmup Train [14][560/3239]	Time 0.214 (0.249)	Data 0.003 (0.023)	Loss 5.0179 (4.9470)	Top-1 acc 14.062 (14.386)	Top-5 acc 30.078 (32.391)	lr 0.03777
Warmup Train [14][570/3239]	Time 0.230 (0.249)	Data 0.001 (0.022)	Loss 5.0198 (4.9461)	Top-1 acc 14.062 (14.389)	Top-5 acc 32.422 (32.414)	lr 0.03777
Warmup Train [14][580/3239]	Time 0.221 (0.248)	Data 0.002 (0.022)	Loss 5.1024 (4.9464)	Top-1 acc 13.281 (14.386)	Top-5 acc 28.125 (32.396)	lr 0.03776
Warmup Train [14][590/3239]	Time 0.269 (0.248)	Data 0.001 (0.021)	Loss 5.0601 (4.9459)	Top-1 acc 13.281 (14.396)	Top-5 acc 28.125 (32.412)	lr 0.03776
Warmup Train [14][600/3239]	Time 0.132 (0.247)	Data 0.001 (0.021)	Loss 4.9826 (4.9454)	Top-1 acc 14.453 (14.402)	Top-5 acc 30.859 (32.428)	lr 0.03775
Warmup Train [14][610/3239]	Time 0.271 (0.247)	Data 0.001 (0.021)	Loss 4.9781 (4.9451)	Top-1 acc 17.188 (14.406)	Top-5 acc 31.250 (32.423)	lr 0.03775
Warmup Train [14][620/3239]	Time 0.227 (0.246)	Data 0.001 (0.021)	Loss 5.0464 (4.9443)	Top-1 acc 13.281 (14.416)	Top-5 acc 28.906 (32.442)	lr 0.03774
Warmup Train [14][630/3239]	Time 0.183 (0.246)	Data 0.002 (0.020)	Loss 4.9299 (4.9446)	Top-1 acc 12.891 (14.423)	Top-5 acc 30.078 (32.448)	lr 0.03774
Warmup Train [14][640/3239]	Time 0.252 (0.245)	Data 0.001 (0.020)	Loss 5.1389 (4.9444)	Top-1 acc 11.328 (14.429)	Top-5 acc 27.734 (32.466)	lr 0.03773
Warmup Train [14][650/3239]	Time 0.157 (0.245)	Data 0.001 (0.020)	Loss 4.9617 (4.9442)	Top-1 acc 15.234 (14.433)	Top-5 acc 31.250 (32.479)	lr 0.03772
Warmup Train [14][660/3239]	Time 0.159 (0.245)	Data 0.001 (0.019)	Loss 4.8537 (4.9445)	Top-1 acc 11.328 (14.445)	Top-5 acc 33.203 (32.476)	lr 0.03772
Warmup Train [14][670/3239]	Time 0.262 (0.244)	Data 0.001 (0.019)	Loss 4.7873 (4.9444)	Top-1 acc 15.234 (14.448)	Top-5 acc 32.812 (32.475)	lr 0.03771
Warmup Train [14][680/3239]	Time 0.206 (0.244)	Data 0.001 (0.019)	Loss 4.9010 (4.9444)	Top-1 acc 15.234 (14.441)	Top-5 acc 32.812 (32.473)	lr 0.03771
Warmup Train [14][690/3239]	Time 0.244 (0.243)	Data 0.001 (0.019)	Loss 4.9105 (4.9435)	Top-1 acc 14.844 (14.449)	Top-5 acc 37.500 (32.489)	lr 0.03770
Warmup Train [14][700/3239]	Time 0.191 (0.243)	Data 0.001 (0.018)	Loss 4.9833 (4.9434)	Top-1 acc 12.500 (14.433)	Top-5 acc 32.812 (32.486)	lr 0.03770
Warmup Train [14][710/3239]	Time 0.289 (0.243)	Data 0.002 (0.018)	Loss 4.8567 (4.9428)	Top-1 acc 14.844 (14.439)	Top-5 acc 33.984 (32.507)	lr 0.03769
Warmup Train [14][720/3239]	Time 0.149 (0.243)	Data 0.001 (0.018)	Loss 4.9881 (4.9424)	Top-1 acc 13.672 (14.443)	Top-5 acc 33.594 (32.528)	lr 0.03769
Warmup Train [14][730/3239]	Time 0.185 (0.242)	Data 0.001 (0.018)	Loss 4.8306 (4.9424)	Top-1 acc 15.234 (14.442)	Top-5 acc 39.062 (32.537)	lr 0.03768
Warmup Train [14][740/3239]	Time 0.235 (0.242)	Data 0.001 (0.018)	Loss 4.9185 (4.9426)	Top-1 acc 14.062 (14.442)	Top-5 acc 29.297 (32.533)	lr 0.03768
Warmup Train [14][750/3239]	Time 0.191 (0.242)	Data 0.001 (0.018)	Loss 4.9633 (4.9425)	Top-1 acc 15.234 (14.437)	Top-5 acc 33.594 (32.533)	lr 0.03767
Warmup Train [14][760/3239]	Time 0.162 (0.242)	Data 0.002 (0.017)	Loss 4.9421 (4.9426)	Top-1 acc 14.844 (14.446)	Top-5 acc 30.859 (32.541)	lr 0.03767
Warmup Train [14][770/3239]	Time 0.240 (0.241)	Data 0.001 (0.017)	Loss 4.7915 (4.9422)	Top-1 acc 15.234 (14.450)	Top-5 acc 35.156 (32.543)	lr 0.03766
Warmup Train [14][780/3239]	Time 0.245 (0.241)	Data 0.001 (0.017)	Loss 5.0383 (4.9421)	Top-1 acc 11.719 (14.444)	Top-5 acc 28.125 (32.542)	lr 0.03766
Warmup Train [14][790/3239]	Time 0.227 (0.241)	Data 0.001 (0.017)	Loss 4.7570 (4.9418)	Top-1 acc 17.578 (14.462)	Top-5 acc 34.375 (32.553)	lr 0.03765
Warmup Train [14][800/3239]	Time 0.351 (0.241)	Data 0.001 (0.017)	Loss 4.9364 (4.9419)	Top-1 acc 13.672 (14.458)	Top-5 acc 33.594 (32.552)	lr 0.03765
Warmup Train [14][810/3239]	Time 0.200 (0.241)	Data 0.001 (0.017)	Loss 4.7855 (4.9412)	Top-1 acc 16.406 (14.470)	Top-5 acc 37.109 (32.570)	lr 0.03764
Warmup Train [14][820/3239]	Time 0.283 (0.240)	Data 0.001 (0.016)	Loss 4.8917 (4.9411)	Top-1 acc 16.406 (14.465)	Top-5 acc 34.375 (32.569)	lr 0.03764
Warmup Train [14][830/3239]	Time 0.173 (0.240)	Data 0.002 (0.016)	Loss 4.8572 (4.9414)	Top-1 acc 14.844 (14.458)	Top-5 acc 31.250 (32.553)	lr 0.03763
Warmup Train [14][840/3239]	Time 0.204 (0.240)	Data 0.001 (0.016)	Loss 4.8776 (4.9416)	Top-1 acc 15.234 (14.453)	Top-5 acc 34.766 (32.549)	lr 0.03763
Warmup Train [14][850/3239]	Time 0.245 (0.239)	Data 0.001 (0.016)	Loss 5.0251 (4.9419)	Top-1 acc 12.109 (14.437)	Top-5 acc 28.516 (32.542)	lr 0.03762
Warmup Train [14][860/3239]	Time 0.208 (0.239)	Data 0.001 (0.016)	Loss 4.9185 (4.9415)	Top-1 acc 13.672 (14.439)	Top-5 acc 29.688 (32.553)	lr 0.03762
Warmup Train [14][870/3239]	Time 0.237 (0.239)	Data 0.001 (0.016)	Loss 5.0285 (4.9420)	Top-1 acc 10.156 (14.426)	Top-5 acc 28.906 (32.546)	lr 0.03761
Warmup Train [14][880/3239]	Time 0.244 (0.239)	Data 0.003 (0.015)	Loss 5.0438 (4.9418)	Top-1 acc 14.453 (14.426)	Top-5 acc 30.078 (32.551)	lr 0.03760
Warmup Train [14][890/3239]	Time 0.225 (0.238)	Data 0.001 (0.015)	Loss 5.0014 (4.9419)	Top-1 acc 12.109 (14.420)	Top-5 acc 32.812 (32.550)	lr 0.03760
Warmup Train [14][900/3239]	Time 0.255 (0.238)	Data 0.001 (0.015)	Loss 5.0097 (4.9411)	Top-1 acc 13.672 (14.435)	Top-5 acc 30.078 (32.568)	lr 0.03759
Warmup Train [14][910/3239]	Time 0.224 (0.238)	Data 0.001 (0.015)	Loss 4.9001 (4.9409)	Top-1 acc 13.672 (14.455)	Top-5 acc 30.859 (32.571)	lr 0.03759
Warmup Train [14][920/3239]	Time 0.226 (0.238)	Data 0.001 (0.015)	Loss 4.9378 (4.9407)	Top-1 acc 16.016 (14.451)	Top-5 acc 33.203 (32.582)	lr 0.03758
Warmup Train [14][930/3239]	Time 0.224 (0.238)	Data 0.002 (0.015)	Loss 4.9552 (4.9406)	Top-1 acc 14.062 (14.451)	Top-5 acc 31.250 (32.590)	lr 0.03758
Warmup Train [14][940/3239]	Time 0.189 (0.237)	Data 0.001 (0.015)	Loss 5.0445 (4.9410)	Top-1 acc 15.234 (14.444)	Top-5 acc 33.984 (32.586)	lr 0.03757
Warmup Train [14][950/3239]	Time 0.155 (0.237)	Data 0.001 (0.014)	Loss 4.9523 (4.9410)	Top-1 acc 15.625 (14.452)	Top-5 acc 34.766 (32.588)	lr 0.03757
Warmup Train [14][960/3239]	Time 0.156 (0.237)	Data 0.001 (0.014)	Loss 4.8934 (4.9408)	Top-1 acc 14.062 (14.452)	Top-5 acc 35.547 (32.592)	lr 0.03756
Warmup Train [14][970/3239]	Time 0.193 (0.237)	Data 0.001 (0.014)	Loss 5.0185 (4.9409)	Top-1 acc 12.500 (14.456)	Top-5 acc 32.031 (32.588)	lr 0.03756
Warmup Train [14][980/3239]	Time 0.210 (0.237)	Data 0.002 (0.014)	Loss 4.8201 (4.9407)	Top-1 acc 15.234 (14.462)	Top-5 acc 34.766 (32.602)	lr 0.03755
Warmup Train [14][990/3239]	Time 0.163 (0.237)	Data 0.001 (0.014)	Loss 4.9382 (4.9405)	Top-1 acc 15.625 (14.465)	Top-5 acc 30.469 (32.603)	lr 0.03755
Warmup Train [14][1000/3239]	Time 0.277 (0.237)	Data 0.001 (0.014)	Loss 5.0187 (4.9406)	Top-1 acc 13.672 (14.466)	Top-5 acc 30.078 (32.596)	lr 0.03754
Warmup Train [14][1010/3239]	Time 0.245 (0.237)	Data 0.001 (0.014)	Loss 4.8586 (4.9408)	Top-1 acc 16.016 (14.457)	Top-5 acc 33.594 (32.586)	lr 0.03754
Warmup Train [14][1020/3239]	Time 0.189 (0.236)	Data 0.001 (0.014)	Loss 4.9994 (4.9407)	Top-1 acc 12.500 (14.456)	Top-5 acc 30.469 (32.591)	lr 0.03753
Warmup Train [14][1030/3239]	Time 0.191 (0.236)	Data 0.002 (0.014)	Loss 4.8546 (4.9405)	Top-1 acc 14.453 (14.456)	Top-5 acc 34.766 (32.603)	lr 0.03753
Warmup Train [14][1040/3239]	Time 0.149 (0.236)	Data 0.001 (0.013)	Loss 4.9613 (4.9405)	Top-1 acc 19.141 (14.465)	Top-5 acc 33.203 (32.601)	lr 0.03752
Warmup Train [14][1050/3239]	Time 0.233 (0.236)	Data 0.001 (0.013)	Loss 5.1645 (4.9406)	Top-1 acc 14.844 (14.471)	Top-5 acc 27.734 (32.600)	lr 0.03752
Warmup Train [14][1060/3239]	Time 0.212 (0.236)	Data 0.001 (0.013)	Loss 4.7722 (4.9408)	Top-1 acc 15.625 (14.472)	Top-5 acc 34.375 (32.596)	lr 0.03751
Warmup Train [14][1070/3239]	Time 0.225 (0.236)	Data 0.001 (0.013)	Loss 4.9092 (4.9411)	Top-1 acc 12.500 (14.467)	Top-5 acc 32.031 (32.582)	lr 0.03751
Warmup Train [14][1080/3239]	Time 0.208 (0.236)	Data 0.001 (0.013)	Loss 4.9104 (4.9411)	Top-1 acc 12.891 (14.460)	Top-5 acc 36.328 (32.588)	lr 0.03750
Warmup Train [14][1090/3239]	Time 0.269 (0.236)	Data 0.001 (0.013)	Loss 4.9254 (4.9410)	Top-1 acc 12.500 (14.470)	Top-5 acc 29.688 (32.589)	lr 0.03749
Warmup Train [14][1100/3239]	Time 0.156 (0.235)	Data 0.001 (0.013)	Loss 4.9406 (4.9406)	Top-1 acc 12.891 (14.472)	Top-5 acc 35.547 (32.601)	lr 0.03749
Warmup Train [14][1110/3239]	Time 0.252 (0.235)	Data 0.001 (0.013)	Loss 5.0297 (4.9410)	Top-1 acc 10.156 (14.470)	Top-5 acc 27.344 (32.593)	lr 0.03748
Warmup Train [14][1120/3239]	Time 0.186 (0.235)	Data 0.001 (0.013)	Loss 4.7512 (4.9413)	Top-1 acc 15.625 (14.459)	Top-5 acc 36.719 (32.583)	lr 0.03748
Warmup Train [14][1130/3239]	Time 0.143 (0.235)	Data 0.001 (0.013)	Loss 4.9330 (4.9411)	Top-1 acc 14.062 (14.475)	Top-5 acc 33.984 (32.597)	lr 0.03747
Warmup Train [14][1140/3239]	Time 0.273 (0.235)	Data 0.001 (0.013)	Loss 4.8277 (4.9408)	Top-1 acc 14.062 (14.470)	Top-5 acc 36.328 (32.599)	lr 0.03747
Warmup Train [14][1150/3239]	Time 0.261 (0.235)	Data 0.001 (0.013)	Loss 4.9496 (4.9407)	Top-1 acc 13.281 (14.475)	Top-5 acc 32.422 (32.607)	lr 0.03746
Warmup Train [14][1160/3239]	Time 0.219 (0.235)	Data 0.001 (0.012)	Loss 4.8688 (4.9407)	Top-1 acc 15.234 (14.477)	Top-5 acc 34.766 (32.603)	lr 0.03746
Warmup Train [14][1170/3239]	Time 0.238 (0.234)	Data 0.001 (0.012)	Loss 4.9217 (4.9408)	Top-1 acc 13.281 (14.472)	Top-5 acc 32.031 (32.593)	lr 0.03745
Warmup Train [14][1180/3239]	Time 0.210 (0.234)	Data 0.001 (0.012)	Loss 4.9985 (4.9410)	Top-1 acc 12.891 (14.468)	Top-5 acc 30.859 (32.595)	lr 0.03745
Warmup Train [14][1190/3239]	Time 0.352 (0.234)	Data 0.001 (0.012)	Loss 4.8282 (4.9405)	Top-1 acc 15.234 (14.476)	Top-5 acc 33.203 (32.606)	lr 0.03744
Warmup Train [14][1200/3239]	Time 0.278 (0.234)	Data 0.001 (0.012)	Loss 4.8007 (4.9405)	Top-1 acc 16.406 (14.478)	Top-5 acc 35.938 (32.615)	lr 0.03744
Warmup Train [14][1210/3239]	Time 0.205 (0.234)	Data 0.001 (0.012)	Loss 5.0676 (4.9403)	Top-1 acc 13.672 (14.476)	Top-5 acc 27.344 (32.618)	lr 0.03743
Warmup Train [14][1220/3239]	Time 0.178 (0.234)	Data 0.001 (0.012)	Loss 4.9494 (4.9404)	Top-1 acc 11.328 (14.463)	Top-5 acc 30.469 (32.603)	lr 0.03743
Warmup Train [14][1230/3239]	Time 0.159 (0.234)	Data 0.001 (0.012)	Loss 4.8502 (4.9403)	Top-1 acc 15.234 (14.461)	Top-5 acc 36.719 (32.605)	lr 0.03742
Warmup Train [14][1240/3239]	Time 0.144 (0.234)	Data 0.001 (0.012)	Loss 4.9016 (4.9397)	Top-1 acc 13.672 (14.467)	Top-5 acc 30.469 (32.610)	lr 0.03742
Warmup Train [14][1250/3239]	Time 0.236 (0.234)	Data 0.001 (0.012)	Loss 4.7539 (4.9398)	Top-1 acc 18.359 (14.464)	Top-5 acc 38.672 (32.615)	lr 0.03741
Warmup Train [14][1260/3239]	Time 0.212 (0.233)	Data 0.001 (0.012)	Loss 5.1577 (4.9399)	Top-1 acc 14.844 (14.465)	Top-5 acc 33.203 (32.605)	lr 0.03741
Warmup Train [14][1270/3239]	Time 0.168 (0.233)	Data 0.001 (0.012)	Loss 4.8755 (4.9398)	Top-1 acc 15.625 (14.458)	Top-5 acc 32.422 (32.607)	lr 0.03740
Warmup Train [14][1280/3239]	Time 0.238 (0.233)	Data 0.001 (0.012)	Loss 4.9688 (4.9396)	Top-1 acc 13.672 (14.460)	Top-5 acc 32.031 (32.605)	lr 0.03739
Warmup Train [14][1290/3239]	Time 0.200 (0.233)	Data 0.001 (0.011)	Loss 4.8648 (4.9396)	Top-1 acc 17.188 (14.466)	Top-5 acc 33.203 (32.605)	lr 0.03739
Warmup Train [14][1300/3239]	Time 0.219 (0.233)	Data 0.005 (0.011)	Loss 4.8068 (4.9392)	Top-1 acc 15.234 (14.474)	Top-5 acc 33.984 (32.619)	lr 0.03738
Warmup Train [14][1310/3239]	Time 0.361 (0.233)	Data 0.002 (0.011)	Loss 4.9383 (4.9390)	Top-1 acc 14.453 (14.484)	Top-5 acc 33.203 (32.633)	lr 0.03738
Warmup Train [14][1320/3239]	Time 0.280 (0.233)	Data 0.001 (0.011)	Loss 4.8651 (4.9386)	Top-1 acc 15.234 (14.480)	Top-5 acc 34.766 (32.647)	lr 0.03737
Warmup Train [14][1330/3239]	Time 0.204 (0.233)	Data 0.002 (0.011)	Loss 4.9186 (4.9384)	Top-1 acc 14.453 (14.490)	Top-5 acc 33.203 (32.648)	lr 0.03737
Warmup Train [14][1340/3239]	Time 0.205 (0.233)	Data 0.001 (0.011)	Loss 4.9387 (4.9386)	Top-1 acc 14.062 (14.491)	Top-5 acc 32.422 (32.644)	lr 0.03736
Warmup Train [14][1350/3239]	Time 0.185 (0.233)	Data 0.001 (0.011)	Loss 5.0997 (4.9386)	Top-1 acc 12.500 (14.493)	Top-5 acc 29.297 (32.641)	lr 0.03736
Warmup Train [14][1360/3239]	Time 0.200 (0.233)	Data 0.001 (0.011)	Loss 4.9615 (4.9383)	Top-1 acc 13.672 (14.497)	Top-5 acc 33.594 (32.647)	lr 0.03735
Warmup Train [14][1370/3239]	Time 0.242 (0.232)	Data 0.001 (0.011)	Loss 5.0927 (4.9384)	Top-1 acc 10.938 (14.495)	Top-5 acc 27.734 (32.646)	lr 0.03735
Warmup Train [14][1380/3239]	Time 0.189 (0.232)	Data 0.001 (0.011)	Loss 4.8674 (4.9383)	Top-1 acc 16.406 (14.492)	Top-5 acc 32.812 (32.648)	lr 0.03734
Warmup Train [14][1390/3239]	Time 0.138 (0.232)	Data 0.001 (0.011)	Loss 4.9307 (4.9385)	Top-1 acc 11.719 (14.490)	Top-5 acc 32.812 (32.647)	lr 0.03734
Warmup Train [14][1400/3239]	Time 0.186 (0.232)	Data 0.001 (0.011)	Loss 4.8764 (4.9385)	Top-1 acc 15.625 (14.489)	Top-5 acc 35.156 (32.650)	lr 0.03733
Warmup Train [14][1410/3239]	Time 0.196 (0.232)	Data 0.001 (0.011)	Loss 4.8516 (4.9383)	Top-1 acc 11.719 (14.491)	Top-5 acc 32.812 (32.651)	lr 0.03733
Warmup Train [14][1420/3239]	Time 0.284 (0.232)	Data 0.002 (0.011)	Loss 4.8884 (4.9381)	Top-1 acc 17.188 (14.491)	Top-5 acc 35.938 (32.647)	lr 0.03732
Warmup Train [14][1430/3239]	Time 0.209 (0.232)	Data 0.001 (0.011)	Loss 4.9629 (4.9380)	Top-1 acc 11.719 (14.489)	Top-5 acc 30.078 (32.651)	lr 0.03732
Warmup Train [14][1440/3239]	Time 0.198 (0.232)	Data 0.001 (0.011)	Loss 4.7289 (4.9379)	Top-1 acc 17.969 (14.489)	Top-5 acc 38.281 (32.653)	lr 0.03731
Warmup Train [14][1450/3239]	Time 0.134 (0.232)	Data 0.001 (0.011)	Loss 5.1097 (4.9377)	Top-1 acc 15.234 (14.492)	Top-5 acc 32.031 (32.657)	lr 0.03731
Warmup Train [14][1460/3239]	Time 0.220 (0.232)	Data 0.001 (0.010)	Loss 4.9952 (4.9380)	Top-1 acc 16.406 (14.491)	Top-5 acc 33.594 (32.657)	lr 0.03730
Warmup Train [14][1470/3239]	Time 0.186 (0.232)	Data 0.001 (0.010)	Loss 4.9673 (4.9379)	Top-1 acc 15.625 (14.491)	Top-5 acc 33.203 (32.656)	lr 0.03729
Warmup Train [14][1480/3239]	Time 0.238 (0.232)	Data 0.001 (0.010)	Loss 4.9833 (4.9378)	Top-1 acc 16.797 (14.492)	Top-5 acc 30.078 (32.658)	lr 0.03729
Warmup Train [14][1490/3239]	Time 0.253 (0.232)	Data 0.001 (0.010)	Loss 4.9292 (4.9382)	Top-1 acc 15.234 (14.484)	Top-5 acc 32.812 (32.647)	lr 0.03728
Warmup Train [14][1500/3239]	Time 0.150 (0.232)	Data 0.001 (0.010)	Loss 5.0184 (4.9386)	Top-1 acc 13.672 (14.478)	Top-5 acc 32.031 (32.646)	lr 0.03728
Warmup Train [14][1510/3239]	Time 0.180 (0.231)	Data 0.003 (0.010)	Loss 4.8983 (4.9385)	Top-1 acc 13.672 (14.482)	Top-5 acc 32.812 (32.645)	lr 0.03727
Warmup Train [14][1520/3239]	Time 0.176 (0.231)	Data 0.001 (0.010)	Loss 4.8858 (4.9383)	Top-1 acc 15.234 (14.484)	Top-5 acc 32.812 (32.649)	lr 0.03727
Warmup Train [14][1530/3239]	Time 0.305 (0.231)	Data 0.001 (0.010)	Loss 4.6653 (4.9384)	Top-1 acc 15.625 (14.473)	Top-5 acc 40.234 (32.651)	lr 0.03726
Warmup Train [14][1540/3239]	Time 0.206 (0.231)	Data 0.001 (0.010)	Loss 4.8925 (4.9381)	Top-1 acc 15.234 (14.475)	Top-5 acc 33.203 (32.656)	lr 0.03726
Warmup Train [14][1550/3239]	Time 0.296 (0.231)	Data 0.001 (0.010)	Loss 4.9315 (4.9384)	Top-1 acc 18.750 (14.471)	Top-5 acc 32.031 (32.651)	lr 0.03725
Warmup Train [14][1560/3239]	Time 0.205 (0.231)	Data 0.001 (0.010)	Loss 4.7401 (4.9384)	Top-1 acc 18.359 (14.472)	Top-5 acc 39.062 (32.653)	lr 0.03725
Warmup Train [14][1570/3239]	Time 0.209 (0.231)	Data 0.001 (0.010)	Loss 4.8363 (4.9381)	Top-1 acc 14.062 (14.474)	Top-5 acc 33.203 (32.653)	lr 0.03724
Warmup Train [14][1580/3239]	Time 0.202 (0.231)	Data 0.002 (0.010)	Loss 4.7761 (4.9380)	Top-1 acc 13.281 (14.479)	Top-5 acc 34.766 (32.648)	lr 0.03724
Warmup Train [14][1590/3239]	Time 0.244 (0.231)	Data 0.001 (0.010)	Loss 4.7879 (4.9379)	Top-1 acc 16.016 (14.485)	Top-5 acc 32.031 (32.650)	lr 0.03723
Warmup Train [14][1600/3239]	Time 0.190 (0.231)	Data 0.001 (0.010)	Loss 4.7878 (4.9377)	Top-1 acc 14.844 (14.484)	Top-5 acc 41.016 (32.659)	lr 0.03723
Warmup Train [14][1610/3239]	Time 0.165 (0.231)	Data 0.001 (0.010)	Loss 5.0553 (4.9379)	Top-1 acc 12.891 (14.483)	Top-5 acc 31.641 (32.655)	lr 0.03722
Warmup Train [14][1620/3239]	Time 0.170 (0.231)	Data 0.001 (0.010)	Loss 5.0620 (4.9376)	Top-1 acc 12.500 (14.488)	Top-5 acc 29.297 (32.669)	lr 0.03722
Warmup Train [14][1630/3239]	Time 0.226 (0.231)	Data 0.001 (0.010)	Loss 5.0644 (4.9375)	Top-1 acc 11.719 (14.489)	Top-5 acc 29.297 (32.665)	lr 0.03721
Warmup Train [14][1640/3239]	Time 0.317 (0.231)	Data 0.001 (0.010)	Loss 4.7863 (4.9374)	Top-1 acc 17.578 (14.495)	Top-5 acc 40.625 (32.669)	lr 0.03720
Warmup Train [14][1650/3239]	Time 0.261 (0.231)	Data 0.002 (0.010)	Loss 4.7897 (4.9373)	Top-1 acc 17.188 (14.495)	Top-5 acc 38.281 (32.670)	lr 0.03720
Warmup Train [14][1660/3239]	Time 0.148 (0.231)	Data 0.001 (0.010)	Loss 5.0106 (4.9374)	Top-1 acc 14.844 (14.499)	Top-5 acc 36.328 (32.668)	lr 0.03719
Warmup Train [14][1670/3239]	Time 0.231 (0.231)	Data 0.001 (0.010)	Loss 5.0626 (4.9375)	Top-1 acc 11.328 (14.495)	Top-5 acc 31.250 (32.663)	lr 0.03719
Warmup Train [14][1680/3239]	Time 0.132 (0.231)	Data 0.001 (0.010)	Loss 4.9141 (4.9374)	Top-1 acc 15.625 (14.494)	Top-5 acc 31.250 (32.663)	lr 0.03718
Warmup Train [14][1690/3239]	Time 0.194 (0.231)	Data 0.001 (0.010)	Loss 5.0358 (4.9373)	Top-1 acc 13.672 (14.491)	Top-5 acc 29.688 (32.662)	lr 0.03718
Warmup Train [14][1700/3239]	Time 0.171 (0.231)	Data 0.001 (0.009)	Loss 4.8013 (4.9373)	Top-1 acc 13.281 (14.490)	Top-5 acc 39.844 (32.666)	lr 0.03717
Warmup Train [14][1710/3239]	Time 0.235 (0.231)	Data 0.001 (0.009)	Loss 4.8635 (4.9373)	Top-1 acc 14.453 (14.487)	Top-5 acc 35.547 (32.660)	lr 0.03717
Warmup Train [14][1720/3239]	Time 0.169 (0.231)	Data 0.001 (0.009)	Loss 4.9402 (4.9371)	Top-1 acc 14.844 (14.496)	Top-5 acc 34.766 (32.671)	lr 0.03716
Warmup Train [14][1730/3239]	Time 0.248 (0.231)	Data 0.001 (0.009)	Loss 4.8785 (4.9369)	Top-1 acc 17.188 (14.498)	Top-5 acc 30.469 (32.670)	lr 0.03716
Warmup Train [14][1740/3239]	Time 0.331 (0.231)	Data 0.001 (0.009)	Loss 5.0401 (4.9367)	Top-1 acc 15.234 (14.498)	Top-5 acc 32.031 (32.676)	lr 0.03715
Warmup Train [14][1750/3239]	Time 0.245 (0.231)	Data 0.001 (0.009)	Loss 5.0011 (4.9368)	Top-1 acc 13.672 (14.497)	Top-5 acc 31.641 (32.673)	lr 0.03715
Warmup Train [14][1760/3239]	Time 0.147 (0.231)	Data 0.002 (0.009)	Loss 4.8377 (4.9367)	Top-1 acc 16.797 (14.500)	Top-5 acc 37.109 (32.676)	lr 0.03714
Warmup Train [14][1770/3239]	Time 0.168 (0.230)	Data 0.001 (0.009)	Loss 4.9111 (4.9363)	Top-1 acc 16.406 (14.506)	Top-5 acc 33.203 (32.687)	lr 0.03714
Warmup Train [14][1780/3239]	Time 0.220 (0.230)	Data 0.001 (0.009)	Loss 4.9469 (4.9361)	Top-1 acc 14.844 (14.508)	Top-5 acc 33.594 (32.691)	lr 0.03713
Warmup Train [14][1790/3239]	Time 0.243 (0.230)	Data 0.001 (0.009)	Loss 4.9710 (4.9361)	Top-1 acc 12.109 (14.509)	Top-5 acc 30.078 (32.691)	lr 0.03713
Warmup Train [14][1800/3239]	Time 0.220 (0.230)	Data 0.002 (0.009)	Loss 4.7354 (4.9358)	Top-1 acc 15.625 (14.511)	Top-5 acc 35.938 (32.703)	lr 0.03712
Warmup Train [14][1810/3239]	Time 0.212 (0.230)	Data 0.001 (0.009)	Loss 4.9638 (4.9357)	Top-1 acc 15.625 (14.509)	Top-5 acc 32.812 (32.704)	lr 0.03711
Warmup Train [14][1820/3239]	Time 0.225 (0.230)	Data 0.001 (0.009)	Loss 5.0895 (4.9357)	Top-1 acc 12.891 (14.508)	Top-5 acc 32.422 (32.706)	lr 0.03711
Warmup Train [14][1830/3239]	Time 0.179 (0.230)	Data 0.001 (0.009)	Loss 5.0285 (4.9358)	Top-1 acc 12.500 (14.508)	Top-5 acc 32.031 (32.702)	lr 0.03710
Warmup Train [14][1840/3239]	Time 0.324 (0.230)	Data 0.001 (0.009)	Loss 4.8454 (4.9360)	Top-1 acc 13.281 (14.502)	Top-5 acc 34.375 (32.701)	lr 0.03710
Warmup Train [14][1850/3239]	Time 0.138 (0.230)	Data 0.001 (0.009)	Loss 4.9313 (4.9358)	Top-1 acc 14.844 (14.506)	Top-5 acc 33.203 (32.705)	lr 0.03709
Warmup Train [14][1860/3239]	Time 0.206 (0.230)	Data 0.001 (0.009)	Loss 4.9535 (4.9358)	Top-1 acc 13.281 (14.503)	Top-5 acc 32.031 (32.707)	lr 0.03709
Warmup Train [14][1870/3239]	Time 0.184 (0.230)	Data 0.001 (0.009)	Loss 4.9764 (4.9360)	Top-1 acc 16.797 (14.503)	Top-5 acc 35.547 (32.701)	lr 0.03708
Warmup Train [14][1880/3239]	Time 0.237 (0.230)	Data 0.001 (0.009)	Loss 5.0277 (4.9359)	Top-1 acc 11.719 (14.503)	Top-5 acc 32.422 (32.706)	lr 0.03708
Warmup Train [14][1890/3239]	Time 0.234 (0.230)	Data 0.001 (0.009)	Loss 4.9385 (4.9355)	Top-1 acc 13.672 (14.507)	Top-5 acc 30.859 (32.711)	lr 0.03707
Warmup Train [14][1900/3239]	Time 0.254 (0.230)	Data 0.001 (0.009)	Loss 4.9621 (4.9352)	Top-1 acc 11.719 (14.508)	Top-5 acc 30.469 (32.718)	lr 0.03707
Warmup Train [14][1910/3239]	Time 0.136 (0.230)	Data 0.001 (0.009)	Loss 5.0349 (4.9352)	Top-1 acc 12.891 (14.506)	Top-5 acc 31.641 (32.723)	lr 0.03706
Warmup Train [14][1920/3239]	Time 0.267 (0.230)	Data 0.001 (0.009)	Loss 4.8805 (4.9349)	Top-1 acc 14.062 (14.513)	Top-5 acc 36.719 (32.732)	lr 0.03706
Warmup Train [14][1930/3239]	Time 0.201 (0.230)	Data 0.002 (0.009)	Loss 4.8915 (4.9346)	Top-1 acc 14.062 (14.513)	Top-5 acc 30.469 (32.740)	lr 0.03705
Warmup Train [14][1940/3239]	Time 0.158 (0.230)	Data 0.001 (0.009)	Loss 4.8899 (4.9343)	Top-1 acc 17.578 (14.520)	Top-5 acc 32.031 (32.750)	lr 0.03705
Warmup Train [14][1950/3239]	Time 0.217 (0.230)	Data 0.001 (0.009)	Loss 4.9374 (4.9345)	Top-1 acc 12.500 (14.515)	Top-5 acc 32.422 (32.745)	lr 0.03704
Warmup Train [14][1960/3239]	Time 0.312 (0.230)	Data 0.001 (0.009)	Loss 5.0765 (4.9346)	Top-1 acc 12.500 (14.517)	Top-5 acc 24.219 (32.744)	lr 0.03704
Warmup Train [14][1970/3239]	Time 0.232 (0.230)	Data 0.001 (0.009)	Loss 4.8833 (4.9346)	Top-1 acc 13.281 (14.513)	Top-5 acc 32.031 (32.744)	lr 0.03703
Warmup Train [14][1980/3239]	Time 0.189 (0.230)	Data 0.001 (0.009)	Loss 4.9319 (4.9344)	Top-1 acc 13.672 (14.515)	Top-5 acc 29.688 (32.749)	lr 0.03702
Warmup Train [14][1990/3239]	Time 0.277 (0.230)	Data 0.001 (0.009)	Loss 4.9045 (4.9342)	Top-1 acc 14.453 (14.513)	Top-5 acc 34.375 (32.756)	lr 0.03702
Warmup Train [14][2000/3239]	Time 0.200 (0.230)	Data 0.001 (0.009)	Loss 4.9511 (4.9341)	Top-1 acc 13.281 (14.513)	Top-5 acc 30.078 (32.760)	lr 0.03701
Warmup Train [14][2010/3239]	Time 0.250 (0.230)	Data 0.003 (0.009)	Loss 4.8417 (4.9344)	Top-1 acc 15.625 (14.510)	Top-5 acc 37.891 (32.753)	lr 0.03701
Warmup Train [14][2020/3239]	Time 0.186 (0.230)	Data 0.002 (0.009)	Loss 5.0875 (4.9341)	Top-1 acc 16.406 (14.517)	Top-5 acc 28.516 (32.757)	lr 0.03700
Warmup Train [14][2030/3239]	Time 0.199 (0.229)	Data 0.001 (0.009)	Loss 4.8677 (4.9340)	Top-1 acc 13.672 (14.516)	Top-5 acc 33.984 (32.758)	lr 0.03700
Warmup Train [14][2040/3239]	Time 0.187 (0.229)	Data 0.001 (0.009)	Loss 4.8384 (4.9339)	Top-1 acc 17.188 (14.515)	Top-5 acc 30.469 (32.758)	lr 0.03699
Warmup Train [14][2050/3239]	Time 0.225 (0.229)	Data 0.002 (0.008)	Loss 5.0767 (4.9338)	Top-1 acc 12.891 (14.518)	Top-5 acc 30.469 (32.761)	lr 0.03699
Warmup Train [14][2060/3239]	Time 0.363 (0.229)	Data 0.002 (0.008)	Loss 4.9236 (4.9337)	Top-1 acc 18.750 (14.523)	Top-5 acc 34.375 (32.762)	lr 0.03698
Warmup Train [14][2070/3239]	Time 0.135 (0.229)	Data 0.001 (0.008)	Loss 4.8305 (4.9332)	Top-1 acc 14.062 (14.528)	Top-5 acc 32.812 (32.771)	lr 0.03698
Warmup Train [14][2080/3239]	Time 0.195 (0.229)	Data 0.001 (0.008)	Loss 4.7703 (4.9330)	Top-1 acc 14.844 (14.530)	Top-5 acc 35.547 (32.770)	lr 0.03697
Warmup Train [14][2090/3239]	Time 0.157 (0.229)	Data 0.002 (0.008)	Loss 4.9728 (4.9331)	Top-1 acc 14.062 (14.533)	Top-5 acc 33.203 (32.769)	lr 0.03697
Warmup Train [14][2100/3239]	Time 0.260 (0.229)	Data 0.001 (0.008)	Loss 5.0693 (4.9333)	Top-1 acc 10.938 (14.529)	Top-5 acc 27.734 (32.763)	lr 0.03696
Warmup Train [14][2110/3239]	Time 0.237 (0.229)	Data 0.001 (0.008)	Loss 4.8908 (4.9330)	Top-1 acc 17.188 (14.535)	Top-5 acc 32.422 (32.771)	lr 0.03696
Warmup Train [14][2120/3239]	Time 0.173 (0.229)	Data 0.001 (0.008)	Loss 5.1441 (4.9329)	Top-1 acc 13.672 (14.538)	Top-5 acc 30.078 (32.772)	lr 0.03695
Warmup Train [14][2130/3239]	Time 0.174 (0.229)	Data 0.003 (0.008)	Loss 4.8481 (4.9329)	Top-1 acc 14.844 (14.541)	Top-5 acc 33.203 (32.773)	lr 0.03694
Warmup Train [14][2140/3239]	Time 0.238 (0.229)	Data 0.001 (0.008)	Loss 4.9098 (4.9328)	Top-1 acc 16.406 (14.547)	Top-5 acc 33.594 (32.775)	lr 0.03694
Warmup Train [14][2150/3239]	Time 0.266 (0.229)	Data 0.001 (0.008)	Loss 4.9095 (4.9325)	Top-1 acc 14.062 (14.554)	Top-5 acc 33.594 (32.783)	lr 0.03693
Warmup Train [14][2160/3239]	Time 0.207 (0.229)	Data 0.001 (0.008)	Loss 4.9774 (4.9322)	Top-1 acc 12.891 (14.554)	Top-5 acc 27.734 (32.789)	lr 0.03693
Warmup Train [14][2170/3239]	Time 0.285 (0.229)	Data 0.001 (0.008)	Loss 4.8468 (4.9321)	Top-1 acc 14.453 (14.552)	Top-5 acc 33.594 (32.791)	lr 0.03692
Warmup Train [14][2180/3239]	Time 0.395 (0.229)	Data 0.001 (0.008)	Loss 5.0301 (4.9320)	Top-1 acc 12.109 (14.557)	Top-5 acc 31.250 (32.798)	lr 0.03692
Warmup Train [14][2190/3239]	Time 0.229 (0.229)	Data 0.001 (0.008)	Loss 4.7576 (4.9317)	Top-1 acc 16.406 (14.560)	Top-5 acc 37.109 (32.804)	lr 0.03691
Warmup Train [14][2200/3239]	Time 0.200 (0.229)	Data 0.001 (0.008)	Loss 4.8754 (4.9315)	Top-1 acc 13.281 (14.559)	Top-5 acc 34.375 (32.807)	lr 0.03691
Warmup Train [14][2210/3239]	Time 0.264 (0.229)	Data 0.001 (0.008)	Loss 4.7611 (4.9313)	Top-1 acc 17.188 (14.559)	Top-5 acc 35.156 (32.809)	lr 0.03690
Warmup Train [14][2220/3239]	Time 0.194 (0.229)	Data 0.001 (0.008)	Loss 4.9925 (4.9313)	Top-1 acc 16.016 (14.558)	Top-5 acc 30.078 (32.809)	lr 0.03690
Warmup Train [14][2230/3239]	Time 0.206 (0.229)	Data 0.001 (0.008)	Loss 4.9324 (4.9312)	Top-1 acc 15.234 (14.564)	Top-5 acc 32.422 (32.811)	lr 0.03689
Warmup Train [14][2240/3239]	Time 0.220 (0.229)	Data 0.002 (0.008)	Loss 5.0002 (4.9310)	Top-1 acc 12.891 (14.569)	Top-5 acc 27.344 (32.813)	lr 0.03689
Warmup Train [14][2250/3239]	Time 0.252 (0.229)	Data 0.002 (0.008)	Loss 4.9763 (4.9308)	Top-1 acc 14.453 (14.573)	Top-5 acc 33.203 (32.816)	lr 0.03688
Warmup Train [14][2260/3239]	Time 0.253 (0.229)	Data 0.002 (0.008)	Loss 4.8880 (4.9307)	Top-1 acc 15.234 (14.571)	Top-5 acc 35.938 (32.824)	lr 0.03688
Warmup Train [14][2270/3239]	Time 0.216 (0.229)	Data 0.001 (0.008)	Loss 4.9224 (4.9306)	Top-1 acc 13.672 (14.570)	Top-5 acc 29.688 (32.823)	lr 0.03687
Warmup Train [14][2280/3239]	Time 0.267 (0.229)	Data 0.001 (0.008)	Loss 4.9736 (4.9306)	Top-1 acc 14.453 (14.572)	Top-5 acc 31.250 (32.824)	lr 0.03687
Warmup Train [14][2290/3239]	Time 0.219 (0.229)	Data 0.001 (0.008)	Loss 4.8598 (4.9305)	Top-1 acc 16.797 (14.579)	Top-5 acc 35.547 (32.826)	lr 0.03686
Warmup Train [14][2300/3239]	Time 0.240 (0.229)	Data 0.001 (0.008)	Loss 4.8430 (4.9302)	Top-1 acc 15.234 (14.584)	Top-5 acc 38.672 (32.837)	lr 0.03685
Warmup Train [14][2310/3239]	Time 0.200 (0.229)	Data 0.001 (0.008)	Loss 4.7989 (4.9301)	Top-1 acc 19.141 (14.589)	Top-5 acc 38.281 (32.843)	lr 0.03685
Warmup Train [14][2320/3239]	Time 0.267 (0.229)	Data 0.001 (0.008)	Loss 4.7258 (4.9298)	Top-1 acc 18.359 (14.589)	Top-5 acc 38.281 (32.848)	lr 0.03684
Warmup Train [14][2330/3239]	Time 0.200 (0.229)	Data 0.028 (0.008)	Loss 4.9173 (4.9298)	Top-1 acc 14.062 (14.592)	Top-5 acc 34.375 (32.849)	lr 0.03684
Warmup Train [14][2340/3239]	Time 0.238 (0.229)	Data 0.001 (0.008)	Loss 4.9430 (4.9296)	Top-1 acc 16.797 (14.595)	Top-5 acc 33.594 (32.853)	lr 0.03683
Warmup Train [14][2350/3239]	Time 0.259 (0.229)	Data 0.001 (0.008)	Loss 5.0507 (4.9294)	Top-1 acc 15.234 (14.602)	Top-5 acc 30.469 (32.857)	lr 0.03683
Warmup Train [14][2360/3239]	Time 0.201 (0.229)	Data 0.001 (0.008)	Loss 4.9309 (4.9294)	Top-1 acc 14.062 (14.598)	Top-5 acc 31.641 (32.858)	lr 0.03682
Warmup Train [14][2370/3239]	Time 0.241 (0.229)	Data 0.001 (0.008)	Loss 4.8704 (4.9292)	Top-1 acc 18.750 (14.604)	Top-5 acc 33.594 (32.862)	lr 0.03682
Warmup Train [14][2380/3239]	Time 0.243 (0.229)	Data 0.001 (0.008)	Loss 4.9362 (4.9292)	Top-1 acc 14.844 (14.605)	Top-5 acc 31.250 (32.862)	lr 0.03681
Warmup Train [14][2390/3239]	Time 0.255 (0.229)	Data 0.002 (0.008)	Loss 4.7399 (4.9293)	Top-1 acc 17.188 (14.604)	Top-5 acc 39.062 (32.859)	lr 0.03681
Warmup Train [14][2400/3239]	Time 0.250 (0.229)	Data 0.001 (0.008)	Loss 4.8257 (4.9293)	Top-1 acc 17.578 (14.601)	Top-5 acc 35.156 (32.858)	lr 0.03680
Warmup Train [14][2410/3239]	Time 0.179 (0.229)	Data 0.001 (0.008)	Loss 4.9868 (4.9292)	Top-1 acc 17.578 (14.609)	Top-5 acc 33.984 (32.862)	lr 0.03680
Warmup Train [14][2420/3239]	Time 0.271 (0.229)	Data 0.001 (0.008)	Loss 5.0434 (4.9290)	Top-1 acc 13.672 (14.616)	Top-5 acc 34.766 (32.871)	lr 0.03679
Warmup Train [14][2430/3239]	Time 0.189 (0.229)	Data 0.001 (0.008)	Loss 4.8175 (4.9290)	Top-1 acc 11.719 (14.611)	Top-5 acc 33.594 (32.870)	lr 0.03678
Warmup Train [14][2440/3239]	Time 0.189 (0.229)	Data 0.001 (0.008)	Loss 4.8603 (4.9288)	Top-1 acc 19.141 (14.618)	Top-5 acc 37.891 (32.877)	lr 0.03678
Warmup Train [14][2450/3239]	Time 0.234 (0.229)	Data 0.001 (0.008)	Loss 4.8957 (4.9286)	Top-1 acc 16.016 (14.623)	Top-5 acc 30.078 (32.882)	lr 0.03677
Warmup Train [14][2460/3239]	Time 0.230 (0.229)	Data 0.001 (0.007)	Loss 4.8312 (4.9282)	Top-1 acc 13.281 (14.629)	Top-5 acc 34.766 (32.892)	lr 0.03677
Warmup Train [14][2470/3239]	Time 0.214 (0.228)	Data 0.001 (0.007)	Loss 5.0077 (4.9280)	Top-1 acc 15.625 (14.633)	Top-5 acc 33.203 (32.896)	lr 0.03676
Warmup Train [14][2480/3239]	Time 0.265 (0.228)	Data 0.001 (0.007)	Loss 4.7595 (4.9277)	Top-1 acc 16.406 (14.637)	Top-5 acc 35.938 (32.901)	lr 0.03676
Warmup Train [14][2490/3239]	Time 0.193 (0.228)	Data 0.001 (0.007)	Loss 4.8342 (4.9275)	Top-1 acc 19.922 (14.644)	Top-5 acc 38.281 (32.909)	lr 0.03675
Warmup Train [14][2500/3239]	Time 0.245 (0.228)	Data 0.001 (0.007)	Loss 4.8632 (4.9274)	Top-1 acc 17.188 (14.646)	Top-5 acc 35.547 (32.911)	lr 0.03675
Warmup Train [14][2510/3239]	Time 0.204 (0.228)	Data 0.001 (0.007)	Loss 5.0825 (4.9275)	Top-1 acc 12.500 (14.641)	Top-5 acc 28.906 (32.905)	lr 0.03674
Warmup Train [14][2520/3239]	Time 0.221 (0.228)	Data 0.001 (0.007)	Loss 4.9294 (4.9272)	Top-1 acc 14.844 (14.645)	Top-5 acc 31.641 (32.912)	lr 0.03674
Warmup Train [14][2530/3239]	Time 0.182 (0.228)	Data 0.001 (0.007)	Loss 4.7575 (4.9268)	Top-1 acc 17.969 (14.653)	Top-5 acc 39.062 (32.924)	lr 0.03673
Warmup Train [14][2540/3239]	Time 0.295 (0.228)	Data 0.001 (0.007)	Loss 4.8616 (4.9266)	Top-1 acc 14.844 (14.656)	Top-5 acc 33.594 (32.930)	lr 0.03673
Warmup Train [14][2550/3239]	Time 0.256 (0.228)	Data 0.001 (0.007)	Loss 4.7994 (4.9264)	Top-1 acc 17.188 (14.658)	Top-5 acc 36.328 (32.934)	lr 0.03672
Warmup Train [14][2560/3239]	Time 0.180 (0.228)	Data 0.001 (0.007)	Loss 4.9294 (4.9264)	Top-1 acc 16.406 (14.659)	Top-5 acc 33.203 (32.937)	lr 0.03672
Warmup Train [14][2570/3239]	Time 0.339 (0.228)	Data 0.001 (0.007)	Loss 4.9228 (4.9265)	Top-1 acc 16.797 (14.660)	Top-5 acc 31.250 (32.938)	lr 0.03671
Warmup Train [14][2580/3239]	Time 0.228 (0.228)	Data 0.001 (0.007)	Loss 4.9997 (4.9264)	Top-1 acc 14.062 (14.656)	Top-5 acc 36.328 (32.938)	lr 0.03670
Warmup Train [14][2590/3239]	Time 0.204 (0.228)	Data 0.001 (0.007)	Loss 4.7091 (4.9263)	Top-1 acc 21.094 (14.660)	Top-5 acc 38.281 (32.944)	lr 0.03670
Warmup Train [14][2600/3239]	Time 0.140 (0.228)	Data 0.001 (0.007)	Loss 5.0408 (4.9262)	Top-1 acc 12.891 (14.661)	Top-5 acc 28.906 (32.939)	lr 0.03669
Warmup Train [14][2610/3239]	Time 0.246 (0.228)	Data 0.001 (0.007)	Loss 4.7756 (4.9262)	Top-1 acc 18.750 (14.663)	Top-5 acc 37.109 (32.946)	lr 0.03669
Warmup Train [14][2620/3239]	Time 0.181 (0.228)	Data 0.002 (0.007)	Loss 4.7547 (4.9260)	Top-1 acc 21.484 (14.664)	Top-5 acc 37.109 (32.948)	lr 0.03668
Warmup Train [14][2630/3239]	Time 0.136 (0.228)	Data 0.001 (0.007)	Loss 4.8011 (4.9257)	Top-1 acc 12.891 (14.666)	Top-5 acc 32.812 (32.950)	lr 0.03668
Warmup Train [14][2640/3239]	Time 0.173 (0.228)	Data 0.001 (0.007)	Loss 4.9450 (4.9257)	Top-1 acc 12.500 (14.665)	Top-5 acc 32.422 (32.952)	lr 0.03667
Warmup Train [14][2650/3239]	Time 0.208 (0.228)	Data 0.001 (0.007)	Loss 4.7997 (4.9256)	Top-1 acc 16.406 (14.662)	Top-5 acc 36.719 (32.954)	lr 0.03667
Warmup Train [14][2660/3239]	Time 0.153 (0.228)	Data 0.002 (0.007)	Loss 4.9014 (4.9253)	Top-1 acc 16.016 (14.666)	Top-5 acc 32.812 (32.960)	lr 0.03666
Warmup Train [14][2670/3239]	Time 0.241 (0.228)	Data 0.037 (0.007)	Loss 4.7534 (4.9252)	Top-1 acc 12.500 (14.667)	Top-5 acc 37.500 (32.964)	lr 0.03666
Warmup Train [14][2680/3239]	Time 0.271 (0.228)	Data 0.001 (0.007)	Loss 4.6998 (4.9251)	Top-1 acc 16.406 (14.667)	Top-5 acc 39.062 (32.969)	lr 0.03665
Warmup Train [14][2690/3239]	Time 0.293 (0.228)	Data 0.002 (0.007)	Loss 4.9017 (4.9250)	Top-1 acc 14.453 (14.669)	Top-5 acc 32.812 (32.967)	lr 0.03665
Warmup Train [14][2700/3239]	Time 0.195 (0.228)	Data 0.002 (0.007)	Loss 4.9457 (4.9250)	Top-1 acc 15.234 (14.672)	Top-5 acc 34.766 (32.970)	lr 0.03664
Warmup Train [14][2710/3239]	Time 0.230 (0.228)	Data 0.001 (0.007)	Loss 4.9860 (4.9247)	Top-1 acc 16.016 (14.677)	Top-5 acc 33.594 (32.976)	lr 0.03663
Warmup Train [14][2720/3239]	Time 0.265 (0.228)	Data 0.001 (0.007)	Loss 4.8747 (4.9247)	Top-1 acc 18.359 (14.679)	Top-5 acc 33.594 (32.976)	lr 0.03663
Warmup Train [14][2730/3239]	Time 0.220 (0.228)	Data 0.001 (0.007)	Loss 4.8172 (4.9243)	Top-1 acc 15.625 (14.685)	Top-5 acc 33.203 (32.982)	lr 0.03662
Warmup Train [14][2740/3239]	Time 0.142 (0.228)	Data 0.001 (0.007)	Loss 4.8757 (4.9242)	Top-1 acc 16.016 (14.688)	Top-5 acc 33.594 (32.988)	lr 0.03662
Warmup Train [14][2750/3239]	Time 0.194 (0.228)	Data 0.001 (0.007)	Loss 4.9210 (4.9240)	Top-1 acc 14.844 (14.686)	Top-5 acc 31.641 (32.991)	lr 0.03661
Warmup Train [14][2760/3239]	Time 0.308 (0.228)	Data 0.001 (0.007)	Loss 4.9463 (4.9238)	Top-1 acc 12.891 (14.689)	Top-5 acc 33.594 (32.993)	lr 0.03661
Warmup Train [14][2770/3239]	Time 0.288 (0.228)	Data 0.001 (0.007)	Loss 4.9745 (4.9235)	Top-1 acc 13.672 (14.698)	Top-5 acc 32.812 (33.000)	lr 0.03660
Warmup Train [14][2780/3239]	Time 0.230 (0.228)	Data 0.001 (0.007)	Loss 4.7702 (4.9236)	Top-1 acc 19.922 (14.701)	Top-5 acc 37.109 (33.001)	lr 0.03660
Warmup Train [14][2790/3239]	Time 0.378 (0.228)	Data 0.001 (0.007)	Loss 4.8760 (4.9237)	Top-1 acc 14.062 (14.700)	Top-5 acc 33.203 (32.999)	lr 0.03659
Warmup Train [14][2800/3239]	Time 0.332 (0.228)	Data 0.001 (0.007)	Loss 4.9008 (4.9237)	Top-1 acc 14.453 (14.699)	Top-5 acc 33.984 (33.002)	lr 0.03659
Warmup Train [14][2810/3239]	Time 0.261 (0.228)	Data 0.001 (0.007)	Loss 4.9827 (4.9236)	Top-1 acc 10.938 (14.698)	Top-5 acc 30.859 (33.003)	lr 0.03658
Warmup Train [14][2820/3239]	Time 0.241 (0.228)	Data 0.001 (0.007)	Loss 4.8910 (4.9233)	Top-1 acc 16.016 (14.703)	Top-5 acc 32.422 (33.007)	lr 0.03658
Warmup Train [14][2830/3239]	Time 0.154 (0.228)	Data 0.002 (0.007)	Loss 4.8895 (4.9233)	Top-1 acc 16.797 (14.703)	Top-5 acc 33.594 (33.007)	lr 0.03657
Warmup Train [14][2840/3239]	Time 0.265 (0.228)	Data 0.001 (0.007)	Loss 4.8258 (4.9231)	Top-1 acc 15.625 (14.708)	Top-5 acc 36.719 (33.014)	lr 0.03657
Warmup Train [14][2850/3239]	Time 0.194 (0.228)	Data 0.001 (0.007)	Loss 4.7594 (4.9228)	Top-1 acc 20.703 (14.716)	Top-5 acc 40.625 (33.021)	lr 0.03656
Warmup Train [14][2860/3239]	Time 0.171 (0.228)	Data 0.001 (0.007)	Loss 4.8968 (4.9227)	Top-1 acc 14.453 (14.719)	Top-5 acc 35.938 (33.024)	lr 0.03655
Warmup Train [14][2870/3239]	Time 0.278 (0.228)	Data 0.002 (0.007)	Loss 4.8661 (4.9225)	Top-1 acc 15.234 (14.723)	Top-5 acc 33.984 (33.029)	lr 0.03655
Warmup Train [14][2880/3239]	Time 0.168 (0.228)	Data 0.001 (0.007)	Loss 4.9248 (4.9223)	Top-1 acc 15.625 (14.725)	Top-5 acc 33.203 (33.032)	lr 0.03654
Warmup Train [14][2890/3239]	Time 0.184 (0.228)	Data 0.002 (0.007)	Loss 4.9461 (4.9221)	Top-1 acc 14.844 (14.728)	Top-5 acc 30.469 (33.038)	lr 0.03654
Warmup Train [14][2900/3239]	Time 0.215 (0.228)	Data 0.001 (0.007)	Loss 4.9450 (4.9222)	Top-1 acc 16.016 (14.726)	Top-5 acc 37.109 (33.040)	lr 0.03653
Warmup Train [14][2910/3239]	Time 0.171 (0.228)	Data 0.001 (0.007)	Loss 4.9147 (4.9219)	Top-1 acc 13.672 (14.728)	Top-5 acc 32.031 (33.048)	lr 0.03653
Warmup Train [14][2920/3239]	Time 0.280 (0.228)	Data 0.001 (0.007)	Loss 5.0217 (4.9219)	Top-1 acc 14.062 (14.731)	Top-5 acc 30.859 (33.049)	lr 0.03652
Warmup Train [14][2930/3239]	Time 0.248 (0.228)	Data 0.001 (0.007)	Loss 4.8565 (4.9219)	Top-1 acc 14.844 (14.728)	Top-5 acc 33.203 (33.049)	lr 0.03652
Warmup Train [14][2940/3239]	Time 0.239 (0.228)	Data 0.001 (0.007)	Loss 4.8174 (4.9216)	Top-1 acc 14.453 (14.731)	Top-5 acc 31.250 (33.054)	lr 0.03651
Warmup Train [14][2950/3239]	Time 0.244 (0.228)	Data 0.001 (0.007)	Loss 4.8713 (4.9214)	Top-1 acc 16.406 (14.738)	Top-5 acc 30.859 (33.057)	lr 0.03651
Warmup Train [14][2960/3239]	Time 0.253 (0.228)	Data 0.001 (0.007)	Loss 4.7617 (4.9212)	Top-1 acc 17.188 (14.740)	Top-5 acc 38.672 (33.061)	lr 0.03650
Warmup Train [14][2970/3239]	Time 0.211 (0.228)	Data 0.001 (0.007)	Loss 5.0551 (4.9212)	Top-1 acc 14.062 (14.741)	Top-5 acc 27.734 (33.060)	lr 0.03650
Warmup Train [14][2980/3239]	Time 0.183 (0.228)	Data 0.001 (0.007)	Loss 4.8516 (4.9212)	Top-1 acc 14.844 (14.742)	Top-5 acc 32.812 (33.061)	lr 0.03649
Warmup Train [14][2990/3239]	Time 0.239 (0.228)	Data 0.002 (0.007)	Loss 5.0112 (4.9211)	Top-1 acc 12.891 (14.744)	Top-5 acc 24.609 (33.063)	lr 0.03648
Warmup Train [14][3000/3239]	Time 0.304 (0.228)	Data 0.001 (0.007)	Loss 4.8651 (4.9209)	Top-1 acc 16.406 (14.750)	Top-5 acc 36.328 (33.069)	lr 0.03648
Warmup Train [14][3010/3239]	Time 0.201 (0.228)	Data 0.001 (0.007)	Loss 4.9770 (4.9208)	Top-1 acc 12.109 (14.747)	Top-5 acc 28.906 (33.072)	lr 0.03647
Warmup Train [14][3020/3239]	Time 0.242 (0.228)	Data 0.001 (0.007)	Loss 4.9789 (4.9208)	Top-1 acc 11.719 (14.749)	Top-5 acc 30.469 (33.070)	lr 0.03647
Warmup Train [14][3030/3239]	Time 0.289 (0.228)	Data 0.001 (0.007)	Loss 4.7902 (4.9208)	Top-1 acc 17.969 (14.750)	Top-5 acc 38.672 (33.074)	lr 0.03646
Warmup Train [14][3040/3239]	Time 0.296 (0.227)	Data 0.001 (0.007)	Loss 5.0230 (4.9207)	Top-1 acc 14.062 (14.752)	Top-5 acc 28.906 (33.077)	lr 0.03646
Warmup Train [14][3050/3239]	Time 0.148 (0.227)	Data 0.001 (0.007)	Loss 5.0552 (4.9208)	Top-1 acc 14.844 (14.754)	Top-5 acc 32.031 (33.079)	lr 0.03645
Warmup Train [14][3060/3239]	Time 0.153 (0.227)	Data 0.001 (0.007)	Loss 4.8157 (4.9206)	Top-1 acc 14.062 (14.753)	Top-5 acc 37.109 (33.081)	lr 0.03645
Warmup Train [14][3070/3239]	Time 0.254 (0.227)	Data 0.002 (0.007)	Loss 5.0072 (4.9205)	Top-1 acc 15.234 (14.758)	Top-5 acc 32.031 (33.083)	lr 0.03644
Warmup Train [14][3080/3239]	Time 0.212 (0.227)	Data 0.001 (0.007)	Loss 4.8824 (4.9204)	Top-1 acc 16.016 (14.760)	Top-5 acc 29.688 (33.082)	lr 0.03644
Warmup Train [14][3090/3239]	Time 0.265 (0.227)	Data 0.002 (0.007)	Loss 4.9393 (4.9204)	Top-1 acc 13.672 (14.761)	Top-5 acc 33.203 (33.082)	lr 0.03643
Warmup Train [14][3100/3239]	Time 0.237 (0.227)	Data 0.001 (0.007)	Loss 4.9557 (4.9204)	Top-1 acc 14.844 (14.761)	Top-5 acc 33.984 (33.083)	lr 0.03643
Warmup Train [14][3110/3239]	Time 0.213 (0.227)	Data 0.001 (0.007)	Loss 5.0565 (4.9202)	Top-1 acc 15.234 (14.766)	Top-5 acc 28.906 (33.085)	lr 0.03642
Warmup Train [14][3120/3239]	Time 0.250 (0.227)	Data 0.001 (0.007)	Loss 4.7674 (4.9202)	Top-1 acc 16.406 (14.769)	Top-5 acc 34.766 (33.087)	lr 0.03641
Warmup Train [14][3130/3239]	Time 0.249 (0.227)	Data 0.001 (0.007)	Loss 4.9616 (4.9201)	Top-1 acc 14.453 (14.771)	Top-5 acc 34.766 (33.091)	lr 0.03641
Warmup Train [14][3140/3239]	Time 0.227 (0.227)	Data 0.001 (0.007)	Loss 4.8823 (4.9201)	Top-1 acc 14.844 (14.771)	Top-5 acc 35.938 (33.092)	lr 0.03640
Warmup Train [14][3150/3239]	Time 0.253 (0.227)	Data 0.001 (0.006)	Loss 4.8466 (4.9200)	Top-1 acc 18.359 (14.771)	Top-5 acc 35.156 (33.093)	lr 0.03640
Warmup Train [14][3160/3239]	Time 0.131 (0.227)	Data 0.001 (0.007)	Loss 4.7368 (4.9199)	Top-1 acc 14.453 (14.772)	Top-5 acc 34.766 (33.095)	lr 0.03639
Warmup Train [14][3170/3239]	Time 0.206 (0.227)	Data 0.002 (0.007)	Loss 5.0427 (4.9199)	Top-1 acc 14.062 (14.771)	Top-5 acc 26.562 (33.095)	lr 0.03639
Warmup Train [14][3180/3239]	Time 0.201 (0.227)	Data 0.000 (0.006)	Loss 4.9699 (4.9198)	Top-1 acc 13.672 (14.775)	Top-5 acc 33.594 (33.100)	lr 0.03638
Warmup Train [14][3190/3239]	Time 0.231 (0.227)	Data 0.000 (0.006)	Loss 4.8211 (4.9197)	Top-1 acc 17.969 (14.779)	Top-5 acc 35.156 (33.103)	lr 0.03638
Warmup Train [14][3200/3239]	Time 0.143 (0.227)	Data 0.000 (0.006)	Loss 4.8787 (4.9196)	Top-1 acc 17.578 (14.781)	Top-5 acc 33.984 (33.109)	lr 0.03637
Warmup Train [14][3210/3239]	Time 0.157 (0.227)	Data 0.000 (0.006)	Loss 4.7942 (4.9195)	Top-1 acc 13.672 (14.782)	Top-5 acc 35.156 (33.110)	lr 0.03637
Warmup Train [14][3220/3239]	Time 0.170 (0.227)	Data 0.000 (0.006)	Loss 4.8216 (4.9192)	Top-1 acc 14.453 (14.789)	Top-5 acc 37.891 (33.123)	lr 0.03636
Warmup Train [14][3230/3239]	Time 0.153 (0.227)	Data 0.000 (0.006)	Loss 4.8254 (4.9189)	Top-1 acc 13.672 (14.792)	Top-5 acc 39.844 (33.129)	lr 0.03636
Warmup Train [14][3239/3239]	Time 0.117 (0.226)	Data 0.000 (0.006)	Loss 5.0915 (4.9189)	Top-1 acc 11.111 (14.794)	Top-5 acc 32.099 (33.130)	lr 0.03635
==========Warmup Valid [14/40]	loss 4.048	top-1 acc 20.407	top-5 acc 42.237	Train top-1 14.794	top-5 33.130	flops: 442.4M
Warmup Train [15][0/3239]	Time 14.971 (14.971)	Data 13.406 (13.406)	Loss 4.8841 (4.8841)	Top-1 acc 14.844 (14.844)	Top-5 acc 33.984 (33.984)	lr 0.03635
Warmup Train [15][10/3239]	Time 0.316 (1.980)	Data 0.003 (1.475)	Loss 4.8539 (4.8943)	Top-1 acc 15.625 (15.234)	Top-5 acc 34.766 (33.771)	lr 0.03634
Warmup Train [15][20/3239]	Time 0.271 (1.195)	Data 0.003 (0.774)	Loss 4.8339 (4.8747)	Top-1 acc 16.797 (15.402)	Top-5 acc 31.641 (33.668)	lr 0.03634
Warmup Train [15][30/3239]	Time 0.232 (0.896)	Data 0.002 (0.525)	Loss 4.7680 (4.8617)	Top-1 acc 18.359 (15.801)	Top-5 acc 37.500 (33.858)	lr 0.03633
Warmup Train [15][40/3239]	Time 0.193 (0.735)	Data 0.002 (0.398)	Loss 4.8010 (4.8538)	Top-1 acc 15.625 (15.997)	Top-5 acc 37.109 (34.070)	lr 0.03633
Warmup Train [15][50/3239]	Time 0.336 (0.641)	Data 0.002 (0.320)	Loss 5.0020 (4.8581)	Top-1 acc 15.625 (15.985)	Top-5 acc 34.766 (34.206)	lr 0.03632
Warmup Train [15][60/3239]	Time 0.241 (0.576)	Data 0.002 (0.268)	Loss 4.8568 (4.8529)	Top-1 acc 16.016 (15.932)	Top-5 acc 34.375 (34.298)	lr 0.03632
Warmup Train [15][70/3239]	Time 0.155 (0.525)	Data 0.001 (0.230)	Loss 4.6366 (4.8552)	Top-1 acc 18.359 (15.928)	Top-5 acc 42.188 (34.551)	lr 0.03631
Warmup Train [15][80/3239]	Time 0.244 (0.488)	Data 0.002 (0.202)	Loss 4.8217 (4.8514)	Top-1 acc 21.094 (15.963)	Top-5 acc 35.938 (34.722)	lr 0.03631
Warmup Train [15][90/3239]	Time 0.322 (0.461)	Data 0.003 (0.180)	Loss 4.8080 (4.8558)	Top-1 acc 16.016 (15.822)	Top-5 acc 37.109 (34.637)	lr 0.03630
Warmup Train [15][100/3239]	Time 0.227 (0.440)	Data 0.001 (0.163)	Loss 4.7589 (4.8566)	Top-1 acc 19.141 (15.849)	Top-5 acc 39.844 (34.673)	lr 0.03630
Warmup Train [15][110/3239]	Time 0.264 (0.421)	Data 0.001 (0.148)	Loss 5.0002 (4.8581)	Top-1 acc 16.016 (15.815)	Top-5 acc 33.594 (34.586)	lr 0.03629
Warmup Train [15][120/3239]	Time 0.304 (0.406)	Data 0.002 (0.136)	Loss 4.8886 (4.8576)	Top-1 acc 14.844 (15.738)	Top-5 acc 31.250 (34.530)	lr 0.03628
Warmup Train [15][130/3239]	Time 0.223 (0.391)	Data 0.002 (0.126)	Loss 4.8391 (4.8575)	Top-1 acc 14.844 (15.750)	Top-5 acc 36.719 (34.602)	lr 0.03628
Warmup Train [15][140/3239]	Time 0.264 (0.381)	Data 0.001 (0.117)	Loss 4.8576 (4.8576)	Top-1 acc 15.625 (15.772)	Top-5 acc 35.938 (34.605)	lr 0.03627
Warmup Train [15][150/3239]	Time 0.361 (0.372)	Data 0.002 (0.110)	Loss 4.8078 (4.8555)	Top-1 acc 19.141 (15.819)	Top-5 acc 42.188 (34.660)	lr 0.03627
Warmup Train [15][160/3239]	Time 0.184 (0.364)	Data 0.001 (0.103)	Loss 4.7846 (4.8548)	Top-1 acc 19.141 (15.846)	Top-5 acc 35.938 (34.676)	lr 0.03626
Warmup Train [15][170/3239]	Time 0.224 (0.356)	Data 0.001 (0.097)	Loss 4.9019 (4.8552)	Top-1 acc 16.016 (15.867)	Top-5 acc 32.422 (34.683)	lr 0.03626
Warmup Train [15][180/3239]	Time 0.251 (0.350)	Data 0.001 (0.092)	Loss 4.9592 (4.8549)	Top-1 acc 16.016 (15.869)	Top-5 acc 34.766 (34.673)	lr 0.03625
Warmup Train [15][190/3239]	Time 0.224 (0.343)	Data 0.002 (0.087)	Loss 4.7556 (4.8520)	Top-1 acc 16.797 (15.887)	Top-5 acc 36.328 (34.762)	lr 0.03625
Warmup Train [15][200/3239]	Time 0.208 (0.337)	Data 0.001 (0.083)	Loss 4.8880 (4.8476)	Top-1 acc 16.016 (15.934)	Top-5 acc 31.641 (34.855)	lr 0.03624
Warmup Train [15][210/3239]	Time 0.244 (0.332)	Data 0.001 (0.079)	Loss 4.8282 (4.8482)	Top-1 acc 16.016 (15.882)	Top-5 acc 34.375 (34.845)	lr 0.03624
Warmup Train [15][220/3239]	Time 0.246 (0.328)	Data 0.001 (0.076)	Loss 4.8483 (4.8501)	Top-1 acc 14.453 (15.839)	Top-5 acc 34.375 (34.819)	lr 0.03623
Warmup Train [15][230/3239]	Time 0.222 (0.323)	Data 0.001 (0.072)	Loss 4.9025 (4.8531)	Top-1 acc 16.406 (15.779)	Top-5 acc 34.766 (34.732)	lr 0.03623
Warmup Train [15][240/3239]	Time 0.361 (0.320)	Data 0.002 (0.070)	Loss 5.0262 (4.8534)	Top-1 acc 15.625 (15.807)	Top-5 acc 31.250 (34.741)	lr 0.03622
Warmup Train [15][250/3239]	Time 0.240 (0.316)	Data 0.001 (0.067)	Loss 4.8755 (4.8547)	Top-1 acc 17.188 (15.813)	Top-5 acc 36.719 (34.731)	lr 0.03621
Warmup Train [15][260/3239]	Time 0.215 (0.313)	Data 0.001 (0.064)	Loss 4.7715 (4.8534)	Top-1 acc 19.141 (15.776)	Top-5 acc 40.234 (34.758)	lr 0.03621
Warmup Train [15][270/3239]	Time 0.143 (0.309)	Data 0.001 (0.062)	Loss 4.7739 (4.8528)	Top-1 acc 17.578 (15.802)	Top-5 acc 37.109 (34.756)	lr 0.03620
Warmup Train [15][280/3239]	Time 0.255 (0.306)	Data 0.001 (0.060)	Loss 4.8584 (4.8537)	Top-1 acc 16.016 (15.822)	Top-5 acc 36.719 (34.750)	lr 0.03620
Warmup Train [15][290/3239]	Time 0.237 (0.304)	Data 0.001 (0.058)	Loss 4.7743 (4.8538)	Top-1 acc 13.672 (15.818)	Top-5 acc 35.938 (34.759)	lr 0.03619
Warmup Train [15][300/3239]	Time 0.152 (0.301)	Data 0.001 (0.056)	Loss 4.8216 (4.8554)	Top-1 acc 17.969 (15.799)	Top-5 acc 35.938 (34.728)	lr 0.03619
Warmup Train [15][310/3239]	Time 0.171 (0.298)	Data 0.001 (0.055)	Loss 4.8647 (4.8591)	Top-1 acc 16.797 (15.774)	Top-5 acc 30.859 (34.607)	lr 0.03618
Warmup Train [15][320/3239]	Time 0.196 (0.296)	Data 0.001 (0.053)	Loss 4.7452 (4.8606)	Top-1 acc 21.094 (15.750)	Top-5 acc 38.672 (34.578)	lr 0.03618
Warmup Train [15][330/3239]	Time 0.213 (0.294)	Data 0.002 (0.052)	Loss 4.8417 (4.8619)	Top-1 acc 14.844 (15.734)	Top-5 acc 35.547 (34.561)	lr 0.03617
Warmup Train [15][340/3239]	Time 0.257 (0.292)	Data 0.001 (0.050)	Loss 4.8543 (4.8614)	Top-1 acc 16.016 (15.757)	Top-5 acc 32.812 (34.589)	lr 0.03617
Warmup Train [15][350/3239]	Time 0.247 (0.290)	Data 0.001 (0.049)	Loss 4.8134 (4.8613)	Top-1 acc 17.578 (15.754)	Top-5 acc 36.328 (34.616)	lr 0.03616
Warmup Train [15][360/3239]	Time 0.249 (0.288)	Data 0.001 (0.047)	Loss 4.8716 (4.8611)	Top-1 acc 17.578 (15.727)	Top-5 acc 39.062 (34.620)	lr 0.03615
Warmup Train [15][370/3239]	Time 0.212 (0.286)	Data 0.001 (0.046)	Loss 4.9568 (4.8606)	Top-1 acc 17.578 (15.737)	Top-5 acc 34.766 (34.646)	lr 0.03615
Warmup Train [15][380/3239]	Time 0.266 (0.285)	Data 0.001 (0.045)	Loss 4.8072 (4.8613)	Top-1 acc 13.281 (15.718)	Top-5 acc 36.328 (34.623)	lr 0.03614
Warmup Train [15][390/3239]	Time 0.183 (0.283)	Data 0.001 (0.044)	Loss 4.8783 (4.8613)	Top-1 acc 16.016 (15.699)	Top-5 acc 33.594 (34.600)	lr 0.03614
Warmup Train [15][400/3239]	Time 0.182 (0.281)	Data 0.002 (0.043)	Loss 4.7714 (4.8617)	Top-1 acc 17.578 (15.716)	Top-5 acc 34.766 (34.585)	lr 0.03613
Warmup Train [15][410/3239]	Time 0.177 (0.280)	Data 0.001 (0.042)	Loss 4.8714 (4.8624)	Top-1 acc 16.016 (15.708)	Top-5 acc 35.156 (34.580)	lr 0.03613
Warmup Train [15][420/3239]	Time 0.280 (0.279)	Data 0.002 (0.041)	Loss 4.8182 (4.8624)	Top-1 acc 15.625 (15.700)	Top-5 acc 37.891 (34.563)	lr 0.03612
Warmup Train [15][430/3239]	Time 0.243 (0.277)	Data 0.001 (0.040)	Loss 4.7978 (4.8623)	Top-1 acc 17.188 (15.695)	Top-5 acc 37.500 (34.548)	lr 0.03612
Warmup Train [15][440/3239]	Time 0.331 (0.277)	Data 0.001 (0.039)	Loss 4.7749 (4.8614)	Top-1 acc 18.750 (15.718)	Top-5 acc 36.328 (34.554)	lr 0.03611
Warmup Train [15][450/3239]	Time 0.176 (0.275)	Data 0.002 (0.038)	Loss 4.9777 (4.8605)	Top-1 acc 12.891 (15.730)	Top-5 acc 30.859 (34.578)	lr 0.03611
Warmup Train [15][460/3239]	Time 0.191 (0.274)	Data 0.001 (0.038)	Loss 4.7559 (4.8607)	Top-1 acc 15.234 (15.706)	Top-5 acc 36.328 (34.558)	lr 0.03610
Warmup Train [15][470/3239]	Time 0.194 (0.273)	Data 0.001 (0.037)	Loss 4.9057 (4.8607)	Top-1 acc 14.844 (15.711)	Top-5 acc 34.375 (34.550)	lr 0.03610
Warmup Train [15][480/3239]	Time 0.237 (0.273)	Data 0.001 (0.036)	Loss 4.8005 (4.8607)	Top-1 acc 16.016 (15.698)	Top-5 acc 33.984 (34.542)	lr 0.03609
Warmup Train [15][490/3239]	Time 0.224 (0.271)	Data 0.001 (0.036)	Loss 4.9065 (4.8609)	Top-1 acc 16.016 (15.695)	Top-5 acc 33.203 (34.555)	lr 0.03608
Warmup Train [15][500/3239]	Time 0.230 (0.270)	Data 0.001 (0.035)	Loss 4.7432 (4.8594)	Top-1 acc 19.531 (15.707)	Top-5 acc 39.062 (34.597)	lr 0.03608
Warmup Train [15][510/3239]	Time 0.154 (0.269)	Data 0.001 (0.034)	Loss 4.8752 (4.8597)	Top-1 acc 12.109 (15.695)	Top-5 acc 31.250 (34.587)	lr 0.03607
Warmup Train [15][520/3239]	Time 0.240 (0.269)	Data 0.001 (0.034)	Loss 4.8514 (4.8594)	Top-1 acc 16.406 (15.704)	Top-5 acc 35.938 (34.593)	lr 0.03607
Warmup Train [15][530/3239]	Time 0.274 (0.268)	Data 0.001 (0.033)	Loss 4.9545 (4.8592)	Top-1 acc 14.844 (15.693)	Top-5 acc 29.297 (34.562)	lr 0.03606
Warmup Train [15][540/3239]	Time 0.281 (0.267)	Data 0.001 (0.033)	Loss 4.7633 (4.8586)	Top-1 acc 18.750 (15.705)	Top-5 acc 37.500 (34.567)	lr 0.03606
Warmup Train [15][550/3239]	Time 0.247 (0.266)	Data 0.001 (0.032)	Loss 4.5957 (4.8574)	Top-1 acc 19.531 (15.724)	Top-5 acc 40.625 (34.597)	lr 0.03605
Warmup Train [15][560/3239]	Time 0.147 (0.265)	Data 0.001 (0.032)	Loss 4.9471 (4.8573)	Top-1 acc 15.234 (15.704)	Top-5 acc 34.375 (34.603)	lr 0.03605
Warmup Train [15][570/3239]	Time 0.174 (0.265)	Data 0.002 (0.031)	Loss 4.6825 (4.8562)	Top-1 acc 16.797 (15.724)	Top-5 acc 38.672 (34.628)	lr 0.03604
Warmup Train [15][580/3239]	Time 0.205 (0.264)	Data 0.001 (0.031)	Loss 4.9302 (4.8569)	Top-1 acc 16.406 (15.705)	Top-5 acc 33.594 (34.602)	lr 0.03604
Warmup Train [15][590/3239]	Time 0.215 (0.263)	Data 0.001 (0.030)	Loss 4.8415 (4.8573)	Top-1 acc 15.234 (15.686)	Top-5 acc 33.203 (34.585)	lr 0.03603
Warmup Train [15][600/3239]	Time 0.216 (0.262)	Data 0.001 (0.030)	Loss 4.7739 (4.8569)	Top-1 acc 16.797 (15.702)	Top-5 acc 36.719 (34.595)	lr 0.03602
Warmup Train [15][610/3239]	Time 0.176 (0.262)	Data 0.001 (0.029)	Loss 5.0556 (4.8570)	Top-1 acc 12.891 (15.691)	Top-5 acc 29.688 (34.598)	lr 0.03602
Warmup Train [15][620/3239]	Time 0.218 (0.261)	Data 0.002 (0.029)	Loss 4.9011 (4.8573)	Top-1 acc 12.500 (15.683)	Top-5 acc 32.422 (34.599)	lr 0.03601
Warmup Train [15][630/3239]	Time 0.305 (0.260)	Data 0.001 (0.028)	Loss 4.8591 (4.8576)	Top-1 acc 15.625 (15.681)	Top-5 acc 32.031 (34.610)	lr 0.03601
Warmup Train [15][640/3239]	Time 0.201 (0.260)	Data 0.001 (0.028)	Loss 4.9429 (4.8577)	Top-1 acc 13.672 (15.679)	Top-5 acc 33.594 (34.597)	lr 0.03600
Warmup Train [15][650/3239]	Time 0.211 (0.259)	Data 0.001 (0.028)	Loss 4.9458 (4.8569)	Top-1 acc 14.844 (15.674)	Top-5 acc 30.469 (34.613)	lr 0.03600
Warmup Train [15][660/3239]	Time 0.276 (0.258)	Data 0.002 (0.027)	Loss 4.8545 (4.8566)	Top-1 acc 16.797 (15.688)	Top-5 acc 35.156 (34.621)	lr 0.03599
Warmup Train [15][670/3239]	Time 0.318 (0.258)	Data 0.001 (0.027)	Loss 4.8294 (4.8568)	Top-1 acc 16.016 (15.680)	Top-5 acc 34.375 (34.624)	lr 0.03599
Warmup Train [15][680/3239]	Time 0.155 (0.257)	Data 0.001 (0.026)	Loss 4.9049 (4.8568)	Top-1 acc 11.328 (15.665)	Top-5 acc 35.938 (34.623)	lr 0.03598
Warmup Train [15][690/3239]	Time 0.186 (0.257)	Data 0.001 (0.026)	Loss 4.9438 (4.8573)	Top-1 acc 12.891 (15.652)	Top-5 acc 30.859 (34.610)	lr 0.03598
Warmup Train [15][700/3239]	Time 0.213 (0.256)	Data 0.001 (0.026)	Loss 4.9167 (4.8572)	Top-1 acc 12.500 (15.642)	Top-5 acc 31.250 (34.607)	lr 0.03597
Warmup Train [15][710/3239]	Time 0.184 (0.256)	Data 0.001 (0.026)	Loss 4.7966 (4.8569)	Top-1 acc 12.500 (15.635)	Top-5 acc 36.719 (34.610)	lr 0.03596
Warmup Train [15][720/3239]	Time 0.183 (0.256)	Data 0.001 (0.025)	Loss 4.9647 (4.8576)	Top-1 acc 11.719 (15.619)	Top-5 acc 29.688 (34.599)	lr 0.03596
Warmup Train [15][730/3239]	Time 0.260 (0.255)	Data 0.001 (0.025)	Loss 4.9364 (4.8579)	Top-1 acc 11.328 (15.623)	Top-5 acc 32.031 (34.594)	lr 0.03595
Warmup Train [15][740/3239]	Time 0.167 (0.255)	Data 0.001 (0.025)	Loss 4.9104 (4.8583)	Top-1 acc 11.328 (15.617)	Top-5 acc 34.766 (34.594)	lr 0.03595
Warmup Train [15][750/3239]	Time 0.258 (0.254)	Data 0.002 (0.024)	Loss 4.8639 (4.8583)	Top-1 acc 13.281 (15.616)	Top-5 acc 36.328 (34.607)	lr 0.03594
Warmup Train [15][760/3239]	Time 0.239 (0.254)	Data 0.003 (0.024)	Loss 4.7603 (4.8582)	Top-1 acc 16.406 (15.614)	Top-5 acc 34.375 (34.614)	lr 0.03594
Warmup Train [15][770/3239]	Time 0.280 (0.253)	Data 0.001 (0.024)	Loss 4.6729 (4.8579)	Top-1 acc 19.141 (15.630)	Top-5 acc 40.234 (34.632)	lr 0.03593
Warmup Train [15][780/3239]	Time 0.287 (0.253)	Data 0.001 (0.023)	Loss 4.8493 (4.8585)	Top-1 acc 17.969 (15.632)	Top-5 acc 35.156 (34.625)	lr 0.03593
Warmup Train [15][790/3239]	Time 0.202 (0.252)	Data 0.001 (0.023)	Loss 4.7677 (4.8584)	Top-1 acc 17.969 (15.617)	Top-5 acc 37.891 (34.629)	lr 0.03592
Warmup Train [15][800/3239]	Time 0.209 (0.252)	Data 0.001 (0.023)	Loss 4.6347 (4.8578)	Top-1 acc 19.531 (15.635)	Top-5 acc 38.672 (34.644)	lr 0.03592
Warmup Train [15][810/3239]	Time 0.207 (0.251)	Data 0.001 (0.023)	Loss 4.9065 (4.8584)	Top-1 acc 14.844 (15.625)	Top-5 acc 29.688 (34.626)	lr 0.03591
Warmup Train [15][820/3239]	Time 0.212 (0.251)	Data 0.001 (0.022)	Loss 4.9165 (4.8586)	Top-1 acc 14.453 (15.624)	Top-5 acc 33.594 (34.627)	lr 0.03590
Warmup Train [15][830/3239]	Time 0.230 (0.251)	Data 0.001 (0.022)	Loss 4.9675 (4.8578)	Top-1 acc 16.797 (15.646)	Top-5 acc 34.375 (34.656)	lr 0.03590
Warmup Train [15][840/3239]	Time 0.141 (0.250)	Data 0.001 (0.022)	Loss 4.9460 (4.8574)	Top-1 acc 17.578 (15.653)	Top-5 acc 35.547 (34.659)	lr 0.03589
Warmup Train [15][850/3239]	Time 0.148 (0.250)	Data 0.001 (0.022)	Loss 4.7654 (4.8567)	Top-1 acc 14.062 (15.653)	Top-5 acc 37.109 (34.685)	lr 0.03589
Warmup Train [15][860/3239]	Time 0.190 (0.250)	Data 0.001 (0.022)	Loss 5.0888 (4.8568)	Top-1 acc 13.672 (15.659)	Top-5 acc 30.859 (34.693)	lr 0.03588
Warmup Train [15][870/3239]	Time 0.226 (0.249)	Data 0.001 (0.021)	Loss 4.5515 (4.8563)	Top-1 acc 19.531 (15.669)	Top-5 acc 41.797 (34.699)	lr 0.03588
Warmup Train [15][880/3239]	Time 0.245 (0.249)	Data 0.002 (0.021)	Loss 4.9605 (4.8564)	Top-1 acc 11.328 (15.665)	Top-5 acc 32.422 (34.708)	lr 0.03587
Warmup Train [15][890/3239]	Time 0.304 (0.249)	Data 0.023 (0.021)	Loss 4.7586 (4.8560)	Top-1 acc 16.016 (15.661)	Top-5 acc 37.891 (34.707)	lr 0.03587
Warmup Train [15][900/3239]	Time 0.187 (0.248)	Data 0.001 (0.021)	Loss 4.8292 (4.8560)	Top-1 acc 16.016 (15.659)	Top-5 acc 37.500 (34.716)	lr 0.03586
Warmup Train [15][910/3239]	Time 0.234 (0.248)	Data 0.001 (0.021)	Loss 5.0047 (4.8562)	Top-1 acc 10.547 (15.653)	Top-5 acc 30.469 (34.705)	lr 0.03586
Warmup Train [15][920/3239]	Time 0.238 (0.248)	Data 0.002 (0.020)	Loss 4.8433 (4.8563)	Top-1 acc 15.625 (15.656)	Top-5 acc 35.938 (34.711)	lr 0.03585
Warmup Train [15][930/3239]	Time 0.205 (0.248)	Data 0.001 (0.020)	Loss 4.8197 (4.8567)	Top-1 acc 17.969 (15.648)	Top-5 acc 37.109 (34.708)	lr 0.03584
Warmup Train [15][940/3239]	Time 0.302 (0.247)	Data 0.001 (0.020)	Loss 4.8098 (4.8565)	Top-1 acc 16.406 (15.650)	Top-5 acc 35.938 (34.705)	lr 0.03584
Warmup Train [15][950/3239]	Time 0.268 (0.247)	Data 0.001 (0.020)	Loss 4.9896 (4.8562)	Top-1 acc 17.188 (15.671)	Top-5 acc 32.422 (34.711)	lr 0.03583
Warmup Train [15][960/3239]	Time 0.195 (0.247)	Data 0.001 (0.020)	Loss 4.7590 (4.8557)	Top-1 acc 16.016 (15.679)	Top-5 acc 37.891 (34.730)	lr 0.03583
Warmup Train [15][970/3239]	Time 0.150 (0.246)	Data 0.001 (0.020)	Loss 4.8151 (4.8556)	Top-1 acc 17.969 (15.687)	Top-5 acc 36.328 (34.731)	lr 0.03582
Warmup Train [15][980/3239]	Time 0.156 (0.246)	Data 0.001 (0.019)	Loss 4.9786 (4.8553)	Top-1 acc 16.797 (15.698)	Top-5 acc 35.156 (34.737)	lr 0.03582
Warmup Train [15][990/3239]	Time 0.172 (0.246)	Data 0.002 (0.019)	Loss 5.0044 (4.8555)	Top-1 acc 16.016 (15.700)	Top-5 acc 33.203 (34.742)	lr 0.03581
Warmup Train [15][1000/3239]	Time 0.242 (0.246)	Data 0.001 (0.019)	Loss 4.9245 (4.8552)	Top-1 acc 16.016 (15.714)	Top-5 acc 33.594 (34.750)	lr 0.03581
Warmup Train [15][1010/3239]	Time 0.324 (0.246)	Data 0.001 (0.019)	Loss 4.7451 (4.8551)	Top-1 acc 16.797 (15.712)	Top-5 acc 39.062 (34.757)	lr 0.03580
Warmup Train [15][1020/3239]	Time 0.181 (0.245)	Data 0.001 (0.019)	Loss 4.7358 (4.8552)	Top-1 acc 15.625 (15.702)	Top-5 acc 37.109 (34.756)	lr 0.03580
Warmup Train [15][1030/3239]	Time 0.217 (0.245)	Data 0.001 (0.019)	Loss 4.7629 (4.8550)	Top-1 acc 18.359 (15.703)	Top-5 acc 38.672 (34.758)	lr 0.03579
Warmup Train [15][1040/3239]	Time 0.190 (0.245)	Data 0.001 (0.018)	Loss 4.7940 (4.8553)	Top-1 acc 19.922 (15.702)	Top-5 acc 36.328 (34.748)	lr 0.03578
Warmup Train [15][1050/3239]	Time 0.173 (0.245)	Data 0.001 (0.018)	Loss 5.0399 (4.8556)	Top-1 acc 12.500 (15.697)	Top-5 acc 28.125 (34.746)	lr 0.03578
Warmup Train [15][1060/3239]	Time 0.217 (0.244)	Data 0.001 (0.018)	Loss 4.7664 (4.8555)	Top-1 acc 14.453 (15.694)	Top-5 acc 36.719 (34.752)	lr 0.03577
Warmup Train [15][1070/3239]	Time 0.241 (0.244)	Data 0.001 (0.018)	Loss 4.9043 (4.8554)	Top-1 acc 15.234 (15.698)	Top-5 acc 31.250 (34.755)	lr 0.03577
Warmup Train [15][1080/3239]	Time 0.197 (0.244)	Data 0.001 (0.018)	Loss 4.9662 (4.8557)	Top-1 acc 15.625 (15.691)	Top-5 acc 32.031 (34.752)	lr 0.03576
Warmup Train [15][1090/3239]	Time 0.193 (0.244)	Data 0.001 (0.018)	Loss 5.0166 (4.8561)	Top-1 acc 8.984 (15.690)	Top-5 acc 33.984 (34.746)	lr 0.03576
Warmup Train [15][1100/3239]	Time 0.224 (0.244)	Data 0.002 (0.018)	Loss 4.9177 (4.8559)	Top-1 acc 15.625 (15.696)	Top-5 acc 33.984 (34.750)	lr 0.03575
Warmup Train [15][1110/3239]	Time 0.143 (0.243)	Data 0.001 (0.017)	Loss 4.9565 (4.8561)	Top-1 acc 14.062 (15.689)	Top-5 acc 32.031 (34.744)	lr 0.03575
Warmup Train [15][1120/3239]	Time 0.195 (0.243)	Data 0.001 (0.017)	Loss 4.8768 (4.8566)	Top-1 acc 12.891 (15.680)	Top-5 acc 36.328 (34.732)	lr 0.03574
Warmup Train [15][1130/3239]	Time 0.310 (0.243)	Data 0.002 (0.017)	Loss 4.8312 (4.8566)	Top-1 acc 14.062 (15.677)	Top-5 acc 34.766 (34.732)	lr 0.03574
Warmup Train [15][1140/3239]	Time 0.156 (0.243)	Data 0.002 (0.017)	Loss 4.9238 (4.8566)	Top-1 acc 17.188 (15.690)	Top-5 acc 35.156 (34.738)	lr 0.03573
Warmup Train [15][1150/3239]	Time 0.225 (0.243)	Data 0.001 (0.017)	Loss 4.9175 (4.8562)	Top-1 acc 14.844 (15.699)	Top-5 acc 34.766 (34.749)	lr 0.03572
Warmup Train [15][1160/3239]	Time 0.204 (0.243)	Data 0.001 (0.017)	Loss 4.9419 (4.8561)	Top-1 acc 14.062 (15.697)	Top-5 acc 32.031 (34.740)	lr 0.03572
Warmup Train [15][1170/3239]	Time 0.231 (0.242)	Data 0.002 (0.017)	Loss 4.7291 (4.8558)	Top-1 acc 16.016 (15.706)	Top-5 acc 35.547 (34.746)	lr 0.03571
Warmup Train [15][1180/3239]	Time 0.215 (0.242)	Data 0.001 (0.017)	Loss 5.0075 (4.8558)	Top-1 acc 13.281 (15.712)	Top-5 acc 33.594 (34.747)	lr 0.03571
Warmup Train [15][1190/3239]	Time 0.263 (0.242)	Data 0.001 (0.017)	Loss 4.8209 (4.8557)	Top-1 acc 14.844 (15.711)	Top-5 acc 32.031 (34.749)	lr 0.03570
Warmup Train [15][1200/3239]	Time 0.188 (0.242)	Data 0.002 (0.016)	Loss 4.7920 (4.8554)	Top-1 acc 15.625 (15.700)	Top-5 acc 37.109 (34.753)	lr 0.03570
Warmup Train [15][1210/3239]	Time 0.195 (0.242)	Data 0.001 (0.016)	Loss 4.7614 (4.8555)	Top-1 acc 15.234 (15.695)	Top-5 acc 36.328 (34.748)	lr 0.03569
Warmup Train [15][1220/3239]	Time 0.226 (0.242)	Data 0.001 (0.016)	Loss 4.7661 (4.8554)	Top-1 acc 14.844 (15.697)	Top-5 acc 33.594 (34.743)	lr 0.03569
Warmup Train [15][1230/3239]	Time 0.312 (0.242)	Data 0.001 (0.016)	Loss 4.7074 (4.8555)	Top-1 acc 17.188 (15.702)	Top-5 acc 37.500 (34.750)	lr 0.03568
Warmup Train [15][1240/3239]	Time 0.271 (0.241)	Data 0.001 (0.016)	Loss 4.8475 (4.8557)	Top-1 acc 16.406 (15.700)	Top-5 acc 34.766 (34.741)	lr 0.03568
Warmup Train [15][1250/3239]	Time 0.166 (0.241)	Data 0.001 (0.016)	Loss 4.7941 (4.8554)	Top-1 acc 14.453 (15.706)	Top-5 acc 36.328 (34.747)	lr 0.03567
Warmup Train [15][1260/3239]	Time 0.230 (0.241)	Data 0.001 (0.016)	Loss 4.7911 (4.8555)	Top-1 acc 21.094 (15.709)	Top-5 acc 36.719 (34.747)	lr 0.03566
Warmup Train [15][1270/3239]	Time 0.275 (0.241)	Data 0.001 (0.016)	Loss 4.7664 (4.8550)	Top-1 acc 17.578 (15.721)	Top-5 acc 36.719 (34.763)	lr 0.03566
Warmup Train [15][1280/3239]	Time 0.196 (0.241)	Data 0.001 (0.016)	Loss 4.9419 (4.8555)	Top-1 acc 15.234 (15.712)	Top-5 acc 29.688 (34.743)	lr 0.03565
Warmup Train [15][1290/3239]	Time 0.291 (0.241)	Data 0.001 (0.016)	Loss 4.8638 (4.8554)	Top-1 acc 16.016 (15.715)	Top-5 acc 36.719 (34.753)	lr 0.03565
Warmup Train [15][1300/3239]	Time 0.248 (0.241)	Data 0.002 (0.015)	Loss 4.8396 (4.8552)	Top-1 acc 16.016 (15.725)	Top-5 acc 34.375 (34.768)	lr 0.03564
Warmup Train [15][1310/3239]	Time 0.184 (0.240)	Data 0.001 (0.015)	Loss 5.0290 (4.8553)	Top-1 acc 13.281 (15.714)	Top-5 acc 30.469 (34.756)	lr 0.03564
Warmup Train [15][1320/3239]	Time 0.218 (0.240)	Data 0.001 (0.015)	Loss 4.6944 (4.8553)	Top-1 acc 14.062 (15.711)	Top-5 acc 39.062 (34.763)	lr 0.03563
Warmup Train [15][1330/3239]	Time 0.196 (0.240)	Data 0.001 (0.015)	Loss 4.8073 (4.8550)	Top-1 acc 13.672 (15.708)	Top-5 acc 35.156 (34.773)	lr 0.03563
Warmup Train [15][1340/3239]	Time 0.271 (0.240)	Data 0.001 (0.015)	Loss 4.9534 (4.8550)	Top-1 acc 9.766 (15.706)	Top-5 acc 31.250 (34.775)	lr 0.03562
Warmup Train [15][1350/3239]	Time 0.354 (0.240)	Data 0.001 (0.015)	Loss 5.0220 (4.8549)	Top-1 acc 15.234 (15.713)	Top-5 acc 30.469 (34.779)	lr 0.03561
Warmup Train [15][1360/3239]	Time 0.206 (0.240)	Data 0.001 (0.015)	Loss 4.9060 (4.8548)	Top-1 acc 14.062 (15.710)	Top-5 acc 35.547 (34.783)	lr 0.03561
Warmup Train [15][1370/3239]	Time 0.183 (0.240)	Data 0.002 (0.015)	Loss 4.7000 (4.8546)	Top-1 acc 17.969 (15.712)	Top-5 acc 35.938 (34.782)	lr 0.03560
Warmup Train [15][1380/3239]	Time 0.198 (0.240)	Data 0.001 (0.015)	Loss 4.7937 (4.8551)	Top-1 acc 16.797 (15.708)	Top-5 acc 35.156 (34.773)	lr 0.03560
Warmup Train [15][1390/3239]	Time 0.204 (0.240)	Data 0.001 (0.015)	Loss 4.8418 (4.8551)	Top-1 acc 15.234 (15.711)	Top-5 acc 34.766 (34.777)	lr 0.03559
Warmup Train [15][1400/3239]	Time 0.240 (0.240)	Data 0.001 (0.015)	Loss 4.8708 (4.8549)	Top-1 acc 14.453 (15.715)	Top-5 acc 33.203 (34.778)	lr 0.03559
Warmup Train [15][1410/3239]	Time 0.182 (0.240)	Data 0.001 (0.014)	Loss 4.6724 (4.8543)	Top-1 acc 21.094 (15.724)	Top-5 acc 39.062 (34.794)	lr 0.03558
Warmup Train [15][1420/3239]	Time 0.177 (0.239)	Data 0.002 (0.014)	Loss 4.6902 (4.8543)	Top-1 acc 19.531 (15.728)	Top-5 acc 40.625 (34.796)	lr 0.03558
Warmup Train [15][1430/3239]	Time 0.202 (0.239)	Data 0.001 (0.014)	Loss 4.8898 (4.8543)	Top-1 acc 13.281 (15.727)	Top-5 acc 32.031 (34.802)	lr 0.03557
Warmup Train [15][1440/3239]	Time 0.146 (0.239)	Data 0.002 (0.014)	Loss 4.9045 (4.8543)	Top-1 acc 14.844 (15.735)	Top-5 acc 34.766 (34.797)	lr 0.03557
Warmup Train [15][1450/3239]	Time 0.175 (0.239)	Data 0.001 (0.014)	Loss 5.0869 (4.8544)	Top-1 acc 10.156 (15.736)	Top-5 acc 30.859 (34.806)	lr 0.03556
Warmup Train [15][1460/3239]	Time 0.245 (0.239)	Data 0.002 (0.014)	Loss 4.9619 (4.8543)	Top-1 acc 13.672 (15.739)	Top-5 acc 32.422 (34.807)	lr 0.03555
Warmup Train [15][1470/3239]	Time 0.343 (0.239)	Data 0.001 (0.014)	Loss 4.9035 (4.8543)	Top-1 acc 14.062 (15.745)	Top-5 acc 34.766 (34.809)	lr 0.03555
Warmup Train [15][1480/3239]	Time 0.202 (0.239)	Data 0.001 (0.014)	Loss 4.9648 (4.8539)	Top-1 acc 12.891 (15.749)	Top-5 acc 30.078 (34.816)	lr 0.03554
Warmup Train [15][1490/3239]	Time 0.230 (0.239)	Data 0.001 (0.014)	Loss 4.9154 (4.8543)	Top-1 acc 15.625 (15.740)	Top-5 acc 32.422 (34.803)	lr 0.03554
Warmup Train [15][1500/3239]	Time 0.249 (0.239)	Data 0.001 (0.014)	Loss 5.0978 (4.8546)	Top-1 acc 12.109 (15.738)	Top-5 acc 24.219 (34.789)	lr 0.03553
Warmup Train [15][1510/3239]	Time 0.193 (0.238)	Data 0.001 (0.014)	Loss 4.6370 (4.8541)	Top-1 acc 22.656 (15.741)	Top-5 acc 39.453 (34.800)	lr 0.03553
Warmup Train [15][1520/3239]	Time 0.241 (0.238)	Data 0.002 (0.014)	Loss 4.8737 (4.8538)	Top-1 acc 17.969 (15.749)	Top-5 acc 33.203 (34.809)	lr 0.03552
Warmup Train [15][1530/3239]	Time 0.148 (0.238)	Data 0.002 (0.014)	Loss 4.8501 (4.8539)	Top-1 acc 19.141 (15.747)	Top-5 acc 34.375 (34.808)	lr 0.03552
Warmup Train [15][1540/3239]	Time 0.264 (0.238)	Data 0.001 (0.013)	Loss 4.8351 (4.8540)	Top-1 acc 16.016 (15.744)	Top-5 acc 36.328 (34.808)	lr 0.03551
Warmup Train [15][1550/3239]	Time 0.213 (0.238)	Data 0.001 (0.013)	Loss 4.9443 (4.8541)	Top-1 acc 19.531 (15.749)	Top-5 acc 33.594 (34.809)	lr 0.03551
Warmup Train [15][1560/3239]	Time 0.207 (0.238)	Data 0.002 (0.013)	Loss 4.7971 (4.8540)	Top-1 acc 15.625 (15.753)	Top-5 acc 37.109 (34.811)	lr 0.03550
Warmup Train [15][1570/3239]	Time 0.333 (0.238)	Data 0.001 (0.013)	Loss 4.6831 (4.8542)	Top-1 acc 18.750 (15.750)	Top-5 acc 37.891 (34.802)	lr 0.03549
Warmup Train [15][1580/3239]	Time 0.177 (0.238)	Data 0.001 (0.013)	Loss 4.6882 (4.8539)	Top-1 acc 17.969 (15.744)	Top-5 acc 37.891 (34.808)	lr 0.03549
Warmup Train [15][1590/3239]	Time 0.166 (0.238)	Data 0.001 (0.013)	Loss 4.8535 (4.8541)	Top-1 acc 15.234 (15.747)	Top-5 acc 35.156 (34.806)	lr 0.03548
Warmup Train [15][1600/3239]	Time 0.193 (0.238)	Data 0.001 (0.013)	Loss 4.7274 (4.8538)	Top-1 acc 18.359 (15.747)	Top-5 acc 39.453 (34.811)	lr 0.03548
Warmup Train [15][1610/3239]	Time 0.199 (0.237)	Data 0.001 (0.013)	Loss 4.9288 (4.8538)	Top-1 acc 12.109 (15.747)	Top-5 acc 32.812 (34.808)	lr 0.03547
Warmup Train [15][1620/3239]	Time 0.215 (0.237)	Data 0.002 (0.013)	Loss 4.7633 (4.8537)	Top-1 acc 20.312 (15.751)	Top-5 acc 38.281 (34.812)	lr 0.03547
Warmup Train [15][1630/3239]	Time 0.242 (0.237)	Data 0.001 (0.013)	Loss 4.6157 (4.8532)	Top-1 acc 19.922 (15.762)	Top-5 acc 43.359 (34.828)	lr 0.03546
Warmup Train [15][1640/3239]	Time 0.314 (0.237)	Data 0.001 (0.013)	Loss 4.8437 (4.8531)	Top-1 acc 14.844 (15.764)	Top-5 acc 33.203 (34.833)	lr 0.03546
Warmup Train [15][1650/3239]	Time 0.229 (0.237)	Data 0.002 (0.013)	Loss 4.9171 (4.8531)	Top-1 acc 17.578 (15.769)	Top-5 acc 32.812 (34.837)	lr 0.03545
Warmup Train [15][1660/3239]	Time 0.156 (0.237)	Data 0.001 (0.013)	Loss 4.8610 (4.8532)	Top-1 acc 17.969 (15.769)	Top-5 acc 35.547 (34.832)	lr 0.03544
Warmup Train [15][1670/3239]	Time 0.336 (0.237)	Data 0.001 (0.013)	Loss 4.8660 (4.8532)	Top-1 acc 13.672 (15.770)	Top-5 acc 34.375 (34.828)	lr 0.03544
Warmup Train [15][1680/3239]	Time 0.228 (0.237)	Data 0.001 (0.013)	Loss 5.0007 (4.8535)	Top-1 acc 15.234 (15.763)	Top-5 acc 32.031 (34.824)	lr 0.03543
Warmup Train [15][1690/3239]	Time 0.180 (0.237)	Data 0.001 (0.012)	Loss 4.6827 (4.8535)	Top-1 acc 17.969 (15.760)	Top-5 acc 34.766 (34.820)	lr 0.03543
Warmup Train [15][1700/3239]	Time 0.188 (0.237)	Data 0.001 (0.012)	Loss 4.7790 (4.8532)	Top-1 acc 15.625 (15.764)	Top-5 acc 36.328 (34.824)	lr 0.03542
Warmup Train [15][1710/3239]	Time 0.160 (0.237)	Data 0.001 (0.012)	Loss 5.0844 (4.8532)	Top-1 acc 12.891 (15.765)	Top-5 acc 32.422 (34.819)	lr 0.03542
Warmup Train [15][1720/3239]	Time 0.181 (0.237)	Data 0.001 (0.012)	Loss 4.7038 (4.8532)	Top-1 acc 19.922 (15.766)	Top-5 acc 39.453 (34.816)	lr 0.03541
Warmup Train [15][1730/3239]	Time 0.150 (0.236)	Data 0.002 (0.012)	Loss 4.7542 (4.8531)	Top-1 acc 19.141 (15.766)	Top-5 acc 37.500 (34.818)	lr 0.03541
Warmup Train [15][1740/3239]	Time 0.168 (0.236)	Data 0.003 (0.012)	Loss 5.0345 (4.8533)	Top-1 acc 8.594 (15.763)	Top-5 acc 30.078 (34.819)	lr 0.03540
Warmup Train [15][1750/3239]	Time 0.180 (0.236)	Data 0.001 (0.012)	Loss 4.8283 (4.8532)	Top-1 acc 14.844 (15.768)	Top-5 acc 35.156 (34.821)	lr 0.03539
Warmup Train [15][1760/3239]	Time 0.249 (0.236)	Data 0.001 (0.012)	Loss 4.8065 (4.8529)	Top-1 acc 19.141 (15.778)	Top-5 acc 34.766 (34.830)	lr 0.03539
Warmup Train [15][1770/3239]	Time 0.214 (0.236)	Data 0.001 (0.012)	Loss 4.8360 (4.8527)	Top-1 acc 13.672 (15.781)	Top-5 acc 34.766 (34.826)	lr 0.03538
Warmup Train [15][1780/3239]	Time 0.217 (0.236)	Data 0.001 (0.012)	Loss 4.7179 (4.8523)	Top-1 acc 23.828 (15.797)	Top-5 acc 37.891 (34.831)	lr 0.03538
Warmup Train [15][1790/3239]	Time 0.191 (0.236)	Data 0.001 (0.012)	Loss 5.0074 (4.8523)	Top-1 acc 12.109 (15.795)	Top-5 acc 29.688 (34.826)	lr 0.03537
Warmup Train [15][1800/3239]	Time 0.382 (0.236)	Data 0.001 (0.012)	Loss 4.7765 (4.8521)	Top-1 acc 13.672 (15.796)	Top-5 acc 33.203 (34.830)	lr 0.03537
Warmup Train [15][1810/3239]	Time 0.218 (0.236)	Data 0.002 (0.012)	Loss 4.7393 (4.8520)	Top-1 acc 16.016 (15.798)	Top-5 acc 37.891 (34.827)	lr 0.03536
Warmup Train [15][1820/3239]	Time 0.164 (0.236)	Data 0.027 (0.012)	Loss 5.0071 (4.8523)	Top-1 acc 13.672 (15.795)	Top-5 acc 32.422 (34.825)	lr 0.03536
Warmup Train [15][1830/3239]	Time 0.205 (0.236)	Data 0.001 (0.012)	Loss 4.8339 (4.8521)	Top-1 acc 14.844 (15.798)	Top-5 acc 32.422 (34.827)	lr 0.03535
Warmup Train [15][1840/3239]	Time 0.188 (0.236)	Data 0.001 (0.012)	Loss 4.5855 (4.8518)	Top-1 acc 22.656 (15.805)	Top-5 acc 41.406 (34.835)	lr 0.03535
Warmup Train [15][1850/3239]	Time 0.174 (0.236)	Data 0.002 (0.012)	Loss 4.9017 (4.8519)	Top-1 acc 13.672 (15.802)	Top-5 acc 34.375 (34.834)	lr 0.03534
Warmup Train [15][1860/3239]	Time 0.235 (0.236)	Data 0.001 (0.012)	Loss 4.8756 (4.8518)	Top-1 acc 15.625 (15.801)	Top-5 acc 34.766 (34.841)	lr 0.03533
Warmup Train [15][1870/3239]	Time 0.252 (0.236)	Data 0.002 (0.012)	Loss 4.8877 (4.8517)	Top-1 acc 18.359 (15.809)	Top-5 acc 36.719 (34.847)	lr 0.03533
Warmup Train [15][1880/3239]	Time 0.174 (0.236)	Data 0.001 (0.012)	Loss 4.6879 (4.8516)	Top-1 acc 16.016 (15.805)	Top-5 acc 37.500 (34.848)	lr 0.03532
Warmup Train [15][1890/3239]	Time 0.193 (0.236)	Data 0.001 (0.012)	Loss 4.7678 (4.8514)	Top-1 acc 17.188 (15.805)	Top-5 acc 37.109 (34.853)	lr 0.03532
Warmup Train [15][1900/3239]	Time 0.391 (0.236)	Data 0.001 (0.012)	Loss 4.8273 (4.8513)	Top-1 acc 14.453 (15.802)	Top-5 acc 37.500 (34.855)	lr 0.03531
Warmup Train [15][1910/3239]	Time 0.165 (0.235)	Data 0.001 (0.011)	Loss 4.7314 (4.8511)	Top-1 acc 14.453 (15.808)	Top-5 acc 36.719 (34.859)	lr 0.03531
Warmup Train [15][1920/3239]	Time 0.239 (0.235)	Data 0.002 (0.011)	Loss 4.8221 (4.8513)	Top-1 acc 20.312 (15.805)	Top-5 acc 37.109 (34.852)	lr 0.03530
Warmup Train [15][1930/3239]	Time 0.194 (0.235)	Data 0.002 (0.011)	Loss 4.7126 (4.8513)	Top-1 acc 18.359 (15.809)	Top-5 acc 39.453 (34.855)	lr 0.03530
Warmup Train [15][1940/3239]	Time 0.214 (0.235)	Data 0.001 (0.011)	Loss 4.8888 (4.8510)	Top-1 acc 14.844 (15.814)	Top-5 acc 34.766 (34.864)	lr 0.03529
Warmup Train [15][1950/3239]	Time 0.223 (0.235)	Data 0.002 (0.011)	Loss 5.0038 (4.8509)	Top-1 acc 10.156 (15.814)	Top-5 acc 26.953 (34.862)	lr 0.03528
Warmup Train [15][1960/3239]	Time 0.247 (0.235)	Data 0.002 (0.011)	Loss 4.7293 (4.8504)	Top-1 acc 19.531 (15.828)	Top-5 acc 38.281 (34.876)	lr 0.03528
Warmup Train [15][1970/3239]	Time 0.223 (0.235)	Data 0.001 (0.011)	Loss 4.8569 (4.8502)	Top-1 acc 21.875 (15.835)	Top-5 acc 36.719 (34.883)	lr 0.03527
Warmup Train [15][1980/3239]	Time 0.235 (0.235)	Data 0.001 (0.011)	Loss 4.7942 (4.8499)	Top-1 acc 17.969 (15.845)	Top-5 acc 34.375 (34.887)	lr 0.03527
Warmup Train [15][1990/3239]	Time 0.184 (0.235)	Data 0.001 (0.011)	Loss 5.1047 (4.8500)	Top-1 acc 11.328 (15.845)	Top-5 acc 31.641 (34.881)	lr 0.03526
Warmup Train [15][2000/3239]	Time 0.213 (0.235)	Data 0.001 (0.011)	Loss 5.0145 (4.8498)	Top-1 acc 14.844 (15.847)	Top-5 acc 29.297 (34.879)	lr 0.03526
Warmup Train [15][2010/3239]	Time 0.190 (0.235)	Data 0.001 (0.011)	Loss 4.9279 (4.8497)	Top-1 acc 12.500 (15.845)	Top-5 acc 27.734 (34.882)	lr 0.03525
Warmup Train [15][2020/3239]	Time 0.233 (0.235)	Data 0.001 (0.011)	Loss 4.7515 (4.8494)	Top-1 acc 18.359 (15.848)	Top-5 acc 36.328 (34.889)	lr 0.03525
Warmup Train [15][2030/3239]	Time 0.214 (0.235)	Data 0.001 (0.011)	Loss 4.7871 (4.8497)	Top-1 acc 17.188 (15.845)	Top-5 acc 37.109 (34.885)	lr 0.03524
Warmup Train [15][2040/3239]	Time 0.265 (0.235)	Data 0.001 (0.011)	Loss 4.9090 (4.8497)	Top-1 acc 12.500 (15.840)	Top-5 acc 33.594 (34.882)	lr 0.03523
Warmup Train [15][2050/3239]	Time 0.208 (0.235)	Data 0.001 (0.011)	Loss 4.8651 (4.8492)	Top-1 acc 13.672 (15.842)	Top-5 acc 32.422 (34.898)	lr 0.03523
Warmup Train [15][2060/3239]	Time 0.189 (0.235)	Data 0.001 (0.011)	Loss 4.9078 (4.8491)	Top-1 acc 15.234 (15.840)	Top-5 acc 33.203 (34.898)	lr 0.03522
Warmup Train [15][2070/3239]	Time 0.213 (0.235)	Data 0.001 (0.011)	Loss 4.9279 (4.8488)	Top-1 acc 12.109 (15.843)	Top-5 acc 30.078 (34.906)	lr 0.03522
Warmup Train [15][2080/3239]	Time 0.216 (0.235)	Data 0.001 (0.011)	Loss 4.8131 (4.8485)	Top-1 acc 16.016 (15.846)	Top-5 acc 30.859 (34.908)	lr 0.03521
Warmup Train [15][2090/3239]	Time 0.317 (0.235)	Data 0.023 (0.011)	Loss 5.0043 (4.8486)	Top-1 acc 10.547 (15.845)	Top-5 acc 30.078 (34.904)	lr 0.03521
Warmup Train [15][2100/3239]	Time 0.202 (0.235)	Data 0.001 (0.011)	Loss 4.9684 (4.8486)	Top-1 acc 13.281 (15.847)	Top-5 acc 27.344 (34.898)	lr 0.03520
Warmup Train [15][2110/3239]	Time 0.239 (0.235)	Data 0.002 (0.011)	Loss 4.7887 (4.8486)	Top-1 acc 15.234 (15.847)	Top-5 acc 35.938 (34.901)	lr 0.03520
Warmup Train [15][2120/3239]	Time 0.263 (0.235)	Data 0.002 (0.011)	Loss 4.9425 (4.8486)	Top-1 acc 14.453 (15.851)	Top-5 acc 32.031 (34.905)	lr 0.03519
Warmup Train [15][2130/3239]	Time 0.199 (0.234)	Data 0.001 (0.011)	Loss 4.8103 (4.8483)	Top-1 acc 16.016 (15.854)	Top-5 acc 35.938 (34.905)	lr 0.03519
Warmup Train [15][2140/3239]	Time 0.210 (0.234)	Data 0.001 (0.011)	Loss 4.8385 (4.8480)	Top-1 acc 14.062 (15.860)	Top-5 acc 37.109 (34.911)	lr 0.03518
Warmup Train [15][2150/3239]	Time 0.169 (0.234)	Data 0.001 (0.011)	Loss 4.9250 (4.8479)	Top-1 acc 13.672 (15.857)	Top-5 acc 33.203 (34.916)	lr 0.03517
Warmup Train [15][2160/3239]	Time 0.308 (0.234)	Data 0.001 (0.010)	Loss 4.8514 (4.8477)	Top-1 acc 16.406 (15.867)	Top-5 acc 35.156 (34.919)	lr 0.03517
Warmup Train [15][2170/3239]	Time 0.130 (0.234)	Data 0.002 (0.010)	Loss 4.7732 (4.8475)	Top-1 acc 17.969 (15.871)	Top-5 acc 35.938 (34.926)	lr 0.03516
Warmup Train [15][2180/3239]	Time 0.234 (0.234)	Data 0.001 (0.010)	Loss 4.9475 (4.8473)	Top-1 acc 13.672 (15.873)	Top-5 acc 29.688 (34.928)	lr 0.03516
Warmup Train [15][2190/3239]	Time 0.328 (0.234)	Data 0.001 (0.010)	Loss 4.8968 (4.8473)	Top-1 acc 16.016 (15.870)	Top-5 acc 32.812 (34.928)	lr 0.03515
Warmup Train [15][2200/3239]	Time 0.158 (0.234)	Data 0.001 (0.010)	Loss 4.8219 (4.8475)	Top-1 acc 17.578 (15.868)	Top-5 acc 36.719 (34.925)	lr 0.03515
Warmup Train [15][2210/3239]	Time 0.160 (0.234)	Data 0.001 (0.010)	Loss 4.7527 (4.8472)	Top-1 acc 17.188 (15.874)	Top-5 acc 38.672 (34.932)	lr 0.03514
Warmup Train [15][2220/3239]	Time 0.192 (0.234)	Data 0.001 (0.010)	Loss 4.9042 (4.8474)	Top-1 acc 17.188 (15.872)	Top-5 acc 32.031 (34.926)	lr 0.03514
Warmup Train [15][2230/3239]	Time 0.192 (0.234)	Data 0.001 (0.010)	Loss 4.8748 (4.8473)	Top-1 acc 17.188 (15.879)	Top-5 acc 32.031 (34.934)	lr 0.03513
Warmup Train [15][2240/3239]	Time 0.244 (0.234)	Data 0.001 (0.010)	Loss 5.0371 (4.8473)	Top-1 acc 13.672 (15.877)	Top-5 acc 30.078 (34.932)	lr 0.03512
Warmup Train [15][2250/3239]	Time 0.170 (0.234)	Data 0.001 (0.010)	Loss 4.7014 (4.8469)	Top-1 acc 15.234 (15.882)	Top-5 acc 36.328 (34.941)	lr 0.03512
Warmup Train [15][2260/3239]	Time 0.210 (0.234)	Data 0.001 (0.010)	Loss 4.8215 (4.8468)	Top-1 acc 18.359 (15.883)	Top-5 acc 37.500 (34.938)	lr 0.03511
Warmup Train [15][2270/3239]	Time 0.227 (0.234)	Data 0.001 (0.010)	Loss 4.9261 (4.8469)	Top-1 acc 16.406 (15.882)	Top-5 acc 32.031 (34.940)	lr 0.03511
Warmup Train [15][2280/3239]	Time 0.195 (0.234)	Data 0.002 (0.010)	Loss 4.8955 (4.8470)	Top-1 acc 14.844 (15.882)	Top-5 acc 33.984 (34.941)	lr 0.03510
Warmup Train [15][2290/3239]	Time 0.212 (0.234)	Data 0.001 (0.010)	Loss 4.9531 (4.8472)	Top-1 acc 14.453 (15.878)	Top-5 acc 33.203 (34.937)	lr 0.03510
Warmup Train [15][2300/3239]	Time 0.293 (0.234)	Data 0.001 (0.010)	Loss 4.7376 (4.8469)	Top-1 acc 16.406 (15.881)	Top-5 acc 39.844 (34.943)	lr 0.03509
Warmup Train [15][2310/3239]	Time 0.199 (0.234)	Data 0.001 (0.010)	Loss 4.7245 (4.8465)	Top-1 acc 14.062 (15.890)	Top-5 acc 34.375 (34.950)	lr 0.03509
Warmup Train [15][2320/3239]	Time 0.167 (0.234)	Data 0.001 (0.010)	Loss 4.9024 (4.8465)	Top-1 acc 13.281 (15.890)	Top-5 acc 30.469 (34.947)	lr 0.03508
Warmup Train [15][2330/3239]	Time 0.155 (0.234)	Data 0.001 (0.010)	Loss 4.8485 (4.8463)	Top-1 acc 15.625 (15.893)	Top-5 acc 35.938 (34.951)	lr 0.03507
Warmup Train [15][2340/3239]	Time 0.181 (0.234)	Data 0.001 (0.010)	Loss 4.9621 (4.8462)	Top-1 acc 16.406 (15.890)	Top-5 acc 33.203 (34.956)	lr 0.03507
Warmup Train [15][2350/3239]	Time 0.226 (0.233)	Data 0.002 (0.010)	Loss 4.8163 (4.8462)	Top-1 acc 17.578 (15.888)	Top-5 acc 34.766 (34.955)	lr 0.03506
Warmup Train [15][2360/3239]	Time 0.237 (0.233)	Data 0.001 (0.010)	Loss 4.6863 (4.8460)	Top-1 acc 18.359 (15.887)	Top-5 acc 42.188 (34.957)	lr 0.03506
Warmup Train [15][2370/3239]	Time 0.186 (0.233)	Data 0.002 (0.010)	Loss 4.8281 (4.8458)	Top-1 acc 15.234 (15.892)	Top-5 acc 33.594 (34.961)	lr 0.03505
Warmup Train [15][2380/3239]	Time 0.173 (0.233)	Data 0.001 (0.010)	Loss 4.7797 (4.8457)	Top-1 acc 17.188 (15.894)	Top-5 acc 35.547 (34.962)	lr 0.03505
Warmup Train [15][2390/3239]	Time 0.206 (0.233)	Data 0.004 (0.010)	Loss 4.8399 (4.8456)	Top-1 acc 12.891 (15.896)	Top-5 acc 35.156 (34.964)	lr 0.03504
Warmup Train [15][2400/3239]	Time 0.251 (0.233)	Data 0.002 (0.010)	Loss 4.8033 (4.8452)	Top-1 acc 16.797 (15.902)	Top-5 acc 38.281 (34.976)	lr 0.03504
Warmup Train [15][2410/3239]	Time 0.323 (0.233)	Data 0.001 (0.010)	Loss 4.8546 (4.8451)	Top-1 acc 12.500 (15.903)	Top-5 acc 33.984 (34.983)	lr 0.03503
Warmup Train [15][2420/3239]	Time 0.208 (0.233)	Data 0.001 (0.010)	Loss 4.9363 (4.8447)	Top-1 acc 19.531 (15.918)	Top-5 acc 34.766 (34.996)	lr 0.03502
Warmup Train [15][2430/3239]	Time 0.152 (0.233)	Data 0.001 (0.010)	Loss 4.7923 (4.8446)	Top-1 acc 17.969 (15.923)	Top-5 acc 33.203 (35.002)	lr 0.03502
Warmup Train [15][2440/3239]	Time 0.231 (0.233)	Data 0.001 (0.010)	Loss 4.8737 (4.8444)	Top-1 acc 18.359 (15.925)	Top-5 acc 35.156 (35.004)	lr 0.03501
Warmup Train [15][2450/3239]	Time 0.175 (0.233)	Data 0.002 (0.010)	Loss 4.6862 (4.8441)	Top-1 acc 17.578 (15.928)	Top-5 acc 37.109 (35.007)	lr 0.03501
Warmup Train [15][2460/3239]	Time 0.192 (0.233)	Data 0.001 (0.010)	Loss 4.7357 (4.8438)	Top-1 acc 17.578 (15.933)	Top-5 acc 41.016 (35.016)	lr 0.03500
Warmup Train [15][2470/3239]	Time 0.167 (0.233)	Data 0.002 (0.010)	Loss 4.6109 (4.8437)	Top-1 acc 17.969 (15.932)	Top-5 acc 36.719 (35.016)	lr 0.03500
Warmup Train [15][2480/3239]	Time 0.144 (0.233)	Data 0.001 (0.010)	Loss 4.7585 (4.8434)	Top-1 acc 17.578 (15.936)	Top-5 acc 38.281 (35.022)	lr 0.03499
Warmup Train [15][2490/3239]	Time 0.245 (0.233)	Data 0.001 (0.010)	Loss 4.7725 (4.8434)	Top-1 acc 22.266 (15.936)	Top-5 acc 37.109 (35.023)	lr 0.03499
Warmup Train [15][2500/3239]	Time 0.292 (0.233)	Data 0.001 (0.010)	Loss 4.8284 (4.8432)	Top-1 acc 15.625 (15.939)	Top-5 acc 32.031 (35.026)	lr 0.03498
Warmup Train [15][2510/3239]	Time 0.229 (0.233)	Data 0.001 (0.010)	Loss 4.7669 (4.8431)	Top-1 acc 16.016 (15.938)	Top-5 acc 39.453 (35.027)	lr 0.03497
Warmup Train [15][2520/3239]	Time 0.169 (0.233)	Data 0.001 (0.009)	Loss 4.8466 (4.8433)	Top-1 acc 13.672 (15.934)	Top-5 acc 35.156 (35.021)	lr 0.03497
Warmup Train [15][2530/3239]	Time 0.184 (0.233)	Data 0.001 (0.009)	Loss 4.8787 (4.8432)	Top-1 acc 16.406 (15.937)	Top-5 acc 37.109 (35.023)	lr 0.03496
Warmup Train [15][2540/3239]	Time 0.224 (0.233)	Data 0.001 (0.009)	Loss 4.7853 (4.8431)	Top-1 acc 15.625 (15.935)	Top-5 acc 35.938 (35.025)	lr 0.03496
Warmup Train [15][2550/3239]	Time 0.231 (0.233)	Data 0.001 (0.009)	Loss 5.0975 (4.8431)	Top-1 acc 10.938 (15.934)	Top-5 acc 26.562 (35.026)	lr 0.03495
Warmup Train [15][2560/3239]	Time 0.210 (0.233)	Data 0.001 (0.009)	Loss 4.8535 (4.8430)	Top-1 acc 16.797 (15.937)	Top-5 acc 37.891 (35.032)	lr 0.03495
Warmup Train [15][2570/3239]	Time 0.244 (0.233)	Data 0.002 (0.009)	Loss 4.9379 (4.8430)	Top-1 acc 18.359 (15.937)	Top-5 acc 35.156 (35.029)	lr 0.03494
Warmup Train [15][2580/3239]	Time 0.178 (0.233)	Data 0.002 (0.009)	Loss 4.8289 (4.8431)	Top-1 acc 15.234 (15.936)	Top-5 acc 36.328 (35.028)	lr 0.03494
Warmup Train [15][2590/3239]	Time 0.246 (0.233)	Data 0.001 (0.009)	Loss 4.8829 (4.8430)	Top-1 acc 16.406 (15.936)	Top-5 acc 32.812 (35.028)	lr 0.03493
Warmup Train [15][2600/3239]	Time 0.237 (0.233)	Data 0.001 (0.009)	Loss 4.7480 (4.8429)	Top-1 acc 18.359 (15.938)	Top-5 acc 38.281 (35.029)	lr 0.03492
Warmup Train [15][2610/3239]	Time 0.339 (0.233)	Data 0.001 (0.009)	Loss 4.9004 (4.8429)	Top-1 acc 16.016 (15.936)	Top-5 acc 32.031 (35.026)	lr 0.03492
Warmup Train [15][2620/3239]	Time 0.212 (0.233)	Data 0.001 (0.009)	Loss 4.6505 (4.8427)	Top-1 acc 22.656 (15.940)	Top-5 acc 38.281 (35.030)	lr 0.03491
Warmup Train [15][2630/3239]	Time 0.205 (0.233)	Data 0.002 (0.009)	Loss 4.6007 (4.8424)	Top-1 acc 19.531 (15.946)	Top-5 acc 44.141 (35.037)	lr 0.03491
Warmup Train [15][2640/3239]	Time 0.179 (0.233)	Data 0.001 (0.009)	Loss 4.9367 (4.8424)	Top-1 acc 15.625 (15.942)	Top-5 acc 30.859 (35.036)	lr 0.03490
Warmup Train [15][2650/3239]	Time 0.197 (0.233)	Data 0.001 (0.009)	Loss 4.7889 (4.8423)	Top-1 acc 19.922 (15.941)	Top-5 acc 34.766 (35.034)	lr 0.03490
Warmup Train [15][2660/3239]	Time 0.191 (0.233)	Data 0.001 (0.009)	Loss 4.8295 (4.8423)	Top-1 acc 16.797 (15.943)	Top-5 acc 35.156 (35.030)	lr 0.03489
Warmup Train [15][2670/3239]	Time 0.196 (0.233)	Data 0.001 (0.009)	Loss 4.6293 (4.8419)	Top-1 acc 19.141 (15.948)	Top-5 acc 41.016 (35.038)	lr 0.03489
Warmup Train [15][2680/3239]	Time 0.179 (0.233)	Data 0.001 (0.009)	Loss 4.8592 (4.8420)	Top-1 acc 17.188 (15.946)	Top-5 acc 35.547 (35.037)	lr 0.03488
Warmup Train [15][2690/3239]	Time 0.183 (0.233)	Data 0.001 (0.009)	Loss 4.7183 (4.8418)	Top-1 acc 19.531 (15.953)	Top-5 acc 35.938 (35.039)	lr 0.03487
Warmup Train [15][2700/3239]	Time 0.210 (0.233)	Data 0.002 (0.009)	Loss 4.8611 (4.8416)	Top-1 acc 14.844 (15.954)	Top-5 acc 35.938 (35.044)	lr 0.03487
Warmup Train [15][2710/3239]	Time 0.213 (0.232)	Data 0.001 (0.009)	Loss 4.8610 (4.8415)	Top-1 acc 15.625 (15.954)	Top-5 acc 31.641 (35.047)	lr 0.03486
Warmup Train [15][2720/3239]	Time 0.308 (0.233)	Data 0.001 (0.009)	Loss 4.7889 (4.8414)	Top-1 acc 17.188 (15.957)	Top-5 acc 36.328 (35.051)	lr 0.03486
Warmup Train [15][2730/3239]	Time 0.190 (0.232)	Data 0.001 (0.009)	Loss 4.9561 (4.8413)	Top-1 acc 14.453 (15.960)	Top-5 acc 32.031 (35.050)	lr 0.03485
Warmup Train [15][2740/3239]	Time 0.318 (0.233)	Data 0.003 (0.009)	Loss 4.7484 (4.8411)	Top-1 acc 18.750 (15.964)	Top-5 acc 34.766 (35.052)	lr 0.03485
Warmup Train [15][2750/3239]	Time 0.211 (0.232)	Data 0.001 (0.009)	Loss 4.8657 (4.8410)	Top-1 acc 14.844 (15.970)	Top-5 acc 34.375 (35.056)	lr 0.03484
Warmup Train [15][2760/3239]	Time 0.161 (0.232)	Data 0.001 (0.009)	Loss 4.6021 (4.8408)	Top-1 acc 20.312 (15.971)	Top-5 acc 40.234 (35.056)	lr 0.03484
Warmup Train [15][2770/3239]	Time 0.196 (0.232)	Data 0.001 (0.009)	Loss 4.9185 (4.8406)	Top-1 acc 16.016 (15.974)	Top-5 acc 34.375 (35.061)	lr 0.03483
Warmup Train [15][2780/3239]	Time 0.197 (0.232)	Data 0.001 (0.009)	Loss 4.5823 (4.8405)	Top-1 acc 23.047 (15.975)	Top-5 acc 42.188 (35.067)	lr 0.03482
Warmup Train [15][2790/3239]	Time 0.199 (0.232)	Data 0.002 (0.009)	Loss 4.8467 (4.8403)	Top-1 acc 16.797 (15.980)	Top-5 acc 35.156 (35.074)	lr 0.03482
Warmup Train [15][2800/3239]	Time 0.259 (0.232)	Data 0.001 (0.009)	Loss 4.5569 (4.8402)	Top-1 acc 20.312 (15.981)	Top-5 acc 41.797 (35.080)	lr 0.03481
Warmup Train [15][2810/3239]	Time 0.321 (0.232)	Data 0.001 (0.009)	Loss 4.9076 (4.8401)	Top-1 acc 16.406 (15.980)	Top-5 acc 33.594 (35.082)	lr 0.03481
Warmup Train [15][2820/3239]	Time 0.205 (0.232)	Data 0.001 (0.009)	Loss 4.8107 (4.8401)	Top-1 acc 14.062 (15.983)	Top-5 acc 39.062 (35.082)	lr 0.03480
Warmup Train [15][2830/3239]	Time 0.229 (0.232)	Data 0.001 (0.009)	Loss 4.6053 (4.8400)	Top-1 acc 20.312 (15.989)	Top-5 acc 38.281 (35.089)	lr 0.03480
Warmup Train [15][2840/3239]	Time 0.244 (0.232)	Data 0.001 (0.009)	Loss 4.9253 (4.8400)	Top-1 acc 15.625 (15.989)	Top-5 acc 33.984 (35.090)	lr 0.03479
Warmup Train [15][2850/3239]	Time 0.266 (0.232)	Data 0.001 (0.009)	Loss 4.7845 (4.8397)	Top-1 acc 18.359 (15.992)	Top-5 acc 35.547 (35.097)	lr 0.03479
Warmup Train [15][2860/3239]	Time 0.198 (0.232)	Data 0.001 (0.009)	Loss 4.9729 (4.8397)	Top-1 acc 13.672 (15.992)	Top-5 acc 31.250 (35.097)	lr 0.03478
Warmup Train [15][2870/3239]	Time 0.226 (0.232)	Data 0.001 (0.009)	Loss 4.8265 (4.8398)	Top-1 acc 16.016 (15.988)	Top-5 acc 34.375 (35.095)	lr 0.03477
Warmup Train [15][2880/3239]	Time 0.261 (0.232)	Data 0.001 (0.009)	Loss 4.6377 (4.8396)	Top-1 acc 19.531 (15.992)	Top-5 acc 41.016 (35.102)	lr 0.03477
Warmup Train [15][2890/3239]	Time 0.211 (0.232)	Data 0.001 (0.009)	Loss 4.9252 (4.8395)	Top-1 acc 14.453 (15.997)	Top-5 acc 32.031 (35.108)	lr 0.03476
Warmup Train [15][2900/3239]	Time 0.205 (0.232)	Data 0.001 (0.009)	Loss 4.7062 (4.8394)	Top-1 acc 16.797 (15.997)	Top-5 acc 38.281 (35.111)	lr 0.03476
Warmup Train [15][2910/3239]	Time 0.148 (0.232)	Data 0.001 (0.009)	Loss 4.8397 (4.8392)	Top-1 acc 15.625 (16.000)	Top-5 acc 34.766 (35.112)	lr 0.03475
Warmup Train [15][2920/3239]	Time 0.349 (0.232)	Data 0.001 (0.009)	Loss 4.8095 (4.8391)	Top-1 acc 14.062 (16.002)	Top-5 acc 33.984 (35.115)	lr 0.03475
Warmup Train [15][2930/3239]	Time 0.219 (0.232)	Data 0.001 (0.009)	Loss 4.7619 (4.8391)	Top-1 acc 17.578 (16.004)	Top-5 acc 38.281 (35.114)	lr 0.03474
Warmup Train [15][2940/3239]	Time 0.167 (0.232)	Data 0.001 (0.009)	Loss 4.6800 (4.8389)	Top-1 acc 21.094 (16.014)	Top-5 acc 42.969 (35.123)	lr 0.03473
Warmup Train [15][2950/3239]	Time 0.234 (0.232)	Data 0.001 (0.009)	Loss 4.8487 (4.8386)	Top-1 acc 13.281 (16.016)	Top-5 acc 37.500 (35.128)	lr 0.03473
Warmup Train [15][2960/3239]	Time 0.245 (0.232)	Data 0.002 (0.009)	Loss 4.7378 (4.8384)	Top-1 acc 15.234 (16.017)	Top-5 acc 38.281 (35.132)	lr 0.03472
Warmup Train [15][2970/3239]	Time 0.255 (0.232)	Data 0.001 (0.009)	Loss 4.5594 (4.8382)	Top-1 acc 18.750 (16.018)	Top-5 acc 42.578 (35.135)	lr 0.03472
Warmup Train [15][2980/3239]	Time 0.223 (0.232)	Data 0.001 (0.009)	Loss 4.7779 (4.8380)	Top-1 acc 15.625 (16.017)	Top-5 acc 35.547 (35.139)	lr 0.03471
Warmup Train [15][2990/3239]	Time 0.193 (0.232)	Data 0.001 (0.009)	Loss 4.7870 (4.8379)	Top-1 acc 18.359 (16.019)	Top-5 acc 39.453 (35.143)	lr 0.03471
Warmup Train [15][3000/3239]	Time 0.214 (0.232)	Data 0.001 (0.008)	Loss 5.0332 (4.8378)	Top-1 acc 11.328 (16.024)	Top-5 acc 30.469 (35.147)	lr 0.03470
Warmup Train [15][3010/3239]	Time 0.308 (0.232)	Data 0.001 (0.008)	Loss 4.8577 (4.8376)	Top-1 acc 16.406 (16.026)	Top-5 acc 35.938 (35.150)	lr 0.03470
Warmup Train [15][3020/3239]	Time 0.413 (0.232)	Data 0.001 (0.008)	Loss 5.0334 (4.8376)	Top-1 acc 14.453 (16.028)	Top-5 acc 30.469 (35.151)	lr 0.03469
Warmup Train [15][3030/3239]	Time 0.293 (0.232)	Data 0.002 (0.008)	Loss 4.9964 (4.8377)	Top-1 acc 11.719 (16.026)	Top-5 acc 30.469 (35.150)	lr 0.03468
Warmup Train [15][3040/3239]	Time 0.166 (0.232)	Data 0.001 (0.008)	Loss 4.8292 (4.8376)	Top-1 acc 14.062 (16.027)	Top-5 acc 36.719 (35.154)	lr 0.03468
Warmup Train [15][3050/3239]	Time 0.215 (0.232)	Data 0.001 (0.008)	Loss 4.7280 (4.8374)	Top-1 acc 16.797 (16.032)	Top-5 acc 32.812 (35.154)	lr 0.03467
Warmup Train [15][3060/3239]	Time 0.241 (0.232)	Data 0.001 (0.008)	Loss 4.6571 (4.8372)	Top-1 acc 21.484 (16.035)	Top-5 acc 41.406 (35.158)	lr 0.03467
Warmup Train [15][3070/3239]	Time 0.222 (0.232)	Data 0.001 (0.008)	Loss 4.7693 (4.8372)	Top-1 acc 19.922 (16.036)	Top-5 acc 39.844 (35.160)	lr 0.03466
Warmup Train [15][3080/3239]	Time 0.222 (0.232)	Data 0.001 (0.008)	Loss 4.8487 (4.8373)	Top-1 acc 15.234 (16.035)	Top-5 acc 34.766 (35.159)	lr 0.03466
Warmup Train [15][3090/3239]	Time 0.157 (0.232)	Data 0.001 (0.008)	Loss 4.8318 (4.8372)	Top-1 acc 14.453 (16.035)	Top-5 acc 33.594 (35.158)	lr 0.03465
Warmup Train [15][3100/3239]	Time 0.163 (0.232)	Data 0.001 (0.008)	Loss 4.9665 (4.8371)	Top-1 acc 14.844 (16.039)	Top-5 acc 33.984 (35.162)	lr 0.03465
Warmup Train [15][3110/3239]	Time 0.190 (0.232)	Data 0.001 (0.008)	Loss 4.8633 (4.8369)	Top-1 acc 14.844 (16.042)	Top-5 acc 36.719 (35.164)	lr 0.03464
Warmup Train [15][3120/3239]	Time 0.316 (0.232)	Data 0.001 (0.008)	Loss 4.9421 (4.8368)	Top-1 acc 16.016 (16.045)	Top-5 acc 33.594 (35.167)	lr 0.03463
Warmup Train [15][3130/3239]	Time 0.212 (0.232)	Data 0.001 (0.008)	Loss 4.9023 (4.8367)	Top-1 acc 16.797 (16.047)	Top-5 acc 37.891 (35.171)	lr 0.03463
Warmup Train [15][3140/3239]	Time 0.220 (0.232)	Data 0.001 (0.008)	Loss 4.9346 (4.8366)	Top-1 acc 15.234 (16.046)	Top-5 acc 32.422 (35.172)	lr 0.03462
Warmup Train [15][3150/3239]	Time 0.202 (0.232)	Data 0.001 (0.008)	Loss 4.9700 (4.8365)	Top-1 acc 9.766 (16.046)	Top-5 acc 29.297 (35.173)	lr 0.03462
Warmup Train [15][3160/3239]	Time 0.190 (0.232)	Data 0.001 (0.008)	Loss 4.5891 (4.8364)	Top-1 acc 18.359 (16.048)	Top-5 acc 41.016 (35.175)	lr 0.03461
Warmup Train [15][3170/3239]	Time 0.205 (0.232)	Data 0.001 (0.008)	Loss 4.7365 (4.8362)	Top-1 acc 18.359 (16.050)	Top-5 acc 37.500 (35.180)	lr 0.03461
Warmup Train [15][3180/3239]	Time 0.181 (0.232)	Data 0.000 (0.008)	Loss 4.6102 (4.8362)	Top-1 acc 17.969 (16.051)	Top-5 acc 39.453 (35.179)	lr 0.03460
Warmup Train [15][3190/3239]	Time 0.243 (0.231)	Data 0.000 (0.008)	Loss 4.6882 (4.8360)	Top-1 acc 14.844 (16.052)	Top-5 acc 35.547 (35.182)	lr 0.03460
Warmup Train [15][3200/3239]	Time 0.142 (0.231)	Data 0.000 (0.008)	Loss 4.9488 (4.8359)	Top-1 acc 13.281 (16.054)	Top-5 acc 32.031 (35.182)	lr 0.03459
Warmup Train [15][3210/3239]	Time 0.227 (0.231)	Data 0.000 (0.008)	Loss 4.7424 (4.8357)	Top-1 acc 20.703 (16.058)	Top-5 acc 37.891 (35.187)	lr 0.03458
Warmup Train [15][3220/3239]	Time 0.246 (0.231)	Data 0.000 (0.008)	Loss 4.7575 (4.8356)	Top-1 acc 17.969 (16.056)	Top-5 acc 39.062 (35.189)	lr 0.03458
Warmup Train [15][3230/3239]	Time 0.348 (0.231)	Data 0.000 (0.008)	Loss 4.8332 (4.8355)	Top-1 acc 14.453 (16.056)	Top-5 acc 35.156 (35.192)	lr 0.03457
Warmup Train [15][3239/3239]	Time 0.277 (0.231)	Data 0.000 (0.008)	Loss 4.6979 (4.8354)	Top-1 acc 19.753 (16.060)	Top-5 acc 41.975 (35.197)	lr 0.03457
==========Warmup Valid [15/40]	loss 3.909	top-1 acc 22.665	top-5 acc 44.884	Train top-1 16.060	top-5 35.197	flops: 442.4M
Warmup Train [16][0/3239]	Time 16.230 (16.230)	Data 14.759 (14.759)	Loss 4.7362 (4.7362)	Top-1 acc 16.406 (16.406)	Top-5 acc 37.500 (37.500)	lr 0.03457
Warmup Train [16][10/3239]	Time 0.284 (1.786)	Data 0.001 (1.344)	Loss 4.6596 (4.7649)	Top-1 acc 21.094 (17.294)	Top-5 acc 40.625 (37.642)	lr 0.03456
Warmup Train [16][20/3239]	Time 0.244 (1.075)	Data 0.002 (0.707)	Loss 4.8975 (4.7891)	Top-1 acc 17.188 (16.778)	Top-5 acc 34.375 (36.700)	lr 0.03456
Warmup Train [16][30/3239]	Time 0.242 (0.796)	Data 0.001 (0.479)	Loss 4.8578 (4.7960)	Top-1 acc 14.062 (17.112)	Top-5 acc 35.938 (36.442)	lr 0.03455
Warmup Train [16][40/3239]	Time 0.357 (0.661)	Data 0.002 (0.363)	Loss 4.7201 (4.7891)	Top-1 acc 16.016 (16.997)	Top-5 acc 34.766 (36.462)	lr 0.03454
Warmup Train [16][50/3239]	Time 0.217 (0.579)	Data 0.002 (0.292)	Loss 4.5455 (4.7922)	Top-1 acc 21.875 (16.950)	Top-5 acc 41.016 (36.129)	lr 0.03454
Warmup Train [16][60/3239]	Time 0.211 (0.521)	Data 0.001 (0.244)	Loss 4.9163 (4.7910)	Top-1 acc 16.406 (17.002)	Top-5 acc 31.641 (36.226)	lr 0.03453
Warmup Train [16][70/3239]	Time 0.244 (0.481)	Data 0.001 (0.210)	Loss 4.8176 (4.7874)	Top-1 acc 16.016 (17.050)	Top-5 acc 35.156 (36.240)	lr 0.03453
Warmup Train [16][80/3239]	Time 0.258 (0.454)	Data 0.002 (0.185)	Loss 4.8934 (4.7861)	Top-1 acc 17.188 (17.086)	Top-5 acc 36.719 (36.347)	lr 0.03452
Warmup Train [16][90/3239]	Time 0.174 (0.430)	Data 0.001 (0.164)	Loss 4.6838 (4.7795)	Top-1 acc 20.703 (17.239)	Top-5 acc 39.453 (36.560)	lr 0.03452
Warmup Train [16][100/3239]	Time 0.281 (0.410)	Data 0.002 (0.149)	Loss 4.7899 (4.7753)	Top-1 acc 15.625 (17.215)	Top-5 acc 35.547 (36.580)	lr 0.03451
Warmup Train [16][110/3239]	Time 0.237 (0.394)	Data 0.001 (0.136)	Loss 4.9226 (4.7768)	Top-1 acc 12.109 (17.170)	Top-5 acc 34.375 (36.617)	lr 0.03451
Warmup Train [16][120/3239]	Time 0.196 (0.380)	Data 0.001 (0.125)	Loss 4.8154 (4.7737)	Top-1 acc 13.672 (17.107)	Top-5 acc 32.031 (36.628)	lr 0.03450
Warmup Train [16][130/3239]	Time 0.168 (0.367)	Data 0.001 (0.115)	Loss 4.7345 (4.7729)	Top-1 acc 18.750 (17.155)	Top-5 acc 34.766 (36.674)	lr 0.03449
Warmup Train [16][140/3239]	Time 0.364 (0.358)	Data 0.001 (0.108)	Loss 4.7215 (4.7716)	Top-1 acc 17.188 (17.088)	Top-5 acc 38.672 (36.619)	lr 0.03449
Warmup Train [16][150/3239]	Time 0.212 (0.350)	Data 0.001 (0.101)	Loss 4.8029 (4.7728)	Top-1 acc 18.359 (17.144)	Top-5 acc 38.672 (36.641)	lr 0.03448
Warmup Train [16][160/3239]	Time 0.239 (0.343)	Data 0.001 (0.094)	Loss 4.7498 (4.7721)	Top-1 acc 19.531 (17.141)	Top-5 acc 40.625 (36.668)	lr 0.03448
Warmup Train [16][170/3239]	Time 0.171 (0.336)	Data 0.001 (0.089)	Loss 4.6540 (4.7740)	Top-1 acc 23.047 (17.165)	Top-5 acc 38.672 (36.598)	lr 0.03447
Warmup Train [16][180/3239]	Time 0.209 (0.330)	Data 0.001 (0.085)	Loss 4.8196 (4.7714)	Top-1 acc 15.234 (17.134)	Top-5 acc 39.062 (36.684)	lr 0.03447
Warmup Train [16][190/3239]	Time 0.203 (0.324)	Data 0.001 (0.080)	Loss 4.7677 (4.7708)	Top-1 acc 19.531 (17.095)	Top-5 acc 33.203 (36.651)	lr 0.03446
Warmup Train [16][200/3239]	Time 0.282 (0.319)	Data 0.001 (0.076)	Loss 4.7682 (4.7709)	Top-1 acc 20.312 (17.096)	Top-5 acc 37.500 (36.670)	lr 0.03445
Warmup Train [16][210/3239]	Time 0.133 (0.315)	Data 0.001 (0.073)	Loss 4.8290 (4.7734)	Top-1 acc 14.844 (16.999)	Top-5 acc 37.109 (36.611)	lr 0.03445
Warmup Train [16][220/3239]	Time 0.201 (0.311)	Data 0.001 (0.070)	Loss 4.7625 (4.7734)	Top-1 acc 16.016 (16.993)	Top-5 acc 37.500 (36.630)	lr 0.03444
Warmup Train [16][230/3239]	Time 0.257 (0.307)	Data 0.001 (0.067)	Loss 4.7951 (4.7755)	Top-1 acc 16.016 (16.942)	Top-5 acc 36.719 (36.605)	lr 0.03444
Warmup Train [16][240/3239]	Time 0.205 (0.303)	Data 0.001 (0.064)	Loss 4.9391 (4.7770)	Top-1 acc 12.891 (16.938)	Top-5 acc 33.984 (36.557)	lr 0.03443
Warmup Train [16][250/3239]	Time 0.326 (0.300)	Data 0.001 (0.062)	Loss 4.7317 (4.7787)	Top-1 acc 16.406 (16.924)	Top-5 acc 38.672 (36.548)	lr 0.03443
Warmup Train [16][260/3239]	Time 0.169 (0.297)	Data 0.001 (0.059)	Loss 4.7909 (4.7777)	Top-1 acc 13.281 (16.960)	Top-5 acc 30.859 (36.539)	lr 0.03442
Warmup Train [16][270/3239]	Time 0.216 (0.295)	Data 0.001 (0.057)	Loss 4.8563 (4.7797)	Top-1 acc 17.188 (16.938)	Top-5 acc 35.938 (36.475)	lr 0.03442
Warmup Train [16][280/3239]	Time 0.204 (0.292)	Data 0.001 (0.055)	Loss 4.6338 (4.7792)	Top-1 acc 21.094 (16.976)	Top-5 acc 41.016 (36.499)	lr 0.03441
Warmup Train [16][290/3239]	Time 0.165 (0.289)	Data 0.001 (0.054)	Loss 4.7130 (4.7780)	Top-1 acc 17.188 (17.000)	Top-5 acc 38.281 (36.500)	lr 0.03440
Warmup Train [16][300/3239]	Time 0.172 (0.287)	Data 0.001 (0.052)	Loss 4.9198 (4.7782)	Top-1 acc 15.625 (16.975)	Top-5 acc 31.641 (36.516)	lr 0.03440
Warmup Train [16][310/3239]	Time 0.195 (0.285)	Data 0.002 (0.050)	Loss 4.6895 (4.7784)	Top-1 acc 19.141 (16.983)	Top-5 acc 36.328 (36.520)	lr 0.03439
Warmup Train [16][320/3239]	Time 0.275 (0.283)	Data 0.001 (0.049)	Loss 4.7717 (4.7777)	Top-1 acc 17.188 (16.981)	Top-5 acc 38.281 (36.528)	lr 0.03439
Warmup Train [16][330/3239]	Time 0.210 (0.281)	Data 0.001 (0.047)	Loss 4.7784 (4.7762)	Top-1 acc 14.453 (17.019)	Top-5 acc 35.156 (36.579)	lr 0.03438
Warmup Train [16][340/3239]	Time 0.135 (0.280)	Data 0.001 (0.046)	Loss 4.9447 (4.7778)	Top-1 acc 16.797 (16.986)	Top-5 acc 35.938 (36.554)	lr 0.03438
Warmup Train [16][350/3239]	Time 0.281 (0.278)	Data 0.001 (0.045)	Loss 4.8997 (4.7804)	Top-1 acc 14.844 (16.942)	Top-5 acc 35.156 (36.491)	lr 0.03437
Warmup Train [16][360/3239]	Time 0.344 (0.277)	Data 0.001 (0.044)	Loss 4.8550 (4.7801)	Top-1 acc 17.578 (16.953)	Top-5 acc 36.328 (36.509)	lr 0.03437
Warmup Train [16][370/3239]	Time 0.183 (0.276)	Data 0.001 (0.043)	Loss 4.6495 (4.7800)	Top-1 acc 22.656 (16.982)	Top-5 acc 38.281 (36.501)	lr 0.03436
Warmup Train [16][380/3239]	Time 0.194 (0.275)	Data 0.001 (0.042)	Loss 4.6902 (4.7800)	Top-1 acc 14.453 (16.968)	Top-5 acc 36.328 (36.498)	lr 0.03435
Warmup Train [16][390/3239]	Time 0.244 (0.273)	Data 0.001 (0.041)	Loss 4.5956 (4.7795)	Top-1 acc 18.359 (16.986)	Top-5 acc 39.453 (36.526)	lr 0.03435
Warmup Train [16][400/3239]	Time 0.173 (0.271)	Data 0.001 (0.040)	Loss 4.7970 (4.7809)	Top-1 acc 15.625 (16.941)	Top-5 acc 38.672 (36.491)	lr 0.03434
Warmup Train [16][410/3239]	Time 0.243 (0.270)	Data 0.001 (0.039)	Loss 4.7999 (4.7805)	Top-1 acc 17.578 (16.967)	Top-5 acc 37.109 (36.503)	lr 0.03434
Warmup Train [16][420/3239]	Time 0.188 (0.270)	Data 0.001 (0.038)	Loss 5.0187 (4.7830)	Top-1 acc 11.328 (16.909)	Top-5 acc 32.422 (36.438)	lr 0.03433
Warmup Train [16][430/3239]	Time 0.259 (0.269)	Data 0.001 (0.037)	Loss 4.8484 (4.7828)	Top-1 acc 16.406 (16.914)	Top-5 acc 31.250 (36.414)	lr 0.03433
Warmup Train [16][440/3239]	Time 0.246 (0.268)	Data 0.001 (0.037)	Loss 4.6629 (4.7811)	Top-1 acc 21.094 (16.944)	Top-5 acc 40.234 (36.440)	lr 0.03432
Warmup Train [16][450/3239]	Time 0.192 (0.267)	Data 0.001 (0.036)	Loss 4.7194 (4.7811)	Top-1 acc 18.359 (16.947)	Top-5 acc 37.109 (36.442)	lr 0.03431
Warmup Train [16][460/3239]	Time 0.426 (0.267)	Data 0.002 (0.035)	Loss 4.6828 (4.7808)	Top-1 acc 18.750 (16.950)	Top-5 acc 37.500 (36.448)	lr 0.03431
Warmup Train [16][470/3239]	Time 0.192 (0.266)	Data 0.001 (0.034)	Loss 4.6963 (4.7820)	Top-1 acc 19.141 (16.914)	Top-5 acc 41.797 (36.411)	lr 0.03430
Warmup Train [16][480/3239]	Time 0.196 (0.264)	Data 0.001 (0.034)	Loss 4.7280 (4.7826)	Top-1 acc 18.750 (16.906)	Top-5 acc 38.672 (36.389)	lr 0.03430
Warmup Train [16][490/3239]	Time 0.161 (0.264)	Data 0.002 (0.033)	Loss 4.8018 (4.7827)	Top-1 acc 19.141 (16.931)	Top-5 acc 37.500 (36.399)	lr 0.03429
Warmup Train [16][500/3239]	Time 0.194 (0.263)	Data 0.001 (0.033)	Loss 4.8149 (4.7825)	Top-1 acc 16.406 (16.926)	Top-5 acc 36.328 (36.400)	lr 0.03429
Warmup Train [16][510/3239]	Time 0.146 (0.262)	Data 0.002 (0.032)	Loss 4.7335 (4.7823)	Top-1 acc 14.844 (16.913)	Top-5 acc 38.281 (36.410)	lr 0.03428
Warmup Train [16][520/3239]	Time 0.241 (0.262)	Data 0.002 (0.032)	Loss 4.7395 (4.7819)	Top-1 acc 17.969 (16.918)	Top-5 acc 36.328 (36.418)	lr 0.03428
Warmup Train [16][530/3239]	Time 0.347 (0.262)	Data 0.001 (0.031)	Loss 4.7531 (4.7821)	Top-1 acc 16.016 (16.940)	Top-5 acc 37.109 (36.427)	lr 0.03427
Warmup Train [16][540/3239]	Time 0.215 (0.261)	Data 0.003 (0.030)	Loss 4.7999 (4.7811)	Top-1 acc 18.750 (16.966)	Top-5 acc 35.156 (36.468)	lr 0.03426
Warmup Train [16][550/3239]	Time 0.211 (0.261)	Data 0.001 (0.030)	Loss 4.7818 (4.7813)	Top-1 acc 14.844 (16.976)	Top-5 acc 35.547 (36.474)	lr 0.03426
Warmup Train [16][560/3239]	Time 0.221 (0.260)	Data 0.001 (0.029)	Loss 4.8303 (4.7815)	Top-1 acc 12.500 (16.958)	Top-5 acc 34.375 (36.478)	lr 0.03425
Warmup Train [16][570/3239]	Time 0.343 (0.260)	Data 0.001 (0.029)	Loss 4.6011 (4.7810)	Top-1 acc 21.484 (16.976)	Top-5 acc 37.500 (36.476)	lr 0.03425
Warmup Train [16][580/3239]	Time 0.178 (0.259)	Data 0.001 (0.029)	Loss 4.7441 (4.7798)	Top-1 acc 19.141 (16.984)	Top-5 acc 36.328 (36.500)	lr 0.03424
Warmup Train [16][590/3239]	Time 0.175 (0.258)	Data 0.001 (0.028)	Loss 4.8179 (4.7795)	Top-1 acc 15.234 (16.971)	Top-5 acc 35.156 (36.499)	lr 0.03424
Warmup Train [16][600/3239]	Time 0.208 (0.258)	Data 0.001 (0.028)	Loss 4.6803 (4.7794)	Top-1 acc 16.406 (16.966)	Top-5 acc 35.938 (36.495)	lr 0.03423
Warmup Train [16][610/3239]	Time 0.231 (0.257)	Data 0.001 (0.027)	Loss 4.7511 (4.7794)	Top-1 acc 16.797 (16.955)	Top-5 acc 36.719 (36.490)	lr 0.03422
Warmup Train [16][620/3239]	Time 0.159 (0.256)	Data 0.001 (0.027)	Loss 4.5778 (4.7789)	Top-1 acc 19.922 (16.969)	Top-5 acc 42.578 (36.510)	lr 0.03422
Warmup Train [16][630/3239]	Time 0.195 (0.256)	Data 0.001 (0.027)	Loss 4.8086 (4.7791)	Top-1 acc 14.062 (16.961)	Top-5 acc 33.984 (36.493)	lr 0.03421
Warmup Train [16][640/3239]	Time 0.195 (0.255)	Data 0.002 (0.026)	Loss 4.7516 (4.7797)	Top-1 acc 15.625 (16.960)	Top-5 acc 33.594 (36.465)	lr 0.03421
Warmup Train [16][650/3239]	Time 0.181 (0.255)	Data 0.001 (0.026)	Loss 4.6652 (4.7795)	Top-1 acc 18.750 (16.971)	Top-5 acc 40.625 (36.462)	lr 0.03420
Warmup Train [16][660/3239]	Time 0.249 (0.254)	Data 0.001 (0.026)	Loss 4.6251 (4.7791)	Top-1 acc 18.359 (16.967)	Top-5 acc 41.797 (36.476)	lr 0.03420
Warmup Train [16][670/3239]	Time 0.219 (0.254)	Data 0.001 (0.025)	Loss 4.6515 (4.7788)	Top-1 acc 22.266 (16.983)	Top-5 acc 42.578 (36.495)	lr 0.03419
Warmup Train [16][680/3239]	Time 0.313 (0.254)	Data 0.001 (0.025)	Loss 4.9727 (4.7785)	Top-1 acc 12.109 (16.973)	Top-5 acc 29.688 (36.489)	lr 0.03419
Warmup Train [16][690/3239]	Time 0.165 (0.253)	Data 0.002 (0.025)	Loss 4.8014 (4.7785)	Top-1 acc 16.406 (16.969)	Top-5 acc 37.109 (36.485)	lr 0.03418
Warmup Train [16][700/3239]	Time 0.230 (0.253)	Data 0.001 (0.024)	Loss 4.6820 (4.7782)	Top-1 acc 17.578 (16.967)	Top-5 acc 39.453 (36.501)	lr 0.03417
Warmup Train [16][710/3239]	Time 0.185 (0.252)	Data 0.001 (0.024)	Loss 4.7607 (4.7781)	Top-1 acc 17.578 (16.980)	Top-5 acc 37.109 (36.508)	lr 0.03417
Warmup Train [16][720/3239]	Time 0.228 (0.252)	Data 0.001 (0.024)	Loss 4.7209 (4.7782)	Top-1 acc 19.531 (16.985)	Top-5 acc 41.406 (36.521)	lr 0.03416
Warmup Train [16][730/3239]	Time 0.254 (0.252)	Data 0.001 (0.023)	Loss 4.7436 (4.7778)	Top-1 acc 19.922 (16.994)	Top-5 acc 38.281 (36.525)	lr 0.03416
Warmup Train [16][740/3239]	Time 0.224 (0.251)	Data 0.001 (0.023)	Loss 4.6746 (4.7771)	Top-1 acc 19.922 (17.009)	Top-5 acc 41.016 (36.551)	lr 0.03415
Warmup Train [16][750/3239]	Time 0.305 (0.251)	Data 0.001 (0.023)	Loss 4.7507 (4.7767)	Top-1 acc 21.094 (17.019)	Top-5 acc 37.109 (36.556)	lr 0.03415
Warmup Train [16][760/3239]	Time 0.343 (0.251)	Data 0.001 (0.023)	Loss 4.5955 (4.7771)	Top-1 acc 17.188 (17.020)	Top-5 acc 41.797 (36.542)	lr 0.03414
Warmup Train [16][770/3239]	Time 0.133 (0.251)	Data 0.001 (0.022)	Loss 4.6113 (4.7764)	Top-1 acc 19.531 (17.039)	Top-5 acc 41.406 (36.549)	lr 0.03413
Warmup Train [16][780/3239]	Time 0.213 (0.250)	Data 0.002 (0.022)	Loss 4.7117 (4.7766)	Top-1 acc 19.141 (17.026)	Top-5 acc 37.109 (36.537)	lr 0.03413
Warmup Train [16][790/3239]	Time 0.303 (0.250)	Data 0.001 (0.022)	Loss 4.8969 (4.7775)	Top-1 acc 15.625 (17.019)	Top-5 acc 35.547 (36.532)	lr 0.03412
Warmup Train [16][800/3239]	Time 0.265 (0.250)	Data 0.001 (0.022)	Loss 4.8774 (4.7771)	Top-1 acc 15.625 (17.019)	Top-5 acc 35.547 (36.533)	lr 0.03412
Warmup Train [16][810/3239]	Time 0.160 (0.249)	Data 0.001 (0.021)	Loss 4.6750 (4.7765)	Top-1 acc 21.875 (17.033)	Top-5 acc 38.281 (36.546)	lr 0.03411
Warmup Train [16][820/3239]	Time 0.181 (0.249)	Data 0.001 (0.021)	Loss 4.7186 (4.7765)	Top-1 acc 17.969 (17.029)	Top-5 acc 38.281 (36.543)	lr 0.03411
Warmup Train [16][830/3239]	Time 0.206 (0.249)	Data 0.001 (0.021)	Loss 4.6977 (4.7767)	Top-1 acc 19.531 (17.019)	Top-5 acc 33.203 (36.542)	lr 0.03410
Warmup Train [16][840/3239]	Time 0.240 (0.249)	Data 0.001 (0.021)	Loss 4.7605 (4.7771)	Top-1 acc 15.625 (16.998)	Top-5 acc 35.938 (36.534)	lr 0.03409
Warmup Train [16][850/3239]	Time 0.142 (0.249)	Data 0.001 (0.021)	Loss 4.6627 (4.7770)	Top-1 acc 21.484 (17.010)	Top-5 acc 38.672 (36.531)	lr 0.03409
Warmup Train [16][860/3239]	Time 0.173 (0.248)	Data 0.001 (0.021)	Loss 4.8738 (4.7766)	Top-1 acc 14.844 (17.024)	Top-5 acc 36.328 (36.538)	lr 0.03408
Warmup Train [16][870/3239]	Time 0.276 (0.248)	Data 0.001 (0.020)	Loss 4.6866 (4.7765)	Top-1 acc 21.484 (17.040)	Top-5 acc 40.625 (36.525)	lr 0.03408
Warmup Train [16][880/3239]	Time 0.394 (0.248)	Data 0.002 (0.020)	Loss 4.8711 (4.7767)	Top-1 acc 17.578 (17.045)	Top-5 acc 35.547 (36.514)	lr 0.03407
Warmup Train [16][890/3239]	Time 0.197 (0.248)	Data 0.001 (0.020)	Loss 4.8677 (4.7771)	Top-1 acc 15.234 (17.034)	Top-5 acc 36.328 (36.501)	lr 0.03407
Warmup Train [16][900/3239]	Time 0.276 (0.248)	Data 0.003 (0.020)	Loss 4.7023 (4.7771)	Top-1 acc 19.531 (17.039)	Top-5 acc 42.578 (36.509)	lr 0.03406
Warmup Train [16][910/3239]	Time 0.196 (0.247)	Data 0.001 (0.020)	Loss 4.6711 (4.7767)	Top-1 acc 15.234 (17.049)	Top-5 acc 41.797 (36.518)	lr 0.03406
Warmup Train [16][920/3239]	Time 0.204 (0.247)	Data 0.001 (0.019)	Loss 4.8631 (4.7775)	Top-1 acc 16.797 (17.040)	Top-5 acc 33.594 (36.494)	lr 0.03405
Warmup Train [16][930/3239]	Time 0.222 (0.247)	Data 0.001 (0.019)	Loss 4.6396 (4.7773)	Top-1 acc 18.359 (17.040)	Top-5 acc 38.281 (36.488)	lr 0.03404
Warmup Train [16][940/3239]	Time 0.243 (0.247)	Data 0.002 (0.019)	Loss 4.8684 (4.7779)	Top-1 acc 14.453 (17.029)	Top-5 acc 31.641 (36.485)	lr 0.03404
Warmup Train [16][950/3239]	Time 0.193 (0.246)	Data 0.001 (0.019)	Loss 4.7264 (4.7779)	Top-1 acc 17.578 (17.026)	Top-5 acc 34.766 (36.479)	lr 0.03403
Warmup Train [16][960/3239]	Time 0.142 (0.246)	Data 0.001 (0.019)	Loss 4.7956 (4.7781)	Top-1 acc 14.453 (17.018)	Top-5 acc 40.234 (36.479)	lr 0.03403
Warmup Train [16][970/3239]	Time 0.220 (0.246)	Data 0.001 (0.019)	Loss 4.6940 (4.7785)	Top-1 acc 17.188 (17.012)	Top-5 acc 42.188 (36.471)	lr 0.03402
Warmup Train [16][980/3239]	Time 0.209 (0.246)	Data 0.001 (0.018)	Loss 4.6858 (4.7781)	Top-1 acc 15.234 (17.013)	Top-5 acc 43.359 (36.486)	lr 0.03402
Warmup Train [16][990/3239]	Time 0.307 (0.246)	Data 0.001 (0.018)	Loss 4.6622 (4.7777)	Top-1 acc 19.922 (17.014)	Top-5 acc 40.625 (36.491)	lr 0.03401
Warmup Train [16][1000/3239]	Time 0.239 (0.245)	Data 0.002 (0.018)	Loss 4.7473 (4.7779)	Top-1 acc 17.188 (17.012)	Top-5 acc 39.844 (36.483)	lr 0.03400
Warmup Train [16][1010/3239]	Time 0.246 (0.245)	Data 0.001 (0.018)	Loss 4.7632 (4.7779)	Top-1 acc 14.062 (17.004)	Top-5 acc 37.891 (36.485)	lr 0.03400
Warmup Train [16][1020/3239]	Time 0.230 (0.245)	Data 0.001 (0.018)	Loss 4.8599 (4.7779)	Top-1 acc 14.844 (16.997)	Top-5 acc 38.672 (36.492)	lr 0.03399
Warmup Train [16][1030/3239]	Time 0.207 (0.245)	Data 0.001 (0.018)	Loss 4.7006 (4.7780)	Top-1 acc 19.922 (16.997)	Top-5 acc 39.453 (36.491)	lr 0.03399
Warmup Train [16][1040/3239]	Time 0.143 (0.245)	Data 0.001 (0.017)	Loss 4.6000 (4.7776)	Top-1 acc 20.312 (17.007)	Top-5 acc 40.625 (36.504)	lr 0.03398
Warmup Train [16][1050/3239]	Time 0.226 (0.245)	Data 0.001 (0.017)	Loss 4.7539 (4.7774)	Top-1 acc 18.750 (17.019)	Top-5 acc 36.328 (36.504)	lr 0.03398
Warmup Train [16][1060/3239]	Time 0.253 (0.244)	Data 0.002 (0.017)	Loss 4.7680 (4.7772)	Top-1 acc 20.312 (17.020)	Top-5 acc 35.938 (36.510)	lr 0.03397
Warmup Train [16][1070/3239]	Time 0.172 (0.244)	Data 0.002 (0.017)	Loss 4.8764 (4.7776)	Top-1 acc 17.578 (17.013)	Top-5 acc 35.938 (36.510)	lr 0.03396
Warmup Train [16][1080/3239]	Time 0.293 (0.244)	Data 0.001 (0.017)	Loss 4.7953 (4.7776)	Top-1 acc 15.625 (17.015)	Top-5 acc 37.891 (36.520)	lr 0.03396
Warmup Train [16][1090/3239]	Time 0.385 (0.244)	Data 0.002 (0.017)	Loss 4.6786 (4.7777)	Top-1 acc 17.969 (17.006)	Top-5 acc 39.453 (36.517)	lr 0.03395
Warmup Train [16][1100/3239]	Time 0.172 (0.244)	Data 0.001 (0.017)	Loss 4.6360 (4.7780)	Top-1 acc 16.406 (17.007)	Top-5 acc 42.969 (36.515)	lr 0.03395
Warmup Train [16][1110/3239]	Time 0.241 (0.244)	Data 0.001 (0.017)	Loss 4.9180 (4.7777)	Top-1 acc 13.672 (17.009)	Top-5 acc 33.203 (36.514)	lr 0.03394
Warmup Train [16][1120/3239]	Time 0.188 (0.244)	Data 0.001 (0.016)	Loss 4.5882 (4.7780)	Top-1 acc 23.047 (17.008)	Top-5 acc 44.922 (36.514)	lr 0.03394
Warmup Train [16][1130/3239]	Time 0.254 (0.244)	Data 0.001 (0.016)	Loss 4.8417 (4.7782)	Top-1 acc 15.234 (17.014)	Top-5 acc 33.203 (36.521)	lr 0.03393
Warmup Train [16][1140/3239]	Time 0.249 (0.243)	Data 0.001 (0.016)	Loss 4.7971 (4.7784)	Top-1 acc 16.797 (17.009)	Top-5 acc 35.547 (36.516)	lr 0.03393
Warmup Train [16][1150/3239]	Time 0.265 (0.243)	Data 0.001 (0.016)	Loss 4.6766 (4.7784)	Top-1 acc 20.703 (17.012)	Top-5 acc 38.672 (36.515)	lr 0.03392
Warmup Train [16][1160/3239]	Time 0.221 (0.243)	Data 0.001 (0.016)	Loss 4.4502 (4.7777)	Top-1 acc 22.656 (17.026)	Top-5 acc 41.797 (36.524)	lr 0.03391
Warmup Train [16][1170/3239]	Time 0.242 (0.243)	Data 0.001 (0.016)	Loss 4.8190 (4.7776)	Top-1 acc 15.625 (17.020)	Top-5 acc 35.156 (36.521)	lr 0.03391
Warmup Train [16][1180/3239]	Time 0.257 (0.242)	Data 0.001 (0.016)	Loss 4.8272 (4.7777)	Top-1 acc 17.578 (17.022)	Top-5 acc 33.984 (36.513)	lr 0.03390
Warmup Train [16][1190/3239]	Time 0.234 (0.242)	Data 0.001 (0.016)	Loss 4.7806 (4.7774)	Top-1 acc 14.453 (17.014)	Top-5 acc 33.984 (36.521)	lr 0.03390
Warmup Train [16][1200/3239]	Time 0.362 (0.242)	Data 0.002 (0.015)	Loss 4.7745 (4.7773)	Top-1 acc 18.359 (17.017)	Top-5 acc 38.672 (36.519)	lr 0.03389
Warmup Train [16][1210/3239]	Time 0.189 (0.242)	Data 0.001 (0.015)	Loss 4.5643 (4.7769)	Top-1 acc 23.047 (17.026)	Top-5 acc 40.625 (36.530)	lr 0.03389
Warmup Train [16][1220/3239]	Time 0.226 (0.242)	Data 0.001 (0.015)	Loss 4.8468 (4.7772)	Top-1 acc 16.406 (17.021)	Top-5 acc 34.766 (36.522)	lr 0.03388
Warmup Train [16][1230/3239]	Time 0.269 (0.242)	Data 0.001 (0.015)	Loss 4.7943 (4.7770)	Top-1 acc 14.453 (17.025)	Top-5 acc 36.719 (36.525)	lr 0.03387
Warmup Train [16][1240/3239]	Time 0.186 (0.242)	Data 0.001 (0.015)	Loss 4.8412 (4.7770)	Top-1 acc 18.359 (17.031)	Top-5 acc 36.719 (36.533)	lr 0.03387
Warmup Train [16][1250/3239]	Time 0.215 (0.242)	Data 0.001 (0.015)	Loss 4.9086 (4.7771)	Top-1 acc 13.281 (17.032)	Top-5 acc 29.297 (36.524)	lr 0.03386
Warmup Train [16][1260/3239]	Time 0.211 (0.242)	Data 0.001 (0.015)	Loss 4.8268 (4.7769)	Top-1 acc 19.531 (17.040)	Top-5 acc 38.672 (36.536)	lr 0.03386
Warmup Train [16][1270/3239]	Time 0.177 (0.241)	Data 0.001 (0.015)	Loss 4.9578 (4.7770)	Top-1 acc 14.844 (17.037)	Top-5 acc 32.812 (36.533)	lr 0.03385
Warmup Train [16][1280/3239]	Time 0.239 (0.241)	Data 0.001 (0.015)	Loss 4.8167 (4.7767)	Top-1 acc 14.062 (17.037)	Top-5 acc 36.328 (36.540)	lr 0.03385
Warmup Train [16][1290/3239]	Time 0.206 (0.241)	Data 0.002 (0.015)	Loss 4.6334 (4.7763)	Top-1 acc 17.578 (17.037)	Top-5 acc 38.281 (36.546)	lr 0.03384
Warmup Train [16][1300/3239]	Time 0.216 (0.241)	Data 0.001 (0.014)	Loss 4.7447 (4.7766)	Top-1 acc 17.578 (17.031)	Top-5 acc 41.016 (36.543)	lr 0.03383
Warmup Train [16][1310/3239]	Time 0.251 (0.241)	Data 0.001 (0.014)	Loss 4.9024 (4.7771)	Top-1 acc 16.797 (17.018)	Top-5 acc 33.594 (36.531)	lr 0.03383
Warmup Train [16][1320/3239]	Time 0.299 (0.241)	Data 0.001 (0.014)	Loss 4.8535 (4.7769)	Top-1 acc 12.500 (17.023)	Top-5 acc 35.156 (36.540)	lr 0.03382
Warmup Train [16][1330/3239]	Time 0.223 (0.241)	Data 0.002 (0.014)	Loss 4.8658 (4.7766)	Top-1 acc 14.062 (17.030)	Top-5 acc 34.766 (36.545)	lr 0.03382
Warmup Train [16][1340/3239]	Time 0.139 (0.241)	Data 0.001 (0.014)	Loss 4.7606 (4.7767)	Top-1 acc 16.406 (17.026)	Top-5 acc 38.281 (36.538)	lr 0.03381
Warmup Train [16][1350/3239]	Time 0.240 (0.241)	Data 0.002 (0.014)	Loss 4.7904 (4.7769)	Top-1 acc 20.703 (17.028)	Top-5 acc 37.891 (36.537)	lr 0.03381
Warmup Train [16][1360/3239]	Time 0.243 (0.241)	Data 0.002 (0.014)	Loss 4.7776 (4.7768)	Top-1 acc 16.016 (17.035)	Top-5 acc 35.547 (36.543)	lr 0.03380
Warmup Train [16][1370/3239]	Time 0.173 (0.241)	Data 0.002 (0.014)	Loss 4.9663 (4.7766)	Top-1 acc 15.234 (17.038)	Top-5 acc 33.203 (36.549)	lr 0.03379
Warmup Train [16][1380/3239]	Time 0.320 (0.241)	Data 0.001 (0.014)	Loss 4.6587 (4.7762)	Top-1 acc 19.531 (17.048)	Top-5 acc 37.500 (36.557)	lr 0.03379
Warmup Train [16][1390/3239]	Time 0.317 (0.241)	Data 0.001 (0.014)	Loss 4.5115 (4.7756)	Top-1 acc 20.312 (17.057)	Top-5 acc 44.141 (36.576)	lr 0.03378
Warmup Train [16][1400/3239]	Time 0.311 (0.241)	Data 0.001 (0.014)	Loss 4.5655 (4.7750)	Top-1 acc 22.656 (17.066)	Top-5 acc 42.578 (36.585)	lr 0.03378
Warmup Train [16][1410/3239]	Time 0.165 (0.241)	Data 0.002 (0.014)	Loss 4.6833 (4.7749)	Top-1 acc 16.016 (17.065)	Top-5 acc 40.234 (36.592)	lr 0.03377
Warmup Train [16][1420/3239]	Time 0.296 (0.241)	Data 0.001 (0.013)	Loss 4.9509 (4.7749)	Top-1 acc 14.062 (17.074)	Top-5 acc 30.859 (36.588)	lr 0.03377
Warmup Train [16][1430/3239]	Time 0.309 (0.241)	Data 0.001 (0.013)	Loss 4.5912 (4.7745)	Top-1 acc 16.016 (17.077)	Top-5 acc 40.234 (36.597)	lr 0.03376
Warmup Train [16][1440/3239]	Time 0.244 (0.241)	Data 0.001 (0.013)	Loss 4.6039 (4.7743)	Top-1 acc 17.188 (17.082)	Top-5 acc 39.062 (36.596)	lr 0.03376
Warmup Train [16][1450/3239]	Time 0.201 (0.240)	Data 0.001 (0.013)	Loss 4.6939 (4.7743)	Top-1 acc 16.797 (17.086)	Top-5 acc 35.156 (36.593)	lr 0.03375
Warmup Train [16][1460/3239]	Time 0.172 (0.240)	Data 0.001 (0.013)	Loss 4.6642 (4.7742)	Top-1 acc 21.094 (17.086)	Top-5 acc 38.672 (36.595)	lr 0.03374
Warmup Train [16][1470/3239]	Time 0.227 (0.240)	Data 0.001 (0.013)	Loss 4.6932 (4.7744)	Top-1 acc 19.531 (17.078)	Top-5 acc 36.719 (36.585)	lr 0.03374
Warmup Train [16][1480/3239]	Time 0.233 (0.240)	Data 0.001 (0.013)	Loss 4.7804 (4.7738)	Top-1 acc 17.188 (17.089)	Top-5 acc 36.719 (36.596)	lr 0.03373
Warmup Train [16][1490/3239]	Time 0.233 (0.240)	Data 0.001 (0.013)	Loss 4.8757 (4.7737)	Top-1 acc 16.016 (17.091)	Top-5 acc 35.938 (36.600)	lr 0.03373
Warmup Train [16][1500/3239]	Time 0.236 (0.240)	Data 0.001 (0.013)	Loss 4.8600 (4.7734)	Top-1 acc 16.406 (17.105)	Top-5 acc 34.375 (36.607)	lr 0.03372
Warmup Train [16][1510/3239]	Time 0.300 (0.240)	Data 0.002 (0.013)	Loss 4.8860 (4.7735)	Top-1 acc 15.234 (17.106)	Top-5 acc 35.547 (36.601)	lr 0.03372
Warmup Train [16][1520/3239]	Time 0.389 (0.240)	Data 0.002 (0.013)	Loss 4.7958 (4.7734)	Top-1 acc 16.406 (17.108)	Top-5 acc 37.891 (36.601)	lr 0.03371
Warmup Train [16][1530/3239]	Time 0.392 (0.240)	Data 0.001 (0.013)	Loss 4.8055 (4.7732)	Top-1 acc 16.016 (17.108)	Top-5 acc 36.328 (36.605)	lr 0.03370
Warmup Train [16][1540/3239]	Time 0.278 (0.240)	Data 0.001 (0.013)	Loss 4.7777 (4.7730)	Top-1 acc 18.750 (17.110)	Top-5 acc 37.500 (36.609)	lr 0.03370
Warmup Train [16][1550/3239]	Time 0.278 (0.240)	Data 0.001 (0.013)	Loss 4.6990 (4.7728)	Top-1 acc 22.266 (17.114)	Top-5 acc 41.016 (36.619)	lr 0.03369
Warmup Train [16][1560/3239]	Time 0.210 (0.240)	Data 0.026 (0.013)	Loss 4.8609 (4.7727)	Top-1 acc 14.844 (17.114)	Top-5 acc 33.203 (36.617)	lr 0.03369
Warmup Train [16][1570/3239]	Time 0.196 (0.240)	Data 0.002 (0.012)	Loss 4.8243 (4.7729)	Top-1 acc 17.578 (17.109)	Top-5 acc 33.984 (36.609)	lr 0.03368
Warmup Train [16][1580/3239]	Time 0.232 (0.241)	Data 0.001 (0.012)	Loss 4.6883 (4.7724)	Top-1 acc 17.969 (17.118)	Top-5 acc 39.453 (36.625)	lr 0.03368
Warmup Train [16][1590/3239]	Time 0.211 (0.241)	Data 0.001 (0.012)	Loss 4.6831 (4.7723)	Top-1 acc 14.844 (17.116)	Top-5 acc 35.938 (36.634)	lr 0.03367
Warmup Train [16][1600/3239]	Time 0.250 (0.241)	Data 0.001 (0.012)	Loss 4.6825 (4.7722)	Top-1 acc 17.969 (17.114)	Top-5 acc 38.672 (36.636)	lr 0.03366
Warmup Train [16][1610/3239]	Time 0.228 (0.241)	Data 0.002 (0.012)	Loss 4.7529 (4.7721)	Top-1 acc 19.141 (17.119)	Top-5 acc 39.062 (36.639)	lr 0.03366
Warmup Train [16][1620/3239]	Time 0.387 (0.241)	Data 0.001 (0.012)	Loss 4.9009 (4.7720)	Top-1 acc 16.797 (17.122)	Top-5 acc 37.109 (36.642)	lr 0.03365
Warmup Train [16][1630/3239]	Time 0.192 (0.240)	Data 0.001 (0.012)	Loss 4.6538 (4.7720)	Top-1 acc 18.750 (17.122)	Top-5 acc 37.500 (36.646)	lr 0.03365
Warmup Train [16][1640/3239]	Time 0.180 (0.240)	Data 0.001 (0.012)	Loss 4.8628 (4.7725)	Top-1 acc 16.406 (17.118)	Top-5 acc 34.766 (36.641)	lr 0.03364
Warmup Train [16][1650/3239]	Time 0.241 (0.240)	Data 0.001 (0.012)	Loss 4.5968 (4.7721)	Top-1 acc 20.703 (17.119)	Top-5 acc 41.016 (36.647)	lr 0.03364
Warmup Train [16][1660/3239]	Time 0.144 (0.240)	Data 0.001 (0.012)	Loss 4.8742 (4.7720)	Top-1 acc 14.453 (17.127)	Top-5 acc 35.156 (36.655)	lr 0.03363
Warmup Train [16][1670/3239]	Time 0.239 (0.240)	Data 0.001 (0.012)	Loss 4.9314 (4.7720)	Top-1 acc 13.672 (17.128)	Top-5 acc 32.812 (36.649)	lr 0.03362
Warmup Train [16][1680/3239]	Time 0.257 (0.240)	Data 0.002 (0.012)	Loss 4.5591 (4.7718)	Top-1 acc 19.531 (17.130)	Top-5 acc 41.797 (36.661)	lr 0.03362
Warmup Train [16][1690/3239]	Time 0.245 (0.240)	Data 0.001 (0.012)	Loss 4.6917 (4.7716)	Top-1 acc 21.875 (17.135)	Top-5 acc 38.672 (36.662)	lr 0.03361
Warmup Train [16][1700/3239]	Time 0.239 (0.240)	Data 0.001 (0.012)	Loss 4.7951 (4.7714)	Top-1 acc 16.797 (17.134)	Top-5 acc 39.062 (36.662)	lr 0.03361
Warmup Train [16][1710/3239]	Time 0.250 (0.240)	Data 0.001 (0.012)	Loss 4.7986 (4.7715)	Top-1 acc 16.406 (17.131)	Top-5 acc 37.500 (36.659)	lr 0.03360
Warmup Train [16][1720/3239]	Time 0.299 (0.240)	Data 0.001 (0.012)	Loss 4.8026 (4.7711)	Top-1 acc 15.625 (17.134)	Top-5 acc 34.375 (36.663)	lr 0.03360
Warmup Train [16][1730/3239]	Time 0.316 (0.240)	Data 0.007 (0.012)	Loss 4.6666 (4.7709)	Top-1 acc 17.969 (17.136)	Top-5 acc 37.109 (36.666)	lr 0.03359
Warmup Train [16][1740/3239]	Time 0.162 (0.240)	Data 0.002 (0.012)	Loss 4.6417 (4.7708)	Top-1 acc 20.703 (17.140)	Top-5 acc 38.672 (36.667)	lr 0.03358
Warmup Train [16][1750/3239]	Time 0.261 (0.240)	Data 0.001 (0.012)	Loss 4.7432 (4.7706)	Top-1 acc 16.406 (17.140)	Top-5 acc 38.672 (36.669)	lr 0.03358
Warmup Train [16][1760/3239]	Time 0.196 (0.240)	Data 0.001 (0.011)	Loss 4.7526 (4.7706)	Top-1 acc 17.969 (17.144)	Top-5 acc 36.328 (36.672)	lr 0.03357
Warmup Train [16][1770/3239]	Time 0.229 (0.240)	Data 0.001 (0.011)	Loss 4.6156 (4.7705)	Top-1 acc 23.828 (17.146)	Top-5 acc 44.141 (36.676)	lr 0.03357
Warmup Train [16][1780/3239]	Time 0.237 (0.240)	Data 0.001 (0.011)	Loss 4.7675 (4.7705)	Top-1 acc 20.703 (17.149)	Top-5 acc 40.234 (36.683)	lr 0.03356
Warmup Train [16][1790/3239]	Time 0.226 (0.240)	Data 0.001 (0.011)	Loss 4.6756 (4.7705)	Top-1 acc 16.797 (17.153)	Top-5 acc 41.406 (36.681)	lr 0.03356
Warmup Train [16][1800/3239]	Time 0.187 (0.240)	Data 0.001 (0.011)	Loss 4.6413 (4.7703)	Top-1 acc 21.094 (17.161)	Top-5 acc 39.453 (36.684)	lr 0.03355
Warmup Train [16][1810/3239]	Time 0.291 (0.240)	Data 0.001 (0.011)	Loss 4.6921 (4.7702)	Top-1 acc 16.016 (17.162)	Top-5 acc 36.719 (36.684)	lr 0.03354
Warmup Train [16][1820/3239]	Time 0.397 (0.240)	Data 0.002 (0.011)	Loss 4.8435 (4.7702)	Top-1 acc 14.062 (17.165)	Top-5 acc 36.719 (36.690)	lr 0.03354
Warmup Train [16][1830/3239]	Time 0.273 (0.240)	Data 0.001 (0.011)	Loss 4.7341 (4.7700)	Top-1 acc 16.797 (17.164)	Top-5 acc 38.281 (36.690)	lr 0.03353
Warmup Train [16][1840/3239]	Time 0.268 (0.240)	Data 0.001 (0.011)	Loss 4.7529 (4.7700)	Top-1 acc 16.406 (17.171)	Top-5 acc 33.594 (36.693)	lr 0.03353
Warmup Train [16][1850/3239]	Time 0.252 (0.239)	Data 0.001 (0.011)	Loss 4.6998 (4.7699)	Top-1 acc 19.922 (17.173)	Top-5 acc 40.234 (36.696)	lr 0.03352
Warmup Train [16][1860/3239]	Time 0.209 (0.239)	Data 0.001 (0.011)	Loss 4.6725 (4.7697)	Top-1 acc 19.141 (17.177)	Top-5 acc 41.406 (36.699)	lr 0.03352
Warmup Train [16][1870/3239]	Time 0.223 (0.239)	Data 0.001 (0.011)	Loss 4.8513 (4.7696)	Top-1 acc 17.969 (17.181)	Top-5 acc 36.719 (36.700)	lr 0.03351
Warmup Train [16][1880/3239]	Time 0.220 (0.239)	Data 0.002 (0.011)	Loss 4.7187 (4.7695)	Top-1 acc 19.531 (17.183)	Top-5 acc 37.891 (36.695)	lr 0.03350
Warmup Train [16][1890/3239]	Time 0.208 (0.239)	Data 0.001 (0.011)	Loss 4.6615 (4.7695)	Top-1 acc 17.188 (17.180)	Top-5 acc 39.453 (36.699)	lr 0.03350
Warmup Train [16][1900/3239]	Time 0.225 (0.239)	Data 0.001 (0.011)	Loss 4.7931 (4.7695)	Top-1 acc 14.062 (17.176)	Top-5 acc 34.375 (36.700)	lr 0.03349
Warmup Train [16][1910/3239]	Time 0.145 (0.239)	Data 0.001 (0.011)	Loss 4.9016 (4.7694)	Top-1 acc 14.844 (17.174)	Top-5 acc 35.156 (36.699)	lr 0.03349
Warmup Train [16][1920/3239]	Time 0.296 (0.239)	Data 0.001 (0.011)	Loss 4.7791 (4.7692)	Top-1 acc 18.359 (17.177)	Top-5 acc 40.625 (36.702)	lr 0.03348
Warmup Train [16][1930/3239]	Time 0.189 (0.239)	Data 0.001 (0.011)	Loss 4.7309 (4.7692)	Top-1 acc 14.453 (17.173)	Top-5 acc 41.406 (36.698)	lr 0.03348
Warmup Train [16][1940/3239]	Time 0.198 (0.239)	Data 0.001 (0.011)	Loss 4.8825 (4.7691)	Top-1 acc 13.281 (17.174)	Top-5 acc 31.641 (36.702)	lr 0.03347
Warmup Train [16][1950/3239]	Time 0.247 (0.239)	Data 0.001 (0.011)	Loss 4.6757 (4.7690)	Top-1 acc 19.922 (17.177)	Top-5 acc 41.406 (36.707)	lr 0.03347
Warmup Train [16][1960/3239]	Time 0.195 (0.239)	Data 0.001 (0.011)	Loss 4.8829 (4.7687)	Top-1 acc 13.281 (17.181)	Top-5 acc 33.594 (36.715)	lr 0.03346
Warmup Train [16][1970/3239]	Time 0.152 (0.239)	Data 0.002 (0.011)	Loss 4.9129 (4.7686)	Top-1 acc 13.672 (17.183)	Top-5 acc 30.859 (36.713)	lr 0.03345
Warmup Train [16][1980/3239]	Time 0.220 (0.239)	Data 0.002 (0.011)	Loss 4.8261 (4.7686)	Top-1 acc 17.969 (17.185)	Top-5 acc 35.938 (36.712)	lr 0.03345
Warmup Train [16][1990/3239]	Time 0.271 (0.239)	Data 0.001 (0.011)	Loss 4.6198 (4.7683)	Top-1 acc 19.141 (17.187)	Top-5 acc 37.891 (36.717)	lr 0.03344
Warmup Train [16][2000/3239]	Time 0.154 (0.239)	Data 0.001 (0.010)	Loss 4.6393 (4.7681)	Top-1 acc 17.188 (17.188)	Top-5 acc 37.891 (36.719)	lr 0.03344
Warmup Train [16][2010/3239]	Time 0.213 (0.239)	Data 0.001 (0.010)	Loss 4.4486 (4.7679)	Top-1 acc 17.969 (17.190)	Top-5 acc 42.969 (36.722)	lr 0.03343
Warmup Train [16][2020/3239]	Time 0.368 (0.239)	Data 0.001 (0.010)	Loss 4.9056 (4.7678)	Top-1 acc 15.234 (17.190)	Top-5 acc 32.031 (36.726)	lr 0.03343
Warmup Train [16][2030/3239]	Time 0.195 (0.239)	Data 0.001 (0.010)	Loss 4.6434 (4.7676)	Top-1 acc 16.797 (17.193)	Top-5 acc 39.062 (36.729)	lr 0.03342
Warmup Train [16][2040/3239]	Time 0.277 (0.239)	Data 0.001 (0.010)	Loss 4.7378 (4.7675)	Top-1 acc 17.578 (17.189)	Top-5 acc 35.547 (36.726)	lr 0.03341
Warmup Train [16][2050/3239]	Time 0.153 (0.239)	Data 0.002 (0.010)	Loss 4.7028 (4.7676)	Top-1 acc 17.578 (17.188)	Top-5 acc 39.453 (36.725)	lr 0.03341
Warmup Train [16][2060/3239]	Time 0.191 (0.239)	Data 0.001 (0.010)	Loss 4.6106 (4.7675)	Top-1 acc 18.750 (17.191)	Top-5 acc 41.406 (36.726)	lr 0.03340
Warmup Train [16][2070/3239]	Time 0.240 (0.239)	Data 0.001 (0.010)	Loss 4.6734 (4.7675)	Top-1 acc 17.188 (17.193)	Top-5 acc 38.672 (36.722)	lr 0.03340
Warmup Train [16][2080/3239]	Time 0.185 (0.239)	Data 0.002 (0.010)	Loss 4.7334 (4.7673)	Top-1 acc 16.406 (17.195)	Top-5 acc 37.891 (36.728)	lr 0.03339
Warmup Train [16][2090/3239]	Time 0.237 (0.239)	Data 0.001 (0.010)	Loss 4.8599 (4.7676)	Top-1 acc 16.406 (17.191)	Top-5 acc 35.547 (36.721)	lr 0.03339
Warmup Train [16][2100/3239]	Time 0.176 (0.238)	Data 0.001 (0.010)	Loss 4.6738 (4.7676)	Top-1 acc 17.188 (17.191)	Top-5 acc 39.062 (36.726)	lr 0.03338
Warmup Train [16][2110/3239]	Time 0.245 (0.238)	Data 0.001 (0.010)	Loss 4.6906 (4.7675)	Top-1 acc 14.453 (17.190)	Top-5 acc 39.062 (36.733)	lr 0.03337
Warmup Train [16][2120/3239]	Time 0.206 (0.238)	Data 0.003 (0.010)	Loss 4.7553 (4.7676)	Top-1 acc 16.797 (17.185)	Top-5 acc 36.719 (36.730)	lr 0.03337
Warmup Train [16][2130/3239]	Time 0.363 (0.238)	Data 0.002 (0.010)	Loss 4.6273 (4.7677)	Top-1 acc 22.656 (17.193)	Top-5 acc 39.844 (36.730)	lr 0.03336
Warmup Train [16][2140/3239]	Time 0.240 (0.238)	Data 0.001 (0.010)	Loss 4.6188 (4.7676)	Top-1 acc 15.625 (17.192)	Top-5 acc 37.891 (36.732)	lr 0.03336
Warmup Train [16][2150/3239]	Time 0.152 (0.238)	Data 0.001 (0.010)	Loss 4.8257 (4.7679)	Top-1 acc 14.453 (17.187)	Top-5 acc 35.156 (36.728)	lr 0.03335
Warmup Train [16][2160/3239]	Time 0.236 (0.238)	Data 0.001 (0.010)	Loss 4.7806 (4.7679)	Top-1 acc 14.453 (17.182)	Top-5 acc 36.719 (36.725)	lr 0.03335
Warmup Train [16][2170/3239]	Time 0.225 (0.238)	Data 0.001 (0.010)	Loss 4.9214 (4.7682)	Top-1 acc 19.141 (17.179)	Top-5 acc 33.594 (36.718)	lr 0.03334
Warmup Train [16][2180/3239]	Time 0.208 (0.238)	Data 0.001 (0.010)	Loss 4.6562 (4.7681)	Top-1 acc 19.141 (17.182)	Top-5 acc 41.016 (36.720)	lr 0.03333
Warmup Train [16][2190/3239]	Time 0.236 (0.238)	Data 0.001 (0.010)	Loss 4.7698 (4.7677)	Top-1 acc 16.406 (17.193)	Top-5 acc 35.156 (36.734)	lr 0.03333
Warmup Train [16][2200/3239]	Time 0.286 (0.238)	Data 0.001 (0.010)	Loss 4.6895 (4.7675)	Top-1 acc 21.484 (17.204)	Top-5 acc 42.969 (36.745)	lr 0.03332
Warmup Train [16][2210/3239]	Time 0.207 (0.238)	Data 0.001 (0.010)	Loss 4.6796 (4.7676)	Top-1 acc 16.406 (17.200)	Top-5 acc 40.625 (36.742)	lr 0.03332
Warmup Train [16][2220/3239]	Time 0.203 (0.238)	Data 0.001 (0.010)	Loss 4.7635 (4.7674)	Top-1 acc 19.531 (17.201)	Top-5 acc 37.109 (36.744)	lr 0.03331
Warmup Train [16][2230/3239]	Time 0.325 (0.238)	Data 0.001 (0.010)	Loss 4.6455 (4.7673)	Top-1 acc 19.922 (17.206)	Top-5 acc 39.844 (36.747)	lr 0.03331
Warmup Train [16][2240/3239]	Time 0.231 (0.238)	Data 0.001 (0.010)	Loss 4.7598 (4.7673)	Top-1 acc 16.406 (17.205)	Top-5 acc 35.938 (36.751)	lr 0.03330
Warmup Train [16][2250/3239]	Time 0.227 (0.238)	Data 0.001 (0.010)	Loss 4.8037 (4.7673)	Top-1 acc 14.453 (17.203)	Top-5 acc 33.984 (36.750)	lr 0.03329
Warmup Train [16][2260/3239]	Time 0.235 (0.238)	Data 0.001 (0.010)	Loss 4.6610 (4.7672)	Top-1 acc 15.234 (17.203)	Top-5 acc 38.672 (36.752)	lr 0.03329
Warmup Train [16][2270/3239]	Time 0.189 (0.237)	Data 0.001 (0.010)	Loss 4.6763 (4.7671)	Top-1 acc 18.359 (17.207)	Top-5 acc 37.109 (36.756)	lr 0.03328
Warmup Train [16][2280/3239]	Time 0.165 (0.237)	Data 0.001 (0.010)	Loss 4.8432 (4.7673)	Top-1 acc 22.266 (17.207)	Top-5 acc 36.719 (36.755)	lr 0.03328
Warmup Train [16][2290/3239]	Time 0.309 (0.237)	Data 0.003 (0.010)	Loss 4.8522 (4.7674)	Top-1 acc 14.453 (17.202)	Top-5 acc 37.500 (36.751)	lr 0.03327
Warmup Train [16][2300/3239]	Time 0.225 (0.237)	Data 0.002 (0.009)	Loss 4.7097 (4.7672)	Top-1 acc 20.703 (17.208)	Top-5 acc 38.672 (36.752)	lr 0.03327
Warmup Train [16][2310/3239]	Time 0.301 (0.237)	Data 0.001 (0.009)	Loss 4.7334 (4.7672)	Top-1 acc 22.266 (17.212)	Top-5 acc 39.453 (36.756)	lr 0.03326
Warmup Train [16][2320/3239]	Time 0.288 (0.237)	Data 0.002 (0.009)	Loss 4.8478 (4.7672)	Top-1 acc 16.406 (17.215)	Top-5 acc 35.547 (36.760)	lr 0.03325
Warmup Train [16][2330/3239]	Time 0.223 (0.237)	Data 0.002 (0.009)	Loss 4.7328 (4.7671)	Top-1 acc 16.406 (17.218)	Top-5 acc 34.375 (36.761)	lr 0.03325
Warmup Train [16][2340/3239]	Time 0.199 (0.237)	Data 0.002 (0.009)	Loss 4.7488 (4.7669)	Top-1 acc 18.359 (17.220)	Top-5 acc 35.156 (36.763)	lr 0.03324
Warmup Train [16][2350/3239]	Time 0.235 (0.237)	Data 0.001 (0.009)	Loss 4.7669 (4.7668)	Top-1 acc 17.188 (17.222)	Top-5 acc 36.719 (36.765)	lr 0.03324
Warmup Train [16][2360/3239]	Time 0.174 (0.237)	Data 0.001 (0.009)	Loss 5.0058 (4.7669)	Top-1 acc 9.766 (17.219)	Top-5 acc 31.641 (36.763)	lr 0.03323
Warmup Train [16][2370/3239]	Time 0.184 (0.237)	Data 0.002 (0.009)	Loss 4.7006 (4.7667)	Top-1 acc 19.922 (17.221)	Top-5 acc 39.062 (36.768)	lr 0.03323
Warmup Train [16][2380/3239]	Time 0.172 (0.237)	Data 0.001 (0.009)	Loss 4.7363 (4.7666)	Top-1 acc 15.625 (17.222)	Top-5 acc 40.625 (36.772)	lr 0.03322
Warmup Train [16][2390/3239]	Time 0.245 (0.237)	Data 0.001 (0.009)	Loss 4.6713 (4.7666)	Top-1 acc 22.656 (17.224)	Top-5 acc 42.578 (36.776)	lr 0.03321
Warmup Train [16][2400/3239]	Time 0.210 (0.237)	Data 0.001 (0.009)	Loss 4.7260 (4.7663)	Top-1 acc 16.797 (17.228)	Top-5 acc 36.328 (36.783)	lr 0.03321
Warmup Train [16][2410/3239]	Time 0.239 (0.237)	Data 0.002 (0.009)	Loss 4.8085 (4.7663)	Top-1 acc 15.234 (17.224)	Top-5 acc 38.672 (36.784)	lr 0.03320
Warmup Train [16][2420/3239]	Time 0.288 (0.237)	Data 0.001 (0.009)	Loss 4.9118 (4.7661)	Top-1 acc 16.016 (17.228)	Top-5 acc 31.641 (36.786)	lr 0.03320
Warmup Train [16][2430/3239]	Time 0.196 (0.237)	Data 0.002 (0.009)	Loss 4.7673 (4.7659)	Top-1 acc 14.453 (17.231)	Top-5 acc 39.062 (36.790)	lr 0.03319
Warmup Train [16][2440/3239]	Time 0.236 (0.237)	Data 0.001 (0.009)	Loss 4.6749 (4.7659)	Top-1 acc 19.922 (17.225)	Top-5 acc 39.062 (36.789)	lr 0.03319
Warmup Train [16][2450/3239]	Time 0.241 (0.237)	Data 0.001 (0.009)	Loss 4.6460 (4.7657)	Top-1 acc 20.312 (17.230)	Top-5 acc 42.578 (36.795)	lr 0.03318
Warmup Train [16][2460/3239]	Time 0.237 (0.237)	Data 0.001 (0.009)	Loss 4.8983 (4.7659)	Top-1 acc 16.406 (17.227)	Top-5 acc 33.203 (36.790)	lr 0.03317
Warmup Train [16][2470/3239]	Time 0.191 (0.237)	Data 0.001 (0.009)	Loss 4.7306 (4.7660)	Top-1 acc 16.016 (17.224)	Top-5 acc 36.328 (36.795)	lr 0.03317
Warmup Train [16][2480/3239]	Time 0.206 (0.237)	Data 0.001 (0.009)	Loss 4.8527 (4.7659)	Top-1 acc 19.141 (17.224)	Top-5 acc 34.375 (36.792)	lr 0.03316
Warmup Train [16][2490/3239]	Time 0.229 (0.237)	Data 0.001 (0.009)	Loss 4.7069 (4.7657)	Top-1 acc 19.141 (17.227)	Top-5 acc 36.719 (36.794)	lr 0.03316
Warmup Train [16][2500/3239]	Time 0.188 (0.237)	Data 0.002 (0.009)	Loss 4.7096 (4.7654)	Top-1 acc 17.188 (17.234)	Top-5 acc 36.328 (36.800)	lr 0.03315
Warmup Train [16][2510/3239]	Time 0.367 (0.237)	Data 0.001 (0.009)	Loss 4.6837 (4.7651)	Top-1 acc 19.141 (17.237)	Top-5 acc 39.844 (36.806)	lr 0.03314
Warmup Train [16][2520/3239]	Time 0.357 (0.237)	Data 0.001 (0.009)	Loss 4.5456 (4.7649)	Top-1 acc 17.969 (17.243)	Top-5 acc 42.188 (36.816)	lr 0.03314
Warmup Train [16][2530/3239]	Time 0.211 (0.237)	Data 0.001 (0.009)	Loss 4.8063 (4.7650)	Top-1 acc 14.844 (17.238)	Top-5 acc 35.547 (36.808)	lr 0.03313
Warmup Train [16][2540/3239]	Time 0.227 (0.237)	Data 0.001 (0.009)	Loss 4.6699 (4.7652)	Top-1 acc 19.141 (17.234)	Top-5 acc 40.234 (36.799)	lr 0.03313
Warmup Train [16][2550/3239]	Time 0.159 (0.237)	Data 0.001 (0.009)	Loss 4.9902 (4.7652)	Top-1 acc 14.844 (17.234)	Top-5 acc 30.078 (36.799)	lr 0.03312
Warmup Train [16][2560/3239]	Time 0.214 (0.236)	Data 0.002 (0.009)	Loss 4.6786 (4.7649)	Top-1 acc 17.969 (17.239)	Top-5 acc 39.062 (36.806)	lr 0.03312
Warmup Train [16][2570/3239]	Time 0.203 (0.237)	Data 0.002 (0.009)	Loss 4.6162 (4.7648)	Top-1 acc 19.141 (17.243)	Top-5 acc 42.578 (36.811)	lr 0.03311
Warmup Train [16][2580/3239]	Time 0.225 (0.237)	Data 0.001 (0.009)	Loss 4.6400 (4.7646)	Top-1 acc 17.969 (17.247)	Top-5 acc 41.406 (36.817)	lr 0.03310
Warmup Train [16][2590/3239]	Time 0.174 (0.236)	Data 0.001 (0.009)	Loss 4.5700 (4.7642)	Top-1 acc 21.094 (17.251)	Top-5 acc 44.531 (36.829)	lr 0.03310
Warmup Train [16][2600/3239]	Time 0.199 (0.236)	Data 0.001 (0.009)	Loss 4.6885 (4.7643)	Top-1 acc 21.484 (17.250)	Top-5 acc 37.891 (36.831)	lr 0.03309
Warmup Train [16][2610/3239]	Time 0.222 (0.236)	Data 0.001 (0.009)	Loss 4.7024 (4.7641)	Top-1 acc 21.875 (17.254)	Top-5 acc 41.016 (36.840)	lr 0.03309
Warmup Train [16][2620/3239]	Time 0.283 (0.236)	Data 0.001 (0.009)	Loss 4.8189 (4.7641)	Top-1 acc 15.625 (17.257)	Top-5 acc 33.594 (36.842)	lr 0.03308
Warmup Train [16][2630/3239]	Time 0.244 (0.236)	Data 0.001 (0.009)	Loss 4.8885 (4.7641)	Top-1 acc 13.281 (17.254)	Top-5 acc 31.641 (36.841)	lr 0.03308
Warmup Train [16][2640/3239]	Time 0.229 (0.236)	Data 0.001 (0.009)	Loss 4.8243 (4.7640)	Top-1 acc 17.969 (17.260)	Top-5 acc 37.891 (36.844)	lr 0.03307
Warmup Train [16][2650/3239]	Time 0.192 (0.236)	Data 0.001 (0.009)	Loss 4.5848 (4.7638)	Top-1 acc 14.453 (17.263)	Top-5 acc 43.359 (36.850)	lr 0.03306
Warmup Train [16][2660/3239]	Time 0.213 (0.236)	Data 0.001 (0.009)	Loss 4.7532 (4.7637)	Top-1 acc 16.406 (17.264)	Top-5 acc 36.328 (36.852)	lr 0.03306
Warmup Train [16][2670/3239]	Time 0.205 (0.236)	Data 0.001 (0.009)	Loss 4.8299 (4.7635)	Top-1 acc 16.406 (17.269)	Top-5 acc 37.500 (36.856)	lr 0.03305
Warmup Train [16][2680/3239]	Time 0.345 (0.236)	Data 0.002 (0.009)	Loss 5.0033 (4.7635)	Top-1 acc 13.672 (17.269)	Top-5 acc 32.812 (36.858)	lr 0.03305
Warmup Train [16][2690/3239]	Time 0.208 (0.236)	Data 0.003 (0.009)	Loss 4.7716 (4.7635)	Top-1 acc 17.188 (17.271)	Top-5 acc 37.891 (36.856)	lr 0.03304
Warmup Train [16][2700/3239]	Time 0.239 (0.236)	Data 0.001 (0.009)	Loss 4.6571 (4.7634)	Top-1 acc 18.750 (17.276)	Top-5 acc 39.844 (36.860)	lr 0.03304
Warmup Train [16][2710/3239]	Time 0.216 (0.236)	Data 0.001 (0.008)	Loss 4.6715 (4.7633)	Top-1 acc 17.188 (17.273)	Top-5 acc 40.234 (36.860)	lr 0.03303
Warmup Train [16][2720/3239]	Time 0.218 (0.236)	Data 0.001 (0.008)	Loss 4.9489 (4.7634)	Top-1 acc 13.281 (17.271)	Top-5 acc 32.031 (36.857)	lr 0.03302
Warmup Train [16][2730/3239]	Time 0.319 (0.236)	Data 0.001 (0.008)	Loss 4.7360 (4.7632)	Top-1 acc 17.969 (17.273)	Top-5 acc 33.984 (36.856)	lr 0.03302
Warmup Train [16][2740/3239]	Time 0.262 (0.236)	Data 0.001 (0.008)	Loss 4.6791 (4.7631)	Top-1 acc 19.141 (17.275)	Top-5 acc 37.891 (36.858)	lr 0.03301
Warmup Train [16][2750/3239]	Time 0.158 (0.236)	Data 0.001 (0.008)	Loss 4.9670 (4.7631)	Top-1 acc 14.453 (17.276)	Top-5 acc 33.203 (36.861)	lr 0.03301
Warmup Train [16][2760/3239]	Time 0.203 (0.236)	Data 0.001 (0.008)	Loss 4.7290 (4.7631)	Top-1 acc 18.750 (17.277)	Top-5 acc 39.844 (36.863)	lr 0.03300
Warmup Train [16][2770/3239]	Time 0.214 (0.236)	Data 0.001 (0.008)	Loss 4.6662 (4.7631)	Top-1 acc 18.359 (17.276)	Top-5 acc 38.672 (36.866)	lr 0.03300
Warmup Train [16][2780/3239]	Time 0.219 (0.236)	Data 0.001 (0.008)	Loss 4.6454 (4.7629)	Top-1 acc 21.484 (17.279)	Top-5 acc 40.625 (36.869)	lr 0.03299
Warmup Train [16][2790/3239]	Time 0.215 (0.236)	Data 0.001 (0.008)	Loss 4.6786 (4.7627)	Top-1 acc 23.047 (17.280)	Top-5 acc 38.281 (36.874)	lr 0.03298
Warmup Train [16][2800/3239]	Time 0.210 (0.236)	Data 0.001 (0.008)	Loss 4.7368 (4.7628)	Top-1 acc 16.406 (17.276)	Top-5 acc 36.719 (36.872)	lr 0.03298
Warmup Train [16][2810/3239]	Time 0.209 (0.236)	Data 0.001 (0.008)	Loss 4.9596 (4.7627)	Top-1 acc 14.453 (17.280)	Top-5 acc 32.422 (36.874)	lr 0.03297
Warmup Train [16][2820/3239]	Time 0.159 (0.236)	Data 0.001 (0.008)	Loss 4.6768 (4.7623)	Top-1 acc 19.141 (17.283)	Top-5 acc 35.547 (36.886)	lr 0.03297
Warmup Train [16][2830/3239]	Time 0.325 (0.236)	Data 0.003 (0.008)	Loss 4.6497 (4.7619)	Top-1 acc 15.234 (17.287)	Top-5 acc 38.281 (36.899)	lr 0.03296
Warmup Train [16][2840/3239]	Time 0.161 (0.236)	Data 0.002 (0.008)	Loss 4.9311 (4.7619)	Top-1 acc 18.359 (17.286)	Top-5 acc 35.156 (36.900)	lr 0.03296
Warmup Train [16][2850/3239]	Time 0.197 (0.236)	Data 0.001 (0.008)	Loss 4.7708 (4.7620)	Top-1 acc 21.875 (17.285)	Top-5 acc 38.672 (36.898)	lr 0.03295
Warmup Train [16][2860/3239]	Time 0.166 (0.236)	Data 0.001 (0.008)	Loss 4.6709 (4.7620)	Top-1 acc 17.969 (17.288)	Top-5 acc 39.844 (36.899)	lr 0.03294
Warmup Train [16][2870/3239]	Time 0.204 (0.236)	Data 0.001 (0.008)	Loss 4.8341 (4.7619)	Top-1 acc 16.406 (17.289)	Top-5 acc 35.547 (36.898)	lr 0.03294
Warmup Train [16][2880/3239]	Time 0.202 (0.236)	Data 0.001 (0.008)	Loss 4.7094 (4.7616)	Top-1 acc 19.922 (17.298)	Top-5 acc 40.234 (36.908)	lr 0.03293
Warmup Train [16][2890/3239]	Time 0.263 (0.236)	Data 0.003 (0.008)	Loss 4.4805 (4.7615)	Top-1 acc 21.875 (17.302)	Top-5 acc 42.188 (36.911)	lr 0.03293
Warmup Train [16][2900/3239]	Time 0.229 (0.236)	Data 0.001 (0.008)	Loss 4.8201 (4.7616)	Top-1 acc 16.016 (17.300)	Top-5 acc 35.547 (36.910)	lr 0.03292
Warmup Train [16][2910/3239]	Time 0.218 (0.236)	Data 0.001 (0.008)	Loss 4.7006 (4.7617)	Top-1 acc 16.797 (17.294)	Top-5 acc 40.625 (36.908)	lr 0.03292
Warmup Train [16][2920/3239]	Time 0.183 (0.236)	Data 0.001 (0.008)	Loss 4.7823 (4.7617)	Top-1 acc 18.359 (17.293)	Top-5 acc 37.109 (36.908)	lr 0.03291
Warmup Train [16][2930/3239]	Time 0.279 (0.236)	Data 0.001 (0.008)	Loss 4.8628 (4.7617)	Top-1 acc 16.016 (17.296)	Top-5 acc 33.984 (36.910)	lr 0.03290
Warmup Train [16][2940/3239]	Time 0.221 (0.236)	Data 0.001 (0.008)	Loss 4.5151 (4.7617)	Top-1 acc 25.000 (17.297)	Top-5 acc 41.797 (36.912)	lr 0.03290
Warmup Train [16][2950/3239]	Time 0.231 (0.236)	Data 0.001 (0.008)	Loss 4.6436 (4.7617)	Top-1 acc 19.531 (17.295)	Top-5 acc 41.406 (36.911)	lr 0.03289
Warmup Train [16][2960/3239]	Time 0.255 (0.236)	Data 0.001 (0.008)	Loss 4.8859 (4.7614)	Top-1 acc 15.234 (17.298)	Top-5 acc 36.328 (36.919)	lr 0.03289
Warmup Train [16][2970/3239]	Time 0.205 (0.236)	Data 0.001 (0.008)	Loss 4.7231 (4.7613)	Top-1 acc 19.141 (17.299)	Top-5 acc 34.766 (36.921)	lr 0.03288
Warmup Train [16][2980/3239]	Time 0.194 (0.236)	Data 0.001 (0.008)	Loss 4.5422 (4.7612)	Top-1 acc 22.656 (17.299)	Top-5 acc 40.625 (36.920)	lr 0.03288
Warmup Train [16][2990/3239]	Time 0.303 (0.236)	Data 0.001 (0.008)	Loss 4.6883 (4.7612)	Top-1 acc 18.359 (17.302)	Top-5 acc 38.281 (36.923)	lr 0.03287
Warmup Train [16][3000/3239]	Time 0.249 (0.236)	Data 0.001 (0.008)	Loss 4.6448 (4.7609)	Top-1 acc 21.875 (17.307)	Top-5 acc 41.406 (36.929)	lr 0.03286
Warmup Train [16][3010/3239]	Time 0.260 (0.236)	Data 0.001 (0.008)	Loss 4.6671 (4.7608)	Top-1 acc 17.188 (17.309)	Top-5 acc 36.719 (36.929)	lr 0.03286
Warmup Train [16][3020/3239]	Time 0.244 (0.236)	Data 0.001 (0.008)	Loss 4.6025 (4.7608)	Top-1 acc 19.141 (17.311)	Top-5 acc 42.188 (36.930)	lr 0.03285
Warmup Train [16][3030/3239]	Time 0.290 (0.236)	Data 0.001 (0.008)	Loss 4.7692 (4.7610)	Top-1 acc 14.844 (17.307)	Top-5 acc 35.938 (36.923)	lr 0.03285
Warmup Train [16][3040/3239]	Time 0.236 (0.236)	Data 0.001 (0.008)	Loss 4.5531 (4.7609)	Top-1 acc 19.141 (17.308)	Top-5 acc 37.891 (36.926)	lr 0.03284
Warmup Train [16][3050/3239]	Time 0.266 (0.236)	Data 0.002 (0.008)	Loss 4.8972 (4.7607)	Top-1 acc 16.016 (17.310)	Top-5 acc 35.156 (36.932)	lr 0.03283
Warmup Train [16][3060/3239]	Time 0.184 (0.236)	Data 0.003 (0.008)	Loss 4.8809 (4.7606)	Top-1 acc 15.625 (17.313)	Top-5 acc 33.594 (36.938)	lr 0.03283
Warmup Train [16][3070/3239]	Time 0.231 (0.236)	Data 0.003 (0.008)	Loss 4.7266 (4.7605)	Top-1 acc 20.312 (17.313)	Top-5 acc 38.672 (36.939)	lr 0.03282
Warmup Train [16][3080/3239]	Time 0.186 (0.236)	Data 0.002 (0.008)	Loss 4.8303 (4.7604)	Top-1 acc 15.234 (17.318)	Top-5 acc 39.453 (36.945)	lr 0.03282
Warmup Train [16][3090/3239]	Time 0.178 (0.236)	Data 0.001 (0.008)	Loss 4.6226 (4.7602)	Top-1 acc 21.094 (17.323)	Top-5 acc 39.844 (36.946)	lr 0.03281
Warmup Train [16][3100/3239]	Time 0.241 (0.236)	Data 0.001 (0.008)	Loss 4.9114 (4.7603)	Top-1 acc 17.188 (17.322)	Top-5 acc 35.938 (36.944)	lr 0.03281
Warmup Train [16][3110/3239]	Time 0.201 (0.236)	Data 0.002 (0.008)	Loss 4.7292 (4.7603)	Top-1 acc 16.016 (17.322)	Top-5 acc 38.672 (36.945)	lr 0.03280
Warmup Train [16][3120/3239]	Time 0.278 (0.236)	Data 0.001 (0.008)	Loss 4.8288 (4.7603)	Top-1 acc 15.234 (17.320)	Top-5 acc 36.328 (36.941)	lr 0.03279
Warmup Train [16][3130/3239]	Time 0.175 (0.236)	Data 0.001 (0.008)	Loss 4.6825 (4.7604)	Top-1 acc 18.359 (17.316)	Top-5 acc 38.672 (36.937)	lr 0.03279
Warmup Train [16][3140/3239]	Time 0.201 (0.236)	Data 0.001 (0.008)	Loss 4.6924 (4.7603)	Top-1 acc 17.969 (17.317)	Top-5 acc 38.281 (36.939)	lr 0.03278
Warmup Train [16][3150/3239]	Time 0.176 (0.236)	Data 0.001 (0.008)	Loss 4.8147 (4.7604)	Top-1 acc 17.578 (17.315)	Top-5 acc 33.984 (36.940)	lr 0.03278
Warmup Train [16][3160/3239]	Time 0.298 (0.236)	Data 0.001 (0.008)	Loss 4.7518 (4.7603)	Top-1 acc 16.406 (17.313)	Top-5 acc 33.984 (36.940)	lr 0.03277
Warmup Train [16][3170/3239]	Time 0.315 (0.236)	Data 0.001 (0.008)	Loss 4.7753 (4.7604)	Top-1 acc 15.625 (17.313)	Top-5 acc 37.500 (36.940)	lr 0.03277
Warmup Train [16][3180/3239]	Time 0.252 (0.236)	Data 0.000 (0.008)	Loss 4.6164 (4.7602)	Top-1 acc 20.312 (17.313)	Top-5 acc 39.844 (36.947)	lr 0.03276
Warmup Train [16][3190/3239]	Time 0.184 (0.236)	Data 0.000 (0.008)	Loss 4.8036 (4.7600)	Top-1 acc 13.672 (17.315)	Top-5 acc 36.719 (36.952)	lr 0.03275
Warmup Train [16][3200/3239]	Time 0.189 (0.235)	Data 0.000 (0.008)	Loss 4.7469 (4.7598)	Top-1 acc 20.312 (17.320)	Top-5 acc 37.109 (36.957)	lr 0.03275
Warmup Train [16][3210/3239]	Time 0.195 (0.235)	Data 0.000 (0.008)	Loss 4.4450 (4.7597)	Top-1 acc 23.047 (17.323)	Top-5 acc 41.797 (36.958)	lr 0.03274
Warmup Train [16][3220/3239]	Time 0.152 (0.235)	Data 0.000 (0.008)	Loss 4.8155 (4.7597)	Top-1 acc 16.406 (17.321)	Top-5 acc 38.281 (36.959)	lr 0.03274
Warmup Train [16][3230/3239]	Time 0.248 (0.235)	Data 0.000 (0.008)	Loss 4.7943 (4.7597)	Top-1 acc 19.531 (17.322)	Top-5 acc 37.891 (36.958)	lr 0.03273
Warmup Train [16][3239/3239]	Time 0.153 (0.235)	Data 0.000 (0.008)	Loss 4.7630 (4.7595)	Top-1 acc 11.111 (17.325)	Top-5 acc 32.099 (36.964)	lr 0.03273
==========Warmup Valid [16/40]	loss 3.826	top-1 acc 23.653	top-5 acc 46.513	Train top-1 17.325	top-5 36.964	flops: 442.4M
Warmup Train [17][0/3239]	Time 17.683 (17.683)	Data 14.608 (14.608)	Loss 4.6484 (4.6484)	Top-1 acc 19.141 (19.141)	Top-5 acc 41.797 (41.797)	lr 0.03273
Warmup Train [17][10/3239]	Time 0.304 (1.970)	Data 0.002 (1.434)	Loss 4.7611 (4.7056)	Top-1 acc 14.453 (17.898)	Top-5 acc 36.719 (37.607)	lr 0.03272
Warmup Train [17][20/3239]	Time 0.215 (1.144)	Data 0.001 (0.752)	Loss 4.6083 (4.7230)	Top-1 acc 23.047 (17.913)	Top-5 acc 39.453 (37.760)	lr 0.03271
Warmup Train [17][30/3239]	Time 0.188 (0.850)	Data 0.001 (0.510)	Loss 4.4842 (4.7084)	Top-1 acc 20.703 (18.120)	Top-5 acc 42.188 (38.206)	lr 0.03271
Warmup Train [17][40/3239]	Time 0.189 (0.696)	Data 0.002 (0.387)	Loss 4.7900 (4.7257)	Top-1 acc 17.578 (17.702)	Top-5 acc 35.547 (37.700)	lr 0.03270
Warmup Train [17][50/3239]	Time 0.232 (0.606)	Data 0.001 (0.312)	Loss 4.8642 (4.7274)	Top-1 acc 14.453 (17.701)	Top-5 acc 33.984 (37.492)	lr 0.03270
Warmup Train [17][60/3239]	Time 0.278 (0.546)	Data 0.002 (0.261)	Loss 4.7377 (4.7241)	Top-1 acc 19.922 (17.834)	Top-5 acc 34.766 (37.609)	lr 0.03269
Warmup Train [17][70/3239]	Time 0.215 (0.499)	Data 0.001 (0.224)	Loss 4.7027 (4.7262)	Top-1 acc 17.578 (17.842)	Top-5 acc 37.109 (37.649)	lr 0.03269
Warmup Train [17][80/3239]	Time 0.313 (0.468)	Data 0.001 (0.197)	Loss 4.8237 (4.7280)	Top-1 acc 15.234 (17.814)	Top-5 acc 34.375 (37.707)	lr 0.03268
Warmup Train [17][90/3239]	Time 0.235 (0.442)	Data 0.001 (0.176)	Loss 4.9536 (4.7255)	Top-1 acc 13.281 (17.819)	Top-5 acc 32.031 (37.753)	lr 0.03267
Warmup Train [17][100/3239]	Time 0.201 (0.419)	Data 0.001 (0.158)	Loss 4.6250 (4.7239)	Top-1 acc 20.312 (17.853)	Top-5 acc 41.797 (37.748)	lr 0.03267
Warmup Train [17][110/3239]	Time 0.241 (0.402)	Data 0.002 (0.144)	Loss 4.7848 (4.7194)	Top-1 acc 19.922 (18.018)	Top-5 acc 35.156 (37.862)	lr 0.03266
Warmup Train [17][120/3239]	Time 0.208 (0.389)	Data 0.001 (0.132)	Loss 4.6872 (4.7178)	Top-1 acc 14.844 (17.978)	Top-5 acc 37.891 (37.907)	lr 0.03266
Warmup Train [17][130/3239]	Time 0.248 (0.376)	Data 0.001 (0.122)	Loss 4.7118 (4.7129)	Top-1 acc 16.406 (18.043)	Top-5 acc 36.328 (37.953)	lr 0.03265
Warmup Train [17][140/3239]	Time 0.218 (0.366)	Data 0.001 (0.114)	Loss 4.6044 (4.7102)	Top-1 acc 17.969 (18.071)	Top-5 acc 40.234 (37.954)	lr 0.03264
Warmup Train [17][150/3239]	Time 0.213 (0.357)	Data 0.001 (0.106)	Loss 4.5915 (4.7103)	Top-1 acc 17.969 (18.020)	Top-5 acc 42.188 (37.966)	lr 0.03264
Warmup Train [17][160/3239]	Time 0.254 (0.350)	Data 0.001 (0.100)	Loss 4.6117 (4.7107)	Top-1 acc 19.141 (18.044)	Top-5 acc 37.500 (37.908)	lr 0.03263
Warmup Train [17][170/3239]	Time 0.136 (0.342)	Data 0.001 (0.094)	Loss 4.5083 (4.7093)	Top-1 acc 20.312 (18.019)	Top-5 acc 43.750 (37.891)	lr 0.03263
Warmup Train [17][180/3239]	Time 0.375 (0.337)	Data 0.001 (0.090)	Loss 4.7177 (4.7106)	Top-1 acc 15.625 (17.960)	Top-5 acc 36.328 (37.839)	lr 0.03262
Warmup Train [17][190/3239]	Time 0.298 (0.332)	Data 0.001 (0.085)	Loss 4.5200 (4.7098)	Top-1 acc 22.266 (17.979)	Top-5 acc 41.406 (37.876)	lr 0.03262
Warmup Train [17][200/3239]	Time 0.243 (0.326)	Data 0.002 (0.081)	Loss 4.7961 (4.7089)	Top-1 acc 22.266 (18.021)	Top-5 acc 36.719 (37.914)	lr 0.03261
Warmup Train [17][210/3239]	Time 0.238 (0.322)	Data 0.002 (0.077)	Loss 4.6075 (4.7083)	Top-1 acc 17.969 (18.024)	Top-5 acc 40.625 (37.950)	lr 0.03260
Warmup Train [17][220/3239]	Time 0.217 (0.318)	Data 0.001 (0.074)	Loss 4.8605 (4.7104)	Top-1 acc 17.188 (18.015)	Top-5 acc 33.203 (37.915)	lr 0.03260
Warmup Train [17][230/3239]	Time 0.171 (0.314)	Data 0.001 (0.071)	Loss 4.9674 (4.7124)	Top-1 acc 13.281 (17.930)	Top-5 acc 29.297 (37.872)	lr 0.03259
Warmup Train [17][240/3239]	Time 0.157 (0.311)	Data 0.001 (0.068)	Loss 4.5772 (4.7112)	Top-1 acc 18.750 (17.944)	Top-5 acc 41.406 (37.949)	lr 0.03259
Warmup Train [17][250/3239]	Time 0.199 (0.308)	Data 0.001 (0.066)	Loss 4.5490 (4.7086)	Top-1 acc 19.141 (18.003)	Top-5 acc 42.969 (38.021)	lr 0.03258
Warmup Train [17][260/3239]	Time 0.148 (0.304)	Data 0.001 (0.063)	Loss 4.6346 (4.7062)	Top-1 acc 20.312 (18.009)	Top-5 acc 39.062 (38.072)	lr 0.03258
Warmup Train [17][270/3239]	Time 0.151 (0.301)	Data 0.001 (0.061)	Loss 4.6567 (4.7061)	Top-1 acc 15.625 (18.012)	Top-5 acc 38.281 (38.078)	lr 0.03257
Warmup Train [17][280/3239]	Time 0.322 (0.299)	Data 0.001 (0.059)	Loss 4.6711 (4.7055)	Top-1 acc 19.922 (18.024)	Top-5 acc 40.234 (38.105)	lr 0.03256
Warmup Train [17][290/3239]	Time 0.235 (0.296)	Data 0.002 (0.057)	Loss 4.9773 (4.7045)	Top-1 acc 12.109 (18.057)	Top-5 acc 32.031 (38.124)	lr 0.03256
Warmup Train [17][300/3239]	Time 0.222 (0.294)	Data 0.001 (0.055)	Loss 4.6673 (4.7061)	Top-1 acc 20.703 (18.070)	Top-5 acc 37.891 (38.089)	lr 0.03255
Warmup Train [17][310/3239]	Time 0.287 (0.292)	Data 0.002 (0.054)	Loss 4.7643 (4.7073)	Top-1 acc 20.703 (18.044)	Top-5 acc 36.328 (38.038)	lr 0.03255
Warmup Train [17][320/3239]	Time 0.233 (0.290)	Data 0.001 (0.052)	Loss 4.7869 (4.7085)	Top-1 acc 20.703 (18.050)	Top-5 acc 37.891 (38.004)	lr 0.03254
Warmup Train [17][330/3239]	Time 0.271 (0.287)	Data 0.002 (0.050)	Loss 4.7136 (4.7083)	Top-1 acc 16.797 (18.055)	Top-5 acc 37.500 (38.023)	lr 0.03253
Warmup Train [17][340/3239]	Time 0.177 (0.286)	Data 0.001 (0.049)	Loss 4.6510 (4.7094)	Top-1 acc 17.578 (18.013)	Top-5 acc 38.672 (37.998)	lr 0.03253
Warmup Train [17][350/3239]	Time 0.297 (0.284)	Data 0.001 (0.048)	Loss 4.6085 (4.7092)	Top-1 acc 20.312 (17.997)	Top-5 acc 40.234 (38.005)	lr 0.03252
Warmup Train [17][360/3239]	Time 0.203 (0.282)	Data 0.001 (0.046)	Loss 4.6490 (4.7106)	Top-1 acc 23.438 (17.980)	Top-5 acc 39.844 (37.962)	lr 0.03252
Warmup Train [17][370/3239]	Time 0.228 (0.280)	Data 0.002 (0.045)	Loss 4.7424 (4.7110)	Top-1 acc 16.406 (17.985)	Top-5 acc 36.328 (37.971)	lr 0.03251
Warmup Train [17][380/3239]	Time 0.346 (0.280)	Data 0.002 (0.044)	Loss 4.8103 (4.7104)	Top-1 acc 16.016 (18.013)	Top-5 acc 37.109 (37.998)	lr 0.03251
Warmup Train [17][390/3239]	Time 0.203 (0.278)	Data 0.001 (0.043)	Loss 4.7178 (4.7102)	Top-1 acc 17.578 (18.031)	Top-5 acc 35.938 (38.008)	lr 0.03250
Warmup Train [17][400/3239]	Time 0.161 (0.276)	Data 0.001 (0.042)	Loss 4.5281 (4.7082)	Top-1 acc 19.922 (18.034)	Top-5 acc 42.578 (38.040)	lr 0.03249
Warmup Train [17][410/3239]	Time 0.235 (0.275)	Data 0.002 (0.041)	Loss 4.8104 (4.7069)	Top-1 acc 15.234 (18.053)	Top-5 acc 35.547 (38.084)	lr 0.03249
Warmup Train [17][420/3239]	Time 0.190 (0.274)	Data 0.001 (0.040)	Loss 4.8269 (4.7072)	Top-1 acc 15.625 (18.052)	Top-5 acc 36.328 (38.085)	lr 0.03248
Warmup Train [17][430/3239]	Time 0.159 (0.273)	Data 0.002 (0.040)	Loss 4.8074 (4.7065)	Top-1 acc 14.844 (18.059)	Top-5 acc 37.500 (38.135)	lr 0.03248
Warmup Train [17][440/3239]	Time 0.166 (0.272)	Data 0.001 (0.039)	Loss 4.7838 (4.7073)	Top-1 acc 17.578 (18.043)	Top-5 acc 39.062 (38.110)	lr 0.03247
Warmup Train [17][450/3239]	Time 0.157 (0.271)	Data 0.002 (0.038)	Loss 4.6524 (4.7068)	Top-1 acc 21.484 (18.062)	Top-5 acc 38.281 (38.123)	lr 0.03247
Warmup Train [17][460/3239]	Time 0.170 (0.270)	Data 0.001 (0.037)	Loss 4.6970 (4.7070)	Top-1 acc 18.750 (18.062)	Top-5 acc 39.844 (38.134)	lr 0.03246
Warmup Train [17][470/3239]	Time 0.166 (0.269)	Data 0.002 (0.037)	Loss 4.7247 (4.7074)	Top-1 acc 15.625 (18.063)	Top-5 acc 36.719 (38.128)	lr 0.03245
Warmup Train [17][480/3239]	Time 0.194 (0.268)	Data 0.001 (0.036)	Loss 4.5867 (4.7066)	Top-1 acc 21.094 (18.058)	Top-5 acc 44.141 (38.138)	lr 0.03245
Warmup Train [17][490/3239]	Time 0.336 (0.267)	Data 0.001 (0.035)	Loss 4.7054 (4.7065)	Top-1 acc 16.016 (18.063)	Top-5 acc 35.938 (38.128)	lr 0.03244
Warmup Train [17][500/3239]	Time 0.237 (0.267)	Data 0.001 (0.035)	Loss 4.6931 (4.7059)	Top-1 acc 17.969 (18.070)	Top-5 acc 38.672 (38.164)	lr 0.03244
Warmup Train [17][510/3239]	Time 0.211 (0.266)	Data 0.001 (0.034)	Loss 4.7379 (4.7069)	Top-1 acc 14.453 (18.060)	Top-5 acc 33.594 (38.137)	lr 0.03243
Warmup Train [17][520/3239]	Time 0.222 (0.265)	Data 0.001 (0.033)	Loss 4.7553 (4.7072)	Top-1 acc 14.453 (18.065)	Top-5 acc 37.109 (38.123)	lr 0.03243
Warmup Train [17][530/3239]	Time 0.230 (0.264)	Data 0.002 (0.033)	Loss 4.6044 (4.7064)	Top-1 acc 18.359 (18.076)	Top-5 acc 40.625 (38.122)	lr 0.03242
Warmup Train [17][540/3239]	Time 0.249 (0.263)	Data 0.002 (0.032)	Loss 4.5830 (4.7059)	Top-1 acc 19.531 (18.097)	Top-5 acc 43.750 (38.141)	lr 0.03241
Warmup Train [17][550/3239]	Time 0.159 (0.263)	Data 0.001 (0.032)	Loss 4.6604 (4.7069)	Top-1 acc 17.969 (18.075)	Top-5 acc 37.109 (38.120)	lr 0.03241
Warmup Train [17][560/3239]	Time 0.229 (0.262)	Data 0.002 (0.031)	Loss 4.4927 (4.7068)	Top-1 acc 20.312 (18.078)	Top-5 acc 43.359 (38.129)	lr 0.03240
Warmup Train [17][570/3239]	Time 0.168 (0.262)	Data 0.002 (0.031)	Loss 4.6522 (4.7067)	Top-1 acc 15.625 (18.064)	Top-5 acc 38.672 (38.122)	lr 0.03240
Warmup Train [17][580/3239]	Time 0.139 (0.261)	Data 0.002 (0.030)	Loss 4.9166 (4.7067)	Top-1 acc 16.406 (18.094)	Top-5 acc 32.422 (38.123)	lr 0.03239
Warmup Train [17][590/3239]	Time 0.340 (0.260)	Data 0.002 (0.030)	Loss 4.8255 (4.7072)	Top-1 acc 16.406 (18.092)	Top-5 acc 39.453 (38.117)	lr 0.03238
Warmup Train [17][600/3239]	Time 0.281 (0.260)	Data 0.001 (0.029)	Loss 4.8843 (4.7073)	Top-1 acc 13.672 (18.081)	Top-5 acc 34.766 (38.117)	lr 0.03238
Warmup Train [17][610/3239]	Time 0.172 (0.259)	Data 0.001 (0.029)	Loss 4.9801 (4.7073)	Top-1 acc 13.281 (18.091)	Top-5 acc 38.281 (38.135)	lr 0.03237
Warmup Train [17][620/3239]	Time 0.191 (0.259)	Data 0.001 (0.029)	Loss 4.7039 (4.7065)	Top-1 acc 18.359 (18.105)	Top-5 acc 36.328 (38.169)	lr 0.03237
Warmup Train [17][630/3239]	Time 0.187 (0.258)	Data 0.001 (0.028)	Loss 4.7187 (4.7061)	Top-1 acc 16.016 (18.086)	Top-5 acc 38.672 (38.167)	lr 0.03236
Warmup Train [17][640/3239]	Time 0.228 (0.258)	Data 0.001 (0.028)	Loss 4.6863 (4.7055)	Top-1 acc 19.922 (18.087)	Top-5 acc 39.453 (38.172)	lr 0.03236
Warmup Train [17][650/3239]	Time 0.234 (0.257)	Data 0.003 (0.027)	Loss 4.7038 (4.7054)	Top-1 acc 19.141 (18.086)	Top-5 acc 35.547 (38.164)	lr 0.03235
Warmup Train [17][660/3239]	Time 0.189 (0.257)	Data 0.001 (0.027)	Loss 4.8073 (4.7043)	Top-1 acc 19.141 (18.106)	Top-5 acc 39.844 (38.195)	lr 0.03234
Warmup Train [17][670/3239]	Time 0.230 (0.256)	Data 0.001 (0.027)	Loss 4.6818 (4.7042)	Top-1 acc 19.141 (18.111)	Top-5 acc 34.766 (38.196)	lr 0.03234
Warmup Train [17][680/3239]	Time 0.212 (0.256)	Data 0.001 (0.026)	Loss 4.5714 (4.7043)	Top-1 acc 20.312 (18.116)	Top-5 acc 39.062 (38.187)	lr 0.03233
Warmup Train [17][690/3239]	Time 0.154 (0.255)	Data 0.002 (0.026)	Loss 4.5611 (4.7034)	Top-1 acc 21.484 (18.127)	Top-5 acc 41.406 (38.209)	lr 0.03233
Warmup Train [17][700/3239]	Time 0.184 (0.255)	Data 0.001 (0.026)	Loss 4.6595 (4.7029)	Top-1 acc 21.484 (18.145)	Top-5 acc 40.625 (38.209)	lr 0.03232
Warmup Train [17][710/3239]	Time 0.341 (0.255)	Data 0.002 (0.025)	Loss 4.7554 (4.7039)	Top-1 acc 21.094 (18.125)	Top-5 acc 38.281 (38.191)	lr 0.03232
Warmup Train [17][720/3239]	Time 0.219 (0.254)	Data 0.001 (0.025)	Loss 4.6036 (4.7037)	Top-1 acc 19.922 (18.150)	Top-5 acc 43.750 (38.223)	lr 0.03231
Warmup Train [17][730/3239]	Time 0.176 (0.254)	Data 0.001 (0.025)	Loss 4.7211 (4.7031)	Top-1 acc 15.625 (18.158)	Top-5 acc 36.719 (38.252)	lr 0.03230
Warmup Train [17][740/3239]	Time 0.205 (0.254)	Data 0.001 (0.025)	Loss 4.9545 (4.7035)	Top-1 acc 15.234 (18.143)	Top-5 acc 34.766 (38.253)	lr 0.03230
Warmup Train [17][750/3239]	Time 0.277 (0.253)	Data 0.002 (0.024)	Loss 4.5794 (4.7026)	Top-1 acc 19.922 (18.153)	Top-5 acc 41.797 (38.277)	lr 0.03229
Warmup Train [17][760/3239]	Time 0.280 (0.253)	Data 0.001 (0.024)	Loss 4.6771 (4.7024)	Top-1 acc 18.359 (18.153)	Top-5 acc 38.281 (38.281)	lr 0.03229
Warmup Train [17][770/3239]	Time 0.314 (0.253)	Data 0.001 (0.024)	Loss 4.7141 (4.7029)	Top-1 acc 19.141 (18.151)	Top-5 acc 40.625 (38.279)	lr 0.03228
Warmup Train [17][780/3239]	Time 0.186 (0.252)	Data 0.001 (0.023)	Loss 4.6215 (4.7025)	Top-1 acc 19.141 (18.156)	Top-5 acc 41.016 (38.290)	lr 0.03227
Warmup Train [17][790/3239]	Time 0.233 (0.252)	Data 0.001 (0.023)	Loss 4.6780 (4.7026)	Top-1 acc 23.438 (18.163)	Top-5 acc 40.625 (38.284)	lr 0.03227
Warmup Train [17][800/3239]	Time 0.236 (0.252)	Data 0.001 (0.023)	Loss 4.6758 (4.7027)	Top-1 acc 17.969 (18.165)	Top-5 acc 38.281 (38.281)	lr 0.03226
Warmup Train [17][810/3239]	Time 0.358 (0.251)	Data 0.001 (0.023)	Loss 4.7220 (4.7034)	Top-1 acc 17.188 (18.155)	Top-5 acc 40.625 (38.261)	lr 0.03226
Warmup Train [17][820/3239]	Time 0.213 (0.251)	Data 0.001 (0.022)	Loss 4.6774 (4.7031)	Top-1 acc 14.062 (18.148)	Top-5 acc 39.844 (38.263)	lr 0.03225
Warmup Train [17][830/3239]	Time 0.240 (0.251)	Data 0.002 (0.022)	Loss 4.6540 (4.7027)	Top-1 acc 20.312 (18.143)	Top-5 acc 38.281 (38.284)	lr 0.03225
Warmup Train [17][840/3239]	Time 0.239 (0.250)	Data 0.002 (0.022)	Loss 4.6242 (4.7024)	Top-1 acc 18.359 (18.153)	Top-5 acc 41.797 (38.288)	lr 0.03224
Warmup Train [17][850/3239]	Time 0.218 (0.250)	Data 0.001 (0.022)	Loss 4.7865 (4.7027)	Top-1 acc 19.531 (18.150)	Top-5 acc 35.547 (38.279)	lr 0.03223
Warmup Train [17][860/3239]	Time 0.196 (0.250)	Data 0.001 (0.021)	Loss 4.8418 (4.7031)	Top-1 acc 16.016 (18.140)	Top-5 acc 34.375 (38.261)	lr 0.03223
Warmup Train [17][870/3239]	Time 0.228 (0.249)	Data 0.001 (0.021)	Loss 4.9038 (4.7033)	Top-1 acc 16.016 (18.148)	Top-5 acc 35.547 (38.247)	lr 0.03222
Warmup Train [17][880/3239]	Time 0.198 (0.249)	Data 0.001 (0.021)	Loss 4.7205 (4.7032)	Top-1 acc 16.406 (18.159)	Top-5 acc 35.547 (38.254)	lr 0.03222
Warmup Train [17][890/3239]	Time 0.234 (0.249)	Data 0.001 (0.021)	Loss 4.5833 (4.7036)	Top-1 acc 24.219 (18.156)	Top-5 acc 38.672 (38.250)	lr 0.03221
Warmup Train [17][900/3239]	Time 0.204 (0.249)	Data 0.001 (0.021)	Loss 4.7600 (4.7039)	Top-1 acc 16.406 (18.148)	Top-5 acc 36.719 (38.240)	lr 0.03220
Warmup Train [17][910/3239]	Time 0.205 (0.248)	Data 0.001 (0.020)	Loss 4.5312 (4.7035)	Top-1 acc 17.969 (18.155)	Top-5 acc 40.625 (38.251)	lr 0.03220
Warmup Train [17][920/3239]	Time 0.369 (0.248)	Data 0.002 (0.020)	Loss 4.5793 (4.7029)	Top-1 acc 19.141 (18.159)	Top-5 acc 39.453 (38.257)	lr 0.03219
Warmup Train [17][930/3239]	Time 0.234 (0.248)	Data 0.001 (0.020)	Loss 4.8688 (4.7028)	Top-1 acc 17.188 (18.155)	Top-5 acc 31.641 (38.254)	lr 0.03219
Warmup Train [17][940/3239]	Time 0.214 (0.248)	Data 0.002 (0.020)	Loss 4.7386 (4.7030)	Top-1 acc 17.188 (18.156)	Top-5 acc 36.328 (38.259)	lr 0.03218
Warmup Train [17][950/3239]	Time 0.196 (0.248)	Data 0.001 (0.020)	Loss 4.5311 (4.7028)	Top-1 acc 21.094 (18.168)	Top-5 acc 42.578 (38.283)	lr 0.03218
Warmup Train [17][960/3239]	Time 0.228 (0.247)	Data 0.002 (0.020)	Loss 4.9126 (4.7029)	Top-1 acc 10.547 (18.166)	Top-5 acc 33.203 (38.272)	lr 0.03217
Warmup Train [17][970/3239]	Time 0.272 (0.247)	Data 0.001 (0.019)	Loss 4.6111 (4.7025)	Top-1 acc 18.750 (18.178)	Top-5 acc 41.016 (38.284)	lr 0.03216
Warmup Train [17][980/3239]	Time 0.228 (0.247)	Data 0.001 (0.019)	Loss 4.7300 (4.7026)	Top-1 acc 15.234 (18.172)	Top-5 acc 38.672 (38.290)	lr 0.03216
Warmup Train [17][990/3239]	Time 0.206 (0.247)	Data 0.001 (0.019)	Loss 4.7703 (4.7025)	Top-1 acc 17.969 (18.182)	Top-5 acc 37.500 (38.291)	lr 0.03215
Warmup Train [17][1000/3239]	Time 0.253 (0.247)	Data 0.001 (0.019)	Loss 4.8640 (4.7023)	Top-1 acc 14.453 (18.187)	Top-5 acc 33.984 (38.309)	lr 0.03215
Warmup Train [17][1010/3239]	Time 0.213 (0.246)	Data 0.001 (0.019)	Loss 4.6283 (4.7022)	Top-1 acc 20.312 (18.197)	Top-5 acc 43.750 (38.318)	lr 0.03214
Warmup Train [17][1020/3239]	Time 0.239 (0.246)	Data 0.001 (0.018)	Loss 4.7601 (4.7022)	Top-1 acc 17.578 (18.199)	Top-5 acc 34.766 (38.315)	lr 0.03214
Warmup Train [17][1030/3239]	Time 0.373 (0.246)	Data 0.001 (0.018)	Loss 4.6203 (4.7018)	Top-1 acc 20.703 (18.210)	Top-5 acc 43.359 (38.335)	lr 0.03213
Warmup Train [17][1040/3239]	Time 0.256 (0.246)	Data 0.001 (0.018)	Loss 4.6290 (4.7015)	Top-1 acc 16.797 (18.213)	Top-5 acc 42.578 (38.346)	lr 0.03212
Warmup Train [17][1050/3239]	Time 0.225 (0.246)	Data 0.001 (0.018)	Loss 4.5801 (4.7012)	Top-1 acc 20.312 (18.219)	Top-5 acc 43.359 (38.358)	lr 0.03212
Warmup Train [17][1060/3239]	Time 0.227 (0.245)	Data 0.001 (0.018)	Loss 4.7031 (4.7012)	Top-1 acc 18.359 (18.225)	Top-5 acc 39.062 (38.365)	lr 0.03211
Warmup Train [17][1070/3239]	Time 0.230 (0.245)	Data 0.001 (0.018)	Loss 4.6248 (4.7011)	Top-1 acc 19.922 (18.231)	Top-5 acc 37.891 (38.362)	lr 0.03211
Warmup Train [17][1080/3239]	Time 0.170 (0.245)	Data 0.001 (0.018)	Loss 4.5857 (4.7010)	Top-1 acc 22.266 (18.238)	Top-5 acc 40.625 (38.373)	lr 0.03210
Warmup Train [17][1090/3239]	Time 0.241 (0.245)	Data 0.001 (0.017)	Loss 4.8245 (4.7009)	Top-1 acc 18.750 (18.241)	Top-5 acc 36.328 (38.376)	lr 0.03209
Warmup Train [17][1100/3239]	Time 0.190 (0.245)	Data 0.001 (0.017)	Loss 4.5440 (4.7007)	Top-1 acc 20.703 (18.247)	Top-5 acc 39.844 (38.388)	lr 0.03209
Warmup Train [17][1110/3239]	Time 0.172 (0.244)	Data 0.001 (0.017)	Loss 4.4062 (4.7006)	Top-1 acc 24.219 (18.242)	Top-5 acc 47.266 (38.385)	lr 0.03208
Warmup Train [17][1120/3239]	Time 0.180 (0.244)	Data 0.002 (0.017)	Loss 4.7595 (4.7005)	Top-1 acc 16.406 (18.238)	Top-5 acc 39.844 (38.396)	lr 0.03208
Warmup Train [17][1130/3239]	Time 0.402 (0.244)	Data 0.001 (0.017)	Loss 4.5375 (4.7001)	Top-1 acc 22.266 (18.252)	Top-5 acc 41.016 (38.402)	lr 0.03207
Warmup Train [17][1140/3239]	Time 0.156 (0.244)	Data 0.001 (0.017)	Loss 4.7952 (4.7000)	Top-1 acc 20.312 (18.260)	Top-5 acc 39.844 (38.407)	lr 0.03207
Warmup Train [17][1150/3239]	Time 0.170 (0.244)	Data 0.001 (0.017)	Loss 4.4039 (4.6993)	Top-1 acc 20.312 (18.273)	Top-5 acc 44.531 (38.420)	lr 0.03206
Warmup Train [17][1160/3239]	Time 0.214 (0.244)	Data 0.001 (0.017)	Loss 4.5889 (4.6989)	Top-1 acc 20.703 (18.281)	Top-5 acc 41.016 (38.427)	lr 0.03205
Warmup Train [17][1170/3239]	Time 0.164 (0.243)	Data 0.001 (0.016)	Loss 4.8397 (4.6989)	Top-1 acc 14.844 (18.285)	Top-5 acc 35.156 (38.415)	lr 0.03205
Warmup Train [17][1180/3239]	Time 0.197 (0.243)	Data 0.001 (0.016)	Loss 4.5734 (4.6992)	Top-1 acc 20.703 (18.278)	Top-5 acc 43.750 (38.403)	lr 0.03204
Warmup Train [17][1190/3239]	Time 0.141 (0.243)	Data 0.001 (0.016)	Loss 4.7437 (4.6992)	Top-1 acc 16.016 (18.266)	Top-5 acc 36.328 (38.407)	lr 0.03204
Warmup Train [17][1200/3239]	Time 0.153 (0.243)	Data 0.002 (0.016)	Loss 4.6616 (4.6994)	Top-1 acc 19.531 (18.268)	Top-5 acc 40.625 (38.410)	lr 0.03203
Warmup Train [17][1210/3239]	Time 0.147 (0.243)	Data 0.001 (0.016)	Loss 4.6812 (4.6994)	Top-1 acc 20.312 (18.266)	Top-5 acc 36.328 (38.415)	lr 0.03202
Warmup Train [17][1220/3239]	Time 0.193 (0.243)	Data 0.001 (0.016)	Loss 4.5220 (4.6995)	Top-1 acc 20.703 (18.261)	Top-5 acc 41.016 (38.414)	lr 0.03202
Warmup Train [17][1230/3239]	Time 0.229 (0.243)	Data 0.002 (0.016)	Loss 4.7502 (4.6989)	Top-1 acc 17.188 (18.270)	Top-5 acc 38.281 (38.428)	lr 0.03201
Warmup Train [17][1240/3239]	Time 0.169 (0.242)	Data 0.001 (0.016)	Loss 4.7380 (4.6989)	Top-1 acc 15.625 (18.267)	Top-5 acc 35.156 (38.433)	lr 0.03201
Warmup Train [17][1250/3239]	Time 0.294 (0.242)	Data 0.002 (0.016)	Loss 4.6682 (4.6983)	Top-1 acc 16.797 (18.274)	Top-5 acc 40.234 (38.443)	lr 0.03200
Warmup Train [17][1260/3239]	Time 0.192 (0.242)	Data 0.001 (0.016)	Loss 4.6332 (4.6981)	Top-1 acc 19.922 (18.281)	Top-5 acc 40.234 (38.448)	lr 0.03200
Warmup Train [17][1270/3239]	Time 0.211 (0.242)	Data 0.001 (0.016)	Loss 4.8180 (4.6982)	Top-1 acc 15.625 (18.284)	Top-5 acc 35.547 (38.457)	lr 0.03199
Warmup Train [17][1280/3239]	Time 0.247 (0.242)	Data 0.001 (0.016)	Loss 4.6362 (4.6982)	Top-1 acc 22.656 (18.280)	Top-5 acc 41.016 (38.461)	lr 0.03198
Warmup Train [17][1290/3239]	Time 0.137 (0.242)	Data 0.001 (0.015)	Loss 4.7173 (4.6986)	Top-1 acc 18.750 (18.275)	Top-5 acc 37.109 (38.449)	lr 0.03198
Warmup Train [17][1300/3239]	Time 0.230 (0.241)	Data 0.001 (0.015)	Loss 4.8337 (4.6987)	Top-1 acc 13.672 (18.271)	Top-5 acc 35.156 (38.447)	lr 0.03197
Warmup Train [17][1310/3239]	Time 0.225 (0.241)	Data 0.002 (0.015)	Loss 4.5760 (4.6986)	Top-1 acc 23.438 (18.271)	Top-5 acc 43.359 (38.452)	lr 0.03197
Warmup Train [17][1320/3239]	Time 0.133 (0.241)	Data 0.001 (0.015)	Loss 4.7620 (4.6985)	Top-1 acc 17.969 (18.272)	Top-5 acc 38.672 (38.446)	lr 0.03196
Warmup Train [17][1330/3239]	Time 0.246 (0.241)	Data 0.001 (0.015)	Loss 4.7683 (4.6986)	Top-1 acc 16.797 (18.269)	Top-5 acc 35.938 (38.446)	lr 0.03195
Warmup Train [17][1340/3239]	Time 0.204 (0.241)	Data 0.001 (0.015)	Loss 4.8375 (4.6986)	Top-1 acc 12.500 (18.261)	Top-5 acc 32.031 (38.440)	lr 0.03195
Warmup Train [17][1350/3239]	Time 0.227 (0.241)	Data 0.001 (0.015)	Loss 4.7455 (4.6985)	Top-1 acc 19.141 (18.262)	Top-5 acc 41.406 (38.439)	lr 0.03194
Warmup Train [17][1360/3239]	Time 0.373 (0.241)	Data 0.001 (0.015)	Loss 4.6929 (4.6988)	Top-1 acc 15.625 (18.263)	Top-5 acc 36.328 (38.440)	lr 0.03194
Warmup Train [17][1370/3239]	Time 0.228 (0.241)	Data 0.001 (0.015)	Loss 4.6525 (4.6985)	Top-1 acc 16.016 (18.262)	Top-5 acc 39.453 (38.435)	lr 0.03193
Warmup Train [17][1380/3239]	Time 0.212 (0.241)	Data 0.001 (0.015)	Loss 4.4960 (4.6984)	Top-1 acc 22.266 (18.260)	Top-5 acc 44.922 (38.439)	lr 0.03193
Warmup Train [17][1390/3239]	Time 0.222 (0.240)	Data 0.002 (0.015)	Loss 4.9008 (4.6984)	Top-1 acc 15.234 (18.265)	Top-5 acc 36.328 (38.438)	lr 0.03192
Warmup Train [17][1400/3239]	Time 0.241 (0.240)	Data 0.002 (0.014)	Loss 4.8182 (4.6984)	Top-1 acc 14.453 (18.264)	Top-5 acc 36.719 (38.447)	lr 0.03191
Warmup Train [17][1410/3239]	Time 0.243 (0.240)	Data 0.002 (0.014)	Loss 4.7097 (4.6981)	Top-1 acc 19.141 (18.268)	Top-5 acc 37.891 (38.455)	lr 0.03191
Warmup Train [17][1420/3239]	Time 0.269 (0.240)	Data 0.002 (0.014)	Loss 4.4721 (4.6978)	Top-1 acc 19.141 (18.268)	Top-5 acc 42.188 (38.459)	lr 0.03190
Warmup Train [17][1430/3239]	Time 0.177 (0.240)	Data 0.002 (0.014)	Loss 4.6722 (4.6975)	Top-1 acc 18.750 (18.272)	Top-5 acc 37.891 (38.460)	lr 0.03190
Warmup Train [17][1440/3239]	Time 0.239 (0.240)	Data 0.001 (0.014)	Loss 4.6417 (4.6973)	Top-1 acc 19.922 (18.274)	Top-5 acc 37.891 (38.456)	lr 0.03189
Warmup Train [17][1450/3239]	Time 0.243 (0.240)	Data 0.002 (0.014)	Loss 4.7562 (4.6977)	Top-1 acc 16.797 (18.270)	Top-5 acc 37.891 (38.451)	lr 0.03189
Warmup Train [17][1460/3239]	Time 0.316 (0.240)	Data 0.001 (0.014)	Loss 4.6290 (4.6974)	Top-1 acc 19.531 (18.275)	Top-5 acc 41.016 (38.457)	lr 0.03188
Warmup Train [17][1470/3239]	Time 0.182 (0.240)	Data 0.002 (0.014)	Loss 4.7628 (4.6975)	Top-1 acc 15.625 (18.277)	Top-5 acc 35.938 (38.454)	lr 0.03187
Warmup Train [17][1480/3239]	Time 0.200 (0.239)	Data 0.001 (0.014)	Loss 4.7409 (4.6972)	Top-1 acc 14.844 (18.282)	Top-5 acc 38.672 (38.462)	lr 0.03187
Warmup Train [17][1490/3239]	Time 0.183 (0.239)	Data 0.001 (0.014)	Loss 4.8391 (4.6972)	Top-1 acc 16.797 (18.285)	Top-5 acc 35.938 (38.465)	lr 0.03186
Warmup Train [17][1500/3239]	Time 0.181 (0.239)	Data 0.001 (0.014)	Loss 4.7129 (4.6972)	Top-1 acc 18.359 (18.285)	Top-5 acc 39.453 (38.465)	lr 0.03186
Warmup Train [17][1510/3239]	Time 0.234 (0.239)	Data 0.001 (0.014)	Loss 4.7931 (4.6974)	Top-1 acc 17.578 (18.282)	Top-5 acc 38.281 (38.455)	lr 0.03185
Warmup Train [17][1520/3239]	Time 0.179 (0.239)	Data 0.002 (0.014)	Loss 4.6550 (4.6976)	Top-1 acc 18.359 (18.281)	Top-5 acc 37.500 (38.452)	lr 0.03184
Warmup Train [17][1530/3239]	Time 0.173 (0.239)	Data 0.002 (0.013)	Loss 4.7144 (4.6980)	Top-1 acc 20.312 (18.275)	Top-5 acc 39.062 (38.445)	lr 0.03184
Warmup Train [17][1540/3239]	Time 0.246 (0.239)	Data 0.001 (0.013)	Loss 4.5185 (4.6981)	Top-1 acc 19.141 (18.267)	Top-5 acc 46.875 (38.450)	lr 0.03183
Warmup Train [17][1550/3239]	Time 0.190 (0.239)	Data 0.001 (0.013)	Loss 4.6687 (4.6979)	Top-1 acc 18.750 (18.277)	Top-5 acc 39.062 (38.453)	lr 0.03183
Warmup Train [17][1560/3239]	Time 0.220 (0.239)	Data 0.001 (0.013)	Loss 4.6096 (4.6977)	Top-1 acc 20.703 (18.284)	Top-5 acc 39.844 (38.465)	lr 0.03182
Warmup Train [17][1570/3239]	Time 0.175 (0.239)	Data 0.001 (0.013)	Loss 4.7223 (4.6979)	Top-1 acc 17.578 (18.278)	Top-5 acc 35.938 (38.453)	lr 0.03182
Warmup Train [17][1580/3239]	Time 0.251 (0.239)	Data 0.001 (0.013)	Loss 4.8531 (4.6980)	Top-1 acc 16.797 (18.279)	Top-5 acc 34.766 (38.449)	lr 0.03181
Warmup Train [17][1590/3239]	Time 0.217 (0.239)	Data 0.001 (0.013)	Loss 4.6923 (4.6979)	Top-1 acc 19.141 (18.283)	Top-5 acc 40.625 (38.451)	lr 0.03180
Warmup Train [17][1600/3239]	Time 0.251 (0.239)	Data 0.001 (0.013)	Loss 4.6872 (4.6977)	Top-1 acc 15.234 (18.290)	Top-5 acc 38.672 (38.457)	lr 0.03180
Warmup Train [17][1610/3239]	Time 0.160 (0.238)	Data 0.001 (0.013)	Loss 4.5480 (4.6973)	Top-1 acc 20.312 (18.297)	Top-5 acc 40.625 (38.469)	lr 0.03179
Warmup Train [17][1620/3239]	Time 0.234 (0.238)	Data 0.001 (0.013)	Loss 4.8112 (4.6972)	Top-1 acc 16.016 (18.301)	Top-5 acc 41.016 (38.469)	lr 0.03179
Warmup Train [17][1630/3239]	Time 0.229 (0.238)	Data 0.002 (0.013)	Loss 4.4908 (4.6971)	Top-1 acc 25.000 (18.303)	Top-5 acc 44.531 (38.479)	lr 0.03178
Warmup Train [17][1640/3239]	Time 0.191 (0.238)	Data 0.001 (0.013)	Loss 4.7159 (4.6971)	Top-1 acc 19.141 (18.305)	Top-5 acc 35.938 (38.473)	lr 0.03177
Warmup Train [17][1650/3239]	Time 0.209 (0.238)	Data 0.003 (0.013)	Loss 4.6491 (4.6971)	Top-1 acc 19.922 (18.307)	Top-5 acc 39.062 (38.472)	lr 0.03177
Warmup Train [17][1660/3239]	Time 0.347 (0.238)	Data 0.001 (0.013)	Loss 4.6513 (4.6971)	Top-1 acc 19.531 (18.308)	Top-5 acc 38.281 (38.467)	lr 0.03176
Warmup Train [17][1670/3239]	Time 0.328 (0.238)	Data 0.001 (0.013)	Loss 4.8292 (4.6973)	Top-1 acc 13.672 (18.304)	Top-5 acc 35.938 (38.467)	lr 0.03176
Warmup Train [17][1680/3239]	Time 0.248 (0.238)	Data 0.003 (0.013)	Loss 4.6835 (4.6979)	Top-1 acc 21.094 (18.301)	Top-5 acc 37.891 (38.452)	lr 0.03175
Warmup Train [17][1690/3239]	Time 0.187 (0.238)	Data 0.001 (0.012)	Loss 4.7243 (4.6980)	Top-1 acc 15.234 (18.299)	Top-5 acc 38.281 (38.448)	lr 0.03175
Warmup Train [17][1700/3239]	Time 0.212 (0.238)	Data 0.001 (0.012)	Loss 4.7414 (4.6979)	Top-1 acc 18.750 (18.299)	Top-5 acc 37.500 (38.457)	lr 0.03174
Warmup Train [17][1710/3239]	Time 0.227 (0.238)	Data 0.001 (0.012)	Loss 4.6666 (4.6979)	Top-1 acc 20.312 (18.304)	Top-5 acc 37.500 (38.457)	lr 0.03173
Warmup Train [17][1720/3239]	Time 0.206 (0.237)	Data 0.001 (0.012)	Loss 4.6160 (4.6981)	Top-1 acc 18.359 (18.303)	Top-5 acc 40.234 (38.456)	lr 0.03173
Warmup Train [17][1730/3239]	Time 0.144 (0.237)	Data 0.001 (0.012)	Loss 4.6790 (4.6979)	Top-1 acc 18.359 (18.305)	Top-5 acc 39.453 (38.460)	lr 0.03172
Warmup Train [17][1740/3239]	Time 0.225 (0.237)	Data 0.002 (0.012)	Loss 4.7261 (4.6977)	Top-1 acc 19.141 (18.308)	Top-5 acc 38.672 (38.466)	lr 0.03172
Warmup Train [17][1750/3239]	Time 0.213 (0.237)	Data 0.001 (0.012)	Loss 4.7696 (4.6977)	Top-1 acc 17.969 (18.310)	Top-5 acc 37.109 (38.467)	lr 0.03171
Warmup Train [17][1760/3239]	Time 0.174 (0.237)	Data 0.001 (0.012)	Loss 4.7226 (4.6977)	Top-1 acc 19.531 (18.310)	Top-5 acc 37.500 (38.466)	lr 0.03170
Warmup Train [17][1770/3239]	Time 0.360 (0.237)	Data 0.001 (0.012)	Loss 4.4942 (4.6978)	Top-1 acc 18.750 (18.311)	Top-5 acc 44.141 (38.460)	lr 0.03170
Warmup Train [17][1780/3239]	Time 0.173 (0.237)	Data 0.001 (0.012)	Loss 4.7847 (4.6978)	Top-1 acc 16.797 (18.315)	Top-5 acc 35.938 (38.463)	lr 0.03169
Warmup Train [17][1790/3239]	Time 0.240 (0.237)	Data 0.026 (0.012)	Loss 4.5274 (4.6976)	Top-1 acc 19.922 (18.321)	Top-5 acc 43.359 (38.468)	lr 0.03169
Warmup Train [17][1800/3239]	Time 0.210 (0.237)	Data 0.001 (0.012)	Loss 4.7329 (4.6977)	Top-1 acc 16.406 (18.321)	Top-5 acc 34.766 (38.468)	lr 0.03168
Warmup Train [17][1810/3239]	Time 0.206 (0.237)	Data 0.001 (0.012)	Loss 4.8230 (4.6975)	Top-1 acc 14.844 (18.327)	Top-5 acc 35.156 (38.479)	lr 0.03168
Warmup Train [17][1820/3239]	Time 0.215 (0.237)	Data 0.001 (0.012)	Loss 4.5619 (4.6976)	Top-1 acc 21.875 (18.331)	Top-5 acc 42.188 (38.481)	lr 0.03167
Warmup Train [17][1830/3239]	Time 0.194 (0.237)	Data 0.001 (0.012)	Loss 4.7851 (4.6974)	Top-1 acc 19.922 (18.334)	Top-5 acc 38.672 (38.486)	lr 0.03166
Warmup Train [17][1840/3239]	Time 0.214 (0.237)	Data 0.002 (0.012)	Loss 4.8298 (4.6975)	Top-1 acc 15.625 (18.330)	Top-5 acc 32.422 (38.483)	lr 0.03166
Warmup Train [17][1850/3239]	Time 0.242 (0.236)	Data 0.002 (0.012)	Loss 4.6085 (4.6974)	Top-1 acc 21.875 (18.331)	Top-5 acc 40.234 (38.490)	lr 0.03165
Warmup Train [17][1860/3239]	Time 0.361 (0.236)	Data 0.001 (0.012)	Loss 4.6310 (4.6976)	Top-1 acc 16.016 (18.325)	Top-5 acc 40.625 (38.484)	lr 0.03165
Warmup Train [17][1870/3239]	Time 0.135 (0.236)	Data 0.001 (0.012)	Loss 4.5606 (4.6977)	Top-1 acc 18.750 (18.331)	Top-5 acc 44.922 (38.490)	lr 0.03164
Warmup Train [17][1880/3239]	Time 0.168 (0.236)	Data 0.001 (0.012)	Loss 4.6418 (4.6977)	Top-1 acc 19.531 (18.330)	Top-5 acc 37.891 (38.485)	lr 0.03163
Warmup Train [17][1890/3239]	Time 0.242 (0.236)	Data 0.002 (0.012)	Loss 4.5991 (4.6975)	Top-1 acc 19.922 (18.334)	Top-5 acc 43.359 (38.492)	lr 0.03163
Warmup Train [17][1900/3239]	Time 0.204 (0.236)	Data 0.001 (0.012)	Loss 4.7772 (4.6973)	Top-1 acc 19.531 (18.338)	Top-5 acc 38.672 (38.503)	lr 0.03162
Warmup Train [17][1910/3239]	Time 0.226 (0.236)	Data 0.002 (0.011)	Loss 4.6898 (4.6971)	Top-1 acc 17.969 (18.346)	Top-5 acc 37.500 (38.507)	lr 0.03162
Warmup Train [17][1920/3239]	Time 0.250 (0.236)	Data 0.001 (0.011)	Loss 4.9013 (4.6972)	Top-1 acc 13.281 (18.346)	Top-5 acc 30.469 (38.501)	lr 0.03161
Warmup Train [17][1930/3239]	Time 0.193 (0.236)	Data 0.001 (0.011)	Loss 4.6099 (4.6968)	Top-1 acc 20.312 (18.356)	Top-5 acc 39.453 (38.509)	lr 0.03161
Warmup Train [17][1940/3239]	Time 0.205 (0.236)	Data 0.001 (0.011)	Loss 4.7458 (4.6965)	Top-1 acc 15.625 (18.359)	Top-5 acc 39.844 (38.517)	lr 0.03160
Warmup Train [17][1950/3239]	Time 0.180 (0.236)	Data 0.002 (0.011)	Loss 4.6670 (4.6962)	Top-1 acc 22.656 (18.363)	Top-5 acc 41.406 (38.523)	lr 0.03159
Warmup Train [17][1960/3239]	Time 0.258 (0.236)	Data 0.001 (0.011)	Loss 4.6276 (4.6961)	Top-1 acc 19.141 (18.365)	Top-5 acc 41.016 (38.522)	lr 0.03159
Warmup Train [17][1970/3239]	Time 0.338 (0.236)	Data 0.002 (0.011)	Loss 4.4793 (4.6959)	Top-1 acc 22.266 (18.366)	Top-5 acc 43.750 (38.527)	lr 0.03158
Warmup Train [17][1980/3239]	Time 0.183 (0.236)	Data 0.002 (0.011)	Loss 4.8152 (4.6960)	Top-1 acc 17.578 (18.370)	Top-5 acc 37.891 (38.524)	lr 0.03158
Warmup Train [17][1990/3239]	Time 0.208 (0.236)	Data 0.001 (0.011)	Loss 4.7831 (4.6960)	Top-1 acc 16.797 (18.374)	Top-5 acc 35.156 (38.527)	lr 0.03157
Warmup Train [17][2000/3239]	Time 0.218 (0.236)	Data 0.001 (0.011)	Loss 4.6300 (4.6961)	Top-1 acc 17.578 (18.370)	Top-5 acc 40.625 (38.527)	lr 0.03156
Warmup Train [17][2010/3239]	Time 0.163 (0.236)	Data 0.001 (0.011)	Loss 4.8103 (4.6963)	Top-1 acc 16.797 (18.369)	Top-5 acc 36.719 (38.523)	lr 0.03156
Warmup Train [17][2020/3239]	Time 0.247 (0.236)	Data 0.001 (0.011)	Loss 4.6578 (4.6962)	Top-1 acc 13.672 (18.364)	Top-5 acc 34.375 (38.522)	lr 0.03155
Warmup Train [17][2030/3239]	Time 0.242 (0.236)	Data 0.003 (0.011)	Loss 4.6928 (4.6964)	Top-1 acc 19.531 (18.361)	Top-5 acc 35.938 (38.517)	lr 0.03155
Warmup Train [17][2040/3239]	Time 0.225 (0.236)	Data 0.002 (0.011)	Loss 4.7421 (4.6964)	Top-1 acc 18.359 (18.360)	Top-5 acc 39.062 (38.517)	lr 0.03154
Warmup Train [17][2050/3239]	Time 0.220 (0.236)	Data 0.001 (0.011)	Loss 4.6650 (4.6965)	Top-1 acc 17.969 (18.358)	Top-5 acc 39.453 (38.519)	lr 0.03153
Warmup Train [17][2060/3239]	Time 0.247 (0.235)	Data 0.001 (0.011)	Loss 4.8121 (4.6966)	Top-1 acc 20.312 (18.356)	Top-5 acc 36.719 (38.517)	lr 0.03153
Warmup Train [17][2070/3239]	Time 0.201 (0.235)	Data 0.001 (0.011)	Loss 4.6350 (4.6966)	Top-1 acc 19.922 (18.362)	Top-5 acc 42.969 (38.531)	lr 0.03152
Warmup Train [17][2080/3239]	Time 0.376 (0.236)	Data 0.001 (0.011)	Loss 4.7842 (4.6967)	Top-1 acc 17.578 (18.358)	Top-5 acc 35.547 (38.527)	lr 0.03152
Warmup Train [17][2090/3239]	Time 0.176 (0.236)	Data 0.001 (0.011)	Loss 4.6880 (4.6964)	Top-1 acc 19.922 (18.366)	Top-5 acc 40.625 (38.533)	lr 0.03151
Warmup Train [17][2100/3239]	Time 0.175 (0.235)	Data 0.001 (0.011)	Loss 4.6054 (4.6963)	Top-1 acc 20.312 (18.369)	Top-5 acc 36.719 (38.528)	lr 0.03151
Warmup Train [17][2110/3239]	Time 0.243 (0.235)	Data 0.001 (0.011)	Loss 4.5504 (4.6961)	Top-1 acc 16.406 (18.371)	Top-5 acc 44.141 (38.536)	lr 0.03150
Warmup Train [17][2120/3239]	Time 0.192 (0.235)	Data 0.001 (0.011)	Loss 4.5975 (4.6958)	Top-1 acc 20.312 (18.372)	Top-5 acc 39.453 (38.537)	lr 0.03149
Warmup Train [17][2130/3239]	Time 0.130 (0.235)	Data 0.001 (0.011)	Loss 4.6262 (4.6958)	Top-1 acc 16.797 (18.370)	Top-5 acc 40.625 (38.536)	lr 0.03149
Warmup Train [17][2140/3239]	Time 0.216 (0.235)	Data 0.002 (0.011)	Loss 4.5856 (4.6956)	Top-1 acc 20.703 (18.375)	Top-5 acc 39.453 (38.540)	lr 0.03148
Warmup Train [17][2150/3239]	Time 0.138 (0.235)	Data 0.002 (0.011)	Loss 4.6361 (4.6957)	Top-1 acc 15.234 (18.372)	Top-5 acc 38.281 (38.536)	lr 0.03148
Warmup Train [17][2160/3239]	Time 0.197 (0.235)	Data 0.001 (0.011)	Loss 4.7188 (4.6954)	Top-1 acc 19.141 (18.377)	Top-5 acc 38.281 (38.544)	lr 0.03147
Warmup Train [17][2170/3239]	Time 0.194 (0.235)	Data 0.001 (0.011)	Loss 4.7891 (4.6953)	Top-1 acc 19.141 (18.383)	Top-5 acc 37.109 (38.548)	lr 0.03146
Warmup Train [17][2180/3239]	Time 0.176 (0.235)	Data 0.002 (0.010)	Loss 4.5336 (4.6951)	Top-1 acc 18.359 (18.382)	Top-5 acc 39.062 (38.550)	lr 0.03146
Warmup Train [17][2190/3239]	Time 0.389 (0.235)	Data 0.001 (0.010)	Loss 4.7349 (4.6948)	Top-1 acc 17.578 (18.385)	Top-5 acc 38.672 (38.553)	lr 0.03145
Warmup Train [17][2200/3239]	Time 0.239 (0.235)	Data 0.001 (0.010)	Loss 4.6758 (4.6951)	Top-1 acc 19.922 (18.385)	Top-5 acc 40.234 (38.553)	lr 0.03145
Warmup Train [17][2210/3239]	Time 0.185 (0.235)	Data 0.001 (0.010)	Loss 4.6898 (4.6948)	Top-1 acc 18.750 (18.391)	Top-5 acc 39.844 (38.558)	lr 0.03144
Warmup Train [17][2220/3239]	Time 0.301 (0.235)	Data 0.001 (0.010)	Loss 4.7302 (4.6946)	Top-1 acc 16.797 (18.393)	Top-5 acc 38.672 (38.566)	lr 0.03144
Warmup Train [17][2230/3239]	Time 0.246 (0.235)	Data 0.001 (0.010)	Loss 4.6829 (4.6944)	Top-1 acc 16.016 (18.394)	Top-5 acc 37.891 (38.568)	lr 0.03143
Warmup Train [17][2240/3239]	Time 0.211 (0.235)	Data 0.002 (0.010)	Loss 4.8569 (4.6944)	Top-1 acc 14.844 (18.395)	Top-5 acc 35.156 (38.570)	lr 0.03142
Warmup Train [17][2250/3239]	Time 0.205 (0.235)	Data 0.001 (0.010)	Loss 4.6946 (4.6943)	Top-1 acc 16.797 (18.393)	Top-5 acc 36.719 (38.572)	lr 0.03142
Warmup Train [17][2260/3239]	Time 0.215 (0.235)	Data 0.001 (0.010)	Loss 4.6839 (4.6943)	Top-1 acc 17.578 (18.395)	Top-5 acc 39.453 (38.569)	lr 0.03141
Warmup Train [17][2270/3239]	Time 0.280 (0.235)	Data 0.001 (0.010)	Loss 4.4126 (4.6941)	Top-1 acc 23.438 (18.401)	Top-5 acc 41.797 (38.572)	lr 0.03141
Warmup Train [17][2280/3239]	Time 0.205 (0.234)	Data 0.002 (0.010)	Loss 4.6810 (4.6943)	Top-1 acc 19.141 (18.400)	Top-5 acc 39.453 (38.572)	lr 0.03140
Warmup Train [17][2290/3239]	Time 0.287 (0.234)	Data 0.002 (0.010)	Loss 4.6498 (4.6943)	Top-1 acc 22.266 (18.402)	Top-5 acc 39.453 (38.568)	lr 0.03139
Warmup Train [17][2300/3239]	Time 0.308 (0.234)	Data 0.002 (0.010)	Loss 4.5727 (4.6944)	Top-1 acc 19.922 (18.401)	Top-5 acc 40.234 (38.566)	lr 0.03139
Warmup Train [17][2310/3239]	Time 0.254 (0.234)	Data 0.001 (0.010)	Loss 4.7163 (4.6943)	Top-1 acc 19.141 (18.406)	Top-5 acc 37.891 (38.570)	lr 0.03138
Warmup Train [17][2320/3239]	Time 0.206 (0.234)	Data 0.002 (0.010)	Loss 4.8900 (4.6942)	Top-1 acc 14.453 (18.406)	Top-5 acc 32.812 (38.574)	lr 0.03138
Warmup Train [17][2330/3239]	Time 0.204 (0.234)	Data 0.001 (0.010)	Loss 4.4571 (4.6939)	Top-1 acc 21.875 (18.413)	Top-5 acc 45.312 (38.583)	lr 0.03137
Warmup Train [17][2340/3239]	Time 0.188 (0.234)	Data 0.001 (0.010)	Loss 4.6924 (4.6939)	Top-1 acc 21.094 (18.419)	Top-5 acc 37.891 (38.585)	lr 0.03137
Warmup Train [17][2350/3239]	Time 0.206 (0.234)	Data 0.001 (0.010)	Loss 4.8485 (4.6940)	Top-1 acc 16.406 (18.416)	Top-5 acc 36.719 (38.587)	lr 0.03136
Warmup Train [17][2360/3239]	Time 0.133 (0.234)	Data 0.001 (0.010)	Loss 4.5912 (4.6936)	Top-1 acc 21.094 (18.422)	Top-5 acc 42.188 (38.597)	lr 0.03135
Warmup Train [17][2370/3239]	Time 0.129 (0.234)	Data 0.002 (0.010)	Loss 4.6470 (4.6936)	Top-1 acc 18.750 (18.420)	Top-5 acc 38.672 (38.598)	lr 0.03135
Warmup Train [17][2380/3239]	Time 0.241 (0.234)	Data 0.001 (0.010)	Loss 4.6210 (4.6932)	Top-1 acc 17.969 (18.427)	Top-5 acc 39.062 (38.612)	lr 0.03134
Warmup Train [17][2390/3239]	Time 0.156 (0.234)	Data 0.001 (0.010)	Loss 4.6495 (4.6932)	Top-1 acc 18.359 (18.424)	Top-5 acc 39.844 (38.611)	lr 0.03134
Warmup Train [17][2400/3239]	Time 0.311 (0.234)	Data 0.002 (0.010)	Loss 4.7164 (4.6931)	Top-1 acc 21.094 (18.426)	Top-5 acc 39.844 (38.613)	lr 0.03133
Warmup Train [17][2410/3239]	Time 0.245 (0.234)	Data 0.002 (0.010)	Loss 4.7676 (4.6932)	Top-1 acc 19.531 (18.424)	Top-5 acc 39.844 (38.615)	lr 0.03132
Warmup Train [17][2420/3239]	Time 0.155 (0.234)	Data 0.001 (0.010)	Loss 4.5252 (4.6930)	Top-1 acc 19.531 (18.428)	Top-5 acc 38.672 (38.615)	lr 0.03132
Warmup Train [17][2430/3239]	Time 0.200 (0.234)	Data 0.001 (0.010)	Loss 4.6578 (4.6927)	Top-1 acc 20.312 (18.433)	Top-5 acc 41.406 (38.622)	lr 0.03131
Warmup Train [17][2440/3239]	Time 0.205 (0.234)	Data 0.001 (0.010)	Loss 4.7885 (4.6928)	Top-1 acc 17.969 (18.435)	Top-5 acc 38.281 (38.621)	lr 0.03131
Warmup Train [17][2450/3239]	Time 0.185 (0.234)	Data 0.001 (0.010)	Loss 4.7215 (4.6929)	Top-1 acc 16.797 (18.434)	Top-5 acc 37.109 (38.615)	lr 0.03130
Warmup Train [17][2460/3239]	Time 0.179 (0.234)	Data 0.002 (0.010)	Loss 4.7495 (4.6930)	Top-1 acc 18.750 (18.437)	Top-5 acc 36.328 (38.613)	lr 0.03129
Warmup Train [17][2470/3239]	Time 0.302 (0.234)	Data 0.001 (0.010)	Loss 4.6453 (4.6928)	Top-1 acc 18.359 (18.443)	Top-5 acc 39.062 (38.619)	lr 0.03129
Warmup Train [17][2480/3239]	Time 0.250 (0.234)	Data 0.001 (0.010)	Loss 4.7179 (4.6928)	Top-1 acc 19.922 (18.438)	Top-5 acc 40.625 (38.612)	lr 0.03128
Warmup Train [17][2490/3239]	Time 0.324 (0.233)	Data 0.001 (0.010)	Loss 4.5496 (4.6926)	Top-1 acc 25.391 (18.441)	Top-5 acc 44.141 (38.617)	lr 0.03128
Warmup Train [17][2500/3239]	Time 0.222 (0.233)	Data 0.001 (0.010)	Loss 4.5862 (4.6925)	Top-1 acc 17.969 (18.440)	Top-5 acc 41.797 (38.615)	lr 0.03127
Warmup Train [17][2510/3239]	Time 0.179 (0.233)	Data 0.001 (0.010)	Loss 4.7003 (4.6923)	Top-1 acc 16.406 (18.439)	Top-5 acc 38.672 (38.614)	lr 0.03127
Warmup Train [17][2520/3239]	Time 0.206 (0.233)	Data 0.001 (0.010)	Loss 4.6420 (4.6921)	Top-1 acc 17.188 (18.441)	Top-5 acc 38.281 (38.618)	lr 0.03126
Warmup Train [17][2530/3239]	Time 0.160 (0.233)	Data 0.002 (0.010)	Loss 4.5423 (4.6919)	Top-1 acc 16.406 (18.446)	Top-5 acc 39.062 (38.623)	lr 0.03125
Warmup Train [17][2540/3239]	Time 0.225 (0.233)	Data 0.001 (0.009)	Loss 4.6237 (4.6918)	Top-1 acc 18.750 (18.443)	Top-5 acc 40.625 (38.622)	lr 0.03125
Warmup Train [17][2550/3239]	Time 0.235 (0.233)	Data 0.001 (0.009)	Loss 4.8008 (4.6917)	Top-1 acc 16.406 (18.444)	Top-5 acc 40.234 (38.624)	lr 0.03124
Warmup Train [17][2560/3239]	Time 0.206 (0.233)	Data 0.001 (0.009)	Loss 4.7921 (4.6917)	Top-1 acc 21.875 (18.445)	Top-5 acc 37.891 (38.629)	lr 0.03124
Warmup Train [17][2570/3239]	Time 0.184 (0.233)	Data 0.001 (0.009)	Loss 4.8443 (4.6917)	Top-1 acc 16.797 (18.445)	Top-5 acc 36.719 (38.634)	lr 0.03123
Warmup Train [17][2580/3239]	Time 0.356 (0.233)	Data 0.001 (0.009)	Loss 4.6104 (4.6918)	Top-1 acc 20.703 (18.442)	Top-5 acc 39.062 (38.632)	lr 0.03122
Warmup Train [17][2590/3239]	Time 0.208 (0.233)	Data 0.001 (0.009)	Loss 4.7635 (4.6918)	Top-1 acc 20.312 (18.448)	Top-5 acc 34.375 (38.629)	lr 0.03122
Warmup Train [17][2600/3239]	Time 0.248 (0.233)	Data 0.001 (0.009)	Loss 4.7513 (4.6918)	Top-1 acc 17.188 (18.449)	Top-5 acc 39.062 (38.634)	lr 0.03121
Warmup Train [17][2610/3239]	Time 0.237 (0.233)	Data 0.001 (0.009)	Loss 4.5215 (4.6916)	Top-1 acc 19.922 (18.453)	Top-5 acc 44.531 (38.639)	lr 0.03121
Warmup Train [17][2620/3239]	Time 0.222 (0.233)	Data 0.001 (0.009)	Loss 4.7070 (4.6915)	Top-1 acc 16.797 (18.451)	Top-5 acc 37.109 (38.640)	lr 0.03120
Warmup Train [17][2630/3239]	Time 0.221 (0.233)	Data 0.001 (0.009)	Loss 4.7436 (4.6914)	Top-1 acc 17.578 (18.453)	Top-5 acc 39.453 (38.644)	lr 0.03119
Warmup Train [17][2640/3239]	Time 0.175 (0.233)	Data 0.001 (0.009)	Loss 4.5624 (4.6914)	Top-1 acc 19.922 (18.458)	Top-5 acc 41.406 (38.642)	lr 0.03119
Warmup Train [17][2650/3239]	Time 0.161 (0.233)	Data 0.001 (0.009)	Loss 4.6785 (4.6913)	Top-1 acc 18.359 (18.460)	Top-5 acc 37.500 (38.644)	lr 0.03118
Warmup Train [17][2660/3239]	Time 0.206 (0.233)	Data 0.002 (0.009)	Loss 4.5895 (4.6910)	Top-1 acc 17.188 (18.460)	Top-5 acc 39.453 (38.649)	lr 0.03118
Warmup Train [17][2670/3239]	Time 0.348 (0.233)	Data 0.001 (0.009)	Loss 4.6673 (4.6909)	Top-1 acc 19.141 (18.462)	Top-5 acc 41.406 (38.653)	lr 0.03117
Warmup Train [17][2680/3239]	Time 0.290 (0.233)	Data 0.001 (0.009)	Loss 4.7053 (4.6909)	Top-1 acc 20.703 (18.461)	Top-5 acc 38.672 (38.650)	lr 0.03117
Warmup Train [17][2690/3239]	Time 0.180 (0.233)	Data 0.001 (0.009)	Loss 4.8240 (4.6910)	Top-1 acc 16.016 (18.460)	Top-5 acc 36.328 (38.648)	lr 0.03116
Warmup Train [17][2700/3239]	Time 0.229 (0.233)	Data 0.001 (0.009)	Loss 4.7250 (4.6909)	Top-1 acc 20.312 (18.463)	Top-5 acc 39.062 (38.650)	lr 0.03115
Warmup Train [17][2710/3239]	Time 0.206 (0.233)	Data 0.001 (0.009)	Loss 4.7616 (4.6908)	Top-1 acc 18.750 (18.463)	Top-5 acc 36.719 (38.651)	lr 0.03115
Warmup Train [17][2720/3239]	Time 0.246 (0.233)	Data 0.001 (0.009)	Loss 4.8264 (4.6909)	Top-1 acc 14.453 (18.463)	Top-5 acc 37.500 (38.655)	lr 0.03114
Warmup Train [17][2730/3239]	Time 0.220 (0.233)	Data 0.001 (0.009)	Loss 4.6946 (4.6908)	Top-1 acc 19.141 (18.463)	Top-5 acc 40.234 (38.658)	lr 0.03114
Warmup Train [17][2740/3239]	Time 0.181 (0.233)	Data 0.001 (0.009)	Loss 4.6306 (4.6905)	Top-1 acc 17.578 (18.463)	Top-5 acc 37.500 (38.662)	lr 0.03113
Warmup Train [17][2750/3239]	Time 0.156 (0.233)	Data 0.002 (0.009)	Loss 4.5082 (4.6905)	Top-1 acc 17.578 (18.460)	Top-5 acc 46.094 (38.666)	lr 0.03112
Warmup Train [17][2760/3239]	Time 0.212 (0.233)	Data 0.002 (0.009)	Loss 4.8630 (4.6904)	Top-1 acc 16.016 (18.464)	Top-5 acc 33.203 (38.666)	lr 0.03112
Warmup Train [17][2770/3239]	Time 0.205 (0.233)	Data 0.001 (0.009)	Loss 4.6804 (4.6903)	Top-1 acc 17.969 (18.464)	Top-5 acc 39.062 (38.666)	lr 0.03111
Warmup Train [17][2780/3239]	Time 0.191 (0.233)	Data 0.001 (0.009)	Loss 4.6047 (4.6903)	Top-1 acc 17.578 (18.464)	Top-5 acc 39.453 (38.669)	lr 0.03111
Warmup Train [17][2790/3239]	Time 0.344 (0.233)	Data 0.001 (0.009)	Loss 4.4803 (4.6902)	Top-1 acc 18.359 (18.460)	Top-5 acc 42.578 (38.668)	lr 0.03110
Warmup Train [17][2800/3239]	Time 0.244 (0.233)	Data 0.001 (0.009)	Loss 4.4715 (4.6901)	Top-1 acc 20.703 (18.459)	Top-5 acc 46.484 (38.671)	lr 0.03110
Warmup Train [17][2810/3239]	Time 0.159 (0.233)	Data 0.001 (0.009)	Loss 4.7765 (4.6901)	Top-1 acc 12.500 (18.462)	Top-5 acc 33.984 (38.672)	lr 0.03109
Warmup Train [17][2820/3239]	Time 0.209 (0.233)	Data 0.001 (0.009)	Loss 4.6635 (4.6899)	Top-1 acc 21.094 (18.468)	Top-5 acc 41.797 (38.679)	lr 0.03108
Warmup Train [17][2830/3239]	Time 0.177 (0.233)	Data 0.002 (0.009)	Loss 4.7232 (4.6897)	Top-1 acc 17.188 (18.469)	Top-5 acc 40.625 (38.680)	lr 0.03108
Warmup Train [17][2840/3239]	Time 0.201 (0.233)	Data 0.001 (0.009)	Loss 4.6996 (4.6898)	Top-1 acc 19.531 (18.469)	Top-5 acc 36.328 (38.678)	lr 0.03107
Warmup Train [17][2850/3239]	Time 0.193 (0.233)	Data 0.001 (0.009)	Loss 4.6155 (4.6898)	Top-1 acc 17.578 (18.467)	Top-5 acc 43.359 (38.677)	lr 0.03107
Warmup Train [17][2860/3239]	Time 0.265 (0.233)	Data 0.001 (0.009)	Loss 4.7636 (4.6895)	Top-1 acc 17.188 (18.473)	Top-5 acc 35.547 (38.685)	lr 0.03106
Warmup Train [17][2870/3239]	Time 0.228 (0.233)	Data 0.001 (0.009)	Loss 4.7814 (4.6893)	Top-1 acc 19.141 (18.477)	Top-5 acc 37.891 (38.690)	lr 0.03105
Warmup Train [17][2880/3239]	Time 0.140 (0.233)	Data 0.001 (0.009)	Loss 4.6492 (4.6893)	Top-1 acc 19.531 (18.480)	Top-5 acc 40.625 (38.691)	lr 0.03105
Warmup Train [17][2890/3239]	Time 0.204 (0.233)	Data 0.001 (0.009)	Loss 4.5037 (4.6892)	Top-1 acc 25.000 (18.482)	Top-5 acc 41.797 (38.691)	lr 0.03104
Warmup Train [17][2900/3239]	Time 0.245 (0.233)	Data 0.001 (0.009)	Loss 4.5399 (4.6891)	Top-1 acc 18.750 (18.482)	Top-5 acc 43.359 (38.694)	lr 0.03104
Warmup Train [17][2910/3239]	Time 0.304 (0.233)	Data 0.001 (0.009)	Loss 4.5152 (4.6889)	Top-1 acc 22.266 (18.486)	Top-5 acc 46.484 (38.703)	lr 0.03103
Warmup Train [17][2920/3239]	Time 0.200 (0.233)	Data 0.001 (0.009)	Loss 4.6706 (4.6890)	Top-1 acc 21.094 (18.487)	Top-5 acc 38.281 (38.700)	lr 0.03102
Warmup Train [17][2930/3239]	Time 0.172 (0.232)	Data 0.001 (0.009)	Loss 4.5896 (4.6889)	Top-1 acc 21.484 (18.490)	Top-5 acc 42.578 (38.704)	lr 0.03102
Warmup Train [17][2940/3239]	Time 0.196 (0.232)	Data 0.001 (0.009)	Loss 4.7181 (4.6888)	Top-1 acc 17.188 (18.492)	Top-5 acc 36.328 (38.706)	lr 0.03101
Warmup Train [17][2950/3239]	Time 0.210 (0.232)	Data 0.001 (0.009)	Loss 4.7329 (4.6886)	Top-1 acc 18.750 (18.493)	Top-5 acc 39.844 (38.710)	lr 0.03101
Warmup Train [17][2960/3239]	Time 0.209 (0.232)	Data 0.001 (0.009)	Loss 4.4800 (4.6884)	Top-1 acc 22.656 (18.494)	Top-5 acc 39.844 (38.708)	lr 0.03100
Warmup Train [17][2970/3239]	Time 0.208 (0.232)	Data 0.002 (0.009)	Loss 4.5860 (4.6884)	Top-1 acc 20.312 (18.495)	Top-5 acc 40.234 (38.706)	lr 0.03100
Warmup Train [17][2980/3239]	Time 0.261 (0.232)	Data 0.002 (0.009)	Loss 4.8342 (4.6884)	Top-1 acc 15.625 (18.496)	Top-5 acc 35.547 (38.705)	lr 0.03099
Warmup Train [17][2990/3239]	Time 0.216 (0.232)	Data 0.002 (0.009)	Loss 4.7120 (4.6886)	Top-1 acc 17.969 (18.496)	Top-5 acc 39.844 (38.701)	lr 0.03098
Warmup Train [17][3000/3239]	Time 0.148 (0.232)	Data 0.001 (0.009)	Loss 4.6158 (4.6885)	Top-1 acc 19.531 (18.494)	Top-5 acc 37.891 (38.697)	lr 0.03098
Warmup Train [17][3010/3239]	Time 0.204 (0.232)	Data 0.001 (0.009)	Loss 4.6828 (4.6885)	Top-1 acc 18.750 (18.495)	Top-5 acc 39.062 (38.699)	lr 0.03097
Warmup Train [17][3020/3239]	Time 0.346 (0.233)	Data 0.001 (0.008)	Loss 4.6886 (4.6884)	Top-1 acc 18.359 (18.498)	Top-5 acc 34.766 (38.698)	lr 0.03097
Warmup Train [17][3030/3239]	Time 0.220 (0.233)	Data 0.002 (0.008)	Loss 4.6823 (4.6884)	Top-1 acc 19.922 (18.497)	Top-5 acc 40.625 (38.699)	lr 0.03096
Warmup Train [17][3040/3239]	Time 0.174 (0.233)	Data 0.001 (0.008)	Loss 4.6415 (4.6885)	Top-1 acc 16.797 (18.492)	Top-5 acc 38.281 (38.696)	lr 0.03095
Warmup Train [17][3050/3239]	Time 0.243 (0.233)	Data 0.001 (0.008)	Loss 4.6878 (4.6884)	Top-1 acc 20.312 (18.493)	Top-5 acc 40.625 (38.698)	lr 0.03095
Warmup Train [17][3060/3239]	Time 0.275 (0.233)	Data 0.002 (0.008)	Loss 4.5449 (4.6884)	Top-1 acc 19.141 (18.491)	Top-5 acc 41.016 (38.701)	lr 0.03094
Warmup Train [17][3070/3239]	Time 0.215 (0.233)	Data 0.002 (0.008)	Loss 4.7612 (4.6883)	Top-1 acc 16.406 (18.495)	Top-5 acc 35.547 (38.700)	lr 0.03094
Warmup Train [17][3080/3239]	Time 0.191 (0.233)	Data 0.002 (0.008)	Loss 4.8137 (4.6882)	Top-1 acc 14.062 (18.495)	Top-5 acc 33.594 (38.705)	lr 0.03093
Warmup Train [17][3090/3239]	Time 0.259 (0.233)	Data 0.001 (0.008)	Loss 4.6210 (4.6882)	Top-1 acc 18.750 (18.496)	Top-5 acc 41.016 (38.705)	lr 0.03092
Warmup Train [17][3100/3239]	Time 0.152 (0.233)	Data 0.002 (0.008)	Loss 4.6359 (4.6880)	Top-1 acc 21.094 (18.500)	Top-5 acc 39.062 (38.709)	lr 0.03092
Warmup Train [17][3110/3239]	Time 0.194 (0.233)	Data 0.001 (0.008)	Loss 4.7017 (4.6879)	Top-1 acc 17.969 (18.503)	Top-5 acc 34.375 (38.711)	lr 0.03091
Warmup Train [17][3120/3239]	Time 0.155 (0.233)	Data 0.001 (0.008)	Loss 4.5564 (4.6878)	Top-1 acc 18.359 (18.500)	Top-5 acc 41.016 (38.712)	lr 0.03091
Warmup Train [17][3130/3239]	Time 0.232 (0.233)	Data 0.001 (0.008)	Loss 4.7081 (4.6877)	Top-1 acc 16.797 (18.503)	Top-5 acc 37.891 (38.712)	lr 0.03090
Warmup Train [17][3140/3239]	Time 0.330 (0.233)	Data 0.001 (0.008)	Loss 4.8747 (4.6876)	Top-1 acc 14.062 (18.502)	Top-5 acc 34.375 (38.715)	lr 0.03090
Warmup Train [17][3150/3239]	Time 0.308 (0.233)	Data 0.002 (0.008)	Loss 4.6853 (4.6875)	Top-1 acc 19.531 (18.503)	Top-5 acc 39.062 (38.718)	lr 0.03089
Warmup Train [17][3160/3239]	Time 0.144 (0.233)	Data 0.001 (0.008)	Loss 4.6588 (4.6874)	Top-1 acc 19.141 (18.505)	Top-5 acc 39.844 (38.717)	lr 0.03088
Warmup Train [17][3170/3239]	Time 0.212 (0.233)	Data 0.001 (0.008)	Loss 4.6448 (4.6873)	Top-1 acc 19.141 (18.511)	Top-5 acc 40.234 (38.721)	lr 0.03088
Warmup Train [17][3180/3239]	Time 0.242 (0.233)	Data 0.000 (0.008)	Loss 4.5289 (4.6873)	Top-1 acc 18.359 (18.511)	Top-5 acc 44.141 (38.721)	lr 0.03087
Warmup Train [17][3190/3239]	Time 0.144 (0.233)	Data 0.000 (0.008)	Loss 4.7218 (4.6872)	Top-1 acc 19.531 (18.515)	Top-5 acc 39.062 (38.722)	lr 0.03087
Warmup Train [17][3200/3239]	Time 0.165 (0.233)	Data 0.000 (0.008)	Loss 4.7222 (4.6871)	Top-1 acc 15.625 (18.516)	Top-5 acc 42.188 (38.728)	lr 0.03086
Warmup Train [17][3210/3239]	Time 0.183 (0.232)	Data 0.000 (0.008)	Loss 4.6238 (4.6871)	Top-1 acc 21.484 (18.516)	Top-5 acc 43.359 (38.732)	lr 0.03085
Warmup Train [17][3220/3239]	Time 0.141 (0.232)	Data 0.000 (0.008)	Loss 4.8651 (4.6871)	Top-1 acc 15.234 (18.517)	Top-5 acc 33.203 (38.733)	lr 0.03085
Warmup Train [17][3230/3239]	Time 0.156 (0.232)	Data 0.000 (0.008)	Loss 4.7886 (4.6869)	Top-1 acc 15.234 (18.521)	Top-5 acc 36.719 (38.739)	lr 0.03084
Warmup Train [17][3239/3239]	Time 0.163 (0.232)	Data 0.000 (0.008)	Loss 4.6907 (4.6869)	Top-1 acc 23.457 (18.524)	Top-5 acc 39.506 (38.739)	lr 0.03084
==========Warmup Valid [17/40]	loss 3.749	top-1 acc 24.964	top-5 acc 48.276	Train top-1 18.524	top-5 38.739	flops: 442.4M
Warmup Train [18][0/3239]	Time 17.556 (17.556)	Data 15.446 (15.446)	Loss 4.5032 (4.5032)	Top-1 acc 19.922 (19.922)	Top-5 acc 41.406 (41.406)	lr 0.03084
Warmup Train [18][10/3239]	Time 0.340 (1.854)	Data 0.002 (1.406)	Loss 4.6407 (4.6074)	Top-1 acc 18.750 (20.206)	Top-5 acc 39.844 (40.838)	lr 0.03083
Warmup Train [18][20/3239]	Time 0.207 (1.084)	Data 0.002 (0.737)	Loss 4.6302 (4.6499)	Top-1 acc 17.188 (19.085)	Top-5 acc 37.891 (39.546)	lr 0.03082
Warmup Train [18][30/3239]	Time 0.225 (0.806)	Data 0.001 (0.500)	Loss 4.6842 (4.6440)	Top-1 acc 19.531 (18.926)	Top-5 acc 40.625 (39.705)	lr 0.03082
Warmup Train [18][40/3239]	Time 0.331 (0.671)	Data 0.001 (0.378)	Loss 4.6513 (4.6417)	Top-1 acc 21.094 (19.207)	Top-5 acc 39.844 (39.882)	lr 0.03081
Warmup Train [18][50/3239]	Time 0.225 (0.587)	Data 0.001 (0.305)	Loss 4.6561 (4.6474)	Top-1 acc 19.922 (18.995)	Top-5 acc 43.359 (39.744)	lr 0.03081
Warmup Train [18][60/3239]	Time 0.164 (0.528)	Data 0.001 (0.255)	Loss 4.5554 (4.6467)	Top-1 acc 20.703 (18.987)	Top-5 acc 42.578 (39.684)	lr 0.03080
Warmup Train [18][70/3239]	Time 0.228 (0.487)	Data 0.001 (0.220)	Loss 4.3583 (4.6435)	Top-1 acc 26.953 (19.119)	Top-5 acc 46.094 (39.712)	lr 0.03079
Warmup Train [18][80/3239]	Time 0.190 (0.457)	Data 0.001 (0.193)	Loss 4.7777 (4.6425)	Top-1 acc 18.750 (19.198)	Top-5 acc 38.281 (39.810)	lr 0.03079
Warmup Train [18][90/3239]	Time 0.267 (0.432)	Data 0.001 (0.173)	Loss 4.4457 (4.6355)	Top-1 acc 20.312 (19.252)	Top-5 acc 41.797 (40.024)	lr 0.03078
Warmup Train [18][100/3239]	Time 0.235 (0.412)	Data 0.002 (0.156)	Loss 4.7720 (4.6383)	Top-1 acc 16.016 (19.156)	Top-5 acc 32.812 (39.898)	lr 0.03078
Warmup Train [18][110/3239]	Time 0.251 (0.395)	Data 0.002 (0.142)	Loss 4.4469 (4.6435)	Top-1 acc 21.875 (19.091)	Top-5 acc 46.094 (39.798)	lr 0.03077
Warmup Train [18][120/3239]	Time 0.191 (0.382)	Data 0.002 (0.131)	Loss 4.7489 (4.6436)	Top-1 acc 19.922 (19.150)	Top-5 acc 36.719 (39.757)	lr 0.03077
Warmup Train [18][130/3239]	Time 0.167 (0.369)	Data 0.002 (0.121)	Loss 4.4887 (4.6437)	Top-1 acc 26.172 (19.182)	Top-5 acc 44.922 (39.716)	lr 0.03076
Warmup Train [18][140/3239]	Time 0.178 (0.360)	Data 0.001 (0.113)	Loss 4.9522 (4.6459)	Top-1 acc 15.234 (19.130)	Top-5 acc 34.375 (39.733)	lr 0.03075
Warmup Train [18][150/3239]	Time 0.132 (0.351)	Data 0.001 (0.106)	Loss 4.8131 (4.6495)	Top-1 acc 12.109 (19.040)	Top-5 acc 34.375 (39.626)	lr 0.03075
Warmup Train [18][160/3239]	Time 0.292 (0.344)	Data 0.001 (0.100)	Loss 4.6147 (4.6486)	Top-1 acc 21.094 (19.082)	Top-5 acc 40.625 (39.589)	lr 0.03074
Warmup Train [18][170/3239]	Time 0.254 (0.337)	Data 0.001 (0.094)	Loss 4.5915 (4.6467)	Top-1 acc 16.797 (19.106)	Top-5 acc 46.094 (39.620)	lr 0.03074
Warmup Train [18][180/3239]	Time 0.143 (0.331)	Data 0.001 (0.089)	Loss 4.7633 (4.6506)	Top-1 acc 16.406 (19.089)	Top-5 acc 39.844 (39.589)	lr 0.03073
Warmup Train [18][190/3239]	Time 0.263 (0.325)	Data 0.001 (0.085)	Loss 4.7411 (4.6471)	Top-1 acc 19.141 (19.122)	Top-5 acc 39.453 (39.629)	lr 0.03072
Warmup Train [18][200/3239]	Time 0.248 (0.321)	Data 0.001 (0.081)	Loss 4.7852 (4.6452)	Top-1 acc 19.141 (19.156)	Top-5 acc 38.281 (39.682)	lr 0.03072
Warmup Train [18][210/3239]	Time 0.261 (0.316)	Data 0.001 (0.077)	Loss 4.6490 (4.6472)	Top-1 acc 16.406 (19.109)	Top-5 acc 39.844 (39.660)	lr 0.03071
Warmup Train [18][220/3239]	Time 0.130 (0.312)	Data 0.001 (0.074)	Loss 4.8702 (4.6495)	Top-1 acc 14.062 (19.104)	Top-5 acc 35.156 (39.656)	lr 0.03071
Warmup Train [18][230/3239]	Time 0.235 (0.309)	Data 0.001 (0.071)	Loss 4.7192 (4.6513)	Top-1 acc 18.750 (19.080)	Top-5 acc 37.500 (39.585)	lr 0.03070
Warmup Train [18][240/3239]	Time 0.182 (0.305)	Data 0.001 (0.068)	Loss 4.4984 (4.6511)	Top-1 acc 16.406 (19.084)	Top-5 acc 40.625 (39.601)	lr 0.03069
Warmup Train [18][250/3239]	Time 0.367 (0.302)	Data 0.002 (0.065)	Loss 4.5503 (4.6495)	Top-1 acc 21.875 (19.085)	Top-5 acc 39.844 (39.627)	lr 0.03069
Warmup Train [18][260/3239]	Time 0.153 (0.299)	Data 0.001 (0.063)	Loss 4.5897 (4.6483)	Top-1 acc 18.359 (19.085)	Top-5 acc 41.016 (39.661)	lr 0.03068
Warmup Train [18][270/3239]	Time 0.223 (0.297)	Data 0.001 (0.061)	Loss 4.7090 (4.6489)	Top-1 acc 16.797 (19.077)	Top-5 acc 40.234 (39.646)	lr 0.03068
Warmup Train [18][280/3239]	Time 0.270 (0.294)	Data 0.001 (0.059)	Loss 4.8006 (4.6510)	Top-1 acc 17.578 (19.054)	Top-5 acc 35.938 (39.560)	lr 0.03067
Warmup Train [18][290/3239]	Time 0.281 (0.292)	Data 0.001 (0.057)	Loss 4.5370 (4.6512)	Top-1 acc 22.656 (19.057)	Top-5 acc 41.406 (39.546)	lr 0.03067
Warmup Train [18][300/3239]	Time 0.215 (0.290)	Data 0.001 (0.055)	Loss 4.6557 (4.6520)	Top-1 acc 19.141 (19.074)	Top-5 acc 37.109 (39.480)	lr 0.03066
Warmup Train [18][310/3239]	Time 0.197 (0.288)	Data 0.001 (0.053)	Loss 4.6386 (4.6504)	Top-1 acc 21.484 (19.142)	Top-5 acc 40.625 (39.540)	lr 0.03065
Warmup Train [18][320/3239]	Time 0.247 (0.286)	Data 0.002 (0.052)	Loss 4.8315 (4.6506)	Top-1 acc 17.188 (19.120)	Top-5 acc 35.938 (39.535)	lr 0.03065
Warmup Train [18][330/3239]	Time 0.240 (0.284)	Data 0.003 (0.050)	Loss 4.6536 (4.6507)	Top-1 acc 19.531 (19.104)	Top-5 acc 42.188 (39.552)	lr 0.03064
Warmup Train [18][340/3239]	Time 0.211 (0.283)	Data 0.002 (0.049)	Loss 4.6628 (4.6501)	Top-1 acc 19.141 (19.093)	Top-5 acc 41.406 (39.557)	lr 0.03064
Warmup Train [18][350/3239]	Time 0.220 (0.281)	Data 0.002 (0.048)	Loss 4.8608 (4.6510)	Top-1 acc 17.188 (19.081)	Top-5 acc 34.766 (39.533)	lr 0.03063
Warmup Train [18][360/3239]	Time 0.312 (0.280)	Data 0.001 (0.046)	Loss 4.7667 (4.6507)	Top-1 acc 17.188 (19.083)	Top-5 acc 35.156 (39.532)	lr 0.03062
Warmup Train [18][370/3239]	Time 0.217 (0.279)	Data 0.001 (0.045)	Loss 4.7380 (4.6512)	Top-1 acc 16.406 (19.062)	Top-5 acc 37.891 (39.522)	lr 0.03062
Warmup Train [18][380/3239]	Time 0.193 (0.277)	Data 0.001 (0.044)	Loss 4.6057 (4.6504)	Top-1 acc 21.875 (19.075)	Top-5 acc 42.969 (39.574)	lr 0.03061
Warmup Train [18][390/3239]	Time 0.224 (0.276)	Data 0.001 (0.043)	Loss 4.6139 (4.6505)	Top-1 acc 19.141 (19.063)	Top-5 acc 41.406 (39.561)	lr 0.03061
Warmup Train [18][400/3239]	Time 0.201 (0.274)	Data 0.001 (0.042)	Loss 4.5293 (4.6512)	Top-1 acc 21.875 (19.057)	Top-5 acc 44.141 (39.559)	lr 0.03060
Warmup Train [18][410/3239]	Time 0.253 (0.274)	Data 0.001 (0.041)	Loss 4.6310 (4.6509)	Top-1 acc 17.969 (19.045)	Top-5 acc 39.062 (39.576)	lr 0.03059
Warmup Train [18][420/3239]	Time 0.189 (0.272)	Data 0.002 (0.040)	Loss 4.6896 (4.6511)	Top-1 acc 18.750 (19.036)	Top-5 acc 36.719 (39.553)	lr 0.03059
Warmup Train [18][430/3239]	Time 0.249 (0.271)	Data 0.001 (0.039)	Loss 4.5505 (4.6503)	Top-1 acc 21.875 (19.056)	Top-5 acc 40.625 (39.561)	lr 0.03058
Warmup Train [18][440/3239]	Time 0.189 (0.270)	Data 0.001 (0.038)	Loss 4.6518 (4.6504)	Top-1 acc 19.531 (19.061)	Top-5 acc 39.453 (39.560)	lr 0.03058
Warmup Train [18][450/3239]	Time 0.185 (0.269)	Data 0.001 (0.038)	Loss 4.7722 (4.6499)	Top-1 acc 16.406 (19.075)	Top-5 acc 35.938 (39.573)	lr 0.03057
Warmup Train [18][460/3239]	Time 0.324 (0.268)	Data 0.001 (0.037)	Loss 4.4877 (4.6498)	Top-1 acc 19.922 (19.095)	Top-5 acc 41.406 (39.568)	lr 0.03056
Warmup Train [18][470/3239]	Time 0.285 (0.268)	Data 0.002 (0.036)	Loss 4.7239 (4.6493)	Top-1 acc 21.094 (19.107)	Top-5 acc 42.578 (39.582)	lr 0.03056
Warmup Train [18][480/3239]	Time 0.233 (0.267)	Data 0.001 (0.035)	Loss 4.6136 (4.6499)	Top-1 acc 20.312 (19.094)	Top-5 acc 39.062 (39.550)	lr 0.03055
Warmup Train [18][490/3239]	Time 0.193 (0.266)	Data 0.001 (0.035)	Loss 4.6606 (4.6506)	Top-1 acc 19.922 (19.093)	Top-5 acc 40.625 (39.520)	lr 0.03055
Warmup Train [18][500/3239]	Time 0.207 (0.265)	Data 0.002 (0.034)	Loss 4.6620 (4.6503)	Top-1 acc 21.875 (19.104)	Top-5 acc 36.719 (39.512)	lr 0.03054
Warmup Train [18][510/3239]	Time 0.181 (0.264)	Data 0.002 (0.033)	Loss 4.5730 (4.6496)	Top-1 acc 17.188 (19.116)	Top-5 acc 39.453 (39.535)	lr 0.03054
Warmup Train [18][520/3239]	Time 0.150 (0.263)	Data 0.001 (0.033)	Loss 4.6232 (4.6514)	Top-1 acc 21.484 (19.091)	Top-5 acc 40.625 (39.508)	lr 0.03053
Warmup Train [18][530/3239]	Time 0.172 (0.262)	Data 0.002 (0.032)	Loss 4.6219 (4.6520)	Top-1 acc 17.188 (19.078)	Top-5 acc 40.625 (39.514)	lr 0.03052
Warmup Train [18][540/3239]	Time 0.229 (0.262)	Data 0.002 (0.032)	Loss 4.5188 (4.6514)	Top-1 acc 23.047 (19.100)	Top-5 acc 45.703 (39.535)	lr 0.03052
Warmup Train [18][550/3239]	Time 0.252 (0.261)	Data 0.001 (0.031)	Loss 4.5367 (4.6510)	Top-1 acc 19.141 (19.097)	Top-5 acc 41.797 (39.540)	lr 0.03051
Warmup Train [18][560/3239]	Time 0.230 (0.260)	Data 0.002 (0.031)	Loss 4.6389 (4.6514)	Top-1 acc 18.359 (19.121)	Top-5 acc 38.672 (39.562)	lr 0.03051
Warmup Train [18][570/3239]	Time 0.201 (0.260)	Data 0.001 (0.030)	Loss 4.7501 (4.6511)	Top-1 acc 17.969 (19.137)	Top-5 acc 36.328 (39.568)	lr 0.03050
Warmup Train [18][580/3239]	Time 0.297 (0.260)	Data 0.001 (0.030)	Loss 4.5517 (4.6498)	Top-1 acc 20.312 (19.166)	Top-5 acc 42.188 (39.602)	lr 0.03049
Warmup Train [18][590/3239]	Time 0.230 (0.259)	Data 0.001 (0.029)	Loss 4.7034 (4.6498)	Top-1 acc 16.797 (19.157)	Top-5 acc 38.281 (39.603)	lr 0.03049
Warmup Train [18][600/3239]	Time 0.210 (0.259)	Data 0.001 (0.029)	Loss 4.8631 (4.6505)	Top-1 acc 10.938 (19.117)	Top-5 acc 28.516 (39.571)	lr 0.03048
Warmup Train [18][610/3239]	Time 0.161 (0.258)	Data 0.002 (0.028)	Loss 4.7025 (4.6493)	Top-1 acc 20.703 (19.157)	Top-5 acc 39.453 (39.601)	lr 0.03048
Warmup Train [18][620/3239]	Time 0.226 (0.258)	Data 0.001 (0.028)	Loss 4.4085 (4.6491)	Top-1 acc 24.219 (19.170)	Top-5 acc 45.312 (39.622)	lr 0.03047
Warmup Train [18][630/3239]	Time 0.180 (0.258)	Data 0.001 (0.028)	Loss 4.6774 (4.6487)	Top-1 acc 17.578 (19.162)	Top-5 acc 37.891 (39.623)	lr 0.03046
Warmup Train [18][640/3239]	Time 0.223 (0.257)	Data 0.002 (0.027)	Loss 4.7544 (4.6492)	Top-1 acc 16.406 (19.158)	Top-5 acc 37.500 (39.618)	lr 0.03046
Warmup Train [18][650/3239]	Time 0.227 (0.257)	Data 0.001 (0.027)	Loss 4.7995 (4.6486)	Top-1 acc 17.578 (19.179)	Top-5 acc 34.766 (39.631)	lr 0.03045
Warmup Train [18][660/3239]	Time 0.199 (0.256)	Data 0.001 (0.027)	Loss 4.5601 (4.6491)	Top-1 acc 21.484 (19.192)	Top-5 acc 42.188 (39.639)	lr 0.03045
Warmup Train [18][670/3239]	Time 0.215 (0.256)	Data 0.001 (0.026)	Loss 4.5004 (4.6486)	Top-1 acc 22.266 (19.226)	Top-5 acc 44.141 (39.652)	lr 0.03044
Warmup Train [18][680/3239]	Time 0.293 (0.255)	Data 0.001 (0.026)	Loss 4.5477 (4.6485)	Top-1 acc 21.094 (19.224)	Top-5 acc 42.578 (39.655)	lr 0.03043
Warmup Train [18][690/3239]	Time 0.316 (0.255)	Data 0.002 (0.025)	Loss 4.6024 (4.6475)	Top-1 acc 19.922 (19.225)	Top-5 acc 40.625 (39.679)	lr 0.03043
Warmup Train [18][700/3239]	Time 0.135 (0.254)	Data 0.001 (0.025)	Loss 4.5010 (4.6476)	Top-1 acc 24.609 (19.232)	Top-5 acc 44.922 (39.680)	lr 0.03042
Warmup Train [18][710/3239]	Time 0.197 (0.254)	Data 0.002 (0.025)	Loss 4.5390 (4.6476)	Top-1 acc 25.000 (19.251)	Top-5 acc 40.625 (39.673)	lr 0.03042
Warmup Train [18][720/3239]	Time 0.212 (0.254)	Data 0.001 (0.025)	Loss 4.5787 (4.6467)	Top-1 acc 20.312 (19.254)	Top-5 acc 42.969 (39.705)	lr 0.03041
Warmup Train [18][730/3239]	Time 0.168 (0.253)	Data 0.001 (0.024)	Loss 4.4799 (4.6463)	Top-1 acc 23.828 (19.258)	Top-5 acc 42.188 (39.709)	lr 0.03041
Warmup Train [18][740/3239]	Time 0.222 (0.253)	Data 0.003 (0.024)	Loss 4.8359 (4.6466)	Top-1 acc 15.625 (19.248)	Top-5 acc 33.984 (39.695)	lr 0.03040
Warmup Train [18][750/3239]	Time 0.260 (0.253)	Data 0.001 (0.024)	Loss 4.5116 (4.6461)	Top-1 acc 21.094 (19.250)	Top-5 acc 42.188 (39.712)	lr 0.03039
Warmup Train [18][760/3239]	Time 0.224 (0.252)	Data 0.001 (0.023)	Loss 4.7112 (4.6454)	Top-1 acc 20.312 (19.271)	Top-5 acc 37.109 (39.718)	lr 0.03039
Warmup Train [18][770/3239]	Time 0.141 (0.252)	Data 0.001 (0.023)	Loss 4.6041 (4.6448)	Top-1 acc 23.438 (19.281)	Top-5 acc 40.625 (39.726)	lr 0.03038
Warmup Train [18][780/3239]	Time 0.224 (0.252)	Data 0.001 (0.023)	Loss 4.6998 (4.6448)	Top-1 acc 13.672 (19.278)	Top-5 acc 37.500 (39.721)	lr 0.03038
Warmup Train [18][790/3239]	Time 0.260 (0.251)	Data 0.002 (0.023)	Loss 4.7231 (4.6454)	Top-1 acc 19.922 (19.262)	Top-5 acc 38.281 (39.727)	lr 0.03037
Warmup Train [18][800/3239]	Time 0.298 (0.251)	Data 0.002 (0.022)	Loss 4.6410 (4.6448)	Top-1 acc 18.359 (19.258)	Top-5 acc 39.453 (39.736)	lr 0.03036
Warmup Train [18][810/3239]	Time 0.242 (0.251)	Data 0.002 (0.022)	Loss 4.6527 (4.6441)	Top-1 acc 16.797 (19.262)	Top-5 acc 39.844 (39.760)	lr 0.03036
Warmup Train [18][820/3239]	Time 0.173 (0.251)	Data 0.001 (0.022)	Loss 4.6850 (4.6441)	Top-1 acc 18.359 (19.270)	Top-5 acc 39.453 (39.757)	lr 0.03035
Warmup Train [18][830/3239]	Time 0.188 (0.250)	Data 0.001 (0.022)	Loss 4.5487 (4.6440)	Top-1 acc 20.703 (19.258)	Top-5 acc 41.016 (39.758)	lr 0.03035
Warmup Train [18][840/3239]	Time 0.194 (0.250)	Data 0.001 (0.022)	Loss 4.6521 (4.6442)	Top-1 acc 21.875 (19.251)	Top-5 acc 42.188 (39.762)	lr 0.03034
Warmup Train [18][850/3239]	Time 0.253 (0.250)	Data 0.001 (0.021)	Loss 4.8208 (4.6444)	Top-1 acc 17.578 (19.240)	Top-5 acc 36.328 (39.757)	lr 0.03033
Warmup Train [18][860/3239]	Time 0.246 (0.249)	Data 0.001 (0.021)	Loss 4.7873 (4.6448)	Top-1 acc 13.672 (19.223)	Top-5 acc 34.375 (39.745)	lr 0.03033
Warmup Train [18][870/3239]	Time 0.222 (0.249)	Data 0.001 (0.021)	Loss 4.5743 (4.6450)	Top-1 acc 21.094 (19.219)	Top-5 acc 40.234 (39.734)	lr 0.03032
Warmup Train [18][880/3239]	Time 0.170 (0.249)	Data 0.001 (0.021)	Loss 4.7468 (4.6454)	Top-1 acc 18.359 (19.217)	Top-5 acc 36.328 (39.724)	lr 0.03032
Warmup Train [18][890/3239]	Time 0.202 (0.249)	Data 0.002 (0.021)	Loss 4.6714 (4.6451)	Top-1 acc 19.531 (19.216)	Top-5 acc 39.844 (39.714)	lr 0.03031
Warmup Train [18][900/3239]	Time 0.175 (0.249)	Data 0.001 (0.020)	Loss 4.8093 (4.6454)	Top-1 acc 14.844 (19.222)	Top-5 acc 37.500 (39.718)	lr 0.03030
Warmup Train [18][910/3239]	Time 0.222 (0.248)	Data 0.001 (0.020)	Loss 4.6107 (4.6451)	Top-1 acc 23.047 (19.230)	Top-5 acc 42.578 (39.724)	lr 0.03030
Warmup Train [18][920/3239]	Time 0.186 (0.248)	Data 0.001 (0.020)	Loss 4.7066 (4.6446)	Top-1 acc 20.703 (19.248)	Top-5 acc 42.188 (39.740)	lr 0.03029
Warmup Train [18][930/3239]	Time 0.193 (0.248)	Data 0.002 (0.020)	Loss 4.6778 (4.6449)	Top-1 acc 16.797 (19.245)	Top-5 acc 40.234 (39.734)	lr 0.03029
Warmup Train [18][940/3239]	Time 0.233 (0.248)	Data 0.001 (0.020)	Loss 4.4715 (4.6450)	Top-1 acc 23.047 (19.249)	Top-5 acc 45.703 (39.725)	lr 0.03028
Warmup Train [18][950/3239]	Time 0.229 (0.247)	Data 0.001 (0.020)	Loss 4.7172 (4.6453)	Top-1 acc 18.750 (19.249)	Top-5 acc 37.109 (39.725)	lr 0.03027
Warmup Train [18][960/3239]	Time 0.231 (0.247)	Data 0.001 (0.019)	Loss 4.6704 (4.6450)	Top-1 acc 17.969 (19.243)	Top-5 acc 38.672 (39.740)	lr 0.03027
Warmup Train [18][970/3239]	Time 0.193 (0.247)	Data 0.002 (0.019)	Loss 4.7598 (4.6452)	Top-1 acc 16.016 (19.235)	Top-5 acc 40.234 (39.756)	lr 0.03026
Warmup Train [18][980/3239]	Time 0.159 (0.247)	Data 0.001 (0.019)	Loss 4.5378 (4.6450)	Top-1 acc 19.922 (19.227)	Top-5 acc 46.094 (39.760)	lr 0.03026
Warmup Train [18][990/3239]	Time 0.157 (0.247)	Data 0.001 (0.019)	Loss 4.6285 (4.6444)	Top-1 acc 19.141 (19.237)	Top-5 acc 37.109 (39.780)	lr 0.03025
Warmup Train [18][1000/3239]	Time 0.305 (0.246)	Data 0.003 (0.019)	Loss 4.9087 (4.6443)	Top-1 acc 17.578 (19.227)	Top-5 acc 35.547 (39.781)	lr 0.03025
Warmup Train [18][1010/3239]	Time 0.216 (0.246)	Data 0.001 (0.019)	Loss 4.7109 (4.6438)	Top-1 acc 16.406 (19.226)	Top-5 acc 35.938 (39.787)	lr 0.03024
Warmup Train [18][1020/3239]	Time 0.221 (0.246)	Data 0.001 (0.018)	Loss 4.6393 (4.6437)	Top-1 acc 19.922 (19.229)	Top-5 acc 39.453 (39.788)	lr 0.03023
Warmup Train [18][1030/3239]	Time 0.206 (0.246)	Data 0.001 (0.018)	Loss 4.5831 (4.6438)	Top-1 acc 20.312 (19.225)	Top-5 acc 40.234 (39.783)	lr 0.03023
Warmup Train [18][1040/3239]	Time 0.196 (0.246)	Data 0.001 (0.018)	Loss 4.7007 (4.6438)	Top-1 acc 16.406 (19.211)	Top-5 acc 34.375 (39.775)	lr 0.03022
Warmup Train [18][1050/3239]	Time 0.199 (0.245)	Data 0.001 (0.018)	Loss 4.5521 (4.6435)	Top-1 acc 23.438 (19.221)	Top-5 acc 42.578 (39.776)	lr 0.03022
Warmup Train [18][1060/3239]	Time 0.253 (0.245)	Data 0.001 (0.018)	Loss 4.6554 (4.6431)	Top-1 acc 21.484 (19.225)	Top-5 acc 42.969 (39.786)	lr 0.03021
Warmup Train [18][1070/3239]	Time 0.234 (0.245)	Data 0.001 (0.018)	Loss 4.8820 (4.6432)	Top-1 acc 15.234 (19.221)	Top-5 acc 32.031 (39.773)	lr 0.03020
Warmup Train [18][1080/3239]	Time 0.179 (0.245)	Data 0.002 (0.018)	Loss 4.6499 (4.6434)	Top-1 acc 20.703 (19.229)	Top-5 acc 39.062 (39.771)	lr 0.03020
Warmup Train [18][1090/3239]	Time 0.198 (0.245)	Data 0.001 (0.017)	Loss 4.7326 (4.6438)	Top-1 acc 17.969 (19.216)	Top-5 acc 40.234 (39.758)	lr 0.03019
Warmup Train [18][1100/3239]	Time 0.404 (0.244)	Data 0.002 (0.017)	Loss 4.5793 (4.6444)	Top-1 acc 19.531 (19.204)	Top-5 acc 42.969 (39.745)	lr 0.03019
Warmup Train [18][1110/3239]	Time 0.219 (0.244)	Data 0.001 (0.017)	Loss 4.7075 (4.6438)	Top-1 acc 20.312 (19.216)	Top-5 acc 38.672 (39.761)	lr 0.03018
Warmup Train [18][1120/3239]	Time 0.240 (0.244)	Data 0.001 (0.017)	Loss 4.5786 (4.6433)	Top-1 acc 18.359 (19.222)	Top-5 acc 40.625 (39.775)	lr 0.03017
Warmup Train [18][1130/3239]	Time 0.223 (0.244)	Data 0.001 (0.017)	Loss 4.5807 (4.6427)	Top-1 acc 20.703 (19.236)	Top-5 acc 40.625 (39.781)	lr 0.03017
Warmup Train [18][1140/3239]	Time 0.189 (0.244)	Data 0.002 (0.017)	Loss 4.7660 (4.6426)	Top-1 acc 18.750 (19.242)	Top-5 acc 32.812 (39.778)	lr 0.03016
Warmup Train [18][1150/3239]	Time 0.221 (0.244)	Data 0.001 (0.017)	Loss 4.4849 (4.6423)	Top-1 acc 22.266 (19.248)	Top-5 acc 44.531 (39.793)	lr 0.03016
Warmup Train [18][1160/3239]	Time 0.249 (0.243)	Data 0.002 (0.017)	Loss 4.8689 (4.6418)	Top-1 acc 16.406 (19.262)	Top-5 acc 35.938 (39.817)	lr 0.03015
Warmup Train [18][1170/3239]	Time 0.195 (0.243)	Data 0.001 (0.017)	Loss 4.6527 (4.6419)	Top-1 acc 17.969 (19.259)	Top-5 acc 37.109 (39.813)	lr 0.03014
Warmup Train [18][1180/3239]	Time 0.186 (0.243)	Data 0.001 (0.016)	Loss 4.7605 (4.6419)	Top-1 acc 16.797 (19.259)	Top-5 acc 35.156 (39.805)	lr 0.03014
Warmup Train [18][1190/3239]	Time 0.181 (0.243)	Data 0.002 (0.016)	Loss 4.6571 (4.6419)	Top-1 acc 22.266 (19.264)	Top-5 acc 37.891 (39.807)	lr 0.03013
Warmup Train [18][1200/3239]	Time 0.239 (0.243)	Data 0.004 (0.016)	Loss 4.6064 (4.6422)	Top-1 acc 24.609 (19.263)	Top-5 acc 42.969 (39.807)	lr 0.03013
Warmup Train [18][1210/3239]	Time 0.379 (0.243)	Data 0.002 (0.016)	Loss 4.5269 (4.6419)	Top-1 acc 21.875 (19.271)	Top-5 acc 44.922 (39.817)	lr 0.03012
Warmup Train [18][1220/3239]	Time 0.239 (0.243)	Data 0.001 (0.016)	Loss 4.5123 (4.6411)	Top-1 acc 21.094 (19.271)	Top-5 acc 44.531 (39.833)	lr 0.03011
Warmup Train [18][1230/3239]	Time 0.205 (0.242)	Data 0.003 (0.016)	Loss 4.6068 (4.6408)	Top-1 acc 19.531 (19.278)	Top-5 acc 41.406 (39.841)	lr 0.03011
Warmup Train [18][1240/3239]	Time 0.203 (0.242)	Data 0.001 (0.016)	Loss 4.7213 (4.6412)	Top-1 acc 15.234 (19.265)	Top-5 acc 37.109 (39.830)	lr 0.03010
Warmup Train [18][1250/3239]	Time 0.212 (0.242)	Data 0.001 (0.016)	Loss 4.6293 (4.6412)	Top-1 acc 16.797 (19.266)	Top-5 acc 42.578 (39.836)	lr 0.03010
Warmup Train [18][1260/3239]	Time 0.216 (0.242)	Data 0.001 (0.015)	Loss 4.6218 (4.6410)	Top-1 acc 22.266 (19.261)	Top-5 acc 38.281 (39.837)	lr 0.03009
Warmup Train [18][1270/3239]	Time 0.230 (0.242)	Data 0.001 (0.015)	Loss 4.6174 (4.6410)	Top-1 acc 17.578 (19.255)	Top-5 acc 37.109 (39.839)	lr 0.03009
Warmup Train [18][1280/3239]	Time 0.221 (0.242)	Data 0.001 (0.015)	Loss 4.4977 (4.6411)	Top-1 acc 25.000 (19.259)	Top-5 acc 45.312 (39.841)	lr 0.03008
Warmup Train [18][1290/3239]	Time 0.253 (0.242)	Data 0.001 (0.015)	Loss 4.4442 (4.6406)	Top-1 acc 22.656 (19.268)	Top-5 acc 45.312 (39.858)	lr 0.03007
Warmup Train [18][1300/3239]	Time 0.292 (0.241)	Data 0.002 (0.015)	Loss 4.7610 (4.6409)	Top-1 acc 14.453 (19.267)	Top-5 acc 35.547 (39.857)	lr 0.03007
Warmup Train [18][1310/3239]	Time 0.300 (0.241)	Data 0.002 (0.015)	Loss 4.7504 (4.6408)	Top-1 acc 19.531 (19.270)	Top-5 acc 33.594 (39.853)	lr 0.03006
Warmup Train [18][1320/3239]	Time 0.229 (0.241)	Data 0.002 (0.015)	Loss 4.8018 (4.6405)	Top-1 acc 15.625 (19.271)	Top-5 acc 35.938 (39.860)	lr 0.03006
Warmup Train [18][1330/3239]	Time 0.223 (0.241)	Data 0.001 (0.015)	Loss 4.6287 (4.6399)	Top-1 acc 18.750 (19.282)	Top-5 acc 40.234 (39.873)	lr 0.03005
Warmup Train [18][1340/3239]	Time 0.237 (0.241)	Data 0.001 (0.015)	Loss 4.5350 (4.6400)	Top-1 acc 19.531 (19.284)	Top-5 acc 44.922 (39.871)	lr 0.03004
Warmup Train [18][1350/3239]	Time 0.174 (0.241)	Data 0.001 (0.015)	Loss 4.6422 (4.6400)	Top-1 acc 19.141 (19.283)	Top-5 acc 37.109 (39.865)	lr 0.03004
Warmup Train [18][1360/3239]	Time 0.308 (0.241)	Data 0.001 (0.015)	Loss 4.5939 (4.6401)	Top-1 acc 19.141 (19.281)	Top-5 acc 40.625 (39.864)	lr 0.03003
Warmup Train [18][1370/3239]	Time 0.226 (0.241)	Data 0.001 (0.014)	Loss 4.6245 (4.6398)	Top-1 acc 20.312 (19.274)	Top-5 acc 40.625 (39.872)	lr 0.03003
Warmup Train [18][1380/3239]	Time 0.200 (0.241)	Data 0.001 (0.014)	Loss 4.6674 (4.6399)	Top-1 acc 16.797 (19.266)	Top-5 acc 39.453 (39.871)	lr 0.03002
Warmup Train [18][1390/3239]	Time 0.181 (0.241)	Data 0.001 (0.014)	Loss 4.8056 (4.6400)	Top-1 acc 17.188 (19.265)	Top-5 acc 33.984 (39.866)	lr 0.03001
Warmup Train [18][1400/3239]	Time 0.193 (0.240)	Data 0.001 (0.014)	Loss 4.3029 (4.6396)	Top-1 acc 25.781 (19.270)	Top-5 acc 47.656 (39.875)	lr 0.03001
Warmup Train [18][1410/3239]	Time 0.263 (0.240)	Data 0.002 (0.014)	Loss 4.7108 (4.6398)	Top-1 acc 16.406 (19.266)	Top-5 acc 40.234 (39.873)	lr 0.03000
Warmup Train [18][1420/3239]	Time 0.196 (0.240)	Data 0.002 (0.014)	Loss 4.4622 (4.6393)	Top-1 acc 21.094 (19.275)	Top-5 acc 42.969 (39.881)	lr 0.03000
Warmup Train [18][1430/3239]	Time 0.158 (0.240)	Data 0.001 (0.014)	Loss 4.6333 (4.6394)	Top-1 acc 18.359 (19.268)	Top-5 acc 38.672 (39.881)	lr 0.02999
Warmup Train [18][1440/3239]	Time 0.191 (0.240)	Data 0.001 (0.014)	Loss 4.5716 (4.6391)	Top-1 acc 19.141 (19.277)	Top-5 acc 38.281 (39.878)	lr 0.02998
Warmup Train [18][1450/3239]	Time 0.221 (0.240)	Data 0.001 (0.014)	Loss 4.5516 (4.6388)	Top-1 acc 17.969 (19.281)	Top-5 acc 38.672 (39.884)	lr 0.02998
Warmup Train [18][1460/3239]	Time 0.229 (0.240)	Data 0.001 (0.014)	Loss 4.8954 (4.6385)	Top-1 acc 13.672 (19.284)	Top-5 acc 32.031 (39.889)	lr 0.02997
Warmup Train [18][1470/3239]	Time 0.175 (0.240)	Data 0.002 (0.014)	Loss 4.7570 (4.6385)	Top-1 acc 16.406 (19.283)	Top-5 acc 39.062 (39.888)	lr 0.02997
Warmup Train [18][1480/3239]	Time 0.256 (0.240)	Data 0.001 (0.014)	Loss 4.5685 (4.6384)	Top-1 acc 20.312 (19.286)	Top-5 acc 43.750 (39.889)	lr 0.02996
Warmup Train [18][1490/3239]	Time 0.200 (0.240)	Data 0.001 (0.014)	Loss 4.7170 (4.6384)	Top-1 acc 18.750 (19.289)	Top-5 acc 35.938 (39.890)	lr 0.02995
Warmup Train [18][1500/3239]	Time 0.266 (0.240)	Data 0.002 (0.013)	Loss 4.7403 (4.6380)	Top-1 acc 16.797 (19.290)	Top-5 acc 40.625 (39.895)	lr 0.02995
Warmup Train [18][1510/3239]	Time 0.175 (0.239)	Data 0.001 (0.013)	Loss 4.5911 (4.6379)	Top-1 acc 17.578 (19.292)	Top-5 acc 40.625 (39.901)	lr 0.02994
Warmup Train [18][1520/3239]	Time 0.246 (0.239)	Data 0.001 (0.013)	Loss 4.6585 (4.6380)	Top-1 acc 18.750 (19.293)	Top-5 acc 38.281 (39.898)	lr 0.02994
Warmup Train [18][1530/3239]	Time 0.281 (0.239)	Data 0.001 (0.013)	Loss 4.5998 (4.6378)	Top-1 acc 21.094 (19.293)	Top-5 acc 42.188 (39.900)	lr 0.02993
Warmup Train [18][1540/3239]	Time 0.203 (0.239)	Data 0.003 (0.013)	Loss 4.5471 (4.6377)	Top-1 acc 20.312 (19.300)	Top-5 acc 40.625 (39.900)	lr 0.02992
Warmup Train [18][1550/3239]	Time 0.153 (0.239)	Data 0.001 (0.013)	Loss 4.3608 (4.6372)	Top-1 acc 20.703 (19.305)	Top-5 acc 46.484 (39.914)	lr 0.02992
Warmup Train [18][1560/3239]	Time 0.204 (0.239)	Data 0.001 (0.013)	Loss 4.6381 (4.6375)	Top-1 acc 17.188 (19.293)	Top-5 acc 39.062 (39.902)	lr 0.02991
Warmup Train [18][1570/3239]	Time 0.132 (0.239)	Data 0.001 (0.013)	Loss 4.7496 (4.6372)	Top-1 acc 20.703 (19.298)	Top-5 acc 40.234 (39.915)	lr 0.02991
Warmup Train [18][1580/3239]	Time 0.255 (0.239)	Data 0.002 (0.013)	Loss 4.5275 (4.6369)	Top-1 acc 19.922 (19.300)	Top-5 acc 42.188 (39.923)	lr 0.02990
Warmup Train [18][1590/3239]	Time 0.210 (0.239)	Data 0.001 (0.013)	Loss 4.5276 (4.6367)	Top-1 acc 22.266 (19.304)	Top-5 acc 38.672 (39.926)	lr 0.02990
Warmup Train [18][1600/3239]	Time 0.192 (0.239)	Data 0.001 (0.013)	Loss 4.6683 (4.6364)	Top-1 acc 15.625 (19.314)	Top-5 acc 38.281 (39.932)	lr 0.02989
Warmup Train [18][1610/3239]	Time 0.194 (0.238)	Data 0.001 (0.013)	Loss 4.6935 (4.6364)	Top-1 acc 19.922 (19.304)	Top-5 acc 40.625 (39.929)	lr 0.02988
Warmup Train [18][1620/3239]	Time 0.283 (0.239)	Data 0.001 (0.013)	Loss 4.7355 (4.6366)	Top-1 acc 19.922 (19.307)	Top-5 acc 41.016 (39.929)	lr 0.02988
Warmup Train [18][1630/3239]	Time 0.290 (0.238)	Data 0.002 (0.013)	Loss 4.7088 (4.6365)	Top-1 acc 17.578 (19.298)	Top-5 acc 34.375 (39.924)	lr 0.02987
Warmup Train [18][1640/3239]	Time 0.354 (0.238)	Data 0.003 (0.013)	Loss 4.8196 (4.6365)	Top-1 acc 18.359 (19.299)	Top-5 acc 39.062 (39.919)	lr 0.02987
Warmup Train [18][1650/3239]	Time 0.211 (0.238)	Data 0.002 (0.013)	Loss 4.6473 (4.6364)	Top-1 acc 17.969 (19.302)	Top-5 acc 41.406 (39.924)	lr 0.02986
Warmup Train [18][1660/3239]	Time 0.276 (0.238)	Data 0.002 (0.012)	Loss 4.6179 (4.6365)	Top-1 acc 21.875 (19.295)	Top-5 acc 41.016 (39.922)	lr 0.02985
Warmup Train [18][1670/3239]	Time 0.247 (0.238)	Data 0.001 (0.012)	Loss 4.6073 (4.6364)	Top-1 acc 19.141 (19.290)	Top-5 acc 42.578 (39.929)	lr 0.02985
Warmup Train [18][1680/3239]	Time 0.227 (0.238)	Data 0.001 (0.012)	Loss 4.6428 (4.6362)	Top-1 acc 20.703 (19.282)	Top-5 acc 39.844 (39.931)	lr 0.02984
Warmup Train [18][1690/3239]	Time 0.175 (0.238)	Data 0.002 (0.012)	Loss 4.7216 (4.6366)	Top-1 acc 20.312 (19.286)	Top-5 acc 39.062 (39.923)	lr 0.02984
Warmup Train [18][1700/3239]	Time 0.259 (0.238)	Data 0.002 (0.012)	Loss 4.5656 (4.6365)	Top-1 acc 17.578 (19.290)	Top-5 acc 46.484 (39.930)	lr 0.02983
Warmup Train [18][1710/3239]	Time 0.189 (0.238)	Data 0.001 (0.012)	Loss 4.7216 (4.6359)	Top-1 acc 19.141 (19.300)	Top-5 acc 36.719 (39.941)	lr 0.02982
Warmup Train [18][1720/3239]	Time 0.245 (0.238)	Data 0.002 (0.012)	Loss 4.6310 (4.6358)	Top-1 acc 22.266 (19.307)	Top-5 acc 43.359 (39.943)	lr 0.02982
Warmup Train [18][1730/3239]	Time 0.321 (0.238)	Data 0.001 (0.012)	Loss 4.6217 (4.6358)	Top-1 acc 23.438 (19.307)	Top-5 acc 40.234 (39.947)	lr 0.02981
Warmup Train [18][1740/3239]	Time 0.219 (0.238)	Data 0.001 (0.012)	Loss 4.7195 (4.6357)	Top-1 acc 17.969 (19.312)	Top-5 acc 38.672 (39.949)	lr 0.02981
Warmup Train [18][1750/3239]	Time 0.204 (0.238)	Data 0.001 (0.012)	Loss 4.4404 (4.6354)	Top-1 acc 24.219 (19.324)	Top-5 acc 39.844 (39.958)	lr 0.02980
Warmup Train [18][1760/3239]	Time 0.233 (0.238)	Data 0.001 (0.012)	Loss 4.6444 (4.6352)	Top-1 acc 18.359 (19.323)	Top-5 acc 39.062 (39.958)	lr 0.02979
Warmup Train [18][1770/3239]	Time 0.233 (0.238)	Data 0.002 (0.012)	Loss 4.5755 (4.6351)	Top-1 acc 17.969 (19.329)	Top-5 acc 39.062 (39.962)	lr 0.02979
Warmup Train [18][1780/3239]	Time 0.244 (0.238)	Data 0.001 (0.012)	Loss 4.5224 (4.6352)	Top-1 acc 24.609 (19.324)	Top-5 acc 41.016 (39.955)	lr 0.02978
Warmup Train [18][1790/3239]	Time 0.282 (0.238)	Data 0.002 (0.012)	Loss 4.7388 (4.6349)	Top-1 acc 17.578 (19.328)	Top-5 acc 39.844 (39.962)	lr 0.02978
Warmup Train [18][1800/3239]	Time 0.235 (0.238)	Data 0.001 (0.012)	Loss 4.7964 (4.6350)	Top-1 acc 12.500 (19.323)	Top-5 acc 31.641 (39.960)	lr 0.02977
Warmup Train [18][1810/3239]	Time 0.161 (0.238)	Data 0.001 (0.012)	Loss 4.8495 (4.6346)	Top-1 acc 16.406 (19.332)	Top-5 acc 33.203 (39.963)	lr 0.02976
Warmup Train [18][1820/3239]	Time 0.206 (0.238)	Data 0.001 (0.012)	Loss 4.7143 (4.6345)	Top-1 acc 19.141 (19.336)	Top-5 acc 38.281 (39.965)	lr 0.02976
Warmup Train [18][1830/3239]	Time 0.292 (0.238)	Data 0.002 (0.012)	Loss 4.4953 (4.6344)	Top-1 acc 20.703 (19.336)	Top-5 acc 42.578 (39.967)	lr 0.02975
Warmup Train [18][1840/3239]	Time 0.232 (0.238)	Data 0.002 (0.012)	Loss 4.7183 (4.6347)	Top-1 acc 20.312 (19.333)	Top-5 acc 37.500 (39.963)	lr 0.02975
Warmup Train [18][1850/3239]	Time 0.159 (0.238)	Data 0.001 (0.011)	Loss 4.5038 (4.6348)	Top-1 acc 21.875 (19.332)	Top-5 acc 44.141 (39.959)	lr 0.02974
Warmup Train [18][1860/3239]	Time 0.176 (0.238)	Data 0.001 (0.011)	Loss 4.5393 (4.6345)	Top-1 acc 21.875 (19.336)	Top-5 acc 44.141 (39.967)	lr 0.02973
Warmup Train [18][1870/3239]	Time 0.162 (0.237)	Data 0.001 (0.011)	Loss 4.4481 (4.6345)	Top-1 acc 23.828 (19.339)	Top-5 acc 41.406 (39.971)	lr 0.02973
Warmup Train [18][1880/3239]	Time 0.185 (0.237)	Data 0.001 (0.011)	Loss 4.7115 (4.6344)	Top-1 acc 19.531 (19.338)	Top-5 acc 38.672 (39.974)	lr 0.02972
Warmup Train [18][1890/3239]	Time 0.252 (0.237)	Data 0.001 (0.011)	Loss 4.7855 (4.6343)	Top-1 acc 20.312 (19.340)	Top-5 acc 38.281 (39.973)	lr 0.02972
Warmup Train [18][1900/3239]	Time 0.222 (0.237)	Data 0.001 (0.011)	Loss 4.5247 (4.6344)	Top-1 acc 19.141 (19.334)	Top-5 acc 42.969 (39.972)	lr 0.02971
Warmup Train [18][1910/3239]	Time 0.245 (0.237)	Data 0.001 (0.011)	Loss 4.7790 (4.6345)	Top-1 acc 17.578 (19.335)	Top-5 acc 34.766 (39.975)	lr 0.02970
Warmup Train [18][1920/3239]	Time 0.146 (0.237)	Data 0.002 (0.011)	Loss 4.6412 (4.6343)	Top-1 acc 19.141 (19.337)	Top-5 acc 38.672 (39.975)	lr 0.02970
Warmup Train [18][1930/3239]	Time 0.352 (0.237)	Data 0.001 (0.011)	Loss 4.6572 (4.6342)	Top-1 acc 17.969 (19.343)	Top-5 acc 37.500 (39.982)	lr 0.02969
Warmup Train [18][1940/3239]	Time 0.181 (0.237)	Data 0.001 (0.011)	Loss 4.7312 (4.6344)	Top-1 acc 18.359 (19.342)	Top-5 acc 38.281 (39.982)	lr 0.02969
Warmup Train [18][1950/3239]	Time 0.187 (0.237)	Data 0.001 (0.011)	Loss 4.7540 (4.6344)	Top-1 acc 19.141 (19.349)	Top-5 acc 37.891 (39.985)	lr 0.02968
Warmup Train [18][1960/3239]	Time 0.173 (0.237)	Data 0.001 (0.011)	Loss 4.9505 (4.6346)	Top-1 acc 14.453 (19.347)	Top-5 acc 32.031 (39.977)	lr 0.02968
Warmup Train [18][1970/3239]	Time 0.240 (0.237)	Data 0.001 (0.011)	Loss 4.4767 (4.6345)	Top-1 acc 21.094 (19.352)	Top-5 acc 47.656 (39.980)	lr 0.02967
Warmup Train [18][1980/3239]	Time 0.162 (0.237)	Data 0.002 (0.011)	Loss 4.6240 (4.6344)	Top-1 acc 21.484 (19.354)	Top-5 acc 39.453 (39.988)	lr 0.02966
Warmup Train [18][1990/3239]	Time 0.228 (0.237)	Data 0.001 (0.011)	Loss 4.6873 (4.6344)	Top-1 acc 20.312 (19.351)	Top-5 acc 43.359 (39.990)	lr 0.02966
Warmup Train [18][2000/3239]	Time 0.157 (0.237)	Data 0.001 (0.011)	Loss 4.3850 (4.6343)	Top-1 acc 21.484 (19.351)	Top-5 acc 45.703 (39.991)	lr 0.02965
Warmup Train [18][2010/3239]	Time 0.196 (0.236)	Data 0.001 (0.011)	Loss 4.7724 (4.6343)	Top-1 acc 16.797 (19.347)	Top-5 acc 35.547 (39.986)	lr 0.02965
Warmup Train [18][2020/3239]	Time 0.201 (0.236)	Data 0.002 (0.011)	Loss 4.6183 (4.6340)	Top-1 acc 22.266 (19.347)	Top-5 acc 41.016 (39.991)	lr 0.02964
Warmup Train [18][2030/3239]	Time 0.186 (0.236)	Data 0.002 (0.011)	Loss 4.8197 (4.6338)	Top-1 acc 18.359 (19.351)	Top-5 acc 35.547 (39.992)	lr 0.02963
Warmup Train [18][2040/3239]	Time 0.209 (0.236)	Data 0.002 (0.011)	Loss 4.6033 (4.6337)	Top-1 acc 18.359 (19.351)	Top-5 acc 39.453 (39.992)	lr 0.02963
Warmup Train [18][2050/3239]	Time 0.358 (0.236)	Data 0.001 (0.011)	Loss 4.7023 (4.6337)	Top-1 acc 17.188 (19.351)	Top-5 acc 42.578 (39.997)	lr 0.02962
Warmup Train [18][2060/3239]	Time 0.185 (0.236)	Data 0.001 (0.011)	Loss 4.4752 (4.6335)	Top-1 acc 21.094 (19.355)	Top-5 acc 43.359 (39.997)	lr 0.02962
Warmup Train [18][2070/3239]	Time 0.170 (0.236)	Data 0.002 (0.011)	Loss 4.5633 (4.6334)	Top-1 acc 21.484 (19.360)	Top-5 acc 42.969 (40.002)	lr 0.02961
Warmup Train [18][2080/3239]	Time 0.228 (0.236)	Data 0.001 (0.011)	Loss 4.8008 (4.6335)	Top-1 acc 14.062 (19.358)	Top-5 acc 35.938 (40.000)	lr 0.02960
Warmup Train [18][2090/3239]	Time 0.224 (0.236)	Data 0.001 (0.011)	Loss 4.5925 (4.6335)	Top-1 acc 21.484 (19.360)	Top-5 acc 42.578 (40.000)	lr 0.02960
Warmup Train [18][2100/3239]	Time 0.198 (0.236)	Data 0.001 (0.011)	Loss 4.4927 (4.6333)	Top-1 acc 20.703 (19.363)	Top-5 acc 43.750 (40.006)	lr 0.02959
Warmup Train [18][2110/3239]	Time 0.200 (0.236)	Data 0.001 (0.011)	Loss 4.4300 (4.6332)	Top-1 acc 23.828 (19.362)	Top-5 acc 42.578 (40.004)	lr 0.02959
Warmup Train [18][2120/3239]	Time 0.207 (0.236)	Data 0.002 (0.010)	Loss 4.7308 (4.6332)	Top-1 acc 15.234 (19.362)	Top-5 acc 37.891 (40.003)	lr 0.02958
Warmup Train [18][2130/3239]	Time 0.249 (0.236)	Data 0.001 (0.010)	Loss 4.4317 (4.6331)	Top-1 acc 19.141 (19.364)	Top-5 acc 46.875 (40.008)	lr 0.02957
Warmup Train [18][2140/3239]	Time 0.238 (0.236)	Data 0.001 (0.010)	Loss 4.6207 (4.6329)	Top-1 acc 19.922 (19.362)	Top-5 acc 43.750 (40.017)	lr 0.02957
Warmup Train [18][2150/3239]	Time 0.322 (0.236)	Data 0.001 (0.010)	Loss 4.7965 (4.6328)	Top-1 acc 21.484 (19.368)	Top-5 acc 38.672 (40.026)	lr 0.02956
Warmup Train [18][2160/3239]	Time 0.221 (0.236)	Data 0.001 (0.010)	Loss 4.4270 (4.6326)	Top-1 acc 25.000 (19.378)	Top-5 acc 46.875 (40.029)	lr 0.02956
Warmup Train [18][2170/3239]	Time 0.174 (0.236)	Data 0.001 (0.010)	Loss 4.7206 (4.6325)	Top-1 acc 15.625 (19.380)	Top-5 acc 37.500 (40.029)	lr 0.02955
Warmup Train [18][2180/3239]	Time 0.212 (0.236)	Data 0.002 (0.010)	Loss 4.6886 (4.6324)	Top-1 acc 18.359 (19.381)	Top-5 acc 35.938 (40.032)	lr 0.02954
Warmup Train [18][2190/3239]	Time 0.272 (0.236)	Data 0.002 (0.010)	Loss 4.7252 (4.6323)	Top-1 acc 13.672 (19.379)	Top-5 acc 40.234 (40.034)	lr 0.02954
Warmup Train [18][2200/3239]	Time 0.169 (0.236)	Data 0.001 (0.010)	Loss 4.4574 (4.6323)	Top-1 acc 20.312 (19.382)	Top-5 acc 42.188 (40.035)	lr 0.02953
Warmup Train [18][2210/3239]	Time 0.228 (0.236)	Data 0.001 (0.010)	Loss 4.4788 (4.6327)	Top-1 acc 22.266 (19.373)	Top-5 acc 43.750 (40.025)	lr 0.02953
Warmup Train [18][2220/3239]	Time 0.144 (0.236)	Data 0.001 (0.010)	Loss 4.6460 (4.6327)	Top-1 acc 17.188 (19.372)	Top-5 acc 35.547 (40.023)	lr 0.02952
Warmup Train [18][2230/3239]	Time 0.210 (0.236)	Data 0.001 (0.010)	Loss 4.4506 (4.6323)	Top-1 acc 19.531 (19.381)	Top-5 acc 43.359 (40.033)	lr 0.02951
Warmup Train [18][2240/3239]	Time 0.284 (0.235)	Data 0.001 (0.010)	Loss 4.4595 (4.6321)	Top-1 acc 25.781 (19.388)	Top-5 acc 46.875 (40.038)	lr 0.02951
Warmup Train [18][2250/3239]	Time 0.321 (0.235)	Data 0.001 (0.010)	Loss 4.7964 (4.6322)	Top-1 acc 18.750 (19.385)	Top-5 acc 34.375 (40.037)	lr 0.02950
Warmup Train [18][2260/3239]	Time 0.213 (0.235)	Data 0.001 (0.010)	Loss 4.5467 (4.6320)	Top-1 acc 22.266 (19.389)	Top-5 acc 40.625 (40.039)	lr 0.02950
Warmup Train [18][2270/3239]	Time 0.189 (0.235)	Data 0.001 (0.010)	Loss 4.6459 (4.6320)	Top-1 acc 19.531 (19.389)	Top-5 acc 42.188 (40.043)	lr 0.02949
Warmup Train [18][2280/3239]	Time 0.144 (0.235)	Data 0.002 (0.010)	Loss 4.6948 (4.6321)	Top-1 acc 14.453 (19.383)	Top-5 acc 36.328 (40.041)	lr 0.02948
Warmup Train [18][2290/3239]	Time 0.269 (0.235)	Data 0.001 (0.010)	Loss 4.6626 (4.6317)	Top-1 acc 19.922 (19.390)	Top-5 acc 37.891 (40.051)	lr 0.02948
Warmup Train [18][2300/3239]	Time 0.204 (0.235)	Data 0.001 (0.010)	Loss 4.5623 (4.6317)	Top-1 acc 18.750 (19.387)	Top-5 acc 41.797 (40.052)	lr 0.02947
Warmup Train [18][2310/3239]	Time 0.145 (0.235)	Data 0.001 (0.010)	Loss 4.4556 (4.6315)	Top-1 acc 21.875 (19.390)	Top-5 acc 42.578 (40.058)	lr 0.02947
Warmup Train [18][2320/3239]	Time 0.188 (0.235)	Data 0.001 (0.010)	Loss 4.7553 (4.6314)	Top-1 acc 17.188 (19.392)	Top-5 acc 35.156 (40.055)	lr 0.02946
Warmup Train [18][2330/3239]	Time 0.236 (0.235)	Data 0.001 (0.010)	Loss 4.6027 (4.6311)	Top-1 acc 21.094 (19.398)	Top-5 acc 39.062 (40.062)	lr 0.02945
Warmup Train [18][2340/3239]	Time 0.126 (0.235)	Data 0.001 (0.010)	Loss 4.5627 (4.6311)	Top-1 acc 19.922 (19.404)	Top-5 acc 39.062 (40.064)	lr 0.02945
Warmup Train [18][2350/3239]	Time 0.195 (0.235)	Data 0.001 (0.010)	Loss 4.4730 (4.6310)	Top-1 acc 22.266 (19.408)	Top-5 acc 47.656 (40.072)	lr 0.02944
Warmup Train [18][2360/3239]	Time 0.291 (0.235)	Data 0.001 (0.010)	Loss 4.6516 (4.6308)	Top-1 acc 15.234 (19.410)	Top-5 acc 40.234 (40.075)	lr 0.02944
Warmup Train [18][2370/3239]	Time 0.237 (0.235)	Data 0.001 (0.010)	Loss 4.8001 (4.6312)	Top-1 acc 19.922 (19.407)	Top-5 acc 41.016 (40.070)	lr 0.02943
Warmup Train [18][2380/3239]	Time 0.207 (0.235)	Data 0.002 (0.010)	Loss 4.7008 (4.6313)	Top-1 acc 17.969 (19.405)	Top-5 acc 36.328 (40.065)	lr 0.02942
Warmup Train [18][2390/3239]	Time 0.152 (0.235)	Data 0.002 (0.010)	Loss 4.5405 (4.6312)	Top-1 acc 19.531 (19.408)	Top-5 acc 41.406 (40.067)	lr 0.02942
Warmup Train [18][2400/3239]	Time 0.179 (0.235)	Data 0.001 (0.010)	Loss 4.5907 (4.6313)	Top-1 acc 19.141 (19.404)	Top-5 acc 40.234 (40.067)	lr 0.02941
Warmup Train [18][2410/3239]	Time 0.275 (0.235)	Data 0.001 (0.010)	Loss 4.6160 (4.6313)	Top-1 acc 20.703 (19.403)	Top-5 acc 42.969 (40.060)	lr 0.02941
Warmup Train [18][2420/3239]	Time 0.266 (0.235)	Data 0.001 (0.010)	Loss 4.8504 (4.6312)	Top-1 acc 17.969 (19.401)	Top-5 acc 35.938 (40.061)	lr 0.02940
Warmup Train [18][2430/3239]	Time 0.148 (0.235)	Data 0.001 (0.010)	Loss 4.6493 (4.6312)	Top-1 acc 19.531 (19.406)	Top-5 acc 42.969 (40.065)	lr 0.02939
Warmup Train [18][2440/3239]	Time 0.160 (0.234)	Data 0.001 (0.010)	Loss 4.6784 (4.6309)	Top-1 acc 16.797 (19.412)	Top-5 acc 44.141 (40.072)	lr 0.02939
Warmup Train [18][2450/3239]	Time 0.225 (0.234)	Data 0.001 (0.010)	Loss 4.3998 (4.6308)	Top-1 acc 20.312 (19.413)	Top-5 acc 47.656 (40.079)	lr 0.02938
Warmup Train [18][2460/3239]	Time 0.219 (0.234)	Data 0.001 (0.009)	Loss 4.5489 (4.6305)	Top-1 acc 19.922 (19.419)	Top-5 acc 42.188 (40.086)	lr 0.02938
Warmup Train [18][2470/3239]	Time 0.312 (0.234)	Data 0.001 (0.009)	Loss 4.7292 (4.6304)	Top-1 acc 16.016 (19.417)	Top-5 acc 37.109 (40.087)	lr 0.02937
Warmup Train [18][2480/3239]	Time 0.204 (0.234)	Data 0.002 (0.009)	Loss 4.6639 (4.6303)	Top-1 acc 18.750 (19.418)	Top-5 acc 39.062 (40.091)	lr 0.02937
Warmup Train [18][2490/3239]	Time 0.149 (0.234)	Data 0.002 (0.009)	Loss 4.7123 (4.6302)	Top-1 acc 20.312 (19.421)	Top-5 acc 40.625 (40.093)	lr 0.02936
Warmup Train [18][2500/3239]	Time 0.153 (0.234)	Data 0.001 (0.009)	Loss 4.6816 (4.6303)	Top-1 acc 22.266 (19.424)	Top-5 acc 40.625 (40.092)	lr 0.02935
Warmup Train [18][2510/3239]	Time 0.217 (0.234)	Data 0.001 (0.009)	Loss 4.3279 (4.6301)	Top-1 acc 19.922 (19.426)	Top-5 acc 46.094 (40.097)	lr 0.02935
Warmup Train [18][2520/3239]	Time 0.210 (0.234)	Data 0.001 (0.009)	Loss 4.7876 (4.6298)	Top-1 acc 15.234 (19.432)	Top-5 acc 36.719 (40.105)	lr 0.02934
Warmup Train [18][2530/3239]	Time 0.178 (0.234)	Data 0.002 (0.009)	Loss 4.6556 (4.6297)	Top-1 acc 20.312 (19.432)	Top-5 acc 38.672 (40.102)	lr 0.02934
Warmup Train [18][2540/3239]	Time 0.238 (0.234)	Data 0.001 (0.009)	Loss 4.5726 (4.6298)	Top-1 acc 20.312 (19.427)	Top-5 acc 42.188 (40.098)	lr 0.02933
Warmup Train [18][2550/3239]	Time 0.204 (0.234)	Data 0.001 (0.009)	Loss 4.5387 (4.6296)	Top-1 acc 20.312 (19.432)	Top-5 acc 47.656 (40.101)	lr 0.02932
Warmup Train [18][2560/3239]	Time 0.359 (0.234)	Data 0.002 (0.009)	Loss 4.6808 (4.6294)	Top-1 acc 18.750 (19.434)	Top-5 acc 41.016 (40.103)	lr 0.02932
Warmup Train [18][2570/3239]	Time 0.231 (0.234)	Data 0.001 (0.009)	Loss 4.4710 (4.6291)	Top-1 acc 25.391 (19.438)	Top-5 acc 41.406 (40.110)	lr 0.02931
Warmup Train [18][2580/3239]	Time 0.157 (0.234)	Data 0.002 (0.009)	Loss 4.6104 (4.6291)	Top-1 acc 19.531 (19.440)	Top-5 acc 41.406 (40.109)	lr 0.02931
Warmup Train [18][2590/3239]	Time 0.274 (0.234)	Data 0.001 (0.009)	Loss 4.8680 (4.6292)	Top-1 acc 16.797 (19.436)	Top-5 acc 34.375 (40.104)	lr 0.02930
Warmup Train [18][2600/3239]	Time 0.177 (0.234)	Data 0.002 (0.009)	Loss 4.5369 (4.6291)	Top-1 acc 21.484 (19.442)	Top-5 acc 38.672 (40.105)	lr 0.02929
Warmup Train [18][2610/3239]	Time 0.233 (0.234)	Data 0.001 (0.009)	Loss 4.6783 (4.6291)	Top-1 acc 17.969 (19.441)	Top-5 acc 39.062 (40.102)	lr 0.02929
Warmup Train [18][2620/3239]	Time 0.172 (0.234)	Data 0.001 (0.009)	Loss 4.5479 (4.6289)	Top-1 acc 19.531 (19.443)	Top-5 acc 41.406 (40.109)	lr 0.02928
Warmup Train [18][2630/3239]	Time 0.244 (0.234)	Data 0.002 (0.009)	Loss 4.6579 (4.6288)	Top-1 acc 18.359 (19.443)	Top-5 acc 42.188 (40.114)	lr 0.02928
Warmup Train [18][2640/3239]	Time 0.187 (0.234)	Data 0.001 (0.009)	Loss 4.5495 (4.6287)	Top-1 acc 19.531 (19.443)	Top-5 acc 39.844 (40.115)	lr 0.02927
Warmup Train [18][2650/3239]	Time 0.250 (0.234)	Data 0.001 (0.009)	Loss 4.5687 (4.6286)	Top-1 acc 23.438 (19.442)	Top-5 acc 44.922 (40.114)	lr 0.02926
Warmup Train [18][2660/3239]	Time 0.373 (0.234)	Data 0.001 (0.009)	Loss 4.6215 (4.6287)	Top-1 acc 24.609 (19.441)	Top-5 acc 42.969 (40.112)	lr 0.02926
Warmup Train [18][2670/3239]	Time 0.189 (0.234)	Data 0.001 (0.009)	Loss 4.4621 (4.6284)	Top-1 acc 21.484 (19.444)	Top-5 acc 44.531 (40.120)	lr 0.02925
Warmup Train [18][2680/3239]	Time 0.130 (0.234)	Data 0.001 (0.009)	Loss 4.6039 (4.6283)	Top-1 acc 22.266 (19.449)	Top-5 acc 41.406 (40.124)	lr 0.02925
Warmup Train [18][2690/3239]	Time 0.218 (0.234)	Data 0.001 (0.009)	Loss 4.5518 (4.6283)	Top-1 acc 21.875 (19.450)	Top-5 acc 42.578 (40.122)	lr 0.02924
Warmup Train [18][2700/3239]	Time 0.199 (0.234)	Data 0.001 (0.009)	Loss 4.6604 (4.6283)	Top-1 acc 19.922 (19.451)	Top-5 acc 41.016 (40.124)	lr 0.02923
Warmup Train [18][2710/3239]	Time 0.194 (0.234)	Data 0.001 (0.009)	Loss 4.7534 (4.6283)	Top-1 acc 21.094 (19.454)	Top-5 acc 39.453 (40.124)	lr 0.02923
Warmup Train [18][2720/3239]	Time 0.268 (0.234)	Data 0.002 (0.009)	Loss 4.6418 (4.6280)	Top-1 acc 20.703 (19.459)	Top-5 acc 38.672 (40.129)	lr 0.02922
Warmup Train [18][2730/3239]	Time 0.139 (0.234)	Data 0.001 (0.009)	Loss 4.6367 (4.6280)	Top-1 acc 17.188 (19.462)	Top-5 acc 39.844 (40.132)	lr 0.02922
Warmup Train [18][2740/3239]	Time 0.240 (0.234)	Data 0.001 (0.009)	Loss 4.7113 (4.6281)	Top-1 acc 17.188 (19.462)	Top-5 acc 38.672 (40.131)	lr 0.02921
Warmup Train [18][2750/3239]	Time 0.187 (0.234)	Data 0.001 (0.009)	Loss 4.5998 (4.6283)	Top-1 acc 17.188 (19.460)	Top-5 acc 40.625 (40.126)	lr 0.02920
Warmup Train [18][2760/3239]	Time 0.375 (0.234)	Data 0.002 (0.009)	Loss 4.6113 (4.6282)	Top-1 acc 21.875 (19.465)	Top-5 acc 41.406 (40.128)	lr 0.02920
Warmup Train [18][2770/3239]	Time 0.260 (0.234)	Data 0.001 (0.009)	Loss 4.5602 (4.6282)	Top-1 acc 21.875 (19.464)	Top-5 acc 42.188 (40.132)	lr 0.02919
Warmup Train [18][2780/3239]	Time 0.143 (0.234)	Data 0.001 (0.009)	Loss 4.4006 (4.6279)	Top-1 acc 20.703 (19.466)	Top-5 acc 46.875 (40.139)	lr 0.02919
Warmup Train [18][2790/3239]	Time 0.170 (0.234)	Data 0.001 (0.009)	Loss 4.8473 (4.6280)	Top-1 acc 14.453 (19.465)	Top-5 acc 35.156 (40.140)	lr 0.02918
Warmup Train [18][2800/3239]	Time 0.224 (0.234)	Data 0.001 (0.009)	Loss 4.7421 (4.6279)	Top-1 acc 17.578 (19.467)	Top-5 acc 36.328 (40.142)	lr 0.02917
Warmup Train [18][2810/3239]	Time 0.188 (0.234)	Data 0.001 (0.009)	Loss 4.5738 (4.6281)	Top-1 acc 21.875 (19.465)	Top-5 acc 41.016 (40.139)	lr 0.02917
Warmup Train [18][2820/3239]	Time 0.198 (0.234)	Data 0.001 (0.009)	Loss 4.4733 (4.6280)	Top-1 acc 21.875 (19.466)	Top-5 acc 44.141 (40.145)	lr 0.02916
Warmup Train [18][2830/3239]	Time 0.206 (0.234)	Data 0.001 (0.009)	Loss 4.4938 (4.6278)	Top-1 acc 25.391 (19.470)	Top-5 acc 44.141 (40.151)	lr 0.02916
Warmup Train [18][2840/3239]	Time 0.209 (0.234)	Data 0.001 (0.009)	Loss 4.4502 (4.6276)	Top-1 acc 21.094 (19.474)	Top-5 acc 40.625 (40.157)	lr 0.02915
Warmup Train [18][2850/3239]	Time 0.236 (0.233)	Data 0.001 (0.009)	Loss 4.5636 (4.6274)	Top-1 acc 19.141 (19.475)	Top-5 acc 38.281 (40.160)	lr 0.02914
Warmup Train [18][2860/3239]	Time 0.295 (0.234)	Data 0.002 (0.009)	Loss 4.5403 (4.6274)	Top-1 acc 20.703 (19.475)	Top-5 acc 43.750 (40.158)	lr 0.02914
Warmup Train [18][2870/3239]	Time 0.338 (0.233)	Data 0.001 (0.009)	Loss 4.5835 (4.6273)	Top-1 acc 17.969 (19.474)	Top-5 acc 39.453 (40.158)	lr 0.02913
Warmup Train [18][2880/3239]	Time 0.203 (0.233)	Data 0.001 (0.009)	Loss 4.6045 (4.6273)	Top-1 acc 20.703 (19.474)	Top-5 acc 40.234 (40.160)	lr 0.02913
Warmup Train [18][2890/3239]	Time 0.209 (0.233)	Data 0.001 (0.009)	Loss 4.6725 (4.6273)	Top-1 acc 17.578 (19.472)	Top-5 acc 36.719 (40.157)	lr 0.02912
Warmup Train [18][2900/3239]	Time 0.169 (0.233)	Data 0.001 (0.009)	Loss 4.8010 (4.6273)	Top-1 acc 16.797 (19.472)	Top-5 acc 35.938 (40.160)	lr 0.02911
Warmup Train [18][2910/3239]	Time 0.203 (0.233)	Data 0.002 (0.009)	Loss 4.5352 (4.6272)	Top-1 acc 21.484 (19.473)	Top-5 acc 44.141 (40.161)	lr 0.02911
Warmup Train [18][2920/3239]	Time 0.138 (0.233)	Data 0.001 (0.009)	Loss 4.8419 (4.6270)	Top-1 acc 14.453 (19.475)	Top-5 acc 37.500 (40.162)	lr 0.02910
Warmup Train [18][2930/3239]	Time 0.232 (0.233)	Data 0.001 (0.009)	Loss 4.6081 (4.6270)	Top-1 acc 13.672 (19.473)	Top-5 acc 37.891 (40.163)	lr 0.02910
Warmup Train [18][2940/3239]	Time 0.203 (0.233)	Data 0.001 (0.009)	Loss 4.5604 (4.6269)	Top-1 acc 21.875 (19.477)	Top-5 acc 43.359 (40.168)	lr 0.02909
Warmup Train [18][2950/3239]	Time 0.216 (0.233)	Data 0.001 (0.008)	Loss 4.5788 (4.6269)	Top-1 acc 19.531 (19.478)	Top-5 acc 41.797 (40.168)	lr 0.02908
Warmup Train [18][2960/3239]	Time 0.255 (0.233)	Data 0.001 (0.008)	Loss 4.5504 (4.6269)	Top-1 acc 25.781 (19.479)	Top-5 acc 42.578 (40.169)	lr 0.02908
Warmup Train [18][2970/3239]	Time 0.324 (0.233)	Data 0.001 (0.008)	Loss 4.5612 (4.6271)	Top-1 acc 19.922 (19.476)	Top-5 acc 42.578 (40.163)	lr 0.02907
Warmup Train [18][2980/3239]	Time 0.209 (0.233)	Data 0.001 (0.008)	Loss 4.6130 (4.6271)	Top-1 acc 20.703 (19.476)	Top-5 acc 41.406 (40.159)	lr 0.02907
Warmup Train [18][2990/3239]	Time 0.252 (0.233)	Data 0.001 (0.008)	Loss 4.6392 (4.6271)	Top-1 acc 19.922 (19.475)	Top-5 acc 37.500 (40.161)	lr 0.02906
Warmup Train [18][3000/3239]	Time 0.204 (0.233)	Data 0.001 (0.008)	Loss 4.4335 (4.6270)	Top-1 acc 26.953 (19.478)	Top-5 acc 46.875 (40.163)	lr 0.02905
Warmup Train [18][3010/3239]	Time 0.257 (0.233)	Data 0.002 (0.008)	Loss 4.6434 (4.6269)	Top-1 acc 15.234 (19.478)	Top-5 acc 39.062 (40.165)	lr 0.02905
Warmup Train [18][3020/3239]	Time 0.217 (0.233)	Data 0.002 (0.008)	Loss 4.5739 (4.6268)	Top-1 acc 19.531 (19.478)	Top-5 acc 44.141 (40.171)	lr 0.02904
Warmup Train [18][3030/3239]	Time 0.215 (0.233)	Data 0.001 (0.008)	Loss 4.5870 (4.6267)	Top-1 acc 20.312 (19.481)	Top-5 acc 38.281 (40.172)	lr 0.02904
Warmup Train [18][3040/3239]	Time 0.160 (0.233)	Data 0.001 (0.008)	Loss 4.6535 (4.6265)	Top-1 acc 20.312 (19.483)	Top-5 acc 41.406 (40.178)	lr 0.02903
Warmup Train [18][3050/3239]	Time 0.222 (0.233)	Data 0.001 (0.008)	Loss 4.5699 (4.6264)	Top-1 acc 18.750 (19.483)	Top-5 acc 40.625 (40.178)	lr 0.02902
Warmup Train [18][3060/3239]	Time 0.196 (0.233)	Data 0.001 (0.008)	Loss 4.6520 (4.6262)	Top-1 acc 18.359 (19.485)	Top-5 acc 39.062 (40.187)	lr 0.02902
Warmup Train [18][3070/3239]	Time 0.206 (0.233)	Data 0.001 (0.008)	Loss 4.4367 (4.6259)	Top-1 acc 22.656 (19.492)	Top-5 acc 45.312 (40.192)	lr 0.02901
Warmup Train [18][3080/3239]	Time 0.338 (0.233)	Data 0.001 (0.008)	Loss 4.7136 (4.6259)	Top-1 acc 19.922 (19.493)	Top-5 acc 40.234 (40.192)	lr 0.02901
Warmup Train [18][3090/3239]	Time 0.192 (0.233)	Data 0.001 (0.008)	Loss 4.7964 (4.6258)	Top-1 acc 17.188 (19.497)	Top-5 acc 39.453 (40.197)	lr 0.02900
Warmup Train [18][3100/3239]	Time 0.165 (0.233)	Data 0.002 (0.008)	Loss 4.5111 (4.6256)	Top-1 acc 21.484 (19.501)	Top-5 acc 43.750 (40.201)	lr 0.02899
Warmup Train [18][3110/3239]	Time 0.190 (0.233)	Data 0.001 (0.008)	Loss 4.4887 (4.6255)	Top-1 acc 21.484 (19.503)	Top-5 acc 44.141 (40.204)	lr 0.02899
Warmup Train [18][3120/3239]	Time 0.232 (0.233)	Data 0.003 (0.008)	Loss 4.4622 (4.6255)	Top-1 acc 24.609 (19.499)	Top-5 acc 41.797 (40.203)	lr 0.02898
Warmup Train [18][3130/3239]	Time 0.182 (0.233)	Data 0.001 (0.008)	Loss 4.5972 (4.6254)	Top-1 acc 22.266 (19.500)	Top-5 acc 42.188 (40.204)	lr 0.02898
Warmup Train [18][3140/3239]	Time 0.234 (0.233)	Data 0.001 (0.008)	Loss 4.6430 (4.6254)	Top-1 acc 17.969 (19.500)	Top-5 acc 42.188 (40.203)	lr 0.02897
Warmup Train [18][3150/3239]	Time 0.349 (0.233)	Data 0.002 (0.008)	Loss 4.6824 (4.6253)	Top-1 acc 17.188 (19.500)	Top-5 acc 39.062 (40.206)	lr 0.02896
Warmup Train [18][3160/3239]	Time 0.248 (0.233)	Data 0.001 (0.008)	Loss 4.4800 (4.6253)	Top-1 acc 23.047 (19.499)	Top-5 acc 47.266 (40.205)	lr 0.02896
Warmup Train [18][3170/3239]	Time 0.256 (0.233)	Data 0.001 (0.008)	Loss 4.5296 (4.6252)	Top-1 acc 20.703 (19.499)	Top-5 acc 41.406 (40.209)	lr 0.02895
Warmup Train [18][3180/3239]	Time 0.226 (0.233)	Data 0.000 (0.008)	Loss 4.5307 (4.6250)	Top-1 acc 25.000 (19.507)	Top-5 acc 42.188 (40.214)	lr 0.02895
Warmup Train [18][3190/3239]	Time 0.324 (0.233)	Data 0.000 (0.008)	Loss 4.5749 (4.6248)	Top-1 acc 17.578 (19.508)	Top-5 acc 39.453 (40.217)	lr 0.02894
Warmup Train [18][3200/3239]	Time 0.229 (0.233)	Data 0.000 (0.008)	Loss 4.5397 (4.6246)	Top-1 acc 21.484 (19.510)	Top-5 acc 41.406 (40.220)	lr 0.02893
Warmup Train [18][3210/3239]	Time 0.208 (0.233)	Data 0.000 (0.008)	Loss 4.5406 (4.6246)	Top-1 acc 23.828 (19.509)	Top-5 acc 41.016 (40.220)	lr 0.02893
Warmup Train [18][3220/3239]	Time 0.202 (0.233)	Data 0.000 (0.008)	Loss 4.6930 (4.6245)	Top-1 acc 18.359 (19.509)	Top-5 acc 42.188 (40.223)	lr 0.02892
Warmup Train [18][3230/3239]	Time 0.196 (0.233)	Data 0.000 (0.008)	Loss 4.5844 (4.6243)	Top-1 acc 19.922 (19.510)	Top-5 acc 41.406 (40.229)	lr 0.02892
Warmup Train [18][3239/3239]	Time 0.185 (0.232)	Data 0.000 (0.008)	Loss 4.7495 (4.6244)	Top-1 acc 22.222 (19.509)	Top-5 acc 35.802 (40.230)	lr 0.02891
==========Warmup Valid [18/40]	loss 3.668	top-1 acc 26.430	top-5 acc 49.596	Train top-1 19.509	top-5 40.230	flops: 442.4M
Warmup Train [19][0/3239]	Time 16.050 (16.050)	Data 13.908 (13.908)	Loss 4.6589 (4.6589)	Top-1 acc 24.219 (24.219)	Top-5 acc 41.406 (41.406)	lr 0.02891
Warmup Train [19][10/3239]	Time 0.261 (1.810)	Data 0.002 (1.308)	Loss 4.6068 (4.6510)	Top-1 acc 18.359 (19.709)	Top-5 acc 38.672 (39.240)	lr 0.02890
Warmup Train [19][20/3239]	Time 0.301 (1.082)	Data 0.002 (0.686)	Loss 4.5577 (4.6245)	Top-1 acc 21.875 (20.126)	Top-5 acc 44.141 (39.900)	lr 0.02890
Warmup Train [19][30/3239]	Time 0.217 (0.807)	Data 0.002 (0.465)	Loss 4.5019 (4.6177)	Top-1 acc 20.703 (20.060)	Top-5 acc 43.750 (40.071)	lr 0.02889
Warmup Train [19][40/3239]	Time 0.201 (0.662)	Data 0.001 (0.352)	Loss 4.6177 (4.6102)	Top-1 acc 21.484 (20.208)	Top-5 acc 38.672 (40.234)	lr 0.02889
Warmup Train [19][50/3239]	Time 0.254 (0.578)	Data 0.001 (0.284)	Loss 4.6206 (4.6084)	Top-1 acc 18.359 (20.106)	Top-5 acc 37.500 (40.158)	lr 0.02888
Warmup Train [19][60/3239]	Time 0.237 (0.520)	Data 0.001 (0.238)	Loss 4.7745 (4.6141)	Top-1 acc 19.531 (19.896)	Top-5 acc 36.328 (39.991)	lr 0.02887
Warmup Train [19][70/3239]	Time 0.204 (0.481)	Data 0.002 (0.205)	Loss 4.6192 (4.6073)	Top-1 acc 21.094 (19.949)	Top-5 acc 40.625 (40.300)	lr 0.02887
Warmup Train [19][80/3239]	Time 0.172 (0.450)	Data 0.001 (0.180)	Loss 4.5393 (4.6025)	Top-1 acc 15.234 (19.985)	Top-5 acc 39.453 (40.447)	lr 0.02886
Warmup Train [19][90/3239]	Time 0.336 (0.426)	Data 0.001 (0.161)	Loss 4.5697 (4.6000)	Top-1 acc 18.359 (20.012)	Top-5 acc 40.234 (40.492)	lr 0.02886
Warmup Train [19][100/3239]	Time 0.206 (0.408)	Data 0.001 (0.145)	Loss 4.5408 (4.6015)	Top-1 acc 21.484 (19.972)	Top-5 acc 38.281 (40.490)	lr 0.02885
Warmup Train [19][110/3239]	Time 0.274 (0.391)	Data 0.001 (0.132)	Loss 4.6037 (4.6018)	Top-1 acc 18.750 (19.904)	Top-5 acc 39.062 (40.541)	lr 0.02885
Warmup Train [19][120/3239]	Time 0.249 (0.377)	Data 0.001 (0.121)	Loss 4.3027 (4.6006)	Top-1 acc 23.047 (19.935)	Top-5 acc 50.000 (40.612)	lr 0.02884
Warmup Train [19][130/3239]	Time 0.208 (0.366)	Data 0.001 (0.112)	Loss 4.5932 (4.6036)	Top-1 acc 24.609 (19.949)	Top-5 acc 39.844 (40.601)	lr 0.02883
Warmup Train [19][140/3239]	Time 0.193 (0.356)	Data 0.001 (0.104)	Loss 4.5170 (4.6029)	Top-1 acc 24.219 (19.994)	Top-5 acc 43.359 (40.636)	lr 0.02883
Warmup Train [19][150/3239]	Time 0.197 (0.347)	Data 0.001 (0.097)	Loss 4.5639 (4.6030)	Top-1 acc 19.141 (19.945)	Top-5 acc 37.891 (40.514)	lr 0.02882
Warmup Train [19][160/3239]	Time 0.207 (0.340)	Data 0.001 (0.091)	Loss 4.5268 (4.6000)	Top-1 acc 22.656 (19.987)	Top-5 acc 44.141 (40.620)	lr 0.02882
Warmup Train [19][170/3239]	Time 0.246 (0.332)	Data 0.001 (0.086)	Loss 4.5843 (4.6029)	Top-1 acc 19.531 (19.878)	Top-5 acc 42.578 (40.579)	lr 0.02881
Warmup Train [19][180/3239]	Time 0.242 (0.326)	Data 0.001 (0.081)	Loss 4.6836 (4.6053)	Top-1 acc 19.141 (19.823)	Top-5 acc 39.844 (40.519)	lr 0.02880
Warmup Train [19][190/3239]	Time 0.305 (0.321)	Data 0.001 (0.077)	Loss 4.6155 (4.6026)	Top-1 acc 18.359 (19.838)	Top-5 acc 37.109 (40.572)	lr 0.02880
Warmup Train [19][200/3239]	Time 0.456 (0.317)	Data 0.002 (0.074)	Loss 4.5435 (4.6002)	Top-1 acc 19.141 (19.823)	Top-5 acc 42.578 (40.613)	lr 0.02879
Warmup Train [19][210/3239]	Time 0.192 (0.312)	Data 0.029 (0.070)	Loss 4.7759 (4.6009)	Top-1 acc 17.578 (19.853)	Top-5 acc 37.109 (40.588)	lr 0.02879
Warmup Train [19][220/3239]	Time 0.236 (0.308)	Data 0.002 (0.067)	Loss 4.6086 (4.5996)	Top-1 acc 20.312 (19.862)	Top-5 acc 41.406 (40.623)	lr 0.02878
Warmup Train [19][230/3239]	Time 0.183 (0.304)	Data 0.001 (0.065)	Loss 4.6577 (4.5984)	Top-1 acc 23.047 (19.869)	Top-5 acc 38.281 (40.677)	lr 0.02877
Warmup Train [19][240/3239]	Time 0.139 (0.301)	Data 0.001 (0.062)	Loss 4.5441 (4.6001)	Top-1 acc 21.875 (19.881)	Top-5 acc 42.188 (40.631)	lr 0.02877
Warmup Train [19][250/3239]	Time 0.247 (0.297)	Data 0.001 (0.060)	Loss 4.5194 (4.5989)	Top-1 acc 22.266 (19.900)	Top-5 acc 41.406 (40.639)	lr 0.02876
Warmup Train [19][260/3239]	Time 0.195 (0.295)	Data 0.001 (0.058)	Loss 4.5850 (4.5958)	Top-1 acc 21.094 (19.971)	Top-5 acc 41.406 (40.734)	lr 0.02876
Warmup Train [19][270/3239]	Time 0.240 (0.292)	Data 0.001 (0.055)	Loss 4.6331 (4.5929)	Top-1 acc 19.141 (20.014)	Top-5 acc 37.891 (40.818)	lr 0.02875
Warmup Train [19][280/3239]	Time 0.239 (0.290)	Data 0.002 (0.054)	Loss 4.6040 (4.5937)	Top-1 acc 20.312 (20.019)	Top-5 acc 41.016 (40.814)	lr 0.02874
Warmup Train [19][290/3239]	Time 0.175 (0.287)	Data 0.001 (0.052)	Loss 4.7062 (4.5931)	Top-1 acc 16.797 (20.037)	Top-5 acc 36.719 (40.798)	lr 0.02874
Warmup Train [19][300/3239]	Time 0.354 (0.286)	Data 0.001 (0.050)	Loss 4.7228 (4.5926)	Top-1 acc 20.703 (20.054)	Top-5 acc 40.234 (40.835)	lr 0.02873
Warmup Train [19][310/3239]	Time 0.217 (0.284)	Data 0.001 (0.049)	Loss 4.6095 (4.5900)	Top-1 acc 16.797 (20.090)	Top-5 acc 38.281 (40.919)	lr 0.02873
Warmup Train [19][320/3239]	Time 0.270 (0.282)	Data 0.001 (0.047)	Loss 4.6483 (4.5902)	Top-1 acc 19.141 (20.059)	Top-5 acc 37.891 (40.894)	lr 0.02872
Warmup Train [19][330/3239]	Time 0.206 (0.281)	Data 0.001 (0.046)	Loss 4.5383 (4.5901)	Top-1 acc 25.391 (20.091)	Top-5 acc 46.484 (40.915)	lr 0.02871
Warmup Train [19][340/3239]	Time 0.189 (0.279)	Data 0.001 (0.045)	Loss 4.5808 (4.5889)	Top-1 acc 20.312 (20.103)	Top-5 acc 41.406 (40.957)	lr 0.02871
Warmup Train [19][350/3239]	Time 0.221 (0.278)	Data 0.005 (0.044)	Loss 4.4417 (4.5892)	Top-1 acc 23.047 (20.097)	Top-5 acc 43.750 (40.944)	lr 0.02870
Warmup Train [19][360/3239]	Time 0.230 (0.277)	Data 0.001 (0.042)	Loss 4.5032 (4.5895)	Top-1 acc 20.312 (20.108)	Top-5 acc 40.625 (40.929)	lr 0.02870
Warmup Train [19][370/3239]	Time 0.250 (0.275)	Data 0.002 (0.041)	Loss 4.8083 (4.5912)	Top-1 acc 18.750 (20.087)	Top-5 acc 39.453 (40.892)	lr 0.02869
Warmup Train [19][380/3239]	Time 0.202 (0.274)	Data 0.001 (0.040)	Loss 4.5414 (4.5921)	Top-1 acc 19.531 (20.079)	Top-5 acc 42.969 (40.878)	lr 0.02868
Warmup Train [19][390/3239]	Time 0.182 (0.272)	Data 0.001 (0.039)	Loss 4.6528 (4.5939)	Top-1 acc 17.188 (20.077)	Top-5 acc 40.234 (40.866)	lr 0.02868
Warmup Train [19][400/3239]	Time 0.191 (0.271)	Data 0.001 (0.038)	Loss 4.5519 (4.5934)	Top-1 acc 21.875 (20.102)	Top-5 acc 41.406 (40.897)	lr 0.02867
Warmup Train [19][410/3239]	Time 0.221 (0.270)	Data 0.001 (0.038)	Loss 4.4154 (4.5927)	Top-1 acc 20.312 (20.085)	Top-5 acc 46.094 (40.921)	lr 0.02867
Warmup Train [19][420/3239]	Time 0.297 (0.269)	Data 0.001 (0.037)	Loss 4.6713 (4.5916)	Top-1 acc 18.750 (20.101)	Top-5 acc 38.672 (40.945)	lr 0.02866
Warmup Train [19][430/3239]	Time 0.271 (0.268)	Data 0.003 (0.036)	Loss 4.4479 (4.5904)	Top-1 acc 22.656 (20.120)	Top-5 acc 43.359 (40.946)	lr 0.02865
Warmup Train [19][440/3239]	Time 0.194 (0.267)	Data 0.001 (0.035)	Loss 4.5308 (4.5904)	Top-1 acc 22.266 (20.126)	Top-5 acc 43.359 (40.958)	lr 0.02865
Warmup Train [19][450/3239]	Time 0.202 (0.266)	Data 0.001 (0.034)	Loss 4.7441 (4.5898)	Top-1 acc 16.406 (20.131)	Top-5 acc 39.453 (40.981)	lr 0.02864
Warmup Train [19][460/3239]	Time 0.229 (0.265)	Data 0.001 (0.034)	Loss 4.5889 (4.5892)	Top-1 acc 17.578 (20.157)	Top-5 acc 40.234 (40.979)	lr 0.02864
Warmup Train [19][470/3239]	Time 0.242 (0.264)	Data 0.001 (0.033)	Loss 4.6126 (4.5895)	Top-1 acc 20.312 (20.157)	Top-5 acc 43.359 (40.989)	lr 0.02863
Warmup Train [19][480/3239]	Time 0.244 (0.263)	Data 0.002 (0.032)	Loss 4.5183 (4.5886)	Top-1 acc 19.531 (20.151)	Top-5 acc 43.750 (40.987)	lr 0.02862
Warmup Train [19][490/3239]	Time 0.272 (0.262)	Data 0.002 (0.032)	Loss 4.4562 (4.5883)	Top-1 acc 23.047 (20.150)	Top-5 acc 46.094 (40.993)	lr 0.02862
Warmup Train [19][500/3239]	Time 0.234 (0.261)	Data 0.001 (0.031)	Loss 4.3536 (4.5874)	Top-1 acc 23.047 (20.178)	Top-5 acc 44.922 (40.998)	lr 0.02861
Warmup Train [19][510/3239]	Time 0.382 (0.261)	Data 0.001 (0.031)	Loss 4.4422 (4.5871)	Top-1 acc 25.391 (20.186)	Top-5 acc 44.531 (41.006)	lr 0.02861
Warmup Train [19][520/3239]	Time 0.140 (0.260)	Data 0.002 (0.030)	Loss 4.8367 (4.5872)	Top-1 acc 18.359 (20.178)	Top-5 acc 35.938 (41.000)	lr 0.02860
Warmup Train [19][530/3239]	Time 0.200 (0.260)	Data 0.001 (0.030)	Loss 4.6000 (4.5864)	Top-1 acc 21.875 (20.190)	Top-5 acc 41.797 (41.043)	lr 0.02859
Warmup Train [19][540/3239]	Time 0.220 (0.259)	Data 0.001 (0.029)	Loss 4.4940 (4.5867)	Top-1 acc 19.922 (20.180)	Top-5 acc 44.531 (41.043)	lr 0.02859
Warmup Train [19][550/3239]	Time 0.224 (0.258)	Data 0.001 (0.029)	Loss 4.5205 (4.5860)	Top-1 acc 22.656 (20.200)	Top-5 acc 46.484 (41.073)	lr 0.02858
Warmup Train [19][560/3239]	Time 0.252 (0.258)	Data 0.001 (0.028)	Loss 4.7449 (4.5853)	Top-1 acc 17.578 (20.217)	Top-5 acc 42.969 (41.103)	lr 0.02858
Warmup Train [19][570/3239]	Time 0.187 (0.257)	Data 0.001 (0.028)	Loss 4.4664 (4.5850)	Top-1 acc 21.094 (20.216)	Top-5 acc 45.703 (41.122)	lr 0.02857
Warmup Train [19][580/3239]	Time 0.176 (0.257)	Data 0.001 (0.027)	Loss 4.5915 (4.5848)	Top-1 acc 17.969 (20.234)	Top-5 acc 40.625 (41.142)	lr 0.02856
Warmup Train [19][590/3239]	Time 0.217 (0.256)	Data 0.002 (0.027)	Loss 4.6479 (4.5849)	Top-1 acc 20.312 (20.235)	Top-5 acc 38.281 (41.158)	lr 0.02856
Warmup Train [19][600/3239]	Time 0.192 (0.256)	Data 0.002 (0.027)	Loss 4.6188 (4.5858)	Top-1 acc 19.922 (20.225)	Top-5 acc 41.406 (41.138)	lr 0.02855
Warmup Train [19][610/3239]	Time 0.295 (0.256)	Data 0.001 (0.026)	Loss 4.6034 (4.5857)	Top-1 acc 21.094 (20.224)	Top-5 acc 38.281 (41.139)	lr 0.02855
Warmup Train [19][620/3239]	Time 0.273 (0.255)	Data 0.001 (0.026)	Loss 4.6305 (4.5858)	Top-1 acc 18.750 (20.201)	Top-5 acc 38.281 (41.127)	lr 0.02854
Warmup Train [19][630/3239]	Time 0.214 (0.255)	Data 0.001 (0.025)	Loss 4.4889 (4.5853)	Top-1 acc 23.438 (20.197)	Top-5 acc 42.578 (41.132)	lr 0.02853
Warmup Train [19][640/3239]	Time 0.148 (0.254)	Data 0.002 (0.025)	Loss 4.6669 (4.5855)	Top-1 acc 18.750 (20.210)	Top-5 acc 36.719 (41.113)	lr 0.02853
Warmup Train [19][650/3239]	Time 0.252 (0.254)	Data 0.001 (0.025)	Loss 4.4965 (4.5858)	Top-1 acc 20.312 (20.210)	Top-5 acc 43.359 (41.118)	lr 0.02852
Warmup Train [19][660/3239]	Time 0.233 (0.253)	Data 0.001 (0.024)	Loss 4.4083 (4.5855)	Top-1 acc 25.391 (20.207)	Top-5 acc 44.141 (41.115)	lr 0.02852
Warmup Train [19][670/3239]	Time 0.222 (0.253)	Data 0.001 (0.024)	Loss 4.7166 (4.5861)	Top-1 acc 15.625 (20.193)	Top-5 acc 38.672 (41.099)	lr 0.02851
Warmup Train [19][680/3239]	Time 0.225 (0.252)	Data 0.001 (0.024)	Loss 4.6099 (4.5860)	Top-1 acc 19.531 (20.201)	Top-5 acc 38.281 (41.095)	lr 0.02850
Warmup Train [19][690/3239]	Time 0.186 (0.252)	Data 0.001 (0.023)	Loss 4.4953 (4.5858)	Top-1 acc 19.531 (20.190)	Top-5 acc 42.969 (41.089)	lr 0.02850
Warmup Train [19][700/3239]	Time 0.159 (0.251)	Data 0.001 (0.023)	Loss 4.4577 (4.5866)	Top-1 acc 26.172 (20.180)	Top-5 acc 48.828 (41.067)	lr 0.02849
Warmup Train [19][710/3239]	Time 0.309 (0.251)	Data 0.001 (0.023)	Loss 4.6446 (4.5864)	Top-1 acc 17.969 (20.192)	Top-5 acc 40.625 (41.076)	lr 0.02849
Warmup Train [19][720/3239]	Time 0.194 (0.250)	Data 0.001 (0.023)	Loss 4.4606 (4.5855)	Top-1 acc 26.953 (20.225)	Top-5 acc 44.531 (41.104)	lr 0.02848
Warmup Train [19][730/3239]	Time 0.170 (0.250)	Data 0.001 (0.022)	Loss 4.6748 (4.5849)	Top-1 acc 19.531 (20.225)	Top-5 acc 40.234 (41.117)	lr 0.02847
Warmup Train [19][740/3239]	Time 0.248 (0.250)	Data 0.001 (0.022)	Loss 4.2919 (4.5841)	Top-1 acc 23.828 (20.240)	Top-5 acc 48.047 (41.137)	lr 0.02847
Warmup Train [19][750/3239]	Time 0.242 (0.250)	Data 0.002 (0.022)	Loss 4.5346 (4.5841)	Top-1 acc 23.047 (20.233)	Top-5 acc 46.094 (41.134)	lr 0.02846
Warmup Train [19][760/3239]	Time 0.170 (0.249)	Data 0.001 (0.022)	Loss 4.6852 (4.5842)	Top-1 acc 15.234 (20.228)	Top-5 acc 42.188 (41.137)	lr 0.02846
Warmup Train [19][770/3239]	Time 0.202 (0.249)	Data 0.002 (0.021)	Loss 4.5321 (4.5843)	Top-1 acc 21.484 (20.235)	Top-5 acc 41.016 (41.141)	lr 0.02845
Warmup Train [19][780/3239]	Time 0.234 (0.249)	Data 0.029 (0.021)	Loss 4.5341 (4.5840)	Top-1 acc 21.484 (20.250)	Top-5 acc 44.141 (41.152)	lr 0.02844
Warmup Train [19][790/3239]	Time 0.210 (0.248)	Data 0.002 (0.021)	Loss 4.5929 (4.5836)	Top-1 acc 20.703 (20.249)	Top-5 acc 43.750 (41.162)	lr 0.02844
Warmup Train [19][800/3239]	Time 0.171 (0.248)	Data 0.001 (0.021)	Loss 4.4613 (4.5824)	Top-1 acc 21.875 (20.278)	Top-5 acc 47.266 (41.204)	lr 0.02843
Warmup Train [19][810/3239]	Time 0.170 (0.247)	Data 0.001 (0.020)	Loss 4.8807 (4.5827)	Top-1 acc 14.062 (20.279)	Top-5 acc 34.766 (41.207)	lr 0.02843
Warmup Train [19][820/3239]	Time 0.361 (0.247)	Data 0.002 (0.020)	Loss 4.5052 (4.5825)	Top-1 acc 21.094 (20.292)	Top-5 acc 39.844 (41.209)	lr 0.02842
Warmup Train [19][830/3239]	Time 0.239 (0.247)	Data 0.002 (0.020)	Loss 4.6072 (4.5824)	Top-1 acc 17.969 (20.288)	Top-5 acc 40.625 (41.203)	lr 0.02841
Warmup Train [19][840/3239]	Time 0.254 (0.248)	Data 0.003 (0.020)	Loss 4.7323 (4.5822)	Top-1 acc 16.016 (20.295)	Top-5 acc 37.500 (41.221)	lr 0.02841
Warmup Train [19][850/3239]	Time 0.156 (0.248)	Data 0.001 (0.020)	Loss 4.5925 (4.5818)	Top-1 acc 21.484 (20.295)	Top-5 acc 38.281 (41.226)	lr 0.02840
Warmup Train [19][860/3239]	Time 0.196 (0.247)	Data 0.001 (0.019)	Loss 4.7339 (4.5818)	Top-1 acc 17.188 (20.296)	Top-5 acc 39.062 (41.229)	lr 0.02840
Warmup Train [19][870/3239]	Time 0.189 (0.248)	Data 0.001 (0.019)	Loss 4.4225 (4.5810)	Top-1 acc 22.656 (20.298)	Top-5 acc 45.312 (41.245)	lr 0.02839
Warmup Train [19][880/3239]	Time 0.265 (0.247)	Data 0.002 (0.019)	Loss 4.6688 (4.5808)	Top-1 acc 17.969 (20.294)	Top-5 acc 41.406 (41.258)	lr 0.02838
Warmup Train [19][890/3239]	Time 0.221 (0.247)	Data 0.002 (0.019)	Loss 4.4922 (4.5804)	Top-1 acc 20.312 (20.287)	Top-5 acc 42.969 (41.270)	lr 0.02838
Warmup Train [19][900/3239]	Time 0.281 (0.248)	Data 0.002 (0.019)	Loss 4.6796 (4.5803)	Top-1 acc 16.797 (20.284)	Top-5 acc 42.578 (41.281)	lr 0.02837
Warmup Train [19][910/3239]	Time 0.216 (0.247)	Data 0.001 (0.019)	Loss 4.7020 (4.5805)	Top-1 acc 17.188 (20.277)	Top-5 acc 37.500 (41.279)	lr 0.02837
Warmup Train [19][920/3239]	Time 0.200 (0.247)	Data 0.001 (0.018)	Loss 4.6173 (4.5808)	Top-1 acc 19.141 (20.274)	Top-5 acc 40.234 (41.258)	lr 0.02836
Warmup Train [19][930/3239]	Time 0.196 (0.246)	Data 0.002 (0.018)	Loss 4.4279 (4.5809)	Top-1 acc 25.391 (20.280)	Top-5 acc 46.094 (41.264)	lr 0.02835
Warmup Train [19][940/3239]	Time 0.291 (0.246)	Data 0.001 (0.018)	Loss 4.6746 (4.5807)	Top-1 acc 18.750 (20.284)	Top-5 acc 41.016 (41.268)	lr 0.02835
Warmup Train [19][950/3239]	Time 0.196 (0.246)	Data 0.001 (0.018)	Loss 4.5708 (4.5805)	Top-1 acc 19.141 (20.299)	Top-5 acc 41.016 (41.271)	lr 0.02834
Warmup Train [19][960/3239]	Time 0.272 (0.245)	Data 0.001 (0.018)	Loss 4.3719 (4.5795)	Top-1 acc 19.531 (20.321)	Top-5 acc 44.141 (41.296)	lr 0.02834
Warmup Train [19][970/3239]	Time 0.247 (0.245)	Data 0.004 (0.018)	Loss 4.6543 (4.5789)	Top-1 acc 17.578 (20.319)	Top-5 acc 40.234 (41.306)	lr 0.02833
Warmup Train [19][980/3239]	Time 0.308 (0.245)	Data 0.002 (0.017)	Loss 4.5727 (4.5789)	Top-1 acc 19.531 (20.306)	Top-5 acc 41.406 (41.310)	lr 0.02832
Warmup Train [19][990/3239]	Time 0.266 (0.245)	Data 0.002 (0.017)	Loss 4.6621 (4.5791)	Top-1 acc 22.656 (20.316)	Top-5 acc 41.406 (41.297)	lr 0.02832
Warmup Train [19][1000/3239]	Time 0.263 (0.245)	Data 0.002 (0.017)	Loss 4.7107 (4.5797)	Top-1 acc 18.750 (20.311)	Top-5 acc 36.328 (41.297)	lr 0.02831
Warmup Train [19][1010/3239]	Time 0.316 (0.246)	Data 0.001 (0.017)	Loss 4.3527 (4.5787)	Top-1 acc 26.953 (20.327)	Top-5 acc 47.656 (41.325)	lr 0.02831
Warmup Train [19][1020/3239]	Time 0.280 (0.245)	Data 0.002 (0.017)	Loss 4.6183 (4.5786)	Top-1 acc 18.750 (20.321)	Top-5 acc 42.188 (41.327)	lr 0.02830
Warmup Train [19][1030/3239]	Time 0.467 (0.246)	Data 0.002 (0.017)	Loss 4.7890 (4.5783)	Top-1 acc 16.797 (20.327)	Top-5 acc 35.938 (41.341)	lr 0.02829
Warmup Train [19][1040/3239]	Time 0.244 (0.246)	Data 0.002 (0.017)	Loss 4.3752 (4.5784)	Top-1 acc 23.438 (20.321)	Top-5 acc 45.312 (41.340)	lr 0.02829
Warmup Train [19][1050/3239]	Time 0.275 (0.246)	Data 0.001 (0.016)	Loss 4.5970 (4.5781)	Top-1 acc 18.750 (20.323)	Top-5 acc 40.234 (41.344)	lr 0.02828
Warmup Train [19][1060/3239]	Time 0.246 (0.246)	Data 0.002 (0.016)	Loss 4.5972 (4.5775)	Top-1 acc 21.484 (20.332)	Top-5 acc 41.406 (41.348)	lr 0.02828
Warmup Train [19][1070/3239]	Time 0.189 (0.246)	Data 0.002 (0.016)	Loss 4.6307 (4.5774)	Top-1 acc 21.484 (20.344)	Top-5 acc 39.844 (41.353)	lr 0.02827
Warmup Train [19][1080/3239]	Time 0.283 (0.246)	Data 0.001 (0.016)	Loss 4.5944 (4.5772)	Top-1 acc 23.047 (20.347)	Top-5 acc 42.188 (41.359)	lr 0.02826
Warmup Train [19][1090/3239]	Time 0.190 (0.246)	Data 0.001 (0.016)	Loss 4.4644 (4.5772)	Top-1 acc 23.047 (20.350)	Top-5 acc 44.531 (41.358)	lr 0.02826
Warmup Train [19][1100/3239]	Time 0.206 (0.246)	Data 0.001 (0.016)	Loss 4.5501 (4.5770)	Top-1 acc 23.047 (20.359)	Top-5 acc 40.625 (41.365)	lr 0.02825
Warmup Train [19][1110/3239]	Time 0.306 (0.246)	Data 0.054 (0.016)	Loss 4.5129 (4.5773)	Top-1 acc 19.141 (20.342)	Top-5 acc 39.453 (41.353)	lr 0.02825
Warmup Train [19][1120/3239]	Time 0.390 (0.246)	Data 0.002 (0.016)	Loss 4.4992 (4.5775)	Top-1 acc 23.438 (20.345)	Top-5 acc 42.969 (41.350)	lr 0.02824
Warmup Train [19][1130/3239]	Time 0.384 (0.246)	Data 0.002 (0.015)	Loss 4.7753 (4.5778)	Top-1 acc 16.797 (20.334)	Top-5 acc 36.719 (41.341)	lr 0.02823
Warmup Train [19][1140/3239]	Time 0.191 (0.246)	Data 0.001 (0.015)	Loss 4.5825 (4.5777)	Top-1 acc 18.750 (20.335)	Top-5 acc 44.922 (41.345)	lr 0.02823
Warmup Train [19][1150/3239]	Time 0.179 (0.246)	Data 0.001 (0.015)	Loss 4.7512 (4.5774)	Top-1 acc 19.141 (20.333)	Top-5 acc 36.719 (41.349)	lr 0.02822
Warmup Train [19][1160/3239]	Time 0.162 (0.246)	Data 0.001 (0.015)	Loss 4.7849 (4.5774)	Top-1 acc 16.406 (20.334)	Top-5 acc 33.594 (41.341)	lr 0.02822
Warmup Train [19][1170/3239]	Time 0.207 (0.246)	Data 0.001 (0.015)	Loss 4.4948 (4.5775)	Top-1 acc 23.828 (20.336)	Top-5 acc 43.359 (41.342)	lr 0.02821
Warmup Train [19][1180/3239]	Time 0.215 (0.245)	Data 0.001 (0.015)	Loss 4.6098 (4.5774)	Top-1 acc 19.922 (20.337)	Top-5 acc 38.281 (41.348)	lr 0.02820
Warmup Train [19][1190/3239]	Time 0.259 (0.245)	Data 0.001 (0.015)	Loss 4.8390 (4.5779)	Top-1 acc 14.844 (20.332)	Top-5 acc 36.719 (41.334)	lr 0.02820
Warmup Train [19][1200/3239]	Time 0.221 (0.245)	Data 0.001 (0.015)	Loss 4.4163 (4.5779)	Top-1 acc 23.828 (20.332)	Top-5 acc 44.922 (41.333)	lr 0.02819
Warmup Train [19][1210/3239]	Time 0.184 (0.245)	Data 0.002 (0.015)	Loss 4.5205 (4.5776)	Top-1 acc 19.531 (20.341)	Top-5 acc 44.531 (41.339)	lr 0.02819
Warmup Train [19][1220/3239]	Time 0.223 (0.245)	Data 0.003 (0.015)	Loss 4.6948 (4.5778)	Top-1 acc 23.047 (20.344)	Top-5 acc 37.891 (41.337)	lr 0.02818
Warmup Train [19][1230/3239]	Time 0.175 (0.245)	Data 0.001 (0.014)	Loss 4.7345 (4.5779)	Top-1 acc 19.922 (20.353)	Top-5 acc 38.672 (41.338)	lr 0.02817
Warmup Train [19][1240/3239]	Time 0.270 (0.244)	Data 0.002 (0.014)	Loss 4.7839 (4.5779)	Top-1 acc 21.484 (20.354)	Top-5 acc 39.844 (41.338)	lr 0.02817
Warmup Train [19][1250/3239]	Time 0.362 (0.244)	Data 0.001 (0.014)	Loss 4.3415 (4.5776)	Top-1 acc 24.219 (20.363)	Top-5 acc 46.484 (41.351)	lr 0.02816
Warmup Train [19][1260/3239]	Time 0.178 (0.244)	Data 0.001 (0.014)	Loss 4.6014 (4.5777)	Top-1 acc 21.484 (20.360)	Top-5 acc 41.406 (41.343)	lr 0.02815
Warmup Train [19][1270/3239]	Time 0.171 (0.244)	Data 0.001 (0.014)	Loss 4.6405 (4.5776)	Top-1 acc 15.625 (20.356)	Top-5 acc 42.578 (41.348)	lr 0.02815
Warmup Train [19][1280/3239]	Time 0.150 (0.244)	Data 0.002 (0.014)	Loss 4.5251 (4.5775)	Top-1 acc 19.922 (20.358)	Top-5 acc 41.406 (41.353)	lr 0.02814
Warmup Train [19][1290/3239]	Time 0.173 (0.244)	Data 0.001 (0.014)	Loss 4.4168 (4.5772)	Top-1 acc 23.047 (20.363)	Top-5 acc 42.969 (41.363)	lr 0.02814
Warmup Train [19][1300/3239]	Time 0.267 (0.244)	Data 0.001 (0.014)	Loss 4.5073 (4.5771)	Top-1 acc 19.141 (20.363)	Top-5 acc 43.359 (41.365)	lr 0.02813
Warmup Train [19][1310/3239]	Time 0.160 (0.244)	Data 0.001 (0.014)	Loss 4.6936 (4.5771)	Top-1 acc 21.094 (20.367)	Top-5 acc 38.281 (41.368)	lr 0.02812
Warmup Train [19][1320/3239]	Time 0.201 (0.243)	Data 0.001 (0.014)	Loss 4.6119 (4.5771)	Top-1 acc 17.188 (20.365)	Top-5 acc 41.797 (41.367)	lr 0.02812
Warmup Train [19][1330/3239]	Time 0.222 (0.243)	Data 0.001 (0.014)	Loss 4.3452 (4.5768)	Top-1 acc 26.172 (20.371)	Top-5 acc 46.875 (41.366)	lr 0.02811
Warmup Train [19][1340/3239]	Time 0.191 (0.243)	Data 0.001 (0.014)	Loss 4.7249 (4.5768)	Top-1 acc 13.672 (20.358)	Top-5 acc 37.891 (41.360)	lr 0.02811
Warmup Train [19][1350/3239]	Time 0.206 (0.243)	Data 0.001 (0.014)	Loss 4.5354 (4.5767)	Top-1 acc 22.656 (20.358)	Top-5 acc 43.750 (41.370)	lr 0.02810
Warmup Train [19][1360/3239]	Time 0.335 (0.243)	Data 0.001 (0.013)	Loss 4.5861 (4.5765)	Top-1 acc 19.922 (20.366)	Top-5 acc 41.016 (41.375)	lr 0.02809
Warmup Train [19][1370/3239]	Time 0.245 (0.243)	Data 0.001 (0.013)	Loss 4.5030 (4.5764)	Top-1 acc 22.656 (20.360)	Top-5 acc 42.188 (41.373)	lr 0.02809
Warmup Train [19][1380/3239]	Time 0.224 (0.242)	Data 0.001 (0.013)	Loss 4.3949 (4.5763)	Top-1 acc 23.828 (20.360)	Top-5 acc 42.969 (41.362)	lr 0.02808
Warmup Train [19][1390/3239]	Time 0.243 (0.242)	Data 0.001 (0.013)	Loss 4.5914 (4.5761)	Top-1 acc 21.484 (20.363)	Top-5 acc 41.016 (41.367)	lr 0.02808
Warmup Train [19][1400/3239]	Time 0.176 (0.242)	Data 0.004 (0.013)	Loss 4.5036 (4.5757)	Top-1 acc 23.047 (20.363)	Top-5 acc 42.578 (41.376)	lr 0.02807
Warmup Train [19][1410/3239]	Time 0.144 (0.242)	Data 0.001 (0.013)	Loss 4.4578 (4.5751)	Top-1 acc 20.703 (20.370)	Top-5 acc 46.094 (41.391)	lr 0.02806
Warmup Train [19][1420/3239]	Time 0.211 (0.242)	Data 0.001 (0.013)	Loss 4.5375 (4.5750)	Top-1 acc 19.531 (20.367)	Top-5 acc 41.797 (41.391)	lr 0.02806
Warmup Train [19][1430/3239]	Time 0.194 (0.242)	Data 0.001 (0.013)	Loss 4.4741 (4.5748)	Top-1 acc 21.875 (20.369)	Top-5 acc 42.188 (41.392)	lr 0.02805
Warmup Train [19][1440/3239]	Time 0.245 (0.242)	Data 0.001 (0.013)	Loss 4.4224 (4.5747)	Top-1 acc 22.266 (20.364)	Top-5 acc 45.312 (41.398)	lr 0.02805
Warmup Train [19][1450/3239]	Time 0.217 (0.242)	Data 0.001 (0.013)	Loss 4.4183 (4.5741)	Top-1 acc 25.391 (20.373)	Top-5 acc 45.312 (41.412)	lr 0.02804
Warmup Train [19][1460/3239]	Time 0.269 (0.242)	Data 0.001 (0.013)	Loss 4.7375 (4.5742)	Top-1 acc 15.234 (20.367)	Top-5 acc 35.938 (41.411)	lr 0.02803
Warmup Train [19][1470/3239]	Time 0.177 (0.242)	Data 0.033 (0.013)	Loss 4.5828 (4.5742)	Top-1 acc 17.969 (20.369)	Top-5 acc 40.234 (41.412)	lr 0.02803
Warmup Train [19][1480/3239]	Time 0.219 (0.242)	Data 0.001 (0.013)	Loss 4.5324 (4.5741)	Top-1 acc 23.438 (20.369)	Top-5 acc 43.750 (41.409)	lr 0.02802
Warmup Train [19][1490/3239]	Time 0.191 (0.241)	Data 0.001 (0.013)	Loss 4.4118 (4.5741)	Top-1 acc 23.828 (20.370)	Top-5 acc 44.922 (41.413)	lr 0.02802
Warmup Train [19][1500/3239]	Time 0.247 (0.241)	Data 0.001 (0.013)	Loss 4.7632 (4.5741)	Top-1 acc 19.141 (20.365)	Top-5 acc 38.672 (41.408)	lr 0.02801
Warmup Train [19][1510/3239]	Time 0.195 (0.241)	Data 0.001 (0.012)	Loss 4.6199 (4.5740)	Top-1 acc 16.797 (20.368)	Top-5 acc 37.891 (41.407)	lr 0.02800
Warmup Train [19][1520/3239]	Time 0.231 (0.241)	Data 0.001 (0.012)	Loss 4.5337 (4.5739)	Top-1 acc 21.484 (20.370)	Top-5 acc 42.969 (41.408)	lr 0.02800
Warmup Train [19][1530/3239]	Time 0.266 (0.241)	Data 0.001 (0.012)	Loss 4.5276 (4.5738)	Top-1 acc 22.266 (20.377)	Top-5 acc 44.141 (41.411)	lr 0.02799
Warmup Train [19][1540/3239]	Time 0.261 (0.241)	Data 0.001 (0.012)	Loss 4.5590 (4.5739)	Top-1 acc 23.047 (20.378)	Top-5 acc 39.062 (41.407)	lr 0.02799
Warmup Train [19][1550/3239]	Time 0.353 (0.241)	Data 0.004 (0.012)	Loss 4.5650 (4.5738)	Top-1 acc 21.094 (20.380)	Top-5 acc 38.672 (41.406)	lr 0.02798
Warmup Train [19][1560/3239]	Time 0.161 (0.241)	Data 0.002 (0.012)	Loss 4.6532 (4.5736)	Top-1 acc 15.234 (20.382)	Top-5 acc 39.062 (41.417)	lr 0.02797
Warmup Train [19][1570/3239]	Time 0.194 (0.241)	Data 0.001 (0.012)	Loss 4.6710 (4.5736)	Top-1 acc 21.875 (20.385)	Top-5 acc 40.234 (41.420)	lr 0.02797
Warmup Train [19][1580/3239]	Time 0.203 (0.241)	Data 0.001 (0.012)	Loss 4.4758 (4.5737)	Top-1 acc 21.875 (20.385)	Top-5 acc 46.094 (41.420)	lr 0.02796
Warmup Train [19][1590/3239]	Time 0.172 (0.241)	Data 0.001 (0.012)	Loss 4.4404 (4.5737)	Top-1 acc 21.875 (20.385)	Top-5 acc 45.312 (41.420)	lr 0.02796
Warmup Train [19][1600/3239]	Time 0.258 (0.240)	Data 0.001 (0.012)	Loss 4.5682 (4.5738)	Top-1 acc 23.828 (20.382)	Top-5 acc 43.359 (41.423)	lr 0.02795
Warmup Train [19][1610/3239]	Time 0.210 (0.240)	Data 0.001 (0.012)	Loss 4.3789 (4.5737)	Top-1 acc 25.781 (20.385)	Top-5 acc 44.141 (41.424)	lr 0.02794
Warmup Train [19][1620/3239]	Time 0.216 (0.240)	Data 0.001 (0.012)	Loss 4.6119 (4.5734)	Top-1 acc 17.578 (20.386)	Top-5 acc 36.328 (41.429)	lr 0.02794
Warmup Train [19][1630/3239]	Time 0.220 (0.240)	Data 0.001 (0.012)	Loss 4.6691 (4.5734)	Top-1 acc 16.016 (20.387)	Top-5 acc 39.453 (41.431)	lr 0.02793
Warmup Train [19][1640/3239]	Time 0.193 (0.240)	Data 0.001 (0.012)	Loss 4.5258 (4.5731)	Top-1 acc 23.047 (20.394)	Top-5 acc 47.266 (41.436)	lr 0.02793
Warmup Train [19][1650/3239]	Time 0.230 (0.240)	Data 0.001 (0.012)	Loss 4.4934 (4.5734)	Top-1 acc 19.141 (20.390)	Top-5 acc 42.969 (41.428)	lr 0.02792
Warmup Train [19][1660/3239]	Time 0.249 (0.240)	Data 0.003 (0.012)	Loss 4.6164 (4.5733)	Top-1 acc 19.922 (20.392)	Top-5 acc 42.188 (41.431)	lr 0.02791
Warmup Train [19][1670/3239]	Time 0.321 (0.240)	Data 0.002 (0.012)	Loss 4.5566 (4.5732)	Top-1 acc 22.656 (20.397)	Top-5 acc 37.500 (41.431)	lr 0.02791
Warmup Train [19][1680/3239]	Time 0.210 (0.240)	Data 0.002 (0.012)	Loss 4.5203 (4.5732)	Top-1 acc 24.609 (20.398)	Top-5 acc 43.750 (41.428)	lr 0.02790
Warmup Train [19][1690/3239]	Time 0.245 (0.240)	Data 0.002 (0.011)	Loss 4.4080 (4.5731)	Top-1 acc 24.609 (20.396)	Top-5 acc 45.703 (41.428)	lr 0.02790
Warmup Train [19][1700/3239]	Time 0.213 (0.240)	Data 0.001 (0.011)	Loss 4.5138 (4.5731)	Top-1 acc 22.266 (20.398)	Top-5 acc 44.141 (41.426)	lr 0.02789
Warmup Train [19][1710/3239]	Time 0.268 (0.239)	Data 0.001 (0.011)	Loss 4.7701 (4.5734)	Top-1 acc 19.141 (20.392)	Top-5 acc 34.375 (41.417)	lr 0.02788
Warmup Train [19][1720/3239]	Time 0.236 (0.239)	Data 0.002 (0.011)	Loss 4.4404 (4.5731)	Top-1 acc 23.438 (20.401)	Top-5 acc 43.750 (41.422)	lr 0.02788
Warmup Train [19][1730/3239]	Time 0.166 (0.239)	Data 0.001 (0.011)	Loss 4.6309 (4.5729)	Top-1 acc 21.875 (20.403)	Top-5 acc 38.672 (41.428)	lr 0.02787
Warmup Train [19][1740/3239]	Time 0.197 (0.239)	Data 0.001 (0.011)	Loss 4.6423 (4.5731)	Top-1 acc 19.922 (20.399)	Top-5 acc 42.969 (41.426)	lr 0.02787
Warmup Train [19][1750/3239]	Time 0.236 (0.239)	Data 0.002 (0.011)	Loss 4.5441 (4.5729)	Top-1 acc 25.000 (20.406)	Top-5 acc 44.922 (41.430)	lr 0.02786
Warmup Train [19][1760/3239]	Time 0.205 (0.239)	Data 0.001 (0.011)	Loss 4.5471 (4.5728)	Top-1 acc 21.094 (20.404)	Top-5 acc 39.453 (41.430)	lr 0.02785
Warmup Train [19][1770/3239]	Time 0.393 (0.239)	Data 0.001 (0.011)	Loss 4.5138 (4.5725)	Top-1 acc 20.703 (20.407)	Top-5 acc 39.844 (41.432)	lr 0.02785
Warmup Train [19][1780/3239]	Time 0.207 (0.239)	Data 0.001 (0.011)	Loss 4.7319 (4.5724)	Top-1 acc 18.359 (20.406)	Top-5 acc 39.844 (41.437)	lr 0.02784
Warmup Train [19][1790/3239]	Time 0.202 (0.239)	Data 0.001 (0.011)	Loss 4.8027 (4.5725)	Top-1 acc 17.188 (20.408)	Top-5 acc 37.891 (41.439)	lr 0.02784
Warmup Train [19][1800/3239]	Time 0.197 (0.239)	Data 0.001 (0.011)	Loss 4.6381 (4.5723)	Top-1 acc 21.094 (20.416)	Top-5 acc 40.625 (41.441)	lr 0.02783
Warmup Train [19][1810/3239]	Time 0.247 (0.239)	Data 0.001 (0.011)	Loss 4.6480 (4.5722)	Top-1 acc 17.969 (20.418)	Top-5 acc 37.109 (41.442)	lr 0.02782
Warmup Train [19][1820/3239]	Time 0.164 (0.239)	Data 0.001 (0.011)	Loss 4.6556 (4.5721)	Top-1 acc 19.531 (20.417)	Top-5 acc 36.328 (41.451)	lr 0.02782
Warmup Train [19][1830/3239]	Time 0.203 (0.239)	Data 0.001 (0.011)	Loss 4.4014 (4.5723)	Top-1 acc 22.656 (20.413)	Top-5 acc 45.312 (41.449)	lr 0.02781
Warmup Train [19][1840/3239]	Time 0.200 (0.239)	Data 0.001 (0.011)	Loss 4.6580 (4.5723)	Top-1 acc 20.312 (20.418)	Top-5 acc 41.016 (41.457)	lr 0.02781
Warmup Train [19][1850/3239]	Time 0.284 (0.239)	Data 0.002 (0.011)	Loss 4.7011 (4.5720)	Top-1 acc 19.531 (20.418)	Top-5 acc 41.016 (41.465)	lr 0.02780
Warmup Train [19][1860/3239]	Time 0.237 (0.238)	Data 0.001 (0.011)	Loss 4.6394 (4.5719)	Top-1 acc 24.219 (20.425)	Top-5 acc 38.672 (41.467)	lr 0.02779
Warmup Train [19][1870/3239]	Time 0.130 (0.238)	Data 0.001 (0.011)	Loss 4.7518 (4.5721)	Top-1 acc 16.797 (20.419)	Top-5 acc 36.719 (41.462)	lr 0.02779
Warmup Train [19][1880/3239]	Time 0.141 (0.238)	Data 0.001 (0.011)	Loss 4.3517 (4.5722)	Top-1 acc 23.438 (20.421)	Top-5 acc 45.312 (41.464)	lr 0.02778
Warmup Train [19][1890/3239]	Time 0.296 (0.238)	Data 0.001 (0.011)	Loss 4.3720 (4.5719)	Top-1 acc 22.656 (20.423)	Top-5 acc 48.438 (41.466)	lr 0.02778
Warmup Train [19][1900/3239]	Time 0.177 (0.238)	Data 0.001 (0.011)	Loss 4.7686 (4.5721)	Top-1 acc 19.531 (20.418)	Top-5 acc 35.547 (41.465)	lr 0.02777
Warmup Train [19][1910/3239]	Time 0.185 (0.238)	Data 0.001 (0.011)	Loss 4.6338 (4.5721)	Top-1 acc 18.359 (20.418)	Top-5 acc 38.281 (41.463)	lr 0.02776
Warmup Train [19][1920/3239]	Time 0.258 (0.238)	Data 0.001 (0.010)	Loss 4.5616 (4.5721)	Top-1 acc 22.656 (20.417)	Top-5 acc 42.969 (41.457)	lr 0.02776
Warmup Train [19][1930/3239]	Time 0.132 (0.238)	Data 0.001 (0.010)	Loss 4.6439 (4.5719)	Top-1 acc 17.188 (20.414)	Top-5 acc 37.891 (41.459)	lr 0.02775
Warmup Train [19][1940/3239]	Time 0.159 (0.238)	Data 0.001 (0.010)	Loss 4.5081 (4.5718)	Top-1 acc 23.828 (20.419)	Top-5 acc 41.406 (41.464)	lr 0.02775
Warmup Train [19][1950/3239]	Time 0.187 (0.238)	Data 0.001 (0.010)	Loss 4.5794 (4.5720)	Top-1 acc 22.266 (20.419)	Top-5 acc 42.969 (41.454)	lr 0.02774
Warmup Train [19][1960/3239]	Time 0.217 (0.238)	Data 0.001 (0.010)	Loss 4.8057 (4.5721)	Top-1 acc 21.484 (20.418)	Top-5 acc 41.016 (41.451)	lr 0.02773
Warmup Train [19][1970/3239]	Time 0.206 (0.238)	Data 0.002 (0.010)	Loss 4.4635 (4.5722)	Top-1 acc 20.312 (20.419)	Top-5 acc 43.359 (41.458)	lr 0.02773
Warmup Train [19][1980/3239]	Time 0.217 (0.238)	Data 0.001 (0.010)	Loss 4.5896 (4.5719)	Top-1 acc 20.312 (20.420)	Top-5 acc 41.406 (41.462)	lr 0.02772
Warmup Train [19][1990/3239]	Time 0.198 (0.238)	Data 0.001 (0.010)	Loss 4.3012 (4.5719)	Top-1 acc 25.781 (20.419)	Top-5 acc 49.609 (41.462)	lr 0.02772
Warmup Train [19][2000/3239]	Time 0.283 (0.238)	Data 0.001 (0.010)	Loss 4.3553 (4.5715)	Top-1 acc 22.656 (20.427)	Top-5 acc 43.750 (41.470)	lr 0.02771
Warmup Train [19][2010/3239]	Time 0.227 (0.237)	Data 0.001 (0.010)	Loss 4.3993 (4.5714)	Top-1 acc 21.484 (20.425)	Top-5 acc 46.484 (41.468)	lr 0.02770
Warmup Train [19][2020/3239]	Time 0.175 (0.237)	Data 0.001 (0.010)	Loss 4.5439 (4.5713)	Top-1 acc 21.484 (20.427)	Top-5 acc 43.359 (41.472)	lr 0.02770
Warmup Train [19][2030/3239]	Time 0.163 (0.237)	Data 0.001 (0.010)	Loss 4.4655 (4.5711)	Top-1 acc 16.406 (20.430)	Top-5 acc 41.406 (41.475)	lr 0.02769
Warmup Train [19][2040/3239]	Time 0.265 (0.237)	Data 0.002 (0.010)	Loss 4.5528 (4.5713)	Top-1 acc 21.875 (20.433)	Top-5 acc 45.312 (41.474)	lr 0.02769
Warmup Train [19][2050/3239]	Time 0.214 (0.237)	Data 0.001 (0.010)	Loss 4.5583 (4.5713)	Top-1 acc 21.094 (20.431)	Top-5 acc 41.016 (41.470)	lr 0.02768
Warmup Train [19][2060/3239]	Time 0.177 (0.237)	Data 0.001 (0.010)	Loss 4.5688 (4.5713)	Top-1 acc 19.531 (20.426)	Top-5 acc 40.625 (41.473)	lr 0.02767
Warmup Train [19][2070/3239]	Time 0.221 (0.237)	Data 0.001 (0.010)	Loss 4.4350 (4.5713)	Top-1 acc 24.219 (20.432)	Top-5 acc 47.656 (41.479)	lr 0.02767
Warmup Train [19][2080/3239]	Time 0.283 (0.237)	Data 0.027 (0.010)	Loss 4.4227 (4.5712)	Top-1 acc 25.781 (20.433)	Top-5 acc 44.531 (41.479)	lr 0.02766
Warmup Train [19][2090/3239]	Time 0.220 (0.237)	Data 0.001 (0.010)	Loss 4.6785 (4.5712)	Top-1 acc 20.312 (20.434)	Top-5 acc 38.281 (41.475)	lr 0.02766
Warmup Train [19][2100/3239]	Time 0.185 (0.237)	Data 0.001 (0.010)	Loss 4.4431 (4.5713)	Top-1 acc 19.922 (20.429)	Top-5 acc 45.312 (41.471)	lr 0.02765
Warmup Train [19][2110/3239]	Time 0.384 (0.237)	Data 0.001 (0.010)	Loss 4.5221 (4.5713)	Top-1 acc 22.266 (20.429)	Top-5 acc 43.750 (41.467)	lr 0.02764
Warmup Train [19][2120/3239]	Time 0.247 (0.237)	Data 0.001 (0.010)	Loss 4.5582 (4.5710)	Top-1 acc 20.703 (20.437)	Top-5 acc 41.016 (41.468)	lr 0.02764
Warmup Train [19][2130/3239]	Time 0.242 (0.237)	Data 0.001 (0.010)	Loss 4.4817 (4.5713)	Top-1 acc 20.703 (20.433)	Top-5 acc 43.750 (41.463)	lr 0.02763
Warmup Train [19][2140/3239]	Time 0.207 (0.237)	Data 0.001 (0.010)	Loss 4.6634 (4.5713)	Top-1 acc 19.141 (20.436)	Top-5 acc 36.719 (41.463)	lr 0.02763
Warmup Train [19][2150/3239]	Time 0.260 (0.237)	Data 0.001 (0.010)	Loss 4.6683 (4.5715)	Top-1 acc 17.188 (20.435)	Top-5 acc 36.328 (41.457)	lr 0.02762
Warmup Train [19][2160/3239]	Time 0.180 (0.237)	Data 0.001 (0.010)	Loss 4.7775 (4.5718)	Top-1 acc 16.016 (20.433)	Top-5 acc 36.719 (41.454)	lr 0.02761
Warmup Train [19][2170/3239]	Time 0.238 (0.237)	Data 0.002 (0.010)	Loss 4.5114 (4.5720)	Top-1 acc 20.703 (20.430)	Top-5 acc 42.578 (41.452)	lr 0.02761
Warmup Train [19][2180/3239]	Time 0.224 (0.237)	Data 0.001 (0.010)	Loss 4.3832 (4.5720)	Top-1 acc 22.656 (20.428)	Top-5 acc 44.922 (41.454)	lr 0.02760
Warmup Train [19][2190/3239]	Time 0.233 (0.237)	Data 0.001 (0.010)	Loss 4.7402 (4.5721)	Top-1 acc 16.797 (20.426)	Top-5 acc 33.203 (41.452)	lr 0.02760
Warmup Train [19][2200/3239]	Time 0.228 (0.236)	Data 0.001 (0.010)	Loss 4.6898 (4.5721)	Top-1 acc 21.875 (20.429)	Top-5 acc 39.453 (41.450)	lr 0.02759
Warmup Train [19][2210/3239]	Time 0.323 (0.236)	Data 0.001 (0.010)	Loss 4.4856 (4.5721)	Top-1 acc 21.875 (20.434)	Top-5 acc 44.141 (41.451)	lr 0.02758
Warmup Train [19][2220/3239]	Time 0.226 (0.236)	Data 0.001 (0.009)	Loss 4.5451 (4.5720)	Top-1 acc 21.875 (20.434)	Top-5 acc 44.141 (41.455)	lr 0.02758
Warmup Train [19][2230/3239]	Time 0.189 (0.236)	Data 0.001 (0.009)	Loss 4.5528 (4.5720)	Top-1 acc 19.922 (20.433)	Top-5 acc 41.016 (41.453)	lr 0.02757
Warmup Train [19][2240/3239]	Time 0.172 (0.236)	Data 0.001 (0.009)	Loss 4.5505 (4.5720)	Top-1 acc 18.359 (20.433)	Top-5 acc 39.844 (41.458)	lr 0.02756
Warmup Train [19][2250/3239]	Time 0.216 (0.236)	Data 0.001 (0.009)	Loss 4.6282 (4.5719)	Top-1 acc 17.969 (20.437)	Top-5 acc 38.281 (41.462)	lr 0.02756
Warmup Train [19][2260/3239]	Time 0.221 (0.236)	Data 0.001 (0.009)	Loss 4.6658 (4.5716)	Top-1 acc 17.188 (20.443)	Top-5 acc 41.016 (41.464)	lr 0.02755
Warmup Train [19][2270/3239]	Time 0.238 (0.236)	Data 0.001 (0.009)	Loss 4.5954 (4.5716)	Top-1 acc 17.188 (20.442)	Top-5 acc 39.062 (41.464)	lr 0.02755
Warmup Train [19][2280/3239]	Time 0.238 (0.236)	Data 0.002 (0.009)	Loss 4.5095 (4.5715)	Top-1 acc 19.531 (20.441)	Top-5 acc 37.500 (41.462)	lr 0.02754
Warmup Train [19][2290/3239]	Time 0.222 (0.236)	Data 0.001 (0.009)	Loss 4.6445 (4.5715)	Top-1 acc 21.484 (20.438)	Top-5 acc 39.453 (41.457)	lr 0.02753
Warmup Train [19][2300/3239]	Time 0.192 (0.236)	Data 0.001 (0.009)	Loss 4.7282 (4.5714)	Top-1 acc 18.359 (20.442)	Top-5 acc 37.109 (41.453)	lr 0.02753
Warmup Train [19][2310/3239]	Time 0.321 (0.236)	Data 0.002 (0.009)	Loss 4.6607 (4.5714)	Top-1 acc 17.578 (20.445)	Top-5 acc 39.453 (41.457)	lr 0.02752
Warmup Train [19][2320/3239]	Time 0.163 (0.236)	Data 0.001 (0.009)	Loss 4.6397 (4.5713)	Top-1 acc 18.359 (20.447)	Top-5 acc 41.016 (41.461)	lr 0.02752
Warmup Train [19][2330/3239]	Time 0.225 (0.236)	Data 0.001 (0.009)	Loss 4.6724 (4.5712)	Top-1 acc 20.703 (20.453)	Top-5 acc 39.453 (41.465)	lr 0.02751
Warmup Train [19][2340/3239]	Time 0.224 (0.236)	Data 0.001 (0.009)	Loss 4.5861 (4.5712)	Top-1 acc 22.656 (20.457)	Top-5 acc 40.234 (41.466)	lr 0.02750
Warmup Train [19][2350/3239]	Time 0.243 (0.236)	Data 0.001 (0.009)	Loss 4.5980 (4.5712)	Top-1 acc 22.656 (20.457)	Top-5 acc 39.062 (41.466)	lr 0.02750
Warmup Train [19][2360/3239]	Time 0.193 (0.236)	Data 0.001 (0.009)	Loss 4.4415 (4.5711)	Top-1 acc 23.828 (20.461)	Top-5 acc 44.922 (41.468)	lr 0.02749
Warmup Train [19][2370/3239]	Time 0.189 (0.236)	Data 0.002 (0.009)	Loss 4.7999 (4.5710)	Top-1 acc 17.578 (20.466)	Top-5 acc 39.844 (41.474)	lr 0.02749
Warmup Train [19][2380/3239]	Time 0.204 (0.236)	Data 0.002 (0.009)	Loss 4.5518 (4.5711)	Top-1 acc 21.094 (20.465)	Top-5 acc 42.578 (41.472)	lr 0.02748
Warmup Train [19][2390/3239]	Time 0.154 (0.236)	Data 0.002 (0.009)	Loss 4.3680 (4.5709)	Top-1 acc 24.609 (20.470)	Top-5 acc 44.922 (41.476)	lr 0.02747
Warmup Train [19][2400/3239]	Time 0.223 (0.236)	Data 0.002 (0.009)	Loss 4.6598 (4.5707)	Top-1 acc 21.875 (20.473)	Top-5 acc 40.234 (41.485)	lr 0.02747
Warmup Train [19][2410/3239]	Time 0.228 (0.236)	Data 0.001 (0.009)	Loss 4.6249 (4.5708)	Top-1 acc 16.406 (20.468)	Top-5 acc 40.625 (41.485)	lr 0.02746
Warmup Train [19][2420/3239]	Time 0.342 (0.236)	Data 0.001 (0.009)	Loss 4.3898 (4.5703)	Top-1 acc 22.656 (20.478)	Top-5 acc 46.875 (41.497)	lr 0.02746
Warmup Train [19][2430/3239]	Time 0.256 (0.236)	Data 0.001 (0.009)	Loss 4.6346 (4.5702)	Top-1 acc 21.484 (20.478)	Top-5 acc 41.406 (41.500)	lr 0.02745
Warmup Train [19][2440/3239]	Time 0.225 (0.235)	Data 0.001 (0.009)	Loss 4.6918 (4.5702)	Top-1 acc 16.016 (20.480)	Top-5 acc 38.672 (41.501)	lr 0.02744
Warmup Train [19][2450/3239]	Time 0.209 (0.235)	Data 0.025 (0.009)	Loss 4.4164 (4.5702)	Top-1 acc 22.656 (20.481)	Top-5 acc 44.922 (41.501)	lr 0.02744
Warmup Train [19][2460/3239]	Time 0.263 (0.235)	Data 0.001 (0.009)	Loss 4.3875 (4.5700)	Top-1 acc 25.000 (20.486)	Top-5 acc 45.312 (41.506)	lr 0.02743
Warmup Train [19][2470/3239]	Time 0.225 (0.235)	Data 0.001 (0.009)	Loss 4.5711 (4.5699)	Top-1 acc 19.141 (20.485)	Top-5 acc 40.625 (41.509)	lr 0.02743
Warmup Train [19][2480/3239]	Time 0.240 (0.235)	Data 0.002 (0.009)	Loss 4.5430 (4.5696)	Top-1 acc 20.703 (20.489)	Top-5 acc 43.750 (41.513)	lr 0.02742
Warmup Train [19][2490/3239]	Time 0.263 (0.235)	Data 0.001 (0.009)	Loss 4.4879 (4.5695)	Top-1 acc 20.312 (20.492)	Top-5 acc 46.094 (41.517)	lr 0.02741
Warmup Train [19][2500/3239]	Time 0.216 (0.235)	Data 0.029 (0.009)	Loss 4.5392 (4.5694)	Top-1 acc 16.406 (20.493)	Top-5 acc 39.844 (41.521)	lr 0.02741
Warmup Train [19][2510/3239]	Time 0.184 (0.235)	Data 0.001 (0.009)	Loss 4.7761 (4.5694)	Top-1 acc 16.016 (20.497)	Top-5 acc 39.844 (41.524)	lr 0.02740
Warmup Train [19][2520/3239]	Time 0.193 (0.235)	Data 0.001 (0.009)	Loss 4.5525 (4.5691)	Top-1 acc 22.656 (20.504)	Top-5 acc 39.453 (41.531)	lr 0.02740
Warmup Train [19][2530/3239]	Time 0.185 (0.235)	Data 0.001 (0.009)	Loss 4.6159 (4.5689)	Top-1 acc 22.656 (20.512)	Top-5 acc 40.625 (41.535)	lr 0.02739
Warmup Train [19][2540/3239]	Time 0.257 (0.235)	Data 0.001 (0.009)	Loss 4.6405 (4.5689)	Top-1 acc 14.453 (20.513)	Top-5 acc 38.281 (41.534)	lr 0.02738
Warmup Train [19][2550/3239]	Time 0.213 (0.235)	Data 0.001 (0.009)	Loss 4.7353 (4.5688)	Top-1 acc 19.531 (20.514)	Top-5 acc 37.500 (41.537)	lr 0.02738
Warmup Train [19][2560/3239]	Time 0.242 (0.235)	Data 0.001 (0.009)	Loss 4.7033 (4.5687)	Top-1 acc 17.578 (20.514)	Top-5 acc 37.500 (41.536)	lr 0.02737
Warmup Train [19][2570/3239]	Time 0.208 (0.235)	Data 0.002 (0.009)	Loss 4.3477 (4.5685)	Top-1 acc 23.828 (20.518)	Top-5 acc 45.312 (41.544)	lr 0.02737
Warmup Train [19][2580/3239]	Time 0.176 (0.235)	Data 0.001 (0.009)	Loss 4.3596 (4.5683)	Top-1 acc 21.875 (20.519)	Top-5 acc 44.922 (41.544)	lr 0.02736
Warmup Train [19][2590/3239]	Time 0.268 (0.235)	Data 0.001 (0.009)	Loss 4.4560 (4.5682)	Top-1 acc 21.094 (20.521)	Top-5 acc 45.312 (41.548)	lr 0.02735
Warmup Train [19][2600/3239]	Time 0.233 (0.235)	Data 0.001 (0.009)	Loss 4.8167 (4.5682)	Top-1 acc 15.234 (20.518)	Top-5 acc 37.109 (41.546)	lr 0.02735
Warmup Train [19][2610/3239]	Time 0.162 (0.235)	Data 0.001 (0.009)	Loss 4.5972 (4.5682)	Top-1 acc 20.703 (20.520)	Top-5 acc 45.312 (41.547)	lr 0.02734
Warmup Train [19][2620/3239]	Time 0.205 (0.235)	Data 0.002 (0.009)	Loss 4.4833 (4.5681)	Top-1 acc 21.094 (20.524)	Top-5 acc 42.188 (41.550)	lr 0.02734
Warmup Train [19][2630/3239]	Time 0.284 (0.235)	Data 0.001 (0.009)	Loss 4.7326 (4.5683)	Top-1 acc 17.188 (20.521)	Top-5 acc 38.281 (41.544)	lr 0.02733
Warmup Train [19][2640/3239]	Time 0.187 (0.235)	Data 0.002 (0.008)	Loss 4.4802 (4.5681)	Top-1 acc 22.656 (20.523)	Top-5 acc 40.234 (41.544)	lr 0.02732
Warmup Train [19][2650/3239]	Time 0.212 (0.235)	Data 0.001 (0.008)	Loss 4.6793 (4.5680)	Top-1 acc 19.531 (20.521)	Top-5 acc 38.281 (41.545)	lr 0.02732
Warmup Train [19][2660/3239]	Time 0.238 (0.235)	Data 0.001 (0.008)	Loss 4.5883 (4.5679)	Top-1 acc 23.438 (20.523)	Top-5 acc 43.359 (41.551)	lr 0.02731
Warmup Train [19][2670/3239]	Time 0.184 (0.235)	Data 0.002 (0.008)	Loss 4.5775 (4.5680)	Top-1 acc 19.922 (20.521)	Top-5 acc 40.625 (41.546)	lr 0.02731
Warmup Train [19][2680/3239]	Time 0.247 (0.235)	Data 0.002 (0.008)	Loss 4.4107 (4.5680)	Top-1 acc 21.094 (20.522)	Top-5 acc 45.312 (41.546)	lr 0.02730
Warmup Train [19][2690/3239]	Time 0.232 (0.235)	Data 0.001 (0.008)	Loss 4.6089 (4.5677)	Top-1 acc 19.141 (20.530)	Top-5 acc 40.625 (41.554)	lr 0.02729
Warmup Train [19][2700/3239]	Time 0.299 (0.235)	Data 0.001 (0.008)	Loss 4.5515 (4.5677)	Top-1 acc 23.047 (20.533)	Top-5 acc 43.750 (41.558)	lr 0.02729
Warmup Train [19][2710/3239]	Time 0.254 (0.235)	Data 0.001 (0.008)	Loss 4.7496 (4.5677)	Top-1 acc 17.969 (20.534)	Top-5 acc 36.719 (41.556)	lr 0.02728
Warmup Train [19][2720/3239]	Time 0.206 (0.235)	Data 0.001 (0.008)	Loss 4.5600 (4.5676)	Top-1 acc 23.828 (20.541)	Top-5 acc 41.406 (41.556)	lr 0.02728
Warmup Train [19][2730/3239]	Time 0.265 (0.235)	Data 0.002 (0.008)	Loss 4.6047 (4.5674)	Top-1 acc 20.312 (20.549)	Top-5 acc 41.797 (41.563)	lr 0.02727
Warmup Train [19][2740/3239]	Time 0.177 (0.235)	Data 0.001 (0.008)	Loss 4.6021 (4.5674)	Top-1 acc 20.703 (20.551)	Top-5 acc 41.406 (41.564)	lr 0.02726
Warmup Train [19][2750/3239]	Time 0.151 (0.234)	Data 0.001 (0.008)	Loss 4.5817 (4.5673)	Top-1 acc 19.141 (20.550)	Top-5 acc 38.281 (41.563)	lr 0.02726
Warmup Train [19][2760/3239]	Time 0.204 (0.234)	Data 0.001 (0.008)	Loss 4.4565 (4.5672)	Top-1 acc 19.531 (20.551)	Top-5 acc 41.797 (41.564)	lr 0.02725
Warmup Train [19][2770/3239]	Time 0.256 (0.234)	Data 0.001 (0.008)	Loss 4.4508 (4.5670)	Top-1 acc 24.219 (20.558)	Top-5 acc 42.578 (41.569)	lr 0.02725
Warmup Train [19][2780/3239]	Time 0.271 (0.234)	Data 0.001 (0.008)	Loss 4.6396 (4.5669)	Top-1 acc 18.750 (20.555)	Top-5 acc 40.234 (41.570)	lr 0.02724
Warmup Train [19][2790/3239]	Time 0.229 (0.234)	Data 0.002 (0.008)	Loss 4.7298 (4.5669)	Top-1 acc 16.406 (20.558)	Top-5 acc 40.234 (41.573)	lr 0.02723
Warmup Train [19][2800/3239]	Time 0.218 (0.234)	Data 0.001 (0.008)	Loss 4.4959 (4.5669)	Top-1 acc 20.703 (20.560)	Top-5 acc 41.406 (41.570)	lr 0.02723
Warmup Train [19][2810/3239]	Time 0.243 (0.234)	Data 0.001 (0.008)	Loss 4.6260 (4.5669)	Top-1 acc 19.531 (20.560)	Top-5 acc 38.281 (41.570)	lr 0.02722
Warmup Train [19][2820/3239]	Time 0.217 (0.234)	Data 0.001 (0.008)	Loss 4.6088 (4.5669)	Top-1 acc 19.531 (20.558)	Top-5 acc 37.500 (41.569)	lr 0.02722
Warmup Train [19][2830/3239]	Time 0.225 (0.234)	Data 0.001 (0.008)	Loss 4.5076 (4.5669)	Top-1 acc 21.094 (20.560)	Top-5 acc 38.672 (41.567)	lr 0.02721
Warmup Train [19][2840/3239]	Time 0.200 (0.234)	Data 0.001 (0.008)	Loss 4.6559 (4.5670)	Top-1 acc 19.922 (20.558)	Top-5 acc 36.719 (41.564)	lr 0.02720
Warmup Train [19][2850/3239]	Time 0.201 (0.234)	Data 0.001 (0.008)	Loss 4.5544 (4.5668)	Top-1 acc 22.266 (20.561)	Top-5 acc 42.578 (41.566)	lr 0.02720
Warmup Train [19][2860/3239]	Time 0.192 (0.234)	Data 0.001 (0.008)	Loss 4.3743 (4.5664)	Top-1 acc 24.219 (20.570)	Top-5 acc 46.484 (41.573)	lr 0.02719
Warmup Train [19][2870/3239]	Time 0.270 (0.234)	Data 0.002 (0.008)	Loss 4.5567 (4.5663)	Top-1 acc 18.750 (20.569)	Top-5 acc 39.844 (41.574)	lr 0.02718
Warmup Train [19][2880/3239]	Time 0.173 (0.234)	Data 0.001 (0.008)	Loss 4.6080 (4.5663)	Top-1 acc 18.750 (20.571)	Top-5 acc 38.281 (41.577)	lr 0.02718
Warmup Train [19][2890/3239]	Time 0.207 (0.234)	Data 0.001 (0.008)	Loss 4.5551 (4.5662)	Top-1 acc 22.266 (20.574)	Top-5 acc 42.578 (41.577)	lr 0.02717
Warmup Train [19][2900/3239]	Time 0.323 (0.234)	Data 0.001 (0.008)	Loss 4.6460 (4.5659)	Top-1 acc 14.844 (20.576)	Top-5 acc 39.062 (41.582)	lr 0.02717
Warmup Train [19][2910/3239]	Time 0.231 (0.234)	Data 0.001 (0.008)	Loss 4.5126 (4.5658)	Top-1 acc 23.438 (20.580)	Top-5 acc 38.672 (41.581)	lr 0.02716
Warmup Train [19][2920/3239]	Time 0.187 (0.234)	Data 0.001 (0.008)	Loss 4.6378 (4.5657)	Top-1 acc 21.094 (20.582)	Top-5 acc 44.141 (41.587)	lr 0.02715
Warmup Train [19][2930/3239]	Time 0.172 (0.234)	Data 0.001 (0.008)	Loss 4.6174 (4.5657)	Top-1 acc 16.406 (20.578)	Top-5 acc 41.406 (41.586)	lr 0.02715
Warmup Train [19][2940/3239]	Time 0.212 (0.234)	Data 0.001 (0.008)	Loss 4.5245 (4.5654)	Top-1 acc 19.922 (20.583)	Top-5 acc 43.359 (41.593)	lr 0.02714
Warmup Train [19][2950/3239]	Time 0.215 (0.234)	Data 0.001 (0.008)	Loss 4.5354 (4.5652)	Top-1 acc 20.703 (20.587)	Top-5 acc 44.531 (41.601)	lr 0.02714
Warmup Train [19][2960/3239]	Time 0.263 (0.234)	Data 0.002 (0.008)	Loss 4.4784 (4.5650)	Top-1 acc 26.562 (20.592)	Top-5 acc 45.703 (41.605)	lr 0.02713
Warmup Train [19][2970/3239]	Time 0.241 (0.234)	Data 0.002 (0.008)	Loss 4.4583 (4.5650)	Top-1 acc 21.094 (20.596)	Top-5 acc 44.531 (41.605)	lr 0.02712
Warmup Train [19][2980/3239]	Time 0.189 (0.234)	Data 0.002 (0.008)	Loss 4.6325 (4.5648)	Top-1 acc 18.359 (20.598)	Top-5 acc 41.406 (41.608)	lr 0.02712
Warmup Train [19][2990/3239]	Time 0.231 (0.234)	Data 0.002 (0.008)	Loss 4.4518 (4.5647)	Top-1 acc 23.438 (20.601)	Top-5 acc 43.359 (41.610)	lr 0.02711
Warmup Train [19][3000/3239]	Time 0.337 (0.234)	Data 0.001 (0.008)	Loss 4.5992 (4.5646)	Top-1 acc 23.828 (20.600)	Top-5 acc 43.359 (41.610)	lr 0.02711
Warmup Train [19][3010/3239]	Time 0.184 (0.234)	Data 0.001 (0.008)	Loss 4.6397 (4.5646)	Top-1 acc 17.578 (20.603)	Top-5 acc 36.719 (41.611)	lr 0.02710
Warmup Train [19][3020/3239]	Time 0.162 (0.234)	Data 0.001 (0.008)	Loss 4.6428 (4.5645)	Top-1 acc 22.266 (20.603)	Top-5 acc 42.188 (41.607)	lr 0.02709
Warmup Train [19][3030/3239]	Time 0.207 (0.234)	Data 0.002 (0.008)	Loss 4.3550 (4.5645)	Top-1 acc 22.656 (20.601)	Top-5 acc 45.703 (41.605)	lr 0.02709
Warmup Train [19][3040/3239]	Time 0.153 (0.234)	Data 0.001 (0.008)	Loss 4.6579 (4.5646)	Top-1 acc 17.969 (20.600)	Top-5 acc 37.891 (41.604)	lr 0.02708
Warmup Train [19][3050/3239]	Time 0.193 (0.234)	Data 0.001 (0.008)	Loss 4.4373 (4.5644)	Top-1 acc 21.484 (20.603)	Top-5 acc 44.531 (41.607)	lr 0.02708
Warmup Train [19][3060/3239]	Time 0.211 (0.234)	Data 0.001 (0.008)	Loss 4.3147 (4.5640)	Top-1 acc 24.609 (20.609)	Top-5 acc 46.484 (41.614)	lr 0.02707
Warmup Train [19][3070/3239]	Time 0.206 (0.234)	Data 0.001 (0.008)	Loss 4.8031 (4.5639)	Top-1 acc 16.406 (20.610)	Top-5 acc 35.938 (41.619)	lr 0.02706
Warmup Train [19][3080/3239]	Time 0.187 (0.234)	Data 0.001 (0.008)	Loss 4.4655 (4.5638)	Top-1 acc 26.562 (20.614)	Top-5 acc 41.406 (41.622)	lr 0.02706
Warmup Train [19][3090/3239]	Time 0.230 (0.234)	Data 0.001 (0.008)	Loss 4.6463 (4.5637)	Top-1 acc 20.703 (20.615)	Top-5 acc 40.625 (41.626)	lr 0.02705
Warmup Train [19][3100/3239]	Time 0.236 (0.234)	Data 0.001 (0.008)	Loss 4.4222 (4.5636)	Top-1 acc 21.875 (20.619)	Top-5 acc 43.750 (41.633)	lr 0.02705
Warmup Train [19][3110/3239]	Time 0.204 (0.234)	Data 0.001 (0.008)	Loss 4.4899 (4.5636)	Top-1 acc 19.531 (20.618)	Top-5 acc 42.188 (41.633)	lr 0.02704
Warmup Train [19][3120/3239]	Time 0.198 (0.234)	Data 0.001 (0.008)	Loss 4.5796 (4.5634)	Top-1 acc 23.047 (20.622)	Top-5 acc 41.797 (41.636)	lr 0.02703
Warmup Train [19][3130/3239]	Time 0.220 (0.234)	Data 0.001 (0.008)	Loss 4.2985 (4.5631)	Top-1 acc 23.438 (20.629)	Top-5 acc 46.875 (41.646)	lr 0.02703
Warmup Train [19][3140/3239]	Time 0.243 (0.233)	Data 0.001 (0.008)	Loss 4.6614 (4.5630)	Top-1 acc 18.750 (20.631)	Top-5 acc 39.844 (41.649)	lr 0.02702
Warmup Train [19][3150/3239]	Time 0.212 (0.233)	Data 0.001 (0.008)	Loss 4.4684 (4.5629)	Top-1 acc 23.438 (20.635)	Top-5 acc 45.312 (41.648)	lr 0.02702
Warmup Train [19][3160/3239]	Time 0.195 (0.233)	Data 0.001 (0.008)	Loss 4.5836 (4.5629)	Top-1 acc 20.312 (20.635)	Top-5 acc 38.281 (41.648)	lr 0.02701
Warmup Train [19][3170/3239]	Time 0.208 (0.233)	Data 0.002 (0.008)	Loss 4.6120 (4.5627)	Top-1 acc 23.828 (20.636)	Top-5 acc 41.016 (41.654)	lr 0.02700
Warmup Train [19][3180/3239]	Time 0.232 (0.233)	Data 0.000 (0.008)	Loss 4.4980 (4.5627)	Top-1 acc 22.266 (20.637)	Top-5 acc 41.797 (41.656)	lr 0.02700
Warmup Train [19][3190/3239]	Time 0.256 (0.233)	Data 0.000 (0.007)	Loss 4.6662 (4.5626)	Top-1 acc 20.703 (20.639)	Top-5 acc 41.016 (41.657)	lr 0.02699
Warmup Train [19][3200/3239]	Time 0.166 (0.233)	Data 0.000 (0.007)	Loss 4.4266 (4.5624)	Top-1 acc 21.094 (20.643)	Top-5 acc 44.922 (41.664)	lr 0.02699
Warmup Train [19][3210/3239]	Time 0.192 (0.233)	Data 0.000 (0.007)	Loss 4.4956 (4.5625)	Top-1 acc 21.484 (20.636)	Top-5 acc 43.359 (41.660)	lr 0.02698
Warmup Train [19][3220/3239]	Time 0.258 (0.233)	Data 0.000 (0.007)	Loss 4.3764 (4.5625)	Top-1 acc 25.781 (20.638)	Top-5 acc 49.219 (41.662)	lr 0.02697
Warmup Train [19][3230/3239]	Time 0.252 (0.233)	Data 0.000 (0.007)	Loss 4.5633 (4.5625)	Top-1 acc 23.047 (20.639)	Top-5 acc 38.672 (41.660)	lr 0.02697
Warmup Train [19][3239/3239]	Time 0.133 (0.233)	Data 0.000 (0.007)	Loss 4.6182 (4.5624)	Top-1 acc 17.284 (20.642)	Top-5 acc 41.975 (41.661)	lr 0.02696
==========Warmup Valid [19/40]	loss 3.604	top-1 acc 27.326	top-5 acc 50.944	Train top-1 20.642	top-5 41.661	flops: 442.4M
Warmup Train [20][0/3239]	Time 17.990 (17.990)	Data 17.272 (17.272)	Loss 4.7009 (4.7009)	Top-1 acc 16.406 (16.406)	Top-5 acc 37.500 (37.500)	lr 0.02696
Warmup Train [20][10/3239]	Time 0.268 (1.919)	Data 0.001 (1.572)	Loss 4.5535 (4.5666)	Top-1 acc 18.359 (20.703)	Top-5 acc 40.625 (42.045)	lr 0.02696
Warmup Train [20][20/3239]	Time 0.201 (1.128)	Data 0.002 (0.826)	Loss 4.5389 (4.5630)	Top-1 acc 21.094 (20.294)	Top-5 acc 44.141 (41.983)	lr 0.02695
Warmup Train [20][30/3239]	Time 0.259 (0.845)	Data 0.001 (0.560)	Loss 4.7695 (4.5719)	Top-1 acc 19.922 (20.426)	Top-5 acc 37.109 (41.872)	lr 0.02694
Warmup Train [20][40/3239]	Time 0.286 (0.698)	Data 0.003 (0.424)	Loss 4.5910 (4.5543)	Top-1 acc 21.875 (20.665)	Top-5 acc 39.453 (42.159)	lr 0.02694
Warmup Train [20][50/3239]	Time 0.238 (0.608)	Data 0.001 (0.341)	Loss 4.5072 (4.5489)	Top-1 acc 20.703 (20.596)	Top-5 acc 41.016 (42.172)	lr 0.02693
Warmup Train [20][60/3239]	Time 0.257 (0.547)	Data 0.001 (0.286)	Loss 4.3711 (4.5361)	Top-1 acc 23.828 (20.921)	Top-5 acc 44.531 (42.501)	lr 0.02693
Warmup Train [20][70/3239]	Time 0.345 (0.504)	Data 0.001 (0.246)	Loss 4.4960 (4.5266)	Top-1 acc 19.922 (20.918)	Top-5 acc 44.141 (42.760)	lr 0.02692
Warmup Train [20][80/3239]	Time 0.271 (0.470)	Data 0.003 (0.216)	Loss 4.3642 (4.5195)	Top-1 acc 25.781 (21.089)	Top-5 acc 47.266 (42.964)	lr 0.02691
Warmup Train [20][90/3239]	Time 0.244 (0.442)	Data 0.001 (0.192)	Loss 4.4477 (4.5140)	Top-1 acc 20.703 (21.192)	Top-5 acc 43.750 (43.050)	lr 0.02691
Warmup Train [20][100/3239]	Time 0.190 (0.422)	Data 0.001 (0.174)	Loss 4.7726 (4.5149)	Top-1 acc 17.188 (21.206)	Top-5 acc 37.891 (43.027)	lr 0.02690
Warmup Train [20][110/3239]	Time 0.238 (0.404)	Data 0.001 (0.158)	Loss 4.7764 (4.5155)	Top-1 acc 15.625 (21.069)	Top-5 acc 35.938 (43.011)	lr 0.02690
Warmup Train [20][120/3239]	Time 0.222 (0.390)	Data 0.001 (0.145)	Loss 4.7370 (4.5171)	Top-1 acc 16.797 (21.094)	Top-5 acc 41.406 (43.001)	lr 0.02689
Warmup Train [20][130/3239]	Time 0.237 (0.377)	Data 0.001 (0.134)	Loss 4.5417 (4.5168)	Top-1 acc 22.656 (21.067)	Top-5 acc 42.188 (42.999)	lr 0.02688
Warmup Train [20][140/3239]	Time 0.213 (0.366)	Data 0.001 (0.125)	Loss 4.5515 (4.5178)	Top-1 acc 25.000 (21.130)	Top-5 acc 41.406 (42.966)	lr 0.02688
Warmup Train [20][150/3239]	Time 0.265 (0.357)	Data 0.001 (0.117)	Loss 4.6089 (4.5196)	Top-1 acc 17.969 (21.102)	Top-5 acc 39.844 (42.943)	lr 0.02687
Warmup Train [20][160/3239]	Time 0.269 (0.348)	Data 0.001 (0.110)	Loss 4.8610 (4.5204)	Top-1 acc 17.969 (21.048)	Top-5 acc 35.156 (42.879)	lr 0.02686
Warmup Train [20][170/3239]	Time 0.199 (0.341)	Data 0.001 (0.104)	Loss 4.4990 (4.5216)	Top-1 acc 19.531 (21.069)	Top-5 acc 41.797 (42.745)	lr 0.02686
Warmup Train [20][180/3239]	Time 0.218 (0.335)	Data 0.001 (0.098)	Loss 4.4176 (4.5212)	Top-1 acc 22.656 (21.040)	Top-5 acc 40.625 (42.775)	lr 0.02685
Warmup Train [20][190/3239]	Time 0.208 (0.328)	Data 0.001 (0.093)	Loss 4.4749 (4.5216)	Top-1 acc 27.734 (21.094)	Top-5 acc 44.531 (42.723)	lr 0.02685
Warmup Train [20][200/3239]	Time 0.251 (0.324)	Data 0.001 (0.089)	Loss 4.5002 (4.5215)	Top-1 acc 22.656 (21.084)	Top-5 acc 44.531 (42.669)	lr 0.02684
Warmup Train [20][210/3239]	Time 0.151 (0.318)	Data 0.001 (0.084)	Loss 4.4942 (4.5233)	Top-1 acc 20.312 (21.020)	Top-5 acc 41.797 (42.621)	lr 0.02683
Warmup Train [20][220/3239]	Time 0.190 (0.314)	Data 0.001 (0.081)	Loss 4.5987 (4.5252)	Top-1 acc 17.969 (20.972)	Top-5 acc 43.359 (42.571)	lr 0.02683
Warmup Train [20][230/3239]	Time 0.239 (0.311)	Data 0.002 (0.077)	Loss 4.5549 (4.5255)	Top-1 acc 16.406 (20.969)	Top-5 acc 40.625 (42.587)	lr 0.02682
Warmup Train [20][240/3239]	Time 0.212 (0.307)	Data 0.002 (0.074)	Loss 4.6323 (4.5244)	Top-1 acc 18.359 (21.027)	Top-5 acc 40.625 (42.627)	lr 0.02682
Warmup Train [20][250/3239]	Time 0.307 (0.304)	Data 0.001 (0.071)	Loss 4.4814 (4.5244)	Top-1 acc 19.531 (21.011)	Top-5 acc 43.750 (42.598)	lr 0.02681
Warmup Train [20][260/3239]	Time 0.207 (0.301)	Data 0.001 (0.069)	Loss 4.4891 (4.5237)	Top-1 acc 24.609 (21.052)	Top-5 acc 45.703 (42.638)	lr 0.02680
Warmup Train [20][270/3239]	Time 0.252 (0.299)	Data 0.001 (0.066)	Loss 4.3698 (4.5221)	Top-1 acc 22.266 (21.078)	Top-5 acc 46.875 (42.665)	lr 0.02680
Warmup Train [20][280/3239]	Time 0.244 (0.297)	Data 0.002 (0.064)	Loss 4.4815 (4.5224)	Top-1 acc 21.484 (21.055)	Top-5 acc 39.453 (42.627)	lr 0.02679
Warmup Train [20][290/3239]	Time 0.196 (0.294)	Data 0.002 (0.062)	Loss 4.5870 (4.5231)	Top-1 acc 19.531 (21.051)	Top-5 acc 42.578 (42.622)	lr 0.02679
Warmup Train [20][300/3239]	Time 0.216 (0.292)	Data 0.001 (0.060)	Loss 4.3942 (4.5207)	Top-1 acc 25.000 (21.090)	Top-5 acc 47.656 (42.705)	lr 0.02678
Warmup Train [20][310/3239]	Time 0.170 (0.289)	Data 0.001 (0.058)	Loss 4.3589 (4.5186)	Top-1 acc 25.000 (21.119)	Top-5 acc 43.750 (42.726)	lr 0.02677
Warmup Train [20][320/3239]	Time 0.223 (0.287)	Data 0.001 (0.056)	Loss 4.4466 (4.5204)	Top-1 acc 22.266 (21.073)	Top-5 acc 44.531 (42.671)	lr 0.02677
Warmup Train [20][330/3239]	Time 0.157 (0.285)	Data 0.002 (0.055)	Loss 4.5907 (4.5225)	Top-1 acc 22.266 (21.035)	Top-5 acc 41.797 (42.616)	lr 0.02676
Warmup Train [20][340/3239]	Time 0.154 (0.283)	Data 0.001 (0.053)	Loss 4.6633 (4.5238)	Top-1 acc 18.750 (21.023)	Top-5 acc 40.625 (42.584)	lr 0.02676
Warmup Train [20][350/3239]	Time 0.276 (0.282)	Data 0.001 (0.052)	Loss 4.5893 (4.5233)	Top-1 acc 18.750 (21.055)	Top-5 acc 40.625 (42.586)	lr 0.02675
Warmup Train [20][360/3239]	Time 0.148 (0.279)	Data 0.001 (0.051)	Loss 4.6572 (4.5264)	Top-1 acc 17.969 (21.027)	Top-5 acc 39.453 (42.525)	lr 0.02674
Warmup Train [20][370/3239]	Time 0.309 (0.278)	Data 0.001 (0.049)	Loss 4.5070 (4.5260)	Top-1 acc 23.047 (21.042)	Top-5 acc 40.625 (42.509)	lr 0.02674
Warmup Train [20][380/3239]	Time 0.245 (0.277)	Data 0.001 (0.048)	Loss 4.5584 (4.5257)	Top-1 acc 22.266 (21.074)	Top-5 acc 41.797 (42.526)	lr 0.02673
Warmup Train [20][390/3239]	Time 0.193 (0.276)	Data 0.001 (0.047)	Loss 4.6649 (4.5261)	Top-1 acc 16.016 (21.074)	Top-5 acc 39.062 (42.533)	lr 0.02673
Warmup Train [20][400/3239]	Time 0.157 (0.274)	Data 0.001 (0.046)	Loss 4.4837 (4.5266)	Top-1 acc 23.828 (21.067)	Top-5 acc 41.797 (42.517)	lr 0.02672
Warmup Train [20][410/3239]	Time 0.191 (0.273)	Data 0.001 (0.045)	Loss 4.3783 (4.5261)	Top-1 acc 25.000 (21.106)	Top-5 acc 44.531 (42.507)	lr 0.02671
Warmup Train [20][420/3239]	Time 0.229 (0.272)	Data 0.001 (0.044)	Loss 4.6772 (4.5264)	Top-1 acc 17.578 (21.118)	Top-5 acc 39.453 (42.469)	lr 0.02671
Warmup Train [20][430/3239]	Time 0.215 (0.271)	Data 0.001 (0.043)	Loss 4.5799 (4.5251)	Top-1 acc 21.094 (21.147)	Top-5 acc 41.406 (42.503)	lr 0.02670
Warmup Train [20][440/3239]	Time 0.169 (0.269)	Data 0.001 (0.042)	Loss 4.5274 (4.5259)	Top-1 acc 17.188 (21.141)	Top-5 acc 41.016 (42.473)	lr 0.02670
Warmup Train [20][450/3239]	Time 0.222 (0.268)	Data 0.001 (0.041)	Loss 4.5346 (4.5257)	Top-1 acc 17.969 (21.143)	Top-5 acc 42.969 (42.479)	lr 0.02669
Warmup Train [20][460/3239]	Time 0.190 (0.267)	Data 0.001 (0.040)	Loss 4.4869 (4.5256)	Top-1 acc 24.609 (21.145)	Top-5 acc 44.141 (42.467)	lr 0.02668
Warmup Train [20][470/3239]	Time 0.297 (0.267)	Data 0.001 (0.040)	Loss 4.4269 (4.5239)	Top-1 acc 20.703 (21.163)	Top-5 acc 45.312 (42.500)	lr 0.02668
Warmup Train [20][480/3239]	Time 0.353 (0.266)	Data 0.001 (0.039)	Loss 4.6048 (4.5242)	Top-1 acc 19.531 (21.154)	Top-5 acc 40.625 (42.506)	lr 0.02667
Warmup Train [20][490/3239]	Time 0.236 (0.265)	Data 0.001 (0.038)	Loss 4.5150 (4.5254)	Top-1 acc 26.172 (21.132)	Top-5 acc 43.750 (42.488)	lr 0.02667
Warmup Train [20][500/3239]	Time 0.205 (0.264)	Data 0.001 (0.037)	Loss 4.3796 (4.5235)	Top-1 acc 25.391 (21.165)	Top-5 acc 46.484 (42.540)	lr 0.02666
Warmup Train [20][510/3239]	Time 0.177 (0.264)	Data 0.001 (0.037)	Loss 4.3998 (4.5241)	Top-1 acc 22.656 (21.144)	Top-5 acc 48.047 (42.515)	lr 0.02665
Warmup Train [20][520/3239]	Time 0.246 (0.263)	Data 0.001 (0.036)	Loss 4.5508 (4.5238)	Top-1 acc 23.438 (21.160)	Top-5 acc 42.969 (42.526)	lr 0.02665
Warmup Train [20][530/3239]	Time 0.233 (0.262)	Data 0.001 (0.035)	Loss 4.3588 (4.5237)	Top-1 acc 25.781 (21.163)	Top-5 acc 46.094 (42.536)	lr 0.02664
Warmup Train [20][540/3239]	Time 0.239 (0.261)	Data 0.001 (0.035)	Loss 4.3083 (4.5234)	Top-1 acc 25.781 (21.190)	Top-5 acc 43.750 (42.549)	lr 0.02664
Warmup Train [20][550/3239]	Time 0.254 (0.261)	Data 0.002 (0.034)	Loss 4.4487 (4.5232)	Top-1 acc 20.312 (21.185)	Top-5 acc 45.703 (42.559)	lr 0.02663
Warmup Train [20][560/3239]	Time 0.246 (0.260)	Data 0.001 (0.034)	Loss 4.3132 (4.5235)	Top-1 acc 26.172 (21.172)	Top-5 acc 47.656 (42.538)	lr 0.02662
Warmup Train [20][570/3239]	Time 0.194 (0.260)	Data 0.001 (0.033)	Loss 4.4511 (4.5228)	Top-1 acc 24.219 (21.190)	Top-5 acc 42.969 (42.556)	lr 0.02662
Warmup Train [20][580/3239]	Time 0.370 (0.260)	Data 0.002 (0.033)	Loss 4.4483 (4.5220)	Top-1 acc 22.656 (21.195)	Top-5 acc 44.141 (42.594)	lr 0.02661
Warmup Train [20][590/3239]	Time 0.204 (0.259)	Data 0.001 (0.032)	Loss 4.5892 (4.5225)	Top-1 acc 18.359 (21.170)	Top-5 acc 42.578 (42.595)	lr 0.02660
Warmup Train [20][600/3239]	Time 0.209 (0.258)	Data 0.001 (0.032)	Loss 4.5342 (4.5227)	Top-1 acc 21.875 (21.187)	Top-5 acc 37.109 (42.600)	lr 0.02660
Warmup Train [20][610/3239]	Time 0.241 (0.258)	Data 0.001 (0.031)	Loss 4.5282 (4.5224)	Top-1 acc 21.484 (21.205)	Top-5 acc 41.797 (42.605)	lr 0.02659
Warmup Train [20][620/3239]	Time 0.241 (0.257)	Data 0.001 (0.031)	Loss 4.4924 (4.5219)	Top-1 acc 21.875 (21.205)	Top-5 acc 42.578 (42.615)	lr 0.02659
Warmup Train [20][630/3239]	Time 0.196 (0.257)	Data 0.001 (0.030)	Loss 4.5356 (4.5218)	Top-1 acc 21.094 (21.211)	Top-5 acc 40.625 (42.614)	lr 0.02658
Warmup Train [20][640/3239]	Time 0.159 (0.256)	Data 0.002 (0.030)	Loss 4.5437 (4.5220)	Top-1 acc 21.484 (21.217)	Top-5 acc 41.406 (42.598)	lr 0.02657
Warmup Train [20][650/3239]	Time 0.249 (0.256)	Data 0.001 (0.029)	Loss 4.5686 (4.5214)	Top-1 acc 21.875 (21.222)	Top-5 acc 43.359 (42.612)	lr 0.02657
Warmup Train [20][660/3239]	Time 0.231 (0.256)	Data 0.001 (0.029)	Loss 4.4737 (4.5214)	Top-1 acc 19.531 (21.216)	Top-5 acc 41.016 (42.614)	lr 0.02656
Warmup Train [20][670/3239]	Time 0.313 (0.255)	Data 0.001 (0.029)	Loss 4.5203 (4.5211)	Top-1 acc 19.531 (21.225)	Top-5 acc 39.453 (42.622)	lr 0.02656
Warmup Train [20][680/3239]	Time 0.181 (0.255)	Data 0.001 (0.028)	Loss 4.4298 (4.5202)	Top-1 acc 23.047 (21.238)	Top-5 acc 44.922 (42.646)	lr 0.02655
Warmup Train [20][690/3239]	Time 0.262 (0.255)	Data 0.001 (0.028)	Loss 4.5336 (4.5198)	Top-1 acc 17.969 (21.244)	Top-5 acc 41.406 (42.656)	lr 0.02654
Warmup Train [20][700/3239]	Time 0.195 (0.254)	Data 0.001 (0.027)	Loss 4.4299 (4.5199)	Top-1 acc 23.047 (21.244)	Top-5 acc 43.359 (42.640)	lr 0.02654
Warmup Train [20][710/3239]	Time 0.227 (0.254)	Data 0.001 (0.027)	Loss 4.7298 (4.5200)	Top-1 acc 19.531 (21.248)	Top-5 acc 34.766 (42.652)	lr 0.02653
Warmup Train [20][720/3239]	Time 0.258 (0.253)	Data 0.001 (0.027)	Loss 4.5529 (4.5204)	Top-1 acc 17.578 (21.249)	Top-5 acc 43.750 (42.666)	lr 0.02653
Warmup Train [20][730/3239]	Time 0.272 (0.253)	Data 0.001 (0.026)	Loss 4.6078 (4.5200)	Top-1 acc 19.141 (21.250)	Top-5 acc 40.625 (42.683)	lr 0.02652
Warmup Train [20][740/3239]	Time 0.237 (0.253)	Data 0.001 (0.026)	Loss 4.3530 (4.5204)	Top-1 acc 25.000 (21.251)	Top-5 acc 44.922 (42.683)	lr 0.02651
Warmup Train [20][750/3239]	Time 0.138 (0.252)	Data 0.001 (0.026)	Loss 4.6135 (4.5205)	Top-1 acc 17.188 (21.242)	Top-5 acc 39.844 (42.674)	lr 0.02651
Warmup Train [20][760/3239]	Time 0.185 (0.252)	Data 0.001 (0.026)	Loss 4.2135 (4.5196)	Top-1 acc 23.438 (21.264)	Top-5 acc 49.219 (42.693)	lr 0.02650
Warmup Train [20][770/3239]	Time 0.306 (0.252)	Data 0.001 (0.025)	Loss 4.3904 (4.5195)	Top-1 acc 21.094 (21.271)	Top-5 acc 43.359 (42.674)	lr 0.02650
Warmup Train [20][780/3239]	Time 0.270 (0.251)	Data 0.001 (0.025)	Loss 4.5242 (4.5204)	Top-1 acc 20.312 (21.255)	Top-5 acc 42.969 (42.649)	lr 0.02649
Warmup Train [20][790/3239]	Time 0.215 (0.251)	Data 0.001 (0.025)	Loss 4.5402 (4.5207)	Top-1 acc 17.969 (21.251)	Top-5 acc 41.406 (42.625)	lr 0.02648
Warmup Train [20][800/3239]	Time 0.211 (0.251)	Data 0.002 (0.024)	Loss 4.5774 (4.5209)	Top-1 acc 22.266 (21.245)	Top-5 acc 42.578 (42.622)	lr 0.02648
Warmup Train [20][810/3239]	Time 0.228 (0.251)	Data 0.002 (0.024)	Loss 4.6512 (4.5211)	Top-1 acc 17.188 (21.242)	Top-5 acc 37.891 (42.613)	lr 0.02647
Warmup Train [20][820/3239]	Time 0.177 (0.250)	Data 0.002 (0.024)	Loss 4.5079 (4.5218)	Top-1 acc 21.484 (21.219)	Top-5 acc 43.750 (42.589)	lr 0.02647
Warmup Train [20][830/3239]	Time 0.216 (0.250)	Data 0.001 (0.024)	Loss 4.3860 (4.5216)	Top-1 acc 23.438 (21.225)	Top-5 acc 46.094 (42.597)	lr 0.02646
Warmup Train [20][840/3239]	Time 0.208 (0.250)	Data 0.002 (0.023)	Loss 4.4461 (4.5213)	Top-1 acc 24.219 (21.221)	Top-5 acc 47.266 (42.608)	lr 0.02645
Warmup Train [20][850/3239]	Time 0.163 (0.249)	Data 0.001 (0.023)	Loss 4.5963 (4.5217)	Top-1 acc 19.141 (21.213)	Top-5 acc 39.844 (42.593)	lr 0.02645
Warmup Train [20][860/3239]	Time 0.284 (0.249)	Data 0.001 (0.023)	Loss 4.4497 (4.5221)	Top-1 acc 18.750 (21.200)	Top-5 acc 43.750 (42.566)	lr 0.02644
Warmup Train [20][870/3239]	Time 0.317 (0.249)	Data 0.001 (0.023)	Loss 4.4721 (4.5223)	Top-1 acc 21.875 (21.199)	Top-5 acc 41.016 (42.566)	lr 0.02644
Warmup Train [20][880/3239]	Time 0.146 (0.249)	Data 0.001 (0.023)	Loss 4.4750 (4.5227)	Top-1 acc 24.219 (21.206)	Top-5 acc 45.312 (42.563)	lr 0.02643
Warmup Train [20][890/3239]	Time 0.178 (0.248)	Data 0.002 (0.022)	Loss 4.6991 (4.5227)	Top-1 acc 19.531 (21.220)	Top-5 acc 39.062 (42.557)	lr 0.02642
Warmup Train [20][900/3239]	Time 0.216 (0.248)	Data 0.001 (0.022)	Loss 4.7357 (4.5226)	Top-1 acc 18.750 (21.218)	Top-5 acc 40.234 (42.575)	lr 0.02642
Warmup Train [20][910/3239]	Time 0.264 (0.248)	Data 0.001 (0.022)	Loss 4.2982 (4.5228)	Top-1 acc 28.125 (21.219)	Top-5 acc 49.219 (42.576)	lr 0.02641
Warmup Train [20][920/3239]	Time 0.195 (0.248)	Data 0.002 (0.022)	Loss 4.5199 (4.5225)	Top-1 acc 22.656 (21.221)	Top-5 acc 40.234 (42.578)	lr 0.02641
Warmup Train [20][930/3239]	Time 0.227 (0.247)	Data 0.001 (0.022)	Loss 4.3890 (4.5222)	Top-1 acc 24.609 (21.228)	Top-5 acc 46.484 (42.585)	lr 0.02640
Warmup Train [20][940/3239]	Time 0.178 (0.247)	Data 0.001 (0.021)	Loss 4.5378 (4.5226)	Top-1 acc 19.531 (21.213)	Top-5 acc 42.578 (42.565)	lr 0.02639
Warmup Train [20][950/3239]	Time 0.236 (0.247)	Data 0.001 (0.021)	Loss 4.4188 (4.5225)	Top-1 acc 25.000 (21.219)	Top-5 acc 42.969 (42.560)	lr 0.02639
Warmup Train [20][960/3239]	Time 0.177 (0.247)	Data 0.001 (0.021)	Loss 4.5125 (4.5222)	Top-1 acc 18.750 (21.214)	Top-5 acc 37.891 (42.557)	lr 0.02638
Warmup Train [20][970/3239]	Time 0.361 (0.247)	Data 0.001 (0.021)	Loss 4.4331 (4.5223)	Top-1 acc 23.828 (21.211)	Top-5 acc 48.047 (42.562)	lr 0.02637
Warmup Train [20][980/3239]	Time 0.338 (0.246)	Data 0.001 (0.021)	Loss 4.5147 (4.5223)	Top-1 acc 21.875 (21.222)	Top-5 acc 44.141 (42.571)	lr 0.02637
Warmup Train [20][990/3239]	Time 0.180 (0.246)	Data 0.002 (0.021)	Loss 4.4861 (4.5223)	Top-1 acc 22.266 (21.232)	Top-5 acc 38.672 (42.569)	lr 0.02636
Warmup Train [20][1000/3239]	Time 0.192 (0.246)	Data 0.001 (0.020)	Loss 4.4346 (4.5215)	Top-1 acc 24.219 (21.240)	Top-5 acc 45.312 (42.584)	lr 0.02636
Warmup Train [20][1010/3239]	Time 0.230 (0.246)	Data 0.001 (0.020)	Loss 4.4325 (4.5212)	Top-1 acc 23.828 (21.249)	Top-5 acc 44.922 (42.588)	lr 0.02635
Warmup Train [20][1020/3239]	Time 0.193 (0.246)	Data 0.002 (0.020)	Loss 4.4830 (4.5207)	Top-1 acc 23.047 (21.255)	Top-5 acc 43.359 (42.599)	lr 0.02634
Warmup Train [20][1030/3239]	Time 0.194 (0.245)	Data 0.001 (0.020)	Loss 4.4663 (4.5216)	Top-1 acc 22.266 (21.248)	Top-5 acc 42.188 (42.575)	lr 0.02634
Warmup Train [20][1040/3239]	Time 0.279 (0.245)	Data 0.002 (0.020)	Loss 4.4653 (4.5209)	Top-1 acc 21.094 (21.253)	Top-5 acc 44.141 (42.575)	lr 0.02633
Warmup Train [20][1050/3239]	Time 0.175 (0.245)	Data 0.002 (0.020)	Loss 4.3633 (4.5210)	Top-1 acc 24.219 (21.244)	Top-5 acc 46.484 (42.570)	lr 0.02633
Warmup Train [20][1060/3239]	Time 0.288 (0.245)	Data 0.001 (0.019)	Loss 4.5903 (4.5202)	Top-1 acc 23.438 (21.262)	Top-5 acc 39.062 (42.589)	lr 0.02632
Warmup Train [20][1070/3239]	Time 0.318 (0.245)	Data 0.001 (0.019)	Loss 4.3850 (4.5202)	Top-1 acc 23.047 (21.265)	Top-5 acc 44.141 (42.592)	lr 0.02631
Warmup Train [20][1080/3239]	Time 0.297 (0.245)	Data 0.001 (0.019)	Loss 4.5358 (4.5199)	Top-1 acc 20.703 (21.272)	Top-5 acc 41.406 (42.608)	lr 0.02631
Warmup Train [20][1090/3239]	Time 0.203 (0.245)	Data 0.001 (0.019)	Loss 4.5437 (4.5198)	Top-1 acc 21.484 (21.272)	Top-5 acc 42.969 (42.611)	lr 0.02630
Warmup Train [20][1100/3239]	Time 0.226 (0.244)	Data 0.001 (0.019)	Loss 4.3418 (4.5197)	Top-1 acc 24.609 (21.276)	Top-5 acc 45.312 (42.622)	lr 0.02630
Warmup Train [20][1110/3239]	Time 0.287 (0.244)	Data 0.002 (0.019)	Loss 4.4241 (4.5197)	Top-1 acc 20.312 (21.276)	Top-5 acc 47.266 (42.625)	lr 0.02629
Warmup Train [20][1120/3239]	Time 0.229 (0.244)	Data 0.001 (0.019)	Loss 4.7310 (4.5189)	Top-1 acc 19.141 (21.289)	Top-5 acc 37.109 (42.642)	lr 0.02628
Warmup Train [20][1130/3239]	Time 0.187 (0.244)	Data 0.001 (0.018)	Loss 4.5692 (4.5190)	Top-1 acc 21.875 (21.285)	Top-5 acc 42.969 (42.646)	lr 0.02628
Warmup Train [20][1140/3239]	Time 0.154 (0.244)	Data 0.001 (0.018)	Loss 4.5032 (4.5191)	Top-1 acc 17.578 (21.274)	Top-5 acc 40.625 (42.655)	lr 0.02627
Warmup Train [20][1150/3239]	Time 0.269 (0.244)	Data 0.002 (0.018)	Loss 4.4588 (4.5191)	Top-1 acc 19.531 (21.273)	Top-5 acc 42.969 (42.646)	lr 0.02627
Warmup Train [20][1160/3239]	Time 0.200 (0.244)	Data 0.001 (0.018)	Loss 4.4799 (4.5189)	Top-1 acc 23.047 (21.281)	Top-5 acc 40.234 (42.645)	lr 0.02626
Warmup Train [20][1170/3239]	Time 0.160 (0.244)	Data 0.001 (0.018)	Loss 4.5061 (4.5194)	Top-1 acc 20.703 (21.280)	Top-5 acc 42.188 (42.635)	lr 0.02625
Warmup Train [20][1180/3239]	Time 0.222 (0.243)	Data 0.001 (0.018)	Loss 4.5241 (4.5193)	Top-1 acc 23.047 (21.272)	Top-5 acc 40.625 (42.635)	lr 0.02625
Warmup Train [20][1190/3239]	Time 0.312 (0.243)	Data 0.002 (0.018)	Loss 4.4099 (4.5190)	Top-1 acc 23.047 (21.282)	Top-5 acc 44.141 (42.648)	lr 0.02624
Warmup Train [20][1200/3239]	Time 0.192 (0.243)	Data 0.002 (0.018)	Loss 4.3086 (4.5186)	Top-1 acc 22.656 (21.301)	Top-5 acc 48.438 (42.659)	lr 0.02624
Warmup Train [20][1210/3239]	Time 0.198 (0.243)	Data 0.001 (0.017)	Loss 4.4794 (4.5181)	Top-1 acc 21.484 (21.315)	Top-5 acc 42.969 (42.676)	lr 0.02623
Warmup Train [20][1220/3239]	Time 0.196 (0.243)	Data 0.001 (0.017)	Loss 4.4010 (4.5181)	Top-1 acc 24.609 (21.321)	Top-5 acc 42.188 (42.671)	lr 0.02622
Warmup Train [20][1230/3239]	Time 0.173 (0.243)	Data 0.001 (0.017)	Loss 4.6536 (4.5179)	Top-1 acc 20.312 (21.325)	Top-5 acc 42.188 (42.684)	lr 0.02622
Warmup Train [20][1240/3239]	Time 0.200 (0.243)	Data 0.001 (0.017)	Loss 4.5017 (4.5175)	Top-1 acc 23.828 (21.329)	Top-5 acc 41.797 (42.684)	lr 0.02621
Warmup Train [20][1250/3239]	Time 0.163 (0.242)	Data 0.001 (0.017)	Loss 4.3960 (4.5179)	Top-1 acc 24.219 (21.324)	Top-5 acc 45.312 (42.671)	lr 0.02621
Warmup Train [20][1260/3239]	Time 0.206 (0.242)	Data 0.001 (0.017)	Loss 4.5087 (4.5176)	Top-1 acc 21.094 (21.326)	Top-5 acc 45.703 (42.678)	lr 0.02620
Warmup Train [20][1270/3239]	Time 0.200 (0.242)	Data 0.001 (0.017)	Loss 4.4317 (4.5174)	Top-1 acc 24.609 (21.323)	Top-5 acc 48.828 (42.684)	lr 0.02619
Warmup Train [20][1280/3239]	Time 0.169 (0.242)	Data 0.001 (0.017)	Loss 4.5636 (4.5169)	Top-1 acc 23.438 (21.336)	Top-5 acc 41.797 (42.703)	lr 0.02619
Warmup Train [20][1290/3239]	Time 0.367 (0.242)	Data 0.002 (0.017)	Loss 4.7003 (4.5169)	Top-1 acc 19.531 (21.340)	Top-5 acc 36.328 (42.702)	lr 0.02618
Warmup Train [20][1300/3239]	Time 0.208 (0.242)	Data 0.001 (0.016)	Loss 4.5637 (4.5169)	Top-1 acc 22.266 (21.345)	Top-5 acc 43.359 (42.699)	lr 0.02618
Warmup Train [20][1310/3239]	Time 0.249 (0.242)	Data 0.001 (0.016)	Loss 4.7515 (4.5169)	Top-1 acc 17.578 (21.342)	Top-5 acc 38.672 (42.699)	lr 0.02617
Warmup Train [20][1320/3239]	Time 0.226 (0.242)	Data 0.001 (0.016)	Loss 4.5676 (4.5167)	Top-1 acc 19.141 (21.343)	Top-5 acc 41.016 (42.700)	lr 0.02616
Warmup Train [20][1330/3239]	Time 0.247 (0.241)	Data 0.002 (0.016)	Loss 4.5042 (4.5163)	Top-1 acc 22.266 (21.350)	Top-5 acc 41.016 (42.708)	lr 0.02616
Warmup Train [20][1340/3239]	Time 0.191 (0.241)	Data 0.001 (0.016)	Loss 4.5186 (4.5159)	Top-1 acc 20.703 (21.356)	Top-5 acc 48.047 (42.717)	lr 0.02615
Warmup Train [20][1350/3239]	Time 0.185 (0.241)	Data 0.002 (0.016)	Loss 4.5992 (4.5166)	Top-1 acc 20.312 (21.343)	Top-5 acc 39.062 (42.696)	lr 0.02614
Warmup Train [20][1360/3239]	Time 0.258 (0.241)	Data 0.001 (0.016)	Loss 4.5577 (4.5168)	Top-1 acc 19.922 (21.338)	Top-5 acc 40.625 (42.690)	lr 0.02614
Warmup Train [20][1370/3239]	Time 0.240 (0.241)	Data 0.002 (0.016)	Loss 4.6135 (4.5166)	Top-1 acc 19.531 (21.345)	Top-5 acc 40.625 (42.693)	lr 0.02613
Warmup Train [20][1380/3239]	Time 0.246 (0.241)	Data 0.002 (0.016)	Loss 4.4549 (4.5169)	Top-1 acc 19.141 (21.342)	Top-5 acc 45.703 (42.692)	lr 0.02613
Warmup Train [20][1390/3239]	Time 0.124 (0.241)	Data 0.001 (0.016)	Loss 4.6994 (4.5168)	Top-1 acc 19.922 (21.342)	Top-5 acc 39.844 (42.695)	lr 0.02612
Warmup Train [20][1400/3239]	Time 0.174 (0.241)	Data 0.001 (0.016)	Loss 4.4117 (4.5168)	Top-1 acc 22.656 (21.345)	Top-5 acc 47.266 (42.700)	lr 0.02611
Warmup Train [20][1410/3239]	Time 0.231 (0.241)	Data 0.001 (0.015)	Loss 4.3052 (4.5166)	Top-1 acc 21.875 (21.345)	Top-5 acc 50.000 (42.703)	lr 0.02611
Warmup Train [20][1420/3239]	Time 0.218 (0.240)	Data 0.001 (0.015)	Loss 4.4083 (4.5162)	Top-1 acc 23.828 (21.357)	Top-5 acc 43.359 (42.714)	lr 0.02610
Warmup Train [20][1430/3239]	Time 0.225 (0.240)	Data 0.001 (0.015)	Loss 4.4601 (4.5161)	Top-1 acc 26.562 (21.363)	Top-5 acc 41.406 (42.714)	lr 0.02610
Warmup Train [20][1440/3239]	Time 0.174 (0.240)	Data 0.001 (0.015)	Loss 4.5697 (4.5161)	Top-1 acc 19.141 (21.359)	Top-5 acc 44.141 (42.715)	lr 0.02609
Warmup Train [20][1450/3239]	Time 0.191 (0.240)	Data 0.001 (0.015)	Loss 4.5814 (4.5156)	Top-1 acc 18.359 (21.363)	Top-5 acc 39.844 (42.729)	lr 0.02608
Warmup Train [20][1460/3239]	Time 0.164 (0.240)	Data 0.001 (0.015)	Loss 4.5984 (4.5160)	Top-1 acc 23.047 (21.359)	Top-5 acc 41.406 (42.721)	lr 0.02608
Warmup Train [20][1470/3239]	Time 0.228 (0.240)	Data 0.002 (0.015)	Loss 4.5150 (4.5161)	Top-1 acc 20.312 (21.357)	Top-5 acc 40.625 (42.718)	lr 0.02607
Warmup Train [20][1480/3239]	Time 0.432 (0.240)	Data 0.001 (0.015)	Loss 4.6481 (4.5162)	Top-1 acc 16.797 (21.358)	Top-5 acc 38.672 (42.718)	lr 0.02607
Warmup Train [20][1490/3239]	Time 0.212 (0.240)	Data 0.001 (0.015)	Loss 4.5384 (4.5166)	Top-1 acc 22.266 (21.348)	Top-5 acc 39.062 (42.706)	lr 0.02606
Warmup Train [20][1500/3239]	Time 0.208 (0.240)	Data 0.001 (0.015)	Loss 4.4361 (4.5168)	Top-1 acc 25.000 (21.345)	Top-5 acc 42.969 (42.701)	lr 0.02605
Warmup Train [20][1510/3239]	Time 0.193 (0.239)	Data 0.001 (0.015)	Loss 4.4573 (4.5165)	Top-1 acc 22.656 (21.350)	Top-5 acc 43.750 (42.709)	lr 0.02605
Warmup Train [20][1520/3239]	Time 0.136 (0.239)	Data 0.001 (0.015)	Loss 4.6839 (4.5165)	Top-1 acc 16.016 (21.350)	Top-5 acc 38.672 (42.709)	lr 0.02604
Warmup Train [20][1530/3239]	Time 0.197 (0.239)	Data 0.001 (0.015)	Loss 4.4486 (4.5165)	Top-1 acc 22.656 (21.345)	Top-5 acc 43.359 (42.710)	lr 0.02604
Warmup Train [20][1540/3239]	Time 0.213 (0.239)	Data 0.001 (0.014)	Loss 4.3717 (4.5159)	Top-1 acc 28.125 (21.349)	Top-5 acc 45.703 (42.720)	lr 0.02603
Warmup Train [20][1550/3239]	Time 0.200 (0.239)	Data 0.001 (0.014)	Loss 4.4927 (4.5157)	Top-1 acc 22.656 (21.355)	Top-5 acc 44.141 (42.725)	lr 0.02602
Warmup Train [20][1560/3239]	Time 0.219 (0.239)	Data 0.001 (0.014)	Loss 4.4589 (4.5156)	Top-1 acc 23.047 (21.354)	Top-5 acc 48.047 (42.731)	lr 0.02602
Warmup Train [20][1570/3239]	Time 0.272 (0.239)	Data 0.001 (0.014)	Loss 4.4697 (4.5152)	Top-1 acc 22.656 (21.367)	Top-5 acc 44.141 (42.741)	lr 0.02601
Warmup Train [20][1580/3239]	Time 0.351 (0.239)	Data 0.001 (0.014)	Loss 4.5462 (4.5151)	Top-1 acc 20.703 (21.371)	Top-5 acc 39.453 (42.745)	lr 0.02601
Warmup Train [20][1590/3239]	Time 0.341 (0.239)	Data 0.001 (0.014)	Loss 4.4996 (4.5154)	Top-1 acc 19.531 (21.365)	Top-5 acc 41.797 (42.739)	lr 0.02600
Warmup Train [20][1600/3239]	Time 0.213 (0.239)	Data 0.001 (0.014)	Loss 4.6696 (4.5150)	Top-1 acc 17.578 (21.367)	Top-5 acc 35.156 (42.747)	lr 0.02599
Warmup Train [20][1610/3239]	Time 0.193 (0.239)	Data 0.001 (0.014)	Loss 4.4482 (4.5150)	Top-1 acc 25.000 (21.363)	Top-5 acc 44.141 (42.757)	lr 0.02599
Warmup Train [20][1620/3239]	Time 0.181 (0.238)	Data 0.002 (0.014)	Loss 4.4973 (4.5150)	Top-1 acc 21.094 (21.363)	Top-5 acc 41.016 (42.754)	lr 0.02598
Warmup Train [20][1630/3239]	Time 0.203 (0.238)	Data 0.001 (0.014)	Loss 4.5331 (4.5150)	Top-1 acc 19.531 (21.355)	Top-5 acc 40.625 (42.755)	lr 0.02598
Warmup Train [20][1640/3239]	Time 0.243 (0.238)	Data 0.002 (0.014)	Loss 4.5398 (4.5149)	Top-1 acc 21.484 (21.368)	Top-5 acc 44.141 (42.763)	lr 0.02597
Warmup Train [20][1650/3239]	Time 0.206 (0.238)	Data 0.001 (0.014)	Loss 4.7475 (4.5150)	Top-1 acc 17.188 (21.359)	Top-5 acc 36.328 (42.754)	lr 0.02596
Warmup Train [20][1660/3239]	Time 0.202 (0.238)	Data 0.001 (0.014)	Loss 4.6240 (4.5151)	Top-1 acc 19.922 (21.359)	Top-5 acc 37.500 (42.751)	lr 0.02596
Warmup Train [20][1670/3239]	Time 0.146 (0.238)	Data 0.001 (0.013)	Loss 4.7577 (4.5150)	Top-1 acc 17.188 (21.367)	Top-5 acc 37.891 (42.761)	lr 0.02595
Warmup Train [20][1680/3239]	Time 0.170 (0.238)	Data 0.001 (0.013)	Loss 4.3110 (4.5151)	Top-1 acc 25.781 (21.368)	Top-5 acc 46.094 (42.759)	lr 0.02595
Warmup Train [20][1690/3239]	Time 0.212 (0.238)	Data 0.001 (0.013)	Loss 4.4843 (4.5150)	Top-1 acc 25.391 (21.371)	Top-5 acc 45.312 (42.762)	lr 0.02594
Warmup Train [20][1700/3239]	Time 0.352 (0.238)	Data 0.001 (0.013)	Loss 4.5285 (4.5153)	Top-1 acc 23.828 (21.374)	Top-5 acc 43.750 (42.757)	lr 0.02593
Warmup Train [20][1710/3239]	Time 0.189 (0.238)	Data 0.002 (0.013)	Loss 4.5964 (4.5152)	Top-1 acc 19.141 (21.383)	Top-5 acc 39.453 (42.766)	lr 0.02593
Warmup Train [20][1720/3239]	Time 0.203 (0.238)	Data 0.001 (0.013)	Loss 4.3920 (4.5151)	Top-1 acc 19.922 (21.389)	Top-5 acc 45.312 (42.769)	lr 0.02592
Warmup Train [20][1730/3239]	Time 0.176 (0.237)	Data 0.001 (0.013)	Loss 4.3412 (4.5151)	Top-1 acc 24.219 (21.395)	Top-5 acc 47.266 (42.769)	lr 0.02591
Warmup Train [20][1740/3239]	Time 0.149 (0.237)	Data 0.001 (0.013)	Loss 4.5942 (4.5154)	Top-1 acc 19.922 (21.394)	Top-5 acc 41.797 (42.762)	lr 0.02591
Warmup Train [20][1750/3239]	Time 0.167 (0.237)	Data 0.002 (0.013)	Loss 4.4632 (4.5149)	Top-1 acc 23.438 (21.401)	Top-5 acc 45.312 (42.772)	lr 0.02590
Warmup Train [20][1760/3239]	Time 0.223 (0.237)	Data 0.001 (0.013)	Loss 4.5189 (4.5148)	Top-1 acc 20.703 (21.399)	Top-5 acc 41.797 (42.775)	lr 0.02590
Warmup Train [20][1770/3239]	Time 0.273 (0.237)	Data 0.001 (0.013)	Loss 4.4027 (4.5147)	Top-1 acc 22.656 (21.401)	Top-5 acc 48.438 (42.781)	lr 0.02589
Warmup Train [20][1780/3239]	Time 0.153 (0.237)	Data 0.003 (0.013)	Loss 4.5409 (4.5144)	Top-1 acc 23.047 (21.405)	Top-5 acc 42.578 (42.792)	lr 0.02588
Warmup Train [20][1790/3239]	Time 0.176 (0.237)	Data 0.001 (0.013)	Loss 4.4247 (4.5143)	Top-1 acc 20.312 (21.406)	Top-5 acc 43.359 (42.794)	lr 0.02588
Warmup Train [20][1800/3239]	Time 0.190 (0.237)	Data 0.001 (0.013)	Loss 4.5228 (4.5143)	Top-1 acc 21.484 (21.406)	Top-5 acc 43.359 (42.795)	lr 0.02587
Warmup Train [20][1810/3239]	Time 0.335 (0.237)	Data 0.001 (0.013)	Loss 4.5946 (4.5140)	Top-1 acc 23.438 (21.413)	Top-5 acc 41.016 (42.801)	lr 0.02587
Warmup Train [20][1820/3239]	Time 0.316 (0.237)	Data 0.001 (0.013)	Loss 4.3553 (4.5139)	Top-1 acc 19.531 (21.410)	Top-5 acc 49.219 (42.807)	lr 0.02586
Warmup Train [20][1830/3239]	Time 0.254 (0.237)	Data 0.030 (0.013)	Loss 4.6007 (4.5141)	Top-1 acc 18.359 (21.404)	Top-5 acc 38.281 (42.796)	lr 0.02585
Warmup Train [20][1840/3239]	Time 0.184 (0.237)	Data 0.001 (0.012)	Loss 4.2554 (4.5139)	Top-1 acc 24.609 (21.407)	Top-5 acc 48.047 (42.798)	lr 0.02585
Warmup Train [20][1850/3239]	Time 0.177 (0.237)	Data 0.002 (0.012)	Loss 4.4727 (4.5138)	Top-1 acc 21.484 (21.409)	Top-5 acc 41.406 (42.800)	lr 0.02584
Warmup Train [20][1860/3239]	Time 0.224 (0.237)	Data 0.002 (0.012)	Loss 4.6491 (4.5138)	Top-1 acc 20.703 (21.414)	Top-5 acc 37.500 (42.801)	lr 0.02584
Warmup Train [20][1870/3239]	Time 0.206 (0.236)	Data 0.002 (0.012)	Loss 4.7059 (4.5140)	Top-1 acc 20.312 (21.413)	Top-5 acc 39.062 (42.800)	lr 0.02583
Warmup Train [20][1880/3239]	Time 0.237 (0.236)	Data 0.001 (0.012)	Loss 4.3946 (4.5137)	Top-1 acc 23.047 (21.418)	Top-5 acc 47.656 (42.805)	lr 0.02582
Warmup Train [20][1890/3239]	Time 0.197 (0.236)	Data 0.002 (0.012)	Loss 4.4219 (4.5138)	Top-1 acc 21.484 (21.419)	Top-5 acc 48.828 (42.805)	lr 0.02582
Warmup Train [20][1900/3239]	Time 0.193 (0.236)	Data 0.002 (0.012)	Loss 4.4108 (4.5135)	Top-1 acc 23.828 (21.421)	Top-5 acc 42.969 (42.805)	lr 0.02581
Warmup Train [20][1910/3239]	Time 0.250 (0.236)	Data 0.001 (0.012)	Loss 4.3243 (4.5138)	Top-1 acc 25.000 (21.417)	Top-5 acc 48.828 (42.803)	lr 0.02581
Warmup Train [20][1920/3239]	Time 0.294 (0.236)	Data 0.001 (0.012)	Loss 4.5880 (4.5138)	Top-1 acc 21.875 (21.420)	Top-5 acc 39.844 (42.806)	lr 0.02580
Warmup Train [20][1930/3239]	Time 0.243 (0.236)	Data 0.002 (0.012)	Loss 4.4995 (4.5135)	Top-1 acc 22.266 (21.424)	Top-5 acc 42.188 (42.817)	lr 0.02579
Warmup Train [20][1940/3239]	Time 0.236 (0.236)	Data 0.001 (0.012)	Loss 4.4382 (4.5134)	Top-1 acc 22.266 (21.424)	Top-5 acc 41.406 (42.810)	lr 0.02579
Warmup Train [20][1950/3239]	Time 0.251 (0.236)	Data 0.001 (0.012)	Loss 4.7578 (4.5135)	Top-1 acc 16.797 (21.420)	Top-5 acc 38.281 (42.809)	lr 0.02578
Warmup Train [20][1960/3239]	Time 0.235 (0.236)	Data 0.001 (0.012)	Loss 4.6659 (4.5135)	Top-1 acc 18.750 (21.420)	Top-5 acc 38.281 (42.811)	lr 0.02578
Warmup Train [20][1970/3239]	Time 0.244 (0.236)	Data 0.001 (0.012)	Loss 4.5434 (4.5133)	Top-1 acc 21.875 (21.418)	Top-5 acc 40.625 (42.812)	lr 0.02577
Warmup Train [20][1980/3239]	Time 0.214 (0.236)	Data 0.001 (0.012)	Loss 4.4137 (4.5136)	Top-1 acc 26.562 (21.422)	Top-5 acc 44.922 (42.810)	lr 0.02576
Warmup Train [20][1990/3239]	Time 0.229 (0.236)	Data 0.002 (0.012)	Loss 4.6156 (4.5136)	Top-1 acc 19.922 (21.422)	Top-5 acc 42.578 (42.809)	lr 0.02576
Warmup Train [20][2000/3239]	Time 0.213 (0.236)	Data 0.001 (0.012)	Loss 4.6097 (4.5137)	Top-1 acc 20.312 (21.420)	Top-5 acc 39.844 (42.807)	lr 0.02575
Warmup Train [20][2010/3239]	Time 0.238 (0.236)	Data 0.001 (0.012)	Loss 4.5902 (4.5139)	Top-1 acc 19.531 (21.422)	Top-5 acc 39.453 (42.805)	lr 0.02575
Warmup Train [20][2020/3239]	Time 0.227 (0.235)	Data 0.001 (0.012)	Loss 4.8247 (4.5137)	Top-1 acc 17.578 (21.427)	Top-5 acc 34.375 (42.808)	lr 0.02574
Warmup Train [20][2030/3239]	Time 0.328 (0.236)	Data 0.001 (0.012)	Loss 4.5159 (4.5134)	Top-1 acc 21.484 (21.432)	Top-5 acc 42.188 (42.816)	lr 0.02573
Warmup Train [20][2040/3239]	Time 0.272 (0.235)	Data 0.001 (0.012)	Loss 4.5605 (4.5132)	Top-1 acc 21.875 (21.436)	Top-5 acc 44.531 (42.817)	lr 0.02573
Warmup Train [20][2050/3239]	Time 0.174 (0.235)	Data 0.001 (0.012)	Loss 4.4865 (4.5131)	Top-1 acc 20.703 (21.438)	Top-5 acc 44.141 (42.823)	lr 0.02572
Warmup Train [20][2060/3239]	Time 0.205 (0.235)	Data 0.001 (0.011)	Loss 4.2645 (4.5128)	Top-1 acc 25.781 (21.447)	Top-5 acc 48.438 (42.826)	lr 0.02572
Warmup Train [20][2070/3239]	Time 0.215 (0.235)	Data 0.002 (0.011)	Loss 4.4217 (4.5127)	Top-1 acc 25.781 (21.453)	Top-5 acc 44.531 (42.826)	lr 0.02571
Warmup Train [20][2080/3239]	Time 0.229 (0.235)	Data 0.001 (0.011)	Loss 4.4545 (4.5127)	Top-1 acc 23.047 (21.452)	Top-5 acc 43.750 (42.827)	lr 0.02570
Warmup Train [20][2090/3239]	Time 0.209 (0.235)	Data 0.001 (0.011)	Loss 4.5632 (4.5127)	Top-1 acc 19.531 (21.455)	Top-5 acc 41.797 (42.831)	lr 0.02570
Warmup Train [20][2100/3239]	Time 0.214 (0.235)	Data 0.002 (0.011)	Loss 4.4137 (4.5126)	Top-1 acc 23.047 (21.459)	Top-5 acc 44.531 (42.833)	lr 0.02569
Warmup Train [20][2110/3239]	Time 0.183 (0.235)	Data 0.001 (0.011)	Loss 4.4806 (4.5128)	Top-1 acc 20.703 (21.453)	Top-5 acc 42.188 (42.827)	lr 0.02568
Warmup Train [20][2120/3239]	Time 0.227 (0.235)	Data 0.001 (0.011)	Loss 4.4718 (4.5126)	Top-1 acc 21.094 (21.457)	Top-5 acc 43.359 (42.829)	lr 0.02568
Warmup Train [20][2130/3239]	Time 0.393 (0.235)	Data 0.001 (0.011)	Loss 4.3724 (4.5126)	Top-1 acc 22.656 (21.458)	Top-5 acc 42.188 (42.825)	lr 0.02567
Warmup Train [20][2140/3239]	Time 0.141 (0.235)	Data 0.001 (0.011)	Loss 4.4515 (4.5126)	Top-1 acc 23.438 (21.457)	Top-5 acc 41.406 (42.825)	lr 0.02567
Warmup Train [20][2150/3239]	Time 0.196 (0.235)	Data 0.001 (0.011)	Loss 4.6915 (4.5127)	Top-1 acc 17.578 (21.454)	Top-5 acc 36.719 (42.827)	lr 0.02566
Warmup Train [20][2160/3239]	Time 0.279 (0.235)	Data 0.002 (0.011)	Loss 4.4887 (4.5123)	Top-1 acc 22.266 (21.454)	Top-5 acc 42.188 (42.830)	lr 0.02565
Warmup Train [20][2170/3239]	Time 0.137 (0.235)	Data 0.001 (0.011)	Loss 4.5219 (4.5122)	Top-1 acc 21.484 (21.454)	Top-5 acc 39.453 (42.836)	lr 0.02565
Warmup Train [20][2180/3239]	Time 0.152 (0.235)	Data 0.001 (0.011)	Loss 4.5810 (4.5121)	Top-1 acc 18.359 (21.453)	Top-5 acc 43.750 (42.840)	lr 0.02564
Warmup Train [20][2190/3239]	Time 0.231 (0.235)	Data 0.001 (0.011)	Loss 4.4632 (4.5122)	Top-1 acc 19.922 (21.450)	Top-5 acc 44.141 (42.844)	lr 0.02564
Warmup Train [20][2200/3239]	Time 0.154 (0.235)	Data 0.001 (0.011)	Loss 4.5797 (4.5121)	Top-1 acc 18.359 (21.450)	Top-5 acc 41.406 (42.852)	lr 0.02563
Warmup Train [20][2210/3239]	Time 0.213 (0.235)	Data 0.001 (0.011)	Loss 4.6492 (4.5121)	Top-1 acc 20.703 (21.451)	Top-5 acc 40.234 (42.849)	lr 0.02562
Warmup Train [20][2220/3239]	Time 0.205 (0.235)	Data 0.001 (0.011)	Loss 4.5424 (4.5119)	Top-1 acc 19.922 (21.450)	Top-5 acc 40.625 (42.855)	lr 0.02562
Warmup Train [20][2230/3239]	Time 0.245 (0.235)	Data 0.001 (0.011)	Loss 4.5465 (4.5118)	Top-1 acc 20.312 (21.452)	Top-5 acc 41.797 (42.861)	lr 0.02561
Warmup Train [20][2240/3239]	Time 0.268 (0.234)	Data 0.001 (0.011)	Loss 4.5840 (4.5120)	Top-1 acc 18.359 (21.451)	Top-5 acc 40.234 (42.858)	lr 0.02561
Warmup Train [20][2250/3239]	Time 0.161 (0.234)	Data 0.001 (0.011)	Loss 4.7426 (4.5120)	Top-1 acc 21.094 (21.447)	Top-5 acc 36.328 (42.855)	lr 0.02560
Warmup Train [20][2260/3239]	Time 0.355 (0.234)	Data 0.001 (0.011)	Loss 4.6073 (4.5120)	Top-1 acc 18.750 (21.448)	Top-5 acc 43.359 (42.853)	lr 0.02559
Warmup Train [20][2270/3239]	Time 0.190 (0.234)	Data 0.001 (0.011)	Loss 4.4998 (4.5117)	Top-1 acc 21.094 (21.454)	Top-5 acc 41.797 (42.861)	lr 0.02559
Warmup Train [20][2280/3239]	Time 0.150 (0.234)	Data 0.001 (0.011)	Loss 4.5830 (4.5117)	Top-1 acc 20.703 (21.452)	Top-5 acc 40.234 (42.856)	lr 0.02558
Warmup Train [20][2290/3239]	Time 0.209 (0.234)	Data 0.001 (0.011)	Loss 4.5087 (4.5118)	Top-1 acc 22.656 (21.445)	Top-5 acc 45.312 (42.853)	lr 0.02558
Warmup Train [20][2300/3239]	Time 0.159 (0.234)	Data 0.001 (0.011)	Loss 4.4697 (4.5117)	Top-1 acc 19.922 (21.448)	Top-5 acc 44.531 (42.855)	lr 0.02557
Warmup Train [20][2310/3239]	Time 0.154 (0.234)	Data 0.001 (0.011)	Loss 4.6429 (4.5116)	Top-1 acc 17.578 (21.453)	Top-5 acc 39.844 (42.859)	lr 0.02556
Warmup Train [20][2320/3239]	Time 0.174 (0.234)	Data 0.001 (0.011)	Loss 4.6689 (4.5114)	Top-1 acc 17.969 (21.457)	Top-5 acc 39.453 (42.860)	lr 0.02556
Warmup Train [20][2330/3239]	Time 0.217 (0.234)	Data 0.002 (0.011)	Loss 4.2823 (4.5109)	Top-1 acc 26.953 (21.468)	Top-5 acc 45.312 (42.871)	lr 0.02555
Warmup Train [20][2340/3239]	Time 0.223 (0.234)	Data 0.002 (0.011)	Loss 4.4112 (4.5108)	Top-1 acc 20.703 (21.471)	Top-5 acc 46.484 (42.873)	lr 0.02555
Warmup Train [20][2350/3239]	Time 0.218 (0.234)	Data 0.002 (0.010)	Loss 4.5318 (4.5109)	Top-1 acc 22.266 (21.469)	Top-5 acc 39.062 (42.872)	lr 0.02554
Warmup Train [20][2360/3239]	Time 0.368 (0.234)	Data 0.002 (0.010)	Loss 4.5981 (4.5109)	Top-1 acc 20.312 (21.469)	Top-5 acc 37.891 (42.870)	lr 0.02553
Warmup Train [20][2370/3239]	Time 0.161 (0.234)	Data 0.002 (0.010)	Loss 4.5091 (4.5106)	Top-1 acc 21.094 (21.476)	Top-5 acc 42.969 (42.879)	lr 0.02553
Warmup Train [20][2380/3239]	Time 0.261 (0.234)	Data 0.002 (0.010)	Loss 4.4059 (4.5106)	Top-1 acc 20.703 (21.474)	Top-5 acc 45.312 (42.873)	lr 0.02552
Warmup Train [20][2390/3239]	Time 0.210 (0.234)	Data 0.002 (0.010)	Loss 4.4953 (4.5106)	Top-1 acc 23.828 (21.481)	Top-5 acc 42.188 (42.876)	lr 0.02552
Warmup Train [20][2400/3239]	Time 0.229 (0.234)	Data 0.001 (0.010)	Loss 4.3410 (4.5104)	Top-1 acc 24.609 (21.484)	Top-5 acc 47.266 (42.878)	lr 0.02551
Warmup Train [20][2410/3239]	Time 0.240 (0.235)	Data 0.002 (0.010)	Loss 4.4836 (4.5104)	Top-1 acc 21.094 (21.482)	Top-5 acc 40.625 (42.877)	lr 0.02550
Warmup Train [20][2420/3239]	Time 0.233 (0.234)	Data 0.001 (0.010)	Loss 4.6802 (4.5103)	Top-1 acc 17.188 (21.483)	Top-5 acc 39.844 (42.883)	lr 0.02550
Warmup Train [20][2430/3239]	Time 0.186 (0.234)	Data 0.001 (0.010)	Loss 4.5821 (4.5104)	Top-1 acc 21.094 (21.481)	Top-5 acc 41.797 (42.877)	lr 0.02549
Warmup Train [20][2440/3239]	Time 0.201 (0.234)	Data 0.001 (0.010)	Loss 4.4971 (4.5106)	Top-1 acc 21.094 (21.479)	Top-5 acc 40.625 (42.872)	lr 0.02548
Warmup Train [20][2450/3239]	Time 0.242 (0.234)	Data 0.001 (0.010)	Loss 4.4793 (4.5104)	Top-1 acc 23.438 (21.484)	Top-5 acc 44.922 (42.880)	lr 0.02548
Warmup Train [20][2460/3239]	Time 0.146 (0.234)	Data 0.002 (0.010)	Loss 4.7929 (4.5105)	Top-1 acc 16.406 (21.479)	Top-5 acc 33.594 (42.872)	lr 0.02547
Warmup Train [20][2470/3239]	Time 0.265 (0.234)	Data 0.001 (0.010)	Loss 4.6979 (4.5106)	Top-1 acc 17.188 (21.476)	Top-5 acc 36.719 (42.872)	lr 0.02547
Warmup Train [20][2480/3239]	Time 0.312 (0.234)	Data 0.003 (0.010)	Loss 4.2745 (4.5102)	Top-1 acc 25.000 (21.485)	Top-5 acc 49.219 (42.881)	lr 0.02546
Warmup Train [20][2490/3239]	Time 0.294 (0.234)	Data 0.001 (0.010)	Loss 4.5532 (4.5102)	Top-1 acc 16.797 (21.482)	Top-5 acc 38.281 (42.879)	lr 0.02545
Warmup Train [20][2500/3239]	Time 0.197 (0.234)	Data 0.002 (0.010)	Loss 4.5328 (4.5105)	Top-1 acc 21.484 (21.478)	Top-5 acc 40.234 (42.872)	lr 0.02545
Warmup Train [20][2510/3239]	Time 0.252 (0.234)	Data 0.002 (0.010)	Loss 4.6323 (4.5104)	Top-1 acc 17.969 (21.480)	Top-5 acc 46.875 (42.876)	lr 0.02544
Warmup Train [20][2520/3239]	Time 0.217 (0.234)	Data 0.002 (0.010)	Loss 4.5746 (4.5104)	Top-1 acc 20.312 (21.482)	Top-5 acc 36.719 (42.877)	lr 0.02544
Warmup Train [20][2530/3239]	Time 0.218 (0.234)	Data 0.001 (0.010)	Loss 4.5996 (4.5103)	Top-1 acc 17.969 (21.488)	Top-5 acc 35.547 (42.879)	lr 0.02543
Warmup Train [20][2540/3239]	Time 0.173 (0.234)	Data 0.001 (0.010)	Loss 4.5200 (4.5104)	Top-1 acc 21.094 (21.488)	Top-5 acc 43.359 (42.878)	lr 0.02542
Warmup Train [20][2550/3239]	Time 0.235 (0.234)	Data 0.001 (0.010)	Loss 4.3526 (4.5102)	Top-1 acc 24.609 (21.494)	Top-5 acc 44.922 (42.883)	lr 0.02542
Warmup Train [20][2560/3239]	Time 0.246 (0.234)	Data 0.001 (0.010)	Loss 4.4983 (4.5102)	Top-1 acc 20.312 (21.495)	Top-5 acc 39.453 (42.882)	lr 0.02541
Warmup Train [20][2570/3239]	Time 0.127 (0.234)	Data 0.003 (0.010)	Loss 4.4955 (4.5103)	Top-1 acc 22.656 (21.495)	Top-5 acc 44.141 (42.881)	lr 0.02541
Warmup Train [20][2580/3239]	Time 0.339 (0.234)	Data 0.001 (0.010)	Loss 4.4919 (4.5102)	Top-1 acc 23.438 (21.497)	Top-5 acc 46.875 (42.884)	lr 0.02540
Warmup Train [20][2590/3239]	Time 0.215 (0.234)	Data 0.001 (0.010)	Loss 4.6287 (4.5101)	Top-1 acc 23.438 (21.499)	Top-5 acc 42.578 (42.888)	lr 0.02539
Warmup Train [20][2600/3239]	Time 0.279 (0.235)	Data 0.001 (0.010)	Loss 4.5694 (4.5100)	Top-1 acc 23.047 (21.500)	Top-5 acc 43.359 (42.892)	lr 0.02539
Warmup Train [20][2610/3239]	Time 0.233 (0.235)	Data 0.001 (0.010)	Loss 4.5701 (4.5100)	Top-1 acc 19.141 (21.497)	Top-5 acc 42.578 (42.892)	lr 0.02538
Warmup Train [20][2620/3239]	Time 0.206 (0.235)	Data 0.001 (0.010)	Loss 4.4218 (4.5100)	Top-1 acc 23.438 (21.496)	Top-5 acc 45.312 (42.893)	lr 0.02538
Warmup Train [20][2630/3239]	Time 0.242 (0.235)	Data 0.001 (0.010)	Loss 4.5940 (4.5097)	Top-1 acc 15.234 (21.498)	Top-5 acc 36.719 (42.897)	lr 0.02537
Warmup Train [20][2640/3239]	Time 0.183 (0.235)	Data 0.001 (0.010)	Loss 4.4634 (4.5098)	Top-1 acc 22.656 (21.503)	Top-5 acc 45.703 (42.898)	lr 0.02536
Warmup Train [20][2650/3239]	Time 0.194 (0.235)	Data 0.001 (0.010)	Loss 4.3273 (4.5096)	Top-1 acc 24.609 (21.506)	Top-5 acc 46.094 (42.902)	lr 0.02536
Warmup Train [20][2660/3239]	Time 0.260 (0.235)	Data 0.001 (0.010)	Loss 4.5719 (4.5094)	Top-1 acc 16.016 (21.509)	Top-5 acc 37.109 (42.908)	lr 0.02535
Warmup Train [20][2670/3239]	Time 0.172 (0.234)	Data 0.002 (0.010)	Loss 4.4591 (4.5094)	Top-1 acc 20.703 (21.513)	Top-5 acc 42.969 (42.912)	lr 0.02535
Warmup Train [20][2680/3239]	Time 0.289 (0.234)	Data 0.001 (0.010)	Loss 4.5025 (4.5093)	Top-1 acc 21.875 (21.517)	Top-5 acc 44.922 (42.914)	lr 0.02534
Warmup Train [20][2690/3239]	Time 0.238 (0.234)	Data 0.002 (0.010)	Loss 4.5367 (4.5092)	Top-1 acc 19.141 (21.522)	Top-5 acc 42.188 (42.921)	lr 0.02533
Warmup Train [20][2700/3239]	Time 0.186 (0.234)	Data 0.001 (0.010)	Loss 4.4157 (4.5090)	Top-1 acc 25.391 (21.528)	Top-5 acc 46.094 (42.927)	lr 0.02533
Warmup Train [20][2710/3239]	Time 0.250 (0.234)	Data 0.001 (0.010)	Loss 4.4269 (4.5089)	Top-1 acc 21.484 (21.533)	Top-5 acc 44.141 (42.933)	lr 0.02532
Warmup Train [20][2720/3239]	Time 0.251 (0.234)	Data 0.001 (0.010)	Loss 4.4620 (4.5089)	Top-1 acc 21.094 (21.533)	Top-5 acc 44.531 (42.932)	lr 0.02532
Warmup Train [20][2730/3239]	Time 0.243 (0.234)	Data 0.001 (0.010)	Loss 4.2603 (4.5086)	Top-1 acc 25.781 (21.540)	Top-5 acc 47.266 (42.939)	lr 0.02531
Warmup Train [20][2740/3239]	Time 0.196 (0.234)	Data 0.001 (0.009)	Loss 4.5002 (4.5084)	Top-1 acc 17.578 (21.543)	Top-5 acc 42.188 (42.942)	lr 0.02530
Warmup Train [20][2750/3239]	Time 0.189 (0.234)	Data 0.001 (0.009)	Loss 4.4306 (4.5082)	Top-1 acc 22.656 (21.548)	Top-5 acc 46.094 (42.948)	lr 0.02530
Warmup Train [20][2760/3239]	Time 0.242 (0.234)	Data 0.001 (0.009)	Loss 4.4823 (4.5084)	Top-1 acc 21.875 (21.546)	Top-5 acc 44.141 (42.944)	lr 0.02529
Warmup Train [20][2770/3239]	Time 0.233 (0.234)	Data 0.001 (0.009)	Loss 4.3729 (4.5084)	Top-1 acc 24.219 (21.545)	Top-5 acc 48.047 (42.943)	lr 0.02528
Warmup Train [20][2780/3239]	Time 0.183 (0.234)	Data 0.001 (0.009)	Loss 4.5008 (4.5084)	Top-1 acc 19.531 (21.546)	Top-5 acc 41.797 (42.947)	lr 0.02528
Warmup Train [20][2790/3239]	Time 0.308 (0.234)	Data 0.002 (0.009)	Loss 4.5786 (4.5082)	Top-1 acc 20.312 (21.552)	Top-5 acc 38.672 (42.950)	lr 0.02527
Warmup Train [20][2800/3239]	Time 0.220 (0.234)	Data 0.001 (0.009)	Loss 4.5828 (4.5081)	Top-1 acc 19.922 (21.548)	Top-5 acc 39.453 (42.951)	lr 0.02527
Warmup Train [20][2810/3239]	Time 0.194 (0.234)	Data 0.002 (0.009)	Loss 4.7285 (4.5082)	Top-1 acc 16.016 (21.547)	Top-5 acc 39.062 (42.950)	lr 0.02526
Warmup Train [20][2820/3239]	Time 0.238 (0.234)	Data 0.001 (0.009)	Loss 4.1705 (4.5081)	Top-1 acc 25.000 (21.551)	Top-5 acc 52.344 (42.955)	lr 0.02525
Warmup Train [20][2830/3239]	Time 0.219 (0.234)	Data 0.001 (0.009)	Loss 4.6350 (4.5081)	Top-1 acc 19.531 (21.554)	Top-5 acc 36.328 (42.953)	lr 0.02525
Warmup Train [20][2840/3239]	Time 0.180 (0.234)	Data 0.001 (0.009)	Loss 4.4868 (4.5079)	Top-1 acc 25.781 (21.555)	Top-5 acc 44.141 (42.953)	lr 0.02524
Warmup Train [20][2850/3239]	Time 0.235 (0.234)	Data 0.001 (0.009)	Loss 4.2864 (4.5078)	Top-1 acc 25.391 (21.556)	Top-5 acc 50.391 (42.954)	lr 0.02524
Warmup Train [20][2860/3239]	Time 0.208 (0.234)	Data 0.002 (0.009)	Loss 4.4987 (4.5080)	Top-1 acc 24.609 (21.555)	Top-5 acc 42.188 (42.949)	lr 0.02523
Warmup Train [20][2870/3239]	Time 0.157 (0.234)	Data 0.001 (0.009)	Loss 4.6118 (4.5080)	Top-1 acc 19.531 (21.558)	Top-5 acc 39.062 (42.951)	lr 0.02522
Warmup Train [20][2880/3239]	Time 0.253 (0.234)	Data 0.001 (0.009)	Loss 4.4051 (4.5080)	Top-1 acc 19.141 (21.557)	Top-5 acc 47.266 (42.953)	lr 0.02522
Warmup Train [20][2890/3239]	Time 0.250 (0.234)	Data 0.002 (0.009)	Loss 4.5086 (4.5078)	Top-1 acc 18.750 (21.558)	Top-5 acc 44.141 (42.960)	lr 0.02521
Warmup Train [20][2900/3239]	Time 0.393 (0.234)	Data 0.001 (0.009)	Loss 4.4362 (4.5078)	Top-1 acc 26.953 (21.559)	Top-5 acc 44.141 (42.957)	lr 0.02521
Warmup Train [20][2910/3239]	Time 0.194 (0.234)	Data 0.001 (0.009)	Loss 4.5851 (4.5077)	Top-1 acc 20.312 (21.557)	Top-5 acc 41.797 (42.964)	lr 0.02520
Warmup Train [20][2920/3239]	Time 0.262 (0.234)	Data 0.002 (0.009)	Loss 4.5374 (4.5078)	Top-1 acc 24.609 (21.556)	Top-5 acc 42.969 (42.960)	lr 0.02519
Warmup Train [20][2930/3239]	Time 0.237 (0.234)	Data 0.001 (0.009)	Loss 4.2702 (4.5075)	Top-1 acc 25.391 (21.562)	Top-5 acc 48.047 (42.967)	lr 0.02519
Warmup Train [20][2940/3239]	Time 0.240 (0.234)	Data 0.001 (0.009)	Loss 4.6695 (4.5074)	Top-1 acc 19.141 (21.562)	Top-5 acc 39.453 (42.969)	lr 0.02518
Warmup Train [20][2950/3239]	Time 0.225 (0.234)	Data 0.002 (0.009)	Loss 4.4279 (4.5073)	Top-1 acc 21.875 (21.565)	Top-5 acc 45.312 (42.972)	lr 0.02518
Warmup Train [20][2960/3239]	Time 0.172 (0.234)	Data 0.001 (0.009)	Loss 4.5851 (4.5073)	Top-1 acc 17.969 (21.567)	Top-5 acc 40.625 (42.975)	lr 0.02517
Warmup Train [20][2970/3239]	Time 0.284 (0.234)	Data 0.001 (0.009)	Loss 4.4865 (4.5072)	Top-1 acc 20.312 (21.568)	Top-5 acc 42.969 (42.977)	lr 0.02516
Warmup Train [20][2980/3239]	Time 0.185 (0.234)	Data 0.001 (0.009)	Loss 4.3001 (4.5071)	Top-1 acc 28.516 (21.571)	Top-5 acc 47.656 (42.981)	lr 0.02516
Warmup Train [20][2990/3239]	Time 0.206 (0.234)	Data 0.001 (0.009)	Loss 4.5236 (4.5072)	Top-1 acc 20.312 (21.571)	Top-5 acc 42.188 (42.979)	lr 0.02515
Warmup Train [20][3000/3239]	Time 0.207 (0.234)	Data 0.001 (0.009)	Loss 4.5386 (4.5073)	Top-1 acc 23.828 (21.570)	Top-5 acc 40.234 (42.975)	lr 0.02515
Warmup Train [20][3010/3239]	Time 0.322 (0.234)	Data 0.001 (0.009)	Loss 4.5877 (4.5072)	Top-1 acc 20.703 (21.569)	Top-5 acc 40.625 (42.976)	lr 0.02514
Warmup Train [20][3020/3239]	Time 0.326 (0.234)	Data 0.001 (0.009)	Loss 4.3772 (4.5070)	Top-1 acc 23.828 (21.573)	Top-5 acc 44.531 (42.982)	lr 0.02513
Warmup Train [20][3030/3239]	Time 0.154 (0.234)	Data 0.001 (0.009)	Loss 4.5241 (4.5069)	Top-1 acc 20.703 (21.574)	Top-5 acc 39.844 (42.987)	lr 0.02513
Warmup Train [20][3040/3239]	Time 0.211 (0.234)	Data 0.001 (0.009)	Loss 4.5207 (4.5067)	Top-1 acc 19.531 (21.573)	Top-5 acc 42.188 (42.990)	lr 0.02512
Warmup Train [20][3050/3239]	Time 0.232 (0.234)	Data 0.001 (0.009)	Loss 4.6923 (4.5067)	Top-1 acc 20.703 (21.575)	Top-5 acc 40.625 (42.992)	lr 0.02512
Warmup Train [20][3060/3239]	Time 0.231 (0.234)	Data 0.001 (0.009)	Loss 4.4651 (4.5066)	Top-1 acc 21.875 (21.578)	Top-5 acc 46.094 (42.993)	lr 0.02511
Warmup Train [20][3070/3239]	Time 0.183 (0.234)	Data 0.036 (0.009)	Loss 4.5381 (4.5064)	Top-1 acc 22.266 (21.581)	Top-5 acc 44.922 (42.999)	lr 0.02510
Warmup Train [20][3080/3239]	Time 0.151 (0.234)	Data 0.001 (0.009)	Loss 4.5855 (4.5064)	Top-1 acc 20.703 (21.580)	Top-5 acc 40.234 (42.995)	lr 0.02510
Warmup Train [20][3090/3239]	Time 0.223 (0.234)	Data 0.002 (0.009)	Loss 4.4053 (4.5063)	Top-1 acc 23.438 (21.585)	Top-5 acc 44.531 (42.999)	lr 0.02509
Warmup Train [20][3100/3239]	Time 0.258 (0.234)	Data 0.001 (0.009)	Loss 4.6480 (4.5062)	Top-1 acc 21.875 (21.587)	Top-5 acc 39.844 (43.000)	lr 0.02508
Warmup Train [20][3110/3239]	Time 0.348 (0.234)	Data 0.002 (0.009)	Loss 4.6222 (4.5061)	Top-1 acc 21.875 (21.589)	Top-5 acc 39.062 (42.999)	lr 0.02508
Warmup Train [20][3120/3239]	Time 0.187 (0.234)	Data 0.001 (0.009)	Loss 4.5691 (4.5061)	Top-1 acc 18.750 (21.590)	Top-5 acc 42.188 (42.997)	lr 0.02507
Warmup Train [20][3130/3239]	Time 0.202 (0.233)	Data 0.001 (0.009)	Loss 4.4666 (4.5061)	Top-1 acc 27.344 (21.592)	Top-5 acc 47.656 (42.998)	lr 0.02507
Warmup Train [20][3140/3239]	Time 0.206 (0.233)	Data 0.001 (0.009)	Loss 4.3925 (4.5060)	Top-1 acc 23.828 (21.595)	Top-5 acc 44.922 (42.998)	lr 0.02506
Warmup Train [20][3150/3239]	Time 0.184 (0.233)	Data 0.001 (0.009)	Loss 4.5326 (4.5060)	Top-1 acc 19.141 (21.596)	Top-5 acc 39.844 (42.999)	lr 0.02505
Warmup Train [20][3160/3239]	Time 0.247 (0.233)	Data 0.001 (0.009)	Loss 4.5366 (4.5059)	Top-1 acc 21.094 (21.598)	Top-5 acc 42.188 (43.001)	lr 0.02505
Warmup Train [20][3170/3239]	Time 0.173 (0.233)	Data 0.001 (0.009)	Loss 4.6484 (4.5058)	Top-1 acc 18.750 (21.600)	Top-5 acc 40.234 (43.003)	lr 0.02504
Warmup Train [20][3180/3239]	Time 0.260 (0.233)	Data 0.000 (0.009)	Loss 4.6145 (4.5057)	Top-1 acc 23.828 (21.606)	Top-5 acc 40.234 (43.006)	lr 0.02504
Warmup Train [20][3190/3239]	Time 0.209 (0.233)	Data 0.000 (0.009)	Loss 4.3359 (4.5056)	Top-1 acc 22.266 (21.606)	Top-5 acc 46.875 (43.005)	lr 0.02503
Warmup Train [20][3200/3239]	Time 0.179 (0.233)	Data 0.000 (0.009)	Loss 4.4371 (4.5055)	Top-1 acc 21.484 (21.606)	Top-5 acc 45.703 (43.009)	lr 0.02502
Warmup Train [20][3210/3239]	Time 0.276 (0.233)	Data 0.000 (0.009)	Loss 4.1930 (4.5055)	Top-1 acc 26.953 (21.610)	Top-5 acc 51.562 (43.012)	lr 0.02502
Warmup Train [20][3220/3239]	Time 0.149 (0.233)	Data 0.000 (0.009)	Loss 4.4363 (4.5055)	Top-1 acc 23.438 (21.611)	Top-5 acc 45.703 (43.013)	lr 0.02501
Warmup Train [20][3230/3239]	Time 0.173 (0.233)	Data 0.000 (0.009)	Loss 4.2718 (4.5054)	Top-1 acc 23.438 (21.610)	Top-5 acc 48.047 (43.017)	lr 0.02501
Warmup Train [20][3239/3239]	Time 0.147 (0.233)	Data 0.000 (0.009)	Loss 4.2378 (4.5052)	Top-1 acc 30.864 (21.613)	Top-5 acc 53.086 (43.022)	lr 0.02500
==========Warmup Valid [20/40]	loss 3.522	top-1 acc 28.537	top-5 acc 52.614	Train top-1 21.613	top-5 43.022	flops: 442.4M
Warmup Train [21][0/3239]	Time 17.682 (17.682)	Data 16.083 (16.083)	Loss 4.3695 (4.3695)	Top-1 acc 21.875 (21.875)	Top-5 acc 46.875 (46.875)	lr 0.02500
Warmup Train [21][10/3239]	Time 0.256 (1.947)	Data 0.001 (1.532)	Loss 4.5022 (4.4490)	Top-1 acc 17.578 (21.449)	Top-5 acc 41.016 (44.673)	lr 0.02499
Warmup Train [21][20/3239]	Time 0.365 (1.152)	Data 0.001 (0.804)	Loss 4.3159 (4.4576)	Top-1 acc 26.562 (21.949)	Top-5 acc 46.875 (44.345)	lr 0.02499
Warmup Train [21][30/3239]	Time 0.233 (0.853)	Data 0.001 (0.545)	Loss 4.3315 (4.4581)	Top-1 acc 23.047 (21.636)	Top-5 acc 42.578 (44.065)	lr 0.02498
Warmup Train [21][40/3239]	Time 0.232 (0.701)	Data 0.001 (0.412)	Loss 4.5302 (4.4661)	Top-1 acc 19.922 (21.427)	Top-5 acc 40.625 (43.702)	lr 0.02498
Warmup Train [21][50/3239]	Time 0.183 (0.609)	Data 0.001 (0.332)	Loss 4.4573 (4.4704)	Top-1 acc 21.094 (21.515)	Top-5 acc 42.578 (43.604)	lr 0.02497
Warmup Train [21][60/3239]	Time 0.233 (0.544)	Data 0.001 (0.278)	Loss 4.4078 (4.4631)	Top-1 acc 21.484 (21.676)	Top-5 acc 46.484 (43.814)	lr 0.02496
Warmup Train [21][70/3239]	Time 0.181 (0.500)	Data 0.002 (0.239)	Loss 4.5507 (4.4673)	Top-1 acc 20.312 (21.677)	Top-5 acc 43.750 (43.728)	lr 0.02496
Warmup Train [21][80/3239]	Time 0.266 (0.467)	Data 0.001 (0.210)	Loss 4.5914 (4.4667)	Top-1 acc 22.266 (21.812)	Top-5 acc 43.359 (43.673)	lr 0.02495
Warmup Train [21][90/3239]	Time 0.220 (0.440)	Data 0.001 (0.187)	Loss 4.4296 (4.4683)	Top-1 acc 24.219 (21.965)	Top-5 acc 47.656 (43.750)	lr 0.02495
Warmup Train [21][100/3239]	Time 0.171 (0.420)	Data 0.001 (0.169)	Loss 4.4206 (4.4705)	Top-1 acc 19.922 (21.964)	Top-5 acc 48.047 (43.773)	lr 0.02494
Warmup Train [21][110/3239]	Time 0.154 (0.402)	Data 0.001 (0.154)	Loss 4.4746 (4.4704)	Top-1 acc 24.219 (22.026)	Top-5 acc 46.484 (43.831)	lr 0.02493
Warmup Train [21][120/3239]	Time 0.307 (0.388)	Data 0.001 (0.142)	Loss 4.4237 (4.4674)	Top-1 acc 23.438 (22.149)	Top-5 acc 46.094 (43.934)	lr 0.02493
Warmup Train [21][130/3239]	Time 0.238 (0.376)	Data 0.002 (0.131)	Loss 4.4912 (4.4667)	Top-1 acc 21.875 (22.155)	Top-5 acc 45.312 (43.938)	lr 0.02492
Warmup Train [21][140/3239]	Time 0.200 (0.364)	Data 0.001 (0.122)	Loss 4.7054 (4.4692)	Top-1 acc 16.016 (22.094)	Top-5 acc 35.156 (43.866)	lr 0.02492
Warmup Train [21][150/3239]	Time 0.175 (0.355)	Data 0.002 (0.114)	Loss 4.6618 (4.4719)	Top-1 acc 18.359 (22.048)	Top-5 acc 39.062 (43.750)	lr 0.02491
Warmup Train [21][160/3239]	Time 0.206 (0.347)	Data 0.002 (0.108)	Loss 4.5436 (4.4730)	Top-1 acc 25.391 (22.072)	Top-5 acc 42.578 (43.733)	lr 0.02490
Warmup Train [21][170/3239]	Time 0.205 (0.340)	Data 0.001 (0.101)	Loss 4.5693 (4.4756)	Top-1 acc 24.609 (22.115)	Top-5 acc 43.750 (43.636)	lr 0.02490
Warmup Train [21][180/3239]	Time 0.157 (0.334)	Data 0.001 (0.096)	Loss 4.3000 (4.4736)	Top-1 acc 24.219 (22.123)	Top-5 acc 45.703 (43.674)	lr 0.02489
Warmup Train [21][190/3239]	Time 0.276 (0.328)	Data 0.001 (0.091)	Loss 4.4155 (4.4753)	Top-1 acc 21.094 (22.100)	Top-5 acc 42.578 (43.678)	lr 0.02488
Warmup Train [21][200/3239]	Time 0.261 (0.323)	Data 0.001 (0.087)	Loss 4.4043 (4.4771)	Top-1 acc 23.047 (22.064)	Top-5 acc 49.609 (43.668)	lr 0.02488
Warmup Train [21][210/3239]	Time 0.248 (0.319)	Data 0.001 (0.083)	Loss 4.5227 (4.4776)	Top-1 acc 20.312 (22.038)	Top-5 acc 41.797 (43.650)	lr 0.02487
Warmup Train [21][220/3239]	Time 0.201 (0.315)	Data 0.001 (0.079)	Loss 4.5102 (4.4741)	Top-1 acc 21.094 (22.055)	Top-5 acc 42.578 (43.761)	lr 0.02487
Warmup Train [21][230/3239]	Time 0.156 (0.311)	Data 0.001 (0.076)	Loss 4.5807 (4.4724)	Top-1 acc 17.578 (22.032)	Top-5 acc 41.406 (43.777)	lr 0.02486
Warmup Train [21][240/3239]	Time 0.301 (0.308)	Data 0.001 (0.073)	Loss 4.5185 (4.4737)	Top-1 acc 21.875 (21.993)	Top-5 acc 47.266 (43.740)	lr 0.02485
Warmup Train [21][250/3239]	Time 0.330 (0.305)	Data 0.001 (0.070)	Loss 4.3451 (4.4745)	Top-1 acc 23.438 (22.000)	Top-5 acc 45.312 (43.717)	lr 0.02485
Warmup Train [21][260/3239]	Time 0.144 (0.302)	Data 0.001 (0.068)	Loss 4.6504 (4.4722)	Top-1 acc 17.188 (22.076)	Top-5 acc 37.891 (43.743)	lr 0.02484
Warmup Train [21][270/3239]	Time 0.280 (0.299)	Data 0.001 (0.065)	Loss 4.6755 (4.4722)	Top-1 acc 21.094 (22.049)	Top-5 acc 40.234 (43.754)	lr 0.02484
Warmup Train [21][280/3239]	Time 0.184 (0.296)	Data 0.001 (0.063)	Loss 4.6060 (4.4716)	Top-1 acc 19.531 (22.060)	Top-5 acc 40.625 (43.761)	lr 0.02483
Warmup Train [21][290/3239]	Time 0.222 (0.294)	Data 0.001 (0.061)	Loss 4.4156 (4.4688)	Top-1 acc 23.047 (22.106)	Top-5 acc 42.578 (43.809)	lr 0.02482
Warmup Train [21][300/3239]	Time 0.232 (0.292)	Data 0.002 (0.059)	Loss 4.4104 (4.4691)	Top-1 acc 20.312 (22.089)	Top-5 acc 46.875 (43.840)	lr 0.02482
Warmup Train [21][310/3239]	Time 0.186 (0.289)	Data 0.001 (0.057)	Loss 4.3361 (4.4683)	Top-1 acc 21.875 (22.086)	Top-5 acc 47.656 (43.829)	lr 0.02481
Warmup Train [21][320/3239]	Time 0.197 (0.287)	Data 0.002 (0.056)	Loss 4.4642 (4.4665)	Top-1 acc 24.219 (22.145)	Top-5 acc 45.312 (43.897)	lr 0.02481
Warmup Train [21][330/3239]	Time 0.133 (0.285)	Data 0.001 (0.054)	Loss 4.3245 (4.4659)	Top-1 acc 23.438 (22.170)	Top-5 acc 45.703 (43.903)	lr 0.02480
Warmup Train [21][340/3239]	Time 0.383 (0.284)	Data 0.001 (0.053)	Loss 4.3052 (4.4665)	Top-1 acc 23.438 (22.146)	Top-5 acc 48.438 (43.878)	lr 0.02479
Warmup Train [21][350/3239]	Time 0.228 (0.282)	Data 0.002 (0.051)	Loss 4.3170 (4.4658)	Top-1 acc 24.219 (22.189)	Top-5 acc 43.359 (43.917)	lr 0.02479
Warmup Train [21][360/3239]	Time 0.203 (0.281)	Data 0.002 (0.050)	Loss 4.3741 (4.4649)	Top-1 acc 24.219 (22.201)	Top-5 acc 47.266 (43.934)	lr 0.02478
Warmup Train [21][370/3239]	Time 0.234 (0.280)	Data 0.001 (0.049)	Loss 4.5372 (4.4649)	Top-1 acc 21.875 (22.206)	Top-5 acc 42.578 (43.938)	lr 0.02478
Warmup Train [21][380/3239]	Time 0.183 (0.278)	Data 0.001 (0.047)	Loss 4.3878 (4.4643)	Top-1 acc 25.391 (22.194)	Top-5 acc 45.312 (43.923)	lr 0.02477
Warmup Train [21][390/3239]	Time 0.173 (0.277)	Data 0.001 (0.046)	Loss 4.4267 (4.4635)	Top-1 acc 22.266 (22.197)	Top-5 acc 50.000 (43.973)	lr 0.02476
Warmup Train [21][400/3239]	Time 0.245 (0.276)	Data 0.001 (0.045)	Loss 4.6694 (4.4637)	Top-1 acc 20.312 (22.195)	Top-5 acc 38.281 (43.957)	lr 0.02476
Warmup Train [21][410/3239]	Time 0.268 (0.274)	Data 0.001 (0.044)	Loss 4.2279 (4.4621)	Top-1 acc 27.734 (22.230)	Top-5 acc 50.781 (43.995)	lr 0.02475
Warmup Train [21][420/3239]	Time 0.256 (0.273)	Data 0.002 (0.043)	Loss 4.5306 (4.4620)	Top-1 acc 19.531 (22.231)	Top-5 acc 40.625 (43.978)	lr 0.02475
Warmup Train [21][430/3239]	Time 0.256 (0.272)	Data 0.001 (0.042)	Loss 4.4052 (4.4632)	Top-1 acc 22.656 (22.222)	Top-5 acc 47.656 (43.950)	lr 0.02474
Warmup Train [21][440/3239]	Time 0.141 (0.270)	Data 0.001 (0.041)	Loss 4.3389 (4.4631)	Top-1 acc 21.484 (22.243)	Top-5 acc 45.312 (43.939)	lr 0.02473
Warmup Train [21][450/3239]	Time 0.162 (0.269)	Data 0.002 (0.041)	Loss 4.3785 (4.4626)	Top-1 acc 25.000 (22.262)	Top-5 acc 47.266 (43.936)	lr 0.02473
Warmup Train [21][460/3239]	Time 0.223 (0.268)	Data 0.001 (0.040)	Loss 4.6939 (4.4634)	Top-1 acc 19.531 (22.257)	Top-5 acc 37.500 (43.910)	lr 0.02472
Warmup Train [21][470/3239]	Time 0.244 (0.268)	Data 0.001 (0.039)	Loss 4.4714 (4.4648)	Top-1 acc 26.172 (22.264)	Top-5 acc 45.703 (43.885)	lr 0.02472
Warmup Train [21][480/3239]	Time 0.210 (0.267)	Data 0.001 (0.038)	Loss 4.5318 (4.4654)	Top-1 acc 21.484 (22.248)	Top-5 acc 41.797 (43.852)	lr 0.02471
Warmup Train [21][490/3239]	Time 0.225 (0.266)	Data 0.001 (0.038)	Loss 4.6300 (4.4650)	Top-1 acc 19.141 (22.256)	Top-5 acc 39.844 (43.860)	lr 0.02470
Warmup Train [21][500/3239]	Time 0.224 (0.265)	Data 0.001 (0.037)	Loss 4.4765 (4.4650)	Top-1 acc 22.266 (22.263)	Top-5 acc 44.141 (43.883)	lr 0.02470
Warmup Train [21][510/3239]	Time 0.238 (0.265)	Data 0.001 (0.036)	Loss 4.3791 (4.4639)	Top-1 acc 21.875 (22.300)	Top-5 acc 46.094 (43.909)	lr 0.02469
Warmup Train [21][520/3239]	Time 0.248 (0.264)	Data 0.001 (0.035)	Loss 4.2368 (4.4633)	Top-1 acc 26.562 (22.294)	Top-5 acc 51.562 (43.919)	lr 0.02468
Warmup Train [21][530/3239]	Time 0.206 (0.263)	Data 0.002 (0.035)	Loss 4.5524 (4.4635)	Top-1 acc 21.094 (22.288)	Top-5 acc 41.016 (43.921)	lr 0.02468
Warmup Train [21][540/3239]	Time 0.297 (0.263)	Data 0.002 (0.034)	Loss 4.7552 (4.4649)	Top-1 acc 15.625 (22.277)	Top-5 acc 35.547 (43.881)	lr 0.02467
Warmup Train [21][550/3239]	Time 0.250 (0.262)	Data 0.001 (0.034)	Loss 4.4607 (4.4650)	Top-1 acc 21.094 (22.288)	Top-5 acc 42.578 (43.861)	lr 0.02467
Warmup Train [21][560/3239]	Time 0.221 (0.261)	Data 0.002 (0.033)	Loss 4.7449 (4.4658)	Top-1 acc 16.016 (22.278)	Top-5 acc 38.281 (43.850)	lr 0.02466
Warmup Train [21][570/3239]	Time 0.214 (0.261)	Data 0.001 (0.033)	Loss 4.6363 (4.4656)	Top-1 acc 20.703 (22.295)	Top-5 acc 42.188 (43.862)	lr 0.02465
Warmup Train [21][580/3239]	Time 0.272 (0.260)	Data 0.001 (0.032)	Loss 4.5558 (4.4655)	Top-1 acc 22.656 (22.290)	Top-5 acc 41.406 (43.859)	lr 0.02465
Warmup Train [21][590/3239]	Time 0.203 (0.260)	Data 0.002 (0.032)	Loss 4.5687 (4.4656)	Top-1 acc 22.266 (22.289)	Top-5 acc 41.406 (43.853)	lr 0.02464
Warmup Train [21][600/3239]	Time 0.176 (0.259)	Data 0.001 (0.031)	Loss 4.5235 (4.4652)	Top-1 acc 21.484 (22.279)	Top-5 acc 44.141 (43.861)	lr 0.02464
Warmup Train [21][610/3239]	Time 0.226 (0.258)	Data 0.002 (0.031)	Loss 4.6162 (4.4657)	Top-1 acc 23.438 (22.276)	Top-5 acc 40.625 (43.857)	lr 0.02463
Warmup Train [21][620/3239]	Time 0.259 (0.258)	Data 0.001 (0.030)	Loss 4.3582 (4.4652)	Top-1 acc 21.484 (22.268)	Top-5 acc 42.969 (43.883)	lr 0.02462
Warmup Train [21][630/3239]	Time 0.135 (0.257)	Data 0.001 (0.030)	Loss 4.3730 (4.4648)	Top-1 acc 22.656 (22.272)	Top-5 acc 50.391 (43.900)	lr 0.02462
Warmup Train [21][640/3239]	Time 0.292 (0.257)	Data 0.001 (0.029)	Loss 4.5410 (4.4649)	Top-1 acc 20.703 (22.269)	Top-5 acc 39.453 (43.897)	lr 0.02461
Warmup Train [21][650/3239]	Time 0.296 (0.256)	Data 0.002 (0.029)	Loss 4.4419 (4.4652)	Top-1 acc 22.266 (22.248)	Top-5 acc 47.266 (43.904)	lr 0.02461
Warmup Train [21][660/3239]	Time 0.219 (0.256)	Data 0.001 (0.029)	Loss 4.4305 (4.4649)	Top-1 acc 22.266 (22.255)	Top-5 acc 45.312 (43.918)	lr 0.02460
Warmup Train [21][670/3239]	Time 0.209 (0.256)	Data 0.002 (0.028)	Loss 4.3405 (4.4646)	Top-1 acc 24.609 (22.277)	Top-5 acc 47.266 (43.928)	lr 0.02459
Warmup Train [21][680/3239]	Time 0.197 (0.255)	Data 0.002 (0.028)	Loss 4.3491 (4.4648)	Top-1 acc 24.219 (22.265)	Top-5 acc 43.359 (43.922)	lr 0.02459
Warmup Train [21][690/3239]	Time 0.165 (0.255)	Data 0.001 (0.027)	Loss 4.3473 (4.4653)	Top-1 acc 25.781 (22.266)	Top-5 acc 48.828 (43.918)	lr 0.02458
Warmup Train [21][700/3239]	Time 0.191 (0.254)	Data 0.001 (0.027)	Loss 4.2718 (4.4649)	Top-1 acc 25.391 (22.273)	Top-5 acc 50.391 (43.938)	lr 0.02458
Warmup Train [21][710/3239]	Time 0.256 (0.254)	Data 0.003 (0.027)	Loss 4.4516 (4.4652)	Top-1 acc 23.438 (22.272)	Top-5 acc 43.359 (43.921)	lr 0.02457
Warmup Train [21][720/3239]	Time 0.341 (0.254)	Data 0.002 (0.027)	Loss 4.4902 (4.4642)	Top-1 acc 22.266 (22.288)	Top-5 acc 43.359 (43.952)	lr 0.02456
Warmup Train [21][730/3239]	Time 0.168 (0.254)	Data 0.001 (0.026)	Loss 4.4616 (4.4644)	Top-1 acc 19.922 (22.271)	Top-5 acc 42.969 (43.941)	lr 0.02456
Warmup Train [21][740/3239]	Time 0.171 (0.253)	Data 0.001 (0.026)	Loss 4.3316 (4.4638)	Top-1 acc 25.781 (22.287)	Top-5 acc 45.312 (43.948)	lr 0.02455
Warmup Train [21][750/3239]	Time 0.273 (0.253)	Data 0.001 (0.026)	Loss 4.5741 (4.4643)	Top-1 acc 21.484 (22.296)	Top-5 acc 40.625 (43.933)	lr 0.02455
Warmup Train [21][760/3239]	Time 0.352 (0.253)	Data 0.001 (0.025)	Loss 4.4271 (4.4649)	Top-1 acc 20.312 (22.286)	Top-5 acc 44.141 (43.915)	lr 0.02454
Warmup Train [21][770/3239]	Time 0.249 (0.252)	Data 0.001 (0.025)	Loss 4.4952 (4.4646)	Top-1 acc 23.047 (22.291)	Top-5 acc 44.531 (43.921)	lr 0.02453
Warmup Train [21][780/3239]	Time 0.238 (0.252)	Data 0.001 (0.025)	Loss 4.3894 (4.4642)	Top-1 acc 24.219 (22.292)	Top-5 acc 50.391 (43.934)	lr 0.02453
Warmup Train [21][790/3239]	Time 0.194 (0.252)	Data 0.001 (0.024)	Loss 4.6453 (4.4644)	Top-1 acc 21.484 (22.297)	Top-5 acc 40.625 (43.939)	lr 0.02452
Warmup Train [21][800/3239]	Time 0.244 (0.252)	Data 0.001 (0.024)	Loss 4.5883 (4.4642)	Top-1 acc 22.656 (22.309)	Top-5 acc 42.969 (43.944)	lr 0.02452
Warmup Train [21][810/3239]	Time 0.185 (0.251)	Data 0.001 (0.024)	Loss 4.1728 (4.4637)	Top-1 acc 25.781 (22.313)	Top-5 acc 50.391 (43.957)	lr 0.02451
Warmup Train [21][820/3239]	Time 0.259 (0.251)	Data 0.002 (0.024)	Loss 4.3871 (4.4637)	Top-1 acc 21.484 (22.315)	Top-5 acc 44.531 (43.958)	lr 0.02450
Warmup Train [21][830/3239]	Time 0.228 (0.251)	Data 0.001 (0.023)	Loss 4.5763 (4.4644)	Top-1 acc 19.531 (22.303)	Top-5 acc 39.062 (43.933)	lr 0.02450
Warmup Train [21][840/3239]	Time 0.242 (0.250)	Data 0.001 (0.023)	Loss 4.3095 (4.4638)	Top-1 acc 27.734 (22.302)	Top-5 acc 51.172 (43.954)	lr 0.02449
Warmup Train [21][850/3239]	Time 0.225 (0.250)	Data 0.001 (0.023)	Loss 4.5492 (4.4641)	Top-1 acc 18.359 (22.295)	Top-5 acc 41.797 (43.950)	lr 0.02448
Warmup Train [21][860/3239]	Time 0.264 (0.250)	Data 0.001 (0.023)	Loss 4.3703 (4.4642)	Top-1 acc 23.828 (22.292)	Top-5 acc 45.703 (43.949)	lr 0.02448
Warmup Train [21][870/3239]	Time 0.250 (0.250)	Data 0.001 (0.022)	Loss 4.4333 (4.4646)	Top-1 acc 22.266 (22.282)	Top-5 acc 39.844 (43.935)	lr 0.02447
Warmup Train [21][880/3239]	Time 0.270 (0.249)	Data 0.001 (0.022)	Loss 4.7271 (4.4649)	Top-1 acc 16.406 (22.266)	Top-5 acc 41.016 (43.921)	lr 0.02447
Warmup Train [21][890/3239]	Time 0.227 (0.249)	Data 0.001 (0.022)	Loss 4.5159 (4.4646)	Top-1 acc 20.312 (22.276)	Top-5 acc 44.922 (43.927)	lr 0.02446
Warmup Train [21][900/3239]	Time 0.239 (0.249)	Data 0.001 (0.022)	Loss 4.4240 (4.4644)	Top-1 acc 21.875 (22.279)	Top-5 acc 47.656 (43.932)	lr 0.02445
Warmup Train [21][910/3239]	Time 0.164 (0.248)	Data 0.001 (0.022)	Loss 4.6142 (4.4651)	Top-1 acc 21.484 (22.260)	Top-5 acc 39.844 (43.918)	lr 0.02445
Warmup Train [21][920/3239]	Time 0.255 (0.248)	Data 0.001 (0.021)	Loss 4.2983 (4.4652)	Top-1 acc 26.562 (22.265)	Top-5 acc 47.656 (43.917)	lr 0.02444
Warmup Train [21][930/3239]	Time 0.237 (0.248)	Data 0.001 (0.021)	Loss 4.4838 (4.4650)	Top-1 acc 20.312 (22.278)	Top-5 acc 48.438 (43.925)	lr 0.02444
Warmup Train [21][940/3239]	Time 0.261 (0.248)	Data 0.001 (0.021)	Loss 4.3448 (4.4651)	Top-1 acc 23.438 (22.270)	Top-5 acc 48.047 (43.918)	lr 0.02443
Warmup Train [21][950/3239]	Time 0.161 (0.247)	Data 0.001 (0.021)	Loss 4.5871 (4.4650)	Top-1 acc 21.094 (22.280)	Top-5 acc 41.016 (43.928)	lr 0.02442
Warmup Train [21][960/3239]	Time 0.264 (0.247)	Data 0.001 (0.021)	Loss 4.4205 (4.4646)	Top-1 acc 23.047 (22.287)	Top-5 acc 46.875 (43.941)	lr 0.02442
Warmup Train [21][970/3239]	Time 0.208 (0.247)	Data 0.001 (0.021)	Loss 4.3967 (4.4646)	Top-1 acc 25.781 (22.287)	Top-5 acc 46.875 (43.945)	lr 0.02441
Warmup Train [21][980/3239]	Time 0.208 (0.247)	Data 0.001 (0.020)	Loss 4.4642 (4.4649)	Top-1 acc 21.094 (22.286)	Top-5 acc 45.312 (43.942)	lr 0.02441
Warmup Train [21][990/3239]	Time 0.373 (0.247)	Data 0.001 (0.020)	Loss 4.3814 (4.4647)	Top-1 acc 23.047 (22.287)	Top-5 acc 45.312 (43.946)	lr 0.02440
Warmup Train [21][1000/3239]	Time 0.199 (0.246)	Data 0.001 (0.020)	Loss 4.4594 (4.4648)	Top-1 acc 25.781 (22.287)	Top-5 acc 47.266 (43.954)	lr 0.02439
Warmup Train [21][1010/3239]	Time 0.238 (0.246)	Data 0.001 (0.020)	Loss 4.5003 (4.4652)	Top-1 acc 23.828 (22.285)	Top-5 acc 44.531 (43.951)	lr 0.02439
Warmup Train [21][1020/3239]	Time 0.219 (0.246)	Data 0.001 (0.020)	Loss 4.5185 (4.4654)	Top-1 acc 22.266 (22.288)	Top-5 acc 42.188 (43.946)	lr 0.02438
Warmup Train [21][1030/3239]	Time 0.257 (0.246)	Data 0.001 (0.019)	Loss 4.2634 (4.4650)	Top-1 acc 23.438 (22.300)	Top-5 acc 50.391 (43.953)	lr 0.02438
Warmup Train [21][1040/3239]	Time 0.191 (0.246)	Data 0.001 (0.019)	Loss 4.5393 (4.4649)	Top-1 acc 21.875 (22.303)	Top-5 acc 40.625 (43.948)	lr 0.02437
Warmup Train [21][1050/3239]	Time 0.250 (0.245)	Data 0.001 (0.019)	Loss 4.3817 (4.4649)	Top-1 acc 20.703 (22.296)	Top-5 acc 46.484 (43.959)	lr 0.02436
Warmup Train [21][1060/3239]	Time 0.237 (0.245)	Data 0.002 (0.019)	Loss 4.3979 (4.4646)	Top-1 acc 23.828 (22.301)	Top-5 acc 42.969 (43.972)	lr 0.02436
Warmup Train [21][1070/3239]	Time 0.202 (0.245)	Data 0.001 (0.019)	Loss 4.3872 (4.4646)	Top-1 acc 26.172 (22.300)	Top-5 acc 48.438 (43.977)	lr 0.02435
Warmup Train [21][1080/3239]	Time 0.205 (0.245)	Data 0.001 (0.019)	Loss 4.4647 (4.4643)	Top-1 acc 22.266 (22.304)	Top-5 acc 46.094 (43.983)	lr 0.02435
Warmup Train [21][1090/3239]	Time 0.283 (0.245)	Data 0.001 (0.019)	Loss 4.4755 (4.4643)	Top-1 acc 23.047 (22.310)	Top-5 acc 46.484 (43.986)	lr 0.02434
Warmup Train [21][1100/3239]	Time 0.180 (0.245)	Data 0.001 (0.018)	Loss 4.5016 (4.4639)	Top-1 acc 17.188 (22.310)	Top-5 acc 44.141 (43.991)	lr 0.02433
Warmup Train [21][1110/3239]	Time 0.286 (0.244)	Data 0.001 (0.018)	Loss 4.4885 (4.4637)	Top-1 acc 25.000 (22.315)	Top-5 acc 42.578 (44.000)	lr 0.02433
Warmup Train [21][1120/3239]	Time 0.196 (0.244)	Data 0.002 (0.018)	Loss 4.3549 (4.4634)	Top-1 acc 23.828 (22.315)	Top-5 acc 44.531 (44.002)	lr 0.02432
Warmup Train [21][1130/3239]	Time 0.289 (0.244)	Data 0.001 (0.018)	Loss 4.3852 (4.4645)	Top-1 acc 23.438 (22.302)	Top-5 acc 44.922 (43.972)	lr 0.02432
Warmup Train [21][1140/3239]	Time 0.260 (0.244)	Data 0.001 (0.018)	Loss 4.4025 (4.4645)	Top-1 acc 24.609 (22.300)	Top-5 acc 45.312 (43.975)	lr 0.02431
Warmup Train [21][1150/3239]	Time 0.207 (0.244)	Data 0.001 (0.018)	Loss 4.6771 (4.4643)	Top-1 acc 20.703 (22.307)	Top-5 acc 39.844 (43.986)	lr 0.02430
Warmup Train [21][1160/3239]	Time 0.242 (0.244)	Data 0.001 (0.018)	Loss 4.4381 (4.4645)	Top-1 acc 25.000 (22.315)	Top-5 acc 46.094 (43.985)	lr 0.02430
Warmup Train [21][1170/3239]	Time 0.213 (0.244)	Data 0.002 (0.017)	Loss 4.6048 (4.4645)	Top-1 acc 21.875 (22.321)	Top-5 acc 43.750 (43.991)	lr 0.02429
Warmup Train [21][1180/3239]	Time 0.217 (0.244)	Data 0.001 (0.017)	Loss 4.4182 (4.4641)	Top-1 acc 25.000 (22.321)	Top-5 acc 46.094 (44.001)	lr 0.02428
Warmup Train [21][1190/3239]	Time 0.136 (0.244)	Data 0.001 (0.017)	Loss 4.5020 (4.4641)	Top-1 acc 18.359 (22.313)	Top-5 acc 44.141 (43.998)	lr 0.02428
Warmup Train [21][1200/3239]	Time 0.381 (0.244)	Data 0.001 (0.017)	Loss 4.2642 (4.4635)	Top-1 acc 28.906 (22.328)	Top-5 acc 47.656 (44.007)	lr 0.02427
Warmup Train [21][1210/3239]	Time 0.243 (0.243)	Data 0.001 (0.017)	Loss 4.5016 (4.4631)	Top-1 acc 23.828 (22.336)	Top-5 acc 44.141 (44.001)	lr 0.02427
Warmup Train [21][1220/3239]	Time 0.220 (0.243)	Data 0.001 (0.017)	Loss 4.4549 (4.4630)	Top-1 acc 24.219 (22.342)	Top-5 acc 47.266 (44.009)	lr 0.02426
Warmup Train [21][1230/3239]	Time 0.229 (0.243)	Data 0.001 (0.017)	Loss 4.3218 (4.4634)	Top-1 acc 25.000 (22.338)	Top-5 acc 47.656 (44.004)	lr 0.02425
Warmup Train [21][1240/3239]	Time 0.214 (0.243)	Data 0.001 (0.017)	Loss 4.2162 (4.4629)	Top-1 acc 30.078 (22.347)	Top-5 acc 48.047 (44.019)	lr 0.02425
Warmup Train [21][1250/3239]	Time 0.184 (0.243)	Data 0.001 (0.016)	Loss 4.4631 (4.4631)	Top-1 acc 22.266 (22.342)	Top-5 acc 45.703 (44.012)	lr 0.02424
Warmup Train [21][1260/3239]	Time 0.248 (0.243)	Data 0.001 (0.016)	Loss 4.3787 (4.4627)	Top-1 acc 23.828 (22.344)	Top-5 acc 46.875 (44.026)	lr 0.02424
Warmup Train [21][1270/3239]	Time 0.218 (0.243)	Data 0.001 (0.016)	Loss 4.3738 (4.4627)	Top-1 acc 24.609 (22.344)	Top-5 acc 46.875 (44.022)	lr 0.02423
Warmup Train [21][1280/3239]	Time 0.228 (0.242)	Data 0.001 (0.016)	Loss 4.4538 (4.4627)	Top-1 acc 25.391 (22.349)	Top-5 acc 42.578 (44.019)	lr 0.02422
Warmup Train [21][1290/3239]	Time 0.133 (0.242)	Data 0.001 (0.016)	Loss 4.2949 (4.4622)	Top-1 acc 26.172 (22.348)	Top-5 acc 51.953 (44.029)	lr 0.02422
Warmup Train [21][1300/3239]	Time 0.309 (0.242)	Data 0.001 (0.016)	Loss 4.3534 (4.4623)	Top-1 acc 25.000 (22.353)	Top-5 acc 46.094 (44.030)	lr 0.02421
Warmup Train [21][1310/3239]	Time 0.198 (0.242)	Data 0.002 (0.016)	Loss 4.4444 (4.4619)	Top-1 acc 23.828 (22.360)	Top-5 acc 46.875 (44.034)	lr 0.02421
Warmup Train [21][1320/3239]	Time 0.221 (0.242)	Data 0.002 (0.016)	Loss 4.4221 (4.4617)	Top-1 acc 21.875 (22.361)	Top-5 acc 43.750 (44.039)	lr 0.02420
Warmup Train [21][1330/3239]	Time 0.159 (0.242)	Data 0.001 (0.016)	Loss 4.5542 (4.4616)	Top-1 acc 20.312 (22.366)	Top-5 acc 41.016 (44.038)	lr 0.02419
Warmup Train [21][1340/3239]	Time 0.231 (0.242)	Data 0.001 (0.016)	Loss 4.3594 (4.4611)	Top-1 acc 23.047 (22.369)	Top-5 acc 46.875 (44.044)	lr 0.02419
Warmup Train [21][1350/3239]	Time 0.212 (0.242)	Data 0.001 (0.016)	Loss 4.4862 (4.4609)	Top-1 acc 23.828 (22.378)	Top-5 acc 43.750 (44.048)	lr 0.02418
Warmup Train [21][1360/3239]	Time 0.148 (0.241)	Data 0.001 (0.015)	Loss 4.7028 (4.4611)	Top-1 acc 19.922 (22.382)	Top-5 acc 36.719 (44.052)	lr 0.02418
Warmup Train [21][1370/3239]	Time 0.226 (0.241)	Data 0.001 (0.015)	Loss 4.4929 (4.4609)	Top-1 acc 23.438 (22.384)	Top-5 acc 43.359 (44.060)	lr 0.02417
Warmup Train [21][1380/3239]	Time 0.276 (0.241)	Data 0.001 (0.015)	Loss 4.5595 (4.4608)	Top-1 acc 21.484 (22.382)	Top-5 acc 41.797 (44.061)	lr 0.02416
Warmup Train [21][1390/3239]	Time 0.224 (0.241)	Data 0.001 (0.015)	Loss 4.4560 (4.4608)	Top-1 acc 21.875 (22.384)	Top-5 acc 45.312 (44.065)	lr 0.02416
Warmup Train [21][1400/3239]	Time 0.357 (0.241)	Data 0.001 (0.015)	Loss 4.2202 (4.4602)	Top-1 acc 30.469 (22.401)	Top-5 acc 52.734 (44.080)	lr 0.02415
Warmup Train [21][1410/3239]	Time 0.208 (0.241)	Data 0.002 (0.015)	Loss 4.4460 (4.4599)	Top-1 acc 18.359 (22.397)	Top-5 acc 41.016 (44.084)	lr 0.02415
Warmup Train [21][1420/3239]	Time 0.253 (0.241)	Data 0.001 (0.015)	Loss 4.3632 (4.4600)	Top-1 acc 23.828 (22.402)	Top-5 acc 47.656 (44.082)	lr 0.02414
Warmup Train [21][1430/3239]	Time 0.174 (0.241)	Data 0.001 (0.015)	Loss 4.4083 (4.4600)	Top-1 acc 19.141 (22.403)	Top-5 acc 47.656 (44.080)	lr 0.02413
Warmup Train [21][1440/3239]	Time 0.278 (0.241)	Data 0.001 (0.015)	Loss 4.2404 (4.4599)	Top-1 acc 27.734 (22.414)	Top-5 acc 48.438 (44.084)	lr 0.02413
Warmup Train [21][1450/3239]	Time 0.182 (0.240)	Data 0.001 (0.015)	Loss 4.3549 (4.4598)	Top-1 acc 23.047 (22.416)	Top-5 acc 45.312 (44.084)	lr 0.02412
Warmup Train [21][1460/3239]	Time 0.174 (0.240)	Data 0.001 (0.014)	Loss 4.5003 (4.4602)	Top-1 acc 26.562 (22.416)	Top-5 acc 43.750 (44.079)	lr 0.02412
Warmup Train [21][1470/3239]	Time 0.263 (0.240)	Data 0.002 (0.014)	Loss 4.4864 (4.4602)	Top-1 acc 23.828 (22.420)	Top-5 acc 45.312 (44.078)	lr 0.02411
Warmup Train [21][1480/3239]	Time 0.236 (0.240)	Data 0.001 (0.014)	Loss 4.3915 (4.4599)	Top-1 acc 23.438 (22.423)	Top-5 acc 48.047 (44.090)	lr 0.02410
Warmup Train [21][1490/3239]	Time 0.140 (0.240)	Data 0.002 (0.014)	Loss 4.5306 (4.4602)	Top-1 acc 18.750 (22.413)	Top-5 acc 41.797 (44.076)	lr 0.02410
Warmup Train [21][1500/3239]	Time 0.374 (0.240)	Data 0.001 (0.014)	Loss 4.3282 (4.4600)	Top-1 acc 26.562 (22.417)	Top-5 acc 50.391 (44.079)	lr 0.02409
Warmup Train [21][1510/3239]	Time 0.183 (0.240)	Data 0.001 (0.014)	Loss 4.3418 (4.4601)	Top-1 acc 24.609 (22.413)	Top-5 acc 48.438 (44.077)	lr 0.02409
Warmup Train [21][1520/3239]	Time 0.324 (0.240)	Data 0.001 (0.014)	Loss 4.2500 (4.4598)	Top-1 acc 23.828 (22.417)	Top-5 acc 50.000 (44.081)	lr 0.02408
Warmup Train [21][1530/3239]	Time 0.211 (0.240)	Data 0.025 (0.014)	Loss 4.6408 (4.4595)	Top-1 acc 18.359 (22.424)	Top-5 acc 39.453 (44.091)	lr 0.02407
Warmup Train [21][1540/3239]	Time 0.206 (0.240)	Data 0.001 (0.014)	Loss 4.4914 (4.4595)	Top-1 acc 19.922 (22.423)	Top-5 acc 42.969 (44.094)	lr 0.02407
Warmup Train [21][1550/3239]	Time 0.212 (0.240)	Data 0.002 (0.014)	Loss 4.3872 (4.4599)	Top-1 acc 21.484 (22.410)	Top-5 acc 44.922 (44.082)	lr 0.02406
Warmup Train [21][1560/3239]	Time 0.196 (0.240)	Data 0.001 (0.014)	Loss 4.5889 (4.4601)	Top-1 acc 16.016 (22.403)	Top-5 acc 39.844 (44.071)	lr 0.02405
Warmup Train [21][1570/3239]	Time 0.189 (0.240)	Data 0.003 (0.014)	Loss 4.3276 (4.4600)	Top-1 acc 23.438 (22.406)	Top-5 acc 46.875 (44.077)	lr 0.02405
Warmup Train [21][1580/3239]	Time 0.200 (0.240)	Data 0.002 (0.014)	Loss 4.6522 (4.4601)	Top-1 acc 18.750 (22.405)	Top-5 acc 38.281 (44.071)	lr 0.02404
Warmup Train [21][1590/3239]	Time 0.204 (0.239)	Data 0.001 (0.014)	Loss 4.2469 (4.4599)	Top-1 acc 25.391 (22.407)	Top-5 acc 47.266 (44.074)	lr 0.02404
Warmup Train [21][1600/3239]	Time 0.329 (0.239)	Data 0.001 (0.014)	Loss 4.5189 (4.4597)	Top-1 acc 20.703 (22.413)	Top-5 acc 43.359 (44.087)	lr 0.02403
Warmup Train [21][1610/3239]	Time 0.344 (0.239)	Data 0.007 (0.013)	Loss 4.5886 (4.4595)	Top-1 acc 17.578 (22.414)	Top-5 acc 40.625 (44.091)	lr 0.02402
Warmup Train [21][1620/3239]	Time 0.164 (0.239)	Data 0.001 (0.013)	Loss 4.4603 (4.4595)	Top-1 acc 24.219 (22.411)	Top-5 acc 45.312 (44.092)	lr 0.02402
Warmup Train [21][1630/3239]	Time 0.183 (0.239)	Data 0.001 (0.013)	Loss 4.5446 (4.4597)	Top-1 acc 19.531 (22.411)	Top-5 acc 39.453 (44.088)	lr 0.02401
Warmup Train [21][1640/3239]	Time 0.261 (0.239)	Data 0.001 (0.013)	Loss 4.6169 (4.4598)	Top-1 acc 21.094 (22.411)	Top-5 acc 38.672 (44.086)	lr 0.02401
Warmup Train [21][1650/3239]	Time 0.244 (0.239)	Data 0.001 (0.013)	Loss 4.1673 (4.4596)	Top-1 acc 24.219 (22.415)	Top-5 acc 52.344 (44.099)	lr 0.02400
Warmup Train [21][1660/3239]	Time 0.197 (0.239)	Data 0.001 (0.013)	Loss 4.4116 (4.4597)	Top-1 acc 23.828 (22.416)	Top-5 acc 46.094 (44.092)	lr 0.02399
Warmup Train [21][1670/3239]	Time 0.207 (0.239)	Data 0.002 (0.013)	Loss 4.4854 (4.4597)	Top-1 acc 23.438 (22.415)	Top-5 acc 45.312 (44.093)	lr 0.02399
Warmup Train [21][1680/3239]	Time 0.201 (0.239)	Data 0.001 (0.013)	Loss 4.3673 (4.4595)	Top-1 acc 21.094 (22.416)	Top-5 acc 44.141 (44.096)	lr 0.02398
Warmup Train [21][1690/3239]	Time 0.276 (0.239)	Data 0.001 (0.013)	Loss 4.4277 (4.4594)	Top-1 acc 21.875 (22.419)	Top-5 acc 48.047 (44.102)	lr 0.02398
Warmup Train [21][1700/3239]	Time 0.251 (0.239)	Data 0.001 (0.013)	Loss 4.3090 (4.4596)	Top-1 acc 23.828 (22.413)	Top-5 acc 48.828 (44.100)	lr 0.02397
Warmup Train [21][1710/3239]	Time 0.202 (0.239)	Data 0.001 (0.013)	Loss 4.5642 (4.4595)	Top-1 acc 22.656 (22.414)	Top-5 acc 42.188 (44.103)	lr 0.02396
Warmup Train [21][1720/3239]	Time 0.198 (0.239)	Data 0.001 (0.013)	Loss 4.1878 (4.4592)	Top-1 acc 26.172 (22.417)	Top-5 acc 51.172 (44.115)	lr 0.02396
Warmup Train [21][1730/3239]	Time 0.230 (0.239)	Data 0.001 (0.013)	Loss 4.6175 (4.4594)	Top-1 acc 21.094 (22.415)	Top-5 acc 39.453 (44.113)	lr 0.02395
Warmup Train [21][1740/3239]	Time 0.174 (0.239)	Data 0.001 (0.013)	Loss 4.4729 (4.4590)	Top-1 acc 23.047 (22.425)	Top-5 acc 41.016 (44.118)	lr 0.02395
Warmup Train [21][1750/3239]	Time 0.221 (0.239)	Data 0.002 (0.013)	Loss 4.3645 (4.4583)	Top-1 acc 25.000 (22.442)	Top-5 acc 45.312 (44.133)	lr 0.02394
Warmup Train [21][1760/3239]	Time 0.223 (0.239)	Data 0.001 (0.013)	Loss 4.4436 (4.4584)	Top-1 acc 22.266 (22.440)	Top-5 acc 43.750 (44.131)	lr 0.02393
Warmup Train [21][1770/3239]	Time 0.226 (0.239)	Data 0.001 (0.013)	Loss 4.4662 (4.4582)	Top-1 acc 21.094 (22.444)	Top-5 acc 42.578 (44.137)	lr 0.02393
Warmup Train [21][1780/3239]	Time 0.176 (0.238)	Data 0.001 (0.012)	Loss 4.3379 (4.4578)	Top-1 acc 24.219 (22.451)	Top-5 acc 47.656 (44.151)	lr 0.02392
Warmup Train [21][1790/3239]	Time 0.185 (0.238)	Data 0.001 (0.012)	Loss 4.3036 (4.4577)	Top-1 acc 27.734 (22.458)	Top-5 acc 48.438 (44.154)	lr 0.02392
Warmup Train [21][1800/3239]	Time 0.188 (0.238)	Data 0.001 (0.012)	Loss 4.5768 (4.4575)	Top-1 acc 17.188 (22.453)	Top-5 acc 38.281 (44.158)	lr 0.02391
Warmup Train [21][1810/3239]	Time 0.348 (0.238)	Data 0.001 (0.012)	Loss 4.4240 (4.4575)	Top-1 acc 25.781 (22.457)	Top-5 acc 46.875 (44.159)	lr 0.02390
Warmup Train [21][1820/3239]	Time 0.242 (0.238)	Data 0.001 (0.012)	Loss 4.5732 (4.4574)	Top-1 acc 19.531 (22.460)	Top-5 acc 43.750 (44.163)	lr 0.02390
Warmup Train [21][1830/3239]	Time 0.216 (0.238)	Data 0.001 (0.012)	Loss 4.3782 (4.4577)	Top-1 acc 21.094 (22.458)	Top-5 acc 43.750 (44.155)	lr 0.02389
Warmup Train [21][1840/3239]	Time 0.189 (0.238)	Data 0.001 (0.012)	Loss 4.4159 (4.4575)	Top-1 acc 25.000 (22.460)	Top-5 acc 45.703 (44.163)	lr 0.02389
Warmup Train [21][1850/3239]	Time 0.312 (0.238)	Data 0.002 (0.012)	Loss 4.2959 (4.4575)	Top-1 acc 21.875 (22.457)	Top-5 acc 45.703 (44.156)	lr 0.02388
Warmup Train [21][1860/3239]	Time 0.210 (0.238)	Data 0.003 (0.012)	Loss 4.5991 (4.4574)	Top-1 acc 19.922 (22.458)	Top-5 acc 41.797 (44.153)	lr 0.02387
Warmup Train [21][1870/3239]	Time 0.216 (0.238)	Data 0.001 (0.012)	Loss 4.4715 (4.4574)	Top-1 acc 23.828 (22.452)	Top-5 acc 45.312 (44.154)	lr 0.02387
Warmup Train [21][1880/3239]	Time 0.199 (0.238)	Data 0.001 (0.012)	Loss 4.5404 (4.4573)	Top-1 acc 23.047 (22.453)	Top-5 acc 41.797 (44.153)	lr 0.02386
Warmup Train [21][1890/3239]	Time 0.252 (0.238)	Data 0.001 (0.012)	Loss 4.3627 (4.4572)	Top-1 acc 23.828 (22.455)	Top-5 acc 44.141 (44.157)	lr 0.02386
Warmup Train [21][1900/3239]	Time 0.188 (0.238)	Data 0.001 (0.012)	Loss 4.2733 (4.4575)	Top-1 acc 25.000 (22.455)	Top-5 acc 43.750 (44.153)	lr 0.02385
Warmup Train [21][1910/3239]	Time 0.240 (0.238)	Data 0.001 (0.012)	Loss 4.3374 (4.4575)	Top-1 acc 23.828 (22.456)	Top-5 acc 47.656 (44.156)	lr 0.02384
Warmup Train [21][1920/3239]	Time 0.344 (0.238)	Data 0.001 (0.012)	Loss 4.4087 (4.4574)	Top-1 acc 20.703 (22.457)	Top-5 acc 47.656 (44.156)	lr 0.02384
Warmup Train [21][1930/3239]	Time 0.170 (0.237)	Data 0.002 (0.012)	Loss 4.3539 (4.4573)	Top-1 acc 24.609 (22.460)	Top-5 acc 45.703 (44.159)	lr 0.02383
Warmup Train [21][1940/3239]	Time 0.251 (0.237)	Data 0.001 (0.012)	Loss 4.4963 (4.4574)	Top-1 acc 23.047 (22.458)	Top-5 acc 42.969 (44.159)	lr 0.02382
Warmup Train [21][1950/3239]	Time 0.181 (0.237)	Data 0.001 (0.012)	Loss 4.4536 (4.4573)	Top-1 acc 20.312 (22.461)	Top-5 acc 41.016 (44.161)	lr 0.02382
Warmup Train [21][1960/3239]	Time 0.280 (0.237)	Data 0.001 (0.012)	Loss 4.1979 (4.4572)	Top-1 acc 25.391 (22.458)	Top-5 acc 50.391 (44.163)	lr 0.02381
Warmup Train [21][1970/3239]	Time 0.131 (0.237)	Data 0.002 (0.012)	Loss 4.5005 (4.4570)	Top-1 acc 20.703 (22.454)	Top-5 acc 44.531 (44.163)	lr 0.02381
Warmup Train [21][1980/3239]	Time 0.275 (0.237)	Data 0.001 (0.012)	Loss 4.3236 (4.4570)	Top-1 acc 26.562 (22.456)	Top-5 acc 46.094 (44.164)	lr 0.02380
Warmup Train [21][1990/3239]	Time 0.223 (0.237)	Data 0.001 (0.011)	Loss 4.5318 (4.4572)	Top-1 acc 19.531 (22.451)	Top-5 acc 40.234 (44.157)	lr 0.02379
Warmup Train [21][2000/3239]	Time 0.207 (0.237)	Data 0.001 (0.011)	Loss 4.3764 (4.4574)	Top-1 acc 24.219 (22.449)	Top-5 acc 46.484 (44.151)	lr 0.02379
Warmup Train [21][2010/3239]	Time 0.189 (0.237)	Data 0.034 (0.011)	Loss 4.4477 (4.4574)	Top-1 acc 22.266 (22.449)	Top-5 acc 44.141 (44.149)	lr 0.02378
Warmup Train [21][2020/3239]	Time 0.150 (0.237)	Data 0.001 (0.011)	Loss 4.4500 (4.4572)	Top-1 acc 17.969 (22.452)	Top-5 acc 44.531 (44.160)	lr 0.02378
Warmup Train [21][2030/3239]	Time 0.323 (0.237)	Data 0.001 (0.011)	Loss 4.3409 (4.4569)	Top-1 acc 23.438 (22.456)	Top-5 acc 49.219 (44.165)	lr 0.02377
Warmup Train [21][2040/3239]	Time 0.186 (0.237)	Data 0.001 (0.011)	Loss 4.2789 (4.4569)	Top-1 acc 25.391 (22.455)	Top-5 acc 48.047 (44.157)	lr 0.02376
Warmup Train [21][2050/3239]	Time 0.195 (0.237)	Data 0.001 (0.011)	Loss 4.3242 (4.4567)	Top-1 acc 23.828 (22.456)	Top-5 acc 49.609 (44.164)	lr 0.02376
Warmup Train [21][2060/3239]	Time 0.226 (0.237)	Data 0.029 (0.011)	Loss 4.5283 (4.4568)	Top-1 acc 21.875 (22.454)	Top-5 acc 41.406 (44.159)	lr 0.02375
Warmup Train [21][2070/3239]	Time 0.172 (0.237)	Data 0.001 (0.011)	Loss 4.3236 (4.4566)	Top-1 acc 24.219 (22.458)	Top-5 acc 48.047 (44.169)	lr 0.02375
Warmup Train [21][2080/3239]	Time 0.220 (0.237)	Data 0.001 (0.011)	Loss 4.5211 (4.4566)	Top-1 acc 25.391 (22.463)	Top-5 acc 43.359 (44.167)	lr 0.02374
Warmup Train [21][2090/3239]	Time 0.160 (0.237)	Data 0.002 (0.011)	Loss 4.5836 (4.4564)	Top-1 acc 23.828 (22.469)	Top-5 acc 41.797 (44.168)	lr 0.02373
Warmup Train [21][2100/3239]	Time 0.294 (0.236)	Data 0.003 (0.011)	Loss 4.2800 (4.4561)	Top-1 acc 26.172 (22.476)	Top-5 acc 49.219 (44.173)	lr 0.02373
Warmup Train [21][2110/3239]	Time 0.206 (0.236)	Data 0.001 (0.011)	Loss 4.4923 (4.4560)	Top-1 acc 25.781 (22.479)	Top-5 acc 45.312 (44.176)	lr 0.02372
Warmup Train [21][2120/3239]	Time 0.152 (0.236)	Data 0.001 (0.011)	Loss 4.5744 (4.4559)	Top-1 acc 19.922 (22.480)	Top-5 acc 42.578 (44.183)	lr 0.02372
Warmup Train [21][2130/3239]	Time 0.244 (0.236)	Data 0.002 (0.011)	Loss 4.4655 (4.4558)	Top-1 acc 21.484 (22.482)	Top-5 acc 41.797 (44.186)	lr 0.02371
Warmup Train [21][2140/3239]	Time 0.335 (0.236)	Data 0.001 (0.011)	Loss 4.3838 (4.4556)	Top-1 acc 22.656 (22.484)	Top-5 acc 44.531 (44.188)	lr 0.02370
Warmup Train [21][2150/3239]	Time 0.266 (0.236)	Data 0.003 (0.011)	Loss 4.5152 (4.4560)	Top-1 acc 22.266 (22.476)	Top-5 acc 40.234 (44.180)	lr 0.02370
Warmup Train [21][2160/3239]	Time 0.204 (0.236)	Data 0.001 (0.011)	Loss 4.4975 (4.4560)	Top-1 acc 21.875 (22.473)	Top-5 acc 44.531 (44.183)	lr 0.02369
Warmup Train [21][2170/3239]	Time 0.191 (0.236)	Data 0.001 (0.011)	Loss 4.4160 (4.4556)	Top-1 acc 23.828 (22.476)	Top-5 acc 45.312 (44.188)	lr 0.02369
Warmup Train [21][2180/3239]	Time 0.214 (0.236)	Data 0.001 (0.011)	Loss 4.4260 (4.4559)	Top-1 acc 25.000 (22.478)	Top-5 acc 46.484 (44.190)	lr 0.02368
Warmup Train [21][2190/3239]	Time 0.227 (0.236)	Data 0.001 (0.011)	Loss 4.2091 (4.4557)	Top-1 acc 28.516 (22.482)	Top-5 acc 47.266 (44.187)	lr 0.02367
Warmup Train [21][2200/3239]	Time 0.212 (0.236)	Data 0.001 (0.011)	Loss 4.3086 (4.4559)	Top-1 acc 24.609 (22.476)	Top-5 acc 48.438 (44.178)	lr 0.02367
Warmup Train [21][2210/3239]	Time 0.250 (0.236)	Data 0.001 (0.011)	Loss 4.5181 (4.4554)	Top-1 acc 22.656 (22.492)	Top-5 acc 42.969 (44.189)	lr 0.02366
Warmup Train [21][2220/3239]	Time 0.190 (0.236)	Data 0.001 (0.011)	Loss 4.4573 (4.4552)	Top-1 acc 21.875 (22.496)	Top-5 acc 43.750 (44.197)	lr 0.02366
Warmup Train [21][2230/3239]	Time 0.187 (0.236)	Data 0.001 (0.011)	Loss 4.6262 (4.4549)	Top-1 acc 20.312 (22.501)	Top-5 acc 39.453 (44.206)	lr 0.02365
Warmup Train [21][2240/3239]	Time 0.369 (0.236)	Data 0.001 (0.011)	Loss 4.2593 (4.4551)	Top-1 acc 25.000 (22.493)	Top-5 acc 48.828 (44.203)	lr 0.02364
Warmup Train [21][2250/3239]	Time 0.233 (0.236)	Data 0.001 (0.011)	Loss 4.3110 (4.4551)	Top-1 acc 24.609 (22.494)	Top-5 acc 48.047 (44.202)	lr 0.02364
Warmup Train [21][2260/3239]	Time 0.177 (0.236)	Data 0.002 (0.010)	Loss 4.3369 (4.4551)	Top-1 acc 24.609 (22.492)	Top-5 acc 43.750 (44.198)	lr 0.02363
Warmup Train [21][2270/3239]	Time 0.226 (0.236)	Data 0.002 (0.010)	Loss 4.4203 (4.4551)	Top-1 acc 20.703 (22.495)	Top-5 acc 42.578 (44.197)	lr 0.02363
Warmup Train [21][2280/3239]	Time 0.205 (0.236)	Data 0.001 (0.010)	Loss 4.2564 (4.4547)	Top-1 acc 26.953 (22.495)	Top-5 acc 46.484 (44.206)	lr 0.02362
Warmup Train [21][2290/3239]	Time 0.237 (0.236)	Data 0.001 (0.010)	Loss 4.4844 (4.4544)	Top-1 acc 21.484 (22.502)	Top-5 acc 45.312 (44.214)	lr 0.02361
Warmup Train [21][2300/3239]	Time 0.322 (0.236)	Data 0.003 (0.010)	Loss 4.2474 (4.4543)	Top-1 acc 26.953 (22.504)	Top-5 acc 48.438 (44.215)	lr 0.02361
Warmup Train [21][2310/3239]	Time 0.226 (0.236)	Data 0.001 (0.010)	Loss 4.2886 (4.4542)	Top-1 acc 25.391 (22.508)	Top-5 acc 49.219 (44.214)	lr 0.02360
Warmup Train [21][2320/3239]	Time 0.180 (0.236)	Data 0.001 (0.010)	Loss 4.2817 (4.4539)	Top-1 acc 26.562 (22.508)	Top-5 acc 51.172 (44.220)	lr 0.02359
Warmup Train [21][2330/3239]	Time 0.208 (0.236)	Data 0.001 (0.010)	Loss 4.3250 (4.4539)	Top-1 acc 26.953 (22.510)	Top-5 acc 45.703 (44.219)	lr 0.02359
Warmup Train [21][2340/3239]	Time 0.349 (0.236)	Data 0.002 (0.010)	Loss 4.4797 (4.4539)	Top-1 acc 20.312 (22.509)	Top-5 acc 42.969 (44.217)	lr 0.02358
Warmup Train [21][2350/3239]	Time 0.399 (0.236)	Data 0.001 (0.010)	Loss 4.2975 (4.4542)	Top-1 acc 27.734 (22.507)	Top-5 acc 49.219 (44.210)	lr 0.02358
Warmup Train [21][2360/3239]	Time 0.159 (0.236)	Data 0.002 (0.010)	Loss 4.5536 (4.4540)	Top-1 acc 19.141 (22.511)	Top-5 acc 37.891 (44.210)	lr 0.02357
Warmup Train [21][2370/3239]	Time 0.203 (0.236)	Data 0.001 (0.010)	Loss 4.6017 (4.4540)	Top-1 acc 22.266 (22.514)	Top-5 acc 38.672 (44.210)	lr 0.02356
Warmup Train [21][2380/3239]	Time 0.178 (0.235)	Data 0.001 (0.010)	Loss 4.4609 (4.4539)	Top-1 acc 21.875 (22.518)	Top-5 acc 41.797 (44.212)	lr 0.02356
Warmup Train [21][2390/3239]	Time 0.216 (0.236)	Data 0.002 (0.010)	Loss 4.5284 (4.4537)	Top-1 acc 22.266 (22.520)	Top-5 acc 42.188 (44.215)	lr 0.02355
Warmup Train [21][2400/3239]	Time 0.271 (0.236)	Data 0.001 (0.010)	Loss 4.2004 (4.4536)	Top-1 acc 29.297 (22.522)	Top-5 acc 46.875 (44.218)	lr 0.02355
Warmup Train [21][2410/3239]	Time 0.246 (0.236)	Data 0.002 (0.010)	Loss 4.2318 (4.4536)	Top-1 acc 27.734 (22.528)	Top-5 acc 50.391 (44.217)	lr 0.02354
Warmup Train [21][2420/3239]	Time 0.196 (0.235)	Data 0.002 (0.010)	Loss 4.2766 (4.4538)	Top-1 acc 23.047 (22.526)	Top-5 acc 51.172 (44.211)	lr 0.02353
Warmup Train [21][2430/3239]	Time 0.169 (0.235)	Data 0.001 (0.010)	Loss 4.6241 (4.4540)	Top-1 acc 20.312 (22.520)	Top-5 acc 41.016 (44.203)	lr 0.02353
Warmup Train [21][2440/3239]	Time 0.195 (0.235)	Data 0.001 (0.010)	Loss 4.4565 (4.4539)	Top-1 acc 24.219 (22.521)	Top-5 acc 42.578 (44.203)	lr 0.02352
Warmup Train [21][2450/3239]	Time 0.458 (0.235)	Data 0.001 (0.010)	Loss 4.3062 (4.4539)	Top-1 acc 21.484 (22.518)	Top-5 acc 46.094 (44.204)	lr 0.02352
Warmup Train [21][2460/3239]	Time 0.236 (0.235)	Data 0.001 (0.010)	Loss 4.5176 (4.4538)	Top-1 acc 23.438 (22.520)	Top-5 acc 43.359 (44.203)	lr 0.02351
Warmup Train [21][2470/3239]	Time 0.219 (0.235)	Data 0.001 (0.010)	Loss 4.4566 (4.4538)	Top-1 acc 23.828 (22.518)	Top-5 acc 44.922 (44.200)	lr 0.02350
Warmup Train [21][2480/3239]	Time 0.204 (0.235)	Data 0.001 (0.010)	Loss 4.5563 (4.4539)	Top-1 acc 19.922 (22.513)	Top-5 acc 42.969 (44.199)	lr 0.02350
Warmup Train [21][2490/3239]	Time 0.201 (0.235)	Data 0.001 (0.010)	Loss 4.3830 (4.4538)	Top-1 acc 20.312 (22.511)	Top-5 acc 46.875 (44.201)	lr 0.02349
Warmup Train [21][2500/3239]	Time 0.207 (0.235)	Data 0.001 (0.010)	Loss 4.4008 (4.4538)	Top-1 acc 24.609 (22.515)	Top-5 acc 48.438 (44.202)	lr 0.02349
Warmup Train [21][2510/3239]	Time 0.188 (0.235)	Data 0.003 (0.010)	Loss 4.4431 (4.4534)	Top-1 acc 22.266 (22.522)	Top-5 acc 41.797 (44.212)	lr 0.02348
Warmup Train [21][2520/3239]	Time 0.257 (0.235)	Data 0.001 (0.010)	Loss 4.3836 (4.4533)	Top-1 acc 25.781 (22.521)	Top-5 acc 43.359 (44.212)	lr 0.02347
Warmup Train [21][2530/3239]	Time 0.202 (0.235)	Data 0.002 (0.010)	Loss 4.5701 (4.4533)	Top-1 acc 22.266 (22.521)	Top-5 acc 40.625 (44.209)	lr 0.02347
Warmup Train [21][2540/3239]	Time 0.300 (0.235)	Data 0.001 (0.010)	Loss 4.4723 (4.4533)	Top-1 acc 21.094 (22.519)	Top-5 acc 42.969 (44.208)	lr 0.02346
Warmup Train [21][2550/3239]	Time 0.326 (0.235)	Data 0.001 (0.010)	Loss 4.4564 (4.4534)	Top-1 acc 19.531 (22.516)	Top-5 acc 40.625 (44.207)	lr 0.02346
Warmup Train [21][2560/3239]	Time 0.233 (0.235)	Data 0.001 (0.010)	Loss 4.5067 (4.4535)	Top-1 acc 21.484 (22.518)	Top-5 acc 42.188 (44.204)	lr 0.02345
Warmup Train [21][2570/3239]	Time 0.231 (0.235)	Data 0.001 (0.010)	Loss 4.3474 (4.4534)	Top-1 acc 23.828 (22.518)	Top-5 acc 48.828 (44.206)	lr 0.02344
Warmup Train [21][2580/3239]	Time 0.250 (0.235)	Data 0.002 (0.010)	Loss 4.5540 (4.4533)	Top-1 acc 18.750 (22.519)	Top-5 acc 42.188 (44.207)	lr 0.02344
Warmup Train [21][2590/3239]	Time 0.275 (0.235)	Data 0.001 (0.010)	Loss 4.4456 (4.4531)	Top-1 acc 25.000 (22.525)	Top-5 acc 47.266 (44.213)	lr 0.02343
Warmup Train [21][2600/3239]	Time 0.186 (0.235)	Data 0.001 (0.009)	Loss 4.5421 (4.4532)	Top-1 acc 20.312 (22.525)	Top-5 acc 39.062 (44.211)	lr 0.02343
Warmup Train [21][2610/3239]	Time 0.218 (0.235)	Data 0.001 (0.009)	Loss 4.4332 (4.4529)	Top-1 acc 25.781 (22.529)	Top-5 acc 43.359 (44.214)	lr 0.02342
Warmup Train [21][2620/3239]	Time 0.229 (0.235)	Data 0.001 (0.009)	Loss 4.5067 (4.4529)	Top-1 acc 22.656 (22.532)	Top-5 acc 43.750 (44.216)	lr 0.02341
Warmup Train [21][2630/3239]	Time 0.218 (0.235)	Data 0.001 (0.009)	Loss 4.4394 (4.4528)	Top-1 acc 22.266 (22.532)	Top-5 acc 48.438 (44.221)	lr 0.02341
Warmup Train [21][2640/3239]	Time 0.150 (0.235)	Data 0.001 (0.009)	Loss 4.3814 (4.4528)	Top-1 acc 20.312 (22.531)	Top-5 acc 48.047 (44.219)	lr 0.02340
Warmup Train [21][2650/3239]	Time 0.210 (0.235)	Data 0.001 (0.009)	Loss 4.3220 (4.4525)	Top-1 acc 23.438 (22.535)	Top-5 acc 49.219 (44.227)	lr 0.02340
Warmup Train [21][2660/3239]	Time 0.348 (0.235)	Data 0.002 (0.009)	Loss 4.2745 (4.4525)	Top-1 acc 25.000 (22.537)	Top-5 acc 46.484 (44.230)	lr 0.02339
Warmup Train [21][2670/3239]	Time 0.228 (0.235)	Data 0.001 (0.009)	Loss 4.4589 (4.4527)	Top-1 acc 22.656 (22.533)	Top-5 acc 41.797 (44.225)	lr 0.02338
Warmup Train [21][2680/3239]	Time 0.254 (0.235)	Data 0.001 (0.009)	Loss 4.3912 (4.4525)	Top-1 acc 20.703 (22.536)	Top-5 acc 46.484 (44.228)	lr 0.02338
Warmup Train [21][2690/3239]	Time 0.209 (0.235)	Data 0.002 (0.009)	Loss 4.3312 (4.4523)	Top-1 acc 25.781 (22.541)	Top-5 acc 50.391 (44.230)	lr 0.02337
Warmup Train [21][2700/3239]	Time 0.207 (0.235)	Data 0.001 (0.009)	Loss 4.3916 (4.4525)	Top-1 acc 24.219 (22.539)	Top-5 acc 46.875 (44.226)	lr 0.02336
Warmup Train [21][2710/3239]	Time 0.187 (0.235)	Data 0.001 (0.009)	Loss 4.2918 (4.4522)	Top-1 acc 22.656 (22.540)	Top-5 acc 45.312 (44.228)	lr 0.02336
Warmup Train [21][2720/3239]	Time 0.180 (0.235)	Data 0.001 (0.009)	Loss 4.2355 (4.4522)	Top-1 acc 27.734 (22.541)	Top-5 acc 47.266 (44.228)	lr 0.02335
Warmup Train [21][2730/3239]	Time 0.227 (0.235)	Data 0.001 (0.009)	Loss 4.3840 (4.4522)	Top-1 acc 20.703 (22.541)	Top-5 acc 48.438 (44.230)	lr 0.02335
Warmup Train [21][2740/3239]	Time 0.215 (0.234)	Data 0.002 (0.009)	Loss 4.4698 (4.4521)	Top-1 acc 23.828 (22.547)	Top-5 acc 44.531 (44.232)	lr 0.02334
Warmup Train [21][2750/3239]	Time 0.393 (0.234)	Data 0.001 (0.009)	Loss 4.5423 (4.4521)	Top-1 acc 21.875 (22.544)	Top-5 acc 45.312 (44.229)	lr 0.02333
Warmup Train [21][2760/3239]	Time 0.220 (0.234)	Data 0.001 (0.009)	Loss 4.4914 (4.4520)	Top-1 acc 21.094 (22.546)	Top-5 acc 42.578 (44.230)	lr 0.02333
Warmup Train [21][2770/3239]	Time 0.254 (0.234)	Data 0.001 (0.009)	Loss 4.3714 (4.4519)	Top-1 acc 25.781 (22.546)	Top-5 acc 44.531 (44.231)	lr 0.02332
Warmup Train [21][2780/3239]	Time 0.204 (0.234)	Data 0.002 (0.009)	Loss 4.3870 (4.4517)	Top-1 acc 24.609 (22.550)	Top-5 acc 43.750 (44.232)	lr 0.02332
Warmup Train [21][2790/3239]	Time 0.192 (0.234)	Data 0.002 (0.009)	Loss 4.3026 (4.4518)	Top-1 acc 23.828 (22.549)	Top-5 acc 46.484 (44.235)	lr 0.02331
Warmup Train [21][2800/3239]	Time 0.192 (0.234)	Data 0.001 (0.009)	Loss 4.2439 (4.4517)	Top-1 acc 27.344 (22.549)	Top-5 acc 48.828 (44.234)	lr 0.02330
Warmup Train [21][2810/3239]	Time 0.220 (0.234)	Data 0.001 (0.009)	Loss 4.5587 (4.4518)	Top-1 acc 23.828 (22.550)	Top-5 acc 40.625 (44.230)	lr 0.02330
Warmup Train [21][2820/3239]	Time 0.205 (0.234)	Data 0.001 (0.009)	Loss 4.6607 (4.4520)	Top-1 acc 17.969 (22.550)	Top-5 acc 39.062 (44.227)	lr 0.02329
Warmup Train [21][2830/3239]	Time 0.224 (0.234)	Data 0.001 (0.009)	Loss 4.2787 (4.4518)	Top-1 acc 24.609 (22.552)	Top-5 acc 46.875 (44.236)	lr 0.02329
Warmup Train [21][2840/3239]	Time 0.184 (0.234)	Data 0.001 (0.009)	Loss 4.4048 (4.4516)	Top-1 acc 24.219 (22.555)	Top-5 acc 45.312 (44.238)	lr 0.02328
Warmup Train [21][2850/3239]	Time 0.204 (0.234)	Data 0.001 (0.009)	Loss 4.4591 (4.4515)	Top-1 acc 26.953 (22.561)	Top-5 acc 44.141 (44.246)	lr 0.02327
Warmup Train [21][2860/3239]	Time 0.383 (0.234)	Data 0.001 (0.009)	Loss 4.5058 (4.4514)	Top-1 acc 21.484 (22.561)	Top-5 acc 42.578 (44.249)	lr 0.02327
Warmup Train [21][2870/3239]	Time 0.364 (0.234)	Data 0.001 (0.009)	Loss 4.5848 (4.4514)	Top-1 acc 21.094 (22.563)	Top-5 acc 41.016 (44.249)	lr 0.02326
Warmup Train [21][2880/3239]	Time 0.200 (0.234)	Data 0.001 (0.009)	Loss 4.4195 (4.4511)	Top-1 acc 23.047 (22.565)	Top-5 acc 47.656 (44.254)	lr 0.02326
Warmup Train [21][2890/3239]	Time 0.297 (0.234)	Data 0.001 (0.009)	Loss 4.6116 (4.4513)	Top-1 acc 25.391 (22.565)	Top-5 acc 43.750 (44.253)	lr 0.02325
Warmup Train [21][2900/3239]	Time 0.147 (0.234)	Data 0.002 (0.009)	Loss 4.5386 (4.4513)	Top-1 acc 21.484 (22.564)	Top-5 acc 42.188 (44.255)	lr 0.02324
Warmup Train [21][2910/3239]	Time 0.202 (0.234)	Data 0.001 (0.009)	Loss 4.2596 (4.4510)	Top-1 acc 23.438 (22.569)	Top-5 acc 47.656 (44.258)	lr 0.02324
Warmup Train [21][2920/3239]	Time 0.212 (0.234)	Data 0.001 (0.009)	Loss 4.6438 (4.4509)	Top-1 acc 20.312 (22.572)	Top-5 acc 37.891 (44.262)	lr 0.02323
Warmup Train [21][2930/3239]	Time 0.189 (0.234)	Data 0.001 (0.009)	Loss 4.5338 (4.4508)	Top-1 acc 21.875 (22.576)	Top-5 acc 39.453 (44.264)	lr 0.02323
Warmup Train [21][2940/3239]	Time 0.273 (0.234)	Data 0.002 (0.009)	Loss 4.4205 (4.4507)	Top-1 acc 25.000 (22.582)	Top-5 acc 46.094 (44.269)	lr 0.02322
Warmup Train [21][2950/3239]	Time 0.215 (0.234)	Data 0.001 (0.009)	Loss 4.4025 (4.4506)	Top-1 acc 25.781 (22.583)	Top-5 acc 44.922 (44.272)	lr 0.02321
Warmup Train [21][2960/3239]	Time 0.318 (0.234)	Data 0.002 (0.009)	Loss 4.4574 (4.4507)	Top-1 acc 23.047 (22.585)	Top-5 acc 46.484 (44.272)	lr 0.02321
Warmup Train [21][2970/3239]	Time 0.342 (0.234)	Data 0.001 (0.009)	Loss 4.3871 (4.4506)	Top-1 acc 25.781 (22.585)	Top-5 acc 42.969 (44.273)	lr 0.02320
Warmup Train [21][2980/3239]	Time 0.207 (0.234)	Data 0.001 (0.009)	Loss 4.2422 (4.4502)	Top-1 acc 29.688 (22.594)	Top-5 acc 47.656 (44.284)	lr 0.02320
Warmup Train [21][2990/3239]	Time 0.139 (0.234)	Data 0.002 (0.009)	Loss 4.4929 (4.4502)	Top-1 acc 21.484 (22.593)	Top-5 acc 44.531 (44.282)	lr 0.02319
Warmup Train [21][3000/3239]	Time 0.147 (0.234)	Data 0.001 (0.009)	Loss 4.3674 (4.4502)	Top-1 acc 21.484 (22.596)	Top-5 acc 44.531 (44.286)	lr 0.02318
Warmup Train [21][3010/3239]	Time 0.222 (0.234)	Data 0.001 (0.009)	Loss 4.3876 (4.4500)	Top-1 acc 21.875 (22.602)	Top-5 acc 47.266 (44.288)	lr 0.02318
Warmup Train [21][3020/3239]	Time 0.199 (0.234)	Data 0.001 (0.009)	Loss 4.4157 (4.4499)	Top-1 acc 23.828 (22.601)	Top-5 acc 43.750 (44.290)	lr 0.02317
Warmup Train [21][3030/3239]	Time 0.189 (0.234)	Data 0.002 (0.009)	Loss 4.1894 (4.4499)	Top-1 acc 26.953 (22.597)	Top-5 acc 49.609 (44.289)	lr 0.02317
Warmup Train [21][3040/3239]	Time 0.225 (0.234)	Data 0.001 (0.009)	Loss 4.3870 (4.4498)	Top-1 acc 24.609 (22.599)	Top-5 acc 44.922 (44.289)	lr 0.02316
Warmup Train [21][3050/3239]	Time 0.214 (0.234)	Data 0.001 (0.009)	Loss 4.4984 (4.4499)	Top-1 acc 21.875 (22.599)	Top-5 acc 45.312 (44.288)	lr 0.02315
Warmup Train [21][3060/3239]	Time 0.198 (0.234)	Data 0.001 (0.009)	Loss 4.4879 (4.4498)	Top-1 acc 22.656 (22.603)	Top-5 acc 45.312 (44.292)	lr 0.02315
Warmup Train [21][3070/3239]	Time 0.208 (0.234)	Data 0.001 (0.009)	Loss 4.3667 (4.4499)	Top-1 acc 21.094 (22.605)	Top-5 acc 51.172 (44.294)	lr 0.02314
Warmup Train [21][3080/3239]	Time 0.363 (0.234)	Data 0.001 (0.008)	Loss 4.4068 (4.4500)	Top-1 acc 23.047 (22.604)	Top-5 acc 43.359 (44.293)	lr 0.02314
Warmup Train [21][3090/3239]	Time 0.251 (0.234)	Data 0.001 (0.008)	Loss 4.5444 (4.4499)	Top-1 acc 19.922 (22.606)	Top-5 acc 40.625 (44.296)	lr 0.02313
Warmup Train [21][3100/3239]	Time 0.251 (0.234)	Data 0.001 (0.008)	Loss 4.5035 (4.4498)	Top-1 acc 22.266 (22.607)	Top-5 acc 39.062 (44.295)	lr 0.02312
Warmup Train [21][3110/3239]	Time 0.258 (0.234)	Data 0.001 (0.008)	Loss 4.5564 (4.4499)	Top-1 acc 23.828 (22.609)	Top-5 acc 42.188 (44.292)	lr 0.02312
Warmup Train [21][3120/3239]	Time 0.201 (0.234)	Data 0.001 (0.008)	Loss 4.6314 (4.4500)	Top-1 acc 19.141 (22.606)	Top-5 acc 44.922 (44.291)	lr 0.02311
Warmup Train [21][3130/3239]	Time 0.271 (0.234)	Data 0.001 (0.008)	Loss 4.4262 (4.4499)	Top-1 acc 23.047 (22.613)	Top-5 acc 44.141 (44.296)	lr 0.02310
Warmup Train [21][3140/3239]	Time 0.219 (0.234)	Data 0.001 (0.008)	Loss 4.4496 (4.4497)	Top-1 acc 20.312 (22.614)	Top-5 acc 42.969 (44.296)	lr 0.02310
Warmup Train [21][3150/3239]	Time 0.185 (0.234)	Data 0.001 (0.008)	Loss 4.4861 (4.4497)	Top-1 acc 23.828 (22.616)	Top-5 acc 43.750 (44.300)	lr 0.02309
Warmup Train [21][3160/3239]	Time 0.193 (0.234)	Data 0.001 (0.008)	Loss 4.3711 (4.4496)	Top-1 acc 18.750 (22.616)	Top-5 acc 45.312 (44.300)	lr 0.02309
Warmup Train [21][3170/3239]	Time 0.314 (0.234)	Data 0.002 (0.008)	Loss 4.4276 (4.4497)	Top-1 acc 24.219 (22.615)	Top-5 acc 43.750 (44.296)	lr 0.02308
Warmup Train [21][3180/3239]	Time 0.215 (0.234)	Data 0.000 (0.008)	Loss 4.4842 (4.4495)	Top-1 acc 21.875 (22.619)	Top-5 acc 43.750 (44.301)	lr 0.02307
Warmup Train [21][3190/3239]	Time 0.204 (0.234)	Data 0.000 (0.008)	Loss 4.2651 (4.4492)	Top-1 acc 29.688 (22.625)	Top-5 acc 48.047 (44.307)	lr 0.02307
Warmup Train [21][3200/3239]	Time 0.238 (0.234)	Data 0.000 (0.008)	Loss 4.4174 (4.4492)	Top-1 acc 22.656 (22.626)	Top-5 acc 46.094 (44.309)	lr 0.02306
Warmup Train [21][3210/3239]	Time 0.147 (0.234)	Data 0.000 (0.008)	Loss 4.4769 (4.4492)	Top-1 acc 22.266 (22.621)	Top-5 acc 41.797 (44.308)	lr 0.02306
Warmup Train [21][3220/3239]	Time 0.191 (0.234)	Data 0.000 (0.008)	Loss 4.5097 (4.4490)	Top-1 acc 19.922 (22.625)	Top-5 acc 43.750 (44.314)	lr 0.02305
Warmup Train [21][3230/3239]	Time 0.210 (0.233)	Data 0.000 (0.008)	Loss 4.2531 (4.4488)	Top-1 acc 27.734 (22.632)	Top-5 acc 49.219 (44.320)	lr 0.02304
Warmup Train [21][3239/3239]	Time 0.141 (0.233)	Data 0.000 (0.008)	Loss 4.2506 (4.4488)	Top-1 acc 30.864 (22.633)	Top-5 acc 51.852 (44.321)	lr 0.02304
==========Warmup Valid [21/40]	loss 3.445	top-1 acc 29.780	top-5 acc 54.027	Train top-1 22.633	top-5 44.321	flops: 442.4M
Warmup Train [22][0/3239]	Time 19.492 (19.492)	Data 16.845 (16.845)	Loss 4.3410 (4.3410)	Top-1 acc 23.438 (23.438)	Top-5 acc 48.438 (48.438)	lr 0.02304
Warmup Train [22][10/3239]	Time 0.340 (2.028)	Data 0.002 (1.533)	Loss 4.5039 (4.4543)	Top-1 acc 21.875 (23.366)	Top-5 acc 42.578 (44.354)	lr 0.02303
Warmup Train [22][20/3239]	Time 0.188 (1.193)	Data 0.001 (0.804)	Loss 4.3364 (4.4214)	Top-1 acc 24.609 (23.196)	Top-5 acc 44.922 (44.680)	lr 0.02303
Warmup Train [22][30/3239]	Time 0.219 (0.878)	Data 0.001 (0.545)	Loss 4.3597 (4.4278)	Top-1 acc 23.438 (23.185)	Top-5 acc 47.266 (44.808)	lr 0.02302
Warmup Train [22][40/3239]	Time 0.162 (0.717)	Data 0.001 (0.413)	Loss 4.5494 (4.4225)	Top-1 acc 21.094 (23.152)	Top-5 acc 41.797 (44.846)	lr 0.02301
Warmup Train [22][50/3239]	Time 0.177 (0.620)	Data 0.001 (0.333)	Loss 4.5728 (4.4232)	Top-1 acc 16.797 (23.108)	Top-5 acc 40.234 (44.953)	lr 0.02301
Warmup Train [22][60/3239]	Time 0.238 (0.557)	Data 0.003 (0.279)	Loss 4.3567 (4.4158)	Top-1 acc 21.484 (23.386)	Top-5 acc 48.828 (45.159)	lr 0.02300
Warmup Train [22][70/3239]	Time 0.244 (0.509)	Data 0.002 (0.240)	Loss 4.2081 (4.4158)	Top-1 acc 24.609 (23.223)	Top-5 acc 46.094 (45.142)	lr 0.02300
Warmup Train [22][80/3239]	Time 0.361 (0.475)	Data 0.001 (0.211)	Loss 4.2248 (4.4162)	Top-1 acc 29.297 (23.370)	Top-5 acc 50.000 (45.052)	lr 0.02299
Warmup Train [22][90/3239]	Time 0.272 (0.448)	Data 0.001 (0.188)	Loss 4.5526 (4.4152)	Top-1 acc 20.703 (23.386)	Top-5 acc 42.578 (45.072)	lr 0.02298
Warmup Train [22][100/3239]	Time 0.249 (0.428)	Data 0.001 (0.169)	Loss 4.4659 (4.4201)	Top-1 acc 21.484 (23.325)	Top-5 acc 44.922 (45.084)	lr 0.02298
Warmup Train [22][110/3239]	Time 0.212 (0.409)	Data 0.001 (0.154)	Loss 4.4878 (4.4224)	Top-1 acc 20.312 (23.276)	Top-5 acc 41.016 (44.975)	lr 0.02297
Warmup Train [22][120/3239]	Time 0.197 (0.394)	Data 0.001 (0.142)	Loss 4.5268 (4.4259)	Top-1 acc 25.391 (23.250)	Top-5 acc 44.922 (44.915)	lr 0.02297
Warmup Train [22][130/3239]	Time 0.197 (0.380)	Data 0.001 (0.131)	Loss 4.4196 (4.4251)	Top-1 acc 23.047 (23.244)	Top-5 acc 44.531 (44.967)	lr 0.02296
Warmup Train [22][140/3239]	Time 0.172 (0.369)	Data 0.001 (0.122)	Loss 4.4071 (4.4228)	Top-1 acc 23.828 (23.205)	Top-5 acc 46.484 (44.977)	lr 0.02295
Warmup Train [22][150/3239]	Time 0.236 (0.359)	Data 0.001 (0.114)	Loss 4.4126 (4.4220)	Top-1 acc 22.656 (23.171)	Top-5 acc 41.016 (44.984)	lr 0.02295
Warmup Train [22][160/3239]	Time 0.257 (0.351)	Data 0.001 (0.107)	Loss 4.3983 (4.4217)	Top-1 acc 22.266 (23.127)	Top-5 acc 45.703 (44.973)	lr 0.02294
Warmup Train [22][170/3239]	Time 0.202 (0.343)	Data 0.001 (0.101)	Loss 4.5331 (4.4252)	Top-1 acc 21.875 (23.083)	Top-5 acc 41.016 (44.860)	lr 0.02294
Warmup Train [22][180/3239]	Time 0.354 (0.338)	Data 0.001 (0.096)	Loss 4.5594 (4.4267)	Top-1 acc 19.922 (23.045)	Top-5 acc 42.578 (44.792)	lr 0.02293
Warmup Train [22][190/3239]	Time 0.207 (0.331)	Data 0.001 (0.091)	Loss 4.6225 (4.4268)	Top-1 acc 17.969 (23.010)	Top-5 acc 41.797 (44.766)	lr 0.02292
Warmup Train [22][200/3239]	Time 0.212 (0.326)	Data 0.001 (0.087)	Loss 4.3582 (4.4266)	Top-1 acc 23.047 (23.014)	Top-5 acc 45.312 (44.803)	lr 0.02292
Warmup Train [22][210/3239]	Time 0.228 (0.321)	Data 0.002 (0.083)	Loss 4.3292 (4.4258)	Top-1 acc 23.828 (23.023)	Top-5 acc 51.953 (44.827)	lr 0.02291
Warmup Train [22][220/3239]	Time 0.185 (0.317)	Data 0.002 (0.079)	Loss 4.3594 (4.4246)	Top-1 acc 23.047 (22.997)	Top-5 acc 48.047 (44.888)	lr 0.02291
Warmup Train [22][230/3239]	Time 0.268 (0.314)	Data 0.001 (0.076)	Loss 4.3262 (4.4213)	Top-1 acc 22.266 (23.037)	Top-5 acc 46.094 (44.991)	lr 0.02290
Warmup Train [22][240/3239]	Time 0.199 (0.310)	Data 0.001 (0.073)	Loss 4.4868 (4.4200)	Top-1 acc 21.094 (23.066)	Top-5 acc 44.531 (44.998)	lr 0.02289
Warmup Train [22][250/3239]	Time 0.232 (0.307)	Data 0.001 (0.070)	Loss 4.4765 (4.4209)	Top-1 acc 18.359 (23.059)	Top-5 acc 42.969 (45.015)	lr 0.02289
Warmup Train [22][260/3239]	Time 0.210 (0.303)	Data 0.001 (0.067)	Loss 4.3491 (4.4200)	Top-1 acc 21.875 (23.075)	Top-5 acc 46.484 (45.043)	lr 0.02288
Warmup Train [22][270/3239]	Time 0.196 (0.301)	Data 0.001 (0.065)	Loss 4.2561 (4.4192)	Top-1 acc 25.000 (23.054)	Top-5 acc 46.094 (45.027)	lr 0.02288
Warmup Train [22][280/3239]	Time 0.304 (0.298)	Data 0.001 (0.063)	Loss 4.3715 (4.4181)	Top-1 acc 23.047 (23.029)	Top-5 acc 46.484 (45.019)	lr 0.02287
Warmup Train [22][290/3239]	Time 0.331 (0.295)	Data 0.001 (0.061)	Loss 4.4315 (4.4175)	Top-1 acc 23.047 (23.042)	Top-5 acc 44.922 (45.027)	lr 0.02286
Warmup Train [22][300/3239]	Time 0.193 (0.293)	Data 0.001 (0.059)	Loss 4.4535 (4.4169)	Top-1 acc 25.000 (23.055)	Top-5 acc 44.141 (45.036)	lr 0.02286
Warmup Train [22][310/3239]	Time 0.226 (0.291)	Data 0.002 (0.057)	Loss 4.4614 (4.4181)	Top-1 acc 23.828 (23.042)	Top-5 acc 45.703 (45.026)	lr 0.02285
Warmup Train [22][320/3239]	Time 0.142 (0.289)	Data 0.002 (0.055)	Loss 4.5855 (4.4191)	Top-1 acc 18.359 (23.021)	Top-5 acc 40.234 (45.016)	lr 0.02285
Warmup Train [22][330/3239]	Time 0.260 (0.288)	Data 0.002 (0.054)	Loss 4.4850 (4.4190)	Top-1 acc 20.703 (23.049)	Top-5 acc 41.016 (45.012)	lr 0.02284
Warmup Train [22][340/3239]	Time 0.193 (0.286)	Data 0.002 (0.052)	Loss 4.4886 (4.4180)	Top-1 acc 20.703 (23.025)	Top-5 acc 42.578 (45.028)	lr 0.02283
Warmup Train [22][350/3239]	Time 0.184 (0.285)	Data 0.002 (0.051)	Loss 4.2132 (4.4167)	Top-1 acc 26.172 (23.069)	Top-5 acc 47.656 (45.102)	lr 0.02283
Warmup Train [22][360/3239]	Time 0.237 (0.283)	Data 0.001 (0.049)	Loss 4.4682 (4.4172)	Top-1 acc 24.219 (23.056)	Top-5 acc 41.797 (45.087)	lr 0.02282
Warmup Train [22][370/3239]	Time 0.417 (0.283)	Data 0.002 (0.048)	Loss 4.4344 (4.4161)	Top-1 acc 21.484 (23.077)	Top-5 acc 45.703 (45.092)	lr 0.02282
Warmup Train [22][380/3239]	Time 0.224 (0.281)	Data 0.002 (0.047)	Loss 4.4718 (4.4165)	Top-1 acc 22.266 (23.058)	Top-5 acc 43.359 (45.078)	lr 0.02281
Warmup Train [22][390/3239]	Time 0.228 (0.280)	Data 0.002 (0.046)	Loss 4.3395 (4.4172)	Top-1 acc 22.266 (23.041)	Top-5 acc 43.359 (45.073)	lr 0.02280
Warmup Train [22][400/3239]	Time 0.174 (0.280)	Data 0.002 (0.045)	Loss 4.4209 (4.4165)	Top-1 acc 25.000 (23.062)	Top-5 acc 42.578 (45.083)	lr 0.02280
Warmup Train [22][410/3239]	Time 0.232 (0.278)	Data 0.001 (0.044)	Loss 4.4679 (4.4158)	Top-1 acc 21.484 (23.084)	Top-5 acc 41.016 (45.079)	lr 0.02279
Warmup Train [22][420/3239]	Time 0.206 (0.277)	Data 0.001 (0.043)	Loss 4.5491 (4.4163)	Top-1 acc 21.875 (23.079)	Top-5 acc 44.922 (45.072)	lr 0.02278
Warmup Train [22][430/3239]	Time 0.202 (0.275)	Data 0.001 (0.042)	Loss 4.7425 (4.4167)	Top-1 acc 17.578 (23.063)	Top-5 acc 38.281 (45.063)	lr 0.02278
Warmup Train [22][440/3239]	Time 0.163 (0.274)	Data 0.001 (0.041)	Loss 4.6114 (4.4171)	Top-1 acc 18.359 (23.052)	Top-5 acc 39.062 (45.039)	lr 0.02277
Warmup Train [22][450/3239]	Time 0.230 (0.273)	Data 0.001 (0.040)	Loss 4.2265 (4.4177)	Top-1 acc 32.031 (23.075)	Top-5 acc 49.609 (45.049)	lr 0.02277
Warmup Train [22][460/3239]	Time 0.289 (0.272)	Data 0.001 (0.040)	Loss 4.2708 (4.4177)	Top-1 acc 23.828 (23.071)	Top-5 acc 49.609 (45.052)	lr 0.02276
Warmup Train [22][470/3239]	Time 0.215 (0.271)	Data 0.001 (0.039)	Loss 4.2323 (4.4171)	Top-1 acc 25.391 (23.089)	Top-5 acc 49.219 (45.053)	lr 0.02275
Warmup Train [22][480/3239]	Time 0.247 (0.270)	Data 0.001 (0.038)	Loss 4.4977 (4.4168)	Top-1 acc 23.047 (23.118)	Top-5 acc 42.969 (45.064)	lr 0.02275
Warmup Train [22][490/3239]	Time 0.175 (0.269)	Data 0.001 (0.037)	Loss 4.4013 (4.4158)	Top-1 acc 20.703 (23.127)	Top-5 acc 43.750 (45.073)	lr 0.02274
Warmup Train [22][500/3239]	Time 0.186 (0.268)	Data 0.001 (0.037)	Loss 4.1787 (4.4153)	Top-1 acc 28.906 (23.131)	Top-5 acc 52.734 (45.070)	lr 0.02274
Warmup Train [22][510/3239]	Time 0.228 (0.267)	Data 0.002 (0.036)	Loss 4.3148 (4.4162)	Top-1 acc 20.703 (23.097)	Top-5 acc 47.266 (45.053)	lr 0.02273
Warmup Train [22][520/3239]	Time 0.155 (0.267)	Data 0.002 (0.035)	Loss 4.3981 (4.4167)	Top-1 acc 22.266 (23.095)	Top-5 acc 44.531 (45.057)	lr 0.02272
Warmup Train [22][530/3239]	Time 0.223 (0.266)	Data 0.001 (0.035)	Loss 4.5160 (4.4162)	Top-1 acc 20.703 (23.109)	Top-5 acc 45.312 (45.077)	lr 0.02272
Warmup Train [22][540/3239]	Time 0.203 (0.266)	Data 0.001 (0.034)	Loss 4.6822 (4.4167)	Top-1 acc 17.188 (23.114)	Top-5 acc 38.281 (45.071)	lr 0.02271
Warmup Train [22][550/3239]	Time 0.370 (0.266)	Data 0.002 (0.034)	Loss 4.4514 (4.4167)	Top-1 acc 23.438 (23.122)	Top-5 acc 45.703 (45.064)	lr 0.02271
Warmup Train [22][560/3239]	Time 0.238 (0.265)	Data 0.001 (0.033)	Loss 4.5412 (4.4172)	Top-1 acc 24.609 (23.126)	Top-5 acc 43.750 (45.044)	lr 0.02270
Warmup Train [22][570/3239]	Time 0.194 (0.265)	Data 0.002 (0.033)	Loss 4.4934 (4.4177)	Top-1 acc 23.828 (23.117)	Top-5 acc 42.969 (45.045)	lr 0.02269
Warmup Train [22][580/3239]	Time 0.287 (0.264)	Data 0.002 (0.032)	Loss 4.4856 (4.4172)	Top-1 acc 23.438 (23.142)	Top-5 acc 45.312 (45.055)	lr 0.02269
Warmup Train [22][590/3239]	Time 0.237 (0.264)	Data 0.003 (0.032)	Loss 4.3264 (4.4170)	Top-1 acc 25.391 (23.134)	Top-5 acc 45.312 (45.043)	lr 0.02268
Warmup Train [22][600/3239]	Time 0.229 (0.264)	Data 0.001 (0.031)	Loss 4.3771 (4.4175)	Top-1 acc 25.391 (23.133)	Top-5 acc 43.750 (45.030)	lr 0.02268
Warmup Train [22][610/3239]	Time 0.205 (0.263)	Data 0.001 (0.031)	Loss 4.5491 (4.4168)	Top-1 acc 21.094 (23.149)	Top-5 acc 44.531 (45.052)	lr 0.02267
Warmup Train [22][620/3239]	Time 0.304 (0.262)	Data 0.001 (0.030)	Loss 4.4416 (4.4165)	Top-1 acc 23.828 (23.165)	Top-5 acc 42.969 (45.050)	lr 0.02266
Warmup Train [22][630/3239]	Time 0.236 (0.262)	Data 0.001 (0.030)	Loss 4.4643 (4.4164)	Top-1 acc 19.531 (23.173)	Top-5 acc 43.750 (45.052)	lr 0.02266
Warmup Train [22][640/3239]	Time 0.165 (0.261)	Data 0.001 (0.029)	Loss 4.3952 (4.4173)	Top-1 acc 28.125 (23.157)	Top-5 acc 48.047 (45.039)	lr 0.02265
Warmup Train [22][650/3239]	Time 0.169 (0.261)	Data 0.001 (0.029)	Loss 4.5008 (4.4179)	Top-1 acc 26.562 (23.139)	Top-5 acc 43.359 (45.020)	lr 0.02265
Warmup Train [22][660/3239]	Time 0.183 (0.260)	Data 0.001 (0.029)	Loss 4.6603 (4.4181)	Top-1 acc 19.141 (23.141)	Top-5 acc 39.062 (45.013)	lr 0.02264
Warmup Train [22][670/3239]	Time 0.245 (0.260)	Data 0.001 (0.028)	Loss 4.2695 (4.4184)	Top-1 acc 26.172 (23.137)	Top-5 acc 45.703 (45.012)	lr 0.02263
Warmup Train [22][680/3239]	Time 0.179 (0.259)	Data 0.001 (0.028)	Loss 4.3757 (4.4174)	Top-1 acc 21.484 (23.140)	Top-5 acc 43.750 (45.045)	lr 0.02263
Warmup Train [22][690/3239]	Time 0.243 (0.258)	Data 0.001 (0.028)	Loss 4.3448 (4.4178)	Top-1 acc 27.344 (23.157)	Top-5 acc 46.094 (45.038)	lr 0.02262
Warmup Train [22][700/3239]	Time 0.177 (0.258)	Data 0.001 (0.027)	Loss 4.5389 (4.4181)	Top-1 acc 22.266 (23.156)	Top-5 acc 43.359 (45.029)	lr 0.02262
Warmup Train [22][710/3239]	Time 0.216 (0.258)	Data 0.001 (0.027)	Loss 4.4425 (4.4179)	Top-1 acc 25.000 (23.157)	Top-5 acc 46.484 (45.022)	lr 0.02261
Warmup Train [22][720/3239]	Time 0.193 (0.257)	Data 0.001 (0.027)	Loss 4.3812 (4.4177)	Top-1 acc 23.438 (23.157)	Top-5 acc 46.484 (45.026)	lr 0.02260
Warmup Train [22][730/3239]	Time 0.162 (0.257)	Data 0.002 (0.026)	Loss 4.3654 (4.4175)	Top-1 acc 24.219 (23.165)	Top-5 acc 44.141 (45.024)	lr 0.02260
Warmup Train [22][740/3239]	Time 0.204 (0.256)	Data 0.003 (0.026)	Loss 4.3444 (4.4177)	Top-1 acc 25.000 (23.165)	Top-5 acc 44.141 (45.025)	lr 0.02259
Warmup Train [22][750/3239]	Time 0.214 (0.256)	Data 0.001 (0.026)	Loss 4.4708 (4.4179)	Top-1 acc 19.922 (23.157)	Top-5 acc 43.750 (45.021)	lr 0.02259
Warmup Train [22][760/3239]	Time 0.283 (0.255)	Data 0.001 (0.025)	Loss 4.4657 (4.4180)	Top-1 acc 26.172 (23.148)	Top-5 acc 48.438 (45.006)	lr 0.02258
Warmup Train [22][770/3239]	Time 0.315 (0.255)	Data 0.001 (0.025)	Loss 4.3132 (4.4172)	Top-1 acc 26.562 (23.152)	Top-5 acc 47.266 (45.027)	lr 0.02257
Warmup Train [22][780/3239]	Time 0.231 (0.255)	Data 0.002 (0.025)	Loss 4.2666 (4.4168)	Top-1 acc 26.953 (23.149)	Top-5 acc 46.094 (45.044)	lr 0.02257
Warmup Train [22][790/3239]	Time 0.275 (0.254)	Data 0.002 (0.025)	Loss 4.5283 (4.4172)	Top-1 acc 22.266 (23.150)	Top-5 acc 44.141 (45.038)	lr 0.02256
Warmup Train [22][800/3239]	Time 0.177 (0.254)	Data 0.002 (0.024)	Loss 4.5748 (4.4171)	Top-1 acc 18.359 (23.152)	Top-5 acc 42.578 (45.055)	lr 0.02256
Warmup Train [22][810/3239]	Time 0.200 (0.254)	Data 0.001 (0.024)	Loss 4.4641 (4.4174)	Top-1 acc 20.703 (23.140)	Top-5 acc 45.703 (45.050)	lr 0.02255
Warmup Train [22][820/3239]	Time 0.153 (0.253)	Data 0.002 (0.024)	Loss 4.3800 (4.4174)	Top-1 acc 21.484 (23.153)	Top-5 acc 46.484 (45.047)	lr 0.02254
Warmup Train [22][830/3239]	Time 0.211 (0.253)	Data 0.001 (0.023)	Loss 4.2560 (4.4170)	Top-1 acc 28.125 (23.179)	Top-5 acc 51.953 (45.061)	lr 0.02254
Warmup Train [22][840/3239]	Time 0.237 (0.252)	Data 0.001 (0.023)	Loss 4.5121 (4.4173)	Top-1 acc 22.266 (23.179)	Top-5 acc 42.969 (45.060)	lr 0.02253
Warmup Train [22][850/3239]	Time 0.199 (0.252)	Data 0.001 (0.023)	Loss 4.1666 (4.4172)	Top-1 acc 29.297 (23.175)	Top-5 acc 55.859 (45.070)	lr 0.02253
Warmup Train [22][860/3239]	Time 0.196 (0.252)	Data 0.003 (0.023)	Loss 4.3788 (4.4174)	Top-1 acc 22.656 (23.168)	Top-5 acc 46.094 (45.063)	lr 0.02252
Warmup Train [22][870/3239]	Time 0.244 (0.252)	Data 0.001 (0.023)	Loss 4.5134 (4.4177)	Top-1 acc 24.219 (23.166)	Top-5 acc 40.234 (45.059)	lr 0.02251
Warmup Train [22][880/3239]	Time 0.379 (0.252)	Data 0.002 (0.022)	Loss 4.5224 (4.4180)	Top-1 acc 19.922 (23.153)	Top-5 acc 43.750 (45.049)	lr 0.02251
Warmup Train [22][890/3239]	Time 0.196 (0.251)	Data 0.001 (0.022)	Loss 4.4103 (4.4181)	Top-1 acc 21.484 (23.149)	Top-5 acc 44.141 (45.036)	lr 0.02250
Warmup Train [22][900/3239]	Time 0.192 (0.251)	Data 0.001 (0.022)	Loss 4.3454 (4.4175)	Top-1 acc 27.734 (23.167)	Top-5 acc 46.484 (45.053)	lr 0.02250
Warmup Train [22][910/3239]	Time 0.158 (0.251)	Data 0.001 (0.022)	Loss 4.4954 (4.4171)	Top-1 acc 23.828 (23.182)	Top-5 acc 46.875 (45.068)	lr 0.02249
Warmup Train [22][920/3239]	Time 0.227 (0.251)	Data 0.002 (0.022)	Loss 4.2493 (4.4165)	Top-1 acc 28.125 (23.189)	Top-5 acc 45.312 (45.062)	lr 0.02248
Warmup Train [22][930/3239]	Time 0.208 (0.251)	Data 0.001 (0.021)	Loss 4.5854 (4.4166)	Top-1 acc 19.922 (23.185)	Top-5 acc 43.359 (45.061)	lr 0.02248
Warmup Train [22][940/3239]	Time 0.225 (0.251)	Data 0.001 (0.021)	Loss 4.4899 (4.4165)	Top-1 acc 19.141 (23.184)	Top-5 acc 45.312 (45.069)	lr 0.02247
Warmup Train [22][950/3239]	Time 0.197 (0.250)	Data 0.001 (0.021)	Loss 4.4802 (4.4166)	Top-1 acc 25.391 (23.182)	Top-5 acc 42.578 (45.065)	lr 0.02247
Warmup Train [22][960/3239]	Time 0.203 (0.250)	Data 0.001 (0.021)	Loss 4.2288 (4.4162)	Top-1 acc 23.047 (23.176)	Top-5 acc 47.266 (45.070)	lr 0.02246
Warmup Train [22][970/3239]	Time 0.309 (0.250)	Data 0.002 (0.021)	Loss 4.2676 (4.4157)	Top-1 acc 23.047 (23.194)	Top-5 acc 48.047 (45.084)	lr 0.02245
Warmup Train [22][980/3239]	Time 0.206 (0.249)	Data 0.003 (0.020)	Loss 4.3502 (4.4161)	Top-1 acc 22.656 (23.185)	Top-5 acc 45.703 (45.078)	lr 0.02245
Warmup Train [22][990/3239]	Time 0.200 (0.249)	Data 0.001 (0.020)	Loss 4.4516 (4.4160)	Top-1 acc 19.531 (23.179)	Top-5 acc 43.750 (45.090)	lr 0.02244
Warmup Train [22][1000/3239]	Time 0.236 (0.249)	Data 0.001 (0.020)	Loss 4.2983 (4.4159)	Top-1 acc 26.172 (23.186)	Top-5 acc 48.438 (45.089)	lr 0.02244
Warmup Train [22][1010/3239]	Time 0.196 (0.249)	Data 0.001 (0.020)	Loss 4.3627 (4.4154)	Top-1 acc 23.438 (23.198)	Top-5 acc 49.219 (45.099)	lr 0.02243
Warmup Train [22][1020/3239]	Time 0.146 (0.248)	Data 0.001 (0.020)	Loss 4.7670 (4.4148)	Top-1 acc 18.750 (23.206)	Top-5 acc 35.156 (45.108)	lr 0.02242
Warmup Train [22][1030/3239]	Time 0.191 (0.248)	Data 0.001 (0.020)	Loss 4.3718 (4.4148)	Top-1 acc 24.219 (23.209)	Top-5 acc 44.141 (45.107)	lr 0.02242
Warmup Train [22][1040/3239]	Time 0.233 (0.248)	Data 0.001 (0.019)	Loss 4.4550 (4.4141)	Top-1 acc 21.875 (23.218)	Top-5 acc 44.531 (45.131)	lr 0.02241
Warmup Train [22][1050/3239]	Time 0.188 (0.248)	Data 0.001 (0.019)	Loss 4.2839 (4.4141)	Top-1 acc 27.344 (23.210)	Top-5 acc 50.781 (45.134)	lr 0.02240
Warmup Train [22][1060/3239]	Time 0.229 (0.247)	Data 0.001 (0.019)	Loss 4.2382 (4.4136)	Top-1 acc 26.953 (23.219)	Top-5 acc 46.094 (45.149)	lr 0.02240
Warmup Train [22][1070/3239]	Time 0.267 (0.247)	Data 0.002 (0.019)	Loss 4.2751 (4.4139)	Top-1 acc 26.953 (23.215)	Top-5 acc 50.391 (45.143)	lr 0.02239
Warmup Train [22][1080/3239]	Time 0.281 (0.247)	Data 0.001 (0.019)	Loss 4.3512 (4.4139)	Top-1 acc 23.828 (23.212)	Top-5 acc 50.781 (45.145)	lr 0.02239
Warmup Train [22][1090/3239]	Time 0.239 (0.247)	Data 0.001 (0.019)	Loss 4.3780 (4.4142)	Top-1 acc 24.219 (23.207)	Top-5 acc 43.359 (45.140)	lr 0.02238
Warmup Train [22][1100/3239]	Time 0.226 (0.247)	Data 0.001 (0.018)	Loss 4.2609 (4.4138)	Top-1 acc 28.906 (23.211)	Top-5 acc 49.219 (45.155)	lr 0.02237
Warmup Train [22][1110/3239]	Time 0.183 (0.246)	Data 0.002 (0.018)	Loss 4.4183 (4.4139)	Top-1 acc 23.047 (23.216)	Top-5 acc 46.094 (45.151)	lr 0.02237
Warmup Train [22][1120/3239]	Time 0.147 (0.246)	Data 0.001 (0.018)	Loss 4.5583 (4.4144)	Top-1 acc 19.922 (23.211)	Top-5 acc 39.844 (45.153)	lr 0.02236
Warmup Train [22][1130/3239]	Time 0.207 (0.246)	Data 0.001 (0.018)	Loss 4.6015 (4.4142)	Top-1 acc 21.094 (23.214)	Top-5 acc 38.281 (45.155)	lr 0.02236
Warmup Train [22][1140/3239]	Time 0.281 (0.246)	Data 0.001 (0.018)	Loss 4.4209 (4.4144)	Top-1 acc 24.609 (23.221)	Top-5 acc 44.922 (45.152)	lr 0.02235
Warmup Train [22][1150/3239]	Time 0.159 (0.246)	Data 0.001 (0.018)	Loss 4.3759 (4.4142)	Top-1 acc 26.562 (23.225)	Top-5 acc 46.875 (45.151)	lr 0.02234
Warmup Train [22][1160/3239]	Time 0.268 (0.246)	Data 0.002 (0.018)	Loss 4.3341 (4.4146)	Top-1 acc 25.000 (23.216)	Top-5 acc 49.219 (45.144)	lr 0.02234
Warmup Train [22][1170/3239]	Time 0.250 (0.246)	Data 0.002 (0.018)	Loss 4.3718 (4.4144)	Top-1 acc 24.609 (23.222)	Top-5 acc 47.656 (45.149)	lr 0.02233
Warmup Train [22][1180/3239]	Time 0.320 (0.245)	Data 0.001 (0.017)	Loss 4.5631 (4.4146)	Top-1 acc 21.875 (23.223)	Top-5 acc 44.531 (45.148)	lr 0.02233
Warmup Train [22][1190/3239]	Time 0.240 (0.245)	Data 0.001 (0.017)	Loss 4.3756 (4.4141)	Top-1 acc 21.484 (23.217)	Top-5 acc 44.531 (45.160)	lr 0.02232
Warmup Train [22][1200/3239]	Time 0.226 (0.245)	Data 0.001 (0.017)	Loss 4.4570 (4.4143)	Top-1 acc 20.703 (23.218)	Top-5 acc 40.234 (45.152)	lr 0.02231
Warmup Train [22][1210/3239]	Time 0.220 (0.245)	Data 0.001 (0.017)	Loss 4.6300 (4.4141)	Top-1 acc 23.047 (23.219)	Top-5 acc 41.016 (45.162)	lr 0.02231
Warmup Train [22][1220/3239]	Time 0.230 (0.245)	Data 0.001 (0.017)	Loss 4.3691 (4.4135)	Top-1 acc 19.141 (23.221)	Top-5 acc 45.312 (45.172)	lr 0.02230
Warmup Train [22][1230/3239]	Time 0.222 (0.245)	Data 0.002 (0.017)	Loss 4.1622 (4.4129)	Top-1 acc 24.609 (23.223)	Top-5 acc 48.047 (45.185)	lr 0.02230
Warmup Train [22][1240/3239]	Time 0.221 (0.244)	Data 0.001 (0.017)	Loss 4.4901 (4.4131)	Top-1 acc 24.609 (23.216)	Top-5 acc 42.969 (45.181)	lr 0.02229
Warmup Train [22][1250/3239]	Time 0.201 (0.244)	Data 0.001 (0.017)	Loss 4.3277 (4.4130)	Top-1 acc 28.516 (23.224)	Top-5 acc 47.266 (45.186)	lr 0.02228
Warmup Train [22][1260/3239]	Time 0.168 (0.244)	Data 0.001 (0.016)	Loss 4.4277 (4.4131)	Top-1 acc 25.391 (23.236)	Top-5 acc 45.312 (45.189)	lr 0.02228
Warmup Train [22][1270/3239]	Time 0.208 (0.244)	Data 0.001 (0.016)	Loss 4.4126 (4.4131)	Top-1 acc 24.609 (23.235)	Top-5 acc 44.531 (45.190)	lr 0.02227
Warmup Train [22][1280/3239]	Time 0.293 (0.244)	Data 0.001 (0.016)	Loss 4.2753 (4.4131)	Top-1 acc 26.172 (23.243)	Top-5 acc 46.484 (45.186)	lr 0.02227
Warmup Train [22][1290/3239]	Time 0.176 (0.244)	Data 0.001 (0.016)	Loss 4.3707 (4.4130)	Top-1 acc 26.562 (23.237)	Top-5 acc 48.438 (45.193)	lr 0.02226
Warmup Train [22][1300/3239]	Time 0.282 (0.244)	Data 0.001 (0.016)	Loss 4.4331 (4.4131)	Top-1 acc 21.875 (23.241)	Top-5 acc 43.359 (45.188)	lr 0.02225
Warmup Train [22][1310/3239]	Time 0.183 (0.243)	Data 0.001 (0.016)	Loss 4.5260 (4.4132)	Top-1 acc 21.875 (23.233)	Top-5 acc 41.797 (45.194)	lr 0.02225
Warmup Train [22][1320/3239]	Time 0.149 (0.243)	Data 0.001 (0.016)	Loss 4.4351 (4.4133)	Top-1 acc 23.438 (23.228)	Top-5 acc 43.750 (45.189)	lr 0.02224
Warmup Train [22][1330/3239]	Time 0.175 (0.243)	Data 0.001 (0.016)	Loss 4.3573 (4.4134)	Top-1 acc 23.047 (23.233)	Top-5 acc 44.922 (45.182)	lr 0.02224
Warmup Train [22][1340/3239]	Time 0.183 (0.243)	Data 0.001 (0.016)	Loss 4.3143 (4.4134)	Top-1 acc 21.484 (23.230)	Top-5 acc 46.484 (45.175)	lr 0.02223
Warmup Train [22][1350/3239]	Time 0.130 (0.243)	Data 0.001 (0.016)	Loss 4.4517 (4.4133)	Top-1 acc 23.828 (23.235)	Top-5 acc 42.188 (45.175)	lr 0.02222
Warmup Train [22][1360/3239]	Time 0.212 (0.243)	Data 0.001 (0.015)	Loss 4.4207 (4.4126)	Top-1 acc 22.656 (23.249)	Top-5 acc 45.312 (45.196)	lr 0.02222
Warmup Train [22][1370/3239]	Time 0.247 (0.243)	Data 0.001 (0.015)	Loss 4.4294 (4.4120)	Top-1 acc 26.172 (23.259)	Top-5 acc 42.969 (45.213)	lr 0.02221
Warmup Train [22][1380/3239]	Time 0.131 (0.242)	Data 0.001 (0.015)	Loss 4.3463 (4.4119)	Top-1 acc 23.438 (23.261)	Top-5 acc 45.703 (45.209)	lr 0.02221
Warmup Train [22][1390/3239]	Time 0.241 (0.242)	Data 0.001 (0.015)	Loss 4.3946 (4.4117)	Top-1 acc 22.656 (23.265)	Top-5 acc 43.359 (45.210)	lr 0.02220
Warmup Train [22][1400/3239]	Time 0.343 (0.242)	Data 0.001 (0.015)	Loss 4.3935 (4.4110)	Top-1 acc 23.047 (23.283)	Top-5 acc 43.359 (45.223)	lr 0.02219
Warmup Train [22][1410/3239]	Time 0.296 (0.242)	Data 0.001 (0.015)	Loss 4.3392 (4.4109)	Top-1 acc 26.562 (23.285)	Top-5 acc 46.484 (45.223)	lr 0.02219
Warmup Train [22][1420/3239]	Time 0.152 (0.242)	Data 0.001 (0.015)	Loss 4.2921 (4.4107)	Top-1 acc 22.656 (23.284)	Top-5 acc 48.828 (45.221)	lr 0.02218
Warmup Train [22][1430/3239]	Time 0.201 (0.242)	Data 0.001 (0.015)	Loss 4.3606 (4.4108)	Top-1 acc 23.047 (23.283)	Top-5 acc 48.047 (45.219)	lr 0.02218
Warmup Train [22][1440/3239]	Time 0.199 (0.242)	Data 0.001 (0.015)	Loss 4.5959 (4.4104)	Top-1 acc 18.359 (23.289)	Top-5 acc 40.234 (45.225)	lr 0.02217
Warmup Train [22][1450/3239]	Time 0.196 (0.242)	Data 0.001 (0.015)	Loss 4.3965 (4.4104)	Top-1 acc 24.609 (23.283)	Top-5 acc 42.578 (45.216)	lr 0.02216
Warmup Train [22][1460/3239]	Time 0.207 (0.242)	Data 0.001 (0.015)	Loss 4.3412 (4.4103)	Top-1 acc 22.656 (23.281)	Top-5 acc 48.047 (45.211)	lr 0.02216
Warmup Train [22][1470/3239]	Time 0.146 (0.241)	Data 0.002 (0.015)	Loss 4.3716 (4.4098)	Top-1 acc 21.484 (23.291)	Top-5 acc 46.484 (45.229)	lr 0.02215
Warmup Train [22][1480/3239]	Time 0.203 (0.241)	Data 0.001 (0.014)	Loss 4.4604 (4.4099)	Top-1 acc 24.609 (23.291)	Top-5 acc 45.312 (45.223)	lr 0.02215
Warmup Train [22][1490/3239]	Time 0.310 (0.241)	Data 0.001 (0.014)	Loss 4.2644 (4.4098)	Top-1 acc 28.906 (23.285)	Top-5 acc 50.000 (45.225)	lr 0.02214
Warmup Train [22][1500/3239]	Time 0.163 (0.241)	Data 0.001 (0.014)	Loss 4.3996 (4.4097)	Top-1 acc 24.609 (23.292)	Top-5 acc 46.094 (45.232)	lr 0.02213
Warmup Train [22][1510/3239]	Time 0.209 (0.241)	Data 0.001 (0.014)	Loss 4.3808 (4.4096)	Top-1 acc 21.875 (23.291)	Top-5 acc 45.312 (45.233)	lr 0.02213
Warmup Train [22][1520/3239]	Time 0.185 (0.241)	Data 0.001 (0.014)	Loss 4.5124 (4.4096)	Top-1 acc 19.141 (23.293)	Top-5 acc 43.359 (45.233)	lr 0.02212
Warmup Train [22][1530/3239]	Time 0.165 (0.241)	Data 0.001 (0.014)	Loss 4.4292 (4.4097)	Top-1 acc 23.828 (23.292)	Top-5 acc 44.531 (45.230)	lr 0.02212
Warmup Train [22][1540/3239]	Time 0.233 (0.241)	Data 0.001 (0.014)	Loss 4.3814 (4.4095)	Top-1 acc 20.703 (23.286)	Top-5 acc 43.359 (45.228)	lr 0.02211
Warmup Train [22][1550/3239]	Time 0.186 (0.241)	Data 0.001 (0.014)	Loss 4.3400 (4.4093)	Top-1 acc 25.781 (23.285)	Top-5 acc 46.875 (45.232)	lr 0.02210
Warmup Train [22][1560/3239]	Time 0.207 (0.240)	Data 0.002 (0.014)	Loss 4.3721 (4.4095)	Top-1 acc 24.609 (23.275)	Top-5 acc 45.703 (45.225)	lr 0.02210
Warmup Train [22][1570/3239]	Time 0.214 (0.240)	Data 0.002 (0.014)	Loss 4.4933 (4.4090)	Top-1 acc 22.656 (23.280)	Top-5 acc 40.625 (45.234)	lr 0.02209
Warmup Train [22][1580/3239]	Time 0.177 (0.240)	Data 0.001 (0.014)	Loss 4.4777 (4.4092)	Top-1 acc 21.484 (23.277)	Top-5 acc 41.797 (45.231)	lr 0.02209
Warmup Train [22][1590/3239]	Time 0.169 (0.240)	Data 0.002 (0.014)	Loss 4.2309 (4.4091)	Top-1 acc 26.953 (23.275)	Top-5 acc 51.172 (45.234)	lr 0.02208
Warmup Train [22][1600/3239]	Time 0.331 (0.240)	Data 0.001 (0.014)	Loss 4.3231 (4.4092)	Top-1 acc 25.391 (23.266)	Top-5 acc 46.484 (45.231)	lr 0.02207
Warmup Train [22][1610/3239]	Time 0.198 (0.240)	Data 0.001 (0.014)	Loss 4.3998 (4.4092)	Top-1 acc 20.703 (23.267)	Top-5 acc 47.266 (45.228)	lr 0.02207
Warmup Train [22][1620/3239]	Time 0.180 (0.240)	Data 0.001 (0.013)	Loss 4.5003 (4.4092)	Top-1 acc 23.438 (23.262)	Top-5 acc 44.922 (45.229)	lr 0.02206
Warmup Train [22][1630/3239]	Time 0.217 (0.240)	Data 0.001 (0.013)	Loss 4.2876 (4.4091)	Top-1 acc 28.516 (23.267)	Top-5 acc 49.219 (45.233)	lr 0.02206
Warmup Train [22][1640/3239]	Time 0.145 (0.240)	Data 0.001 (0.013)	Loss 4.3733 (4.4089)	Top-1 acc 24.609 (23.273)	Top-5 acc 48.438 (45.238)	lr 0.02205
Warmup Train [22][1650/3239]	Time 0.204 (0.240)	Data 0.001 (0.013)	Loss 4.5139 (4.4088)	Top-1 acc 24.609 (23.277)	Top-5 acc 40.234 (45.237)	lr 0.02204
Warmup Train [22][1660/3239]	Time 0.245 (0.240)	Data 0.002 (0.013)	Loss 4.3521 (4.4084)	Top-1 acc 23.047 (23.284)	Top-5 acc 46.094 (45.244)	lr 0.02204
Warmup Train [22][1670/3239]	Time 0.215 (0.239)	Data 0.001 (0.013)	Loss 4.3701 (4.4083)	Top-1 acc 21.094 (23.279)	Top-5 acc 48.438 (45.251)	lr 0.02203
Warmup Train [22][1680/3239]	Time 0.235 (0.239)	Data 0.001 (0.013)	Loss 4.4288 (4.4083)	Top-1 acc 22.266 (23.278)	Top-5 acc 46.875 (45.251)	lr 0.02203
Warmup Train [22][1690/3239]	Time 0.206 (0.239)	Data 0.002 (0.013)	Loss 4.5819 (4.4086)	Top-1 acc 22.266 (23.270)	Top-5 acc 42.188 (45.240)	lr 0.02202
Warmup Train [22][1700/3239]	Time 0.334 (0.239)	Data 0.001 (0.013)	Loss 4.5095 (4.4088)	Top-1 acc 23.047 (23.271)	Top-5 acc 42.188 (45.235)	lr 0.02201
Warmup Train [22][1710/3239]	Time 0.327 (0.239)	Data 0.002 (0.013)	Loss 4.3447 (4.4088)	Top-1 acc 23.828 (23.271)	Top-5 acc 48.828 (45.236)	lr 0.02201
Warmup Train [22][1720/3239]	Time 0.200 (0.239)	Data 0.002 (0.013)	Loss 4.3830 (4.4090)	Top-1 acc 25.391 (23.271)	Top-5 acc 46.094 (45.230)	lr 0.02200
Warmup Train [22][1730/3239]	Time 0.219 (0.239)	Data 0.001 (0.013)	Loss 4.1001 (4.4086)	Top-1 acc 31.250 (23.275)	Top-5 acc 53.125 (45.238)	lr 0.02200
Warmup Train [22][1740/3239]	Time 0.185 (0.239)	Data 0.002 (0.013)	Loss 4.2390 (4.4080)	Top-1 acc 24.609 (23.284)	Top-5 acc 50.781 (45.253)	lr 0.02199
Warmup Train [22][1750/3239]	Time 0.177 (0.239)	Data 0.003 (0.013)	Loss 4.1430 (4.4073)	Top-1 acc 27.734 (23.293)	Top-5 acc 52.734 (45.272)	lr 0.02198
Warmup Train [22][1760/3239]	Time 0.194 (0.238)	Data 0.001 (0.013)	Loss 4.3640 (4.4067)	Top-1 acc 23.438 (23.304)	Top-5 acc 42.969 (45.286)	lr 0.02198
Warmup Train [22][1770/3239]	Time 0.214 (0.238)	Data 0.001 (0.013)	Loss 4.2006 (4.4066)	Top-1 acc 24.219 (23.307)	Top-5 acc 50.000 (45.291)	lr 0.02197
Warmup Train [22][1780/3239]	Time 0.258 (0.238)	Data 0.001 (0.012)	Loss 4.2160 (4.4065)	Top-1 acc 27.344 (23.311)	Top-5 acc 50.000 (45.300)	lr 0.02197
Warmup Train [22][1790/3239]	Time 0.208 (0.238)	Data 0.001 (0.012)	Loss 4.3618 (4.4066)	Top-1 acc 21.875 (23.311)	Top-5 acc 50.391 (45.297)	lr 0.02196
Warmup Train [22][1800/3239]	Time 0.333 (0.238)	Data 0.001 (0.012)	Loss 4.3294 (4.4065)	Top-1 acc 23.438 (23.311)	Top-5 acc 47.266 (45.300)	lr 0.02195
Warmup Train [22][1810/3239]	Time 0.200 (0.238)	Data 0.001 (0.012)	Loss 4.5135 (4.4066)	Top-1 acc 21.094 (23.313)	Top-5 acc 41.797 (45.293)	lr 0.02195
Warmup Train [22][1820/3239]	Time 0.200 (0.238)	Data 0.001 (0.012)	Loss 4.3035 (4.4065)	Top-1 acc 24.219 (23.314)	Top-5 acc 47.656 (45.299)	lr 0.02194
Warmup Train [22][1830/3239]	Time 0.182 (0.238)	Data 0.001 (0.012)	Loss 4.4120 (4.4061)	Top-1 acc 26.953 (23.322)	Top-5 acc 46.875 (45.310)	lr 0.02194
Warmup Train [22][1840/3239]	Time 0.170 (0.238)	Data 0.001 (0.012)	Loss 4.4019 (4.4059)	Top-1 acc 23.438 (23.322)	Top-5 acc 44.531 (45.313)	lr 0.02193
Warmup Train [22][1850/3239]	Time 0.190 (0.238)	Data 0.001 (0.012)	Loss 4.3728 (4.4060)	Top-1 acc 19.531 (23.324)	Top-5 acc 42.188 (45.311)	lr 0.02192
Warmup Train [22][1860/3239]	Time 0.282 (0.238)	Data 0.001 (0.012)	Loss 4.3358 (4.4057)	Top-1 acc 25.000 (23.326)	Top-5 acc 45.312 (45.317)	lr 0.02192
Warmup Train [22][1870/3239]	Time 0.135 (0.238)	Data 0.001 (0.012)	Loss 4.4600 (4.4054)	Top-1 acc 23.828 (23.329)	Top-5 acc 45.703 (45.327)	lr 0.02191
Warmup Train [22][1880/3239]	Time 0.195 (0.238)	Data 0.001 (0.012)	Loss 4.4721 (4.4053)	Top-1 acc 23.438 (23.329)	Top-5 acc 44.922 (45.331)	lr 0.02191
Warmup Train [22][1890/3239]	Time 0.281 (0.238)	Data 0.001 (0.012)	Loss 4.3262 (4.4052)	Top-1 acc 24.219 (23.331)	Top-5 acc 50.000 (45.341)	lr 0.02190
Warmup Train [22][1900/3239]	Time 0.378 (0.238)	Data 0.001 (0.012)	Loss 4.4569 (4.4052)	Top-1 acc 25.000 (23.340)	Top-5 acc 44.141 (45.339)	lr 0.02189
Warmup Train [22][1910/3239]	Time 0.178 (0.238)	Data 0.001 (0.012)	Loss 4.1848 (4.4047)	Top-1 acc 24.609 (23.345)	Top-5 acc 46.094 (45.351)	lr 0.02189
Warmup Train [22][1920/3239]	Time 0.217 (0.238)	Data 0.001 (0.012)	Loss 4.3200 (4.4044)	Top-1 acc 23.438 (23.351)	Top-5 acc 46.875 (45.352)	lr 0.02188
Warmup Train [22][1930/3239]	Time 0.176 (0.237)	Data 0.002 (0.012)	Loss 4.3191 (4.4041)	Top-1 acc 24.609 (23.353)	Top-5 acc 44.922 (45.357)	lr 0.02188
Warmup Train [22][1940/3239]	Time 0.220 (0.237)	Data 0.001 (0.012)	Loss 4.2445 (4.4042)	Top-1 acc 27.734 (23.351)	Top-5 acc 51.562 (45.355)	lr 0.02187
Warmup Train [22][1950/3239]	Time 0.189 (0.237)	Data 0.001 (0.012)	Loss 4.3056 (4.4042)	Top-1 acc 25.391 (23.354)	Top-5 acc 46.094 (45.357)	lr 0.02186
Warmup Train [22][1960/3239]	Time 0.243 (0.237)	Data 0.001 (0.012)	Loss 4.3364 (4.4041)	Top-1 acc 24.609 (23.354)	Top-5 acc 46.094 (45.361)	lr 0.02186
Warmup Train [22][1970/3239]	Time 0.224 (0.237)	Data 0.001 (0.012)	Loss 4.4235 (4.4040)	Top-1 acc 23.438 (23.350)	Top-5 acc 43.750 (45.361)	lr 0.02185
Warmup Train [22][1980/3239]	Time 0.230 (0.237)	Data 0.001 (0.012)	Loss 4.3371 (4.4040)	Top-1 acc 23.438 (23.351)	Top-5 acc 43.750 (45.364)	lr 0.02185
Warmup Train [22][1990/3239]	Time 0.235 (0.237)	Data 0.001 (0.011)	Loss 4.5077 (4.4041)	Top-1 acc 19.922 (23.345)	Top-5 acc 42.578 (45.359)	lr 0.02184
Warmup Train [22][2000/3239]	Time 0.393 (0.237)	Data 0.001 (0.011)	Loss 4.5559 (4.4041)	Top-1 acc 18.750 (23.348)	Top-5 acc 42.969 (45.365)	lr 0.02183
Warmup Train [22][2010/3239]	Time 0.221 (0.237)	Data 0.001 (0.011)	Loss 4.4307 (4.4040)	Top-1 acc 23.047 (23.352)	Top-5 acc 46.094 (45.367)	lr 0.02183
Warmup Train [22][2020/3239]	Time 0.224 (0.237)	Data 0.001 (0.011)	Loss 4.3260 (4.4039)	Top-1 acc 27.344 (23.352)	Top-5 acc 47.656 (45.370)	lr 0.02182
Warmup Train [22][2030/3239]	Time 0.197 (0.237)	Data 0.001 (0.011)	Loss 4.3764 (4.4035)	Top-1 acc 22.656 (23.359)	Top-5 acc 42.578 (45.376)	lr 0.02181
Warmup Train [22][2040/3239]	Time 0.207 (0.237)	Data 0.001 (0.011)	Loss 4.4601 (4.4035)	Top-1 acc 18.359 (23.354)	Top-5 acc 45.312 (45.376)	lr 0.02181
Warmup Train [22][2050/3239]	Time 0.177 (0.237)	Data 0.001 (0.011)	Loss 4.4494 (4.4032)	Top-1 acc 24.219 (23.359)	Top-5 acc 46.484 (45.383)	lr 0.02180
Warmup Train [22][2060/3239]	Time 0.251 (0.237)	Data 0.001 (0.011)	Loss 4.2978 (4.4030)	Top-1 acc 30.859 (23.363)	Top-5 acc 44.922 (45.387)	lr 0.02180
Warmup Train [22][2070/3239]	Time 0.201 (0.237)	Data 0.001 (0.011)	Loss 4.4275 (4.4031)	Top-1 acc 23.828 (23.364)	Top-5 acc 46.094 (45.389)	lr 0.02179
Warmup Train [22][2080/3239]	Time 0.201 (0.237)	Data 0.001 (0.011)	Loss 4.6473 (4.4031)	Top-1 acc 17.578 (23.365)	Top-5 acc 36.719 (45.389)	lr 0.02178
Warmup Train [22][2090/3239]	Time 0.241 (0.237)	Data 0.001 (0.011)	Loss 4.4530 (4.4029)	Top-1 acc 24.219 (23.371)	Top-5 acc 43.359 (45.391)	lr 0.02178
Warmup Train [22][2100/3239]	Time 0.357 (0.237)	Data 0.001 (0.011)	Loss 4.5060 (4.4029)	Top-1 acc 23.047 (23.376)	Top-5 acc 41.016 (45.388)	lr 0.02177
Warmup Train [22][2110/3239]	Time 0.398 (0.237)	Data 0.001 (0.011)	Loss 4.3535 (4.4027)	Top-1 acc 26.562 (23.378)	Top-5 acc 46.484 (45.394)	lr 0.02177
Warmup Train [22][2120/3239]	Time 0.232 (0.236)	Data 0.002 (0.011)	Loss 4.4640 (4.4025)	Top-1 acc 23.438 (23.382)	Top-5 acc 46.875 (45.400)	lr 0.02176
Warmup Train [22][2130/3239]	Time 0.268 (0.236)	Data 0.002 (0.011)	Loss 4.4502 (4.4023)	Top-1 acc 21.875 (23.382)	Top-5 acc 46.484 (45.403)	lr 0.02175
Warmup Train [22][2140/3239]	Time 0.216 (0.236)	Data 0.001 (0.011)	Loss 4.1889 (4.4023)	Top-1 acc 28.125 (23.383)	Top-5 acc 51.562 (45.410)	lr 0.02175
Warmup Train [22][2150/3239]	Time 0.222 (0.236)	Data 0.002 (0.011)	Loss 4.2800 (4.4023)	Top-1 acc 24.609 (23.379)	Top-5 acc 48.438 (45.409)	lr 0.02174
Warmup Train [22][2160/3239]	Time 0.219 (0.236)	Data 0.001 (0.011)	Loss 4.3883 (4.4022)	Top-1 acc 21.094 (23.377)	Top-5 acc 46.875 (45.407)	lr 0.02174
Warmup Train [22][2170/3239]	Time 0.190 (0.236)	Data 0.002 (0.011)	Loss 4.5908 (4.4023)	Top-1 acc 23.047 (23.379)	Top-5 acc 41.406 (45.403)	lr 0.02173
Warmup Train [22][2180/3239]	Time 0.187 (0.236)	Data 0.001 (0.011)	Loss 4.7419 (4.4022)	Top-1 acc 17.969 (23.381)	Top-5 acc 35.938 (45.404)	lr 0.02172
Warmup Train [22][2190/3239]	Time 0.289 (0.236)	Data 0.002 (0.011)	Loss 4.5307 (4.4024)	Top-1 acc 23.438 (23.377)	Top-5 acc 45.312 (45.400)	lr 0.02172
Warmup Train [22][2200/3239]	Time 0.281 (0.236)	Data 0.001 (0.011)	Loss 4.2909 (4.4019)	Top-1 acc 28.516 (23.387)	Top-5 acc 45.703 (45.410)	lr 0.02171
Warmup Train [22][2210/3239]	Time 0.190 (0.236)	Data 0.002 (0.011)	Loss 4.3773 (4.4020)	Top-1 acc 25.781 (23.390)	Top-5 acc 44.141 (45.414)	lr 0.02171
Warmup Train [22][2220/3239]	Time 0.166 (0.236)	Data 0.001 (0.011)	Loss 4.4336 (4.4021)	Top-1 acc 23.438 (23.394)	Top-5 acc 46.094 (45.413)	lr 0.02170
Warmup Train [22][2230/3239]	Time 0.208 (0.236)	Data 0.001 (0.011)	Loss 4.3699 (4.4019)	Top-1 acc 26.953 (23.395)	Top-5 acc 47.656 (45.413)	lr 0.02169
Warmup Train [22][2240/3239]	Time 0.182 (0.236)	Data 0.001 (0.010)	Loss 4.3421 (4.4020)	Top-1 acc 24.219 (23.400)	Top-5 acc 45.703 (45.416)	lr 0.02169
Warmup Train [22][2250/3239]	Time 0.201 (0.236)	Data 0.001 (0.010)	Loss 4.5893 (4.4015)	Top-1 acc 18.359 (23.410)	Top-5 acc 40.234 (45.425)	lr 0.02168
Warmup Train [22][2260/3239]	Time 0.217 (0.236)	Data 0.001 (0.010)	Loss 4.5133 (4.4015)	Top-1 acc 17.578 (23.405)	Top-5 acc 41.797 (45.426)	lr 0.02168
Warmup Train [22][2270/3239]	Time 0.201 (0.236)	Data 0.001 (0.010)	Loss 4.5015 (4.4015)	Top-1 acc 20.703 (23.403)	Top-5 acc 40.625 (45.426)	lr 0.02167
Warmup Train [22][2280/3239]	Time 0.244 (0.235)	Data 0.002 (0.010)	Loss 4.3055 (4.4014)	Top-1 acc 27.734 (23.402)	Top-5 acc 49.219 (45.426)	lr 0.02166
Warmup Train [22][2290/3239]	Time 0.320 (0.236)	Data 0.002 (0.010)	Loss 4.5807 (4.4013)	Top-1 acc 17.188 (23.405)	Top-5 acc 40.234 (45.428)	lr 0.02166
Warmup Train [22][2300/3239]	Time 0.392 (0.235)	Data 0.001 (0.010)	Loss 4.3642 (4.4011)	Top-1 acc 19.531 (23.402)	Top-5 acc 48.438 (45.434)	lr 0.02165
Warmup Train [22][2310/3239]	Time 0.207 (0.235)	Data 0.001 (0.010)	Loss 4.3866 (4.4010)	Top-1 acc 22.656 (23.403)	Top-5 acc 44.922 (45.436)	lr 0.02165
Warmup Train [22][2320/3239]	Time 0.226 (0.235)	Data 0.001 (0.010)	Loss 4.4063 (4.4009)	Top-1 acc 25.781 (23.405)	Top-5 acc 42.578 (45.440)	lr 0.02164
Warmup Train [22][2330/3239]	Time 0.201 (0.235)	Data 0.001 (0.010)	Loss 4.5997 (4.4011)	Top-1 acc 21.094 (23.402)	Top-5 acc 41.797 (45.434)	lr 0.02163
Warmup Train [22][2340/3239]	Time 0.130 (0.235)	Data 0.001 (0.010)	Loss 4.4897 (4.4012)	Top-1 acc 23.047 (23.402)	Top-5 acc 43.359 (45.430)	lr 0.02163
Warmup Train [22][2350/3239]	Time 0.273 (0.235)	Data 0.001 (0.010)	Loss 4.2259 (4.4012)	Top-1 acc 23.828 (23.402)	Top-5 acc 49.219 (45.433)	lr 0.02162
Warmup Train [22][2360/3239]	Time 0.229 (0.235)	Data 0.001 (0.010)	Loss 4.3291 (4.4013)	Top-1 acc 25.391 (23.403)	Top-5 acc 49.219 (45.431)	lr 0.02162
Warmup Train [22][2370/3239]	Time 0.133 (0.235)	Data 0.001 (0.010)	Loss 4.3187 (4.4011)	Top-1 acc 23.438 (23.406)	Top-5 acc 47.656 (45.434)	lr 0.02161
Warmup Train [22][2380/3239]	Time 0.259 (0.235)	Data 0.001 (0.010)	Loss 4.5033 (4.4012)	Top-1 acc 21.484 (23.403)	Top-5 acc 41.016 (45.430)	lr 0.02160
Warmup Train [22][2390/3239]	Time 0.277 (0.235)	Data 0.001 (0.010)	Loss 4.3545 (4.4012)	Top-1 acc 25.781 (23.404)	Top-5 acc 44.141 (45.425)	lr 0.02160
Warmup Train [22][2400/3239]	Time 0.292 (0.235)	Data 0.001 (0.010)	Loss 4.4400 (4.4013)	Top-1 acc 21.875 (23.404)	Top-5 acc 44.141 (45.421)	lr 0.02159
Warmup Train [22][2410/3239]	Time 0.173 (0.235)	Data 0.001 (0.010)	Loss 4.6060 (4.4016)	Top-1 acc 21.094 (23.404)	Top-5 acc 42.969 (45.417)	lr 0.02159
Warmup Train [22][2420/3239]	Time 0.235 (0.235)	Data 0.001 (0.010)	Loss 4.3782 (4.4013)	Top-1 acc 21.875 (23.411)	Top-5 acc 46.484 (45.421)	lr 0.02158
Warmup Train [22][2430/3239]	Time 0.241 (0.235)	Data 0.001 (0.010)	Loss 4.3776 (4.4013)	Top-1 acc 26.953 (23.413)	Top-5 acc 46.875 (45.420)	lr 0.02157
Warmup Train [22][2440/3239]	Time 0.171 (0.235)	Data 0.001 (0.010)	Loss 4.6818 (4.4012)	Top-1 acc 19.141 (23.415)	Top-5 acc 37.891 (45.423)	lr 0.02157
Warmup Train [22][2450/3239]	Time 0.263 (0.235)	Data 0.001 (0.010)	Loss 4.4056 (4.4013)	Top-1 acc 22.656 (23.415)	Top-5 acc 45.312 (45.418)	lr 0.02156
Warmup Train [22][2460/3239]	Time 0.217 (0.235)	Data 0.001 (0.010)	Loss 4.2495 (4.4012)	Top-1 acc 28.125 (23.415)	Top-5 acc 49.219 (45.421)	lr 0.02156
Warmup Train [22][2470/3239]	Time 0.261 (0.235)	Data 0.001 (0.010)	Loss 4.4253 (4.4013)	Top-1 acc 23.828 (23.418)	Top-5 acc 46.094 (45.420)	lr 0.02155
Warmup Train [22][2480/3239]	Time 0.282 (0.235)	Data 0.001 (0.010)	Loss 4.4264 (4.4015)	Top-1 acc 24.219 (23.416)	Top-5 acc 46.875 (45.416)	lr 0.02154
Warmup Train [22][2490/3239]	Time 0.215 (0.235)	Data 0.002 (0.010)	Loss 4.3181 (4.4015)	Top-1 acc 25.391 (23.415)	Top-5 acc 43.359 (45.411)	lr 0.02154
Warmup Train [22][2500/3239]	Time 0.298 (0.235)	Data 0.001 (0.010)	Loss 4.6392 (4.4014)	Top-1 acc 20.312 (23.420)	Top-5 acc 39.453 (45.412)	lr 0.02153
Warmup Train [22][2510/3239]	Time 0.205 (0.234)	Data 0.002 (0.010)	Loss 4.3760 (4.4011)	Top-1 acc 26.172 (23.424)	Top-5 acc 44.922 (45.418)	lr 0.02153
Warmup Train [22][2520/3239]	Time 0.178 (0.234)	Data 0.001 (0.010)	Loss 4.3288 (4.4009)	Top-1 acc 23.438 (23.432)	Top-5 acc 46.875 (45.423)	lr 0.02152
Warmup Train [22][2530/3239]	Time 0.263 (0.234)	Data 0.039 (0.010)	Loss 4.3121 (4.4007)	Top-1 acc 26.172 (23.437)	Top-5 acc 45.312 (45.428)	lr 0.02151
Warmup Train [22][2540/3239]	Time 0.205 (0.234)	Data 0.002 (0.010)	Loss 4.4711 (4.4005)	Top-1 acc 21.094 (23.438)	Top-5 acc 42.188 (45.428)	lr 0.02151
Warmup Train [22][2550/3239]	Time 0.267 (0.234)	Data 0.001 (0.010)	Loss 4.5386 (4.4007)	Top-1 acc 20.703 (23.438)	Top-5 acc 43.359 (45.424)	lr 0.02150
Warmup Train [22][2560/3239]	Time 0.190 (0.234)	Data 0.001 (0.010)	Loss 4.3643 (4.4008)	Top-1 acc 21.484 (23.435)	Top-5 acc 47.266 (45.421)	lr 0.02150
Warmup Train [22][2570/3239]	Time 0.175 (0.234)	Data 0.002 (0.010)	Loss 4.3784 (4.4009)	Top-1 acc 22.656 (23.433)	Top-5 acc 43.359 (45.421)	lr 0.02149
Warmup Train [22][2580/3239]	Time 0.235 (0.234)	Data 0.002 (0.010)	Loss 4.3679 (4.4007)	Top-1 acc 26.172 (23.438)	Top-5 acc 48.047 (45.427)	lr 0.02148
Warmup Train [22][2590/3239]	Time 0.280 (0.234)	Data 0.001 (0.010)	Loss 4.1972 (4.4007)	Top-1 acc 24.609 (23.438)	Top-5 acc 47.266 (45.426)	lr 0.02148
Warmup Train [22][2600/3239]	Time 0.328 (0.234)	Data 0.001 (0.009)	Loss 4.5703 (4.4010)	Top-1 acc 20.312 (23.433)	Top-5 acc 44.141 (45.419)	lr 0.02147
Warmup Train [22][2610/3239]	Time 0.235 (0.234)	Data 0.001 (0.009)	Loss 4.2187 (4.4010)	Top-1 acc 25.000 (23.435)	Top-5 acc 45.703 (45.419)	lr 0.02147
Warmup Train [22][2620/3239]	Time 0.176 (0.234)	Data 0.001 (0.009)	Loss 4.3136 (4.4010)	Top-1 acc 21.875 (23.432)	Top-5 acc 50.000 (45.420)	lr 0.02146
Warmup Train [22][2630/3239]	Time 0.169 (0.234)	Data 0.001 (0.009)	Loss 4.3435 (4.4009)	Top-1 acc 24.219 (23.433)	Top-5 acc 46.094 (45.422)	lr 0.02145
Warmup Train [22][2640/3239]	Time 0.177 (0.234)	Data 0.001 (0.009)	Loss 4.4465 (4.4009)	Top-1 acc 26.953 (23.438)	Top-5 acc 47.266 (45.424)	lr 0.02145
Warmup Train [22][2650/3239]	Time 0.283 (0.234)	Data 0.001 (0.009)	Loss 4.3573 (4.4008)	Top-1 acc 24.609 (23.440)	Top-5 acc 46.094 (45.428)	lr 0.02144
Warmup Train [22][2660/3239]	Time 0.314 (0.234)	Data 0.001 (0.009)	Loss 4.3555 (4.4008)	Top-1 acc 22.656 (23.441)	Top-5 acc 44.531 (45.428)	lr 0.02144
Warmup Train [22][2670/3239]	Time 0.217 (0.234)	Data 0.001 (0.009)	Loss 4.5052 (4.4009)	Top-1 acc 19.531 (23.443)	Top-5 acc 42.188 (45.430)	lr 0.02143
Warmup Train [22][2680/3239]	Time 0.238 (0.234)	Data 0.001 (0.009)	Loss 4.2199 (4.4009)	Top-1 acc 29.297 (23.450)	Top-5 acc 48.438 (45.429)	lr 0.02142
Warmup Train [22][2690/3239]	Time 0.223 (0.234)	Data 0.001 (0.009)	Loss 4.4366 (4.4008)	Top-1 acc 22.656 (23.452)	Top-5 acc 44.531 (45.431)	lr 0.02142
Warmup Train [22][2700/3239]	Time 0.197 (0.234)	Data 0.001 (0.009)	Loss 4.3299 (4.4004)	Top-1 acc 23.047 (23.459)	Top-5 acc 50.391 (45.444)	lr 0.02141
Warmup Train [22][2710/3239]	Time 0.409 (0.234)	Data 0.001 (0.009)	Loss 4.2880 (4.4000)	Top-1 acc 23.828 (23.465)	Top-5 acc 45.703 (45.452)	lr 0.02141
Warmup Train [22][2720/3239]	Time 0.343 (0.234)	Data 0.001 (0.009)	Loss 4.4043 (4.3999)	Top-1 acc 21.875 (23.469)	Top-5 acc 45.703 (45.457)	lr 0.02140
Warmup Train [22][2730/3239]	Time 0.232 (0.234)	Data 0.001 (0.009)	Loss 4.3114 (4.4000)	Top-1 acc 26.562 (23.467)	Top-5 acc 46.484 (45.456)	lr 0.02139
Warmup Train [22][2740/3239]	Time 0.150 (0.234)	Data 0.002 (0.009)	Loss 4.5065 (4.4000)	Top-1 acc 21.875 (23.468)	Top-5 acc 44.141 (45.455)	lr 0.02139
Warmup Train [22][2750/3239]	Time 0.242 (0.234)	Data 0.001 (0.009)	Loss 4.3658 (4.3999)	Top-1 acc 25.391 (23.469)	Top-5 acc 50.781 (45.461)	lr 0.02138
Warmup Train [22][2760/3239]	Time 0.190 (0.234)	Data 0.001 (0.009)	Loss 4.2720 (4.3998)	Top-1 acc 25.391 (23.468)	Top-5 acc 48.438 (45.464)	lr 0.02138
Warmup Train [22][2770/3239]	Time 0.213 (0.234)	Data 0.001 (0.009)	Loss 4.3136 (4.3997)	Top-1 acc 24.609 (23.470)	Top-5 acc 46.094 (45.467)	lr 0.02137
Warmup Train [22][2780/3239]	Time 0.203 (0.234)	Data 0.001 (0.009)	Loss 4.3426 (4.3996)	Top-1 acc 24.609 (23.473)	Top-5 acc 44.922 (45.471)	lr 0.02136
Warmup Train [22][2790/3239]	Time 0.186 (0.234)	Data 0.001 (0.009)	Loss 4.0638 (4.3992)	Top-1 acc 27.344 (23.481)	Top-5 acc 51.953 (45.481)	lr 0.02136
Warmup Train [22][2800/3239]	Time 0.188 (0.233)	Data 0.001 (0.009)	Loss 4.3885 (4.3990)	Top-1 acc 21.875 (23.485)	Top-5 acc 45.703 (45.484)	lr 0.02135
Warmup Train [22][2810/3239]	Time 0.324 (0.234)	Data 0.004 (0.009)	Loss 4.4259 (4.3989)	Top-1 acc 28.516 (23.490)	Top-5 acc 46.484 (45.490)	lr 0.02135
Warmup Train [22][2820/3239]	Time 0.203 (0.234)	Data 0.001 (0.009)	Loss 4.3798 (4.3989)	Top-1 acc 25.391 (23.490)	Top-5 acc 45.312 (45.487)	lr 0.02134
Warmup Train [22][2830/3239]	Time 0.163 (0.233)	Data 0.001 (0.009)	Loss 4.3233 (4.3987)	Top-1 acc 26.562 (23.495)	Top-5 acc 49.609 (45.496)	lr 0.02133
Warmup Train [22][2840/3239]	Time 0.136 (0.233)	Data 0.002 (0.009)	Loss 4.3664 (4.3987)	Top-1 acc 26.562 (23.496)	Top-5 acc 46.875 (45.498)	lr 0.02133
Warmup Train [22][2850/3239]	Time 0.205 (0.233)	Data 0.002 (0.009)	Loss 4.3515 (4.3985)	Top-1 acc 23.047 (23.497)	Top-5 acc 46.094 (45.503)	lr 0.02132
Warmup Train [22][2860/3239]	Time 0.282 (0.233)	Data 0.001 (0.009)	Loss 4.1043 (4.3983)	Top-1 acc 28.516 (23.498)	Top-5 acc 49.219 (45.501)	lr 0.02132
Warmup Train [22][2870/3239]	Time 0.207 (0.233)	Data 0.001 (0.009)	Loss 4.2856 (4.3982)	Top-1 acc 23.047 (23.501)	Top-5 acc 45.312 (45.507)	lr 0.02131
Warmup Train [22][2880/3239]	Time 0.205 (0.233)	Data 0.001 (0.009)	Loss 4.2157 (4.3980)	Top-1 acc 26.562 (23.507)	Top-5 acc 50.781 (45.512)	lr 0.02130
Warmup Train [22][2890/3239]	Time 0.223 (0.233)	Data 0.001 (0.009)	Loss 4.3081 (4.3978)	Top-1 acc 25.391 (23.513)	Top-5 acc 47.266 (45.515)	lr 0.02130
Warmup Train [22][2900/3239]	Time 0.192 (0.233)	Data 0.001 (0.009)	Loss 4.5262 (4.3978)	Top-1 acc 22.656 (23.516)	Top-5 acc 41.797 (45.514)	lr 0.02129
Warmup Train [22][2910/3239]	Time 0.218 (0.233)	Data 0.002 (0.009)	Loss 4.1866 (4.3977)	Top-1 acc 29.297 (23.517)	Top-5 acc 49.219 (45.515)	lr 0.02129
Warmup Train [22][2920/3239]	Time 0.283 (0.233)	Data 0.001 (0.009)	Loss 4.3877 (4.3975)	Top-1 acc 23.438 (23.522)	Top-5 acc 44.141 (45.521)	lr 0.02128
Warmup Train [22][2930/3239]	Time 0.224 (0.233)	Data 0.002 (0.009)	Loss 4.2159 (4.3974)	Top-1 acc 25.781 (23.524)	Top-5 acc 50.391 (45.523)	lr 0.02127
Warmup Train [22][2940/3239]	Time 0.235 (0.233)	Data 0.001 (0.009)	Loss 4.4031 (4.3974)	Top-1 acc 23.828 (23.520)	Top-5 acc 42.969 (45.520)	lr 0.02127
Warmup Train [22][2950/3239]	Time 0.215 (0.233)	Data 0.001 (0.009)	Loss 4.3434 (4.3974)	Top-1 acc 23.828 (23.520)	Top-5 acc 44.922 (45.520)	lr 0.02126
Warmup Train [22][2960/3239]	Time 0.224 (0.233)	Data 0.001 (0.009)	Loss 4.5583 (4.3974)	Top-1 acc 20.312 (23.522)	Top-5 acc 43.750 (45.521)	lr 0.02126
Warmup Train [22][2970/3239]	Time 0.165 (0.233)	Data 0.001 (0.009)	Loss 4.2757 (4.3973)	Top-1 acc 23.438 (23.524)	Top-5 acc 45.312 (45.523)	lr 0.02125
Warmup Train [22][2980/3239]	Time 0.235 (0.233)	Data 0.001 (0.009)	Loss 4.6358 (4.3975)	Top-1 acc 19.531 (23.519)	Top-5 acc 42.969 (45.516)	lr 0.02124
Warmup Train [22][2990/3239]	Time 0.197 (0.233)	Data 0.001 (0.009)	Loss 4.2759 (4.3974)	Top-1 acc 26.953 (23.520)	Top-5 acc 48.828 (45.516)	lr 0.02124
Warmup Train [22][3000/3239]	Time 0.183 (0.233)	Data 0.002 (0.009)	Loss 4.3034 (4.3974)	Top-1 acc 26.953 (23.521)	Top-5 acc 50.391 (45.519)	lr 0.02123
Warmup Train [22][3010/3239]	Time 0.200 (0.233)	Data 0.002 (0.009)	Loss 4.3406 (4.3975)	Top-1 acc 22.266 (23.520)	Top-5 acc 46.875 (45.515)	lr 0.02123
Warmup Train [22][3020/3239]	Time 0.363 (0.233)	Data 0.003 (0.009)	Loss 4.3189 (4.3976)	Top-1 acc 25.781 (23.518)	Top-5 acc 46.875 (45.513)	lr 0.02122
Warmup Train [22][3030/3239]	Time 0.138 (0.233)	Data 0.002 (0.009)	Loss 4.4747 (4.3975)	Top-1 acc 24.609 (23.520)	Top-5 acc 46.484 (45.518)	lr 0.02121
Warmup Train [22][3040/3239]	Time 0.201 (0.233)	Data 0.001 (0.009)	Loss 4.3218 (4.3973)	Top-1 acc 21.484 (23.525)	Top-5 acc 45.703 (45.517)	lr 0.02121
Warmup Train [22][3050/3239]	Time 0.194 (0.233)	Data 0.001 (0.008)	Loss 4.4913 (4.3974)	Top-1 acc 25.391 (23.526)	Top-5 acc 47.656 (45.519)	lr 0.02120
Warmup Train [22][3060/3239]	Time 0.224 (0.233)	Data 0.001 (0.008)	Loss 4.3377 (4.3973)	Top-1 acc 25.781 (23.529)	Top-5 acc 48.047 (45.524)	lr 0.02120
Warmup Train [22][3070/3239]	Time 0.210 (0.233)	Data 0.001 (0.008)	Loss 4.4503 (4.3971)	Top-1 acc 23.438 (23.532)	Top-5 acc 46.094 (45.525)	lr 0.02119
Warmup Train [22][3080/3239]	Time 0.187 (0.233)	Data 0.002 (0.008)	Loss 4.4710 (4.3970)	Top-1 acc 21.875 (23.536)	Top-5 acc 44.141 (45.527)	lr 0.02118
Warmup Train [22][3090/3239]	Time 0.223 (0.233)	Data 0.002 (0.008)	Loss 4.3451 (4.3969)	Top-1 acc 26.953 (23.537)	Top-5 acc 46.484 (45.528)	lr 0.02118
Warmup Train [22][3100/3239]	Time 0.210 (0.233)	Data 0.001 (0.008)	Loss 4.3493 (4.3968)	Top-1 acc 21.875 (23.537)	Top-5 acc 47.656 (45.528)	lr 0.02117
Warmup Train [22][3110/3239]	Time 0.391 (0.233)	Data 0.001 (0.008)	Loss 4.4721 (4.3966)	Top-1 acc 23.438 (23.544)	Top-5 acc 43.359 (45.536)	lr 0.02117
Warmup Train [22][3120/3239]	Time 0.264 (0.233)	Data 0.001 (0.008)	Loss 4.2934 (4.3964)	Top-1 acc 23.438 (23.549)	Top-5 acc 43.359 (45.540)	lr 0.02116
Warmup Train [22][3130/3239]	Time 0.254 (0.233)	Data 0.001 (0.008)	Loss 4.4018 (4.3963)	Top-1 acc 24.609 (23.552)	Top-5 acc 46.875 (45.544)	lr 0.02115
Warmup Train [22][3140/3239]	Time 0.191 (0.233)	Data 0.001 (0.008)	Loss 4.4726 (4.3962)	Top-1 acc 25.391 (23.552)	Top-5 acc 47.266 (45.546)	lr 0.02115
Warmup Train [22][3150/3239]	Time 0.184 (0.233)	Data 0.001 (0.008)	Loss 4.4256 (4.3961)	Top-1 acc 23.828 (23.555)	Top-5 acc 43.359 (45.548)	lr 0.02114
Warmup Train [22][3160/3239]	Time 0.250 (0.232)	Data 0.001 (0.008)	Loss 4.3625 (4.3960)	Top-1 acc 25.391 (23.553)	Top-5 acc 46.875 (45.551)	lr 0.02114
Warmup Train [22][3170/3239]	Time 0.163 (0.232)	Data 0.001 (0.008)	Loss 4.4567 (4.3962)	Top-1 acc 21.875 (23.553)	Top-5 acc 43.359 (45.546)	lr 0.02113
Warmup Train [22][3180/3239]	Time 0.235 (0.232)	Data 0.000 (0.008)	Loss 4.4421 (4.3962)	Top-1 acc 23.828 (23.554)	Top-5 acc 41.406 (45.544)	lr 0.02113
Warmup Train [22][3190/3239]	Time 0.198 (0.232)	Data 0.000 (0.008)	Loss 4.2477 (4.3963)	Top-1 acc 26.562 (23.548)	Top-5 acc 47.656 (45.541)	lr 0.02112
Warmup Train [22][3200/3239]	Time 0.234 (0.232)	Data 0.000 (0.008)	Loss 4.3673 (4.3960)	Top-1 acc 23.438 (23.555)	Top-5 acc 46.094 (45.550)	lr 0.02111
Warmup Train [22][3210/3239]	Time 0.272 (0.232)	Data 0.000 (0.008)	Loss 4.2966 (4.3959)	Top-1 acc 25.391 (23.558)	Top-5 acc 50.391 (45.550)	lr 0.02111
Warmup Train [22][3220/3239]	Time 0.173 (0.232)	Data 0.000 (0.008)	Loss 4.4106 (4.3960)	Top-1 acc 25.000 (23.555)	Top-5 acc 46.094 (45.547)	lr 0.02110
Warmup Train [22][3230/3239]	Time 0.183 (0.232)	Data 0.000 (0.008)	Loss 4.4257 (4.3959)	Top-1 acc 20.703 (23.557)	Top-5 acc 45.312 (45.548)	lr 0.02110
Warmup Train [22][3239/3239]	Time 0.161 (0.232)	Data 0.000 (0.008)	Loss 4.4661 (4.3959)	Top-1 acc 25.926 (23.556)	Top-5 acc 38.272 (45.544)	lr 0.02109
==========Warmup Valid [22/40]	loss 3.403	top-1 acc 30.585	top-5 acc 55.119	Train top-1 23.556	top-5 45.544	flops: 442.4M
Warmup Train [23][0/3239]	Time 16.966 (16.966)	Data 16.099 (16.099)	Loss 4.4953 (4.4953)	Top-1 acc 20.312 (20.312)	Top-5 acc 46.875 (46.875)	lr 0.02109
Warmup Train [23][10/3239]	Time 0.463 (2.010)	Data 0.002 (1.588)	Loss 4.2981 (4.3935)	Top-1 acc 25.000 (23.793)	Top-5 acc 43.750 (44.993)	lr 0.02108
Warmup Train [23][20/3239]	Time 0.166 (1.182)	Data 0.001 (0.833)	Loss 4.3967 (4.3881)	Top-1 acc 23.047 (24.014)	Top-5 acc 48.828 (45.443)	lr 0.02108
Warmup Train [23][30/3239]	Time 0.172 (0.871)	Data 0.001 (0.566)	Loss 4.3557 (4.3854)	Top-1 acc 26.172 (24.030)	Top-5 acc 46.484 (45.464)	lr 0.02107
Warmup Train [23][40/3239]	Time 0.235 (0.718)	Data 0.003 (0.429)	Loss 4.3262 (4.3682)	Top-1 acc 23.438 (24.143)	Top-5 acc 46.094 (45.941)	lr 0.02107
Warmup Train [23][50/3239]	Time 0.283 (0.625)	Data 0.002 (0.346)	Loss 4.3404 (4.3604)	Top-1 acc 23.828 (24.318)	Top-5 acc 48.047 (46.040)	lr 0.02106
Warmup Train [23][60/3239]	Time 0.246 (0.558)	Data 0.001 (0.289)	Loss 4.4272 (4.3524)	Top-1 acc 23.828 (24.520)	Top-5 acc 44.922 (46.196)	lr 0.02105
Warmup Train [23][70/3239]	Time 0.217 (0.511)	Data 0.001 (0.249)	Loss 4.5011 (4.3556)	Top-1 acc 22.266 (24.444)	Top-5 acc 42.188 (46.099)	lr 0.02105
Warmup Train [23][80/3239]	Time 0.208 (0.474)	Data 0.001 (0.218)	Loss 4.2753 (4.3512)	Top-1 acc 26.562 (24.402)	Top-5 acc 48.047 (46.301)	lr 0.02104
Warmup Train [23][90/3239]	Time 0.210 (0.448)	Data 0.001 (0.194)	Loss 4.3036 (4.3543)	Top-1 acc 27.734 (24.360)	Top-5 acc 46.875 (46.317)	lr 0.02104
Warmup Train [23][100/3239]	Time 0.354 (0.426)	Data 0.003 (0.175)	Loss 4.3464 (4.3492)	Top-1 acc 28.516 (24.482)	Top-5 acc 46.094 (46.345)	lr 0.02103
Warmup Train [23][110/3239]	Time 0.251 (0.408)	Data 0.001 (0.160)	Loss 4.3585 (4.3482)	Top-1 acc 26.172 (24.546)	Top-5 acc 44.922 (46.361)	lr 0.02102
Warmup Train [23][120/3239]	Time 0.202 (0.391)	Data 0.001 (0.147)	Loss 4.4423 (4.3526)	Top-1 acc 25.391 (24.454)	Top-5 acc 42.188 (46.281)	lr 0.02102
Warmup Train [23][130/3239]	Time 0.169 (0.376)	Data 0.001 (0.136)	Loss 4.2744 (4.3565)	Top-1 acc 25.000 (24.338)	Top-5 acc 48.438 (46.237)	lr 0.02101
Warmup Train [23][140/3239]	Time 0.158 (0.365)	Data 0.002 (0.127)	Loss 4.4423 (4.3566)	Top-1 acc 21.484 (24.352)	Top-5 acc 46.484 (46.290)	lr 0.02101
Warmup Train [23][150/3239]	Time 0.251 (0.358)	Data 0.002 (0.119)	Loss 4.3195 (4.3553)	Top-1 acc 23.047 (24.338)	Top-5 acc 47.266 (46.280)	lr 0.02100
Warmup Train [23][160/3239]	Time 0.201 (0.349)	Data 0.001 (0.111)	Loss 4.3551 (4.3591)	Top-1 acc 23.438 (24.270)	Top-5 acc 48.047 (46.220)	lr 0.02099
Warmup Train [23][170/3239]	Time 0.247 (0.341)	Data 0.001 (0.105)	Loss 4.3877 (4.3601)	Top-1 acc 24.609 (24.283)	Top-5 acc 45.312 (46.222)	lr 0.02099
Warmup Train [23][180/3239]	Time 0.204 (0.335)	Data 0.002 (0.099)	Loss 4.5273 (4.3602)	Top-1 acc 18.359 (24.294)	Top-5 acc 41.797 (46.225)	lr 0.02098
Warmup Train [23][190/3239]	Time 0.162 (0.329)	Data 0.001 (0.094)	Loss 4.4313 (4.3599)	Top-1 acc 23.438 (24.264)	Top-5 acc 43.359 (46.237)	lr 0.02098
Warmup Train [23][200/3239]	Time 0.209 (0.324)	Data 0.002 (0.090)	Loss 4.2846 (4.3609)	Top-1 acc 24.219 (24.223)	Top-5 acc 49.219 (46.191)	lr 0.02097
Warmup Train [23][210/3239]	Time 0.342 (0.320)	Data 0.001 (0.086)	Loss 4.2818 (4.3599)	Top-1 acc 26.172 (24.174)	Top-5 acc 50.781 (46.223)	lr 0.02096
Warmup Train [23][220/3239]	Time 0.295 (0.315)	Data 0.001 (0.082)	Loss 4.2979 (4.3606)	Top-1 acc 25.391 (24.153)	Top-5 acc 46.094 (46.233)	lr 0.02096
Warmup Train [23][230/3239]	Time 0.229 (0.311)	Data 0.001 (0.078)	Loss 4.3118 (4.3591)	Top-1 acc 26.953 (24.195)	Top-5 acc 48.438 (46.263)	lr 0.02095
Warmup Train [23][240/3239]	Time 0.182 (0.307)	Data 0.002 (0.075)	Loss 4.4780 (4.3607)	Top-1 acc 21.094 (24.151)	Top-5 acc 41.406 (46.217)	lr 0.02095
Warmup Train [23][250/3239]	Time 0.159 (0.303)	Data 0.001 (0.072)	Loss 4.5653 (4.3631)	Top-1 acc 21.484 (24.114)	Top-5 acc 41.406 (46.186)	lr 0.02094
Warmup Train [23][260/3239]	Time 0.248 (0.301)	Data 0.001 (0.070)	Loss 4.2198 (4.3615)	Top-1 acc 26.562 (24.107)	Top-5 acc 47.266 (46.175)	lr 0.02093
Warmup Train [23][270/3239]	Time 0.234 (0.298)	Data 0.002 (0.067)	Loss 4.4488 (4.3620)	Top-1 acc 20.312 (24.098)	Top-5 acc 45.312 (46.196)	lr 0.02093
Warmup Train [23][280/3239]	Time 0.208 (0.295)	Data 0.001 (0.065)	Loss 4.4019 (4.3604)	Top-1 acc 22.656 (24.114)	Top-5 acc 44.922 (46.256)	lr 0.02092
Warmup Train [23][290/3239]	Time 0.209 (0.292)	Data 0.001 (0.063)	Loss 4.3399 (4.3602)	Top-1 acc 24.219 (24.137)	Top-5 acc 46.875 (46.268)	lr 0.02092
Warmup Train [23][300/3239]	Time 0.217 (0.290)	Data 0.003 (0.061)	Loss 4.3452 (4.3613)	Top-1 acc 22.656 (24.140)	Top-5 acc 50.000 (46.257)	lr 0.02091
Warmup Train [23][310/3239]	Time 0.294 (0.288)	Data 0.001 (0.059)	Loss 4.2404 (4.3611)	Top-1 acc 28.125 (24.118)	Top-5 acc 48.047 (46.249)	lr 0.02090
Warmup Train [23][320/3239]	Time 0.240 (0.286)	Data 0.001 (0.057)	Loss 4.3224 (4.3598)	Top-1 acc 28.906 (24.143)	Top-5 acc 46.875 (46.259)	lr 0.02090
Warmup Train [23][330/3239]	Time 0.171 (0.284)	Data 0.001 (0.056)	Loss 4.3623 (4.3608)	Top-1 acc 25.000 (24.146)	Top-5 acc 42.969 (46.224)	lr 0.02089
Warmup Train [23][340/3239]	Time 0.190 (0.282)	Data 0.001 (0.054)	Loss 4.4169 (4.3614)	Top-1 acc 22.266 (24.108)	Top-5 acc 46.484 (46.206)	lr 0.02089
Warmup Train [23][350/3239]	Time 0.150 (0.280)	Data 0.001 (0.053)	Loss 4.3911 (4.3630)	Top-1 acc 25.781 (24.093)	Top-5 acc 46.094 (46.163)	lr 0.02088
Warmup Train [23][360/3239]	Time 0.253 (0.278)	Data 0.001 (0.051)	Loss 4.4124 (4.3623)	Top-1 acc 26.172 (24.108)	Top-5 acc 46.484 (46.204)	lr 0.02087
Warmup Train [23][370/3239]	Time 0.207 (0.277)	Data 0.001 (0.050)	Loss 4.4233 (4.3618)	Top-1 acc 24.609 (24.111)	Top-5 acc 44.922 (46.210)	lr 0.02087
Warmup Train [23][380/3239]	Time 0.253 (0.275)	Data 0.002 (0.049)	Loss 4.2540 (4.3625)	Top-1 acc 27.344 (24.095)	Top-5 acc 50.781 (46.206)	lr 0.02086
Warmup Train [23][390/3239]	Time 0.231 (0.274)	Data 0.001 (0.047)	Loss 4.3200 (4.3628)	Top-1 acc 28.906 (24.103)	Top-5 acc 49.219 (46.199)	lr 0.02086
Warmup Train [23][400/3239]	Time 0.237 (0.273)	Data 0.001 (0.046)	Loss 4.3077 (4.3616)	Top-1 acc 23.438 (24.139)	Top-5 acc 48.828 (46.225)	lr 0.02085
Warmup Train [23][410/3239]	Time 0.304 (0.272)	Data 0.002 (0.045)	Loss 4.2854 (4.3604)	Top-1 acc 25.781 (24.149)	Top-5 acc 46.875 (46.252)	lr 0.02084
Warmup Train [23][420/3239]	Time 0.220 (0.271)	Data 0.002 (0.044)	Loss 4.2565 (4.3595)	Top-1 acc 26.562 (24.135)	Top-5 acc 48.828 (46.263)	lr 0.02084
Warmup Train [23][430/3239]	Time 0.198 (0.270)	Data 0.001 (0.043)	Loss 4.3903 (4.3595)	Top-1 acc 22.266 (24.128)	Top-5 acc 43.359 (46.255)	lr 0.02083
Warmup Train [23][440/3239]	Time 0.209 (0.269)	Data 0.001 (0.042)	Loss 4.3841 (4.3587)	Top-1 acc 23.828 (24.158)	Top-5 acc 50.781 (46.291)	lr 0.02083
Warmup Train [23][450/3239]	Time 0.216 (0.268)	Data 0.001 (0.042)	Loss 4.1417 (4.3577)	Top-1 acc 26.953 (24.189)	Top-5 acc 53.906 (46.325)	lr 0.02082
Warmup Train [23][460/3239]	Time 0.238 (0.267)	Data 0.002 (0.041)	Loss 4.5300 (4.3575)	Top-1 acc 19.922 (24.186)	Top-5 acc 42.188 (46.337)	lr 0.02081
Warmup Train [23][470/3239]	Time 0.229 (0.267)	Data 0.002 (0.040)	Loss 4.4711 (4.3578)	Top-1 acc 20.703 (24.163)	Top-5 acc 44.141 (46.319)	lr 0.02081
Warmup Train [23][480/3239]	Time 0.198 (0.266)	Data 0.001 (0.040)	Loss 4.4608 (4.3587)	Top-1 acc 18.750 (24.165)	Top-5 acc 45.312 (46.311)	lr 0.02080
Warmup Train [23][490/3239]	Time 0.229 (0.265)	Data 0.001 (0.039)	Loss 4.4546 (4.3583)	Top-1 acc 21.484 (24.166)	Top-5 acc 47.266 (46.328)	lr 0.02080
Warmup Train [23][500/3239]	Time 0.219 (0.264)	Data 0.002 (0.038)	Loss 4.4687 (4.3590)	Top-1 acc 22.656 (24.155)	Top-5 acc 41.406 (46.318)	lr 0.02079
Warmup Train [23][510/3239]	Time 0.351 (0.264)	Data 0.001 (0.037)	Loss 4.2845 (4.3586)	Top-1 acc 22.656 (24.158)	Top-5 acc 47.656 (46.327)	lr 0.02078
Warmup Train [23][520/3239]	Time 0.251 (0.263)	Data 0.001 (0.037)	Loss 4.2910 (4.3589)	Top-1 acc 25.781 (24.161)	Top-5 acc 50.391 (46.345)	lr 0.02078
Warmup Train [23][530/3239]	Time 0.231 (0.263)	Data 0.001 (0.036)	Loss 4.3059 (4.3582)	Top-1 acc 25.781 (24.171)	Top-5 acc 50.391 (46.362)	lr 0.02077
Warmup Train [23][540/3239]	Time 0.224 (0.262)	Data 0.001 (0.036)	Loss 4.6495 (4.3583)	Top-1 acc 22.656 (24.188)	Top-5 acc 40.234 (46.364)	lr 0.02077
Warmup Train [23][550/3239]	Time 0.195 (0.261)	Data 0.001 (0.035)	Loss 4.1763 (4.3582)	Top-1 acc 28.906 (24.207)	Top-5 acc 48.438 (46.374)	lr 0.02076
Warmup Train [23][560/3239]	Time 0.239 (0.261)	Data 0.001 (0.034)	Loss 4.2689 (4.3582)	Top-1 acc 25.391 (24.200)	Top-5 acc 48.828 (46.379)	lr 0.02075
Warmup Train [23][570/3239]	Time 0.212 (0.260)	Data 0.001 (0.034)	Loss 4.5633 (4.3580)	Top-1 acc 21.484 (24.202)	Top-5 acc 40.625 (46.378)	lr 0.02075
Warmup Train [23][580/3239]	Time 0.156 (0.259)	Data 0.002 (0.033)	Loss 4.3058 (4.3581)	Top-1 acc 23.828 (24.207)	Top-5 acc 44.531 (46.371)	lr 0.02074
Warmup Train [23][590/3239]	Time 0.220 (0.259)	Data 0.001 (0.033)	Loss 4.2786 (4.3583)	Top-1 acc 23.828 (24.229)	Top-5 acc 48.047 (46.364)	lr 0.02074
Warmup Train [23][600/3239]	Time 0.240 (0.258)	Data 0.001 (0.032)	Loss 4.4133 (4.3570)	Top-1 acc 27.344 (24.256)	Top-5 acc 48.047 (46.395)	lr 0.02073
Warmup Train [23][610/3239]	Time 0.324 (0.257)	Data 0.001 (0.032)	Loss 4.1287 (4.3570)	Top-1 acc 29.688 (24.253)	Top-5 acc 52.734 (46.393)	lr 0.02072
Warmup Train [23][620/3239]	Time 0.372 (0.257)	Data 0.001 (0.031)	Loss 4.5546 (4.3578)	Top-1 acc 24.219 (24.233)	Top-5 acc 39.844 (46.374)	lr 0.02072
Warmup Train [23][630/3239]	Time 0.182 (0.256)	Data 0.001 (0.031)	Loss 4.3669 (4.3585)	Top-1 acc 23.438 (24.232)	Top-5 acc 47.266 (46.364)	lr 0.02071
Warmup Train [23][640/3239]	Time 0.251 (0.256)	Data 0.001 (0.030)	Loss 4.3023 (4.3588)	Top-1 acc 23.828 (24.220)	Top-5 acc 46.875 (46.358)	lr 0.02071
Warmup Train [23][650/3239]	Time 0.239 (0.255)	Data 0.003 (0.030)	Loss 4.4571 (4.3588)	Top-1 acc 21.484 (24.222)	Top-5 acc 42.188 (46.347)	lr 0.02070
Warmup Train [23][660/3239]	Time 0.203 (0.255)	Data 0.001 (0.030)	Loss 4.1932 (4.3585)	Top-1 acc 23.047 (24.195)	Top-5 acc 48.438 (46.341)	lr 0.02069
Warmup Train [23][670/3239]	Time 0.207 (0.254)	Data 0.001 (0.029)	Loss 4.1965 (4.3583)	Top-1 acc 29.297 (24.204)	Top-5 acc 51.172 (46.342)	lr 0.02069
Warmup Train [23][680/3239]	Time 0.229 (0.254)	Data 0.001 (0.029)	Loss 4.3689 (4.3588)	Top-1 acc 23.047 (24.190)	Top-5 acc 47.266 (46.344)	lr 0.02068
Warmup Train [23][690/3239]	Time 0.225 (0.253)	Data 0.001 (0.028)	Loss 4.3120 (4.3589)	Top-1 acc 23.438 (24.177)	Top-5 acc 49.219 (46.333)	lr 0.02068
Warmup Train [23][700/3239]	Time 0.196 (0.253)	Data 0.001 (0.028)	Loss 4.5027 (4.3590)	Top-1 acc 22.266 (24.180)	Top-5 acc 42.969 (46.328)	lr 0.02067
Warmup Train [23][710/3239]	Time 0.350 (0.252)	Data 0.001 (0.028)	Loss 4.2582 (4.3588)	Top-1 acc 26.562 (24.174)	Top-5 acc 49.219 (46.335)	lr 0.02066
Warmup Train [23][720/3239]	Time 0.388 (0.252)	Data 0.002 (0.027)	Loss 4.3262 (4.3584)	Top-1 acc 26.953 (24.195)	Top-5 acc 44.531 (46.347)	lr 0.02066
Warmup Train [23][730/3239]	Time 0.288 (0.251)	Data 0.001 (0.027)	Loss 4.3892 (4.3584)	Top-1 acc 26.953 (24.210)	Top-5 acc 42.188 (46.327)	lr 0.02065
Warmup Train [23][740/3239]	Time 0.165 (0.251)	Data 0.001 (0.027)	Loss 4.2893 (4.3573)	Top-1 acc 22.656 (24.228)	Top-5 acc 47.656 (46.347)	lr 0.02065
Warmup Train [23][750/3239]	Time 0.204 (0.250)	Data 0.001 (0.026)	Loss 4.5869 (4.3581)	Top-1 acc 22.266 (24.217)	Top-5 acc 40.234 (46.338)	lr 0.02064
Warmup Train [23][760/3239]	Time 0.199 (0.250)	Data 0.001 (0.026)	Loss 4.2204 (4.3578)	Top-1 acc 30.078 (24.227)	Top-5 acc 51.562 (46.352)	lr 0.02063
Warmup Train [23][770/3239]	Time 0.202 (0.250)	Data 0.001 (0.026)	Loss 4.3889 (4.3577)	Top-1 acc 21.094 (24.225)	Top-5 acc 46.094 (46.337)	lr 0.02063
Warmup Train [23][780/3239]	Time 0.198 (0.249)	Data 0.001 (0.025)	Loss 4.2500 (4.3579)	Top-1 acc 27.734 (24.229)	Top-5 acc 43.750 (46.328)	lr 0.02062
Warmup Train [23][790/3239]	Time 0.228 (0.249)	Data 0.002 (0.025)	Loss 4.1914 (4.3582)	Top-1 acc 27.344 (24.231)	Top-5 acc 48.047 (46.320)	lr 0.02062
Warmup Train [23][800/3239]	Time 0.241 (0.248)	Data 0.001 (0.025)	Loss 4.2597 (4.3575)	Top-1 acc 27.344 (24.252)	Top-5 acc 45.703 (46.333)	lr 0.02061
Warmup Train [23][810/3239]	Time 0.207 (0.248)	Data 0.001 (0.024)	Loss 4.1627 (4.3571)	Top-1 acc 28.906 (24.252)	Top-5 acc 52.344 (46.341)	lr 0.02061
Warmup Train [23][820/3239]	Time 0.171 (0.248)	Data 0.001 (0.024)	Loss 4.2765 (4.3580)	Top-1 acc 23.438 (24.238)	Top-5 acc 49.219 (46.317)	lr 0.02060
Warmup Train [23][830/3239]	Time 0.252 (0.248)	Data 0.001 (0.024)	Loss 4.3541 (4.3576)	Top-1 acc 21.484 (24.238)	Top-5 acc 45.312 (46.326)	lr 0.02059
Warmup Train [23][840/3239]	Time 0.177 (0.247)	Data 0.001 (0.024)	Loss 4.4060 (4.3573)	Top-1 acc 21.875 (24.238)	Top-5 acc 44.141 (46.335)	lr 0.02059
Warmup Train [23][850/3239]	Time 0.236 (0.247)	Data 0.001 (0.023)	Loss 4.4259 (4.3571)	Top-1 acc 25.000 (24.252)	Top-5 acc 45.703 (46.348)	lr 0.02058
Warmup Train [23][860/3239]	Time 0.247 (0.247)	Data 0.001 (0.023)	Loss 4.4791 (4.3573)	Top-1 acc 21.094 (24.251)	Top-5 acc 44.531 (46.352)	lr 0.02058
Warmup Train [23][870/3239]	Time 0.137 (0.247)	Data 0.002 (0.023)	Loss 4.3613 (4.3576)	Top-1 acc 23.047 (24.246)	Top-5 acc 43.359 (46.350)	lr 0.02057
Warmup Train [23][880/3239]	Time 0.212 (0.246)	Data 0.001 (0.023)	Loss 4.3325 (4.3578)	Top-1 acc 20.312 (24.247)	Top-5 acc 46.094 (46.349)	lr 0.02056
Warmup Train [23][890/3239]	Time 0.171 (0.246)	Data 0.001 (0.023)	Loss 4.1850 (4.3577)	Top-1 acc 28.516 (24.255)	Top-5 acc 52.344 (46.354)	lr 0.02056
Warmup Train [23][900/3239]	Time 0.160 (0.246)	Data 0.001 (0.022)	Loss 4.4016 (4.3572)	Top-1 acc 26.562 (24.265)	Top-5 acc 46.875 (46.387)	lr 0.02055
Warmup Train [23][910/3239]	Time 0.150 (0.246)	Data 0.002 (0.022)	Loss 4.4436 (4.3577)	Top-1 acc 23.047 (24.255)	Top-5 acc 45.312 (46.373)	lr 0.02055
Warmup Train [23][920/3239]	Time 0.206 (0.245)	Data 0.001 (0.022)	Loss 4.4497 (4.3578)	Top-1 acc 26.172 (24.258)	Top-5 acc 45.703 (46.370)	lr 0.02054
Warmup Train [23][930/3239]	Time 0.246 (0.245)	Data 0.001 (0.022)	Loss 4.2229 (4.3574)	Top-1 acc 27.734 (24.260)	Top-5 acc 48.438 (46.383)	lr 0.02053
Warmup Train [23][940/3239]	Time 0.334 (0.245)	Data 0.001 (0.022)	Loss 4.7991 (4.3583)	Top-1 acc 18.359 (24.254)	Top-5 acc 39.844 (46.354)	lr 0.02053
Warmup Train [23][950/3239]	Time 0.168 (0.245)	Data 0.001 (0.022)	Loss 4.4047 (4.3580)	Top-1 acc 22.266 (24.252)	Top-5 acc 48.438 (46.374)	lr 0.02052
Warmup Train [23][960/3239]	Time 0.170 (0.244)	Data 0.001 (0.021)	Loss 4.4737 (4.3580)	Top-1 acc 23.438 (24.259)	Top-5 acc 42.969 (46.385)	lr 0.02052
Warmup Train [23][970/3239]	Time 0.163 (0.244)	Data 0.002 (0.021)	Loss 4.3196 (4.3575)	Top-1 acc 23.828 (24.269)	Top-5 acc 50.391 (46.402)	lr 0.02051
Warmup Train [23][980/3239]	Time 0.180 (0.244)	Data 0.001 (0.021)	Loss 4.4775 (4.3576)	Top-1 acc 20.703 (24.255)	Top-5 acc 48.047 (46.396)	lr 0.02050
Warmup Train [23][990/3239]	Time 0.246 (0.244)	Data 0.002 (0.021)	Loss 4.4115 (4.3573)	Top-1 acc 24.219 (24.267)	Top-5 acc 42.188 (46.399)	lr 0.02050
Warmup Train [23][1000/3239]	Time 0.252 (0.244)	Data 0.001 (0.021)	Loss 4.4104 (4.3569)	Top-1 acc 23.047 (24.280)	Top-5 acc 42.578 (46.408)	lr 0.02049
Warmup Train [23][1010/3239]	Time 0.240 (0.243)	Data 0.002 (0.021)	Loss 4.3187 (4.3566)	Top-1 acc 25.391 (24.289)	Top-5 acc 49.609 (46.424)	lr 0.02049
Warmup Train [23][1020/3239]	Time 0.236 (0.243)	Data 0.001 (0.020)	Loss 4.4000 (4.3567)	Top-1 acc 24.609 (24.281)	Top-5 acc 45.312 (46.434)	lr 0.02048
Warmup Train [23][1030/3239]	Time 0.204 (0.243)	Data 0.001 (0.020)	Loss 4.3477 (4.3563)	Top-1 acc 25.781 (24.290)	Top-5 acc 45.703 (46.448)	lr 0.02047
Warmup Train [23][1040/3239]	Time 0.268 (0.243)	Data 0.001 (0.020)	Loss 4.5671 (4.3572)	Top-1 acc 20.703 (24.282)	Top-5 acc 40.234 (46.434)	lr 0.02047
Warmup Train [23][1050/3239]	Time 0.349 (0.243)	Data 0.001 (0.020)	Loss 4.4420 (4.3569)	Top-1 acc 21.875 (24.280)	Top-5 acc 45.703 (46.430)	lr 0.02046
Warmup Train [23][1060/3239]	Time 0.150 (0.243)	Data 0.001 (0.020)	Loss 4.4084 (4.3568)	Top-1 acc 23.828 (24.279)	Top-5 acc 44.922 (46.434)	lr 0.02046
Warmup Train [23][1070/3239]	Time 0.178 (0.242)	Data 0.001 (0.020)	Loss 4.3346 (4.3565)	Top-1 acc 23.828 (24.285)	Top-5 acc 46.484 (46.448)	lr 0.02045
Warmup Train [23][1080/3239]	Time 0.164 (0.242)	Data 0.002 (0.019)	Loss 4.3268 (4.3562)	Top-1 acc 26.172 (24.294)	Top-5 acc 46.484 (46.442)	lr 0.02044
Warmup Train [23][1090/3239]	Time 0.217 (0.242)	Data 0.001 (0.019)	Loss 4.2455 (4.3566)	Top-1 acc 25.000 (24.284)	Top-5 acc 48.047 (46.442)	lr 0.02044
Warmup Train [23][1100/3239]	Time 0.182 (0.242)	Data 0.001 (0.019)	Loss 4.4044 (4.3570)	Top-1 acc 23.438 (24.274)	Top-5 acc 47.656 (46.436)	lr 0.02043
Warmup Train [23][1110/3239]	Time 0.210 (0.242)	Data 0.001 (0.019)	Loss 4.1503 (4.3569)	Top-1 acc 28.125 (24.276)	Top-5 acc 51.172 (46.443)	lr 0.02043
Warmup Train [23][1120/3239]	Time 0.145 (0.241)	Data 0.001 (0.019)	Loss 4.4336 (4.3566)	Top-1 acc 22.656 (24.286)	Top-5 acc 42.188 (46.452)	lr 0.02042
Warmup Train [23][1130/3239]	Time 0.230 (0.241)	Data 0.001 (0.019)	Loss 4.3766 (4.3569)	Top-1 acc 26.953 (24.284)	Top-5 acc 43.750 (46.441)	lr 0.02041
Warmup Train [23][1140/3239]	Time 0.216 (0.241)	Data 0.001 (0.019)	Loss 4.2491 (4.3570)	Top-1 acc 28.516 (24.293)	Top-5 acc 48.828 (46.441)	lr 0.02041
Warmup Train [23][1150/3239]	Time 0.219 (0.241)	Data 0.001 (0.018)	Loss 4.3364 (4.3568)	Top-1 acc 23.047 (24.297)	Top-5 acc 45.312 (46.448)	lr 0.02040
Warmup Train [23][1160/3239]	Time 0.197 (0.241)	Data 0.001 (0.018)	Loss 4.2631 (4.3566)	Top-1 acc 25.391 (24.294)	Top-5 acc 47.656 (46.449)	lr 0.02040
Warmup Train [23][1170/3239]	Time 0.401 (0.241)	Data 0.001 (0.018)	Loss 4.2972 (4.3563)	Top-1 acc 23.828 (24.299)	Top-5 acc 48.047 (46.465)	lr 0.02039
Warmup Train [23][1180/3239]	Time 0.199 (0.240)	Data 0.001 (0.018)	Loss 4.2940 (4.3565)	Top-1 acc 27.344 (24.293)	Top-5 acc 47.266 (46.457)	lr 0.02038
Warmup Train [23][1190/3239]	Time 0.211 (0.240)	Data 0.001 (0.018)	Loss 4.1318 (4.3563)	Top-1 acc 32.031 (24.296)	Top-5 acc 49.609 (46.463)	lr 0.02038
Warmup Train [23][1200/3239]	Time 0.188 (0.240)	Data 0.002 (0.018)	Loss 4.2320 (4.3563)	Top-1 acc 23.828 (24.297)	Top-5 acc 49.609 (46.463)	lr 0.02037
Warmup Train [23][1210/3239]	Time 0.191 (0.240)	Data 0.001 (0.018)	Loss 4.2908 (4.3561)	Top-1 acc 24.219 (24.299)	Top-5 acc 48.828 (46.471)	lr 0.02037
Warmup Train [23][1220/3239]	Time 0.203 (0.240)	Data 0.001 (0.018)	Loss 4.4708 (4.3564)	Top-1 acc 19.531 (24.288)	Top-5 acc 42.578 (46.458)	lr 0.02036
Warmup Train [23][1230/3239]	Time 0.169 (0.240)	Data 0.002 (0.017)	Loss 4.2869 (4.3563)	Top-1 acc 25.781 (24.292)	Top-5 acc 49.219 (46.464)	lr 0.02035
Warmup Train [23][1240/3239]	Time 0.183 (0.239)	Data 0.002 (0.017)	Loss 4.3933 (4.3559)	Top-1 acc 21.484 (24.302)	Top-5 acc 47.656 (46.474)	lr 0.02035
Warmup Train [23][1250/3239]	Time 0.128 (0.239)	Data 0.002 (0.017)	Loss 4.4362 (4.3561)	Top-1 acc 23.438 (24.293)	Top-5 acc 43.359 (46.470)	lr 0.02034
Warmup Train [23][1260/3239]	Time 0.229 (0.239)	Data 0.002 (0.017)	Loss 4.2555 (4.3559)	Top-1 acc 25.391 (24.297)	Top-5 acc 46.484 (46.481)	lr 0.02034
Warmup Train [23][1270/3239]	Time 0.345 (0.239)	Data 0.001 (0.017)	Loss 4.3619 (4.3558)	Top-1 acc 22.656 (24.307)	Top-5 acc 48.047 (46.480)	lr 0.02033
Warmup Train [23][1280/3239]	Time 0.149 (0.239)	Data 0.002 (0.017)	Loss 4.2062 (4.3552)	Top-1 acc 21.875 (24.318)	Top-5 acc 50.000 (46.496)	lr 0.02032
Warmup Train [23][1290/3239]	Time 0.196 (0.239)	Data 0.002 (0.017)	Loss 4.2654 (4.3550)	Top-1 acc 26.562 (24.309)	Top-5 acc 46.094 (46.499)	lr 0.02032
Warmup Train [23][1300/3239]	Time 0.236 (0.239)	Data 0.002 (0.017)	Loss 4.2410 (4.3549)	Top-1 acc 27.734 (24.313)	Top-5 acc 48.047 (46.499)	lr 0.02031
Warmup Train [23][1310/3239]	Time 0.258 (0.239)	Data 0.001 (0.017)	Loss 4.3590 (4.3549)	Top-1 acc 22.656 (24.302)	Top-5 acc 42.578 (46.502)	lr 0.02031
Warmup Train [23][1320/3239]	Time 0.254 (0.239)	Data 0.001 (0.016)	Loss 4.3267 (4.3554)	Top-1 acc 25.000 (24.298)	Top-5 acc 46.875 (46.493)	lr 0.02030
Warmup Train [23][1330/3239]	Time 0.257 (0.239)	Data 0.001 (0.016)	Loss 4.2724 (4.3552)	Top-1 acc 23.047 (24.294)	Top-5 acc 46.875 (46.496)	lr 0.02030
Warmup Train [23][1340/3239]	Time 0.166 (0.239)	Data 0.001 (0.016)	Loss 4.2000 (4.3548)	Top-1 acc 25.391 (24.302)	Top-5 acc 50.000 (46.504)	lr 0.02029
Warmup Train [23][1350/3239]	Time 0.173 (0.238)	Data 0.001 (0.016)	Loss 4.3650 (4.3549)	Top-1 acc 25.391 (24.304)	Top-5 acc 48.438 (46.497)	lr 0.02028
Warmup Train [23][1360/3239]	Time 0.240 (0.238)	Data 0.002 (0.016)	Loss 4.3392 (4.3546)	Top-1 acc 26.172 (24.311)	Top-5 acc 44.531 (46.498)	lr 0.02028
Warmup Train [23][1370/3239]	Time 0.299 (0.238)	Data 0.001 (0.016)	Loss 4.4828 (4.3544)	Top-1 acc 23.828 (24.316)	Top-5 acc 44.141 (46.502)	lr 0.02027
Warmup Train [23][1380/3239]	Time 0.269 (0.238)	Data 0.001 (0.016)	Loss 4.4333 (4.3547)	Top-1 acc 22.266 (24.309)	Top-5 acc 45.312 (46.492)	lr 0.02027
Warmup Train [23][1390/3239]	Time 0.200 (0.238)	Data 0.002 (0.016)	Loss 4.3758 (4.3545)	Top-1 acc 20.703 (24.310)	Top-5 acc 45.703 (46.496)	lr 0.02026
Warmup Train [23][1400/3239]	Time 0.181 (0.238)	Data 0.001 (0.016)	Loss 4.1613 (4.3540)	Top-1 acc 28.906 (24.323)	Top-5 acc 54.297 (46.514)	lr 0.02025
Warmup Train [23][1410/3239]	Time 0.189 (0.238)	Data 0.001 (0.016)	Loss 4.3211 (4.3532)	Top-1 acc 23.047 (24.335)	Top-5 acc 45.703 (46.537)	lr 0.02025
Warmup Train [23][1420/3239]	Time 0.183 (0.237)	Data 0.001 (0.016)	Loss 4.3079 (4.3528)	Top-1 acc 25.781 (24.346)	Top-5 acc 45.312 (46.542)	lr 0.02024
Warmup Train [23][1430/3239]	Time 0.219 (0.237)	Data 0.001 (0.015)	Loss 4.4135 (4.3529)	Top-1 acc 22.656 (24.348)	Top-5 acc 44.922 (46.539)	lr 0.02024
Warmup Train [23][1440/3239]	Time 0.272 (0.237)	Data 0.001 (0.015)	Loss 4.2606 (4.3522)	Top-1 acc 24.609 (24.367)	Top-5 acc 48.438 (46.560)	lr 0.02023
Warmup Train [23][1450/3239]	Time 0.164 (0.237)	Data 0.001 (0.015)	Loss 4.2652 (4.3520)	Top-1 acc 26.562 (24.372)	Top-5 acc 50.781 (46.572)	lr 0.02022
Warmup Train [23][1460/3239]	Time 0.181 (0.237)	Data 0.001 (0.015)	Loss 4.2264 (4.3516)	Top-1 acc 26.953 (24.379)	Top-5 acc 48.438 (46.574)	lr 0.02022
Warmup Train [23][1470/3239]	Time 0.189 (0.237)	Data 0.002 (0.015)	Loss 4.1928 (4.3512)	Top-1 acc 30.078 (24.391)	Top-5 acc 51.953 (46.581)	lr 0.02021
Warmup Train [23][1480/3239]	Time 0.200 (0.237)	Data 0.001 (0.015)	Loss 4.4119 (4.3520)	Top-1 acc 26.172 (24.372)	Top-5 acc 44.922 (46.564)	lr 0.02021
Warmup Train [23][1490/3239]	Time 0.366 (0.237)	Data 0.001 (0.015)	Loss 4.3068 (4.3521)	Top-1 acc 26.562 (24.378)	Top-5 acc 43.359 (46.563)	lr 0.02020
Warmup Train [23][1500/3239]	Time 0.275 (0.237)	Data 0.001 (0.015)	Loss 4.5666 (4.3522)	Top-1 acc 18.359 (24.374)	Top-5 acc 41.016 (46.560)	lr 0.02019
Warmup Train [23][1510/3239]	Time 0.212 (0.236)	Data 0.001 (0.015)	Loss 4.5140 (4.3523)	Top-1 acc 20.703 (24.372)	Top-5 acc 40.234 (46.555)	lr 0.02019
Warmup Train [23][1520/3239]	Time 0.189 (0.236)	Data 0.001 (0.015)	Loss 4.2399 (4.3524)	Top-1 acc 26.562 (24.369)	Top-5 acc 49.219 (46.550)	lr 0.02018
Warmup Train [23][1530/3239]	Time 0.205 (0.236)	Data 0.001 (0.015)	Loss 4.3439 (4.3525)	Top-1 acc 23.438 (24.378)	Top-5 acc 47.656 (46.548)	lr 0.02018
Warmup Train [23][1540/3239]	Time 0.199 (0.236)	Data 0.001 (0.014)	Loss 4.4032 (4.3528)	Top-1 acc 25.781 (24.380)	Top-5 acc 47.656 (46.543)	lr 0.02017
Warmup Train [23][1550/3239]	Time 0.208 (0.236)	Data 0.002 (0.014)	Loss 4.4045 (4.3530)	Top-1 acc 22.266 (24.374)	Top-5 acc 45.703 (46.532)	lr 0.02016
Warmup Train [23][1560/3239]	Time 0.266 (0.236)	Data 0.002 (0.014)	Loss 4.3817 (4.3528)	Top-1 acc 24.609 (24.378)	Top-5 acc 46.875 (46.537)	lr 0.02016
Warmup Train [23][1570/3239]	Time 0.173 (0.236)	Data 0.001 (0.014)	Loss 4.3622 (4.3527)	Top-1 acc 22.266 (24.380)	Top-5 acc 49.609 (46.546)	lr 0.02015
Warmup Train [23][1580/3239]	Time 0.199 (0.236)	Data 0.002 (0.014)	Loss 4.3560 (4.3527)	Top-1 acc 23.047 (24.372)	Top-5 acc 46.094 (46.548)	lr 0.02015
Warmup Train [23][1590/3239]	Time 0.267 (0.236)	Data 0.001 (0.014)	Loss 4.2498 (4.3527)	Top-1 acc 27.344 (24.372)	Top-5 acc 50.781 (46.548)	lr 0.02014
Warmup Train [23][1600/3239]	Time 0.299 (0.236)	Data 0.001 (0.014)	Loss 4.3235 (4.3528)	Top-1 acc 21.875 (24.371)	Top-5 acc 48.438 (46.547)	lr 0.02013
Warmup Train [23][1610/3239]	Time 0.177 (0.235)	Data 0.001 (0.014)	Loss 4.4982 (4.3528)	Top-1 acc 25.000 (24.370)	Top-5 acc 44.141 (46.547)	lr 0.02013
Warmup Train [23][1620/3239]	Time 0.267 (0.235)	Data 0.001 (0.014)	Loss 4.1751 (4.3528)	Top-1 acc 29.688 (24.371)	Top-5 acc 49.609 (46.546)	lr 0.02012
Warmup Train [23][1630/3239]	Time 0.208 (0.235)	Data 0.001 (0.014)	Loss 4.4755 (4.3534)	Top-1 acc 23.438 (24.358)	Top-5 acc 44.141 (46.536)	lr 0.02012
Warmup Train [23][1640/3239]	Time 0.268 (0.235)	Data 0.001 (0.014)	Loss 4.3878 (4.3538)	Top-1 acc 23.828 (24.349)	Top-5 acc 45.703 (46.529)	lr 0.02011
Warmup Train [23][1650/3239]	Time 0.213 (0.235)	Data 0.002 (0.014)	Loss 4.1437 (4.3535)	Top-1 acc 28.125 (24.358)	Top-5 acc 50.781 (46.538)	lr 0.02010
Warmup Train [23][1660/3239]	Time 0.222 (0.235)	Data 0.002 (0.014)	Loss 4.4131 (4.3534)	Top-1 acc 21.875 (24.354)	Top-5 acc 44.922 (46.539)	lr 0.02010
Warmup Train [23][1670/3239]	Time 0.220 (0.235)	Data 0.001 (0.014)	Loss 4.2454 (4.3535)	Top-1 acc 23.047 (24.352)	Top-5 acc 48.438 (46.538)	lr 0.02009
Warmup Train [23][1680/3239]	Time 0.233 (0.235)	Data 0.001 (0.013)	Loss 4.3146 (4.3538)	Top-1 acc 25.391 (24.345)	Top-5 acc 46.094 (46.540)	lr 0.02009
Warmup Train [23][1690/3239]	Time 0.362 (0.235)	Data 0.001 (0.013)	Loss 4.4592 (4.3538)	Top-1 acc 21.875 (24.343)	Top-5 acc 44.922 (46.542)	lr 0.02008
Warmup Train [23][1700/3239]	Time 0.277 (0.235)	Data 0.001 (0.013)	Loss 4.2152 (4.3535)	Top-1 acc 26.953 (24.345)	Top-5 acc 48.047 (46.542)	lr 0.02008
Warmup Train [23][1710/3239]	Time 0.190 (0.235)	Data 0.001 (0.013)	Loss 4.4820 (4.3533)	Top-1 acc 23.438 (24.350)	Top-5 acc 47.656 (46.544)	lr 0.02007
Warmup Train [23][1720/3239]	Time 0.154 (0.234)	Data 0.001 (0.013)	Loss 4.4176 (4.3535)	Top-1 acc 22.656 (24.349)	Top-5 acc 42.578 (46.542)	lr 0.02006
Warmup Train [23][1730/3239]	Time 0.198 (0.234)	Data 0.001 (0.013)	Loss 4.3480 (4.3535)	Top-1 acc 24.609 (24.352)	Top-5 acc 46.094 (46.542)	lr 0.02006
Warmup Train [23][1740/3239]	Time 0.180 (0.234)	Data 0.001 (0.013)	Loss 4.3802 (4.3533)	Top-1 acc 22.266 (24.352)	Top-5 acc 46.484 (46.543)	lr 0.02005
Warmup Train [23][1750/3239]	Time 0.187 (0.234)	Data 0.001 (0.013)	Loss 4.3892 (4.3532)	Top-1 acc 22.656 (24.349)	Top-5 acc 47.656 (46.548)	lr 0.02005
Warmup Train [23][1760/3239]	Time 0.211 (0.234)	Data 0.001 (0.013)	Loss 4.1306 (4.3534)	Top-1 acc 26.172 (24.346)	Top-5 acc 50.781 (46.546)	lr 0.02004
Warmup Train [23][1770/3239]	Time 0.215 (0.234)	Data 0.002 (0.013)	Loss 4.4191 (4.3534)	Top-1 acc 25.000 (24.348)	Top-5 acc 44.141 (46.543)	lr 0.02003
Warmup Train [23][1780/3239]	Time 0.228 (0.234)	Data 0.001 (0.013)	Loss 4.5012 (4.3534)	Top-1 acc 19.531 (24.350)	Top-5 acc 42.578 (46.543)	lr 0.02003
Warmup Train [23][1790/3239]	Time 0.342 (0.234)	Data 0.002 (0.013)	Loss 4.2165 (4.3536)	Top-1 acc 26.172 (24.349)	Top-5 acc 48.828 (46.545)	lr 0.02002
Warmup Train [23][1800/3239]	Time 0.146 (0.234)	Data 0.001 (0.013)	Loss 4.3425 (4.3535)	Top-1 acc 27.734 (24.353)	Top-5 acc 48.438 (46.551)	lr 0.02002
Warmup Train [23][1810/3239]	Time 0.175 (0.234)	Data 0.001 (0.013)	Loss 4.1913 (4.3534)	Top-1 acc 28.906 (24.352)	Top-5 acc 51.562 (46.552)	lr 0.02001
Warmup Train [23][1820/3239]	Time 0.262 (0.234)	Data 0.001 (0.013)	Loss 4.3380 (4.3533)	Top-1 acc 22.266 (24.357)	Top-5 acc 46.484 (46.557)	lr 0.02000
Warmup Train [23][1830/3239]	Time 0.252 (0.234)	Data 0.001 (0.013)	Loss 4.2870 (4.3532)	Top-1 acc 26.172 (24.359)	Top-5 acc 44.531 (46.561)	lr 0.02000
Warmup Train [23][1840/3239]	Time 0.184 (0.234)	Data 0.001 (0.013)	Loss 4.3353 (4.3531)	Top-1 acc 21.875 (24.357)	Top-5 acc 46.484 (46.566)	lr 0.01999
Warmup Train [23][1850/3239]	Time 0.176 (0.234)	Data 0.001 (0.013)	Loss 4.4616 (4.3530)	Top-1 acc 21.875 (24.356)	Top-5 acc 43.750 (46.569)	lr 0.01999
Warmup Train [23][1860/3239]	Time 0.232 (0.234)	Data 0.001 (0.012)	Loss 4.3759 (4.3531)	Top-1 acc 27.344 (24.358)	Top-5 acc 44.922 (46.568)	lr 0.01998
Warmup Train [23][1870/3239]	Time 0.226 (0.234)	Data 0.002 (0.012)	Loss 4.3061 (4.3531)	Top-1 acc 24.219 (24.357)	Top-5 acc 48.438 (46.570)	lr 0.01997
Warmup Train [23][1880/3239]	Time 0.208 (0.234)	Data 0.001 (0.012)	Loss 4.5104 (4.3531)	Top-1 acc 26.562 (24.363)	Top-5 acc 43.750 (46.570)	lr 0.01997
Warmup Train [23][1890/3239]	Time 0.301 (0.234)	Data 0.001 (0.012)	Loss 4.2541 (4.3531)	Top-1 acc 24.219 (24.364)	Top-5 acc 46.875 (46.572)	lr 0.01996
Warmup Train [23][1900/3239]	Time 0.352 (0.234)	Data 0.001 (0.012)	Loss 4.1938 (4.3526)	Top-1 acc 28.906 (24.377)	Top-5 acc 49.609 (46.583)	lr 0.01996
Warmup Train [23][1910/3239]	Time 0.142 (0.234)	Data 0.001 (0.012)	Loss 4.3054 (4.3527)	Top-1 acc 26.953 (24.369)	Top-5 acc 49.219 (46.583)	lr 0.01995
Warmup Train [23][1920/3239]	Time 0.213 (0.234)	Data 0.003 (0.012)	Loss 4.3013 (4.3528)	Top-1 acc 26.953 (24.366)	Top-5 acc 50.000 (46.584)	lr 0.01994
Warmup Train [23][1930/3239]	Time 0.223 (0.234)	Data 0.001 (0.012)	Loss 4.3322 (4.3530)	Top-1 acc 23.828 (24.364)	Top-5 acc 44.922 (46.580)	lr 0.01994
Warmup Train [23][1940/3239]	Time 0.155 (0.234)	Data 0.001 (0.012)	Loss 4.4651 (4.3530)	Top-1 acc 22.656 (24.367)	Top-5 acc 41.797 (46.585)	lr 0.01993
Warmup Train [23][1950/3239]	Time 0.267 (0.234)	Data 0.002 (0.012)	Loss 4.4174 (4.3532)	Top-1 acc 22.656 (24.364)	Top-5 acc 49.219 (46.580)	lr 0.01993
Warmup Train [23][1960/3239]	Time 0.214 (0.234)	Data 0.001 (0.012)	Loss 4.4224 (4.3531)	Top-1 acc 23.047 (24.368)	Top-5 acc 43.359 (46.584)	lr 0.01992
Warmup Train [23][1970/3239]	Time 0.200 (0.234)	Data 0.001 (0.012)	Loss 4.3234 (4.3528)	Top-1 acc 24.219 (24.368)	Top-5 acc 44.922 (46.594)	lr 0.01991
Warmup Train [23][1980/3239]	Time 0.232 (0.234)	Data 0.001 (0.012)	Loss 4.5093 (4.3527)	Top-1 acc 22.266 (24.369)	Top-5 acc 41.016 (46.590)	lr 0.01991
Warmup Train [23][1990/3239]	Time 0.209 (0.234)	Data 0.002 (0.012)	Loss 4.1020 (4.3525)	Top-1 acc 25.781 (24.370)	Top-5 acc 57.422 (46.594)	lr 0.01990
Warmup Train [23][2000/3239]	Time 0.203 (0.234)	Data 0.001 (0.012)	Loss 4.4010 (4.3522)	Top-1 acc 21.484 (24.369)	Top-5 acc 43.750 (46.602)	lr 0.01990
Warmup Train [23][2010/3239]	Time 0.186 (0.234)	Data 0.001 (0.012)	Loss 4.3615 (4.3522)	Top-1 acc 22.656 (24.369)	Top-5 acc 49.609 (46.606)	lr 0.01989
Warmup Train [23][2020/3239]	Time 0.245 (0.234)	Data 0.001 (0.012)	Loss 4.4442 (4.3524)	Top-1 acc 23.438 (24.363)	Top-5 acc 45.703 (46.600)	lr 0.01989
Warmup Train [23][2030/3239]	Time 0.176 (0.234)	Data 0.001 (0.012)	Loss 4.3944 (4.3520)	Top-1 acc 23.438 (24.367)	Top-5 acc 44.922 (46.609)	lr 0.01988
Warmup Train [23][2040/3239]	Time 0.279 (0.234)	Data 0.002 (0.012)	Loss 4.4066 (4.3519)	Top-1 acc 20.312 (24.368)	Top-5 acc 42.578 (46.613)	lr 0.01987
Warmup Train [23][2050/3239]	Time 0.206 (0.234)	Data 0.001 (0.012)	Loss 4.2221 (4.3518)	Top-1 acc 24.219 (24.364)	Top-5 acc 51.172 (46.611)	lr 0.01987
Warmup Train [23][2060/3239]	Time 0.170 (0.234)	Data 0.001 (0.012)	Loss 4.1233 (4.3518)	Top-1 acc 28.516 (24.364)	Top-5 acc 50.000 (46.611)	lr 0.01986
Warmup Train [23][2070/3239]	Time 0.290 (0.234)	Data 0.001 (0.012)	Loss 4.2945 (4.3516)	Top-1 acc 25.000 (24.368)	Top-5 acc 48.438 (46.614)	lr 0.01986
Warmup Train [23][2080/3239]	Time 0.200 (0.234)	Data 0.001 (0.012)	Loss 4.5261 (4.3514)	Top-1 acc 26.953 (24.368)	Top-5 acc 44.141 (46.622)	lr 0.01985
Warmup Train [23][2090/3239]	Time 0.257 (0.234)	Data 0.001 (0.011)	Loss 4.3047 (4.3515)	Top-1 acc 25.781 (24.366)	Top-5 acc 44.531 (46.619)	lr 0.01984
Warmup Train [23][2100/3239]	Time 0.394 (0.234)	Data 0.001 (0.011)	Loss 4.3108 (4.3518)	Top-1 acc 25.000 (24.365)	Top-5 acc 48.047 (46.617)	lr 0.01984
Warmup Train [23][2110/3239]	Time 0.256 (0.234)	Data 0.002 (0.011)	Loss 4.2666 (4.3516)	Top-1 acc 23.047 (24.367)	Top-5 acc 50.781 (46.621)	lr 0.01983
Warmup Train [23][2120/3239]	Time 0.288 (0.234)	Data 0.001 (0.011)	Loss 4.3945 (4.3515)	Top-1 acc 22.266 (24.371)	Top-5 acc 47.656 (46.625)	lr 0.01983
Warmup Train [23][2130/3239]	Time 0.213 (0.234)	Data 0.002 (0.011)	Loss 4.1622 (4.3510)	Top-1 acc 26.953 (24.376)	Top-5 acc 49.609 (46.634)	lr 0.01982
Warmup Train [23][2140/3239]	Time 0.224 (0.234)	Data 0.002 (0.011)	Loss 4.1413 (4.3510)	Top-1 acc 27.734 (24.372)	Top-5 acc 51.953 (46.635)	lr 0.01981
Warmup Train [23][2150/3239]	Time 0.212 (0.234)	Data 0.001 (0.011)	Loss 4.3751 (4.3511)	Top-1 acc 23.438 (24.375)	Top-5 acc 42.188 (46.632)	lr 0.01981
Warmup Train [23][2160/3239]	Time 0.168 (0.234)	Data 0.001 (0.011)	Loss 4.5274 (4.3511)	Top-1 acc 21.484 (24.370)	Top-5 acc 38.672 (46.632)	lr 0.01980
Warmup Train [23][2170/3239]	Time 0.167 (0.234)	Data 0.002 (0.011)	Loss 4.3821 (4.3512)	Top-1 acc 21.875 (24.366)	Top-5 acc 44.531 (46.630)	lr 0.01980
Warmup Train [23][2180/3239]	Time 0.169 (0.233)	Data 0.001 (0.011)	Loss 4.1698 (4.3512)	Top-1 acc 30.078 (24.368)	Top-5 acc 52.734 (46.634)	lr 0.01979
Warmup Train [23][2190/3239]	Time 0.137 (0.233)	Data 0.001 (0.011)	Loss 4.3962 (4.3516)	Top-1 acc 21.875 (24.358)	Top-5 acc 46.875 (46.630)	lr 0.01978
Warmup Train [23][2200/3239]	Time 0.221 (0.233)	Data 0.001 (0.011)	Loss 4.5698 (4.3516)	Top-1 acc 21.094 (24.357)	Top-5 acc 39.844 (46.630)	lr 0.01978
Warmup Train [23][2210/3239]	Time 0.171 (0.233)	Data 0.001 (0.011)	Loss 4.3426 (4.3517)	Top-1 acc 25.000 (24.353)	Top-5 acc 46.484 (46.625)	lr 0.01977
Warmup Train [23][2220/3239]	Time 0.446 (0.234)	Data 0.002 (0.011)	Loss 4.3163 (4.3514)	Top-1 acc 26.172 (24.363)	Top-5 acc 44.531 (46.633)	lr 0.01977
Warmup Train [23][2230/3239]	Time 0.192 (0.234)	Data 0.002 (0.011)	Loss 4.4570 (4.3512)	Top-1 acc 23.047 (24.364)	Top-5 acc 44.922 (46.639)	lr 0.01976
Warmup Train [23][2240/3239]	Time 0.228 (0.234)	Data 0.001 (0.011)	Loss 4.4139 (4.3511)	Top-1 acc 25.781 (24.364)	Top-5 acc 49.219 (46.643)	lr 0.01975
Warmup Train [23][2250/3239]	Time 0.273 (0.234)	Data 0.001 (0.011)	Loss 4.3936 (4.3511)	Top-1 acc 23.047 (24.364)	Top-5 acc 46.875 (46.642)	lr 0.01975
Warmup Train [23][2260/3239]	Time 0.168 (0.234)	Data 0.002 (0.011)	Loss 4.2656 (4.3511)	Top-1 acc 24.219 (24.363)	Top-5 acc 48.438 (46.643)	lr 0.01974
Warmup Train [23][2270/3239]	Time 0.289 (0.234)	Data 0.001 (0.011)	Loss 4.2386 (4.3512)	Top-1 acc 24.609 (24.362)	Top-5 acc 46.875 (46.641)	lr 0.01974
Warmup Train [23][2280/3239]	Time 0.245 (0.233)	Data 0.001 (0.011)	Loss 4.4268 (4.3511)	Top-1 acc 19.531 (24.363)	Top-5 acc 42.578 (46.642)	lr 0.01973
Warmup Train [23][2290/3239]	Time 0.248 (0.233)	Data 0.001 (0.011)	Loss 4.3834 (4.3511)	Top-1 acc 22.266 (24.357)	Top-5 acc 48.438 (46.646)	lr 0.01973
Warmup Train [23][2300/3239]	Time 0.254 (0.233)	Data 0.001 (0.011)	Loss 4.3864 (4.3510)	Top-1 acc 23.047 (24.359)	Top-5 acc 44.922 (46.651)	lr 0.01972
Warmup Train [23][2310/3239]	Time 0.183 (0.233)	Data 0.001 (0.011)	Loss 4.3916 (4.3509)	Top-1 acc 19.531 (24.362)	Top-5 acc 44.922 (46.650)	lr 0.01971
Warmup Train [23][2320/3239]	Time 0.284 (0.233)	Data 0.002 (0.011)	Loss 4.2155 (4.3510)	Top-1 acc 22.266 (24.361)	Top-5 acc 49.609 (46.646)	lr 0.01971
Warmup Train [23][2330/3239]	Time 0.323 (0.233)	Data 0.001 (0.011)	Loss 4.2023 (4.3508)	Top-1 acc 29.297 (24.366)	Top-5 acc 52.734 (46.651)	lr 0.01970
Warmup Train [23][2340/3239]	Time 0.211 (0.233)	Data 0.001 (0.011)	Loss 4.2830 (4.3510)	Top-1 acc 25.391 (24.363)	Top-5 acc 47.656 (46.649)	lr 0.01970
Warmup Train [23][2350/3239]	Time 0.224 (0.233)	Data 0.001 (0.011)	Loss 4.2517 (4.3511)	Top-1 acc 23.438 (24.359)	Top-5 acc 50.000 (46.648)	lr 0.01969
Warmup Train [23][2360/3239]	Time 0.227 (0.233)	Data 0.001 (0.011)	Loss 4.2939 (4.3511)	Top-1 acc 28.516 (24.361)	Top-5 acc 46.094 (46.650)	lr 0.01968
Warmup Train [23][2370/3239]	Time 0.181 (0.233)	Data 0.001 (0.010)	Loss 4.3183 (4.3513)	Top-1 acc 26.172 (24.356)	Top-5 acc 47.656 (46.642)	lr 0.01968
Warmup Train [23][2380/3239]	Time 0.235 (0.233)	Data 0.001 (0.010)	Loss 4.1946 (4.3511)	Top-1 acc 27.734 (24.357)	Top-5 acc 51.953 (46.648)	lr 0.01967
Warmup Train [23][2390/3239]	Time 0.246 (0.233)	Data 0.002 (0.010)	Loss 4.2362 (4.3510)	Top-1 acc 24.609 (24.357)	Top-5 acc 50.391 (46.653)	lr 0.01967
Warmup Train [23][2400/3239]	Time 0.141 (0.233)	Data 0.001 (0.010)	Loss 4.3366 (4.3508)	Top-1 acc 22.656 (24.360)	Top-5 acc 46.875 (46.655)	lr 0.01966
Warmup Train [23][2410/3239]	Time 0.186 (0.233)	Data 0.001 (0.010)	Loss 4.3244 (4.3507)	Top-1 acc 23.438 (24.359)	Top-5 acc 46.484 (46.653)	lr 0.01965
Warmup Train [23][2420/3239]	Time 0.191 (0.233)	Data 0.001 (0.010)	Loss 4.2912 (4.3505)	Top-1 acc 23.438 (24.362)	Top-5 acc 45.703 (46.661)	lr 0.01965
Warmup Train [23][2430/3239]	Time 0.233 (0.233)	Data 0.001 (0.010)	Loss 4.2540 (4.3506)	Top-1 acc 25.000 (24.358)	Top-5 acc 50.391 (46.656)	lr 0.01964
Warmup Train [23][2440/3239]	Time 0.371 (0.233)	Data 0.001 (0.010)	Loss 4.4026 (4.3507)	Top-1 acc 21.484 (24.357)	Top-5 acc 46.094 (46.655)	lr 0.01964
Warmup Train [23][2450/3239]	Time 0.232 (0.233)	Data 0.001 (0.010)	Loss 4.3905 (4.3507)	Top-1 acc 24.219 (24.358)	Top-5 acc 44.141 (46.654)	lr 0.01963
Warmup Train [23][2460/3239]	Time 0.171 (0.233)	Data 0.001 (0.010)	Loss 4.2532 (4.3505)	Top-1 acc 28.906 (24.365)	Top-5 acc 48.438 (46.661)	lr 0.01962
Warmup Train [23][2470/3239]	Time 0.185 (0.233)	Data 0.001 (0.010)	Loss 4.3222 (4.3506)	Top-1 acc 25.000 (24.361)	Top-5 acc 47.656 (46.658)	lr 0.01962
Warmup Train [23][2480/3239]	Time 0.152 (0.233)	Data 0.001 (0.010)	Loss 4.3650 (4.3505)	Top-1 acc 23.438 (24.362)	Top-5 acc 47.656 (46.662)	lr 0.01961
Warmup Train [23][2490/3239]	Time 0.197 (0.233)	Data 0.001 (0.010)	Loss 4.3300 (4.3504)	Top-1 acc 22.266 (24.364)	Top-5 acc 48.047 (46.661)	lr 0.01961
Warmup Train [23][2500/3239]	Time 0.207 (0.233)	Data 0.001 (0.010)	Loss 4.1544 (4.3503)	Top-1 acc 29.688 (24.365)	Top-5 acc 52.344 (46.666)	lr 0.01960
Warmup Train [23][2510/3239]	Time 0.301 (0.233)	Data 0.001 (0.010)	Loss 4.3608 (4.3503)	Top-1 acc 22.266 (24.365)	Top-5 acc 44.922 (46.667)	lr 0.01959
Warmup Train [23][2520/3239]	Time 0.217 (0.233)	Data 0.001 (0.010)	Loss 4.3268 (4.3503)	Top-1 acc 24.219 (24.364)	Top-5 acc 46.484 (46.666)	lr 0.01959
Warmup Train [23][2530/3239]	Time 0.226 (0.233)	Data 0.001 (0.010)	Loss 4.3551 (4.3504)	Top-1 acc 22.656 (24.361)	Top-5 acc 48.047 (46.664)	lr 0.01958
Warmup Train [23][2540/3239]	Time 0.257 (0.232)	Data 0.001 (0.010)	Loss 4.0530 (4.3502)	Top-1 acc 31.250 (24.366)	Top-5 acc 53.516 (46.671)	lr 0.01958
Warmup Train [23][2550/3239]	Time 0.172 (0.232)	Data 0.001 (0.010)	Loss 4.5005 (4.3503)	Top-1 acc 23.047 (24.363)	Top-5 acc 40.625 (46.666)	lr 0.01957
Warmup Train [23][2560/3239]	Time 0.344 (0.233)	Data 0.001 (0.010)	Loss 4.3008 (4.3502)	Top-1 acc 29.297 (24.368)	Top-5 acc 48.438 (46.667)	lr 0.01957
Warmup Train [23][2570/3239]	Time 0.191 (0.232)	Data 0.001 (0.010)	Loss 4.5048 (4.3501)	Top-1 acc 22.656 (24.372)	Top-5 acc 41.797 (46.666)	lr 0.01956
Warmup Train [23][2580/3239]	Time 0.220 (0.233)	Data 0.002 (0.010)	Loss 4.4624 (4.3500)	Top-1 acc 21.484 (24.375)	Top-5 acc 43.359 (46.674)	lr 0.01955
Warmup Train [23][2590/3239]	Time 0.205 (0.233)	Data 0.001 (0.010)	Loss 4.3759 (4.3498)	Top-1 acc 24.219 (24.380)	Top-5 acc 47.656 (46.681)	lr 0.01955
Warmup Train [23][2600/3239]	Time 0.233 (0.233)	Data 0.001 (0.010)	Loss 4.3086 (4.3499)	Top-1 acc 26.953 (24.381)	Top-5 acc 46.094 (46.676)	lr 0.01954
Warmup Train [23][2610/3239]	Time 0.192 (0.232)	Data 0.001 (0.010)	Loss 4.3607 (4.3499)	Top-1 acc 25.000 (24.380)	Top-5 acc 48.047 (46.677)	lr 0.01954
Warmup Train [23][2620/3239]	Time 0.223 (0.233)	Data 0.001 (0.010)	Loss 4.4852 (4.3498)	Top-1 acc 22.656 (24.380)	Top-5 acc 42.969 (46.680)	lr 0.01953
Warmup Train [23][2630/3239]	Time 0.174 (0.232)	Data 0.001 (0.010)	Loss 4.3393 (4.3496)	Top-1 acc 23.828 (24.383)	Top-5 acc 47.656 (46.686)	lr 0.01952
Warmup Train [23][2640/3239]	Time 0.190 (0.232)	Data 0.001 (0.010)	Loss 4.2452 (4.3495)	Top-1 acc 24.219 (24.386)	Top-5 acc 48.047 (46.687)	lr 0.01952
Warmup Train [23][2650/3239]	Time 0.332 (0.232)	Data 0.002 (0.010)	Loss 4.2958 (4.3494)	Top-1 acc 24.609 (24.387)	Top-5 acc 48.438 (46.685)	lr 0.01951
Warmup Train [23][2660/3239]	Time 0.232 (0.232)	Data 0.001 (0.010)	Loss 4.2849 (4.3491)	Top-1 acc 26.172 (24.394)	Top-5 acc 48.047 (46.695)	lr 0.01951
Warmup Train [23][2670/3239]	Time 0.320 (0.232)	Data 0.001 (0.010)	Loss 4.1466 (4.3490)	Top-1 acc 27.734 (24.397)	Top-5 acc 50.781 (46.696)	lr 0.01950
Warmup Train [23][2680/3239]	Time 0.233 (0.232)	Data 0.002 (0.010)	Loss 4.4292 (4.3491)	Top-1 acc 24.219 (24.397)	Top-5 acc 44.141 (46.695)	lr 0.01949
Warmup Train [23][2690/3239]	Time 0.193 (0.232)	Data 0.001 (0.010)	Loss 4.4515 (4.3491)	Top-1 acc 25.000 (24.399)	Top-5 acc 47.266 (46.695)	lr 0.01949
Warmup Train [23][2700/3239]	Time 0.300 (0.232)	Data 0.002 (0.010)	Loss 4.2293 (4.3490)	Top-1 acc 25.391 (24.402)	Top-5 acc 50.000 (46.697)	lr 0.01948
Warmup Train [23][2710/3239]	Time 0.210 (0.232)	Data 0.001 (0.010)	Loss 4.3924 (4.3492)	Top-1 acc 22.656 (24.398)	Top-5 acc 47.656 (46.693)	lr 0.01948
Warmup Train [23][2720/3239]	Time 0.187 (0.232)	Data 0.001 (0.009)	Loss 4.3974 (4.3491)	Top-1 acc 25.000 (24.400)	Top-5 acc 47.266 (46.695)	lr 0.01947
Warmup Train [23][2730/3239]	Time 0.225 (0.232)	Data 0.001 (0.009)	Loss 4.3158 (4.3489)	Top-1 acc 21.094 (24.401)	Top-5 acc 49.609 (46.697)	lr 0.01946
Warmup Train [23][2740/3239]	Time 0.199 (0.232)	Data 0.001 (0.009)	Loss 4.4567 (4.3489)	Top-1 acc 21.875 (24.402)	Top-5 acc 45.312 (46.697)	lr 0.01946
Warmup Train [23][2750/3239]	Time 0.301 (0.232)	Data 0.001 (0.009)	Loss 4.3283 (4.3490)	Top-1 acc 21.875 (24.400)	Top-5 acc 49.609 (46.696)	lr 0.01945
Warmup Train [23][2760/3239]	Time 0.223 (0.232)	Data 0.002 (0.009)	Loss 4.4886 (4.3490)	Top-1 acc 23.438 (24.402)	Top-5 acc 43.359 (46.696)	lr 0.01945
Warmup Train [23][2770/3239]	Time 0.272 (0.232)	Data 0.001 (0.009)	Loss 4.4163 (4.3488)	Top-1 acc 22.266 (24.404)	Top-5 acc 44.531 (46.700)	lr 0.01944
Warmup Train [23][2780/3239]	Time 0.214 (0.232)	Data 0.001 (0.009)	Loss 4.5322 (4.3487)	Top-1 acc 24.219 (24.411)	Top-5 acc 44.141 (46.702)	lr 0.01944
Warmup Train [23][2790/3239]	Time 0.206 (0.232)	Data 0.001 (0.009)	Loss 4.2134 (4.3486)	Top-1 acc 26.172 (24.415)	Top-5 acc 50.391 (46.706)	lr 0.01943
Warmup Train [23][2800/3239]	Time 0.231 (0.232)	Data 0.001 (0.009)	Loss 4.3302 (4.3489)	Top-1 acc 23.828 (24.408)	Top-5 acc 45.312 (46.697)	lr 0.01942
Warmup Train [23][2810/3239]	Time 0.212 (0.232)	Data 0.001 (0.009)	Loss 4.1917 (4.3487)	Top-1 acc 27.344 (24.411)	Top-5 acc 51.172 (46.703)	lr 0.01942
Warmup Train [23][2820/3239]	Time 0.147 (0.232)	Data 0.001 (0.009)	Loss 4.2455 (4.3487)	Top-1 acc 27.734 (24.410)	Top-5 acc 50.781 (46.704)	lr 0.01941
Warmup Train [23][2830/3239]	Time 0.196 (0.232)	Data 0.001 (0.009)	Loss 4.2369 (4.3488)	Top-1 acc 22.656 (24.408)	Top-5 acc 51.953 (46.706)	lr 0.01941
Warmup Train [23][2840/3239]	Time 0.243 (0.232)	Data 0.001 (0.009)	Loss 4.4598 (4.3491)	Top-1 acc 23.438 (24.404)	Top-5 acc 45.312 (46.698)	lr 0.01940
Warmup Train [23][2850/3239]	Time 0.278 (0.232)	Data 0.001 (0.009)	Loss 4.1731 (4.3490)	Top-1 acc 28.906 (24.407)	Top-5 acc 48.047 (46.700)	lr 0.01939
Warmup Train [23][2860/3239]	Time 0.227 (0.232)	Data 0.001 (0.009)	Loss 4.0920 (4.3487)	Top-1 acc 26.562 (24.413)	Top-5 acc 49.219 (46.706)	lr 0.01939
Warmup Train [23][2870/3239]	Time 0.216 (0.232)	Data 0.002 (0.009)	Loss 4.1942 (4.3487)	Top-1 acc 24.609 (24.413)	Top-5 acc 47.266 (46.703)	lr 0.01938
Warmup Train [23][2880/3239]	Time 0.174 (0.232)	Data 0.001 (0.009)	Loss 4.3760 (4.3486)	Top-1 acc 25.391 (24.416)	Top-5 acc 43.359 (46.706)	lr 0.01938
Warmup Train [23][2890/3239]	Time 0.214 (0.232)	Data 0.001 (0.009)	Loss 4.2545 (4.3485)	Top-1 acc 26.172 (24.416)	Top-5 acc 45.703 (46.707)	lr 0.01937
Warmup Train [23][2900/3239]	Time 0.227 (0.232)	Data 0.001 (0.009)	Loss 4.3587 (4.3485)	Top-1 acc 25.000 (24.418)	Top-5 acc 48.438 (46.707)	lr 0.01936
Warmup Train [23][2910/3239]	Time 0.178 (0.232)	Data 0.001 (0.009)	Loss 4.5080 (4.3484)	Top-1 acc 20.703 (24.420)	Top-5 acc 40.625 (46.710)	lr 0.01936
Warmup Train [23][2920/3239]	Time 0.179 (0.232)	Data 0.001 (0.009)	Loss 4.4342 (4.3484)	Top-1 acc 25.781 (24.419)	Top-5 acc 47.656 (46.711)	lr 0.01935
Warmup Train [23][2930/3239]	Time 0.223 (0.232)	Data 0.001 (0.009)	Loss 4.6001 (4.3483)	Top-1 acc 19.141 (24.419)	Top-5 acc 41.797 (46.714)	lr 0.01935
Warmup Train [23][2940/3239]	Time 0.186 (0.232)	Data 0.002 (0.009)	Loss 4.2764 (4.3482)	Top-1 acc 25.781 (24.419)	Top-5 acc 48.828 (46.719)	lr 0.01934
Warmup Train [23][2950/3239]	Time 0.215 (0.232)	Data 0.001 (0.009)	Loss 4.2102 (4.3479)	Top-1 acc 28.516 (24.423)	Top-5 acc 50.391 (46.727)	lr 0.01933
Warmup Train [23][2960/3239]	Time 0.276 (0.232)	Data 0.001 (0.009)	Loss 4.1926 (4.3478)	Top-1 acc 28.516 (24.427)	Top-5 acc 50.391 (46.731)	lr 0.01933
Warmup Train [23][2970/3239]	Time 0.140 (0.232)	Data 0.002 (0.009)	Loss 4.2764 (4.3477)	Top-1 acc 22.266 (24.426)	Top-5 acc 44.922 (46.731)	lr 0.01932
Warmup Train [23][2980/3239]	Time 0.260 (0.232)	Data 0.001 (0.009)	Loss 4.4419 (4.3475)	Top-1 acc 25.000 (24.431)	Top-5 acc 44.922 (46.736)	lr 0.01932
Warmup Train [23][2990/3239]	Time 0.201 (0.232)	Data 0.001 (0.009)	Loss 4.2495 (4.3473)	Top-1 acc 22.266 (24.430)	Top-5 acc 50.391 (46.741)	lr 0.01931
Warmup Train [23][3000/3239]	Time 0.214 (0.232)	Data 0.001 (0.009)	Loss 4.1695 (4.3470)	Top-1 acc 26.953 (24.437)	Top-5 acc 49.609 (46.747)	lr 0.01931
Warmup Train [23][3010/3239]	Time 0.199 (0.232)	Data 0.001 (0.009)	Loss 4.3165 (4.3468)	Top-1 acc 25.000 (24.440)	Top-5 acc 49.609 (46.754)	lr 0.01930
Warmup Train [23][3020/3239]	Time 0.252 (0.232)	Data 0.001 (0.009)	Loss 4.3478 (4.3467)	Top-1 acc 25.391 (24.442)	Top-5 acc 48.828 (46.758)	lr 0.01929
Warmup Train [23][3030/3239]	Time 0.263 (0.232)	Data 0.001 (0.009)	Loss 4.3208 (4.3468)	Top-1 acc 22.266 (24.441)	Top-5 acc 44.531 (46.755)	lr 0.01929
Warmup Train [23][3040/3239]	Time 0.147 (0.232)	Data 0.001 (0.009)	Loss 4.3342 (4.3468)	Top-1 acc 23.047 (24.443)	Top-5 acc 49.609 (46.750)	lr 0.01928
Warmup Train [23][3050/3239]	Time 0.238 (0.232)	Data 0.001 (0.009)	Loss 4.3287 (4.3467)	Top-1 acc 26.953 (24.442)	Top-5 acc 48.828 (46.752)	lr 0.01928
Warmup Train [23][3060/3239]	Time 0.368 (0.232)	Data 0.001 (0.009)	Loss 4.1789 (4.3467)	Top-1 acc 28.125 (24.444)	Top-5 acc 48.828 (46.754)	lr 0.01927
Warmup Train [23][3070/3239]	Time 0.230 (0.232)	Data 0.002 (0.009)	Loss 4.2409 (4.3467)	Top-1 acc 27.734 (24.448)	Top-5 acc 49.219 (46.753)	lr 0.01926
Warmup Train [23][3080/3239]	Time 0.237 (0.232)	Data 0.001 (0.009)	Loss 4.3776 (4.3467)	Top-1 acc 25.391 (24.447)	Top-5 acc 45.312 (46.753)	lr 0.01926
Warmup Train [23][3090/3239]	Time 0.199 (0.232)	Data 0.001 (0.009)	Loss 4.3780 (4.3465)	Top-1 acc 23.438 (24.450)	Top-5 acc 46.094 (46.757)	lr 0.01925
Warmup Train [23][3100/3239]	Time 0.272 (0.232)	Data 0.001 (0.009)	Loss 4.2303 (4.3463)	Top-1 acc 25.781 (24.452)	Top-5 acc 48.828 (46.761)	lr 0.01925
Warmup Train [23][3110/3239]	Time 0.228 (0.232)	Data 0.001 (0.009)	Loss 4.3112 (4.3464)	Top-1 acc 25.391 (24.453)	Top-5 acc 48.438 (46.762)	lr 0.01924
Warmup Train [23][3120/3239]	Time 0.185 (0.232)	Data 0.001 (0.009)	Loss 4.3642 (4.3464)	Top-1 acc 19.922 (24.450)	Top-5 acc 44.531 (46.760)	lr 0.01923
Warmup Train [23][3130/3239]	Time 0.220 (0.232)	Data 0.001 (0.009)	Loss 4.4586 (4.3464)	Top-1 acc 23.828 (24.450)	Top-5 acc 42.578 (46.759)	lr 0.01923
Warmup Train [23][3140/3239]	Time 0.229 (0.232)	Data 0.001 (0.009)	Loss 4.3682 (4.3465)	Top-1 acc 26.172 (24.447)	Top-5 acc 48.828 (46.760)	lr 0.01922
Warmup Train [23][3150/3239]	Time 0.334 (0.232)	Data 0.001 (0.009)	Loss 4.5156 (4.3464)	Top-1 acc 23.828 (24.450)	Top-5 acc 44.141 (46.761)	lr 0.01922
Warmup Train [23][3160/3239]	Time 0.327 (0.232)	Data 0.002 (0.009)	Loss 4.0624 (4.3462)	Top-1 acc 30.859 (24.453)	Top-5 acc 52.344 (46.762)	lr 0.01921
Warmup Train [23][3170/3239]	Time 0.204 (0.232)	Data 0.001 (0.009)	Loss 4.1897 (4.3462)	Top-1 acc 26.172 (24.455)	Top-5 acc 50.781 (46.762)	lr 0.01921
Warmup Train [23][3180/3239]	Time 0.218 (0.231)	Data 0.000 (0.009)	Loss 4.5362 (4.3461)	Top-1 acc 19.531 (24.453)	Top-5 acc 41.797 (46.763)	lr 0.01920
Warmup Train [23][3190/3239]	Time 0.186 (0.231)	Data 0.000 (0.009)	Loss 4.3097 (4.3462)	Top-1 acc 22.656 (24.447)	Top-5 acc 46.875 (46.760)	lr 0.01919
Warmup Train [23][3200/3239]	Time 0.244 (0.231)	Data 0.000 (0.008)	Loss 4.1924 (4.3461)	Top-1 acc 25.781 (24.448)	Top-5 acc 48.828 (46.763)	lr 0.01919
Warmup Train [23][3210/3239]	Time 0.155 (0.231)	Data 0.000 (0.008)	Loss 4.2888 (4.3460)	Top-1 acc 29.688 (24.454)	Top-5 acc 48.047 (46.766)	lr 0.01918
Warmup Train [23][3220/3239]	Time 0.172 (0.231)	Data 0.000 (0.008)	Loss 4.4201 (4.3460)	Top-1 acc 21.484 (24.455)	Top-5 acc 42.188 (46.763)	lr 0.01918
Warmup Train [23][3230/3239]	Time 0.195 (0.231)	Data 0.000 (0.008)	Loss 4.2089 (4.3460)	Top-1 acc 22.656 (24.454)	Top-5 acc 49.609 (46.759)	lr 0.01917
Warmup Train [23][3239/3239]	Time 0.139 (0.231)	Data 0.000 (0.008)	Loss 4.3245 (4.3459)	Top-1 acc 18.519 (24.455)	Top-5 acc 50.617 (46.761)	lr 0.01916
==========Warmup Valid [23/40]	loss 3.326	top-1 acc 31.872	top-5 acc 56.671	Train top-1 24.455	top-5 46.761	flops: 442.4M
Warmup Train [24][0/3239]	Time 17.044 (17.044)	Data 15.906 (15.906)	Loss 4.3692 (4.3692)	Top-1 acc 26.953 (26.953)	Top-5 acc 46.484 (46.484)	lr 0.01916
Warmup Train [24][10/3239]	Time 0.339 (1.970)	Data 0.001 (1.581)	Loss 4.2692 (4.2830)	Top-1 acc 26.953 (25.533)	Top-5 acc 46.484 (47.905)	lr 0.01916
Warmup Train [24][20/3239]	Time 0.191 (1.144)	Data 0.001 (0.829)	Loss 4.2907 (4.2990)	Top-1 acc 24.609 (25.037)	Top-5 acc 45.312 (47.377)	lr 0.01915
Warmup Train [24][30/3239]	Time 0.242 (0.846)	Data 0.002 (0.563)	Loss 4.2721 (4.2991)	Top-1 acc 23.047 (24.912)	Top-5 acc 49.609 (47.429)	lr 0.01915
Warmup Train [24][40/3239]	Time 0.373 (0.704)	Data 0.001 (0.426)	Loss 4.3989 (4.2979)	Top-1 acc 23.438 (25.171)	Top-5 acc 46.484 (47.637)	lr 0.01914
Warmup Train [24][50/3239]	Time 0.319 (0.609)	Data 0.001 (0.343)	Loss 4.1979 (4.3041)	Top-1 acc 26.172 (25.360)	Top-5 acc 48.828 (47.603)	lr 0.01913
Warmup Train [24][60/3239]	Time 0.225 (0.549)	Data 0.002 (0.287)	Loss 4.4961 (4.3074)	Top-1 acc 21.875 (25.038)	Top-5 acc 44.141 (47.528)	lr 0.01913
Warmup Train [24][70/3239]	Time 0.183 (0.504)	Data 0.001 (0.247)	Loss 4.4253 (4.3062)	Top-1 acc 21.875 (25.176)	Top-5 acc 40.625 (47.546)	lr 0.01912
Warmup Train [24][80/3239]	Time 0.227 (0.472)	Data 0.001 (0.217)	Loss 4.2994 (4.3072)	Top-1 acc 25.000 (25.082)	Top-5 acc 44.531 (47.526)	lr 0.01912
Warmup Train [24][90/3239]	Time 0.240 (0.447)	Data 0.001 (0.193)	Loss 4.2953 (4.3020)	Top-1 acc 26.172 (25.069)	Top-5 acc 50.781 (47.673)	lr 0.01911
Warmup Train [24][100/3239]	Time 0.274 (0.424)	Data 0.001 (0.174)	Loss 4.3491 (4.3074)	Top-1 acc 23.438 (24.996)	Top-5 acc 49.219 (47.629)	lr 0.01910
Warmup Train [24][110/3239]	Time 0.236 (0.406)	Data 0.002 (0.159)	Loss 4.3646 (4.3098)	Top-1 acc 21.875 (24.930)	Top-5 acc 42.969 (47.611)	lr 0.01910
Warmup Train [24][120/3239]	Time 0.183 (0.392)	Data 0.002 (0.146)	Loss 4.2577 (4.3124)	Top-1 acc 28.125 (24.942)	Top-5 acc 50.391 (47.517)	lr 0.01909
Warmup Train [24][130/3239]	Time 0.240 (0.380)	Data 0.001 (0.135)	Loss 4.2975 (4.3119)	Top-1 acc 24.219 (24.973)	Top-5 acc 49.609 (47.573)	lr 0.01909
Warmup Train [24][140/3239]	Time 0.257 (0.368)	Data 0.001 (0.126)	Loss 4.2051 (4.3104)	Top-1 acc 29.297 (25.064)	Top-5 acc 48.047 (47.570)	lr 0.01908
Warmup Train [24][150/3239]	Time 0.315 (0.360)	Data 0.001 (0.118)	Loss 4.3138 (4.3099)	Top-1 acc 27.734 (25.031)	Top-5 acc 44.531 (47.514)	lr 0.01908
Warmup Train [24][160/3239]	Time 0.169 (0.351)	Data 0.001 (0.110)	Loss 4.2579 (4.3103)	Top-1 acc 26.172 (25.032)	Top-5 acc 49.219 (47.549)	lr 0.01907
Warmup Train [24][170/3239]	Time 0.216 (0.344)	Data 0.001 (0.104)	Loss 4.5732 (4.3122)	Top-1 acc 23.438 (25.059)	Top-5 acc 42.188 (47.512)	lr 0.01906
Warmup Train [24][180/3239]	Time 0.158 (0.338)	Data 0.001 (0.099)	Loss 4.1008 (4.3134)	Top-1 acc 27.734 (25.013)	Top-5 acc 51.562 (47.520)	lr 0.01906
Warmup Train [24][190/3239]	Time 0.209 (0.332)	Data 0.002 (0.094)	Loss 4.3164 (4.3121)	Top-1 acc 24.219 (25.053)	Top-5 acc 46.484 (47.572)	lr 0.01905
Warmup Train [24][200/3239]	Time 0.178 (0.327)	Data 0.001 (0.089)	Loss 4.4343 (4.3133)	Top-1 acc 25.781 (25.037)	Top-5 acc 43.750 (47.534)	lr 0.01905
Warmup Train [24][210/3239]	Time 0.231 (0.323)	Data 0.001 (0.085)	Loss 4.2534 (4.3128)	Top-1 acc 26.562 (25.046)	Top-5 acc 48.828 (47.510)	lr 0.01904
Warmup Train [24][220/3239]	Time 0.243 (0.318)	Data 0.001 (0.081)	Loss 4.4613 (4.3140)	Top-1 acc 21.094 (25.028)	Top-5 acc 43.750 (47.471)	lr 0.01903
Warmup Train [24][230/3239]	Time 0.229 (0.314)	Data 0.002 (0.078)	Loss 4.5327 (4.3167)	Top-1 acc 21.094 (24.970)	Top-5 acc 38.672 (47.399)	lr 0.01903
Warmup Train [24][240/3239]	Time 0.251 (0.311)	Data 0.001 (0.075)	Loss 4.2349 (4.3157)	Top-1 acc 26.562 (25.011)	Top-5 acc 49.219 (47.390)	lr 0.01902
Warmup Train [24][250/3239]	Time 0.326 (0.308)	Data 0.001 (0.072)	Loss 4.2616 (4.3170)	Top-1 acc 26.953 (25.017)	Top-5 acc 51.562 (47.385)	lr 0.01902
Warmup Train [24][260/3239]	Time 0.201 (0.304)	Data 0.001 (0.069)	Loss 4.3814 (4.3180)	Top-1 acc 24.609 (24.999)	Top-5 acc 44.922 (47.340)	lr 0.01901
Warmup Train [24][270/3239]	Time 0.245 (0.302)	Data 0.001 (0.067)	Loss 4.4688 (4.3167)	Top-1 acc 22.656 (25.023)	Top-5 acc 43.750 (47.378)	lr 0.01900
Warmup Train [24][280/3239]	Time 0.202 (0.298)	Data 0.001 (0.064)	Loss 4.3187 (4.3147)	Top-1 acc 24.609 (25.021)	Top-5 acc 48.047 (47.417)	lr 0.01900
Warmup Train [24][290/3239]	Time 0.188 (0.296)	Data 0.002 (0.062)	Loss 4.3467 (4.3134)	Top-1 acc 25.000 (25.058)	Top-5 acc 45.312 (47.474)	lr 0.01899
Warmup Train [24][300/3239]	Time 0.219 (0.293)	Data 0.002 (0.060)	Loss 4.3535 (4.3135)	Top-1 acc 25.000 (25.070)	Top-5 acc 46.875 (47.442)	lr 0.01899
Warmup Train [24][310/3239]	Time 0.195 (0.291)	Data 0.001 (0.059)	Loss 4.2889 (4.3114)	Top-1 acc 25.000 (25.090)	Top-5 acc 46.484 (47.468)	lr 0.01898
Warmup Train [24][320/3239]	Time 0.202 (0.289)	Data 0.001 (0.057)	Loss 4.2009 (4.3107)	Top-1 acc 28.516 (25.107)	Top-5 acc 48.828 (47.496)	lr 0.01898
Warmup Train [24][330/3239]	Time 0.193 (0.287)	Data 0.001 (0.055)	Loss 4.2573 (4.3101)	Top-1 acc 28.516 (25.100)	Top-5 acc 49.609 (47.524)	lr 0.01897
Warmup Train [24][340/3239]	Time 0.418 (0.285)	Data 0.001 (0.054)	Loss 4.2996 (4.3097)	Top-1 acc 25.781 (25.049)	Top-5 acc 48.438 (47.554)	lr 0.01896
Warmup Train [24][350/3239]	Time 0.223 (0.284)	Data 0.001 (0.052)	Loss 4.3243 (4.3094)	Top-1 acc 23.438 (25.055)	Top-5 acc 46.484 (47.591)	lr 0.01896
Warmup Train [24][360/3239]	Time 0.177 (0.282)	Data 0.001 (0.051)	Loss 4.4821 (4.3099)	Top-1 acc 23.047 (25.029)	Top-5 acc 45.312 (47.565)	lr 0.01895
Warmup Train [24][370/3239]	Time 0.245 (0.281)	Data 0.001 (0.050)	Loss 4.2696 (4.3092)	Top-1 acc 25.781 (25.045)	Top-5 acc 51.172 (47.586)	lr 0.01895
Warmup Train [24][380/3239]	Time 0.276 (0.279)	Data 0.002 (0.048)	Loss 4.1940 (4.3104)	Top-1 acc 30.078 (25.046)	Top-5 acc 50.781 (47.563)	lr 0.01894
Warmup Train [24][390/3239]	Time 0.192 (0.278)	Data 0.001 (0.047)	Loss 4.3857 (4.3119)	Top-1 acc 22.266 (25.029)	Top-5 acc 46.484 (47.531)	lr 0.01893
Warmup Train [24][400/3239]	Time 0.251 (0.277)	Data 0.001 (0.046)	Loss 4.1845 (4.3129)	Top-1 acc 25.391 (25.039)	Top-5 acc 47.656 (47.510)	lr 0.01893
Warmup Train [24][410/3239]	Time 0.227 (0.276)	Data 0.001 (0.045)	Loss 4.3393 (4.3128)	Top-1 acc 24.219 (25.030)	Top-5 acc 44.922 (47.503)	lr 0.01892
Warmup Train [24][420/3239]	Time 0.303 (0.275)	Data 0.001 (0.044)	Loss 4.3452 (4.3125)	Top-1 acc 20.703 (25.019)	Top-5 acc 49.219 (47.513)	lr 0.01892
Warmup Train [24][430/3239]	Time 0.195 (0.273)	Data 0.001 (0.043)	Loss 4.4331 (4.3139)	Top-1 acc 24.219 (25.014)	Top-5 acc 44.141 (47.488)	lr 0.01891
Warmup Train [24][440/3239]	Time 0.205 (0.272)	Data 0.002 (0.042)	Loss 4.2163 (4.3144)	Top-1 acc 27.344 (24.987)	Top-5 acc 52.344 (47.483)	lr 0.01890
Warmup Train [24][450/3239]	Time 0.191 (0.271)	Data 0.001 (0.041)	Loss 4.4538 (4.3139)	Top-1 acc 21.484 (24.990)	Top-5 acc 41.016 (47.463)	lr 0.01890
Warmup Train [24][460/3239]	Time 0.200 (0.269)	Data 0.001 (0.040)	Loss 4.3498 (4.3128)	Top-1 acc 21.094 (24.987)	Top-5 acc 46.875 (47.477)	lr 0.01889
Warmup Train [24][470/3239]	Time 0.177 (0.269)	Data 0.001 (0.039)	Loss 4.3439 (4.3134)	Top-1 acc 26.172 (24.985)	Top-5 acc 46.484 (47.482)	lr 0.01889
Warmup Train [24][480/3239]	Time 0.209 (0.268)	Data 0.001 (0.039)	Loss 4.3200 (4.3119)	Top-1 acc 20.703 (25.009)	Top-5 acc 50.000 (47.526)	lr 0.01888
Warmup Train [24][490/3239]	Time 0.201 (0.266)	Data 0.001 (0.038)	Loss 4.3495 (4.3116)	Top-1 acc 26.172 (25.009)	Top-5 acc 49.219 (47.519)	lr 0.01888
Warmup Train [24][500/3239]	Time 0.196 (0.265)	Data 0.001 (0.037)	Loss 4.3311 (4.3116)	Top-1 acc 24.609 (25.019)	Top-5 acc 46.094 (47.516)	lr 0.01887
Warmup Train [24][510/3239]	Time 0.166 (0.264)	Data 0.001 (0.036)	Loss 4.1369 (4.3117)	Top-1 acc 23.828 (25.011)	Top-5 acc 53.125 (47.503)	lr 0.01886
Warmup Train [24][520/3239]	Time 0.299 (0.264)	Data 0.001 (0.036)	Loss 4.3812 (4.3113)	Top-1 acc 24.609 (25.026)	Top-5 acc 44.531 (47.515)	lr 0.01886
Warmup Train [24][530/3239]	Time 0.143 (0.263)	Data 0.001 (0.035)	Loss 4.2261 (4.3110)	Top-1 acc 30.078 (25.029)	Top-5 acc 50.781 (47.500)	lr 0.01885
Warmup Train [24][540/3239]	Time 0.187 (0.262)	Data 0.001 (0.035)	Loss 4.1618 (4.3106)	Top-1 acc 26.562 (25.042)	Top-5 acc 51.562 (47.520)	lr 0.01885
Warmup Train [24][550/3239]	Time 0.226 (0.261)	Data 0.002 (0.034)	Loss 4.3723 (4.3119)	Top-1 acc 23.828 (25.009)	Top-5 acc 50.000 (47.486)	lr 0.01884
Warmup Train [24][560/3239]	Time 0.234 (0.261)	Data 0.001 (0.034)	Loss 4.2027 (4.3117)	Top-1 acc 27.344 (24.990)	Top-5 acc 50.391 (47.486)	lr 0.01883
Warmup Train [24][570/3239]	Time 0.214 (0.260)	Data 0.002 (0.033)	Loss 4.2268 (4.3113)	Top-1 acc 28.906 (25.009)	Top-5 acc 48.047 (47.492)	lr 0.01883
Warmup Train [24][580/3239]	Time 0.208 (0.259)	Data 0.001 (0.032)	Loss 4.2842 (4.3121)	Top-1 acc 25.391 (25.002)	Top-5 acc 47.656 (47.479)	lr 0.01882
Warmup Train [24][590/3239]	Time 0.189 (0.259)	Data 0.001 (0.032)	Loss 4.1686 (4.3111)	Top-1 acc 27.344 (25.026)	Top-5 acc 53.125 (47.516)	lr 0.01882
Warmup Train [24][600/3239]	Time 0.202 (0.258)	Data 0.001 (0.031)	Loss 4.4792 (4.3107)	Top-1 acc 21.875 (25.043)	Top-5 acc 44.531 (47.524)	lr 0.01881
Warmup Train [24][610/3239]	Time 0.351 (0.258)	Data 0.001 (0.031)	Loss 4.4564 (4.3099)	Top-1 acc 25.781 (25.065)	Top-5 acc 47.266 (47.551)	lr 0.01881
Warmup Train [24][620/3239]	Time 0.206 (0.257)	Data 0.001 (0.031)	Loss 4.3748 (4.3095)	Top-1 acc 23.828 (25.083)	Top-5 acc 45.703 (47.555)	lr 0.01880
Warmup Train [24][630/3239]	Time 0.237 (0.256)	Data 0.001 (0.030)	Loss 4.2028 (4.3092)	Top-1 acc 24.219 (25.095)	Top-5 acc 47.656 (47.543)	lr 0.01879
Warmup Train [24][640/3239]	Time 0.297 (0.256)	Data 0.001 (0.030)	Loss 4.3405 (4.3097)	Top-1 acc 24.609 (25.095)	Top-5 acc 47.266 (47.530)	lr 0.01879
Warmup Train [24][650/3239]	Time 0.176 (0.256)	Data 0.001 (0.029)	Loss 4.2466 (4.3090)	Top-1 acc 25.391 (25.099)	Top-5 acc 51.562 (47.550)	lr 0.01878
Warmup Train [24][660/3239]	Time 0.229 (0.255)	Data 0.001 (0.029)	Loss 4.4191 (4.3098)	Top-1 acc 26.172 (25.078)	Top-5 acc 44.922 (47.539)	lr 0.01878
Warmup Train [24][670/3239]	Time 0.240 (0.255)	Data 0.001 (0.028)	Loss 4.3615 (4.3088)	Top-1 acc 22.266 (25.076)	Top-5 acc 48.047 (47.554)	lr 0.01877
Warmup Train [24][680/3239]	Time 0.207 (0.254)	Data 0.001 (0.028)	Loss 4.2777 (4.3086)	Top-1 acc 23.828 (25.067)	Top-5 acc 49.219 (47.568)	lr 0.01876
Warmup Train [24][690/3239]	Time 0.235 (0.254)	Data 0.002 (0.028)	Loss 4.3270 (4.3094)	Top-1 acc 25.781 (25.047)	Top-5 acc 46.875 (47.549)	lr 0.01876
Warmup Train [24][700/3239]	Time 0.214 (0.253)	Data 0.001 (0.027)	Loss 4.3682 (4.3104)	Top-1 acc 25.781 (25.056)	Top-5 acc 44.922 (47.538)	lr 0.01875
Warmup Train [24][710/3239]	Time 0.247 (0.253)	Data 0.001 (0.027)	Loss 4.3351 (4.3107)	Top-1 acc 25.000 (25.059)	Top-5 acc 46.094 (47.528)	lr 0.01875
Warmup Train [24][720/3239]	Time 0.172 (0.252)	Data 0.001 (0.027)	Loss 4.4012 (4.3112)	Top-1 acc 24.219 (25.060)	Top-5 acc 49.609 (47.528)	lr 0.01874
Warmup Train [24][730/3239]	Time 0.233 (0.252)	Data 0.001 (0.026)	Loss 4.4655 (4.3121)	Top-1 acc 23.828 (25.046)	Top-5 acc 46.484 (47.514)	lr 0.01873
Warmup Train [24][740/3239]	Time 0.192 (0.252)	Data 0.001 (0.026)	Loss 4.4181 (4.3130)	Top-1 acc 24.219 (25.031)	Top-5 acc 45.312 (47.491)	lr 0.01873
Warmup Train [24][750/3239]	Time 0.162 (0.251)	Data 0.001 (0.026)	Loss 4.2037 (4.3122)	Top-1 acc 26.562 (25.049)	Top-5 acc 49.609 (47.516)	lr 0.01872
Warmup Train [24][760/3239]	Time 0.222 (0.251)	Data 0.002 (0.025)	Loss 4.3533 (4.3129)	Top-1 acc 23.828 (25.033)	Top-5 acc 48.438 (47.505)	lr 0.01872
Warmup Train [24][770/3239]	Time 0.219 (0.250)	Data 0.001 (0.025)	Loss 4.4146 (4.3131)	Top-1 acc 21.484 (25.047)	Top-5 acc 47.266 (47.491)	lr 0.01871
Warmup Train [24][780/3239]	Time 0.250 (0.250)	Data 0.001 (0.025)	Loss 4.3479 (4.3136)	Top-1 acc 20.703 (25.038)	Top-5 acc 45.312 (47.486)	lr 0.01871
Warmup Train [24][790/3239]	Time 0.218 (0.250)	Data 0.001 (0.024)	Loss 4.1929 (4.3129)	Top-1 acc 26.562 (25.038)	Top-5 acc 50.000 (47.507)	lr 0.01870
Warmup Train [24][800/3239]	Time 0.227 (0.249)	Data 0.001 (0.024)	Loss 4.3313 (4.3128)	Top-1 acc 23.828 (25.036)	Top-5 acc 46.484 (47.519)	lr 0.01869
Warmup Train [24][810/3239]	Time 0.273 (0.249)	Data 0.027 (0.024)	Loss 4.3066 (4.3133)	Top-1 acc 27.734 (25.033)	Top-5 acc 48.828 (47.509)	lr 0.01869
Warmup Train [24][820/3239]	Time 0.248 (0.248)	Data 0.001 (0.024)	Loss 4.2167 (4.3132)	Top-1 acc 26.562 (25.036)	Top-5 acc 47.656 (47.503)	lr 0.01868
Warmup Train [24][830/3239]	Time 0.234 (0.248)	Data 0.001 (0.023)	Loss 4.2418 (4.3124)	Top-1 acc 23.438 (25.043)	Top-5 acc 51.953 (47.532)	lr 0.01868
Warmup Train [24][840/3239]	Time 0.202 (0.248)	Data 0.001 (0.023)	Loss 4.3322 (4.3118)	Top-1 acc 26.953 (25.052)	Top-5 acc 45.703 (47.534)	lr 0.01867
Warmup Train [24][850/3239]	Time 0.212 (0.248)	Data 0.002 (0.023)	Loss 4.2486 (4.3118)	Top-1 acc 27.734 (25.064)	Top-5 acc 49.219 (47.539)	lr 0.01866
Warmup Train [24][860/3239]	Time 0.206 (0.247)	Data 0.001 (0.023)	Loss 4.3219 (4.3117)	Top-1 acc 21.484 (25.049)	Top-5 acc 45.703 (47.538)	lr 0.01866
Warmup Train [24][870/3239]	Time 0.226 (0.247)	Data 0.001 (0.023)	Loss 4.0781 (4.3115)	Top-1 acc 30.469 (25.060)	Top-5 acc 53.516 (47.537)	lr 0.01865
Warmup Train [24][880/3239]	Time 0.214 (0.247)	Data 0.001 (0.022)	Loss 4.4348 (4.3112)	Top-1 acc 23.828 (25.072)	Top-5 acc 41.016 (47.540)	lr 0.01865
Warmup Train [24][890/3239]	Time 0.190 (0.247)	Data 0.001 (0.022)	Loss 4.1653 (4.3105)	Top-1 acc 26.562 (25.084)	Top-5 acc 48.828 (47.552)	lr 0.01864
Warmup Train [24][900/3239]	Time 0.255 (0.247)	Data 0.002 (0.022)	Loss 4.5353 (4.3114)	Top-1 acc 24.609 (25.059)	Top-5 acc 45.312 (47.528)	lr 0.01863
Warmup Train [24][910/3239]	Time 0.194 (0.246)	Data 0.001 (0.022)	Loss 4.2498 (4.3112)	Top-1 acc 24.219 (25.061)	Top-5 acc 46.094 (47.533)	lr 0.01863
Warmup Train [24][920/3239]	Time 0.186 (0.246)	Data 0.001 (0.021)	Loss 4.5043 (4.3112)	Top-1 acc 23.438 (25.059)	Top-5 acc 44.531 (47.540)	lr 0.01862
Warmup Train [24][930/3239]	Time 0.236 (0.246)	Data 0.001 (0.021)	Loss 4.4161 (4.3118)	Top-1 acc 23.438 (25.040)	Top-5 acc 45.312 (47.525)	lr 0.01862
Warmup Train [24][940/3239]	Time 0.227 (0.246)	Data 0.001 (0.021)	Loss 4.4557 (4.3112)	Top-1 acc 20.312 (25.052)	Top-5 acc 43.359 (47.545)	lr 0.01861
Warmup Train [24][950/3239]	Time 0.179 (0.245)	Data 0.001 (0.021)	Loss 4.3466 (4.3114)	Top-1 acc 25.781 (25.064)	Top-5 acc 46.094 (47.531)	lr 0.01861
Warmup Train [24][960/3239]	Time 0.181 (0.245)	Data 0.001 (0.021)	Loss 4.1444 (4.3113)	Top-1 acc 28.516 (25.058)	Top-5 acc 56.250 (47.535)	lr 0.01860
Warmup Train [24][970/3239]	Time 0.171 (0.245)	Data 0.001 (0.021)	Loss 4.0853 (4.3113)	Top-1 acc 30.469 (25.064)	Top-5 acc 54.297 (47.540)	lr 0.01859
Warmup Train [24][980/3239]	Time 0.233 (0.245)	Data 0.001 (0.020)	Loss 4.2801 (4.3112)	Top-1 acc 23.047 (25.060)	Top-5 acc 48.828 (47.544)	lr 0.01859
Warmup Train [24][990/3239]	Time 0.175 (0.244)	Data 0.001 (0.020)	Loss 4.2182 (4.3108)	Top-1 acc 27.734 (25.070)	Top-5 acc 50.781 (47.544)	lr 0.01858
Warmup Train [24][1000/3239]	Time 0.250 (0.244)	Data 0.002 (0.020)	Loss 4.2834 (4.3110)	Top-1 acc 26.562 (25.077)	Top-5 acc 50.000 (47.546)	lr 0.01858
Warmup Train [24][1010/3239]	Time 0.324 (0.244)	Data 0.002 (0.020)	Loss 4.1899 (4.3109)	Top-1 acc 26.953 (25.070)	Top-5 acc 53.906 (47.559)	lr 0.01857
Warmup Train [24][1020/3239]	Time 0.183 (0.244)	Data 0.001 (0.020)	Loss 4.2669 (4.3108)	Top-1 acc 23.438 (25.067)	Top-5 acc 51.172 (47.569)	lr 0.01856
Warmup Train [24][1030/3239]	Time 0.189 (0.244)	Data 0.001 (0.019)	Loss 4.3581 (4.3108)	Top-1 acc 22.266 (25.063)	Top-5 acc 46.484 (47.571)	lr 0.01856
Warmup Train [24][1040/3239]	Time 0.239 (0.243)	Data 0.002 (0.019)	Loss 4.3778 (4.3107)	Top-1 acc 26.172 (25.062)	Top-5 acc 47.266 (47.560)	lr 0.01855
Warmup Train [24][1050/3239]	Time 0.174 (0.243)	Data 0.002 (0.019)	Loss 4.2557 (4.3104)	Top-1 acc 24.219 (25.062)	Top-5 acc 51.172 (47.570)	lr 0.01855
Warmup Train [24][1060/3239]	Time 0.258 (0.243)	Data 0.001 (0.019)	Loss 4.2174 (4.3103)	Top-1 acc 29.688 (25.062)	Top-5 acc 51.172 (47.572)	lr 0.01854
Warmup Train [24][1070/3239]	Time 0.263 (0.243)	Data 0.001 (0.019)	Loss 4.2968 (4.3098)	Top-1 acc 22.656 (25.061)	Top-5 acc 46.094 (47.581)	lr 0.01854
Warmup Train [24][1080/3239]	Time 0.221 (0.243)	Data 0.002 (0.019)	Loss 4.1634 (4.3097)	Top-1 acc 30.078 (25.061)	Top-5 acc 51.172 (47.584)	lr 0.01853
Warmup Train [24][1090/3239]	Time 0.229 (0.243)	Data 0.001 (0.019)	Loss 4.4583 (4.3098)	Top-1 acc 22.266 (25.057)	Top-5 acc 44.141 (47.577)	lr 0.01852
Warmup Train [24][1100/3239]	Time 0.304 (0.243)	Data 0.001 (0.018)	Loss 4.6032 (4.3098)	Top-1 acc 22.266 (25.061)	Top-5 acc 41.016 (47.580)	lr 0.01852
Warmup Train [24][1110/3239]	Time 0.248 (0.242)	Data 0.001 (0.018)	Loss 4.3303 (4.3092)	Top-1 acc 25.781 (25.069)	Top-5 acc 48.047 (47.594)	lr 0.01851
Warmup Train [24][1120/3239]	Time 0.198 (0.242)	Data 0.001 (0.018)	Loss 4.2245 (4.3095)	Top-1 acc 25.391 (25.062)	Top-5 acc 47.266 (47.577)	lr 0.01851
Warmup Train [24][1130/3239]	Time 0.228 (0.242)	Data 0.001 (0.018)	Loss 4.3619 (4.3094)	Top-1 acc 24.219 (25.069)	Top-5 acc 46.875 (47.580)	lr 0.01850
Warmup Train [24][1140/3239]	Time 0.195 (0.242)	Data 0.001 (0.018)	Loss 4.1853 (4.3094)	Top-1 acc 24.609 (25.073)	Top-5 acc 49.219 (47.583)	lr 0.01849
Warmup Train [24][1150/3239]	Time 0.222 (0.242)	Data 0.001 (0.018)	Loss 4.4219 (4.3098)	Top-1 acc 24.609 (25.074)	Top-5 acc 45.312 (47.578)	lr 0.01849
Warmup Train [24][1160/3239]	Time 0.194 (0.241)	Data 0.001 (0.018)	Loss 4.3779 (4.3097)	Top-1 acc 23.828 (25.072)	Top-5 acc 44.922 (47.582)	lr 0.01848
Warmup Train [24][1170/3239]	Time 0.204 (0.241)	Data 0.001 (0.017)	Loss 4.1530 (4.3094)	Top-1 acc 26.953 (25.081)	Top-5 acc 51.172 (47.597)	lr 0.01848
Warmup Train [24][1180/3239]	Time 0.208 (0.241)	Data 0.001 (0.017)	Loss 4.2182 (4.3092)	Top-1 acc 24.219 (25.080)	Top-5 acc 47.266 (47.604)	lr 0.01847
Warmup Train [24][1190/3239]	Time 0.239 (0.241)	Data 0.001 (0.017)	Loss 4.3071 (4.3092)	Top-1 acc 22.656 (25.091)	Top-5 acc 47.656 (47.605)	lr 0.01847
Warmup Train [24][1200/3239]	Time 0.321 (0.241)	Data 0.001 (0.017)	Loss 4.2543 (4.3095)	Top-1 acc 29.688 (25.091)	Top-5 acc 48.828 (47.600)	lr 0.01846
Warmup Train [24][1210/3239]	Time 0.212 (0.241)	Data 0.001 (0.017)	Loss 4.3692 (4.3099)	Top-1 acc 22.266 (25.078)	Top-5 acc 45.312 (47.590)	lr 0.01845
Warmup Train [24][1220/3239]	Time 0.175 (0.241)	Data 0.001 (0.017)	Loss 4.4253 (4.3099)	Top-1 acc 22.266 (25.076)	Top-5 acc 43.359 (47.586)	lr 0.01845
Warmup Train [24][1230/3239]	Time 0.284 (0.240)	Data 0.001 (0.017)	Loss 4.4086 (4.3099)	Top-1 acc 21.875 (25.075)	Top-5 acc 45.312 (47.579)	lr 0.01844
Warmup Train [24][1240/3239]	Time 0.194 (0.240)	Data 0.002 (0.017)	Loss 4.2388 (4.3099)	Top-1 acc 26.172 (25.077)	Top-5 acc 48.438 (47.577)	lr 0.01844
Warmup Train [24][1250/3239]	Time 0.143 (0.240)	Data 0.002 (0.017)	Loss 4.4136 (4.3102)	Top-1 acc 23.438 (25.070)	Top-5 acc 44.531 (47.581)	lr 0.01843
Warmup Train [24][1260/3239]	Time 0.232 (0.240)	Data 0.001 (0.016)	Loss 4.3728 (4.3102)	Top-1 acc 23.047 (25.064)	Top-5 acc 44.531 (47.577)	lr 0.01842
Warmup Train [24][1270/3239]	Time 0.193 (0.240)	Data 0.001 (0.016)	Loss 4.3993 (4.3099)	Top-1 acc 20.703 (25.060)	Top-5 acc 45.703 (47.582)	lr 0.01842
Warmup Train [24][1280/3239]	Time 0.202 (0.240)	Data 0.001 (0.016)	Loss 4.2475 (4.3100)	Top-1 acc 26.172 (25.062)	Top-5 acc 53.906 (47.580)	lr 0.01841
Warmup Train [24][1290/3239]	Time 0.244 (0.240)	Data 0.001 (0.016)	Loss 4.3835 (4.3098)	Top-1 acc 21.094 (25.064)	Top-5 acc 46.094 (47.588)	lr 0.01841
Warmup Train [24][1300/3239]	Time 0.325 (0.240)	Data 0.002 (0.016)	Loss 4.5193 (4.3102)	Top-1 acc 25.781 (25.065)	Top-5 acc 40.234 (47.569)	lr 0.01840
Warmup Train [24][1310/3239]	Time 0.209 (0.240)	Data 0.002 (0.016)	Loss 4.3053 (4.3104)	Top-1 acc 25.391 (25.062)	Top-5 acc 50.391 (47.567)	lr 0.01839
Warmup Train [24][1320/3239]	Time 0.233 (0.239)	Data 0.002 (0.016)	Loss 4.2937 (4.3098)	Top-1 acc 27.734 (25.075)	Top-5 acc 46.094 (47.586)	lr 0.01839
Warmup Train [24][1330/3239]	Time 0.215 (0.239)	Data 0.001 (0.016)	Loss 4.3524 (4.3097)	Top-1 acc 27.734 (25.088)	Top-5 acc 48.047 (47.590)	lr 0.01838
Warmup Train [24][1340/3239]	Time 0.207 (0.239)	Data 0.001 (0.016)	Loss 4.5300 (4.3095)	Top-1 acc 20.312 (25.103)	Top-5 acc 40.234 (47.603)	lr 0.01838
Warmup Train [24][1350/3239]	Time 0.209 (0.239)	Data 0.001 (0.015)	Loss 4.2016 (4.3093)	Top-1 acc 26.562 (25.107)	Top-5 acc 52.734 (47.609)	lr 0.01837
Warmup Train [24][1360/3239]	Time 0.211 (0.239)	Data 0.001 (0.015)	Loss 4.1985 (4.3089)	Top-1 acc 26.172 (25.118)	Top-5 acc 48.828 (47.617)	lr 0.01837
Warmup Train [24][1370/3239]	Time 0.209 (0.239)	Data 0.002 (0.015)	Loss 4.1252 (4.3086)	Top-1 acc 27.344 (25.111)	Top-5 acc 49.609 (47.623)	lr 0.01836
Warmup Train [24][1380/3239]	Time 0.205 (0.239)	Data 0.001 (0.015)	Loss 4.2677 (4.3089)	Top-1 acc 25.391 (25.106)	Top-5 acc 47.266 (47.617)	lr 0.01835
Warmup Train [24][1390/3239]	Time 0.234 (0.239)	Data 0.001 (0.015)	Loss 4.4130 (4.3096)	Top-1 acc 23.438 (25.099)	Top-5 acc 47.656 (47.604)	lr 0.01835
Warmup Train [24][1400/3239]	Time 0.374 (0.239)	Data 0.001 (0.015)	Loss 4.3495 (4.3099)	Top-1 acc 21.094 (25.088)	Top-5 acc 46.484 (47.597)	lr 0.01834
Warmup Train [24][1410/3239]	Time 0.259 (0.239)	Data 0.002 (0.015)	Loss 4.2550 (4.3098)	Top-1 acc 26.172 (25.097)	Top-5 acc 49.219 (47.601)	lr 0.01834
Warmup Train [24][1420/3239]	Time 0.187 (0.239)	Data 0.001 (0.015)	Loss 4.1898 (4.3095)	Top-1 acc 27.734 (25.106)	Top-5 acc 53.125 (47.609)	lr 0.01833
Warmup Train [24][1430/3239]	Time 0.189 (0.238)	Data 0.002 (0.015)	Loss 4.3245 (4.3093)	Top-1 acc 25.000 (25.114)	Top-5 acc 46.484 (47.609)	lr 0.01832
Warmup Train [24][1440/3239]	Time 0.242 (0.238)	Data 0.002 (0.015)	Loss 4.2350 (4.3093)	Top-1 acc 27.734 (25.114)	Top-5 acc 48.438 (47.613)	lr 0.01832
Warmup Train [24][1450/3239]	Time 0.219 (0.238)	Data 0.001 (0.015)	Loss 4.1457 (4.3088)	Top-1 acc 28.906 (25.121)	Top-5 acc 49.609 (47.620)	lr 0.01831
Warmup Train [24][1460/3239]	Time 0.200 (0.238)	Data 0.001 (0.015)	Loss 4.1631 (4.3087)	Top-1 acc 32.812 (25.130)	Top-5 acc 49.219 (47.622)	lr 0.01831
Warmup Train [24][1470/3239]	Time 0.191 (0.238)	Data 0.003 (0.014)	Loss 4.5071 (4.3091)	Top-1 acc 21.094 (25.128)	Top-5 acc 46.094 (47.616)	lr 0.01830
Warmup Train [24][1480/3239]	Time 0.180 (0.238)	Data 0.002 (0.014)	Loss 4.2348 (4.3091)	Top-1 acc 24.219 (25.129)	Top-5 acc 46.875 (47.620)	lr 0.01830
Warmup Train [24][1490/3239]	Time 0.327 (0.238)	Data 0.001 (0.014)	Loss 4.3038 (4.3092)	Top-1 acc 28.125 (25.135)	Top-5 acc 47.266 (47.617)	lr 0.01829
Warmup Train [24][1500/3239]	Time 0.288 (0.238)	Data 0.001 (0.014)	Loss 4.2980 (4.3096)	Top-1 acc 23.047 (25.128)	Top-5 acc 49.219 (47.607)	lr 0.01828
Warmup Train [24][1510/3239]	Time 0.143 (0.238)	Data 0.001 (0.014)	Loss 4.2933 (4.3096)	Top-1 acc 22.656 (25.128)	Top-5 acc 49.219 (47.609)	lr 0.01828
Warmup Train [24][1520/3239]	Time 0.193 (0.238)	Data 0.001 (0.014)	Loss 4.3158 (4.3090)	Top-1 acc 25.391 (25.137)	Top-5 acc 48.047 (47.619)	lr 0.01827
Warmup Train [24][1530/3239]	Time 0.228 (0.237)	Data 0.001 (0.014)	Loss 4.3510 (4.3091)	Top-1 acc 22.656 (25.134)	Top-5 acc 47.656 (47.616)	lr 0.01827
Warmup Train [24][1540/3239]	Time 0.210 (0.237)	Data 0.002 (0.014)	Loss 4.4283 (4.3095)	Top-1 acc 24.609 (25.132)	Top-5 acc 46.875 (47.610)	lr 0.01826
Warmup Train [24][1550/3239]	Time 0.218 (0.237)	Data 0.001 (0.014)	Loss 4.2115 (4.3093)	Top-1 acc 25.781 (25.141)	Top-5 acc 49.609 (47.616)	lr 0.01825
Warmup Train [24][1560/3239]	Time 0.213 (0.237)	Data 0.001 (0.014)	Loss 4.2713 (4.3090)	Top-1 acc 25.000 (25.148)	Top-5 acc 44.531 (47.618)	lr 0.01825
Warmup Train [24][1570/3239]	Time 0.268 (0.237)	Data 0.001 (0.014)	Loss 4.1019 (4.3085)	Top-1 acc 29.297 (25.163)	Top-5 acc 53.125 (47.627)	lr 0.01824
Warmup Train [24][1580/3239]	Time 0.214 (0.237)	Data 0.002 (0.014)	Loss 4.4536 (4.3084)	Top-1 acc 20.312 (25.167)	Top-5 acc 46.484 (47.629)	lr 0.01824
Warmup Train [24][1590/3239]	Time 0.230 (0.237)	Data 0.001 (0.014)	Loss 4.3495 (4.3083)	Top-1 acc 23.047 (25.167)	Top-5 acc 47.266 (47.631)	lr 0.01823
Warmup Train [24][1600/3239]	Time 0.323 (0.237)	Data 0.002 (0.014)	Loss 4.3944 (4.3080)	Top-1 acc 23.828 (25.174)	Top-5 acc 44.922 (47.636)	lr 0.01823
Warmup Train [24][1610/3239]	Time 0.244 (0.237)	Data 0.001 (0.013)	Loss 4.4514 (4.3082)	Top-1 acc 23.438 (25.175)	Top-5 acc 46.094 (47.630)	lr 0.01822
Warmup Train [24][1620/3239]	Time 0.183 (0.237)	Data 0.001 (0.013)	Loss 4.4665 (4.3080)	Top-1 acc 21.094 (25.174)	Top-5 acc 43.750 (47.638)	lr 0.01821
Warmup Train [24][1630/3239]	Time 0.267 (0.237)	Data 0.001 (0.013)	Loss 4.1146 (4.3078)	Top-1 acc 27.734 (25.175)	Top-5 acc 48.438 (47.645)	lr 0.01821
Warmup Train [24][1640/3239]	Time 0.215 (0.237)	Data 0.002 (0.013)	Loss 4.4950 (4.3077)	Top-1 acc 21.875 (25.184)	Top-5 acc 41.797 (47.650)	lr 0.01820
Warmup Train [24][1650/3239]	Time 0.258 (0.237)	Data 0.002 (0.013)	Loss 4.2985 (4.3073)	Top-1 acc 21.875 (25.192)	Top-5 acc 47.656 (47.661)	lr 0.01820
Warmup Train [24][1660/3239]	Time 0.146 (0.237)	Data 0.002 (0.013)	Loss 4.3894 (4.3073)	Top-1 acc 25.781 (25.190)	Top-5 acc 46.875 (47.660)	lr 0.01819
Warmup Train [24][1670/3239]	Time 0.212 (0.237)	Data 0.001 (0.013)	Loss 4.2579 (4.3071)	Top-1 acc 20.703 (25.187)	Top-5 acc 51.172 (47.671)	lr 0.01818
Warmup Train [24][1680/3239]	Time 0.143 (0.236)	Data 0.002 (0.013)	Loss 4.1185 (4.3070)	Top-1 acc 28.906 (25.183)	Top-5 acc 50.000 (47.673)	lr 0.01818
Warmup Train [24][1690/3239]	Time 0.353 (0.237)	Data 0.001 (0.013)	Loss 4.3211 (4.3072)	Top-1 acc 23.047 (25.181)	Top-5 acc 46.094 (47.671)	lr 0.01817
Warmup Train [24][1700/3239]	Time 0.238 (0.236)	Data 0.001 (0.013)	Loss 4.3614 (4.3072)	Top-1 acc 29.688 (25.176)	Top-5 acc 46.484 (47.662)	lr 0.01817
Warmup Train [24][1710/3239]	Time 0.199 (0.236)	Data 0.001 (0.013)	Loss 4.0253 (4.3065)	Top-1 acc 30.859 (25.189)	Top-5 acc 53.516 (47.678)	lr 0.01816
Warmup Train [24][1720/3239]	Time 0.265 (0.236)	Data 0.002 (0.013)	Loss 4.1011 (4.3064)	Top-1 acc 30.469 (25.196)	Top-5 acc 53.125 (47.679)	lr 0.01816
Warmup Train [24][1730/3239]	Time 0.155 (0.236)	Data 0.003 (0.013)	Loss 4.5490 (4.3064)	Top-1 acc 18.750 (25.201)	Top-5 acc 40.625 (47.681)	lr 0.01815
Warmup Train [24][1740/3239]	Time 0.228 (0.236)	Data 0.001 (0.013)	Loss 4.3909 (4.3067)	Top-1 acc 18.359 (25.195)	Top-5 acc 44.141 (47.673)	lr 0.01814
Warmup Train [24][1750/3239]	Time 0.195 (0.236)	Data 0.001 (0.013)	Loss 4.3113 (4.3067)	Top-1 acc 20.703 (25.191)	Top-5 acc 45.703 (47.671)	lr 0.01814
Warmup Train [24][1760/3239]	Time 0.193 (0.236)	Data 0.001 (0.013)	Loss 4.4551 (4.3066)	Top-1 acc 23.438 (25.191)	Top-5 acc 44.141 (47.677)	lr 0.01813
Warmup Train [24][1770/3239]	Time 0.192 (0.236)	Data 0.001 (0.013)	Loss 4.3272 (4.3064)	Top-1 acc 21.484 (25.189)	Top-5 acc 46.875 (47.684)	lr 0.01813
Warmup Train [24][1780/3239]	Time 0.196 (0.236)	Data 0.002 (0.012)	Loss 4.2745 (4.3065)	Top-1 acc 25.391 (25.189)	Top-5 acc 49.609 (47.687)	lr 0.01812
Warmup Train [24][1790/3239]	Time 0.305 (0.236)	Data 0.001 (0.012)	Loss 4.5197 (4.3068)	Top-1 acc 18.750 (25.179)	Top-5 acc 41.406 (47.680)	lr 0.01811
Warmup Train [24][1800/3239]	Time 0.216 (0.236)	Data 0.002 (0.012)	Loss 4.1697 (4.3064)	Top-1 acc 27.344 (25.185)	Top-5 acc 47.656 (47.686)	lr 0.01811
Warmup Train [24][1810/3239]	Time 0.228 (0.236)	Data 0.001 (0.012)	Loss 4.4202 (4.3066)	Top-1 acc 20.312 (25.175)	Top-5 acc 44.531 (47.678)	lr 0.01810
Warmup Train [24][1820/3239]	Time 0.198 (0.236)	Data 0.001 (0.012)	Loss 4.3097 (4.3067)	Top-1 acc 25.781 (25.171)	Top-5 acc 48.828 (47.673)	lr 0.01810
Warmup Train [24][1830/3239]	Time 0.204 (0.236)	Data 0.001 (0.012)	Loss 4.3686 (4.3065)	Top-1 acc 25.000 (25.176)	Top-5 acc 44.531 (47.674)	lr 0.01809
Warmup Train [24][1840/3239]	Time 0.191 (0.235)	Data 0.002 (0.012)	Loss 4.3370 (4.3067)	Top-1 acc 27.344 (25.176)	Top-5 acc 48.438 (47.673)	lr 0.01809
Warmup Train [24][1850/3239]	Time 0.172 (0.235)	Data 0.002 (0.012)	Loss 4.2458 (4.3064)	Top-1 acc 22.656 (25.183)	Top-5 acc 44.922 (47.672)	lr 0.01808
Warmup Train [24][1860/3239]	Time 0.196 (0.235)	Data 0.002 (0.012)	Loss 4.5266 (4.3066)	Top-1 acc 17.969 (25.173)	Top-5 acc 40.625 (47.664)	lr 0.01807
Warmup Train [24][1870/3239]	Time 0.238 (0.235)	Data 0.001 (0.012)	Loss 4.1977 (4.3065)	Top-1 acc 28.125 (25.180)	Top-5 acc 48.047 (47.666)	lr 0.01807
Warmup Train [24][1880/3239]	Time 0.136 (0.235)	Data 0.001 (0.012)	Loss 4.4568 (4.3065)	Top-1 acc 23.828 (25.175)	Top-5 acc 42.578 (47.666)	lr 0.01806
Warmup Train [24][1890/3239]	Time 0.334 (0.235)	Data 0.001 (0.012)	Loss 4.3299 (4.3067)	Top-1 acc 22.656 (25.171)	Top-5 acc 44.141 (47.664)	lr 0.01806
Warmup Train [24][1900/3239]	Time 0.226 (0.235)	Data 0.001 (0.012)	Loss 4.3747 (4.3067)	Top-1 acc 25.781 (25.173)	Top-5 acc 47.656 (47.668)	lr 0.01805
Warmup Train [24][1910/3239]	Time 0.219 (0.235)	Data 0.002 (0.012)	Loss 4.2243 (4.3069)	Top-1 acc 27.734 (25.177)	Top-5 acc 48.438 (47.662)	lr 0.01805
Warmup Train [24][1920/3239]	Time 0.229 (0.235)	Data 0.002 (0.012)	Loss 4.2309 (4.3071)	Top-1 acc 24.609 (25.169)	Top-5 acc 49.609 (47.658)	lr 0.01804
Warmup Train [24][1930/3239]	Time 0.291 (0.235)	Data 0.001 (0.012)	Loss 4.4130 (4.3072)	Top-1 acc 26.953 (25.170)	Top-5 acc 44.531 (47.655)	lr 0.01803
Warmup Train [24][1940/3239]	Time 0.217 (0.235)	Data 0.001 (0.012)	Loss 4.2602 (4.3074)	Top-1 acc 24.609 (25.164)	Top-5 acc 47.656 (47.648)	lr 0.01803
Warmup Train [24][1950/3239]	Time 0.246 (0.235)	Data 0.001 (0.012)	Loss 4.2233 (4.3074)	Top-1 acc 23.047 (25.168)	Top-5 acc 48.047 (47.647)	lr 0.01802
Warmup Train [24][1960/3239]	Time 0.190 (0.235)	Data 0.002 (0.012)	Loss 4.1866 (4.3070)	Top-1 acc 26.953 (25.176)	Top-5 acc 50.391 (47.658)	lr 0.01802
Warmup Train [24][1970/3239]	Time 0.196 (0.235)	Data 0.001 (0.012)	Loss 4.1380 (4.3071)	Top-1 acc 28.906 (25.178)	Top-5 acc 50.391 (47.652)	lr 0.01801
Warmup Train [24][1980/3239]	Time 0.251 (0.235)	Data 0.001 (0.012)	Loss 4.2187 (4.3072)	Top-1 acc 27.344 (25.176)	Top-5 acc 51.172 (47.649)	lr 0.01800
Warmup Train [24][1990/3239]	Time 0.336 (0.235)	Data 0.002 (0.012)	Loss 4.2915 (4.3071)	Top-1 acc 25.000 (25.179)	Top-5 acc 48.828 (47.651)	lr 0.01800
Warmup Train [24][2000/3239]	Time 0.256 (0.235)	Data 0.001 (0.011)	Loss 4.3513 (4.3071)	Top-1 acc 23.438 (25.175)	Top-5 acc 46.094 (47.653)	lr 0.01799
Warmup Train [24][2010/3239]	Time 0.198 (0.235)	Data 0.002 (0.011)	Loss 4.2825 (4.3069)	Top-1 acc 25.000 (25.178)	Top-5 acc 49.219 (47.656)	lr 0.01799
Warmup Train [24][2020/3239]	Time 0.217 (0.235)	Data 0.001 (0.011)	Loss 4.4467 (4.3069)	Top-1 acc 21.875 (25.179)	Top-5 acc 45.703 (47.661)	lr 0.01798
Warmup Train [24][2030/3239]	Time 0.194 (0.235)	Data 0.001 (0.011)	Loss 4.1759 (4.3068)	Top-1 acc 32.422 (25.184)	Top-5 acc 51.953 (47.662)	lr 0.01798
Warmup Train [24][2040/3239]	Time 0.179 (0.235)	Data 0.001 (0.011)	Loss 4.4976 (4.3068)	Top-1 acc 18.750 (25.180)	Top-5 acc 41.406 (47.654)	lr 0.01797
Warmup Train [24][2050/3239]	Time 0.192 (0.235)	Data 0.002 (0.011)	Loss 4.4004 (4.3068)	Top-1 acc 24.609 (25.182)	Top-5 acc 44.922 (47.649)	lr 0.01796
Warmup Train [24][2060/3239]	Time 0.188 (0.234)	Data 0.001 (0.011)	Loss 4.1483 (4.3067)	Top-1 acc 30.078 (25.183)	Top-5 acc 50.000 (47.646)	lr 0.01796
Warmup Train [24][2070/3239]	Time 0.231 (0.234)	Data 0.001 (0.011)	Loss 4.3680 (4.3065)	Top-1 acc 24.609 (25.189)	Top-5 acc 45.703 (47.649)	lr 0.01795
Warmup Train [24][2080/3239]	Time 0.328 (0.234)	Data 0.001 (0.011)	Loss 4.3024 (4.3066)	Top-1 acc 25.391 (25.189)	Top-5 acc 48.438 (47.649)	lr 0.01795
Warmup Train [24][2090/3239]	Time 0.243 (0.234)	Data 0.001 (0.011)	Loss 4.5344 (4.3067)	Top-1 acc 19.531 (25.191)	Top-5 acc 45.312 (47.646)	lr 0.01794
Warmup Train [24][2100/3239]	Time 0.203 (0.234)	Data 0.001 (0.011)	Loss 3.8849 (4.3064)	Top-1 acc 33.984 (25.200)	Top-5 acc 59.375 (47.654)	lr 0.01793
Warmup Train [24][2110/3239]	Time 0.158 (0.234)	Data 0.001 (0.011)	Loss 4.4668 (4.3066)	Top-1 acc 23.828 (25.199)	Top-5 acc 46.094 (47.648)	lr 0.01793
Warmup Train [24][2120/3239]	Time 0.246 (0.234)	Data 0.001 (0.011)	Loss 4.3922 (4.3064)	Top-1 acc 21.484 (25.198)	Top-5 acc 42.188 (47.650)	lr 0.01792
Warmup Train [24][2130/3239]	Time 0.224 (0.234)	Data 0.001 (0.011)	Loss 4.2485 (4.3060)	Top-1 acc 26.562 (25.206)	Top-5 acc 51.562 (47.661)	lr 0.01792
Warmup Train [24][2140/3239]	Time 0.270 (0.234)	Data 0.001 (0.011)	Loss 4.2088 (4.3060)	Top-1 acc 23.438 (25.203)	Top-5 acc 47.656 (47.658)	lr 0.01791
Warmup Train [24][2150/3239]	Time 0.218 (0.234)	Data 0.002 (0.011)	Loss 4.3129 (4.3059)	Top-1 acc 27.344 (25.204)	Top-5 acc 48.828 (47.658)	lr 0.01791
Warmup Train [24][2160/3239]	Time 0.233 (0.234)	Data 0.001 (0.011)	Loss 4.1475 (4.3058)	Top-1 acc 30.078 (25.207)	Top-5 acc 49.219 (47.657)	lr 0.01790
Warmup Train [24][2170/3239]	Time 0.333 (0.234)	Data 0.002 (0.011)	Loss 4.3619 (4.3058)	Top-1 acc 25.000 (25.211)	Top-5 acc 43.359 (47.660)	lr 0.01789
Warmup Train [24][2180/3239]	Time 0.213 (0.234)	Data 0.001 (0.011)	Loss 4.2974 (4.3058)	Top-1 acc 28.125 (25.211)	Top-5 acc 49.219 (47.657)	lr 0.01789
Warmup Train [24][2190/3239]	Time 0.182 (0.234)	Data 0.001 (0.011)	Loss 4.2547 (4.3060)	Top-1 acc 30.469 (25.214)	Top-5 acc 46.484 (47.654)	lr 0.01788
Warmup Train [24][2200/3239]	Time 0.229 (0.234)	Data 0.002 (0.011)	Loss 4.3492 (4.3060)	Top-1 acc 23.828 (25.211)	Top-5 acc 46.484 (47.652)	lr 0.01788
Warmup Train [24][2210/3239]	Time 0.238 (0.234)	Data 0.002 (0.011)	Loss 4.3073 (4.3058)	Top-1 acc 24.609 (25.214)	Top-5 acc 48.438 (47.654)	lr 0.01787
Warmup Train [24][2220/3239]	Time 0.150 (0.234)	Data 0.001 (0.011)	Loss 4.1747 (4.3057)	Top-1 acc 28.516 (25.215)	Top-5 acc 50.781 (47.654)	lr 0.01786
Warmup Train [24][2230/3239]	Time 0.190 (0.234)	Data 0.001 (0.011)	Loss 4.4055 (4.3056)	Top-1 acc 25.781 (25.215)	Top-5 acc 44.141 (47.654)	lr 0.01786
Warmup Train [24][2240/3239]	Time 0.178 (0.234)	Data 0.002 (0.011)	Loss 4.2539 (4.3055)	Top-1 acc 23.828 (25.211)	Top-5 acc 50.781 (47.656)	lr 0.01785
Warmup Train [24][2250/3239]	Time 0.152 (0.234)	Data 0.001 (0.011)	Loss 4.2698 (4.3054)	Top-1 acc 26.172 (25.208)	Top-5 acc 50.000 (47.661)	lr 0.01785
Warmup Train [24][2260/3239]	Time 0.207 (0.234)	Data 0.001 (0.011)	Loss 4.1846 (4.3054)	Top-1 acc 31.250 (25.213)	Top-5 acc 51.562 (47.660)	lr 0.01784
Warmup Train [24][2270/3239]	Time 0.294 (0.234)	Data 0.002 (0.010)	Loss 4.4542 (4.3056)	Top-1 acc 24.219 (25.207)	Top-5 acc 46.875 (47.661)	lr 0.01784
Warmup Train [24][2280/3239]	Time 0.250 (0.234)	Data 0.003 (0.010)	Loss 4.1291 (4.3055)	Top-1 acc 30.078 (25.211)	Top-5 acc 52.344 (47.661)	lr 0.01783
Warmup Train [24][2290/3239]	Time 0.204 (0.234)	Data 0.001 (0.010)	Loss 4.2285 (4.3051)	Top-1 acc 26.562 (25.215)	Top-5 acc 48.047 (47.667)	lr 0.01782
Warmup Train [24][2300/3239]	Time 0.302 (0.234)	Data 0.002 (0.010)	Loss 4.5203 (4.3052)	Top-1 acc 21.484 (25.214)	Top-5 acc 43.750 (47.666)	lr 0.01782
Warmup Train [24][2310/3239]	Time 0.176 (0.233)	Data 0.001 (0.010)	Loss 4.2854 (4.3052)	Top-1 acc 24.609 (25.219)	Top-5 acc 45.703 (47.663)	lr 0.01781
Warmup Train [24][2320/3239]	Time 0.200 (0.234)	Data 0.001 (0.010)	Loss 4.3038 (4.3052)	Top-1 acc 22.656 (25.219)	Top-5 acc 45.703 (47.664)	lr 0.01781
Warmup Train [24][2330/3239]	Time 0.257 (0.233)	Data 0.001 (0.010)	Loss 4.2948 (4.3053)	Top-1 acc 26.562 (25.223)	Top-5 acc 44.922 (47.662)	lr 0.01780
Warmup Train [24][2340/3239]	Time 0.181 (0.233)	Data 0.001 (0.010)	Loss 4.2002 (4.3051)	Top-1 acc 24.219 (25.223)	Top-5 acc 51.562 (47.667)	lr 0.01780
Warmup Train [24][2350/3239]	Time 0.207 (0.233)	Data 0.001 (0.010)	Loss 4.4496 (4.3052)	Top-1 acc 22.656 (25.223)	Top-5 acc 44.531 (47.666)	lr 0.01779
Warmup Train [24][2360/3239]	Time 0.188 (0.233)	Data 0.002 (0.010)	Loss 4.5080 (4.3053)	Top-1 acc 20.312 (25.227)	Top-5 acc 41.797 (47.665)	lr 0.01778
Warmup Train [24][2370/3239]	Time 0.314 (0.233)	Data 0.001 (0.010)	Loss 4.1695 (4.3050)	Top-1 acc 26.953 (25.234)	Top-5 acc 53.125 (47.672)	lr 0.01778
Warmup Train [24][2380/3239]	Time 0.266 (0.233)	Data 0.001 (0.010)	Loss 4.2397 (4.3048)	Top-1 acc 22.656 (25.235)	Top-5 acc 47.656 (47.674)	lr 0.01777
Warmup Train [24][2390/3239]	Time 0.190 (0.233)	Data 0.001 (0.010)	Loss 4.2143 (4.3044)	Top-1 acc 27.344 (25.244)	Top-5 acc 50.000 (47.687)	lr 0.01777
Warmup Train [24][2400/3239]	Time 0.273 (0.233)	Data 0.002 (0.010)	Loss 4.2596 (4.3044)	Top-1 acc 26.172 (25.247)	Top-5 acc 46.484 (47.687)	lr 0.01776
Warmup Train [24][2410/3239]	Time 0.248 (0.233)	Data 0.002 (0.010)	Loss 4.3641 (4.3044)	Top-1 acc 20.312 (25.251)	Top-5 acc 48.047 (47.683)	lr 0.01775
Warmup Train [24][2420/3239]	Time 0.240 (0.233)	Data 0.002 (0.010)	Loss 4.2634 (4.3045)	Top-1 acc 29.297 (25.254)	Top-5 acc 47.266 (47.680)	lr 0.01775
Warmup Train [24][2430/3239]	Time 0.157 (0.233)	Data 0.001 (0.010)	Loss 4.4818 (4.3045)	Top-1 acc 23.047 (25.258)	Top-5 acc 44.531 (47.683)	lr 0.01774
Warmup Train [24][2440/3239]	Time 0.212 (0.233)	Data 0.002 (0.010)	Loss 4.0960 (4.3045)	Top-1 acc 26.953 (25.257)	Top-5 acc 53.125 (47.686)	lr 0.01774
Warmup Train [24][2450/3239]	Time 0.223 (0.233)	Data 0.001 (0.010)	Loss 4.2696 (4.3045)	Top-1 acc 22.656 (25.254)	Top-5 acc 50.391 (47.687)	lr 0.01773
Warmup Train [24][2460/3239]	Time 0.226 (0.233)	Data 0.001 (0.010)	Loss 4.2427 (4.3045)	Top-1 acc 27.344 (25.253)	Top-5 acc 44.531 (47.683)	lr 0.01773
Warmup Train [24][2470/3239]	Time 0.313 (0.233)	Data 0.001 (0.010)	Loss 4.2133 (4.3044)	Top-1 acc 25.781 (25.254)	Top-5 acc 48.438 (47.685)	lr 0.01772
Warmup Train [24][2480/3239]	Time 0.330 (0.233)	Data 0.001 (0.010)	Loss 4.2907 (4.3044)	Top-1 acc 24.609 (25.253)	Top-5 acc 46.875 (47.687)	lr 0.01771
Warmup Train [24][2490/3239]	Time 0.225 (0.233)	Data 0.001 (0.010)	Loss 4.3503 (4.3044)	Top-1 acc 20.703 (25.255)	Top-5 acc 48.438 (47.685)	lr 0.01771
Warmup Train [24][2500/3239]	Time 0.237 (0.233)	Data 0.001 (0.010)	Loss 4.2577 (4.3043)	Top-1 acc 25.000 (25.250)	Top-5 acc 51.953 (47.691)	lr 0.01770
Warmup Train [24][2510/3239]	Time 0.234 (0.233)	Data 0.001 (0.010)	Loss 4.2284 (4.3042)	Top-1 acc 25.781 (25.255)	Top-5 acc 53.906 (47.696)	lr 0.01770
Warmup Train [24][2520/3239]	Time 0.232 (0.233)	Data 0.001 (0.010)	Loss 4.2890 (4.3043)	Top-1 acc 25.781 (25.254)	Top-5 acc 47.266 (47.692)	lr 0.01769
Warmup Train [24][2530/3239]	Time 0.168 (0.233)	Data 0.002 (0.010)	Loss 4.2249 (4.3044)	Top-1 acc 27.734 (25.253)	Top-5 acc 48.828 (47.689)	lr 0.01768
Warmup Train [24][2540/3239]	Time 0.240 (0.233)	Data 0.002 (0.010)	Loss 4.0828 (4.3044)	Top-1 acc 23.438 (25.255)	Top-5 acc 53.125 (47.687)	lr 0.01768
Warmup Train [24][2550/3239]	Time 0.230 (0.232)	Data 0.001 (0.010)	Loss 4.2712 (4.3043)	Top-1 acc 26.172 (25.256)	Top-5 acc 45.312 (47.687)	lr 0.01767
Warmup Train [24][2560/3239]	Time 0.205 (0.232)	Data 0.001 (0.010)	Loss 4.3892 (4.3043)	Top-1 acc 25.000 (25.255)	Top-5 acc 46.094 (47.688)	lr 0.01767
Warmup Train [24][2570/3239]	Time 0.207 (0.232)	Data 0.001 (0.010)	Loss 4.3874 (4.3043)	Top-1 acc 23.828 (25.259)	Top-5 acc 42.188 (47.686)	lr 0.01766
Warmup Train [24][2580/3239]	Time 0.166 (0.232)	Data 0.001 (0.010)	Loss 4.1711 (4.3043)	Top-1 acc 28.516 (25.259)	Top-5 acc 51.562 (47.683)	lr 0.01766
Warmup Train [24][2590/3239]	Time 0.191 (0.232)	Data 0.001 (0.010)	Loss 4.5891 (4.3043)	Top-1 acc 21.484 (25.258)	Top-5 acc 42.969 (47.684)	lr 0.01765
Warmup Train [24][2600/3239]	Time 0.361 (0.232)	Data 0.001 (0.010)	Loss 4.0850 (4.3041)	Top-1 acc 28.906 (25.263)	Top-5 acc 52.734 (47.684)	lr 0.01764
Warmup Train [24][2610/3239]	Time 0.205 (0.232)	Data 0.001 (0.009)	Loss 4.3674 (4.3040)	Top-1 acc 21.875 (25.262)	Top-5 acc 46.484 (47.683)	lr 0.01764
Warmup Train [24][2620/3239]	Time 0.187 (0.232)	Data 0.001 (0.009)	Loss 4.4247 (4.3040)	Top-1 acc 25.781 (25.265)	Top-5 acc 47.266 (47.687)	lr 0.01763
Warmup Train [24][2630/3239]	Time 0.176 (0.232)	Data 0.001 (0.009)	Loss 4.2400 (4.3038)	Top-1 acc 23.438 (25.266)	Top-5 acc 45.312 (47.692)	lr 0.01763
Warmup Train [24][2640/3239]	Time 0.201 (0.232)	Data 0.001 (0.009)	Loss 4.2568 (4.3039)	Top-1 acc 26.172 (25.264)	Top-5 acc 49.609 (47.686)	lr 0.01762
Warmup Train [24][2650/3239]	Time 0.158 (0.232)	Data 0.001 (0.009)	Loss 4.4016 (4.3039)	Top-1 acc 22.266 (25.265)	Top-5 acc 50.391 (47.686)	lr 0.01762
Warmup Train [24][2660/3239]	Time 0.188 (0.232)	Data 0.001 (0.009)	Loss 4.0372 (4.3037)	Top-1 acc 30.859 (25.269)	Top-5 acc 55.078 (47.693)	lr 0.01761
Warmup Train [24][2670/3239]	Time 0.237 (0.232)	Data 0.001 (0.009)	Loss 4.2079 (4.3037)	Top-1 acc 22.656 (25.271)	Top-5 acc 52.734 (47.695)	lr 0.01760
Warmup Train [24][2680/3239]	Time 0.192 (0.232)	Data 0.001 (0.009)	Loss 4.1909 (4.3036)	Top-1 acc 28.125 (25.274)	Top-5 acc 51.172 (47.702)	lr 0.01760
Warmup Train [24][2690/3239]	Time 0.191 (0.232)	Data 0.001 (0.009)	Loss 4.4213 (4.3036)	Top-1 acc 22.656 (25.274)	Top-5 acc 43.359 (47.703)	lr 0.01759
Warmup Train [24][2700/3239]	Time 0.233 (0.232)	Data 0.001 (0.009)	Loss 4.3386 (4.3038)	Top-1 acc 25.391 (25.275)	Top-5 acc 47.656 (47.703)	lr 0.01759
Warmup Train [24][2710/3239]	Time 0.340 (0.232)	Data 0.001 (0.009)	Loss 4.1434 (4.3034)	Top-1 acc 27.344 (25.280)	Top-5 acc 49.219 (47.712)	lr 0.01758
Warmup Train [24][2720/3239]	Time 0.226 (0.232)	Data 0.001 (0.009)	Loss 4.3086 (4.3033)	Top-1 acc 18.359 (25.277)	Top-5 acc 43.359 (47.715)	lr 0.01757
Warmup Train [24][2730/3239]	Time 0.148 (0.232)	Data 0.002 (0.009)	Loss 4.2211 (4.3031)	Top-1 acc 26.172 (25.284)	Top-5 acc 46.875 (47.720)	lr 0.01757
Warmup Train [24][2740/3239]	Time 0.216 (0.232)	Data 0.001 (0.009)	Loss 4.3897 (4.3032)	Top-1 acc 25.000 (25.286)	Top-5 acc 44.922 (47.719)	lr 0.01756
Warmup Train [24][2750/3239]	Time 0.196 (0.232)	Data 0.001 (0.009)	Loss 4.2187 (4.3030)	Top-1 acc 24.219 (25.289)	Top-5 acc 48.438 (47.726)	lr 0.01756
Warmup Train [24][2760/3239]	Time 0.229 (0.232)	Data 0.001 (0.009)	Loss 4.3636 (4.3032)	Top-1 acc 23.438 (25.282)	Top-5 acc 48.438 (47.723)	lr 0.01755
Warmup Train [24][2770/3239]	Time 0.163 (0.232)	Data 0.001 (0.009)	Loss 4.3980 (4.3034)	Top-1 acc 26.562 (25.277)	Top-5 acc 47.656 (47.722)	lr 0.01755
Warmup Train [24][2780/3239]	Time 0.192 (0.232)	Data 0.001 (0.009)	Loss 4.1218 (4.3034)	Top-1 acc 29.297 (25.275)	Top-5 acc 48.047 (47.719)	lr 0.01754
Warmup Train [24][2790/3239]	Time 0.217 (0.232)	Data 0.001 (0.009)	Loss 4.3376 (4.3035)	Top-1 acc 25.781 (25.273)	Top-5 acc 48.438 (47.719)	lr 0.01753
Warmup Train [24][2800/3239]	Time 0.208 (0.232)	Data 0.001 (0.009)	Loss 4.4310 (4.3033)	Top-1 acc 21.875 (25.277)	Top-5 acc 42.969 (47.719)	lr 0.01753
Warmup Train [24][2810/3239]	Time 0.261 (0.232)	Data 0.002 (0.009)	Loss 4.1669 (4.3032)	Top-1 acc 24.219 (25.277)	Top-5 acc 51.562 (47.725)	lr 0.01752
Warmup Train [24][2820/3239]	Time 0.191 (0.231)	Data 0.002 (0.009)	Loss 4.1537 (4.3031)	Top-1 acc 29.688 (25.280)	Top-5 acc 51.953 (47.729)	lr 0.01752
Warmup Train [24][2830/3239]	Time 0.289 (0.231)	Data 0.001 (0.009)	Loss 4.4506 (4.3030)	Top-1 acc 21.875 (25.281)	Top-5 acc 43.750 (47.728)	lr 0.01751
Warmup Train [24][2840/3239]	Time 0.360 (0.231)	Data 0.001 (0.009)	Loss 4.3236 (4.3029)	Top-1 acc 25.391 (25.288)	Top-5 acc 45.312 (47.735)	lr 0.01751
Warmup Train [24][2850/3239]	Time 0.217 (0.231)	Data 0.001 (0.009)	Loss 4.3157 (4.3031)	Top-1 acc 25.000 (25.283)	Top-5 acc 43.359 (47.727)	lr 0.01750
Warmup Train [24][2860/3239]	Time 0.186 (0.231)	Data 0.001 (0.009)	Loss 4.3135 (4.3030)	Top-1 acc 25.000 (25.282)	Top-5 acc 49.219 (47.730)	lr 0.01749
Warmup Train [24][2870/3239]	Time 0.184 (0.231)	Data 0.001 (0.009)	Loss 4.3031 (4.3029)	Top-1 acc 21.094 (25.283)	Top-5 acc 48.438 (47.733)	lr 0.01749
Warmup Train [24][2880/3239]	Time 0.218 (0.231)	Data 0.002 (0.009)	Loss 4.3059 (4.3029)	Top-1 acc 25.781 (25.281)	Top-5 acc 46.094 (47.731)	lr 0.01748
Warmup Train [24][2890/3239]	Time 0.164 (0.231)	Data 0.001 (0.009)	Loss 4.4197 (4.3029)	Top-1 acc 23.828 (25.281)	Top-5 acc 42.969 (47.728)	lr 0.01748
Warmup Train [24][2900/3239]	Time 0.216 (0.231)	Data 0.001 (0.009)	Loss 4.2041 (4.3025)	Top-1 acc 23.047 (25.288)	Top-5 acc 49.609 (47.734)	lr 0.01747
Warmup Train [24][2910/3239]	Time 0.178 (0.231)	Data 0.002 (0.009)	Loss 4.3624 (4.3024)	Top-1 acc 23.828 (25.288)	Top-5 acc 47.656 (47.738)	lr 0.01747
Warmup Train [24][2920/3239]	Time 0.194 (0.231)	Data 0.001 (0.009)	Loss 4.2538 (4.3025)	Top-1 acc 25.781 (25.286)	Top-5 acc 48.828 (47.738)	lr 0.01746
Warmup Train [24][2930/3239]	Time 0.373 (0.231)	Data 0.002 (0.009)	Loss 4.1627 (4.3022)	Top-1 acc 30.078 (25.292)	Top-5 acc 52.344 (47.743)	lr 0.01745
Warmup Train [24][2940/3239]	Time 0.184 (0.231)	Data 0.001 (0.009)	Loss 4.3404 (4.3022)	Top-1 acc 26.562 (25.293)	Top-5 acc 46.094 (47.741)	lr 0.01745
Warmup Train [24][2950/3239]	Time 0.198 (0.231)	Data 0.001 (0.009)	Loss 4.3276 (4.3023)	Top-1 acc 30.859 (25.293)	Top-5 acc 49.609 (47.740)	lr 0.01744
Warmup Train [24][2960/3239]	Time 0.184 (0.231)	Data 0.001 (0.009)	Loss 4.3301 (4.3021)	Top-1 acc 25.391 (25.298)	Top-5 acc 44.141 (47.745)	lr 0.01744
Warmup Train [24][2970/3239]	Time 0.231 (0.231)	Data 0.001 (0.009)	Loss 4.2441 (4.3020)	Top-1 acc 28.125 (25.296)	Top-5 acc 47.266 (47.741)	lr 0.01743
Warmup Train [24][2980/3239]	Time 0.169 (0.231)	Data 0.001 (0.009)	Loss 4.3257 (4.3021)	Top-1 acc 26.562 (25.292)	Top-5 acc 47.266 (47.736)	lr 0.01742
Warmup Train [24][2990/3239]	Time 0.238 (0.231)	Data 0.001 (0.009)	Loss 4.4944 (4.3024)	Top-1 acc 19.531 (25.283)	Top-5 acc 46.094 (47.729)	lr 0.01742
Warmup Train [24][3000/3239]	Time 0.196 (0.231)	Data 0.001 (0.009)	Loss 4.3698 (4.3022)	Top-1 acc 22.656 (25.286)	Top-5 acc 47.266 (47.735)	lr 0.01741
Warmup Train [24][3010/3239]	Time 0.212 (0.231)	Data 0.003 (0.009)	Loss 4.1525 (4.3020)	Top-1 acc 25.391 (25.287)	Top-5 acc 51.562 (47.740)	lr 0.01741
Warmup Train [24][3020/3239]	Time 0.214 (0.231)	Data 0.002 (0.009)	Loss 4.2963 (4.3019)	Top-1 acc 25.000 (25.287)	Top-5 acc 48.828 (47.740)	lr 0.01740
Warmup Train [24][3030/3239]	Time 0.294 (0.231)	Data 0.001 (0.009)	Loss 4.0276 (4.3018)	Top-1 acc 28.125 (25.288)	Top-5 acc 54.688 (47.744)	lr 0.01740
Warmup Train [24][3040/3239]	Time 0.196 (0.231)	Data 0.002 (0.009)	Loss 4.1839 (4.3017)	Top-1 acc 24.219 (25.288)	Top-5 acc 49.609 (47.744)	lr 0.01739
Warmup Train [24][3050/3239]	Time 0.191 (0.231)	Data 0.001 (0.009)	Loss 4.1962 (4.3017)	Top-1 acc 26.172 (25.286)	Top-5 acc 50.000 (47.743)	lr 0.01738
Warmup Train [24][3060/3239]	Time 0.205 (0.231)	Data 0.001 (0.009)	Loss 4.3894 (4.3016)	Top-1 acc 21.875 (25.286)	Top-5 acc 46.875 (47.743)	lr 0.01738
Warmup Train [24][3070/3239]	Time 0.199 (0.231)	Data 0.001 (0.009)	Loss 4.2517 (4.3017)	Top-1 acc 24.609 (25.285)	Top-5 acc 46.094 (47.741)	lr 0.01737
Warmup Train [24][3080/3239]	Time 0.184 (0.231)	Data 0.001 (0.008)	Loss 4.3372 (4.3016)	Top-1 acc 21.875 (25.285)	Top-5 acc 50.000 (47.740)	lr 0.01737
Warmup Train [24][3090/3239]	Time 0.229 (0.231)	Data 0.001 (0.008)	Loss 4.1690 (4.3015)	Top-1 acc 31.641 (25.286)	Top-5 acc 52.344 (47.742)	lr 0.01736
Warmup Train [24][3100/3239]	Time 0.209 (0.231)	Data 0.001 (0.008)	Loss 4.3224 (4.3014)	Top-1 acc 23.047 (25.289)	Top-5 acc 47.656 (47.746)	lr 0.01736
Warmup Train [24][3110/3239]	Time 0.197 (0.231)	Data 0.002 (0.008)	Loss 4.2934 (4.3013)	Top-1 acc 25.000 (25.288)	Top-5 acc 46.484 (47.747)	lr 0.01735
Warmup Train [24][3120/3239]	Time 0.214 (0.231)	Data 0.002 (0.008)	Loss 4.1501 (4.3013)	Top-1 acc 28.125 (25.288)	Top-5 acc 52.734 (47.749)	lr 0.01734
Warmup Train [24][3130/3239]	Time 0.329 (0.231)	Data 0.001 (0.008)	Loss 4.5180 (4.3014)	Top-1 acc 19.922 (25.283)	Top-5 acc 45.312 (47.748)	lr 0.01734
Warmup Train [24][3140/3239]	Time 0.194 (0.231)	Data 0.002 (0.008)	Loss 4.2676 (4.3013)	Top-1 acc 30.469 (25.287)	Top-5 acc 49.219 (47.751)	lr 0.01733
Warmup Train [24][3150/3239]	Time 0.219 (0.231)	Data 0.002 (0.008)	Loss 4.2877 (4.3013)	Top-1 acc 28.906 (25.288)	Top-5 acc 49.219 (47.750)	lr 0.01733
Warmup Train [24][3160/3239]	Time 0.191 (0.231)	Data 0.001 (0.008)	Loss 4.3324 (4.3014)	Top-1 acc 23.828 (25.284)	Top-5 acc 47.266 (47.748)	lr 0.01732
Warmup Train [24][3170/3239]	Time 0.253 (0.231)	Data 0.001 (0.008)	Loss 4.4901 (4.3014)	Top-1 acc 19.141 (25.283)	Top-5 acc 40.234 (47.743)	lr 0.01731
Warmup Train [24][3180/3239]	Time 0.225 (0.231)	Data 0.000 (0.008)	Loss 4.0739 (4.3013)	Top-1 acc 27.344 (25.282)	Top-5 acc 51.562 (47.745)	lr 0.01731
Warmup Train [24][3190/3239]	Time 0.190 (0.230)	Data 0.000 (0.008)	Loss 4.5958 (4.3013)	Top-1 acc 20.312 (25.280)	Top-5 acc 41.797 (47.748)	lr 0.01730
Warmup Train [24][3200/3239]	Time 0.228 (0.230)	Data 0.000 (0.008)	Loss 4.4318 (4.3014)	Top-1 acc 22.266 (25.277)	Top-5 acc 44.922 (47.745)	lr 0.01730
Warmup Train [24][3210/3239]	Time 0.181 (0.230)	Data 0.000 (0.008)	Loss 4.4304 (4.3013)	Top-1 acc 21.484 (25.275)	Top-5 acc 44.531 (47.746)	lr 0.01729
Warmup Train [24][3220/3239]	Time 0.198 (0.230)	Data 0.000 (0.008)	Loss 4.0814 (4.3014)	Top-1 acc 32.812 (25.274)	Top-5 acc 55.078 (47.744)	lr 0.01729
Warmup Train [24][3230/3239]	Time 0.210 (0.230)	Data 0.000 (0.008)	Loss 4.1649 (4.3012)	Top-1 acc 25.781 (25.275)	Top-5 acc 51.953 (47.749)	lr 0.01728
Warmup Train [24][3239/3239]	Time 0.142 (0.230)	Data 0.000 (0.008)	Loss 4.5095 (4.3014)	Top-1 acc 24.691 (25.275)	Top-5 acc 44.444 (47.743)	lr 0.01728
==========Warmup Valid [24/40]	loss 3.279	top-1 acc 32.632	top-5 acc 57.131	Train top-1 25.275	top-5 47.743	flops: 442.4M
Warmup Train [25][0/3239]	Time 21.102 (21.102)	Data 20.071 (20.071)	Loss 4.3495 (4.3495)	Top-1 acc 25.391 (25.391)	Top-5 acc 49.609 (49.609)	lr 0.01727
Warmup Train [25][10/3239]	Time 0.156 (2.153)	Data 0.001 (1.826)	Loss 4.1862 (4.2774)	Top-1 acc 25.781 (25.675)	Top-5 acc 51.562 (48.828)	lr 0.01727
Warmup Train [25][20/3239]	Time 0.170 (1.239)	Data 0.001 (0.961)	Loss 4.3711 (4.2959)	Top-1 acc 25.391 (25.465)	Top-5 acc 50.000 (48.605)	lr 0.01726
Warmup Train [25][30/3239]	Time 0.185 (0.914)	Data 0.001 (0.653)	Loss 4.3108 (4.2979)	Top-1 acc 26.953 (25.277)	Top-5 acc 48.047 (48.286)	lr 0.01726
Warmup Train [25][40/3239]	Time 0.293 (0.747)	Data 0.002 (0.494)	Loss 4.3089 (4.2947)	Top-1 acc 27.344 (25.419)	Top-5 acc 47.656 (48.266)	lr 0.01725
Warmup Train [25][50/3239]	Time 0.164 (0.642)	Data 0.001 (0.397)	Loss 4.3546 (4.2996)	Top-1 acc 23.438 (25.352)	Top-5 acc 44.141 (48.169)	lr 0.01725
Warmup Train [25][60/3239]	Time 0.207 (0.575)	Data 0.001 (0.333)	Loss 4.2595 (4.2918)	Top-1 acc 24.609 (25.608)	Top-5 acc 47.656 (48.329)	lr 0.01724
Warmup Train [25][70/3239]	Time 0.272 (0.530)	Data 0.002 (0.286)	Loss 4.3339 (4.2913)	Top-1 acc 22.266 (25.567)	Top-5 acc 46.484 (48.239)	lr 0.01723
Warmup Train [25][80/3239]	Time 0.279 (0.498)	Data 0.002 (0.251)	Loss 4.2938 (4.2904)	Top-1 acc 25.781 (25.526)	Top-5 acc 46.484 (48.259)	lr 0.01723
Warmup Train [25][90/3239]	Time 0.222 (0.468)	Data 0.001 (0.224)	Loss 4.2581 (4.2880)	Top-1 acc 28.516 (25.614)	Top-5 acc 48.828 (48.257)	lr 0.01722
Warmup Train [25][100/3239]	Time 0.226 (0.447)	Data 0.001 (0.202)	Loss 4.1635 (4.2850)	Top-1 acc 30.469 (25.774)	Top-5 acc 48.828 (48.360)	lr 0.01722
Warmup Train [25][110/3239]	Time 0.294 (0.430)	Data 0.001 (0.184)	Loss 4.1841 (4.2843)	Top-1 acc 25.391 (25.662)	Top-5 acc 51.172 (48.265)	lr 0.01721
Warmup Train [25][120/3239]	Time 0.300 (0.417)	Data 0.001 (0.170)	Loss 4.3427 (4.2818)	Top-1 acc 21.484 (25.649)	Top-5 acc 46.875 (48.315)	lr 0.01721
Warmup Train [25][130/3239]	Time 0.236 (0.405)	Data 0.001 (0.157)	Loss 4.3194 (4.2751)	Top-1 acc 22.656 (25.835)	Top-5 acc 47.656 (48.446)	lr 0.01720
Warmup Train [25][140/3239]	Time 0.167 (0.396)	Data 0.001 (0.146)	Loss 4.4594 (4.2792)	Top-1 acc 19.922 (25.690)	Top-5 acc 45.312 (48.368)	lr 0.01719
Warmup Train [25][150/3239]	Time 0.245 (0.388)	Data 0.001 (0.137)	Loss 4.2714 (4.2779)	Top-1 acc 24.219 (25.727)	Top-5 acc 48.438 (48.368)	lr 0.01719
Warmup Train [25][160/3239]	Time 0.206 (0.380)	Data 0.002 (0.129)	Loss 4.2247 (4.2768)	Top-1 acc 31.250 (25.873)	Top-5 acc 49.609 (48.421)	lr 0.01718
Warmup Train [25][170/3239]	Time 0.238 (0.374)	Data 0.001 (0.121)	Loss 4.2428 (4.2761)	Top-1 acc 25.781 (25.845)	Top-5 acc 52.344 (48.492)	lr 0.01718
Warmup Train [25][180/3239]	Time 0.236 (0.366)	Data 0.001 (0.115)	Loss 4.1273 (4.2782)	Top-1 acc 27.344 (25.811)	Top-5 acc 51.953 (48.472)	lr 0.01717
Warmup Train [25][190/3239]	Time 0.350 (0.359)	Data 0.001 (0.109)	Loss 4.0785 (4.2769)	Top-1 acc 28.906 (25.779)	Top-5 acc 53.125 (48.470)	lr 0.01717
Warmup Train [25][200/3239]	Time 0.241 (0.354)	Data 0.001 (0.104)	Loss 4.2027 (4.2754)	Top-1 acc 25.391 (25.779)	Top-5 acc 49.219 (48.482)	lr 0.01716
Warmup Train [25][210/3239]	Time 0.268 (0.349)	Data 0.002 (0.099)	Loss 4.1240 (4.2753)	Top-1 acc 27.344 (25.846)	Top-5 acc 53.516 (48.523)	lr 0.01715
Warmup Train [25][220/3239]	Time 0.235 (0.345)	Data 0.002 (0.095)	Loss 4.2288 (4.2747)	Top-1 acc 29.688 (25.900)	Top-5 acc 49.219 (48.547)	lr 0.01715
Warmup Train [25][230/3239]	Time 0.412 (0.343)	Data 0.002 (0.091)	Loss 4.2830 (4.2763)	Top-1 acc 27.734 (25.878)	Top-5 acc 49.609 (48.495)	lr 0.01714
Warmup Train [25][240/3239]	Time 0.208 (0.338)	Data 0.001 (0.087)	Loss 4.1657 (4.2742)	Top-1 acc 28.906 (25.887)	Top-5 acc 51.172 (48.533)	lr 0.01714
Warmup Train [25][250/3239]	Time 0.247 (0.333)	Data 0.024 (0.084)	Loss 4.2934 (4.2726)	Top-1 acc 22.266 (25.884)	Top-5 acc 46.094 (48.564)	lr 0.01713
Warmup Train [25][260/3239]	Time 0.141 (0.328)	Data 0.001 (0.081)	Loss 4.4810 (4.2709)	Top-1 acc 20.703 (25.896)	Top-5 acc 43.359 (48.565)	lr 0.01712
Warmup Train [25][270/3239]	Time 0.220 (0.324)	Data 0.001 (0.078)	Loss 4.3914 (4.2710)	Top-1 acc 25.781 (25.924)	Top-5 acc 46.875 (48.570)	lr 0.01712
Warmup Train [25][280/3239]	Time 0.302 (0.321)	Data 0.001 (0.075)	Loss 4.2682 (4.2707)	Top-1 acc 27.344 (25.963)	Top-5 acc 50.781 (48.592)	lr 0.01711
Warmup Train [25][290/3239]	Time 0.226 (0.318)	Data 0.001 (0.073)	Loss 4.1912 (4.2727)	Top-1 acc 28.906 (25.941)	Top-5 acc 52.344 (48.515)	lr 0.01711
Warmup Train [25][300/3239]	Time 0.166 (0.315)	Data 0.001 (0.070)	Loss 4.2675 (4.2744)	Top-1 acc 26.953 (25.915)	Top-5 acc 48.828 (48.489)	lr 0.01710
Warmup Train [25][310/3239]	Time 0.147 (0.311)	Data 0.001 (0.068)	Loss 4.3957 (4.2749)	Top-1 acc 22.656 (25.921)	Top-5 acc 44.531 (48.464)	lr 0.01710
Warmup Train [25][320/3239]	Time 0.225 (0.309)	Data 0.001 (0.066)	Loss 4.2588 (4.2737)	Top-1 acc 25.781 (25.975)	Top-5 acc 47.266 (48.495)	lr 0.01709
Warmup Train [25][330/3239]	Time 0.272 (0.307)	Data 0.001 (0.064)	Loss 4.2479 (4.2750)	Top-1 acc 28.516 (25.931)	Top-5 acc 48.438 (48.452)	lr 0.01708
Warmup Train [25][340/3239]	Time 0.217 (0.304)	Data 0.001 (0.063)	Loss 4.2131 (4.2735)	Top-1 acc 24.609 (25.966)	Top-5 acc 45.703 (48.449)	lr 0.01708
Warmup Train [25][350/3239]	Time 0.308 (0.302)	Data 0.001 (0.061)	Loss 4.3029 (4.2746)	Top-1 acc 23.438 (25.939)	Top-5 acc 47.266 (48.383)	lr 0.01707
Warmup Train [25][360/3239]	Time 0.241 (0.300)	Data 0.001 (0.059)	Loss 4.5264 (4.2754)	Top-1 acc 23.828 (25.921)	Top-5 acc 44.922 (48.365)	lr 0.01707
Warmup Train [25][370/3239]	Time 0.223 (0.298)	Data 0.001 (0.058)	Loss 4.2801 (4.2741)	Top-1 acc 26.172 (25.924)	Top-5 acc 48.438 (48.402)	lr 0.01706
Warmup Train [25][380/3239]	Time 0.273 (0.297)	Data 0.001 (0.056)	Loss 4.3258 (4.2752)	Top-1 acc 24.609 (25.916)	Top-5 acc 44.531 (48.373)	lr 0.01706
Warmup Train [25][390/3239]	Time 0.199 (0.295)	Data 0.001 (0.055)	Loss 4.0892 (4.2755)	Top-1 acc 30.078 (25.886)	Top-5 acc 50.000 (48.338)	lr 0.01705
Warmup Train [25][400/3239]	Time 0.231 (0.293)	Data 0.001 (0.054)	Loss 4.3059 (4.2756)	Top-1 acc 28.906 (25.910)	Top-5 acc 48.047 (48.358)	lr 0.01704
Warmup Train [25][410/3239]	Time 0.158 (0.292)	Data 0.001 (0.052)	Loss 4.2350 (4.2752)	Top-1 acc 30.078 (25.940)	Top-5 acc 50.781 (48.366)	lr 0.01704
Warmup Train [25][420/3239]	Time 0.159 (0.290)	Data 0.001 (0.051)	Loss 4.3441 (4.2754)	Top-1 acc 25.781 (25.922)	Top-5 acc 51.562 (48.380)	lr 0.01703
Warmup Train [25][430/3239]	Time 0.193 (0.289)	Data 0.001 (0.050)	Loss 4.0559 (4.2747)	Top-1 acc 26.172 (25.927)	Top-5 acc 53.125 (48.408)	lr 0.01703
Warmup Train [25][440/3239]	Time 0.165 (0.287)	Data 0.001 (0.049)	Loss 4.3245 (4.2758)	Top-1 acc 25.391 (25.908)	Top-5 acc 44.141 (48.362)	lr 0.01702
Warmup Train [25][450/3239]	Time 0.298 (0.286)	Data 0.001 (0.048)	Loss 4.4571 (4.2761)	Top-1 acc 21.484 (25.882)	Top-5 acc 46.484 (48.354)	lr 0.01702
Warmup Train [25][460/3239]	Time 0.147 (0.285)	Data 0.001 (0.047)	Loss 4.2858 (4.2762)	Top-1 acc 24.219 (25.872)	Top-5 acc 47.266 (48.355)	lr 0.01701
Warmup Train [25][470/3239]	Time 0.158 (0.283)	Data 0.001 (0.046)	Loss 4.1244 (4.2748)	Top-1 acc 26.172 (25.895)	Top-5 acc 51.562 (48.384)	lr 0.01700
Warmup Train [25][480/3239]	Time 0.196 (0.282)	Data 0.001 (0.046)	Loss 4.2824 (4.2751)	Top-1 acc 24.609 (25.866)	Top-5 acc 45.312 (48.370)	lr 0.01700
Warmup Train [25][490/3239]	Time 0.174 (0.281)	Data 0.001 (0.045)	Loss 4.1085 (4.2756)	Top-1 acc 27.734 (25.854)	Top-5 acc 53.125 (48.380)	lr 0.01699
Warmup Train [25][500/3239]	Time 0.227 (0.280)	Data 0.002 (0.044)	Loss 4.3674 (4.2745)	Top-1 acc 23.438 (25.856)	Top-5 acc 44.922 (48.391)	lr 0.01699
Warmup Train [25][510/3239]	Time 0.198 (0.279)	Data 0.001 (0.043)	Loss 4.3773 (4.2736)	Top-1 acc 25.000 (25.882)	Top-5 acc 46.484 (48.441)	lr 0.01698
Warmup Train [25][520/3239]	Time 0.222 (0.278)	Data 0.001 (0.042)	Loss 4.1741 (4.2721)	Top-1 acc 27.734 (25.906)	Top-5 acc 48.438 (48.463)	lr 0.01698
Warmup Train [25][530/3239]	Time 0.165 (0.277)	Data 0.001 (0.042)	Loss 4.1586 (4.2731)	Top-1 acc 25.781 (25.886)	Top-5 acc 50.000 (48.454)	lr 0.01697
Warmup Train [25][540/3239]	Time 0.207 (0.276)	Data 0.001 (0.041)	Loss 4.1552 (4.2723)	Top-1 acc 24.219 (25.894)	Top-5 acc 50.391 (48.458)	lr 0.01696
Warmup Train [25][550/3239]	Time 0.314 (0.275)	Data 0.001 (0.040)	Loss 4.2551 (4.2706)	Top-1 acc 25.781 (25.932)	Top-5 acc 48.438 (48.506)	lr 0.01696
Warmup Train [25][560/3239]	Time 0.177 (0.274)	Data 0.003 (0.040)	Loss 4.3272 (4.2704)	Top-1 acc 27.344 (25.948)	Top-5 acc 46.484 (48.519)	lr 0.01695
Warmup Train [25][570/3239]	Time 0.299 (0.273)	Data 0.001 (0.039)	Loss 4.2589 (4.2709)	Top-1 acc 21.484 (25.930)	Top-5 acc 48.438 (48.501)	lr 0.01695
Warmup Train [25][580/3239]	Time 0.270 (0.272)	Data 0.001 (0.038)	Loss 4.1734 (4.2709)	Top-1 acc 25.391 (25.926)	Top-5 acc 51.562 (48.493)	lr 0.01694
Warmup Train [25][590/3239]	Time 0.169 (0.272)	Data 0.001 (0.038)	Loss 4.2299 (4.2711)	Top-1 acc 26.172 (25.921)	Top-5 acc 52.734 (48.484)	lr 0.01694
Warmup Train [25][600/3239]	Time 0.279 (0.271)	Data 0.001 (0.037)	Loss 4.2121 (4.2709)	Top-1 acc 27.344 (25.917)	Top-5 acc 47.656 (48.478)	lr 0.01693
Warmup Train [25][610/3239]	Time 0.225 (0.270)	Data 0.001 (0.037)	Loss 4.3731 (4.2713)	Top-1 acc 25.000 (25.914)	Top-5 acc 46.875 (48.472)	lr 0.01692
Warmup Train [25][620/3239]	Time 0.215 (0.269)	Data 0.001 (0.036)	Loss 4.2419 (4.2721)	Top-1 acc 27.734 (25.896)	Top-5 acc 50.000 (48.451)	lr 0.01692
Warmup Train [25][630/3239]	Time 0.204 (0.268)	Data 0.001 (0.036)	Loss 4.3933 (4.2731)	Top-1 acc 23.828 (25.885)	Top-5 acc 45.312 (48.435)	lr 0.01691
Warmup Train [25][640/3239]	Time 0.203 (0.268)	Data 0.022 (0.035)	Loss 4.2404 (4.2728)	Top-1 acc 23.828 (25.891)	Top-5 acc 46.484 (48.435)	lr 0.01691
Warmup Train [25][650/3239]	Time 0.252 (0.267)	Data 0.001 (0.035)	Loss 4.3464 (4.2726)	Top-1 acc 21.875 (25.890)	Top-5 acc 45.703 (48.439)	lr 0.01690
Warmup Train [25][660/3239]	Time 0.282 (0.267)	Data 0.001 (0.034)	Loss 4.2762 (4.2724)	Top-1 acc 28.906 (25.885)	Top-5 acc 45.312 (48.441)	lr 0.01690
Warmup Train [25][670/3239]	Time 0.343 (0.266)	Data 0.001 (0.034)	Loss 4.3037 (4.2719)	Top-1 acc 26.953 (25.894)	Top-5 acc 47.656 (48.464)	lr 0.01689
Warmup Train [25][680/3239]	Time 0.211 (0.265)	Data 0.001 (0.033)	Loss 4.0845 (4.2718)	Top-1 acc 29.297 (25.886)	Top-5 acc 52.344 (48.463)	lr 0.01688
Warmup Train [25][690/3239]	Time 0.216 (0.265)	Data 0.025 (0.033)	Loss 4.2384 (4.2713)	Top-1 acc 27.344 (25.892)	Top-5 acc 50.391 (48.490)	lr 0.01688
Warmup Train [25][700/3239]	Time 0.233 (0.265)	Data 0.003 (0.032)	Loss 4.2455 (4.2709)	Top-1 acc 28.516 (25.890)	Top-5 acc 50.391 (48.495)	lr 0.01687
Warmup Train [25][710/3239]	Time 0.214 (0.264)	Data 0.001 (0.032)	Loss 4.3095 (4.2716)	Top-1 acc 28.516 (25.876)	Top-5 acc 49.219 (48.485)	lr 0.01687
Warmup Train [25][720/3239]	Time 0.192 (0.263)	Data 0.001 (0.031)	Loss 4.4090 (4.2721)	Top-1 acc 25.391 (25.868)	Top-5 acc 48.828 (48.486)	lr 0.01686
Warmup Train [25][730/3239]	Time 0.159 (0.263)	Data 0.001 (0.031)	Loss 4.2877 (4.2726)	Top-1 acc 21.875 (25.849)	Top-5 acc 45.312 (48.460)	lr 0.01686
Warmup Train [25][740/3239]	Time 0.198 (0.262)	Data 0.001 (0.031)	Loss 4.2344 (4.2723)	Top-1 acc 22.266 (25.852)	Top-5 acc 50.781 (48.484)	lr 0.01685
Warmup Train [25][750/3239]	Time 0.234 (0.262)	Data 0.001 (0.030)	Loss 4.1432 (4.2724)	Top-1 acc 31.250 (25.857)	Top-5 acc 51.562 (48.495)	lr 0.01684
Warmup Train [25][760/3239]	Time 0.237 (0.261)	Data 0.001 (0.030)	Loss 4.2924 (4.2722)	Top-1 acc 29.688 (25.874)	Top-5 acc 48.438 (48.492)	lr 0.01684
Warmup Train [25][770/3239]	Time 0.239 (0.261)	Data 0.001 (0.030)	Loss 4.2404 (4.2725)	Top-1 acc 25.391 (25.859)	Top-5 acc 47.266 (48.495)	lr 0.01683
Warmup Train [25][780/3239]	Time 0.293 (0.261)	Data 0.001 (0.029)	Loss 4.1435 (4.2726)	Top-1 acc 23.047 (25.848)	Top-5 acc 48.438 (48.491)	lr 0.01683
Warmup Train [25][790/3239]	Time 0.273 (0.260)	Data 0.001 (0.029)	Loss 4.0910 (4.2720)	Top-1 acc 28.516 (25.863)	Top-5 acc 53.125 (48.504)	lr 0.01682
Warmup Train [25][800/3239]	Time 0.174 (0.260)	Data 0.001 (0.029)	Loss 4.2717 (4.2720)	Top-1 acc 22.656 (25.852)	Top-5 acc 46.484 (48.486)	lr 0.01681
Warmup Train [25][810/3239]	Time 0.158 (0.259)	Data 0.001 (0.028)	Loss 4.2337 (4.2720)	Top-1 acc 28.906 (25.840)	Top-5 acc 48.047 (48.482)	lr 0.01681
Warmup Train [25][820/3239]	Time 0.202 (0.259)	Data 0.001 (0.028)	Loss 4.4013 (4.2720)	Top-1 acc 24.609 (25.847)	Top-5 acc 43.359 (48.479)	lr 0.01680
Warmup Train [25][830/3239]	Time 0.206 (0.258)	Data 0.001 (0.028)	Loss 4.3473 (4.2717)	Top-1 acc 21.484 (25.851)	Top-5 acc 45.312 (48.480)	lr 0.01680
Warmup Train [25][840/3239]	Time 0.159 (0.258)	Data 0.001 (0.027)	Loss 4.1421 (4.2717)	Top-1 acc 30.469 (25.860)	Top-5 acc 53.516 (48.485)	lr 0.01679
Warmup Train [25][850/3239]	Time 0.154 (0.258)	Data 0.001 (0.027)	Loss 3.9155 (4.2716)	Top-1 acc 30.859 (25.855)	Top-5 acc 57.422 (48.486)	lr 0.01679
Warmup Train [25][860/3239]	Time 0.202 (0.257)	Data 0.001 (0.027)	Loss 4.3810 (4.2716)	Top-1 acc 24.609 (25.832)	Top-5 acc 43.750 (48.481)	lr 0.01678
Warmup Train [25][870/3239]	Time 0.246 (0.257)	Data 0.001 (0.027)	Loss 4.3326 (4.2715)	Top-1 acc 27.344 (25.848)	Top-5 acc 48.047 (48.485)	lr 0.01677
Warmup Train [25][880/3239]	Time 0.299 (0.257)	Data 0.001 (0.026)	Loss 4.2100 (4.2712)	Top-1 acc 26.953 (25.844)	Top-5 acc 54.688 (48.500)	lr 0.01677
Warmup Train [25][890/3239]	Time 0.192 (0.256)	Data 0.002 (0.026)	Loss 4.3260 (4.2711)	Top-1 acc 26.953 (25.838)	Top-5 acc 49.609 (48.493)	lr 0.01676
Warmup Train [25][900/3239]	Time 0.163 (0.256)	Data 0.001 (0.026)	Loss 4.1418 (4.2705)	Top-1 acc 21.875 (25.839)	Top-5 acc 53.906 (48.511)	lr 0.01676
Warmup Train [25][910/3239]	Time 0.209 (0.256)	Data 0.001 (0.026)	Loss 4.4668 (4.2702)	Top-1 acc 21.484 (25.841)	Top-5 acc 43.359 (48.516)	lr 0.01675
Warmup Train [25][920/3239]	Time 0.224 (0.255)	Data 0.001 (0.025)	Loss 4.2847 (4.2699)	Top-1 acc 24.609 (25.845)	Top-5 acc 47.266 (48.523)	lr 0.01675
Warmup Train [25][930/3239]	Time 0.242 (0.255)	Data 0.001 (0.025)	Loss 4.3172 (4.2699)	Top-1 acc 25.391 (25.843)	Top-5 acc 46.875 (48.511)	lr 0.01674
Warmup Train [25][940/3239]	Time 0.192 (0.254)	Data 0.002 (0.025)	Loss 4.3572 (4.2696)	Top-1 acc 19.922 (25.851)	Top-5 acc 50.000 (48.530)	lr 0.01673
Warmup Train [25][950/3239]	Time 0.214 (0.254)	Data 0.001 (0.025)	Loss 4.4166 (4.2697)	Top-1 acc 25.781 (25.856)	Top-5 acc 43.750 (48.523)	lr 0.01673
Warmup Train [25][960/3239]	Time 0.264 (0.254)	Data 0.001 (0.024)	Loss 4.2711 (4.2688)	Top-1 acc 25.391 (25.872)	Top-5 acc 49.609 (48.542)	lr 0.01672
Warmup Train [25][970/3239]	Time 0.246 (0.253)	Data 0.001 (0.024)	Loss 4.0276 (4.2682)	Top-1 acc 25.391 (25.892)	Top-5 acc 53.516 (48.565)	lr 0.01672
Warmup Train [25][980/3239]	Time 0.202 (0.253)	Data 0.001 (0.024)	Loss 4.1192 (4.2681)	Top-1 acc 30.078 (25.897)	Top-5 acc 51.562 (48.568)	lr 0.01671
Warmup Train [25][990/3239]	Time 0.311 (0.253)	Data 0.001 (0.024)	Loss 4.0883 (4.2672)	Top-1 acc 30.078 (25.909)	Top-5 acc 50.391 (48.583)	lr 0.01671
Warmup Train [25][1000/3239]	Time 0.162 (0.253)	Data 0.001 (0.024)	Loss 4.2476 (4.2672)	Top-1 acc 29.688 (25.900)	Top-5 acc 48.047 (48.583)	lr 0.01670
Warmup Train [25][1010/3239]	Time 0.213 (0.252)	Data 0.001 (0.023)	Loss 4.1954 (4.2673)	Top-1 acc 27.344 (25.906)	Top-5 acc 49.609 (48.579)	lr 0.01669
Warmup Train [25][1020/3239]	Time 0.260 (0.252)	Data 0.002 (0.023)	Loss 4.0455 (4.2671)	Top-1 acc 32.031 (25.909)	Top-5 acc 51.562 (48.576)	lr 0.01669
Warmup Train [25][1030/3239]	Time 0.218 (0.251)	Data 0.001 (0.023)	Loss 3.9347 (4.2673)	Top-1 acc 37.109 (25.917)	Top-5 acc 55.469 (48.574)	lr 0.01668
Warmup Train [25][1040/3239]	Time 0.159 (0.251)	Data 0.003 (0.023)	Loss 4.4739 (4.2677)	Top-1 acc 25.781 (25.919)	Top-5 acc 44.141 (48.562)	lr 0.01668
Warmup Train [25][1050/3239]	Time 0.208 (0.251)	Data 0.001 (0.023)	Loss 4.4560 (4.2678)	Top-1 acc 24.219 (25.915)	Top-5 acc 41.406 (48.554)	lr 0.01667
Warmup Train [25][1060/3239]	Time 0.222 (0.251)	Data 0.001 (0.022)	Loss 4.4183 (4.2673)	Top-1 acc 25.781 (25.924)	Top-5 acc 46.875 (48.570)	lr 0.01667
Warmup Train [25][1070/3239]	Time 0.251 (0.250)	Data 0.001 (0.022)	Loss 4.3796 (4.2677)	Top-1 acc 26.172 (25.913)	Top-5 acc 47.266 (48.562)	lr 0.01666
Warmup Train [25][1080/3239]	Time 0.193 (0.250)	Data 0.002 (0.022)	Loss 4.3262 (4.2676)	Top-1 acc 26.562 (25.923)	Top-5 acc 47.656 (48.569)	lr 0.01665
Warmup Train [25][1090/3239]	Time 0.239 (0.250)	Data 0.001 (0.022)	Loss 4.3006 (4.2683)	Top-1 acc 27.734 (25.911)	Top-5 acc 47.656 (48.562)	lr 0.01665
Warmup Train [25][1100/3239]	Time 0.232 (0.249)	Data 0.002 (0.022)	Loss 4.3522 (4.2684)	Top-1 acc 25.781 (25.909)	Top-5 acc 41.797 (48.557)	lr 0.01664
Warmup Train [25][1110/3239]	Time 0.329 (0.249)	Data 0.022 (0.022)	Loss 4.1853 (4.2680)	Top-1 acc 23.828 (25.917)	Top-5 acc 50.000 (48.557)	lr 0.01664
Warmup Train [25][1120/3239]	Time 0.248 (0.249)	Data 0.001 (0.021)	Loss 4.1981 (4.2676)	Top-1 acc 29.688 (25.919)	Top-5 acc 47.656 (48.561)	lr 0.01663
Warmup Train [25][1130/3239]	Time 0.193 (0.249)	Data 0.001 (0.021)	Loss 4.4041 (4.2677)	Top-1 acc 24.609 (25.910)	Top-5 acc 46.875 (48.557)	lr 0.01663
Warmup Train [25][1140/3239]	Time 0.243 (0.249)	Data 0.001 (0.021)	Loss 4.2916 (4.2680)	Top-1 acc 26.562 (25.895)	Top-5 acc 48.828 (48.548)	lr 0.01662
Warmup Train [25][1150/3239]	Time 0.183 (0.248)	Data 0.001 (0.021)	Loss 4.2022 (4.2677)	Top-1 acc 29.297 (25.906)	Top-5 acc 51.172 (48.555)	lr 0.01661
Warmup Train [25][1160/3239]	Time 0.197 (0.248)	Data 0.001 (0.021)	Loss 4.1035 (4.2675)	Top-1 acc 29.688 (25.903)	Top-5 acc 50.391 (48.553)	lr 0.01661
Warmup Train [25][1170/3239]	Time 0.156 (0.248)	Data 0.002 (0.021)	Loss 4.3408 (4.2675)	Top-1 acc 23.828 (25.914)	Top-5 acc 46.875 (48.554)	lr 0.01660
Warmup Train [25][1180/3239]	Time 0.168 (0.248)	Data 0.001 (0.020)	Loss 4.4866 (4.2674)	Top-1 acc 23.438 (25.916)	Top-5 acc 40.234 (48.551)	lr 0.01660
Warmup Train [25][1190/3239]	Time 0.210 (0.248)	Data 0.002 (0.020)	Loss 4.1780 (4.2676)	Top-1 acc 28.125 (25.909)	Top-5 acc 50.391 (48.544)	lr 0.01659
Warmup Train [25][1200/3239]	Time 0.199 (0.247)	Data 0.001 (0.020)	Loss 4.2044 (4.2674)	Top-1 acc 24.609 (25.914)	Top-5 acc 47.266 (48.553)	lr 0.01659
Warmup Train [25][1210/3239]	Time 0.274 (0.247)	Data 0.001 (0.020)	Loss 4.3322 (4.2673)	Top-1 acc 23.438 (25.910)	Top-5 acc 47.266 (48.551)	lr 0.01658
Warmup Train [25][1220/3239]	Time 0.313 (0.247)	Data 0.001 (0.020)	Loss 4.3526 (4.2672)	Top-1 acc 26.172 (25.911)	Top-5 acc 50.000 (48.559)	lr 0.01657
Warmup Train [25][1230/3239]	Time 0.202 (0.247)	Data 0.001 (0.020)	Loss 4.1415 (4.2668)	Top-1 acc 26.562 (25.921)	Top-5 acc 48.828 (48.573)	lr 0.01657
Warmup Train [25][1240/3239]	Time 0.207 (0.247)	Data 0.001 (0.020)	Loss 4.3219 (4.2663)	Top-1 acc 19.531 (25.921)	Top-5 acc 46.094 (48.584)	lr 0.01656
Warmup Train [25][1250/3239]	Time 0.186 (0.246)	Data 0.001 (0.019)	Loss 4.2064 (4.2661)	Top-1 acc 28.906 (25.922)	Top-5 acc 50.781 (48.594)	lr 0.01656
Warmup Train [25][1260/3239]	Time 0.228 (0.246)	Data 0.001 (0.019)	Loss 4.2990 (4.2660)	Top-1 acc 28.516 (25.933)	Top-5 acc 48.047 (48.594)	lr 0.01655
Warmup Train [25][1270/3239]	Time 0.184 (0.246)	Data 0.001 (0.019)	Loss 4.4832 (4.2663)	Top-1 acc 21.094 (25.932)	Top-5 acc 45.312 (48.600)	lr 0.01655
Warmup Train [25][1280/3239]	Time 0.189 (0.246)	Data 0.001 (0.019)	Loss 4.1860 (4.2663)	Top-1 acc 26.562 (25.932)	Top-5 acc 52.734 (48.599)	lr 0.01654
Warmup Train [25][1290/3239]	Time 0.197 (0.246)	Data 0.001 (0.019)	Loss 4.2250 (4.2662)	Top-1 acc 23.438 (25.939)	Top-5 acc 48.438 (48.597)	lr 0.01653
Warmup Train [25][1300/3239]	Time 0.221 (0.245)	Data 0.001 (0.019)	Loss 4.4792 (4.2664)	Top-1 acc 22.266 (25.932)	Top-5 acc 40.625 (48.600)	lr 0.01653
Warmup Train [25][1310/3239]	Time 0.236 (0.245)	Data 0.001 (0.019)	Loss 4.4131 (4.2664)	Top-1 acc 21.484 (25.922)	Top-5 acc 46.875 (48.593)	lr 0.01652
Warmup Train [25][1320/3239]	Time 0.228 (0.245)	Data 0.001 (0.019)	Loss 4.0800 (4.2663)	Top-1 acc 30.469 (25.930)	Top-5 acc 50.781 (48.597)	lr 0.01652
Warmup Train [25][1330/3239]	Time 0.361 (0.245)	Data 0.002 (0.018)	Loss 4.3917 (4.2664)	Top-1 acc 24.609 (25.929)	Top-5 acc 44.531 (48.603)	lr 0.01651
Warmup Train [25][1340/3239]	Time 0.151 (0.245)	Data 0.001 (0.018)	Loss 4.1743 (4.2661)	Top-1 acc 28.125 (25.932)	Top-5 acc 49.609 (48.597)	lr 0.01651
Warmup Train [25][1350/3239]	Time 0.222 (0.245)	Data 0.001 (0.018)	Loss 4.2976 (4.2656)	Top-1 acc 25.781 (25.945)	Top-5 acc 48.828 (48.604)	lr 0.01650
Warmup Train [25][1360/3239]	Time 0.232 (0.245)	Data 0.001 (0.018)	Loss 4.3948 (4.2652)	Top-1 acc 24.219 (25.956)	Top-5 acc 46.484 (48.615)	lr 0.01650
Warmup Train [25][1370/3239]	Time 0.265 (0.245)	Data 0.001 (0.018)	Loss 4.2907 (4.2655)	Top-1 acc 26.172 (25.956)	Top-5 acc 47.656 (48.604)	lr 0.01649
Warmup Train [25][1380/3239]	Time 0.180 (0.244)	Data 0.002 (0.018)	Loss 4.3425 (4.2657)	Top-1 acc 25.000 (25.960)	Top-5 acc 47.266 (48.592)	lr 0.01648
Warmup Train [25][1390/3239]	Time 0.160 (0.244)	Data 0.001 (0.018)	Loss 4.1789 (4.2661)	Top-1 acc 25.781 (25.948)	Top-5 acc 49.609 (48.578)	lr 0.01648
Warmup Train [25][1400/3239]	Time 0.179 (0.244)	Data 0.001 (0.018)	Loss 4.1716 (4.2660)	Top-1 acc 26.172 (25.949)	Top-5 acc 50.000 (48.579)	lr 0.01647
Warmup Train [25][1410/3239]	Time 0.182 (0.244)	Data 0.002 (0.018)	Loss 4.3497 (4.2661)	Top-1 acc 27.344 (25.952)	Top-5 acc 48.047 (48.579)	lr 0.01647
Warmup Train [25][1420/3239]	Time 0.250 (0.244)	Data 0.001 (0.017)	Loss 4.3452 (4.2656)	Top-1 acc 23.047 (25.964)	Top-5 acc 47.266 (48.593)	lr 0.01646
Warmup Train [25][1430/3239]	Time 0.430 (0.244)	Data 0.001 (0.017)	Loss 4.3268 (4.2653)	Top-1 acc 24.609 (25.973)	Top-5 acc 47.656 (48.600)	lr 0.01646
Warmup Train [25][1440/3239]	Time 0.169 (0.244)	Data 0.001 (0.017)	Loss 4.3452 (4.2653)	Top-1 acc 25.781 (25.973)	Top-5 acc 47.656 (48.602)	lr 0.01645
Warmup Train [25][1450/3239]	Time 0.164 (0.243)	Data 0.001 (0.017)	Loss 4.3221 (4.2654)	Top-1 acc 25.391 (25.969)	Top-5 acc 51.953 (48.597)	lr 0.01644
Warmup Train [25][1460/3239]	Time 0.234 (0.243)	Data 0.002 (0.017)	Loss 4.1709 (4.2653)	Top-1 acc 30.078 (25.979)	Top-5 acc 50.391 (48.594)	lr 0.01644
Warmup Train [25][1470/3239]	Time 0.221 (0.243)	Data 0.001 (0.017)	Loss 4.2943 (4.2655)	Top-1 acc 26.172 (25.979)	Top-5 acc 46.484 (48.585)	lr 0.01643
Warmup Train [25][1480/3239]	Time 0.200 (0.243)	Data 0.001 (0.017)	Loss 4.3487 (4.2655)	Top-1 acc 25.000 (25.984)	Top-5 acc 44.922 (48.585)	lr 0.01643
Warmup Train [25][1490/3239]	Time 0.150 (0.243)	Data 0.001 (0.017)	Loss 4.1840 (4.2655)	Top-1 acc 27.344 (25.980)	Top-5 acc 48.047 (48.580)	lr 0.01642
Warmup Train [25][1500/3239]	Time 0.154 (0.243)	Data 0.001 (0.017)	Loss 4.3624 (4.2658)	Top-1 acc 23.047 (25.968)	Top-5 acc 47.266 (48.565)	lr 0.01642
Warmup Train [25][1510/3239]	Time 0.214 (0.242)	Data 0.001 (0.017)	Loss 4.2092 (4.2659)	Top-1 acc 26.562 (25.957)	Top-5 acc 48.828 (48.563)	lr 0.01641
Warmup Train [25][1520/3239]	Time 0.209 (0.242)	Data 0.002 (0.017)	Loss 4.2706 (4.2657)	Top-1 acc 25.781 (25.962)	Top-5 acc 47.266 (48.570)	lr 0.01640
Warmup Train [25][1530/3239]	Time 0.227 (0.242)	Data 0.002 (0.016)	Loss 4.2441 (4.2658)	Top-1 acc 26.953 (25.956)	Top-5 acc 50.781 (48.569)	lr 0.01640
Warmup Train [25][1540/3239]	Time 0.383 (0.242)	Data 0.002 (0.016)	Loss 4.3151 (4.2661)	Top-1 acc 25.781 (25.953)	Top-5 acc 46.094 (48.563)	lr 0.01639
Warmup Train [25][1550/3239]	Time 0.324 (0.242)	Data 0.001 (0.016)	Loss 4.3304 (4.2659)	Top-1 acc 23.438 (25.950)	Top-5 acc 48.828 (48.571)	lr 0.01639
Warmup Train [25][1560/3239]	Time 0.165 (0.242)	Data 0.001 (0.016)	Loss 4.4782 (4.2662)	Top-1 acc 20.703 (25.947)	Top-5 acc 41.797 (48.556)	lr 0.01638
Warmup Train [25][1570/3239]	Time 0.257 (0.242)	Data 0.001 (0.016)	Loss 4.0391 (4.2660)	Top-1 acc 30.469 (25.953)	Top-5 acc 53.516 (48.562)	lr 0.01638
Warmup Train [25][1580/3239]	Time 0.237 (0.242)	Data 0.001 (0.016)	Loss 4.3115 (4.2664)	Top-1 acc 24.609 (25.939)	Top-5 acc 46.875 (48.548)	lr 0.01637
Warmup Train [25][1590/3239]	Time 0.176 (0.242)	Data 0.002 (0.016)	Loss 4.4268 (4.2664)	Top-1 acc 23.438 (25.943)	Top-5 acc 44.141 (48.552)	lr 0.01636
Warmup Train [25][1600/3239]	Time 0.221 (0.242)	Data 0.001 (0.016)	Loss 4.1183 (4.2663)	Top-1 acc 29.297 (25.946)	Top-5 acc 54.688 (48.563)	lr 0.01636
Warmup Train [25][1610/3239]	Time 0.202 (0.241)	Data 0.001 (0.016)	Loss 4.2445 (4.2665)	Top-1 acc 28.906 (25.947)	Top-5 acc 50.781 (48.561)	lr 0.01635
Warmup Train [25][1620/3239]	Time 0.248 (0.241)	Data 0.001 (0.016)	Loss 4.3302 (4.2663)	Top-1 acc 25.391 (25.954)	Top-5 acc 45.703 (48.571)	lr 0.01635
Warmup Train [25][1630/3239]	Time 0.185 (0.241)	Data 0.001 (0.016)	Loss 4.2452 (4.2662)	Top-1 acc 28.516 (25.961)	Top-5 acc 50.391 (48.575)	lr 0.01634
Warmup Train [25][1640/3239]	Time 0.189 (0.241)	Data 0.001 (0.016)	Loss 4.1686 (4.2660)	Top-1 acc 27.734 (25.962)	Top-5 acc 51.172 (48.581)	lr 0.01634
Warmup Train [25][1650/3239]	Time 0.364 (0.241)	Data 0.001 (0.016)	Loss 4.1697 (4.2661)	Top-1 acc 25.000 (25.962)	Top-5 acc 50.000 (48.580)	lr 0.01633
Warmup Train [25][1660/3239]	Time 0.239 (0.241)	Data 0.003 (0.015)	Loss 4.0940 (4.2662)	Top-1 acc 28.125 (25.958)	Top-5 acc 53.125 (48.579)	lr 0.01632
Warmup Train [25][1670/3239]	Time 0.242 (0.241)	Data 0.001 (0.015)	Loss 4.3147 (4.2660)	Top-1 acc 24.219 (25.959)	Top-5 acc 49.219 (48.581)	lr 0.01632
Warmup Train [25][1680/3239]	Time 0.248 (0.241)	Data 0.001 (0.015)	Loss 4.2786 (4.2658)	Top-1 acc 25.000 (25.968)	Top-5 acc 48.047 (48.586)	lr 0.01631
Warmup Train [25][1690/3239]	Time 0.249 (0.241)	Data 0.001 (0.015)	Loss 3.9205 (4.2662)	Top-1 acc 33.203 (25.957)	Top-5 acc 54.297 (48.575)	lr 0.01631
Warmup Train [25][1700/3239]	Time 0.182 (0.241)	Data 0.002 (0.015)	Loss 4.1310 (4.2661)	Top-1 acc 30.078 (25.959)	Top-5 acc 46.094 (48.579)	lr 0.01630
Warmup Train [25][1710/3239]	Time 0.211 (0.241)	Data 0.002 (0.015)	Loss 4.1564 (4.2660)	Top-1 acc 28.516 (25.962)	Top-5 acc 50.391 (48.581)	lr 0.01630
Warmup Train [25][1720/3239]	Time 0.244 (0.241)	Data 0.001 (0.015)	Loss 4.3172 (4.2656)	Top-1 acc 23.047 (25.966)	Top-5 acc 47.656 (48.591)	lr 0.01629
Warmup Train [25][1730/3239]	Time 0.212 (0.240)	Data 0.001 (0.015)	Loss 4.2278 (4.2654)	Top-1 acc 25.781 (25.965)	Top-5 acc 48.047 (48.594)	lr 0.01628
Warmup Train [25][1740/3239]	Time 0.272 (0.240)	Data 0.001 (0.015)	Loss 4.3616 (4.2654)	Top-1 acc 30.469 (25.971)	Top-5 acc 48.438 (48.596)	lr 0.01628
Warmup Train [25][1750/3239]	Time 0.207 (0.240)	Data 0.001 (0.015)	Loss 4.2914 (4.2657)	Top-1 acc 22.266 (25.963)	Top-5 acc 47.656 (48.592)	lr 0.01627
Warmup Train [25][1760/3239]	Time 0.216 (0.240)	Data 0.002 (0.015)	Loss 4.4378 (4.2658)	Top-1 acc 19.922 (25.954)	Top-5 acc 44.922 (48.590)	lr 0.01627
Warmup Train [25][1770/3239]	Time 0.361 (0.240)	Data 0.001 (0.015)	Loss 4.2703 (4.2659)	Top-1 acc 23.828 (25.954)	Top-5 acc 51.953 (48.592)	lr 0.01626
Warmup Train [25][1780/3239]	Time 0.348 (0.240)	Data 0.001 (0.015)	Loss 4.2736 (4.2659)	Top-1 acc 26.562 (25.958)	Top-5 acc 48.047 (48.591)	lr 0.01626
Warmup Train [25][1790/3239]	Time 0.253 (0.240)	Data 0.001 (0.014)	Loss 4.2376 (4.2657)	Top-1 acc 25.781 (25.960)	Top-5 acc 49.609 (48.595)	lr 0.01625
Warmup Train [25][1800/3239]	Time 0.186 (0.240)	Data 0.002 (0.014)	Loss 4.2738 (4.2653)	Top-1 acc 25.781 (25.966)	Top-5 acc 48.828 (48.602)	lr 0.01624
Warmup Train [25][1810/3239]	Time 0.234 (0.240)	Data 0.001 (0.014)	Loss 3.9692 (4.2648)	Top-1 acc 29.688 (25.978)	Top-5 acc 56.641 (48.617)	lr 0.01624
Warmup Train [25][1820/3239]	Time 0.152 (0.239)	Data 0.001 (0.014)	Loss 4.3470 (4.2650)	Top-1 acc 23.438 (25.976)	Top-5 acc 46.875 (48.617)	lr 0.01623
Warmup Train [25][1830/3239]	Time 0.223 (0.239)	Data 0.001 (0.014)	Loss 4.0918 (4.2653)	Top-1 acc 30.859 (25.975)	Top-5 acc 53.516 (48.613)	lr 0.01623
Warmup Train [25][1840/3239]	Time 0.162 (0.239)	Data 0.001 (0.014)	Loss 4.4178 (4.2655)	Top-1 acc 22.266 (25.972)	Top-5 acc 42.578 (48.613)	lr 0.01622
Warmup Train [25][1850/3239]	Time 0.140 (0.239)	Data 0.001 (0.014)	Loss 4.1976 (4.2655)	Top-1 acc 26.562 (25.970)	Top-5 acc 47.656 (48.609)	lr 0.01622
Warmup Train [25][1860/3239]	Time 0.185 (0.239)	Data 0.001 (0.014)	Loss 4.2422 (4.2655)	Top-1 acc 26.562 (25.966)	Top-5 acc 50.781 (48.606)	lr 0.01621
Warmup Train [25][1870/3239]	Time 0.270 (0.239)	Data 0.001 (0.014)	Loss 4.3478 (4.2655)	Top-1 acc 23.828 (25.968)	Top-5 acc 47.266 (48.606)	lr 0.01621
Warmup Train [25][1880/3239]	Time 0.185 (0.239)	Data 0.001 (0.014)	Loss 4.2889 (4.2654)	Top-1 acc 22.656 (25.967)	Top-5 acc 44.922 (48.609)	lr 0.01620
Warmup Train [25][1890/3239]	Time 0.394 (0.239)	Data 0.001 (0.014)	Loss 4.3301 (4.2654)	Top-1 acc 25.391 (25.967)	Top-5 acc 42.969 (48.599)	lr 0.01619
Warmup Train [25][1900/3239]	Time 0.176 (0.239)	Data 0.002 (0.014)	Loss 4.4501 (4.2653)	Top-1 acc 25.000 (25.965)	Top-5 acc 46.094 (48.600)	lr 0.01619
Warmup Train [25][1910/3239]	Time 0.142 (0.239)	Data 0.001 (0.014)	Loss 4.1605 (4.2652)	Top-1 acc 27.344 (25.972)	Top-5 acc 55.469 (48.605)	lr 0.01618
Warmup Train [25][1920/3239]	Time 0.203 (0.239)	Data 0.001 (0.014)	Loss 4.1062 (4.2653)	Top-1 acc 27.734 (25.974)	Top-5 acc 55.078 (48.602)	lr 0.01618
Warmup Train [25][1930/3239]	Time 0.221 (0.239)	Data 0.001 (0.014)	Loss 4.2891 (4.2652)	Top-1 acc 27.344 (25.976)	Top-5 acc 49.609 (48.607)	lr 0.01617
Warmup Train [25][1940/3239]	Time 0.214 (0.239)	Data 0.001 (0.014)	Loss 4.1374 (4.2651)	Top-1 acc 28.906 (25.979)	Top-5 acc 53.516 (48.611)	lr 0.01617
Warmup Train [25][1950/3239]	Time 0.238 (0.239)	Data 0.002 (0.014)	Loss 4.2260 (4.2651)	Top-1 acc 27.734 (25.980)	Top-5 acc 47.656 (48.612)	lr 0.01616
Warmup Train [25][1960/3239]	Time 0.202 (0.239)	Data 0.001 (0.013)	Loss 4.3181 (4.2649)	Top-1 acc 22.656 (25.981)	Top-5 acc 44.922 (48.619)	lr 0.01615
Warmup Train [25][1970/3239]	Time 0.269 (0.239)	Data 0.001 (0.013)	Loss 4.3374 (4.2652)	Top-1 acc 26.172 (25.977)	Top-5 acc 46.484 (48.611)	lr 0.01615
Warmup Train [25][1980/3239]	Time 0.187 (0.238)	Data 0.001 (0.013)	Loss 4.2738 (4.2652)	Top-1 acc 26.953 (25.973)	Top-5 acc 48.438 (48.615)	lr 0.01614
Warmup Train [25][1990/3239]	Time 0.287 (0.238)	Data 0.001 (0.013)	Loss 4.3138 (4.2653)	Top-1 acc 28.125 (25.970)	Top-5 acc 46.875 (48.613)	lr 0.01614
Warmup Train [25][2000/3239]	Time 0.213 (0.238)	Data 0.001 (0.013)	Loss 4.4147 (4.2656)	Top-1 acc 23.047 (25.967)	Top-5 acc 44.141 (48.605)	lr 0.01613
Warmup Train [25][2010/3239]	Time 0.252 (0.238)	Data 0.004 (0.013)	Loss 4.4054 (4.2657)	Top-1 acc 24.609 (25.963)	Top-5 acc 44.922 (48.598)	lr 0.01613
Warmup Train [25][2020/3239]	Time 0.188 (0.238)	Data 0.001 (0.013)	Loss 4.4043 (4.2658)	Top-1 acc 21.484 (25.961)	Top-5 acc 45.312 (48.593)	lr 0.01612
Warmup Train [25][2030/3239]	Time 0.190 (0.238)	Data 0.001 (0.013)	Loss 4.1823 (4.2661)	Top-1 acc 23.828 (25.955)	Top-5 acc 51.172 (48.588)	lr 0.01611
Warmup Train [25][2040/3239]	Time 0.173 (0.238)	Data 0.001 (0.013)	Loss 4.2024 (4.2659)	Top-1 acc 27.734 (25.963)	Top-5 acc 49.609 (48.593)	lr 0.01611
Warmup Train [25][2050/3239]	Time 0.221 (0.238)	Data 0.001 (0.013)	Loss 4.1708 (4.2658)	Top-1 acc 26.953 (25.968)	Top-5 acc 51.172 (48.595)	lr 0.01610
Warmup Train [25][2060/3239]	Time 0.212 (0.238)	Data 0.001 (0.013)	Loss 4.3263 (4.2659)	Top-1 acc 24.219 (25.966)	Top-5 acc 42.578 (48.593)	lr 0.01610
Warmup Train [25][2070/3239]	Time 0.273 (0.238)	Data 0.001 (0.013)	Loss 4.1179 (4.2657)	Top-1 acc 28.516 (25.969)	Top-5 acc 51.953 (48.600)	lr 0.01609
Warmup Train [25][2080/3239]	Time 0.246 (0.238)	Data 0.001 (0.013)	Loss 4.1448 (4.2657)	Top-1 acc 31.250 (25.970)	Top-5 acc 51.562 (48.600)	lr 0.01609
Warmup Train [25][2090/3239]	Time 0.334 (0.238)	Data 0.001 (0.013)	Loss 4.2995 (4.2657)	Top-1 acc 25.000 (25.970)	Top-5 acc 50.000 (48.603)	lr 0.01608
Warmup Train [25][2100/3239]	Time 0.186 (0.238)	Data 0.001 (0.013)	Loss 4.2921 (4.2657)	Top-1 acc 25.781 (25.970)	Top-5 acc 44.531 (48.601)	lr 0.01607
Warmup Train [25][2110/3239]	Time 0.206 (0.238)	Data 0.001 (0.013)	Loss 4.4258 (4.2658)	Top-1 acc 23.438 (25.967)	Top-5 acc 45.703 (48.599)	lr 0.01607
Warmup Train [25][2120/3239]	Time 0.252 (0.237)	Data 0.001 (0.013)	Loss 4.1388 (4.2654)	Top-1 acc 30.469 (25.974)	Top-5 acc 49.609 (48.607)	lr 0.01606
Warmup Train [25][2130/3239]	Time 0.225 (0.237)	Data 0.001 (0.013)	Loss 4.2211 (4.2652)	Top-1 acc 27.734 (25.977)	Top-5 acc 50.781 (48.612)	lr 0.01606
Warmup Train [25][2140/3239]	Time 0.211 (0.237)	Data 0.001 (0.013)	Loss 4.1031 (4.2651)	Top-1 acc 28.906 (25.979)	Top-5 acc 54.688 (48.619)	lr 0.01605
Warmup Train [25][2150/3239]	Time 0.164 (0.237)	Data 0.001 (0.013)	Loss 4.3042 (4.2651)	Top-1 acc 23.828 (25.980)	Top-5 acc 48.828 (48.621)	lr 0.01605
Warmup Train [25][2160/3239]	Time 0.248 (0.237)	Data 0.001 (0.012)	Loss 4.2064 (4.2650)	Top-1 acc 25.781 (25.978)	Top-5 acc 51.953 (48.622)	lr 0.01604
Warmup Train [25][2170/3239]	Time 0.225 (0.237)	Data 0.001 (0.012)	Loss 4.2425 (4.2650)	Top-1 acc 28.125 (25.983)	Top-5 acc 51.172 (48.620)	lr 0.01604
Warmup Train [25][2180/3239]	Time 0.167 (0.237)	Data 0.001 (0.012)	Loss 3.9937 (4.2650)	Top-1 acc 33.594 (25.989)	Top-5 acc 51.562 (48.622)	lr 0.01603
Warmup Train [25][2190/3239]	Time 0.192 (0.237)	Data 0.001 (0.012)	Loss 4.2994 (4.2654)	Top-1 acc 25.781 (25.983)	Top-5 acc 46.875 (48.617)	lr 0.01602
Warmup Train [25][2200/3239]	Time 0.286 (0.237)	Data 0.001 (0.012)	Loss 3.9801 (4.2651)	Top-1 acc 31.250 (25.986)	Top-5 acc 51.953 (48.620)	lr 0.01602
Warmup Train [25][2210/3239]	Time 0.310 (0.237)	Data 0.001 (0.012)	Loss 4.3199 (4.2650)	Top-1 acc 25.000 (25.986)	Top-5 acc 46.484 (48.627)	lr 0.01601
Warmup Train [25][2220/3239]	Time 0.226 (0.237)	Data 0.001 (0.012)	Loss 4.2122 (4.2650)	Top-1 acc 31.641 (25.989)	Top-5 acc 54.688 (48.628)	lr 0.01601
Warmup Train [25][2230/3239]	Time 0.134 (0.237)	Data 0.002 (0.012)	Loss 4.2294 (4.2650)	Top-1 acc 26.562 (25.991)	Top-5 acc 45.312 (48.625)	lr 0.01600
Warmup Train [25][2240/3239]	Time 0.191 (0.237)	Data 0.001 (0.012)	Loss 4.5542 (4.2651)	Top-1 acc 22.266 (25.987)	Top-5 acc 44.531 (48.620)	lr 0.01600
Warmup Train [25][2250/3239]	Time 0.217 (0.237)	Data 0.001 (0.012)	Loss 4.1758 (4.2651)	Top-1 acc 28.906 (25.988)	Top-5 acc 50.391 (48.626)	lr 0.01599
Warmup Train [25][2260/3239]	Time 0.189 (0.237)	Data 0.001 (0.012)	Loss 4.2219 (4.2651)	Top-1 acc 25.000 (25.986)	Top-5 acc 49.609 (48.624)	lr 0.01598
Warmup Train [25][2270/3239]	Time 0.197 (0.237)	Data 0.001 (0.012)	Loss 4.1473 (4.2651)	Top-1 acc 28.516 (25.985)	Top-5 acc 51.953 (48.627)	lr 0.01598
Warmup Train [25][2280/3239]	Time 0.172 (0.237)	Data 0.002 (0.012)	Loss 4.1727 (4.2651)	Top-1 acc 27.344 (25.985)	Top-5 acc 52.344 (48.627)	lr 0.01597
Warmup Train [25][2290/3239]	Time 0.129 (0.237)	Data 0.001 (0.012)	Loss 4.2826 (4.2652)	Top-1 acc 25.000 (25.983)	Top-5 acc 50.781 (48.626)	lr 0.01597
Warmup Train [25][2300/3239]	Time 0.177 (0.237)	Data 0.001 (0.012)	Loss 4.2699 (4.2652)	Top-1 acc 28.516 (25.983)	Top-5 acc 49.609 (48.625)	lr 0.01596
Warmup Train [25][2310/3239]	Time 0.282 (0.236)	Data 0.001 (0.012)	Loss 4.1163 (4.2652)	Top-1 acc 30.078 (25.981)	Top-5 acc 50.000 (48.626)	lr 0.01596
Warmup Train [25][2320/3239]	Time 0.156 (0.236)	Data 0.001 (0.012)	Loss 4.3183 (4.2651)	Top-1 acc 26.953 (25.986)	Top-5 acc 48.828 (48.628)	lr 0.01595
Warmup Train [25][2330/3239]	Time 0.274 (0.236)	Data 0.002 (0.012)	Loss 4.3420 (4.2651)	Top-1 acc 26.953 (25.988)	Top-5 acc 46.094 (48.631)	lr 0.01594
Warmup Train [25][2340/3239]	Time 0.152 (0.236)	Data 0.002 (0.012)	Loss 4.3888 (4.2652)	Top-1 acc 23.828 (25.985)	Top-5 acc 44.922 (48.623)	lr 0.01594
Warmup Train [25][2350/3239]	Time 0.205 (0.236)	Data 0.001 (0.012)	Loss 4.2318 (4.2652)	Top-1 acc 24.609 (25.986)	Top-5 acc 52.734 (48.624)	lr 0.01593
Warmup Train [25][2360/3239]	Time 0.225 (0.236)	Data 0.001 (0.012)	Loss 4.1235 (4.2654)	Top-1 acc 25.781 (25.982)	Top-5 acc 47.656 (48.613)	lr 0.01593
Warmup Train [25][2370/3239]	Time 0.270 (0.236)	Data 0.001 (0.012)	Loss 4.2358 (4.2655)	Top-1 acc 27.344 (25.977)	Top-5 acc 48.828 (48.608)	lr 0.01592
Warmup Train [25][2380/3239]	Time 0.236 (0.236)	Data 0.001 (0.012)	Loss 4.3088 (4.2653)	Top-1 acc 29.688 (25.984)	Top-5 acc 47.656 (48.608)	lr 0.01592
Warmup Train [25][2390/3239]	Time 0.235 (0.236)	Data 0.002 (0.012)	Loss 4.1907 (4.2654)	Top-1 acc 28.516 (25.988)	Top-5 acc 50.781 (48.608)	lr 0.01591
Warmup Train [25][2400/3239]	Time 0.208 (0.236)	Data 0.001 (0.012)	Loss 4.3285 (4.2655)	Top-1 acc 26.953 (25.987)	Top-5 acc 48.047 (48.602)	lr 0.01591
Warmup Train [25][2410/3239]	Time 0.227 (0.236)	Data 0.001 (0.012)	Loss 4.3589 (4.2653)	Top-1 acc 26.172 (25.987)	Top-5 acc 46.094 (48.605)	lr 0.01590
Warmup Train [25][2420/3239]	Time 0.333 (0.236)	Data 0.002 (0.011)	Loss 4.1027 (4.2653)	Top-1 acc 32.031 (25.987)	Top-5 acc 52.734 (48.604)	lr 0.01589
Warmup Train [25][2430/3239]	Time 0.174 (0.236)	Data 0.001 (0.011)	Loss 4.2042 (4.2653)	Top-1 acc 25.781 (25.986)	Top-5 acc 49.219 (48.607)	lr 0.01589
Warmup Train [25][2440/3239]	Time 0.252 (0.236)	Data 0.001 (0.011)	Loss 4.4190 (4.2653)	Top-1 acc 22.266 (25.992)	Top-5 acc 43.359 (48.610)	lr 0.01588
Warmup Train [25][2450/3239]	Time 0.195 (0.236)	Data 0.001 (0.011)	Loss 4.3323 (4.2651)	Top-1 acc 24.609 (25.996)	Top-5 acc 50.000 (48.615)	lr 0.01588
Warmup Train [25][2460/3239]	Time 0.246 (0.236)	Data 0.001 (0.011)	Loss 4.1398 (4.2650)	Top-1 acc 26.172 (25.993)	Top-5 acc 53.125 (48.619)	lr 0.01587
Warmup Train [25][2470/3239]	Time 0.211 (0.236)	Data 0.001 (0.011)	Loss 4.4355 (4.2650)	Top-1 acc 23.047 (25.988)	Top-5 acc 43.750 (48.615)	lr 0.01587
Warmup Train [25][2480/3239]	Time 0.204 (0.236)	Data 0.001 (0.011)	Loss 4.3663 (4.2651)	Top-1 acc 23.047 (25.989)	Top-5 acc 44.922 (48.613)	lr 0.01586
Warmup Train [25][2490/3239]	Time 0.214 (0.236)	Data 0.024 (0.011)	Loss 4.2932 (4.2653)	Top-1 acc 25.000 (25.988)	Top-5 acc 50.391 (48.613)	lr 0.01585
Warmup Train [25][2500/3239]	Time 0.152 (0.236)	Data 0.001 (0.011)	Loss 4.1653 (4.2651)	Top-1 acc 30.469 (25.989)	Top-5 acc 50.000 (48.616)	lr 0.01585
Warmup Train [25][2510/3239]	Time 0.331 (0.236)	Data 0.003 (0.011)	Loss 4.3010 (4.2650)	Top-1 acc 25.391 (25.991)	Top-5 acc 46.094 (48.616)	lr 0.01584
Warmup Train [25][2520/3239]	Time 0.173 (0.236)	Data 0.001 (0.011)	Loss 4.2927 (4.2647)	Top-1 acc 22.656 (25.996)	Top-5 acc 44.531 (48.622)	lr 0.01584
Warmup Train [25][2530/3239]	Time 0.255 (0.235)	Data 0.001 (0.011)	Loss 4.3994 (4.2646)	Top-1 acc 24.609 (25.993)	Top-5 acc 48.047 (48.624)	lr 0.01583
Warmup Train [25][2540/3239]	Time 0.204 (0.235)	Data 0.001 (0.011)	Loss 4.2488 (4.2646)	Top-1 acc 29.688 (25.997)	Top-5 acc 49.219 (48.627)	lr 0.01583
Warmup Train [25][2550/3239]	Time 0.210 (0.235)	Data 0.001 (0.011)	Loss 4.2451 (4.2648)	Top-1 acc 21.094 (25.989)	Top-5 acc 50.391 (48.622)	lr 0.01582
Warmup Train [25][2560/3239]	Time 0.204 (0.235)	Data 0.001 (0.011)	Loss 4.1547 (4.2647)	Top-1 acc 26.953 (25.993)	Top-5 acc 49.609 (48.626)	lr 0.01581
Warmup Train [25][2570/3239]	Time 0.192 (0.235)	Data 0.001 (0.011)	Loss 4.1818 (4.2646)	Top-1 acc 28.125 (25.992)	Top-5 acc 51.172 (48.629)	lr 0.01581
Warmup Train [25][2580/3239]	Time 0.157 (0.235)	Data 0.001 (0.011)	Loss 4.2806 (4.2646)	Top-1 acc 24.609 (25.990)	Top-5 acc 47.266 (48.628)	lr 0.01580
Warmup Train [25][2590/3239]	Time 0.157 (0.235)	Data 0.001 (0.011)	Loss 4.0866 (4.2643)	Top-1 acc 27.344 (25.992)	Top-5 acc 53.125 (48.636)	lr 0.01580
Warmup Train [25][2600/3239]	Time 0.228 (0.235)	Data 0.050 (0.011)	Loss 4.2057 (4.2642)	Top-1 acc 23.828 (25.990)	Top-5 acc 49.609 (48.637)	lr 0.01579
Warmup Train [25][2610/3239]	Time 0.163 (0.235)	Data 0.001 (0.011)	Loss 4.4120 (4.2642)	Top-1 acc 23.438 (25.984)	Top-5 acc 45.703 (48.636)	lr 0.01579
Warmup Train [25][2620/3239]	Time 0.177 (0.235)	Data 0.002 (0.011)	Loss 4.1909 (4.2640)	Top-1 acc 29.297 (25.988)	Top-5 acc 52.344 (48.636)	lr 0.01578
Warmup Train [25][2630/3239]	Time 0.352 (0.235)	Data 0.001 (0.011)	Loss 4.2051 (4.2640)	Top-1 acc 23.828 (25.990)	Top-5 acc 46.094 (48.639)	lr 0.01578
Warmup Train [25][2640/3239]	Time 0.267 (0.235)	Data 0.001 (0.011)	Loss 4.3733 (4.2641)	Top-1 acc 24.609 (25.991)	Top-5 acc 48.438 (48.639)	lr 0.01577
Warmup Train [25][2650/3239]	Time 0.135 (0.235)	Data 0.001 (0.011)	Loss 4.3962 (4.2642)	Top-1 acc 26.172 (25.991)	Top-5 acc 45.703 (48.637)	lr 0.01576
Warmup Train [25][2660/3239]	Time 0.237 (0.235)	Data 0.002 (0.011)	Loss 4.2405 (4.2640)	Top-1 acc 28.906 (25.994)	Top-5 acc 46.875 (48.639)	lr 0.01576
Warmup Train [25][2670/3239]	Time 0.240 (0.235)	Data 0.001 (0.011)	Loss 4.0931 (4.2640)	Top-1 acc 29.297 (25.993)	Top-5 acc 51.562 (48.638)	lr 0.01575
Warmup Train [25][2680/3239]	Time 0.208 (0.235)	Data 0.001 (0.011)	Loss 4.3966 (4.2640)	Top-1 acc 25.000 (25.993)	Top-5 acc 46.875 (48.638)	lr 0.01575
Warmup Train [25][2690/3239]	Time 0.231 (0.235)	Data 0.001 (0.011)	Loss 4.0703 (4.2638)	Top-1 acc 27.734 (25.995)	Top-5 acc 52.344 (48.643)	lr 0.01574
Warmup Train [25][2700/3239]	Time 0.290 (0.235)	Data 0.001 (0.011)	Loss 4.2435 (4.2640)	Top-1 acc 28.516 (25.994)	Top-5 acc 48.828 (48.639)	lr 0.01574
Warmup Train [25][2710/3239]	Time 0.249 (0.235)	Data 0.001 (0.011)	Loss 4.2830 (4.2640)	Top-1 acc 26.172 (25.992)	Top-5 acc 50.391 (48.639)	lr 0.01573
Warmup Train [25][2720/3239]	Time 0.199 (0.235)	Data 0.001 (0.011)	Loss 4.4174 (4.2640)	Top-1 acc 26.172 (25.996)	Top-5 acc 47.656 (48.641)	lr 0.01572
Warmup Train [25][2730/3239]	Time 0.180 (0.234)	Data 0.001 (0.011)	Loss 4.2518 (4.2641)	Top-1 acc 27.344 (25.991)	Top-5 acc 49.219 (48.636)	lr 0.01572
Warmup Train [25][2740/3239]	Time 0.318 (0.235)	Data 0.001 (0.011)	Loss 4.1851 (4.2640)	Top-1 acc 28.125 (25.992)	Top-5 acc 50.391 (48.641)	lr 0.01571
Warmup Train [25][2750/3239]	Time 0.344 (0.234)	Data 0.001 (0.010)	Loss 4.3036 (4.2639)	Top-1 acc 29.297 (25.994)	Top-5 acc 45.703 (48.648)	lr 0.01571
Warmup Train [25][2760/3239]	Time 0.159 (0.234)	Data 0.001 (0.010)	Loss 4.2564 (4.2639)	Top-1 acc 27.344 (25.990)	Top-5 acc 50.391 (48.646)	lr 0.01570
Warmup Train [25][2770/3239]	Time 0.236 (0.234)	Data 0.001 (0.010)	Loss 4.2272 (4.2638)	Top-1 acc 24.609 (25.992)	Top-5 acc 52.344 (48.653)	lr 0.01570
Warmup Train [25][2780/3239]	Time 0.196 (0.234)	Data 0.002 (0.010)	Loss 4.3938 (4.2637)	Top-1 acc 24.219 (25.994)	Top-5 acc 49.219 (48.657)	lr 0.01569
Warmup Train [25][2790/3239]	Time 0.235 (0.234)	Data 0.002 (0.010)	Loss 4.3130 (4.2635)	Top-1 acc 25.000 (25.997)	Top-5 acc 50.781 (48.663)	lr 0.01569
Warmup Train [25][2800/3239]	Time 0.220 (0.234)	Data 0.001 (0.010)	Loss 4.2216 (4.2636)	Top-1 acc 26.953 (25.994)	Top-5 acc 49.219 (48.663)	lr 0.01568
Warmup Train [25][2810/3239]	Time 0.127 (0.234)	Data 0.001 (0.010)	Loss 4.1392 (4.2634)	Top-1 acc 26.953 (25.995)	Top-5 acc 48.047 (48.667)	lr 0.01567
Warmup Train [25][2820/3239]	Time 0.186 (0.234)	Data 0.001 (0.010)	Loss 4.2221 (4.2635)	Top-1 acc 27.344 (25.995)	Top-5 acc 50.000 (48.665)	lr 0.01567
Warmup Train [25][2830/3239]	Time 0.185 (0.234)	Data 0.001 (0.010)	Loss 4.1896 (4.2637)	Top-1 acc 27.344 (25.994)	Top-5 acc 51.953 (48.659)	lr 0.01566
Warmup Train [25][2840/3239]	Time 0.147 (0.234)	Data 0.001 (0.010)	Loss 4.1845 (4.2637)	Top-1 acc 26.562 (25.992)	Top-5 acc 48.828 (48.657)	lr 0.01566
Warmup Train [25][2850/3239]	Time 0.182 (0.234)	Data 0.001 (0.010)	Loss 4.3021 (4.2639)	Top-1 acc 27.734 (25.985)	Top-5 acc 45.312 (48.651)	lr 0.01565
Warmup Train [25][2860/3239]	Time 0.320 (0.234)	Data 0.001 (0.010)	Loss 4.0599 (4.2637)	Top-1 acc 27.734 (25.987)	Top-5 acc 55.078 (48.658)	lr 0.01565
Warmup Train [25][2870/3239]	Time 0.182 (0.234)	Data 0.001 (0.010)	Loss 4.0667 (4.2632)	Top-1 acc 29.297 (25.996)	Top-5 acc 53.125 (48.670)	lr 0.01564
Warmup Train [25][2880/3239]	Time 0.219 (0.234)	Data 0.001 (0.010)	Loss 4.2321 (4.2631)	Top-1 acc 23.438 (25.997)	Top-5 acc 48.828 (48.675)	lr 0.01563
Warmup Train [25][2890/3239]	Time 0.192 (0.234)	Data 0.001 (0.010)	Loss 4.2298 (4.2628)	Top-1 acc 24.609 (26.004)	Top-5 acc 48.828 (48.679)	lr 0.01563
Warmup Train [25][2900/3239]	Time 0.260 (0.234)	Data 0.001 (0.010)	Loss 4.2433 (4.2626)	Top-1 acc 23.828 (26.007)	Top-5 acc 50.781 (48.688)	lr 0.01562
Warmup Train [25][2910/3239]	Time 0.208 (0.234)	Data 0.001 (0.010)	Loss 4.3050 (4.2621)	Top-1 acc 27.344 (26.017)	Top-5 acc 45.312 (48.698)	lr 0.01562
Warmup Train [25][2920/3239]	Time 0.222 (0.234)	Data 0.001 (0.010)	Loss 4.2991 (4.2621)	Top-1 acc 30.078 (26.020)	Top-5 acc 48.047 (48.697)	lr 0.01561
Warmup Train [25][2930/3239]	Time 0.188 (0.234)	Data 0.001 (0.010)	Loss 4.1979 (4.2622)	Top-1 acc 25.000 (26.017)	Top-5 acc 48.828 (48.697)	lr 0.01561
Warmup Train [25][2940/3239]	Time 0.225 (0.234)	Data 0.001 (0.010)	Loss 4.2042 (4.2621)	Top-1 acc 25.000 (26.019)	Top-5 acc 50.781 (48.700)	lr 0.01560
Warmup Train [25][2950/3239]	Time 0.172 (0.234)	Data 0.001 (0.010)	Loss 4.1898 (4.2622)	Top-1 acc 26.172 (26.016)	Top-5 acc 51.562 (48.701)	lr 0.01560
Warmup Train [25][2960/3239]	Time 0.345 (0.234)	Data 0.002 (0.010)	Loss 4.2504 (4.2621)	Top-1 acc 22.656 (26.013)	Top-5 acc 46.875 (48.704)	lr 0.01559
Warmup Train [25][2970/3239]	Time 0.159 (0.234)	Data 0.002 (0.010)	Loss 4.0122 (4.2621)	Top-1 acc 30.859 (26.015)	Top-5 acc 53.516 (48.702)	lr 0.01558
Warmup Train [25][2980/3239]	Time 0.227 (0.234)	Data 0.001 (0.010)	Loss 4.2116 (4.2620)	Top-1 acc 30.469 (26.018)	Top-5 acc 48.828 (48.704)	lr 0.01558
Warmup Train [25][2990/3239]	Time 0.264 (0.234)	Data 0.003 (0.010)	Loss 4.2497 (4.2621)	Top-1 acc 24.609 (26.015)	Top-5 acc 47.656 (48.698)	lr 0.01557
Warmup Train [25][3000/3239]	Time 0.245 (0.234)	Data 0.001 (0.010)	Loss 4.4560 (4.2622)	Top-1 acc 22.656 (26.016)	Top-5 acc 43.359 (48.698)	lr 0.01557
Warmup Train [25][3010/3239]	Time 0.236 (0.234)	Data 0.002 (0.010)	Loss 4.0896 (4.2623)	Top-1 acc 28.125 (26.012)	Top-5 acc 53.516 (48.696)	lr 0.01556
Warmup Train [25][3020/3239]	Time 0.192 (0.234)	Data 0.001 (0.010)	Loss 4.0932 (4.2622)	Top-1 acc 33.594 (26.016)	Top-5 acc 51.953 (48.697)	lr 0.01556
Warmup Train [25][3030/3239]	Time 0.225 (0.234)	Data 0.001 (0.010)	Loss 4.2256 (4.2621)	Top-1 acc 24.219 (26.015)	Top-5 acc 50.000 (48.700)	lr 0.01555
Warmup Train [25][3040/3239]	Time 0.200 (0.233)	Data 0.001 (0.010)	Loss 4.3285 (4.2619)	Top-1 acc 26.562 (26.019)	Top-5 acc 47.266 (48.703)	lr 0.01555
Warmup Train [25][3050/3239]	Time 0.155 (0.233)	Data 0.001 (0.010)	Loss 4.1417 (4.2620)	Top-1 acc 28.906 (26.016)	Top-5 acc 49.609 (48.699)	lr 0.01554
Warmup Train [25][3060/3239]	Time 0.223 (0.233)	Data 0.002 (0.010)	Loss 4.2298 (4.2620)	Top-1 acc 24.609 (26.014)	Top-5 acc 50.000 (48.700)	lr 0.01553
Warmup Train [25][3070/3239]	Time 0.318 (0.233)	Data 0.002 (0.010)	Loss 4.2306 (4.2619)	Top-1 acc 28.516 (26.016)	Top-5 acc 48.047 (48.701)	lr 0.01553
Warmup Train [25][3080/3239]	Time 0.208 (0.233)	Data 0.001 (0.010)	Loss 4.1399 (4.2617)	Top-1 acc 27.734 (26.018)	Top-5 acc 51.172 (48.704)	lr 0.01552
Warmup Train [25][3090/3239]	Time 0.197 (0.233)	Data 0.001 (0.010)	Loss 4.4046 (4.2617)	Top-1 acc 19.922 (26.016)	Top-5 acc 46.484 (48.700)	lr 0.01552
Warmup Train [25][3100/3239]	Time 0.209 (0.233)	Data 0.002 (0.010)	Loss 4.3996 (4.2615)	Top-1 acc 23.828 (26.024)	Top-5 acc 44.141 (48.702)	lr 0.01551
Warmup Train [25][3110/3239]	Time 0.216 (0.233)	Data 0.001 (0.010)	Loss 4.1786 (4.2614)	Top-1 acc 25.781 (26.027)	Top-5 acc 52.344 (48.706)	lr 0.01551
Warmup Train [25][3120/3239]	Time 0.227 (0.233)	Data 0.001 (0.010)	Loss 4.3800 (4.2615)	Top-1 acc 23.047 (26.027)	Top-5 acc 46.094 (48.706)	lr 0.01550
Warmup Train [25][3130/3239]	Time 0.227 (0.233)	Data 0.001 (0.010)	Loss 4.2340 (4.2615)	Top-1 acc 24.219 (26.022)	Top-5 acc 46.484 (48.705)	lr 0.01549
Warmup Train [25][3140/3239]	Time 0.169 (0.233)	Data 0.001 (0.010)	Loss 4.2895 (4.2615)	Top-1 acc 27.734 (26.024)	Top-5 acc 50.000 (48.706)	lr 0.01549
Warmup Train [25][3150/3239]	Time 0.181 (0.233)	Data 0.001 (0.010)	Loss 4.3577 (4.2615)	Top-1 acc 25.391 (26.022)	Top-5 acc 43.359 (48.706)	lr 0.01548
Warmup Train [25][3160/3239]	Time 0.310 (0.233)	Data 0.001 (0.009)	Loss 4.1300 (4.2616)	Top-1 acc 26.562 (26.017)	Top-5 acc 51.172 (48.704)	lr 0.01548
Warmup Train [25][3170/3239]	Time 0.225 (0.233)	Data 0.001 (0.009)	Loss 4.3097 (4.2615)	Top-1 acc 27.734 (26.019)	Top-5 acc 47.656 (48.705)	lr 0.01547
Warmup Train [25][3180/3239]	Time 0.145 (0.233)	Data 0.000 (0.009)	Loss 4.3843 (4.2612)	Top-1 acc 19.531 (26.023)	Top-5 acc 47.656 (48.712)	lr 0.01547
Warmup Train [25][3190/3239]	Time 0.196 (0.233)	Data 0.000 (0.009)	Loss 4.1107 (4.2611)	Top-1 acc 29.688 (26.030)	Top-5 acc 51.172 (48.717)	lr 0.01546
Warmup Train [25][3200/3239]	Time 0.208 (0.233)	Data 0.000 (0.009)	Loss 4.2265 (4.2611)	Top-1 acc 26.953 (26.031)	Top-5 acc 48.047 (48.716)	lr 0.01546
Warmup Train [25][3210/3239]	Time 0.250 (0.233)	Data 0.000 (0.009)	Loss 4.1556 (4.2608)	Top-1 acc 29.297 (26.039)	Top-5 acc 51.953 (48.723)	lr 0.01545
Warmup Train [25][3220/3239]	Time 0.198 (0.233)	Data 0.000 (0.009)	Loss 4.2570 (4.2607)	Top-1 acc 26.562 (26.044)	Top-5 acc 50.781 (48.729)	lr 0.01544
Warmup Train [25][3230/3239]	Time 0.204 (0.233)	Data 0.000 (0.009)	Loss 4.0941 (4.2605)	Top-1 acc 31.641 (26.047)	Top-5 acc 51.562 (48.729)	lr 0.01544
Warmup Train [25][3239/3239]	Time 0.149 (0.233)	Data 0.000 (0.009)	Loss 4.3617 (4.2605)	Top-1 acc 27.160 (26.046)	Top-5 acc 43.210 (48.728)	lr 0.01543
==========Warmup Valid [25/40]	loss 3.225	top-1 acc 33.537	top-5 acc 58.018	Train top-1 26.046	top-5 48.728	flops: 442.4M
Warmup Train [26][0/3239]	Time 18.381 (18.381)	Data 16.356 (16.356)	Loss 4.0533 (4.0533)	Top-1 acc 31.250 (31.250)	Top-5 acc 51.953 (51.953)	lr 0.01543
Warmup Train [26][10/3239]	Time 0.260 (1.917)	Data 0.001 (1.493)	Loss 4.1111 (4.1777)	Top-1 acc 25.391 (26.456)	Top-5 acc 51.953 (50.213)	lr 0.01543
Warmup Train [26][20/3239]	Time 0.158 (1.122)	Data 0.001 (0.783)	Loss 4.1793 (4.2136)	Top-1 acc 31.250 (26.265)	Top-5 acc 48.828 (49.944)	lr 0.01542
Warmup Train [26][30/3239]	Time 0.210 (0.834)	Data 0.001 (0.533)	Loss 4.2475 (4.2214)	Top-1 acc 28.516 (26.588)	Top-5 acc 49.219 (49.836)	lr 0.01542
Warmup Train [26][40/3239]	Time 0.215 (0.686)	Data 0.001 (0.404)	Loss 4.2637 (4.2341)	Top-1 acc 28.125 (26.220)	Top-5 acc 49.219 (49.343)	lr 0.01541
Warmup Train [26][50/3239]	Time 0.192 (0.598)	Data 0.001 (0.326)	Loss 4.0972 (4.2307)	Top-1 acc 29.297 (26.386)	Top-5 acc 51.562 (49.387)	lr 0.01540
Warmup Train [26][60/3239]	Time 0.180 (0.535)	Data 0.002 (0.273)	Loss 4.0026 (4.2225)	Top-1 acc 29.688 (26.729)	Top-5 acc 56.641 (49.629)	lr 0.01540
Warmup Train [26][70/3239]	Time 0.392 (0.495)	Data 0.001 (0.234)	Loss 4.2987 (4.2229)	Top-1 acc 25.391 (26.728)	Top-5 acc 51.953 (49.681)	lr 0.01539
Warmup Train [26][80/3239]	Time 0.222 (0.462)	Data 0.001 (0.206)	Loss 4.3267 (4.2208)	Top-1 acc 26.953 (26.905)	Top-5 acc 48.828 (49.797)	lr 0.01539
Warmup Train [26][90/3239]	Time 0.174 (0.435)	Data 0.002 (0.183)	Loss 4.2295 (4.2262)	Top-1 acc 21.875 (26.807)	Top-5 acc 48.047 (49.635)	lr 0.01538
Warmup Train [26][100/3239]	Time 0.231 (0.413)	Data 0.001 (0.165)	Loss 4.1497 (4.2198)	Top-1 acc 27.344 (26.899)	Top-5 acc 46.484 (49.652)	lr 0.01538
Warmup Train [26][110/3239]	Time 0.183 (0.396)	Data 0.001 (0.151)	Loss 4.2040 (4.2181)	Top-1 acc 25.781 (26.830)	Top-5 acc 50.000 (49.697)	lr 0.01537
Warmup Train [26][120/3239]	Time 0.138 (0.381)	Data 0.001 (0.138)	Loss 4.1270 (4.2175)	Top-1 acc 32.812 (26.924)	Top-5 acc 54.688 (49.655)	lr 0.01537
Warmup Train [26][130/3239]	Time 0.233 (0.370)	Data 0.001 (0.128)	Loss 4.1596 (4.2237)	Top-1 acc 26.953 (26.834)	Top-5 acc 52.734 (49.526)	lr 0.01536
Warmup Train [26][140/3239]	Time 0.221 (0.359)	Data 0.001 (0.120)	Loss 4.0361 (4.2211)	Top-1 acc 30.859 (26.867)	Top-5 acc 53.516 (49.565)	lr 0.01535
Warmup Train [26][150/3239]	Time 0.167 (0.351)	Data 0.001 (0.112)	Loss 4.0516 (4.2199)	Top-1 acc 30.469 (26.894)	Top-5 acc 51.562 (49.529)	lr 0.01535
Warmup Train [26][160/3239]	Time 0.215 (0.342)	Data 0.002 (0.105)	Loss 4.2709 (4.2222)	Top-1 acc 26.172 (26.856)	Top-5 acc 44.531 (49.486)	lr 0.01534
Warmup Train [26][170/3239]	Time 0.346 (0.335)	Data 0.001 (0.099)	Loss 4.1913 (4.2213)	Top-1 acc 27.734 (26.880)	Top-5 acc 48.438 (49.516)	lr 0.01534
Warmup Train [26][180/3239]	Time 0.227 (0.329)	Data 0.001 (0.094)	Loss 4.2950 (4.2207)	Top-1 acc 27.344 (26.906)	Top-5 acc 50.391 (49.553)	lr 0.01533
Warmup Train [26][190/3239]	Time 0.208 (0.324)	Data 0.001 (0.089)	Loss 4.1448 (4.2235)	Top-1 acc 28.516 (26.851)	Top-5 acc 50.391 (49.450)	lr 0.01533
Warmup Train [26][200/3239]	Time 0.265 (0.320)	Data 0.001 (0.085)	Loss 4.2810 (4.2213)	Top-1 acc 23.047 (26.881)	Top-5 acc 45.312 (49.487)	lr 0.01532
Warmup Train [26][210/3239]	Time 0.215 (0.315)	Data 0.001 (0.081)	Loss 4.1251 (4.2209)	Top-1 acc 30.469 (26.883)	Top-5 acc 52.734 (49.496)	lr 0.01532
Warmup Train [26][220/3239]	Time 0.213 (0.311)	Data 0.001 (0.077)	Loss 4.0588 (4.2201)	Top-1 acc 28.906 (26.859)	Top-5 acc 51.953 (49.505)	lr 0.01531
Warmup Train [26][230/3239]	Time 0.206 (0.307)	Data 0.001 (0.074)	Loss 4.2656 (4.2220)	Top-1 acc 23.828 (26.779)	Top-5 acc 45.312 (49.483)	lr 0.01530
Warmup Train [26][240/3239]	Time 0.156 (0.304)	Data 0.001 (0.071)	Loss 4.1969 (4.2226)	Top-1 acc 28.125 (26.781)	Top-5 acc 51.562 (49.507)	lr 0.01530
Warmup Train [26][250/3239]	Time 0.162 (0.300)	Data 0.001 (0.068)	Loss 4.3265 (4.2260)	Top-1 acc 24.609 (26.684)	Top-5 acc 47.656 (49.476)	lr 0.01529
Warmup Train [26][260/3239]	Time 0.174 (0.297)	Data 0.001 (0.066)	Loss 4.2274 (4.2255)	Top-1 acc 28.906 (26.711)	Top-5 acc 45.703 (49.446)	lr 0.01529
Warmup Train [26][270/3239]	Time 0.152 (0.294)	Data 0.001 (0.064)	Loss 4.2443 (4.2259)	Top-1 acc 28.516 (26.666)	Top-5 acc 50.000 (49.436)	lr 0.01528
Warmup Train [26][280/3239]	Time 0.364 (0.293)	Data 0.001 (0.061)	Loss 4.1887 (4.2247)	Top-1 acc 24.609 (26.714)	Top-5 acc 51.172 (49.479)	lr 0.01528
Warmup Train [26][290/3239]	Time 0.277 (0.290)	Data 0.001 (0.059)	Loss 4.1968 (4.2250)	Top-1 acc 27.734 (26.709)	Top-5 acc 50.000 (49.436)	lr 0.01527
Warmup Train [26][300/3239]	Time 0.136 (0.288)	Data 0.001 (0.057)	Loss 4.2264 (4.2252)	Top-1 acc 24.219 (26.721)	Top-5 acc 51.172 (49.463)	lr 0.01527
Warmup Train [26][310/3239]	Time 0.204 (0.286)	Data 0.001 (0.056)	Loss 4.3901 (4.2234)	Top-1 acc 22.656 (26.756)	Top-5 acc 44.141 (49.510)	lr 0.01526
Warmup Train [26][320/3239]	Time 0.210 (0.284)	Data 0.001 (0.054)	Loss 4.3360 (4.2224)	Top-1 acc 23.828 (26.735)	Top-5 acc 46.484 (49.539)	lr 0.01525
Warmup Train [26][330/3239]	Time 0.156 (0.282)	Data 0.001 (0.052)	Loss 4.1742 (4.2204)	Top-1 acc 24.219 (26.764)	Top-5 acc 50.391 (49.603)	lr 0.01525
Warmup Train [26][340/3239]	Time 0.227 (0.281)	Data 0.001 (0.051)	Loss 4.3198 (4.2210)	Top-1 acc 26.562 (26.748)	Top-5 acc 48.438 (49.588)	lr 0.01524
Warmup Train [26][350/3239]	Time 0.251 (0.279)	Data 0.001 (0.050)	Loss 4.3449 (4.2241)	Top-1 acc 26.562 (26.678)	Top-5 acc 48.047 (49.517)	lr 0.01524
Warmup Train [26][360/3239]	Time 0.332 (0.277)	Data 0.001 (0.048)	Loss 4.2863 (4.2239)	Top-1 acc 27.734 (26.680)	Top-5 acc 48.047 (49.517)	lr 0.01523
Warmup Train [26][370/3239]	Time 0.262 (0.276)	Data 0.001 (0.047)	Loss 4.3691 (4.2252)	Top-1 acc 25.000 (26.643)	Top-5 acc 44.531 (49.495)	lr 0.01523
Warmup Train [26][380/3239]	Time 0.208 (0.275)	Data 0.001 (0.046)	Loss 4.2245 (4.2260)	Top-1 acc 32.031 (26.657)	Top-5 acc 50.000 (49.481)	lr 0.01522
Warmup Train [26][390/3239]	Time 0.191 (0.273)	Data 0.001 (0.045)	Loss 4.3720 (4.2277)	Top-1 acc 25.391 (26.626)	Top-5 acc 48.047 (49.456)	lr 0.01521
Warmup Train [26][400/3239]	Time 0.217 (0.272)	Data 0.001 (0.044)	Loss 4.2212 (4.2269)	Top-1 acc 23.438 (26.610)	Top-5 acc 46.484 (49.472)	lr 0.01521
Warmup Train [26][410/3239]	Time 0.227 (0.271)	Data 0.002 (0.043)	Loss 4.1556 (4.2259)	Top-1 acc 24.219 (26.615)	Top-5 acc 53.125 (49.493)	lr 0.01520
Warmup Train [26][420/3239]	Time 0.260 (0.270)	Data 0.001 (0.042)	Loss 4.0524 (4.2262)	Top-1 acc 31.641 (26.614)	Top-5 acc 53.516 (49.495)	lr 0.01520
Warmup Train [26][430/3239]	Time 0.215 (0.268)	Data 0.001 (0.041)	Loss 4.3301 (4.2260)	Top-1 acc 26.172 (26.628)	Top-5 acc 49.609 (49.523)	lr 0.01519
Warmup Train [26][440/3239]	Time 0.259 (0.267)	Data 0.001 (0.040)	Loss 4.2582 (4.2263)	Top-1 acc 26.172 (26.607)	Top-5 acc 50.781 (49.530)	lr 0.01519
Warmup Train [26][450/3239]	Time 0.224 (0.266)	Data 0.001 (0.039)	Loss 4.1006 (4.2265)	Top-1 acc 26.953 (26.589)	Top-5 acc 53.125 (49.510)	lr 0.01518
Warmup Train [26][460/3239]	Time 0.131 (0.265)	Data 0.001 (0.038)	Loss 4.1119 (4.2278)	Top-1 acc 26.172 (26.537)	Top-5 acc 48.438 (49.490)	lr 0.01518
Warmup Train [26][470/3239]	Time 0.279 (0.264)	Data 0.001 (0.038)	Loss 4.1947 (4.2267)	Top-1 acc 26.953 (26.570)	Top-5 acc 48.828 (49.521)	lr 0.01517
Warmup Train [26][480/3239]	Time 0.294 (0.263)	Data 0.001 (0.037)	Loss 4.1258 (4.2272)	Top-1 acc 32.812 (26.565)	Top-5 acc 51.562 (49.525)	lr 0.01516
Warmup Train [26][490/3239]	Time 0.167 (0.262)	Data 0.001 (0.036)	Loss 4.3985 (4.2269)	Top-1 acc 18.359 (26.559)	Top-5 acc 46.094 (49.535)	lr 0.01516
Warmup Train [26][500/3239]	Time 0.161 (0.261)	Data 0.001 (0.036)	Loss 3.9725 (4.2266)	Top-1 acc 34.375 (26.559)	Top-5 acc 53.125 (49.548)	lr 0.01515
Warmup Train [26][510/3239]	Time 0.235 (0.261)	Data 0.001 (0.035)	Loss 4.2099 (4.2279)	Top-1 acc 26.562 (26.537)	Top-5 acc 51.953 (49.518)	lr 0.01515
Warmup Train [26][520/3239]	Time 0.248 (0.260)	Data 0.002 (0.034)	Loss 4.1641 (4.2281)	Top-1 acc 26.953 (26.520)	Top-5 acc 52.344 (49.522)	lr 0.01514
Warmup Train [26][530/3239]	Time 0.197 (0.259)	Data 0.001 (0.034)	Loss 4.2680 (4.2270)	Top-1 acc 25.000 (26.540)	Top-5 acc 50.000 (49.556)	lr 0.01514
Warmup Train [26][540/3239]	Time 0.226 (0.258)	Data 0.001 (0.033)	Loss 4.1634 (4.2269)	Top-1 acc 26.172 (26.539)	Top-5 acc 51.953 (49.562)	lr 0.01513
Warmup Train [26][550/3239]	Time 0.222 (0.258)	Data 0.002 (0.033)	Loss 4.1103 (4.2266)	Top-1 acc 27.734 (26.581)	Top-5 acc 52.734 (49.595)	lr 0.01513
Warmup Train [26][560/3239]	Time 0.189 (0.257)	Data 0.001 (0.032)	Loss 4.2541 (4.2269)	Top-1 acc 24.609 (26.583)	Top-5 acc 48.438 (49.594)	lr 0.01512
Warmup Train [26][570/3239]	Time 0.210 (0.257)	Data 0.001 (0.031)	Loss 4.0360 (4.2267)	Top-1 acc 30.078 (26.593)	Top-5 acc 56.250 (49.598)	lr 0.01511
Warmup Train [26][580/3239]	Time 0.336 (0.256)	Data 0.001 (0.031)	Loss 3.9957 (4.2259)	Top-1 acc 32.422 (26.613)	Top-5 acc 53.906 (49.614)	lr 0.01511
Warmup Train [26][590/3239]	Time 0.237 (0.256)	Data 0.002 (0.030)	Loss 4.2126 (4.2261)	Top-1 acc 28.125 (26.604)	Top-5 acc 49.219 (49.601)	lr 0.01510
Warmup Train [26][600/3239]	Time 0.209 (0.255)	Data 0.001 (0.030)	Loss 4.1582 (4.2272)	Top-1 acc 30.859 (26.587)	Top-5 acc 50.781 (49.572)	lr 0.01510
Warmup Train [26][610/3239]	Time 0.154 (0.255)	Data 0.001 (0.030)	Loss 4.1805 (4.2274)	Top-1 acc 28.906 (26.608)	Top-5 acc 48.047 (49.570)	lr 0.01509
Warmup Train [26][620/3239]	Time 0.198 (0.254)	Data 0.001 (0.029)	Loss 4.1456 (4.2278)	Top-1 acc 26.562 (26.591)	Top-5 acc 53.125 (49.555)	lr 0.01509
Warmup Train [26][630/3239]	Time 0.204 (0.254)	Data 0.001 (0.029)	Loss 4.1552 (4.2271)	Top-1 acc 25.000 (26.595)	Top-5 acc 49.609 (49.579)	lr 0.01508
Warmup Train [26][640/3239]	Time 0.199 (0.253)	Data 0.001 (0.028)	Loss 4.1907 (4.2271)	Top-1 acc 27.734 (26.605)	Top-5 acc 50.000 (49.584)	lr 0.01508
Warmup Train [26][650/3239]	Time 0.203 (0.253)	Data 0.001 (0.028)	Loss 4.1490 (4.2273)	Top-1 acc 26.562 (26.614)	Top-5 acc 51.953 (49.588)	lr 0.01507
Warmup Train [26][660/3239]	Time 0.172 (0.252)	Data 0.001 (0.028)	Loss 4.0713 (4.2273)	Top-1 acc 30.859 (26.616)	Top-5 acc 49.609 (49.594)	lr 0.01506
Warmup Train [26][670/3239]	Time 0.329 (0.252)	Data 0.001 (0.027)	Loss 4.1605 (4.2259)	Top-1 acc 26.172 (26.653)	Top-5 acc 49.219 (49.623)	lr 0.01506
Warmup Train [26][680/3239]	Time 0.199 (0.252)	Data 0.002 (0.027)	Loss 4.3999 (4.2256)	Top-1 acc 20.703 (26.647)	Top-5 acc 47.656 (49.630)	lr 0.01505
Warmup Train [26][690/3239]	Time 0.187 (0.251)	Data 0.001 (0.026)	Loss 4.0710 (4.2263)	Top-1 acc 32.422 (26.629)	Top-5 acc 53.516 (49.628)	lr 0.01505
Warmup Train [26][700/3239]	Time 0.231 (0.251)	Data 0.001 (0.026)	Loss 4.0831 (4.2267)	Top-1 acc 29.688 (26.619)	Top-5 acc 53.125 (49.616)	lr 0.01504
Warmup Train [26][710/3239]	Time 0.190 (0.250)	Data 0.002 (0.026)	Loss 4.4231 (4.2263)	Top-1 acc 21.094 (26.620)	Top-5 acc 42.188 (49.618)	lr 0.01504
Warmup Train [26][720/3239]	Time 0.211 (0.250)	Data 0.001 (0.025)	Loss 4.1695 (4.2265)	Top-1 acc 30.469 (26.614)	Top-5 acc 48.438 (49.609)	lr 0.01503
Warmup Train [26][730/3239]	Time 0.183 (0.250)	Data 0.002 (0.025)	Loss 4.3232 (4.2263)	Top-1 acc 26.562 (26.634)	Top-5 acc 45.312 (49.614)	lr 0.01503
Warmup Train [26][740/3239]	Time 0.225 (0.249)	Data 0.001 (0.025)	Loss 4.2954 (4.2276)	Top-1 acc 23.438 (26.618)	Top-5 acc 47.656 (49.589)	lr 0.01502
Warmup Train [26][750/3239]	Time 0.185 (0.249)	Data 0.001 (0.025)	Loss 4.1175 (4.2283)	Top-1 acc 26.172 (26.590)	Top-5 acc 51.562 (49.573)	lr 0.01501
Warmup Train [26][760/3239]	Time 0.281 (0.249)	Data 0.002 (0.024)	Loss 4.3045 (4.2281)	Top-1 acc 20.703 (26.593)	Top-5 acc 47.266 (49.573)	lr 0.01501
Warmup Train [26][770/3239]	Time 0.218 (0.248)	Data 0.002 (0.024)	Loss 4.0364 (4.2284)	Top-1 acc 30.078 (26.577)	Top-5 acc 56.250 (49.574)	lr 0.01500
Warmup Train [26][780/3239]	Time 0.318 (0.248)	Data 0.001 (0.024)	Loss 4.2045 (4.2290)	Top-1 acc 30.078 (26.577)	Top-5 acc 50.391 (49.566)	lr 0.01500
Warmup Train [26][790/3239]	Time 0.184 (0.248)	Data 0.001 (0.023)	Loss 4.1847 (4.2289)	Top-1 acc 29.688 (26.580)	Top-5 acc 49.219 (49.575)	lr 0.01499
Warmup Train [26][800/3239]	Time 0.268 (0.247)	Data 0.001 (0.023)	Loss 4.3139 (4.2297)	Top-1 acc 26.172 (26.573)	Top-5 acc 47.266 (49.555)	lr 0.01499
Warmup Train [26][810/3239]	Time 0.214 (0.247)	Data 0.001 (0.023)	Loss 4.1023 (4.2298)	Top-1 acc 25.000 (26.566)	Top-5 acc 55.469 (49.555)	lr 0.01498
Warmup Train [26][820/3239]	Time 0.216 (0.247)	Data 0.001 (0.023)	Loss 4.2293 (4.2300)	Top-1 acc 26.953 (26.573)	Top-5 acc 50.781 (49.547)	lr 0.01498
Warmup Train [26][830/3239]	Time 0.139 (0.247)	Data 0.001 (0.022)	Loss 4.1663 (4.2302)	Top-1 acc 25.391 (26.568)	Top-5 acc 51.562 (49.538)	lr 0.01497
Warmup Train [26][840/3239]	Time 0.226 (0.246)	Data 0.001 (0.022)	Loss 4.2503 (4.2300)	Top-1 acc 22.656 (26.565)	Top-5 acc 48.438 (49.555)	lr 0.01496
Warmup Train [26][850/3239]	Time 0.163 (0.246)	Data 0.001 (0.022)	Loss 4.2276 (4.2304)	Top-1 acc 25.000 (26.558)	Top-5 acc 50.000 (49.540)	lr 0.01496
Warmup Train [26][860/3239]	Time 0.195 (0.245)	Data 0.001 (0.022)	Loss 4.2907 (4.2306)	Top-1 acc 23.438 (26.553)	Top-5 acc 48.047 (49.539)	lr 0.01495
Warmup Train [26][870/3239]	Time 0.233 (0.245)	Data 0.001 (0.022)	Loss 4.0599 (4.2298)	Top-1 acc 30.859 (26.574)	Top-5 acc 53.906 (49.550)	lr 0.01495
Warmup Train [26][880/3239]	Time 0.196 (0.245)	Data 0.001 (0.021)	Loss 4.3999 (4.2299)	Top-1 acc 26.172 (26.574)	Top-5 acc 45.703 (49.560)	lr 0.01494
Warmup Train [26][890/3239]	Time 0.360 (0.245)	Data 0.001 (0.021)	Loss 3.9928 (4.2297)	Top-1 acc 36.719 (26.584)	Top-5 acc 55.078 (49.564)	lr 0.01494
Warmup Train [26][900/3239]	Time 0.225 (0.245)	Data 0.001 (0.021)	Loss 4.1661 (4.2289)	Top-1 acc 30.859 (26.597)	Top-5 acc 47.656 (49.569)	lr 0.01493
Warmup Train [26][910/3239]	Time 0.221 (0.245)	Data 0.001 (0.021)	Loss 4.1718 (4.2293)	Top-1 acc 30.469 (26.593)	Top-5 acc 50.391 (49.553)	lr 0.01493
Warmup Train [26][920/3239]	Time 0.195 (0.244)	Data 0.001 (0.021)	Loss 4.3405 (4.2292)	Top-1 acc 23.828 (26.582)	Top-5 acc 45.312 (49.557)	lr 0.01492
Warmup Train [26][930/3239]	Time 0.205 (0.244)	Data 0.001 (0.020)	Loss 4.2075 (4.2292)	Top-1 acc 26.953 (26.589)	Top-5 acc 53.125 (49.549)	lr 0.01491
Warmup Train [26][940/3239]	Time 0.149 (0.244)	Data 0.001 (0.020)	Loss 4.2516 (4.2293)	Top-1 acc 26.172 (26.594)	Top-5 acc 48.438 (49.551)	lr 0.01491
Warmup Train [26][950/3239]	Time 0.195 (0.244)	Data 0.002 (0.020)	Loss 4.3990 (4.2299)	Top-1 acc 23.828 (26.597)	Top-5 acc 43.750 (49.536)	lr 0.01490
Warmup Train [26][960/3239]	Time 0.225 (0.244)	Data 0.001 (0.020)	Loss 4.2724 (4.2298)	Top-1 acc 25.391 (26.597)	Top-5 acc 51.172 (49.543)	lr 0.01490
Warmup Train [26][970/3239]	Time 0.284 (0.243)	Data 0.001 (0.020)	Loss 4.3682 (4.2303)	Top-1 acc 23.438 (26.587)	Top-5 acc 45.312 (49.525)	lr 0.01489
Warmup Train [26][980/3239]	Time 0.158 (0.243)	Data 0.001 (0.020)	Loss 4.3380 (4.2304)	Top-1 acc 25.000 (26.573)	Top-5 acc 47.266 (49.522)	lr 0.01489
Warmup Train [26][990/3239]	Time 0.245 (0.243)	Data 0.001 (0.019)	Loss 4.0897 (4.2310)	Top-1 acc 28.906 (26.568)	Top-5 acc 50.391 (49.504)	lr 0.01488
Warmup Train [26][1000/3239]	Time 0.303 (0.243)	Data 0.001 (0.019)	Loss 4.0680 (4.2311)	Top-1 acc 29.297 (26.563)	Top-5 acc 53.906 (49.498)	lr 0.01488
Warmup Train [26][1010/3239]	Time 0.381 (0.243)	Data 0.001 (0.019)	Loss 4.3977 (4.2314)	Top-1 acc 25.000 (26.561)	Top-5 acc 47.656 (49.494)	lr 0.01487
Warmup Train [26][1020/3239]	Time 0.172 (0.242)	Data 0.001 (0.019)	Loss 4.1239 (4.2319)	Top-1 acc 28.125 (26.543)	Top-5 acc 52.734 (49.479)	lr 0.01486
Warmup Train [26][1030/3239]	Time 0.195 (0.242)	Data 0.001 (0.019)	Loss 4.0227 (4.2321)	Top-1 acc 27.344 (26.541)	Top-5 acc 57.031 (49.467)	lr 0.01486
Warmup Train [26][1040/3239]	Time 0.218 (0.242)	Data 0.002 (0.019)	Loss 4.2618 (4.2323)	Top-1 acc 30.469 (26.539)	Top-5 acc 49.609 (49.463)	lr 0.01485
Warmup Train [26][1050/3239]	Time 0.215 (0.242)	Data 0.001 (0.018)	Loss 4.2064 (4.2324)	Top-1 acc 22.656 (26.544)	Top-5 acc 49.219 (49.462)	lr 0.01485
Warmup Train [26][1060/3239]	Time 0.226 (0.241)	Data 0.001 (0.018)	Loss 4.3872 (4.2323)	Top-1 acc 23.438 (26.550)	Top-5 acc 41.406 (49.468)	lr 0.01484
Warmup Train [26][1070/3239]	Time 0.194 (0.241)	Data 0.001 (0.018)	Loss 3.9975 (4.2320)	Top-1 acc 31.250 (26.548)	Top-5 acc 52.344 (49.467)	lr 0.01484
Warmup Train [26][1080/3239]	Time 0.262 (0.241)	Data 0.001 (0.018)	Loss 4.0264 (4.2313)	Top-1 acc 30.469 (26.560)	Top-5 acc 49.609 (49.481)	lr 0.01483
Warmup Train [26][1090/3239]	Time 0.215 (0.241)	Data 0.001 (0.018)	Loss 4.0329 (4.2309)	Top-1 acc 31.250 (26.569)	Top-5 acc 52.734 (49.490)	lr 0.01483
Warmup Train [26][1100/3239]	Time 0.189 (0.241)	Data 0.001 (0.018)	Loss 4.0948 (4.2305)	Top-1 acc 30.859 (26.573)	Top-5 acc 51.172 (49.498)	lr 0.01482
Warmup Train [26][1110/3239]	Time 0.313 (0.241)	Data 0.001 (0.018)	Loss 4.2129 (4.2305)	Top-1 acc 28.125 (26.571)	Top-5 acc 52.734 (49.499)	lr 0.01481
Warmup Train [26][1120/3239]	Time 0.186 (0.240)	Data 0.002 (0.017)	Loss 4.1698 (4.2305)	Top-1 acc 28.125 (26.583)	Top-5 acc 52.734 (49.504)	lr 0.01481
Warmup Train [26][1130/3239]	Time 0.186 (0.240)	Data 0.001 (0.017)	Loss 4.2090 (4.2303)	Top-1 acc 28.516 (26.591)	Top-5 acc 48.438 (49.511)	lr 0.01480
Warmup Train [26][1140/3239]	Time 0.255 (0.240)	Data 0.001 (0.017)	Loss 4.1951 (4.2303)	Top-1 acc 31.250 (26.597)	Top-5 acc 52.344 (49.513)	lr 0.01480
Warmup Train [26][1150/3239]	Time 0.231 (0.240)	Data 0.001 (0.017)	Loss 4.3509 (4.2304)	Top-1 acc 24.609 (26.603)	Top-5 acc 48.438 (49.515)	lr 0.01479
Warmup Train [26][1160/3239]	Time 0.128 (0.240)	Data 0.001 (0.017)	Loss 4.1598 (4.2305)	Top-1 acc 30.078 (26.599)	Top-5 acc 50.781 (49.500)	lr 0.01479
Warmup Train [26][1170/3239]	Time 0.189 (0.239)	Data 0.001 (0.017)	Loss 4.1302 (4.2303)	Top-1 acc 28.906 (26.598)	Top-5 acc 51.562 (49.497)	lr 0.01478
Warmup Train [26][1180/3239]	Time 0.223 (0.239)	Data 0.002 (0.017)	Loss 4.1635 (4.2307)	Top-1 acc 28.125 (26.580)	Top-5 acc 50.000 (49.483)	lr 0.01478
Warmup Train [26][1190/3239]	Time 0.180 (0.239)	Data 0.001 (0.017)	Loss 4.3224 (4.2311)	Top-1 acc 24.609 (26.581)	Top-5 acc 44.141 (49.480)	lr 0.01477
Warmup Train [26][1200/3239]	Time 0.182 (0.239)	Data 0.001 (0.017)	Loss 4.1055 (4.2308)	Top-1 acc 28.906 (26.585)	Top-5 acc 54.297 (49.487)	lr 0.01477
Warmup Train [26][1210/3239]	Time 0.244 (0.239)	Data 0.001 (0.016)	Loss 4.1943 (4.2304)	Top-1 acc 28.125 (26.592)	Top-5 acc 49.609 (49.498)	lr 0.01476
Warmup Train [26][1220/3239]	Time 0.256 (0.239)	Data 0.002 (0.016)	Loss 4.1785 (4.2300)	Top-1 acc 28.516 (26.610)	Top-5 acc 50.391 (49.511)	lr 0.01475
Warmup Train [26][1230/3239]	Time 0.145 (0.239)	Data 0.001 (0.016)	Loss 4.3430 (4.2302)	Top-1 acc 23.828 (26.610)	Top-5 acc 47.656 (49.509)	lr 0.01475
Warmup Train [26][1240/3239]	Time 0.254 (0.239)	Data 0.001 (0.016)	Loss 4.1945 (4.2306)	Top-1 acc 27.734 (26.604)	Top-5 acc 50.391 (49.497)	lr 0.01474
Warmup Train [26][1250/3239]	Time 0.184 (0.239)	Data 0.001 (0.016)	Loss 4.3332 (4.2304)	Top-1 acc 27.734 (26.604)	Top-5 acc 44.922 (49.499)	lr 0.01474
Warmup Train [26][1260/3239]	Time 0.221 (0.239)	Data 0.001 (0.016)	Loss 4.1798 (4.2307)	Top-1 acc 25.781 (26.600)	Top-5 acc 50.781 (49.490)	lr 0.01473
Warmup Train [26][1270/3239]	Time 0.145 (0.239)	Data 0.001 (0.016)	Loss 4.2432 (4.2308)	Top-1 acc 30.078 (26.603)	Top-5 acc 46.875 (49.487)	lr 0.01473
Warmup Train [26][1280/3239]	Time 0.234 (0.238)	Data 0.001 (0.016)	Loss 4.4914 (4.2311)	Top-1 acc 23.828 (26.605)	Top-5 acc 41.406 (49.486)	lr 0.01472
Warmup Train [26][1290/3239]	Time 0.235 (0.238)	Data 0.001 (0.016)	Loss 4.2307 (4.2308)	Top-1 acc 22.656 (26.606)	Top-5 acc 51.562 (49.494)	lr 0.01472
Warmup Train [26][1300/3239]	Time 0.286 (0.238)	Data 0.001 (0.015)	Loss 4.0408 (4.2307)	Top-1 acc 29.688 (26.605)	Top-5 acc 55.469 (49.500)	lr 0.01471
Warmup Train [26][1310/3239]	Time 0.279 (0.238)	Data 0.002 (0.015)	Loss 4.2267 (4.2304)	Top-1 acc 24.609 (26.602)	Top-5 acc 49.219 (49.504)	lr 0.01470
Warmup Train [26][1320/3239]	Time 0.282 (0.238)	Data 0.002 (0.015)	Loss 4.3867 (4.2307)	Top-1 acc 21.875 (26.595)	Top-5 acc 44.141 (49.492)	lr 0.01470
Warmup Train [26][1330/3239]	Time 0.233 (0.238)	Data 0.001 (0.015)	Loss 4.2936 (4.2305)	Top-1 acc 27.734 (26.600)	Top-5 acc 48.047 (49.493)	lr 0.01469
Warmup Train [26][1340/3239]	Time 0.248 (0.238)	Data 0.001 (0.015)	Loss 3.9000 (4.2306)	Top-1 acc 32.812 (26.590)	Top-5 acc 58.203 (49.485)	lr 0.01469
Warmup Train [26][1350/3239]	Time 0.142 (0.238)	Data 0.001 (0.015)	Loss 4.1374 (4.2310)	Top-1 acc 31.641 (26.584)	Top-5 acc 52.734 (49.476)	lr 0.01468
Warmup Train [26][1360/3239]	Time 0.264 (0.238)	Data 0.001 (0.015)	Loss 4.0581 (4.2309)	Top-1 acc 26.562 (26.590)	Top-5 acc 51.953 (49.472)	lr 0.01468
Warmup Train [26][1370/3239]	Time 0.266 (0.238)	Data 0.001 (0.015)	Loss 4.2152 (4.2314)	Top-1 acc 26.562 (26.582)	Top-5 acc 51.172 (49.458)	lr 0.01467
Warmup Train [26][1380/3239]	Time 0.231 (0.237)	Data 0.001 (0.015)	Loss 4.5597 (4.2321)	Top-1 acc 20.703 (26.574)	Top-5 acc 42.578 (49.441)	lr 0.01467
Warmup Train [26][1390/3239]	Time 0.146 (0.237)	Data 0.001 (0.015)	Loss 4.3696 (4.2319)	Top-1 acc 23.438 (26.579)	Top-5 acc 51.562 (49.448)	lr 0.01466
Warmup Train [26][1400/3239]	Time 0.178 (0.237)	Data 0.002 (0.015)	Loss 4.3999 (4.2319)	Top-1 acc 23.438 (26.583)	Top-5 acc 48.047 (49.451)	lr 0.01465
Warmup Train [26][1410/3239]	Time 0.141 (0.237)	Data 0.001 (0.014)	Loss 4.0963 (4.2320)	Top-1 acc 23.828 (26.579)	Top-5 acc 54.297 (49.445)	lr 0.01465
Warmup Train [26][1420/3239]	Time 0.180 (0.237)	Data 0.001 (0.014)	Loss 4.1420 (4.2317)	Top-1 acc 27.734 (26.584)	Top-5 acc 52.344 (49.452)	lr 0.01464
Warmup Train [26][1430/3239]	Time 0.375 (0.237)	Data 0.001 (0.014)	Loss 4.1407 (4.2313)	Top-1 acc 26.953 (26.585)	Top-5 acc 53.125 (49.462)	lr 0.01464
Warmup Train [26][1440/3239]	Time 0.190 (0.237)	Data 0.001 (0.014)	Loss 4.2511 (4.2322)	Top-1 acc 26.562 (26.569)	Top-5 acc 46.875 (49.433)	lr 0.01463
Warmup Train [26][1450/3239]	Time 0.226 (0.237)	Data 0.001 (0.014)	Loss 4.5050 (4.2325)	Top-1 acc 20.703 (26.561)	Top-5 acc 42.578 (49.421)	lr 0.01463
Warmup Train [26][1460/3239]	Time 0.138 (0.237)	Data 0.001 (0.014)	Loss 4.3875 (4.2331)	Top-1 acc 24.609 (26.552)	Top-5 acc 45.703 (49.405)	lr 0.01462
Warmup Train [26][1470/3239]	Time 0.293 (0.237)	Data 0.001 (0.014)	Loss 4.2483 (4.2328)	Top-1 acc 27.344 (26.562)	Top-5 acc 46.875 (49.413)	lr 0.01462
Warmup Train [26][1480/3239]	Time 0.321 (0.237)	Data 0.004 (0.014)	Loss 4.1354 (4.2323)	Top-1 acc 28.906 (26.565)	Top-5 acc 48.047 (49.416)	lr 0.01461
Warmup Train [26][1490/3239]	Time 0.224 (0.237)	Data 0.001 (0.014)	Loss 4.3590 (4.2317)	Top-1 acc 24.609 (26.581)	Top-5 acc 42.578 (49.431)	lr 0.01461
Warmup Train [26][1500/3239]	Time 0.228 (0.237)	Data 0.001 (0.014)	Loss 4.0982 (4.2314)	Top-1 acc 30.469 (26.587)	Top-5 acc 53.516 (49.436)	lr 0.01460
Warmup Train [26][1510/3239]	Time 0.252 (0.237)	Data 0.002 (0.014)	Loss 4.1967 (4.2318)	Top-1 acc 27.344 (26.581)	Top-5 acc 48.438 (49.418)	lr 0.01459
Warmup Train [26][1520/3239]	Time 0.246 (0.237)	Data 0.001 (0.014)	Loss 4.2285 (4.2321)	Top-1 acc 27.344 (26.573)	Top-5 acc 52.344 (49.416)	lr 0.01459
Warmup Train [26][1530/3239]	Time 0.275 (0.237)	Data 0.002 (0.014)	Loss 4.1176 (4.2318)	Top-1 acc 28.125 (26.577)	Top-5 acc 50.000 (49.417)	lr 0.01458
Warmup Train [26][1540/3239]	Time 0.170 (0.237)	Data 0.001 (0.014)	Loss 4.2226 (4.2317)	Top-1 acc 28.516 (26.579)	Top-5 acc 49.219 (49.417)	lr 0.01458
Warmup Train [26][1550/3239]	Time 0.206 (0.236)	Data 0.001 (0.013)	Loss 4.1805 (4.2317)	Top-1 acc 27.734 (26.578)	Top-5 acc 51.953 (49.417)	lr 0.01457
Warmup Train [26][1560/3239]	Time 0.148 (0.236)	Data 0.001 (0.013)	Loss 4.2638 (4.2315)	Top-1 acc 23.828 (26.582)	Top-5 acc 46.484 (49.425)	lr 0.01457
Warmup Train [26][1570/3239]	Time 0.208 (0.236)	Data 0.001 (0.013)	Loss 4.3926 (4.2314)	Top-1 acc 21.484 (26.580)	Top-5 acc 42.578 (49.424)	lr 0.01456
Warmup Train [26][1580/3239]	Time 0.145 (0.236)	Data 0.002 (0.013)	Loss 4.1880 (4.2315)	Top-1 acc 29.688 (26.576)	Top-5 acc 51.562 (49.420)	lr 0.01456
Warmup Train [26][1590/3239]	Time 0.230 (0.236)	Data 0.001 (0.013)	Loss 4.1175 (4.2313)	Top-1 acc 27.734 (26.579)	Top-5 acc 51.172 (49.424)	lr 0.01455
Warmup Train [26][1600/3239]	Time 0.232 (0.236)	Data 0.001 (0.013)	Loss 4.2217 (4.2313)	Top-1 acc 32.031 (26.585)	Top-5 acc 46.484 (49.420)	lr 0.01454
Warmup Train [26][1610/3239]	Time 0.197 (0.236)	Data 0.001 (0.013)	Loss 4.2366 (4.2311)	Top-1 acc 32.812 (26.595)	Top-5 acc 50.000 (49.430)	lr 0.01454
Warmup Train [26][1620/3239]	Time 0.239 (0.236)	Data 0.001 (0.013)	Loss 4.3325 (4.2310)	Top-1 acc 24.219 (26.596)	Top-5 acc 47.266 (49.431)	lr 0.01453
Warmup Train [26][1630/3239]	Time 0.343 (0.236)	Data 0.002 (0.013)	Loss 4.1786 (4.2309)	Top-1 acc 25.781 (26.595)	Top-5 acc 49.609 (49.431)	lr 0.01453
Warmup Train [26][1640/3239]	Time 0.390 (0.236)	Data 0.001 (0.013)	Loss 4.1374 (4.2310)	Top-1 acc 28.125 (26.587)	Top-5 acc 51.562 (49.431)	lr 0.01452
Warmup Train [26][1650/3239]	Time 0.231 (0.237)	Data 0.001 (0.013)	Loss 4.2752 (4.2313)	Top-1 acc 26.172 (26.582)	Top-5 acc 47.656 (49.417)	lr 0.01452
Warmup Train [26][1660/3239]	Time 0.236 (0.237)	Data 0.002 (0.013)	Loss 4.2657 (4.2313)	Top-1 acc 25.781 (26.582)	Top-5 acc 46.484 (49.418)	lr 0.01451
Warmup Train [26][1670/3239]	Time 0.247 (0.237)	Data 0.001 (0.013)	Loss 4.0760 (4.2313)	Top-1 acc 31.250 (26.582)	Top-5 acc 53.906 (49.413)	lr 0.01451
Warmup Train [26][1680/3239]	Time 0.237 (0.237)	Data 0.001 (0.013)	Loss 4.0159 (4.2310)	Top-1 acc 30.469 (26.587)	Top-5 acc 55.469 (49.421)	lr 0.01450
Warmup Train [26][1690/3239]	Time 0.178 (0.237)	Data 0.002 (0.013)	Loss 4.3548 (4.2313)	Top-1 acc 22.266 (26.576)	Top-5 acc 47.266 (49.411)	lr 0.01449
Warmup Train [26][1700/3239]	Time 0.200 (0.237)	Data 0.002 (0.013)	Loss 4.2063 (4.2311)	Top-1 acc 23.047 (26.576)	Top-5 acc 51.953 (49.414)	lr 0.01449
Warmup Train [26][1710/3239]	Time 0.190 (0.237)	Data 0.001 (0.013)	Loss 4.2688 (4.2311)	Top-1 acc 27.344 (26.582)	Top-5 acc 47.656 (49.418)	lr 0.01448
Warmup Train [26][1720/3239]	Time 0.203 (0.237)	Data 0.001 (0.012)	Loss 4.0662 (4.2309)	Top-1 acc 33.203 (26.586)	Top-5 acc 55.078 (49.417)	lr 0.01448
Warmup Train [26][1730/3239]	Time 0.229 (0.237)	Data 0.001 (0.012)	Loss 4.0845 (4.2308)	Top-1 acc 30.859 (26.590)	Top-5 acc 52.344 (49.422)	lr 0.01447
Warmup Train [26][1740/3239]	Time 0.355 (0.237)	Data 0.001 (0.012)	Loss 4.0830 (4.2306)	Top-1 acc 29.297 (26.596)	Top-5 acc 53.125 (49.425)	lr 0.01447
Warmup Train [26][1750/3239]	Time 0.191 (0.237)	Data 0.001 (0.012)	Loss 4.3753 (4.2305)	Top-1 acc 21.875 (26.601)	Top-5 acc 42.578 (49.424)	lr 0.01446
Warmup Train [26][1760/3239]	Time 0.216 (0.237)	Data 0.001 (0.012)	Loss 4.3662 (4.2306)	Top-1 acc 21.875 (26.601)	Top-5 acc 46.875 (49.421)	lr 0.01446
Warmup Train [26][1770/3239]	Time 0.170 (0.237)	Data 0.001 (0.012)	Loss 4.0635 (4.2305)	Top-1 acc 32.031 (26.600)	Top-5 acc 52.734 (49.421)	lr 0.01445
Warmup Train [26][1780/3239]	Time 0.188 (0.237)	Data 0.001 (0.012)	Loss 4.0859 (4.2305)	Top-1 acc 29.297 (26.603)	Top-5 acc 55.078 (49.428)	lr 0.01445
Warmup Train [26][1790/3239]	Time 0.186 (0.237)	Data 0.001 (0.012)	Loss 4.2755 (4.2307)	Top-1 acc 29.297 (26.608)	Top-5 acc 47.266 (49.424)	lr 0.01444
Warmup Train [26][1800/3239]	Time 0.222 (0.237)	Data 0.002 (0.012)	Loss 4.2013 (4.2305)	Top-1 acc 29.297 (26.610)	Top-5 acc 51.562 (49.430)	lr 0.01443
Warmup Train [26][1810/3239]	Time 0.239 (0.237)	Data 0.024 (0.012)	Loss 4.3759 (4.2308)	Top-1 acc 22.656 (26.608)	Top-5 acc 47.656 (49.424)	lr 0.01443
Warmup Train [26][1820/3239]	Time 0.242 (0.236)	Data 0.001 (0.012)	Loss 4.4181 (4.2311)	Top-1 acc 20.703 (26.594)	Top-5 acc 45.312 (49.421)	lr 0.01442
Warmup Train [26][1830/3239]	Time 0.220 (0.236)	Data 0.001 (0.012)	Loss 4.2556 (4.2307)	Top-1 acc 25.391 (26.604)	Top-5 acc 51.562 (49.431)	lr 0.01442
Warmup Train [26][1840/3239]	Time 0.189 (0.236)	Data 0.001 (0.012)	Loss 4.6111 (4.2310)	Top-1 acc 21.094 (26.600)	Top-5 acc 40.625 (49.425)	lr 0.01441
Warmup Train [26][1850/3239]	Time 0.206 (0.236)	Data 0.001 (0.012)	Loss 4.1304 (4.2309)	Top-1 acc 27.344 (26.603)	Top-5 acc 55.078 (49.423)	lr 0.01441
Warmup Train [26][1860/3239]	Time 0.198 (0.236)	Data 0.001 (0.012)	Loss 4.1742 (4.2308)	Top-1 acc 33.984 (26.606)	Top-5 acc 49.219 (49.424)	lr 0.01440
Warmup Train [26][1870/3239]	Time 0.194 (0.236)	Data 0.001 (0.012)	Loss 4.0684 (4.2305)	Top-1 acc 29.297 (26.612)	Top-5 acc 55.469 (49.435)	lr 0.01440
Warmup Train [26][1880/3239]	Time 0.275 (0.236)	Data 0.001 (0.012)	Loss 4.1777 (4.2305)	Top-1 acc 27.734 (26.617)	Top-5 acc 51.172 (49.434)	lr 0.01439
Warmup Train [26][1890/3239]	Time 0.182 (0.236)	Data 0.001 (0.012)	Loss 4.3046 (4.2305)	Top-1 acc 26.172 (26.616)	Top-5 acc 46.484 (49.432)	lr 0.01439
Warmup Train [26][1900/3239]	Time 0.235 (0.236)	Data 0.001 (0.012)	Loss 4.2781 (4.2303)	Top-1 acc 23.438 (26.616)	Top-5 acc 48.047 (49.439)	lr 0.01438
Warmup Train [26][1910/3239]	Time 0.150 (0.236)	Data 0.001 (0.012)	Loss 4.2518 (4.2303)	Top-1 acc 25.391 (26.615)	Top-5 acc 47.266 (49.440)	lr 0.01437
Warmup Train [26][1920/3239]	Time 0.164 (0.236)	Data 0.001 (0.011)	Loss 4.1867 (4.2305)	Top-1 acc 22.656 (26.606)	Top-5 acc 50.391 (49.435)	lr 0.01437
Warmup Train [26][1930/3239]	Time 0.171 (0.236)	Data 0.001 (0.011)	Loss 4.1231 (4.2304)	Top-1 acc 30.078 (26.612)	Top-5 acc 48.828 (49.435)	lr 0.01436
Warmup Train [26][1940/3239]	Time 0.208 (0.236)	Data 0.001 (0.011)	Loss 4.3583 (4.2306)	Top-1 acc 23.047 (26.607)	Top-5 acc 45.312 (49.427)	lr 0.01436
Warmup Train [26][1950/3239]	Time 0.239 (0.236)	Data 0.001 (0.011)	Loss 4.3518 (4.2306)	Top-1 acc 24.609 (26.611)	Top-5 acc 48.047 (49.430)	lr 0.01435
Warmup Train [26][1960/3239]	Time 0.389 (0.236)	Data 0.001 (0.011)	Loss 4.1811 (4.2304)	Top-1 acc 30.859 (26.618)	Top-5 acc 53.516 (49.432)	lr 0.01435
Warmup Train [26][1970/3239]	Time 0.234 (0.236)	Data 0.001 (0.011)	Loss 4.2574 (4.2304)	Top-1 acc 27.734 (26.624)	Top-5 acc 48.828 (49.428)	lr 0.01434
Warmup Train [26][1980/3239]	Time 0.223 (0.236)	Data 0.001 (0.011)	Loss 4.3805 (4.2304)	Top-1 acc 21.875 (26.625)	Top-5 acc 48.828 (49.432)	lr 0.01434
Warmup Train [26][1990/3239]	Time 0.186 (0.236)	Data 0.001 (0.011)	Loss 4.0297 (4.2302)	Top-1 acc 28.516 (26.624)	Top-5 acc 51.172 (49.438)	lr 0.01433
Warmup Train [26][2000/3239]	Time 0.180 (0.236)	Data 0.001 (0.011)	Loss 4.0410 (4.2303)	Top-1 acc 28.906 (26.621)	Top-5 acc 54.688 (49.438)	lr 0.01432
Warmup Train [26][2010/3239]	Time 0.190 (0.236)	Data 0.001 (0.011)	Loss 4.2263 (4.2305)	Top-1 acc 25.000 (26.623)	Top-5 acc 50.781 (49.433)	lr 0.01432
Warmup Train [26][2020/3239]	Time 0.223 (0.235)	Data 0.001 (0.011)	Loss 4.2139 (4.2305)	Top-1 acc 28.516 (26.625)	Top-5 acc 50.781 (49.431)	lr 0.01431
Warmup Train [26][2030/3239]	Time 0.266 (0.235)	Data 0.001 (0.011)	Loss 4.1312 (4.2305)	Top-1 acc 26.953 (26.625)	Top-5 acc 51.562 (49.436)	lr 0.01431
Warmup Train [26][2040/3239]	Time 0.159 (0.235)	Data 0.001 (0.011)	Loss 4.3255 (4.2306)	Top-1 acc 26.953 (26.624)	Top-5 acc 42.969 (49.427)	lr 0.01430
Warmup Train [26][2050/3239]	Time 0.204 (0.235)	Data 0.001 (0.011)	Loss 4.1404 (4.2306)	Top-1 acc 26.953 (26.621)	Top-5 acc 49.609 (49.423)	lr 0.01430
Warmup Train [26][2060/3239]	Time 0.304 (0.235)	Data 0.001 (0.011)	Loss 4.1516 (4.2304)	Top-1 acc 30.469 (26.629)	Top-5 acc 51.172 (49.426)	lr 0.01429
Warmup Train [26][2070/3239]	Time 0.205 (0.235)	Data 0.001 (0.011)	Loss 4.2854 (4.2303)	Top-1 acc 25.781 (26.632)	Top-5 acc 48.438 (49.429)	lr 0.01429
Warmup Train [26][2080/3239]	Time 0.185 (0.235)	Data 0.002 (0.011)	Loss 4.2259 (4.2303)	Top-1 acc 26.562 (26.637)	Top-5 acc 48.828 (49.431)	lr 0.01428
Warmup Train [26][2090/3239]	Time 0.203 (0.235)	Data 0.001 (0.011)	Loss 4.3085 (4.2307)	Top-1 acc 26.562 (26.627)	Top-5 acc 46.484 (49.422)	lr 0.01428
Warmup Train [26][2100/3239]	Time 0.181 (0.235)	Data 0.001 (0.011)	Loss 4.0500 (4.2309)	Top-1 acc 27.344 (26.626)	Top-5 acc 54.688 (49.424)	lr 0.01427
Warmup Train [26][2110/3239]	Time 0.252 (0.235)	Data 0.001 (0.011)	Loss 4.3291 (4.2306)	Top-1 acc 25.000 (26.626)	Top-5 acc 46.094 (49.431)	lr 0.01426
Warmup Train [26][2120/3239]	Time 0.203 (0.235)	Data 0.001 (0.011)	Loss 4.4265 (4.2308)	Top-1 acc 21.875 (26.617)	Top-5 acc 49.219 (49.425)	lr 0.01426
Warmup Train [26][2130/3239]	Time 0.208 (0.235)	Data 0.002 (0.011)	Loss 4.3528 (4.2309)	Top-1 acc 25.000 (26.612)	Top-5 acc 47.266 (49.425)	lr 0.01425
Warmup Train [26][2140/3239]	Time 0.171 (0.235)	Data 0.001 (0.011)	Loss 4.0692 (4.2307)	Top-1 acc 27.734 (26.611)	Top-5 acc 53.516 (49.434)	lr 0.01425
Warmup Train [26][2150/3239]	Time 0.148 (0.235)	Data 0.002 (0.011)	Loss 4.4835 (4.2306)	Top-1 acc 21.875 (26.615)	Top-5 acc 43.359 (49.440)	lr 0.01424
Warmup Train [26][2160/3239]	Time 0.306 (0.235)	Data 0.001 (0.011)	Loss 4.1898 (4.2306)	Top-1 acc 23.438 (26.613)	Top-5 acc 51.562 (49.439)	lr 0.01424
Warmup Train [26][2170/3239]	Time 0.167 (0.235)	Data 0.001 (0.011)	Loss 4.0905 (4.2304)	Top-1 acc 25.391 (26.611)	Top-5 acc 51.562 (49.443)	lr 0.01423
Warmup Train [26][2180/3239]	Time 0.217 (0.234)	Data 0.001 (0.011)	Loss 4.2508 (4.2306)	Top-1 acc 25.000 (26.608)	Top-5 acc 51.953 (49.443)	lr 0.01423
Warmup Train [26][2190/3239]	Time 0.265 (0.234)	Data 0.001 (0.011)	Loss 4.1412 (4.2303)	Top-1 acc 27.734 (26.613)	Top-5 acc 48.828 (49.443)	lr 0.01422
Warmup Train [26][2200/3239]	Time 0.202 (0.234)	Data 0.001 (0.010)	Loss 4.4727 (4.2304)	Top-1 acc 22.656 (26.616)	Top-5 acc 45.703 (49.441)	lr 0.01422
Warmup Train [26][2210/3239]	Time 0.229 (0.234)	Data 0.001 (0.010)	Loss 4.1478 (4.2303)	Top-1 acc 28.906 (26.618)	Top-5 acc 50.000 (49.442)	lr 0.01421
Warmup Train [26][2220/3239]	Time 0.247 (0.234)	Data 0.002 (0.010)	Loss 4.1537 (4.2304)	Top-1 acc 26.172 (26.617)	Top-5 acc 55.078 (49.443)	lr 0.01420
Warmup Train [26][2230/3239]	Time 0.172 (0.234)	Data 0.002 (0.010)	Loss 4.4321 (4.2303)	Top-1 acc 21.484 (26.619)	Top-5 acc 47.266 (49.447)	lr 0.01420
Warmup Train [26][2240/3239]	Time 0.172 (0.234)	Data 0.001 (0.010)	Loss 4.2844 (4.2301)	Top-1 acc 23.047 (26.616)	Top-5 acc 46.484 (49.448)	lr 0.01419
Warmup Train [26][2250/3239]	Time 0.237 (0.234)	Data 0.001 (0.010)	Loss 4.2878 (4.2299)	Top-1 acc 25.391 (26.613)	Top-5 acc 45.703 (49.455)	lr 0.01419
Warmup Train [26][2260/3239]	Time 0.223 (0.234)	Data 0.001 (0.010)	Loss 4.0170 (4.2298)	Top-1 acc 30.469 (26.613)	Top-5 acc 54.297 (49.457)	lr 0.01418
Warmup Train [26][2270/3239]	Time 0.320 (0.234)	Data 0.001 (0.010)	Loss 4.3522 (4.2299)	Top-1 acc 21.484 (26.611)	Top-5 acc 49.219 (49.457)	lr 0.01418
Warmup Train [26][2280/3239]	Time 0.392 (0.234)	Data 0.001 (0.010)	Loss 4.3525 (4.2301)	Top-1 acc 23.438 (26.606)	Top-5 acc 41.406 (49.452)	lr 0.01417
Warmup Train [26][2290/3239]	Time 0.242 (0.234)	Data 0.002 (0.010)	Loss 4.0515 (4.2301)	Top-1 acc 31.641 (26.606)	Top-5 acc 51.953 (49.451)	lr 0.01417
Warmup Train [26][2300/3239]	Time 0.232 (0.234)	Data 0.001 (0.010)	Loss 4.2049 (4.2302)	Top-1 acc 25.000 (26.603)	Top-5 acc 48.047 (49.447)	lr 0.01416
Warmup Train [26][2310/3239]	Time 0.217 (0.234)	Data 0.003 (0.010)	Loss 4.2167 (4.2302)	Top-1 acc 27.344 (26.602)	Top-5 acc 51.172 (49.445)	lr 0.01416
Warmup Train [26][2320/3239]	Time 0.197 (0.234)	Data 0.001 (0.010)	Loss 4.2126 (4.2304)	Top-1 acc 28.906 (26.601)	Top-5 acc 49.609 (49.442)	lr 0.01415
Warmup Train [26][2330/3239]	Time 0.207 (0.234)	Data 0.001 (0.010)	Loss 4.1022 (4.2301)	Top-1 acc 26.953 (26.608)	Top-5 acc 50.391 (49.448)	lr 0.01414
Warmup Train [26][2340/3239]	Time 0.152 (0.234)	Data 0.001 (0.010)	Loss 4.1628 (4.2301)	Top-1 acc 26.953 (26.609)	Top-5 acc 50.391 (49.448)	lr 0.01414
Warmup Train [26][2350/3239]	Time 0.209 (0.234)	Data 0.001 (0.010)	Loss 4.3429 (4.2301)	Top-1 acc 23.047 (26.611)	Top-5 acc 47.656 (49.446)	lr 0.01413
Warmup Train [26][2360/3239]	Time 0.232 (0.234)	Data 0.001 (0.010)	Loss 4.0388 (4.2297)	Top-1 acc 29.688 (26.618)	Top-5 acc 54.297 (49.456)	lr 0.01413
Warmup Train [26][2370/3239]	Time 0.250 (0.234)	Data 0.001 (0.010)	Loss 4.1905 (4.2296)	Top-1 acc 28.906 (26.622)	Top-5 acc 50.000 (49.455)	lr 0.01412
Warmup Train [26][2380/3239]	Time 0.293 (0.234)	Data 0.001 (0.010)	Loss 4.1841 (4.2295)	Top-1 acc 26.562 (26.624)	Top-5 acc 51.562 (49.460)	lr 0.01412
Warmup Train [26][2390/3239]	Time 0.156 (0.234)	Data 0.001 (0.010)	Loss 4.0488 (4.2295)	Top-1 acc 28.516 (26.627)	Top-5 acc 51.562 (49.463)	lr 0.01411
Warmup Train [26][2400/3239]	Time 0.200 (0.234)	Data 0.001 (0.010)	Loss 4.1831 (4.2295)	Top-1 acc 26.953 (26.625)	Top-5 acc 51.953 (49.464)	lr 0.01411
Warmup Train [26][2410/3239]	Time 0.229 (0.234)	Data 0.001 (0.010)	Loss 4.3359 (4.2295)	Top-1 acc 25.391 (26.625)	Top-5 acc 46.875 (49.464)	lr 0.01410
Warmup Train [26][2420/3239]	Time 0.245 (0.234)	Data 0.001 (0.010)	Loss 4.5883 (4.2294)	Top-1 acc 18.750 (26.630)	Top-5 acc 38.281 (49.464)	lr 0.01410
Warmup Train [26][2430/3239]	Time 0.204 (0.234)	Data 0.001 (0.010)	Loss 4.3679 (4.2295)	Top-1 acc 27.734 (26.622)	Top-5 acc 46.094 (49.460)	lr 0.01409
Warmup Train [26][2440/3239]	Time 0.145 (0.234)	Data 0.001 (0.010)	Loss 4.1033 (4.2294)	Top-1 acc 30.469 (26.623)	Top-5 acc 51.172 (49.460)	lr 0.01408
Warmup Train [26][2450/3239]	Time 0.210 (0.233)	Data 0.001 (0.010)	Loss 4.2246 (4.2292)	Top-1 acc 27.344 (26.625)	Top-5 acc 51.172 (49.459)	lr 0.01408
Warmup Train [26][2460/3239]	Time 0.226 (0.233)	Data 0.001 (0.010)	Loss 4.0365 (4.2289)	Top-1 acc 30.859 (26.633)	Top-5 acc 57.031 (49.468)	lr 0.01407
Warmup Train [26][2470/3239]	Time 0.266 (0.233)	Data 0.001 (0.010)	Loss 4.3191 (4.2290)	Top-1 acc 25.000 (26.635)	Top-5 acc 45.703 (49.461)	lr 0.01407
Warmup Train [26][2480/3239]	Time 0.173 (0.233)	Data 0.001 (0.010)	Loss 4.4752 (4.2289)	Top-1 acc 22.266 (26.631)	Top-5 acc 46.875 (49.463)	lr 0.01406
Warmup Train [26][2490/3239]	Time 0.203 (0.233)	Data 0.001 (0.010)	Loss 4.2013 (4.2287)	Top-1 acc 27.344 (26.631)	Top-5 acc 50.391 (49.466)	lr 0.01406
Warmup Train [26][2500/3239]	Time 0.267 (0.233)	Data 0.001 (0.010)	Loss 4.2501 (4.2287)	Top-1 acc 24.609 (26.631)	Top-5 acc 46.875 (49.465)	lr 0.01405
Warmup Train [26][2510/3239]	Time 0.210 (0.233)	Data 0.001 (0.010)	Loss 4.2249 (4.2283)	Top-1 acc 26.562 (26.639)	Top-5 acc 50.000 (49.474)	lr 0.01405
Warmup Train [26][2520/3239]	Time 0.259 (0.233)	Data 0.001 (0.010)	Loss 4.2217 (4.2281)	Top-1 acc 28.125 (26.639)	Top-5 acc 49.219 (49.478)	lr 0.01404
Warmup Train [26][2530/3239]	Time 0.266 (0.233)	Data 0.001 (0.010)	Loss 4.3508 (4.2283)	Top-1 acc 26.172 (26.638)	Top-5 acc 47.656 (49.477)	lr 0.01404
Warmup Train [26][2540/3239]	Time 0.223 (0.233)	Data 0.001 (0.009)	Loss 4.3144 (4.2288)	Top-1 acc 25.000 (26.629)	Top-5 acc 47.656 (49.465)	lr 0.01403
Warmup Train [26][2550/3239]	Time 0.234 (0.233)	Data 0.001 (0.009)	Loss 4.3245 (4.2287)	Top-1 acc 21.094 (26.630)	Top-5 acc 48.438 (49.472)	lr 0.01402
Warmup Train [26][2560/3239]	Time 0.257 (0.233)	Data 0.001 (0.009)	Loss 4.2738 (4.2289)	Top-1 acc 23.828 (26.626)	Top-5 acc 45.703 (49.468)	lr 0.01402
Warmup Train [26][2570/3239]	Time 0.213 (0.233)	Data 0.001 (0.009)	Loss 4.1481 (4.2288)	Top-1 acc 28.906 (26.631)	Top-5 acc 51.953 (49.468)	lr 0.01401
Warmup Train [26][2580/3239]	Time 0.207 (0.233)	Data 0.001 (0.009)	Loss 4.2875 (4.2288)	Top-1 acc 29.688 (26.631)	Top-5 acc 48.438 (49.469)	lr 0.01401
Warmup Train [26][2590/3239]	Time 0.257 (0.233)	Data 0.001 (0.009)	Loss 4.0393 (4.2287)	Top-1 acc 27.734 (26.632)	Top-5 acc 52.344 (49.471)	lr 0.01400
Warmup Train [26][2600/3239]	Time 0.272 (0.233)	Data 0.001 (0.009)	Loss 4.1998 (4.2286)	Top-1 acc 28.125 (26.638)	Top-5 acc 47.656 (49.474)	lr 0.01400
Warmup Train [26][2610/3239]	Time 0.195 (0.233)	Data 0.001 (0.009)	Loss 4.2053 (4.2286)	Top-1 acc 26.953 (26.636)	Top-5 acc 48.828 (49.474)	lr 0.01399
Warmup Train [26][2620/3239]	Time 0.233 (0.233)	Data 0.001 (0.009)	Loss 4.2074 (4.2284)	Top-1 acc 25.781 (26.634)	Top-5 acc 50.000 (49.472)	lr 0.01399
Warmup Train [26][2630/3239]	Time 0.175 (0.233)	Data 0.001 (0.009)	Loss 4.3059 (4.2284)	Top-1 acc 25.391 (26.634)	Top-5 acc 51.172 (49.470)	lr 0.01398
Warmup Train [26][2640/3239]	Time 0.146 (0.233)	Data 0.001 (0.009)	Loss 4.2627 (4.2284)	Top-1 acc 27.734 (26.634)	Top-5 acc 48.828 (49.470)	lr 0.01398
Warmup Train [26][2650/3239]	Time 0.296 (0.233)	Data 0.001 (0.009)	Loss 4.2209 (4.2282)	Top-1 acc 30.078 (26.637)	Top-5 acc 50.000 (49.472)	lr 0.01397
Warmup Train [26][2660/3239]	Time 0.224 (0.233)	Data 0.001 (0.009)	Loss 4.2862 (4.2281)	Top-1 acc 26.562 (26.641)	Top-5 acc 46.875 (49.475)	lr 0.01396
Warmup Train [26][2670/3239]	Time 0.361 (0.233)	Data 0.002 (0.009)	Loss 4.2569 (4.2278)	Top-1 acc 30.078 (26.649)	Top-5 acc 49.219 (49.486)	lr 0.01396
Warmup Train [26][2680/3239]	Time 0.229 (0.233)	Data 0.001 (0.009)	Loss 4.1990 (4.2276)	Top-1 acc 27.734 (26.651)	Top-5 acc 52.344 (49.491)	lr 0.01395
Warmup Train [26][2690/3239]	Time 0.146 (0.233)	Data 0.001 (0.009)	Loss 4.2981 (4.2278)	Top-1 acc 25.781 (26.649)	Top-5 acc 47.656 (49.487)	lr 0.01395
Warmup Train [26][2700/3239]	Time 0.194 (0.233)	Data 0.001 (0.009)	Loss 4.3525 (4.2280)	Top-1 acc 23.438 (26.647)	Top-5 acc 47.656 (49.486)	lr 0.01394
Warmup Train [26][2710/3239]	Time 0.238 (0.233)	Data 0.001 (0.009)	Loss 4.2650 (4.2280)	Top-1 acc 26.562 (26.644)	Top-5 acc 46.484 (49.488)	lr 0.01394
Warmup Train [26][2720/3239]	Time 0.167 (0.233)	Data 0.001 (0.009)	Loss 4.1505 (4.2279)	Top-1 acc 28.906 (26.648)	Top-5 acc 51.953 (49.491)	lr 0.01393
Warmup Train [26][2730/3239]	Time 0.242 (0.233)	Data 0.002 (0.009)	Loss 4.3055 (4.2277)	Top-1 acc 24.219 (26.647)	Top-5 acc 45.703 (49.494)	lr 0.01393
Warmup Train [26][2740/3239]	Time 0.241 (0.233)	Data 0.002 (0.009)	Loss 4.3773 (4.2278)	Top-1 acc 21.094 (26.645)	Top-5 acc 45.312 (49.493)	lr 0.01392
Warmup Train [26][2750/3239]	Time 0.181 (0.233)	Data 0.001 (0.009)	Loss 4.0448 (4.2274)	Top-1 acc 31.250 (26.654)	Top-5 acc 53.906 (49.498)	lr 0.01392
Warmup Train [26][2760/3239]	Time 0.355 (0.233)	Data 0.001 (0.009)	Loss 4.0345 (4.2271)	Top-1 acc 28.906 (26.661)	Top-5 acc 53.906 (49.502)	lr 0.01391
Warmup Train [26][2770/3239]	Time 0.207 (0.233)	Data 0.001 (0.009)	Loss 4.1945 (4.2270)	Top-1 acc 29.297 (26.667)	Top-5 acc 49.609 (49.509)	lr 0.01390
Warmup Train [26][2780/3239]	Time 0.202 (0.233)	Data 0.001 (0.009)	Loss 4.3059 (4.2273)	Top-1 acc 27.734 (26.662)	Top-5 acc 50.000 (49.499)	lr 0.01390
Warmup Train [26][2790/3239]	Time 0.208 (0.233)	Data 0.001 (0.009)	Loss 4.1229 (4.2272)	Top-1 acc 25.781 (26.661)	Top-5 acc 50.781 (49.500)	lr 0.01389
Warmup Train [26][2800/3239]	Time 0.223 (0.233)	Data 0.002 (0.009)	Loss 4.0997 (4.2273)	Top-1 acc 28.125 (26.658)	Top-5 acc 49.609 (49.493)	lr 0.01389
Warmup Train [26][2810/3239]	Time 0.209 (0.233)	Data 0.001 (0.009)	Loss 4.1601 (4.2274)	Top-1 acc 26.953 (26.657)	Top-5 acc 50.000 (49.486)	lr 0.01388
Warmup Train [26][2820/3239]	Time 0.259 (0.233)	Data 0.001 (0.009)	Loss 4.1078 (4.2273)	Top-1 acc 29.688 (26.663)	Top-5 acc 50.781 (49.493)	lr 0.01388
Warmup Train [26][2830/3239]	Time 0.196 (0.232)	Data 0.001 (0.009)	Loss 4.2378 (4.2274)	Top-1 acc 26.172 (26.667)	Top-5 acc 50.781 (49.493)	lr 0.01387
Warmup Train [26][2840/3239]	Time 0.161 (0.232)	Data 0.001 (0.009)	Loss 4.2987 (4.2272)	Top-1 acc 29.297 (26.672)	Top-5 acc 46.094 (49.496)	lr 0.01387
Warmup Train [26][2850/3239]	Time 0.199 (0.232)	Data 0.001 (0.009)	Loss 4.1649 (4.2271)	Top-1 acc 24.609 (26.671)	Top-5 acc 51.172 (49.496)	lr 0.01386
Warmup Train [26][2860/3239]	Time 0.209 (0.233)	Data 0.001 (0.009)	Loss 4.2301 (4.2270)	Top-1 acc 24.219 (26.674)	Top-5 acc 47.656 (49.499)	lr 0.01386
Warmup Train [26][2870/3239]	Time 0.443 (0.233)	Data 0.003 (0.009)	Loss 4.2013 (4.2270)	Top-1 acc 26.172 (26.673)	Top-5 acc 52.734 (49.500)	lr 0.01385
Warmup Train [26][2880/3239]	Time 0.211 (0.233)	Data 0.001 (0.009)	Loss 4.2230 (4.2270)	Top-1 acc 22.656 (26.672)	Top-5 acc 48.047 (49.504)	lr 0.01385
Warmup Train [26][2890/3239]	Time 0.231 (0.233)	Data 0.001 (0.009)	Loss 4.1692 (4.2270)	Top-1 acc 29.297 (26.672)	Top-5 acc 51.953 (49.501)	lr 0.01384
Warmup Train [26][2900/3239]	Time 0.201 (0.233)	Data 0.001 (0.009)	Loss 4.2508 (4.2270)	Top-1 acc 25.781 (26.674)	Top-5 acc 50.781 (49.502)	lr 0.01383
Warmup Train [26][2910/3239]	Time 0.242 (0.233)	Data 0.001 (0.009)	Loss 4.3121 (4.2271)	Top-1 acc 21.484 (26.671)	Top-5 acc 49.219 (49.500)	lr 0.01383
Warmup Train [26][2920/3239]	Time 0.230 (0.233)	Data 0.001 (0.009)	Loss 4.3080 (4.2272)	Top-1 acc 24.219 (26.669)	Top-5 acc 47.266 (49.497)	lr 0.01382
Warmup Train [26][2930/3239]	Time 0.214 (0.232)	Data 0.001 (0.009)	Loss 4.0963 (4.2271)	Top-1 acc 28.516 (26.673)	Top-5 acc 57.812 (49.502)	lr 0.01382
Warmup Train [26][2940/3239]	Time 0.181 (0.232)	Data 0.001 (0.009)	Loss 4.2419 (4.2271)	Top-1 acc 28.125 (26.673)	Top-5 acc 48.438 (49.499)	lr 0.01381
Warmup Train [26][2950/3239]	Time 0.211 (0.232)	Data 0.001 (0.009)	Loss 4.1232 (4.2270)	Top-1 acc 23.828 (26.670)	Top-5 acc 55.078 (49.502)	lr 0.01381
Warmup Train [26][2960/3239]	Time 0.152 (0.232)	Data 0.001 (0.009)	Loss 4.0432 (4.2269)	Top-1 acc 32.031 (26.674)	Top-5 acc 55.469 (49.505)	lr 0.01380
Warmup Train [26][2970/3239]	Time 0.183 (0.232)	Data 0.001 (0.009)	Loss 4.2746 (4.2269)	Top-1 acc 25.781 (26.673)	Top-5 acc 53.906 (49.507)	lr 0.01380
Warmup Train [26][2980/3239]	Time 0.174 (0.232)	Data 0.001 (0.009)	Loss 4.1937 (4.2269)	Top-1 acc 25.391 (26.673)	Top-5 acc 51.953 (49.509)	lr 0.01379
Warmup Train [26][2990/3239]	Time 0.143 (0.232)	Data 0.001 (0.008)	Loss 4.1889 (4.2268)	Top-1 acc 31.250 (26.676)	Top-5 acc 52.344 (49.511)	lr 0.01379
Warmup Train [26][3000/3239]	Time 0.239 (0.232)	Data 0.002 (0.009)	Loss 4.0373 (4.2265)	Top-1 acc 27.344 (26.679)	Top-5 acc 55.078 (49.517)	lr 0.01378
Warmup Train [26][3010/3239]	Time 0.206 (0.232)	Data 0.001 (0.008)	Loss 4.1505 (4.2262)	Top-1 acc 27.734 (26.676)	Top-5 acc 49.219 (49.525)	lr 0.01377
Warmup Train [26][3020/3239]	Time 0.257 (0.232)	Data 0.001 (0.008)	Loss 4.1942 (4.2260)	Top-1 acc 26.562 (26.679)	Top-5 acc 46.484 (49.528)	lr 0.01377
Warmup Train [26][3030/3239]	Time 0.189 (0.232)	Data 0.001 (0.008)	Loss 4.1470 (4.2259)	Top-1 acc 32.031 (26.682)	Top-5 acc 50.781 (49.530)	lr 0.01376
Warmup Train [26][3040/3239]	Time 0.248 (0.232)	Data 0.001 (0.008)	Loss 4.2793 (4.2260)	Top-1 acc 23.828 (26.677)	Top-5 acc 49.219 (49.527)	lr 0.01376
Warmup Train [26][3050/3239]	Time 0.236 (0.232)	Data 0.002 (0.008)	Loss 4.2325 (4.2260)	Top-1 acc 26.562 (26.677)	Top-5 acc 51.953 (49.530)	lr 0.01375
Warmup Train [26][3060/3239]	Time 0.223 (0.232)	Data 0.001 (0.008)	Loss 4.1841 (4.2259)	Top-1 acc 25.391 (26.677)	Top-5 acc 50.000 (49.532)	lr 0.01375
Warmup Train [26][3070/3239]	Time 0.328 (0.232)	Data 0.001 (0.008)	Loss 4.2701 (4.2259)	Top-1 acc 24.609 (26.678)	Top-5 acc 45.703 (49.531)	lr 0.01374
Warmup Train [26][3080/3239]	Time 0.153 (0.232)	Data 0.001 (0.008)	Loss 4.3520 (4.2258)	Top-1 acc 22.656 (26.676)	Top-5 acc 49.609 (49.535)	lr 0.01374
Warmup Train [26][3090/3239]	Time 0.155 (0.232)	Data 0.001 (0.008)	Loss 4.1772 (4.2258)	Top-1 acc 28.516 (26.676)	Top-5 acc 53.516 (49.535)	lr 0.01373
Warmup Train [26][3100/3239]	Time 0.150 (0.232)	Data 0.002 (0.008)	Loss 4.1811 (4.2257)	Top-1 acc 28.516 (26.679)	Top-5 acc 50.391 (49.540)	lr 0.01373
Warmup Train [26][3110/3239]	Time 0.258 (0.232)	Data 0.001 (0.008)	Loss 4.3253 (4.2257)	Top-1 acc 27.344 (26.679)	Top-5 acc 48.438 (49.542)	lr 0.01372
Warmup Train [26][3120/3239]	Time 0.275 (0.232)	Data 0.001 (0.008)	Loss 4.1634 (4.2255)	Top-1 acc 30.078 (26.685)	Top-5 acc 51.562 (49.545)	lr 0.01372
Warmup Train [26][3130/3239]	Time 0.245 (0.232)	Data 0.001 (0.008)	Loss 4.2839 (4.2254)	Top-1 acc 23.828 (26.686)	Top-5 acc 48.438 (49.546)	lr 0.01371
Warmup Train [26][3140/3239]	Time 0.225 (0.232)	Data 0.002 (0.008)	Loss 4.0609 (4.2252)	Top-1 acc 31.250 (26.693)	Top-5 acc 56.250 (49.551)	lr 0.01370
Warmup Train [26][3150/3239]	Time 0.182 (0.232)	Data 0.002 (0.008)	Loss 4.0931 (4.2252)	Top-1 acc 29.688 (26.695)	Top-5 acc 52.734 (49.552)	lr 0.01370
Warmup Train [26][3160/3239]	Time 0.297 (0.232)	Data 0.001 (0.008)	Loss 4.1717 (4.2253)	Top-1 acc 28.125 (26.694)	Top-5 acc 54.297 (49.551)	lr 0.01369
Warmup Train [26][3170/3239]	Time 0.403 (0.232)	Data 0.001 (0.008)	Loss 4.1998 (4.2252)	Top-1 acc 25.391 (26.696)	Top-5 acc 48.438 (49.551)	lr 0.01369
Warmup Train [26][3180/3239]	Time 0.237 (0.232)	Data 0.000 (0.008)	Loss 4.4292 (4.2252)	Top-1 acc 24.609 (26.698)	Top-5 acc 46.875 (49.554)	lr 0.01368
Warmup Train [26][3190/3239]	Time 0.209 (0.232)	Data 0.000 (0.008)	Loss 4.1360 (4.2249)	Top-1 acc 27.734 (26.701)	Top-5 acc 53.906 (49.561)	lr 0.01368
Warmup Train [26][3200/3239]	Time 0.202 (0.232)	Data 0.000 (0.008)	Loss 4.2677 (4.2249)	Top-1 acc 28.516 (26.701)	Top-5 acc 48.047 (49.560)	lr 0.01367
Warmup Train [26][3210/3239]	Time 0.154 (0.232)	Data 0.000 (0.008)	Loss 4.1587 (4.2250)	Top-1 acc 26.953 (26.697)	Top-5 acc 48.438 (49.556)	lr 0.01367
Warmup Train [26][3220/3239]	Time 0.226 (0.232)	Data 0.000 (0.008)	Loss 4.1533 (4.2250)	Top-1 acc 25.781 (26.700)	Top-5 acc 51.172 (49.560)	lr 0.01366
Warmup Train [26][3230/3239]	Time 0.194 (0.232)	Data 0.000 (0.008)	Loss 4.3724 (4.2249)	Top-1 acc 25.000 (26.703)	Top-5 acc 46.484 (49.564)	lr 0.01366
Warmup Train [26][3239/3239]	Time 0.171 (0.231)	Data 0.000 (0.008)	Loss 4.1826 (4.2248)	Top-1 acc 28.395 (26.704)	Top-5 acc 43.210 (49.561)	lr 0.01365
==========Warmup Valid [26/40]	loss 3.174	top-1 acc 34.282	top-5 acc 59.270	Train top-1 26.704	top-5 49.561	flops: 442.4M
Warmup Train [27][0/3239]	Time 16.755 (16.755)	Data 16.339 (16.339)	Loss 3.9499 (3.9499)	Top-1 acc 30.859 (30.859)	Top-5 acc 58.984 (58.984)	lr 0.01365
Warmup Train [27][10/3239]	Time 0.312 (1.954)	Data 0.003 (1.540)	Loss 4.1318 (4.1556)	Top-1 acc 28.906 (28.018)	Top-5 acc 51.562 (51.847)	lr 0.01364
Warmup Train [27][20/3239]	Time 0.250 (1.163)	Data 0.002 (0.808)	Loss 4.1910 (4.1855)	Top-1 acc 28.906 (27.623)	Top-5 acc 52.734 (50.391)	lr 0.01364
Warmup Train [27][30/3239]	Time 0.296 (0.865)	Data 0.002 (0.548)	Loss 4.0754 (4.1922)	Top-1 acc 27.344 (27.331)	Top-5 acc 52.734 (50.101)	lr 0.01363
Warmup Train [27][40/3239]	Time 0.177 (0.710)	Data 0.001 (0.414)	Loss 4.3984 (4.2016)	Top-1 acc 27.344 (27.229)	Top-5 acc 48.438 (50.095)	lr 0.01363
Warmup Train [27][50/3239]	Time 0.167 (0.613)	Data 0.001 (0.334)	Loss 4.3467 (4.2024)	Top-1 acc 23.828 (27.282)	Top-5 acc 43.359 (49.939)	lr 0.01362
Warmup Train [27][60/3239]	Time 0.271 (0.554)	Data 0.002 (0.280)	Loss 4.1437 (4.1959)	Top-1 acc 27.344 (27.357)	Top-5 acc 49.609 (50.109)	lr 0.01362
Warmup Train [27][70/3239]	Time 0.324 (0.510)	Data 0.001 (0.241)	Loss 4.2773 (4.1981)	Top-1 acc 23.828 (27.300)	Top-5 acc 48.828 (50.094)	lr 0.01361
Warmup Train [27][80/3239]	Time 0.258 (0.475)	Data 0.002 (0.212)	Loss 4.2859 (4.2013)	Top-1 acc 26.562 (27.262)	Top-5 acc 48.828 (50.039)	lr 0.01361
Warmup Train [27][90/3239]	Time 0.196 (0.449)	Data 0.002 (0.189)	Loss 4.2170 (4.2024)	Top-1 acc 26.953 (27.314)	Top-5 acc 50.781 (50.021)	lr 0.01360
Warmup Train [27][100/3239]	Time 0.258 (0.427)	Data 0.001 (0.171)	Loss 4.2844 (4.1978)	Top-1 acc 28.516 (27.471)	Top-5 acc 47.656 (50.077)	lr 0.01360
Warmup Train [27][110/3239]	Time 0.218 (0.409)	Data 0.001 (0.156)	Loss 4.2637 (4.1968)	Top-1 acc 24.219 (27.418)	Top-5 acc 51.953 (50.130)	lr 0.01359
Warmup Train [27][120/3239]	Time 0.258 (0.393)	Data 0.001 (0.143)	Loss 4.2582 (4.2015)	Top-1 acc 25.391 (27.324)	Top-5 acc 48.828 (50.077)	lr 0.01359
Warmup Train [27][130/3239]	Time 0.184 (0.381)	Data 0.002 (0.132)	Loss 4.2742 (4.2001)	Top-1 acc 24.219 (27.371)	Top-5 acc 48.828 (50.060)	lr 0.01358
Warmup Train [27][140/3239]	Time 0.159 (0.369)	Data 0.002 (0.123)	Loss 4.1798 (4.1961)	Top-1 acc 27.344 (27.416)	Top-5 acc 49.609 (50.150)	lr 0.01357
Warmup Train [27][150/3239]	Time 0.201 (0.360)	Data 0.001 (0.115)	Loss 4.1662 (4.2006)	Top-1 acc 24.609 (27.336)	Top-5 acc 53.516 (50.065)	lr 0.01357
Warmup Train [27][160/3239]	Time 0.228 (0.351)	Data 0.001 (0.108)	Loss 4.2312 (4.2037)	Top-1 acc 25.781 (27.269)	Top-5 acc 50.781 (49.922)	lr 0.01356
Warmup Train [27][170/3239]	Time 0.374 (0.345)	Data 0.002 (0.102)	Loss 4.1643 (4.2049)	Top-1 acc 28.125 (27.172)	Top-5 acc 50.781 (49.954)	lr 0.01356
Warmup Train [27][180/3239]	Time 0.248 (0.338)	Data 0.002 (0.097)	Loss 3.9524 (4.2029)	Top-1 acc 30.078 (27.199)	Top-5 acc 54.688 (49.974)	lr 0.01355
Warmup Train [27][190/3239]	Time 0.207 (0.333)	Data 0.001 (0.092)	Loss 4.2382 (4.2011)	Top-1 acc 24.609 (27.188)	Top-5 acc 48.047 (50.012)	lr 0.01355
Warmup Train [27][200/3239]	Time 0.277 (0.327)	Data 0.001 (0.087)	Loss 4.1906 (4.1979)	Top-1 acc 27.344 (27.233)	Top-5 acc 50.000 (50.068)	lr 0.01354
Warmup Train [27][210/3239]	Time 0.238 (0.322)	Data 0.002 (0.083)	Loss 4.1394 (4.2010)	Top-1 acc 27.734 (27.160)	Top-5 acc 54.688 (50.030)	lr 0.01354
Warmup Train [27][220/3239]	Time 0.214 (0.317)	Data 0.002 (0.079)	Loss 4.3524 (4.1991)	Top-1 acc 24.609 (27.167)	Top-5 acc 43.750 (50.080)	lr 0.01353
Warmup Train [27][230/3239]	Time 0.221 (0.313)	Data 0.002 (0.076)	Loss 4.4100 (4.1985)	Top-1 acc 19.141 (27.146)	Top-5 acc 44.531 (50.095)	lr 0.01353
Warmup Train [27][240/3239]	Time 0.223 (0.309)	Data 0.002 (0.073)	Loss 4.3319 (4.2009)	Top-1 acc 21.875 (27.084)	Top-5 acc 44.922 (50.039)	lr 0.01352
Warmup Train [27][250/3239]	Time 0.248 (0.306)	Data 0.001 (0.070)	Loss 4.0420 (4.2018)	Top-1 acc 29.297 (27.093)	Top-5 acc 49.219 (50.008)	lr 0.01352
Warmup Train [27][260/3239]	Time 0.238 (0.303)	Data 0.003 (0.068)	Loss 4.4012 (4.2038)	Top-1 acc 23.438 (27.103)	Top-5 acc 47.656 (50.018)	lr 0.01351
Warmup Train [27][270/3239]	Time 0.228 (0.300)	Data 0.001 (0.065)	Loss 4.2189 (4.2025)	Top-1 acc 24.609 (27.093)	Top-5 acc 49.219 (50.043)	lr 0.01350
Warmup Train [27][280/3239]	Time 0.189 (0.298)	Data 0.001 (0.063)	Loss 4.2313 (4.2005)	Top-1 acc 26.953 (27.174)	Top-5 acc 47.266 (50.076)	lr 0.01350
Warmup Train [27][290/3239]	Time 0.246 (0.295)	Data 0.002 (0.061)	Loss 4.2663 (4.2019)	Top-1 acc 26.562 (27.156)	Top-5 acc 49.219 (50.024)	lr 0.01349
Warmup Train [27][300/3239]	Time 0.229 (0.293)	Data 0.001 (0.059)	Loss 4.1422 (4.2016)	Top-1 acc 29.297 (27.128)	Top-5 acc 50.781 (50.038)	lr 0.01349
Warmup Train [27][310/3239]	Time 0.274 (0.291)	Data 0.001 (0.057)	Loss 4.1324 (4.1994)	Top-1 acc 29.297 (27.170)	Top-5 acc 50.000 (50.060)	lr 0.01348
Warmup Train [27][320/3239]	Time 0.192 (0.288)	Data 0.002 (0.055)	Loss 4.2242 (4.2013)	Top-1 acc 29.688 (27.145)	Top-5 acc 46.875 (50.009)	lr 0.01348
Warmup Train [27][330/3239]	Time 0.179 (0.286)	Data 0.001 (0.054)	Loss 4.3476 (4.2023)	Top-1 acc 22.266 (27.149)	Top-5 acc 47.656 (49.991)	lr 0.01347
Warmup Train [27][340/3239]	Time 0.214 (0.284)	Data 0.001 (0.052)	Loss 4.3594 (4.2029)	Top-1 acc 21.875 (27.131)	Top-5 acc 46.094 (50.026)	lr 0.01347
Warmup Train [27][350/3239]	Time 0.194 (0.283)	Data 0.004 (0.051)	Loss 4.2249 (4.2018)	Top-1 acc 29.297 (27.158)	Top-5 acc 51.953 (50.071)	lr 0.01346
Warmup Train [27][360/3239]	Time 0.305 (0.281)	Data 0.001 (0.050)	Loss 4.1254 (4.2017)	Top-1 acc 29.297 (27.144)	Top-5 acc 52.734 (50.063)	lr 0.01346
Warmup Train [27][370/3239]	Time 0.344 (0.280)	Data 0.001 (0.048)	Loss 3.8328 (4.1995)	Top-1 acc 32.031 (27.175)	Top-5 acc 58.203 (50.099)	lr 0.01345
Warmup Train [27][380/3239]	Time 0.216 (0.278)	Data 0.001 (0.047)	Loss 4.4571 (4.2002)	Top-1 acc 23.828 (27.161)	Top-5 acc 43.359 (50.108)	lr 0.01345
Warmup Train [27][390/3239]	Time 0.219 (0.276)	Data 0.001 (0.046)	Loss 4.2421 (4.1995)	Top-1 acc 23.828 (27.194)	Top-5 acc 47.266 (50.121)	lr 0.01344
Warmup Train [27][400/3239]	Time 0.228 (0.275)	Data 0.001 (0.045)	Loss 4.4808 (4.2009)	Top-1 acc 23.438 (27.172)	Top-5 acc 43.750 (50.078)	lr 0.01343
Warmup Train [27][410/3239]	Time 0.196 (0.273)	Data 0.001 (0.044)	Loss 4.1563 (4.2011)	Top-1 acc 32.031 (27.171)	Top-5 acc 51.172 (50.052)	lr 0.01343
Warmup Train [27][420/3239]	Time 0.147 (0.272)	Data 0.001 (0.043)	Loss 4.3943 (4.2008)	Top-1 acc 26.172 (27.172)	Top-5 acc 44.922 (50.045)	lr 0.01342
Warmup Train [27][430/3239]	Time 0.185 (0.271)	Data 0.003 (0.042)	Loss 4.1241 (4.2001)	Top-1 acc 26.562 (27.171)	Top-5 acc 53.125 (50.056)	lr 0.01342
Warmup Train [27][440/3239]	Time 0.206 (0.270)	Data 0.001 (0.041)	Loss 4.0213 (4.2014)	Top-1 acc 31.250 (27.148)	Top-5 acc 52.734 (50.023)	lr 0.01341
Warmup Train [27][450/3239]	Time 0.256 (0.269)	Data 0.001 (0.040)	Loss 4.1172 (4.2006)	Top-1 acc 26.562 (27.169)	Top-5 acc 53.906 (50.057)	lr 0.01341
Warmup Train [27][460/3239]	Time 0.192 (0.268)	Data 0.002 (0.039)	Loss 4.4076 (4.2014)	Top-1 acc 25.391 (27.138)	Top-5 acc 47.656 (50.051)	lr 0.01340
Warmup Train [27][470/3239]	Time 0.292 (0.267)	Data 0.002 (0.039)	Loss 4.4323 (4.2023)	Top-1 acc 24.609 (27.145)	Top-5 acc 46.484 (50.030)	lr 0.01340
Warmup Train [27][480/3239]	Time 0.230 (0.266)	Data 0.001 (0.038)	Loss 4.1375 (4.2020)	Top-1 acc 28.516 (27.155)	Top-5 acc 53.125 (50.034)	lr 0.01339
Warmup Train [27][490/3239]	Time 0.190 (0.265)	Data 0.001 (0.037)	Loss 4.1744 (4.2025)	Top-1 acc 26.953 (27.150)	Top-5 acc 49.219 (50.014)	lr 0.01339
Warmup Train [27][500/3239]	Time 0.236 (0.265)	Data 0.001 (0.036)	Loss 4.1908 (4.2012)	Top-1 acc 25.391 (27.177)	Top-5 acc 48.438 (50.037)	lr 0.01338
Warmup Train [27][510/3239]	Time 0.208 (0.264)	Data 0.001 (0.036)	Loss 4.2869 (4.2016)	Top-1 acc 26.172 (27.173)	Top-5 acc 53.125 (50.037)	lr 0.01338
Warmup Train [27][520/3239]	Time 0.192 (0.263)	Data 0.001 (0.035)	Loss 4.0766 (4.2014)	Top-1 acc 28.516 (27.159)	Top-5 acc 51.562 (50.057)	lr 0.01337
Warmup Train [27][530/3239]	Time 0.216 (0.262)	Data 0.001 (0.034)	Loss 4.3594 (4.2014)	Top-1 acc 18.750 (27.164)	Top-5 acc 45.312 (50.061)	lr 0.01337
Warmup Train [27][540/3239]	Time 0.180 (0.261)	Data 0.003 (0.034)	Loss 4.2480 (4.2024)	Top-1 acc 31.250 (27.159)	Top-5 acc 49.609 (50.046)	lr 0.01336
Warmup Train [27][550/3239]	Time 0.198 (0.261)	Data 0.001 (0.033)	Loss 3.8212 (4.2006)	Top-1 acc 35.938 (27.184)	Top-5 acc 58.594 (50.081)	lr 0.01335
Warmup Train [27][560/3239]	Time 0.273 (0.260)	Data 0.001 (0.033)	Loss 4.0852 (4.2011)	Top-1 acc 28.516 (27.182)	Top-5 acc 53.125 (50.067)	lr 0.01335
Warmup Train [27][570/3239]	Time 0.179 (0.259)	Data 0.001 (0.032)	Loss 4.1309 (4.2010)	Top-1 acc 26.172 (27.182)	Top-5 acc 50.781 (50.065)	lr 0.01334
Warmup Train [27][580/3239]	Time 0.126 (0.258)	Data 0.001 (0.032)	Loss 4.1419 (4.2011)	Top-1 acc 29.297 (27.190)	Top-5 acc 49.609 (50.049)	lr 0.01334
Warmup Train [27][590/3239]	Time 0.309 (0.258)	Data 0.001 (0.031)	Loss 4.0940 (4.2000)	Top-1 acc 29.688 (27.206)	Top-5 acc 54.688 (50.076)	lr 0.01333
Warmup Train [27][600/3239]	Time 0.213 (0.257)	Data 0.001 (0.031)	Loss 4.0960 (4.2000)	Top-1 acc 26.953 (27.201)	Top-5 acc 53.125 (50.068)	lr 0.01333
Warmup Train [27][610/3239]	Time 0.185 (0.257)	Data 0.001 (0.030)	Loss 4.2060 (4.2000)	Top-1 acc 26.953 (27.188)	Top-5 acc 50.781 (50.042)	lr 0.01332
Warmup Train [27][620/3239]	Time 0.224 (0.256)	Data 0.002 (0.030)	Loss 3.9141 (4.1993)	Top-1 acc 35.156 (27.198)	Top-5 acc 55.078 (50.072)	lr 0.01332
Warmup Train [27][630/3239]	Time 0.208 (0.256)	Data 0.002 (0.029)	Loss 4.3798 (4.1994)	Top-1 acc 25.781 (27.207)	Top-5 acc 48.047 (50.085)	lr 0.01331
Warmup Train [27][640/3239]	Time 0.193 (0.255)	Data 0.002 (0.029)	Loss 4.2509 (4.1993)	Top-1 acc 24.609 (27.206)	Top-5 acc 50.781 (50.085)	lr 0.01331
Warmup Train [27][650/3239]	Time 0.195 (0.254)	Data 0.001 (0.029)	Loss 4.0327 (4.1984)	Top-1 acc 32.812 (27.226)	Top-5 acc 53.906 (50.114)	lr 0.01330
Warmup Train [27][660/3239]	Time 0.211 (0.254)	Data 0.001 (0.028)	Loss 4.1799 (4.1990)	Top-1 acc 24.219 (27.209)	Top-5 acc 50.781 (50.110)	lr 0.01330
Warmup Train [27][670/3239]	Time 0.205 (0.254)	Data 0.002 (0.028)	Loss 4.3304 (4.1995)	Top-1 acc 26.953 (27.198)	Top-5 acc 48.828 (50.112)	lr 0.01329
Warmup Train [27][680/3239]	Time 0.240 (0.253)	Data 0.002 (0.028)	Loss 4.0403 (4.1990)	Top-1 acc 30.469 (27.217)	Top-5 acc 53.125 (50.114)	lr 0.01328
Warmup Train [27][690/3239]	Time 0.169 (0.253)	Data 0.001 (0.027)	Loss 4.2264 (4.1987)	Top-1 acc 26.562 (27.224)	Top-5 acc 50.781 (50.118)	lr 0.01328
Warmup Train [27][700/3239]	Time 0.251 (0.252)	Data 0.001 (0.027)	Loss 4.2774 (4.1990)	Top-1 acc 26.953 (27.221)	Top-5 acc 48.438 (50.118)	lr 0.01327
Warmup Train [27][710/3239]	Time 0.238 (0.252)	Data 0.001 (0.027)	Loss 4.2234 (4.1978)	Top-1 acc 28.125 (27.243)	Top-5 acc 51.172 (50.150)	lr 0.01327
Warmup Train [27][720/3239]	Time 0.179 (0.252)	Data 0.001 (0.026)	Loss 4.2507 (4.1976)	Top-1 acc 27.734 (27.228)	Top-5 acc 49.219 (50.153)	lr 0.01326
Warmup Train [27][730/3239]	Time 0.208 (0.251)	Data 0.001 (0.026)	Loss 4.1349 (4.1970)	Top-1 acc 30.078 (27.236)	Top-5 acc 50.781 (50.162)	lr 0.01326
Warmup Train [27][740/3239]	Time 0.250 (0.251)	Data 0.001 (0.026)	Loss 4.2529 (4.1957)	Top-1 acc 24.219 (27.263)	Top-5 acc 48.047 (50.193)	lr 0.01325
Warmup Train [27][750/3239]	Time 0.235 (0.251)	Data 0.001 (0.025)	Loss 3.9785 (4.1954)	Top-1 acc 28.516 (27.274)	Top-5 acc 54.688 (50.214)	lr 0.01325
Warmup Train [27][760/3239]	Time 0.158 (0.250)	Data 0.001 (0.025)	Loss 4.1912 (4.1951)	Top-1 acc 27.734 (27.271)	Top-5 acc 49.609 (50.214)	lr 0.01324
Warmup Train [27][770/3239]	Time 0.198 (0.250)	Data 0.001 (0.025)	Loss 4.2029 (4.1955)	Top-1 acc 27.734 (27.269)	Top-5 acc 52.734 (50.203)	lr 0.01324
Warmup Train [27][780/3239]	Time 0.213 (0.250)	Data 0.001 (0.024)	Loss 4.5354 (4.1959)	Top-1 acc 21.094 (27.258)	Top-5 acc 41.797 (50.195)	lr 0.01323
Warmup Train [27][790/3239]	Time 0.212 (0.249)	Data 0.001 (0.024)	Loss 4.2038 (4.1962)	Top-1 acc 26.172 (27.274)	Top-5 acc 46.094 (50.204)	lr 0.01323
Warmup Train [27][800/3239]	Time 0.278 (0.249)	Data 0.001 (0.024)	Loss 4.3227 (4.1958)	Top-1 acc 22.266 (27.287)	Top-5 acc 46.484 (50.217)	lr 0.01322
Warmup Train [27][810/3239]	Time 0.237 (0.249)	Data 0.001 (0.024)	Loss 4.0525 (4.1961)	Top-1 acc 32.812 (27.285)	Top-5 acc 52.344 (50.210)	lr 0.01322
Warmup Train [27][820/3239]	Time 0.185 (0.248)	Data 0.001 (0.023)	Loss 3.9949 (4.1956)	Top-1 acc 30.859 (27.293)	Top-5 acc 53.906 (50.229)	lr 0.01321
Warmup Train [27][830/3239]	Time 0.216 (0.248)	Data 0.001 (0.023)	Loss 4.4289 (4.1959)	Top-1 acc 23.438 (27.293)	Top-5 acc 43.359 (50.216)	lr 0.01320
Warmup Train [27][840/3239]	Time 0.207 (0.248)	Data 0.001 (0.023)	Loss 4.3804 (4.1969)	Top-1 acc 24.609 (27.275)	Top-5 acc 44.141 (50.189)	lr 0.01320
Warmup Train [27][850/3239]	Time 0.188 (0.247)	Data 0.001 (0.023)	Loss 4.1904 (4.1966)	Top-1 acc 27.344 (27.285)	Top-5 acc 49.609 (50.185)	lr 0.01319
Warmup Train [27][860/3239]	Time 0.215 (0.247)	Data 0.001 (0.022)	Loss 4.1992 (4.1961)	Top-1 acc 26.562 (27.285)	Top-5 acc 48.047 (50.208)	lr 0.01319
Warmup Train [27][870/3239]	Time 0.229 (0.247)	Data 0.001 (0.022)	Loss 4.1066 (4.1959)	Top-1 acc 29.297 (27.289)	Top-5 acc 51.562 (50.205)	lr 0.01318
Warmup Train [27][880/3239]	Time 0.237 (0.246)	Data 0.001 (0.022)	Loss 4.0932 (4.1959)	Top-1 acc 29.688 (27.294)	Top-5 acc 54.297 (50.210)	lr 0.01318
Warmup Train [27][890/3239]	Time 0.231 (0.246)	Data 0.002 (0.022)	Loss 4.2221 (4.1955)	Top-1 acc 23.438 (27.289)	Top-5 acc 47.266 (50.229)	lr 0.01317
Warmup Train [27][900/3239]	Time 0.233 (0.246)	Data 0.001 (0.021)	Loss 4.1822 (4.1954)	Top-1 acc 26.562 (27.296)	Top-5 acc 51.953 (50.222)	lr 0.01317
Warmup Train [27][910/3239]	Time 0.324 (0.246)	Data 0.001 (0.021)	Loss 4.3031 (4.1953)	Top-1 acc 27.344 (27.302)	Top-5 acc 46.875 (50.223)	lr 0.01316
Warmup Train [27][920/3239]	Time 0.183 (0.246)	Data 0.001 (0.021)	Loss 4.2163 (4.1956)	Top-1 acc 28.516 (27.299)	Top-5 acc 50.781 (50.210)	lr 0.01316
Warmup Train [27][930/3239]	Time 0.210 (0.245)	Data 0.001 (0.021)	Loss 3.9643 (4.1947)	Top-1 acc 28.906 (27.314)	Top-5 acc 55.469 (50.228)	lr 0.01315
Warmup Train [27][940/3239]	Time 0.196 (0.245)	Data 0.001 (0.021)	Loss 4.0139 (4.1944)	Top-1 acc 29.688 (27.319)	Top-5 acc 54.297 (50.234)	lr 0.01315
Warmup Train [27][950/3239]	Time 0.191 (0.245)	Data 0.001 (0.020)	Loss 4.1734 (4.1943)	Top-1 acc 24.609 (27.310)	Top-5 acc 47.266 (50.226)	lr 0.01314
Warmup Train [27][960/3239]	Time 0.227 (0.244)	Data 0.001 (0.020)	Loss 4.2998 (4.1948)	Top-1 acc 25.391 (27.305)	Top-5 acc 49.219 (50.213)	lr 0.01313
Warmup Train [27][970/3239]	Time 0.286 (0.244)	Data 0.001 (0.020)	Loss 4.2560 (4.1947)	Top-1 acc 21.484 (27.302)	Top-5 acc 47.656 (50.212)	lr 0.01313
Warmup Train [27][980/3239]	Time 0.203 (0.244)	Data 0.001 (0.020)	Loss 4.2477 (4.1953)	Top-1 acc 25.000 (27.290)	Top-5 acc 46.875 (50.198)	lr 0.01312
Warmup Train [27][990/3239]	Time 0.199 (0.244)	Data 0.001 (0.020)	Loss 4.2862 (4.1950)	Top-1 acc 29.297 (27.298)	Top-5 acc 50.391 (50.202)	lr 0.01312
Warmup Train [27][1000/3239]	Time 0.230 (0.244)	Data 0.001 (0.020)	Loss 4.3824 (4.1945)	Top-1 acc 23.047 (27.304)	Top-5 acc 44.922 (50.210)	lr 0.01311
Warmup Train [27][1010/3239]	Time 0.309 (0.244)	Data 0.001 (0.019)	Loss 4.4990 (4.1949)	Top-1 acc 22.266 (27.299)	Top-5 acc 40.625 (50.194)	lr 0.01311
Warmup Train [27][1020/3239]	Time 0.173 (0.243)	Data 0.001 (0.019)	Loss 4.1736 (4.1947)	Top-1 acc 28.906 (27.317)	Top-5 acc 53.516 (50.190)	lr 0.01310
Warmup Train [27][1030/3239]	Time 0.158 (0.243)	Data 0.002 (0.019)	Loss 4.4133 (4.1948)	Top-1 acc 24.609 (27.312)	Top-5 acc 42.188 (50.192)	lr 0.01310
Warmup Train [27][1040/3239]	Time 0.195 (0.243)	Data 0.001 (0.019)	Loss 4.3876 (4.1945)	Top-1 acc 25.781 (27.316)	Top-5 acc 48.047 (50.193)	lr 0.01309
Warmup Train [27][1050/3239]	Time 0.192 (0.243)	Data 0.001 (0.019)	Loss 4.1765 (4.1951)	Top-1 acc 28.125 (27.304)	Top-5 acc 50.781 (50.180)	lr 0.01309
Warmup Train [27][1060/3239]	Time 0.207 (0.243)	Data 0.001 (0.019)	Loss 4.3000 (4.1952)	Top-1 acc 31.641 (27.310)	Top-5 acc 50.391 (50.183)	lr 0.01308
Warmup Train [27][1070/3239]	Time 0.237 (0.242)	Data 0.001 (0.019)	Loss 4.1370 (4.1957)	Top-1 acc 26.562 (27.293)	Top-5 acc 52.344 (50.168)	lr 0.01308
Warmup Train [27][1080/3239]	Time 0.197 (0.242)	Data 0.001 (0.018)	Loss 4.1448 (4.1958)	Top-1 acc 29.688 (27.292)	Top-5 acc 50.781 (50.167)	lr 0.01307
Warmup Train [27][1090/3239]	Time 0.218 (0.242)	Data 0.001 (0.018)	Loss 4.2127 (4.1954)	Top-1 acc 26.172 (27.298)	Top-5 acc 50.000 (50.184)	lr 0.01307
Warmup Train [27][1100/3239]	Time 0.222 (0.242)	Data 0.001 (0.018)	Loss 4.0661 (4.1955)	Top-1 acc 33.203 (27.298)	Top-5 acc 49.609 (50.171)	lr 0.01306
Warmup Train [27][1110/3239]	Time 0.323 (0.242)	Data 0.002 (0.018)	Loss 4.1152 (4.1957)	Top-1 acc 28.516 (27.293)	Top-5 acc 50.391 (50.164)	lr 0.01306
Warmup Train [27][1120/3239]	Time 0.196 (0.241)	Data 0.001 (0.018)	Loss 4.2268 (4.1959)	Top-1 acc 28.906 (27.294)	Top-5 acc 49.219 (50.162)	lr 0.01305
Warmup Train [27][1130/3239]	Time 0.205 (0.241)	Data 0.001 (0.018)	Loss 4.1654 (4.1958)	Top-1 acc 26.562 (27.301)	Top-5 acc 48.047 (50.166)	lr 0.01304
Warmup Train [27][1140/3239]	Time 0.216 (0.241)	Data 0.001 (0.018)	Loss 4.1747 (4.1958)	Top-1 acc 28.516 (27.301)	Top-5 acc 51.172 (50.160)	lr 0.01304
Warmup Train [27][1150/3239]	Time 0.229 (0.241)	Data 0.002 (0.017)	Loss 3.9734 (4.1956)	Top-1 acc 33.203 (27.306)	Top-5 acc 57.031 (50.170)	lr 0.01303
Warmup Train [27][1160/3239]	Time 0.303 (0.241)	Data 0.001 (0.017)	Loss 3.9802 (4.1953)	Top-1 acc 32.812 (27.307)	Top-5 acc 55.859 (50.181)	lr 0.01303
Warmup Train [27][1170/3239]	Time 0.204 (0.241)	Data 0.002 (0.017)	Loss 4.2402 (4.1956)	Top-1 acc 26.953 (27.297)	Top-5 acc 48.828 (50.171)	lr 0.01302
Warmup Train [27][1180/3239]	Time 0.220 (0.241)	Data 0.001 (0.017)	Loss 4.3294 (4.1959)	Top-1 acc 26.953 (27.293)	Top-5 acc 46.094 (50.166)	lr 0.01302
Warmup Train [27][1190/3239]	Time 0.358 (0.241)	Data 0.002 (0.017)	Loss 4.2174 (4.1965)	Top-1 acc 26.562 (27.283)	Top-5 acc 50.000 (50.158)	lr 0.01301
Warmup Train [27][1200/3239]	Time 0.226 (0.241)	Data 0.001 (0.017)	Loss 4.2151 (4.1969)	Top-1 acc 25.781 (27.278)	Top-5 acc 46.484 (50.149)	lr 0.01301
Warmup Train [27][1210/3239]	Time 0.262 (0.240)	Data 0.001 (0.017)	Loss 4.3777 (4.1964)	Top-1 acc 26.953 (27.292)	Top-5 acc 46.484 (50.164)	lr 0.01300
Warmup Train [27][1220/3239]	Time 0.164 (0.240)	Data 0.002 (0.017)	Loss 4.4037 (4.1966)	Top-1 acc 25.781 (27.295)	Top-5 acc 44.141 (50.163)	lr 0.01300
Warmup Train [27][1230/3239]	Time 0.248 (0.240)	Data 0.001 (0.016)	Loss 4.3159 (4.1964)	Top-1 acc 25.781 (27.293)	Top-5 acc 47.266 (50.166)	lr 0.01299
Warmup Train [27][1240/3239]	Time 0.196 (0.240)	Data 0.002 (0.016)	Loss 4.0583 (4.1967)	Top-1 acc 28.125 (27.286)	Top-5 acc 52.734 (50.165)	lr 0.01299
Warmup Train [27][1250/3239]	Time 0.182 (0.240)	Data 0.001 (0.016)	Loss 4.3701 (4.1965)	Top-1 acc 21.484 (27.288)	Top-5 acc 45.703 (50.160)	lr 0.01298
Warmup Train [27][1260/3239]	Time 0.221 (0.240)	Data 0.001 (0.016)	Loss 4.1318 (4.1963)	Top-1 acc 25.781 (27.293)	Top-5 acc 54.297 (50.164)	lr 0.01298
Warmup Train [27][1270/3239]	Time 0.247 (0.240)	Data 0.001 (0.016)	Loss 4.2812 (4.1963)	Top-1 acc 28.516 (27.303)	Top-5 acc 50.781 (50.168)	lr 0.01297
Warmup Train [27][1280/3239]	Time 0.202 (0.239)	Data 0.001 (0.016)	Loss 4.3569 (4.1964)	Top-1 acc 23.828 (27.307)	Top-5 acc 48.047 (50.167)	lr 0.01296
Warmup Train [27][1290/3239]	Time 0.293 (0.239)	Data 0.001 (0.016)	Loss 4.3318 (4.1966)	Top-1 acc 23.047 (27.293)	Top-5 acc 47.266 (50.156)	lr 0.01296
Warmup Train [27][1300/3239]	Time 0.251 (0.239)	Data 0.001 (0.016)	Loss 4.2058 (4.1965)	Top-1 acc 24.219 (27.294)	Top-5 acc 50.000 (50.162)	lr 0.01295
Warmup Train [27][1310/3239]	Time 0.155 (0.239)	Data 0.001 (0.016)	Loss 4.2228 (4.1966)	Top-1 acc 25.391 (27.286)	Top-5 acc 48.047 (50.156)	lr 0.01295
Warmup Train [27][1320/3239]	Time 0.212 (0.239)	Data 0.001 (0.016)	Loss 4.1330 (4.1964)	Top-1 acc 23.828 (27.283)	Top-5 acc 48.828 (50.164)	lr 0.01294
Warmup Train [27][1330/3239]	Time 0.200 (0.239)	Data 0.001 (0.015)	Loss 4.0908 (4.1966)	Top-1 acc 28.516 (27.278)	Top-5 acc 50.000 (50.154)	lr 0.01294
Warmup Train [27][1340/3239]	Time 0.181 (0.239)	Data 0.001 (0.015)	Loss 4.1469 (4.1961)	Top-1 acc 30.078 (27.291)	Top-5 acc 52.344 (50.176)	lr 0.01293
Warmup Train [27][1350/3239]	Time 0.249 (0.239)	Data 0.001 (0.015)	Loss 4.1764 (4.1959)	Top-1 acc 28.125 (27.302)	Top-5 acc 50.391 (50.186)	lr 0.01293
Warmup Train [27][1360/3239]	Time 0.258 (0.239)	Data 0.001 (0.015)	Loss 4.3448 (4.1960)	Top-1 acc 23.047 (27.299)	Top-5 acc 45.312 (50.183)	lr 0.01292
Warmup Train [27][1370/3239]	Time 0.237 (0.238)	Data 0.002 (0.015)	Loss 4.1444 (4.1960)	Top-1 acc 30.859 (27.309)	Top-5 acc 50.781 (50.180)	lr 0.01292
Warmup Train [27][1380/3239]	Time 0.284 (0.238)	Data 0.002 (0.015)	Loss 4.3107 (4.1956)	Top-1 acc 26.172 (27.317)	Top-5 acc 46.094 (50.188)	lr 0.01291
Warmup Train [27][1390/3239]	Time 0.315 (0.238)	Data 0.001 (0.015)	Loss 4.0179 (4.1952)	Top-1 acc 28.516 (27.314)	Top-5 acc 53.125 (50.198)	lr 0.01291
Warmup Train [27][1400/3239]	Time 0.330 (0.238)	Data 0.001 (0.015)	Loss 4.0194 (4.1949)	Top-1 acc 28.125 (27.319)	Top-5 acc 52.734 (50.201)	lr 0.01290
Warmup Train [27][1410/3239]	Time 0.183 (0.238)	Data 0.001 (0.015)	Loss 3.9699 (4.1949)	Top-1 acc 28.516 (27.311)	Top-5 acc 56.250 (50.202)	lr 0.01290
Warmup Train [27][1420/3239]	Time 0.185 (0.238)	Data 0.001 (0.015)	Loss 4.2161 (4.1945)	Top-1 acc 27.344 (27.325)	Top-5 acc 46.875 (50.214)	lr 0.01289
Warmup Train [27][1430/3239]	Time 0.235 (0.238)	Data 0.001 (0.015)	Loss 4.0839 (4.1943)	Top-1 acc 29.688 (27.324)	Top-5 acc 55.078 (50.219)	lr 0.01289
Warmup Train [27][1440/3239]	Time 0.239 (0.238)	Data 0.001 (0.014)	Loss 4.2073 (4.1942)	Top-1 acc 26.953 (27.330)	Top-5 acc 46.094 (50.214)	lr 0.01288
Warmup Train [27][1450/3239]	Time 0.275 (0.238)	Data 0.002 (0.014)	Loss 4.1548 (4.1940)	Top-1 acc 28.125 (27.331)	Top-5 acc 53.125 (50.218)	lr 0.01287
Warmup Train [27][1460/3239]	Time 0.203 (0.237)	Data 0.001 (0.014)	Loss 4.3416 (4.1941)	Top-1 acc 17.969 (27.325)	Top-5 acc 45.703 (50.221)	lr 0.01287
Warmup Train [27][1470/3239]	Time 0.268 (0.237)	Data 0.002 (0.014)	Loss 4.0190 (4.1935)	Top-1 acc 30.859 (27.336)	Top-5 acc 55.859 (50.237)	lr 0.01286
Warmup Train [27][1480/3239]	Time 0.207 (0.237)	Data 0.001 (0.014)	Loss 4.2802 (4.1938)	Top-1 acc 25.781 (27.330)	Top-5 acc 48.438 (50.236)	lr 0.01286
Warmup Train [27][1490/3239]	Time 0.162 (0.237)	Data 0.001 (0.014)	Loss 4.1417 (4.1937)	Top-1 acc 28.125 (27.331)	Top-5 acc 50.781 (50.232)	lr 0.01285
Warmup Train [27][1500/3239]	Time 0.310 (0.237)	Data 0.001 (0.014)	Loss 4.1721 (4.1937)	Top-1 acc 28.516 (27.329)	Top-5 acc 55.078 (50.239)	lr 0.01285
Warmup Train [27][1510/3239]	Time 0.134 (0.237)	Data 0.001 (0.014)	Loss 4.3219 (4.1943)	Top-1 acc 24.609 (27.321)	Top-5 acc 48.047 (50.226)	lr 0.01284
Warmup Train [27][1520/3239]	Time 0.218 (0.237)	Data 0.001 (0.014)	Loss 4.0430 (4.1939)	Top-1 acc 30.078 (27.332)	Top-5 acc 55.469 (50.237)	lr 0.01284
Warmup Train [27][1530/3239]	Time 0.233 (0.237)	Data 0.001 (0.014)	Loss 4.2485 (4.1936)	Top-1 acc 28.516 (27.346)	Top-5 acc 48.828 (50.241)	lr 0.01283
Warmup Train [27][1540/3239]	Time 0.165 (0.237)	Data 0.001 (0.014)	Loss 4.0589 (4.1935)	Top-1 acc 25.391 (27.347)	Top-5 acc 52.344 (50.245)	lr 0.01283
Warmup Train [27][1550/3239]	Time 0.189 (0.237)	Data 0.001 (0.014)	Loss 4.3264 (4.1939)	Top-1 acc 26.953 (27.342)	Top-5 acc 51.562 (50.245)	lr 0.01282
Warmup Train [27][1560/3239]	Time 0.161 (0.237)	Data 0.001 (0.014)	Loss 4.1056 (4.1939)	Top-1 acc 29.688 (27.340)	Top-5 acc 51.172 (50.238)	lr 0.01282
Warmup Train [27][1570/3239]	Time 0.185 (0.236)	Data 0.001 (0.014)	Loss 4.3025 (4.1939)	Top-1 acc 23.828 (27.339)	Top-5 acc 44.531 (50.237)	lr 0.01281
Warmup Train [27][1580/3239]	Time 0.240 (0.236)	Data 0.001 (0.013)	Loss 4.1510 (4.1938)	Top-1 acc 27.734 (27.341)	Top-5 acc 49.219 (50.236)	lr 0.01281
Warmup Train [27][1590/3239]	Time 0.191 (0.236)	Data 0.001 (0.013)	Loss 4.0380 (4.1938)	Top-1 acc 26.953 (27.334)	Top-5 acc 53.125 (50.235)	lr 0.01280
Warmup Train [27][1600/3239]	Time 0.225 (0.236)	Data 0.001 (0.013)	Loss 4.1310 (4.1940)	Top-1 acc 27.734 (27.322)	Top-5 acc 51.953 (50.236)	lr 0.01280
Warmup Train [27][1610/3239]	Time 0.374 (0.236)	Data 0.001 (0.013)	Loss 4.3174 (4.1940)	Top-1 acc 23.438 (27.323)	Top-5 acc 44.141 (50.236)	lr 0.01279
Warmup Train [27][1620/3239]	Time 0.207 (0.236)	Data 0.001 (0.013)	Loss 4.1653 (4.1936)	Top-1 acc 28.906 (27.337)	Top-5 acc 50.391 (50.243)	lr 0.01278
Warmup Train [27][1630/3239]	Time 0.219 (0.236)	Data 0.001 (0.013)	Loss 4.3808 (4.1936)	Top-1 acc 21.484 (27.337)	Top-5 acc 48.047 (50.251)	lr 0.01278
Warmup Train [27][1640/3239]	Time 0.234 (0.236)	Data 0.001 (0.013)	Loss 4.3011 (4.1937)	Top-1 acc 26.953 (27.336)	Top-5 acc 50.391 (50.248)	lr 0.01277
Warmup Train [27][1650/3239]	Time 0.192 (0.236)	Data 0.001 (0.013)	Loss 4.1078 (4.1937)	Top-1 acc 28.125 (27.336)	Top-5 acc 52.344 (50.247)	lr 0.01277
Warmup Train [27][1660/3239]	Time 0.234 (0.236)	Data 0.001 (0.013)	Loss 4.4153 (4.1940)	Top-1 acc 25.781 (27.333)	Top-5 acc 46.875 (50.243)	lr 0.01276
Warmup Train [27][1670/3239]	Time 0.192 (0.236)	Data 0.001 (0.013)	Loss 4.3487 (4.1943)	Top-1 acc 24.609 (27.329)	Top-5 acc 46.875 (50.238)	lr 0.01276
Warmup Train [27][1680/3239]	Time 0.200 (0.236)	Data 0.001 (0.013)	Loss 4.1823 (4.1946)	Top-1 acc 25.781 (27.318)	Top-5 acc 50.000 (50.228)	lr 0.01275
Warmup Train [27][1690/3239]	Time 0.165 (0.236)	Data 0.001 (0.013)	Loss 4.1715 (4.1942)	Top-1 acc 29.297 (27.329)	Top-5 acc 48.828 (50.239)	lr 0.01275
Warmup Train [27][1700/3239]	Time 0.316 (0.236)	Data 0.002 (0.013)	Loss 4.0122 (4.1940)	Top-1 acc 33.594 (27.336)	Top-5 acc 56.250 (50.245)	lr 0.01274
Warmup Train [27][1710/3239]	Time 0.246 (0.236)	Data 0.001 (0.013)	Loss 4.3714 (4.1940)	Top-1 acc 20.312 (27.334)	Top-5 acc 43.750 (50.242)	lr 0.01274
Warmup Train [27][1720/3239]	Time 0.195 (0.236)	Data 0.002 (0.013)	Loss 3.8692 (4.1938)	Top-1 acc 34.766 (27.334)	Top-5 acc 58.594 (50.250)	lr 0.01273
Warmup Train [27][1730/3239]	Time 0.230 (0.236)	Data 0.001 (0.013)	Loss 4.0827 (4.1940)	Top-1 acc 30.469 (27.332)	Top-5 acc 51.562 (50.241)	lr 0.01273
Warmup Train [27][1740/3239]	Time 0.240 (0.235)	Data 0.001 (0.013)	Loss 4.1782 (4.1942)	Top-1 acc 26.562 (27.321)	Top-5 acc 50.781 (50.233)	lr 0.01272
Warmup Train [27][1750/3239]	Time 0.198 (0.235)	Data 0.001 (0.012)	Loss 4.2303 (4.1940)	Top-1 acc 29.688 (27.327)	Top-5 acc 47.266 (50.240)	lr 0.01272
Warmup Train [27][1760/3239]	Time 0.216 (0.235)	Data 0.001 (0.012)	Loss 4.3233 (4.1938)	Top-1 acc 25.000 (27.330)	Top-5 acc 46.875 (50.249)	lr 0.01271
Warmup Train [27][1770/3239]	Time 0.281 (0.235)	Data 0.001 (0.012)	Loss 4.1582 (4.1938)	Top-1 acc 33.203 (27.329)	Top-5 acc 56.250 (50.246)	lr 0.01271
Warmup Train [27][1780/3239]	Time 0.166 (0.235)	Data 0.001 (0.012)	Loss 4.3089 (4.1942)	Top-1 acc 23.828 (27.322)	Top-5 acc 50.391 (50.240)	lr 0.01270
Warmup Train [27][1790/3239]	Time 0.192 (0.235)	Data 0.001 (0.012)	Loss 4.3488 (4.1940)	Top-1 acc 28.516 (27.333)	Top-5 acc 48.828 (50.252)	lr 0.01269
Warmup Train [27][1800/3239]	Time 0.226 (0.235)	Data 0.001 (0.012)	Loss 4.1508 (4.1939)	Top-1 acc 27.734 (27.330)	Top-5 acc 51.172 (50.254)	lr 0.01269
Warmup Train [27][1810/3239]	Time 0.188 (0.235)	Data 0.001 (0.012)	Loss 4.0822 (4.1938)	Top-1 acc 29.688 (27.331)	Top-5 acc 52.344 (50.250)	lr 0.01268
Warmup Train [27][1820/3239]	Time 0.228 (0.235)	Data 0.002 (0.012)	Loss 4.1901 (4.1938)	Top-1 acc 25.391 (27.323)	Top-5 acc 51.562 (50.245)	lr 0.01268
Warmup Train [27][1830/3239]	Time 0.230 (0.235)	Data 0.001 (0.012)	Loss 4.1627 (4.1936)	Top-1 acc 26.562 (27.328)	Top-5 acc 49.219 (50.250)	lr 0.01267
Warmup Train [27][1840/3239]	Time 0.262 (0.235)	Data 0.001 (0.012)	Loss 4.3585 (4.1937)	Top-1 acc 23.438 (27.326)	Top-5 acc 47.656 (50.247)	lr 0.01267
Warmup Train [27][1850/3239]	Time 0.210 (0.235)	Data 0.002 (0.012)	Loss 4.4419 (4.1937)	Top-1 acc 21.875 (27.322)	Top-5 acc 44.922 (50.249)	lr 0.01266
Warmup Train [27][1860/3239]	Time 0.205 (0.235)	Data 0.001 (0.012)	Loss 4.0756 (4.1940)	Top-1 acc 29.297 (27.319)	Top-5 acc 51.562 (50.244)	lr 0.01266
Warmup Train [27][1870/3239]	Time 0.273 (0.235)	Data 0.001 (0.012)	Loss 4.2298 (4.1941)	Top-1 acc 24.219 (27.317)	Top-5 acc 45.312 (50.242)	lr 0.01265
Warmup Train [27][1880/3239]	Time 0.179 (0.234)	Data 0.002 (0.012)	Loss 4.3135 (4.1944)	Top-1 acc 23.438 (27.309)	Top-5 acc 44.531 (50.233)	lr 0.01265
Warmup Train [27][1890/3239]	Time 0.215 (0.234)	Data 0.001 (0.012)	Loss 4.1464 (4.1944)	Top-1 acc 25.781 (27.308)	Top-5 acc 49.219 (50.238)	lr 0.01264
Warmup Train [27][1900/3239]	Time 0.279 (0.234)	Data 0.001 (0.012)	Loss 4.2132 (4.1941)	Top-1 acc 25.391 (27.307)	Top-5 acc 49.219 (50.241)	lr 0.01264
Warmup Train [27][1910/3239]	Time 0.396 (0.234)	Data 0.001 (0.012)	Loss 4.0515 (4.1942)	Top-1 acc 26.953 (27.303)	Top-5 acc 52.734 (50.239)	lr 0.01263
Warmup Train [27][1920/3239]	Time 0.188 (0.234)	Data 0.001 (0.012)	Loss 4.2747 (4.1944)	Top-1 acc 28.906 (27.305)	Top-5 acc 48.438 (50.235)	lr 0.01263
Warmup Train [27][1930/3239]	Time 0.214 (0.234)	Data 0.001 (0.012)	Loss 4.1561 (4.1943)	Top-1 acc 28.906 (27.313)	Top-5 acc 50.781 (50.239)	lr 0.01262
Warmup Train [27][1940/3239]	Time 0.224 (0.234)	Data 0.002 (0.012)	Loss 4.1442 (4.1941)	Top-1 acc 25.391 (27.313)	Top-5 acc 52.734 (50.244)	lr 0.01262
Warmup Train [27][1950/3239]	Time 0.188 (0.234)	Data 0.001 (0.011)	Loss 4.3020 (4.1941)	Top-1 acc 25.391 (27.316)	Top-5 acc 48.438 (50.250)	lr 0.01261
Warmup Train [27][1960/3239]	Time 0.190 (0.234)	Data 0.001 (0.011)	Loss 4.2647 (4.1942)	Top-1 acc 27.734 (27.315)	Top-5 acc 49.219 (50.247)	lr 0.01261
Warmup Train [27][1970/3239]	Time 0.218 (0.234)	Data 0.001 (0.011)	Loss 4.1020 (4.1939)	Top-1 acc 29.688 (27.319)	Top-5 acc 53.125 (50.252)	lr 0.01260
Warmup Train [27][1980/3239]	Time 0.212 (0.234)	Data 0.001 (0.011)	Loss 4.1779 (4.1942)	Top-1 acc 30.469 (27.314)	Top-5 acc 52.734 (50.250)	lr 0.01259
Warmup Train [27][1990/3239]	Time 0.211 (0.234)	Data 0.002 (0.011)	Loss 4.2332 (4.1943)	Top-1 acc 28.906 (27.316)	Top-5 acc 51.953 (50.250)	lr 0.01259
Warmup Train [27][2000/3239]	Time 0.219 (0.234)	Data 0.001 (0.011)	Loss 4.0830 (4.1940)	Top-1 acc 29.688 (27.322)	Top-5 acc 50.781 (50.256)	lr 0.01258
Warmup Train [27][2010/3239]	Time 0.315 (0.234)	Data 0.001 (0.011)	Loss 4.1623 (4.1942)	Top-1 acc 27.734 (27.323)	Top-5 acc 50.000 (50.249)	lr 0.01258
Warmup Train [27][2020/3239]	Time 0.346 (0.234)	Data 0.002 (0.011)	Loss 4.2099 (4.1943)	Top-1 acc 26.953 (27.321)	Top-5 acc 46.875 (50.249)	lr 0.01257
Warmup Train [27][2030/3239]	Time 0.208 (0.234)	Data 0.003 (0.011)	Loss 4.1785 (4.1942)	Top-1 acc 29.297 (27.321)	Top-5 acc 51.953 (50.251)	lr 0.01257
Warmup Train [27][2040/3239]	Time 0.141 (0.233)	Data 0.002 (0.011)	Loss 4.2229 (4.1941)	Top-1 acc 23.047 (27.315)	Top-5 acc 48.438 (50.254)	lr 0.01256
Warmup Train [27][2050/3239]	Time 0.170 (0.233)	Data 0.001 (0.011)	Loss 4.1191 (4.1940)	Top-1 acc 27.344 (27.314)	Top-5 acc 52.734 (50.254)	lr 0.01256
Warmup Train [27][2060/3239]	Time 0.195 (0.233)	Data 0.001 (0.011)	Loss 4.3234 (4.1941)	Top-1 acc 26.562 (27.308)	Top-5 acc 45.312 (50.252)	lr 0.01255
Warmup Train [27][2070/3239]	Time 0.219 (0.233)	Data 0.001 (0.011)	Loss 4.1685 (4.1941)	Top-1 acc 28.125 (27.309)	Top-5 acc 50.781 (50.258)	lr 0.01255
Warmup Train [27][2080/3239]	Time 0.215 (0.233)	Data 0.001 (0.011)	Loss 4.4170 (4.1944)	Top-1 acc 19.531 (27.301)	Top-5 acc 44.531 (50.246)	lr 0.01254
Warmup Train [27][2090/3239]	Time 0.232 (0.233)	Data 0.001 (0.011)	Loss 4.6013 (4.1944)	Top-1 acc 21.875 (27.304)	Top-5 acc 42.578 (50.243)	lr 0.01254
Warmup Train [27][2100/3239]	Time 0.186 (0.233)	Data 0.001 (0.011)	Loss 4.1166 (4.1942)	Top-1 acc 28.906 (27.303)	Top-5 acc 53.516 (50.248)	lr 0.01253
Warmup Train [27][2110/3239]	Time 0.421 (0.233)	Data 0.001 (0.011)	Loss 4.0560 (4.1941)	Top-1 acc 32.031 (27.311)	Top-5 acc 53.906 (50.257)	lr 0.01253
Warmup Train [27][2120/3239]	Time 0.195 (0.233)	Data 0.002 (0.011)	Loss 4.1704 (4.1941)	Top-1 acc 26.172 (27.314)	Top-5 acc 52.344 (50.257)	lr 0.01252
Warmup Train [27][2130/3239]	Time 0.215 (0.233)	Data 0.001 (0.011)	Loss 4.2107 (4.1939)	Top-1 acc 26.953 (27.314)	Top-5 acc 50.000 (50.264)	lr 0.01252
Warmup Train [27][2140/3239]	Time 0.225 (0.233)	Data 0.001 (0.011)	Loss 4.1228 (4.1940)	Top-1 acc 28.906 (27.310)	Top-5 acc 50.391 (50.259)	lr 0.01251
Warmup Train [27][2150/3239]	Time 0.152 (0.233)	Data 0.001 (0.011)	Loss 4.4583 (4.1936)	Top-1 acc 23.047 (27.320)	Top-5 acc 43.750 (50.264)	lr 0.01251
Warmup Train [27][2160/3239]	Time 0.232 (0.233)	Data 0.001 (0.011)	Loss 3.9815 (4.1936)	Top-1 acc 35.156 (27.327)	Top-5 acc 53.516 (50.268)	lr 0.01250
Warmup Train [27][2170/3239]	Time 0.200 (0.233)	Data 0.001 (0.011)	Loss 4.1439 (4.1935)	Top-1 acc 26.953 (27.333)	Top-5 acc 52.344 (50.275)	lr 0.01249
Warmup Train [27][2180/3239]	Time 0.272 (0.233)	Data 0.001 (0.011)	Loss 4.3220 (4.1934)	Top-1 acc 26.953 (27.335)	Top-5 acc 50.391 (50.278)	lr 0.01249
Warmup Train [27][2190/3239]	Time 0.213 (0.233)	Data 0.022 (0.011)	Loss 4.2106 (4.1934)	Top-1 acc 26.953 (27.334)	Top-5 acc 48.047 (50.272)	lr 0.01248
Warmup Train [27][2200/3239]	Time 0.192 (0.233)	Data 0.001 (0.011)	Loss 4.2183 (4.1936)	Top-1 acc 28.516 (27.338)	Top-5 acc 49.219 (50.272)	lr 0.01248
Warmup Train [27][2210/3239]	Time 0.228 (0.233)	Data 0.002 (0.010)	Loss 4.1127 (4.1933)	Top-1 acc 33.203 (27.345)	Top-5 acc 50.000 (50.278)	lr 0.01247
Warmup Train [27][2220/3239]	Time 0.285 (0.233)	Data 0.001 (0.010)	Loss 4.1205 (4.1933)	Top-1 acc 27.734 (27.348)	Top-5 acc 49.219 (50.278)	lr 0.01247
Warmup Train [27][2230/3239]	Time 0.249 (0.233)	Data 0.001 (0.010)	Loss 4.3772 (4.1933)	Top-1 acc 23.438 (27.353)	Top-5 acc 43.750 (50.277)	lr 0.01246
Warmup Train [27][2240/3239]	Time 0.199 (0.233)	Data 0.001 (0.010)	Loss 4.3444 (4.1933)	Top-1 acc 23.438 (27.348)	Top-5 acc 46.875 (50.276)	lr 0.01246
Warmup Train [27][2250/3239]	Time 0.194 (0.232)	Data 0.001 (0.010)	Loss 4.1239 (4.1933)	Top-1 acc 25.000 (27.346)	Top-5 acc 50.391 (50.279)	lr 0.01245
Warmup Train [27][2260/3239]	Time 0.202 (0.232)	Data 0.001 (0.010)	Loss 4.3164 (4.1936)	Top-1 acc 26.562 (27.343)	Top-5 acc 50.781 (50.270)	lr 0.01245
Warmup Train [27][2270/3239]	Time 0.199 (0.232)	Data 0.002 (0.010)	Loss 4.0952 (4.1932)	Top-1 acc 24.219 (27.347)	Top-5 acc 55.859 (50.281)	lr 0.01244
Warmup Train [27][2280/3239]	Time 0.198 (0.232)	Data 0.001 (0.010)	Loss 4.3329 (4.1933)	Top-1 acc 25.000 (27.344)	Top-5 acc 47.656 (50.279)	lr 0.01244
Warmup Train [27][2290/3239]	Time 0.240 (0.232)	Data 0.001 (0.010)	Loss 4.0907 (4.1934)	Top-1 acc 27.344 (27.345)	Top-5 acc 53.516 (50.275)	lr 0.01243
Warmup Train [27][2300/3239]	Time 0.139 (0.232)	Data 0.001 (0.010)	Loss 4.2004 (4.1937)	Top-1 acc 28.516 (27.340)	Top-5 acc 50.000 (50.268)	lr 0.01243
Warmup Train [27][2310/3239]	Time 0.182 (0.232)	Data 0.003 (0.010)	Loss 4.2799 (4.1937)	Top-1 acc 25.000 (27.339)	Top-5 acc 47.656 (50.262)	lr 0.01242
Warmup Train [27][2320/3239]	Time 0.138 (0.232)	Data 0.001 (0.010)	Loss 4.1950 (4.1933)	Top-1 acc 26.953 (27.344)	Top-5 acc 50.781 (50.271)	lr 0.01242
Warmup Train [27][2330/3239]	Time 0.349 (0.232)	Data 0.001 (0.010)	Loss 4.1689 (4.1934)	Top-1 acc 25.000 (27.342)	Top-5 acc 50.391 (50.266)	lr 0.01241
Warmup Train [27][2340/3239]	Time 0.180 (0.232)	Data 0.001 (0.010)	Loss 4.1416 (4.1932)	Top-1 acc 27.344 (27.344)	Top-5 acc 53.125 (50.268)	lr 0.01241
Warmup Train [27][2350/3239]	Time 0.206 (0.232)	Data 0.001 (0.010)	Loss 4.1598 (4.1932)	Top-1 acc 26.562 (27.342)	Top-5 acc 50.000 (50.269)	lr 0.01240
Warmup Train [27][2360/3239]	Time 0.237 (0.232)	Data 0.001 (0.010)	Loss 4.2025 (4.1932)	Top-1 acc 28.125 (27.342)	Top-5 acc 51.172 (50.267)	lr 0.01240
Warmup Train [27][2370/3239]	Time 0.172 (0.232)	Data 0.001 (0.010)	Loss 4.3615 (4.1930)	Top-1 acc 21.875 (27.343)	Top-5 acc 43.359 (50.265)	lr 0.01239
Warmup Train [27][2380/3239]	Time 0.198 (0.232)	Data 0.001 (0.010)	Loss 4.0470 (4.1929)	Top-1 acc 28.906 (27.345)	Top-5 acc 52.344 (50.266)	lr 0.01238
Warmup Train [27][2390/3239]	Time 0.268 (0.232)	Data 0.001 (0.010)	Loss 4.1607 (4.1928)	Top-1 acc 28.125 (27.344)	Top-5 acc 50.391 (50.264)	lr 0.01238
Warmup Train [27][2400/3239]	Time 0.189 (0.232)	Data 0.001 (0.010)	Loss 4.3332 (4.1928)	Top-1 acc 22.656 (27.343)	Top-5 acc 46.094 (50.263)	lr 0.01237
Warmup Train [27][2410/3239]	Time 0.181 (0.232)	Data 0.001 (0.010)	Loss 4.3410 (4.1929)	Top-1 acc 24.609 (27.343)	Top-5 acc 43.750 (50.257)	lr 0.01237
Warmup Train [27][2420/3239]	Time 0.157 (0.231)	Data 0.002 (0.010)	Loss 4.3635 (4.1929)	Top-1 acc 23.828 (27.343)	Top-5 acc 46.875 (50.260)	lr 0.01236
Warmup Train [27][2430/3239]	Time 0.221 (0.231)	Data 0.001 (0.010)	Loss 4.0826 (4.1929)	Top-1 acc 30.078 (27.341)	Top-5 acc 49.219 (50.258)	lr 0.01236
Warmup Train [27][2440/3239]	Time 0.215 (0.231)	Data 0.001 (0.010)	Loss 4.1331 (4.1928)	Top-1 acc 29.297 (27.340)	Top-5 acc 54.297 (50.263)	lr 0.01235
Warmup Train [27][2450/3239]	Time 0.310 (0.231)	Data 0.001 (0.010)	Loss 4.0209 (4.1926)	Top-1 acc 28.516 (27.344)	Top-5 acc 53.906 (50.267)	lr 0.01235
Warmup Train [27][2460/3239]	Time 0.222 (0.231)	Data 0.001 (0.010)	Loss 4.2994 (4.1926)	Top-1 acc 22.656 (27.340)	Top-5 acc 46.484 (50.266)	lr 0.01234
Warmup Train [27][2470/3239]	Time 0.207 (0.231)	Data 0.001 (0.010)	Loss 4.0638 (4.1923)	Top-1 acc 28.516 (27.348)	Top-5 acc 50.000 (50.275)	lr 0.01234
Warmup Train [27][2480/3239]	Time 0.236 (0.231)	Data 0.001 (0.010)	Loss 4.3817 (4.1927)	Top-1 acc 26.172 (27.347)	Top-5 acc 47.266 (50.270)	lr 0.01233
Warmup Train [27][2490/3239]	Time 0.229 (0.231)	Data 0.001 (0.010)	Loss 4.0326 (4.1928)	Top-1 acc 31.250 (27.346)	Top-5 acc 55.859 (50.270)	lr 0.01233
Warmup Train [27][2500/3239]	Time 0.218 (0.231)	Data 0.001 (0.010)	Loss 4.2408 (4.1928)	Top-1 acc 29.688 (27.347)	Top-5 acc 52.344 (50.275)	lr 0.01232
Warmup Train [27][2510/3239]	Time 0.219 (0.231)	Data 0.002 (0.010)	Loss 4.0368 (4.1930)	Top-1 acc 28.906 (27.347)	Top-5 acc 52.734 (50.270)	lr 0.01232
Warmup Train [27][2520/3239]	Time 0.240 (0.231)	Data 0.002 (0.010)	Loss 4.1386 (4.1929)	Top-1 acc 27.344 (27.349)	Top-5 acc 48.828 (50.269)	lr 0.01231
Warmup Train [27][2530/3239]	Time 0.255 (0.231)	Data 0.001 (0.010)	Loss 4.1572 (4.1926)	Top-1 acc 30.469 (27.355)	Top-5 acc 50.391 (50.277)	lr 0.01231
Warmup Train [27][2540/3239]	Time 0.252 (0.231)	Data 0.001 (0.010)	Loss 3.9809 (4.1924)	Top-1 acc 28.516 (27.363)	Top-5 acc 57.422 (50.287)	lr 0.01230
Warmup Train [27][2550/3239]	Time 0.316 (0.231)	Data 0.001 (0.009)	Loss 4.1425 (4.1924)	Top-1 acc 26.953 (27.362)	Top-5 acc 51.562 (50.288)	lr 0.01230
Warmup Train [27][2560/3239]	Time 0.254 (0.231)	Data 0.001 (0.009)	Loss 4.1815 (4.1923)	Top-1 acc 27.734 (27.365)	Top-5 acc 47.656 (50.289)	lr 0.01229
Warmup Train [27][2570/3239]	Time 0.213 (0.231)	Data 0.001 (0.009)	Loss 4.1614 (4.1923)	Top-1 acc 26.172 (27.364)	Top-5 acc 50.391 (50.283)	lr 0.01229
Warmup Train [27][2580/3239]	Time 0.258 (0.231)	Data 0.001 (0.009)	Loss 4.1625 (4.1920)	Top-1 acc 25.781 (27.369)	Top-5 acc 51.953 (50.291)	lr 0.01228
Warmup Train [27][2590/3239]	Time 0.260 (0.231)	Data 0.001 (0.009)	Loss 4.0074 (4.1918)	Top-1 acc 31.641 (27.373)	Top-5 acc 53.906 (50.295)	lr 0.01228
Warmup Train [27][2600/3239]	Time 0.203 (0.231)	Data 0.001 (0.009)	Loss 4.2556 (4.1917)	Top-1 acc 28.516 (27.374)	Top-5 acc 45.703 (50.299)	lr 0.01227
Warmup Train [27][2610/3239]	Time 0.209 (0.231)	Data 0.001 (0.009)	Loss 4.3080 (4.1918)	Top-1 acc 25.391 (27.380)	Top-5 acc 50.000 (50.300)	lr 0.01226
Warmup Train [27][2620/3239]	Time 0.290 (0.231)	Data 0.001 (0.009)	Loss 4.0456 (4.1917)	Top-1 acc 28.906 (27.376)	Top-5 acc 53.125 (50.300)	lr 0.01226
Warmup Train [27][2630/3239]	Time 0.164 (0.231)	Data 0.001 (0.009)	Loss 4.1612 (4.1916)	Top-1 acc 27.344 (27.381)	Top-5 acc 54.297 (50.302)	lr 0.01225
Warmup Train [27][2640/3239]	Time 0.189 (0.231)	Data 0.001 (0.009)	Loss 4.0886 (4.1916)	Top-1 acc 29.297 (27.379)	Top-5 acc 50.000 (50.300)	lr 0.01225
Warmup Train [27][2650/3239]	Time 0.295 (0.231)	Data 0.001 (0.009)	Loss 4.2544 (4.1916)	Top-1 acc 29.297 (27.379)	Top-5 acc 48.047 (50.300)	lr 0.01224
Warmup Train [27][2660/3239]	Time 0.177 (0.231)	Data 0.001 (0.009)	Loss 4.1025 (4.1915)	Top-1 acc 30.469 (27.383)	Top-5 acc 54.297 (50.305)	lr 0.01224
Warmup Train [27][2670/3239]	Time 0.190 (0.231)	Data 0.002 (0.009)	Loss 4.3397 (4.1917)	Top-1 acc 25.781 (27.379)	Top-5 acc 48.438 (50.302)	lr 0.01223
Warmup Train [27][2680/3239]	Time 0.195 (0.231)	Data 0.001 (0.009)	Loss 4.3621 (4.1916)	Top-1 acc 22.266 (27.379)	Top-5 acc 43.750 (50.301)	lr 0.01223
Warmup Train [27][2690/3239]	Time 0.214 (0.231)	Data 0.003 (0.009)	Loss 4.2951 (4.1917)	Top-1 acc 27.344 (27.381)	Top-5 acc 46.094 (50.298)	lr 0.01222
Warmup Train [27][2700/3239]	Time 0.232 (0.231)	Data 0.026 (0.009)	Loss 4.1206 (4.1915)	Top-1 acc 26.953 (27.383)	Top-5 acc 48.828 (50.305)	lr 0.01222
Warmup Train [27][2710/3239]	Time 0.189 (0.231)	Data 0.002 (0.009)	Loss 4.0658 (4.1914)	Top-1 acc 30.859 (27.384)	Top-5 acc 51.172 (50.309)	lr 0.01221
Warmup Train [27][2720/3239]	Time 0.164 (0.230)	Data 0.001 (0.009)	Loss 4.2830 (4.1914)	Top-1 acc 23.828 (27.383)	Top-5 acc 45.703 (50.308)	lr 0.01221
Warmup Train [27][2730/3239]	Time 0.211 (0.230)	Data 0.001 (0.009)	Loss 4.1385 (4.1912)	Top-1 acc 32.031 (27.386)	Top-5 acc 50.000 (50.316)	lr 0.01220
Warmup Train [27][2740/3239]	Time 0.203 (0.230)	Data 0.001 (0.009)	Loss 4.0136 (4.1913)	Top-1 acc 28.906 (27.388)	Top-5 acc 56.250 (50.318)	lr 0.01220
Warmup Train [27][2750/3239]	Time 0.309 (0.230)	Data 0.001 (0.009)	Loss 4.1859 (4.1914)	Top-1 acc 29.688 (27.391)	Top-5 acc 51.172 (50.316)	lr 0.01219
Warmup Train [27][2760/3239]	Time 0.156 (0.230)	Data 0.001 (0.009)	Loss 4.2509 (4.1912)	Top-1 acc 26.562 (27.399)	Top-5 acc 44.531 (50.323)	lr 0.01219
Warmup Train [27][2770/3239]	Time 0.208 (0.230)	Data 0.001 (0.009)	Loss 4.2818 (4.1913)	Top-1 acc 26.953 (27.397)	Top-5 acc 50.391 (50.318)	lr 0.01218
Warmup Train [27][2780/3239]	Time 0.191 (0.230)	Data 0.001 (0.009)	Loss 4.4060 (4.1916)	Top-1 acc 23.438 (27.392)	Top-5 acc 46.484 (50.309)	lr 0.01218
Warmup Train [27][2790/3239]	Time 0.200 (0.230)	Data 0.001 (0.009)	Loss 4.0699 (4.1915)	Top-1 acc 27.344 (27.393)	Top-5 acc 52.734 (50.308)	lr 0.01217
Warmup Train [27][2800/3239]	Time 0.229 (0.230)	Data 0.001 (0.009)	Loss 4.3043 (4.1913)	Top-1 acc 22.656 (27.392)	Top-5 acc 48.828 (50.312)	lr 0.01217
Warmup Train [27][2810/3239]	Time 0.280 (0.230)	Data 0.001 (0.009)	Loss 4.3134 (4.1915)	Top-1 acc 26.562 (27.390)	Top-5 acc 49.219 (50.313)	lr 0.01216
Warmup Train [27][2820/3239]	Time 0.253 (0.230)	Data 0.001 (0.009)	Loss 4.1384 (4.1916)	Top-1 acc 30.859 (27.388)	Top-5 acc 48.438 (50.311)	lr 0.01216
Warmup Train [27][2830/3239]	Time 0.172 (0.230)	Data 0.001 (0.009)	Loss 4.1942 (4.1915)	Top-1 acc 28.125 (27.389)	Top-5 acc 48.047 (50.309)	lr 0.01215
Warmup Train [27][2840/3239]	Time 0.180 (0.230)	Data 0.001 (0.009)	Loss 4.0402 (4.1915)	Top-1 acc 31.641 (27.388)	Top-5 acc 51.562 (50.308)	lr 0.01214
Warmup Train [27][2850/3239]	Time 0.286 (0.230)	Data 0.001 (0.009)	Loss 3.9753 (4.1912)	Top-1 acc 31.641 (27.397)	Top-5 acc 57.031 (50.315)	lr 0.01214
Warmup Train [27][2860/3239]	Time 0.362 (0.230)	Data 0.001 (0.009)	Loss 4.2997 (4.1913)	Top-1 acc 23.047 (27.394)	Top-5 acc 48.828 (50.316)	lr 0.01213
Warmup Train [27][2870/3239]	Time 0.206 (0.230)	Data 0.001 (0.009)	Loss 4.1357 (4.1913)	Top-1 acc 30.859 (27.398)	Top-5 acc 53.125 (50.319)	lr 0.01213
Warmup Train [27][2880/3239]	Time 0.222 (0.230)	Data 0.001 (0.009)	Loss 4.0275 (4.1914)	Top-1 acc 28.516 (27.395)	Top-5 acc 52.344 (50.317)	lr 0.01212
Warmup Train [27][2890/3239]	Time 0.266 (0.230)	Data 0.001 (0.009)	Loss 4.0242 (4.1915)	Top-1 acc 34.375 (27.397)	Top-5 acc 52.734 (50.318)	lr 0.01212
Warmup Train [27][2900/3239]	Time 0.224 (0.230)	Data 0.001 (0.009)	Loss 4.2310 (4.1913)	Top-1 acc 27.344 (27.398)	Top-5 acc 49.609 (50.322)	lr 0.01211
Warmup Train [27][2910/3239]	Time 0.235 (0.230)	Data 0.001 (0.009)	Loss 4.1973 (4.1912)	Top-1 acc 26.953 (27.401)	Top-5 acc 44.531 (50.322)	lr 0.01211
Warmup Train [27][2920/3239]	Time 0.238 (0.230)	Data 0.001 (0.009)	Loss 4.1715 (4.1912)	Top-1 acc 27.344 (27.400)	Top-5 acc 50.781 (50.319)	lr 0.01210
Warmup Train [27][2930/3239]	Time 0.192 (0.230)	Data 0.001 (0.009)	Loss 4.2866 (4.1914)	Top-1 acc 26.562 (27.399)	Top-5 acc 47.656 (50.317)	lr 0.01210
Warmup Train [27][2940/3239]	Time 0.212 (0.230)	Data 0.001 (0.009)	Loss 4.2549 (4.1915)	Top-1 acc 24.219 (27.395)	Top-5 acc 47.266 (50.314)	lr 0.01209
Warmup Train [27][2950/3239]	Time 0.234 (0.230)	Data 0.001 (0.009)	Loss 4.0893 (4.1914)	Top-1 acc 27.344 (27.397)	Top-5 acc 53.906 (50.316)	lr 0.01209
Warmup Train [27][2960/3239]	Time 0.338 (0.230)	Data 0.001 (0.009)	Loss 4.2849 (4.1914)	Top-1 acc 24.219 (27.397)	Top-5 acc 46.484 (50.315)	lr 0.01208
Warmup Train [27][2970/3239]	Time 0.195 (0.230)	Data 0.002 (0.009)	Loss 4.1507 (4.1913)	Top-1 acc 25.781 (27.400)	Top-5 acc 52.344 (50.316)	lr 0.01208
Warmup Train [27][2980/3239]	Time 0.301 (0.230)	Data 0.002 (0.009)	Loss 4.2809 (4.1911)	Top-1 acc 26.172 (27.404)	Top-5 acc 49.219 (50.319)	lr 0.01207
Warmup Train [27][2990/3239]	Time 0.222 (0.230)	Data 0.002 (0.009)	Loss 4.3932 (4.1910)	Top-1 acc 21.875 (27.400)	Top-5 acc 44.922 (50.319)	lr 0.01207
Warmup Train [27][3000/3239]	Time 0.223 (0.230)	Data 0.001 (0.009)	Loss 4.1146 (4.1909)	Top-1 acc 30.078 (27.400)	Top-5 acc 54.297 (50.322)	lr 0.01206
Warmup Train [27][3010/3239]	Time 0.209 (0.230)	Data 0.002 (0.009)	Loss 4.3568 (4.1908)	Top-1 acc 26.562 (27.404)	Top-5 acc 48.438 (50.328)	lr 0.01206
Warmup Train [27][3020/3239]	Time 0.182 (0.230)	Data 0.002 (0.009)	Loss 4.1449 (4.1907)	Top-1 acc 29.297 (27.407)	Top-5 acc 50.000 (50.330)	lr 0.01205
Warmup Train [27][3030/3239]	Time 0.250 (0.231)	Data 0.002 (0.009)	Loss 4.1296 (4.1906)	Top-1 acc 30.859 (27.410)	Top-5 acc 51.172 (50.332)	lr 0.01205
Warmup Train [27][3040/3239]	Time 0.254 (0.231)	Data 0.001 (0.008)	Loss 4.0015 (4.1904)	Top-1 acc 30.078 (27.419)	Top-5 acc 53.516 (50.336)	lr 0.01204
Warmup Train [27][3050/3239]	Time 0.333 (0.231)	Data 0.002 (0.008)	Loss 4.1274 (4.1900)	Top-1 acc 32.812 (27.426)	Top-5 acc 53.516 (50.344)	lr 0.01204
Warmup Train [27][3060/3239]	Time 0.334 (0.231)	Data 0.002 (0.008)	Loss 4.1905 (4.1900)	Top-1 acc 29.688 (27.430)	Top-5 acc 52.734 (50.340)	lr 0.01203
Warmup Train [27][3070/3239]	Time 0.311 (0.231)	Data 0.001 (0.008)	Loss 3.9469 (4.1900)	Top-1 acc 32.812 (27.431)	Top-5 acc 54.297 (50.338)	lr 0.01203
Warmup Train [27][3080/3239]	Time 0.186 (0.231)	Data 0.001 (0.008)	Loss 4.0857 (4.1899)	Top-1 acc 28.906 (27.431)	Top-5 acc 54.297 (50.338)	lr 0.01202
Warmup Train [27][3090/3239]	Time 0.227 (0.231)	Data 0.001 (0.008)	Loss 4.3020 (4.1901)	Top-1 acc 28.516 (27.428)	Top-5 acc 50.000 (50.333)	lr 0.01202
Warmup Train [27][3100/3239]	Time 0.218 (0.231)	Data 0.001 (0.008)	Loss 4.1134 (4.1900)	Top-1 acc 32.422 (27.430)	Top-5 acc 51.172 (50.333)	lr 0.01201
Warmup Train [27][3110/3239]	Time 0.288 (0.231)	Data 0.002 (0.008)	Loss 4.4321 (4.1901)	Top-1 acc 23.047 (27.430)	Top-5 acc 43.359 (50.332)	lr 0.01200
Warmup Train [27][3120/3239]	Time 0.204 (0.231)	Data 0.035 (0.008)	Loss 4.2105 (4.1902)	Top-1 acc 24.609 (27.428)	Top-5 acc 51.562 (50.329)	lr 0.01200
Warmup Train [27][3130/3239]	Time 0.220 (0.231)	Data 0.002 (0.008)	Loss 4.1547 (4.1900)	Top-1 acc 28.516 (27.432)	Top-5 acc 49.609 (50.333)	lr 0.01199
Warmup Train [27][3140/3239]	Time 0.247 (0.231)	Data 0.046 (0.008)	Loss 4.3687 (4.1899)	Top-1 acc 25.000 (27.432)	Top-5 acc 46.094 (50.339)	lr 0.01199
Warmup Train [27][3150/3239]	Time 0.268 (0.231)	Data 0.001 (0.008)	Loss 4.1684 (4.1897)	Top-1 acc 26.953 (27.435)	Top-5 acc 53.125 (50.344)	lr 0.01198
Warmup Train [27][3160/3239]	Time 0.196 (0.231)	Data 0.001 (0.008)	Loss 4.3540 (4.1897)	Top-1 acc 25.781 (27.436)	Top-5 acc 47.656 (50.344)	lr 0.01198
Warmup Train [27][3170/3239]	Time 0.272 (0.231)	Data 0.001 (0.008)	Loss 4.0881 (4.1896)	Top-1 acc 28.125 (27.436)	Top-5 acc 53.125 (50.346)	lr 0.01197
Warmup Train [27][3180/3239]	Time 0.181 (0.231)	Data 0.000 (0.008)	Loss 4.2238 (4.1895)	Top-1 acc 26.562 (27.437)	Top-5 acc 53.516 (50.352)	lr 0.01197
Warmup Train [27][3190/3239]	Time 0.209 (0.231)	Data 0.000 (0.008)	Loss 4.1042 (4.1893)	Top-1 acc 28.516 (27.438)	Top-5 acc 53.516 (50.359)	lr 0.01196
Warmup Train [27][3200/3239]	Time 0.245 (0.231)	Data 0.000 (0.008)	Loss 4.2178 (4.1892)	Top-1 acc 26.953 (27.437)	Top-5 acc 47.266 (50.361)	lr 0.01196
Warmup Train [27][3210/3239]	Time 0.166 (0.231)	Data 0.000 (0.008)	Loss 4.2070 (4.1891)	Top-1 acc 27.734 (27.440)	Top-5 acc 49.609 (50.366)	lr 0.01195
Warmup Train [27][3220/3239]	Time 0.178 (0.231)	Data 0.000 (0.008)	Loss 4.1396 (4.1892)	Top-1 acc 25.781 (27.436)	Top-5 acc 49.609 (50.359)	lr 0.01195
Warmup Train [27][3230/3239]	Time 0.148 (0.231)	Data 0.000 (0.008)	Loss 4.1970 (4.1891)	Top-1 acc 30.078 (27.438)	Top-5 acc 50.391 (50.363)	lr 0.01194
Warmup Train [27][3239/3239]	Time 0.140 (0.231)	Data 0.000 (0.008)	Loss 4.9114 (4.1890)	Top-1 acc 14.815 (27.440)	Top-5 acc 30.864 (50.363)	lr 0.01194
==========Warmup Valid [27/40]	loss 3.141	top-1 acc 35.039	top-5 acc 59.697	Train top-1 27.440	top-5 50.363	flops: 442.4M
Warmup Train [28][0/3239]	Time 19.923 (19.923)	Data 17.428 (17.428)	Loss 4.0034 (4.0034)	Top-1 acc 30.078 (30.078)	Top-5 acc 55.859 (55.859)	lr 0.01194
Warmup Train [28][10/3239]	Time 0.267 (2.058)	Data 0.002 (1.589)	Loss 4.0959 (4.0603)	Top-1 acc 26.172 (28.551)	Top-5 acc 53.125 (52.628)	lr 0.01193
Warmup Train [28][20/3239]	Time 0.207 (1.193)	Data 0.001 (0.833)	Loss 3.9750 (4.1123)	Top-1 acc 32.812 (28.144)	Top-5 acc 57.422 (51.618)	lr 0.01193
Warmup Train [28][30/3239]	Time 0.261 (0.881)	Data 0.001 (0.565)	Loss 4.1245 (4.1290)	Top-1 acc 26.562 (28.100)	Top-5 acc 50.781 (51.247)	lr 0.01192
Warmup Train [28][40/3239]	Time 0.185 (0.719)	Data 0.001 (0.428)	Loss 4.0325 (4.1195)	Top-1 acc 28.906 (28.268)	Top-5 acc 51.172 (51.620)	lr 0.01192
Warmup Train [28][50/3239]	Time 0.206 (0.625)	Data 0.001 (0.345)	Loss 4.1570 (4.1227)	Top-1 acc 25.000 (28.355)	Top-5 acc 52.344 (51.593)	lr 0.01191
Warmup Train [28][60/3239]	Time 0.197 (0.557)	Data 0.001 (0.289)	Loss 4.1418 (4.1300)	Top-1 acc 25.391 (28.215)	Top-5 acc 51.172 (51.383)	lr 0.01191
Warmup Train [28][70/3239]	Time 0.158 (0.512)	Data 0.001 (0.249)	Loss 4.0027 (4.1261)	Top-1 acc 32.031 (28.428)	Top-5 acc 57.031 (51.574)	lr 0.01190
Warmup Train [28][80/3239]	Time 0.343 (0.478)	Data 0.001 (0.219)	Loss 4.2046 (4.1335)	Top-1 acc 27.734 (28.188)	Top-5 acc 49.609 (51.331)	lr 0.01190
Warmup Train [28][90/3239]	Time 0.245 (0.450)	Data 0.001 (0.195)	Loss 4.1531 (4.1313)	Top-1 acc 28.125 (28.181)	Top-5 acc 53.516 (51.352)	lr 0.01189
Warmup Train [28][100/3239]	Time 0.188 (0.428)	Data 0.001 (0.176)	Loss 4.2205 (4.1296)	Top-1 acc 28.906 (28.295)	Top-5 acc 52.344 (51.485)	lr 0.01189
Warmup Train [28][110/3239]	Time 0.233 (0.410)	Data 0.001 (0.161)	Loss 4.2470 (4.1358)	Top-1 acc 23.438 (28.100)	Top-5 acc 45.703 (51.327)	lr 0.01188
Warmup Train [28][120/3239]	Time 0.201 (0.395)	Data 0.002 (0.147)	Loss 4.3095 (4.1366)	Top-1 acc 24.609 (28.109)	Top-5 acc 48.047 (51.314)	lr 0.01188
Warmup Train [28][130/3239]	Time 0.203 (0.382)	Data 0.002 (0.136)	Loss 4.0228 (4.1322)	Top-1 acc 30.078 (28.241)	Top-5 acc 52.734 (51.470)	lr 0.01187
Warmup Train [28][140/3239]	Time 0.182 (0.372)	Data 0.002 (0.127)	Loss 4.0305 (4.1322)	Top-1 acc 30.469 (28.319)	Top-5 acc 52.734 (51.449)	lr 0.01187
Warmup Train [28][150/3239]	Time 0.206 (0.361)	Data 0.001 (0.119)	Loss 4.0640 (4.1324)	Top-1 acc 30.469 (28.270)	Top-5 acc 52.734 (51.451)	lr 0.01186
Warmup Train [28][160/3239]	Time 0.220 (0.353)	Data 0.001 (0.111)	Loss 4.1745 (4.1328)	Top-1 acc 27.734 (28.237)	Top-5 acc 50.000 (51.490)	lr 0.01185
Warmup Train [28][170/3239]	Time 0.282 (0.346)	Data 0.001 (0.105)	Loss 4.1701 (4.1331)	Top-1 acc 25.391 (28.255)	Top-5 acc 49.609 (51.467)	lr 0.01185
Warmup Train [28][180/3239]	Time 0.322 (0.339)	Data 0.001 (0.099)	Loss 4.2759 (4.1381)	Top-1 acc 26.562 (28.183)	Top-5 acc 50.391 (51.364)	lr 0.01184
Warmup Train [28][190/3239]	Time 0.197 (0.333)	Data 0.001 (0.094)	Loss 4.1612 (4.1401)	Top-1 acc 26.953 (28.147)	Top-5 acc 51.562 (51.293)	lr 0.01184
Warmup Train [28][200/3239]	Time 0.242 (0.327)	Data 0.001 (0.090)	Loss 3.9147 (4.1389)	Top-1 acc 34.375 (28.142)	Top-5 acc 54.297 (51.302)	lr 0.01183
Warmup Train [28][210/3239]	Time 0.208 (0.323)	Data 0.001 (0.086)	Loss 4.0469 (4.1394)	Top-1 acc 28.125 (28.116)	Top-5 acc 51.172 (51.279)	lr 0.01183
Warmup Train [28][220/3239]	Time 0.199 (0.318)	Data 0.001 (0.082)	Loss 3.9987 (4.1388)	Top-1 acc 31.250 (28.106)	Top-5 acc 53.516 (51.264)	lr 0.01182
Warmup Train [28][230/3239]	Time 0.246 (0.314)	Data 0.002 (0.079)	Loss 4.0973 (4.1402)	Top-1 acc 28.125 (28.091)	Top-5 acc 50.391 (51.196)	lr 0.01182
Warmup Train [28][240/3239]	Time 0.246 (0.311)	Data 0.001 (0.076)	Loss 4.1834 (4.1402)	Top-1 acc 25.391 (28.106)	Top-5 acc 50.000 (51.214)	lr 0.01181
Warmup Train [28][250/3239]	Time 0.196 (0.307)	Data 0.001 (0.073)	Loss 4.2120 (4.1412)	Top-1 acc 22.656 (28.060)	Top-5 acc 43.750 (51.166)	lr 0.01181
Warmup Train [28][260/3239]	Time 0.212 (0.305)	Data 0.002 (0.070)	Loss 4.2343 (4.1434)	Top-1 acc 28.516 (28.014)	Top-5 acc 50.000 (51.151)	lr 0.01180
Warmup Train [28][270/3239]	Time 0.372 (0.303)	Data 0.001 (0.067)	Loss 4.1441 (4.1439)	Top-1 acc 27.344 (28.005)	Top-5 acc 50.391 (51.153)	lr 0.01180
Warmup Train [28][280/3239]	Time 0.148 (0.300)	Data 0.002 (0.065)	Loss 4.0035 (4.1440)	Top-1 acc 31.641 (28.022)	Top-5 acc 53.516 (51.130)	lr 0.01179
Warmup Train [28][290/3239]	Time 0.228 (0.297)	Data 0.001 (0.063)	Loss 4.2051 (4.1443)	Top-1 acc 26.953 (27.997)	Top-5 acc 48.828 (51.122)	lr 0.01179
Warmup Train [28][300/3239]	Time 0.254 (0.295)	Data 0.002 (0.061)	Loss 4.1061 (4.1437)	Top-1 acc 30.078 (28.020)	Top-5 acc 52.344 (51.172)	lr 0.01178
Warmup Train [28][310/3239]	Time 0.236 (0.293)	Data 0.002 (0.059)	Loss 4.4596 (4.1441)	Top-1 acc 24.219 (28.023)	Top-5 acc 45.703 (51.189)	lr 0.01178
Warmup Train [28][320/3239]	Time 0.264 (0.291)	Data 0.002 (0.057)	Loss 4.1334 (4.1458)	Top-1 acc 29.688 (27.991)	Top-5 acc 52.344 (51.139)	lr 0.01177
Warmup Train [28][330/3239]	Time 0.296 (0.289)	Data 0.001 (0.056)	Loss 4.1835 (4.1464)	Top-1 acc 28.516 (28.001)	Top-5 acc 51.172 (51.113)	lr 0.01177
Warmup Train [28][340/3239]	Time 0.178 (0.287)	Data 0.003 (0.054)	Loss 4.0115 (4.1458)	Top-1 acc 28.906 (28.013)	Top-5 acc 54.297 (51.156)	lr 0.01176
Warmup Train [28][350/3239]	Time 0.198 (0.285)	Data 0.001 (0.053)	Loss 4.2409 (4.1477)	Top-1 acc 25.000 (28.015)	Top-5 acc 48.828 (51.122)	lr 0.01176
Warmup Train [28][360/3239]	Time 0.288 (0.283)	Data 0.001 (0.051)	Loss 4.1696 (4.1465)	Top-1 acc 27.344 (28.056)	Top-5 acc 51.172 (51.160)	lr 0.01175
Warmup Train [28][370/3239]	Time 0.179 (0.281)	Data 0.001 (0.050)	Loss 4.5499 (4.1480)	Top-1 acc 24.609 (28.050)	Top-5 acc 39.453 (51.144)	lr 0.01175
Warmup Train [28][380/3239]	Time 0.215 (0.280)	Data 0.001 (0.049)	Loss 4.2416 (4.1476)	Top-1 acc 29.297 (28.075)	Top-5 acc 52.344 (51.168)	lr 0.01174
Warmup Train [28][390/3239]	Time 0.176 (0.278)	Data 0.001 (0.048)	Loss 4.2955 (4.1493)	Top-1 acc 25.391 (28.053)	Top-5 acc 47.266 (51.143)	lr 0.01174
Warmup Train [28][400/3239]	Time 0.277 (0.277)	Data 0.001 (0.047)	Loss 4.0992 (4.1483)	Top-1 acc 29.297 (28.090)	Top-5 acc 51.953 (51.189)	lr 0.01173
Warmup Train [28][410/3239]	Time 0.191 (0.276)	Data 0.001 (0.046)	Loss 4.0047 (4.1477)	Top-1 acc 31.641 (28.108)	Top-5 acc 53.516 (51.186)	lr 0.01173
Warmup Train [28][420/3239]	Time 0.193 (0.274)	Data 0.001 (0.045)	Loss 4.2178 (4.1483)	Top-1 acc 26.562 (28.100)	Top-5 acc 49.609 (51.169)	lr 0.01172
Warmup Train [28][430/3239]	Time 0.156 (0.273)	Data 0.004 (0.044)	Loss 4.3443 (4.1488)	Top-1 acc 25.000 (28.120)	Top-5 acc 45.312 (51.161)	lr 0.01172
Warmup Train [28][440/3239]	Time 0.173 (0.272)	Data 0.003 (0.043)	Loss 4.4688 (4.1501)	Top-1 acc 23.438 (28.125)	Top-5 acc 44.922 (51.134)	lr 0.01171
Warmup Train [28][450/3239]	Time 0.185 (0.271)	Data 0.001 (0.042)	Loss 4.3892 (4.1530)	Top-1 acc 21.094 (28.044)	Top-5 acc 44.922 (51.058)	lr 0.01171
Warmup Train [28][460/3239]	Time 0.363 (0.270)	Data 0.002 (0.041)	Loss 4.1997 (4.1525)	Top-1 acc 25.781 (28.047)	Top-5 acc 51.562 (51.065)	lr 0.01170
Warmup Train [28][470/3239]	Time 0.217 (0.269)	Data 0.001 (0.040)	Loss 4.2244 (4.1528)	Top-1 acc 26.562 (28.059)	Top-5 acc 46.484 (51.046)	lr 0.01170
Warmup Train [28][480/3239]	Time 0.266 (0.268)	Data 0.001 (0.039)	Loss 4.3470 (4.1524)	Top-1 acc 24.219 (28.075)	Top-5 acc 47.266 (51.084)	lr 0.01169
Warmup Train [28][490/3239]	Time 0.201 (0.267)	Data 0.001 (0.039)	Loss 4.0239 (4.1520)	Top-1 acc 32.031 (28.070)	Top-5 acc 54.688 (51.088)	lr 0.01169
Warmup Train [28][500/3239]	Time 0.225 (0.266)	Data 0.002 (0.038)	Loss 4.1737 (4.1525)	Top-1 acc 29.688 (28.072)	Top-5 acc 50.000 (51.099)	lr 0.01168
Warmup Train [28][510/3239]	Time 0.144 (0.265)	Data 0.001 (0.037)	Loss 4.1323 (4.1523)	Top-1 acc 29.297 (28.085)	Top-5 acc 51.953 (51.105)	lr 0.01168
Warmup Train [28][520/3239]	Time 0.167 (0.264)	Data 0.001 (0.037)	Loss 4.1489 (4.1536)	Top-1 acc 31.250 (28.076)	Top-5 acc 53.906 (51.081)	lr 0.01167
Warmup Train [28][530/3239]	Time 0.194 (0.263)	Data 0.001 (0.036)	Loss 4.1118 (4.1544)	Top-1 acc 29.688 (28.073)	Top-5 acc 53.516 (51.076)	lr 0.01166
Warmup Train [28][540/3239]	Time 0.185 (0.263)	Data 0.002 (0.035)	Loss 3.9590 (4.1547)	Top-1 acc 30.078 (28.078)	Top-5 acc 58.203 (51.073)	lr 0.01166
Warmup Train [28][550/3239]	Time 0.204 (0.262)	Data 0.001 (0.035)	Loss 4.3015 (4.1545)	Top-1 acc 22.656 (28.075)	Top-5 acc 48.047 (51.071)	lr 0.01165
Warmup Train [28][560/3239]	Time 0.237 (0.261)	Data 0.001 (0.034)	Loss 4.0278 (4.1540)	Top-1 acc 32.812 (28.085)	Top-5 acc 53.516 (51.081)	lr 0.01165
Warmup Train [28][570/3239]	Time 0.301 (0.260)	Data 0.001 (0.034)	Loss 4.1551 (4.1546)	Top-1 acc 25.391 (28.065)	Top-5 acc 52.734 (51.074)	lr 0.01164
Warmup Train [28][580/3239]	Time 0.203 (0.260)	Data 0.001 (0.033)	Loss 4.0866 (4.1545)	Top-1 acc 29.297 (28.077)	Top-5 acc 51.953 (51.072)	lr 0.01164
Warmup Train [28][590/3239]	Time 0.223 (0.259)	Data 0.001 (0.033)	Loss 4.1530 (4.1544)	Top-1 acc 26.562 (28.073)	Top-5 acc 53.125 (51.096)	lr 0.01163
Warmup Train [28][600/3239]	Time 0.182 (0.259)	Data 0.001 (0.032)	Loss 4.0665 (4.1545)	Top-1 acc 30.859 (28.078)	Top-5 acc 53.906 (51.115)	lr 0.01163
Warmup Train [28][610/3239]	Time 0.254 (0.258)	Data 0.001 (0.032)	Loss 3.8474 (4.1546)	Top-1 acc 37.891 (28.084)	Top-5 acc 59.375 (51.110)	lr 0.01162
Warmup Train [28][620/3239]	Time 0.174 (0.258)	Data 0.001 (0.031)	Loss 4.2014 (4.1555)	Top-1 acc 27.344 (28.061)	Top-5 acc 50.391 (51.088)	lr 0.01162
Warmup Train [28][630/3239]	Time 0.256 (0.257)	Data 0.001 (0.031)	Loss 4.1532 (4.1560)	Top-1 acc 25.000 (28.029)	Top-5 acc 50.391 (51.073)	lr 0.01161
Warmup Train [28][640/3239]	Time 0.219 (0.256)	Data 0.001 (0.030)	Loss 3.9429 (4.1562)	Top-1 acc 34.375 (28.041)	Top-5 acc 54.688 (51.068)	lr 0.01161
Warmup Train [28][650/3239]	Time 0.146 (0.256)	Data 0.001 (0.030)	Loss 4.2499 (4.1550)	Top-1 acc 24.219 (28.055)	Top-5 acc 47.266 (51.087)	lr 0.01160
Warmup Train [28][660/3239]	Time 0.286 (0.256)	Data 0.001 (0.030)	Loss 4.2311 (4.1560)	Top-1 acc 26.953 (28.049)	Top-5 acc 51.953 (51.080)	lr 0.01160
Warmup Train [28][670/3239]	Time 0.349 (0.255)	Data 0.004 (0.029)	Loss 4.0658 (4.1563)	Top-1 acc 28.906 (28.046)	Top-5 acc 53.125 (51.085)	lr 0.01159
Warmup Train [28][680/3239]	Time 0.209 (0.255)	Data 0.001 (0.029)	Loss 4.2996 (4.1564)	Top-1 acc 26.953 (28.035)	Top-5 acc 50.391 (51.081)	lr 0.01159
Warmup Train [28][690/3239]	Time 0.192 (0.254)	Data 0.002 (0.028)	Loss 4.2564 (4.1563)	Top-1 acc 24.219 (28.045)	Top-5 acc 49.219 (51.093)	lr 0.01158
Warmup Train [28][700/3239]	Time 0.217 (0.254)	Data 0.001 (0.028)	Loss 4.1417 (4.1563)	Top-1 acc 29.688 (28.048)	Top-5 acc 51.172 (51.103)	lr 0.01158
Warmup Train [28][710/3239]	Time 0.216 (0.254)	Data 0.001 (0.028)	Loss 4.2489 (4.1575)	Top-1 acc 27.344 (28.029)	Top-5 acc 52.344 (51.086)	lr 0.01157
Warmup Train [28][720/3239]	Time 0.246 (0.253)	Data 0.001 (0.027)	Loss 4.2310 (4.1575)	Top-1 acc 26.172 (28.018)	Top-5 acc 46.094 (51.086)	lr 0.01157
Warmup Train [28][730/3239]	Time 0.253 (0.253)	Data 0.001 (0.027)	Loss 4.1939 (4.1574)	Top-1 acc 25.781 (28.023)	Top-5 acc 49.219 (51.100)	lr 0.01156
Warmup Train [28][740/3239]	Time 0.228 (0.252)	Data 0.001 (0.027)	Loss 4.2260 (4.1577)	Top-1 acc 25.781 (28.022)	Top-5 acc 51.172 (51.092)	lr 0.01156
Warmup Train [28][750/3239]	Time 0.220 (0.252)	Data 0.001 (0.026)	Loss 4.3720 (4.1583)	Top-1 acc 23.438 (28.009)	Top-5 acc 44.922 (51.081)	lr 0.01155
Warmup Train [28][760/3239]	Time 0.280 (0.251)	Data 0.001 (0.026)	Loss 4.0025 (4.1587)	Top-1 acc 31.250 (28.015)	Top-5 acc 54.688 (51.063)	lr 0.01155
Warmup Train [28][770/3239]	Time 0.208 (0.251)	Data 0.002 (0.026)	Loss 4.1804 (4.1585)	Top-1 acc 29.297 (28.021)	Top-5 acc 51.562 (51.051)	lr 0.01154
Warmup Train [28][780/3239]	Time 0.249 (0.251)	Data 0.001 (0.025)	Loss 4.1690 (4.1581)	Top-1 acc 29.688 (28.032)	Top-5 acc 50.391 (51.060)	lr 0.01154
Warmup Train [28][790/3239]	Time 0.268 (0.250)	Data 0.001 (0.025)	Loss 3.9699 (4.1577)	Top-1 acc 32.031 (28.039)	Top-5 acc 50.781 (51.045)	lr 0.01153
Warmup Train [28][800/3239]	Time 0.214 (0.250)	Data 0.001 (0.025)	Loss 4.2743 (4.1586)	Top-1 acc 26.953 (28.012)	Top-5 acc 47.266 (51.023)	lr 0.01153
Warmup Train [28][810/3239]	Time 0.206 (0.250)	Data 0.001 (0.025)	Loss 4.1668 (4.1584)	Top-1 acc 29.688 (28.017)	Top-5 acc 49.219 (51.020)	lr 0.01152
Warmup Train [28][820/3239]	Time 0.192 (0.250)	Data 0.001 (0.024)	Loss 3.8180 (4.1578)	Top-1 acc 36.719 (28.027)	Top-5 acc 60.547 (51.022)	lr 0.01152
Warmup Train [28][830/3239]	Time 0.182 (0.249)	Data 0.001 (0.024)	Loss 4.2129 (4.1584)	Top-1 acc 31.250 (28.028)	Top-5 acc 48.828 (51.012)	lr 0.01151
Warmup Train [28][840/3239]	Time 0.215 (0.249)	Data 0.001 (0.024)	Loss 4.1677 (4.1576)	Top-1 acc 26.562 (28.038)	Top-5 acc 48.047 (51.030)	lr 0.01151
Warmup Train [28][850/3239]	Time 0.171 (0.248)	Data 0.001 (0.024)	Loss 4.3387 (4.1584)	Top-1 acc 23.438 (28.020)	Top-5 acc 50.000 (51.024)	lr 0.01150
Warmup Train [28][860/3239]	Time 0.286 (0.248)	Data 0.001 (0.023)	Loss 4.2713 (4.1588)	Top-1 acc 25.000 (28.007)	Top-5 acc 50.781 (51.018)	lr 0.01150
Warmup Train [28][870/3239]	Time 0.155 (0.248)	Data 0.001 (0.023)	Loss 4.0670 (4.1592)	Top-1 acc 25.781 (27.999)	Top-5 acc 50.391 (51.007)	lr 0.01149
Warmup Train [28][880/3239]	Time 0.221 (0.248)	Data 0.001 (0.023)	Loss 4.1228 (4.1592)	Top-1 acc 25.781 (27.992)	Top-5 acc 49.219 (50.995)	lr 0.01149
Warmup Train [28][890/3239]	Time 0.138 (0.247)	Data 0.001 (0.023)	Loss 4.2414 (4.1592)	Top-1 acc 29.688 (27.989)	Top-5 acc 48.438 (50.990)	lr 0.01148
Warmup Train [28][900/3239]	Time 0.252 (0.247)	Data 0.001 (0.022)	Loss 4.3676 (4.1595)	Top-1 acc 23.438 (27.988)	Top-5 acc 47.266 (50.975)	lr 0.01148
Warmup Train [28][910/3239]	Time 0.213 (0.247)	Data 0.002 (0.022)	Loss 4.2384 (4.1595)	Top-1 acc 23.828 (27.971)	Top-5 acc 48.047 (50.970)	lr 0.01147
Warmup Train [28][920/3239]	Time 0.243 (0.246)	Data 0.001 (0.022)	Loss 4.0379 (4.1596)	Top-1 acc 31.250 (27.969)	Top-5 acc 55.078 (50.971)	lr 0.01147
Warmup Train [28][930/3239]	Time 0.261 (0.246)	Data 0.001 (0.022)	Loss 4.1657 (4.1600)	Top-1 acc 29.297 (27.971)	Top-5 acc 50.391 (50.964)	lr 0.01146
Warmup Train [28][940/3239]	Time 0.205 (0.246)	Data 0.001 (0.022)	Loss 4.1887 (4.1597)	Top-1 acc 25.781 (27.976)	Top-5 acc 47.266 (50.972)	lr 0.01146
Warmup Train [28][950/3239]	Time 0.265 (0.246)	Data 0.001 (0.021)	Loss 4.2440 (4.1589)	Top-1 acc 27.344 (27.998)	Top-5 acc 48.438 (50.993)	lr 0.01145
Warmup Train [28][960/3239]	Time 0.410 (0.246)	Data 0.001 (0.021)	Loss 4.2875 (4.1593)	Top-1 acc 25.391 (27.986)	Top-5 acc 44.531 (50.981)	lr 0.01145
Warmup Train [28][970/3239]	Time 0.223 (0.245)	Data 0.001 (0.021)	Loss 3.9928 (4.1591)	Top-1 acc 33.203 (27.994)	Top-5 acc 55.859 (50.980)	lr 0.01144
Warmup Train [28][980/3239]	Time 0.309 (0.245)	Data 0.001 (0.021)	Loss 4.0578 (4.1595)	Top-1 acc 31.641 (28.003)	Top-5 acc 53.516 (50.968)	lr 0.01143
Warmup Train [28][990/3239]	Time 0.216 (0.245)	Data 0.001 (0.021)	Loss 4.1631 (4.1597)	Top-1 acc 25.000 (27.997)	Top-5 acc 52.344 (50.965)	lr 0.01143
Warmup Train [28][1000/3239]	Time 0.161 (0.244)	Data 0.001 (0.020)	Loss 4.0541 (4.1602)	Top-1 acc 30.859 (27.986)	Top-5 acc 55.469 (50.953)	lr 0.01142
Warmup Train [28][1010/3239]	Time 0.233 (0.244)	Data 0.001 (0.020)	Loss 4.0315 (4.1599)	Top-1 acc 29.688 (27.991)	Top-5 acc 53.125 (50.960)	lr 0.01142
Warmup Train [28][1020/3239]	Time 0.210 (0.244)	Data 0.001 (0.020)	Loss 4.1101 (4.1608)	Top-1 acc 30.469 (27.983)	Top-5 acc 51.172 (50.944)	lr 0.01141
Warmup Train [28][1030/3239]	Time 0.142 (0.244)	Data 0.001 (0.020)	Loss 4.1387 (4.1610)	Top-1 acc 27.734 (27.971)	Top-5 acc 55.469 (50.954)	lr 0.01141
Warmup Train [28][1040/3239]	Time 0.267 (0.244)	Data 0.001 (0.020)	Loss 4.2878 (4.1606)	Top-1 acc 27.344 (27.975)	Top-5 acc 46.875 (50.970)	lr 0.01140
Warmup Train [28][1050/3239]	Time 0.241 (0.243)	Data 0.001 (0.020)	Loss 4.1211 (4.1601)	Top-1 acc 25.781 (27.980)	Top-5 acc 54.297 (50.986)	lr 0.01140
Warmup Train [28][1060/3239]	Time 0.204 (0.243)	Data 0.001 (0.019)	Loss 4.2269 (4.1601)	Top-1 acc 27.344 (27.979)	Top-5 acc 48.438 (50.983)	lr 0.01139
Warmup Train [28][1070/3239]	Time 0.221 (0.243)	Data 0.001 (0.019)	Loss 4.1724 (4.1603)	Top-1 acc 30.859 (27.977)	Top-5 acc 47.266 (50.969)	lr 0.01139
Warmup Train [28][1080/3239]	Time 0.203 (0.243)	Data 0.001 (0.019)	Loss 4.2722 (4.1601)	Top-1 acc 25.000 (27.986)	Top-5 acc 47.656 (50.976)	lr 0.01138
Warmup Train [28][1090/3239]	Time 0.195 (0.243)	Data 0.001 (0.019)	Loss 4.2154 (4.1599)	Top-1 acc 24.609 (27.986)	Top-5 acc 49.609 (50.986)	lr 0.01138
Warmup Train [28][1100/3239]	Time 0.225 (0.242)	Data 0.001 (0.019)	Loss 4.1700 (4.1603)	Top-1 acc 29.297 (27.980)	Top-5 acc 51.953 (50.979)	lr 0.01137
Warmup Train [28][1110/3239]	Time 0.158 (0.242)	Data 0.001 (0.019)	Loss 4.1926 (4.1602)	Top-1 acc 26.172 (27.985)	Top-5 acc 48.438 (50.975)	lr 0.01137
Warmup Train [28][1120/3239]	Time 0.309 (0.242)	Data 0.001 (0.019)	Loss 4.1736 (4.1605)	Top-1 acc 28.516 (27.978)	Top-5 acc 51.953 (50.966)	lr 0.01136
Warmup Train [28][1130/3239]	Time 0.214 (0.242)	Data 0.001 (0.018)	Loss 4.0501 (4.1604)	Top-1 acc 29.297 (27.976)	Top-5 acc 53.906 (50.967)	lr 0.01136
Warmup Train [28][1140/3239]	Time 0.191 (0.242)	Data 0.001 (0.018)	Loss 4.1167 (4.1603)	Top-1 acc 36.328 (27.985)	Top-5 acc 53.516 (50.975)	lr 0.01135
Warmup Train [28][1150/3239]	Time 0.154 (0.242)	Data 0.001 (0.018)	Loss 4.1631 (4.1601)	Top-1 acc 23.828 (27.987)	Top-5 acc 48.828 (50.972)	lr 0.01135
Warmup Train [28][1160/3239]	Time 0.387 (0.241)	Data 0.003 (0.018)	Loss 4.0825 (4.1600)	Top-1 acc 29.297 (27.997)	Top-5 acc 55.859 (50.980)	lr 0.01134
Warmup Train [28][1170/3239]	Time 0.370 (0.241)	Data 0.001 (0.018)	Loss 4.1793 (4.1602)	Top-1 acc 25.391 (27.996)	Top-5 acc 50.391 (50.974)	lr 0.01134
Warmup Train [28][1180/3239]	Time 0.377 (0.241)	Data 0.003 (0.018)	Loss 3.9521 (4.1605)	Top-1 acc 33.594 (27.992)	Top-5 acc 56.250 (50.968)	lr 0.01133
Warmup Train [28][1190/3239]	Time 0.186 (0.241)	Data 0.001 (0.018)	Loss 4.2556 (4.1603)	Top-1 acc 28.516 (28.008)	Top-5 acc 54.297 (50.983)	lr 0.01133
Warmup Train [28][1200/3239]	Time 0.188 (0.241)	Data 0.002 (0.018)	Loss 4.2209 (4.1608)	Top-1 acc 29.297 (28.001)	Top-5 acc 46.875 (50.968)	lr 0.01132
Warmup Train [28][1210/3239]	Time 0.226 (0.241)	Data 0.001 (0.017)	Loss 4.2894 (4.1605)	Top-1 acc 26.172 (28.003)	Top-5 acc 46.484 (50.976)	lr 0.01132
Warmup Train [28][1220/3239]	Time 0.205 (0.241)	Data 0.001 (0.017)	Loss 4.1832 (4.1607)	Top-1 acc 28.125 (27.998)	Top-5 acc 52.734 (50.973)	lr 0.01131
Warmup Train [28][1230/3239]	Time 0.218 (0.240)	Data 0.001 (0.017)	Loss 4.1326 (4.1602)	Top-1 acc 27.734 (28.005)	Top-5 acc 51.953 (50.988)	lr 0.01131
Warmup Train [28][1240/3239]	Time 0.157 (0.240)	Data 0.001 (0.017)	Loss 4.2809 (4.1605)	Top-1 acc 23.438 (27.990)	Top-5 acc 46.484 (50.978)	lr 0.01130
Warmup Train [28][1250/3239]	Time 0.223 (0.240)	Data 0.002 (0.017)	Loss 3.8286 (4.1602)	Top-1 acc 34.375 (27.989)	Top-5 acc 56.250 (50.984)	lr 0.01130
Warmup Train [28][1260/3239]	Time 0.173 (0.240)	Data 0.002 (0.017)	Loss 4.0865 (4.1602)	Top-1 acc 28.906 (27.988)	Top-5 acc 51.172 (50.984)	lr 0.01129
Warmup Train [28][1270/3239]	Time 0.187 (0.240)	Data 0.001 (0.017)	Loss 3.9868 (4.1604)	Top-1 acc 30.469 (27.986)	Top-5 acc 57.812 (50.980)	lr 0.01129
Warmup Train [28][1280/3239]	Time 0.227 (0.240)	Data 0.001 (0.017)	Loss 4.2907 (4.1607)	Top-1 acc 22.266 (27.980)	Top-5 acc 44.141 (50.976)	lr 0.01128
Warmup Train [28][1290/3239]	Time 0.333 (0.240)	Data 0.001 (0.017)	Loss 4.1483 (4.1610)	Top-1 acc 27.344 (27.972)	Top-5 acc 49.609 (50.970)	lr 0.01128
Warmup Train [28][1300/3239]	Time 0.232 (0.240)	Data 0.001 (0.016)	Loss 4.1426 (4.1608)	Top-1 acc 26.953 (27.974)	Top-5 acc 48.438 (50.976)	lr 0.01127
Warmup Train [28][1310/3239]	Time 0.226 (0.240)	Data 0.003 (0.016)	Loss 4.3483 (4.1611)	Top-1 acc 24.609 (27.977)	Top-5 acc 43.359 (50.966)	lr 0.01127
Warmup Train [28][1320/3239]	Time 0.190 (0.239)	Data 0.001 (0.016)	Loss 4.1708 (4.1610)	Top-1 acc 28.125 (27.975)	Top-5 acc 52.734 (50.963)	lr 0.01126
Warmup Train [28][1330/3239]	Time 0.269 (0.239)	Data 0.002 (0.016)	Loss 4.2529 (4.1610)	Top-1 acc 23.438 (27.974)	Top-5 acc 48.047 (50.961)	lr 0.01126
Warmup Train [28][1340/3239]	Time 0.243 (0.239)	Data 0.001 (0.016)	Loss 3.9864 (4.1607)	Top-1 acc 31.250 (27.977)	Top-5 acc 56.250 (50.968)	lr 0.01125
Warmup Train [28][1350/3239]	Time 0.193 (0.239)	Data 0.001 (0.016)	Loss 4.0781 (4.1612)	Top-1 acc 26.562 (27.959)	Top-5 acc 55.469 (50.964)	lr 0.01125
Warmup Train [28][1360/3239]	Time 0.200 (0.239)	Data 0.001 (0.016)	Loss 4.1963 (4.1611)	Top-1 acc 27.734 (27.956)	Top-5 acc 52.344 (50.970)	lr 0.01124
Warmup Train [28][1370/3239]	Time 0.234 (0.239)	Data 0.002 (0.016)	Loss 4.0407 (4.1608)	Top-1 acc 32.812 (27.962)	Top-5 acc 52.734 (50.977)	lr 0.01124
Warmup Train [28][1380/3239]	Time 0.352 (0.239)	Data 0.002 (0.016)	Loss 4.1764 (4.1609)	Top-1 acc 28.125 (27.957)	Top-5 acc 51.562 (50.974)	lr 0.01123
Warmup Train [28][1390/3239]	Time 0.207 (0.239)	Data 0.001 (0.016)	Loss 4.2117 (4.1609)	Top-1 acc 25.781 (27.957)	Top-5 acc 50.781 (50.968)	lr 0.01123
Warmup Train [28][1400/3239]	Time 0.201 (0.239)	Data 0.001 (0.015)	Loss 4.2683 (4.1606)	Top-1 acc 26.172 (27.962)	Top-5 acc 45.703 (50.969)	lr 0.01122
Warmup Train [28][1410/3239]	Time 0.182 (0.238)	Data 0.001 (0.015)	Loss 4.1379 (4.1607)	Top-1 acc 26.953 (27.958)	Top-5 acc 51.172 (50.966)	lr 0.01122
Warmup Train [28][1420/3239]	Time 0.258 (0.238)	Data 0.001 (0.015)	Loss 4.2694 (4.1609)	Top-1 acc 30.469 (27.960)	Top-5 acc 48.828 (50.962)	lr 0.01121
Warmup Train [28][1430/3239]	Time 0.297 (0.238)	Data 0.002 (0.015)	Loss 4.0116 (4.1605)	Top-1 acc 32.031 (27.976)	Top-5 acc 51.562 (50.972)	lr 0.01121
Warmup Train [28][1440/3239]	Time 0.183 (0.238)	Data 0.002 (0.015)	Loss 4.1892 (4.1602)	Top-1 acc 26.172 (27.985)	Top-5 acc 49.609 (50.976)	lr 0.01120
Warmup Train [28][1450/3239]	Time 0.195 (0.238)	Data 0.001 (0.015)	Loss 4.0344 (4.1600)	Top-1 acc 28.125 (27.985)	Top-5 acc 50.000 (50.975)	lr 0.01120
Warmup Train [28][1460/3239]	Time 0.204 (0.238)	Data 0.002 (0.015)	Loss 4.3455 (4.1604)	Top-1 acc 25.781 (27.979)	Top-5 acc 47.266 (50.966)	lr 0.01119
Warmup Train [28][1470/3239]	Time 0.154 (0.238)	Data 0.001 (0.015)	Loss 4.1640 (4.1600)	Top-1 acc 26.562 (27.991)	Top-5 acc 52.344 (50.974)	lr 0.01119
Warmup Train [28][1480/3239]	Time 0.232 (0.238)	Data 0.001 (0.015)	Loss 3.9169 (4.1595)	Top-1 acc 35.547 (28.004)	Top-5 acc 58.984 (50.981)	lr 0.01118
Warmup Train [28][1490/3239]	Time 0.344 (0.238)	Data 0.002 (0.015)	Loss 4.1789 (4.1594)	Top-1 acc 29.688 (28.013)	Top-5 acc 54.297 (50.993)	lr 0.01118
Warmup Train [28][1500/3239]	Time 0.390 (0.238)	Data 0.003 (0.015)	Loss 4.3192 (4.1594)	Top-1 acc 24.219 (28.013)	Top-5 acc 46.484 (50.992)	lr 0.01117
Warmup Train [28][1510/3239]	Time 0.199 (0.238)	Data 0.002 (0.015)	Loss 4.0943 (4.1594)	Top-1 acc 30.469 (28.018)	Top-5 acc 50.781 (50.984)	lr 0.01117
Warmup Train [28][1520/3239]	Time 0.219 (0.237)	Data 0.001 (0.014)	Loss 4.2506 (4.1599)	Top-1 acc 24.609 (28.011)	Top-5 acc 50.781 (50.981)	lr 0.01116
Warmup Train [28][1530/3239]	Time 0.203 (0.237)	Data 0.001 (0.014)	Loss 4.0196 (4.1596)	Top-1 acc 31.250 (28.015)	Top-5 acc 53.125 (50.985)	lr 0.01116
Warmup Train [28][1540/3239]	Time 0.186 (0.237)	Data 0.001 (0.014)	Loss 4.1839 (4.1596)	Top-1 acc 25.781 (28.011)	Top-5 acc 49.609 (50.993)	lr 0.01115
Warmup Train [28][1550/3239]	Time 0.240 (0.237)	Data 0.001 (0.014)	Loss 4.1546 (4.1598)	Top-1 acc 25.000 (28.009)	Top-5 acc 48.828 (50.991)	lr 0.01115
Warmup Train [28][1560/3239]	Time 0.174 (0.237)	Data 0.001 (0.014)	Loss 4.2961 (4.1595)	Top-1 acc 26.953 (28.020)	Top-5 acc 46.875 (50.996)	lr 0.01114
Warmup Train [28][1570/3239]	Time 0.230 (0.237)	Data 0.001 (0.014)	Loss 4.3138 (4.1599)	Top-1 acc 27.344 (28.016)	Top-5 acc 46.094 (50.986)	lr 0.01114
Warmup Train [28][1580/3239]	Time 0.206 (0.237)	Data 0.001 (0.014)	Loss 4.2057 (4.1598)	Top-1 acc 27.734 (28.017)	Top-5 acc 49.219 (50.986)	lr 0.01113
Warmup Train [28][1590/3239]	Time 0.225 (0.237)	Data 0.001 (0.014)	Loss 4.0972 (4.1599)	Top-1 acc 30.859 (28.017)	Top-5 acc 52.734 (50.987)	lr 0.01113
Warmup Train [28][1600/3239]	Time 0.376 (0.237)	Data 0.001 (0.014)	Loss 4.3236 (4.1601)	Top-1 acc 26.172 (28.014)	Top-5 acc 46.484 (50.980)	lr 0.01112
Warmup Train [28][1610/3239]	Time 0.242 (0.237)	Data 0.001 (0.014)	Loss 4.0991 (4.1598)	Top-1 acc 30.469 (28.016)	Top-5 acc 50.391 (50.985)	lr 0.01112
Warmup Train [28][1620/3239]	Time 0.205 (0.237)	Data 0.001 (0.014)	Loss 4.1004 (4.1597)	Top-1 acc 29.297 (28.024)	Top-5 acc 53.516 (50.991)	lr 0.01111
Warmup Train [28][1630/3239]	Time 0.165 (0.237)	Data 0.001 (0.014)	Loss 4.2922 (4.1597)	Top-1 acc 24.609 (28.021)	Top-5 acc 49.219 (50.995)	lr 0.01111
Warmup Train [28][1640/3239]	Time 0.183 (0.236)	Data 0.001 (0.014)	Loss 3.9158 (4.1598)	Top-1 acc 34.375 (28.025)	Top-5 acc 58.594 (50.997)	lr 0.01110
Warmup Train [28][1650/3239]	Time 0.173 (0.236)	Data 0.001 (0.014)	Loss 4.0093 (4.1597)	Top-1 acc 29.297 (28.026)	Top-5 acc 54.688 (50.998)	lr 0.01110
Warmup Train [28][1660/3239]	Time 0.229 (0.236)	Data 0.001 (0.014)	Loss 4.0474 (4.1601)	Top-1 acc 35.156 (28.020)	Top-5 acc 53.516 (50.997)	lr 0.01109
Warmup Train [28][1670/3239]	Time 0.219 (0.236)	Data 0.001 (0.013)	Loss 4.3208 (4.1605)	Top-1 acc 25.000 (28.007)	Top-5 acc 46.484 (50.989)	lr 0.01109
Warmup Train [28][1680/3239]	Time 0.208 (0.236)	Data 0.001 (0.013)	Loss 4.1617 (4.1608)	Top-1 acc 26.172 (28.003)	Top-5 acc 51.562 (50.985)	lr 0.01108
Warmup Train [28][1690/3239]	Time 0.143 (0.236)	Data 0.002 (0.013)	Loss 4.1829 (4.1607)	Top-1 acc 26.953 (28.001)	Top-5 acc 50.391 (50.988)	lr 0.01108
Warmup Train [28][1700/3239]	Time 0.207 (0.236)	Data 0.002 (0.013)	Loss 4.1459 (4.1608)	Top-1 acc 24.219 (28.000)	Top-5 acc 51.562 (50.985)	lr 0.01107
Warmup Train [28][1710/3239]	Time 0.307 (0.236)	Data 0.001 (0.013)	Loss 4.2572 (4.1611)	Top-1 acc 26.172 (27.993)	Top-5 acc 48.047 (50.983)	lr 0.01107
Warmup Train [28][1720/3239]	Time 0.207 (0.236)	Data 0.001 (0.013)	Loss 4.0983 (4.1610)	Top-1 acc 32.031 (27.999)	Top-5 acc 55.469 (50.993)	lr 0.01106
Warmup Train [28][1730/3239]	Time 0.224 (0.236)	Data 0.001 (0.013)	Loss 4.0717 (4.1613)	Top-1 acc 30.859 (27.996)	Top-5 acc 48.828 (50.983)	lr 0.01106
Warmup Train [28][1740/3239]	Time 0.225 (0.236)	Data 0.001 (0.013)	Loss 4.2599 (4.1610)	Top-1 acc 27.344 (27.997)	Top-5 acc 48.828 (50.992)	lr 0.01105
Warmup Train [28][1750/3239]	Time 0.228 (0.236)	Data 0.001 (0.013)	Loss 3.9490 (4.1609)	Top-1 acc 29.297 (28.003)	Top-5 acc 57.031 (51.003)	lr 0.01105
Warmup Train [28][1760/3239]	Time 0.214 (0.236)	Data 0.001 (0.013)	Loss 3.9782 (4.1608)	Top-1 acc 29.297 (27.999)	Top-5 acc 54.297 (51.003)	lr 0.01104
Warmup Train [28][1770/3239]	Time 0.173 (0.236)	Data 0.001 (0.013)	Loss 4.1545 (4.1609)	Top-1 acc 28.516 (28.002)	Top-5 acc 51.172 (51.007)	lr 0.01104
Warmup Train [28][1780/3239]	Time 0.223 (0.236)	Data 0.001 (0.013)	Loss 4.1837 (4.1608)	Top-1 acc 32.422 (28.007)	Top-5 acc 51.172 (51.012)	lr 0.01103
Warmup Train [28][1790/3239]	Time 0.161 (0.235)	Data 0.002 (0.013)	Loss 4.2928 (4.1608)	Top-1 acc 24.219 (28.003)	Top-5 acc 45.703 (51.015)	lr 0.01103
Warmup Train [28][1800/3239]	Time 0.239 (0.235)	Data 0.001 (0.013)	Loss 4.0436 (4.1605)	Top-1 acc 30.859 (28.012)	Top-5 acc 57.422 (51.026)	lr 0.01102
Warmup Train [28][1810/3239]	Time 0.342 (0.235)	Data 0.001 (0.013)	Loss 3.9586 (4.1607)	Top-1 acc 30.469 (28.008)	Top-5 acc 53.906 (51.021)	lr 0.01102
Warmup Train [28][1820/3239]	Time 0.234 (0.235)	Data 0.001 (0.013)	Loss 4.0082 (4.1603)	Top-1 acc 26.562 (28.009)	Top-5 acc 53.906 (51.028)	lr 0.01101
Warmup Train [28][1830/3239]	Time 0.189 (0.235)	Data 0.001 (0.013)	Loss 4.3100 (4.1601)	Top-1 acc 26.953 (28.010)	Top-5 acc 50.391 (51.033)	lr 0.01101
Warmup Train [28][1840/3239]	Time 0.227 (0.235)	Data 0.001 (0.012)	Loss 4.2541 (4.1602)	Top-1 acc 26.953 (28.006)	Top-5 acc 47.656 (51.031)	lr 0.01100
Warmup Train [28][1850/3239]	Time 0.222 (0.235)	Data 0.002 (0.012)	Loss 4.2752 (4.1602)	Top-1 acc 28.906 (28.011)	Top-5 acc 47.656 (51.034)	lr 0.01100
Warmup Train [28][1860/3239]	Time 0.234 (0.235)	Data 0.001 (0.012)	Loss 4.4869 (4.1603)	Top-1 acc 21.875 (28.005)	Top-5 acc 41.016 (51.034)	lr 0.01099
Warmup Train [28][1870/3239]	Time 0.185 (0.235)	Data 0.001 (0.012)	Loss 4.2838 (4.1602)	Top-1 acc 25.391 (28.009)	Top-5 acc 47.266 (51.032)	lr 0.01099
Warmup Train [28][1880/3239]	Time 0.206 (0.235)	Data 0.001 (0.012)	Loss 4.1258 (4.1599)	Top-1 acc 26.172 (28.017)	Top-5 acc 52.734 (51.039)	lr 0.01098
Warmup Train [28][1890/3239]	Time 0.189 (0.235)	Data 0.001 (0.012)	Loss 4.2740 (4.1604)	Top-1 acc 26.562 (28.009)	Top-5 acc 45.312 (51.026)	lr 0.01097
Warmup Train [28][1900/3239]	Time 0.199 (0.235)	Data 0.001 (0.012)	Loss 4.2058 (4.1604)	Top-1 acc 27.734 (28.011)	Top-5 acc 49.609 (51.024)	lr 0.01097
Warmup Train [28][1910/3239]	Time 0.278 (0.235)	Data 0.001 (0.012)	Loss 4.2280 (4.1601)	Top-1 acc 25.781 (28.018)	Top-5 acc 46.875 (51.029)	lr 0.01096
Warmup Train [28][1920/3239]	Time 0.203 (0.235)	Data 0.001 (0.012)	Loss 4.1674 (4.1598)	Top-1 acc 29.297 (28.023)	Top-5 acc 49.219 (51.037)	lr 0.01096
Warmup Train [28][1930/3239]	Time 0.173 (0.235)	Data 0.002 (0.012)	Loss 4.2241 (4.1595)	Top-1 acc 27.734 (28.023)	Top-5 acc 46.875 (51.043)	lr 0.01095
Warmup Train [28][1940/3239]	Time 0.215 (0.235)	Data 0.001 (0.012)	Loss 4.0840 (4.1596)	Top-1 acc 28.906 (28.020)	Top-5 acc 53.906 (51.041)	lr 0.01095
Warmup Train [28][1950/3239]	Time 0.223 (0.235)	Data 0.001 (0.012)	Loss 4.2199 (4.1595)	Top-1 acc 29.297 (28.018)	Top-5 acc 49.609 (51.043)	lr 0.01094
Warmup Train [28][1960/3239]	Time 0.245 (0.235)	Data 0.001 (0.012)	Loss 4.2326 (4.1596)	Top-1 acc 25.000 (28.012)	Top-5 acc 50.781 (51.042)	lr 0.01094
Warmup Train [28][1970/3239]	Time 0.229 (0.235)	Data 0.001 (0.012)	Loss 4.2631 (4.1597)	Top-1 acc 25.781 (28.002)	Top-5 acc 49.219 (51.036)	lr 0.01093
Warmup Train [28][1980/3239]	Time 0.244 (0.235)	Data 0.001 (0.012)	Loss 4.0784 (4.1594)	Top-1 acc 28.516 (28.003)	Top-5 acc 52.344 (51.039)	lr 0.01093
Warmup Train [28][1990/3239]	Time 0.246 (0.235)	Data 0.001 (0.012)	Loss 4.2927 (4.1595)	Top-1 acc 29.688 (28.005)	Top-5 acc 48.438 (51.034)	lr 0.01092
Warmup Train [28][2000/3239]	Time 0.193 (0.235)	Data 0.001 (0.012)	Loss 4.1211 (4.1594)	Top-1 acc 31.250 (28.003)	Top-5 acc 52.734 (51.031)	lr 0.01092
Warmup Train [28][2010/3239]	Time 0.339 (0.235)	Data 0.001 (0.012)	Loss 3.9969 (4.1596)	Top-1 acc 32.422 (28.002)	Top-5 acc 51.953 (51.029)	lr 0.01091
Warmup Train [28][2020/3239]	Time 0.188 (0.234)	Data 0.001 (0.012)	Loss 4.1620 (4.1597)	Top-1 acc 29.688 (28.001)	Top-5 acc 50.391 (51.026)	lr 0.01091
Warmup Train [28][2030/3239]	Time 0.178 (0.234)	Data 0.001 (0.012)	Loss 4.1701 (4.1597)	Top-1 acc 25.000 (28.001)	Top-5 acc 52.734 (51.026)	lr 0.01090
Warmup Train [28][2040/3239]	Time 0.227 (0.234)	Data 0.001 (0.011)	Loss 4.3500 (4.1597)	Top-1 acc 26.172 (28.000)	Top-5 acc 45.703 (51.025)	lr 0.01090
Warmup Train [28][2050/3239]	Time 0.189 (0.234)	Data 0.002 (0.011)	Loss 4.1502 (4.1595)	Top-1 acc 31.641 (28.009)	Top-5 acc 50.781 (51.027)	lr 0.01089
Warmup Train [28][2060/3239]	Time 0.197 (0.234)	Data 0.001 (0.011)	Loss 4.2163 (4.1595)	Top-1 acc 26.953 (28.002)	Top-5 acc 50.000 (51.027)	lr 0.01089
Warmup Train [28][2070/3239]	Time 0.216 (0.234)	Data 0.002 (0.011)	Loss 4.1826 (4.1595)	Top-1 acc 26.562 (28.001)	Top-5 acc 53.906 (51.029)	lr 0.01088
Warmup Train [28][2080/3239]	Time 0.184 (0.234)	Data 0.001 (0.011)	Loss 4.3695 (4.1595)	Top-1 acc 22.656 (28.002)	Top-5 acc 44.141 (51.025)	lr 0.01088
Warmup Train [28][2090/3239]	Time 0.287 (0.234)	Data 0.001 (0.011)	Loss 4.2110 (4.1596)	Top-1 acc 26.953 (27.996)	Top-5 acc 51.953 (51.020)	lr 0.01087
Warmup Train [28][2100/3239]	Time 0.185 (0.234)	Data 0.001 (0.011)	Loss 4.2768 (4.1596)	Top-1 acc 27.344 (27.999)	Top-5 acc 51.562 (51.023)	lr 0.01087
Warmup Train [28][2110/3239]	Time 0.322 (0.234)	Data 0.001 (0.011)	Loss 4.2593 (4.1600)	Top-1 acc 28.125 (27.991)	Top-5 acc 49.609 (51.014)	lr 0.01086
Warmup Train [28][2120/3239]	Time 0.209 (0.234)	Data 0.001 (0.011)	Loss 4.1056 (4.1597)	Top-1 acc 30.469 (27.993)	Top-5 acc 50.781 (51.023)	lr 0.01086
Warmup Train [28][2130/3239]	Time 0.212 (0.234)	Data 0.001 (0.011)	Loss 4.0396 (4.1595)	Top-1 acc 30.859 (27.998)	Top-5 acc 51.953 (51.028)	lr 0.01085
Warmup Train [28][2140/3239]	Time 0.243 (0.234)	Data 0.001 (0.011)	Loss 4.0309 (4.1593)	Top-1 acc 31.641 (27.999)	Top-5 acc 51.562 (51.033)	lr 0.01085
Warmup Train [28][2150/3239]	Time 0.219 (0.234)	Data 0.001 (0.011)	Loss 4.3409 (4.1593)	Top-1 acc 25.391 (27.999)	Top-5 acc 47.656 (51.039)	lr 0.01084
Warmup Train [28][2160/3239]	Time 0.181 (0.234)	Data 0.002 (0.011)	Loss 4.1375 (4.1592)	Top-1 acc 29.688 (28.002)	Top-5 acc 50.000 (51.046)	lr 0.01084
Warmup Train [28][2170/3239]	Time 0.222 (0.234)	Data 0.001 (0.011)	Loss 4.1453 (4.1593)	Top-1 acc 26.172 (28.001)	Top-5 acc 50.391 (51.041)	lr 0.01083
Warmup Train [28][2180/3239]	Time 0.220 (0.234)	Data 0.001 (0.011)	Loss 4.0241 (4.1586)	Top-1 acc 29.297 (28.015)	Top-5 acc 51.172 (51.056)	lr 0.01083
Warmup Train [28][2190/3239]	Time 0.232 (0.234)	Data 0.002 (0.011)	Loss 4.0167 (4.1589)	Top-1 acc 29.688 (28.007)	Top-5 acc 52.734 (51.053)	lr 0.01082
Warmup Train [28][2200/3239]	Time 0.305 (0.234)	Data 0.001 (0.011)	Loss 4.1668 (4.1591)	Top-1 acc 29.297 (28.003)	Top-5 acc 49.219 (51.049)	lr 0.01082
Warmup Train [28][2210/3239]	Time 0.344 (0.233)	Data 0.001 (0.011)	Loss 4.1557 (4.1591)	Top-1 acc 28.516 (28.005)	Top-5 acc 51.172 (51.049)	lr 0.01081
Warmup Train [28][2220/3239]	Time 0.227 (0.233)	Data 0.001 (0.011)	Loss 4.1321 (4.1592)	Top-1 acc 27.734 (28.002)	Top-5 acc 48.438 (51.045)	lr 0.01081
Warmup Train [28][2230/3239]	Time 0.247 (0.233)	Data 0.001 (0.011)	Loss 3.9931 (4.1592)	Top-1 acc 30.078 (28.001)	Top-5 acc 54.297 (51.044)	lr 0.01080
Warmup Train [28][2240/3239]	Time 0.225 (0.233)	Data 0.001 (0.011)	Loss 4.0685 (4.1592)	Top-1 acc 31.641 (28.002)	Top-5 acc 53.125 (51.047)	lr 0.01080
Warmup Train [28][2250/3239]	Time 0.188 (0.233)	Data 0.002 (0.011)	Loss 4.0360 (4.1590)	Top-1 acc 27.734 (28.002)	Top-5 acc 51.172 (51.050)	lr 0.01079
Warmup Train [28][2260/3239]	Time 0.197 (0.233)	Data 0.001 (0.011)	Loss 4.4245 (4.1592)	Top-1 acc 22.656 (28.001)	Top-5 acc 43.359 (51.046)	lr 0.01079
Warmup Train [28][2270/3239]	Time 0.238 (0.233)	Data 0.001 (0.011)	Loss 4.0726 (4.1592)	Top-1 acc 31.641 (28.000)	Top-5 acc 51.172 (51.042)	lr 0.01078
Warmup Train [28][2280/3239]	Time 0.251 (0.233)	Data 0.001 (0.011)	Loss 3.9542 (4.1590)	Top-1 acc 31.641 (28.003)	Top-5 acc 55.469 (51.046)	lr 0.01078
Warmup Train [28][2290/3239]	Time 0.199 (0.233)	Data 0.002 (0.011)	Loss 4.1257 (4.1592)	Top-1 acc 28.516 (27.994)	Top-5 acc 51.953 (51.039)	lr 0.01077
Warmup Train [28][2300/3239]	Time 0.233 (0.233)	Data 0.001 (0.010)	Loss 4.1781 (4.1590)	Top-1 acc 28.516 (27.993)	Top-5 acc 47.266 (51.036)	lr 0.01077
Warmup Train [28][2310/3239]	Time 0.342 (0.233)	Data 0.001 (0.010)	Loss 4.3136 (4.1588)	Top-1 acc 25.781 (27.990)	Top-5 acc 49.219 (51.038)	lr 0.01077
Warmup Train [28][2320/3239]	Time 0.320 (0.233)	Data 0.001 (0.010)	Loss 4.2357 (4.1588)	Top-1 acc 28.125 (27.992)	Top-5 acc 51.172 (51.037)	lr 0.01076
Warmup Train [28][2330/3239]	Time 0.157 (0.233)	Data 0.002 (0.010)	Loss 4.2492 (4.1593)	Top-1 acc 26.172 (27.981)	Top-5 acc 50.000 (51.026)	lr 0.01076
Warmup Train [28][2340/3239]	Time 0.152 (0.233)	Data 0.001 (0.010)	Loss 4.0434 (4.1592)	Top-1 acc 33.203 (27.985)	Top-5 acc 55.078 (51.025)	lr 0.01075
Warmup Train [28][2350/3239]	Time 0.144 (0.233)	Data 0.001 (0.010)	Loss 4.1170 (4.1590)	Top-1 acc 27.734 (27.992)	Top-5 acc 52.734 (51.030)	lr 0.01075
Warmup Train [28][2360/3239]	Time 0.215 (0.233)	Data 0.001 (0.010)	Loss 4.1886 (4.1590)	Top-1 acc 26.172 (27.990)	Top-5 acc 50.391 (51.028)	lr 0.01074
Warmup Train [28][2370/3239]	Time 0.234 (0.233)	Data 0.001 (0.010)	Loss 4.2449 (4.1592)	Top-1 acc 27.734 (27.987)	Top-5 acc 49.609 (51.023)	lr 0.01074
Warmup Train [28][2380/3239]	Time 0.209 (0.233)	Data 0.001 (0.010)	Loss 4.2439 (4.1593)	Top-1 acc 23.438 (27.985)	Top-5 acc 45.703 (51.017)	lr 0.01073
Warmup Train [28][2390/3239]	Time 0.216 (0.233)	Data 0.002 (0.010)	Loss 4.0577 (4.1590)	Top-1 acc 32.422 (27.994)	Top-5 acc 56.641 (51.027)	lr 0.01073
Warmup Train [28][2400/3239]	Time 0.196 (0.233)	Data 0.001 (0.010)	Loss 4.0979 (4.1590)	Top-1 acc 30.078 (27.993)	Top-5 acc 51.562 (51.028)	lr 0.01072
Warmup Train [28][2410/3239]	Time 0.204 (0.233)	Data 0.001 (0.010)	Loss 4.1729 (4.1589)	Top-1 acc 28.906 (27.997)	Top-5 acc 49.609 (51.031)	lr 0.01072
Warmup Train [28][2420/3239]	Time 0.315 (0.233)	Data 0.001 (0.010)	Loss 4.1118 (4.1591)	Top-1 acc 29.297 (27.992)	Top-5 acc 50.781 (51.025)	lr 0.01071
Warmup Train [28][2430/3239]	Time 0.280 (0.233)	Data 0.002 (0.010)	Loss 4.2670 (4.1591)	Top-1 acc 24.609 (27.990)	Top-5 acc 51.953 (51.024)	lr 0.01071
Warmup Train [28][2440/3239]	Time 0.286 (0.232)	Data 0.001 (0.010)	Loss 3.9383 (4.1592)	Top-1 acc 32.031 (27.987)	Top-5 acc 60.547 (51.025)	lr 0.01070
Warmup Train [28][2450/3239]	Time 0.277 (0.232)	Data 0.001 (0.010)	Loss 4.2802 (4.1594)	Top-1 acc 26.953 (27.984)	Top-5 acc 49.219 (51.020)	lr 0.01070
Warmup Train [28][2460/3239]	Time 0.208 (0.232)	Data 0.001 (0.010)	Loss 4.3583 (4.1597)	Top-1 acc 28.906 (27.980)	Top-5 acc 47.656 (51.013)	lr 0.01069
Warmup Train [28][2470/3239]	Time 0.255 (0.232)	Data 0.001 (0.010)	Loss 4.0838 (4.1596)	Top-1 acc 26.953 (27.981)	Top-5 acc 53.906 (51.013)	lr 0.01069
Warmup Train [28][2480/3239]	Time 0.225 (0.232)	Data 0.002 (0.010)	Loss 4.2870 (4.1596)	Top-1 acc 24.609 (27.983)	Top-5 acc 49.609 (51.015)	lr 0.01068
Warmup Train [28][2490/3239]	Time 0.266 (0.232)	Data 0.001 (0.010)	Loss 4.1983 (4.1595)	Top-1 acc 24.219 (27.983)	Top-5 acc 49.219 (51.015)	lr 0.01068
Warmup Train [28][2500/3239]	Time 0.149 (0.232)	Data 0.001 (0.010)	Loss 4.0759 (4.1595)	Top-1 acc 29.297 (27.981)	Top-5 acc 51.562 (51.016)	lr 0.01067
Warmup Train [28][2510/3239]	Time 0.210 (0.232)	Data 0.001 (0.010)	Loss 4.3802 (4.1595)	Top-1 acc 21.094 (27.979)	Top-5 acc 47.656 (51.018)	lr 0.01067
Warmup Train [28][2520/3239]	Time 0.302 (0.232)	Data 0.001 (0.010)	Loss 4.2689 (4.1594)	Top-1 acc 22.656 (27.978)	Top-5 acc 46.094 (51.019)	lr 0.01066
Warmup Train [28][2530/3239]	Time 0.241 (0.232)	Data 0.001 (0.010)	Loss 4.1388 (4.1593)	Top-1 acc 29.297 (27.980)	Top-5 acc 50.000 (51.020)	lr 0.01066
Warmup Train [28][2540/3239]	Time 0.228 (0.232)	Data 0.001 (0.010)	Loss 4.0172 (4.1593)	Top-1 acc 31.641 (27.981)	Top-5 acc 55.469 (51.022)	lr 0.01065
Warmup Train [28][2550/3239]	Time 0.216 (0.232)	Data 0.001 (0.010)	Loss 4.2404 (4.1593)	Top-1 acc 23.828 (27.982)	Top-5 acc 49.609 (51.023)	lr 0.01065
Warmup Train [28][2560/3239]	Time 0.296 (0.232)	Data 0.001 (0.010)	Loss 4.1003 (4.1595)	Top-1 acc 26.562 (27.980)	Top-5 acc 50.391 (51.020)	lr 0.01064
Warmup Train [28][2570/3239]	Time 0.222 (0.232)	Data 0.001 (0.010)	Loss 4.2131 (4.1593)	Top-1 acc 26.562 (27.981)	Top-5 acc 45.703 (51.021)	lr 0.01064
Warmup Train [28][2580/3239]	Time 0.166 (0.232)	Data 0.001 (0.010)	Loss 4.1893 (4.1592)	Top-1 acc 27.734 (27.981)	Top-5 acc 49.609 (51.021)	lr 0.01063
Warmup Train [28][2590/3239]	Time 0.191 (0.232)	Data 0.001 (0.010)	Loss 4.2972 (4.1593)	Top-1 acc 26.562 (27.977)	Top-5 acc 48.438 (51.017)	lr 0.01063
Warmup Train [28][2600/3239]	Time 0.313 (0.232)	Data 0.002 (0.010)	Loss 4.1782 (4.1593)	Top-1 acc 25.000 (27.980)	Top-5 acc 51.172 (51.014)	lr 0.01062
Warmup Train [28][2610/3239]	Time 0.150 (0.232)	Data 0.001 (0.010)	Loss 4.1754 (4.1592)	Top-1 acc 28.516 (27.982)	Top-5 acc 53.125 (51.019)	lr 0.01062
Warmup Train [28][2620/3239]	Time 0.225 (0.232)	Data 0.003 (0.010)	Loss 3.8100 (4.1589)	Top-1 acc 35.547 (27.988)	Top-5 acc 59.766 (51.023)	lr 0.01061
Warmup Train [28][2630/3239]	Time 0.322 (0.232)	Data 0.001 (0.010)	Loss 4.3271 (4.1590)	Top-1 acc 21.484 (27.984)	Top-5 acc 44.922 (51.017)	lr 0.01061
Warmup Train [28][2640/3239]	Time 0.202 (0.232)	Data 0.002 (0.010)	Loss 4.1962 (4.1590)	Top-1 acc 23.438 (27.985)	Top-5 acc 51.562 (51.019)	lr 0.01060
Warmup Train [28][2650/3239]	Time 0.204 (0.232)	Data 0.001 (0.010)	Loss 4.1762 (4.1590)	Top-1 acc 24.609 (27.982)	Top-5 acc 52.344 (51.018)	lr 0.01060
Warmup Train [28][2660/3239]	Time 0.248 (0.232)	Data 0.001 (0.010)	Loss 4.3471 (4.1591)	Top-1 acc 22.656 (27.983)	Top-5 acc 47.266 (51.017)	lr 0.01059
Warmup Train [28][2670/3239]	Time 0.184 (0.232)	Data 0.001 (0.009)	Loss 4.1044 (4.1590)	Top-1 acc 30.469 (27.987)	Top-5 acc 51.172 (51.020)	lr 0.01059
Warmup Train [28][2680/3239]	Time 0.238 (0.232)	Data 0.001 (0.009)	Loss 3.8957 (4.1589)	Top-1 acc 33.203 (27.989)	Top-5 acc 58.203 (51.022)	lr 0.01058
Warmup Train [28][2690/3239]	Time 0.229 (0.232)	Data 0.001 (0.009)	Loss 4.1617 (4.1587)	Top-1 acc 27.344 (27.993)	Top-5 acc 51.172 (51.029)	lr 0.01058
Warmup Train [28][2700/3239]	Time 0.240 (0.232)	Data 0.001 (0.009)	Loss 4.2048 (4.1586)	Top-1 acc 24.219 (27.990)	Top-5 acc 54.297 (51.027)	lr 0.01057
Warmup Train [28][2710/3239]	Time 0.246 (0.232)	Data 0.001 (0.009)	Loss 4.1725 (4.1585)	Top-1 acc 27.344 (27.991)	Top-5 acc 50.781 (51.029)	lr 0.01057
Warmup Train [28][2720/3239]	Time 0.295 (0.232)	Data 0.001 (0.009)	Loss 4.0636 (4.1583)	Top-1 acc 26.562 (27.992)	Top-5 acc 53.906 (51.029)	lr 0.01056
Warmup Train [28][2730/3239]	Time 0.197 (0.232)	Data 0.001 (0.009)	Loss 4.2279 (4.1587)	Top-1 acc 31.641 (27.988)	Top-5 acc 50.391 (51.023)	lr 0.01056
Warmup Train [28][2740/3239]	Time 0.149 (0.232)	Data 0.001 (0.009)	Loss 4.2972 (4.1586)	Top-1 acc 23.438 (27.987)	Top-5 acc 48.047 (51.025)	lr 0.01055
Warmup Train [28][2750/3239]	Time 0.191 (0.232)	Data 0.002 (0.009)	Loss 3.9913 (4.1586)	Top-1 acc 29.688 (27.988)	Top-5 acc 56.250 (51.027)	lr 0.01055
Warmup Train [28][2760/3239]	Time 0.228 (0.232)	Data 0.001 (0.009)	Loss 3.9283 (4.1586)	Top-1 acc 31.250 (27.986)	Top-5 acc 55.859 (51.025)	lr 0.01054
Warmup Train [28][2770/3239]	Time 0.216 (0.232)	Data 0.001 (0.009)	Loss 4.2845 (4.1586)	Top-1 acc 28.125 (27.984)	Top-5 acc 47.656 (51.027)	lr 0.01054
Warmup Train [28][2780/3239]	Time 0.219 (0.232)	Data 0.001 (0.009)	Loss 4.2194 (4.1586)	Top-1 acc 23.828 (27.982)	Top-5 acc 47.266 (51.028)	lr 0.01053
Warmup Train [28][2790/3239]	Time 0.252 (0.232)	Data 0.002 (0.009)	Loss 4.0981 (4.1585)	Top-1 acc 29.688 (27.982)	Top-5 acc 55.859 (51.032)	lr 0.01053
Warmup Train [28][2800/3239]	Time 0.227 (0.232)	Data 0.002 (0.009)	Loss 3.9608 (4.1587)	Top-1 acc 29.688 (27.983)	Top-5 acc 55.078 (51.030)	lr 0.01052
Warmup Train [28][2810/3239]	Time 0.347 (0.232)	Data 0.002 (0.009)	Loss 4.1250 (4.1588)	Top-1 acc 28.516 (27.980)	Top-5 acc 52.344 (51.029)	lr 0.01052
Warmup Train [28][2820/3239]	Time 0.193 (0.232)	Data 0.001 (0.009)	Loss 4.1573 (4.1587)	Top-1 acc 26.953 (27.979)	Top-5 acc 46.484 (51.031)	lr 0.01051
Warmup Train [28][2830/3239]	Time 0.262 (0.232)	Data 0.001 (0.009)	Loss 4.4161 (4.1588)	Top-1 acc 22.656 (27.976)	Top-5 acc 46.875 (51.030)	lr 0.01051
Warmup Train [28][2840/3239]	Time 0.203 (0.232)	Data 0.001 (0.009)	Loss 4.2367 (4.1586)	Top-1 acc 25.000 (27.977)	Top-5 acc 50.000 (51.034)	lr 0.01050
Warmup Train [28][2850/3239]	Time 0.264 (0.232)	Data 0.001 (0.009)	Loss 4.1490 (4.1584)	Top-1 acc 28.125 (27.983)	Top-5 acc 51.172 (51.040)	lr 0.01050
Warmup Train [28][2860/3239]	Time 0.204 (0.232)	Data 0.001 (0.009)	Loss 4.2799 (4.1584)	Top-1 acc 26.172 (27.980)	Top-5 acc 50.000 (51.042)	lr 0.01049
Warmup Train [28][2870/3239]	Time 0.221 (0.232)	Data 0.001 (0.009)	Loss 4.3290 (4.1584)	Top-1 acc 24.219 (27.979)	Top-5 acc 46.094 (51.040)	lr 0.01049
Warmup Train [28][2880/3239]	Time 0.264 (0.231)	Data 0.002 (0.009)	Loss 4.1763 (4.1585)	Top-1 acc 25.781 (27.975)	Top-5 acc 50.781 (51.038)	lr 0.01048
Warmup Train [28][2890/3239]	Time 0.222 (0.231)	Data 0.001 (0.009)	Loss 4.0899 (4.1584)	Top-1 acc 29.297 (27.976)	Top-5 acc 57.422 (51.043)	lr 0.01048
Warmup Train [28][2900/3239]	Time 0.243 (0.231)	Data 0.001 (0.009)	Loss 4.1138 (4.1582)	Top-1 acc 27.734 (27.982)	Top-5 acc 51.562 (51.044)	lr 0.01047
Warmup Train [28][2910/3239]	Time 0.367 (0.231)	Data 0.001 (0.009)	Loss 4.1059 (4.1581)	Top-1 acc 29.688 (27.983)	Top-5 acc 52.734 (51.045)	lr 0.01047
Warmup Train [28][2920/3239]	Time 0.215 (0.231)	Data 0.001 (0.009)	Loss 4.1868 (4.1580)	Top-1 acc 24.219 (27.988)	Top-5 acc 51.953 (51.049)	lr 0.01046
Warmup Train [28][2930/3239]	Time 0.252 (0.231)	Data 0.002 (0.009)	Loss 4.1050 (4.1578)	Top-1 acc 30.469 (27.991)	Top-5 acc 54.688 (51.055)	lr 0.01046
Warmup Train [28][2940/3239]	Time 0.240 (0.231)	Data 0.002 (0.009)	Loss 4.3583 (4.1579)	Top-1 acc 26.953 (27.991)	Top-5 acc 46.094 (51.055)	lr 0.01045
Warmup Train [28][2950/3239]	Time 0.248 (0.231)	Data 0.001 (0.009)	Loss 4.2013 (4.1580)	Top-1 acc 24.219 (27.986)	Top-5 acc 49.609 (51.052)	lr 0.01045
Warmup Train [28][2960/3239]	Time 0.236 (0.231)	Data 0.001 (0.009)	Loss 4.1314 (4.1579)	Top-1 acc 25.000 (27.986)	Top-5 acc 51.953 (51.056)	lr 0.01044
Warmup Train [28][2970/3239]	Time 0.150 (0.231)	Data 0.001 (0.009)	Loss 4.1419 (4.1579)	Top-1 acc 30.078 (27.989)	Top-5 acc 53.125 (51.056)	lr 0.01044
Warmup Train [28][2980/3239]	Time 0.212 (0.231)	Data 0.001 (0.009)	Loss 4.1157 (4.1580)	Top-1 acc 28.906 (27.988)	Top-5 acc 53.516 (51.057)	lr 0.01043
Warmup Train [28][2990/3239]	Time 0.173 (0.231)	Data 0.001 (0.009)	Loss 4.1651 (4.1578)	Top-1 acc 27.344 (27.989)	Top-5 acc 51.172 (51.061)	lr 0.01043
Warmup Train [28][3000/3239]	Time 0.211 (0.231)	Data 0.001 (0.009)	Loss 4.1638 (4.1578)	Top-1 acc 28.906 (27.987)	Top-5 acc 51.562 (51.060)	lr 0.01042
Warmup Train [28][3010/3239]	Time 0.302 (0.231)	Data 0.001 (0.009)	Loss 4.1872 (4.1575)	Top-1 acc 29.297 (27.994)	Top-5 acc 51.172 (51.065)	lr 0.01042
Warmup Train [28][3020/3239]	Time 0.191 (0.231)	Data 0.001 (0.009)	Loss 4.1311 (4.1573)	Top-1 acc 27.344 (27.997)	Top-5 acc 53.125 (51.070)	lr 0.01041
Warmup Train [28][3030/3239]	Time 0.198 (0.231)	Data 0.001 (0.009)	Loss 4.2068 (4.1574)	Top-1 acc 25.391 (27.994)	Top-5 acc 50.391 (51.071)	lr 0.01041
Warmup Train [28][3040/3239]	Time 0.204 (0.231)	Data 0.001 (0.009)	Loss 3.9550 (4.1572)	Top-1 acc 32.812 (28.000)	Top-5 acc 57.812 (51.078)	lr 0.01040
Warmup Train [28][3050/3239]	Time 0.273 (0.231)	Data 0.001 (0.009)	Loss 4.1717 (4.1573)	Top-1 acc 26.953 (27.998)	Top-5 acc 48.438 (51.076)	lr 0.01040
Warmup Train [28][3060/3239]	Time 0.210 (0.231)	Data 0.001 (0.009)	Loss 4.0971 (4.1574)	Top-1 acc 29.297 (28.001)	Top-5 acc 52.734 (51.074)	lr 0.01039
Warmup Train [28][3070/3239]	Time 0.199 (0.231)	Data 0.002 (0.009)	Loss 4.2316 (4.1573)	Top-1 acc 29.297 (28.003)	Top-5 acc 46.094 (51.077)	lr 0.01039
Warmup Train [28][3080/3239]	Time 0.160 (0.231)	Data 0.001 (0.009)	Loss 4.1334 (4.1574)	Top-1 acc 26.562 (27.999)	Top-5 acc 50.391 (51.078)	lr 0.01038
Warmup Train [28][3090/3239]	Time 0.203 (0.231)	Data 0.001 (0.009)	Loss 4.2691 (4.1575)	Top-1 acc 28.125 (27.997)	Top-5 acc 44.531 (51.074)	lr 0.01038
Warmup Train [28][3100/3239]	Time 0.212 (0.231)	Data 0.003 (0.008)	Loss 4.0547 (4.1573)	Top-1 acc 30.078 (28.000)	Top-5 acc 55.859 (51.076)	lr 0.01037
Warmup Train [28][3110/3239]	Time 0.210 (0.231)	Data 0.042 (0.009)	Loss 4.2875 (4.1572)	Top-1 acc 26.562 (28.001)	Top-5 acc 48.438 (51.078)	lr 0.01037
Warmup Train [28][3120/3239]	Time 0.246 (0.231)	Data 0.001 (0.008)	Loss 3.9816 (4.1572)	Top-1 acc 30.078 (27.999)	Top-5 acc 55.859 (51.076)	lr 0.01036
Warmup Train [28][3130/3239]	Time 0.373 (0.231)	Data 0.002 (0.008)	Loss 4.0542 (4.1571)	Top-1 acc 25.781 (28.001)	Top-5 acc 53.125 (51.078)	lr 0.01036
Warmup Train [28][3140/3239]	Time 0.197 (0.231)	Data 0.001 (0.008)	Loss 4.0600 (4.1570)	Top-1 acc 29.297 (28.003)	Top-5 acc 51.562 (51.081)	lr 0.01035
Warmup Train [28][3150/3239]	Time 0.177 (0.231)	Data 0.002 (0.008)	Loss 4.2545 (4.1571)	Top-1 acc 25.391 (28.001)	Top-5 acc 49.609 (51.079)	lr 0.01035
Warmup Train [28][3160/3239]	Time 0.256 (0.231)	Data 0.001 (0.008)	Loss 4.0989 (4.1569)	Top-1 acc 28.906 (28.004)	Top-5 acc 55.859 (51.081)	lr 0.01034
Warmup Train [28][3170/3239]	Time 0.218 (0.231)	Data 0.001 (0.008)	Loss 4.0346 (4.1566)	Top-1 acc 30.469 (28.012)	Top-5 acc 53.906 (51.090)	lr 0.01034
Warmup Train [28][3180/3239]	Time 0.272 (0.231)	Data 0.000 (0.008)	Loss 4.2863 (4.1566)	Top-1 acc 25.391 (28.009)	Top-5 acc 48.047 (51.088)	lr 0.01033
Warmup Train [28][3190/3239]	Time 0.148 (0.231)	Data 0.000 (0.008)	Loss 3.9726 (4.1567)	Top-1 acc 33.594 (28.010)	Top-5 acc 55.078 (51.088)	lr 0.01033
Warmup Train [28][3200/3239]	Time 0.225 (0.231)	Data 0.000 (0.008)	Loss 4.1002 (4.1566)	Top-1 acc 29.688 (28.011)	Top-5 acc 52.344 (51.090)	lr 0.01032
Warmup Train [28][3210/3239]	Time 0.146 (0.231)	Data 0.000 (0.008)	Loss 4.0968 (4.1566)	Top-1 acc 31.250 (28.013)	Top-5 acc 51.953 (51.093)	lr 0.01032
Warmup Train [28][3220/3239]	Time 0.182 (0.231)	Data 0.000 (0.008)	Loss 4.2382 (4.1566)	Top-1 acc 29.297 (28.014)	Top-5 acc 49.609 (51.094)	lr 0.01032
Warmup Train [28][3230/3239]	Time 0.279 (0.231)	Data 0.000 (0.008)	Loss 4.0985 (4.1566)	Top-1 acc 26.953 (28.013)	Top-5 acc 49.609 (51.092)	lr 0.01031
Warmup Train [28][3239/3239]	Time 0.249 (0.230)	Data 0.000 (0.008)	Loss 4.1275 (4.1566)	Top-1 acc 27.160 (28.012)	Top-5 acc 53.086 (51.092)	lr 0.01031
==========Warmup Valid [28/40]	loss 3.104	top-1 acc 35.985	top-5 acc 60.552	Train top-1 28.012	top-5 51.092	flops: 442.4M
Warmup Train [29][0/3239]	Time 17.773 (17.773)	Data 16.157 (16.157)	Loss 4.2689 (4.2689)	Top-1 acc 25.000 (25.000)	Top-5 acc 46.484 (46.484)	lr 0.01031
Warmup Train [29][10/3239]	Time 0.261 (1.916)	Data 0.002 (1.472)	Loss 4.0562 (4.1177)	Top-1 acc 28.516 (28.018)	Top-5 acc 56.250 (51.989)	lr 0.01030
Warmup Train [29][20/3239]	Time 0.199 (1.131)	Data 0.001 (0.772)	Loss 3.9833 (4.1536)	Top-1 acc 31.250 (27.809)	Top-5 acc 53.906 (51.209)	lr 0.01030
Warmup Train [29][30/3239]	Time 0.315 (0.844)	Data 0.001 (0.525)	Loss 4.2203 (4.1498)	Top-1 acc 27.344 (27.823)	Top-5 acc 46.875 (51.134)	lr 0.01029
Warmup Train [29][40/3239]	Time 0.198 (0.691)	Data 0.001 (0.397)	Loss 4.0327 (4.1285)	Top-1 acc 26.562 (28.106)	Top-5 acc 55.078 (51.772)	lr 0.01029
Warmup Train [29][50/3239]	Time 0.159 (0.606)	Data 0.001 (0.320)	Loss 4.2456 (4.1384)	Top-1 acc 24.609 (28.102)	Top-5 acc 51.953 (51.585)	lr 0.01028
Warmup Train [29][60/3239]	Time 0.174 (0.541)	Data 0.001 (0.268)	Loss 4.0105 (4.1379)	Top-1 acc 28.125 (28.099)	Top-5 acc 54.297 (51.646)	lr 0.01028
Warmup Train [29][70/3239]	Time 0.240 (0.496)	Data 0.002 (0.231)	Loss 4.2029 (4.1395)	Top-1 acc 25.781 (28.147)	Top-5 acc 50.391 (51.590)	lr 0.01027
Warmup Train [29][80/3239]	Time 0.211 (0.463)	Data 0.001 (0.204)	Loss 4.1447 (4.1362)	Top-1 acc 26.953 (28.241)	Top-5 acc 52.734 (51.543)	lr 0.01027
Warmup Train [29][90/3239]	Time 0.259 (0.437)	Data 0.002 (0.181)	Loss 4.1275 (4.1329)	Top-1 acc 25.781 (28.284)	Top-5 acc 52.734 (51.610)	lr 0.01026
Warmup Train [29][100/3239]	Time 0.280 (0.417)	Data 0.001 (0.164)	Loss 4.1540 (4.1375)	Top-1 acc 26.562 (28.237)	Top-5 acc 52.344 (51.566)	lr 0.01026
Warmup Train [29][110/3239]	Time 0.264 (0.400)	Data 0.001 (0.149)	Loss 4.0269 (4.1341)	Top-1 acc 33.203 (28.410)	Top-5 acc 53.516 (51.584)	lr 0.01025
Warmup Train [29][120/3239]	Time 0.196 (0.387)	Data 0.001 (0.137)	Loss 3.9815 (4.1344)	Top-1 acc 30.859 (28.403)	Top-5 acc 54.688 (51.537)	lr 0.01025
Warmup Train [29][130/3239]	Time 0.264 (0.374)	Data 0.001 (0.127)	Loss 4.1872 (4.1364)	Top-1 acc 26.562 (28.283)	Top-5 acc 50.000 (51.574)	lr 0.01024
Warmup Train [29][140/3239]	Time 0.328 (0.365)	Data 0.001 (0.118)	Loss 4.1508 (4.1379)	Top-1 acc 26.172 (28.316)	Top-5 acc 51.172 (51.510)	lr 0.01024
Warmup Train [29][150/3239]	Time 0.227 (0.356)	Data 0.001 (0.110)	Loss 4.0279 (4.1364)	Top-1 acc 31.641 (28.347)	Top-5 acc 55.078 (51.609)	lr 0.01023
Warmup Train [29][160/3239]	Time 0.208 (0.348)	Data 0.001 (0.103)	Loss 4.0816 (4.1348)	Top-1 acc 26.953 (28.329)	Top-5 acc 50.781 (51.662)	lr 0.01023
Warmup Train [29][170/3239]	Time 0.237 (0.341)	Data 0.001 (0.098)	Loss 4.2003 (4.1339)	Top-1 acc 26.172 (28.317)	Top-5 acc 49.219 (51.681)	lr 0.01022
Warmup Train [29][180/3239]	Time 0.245 (0.335)	Data 0.002 (0.092)	Loss 3.9590 (4.1330)	Top-1 acc 36.328 (28.406)	Top-5 acc 55.469 (51.651)	lr 0.01022
Warmup Train [29][190/3239]	Time 0.202 (0.329)	Data 0.002 (0.088)	Loss 3.9851 (4.1304)	Top-1 acc 31.250 (28.477)	Top-5 acc 53.125 (51.728)	lr 0.01021
Warmup Train [29][200/3239]	Time 0.260 (0.324)	Data 0.001 (0.083)	Loss 4.0752 (4.1312)	Top-1 acc 30.078 (28.455)	Top-5 acc 53.906 (51.734)	lr 0.01021
Warmup Train [29][210/3239]	Time 0.260 (0.319)	Data 0.001 (0.080)	Loss 4.0279 (4.1307)	Top-1 acc 31.641 (28.469)	Top-5 acc 53.125 (51.762)	lr 0.01020
Warmup Train [29][220/3239]	Time 0.248 (0.314)	Data 0.002 (0.076)	Loss 4.2057 (4.1294)	Top-1 acc 28.906 (28.477)	Top-5 acc 48.438 (51.764)	lr 0.01020
Warmup Train [29][230/3239]	Time 0.412 (0.311)	Data 0.001 (0.073)	Loss 4.0920 (4.1299)	Top-1 acc 28.125 (28.505)	Top-5 acc 51.562 (51.740)	lr 0.01019
Warmup Train [29][240/3239]	Time 0.226 (0.307)	Data 0.001 (0.070)	Loss 4.0201 (4.1289)	Top-1 acc 32.812 (28.537)	Top-5 acc 52.344 (51.726)	lr 0.01019
Warmup Train [29][250/3239]	Time 0.200 (0.304)	Data 0.001 (0.067)	Loss 4.0649 (4.1283)	Top-1 acc 33.203 (28.603)	Top-5 acc 53.125 (51.745)	lr 0.01018
Warmup Train [29][260/3239]	Time 0.219 (0.301)	Data 0.001 (0.065)	Loss 4.0734 (4.1296)	Top-1 acc 33.984 (28.593)	Top-5 acc 53.906 (51.684)	lr 0.01018
Warmup Train [29][270/3239]	Time 0.150 (0.298)	Data 0.001 (0.063)	Loss 4.2798 (4.1277)	Top-1 acc 28.125 (28.683)	Top-5 acc 50.000 (51.743)	lr 0.01017
Warmup Train [29][280/3239]	Time 0.236 (0.296)	Data 0.002 (0.061)	Loss 4.1531 (4.1288)	Top-1 acc 28.125 (28.673)	Top-5 acc 48.438 (51.717)	lr 0.01017
Warmup Train [29][290/3239]	Time 0.208 (0.293)	Data 0.001 (0.059)	Loss 4.1905 (4.1297)	Top-1 acc 25.391 (28.615)	Top-5 acc 49.609 (51.710)	lr 0.01016
Warmup Train [29][300/3239]	Time 0.267 (0.291)	Data 0.001 (0.057)	Loss 4.2640 (4.1303)	Top-1 acc 27.344 (28.605)	Top-5 acc 51.953 (51.682)	lr 0.01016
Warmup Train [29][310/3239]	Time 0.178 (0.289)	Data 0.001 (0.055)	Loss 4.2380 (4.1309)	Top-1 acc 30.078 (28.606)	Top-5 acc 51.953 (51.672)	lr 0.01015
Warmup Train [29][320/3239]	Time 0.232 (0.287)	Data 0.001 (0.054)	Loss 4.1208 (4.1319)	Top-1 acc 26.562 (28.590)	Top-5 acc 53.516 (51.640)	lr 0.01015
Warmup Train [29][330/3239]	Time 0.336 (0.285)	Data 0.002 (0.052)	Loss 4.1399 (4.1328)	Top-1 acc 32.422 (28.582)	Top-5 acc 52.734 (51.670)	lr 0.01014
Warmup Train [29][340/3239]	Time 0.193 (0.284)	Data 0.001 (0.051)	Loss 3.9633 (4.1319)	Top-1 acc 30.078 (28.606)	Top-5 acc 54.688 (51.689)	lr 0.01014
Warmup Train [29][350/3239]	Time 0.217 (0.282)	Data 0.001 (0.049)	Loss 4.1728 (4.1319)	Top-1 acc 27.734 (28.598)	Top-5 acc 48.828 (51.677)	lr 0.01013
Warmup Train [29][360/3239]	Time 0.182 (0.280)	Data 0.001 (0.048)	Loss 4.3799 (4.1336)	Top-1 acc 26.172 (28.574)	Top-5 acc 46.484 (51.630)	lr 0.01013
Warmup Train [29][370/3239]	Time 0.231 (0.278)	Data 0.001 (0.047)	Loss 4.0308 (4.1344)	Top-1 acc 29.688 (28.551)	Top-5 acc 53.516 (51.626)	lr 0.01012
Warmup Train [29][380/3239]	Time 0.210 (0.277)	Data 0.001 (0.046)	Loss 4.3133 (4.1335)	Top-1 acc 27.734 (28.565)	Top-5 acc 48.047 (51.664)	lr 0.01012
Warmup Train [29][390/3239]	Time 0.220 (0.275)	Data 0.001 (0.045)	Loss 3.9771 (4.1338)	Top-1 acc 32.031 (28.591)	Top-5 acc 55.469 (51.660)	lr 0.01011
Warmup Train [29][400/3239]	Time 0.262 (0.274)	Data 0.002 (0.044)	Loss 4.1756 (4.1339)	Top-1 acc 29.297 (28.593)	Top-5 acc 50.781 (51.689)	lr 0.01011
Warmup Train [29][410/3239]	Time 0.206 (0.273)	Data 0.001 (0.043)	Loss 4.1201 (4.1326)	Top-1 acc 30.859 (28.619)	Top-5 acc 54.688 (51.753)	lr 0.01011
Warmup Train [29][420/3239]	Time 0.232 (0.272)	Data 0.002 (0.042)	Loss 4.2232 (4.1324)	Top-1 acc 24.609 (28.636)	Top-5 acc 47.266 (51.747)	lr 0.01010
Warmup Train [29][430/3239]	Time 0.194 (0.271)	Data 0.001 (0.041)	Loss 4.0329 (4.1322)	Top-1 acc 31.641 (28.644)	Top-5 acc 51.953 (51.737)	lr 0.01010
Warmup Train [29][440/3239]	Time 0.170 (0.270)	Data 0.001 (0.040)	Loss 4.0632 (4.1320)	Top-1 acc 29.297 (28.645)	Top-5 acc 50.391 (51.757)	lr 0.01009
Warmup Train [29][450/3239]	Time 0.250 (0.269)	Data 0.001 (0.039)	Loss 4.2013 (4.1331)	Top-1 acc 26.562 (28.631)	Top-5 acc 51.562 (51.729)	lr 0.01009
Warmup Train [29][460/3239]	Time 0.203 (0.268)	Data 0.001 (0.038)	Loss 4.1062 (4.1324)	Top-1 acc 28.516 (28.644)	Top-5 acc 50.781 (51.712)	lr 0.01008
Warmup Train [29][470/3239]	Time 0.279 (0.267)	Data 0.001 (0.038)	Loss 4.0301 (4.1317)	Top-1 acc 30.469 (28.662)	Top-5 acc 53.125 (51.745)	lr 0.01008
Warmup Train [29][480/3239]	Time 0.177 (0.266)	Data 0.001 (0.037)	Loss 4.2819 (4.1319)	Top-1 acc 24.219 (28.633)	Top-5 acc 44.531 (51.729)	lr 0.01007
Warmup Train [29][490/3239]	Time 0.228 (0.265)	Data 0.002 (0.036)	Loss 4.2240 (4.1335)	Top-1 acc 28.906 (28.603)	Top-5 acc 49.219 (51.686)	lr 0.01007
Warmup Train [29][500/3239]	Time 0.278 (0.264)	Data 0.002 (0.035)	Loss 4.0968 (4.1334)	Top-1 acc 26.953 (28.595)	Top-5 acc 54.297 (51.700)	lr 0.01006
Warmup Train [29][510/3239]	Time 0.193 (0.263)	Data 0.001 (0.035)	Loss 4.2015 (4.1343)	Top-1 acc 27.734 (28.587)	Top-5 acc 47.266 (51.677)	lr 0.01006
Warmup Train [29][520/3239]	Time 0.246 (0.263)	Data 0.003 (0.034)	Loss 4.1125 (4.1333)	Top-1 acc 30.078 (28.601)	Top-5 acc 52.734 (51.686)	lr 0.01005
Warmup Train [29][530/3239]	Time 0.173 (0.262)	Data 0.001 (0.034)	Loss 4.2273 (4.1331)	Top-1 acc 25.781 (28.605)	Top-5 acc 49.609 (51.701)	lr 0.01005
Warmup Train [29][540/3239]	Time 0.334 (0.262)	Data 0.002 (0.033)	Loss 4.0599 (4.1326)	Top-1 acc 29.688 (28.617)	Top-5 acc 52.734 (51.708)	lr 0.01004
Warmup Train [29][550/3239]	Time 0.239 (0.261)	Data 0.001 (0.032)	Loss 4.2308 (4.1335)	Top-1 acc 29.297 (28.611)	Top-5 acc 50.781 (51.682)	lr 0.01004
Warmup Train [29][560/3239]	Time 0.265 (0.260)	Data 0.001 (0.032)	Loss 4.2494 (4.1339)	Top-1 acc 28.516 (28.596)	Top-5 acc 50.391 (51.680)	lr 0.01003
Warmup Train [29][570/3239]	Time 0.290 (0.259)	Data 0.001 (0.031)	Loss 4.2249 (4.1332)	Top-1 acc 25.391 (28.600)	Top-5 acc 43.750 (51.702)	lr 0.01003
Warmup Train [29][580/3239]	Time 0.194 (0.259)	Data 0.001 (0.031)	Loss 4.2110 (4.1333)	Top-1 acc 27.344 (28.579)	Top-5 acc 50.781 (51.696)	lr 0.01002
Warmup Train [29][590/3239]	Time 0.189 (0.258)	Data 0.001 (0.030)	Loss 4.2068 (4.1335)	Top-1 acc 26.562 (28.559)	Top-5 acc 48.047 (51.689)	lr 0.01002
Warmup Train [29][600/3239]	Time 0.177 (0.258)	Data 0.001 (0.030)	Loss 4.1622 (4.1325)	Top-1 acc 27.344 (28.566)	Top-5 acc 51.953 (51.724)	lr 0.01001
Warmup Train [29][610/3239]	Time 0.207 (0.257)	Data 0.002 (0.029)	Loss 4.2528 (4.1319)	Top-1 acc 26.953 (28.585)	Top-5 acc 48.438 (51.741)	lr 0.01001
Warmup Train [29][620/3239]	Time 0.237 (0.256)	Data 0.001 (0.029)	Loss 4.0782 (4.1328)	Top-1 acc 29.688 (28.555)	Top-5 acc 51.953 (51.715)	lr 0.01000
Warmup Train [29][630/3239]	Time 0.256 (0.256)	Data 0.001 (0.029)	Loss 4.4132 (4.1333)	Top-1 acc 23.828 (28.553)	Top-5 acc 44.141 (51.697)	lr 0.01000
Warmup Train [29][640/3239]	Time 0.207 (0.255)	Data 0.001 (0.028)	Loss 4.0891 (4.1331)	Top-1 acc 31.250 (28.549)	Top-5 acc 54.688 (51.698)	lr 0.00999
Warmup Train [29][650/3239]	Time 0.280 (0.254)	Data 0.001 (0.028)	Loss 4.2799 (4.1334)	Top-1 acc 23.438 (28.529)	Top-5 acc 42.969 (51.690)	lr 0.00999
Warmup Train [29][660/3239]	Time 0.320 (0.254)	Data 0.002 (0.027)	Loss 4.0661 (4.1334)	Top-1 acc 30.078 (28.523)	Top-5 acc 53.906 (51.684)	lr 0.00998
Warmup Train [29][670/3239]	Time 0.247 (0.254)	Data 0.002 (0.027)	Loss 4.1276 (4.1344)	Top-1 acc 30.859 (28.505)	Top-5 acc 51.562 (51.672)	lr 0.00998
Warmup Train [29][680/3239]	Time 0.215 (0.253)	Data 0.001 (0.027)	Loss 4.0354 (4.1336)	Top-1 acc 29.297 (28.528)	Top-5 acc 52.734 (51.682)	lr 0.00997
Warmup Train [29][690/3239]	Time 0.221 (0.253)	Data 0.002 (0.026)	Loss 4.2153 (4.1337)	Top-1 acc 29.297 (28.523)	Top-5 acc 46.094 (51.672)	lr 0.00997
Warmup Train [29][700/3239]	Time 0.167 (0.252)	Data 0.002 (0.026)	Loss 4.2099 (4.1337)	Top-1 acc 24.609 (28.513)	Top-5 acc 51.172 (51.682)	lr 0.00996
Warmup Train [29][710/3239]	Time 0.210 (0.252)	Data 0.002 (0.026)	Loss 4.0665 (4.1335)	Top-1 acc 26.953 (28.528)	Top-5 acc 53.906 (51.692)	lr 0.00996
Warmup Train [29][720/3239]	Time 0.303 (0.252)	Data 0.003 (0.025)	Loss 4.2836 (4.1328)	Top-1 acc 26.562 (28.531)	Top-5 acc 50.391 (51.701)	lr 0.00995
Warmup Train [29][730/3239]	Time 0.236 (0.251)	Data 0.001 (0.025)	Loss 4.1868 (4.1331)	Top-1 acc 30.859 (28.532)	Top-5 acc 50.391 (51.708)	lr 0.00995
Warmup Train [29][740/3239]	Time 0.127 (0.251)	Data 0.002 (0.025)	Loss 4.0377 (4.1324)	Top-1 acc 28.516 (28.542)	Top-5 acc 57.031 (51.721)	lr 0.00994
Warmup Train [29][750/3239]	Time 0.378 (0.251)	Data 0.002 (0.025)	Loss 3.9439 (4.1322)	Top-1 acc 33.984 (28.534)	Top-5 acc 59.766 (51.734)	lr 0.00994
Warmup Train [29][760/3239]	Time 0.311 (0.251)	Data 0.001 (0.024)	Loss 4.1403 (4.1326)	Top-1 acc 28.906 (28.539)	Top-5 acc 48.828 (51.720)	lr 0.00994
Warmup Train [29][770/3239]	Time 0.193 (0.250)	Data 0.001 (0.024)	Loss 3.8435 (4.1326)	Top-1 acc 33.203 (28.539)	Top-5 acc 58.594 (51.707)	lr 0.00993
Warmup Train [29][780/3239]	Time 0.264 (0.250)	Data 0.001 (0.024)	Loss 4.0036 (4.1329)	Top-1 acc 33.984 (28.533)	Top-5 acc 58.594 (51.706)	lr 0.00993
Warmup Train [29][790/3239]	Time 0.244 (0.250)	Data 0.001 (0.023)	Loss 4.1960 (4.1333)	Top-1 acc 27.344 (28.528)	Top-5 acc 52.344 (51.708)	lr 0.00992
Warmup Train [29][800/3239]	Time 0.279 (0.249)	Data 0.002 (0.023)	Loss 4.1408 (4.1327)	Top-1 acc 28.125 (28.547)	Top-5 acc 51.953 (51.721)	lr 0.00992
Warmup Train [29][810/3239]	Time 0.260 (0.249)	Data 0.001 (0.023)	Loss 3.9220 (4.1332)	Top-1 acc 36.328 (28.542)	Top-5 acc 53.516 (51.691)	lr 0.00991
Warmup Train [29][820/3239]	Time 0.191 (0.248)	Data 0.001 (0.023)	Loss 4.1560 (4.1333)	Top-1 acc 24.219 (28.533)	Top-5 acc 51.562 (51.696)	lr 0.00991
Warmup Train [29][830/3239]	Time 0.139 (0.248)	Data 0.001 (0.022)	Loss 4.0977 (4.1333)	Top-1 acc 28.906 (28.535)	Top-5 acc 53.125 (51.696)	lr 0.00990
Warmup Train [29][840/3239]	Time 0.235 (0.248)	Data 0.001 (0.022)	Loss 4.0491 (4.1335)	Top-1 acc 29.297 (28.529)	Top-5 acc 53.516 (51.687)	lr 0.00990
Warmup Train [29][850/3239]	Time 0.227 (0.247)	Data 0.002 (0.022)	Loss 4.0978 (4.1338)	Top-1 acc 28.516 (28.511)	Top-5 acc 52.734 (51.669)	lr 0.00989
Warmup Train [29][860/3239]	Time 0.155 (0.247)	Data 0.001 (0.022)	Loss 4.1022 (4.1341)	Top-1 acc 26.562 (28.497)	Top-5 acc 51.562 (51.656)	lr 0.00989
Warmup Train [29][870/3239]	Time 0.301 (0.247)	Data 0.002 (0.021)	Loss 3.9473 (4.1341)	Top-1 acc 30.078 (28.500)	Top-5 acc 57.031 (51.650)	lr 0.00988
Warmup Train [29][880/3239]	Time 0.196 (0.246)	Data 0.001 (0.021)	Loss 4.2868 (4.1349)	Top-1 acc 26.953 (28.486)	Top-5 acc 49.609 (51.643)	lr 0.00988
Warmup Train [29][890/3239]	Time 0.236 (0.246)	Data 0.002 (0.021)	Loss 3.9435 (4.1345)	Top-1 acc 30.859 (28.483)	Top-5 acc 59.375 (51.647)	lr 0.00987
Warmup Train [29][900/3239]	Time 0.227 (0.246)	Data 0.002 (0.021)	Loss 4.0877 (4.1344)	Top-1 acc 30.859 (28.488)	Top-5 acc 55.469 (51.661)	lr 0.00987
Warmup Train [29][910/3239]	Time 0.270 (0.245)	Data 0.002 (0.021)	Loss 3.9371 (4.1335)	Top-1 acc 35.547 (28.511)	Top-5 acc 51.953 (51.683)	lr 0.00986
Warmup Train [29][920/3239]	Time 0.211 (0.245)	Data 0.002 (0.020)	Loss 4.2483 (4.1339)	Top-1 acc 26.562 (28.494)	Top-5 acc 48.828 (51.663)	lr 0.00986
Warmup Train [29][930/3239]	Time 0.212 (0.245)	Data 0.002 (0.020)	Loss 3.8881 (4.1334)	Top-1 acc 33.594 (28.505)	Top-5 acc 57.031 (51.684)	lr 0.00985
Warmup Train [29][940/3239]	Time 0.172 (0.244)	Data 0.002 (0.020)	Loss 4.0984 (4.1328)	Top-1 acc 27.734 (28.499)	Top-5 acc 54.297 (51.696)	lr 0.00985
Warmup Train [29][950/3239]	Time 0.218 (0.244)	Data 0.002 (0.020)	Loss 4.0178 (4.1326)	Top-1 acc 33.594 (28.507)	Top-5 acc 53.516 (51.695)	lr 0.00984
Warmup Train [29][960/3239]	Time 0.227 (0.244)	Data 0.002 (0.020)	Loss 4.2770 (4.1328)	Top-1 acc 31.641 (28.506)	Top-5 acc 48.828 (51.695)	lr 0.00984
Warmup Train [29][970/3239]	Time 0.266 (0.244)	Data 0.002 (0.020)	Loss 3.9130 (4.1324)	Top-1 acc 33.594 (28.516)	Top-5 acc 58.203 (51.719)	lr 0.00983
Warmup Train [29][980/3239]	Time 0.237 (0.243)	Data 0.002 (0.019)	Loss 4.0749 (4.1326)	Top-1 acc 25.391 (28.514)	Top-5 acc 49.609 (51.712)	lr 0.00983
Warmup Train [29][990/3239]	Time 0.245 (0.243)	Data 0.002 (0.019)	Loss 4.1080 (4.1322)	Top-1 acc 28.125 (28.524)	Top-5 acc 50.781 (51.719)	lr 0.00982
Warmup Train [29][1000/3239]	Time 0.223 (0.243)	Data 0.001 (0.019)	Loss 3.9695 (4.1322)	Top-1 acc 32.031 (28.523)	Top-5 acc 56.641 (51.719)	lr 0.00982
Warmup Train [29][1010/3239]	Time 0.205 (0.243)	Data 0.001 (0.019)	Loss 4.2333 (4.1318)	Top-1 acc 28.516 (28.528)	Top-5 acc 48.438 (51.722)	lr 0.00981
Warmup Train [29][1020/3239]	Time 0.226 (0.243)	Data 0.001 (0.019)	Loss 4.2713 (4.1319)	Top-1 acc 29.297 (28.527)	Top-5 acc 45.312 (51.717)	lr 0.00981
Warmup Train [29][1030/3239]	Time 0.211 (0.243)	Data 0.001 (0.019)	Loss 4.2315 (4.1322)	Top-1 acc 23.438 (28.522)	Top-5 acc 47.656 (51.707)	lr 0.00981
Warmup Train [29][1040/3239]	Time 0.206 (0.243)	Data 0.001 (0.018)	Loss 4.2242 (4.1323)	Top-1 acc 31.641 (28.524)	Top-5 acc 50.391 (51.702)	lr 0.00980
Warmup Train [29][1050/3239]	Time 0.194 (0.242)	Data 0.001 (0.018)	Loss 4.1992 (4.1328)	Top-1 acc 27.344 (28.508)	Top-5 acc 49.609 (51.689)	lr 0.00980
Warmup Train [29][1060/3239]	Time 0.275 (0.242)	Data 0.003 (0.018)	Loss 4.3468 (4.1324)	Top-1 acc 22.656 (28.512)	Top-5 acc 46.094 (51.694)	lr 0.00979
Warmup Train [29][1070/3239]	Time 0.292 (0.242)	Data 0.001 (0.018)	Loss 4.0449 (4.1321)	Top-1 acc 32.812 (28.528)	Top-5 acc 54.688 (51.699)	lr 0.00979
Warmup Train [29][1080/3239]	Time 0.220 (0.242)	Data 0.001 (0.018)	Loss 3.9356 (4.1321)	Top-1 acc 33.203 (28.527)	Top-5 acc 58.984 (51.695)	lr 0.00978
Warmup Train [29][1090/3239]	Time 0.233 (0.242)	Data 0.001 (0.018)	Loss 4.2145 (4.1321)	Top-1 acc 27.734 (28.521)	Top-5 acc 51.172 (51.694)	lr 0.00978
Warmup Train [29][1100/3239]	Time 0.224 (0.242)	Data 0.001 (0.018)	Loss 4.0469 (4.1319)	Top-1 acc 29.297 (28.518)	Top-5 acc 55.469 (51.699)	lr 0.00977
Warmup Train [29][1110/3239]	Time 0.194 (0.242)	Data 0.001 (0.017)	Loss 4.0248 (4.1319)	Top-1 acc 33.594 (28.525)	Top-5 acc 53.516 (51.693)	lr 0.00977
Warmup Train [29][1120/3239]	Time 0.211 (0.242)	Data 0.001 (0.017)	Loss 4.2132 (4.1317)	Top-1 acc 25.391 (28.524)	Top-5 acc 47.266 (51.687)	lr 0.00976
Warmup Train [29][1130/3239]	Time 0.199 (0.241)	Data 0.001 (0.017)	Loss 4.2732 (4.1323)	Top-1 acc 28.125 (28.517)	Top-5 acc 49.219 (51.669)	lr 0.00976
Warmup Train [29][1140/3239]	Time 0.213 (0.241)	Data 0.002 (0.017)	Loss 4.1389 (4.1327)	Top-1 acc 28.906 (28.512)	Top-5 acc 48.828 (51.663)	lr 0.00975
Warmup Train [29][1150/3239]	Time 0.188 (0.241)	Data 0.001 (0.017)	Loss 4.2891 (4.1332)	Top-1 acc 30.078 (28.504)	Top-5 acc 49.219 (51.652)	lr 0.00975
Warmup Train [29][1160/3239]	Time 0.150 (0.241)	Data 0.001 (0.017)	Loss 4.1063 (4.1334)	Top-1 acc 25.000 (28.508)	Top-5 acc 55.078 (51.647)	lr 0.00974
Warmup Train [29][1170/3239]	Time 0.194 (0.241)	Data 0.001 (0.017)	Loss 4.0012 (4.1332)	Top-1 acc 28.906 (28.524)	Top-5 acc 52.734 (51.648)	lr 0.00974
Warmup Train [29][1180/3239]	Time 0.247 (0.241)	Data 0.001 (0.017)	Loss 4.2075 (4.1326)	Top-1 acc 23.438 (28.525)	Top-5 acc 46.484 (51.657)	lr 0.00973
Warmup Train [29][1190/3239]	Time 0.385 (0.241)	Data 0.001 (0.016)	Loss 4.2284 (4.1322)	Top-1 acc 27.734 (28.535)	Top-5 acc 50.000 (51.666)	lr 0.00973
Warmup Train [29][1200/3239]	Time 0.228 (0.241)	Data 0.002 (0.016)	Loss 4.1695 (4.1325)	Top-1 acc 27.344 (28.534)	Top-5 acc 51.562 (51.658)	lr 0.00972
Warmup Train [29][1210/3239]	Time 0.271 (0.241)	Data 0.001 (0.016)	Loss 4.2360 (4.1328)	Top-1 acc 27.344 (28.524)	Top-5 acc 51.562 (51.647)	lr 0.00972
Warmup Train [29][1220/3239]	Time 0.248 (0.241)	Data 0.001 (0.016)	Loss 4.1269 (4.1321)	Top-1 acc 28.906 (28.538)	Top-5 acc 49.609 (51.666)	lr 0.00971
Warmup Train [29][1230/3239]	Time 0.247 (0.241)	Data 0.001 (0.016)	Loss 4.1627 (4.1324)	Top-1 acc 28.906 (28.540)	Top-5 acc 51.172 (51.660)	lr 0.00971
Warmup Train [29][1240/3239]	Time 0.196 (0.241)	Data 0.002 (0.016)	Loss 3.9824 (4.1323)	Top-1 acc 30.469 (28.539)	Top-5 acc 54.688 (51.665)	lr 0.00970
Warmup Train [29][1250/3239]	Time 0.271 (0.241)	Data 0.001 (0.016)	Loss 3.8929 (4.1320)	Top-1 acc 30.469 (28.538)	Top-5 acc 59.375 (51.679)	lr 0.00970
Warmup Train [29][1260/3239]	Time 0.233 (0.242)	Data 0.002 (0.016)	Loss 4.1281 (4.1319)	Top-1 acc 29.688 (28.535)	Top-5 acc 50.391 (51.684)	lr 0.00969
Warmup Train [29][1270/3239]	Time 0.310 (0.242)	Data 0.001 (0.016)	Loss 4.0717 (4.1317)	Top-1 acc 26.953 (28.534)	Top-5 acc 51.562 (51.685)	lr 0.00969
Warmup Train [29][1280/3239]	Time 0.189 (0.242)	Data 0.001 (0.015)	Loss 4.1234 (4.1313)	Top-1 acc 29.297 (28.540)	Top-5 acc 51.953 (51.697)	lr 0.00968
Warmup Train [29][1290/3239]	Time 0.257 (0.242)	Data 0.001 (0.015)	Loss 4.2260 (4.1319)	Top-1 acc 25.000 (28.530)	Top-5 acc 49.609 (51.689)	lr 0.00968
Warmup Train [29][1300/3239]	Time 0.227 (0.241)	Data 0.001 (0.015)	Loss 4.1882 (4.1319)	Top-1 acc 24.609 (28.518)	Top-5 acc 48.047 (51.692)	lr 0.00968
Warmup Train [29][1310/3239]	Time 0.265 (0.241)	Data 0.001 (0.015)	Loss 4.2041 (4.1322)	Top-1 acc 26.953 (28.519)	Top-5 acc 50.781 (51.689)	lr 0.00967
Warmup Train [29][1320/3239]	Time 0.178 (0.241)	Data 0.001 (0.015)	Loss 4.3240 (4.1325)	Top-1 acc 24.219 (28.513)	Top-5 acc 46.094 (51.673)	lr 0.00967
Warmup Train [29][1330/3239]	Time 0.204 (0.241)	Data 0.001 (0.015)	Loss 4.1165 (4.1331)	Top-1 acc 30.859 (28.510)	Top-5 acc 50.781 (51.662)	lr 0.00966
Warmup Train [29][1340/3239]	Time 0.195 (0.241)	Data 0.001 (0.015)	Loss 4.0780 (4.1333)	Top-1 acc 26.953 (28.505)	Top-5 acc 51.953 (51.655)	lr 0.00966
Warmup Train [29][1350/3239]	Time 0.290 (0.241)	Data 0.001 (0.015)	Loss 4.1578 (4.1334)	Top-1 acc 30.859 (28.504)	Top-5 acc 49.609 (51.646)	lr 0.00965
Warmup Train [29][1360/3239]	Time 0.249 (0.241)	Data 0.001 (0.015)	Loss 3.9958 (4.1334)	Top-1 acc 33.594 (28.506)	Top-5 acc 53.906 (51.648)	lr 0.00965
Warmup Train [29][1370/3239]	Time 0.288 (0.241)	Data 0.001 (0.015)	Loss 3.8955 (4.1334)	Top-1 acc 32.422 (28.508)	Top-5 acc 57.812 (51.648)	lr 0.00964
Warmup Train [29][1380/3239]	Time 0.216 (0.241)	Data 0.001 (0.015)	Loss 4.1788 (4.1336)	Top-1 acc 26.562 (28.505)	Top-5 acc 49.609 (51.639)	lr 0.00964
Warmup Train [29][1390/3239]	Time 0.213 (0.241)	Data 0.001 (0.014)	Loss 3.9967 (4.1334)	Top-1 acc 32.031 (28.502)	Top-5 acc 55.859 (51.648)	lr 0.00963
Warmup Train [29][1400/3239]	Time 0.234 (0.241)	Data 0.001 (0.014)	Loss 4.2698 (4.1332)	Top-1 acc 28.516 (28.505)	Top-5 acc 49.609 (51.658)	lr 0.00963
Warmup Train [29][1410/3239]	Time 0.255 (0.240)	Data 0.001 (0.014)	Loss 4.2849 (4.1331)	Top-1 acc 28.516 (28.514)	Top-5 acc 53.906 (51.655)	lr 0.00962
Warmup Train [29][1420/3239]	Time 0.365 (0.240)	Data 0.002 (0.014)	Loss 4.1360 (4.1332)	Top-1 acc 28.906 (28.506)	Top-5 acc 51.562 (51.647)	lr 0.00962
Warmup Train [29][1430/3239]	Time 0.212 (0.240)	Data 0.001 (0.014)	Loss 3.9850 (4.1330)	Top-1 acc 30.469 (28.513)	Top-5 acc 53.125 (51.651)	lr 0.00961
Warmup Train [29][1440/3239]	Time 0.143 (0.240)	Data 0.001 (0.014)	Loss 4.2406 (4.1330)	Top-1 acc 30.859 (28.520)	Top-5 acc 49.219 (51.649)	lr 0.00961
Warmup Train [29][1450/3239]	Time 0.225 (0.240)	Data 0.001 (0.014)	Loss 4.0460 (4.1330)	Top-1 acc 26.953 (28.516)	Top-5 acc 53.516 (51.652)	lr 0.00960
Warmup Train [29][1460/3239]	Time 0.228 (0.240)	Data 0.002 (0.014)	Loss 4.0319 (4.1330)	Top-1 acc 26.172 (28.512)	Top-5 acc 48.828 (51.652)	lr 0.00960
Warmup Train [29][1470/3239]	Time 0.222 (0.240)	Data 0.001 (0.014)	Loss 4.1958 (4.1332)	Top-1 acc 29.688 (28.517)	Top-5 acc 50.000 (51.647)	lr 0.00959
Warmup Train [29][1480/3239]	Time 0.218 (0.240)	Data 0.001 (0.014)	Loss 4.0687 (4.1331)	Top-1 acc 28.906 (28.506)	Top-5 acc 53.516 (51.650)	lr 0.00959
Warmup Train [29][1490/3239]	Time 0.197 (0.240)	Data 0.001 (0.014)	Loss 4.1723 (4.1339)	Top-1 acc 29.688 (28.496)	Top-5 acc 51.172 (51.634)	lr 0.00958
Warmup Train [29][1500/3239]	Time 0.237 (0.240)	Data 0.001 (0.014)	Loss 4.0925 (4.1338)	Top-1 acc 28.516 (28.493)	Top-5 acc 52.734 (51.628)	lr 0.00958
Warmup Train [29][1510/3239]	Time 0.175 (0.240)	Data 0.001 (0.013)	Loss 4.2440 (4.1340)	Top-1 acc 27.344 (28.491)	Top-5 acc 49.219 (51.627)	lr 0.00958
Warmup Train [29][1520/3239]	Time 0.216 (0.239)	Data 0.001 (0.013)	Loss 4.5292 (4.1341)	Top-1 acc 22.656 (28.490)	Top-5 acc 46.094 (51.624)	lr 0.00957
Warmup Train [29][1530/3239]	Time 0.302 (0.239)	Data 0.001 (0.013)	Loss 3.9399 (4.1340)	Top-1 acc 34.766 (28.499)	Top-5 acc 56.641 (51.624)	lr 0.00957
Warmup Train [29][1540/3239]	Time 0.226 (0.239)	Data 0.002 (0.013)	Loss 3.9592 (4.1334)	Top-1 acc 30.859 (28.513)	Top-5 acc 53.516 (51.638)	lr 0.00956
Warmup Train [29][1550/3239]	Time 0.136 (0.239)	Data 0.001 (0.013)	Loss 4.4945 (4.1340)	Top-1 acc 21.875 (28.497)	Top-5 acc 41.016 (51.621)	lr 0.00956
Warmup Train [29][1560/3239]	Time 0.184 (0.239)	Data 0.001 (0.013)	Loss 4.3606 (4.1347)	Top-1 acc 26.172 (28.484)	Top-5 acc 46.875 (51.610)	lr 0.00955
Warmup Train [29][1570/3239]	Time 0.208 (0.239)	Data 0.001 (0.013)	Loss 4.0892 (4.1345)	Top-1 acc 28.906 (28.489)	Top-5 acc 52.344 (51.614)	lr 0.00955
Warmup Train [29][1580/3239]	Time 0.241 (0.239)	Data 0.001 (0.013)	Loss 4.1151 (4.1347)	Top-1 acc 29.297 (28.491)	Top-5 acc 56.641 (51.608)	lr 0.00954
Warmup Train [29][1590/3239]	Time 0.219 (0.239)	Data 0.002 (0.013)	Loss 4.0722 (4.1347)	Top-1 acc 30.469 (28.493)	Top-5 acc 52.344 (51.603)	lr 0.00954
Warmup Train [29][1600/3239]	Time 0.198 (0.239)	Data 0.001 (0.013)	Loss 4.1934 (4.1344)	Top-1 acc 28.906 (28.495)	Top-5 acc 51.172 (51.615)	lr 0.00953
Warmup Train [29][1610/3239]	Time 0.226 (0.238)	Data 0.002 (0.013)	Loss 4.0944 (4.1345)	Top-1 acc 29.688 (28.499)	Top-5 acc 53.516 (51.617)	lr 0.00953
Warmup Train [29][1620/3239]	Time 0.218 (0.238)	Data 0.001 (0.013)	Loss 3.9723 (4.1339)	Top-1 acc 30.469 (28.507)	Top-5 acc 58.984 (51.633)	lr 0.00952
Warmup Train [29][1630/3239]	Time 0.342 (0.238)	Data 0.002 (0.013)	Loss 4.1909 (4.1342)	Top-1 acc 28.125 (28.504)	Top-5 acc 47.656 (51.626)	lr 0.00952
Warmup Train [29][1640/3239]	Time 0.164 (0.238)	Data 0.004 (0.013)	Loss 4.1838 (4.1340)	Top-1 acc 28.516 (28.508)	Top-5 acc 50.781 (51.630)	lr 0.00951
Warmup Train [29][1650/3239]	Time 0.227 (0.238)	Data 0.001 (0.013)	Loss 4.2555 (4.1341)	Top-1 acc 23.047 (28.506)	Top-5 acc 44.141 (51.629)	lr 0.00951
Warmup Train [29][1660/3239]	Time 0.201 (0.238)	Data 0.001 (0.013)	Loss 4.0447 (4.1337)	Top-1 acc 30.078 (28.520)	Top-5 acc 54.688 (51.641)	lr 0.00950
Warmup Train [29][1670/3239]	Time 0.223 (0.238)	Data 0.001 (0.012)	Loss 4.3214 (4.1334)	Top-1 acc 26.953 (28.522)	Top-5 acc 50.000 (51.651)	lr 0.00950
Warmup Train [29][1680/3239]	Time 0.188 (0.238)	Data 0.002 (0.012)	Loss 4.1270 (4.1333)	Top-1 acc 26.172 (28.523)	Top-5 acc 50.391 (51.648)	lr 0.00949
Warmup Train [29][1690/3239]	Time 0.196 (0.238)	Data 0.001 (0.012)	Loss 4.1530 (4.1334)	Top-1 acc 23.828 (28.514)	Top-5 acc 54.688 (51.648)	lr 0.00949
Warmup Train [29][1700/3239]	Time 0.232 (0.238)	Data 0.001 (0.012)	Loss 4.0786 (4.1333)	Top-1 acc 33.203 (28.514)	Top-5 acc 52.734 (51.653)	lr 0.00948
Warmup Train [29][1710/3239]	Time 0.234 (0.238)	Data 0.002 (0.012)	Loss 4.2060 (4.1334)	Top-1 acc 24.609 (28.515)	Top-5 acc 48.438 (51.658)	lr 0.00948
Warmup Train [29][1720/3239]	Time 0.150 (0.238)	Data 0.001 (0.012)	Loss 3.9121 (4.1333)	Top-1 acc 33.594 (28.519)	Top-5 acc 55.078 (51.663)	lr 0.00948
Warmup Train [29][1730/3239]	Time 0.173 (0.237)	Data 0.001 (0.012)	Loss 4.2527 (4.1332)	Top-1 acc 30.859 (28.521)	Top-5 acc 47.656 (51.665)	lr 0.00947
Warmup Train [29][1740/3239]	Time 0.286 (0.238)	Data 0.001 (0.012)	Loss 4.3391 (4.1334)	Top-1 acc 23.828 (28.514)	Top-5 acc 45.312 (51.662)	lr 0.00947
Warmup Train [29][1750/3239]	Time 0.170 (0.237)	Data 0.003 (0.012)	Loss 3.9902 (4.1333)	Top-1 acc 30.078 (28.508)	Top-5 acc 52.344 (51.659)	lr 0.00946
Warmup Train [29][1760/3239]	Time 0.226 (0.237)	Data 0.002 (0.012)	Loss 4.0771 (4.1332)	Top-1 acc 32.031 (28.510)	Top-5 acc 54.688 (51.663)	lr 0.00946
Warmup Train [29][1770/3239]	Time 0.180 (0.237)	Data 0.002 (0.012)	Loss 4.1040 (4.1330)	Top-1 acc 28.516 (28.512)	Top-5 acc 50.000 (51.657)	lr 0.00945
Warmup Train [29][1780/3239]	Time 0.223 (0.237)	Data 0.001 (0.012)	Loss 4.1720 (4.1330)	Top-1 acc 25.781 (28.508)	Top-5 acc 52.344 (51.654)	lr 0.00945
Warmup Train [29][1790/3239]	Time 0.183 (0.237)	Data 0.001 (0.012)	Loss 4.2719 (4.1331)	Top-1 acc 26.172 (28.509)	Top-5 acc 51.562 (51.655)	lr 0.00944
Warmup Train [29][1800/3239]	Time 0.193 (0.237)	Data 0.001 (0.012)	Loss 4.0467 (4.1330)	Top-1 acc 31.641 (28.514)	Top-5 acc 52.344 (51.654)	lr 0.00944
Warmup Train [29][1810/3239]	Time 0.223 (0.237)	Data 0.001 (0.012)	Loss 4.1683 (4.1329)	Top-1 acc 28.516 (28.517)	Top-5 acc 48.047 (51.660)	lr 0.00943
Warmup Train [29][1820/3239]	Time 0.175 (0.237)	Data 0.001 (0.012)	Loss 4.2375 (4.1327)	Top-1 acc 27.344 (28.517)	Top-5 acc 49.219 (51.661)	lr 0.00943
Warmup Train [29][1830/3239]	Time 0.221 (0.237)	Data 0.001 (0.012)	Loss 3.9800 (4.1324)	Top-1 acc 33.984 (28.527)	Top-5 acc 54.297 (51.665)	lr 0.00942
Warmup Train [29][1840/3239]	Time 0.227 (0.237)	Data 0.001 (0.012)	Loss 4.0942 (4.1322)	Top-1 acc 30.859 (28.533)	Top-5 acc 51.562 (51.670)	lr 0.00942
Warmup Train [29][1850/3239]	Time 0.330 (0.237)	Data 0.001 (0.012)	Loss 4.3030 (4.1325)	Top-1 acc 23.828 (28.525)	Top-5 acc 46.875 (51.661)	lr 0.00941
Warmup Train [29][1860/3239]	Time 0.258 (0.237)	Data 0.001 (0.011)	Loss 4.0936 (4.1322)	Top-1 acc 28.516 (28.529)	Top-5 acc 54.297 (51.668)	lr 0.00941
Warmup Train [29][1870/3239]	Time 0.231 (0.237)	Data 0.001 (0.011)	Loss 4.2554 (4.1322)	Top-1 acc 24.219 (28.530)	Top-5 acc 50.391 (51.662)	lr 0.00940
Warmup Train [29][1880/3239]	Time 0.245 (0.237)	Data 0.001 (0.011)	Loss 4.0925 (4.1318)	Top-1 acc 26.562 (28.536)	Top-5 acc 53.906 (51.672)	lr 0.00940
Warmup Train [29][1890/3239]	Time 0.162 (0.236)	Data 0.001 (0.011)	Loss 4.1578 (4.1320)	Top-1 acc 26.953 (28.532)	Top-5 acc 51.562 (51.668)	lr 0.00939
Warmup Train [29][1900/3239]	Time 0.240 (0.236)	Data 0.001 (0.011)	Loss 3.9399 (4.1320)	Top-1 acc 29.688 (28.531)	Top-5 acc 58.594 (51.675)	lr 0.00939
Warmup Train [29][1910/3239]	Time 0.238 (0.236)	Data 0.001 (0.011)	Loss 4.2780 (4.1319)	Top-1 acc 23.438 (28.528)	Top-5 acc 48.438 (51.680)	lr 0.00939
Warmup Train [29][1920/3239]	Time 0.208 (0.236)	Data 0.001 (0.011)	Loss 4.2232 (4.1316)	Top-1 acc 29.688 (28.534)	Top-5 acc 50.781 (51.692)	lr 0.00938
Warmup Train [29][1930/3239]	Time 0.206 (0.236)	Data 0.001 (0.011)	Loss 4.1896 (4.1314)	Top-1 acc 26.953 (28.542)	Top-5 acc 50.000 (51.697)	lr 0.00938
Warmup Train [29][1940/3239]	Time 0.177 (0.236)	Data 0.001 (0.011)	Loss 4.4365 (4.1317)	Top-1 acc 22.266 (28.529)	Top-5 acc 46.875 (51.693)	lr 0.00937
Warmup Train [29][1950/3239]	Time 0.216 (0.236)	Data 0.001 (0.011)	Loss 4.1193 (4.1317)	Top-1 acc 27.734 (28.526)	Top-5 acc 51.562 (51.691)	lr 0.00937
Warmup Train [29][1960/3239]	Time 0.295 (0.236)	Data 0.001 (0.011)	Loss 4.0009 (4.1316)	Top-1 acc 33.203 (28.528)	Top-5 acc 55.469 (51.692)	lr 0.00936
Warmup Train [29][1970/3239]	Time 0.385 (0.236)	Data 0.001 (0.011)	Loss 4.2225 (4.1316)	Top-1 acc 30.078 (28.528)	Top-5 acc 51.953 (51.689)	lr 0.00936
Warmup Train [29][1980/3239]	Time 0.230 (0.236)	Data 0.001 (0.011)	Loss 4.2265 (4.1316)	Top-1 acc 25.781 (28.527)	Top-5 acc 50.000 (51.690)	lr 0.00935
Warmup Train [29][1990/3239]	Time 0.218 (0.236)	Data 0.001 (0.011)	Loss 4.0171 (4.1312)	Top-1 acc 32.422 (28.536)	Top-5 acc 54.297 (51.697)	lr 0.00935
Warmup Train [29][2000/3239]	Time 0.237 (0.236)	Data 0.001 (0.011)	Loss 4.1409 (4.1314)	Top-1 acc 28.906 (28.540)	Top-5 acc 52.344 (51.697)	lr 0.00934
Warmup Train [29][2010/3239]	Time 0.197 (0.236)	Data 0.001 (0.011)	Loss 4.1522 (4.1316)	Top-1 acc 29.297 (28.536)	Top-5 acc 51.953 (51.694)	lr 0.00934
Warmup Train [29][2020/3239]	Time 0.260 (0.236)	Data 0.002 (0.011)	Loss 3.8151 (4.1316)	Top-1 acc 33.984 (28.542)	Top-5 acc 56.641 (51.692)	lr 0.00933
Warmup Train [29][2030/3239]	Time 0.130 (0.236)	Data 0.001 (0.011)	Loss 4.3916 (4.1315)	Top-1 acc 21.484 (28.540)	Top-5 acc 47.656 (51.692)	lr 0.00933
Warmup Train [29][2040/3239]	Time 0.226 (0.236)	Data 0.001 (0.011)	Loss 4.0325 (4.1315)	Top-1 acc 30.859 (28.538)	Top-5 acc 51.172 (51.695)	lr 0.00932
Warmup Train [29][2050/3239]	Time 0.253 (0.235)	Data 0.001 (0.011)	Loss 4.2397 (4.1315)	Top-1 acc 26.562 (28.541)	Top-5 acc 49.219 (51.699)	lr 0.00932
Warmup Train [29][2060/3239]	Time 0.262 (0.235)	Data 0.001 (0.011)	Loss 4.0859 (4.1315)	Top-1 acc 27.344 (28.539)	Top-5 acc 50.000 (51.697)	lr 0.00931
Warmup Train [29][2070/3239]	Time 0.256 (0.235)	Data 0.001 (0.011)	Loss 4.0017 (4.1317)	Top-1 acc 30.078 (28.534)	Top-5 acc 51.562 (51.694)	lr 0.00931
Warmup Train [29][2080/3239]	Time 0.201 (0.235)	Data 0.001 (0.011)	Loss 3.9448 (4.1317)	Top-1 acc 35.547 (28.540)	Top-5 acc 56.250 (51.695)	lr 0.00930
Warmup Train [29][2090/3239]	Time 0.362 (0.235)	Data 0.001 (0.010)	Loss 4.3102 (4.1317)	Top-1 acc 24.219 (28.538)	Top-5 acc 47.656 (51.690)	lr 0.00930
Warmup Train [29][2100/3239]	Time 0.189 (0.235)	Data 0.002 (0.010)	Loss 4.1554 (4.1317)	Top-1 acc 26.562 (28.537)	Top-5 acc 53.125 (51.693)	lr 0.00930
Warmup Train [29][2110/3239]	Time 0.218 (0.235)	Data 0.002 (0.010)	Loss 4.0649 (4.1314)	Top-1 acc 28.125 (28.537)	Top-5 acc 53.125 (51.699)	lr 0.00929
Warmup Train [29][2120/3239]	Time 0.252 (0.235)	Data 0.001 (0.010)	Loss 4.1455 (4.1314)	Top-1 acc 26.953 (28.534)	Top-5 acc 49.219 (51.697)	lr 0.00929
Warmup Train [29][2130/3239]	Time 0.203 (0.235)	Data 0.001 (0.010)	Loss 4.0002 (4.1312)	Top-1 acc 29.297 (28.537)	Top-5 acc 53.125 (51.703)	lr 0.00928
Warmup Train [29][2140/3239]	Time 0.227 (0.235)	Data 0.001 (0.010)	Loss 4.0481 (4.1311)	Top-1 acc 27.734 (28.536)	Top-5 acc 51.562 (51.702)	lr 0.00928
Warmup Train [29][2150/3239]	Time 0.200 (0.235)	Data 0.001 (0.010)	Loss 4.0177 (4.1312)	Top-1 acc 31.641 (28.533)	Top-5 acc 58.203 (51.702)	lr 0.00927
Warmup Train [29][2160/3239]	Time 0.184 (0.235)	Data 0.001 (0.010)	Loss 4.2991 (4.1311)	Top-1 acc 25.781 (28.537)	Top-5 acc 48.047 (51.705)	lr 0.00927
Warmup Train [29][2170/3239]	Time 0.178 (0.235)	Data 0.001 (0.010)	Loss 4.2474 (4.1309)	Top-1 acc 23.828 (28.545)	Top-5 acc 47.266 (51.707)	lr 0.00926
Warmup Train [29][2180/3239]	Time 0.194 (0.235)	Data 0.001 (0.010)	Loss 4.1133 (4.1308)	Top-1 acc 26.953 (28.542)	Top-5 acc 54.297 (51.706)	lr 0.00926
Warmup Train [29][2190/3239]	Time 0.224 (0.235)	Data 0.001 (0.010)	Loss 4.2115 (4.1311)	Top-1 acc 27.344 (28.539)	Top-5 acc 49.219 (51.702)	lr 0.00925
Warmup Train [29][2200/3239]	Time 0.350 (0.235)	Data 0.002 (0.010)	Loss 4.0391 (4.1309)	Top-1 acc 32.031 (28.541)	Top-5 acc 56.250 (51.707)	lr 0.00925
Warmup Train [29][2210/3239]	Time 0.206 (0.235)	Data 0.001 (0.010)	Loss 4.0521 (4.1307)	Top-1 acc 30.078 (28.544)	Top-5 acc 52.344 (51.712)	lr 0.00924
Warmup Train [29][2220/3239]	Time 0.189 (0.234)	Data 0.002 (0.010)	Loss 4.1102 (4.1306)	Top-1 acc 27.734 (28.542)	Top-5 acc 53.125 (51.718)	lr 0.00924
Warmup Train [29][2230/3239]	Time 0.202 (0.234)	Data 0.001 (0.010)	Loss 4.2478 (4.1305)	Top-1 acc 27.734 (28.548)	Top-5 acc 51.562 (51.723)	lr 0.00923
Warmup Train [29][2240/3239]	Time 0.201 (0.234)	Data 0.001 (0.010)	Loss 4.2669 (4.1307)	Top-1 acc 22.266 (28.539)	Top-5 acc 48.438 (51.718)	lr 0.00923
Warmup Train [29][2250/3239]	Time 0.240 (0.234)	Data 0.002 (0.010)	Loss 4.0720 (4.1305)	Top-1 acc 32.031 (28.544)	Top-5 acc 53.906 (51.724)	lr 0.00922
Warmup Train [29][2260/3239]	Time 0.200 (0.234)	Data 0.001 (0.010)	Loss 4.0897 (4.1304)	Top-1 acc 27.344 (28.543)	Top-5 acc 51.562 (51.724)	lr 0.00922
Warmup Train [29][2270/3239]	Time 0.171 (0.234)	Data 0.001 (0.010)	Loss 3.9678 (4.1302)	Top-1 acc 28.125 (28.540)	Top-5 acc 54.688 (51.727)	lr 0.00922
Warmup Train [29][2280/3239]	Time 0.208 (0.234)	Data 0.001 (0.010)	Loss 4.1112 (4.1303)	Top-1 acc 29.297 (28.538)	Top-5 acc 54.688 (51.725)	lr 0.00921
Warmup Train [29][2290/3239]	Time 0.218 (0.234)	Data 0.001 (0.010)	Loss 4.1902 (4.1302)	Top-1 acc 30.859 (28.543)	Top-5 acc 53.906 (51.725)	lr 0.00921
Warmup Train [29][2300/3239]	Time 0.199 (0.234)	Data 0.001 (0.010)	Loss 4.0531 (4.1305)	Top-1 acc 30.078 (28.539)	Top-5 acc 52.344 (51.717)	lr 0.00920
Warmup Train [29][2310/3239]	Time 0.366 (0.234)	Data 0.001 (0.010)	Loss 4.0509 (4.1304)	Top-1 acc 28.516 (28.543)	Top-5 acc 53.125 (51.721)	lr 0.00920
Warmup Train [29][2320/3239]	Time 0.146 (0.234)	Data 0.001 (0.010)	Loss 4.2883 (4.1305)	Top-1 acc 25.391 (28.544)	Top-5 acc 48.047 (51.718)	lr 0.00919
Warmup Train [29][2330/3239]	Time 0.145 (0.234)	Data 0.001 (0.010)	Loss 4.0936 (4.1302)	Top-1 acc 27.734 (28.551)	Top-5 acc 48.828 (51.723)	lr 0.00919
Warmup Train [29][2340/3239]	Time 0.227 (0.234)	Data 0.001 (0.010)	Loss 4.2745 (4.1299)	Top-1 acc 23.438 (28.550)	Top-5 acc 45.312 (51.727)	lr 0.00918
Warmup Train [29][2350/3239]	Time 0.194 (0.234)	Data 0.001 (0.010)	Loss 4.3278 (4.1299)	Top-1 acc 28.125 (28.550)	Top-5 acc 49.609 (51.724)	lr 0.00918
Warmup Train [29][2360/3239]	Time 0.270 (0.234)	Data 0.001 (0.010)	Loss 4.1032 (4.1300)	Top-1 acc 28.125 (28.548)	Top-5 acc 51.562 (51.723)	lr 0.00917
Warmup Train [29][2370/3239]	Time 0.263 (0.234)	Data 0.002 (0.010)	Loss 4.0360 (4.1298)	Top-1 acc 32.422 (28.554)	Top-5 acc 51.953 (51.726)	lr 0.00917
Warmup Train [29][2380/3239]	Time 0.157 (0.234)	Data 0.001 (0.010)	Loss 4.0365 (4.1298)	Top-1 acc 32.812 (28.558)	Top-5 acc 53.516 (51.725)	lr 0.00916
Warmup Train [29][2390/3239]	Time 0.183 (0.234)	Data 0.001 (0.010)	Loss 4.2008 (4.1298)	Top-1 acc 26.953 (28.554)	Top-5 acc 48.047 (51.725)	lr 0.00916
Warmup Train [29][2400/3239]	Time 0.178 (0.234)	Data 0.001 (0.010)	Loss 4.0996 (4.1296)	Top-1 acc 28.906 (28.556)	Top-5 acc 49.219 (51.725)	lr 0.00915
Warmup Train [29][2410/3239]	Time 0.360 (0.234)	Data 0.001 (0.010)	Loss 3.8994 (4.1296)	Top-1 acc 35.156 (28.562)	Top-5 acc 56.250 (51.721)	lr 0.00915
Warmup Train [29][2420/3239]	Time 0.189 (0.234)	Data 0.002 (0.010)	Loss 4.0224 (4.1294)	Top-1 acc 33.594 (28.569)	Top-5 acc 55.078 (51.723)	lr 0.00914
Warmup Train [29][2430/3239]	Time 0.161 (0.234)	Data 0.001 (0.010)	Loss 4.1541 (4.1293)	Top-1 acc 27.344 (28.570)	Top-5 acc 47.266 (51.722)	lr 0.00914
Warmup Train [29][2440/3239]	Time 0.224 (0.234)	Data 0.001 (0.010)	Loss 4.0555 (4.1294)	Top-1 acc 29.688 (28.566)	Top-5 acc 52.344 (51.718)	lr 0.00914
Warmup Train [29][2450/3239]	Time 0.217 (0.234)	Data 0.001 (0.009)	Loss 4.0289 (4.1295)	Top-1 acc 33.984 (28.562)	Top-5 acc 55.469 (51.713)	lr 0.00913
Warmup Train [29][2460/3239]	Time 0.201 (0.234)	Data 0.001 (0.009)	Loss 4.1353 (4.1293)	Top-1 acc 29.688 (28.566)	Top-5 acc 49.219 (51.716)	lr 0.00913
Warmup Train [29][2470/3239]	Time 0.184 (0.233)	Data 0.001 (0.009)	Loss 4.2648 (4.1293)	Top-1 acc 26.562 (28.566)	Top-5 acc 52.734 (51.714)	lr 0.00912
Warmup Train [29][2480/3239]	Time 0.216 (0.233)	Data 0.002 (0.009)	Loss 4.1229 (4.1294)	Top-1 acc 32.031 (28.568)	Top-5 acc 55.469 (51.717)	lr 0.00912
Warmup Train [29][2490/3239]	Time 0.265 (0.234)	Data 0.002 (0.009)	Loss 4.2627 (4.1295)	Top-1 acc 28.516 (28.564)	Top-5 acc 48.047 (51.715)	lr 0.00911
Warmup Train [29][2500/3239]	Time 0.184 (0.233)	Data 0.001 (0.009)	Loss 4.0657 (4.1297)	Top-1 acc 30.078 (28.562)	Top-5 acc 51.562 (51.707)	lr 0.00911
Warmup Train [29][2510/3239]	Time 0.310 (0.233)	Data 0.002 (0.009)	Loss 4.0071 (4.1298)	Top-1 acc 34.766 (28.566)	Top-5 acc 56.641 (51.706)	lr 0.00910
Warmup Train [29][2520/3239]	Time 0.327 (0.233)	Data 0.001 (0.009)	Loss 4.2388 (4.1298)	Top-1 acc 25.781 (28.563)	Top-5 acc 45.703 (51.707)	lr 0.00910
Warmup Train [29][2530/3239]	Time 0.163 (0.233)	Data 0.001 (0.009)	Loss 4.0464 (4.1298)	Top-1 acc 29.297 (28.560)	Top-5 acc 50.781 (51.705)	lr 0.00909
Warmup Train [29][2540/3239]	Time 0.240 (0.233)	Data 0.001 (0.009)	Loss 3.9974 (4.1299)	Top-1 acc 34.766 (28.556)	Top-5 acc 56.250 (51.702)	lr 0.00909
Warmup Train [29][2550/3239]	Time 0.235 (0.233)	Data 0.001 (0.009)	Loss 4.1260 (4.1299)	Top-1 acc 27.734 (28.559)	Top-5 acc 53.125 (51.706)	lr 0.00908
Warmup Train [29][2560/3239]	Time 0.189 (0.233)	Data 0.001 (0.009)	Loss 4.0455 (4.1299)	Top-1 acc 31.250 (28.562)	Top-5 acc 50.000 (51.705)	lr 0.00908
Warmup Train [29][2570/3239]	Time 0.210 (0.233)	Data 0.001 (0.009)	Loss 4.3302 (4.1300)	Top-1 acc 25.000 (28.557)	Top-5 acc 49.609 (51.704)	lr 0.00907
Warmup Train [29][2580/3239]	Time 0.203 (0.233)	Data 0.002 (0.009)	Loss 4.0296 (4.1302)	Top-1 acc 35.938 (28.551)	Top-5 acc 56.250 (51.699)	lr 0.00907
Warmup Train [29][2590/3239]	Time 0.145 (0.233)	Data 0.001 (0.009)	Loss 4.0429 (4.1302)	Top-1 acc 30.078 (28.555)	Top-5 acc 52.734 (51.699)	lr 0.00907
Warmup Train [29][2600/3239]	Time 0.189 (0.233)	Data 0.001 (0.009)	Loss 4.2833 (4.1306)	Top-1 acc 23.828 (28.552)	Top-5 acc 48.047 (51.693)	lr 0.00906
Warmup Train [29][2610/3239]	Time 0.240 (0.233)	Data 0.002 (0.009)	Loss 3.9991 (4.1307)	Top-1 acc 28.906 (28.549)	Top-5 acc 55.078 (51.692)	lr 0.00906
Warmup Train [29][2620/3239]	Time 0.227 (0.233)	Data 0.001 (0.009)	Loss 4.1087 (4.1307)	Top-1 acc 27.734 (28.553)	Top-5 acc 53.125 (51.694)	lr 0.00905
Warmup Train [29][2630/3239]	Time 0.324 (0.233)	Data 0.001 (0.009)	Loss 4.1442 (4.1308)	Top-1 acc 29.297 (28.555)	Top-5 acc 48.828 (51.690)	lr 0.00905
Warmup Train [29][2640/3239]	Time 0.370 (0.233)	Data 0.001 (0.009)	Loss 4.0226 (4.1308)	Top-1 acc 27.344 (28.561)	Top-5 acc 56.250 (51.690)	lr 0.00904
Warmup Train [29][2650/3239]	Time 0.193 (0.233)	Data 0.001 (0.009)	Loss 4.0552 (4.1308)	Top-1 acc 30.469 (28.562)	Top-5 acc 53.125 (51.690)	lr 0.00904
Warmup Train [29][2660/3239]	Time 0.190 (0.233)	Data 0.001 (0.009)	Loss 4.0148 (4.1307)	Top-1 acc 31.641 (28.563)	Top-5 acc 52.734 (51.698)	lr 0.00903
Warmup Train [29][2670/3239]	Time 0.191 (0.233)	Data 0.001 (0.009)	Loss 4.0564 (4.1306)	Top-1 acc 30.078 (28.564)	Top-5 acc 53.125 (51.698)	lr 0.00903
Warmup Train [29][2680/3239]	Time 0.133 (0.233)	Data 0.001 (0.009)	Loss 4.0319 (4.1306)	Top-1 acc 32.031 (28.566)	Top-5 acc 49.609 (51.695)	lr 0.00902
Warmup Train [29][2690/3239]	Time 0.240 (0.233)	Data 0.001 (0.009)	Loss 3.9907 (4.1304)	Top-1 acc 27.734 (28.570)	Top-5 acc 55.469 (51.699)	lr 0.00902
Warmup Train [29][2700/3239]	Time 0.216 (0.233)	Data 0.001 (0.009)	Loss 3.9641 (4.1302)	Top-1 acc 31.250 (28.574)	Top-5 acc 57.031 (51.706)	lr 0.00901
Warmup Train [29][2710/3239]	Time 0.254 (0.233)	Data 0.001 (0.009)	Loss 3.8981 (4.1301)	Top-1 acc 34.766 (28.579)	Top-5 acc 57.812 (51.708)	lr 0.00901
Warmup Train [29][2720/3239]	Time 0.241 (0.233)	Data 0.001 (0.009)	Loss 4.0228 (4.1300)	Top-1 acc 33.594 (28.581)	Top-5 acc 58.984 (51.709)	lr 0.00900
Warmup Train [29][2730/3239]	Time 0.276 (0.233)	Data 0.001 (0.009)	Loss 4.2994 (4.1300)	Top-1 acc 28.125 (28.581)	Top-5 acc 50.000 (51.711)	lr 0.00900
Warmup Train [29][2740/3239]	Time 0.176 (0.233)	Data 0.001 (0.009)	Loss 4.2343 (4.1301)	Top-1 acc 28.125 (28.580)	Top-5 acc 50.000 (51.709)	lr 0.00900
Warmup Train [29][2750/3239]	Time 0.216 (0.233)	Data 0.001 (0.009)	Loss 3.9511 (4.1300)	Top-1 acc 30.859 (28.583)	Top-5 acc 51.172 (51.708)	lr 0.00899
Warmup Train [29][2760/3239]	Time 0.199 (0.233)	Data 0.001 (0.009)	Loss 4.1205 (4.1301)	Top-1 acc 33.984 (28.587)	Top-5 acc 54.297 (51.708)	lr 0.00899
Warmup Train [29][2770/3239]	Time 0.264 (0.233)	Data 0.002 (0.009)	Loss 4.1080 (4.1299)	Top-1 acc 28.906 (28.586)	Top-5 acc 52.734 (51.709)	lr 0.00898
Warmup Train [29][2780/3239]	Time 0.202 (0.233)	Data 0.002 (0.009)	Loss 4.1565 (4.1301)	Top-1 acc 31.641 (28.588)	Top-5 acc 49.219 (51.705)	lr 0.00898
Warmup Train [29][2790/3239]	Time 0.217 (0.233)	Data 0.001 (0.009)	Loss 4.1510 (4.1300)	Top-1 acc 30.859 (28.588)	Top-5 acc 51.562 (51.704)	lr 0.00897
Warmup Train [29][2800/3239]	Time 0.265 (0.233)	Data 0.001 (0.009)	Loss 4.1472 (4.1301)	Top-1 acc 26.562 (28.584)	Top-5 acc 53.125 (51.699)	lr 0.00897
Warmup Train [29][2810/3239]	Time 0.211 (0.233)	Data 0.001 (0.009)	Loss 4.1341 (4.1299)	Top-1 acc 28.125 (28.587)	Top-5 acc 54.297 (51.703)	lr 0.00896
Warmup Train [29][2820/3239]	Time 0.323 (0.233)	Data 0.001 (0.009)	Loss 4.0903 (4.1300)	Top-1 acc 28.516 (28.587)	Top-5 acc 48.047 (51.701)	lr 0.00896
Warmup Train [29][2830/3239]	Time 0.180 (0.233)	Data 0.001 (0.009)	Loss 4.2464 (4.1300)	Top-1 acc 27.344 (28.586)	Top-5 acc 48.828 (51.698)	lr 0.00895
Warmup Train [29][2840/3239]	Time 0.251 (0.233)	Data 0.001 (0.009)	Loss 3.9808 (4.1299)	Top-1 acc 33.203 (28.589)	Top-5 acc 54.688 (51.699)	lr 0.00895
Warmup Train [29][2850/3239]	Time 0.211 (0.232)	Data 0.001 (0.009)	Loss 4.1405 (4.1299)	Top-1 acc 30.078 (28.591)	Top-5 acc 53.906 (51.699)	lr 0.00894
Warmup Train [29][2860/3239]	Time 0.165 (0.232)	Data 0.001 (0.009)	Loss 4.1020 (4.1299)	Top-1 acc 32.422 (28.590)	Top-5 acc 53.906 (51.699)	lr 0.00894
Warmup Train [29][2870/3239]	Time 0.225 (0.232)	Data 0.003 (0.009)	Loss 4.1615 (4.1297)	Top-1 acc 28.906 (28.595)	Top-5 acc 48.828 (51.702)	lr 0.00893
Warmup Train [29][2880/3239]	Time 0.224 (0.232)	Data 0.001 (0.009)	Loss 4.1827 (4.1297)	Top-1 acc 28.906 (28.597)	Top-5 acc 51.953 (51.698)	lr 0.00893
Warmup Train [29][2890/3239]	Time 0.260 (0.232)	Data 0.001 (0.008)	Loss 4.1874 (4.1297)	Top-1 acc 30.469 (28.599)	Top-5 acc 48.438 (51.698)	lr 0.00893
Warmup Train [29][2900/3239]	Time 0.178 (0.232)	Data 0.001 (0.008)	Loss 4.1235 (4.1298)	Top-1 acc 27.734 (28.595)	Top-5 acc 50.391 (51.695)	lr 0.00892
Warmup Train [29][2910/3239]	Time 0.229 (0.232)	Data 0.002 (0.008)	Loss 4.2289 (4.1299)	Top-1 acc 24.609 (28.597)	Top-5 acc 48.828 (51.695)	lr 0.00892
Warmup Train [29][2920/3239]	Time 0.383 (0.232)	Data 0.001 (0.008)	Loss 3.9975 (4.1297)	Top-1 acc 32.812 (28.603)	Top-5 acc 55.859 (51.700)	lr 0.00891
Warmup Train [29][2930/3239]	Time 0.168 (0.232)	Data 0.001 (0.008)	Loss 3.9202 (4.1295)	Top-1 acc 32.812 (28.604)	Top-5 acc 56.250 (51.703)	lr 0.00891
Warmup Train [29][2940/3239]	Time 0.189 (0.232)	Data 0.001 (0.008)	Loss 4.4291 (4.1296)	Top-1 acc 26.172 (28.606)	Top-5 acc 46.484 (51.704)	lr 0.00890
Warmup Train [29][2950/3239]	Time 0.248 (0.232)	Data 0.001 (0.008)	Loss 4.1383 (4.1295)	Top-1 acc 27.734 (28.609)	Top-5 acc 48.047 (51.706)	lr 0.00890
Warmup Train [29][2960/3239]	Time 0.238 (0.232)	Data 0.001 (0.008)	Loss 4.0525 (4.1296)	Top-1 acc 32.031 (28.606)	Top-5 acc 53.125 (51.703)	lr 0.00889
Warmup Train [29][2970/3239]	Time 0.206 (0.232)	Data 0.001 (0.008)	Loss 4.1496 (4.1296)	Top-1 acc 31.250 (28.607)	Top-5 acc 48.047 (51.705)	lr 0.00889
Warmup Train [29][2980/3239]	Time 0.214 (0.232)	Data 0.001 (0.008)	Loss 3.9480 (4.1294)	Top-1 acc 30.469 (28.612)	Top-5 acc 53.906 (51.708)	lr 0.00888
Warmup Train [29][2990/3239]	Time 0.184 (0.232)	Data 0.001 (0.008)	Loss 4.1857 (4.1295)	Top-1 acc 27.734 (28.608)	Top-5 acc 52.344 (51.704)	lr 0.00888
Warmup Train [29][3000/3239]	Time 0.203 (0.232)	Data 0.001 (0.008)	Loss 4.0071 (4.1295)	Top-1 acc 29.688 (28.606)	Top-5 acc 55.078 (51.705)	lr 0.00887
Warmup Train [29][3010/3239]	Time 0.175 (0.232)	Data 0.001 (0.008)	Loss 4.2106 (4.1297)	Top-1 acc 25.000 (28.603)	Top-5 acc 51.172 (51.700)	lr 0.00887
Warmup Train [29][3020/3239]	Time 0.224 (0.232)	Data 0.001 (0.008)	Loss 4.3597 (4.1301)	Top-1 acc 24.219 (28.595)	Top-5 acc 48.438 (51.693)	lr 0.00887
Warmup Train [29][3030/3239]	Time 0.327 (0.232)	Data 0.002 (0.008)	Loss 4.2325 (4.1301)	Top-1 acc 23.828 (28.595)	Top-5 acc 48.828 (51.691)	lr 0.00886
Warmup Train [29][3040/3239]	Time 0.208 (0.232)	Data 0.002 (0.008)	Loss 4.1088 (4.1301)	Top-1 acc 29.688 (28.594)	Top-5 acc 53.906 (51.692)	lr 0.00886
Warmup Train [29][3050/3239]	Time 0.137 (0.232)	Data 0.001 (0.008)	Loss 4.3372 (4.1303)	Top-1 acc 24.219 (28.587)	Top-5 acc 46.875 (51.684)	lr 0.00885
Warmup Train [29][3060/3239]	Time 0.202 (0.232)	Data 0.001 (0.008)	Loss 3.9977 (4.1302)	Top-1 acc 30.859 (28.589)	Top-5 acc 56.250 (51.691)	lr 0.00885
Warmup Train [29][3070/3239]	Time 0.159 (0.232)	Data 0.001 (0.008)	Loss 4.2119 (4.1300)	Top-1 acc 26.172 (28.593)	Top-5 acc 48.438 (51.696)	lr 0.00884
Warmup Train [29][3080/3239]	Time 0.217 (0.232)	Data 0.001 (0.008)	Loss 4.2393 (4.1302)	Top-1 acc 28.906 (28.595)	Top-5 acc 50.781 (51.692)	lr 0.00884
Warmup Train [29][3090/3239]	Time 0.193 (0.232)	Data 0.001 (0.008)	Loss 4.2127 (4.1301)	Top-1 acc 28.125 (28.592)	Top-5 acc 49.219 (51.692)	lr 0.00883
Warmup Train [29][3100/3239]	Time 0.276 (0.232)	Data 0.001 (0.008)	Loss 4.1551 (4.1303)	Top-1 acc 26.953 (28.590)	Top-5 acc 52.734 (51.692)	lr 0.00883
Warmup Train [29][3110/3239]	Time 0.137 (0.232)	Data 0.001 (0.008)	Loss 4.0348 (4.1302)	Top-1 acc 31.250 (28.591)	Top-5 acc 54.688 (51.694)	lr 0.00882
Warmup Train [29][3120/3239]	Time 0.209 (0.232)	Data 0.001 (0.008)	Loss 4.0546 (4.1302)	Top-1 acc 30.078 (28.592)	Top-5 acc 50.391 (51.697)	lr 0.00882
Warmup Train [29][3130/3239]	Time 0.164 (0.232)	Data 0.001 (0.008)	Loss 4.0600 (4.1301)	Top-1 acc 30.859 (28.596)	Top-5 acc 50.000 (51.700)	lr 0.00881
Warmup Train [29][3140/3239]	Time 0.270 (0.232)	Data 0.001 (0.008)	Loss 3.8250 (4.1300)	Top-1 acc 33.594 (28.594)	Top-5 acc 57.422 (51.700)	lr 0.00881
Warmup Train [29][3150/3239]	Time 0.305 (0.232)	Data 0.001 (0.008)	Loss 4.4190 (4.1299)	Top-1 acc 24.609 (28.594)	Top-5 acc 48.828 (51.703)	lr 0.00881
Warmup Train [29][3160/3239]	Time 0.168 (0.232)	Data 0.001 (0.008)	Loss 4.1046 (4.1299)	Top-1 acc 28.125 (28.594)	Top-5 acc 55.469 (51.706)	lr 0.00880
Warmup Train [29][3170/3239]	Time 0.181 (0.232)	Data 0.001 (0.008)	Loss 4.2787 (4.1300)	Top-1 acc 25.391 (28.589)	Top-5 acc 48.438 (51.702)	lr 0.00880
Warmup Train [29][3180/3239]	Time 0.198 (0.232)	Data 0.000 (0.008)	Loss 4.3250 (4.1299)	Top-1 acc 26.953 (28.593)	Top-5 acc 49.219 (51.707)	lr 0.00879
Warmup Train [29][3190/3239]	Time 0.215 (0.232)	Data 0.000 (0.008)	Loss 4.1078 (4.1298)	Top-1 acc 30.078 (28.593)	Top-5 acc 53.516 (51.710)	lr 0.00879
Warmup Train [29][3200/3239]	Time 0.193 (0.232)	Data 0.000 (0.008)	Loss 4.2627 (4.1298)	Top-1 acc 24.609 (28.593)	Top-5 acc 48.438 (51.712)	lr 0.00878
Warmup Train [29][3210/3239]	Time 0.242 (0.232)	Data 0.000 (0.008)	Loss 4.3009 (4.1295)	Top-1 acc 24.609 (28.597)	Top-5 acc 48.828 (51.717)	lr 0.00878
Warmup Train [29][3220/3239]	Time 0.209 (0.231)	Data 0.000 (0.008)	Loss 4.1567 (4.1295)	Top-1 acc 28.516 (28.596)	Top-5 acc 48.438 (51.718)	lr 0.00877
Warmup Train [29][3230/3239]	Time 0.162 (0.231)	Data 0.000 (0.008)	Loss 4.1954 (4.1295)	Top-1 acc 25.391 (28.594)	Top-5 acc 51.172 (51.720)	lr 0.00877
Warmup Train [29][3239/3239]	Time 0.143 (0.231)	Data 0.000 (0.008)	Loss 4.0557 (4.1295)	Top-1 acc 32.099 (28.596)	Top-5 acc 51.852 (51.719)	lr 0.00876
==========Warmup Valid [29/40]	loss 3.062	top-1 acc 36.528	top-5 acc 61.217	Train top-1 28.596	top-5 51.719	flops: 442.4M
Warmup Train [30][0/3239]	Time 20.834 (20.834)	Data 19.402 (19.402)	Loss 4.3347 (4.3347)	Top-1 acc 26.172 (26.172)	Top-5 acc 47.266 (47.266)	lr 0.00876
Warmup Train [30][10/3239]	Time 0.263 (2.194)	Data 0.002 (1.766)	Loss 3.9258 (4.0746)	Top-1 acc 32.422 (30.185)	Top-5 acc 55.859 (51.918)	lr 0.00876
Warmup Train [30][20/3239]	Time 0.206 (1.260)	Data 0.002 (0.926)	Loss 4.2050 (4.1202)	Top-1 acc 24.219 (28.683)	Top-5 acc 51.172 (51.079)	lr 0.00875
Warmup Train [30][30/3239]	Time 0.286 (0.931)	Data 0.001 (0.628)	Loss 4.0194 (4.1045)	Top-1 acc 32.422 (29.234)	Top-5 acc 53.906 (51.903)	lr 0.00875
Warmup Train [30][40/3239]	Time 0.202 (0.758)	Data 0.001 (0.475)	Loss 3.8396 (4.0887)	Top-1 acc 31.250 (29.516)	Top-5 acc 61.328 (52.391)	lr 0.00875
Warmup Train [30][50/3239]	Time 0.336 (0.658)	Data 0.002 (0.382)	Loss 4.1272 (4.0912)	Top-1 acc 30.469 (29.274)	Top-5 acc 50.000 (52.436)	lr 0.00874
Warmup Train [30][60/3239]	Time 0.200 (0.587)	Data 0.003 (0.320)	Loss 3.9204 (4.0963)	Top-1 acc 33.203 (29.022)	Top-5 acc 57.031 (52.305)	lr 0.00874
Warmup Train [30][70/3239]	Time 0.194 (0.539)	Data 0.001 (0.276)	Loss 4.0245 (4.0907)	Top-1 acc 34.375 (29.275)	Top-5 acc 51.562 (52.404)	lr 0.00873
Warmup Train [30][80/3239]	Time 0.224 (0.502)	Data 0.002 (0.243)	Loss 4.0774 (4.0925)	Top-1 acc 30.078 (29.128)	Top-5 acc 51.172 (52.421)	lr 0.00873
Warmup Train [30][90/3239]	Time 0.180 (0.471)	Data 0.001 (0.216)	Loss 4.0825 (4.0938)	Top-1 acc 30.469 (29.069)	Top-5 acc 50.000 (52.498)	lr 0.00872
Warmup Train [30][100/3239]	Time 0.230 (0.446)	Data 0.001 (0.195)	Loss 4.0380 (4.0943)	Top-1 acc 31.641 (29.200)	Top-5 acc 55.859 (52.580)	lr 0.00872
Warmup Train [30][110/3239]	Time 0.210 (0.427)	Data 0.002 (0.178)	Loss 3.9588 (4.0894)	Top-1 acc 35.938 (29.230)	Top-5 acc 54.688 (52.678)	lr 0.00871
Warmup Train [30][120/3239]	Time 0.277 (0.412)	Data 0.001 (0.163)	Loss 4.3142 (4.0892)	Top-1 acc 27.344 (29.336)	Top-5 acc 47.266 (52.638)	lr 0.00871
Warmup Train [30][130/3239]	Time 0.289 (0.397)	Data 0.001 (0.151)	Loss 4.0638 (4.0890)	Top-1 acc 28.906 (29.336)	Top-5 acc 51.562 (52.603)	lr 0.00870
Warmup Train [30][140/3239]	Time 0.316 (0.386)	Data 0.002 (0.140)	Loss 3.9723 (4.0877)	Top-1 acc 30.469 (29.341)	Top-5 acc 56.641 (52.593)	lr 0.00870
Warmup Train [30][150/3239]	Time 0.178 (0.376)	Data 0.001 (0.131)	Loss 3.9941 (4.0905)	Top-1 acc 32.031 (29.227)	Top-5 acc 56.250 (52.608)	lr 0.00869
Warmup Train [30][160/3239]	Time 0.262 (0.368)	Data 0.002 (0.123)	Loss 4.2005 (4.0911)	Top-1 acc 25.781 (29.251)	Top-5 acc 51.172 (52.603)	lr 0.00869
Warmup Train [30][170/3239]	Time 0.165 (0.359)	Data 0.001 (0.116)	Loss 4.0972 (4.0925)	Top-1 acc 29.688 (29.180)	Top-5 acc 53.125 (52.606)	lr 0.00869
Warmup Train [30][180/3239]	Time 0.276 (0.352)	Data 0.001 (0.110)	Loss 4.1444 (4.0943)	Top-1 acc 29.688 (29.223)	Top-5 acc 54.688 (52.633)	lr 0.00868
Warmup Train [30][190/3239]	Time 0.205 (0.345)	Data 0.001 (0.104)	Loss 4.3012 (4.0976)	Top-1 acc 23.828 (29.172)	Top-5 acc 46.484 (52.587)	lr 0.00868
Warmup Train [30][200/3239]	Time 0.232 (0.339)	Data 0.001 (0.099)	Loss 4.2233 (4.0985)	Top-1 acc 25.000 (29.159)	Top-5 acc 51.953 (52.604)	lr 0.00867
Warmup Train [30][210/3239]	Time 0.272 (0.334)	Data 0.001 (0.095)	Loss 3.9098 (4.0995)	Top-1 acc 34.375 (29.152)	Top-5 acc 56.641 (52.590)	lr 0.00867
Warmup Train [30][220/3239]	Time 0.212 (0.328)	Data 0.001 (0.091)	Loss 4.1018 (4.0997)	Top-1 acc 29.688 (29.140)	Top-5 acc 54.688 (52.593)	lr 0.00866
Warmup Train [30][230/3239]	Time 0.230 (0.324)	Data 0.003 (0.087)	Loss 4.0019 (4.0995)	Top-1 acc 32.422 (29.157)	Top-5 acc 54.297 (52.540)	lr 0.00866
Warmup Train [30][240/3239]	Time 0.180 (0.319)	Data 0.001 (0.083)	Loss 4.1781 (4.0999)	Top-1 acc 28.125 (29.167)	Top-5 acc 50.391 (52.559)	lr 0.00865
Warmup Train [30][250/3239]	Time 0.327 (0.316)	Data 0.001 (0.080)	Loss 4.0478 (4.0986)	Top-1 acc 32.422 (29.193)	Top-5 acc 53.906 (52.585)	lr 0.00865
Warmup Train [30][260/3239]	Time 0.306 (0.312)	Data 0.001 (0.077)	Loss 4.1405 (4.0996)	Top-1 acc 27.734 (29.179)	Top-5 acc 48.828 (52.535)	lr 0.00864
Warmup Train [30][270/3239]	Time 0.275 (0.309)	Data 0.002 (0.074)	Loss 4.2132 (4.1001)	Top-1 acc 28.125 (29.161)	Top-5 acc 50.781 (52.540)	lr 0.00864
Warmup Train [30][280/3239]	Time 0.239 (0.306)	Data 0.001 (0.072)	Loss 4.0003 (4.0989)	Top-1 acc 33.984 (29.179)	Top-5 acc 53.906 (52.597)	lr 0.00864
Warmup Train [30][290/3239]	Time 0.231 (0.303)	Data 0.001 (0.069)	Loss 4.0169 (4.0983)	Top-1 acc 30.469 (29.171)	Top-5 acc 55.469 (52.579)	lr 0.00863
Warmup Train [30][300/3239]	Time 0.180 (0.300)	Data 0.001 (0.067)	Loss 4.0911 (4.0981)	Top-1 acc 30.859 (29.209)	Top-5 acc 55.859 (52.584)	lr 0.00863
Warmup Train [30][310/3239]	Time 0.217 (0.298)	Data 0.001 (0.065)	Loss 4.1162 (4.0989)	Top-1 acc 30.078 (29.195)	Top-5 acc 50.781 (52.564)	lr 0.00862
Warmup Train [30][320/3239]	Time 0.195 (0.295)	Data 0.001 (0.063)	Loss 4.0732 (4.0996)	Top-1 acc 25.391 (29.176)	Top-5 acc 52.344 (52.531)	lr 0.00862
Warmup Train [30][330/3239]	Time 0.207 (0.293)	Data 0.001 (0.061)	Loss 3.9645 (4.1008)	Top-1 acc 28.516 (29.127)	Top-5 acc 56.250 (52.492)	lr 0.00861
Warmup Train [30][340/3239]	Time 0.224 (0.291)	Data 0.001 (0.059)	Loss 4.0508 (4.1000)	Top-1 acc 32.812 (29.145)	Top-5 acc 51.953 (52.485)	lr 0.00861
Warmup Train [30][350/3239]	Time 0.354 (0.289)	Data 0.001 (0.058)	Loss 4.0905 (4.0984)	Top-1 acc 29.688 (29.133)	Top-5 acc 51.562 (52.513)	lr 0.00860
Warmup Train [30][360/3239]	Time 0.236 (0.288)	Data 0.001 (0.056)	Loss 3.9473 (4.0973)	Top-1 acc 32.031 (29.151)	Top-5 acc 56.641 (52.552)	lr 0.00860
Warmup Train [30][370/3239]	Time 0.161 (0.286)	Data 0.001 (0.055)	Loss 4.1323 (4.0989)	Top-1 acc 26.562 (29.137)	Top-5 acc 50.000 (52.510)	lr 0.00859
Warmup Train [30][380/3239]	Time 0.189 (0.284)	Data 0.001 (0.053)	Loss 4.1332 (4.0994)	Top-1 acc 28.906 (29.140)	Top-5 acc 51.172 (52.499)	lr 0.00859
Warmup Train [30][390/3239]	Time 0.215 (0.283)	Data 0.001 (0.052)	Loss 4.0219 (4.0989)	Top-1 acc 28.125 (29.153)	Top-5 acc 54.688 (52.502)	lr 0.00858
Warmup Train [30][400/3239]	Time 0.154 (0.281)	Data 0.001 (0.051)	Loss 3.9650 (4.0967)	Top-1 acc 30.859 (29.171)	Top-5 acc 55.078 (52.545)	lr 0.00858
Warmup Train [30][410/3239]	Time 0.153 (0.280)	Data 0.001 (0.050)	Loss 4.1141 (4.0988)	Top-1 acc 25.781 (29.130)	Top-5 acc 51.562 (52.505)	lr 0.00858
Warmup Train [30][420/3239]	Time 0.204 (0.278)	Data 0.001 (0.049)	Loss 4.0317 (4.0974)	Top-1 acc 28.125 (29.152)	Top-5 acc 53.516 (52.541)	lr 0.00857
Warmup Train [30][430/3239]	Time 0.219 (0.277)	Data 0.002 (0.047)	Loss 3.9673 (4.0964)	Top-1 acc 30.469 (29.162)	Top-5 acc 54.297 (52.545)	lr 0.00857
Warmup Train [30][440/3239]	Time 0.154 (0.276)	Data 0.001 (0.046)	Loss 4.3397 (4.0968)	Top-1 acc 28.516 (29.202)	Top-5 acc 48.438 (52.554)	lr 0.00856
Warmup Train [30][450/3239]	Time 0.306 (0.275)	Data 0.001 (0.046)	Loss 4.2489 (4.0972)	Top-1 acc 28.125 (29.225)	Top-5 acc 47.656 (52.567)	lr 0.00856
Warmup Train [30][460/3239]	Time 0.170 (0.274)	Data 0.038 (0.045)	Loss 4.1502 (4.0961)	Top-1 acc 24.219 (29.228)	Top-5 acc 49.219 (52.579)	lr 0.00855
Warmup Train [30][470/3239]	Time 0.208 (0.273)	Data 0.001 (0.044)	Loss 4.1991 (4.0956)	Top-1 acc 22.656 (29.235)	Top-5 acc 46.875 (52.591)	lr 0.00855
Warmup Train [30][480/3239]	Time 0.231 (0.272)	Data 0.002 (0.043)	Loss 4.2162 (4.0952)	Top-1 acc 24.609 (29.229)	Top-5 acc 48.438 (52.579)	lr 0.00854
Warmup Train [30][490/3239]	Time 0.188 (0.271)	Data 0.001 (0.042)	Loss 4.0227 (4.0953)	Top-1 acc 29.688 (29.236)	Top-5 acc 52.344 (52.575)	lr 0.00854
Warmup Train [30][500/3239]	Time 0.252 (0.270)	Data 0.001 (0.042)	Loss 4.0773 (4.0948)	Top-1 acc 28.906 (29.245)	Top-5 acc 50.781 (52.583)	lr 0.00853
Warmup Train [30][510/3239]	Time 0.246 (0.269)	Data 0.001 (0.041)	Loss 3.9851 (4.0948)	Top-1 acc 31.250 (29.254)	Top-5 acc 55.078 (52.591)	lr 0.00853
Warmup Train [30][520/3239]	Time 0.204 (0.268)	Data 0.002 (0.040)	Loss 4.0783 (4.0945)	Top-1 acc 25.391 (29.254)	Top-5 acc 51.562 (52.598)	lr 0.00853
Warmup Train [30][530/3239]	Time 0.229 (0.267)	Data 0.002 (0.039)	Loss 4.0925 (4.0951)	Top-1 acc 26.953 (29.234)	Top-5 acc 51.172 (52.588)	lr 0.00852
Warmup Train [30][540/3239]	Time 0.237 (0.266)	Data 0.001 (0.039)	Loss 3.7653 (4.0959)	Top-1 acc 35.156 (29.223)	Top-5 acc 60.547 (52.563)	lr 0.00852
Warmup Train [30][550/3239]	Time 0.173 (0.265)	Data 0.001 (0.038)	Loss 4.2118 (4.0965)	Top-1 acc 26.562 (29.227)	Top-5 acc 50.000 (52.547)	lr 0.00851
Warmup Train [30][560/3239]	Time 0.317 (0.265)	Data 0.001 (0.037)	Loss 4.0733 (4.0958)	Top-1 acc 31.250 (29.235)	Top-5 acc 52.344 (52.553)	lr 0.00851
Warmup Train [30][570/3239]	Time 0.217 (0.264)	Data 0.001 (0.037)	Loss 4.1848 (4.0960)	Top-1 acc 26.172 (29.219)	Top-5 acc 48.828 (52.542)	lr 0.00850
Warmup Train [30][580/3239]	Time 0.154 (0.263)	Data 0.001 (0.036)	Loss 3.9751 (4.0962)	Top-1 acc 29.297 (29.203)	Top-5 acc 57.031 (52.539)	lr 0.00850
Warmup Train [30][590/3239]	Time 0.165 (0.263)	Data 0.001 (0.036)	Loss 4.1992 (4.0963)	Top-1 acc 28.125 (29.194)	Top-5 acc 50.000 (52.536)	lr 0.00849
Warmup Train [30][600/3239]	Time 0.273 (0.262)	Data 0.001 (0.035)	Loss 3.9348 (4.0955)	Top-1 acc 30.469 (29.194)	Top-5 acc 56.250 (52.557)	lr 0.00849
Warmup Train [30][610/3239]	Time 0.161 (0.261)	Data 0.001 (0.035)	Loss 4.0125 (4.0956)	Top-1 acc 31.250 (29.168)	Top-5 acc 55.469 (52.547)	lr 0.00848
Warmup Train [30][620/3239]	Time 0.292 (0.261)	Data 0.001 (0.034)	Loss 4.0318 (4.0957)	Top-1 acc 29.688 (29.175)	Top-5 acc 53.125 (52.548)	lr 0.00848
Warmup Train [30][630/3239]	Time 0.183 (0.260)	Data 0.001 (0.034)	Loss 4.0892 (4.0969)	Top-1 acc 26.562 (29.146)	Top-5 acc 51.953 (52.515)	lr 0.00848
Warmup Train [30][640/3239]	Time 0.153 (0.259)	Data 0.001 (0.033)	Loss 4.2606 (4.0977)	Top-1 acc 24.219 (29.120)	Top-5 acc 46.094 (52.495)	lr 0.00847
Warmup Train [30][650/3239]	Time 0.227 (0.259)	Data 0.001 (0.033)	Loss 4.2348 (4.0967)	Top-1 acc 25.781 (29.146)	Top-5 acc 48.828 (52.525)	lr 0.00847
Warmup Train [30][660/3239]	Time 0.270 (0.258)	Data 0.001 (0.032)	Loss 4.1014 (4.0978)	Top-1 acc 28.906 (29.133)	Top-5 acc 50.781 (52.479)	lr 0.00846
Warmup Train [30][670/3239]	Time 0.226 (0.258)	Data 0.001 (0.032)	Loss 4.0563 (4.0984)	Top-1 acc 31.641 (29.132)	Top-5 acc 53.516 (52.469)	lr 0.00846
Warmup Train [30][680/3239]	Time 0.217 (0.257)	Data 0.001 (0.031)	Loss 4.1247 (4.0988)	Top-1 acc 28.516 (29.134)	Top-5 acc 53.125 (52.467)	lr 0.00845
Warmup Train [30][690/3239]	Time 0.219 (0.257)	Data 0.002 (0.031)	Loss 3.9859 (4.0977)	Top-1 acc 30.469 (29.162)	Top-5 acc 57.031 (52.495)	lr 0.00845
Warmup Train [30][700/3239]	Time 0.205 (0.256)	Data 0.002 (0.031)	Loss 4.1271 (4.0975)	Top-1 acc 26.953 (29.166)	Top-5 acc 51.172 (52.501)	lr 0.00844
Warmup Train [30][710/3239]	Time 0.141 (0.256)	Data 0.001 (0.030)	Loss 4.1491 (4.0974)	Top-1 acc 26.562 (29.160)	Top-5 acc 50.781 (52.496)	lr 0.00844
Warmup Train [30][720/3239]	Time 0.239 (0.255)	Data 0.001 (0.030)	Loss 4.2809 (4.0972)	Top-1 acc 26.562 (29.160)	Top-5 acc 47.266 (52.502)	lr 0.00843
Warmup Train [30][730/3239]	Time 0.271 (0.255)	Data 0.001 (0.030)	Loss 3.9701 (4.0980)	Top-1 acc 32.422 (29.167)	Top-5 acc 55.078 (52.484)	lr 0.00843
Warmup Train [30][740/3239]	Time 0.232 (0.254)	Data 0.002 (0.029)	Loss 4.3312 (4.0979)	Top-1 acc 29.297 (29.180)	Top-5 acc 49.219 (52.490)	lr 0.00843
Warmup Train [30][750/3239]	Time 0.197 (0.254)	Data 0.001 (0.029)	Loss 4.2155 (4.0978)	Top-1 acc 25.391 (29.183)	Top-5 acc 50.391 (52.485)	lr 0.00842
Warmup Train [30][760/3239]	Time 0.341 (0.254)	Data 0.002 (0.028)	Loss 4.1010 (4.0982)	Top-1 acc 28.906 (29.174)	Top-5 acc 50.391 (52.470)	lr 0.00842
Warmup Train [30][770/3239]	Time 0.218 (0.253)	Data 0.001 (0.028)	Loss 4.2593 (4.0981)	Top-1 acc 29.297 (29.187)	Top-5 acc 49.609 (52.465)	lr 0.00841
Warmup Train [30][780/3239]	Time 0.213 (0.253)	Data 0.001 (0.028)	Loss 4.0832 (4.0976)	Top-1 acc 27.734 (29.186)	Top-5 acc 51.953 (52.483)	lr 0.00841
Warmup Train [30][790/3239]	Time 0.150 (0.252)	Data 0.001 (0.028)	Loss 4.1150 (4.0978)	Top-1 acc 26.172 (29.180)	Top-5 acc 54.688 (52.485)	lr 0.00840
Warmup Train [30][800/3239]	Time 0.205 (0.252)	Data 0.001 (0.027)	Loss 4.0244 (4.0983)	Top-1 acc 25.781 (29.158)	Top-5 acc 50.000 (52.472)	lr 0.00840
Warmup Train [30][810/3239]	Time 0.209 (0.252)	Data 0.001 (0.027)	Loss 4.0131 (4.0980)	Top-1 acc 28.906 (29.180)	Top-5 acc 51.172 (52.472)	lr 0.00839
Warmup Train [30][820/3239]	Time 0.165 (0.251)	Data 0.001 (0.027)	Loss 4.0720 (4.0988)	Top-1 acc 29.688 (29.172)	Top-5 acc 51.172 (52.451)	lr 0.00839
Warmup Train [30][830/3239]	Time 0.212 (0.251)	Data 0.002 (0.026)	Loss 4.0932 (4.0989)	Top-1 acc 28.906 (29.171)	Top-5 acc 53.516 (52.450)	lr 0.00838
Warmup Train [30][840/3239]	Time 0.183 (0.251)	Data 0.001 (0.026)	Loss 4.1583 (4.0982)	Top-1 acc 24.609 (29.171)	Top-5 acc 50.391 (52.471)	lr 0.00838
Warmup Train [30][850/3239]	Time 0.262 (0.250)	Data 0.002 (0.026)	Loss 4.1938 (4.0981)	Top-1 acc 25.391 (29.163)	Top-5 acc 50.000 (52.470)	lr 0.00838
Warmup Train [30][860/3239]	Time 0.181 (0.250)	Data 0.001 (0.026)	Loss 3.9977 (4.0980)	Top-1 acc 31.641 (29.176)	Top-5 acc 55.859 (52.479)	lr 0.00837
Warmup Train [30][870/3239]	Time 0.190 (0.250)	Data 0.001 (0.025)	Loss 3.9465 (4.0974)	Top-1 acc 30.469 (29.173)	Top-5 acc 52.734 (52.492)	lr 0.00837
Warmup Train [30][880/3239]	Time 0.336 (0.249)	Data 0.001 (0.025)	Loss 3.8693 (4.0980)	Top-1 acc 32.422 (29.160)	Top-5 acc 58.984 (52.483)	lr 0.00836
Warmup Train [30][890/3239]	Time 0.175 (0.249)	Data 0.001 (0.025)	Loss 4.2024 (4.0979)	Top-1 acc 24.609 (29.161)	Top-5 acc 51.953 (52.488)	lr 0.00836
Warmup Train [30][900/3239]	Time 0.201 (0.249)	Data 0.001 (0.025)	Loss 4.1296 (4.0980)	Top-1 acc 26.562 (29.145)	Top-5 acc 51.953 (52.485)	lr 0.00835
Warmup Train [30][910/3239]	Time 0.218 (0.249)	Data 0.001 (0.024)	Loss 4.0820 (4.0979)	Top-1 acc 29.688 (29.149)	Top-5 acc 49.219 (52.481)	lr 0.00835
Warmup Train [30][920/3239]	Time 0.171 (0.248)	Data 0.001 (0.024)	Loss 4.0378 (4.0978)	Top-1 acc 27.344 (29.150)	Top-5 acc 53.125 (52.477)	lr 0.00834
Warmup Train [30][930/3239]	Time 0.211 (0.248)	Data 0.001 (0.024)	Loss 4.1799 (4.0984)	Top-1 acc 27.734 (29.141)	Top-5 acc 51.953 (52.458)	lr 0.00834
Warmup Train [30][940/3239]	Time 0.218 (0.248)	Data 0.002 (0.024)	Loss 4.2209 (4.0983)	Top-1 acc 26.172 (29.147)	Top-5 acc 50.781 (52.455)	lr 0.00833
Warmup Train [30][950/3239]	Time 0.233 (0.248)	Data 0.002 (0.023)	Loss 4.2367 (4.0987)	Top-1 acc 24.219 (29.131)	Top-5 acc 50.391 (52.444)	lr 0.00833
Warmup Train [30][960/3239]	Time 0.234 (0.247)	Data 0.003 (0.023)	Loss 4.0260 (4.0981)	Top-1 acc 29.688 (29.143)	Top-5 acc 51.562 (52.464)	lr 0.00833
Warmup Train [30][970/3239]	Time 0.202 (0.247)	Data 0.001 (0.023)	Loss 4.0168 (4.0982)	Top-1 acc 31.250 (29.144)	Top-5 acc 55.859 (52.469)	lr 0.00832
Warmup Train [30][980/3239]	Time 0.304 (0.247)	Data 0.001 (0.023)	Loss 4.3290 (4.0982)	Top-1 acc 25.781 (29.140)	Top-5 acc 48.828 (52.468)	lr 0.00832
Warmup Train [30][990/3239]	Time 0.264 (0.247)	Data 0.002 (0.022)	Loss 4.1337 (4.0978)	Top-1 acc 28.125 (29.152)	Top-5 acc 48.828 (52.474)	lr 0.00831
Warmup Train [30][1000/3239]	Time 0.169 (0.246)	Data 0.001 (0.022)	Loss 4.2839 (4.0986)	Top-1 acc 25.781 (29.133)	Top-5 acc 48.047 (52.456)	lr 0.00831
Warmup Train [30][1010/3239]	Time 0.272 (0.246)	Data 0.002 (0.022)	Loss 4.0907 (4.0989)	Top-1 acc 31.641 (29.128)	Top-5 acc 52.344 (52.448)	lr 0.00830
Warmup Train [30][1020/3239]	Time 0.227 (0.246)	Data 0.002 (0.022)	Loss 4.2121 (4.0988)	Top-1 acc 26.172 (29.125)	Top-5 acc 48.047 (52.449)	lr 0.00830
Warmup Train [30][1030/3239]	Time 0.203 (0.246)	Data 0.002 (0.022)	Loss 4.0435 (4.0987)	Top-1 acc 31.641 (29.129)	Top-5 acc 58.203 (52.454)	lr 0.00829
Warmup Train [30][1040/3239]	Time 0.228 (0.246)	Data 0.002 (0.022)	Loss 4.1283 (4.0984)	Top-1 acc 30.078 (29.147)	Top-5 acc 53.906 (52.463)	lr 0.00829
Warmup Train [30][1050/3239]	Time 0.150 (0.245)	Data 0.001 (0.021)	Loss 4.0941 (4.0985)	Top-1 acc 30.859 (29.146)	Top-5 acc 51.172 (52.468)	lr 0.00829
Warmup Train [30][1060/3239]	Time 0.217 (0.245)	Data 0.001 (0.021)	Loss 3.9419 (4.0981)	Top-1 acc 33.984 (29.157)	Top-5 acc 55.469 (52.483)	lr 0.00828
Warmup Train [30][1070/3239]	Time 0.230 (0.245)	Data 0.001 (0.021)	Loss 4.2880 (4.0981)	Top-1 acc 24.219 (29.160)	Top-5 acc 49.609 (52.486)	lr 0.00828
Warmup Train [30][1080/3239]	Time 0.272 (0.245)	Data 0.001 (0.021)	Loss 4.1921 (4.0984)	Top-1 acc 29.297 (29.165)	Top-5 acc 52.734 (52.475)	lr 0.00827
Warmup Train [30][1090/3239]	Time 0.282 (0.245)	Data 0.001 (0.021)	Loss 3.9187 (4.0984)	Top-1 acc 33.594 (29.171)	Top-5 acc 57.031 (52.477)	lr 0.00827
Warmup Train [30][1100/3239]	Time 0.127 (0.244)	Data 0.001 (0.021)	Loss 4.3455 (4.0988)	Top-1 acc 26.172 (29.165)	Top-5 acc 46.484 (52.473)	lr 0.00826
Warmup Train [30][1110/3239]	Time 0.166 (0.244)	Data 0.002 (0.020)	Loss 4.1294 (4.0988)	Top-1 acc 32.422 (29.169)	Top-5 acc 51.562 (52.472)	lr 0.00826
Warmup Train [30][1120/3239]	Time 0.252 (0.244)	Data 0.001 (0.020)	Loss 4.1922 (4.0992)	Top-1 acc 28.516 (29.168)	Top-5 acc 51.562 (52.465)	lr 0.00825
Warmup Train [30][1130/3239]	Time 0.180 (0.244)	Data 0.002 (0.020)	Loss 4.1069 (4.0991)	Top-1 acc 31.250 (29.165)	Top-5 acc 51.953 (52.468)	lr 0.00825
Warmup Train [30][1140/3239]	Time 0.186 (0.244)	Data 0.001 (0.020)	Loss 3.8605 (4.0990)	Top-1 acc 32.422 (29.158)	Top-5 acc 55.859 (52.463)	lr 0.00824
Warmup Train [30][1150/3239]	Time 0.232 (0.244)	Data 0.002 (0.020)	Loss 4.0789 (4.0990)	Top-1 acc 32.031 (29.149)	Top-5 acc 52.734 (52.458)	lr 0.00824
Warmup Train [30][1160/3239]	Time 0.225 (0.243)	Data 0.001 (0.020)	Loss 4.0742 (4.0994)	Top-1 acc 31.250 (29.148)	Top-5 acc 53.125 (52.455)	lr 0.00824
Warmup Train [30][1170/3239]	Time 0.239 (0.243)	Data 0.001 (0.019)	Loss 4.1294 (4.0995)	Top-1 acc 30.469 (29.156)	Top-5 acc 53.906 (52.454)	lr 0.00823
Warmup Train [30][1180/3239]	Time 0.331 (0.243)	Data 0.001 (0.019)	Loss 4.0472 (4.0992)	Top-1 acc 26.953 (29.162)	Top-5 acc 51.172 (52.454)	lr 0.00823
Warmup Train [30][1190/3239]	Time 0.225 (0.243)	Data 0.001 (0.019)	Loss 4.0482 (4.0989)	Top-1 acc 26.562 (29.161)	Top-5 acc 55.078 (52.459)	lr 0.00822
Warmup Train [30][1200/3239]	Time 0.242 (0.243)	Data 0.001 (0.019)	Loss 3.9574 (4.0990)	Top-1 acc 32.031 (29.155)	Top-5 acc 55.078 (52.456)	lr 0.00822
Warmup Train [30][1210/3239]	Time 0.167 (0.243)	Data 0.001 (0.019)	Loss 4.2088 (4.0992)	Top-1 acc 28.906 (29.153)	Top-5 acc 49.609 (52.448)	lr 0.00821
Warmup Train [30][1220/3239]	Time 0.247 (0.243)	Data 0.001 (0.019)	Loss 4.1496 (4.0989)	Top-1 acc 30.859 (29.164)	Top-5 acc 48.828 (52.462)	lr 0.00821
Warmup Train [30][1230/3239]	Time 0.264 (0.242)	Data 0.001 (0.019)	Loss 4.1694 (4.0980)	Top-1 acc 25.391 (29.188)	Top-5 acc 48.828 (52.484)	lr 0.00820
Warmup Train [30][1240/3239]	Time 0.198 (0.242)	Data 0.002 (0.019)	Loss 3.9505 (4.0976)	Top-1 acc 28.906 (29.199)	Top-5 acc 55.469 (52.487)	lr 0.00820
Warmup Train [30][1250/3239]	Time 0.212 (0.242)	Data 0.002 (0.018)	Loss 4.0782 (4.0973)	Top-1 acc 29.688 (29.209)	Top-5 acc 53.906 (52.496)	lr 0.00820
Warmup Train [30][1260/3239]	Time 0.208 (0.242)	Data 0.001 (0.018)	Loss 4.0416 (4.0973)	Top-1 acc 28.906 (29.211)	Top-5 acc 53.906 (52.496)	lr 0.00819
Warmup Train [30][1270/3239]	Time 0.154 (0.242)	Data 0.002 (0.018)	Loss 4.3229 (4.0980)	Top-1 acc 23.438 (29.200)	Top-5 acc 46.094 (52.485)	lr 0.00819
Warmup Train [30][1280/3239]	Time 0.290 (0.242)	Data 0.001 (0.018)	Loss 4.0828 (4.0979)	Top-1 acc 26.953 (29.200)	Top-5 acc 53.906 (52.493)	lr 0.00818
Warmup Train [30][1290/3239]	Time 0.179 (0.242)	Data 0.001 (0.018)	Loss 3.9639 (4.0976)	Top-1 acc 31.641 (29.206)	Top-5 acc 55.859 (52.498)	lr 0.00818
Warmup Train [30][1300/3239]	Time 0.168 (0.241)	Data 0.001 (0.018)	Loss 4.0879 (4.0978)	Top-1 acc 30.469 (29.200)	Top-5 acc 51.953 (52.490)	lr 0.00817
Warmup Train [30][1310/3239]	Time 0.236 (0.241)	Data 0.002 (0.018)	Loss 4.0950 (4.0984)	Top-1 acc 25.391 (29.191)	Top-5 acc 52.734 (52.474)	lr 0.00817
Warmup Train [30][1320/3239]	Time 0.249 (0.241)	Data 0.002 (0.018)	Loss 4.2524 (4.0989)	Top-1 acc 26.953 (29.187)	Top-5 acc 51.172 (52.464)	lr 0.00816
Warmup Train [30][1330/3239]	Time 0.199 (0.241)	Data 0.001 (0.018)	Loss 3.8681 (4.0981)	Top-1 acc 37.891 (29.192)	Top-5 acc 57.812 (52.478)	lr 0.00816
Warmup Train [30][1340/3239]	Time 0.185 (0.241)	Data 0.001 (0.017)	Loss 3.9657 (4.0981)	Top-1 acc 34.375 (29.196)	Top-5 acc 57.422 (52.472)	lr 0.00815
Warmup Train [30][1350/3239]	Time 0.207 (0.240)	Data 0.001 (0.017)	Loss 4.0246 (4.0983)	Top-1 acc 30.469 (29.192)	Top-5 acc 57.422 (52.465)	lr 0.00815
Warmup Train [30][1360/3239]	Time 0.274 (0.240)	Data 0.002 (0.017)	Loss 4.0210 (4.0986)	Top-1 acc 33.594 (29.197)	Top-5 acc 50.781 (52.451)	lr 0.00815
Warmup Train [30][1370/3239]	Time 0.201 (0.240)	Data 0.001 (0.017)	Loss 4.3924 (4.0988)	Top-1 acc 23.828 (29.191)	Top-5 acc 46.094 (52.445)	lr 0.00814
Warmup Train [30][1380/3239]	Time 0.266 (0.240)	Data 0.002 (0.017)	Loss 4.1686 (4.0986)	Top-1 acc 29.688 (29.196)	Top-5 acc 53.125 (52.450)	lr 0.00814
Warmup Train [30][1390/3239]	Time 0.237 (0.240)	Data 0.001 (0.017)	Loss 3.8936 (4.0985)	Top-1 acc 30.078 (29.197)	Top-5 acc 54.297 (52.449)	lr 0.00813
Warmup Train [30][1400/3239]	Time 0.142 (0.240)	Data 0.001 (0.017)	Loss 4.0127 (4.0984)	Top-1 acc 30.469 (29.191)	Top-5 acc 51.172 (52.444)	lr 0.00813
Warmup Train [30][1410/3239]	Time 0.286 (0.240)	Data 0.001 (0.017)	Loss 4.1197 (4.0983)	Top-1 acc 26.562 (29.195)	Top-5 acc 52.734 (52.446)	lr 0.00812
Warmup Train [30][1420/3239]	Time 0.224 (0.239)	Data 0.001 (0.017)	Loss 4.2385 (4.0988)	Top-1 acc 28.125 (29.186)	Top-5 acc 49.219 (52.427)	lr 0.00812
Warmup Train [30][1430/3239]	Time 0.284 (0.239)	Data 0.002 (0.016)	Loss 4.0115 (4.0986)	Top-1 acc 30.469 (29.189)	Top-5 acc 54.688 (52.431)	lr 0.00811
Warmup Train [30][1440/3239]	Time 0.265 (0.239)	Data 0.002 (0.016)	Loss 4.0957 (4.0987)	Top-1 acc 30.078 (29.194)	Top-5 acc 50.781 (52.428)	lr 0.00811
Warmup Train [30][1450/3239]	Time 0.242 (0.239)	Data 0.001 (0.016)	Loss 4.2158 (4.0989)	Top-1 acc 26.562 (29.189)	Top-5 acc 48.828 (52.423)	lr 0.00811
Warmup Train [30][1460/3239]	Time 0.261 (0.239)	Data 0.001 (0.016)	Loss 4.0343 (4.0995)	Top-1 acc 30.078 (29.173)	Top-5 acc 55.469 (52.412)	lr 0.00810
Warmup Train [30][1470/3239]	Time 0.199 (0.239)	Data 0.001 (0.016)	Loss 4.1241 (4.0995)	Top-1 acc 31.250 (29.166)	Top-5 acc 53.906 (52.413)	lr 0.00810
Warmup Train [30][1480/3239]	Time 0.254 (0.239)	Data 0.001 (0.016)	Loss 4.1604 (4.1000)	Top-1 acc 29.297 (29.156)	Top-5 acc 49.219 (52.400)	lr 0.00809
Warmup Train [30][1490/3239]	Time 0.274 (0.239)	Data 0.001 (0.016)	Loss 4.0436 (4.1002)	Top-1 acc 28.906 (29.152)	Top-5 acc 53.125 (52.396)	lr 0.00809
Warmup Train [30][1500/3239]	Time 0.172 (0.238)	Data 0.002 (0.016)	Loss 4.3478 (4.1004)	Top-1 acc 23.047 (29.152)	Top-5 acc 48.047 (52.386)	lr 0.00808
Warmup Train [30][1510/3239]	Time 0.243 (0.238)	Data 0.001 (0.016)	Loss 4.1946 (4.1005)	Top-1 acc 27.734 (29.151)	Top-5 acc 48.828 (52.385)	lr 0.00808
Warmup Train [30][1520/3239]	Time 0.234 (0.238)	Data 0.001 (0.016)	Loss 4.0256 (4.0996)	Top-1 acc 30.469 (29.168)	Top-5 acc 52.734 (52.403)	lr 0.00807
Warmup Train [30][1530/3239]	Time 0.258 (0.238)	Data 0.002 (0.016)	Loss 4.1055 (4.0996)	Top-1 acc 27.344 (29.174)	Top-5 acc 52.344 (52.408)	lr 0.00807
Warmup Train [30][1540/3239]	Time 0.140 (0.238)	Data 0.002 (0.015)	Loss 4.1184 (4.0995)	Top-1 acc 25.391 (29.176)	Top-5 acc 55.859 (52.409)	lr 0.00807
Warmup Train [30][1550/3239]	Time 0.279 (0.238)	Data 0.001 (0.015)	Loss 4.1415 (4.0995)	Top-1 acc 33.203 (29.181)	Top-5 acc 51.172 (52.411)	lr 0.00806
Warmup Train [30][1560/3239]	Time 0.187 (0.238)	Data 0.001 (0.015)	Loss 4.1588 (4.1001)	Top-1 acc 24.219 (29.168)	Top-5 acc 50.781 (52.404)	lr 0.00806
Warmup Train [30][1570/3239]	Time 0.149 (0.238)	Data 0.002 (0.015)	Loss 4.2056 (4.1002)	Top-1 acc 26.562 (29.153)	Top-5 acc 52.344 (52.399)	lr 0.00805
Warmup Train [30][1580/3239]	Time 0.230 (0.237)	Data 0.001 (0.015)	Loss 4.0738 (4.1004)	Top-1 acc 31.641 (29.152)	Top-5 acc 49.219 (52.396)	lr 0.00805
Warmup Train [30][1590/3239]	Time 0.192 (0.237)	Data 0.001 (0.015)	Loss 4.1100 (4.1000)	Top-1 acc 25.781 (29.163)	Top-5 acc 54.688 (52.399)	lr 0.00804
Warmup Train [30][1600/3239]	Time 0.247 (0.237)	Data 0.002 (0.015)	Loss 4.0150 (4.0996)	Top-1 acc 30.859 (29.168)	Top-5 acc 57.422 (52.407)	lr 0.00804
Warmup Train [30][1610/3239]	Time 0.198 (0.237)	Data 0.001 (0.015)	Loss 4.2599 (4.0999)	Top-1 acc 26.562 (29.159)	Top-5 acc 51.172 (52.399)	lr 0.00803
Warmup Train [30][1620/3239]	Time 0.284 (0.237)	Data 0.001 (0.015)	Loss 4.2298 (4.1002)	Top-1 acc 30.859 (29.157)	Top-5 acc 51.953 (52.390)	lr 0.00803
Warmup Train [30][1630/3239]	Time 0.163 (0.237)	Data 0.002 (0.015)	Loss 4.2914 (4.1005)	Top-1 acc 26.562 (29.149)	Top-5 acc 43.359 (52.378)	lr 0.00803
Warmup Train [30][1640/3239]	Time 0.195 (0.237)	Data 0.001 (0.015)	Loss 3.9063 (4.1003)	Top-1 acc 32.031 (29.151)	Top-5 acc 56.250 (52.380)	lr 0.00802
Warmup Train [30][1650/3239]	Time 0.190 (0.237)	Data 0.002 (0.015)	Loss 4.2250 (4.1001)	Top-1 acc 26.953 (29.152)	Top-5 acc 50.781 (52.387)	lr 0.00802
Warmup Train [30][1660/3239]	Time 0.186 (0.237)	Data 0.001 (0.015)	Loss 4.0332 (4.0999)	Top-1 acc 28.516 (29.155)	Top-5 acc 55.859 (52.391)	lr 0.00801
Warmup Train [30][1670/3239]	Time 0.250 (0.237)	Data 0.002 (0.015)	Loss 4.0543 (4.0997)	Top-1 acc 28.125 (29.165)	Top-5 acc 54.688 (52.397)	lr 0.00801
Warmup Train [30][1680/3239]	Time 0.143 (0.237)	Data 0.001 (0.014)	Loss 3.7973 (4.0995)	Top-1 acc 36.328 (29.167)	Top-5 acc 60.938 (52.400)	lr 0.00800
Warmup Train [30][1690/3239]	Time 0.180 (0.236)	Data 0.001 (0.014)	Loss 4.1520 (4.0995)	Top-1 acc 28.906 (29.169)	Top-5 acc 55.078 (52.406)	lr 0.00800
Warmup Train [30][1700/3239]	Time 0.229 (0.236)	Data 0.002 (0.014)	Loss 4.0831 (4.0998)	Top-1 acc 28.516 (29.168)	Top-5 acc 55.078 (52.404)	lr 0.00799
Warmup Train [30][1710/3239]	Time 0.308 (0.236)	Data 0.001 (0.014)	Loss 4.0482 (4.0998)	Top-1 acc 32.422 (29.168)	Top-5 acc 52.344 (52.398)	lr 0.00799
Warmup Train [30][1720/3239]	Time 0.203 (0.236)	Data 0.001 (0.014)	Loss 4.1935 (4.0995)	Top-1 acc 28.516 (29.171)	Top-5 acc 47.656 (52.407)	lr 0.00799
Warmup Train [30][1730/3239]	Time 0.180 (0.236)	Data 0.001 (0.014)	Loss 4.1123 (4.0993)	Top-1 acc 29.297 (29.168)	Top-5 acc 48.828 (52.410)	lr 0.00798
Warmup Train [30][1740/3239]	Time 0.214 (0.236)	Data 0.001 (0.014)	Loss 4.1335 (4.0993)	Top-1 acc 30.078 (29.166)	Top-5 acc 53.906 (52.408)	lr 0.00798
Warmup Train [30][1750/3239]	Time 0.234 (0.236)	Data 0.001 (0.014)	Loss 4.1347 (4.0997)	Top-1 acc 32.031 (29.161)	Top-5 acc 53.125 (52.402)	lr 0.00797
Warmup Train [30][1760/3239]	Time 0.186 (0.236)	Data 0.002 (0.014)	Loss 4.2255 (4.0998)	Top-1 acc 27.344 (29.161)	Top-5 acc 50.000 (52.403)	lr 0.00797
Warmup Train [30][1770/3239]	Time 0.222 (0.236)	Data 0.001 (0.014)	Loss 4.0578 (4.1001)	Top-1 acc 29.688 (29.156)	Top-5 acc 56.250 (52.397)	lr 0.00796
Warmup Train [30][1780/3239]	Time 0.151 (0.236)	Data 0.001 (0.014)	Loss 4.3313 (4.1004)	Top-1 acc 24.219 (29.153)	Top-5 acc 45.312 (52.387)	lr 0.00796
Warmup Train [30][1790/3239]	Time 0.313 (0.236)	Data 0.001 (0.014)	Loss 4.2588 (4.1006)	Top-1 acc 26.172 (29.150)	Top-5 acc 51.953 (52.384)	lr 0.00795
Warmup Train [30][1800/3239]	Time 0.236 (0.236)	Data 0.001 (0.014)	Loss 4.0675 (4.1005)	Top-1 acc 28.906 (29.149)	Top-5 acc 52.344 (52.389)	lr 0.00795
Warmup Train [30][1810/3239]	Time 0.173 (0.236)	Data 0.001 (0.014)	Loss 4.1438 (4.1002)	Top-1 acc 28.516 (29.149)	Top-5 acc 51.953 (52.391)	lr 0.00795
Warmup Train [30][1820/3239]	Time 0.141 (0.236)	Data 0.001 (0.014)	Loss 4.0600 (4.1000)	Top-1 acc 25.000 (29.155)	Top-5 acc 51.172 (52.399)	lr 0.00794
Warmup Train [30][1830/3239]	Time 0.208 (0.236)	Data 0.001 (0.014)	Loss 4.1542 (4.1000)	Top-1 acc 29.688 (29.157)	Top-5 acc 53.516 (52.399)	lr 0.00794
Warmup Train [30][1840/3239]	Time 0.174 (0.235)	Data 0.001 (0.013)	Loss 4.1033 (4.1002)	Top-1 acc 24.609 (29.151)	Top-5 acc 51.562 (52.393)	lr 0.00793
Warmup Train [30][1850/3239]	Time 0.215 (0.235)	Data 0.001 (0.013)	Loss 4.1214 (4.1000)	Top-1 acc 30.078 (29.159)	Top-5 acc 50.391 (52.396)	lr 0.00793
Warmup Train [30][1860/3239]	Time 0.210 (0.235)	Data 0.001 (0.013)	Loss 4.1809 (4.1001)	Top-1 acc 28.125 (29.159)	Top-5 acc 50.781 (52.394)	lr 0.00792
Warmup Train [30][1870/3239]	Time 0.229 (0.235)	Data 0.001 (0.013)	Loss 3.9371 (4.0999)	Top-1 acc 35.156 (29.167)	Top-5 acc 56.250 (52.398)	lr 0.00792
Warmup Train [30][1880/3239]	Time 0.220 (0.235)	Data 0.001 (0.013)	Loss 3.9971 (4.0999)	Top-1 acc 31.641 (29.168)	Top-5 acc 55.859 (52.398)	lr 0.00791
Warmup Train [30][1890/3239]	Time 0.210 (0.235)	Data 0.002 (0.013)	Loss 4.3306 (4.0998)	Top-1 acc 26.562 (29.169)	Top-5 acc 46.484 (52.402)	lr 0.00791
Warmup Train [30][1900/3239]	Time 0.313 (0.235)	Data 0.001 (0.013)	Loss 4.2363 (4.1001)	Top-1 acc 29.297 (29.162)	Top-5 acc 52.734 (52.396)	lr 0.00791
Warmup Train [30][1910/3239]	Time 0.244 (0.235)	Data 0.001 (0.013)	Loss 3.9728 (4.1001)	Top-1 acc 34.766 (29.171)	Top-5 acc 54.297 (52.396)	lr 0.00790
Warmup Train [30][1920/3239]	Time 0.223 (0.235)	Data 0.001 (0.013)	Loss 4.0458 (4.1002)	Top-1 acc 30.469 (29.167)	Top-5 acc 55.859 (52.395)	lr 0.00790
Warmup Train [30][1930/3239]	Time 0.215 (0.235)	Data 0.001 (0.013)	Loss 4.1019 (4.1004)	Top-1 acc 31.250 (29.160)	Top-5 acc 52.734 (52.393)	lr 0.00789
Warmup Train [30][1940/3239]	Time 0.217 (0.235)	Data 0.001 (0.013)	Loss 3.9575 (4.1007)	Top-1 acc 30.859 (29.152)	Top-5 acc 54.297 (52.383)	lr 0.00789
Warmup Train [30][1950/3239]	Time 0.322 (0.235)	Data 0.001 (0.013)	Loss 4.1295 (4.1008)	Top-1 acc 28.516 (29.152)	Top-5 acc 52.734 (52.384)	lr 0.00788
Warmup Train [30][1960/3239]	Time 0.209 (0.235)	Data 0.001 (0.013)	Loss 4.1469 (4.1007)	Top-1 acc 32.031 (29.157)	Top-5 acc 55.859 (52.387)	lr 0.00788
Warmup Train [30][1970/3239]	Time 0.249 (0.235)	Data 0.002 (0.013)	Loss 4.0534 (4.1003)	Top-1 acc 29.297 (29.161)	Top-5 acc 53.125 (52.398)	lr 0.00787
Warmup Train [30][1980/3239]	Time 0.191 (0.235)	Data 0.001 (0.013)	Loss 4.0566 (4.1004)	Top-1 acc 32.422 (29.160)	Top-5 acc 51.562 (52.391)	lr 0.00787
Warmup Train [30][1990/3239]	Time 0.167 (0.234)	Data 0.002 (0.013)	Loss 4.1781 (4.1008)	Top-1 acc 27.734 (29.152)	Top-5 acc 51.953 (52.381)	lr 0.00787
Warmup Train [30][2000/3239]	Time 0.251 (0.234)	Data 0.002 (0.013)	Loss 4.1533 (4.1009)	Top-1 acc 32.422 (29.152)	Top-5 acc 53.516 (52.383)	lr 0.00786
Warmup Train [30][2010/3239]	Time 0.166 (0.234)	Data 0.001 (0.013)	Loss 4.5132 (4.1009)	Top-1 acc 26.172 (29.155)	Top-5 acc 42.969 (52.380)	lr 0.00786
Warmup Train [30][2020/3239]	Time 0.279 (0.234)	Data 0.001 (0.013)	Loss 4.1655 (4.1010)	Top-1 acc 26.172 (29.154)	Top-5 acc 49.609 (52.374)	lr 0.00785
Warmup Train [30][2030/3239]	Time 0.254 (0.234)	Data 0.001 (0.013)	Loss 4.1229 (4.1009)	Top-1 acc 27.344 (29.154)	Top-5 acc 52.344 (52.381)	lr 0.00785
Warmup Train [30][2040/3239]	Time 0.244 (0.234)	Data 0.001 (0.012)	Loss 4.2231 (4.1010)	Top-1 acc 28.516 (29.151)	Top-5 acc 48.438 (52.372)	lr 0.00784
Warmup Train [30][2050/3239]	Time 0.216 (0.234)	Data 0.001 (0.012)	Loss 4.3046 (4.1010)	Top-1 acc 26.172 (29.154)	Top-5 acc 49.609 (52.369)	lr 0.00784
Warmup Train [30][2060/3239]	Time 0.169 (0.234)	Data 0.001 (0.012)	Loss 4.0787 (4.1010)	Top-1 acc 29.688 (29.158)	Top-5 acc 50.781 (52.370)	lr 0.00784
Warmup Train [30][2070/3239]	Time 0.216 (0.234)	Data 0.001 (0.012)	Loss 4.3055 (4.1011)	Top-1 acc 24.219 (29.155)	Top-5 acc 46.484 (52.365)	lr 0.00783
Warmup Train [30][2080/3239]	Time 0.180 (0.234)	Data 0.001 (0.012)	Loss 4.0791 (4.1012)	Top-1 acc 28.906 (29.150)	Top-5 acc 53.516 (52.363)	lr 0.00783
Warmup Train [30][2090/3239]	Time 0.217 (0.234)	Data 0.002 (0.012)	Loss 4.1677 (4.1012)	Top-1 acc 24.219 (29.141)	Top-5 acc 50.781 (52.362)	lr 0.00782
Warmup Train [30][2100/3239]	Time 0.226 (0.234)	Data 0.001 (0.012)	Loss 4.2175 (4.1010)	Top-1 acc 27.734 (29.144)	Top-5 acc 48.828 (52.367)	lr 0.00782
Warmup Train [30][2110/3239]	Time 0.230 (0.234)	Data 0.001 (0.012)	Loss 3.9990 (4.1009)	Top-1 acc 30.859 (29.143)	Top-5 acc 53.906 (52.365)	lr 0.00781
Warmup Train [30][2120/3239]	Time 0.221 (0.234)	Data 0.001 (0.012)	Loss 4.0816 (4.1013)	Top-1 acc 28.125 (29.138)	Top-5 acc 51.172 (52.356)	lr 0.00781
Warmup Train [30][2130/3239]	Time 0.288 (0.234)	Data 0.001 (0.012)	Loss 4.4052 (4.1018)	Top-1 acc 22.266 (29.126)	Top-5 acc 44.922 (52.345)	lr 0.00780
Warmup Train [30][2140/3239]	Time 0.366 (0.233)	Data 0.001 (0.012)	Loss 4.2361 (4.1020)	Top-1 acc 28.516 (29.122)	Top-5 acc 46.094 (52.338)	lr 0.00780
Warmup Train [30][2150/3239]	Time 0.162 (0.233)	Data 0.001 (0.012)	Loss 4.3755 (4.1022)	Top-1 acc 25.000 (29.121)	Top-5 acc 47.656 (52.333)	lr 0.00780
Warmup Train [30][2160/3239]	Time 0.261 (0.233)	Data 0.001 (0.012)	Loss 4.1346 (4.1022)	Top-1 acc 29.688 (29.119)	Top-5 acc 50.000 (52.336)	lr 0.00779
Warmup Train [30][2170/3239]	Time 0.187 (0.233)	Data 0.001 (0.012)	Loss 4.1083 (4.1020)	Top-1 acc 28.516 (29.117)	Top-5 acc 51.953 (52.339)	lr 0.00779
Warmup Train [30][2180/3239]	Time 0.177 (0.233)	Data 0.001 (0.012)	Loss 4.1237 (4.1022)	Top-1 acc 28.906 (29.114)	Top-5 acc 49.219 (52.333)	lr 0.00778
Warmup Train [30][2190/3239]	Time 0.254 (0.233)	Data 0.001 (0.012)	Loss 4.0114 (4.1023)	Top-1 acc 33.203 (29.110)	Top-5 acc 55.859 (52.329)	lr 0.00778
Warmup Train [30][2200/3239]	Time 0.271 (0.233)	Data 0.001 (0.012)	Loss 3.9341 (4.1023)	Top-1 acc 32.812 (29.109)	Top-5 acc 56.250 (52.328)	lr 0.00777
Warmup Train [30][2210/3239]	Time 0.237 (0.233)	Data 0.002 (0.012)	Loss 4.1676 (4.1022)	Top-1 acc 27.734 (29.109)	Top-5 acc 50.000 (52.329)	lr 0.00777
Warmup Train [30][2220/3239]	Time 0.173 (0.233)	Data 0.001 (0.012)	Loss 4.1753 (4.1025)	Top-1 acc 30.078 (29.107)	Top-5 acc 50.000 (52.326)	lr 0.00776
Warmup Train [30][2230/3239]	Time 0.213 (0.233)	Data 0.001 (0.012)	Loss 4.2763 (4.1024)	Top-1 acc 26.953 (29.113)	Top-5 acc 46.875 (52.329)	lr 0.00776
Warmup Train [30][2240/3239]	Time 0.400 (0.233)	Data 0.001 (0.012)	Loss 3.9843 (4.1023)	Top-1 acc 29.297 (29.116)	Top-5 acc 52.734 (52.328)	lr 0.00776
Warmup Train [30][2250/3239]	Time 0.190 (0.233)	Data 0.001 (0.012)	Loss 4.1154 (4.1023)	Top-1 acc 28.125 (29.112)	Top-5 acc 52.344 (52.327)	lr 0.00775
Warmup Train [30][2260/3239]	Time 0.178 (0.233)	Data 0.001 (0.012)	Loss 4.1784 (4.1028)	Top-1 acc 30.078 (29.103)	Top-5 acc 49.219 (52.319)	lr 0.00775
Warmup Train [30][2270/3239]	Time 0.254 (0.233)	Data 0.001 (0.012)	Loss 4.0728 (4.1028)	Top-1 acc 28.516 (29.109)	Top-5 acc 52.734 (52.322)	lr 0.00774
Warmup Train [30][2280/3239]	Time 0.199 (0.233)	Data 0.002 (0.011)	Loss 4.0847 (4.1028)	Top-1 acc 27.344 (29.108)	Top-5 acc 49.609 (52.319)	lr 0.00774
Warmup Train [30][2290/3239]	Time 0.187 (0.233)	Data 0.001 (0.011)	Loss 4.2583 (4.1031)	Top-1 acc 24.219 (29.106)	Top-5 acc 46.875 (52.311)	lr 0.00773
Warmup Train [30][2300/3239]	Time 0.146 (0.233)	Data 0.001 (0.011)	Loss 4.2121 (4.1031)	Top-1 acc 25.391 (29.104)	Top-5 acc 47.266 (52.307)	lr 0.00773
Warmup Train [30][2310/3239]	Time 0.222 (0.233)	Data 0.001 (0.011)	Loss 4.4360 (4.1034)	Top-1 acc 25.000 (29.104)	Top-5 acc 47.656 (52.301)	lr 0.00773
Warmup Train [30][2320/3239]	Time 0.189 (0.232)	Data 0.001 (0.011)	Loss 4.0051 (4.1033)	Top-1 acc 30.469 (29.110)	Top-5 acc 58.203 (52.307)	lr 0.00772
Warmup Train [30][2330/3239]	Time 0.261 (0.232)	Data 0.001 (0.011)	Loss 4.1228 (4.1035)	Top-1 acc 28.516 (29.110)	Top-5 acc 52.734 (52.301)	lr 0.00772
Warmup Train [30][2340/3239]	Time 0.202 (0.232)	Data 0.001 (0.011)	Loss 4.0058 (4.1035)	Top-1 acc 26.562 (29.108)	Top-5 acc 53.906 (52.299)	lr 0.00771
Warmup Train [30][2350/3239]	Time 0.192 (0.232)	Data 0.001 (0.011)	Loss 4.2349 (4.1035)	Top-1 acc 29.688 (29.109)	Top-5 acc 49.219 (52.302)	lr 0.00771
Warmup Train [30][2360/3239]	Time 0.246 (0.232)	Data 0.001 (0.011)	Loss 4.1651 (4.1034)	Top-1 acc 28.125 (29.111)	Top-5 acc 49.609 (52.304)	lr 0.00770
Warmup Train [30][2370/3239]	Time 0.176 (0.232)	Data 0.003 (0.011)	Loss 4.0097 (4.1036)	Top-1 acc 31.250 (29.105)	Top-5 acc 54.688 (52.300)	lr 0.00770
Warmup Train [30][2380/3239]	Time 0.192 (0.232)	Data 0.001 (0.011)	Loss 4.0539 (4.1035)	Top-1 acc 30.078 (29.106)	Top-5 acc 55.078 (52.299)	lr 0.00769
Warmup Train [30][2390/3239]	Time 0.223 (0.232)	Data 0.001 (0.011)	Loss 4.0768 (4.1036)	Top-1 acc 31.250 (29.106)	Top-5 acc 57.422 (52.300)	lr 0.00769
Warmup Train [30][2400/3239]	Time 0.272 (0.232)	Data 0.001 (0.011)	Loss 4.0848 (4.1035)	Top-1 acc 28.125 (29.101)	Top-5 acc 50.781 (52.303)	lr 0.00769
Warmup Train [30][2410/3239]	Time 0.156 (0.232)	Data 0.001 (0.011)	Loss 4.0786 (4.1036)	Top-1 acc 30.078 (29.101)	Top-5 acc 54.688 (52.306)	lr 0.00768
Warmup Train [30][2420/3239]	Time 0.191 (0.232)	Data 0.001 (0.011)	Loss 4.2750 (4.1039)	Top-1 acc 30.078 (29.096)	Top-5 acc 48.828 (52.299)	lr 0.00768
Warmup Train [30][2430/3239]	Time 0.224 (0.232)	Data 0.002 (0.011)	Loss 4.1937 (4.1039)	Top-1 acc 28.906 (29.096)	Top-5 acc 45.312 (52.298)	lr 0.00767
Warmup Train [30][2440/3239]	Time 0.180 (0.232)	Data 0.001 (0.011)	Loss 4.2343 (4.1040)	Top-1 acc 26.562 (29.093)	Top-5 acc 53.906 (52.300)	lr 0.00767
Warmup Train [30][2450/3239]	Time 0.189 (0.232)	Data 0.001 (0.011)	Loss 4.4282 (4.1039)	Top-1 acc 22.266 (29.095)	Top-5 acc 44.531 (52.300)	lr 0.00766
Warmup Train [30][2460/3239]	Time 0.285 (0.232)	Data 0.001 (0.011)	Loss 4.0607 (4.1039)	Top-1 acc 30.078 (29.095)	Top-5 acc 51.562 (52.302)	lr 0.00766
Warmup Train [30][2470/3239]	Time 0.178 (0.232)	Data 0.001 (0.011)	Loss 3.9981 (4.1038)	Top-1 acc 31.641 (29.099)	Top-5 acc 54.688 (52.305)	lr 0.00766
Warmup Train [30][2480/3239]	Time 0.204 (0.232)	Data 0.001 (0.011)	Loss 4.1034 (4.1039)	Top-1 acc 29.688 (29.100)	Top-5 acc 52.344 (52.302)	lr 0.00765
Warmup Train [30][2490/3239]	Time 0.208 (0.232)	Data 0.001 (0.011)	Loss 4.0468 (4.1037)	Top-1 acc 32.031 (29.105)	Top-5 acc 55.469 (52.308)	lr 0.00765
Warmup Train [30][2500/3239]	Time 0.270 (0.232)	Data 0.001 (0.011)	Loss 4.0116 (4.1034)	Top-1 acc 30.078 (29.113)	Top-5 acc 54.297 (52.317)	lr 0.00764
Warmup Train [30][2510/3239]	Time 0.129 (0.232)	Data 0.001 (0.011)	Loss 3.9251 (4.1032)	Top-1 acc 31.641 (29.114)	Top-5 acc 52.734 (52.321)	lr 0.00764
Warmup Train [30][2520/3239]	Time 0.222 (0.231)	Data 0.001 (0.011)	Loss 4.2427 (4.1032)	Top-1 acc 23.047 (29.113)	Top-5 acc 48.828 (52.318)	lr 0.00763
Warmup Train [30][2530/3239]	Time 0.215 (0.231)	Data 0.001 (0.011)	Loss 4.0618 (4.1032)	Top-1 acc 28.516 (29.109)	Top-5 acc 52.344 (52.316)	lr 0.00763
Warmup Train [30][2540/3239]	Time 0.213 (0.231)	Data 0.001 (0.011)	Loss 4.0361 (4.1033)	Top-1 acc 29.688 (29.101)	Top-5 acc 53.516 (52.313)	lr 0.00762
Warmup Train [30][2550/3239]	Time 0.252 (0.231)	Data 0.001 (0.011)	Loss 4.0579 (4.1032)	Top-1 acc 28.906 (29.100)	Top-5 acc 57.031 (52.314)	lr 0.00762
Warmup Train [30][2560/3239]	Time 0.337 (0.231)	Data 0.001 (0.011)	Loss 4.0692 (4.1030)	Top-1 acc 28.125 (29.105)	Top-5 acc 52.344 (52.318)	lr 0.00762
Warmup Train [30][2570/3239]	Time 0.203 (0.231)	Data 0.001 (0.011)	Loss 4.1769 (4.1031)	Top-1 acc 24.609 (29.103)	Top-5 acc 49.609 (52.313)	lr 0.00761
Warmup Train [30][2580/3239]	Time 0.220 (0.231)	Data 0.001 (0.011)	Loss 4.1935 (4.1030)	Top-1 acc 26.562 (29.107)	Top-5 acc 49.219 (52.317)	lr 0.00761
Warmup Train [30][2590/3239]	Time 0.212 (0.231)	Data 0.001 (0.010)	Loss 4.0247 (4.1032)	Top-1 acc 33.203 (29.106)	Top-5 acc 55.859 (52.311)	lr 0.00760
Warmup Train [30][2600/3239]	Time 0.257 (0.231)	Data 0.002 (0.010)	Loss 4.0710 (4.1031)	Top-1 acc 31.641 (29.107)	Top-5 acc 51.562 (52.312)	lr 0.00760
Warmup Train [30][2610/3239]	Time 0.209 (0.231)	Data 0.001 (0.010)	Loss 4.0283 (4.1031)	Top-1 acc 32.422 (29.108)	Top-5 acc 56.641 (52.314)	lr 0.00759
Warmup Train [30][2620/3239]	Time 0.257 (0.231)	Data 0.001 (0.010)	Loss 4.1727 (4.1030)	Top-1 acc 25.781 (29.107)	Top-5 acc 51.562 (52.318)	lr 0.00759
Warmup Train [30][2630/3239]	Time 0.262 (0.231)	Data 0.001 (0.010)	Loss 4.0052 (4.1032)	Top-1 acc 30.469 (29.103)	Top-5 acc 55.078 (52.311)	lr 0.00759
Warmup Train [30][2640/3239]	Time 0.191 (0.231)	Data 0.001 (0.010)	Loss 4.1486 (4.1031)	Top-1 acc 26.953 (29.099)	Top-5 acc 50.000 (52.312)	lr 0.00758
Warmup Train [30][2650/3239]	Time 0.200 (0.231)	Data 0.001 (0.010)	Loss 4.0639 (4.1030)	Top-1 acc 31.641 (29.102)	Top-5 acc 56.641 (52.319)	lr 0.00758
Warmup Train [30][2660/3239]	Time 0.264 (0.231)	Data 0.001 (0.010)	Loss 4.3010 (4.1030)	Top-1 acc 25.781 (29.098)	Top-5 acc 50.391 (52.316)	lr 0.00757
Warmup Train [30][2670/3239]	Time 0.188 (0.231)	Data 0.001 (0.010)	Loss 4.0814 (4.1030)	Top-1 acc 28.906 (29.099)	Top-5 acc 50.781 (52.316)	lr 0.00757
Warmup Train [30][2680/3239]	Time 0.240 (0.231)	Data 0.001 (0.010)	Loss 4.1467 (4.1028)	Top-1 acc 27.344 (29.105)	Top-5 acc 51.953 (52.319)	lr 0.00756
Warmup Train [30][2690/3239]	Time 0.242 (0.231)	Data 0.001 (0.010)	Loss 4.0709 (4.1027)	Top-1 acc 32.031 (29.104)	Top-5 acc 50.391 (52.321)	lr 0.00756
Warmup Train [30][2700/3239]	Time 0.251 (0.231)	Data 0.001 (0.010)	Loss 4.1745 (4.1027)	Top-1 acc 26.953 (29.100)	Top-5 acc 51.172 (52.318)	lr 0.00756
Warmup Train [30][2710/3239]	Time 0.175 (0.231)	Data 0.001 (0.010)	Loss 4.0328 (4.1028)	Top-1 acc 31.641 (29.098)	Top-5 acc 53.906 (52.315)	lr 0.00755
Warmup Train [30][2720/3239]	Time 0.238 (0.231)	Data 0.002 (0.010)	Loss 4.0273 (4.1028)	Top-1 acc 28.125 (29.103)	Top-5 acc 53.125 (52.318)	lr 0.00755
Warmup Train [30][2730/3239]	Time 0.239 (0.231)	Data 0.001 (0.010)	Loss 4.1185 (4.1030)	Top-1 acc 32.812 (29.097)	Top-5 acc 50.781 (52.317)	lr 0.00754
Warmup Train [30][2740/3239]	Time 0.159 (0.231)	Data 0.001 (0.010)	Loss 4.1494 (4.1031)	Top-1 acc 28.906 (29.093)	Top-5 acc 50.781 (52.314)	lr 0.00754
Warmup Train [30][2750/3239]	Time 0.189 (0.231)	Data 0.001 (0.010)	Loss 4.1617 (4.1031)	Top-1 acc 31.250 (29.095)	Top-5 acc 52.344 (52.315)	lr 0.00753
Warmup Train [30][2760/3239]	Time 0.188 (0.231)	Data 0.001 (0.010)	Loss 4.1175 (4.1030)	Top-1 acc 28.906 (29.096)	Top-5 acc 54.297 (52.317)	lr 0.00753
Warmup Train [30][2770/3239]	Time 0.283 (0.231)	Data 0.001 (0.010)	Loss 3.9081 (4.1028)	Top-1 acc 35.156 (29.100)	Top-5 acc 57.422 (52.320)	lr 0.00752
Warmup Train [30][2780/3239]	Time 0.434 (0.231)	Data 0.003 (0.010)	Loss 4.0776 (4.1028)	Top-1 acc 31.641 (29.100)	Top-5 acc 52.734 (52.321)	lr 0.00752
Warmup Train [30][2790/3239]	Time 0.182 (0.231)	Data 0.002 (0.010)	Loss 4.0887 (4.1029)	Top-1 acc 28.516 (29.099)	Top-5 acc 53.906 (52.322)	lr 0.00752
Warmup Train [30][2800/3239]	Time 0.212 (0.231)	Data 0.001 (0.010)	Loss 4.3763 (4.1029)	Top-1 acc 22.656 (29.093)	Top-5 acc 42.969 (52.316)	lr 0.00751
Warmup Train [30][2810/3239]	Time 0.233 (0.231)	Data 0.002 (0.010)	Loss 3.9704 (4.1027)	Top-1 acc 29.297 (29.101)	Top-5 acc 58.594 (52.326)	lr 0.00751
Warmup Train [30][2820/3239]	Time 0.288 (0.231)	Data 0.001 (0.010)	Loss 4.0078 (4.1027)	Top-1 acc 30.078 (29.101)	Top-5 acc 53.906 (52.325)	lr 0.00750
Warmup Train [30][2830/3239]	Time 0.226 (0.231)	Data 0.001 (0.010)	Loss 3.9602 (4.1027)	Top-1 acc 30.078 (29.100)	Top-5 acc 55.078 (52.325)	lr 0.00750
Warmup Train [30][2840/3239]	Time 0.219 (0.231)	Data 0.001 (0.010)	Loss 3.9924 (4.1028)	Top-1 acc 31.250 (29.098)	Top-5 acc 56.641 (52.321)	lr 0.00749
Warmup Train [30][2850/3239]	Time 0.219 (0.231)	Data 0.002 (0.010)	Loss 4.1355 (4.1027)	Top-1 acc 26.953 (29.102)	Top-5 acc 52.344 (52.326)	lr 0.00749
Warmup Train [30][2860/3239]	Time 0.147 (0.231)	Data 0.001 (0.010)	Loss 4.0097 (4.1026)	Top-1 acc 30.469 (29.103)	Top-5 acc 59.375 (52.327)	lr 0.00749
Warmup Train [30][2870/3239]	Time 0.389 (0.231)	Data 0.001 (0.010)	Loss 4.1384 (4.1028)	Top-1 acc 26.172 (29.099)	Top-5 acc 52.344 (52.325)	lr 0.00748
Warmup Train [30][2880/3239]	Time 0.286 (0.232)	Data 0.001 (0.010)	Loss 4.1696 (4.1027)	Top-1 acc 29.297 (29.101)	Top-5 acc 55.078 (52.329)	lr 0.00748
Warmup Train [30][2890/3239]	Time 0.167 (0.232)	Data 0.002 (0.010)	Loss 4.1161 (4.1028)	Top-1 acc 28.516 (29.103)	Top-5 acc 51.172 (52.329)	lr 0.00747
Warmup Train [30][2900/3239]	Time 0.231 (0.232)	Data 0.001 (0.010)	Loss 3.9772 (4.1027)	Top-1 acc 31.250 (29.102)	Top-5 acc 57.812 (52.332)	lr 0.00747
Warmup Train [30][2910/3239]	Time 0.298 (0.231)	Data 0.001 (0.010)	Loss 4.3247 (4.1030)	Top-1 acc 28.125 (29.100)	Top-5 acc 47.266 (52.326)	lr 0.00746
Warmup Train [30][2920/3239]	Time 0.165 (0.231)	Data 0.001 (0.010)	Loss 4.1164 (4.1030)	Top-1 acc 27.344 (29.096)	Top-5 acc 50.781 (52.322)	lr 0.00746
Warmup Train [30][2930/3239]	Time 0.238 (0.231)	Data 0.001 (0.010)	Loss 4.2138 (4.1028)	Top-1 acc 26.172 (29.103)	Top-5 acc 54.297 (52.327)	lr 0.00746
Warmup Train [30][2940/3239]	Time 0.205 (0.231)	Data 0.001 (0.010)	Loss 4.2289 (4.1028)	Top-1 acc 30.469 (29.105)	Top-5 acc 50.000 (52.327)	lr 0.00745
Warmup Train [30][2950/3239]	Time 0.197 (0.231)	Data 0.001 (0.010)	Loss 4.1143 (4.1028)	Top-1 acc 25.000 (29.104)	Top-5 acc 54.297 (52.329)	lr 0.00745
Warmup Train [30][2960/3239]	Time 0.208 (0.231)	Data 0.001 (0.010)	Loss 4.2012 (4.1029)	Top-1 acc 31.641 (29.103)	Top-5 acc 47.656 (52.326)	lr 0.00744
Warmup Train [30][2970/3239]	Time 0.172 (0.231)	Data 0.001 (0.010)	Loss 3.9516 (4.1027)	Top-1 acc 33.594 (29.108)	Top-5 acc 56.641 (52.329)	lr 0.00744
Warmup Train [30][2980/3239]	Time 0.347 (0.231)	Data 0.002 (0.010)	Loss 4.0368 (4.1028)	Top-1 acc 30.469 (29.107)	Top-5 acc 54.297 (52.328)	lr 0.00743
Warmup Train [30][2990/3239]	Time 0.262 (0.231)	Data 0.001 (0.009)	Loss 4.1202 (4.1027)	Top-1 acc 27.734 (29.107)	Top-5 acc 50.391 (52.329)	lr 0.00743
Warmup Train [30][3000/3239]	Time 0.249 (0.231)	Data 0.001 (0.009)	Loss 4.0463 (4.1029)	Top-1 acc 32.422 (29.105)	Top-5 acc 53.125 (52.325)	lr 0.00743
Warmup Train [30][3010/3239]	Time 0.217 (0.231)	Data 0.002 (0.009)	Loss 4.2554 (4.1029)	Top-1 acc 25.781 (29.104)	Top-5 acc 47.656 (52.322)	lr 0.00742
Warmup Train [30][3020/3239]	Time 0.233 (0.231)	Data 0.001 (0.009)	Loss 3.9445 (4.1027)	Top-1 acc 32.422 (29.105)	Top-5 acc 55.859 (52.324)	lr 0.00742
Warmup Train [30][3030/3239]	Time 0.163 (0.231)	Data 0.001 (0.009)	Loss 4.3259 (4.1029)	Top-1 acc 23.828 (29.100)	Top-5 acc 44.922 (52.319)	lr 0.00741
Warmup Train [30][3040/3239]	Time 0.261 (0.231)	Data 0.001 (0.009)	Loss 4.0224 (4.1029)	Top-1 acc 32.422 (29.106)	Top-5 acc 52.344 (52.318)	lr 0.00741
Warmup Train [30][3050/3239]	Time 0.183 (0.231)	Data 0.001 (0.009)	Loss 4.1985 (4.1028)	Top-1 acc 26.562 (29.113)	Top-5 acc 49.219 (52.321)	lr 0.00740
Warmup Train [30][3060/3239]	Time 0.173 (0.231)	Data 0.001 (0.009)	Loss 4.0561 (4.1030)	Top-1 acc 27.734 (29.106)	Top-5 acc 53.906 (52.312)	lr 0.00740
Warmup Train [30][3070/3239]	Time 0.215 (0.231)	Data 0.002 (0.009)	Loss 4.0512 (4.1030)	Top-1 acc 29.688 (29.106)	Top-5 acc 53.516 (52.315)	lr 0.00740
Warmup Train [30][3080/3239]	Time 0.149 (0.231)	Data 0.001 (0.009)	Loss 4.0952 (4.1028)	Top-1 acc 28.516 (29.109)	Top-5 acc 51.953 (52.316)	lr 0.00739
Warmup Train [30][3090/3239]	Time 0.220 (0.231)	Data 0.001 (0.009)	Loss 4.0517 (4.1029)	Top-1 acc 32.422 (29.111)	Top-5 acc 55.469 (52.319)	lr 0.00739
Warmup Train [30][3100/3239]	Time 0.285 (0.231)	Data 0.001 (0.009)	Loss 4.0268 (4.1028)	Top-1 acc 28.906 (29.118)	Top-5 acc 51.562 (52.323)	lr 0.00738
Warmup Train [30][3110/3239]	Time 0.211 (0.231)	Data 0.002 (0.009)	Loss 3.9422 (4.1026)	Top-1 acc 30.469 (29.121)	Top-5 acc 58.594 (52.329)	lr 0.00738
Warmup Train [30][3120/3239]	Time 0.191 (0.231)	Data 0.001 (0.009)	Loss 3.9269 (4.1026)	Top-1 acc 30.859 (29.122)	Top-5 acc 57.031 (52.331)	lr 0.00737
Warmup Train [30][3130/3239]	Time 0.321 (0.231)	Data 0.002 (0.009)	Loss 4.0542 (4.1025)	Top-1 acc 32.422 (29.123)	Top-5 acc 53.906 (52.331)	lr 0.00737
Warmup Train [30][3140/3239]	Time 0.263 (0.231)	Data 0.001 (0.009)	Loss 4.1382 (4.1026)	Top-1 acc 26.953 (29.119)	Top-5 acc 49.609 (52.328)	lr 0.00737
Warmup Train [30][3150/3239]	Time 0.201 (0.231)	Data 0.001 (0.009)	Loss 4.0472 (4.1025)	Top-1 acc 28.906 (29.120)	Top-5 acc 52.734 (52.332)	lr 0.00736
Warmup Train [30][3160/3239]	Time 0.248 (0.231)	Data 0.003 (0.009)	Loss 4.2926 (4.1024)	Top-1 acc 25.781 (29.121)	Top-5 acc 48.828 (52.338)	lr 0.00736
Warmup Train [30][3170/3239]	Time 0.241 (0.231)	Data 0.001 (0.009)	Loss 4.0067 (4.1025)	Top-1 acc 29.297 (29.121)	Top-5 acc 53.516 (52.336)	lr 0.00735
Warmup Train [30][3180/3239]	Time 0.252 (0.231)	Data 0.000 (0.009)	Loss 4.1472 (4.1025)	Top-1 acc 31.641 (29.122)	Top-5 acc 51.953 (52.332)	lr 0.00735
Warmup Train [30][3190/3239]	Time 0.171 (0.231)	Data 0.000 (0.009)	Loss 4.0092 (4.1025)	Top-1 acc 28.125 (29.121)	Top-5 acc 54.688 (52.333)	lr 0.00734
Warmup Train [30][3200/3239]	Time 0.249 (0.231)	Data 0.000 (0.009)	Loss 4.0853 (4.1025)	Top-1 acc 29.688 (29.119)	Top-5 acc 53.516 (52.333)	lr 0.00734
Warmup Train [30][3210/3239]	Time 0.251 (0.231)	Data 0.000 (0.009)	Loss 4.0506 (4.1026)	Top-1 acc 32.812 (29.123)	Top-5 acc 52.734 (52.331)	lr 0.00734
Warmup Train [30][3220/3239]	Time 0.156 (0.231)	Data 0.000 (0.009)	Loss 4.1241 (4.1026)	Top-1 acc 24.219 (29.118)	Top-5 acc 51.172 (52.329)	lr 0.00733
Warmup Train [30][3230/3239]	Time 0.191 (0.230)	Data 0.000 (0.009)	Loss 4.1162 (4.1026)	Top-1 acc 28.125 (29.118)	Top-5 acc 51.562 (52.331)	lr 0.00733
Warmup Train [30][3239/3239]	Time 0.154 (0.230)	Data 0.000 (0.009)	Loss 3.7547 (4.1025)	Top-1 acc 38.272 (29.121)	Top-5 acc 62.963 (52.332)	lr 0.00732
==========Warmup Valid [30/40]	loss 3.046	top-1 acc 36.665	top-5 acc 61.496	Train top-1 29.121	top-5 52.332	flops: 442.4M
Warmup Train [31][0/3239]	Time 17.842 (17.842)	Data 16.395 (16.395)	Loss 3.9750 (3.9750)	Top-1 acc 33.984 (33.984)	Top-5 acc 54.297 (54.297)	lr 0.00732
Warmup Train [31][10/3239]	Time 0.458 (1.973)	Data 0.003 (1.493)	Loss 4.1178 (4.0687)	Top-1 acc 29.297 (30.078)	Top-5 acc 50.781 (52.770)	lr 0.00732
Warmup Train [31][20/3239]	Time 0.222 (1.162)	Data 0.002 (0.783)	Loss 4.3369 (4.0781)	Top-1 acc 24.219 (29.557)	Top-5 acc 49.219 (52.623)	lr 0.00731
Warmup Train [31][30/3239]	Time 0.250 (0.857)	Data 0.001 (0.531)	Loss 4.1867 (4.0577)	Top-1 acc 28.125 (30.028)	Top-5 acc 51.562 (53.201)	lr 0.00731
Warmup Train [31][40/3239]	Time 0.195 (0.697)	Data 0.001 (0.402)	Loss 4.0041 (4.0610)	Top-1 acc 29.297 (30.107)	Top-5 acc 54.688 (53.163)	lr 0.00731
Warmup Train [31][50/3239]	Time 0.191 (0.605)	Data 0.001 (0.324)	Loss 4.0466 (4.0724)	Top-1 acc 32.422 (30.025)	Top-5 acc 57.422 (52.895)	lr 0.00730
Warmup Train [31][60/3239]	Time 0.173 (0.541)	Data 0.001 (0.272)	Loss 4.1435 (4.0775)	Top-1 acc 25.781 (29.848)	Top-5 acc 50.000 (52.914)	lr 0.00730
Warmup Train [31][70/3239]	Time 0.219 (0.501)	Data 0.001 (0.234)	Loss 4.2190 (4.0821)	Top-1 acc 30.078 (29.605)	Top-5 acc 50.391 (52.723)	lr 0.00729
Warmup Train [31][80/3239]	Time 0.203 (0.467)	Data 0.001 (0.206)	Loss 4.1994 (4.0799)	Top-1 acc 26.953 (29.620)	Top-5 acc 48.438 (52.744)	lr 0.00729
Warmup Train [31][90/3239]	Time 0.331 (0.444)	Data 0.003 (0.183)	Loss 4.0729 (4.0700)	Top-1 acc 32.031 (29.739)	Top-5 acc 53.906 (52.940)	lr 0.00728
Warmup Train [31][100/3239]	Time 0.225 (0.422)	Data 0.001 (0.166)	Loss 3.9721 (4.0696)	Top-1 acc 27.734 (29.676)	Top-5 acc 55.469 (52.893)	lr 0.00728
Warmup Train [31][110/3239]	Time 0.266 (0.405)	Data 0.001 (0.151)	Loss 3.9086 (4.0724)	Top-1 acc 31.641 (29.684)	Top-5 acc 55.469 (52.886)	lr 0.00728
Warmup Train [31][120/3239]	Time 0.360 (0.390)	Data 0.001 (0.139)	Loss 3.9867 (4.0691)	Top-1 acc 29.688 (29.742)	Top-5 acc 58.594 (52.964)	lr 0.00727
Warmup Train [31][130/3239]	Time 0.228 (0.378)	Data 0.001 (0.128)	Loss 3.7752 (4.0660)	Top-1 acc 33.984 (29.786)	Top-5 acc 59.375 (53.000)	lr 0.00727
Warmup Train [31][140/3239]	Time 0.249 (0.368)	Data 0.001 (0.119)	Loss 4.2377 (4.0710)	Top-1 acc 26.172 (29.685)	Top-5 acc 48.828 (52.837)	lr 0.00726
Warmup Train [31][150/3239]	Time 0.179 (0.358)	Data 0.001 (0.112)	Loss 3.8555 (4.0703)	Top-1 acc 30.469 (29.662)	Top-5 acc 55.469 (52.853)	lr 0.00726
Warmup Train [31][160/3239]	Time 0.245 (0.350)	Data 0.002 (0.105)	Loss 4.1184 (4.0754)	Top-1 acc 26.953 (29.573)	Top-5 acc 53.906 (52.814)	lr 0.00725
Warmup Train [31][170/3239]	Time 0.265 (0.343)	Data 0.001 (0.099)	Loss 4.0806 (4.0792)	Top-1 acc 30.859 (29.475)	Top-5 acc 50.000 (52.732)	lr 0.00725
Warmup Train [31][180/3239]	Time 0.158 (0.336)	Data 0.002 (0.094)	Loss 4.3713 (4.0816)	Top-1 acc 25.000 (29.424)	Top-5 acc 45.703 (52.674)	lr 0.00725
Warmup Train [31][190/3239]	Time 0.188 (0.330)	Data 0.001 (0.089)	Loss 3.9179 (4.0813)	Top-1 acc 31.641 (29.475)	Top-5 acc 55.859 (52.636)	lr 0.00724
Warmup Train [31][200/3239]	Time 0.132 (0.324)	Data 0.002 (0.085)	Loss 3.9742 (4.0811)	Top-1 acc 32.812 (29.487)	Top-5 acc 53.516 (52.703)	lr 0.00724
Warmup Train [31][210/3239]	Time 0.245 (0.319)	Data 0.001 (0.081)	Loss 4.3025 (4.0837)	Top-1 acc 25.391 (29.443)	Top-5 acc 47.656 (52.660)	lr 0.00723
Warmup Train [31][220/3239]	Time 0.354 (0.316)	Data 0.001 (0.077)	Loss 4.1888 (4.0839)	Top-1 acc 28.906 (29.454)	Top-5 acc 52.734 (52.667)	lr 0.00723
Warmup Train [31][230/3239]	Time 0.269 (0.312)	Data 0.001 (0.074)	Loss 4.1398 (4.0841)	Top-1 acc 24.609 (29.464)	Top-5 acc 50.000 (52.665)	lr 0.00722
Warmup Train [31][240/3239]	Time 0.215 (0.308)	Data 0.001 (0.071)	Loss 3.9572 (4.0826)	Top-1 acc 31.641 (29.457)	Top-5 acc 56.641 (52.708)	lr 0.00722
Warmup Train [31][250/3239]	Time 0.232 (0.305)	Data 0.001 (0.068)	Loss 4.0895 (4.0819)	Top-1 acc 32.422 (29.498)	Top-5 acc 52.344 (52.713)	lr 0.00722
Warmup Train [31][260/3239]	Time 0.225 (0.302)	Data 0.001 (0.066)	Loss 3.9958 (4.0837)	Top-1 acc 29.297 (29.453)	Top-5 acc 53.516 (52.685)	lr 0.00721
Warmup Train [31][270/3239]	Time 0.249 (0.299)	Data 0.001 (0.063)	Loss 4.0770 (4.0838)	Top-1 acc 29.297 (29.428)	Top-5 acc 51.172 (52.645)	lr 0.00721
Warmup Train [31][280/3239]	Time 0.225 (0.296)	Data 0.001 (0.061)	Loss 4.0122 (4.0863)	Top-1 acc 32.422 (29.396)	Top-5 acc 55.859 (52.634)	lr 0.00720
Warmup Train [31][290/3239]	Time 0.216 (0.294)	Data 0.001 (0.059)	Loss 4.0578 (4.0882)	Top-1 acc 30.859 (29.353)	Top-5 acc 55.469 (52.601)	lr 0.00720
Warmup Train [31][300/3239]	Time 0.177 (0.291)	Data 0.001 (0.057)	Loss 4.1314 (4.0855)	Top-1 acc 30.469 (29.392)	Top-5 acc 48.828 (52.657)	lr 0.00719
Warmup Train [31][310/3239]	Time 0.241 (0.289)	Data 0.002 (0.056)	Loss 4.2590 (4.0846)	Top-1 acc 24.219 (29.395)	Top-5 acc 48.828 (52.682)	lr 0.00719
Warmup Train [31][320/3239]	Time 0.298 (0.287)	Data 0.001 (0.054)	Loss 4.2260 (4.0836)	Top-1 acc 25.391 (29.384)	Top-5 acc 49.219 (52.695)	lr 0.00719
Warmup Train [31][330/3239]	Time 0.234 (0.285)	Data 0.001 (0.052)	Loss 3.8510 (4.0837)	Top-1 acc 34.375 (29.381)	Top-5 acc 57.031 (52.647)	lr 0.00718
Warmup Train [31][340/3239]	Time 0.214 (0.283)	Data 0.001 (0.051)	Loss 4.1563 (4.0821)	Top-1 acc 26.562 (29.382)	Top-5 acc 50.781 (52.660)	lr 0.00718
Warmup Train [31][350/3239]	Time 0.191 (0.281)	Data 0.001 (0.049)	Loss 4.0867 (4.0830)	Top-1 acc 28.516 (29.326)	Top-5 acc 51.953 (52.623)	lr 0.00717
Warmup Train [31][360/3239]	Time 0.194 (0.280)	Data 0.001 (0.048)	Loss 4.2480 (4.0860)	Top-1 acc 26.953 (29.293)	Top-5 acc 50.391 (52.570)	lr 0.00717
Warmup Train [31][370/3239]	Time 0.178 (0.278)	Data 0.002 (0.047)	Loss 4.2669 (4.0864)	Top-1 acc 27.734 (29.324)	Top-5 acc 48.828 (52.546)	lr 0.00716
Warmup Train [31][380/3239]	Time 0.220 (0.276)	Data 0.002 (0.046)	Loss 4.3034 (4.0878)	Top-1 acc 24.609 (29.288)	Top-5 acc 47.656 (52.539)	lr 0.00716
Warmup Train [31][390/3239]	Time 0.178 (0.275)	Data 0.001 (0.045)	Loss 3.8893 (4.0873)	Top-1 acc 32.031 (29.318)	Top-5 acc 57.812 (52.547)	lr 0.00716
Warmup Train [31][400/3239]	Time 0.247 (0.274)	Data 0.001 (0.044)	Loss 4.0599 (4.0871)	Top-1 acc 29.297 (29.313)	Top-5 acc 53.516 (52.566)	lr 0.00715
Warmup Train [31][410/3239]	Time 0.225 (0.273)	Data 0.001 (0.043)	Loss 4.0965 (4.0873)	Top-1 acc 30.078 (29.323)	Top-5 acc 52.344 (52.539)	lr 0.00715
Warmup Train [31][420/3239]	Time 0.239 (0.272)	Data 0.001 (0.042)	Loss 4.1001 (4.0882)	Top-1 acc 32.422 (29.324)	Top-5 acc 53.125 (52.557)	lr 0.00714
Warmup Train [31][430/3239]	Time 0.363 (0.271)	Data 0.001 (0.041)	Loss 3.9978 (4.0874)	Top-1 acc 29.297 (29.313)	Top-5 acc 51.953 (52.588)	lr 0.00714
Warmup Train [31][440/3239]	Time 0.207 (0.270)	Data 0.001 (0.040)	Loss 4.2054 (4.0874)	Top-1 acc 26.953 (29.322)	Top-5 acc 49.219 (52.593)	lr 0.00713
Warmup Train [31][450/3239]	Time 0.126 (0.269)	Data 0.001 (0.039)	Loss 4.2689 (4.0877)	Top-1 acc 26.953 (29.312)	Top-5 acc 47.266 (52.586)	lr 0.00713
Warmup Train [31][460/3239]	Time 0.206 (0.268)	Data 0.002 (0.038)	Loss 4.2232 (4.0874)	Top-1 acc 30.469 (29.339)	Top-5 acc 46.094 (52.600)	lr 0.00713
Warmup Train [31][470/3239]	Time 0.137 (0.267)	Data 0.001 (0.038)	Loss 4.2723 (4.0880)	Top-1 acc 26.953 (29.311)	Top-5 acc 48.438 (52.583)	lr 0.00712
Warmup Train [31][480/3239]	Time 0.216 (0.266)	Data 0.001 (0.037)	Loss 3.9303 (4.0889)	Top-1 acc 31.250 (29.286)	Top-5 acc 52.734 (52.583)	lr 0.00712
Warmup Train [31][490/3239]	Time 0.188 (0.265)	Data 0.001 (0.036)	Loss 3.9471 (4.0882)	Top-1 acc 33.984 (29.305)	Top-5 acc 55.078 (52.594)	lr 0.00711
Warmup Train [31][500/3239]	Time 0.145 (0.264)	Data 0.001 (0.036)	Loss 4.1568 (4.0891)	Top-1 acc 28.125 (29.299)	Top-5 acc 51.953 (52.587)	lr 0.00711
Warmup Train [31][510/3239]	Time 0.237 (0.263)	Data 0.044 (0.035)	Loss 4.0870 (4.0886)	Top-1 acc 30.859 (29.292)	Top-5 acc 51.562 (52.589)	lr 0.00711
Warmup Train [31][520/3239]	Time 0.215 (0.262)	Data 0.001 (0.035)	Loss 4.0241 (4.0884)	Top-1 acc 30.078 (29.289)	Top-5 acc 54.297 (52.596)	lr 0.00710
Warmup Train [31][530/3239]	Time 0.376 (0.262)	Data 0.001 (0.034)	Loss 4.1238 (4.0885)	Top-1 acc 31.250 (29.312)	Top-5 acc 50.000 (52.606)	lr 0.00710
Warmup Train [31][540/3239]	Time 0.264 (0.261)	Data 0.001 (0.034)	Loss 4.3155 (4.0890)	Top-1 acc 25.391 (29.308)	Top-5 acc 49.609 (52.602)	lr 0.00709
Warmup Train [31][550/3239]	Time 0.225 (0.260)	Data 0.001 (0.033)	Loss 4.1783 (4.0895)	Top-1 acc 28.906 (29.315)	Top-5 acc 50.781 (52.596)	lr 0.00709
Warmup Train [31][560/3239]	Time 0.151 (0.259)	Data 0.001 (0.032)	Loss 3.8912 (4.0888)	Top-1 acc 34.766 (29.300)	Top-5 acc 57.812 (52.606)	lr 0.00708
Warmup Train [31][570/3239]	Time 0.178 (0.259)	Data 0.001 (0.032)	Loss 4.1224 (4.0884)	Top-1 acc 28.906 (29.309)	Top-5 acc 50.781 (52.615)	lr 0.00708
Warmup Train [31][580/3239]	Time 0.238 (0.258)	Data 0.001 (0.031)	Loss 4.1255 (4.0883)	Top-1 acc 27.734 (29.321)	Top-5 acc 50.781 (52.608)	lr 0.00708
Warmup Train [31][590/3239]	Time 0.241 (0.257)	Data 0.001 (0.031)	Loss 4.0748 (4.0885)	Top-1 acc 29.688 (29.321)	Top-5 acc 54.688 (52.600)	lr 0.00707
Warmup Train [31][600/3239]	Time 0.257 (0.256)	Data 0.001 (0.030)	Loss 4.1812 (4.0879)	Top-1 acc 30.469 (29.331)	Top-5 acc 51.172 (52.625)	lr 0.00707
Warmup Train [31][610/3239]	Time 0.200 (0.256)	Data 0.001 (0.030)	Loss 3.9941 (4.0879)	Top-1 acc 30.078 (29.305)	Top-5 acc 53.516 (52.624)	lr 0.00706
Warmup Train [31][620/3239]	Time 0.142 (0.255)	Data 0.001 (0.030)	Loss 4.1074 (4.0883)	Top-1 acc 31.250 (29.284)	Top-5 acc 53.125 (52.626)	lr 0.00706
Warmup Train [31][630/3239]	Time 0.202 (0.255)	Data 0.001 (0.029)	Loss 3.8935 (4.0883)	Top-1 acc 33.984 (29.281)	Top-5 acc 56.250 (52.621)	lr 0.00705
Warmup Train [31][640/3239]	Time 0.227 (0.254)	Data 0.001 (0.029)	Loss 3.9142 (4.0877)	Top-1 acc 29.297 (29.285)	Top-5 acc 55.078 (52.640)	lr 0.00705
Warmup Train [31][650/3239]	Time 0.201 (0.254)	Data 0.001 (0.028)	Loss 4.1111 (4.0874)	Top-1 acc 29.688 (29.296)	Top-5 acc 50.391 (52.634)	lr 0.00705
Warmup Train [31][660/3239]	Time 0.229 (0.254)	Data 0.001 (0.028)	Loss 4.1956 (4.0871)	Top-1 acc 27.344 (29.305)	Top-5 acc 47.266 (52.644)	lr 0.00704
Warmup Train [31][670/3239]	Time 0.211 (0.253)	Data 0.001 (0.028)	Loss 4.2123 (4.0872)	Top-1 acc 26.172 (29.317)	Top-5 acc 47.656 (52.637)	lr 0.00704
Warmup Train [31][680/3239]	Time 0.198 (0.253)	Data 0.001 (0.027)	Loss 3.8804 (4.0859)	Top-1 acc 33.984 (29.331)	Top-5 acc 58.984 (52.671)	lr 0.00703
Warmup Train [31][690/3239]	Time 0.226 (0.252)	Data 0.001 (0.027)	Loss 4.1835 (4.0868)	Top-1 acc 32.422 (29.320)	Top-5 acc 52.344 (52.655)	lr 0.00703
Warmup Train [31][700/3239]	Time 0.198 (0.252)	Data 0.001 (0.026)	Loss 3.9998 (4.0862)	Top-1 acc 26.953 (29.327)	Top-5 acc 53.516 (52.671)	lr 0.00702
Warmup Train [31][710/3239]	Time 0.204 (0.252)	Data 0.001 (0.026)	Loss 4.2340 (4.0858)	Top-1 acc 27.734 (29.331)	Top-5 acc 48.047 (52.671)	lr 0.00702
Warmup Train [31][720/3239]	Time 0.175 (0.251)	Data 0.001 (0.026)	Loss 3.8257 (4.0853)	Top-1 acc 35.547 (29.339)	Top-5 acc 59.375 (52.680)	lr 0.00702
Warmup Train [31][730/3239]	Time 0.293 (0.251)	Data 0.001 (0.026)	Loss 4.0260 (4.0849)	Top-1 acc 32.812 (29.351)	Top-5 acc 57.422 (52.684)	lr 0.00701
Warmup Train [31][740/3239]	Time 0.229 (0.251)	Data 0.002 (0.025)	Loss 4.1329 (4.0852)	Top-1 acc 29.688 (29.352)	Top-5 acc 48.047 (52.675)	lr 0.00701
Warmup Train [31][750/3239]	Time 0.236 (0.251)	Data 0.001 (0.025)	Loss 4.1861 (4.0847)	Top-1 acc 28.125 (29.375)	Top-5 acc 52.734 (52.691)	lr 0.00700
Warmup Train [31][760/3239]	Time 0.145 (0.250)	Data 0.001 (0.025)	Loss 4.0046 (4.0849)	Top-1 acc 33.203 (29.384)	Top-5 acc 57.031 (52.698)	lr 0.00700
Warmup Train [31][770/3239]	Time 0.169 (0.250)	Data 0.002 (0.024)	Loss 3.9610 (4.0845)	Top-1 acc 30.078 (29.375)	Top-5 acc 55.469 (52.696)	lr 0.00700
Warmup Train [31][780/3239]	Time 0.215 (0.250)	Data 0.001 (0.024)	Loss 4.0943 (4.0847)	Top-1 acc 29.297 (29.367)	Top-5 acc 49.219 (52.685)	lr 0.00699
Warmup Train [31][790/3239]	Time 0.197 (0.249)	Data 0.001 (0.024)	Loss 4.1510 (4.0846)	Top-1 acc 26.953 (29.373)	Top-5 acc 52.344 (52.696)	lr 0.00699
Warmup Train [31][800/3239]	Time 0.268 (0.249)	Data 0.001 (0.024)	Loss 3.9068 (4.0842)	Top-1 acc 34.766 (29.386)	Top-5 acc 58.203 (52.716)	lr 0.00698
Warmup Train [31][810/3239]	Time 0.321 (0.249)	Data 0.002 (0.023)	Loss 4.1606 (4.0852)	Top-1 acc 28.906 (29.363)	Top-5 acc 53.125 (52.705)	lr 0.00698
Warmup Train [31][820/3239]	Time 0.304 (0.249)	Data 0.001 (0.023)	Loss 4.0819 (4.0852)	Top-1 acc 26.562 (29.355)	Top-5 acc 53.516 (52.706)	lr 0.00697
Warmup Train [31][830/3239]	Time 0.256 (0.248)	Data 0.001 (0.023)	Loss 4.1158 (4.0851)	Top-1 acc 28.125 (29.368)	Top-5 acc 53.125 (52.711)	lr 0.00697
Warmup Train [31][840/3239]	Time 0.235 (0.248)	Data 0.001 (0.023)	Loss 4.0509 (4.0848)	Top-1 acc 33.594 (29.388)	Top-5 acc 53.906 (52.710)	lr 0.00697
Warmup Train [31][850/3239]	Time 0.225 (0.248)	Data 0.001 (0.022)	Loss 4.0876 (4.0842)	Top-1 acc 29.688 (29.397)	Top-5 acc 51.953 (52.729)	lr 0.00696
Warmup Train [31][860/3239]	Time 0.157 (0.247)	Data 0.001 (0.022)	Loss 4.2000 (4.0844)	Top-1 acc 25.000 (29.393)	Top-5 acc 50.781 (52.726)	lr 0.00696
Warmup Train [31][870/3239]	Time 0.300 (0.247)	Data 0.001 (0.022)	Loss 4.0761 (4.0847)	Top-1 acc 26.953 (29.391)	Top-5 acc 50.391 (52.713)	lr 0.00695
Warmup Train [31][880/3239]	Time 0.199 (0.247)	Data 0.002 (0.022)	Loss 4.0597 (4.0848)	Top-1 acc 26.172 (29.386)	Top-5 acc 52.344 (52.712)	lr 0.00695
Warmup Train [31][890/3239]	Time 0.199 (0.247)	Data 0.001 (0.022)	Loss 3.9670 (4.0844)	Top-1 acc 31.250 (29.382)	Top-5 acc 57.812 (52.724)	lr 0.00695
Warmup Train [31][900/3239]	Time 0.236 (0.246)	Data 0.001 (0.021)	Loss 4.0550 (4.0844)	Top-1 acc 28.906 (29.377)	Top-5 acc 49.609 (52.726)	lr 0.00694
Warmup Train [31][910/3239]	Time 0.309 (0.246)	Data 0.001 (0.021)	Loss 3.8207 (4.0841)	Top-1 acc 34.375 (29.382)	Top-5 acc 57.812 (52.724)	lr 0.00694
Warmup Train [31][920/3239]	Time 0.377 (0.246)	Data 0.001 (0.021)	Loss 4.1414 (4.0842)	Top-1 acc 26.172 (29.389)	Top-5 acc 51.562 (52.720)	lr 0.00693
Warmup Train [31][930/3239]	Time 0.224 (0.246)	Data 0.001 (0.021)	Loss 4.2595 (4.0851)	Top-1 acc 24.609 (29.364)	Top-5 acc 51.172 (52.701)	lr 0.00693
Warmup Train [31][940/3239]	Time 0.204 (0.246)	Data 0.001 (0.021)	Loss 4.1692 (4.0852)	Top-1 acc 25.000 (29.357)	Top-5 acc 50.781 (52.693)	lr 0.00692
Warmup Train [31][950/3239]	Time 0.238 (0.245)	Data 0.001 (0.020)	Loss 3.9205 (4.0856)	Top-1 acc 35.156 (29.358)	Top-5 acc 56.641 (52.685)	lr 0.00692
Warmup Train [31][960/3239]	Time 0.244 (0.245)	Data 0.001 (0.020)	Loss 4.0574 (4.0858)	Top-1 acc 30.469 (29.350)	Top-5 acc 55.469 (52.685)	lr 0.00692
Warmup Train [31][970/3239]	Time 0.245 (0.245)	Data 0.001 (0.020)	Loss 4.1400 (4.0866)	Top-1 acc 23.438 (29.321)	Top-5 acc 48.047 (52.662)	lr 0.00691
Warmup Train [31][980/3239]	Time 0.198 (0.245)	Data 0.001 (0.020)	Loss 4.0688 (4.0864)	Top-1 acc 29.688 (29.322)	Top-5 acc 54.297 (52.667)	lr 0.00691
Warmup Train [31][990/3239]	Time 0.207 (0.244)	Data 0.001 (0.020)	Loss 3.9864 (4.0866)	Top-1 acc 35.547 (29.331)	Top-5 acc 53.906 (52.661)	lr 0.00690
Warmup Train [31][1000/3239]	Time 0.252 (0.244)	Data 0.001 (0.020)	Loss 3.8455 (4.0867)	Top-1 acc 32.031 (29.322)	Top-5 acc 58.594 (52.652)	lr 0.00690
Warmup Train [31][1010/3239]	Time 0.185 (0.244)	Data 0.001 (0.019)	Loss 4.1238 (4.0868)	Top-1 acc 27.344 (29.322)	Top-5 acc 51.562 (52.658)	lr 0.00689
Warmup Train [31][1020/3239]	Time 0.401 (0.244)	Data 0.001 (0.019)	Loss 4.0940 (4.0867)	Top-1 acc 30.469 (29.340)	Top-5 acc 50.391 (52.660)	lr 0.00689
Warmup Train [31][1030/3239]	Time 0.193 (0.244)	Data 0.001 (0.019)	Loss 4.1367 (4.0867)	Top-1 acc 31.250 (29.346)	Top-5 acc 54.688 (52.667)	lr 0.00689
Warmup Train [31][1040/3239]	Time 0.185 (0.243)	Data 0.001 (0.019)	Loss 4.0010 (4.0872)	Top-1 acc 32.031 (29.346)	Top-5 acc 57.031 (52.656)	lr 0.00688
Warmup Train [31][1050/3239]	Time 0.185 (0.243)	Data 0.001 (0.019)	Loss 4.0103 (4.0877)	Top-1 acc 30.469 (29.340)	Top-5 acc 55.078 (52.647)	lr 0.00688
Warmup Train [31][1060/3239]	Time 0.208 (0.243)	Data 0.001 (0.019)	Loss 4.1393 (4.0874)	Top-1 acc 29.688 (29.357)	Top-5 acc 50.391 (52.653)	lr 0.00687
Warmup Train [31][1070/3239]	Time 0.184 (0.243)	Data 0.001 (0.018)	Loss 3.8939 (4.0880)	Top-1 acc 35.547 (29.342)	Top-5 acc 55.859 (52.637)	lr 0.00687
Warmup Train [31][1080/3239]	Time 0.183 (0.243)	Data 0.001 (0.018)	Loss 3.8883 (4.0881)	Top-1 acc 33.984 (29.345)	Top-5 acc 57.031 (52.630)	lr 0.00687
Warmup Train [31][1090/3239]	Time 0.213 (0.242)	Data 0.001 (0.018)	Loss 3.9990 (4.0875)	Top-1 acc 32.422 (29.356)	Top-5 acc 55.859 (52.645)	lr 0.00686
Warmup Train [31][1100/3239]	Time 0.200 (0.242)	Data 0.001 (0.018)	Loss 4.2112 (4.0881)	Top-1 acc 23.828 (29.353)	Top-5 acc 48.047 (52.629)	lr 0.00686
Warmup Train [31][1110/3239]	Time 0.300 (0.242)	Data 0.001 (0.018)	Loss 3.8684 (4.0879)	Top-1 acc 32.031 (29.351)	Top-5 acc 60.156 (52.633)	lr 0.00685
Warmup Train [31][1120/3239]	Time 0.219 (0.242)	Data 0.001 (0.018)	Loss 4.1191 (4.0873)	Top-1 acc 28.125 (29.359)	Top-5 acc 48.828 (52.643)	lr 0.00685
Warmup Train [31][1130/3239]	Time 0.134 (0.242)	Data 0.001 (0.018)	Loss 4.1076 (4.0875)	Top-1 acc 31.641 (29.361)	Top-5 acc 53.516 (52.638)	lr 0.00684
Warmup Train [31][1140/3239]	Time 0.186 (0.242)	Data 0.001 (0.018)	Loss 3.9548 (4.0874)	Top-1 acc 29.297 (29.355)	Top-5 acc 54.688 (52.640)	lr 0.00684
Warmup Train [31][1150/3239]	Time 0.259 (0.242)	Data 0.002 (0.017)	Loss 4.0926 (4.0876)	Top-1 acc 26.953 (29.352)	Top-5 acc 53.906 (52.633)	lr 0.00684
Warmup Train [31][1160/3239]	Time 0.209 (0.242)	Data 0.001 (0.017)	Loss 3.9511 (4.0879)	Top-1 acc 31.641 (29.337)	Top-5 acc 57.031 (52.633)	lr 0.00683
Warmup Train [31][1170/3239]	Time 0.186 (0.242)	Data 0.002 (0.017)	Loss 4.0422 (4.0876)	Top-1 acc 30.469 (29.345)	Top-5 acc 54.297 (52.641)	lr 0.00683
Warmup Train [31][1180/3239]	Time 0.264 (0.241)	Data 0.001 (0.017)	Loss 4.1808 (4.0873)	Top-1 acc 30.078 (29.357)	Top-5 acc 51.562 (52.653)	lr 0.00682
Warmup Train [31][1190/3239]	Time 0.231 (0.241)	Data 0.001 (0.017)	Loss 4.1116 (4.0873)	Top-1 acc 28.125 (29.358)	Top-5 acc 53.516 (52.661)	lr 0.00682
Warmup Train [31][1200/3239]	Time 0.198 (0.241)	Data 0.001 (0.017)	Loss 4.2280 (4.0873)	Top-1 acc 27.344 (29.374)	Top-5 acc 47.266 (52.664)	lr 0.00682
Warmup Train [31][1210/3239]	Time 0.371 (0.241)	Data 0.001 (0.017)	Loss 4.1231 (4.0872)	Top-1 acc 27.344 (29.368)	Top-5 acc 53.125 (52.670)	lr 0.00681
Warmup Train [31][1220/3239]	Time 0.222 (0.241)	Data 0.001 (0.017)	Loss 4.2000 (4.0873)	Top-1 acc 26.953 (29.371)	Top-5 acc 50.781 (52.669)	lr 0.00681
Warmup Train [31][1230/3239]	Time 0.200 (0.241)	Data 0.001 (0.016)	Loss 4.1063 (4.0869)	Top-1 acc 28.906 (29.385)	Top-5 acc 52.734 (52.671)	lr 0.00680
Warmup Train [31][1240/3239]	Time 0.146 (0.241)	Data 0.002 (0.016)	Loss 4.3050 (4.0872)	Top-1 acc 26.562 (29.382)	Top-5 acc 47.266 (52.659)	lr 0.00680
Warmup Train [31][1250/3239]	Time 0.218 (0.241)	Data 0.001 (0.016)	Loss 4.0193 (4.0869)	Top-1 acc 32.812 (29.383)	Top-5 acc 57.422 (52.669)	lr 0.00679
Warmup Train [31][1260/3239]	Time 0.218 (0.241)	Data 0.002 (0.016)	Loss 4.2001 (4.0873)	Top-1 acc 26.172 (29.377)	Top-5 acc 49.219 (52.657)	lr 0.00679
Warmup Train [31][1270/3239]	Time 0.242 (0.241)	Data 0.002 (0.016)	Loss 4.0514 (4.0875)	Top-1 acc 31.250 (29.373)	Top-5 acc 52.734 (52.643)	lr 0.00679
Warmup Train [31][1280/3239]	Time 0.203 (0.240)	Data 0.001 (0.016)	Loss 4.2188 (4.0876)	Top-1 acc 30.859 (29.377)	Top-5 acc 50.000 (52.641)	lr 0.00678
Warmup Train [31][1290/3239]	Time 0.233 (0.240)	Data 0.001 (0.016)	Loss 4.2090 (4.0873)	Top-1 acc 25.781 (29.383)	Top-5 acc 50.000 (52.649)	lr 0.00678
Warmup Train [31][1300/3239]	Time 0.286 (0.240)	Data 0.001 (0.016)	Loss 3.9940 (4.0873)	Top-1 acc 35.938 (29.381)	Top-5 acc 54.297 (52.648)	lr 0.00677
Warmup Train [31][1310/3239]	Time 0.203 (0.240)	Data 0.001 (0.016)	Loss 4.2741 (4.0870)	Top-1 acc 26.172 (29.390)	Top-5 acc 46.875 (52.659)	lr 0.00677
Warmup Train [31][1320/3239]	Time 0.197 (0.240)	Data 0.001 (0.015)	Loss 3.9424 (4.0871)	Top-1 acc 30.859 (29.382)	Top-5 acc 54.688 (52.652)	lr 0.00677
Warmup Train [31][1330/3239]	Time 0.150 (0.240)	Data 0.002 (0.015)	Loss 3.9284 (4.0872)	Top-1 acc 31.641 (29.381)	Top-5 acc 55.859 (52.647)	lr 0.00676
Warmup Train [31][1340/3239]	Time 0.245 (0.240)	Data 0.001 (0.015)	Loss 4.0382 (4.0876)	Top-1 acc 33.203 (29.376)	Top-5 acc 57.031 (52.643)	lr 0.00676
Warmup Train [31][1350/3239]	Time 0.215 (0.240)	Data 0.001 (0.015)	Loss 3.9459 (4.0876)	Top-1 acc 33.203 (29.374)	Top-5 acc 54.297 (52.638)	lr 0.00675
Warmup Train [31][1360/3239]	Time 0.151 (0.239)	Data 0.001 (0.015)	Loss 3.9609 (4.0878)	Top-1 acc 35.938 (29.374)	Top-5 acc 55.078 (52.634)	lr 0.00675
Warmup Train [31][1370/3239]	Time 0.248 (0.239)	Data 0.001 (0.015)	Loss 4.0549 (4.0874)	Top-1 acc 30.078 (29.375)	Top-5 acc 51.953 (52.628)	lr 0.00675
Warmup Train [31][1380/3239]	Time 0.142 (0.239)	Data 0.002 (0.015)	Loss 4.3791 (4.0874)	Top-1 acc 24.609 (29.377)	Top-5 acc 46.484 (52.627)	lr 0.00674
Warmup Train [31][1390/3239]	Time 0.193 (0.239)	Data 0.001 (0.015)	Loss 4.1338 (4.0870)	Top-1 acc 32.031 (29.373)	Top-5 acc 49.609 (52.630)	lr 0.00674
Warmup Train [31][1400/3239]	Time 0.203 (0.239)	Data 0.001 (0.015)	Loss 3.9713 (4.0868)	Top-1 acc 33.984 (29.385)	Top-5 acc 54.297 (52.629)	lr 0.00673
Warmup Train [31][1410/3239]	Time 0.322 (0.239)	Data 0.001 (0.015)	Loss 4.0695 (4.0868)	Top-1 acc 29.688 (29.387)	Top-5 acc 54.688 (52.631)	lr 0.00673
Warmup Train [31][1420/3239]	Time 0.184 (0.239)	Data 0.001 (0.015)	Loss 4.1816 (4.0867)	Top-1 acc 26.562 (29.384)	Top-5 acc 55.469 (52.635)	lr 0.00672
Warmup Train [31][1430/3239]	Time 0.213 (0.239)	Data 0.001 (0.014)	Loss 3.7996 (4.0868)	Top-1 acc 37.109 (29.381)	Top-5 acc 56.250 (52.629)	lr 0.00672
Warmup Train [31][1440/3239]	Time 0.173 (0.239)	Data 0.001 (0.014)	Loss 4.0917 (4.0870)	Top-1 acc 30.078 (29.377)	Top-5 acc 54.297 (52.624)	lr 0.00672
Warmup Train [31][1450/3239]	Time 0.200 (0.238)	Data 0.001 (0.014)	Loss 4.3234 (4.0867)	Top-1 acc 27.344 (29.384)	Top-5 acc 48.047 (52.628)	lr 0.00671
Warmup Train [31][1460/3239]	Time 0.259 (0.238)	Data 0.002 (0.014)	Loss 4.0827 (4.0864)	Top-1 acc 28.906 (29.388)	Top-5 acc 52.344 (52.635)	lr 0.00671
Warmup Train [31][1470/3239]	Time 0.212 (0.238)	Data 0.001 (0.014)	Loss 4.2693 (4.0865)	Top-1 acc 26.953 (29.387)	Top-5 acc 50.000 (52.634)	lr 0.00670
Warmup Train [31][1480/3239]	Time 0.231 (0.238)	Data 0.001 (0.014)	Loss 4.0120 (4.0867)	Top-1 acc 30.469 (29.385)	Top-5 acc 55.078 (52.633)	lr 0.00670
Warmup Train [31][1490/3239]	Time 0.307 (0.238)	Data 0.001 (0.014)	Loss 4.0774 (4.0867)	Top-1 acc 25.391 (29.382)	Top-5 acc 51.172 (52.635)	lr 0.00670
Warmup Train [31][1500/3239]	Time 0.273 (0.238)	Data 0.001 (0.014)	Loss 4.0394 (4.0865)	Top-1 acc 30.469 (29.389)	Top-5 acc 52.344 (52.641)	lr 0.00669
Warmup Train [31][1510/3239]	Time 0.248 (0.238)	Data 0.001 (0.014)	Loss 4.2616 (4.0867)	Top-1 acc 25.781 (29.386)	Top-5 acc 51.172 (52.632)	lr 0.00669
Warmup Train [31][1520/3239]	Time 0.294 (0.238)	Data 0.001 (0.014)	Loss 4.0438 (4.0867)	Top-1 acc 32.422 (29.390)	Top-5 acc 54.688 (52.628)	lr 0.00668
Warmup Train [31][1530/3239]	Time 0.279 (0.238)	Data 0.001 (0.014)	Loss 4.1180 (4.0871)	Top-1 acc 32.422 (29.389)	Top-5 acc 53.516 (52.619)	lr 0.00668
Warmup Train [31][1540/3239]	Time 0.223 (0.238)	Data 0.002 (0.014)	Loss 4.1551 (4.0870)	Top-1 acc 26.953 (29.383)	Top-5 acc 49.609 (52.620)	lr 0.00667
Warmup Train [31][1550/3239]	Time 0.209 (0.237)	Data 0.001 (0.014)	Loss 3.9480 (4.0869)	Top-1 acc 30.469 (29.381)	Top-5 acc 57.422 (52.621)	lr 0.00667
Warmup Train [31][1560/3239]	Time 0.224 (0.237)	Data 0.001 (0.013)	Loss 3.8857 (4.0869)	Top-1 acc 32.422 (29.370)	Top-5 acc 55.859 (52.622)	lr 0.00667
Warmup Train [31][1570/3239]	Time 0.229 (0.237)	Data 0.001 (0.013)	Loss 4.0852 (4.0869)	Top-1 acc 33.594 (29.375)	Top-5 acc 55.469 (52.623)	lr 0.00666
Warmup Train [31][1580/3239]	Time 0.242 (0.237)	Data 0.002 (0.013)	Loss 4.1143 (4.0866)	Top-1 acc 26.953 (29.381)	Top-5 acc 50.781 (52.620)	lr 0.00666
Warmup Train [31][1590/3239]	Time 0.173 (0.237)	Data 0.001 (0.013)	Loss 4.1059 (4.0869)	Top-1 acc 28.516 (29.380)	Top-5 acc 53.125 (52.618)	lr 0.00665
Warmup Train [31][1600/3239]	Time 0.182 (0.237)	Data 0.001 (0.013)	Loss 4.1033 (4.0871)	Top-1 acc 28.906 (29.378)	Top-5 acc 50.391 (52.612)	lr 0.00665
Warmup Train [31][1610/3239]	Time 0.207 (0.237)	Data 0.001 (0.013)	Loss 4.1848 (4.0869)	Top-1 acc 28.516 (29.387)	Top-5 acc 51.172 (52.620)	lr 0.00665
Warmup Train [31][1620/3239]	Time 0.291 (0.237)	Data 0.001 (0.013)	Loss 4.0274 (4.0869)	Top-1 acc 29.688 (29.390)	Top-5 acc 54.688 (52.622)	lr 0.00664
Warmup Train [31][1630/3239]	Time 0.360 (0.237)	Data 0.001 (0.013)	Loss 3.9528 (4.0867)	Top-1 acc 30.469 (29.391)	Top-5 acc 57.031 (52.626)	lr 0.00664
Warmup Train [31][1640/3239]	Time 0.254 (0.237)	Data 0.002 (0.013)	Loss 4.0178 (4.0866)	Top-1 acc 33.203 (29.394)	Top-5 acc 54.688 (52.627)	lr 0.00663
Warmup Train [31][1650/3239]	Time 0.206 (0.237)	Data 0.001 (0.013)	Loss 3.9858 (4.0870)	Top-1 acc 33.594 (29.391)	Top-5 acc 54.688 (52.621)	lr 0.00663
Warmup Train [31][1660/3239]	Time 0.144 (0.237)	Data 0.001 (0.013)	Loss 4.1235 (4.0869)	Top-1 acc 30.078 (29.393)	Top-5 acc 52.344 (52.625)	lr 0.00663
Warmup Train [31][1670/3239]	Time 0.205 (0.237)	Data 0.001 (0.013)	Loss 4.1718 (4.0867)	Top-1 acc 27.344 (29.398)	Top-5 acc 51.953 (52.631)	lr 0.00662
Warmup Train [31][1680/3239]	Time 0.212 (0.237)	Data 0.001 (0.013)	Loss 4.0707 (4.0868)	Top-1 acc 29.297 (29.397)	Top-5 acc 52.734 (52.629)	lr 0.00662
Warmup Train [31][1690/3239]	Time 0.163 (0.237)	Data 0.001 (0.013)	Loss 3.9171 (4.0867)	Top-1 acc 32.812 (29.396)	Top-5 acc 55.469 (52.636)	lr 0.00661
Warmup Train [31][1700/3239]	Time 0.199 (0.236)	Data 0.001 (0.013)	Loss 4.3463 (4.0867)	Top-1 acc 25.781 (29.398)	Top-5 acc 47.656 (52.639)	lr 0.00661
Warmup Train [31][1710/3239]	Time 0.246 (0.236)	Data 0.001 (0.013)	Loss 4.0018 (4.0869)	Top-1 acc 26.172 (29.396)	Top-5 acc 52.344 (52.637)	lr 0.00660
Warmup Train [31][1720/3239]	Time 0.188 (0.236)	Data 0.001 (0.013)	Loss 4.1899 (4.0866)	Top-1 acc 26.562 (29.396)	Top-5 acc 48.438 (52.638)	lr 0.00660
Warmup Train [31][1730/3239]	Time 0.267 (0.236)	Data 0.001 (0.012)	Loss 4.0313 (4.0863)	Top-1 acc 29.688 (29.396)	Top-5 acc 52.734 (52.646)	lr 0.00660
Warmup Train [31][1740/3239]	Time 0.205 (0.236)	Data 0.001 (0.012)	Loss 4.1318 (4.0862)	Top-1 acc 30.859 (29.401)	Top-5 acc 53.906 (52.653)	lr 0.00659
Warmup Train [31][1750/3239]	Time 0.203 (0.236)	Data 0.001 (0.012)	Loss 4.2037 (4.0862)	Top-1 acc 28.516 (29.399)	Top-5 acc 52.344 (52.652)	lr 0.00659
Warmup Train [31][1760/3239]	Time 0.244 (0.236)	Data 0.001 (0.012)	Loss 3.8970 (4.0860)	Top-1 acc 30.859 (29.401)	Top-5 acc 54.297 (52.659)	lr 0.00658
Warmup Train [31][1770/3239]	Time 0.253 (0.236)	Data 0.001 (0.012)	Loss 4.2066 (4.0859)	Top-1 acc 24.219 (29.403)	Top-5 acc 48.828 (52.660)	lr 0.00658
Warmup Train [31][1780/3239]	Time 0.201 (0.236)	Data 0.001 (0.012)	Loss 3.9753 (4.0859)	Top-1 acc 30.469 (29.411)	Top-5 acc 57.422 (52.660)	lr 0.00658
Warmup Train [31][1790/3239]	Time 0.239 (0.236)	Data 0.001 (0.012)	Loss 4.2668 (4.0859)	Top-1 acc 26.562 (29.416)	Top-5 acc 47.656 (52.655)	lr 0.00657
Warmup Train [31][1800/3239]	Time 0.234 (0.236)	Data 0.003 (0.012)	Loss 4.0507 (4.0860)	Top-1 acc 30.469 (29.418)	Top-5 acc 52.734 (52.653)	lr 0.00657
Warmup Train [31][1810/3239]	Time 0.221 (0.236)	Data 0.001 (0.012)	Loss 3.8096 (4.0857)	Top-1 acc 38.672 (29.422)	Top-5 acc 59.766 (52.660)	lr 0.00656
Warmup Train [31][1820/3239]	Time 0.253 (0.236)	Data 0.001 (0.012)	Loss 4.0856 (4.0854)	Top-1 acc 30.859 (29.429)	Top-5 acc 47.656 (52.662)	lr 0.00656
Warmup Train [31][1830/3239]	Time 0.203 (0.236)	Data 0.002 (0.012)	Loss 4.0897 (4.0852)	Top-1 acc 26.562 (29.433)	Top-5 acc 52.734 (52.665)	lr 0.00656
Warmup Train [31][1840/3239]	Time 0.378 (0.236)	Data 0.001 (0.012)	Loss 3.9226 (4.0850)	Top-1 acc 29.688 (29.430)	Top-5 acc 55.469 (52.670)	lr 0.00655
Warmup Train [31][1850/3239]	Time 0.168 (0.236)	Data 0.001 (0.012)	Loss 3.9559 (4.0850)	Top-1 acc 33.594 (29.432)	Top-5 acc 56.250 (52.673)	lr 0.00655
Warmup Train [31][1860/3239]	Time 0.224 (0.235)	Data 0.001 (0.012)	Loss 4.0664 (4.0850)	Top-1 acc 29.688 (29.433)	Top-5 acc 51.953 (52.669)	lr 0.00654
Warmup Train [31][1870/3239]	Time 0.153 (0.235)	Data 0.001 (0.012)	Loss 4.4684 (4.0850)	Top-1 acc 28.516 (29.439)	Top-5 acc 46.875 (52.670)	lr 0.00654
Warmup Train [31][1880/3239]	Time 0.212 (0.235)	Data 0.001 (0.012)	Loss 4.3247 (4.0848)	Top-1 acc 25.781 (29.445)	Top-5 acc 48.438 (52.677)	lr 0.00654
Warmup Train [31][1890/3239]	Time 0.176 (0.235)	Data 0.002 (0.012)	Loss 3.9990 (4.0844)	Top-1 acc 31.641 (29.455)	Top-5 acc 51.953 (52.687)	lr 0.00653
Warmup Train [31][1900/3239]	Time 0.171 (0.235)	Data 0.001 (0.012)	Loss 4.3424 (4.0845)	Top-1 acc 26.172 (29.458)	Top-5 acc 50.781 (52.686)	lr 0.00653
Warmup Train [31][1910/3239]	Time 0.240 (0.235)	Data 0.003 (0.012)	Loss 4.1203 (4.0844)	Top-1 acc 32.812 (29.468)	Top-5 acc 48.828 (52.686)	lr 0.00652
Warmup Train [31][1920/3239]	Time 0.149 (0.235)	Data 0.001 (0.012)	Loss 4.1560 (4.0843)	Top-1 acc 23.047 (29.467)	Top-5 acc 49.609 (52.684)	lr 0.00652
Warmup Train [31][1930/3239]	Time 0.333 (0.235)	Data 0.001 (0.012)	Loss 4.0536 (4.0845)	Top-1 acc 30.859 (29.466)	Top-5 acc 56.250 (52.682)	lr 0.00651
Warmup Train [31][1940/3239]	Time 0.338 (0.235)	Data 0.001 (0.011)	Loss 3.9677 (4.0847)	Top-1 acc 28.906 (29.462)	Top-5 acc 53.125 (52.677)	lr 0.00651
Warmup Train [31][1950/3239]	Time 0.228 (0.235)	Data 0.001 (0.011)	Loss 4.1966 (4.0845)	Top-1 acc 25.391 (29.461)	Top-5 acc 47.266 (52.686)	lr 0.00651
Warmup Train [31][1960/3239]	Time 0.199 (0.235)	Data 0.003 (0.011)	Loss 4.2210 (4.0845)	Top-1 acc 30.078 (29.461)	Top-5 acc 52.734 (52.691)	lr 0.00650
Warmup Train [31][1970/3239]	Time 0.190 (0.235)	Data 0.001 (0.011)	Loss 3.9223 (4.0842)	Top-1 acc 32.812 (29.468)	Top-5 acc 56.641 (52.698)	lr 0.00650
Warmup Train [31][1980/3239]	Time 0.226 (0.235)	Data 0.001 (0.011)	Loss 4.1516 (4.0842)	Top-1 acc 27.734 (29.472)	Top-5 acc 51.172 (52.698)	lr 0.00649
Warmup Train [31][1990/3239]	Time 0.221 (0.235)	Data 0.001 (0.011)	Loss 4.0995 (4.0842)	Top-1 acc 32.422 (29.476)	Top-5 acc 53.125 (52.699)	lr 0.00649
Warmup Train [31][2000/3239]	Time 0.222 (0.235)	Data 0.001 (0.011)	Loss 3.9974 (4.0840)	Top-1 acc 32.422 (29.481)	Top-5 acc 54.688 (52.709)	lr 0.00649
Warmup Train [31][2010/3239]	Time 0.188 (0.235)	Data 0.001 (0.011)	Loss 3.9834 (4.0842)	Top-1 acc 32.812 (29.478)	Top-5 acc 57.031 (52.703)	lr 0.00648
Warmup Train [31][2020/3239]	Time 0.199 (0.235)	Data 0.002 (0.011)	Loss 4.1479 (4.0838)	Top-1 acc 28.906 (29.480)	Top-5 acc 54.297 (52.716)	lr 0.00648
Warmup Train [31][2030/3239]	Time 0.215 (0.235)	Data 0.001 (0.011)	Loss 4.1599 (4.0838)	Top-1 acc 30.078 (29.479)	Top-5 acc 48.047 (52.710)	lr 0.00647
Warmup Train [31][2040/3239]	Time 0.318 (0.235)	Data 0.002 (0.011)	Loss 4.1098 (4.0840)	Top-1 acc 29.688 (29.476)	Top-5 acc 49.609 (52.707)	lr 0.00647
Warmup Train [31][2050/3239]	Time 0.167 (0.235)	Data 0.001 (0.011)	Loss 4.2369 (4.0842)	Top-1 acc 26.953 (29.470)	Top-5 acc 47.266 (52.695)	lr 0.00647
Warmup Train [31][2060/3239]	Time 0.259 (0.235)	Data 0.001 (0.011)	Loss 4.0318 (4.0845)	Top-1 acc 32.031 (29.467)	Top-5 acc 53.906 (52.689)	lr 0.00646
Warmup Train [31][2070/3239]	Time 0.221 (0.235)	Data 0.002 (0.011)	Loss 3.9771 (4.0845)	Top-1 acc 35.156 (29.469)	Top-5 acc 55.078 (52.684)	lr 0.00646
Warmup Train [31][2080/3239]	Time 0.147 (0.234)	Data 0.002 (0.011)	Loss 3.9831 (4.0845)	Top-1 acc 29.297 (29.467)	Top-5 acc 54.297 (52.687)	lr 0.00645
Warmup Train [31][2090/3239]	Time 0.237 (0.234)	Data 0.001 (0.011)	Loss 4.0144 (4.0845)	Top-1 acc 28.516 (29.471)	Top-5 acc 54.297 (52.688)	lr 0.00645
Warmup Train [31][2100/3239]	Time 0.158 (0.234)	Data 0.001 (0.011)	Loss 4.2990 (4.0847)	Top-1 acc 21.484 (29.468)	Top-5 acc 51.953 (52.682)	lr 0.00645
Warmup Train [31][2110/3239]	Time 0.245 (0.234)	Data 0.001 (0.011)	Loss 4.1147 (4.0846)	Top-1 acc 24.609 (29.468)	Top-5 acc 52.734 (52.683)	lr 0.00644
Warmup Train [31][2120/3239]	Time 0.161 (0.234)	Data 0.001 (0.011)	Loss 4.1145 (4.0846)	Top-1 acc 30.859 (29.466)	Top-5 acc 53.516 (52.686)	lr 0.00644
Warmup Train [31][2130/3239]	Time 0.166 (0.234)	Data 0.001 (0.011)	Loss 3.9471 (4.0845)	Top-1 acc 35.547 (29.466)	Top-5 acc 54.297 (52.688)	lr 0.00643
Warmup Train [31][2140/3239]	Time 0.331 (0.234)	Data 0.001 (0.011)	Loss 4.0211 (4.0846)	Top-1 acc 31.641 (29.463)	Top-5 acc 54.297 (52.682)	lr 0.00643
Warmup Train [31][2150/3239]	Time 0.225 (0.234)	Data 0.001 (0.011)	Loss 3.9235 (4.0845)	Top-1 acc 29.688 (29.466)	Top-5 acc 59.375 (52.690)	lr 0.00643
Warmup Train [31][2160/3239]	Time 0.180 (0.234)	Data 0.001 (0.011)	Loss 3.9581 (4.0842)	Top-1 acc 34.766 (29.476)	Top-5 acc 57.031 (52.699)	lr 0.00642
Warmup Train [31][2170/3239]	Time 0.126 (0.234)	Data 0.001 (0.011)	Loss 4.2563 (4.0842)	Top-1 acc 25.781 (29.475)	Top-5 acc 49.609 (52.699)	lr 0.00642
Warmup Train [31][2180/3239]	Time 0.273 (0.234)	Data 0.002 (0.011)	Loss 3.9366 (4.0840)	Top-1 acc 32.812 (29.477)	Top-5 acc 55.078 (52.705)	lr 0.00641
Warmup Train [31][2190/3239]	Time 0.274 (0.234)	Data 0.002 (0.011)	Loss 4.0594 (4.0840)	Top-1 acc 32.812 (29.480)	Top-5 acc 54.297 (52.705)	lr 0.00641
Warmup Train [31][2200/3239]	Time 0.249 (0.234)	Data 0.001 (0.010)	Loss 3.9414 (4.0842)	Top-1 acc 32.031 (29.477)	Top-5 acc 56.641 (52.706)	lr 0.00641
Warmup Train [31][2210/3239]	Time 0.170 (0.234)	Data 0.002 (0.010)	Loss 4.0914 (4.0840)	Top-1 acc 30.469 (29.477)	Top-5 acc 54.688 (52.713)	lr 0.00640
Warmup Train [31][2220/3239]	Time 0.153 (0.234)	Data 0.001 (0.010)	Loss 4.1180 (4.0834)	Top-1 acc 25.781 (29.489)	Top-5 acc 51.172 (52.729)	lr 0.00640
Warmup Train [31][2230/3239]	Time 0.175 (0.234)	Data 0.001 (0.010)	Loss 3.9645 (4.0835)	Top-1 acc 33.594 (29.484)	Top-5 acc 56.641 (52.730)	lr 0.00639
Warmup Train [31][2240/3239]	Time 0.334 (0.234)	Data 0.002 (0.010)	Loss 4.0456 (4.0834)	Top-1 acc 30.078 (29.483)	Top-5 acc 51.172 (52.731)	lr 0.00639
Warmup Train [31][2250/3239]	Time 0.174 (0.234)	Data 0.001 (0.010)	Loss 3.9673 (4.0834)	Top-1 acc 31.250 (29.482)	Top-5 acc 57.031 (52.734)	lr 0.00638
Warmup Train [31][2260/3239]	Time 0.189 (0.234)	Data 0.001 (0.010)	Loss 4.0518 (4.0831)	Top-1 acc 26.562 (29.484)	Top-5 acc 54.297 (52.737)	lr 0.00638
Warmup Train [31][2270/3239]	Time 0.217 (0.234)	Data 0.001 (0.010)	Loss 4.1928 (4.0830)	Top-1 acc 26.953 (29.486)	Top-5 acc 48.047 (52.738)	lr 0.00638
Warmup Train [31][2280/3239]	Time 0.291 (0.234)	Data 0.002 (0.010)	Loss 4.1961 (4.0831)	Top-1 acc 28.516 (29.484)	Top-5 acc 49.219 (52.730)	lr 0.00637
Warmup Train [31][2290/3239]	Time 0.196 (0.234)	Data 0.001 (0.010)	Loss 4.0907 (4.0834)	Top-1 acc 30.469 (29.475)	Top-5 acc 52.734 (52.726)	lr 0.00637
Warmup Train [31][2300/3239]	Time 0.149 (0.233)	Data 0.001 (0.010)	Loss 3.9980 (4.0830)	Top-1 acc 31.250 (29.486)	Top-5 acc 55.469 (52.736)	lr 0.00636
Warmup Train [31][2310/3239]	Time 0.188 (0.233)	Data 0.001 (0.010)	Loss 4.0805 (4.0833)	Top-1 acc 29.297 (29.479)	Top-5 acc 52.734 (52.728)	lr 0.00636
Warmup Train [31][2320/3239]	Time 0.383 (0.233)	Data 0.001 (0.010)	Loss 4.0102 (4.0833)	Top-1 acc 28.516 (29.476)	Top-5 acc 54.297 (52.727)	lr 0.00636
Warmup Train [31][2330/3239]	Time 0.292 (0.233)	Data 0.001 (0.010)	Loss 3.8714 (4.0830)	Top-1 acc 33.984 (29.483)	Top-5 acc 56.641 (52.733)	lr 0.00635
Warmup Train [31][2340/3239]	Time 0.194 (0.233)	Data 0.001 (0.010)	Loss 4.2594 (4.0832)	Top-1 acc 27.734 (29.482)	Top-5 acc 46.484 (52.732)	lr 0.00635
Warmup Train [31][2350/3239]	Time 0.192 (0.233)	Data 0.001 (0.010)	Loss 3.9256 (4.0832)	Top-1 acc 32.422 (29.482)	Top-5 acc 57.422 (52.734)	lr 0.00634
Warmup Train [31][2360/3239]	Time 0.236 (0.233)	Data 0.001 (0.010)	Loss 4.1029 (4.0832)	Top-1 acc 29.297 (29.483)	Top-5 acc 51.562 (52.736)	lr 0.00634
Warmup Train [31][2370/3239]	Time 0.278 (0.233)	Data 0.001 (0.010)	Loss 4.2281 (4.0834)	Top-1 acc 29.297 (29.482)	Top-5 acc 48.047 (52.729)	lr 0.00634
Warmup Train [31][2380/3239]	Time 0.247 (0.233)	Data 0.001 (0.010)	Loss 4.0853 (4.0834)	Top-1 acc 32.422 (29.484)	Top-5 acc 51.172 (52.729)	lr 0.00633
Warmup Train [31][2390/3239]	Time 0.212 (0.233)	Data 0.001 (0.010)	Loss 4.0750 (4.0834)	Top-1 acc 33.594 (29.488)	Top-5 acc 53.516 (52.728)	lr 0.00633
Warmup Train [31][2400/3239]	Time 0.188 (0.233)	Data 0.002 (0.010)	Loss 4.0970 (4.0836)	Top-1 acc 28.125 (29.482)	Top-5 acc 53.516 (52.723)	lr 0.00632
Warmup Train [31][2410/3239]	Time 0.180 (0.233)	Data 0.001 (0.010)	Loss 3.9952 (4.0836)	Top-1 acc 32.812 (29.484)	Top-5 acc 53.906 (52.727)	lr 0.00632
Warmup Train [31][2420/3239]	Time 0.200 (0.233)	Data 0.001 (0.010)	Loss 4.0641 (4.0835)	Top-1 acc 33.594 (29.492)	Top-5 acc 52.734 (52.729)	lr 0.00632
Warmup Train [31][2430/3239]	Time 0.186 (0.233)	Data 0.002 (0.010)	Loss 4.0796 (4.0833)	Top-1 acc 26.562 (29.494)	Top-5 acc 49.219 (52.731)	lr 0.00631
Warmup Train [31][2440/3239]	Time 0.232 (0.233)	Data 0.001 (0.010)	Loss 3.9568 (4.0833)	Top-1 acc 30.859 (29.493)	Top-5 acc 57.422 (52.730)	lr 0.00631
Warmup Train [31][2450/3239]	Time 0.216 (0.233)	Data 0.001 (0.010)	Loss 3.9144 (4.0832)	Top-1 acc 30.469 (29.495)	Top-5 acc 55.469 (52.736)	lr 0.00630
Warmup Train [31][2460/3239]	Time 0.206 (0.233)	Data 0.001 (0.010)	Loss 4.1237 (4.0832)	Top-1 acc 25.781 (29.494)	Top-5 acc 52.344 (52.740)	lr 0.00630
Warmup Train [31][2470/3239]	Time 0.218 (0.232)	Data 0.001 (0.010)	Loss 3.9971 (4.0832)	Top-1 acc 27.344 (29.493)	Top-5 acc 56.641 (52.736)	lr 0.00630
Warmup Train [31][2480/3239]	Time 0.230 (0.232)	Data 0.001 (0.010)	Loss 4.1476 (4.0832)	Top-1 acc 27.734 (29.490)	Top-5 acc 49.219 (52.732)	lr 0.00629
Warmup Train [31][2490/3239]	Time 0.250 (0.232)	Data 0.001 (0.010)	Loss 4.0173 (4.0831)	Top-1 acc 28.906 (29.490)	Top-5 acc 51.953 (52.736)	lr 0.00629
Warmup Train [31][2500/3239]	Time 0.199 (0.232)	Data 0.001 (0.010)	Loss 3.9634 (4.0831)	Top-1 acc 30.469 (29.489)	Top-5 acc 49.609 (52.734)	lr 0.00628
Warmup Train [31][2510/3239]	Time 0.256 (0.232)	Data 0.001 (0.010)	Loss 4.0939 (4.0830)	Top-1 acc 31.250 (29.497)	Top-5 acc 53.516 (52.736)	lr 0.00628
Warmup Train [31][2520/3239]	Time 0.205 (0.232)	Data 0.001 (0.010)	Loss 4.1711 (4.0830)	Top-1 acc 29.688 (29.498)	Top-5 acc 52.734 (52.742)	lr 0.00628
Warmup Train [31][2530/3239]	Time 0.205 (0.232)	Data 0.001 (0.010)	Loss 3.9735 (4.0831)	Top-1 acc 31.250 (29.500)	Top-5 acc 50.000 (52.737)	lr 0.00627
Warmup Train [31][2540/3239]	Time 0.217 (0.232)	Data 0.001 (0.009)	Loss 4.0683 (4.0830)	Top-1 acc 29.688 (29.500)	Top-5 acc 52.734 (52.739)	lr 0.00627
Warmup Train [31][2550/3239]	Time 0.210 (0.232)	Data 0.002 (0.009)	Loss 4.3184 (4.0830)	Top-1 acc 26.172 (29.501)	Top-5 acc 48.438 (52.735)	lr 0.00626
Warmup Train [31][2560/3239]	Time 0.235 (0.232)	Data 0.001 (0.009)	Loss 4.0443 (4.0831)	Top-1 acc 32.422 (29.497)	Top-5 acc 52.344 (52.736)	lr 0.00626
Warmup Train [31][2570/3239]	Time 0.200 (0.232)	Data 0.001 (0.009)	Loss 3.9471 (4.0830)	Top-1 acc 27.344 (29.498)	Top-5 acc 53.906 (52.741)	lr 0.00626
Warmup Train [31][2580/3239]	Time 0.253 (0.232)	Data 0.001 (0.009)	Loss 4.0651 (4.0829)	Top-1 acc 26.953 (29.498)	Top-5 acc 53.516 (52.743)	lr 0.00625
Warmup Train [31][2590/3239]	Time 0.159 (0.232)	Data 0.001 (0.009)	Loss 4.0909 (4.0828)	Top-1 acc 29.297 (29.503)	Top-5 acc 51.172 (52.751)	lr 0.00625
Warmup Train [31][2600/3239]	Time 0.206 (0.232)	Data 0.001 (0.009)	Loss 4.0890 (4.0827)	Top-1 acc 28.906 (29.504)	Top-5 acc 53.125 (52.751)	lr 0.00624
Warmup Train [31][2610/3239]	Time 0.255 (0.232)	Data 0.049 (0.009)	Loss 4.2443 (4.0825)	Top-1 acc 29.688 (29.509)	Top-5 acc 46.875 (52.757)	lr 0.00624
Warmup Train [31][2620/3239]	Time 0.147 (0.232)	Data 0.002 (0.009)	Loss 3.9214 (4.0824)	Top-1 acc 33.203 (29.510)	Top-5 acc 54.297 (52.759)	lr 0.00624
Warmup Train [31][2630/3239]	Time 0.234 (0.232)	Data 0.001 (0.009)	Loss 4.0576 (4.0822)	Top-1 acc 27.344 (29.514)	Top-5 acc 51.562 (52.758)	lr 0.00623
Warmup Train [31][2640/3239]	Time 0.234 (0.232)	Data 0.001 (0.009)	Loss 3.9890 (4.0823)	Top-1 acc 29.688 (29.515)	Top-5 acc 55.859 (52.757)	lr 0.00623
Warmup Train [31][2650/3239]	Time 0.188 (0.232)	Data 0.001 (0.009)	Loss 4.2524 (4.0823)	Top-1 acc 26.562 (29.511)	Top-5 acc 47.656 (52.756)	lr 0.00622
Warmup Train [31][2660/3239]	Time 0.253 (0.232)	Data 0.002 (0.009)	Loss 4.0448 (4.0824)	Top-1 acc 30.469 (29.509)	Top-5 acc 51.172 (52.751)	lr 0.00622
Warmup Train [31][2670/3239]	Time 0.310 (0.232)	Data 0.001 (0.009)	Loss 3.9978 (4.0824)	Top-1 acc 31.250 (29.511)	Top-5 acc 52.734 (52.755)	lr 0.00622
Warmup Train [31][2680/3239]	Time 0.163 (0.232)	Data 0.001 (0.009)	Loss 4.1829 (4.0824)	Top-1 acc 28.516 (29.515)	Top-5 acc 48.047 (52.756)	lr 0.00621
Warmup Train [31][2690/3239]	Time 0.221 (0.232)	Data 0.002 (0.009)	Loss 3.9987 (4.0825)	Top-1 acc 29.688 (29.509)	Top-5 acc 53.906 (52.751)	lr 0.00621
Warmup Train [31][2700/3239]	Time 0.212 (0.232)	Data 0.002 (0.009)	Loss 4.0598 (4.0825)	Top-1 acc 28.516 (29.504)	Top-5 acc 53.906 (52.751)	lr 0.00620
Warmup Train [31][2710/3239]	Time 0.215 (0.231)	Data 0.002 (0.009)	Loss 4.1614 (4.0825)	Top-1 acc 26.172 (29.505)	Top-5 acc 50.000 (52.749)	lr 0.00620
Warmup Train [31][2720/3239]	Time 0.308 (0.231)	Data 0.001 (0.009)	Loss 4.2408 (4.0824)	Top-1 acc 28.125 (29.510)	Top-5 acc 48.438 (52.746)	lr 0.00620
Warmup Train [31][2730/3239]	Time 0.190 (0.231)	Data 0.002 (0.009)	Loss 3.8522 (4.0823)	Top-1 acc 33.984 (29.514)	Top-5 acc 57.422 (52.750)	lr 0.00619
Warmup Train [31][2740/3239]	Time 0.281 (0.231)	Data 0.001 (0.009)	Loss 4.1717 (4.0823)	Top-1 acc 25.000 (29.512)	Top-5 acc 48.047 (52.751)	lr 0.00619
Warmup Train [31][2750/3239]	Time 0.215 (0.231)	Data 0.001 (0.009)	Loss 3.8663 (4.0820)	Top-1 acc 36.719 (29.519)	Top-5 acc 57.031 (52.762)	lr 0.00618
Warmup Train [31][2760/3239]	Time 0.313 (0.231)	Data 0.001 (0.009)	Loss 4.3027 (4.0819)	Top-1 acc 27.344 (29.517)	Top-5 acc 45.312 (52.761)	lr 0.00618
Warmup Train [31][2770/3239]	Time 0.198 (0.231)	Data 0.001 (0.009)	Loss 4.0563 (4.0817)	Top-1 acc 33.984 (29.525)	Top-5 acc 55.859 (52.763)	lr 0.00618
Warmup Train [31][2780/3239]	Time 0.223 (0.231)	Data 0.001 (0.009)	Loss 4.0615 (4.0819)	Top-1 acc 28.906 (29.524)	Top-5 acc 52.734 (52.758)	lr 0.00617
Warmup Train [31][2790/3239]	Time 0.187 (0.231)	Data 0.001 (0.009)	Loss 4.1300 (4.0820)	Top-1 acc 28.906 (29.527)	Top-5 acc 53.906 (52.757)	lr 0.00617
Warmup Train [31][2800/3239]	Time 0.219 (0.231)	Data 0.001 (0.009)	Loss 4.1601 (4.0821)	Top-1 acc 28.125 (29.527)	Top-5 acc 51.172 (52.754)	lr 0.00616
Warmup Train [31][2810/3239]	Time 0.235 (0.231)	Data 0.001 (0.009)	Loss 4.0599 (4.0819)	Top-1 acc 31.641 (29.529)	Top-5 acc 52.734 (52.758)	lr 0.00616
Warmup Train [31][2820/3239]	Time 0.204 (0.231)	Data 0.001 (0.009)	Loss 3.9063 (4.0820)	Top-1 acc 35.547 (29.526)	Top-5 acc 57.031 (52.759)	lr 0.00616
Warmup Train [31][2830/3239]	Time 0.242 (0.231)	Data 0.001 (0.009)	Loss 4.1980 (4.0820)	Top-1 acc 28.125 (29.523)	Top-5 acc 51.172 (52.757)	lr 0.00615
Warmup Train [31][2840/3239]	Time 0.156 (0.231)	Data 0.001 (0.009)	Loss 4.1033 (4.0821)	Top-1 acc 28.516 (29.520)	Top-5 acc 55.469 (52.758)	lr 0.00615
Warmup Train [31][2850/3239]	Time 0.178 (0.231)	Data 0.001 (0.009)	Loss 4.1810 (4.0821)	Top-1 acc 27.344 (29.521)	Top-5 acc 50.781 (52.752)	lr 0.00614
Warmup Train [31][2860/3239]	Time 0.274 (0.231)	Data 0.001 (0.009)	Loss 4.2808 (4.0821)	Top-1 acc 25.000 (29.523)	Top-5 acc 47.656 (52.754)	lr 0.00614
Warmup Train [31][2870/3239]	Time 0.184 (0.231)	Data 0.001 (0.009)	Loss 3.9579 (4.0820)	Top-1 acc 32.031 (29.523)	Top-5 acc 57.812 (52.758)	lr 0.00614
Warmup Train [31][2880/3239]	Time 0.205 (0.231)	Data 0.001 (0.009)	Loss 4.2032 (4.0821)	Top-1 acc 28.125 (29.524)	Top-5 acc 50.391 (52.755)	lr 0.00613
Warmup Train [31][2890/3239]	Time 0.239 (0.231)	Data 0.001 (0.009)	Loss 3.9973 (4.0820)	Top-1 acc 31.641 (29.523)	Top-5 acc 54.297 (52.760)	lr 0.00613
Warmup Train [31][2900/3239]	Time 0.204 (0.231)	Data 0.001 (0.009)	Loss 3.9758 (4.0818)	Top-1 acc 29.688 (29.527)	Top-5 acc 55.859 (52.766)	lr 0.00612
Warmup Train [31][2910/3239]	Time 0.251 (0.231)	Data 0.001 (0.009)	Loss 4.1073 (4.0819)	Top-1 acc 30.078 (29.523)	Top-5 acc 54.297 (52.764)	lr 0.00612
Warmup Train [31][2920/3239]	Time 0.142 (0.231)	Data 0.001 (0.009)	Loss 4.0510 (4.0819)	Top-1 acc 27.734 (29.524)	Top-5 acc 54.297 (52.766)	lr 0.00612
Warmup Train [31][2930/3239]	Time 0.196 (0.231)	Data 0.001 (0.009)	Loss 4.0760 (4.0821)	Top-1 acc 28.125 (29.520)	Top-5 acc 52.734 (52.760)	lr 0.00611
Warmup Train [31][2940/3239]	Time 0.249 (0.231)	Data 0.001 (0.009)	Loss 4.1367 (4.0820)	Top-1 acc 25.781 (29.522)	Top-5 acc 50.391 (52.765)	lr 0.00611
Warmup Train [31][2950/3239]	Time 0.223 (0.231)	Data 0.003 (0.009)	Loss 4.1381 (4.0822)	Top-1 acc 26.953 (29.521)	Top-5 acc 52.344 (52.760)	lr 0.00610
Warmup Train [31][2960/3239]	Time 0.286 (0.231)	Data 0.002 (0.009)	Loss 3.9278 (4.0819)	Top-1 acc 33.594 (29.527)	Top-5 acc 54.297 (52.770)	lr 0.00610
Warmup Train [31][2970/3239]	Time 0.257 (0.231)	Data 0.001 (0.008)	Loss 4.1337 (4.0819)	Top-1 acc 27.734 (29.526)	Top-5 acc 55.859 (52.772)	lr 0.00610
Warmup Train [31][2980/3239]	Time 0.237 (0.231)	Data 0.001 (0.008)	Loss 4.0405 (4.0817)	Top-1 acc 28.125 (29.528)	Top-5 acc 55.078 (52.774)	lr 0.00609
Warmup Train [31][2990/3239]	Time 0.199 (0.231)	Data 0.001 (0.008)	Loss 4.1244 (4.0818)	Top-1 acc 25.391 (29.523)	Top-5 acc 50.781 (52.775)	lr 0.00609
Warmup Train [31][3000/3239]	Time 0.178 (0.231)	Data 0.002 (0.008)	Loss 4.1202 (4.0818)	Top-1 acc 28.516 (29.523)	Top-5 acc 51.953 (52.774)	lr 0.00608
Warmup Train [31][3010/3239]	Time 0.217 (0.231)	Data 0.002 (0.008)	Loss 4.1303 (4.0820)	Top-1 acc 29.297 (29.524)	Top-5 acc 54.688 (52.771)	lr 0.00608
Warmup Train [31][3020/3239]	Time 0.254 (0.231)	Data 0.001 (0.008)	Loss 4.2262 (4.0821)	Top-1 acc 31.641 (29.522)	Top-5 acc 47.266 (52.768)	lr 0.00608
Warmup Train [31][3030/3239]	Time 0.236 (0.231)	Data 0.002 (0.008)	Loss 3.9743 (4.0820)	Top-1 acc 28.516 (29.523)	Top-5 acc 55.859 (52.769)	lr 0.00607
Warmup Train [31][3040/3239]	Time 0.268 (0.231)	Data 0.001 (0.008)	Loss 4.0396 (4.0820)	Top-1 acc 29.688 (29.520)	Top-5 acc 54.297 (52.767)	lr 0.00607
Warmup Train [31][3050/3239]	Time 0.211 (0.231)	Data 0.001 (0.008)	Loss 3.9699 (4.0821)	Top-1 acc 34.375 (29.518)	Top-5 acc 58.594 (52.769)	lr 0.00606
Warmup Train [31][3060/3239]	Time 0.363 (0.231)	Data 0.001 (0.008)	Loss 4.1672 (4.0823)	Top-1 acc 28.125 (29.518)	Top-5 acc 52.344 (52.762)	lr 0.00606
Warmup Train [31][3070/3239]	Time 0.193 (0.231)	Data 0.001 (0.008)	Loss 4.1386 (4.0823)	Top-1 acc 30.469 (29.517)	Top-5 acc 50.781 (52.761)	lr 0.00606
Warmup Train [31][3080/3239]	Time 0.211 (0.231)	Data 0.001 (0.008)	Loss 4.2697 (4.0823)	Top-1 acc 27.344 (29.518)	Top-5 acc 44.531 (52.757)	lr 0.00605
Warmup Train [31][3090/3239]	Time 0.306 (0.231)	Data 0.001 (0.008)	Loss 4.0104 (4.0822)	Top-1 acc 30.469 (29.519)	Top-5 acc 54.297 (52.761)	lr 0.00605
Warmup Train [31][3100/3239]	Time 0.218 (0.231)	Data 0.003 (0.008)	Loss 3.9756 (4.0821)	Top-1 acc 32.422 (29.518)	Top-5 acc 49.609 (52.763)	lr 0.00605
Warmup Train [31][3110/3239]	Time 0.168 (0.231)	Data 0.001 (0.008)	Loss 4.1039 (4.0820)	Top-1 acc 28.125 (29.521)	Top-5 acc 52.734 (52.767)	lr 0.00604
Warmup Train [31][3120/3239]	Time 0.203 (0.231)	Data 0.001 (0.008)	Loss 4.0545 (4.0819)	Top-1 acc 31.641 (29.522)	Top-5 acc 53.516 (52.770)	lr 0.00604
Warmup Train [31][3130/3239]	Time 0.183 (0.231)	Data 0.001 (0.008)	Loss 4.0822 (4.0818)	Top-1 acc 27.344 (29.521)	Top-5 acc 52.734 (52.768)	lr 0.00603
Warmup Train [31][3140/3239]	Time 0.232 (0.231)	Data 0.001 (0.008)	Loss 3.9802 (4.0819)	Top-1 acc 32.422 (29.521)	Top-5 acc 55.859 (52.770)	lr 0.00603
Warmup Train [31][3150/3239]	Time 0.236 (0.231)	Data 0.001 (0.008)	Loss 4.0179 (4.0815)	Top-1 acc 29.297 (29.529)	Top-5 acc 53.906 (52.777)	lr 0.00603
Warmup Train [31][3160/3239]	Time 0.278 (0.231)	Data 0.001 (0.008)	Loss 4.0014 (4.0814)	Top-1 acc 30.078 (29.531)	Top-5 acc 54.688 (52.777)	lr 0.00602
Warmup Train [31][3170/3239]	Time 0.185 (0.231)	Data 0.001 (0.008)	Loss 4.1326 (4.0815)	Top-1 acc 31.250 (29.531)	Top-5 acc 51.172 (52.777)	lr 0.00602
Warmup Train [31][3180/3239]	Time 0.208 (0.231)	Data 0.000 (0.008)	Loss 4.0141 (4.0814)	Top-1 acc 31.641 (29.534)	Top-5 acc 52.734 (52.779)	lr 0.00601
Warmup Train [31][3190/3239]	Time 0.193 (0.231)	Data 0.000 (0.008)	Loss 4.2104 (4.0814)	Top-1 acc 26.172 (29.535)	Top-5 acc 50.000 (52.780)	lr 0.00601
Warmup Train [31][3200/3239]	Time 0.180 (0.230)	Data 0.000 (0.008)	Loss 4.1097 (4.0815)	Top-1 acc 27.734 (29.533)	Top-5 acc 54.297 (52.778)	lr 0.00601
Warmup Train [31][3210/3239]	Time 0.207 (0.230)	Data 0.000 (0.008)	Loss 4.1671 (4.0816)	Top-1 acc 28.906 (29.532)	Top-5 acc 49.219 (52.773)	lr 0.00600
Warmup Train [31][3220/3239]	Time 0.229 (0.230)	Data 0.000 (0.008)	Loss 4.0888 (4.0817)	Top-1 acc 29.297 (29.527)	Top-5 acc 53.516 (52.772)	lr 0.00600
Warmup Train [31][3230/3239]	Time 0.217 (0.230)	Data 0.000 (0.008)	Loss 4.0274 (4.0817)	Top-1 acc 30.469 (29.525)	Top-5 acc 54.688 (52.774)	lr 0.00599
Warmup Train [31][3239/3239]	Time 0.164 (0.230)	Data 0.000 (0.008)	Loss 4.1612 (4.0817)	Top-1 acc 30.864 (29.527)	Top-5 acc 50.617 (52.776)	lr 0.00599
==========Warmup Valid [31/40]	loss 3.018	top-1 acc 37.119	top-5 acc 62.039	Train top-1 29.527	top-5 52.776	flops: 442.4M
Warmup Train [32][0/3239]	Time 15.984 (15.984)	Data 15.621 (15.621)	Loss 4.2642 (4.2642)	Top-1 acc 26.172 (26.172)	Top-5 acc 48.438 (48.438)	lr 0.00599
Warmup Train [32][10/3239]	Time 0.398 (1.950)	Data 0.002 (1.579)	Loss 4.1903 (4.0477)	Top-1 acc 26.562 (30.291)	Top-5 acc 49.219 (52.805)	lr 0.00599
Warmup Train [32][20/3239]	Time 0.323 (1.178)	Data 0.002 (0.828)	Loss 4.1331 (4.0592)	Top-1 acc 32.031 (30.078)	Top-5 acc 53.125 (52.939)	lr 0.00598
Warmup Train [32][30/3239]	Time 0.202 (0.870)	Data 0.001 (0.561)	Loss 4.1514 (4.0766)	Top-1 acc 28.516 (29.435)	Top-5 acc 51.562 (52.419)	lr 0.00598
Warmup Train [32][40/3239]	Time 0.323 (0.719)	Data 0.002 (0.425)	Loss 4.1029 (4.0833)	Top-1 acc 31.641 (29.144)	Top-5 acc 56.250 (52.563)	lr 0.00597
Warmup Train [32][50/3239]	Time 0.238 (0.622)	Data 0.002 (0.342)	Loss 3.8710 (4.0696)	Top-1 acc 34.766 (29.580)	Top-5 acc 55.078 (52.826)	lr 0.00597
Warmup Train [32][60/3239]	Time 0.186 (0.560)	Data 0.001 (0.286)	Loss 4.3419 (4.0612)	Top-1 acc 25.781 (29.643)	Top-5 acc 46.094 (53.099)	lr 0.00597
Warmup Train [32][70/3239]	Time 0.229 (0.512)	Data 0.001 (0.246)	Loss 4.0873 (4.0659)	Top-1 acc 29.297 (29.577)	Top-5 acc 54.688 (53.158)	lr 0.00596
Warmup Train [32][80/3239]	Time 0.289 (0.477)	Data 0.002 (0.217)	Loss 3.9263 (4.0695)	Top-1 acc 28.516 (29.712)	Top-5 acc 55.469 (53.091)	lr 0.00596
Warmup Train [32][90/3239]	Time 0.181 (0.449)	Data 0.002 (0.193)	Loss 4.2025 (4.0728)	Top-1 acc 23.828 (29.675)	Top-5 acc 50.391 (53.069)	lr 0.00595
Warmup Train [32][100/3239]	Time 0.182 (0.426)	Data 0.001 (0.174)	Loss 3.9911 (4.0737)	Top-1 acc 32.031 (29.618)	Top-5 acc 54.297 (53.040)	lr 0.00595
Warmup Train [32][110/3239]	Time 0.153 (0.409)	Data 0.001 (0.159)	Loss 4.2318 (4.0702)	Top-1 acc 27.344 (29.744)	Top-5 acc 51.562 (53.132)	lr 0.00595
Warmup Train [32][120/3239]	Time 0.141 (0.395)	Data 0.001 (0.146)	Loss 4.1527 (4.0725)	Top-1 acc 30.859 (29.768)	Top-5 acc 51.172 (53.131)	lr 0.00594
Warmup Train [32][130/3239]	Time 0.180 (0.382)	Data 0.001 (0.135)	Loss 4.0774 (4.0729)	Top-1 acc 27.344 (29.744)	Top-5 acc 53.906 (53.071)	lr 0.00594
Warmup Train [32][140/3239]	Time 0.199 (0.371)	Data 0.001 (0.126)	Loss 3.9518 (4.0706)	Top-1 acc 33.203 (29.826)	Top-5 acc 55.859 (53.056)	lr 0.00593
Warmup Train [32][150/3239]	Time 0.157 (0.361)	Data 0.001 (0.117)	Loss 4.2012 (4.0711)	Top-1 acc 27.734 (29.796)	Top-5 acc 51.953 (53.063)	lr 0.00593
Warmup Train [32][160/3239]	Time 0.336 (0.354)	Data 0.001 (0.111)	Loss 4.2306 (4.0735)	Top-1 acc 29.297 (29.755)	Top-5 acc 51.172 (52.962)	lr 0.00593
Warmup Train [32][170/3239]	Time 0.264 (0.346)	Data 0.001 (0.104)	Loss 4.0012 (4.0704)	Top-1 acc 30.859 (29.788)	Top-5 acc 58.594 (53.070)	lr 0.00592
Warmup Train [32][180/3239]	Time 0.278 (0.339)	Data 0.001 (0.098)	Loss 4.1496 (4.0707)	Top-1 acc 28.125 (29.864)	Top-5 acc 51.562 (53.032)	lr 0.00592
Warmup Train [32][190/3239]	Time 0.260 (0.333)	Data 0.002 (0.093)	Loss 4.0914 (4.0744)	Top-1 acc 30.859 (29.767)	Top-5 acc 51.953 (52.949)	lr 0.00592
Warmup Train [32][200/3239]	Time 0.202 (0.327)	Data 0.001 (0.089)	Loss 4.1542 (4.0760)	Top-1 acc 28.516 (29.779)	Top-5 acc 51.172 (52.929)	lr 0.00591
Warmup Train [32][210/3239]	Time 0.168 (0.322)	Data 0.001 (0.085)	Loss 4.1561 (4.0784)	Top-1 acc 28.125 (29.728)	Top-5 acc 50.391 (52.851)	lr 0.00591
Warmup Train [32][220/3239]	Time 0.170 (0.318)	Data 0.001 (0.081)	Loss 4.1595 (4.0774)	Top-1 acc 31.250 (29.781)	Top-5 acc 52.344 (52.840)	lr 0.00590
Warmup Train [32][230/3239]	Time 0.171 (0.314)	Data 0.001 (0.078)	Loss 4.2624 (4.0777)	Top-1 acc 25.781 (29.802)	Top-5 acc 47.656 (52.788)	lr 0.00590
Warmup Train [32][240/3239]	Time 0.154 (0.311)	Data 0.001 (0.075)	Loss 4.0510 (4.0775)	Top-1 acc 30.859 (29.803)	Top-5 acc 53.125 (52.796)	lr 0.00590
Warmup Train [32][250/3239]	Time 0.244 (0.307)	Data 0.001 (0.072)	Loss 3.9775 (4.0789)	Top-1 acc 32.812 (29.810)	Top-5 acc 58.203 (52.792)	lr 0.00589
Warmup Train [32][260/3239]	Time 0.220 (0.305)	Data 0.001 (0.069)	Loss 4.1545 (4.0778)	Top-1 acc 30.078 (29.782)	Top-5 acc 52.344 (52.812)	lr 0.00589
Warmup Train [32][270/3239]	Time 0.265 (0.302)	Data 0.003 (0.067)	Loss 4.0225 (4.0749)	Top-1 acc 27.734 (29.853)	Top-5 acc 52.734 (52.842)	lr 0.00588
Warmup Train [32][280/3239]	Time 0.208 (0.299)	Data 0.001 (0.065)	Loss 3.8565 (4.0738)	Top-1 acc 34.375 (29.884)	Top-5 acc 60.938 (52.872)	lr 0.00588
Warmup Train [32][290/3239]	Time 0.225 (0.296)	Data 0.001 (0.063)	Loss 3.9627 (4.0740)	Top-1 acc 30.859 (29.870)	Top-5 acc 55.469 (52.867)	lr 0.00588
Warmup Train [32][300/3239]	Time 0.145 (0.294)	Data 0.001 (0.061)	Loss 4.1375 (4.0757)	Top-1 acc 28.906 (29.841)	Top-5 acc 50.000 (52.849)	lr 0.00587
Warmup Train [32][310/3239]	Time 0.335 (0.292)	Data 0.001 (0.059)	Loss 3.8971 (4.0761)	Top-1 acc 29.688 (29.822)	Top-5 acc 54.688 (52.821)	lr 0.00587
Warmup Train [32][320/3239]	Time 0.238 (0.290)	Data 0.001 (0.057)	Loss 4.1167 (4.0758)	Top-1 acc 31.641 (29.835)	Top-5 acc 53.516 (52.831)	lr 0.00586
Warmup Train [32][330/3239]	Time 0.230 (0.288)	Data 0.001 (0.055)	Loss 4.3299 (4.0774)	Top-1 acc 28.125 (29.793)	Top-5 acc 48.438 (52.803)	lr 0.00586
Warmup Train [32][340/3239]	Time 0.168 (0.286)	Data 0.001 (0.054)	Loss 3.9642 (4.0778)	Top-1 acc 27.734 (29.777)	Top-5 acc 51.172 (52.781)	lr 0.00586
Warmup Train [32][350/3239]	Time 0.180 (0.284)	Data 0.002 (0.053)	Loss 4.2161 (4.0776)	Top-1 acc 27.734 (29.782)	Top-5 acc 50.781 (52.792)	lr 0.00585
Warmup Train [32][360/3239]	Time 0.348 (0.283)	Data 0.001 (0.051)	Loss 4.0897 (4.0765)	Top-1 acc 28.125 (29.809)	Top-5 acc 50.781 (52.798)	lr 0.00585
Warmup Train [32][370/3239]	Time 0.167 (0.281)	Data 0.001 (0.050)	Loss 4.1556 (4.0763)	Top-1 acc 25.781 (29.806)	Top-5 acc 49.609 (52.806)	lr 0.00584
Warmup Train [32][380/3239]	Time 0.186 (0.280)	Data 0.002 (0.049)	Loss 4.2421 (4.0757)	Top-1 acc 25.391 (29.813)	Top-5 acc 47.656 (52.801)	lr 0.00584
Warmup Train [32][390/3239]	Time 0.187 (0.278)	Data 0.001 (0.048)	Loss 3.8354 (4.0746)	Top-1 acc 33.984 (29.813)	Top-5 acc 57.031 (52.825)	lr 0.00584
Warmup Train [32][400/3239]	Time 0.172 (0.277)	Data 0.001 (0.046)	Loss 4.0315 (4.0752)	Top-1 acc 31.250 (29.794)	Top-5 acc 53.516 (52.809)	lr 0.00583
Warmup Train [32][410/3239]	Time 0.197 (0.275)	Data 0.001 (0.045)	Loss 4.1174 (4.0743)	Top-1 acc 29.297 (29.792)	Top-5 acc 53.906 (52.830)	lr 0.00583
Warmup Train [32][420/3239]	Time 0.227 (0.274)	Data 0.001 (0.044)	Loss 4.1285 (4.0750)	Top-1 acc 28.516 (29.783)	Top-5 acc 52.734 (52.823)	lr 0.00583
Warmup Train [32][430/3239]	Time 0.247 (0.273)	Data 0.001 (0.044)	Loss 4.0894 (4.0742)	Top-1 acc 33.594 (29.812)	Top-5 acc 50.391 (52.843)	lr 0.00582
Warmup Train [32][440/3239]	Time 0.235 (0.272)	Data 0.001 (0.043)	Loss 4.0025 (4.0733)	Top-1 acc 32.031 (29.832)	Top-5 acc 55.078 (52.871)	lr 0.00582
Warmup Train [32][450/3239]	Time 0.282 (0.270)	Data 0.001 (0.042)	Loss 4.0936 (4.0745)	Top-1 acc 27.734 (29.803)	Top-5 acc 52.344 (52.840)	lr 0.00581
Warmup Train [32][460/3239]	Time 0.312 (0.269)	Data 0.001 (0.041)	Loss 3.9595 (4.0732)	Top-1 acc 28.906 (29.821)	Top-5 acc 53.125 (52.867)	lr 0.00581
Warmup Train [32][470/3239]	Time 0.240 (0.268)	Data 0.001 (0.040)	Loss 4.1320 (4.0737)	Top-1 acc 28.906 (29.794)	Top-5 acc 55.078 (52.869)	lr 0.00581
Warmup Train [32][480/3239]	Time 0.141 (0.267)	Data 0.002 (0.039)	Loss 4.0430 (4.0747)	Top-1 acc 26.953 (29.771)	Top-5 acc 51.953 (52.859)	lr 0.00580
Warmup Train [32][490/3239]	Time 0.224 (0.266)	Data 0.001 (0.038)	Loss 3.9252 (4.0751)	Top-1 acc 32.422 (29.764)	Top-5 acc 55.078 (52.844)	lr 0.00580
Warmup Train [32][500/3239]	Time 0.180 (0.265)	Data 0.001 (0.038)	Loss 4.1030 (4.0748)	Top-1 acc 32.422 (29.768)	Top-5 acc 52.734 (52.864)	lr 0.00579
Warmup Train [32][510/3239]	Time 0.173 (0.264)	Data 0.001 (0.037)	Loss 4.2482 (4.0757)	Top-1 acc 28.906 (29.752)	Top-5 acc 49.609 (52.837)	lr 0.00579
Warmup Train [32][520/3239]	Time 0.262 (0.263)	Data 0.001 (0.037)	Loss 3.9102 (4.0757)	Top-1 acc 30.078 (29.720)	Top-5 acc 53.516 (52.822)	lr 0.00579
Warmup Train [32][530/3239]	Time 0.231 (0.262)	Data 0.001 (0.036)	Loss 4.1074 (4.0753)	Top-1 acc 26.953 (29.715)	Top-5 acc 53.906 (52.855)	lr 0.00578
Warmup Train [32][540/3239]	Time 0.154 (0.261)	Data 0.001 (0.035)	Loss 4.0726 (4.0752)	Top-1 acc 29.297 (29.734)	Top-5 acc 53.906 (52.857)	lr 0.00578
Warmup Train [32][550/3239]	Time 0.192 (0.260)	Data 0.001 (0.035)	Loss 3.9576 (4.0751)	Top-1 acc 29.688 (29.740)	Top-5 acc 54.688 (52.874)	lr 0.00578
Warmup Train [32][560/3239]	Time 0.281 (0.260)	Data 0.001 (0.034)	Loss 4.1674 (4.0745)	Top-1 acc 28.906 (29.736)	Top-5 acc 50.000 (52.864)	lr 0.00577
Warmup Train [32][570/3239]	Time 0.210 (0.259)	Data 0.001 (0.034)	Loss 3.9960 (4.0743)	Top-1 acc 31.641 (29.738)	Top-5 acc 53.906 (52.873)	lr 0.00577
Warmup Train [32][580/3239]	Time 0.310 (0.259)	Data 0.001 (0.033)	Loss 4.1082 (4.0734)	Top-1 acc 28.125 (29.751)	Top-5 acc 54.297 (52.899)	lr 0.00576
Warmup Train [32][590/3239]	Time 0.220 (0.259)	Data 0.001 (0.032)	Loss 3.7919 (4.0718)	Top-1 acc 36.328 (29.758)	Top-5 acc 60.938 (52.943)	lr 0.00576
Warmup Train [32][600/3239]	Time 0.261 (0.258)	Data 0.002 (0.032)	Loss 4.0980 (4.0716)	Top-1 acc 28.125 (29.757)	Top-5 acc 55.078 (52.955)	lr 0.00576
Warmup Train [32][610/3239]	Time 0.260 (0.258)	Data 0.001 (0.031)	Loss 4.0189 (4.0712)	Top-1 acc 27.344 (29.760)	Top-5 acc 51.953 (52.979)	lr 0.00575
Warmup Train [32][620/3239]	Time 0.257 (0.258)	Data 0.001 (0.031)	Loss 3.9635 (4.0711)	Top-1 acc 28.125 (29.760)	Top-5 acc 56.641 (52.977)	lr 0.00575
Warmup Train [32][630/3239]	Time 0.216 (0.258)	Data 0.002 (0.031)	Loss 4.1489 (4.0710)	Top-1 acc 30.469 (29.777)	Top-5 acc 51.172 (52.989)	lr 0.00574
Warmup Train [32][640/3239]	Time 0.311 (0.258)	Data 0.001 (0.030)	Loss 4.0156 (4.0714)	Top-1 acc 30.078 (29.768)	Top-5 acc 53.125 (52.998)	lr 0.00574
Warmup Train [32][650/3239]	Time 0.215 (0.258)	Data 0.002 (0.030)	Loss 4.0444 (4.0721)	Top-1 acc 28.906 (29.760)	Top-5 acc 53.906 (52.997)	lr 0.00574
Warmup Train [32][660/3239]	Time 0.273 (0.257)	Data 0.001 (0.029)	Loss 3.7697 (4.0721)	Top-1 acc 35.938 (29.779)	Top-5 acc 60.156 (52.994)	lr 0.00573
Warmup Train [32][670/3239]	Time 0.171 (0.257)	Data 0.001 (0.029)	Loss 4.0197 (4.0727)	Top-1 acc 29.688 (29.775)	Top-5 acc 55.078 (52.988)	lr 0.00573
Warmup Train [32][680/3239]	Time 0.214 (0.256)	Data 0.002 (0.029)	Loss 4.0562 (4.0728)	Top-1 acc 28.125 (29.767)	Top-5 acc 52.344 (52.987)	lr 0.00572
Warmup Train [32][690/3239]	Time 0.195 (0.256)	Data 0.001 (0.028)	Loss 4.3571 (4.0729)	Top-1 acc 23.438 (29.768)	Top-5 acc 44.141 (52.969)	lr 0.00572
Warmup Train [32][700/3239]	Time 0.206 (0.255)	Data 0.001 (0.028)	Loss 4.0274 (4.0725)	Top-1 acc 33.203 (29.784)	Top-5 acc 53.906 (52.972)	lr 0.00572
Warmup Train [32][710/3239]	Time 0.246 (0.254)	Data 0.001 (0.027)	Loss 3.9486 (4.0725)	Top-1 acc 34.766 (29.795)	Top-5 acc 57.812 (52.967)	lr 0.00571
Warmup Train [32][720/3239]	Time 0.242 (0.254)	Data 0.001 (0.027)	Loss 4.0376 (4.0730)	Top-1 acc 32.031 (29.788)	Top-5 acc 55.469 (52.962)	lr 0.00571
Warmup Train [32][730/3239]	Time 0.219 (0.253)	Data 0.001 (0.027)	Loss 3.9808 (4.0729)	Top-1 acc 34.375 (29.795)	Top-5 acc 54.297 (52.974)	lr 0.00571
Warmup Train [32][740/3239]	Time 0.228 (0.253)	Data 0.001 (0.026)	Loss 3.9716 (4.0723)	Top-1 acc 32.812 (29.801)	Top-5 acc 57.422 (52.990)	lr 0.00570
Warmup Train [32][750/3239]	Time 0.236 (0.253)	Data 0.001 (0.026)	Loss 4.0405 (4.0724)	Top-1 acc 29.688 (29.787)	Top-5 acc 55.078 (52.986)	lr 0.00570
Warmup Train [32][760/3239]	Time 0.329 (0.253)	Data 0.001 (0.026)	Loss 3.8043 (4.0716)	Top-1 acc 31.641 (29.810)	Top-5 acc 60.938 (53.001)	lr 0.00569
Warmup Train [32][770/3239]	Time 0.319 (0.254)	Data 0.001 (0.026)	Loss 4.1370 (4.0712)	Top-1 acc 32.812 (29.827)	Top-5 acc 55.859 (53.016)	lr 0.00569
Warmup Train [32][780/3239]	Time 0.279 (0.254)	Data 0.001 (0.025)	Loss 4.1869 (4.0715)	Top-1 acc 26.953 (29.828)	Top-5 acc 49.609 (53.014)	lr 0.00569
Warmup Train [32][790/3239]	Time 0.248 (0.254)	Data 0.003 (0.025)	Loss 3.8546 (4.0715)	Top-1 acc 33.203 (29.807)	Top-5 acc 58.203 (53.007)	lr 0.00568
Warmup Train [32][800/3239]	Time 0.242 (0.254)	Data 0.003 (0.025)	Loss 4.1246 (4.0711)	Top-1 acc 28.516 (29.813)	Top-5 acc 52.344 (53.024)	lr 0.00568
Warmup Train [32][810/3239]	Time 0.275 (0.254)	Data 0.002 (0.024)	Loss 3.9963 (4.0713)	Top-1 acc 30.469 (29.796)	Top-5 acc 57.422 (53.022)	lr 0.00567
Warmup Train [32][820/3239]	Time 0.263 (0.254)	Data 0.001 (0.024)	Loss 4.1305 (4.0710)	Top-1 acc 33.203 (29.811)	Top-5 acc 51.562 (53.028)	lr 0.00567
Warmup Train [32][830/3239]	Time 0.189 (0.254)	Data 0.002 (0.024)	Loss 4.0549 (4.0710)	Top-1 acc 28.906 (29.814)	Top-5 acc 53.125 (53.024)	lr 0.00567
Warmup Train [32][840/3239]	Time 0.232 (0.254)	Data 0.001 (0.024)	Loss 4.0287 (4.0705)	Top-1 acc 32.422 (29.830)	Top-5 acc 53.906 (53.017)	lr 0.00566
Warmup Train [32][850/3239]	Time 0.186 (0.254)	Data 0.002 (0.023)	Loss 3.9884 (4.0701)	Top-1 acc 31.250 (29.847)	Top-5 acc 55.469 (53.028)	lr 0.00566
Warmup Train [32][860/3239]	Time 0.276 (0.254)	Data 0.001 (0.023)	Loss 4.1386 (4.0712)	Top-1 acc 28.125 (29.830)	Top-5 acc 53.516 (53.009)	lr 0.00566
Warmup Train [32][870/3239]	Time 0.275 (0.253)	Data 0.001 (0.023)	Loss 3.9499 (4.0718)	Top-1 acc 31.641 (29.821)	Top-5 acc 52.734 (53.006)	lr 0.00565
Warmup Train [32][880/3239]	Time 0.218 (0.253)	Data 0.002 (0.023)	Loss 4.1522 (4.0717)	Top-1 acc 29.688 (29.834)	Top-5 acc 52.734 (53.015)	lr 0.00565
Warmup Train [32][890/3239]	Time 0.214 (0.253)	Data 0.002 (0.022)	Loss 3.9577 (4.0712)	Top-1 acc 33.984 (29.859)	Top-5 acc 55.078 (53.030)	lr 0.00564
Warmup Train [32][900/3239]	Time 0.229 (0.253)	Data 0.001 (0.022)	Loss 3.8986 (4.0712)	Top-1 acc 33.594 (29.861)	Top-5 acc 56.641 (53.034)	lr 0.00564
Warmup Train [32][910/3239]	Time 0.236 (0.252)	Data 0.001 (0.022)	Loss 4.0425 (4.0710)	Top-1 acc 30.859 (29.853)	Top-5 acc 53.516 (53.039)	lr 0.00564
Warmup Train [32][920/3239]	Time 0.141 (0.252)	Data 0.001 (0.022)	Loss 4.2591 (4.0704)	Top-1 acc 28.125 (29.864)	Top-5 acc 49.219 (53.049)	lr 0.00563
Warmup Train [32][930/3239]	Time 0.218 (0.252)	Data 0.001 (0.022)	Loss 4.1550 (4.0710)	Top-1 acc 30.078 (29.850)	Top-5 acc 49.609 (53.026)	lr 0.00563
Warmup Train [32][940/3239]	Time 0.197 (0.251)	Data 0.001 (0.021)	Loss 4.0306 (4.0704)	Top-1 acc 30.078 (29.852)	Top-5 acc 52.734 (53.045)	lr 0.00562
Warmup Train [32][950/3239]	Time 0.280 (0.251)	Data 0.001 (0.021)	Loss 4.0300 (4.0700)	Top-1 acc 30.469 (29.854)	Top-5 acc 52.734 (53.059)	lr 0.00562
Warmup Train [32][960/3239]	Time 0.289 (0.251)	Data 0.001 (0.021)	Loss 4.1366 (4.0698)	Top-1 acc 29.688 (29.854)	Top-5 acc 52.734 (53.072)	lr 0.00562
Warmup Train [32][970/3239]	Time 0.345 (0.250)	Data 0.001 (0.021)	Loss 3.9888 (4.0695)	Top-1 acc 29.688 (29.849)	Top-5 acc 54.688 (53.072)	lr 0.00561
Warmup Train [32][980/3239]	Time 0.239 (0.250)	Data 0.001 (0.021)	Loss 3.9285 (4.0696)	Top-1 acc 31.641 (29.844)	Top-5 acc 54.688 (53.072)	lr 0.00561
Warmup Train [32][990/3239]	Time 0.230 (0.250)	Data 0.001 (0.020)	Loss 4.0495 (4.0692)	Top-1 acc 32.422 (29.858)	Top-5 acc 52.344 (53.086)	lr 0.00561
Warmup Train [32][1000/3239]	Time 0.212 (0.250)	Data 0.001 (0.020)	Loss 4.1336 (4.0692)	Top-1 acc 23.828 (29.849)	Top-5 acc 51.172 (53.086)	lr 0.00560
Warmup Train [32][1010/3239]	Time 0.228 (0.249)	Data 0.002 (0.020)	Loss 3.8967 (4.0696)	Top-1 acc 32.031 (29.831)	Top-5 acc 56.641 (53.072)	lr 0.00560
Warmup Train [32][1020/3239]	Time 0.236 (0.249)	Data 0.001 (0.020)	Loss 4.1315 (4.0700)	Top-1 acc 28.906 (29.821)	Top-5 acc 55.469 (53.074)	lr 0.00559
Warmup Train [32][1030/3239]	Time 0.260 (0.249)	Data 0.001 (0.020)	Loss 4.2261 (4.0699)	Top-1 acc 26.562 (29.823)	Top-5 acc 48.047 (53.068)	lr 0.00559
Warmup Train [32][1040/3239]	Time 0.225 (0.249)	Data 0.001 (0.020)	Loss 4.1032 (4.0700)	Top-1 acc 30.469 (29.823)	Top-5 acc 54.688 (53.069)	lr 0.00559
Warmup Train [32][1050/3239]	Time 0.224 (0.248)	Data 0.001 (0.019)	Loss 3.9256 (4.0702)	Top-1 acc 33.984 (29.822)	Top-5 acc 57.031 (53.065)	lr 0.00558
Warmup Train [32][1060/3239]	Time 0.228 (0.248)	Data 0.001 (0.019)	Loss 4.0415 (4.0698)	Top-1 acc 33.594 (29.829)	Top-5 acc 53.125 (53.068)	lr 0.00558
Warmup Train [32][1070/3239]	Time 0.156 (0.248)	Data 0.001 (0.019)	Loss 3.9470 (4.0700)	Top-1 acc 32.812 (29.828)	Top-5 acc 55.859 (53.069)	lr 0.00558
Warmup Train [32][1080/3239]	Time 0.304 (0.248)	Data 0.002 (0.019)	Loss 3.9830 (4.0700)	Top-1 acc 31.641 (29.822)	Top-5 acc 57.031 (53.063)	lr 0.00557
Warmup Train [32][1090/3239]	Time 0.269 (0.247)	Data 0.002 (0.019)	Loss 3.9442 (4.0702)	Top-1 acc 32.031 (29.820)	Top-5 acc 55.078 (53.049)	lr 0.00557
Warmup Train [32][1100/3239]	Time 0.201 (0.247)	Data 0.001 (0.019)	Loss 3.9547 (4.0704)	Top-1 acc 34.375 (29.823)	Top-5 acc 55.469 (53.052)	lr 0.00556
Warmup Train [32][1110/3239]	Time 0.224 (0.247)	Data 0.001 (0.018)	Loss 4.2480 (4.0702)	Top-1 acc 23.438 (29.828)	Top-5 acc 47.266 (53.056)	lr 0.00556
Warmup Train [32][1120/3239]	Time 0.177 (0.247)	Data 0.001 (0.018)	Loss 4.1405 (4.0704)	Top-1 acc 26.953 (29.820)	Top-5 acc 48.438 (53.051)	lr 0.00556
Warmup Train [32][1130/3239]	Time 0.226 (0.247)	Data 0.001 (0.018)	Loss 4.0125 (4.0705)	Top-1 acc 30.469 (29.807)	Top-5 acc 54.688 (53.038)	lr 0.00555
Warmup Train [32][1140/3239]	Time 0.290 (0.246)	Data 0.001 (0.018)	Loss 3.7833 (4.0698)	Top-1 acc 32.422 (29.811)	Top-5 acc 57.422 (53.057)	lr 0.00555
Warmup Train [32][1150/3239]	Time 0.231 (0.246)	Data 0.001 (0.018)	Loss 4.1363 (4.0699)	Top-1 acc 28.516 (29.805)	Top-5 acc 53.125 (53.053)	lr 0.00554
Warmup Train [32][1160/3239]	Time 0.236 (0.246)	Data 0.001 (0.018)	Loss 4.0231 (4.0696)	Top-1 acc 30.469 (29.813)	Top-5 acc 54.688 (53.059)	lr 0.00554
Warmup Train [32][1170/3239]	Time 0.264 (0.246)	Data 0.001 (0.018)	Loss 3.9151 (4.0690)	Top-1 acc 32.812 (29.821)	Top-5 acc 61.328 (53.073)	lr 0.00554
Warmup Train [32][1180/3239]	Time 0.226 (0.246)	Data 0.001 (0.018)	Loss 4.0515 (4.0683)	Top-1 acc 31.641 (29.831)	Top-5 acc 55.469 (53.099)	lr 0.00553
Warmup Train [32][1190/3239]	Time 0.288 (0.246)	Data 0.001 (0.017)	Loss 4.0839 (4.0681)	Top-1 acc 30.078 (29.838)	Top-5 acc 50.781 (53.112)	lr 0.00553
Warmup Train [32][1200/3239]	Time 0.148 (0.246)	Data 0.001 (0.017)	Loss 4.1639 (4.0677)	Top-1 acc 32.422 (29.852)	Top-5 acc 52.734 (53.117)	lr 0.00553
Warmup Train [32][1210/3239]	Time 0.283 (0.245)	Data 0.001 (0.017)	Loss 3.9619 (4.0679)	Top-1 acc 30.859 (29.841)	Top-5 acc 54.688 (53.122)	lr 0.00552
Warmup Train [32][1220/3239]	Time 0.266 (0.245)	Data 0.001 (0.017)	Loss 4.0018 (4.0677)	Top-1 acc 32.812 (29.849)	Top-5 acc 53.906 (53.126)	lr 0.00552
Warmup Train [32][1230/3239]	Time 0.281 (0.245)	Data 0.002 (0.017)	Loss 3.9006 (4.0676)	Top-1 acc 34.766 (29.858)	Top-5 acc 59.375 (53.129)	lr 0.00551
Warmup Train [32][1240/3239]	Time 0.202 (0.245)	Data 0.001 (0.017)	Loss 4.0702 (4.0672)	Top-1 acc 31.250 (29.857)	Top-5 acc 54.688 (53.140)	lr 0.00551
Warmup Train [32][1250/3239]	Time 0.146 (0.245)	Data 0.002 (0.017)	Loss 4.0969 (4.0676)	Top-1 acc 31.250 (29.860)	Top-5 acc 50.781 (53.130)	lr 0.00551
Warmup Train [32][1260/3239]	Time 0.224 (0.244)	Data 0.001 (0.017)	Loss 4.1796 (4.0680)	Top-1 acc 28.906 (29.848)	Top-5 acc 52.734 (53.122)	lr 0.00550
Warmup Train [32][1270/3239]	Time 0.197 (0.244)	Data 0.001 (0.017)	Loss 3.8398 (4.0679)	Top-1 acc 32.031 (29.852)	Top-5 acc 56.641 (53.130)	lr 0.00550
Warmup Train [32][1280/3239]	Time 0.243 (0.244)	Data 0.001 (0.016)	Loss 4.1260 (4.0674)	Top-1 acc 25.391 (29.858)	Top-5 acc 51.562 (53.145)	lr 0.00550
Warmup Train [32][1290/3239]	Time 0.319 (0.244)	Data 0.001 (0.016)	Loss 4.0360 (4.0674)	Top-1 acc 25.781 (29.850)	Top-5 acc 55.469 (53.141)	lr 0.00549
Warmup Train [32][1300/3239]	Time 0.160 (0.244)	Data 0.001 (0.016)	Loss 4.0447 (4.0672)	Top-1 acc 35.547 (29.867)	Top-5 acc 55.078 (53.145)	lr 0.00549
Warmup Train [32][1310/3239]	Time 0.204 (0.244)	Data 0.001 (0.016)	Loss 4.1532 (4.0677)	Top-1 acc 25.000 (29.854)	Top-5 acc 51.172 (53.135)	lr 0.00548
Warmup Train [32][1320/3239]	Time 0.192 (0.244)	Data 0.001 (0.016)	Loss 4.2503 (4.0673)	Top-1 acc 25.391 (29.861)	Top-5 acc 48.047 (53.145)	lr 0.00548
Warmup Train [32][1330/3239]	Time 0.182 (0.243)	Data 0.001 (0.016)	Loss 3.9884 (4.0669)	Top-1 acc 32.031 (29.873)	Top-5 acc 53.125 (53.157)	lr 0.00548
Warmup Train [32][1340/3239]	Time 0.207 (0.243)	Data 0.001 (0.016)	Loss 3.7202 (4.0667)	Top-1 acc 34.766 (29.880)	Top-5 acc 62.500 (53.165)	lr 0.00547
Warmup Train [32][1350/3239]	Time 0.165 (0.243)	Data 0.002 (0.016)	Loss 4.2103 (4.0666)	Top-1 acc 28.125 (29.879)	Top-5 acc 49.219 (53.168)	lr 0.00547
Warmup Train [32][1360/3239]	Time 0.203 (0.243)	Data 0.001 (0.016)	Loss 4.1134 (4.0670)	Top-1 acc 26.562 (29.867)	Top-5 acc 53.125 (53.153)	lr 0.00547
Warmup Train [32][1370/3239]	Time 0.268 (0.242)	Data 0.026 (0.016)	Loss 3.8028 (4.0668)	Top-1 acc 35.547 (29.871)	Top-5 acc 57.031 (53.158)	lr 0.00546
Warmup Train [32][1380/3239]	Time 0.187 (0.242)	Data 0.001 (0.015)	Loss 4.1827 (4.0665)	Top-1 acc 29.297 (29.879)	Top-5 acc 50.000 (53.162)	lr 0.00546
Warmup Train [32][1390/3239]	Time 0.242 (0.242)	Data 0.001 (0.015)	Loss 4.0066 (4.0666)	Top-1 acc 31.641 (29.879)	Top-5 acc 54.688 (53.157)	lr 0.00545
Warmup Train [32][1400/3239]	Time 0.214 (0.242)	Data 0.001 (0.015)	Loss 4.0340 (4.0666)	Top-1 acc 31.641 (29.881)	Top-5 acc 55.469 (53.155)	lr 0.00545
Warmup Train [32][1410/3239]	Time 0.268 (0.242)	Data 0.001 (0.015)	Loss 4.0292 (4.0667)	Top-1 acc 29.297 (29.878)	Top-5 acc 55.078 (53.150)	lr 0.00545
Warmup Train [32][1420/3239]	Time 0.204 (0.242)	Data 0.002 (0.015)	Loss 3.9353 (4.0668)	Top-1 acc 32.422 (29.878)	Top-5 acc 56.641 (53.151)	lr 0.00544
Warmup Train [32][1430/3239]	Time 0.207 (0.242)	Data 0.001 (0.015)	Loss 3.8510 (4.0666)	Top-1 acc 32.031 (29.885)	Top-5 acc 58.203 (53.153)	lr 0.00544
Warmup Train [32][1440/3239]	Time 0.211 (0.242)	Data 0.002 (0.015)	Loss 4.2629 (4.0667)	Top-1 acc 27.344 (29.885)	Top-5 acc 51.172 (53.150)	lr 0.00543
Warmup Train [32][1450/3239]	Time 0.150 (0.241)	Data 0.001 (0.015)	Loss 4.2367 (4.0666)	Top-1 acc 26.562 (29.888)	Top-5 acc 51.172 (53.152)	lr 0.00543
Warmup Train [32][1460/3239]	Time 0.147 (0.241)	Data 0.002 (0.015)	Loss 4.0076 (4.0665)	Top-1 acc 32.812 (29.893)	Top-5 acc 53.516 (53.159)	lr 0.00543
Warmup Train [32][1470/3239]	Time 0.200 (0.241)	Data 0.001 (0.015)	Loss 4.2629 (4.0665)	Top-1 acc 25.781 (29.896)	Top-5 acc 47.656 (53.161)	lr 0.00542
Warmup Train [32][1480/3239]	Time 0.272 (0.241)	Data 0.001 (0.015)	Loss 4.0536 (4.0665)	Top-1 acc 29.688 (29.902)	Top-5 acc 51.562 (53.157)	lr 0.00542
Warmup Train [32][1490/3239]	Time 0.228 (0.241)	Data 0.001 (0.015)	Loss 4.0416 (4.0665)	Top-1 acc 30.859 (29.907)	Top-5 acc 53.906 (53.165)	lr 0.00542
Warmup Train [32][1500/3239]	Time 0.142 (0.241)	Data 0.001 (0.014)	Loss 4.2436 (4.0669)	Top-1 acc 28.516 (29.902)	Top-5 acc 51.953 (53.161)	lr 0.00541
Warmup Train [32][1510/3239]	Time 0.223 (0.241)	Data 0.001 (0.014)	Loss 4.2192 (4.0671)	Top-1 acc 28.516 (29.901)	Top-5 acc 52.344 (53.159)	lr 0.00541
Warmup Train [32][1520/3239]	Time 0.345 (0.241)	Data 0.001 (0.014)	Loss 4.1540 (4.0674)	Top-1 acc 25.391 (29.887)	Top-5 acc 49.609 (53.146)	lr 0.00540
Warmup Train [32][1530/3239]	Time 0.195 (0.240)	Data 0.002 (0.014)	Loss 4.0349 (4.0674)	Top-1 acc 28.906 (29.879)	Top-5 acc 54.297 (53.139)	lr 0.00540
Warmup Train [32][1540/3239]	Time 0.208 (0.240)	Data 0.001 (0.014)	Loss 4.0707 (4.0674)	Top-1 acc 29.688 (29.879)	Top-5 acc 49.219 (53.137)	lr 0.00540
Warmup Train [32][1550/3239]	Time 0.205 (0.240)	Data 0.001 (0.014)	Loss 3.8533 (4.0673)	Top-1 acc 31.250 (29.876)	Top-5 acc 57.422 (53.136)	lr 0.00539
Warmup Train [32][1560/3239]	Time 0.185 (0.240)	Data 0.001 (0.014)	Loss 4.1844 (4.0673)	Top-1 acc 29.297 (29.880)	Top-5 acc 49.219 (53.140)	lr 0.00539
Warmup Train [32][1570/3239]	Time 0.270 (0.240)	Data 0.002 (0.014)	Loss 4.0767 (4.0673)	Top-1 acc 30.078 (29.876)	Top-5 acc 49.609 (53.132)	lr 0.00539
Warmup Train [32][1580/3239]	Time 0.202 (0.240)	Data 0.001 (0.014)	Loss 3.9979 (4.0674)	Top-1 acc 29.297 (29.879)	Top-5 acc 56.250 (53.134)	lr 0.00538
Warmup Train [32][1590/3239]	Time 0.204 (0.240)	Data 0.002 (0.014)	Loss 4.0465 (4.0672)	Top-1 acc 33.984 (29.883)	Top-5 acc 51.953 (53.143)	lr 0.00538
Warmup Train [32][1600/3239]	Time 0.224 (0.240)	Data 0.001 (0.014)	Loss 3.9467 (4.0668)	Top-1 acc 32.812 (29.891)	Top-5 acc 54.688 (53.155)	lr 0.00537
Warmup Train [32][1610/3239]	Time 0.251 (0.239)	Data 0.001 (0.014)	Loss 3.9240 (4.0663)	Top-1 acc 31.250 (29.900)	Top-5 acc 52.734 (53.164)	lr 0.00537
Warmup Train [32][1620/3239]	Time 0.175 (0.239)	Data 0.001 (0.014)	Loss 4.0856 (4.0662)	Top-1 acc 25.781 (29.898)	Top-5 acc 54.688 (53.164)	lr 0.00537
Warmup Train [32][1630/3239]	Time 0.319 (0.239)	Data 0.001 (0.014)	Loss 4.1207 (4.0662)	Top-1 acc 29.297 (29.894)	Top-5 acc 55.469 (53.165)	lr 0.00536
Warmup Train [32][1640/3239]	Time 0.201 (0.239)	Data 0.002 (0.014)	Loss 4.1173 (4.0665)	Top-1 acc 30.469 (29.894)	Top-5 acc 50.000 (53.156)	lr 0.00536
Warmup Train [32][1650/3239]	Time 0.233 (0.239)	Data 0.001 (0.013)	Loss 4.0223 (4.0661)	Top-1 acc 29.688 (29.894)	Top-5 acc 54.688 (53.158)	lr 0.00536
Warmup Train [32][1660/3239]	Time 0.218 (0.239)	Data 0.001 (0.013)	Loss 4.0274 (4.0659)	Top-1 acc 28.125 (29.902)	Top-5 acc 56.641 (53.160)	lr 0.00535
Warmup Train [32][1670/3239]	Time 0.178 (0.239)	Data 0.001 (0.013)	Loss 4.0258 (4.0661)	Top-1 acc 32.031 (29.895)	Top-5 acc 51.953 (53.157)	lr 0.00535
Warmup Train [32][1680/3239]	Time 0.214 (0.239)	Data 0.001 (0.013)	Loss 4.1217 (4.0658)	Top-1 acc 27.734 (29.904)	Top-5 acc 52.734 (53.164)	lr 0.00534
Warmup Train [32][1690/3239]	Time 0.152 (0.239)	Data 0.001 (0.013)	Loss 4.0378 (4.0660)	Top-1 acc 30.469 (29.895)	Top-5 acc 53.516 (53.156)	lr 0.00534
Warmup Train [32][1700/3239]	Time 0.221 (0.239)	Data 0.001 (0.013)	Loss 4.0784 (4.0659)	Top-1 acc 27.734 (29.902)	Top-5 acc 54.688 (53.162)	lr 0.00534
Warmup Train [32][1710/3239]	Time 0.221 (0.239)	Data 0.001 (0.013)	Loss 4.0081 (4.0658)	Top-1 acc 30.078 (29.908)	Top-5 acc 53.516 (53.165)	lr 0.00533
Warmup Train [32][1720/3239]	Time 0.193 (0.239)	Data 0.001 (0.013)	Loss 4.0397 (4.0657)	Top-1 acc 28.906 (29.904)	Top-5 acc 54.688 (53.167)	lr 0.00533
Warmup Train [32][1730/3239]	Time 0.203 (0.238)	Data 0.002 (0.013)	Loss 3.8901 (4.0657)	Top-1 acc 34.766 (29.908)	Top-5 acc 55.078 (53.166)	lr 0.00533
Warmup Train [32][1740/3239]	Time 0.204 (0.238)	Data 0.001 (0.013)	Loss 4.2022 (4.0659)	Top-1 acc 28.906 (29.905)	Top-5 acc 47.656 (53.165)	lr 0.00532
Warmup Train [32][1750/3239]	Time 0.243 (0.238)	Data 0.001 (0.013)	Loss 4.0910 (4.0660)	Top-1 acc 30.469 (29.905)	Top-5 acc 51.953 (53.161)	lr 0.00532
Warmup Train [32][1760/3239]	Time 0.192 (0.238)	Data 0.001 (0.013)	Loss 4.3292 (4.0662)	Top-1 acc 25.781 (29.902)	Top-5 acc 51.562 (53.162)	lr 0.00531
Warmup Train [32][1770/3239]	Time 0.199 (0.238)	Data 0.001 (0.013)	Loss 3.8881 (4.0662)	Top-1 acc 32.812 (29.907)	Top-5 acc 57.422 (53.166)	lr 0.00531
Warmup Train [32][1780/3239]	Time 0.186 (0.238)	Data 0.001 (0.013)	Loss 4.0072 (4.0659)	Top-1 acc 29.688 (29.915)	Top-5 acc 56.641 (53.178)	lr 0.00531
Warmup Train [32][1790/3239]	Time 0.213 (0.238)	Data 0.001 (0.013)	Loss 4.1185 (4.0660)	Top-1 acc 26.172 (29.913)	Top-5 acc 51.172 (53.176)	lr 0.00530
Warmup Train [32][1800/3239]	Time 0.295 (0.238)	Data 0.001 (0.013)	Loss 3.9240 (4.0658)	Top-1 acc 32.422 (29.910)	Top-5 acc 56.250 (53.182)	lr 0.00530
Warmup Train [32][1810/3239]	Time 0.172 (0.238)	Data 0.001 (0.012)	Loss 4.1079 (4.0657)	Top-1 acc 30.469 (29.908)	Top-5 acc 50.391 (53.183)	lr 0.00530
Warmup Train [32][1820/3239]	Time 0.225 (0.238)	Data 0.001 (0.012)	Loss 4.1122 (4.0657)	Top-1 acc 29.297 (29.907)	Top-5 acc 53.516 (53.181)	lr 0.00529
Warmup Train [32][1830/3239]	Time 0.195 (0.237)	Data 0.001 (0.012)	Loss 4.0003 (4.0656)	Top-1 acc 33.594 (29.911)	Top-5 acc 55.078 (53.185)	lr 0.00529
Warmup Train [32][1840/3239]	Time 0.344 (0.237)	Data 0.001 (0.012)	Loss 3.8029 (4.0655)	Top-1 acc 32.812 (29.910)	Top-5 acc 59.766 (53.191)	lr 0.00528
Warmup Train [32][1850/3239]	Time 0.145 (0.237)	Data 0.001 (0.012)	Loss 4.0619 (4.0656)	Top-1 acc 32.031 (29.911)	Top-5 acc 53.906 (53.192)	lr 0.00528
Warmup Train [32][1860/3239]	Time 0.155 (0.237)	Data 0.001 (0.012)	Loss 4.1958 (4.0660)	Top-1 acc 31.250 (29.908)	Top-5 acc 49.609 (53.184)	lr 0.00528
Warmup Train [32][1870/3239]	Time 0.251 (0.237)	Data 0.001 (0.012)	Loss 4.2540 (4.0658)	Top-1 acc 26.953 (29.914)	Top-5 acc 48.828 (53.194)	lr 0.00527
Warmup Train [32][1880/3239]	Time 0.244 (0.237)	Data 0.001 (0.012)	Loss 4.0517 (4.0657)	Top-1 acc 28.125 (29.913)	Top-5 acc 50.000 (53.192)	lr 0.00527
Warmup Train [32][1890/3239]	Time 0.246 (0.237)	Data 0.001 (0.012)	Loss 4.0543 (4.0653)	Top-1 acc 28.906 (29.924)	Top-5 acc 53.125 (53.201)	lr 0.00527
Warmup Train [32][1900/3239]	Time 0.190 (0.237)	Data 0.001 (0.012)	Loss 3.9102 (4.0650)	Top-1 acc 32.812 (29.926)	Top-5 acc 57.031 (53.207)	lr 0.00526
Warmup Train [32][1910/3239]	Time 0.213 (0.237)	Data 0.001 (0.012)	Loss 4.0456 (4.0653)	Top-1 acc 31.250 (29.927)	Top-5 acc 53.516 (53.202)	lr 0.00526
Warmup Train [32][1920/3239]	Time 0.212 (0.237)	Data 0.001 (0.012)	Loss 4.0284 (4.0651)	Top-1 acc 28.906 (29.937)	Top-5 acc 53.906 (53.203)	lr 0.00526
Warmup Train [32][1930/3239]	Time 0.225 (0.237)	Data 0.001 (0.012)	Loss 4.0070 (4.0650)	Top-1 acc 30.469 (29.940)	Top-5 acc 53.906 (53.205)	lr 0.00525
Warmup Train [32][1940/3239]	Time 0.356 (0.237)	Data 0.001 (0.012)	Loss 4.0725 (4.0649)	Top-1 acc 30.859 (29.948)	Top-5 acc 54.297 (53.207)	lr 0.00525
Warmup Train [32][1950/3239]	Time 0.179 (0.237)	Data 0.001 (0.012)	Loss 4.1222 (4.0649)	Top-1 acc 28.906 (29.949)	Top-5 acc 55.078 (53.208)	lr 0.00524
Warmup Train [32][1960/3239]	Time 0.188 (0.237)	Data 0.001 (0.012)	Loss 3.9803 (4.0648)	Top-1 acc 32.422 (29.949)	Top-5 acc 55.859 (53.211)	lr 0.00524
Warmup Train [32][1970/3239]	Time 0.250 (0.236)	Data 0.001 (0.012)	Loss 4.2148 (4.0647)	Top-1 acc 27.344 (29.950)	Top-5 acc 48.047 (53.213)	lr 0.00524
Warmup Train [32][1980/3239]	Time 0.239 (0.236)	Data 0.001 (0.012)	Loss 4.1551 (4.0647)	Top-1 acc 28.516 (29.947)	Top-5 acc 51.172 (53.214)	lr 0.00523
Warmup Train [32][1990/3239]	Time 0.196 (0.236)	Data 0.002 (0.012)	Loss 4.1440 (4.0647)	Top-1 acc 29.688 (29.945)	Top-5 acc 50.391 (53.209)	lr 0.00523
Warmup Train [32][2000/3239]	Time 0.193 (0.236)	Data 0.001 (0.012)	Loss 4.0260 (4.0646)	Top-1 acc 30.078 (29.941)	Top-5 acc 53.906 (53.208)	lr 0.00523
Warmup Train [32][2010/3239]	Time 0.217 (0.236)	Data 0.001 (0.012)	Loss 3.9823 (4.0644)	Top-1 acc 29.688 (29.943)	Top-5 acc 54.297 (53.211)	lr 0.00522
Warmup Train [32][2020/3239]	Time 0.140 (0.236)	Data 0.002 (0.011)	Loss 4.1365 (4.0643)	Top-1 acc 29.297 (29.941)	Top-5 acc 50.781 (53.214)	lr 0.00522
Warmup Train [32][2030/3239]	Time 0.194 (0.236)	Data 0.002 (0.011)	Loss 4.0045 (4.0642)	Top-1 acc 31.641 (29.940)	Top-5 acc 57.031 (53.217)	lr 0.00521
Warmup Train [32][2040/3239]	Time 0.160 (0.236)	Data 0.001 (0.011)	Loss 4.0394 (4.0641)	Top-1 acc 32.812 (29.940)	Top-5 acc 52.344 (53.221)	lr 0.00521
Warmup Train [32][2050/3239]	Time 0.385 (0.236)	Data 0.001 (0.011)	Loss 3.9470 (4.0642)	Top-1 acc 33.984 (29.938)	Top-5 acc 58.594 (53.219)	lr 0.00521
Warmup Train [32][2060/3239]	Time 0.220 (0.236)	Data 0.001 (0.011)	Loss 3.9842 (4.0642)	Top-1 acc 33.594 (29.935)	Top-5 acc 54.688 (53.223)	lr 0.00520
Warmup Train [32][2070/3239]	Time 0.217 (0.236)	Data 0.001 (0.011)	Loss 3.7776 (4.0638)	Top-1 acc 35.547 (29.942)	Top-5 acc 58.203 (53.231)	lr 0.00520
Warmup Train [32][2080/3239]	Time 0.246 (0.235)	Data 0.001 (0.011)	Loss 4.0302 (4.0636)	Top-1 acc 32.812 (29.948)	Top-5 acc 52.344 (53.232)	lr 0.00520
Warmup Train [32][2090/3239]	Time 0.197 (0.235)	Data 0.001 (0.011)	Loss 4.1541 (4.0637)	Top-1 acc 26.172 (29.942)	Top-5 acc 50.391 (53.235)	lr 0.00519
Warmup Train [32][2100/3239]	Time 0.184 (0.235)	Data 0.001 (0.011)	Loss 4.1509 (4.0634)	Top-1 acc 28.906 (29.950)	Top-5 acc 50.000 (53.240)	lr 0.00519
Warmup Train [32][2110/3239]	Time 0.218 (0.235)	Data 0.001 (0.011)	Loss 3.9783 (4.0634)	Top-1 acc 31.250 (29.946)	Top-5 acc 56.641 (53.237)	lr 0.00518
Warmup Train [32][2120/3239]	Time 0.136 (0.235)	Data 0.002 (0.011)	Loss 3.8186 (4.0631)	Top-1 acc 33.203 (29.950)	Top-5 acc 56.250 (53.240)	lr 0.00518
Warmup Train [32][2130/3239]	Time 0.165 (0.235)	Data 0.002 (0.011)	Loss 4.0804 (4.0633)	Top-1 acc 28.906 (29.946)	Top-5 acc 55.859 (53.240)	lr 0.00518
Warmup Train [32][2140/3239]	Time 0.187 (0.235)	Data 0.001 (0.011)	Loss 4.0047 (4.0632)	Top-1 acc 35.547 (29.952)	Top-5 acc 55.469 (53.242)	lr 0.00517
Warmup Train [32][2150/3239]	Time 0.202 (0.235)	Data 0.001 (0.011)	Loss 4.1766 (4.0632)	Top-1 acc 23.438 (29.951)	Top-5 acc 53.516 (53.244)	lr 0.00517
Warmup Train [32][2160/3239]	Time 0.166 (0.235)	Data 0.002 (0.011)	Loss 4.0391 (4.0632)	Top-1 acc 30.859 (29.953)	Top-5 acc 56.641 (53.244)	lr 0.00517
Warmup Train [32][2170/3239]	Time 0.433 (0.235)	Data 0.001 (0.011)	Loss 4.2712 (4.0636)	Top-1 acc 23.828 (29.950)	Top-5 acc 50.781 (53.241)	lr 0.00516
Warmup Train [32][2180/3239]	Time 0.168 (0.235)	Data 0.001 (0.011)	Loss 4.0318 (4.0637)	Top-1 acc 27.734 (29.944)	Top-5 acc 52.734 (53.240)	lr 0.00516
Warmup Train [32][2190/3239]	Time 0.235 (0.235)	Data 0.002 (0.011)	Loss 4.0472 (4.0637)	Top-1 acc 32.031 (29.946)	Top-5 acc 54.688 (53.239)	lr 0.00516
Warmup Train [32][2200/3239]	Time 0.167 (0.235)	Data 0.001 (0.011)	Loss 4.1333 (4.0636)	Top-1 acc 30.859 (29.942)	Top-5 acc 52.344 (53.239)	lr 0.00515
Warmup Train [32][2210/3239]	Time 0.268 (0.235)	Data 0.001 (0.011)	Loss 3.8194 (4.0636)	Top-1 acc 34.375 (29.944)	Top-5 acc 57.422 (53.237)	lr 0.00515
Warmup Train [32][2220/3239]	Time 0.152 (0.235)	Data 0.001 (0.011)	Loss 3.9032 (4.0633)	Top-1 acc 33.203 (29.953)	Top-5 acc 55.078 (53.240)	lr 0.00514
Warmup Train [32][2230/3239]	Time 0.206 (0.235)	Data 0.001 (0.011)	Loss 3.9725 (4.0632)	Top-1 acc 29.297 (29.957)	Top-5 acc 51.953 (53.238)	lr 0.00514
Warmup Train [32][2240/3239]	Time 0.197 (0.235)	Data 0.001 (0.011)	Loss 4.3204 (4.0635)	Top-1 acc 21.484 (29.950)	Top-5 acc 48.438 (53.231)	lr 0.00514
Warmup Train [32][2250/3239]	Time 0.186 (0.235)	Data 0.001 (0.011)	Loss 3.9822 (4.0632)	Top-1 acc 31.250 (29.957)	Top-5 acc 58.984 (53.243)	lr 0.00513
Warmup Train [32][2260/3239]	Time 0.230 (0.234)	Data 0.001 (0.011)	Loss 3.9398 (4.0629)	Top-1 acc 28.516 (29.958)	Top-5 acc 56.641 (53.249)	lr 0.00513
Warmup Train [32][2270/3239]	Time 0.227 (0.234)	Data 0.001 (0.011)	Loss 3.9491 (4.0631)	Top-1 acc 35.156 (29.952)	Top-5 acc 57.812 (53.248)	lr 0.00513
Warmup Train [32][2280/3239]	Time 0.318 (0.234)	Data 0.001 (0.011)	Loss 4.0727 (4.0632)	Top-1 acc 34.766 (29.953)	Top-5 acc 53.125 (53.247)	lr 0.00512
Warmup Train [32][2290/3239]	Time 0.281 (0.234)	Data 0.002 (0.010)	Loss 4.0116 (4.0631)	Top-1 acc 31.641 (29.959)	Top-5 acc 53.516 (53.252)	lr 0.00512
Warmup Train [32][2300/3239]	Time 0.202 (0.234)	Data 0.001 (0.010)	Loss 4.1083 (4.0634)	Top-1 acc 28.516 (29.952)	Top-5 acc 50.391 (53.247)	lr 0.00511
Warmup Train [32][2310/3239]	Time 0.175 (0.234)	Data 0.001 (0.010)	Loss 4.1026 (4.0635)	Top-1 acc 31.250 (29.945)	Top-5 acc 48.047 (53.242)	lr 0.00511
Warmup Train [32][2320/3239]	Time 0.222 (0.234)	Data 0.002 (0.010)	Loss 4.0190 (4.0634)	Top-1 acc 31.641 (29.948)	Top-5 acc 56.250 (53.238)	lr 0.00511
Warmup Train [32][2330/3239]	Time 0.231 (0.234)	Data 0.001 (0.010)	Loss 4.1214 (4.0635)	Top-1 acc 25.781 (29.948)	Top-5 acc 53.125 (53.235)	lr 0.00510
Warmup Train [32][2340/3239]	Time 0.218 (0.234)	Data 0.001 (0.010)	Loss 4.0681 (4.0631)	Top-1 acc 30.469 (29.960)	Top-5 acc 54.297 (53.247)	lr 0.00510
Warmup Train [32][2350/3239]	Time 0.225 (0.234)	Data 0.001 (0.010)	Loss 4.1163 (4.0631)	Top-1 acc 24.219 (29.959)	Top-5 acc 53.516 (53.248)	lr 0.00510
Warmup Train [32][2360/3239]	Time 0.270 (0.234)	Data 0.001 (0.010)	Loss 3.9964 (4.0632)	Top-1 acc 35.938 (29.957)	Top-5 acc 54.688 (53.247)	lr 0.00509
Warmup Train [32][2370/3239]	Time 0.164 (0.234)	Data 0.001 (0.010)	Loss 3.9808 (4.0631)	Top-1 acc 34.766 (29.954)	Top-5 acc 59.766 (53.245)	lr 0.00509
Warmup Train [32][2380/3239]	Time 0.314 (0.234)	Data 0.001 (0.010)	Loss 4.0813 (4.0632)	Top-1 acc 31.250 (29.952)	Top-5 acc 51.562 (53.243)	lr 0.00509
Warmup Train [32][2390/3239]	Time 0.253 (0.234)	Data 0.001 (0.010)	Loss 3.9160 (4.0630)	Top-1 acc 32.812 (29.955)	Top-5 acc 58.984 (53.245)	lr 0.00508
Warmup Train [32][2400/3239]	Time 0.182 (0.234)	Data 0.001 (0.010)	Loss 3.8983 (4.0633)	Top-1 acc 33.594 (29.948)	Top-5 acc 56.641 (53.235)	lr 0.00508
Warmup Train [32][2410/3239]	Time 0.208 (0.234)	Data 0.003 (0.010)	Loss 4.1013 (4.0633)	Top-1 acc 33.203 (29.949)	Top-5 acc 53.516 (53.234)	lr 0.00507
Warmup Train [32][2420/3239]	Time 0.155 (0.234)	Data 0.001 (0.010)	Loss 4.1022 (4.0635)	Top-1 acc 32.812 (29.945)	Top-5 acc 52.734 (53.236)	lr 0.00507
Warmup Train [32][2430/3239]	Time 0.221 (0.234)	Data 0.044 (0.010)	Loss 4.1471 (4.0633)	Top-1 acc 28.906 (29.948)	Top-5 acc 48.047 (53.239)	lr 0.00507
Warmup Train [32][2440/3239]	Time 0.248 (0.234)	Data 0.001 (0.010)	Loss 4.1050 (4.0631)	Top-1 acc 28.125 (29.947)	Top-5 acc 51.172 (53.242)	lr 0.00506
Warmup Train [32][2450/3239]	Time 0.166 (0.234)	Data 0.001 (0.010)	Loss 4.1710 (4.0630)	Top-1 acc 30.859 (29.948)	Top-5 acc 47.656 (53.245)	lr 0.00506
Warmup Train [32][2460/3239]	Time 0.193 (0.234)	Data 0.001 (0.010)	Loss 4.0668 (4.0631)	Top-1 acc 30.469 (29.948)	Top-5 acc 53.906 (53.244)	lr 0.00506
Warmup Train [32][2470/3239]	Time 0.240 (0.234)	Data 0.001 (0.010)	Loss 3.9754 (4.0629)	Top-1 acc 31.641 (29.949)	Top-5 acc 53.516 (53.247)	lr 0.00505
Warmup Train [32][2480/3239]	Time 0.237 (0.234)	Data 0.002 (0.010)	Loss 3.9468 (4.0628)	Top-1 acc 33.203 (29.946)	Top-5 acc 56.641 (53.247)	lr 0.00505
Warmup Train [32][2490/3239]	Time 0.377 (0.234)	Data 0.001 (0.010)	Loss 4.0603 (4.0629)	Top-1 acc 30.078 (29.945)	Top-5 acc 55.859 (53.252)	lr 0.00505
Warmup Train [32][2500/3239]	Time 0.168 (0.234)	Data 0.001 (0.010)	Loss 3.9581 (4.0629)	Top-1 acc 28.516 (29.940)	Top-5 acc 57.031 (53.247)	lr 0.00504
Warmup Train [32][2510/3239]	Time 0.214 (0.233)	Data 0.001 (0.010)	Loss 3.9146 (4.0630)	Top-1 acc 32.031 (29.936)	Top-5 acc 55.078 (53.240)	lr 0.00504
Warmup Train [32][2520/3239]	Time 0.224 (0.233)	Data 0.001 (0.010)	Loss 3.9872 (4.0630)	Top-1 acc 28.125 (29.939)	Top-5 acc 55.859 (53.244)	lr 0.00503
Warmup Train [32][2530/3239]	Time 0.210 (0.233)	Data 0.002 (0.010)	Loss 3.9361 (4.0626)	Top-1 acc 32.812 (29.949)	Top-5 acc 55.469 (53.254)	lr 0.00503
Warmup Train [32][2540/3239]	Time 0.209 (0.233)	Data 0.001 (0.010)	Loss 4.1194 (4.0626)	Top-1 acc 33.984 (29.954)	Top-5 acc 56.250 (53.255)	lr 0.00503
Warmup Train [32][2550/3239]	Time 0.243 (0.233)	Data 0.001 (0.010)	Loss 4.2017 (4.0625)	Top-1 acc 26.953 (29.951)	Top-5 acc 48.828 (53.256)	lr 0.00502
Warmup Train [32][2560/3239]	Time 0.241 (0.233)	Data 0.002 (0.010)	Loss 4.2260 (4.0626)	Top-1 acc 26.562 (29.950)	Top-5 acc 48.438 (53.254)	lr 0.00502
Warmup Train [32][2570/3239]	Time 0.250 (0.233)	Data 0.001 (0.010)	Loss 3.8208 (4.0623)	Top-1 acc 30.469 (29.955)	Top-5 acc 58.594 (53.260)	lr 0.00502
Warmup Train [32][2580/3239]	Time 0.130 (0.233)	Data 0.002 (0.010)	Loss 4.1749 (4.0625)	Top-1 acc 26.172 (29.954)	Top-5 acc 53.516 (53.255)	lr 0.00501
Warmup Train [32][2590/3239]	Time 0.177 (0.233)	Data 0.001 (0.010)	Loss 3.9385 (4.0623)	Top-1 acc 35.547 (29.961)	Top-5 acc 51.172 (53.258)	lr 0.00501
Warmup Train [32][2600/3239]	Time 0.219 (0.233)	Data 0.001 (0.010)	Loss 4.2689 (4.0624)	Top-1 acc 28.906 (29.962)	Top-5 acc 50.391 (53.259)	lr 0.00500
Warmup Train [32][2610/3239]	Time 0.329 (0.233)	Data 0.001 (0.010)	Loss 4.0380 (4.0622)	Top-1 acc 32.422 (29.959)	Top-5 acc 54.297 (53.265)	lr 0.00500
Warmup Train [32][2620/3239]	Time 0.236 (0.233)	Data 0.001 (0.010)	Loss 3.8643 (4.0620)	Top-1 acc 33.203 (29.962)	Top-5 acc 57.812 (53.268)	lr 0.00500
Warmup Train [32][2630/3239]	Time 0.186 (0.233)	Data 0.001 (0.010)	Loss 4.2347 (4.0621)	Top-1 acc 27.344 (29.964)	Top-5 acc 51.172 (53.268)	lr 0.00499
Warmup Train [32][2640/3239]	Time 0.186 (0.233)	Data 0.001 (0.009)	Loss 4.1153 (4.0621)	Top-1 acc 30.469 (29.965)	Top-5 acc 53.125 (53.271)	lr 0.00499
Warmup Train [32][2650/3239]	Time 0.147 (0.233)	Data 0.001 (0.009)	Loss 3.9354 (4.0618)	Top-1 acc 34.766 (29.973)	Top-5 acc 52.734 (53.273)	lr 0.00499
Warmup Train [32][2660/3239]	Time 0.187 (0.233)	Data 0.002 (0.009)	Loss 4.0488 (4.0619)	Top-1 acc 30.859 (29.973)	Top-5 acc 52.734 (53.270)	lr 0.00498
Warmup Train [32][2670/3239]	Time 0.152 (0.233)	Data 0.001 (0.009)	Loss 4.1227 (4.0621)	Top-1 acc 25.000 (29.974)	Top-5 acc 50.781 (53.267)	lr 0.00498
Warmup Train [32][2680/3239]	Time 0.243 (0.233)	Data 0.002 (0.009)	Loss 4.0034 (4.0624)	Top-1 acc 31.250 (29.968)	Top-5 acc 56.641 (53.260)	lr 0.00498
Warmup Train [32][2690/3239]	Time 0.184 (0.233)	Data 0.001 (0.009)	Loss 4.0848 (4.0621)	Top-1 acc 26.953 (29.971)	Top-5 acc 50.781 (53.265)	lr 0.00497
Warmup Train [32][2700/3239]	Time 0.170 (0.233)	Data 0.001 (0.009)	Loss 3.9054 (4.0616)	Top-1 acc 31.641 (29.980)	Top-5 acc 58.203 (53.277)	lr 0.00497
Warmup Train [32][2710/3239]	Time 0.337 (0.233)	Data 0.001 (0.009)	Loss 4.1685 (4.0616)	Top-1 acc 25.781 (29.977)	Top-5 acc 50.000 (53.275)	lr 0.00497
Warmup Train [32][2720/3239]	Time 0.362 (0.233)	Data 0.001 (0.009)	Loss 4.0206 (4.0618)	Top-1 acc 29.297 (29.971)	Top-5 acc 54.688 (53.269)	lr 0.00496
Warmup Train [32][2730/3239]	Time 0.173 (0.232)	Data 0.002 (0.009)	Loss 3.9646 (4.0617)	Top-1 acc 33.984 (29.973)	Top-5 acc 59.375 (53.273)	lr 0.00496
Warmup Train [32][2740/3239]	Time 0.207 (0.232)	Data 0.001 (0.009)	Loss 4.0958 (4.0617)	Top-1 acc 31.641 (29.974)	Top-5 acc 53.516 (53.273)	lr 0.00495
Warmup Train [32][2750/3239]	Time 0.272 (0.232)	Data 0.001 (0.009)	Loss 4.0448 (4.0619)	Top-1 acc 30.859 (29.971)	Top-5 acc 52.734 (53.274)	lr 0.00495
Warmup Train [32][2760/3239]	Time 0.299 (0.232)	Data 0.001 (0.009)	Loss 4.0011 (4.0620)	Top-1 acc 31.641 (29.972)	Top-5 acc 54.688 (53.270)	lr 0.00495
Warmup Train [32][2770/3239]	Time 0.193 (0.232)	Data 0.002 (0.009)	Loss 4.0984 (4.0619)	Top-1 acc 30.469 (29.976)	Top-5 acc 52.734 (53.272)	lr 0.00494
Warmup Train [32][2780/3239]	Time 0.275 (0.232)	Data 0.001 (0.009)	Loss 3.9652 (4.0618)	Top-1 acc 28.125 (29.979)	Top-5 acc 51.953 (53.272)	lr 0.00494
Warmup Train [32][2790/3239]	Time 0.216 (0.232)	Data 0.001 (0.009)	Loss 4.1181 (4.0615)	Top-1 acc 29.688 (29.983)	Top-5 acc 49.219 (53.277)	lr 0.00494
Warmup Train [32][2800/3239]	Time 0.203 (0.232)	Data 0.001 (0.009)	Loss 4.1302 (4.0616)	Top-1 acc 29.688 (29.989)	Top-5 acc 49.219 (53.279)	lr 0.00493
Warmup Train [32][2810/3239]	Time 0.277 (0.232)	Data 0.001 (0.009)	Loss 3.9750 (4.0616)	Top-1 acc 31.641 (29.986)	Top-5 acc 57.422 (53.280)	lr 0.00493
Warmup Train [32][2820/3239]	Time 0.172 (0.232)	Data 0.001 (0.009)	Loss 3.9846 (4.0616)	Top-1 acc 30.469 (29.985)	Top-5 acc 53.906 (53.281)	lr 0.00493
Warmup Train [32][2830/3239]	Time 0.150 (0.232)	Data 0.001 (0.009)	Loss 4.0301 (4.0617)	Top-1 acc 28.125 (29.984)	Top-5 acc 54.297 (53.279)	lr 0.00492
Warmup Train [32][2840/3239]	Time 0.167 (0.232)	Data 0.001 (0.009)	Loss 3.8900 (4.0617)	Top-1 acc 32.422 (29.983)	Top-5 acc 57.422 (53.279)	lr 0.00492
Warmup Train [32][2850/3239]	Time 0.249 (0.232)	Data 0.001 (0.009)	Loss 4.2491 (4.0619)	Top-1 acc 26.172 (29.976)	Top-5 acc 49.219 (53.275)	lr 0.00491
Warmup Train [32][2860/3239]	Time 0.212 (0.232)	Data 0.001 (0.009)	Loss 4.1725 (4.0617)	Top-1 acc 26.172 (29.977)	Top-5 acc 49.609 (53.280)	lr 0.00491
Warmup Train [32][2870/3239]	Time 0.238 (0.232)	Data 0.001 (0.009)	Loss 4.0882 (4.0618)	Top-1 acc 28.516 (29.977)	Top-5 acc 49.219 (53.276)	lr 0.00491
Warmup Train [32][2880/3239]	Time 0.218 (0.232)	Data 0.001 (0.009)	Loss 4.1730 (4.0621)	Top-1 acc 25.781 (29.970)	Top-5 acc 48.047 (53.267)	lr 0.00490
Warmup Train [32][2890/3239]	Time 0.231 (0.232)	Data 0.001 (0.009)	Loss 4.0328 (4.0622)	Top-1 acc 29.688 (29.966)	Top-5 acc 52.734 (53.265)	lr 0.00490
Warmup Train [32][2900/3239]	Time 0.220 (0.232)	Data 0.001 (0.009)	Loss 4.0053 (4.0622)	Top-1 acc 29.297 (29.965)	Top-5 acc 54.688 (53.264)	lr 0.00490
Warmup Train [32][2910/3239]	Time 0.178 (0.232)	Data 0.001 (0.009)	Loss 4.0414 (4.0621)	Top-1 acc 31.641 (29.968)	Top-5 acc 53.516 (53.263)	lr 0.00489
Warmup Train [32][2920/3239]	Time 0.172 (0.232)	Data 0.001 (0.009)	Loss 4.2502 (4.0621)	Top-1 acc 26.562 (29.970)	Top-5 acc 47.266 (53.265)	lr 0.00489
Warmup Train [32][2930/3239]	Time 0.271 (0.232)	Data 0.002 (0.009)	Loss 4.0643 (4.0619)	Top-1 acc 26.562 (29.972)	Top-5 acc 52.734 (53.269)	lr 0.00489
Warmup Train [32][2940/3239]	Time 0.249 (0.232)	Data 0.002 (0.009)	Loss 4.1920 (4.0617)	Top-1 acc 24.609 (29.970)	Top-5 acc 47.266 (53.270)	lr 0.00488
Warmup Train [32][2950/3239]	Time 0.208 (0.232)	Data 0.001 (0.009)	Loss 4.1538 (4.0618)	Top-1 acc 27.344 (29.968)	Top-5 acc 50.000 (53.266)	lr 0.00488
Warmup Train [32][2960/3239]	Time 0.196 (0.232)	Data 0.002 (0.009)	Loss 4.2048 (4.0619)	Top-1 acc 30.078 (29.967)	Top-5 acc 52.734 (53.264)	lr 0.00487
Warmup Train [32][2970/3239]	Time 0.188 (0.232)	Data 0.002 (0.009)	Loss 3.9447 (4.0617)	Top-1 acc 30.078 (29.971)	Top-5 acc 52.344 (53.268)	lr 0.00487
Warmup Train [32][2980/3239]	Time 0.191 (0.232)	Data 0.001 (0.009)	Loss 4.0901 (4.0616)	Top-1 acc 27.344 (29.969)	Top-5 acc 51.562 (53.267)	lr 0.00487
Warmup Train [32][2990/3239]	Time 0.249 (0.231)	Data 0.001 (0.009)	Loss 4.1148 (4.0614)	Top-1 acc 29.688 (29.973)	Top-5 acc 51.562 (53.270)	lr 0.00486
Warmup Train [32][3000/3239]	Time 0.306 (0.231)	Data 0.001 (0.009)	Loss 3.9987 (4.0614)	Top-1 acc 34.375 (29.977)	Top-5 acc 57.422 (53.274)	lr 0.00486
Warmup Train [32][3010/3239]	Time 0.157 (0.231)	Data 0.001 (0.009)	Loss 3.9700 (4.0613)	Top-1 acc 33.594 (29.983)	Top-5 acc 54.688 (53.273)	lr 0.00486
Warmup Train [32][3020/3239]	Time 0.263 (0.231)	Data 0.001 (0.009)	Loss 4.0229 (4.0614)	Top-1 acc 30.078 (29.979)	Top-5 acc 51.953 (53.270)	lr 0.00485
Warmup Train [32][3030/3239]	Time 0.187 (0.231)	Data 0.001 (0.009)	Loss 4.1701 (4.0615)	Top-1 acc 30.078 (29.981)	Top-5 acc 49.609 (53.268)	lr 0.00485
Warmup Train [32][3040/3239]	Time 0.196 (0.231)	Data 0.001 (0.009)	Loss 4.0717 (4.0615)	Top-1 acc 26.562 (29.981)	Top-5 acc 50.781 (53.269)	lr 0.00485
Warmup Train [32][3050/3239]	Time 0.275 (0.231)	Data 0.001 (0.009)	Loss 3.9691 (4.0616)	Top-1 acc 30.469 (29.978)	Top-5 acc 56.641 (53.268)	lr 0.00484
Warmup Train [32][3060/3239]	Time 0.254 (0.231)	Data 0.001 (0.009)	Loss 4.1557 (4.0618)	Top-1 acc 32.422 (29.979)	Top-5 acc 52.734 (53.266)	lr 0.00484
Warmup Train [32][3070/3239]	Time 0.185 (0.231)	Data 0.001 (0.009)	Loss 4.1845 (4.0618)	Top-1 acc 31.641 (29.979)	Top-5 acc 52.344 (53.268)	lr 0.00484
Warmup Train [32][3080/3239]	Time 0.164 (0.231)	Data 0.001 (0.009)	Loss 4.1015 (4.0617)	Top-1 acc 30.859 (29.983)	Top-5 acc 53.516 (53.272)	lr 0.00483
Warmup Train [32][3090/3239]	Time 0.188 (0.231)	Data 0.001 (0.009)	Loss 4.0546 (4.0615)	Top-1 acc 29.297 (29.985)	Top-5 acc 51.953 (53.276)	lr 0.00483
Warmup Train [32][3100/3239]	Time 0.197 (0.231)	Data 0.001 (0.009)	Loss 4.0346 (4.0614)	Top-1 acc 32.031 (29.987)	Top-5 acc 52.344 (53.277)	lr 0.00482
Warmup Train [32][3110/3239]	Time 0.237 (0.231)	Data 0.001 (0.009)	Loss 4.0100 (4.0614)	Top-1 acc 30.469 (29.983)	Top-5 acc 52.344 (53.277)	lr 0.00482
Warmup Train [32][3120/3239]	Time 0.167 (0.231)	Data 0.002 (0.008)	Loss 4.1365 (4.0614)	Top-1 acc 27.344 (29.983)	Top-5 acc 48.828 (53.279)	lr 0.00482
Warmup Train [32][3130/3239]	Time 0.152 (0.231)	Data 0.001 (0.008)	Loss 4.2284 (4.0613)	Top-1 acc 23.047 (29.983)	Top-5 acc 53.516 (53.280)	lr 0.00481
Warmup Train [32][3140/3239]	Time 0.325 (0.231)	Data 0.001 (0.008)	Loss 3.7869 (4.0612)	Top-1 acc 36.719 (29.985)	Top-5 acc 57.422 (53.280)	lr 0.00481
Warmup Train [32][3150/3239]	Time 0.227 (0.231)	Data 0.002 (0.008)	Loss 4.1497 (4.0612)	Top-1 acc 26.953 (29.988)	Top-5 acc 51.172 (53.281)	lr 0.00481
Warmup Train [32][3160/3239]	Time 0.219 (0.231)	Data 0.002 (0.008)	Loss 4.1626 (4.0611)	Top-1 acc 26.953 (29.987)	Top-5 acc 50.000 (53.281)	lr 0.00480
Warmup Train [32][3170/3239]	Time 0.253 (0.231)	Data 0.001 (0.008)	Loss 4.0105 (4.0612)	Top-1 acc 28.516 (29.984)	Top-5 acc 54.688 (53.280)	lr 0.00480
Warmup Train [32][3180/3239]	Time 0.184 (0.231)	Data 0.000 (0.008)	Loss 4.0487 (4.0612)	Top-1 acc 27.734 (29.983)	Top-5 acc 57.031 (53.282)	lr 0.00480
Warmup Train [32][3190/3239]	Time 0.198 (0.231)	Data 0.000 (0.008)	Loss 4.2196 (4.0614)	Top-1 acc 30.078 (29.981)	Top-5 acc 47.656 (53.273)	lr 0.00479
Warmup Train [32][3200/3239]	Time 0.145 (0.231)	Data 0.000 (0.008)	Loss 3.9861 (4.0613)	Top-1 acc 30.469 (29.982)	Top-5 acc 55.469 (53.274)	lr 0.00479
Warmup Train [32][3210/3239]	Time 0.169 (0.231)	Data 0.000 (0.008)	Loss 4.0596 (4.0612)	Top-1 acc 29.688 (29.983)	Top-5 acc 55.859 (53.274)	lr 0.00479
Warmup Train [32][3220/3239]	Time 0.247 (0.231)	Data 0.000 (0.008)	Loss 3.9958 (4.0611)	Top-1 acc 31.250 (29.985)	Top-5 acc 53.125 (53.275)	lr 0.00478
Warmup Train [32][3230/3239]	Time 0.200 (0.230)	Data 0.000 (0.008)	Loss 4.1120 (4.0609)	Top-1 acc 26.562 (29.986)	Top-5 acc 52.734 (53.282)	lr 0.00478
Warmup Train [32][3239/3239]	Time 0.133 (0.230)	Data 0.000 (0.008)	Loss 4.2738 (4.0609)	Top-1 acc 32.099 (29.987)	Top-5 acc 49.383 (53.281)	lr 0.00477
==========Warmup Valid [32/40]	loss 3.001	top-1 acc 37.656	top-5 acc 62.427	Train top-1 29.987	top-5 53.281	flops: 442.4M
Warmup Train [33][0/3239]	Time 18.873 (18.873)	Data 18.376 (18.376)	Loss 4.0695 (4.0695)	Top-1 acc 29.688 (29.688)	Top-5 acc 51.562 (51.562)	lr 0.00477
Warmup Train [33][10/3239]	Time 0.249 (2.019)	Data 0.001 (1.688)	Loss 3.8909 (4.0185)	Top-1 acc 30.469 (30.327)	Top-5 acc 54.688 (53.054)	lr 0.00477
Warmup Train [33][20/3239]	Time 0.219 (1.176)	Data 0.001 (0.887)	Loss 3.9726 (4.0213)	Top-1 acc 33.594 (30.525)	Top-5 acc 57.812 (53.609)	lr 0.00477
Warmup Train [33][30/3239]	Time 0.241 (0.874)	Data 0.001 (0.602)	Loss 3.8793 (4.0241)	Top-1 acc 34.766 (30.494)	Top-5 acc 61.719 (53.969)	lr 0.00476
Warmup Train [33][40/3239]	Time 0.354 (0.716)	Data 0.002 (0.455)	Loss 3.9153 (4.0338)	Top-1 acc 32.031 (30.212)	Top-5 acc 56.250 (53.754)	lr 0.00476
Warmup Train [33][50/3239]	Time 0.217 (0.619)	Data 0.001 (0.367)	Loss 4.1631 (4.0412)	Top-1 acc 26.562 (30.086)	Top-5 acc 50.781 (53.784)	lr 0.00476
Warmup Train [33][60/3239]	Time 0.248 (0.555)	Data 0.002 (0.308)	Loss 3.9323 (4.0414)	Top-1 acc 28.125 (30.110)	Top-5 acc 52.734 (53.592)	lr 0.00475
Warmup Train [33][70/3239]	Time 0.201 (0.509)	Data 0.001 (0.265)	Loss 4.0298 (4.0400)	Top-1 acc 29.297 (30.117)	Top-5 acc 54.297 (53.576)	lr 0.00475
Warmup Train [33][80/3239]	Time 0.259 (0.475)	Data 0.001 (0.232)	Loss 3.9961 (4.0457)	Top-1 acc 32.422 (30.126)	Top-5 acc 52.344 (53.448)	lr 0.00475
Warmup Train [33][90/3239]	Time 0.354 (0.448)	Data 0.001 (0.207)	Loss 3.9933 (4.0506)	Top-1 acc 30.469 (30.014)	Top-5 acc 55.078 (53.421)	lr 0.00474
Warmup Train [33][100/3239]	Time 0.301 (0.426)	Data 0.002 (0.187)	Loss 3.9731 (4.0543)	Top-1 acc 31.641 (29.947)	Top-5 acc 54.688 (53.473)	lr 0.00474
Warmup Train [33][110/3239]	Time 0.226 (0.409)	Data 0.001 (0.170)	Loss 4.0058 (4.0529)	Top-1 acc 31.250 (29.997)	Top-5 acc 55.859 (53.491)	lr 0.00474
Warmup Train [33][120/3239]	Time 0.274 (0.394)	Data 0.001 (0.156)	Loss 3.8875 (4.0520)	Top-1 acc 32.031 (30.049)	Top-5 acc 57.812 (53.503)	lr 0.00473
Warmup Train [33][130/3239]	Time 0.224 (0.381)	Data 0.002 (0.144)	Loss 4.2569 (4.0529)	Top-1 acc 26.562 (30.051)	Top-5 acc 47.656 (53.417)	lr 0.00473
Warmup Train [33][140/3239]	Time 0.229 (0.370)	Data 0.002 (0.134)	Loss 3.9796 (4.0519)	Top-1 acc 30.469 (30.025)	Top-5 acc 52.344 (53.349)	lr 0.00472
Warmup Train [33][150/3239]	Time 0.303 (0.360)	Data 0.002 (0.126)	Loss 4.1878 (4.0495)	Top-1 acc 31.641 (30.047)	Top-5 acc 51.562 (53.451)	lr 0.00472
Warmup Train [33][160/3239]	Time 0.195 (0.352)	Data 0.001 (0.118)	Loss 3.8926 (4.0510)	Top-1 acc 33.203 (30.059)	Top-5 acc 56.250 (53.460)	lr 0.00472
Warmup Train [33][170/3239]	Time 0.198 (0.344)	Data 0.002 (0.111)	Loss 4.0047 (4.0501)	Top-1 acc 30.859 (30.062)	Top-5 acc 53.125 (53.502)	lr 0.00471
Warmup Train [33][180/3239]	Time 0.186 (0.338)	Data 0.001 (0.105)	Loss 3.9958 (4.0527)	Top-1 acc 30.859 (30.011)	Top-5 acc 57.812 (53.472)	lr 0.00471
Warmup Train [33][190/3239]	Time 0.258 (0.332)	Data 0.002 (0.100)	Loss 4.4219 (4.0512)	Top-1 acc 17.969 (30.041)	Top-5 acc 45.703 (53.487)	lr 0.00471
Warmup Train [33][200/3239]	Time 0.165 (0.326)	Data 0.001 (0.095)	Loss 4.0006 (4.0510)	Top-1 acc 32.031 (30.041)	Top-5 acc 53.906 (53.516)	lr 0.00470
Warmup Train [33][210/3239]	Time 0.159 (0.321)	Data 0.002 (0.091)	Loss 3.8560 (4.0510)	Top-1 acc 35.156 (30.052)	Top-5 acc 59.766 (53.530)	lr 0.00470
Warmup Train [33][220/3239]	Time 0.187 (0.318)	Data 0.001 (0.087)	Loss 4.2243 (4.0508)	Top-1 acc 29.297 (30.032)	Top-5 acc 48.047 (53.535)	lr 0.00470
Warmup Train [33][230/3239]	Time 0.212 (0.313)	Data 0.001 (0.083)	Loss 4.2311 (4.0510)	Top-1 acc 29.688 (30.085)	Top-5 acc 51.172 (53.546)	lr 0.00469
Warmup Train [33][240/3239]	Time 0.162 (0.309)	Data 0.001 (0.080)	Loss 3.8274 (4.0498)	Top-1 acc 34.375 (30.111)	Top-5 acc 61.328 (53.584)	lr 0.00469
Warmup Train [33][250/3239]	Time 0.286 (0.306)	Data 0.001 (0.077)	Loss 4.0640 (4.0485)	Top-1 acc 30.469 (30.182)	Top-5 acc 50.781 (53.620)	lr 0.00469
Warmup Train [33][260/3239]	Time 0.169 (0.303)	Data 0.001 (0.074)	Loss 4.0825 (4.0496)	Top-1 acc 29.688 (30.162)	Top-5 acc 48.438 (53.581)	lr 0.00468
Warmup Train [33][270/3239]	Time 0.207 (0.300)	Data 0.002 (0.072)	Loss 4.0092 (4.0490)	Top-1 acc 32.422 (30.149)	Top-5 acc 51.562 (53.562)	lr 0.00468
Warmup Train [33][280/3239]	Time 0.252 (0.297)	Data 0.001 (0.069)	Loss 4.0603 (4.0475)	Top-1 acc 32.031 (30.174)	Top-5 acc 49.219 (53.598)	lr 0.00468
Warmup Train [33][290/3239]	Time 0.237 (0.295)	Data 0.001 (0.067)	Loss 4.2053 (4.0496)	Top-1 acc 30.469 (30.156)	Top-5 acc 48.438 (53.556)	lr 0.00467
Warmup Train [33][300/3239]	Time 0.236 (0.292)	Data 0.001 (0.065)	Loss 4.0081 (4.0504)	Top-1 acc 30.078 (30.140)	Top-5 acc 56.641 (53.549)	lr 0.00467
Warmup Train [33][310/3239]	Time 0.259 (0.290)	Data 0.001 (0.063)	Loss 4.0510 (4.0503)	Top-1 acc 27.344 (30.143)	Top-5 acc 51.172 (53.523)	lr 0.00466
Warmup Train [33][320/3239]	Time 0.177 (0.288)	Data 0.002 (0.061)	Loss 4.1456 (4.0506)	Top-1 acc 26.172 (30.140)	Top-5 acc 51.562 (53.514)	lr 0.00466
Warmup Train [33][330/3239]	Time 0.222 (0.286)	Data 0.002 (0.059)	Loss 4.1735 (4.0507)	Top-1 acc 32.812 (30.149)	Top-5 acc 52.734 (53.499)	lr 0.00466
Warmup Train [33][340/3239]	Time 0.214 (0.284)	Data 0.001 (0.057)	Loss 3.9259 (4.0507)	Top-1 acc 33.594 (30.147)	Top-5 acc 60.547 (53.504)	lr 0.00465
Warmup Train [33][350/3239]	Time 0.285 (0.282)	Data 0.001 (0.056)	Loss 4.0114 (4.0481)	Top-1 acc 30.469 (30.201)	Top-5 acc 55.078 (53.580)	lr 0.00465
Warmup Train [33][360/3239]	Time 0.198 (0.281)	Data 0.001 (0.054)	Loss 4.0272 (4.0485)	Top-1 acc 30.859 (30.180)	Top-5 acc 53.516 (53.568)	lr 0.00465
Warmup Train [33][370/3239]	Time 0.173 (0.279)	Data 0.001 (0.053)	Loss 3.9557 (4.0482)	Top-1 acc 30.078 (30.173)	Top-5 acc 54.297 (53.594)	lr 0.00464
Warmup Train [33][380/3239]	Time 0.221 (0.278)	Data 0.001 (0.052)	Loss 3.8023 (4.0472)	Top-1 acc 37.500 (30.209)	Top-5 acc 58.203 (53.590)	lr 0.00464
Warmup Train [33][390/3239]	Time 0.232 (0.277)	Data 0.001 (0.050)	Loss 4.1564 (4.0470)	Top-1 acc 30.078 (30.200)	Top-5 acc 49.219 (53.575)	lr 0.00464
Warmup Train [33][400/3239]	Time 0.215 (0.275)	Data 0.001 (0.049)	Loss 4.0924 (4.0474)	Top-1 acc 29.297 (30.179)	Top-5 acc 51.953 (53.575)	lr 0.00463
Warmup Train [33][410/3239]	Time 0.153 (0.274)	Data 0.001 (0.048)	Loss 3.9658 (4.0475)	Top-1 acc 33.203 (30.169)	Top-5 acc 56.250 (53.598)	lr 0.00463
Warmup Train [33][420/3239]	Time 0.236 (0.273)	Data 0.001 (0.047)	Loss 4.2468 (4.0474)	Top-1 acc 28.906 (30.176)	Top-5 acc 50.781 (53.601)	lr 0.00463
Warmup Train [33][430/3239]	Time 0.282 (0.272)	Data 0.002 (0.046)	Loss 3.9380 (4.0456)	Top-1 acc 32.031 (30.181)	Top-5 acc 57.812 (53.654)	lr 0.00462
Warmup Train [33][440/3239]	Time 0.251 (0.271)	Data 0.001 (0.045)	Loss 4.1831 (4.0461)	Top-1 acc 25.000 (30.164)	Top-5 acc 50.391 (53.643)	lr 0.00462
Warmup Train [33][450/3239]	Time 0.271 (0.270)	Data 0.001 (0.044)	Loss 4.1799 (4.0466)	Top-1 acc 25.391 (30.148)	Top-5 acc 48.828 (53.617)	lr 0.00462
Warmup Train [33][460/3239]	Time 0.319 (0.269)	Data 0.001 (0.043)	Loss 4.0675 (4.0463)	Top-1 acc 27.344 (30.137)	Top-5 acc 55.859 (53.622)	lr 0.00461
Warmup Train [33][470/3239]	Time 0.210 (0.268)	Data 0.001 (0.042)	Loss 4.1043 (4.0447)	Top-1 acc 27.734 (30.143)	Top-5 acc 50.391 (53.680)	lr 0.00461
Warmup Train [33][480/3239]	Time 0.210 (0.267)	Data 0.001 (0.041)	Loss 4.0360 (4.0445)	Top-1 acc 33.594 (30.160)	Top-5 acc 54.297 (53.674)	lr 0.00460
Warmup Train [33][490/3239]	Time 0.219 (0.266)	Data 0.001 (0.041)	Loss 3.8586 (4.0437)	Top-1 acc 38.281 (30.171)	Top-5 acc 58.203 (53.680)	lr 0.00460
Warmup Train [33][500/3239]	Time 0.224 (0.265)	Data 0.002 (0.040)	Loss 4.2061 (4.0436)	Top-1 acc 26.953 (30.179)	Top-5 acc 53.516 (53.686)	lr 0.00460
Warmup Train [33][510/3239]	Time 0.228 (0.264)	Data 0.001 (0.039)	Loss 4.1223 (4.0434)	Top-1 acc 28.516 (30.198)	Top-5 acc 55.078 (53.724)	lr 0.00459
Warmup Train [33][520/3239]	Time 0.193 (0.263)	Data 0.001 (0.038)	Loss 4.2260 (4.0444)	Top-1 acc 30.078 (30.196)	Top-5 acc 49.219 (53.708)	lr 0.00459
Warmup Train [33][530/3239]	Time 0.233 (0.263)	Data 0.002 (0.038)	Loss 3.8359 (4.0439)	Top-1 acc 37.891 (30.204)	Top-5 acc 58.203 (53.713)	lr 0.00459
Warmup Train [33][540/3239]	Time 0.210 (0.262)	Data 0.003 (0.037)	Loss 4.0631 (4.0442)	Top-1 acc 30.469 (30.214)	Top-5 acc 52.734 (53.708)	lr 0.00458
Warmup Train [33][550/3239]	Time 0.211 (0.261)	Data 0.002 (0.037)	Loss 4.0688 (4.0457)	Top-1 acc 30.469 (30.189)	Top-5 acc 52.734 (53.692)	lr 0.00458
Warmup Train [33][560/3239]	Time 0.285 (0.261)	Data 0.001 (0.036)	Loss 3.9931 (4.0455)	Top-1 acc 26.953 (30.188)	Top-5 acc 55.078 (53.702)	lr 0.00458
Warmup Train [33][570/3239]	Time 0.196 (0.260)	Data 0.001 (0.035)	Loss 4.1853 (4.0463)	Top-1 acc 27.344 (30.174)	Top-5 acc 47.656 (53.683)	lr 0.00457
Warmup Train [33][580/3239]	Time 0.204 (0.259)	Data 0.001 (0.035)	Loss 4.1127 (4.0460)	Top-1 acc 28.906 (30.171)	Top-5 acc 52.344 (53.687)	lr 0.00457
Warmup Train [33][590/3239]	Time 0.256 (0.259)	Data 0.001 (0.034)	Loss 3.9331 (4.0466)	Top-1 acc 30.859 (30.173)	Top-5 acc 57.422 (53.669)	lr 0.00457
Warmup Train [33][600/3239]	Time 0.279 (0.258)	Data 0.001 (0.034)	Loss 3.9609 (4.0473)	Top-1 acc 31.250 (30.153)	Top-5 acc 54.688 (53.646)	lr 0.00456
Warmup Train [33][610/3239]	Time 0.174 (0.257)	Data 0.001 (0.033)	Loss 3.9072 (4.0468)	Top-1 acc 35.547 (30.173)	Top-5 acc 55.859 (53.648)	lr 0.00456
Warmup Train [33][620/3239]	Time 0.238 (0.257)	Data 0.001 (0.033)	Loss 4.0165 (4.0473)	Top-1 acc 33.203 (30.167)	Top-5 acc 59.375 (53.635)	lr 0.00456
Warmup Train [33][630/3239]	Time 0.194 (0.256)	Data 0.001 (0.032)	Loss 4.1368 (4.0469)	Top-1 acc 27.734 (30.172)	Top-5 acc 55.859 (53.656)	lr 0.00455
Warmup Train [33][640/3239]	Time 0.217 (0.256)	Data 0.001 (0.032)	Loss 3.9932 (4.0466)	Top-1 acc 33.984 (30.190)	Top-5 acc 53.125 (53.648)	lr 0.00455
Warmup Train [33][650/3239]	Time 0.241 (0.256)	Data 0.001 (0.031)	Loss 3.9701 (4.0458)	Top-1 acc 31.250 (30.211)	Top-5 acc 53.906 (53.661)	lr 0.00455
Warmup Train [33][660/3239]	Time 0.150 (0.255)	Data 0.003 (0.031)	Loss 4.1777 (4.0463)	Top-1 acc 25.391 (30.195)	Top-5 acc 48.047 (53.650)	lr 0.00454
Warmup Train [33][670/3239]	Time 0.170 (0.254)	Data 0.002 (0.031)	Loss 4.1451 (4.0468)	Top-1 acc 29.297 (30.204)	Top-5 acc 51.172 (53.633)	lr 0.00454
Warmup Train [33][680/3239]	Time 0.281 (0.254)	Data 0.001 (0.030)	Loss 4.0900 (4.0461)	Top-1 acc 32.031 (30.235)	Top-5 acc 51.562 (53.630)	lr 0.00454
Warmup Train [33][690/3239]	Time 0.161 (0.254)	Data 0.002 (0.030)	Loss 3.9161 (4.0454)	Top-1 acc 30.859 (30.249)	Top-5 acc 58.984 (53.646)	lr 0.00453
Warmup Train [33][700/3239]	Time 0.213 (0.253)	Data 0.002 (0.030)	Loss 3.8557 (4.0455)	Top-1 acc 30.859 (30.240)	Top-5 acc 56.641 (53.641)	lr 0.00453
Warmup Train [33][710/3239]	Time 0.249 (0.253)	Data 0.001 (0.029)	Loss 4.0092 (4.0458)	Top-1 acc 30.469 (30.219)	Top-5 acc 55.469 (53.639)	lr 0.00452
Warmup Train [33][720/3239]	Time 0.194 (0.252)	Data 0.001 (0.029)	Loss 4.0583 (4.0463)	Top-1 acc 29.297 (30.204)	Top-5 acc 54.688 (53.645)	lr 0.00452
Warmup Train [33][730/3239]	Time 0.172 (0.252)	Data 0.001 (0.028)	Loss 4.1509 (4.0467)	Top-1 acc 31.250 (30.202)	Top-5 acc 52.344 (53.645)	lr 0.00452
Warmup Train [33][740/3239]	Time 0.223 (0.252)	Data 0.001 (0.028)	Loss 3.9545 (4.0472)	Top-1 acc 32.031 (30.191)	Top-5 acc 58.203 (53.633)	lr 0.00451
Warmup Train [33][750/3239]	Time 0.147 (0.251)	Data 0.001 (0.028)	Loss 3.9129 (4.0466)	Top-1 acc 30.078 (30.201)	Top-5 acc 55.078 (53.651)	lr 0.00451
Warmup Train [33][760/3239]	Time 0.227 (0.251)	Data 0.001 (0.028)	Loss 4.0447 (4.0463)	Top-1 acc 31.250 (30.198)	Top-5 acc 55.859 (53.653)	lr 0.00451
Warmup Train [33][770/3239]	Time 0.190 (0.250)	Data 0.001 (0.027)	Loss 3.9133 (4.0458)	Top-1 acc 32.422 (30.196)	Top-5 acc 58.984 (53.659)	lr 0.00450
Warmup Train [33][780/3239]	Time 0.258 (0.250)	Data 0.003 (0.027)	Loss 4.0408 (4.0456)	Top-1 acc 29.688 (30.191)	Top-5 acc 55.859 (53.669)	lr 0.00450
Warmup Train [33][790/3239]	Time 0.261 (0.250)	Data 0.002 (0.027)	Loss 4.0617 (4.0458)	Top-1 acc 26.562 (30.171)	Top-5 acc 51.562 (53.659)	lr 0.00450
Warmup Train [33][800/3239]	Time 0.296 (0.250)	Data 0.002 (0.026)	Loss 4.1958 (4.0455)	Top-1 acc 27.344 (30.161)	Top-5 acc 50.781 (53.670)	lr 0.00449
Warmup Train [33][810/3239]	Time 0.201 (0.249)	Data 0.001 (0.026)	Loss 3.9115 (4.0464)	Top-1 acc 30.859 (30.143)	Top-5 acc 55.078 (53.652)	lr 0.00449
Warmup Train [33][820/3239]	Time 0.228 (0.249)	Data 0.001 (0.026)	Loss 4.1032 (4.0468)	Top-1 acc 28.125 (30.149)	Top-5 acc 52.734 (53.650)	lr 0.00449
Warmup Train [33][830/3239]	Time 0.175 (0.249)	Data 0.002 (0.025)	Loss 3.9813 (4.0473)	Top-1 acc 33.203 (30.130)	Top-5 acc 55.859 (53.635)	lr 0.00448
Warmup Train [33][840/3239]	Time 0.234 (0.248)	Data 0.002 (0.025)	Loss 4.0082 (4.0471)	Top-1 acc 30.078 (30.142)	Top-5 acc 55.078 (53.641)	lr 0.00448
Warmup Train [33][850/3239]	Time 0.188 (0.248)	Data 0.001 (0.025)	Loss 4.1040 (4.0475)	Top-1 acc 28.516 (30.127)	Top-5 acc 53.516 (53.632)	lr 0.00448
Warmup Train [33][860/3239]	Time 0.206 (0.248)	Data 0.001 (0.025)	Loss 3.8274 (4.0475)	Top-1 acc 35.938 (30.134)	Top-5 acc 61.328 (53.629)	lr 0.00447
Warmup Train [33][870/3239]	Time 0.167 (0.247)	Data 0.001 (0.024)	Loss 4.0300 (4.0473)	Top-1 acc 33.984 (30.154)	Top-5 acc 54.688 (53.634)	lr 0.00447
Warmup Train [33][880/3239]	Time 0.320 (0.247)	Data 0.001 (0.024)	Loss 4.0859 (4.0472)	Top-1 acc 31.641 (30.150)	Top-5 acc 51.562 (53.638)	lr 0.00447
Warmup Train [33][890/3239]	Time 0.181 (0.247)	Data 0.001 (0.024)	Loss 4.0112 (4.0471)	Top-1 acc 31.641 (30.160)	Top-5 acc 57.031 (53.646)	lr 0.00446
Warmup Train [33][900/3239]	Time 0.148 (0.247)	Data 0.001 (0.024)	Loss 3.7726 (4.0465)	Top-1 acc 36.328 (30.177)	Top-5 acc 60.156 (53.657)	lr 0.00446
Warmup Train [33][910/3239]	Time 0.179 (0.246)	Data 0.001 (0.024)	Loss 4.0123 (4.0469)	Top-1 acc 32.031 (30.172)	Top-5 acc 53.125 (53.642)	lr 0.00446
Warmup Train [33][920/3239]	Time 0.234 (0.246)	Data 0.001 (0.023)	Loss 4.0417 (4.0470)	Top-1 acc 33.203 (30.174)	Top-5 acc 55.469 (53.636)	lr 0.00445
Warmup Train [33][930/3239]	Time 0.193 (0.246)	Data 0.001 (0.023)	Loss 4.3326 (4.0475)	Top-1 acc 28.125 (30.181)	Top-5 acc 50.781 (53.630)	lr 0.00445
Warmup Train [33][940/3239]	Time 0.202 (0.245)	Data 0.001 (0.023)	Loss 4.2892 (4.0476)	Top-1 acc 26.172 (30.174)	Top-5 acc 45.703 (53.627)	lr 0.00445
Warmup Train [33][950/3239]	Time 0.211 (0.245)	Data 0.002 (0.023)	Loss 4.1218 (4.0473)	Top-1 acc 30.469 (30.174)	Top-5 acc 52.344 (53.625)	lr 0.00444
Warmup Train [33][960/3239]	Time 0.205 (0.245)	Data 0.001 (0.022)	Loss 3.9420 (4.0468)	Top-1 acc 34.766 (30.184)	Top-5 acc 56.641 (53.642)	lr 0.00444
Warmup Train [33][970/3239]	Time 0.227 (0.244)	Data 0.001 (0.022)	Loss 4.0602 (4.0467)	Top-1 acc 29.688 (30.181)	Top-5 acc 52.344 (53.644)	lr 0.00443
Warmup Train [33][980/3239]	Time 0.240 (0.244)	Data 0.002 (0.022)	Loss 3.9734 (4.0465)	Top-1 acc 32.812 (30.179)	Top-5 acc 54.688 (53.654)	lr 0.00443
Warmup Train [33][990/3239]	Time 0.215 (0.244)	Data 0.001 (0.022)	Loss 3.9498 (4.0464)	Top-1 acc 35.156 (30.184)	Top-5 acc 55.078 (53.653)	lr 0.00443
Warmup Train [33][1000/3239]	Time 0.200 (0.244)	Data 0.001 (0.022)	Loss 4.0388 (4.0465)	Top-1 acc 32.812 (30.185)	Top-5 acc 56.250 (53.648)	lr 0.00442
Warmup Train [33][1010/3239]	Time 0.216 (0.243)	Data 0.002 (0.021)	Loss 4.1140 (4.0463)	Top-1 acc 29.688 (30.187)	Top-5 acc 53.125 (53.654)	lr 0.00442
Warmup Train [33][1020/3239]	Time 0.207 (0.243)	Data 0.002 (0.021)	Loss 4.1408 (4.0464)	Top-1 acc 28.125 (30.188)	Top-5 acc 53.125 (53.661)	lr 0.00442
Warmup Train [33][1030/3239]	Time 0.259 (0.243)	Data 0.001 (0.021)	Loss 4.1784 (4.0468)	Top-1 acc 25.391 (30.171)	Top-5 acc 48.047 (53.650)	lr 0.00441
Warmup Train [33][1040/3239]	Time 0.173 (0.243)	Data 0.001 (0.021)	Loss 4.2704 (4.0471)	Top-1 acc 24.609 (30.173)	Top-5 acc 46.875 (53.642)	lr 0.00441
Warmup Train [33][1050/3239]	Time 0.216 (0.243)	Data 0.002 (0.021)	Loss 4.0146 (4.0473)	Top-1 acc 30.859 (30.163)	Top-5 acc 55.078 (53.639)	lr 0.00441
Warmup Train [33][1060/3239]	Time 0.196 (0.242)	Data 0.002 (0.021)	Loss 4.1425 (4.0474)	Top-1 acc 32.812 (30.159)	Top-5 acc 53.906 (53.637)	lr 0.00440
Warmup Train [33][1070/3239]	Time 0.262 (0.242)	Data 0.001 (0.020)	Loss 4.1462 (4.0467)	Top-1 acc 26.172 (30.163)	Top-5 acc 51.562 (53.648)	lr 0.00440
Warmup Train [33][1080/3239]	Time 0.260 (0.242)	Data 0.001 (0.020)	Loss 3.7847 (4.0461)	Top-1 acc 34.375 (30.166)	Top-5 acc 58.594 (53.663)	lr 0.00440
Warmup Train [33][1090/3239]	Time 0.205 (0.242)	Data 0.001 (0.020)	Loss 4.2651 (4.0468)	Top-1 acc 21.094 (30.145)	Top-5 acc 48.828 (53.650)	lr 0.00439
Warmup Train [33][1100/3239]	Time 0.212 (0.242)	Data 0.001 (0.020)	Loss 4.0458 (4.0464)	Top-1 acc 32.031 (30.153)	Top-5 acc 52.344 (53.653)	lr 0.00439
Warmup Train [33][1110/3239]	Time 0.258 (0.242)	Data 0.001 (0.020)	Loss 4.3627 (4.0466)	Top-1 acc 25.391 (30.147)	Top-5 acc 50.391 (53.637)	lr 0.00439
Warmup Train [33][1120/3239]	Time 0.187 (0.242)	Data 0.002 (0.020)	Loss 4.1210 (4.0465)	Top-1 acc 27.734 (30.145)	Top-5 acc 51.172 (53.643)	lr 0.00438
Warmup Train [33][1130/3239]	Time 0.261 (0.241)	Data 0.001 (0.020)	Loss 3.9118 (4.0464)	Top-1 acc 32.812 (30.142)	Top-5 acc 56.641 (53.645)	lr 0.00438
Warmup Train [33][1140/3239]	Time 0.210 (0.241)	Data 0.001 (0.019)	Loss 4.1671 (4.0461)	Top-1 acc 27.344 (30.158)	Top-5 acc 50.000 (53.656)	lr 0.00438
Warmup Train [33][1150/3239]	Time 0.216 (0.241)	Data 0.001 (0.019)	Loss 4.0756 (4.0460)	Top-1 acc 31.250 (30.162)	Top-5 acc 56.641 (53.662)	lr 0.00437
Warmup Train [33][1160/3239]	Time 0.242 (0.241)	Data 0.001 (0.019)	Loss 4.0778 (4.0463)	Top-1 acc 28.125 (30.166)	Top-5 acc 49.219 (53.647)	lr 0.00437
Warmup Train [33][1170/3239]	Time 0.240 (0.241)	Data 0.002 (0.019)	Loss 3.9413 (4.0464)	Top-1 acc 38.281 (30.172)	Top-5 acc 55.859 (53.649)	lr 0.00437
Warmup Train [33][1180/3239]	Time 0.202 (0.241)	Data 0.001 (0.019)	Loss 4.0105 (4.0463)	Top-1 acc 33.203 (30.177)	Top-5 acc 57.031 (53.660)	lr 0.00436
Warmup Train [33][1190/3239]	Time 0.258 (0.240)	Data 0.002 (0.019)	Loss 4.0635 (4.0462)	Top-1 acc 28.906 (30.179)	Top-5 acc 55.078 (53.671)	lr 0.00436
Warmup Train [33][1200/3239]	Time 0.198 (0.240)	Data 0.001 (0.019)	Loss 4.1145 (4.0466)	Top-1 acc 24.609 (30.167)	Top-5 acc 54.688 (53.655)	lr 0.00436
Warmup Train [33][1210/3239]	Time 0.199 (0.240)	Data 0.001 (0.018)	Loss 4.1514 (4.0468)	Top-1 acc 30.078 (30.173)	Top-5 acc 50.000 (53.650)	lr 0.00435
Warmup Train [33][1220/3239]	Time 0.164 (0.240)	Data 0.001 (0.018)	Loss 4.1928 (4.0469)	Top-1 acc 25.391 (30.171)	Top-5 acc 48.047 (53.645)	lr 0.00435
Warmup Train [33][1230/3239]	Time 0.219 (0.240)	Data 0.002 (0.018)	Loss 4.1289 (4.0466)	Top-1 acc 26.953 (30.174)	Top-5 acc 51.562 (53.647)	lr 0.00435
Warmup Train [33][1240/3239]	Time 0.137 (0.240)	Data 0.001 (0.018)	Loss 3.9955 (4.0460)	Top-1 acc 30.469 (30.185)	Top-5 acc 55.078 (53.661)	lr 0.00434
Warmup Train [33][1250/3239]	Time 0.209 (0.239)	Data 0.002 (0.018)	Loss 4.1008 (4.0461)	Top-1 acc 29.297 (30.175)	Top-5 acc 53.906 (53.659)	lr 0.00434
Warmup Train [33][1260/3239]	Time 0.127 (0.239)	Data 0.002 (0.018)	Loss 3.9825 (4.0462)	Top-1 acc 35.547 (30.179)	Top-5 acc 55.078 (53.659)	lr 0.00434
Warmup Train [33][1270/3239]	Time 0.246 (0.239)	Data 0.001 (0.018)	Loss 4.1314 (4.0459)	Top-1 acc 31.641 (30.193)	Top-5 acc 54.297 (53.667)	lr 0.00433
Warmup Train [33][1280/3239]	Time 0.297 (0.239)	Data 0.002 (0.018)	Loss 4.1672 (4.0464)	Top-1 acc 26.953 (30.187)	Top-5 acc 50.391 (53.660)	lr 0.00433
Warmup Train [33][1290/3239]	Time 0.222 (0.239)	Data 0.001 (0.017)	Loss 3.9985 (4.0467)	Top-1 acc 32.812 (30.188)	Top-5 acc 52.734 (53.657)	lr 0.00433
Warmup Train [33][1300/3239]	Time 0.235 (0.239)	Data 0.001 (0.017)	Loss 3.8612 (4.0471)	Top-1 acc 32.812 (30.181)	Top-5 acc 58.203 (53.650)	lr 0.00432
Warmup Train [33][1310/3239]	Time 0.181 (0.239)	Data 0.002 (0.017)	Loss 4.2706 (4.0474)	Top-1 acc 22.656 (30.174)	Top-5 acc 48.047 (53.645)	lr 0.00432
Warmup Train [33][1320/3239]	Time 0.230 (0.239)	Data 0.002 (0.017)	Loss 3.8906 (4.0473)	Top-1 acc 31.641 (30.175)	Top-5 acc 55.078 (53.642)	lr 0.00431
Warmup Train [33][1330/3239]	Time 0.236 (0.238)	Data 0.001 (0.017)	Loss 3.7831 (4.0472)	Top-1 acc 33.203 (30.174)	Top-5 acc 59.375 (53.643)	lr 0.00431
Warmup Train [33][1340/3239]	Time 0.250 (0.238)	Data 0.001 (0.017)	Loss 4.2216 (4.0471)	Top-1 acc 26.172 (30.173)	Top-5 acc 48.047 (53.651)	lr 0.00431
Warmup Train [33][1350/3239]	Time 0.205 (0.238)	Data 0.001 (0.017)	Loss 4.2357 (4.0469)	Top-1 acc 26.953 (30.185)	Top-5 acc 47.656 (53.658)	lr 0.00430
Warmup Train [33][1360/3239]	Time 0.208 (0.238)	Data 0.001 (0.017)	Loss 3.9858 (4.0471)	Top-1 acc 32.031 (30.176)	Top-5 acc 52.734 (53.656)	lr 0.00430
Warmup Train [33][1370/3239]	Time 0.197 (0.238)	Data 0.001 (0.017)	Loss 4.0750 (4.0464)	Top-1 acc 32.812 (30.197)	Top-5 acc 54.297 (53.665)	lr 0.00430
Warmup Train [33][1380/3239]	Time 0.166 (0.238)	Data 0.001 (0.016)	Loss 4.0101 (4.0467)	Top-1 acc 28.906 (30.196)	Top-5 acc 53.906 (53.658)	lr 0.00429
Warmup Train [33][1390/3239]	Time 0.176 (0.238)	Data 0.001 (0.016)	Loss 4.0152 (4.0469)	Top-1 acc 30.469 (30.194)	Top-5 acc 54.688 (53.650)	lr 0.00429
Warmup Train [33][1400/3239]	Time 0.298 (0.238)	Data 0.001 (0.016)	Loss 4.2094 (4.0469)	Top-1 acc 26.953 (30.191)	Top-5 acc 49.609 (53.646)	lr 0.00429
Warmup Train [33][1410/3239]	Time 0.260 (0.237)	Data 0.001 (0.016)	Loss 4.0159 (4.0468)	Top-1 acc 28.125 (30.192)	Top-5 acc 55.078 (53.644)	lr 0.00428
Warmup Train [33][1420/3239]	Time 0.204 (0.237)	Data 0.001 (0.016)	Loss 3.8375 (4.0471)	Top-1 acc 34.766 (30.199)	Top-5 acc 56.250 (53.636)	lr 0.00428
Warmup Train [33][1430/3239]	Time 0.232 (0.237)	Data 0.001 (0.016)	Loss 4.0537 (4.0469)	Top-1 acc 29.297 (30.201)	Top-5 acc 51.172 (53.632)	lr 0.00428
Warmup Train [33][1440/3239]	Time 0.241 (0.237)	Data 0.001 (0.016)	Loss 4.0784 (4.0470)	Top-1 acc 30.469 (30.202)	Top-5 acc 53.125 (53.628)	lr 0.00427
Warmup Train [33][1450/3239]	Time 0.264 (0.237)	Data 0.001 (0.016)	Loss 3.9477 (4.0471)	Top-1 acc 33.594 (30.208)	Top-5 acc 54.297 (53.626)	lr 0.00427
Warmup Train [33][1460/3239]	Time 0.211 (0.237)	Data 0.001 (0.016)	Loss 4.0351 (4.0470)	Top-1 acc 30.078 (30.211)	Top-5 acc 54.688 (53.626)	lr 0.00427
Warmup Train [33][1470/3239]	Time 0.202 (0.237)	Data 0.001 (0.016)	Loss 4.1543 (4.0472)	Top-1 acc 32.422 (30.212)	Top-5 acc 51.172 (53.622)	lr 0.00426
Warmup Train [33][1480/3239]	Time 0.229 (0.237)	Data 0.001 (0.016)	Loss 3.9906 (4.0473)	Top-1 acc 31.641 (30.210)	Top-5 acc 53.516 (53.615)	lr 0.00426
Warmup Train [33][1490/3239]	Time 0.236 (0.237)	Data 0.001 (0.015)	Loss 4.0090 (4.0473)	Top-1 acc 30.078 (30.201)	Top-5 acc 53.906 (53.614)	lr 0.00426
Warmup Train [33][1500/3239]	Time 0.199 (0.236)	Data 0.001 (0.015)	Loss 4.1092 (4.0474)	Top-1 acc 27.734 (30.203)	Top-5 acc 50.391 (53.608)	lr 0.00425
Warmup Train [33][1510/3239]	Time 0.324 (0.236)	Data 0.001 (0.015)	Loss 4.2291 (4.0474)	Top-1 acc 27.734 (30.206)	Top-5 acc 50.391 (53.608)	lr 0.00425
Warmup Train [33][1520/3239]	Time 0.210 (0.236)	Data 0.001 (0.015)	Loss 4.2124 (4.0473)	Top-1 acc 29.297 (30.209)	Top-5 acc 51.172 (53.610)	lr 0.00425
Warmup Train [33][1530/3239]	Time 0.227 (0.236)	Data 0.032 (0.015)	Loss 3.9925 (4.0472)	Top-1 acc 31.250 (30.205)	Top-5 acc 54.688 (53.608)	lr 0.00424
Warmup Train [33][1540/3239]	Time 0.214 (0.236)	Data 0.001 (0.015)	Loss 3.8916 (4.0469)	Top-1 acc 31.250 (30.202)	Top-5 acc 61.328 (53.614)	lr 0.00424
Warmup Train [33][1550/3239]	Time 0.201 (0.236)	Data 0.002 (0.015)	Loss 3.8956 (4.0470)	Top-1 acc 32.812 (30.198)	Top-5 acc 58.594 (53.615)	lr 0.00424
Warmup Train [33][1560/3239]	Time 0.208 (0.236)	Data 0.001 (0.015)	Loss 4.3160 (4.0472)	Top-1 acc 27.344 (30.200)	Top-5 acc 47.656 (53.613)	lr 0.00423
Warmup Train [33][1570/3239]	Time 0.174 (0.236)	Data 0.001 (0.015)	Loss 4.0730 (4.0472)	Top-1 acc 28.125 (30.198)	Top-5 acc 54.297 (53.624)	lr 0.00423
Warmup Train [33][1580/3239]	Time 0.242 (0.236)	Data 0.001 (0.015)	Loss 4.0968 (4.0473)	Top-1 acc 29.297 (30.202)	Top-5 acc 54.297 (53.630)	lr 0.00423
Warmup Train [33][1590/3239]	Time 0.249 (0.236)	Data 0.001 (0.015)	Loss 4.0219 (4.0473)	Top-1 acc 30.469 (30.196)	Top-5 acc 52.344 (53.626)	lr 0.00422
Warmup Train [33][1600/3239]	Time 0.214 (0.236)	Data 0.001 (0.015)	Loss 3.9224 (4.0472)	Top-1 acc 33.203 (30.195)	Top-5 acc 59.766 (53.631)	lr 0.00422
Warmup Train [33][1610/3239]	Time 0.196 (0.235)	Data 0.002 (0.015)	Loss 3.9321 (4.0471)	Top-1 acc 32.812 (30.194)	Top-5 acc 54.688 (53.629)	lr 0.00422
Warmup Train [33][1620/3239]	Time 0.352 (0.235)	Data 0.001 (0.014)	Loss 4.0563 (4.0472)	Top-1 acc 30.859 (30.195)	Top-5 acc 55.469 (53.629)	lr 0.00421
Warmup Train [33][1630/3239]	Time 0.263 (0.235)	Data 0.002 (0.014)	Loss 4.0583 (4.0472)	Top-1 acc 32.031 (30.204)	Top-5 acc 52.734 (53.628)	lr 0.00421
Warmup Train [33][1640/3239]	Time 0.204 (0.235)	Data 0.001 (0.014)	Loss 4.0557 (4.0468)	Top-1 acc 28.516 (30.212)	Top-5 acc 52.734 (53.640)	lr 0.00421
Warmup Train [33][1650/3239]	Time 0.219 (0.235)	Data 0.001 (0.014)	Loss 3.9673 (4.0469)	Top-1 acc 32.031 (30.208)	Top-5 acc 56.250 (53.642)	lr 0.00420
Warmup Train [33][1660/3239]	Time 0.181 (0.235)	Data 0.001 (0.014)	Loss 4.1164 (4.0469)	Top-1 acc 27.344 (30.209)	Top-5 acc 52.734 (53.651)	lr 0.00420
Warmup Train [33][1670/3239]	Time 0.158 (0.235)	Data 0.002 (0.014)	Loss 3.9547 (4.0469)	Top-1 acc 32.031 (30.215)	Top-5 acc 54.688 (53.648)	lr 0.00420
Warmup Train [33][1680/3239]	Time 0.191 (0.235)	Data 0.001 (0.014)	Loss 4.2155 (4.0470)	Top-1 acc 25.391 (30.212)	Top-5 acc 51.562 (53.641)	lr 0.00419
Warmup Train [33][1690/3239]	Time 0.231 (0.235)	Data 0.002 (0.014)	Loss 4.0806 (4.0470)	Top-1 acc 31.250 (30.209)	Top-5 acc 53.125 (53.644)	lr 0.00419
Warmup Train [33][1700/3239]	Time 0.233 (0.235)	Data 0.001 (0.014)	Loss 4.0191 (4.0470)	Top-1 acc 28.125 (30.204)	Top-5 acc 54.688 (53.644)	lr 0.00419
Warmup Train [33][1710/3239]	Time 0.216 (0.235)	Data 0.001 (0.014)	Loss 4.0829 (4.0472)	Top-1 acc 27.344 (30.202)	Top-5 acc 50.781 (53.635)	lr 0.00418
Warmup Train [33][1720/3239]	Time 0.334 (0.235)	Data 0.001 (0.014)	Loss 4.0178 (4.0470)	Top-1 acc 28.906 (30.207)	Top-5 acc 55.078 (53.638)	lr 0.00418
Warmup Train [33][1730/3239]	Time 0.166 (0.234)	Data 0.001 (0.014)	Loss 4.1193 (4.0468)	Top-1 acc 30.859 (30.212)	Top-5 acc 53.516 (53.642)	lr 0.00418
Warmup Train [33][1740/3239]	Time 0.203 (0.234)	Data 0.001 (0.014)	Loss 4.0825 (4.0466)	Top-1 acc 24.609 (30.211)	Top-5 acc 50.781 (53.649)	lr 0.00417
Warmup Train [33][1750/3239]	Time 0.232 (0.234)	Data 0.001 (0.014)	Loss 4.1277 (4.0466)	Top-1 acc 28.906 (30.209)	Top-5 acc 50.391 (53.646)	lr 0.00417
Warmup Train [33][1760/3239]	Time 0.203 (0.234)	Data 0.001 (0.013)	Loss 4.1762 (4.0467)	Top-1 acc 28.906 (30.209)	Top-5 acc 52.734 (53.648)	lr 0.00417
Warmup Train [33][1770/3239]	Time 0.159 (0.234)	Data 0.001 (0.013)	Loss 4.0729 (4.0467)	Top-1 acc 30.078 (30.216)	Top-5 acc 51.953 (53.648)	lr 0.00416
Warmup Train [33][1780/3239]	Time 0.182 (0.234)	Data 0.001 (0.013)	Loss 3.9825 (4.0468)	Top-1 acc 36.328 (30.215)	Top-5 acc 56.641 (53.644)	lr 0.00416
Warmup Train [33][1790/3239]	Time 0.158 (0.234)	Data 0.001 (0.013)	Loss 4.2236 (4.0464)	Top-1 acc 23.828 (30.219)	Top-5 acc 52.344 (53.653)	lr 0.00416
Warmup Train [33][1800/3239]	Time 0.172 (0.234)	Data 0.001 (0.013)	Loss 4.0450 (4.0467)	Top-1 acc 33.203 (30.213)	Top-5 acc 52.734 (53.640)	lr 0.00415
Warmup Train [33][1810/3239]	Time 0.184 (0.234)	Data 0.002 (0.013)	Loss 4.0856 (4.0467)	Top-1 acc 30.078 (30.209)	Top-5 acc 52.734 (53.637)	lr 0.00415
Warmup Train [33][1820/3239]	Time 0.210 (0.234)	Data 0.001 (0.013)	Loss 3.9703 (4.0471)	Top-1 acc 30.078 (30.199)	Top-5 acc 55.859 (53.628)	lr 0.00415
Warmup Train [33][1830/3239]	Time 0.366 (0.234)	Data 0.002 (0.013)	Loss 4.2675 (4.0475)	Top-1 acc 26.953 (30.189)	Top-5 acc 50.781 (53.620)	lr 0.00414
Warmup Train [33][1840/3239]	Time 0.212 (0.234)	Data 0.001 (0.013)	Loss 3.9702 (4.0477)	Top-1 acc 29.297 (30.180)	Top-5 acc 54.297 (53.609)	lr 0.00414
Warmup Train [33][1850/3239]	Time 0.230 (0.234)	Data 0.001 (0.013)	Loss 4.0074 (4.0474)	Top-1 acc 30.859 (30.183)	Top-5 acc 53.516 (53.611)	lr 0.00414
Warmup Train [33][1860/3239]	Time 0.223 (0.234)	Data 0.001 (0.013)	Loss 4.0411 (4.0472)	Top-1 acc 32.422 (30.189)	Top-5 acc 56.641 (53.615)	lr 0.00413
Warmup Train [33][1870/3239]	Time 0.171 (0.233)	Data 0.002 (0.013)	Loss 4.0176 (4.0475)	Top-1 acc 33.984 (30.188)	Top-5 acc 52.344 (53.614)	lr 0.00413
Warmup Train [33][1880/3239]	Time 0.214 (0.233)	Data 0.003 (0.013)	Loss 3.8516 (4.0472)	Top-1 acc 35.547 (30.190)	Top-5 acc 56.641 (53.617)	lr 0.00413
Warmup Train [33][1890/3239]	Time 0.205 (0.233)	Data 0.001 (0.013)	Loss 4.0259 (4.0469)	Top-1 acc 32.031 (30.193)	Top-5 acc 53.906 (53.628)	lr 0.00412
Warmup Train [33][1900/3239]	Time 0.271 (0.233)	Data 0.001 (0.013)	Loss 3.9438 (4.0468)	Top-1 acc 29.688 (30.190)	Top-5 acc 57.422 (53.631)	lr 0.00412
Warmup Train [33][1910/3239]	Time 0.173 (0.233)	Data 0.001 (0.013)	Loss 4.2143 (4.0469)	Top-1 acc 28.125 (30.194)	Top-5 acc 50.781 (53.629)	lr 0.00412
Warmup Train [33][1920/3239]	Time 0.205 (0.233)	Data 0.001 (0.013)	Loss 4.0669 (4.0467)	Top-1 acc 28.516 (30.197)	Top-5 acc 52.344 (53.635)	lr 0.00411
Warmup Train [33][1930/3239]	Time 0.234 (0.233)	Data 0.001 (0.013)	Loss 3.9054 (4.0464)	Top-1 acc 34.375 (30.208)	Top-5 acc 54.688 (53.640)	lr 0.00411
Warmup Train [33][1940/3239]	Time 0.409 (0.233)	Data 0.002 (0.013)	Loss 4.0622 (4.0466)	Top-1 acc 25.000 (30.198)	Top-5 acc 53.125 (53.636)	lr 0.00411
Warmup Train [33][1950/3239]	Time 0.221 (0.233)	Data 0.001 (0.012)	Loss 3.9268 (4.0463)	Top-1 acc 28.906 (30.201)	Top-5 acc 54.688 (53.639)	lr 0.00410
Warmup Train [33][1960/3239]	Time 0.196 (0.233)	Data 0.001 (0.012)	Loss 3.9832 (4.0464)	Top-1 acc 37.500 (30.204)	Top-5 acc 55.078 (53.639)	lr 0.00410
Warmup Train [33][1970/3239]	Time 0.226 (0.233)	Data 0.002 (0.012)	Loss 4.0305 (4.0462)	Top-1 acc 32.812 (30.207)	Top-5 acc 55.859 (53.643)	lr 0.00410
Warmup Train [33][1980/3239]	Time 0.208 (0.233)	Data 0.002 (0.012)	Loss 4.0810 (4.0463)	Top-1 acc 29.688 (30.200)	Top-5 acc 53.516 (53.638)	lr 0.00409
Warmup Train [33][1990/3239]	Time 0.239 (0.233)	Data 0.001 (0.012)	Loss 3.9335 (4.0462)	Top-1 acc 30.859 (30.198)	Top-5 acc 53.125 (53.635)	lr 0.00409
Warmup Train [33][2000/3239]	Time 0.161 (0.233)	Data 0.001 (0.012)	Loss 4.0623 (4.0461)	Top-1 acc 28.125 (30.193)	Top-5 acc 53.906 (53.639)	lr 0.00409
Warmup Train [33][2010/3239]	Time 0.258 (0.233)	Data 0.001 (0.012)	Loss 4.1088 (4.0464)	Top-1 acc 28.906 (30.189)	Top-5 acc 55.469 (53.636)	lr 0.00408
Warmup Train [33][2020/3239]	Time 0.225 (0.233)	Data 0.001 (0.012)	Loss 4.0456 (4.0467)	Top-1 acc 28.906 (30.186)	Top-5 acc 52.734 (53.628)	lr 0.00408
Warmup Train [33][2030/3239]	Time 0.222 (0.233)	Data 0.001 (0.012)	Loss 3.9860 (4.0468)	Top-1 acc 28.516 (30.185)	Top-5 acc 53.906 (53.625)	lr 0.00408
Warmup Train [33][2040/3239]	Time 0.255 (0.233)	Data 0.002 (0.012)	Loss 3.8845 (4.0470)	Top-1 acc 31.641 (30.185)	Top-5 acc 56.250 (53.622)	lr 0.00407
Warmup Train [33][2050/3239]	Time 0.227 (0.233)	Data 0.001 (0.012)	Loss 3.9258 (4.0470)	Top-1 acc 33.203 (30.190)	Top-5 acc 55.078 (53.621)	lr 0.00407
Warmup Train [33][2060/3239]	Time 0.203 (0.233)	Data 0.001 (0.012)	Loss 3.9948 (4.0470)	Top-1 acc 30.078 (30.187)	Top-5 acc 54.297 (53.620)	lr 0.00407
Warmup Train [33][2070/3239]	Time 0.190 (0.232)	Data 0.001 (0.012)	Loss 3.9850 (4.0471)	Top-1 acc 30.469 (30.188)	Top-5 acc 53.516 (53.618)	lr 0.00406
Warmup Train [33][2080/3239]	Time 0.156 (0.232)	Data 0.001 (0.012)	Loss 3.8540 (4.0471)	Top-1 acc 31.641 (30.184)	Top-5 acc 57.422 (53.611)	lr 0.00406
Warmup Train [33][2090/3239]	Time 0.220 (0.232)	Data 0.001 (0.012)	Loss 3.8543 (4.0468)	Top-1 acc 34.375 (30.189)	Top-5 acc 59.375 (53.620)	lr 0.00406
Warmup Train [33][2100/3239]	Time 0.196 (0.232)	Data 0.002 (0.012)	Loss 4.0281 (4.0465)	Top-1 acc 29.688 (30.188)	Top-5 acc 51.562 (53.628)	lr 0.00405
Warmup Train [33][2110/3239]	Time 0.162 (0.232)	Data 0.002 (0.012)	Loss 4.0520 (4.0464)	Top-1 acc 31.250 (30.187)	Top-5 acc 54.688 (53.630)	lr 0.00405
Warmup Train [33][2120/3239]	Time 0.156 (0.232)	Data 0.001 (0.012)	Loss 3.9562 (4.0463)	Top-1 acc 33.984 (30.189)	Top-5 acc 57.422 (53.637)	lr 0.00405
Warmup Train [33][2130/3239]	Time 0.182 (0.232)	Data 0.001 (0.012)	Loss 3.9038 (4.0461)	Top-1 acc 35.156 (30.191)	Top-5 acc 54.688 (53.644)	lr 0.00404
Warmup Train [33][2140/3239]	Time 0.165 (0.232)	Data 0.001 (0.012)	Loss 4.0134 (4.0463)	Top-1 acc 31.641 (30.187)	Top-5 acc 51.562 (53.641)	lr 0.00404
Warmup Train [33][2150/3239]	Time 0.363 (0.232)	Data 0.001 (0.012)	Loss 3.8697 (4.0462)	Top-1 acc 31.641 (30.186)	Top-5 acc 57.812 (53.637)	lr 0.00404
Warmup Train [33][2160/3239]	Time 0.331 (0.232)	Data 0.001 (0.012)	Loss 4.0587 (4.0462)	Top-1 acc 28.125 (30.189)	Top-5 acc 55.078 (53.640)	lr 0.00403
Warmup Train [33][2170/3239]	Time 0.252 (0.232)	Data 0.001 (0.012)	Loss 3.9960 (4.0462)	Top-1 acc 30.469 (30.186)	Top-5 acc 54.688 (53.640)	lr 0.00403
Warmup Train [33][2180/3239]	Time 0.185 (0.232)	Data 0.001 (0.012)	Loss 4.1031 (4.0464)	Top-1 acc 28.906 (30.179)	Top-5 acc 51.953 (53.632)	lr 0.00403
Warmup Train [33][2190/3239]	Time 0.208 (0.232)	Data 0.002 (0.011)	Loss 4.0943 (4.0466)	Top-1 acc 28.906 (30.182)	Top-5 acc 55.469 (53.632)	lr 0.00402
Warmup Train [33][2200/3239]	Time 0.266 (0.232)	Data 0.001 (0.011)	Loss 4.1083 (4.0467)	Top-1 acc 32.812 (30.181)	Top-5 acc 52.734 (53.624)	lr 0.00402
Warmup Train [33][2210/3239]	Time 0.192 (0.232)	Data 0.002 (0.011)	Loss 3.9916 (4.0469)	Top-1 acc 32.031 (30.182)	Top-5 acc 55.469 (53.628)	lr 0.00402
Warmup Train [33][2220/3239]	Time 0.301 (0.232)	Data 0.001 (0.011)	Loss 4.2622 (4.0468)	Top-1 acc 23.438 (30.182)	Top-5 acc 48.828 (53.626)	lr 0.00401
Warmup Train [33][2230/3239]	Time 0.213 (0.232)	Data 0.001 (0.011)	Loss 4.1612 (4.0469)	Top-1 acc 26.953 (30.181)	Top-5 acc 49.219 (53.620)	lr 0.00401
Warmup Train [33][2240/3239]	Time 0.215 (0.232)	Data 0.001 (0.011)	Loss 4.1310 (4.0469)	Top-1 acc 27.734 (30.179)	Top-5 acc 51.953 (53.623)	lr 0.00401
Warmup Train [33][2250/3239]	Time 0.194 (0.232)	Data 0.001 (0.011)	Loss 4.1089 (4.0470)	Top-1 acc 30.469 (30.173)	Top-5 acc 52.734 (53.621)	lr 0.00400
Warmup Train [33][2260/3239]	Time 0.198 (0.232)	Data 0.001 (0.011)	Loss 4.1576 (4.0470)	Top-1 acc 33.594 (30.181)	Top-5 acc 52.344 (53.622)	lr 0.00400
Warmup Train [33][2270/3239]	Time 0.264 (0.232)	Data 0.001 (0.011)	Loss 4.1277 (4.0469)	Top-1 acc 31.641 (30.184)	Top-5 acc 50.000 (53.624)	lr 0.00400
Warmup Train [33][2280/3239]	Time 0.195 (0.232)	Data 0.001 (0.011)	Loss 4.1946 (4.0471)	Top-1 acc 28.516 (30.181)	Top-5 acc 49.219 (53.615)	lr 0.00399
Warmup Train [33][2290/3239]	Time 0.230 (0.232)	Data 0.001 (0.011)	Loss 3.9890 (4.0470)	Top-1 acc 31.250 (30.186)	Top-5 acc 53.906 (53.615)	lr 0.00399
Warmup Train [33][2300/3239]	Time 0.226 (0.232)	Data 0.001 (0.011)	Loss 4.0370 (4.0469)	Top-1 acc 32.031 (30.192)	Top-5 acc 54.688 (53.619)	lr 0.00399
Warmup Train [33][2310/3239]	Time 0.218 (0.232)	Data 0.001 (0.011)	Loss 3.9210 (4.0471)	Top-1 acc 29.297 (30.190)	Top-5 acc 61.328 (53.614)	lr 0.00398
Warmup Train [33][2320/3239]	Time 0.251 (0.232)	Data 0.002 (0.011)	Loss 3.9466 (4.0473)	Top-1 acc 34.375 (30.187)	Top-5 acc 54.297 (53.605)	lr 0.00398
Warmup Train [33][2330/3239]	Time 0.289 (0.232)	Data 0.001 (0.011)	Loss 4.0454 (4.0472)	Top-1 acc 28.516 (30.188)	Top-5 acc 51.172 (53.603)	lr 0.00398
Warmup Train [33][2340/3239]	Time 0.249 (0.232)	Data 0.002 (0.011)	Loss 4.0585 (4.0471)	Top-1 acc 27.734 (30.189)	Top-5 acc 55.078 (53.612)	lr 0.00397
Warmup Train [33][2350/3239]	Time 0.342 (0.232)	Data 0.001 (0.011)	Loss 4.1426 (4.0470)	Top-1 acc 30.859 (30.195)	Top-5 acc 51.172 (53.616)	lr 0.00397
Warmup Train [33][2360/3239]	Time 0.306 (0.232)	Data 0.001 (0.011)	Loss 4.0106 (4.0469)	Top-1 acc 33.594 (30.195)	Top-5 acc 53.516 (53.615)	lr 0.00397
Warmup Train [33][2370/3239]	Time 0.246 (0.232)	Data 0.001 (0.011)	Loss 3.8944 (4.0470)	Top-1 acc 34.375 (30.198)	Top-5 acc 55.859 (53.615)	lr 0.00396
Warmup Train [33][2380/3239]	Time 0.201 (0.232)	Data 0.001 (0.011)	Loss 4.0603 (4.0469)	Top-1 acc 33.984 (30.204)	Top-5 acc 54.688 (53.616)	lr 0.00396
Warmup Train [33][2390/3239]	Time 0.250 (0.232)	Data 0.002 (0.011)	Loss 3.9028 (4.0468)	Top-1 acc 31.250 (30.205)	Top-5 acc 53.516 (53.615)	lr 0.00396
Warmup Train [33][2400/3239]	Time 0.148 (0.232)	Data 0.001 (0.011)	Loss 4.1483 (4.0470)	Top-1 acc 28.906 (30.201)	Top-5 acc 50.000 (53.608)	lr 0.00395
Warmup Train [33][2410/3239]	Time 0.268 (0.232)	Data 0.002 (0.011)	Loss 4.0278 (4.0468)	Top-1 acc 30.078 (30.204)	Top-5 acc 50.781 (53.612)	lr 0.00395
Warmup Train [33][2420/3239]	Time 0.276 (0.232)	Data 0.002 (0.011)	Loss 3.8810 (4.0468)	Top-1 acc 32.031 (30.204)	Top-5 acc 58.984 (53.611)	lr 0.00395
Warmup Train [33][2430/3239]	Time 0.223 (0.232)	Data 0.003 (0.011)	Loss 4.1191 (4.0468)	Top-1 acc 30.078 (30.202)	Top-5 acc 50.000 (53.612)	lr 0.00394
Warmup Train [33][2440/3239]	Time 0.208 (0.232)	Data 0.001 (0.011)	Loss 3.9407 (4.0465)	Top-1 acc 30.859 (30.209)	Top-5 acc 55.469 (53.619)	lr 0.00394
Warmup Train [33][2450/3239]	Time 0.328 (0.232)	Data 0.001 (0.011)	Loss 3.8449 (4.0465)	Top-1 acc 31.641 (30.209)	Top-5 acc 58.984 (53.624)	lr 0.00394
Warmup Train [33][2460/3239]	Time 0.165 (0.232)	Data 0.001 (0.011)	Loss 4.0771 (4.0465)	Top-1 acc 26.172 (30.210)	Top-5 acc 51.562 (53.622)	lr 0.00393
Warmup Train [33][2470/3239]	Time 0.270 (0.232)	Data 0.001 (0.010)	Loss 4.3067 (4.0466)	Top-1 acc 24.609 (30.210)	Top-5 acc 47.656 (53.621)	lr 0.00393
Warmup Train [33][2480/3239]	Time 0.236 (0.232)	Data 0.002 (0.010)	Loss 3.9811 (4.0465)	Top-1 acc 33.594 (30.218)	Top-5 acc 56.641 (53.621)	lr 0.00393
Warmup Train [33][2490/3239]	Time 0.208 (0.232)	Data 0.002 (0.010)	Loss 4.2683 (4.0466)	Top-1 acc 26.172 (30.217)	Top-5 acc 53.125 (53.622)	lr 0.00392
Warmup Train [33][2500/3239]	Time 0.238 (0.232)	Data 0.002 (0.010)	Loss 4.0574 (4.0467)	Top-1 acc 30.469 (30.219)	Top-5 acc 57.031 (53.620)	lr 0.00392
Warmup Train [33][2510/3239]	Time 0.185 (0.232)	Data 0.001 (0.010)	Loss 4.0195 (4.0467)	Top-1 acc 28.516 (30.220)	Top-5 acc 54.297 (53.617)	lr 0.00392
Warmup Train [33][2520/3239]	Time 0.258 (0.232)	Data 0.001 (0.010)	Loss 3.9871 (4.0463)	Top-1 acc 33.594 (30.230)	Top-5 acc 54.688 (53.626)	lr 0.00392
Warmup Train [33][2530/3239]	Time 0.239 (0.232)	Data 0.001 (0.010)	Loss 3.9202 (4.0461)	Top-1 acc 33.203 (30.234)	Top-5 acc 55.078 (53.636)	lr 0.00391
Warmup Train [33][2540/3239]	Time 0.259 (0.232)	Data 0.001 (0.010)	Loss 3.9665 (4.0461)	Top-1 acc 32.812 (30.236)	Top-5 acc 56.250 (53.636)	lr 0.00391
Warmup Train [33][2550/3239]	Time 0.147 (0.232)	Data 0.001 (0.010)	Loss 3.8952 (4.0461)	Top-1 acc 35.156 (30.241)	Top-5 acc 57.812 (53.633)	lr 0.00391
Warmup Train [33][2560/3239]	Time 0.306 (0.232)	Data 0.001 (0.010)	Loss 4.2149 (4.0464)	Top-1 acc 26.562 (30.233)	Top-5 acc 49.609 (53.625)	lr 0.00390
Warmup Train [33][2570/3239]	Time 0.201 (0.232)	Data 0.001 (0.010)	Loss 3.7407 (4.0462)	Top-1 acc 36.719 (30.239)	Top-5 acc 58.594 (53.632)	lr 0.00390
Warmup Train [33][2580/3239]	Time 0.218 (0.232)	Data 0.001 (0.010)	Loss 4.0022 (4.0462)	Top-1 acc 33.984 (30.240)	Top-5 acc 55.469 (53.631)	lr 0.00390
Warmup Train [33][2590/3239]	Time 0.194 (0.232)	Data 0.001 (0.010)	Loss 4.0316 (4.0465)	Top-1 acc 30.469 (30.233)	Top-5 acc 53.906 (53.623)	lr 0.00389
Warmup Train [33][2600/3239]	Time 0.199 (0.232)	Data 0.001 (0.010)	Loss 3.9783 (4.0465)	Top-1 acc 35.156 (30.233)	Top-5 acc 53.516 (53.625)	lr 0.00389
Warmup Train [33][2610/3239]	Time 0.278 (0.232)	Data 0.001 (0.010)	Loss 3.9710 (4.0464)	Top-1 acc 29.688 (30.234)	Top-5 acc 57.812 (53.624)	lr 0.00389
Warmup Train [33][2620/3239]	Time 0.210 (0.232)	Data 0.002 (0.010)	Loss 3.9356 (4.0460)	Top-1 acc 33.594 (30.244)	Top-5 acc 56.250 (53.631)	lr 0.00388
Warmup Train [33][2630/3239]	Time 0.216 (0.232)	Data 0.002 (0.010)	Loss 4.1036 (4.0462)	Top-1 acc 32.422 (30.239)	Top-5 acc 51.172 (53.624)	lr 0.00388
Warmup Train [33][2640/3239]	Time 0.289 (0.232)	Data 0.001 (0.010)	Loss 4.2894 (4.0462)	Top-1 acc 26.172 (30.240)	Top-5 acc 44.922 (53.623)	lr 0.00388
Warmup Train [33][2650/3239]	Time 0.290 (0.232)	Data 0.001 (0.010)	Loss 4.2527 (4.0465)	Top-1 acc 25.781 (30.237)	Top-5 acc 45.703 (53.614)	lr 0.00387
Warmup Train [33][2660/3239]	Time 0.262 (0.232)	Data 0.001 (0.010)	Loss 4.2208 (4.0465)	Top-1 acc 26.562 (30.234)	Top-5 acc 50.781 (53.613)	lr 0.00387
Warmup Train [33][2670/3239]	Time 0.203 (0.232)	Data 0.001 (0.010)	Loss 3.9785 (4.0466)	Top-1 acc 32.422 (30.236)	Top-5 acc 52.734 (53.608)	lr 0.00387
Warmup Train [33][2680/3239]	Time 0.335 (0.232)	Data 0.001 (0.010)	Loss 4.1931 (4.0467)	Top-1 acc 28.125 (30.235)	Top-5 acc 49.219 (53.609)	lr 0.00386
Warmup Train [33][2690/3239]	Time 0.313 (0.232)	Data 0.001 (0.010)	Loss 4.0279 (4.0469)	Top-1 acc 29.297 (30.228)	Top-5 acc 55.078 (53.603)	lr 0.00386
Warmup Train [33][2700/3239]	Time 0.256 (0.232)	Data 0.002 (0.010)	Loss 4.0518 (4.0470)	Top-1 acc 30.078 (30.227)	Top-5 acc 50.000 (53.597)	lr 0.00386
Warmup Train [33][2710/3239]	Time 0.179 (0.232)	Data 0.001 (0.010)	Loss 4.2826 (4.0469)	Top-1 acc 27.734 (30.231)	Top-5 acc 47.656 (53.599)	lr 0.00385
Warmup Train [33][2720/3239]	Time 0.232 (0.232)	Data 0.002 (0.010)	Loss 3.9287 (4.0469)	Top-1 acc 35.547 (30.232)	Top-5 acc 55.469 (53.600)	lr 0.00385
Warmup Train [33][2730/3239]	Time 0.159 (0.232)	Data 0.001 (0.010)	Loss 4.0907 (4.0470)	Top-1 acc 26.562 (30.228)	Top-5 acc 53.125 (53.595)	lr 0.00385
Warmup Train [33][2740/3239]	Time 0.224 (0.232)	Data 0.001 (0.010)	Loss 3.7885 (4.0469)	Top-1 acc 33.203 (30.227)	Top-5 acc 61.328 (53.593)	lr 0.00384
Warmup Train [33][2750/3239]	Time 0.178 (0.232)	Data 0.001 (0.010)	Loss 4.0833 (4.0470)	Top-1 acc 32.422 (30.227)	Top-5 acc 56.641 (53.596)	lr 0.00384
Warmup Train [33][2760/3239]	Time 0.158 (0.232)	Data 0.002 (0.010)	Loss 4.2020 (4.0469)	Top-1 acc 24.219 (30.229)	Top-5 acc 52.344 (53.599)	lr 0.00384
Warmup Train [33][2770/3239]	Time 0.211 (0.232)	Data 0.001 (0.010)	Loss 3.8731 (4.0467)	Top-1 acc 31.641 (30.232)	Top-5 acc 55.859 (53.601)	lr 0.00383
Warmup Train [33][2780/3239]	Time 0.440 (0.232)	Data 0.001 (0.010)	Loss 3.9966 (4.0469)	Top-1 acc 33.984 (30.226)	Top-5 acc 54.297 (53.598)	lr 0.00383
Warmup Train [33][2790/3239]	Time 0.279 (0.232)	Data 0.002 (0.010)	Loss 3.9780 (4.0466)	Top-1 acc 32.422 (30.233)	Top-5 acc 53.906 (53.605)	lr 0.00383
Warmup Train [33][2800/3239]	Time 0.260 (0.232)	Data 0.001 (0.010)	Loss 4.1247 (4.0466)	Top-1 acc 30.078 (30.236)	Top-5 acc 51.953 (53.603)	lr 0.00382
Warmup Train [33][2810/3239]	Time 0.260 (0.232)	Data 0.001 (0.010)	Loss 4.2099 (4.0466)	Top-1 acc 32.422 (30.234)	Top-5 acc 49.219 (53.597)	lr 0.00382
Warmup Train [33][2820/3239]	Time 0.197 (0.232)	Data 0.001 (0.010)	Loss 3.8555 (4.0466)	Top-1 acc 33.203 (30.232)	Top-5 acc 59.375 (53.598)	lr 0.00382
Warmup Train [33][2830/3239]	Time 0.215 (0.232)	Data 0.001 (0.010)	Loss 3.8379 (4.0466)	Top-1 acc 37.891 (30.229)	Top-5 acc 58.203 (53.601)	lr 0.00381
Warmup Train [33][2840/3239]	Time 0.254 (0.232)	Data 0.002 (0.010)	Loss 4.1256 (4.0466)	Top-1 acc 29.297 (30.225)	Top-5 acc 53.125 (53.602)	lr 0.00381
Warmup Train [33][2850/3239]	Time 0.228 (0.232)	Data 0.001 (0.009)	Loss 4.0759 (4.0468)	Top-1 acc 30.078 (30.223)	Top-5 acc 51.172 (53.597)	lr 0.00381
Warmup Train [33][2860/3239]	Time 0.195 (0.232)	Data 0.001 (0.009)	Loss 3.8785 (4.0467)	Top-1 acc 33.594 (30.228)	Top-5 acc 56.250 (53.598)	lr 0.00381
Warmup Train [33][2870/3239]	Time 0.252 (0.231)	Data 0.001 (0.009)	Loss 4.1344 (4.0468)	Top-1 acc 30.859 (30.224)	Top-5 acc 55.859 (53.598)	lr 0.00380
Warmup Train [33][2880/3239]	Time 0.187 (0.231)	Data 0.001 (0.009)	Loss 4.0647 (4.0467)	Top-1 acc 28.906 (30.228)	Top-5 acc 51.172 (53.597)	lr 0.00380
Warmup Train [33][2890/3239]	Time 0.302 (0.231)	Data 0.001 (0.009)	Loss 4.1297 (4.0467)	Top-1 acc 32.812 (30.229)	Top-5 acc 52.734 (53.599)	lr 0.00380
Warmup Train [33][2900/3239]	Time 0.313 (0.231)	Data 0.001 (0.009)	Loss 3.8940 (4.0467)	Top-1 acc 29.297 (30.232)	Top-5 acc 55.859 (53.598)	lr 0.00379
Warmup Train [33][2910/3239]	Time 0.326 (0.231)	Data 0.001 (0.009)	Loss 4.0923 (4.0466)	Top-1 acc 30.078 (30.233)	Top-5 acc 53.125 (53.601)	lr 0.00379
Warmup Train [33][2920/3239]	Time 0.222 (0.231)	Data 0.001 (0.009)	Loss 4.2272 (4.0467)	Top-1 acc 26.562 (30.228)	Top-5 acc 49.219 (53.599)	lr 0.00379
Warmup Train [33][2930/3239]	Time 0.207 (0.231)	Data 0.001 (0.009)	Loss 4.1560 (4.0469)	Top-1 acc 29.297 (30.226)	Top-5 acc 53.125 (53.596)	lr 0.00378
Warmup Train [33][2940/3239]	Time 0.183 (0.231)	Data 0.001 (0.009)	Loss 4.0585 (4.0469)	Top-1 acc 28.906 (30.225)	Top-5 acc 55.078 (53.594)	lr 0.00378
Warmup Train [33][2950/3239]	Time 0.215 (0.231)	Data 0.001 (0.009)	Loss 4.1254 (4.0471)	Top-1 acc 32.422 (30.222)	Top-5 acc 53.906 (53.589)	lr 0.00378
Warmup Train [33][2960/3239]	Time 0.227 (0.231)	Data 0.002 (0.009)	Loss 4.1241 (4.0471)	Top-1 acc 31.250 (30.221)	Top-5 acc 53.516 (53.588)	lr 0.00377
Warmup Train [33][2970/3239]	Time 0.248 (0.231)	Data 0.003 (0.009)	Loss 4.0658 (4.0470)	Top-1 acc 30.078 (30.222)	Top-5 acc 51.953 (53.588)	lr 0.00377
Warmup Train [33][2980/3239]	Time 0.211 (0.231)	Data 0.001 (0.009)	Loss 3.7495 (4.0469)	Top-1 acc 34.375 (30.224)	Top-5 acc 59.766 (53.590)	lr 0.00377
Warmup Train [33][2990/3239]	Time 0.173 (0.231)	Data 0.001 (0.009)	Loss 4.1693 (4.0472)	Top-1 acc 25.781 (30.217)	Top-5 acc 50.000 (53.581)	lr 0.00376
Warmup Train [33][3000/3239]	Time 0.237 (0.231)	Data 0.001 (0.009)	Loss 4.0648 (4.0472)	Top-1 acc 33.984 (30.216)	Top-5 acc 53.516 (53.582)	lr 0.00376
Warmup Train [33][3010/3239]	Time 0.359 (0.231)	Data 0.002 (0.009)	Loss 4.0415 (4.0471)	Top-1 acc 33.984 (30.221)	Top-5 acc 53.516 (53.585)	lr 0.00376
Warmup Train [33][3020/3239]	Time 0.295 (0.231)	Data 0.001 (0.009)	Loss 3.9888 (4.0471)	Top-1 acc 34.375 (30.219)	Top-5 acc 54.688 (53.583)	lr 0.00375
Warmup Train [33][3030/3239]	Time 0.138 (0.231)	Data 0.001 (0.009)	Loss 4.2018 (4.0472)	Top-1 acc 26.953 (30.217)	Top-5 acc 48.828 (53.581)	lr 0.00375
Warmup Train [33][3040/3239]	Time 0.222 (0.231)	Data 0.001 (0.009)	Loss 3.9939 (4.0471)	Top-1 acc 33.594 (30.220)	Top-5 acc 52.344 (53.586)	lr 0.00375
Warmup Train [33][3050/3239]	Time 0.188 (0.231)	Data 0.002 (0.009)	Loss 4.0612 (4.0472)	Top-1 acc 32.422 (30.219)	Top-5 acc 53.516 (53.585)	lr 0.00374
Warmup Train [33][3060/3239]	Time 0.223 (0.231)	Data 0.001 (0.009)	Loss 4.1006 (4.0471)	Top-1 acc 27.734 (30.225)	Top-5 acc 50.781 (53.586)	lr 0.00374
Warmup Train [33][3070/3239]	Time 0.176 (0.231)	Data 0.001 (0.009)	Loss 4.0030 (4.0471)	Top-1 acc 31.250 (30.229)	Top-5 acc 58.984 (53.586)	lr 0.00374
Warmup Train [33][3080/3239]	Time 0.184 (0.231)	Data 0.005 (0.009)	Loss 3.8299 (4.0473)	Top-1 acc 36.328 (30.226)	Top-5 acc 58.594 (53.585)	lr 0.00373
Warmup Train [33][3090/3239]	Time 0.206 (0.231)	Data 0.001 (0.009)	Loss 4.0794 (4.0471)	Top-1 acc 30.859 (30.228)	Top-5 acc 51.172 (53.587)	lr 0.00373
Warmup Train [33][3100/3239]	Time 0.152 (0.231)	Data 0.001 (0.009)	Loss 3.9383 (4.0470)	Top-1 acc 29.297 (30.227)	Top-5 acc 57.812 (53.587)	lr 0.00373
Warmup Train [33][3110/3239]	Time 0.204 (0.231)	Data 0.001 (0.009)	Loss 4.0031 (4.0472)	Top-1 acc 36.328 (30.224)	Top-5 acc 54.297 (53.582)	lr 0.00373
Warmup Train [33][3120/3239]	Time 0.334 (0.231)	Data 0.001 (0.009)	Loss 4.1683 (4.0471)	Top-1 acc 25.391 (30.227)	Top-5 acc 51.953 (53.589)	lr 0.00372
Warmup Train [33][3130/3239]	Time 0.281 (0.231)	Data 0.001 (0.009)	Loss 4.0258 (4.0472)	Top-1 acc 31.641 (30.225)	Top-5 acc 53.125 (53.588)	lr 0.00372
Warmup Train [33][3140/3239]	Time 0.222 (0.231)	Data 0.001 (0.009)	Loss 3.9322 (4.0473)	Top-1 acc 31.641 (30.223)	Top-5 acc 57.422 (53.583)	lr 0.00372
Warmup Train [33][3150/3239]	Time 0.263 (0.231)	Data 0.001 (0.009)	Loss 3.9919 (4.0471)	Top-1 acc 29.688 (30.226)	Top-5 acc 53.906 (53.588)	lr 0.00371
Warmup Train [33][3160/3239]	Time 0.220 (0.231)	Data 0.001 (0.009)	Loss 3.8714 (4.0467)	Top-1 acc 33.984 (30.233)	Top-5 acc 57.031 (53.598)	lr 0.00371
Warmup Train [33][3170/3239]	Time 0.237 (0.231)	Data 0.001 (0.009)	Loss 3.9802 (4.0465)	Top-1 acc 33.594 (30.243)	Top-5 acc 55.859 (53.604)	lr 0.00371
Warmup Train [33][3180/3239]	Time 0.274 (0.231)	Data 0.000 (0.009)	Loss 3.9123 (4.0465)	Top-1 acc 34.375 (30.245)	Top-5 acc 57.031 (53.604)	lr 0.00370
Warmup Train [33][3190/3239]	Time 0.224 (0.231)	Data 0.000 (0.009)	Loss 4.0890 (4.0467)	Top-1 acc 29.688 (30.238)	Top-5 acc 49.609 (53.598)	lr 0.00370
Warmup Train [33][3200/3239]	Time 0.247 (0.230)	Data 0.000 (0.009)	Loss 4.0941 (4.0467)	Top-1 acc 26.562 (30.236)	Top-5 acc 51.562 (53.597)	lr 0.00370
Warmup Train [33][3210/3239]	Time 0.183 (0.230)	Data 0.000 (0.009)	Loss 4.1802 (4.0466)	Top-1 acc 26.172 (30.239)	Top-5 acc 51.172 (53.603)	lr 0.00369
Warmup Train [33][3220/3239]	Time 0.207 (0.230)	Data 0.000 (0.009)	Loss 4.0420 (4.0465)	Top-1 acc 33.594 (30.242)	Top-5 acc 51.172 (53.603)	lr 0.00369
Warmup Train [33][3230/3239]	Time 0.301 (0.230)	Data 0.000 (0.009)	Loss 3.8982 (4.0464)	Top-1 acc 33.984 (30.248)	Top-5 acc 57.812 (53.606)	lr 0.00369
Warmup Train [33][3239/3239]	Time 0.172 (0.230)	Data 0.000 (0.009)	Loss 4.6021 (4.0463)	Top-1 acc 20.988 (30.243)	Top-5 acc 41.975 (53.608)	lr 0.00368
==========Warmup Valid [33/40]	loss 2.984	top-1 acc 38.071	top-5 acc 62.614	Train top-1 30.243	top-5 53.608	flops: 442.4M
Warmup Train [34][0/3239]	Time 22.444 (22.444)	Data 18.805 (18.805)	Loss 4.2145 (4.2145)	Top-1 acc 23.438 (23.438)	Top-5 acc 48.047 (48.047)	lr 0.00368
Warmup Train [34][10/3239]	Time 0.260 (2.271)	Data 0.001 (1.711)	Loss 4.2094 (4.0802)	Top-1 acc 27.734 (28.516)	Top-5 acc 48.438 (53.196)	lr 0.00368
Warmup Train [34][20/3239]	Time 0.259 (1.299)	Data 0.001 (0.897)	Loss 4.0550 (4.0702)	Top-1 acc 31.250 (28.962)	Top-5 acc 55.859 (53.032)	lr 0.00368
Warmup Train [34][30/3239]	Time 0.315 (0.959)	Data 0.001 (0.608)	Loss 4.0819 (4.0756)	Top-1 acc 26.562 (28.856)	Top-5 acc 51.562 (52.621)	lr 0.00367
Warmup Train [34][40/3239]	Time 0.212 (0.780)	Data 0.001 (0.460)	Loss 3.8795 (4.0742)	Top-1 acc 32.031 (29.030)	Top-5 acc 57.031 (52.544)	lr 0.00367
Warmup Train [34][50/3239]	Time 0.192 (0.670)	Data 0.001 (0.371)	Loss 4.0685 (4.0765)	Top-1 acc 28.125 (29.052)	Top-5 acc 51.953 (52.719)	lr 0.00367
Warmup Train [34][60/3239]	Time 0.251 (0.596)	Data 0.001 (0.310)	Loss 3.8756 (4.0661)	Top-1 acc 32.031 (29.297)	Top-5 acc 60.156 (53.170)	lr 0.00367
Warmup Train [34][70/3239]	Time 0.258 (0.543)	Data 0.002 (0.267)	Loss 4.0436 (4.0610)	Top-1 acc 31.250 (29.533)	Top-5 acc 51.172 (53.351)	lr 0.00366
Warmup Train [34][80/3239]	Time 0.203 (0.503)	Data 0.001 (0.234)	Loss 3.9641 (4.0587)	Top-1 acc 32.031 (29.644)	Top-5 acc 54.297 (53.414)	lr 0.00366
Warmup Train [34][90/3239]	Time 0.273 (0.471)	Data 0.001 (0.209)	Loss 3.9891 (4.0473)	Top-1 acc 30.859 (29.992)	Top-5 acc 53.125 (53.653)	lr 0.00366
Warmup Train [34][100/3239]	Time 0.288 (0.449)	Data 0.002 (0.188)	Loss 4.2827 (4.0476)	Top-1 acc 27.734 (30.148)	Top-5 acc 49.609 (53.709)	lr 0.00365
Warmup Train [34][110/3239]	Time 0.210 (0.430)	Data 0.001 (0.171)	Loss 4.0197 (4.0466)	Top-1 acc 32.422 (30.163)	Top-5 acc 53.906 (53.758)	lr 0.00365
Warmup Train [34][120/3239]	Time 0.177 (0.413)	Data 0.001 (0.158)	Loss 4.1665 (4.0468)	Top-1 acc 25.781 (30.133)	Top-5 acc 51.172 (53.767)	lr 0.00365
Warmup Train [34][130/3239]	Time 0.152 (0.398)	Data 0.001 (0.146)	Loss 3.7078 (4.0378)	Top-1 acc 33.203 (30.320)	Top-5 acc 58.594 (53.963)	lr 0.00364
Warmup Train [34][140/3239]	Time 0.280 (0.387)	Data 0.002 (0.136)	Loss 4.0234 (4.0394)	Top-1 acc 29.297 (30.297)	Top-5 acc 53.906 (53.892)	lr 0.00364
Warmup Train [34][150/3239]	Time 0.300 (0.377)	Data 0.001 (0.127)	Loss 4.0946 (4.0423)	Top-1 acc 31.641 (30.200)	Top-5 acc 52.344 (53.818)	lr 0.00364
Warmup Train [34][160/3239]	Time 0.244 (0.367)	Data 0.002 (0.120)	Loss 3.9437 (4.0406)	Top-1 acc 33.203 (30.282)	Top-5 acc 55.469 (53.867)	lr 0.00363
Warmup Train [34][170/3239]	Time 0.232 (0.358)	Data 0.004 (0.113)	Loss 4.0643 (4.0416)	Top-1 acc 30.859 (30.297)	Top-5 acc 54.297 (53.847)	lr 0.00363
Warmup Train [34][180/3239]	Time 0.207 (0.350)	Data 0.001 (0.107)	Loss 4.0410 (4.0399)	Top-1 acc 32.422 (30.357)	Top-5 acc 51.562 (53.842)	lr 0.00363
Warmup Train [34][190/3239]	Time 0.162 (0.343)	Data 0.001 (0.101)	Loss 3.8649 (4.0373)	Top-1 acc 32.031 (30.350)	Top-5 acc 56.641 (53.892)	lr 0.00362
Warmup Train [34][200/3239]	Time 0.184 (0.338)	Data 0.002 (0.097)	Loss 3.8030 (4.0373)	Top-1 acc 35.938 (30.360)	Top-5 acc 60.938 (53.898)	lr 0.00362
Warmup Train [34][210/3239]	Time 0.166 (0.332)	Data 0.001 (0.092)	Loss 4.0060 (4.0377)	Top-1 acc 32.812 (30.310)	Top-5 acc 52.344 (53.878)	lr 0.00362
Warmup Train [34][220/3239]	Time 0.333 (0.327)	Data 0.002 (0.088)	Loss 3.9469 (4.0383)	Top-1 acc 31.641 (30.310)	Top-5 acc 55.469 (53.873)	lr 0.00361
Warmup Train [34][230/3239]	Time 0.237 (0.323)	Data 0.001 (0.084)	Loss 3.9753 (4.0371)	Top-1 acc 34.375 (30.401)	Top-5 acc 53.125 (53.889)	lr 0.00361
Warmup Train [34][240/3239]	Time 0.212 (0.319)	Data 0.001 (0.081)	Loss 4.0622 (4.0354)	Top-1 acc 32.422 (30.461)	Top-5 acc 54.297 (53.921)	lr 0.00361
Warmup Train [34][250/3239]	Time 0.327 (0.315)	Data 0.002 (0.078)	Loss 4.1756 (4.0368)	Top-1 acc 27.344 (30.419)	Top-5 acc 52.344 (53.883)	lr 0.00361
Warmup Train [34][260/3239]	Time 0.184 (0.313)	Data 0.002 (0.075)	Loss 3.9505 (4.0364)	Top-1 acc 32.031 (30.442)	Top-5 acc 56.641 (53.888)	lr 0.00360
Warmup Train [34][270/3239]	Time 0.138 (0.309)	Data 0.002 (0.072)	Loss 4.0316 (4.0389)	Top-1 acc 31.250 (30.389)	Top-5 acc 53.906 (53.815)	lr 0.00360
Warmup Train [34][280/3239]	Time 0.279 (0.306)	Data 0.001 (0.070)	Loss 4.0695 (4.0350)	Top-1 acc 27.344 (30.441)	Top-5 acc 51.172 (53.873)	lr 0.00360
Warmup Train [34][290/3239]	Time 0.156 (0.304)	Data 0.001 (0.068)	Loss 4.1911 (4.0342)	Top-1 acc 26.953 (30.447)	Top-5 acc 49.609 (53.886)	lr 0.00359
Warmup Train [34][300/3239]	Time 0.224 (0.301)	Data 0.002 (0.066)	Loss 4.1106 (4.0338)	Top-1 acc 31.641 (30.483)	Top-5 acc 55.469 (53.915)	lr 0.00359
Warmup Train [34][310/3239]	Time 0.185 (0.299)	Data 0.001 (0.063)	Loss 3.9406 (4.0322)	Top-1 acc 30.859 (30.530)	Top-5 acc 53.906 (53.939)	lr 0.00359
Warmup Train [34][320/3239]	Time 0.240 (0.296)	Data 0.002 (0.062)	Loss 4.0755 (4.0324)	Top-1 acc 27.344 (30.522)	Top-5 acc 50.000 (53.954)	lr 0.00358
Warmup Train [34][330/3239]	Time 0.206 (0.294)	Data 0.001 (0.060)	Loss 3.9554 (4.0333)	Top-1 acc 32.422 (30.523)	Top-5 acc 55.859 (53.905)	lr 0.00358
Warmup Train [34][340/3239]	Time 0.226 (0.292)	Data 0.001 (0.058)	Loss 3.9735 (4.0332)	Top-1 acc 31.250 (30.516)	Top-5 acc 54.297 (53.896)	lr 0.00358
Warmup Train [34][350/3239]	Time 0.365 (0.290)	Data 0.001 (0.057)	Loss 3.9058 (4.0323)	Top-1 acc 30.859 (30.497)	Top-5 acc 58.203 (53.911)	lr 0.00357
Warmup Train [34][360/3239]	Time 0.185 (0.288)	Data 0.001 (0.055)	Loss 3.9616 (4.0319)	Top-1 acc 32.031 (30.487)	Top-5 acc 54.297 (53.878)	lr 0.00357
Warmup Train [34][370/3239]	Time 0.208 (0.286)	Data 0.001 (0.054)	Loss 4.0462 (4.0318)	Top-1 acc 32.031 (30.494)	Top-5 acc 52.734 (53.865)	lr 0.00357
Warmup Train [34][380/3239]	Time 0.227 (0.285)	Data 0.001 (0.052)	Loss 4.0097 (4.0317)	Top-1 acc 29.688 (30.466)	Top-5 acc 53.125 (53.873)	lr 0.00356
Warmup Train [34][390/3239]	Time 0.182 (0.283)	Data 0.001 (0.051)	Loss 4.1261 (4.0314)	Top-1 acc 32.812 (30.470)	Top-5 acc 53.125 (53.873)	lr 0.00356
Warmup Train [34][400/3239]	Time 0.208 (0.281)	Data 0.001 (0.050)	Loss 3.9758 (4.0310)	Top-1 acc 29.688 (30.477)	Top-5 acc 51.953 (53.868)	lr 0.00356
Warmup Train [34][410/3239]	Time 0.145 (0.280)	Data 0.001 (0.049)	Loss 4.3139 (4.0323)	Top-1 acc 26.562 (30.457)	Top-5 acc 47.266 (53.839)	lr 0.00356
Warmup Train [34][420/3239]	Time 0.261 (0.278)	Data 0.001 (0.048)	Loss 3.8125 (4.0324)	Top-1 acc 37.891 (30.458)	Top-5 acc 58.203 (53.813)	lr 0.00355
Warmup Train [34][430/3239]	Time 0.166 (0.277)	Data 0.001 (0.047)	Loss 3.9783 (4.0314)	Top-1 acc 32.031 (30.496)	Top-5 acc 57.422 (53.829)	lr 0.00355
Warmup Train [34][440/3239]	Time 0.164 (0.276)	Data 0.002 (0.046)	Loss 3.8143 (4.0307)	Top-1 acc 35.156 (30.512)	Top-5 acc 57.812 (53.846)	lr 0.00355
Warmup Train [34][450/3239]	Time 0.323 (0.275)	Data 0.002 (0.045)	Loss 3.9757 (4.0317)	Top-1 acc 32.031 (30.466)	Top-5 acc 54.688 (53.812)	lr 0.00354
Warmup Train [34][460/3239]	Time 0.270 (0.274)	Data 0.001 (0.044)	Loss 4.0803 (4.0307)	Top-1 acc 28.516 (30.470)	Top-5 acc 50.781 (53.841)	lr 0.00354
Warmup Train [34][470/3239]	Time 0.131 (0.273)	Data 0.001 (0.043)	Loss 4.0127 (4.0305)	Top-1 acc 30.469 (30.469)	Top-5 acc 53.906 (53.847)	lr 0.00354
Warmup Train [34][480/3239]	Time 0.184 (0.272)	Data 0.001 (0.042)	Loss 4.1039 (4.0302)	Top-1 acc 31.641 (30.483)	Top-5 acc 52.344 (53.851)	lr 0.00353
Warmup Train [34][490/3239]	Time 0.182 (0.271)	Data 0.001 (0.041)	Loss 4.0564 (4.0300)	Top-1 acc 31.641 (30.502)	Top-5 acc 57.031 (53.878)	lr 0.00353
Warmup Train [34][500/3239]	Time 0.181 (0.269)	Data 0.001 (0.040)	Loss 3.9958 (4.0307)	Top-1 acc 30.859 (30.491)	Top-5 acc 55.469 (53.849)	lr 0.00353
Warmup Train [34][510/3239]	Time 0.159 (0.268)	Data 0.002 (0.040)	Loss 4.0155 (4.0303)	Top-1 acc 29.688 (30.486)	Top-5 acc 54.297 (53.865)	lr 0.00352
Warmup Train [34][520/3239]	Time 0.237 (0.267)	Data 0.001 (0.039)	Loss 4.0151 (4.0303)	Top-1 acc 28.906 (30.503)	Top-5 acc 55.859 (53.895)	lr 0.00352
Warmup Train [34][530/3239]	Time 0.191 (0.266)	Data 0.001 (0.038)	Loss 4.1695 (4.0298)	Top-1 acc 28.516 (30.510)	Top-5 acc 50.391 (53.913)	lr 0.00352
Warmup Train [34][540/3239]	Time 0.253 (0.266)	Data 0.001 (0.038)	Loss 4.1207 (4.0315)	Top-1 acc 28.906 (30.502)	Top-5 acc 48.438 (53.866)	lr 0.00351
Warmup Train [34][550/3239]	Time 0.189 (0.265)	Data 0.001 (0.037)	Loss 4.3155 (4.0338)	Top-1 acc 23.047 (30.474)	Top-5 acc 45.703 (53.822)	lr 0.00351
Warmup Train [34][560/3239]	Time 0.298 (0.264)	Data 0.001 (0.037)	Loss 4.2783 (4.0343)	Top-1 acc 23.438 (30.444)	Top-5 acc 47.656 (53.796)	lr 0.00351
Warmup Train [34][570/3239]	Time 0.205 (0.263)	Data 0.001 (0.036)	Loss 4.1148 (4.0341)	Top-1 acc 26.953 (30.443)	Top-5 acc 53.516 (53.795)	lr 0.00351
Warmup Train [34][580/3239]	Time 0.213 (0.262)	Data 0.001 (0.035)	Loss 4.0994 (4.0334)	Top-1 acc 29.688 (30.463)	Top-5 acc 54.297 (53.813)	lr 0.00350
Warmup Train [34][590/3239]	Time 0.192 (0.262)	Data 0.002 (0.035)	Loss 3.9310 (4.0335)	Top-1 acc 31.250 (30.457)	Top-5 acc 53.125 (53.807)	lr 0.00350
Warmup Train [34][600/3239]	Time 0.185 (0.261)	Data 0.002 (0.034)	Loss 4.1181 (4.0336)	Top-1 acc 26.953 (30.434)	Top-5 acc 51.562 (53.793)	lr 0.00350
Warmup Train [34][610/3239]	Time 0.268 (0.261)	Data 0.001 (0.034)	Loss 4.1022 (4.0327)	Top-1 acc 28.516 (30.450)	Top-5 acc 51.953 (53.807)	lr 0.00349
Warmup Train [34][620/3239]	Time 0.276 (0.260)	Data 0.002 (0.033)	Loss 3.9618 (4.0319)	Top-1 acc 31.641 (30.452)	Top-5 acc 57.031 (53.835)	lr 0.00349
Warmup Train [34][630/3239]	Time 0.255 (0.259)	Data 0.001 (0.033)	Loss 3.9827 (4.0326)	Top-1 acc 34.375 (30.446)	Top-5 acc 57.422 (53.820)	lr 0.00349
Warmup Train [34][640/3239]	Time 0.216 (0.259)	Data 0.001 (0.032)	Loss 4.2190 (4.0329)	Top-1 acc 25.391 (30.449)	Top-5 acc 49.609 (53.812)	lr 0.00348
Warmup Train [34][650/3239]	Time 0.163 (0.258)	Data 0.002 (0.032)	Loss 3.9167 (4.0334)	Top-1 acc 33.203 (30.444)	Top-5 acc 56.250 (53.802)	lr 0.00348
Warmup Train [34][660/3239]	Time 0.310 (0.258)	Data 0.001 (0.031)	Loss 3.9770 (4.0330)	Top-1 acc 30.859 (30.451)	Top-5 acc 56.641 (53.812)	lr 0.00348
Warmup Train [34][670/3239]	Time 0.236 (0.257)	Data 0.002 (0.031)	Loss 4.3099 (4.0336)	Top-1 acc 27.734 (30.461)	Top-5 acc 48.828 (53.818)	lr 0.00347
Warmup Train [34][680/3239]	Time 0.196 (0.256)	Data 0.001 (0.031)	Loss 3.8739 (4.0336)	Top-1 acc 35.547 (30.469)	Top-5 acc 58.984 (53.822)	lr 0.00347
Warmup Train [34][690/3239]	Time 0.178 (0.256)	Data 0.001 (0.030)	Loss 4.0096 (4.0336)	Top-1 acc 32.031 (30.463)	Top-5 acc 54.688 (53.825)	lr 0.00347
Warmup Train [34][700/3239]	Time 0.264 (0.255)	Data 0.001 (0.030)	Loss 4.0294 (4.0340)	Top-1 acc 32.812 (30.455)	Top-5 acc 52.734 (53.816)	lr 0.00347
Warmup Train [34][710/3239]	Time 0.237 (0.255)	Data 0.001 (0.029)	Loss 4.1680 (4.0349)	Top-1 acc 24.609 (30.451)	Top-5 acc 48.438 (53.788)	lr 0.00346
Warmup Train [34][720/3239]	Time 0.231 (0.254)	Data 0.003 (0.029)	Loss 3.8667 (4.0350)	Top-1 acc 35.156 (30.440)	Top-5 acc 55.859 (53.777)	lr 0.00346
Warmup Train [34][730/3239]	Time 0.260 (0.254)	Data 0.001 (0.029)	Loss 4.0584 (4.0347)	Top-1 acc 30.469 (30.429)	Top-5 acc 54.688 (53.783)	lr 0.00346
Warmup Train [34][740/3239]	Time 0.254 (0.254)	Data 0.002 (0.028)	Loss 4.2090 (4.0362)	Top-1 acc 30.469 (30.409)	Top-5 acc 51.172 (53.759)	lr 0.00345
Warmup Train [34][750/3239]	Time 0.138 (0.253)	Data 0.001 (0.028)	Loss 4.0185 (4.0361)	Top-1 acc 28.906 (30.408)	Top-5 acc 56.250 (53.755)	lr 0.00345
Warmup Train [34][760/3239]	Time 0.374 (0.253)	Data 0.001 (0.028)	Loss 4.0377 (4.0367)	Top-1 acc 28.516 (30.406)	Top-5 acc 53.125 (53.747)	lr 0.00345
Warmup Train [34][770/3239]	Time 0.170 (0.253)	Data 0.001 (0.027)	Loss 4.1343 (4.0373)	Top-1 acc 30.859 (30.396)	Top-5 acc 51.562 (53.731)	lr 0.00344
Warmup Train [34][780/3239]	Time 0.287 (0.252)	Data 0.001 (0.027)	Loss 4.0613 (4.0372)	Top-1 acc 30.469 (30.404)	Top-5 acc 51.953 (53.740)	lr 0.00344
Warmup Train [34][790/3239]	Time 0.217 (0.252)	Data 0.001 (0.027)	Loss 4.2711 (4.0379)	Top-1 acc 27.344 (30.398)	Top-5 acc 50.781 (53.723)	lr 0.00344
Warmup Train [34][800/3239]	Time 0.187 (0.251)	Data 0.001 (0.026)	Loss 4.1355 (4.0374)	Top-1 acc 27.344 (30.401)	Top-5 acc 53.125 (53.724)	lr 0.00343
Warmup Train [34][810/3239]	Time 0.206 (0.251)	Data 0.001 (0.026)	Loss 4.1234 (4.0374)	Top-1 acc 28.125 (30.402)	Top-5 acc 49.219 (53.718)	lr 0.00343
Warmup Train [34][820/3239]	Time 0.215 (0.250)	Data 0.001 (0.026)	Loss 4.0650 (4.0378)	Top-1 acc 33.203 (30.393)	Top-5 acc 51.562 (53.717)	lr 0.00343
Warmup Train [34][830/3239]	Time 0.174 (0.250)	Data 0.001 (0.025)	Loss 4.0774 (4.0369)	Top-1 acc 28.516 (30.413)	Top-5 acc 51.172 (53.732)	lr 0.00343
Warmup Train [34][840/3239]	Time 0.215 (0.249)	Data 0.001 (0.025)	Loss 3.9547 (4.0368)	Top-1 acc 32.812 (30.407)	Top-5 acc 55.078 (53.718)	lr 0.00342
Warmup Train [34][850/3239]	Time 0.234 (0.249)	Data 0.002 (0.025)	Loss 4.0081 (4.0363)	Top-1 acc 32.422 (30.425)	Top-5 acc 52.734 (53.722)	lr 0.00342
Warmup Train [34][860/3239]	Time 0.189 (0.249)	Data 0.001 (0.025)	Loss 4.1282 (4.0363)	Top-1 acc 30.078 (30.427)	Top-5 acc 50.000 (53.725)	lr 0.00342
Warmup Train [34][870/3239]	Time 0.271 (0.248)	Data 0.003 (0.024)	Loss 4.1948 (4.0363)	Top-1 acc 27.734 (30.430)	Top-5 acc 49.609 (53.726)	lr 0.00341
Warmup Train [34][880/3239]	Time 0.195 (0.248)	Data 0.001 (0.024)	Loss 4.0348 (4.0361)	Top-1 acc 26.953 (30.436)	Top-5 acc 55.078 (53.723)	lr 0.00341
Warmup Train [34][890/3239]	Time 0.202 (0.248)	Data 0.001 (0.024)	Loss 4.1396 (4.0363)	Top-1 acc 27.344 (30.432)	Top-5 acc 50.781 (53.716)	lr 0.00341
Warmup Train [34][900/3239]	Time 0.209 (0.247)	Data 0.001 (0.024)	Loss 3.9113 (4.0362)	Top-1 acc 33.984 (30.435)	Top-5 acc 58.203 (53.730)	lr 0.00340
Warmup Train [34][910/3239]	Time 0.223 (0.247)	Data 0.001 (0.024)	Loss 3.9501 (4.0364)	Top-1 acc 30.078 (30.438)	Top-5 acc 57.031 (53.736)	lr 0.00340
Warmup Train [34][920/3239]	Time 0.227 (0.247)	Data 0.001 (0.023)	Loss 4.0197 (4.0363)	Top-1 acc 28.516 (30.435)	Top-5 acc 53.516 (53.754)	lr 0.00340
Warmup Train [34][930/3239]	Time 0.221 (0.247)	Data 0.001 (0.023)	Loss 4.0612 (4.0359)	Top-1 acc 32.422 (30.435)	Top-5 acc 50.000 (53.759)	lr 0.00339
Warmup Train [34][940/3239]	Time 0.213 (0.247)	Data 0.001 (0.023)	Loss 3.7637 (4.0354)	Top-1 acc 35.547 (30.443)	Top-5 acc 58.594 (53.775)	lr 0.00339
Warmup Train [34][950/3239]	Time 0.234 (0.246)	Data 0.001 (0.023)	Loss 4.1612 (4.0361)	Top-1 acc 24.609 (30.415)	Top-5 acc 50.781 (53.765)	lr 0.00339
Warmup Train [34][960/3239]	Time 0.330 (0.246)	Data 0.001 (0.022)	Loss 4.0660 (4.0360)	Top-1 acc 32.422 (30.423)	Top-5 acc 53.516 (53.780)	lr 0.00339
Warmup Train [34][970/3239]	Time 0.312 (0.246)	Data 0.002 (0.022)	Loss 4.1997 (4.0357)	Top-1 acc 27.344 (30.433)	Top-5 acc 52.344 (53.785)	lr 0.00338
Warmup Train [34][980/3239]	Time 0.283 (0.246)	Data 0.001 (0.022)	Loss 3.9717 (4.0358)	Top-1 acc 30.078 (30.438)	Top-5 acc 54.297 (53.783)	lr 0.00338
Warmup Train [34][990/3239]	Time 0.228 (0.245)	Data 0.001 (0.022)	Loss 3.9450 (4.0360)	Top-1 acc 32.422 (30.435)	Top-5 acc 57.031 (53.776)	lr 0.00338
Warmup Train [34][1000/3239]	Time 0.204 (0.245)	Data 0.001 (0.022)	Loss 3.8831 (4.0349)	Top-1 acc 36.719 (30.460)	Top-5 acc 56.641 (53.805)	lr 0.00337
Warmup Train [34][1010/3239]	Time 0.197 (0.245)	Data 0.001 (0.021)	Loss 4.0852 (4.0351)	Top-1 acc 29.297 (30.447)	Top-5 acc 54.297 (53.807)	lr 0.00337
Warmup Train [34][1020/3239]	Time 0.184 (0.245)	Data 0.001 (0.021)	Loss 4.1179 (4.0350)	Top-1 acc 27.344 (30.444)	Top-5 acc 54.297 (53.809)	lr 0.00337
Warmup Train [34][1030/3239]	Time 0.184 (0.244)	Data 0.001 (0.021)	Loss 4.1368 (4.0353)	Top-1 acc 26.562 (30.429)	Top-5 acc 51.953 (53.806)	lr 0.00336
Warmup Train [34][1040/3239]	Time 0.178 (0.244)	Data 0.001 (0.021)	Loss 3.9341 (4.0343)	Top-1 acc 30.859 (30.439)	Top-5 acc 55.469 (53.826)	lr 0.00336
Warmup Train [34][1050/3239]	Time 0.261 (0.244)	Data 0.001 (0.021)	Loss 4.0131 (4.0340)	Top-1 acc 32.812 (30.443)	Top-5 acc 52.734 (53.833)	lr 0.00336
Warmup Train [34][1060/3239]	Time 0.218 (0.244)	Data 0.001 (0.020)	Loss 3.9660 (4.0338)	Top-1 acc 33.203 (30.439)	Top-5 acc 53.125 (53.841)	lr 0.00336
Warmup Train [34][1070/3239]	Time 0.342 (0.244)	Data 0.001 (0.020)	Loss 3.8953 (4.0334)	Top-1 acc 32.422 (30.443)	Top-5 acc 58.203 (53.854)	lr 0.00335
Warmup Train [34][1080/3239]	Time 0.351 (0.244)	Data 0.002 (0.020)	Loss 3.9727 (4.0332)	Top-1 acc 32.031 (30.456)	Top-5 acc 54.688 (53.859)	lr 0.00335
Warmup Train [34][1090/3239]	Time 0.132 (0.243)	Data 0.001 (0.020)	Loss 4.2160 (4.0334)	Top-1 acc 24.219 (30.456)	Top-5 acc 48.438 (53.858)	lr 0.00335
Warmup Train [34][1100/3239]	Time 0.237 (0.243)	Data 0.001 (0.020)	Loss 4.0195 (4.0336)	Top-1 acc 29.297 (30.453)	Top-5 acc 55.078 (53.856)	lr 0.00334
Warmup Train [34][1110/3239]	Time 0.244 (0.243)	Data 0.001 (0.020)	Loss 3.9473 (4.0333)	Top-1 acc 32.031 (30.456)	Top-5 acc 54.688 (53.861)	lr 0.00334
Warmup Train [34][1120/3239]	Time 0.195 (0.243)	Data 0.001 (0.020)	Loss 4.0295 (4.0334)	Top-1 acc 35.547 (30.452)	Top-5 acc 52.734 (53.860)	lr 0.00334
Warmup Train [34][1130/3239]	Time 0.210 (0.243)	Data 0.001 (0.019)	Loss 4.0148 (4.0332)	Top-1 acc 32.031 (30.457)	Top-5 acc 54.297 (53.871)	lr 0.00333
Warmup Train [34][1140/3239]	Time 0.156 (0.242)	Data 0.002 (0.019)	Loss 3.9765 (4.0328)	Top-1 acc 31.250 (30.464)	Top-5 acc 54.688 (53.883)	lr 0.00333
Warmup Train [34][1150/3239]	Time 0.167 (0.242)	Data 0.001 (0.019)	Loss 3.9680 (4.0329)	Top-1 acc 31.250 (30.469)	Top-5 acc 58.594 (53.889)	lr 0.00333
Warmup Train [34][1160/3239]	Time 0.206 (0.242)	Data 0.001 (0.019)	Loss 4.2188 (4.0329)	Top-1 acc 27.734 (30.461)	Top-5 acc 51.562 (53.888)	lr 0.00333
Warmup Train [34][1170/3239]	Time 0.244 (0.242)	Data 0.001 (0.019)	Loss 4.0001 (4.0327)	Top-1 acc 32.422 (30.460)	Top-5 acc 53.906 (53.895)	lr 0.00332
Warmup Train [34][1180/3239]	Time 0.291 (0.242)	Data 0.001 (0.019)	Loss 3.9401 (4.0328)	Top-1 acc 30.078 (30.460)	Top-5 acc 57.422 (53.886)	lr 0.00332
Warmup Train [34][1190/3239]	Time 0.259 (0.242)	Data 0.001 (0.019)	Loss 3.9347 (4.0330)	Top-1 acc 32.422 (30.455)	Top-5 acc 60.156 (53.891)	lr 0.00332
Warmup Train [34][1200/3239]	Time 0.260 (0.242)	Data 0.001 (0.018)	Loss 4.0633 (4.0328)	Top-1 acc 31.250 (30.465)	Top-5 acc 54.688 (53.895)	lr 0.00331
Warmup Train [34][1210/3239]	Time 0.289 (0.241)	Data 0.001 (0.018)	Loss 3.9446 (4.0327)	Top-1 acc 31.641 (30.463)	Top-5 acc 54.297 (53.896)	lr 0.00331
Warmup Train [34][1220/3239]	Time 0.214 (0.241)	Data 0.002 (0.018)	Loss 4.0076 (4.0322)	Top-1 acc 33.594 (30.474)	Top-5 acc 55.078 (53.916)	lr 0.00331
Warmup Train [34][1230/3239]	Time 0.332 (0.241)	Data 0.001 (0.018)	Loss 3.9650 (4.0323)	Top-1 acc 33.984 (30.473)	Top-5 acc 55.078 (53.921)	lr 0.00330
Warmup Train [34][1240/3239]	Time 0.213 (0.241)	Data 0.001 (0.018)	Loss 4.1899 (4.0326)	Top-1 acc 25.781 (30.467)	Top-5 acc 48.828 (53.919)	lr 0.00330
Warmup Train [34][1250/3239]	Time 0.198 (0.241)	Data 0.001 (0.018)	Loss 3.8989 (4.0322)	Top-1 acc 30.469 (30.467)	Top-5 acc 57.031 (53.931)	lr 0.00330
Warmup Train [34][1260/3239]	Time 0.210 (0.241)	Data 0.002 (0.018)	Loss 3.8492 (4.0326)	Top-1 acc 32.422 (30.460)	Top-5 acc 55.859 (53.921)	lr 0.00330
Warmup Train [34][1270/3239]	Time 0.250 (0.241)	Data 0.001 (0.017)	Loss 3.9515 (4.0325)	Top-1 acc 32.422 (30.464)	Top-5 acc 53.516 (53.914)	lr 0.00329
Warmup Train [34][1280/3239]	Time 0.181 (0.240)	Data 0.001 (0.017)	Loss 4.1918 (4.0320)	Top-1 acc 26.953 (30.466)	Top-5 acc 49.219 (53.926)	lr 0.00329
Warmup Train [34][1290/3239]	Time 0.320 (0.240)	Data 0.001 (0.017)	Loss 4.2293 (4.0322)	Top-1 acc 23.047 (30.459)	Top-5 acc 48.047 (53.921)	lr 0.00329
Warmup Train [34][1300/3239]	Time 0.196 (0.240)	Data 0.001 (0.017)	Loss 4.0772 (4.0325)	Top-1 acc 29.688 (30.452)	Top-5 acc 53.906 (53.907)	lr 0.00328
Warmup Train [34][1310/3239]	Time 0.225 (0.240)	Data 0.002 (0.017)	Loss 4.2655 (4.0323)	Top-1 acc 25.000 (30.448)	Top-5 acc 45.312 (53.907)	lr 0.00328
Warmup Train [34][1320/3239]	Time 0.216 (0.240)	Data 0.002 (0.017)	Loss 3.7450 (4.0321)	Top-1 acc 35.938 (30.455)	Top-5 acc 61.719 (53.912)	lr 0.00328
Warmup Train [34][1330/3239]	Time 0.218 (0.240)	Data 0.002 (0.017)	Loss 4.0541 (4.0322)	Top-1 acc 28.125 (30.455)	Top-5 acc 53.906 (53.915)	lr 0.00327
Warmup Train [34][1340/3239]	Time 0.227 (0.240)	Data 0.002 (0.017)	Loss 3.9537 (4.0322)	Top-1 acc 36.328 (30.450)	Top-5 acc 57.031 (53.912)	lr 0.00327
Warmup Train [34][1350/3239]	Time 0.259 (0.240)	Data 0.002 (0.017)	Loss 3.9665 (4.0320)	Top-1 acc 32.031 (30.457)	Top-5 acc 58.984 (53.921)	lr 0.00327
Warmup Train [34][1360/3239]	Time 0.201 (0.239)	Data 0.001 (0.017)	Loss 3.8505 (4.0317)	Top-1 acc 34.375 (30.466)	Top-5 acc 58.203 (53.928)	lr 0.00327
Warmup Train [34][1370/3239]	Time 0.264 (0.239)	Data 0.001 (0.016)	Loss 3.9541 (4.0320)	Top-1 acc 28.906 (30.448)	Top-5 acc 52.344 (53.918)	lr 0.00326
Warmup Train [34][1380/3239]	Time 0.251 (0.239)	Data 0.001 (0.016)	Loss 4.0418 (4.0318)	Top-1 acc 32.031 (30.451)	Top-5 acc 55.078 (53.922)	lr 0.00326
Warmup Train [34][1390/3239]	Time 0.185 (0.239)	Data 0.001 (0.016)	Loss 4.0318 (4.0321)	Top-1 acc 32.031 (30.447)	Top-5 acc 54.688 (53.916)	lr 0.00326
Warmup Train [34][1400/3239]	Time 0.270 (0.239)	Data 0.002 (0.016)	Loss 4.0798 (4.0323)	Top-1 acc 29.688 (30.451)	Top-5 acc 53.516 (53.908)	lr 0.00325
Warmup Train [34][1410/3239]	Time 0.221 (0.239)	Data 0.001 (0.016)	Loss 4.0027 (4.0322)	Top-1 acc 29.297 (30.452)	Top-5 acc 54.688 (53.910)	lr 0.00325
Warmup Train [34][1420/3239]	Time 0.232 (0.238)	Data 0.001 (0.016)	Loss 4.0138 (4.0320)	Top-1 acc 33.203 (30.456)	Top-5 acc 54.688 (53.913)	lr 0.00325
Warmup Train [34][1430/3239]	Time 0.206 (0.238)	Data 0.001 (0.016)	Loss 3.8695 (4.0321)	Top-1 acc 35.547 (30.447)	Top-5 acc 57.031 (53.908)	lr 0.00324
Warmup Train [34][1440/3239]	Time 0.230 (0.238)	Data 0.001 (0.016)	Loss 4.1191 (4.0322)	Top-1 acc 27.734 (30.445)	Top-5 acc 51.953 (53.904)	lr 0.00324
Warmup Train [34][1450/3239]	Time 0.231 (0.238)	Data 0.001 (0.016)	Loss 3.7285 (4.0320)	Top-1 acc 35.547 (30.445)	Top-5 acc 58.594 (53.899)	lr 0.00324
Warmup Train [34][1460/3239]	Time 0.159 (0.238)	Data 0.002 (0.016)	Loss 4.0524 (4.0318)	Top-1 acc 31.250 (30.454)	Top-5 acc 54.297 (53.907)	lr 0.00324
Warmup Train [34][1470/3239]	Time 0.271 (0.238)	Data 0.001 (0.015)	Loss 4.1578 (4.0319)	Top-1 acc 22.656 (30.451)	Top-5 acc 52.344 (53.908)	lr 0.00323
Warmup Train [34][1480/3239]	Time 0.197 (0.238)	Data 0.002 (0.015)	Loss 4.1127 (4.0316)	Top-1 acc 30.469 (30.457)	Top-5 acc 54.297 (53.913)	lr 0.00323
Warmup Train [34][1490/3239]	Time 0.340 (0.238)	Data 0.001 (0.015)	Loss 3.9842 (4.0315)	Top-1 acc 27.344 (30.460)	Top-5 acc 52.734 (53.917)	lr 0.00323
Warmup Train [34][1500/3239]	Time 0.205 (0.238)	Data 0.001 (0.015)	Loss 4.0400 (4.0317)	Top-1 acc 30.078 (30.455)	Top-5 acc 51.562 (53.919)	lr 0.00322
Warmup Train [34][1510/3239]	Time 0.241 (0.237)	Data 0.001 (0.015)	Loss 3.9543 (4.0317)	Top-1 acc 30.469 (30.448)	Top-5 acc 55.469 (53.925)	lr 0.00322
Warmup Train [34][1520/3239]	Time 0.184 (0.237)	Data 0.001 (0.015)	Loss 4.1446 (4.0316)	Top-1 acc 27.344 (30.456)	Top-5 acc 51.172 (53.933)	lr 0.00322
Warmup Train [34][1530/3239]	Time 0.194 (0.237)	Data 0.001 (0.015)	Loss 4.1894 (4.0318)	Top-1 acc 26.172 (30.454)	Top-5 acc 54.297 (53.933)	lr 0.00321
Warmup Train [34][1540/3239]	Time 0.252 (0.237)	Data 0.001 (0.015)	Loss 3.9588 (4.0320)	Top-1 acc 32.422 (30.448)	Top-5 acc 57.812 (53.930)	lr 0.00321
Warmup Train [34][1550/3239]	Time 0.188 (0.237)	Data 0.002 (0.015)	Loss 3.9526 (4.0317)	Top-1 acc 32.422 (30.462)	Top-5 acc 55.078 (53.936)	lr 0.00321
Warmup Train [34][1560/3239]	Time 0.227 (0.237)	Data 0.001 (0.015)	Loss 4.1203 (4.0316)	Top-1 acc 26.172 (30.460)	Top-5 acc 53.906 (53.942)	lr 0.00321
Warmup Train [34][1570/3239]	Time 0.184 (0.237)	Data 0.001 (0.015)	Loss 4.2499 (4.0315)	Top-1 acc 26.953 (30.464)	Top-5 acc 50.000 (53.949)	lr 0.00320
Warmup Train [34][1580/3239]	Time 0.389 (0.237)	Data 0.001 (0.015)	Loss 3.9427 (4.0312)	Top-1 acc 34.375 (30.467)	Top-5 acc 53.516 (53.956)	lr 0.00320
Warmup Train [34][1590/3239]	Time 0.221 (0.237)	Data 0.001 (0.014)	Loss 3.9466 (4.0308)	Top-1 acc 30.859 (30.473)	Top-5 acc 55.469 (53.963)	lr 0.00320
Warmup Train [34][1600/3239]	Time 0.148 (0.236)	Data 0.001 (0.014)	Loss 3.7687 (4.0307)	Top-1 acc 38.281 (30.476)	Top-5 acc 57.812 (53.963)	lr 0.00319
Warmup Train [34][1610/3239]	Time 0.268 (0.236)	Data 0.001 (0.014)	Loss 4.1595 (4.0311)	Top-1 acc 29.297 (30.471)	Top-5 acc 49.219 (53.958)	lr 0.00319
Warmup Train [34][1620/3239]	Time 0.173 (0.236)	Data 0.004 (0.014)	Loss 3.9684 (4.0311)	Top-1 acc 31.250 (30.468)	Top-5 acc 55.078 (53.956)	lr 0.00319
Warmup Train [34][1630/3239]	Time 0.222 (0.236)	Data 0.001 (0.014)	Loss 4.1815 (4.0313)	Top-1 acc 27.344 (30.468)	Top-5 acc 49.609 (53.950)	lr 0.00318
Warmup Train [34][1640/3239]	Time 0.143 (0.236)	Data 0.001 (0.014)	Loss 4.1871 (4.0312)	Top-1 acc 26.562 (30.465)	Top-5 acc 50.781 (53.946)	lr 0.00318
Warmup Train [34][1650/3239]	Time 0.218 (0.236)	Data 0.001 (0.014)	Loss 4.1908 (4.0313)	Top-1 acc 30.859 (30.470)	Top-5 acc 50.781 (53.944)	lr 0.00318
Warmup Train [34][1660/3239]	Time 0.165 (0.236)	Data 0.001 (0.014)	Loss 4.0301 (4.0310)	Top-1 acc 30.078 (30.482)	Top-5 acc 53.125 (53.950)	lr 0.00318
Warmup Train [34][1670/3239]	Time 0.188 (0.236)	Data 0.002 (0.014)	Loss 3.8255 (4.0305)	Top-1 acc 36.328 (30.500)	Top-5 acc 58.984 (53.962)	lr 0.00317
Warmup Train [34][1680/3239]	Time 0.300 (0.236)	Data 0.001 (0.014)	Loss 4.0581 (4.0307)	Top-1 acc 29.688 (30.496)	Top-5 acc 51.562 (53.953)	lr 0.00317
Warmup Train [34][1690/3239]	Time 0.229 (0.236)	Data 0.001 (0.014)	Loss 4.0824 (4.0307)	Top-1 acc 28.125 (30.491)	Top-5 acc 53.906 (53.949)	lr 0.00317
Warmup Train [34][1700/3239]	Time 0.175 (0.236)	Data 0.002 (0.014)	Loss 4.3045 (4.0314)	Top-1 acc 26.953 (30.483)	Top-5 acc 47.656 (53.936)	lr 0.00316
Warmup Train [34][1710/3239]	Time 0.264 (0.236)	Data 0.001 (0.014)	Loss 3.9660 (4.0311)	Top-1 acc 28.516 (30.495)	Top-5 acc 54.688 (53.944)	lr 0.00316
Warmup Train [34][1720/3239]	Time 0.202 (0.235)	Data 0.001 (0.014)	Loss 4.0281 (4.0313)	Top-1 acc 30.078 (30.495)	Top-5 acc 52.344 (53.936)	lr 0.00316
Warmup Train [34][1730/3239]	Time 0.214 (0.235)	Data 0.001 (0.014)	Loss 4.1455 (4.0313)	Top-1 acc 30.859 (30.492)	Top-5 acc 51.953 (53.930)	lr 0.00316
Warmup Train [34][1740/3239]	Time 0.278 (0.235)	Data 0.001 (0.013)	Loss 4.0288 (4.0313)	Top-1 acc 28.516 (30.490)	Top-5 acc 53.906 (53.929)	lr 0.00315
Warmup Train [34][1750/3239]	Time 0.243 (0.235)	Data 0.001 (0.013)	Loss 3.9098 (4.0314)	Top-1 acc 33.203 (30.490)	Top-5 acc 57.422 (53.929)	lr 0.00315
Warmup Train [34][1760/3239]	Time 0.201 (0.235)	Data 0.001 (0.013)	Loss 4.0368 (4.0316)	Top-1 acc 29.297 (30.483)	Top-5 acc 56.250 (53.927)	lr 0.00315
Warmup Train [34][1770/3239]	Time 0.199 (0.235)	Data 0.001 (0.013)	Loss 3.9409 (4.0314)	Top-1 acc 30.469 (30.487)	Top-5 acc 57.812 (53.935)	lr 0.00314
Warmup Train [34][1780/3239]	Time 0.343 (0.235)	Data 0.001 (0.013)	Loss 4.0357 (4.0312)	Top-1 acc 28.125 (30.489)	Top-5 acc 52.734 (53.939)	lr 0.00314
Warmup Train [34][1790/3239]	Time 0.302 (0.235)	Data 0.001 (0.013)	Loss 4.1473 (4.0313)	Top-1 acc 26.562 (30.490)	Top-5 acc 53.125 (53.939)	lr 0.00314
Warmup Train [34][1800/3239]	Time 0.242 (0.235)	Data 0.001 (0.013)	Loss 3.8920 (4.0314)	Top-1 acc 32.422 (30.489)	Top-5 acc 53.906 (53.932)	lr 0.00313
Warmup Train [34][1810/3239]	Time 0.216 (0.235)	Data 0.001 (0.013)	Loss 4.1989 (4.0315)	Top-1 acc 26.172 (30.484)	Top-5 acc 50.000 (53.937)	lr 0.00313
Warmup Train [34][1820/3239]	Time 0.166 (0.235)	Data 0.001 (0.013)	Loss 4.0323 (4.0312)	Top-1 acc 29.297 (30.491)	Top-5 acc 50.000 (53.941)	lr 0.00313
Warmup Train [34][1830/3239]	Time 0.211 (0.235)	Data 0.001 (0.013)	Loss 3.8760 (4.0312)	Top-1 acc 33.594 (30.496)	Top-5 acc 57.422 (53.943)	lr 0.00313
Warmup Train [34][1840/3239]	Time 0.211 (0.235)	Data 0.001 (0.013)	Loss 4.0626 (4.0312)	Top-1 acc 29.688 (30.494)	Top-5 acc 53.516 (53.941)	lr 0.00312
Warmup Train [34][1850/3239]	Time 0.162 (0.234)	Data 0.001 (0.013)	Loss 4.2405 (4.0314)	Top-1 acc 27.734 (30.491)	Top-5 acc 52.344 (53.936)	lr 0.00312
Warmup Train [34][1860/3239]	Time 0.228 (0.235)	Data 0.001 (0.013)	Loss 4.0408 (4.0315)	Top-1 acc 30.469 (30.487)	Top-5 acc 53.125 (53.928)	lr 0.00312
Warmup Train [34][1870/3239]	Time 0.251 (0.234)	Data 0.002 (0.013)	Loss 3.8611 (4.0315)	Top-1 acc 33.984 (30.489)	Top-5 acc 59.375 (53.926)	lr 0.00311
Warmup Train [34][1880/3239]	Time 0.333 (0.234)	Data 0.001 (0.013)	Loss 3.9175 (4.0313)	Top-1 acc 33.594 (30.495)	Top-5 acc 57.031 (53.928)	lr 0.00311
Warmup Train [34][1890/3239]	Time 0.207 (0.234)	Data 0.001 (0.013)	Loss 4.2588 (4.0317)	Top-1 acc 27.344 (30.488)	Top-5 acc 49.609 (53.926)	lr 0.00311
Warmup Train [34][1900/3239]	Time 0.179 (0.234)	Data 0.001 (0.013)	Loss 4.1376 (4.0317)	Top-1 acc 27.734 (30.489)	Top-5 acc 50.781 (53.928)	lr 0.00311
Warmup Train [34][1910/3239]	Time 0.145 (0.234)	Data 0.001 (0.013)	Loss 4.2104 (4.0317)	Top-1 acc 28.516 (30.485)	Top-5 acc 48.438 (53.925)	lr 0.00310
Warmup Train [34][1920/3239]	Time 0.193 (0.234)	Data 0.001 (0.012)	Loss 3.9375 (4.0317)	Top-1 acc 33.203 (30.483)	Top-5 acc 57.031 (53.928)	lr 0.00310
Warmup Train [34][1930/3239]	Time 0.276 (0.234)	Data 0.046 (0.012)	Loss 4.0859 (4.0316)	Top-1 acc 30.469 (30.485)	Top-5 acc 50.781 (53.931)	lr 0.00310
Warmup Train [34][1940/3239]	Time 0.167 (0.234)	Data 0.001 (0.012)	Loss 4.1593 (4.0318)	Top-1 acc 28.906 (30.485)	Top-5 acc 51.172 (53.926)	lr 0.00309
Warmup Train [34][1950/3239]	Time 0.230 (0.234)	Data 0.001 (0.012)	Loss 4.0399 (4.0316)	Top-1 acc 29.688 (30.486)	Top-5 acc 53.125 (53.928)	lr 0.00309
Warmup Train [34][1960/3239]	Time 0.231 (0.234)	Data 0.001 (0.012)	Loss 3.8733 (4.0317)	Top-1 acc 31.250 (30.488)	Top-5 acc 58.984 (53.928)	lr 0.00309
Warmup Train [34][1970/3239]	Time 0.190 (0.234)	Data 0.001 (0.012)	Loss 4.1643 (4.0319)	Top-1 acc 28.516 (30.487)	Top-5 acc 51.562 (53.923)	lr 0.00308
Warmup Train [34][1980/3239]	Time 0.199 (0.234)	Data 0.001 (0.012)	Loss 4.0194 (4.0320)	Top-1 acc 28.906 (30.482)	Top-5 acc 55.078 (53.922)	lr 0.00308
Warmup Train [34][1990/3239]	Time 0.224 (0.234)	Data 0.001 (0.012)	Loss 4.2465 (4.0318)	Top-1 acc 30.469 (30.489)	Top-5 acc 49.609 (53.926)	lr 0.00308
Warmup Train [34][2000/3239]	Time 0.304 (0.234)	Data 0.001 (0.012)	Loss 4.0305 (4.0319)	Top-1 acc 32.422 (30.490)	Top-5 acc 53.906 (53.925)	lr 0.00308
Warmup Train [34][2010/3239]	Time 0.223 (0.234)	Data 0.002 (0.012)	Loss 3.9442 (4.0322)	Top-1 acc 31.641 (30.485)	Top-5 acc 53.125 (53.913)	lr 0.00307
Warmup Train [34][2020/3239]	Time 0.257 (0.234)	Data 0.002 (0.012)	Loss 4.0001 (4.0322)	Top-1 acc 30.859 (30.482)	Top-5 acc 55.469 (53.911)	lr 0.00307
Warmup Train [34][2030/3239]	Time 0.235 (0.234)	Data 0.001 (0.012)	Loss 4.1683 (4.0320)	Top-1 acc 28.125 (30.489)	Top-5 acc 49.609 (53.915)	lr 0.00307
Warmup Train [34][2040/3239]	Time 0.201 (0.234)	Data 0.001 (0.012)	Loss 4.0227 (4.0321)	Top-1 acc 30.469 (30.487)	Top-5 acc 52.734 (53.910)	lr 0.00306
Warmup Train [34][2050/3239]	Time 0.167 (0.233)	Data 0.001 (0.012)	Loss 4.0592 (4.0320)	Top-1 acc 30.469 (30.486)	Top-5 acc 50.781 (53.908)	lr 0.00306
Warmup Train [34][2060/3239]	Time 0.211 (0.233)	Data 0.001 (0.012)	Loss 4.1961 (4.0319)	Top-1 acc 29.297 (30.496)	Top-5 acc 54.297 (53.915)	lr 0.00306
Warmup Train [34][2070/3239]	Time 0.162 (0.233)	Data 0.002 (0.012)	Loss 4.2652 (4.0320)	Top-1 acc 20.703 (30.490)	Top-5 acc 47.656 (53.915)	lr 0.00306
Warmup Train [34][2080/3239]	Time 0.298 (0.233)	Data 0.001 (0.012)	Loss 3.8827 (4.0318)	Top-1 acc 37.109 (30.494)	Top-5 acc 55.469 (53.917)	lr 0.00305
Warmup Train [34][2090/3239]	Time 0.364 (0.233)	Data 0.001 (0.012)	Loss 3.7604 (4.0318)	Top-1 acc 35.938 (30.493)	Top-5 acc 60.547 (53.917)	lr 0.00305
Warmup Train [34][2100/3239]	Time 0.274 (0.233)	Data 0.002 (0.012)	Loss 3.9376 (4.0318)	Top-1 acc 32.031 (30.493)	Top-5 acc 55.469 (53.922)	lr 0.00305
Warmup Train [34][2110/3239]	Time 0.228 (0.233)	Data 0.001 (0.012)	Loss 3.9931 (4.0319)	Top-1 acc 33.984 (30.493)	Top-5 acc 52.734 (53.921)	lr 0.00304
Warmup Train [34][2120/3239]	Time 0.210 (0.233)	Data 0.001 (0.012)	Loss 4.0608 (4.0319)	Top-1 acc 33.594 (30.496)	Top-5 acc 55.469 (53.921)	lr 0.00304
Warmup Train [34][2130/3239]	Time 0.228 (0.233)	Data 0.001 (0.012)	Loss 4.2332 (4.0320)	Top-1 acc 28.125 (30.502)	Top-5 acc 49.609 (53.921)	lr 0.00304
Warmup Train [34][2140/3239]	Time 0.199 (0.233)	Data 0.001 (0.012)	Loss 4.0088 (4.0319)	Top-1 acc 30.078 (30.505)	Top-5 acc 56.641 (53.922)	lr 0.00304
Warmup Train [34][2150/3239]	Time 0.197 (0.233)	Data 0.001 (0.012)	Loss 4.0492 (4.0323)	Top-1 acc 29.688 (30.498)	Top-5 acc 53.125 (53.916)	lr 0.00303
Warmup Train [34][2160/3239]	Time 0.249 (0.233)	Data 0.001 (0.012)	Loss 4.0407 (4.0325)	Top-1 acc 32.812 (30.495)	Top-5 acc 53.125 (53.910)	lr 0.00303
Warmup Train [34][2170/3239]	Time 0.197 (0.233)	Data 0.001 (0.011)	Loss 3.9087 (4.0323)	Top-1 acc 33.203 (30.498)	Top-5 acc 56.250 (53.911)	lr 0.00303
Warmup Train [34][2180/3239]	Time 0.240 (0.233)	Data 0.001 (0.011)	Loss 3.8342 (4.0321)	Top-1 acc 32.031 (30.500)	Top-5 acc 59.766 (53.922)	lr 0.00302
Warmup Train [34][2190/3239]	Time 0.348 (0.233)	Data 0.001 (0.011)	Loss 4.0528 (4.0322)	Top-1 acc 28.516 (30.499)	Top-5 acc 55.469 (53.921)	lr 0.00302
Warmup Train [34][2200/3239]	Time 0.190 (0.233)	Data 0.001 (0.011)	Loss 4.2011 (4.0323)	Top-1 acc 26.172 (30.489)	Top-5 acc 51.562 (53.918)	lr 0.00302
Warmup Train [34][2210/3239]	Time 0.196 (0.233)	Data 0.001 (0.011)	Loss 4.0613 (4.0320)	Top-1 acc 30.078 (30.495)	Top-5 acc 52.344 (53.922)	lr 0.00302
Warmup Train [34][2220/3239]	Time 0.233 (0.233)	Data 0.001 (0.011)	Loss 4.1097 (4.0320)	Top-1 acc 30.469 (30.496)	Top-5 acc 49.609 (53.920)	lr 0.00301
Warmup Train [34][2230/3239]	Time 0.216 (0.233)	Data 0.001 (0.011)	Loss 4.0035 (4.0319)	Top-1 acc 31.250 (30.497)	Top-5 acc 53.906 (53.923)	lr 0.00301
Warmup Train [34][2240/3239]	Time 0.219 (0.233)	Data 0.001 (0.011)	Loss 4.2868 (4.0320)	Top-1 acc 26.172 (30.491)	Top-5 acc 45.703 (53.921)	lr 0.00301
Warmup Train [34][2250/3239]	Time 0.224 (0.232)	Data 0.001 (0.011)	Loss 3.8397 (4.0318)	Top-1 acc 34.375 (30.498)	Top-5 acc 55.859 (53.924)	lr 0.00300
Warmup Train [34][2260/3239]	Time 0.230 (0.232)	Data 0.001 (0.011)	Loss 4.0705 (4.0321)	Top-1 acc 27.344 (30.488)	Top-5 acc 50.391 (53.915)	lr 0.00300
Warmup Train [34][2270/3239]	Time 0.217 (0.232)	Data 0.001 (0.011)	Loss 4.0451 (4.0321)	Top-1 acc 34.375 (30.493)	Top-5 acc 51.953 (53.915)	lr 0.00300
Warmup Train [34][2280/3239]	Time 0.187 (0.232)	Data 0.001 (0.011)	Loss 4.1289 (4.0320)	Top-1 acc 29.297 (30.491)	Top-5 acc 51.172 (53.914)	lr 0.00299
Warmup Train [34][2290/3239]	Time 0.200 (0.232)	Data 0.002 (0.011)	Loss 4.0252 (4.0317)	Top-1 acc 27.344 (30.491)	Top-5 acc 51.562 (53.923)	lr 0.00299
Warmup Train [34][2300/3239]	Time 0.313 (0.232)	Data 0.002 (0.011)	Loss 3.9478 (4.0317)	Top-1 acc 30.859 (30.490)	Top-5 acc 55.078 (53.923)	lr 0.00299
Warmup Train [34][2310/3239]	Time 0.189 (0.232)	Data 0.001 (0.011)	Loss 3.8990 (4.0316)	Top-1 acc 33.203 (30.489)	Top-5 acc 54.688 (53.923)	lr 0.00299
Warmup Train [34][2320/3239]	Time 0.165 (0.232)	Data 0.001 (0.011)	Loss 4.2980 (4.0320)	Top-1 acc 25.391 (30.481)	Top-5 acc 47.266 (53.913)	lr 0.00298
Warmup Train [34][2330/3239]	Time 0.191 (0.232)	Data 0.001 (0.011)	Loss 3.9869 (4.0319)	Top-1 acc 37.109 (30.485)	Top-5 acc 56.250 (53.918)	lr 0.00298
Warmup Train [34][2340/3239]	Time 0.172 (0.232)	Data 0.001 (0.011)	Loss 4.1078 (4.0319)	Top-1 acc 30.078 (30.486)	Top-5 acc 55.078 (53.923)	lr 0.00298
Warmup Train [34][2350/3239]	Time 0.154 (0.232)	Data 0.001 (0.011)	Loss 3.9887 (4.0318)	Top-1 acc 28.516 (30.488)	Top-5 acc 56.250 (53.925)	lr 0.00297
Warmup Train [34][2360/3239]	Time 0.171 (0.232)	Data 0.001 (0.011)	Loss 4.2051 (4.0318)	Top-1 acc 30.078 (30.489)	Top-5 acc 48.828 (53.924)	lr 0.00297
Warmup Train [34][2370/3239]	Time 0.249 (0.232)	Data 0.002 (0.011)	Loss 4.1130 (4.0319)	Top-1 acc 27.344 (30.485)	Top-5 acc 54.297 (53.923)	lr 0.00297
Warmup Train [34][2380/3239]	Time 0.307 (0.232)	Data 0.001 (0.011)	Loss 3.9991 (4.0317)	Top-1 acc 30.859 (30.489)	Top-5 acc 54.297 (53.926)	lr 0.00297
Warmup Train [34][2390/3239]	Time 0.197 (0.232)	Data 0.002 (0.011)	Loss 4.1469 (4.0316)	Top-1 acc 29.297 (30.491)	Top-5 acc 53.516 (53.927)	lr 0.00296
Warmup Train [34][2400/3239]	Time 0.193 (0.232)	Data 0.001 (0.011)	Loss 3.9962 (4.0317)	Top-1 acc 30.859 (30.491)	Top-5 acc 56.250 (53.928)	lr 0.00296
Warmup Train [34][2410/3239]	Time 0.190 (0.232)	Data 0.001 (0.011)	Loss 4.0347 (4.0317)	Top-1 acc 31.641 (30.491)	Top-5 acc 53.516 (53.929)	lr 0.00296
Warmup Train [34][2420/3239]	Time 0.245 (0.232)	Data 0.001 (0.011)	Loss 4.0826 (4.0317)	Top-1 acc 32.422 (30.495)	Top-5 acc 52.734 (53.930)	lr 0.00295
Warmup Train [34][2430/3239]	Time 0.209 (0.232)	Data 0.001 (0.011)	Loss 4.0142 (4.0319)	Top-1 acc 29.297 (30.494)	Top-5 acc 50.781 (53.919)	lr 0.00295
Warmup Train [34][2440/3239]	Time 0.198 (0.232)	Data 0.001 (0.010)	Loss 4.0494 (4.0318)	Top-1 acc 29.297 (30.492)	Top-5 acc 53.906 (53.921)	lr 0.00295
Warmup Train [34][2450/3239]	Time 0.221 (0.232)	Data 0.001 (0.010)	Loss 4.0027 (4.0320)	Top-1 acc 34.375 (30.488)	Top-5 acc 57.031 (53.918)	lr 0.00295
Warmup Train [34][2460/3239]	Time 0.284 (0.232)	Data 0.001 (0.010)	Loss 3.9663 (4.0322)	Top-1 acc 29.688 (30.487)	Top-5 acc 58.984 (53.918)	lr 0.00294
Warmup Train [34][2470/3239]	Time 0.226 (0.231)	Data 0.001 (0.010)	Loss 4.0699 (4.0323)	Top-1 acc 30.078 (30.483)	Top-5 acc 53.125 (53.914)	lr 0.00294
Warmup Train [34][2480/3239]	Time 0.224 (0.231)	Data 0.001 (0.010)	Loss 4.1277 (4.0326)	Top-1 acc 28.906 (30.481)	Top-5 acc 52.734 (53.911)	lr 0.00294
Warmup Train [34][2490/3239]	Time 0.227 (0.231)	Data 0.001 (0.010)	Loss 4.0368 (4.0326)	Top-1 acc 28.516 (30.483)	Top-5 acc 50.000 (53.909)	lr 0.00293
Warmup Train [34][2500/3239]	Time 0.231 (0.231)	Data 0.001 (0.010)	Loss 3.9791 (4.0328)	Top-1 acc 32.812 (30.483)	Top-5 acc 54.688 (53.902)	lr 0.00293
Warmup Train [34][2510/3239]	Time 0.179 (0.231)	Data 0.001 (0.010)	Loss 4.0768 (4.0330)	Top-1 acc 30.078 (30.482)	Top-5 acc 54.297 (53.902)	lr 0.00293
Warmup Train [34][2520/3239]	Time 0.375 (0.231)	Data 0.001 (0.010)	Loss 3.8026 (4.0329)	Top-1 acc 35.156 (30.486)	Top-5 acc 57.031 (53.905)	lr 0.00293
Warmup Train [34][2530/3239]	Time 0.244 (0.231)	Data 0.001 (0.010)	Loss 3.9629 (4.0328)	Top-1 acc 29.297 (30.490)	Top-5 acc 55.859 (53.903)	lr 0.00292
Warmup Train [34][2540/3239]	Time 0.249 (0.231)	Data 0.001 (0.010)	Loss 3.9062 (4.0326)	Top-1 acc 31.641 (30.493)	Top-5 acc 56.641 (53.904)	lr 0.00292
Warmup Train [34][2550/3239]	Time 0.232 (0.231)	Data 0.001 (0.010)	Loss 3.9776 (4.0328)	Top-1 acc 34.375 (30.494)	Top-5 acc 57.031 (53.899)	lr 0.00292
Warmup Train [34][2560/3239]	Time 0.189 (0.231)	Data 0.001 (0.010)	Loss 3.8796 (4.0329)	Top-1 acc 32.422 (30.491)	Top-5 acc 57.031 (53.895)	lr 0.00291
Warmup Train [34][2570/3239]	Time 0.198 (0.231)	Data 0.001 (0.010)	Loss 4.1464 (4.0328)	Top-1 acc 30.859 (30.494)	Top-5 acc 52.734 (53.896)	lr 0.00291
Warmup Train [34][2580/3239]	Time 0.177 (0.231)	Data 0.001 (0.010)	Loss 4.0026 (4.0326)	Top-1 acc 28.906 (30.498)	Top-5 acc 54.297 (53.897)	lr 0.00291
Warmup Train [34][2590/3239]	Time 0.245 (0.231)	Data 0.001 (0.010)	Loss 4.1719 (4.0325)	Top-1 acc 29.297 (30.498)	Top-5 acc 53.516 (53.900)	lr 0.00291
Warmup Train [34][2600/3239]	Time 0.199 (0.231)	Data 0.001 (0.010)	Loss 4.0191 (4.0326)	Top-1 acc 27.344 (30.496)	Top-5 acc 57.422 (53.897)	lr 0.00290
Warmup Train [34][2610/3239]	Time 0.308 (0.231)	Data 0.001 (0.010)	Loss 3.9954 (4.0326)	Top-1 acc 28.516 (30.497)	Top-5 acc 55.859 (53.896)	lr 0.00290
Warmup Train [34][2620/3239]	Time 0.199 (0.231)	Data 0.002 (0.010)	Loss 3.9862 (4.0324)	Top-1 acc 28.125 (30.502)	Top-5 acc 51.953 (53.901)	lr 0.00290
Warmup Train [34][2630/3239]	Time 0.246 (0.231)	Data 0.002 (0.010)	Loss 4.0098 (4.0326)	Top-1 acc 31.641 (30.500)	Top-5 acc 54.688 (53.899)	lr 0.00290
Warmup Train [34][2640/3239]	Time 0.344 (0.231)	Data 0.001 (0.010)	Loss 4.0041 (4.0327)	Top-1 acc 31.250 (30.499)	Top-5 acc 56.641 (53.898)	lr 0.00289
Warmup Train [34][2650/3239]	Time 0.242 (0.231)	Data 0.001 (0.010)	Loss 4.2067 (4.0324)	Top-1 acc 28.906 (30.505)	Top-5 acc 51.562 (53.909)	lr 0.00289
Warmup Train [34][2660/3239]	Time 0.256 (0.231)	Data 0.001 (0.010)	Loss 4.1057 (4.0322)	Top-1 acc 28.516 (30.505)	Top-5 acc 52.734 (53.921)	lr 0.00289
Warmup Train [34][2670/3239]	Time 0.176 (0.231)	Data 0.001 (0.010)	Loss 3.8706 (4.0322)	Top-1 acc 36.719 (30.504)	Top-5 acc 54.688 (53.922)	lr 0.00288
Warmup Train [34][2680/3239]	Time 0.278 (0.231)	Data 0.001 (0.010)	Loss 4.0531 (4.0322)	Top-1 acc 33.984 (30.501)	Top-5 acc 57.812 (53.924)	lr 0.00288
Warmup Train [34][2690/3239]	Time 0.209 (0.231)	Data 0.001 (0.010)	Loss 4.1767 (4.0321)	Top-1 acc 28.906 (30.501)	Top-5 acc 48.438 (53.925)	lr 0.00288
Warmup Train [34][2700/3239]	Time 0.139 (0.231)	Data 0.001 (0.010)	Loss 4.0697 (4.0321)	Top-1 acc 29.297 (30.501)	Top-5 acc 53.906 (53.924)	lr 0.00288
Warmup Train [34][2710/3239]	Time 0.247 (0.231)	Data 0.001 (0.010)	Loss 3.8847 (4.0317)	Top-1 acc 37.109 (30.510)	Top-5 acc 58.594 (53.934)	lr 0.00287
Warmup Train [34][2720/3239]	Time 0.232 (0.231)	Data 0.002 (0.010)	Loss 3.9249 (4.0318)	Top-1 acc 33.203 (30.508)	Top-5 acc 57.422 (53.932)	lr 0.00287
Warmup Train [34][2730/3239]	Time 0.225 (0.231)	Data 0.001 (0.010)	Loss 4.0444 (4.0319)	Top-1 acc 30.859 (30.502)	Top-5 acc 52.734 (53.929)	lr 0.00287
Warmup Train [34][2740/3239]	Time 0.200 (0.231)	Data 0.001 (0.010)	Loss 3.9504 (4.0320)	Top-1 acc 28.125 (30.499)	Top-5 acc 56.641 (53.925)	lr 0.00286
Warmup Train [34][2750/3239]	Time 0.355 (0.231)	Data 0.001 (0.010)	Loss 3.8297 (4.0320)	Top-1 acc 35.156 (30.502)	Top-5 acc 60.156 (53.925)	lr 0.00286
Warmup Train [34][2760/3239]	Time 0.152 (0.230)	Data 0.001 (0.010)	Loss 4.4283 (4.0321)	Top-1 acc 21.875 (30.504)	Top-5 acc 41.406 (53.918)	lr 0.00286
Warmup Train [34][2770/3239]	Time 0.180 (0.230)	Data 0.001 (0.010)	Loss 4.1770 (4.0320)	Top-1 acc 26.562 (30.507)	Top-5 acc 48.828 (53.917)	lr 0.00286
Warmup Train [34][2780/3239]	Time 0.190 (0.230)	Data 0.001 (0.010)	Loss 4.0928 (4.0320)	Top-1 acc 28.125 (30.507)	Top-5 acc 51.172 (53.918)	lr 0.00285
Warmup Train [34][2790/3239]	Time 0.182 (0.230)	Data 0.001 (0.010)	Loss 3.9132 (4.0320)	Top-1 acc 34.375 (30.511)	Top-5 acc 53.516 (53.919)	lr 0.00285
Warmup Train [34][2800/3239]	Time 0.223 (0.230)	Data 0.001 (0.010)	Loss 4.2007 (4.0320)	Top-1 acc 25.781 (30.511)	Top-5 acc 50.781 (53.920)	lr 0.00285
Warmup Train [34][2810/3239]	Time 0.193 (0.230)	Data 0.001 (0.010)	Loss 4.0506 (4.0320)	Top-1 acc 31.250 (30.511)	Top-5 acc 56.641 (53.922)	lr 0.00284
Warmup Train [34][2820/3239]	Time 0.235 (0.230)	Data 0.001 (0.009)	Loss 3.8251 (4.0321)	Top-1 acc 33.984 (30.508)	Top-5 acc 58.594 (53.921)	lr 0.00284
Warmup Train [34][2830/3239]	Time 0.288 (0.230)	Data 0.001 (0.009)	Loss 4.0977 (4.0322)	Top-1 acc 25.391 (30.503)	Top-5 acc 51.172 (53.914)	lr 0.00284
Warmup Train [34][2840/3239]	Time 0.204 (0.230)	Data 0.002 (0.009)	Loss 4.0300 (4.0320)	Top-1 acc 30.469 (30.512)	Top-5 acc 53.125 (53.916)	lr 0.00284
Warmup Train [34][2850/3239]	Time 0.269 (0.230)	Data 0.001 (0.009)	Loss 4.0683 (4.0320)	Top-1 acc 28.906 (30.512)	Top-5 acc 52.734 (53.916)	lr 0.00283
Warmup Train [34][2860/3239]	Time 0.267 (0.230)	Data 0.001 (0.009)	Loss 4.2182 (4.0324)	Top-1 acc 24.219 (30.505)	Top-5 acc 46.094 (53.911)	lr 0.00283
Warmup Train [34][2870/3239]	Time 0.187 (0.230)	Data 0.001 (0.009)	Loss 3.8667 (4.0322)	Top-1 acc 35.938 (30.509)	Top-5 acc 56.641 (53.912)	lr 0.00283
Warmup Train [34][2880/3239]	Time 0.157 (0.230)	Data 0.002 (0.009)	Loss 4.0642 (4.0324)	Top-1 acc 30.859 (30.507)	Top-5 acc 51.172 (53.910)	lr 0.00282
Warmup Train [34][2890/3239]	Time 0.205 (0.230)	Data 0.001 (0.009)	Loss 3.9405 (4.0322)	Top-1 acc 30.078 (30.507)	Top-5 acc 57.031 (53.911)	lr 0.00282
Warmup Train [34][2900/3239]	Time 0.173 (0.230)	Data 0.001 (0.009)	Loss 4.0134 (4.0322)	Top-1 acc 32.031 (30.510)	Top-5 acc 53.906 (53.914)	lr 0.00282
Warmup Train [34][2910/3239]	Time 0.194 (0.230)	Data 0.001 (0.009)	Loss 4.0341 (4.0324)	Top-1 acc 30.078 (30.506)	Top-5 acc 56.250 (53.911)	lr 0.00282
Warmup Train [34][2920/3239]	Time 0.213 (0.230)	Data 0.001 (0.009)	Loss 3.9817 (4.0322)	Top-1 acc 31.250 (30.510)	Top-5 acc 54.688 (53.919)	lr 0.00281
Warmup Train [34][2930/3239]	Time 0.212 (0.230)	Data 0.002 (0.009)	Loss 3.9776 (4.0322)	Top-1 acc 26.172 (30.507)	Top-5 acc 55.859 (53.919)	lr 0.00281
Warmup Train [34][2940/3239]	Time 0.190 (0.230)	Data 0.001 (0.009)	Loss 3.6950 (4.0320)	Top-1 acc 36.719 (30.511)	Top-5 acc 62.109 (53.924)	lr 0.00281
Warmup Train [34][2950/3239]	Time 0.389 (0.230)	Data 0.002 (0.009)	Loss 3.9362 (4.0316)	Top-1 acc 32.812 (30.522)	Top-5 acc 56.250 (53.930)	lr 0.00281
Warmup Train [34][2960/3239]	Time 0.186 (0.230)	Data 0.001 (0.009)	Loss 4.0172 (4.0315)	Top-1 acc 32.812 (30.527)	Top-5 acc 52.734 (53.928)	lr 0.00280
Warmup Train [34][2970/3239]	Time 0.133 (0.230)	Data 0.001 (0.009)	Loss 4.3135 (4.0315)	Top-1 acc 26.562 (30.528)	Top-5 acc 49.219 (53.930)	lr 0.00280
Warmup Train [34][2980/3239]	Time 0.172 (0.230)	Data 0.001 (0.009)	Loss 4.0958 (4.0317)	Top-1 acc 25.781 (30.519)	Top-5 acc 51.562 (53.928)	lr 0.00280
Warmup Train [34][2990/3239]	Time 0.249 (0.230)	Data 0.001 (0.009)	Loss 3.9040 (4.0317)	Top-1 acc 30.859 (30.519)	Top-5 acc 57.031 (53.925)	lr 0.00279
Warmup Train [34][3000/3239]	Time 0.244 (0.230)	Data 0.001 (0.009)	Loss 4.1532 (4.0318)	Top-1 acc 32.422 (30.518)	Top-5 acc 53.125 (53.924)	lr 0.00279
Warmup Train [34][3010/3239]	Time 0.219 (0.230)	Data 0.001 (0.009)	Loss 4.2762 (4.0318)	Top-1 acc 26.172 (30.518)	Top-5 acc 47.656 (53.925)	lr 0.00279
Warmup Train [34][3020/3239]	Time 0.208 (0.230)	Data 0.002 (0.009)	Loss 3.9453 (4.0318)	Top-1 acc 33.203 (30.517)	Top-5 acc 53.125 (53.922)	lr 0.00279
Warmup Train [34][3030/3239]	Time 0.211 (0.230)	Data 0.002 (0.009)	Loss 3.9333 (4.0319)	Top-1 acc 32.031 (30.520)	Top-5 acc 57.812 (53.920)	lr 0.00278
Warmup Train [34][3040/3239]	Time 0.289 (0.230)	Data 0.001 (0.009)	Loss 3.9572 (4.0318)	Top-1 acc 29.297 (30.520)	Top-5 acc 57.031 (53.922)	lr 0.00278
Warmup Train [34][3050/3239]	Time 0.216 (0.230)	Data 0.001 (0.009)	Loss 4.1038 (4.0318)	Top-1 acc 30.859 (30.521)	Top-5 acc 53.125 (53.923)	lr 0.00278
Warmup Train [34][3060/3239]	Time 0.176 (0.230)	Data 0.001 (0.009)	Loss 4.1888 (4.0318)	Top-1 acc 25.781 (30.521)	Top-5 acc 53.125 (53.922)	lr 0.00277
Warmup Train [34][3070/3239]	Time 0.338 (0.230)	Data 0.002 (0.009)	Loss 4.0426 (4.0319)	Top-1 acc 30.078 (30.520)	Top-5 acc 53.516 (53.924)	lr 0.00277
Warmup Train [34][3080/3239]	Time 0.227 (0.230)	Data 0.001 (0.009)	Loss 4.0240 (4.0320)	Top-1 acc 28.125 (30.520)	Top-5 acc 54.297 (53.920)	lr 0.00277
Warmup Train [34][3090/3239]	Time 0.201 (0.230)	Data 0.001 (0.009)	Loss 3.9944 (4.0319)	Top-1 acc 27.734 (30.519)	Top-5 acc 55.469 (53.921)	lr 0.00277
Warmup Train [34][3100/3239]	Time 0.161 (0.230)	Data 0.001 (0.009)	Loss 4.1164 (4.0320)	Top-1 acc 31.250 (30.515)	Top-5 acc 54.297 (53.919)	lr 0.00276
Warmup Train [34][3110/3239]	Time 0.228 (0.230)	Data 0.001 (0.009)	Loss 4.0975 (4.0319)	Top-1 acc 27.734 (30.518)	Top-5 acc 54.688 (53.924)	lr 0.00276
Warmup Train [34][3120/3239]	Time 0.200 (0.230)	Data 0.001 (0.009)	Loss 4.2210 (4.0319)	Top-1 acc 26.953 (30.518)	Top-5 acc 51.562 (53.926)	lr 0.00276
Warmup Train [34][3130/3239]	Time 0.274 (0.230)	Data 0.001 (0.009)	Loss 3.9913 (4.0318)	Top-1 acc 29.688 (30.519)	Top-5 acc 54.688 (53.927)	lr 0.00276
Warmup Train [34][3140/3239]	Time 0.228 (0.230)	Data 0.002 (0.009)	Loss 4.0201 (4.0319)	Top-1 acc 32.812 (30.515)	Top-5 acc 52.344 (53.927)	lr 0.00275
Warmup Train [34][3150/3239]	Time 0.229 (0.230)	Data 0.003 (0.009)	Loss 3.9888 (4.0319)	Top-1 acc 32.422 (30.517)	Top-5 acc 55.469 (53.928)	lr 0.00275
Warmup Train [34][3160/3239]	Time 0.201 (0.230)	Data 0.001 (0.009)	Loss 4.0771 (4.0320)	Top-1 acc 30.078 (30.514)	Top-5 acc 50.391 (53.927)	lr 0.00275
Warmup Train [34][3170/3239]	Time 0.206 (0.229)	Data 0.002 (0.009)	Loss 3.8888 (4.0321)	Top-1 acc 31.250 (30.513)	Top-5 acc 55.078 (53.924)	lr 0.00274
Warmup Train [34][3180/3239]	Time 0.340 (0.230)	Data 0.000 (0.009)	Loss 3.9658 (4.0321)	Top-1 acc 31.250 (30.510)	Top-5 acc 56.250 (53.923)	lr 0.00274
Warmup Train [34][3190/3239]	Time 0.134 (0.229)	Data 0.000 (0.009)	Loss 3.9856 (4.0321)	Top-1 acc 32.031 (30.512)	Top-5 acc 55.469 (53.927)	lr 0.00274
Warmup Train [34][3200/3239]	Time 0.200 (0.229)	Data 0.000 (0.009)	Loss 4.1385 (4.0321)	Top-1 acc 26.953 (30.513)	Top-5 acc 50.391 (53.930)	lr 0.00274
Warmup Train [34][3210/3239]	Time 0.156 (0.229)	Data 0.000 (0.009)	Loss 4.0306 (4.0323)	Top-1 acc 30.469 (30.509)	Top-5 acc 52.734 (53.926)	lr 0.00273
Warmup Train [34][3220/3239]	Time 0.138 (0.229)	Data 0.000 (0.009)	Loss 4.2190 (4.0323)	Top-1 acc 28.516 (30.511)	Top-5 acc 48.438 (53.927)	lr 0.00273
Warmup Train [34][3230/3239]	Time 0.204 (0.229)	Data 0.000 (0.009)	Loss 4.1099 (4.0325)	Top-1 acc 25.781 (30.509)	Top-5 acc 53.125 (53.924)	lr 0.00273
Warmup Train [34][3239/3239]	Time 0.143 (0.229)	Data 0.000 (0.009)	Loss 4.2395 (4.0325)	Top-1 acc 33.333 (30.510)	Top-5 acc 49.383 (53.926)	lr 0.00273
==========Warmup Valid [34/40]	loss 2.967	top-1 acc 38.169	top-5 acc 62.840	Train top-1 30.510	top-5 53.926	flops: 442.4M
Warmup Train [35][0/3239]	Time 18.529 (18.529)	Data 16.858 (16.858)	Loss 4.0345 (4.0345)	Top-1 acc 30.078 (30.078)	Top-5 acc 53.516 (53.516)	lr 0.00272
Warmup Train [35][10/3239]	Time 0.248 (1.962)	Data 0.001 (1.539)	Loss 4.0523 (4.0559)	Top-1 acc 28.516 (30.114)	Top-5 acc 53.516 (53.942)	lr 0.00272
Warmup Train [35][20/3239]	Time 0.180 (1.158)	Data 0.002 (0.809)	Loss 3.9825 (4.0429)	Top-1 acc 31.250 (30.450)	Top-5 acc 53.516 (54.092)	lr 0.00272
Warmup Train [35][30/3239]	Time 0.227 (0.855)	Data 0.001 (0.549)	Loss 3.9743 (4.0312)	Top-1 acc 34.766 (30.570)	Top-5 acc 57.812 (54.549)	lr 0.00272
Warmup Train [35][40/3239]	Time 0.259 (0.700)	Data 0.002 (0.415)	Loss 3.8733 (4.0343)	Top-1 acc 31.250 (30.583)	Top-5 acc 55.078 (54.183)	lr 0.00271
Warmup Train [35][50/3239]	Time 0.189 (0.607)	Data 0.002 (0.334)	Loss 4.0067 (4.0360)	Top-1 acc 32.031 (30.722)	Top-5 acc 57.031 (54.328)	lr 0.00271
Warmup Train [35][60/3239]	Time 0.229 (0.543)	Data 0.001 (0.280)	Loss 4.2810 (4.0362)	Top-1 acc 26.172 (30.526)	Top-5 acc 44.922 (54.034)	lr 0.00271
Warmup Train [35][70/3239]	Time 0.260 (0.497)	Data 0.001 (0.241)	Loss 4.0209 (4.0394)	Top-1 acc 30.469 (30.463)	Top-5 acc 55.078 (53.983)	lr 0.00271
Warmup Train [35][80/3239]	Time 0.201 (0.462)	Data 0.001 (0.212)	Loss 3.9414 (4.0396)	Top-1 acc 35.156 (30.430)	Top-5 acc 59.375 (54.051)	lr 0.00270
Warmup Train [35][90/3239]	Time 0.222 (0.435)	Data 0.001 (0.189)	Loss 4.0714 (4.0443)	Top-1 acc 28.125 (30.258)	Top-5 acc 50.391 (53.962)	lr 0.00270
Warmup Train [35][100/3239]	Time 0.286 (0.416)	Data 0.002 (0.170)	Loss 4.0664 (4.0367)	Top-1 acc 29.297 (30.399)	Top-5 acc 51.562 (54.103)	lr 0.00270
Warmup Train [35][110/3239]	Time 0.264 (0.397)	Data 0.001 (0.155)	Loss 3.8261 (4.0369)	Top-1 acc 35.156 (30.374)	Top-5 acc 60.547 (54.089)	lr 0.00269
Warmup Train [35][120/3239]	Time 0.215 (0.381)	Data 0.001 (0.142)	Loss 4.0795 (4.0363)	Top-1 acc 29.688 (30.359)	Top-5 acc 51.172 (54.022)	lr 0.00269
Warmup Train [35][130/3239]	Time 0.220 (0.367)	Data 0.001 (0.132)	Loss 3.9850 (4.0336)	Top-1 acc 30.859 (30.484)	Top-5 acc 55.859 (54.115)	lr 0.00269
Warmup Train [35][140/3239]	Time 0.227 (0.357)	Data 0.001 (0.123)	Loss 3.9929 (4.0284)	Top-1 acc 32.031 (30.652)	Top-5 acc 55.078 (54.200)	lr 0.00269
Warmup Train [35][150/3239]	Time 0.176 (0.347)	Data 0.001 (0.115)	Loss 4.0355 (4.0286)	Top-1 acc 31.250 (30.634)	Top-5 acc 51.953 (54.188)	lr 0.00268
Warmup Train [35][160/3239]	Time 0.199 (0.339)	Data 0.001 (0.107)	Loss 3.7291 (4.0272)	Top-1 acc 37.500 (30.690)	Top-5 acc 61.719 (54.256)	lr 0.00268
Warmup Train [35][170/3239]	Time 0.189 (0.332)	Data 0.001 (0.101)	Loss 3.9835 (4.0273)	Top-1 acc 30.078 (30.711)	Top-5 acc 56.250 (54.256)	lr 0.00268
Warmup Train [35][180/3239]	Time 0.185 (0.326)	Data 0.001 (0.096)	Loss 4.1027 (4.0286)	Top-1 acc 35.547 (30.713)	Top-5 acc 55.469 (54.236)	lr 0.00268
Warmup Train [35][190/3239]	Time 0.186 (0.320)	Data 0.001 (0.091)	Loss 4.0265 (4.0269)	Top-1 acc 31.250 (30.747)	Top-5 acc 54.688 (54.289)	lr 0.00267
Warmup Train [35][200/3239]	Time 0.208 (0.315)	Data 0.001 (0.086)	Loss 3.9297 (4.0257)	Top-1 acc 28.906 (30.768)	Top-5 acc 53.516 (54.268)	lr 0.00267
Warmup Train [35][210/3239]	Time 0.150 (0.311)	Data 0.001 (0.083)	Loss 3.8945 (4.0258)	Top-1 acc 34.375 (30.767)	Top-5 acc 56.250 (54.228)	lr 0.00267
Warmup Train [35][220/3239]	Time 0.296 (0.308)	Data 0.001 (0.079)	Loss 4.2109 (4.0258)	Top-1 acc 26.953 (30.794)	Top-5 acc 49.219 (54.224)	lr 0.00266
Warmup Train [35][230/3239]	Time 0.175 (0.304)	Data 0.001 (0.076)	Loss 3.9560 (4.0242)	Top-1 acc 31.250 (30.854)	Top-5 acc 56.250 (54.236)	lr 0.00266
Warmup Train [35][240/3239]	Time 0.211 (0.302)	Data 0.003 (0.073)	Loss 4.1638 (4.0245)	Top-1 acc 29.297 (30.867)	Top-5 acc 51.953 (54.248)	lr 0.00266
Warmup Train [35][250/3239]	Time 0.235 (0.300)	Data 0.002 (0.070)	Loss 3.8598 (4.0242)	Top-1 acc 36.328 (30.861)	Top-5 acc 55.859 (54.216)	lr 0.00266
Warmup Train [35][260/3239]	Time 0.239 (0.297)	Data 0.001 (0.068)	Loss 4.2397 (4.0248)	Top-1 acc 26.172 (30.865)	Top-5 acc 48.828 (54.215)	lr 0.00265
Warmup Train [35][270/3239]	Time 0.193 (0.294)	Data 0.002 (0.065)	Loss 4.2119 (4.0249)	Top-1 acc 29.297 (30.875)	Top-5 acc 50.000 (54.202)	lr 0.00265
Warmup Train [35][280/3239]	Time 0.227 (0.292)	Data 0.002 (0.063)	Loss 4.0868 (4.0232)	Top-1 acc 26.953 (30.926)	Top-5 acc 53.906 (54.227)	lr 0.00265
Warmup Train [35][290/3239]	Time 0.204 (0.290)	Data 0.001 (0.061)	Loss 3.9456 (4.0224)	Top-1 acc 33.203 (30.909)	Top-5 acc 55.859 (54.223)	lr 0.00265
Warmup Train [35][300/3239]	Time 0.165 (0.288)	Data 0.001 (0.059)	Loss 3.9310 (4.0227)	Top-1 acc 32.812 (30.909)	Top-5 acc 55.859 (54.250)	lr 0.00264
Warmup Train [35][310/3239]	Time 0.270 (0.286)	Data 0.001 (0.057)	Loss 4.0485 (4.0217)	Top-1 acc 30.859 (30.969)	Top-5 acc 57.031 (54.269)	lr 0.00264
Warmup Train [35][320/3239]	Time 0.224 (0.285)	Data 0.001 (0.056)	Loss 3.8170 (4.0213)	Top-1 acc 33.984 (30.953)	Top-5 acc 58.984 (54.275)	lr 0.00264
Warmup Train [35][330/3239]	Time 0.160 (0.282)	Data 0.001 (0.054)	Loss 3.8759 (4.0212)	Top-1 acc 30.469 (30.938)	Top-5 acc 56.641 (54.289)	lr 0.00263
Warmup Train [35][340/3239]	Time 0.241 (0.282)	Data 0.003 (0.053)	Loss 4.0710 (4.0198)	Top-1 acc 28.906 (30.969)	Top-5 acc 55.469 (54.322)	lr 0.00263
Warmup Train [35][350/3239]	Time 0.250 (0.281)	Data 0.001 (0.051)	Loss 3.6569 (4.0199)	Top-1 acc 40.625 (30.988)	Top-5 acc 64.844 (54.334)	lr 0.00263
Warmup Train [35][360/3239]	Time 0.243 (0.280)	Data 0.001 (0.050)	Loss 4.0666 (4.0196)	Top-1 acc 26.953 (30.968)	Top-5 acc 54.297 (54.333)	lr 0.00263
Warmup Train [35][370/3239]	Time 0.301 (0.280)	Data 0.001 (0.049)	Loss 4.0809 (4.0184)	Top-1 acc 29.297 (30.965)	Top-5 acc 53.906 (54.341)	lr 0.00262
Warmup Train [35][380/3239]	Time 0.239 (0.279)	Data 0.001 (0.048)	Loss 4.1661 (4.0188)	Top-1 acc 24.609 (30.927)	Top-5 acc 51.562 (54.359)	lr 0.00262
Warmup Train [35][390/3239]	Time 0.228 (0.279)	Data 0.001 (0.047)	Loss 3.9370 (4.0184)	Top-1 acc 33.203 (30.916)	Top-5 acc 57.812 (54.375)	lr 0.00262
Warmup Train [35][400/3239]	Time 0.219 (0.278)	Data 0.002 (0.046)	Loss 3.9747 (4.0181)	Top-1 acc 30.469 (30.919)	Top-5 acc 57.031 (54.403)	lr 0.00262
Warmup Train [35][410/3239]	Time 0.243 (0.278)	Data 0.002 (0.045)	Loss 4.1525 (4.0187)	Top-1 acc 29.688 (30.892)	Top-5 acc 51.562 (54.373)	lr 0.00261
Warmup Train [35][420/3239]	Time 0.299 (0.277)	Data 0.001 (0.044)	Loss 4.1623 (4.0198)	Top-1 acc 26.172 (30.871)	Top-5 acc 46.484 (54.317)	lr 0.00261
Warmup Train [35][430/3239]	Time 0.348 (0.277)	Data 0.002 (0.043)	Loss 4.0231 (4.0207)	Top-1 acc 28.125 (30.842)	Top-5 acc 52.344 (54.320)	lr 0.00261
Warmup Train [35][440/3239]	Time 0.257 (0.277)	Data 0.002 (0.042)	Loss 4.0796 (4.0207)	Top-1 acc 28.906 (30.858)	Top-5 acc 49.609 (54.319)	lr 0.00261
Warmup Train [35][450/3239]	Time 0.243 (0.276)	Data 0.001 (0.041)	Loss 4.0592 (4.0206)	Top-1 acc 29.297 (30.853)	Top-5 acc 51.562 (54.309)	lr 0.00260
Warmup Train [35][460/3239]	Time 0.252 (0.275)	Data 0.002 (0.040)	Loss 3.8910 (4.0200)	Top-1 acc 32.812 (30.864)	Top-5 acc 57.812 (54.304)	lr 0.00260
Warmup Train [35][470/3239]	Time 0.191 (0.274)	Data 0.002 (0.039)	Loss 4.0261 (4.0202)	Top-1 acc 30.469 (30.842)	Top-5 acc 55.859 (54.287)	lr 0.00260
Warmup Train [35][480/3239]	Time 0.227 (0.272)	Data 0.001 (0.038)	Loss 4.1021 (4.0210)	Top-1 acc 28.906 (30.817)	Top-5 acc 55.078 (54.281)	lr 0.00259
Warmup Train [35][490/3239]	Time 0.187 (0.271)	Data 0.001 (0.038)	Loss 4.2129 (4.0218)	Top-1 acc 26.172 (30.811)	Top-5 acc 49.609 (54.267)	lr 0.00259
Warmup Train [35][500/3239]	Time 0.162 (0.270)	Data 0.003 (0.037)	Loss 4.0973 (4.0209)	Top-1 acc 30.078 (30.816)	Top-5 acc 53.516 (54.274)	lr 0.00259
Warmup Train [35][510/3239]	Time 0.264 (0.270)	Data 0.002 (0.036)	Loss 3.9998 (4.0201)	Top-1 acc 35.547 (30.840)	Top-5 acc 53.125 (54.301)	lr 0.00259
Warmup Train [35][520/3239]	Time 0.283 (0.269)	Data 0.001 (0.036)	Loss 3.9431 (4.0211)	Top-1 acc 31.250 (30.802)	Top-5 acc 53.125 (54.280)	lr 0.00258
Warmup Train [35][530/3239]	Time 0.268 (0.268)	Data 0.001 (0.035)	Loss 3.9105 (4.0211)	Top-1 acc 28.906 (30.805)	Top-5 acc 57.031 (54.256)	lr 0.00258
Warmup Train [35][540/3239]	Time 0.319 (0.268)	Data 0.001 (0.034)	Loss 3.8957 (4.0204)	Top-1 acc 33.984 (30.821)	Top-5 acc 55.078 (54.267)	lr 0.00258
Warmup Train [35][550/3239]	Time 0.241 (0.267)	Data 0.001 (0.034)	Loss 3.9279 (4.0204)	Top-1 acc 31.641 (30.815)	Top-5 acc 53.906 (54.251)	lr 0.00258
Warmup Train [35][560/3239]	Time 0.197 (0.266)	Data 0.001 (0.033)	Loss 4.0884 (4.0204)	Top-1 acc 33.594 (30.829)	Top-5 acc 53.906 (54.254)	lr 0.00257
Warmup Train [35][570/3239]	Time 0.210 (0.265)	Data 0.001 (0.033)	Loss 3.9795 (4.0195)	Top-1 acc 28.125 (30.855)	Top-5 acc 51.562 (54.263)	lr 0.00257
Warmup Train [35][580/3239]	Time 0.160 (0.265)	Data 0.001 (0.032)	Loss 4.0263 (4.0199)	Top-1 acc 30.078 (30.849)	Top-5 acc 53.906 (54.256)	lr 0.00257
Warmup Train [35][590/3239]	Time 0.220 (0.264)	Data 0.001 (0.032)	Loss 3.8430 (4.0199)	Top-1 acc 34.766 (30.847)	Top-5 acc 60.156 (54.268)	lr 0.00256
Warmup Train [35][600/3239]	Time 0.240 (0.263)	Data 0.001 (0.031)	Loss 4.0745 (4.0207)	Top-1 acc 30.469 (30.837)	Top-5 acc 51.953 (54.266)	lr 0.00256
Warmup Train [35][610/3239]	Time 0.259 (0.263)	Data 0.002 (0.031)	Loss 4.0442 (4.0204)	Top-1 acc 29.297 (30.864)	Top-5 acc 51.172 (54.274)	lr 0.00256
Warmup Train [35][620/3239]	Time 0.249 (0.262)	Data 0.002 (0.030)	Loss 4.0632 (4.0214)	Top-1 acc 31.250 (30.839)	Top-5 acc 55.469 (54.263)	lr 0.00256
Warmup Train [35][630/3239]	Time 0.197 (0.262)	Data 0.001 (0.030)	Loss 3.7841 (4.0201)	Top-1 acc 36.328 (30.870)	Top-5 acc 57.812 (54.281)	lr 0.00255
Warmup Train [35][640/3239]	Time 0.329 (0.261)	Data 0.002 (0.030)	Loss 4.2237 (4.0198)	Top-1 acc 29.297 (30.882)	Top-5 acc 48.828 (54.287)	lr 0.00255
Warmup Train [35][650/3239]	Time 0.198 (0.261)	Data 0.001 (0.029)	Loss 4.0133 (4.0208)	Top-1 acc 28.125 (30.864)	Top-5 acc 53.906 (54.251)	lr 0.00255
Warmup Train [35][660/3239]	Time 0.233 (0.260)	Data 0.001 (0.029)	Loss 4.0117 (4.0210)	Top-1 acc 35.938 (30.866)	Top-5 acc 56.250 (54.254)	lr 0.00255
Warmup Train [35][670/3239]	Time 0.217 (0.260)	Data 0.001 (0.028)	Loss 4.0954 (4.0217)	Top-1 acc 29.688 (30.862)	Top-5 acc 54.688 (54.239)	lr 0.00254
Warmup Train [35][680/3239]	Time 0.193 (0.259)	Data 0.001 (0.028)	Loss 3.9621 (4.0215)	Top-1 acc 30.078 (30.848)	Top-5 acc 56.641 (54.248)	lr 0.00254
Warmup Train [35][690/3239]	Time 0.176 (0.259)	Data 0.001 (0.028)	Loss 4.1539 (4.0213)	Top-1 acc 28.906 (30.872)	Top-5 acc 50.781 (54.251)	lr 0.00254
Warmup Train [35][700/3239]	Time 0.168 (0.258)	Data 0.001 (0.027)	Loss 3.9139 (4.0212)	Top-1 acc 32.031 (30.872)	Top-5 acc 57.031 (54.238)	lr 0.00254
Warmup Train [35][710/3239]	Time 0.236 (0.257)	Data 0.001 (0.027)	Loss 3.8116 (4.0212)	Top-1 acc 31.641 (30.857)	Top-5 acc 57.422 (54.224)	lr 0.00253
Warmup Train [35][720/3239]	Time 0.169 (0.257)	Data 0.002 (0.027)	Loss 4.1043 (4.0206)	Top-1 acc 27.734 (30.862)	Top-5 acc 54.297 (54.236)	lr 0.00253
Warmup Train [35][730/3239]	Time 0.283 (0.256)	Data 0.001 (0.026)	Loss 4.0689 (4.0208)	Top-1 acc 29.297 (30.873)	Top-5 acc 52.734 (54.240)	lr 0.00253
Warmup Train [35][740/3239]	Time 0.284 (0.256)	Data 0.001 (0.026)	Loss 4.1146 (4.0210)	Top-1 acc 29.297 (30.867)	Top-5 acc 52.734 (54.238)	lr 0.00252
Warmup Train [35][750/3239]	Time 0.205 (0.255)	Data 0.001 (0.026)	Loss 4.1728 (4.0207)	Top-1 acc 29.688 (30.862)	Top-5 acc 51.562 (54.235)	lr 0.00252
Warmup Train [35][760/3239]	Time 0.201 (0.255)	Data 0.001 (0.025)	Loss 4.1330 (4.0216)	Top-1 acc 26.172 (30.835)	Top-5 acc 51.953 (54.217)	lr 0.00252
Warmup Train [35][770/3239]	Time 0.242 (0.254)	Data 0.001 (0.025)	Loss 4.2417 (4.0224)	Top-1 acc 26.172 (30.807)	Top-5 acc 47.656 (54.206)	lr 0.00252
Warmup Train [35][780/3239]	Time 0.220 (0.254)	Data 0.002 (0.025)	Loss 4.0516 (4.0222)	Top-1 acc 29.297 (30.814)	Top-5 acc 55.078 (54.209)	lr 0.00251
Warmup Train [35][790/3239]	Time 0.251 (0.254)	Data 0.002 (0.024)	Loss 3.9379 (4.0218)	Top-1 acc 29.297 (30.817)	Top-5 acc 55.469 (54.206)	lr 0.00251
Warmup Train [35][800/3239]	Time 0.216 (0.253)	Data 0.001 (0.024)	Loss 4.1019 (4.0218)	Top-1 acc 31.250 (30.804)	Top-5 acc 54.297 (54.210)	lr 0.00251
Warmup Train [35][810/3239]	Time 0.227 (0.253)	Data 0.001 (0.024)	Loss 3.9348 (4.0217)	Top-1 acc 35.938 (30.810)	Top-5 acc 55.078 (54.207)	lr 0.00251
Warmup Train [35][820/3239]	Time 0.245 (0.253)	Data 0.002 (0.024)	Loss 4.1008 (4.0218)	Top-1 acc 26.172 (30.803)	Top-5 acc 53.906 (54.207)	lr 0.00250
Warmup Train [35][830/3239]	Time 0.234 (0.252)	Data 0.001 (0.023)	Loss 3.9888 (4.0216)	Top-1 acc 32.031 (30.802)	Top-5 acc 56.250 (54.210)	lr 0.00250
Warmup Train [35][840/3239]	Time 0.211 (0.252)	Data 0.001 (0.023)	Loss 4.0110 (4.0224)	Top-1 acc 32.812 (30.782)	Top-5 acc 54.297 (54.178)	lr 0.00250
Warmup Train [35][850/3239]	Time 0.290 (0.252)	Data 0.001 (0.023)	Loss 4.2636 (4.0229)	Top-1 acc 25.391 (30.760)	Top-5 acc 49.609 (54.172)	lr 0.00250
Warmup Train [35][860/3239]	Time 0.177 (0.252)	Data 0.001 (0.023)	Loss 3.8612 (4.0228)	Top-1 acc 37.500 (30.772)	Top-5 acc 60.938 (54.185)	lr 0.00249
Warmup Train [35][870/3239]	Time 0.130 (0.251)	Data 0.001 (0.022)	Loss 3.8696 (4.0220)	Top-1 acc 35.547 (30.794)	Top-5 acc 57.812 (54.200)	lr 0.00249
Warmup Train [35][880/3239]	Time 0.152 (0.251)	Data 0.001 (0.022)	Loss 4.0930 (4.0224)	Top-1 acc 30.078 (30.784)	Top-5 acc 48.047 (54.190)	lr 0.00249
Warmup Train [35][890/3239]	Time 0.248 (0.251)	Data 0.001 (0.022)	Loss 4.0517 (4.0217)	Top-1 acc 28.125 (30.794)	Top-5 acc 52.344 (54.198)	lr 0.00249
Warmup Train [35][900/3239]	Time 0.135 (0.250)	Data 0.001 (0.022)	Loss 4.1402 (4.0216)	Top-1 acc 29.297 (30.796)	Top-5 acc 49.219 (54.198)	lr 0.00248
Warmup Train [35][910/3239]	Time 0.179 (0.250)	Data 0.001 (0.022)	Loss 4.0163 (4.0218)	Top-1 acc 30.078 (30.790)	Top-5 acc 57.031 (54.203)	lr 0.00248
Warmup Train [35][920/3239]	Time 0.271 (0.250)	Data 0.001 (0.021)	Loss 3.9856 (4.0222)	Top-1 acc 31.250 (30.776)	Top-5 acc 51.562 (54.176)	lr 0.00248
Warmup Train [35][930/3239]	Time 0.205 (0.249)	Data 0.001 (0.021)	Loss 3.9988 (4.0227)	Top-1 acc 34.766 (30.762)	Top-5 acc 53.516 (54.155)	lr 0.00247
Warmup Train [35][940/3239]	Time 0.199 (0.249)	Data 0.001 (0.021)	Loss 4.0295 (4.0230)	Top-1 acc 27.344 (30.744)	Top-5 acc 52.734 (54.145)	lr 0.00247
Warmup Train [35][950/3239]	Time 0.204 (0.249)	Data 0.001 (0.021)	Loss 3.9539 (4.0228)	Top-1 acc 31.641 (30.746)	Top-5 acc 55.469 (54.149)	lr 0.00247
Warmup Train [35][960/3239]	Time 0.292 (0.248)	Data 0.001 (0.021)	Loss 4.1084 (4.0226)	Top-1 acc 29.297 (30.749)	Top-5 acc 53.906 (54.151)	lr 0.00247
Warmup Train [35][970/3239]	Time 0.259 (0.248)	Data 0.001 (0.020)	Loss 3.9976 (4.0227)	Top-1 acc 27.734 (30.738)	Top-5 acc 51.953 (54.155)	lr 0.00246
Warmup Train [35][980/3239]	Time 0.184 (0.248)	Data 0.001 (0.020)	Loss 3.8627 (4.0228)	Top-1 acc 29.297 (30.743)	Top-5 acc 52.734 (54.149)	lr 0.00246
Warmup Train [35][990/3239]	Time 0.206 (0.248)	Data 0.002 (0.020)	Loss 4.0681 (4.0225)	Top-1 acc 31.250 (30.752)	Top-5 acc 53.906 (54.148)	lr 0.00246
Warmup Train [35][1000/3239]	Time 0.213 (0.248)	Data 0.001 (0.020)	Loss 4.0920 (4.0226)	Top-1 acc 30.078 (30.755)	Top-5 acc 51.562 (54.153)	lr 0.00246
Warmup Train [35][1010/3239]	Time 0.225 (0.247)	Data 0.001 (0.020)	Loss 3.7401 (4.0228)	Top-1 acc 37.109 (30.755)	Top-5 acc 60.156 (54.145)	lr 0.00245
Warmup Train [35][1020/3239]	Time 0.223 (0.247)	Data 0.001 (0.020)	Loss 3.8792 (4.0234)	Top-1 acc 33.984 (30.732)	Top-5 acc 54.688 (54.124)	lr 0.00245
Warmup Train [35][1030/3239]	Time 0.206 (0.247)	Data 0.001 (0.020)	Loss 4.2433 (4.0237)	Top-1 acc 25.000 (30.725)	Top-5 acc 50.000 (54.116)	lr 0.00245
Warmup Train [35][1040/3239]	Time 0.203 (0.246)	Data 0.001 (0.019)	Loss 3.9703 (4.0230)	Top-1 acc 31.641 (30.742)	Top-5 acc 55.469 (54.137)	lr 0.00245
Warmup Train [35][1050/3239]	Time 0.213 (0.246)	Data 0.001 (0.019)	Loss 4.0315 (4.0221)	Top-1 acc 27.344 (30.752)	Top-5 acc 54.297 (54.160)	lr 0.00244
Warmup Train [35][1060/3239]	Time 0.272 (0.246)	Data 0.001 (0.019)	Loss 3.7603 (4.0221)	Top-1 acc 41.016 (30.755)	Top-5 acc 58.203 (54.155)	lr 0.00244
Warmup Train [35][1070/3239]	Time 0.232 (0.246)	Data 0.002 (0.019)	Loss 4.2945 (4.0223)	Top-1 acc 27.734 (30.757)	Top-5 acc 48.438 (54.146)	lr 0.00244
Warmup Train [35][1080/3239]	Time 0.149 (0.246)	Data 0.001 (0.019)	Loss 4.2088 (4.0225)	Top-1 acc 28.125 (30.758)	Top-5 acc 49.609 (54.134)	lr 0.00244
Warmup Train [35][1090/3239]	Time 0.208 (0.246)	Data 0.001 (0.019)	Loss 4.1009 (4.0223)	Top-1 acc 27.734 (30.762)	Top-5 acc 50.781 (54.140)	lr 0.00243
Warmup Train [35][1100/3239]	Time 0.207 (0.245)	Data 0.002 (0.019)	Loss 3.9234 (4.0225)	Top-1 acc 37.109 (30.766)	Top-5 acc 55.078 (54.130)	lr 0.00243
Warmup Train [35][1110/3239]	Time 0.186 (0.245)	Data 0.001 (0.018)	Loss 4.0891 (4.0228)	Top-1 acc 30.078 (30.768)	Top-5 acc 54.297 (54.131)	lr 0.00243
Warmup Train [35][1120/3239]	Time 0.171 (0.245)	Data 0.002 (0.018)	Loss 3.9590 (4.0232)	Top-1 acc 30.469 (30.757)	Top-5 acc 55.469 (54.117)	lr 0.00242
Warmup Train [35][1130/3239]	Time 0.218 (0.245)	Data 0.001 (0.018)	Loss 4.1419 (4.0230)	Top-1 acc 31.641 (30.767)	Top-5 acc 52.344 (54.125)	lr 0.00242
Warmup Train [35][1140/3239]	Time 0.186 (0.244)	Data 0.001 (0.018)	Loss 4.0922 (4.0229)	Top-1 acc 32.031 (30.777)	Top-5 acc 52.734 (54.129)	lr 0.00242
Warmup Train [35][1150/3239]	Time 0.185 (0.244)	Data 0.001 (0.018)	Loss 3.9590 (4.0234)	Top-1 acc 31.641 (30.759)	Top-5 acc 54.297 (54.111)	lr 0.00242
Warmup Train [35][1160/3239]	Time 0.237 (0.244)	Data 0.001 (0.018)	Loss 3.7920 (4.0233)	Top-1 acc 39.453 (30.767)	Top-5 acc 58.203 (54.115)	lr 0.00241
Warmup Train [35][1170/3239]	Time 0.281 (0.244)	Data 0.001 (0.018)	Loss 3.9223 (4.0237)	Top-1 acc 34.375 (30.761)	Top-5 acc 55.859 (54.111)	lr 0.00241
Warmup Train [35][1180/3239]	Time 0.225 (0.244)	Data 0.001 (0.017)	Loss 4.0566 (4.0236)	Top-1 acc 30.859 (30.768)	Top-5 acc 50.781 (54.117)	lr 0.00241
Warmup Train [35][1190/3239]	Time 0.189 (0.244)	Data 0.001 (0.017)	Loss 4.1099 (4.0234)	Top-1 acc 30.078 (30.766)	Top-5 acc 51.562 (54.119)	lr 0.00241
Warmup Train [35][1200/3239]	Time 0.195 (0.243)	Data 0.001 (0.017)	Loss 3.8953 (4.0237)	Top-1 acc 33.594 (30.759)	Top-5 acc 54.688 (54.105)	lr 0.00240
Warmup Train [35][1210/3239]	Time 0.236 (0.243)	Data 0.001 (0.017)	Loss 3.9873 (4.0233)	Top-1 acc 33.594 (30.763)	Top-5 acc 55.859 (54.117)	lr 0.00240
Warmup Train [35][1220/3239]	Time 0.227 (0.243)	Data 0.001 (0.017)	Loss 4.0282 (4.0236)	Top-1 acc 30.859 (30.750)	Top-5 acc 55.859 (54.108)	lr 0.00240
Warmup Train [35][1230/3239]	Time 0.205 (0.243)	Data 0.001 (0.017)	Loss 4.1426 (4.0239)	Top-1 acc 29.297 (30.745)	Top-5 acc 48.828 (54.097)	lr 0.00240
Warmup Train [35][1240/3239]	Time 0.277 (0.243)	Data 0.001 (0.017)	Loss 4.0874 (4.0232)	Top-1 acc 29.297 (30.748)	Top-5 acc 51.172 (54.114)	lr 0.00239
Warmup Train [35][1250/3239]	Time 0.200 (0.243)	Data 0.001 (0.017)	Loss 4.1007 (4.0235)	Top-1 acc 32.031 (30.740)	Top-5 acc 52.734 (54.115)	lr 0.00239
Warmup Train [35][1260/3239]	Time 0.218 (0.243)	Data 0.001 (0.016)	Loss 3.8781 (4.0228)	Top-1 acc 32.422 (30.755)	Top-5 acc 57.031 (54.131)	lr 0.00239
Warmup Train [35][1270/3239]	Time 0.347 (0.243)	Data 0.002 (0.016)	Loss 4.2484 (4.0230)	Top-1 acc 23.828 (30.744)	Top-5 acc 46.484 (54.130)	lr 0.00239
Warmup Train [35][1280/3239]	Time 0.223 (0.242)	Data 0.002 (0.016)	Loss 3.9603 (4.0238)	Top-1 acc 35.547 (30.728)	Top-5 acc 56.641 (54.115)	lr 0.00238
Warmup Train [35][1290/3239]	Time 0.203 (0.242)	Data 0.001 (0.016)	Loss 4.0104 (4.0241)	Top-1 acc 27.734 (30.729)	Top-5 acc 56.641 (54.108)	lr 0.00238
Warmup Train [35][1300/3239]	Time 0.210 (0.242)	Data 0.001 (0.016)	Loss 3.9304 (4.0239)	Top-1 acc 34.375 (30.735)	Top-5 acc 55.859 (54.109)	lr 0.00238
Warmup Train [35][1310/3239]	Time 0.148 (0.242)	Data 0.001 (0.016)	Loss 4.1769 (4.0238)	Top-1 acc 26.953 (30.733)	Top-5 acc 51.172 (54.111)	lr 0.00238
Warmup Train [35][1320/3239]	Time 0.197 (0.242)	Data 0.001 (0.016)	Loss 4.1473 (4.0245)	Top-1 acc 28.906 (30.720)	Top-5 acc 51.562 (54.099)	lr 0.00237
Warmup Train [35][1330/3239]	Time 0.291 (0.242)	Data 0.001 (0.016)	Loss 3.9760 (4.0244)	Top-1 acc 28.906 (30.720)	Top-5 acc 55.859 (54.103)	lr 0.00237
Warmup Train [35][1340/3239]	Time 0.212 (0.242)	Data 0.002 (0.016)	Loss 3.9687 (4.0240)	Top-1 acc 29.297 (30.727)	Top-5 acc 58.594 (54.115)	lr 0.00237
Warmup Train [35][1350/3239]	Time 0.275 (0.241)	Data 0.001 (0.016)	Loss 4.0079 (4.0242)	Top-1 acc 30.469 (30.719)	Top-5 acc 54.297 (54.110)	lr 0.00237
Warmup Train [35][1360/3239]	Time 0.249 (0.241)	Data 0.001 (0.015)	Loss 3.9846 (4.0243)	Top-1 acc 33.594 (30.715)	Top-5 acc 54.688 (54.108)	lr 0.00236
Warmup Train [35][1370/3239]	Time 0.316 (0.241)	Data 0.001 (0.015)	Loss 3.9893 (4.0243)	Top-1 acc 29.688 (30.721)	Top-5 acc 53.516 (54.114)	lr 0.00236
Warmup Train [35][1380/3239]	Time 0.243 (0.241)	Data 0.001 (0.015)	Loss 4.0089 (4.0242)	Top-1 acc 29.297 (30.720)	Top-5 acc 55.078 (54.116)	lr 0.00236
Warmup Train [35][1390/3239]	Time 0.230 (0.241)	Data 0.002 (0.015)	Loss 4.0045 (4.0241)	Top-1 acc 29.688 (30.715)	Top-5 acc 54.688 (54.106)	lr 0.00236
Warmup Train [35][1400/3239]	Time 0.198 (0.241)	Data 0.001 (0.015)	Loss 3.8847 (4.0242)	Top-1 acc 33.984 (30.713)	Top-5 acc 57.422 (54.105)	lr 0.00235
Warmup Train [35][1410/3239]	Time 0.183 (0.241)	Data 0.002 (0.015)	Loss 3.8449 (4.0243)	Top-1 acc 31.250 (30.711)	Top-5 acc 56.641 (54.105)	lr 0.00235
Warmup Train [35][1420/3239]	Time 0.198 (0.241)	Data 0.001 (0.015)	Loss 4.1594 (4.0247)	Top-1 acc 26.953 (30.705)	Top-5 acc 52.344 (54.094)	lr 0.00235
Warmup Train [35][1430/3239]	Time 0.167 (0.241)	Data 0.001 (0.015)	Loss 3.9182 (4.0243)	Top-1 acc 35.156 (30.718)	Top-5 acc 55.469 (54.099)	lr 0.00234
Warmup Train [35][1440/3239]	Time 0.165 (0.240)	Data 0.001 (0.015)	Loss 3.9383 (4.0242)	Top-1 acc 30.469 (30.716)	Top-5 acc 59.766 (54.102)	lr 0.00234
Warmup Train [35][1450/3239]	Time 0.222 (0.240)	Data 0.001 (0.015)	Loss 4.1043 (4.0243)	Top-1 acc 26.562 (30.704)	Top-5 acc 50.781 (54.097)	lr 0.00234
Warmup Train [35][1460/3239]	Time 0.198 (0.240)	Data 0.001 (0.015)	Loss 3.9420 (4.0240)	Top-1 acc 33.984 (30.714)	Top-5 acc 54.297 (54.100)	lr 0.00234
Warmup Train [35][1470/3239]	Time 0.264 (0.240)	Data 0.003 (0.015)	Loss 3.9614 (4.0240)	Top-1 acc 33.984 (30.713)	Top-5 acc 53.516 (54.092)	lr 0.00233
Warmup Train [35][1480/3239]	Time 0.193 (0.240)	Data 0.001 (0.015)	Loss 3.8848 (4.0237)	Top-1 acc 35.938 (30.728)	Top-5 acc 58.984 (54.092)	lr 0.00233
Warmup Train [35][1490/3239]	Time 0.377 (0.240)	Data 0.032 (0.014)	Loss 4.0998 (4.0236)	Top-1 acc 27.344 (30.722)	Top-5 acc 52.344 (54.091)	lr 0.00233
Warmup Train [35][1500/3239]	Time 0.206 (0.240)	Data 0.002 (0.014)	Loss 3.8867 (4.0237)	Top-1 acc 37.500 (30.731)	Top-5 acc 57.812 (54.089)	lr 0.00233
Warmup Train [35][1510/3239]	Time 0.199 (0.240)	Data 0.001 (0.014)	Loss 3.9302 (4.0238)	Top-1 acc 32.031 (30.725)	Top-5 acc 54.688 (54.091)	lr 0.00232
Warmup Train [35][1520/3239]	Time 0.184 (0.239)	Data 0.001 (0.014)	Loss 4.0836 (4.0236)	Top-1 acc 31.250 (30.733)	Top-5 acc 52.344 (54.093)	lr 0.00232
Warmup Train [35][1530/3239]	Time 0.230 (0.239)	Data 0.002 (0.014)	Loss 3.9788 (4.0234)	Top-1 acc 31.250 (30.735)	Top-5 acc 54.688 (54.103)	lr 0.00232
Warmup Train [35][1540/3239]	Time 0.206 (0.239)	Data 0.001 (0.014)	Loss 4.0128 (4.0230)	Top-1 acc 34.375 (30.746)	Top-5 acc 55.469 (54.108)	lr 0.00232
Warmup Train [35][1550/3239]	Time 0.230 (0.239)	Data 0.002 (0.014)	Loss 3.9506 (4.0234)	Top-1 acc 36.719 (30.739)	Top-5 acc 57.422 (54.106)	lr 0.00231
Warmup Train [35][1560/3239]	Time 0.201 (0.239)	Data 0.001 (0.014)	Loss 3.6971 (4.0232)	Top-1 acc 38.672 (30.740)	Top-5 acc 61.719 (54.107)	lr 0.00231
Warmup Train [35][1570/3239]	Time 0.215 (0.239)	Data 0.001 (0.014)	Loss 4.0782 (4.0232)	Top-1 acc 31.250 (30.740)	Top-5 acc 51.953 (54.104)	lr 0.00231
Warmup Train [35][1580/3239]	Time 0.331 (0.239)	Data 0.024 (0.014)	Loss 3.8750 (4.0230)	Top-1 acc 35.156 (30.747)	Top-5 acc 62.109 (54.113)	lr 0.00231
Warmup Train [35][1590/3239]	Time 0.230 (0.239)	Data 0.001 (0.014)	Loss 3.8524 (4.0229)	Top-1 acc 34.375 (30.748)	Top-5 acc 58.984 (54.116)	lr 0.00230
Warmup Train [35][1600/3239]	Time 0.221 (0.238)	Data 0.002 (0.014)	Loss 4.1430 (4.0230)	Top-1 acc 28.906 (30.747)	Top-5 acc 51.562 (54.107)	lr 0.00230
Warmup Train [35][1610/3239]	Time 0.184 (0.238)	Data 0.001 (0.014)	Loss 4.0267 (4.0229)	Top-1 acc 28.125 (30.754)	Top-5 acc 53.516 (54.108)	lr 0.00230
Warmup Train [35][1620/3239]	Time 0.228 (0.238)	Data 0.001 (0.014)	Loss 3.7137 (4.0229)	Top-1 acc 34.766 (30.757)	Top-5 acc 62.109 (54.115)	lr 0.00230
Warmup Train [35][1630/3239]	Time 0.204 (0.238)	Data 0.001 (0.013)	Loss 4.1204 (4.0231)	Top-1 acc 26.953 (30.749)	Top-5 acc 53.906 (54.110)	lr 0.00229
Warmup Train [35][1640/3239]	Time 0.192 (0.238)	Data 0.002 (0.013)	Loss 3.8576 (4.0229)	Top-1 acc 33.594 (30.751)	Top-5 acc 58.203 (54.122)	lr 0.00229
Warmup Train [35][1650/3239]	Time 0.162 (0.238)	Data 0.001 (0.013)	Loss 4.1677 (4.0229)	Top-1 acc 26.562 (30.750)	Top-5 acc 47.656 (54.130)	lr 0.00229
Warmup Train [35][1660/3239]	Time 0.227 (0.238)	Data 0.001 (0.013)	Loss 3.9275 (4.0227)	Top-1 acc 31.641 (30.746)	Top-5 acc 55.078 (54.134)	lr 0.00229
Warmup Train [35][1670/3239]	Time 0.269 (0.238)	Data 0.001 (0.013)	Loss 4.0862 (4.0228)	Top-1 acc 26.562 (30.743)	Top-5 acc 52.344 (54.132)	lr 0.00228
Warmup Train [35][1680/3239]	Time 0.326 (0.238)	Data 0.002 (0.013)	Loss 3.8371 (4.0226)	Top-1 acc 35.547 (30.752)	Top-5 acc 57.812 (54.136)	lr 0.00228
Warmup Train [35][1690/3239]	Time 0.195 (0.238)	Data 0.001 (0.013)	Loss 3.8879 (4.0224)	Top-1 acc 34.766 (30.761)	Top-5 acc 57.031 (54.141)	lr 0.00228
Warmup Train [35][1700/3239]	Time 0.187 (0.238)	Data 0.001 (0.013)	Loss 4.0855 (4.0227)	Top-1 acc 29.688 (30.763)	Top-5 acc 51.562 (54.134)	lr 0.00228
Warmup Train [35][1710/3239]	Time 0.163 (0.237)	Data 0.001 (0.013)	Loss 4.1189 (4.0227)	Top-1 acc 30.078 (30.767)	Top-5 acc 49.219 (54.132)	lr 0.00227
Warmup Train [35][1720/3239]	Time 0.208 (0.237)	Data 0.001 (0.013)	Loss 4.1210 (4.0224)	Top-1 acc 27.344 (30.768)	Top-5 acc 50.000 (54.136)	lr 0.00227
Warmup Train [35][1730/3239]	Time 0.215 (0.237)	Data 0.001 (0.013)	Loss 3.8694 (4.0222)	Top-1 acc 33.203 (30.771)	Top-5 acc 58.203 (54.145)	lr 0.00227
Warmup Train [35][1740/3239]	Time 0.344 (0.237)	Data 0.001 (0.013)	Loss 3.9557 (4.0221)	Top-1 acc 32.031 (30.771)	Top-5 acc 53.906 (54.145)	lr 0.00227
Warmup Train [35][1750/3239]	Time 0.211 (0.237)	Data 0.002 (0.013)	Loss 4.0086 (4.0220)	Top-1 acc 31.250 (30.770)	Top-5 acc 54.688 (54.143)	lr 0.00226
Warmup Train [35][1760/3239]	Time 0.197 (0.237)	Data 0.001 (0.013)	Loss 4.0580 (4.0222)	Top-1 acc 28.125 (30.767)	Top-5 acc 54.297 (54.144)	lr 0.00226
Warmup Train [35][1770/3239]	Time 0.293 (0.237)	Data 0.001 (0.013)	Loss 3.8069 (4.0222)	Top-1 acc 32.812 (30.763)	Top-5 acc 57.812 (54.144)	lr 0.00226
Warmup Train [35][1780/3239]	Time 0.186 (0.237)	Data 0.001 (0.013)	Loss 4.0198 (4.0220)	Top-1 acc 34.766 (30.769)	Top-5 acc 54.688 (54.151)	lr 0.00226
Warmup Train [35][1790/3239]	Time 0.243 (0.237)	Data 0.002 (0.013)	Loss 3.9786 (4.0224)	Top-1 acc 29.688 (30.763)	Top-5 acc 53.906 (54.144)	lr 0.00225
Warmup Train [35][1800/3239]	Time 0.253 (0.237)	Data 0.002 (0.012)	Loss 3.8461 (4.0224)	Top-1 acc 37.500 (30.769)	Top-5 acc 62.109 (54.149)	lr 0.00225
Warmup Train [35][1810/3239]	Time 0.228 (0.237)	Data 0.001 (0.012)	Loss 3.8358 (4.0225)	Top-1 acc 32.812 (30.763)	Top-5 acc 57.031 (54.147)	lr 0.00225
Warmup Train [35][1820/3239]	Time 0.231 (0.237)	Data 0.002 (0.012)	Loss 4.0039 (4.0226)	Top-1 acc 30.469 (30.764)	Top-5 acc 55.469 (54.142)	lr 0.00225
Warmup Train [35][1830/3239]	Time 0.190 (0.237)	Data 0.001 (0.012)	Loss 4.1697 (4.0227)	Top-1 acc 27.734 (30.758)	Top-5 acc 52.734 (54.143)	lr 0.00224
Warmup Train [35][1840/3239]	Time 0.220 (0.237)	Data 0.001 (0.012)	Loss 3.9009 (4.0227)	Top-1 acc 29.688 (30.758)	Top-5 acc 54.688 (54.145)	lr 0.00224
Warmup Train [35][1850/3239]	Time 0.176 (0.237)	Data 0.001 (0.012)	Loss 4.0213 (4.0228)	Top-1 acc 30.859 (30.753)	Top-5 acc 54.297 (54.141)	lr 0.00224
Warmup Train [35][1860/3239]	Time 0.305 (0.237)	Data 0.001 (0.012)	Loss 3.8426 (4.0228)	Top-1 acc 33.203 (30.751)	Top-5 acc 55.078 (54.139)	lr 0.00224
Warmup Train [35][1870/3239]	Time 0.233 (0.237)	Data 0.002 (0.012)	Loss 3.8802 (4.0225)	Top-1 acc 34.375 (30.768)	Top-5 acc 58.594 (54.150)	lr 0.00223
Warmup Train [35][1880/3239]	Time 0.204 (0.236)	Data 0.002 (0.012)	Loss 3.8046 (4.0225)	Top-1 acc 33.984 (30.766)	Top-5 acc 61.719 (54.151)	lr 0.00223
Warmup Train [35][1890/3239]	Time 0.260 (0.236)	Data 0.001 (0.012)	Loss 4.1395 (4.0226)	Top-1 acc 30.078 (30.767)	Top-5 acc 52.734 (54.157)	lr 0.00223
Warmup Train [35][1900/3239]	Time 0.243 (0.236)	Data 0.002 (0.012)	Loss 4.0204 (4.0227)	Top-1 acc 28.125 (30.764)	Top-5 acc 54.297 (54.151)	lr 0.00223
Warmup Train [35][1910/3239]	Time 0.186 (0.236)	Data 0.001 (0.012)	Loss 4.2076 (4.0229)	Top-1 acc 27.344 (30.761)	Top-5 acc 50.391 (54.151)	lr 0.00222
Warmup Train [35][1920/3239]	Time 0.175 (0.236)	Data 0.002 (0.012)	Loss 4.1844 (4.0229)	Top-1 acc 28.125 (30.764)	Top-5 acc 51.953 (54.154)	lr 0.00222
Warmup Train [35][1930/3239]	Time 0.232 (0.236)	Data 0.001 (0.012)	Loss 4.0000 (4.0228)	Top-1 acc 28.906 (30.764)	Top-5 acc 53.906 (54.154)	lr 0.00222
Warmup Train [35][1940/3239]	Time 0.278 (0.236)	Data 0.001 (0.012)	Loss 3.9956 (4.0227)	Top-1 acc 30.469 (30.769)	Top-5 acc 55.078 (54.162)	lr 0.00222
Warmup Train [35][1950/3239]	Time 0.251 (0.236)	Data 0.027 (0.012)	Loss 3.9126 (4.0226)	Top-1 acc 33.203 (30.772)	Top-5 acc 57.422 (54.163)	lr 0.00221
Warmup Train [35][1960/3239]	Time 0.295 (0.236)	Data 0.002 (0.012)	Loss 3.8190 (4.0223)	Top-1 acc 32.422 (30.779)	Top-5 acc 58.594 (54.170)	lr 0.00221
Warmup Train [35][1970/3239]	Time 0.288 (0.236)	Data 0.002 (0.012)	Loss 3.8667 (4.0219)	Top-1 acc 33.984 (30.785)	Top-5 acc 56.250 (54.174)	lr 0.00221
Warmup Train [35][1980/3239]	Time 0.186 (0.236)	Data 0.002 (0.012)	Loss 4.1892 (4.0218)	Top-1 acc 28.125 (30.792)	Top-5 acc 48.828 (54.180)	lr 0.00221
Warmup Train [35][1990/3239]	Time 0.217 (0.236)	Data 0.001 (0.012)	Loss 3.9300 (4.0219)	Top-1 acc 31.641 (30.790)	Top-5 acc 57.422 (54.181)	lr 0.00220
Warmup Train [35][2000/3239]	Time 0.192 (0.236)	Data 0.001 (0.012)	Loss 4.1667 (4.0222)	Top-1 acc 27.734 (30.780)	Top-5 acc 51.562 (54.175)	lr 0.00220
Warmup Train [35][2010/3239]	Time 0.283 (0.236)	Data 0.002 (0.011)	Loss 4.0548 (4.0222)	Top-1 acc 33.203 (30.782)	Top-5 acc 55.859 (54.172)	lr 0.00220
Warmup Train [35][2020/3239]	Time 0.273 (0.236)	Data 0.002 (0.011)	Loss 3.9540 (4.0221)	Top-1 acc 30.469 (30.781)	Top-5 acc 54.688 (54.175)	lr 0.00220
Warmup Train [35][2030/3239]	Time 0.217 (0.235)	Data 0.001 (0.011)	Loss 4.0067 (4.0223)	Top-1 acc 32.812 (30.779)	Top-5 acc 55.859 (54.168)	lr 0.00219
Warmup Train [35][2040/3239]	Time 0.263 (0.235)	Data 0.001 (0.011)	Loss 3.9797 (4.0220)	Top-1 acc 30.469 (30.784)	Top-5 acc 56.250 (54.172)	lr 0.00219
Warmup Train [35][2050/3239]	Time 0.240 (0.235)	Data 0.001 (0.011)	Loss 4.0510 (4.0221)	Top-1 acc 32.031 (30.788)	Top-5 acc 53.516 (54.171)	lr 0.00219
Warmup Train [35][2060/3239]	Time 0.238 (0.235)	Data 0.002 (0.011)	Loss 3.8287 (4.0221)	Top-1 acc 36.328 (30.789)	Top-5 acc 57.812 (54.167)	lr 0.00219
Warmup Train [35][2070/3239]	Time 0.148 (0.235)	Data 0.001 (0.011)	Loss 3.9311 (4.0224)	Top-1 acc 30.859 (30.779)	Top-5 acc 57.812 (54.163)	lr 0.00218
Warmup Train [35][2080/3239]	Time 0.248 (0.235)	Data 0.001 (0.011)	Loss 3.9039 (4.0223)	Top-1 acc 32.812 (30.777)	Top-5 acc 56.250 (54.160)	lr 0.00218
Warmup Train [35][2090/3239]	Time 0.187 (0.235)	Data 0.002 (0.011)	Loss 4.1798 (4.0226)	Top-1 acc 28.125 (30.772)	Top-5 acc 50.391 (54.150)	lr 0.00218
Warmup Train [35][2100/3239]	Time 0.219 (0.235)	Data 0.002 (0.011)	Loss 4.0227 (4.0227)	Top-1 acc 30.469 (30.765)	Top-5 acc 54.688 (54.146)	lr 0.00218
Warmup Train [35][2110/3239]	Time 0.186 (0.235)	Data 0.002 (0.011)	Loss 3.9184 (4.0226)	Top-1 acc 30.859 (30.770)	Top-5 acc 57.031 (54.148)	lr 0.00217
Warmup Train [35][2120/3239]	Time 0.231 (0.235)	Data 0.001 (0.011)	Loss 3.8741 (4.0224)	Top-1 acc 30.469 (30.771)	Top-5 acc 54.297 (54.155)	lr 0.00217
Warmup Train [35][2130/3239]	Time 0.227 (0.235)	Data 0.001 (0.011)	Loss 3.8754 (4.0222)	Top-1 acc 33.984 (30.776)	Top-5 acc 59.375 (54.162)	lr 0.00217
Warmup Train [35][2140/3239]	Time 0.196 (0.235)	Data 0.001 (0.011)	Loss 4.0291 (4.0222)	Top-1 acc 26.953 (30.777)	Top-5 acc 54.297 (54.163)	lr 0.00217
Warmup Train [35][2150/3239]	Time 0.186 (0.235)	Data 0.002 (0.011)	Loss 3.9438 (4.0223)	Top-1 acc 35.547 (30.777)	Top-5 acc 57.422 (54.162)	lr 0.00216
Warmup Train [35][2160/3239]	Time 0.229 (0.235)	Data 0.001 (0.011)	Loss 4.0027 (4.0222)	Top-1 acc 30.859 (30.771)	Top-5 acc 53.906 (54.164)	lr 0.00216
Warmup Train [35][2170/3239]	Time 0.265 (0.235)	Data 0.001 (0.011)	Loss 4.0216 (4.0221)	Top-1 acc 26.562 (30.770)	Top-5 acc 51.172 (54.168)	lr 0.00216
Warmup Train [35][2180/3239]	Time 0.288 (0.235)	Data 0.001 (0.011)	Loss 4.0047 (4.0219)	Top-1 acc 33.203 (30.770)	Top-5 acc 53.516 (54.172)	lr 0.00216
Warmup Train [35][2190/3239]	Time 0.229 (0.235)	Data 0.002 (0.011)	Loss 4.1103 (4.0218)	Top-1 acc 30.859 (30.774)	Top-5 acc 56.250 (54.175)	lr 0.00215
Warmup Train [35][2200/3239]	Time 0.153 (0.235)	Data 0.001 (0.011)	Loss 4.1794 (4.0218)	Top-1 acc 24.609 (30.779)	Top-5 acc 51.562 (54.178)	lr 0.00215
Warmup Train [35][2210/3239]	Time 0.222 (0.235)	Data 0.002 (0.011)	Loss 4.1069 (4.0218)	Top-1 acc 32.812 (30.783)	Top-5 acc 52.344 (54.173)	lr 0.00215
Warmup Train [35][2220/3239]	Time 0.139 (0.234)	Data 0.001 (0.011)	Loss 4.2576 (4.0221)	Top-1 acc 27.344 (30.779)	Top-5 acc 47.266 (54.168)	lr 0.00215
Warmup Train [35][2230/3239]	Time 0.178 (0.234)	Data 0.001 (0.011)	Loss 4.2489 (4.0224)	Top-1 acc 23.047 (30.772)	Top-5 acc 49.219 (54.157)	lr 0.00214
Warmup Train [35][2240/3239]	Time 0.191 (0.234)	Data 0.002 (0.011)	Loss 3.8361 (4.0224)	Top-1 acc 32.422 (30.772)	Top-5 acc 60.938 (54.155)	lr 0.00214
Warmup Train [35][2250/3239]	Time 0.223 (0.234)	Data 0.002 (0.011)	Loss 4.0533 (4.0223)	Top-1 acc 26.953 (30.774)	Top-5 acc 53.125 (54.157)	lr 0.00214
Warmup Train [35][2260/3239]	Time 0.215 (0.234)	Data 0.002 (0.011)	Loss 3.9469 (4.0223)	Top-1 acc 36.328 (30.776)	Top-5 acc 55.078 (54.157)	lr 0.00214
Warmup Train [35][2270/3239]	Time 0.193 (0.234)	Data 0.002 (0.011)	Loss 3.9401 (4.0220)	Top-1 acc 31.250 (30.783)	Top-5 acc 59.766 (54.166)	lr 0.00213
Warmup Train [35][2280/3239]	Time 0.342 (0.234)	Data 0.001 (0.010)	Loss 4.0647 (4.0223)	Top-1 acc 32.812 (30.778)	Top-5 acc 51.172 (54.160)	lr 0.00213
Warmup Train [35][2290/3239]	Time 0.274 (0.234)	Data 0.001 (0.010)	Loss 4.2356 (4.0224)	Top-1 acc 26.562 (30.774)	Top-5 acc 48.438 (54.157)	lr 0.00213
Warmup Train [35][2300/3239]	Time 0.163 (0.234)	Data 0.001 (0.010)	Loss 3.9441 (4.0223)	Top-1 acc 34.766 (30.777)	Top-5 acc 55.859 (54.155)	lr 0.00213
Warmup Train [35][2310/3239]	Time 0.229 (0.234)	Data 0.001 (0.010)	Loss 3.8362 (4.0227)	Top-1 acc 33.984 (30.768)	Top-5 acc 58.984 (54.147)	lr 0.00212
Warmup Train [35][2320/3239]	Time 0.271 (0.234)	Data 0.001 (0.010)	Loss 3.9871 (4.0227)	Top-1 acc 31.250 (30.765)	Top-5 acc 56.641 (54.149)	lr 0.00212
Warmup Train [35][2330/3239]	Time 0.158 (0.234)	Data 0.001 (0.010)	Loss 4.0622 (4.0227)	Top-1 acc 29.297 (30.764)	Top-5 acc 56.641 (54.153)	lr 0.00212
Warmup Train [35][2340/3239]	Time 0.199 (0.234)	Data 0.001 (0.010)	Loss 4.4494 (4.0228)	Top-1 acc 19.531 (30.766)	Top-5 acc 41.016 (54.151)	lr 0.00212
Warmup Train [35][2350/3239]	Time 0.153 (0.234)	Data 0.001 (0.010)	Loss 3.9306 (4.0227)	Top-1 acc 32.812 (30.769)	Top-5 acc 57.812 (54.151)	lr 0.00211
Warmup Train [35][2360/3239]	Time 0.225 (0.234)	Data 0.001 (0.010)	Loss 4.0673 (4.0227)	Top-1 acc 30.078 (30.771)	Top-5 acc 55.859 (54.157)	lr 0.00211
Warmup Train [35][2370/3239]	Time 0.216 (0.234)	Data 0.001 (0.010)	Loss 3.9445 (4.0227)	Top-1 acc 32.812 (30.772)	Top-5 acc 56.250 (54.154)	lr 0.00211
Warmup Train [35][2380/3239]	Time 0.282 (0.234)	Data 0.001 (0.010)	Loss 4.1546 (4.0227)	Top-1 acc 27.734 (30.772)	Top-5 acc 50.781 (54.155)	lr 0.00211
Warmup Train [35][2390/3239]	Time 0.247 (0.234)	Data 0.001 (0.010)	Loss 4.1210 (4.0230)	Top-1 acc 29.297 (30.767)	Top-5 acc 51.953 (54.150)	lr 0.00211
Warmup Train [35][2400/3239]	Time 0.255 (0.234)	Data 0.001 (0.010)	Loss 3.9089 (4.0228)	Top-1 acc 34.766 (30.766)	Top-5 acc 57.812 (54.154)	lr 0.00210
Warmup Train [35][2410/3239]	Time 0.212 (0.234)	Data 0.001 (0.010)	Loss 3.9608 (4.0229)	Top-1 acc 32.031 (30.763)	Top-5 acc 55.859 (54.156)	lr 0.00210
Warmup Train [35][2420/3239]	Time 0.245 (0.234)	Data 0.001 (0.010)	Loss 3.8727 (4.0227)	Top-1 acc 32.812 (30.763)	Top-5 acc 57.812 (54.165)	lr 0.00210
Warmup Train [35][2430/3239]	Time 0.211 (0.234)	Data 0.001 (0.010)	Loss 4.0401 (4.0227)	Top-1 acc 29.297 (30.762)	Top-5 acc 57.422 (54.163)	lr 0.00210
Warmup Train [35][2440/3239]	Time 0.199 (0.234)	Data 0.002 (0.010)	Loss 4.2215 (4.0228)	Top-1 acc 28.125 (30.761)	Top-5 acc 50.391 (54.161)	lr 0.00209
Warmup Train [35][2450/3239]	Time 0.252 (0.233)	Data 0.001 (0.010)	Loss 4.0046 (4.0227)	Top-1 acc 30.469 (30.758)	Top-5 acc 52.344 (54.157)	lr 0.00209
Warmup Train [35][2460/3239]	Time 0.177 (0.233)	Data 0.001 (0.010)	Loss 3.9854 (4.0228)	Top-1 acc 27.734 (30.754)	Top-5 acc 55.469 (54.157)	lr 0.00209
Warmup Train [35][2470/3239]	Time 0.225 (0.233)	Data 0.001 (0.010)	Loss 3.9767 (4.0226)	Top-1 acc 31.250 (30.757)	Top-5 acc 53.125 (54.161)	lr 0.00209
Warmup Train [35][2480/3239]	Time 0.292 (0.233)	Data 0.001 (0.010)	Loss 3.9096 (4.0226)	Top-1 acc 32.031 (30.755)	Top-5 acc 59.375 (54.163)	lr 0.00208
Warmup Train [35][2490/3239]	Time 0.283 (0.233)	Data 0.001 (0.010)	Loss 4.1408 (4.0226)	Top-1 acc 31.250 (30.756)	Top-5 acc 50.781 (54.164)	lr 0.00208
Warmup Train [35][2500/3239]	Time 0.140 (0.233)	Data 0.001 (0.010)	Loss 4.0197 (4.0226)	Top-1 acc 29.297 (30.757)	Top-5 acc 51.562 (54.165)	lr 0.00208
Warmup Train [35][2510/3239]	Time 0.225 (0.233)	Data 0.001 (0.010)	Loss 4.0716 (4.0226)	Top-1 acc 29.688 (30.754)	Top-5 acc 51.953 (54.167)	lr 0.00208
Warmup Train [35][2520/3239]	Time 0.245 (0.233)	Data 0.001 (0.010)	Loss 4.0981 (4.0228)	Top-1 acc 26.953 (30.748)	Top-5 acc 51.953 (54.161)	lr 0.00207
Warmup Train [35][2530/3239]	Time 0.202 (0.233)	Data 0.001 (0.010)	Loss 3.9787 (4.0228)	Top-1 acc 32.422 (30.748)	Top-5 acc 57.422 (54.163)	lr 0.00207
Warmup Train [35][2540/3239]	Time 0.243 (0.233)	Data 0.001 (0.010)	Loss 3.9210 (4.0227)	Top-1 acc 34.375 (30.748)	Top-5 acc 57.422 (54.163)	lr 0.00207
Warmup Train [35][2550/3239]	Time 0.200 (0.233)	Data 0.002 (0.010)	Loss 3.9318 (4.0228)	Top-1 acc 31.641 (30.747)	Top-5 acc 57.422 (54.162)	lr 0.00207
Warmup Train [35][2560/3239]	Time 0.202 (0.233)	Data 0.001 (0.010)	Loss 4.1101 (4.0228)	Top-1 acc 28.125 (30.747)	Top-5 acc 51.953 (54.163)	lr 0.00206
Warmup Train [35][2570/3239]	Time 0.214 (0.233)	Data 0.001 (0.010)	Loss 4.1094 (4.0228)	Top-1 acc 27.344 (30.747)	Top-5 acc 51.953 (54.162)	lr 0.00206
Warmup Train [35][2580/3239]	Time 0.188 (0.233)	Data 0.002 (0.010)	Loss 4.0225 (4.0228)	Top-1 acc 31.250 (30.747)	Top-5 acc 51.953 (54.158)	lr 0.00206
Warmup Train [35][2590/3239]	Time 0.337 (0.233)	Data 0.001 (0.010)	Loss 4.0291 (4.0229)	Top-1 acc 32.031 (30.740)	Top-5 acc 55.078 (54.154)	lr 0.00206
Warmup Train [35][2600/3239]	Time 0.188 (0.233)	Data 0.003 (0.010)	Loss 4.1839 (4.0230)	Top-1 acc 36.719 (30.744)	Top-5 acc 51.172 (54.153)	lr 0.00205
Warmup Train [35][2610/3239]	Time 0.197 (0.233)	Data 0.002 (0.010)	Loss 3.9066 (4.0229)	Top-1 acc 33.594 (30.744)	Top-5 acc 59.375 (54.155)	lr 0.00205
Warmup Train [35][2620/3239]	Time 0.225 (0.233)	Data 0.002 (0.010)	Loss 3.9446 (4.0229)	Top-1 acc 32.031 (30.743)	Top-5 acc 55.859 (54.152)	lr 0.00205
Warmup Train [35][2630/3239]	Time 0.208 (0.233)	Data 0.001 (0.010)	Loss 3.9359 (4.0229)	Top-1 acc 34.375 (30.744)	Top-5 acc 56.641 (54.151)	lr 0.00205
Warmup Train [35][2640/3239]	Time 0.283 (0.233)	Data 0.002 (0.010)	Loss 3.8936 (4.0229)	Top-1 acc 32.031 (30.740)	Top-5 acc 58.984 (54.151)	lr 0.00204
Warmup Train [35][2650/3239]	Time 0.182 (0.233)	Data 0.001 (0.009)	Loss 3.8762 (4.0228)	Top-1 acc 34.375 (30.739)	Top-5 acc 57.031 (54.151)	lr 0.00204
Warmup Train [35][2660/3239]	Time 0.213 (0.232)	Data 0.001 (0.009)	Loss 3.9990 (4.0228)	Top-1 acc 31.250 (30.738)	Top-5 acc 54.297 (54.149)	lr 0.00204
Warmup Train [35][2670/3239]	Time 0.223 (0.232)	Data 0.006 (0.009)	Loss 3.9416 (4.0228)	Top-1 acc 31.250 (30.739)	Top-5 acc 53.125 (54.150)	lr 0.00204
Warmup Train [35][2680/3239]	Time 0.234 (0.232)	Data 0.002 (0.009)	Loss 3.7676 (4.0227)	Top-1 acc 34.766 (30.741)	Top-5 acc 60.547 (54.151)	lr 0.00204
Warmup Train [35][2690/3239]	Time 0.344 (0.232)	Data 0.001 (0.009)	Loss 4.0781 (4.0227)	Top-1 acc 29.688 (30.743)	Top-5 acc 53.906 (54.155)	lr 0.00203
Warmup Train [35][2700/3239]	Time 0.328 (0.232)	Data 0.001 (0.009)	Loss 4.1102 (4.0227)	Top-1 acc 30.078 (30.744)	Top-5 acc 50.781 (54.152)	lr 0.00203
Warmup Train [35][2710/3239]	Time 0.202 (0.232)	Data 0.002 (0.009)	Loss 4.0641 (4.0227)	Top-1 acc 30.078 (30.741)	Top-5 acc 55.469 (54.152)	lr 0.00203
Warmup Train [35][2720/3239]	Time 0.190 (0.232)	Data 0.001 (0.009)	Loss 4.0794 (4.0226)	Top-1 acc 27.344 (30.742)	Top-5 acc 53.906 (54.152)	lr 0.00203
Warmup Train [35][2730/3239]	Time 0.206 (0.232)	Data 0.001 (0.009)	Loss 3.9593 (4.0226)	Top-1 acc 33.984 (30.740)	Top-5 acc 55.078 (54.148)	lr 0.00202
Warmup Train [35][2740/3239]	Time 0.195 (0.232)	Data 0.001 (0.009)	Loss 4.1362 (4.0227)	Top-1 acc 28.906 (30.736)	Top-5 acc 48.828 (54.141)	lr 0.00202
Warmup Train [35][2750/3239]	Time 0.166 (0.232)	Data 0.001 (0.009)	Loss 3.9828 (4.0226)	Top-1 acc 32.031 (30.740)	Top-5 acc 57.812 (54.144)	lr 0.00202
Warmup Train [35][2760/3239]	Time 0.208 (0.232)	Data 0.001 (0.009)	Loss 4.0185 (4.0226)	Top-1 acc 33.203 (30.740)	Top-5 acc 54.688 (54.145)	lr 0.00202
Warmup Train [35][2770/3239]	Time 0.221 (0.232)	Data 0.002 (0.009)	Loss 3.8926 (4.0228)	Top-1 acc 32.422 (30.737)	Top-5 acc 56.250 (54.141)	lr 0.00201
Warmup Train [35][2780/3239]	Time 0.205 (0.232)	Data 0.001 (0.009)	Loss 4.0714 (4.0227)	Top-1 acc 33.984 (30.744)	Top-5 acc 53.516 (54.143)	lr 0.00201
Warmup Train [35][2790/3239]	Time 0.234 (0.232)	Data 0.001 (0.009)	Loss 3.8985 (4.0228)	Top-1 acc 33.594 (30.744)	Top-5 acc 57.031 (54.140)	lr 0.00201
Warmup Train [35][2800/3239]	Time 0.301 (0.232)	Data 0.001 (0.009)	Loss 4.0891 (4.0231)	Top-1 acc 28.125 (30.740)	Top-5 acc 52.734 (54.133)	lr 0.00201
Warmup Train [35][2810/3239]	Time 0.160 (0.232)	Data 0.001 (0.009)	Loss 4.0136 (4.0229)	Top-1 acc 31.641 (30.742)	Top-5 acc 54.297 (54.140)	lr 0.00200
Warmup Train [35][2820/3239]	Time 0.138 (0.232)	Data 0.001 (0.009)	Loss 3.9091 (4.0229)	Top-1 acc 28.125 (30.743)	Top-5 acc 51.172 (54.140)	lr 0.00200
Warmup Train [35][2830/3239]	Time 0.306 (0.232)	Data 0.001 (0.009)	Loss 4.1122 (4.0228)	Top-1 acc 26.562 (30.745)	Top-5 acc 57.031 (54.142)	lr 0.00200
Warmup Train [35][2840/3239]	Time 0.189 (0.232)	Data 0.001 (0.009)	Loss 3.9656 (4.0229)	Top-1 acc 31.641 (30.746)	Top-5 acc 54.297 (54.137)	lr 0.00200
Warmup Train [35][2850/3239]	Time 0.298 (0.232)	Data 0.037 (0.009)	Loss 3.7505 (4.0228)	Top-1 acc 34.375 (30.751)	Top-5 acc 60.547 (54.136)	lr 0.00199
Warmup Train [35][2860/3239]	Time 0.209 (0.232)	Data 0.001 (0.009)	Loss 4.1344 (4.0230)	Top-1 acc 26.953 (30.745)	Top-5 acc 46.875 (54.130)	lr 0.00199
Warmup Train [35][2870/3239]	Time 0.249 (0.232)	Data 0.001 (0.009)	Loss 4.2046 (4.0231)	Top-1 acc 25.391 (30.742)	Top-5 acc 50.391 (54.129)	lr 0.00199
Warmup Train [35][2880/3239]	Time 0.220 (0.232)	Data 0.001 (0.009)	Loss 4.0988 (4.0233)	Top-1 acc 26.562 (30.741)	Top-5 acc 51.172 (54.125)	lr 0.00199
Warmup Train [35][2890/3239]	Time 0.254 (0.232)	Data 0.001 (0.009)	Loss 3.9403 (4.0232)	Top-1 acc 33.594 (30.743)	Top-5 acc 57.422 (54.125)	lr 0.00199
Warmup Train [35][2900/3239]	Time 0.224 (0.232)	Data 0.001 (0.009)	Loss 3.9591 (4.0231)	Top-1 acc 29.688 (30.746)	Top-5 acc 55.078 (54.130)	lr 0.00198
Warmup Train [35][2910/3239]	Time 0.167 (0.232)	Data 0.001 (0.009)	Loss 4.1045 (4.0231)	Top-1 acc 31.250 (30.748)	Top-5 acc 52.344 (54.128)	lr 0.00198
Warmup Train [35][2920/3239]	Time 0.332 (0.232)	Data 0.001 (0.009)	Loss 3.9350 (4.0230)	Top-1 acc 32.422 (30.749)	Top-5 acc 57.031 (54.129)	lr 0.00198
Warmup Train [35][2930/3239]	Time 0.236 (0.232)	Data 0.001 (0.009)	Loss 3.8343 (4.0228)	Top-1 acc 34.766 (30.756)	Top-5 acc 60.938 (54.136)	lr 0.00198
Warmup Train [35][2940/3239]	Time 0.204 (0.231)	Data 0.001 (0.009)	Loss 3.9010 (4.0228)	Top-1 acc 29.688 (30.758)	Top-5 acc 59.375 (54.138)	lr 0.00197
Warmup Train [35][2950/3239]	Time 0.337 (0.231)	Data 0.001 (0.009)	Loss 4.1425 (4.0229)	Top-1 acc 28.906 (30.758)	Top-5 acc 51.172 (54.137)	lr 0.00197
Warmup Train [35][2960/3239]	Time 0.159 (0.231)	Data 0.001 (0.009)	Loss 4.2510 (4.0230)	Top-1 acc 28.125 (30.758)	Top-5 acc 49.609 (54.133)	lr 0.00197
Warmup Train [35][2970/3239]	Time 0.171 (0.231)	Data 0.001 (0.009)	Loss 4.1677 (4.0231)	Top-1 acc 33.984 (30.760)	Top-5 acc 53.125 (54.130)	lr 0.00197
Warmup Train [35][2980/3239]	Time 0.251 (0.231)	Data 0.002 (0.009)	Loss 4.1048 (4.0232)	Top-1 acc 29.688 (30.755)	Top-5 acc 50.391 (54.127)	lr 0.00196
Warmup Train [35][2990/3239]	Time 0.200 (0.231)	Data 0.001 (0.009)	Loss 4.0053 (4.0232)	Top-1 acc 30.859 (30.758)	Top-5 acc 54.688 (54.127)	lr 0.00196
Warmup Train [35][3000/3239]	Time 0.199 (0.231)	Data 0.001 (0.009)	Loss 3.9693 (4.0234)	Top-1 acc 33.984 (30.758)	Top-5 acc 55.859 (54.124)	lr 0.00196
Warmup Train [35][3010/3239]	Time 0.341 (0.231)	Data 0.001 (0.009)	Loss 4.0129 (4.0234)	Top-1 acc 33.203 (30.758)	Top-5 acc 53.516 (54.122)	lr 0.00196
Warmup Train [35][3020/3239]	Time 0.159 (0.231)	Data 0.001 (0.009)	Loss 4.0902 (4.0233)	Top-1 acc 26.562 (30.755)	Top-5 acc 51.562 (54.122)	lr 0.00195
Warmup Train [35][3030/3239]	Time 0.185 (0.231)	Data 0.001 (0.009)	Loss 3.9232 (4.0232)	Top-1 acc 32.422 (30.757)	Top-5 acc 53.906 (54.122)	lr 0.00195
Warmup Train [35][3040/3239]	Time 0.245 (0.231)	Data 0.002 (0.009)	Loss 3.9672 (4.0231)	Top-1 acc 28.125 (30.754)	Top-5 acc 57.812 (54.126)	lr 0.00195
Warmup Train [35][3050/3239]	Time 0.201 (0.231)	Data 0.002 (0.009)	Loss 3.9454 (4.0229)	Top-1 acc 33.984 (30.761)	Top-5 acc 55.859 (54.134)	lr 0.00195
Warmup Train [35][3060/3239]	Time 0.233 (0.231)	Data 0.001 (0.009)	Loss 4.0056 (4.0228)	Top-1 acc 27.734 (30.763)	Top-5 acc 51.953 (54.137)	lr 0.00194
Warmup Train [35][3070/3239]	Time 0.191 (0.231)	Data 0.001 (0.009)	Loss 4.1947 (4.0228)	Top-1 acc 28.516 (30.762)	Top-5 acc 49.219 (54.131)	lr 0.00194
Warmup Train [35][3080/3239]	Time 0.185 (0.231)	Data 0.001 (0.009)	Loss 4.0880 (4.0229)	Top-1 acc 30.469 (30.759)	Top-5 acc 54.688 (54.132)	lr 0.00194
Warmup Train [35][3090/3239]	Time 0.227 (0.231)	Data 0.001 (0.009)	Loss 4.0486 (4.0230)	Top-1 acc 30.078 (30.761)	Top-5 acc 51.562 (54.130)	lr 0.00194
Warmup Train [35][3100/3239]	Time 0.242 (0.231)	Data 0.001 (0.009)	Loss 4.0659 (4.0230)	Top-1 acc 32.422 (30.765)	Top-5 acc 53.516 (54.133)	lr 0.00194
Warmup Train [35][3110/3239]	Time 0.188 (0.231)	Data 0.002 (0.008)	Loss 4.2511 (4.0229)	Top-1 acc 29.297 (30.763)	Top-5 acc 51.172 (54.138)	lr 0.00193
Warmup Train [35][3120/3239]	Time 0.339 (0.231)	Data 0.001 (0.008)	Loss 4.0588 (4.0230)	Top-1 acc 32.422 (30.764)	Top-5 acc 55.469 (54.138)	lr 0.00193
Warmup Train [35][3130/3239]	Time 0.146 (0.231)	Data 0.001 (0.008)	Loss 4.0858 (4.0228)	Top-1 acc 26.953 (30.770)	Top-5 acc 53.906 (54.144)	lr 0.00193
Warmup Train [35][3140/3239]	Time 0.218 (0.231)	Data 0.001 (0.008)	Loss 4.1530 (4.0228)	Top-1 acc 28.125 (30.768)	Top-5 acc 54.688 (54.146)	lr 0.00193
Warmup Train [35][3150/3239]	Time 0.183 (0.231)	Data 0.001 (0.008)	Loss 3.9945 (4.0227)	Top-1 acc 32.422 (30.769)	Top-5 acc 54.688 (54.149)	lr 0.00192
Warmup Train [35][3160/3239]	Time 0.224 (0.231)	Data 0.001 (0.008)	Loss 4.0961 (4.0228)	Top-1 acc 32.031 (30.764)	Top-5 acc 51.953 (54.148)	lr 0.00192
Warmup Train [35][3170/3239]	Time 0.233 (0.231)	Data 0.002 (0.008)	Loss 4.0323 (4.0229)	Top-1 acc 31.641 (30.760)	Top-5 acc 55.859 (54.146)	lr 0.00192
Warmup Train [35][3180/3239]	Time 0.222 (0.231)	Data 0.000 (0.008)	Loss 3.8913 (4.0229)	Top-1 acc 30.078 (30.758)	Top-5 acc 56.250 (54.148)	lr 0.00192
Warmup Train [35][3190/3239]	Time 0.225 (0.231)	Data 0.000 (0.008)	Loss 3.9570 (4.0229)	Top-1 acc 33.203 (30.759)	Top-5 acc 55.469 (54.144)	lr 0.00191
Warmup Train [35][3200/3239]	Time 0.207 (0.231)	Data 0.000 (0.008)	Loss 4.0301 (4.0229)	Top-1 acc 32.422 (30.763)	Top-5 acc 56.641 (54.143)	lr 0.00191
Warmup Train [35][3210/3239]	Time 0.198 (0.230)	Data 0.000 (0.008)	Loss 3.8984 (4.0229)	Top-1 acc 30.078 (30.763)	Top-5 acc 59.766 (54.145)	lr 0.00191
Warmup Train [35][3220/3239]	Time 0.189 (0.230)	Data 0.000 (0.008)	Loss 4.0183 (4.0229)	Top-1 acc 31.250 (30.764)	Top-5 acc 54.297 (54.145)	lr 0.00191
Warmup Train [35][3230/3239]	Time 0.255 (0.230)	Data 0.000 (0.008)	Loss 4.1991 (4.0230)	Top-1 acc 24.609 (30.762)	Top-5 acc 48.828 (54.143)	lr 0.00191
Warmup Train [35][3239/3239]	Time 0.251 (0.230)	Data 0.000 (0.008)	Loss 4.3027 (4.0230)	Top-1 acc 28.395 (30.761)	Top-5 acc 48.148 (54.143)	lr 0.00190
==========Warmup Valid [35/40]	loss 2.956	top-1 acc 38.300	top-5 acc 63.077	Train top-1 30.761	top-5 54.143	flops: 442.4M
Warmup Train [36][0/3239]	Time 20.252 (20.252)	Data 18.546 (18.546)	Loss 3.9748 (3.9748)	Top-1 acc 31.641 (31.641)	Top-5 acc 54.297 (54.297)	lr 0.00190
Warmup Train [36][10/3239]	Time 0.276 (2.105)	Data 0.002 (1.690)	Loss 3.8963 (4.0080)	Top-1 acc 29.688 (31.179)	Top-5 acc 56.641 (54.190)	lr 0.00190
Warmup Train [36][20/3239]	Time 0.195 (1.210)	Data 0.001 (0.886)	Loss 4.2287 (4.0379)	Top-1 acc 26.172 (30.469)	Top-5 acc 46.875 (53.590)	lr 0.00190
Warmup Train [36][30/3239]	Time 0.342 (0.898)	Data 0.001 (0.602)	Loss 3.9655 (4.0272)	Top-1 acc 31.250 (30.481)	Top-5 acc 57.422 (53.856)	lr 0.00190
Warmup Train [36][40/3239]	Time 0.288 (0.735)	Data 0.001 (0.456)	Loss 3.9139 (4.0283)	Top-1 acc 35.938 (30.840)	Top-5 acc 57.422 (53.735)	lr 0.00189
Warmup Train [36][50/3239]	Time 0.292 (0.637)	Data 0.001 (0.367)	Loss 3.8987 (4.0260)	Top-1 acc 32.031 (30.614)	Top-5 acc 55.469 (53.768)	lr 0.00189
Warmup Train [36][60/3239]	Time 0.231 (0.570)	Data 0.001 (0.307)	Loss 3.9960 (4.0246)	Top-1 acc 30.469 (30.661)	Top-5 acc 53.125 (53.842)	lr 0.00189
Warmup Train [36][70/3239]	Time 0.264 (0.522)	Data 0.001 (0.265)	Loss 3.9125 (4.0149)	Top-1 acc 29.297 (30.815)	Top-5 acc 55.078 (54.000)	lr 0.00189
Warmup Train [36][80/3239]	Time 0.209 (0.485)	Data 0.001 (0.232)	Loss 4.0700 (4.0238)	Top-1 acc 29.688 (30.691)	Top-5 acc 55.469 (53.805)	lr 0.00188
Warmup Train [36][90/3239]	Time 0.146 (0.455)	Data 0.001 (0.207)	Loss 3.9444 (4.0291)	Top-1 acc 26.953 (30.593)	Top-5 acc 55.078 (53.730)	lr 0.00188
Warmup Train [36][100/3239]	Time 0.162 (0.433)	Data 0.001 (0.187)	Loss 4.2391 (4.0237)	Top-1 acc 27.344 (30.739)	Top-5 acc 49.609 (53.868)	lr 0.00188
Warmup Train [36][110/3239]	Time 0.256 (0.415)	Data 0.001 (0.170)	Loss 3.7623 (4.0244)	Top-1 acc 34.375 (30.722)	Top-5 acc 62.109 (53.801)	lr 0.00188
Warmup Train [36][120/3239]	Time 0.215 (0.399)	Data 0.001 (0.156)	Loss 3.7623 (4.0217)	Top-1 acc 35.547 (30.792)	Top-5 acc 60.156 (53.832)	lr 0.00188
Warmup Train [36][130/3239]	Time 0.292 (0.387)	Data 0.001 (0.144)	Loss 4.1639 (4.0203)	Top-1 acc 27.734 (30.800)	Top-5 acc 49.609 (53.873)	lr 0.00187
Warmup Train [36][140/3239]	Time 0.266 (0.377)	Data 0.001 (0.134)	Loss 3.9009 (4.0175)	Top-1 acc 32.422 (30.818)	Top-5 acc 53.906 (53.964)	lr 0.00187
Warmup Train [36][150/3239]	Time 0.356 (0.366)	Data 0.002 (0.126)	Loss 4.1660 (4.0158)	Top-1 acc 25.781 (30.872)	Top-5 acc 51.172 (54.038)	lr 0.00187
Warmup Train [36][160/3239]	Time 0.244 (0.358)	Data 0.002 (0.118)	Loss 4.0102 (4.0148)	Top-1 acc 28.906 (30.852)	Top-5 acc 51.172 (54.049)	lr 0.00187
Warmup Train [36][170/3239]	Time 0.145 (0.350)	Data 0.001 (0.111)	Loss 4.0048 (4.0152)	Top-1 acc 33.984 (30.864)	Top-5 acc 55.078 (54.041)	lr 0.00186
Warmup Train [36][180/3239]	Time 0.206 (0.343)	Data 0.001 (0.105)	Loss 3.9039 (4.0142)	Top-1 acc 31.250 (30.887)	Top-5 acc 56.250 (54.096)	lr 0.00186
Warmup Train [36][190/3239]	Time 0.244 (0.337)	Data 0.001 (0.100)	Loss 3.8983 (4.0138)	Top-1 acc 35.938 (30.894)	Top-5 acc 59.375 (54.162)	lr 0.00186
Warmup Train [36][200/3239]	Time 0.202 (0.332)	Data 0.001 (0.095)	Loss 4.0704 (4.0144)	Top-1 acc 27.734 (30.830)	Top-5 acc 51.953 (54.097)	lr 0.00186
Warmup Train [36][210/3239]	Time 0.255 (0.326)	Data 0.002 (0.091)	Loss 4.1128 (4.0163)	Top-1 acc 30.859 (30.761)	Top-5 acc 51.562 (54.056)	lr 0.00185
Warmup Train [36][220/3239]	Time 0.182 (0.321)	Data 0.001 (0.087)	Loss 4.0697 (4.0167)	Top-1 acc 28.906 (30.757)	Top-5 acc 54.688 (54.030)	lr 0.00185
Warmup Train [36][230/3239]	Time 0.187 (0.318)	Data 0.001 (0.083)	Loss 3.9644 (4.0164)	Top-1 acc 31.641 (30.795)	Top-5 acc 56.250 (54.052)	lr 0.00185
Warmup Train [36][240/3239]	Time 0.189 (0.314)	Data 0.001 (0.080)	Loss 3.8495 (4.0174)	Top-1 acc 30.078 (30.749)	Top-5 acc 54.688 (54.036)	lr 0.00185
Warmup Train [36][250/3239]	Time 0.376 (0.311)	Data 0.001 (0.077)	Loss 4.0057 (4.0174)	Top-1 acc 30.078 (30.763)	Top-5 acc 55.078 (54.032)	lr 0.00185
Warmup Train [36][260/3239]	Time 0.221 (0.307)	Data 0.001 (0.074)	Loss 4.0089 (4.0186)	Top-1 acc 32.031 (30.726)	Top-5 acc 54.688 (54.023)	lr 0.00184
Warmup Train [36][270/3239]	Time 0.232 (0.305)	Data 0.001 (0.071)	Loss 3.9831 (4.0193)	Top-1 acc 29.688 (30.724)	Top-5 acc 51.562 (53.981)	lr 0.00184
Warmup Train [36][280/3239]	Time 0.280 (0.301)	Data 0.002 (0.069)	Loss 3.9245 (4.0183)	Top-1 acc 29.297 (30.741)	Top-5 acc 58.594 (54.022)	lr 0.00184
Warmup Train [36][290/3239]	Time 0.180 (0.299)	Data 0.001 (0.067)	Loss 3.6800 (4.0164)	Top-1 acc 34.375 (30.767)	Top-5 acc 58.984 (54.097)	lr 0.00184
Warmup Train [36][300/3239]	Time 0.217 (0.296)	Data 0.001 (0.065)	Loss 3.9144 (4.0166)	Top-1 acc 35.547 (30.771)	Top-5 acc 53.906 (54.122)	lr 0.00183
Warmup Train [36][310/3239]	Time 0.266 (0.294)	Data 0.001 (0.063)	Loss 3.8177 (4.0186)	Top-1 acc 35.938 (30.749)	Top-5 acc 58.594 (54.058)	lr 0.00183
Warmup Train [36][320/3239]	Time 0.185 (0.292)	Data 0.001 (0.061)	Loss 4.1305 (4.0190)	Top-1 acc 28.516 (30.757)	Top-5 acc 52.734 (54.049)	lr 0.00183
Warmup Train [36][330/3239]	Time 0.160 (0.290)	Data 0.001 (0.059)	Loss 3.8275 (4.0194)	Top-1 acc 34.375 (30.735)	Top-5 acc 59.766 (54.055)	lr 0.00183
Warmup Train [36][340/3239]	Time 0.270 (0.288)	Data 0.001 (0.057)	Loss 3.9339 (4.0188)	Top-1 acc 30.078 (30.749)	Top-5 acc 55.469 (54.039)	lr 0.00182
Warmup Train [36][350/3239]	Time 0.312 (0.286)	Data 0.001 (0.056)	Loss 3.9908 (4.0174)	Top-1 acc 30.469 (30.769)	Top-5 acc 53.516 (54.060)	lr 0.00182
Warmup Train [36][360/3239]	Time 0.253 (0.285)	Data 0.002 (0.054)	Loss 3.9247 (4.0166)	Top-1 acc 31.641 (30.791)	Top-5 acc 54.688 (54.096)	lr 0.00182
Warmup Train [36][370/3239]	Time 0.167 (0.283)	Data 0.002 (0.053)	Loss 3.9532 (4.0167)	Top-1 acc 29.688 (30.780)	Top-5 acc 55.078 (54.082)	lr 0.00182
Warmup Train [36][380/3239]	Time 0.247 (0.281)	Data 0.001 (0.052)	Loss 4.1460 (4.0169)	Top-1 acc 29.297 (30.762)	Top-5 acc 52.734 (54.074)	lr 0.00182
Warmup Train [36][390/3239]	Time 0.184 (0.279)	Data 0.001 (0.050)	Loss 4.0129 (4.0182)	Top-1 acc 27.344 (30.743)	Top-5 acc 55.078 (54.075)	lr 0.00181
Warmup Train [36][400/3239]	Time 0.170 (0.278)	Data 0.002 (0.049)	Loss 4.0271 (4.0205)	Top-1 acc 32.031 (30.676)	Top-5 acc 54.297 (54.016)	lr 0.00181
Warmup Train [36][410/3239]	Time 0.256 (0.277)	Data 0.001 (0.048)	Loss 3.9242 (4.0206)	Top-1 acc 33.984 (30.694)	Top-5 acc 54.688 (54.037)	lr 0.00181
Warmup Train [36][420/3239]	Time 0.229 (0.275)	Data 0.002 (0.047)	Loss 3.8676 (4.0191)	Top-1 acc 37.500 (30.737)	Top-5 acc 58.984 (54.082)	lr 0.00181
Warmup Train [36][430/3239]	Time 0.259 (0.274)	Data 0.001 (0.046)	Loss 3.9989 (4.0185)	Top-1 acc 32.422 (30.753)	Top-5 acc 55.078 (54.101)	lr 0.00180
Warmup Train [36][440/3239]	Time 0.381 (0.274)	Data 0.002 (0.045)	Loss 4.0182 (4.0176)	Top-1 acc 29.688 (30.763)	Top-5 acc 52.734 (54.132)	lr 0.00180
Warmup Train [36][450/3239]	Time 0.323 (0.273)	Data 0.001 (0.044)	Loss 4.0979 (4.0175)	Top-1 acc 27.734 (30.743)	Top-5 acc 54.688 (54.141)	lr 0.00180
Warmup Train [36][460/3239]	Time 0.174 (0.272)	Data 0.001 (0.043)	Loss 3.9834 (4.0178)	Top-1 acc 34.766 (30.767)	Top-5 acc 54.688 (54.144)	lr 0.00180
Warmup Train [36][470/3239]	Time 0.235 (0.271)	Data 0.002 (0.042)	Loss 4.0184 (4.0178)	Top-1 acc 28.516 (30.742)	Top-5 acc 55.078 (54.138)	lr 0.00180
Warmup Train [36][480/3239]	Time 0.156 (0.269)	Data 0.001 (0.041)	Loss 3.9631 (4.0179)	Top-1 acc 26.562 (30.737)	Top-5 acc 57.422 (54.144)	lr 0.00179
Warmup Train [36][490/3239]	Time 0.259 (0.269)	Data 0.002 (0.041)	Loss 4.0652 (4.0172)	Top-1 acc 30.469 (30.755)	Top-5 acc 51.953 (54.165)	lr 0.00179
Warmup Train [36][500/3239]	Time 0.221 (0.268)	Data 0.001 (0.040)	Loss 4.0753 (4.0173)	Top-1 acc 28.516 (30.765)	Top-5 acc 50.000 (54.161)	lr 0.00179
Warmup Train [36][510/3239]	Time 0.239 (0.267)	Data 0.001 (0.039)	Loss 4.0057 (4.0171)	Top-1 acc 28.906 (30.767)	Top-5 acc 51.953 (54.155)	lr 0.00179
Warmup Train [36][520/3239]	Time 0.164 (0.266)	Data 0.001 (0.039)	Loss 4.0401 (4.0174)	Top-1 acc 32.422 (30.763)	Top-5 acc 51.562 (54.156)	lr 0.00178
Warmup Train [36][530/3239]	Time 0.139 (0.265)	Data 0.001 (0.038)	Loss 3.8944 (4.0165)	Top-1 acc 34.375 (30.784)	Top-5 acc 55.859 (54.168)	lr 0.00178
Warmup Train [36][540/3239]	Time 0.291 (0.265)	Data 0.001 (0.037)	Loss 4.1475 (4.0167)	Top-1 acc 31.641 (30.766)	Top-5 acc 52.344 (54.165)	lr 0.00178
Warmup Train [36][550/3239]	Time 0.233 (0.264)	Data 0.001 (0.037)	Loss 4.1410 (4.0167)	Top-1 acc 31.641 (30.754)	Top-5 acc 51.172 (54.183)	lr 0.00178
Warmup Train [36][560/3239]	Time 0.177 (0.263)	Data 0.002 (0.036)	Loss 3.9491 (4.0159)	Top-1 acc 30.469 (30.763)	Top-5 acc 54.688 (54.206)	lr 0.00178
Warmup Train [36][570/3239]	Time 0.220 (0.262)	Data 0.002 (0.036)	Loss 3.8977 (4.0151)	Top-1 acc 32.031 (30.779)	Top-5 acc 58.984 (54.217)	lr 0.00177
Warmup Train [36][580/3239]	Time 0.192 (0.262)	Data 0.001 (0.035)	Loss 4.0044 (4.0142)	Top-1 acc 29.297 (30.801)	Top-5 acc 54.297 (54.229)	lr 0.00177
Warmup Train [36][590/3239]	Time 0.188 (0.261)	Data 0.001 (0.034)	Loss 4.1522 (4.0138)	Top-1 acc 29.688 (30.801)	Top-5 acc 51.953 (54.247)	lr 0.00177
Warmup Train [36][600/3239]	Time 0.164 (0.260)	Data 0.001 (0.034)	Loss 4.0423 (4.0138)	Top-1 acc 28.516 (30.805)	Top-5 acc 54.297 (54.248)	lr 0.00177
Warmup Train [36][610/3239]	Time 0.263 (0.260)	Data 0.001 (0.033)	Loss 4.0290 (4.0134)	Top-1 acc 31.250 (30.806)	Top-5 acc 57.812 (54.251)	lr 0.00176
Warmup Train [36][620/3239]	Time 0.229 (0.259)	Data 0.001 (0.033)	Loss 3.8848 (4.0137)	Top-1 acc 34.766 (30.802)	Top-5 acc 58.203 (54.246)	lr 0.00176
Warmup Train [36][630/3239]	Time 0.370 (0.259)	Data 0.001 (0.032)	Loss 4.1388 (4.0144)	Top-1 acc 25.781 (30.782)	Top-5 acc 48.438 (54.219)	lr 0.00176
Warmup Train [36][640/3239]	Time 0.217 (0.258)	Data 0.001 (0.032)	Loss 4.0841 (4.0146)	Top-1 acc 28.516 (30.794)	Top-5 acc 55.078 (54.240)	lr 0.00176
Warmup Train [36][650/3239]	Time 0.237 (0.258)	Data 0.001 (0.031)	Loss 4.0430 (4.0146)	Top-1 acc 32.422 (30.798)	Top-5 acc 53.516 (54.242)	lr 0.00176
Warmup Train [36][660/3239]	Time 0.232 (0.257)	Data 0.001 (0.031)	Loss 4.0673 (4.0147)	Top-1 acc 27.734 (30.799)	Top-5 acc 51.172 (54.235)	lr 0.00175
Warmup Train [36][670/3239]	Time 0.185 (0.257)	Data 0.001 (0.031)	Loss 3.7987 (4.0155)	Top-1 acc 35.938 (30.780)	Top-5 acc 59.375 (54.222)	lr 0.00175
Warmup Train [36][680/3239]	Time 0.194 (0.256)	Data 0.002 (0.030)	Loss 4.2080 (4.0163)	Top-1 acc 25.000 (30.758)	Top-5 acc 49.219 (54.204)	lr 0.00175
Warmup Train [36][690/3239]	Time 0.255 (0.256)	Data 0.001 (0.030)	Loss 4.0281 (4.0158)	Top-1 acc 30.469 (30.769)	Top-5 acc 55.078 (54.226)	lr 0.00175
Warmup Train [36][700/3239]	Time 0.248 (0.255)	Data 0.001 (0.029)	Loss 3.9690 (4.0162)	Top-1 acc 28.125 (30.759)	Top-5 acc 53.516 (54.212)	lr 0.00174
Warmup Train [36][710/3239]	Time 0.155 (0.255)	Data 0.001 (0.029)	Loss 3.9871 (4.0154)	Top-1 acc 31.250 (30.793)	Top-5 acc 55.859 (54.241)	lr 0.00174
Warmup Train [36][720/3239]	Time 0.161 (0.255)	Data 0.002 (0.029)	Loss 4.1841 (4.0148)	Top-1 acc 30.469 (30.799)	Top-5 acc 51.562 (54.267)	lr 0.00174
Warmup Train [36][730/3239]	Time 0.409 (0.254)	Data 0.001 (0.028)	Loss 3.9247 (4.0146)	Top-1 acc 32.422 (30.806)	Top-5 acc 56.250 (54.270)	lr 0.00174
Warmup Train [36][740/3239]	Time 0.132 (0.254)	Data 0.001 (0.028)	Loss 3.9784 (4.0141)	Top-1 acc 31.250 (30.826)	Top-5 acc 57.031 (54.289)	lr 0.00174
Warmup Train [36][750/3239]	Time 0.171 (0.253)	Data 0.001 (0.028)	Loss 4.0191 (4.0148)	Top-1 acc 31.641 (30.814)	Top-5 acc 57.031 (54.291)	lr 0.00173
Warmup Train [36][760/3239]	Time 0.239 (0.253)	Data 0.001 (0.027)	Loss 4.0832 (4.0145)	Top-1 acc 29.688 (30.825)	Top-5 acc 52.344 (54.308)	lr 0.00173
Warmup Train [36][770/3239]	Time 0.232 (0.253)	Data 0.002 (0.027)	Loss 3.9892 (4.0140)	Top-1 acc 28.906 (30.831)	Top-5 acc 55.078 (54.326)	lr 0.00173
Warmup Train [36][780/3239]	Time 0.222 (0.252)	Data 0.002 (0.027)	Loss 4.2627 (4.0144)	Top-1 acc 26.953 (30.813)	Top-5 acc 51.562 (54.320)	lr 0.00173
Warmup Train [36][790/3239]	Time 0.209 (0.252)	Data 0.001 (0.026)	Loss 4.0632 (4.0139)	Top-1 acc 25.391 (30.808)	Top-5 acc 52.734 (54.319)	lr 0.00172
Warmup Train [36][800/3239]	Time 0.210 (0.252)	Data 0.002 (0.026)	Loss 3.9145 (4.0143)	Top-1 acc 33.984 (30.804)	Top-5 acc 57.422 (54.317)	lr 0.00172
Warmup Train [36][810/3239]	Time 0.223 (0.251)	Data 0.001 (0.026)	Loss 4.0941 (4.0147)	Top-1 acc 26.562 (30.783)	Top-5 acc 52.734 (54.308)	lr 0.00172
Warmup Train [36][820/3239]	Time 0.221 (0.251)	Data 0.001 (0.026)	Loss 4.0949 (4.0145)	Top-1 acc 28.125 (30.779)	Top-5 acc 54.297 (54.313)	lr 0.00172
Warmup Train [36][830/3239]	Time 0.307 (0.251)	Data 0.002 (0.025)	Loss 4.0674 (4.0145)	Top-1 acc 27.344 (30.774)	Top-5 acc 53.125 (54.320)	lr 0.00172
Warmup Train [36][840/3239]	Time 0.292 (0.251)	Data 0.001 (0.025)	Loss 4.0976 (4.0152)	Top-1 acc 33.594 (30.753)	Top-5 acc 51.953 (54.302)	lr 0.00171
Warmup Train [36][850/3239]	Time 0.168 (0.250)	Data 0.001 (0.025)	Loss 4.0099 (4.0151)	Top-1 acc 31.641 (30.761)	Top-5 acc 55.859 (54.290)	lr 0.00171
Warmup Train [36][860/3239]	Time 0.216 (0.250)	Data 0.002 (0.025)	Loss 3.9184 (4.0148)	Top-1 acc 32.031 (30.770)	Top-5 acc 57.031 (54.302)	lr 0.00171
Warmup Train [36][870/3239]	Time 0.224 (0.250)	Data 0.001 (0.024)	Loss 4.1096 (4.0148)	Top-1 acc 29.688 (30.772)	Top-5 acc 57.031 (54.307)	lr 0.00171
Warmup Train [36][880/3239]	Time 0.289 (0.249)	Data 0.001 (0.024)	Loss 4.0188 (4.0146)	Top-1 acc 31.641 (30.782)	Top-5 acc 55.078 (54.309)	lr 0.00170
Warmup Train [36][890/3239]	Time 0.222 (0.249)	Data 0.001 (0.024)	Loss 3.9999 (4.0146)	Top-1 acc 32.031 (30.778)	Top-5 acc 54.688 (54.304)	lr 0.00170
Warmup Train [36][900/3239]	Time 0.190 (0.249)	Data 0.001 (0.024)	Loss 4.1973 (4.0148)	Top-1 acc 28.906 (30.785)	Top-5 acc 51.172 (54.302)	lr 0.00170
Warmup Train [36][910/3239]	Time 0.162 (0.248)	Data 0.001 (0.023)	Loss 4.0916 (4.0151)	Top-1 acc 28.125 (30.777)	Top-5 acc 51.172 (54.296)	lr 0.00170
Warmup Train [36][920/3239]	Time 0.212 (0.248)	Data 0.001 (0.023)	Loss 3.9868 (4.0153)	Top-1 acc 30.078 (30.778)	Top-5 acc 54.297 (54.296)	lr 0.00170
Warmup Train [36][930/3239]	Time 0.170 (0.248)	Data 0.001 (0.023)	Loss 4.0620 (4.0146)	Top-1 acc 30.859 (30.784)	Top-5 acc 51.562 (54.300)	lr 0.00169
Warmup Train [36][940/3239]	Time 0.390 (0.248)	Data 0.001 (0.023)	Loss 3.9082 (4.0147)	Top-1 acc 30.859 (30.788)	Top-5 acc 56.250 (54.299)	lr 0.00169
Warmup Train [36][950/3239]	Time 0.195 (0.248)	Data 0.001 (0.023)	Loss 3.8505 (4.0144)	Top-1 acc 33.203 (30.794)	Top-5 acc 60.156 (54.315)	lr 0.00169
Warmup Train [36][960/3239]	Time 0.205 (0.247)	Data 0.001 (0.022)	Loss 4.0612 (4.0147)	Top-1 acc 28.125 (30.793)	Top-5 acc 54.688 (54.307)	lr 0.00169
Warmup Train [36][970/3239]	Time 0.222 (0.247)	Data 0.001 (0.022)	Loss 3.9956 (4.0145)	Top-1 acc 31.250 (30.794)	Top-5 acc 55.078 (54.314)	lr 0.00168
Warmup Train [36][980/3239]	Time 0.208 (0.247)	Data 0.001 (0.022)	Loss 3.8918 (4.0148)	Top-1 acc 35.156 (30.798)	Top-5 acc 56.641 (54.314)	lr 0.00168
Warmup Train [36][990/3239]	Time 0.172 (0.246)	Data 0.001 (0.022)	Loss 3.9363 (4.0144)	Top-1 acc 33.594 (30.803)	Top-5 acc 54.688 (54.320)	lr 0.00168
Warmup Train [36][1000/3239]	Time 0.197 (0.246)	Data 0.001 (0.022)	Loss 3.9531 (4.0143)	Top-1 acc 32.812 (30.808)	Top-5 acc 55.078 (54.320)	lr 0.00168
Warmup Train [36][1010/3239]	Time 0.203 (0.246)	Data 0.001 (0.021)	Loss 3.9827 (4.0143)	Top-1 acc 33.984 (30.812)	Top-5 acc 53.906 (54.325)	lr 0.00168
Warmup Train [36][1020/3239]	Time 0.240 (0.246)	Data 0.001 (0.021)	Loss 4.1009 (4.0141)	Top-1 acc 26.172 (30.805)	Top-5 acc 51.562 (54.323)	lr 0.00167
Warmup Train [36][1030/3239]	Time 0.223 (0.245)	Data 0.002 (0.021)	Loss 3.8917 (4.0138)	Top-1 acc 32.422 (30.810)	Top-5 acc 55.078 (54.330)	lr 0.00167
Warmup Train [36][1040/3239]	Time 0.369 (0.245)	Data 0.001 (0.021)	Loss 4.0020 (4.0131)	Top-1 acc 31.250 (30.827)	Top-5 acc 52.734 (54.355)	lr 0.00167
Warmup Train [36][1050/3239]	Time 0.214 (0.245)	Data 0.002 (0.021)	Loss 4.0899 (4.0131)	Top-1 acc 28.906 (30.820)	Top-5 acc 53.125 (54.346)	lr 0.00167
Warmup Train [36][1060/3239]	Time 0.213 (0.245)	Data 0.002 (0.021)	Loss 4.2206 (4.0133)	Top-1 acc 25.391 (30.813)	Top-5 acc 48.438 (54.338)	lr 0.00166
Warmup Train [36][1070/3239]	Time 0.214 (0.245)	Data 0.002 (0.020)	Loss 3.8469 (4.0138)	Top-1 acc 38.672 (30.805)	Top-5 acc 61.719 (54.333)	lr 0.00166
Warmup Train [36][1080/3239]	Time 0.237 (0.245)	Data 0.001 (0.020)	Loss 4.0454 (4.0140)	Top-1 acc 30.469 (30.799)	Top-5 acc 52.734 (54.333)	lr 0.00166
Warmup Train [36][1090/3239]	Time 0.180 (0.244)	Data 0.001 (0.020)	Loss 3.9642 (4.0135)	Top-1 acc 34.375 (30.813)	Top-5 acc 55.469 (54.335)	lr 0.00166
Warmup Train [36][1100/3239]	Time 0.248 (0.244)	Data 0.002 (0.020)	Loss 4.0307 (4.0127)	Top-1 acc 32.031 (30.821)	Top-5 acc 56.641 (54.354)	lr 0.00166
Warmup Train [36][1110/3239]	Time 0.187 (0.244)	Data 0.001 (0.020)	Loss 3.6984 (4.0124)	Top-1 acc 36.719 (30.821)	Top-5 acc 63.281 (54.365)	lr 0.00165
Warmup Train [36][1120/3239]	Time 0.172 (0.244)	Data 0.001 (0.020)	Loss 3.9329 (4.0123)	Top-1 acc 30.078 (30.821)	Top-5 acc 56.250 (54.372)	lr 0.00165
Warmup Train [36][1130/3239]	Time 0.204 (0.244)	Data 0.001 (0.019)	Loss 4.0735 (4.0125)	Top-1 acc 32.031 (30.816)	Top-5 acc 53.516 (54.366)	lr 0.00165
Warmup Train [36][1140/3239]	Time 0.315 (0.243)	Data 0.001 (0.019)	Loss 3.9043 (4.0126)	Top-1 acc 33.984 (30.815)	Top-5 acc 56.641 (54.365)	lr 0.00165
Warmup Train [36][1150/3239]	Time 0.213 (0.243)	Data 0.004 (0.019)	Loss 4.2837 (4.0131)	Top-1 acc 28.125 (30.817)	Top-5 acc 48.828 (54.355)	lr 0.00165
Warmup Train [36][1160/3239]	Time 0.206 (0.243)	Data 0.001 (0.019)	Loss 4.0886 (4.0130)	Top-1 acc 31.641 (30.831)	Top-5 acc 53.125 (54.362)	lr 0.00164
Warmup Train [36][1170/3239]	Time 0.177 (0.243)	Data 0.001 (0.019)	Loss 3.9994 (4.0125)	Top-1 acc 31.250 (30.836)	Top-5 acc 57.031 (54.378)	lr 0.00164
Warmup Train [36][1180/3239]	Time 0.179 (0.243)	Data 0.001 (0.019)	Loss 3.9010 (4.0129)	Top-1 acc 32.812 (30.826)	Top-5 acc 55.469 (54.374)	lr 0.00164
Warmup Train [36][1190/3239]	Time 0.281 (0.243)	Data 0.001 (0.019)	Loss 3.9016 (4.0130)	Top-1 acc 32.812 (30.831)	Top-5 acc 58.984 (54.370)	lr 0.00164
Warmup Train [36][1200/3239]	Time 0.157 (0.242)	Data 0.001 (0.018)	Loss 4.1076 (4.0133)	Top-1 acc 32.812 (30.826)	Top-5 acc 52.344 (54.359)	lr 0.00163
Warmup Train [36][1210/3239]	Time 0.211 (0.242)	Data 0.001 (0.018)	Loss 4.1758 (4.0135)	Top-1 acc 28.516 (30.838)	Top-5 acc 53.906 (54.358)	lr 0.00163
Warmup Train [36][1220/3239]	Time 0.272 (0.242)	Data 0.002 (0.018)	Loss 3.8988 (4.0132)	Top-1 acc 32.422 (30.846)	Top-5 acc 55.859 (54.365)	lr 0.00163
Warmup Train [36][1230/3239]	Time 0.167 (0.242)	Data 0.001 (0.018)	Loss 3.9841 (4.0127)	Top-1 acc 32.031 (30.855)	Top-5 acc 53.516 (54.382)	lr 0.00163
Warmup Train [36][1240/3239]	Time 0.145 (0.242)	Data 0.001 (0.018)	Loss 4.1222 (4.0128)	Top-1 acc 28.125 (30.848)	Top-5 acc 51.953 (54.378)	lr 0.00163
Warmup Train [36][1250/3239]	Time 0.289 (0.242)	Data 0.002 (0.018)	Loss 3.9877 (4.0123)	Top-1 acc 33.203 (30.856)	Top-5 acc 56.641 (54.394)	lr 0.00162
Warmup Train [36][1260/3239]	Time 0.169 (0.241)	Data 0.001 (0.018)	Loss 4.1451 (4.0126)	Top-1 acc 24.219 (30.849)	Top-5 acc 50.391 (54.388)	lr 0.00162
Warmup Train [36][1270/3239]	Time 0.249 (0.241)	Data 0.001 (0.018)	Loss 4.1147 (4.0129)	Top-1 acc 28.125 (30.829)	Top-5 acc 51.172 (54.383)	lr 0.00162
Warmup Train [36][1280/3239]	Time 0.198 (0.241)	Data 0.001 (0.018)	Loss 4.0521 (4.0135)	Top-1 acc 28.516 (30.825)	Top-5 acc 53.906 (54.373)	lr 0.00162
Warmup Train [36][1290/3239]	Time 0.204 (0.241)	Data 0.001 (0.017)	Loss 3.9924 (4.0134)	Top-1 acc 30.859 (30.825)	Top-5 acc 57.422 (54.386)	lr 0.00162
Warmup Train [36][1300/3239]	Time 0.221 (0.241)	Data 0.002 (0.017)	Loss 4.0962 (4.0136)	Top-1 acc 27.734 (30.817)	Top-5 acc 53.906 (54.382)	lr 0.00161
Warmup Train [36][1310/3239]	Time 0.242 (0.241)	Data 0.001 (0.017)	Loss 3.8586 (4.0134)	Top-1 acc 32.031 (30.816)	Top-5 acc 57.422 (54.381)	lr 0.00161
Warmup Train [36][1320/3239]	Time 0.209 (0.241)	Data 0.001 (0.017)	Loss 3.9583 (4.0135)	Top-1 acc 33.594 (30.809)	Top-5 acc 60.547 (54.385)	lr 0.00161
Warmup Train [36][1330/3239]	Time 0.216 (0.241)	Data 0.001 (0.017)	Loss 3.8929 (4.0134)	Top-1 acc 33.594 (30.806)	Top-5 acc 59.766 (54.388)	lr 0.00161
Warmup Train [36][1340/3239]	Time 0.290 (0.241)	Data 0.002 (0.017)	Loss 4.0152 (4.0131)	Top-1 acc 30.469 (30.818)	Top-5 acc 53.516 (54.393)	lr 0.00160
Warmup Train [36][1350/3239]	Time 0.308 (0.240)	Data 0.001 (0.017)	Loss 4.0831 (4.0131)	Top-1 acc 26.953 (30.824)	Top-5 acc 52.344 (54.392)	lr 0.00160
Warmup Train [36][1360/3239]	Time 0.206 (0.240)	Data 0.001 (0.017)	Loss 4.1868 (4.0135)	Top-1 acc 28.906 (30.815)	Top-5 acc 50.391 (54.384)	lr 0.00160
Warmup Train [36][1370/3239]	Time 0.197 (0.240)	Data 0.001 (0.017)	Loss 3.9670 (4.0134)	Top-1 acc 29.688 (30.823)	Top-5 acc 56.641 (54.388)	lr 0.00160
Warmup Train [36][1380/3239]	Time 0.167 (0.240)	Data 0.001 (0.016)	Loss 3.8444 (4.0131)	Top-1 acc 28.516 (30.827)	Top-5 acc 60.156 (54.392)	lr 0.00160
Warmup Train [36][1390/3239]	Time 0.224 (0.240)	Data 0.002 (0.016)	Loss 4.0980 (4.0131)	Top-1 acc 30.859 (30.827)	Top-5 acc 51.562 (54.394)	lr 0.00159
Warmup Train [36][1400/3239]	Time 0.243 (0.240)	Data 0.001 (0.016)	Loss 4.0695 (4.0131)	Top-1 acc 29.297 (30.823)	Top-5 acc 54.688 (54.391)	lr 0.00159
Warmup Train [36][1410/3239]	Time 0.170 (0.240)	Data 0.002 (0.016)	Loss 4.0327 (4.0131)	Top-1 acc 29.688 (30.828)	Top-5 acc 55.469 (54.387)	lr 0.00159
Warmup Train [36][1420/3239]	Time 0.242 (0.240)	Data 0.002 (0.016)	Loss 3.9211 (4.0128)	Top-1 acc 34.766 (30.829)	Top-5 acc 56.250 (54.400)	lr 0.00159
Warmup Train [36][1430/3239]	Time 0.195 (0.239)	Data 0.001 (0.016)	Loss 4.1024 (4.0130)	Top-1 acc 27.734 (30.827)	Top-5 acc 52.734 (54.400)	lr 0.00159
Warmup Train [36][1440/3239]	Time 0.184 (0.239)	Data 0.001 (0.016)	Loss 3.9574 (4.0126)	Top-1 acc 35.156 (30.838)	Top-5 acc 53.906 (54.404)	lr 0.00158
Warmup Train [36][1450/3239]	Time 0.219 (0.239)	Data 0.001 (0.016)	Loss 4.0520 (4.0130)	Top-1 acc 33.594 (30.837)	Top-5 acc 50.000 (54.389)	lr 0.00158
Warmup Train [36][1460/3239]	Time 0.334 (0.239)	Data 0.001 (0.016)	Loss 3.9063 (4.0130)	Top-1 acc 30.859 (30.831)	Top-5 acc 58.984 (54.385)	lr 0.00158
Warmup Train [36][1470/3239]	Time 0.225 (0.239)	Data 0.001 (0.016)	Loss 3.8415 (4.0131)	Top-1 acc 31.641 (30.824)	Top-5 acc 57.422 (54.379)	lr 0.00158
Warmup Train [36][1480/3239]	Time 0.224 (0.239)	Data 0.001 (0.016)	Loss 3.9125 (4.0133)	Top-1 acc 32.812 (30.822)	Top-5 acc 54.297 (54.374)	lr 0.00157
Warmup Train [36][1490/3239]	Time 0.233 (0.239)	Data 0.001 (0.016)	Loss 3.8555 (4.0135)	Top-1 acc 33.984 (30.823)	Top-5 acc 58.594 (54.363)	lr 0.00157
Warmup Train [36][1500/3239]	Time 0.223 (0.239)	Data 0.002 (0.015)	Loss 3.9715 (4.0139)	Top-1 acc 30.859 (30.813)	Top-5 acc 54.297 (54.356)	lr 0.00157
Warmup Train [36][1510/3239]	Time 0.181 (0.239)	Data 0.001 (0.015)	Loss 4.1227 (4.0141)	Top-1 acc 31.250 (30.810)	Top-5 acc 54.688 (54.346)	lr 0.00157
Warmup Train [36][1520/3239]	Time 0.213 (0.239)	Data 0.002 (0.015)	Loss 3.9912 (4.0143)	Top-1 acc 31.250 (30.810)	Top-5 acc 55.859 (54.344)	lr 0.00157
Warmup Train [36][1530/3239]	Time 0.158 (0.238)	Data 0.001 (0.015)	Loss 4.1981 (4.0143)	Top-1 acc 25.391 (30.811)	Top-5 acc 51.172 (54.337)	lr 0.00156
Warmup Train [36][1540/3239]	Time 0.245 (0.238)	Data 0.005 (0.015)	Loss 4.1299 (4.0145)	Top-1 acc 30.078 (30.810)	Top-5 acc 52.344 (54.335)	lr 0.00156
Warmup Train [36][1550/3239]	Time 0.192 (0.238)	Data 0.002 (0.015)	Loss 4.2413 (4.0147)	Top-1 acc 25.781 (30.807)	Top-5 acc 44.922 (54.333)	lr 0.00156
Warmup Train [36][1560/3239]	Time 0.298 (0.238)	Data 0.001 (0.015)	Loss 3.8976 (4.0146)	Top-1 acc 31.641 (30.814)	Top-5 acc 54.688 (54.336)	lr 0.00156
Warmup Train [36][1570/3239]	Time 0.224 (0.238)	Data 0.001 (0.015)	Loss 4.1572 (4.0146)	Top-1 acc 29.297 (30.821)	Top-5 acc 53.125 (54.337)	lr 0.00156
Warmup Train [36][1580/3239]	Time 0.196 (0.238)	Data 0.001 (0.015)	Loss 4.1647 (4.0147)	Top-1 acc 24.609 (30.819)	Top-5 acc 53.516 (54.339)	lr 0.00155
Warmup Train [36][1590/3239]	Time 0.191 (0.238)	Data 0.001 (0.015)	Loss 4.1132 (4.0146)	Top-1 acc 29.297 (30.828)	Top-5 acc 50.000 (54.342)	lr 0.00155
Warmup Train [36][1600/3239]	Time 0.190 (0.238)	Data 0.001 (0.015)	Loss 3.8857 (4.0142)	Top-1 acc 30.469 (30.824)	Top-5 acc 57.031 (54.345)	lr 0.00155
Warmup Train [36][1610/3239]	Time 0.198 (0.237)	Data 0.001 (0.015)	Loss 4.0804 (4.0140)	Top-1 acc 30.078 (30.830)	Top-5 acc 51.953 (54.349)	lr 0.00155
Warmup Train [36][1620/3239]	Time 0.219 (0.237)	Data 0.001 (0.015)	Loss 3.9059 (4.0139)	Top-1 acc 34.766 (30.829)	Top-5 acc 57.812 (54.344)	lr 0.00155
Warmup Train [36][1630/3239]	Time 0.220 (0.237)	Data 0.002 (0.014)	Loss 4.0943 (4.0142)	Top-1 acc 30.078 (30.821)	Top-5 acc 54.297 (54.341)	lr 0.00154
Warmup Train [36][1640/3239]	Time 0.252 (0.237)	Data 0.001 (0.014)	Loss 4.0375 (4.0141)	Top-1 acc 30.078 (30.826)	Top-5 acc 51.172 (54.343)	lr 0.00154
Warmup Train [36][1650/3239]	Time 0.152 (0.237)	Data 0.001 (0.014)	Loss 3.8947 (4.0140)	Top-1 acc 34.375 (30.832)	Top-5 acc 55.859 (54.343)	lr 0.00154
Warmup Train [36][1660/3239]	Time 0.332 (0.237)	Data 0.001 (0.014)	Loss 4.2078 (4.0142)	Top-1 acc 30.469 (30.833)	Top-5 acc 52.344 (54.340)	lr 0.00154
Warmup Train [36][1670/3239]	Time 0.244 (0.237)	Data 0.001 (0.014)	Loss 3.8913 (4.0138)	Top-1 acc 35.547 (30.843)	Top-5 acc 60.156 (54.347)	lr 0.00153
Warmup Train [36][1680/3239]	Time 0.233 (0.237)	Data 0.001 (0.014)	Loss 4.0335 (4.0138)	Top-1 acc 31.250 (30.843)	Top-5 acc 56.250 (54.349)	lr 0.00153
Warmup Train [36][1690/3239]	Time 0.252 (0.237)	Data 0.001 (0.014)	Loss 3.9618 (4.0138)	Top-1 acc 30.859 (30.841)	Top-5 acc 56.250 (54.352)	lr 0.00153
Warmup Train [36][1700/3239]	Time 0.210 (0.237)	Data 0.001 (0.014)	Loss 4.0716 (4.0139)	Top-1 acc 28.906 (30.833)	Top-5 acc 52.344 (54.346)	lr 0.00153
Warmup Train [36][1710/3239]	Time 0.256 (0.237)	Data 0.002 (0.014)	Loss 3.8140 (4.0138)	Top-1 acc 37.891 (30.838)	Top-5 acc 58.984 (54.350)	lr 0.00153
Warmup Train [36][1720/3239]	Time 0.216 (0.237)	Data 0.001 (0.014)	Loss 3.9071 (4.0136)	Top-1 acc 33.203 (30.841)	Top-5 acc 56.250 (54.349)	lr 0.00152
Warmup Train [36][1730/3239]	Time 0.329 (0.237)	Data 0.002 (0.014)	Loss 3.8554 (4.0136)	Top-1 acc 31.250 (30.839)	Top-5 acc 60.547 (54.352)	lr 0.00152
Warmup Train [36][1740/3239]	Time 0.187 (0.237)	Data 0.002 (0.014)	Loss 3.9409 (4.0135)	Top-1 acc 34.375 (30.836)	Top-5 acc 56.641 (54.355)	lr 0.00152
Warmup Train [36][1750/3239]	Time 0.146 (0.237)	Data 0.001 (0.014)	Loss 3.9484 (4.0130)	Top-1 acc 31.641 (30.848)	Top-5 acc 56.641 (54.371)	lr 0.00152
Warmup Train [36][1760/3239]	Time 0.301 (0.237)	Data 0.001 (0.014)	Loss 3.9976 (4.0132)	Top-1 acc 31.250 (30.851)	Top-5 acc 53.516 (54.361)	lr 0.00152
Warmup Train [36][1770/3239]	Time 0.255 (0.237)	Data 0.001 (0.014)	Loss 3.8266 (4.0128)	Top-1 acc 35.156 (30.863)	Top-5 acc 61.328 (54.377)	lr 0.00151
Warmup Train [36][1780/3239]	Time 0.239 (0.237)	Data 0.001 (0.014)	Loss 4.1034 (4.0129)	Top-1 acc 34.375 (30.870)	Top-5 acc 49.609 (54.375)	lr 0.00151
Warmup Train [36][1790/3239]	Time 0.222 (0.237)	Data 0.001 (0.013)	Loss 3.9323 (4.0125)	Top-1 acc 35.547 (30.880)	Top-5 acc 55.078 (54.385)	lr 0.00151
Warmup Train [36][1800/3239]	Time 0.231 (0.237)	Data 0.001 (0.013)	Loss 4.0276 (4.0124)	Top-1 acc 32.812 (30.884)	Top-5 acc 52.344 (54.386)	lr 0.00151
Warmup Train [36][1810/3239]	Time 0.238 (0.237)	Data 0.001 (0.013)	Loss 3.9461 (4.0121)	Top-1 acc 31.641 (30.893)	Top-5 acc 57.031 (54.398)	lr 0.00151
Warmup Train [36][1820/3239]	Time 0.141 (0.236)	Data 0.001 (0.013)	Loss 4.3029 (4.0122)	Top-1 acc 25.391 (30.896)	Top-5 acc 46.484 (54.395)	lr 0.00150
Warmup Train [36][1830/3239]	Time 0.171 (0.236)	Data 0.001 (0.013)	Loss 3.9970 (4.0121)	Top-1 acc 29.688 (30.903)	Top-5 acc 52.734 (54.397)	lr 0.00150
Warmup Train [36][1840/3239]	Time 0.194 (0.236)	Data 0.001 (0.013)	Loss 3.9294 (4.0126)	Top-1 acc 32.812 (30.895)	Top-5 acc 54.297 (54.386)	lr 0.00150
Warmup Train [36][1850/3239]	Time 0.130 (0.236)	Data 0.001 (0.013)	Loss 3.9313 (4.0123)	Top-1 acc 37.109 (30.907)	Top-5 acc 58.984 (54.392)	lr 0.00150
Warmup Train [36][1860/3239]	Time 0.395 (0.236)	Data 0.002 (0.013)	Loss 4.1071 (4.0122)	Top-1 acc 26.953 (30.907)	Top-5 acc 53.906 (54.400)	lr 0.00150
Warmup Train [36][1870/3239]	Time 0.449 (0.236)	Data 0.002 (0.013)	Loss 3.6707 (4.0119)	Top-1 acc 35.547 (30.916)	Top-5 acc 61.328 (54.408)	lr 0.00149
Warmup Train [36][1880/3239]	Time 0.327 (0.236)	Data 0.002 (0.013)	Loss 4.0173 (4.0122)	Top-1 acc 33.203 (30.912)	Top-5 acc 55.078 (54.403)	lr 0.00149
Warmup Train [36][1890/3239]	Time 0.324 (0.237)	Data 0.002 (0.013)	Loss 3.9226 (4.0120)	Top-1 acc 34.375 (30.915)	Top-5 acc 57.031 (54.406)	lr 0.00149
Warmup Train [36][1900/3239]	Time 0.225 (0.237)	Data 0.001 (0.013)	Loss 4.1014 (4.0118)	Top-1 acc 27.344 (30.919)	Top-5 acc 52.344 (54.407)	lr 0.00149
Warmup Train [36][1910/3239]	Time 0.308 (0.237)	Data 0.001 (0.013)	Loss 3.9086 (4.0119)	Top-1 acc 36.719 (30.918)	Top-5 acc 60.547 (54.406)	lr 0.00148
Warmup Train [36][1920/3239]	Time 0.284 (0.237)	Data 0.002 (0.013)	Loss 3.8775 (4.0117)	Top-1 acc 29.297 (30.918)	Top-5 acc 60.938 (54.410)	lr 0.00148
Warmup Train [36][1930/3239]	Time 0.265 (0.237)	Data 0.027 (0.013)	Loss 4.0797 (4.0113)	Top-1 acc 32.031 (30.926)	Top-5 acc 54.297 (54.419)	lr 0.00148
Warmup Train [36][1940/3239]	Time 0.214 (0.237)	Data 0.001 (0.013)	Loss 4.0647 (4.0113)	Top-1 acc 32.422 (30.925)	Top-5 acc 54.297 (54.414)	lr 0.00148
Warmup Train [36][1950/3239]	Time 0.291 (0.237)	Data 0.002 (0.013)	Loss 4.1348 (4.0114)	Top-1 acc 30.078 (30.915)	Top-5 acc 51.953 (54.411)	lr 0.00148
Warmup Train [36][1960/3239]	Time 0.224 (0.238)	Data 0.001 (0.013)	Loss 3.9849 (4.0112)	Top-1 acc 32.812 (30.928)	Top-5 acc 52.344 (54.421)	lr 0.00147
Warmup Train [36][1970/3239]	Time 0.431 (0.238)	Data 0.001 (0.012)	Loss 4.2519 (4.0110)	Top-1 acc 23.438 (30.933)	Top-5 acc 50.391 (54.427)	lr 0.00147
Warmup Train [36][1980/3239]	Time 0.161 (0.238)	Data 0.001 (0.012)	Loss 3.8616 (4.0109)	Top-1 acc 34.375 (30.934)	Top-5 acc 60.156 (54.430)	lr 0.00147
Warmup Train [36][1990/3239]	Time 0.307 (0.238)	Data 0.001 (0.012)	Loss 3.9962 (4.0108)	Top-1 acc 29.297 (30.933)	Top-5 acc 55.469 (54.431)	lr 0.00147
Warmup Train [36][2000/3239]	Time 0.225 (0.238)	Data 0.001 (0.012)	Loss 4.2422 (4.0111)	Top-1 acc 26.562 (30.926)	Top-5 acc 46.484 (54.425)	lr 0.00147
Warmup Train [36][2010/3239]	Time 0.263 (0.238)	Data 0.001 (0.012)	Loss 3.9174 (4.0109)	Top-1 acc 32.812 (30.929)	Top-5 acc 55.859 (54.425)	lr 0.00146
Warmup Train [36][2020/3239]	Time 0.204 (0.238)	Data 0.001 (0.012)	Loss 3.9757 (4.0110)	Top-1 acc 32.422 (30.928)	Top-5 acc 55.469 (54.423)	lr 0.00146
Warmup Train [36][2030/3239]	Time 0.251 (0.238)	Data 0.001 (0.012)	Loss 4.0696 (4.0113)	Top-1 acc 28.516 (30.916)	Top-5 acc 54.297 (54.416)	lr 0.00146
Warmup Train [36][2040/3239]	Time 0.232 (0.238)	Data 0.001 (0.012)	Loss 4.0491 (4.0114)	Top-1 acc 30.859 (30.916)	Top-5 acc 53.125 (54.413)	lr 0.00146
Warmup Train [36][2050/3239]	Time 0.170 (0.238)	Data 0.001 (0.012)	Loss 4.1894 (4.0117)	Top-1 acc 26.562 (30.911)	Top-5 acc 49.609 (54.405)	lr 0.00146
Warmup Train [36][2060/3239]	Time 0.147 (0.238)	Data 0.001 (0.012)	Loss 4.0643 (4.0116)	Top-1 acc 28.125 (30.910)	Top-5 acc 50.000 (54.405)	lr 0.00145
Warmup Train [36][2070/3239]	Time 0.279 (0.238)	Data 0.001 (0.012)	Loss 4.0885 (4.0117)	Top-1 acc 28.125 (30.908)	Top-5 acc 54.297 (54.400)	lr 0.00145
Warmup Train [36][2080/3239]	Time 0.195 (0.238)	Data 0.001 (0.012)	Loss 4.0764 (4.0119)	Top-1 acc 31.250 (30.906)	Top-5 acc 56.641 (54.397)	lr 0.00145
Warmup Train [36][2090/3239]	Time 0.272 (0.238)	Data 0.001 (0.012)	Loss 3.9371 (4.0116)	Top-1 acc 35.547 (30.912)	Top-5 acc 57.812 (54.403)	lr 0.00145
Warmup Train [36][2100/3239]	Time 0.193 (0.238)	Data 0.001 (0.012)	Loss 4.0764 (4.0115)	Top-1 acc 33.594 (30.921)	Top-5 acc 56.641 (54.412)	lr 0.00145
Warmup Train [36][2110/3239]	Time 0.225 (0.238)	Data 0.003 (0.012)	Loss 4.0786 (4.0114)	Top-1 acc 30.078 (30.922)	Top-5 acc 51.953 (54.413)	lr 0.00144
Warmup Train [36][2120/3239]	Time 0.286 (0.238)	Data 0.001 (0.012)	Loss 3.7377 (4.0113)	Top-1 acc 30.469 (30.919)	Top-5 acc 59.375 (54.414)	lr 0.00144
Warmup Train [36][2130/3239]	Time 0.185 (0.238)	Data 0.002 (0.012)	Loss 3.9428 (4.0112)	Top-1 acc 33.203 (30.919)	Top-5 acc 57.031 (54.416)	lr 0.00144
Warmup Train [36][2140/3239]	Time 0.196 (0.238)	Data 0.001 (0.012)	Loss 4.1942 (4.0111)	Top-1 acc 29.297 (30.928)	Top-5 acc 50.000 (54.419)	lr 0.00144
Warmup Train [36][2150/3239]	Time 0.289 (0.238)	Data 0.001 (0.012)	Loss 3.9985 (4.0113)	Top-1 acc 32.422 (30.923)	Top-5 acc 51.953 (54.412)	lr 0.00144
Warmup Train [36][2160/3239]	Time 0.289 (0.237)	Data 0.001 (0.012)	Loss 4.0107 (4.0113)	Top-1 acc 29.688 (30.917)	Top-5 acc 57.812 (54.414)	lr 0.00143
Warmup Train [36][2170/3239]	Time 0.309 (0.238)	Data 0.002 (0.012)	Loss 4.0525 (4.0112)	Top-1 acc 32.812 (30.922)	Top-5 acc 52.344 (54.416)	lr 0.00143
Warmup Train [36][2180/3239]	Time 0.259 (0.237)	Data 0.001 (0.012)	Loss 3.9518 (4.0117)	Top-1 acc 31.641 (30.907)	Top-5 acc 55.859 (54.405)	lr 0.00143
Warmup Train [36][2190/3239]	Time 0.238 (0.237)	Data 0.001 (0.012)	Loss 3.8259 (4.0115)	Top-1 acc 33.203 (30.906)	Top-5 acc 58.594 (54.405)	lr 0.00143
Warmup Train [36][2200/3239]	Time 0.201 (0.237)	Data 0.002 (0.012)	Loss 3.8875 (4.0112)	Top-1 acc 30.469 (30.912)	Top-5 acc 54.297 (54.405)	lr 0.00143
Warmup Train [36][2210/3239]	Time 0.286 (0.237)	Data 0.002 (0.011)	Loss 4.1275 (4.0113)	Top-1 acc 30.859 (30.911)	Top-5 acc 51.172 (54.404)	lr 0.00142
Warmup Train [36][2220/3239]	Time 0.205 (0.237)	Data 0.001 (0.011)	Loss 3.8158 (4.0110)	Top-1 acc 37.891 (30.921)	Top-5 acc 61.719 (54.411)	lr 0.00142
Warmup Train [36][2230/3239]	Time 0.286 (0.237)	Data 0.002 (0.011)	Loss 4.0079 (4.0108)	Top-1 acc 34.375 (30.926)	Top-5 acc 55.859 (54.414)	lr 0.00142
Warmup Train [36][2240/3239]	Time 0.194 (0.237)	Data 0.001 (0.011)	Loss 3.9327 (4.0107)	Top-1 acc 33.203 (30.925)	Top-5 acc 57.422 (54.416)	lr 0.00142
Warmup Train [36][2250/3239]	Time 0.232 (0.237)	Data 0.001 (0.011)	Loss 3.7849 (4.0106)	Top-1 acc 35.547 (30.927)	Top-5 acc 58.203 (54.422)	lr 0.00142
Warmup Train [36][2260/3239]	Time 0.206 (0.237)	Data 0.001 (0.011)	Loss 3.9378 (4.0107)	Top-1 acc 32.812 (30.927)	Top-5 acc 56.250 (54.416)	lr 0.00141
Warmup Train [36][2270/3239]	Time 0.302 (0.237)	Data 0.001 (0.011)	Loss 4.1160 (4.0107)	Top-1 acc 30.078 (30.927)	Top-5 acc 49.219 (54.410)	lr 0.00141
Warmup Train [36][2280/3239]	Time 0.238 (0.237)	Data 0.001 (0.011)	Loss 4.0279 (4.0105)	Top-1 acc 32.812 (30.935)	Top-5 acc 52.344 (54.415)	lr 0.00141
Warmup Train [36][2290/3239]	Time 0.231 (0.237)	Data 0.002 (0.011)	Loss 3.8623 (4.0105)	Top-1 acc 34.375 (30.936)	Top-5 acc 55.859 (54.416)	lr 0.00141
Warmup Train [36][2300/3239]	Time 0.251 (0.237)	Data 0.001 (0.011)	Loss 4.3041 (4.0107)	Top-1 acc 23.438 (30.932)	Top-5 acc 48.047 (54.412)	lr 0.00141
Warmup Train [36][2310/3239]	Time 0.284 (0.237)	Data 0.002 (0.011)	Loss 3.9790 (4.0108)	Top-1 acc 34.766 (30.935)	Top-5 acc 57.812 (54.410)	lr 0.00140
Warmup Train [36][2320/3239]	Time 0.157 (0.237)	Data 0.002 (0.011)	Loss 4.1252 (4.0108)	Top-1 acc 26.953 (30.934)	Top-5 acc 49.609 (54.408)	lr 0.00140
Warmup Train [36][2330/3239]	Time 0.274 (0.237)	Data 0.001 (0.011)	Loss 4.2100 (4.0112)	Top-1 acc 26.562 (30.925)	Top-5 acc 49.609 (54.399)	lr 0.00140
Warmup Train [36][2340/3239]	Time 0.193 (0.237)	Data 0.001 (0.011)	Loss 4.4146 (4.0113)	Top-1 acc 22.656 (30.919)	Top-5 acc 46.484 (54.398)	lr 0.00140
Warmup Train [36][2350/3239]	Time 0.206 (0.237)	Data 0.001 (0.011)	Loss 3.8790 (4.0111)	Top-1 acc 33.984 (30.922)	Top-5 acc 58.203 (54.401)	lr 0.00140
Warmup Train [36][2360/3239]	Time 0.308 (0.237)	Data 0.001 (0.011)	Loss 4.0038 (4.0109)	Top-1 acc 30.859 (30.927)	Top-5 acc 54.297 (54.404)	lr 0.00139
Warmup Train [36][2370/3239]	Time 0.164 (0.237)	Data 0.001 (0.011)	Loss 4.0891 (4.0110)	Top-1 acc 27.734 (30.929)	Top-5 acc 48.828 (54.401)	lr 0.00139
Warmup Train [36][2380/3239]	Time 0.205 (0.237)	Data 0.003 (0.011)	Loss 3.9914 (4.0111)	Top-1 acc 33.984 (30.927)	Top-5 acc 55.469 (54.396)	lr 0.00139
Warmup Train [36][2390/3239]	Time 0.259 (0.237)	Data 0.001 (0.011)	Loss 4.2543 (4.0114)	Top-1 acc 27.734 (30.925)	Top-5 acc 49.219 (54.390)	lr 0.00139
Warmup Train [36][2400/3239]	Time 0.137 (0.237)	Data 0.002 (0.011)	Loss 4.1136 (4.0113)	Top-1 acc 31.250 (30.925)	Top-5 acc 53.516 (54.393)	lr 0.00139
Warmup Train [36][2410/3239]	Time 0.257 (0.237)	Data 0.001 (0.011)	Loss 3.9592 (4.0114)	Top-1 acc 31.250 (30.925)	Top-5 acc 53.516 (54.390)	lr 0.00138
Warmup Train [36][2420/3239]	Time 0.132 (0.236)	Data 0.002 (0.011)	Loss 4.0461 (4.0115)	Top-1 acc 31.641 (30.924)	Top-5 acc 52.344 (54.387)	lr 0.00138
Warmup Train [36][2430/3239]	Time 0.131 (0.236)	Data 0.001 (0.011)	Loss 4.2133 (4.0114)	Top-1 acc 26.172 (30.927)	Top-5 acc 49.219 (54.390)	lr 0.00138
Warmup Train [36][2440/3239]	Time 0.205 (0.236)	Data 0.001 (0.011)	Loss 4.0091 (4.0115)	Top-1 acc 32.422 (30.930)	Top-5 acc 54.297 (54.384)	lr 0.00138
Warmup Train [36][2450/3239]	Time 0.165 (0.236)	Data 0.001 (0.011)	Loss 3.9442 (4.0113)	Top-1 acc 29.297 (30.932)	Top-5 acc 54.688 (54.389)	lr 0.00138
Warmup Train [36][2460/3239]	Time 0.188 (0.236)	Data 0.001 (0.011)	Loss 4.1117 (4.0112)	Top-1 acc 27.734 (30.934)	Top-5 acc 53.906 (54.391)	lr 0.00137
Warmup Train [36][2470/3239]	Time 0.194 (0.236)	Data 0.001 (0.011)	Loss 4.0178 (4.0112)	Top-1 acc 33.984 (30.928)	Top-5 acc 55.469 (54.393)	lr 0.00137
Warmup Train [36][2480/3239]	Time 0.224 (0.236)	Data 0.001 (0.011)	Loss 3.8245 (4.0113)	Top-1 acc 32.031 (30.925)	Top-5 acc 57.422 (54.389)	lr 0.00137
Warmup Train [36][2490/3239]	Time 0.329 (0.236)	Data 0.003 (0.011)	Loss 4.4893 (4.0114)	Top-1 acc 26.172 (30.928)	Top-5 acc 44.922 (54.387)	lr 0.00137
Warmup Train [36][2500/3239]	Time 0.220 (0.236)	Data 0.001 (0.011)	Loss 4.0099 (4.0115)	Top-1 acc 32.812 (30.927)	Top-5 acc 52.344 (54.388)	lr 0.00137
Warmup Train [36][2510/3239]	Time 0.212 (0.236)	Data 0.002 (0.010)	Loss 4.2550 (4.0118)	Top-1 acc 24.609 (30.923)	Top-5 acc 45.312 (54.380)	lr 0.00136
Warmup Train [36][2520/3239]	Time 0.220 (0.236)	Data 0.001 (0.010)	Loss 3.9383 (4.0117)	Top-1 acc 33.984 (30.925)	Top-5 acc 55.859 (54.384)	lr 0.00136
Warmup Train [36][2530/3239]	Time 0.233 (0.236)	Data 0.002 (0.010)	Loss 3.9992 (4.0116)	Top-1 acc 33.594 (30.927)	Top-5 acc 55.859 (54.385)	lr 0.00136
Warmup Train [36][2540/3239]	Time 0.209 (0.236)	Data 0.001 (0.010)	Loss 3.9999 (4.0120)	Top-1 acc 29.688 (30.918)	Top-5 acc 51.172 (54.375)	lr 0.00136
Warmup Train [36][2550/3239]	Time 0.235 (0.236)	Data 0.001 (0.010)	Loss 4.0504 (4.0120)	Top-1 acc 28.906 (30.916)	Top-5 acc 55.469 (54.375)	lr 0.00136
Warmup Train [36][2560/3239]	Time 0.177 (0.235)	Data 0.001 (0.010)	Loss 3.8938 (4.0119)	Top-1 acc 34.766 (30.921)	Top-5 acc 55.078 (54.377)	lr 0.00135
Warmup Train [36][2570/3239]	Time 0.233 (0.236)	Data 0.001 (0.010)	Loss 3.9428 (4.0118)	Top-1 acc 29.688 (30.919)	Top-5 acc 58.203 (54.377)	lr 0.00135
Warmup Train [36][2580/3239]	Time 0.274 (0.235)	Data 0.001 (0.010)	Loss 3.9834 (4.0124)	Top-1 acc 30.469 (30.908)	Top-5 acc 55.859 (54.365)	lr 0.00135
Warmup Train [36][2590/3239]	Time 0.353 (0.235)	Data 0.002 (0.010)	Loss 3.8781 (4.0124)	Top-1 acc 32.031 (30.905)	Top-5 acc 55.469 (54.365)	lr 0.00135
Warmup Train [36][2600/3239]	Time 0.166 (0.235)	Data 0.001 (0.010)	Loss 4.0098 (4.0125)	Top-1 acc 29.688 (30.904)	Top-5 acc 53.125 (54.364)	lr 0.00135
Warmup Train [36][2610/3239]	Time 0.192 (0.235)	Data 0.001 (0.010)	Loss 4.1066 (4.0124)	Top-1 acc 27.344 (30.904)	Top-5 acc 51.953 (54.364)	lr 0.00134
Warmup Train [36][2620/3239]	Time 0.205 (0.235)	Data 0.001 (0.010)	Loss 4.0822 (4.0125)	Top-1 acc 25.781 (30.900)	Top-5 acc 50.000 (54.361)	lr 0.00134
Warmup Train [36][2630/3239]	Time 0.186 (0.235)	Data 0.002 (0.010)	Loss 4.1217 (4.0125)	Top-1 acc 29.297 (30.897)	Top-5 acc 52.734 (54.358)	lr 0.00134
Warmup Train [36][2640/3239]	Time 0.224 (0.235)	Data 0.001 (0.010)	Loss 4.0481 (4.0128)	Top-1 acc 31.641 (30.893)	Top-5 acc 52.344 (54.353)	lr 0.00134
Warmup Train [36][2650/3239]	Time 0.238 (0.235)	Data 0.002 (0.010)	Loss 4.2041 (4.0128)	Top-1 acc 24.219 (30.893)	Top-5 acc 48.828 (54.349)	lr 0.00134
Warmup Train [36][2660/3239]	Time 0.169 (0.235)	Data 0.001 (0.010)	Loss 4.2042 (4.0129)	Top-1 acc 25.781 (30.890)	Top-5 acc 49.609 (54.351)	lr 0.00133
Warmup Train [36][2670/3239]	Time 0.206 (0.235)	Data 0.001 (0.010)	Loss 4.0093 (4.0129)	Top-1 acc 28.516 (30.892)	Top-5 acc 54.297 (54.348)	lr 0.00133
Warmup Train [36][2680/3239]	Time 0.233 (0.235)	Data 0.001 (0.010)	Loss 4.0961 (4.0131)	Top-1 acc 25.781 (30.887)	Top-5 acc 55.078 (54.347)	lr 0.00133
Warmup Train [36][2690/3239]	Time 0.229 (0.235)	Data 0.003 (0.010)	Loss 4.1202 (4.0130)	Top-1 acc 30.078 (30.893)	Top-5 acc 49.219 (54.351)	lr 0.00133
Warmup Train [36][2700/3239]	Time 0.186 (0.235)	Data 0.001 (0.010)	Loss 3.8902 (4.0130)	Top-1 acc 33.203 (30.892)	Top-5 acc 57.031 (54.346)	lr 0.00133
Warmup Train [36][2710/3239]	Time 0.199 (0.235)	Data 0.001 (0.010)	Loss 4.1004 (4.0131)	Top-1 acc 30.469 (30.892)	Top-5 acc 52.734 (54.345)	lr 0.00132
Warmup Train [36][2720/3239]	Time 0.241 (0.235)	Data 0.002 (0.010)	Loss 3.9843 (4.0133)	Top-1 acc 30.078 (30.887)	Top-5 acc 56.250 (54.339)	lr 0.00132
Warmup Train [36][2730/3239]	Time 0.221 (0.235)	Data 0.001 (0.010)	Loss 4.0943 (4.0133)	Top-1 acc 31.250 (30.887)	Top-5 acc 50.781 (54.337)	lr 0.00132
Warmup Train [36][2740/3239]	Time 0.189 (0.235)	Data 0.001 (0.010)	Loss 4.0183 (4.0131)	Top-1 acc 32.031 (30.890)	Top-5 acc 52.344 (54.341)	lr 0.00132
Warmup Train [36][2750/3239]	Time 0.228 (0.235)	Data 0.001 (0.010)	Loss 3.9825 (4.0134)	Top-1 acc 29.297 (30.886)	Top-5 acc 57.031 (54.338)	lr 0.00132
Warmup Train [36][2760/3239]	Time 0.257 (0.235)	Data 0.001 (0.010)	Loss 3.9383 (4.0133)	Top-1 acc 33.203 (30.889)	Top-5 acc 56.250 (54.337)	lr 0.00132
Warmup Train [36][2770/3239]	Time 0.322 (0.235)	Data 0.001 (0.010)	Loss 3.6450 (4.0129)	Top-1 acc 39.453 (30.896)	Top-5 acc 63.281 (54.346)	lr 0.00131
Warmup Train [36][2780/3239]	Time 0.239 (0.235)	Data 0.001 (0.010)	Loss 4.1006 (4.0128)	Top-1 acc 28.516 (30.896)	Top-5 acc 51.562 (54.346)	lr 0.00131
Warmup Train [36][2790/3239]	Time 0.267 (0.235)	Data 0.001 (0.010)	Loss 3.9905 (4.0127)	Top-1 acc 32.031 (30.902)	Top-5 acc 54.688 (54.350)	lr 0.00131
Warmup Train [36][2800/3239]	Time 0.205 (0.235)	Data 0.001 (0.010)	Loss 3.7851 (4.0124)	Top-1 acc 38.281 (30.908)	Top-5 acc 62.109 (54.354)	lr 0.00131
Warmup Train [36][2810/3239]	Time 0.202 (0.235)	Data 0.001 (0.010)	Loss 4.1019 (4.0124)	Top-1 acc 25.000 (30.909)	Top-5 acc 50.781 (54.356)	lr 0.00131
Warmup Train [36][2820/3239]	Time 0.186 (0.235)	Data 0.001 (0.010)	Loss 3.9643 (4.0125)	Top-1 acc 33.203 (30.904)	Top-5 acc 53.516 (54.354)	lr 0.00130
Warmup Train [36][2830/3239]	Time 0.222 (0.235)	Data 0.001 (0.010)	Loss 4.1411 (4.0125)	Top-1 acc 26.172 (30.907)	Top-5 acc 51.953 (54.354)	lr 0.00130
Warmup Train [36][2840/3239]	Time 0.243 (0.235)	Data 0.001 (0.010)	Loss 3.8713 (4.0124)	Top-1 acc 33.203 (30.909)	Top-5 acc 58.984 (54.354)	lr 0.00130
Warmup Train [36][2850/3239]	Time 0.160 (0.235)	Data 0.002 (0.010)	Loss 3.8841 (4.0125)	Top-1 acc 33.594 (30.907)	Top-5 acc 58.203 (54.355)	lr 0.00130
Warmup Train [36][2860/3239]	Time 0.222 (0.235)	Data 0.002 (0.010)	Loss 4.0530 (4.0125)	Top-1 acc 28.516 (30.907)	Top-5 acc 53.516 (54.351)	lr 0.00130
Warmup Train [36][2870/3239]	Time 0.323 (0.235)	Data 0.001 (0.010)	Loss 3.9987 (4.0124)	Top-1 acc 33.594 (30.907)	Top-5 acc 56.250 (54.353)	lr 0.00129
Warmup Train [36][2880/3239]	Time 0.177 (0.235)	Data 0.001 (0.009)	Loss 4.0496 (4.0122)	Top-1 acc 29.688 (30.910)	Top-5 acc 54.297 (54.356)	lr 0.00129
Warmup Train [36][2890/3239]	Time 0.236 (0.235)	Data 0.001 (0.009)	Loss 4.1278 (4.0120)	Top-1 acc 30.859 (30.915)	Top-5 acc 50.000 (54.358)	lr 0.00129
Warmup Train [36][2900/3239]	Time 0.200 (0.234)	Data 0.001 (0.009)	Loss 4.0033 (4.0120)	Top-1 acc 32.422 (30.914)	Top-5 acc 54.688 (54.358)	lr 0.00129
Warmup Train [36][2910/3239]	Time 0.176 (0.234)	Data 0.002 (0.009)	Loss 3.9419 (4.0122)	Top-1 acc 25.781 (30.911)	Top-5 acc 60.156 (54.354)	lr 0.00129
Warmup Train [36][2920/3239]	Time 0.218 (0.234)	Data 0.002 (0.009)	Loss 4.1428 (4.0122)	Top-1 acc 28.516 (30.914)	Top-5 acc 47.266 (54.350)	lr 0.00128
Warmup Train [36][2930/3239]	Time 0.243 (0.234)	Data 0.001 (0.009)	Loss 3.9152 (4.0122)	Top-1 acc 35.938 (30.914)	Top-5 acc 56.641 (54.350)	lr 0.00128
Warmup Train [36][2940/3239]	Time 0.243 (0.234)	Data 0.030 (0.009)	Loss 3.8247 (4.0121)	Top-1 acc 30.469 (30.914)	Top-5 acc 58.984 (54.352)	lr 0.00128
Warmup Train [36][2950/3239]	Time 0.228 (0.234)	Data 0.002 (0.009)	Loss 3.9573 (4.0121)	Top-1 acc 31.250 (30.916)	Top-5 acc 55.469 (54.353)	lr 0.00128
Warmup Train [36][2960/3239]	Time 0.193 (0.234)	Data 0.001 (0.009)	Loss 4.0009 (4.0123)	Top-1 acc 29.297 (30.914)	Top-5 acc 52.344 (54.349)	lr 0.00128
Warmup Train [36][2970/3239]	Time 0.389 (0.234)	Data 0.002 (0.009)	Loss 3.9616 (4.0122)	Top-1 acc 28.906 (30.916)	Top-5 acc 55.859 (54.346)	lr 0.00127
Warmup Train [36][2980/3239]	Time 0.188 (0.234)	Data 0.001 (0.009)	Loss 3.9871 (4.0121)	Top-1 acc 32.812 (30.918)	Top-5 acc 55.078 (54.348)	lr 0.00127
Warmup Train [36][2990/3239]	Time 0.247 (0.234)	Data 0.001 (0.009)	Loss 3.9892 (4.0122)	Top-1 acc 30.469 (30.914)	Top-5 acc 55.859 (54.346)	lr 0.00127
Warmup Train [36][3000/3239]	Time 0.187 (0.234)	Data 0.001 (0.009)	Loss 4.0600 (4.0120)	Top-1 acc 30.469 (30.919)	Top-5 acc 51.953 (54.347)	lr 0.00127
Warmup Train [36][3010/3239]	Time 0.168 (0.234)	Data 0.001 (0.009)	Loss 4.0651 (4.0121)	Top-1 acc 28.125 (30.916)	Top-5 acc 51.172 (54.345)	lr 0.00127
Warmup Train [36][3020/3239]	Time 0.209 (0.234)	Data 0.002 (0.009)	Loss 4.2191 (4.0122)	Top-1 acc 27.344 (30.915)	Top-5 acc 49.609 (54.341)	lr 0.00127
Warmup Train [36][3030/3239]	Time 0.198 (0.234)	Data 0.001 (0.009)	Loss 4.0103 (4.0122)	Top-1 acc 28.125 (30.912)	Top-5 acc 52.344 (54.339)	lr 0.00126
Warmup Train [36][3040/3239]	Time 0.219 (0.234)	Data 0.001 (0.009)	Loss 4.1610 (4.0123)	Top-1 acc 31.641 (30.913)	Top-5 acc 54.297 (54.338)	lr 0.00126
Warmup Train [36][3050/3239]	Time 0.276 (0.234)	Data 0.001 (0.009)	Loss 4.0716 (4.0123)	Top-1 acc 30.469 (30.911)	Top-5 acc 55.859 (54.339)	lr 0.00126
Warmup Train [36][3060/3239]	Time 0.288 (0.234)	Data 0.001 (0.009)	Loss 4.1334 (4.0125)	Top-1 acc 29.297 (30.907)	Top-5 acc 50.781 (54.334)	lr 0.00126
Warmup Train [36][3070/3239]	Time 0.173 (0.234)	Data 0.001 (0.009)	Loss 4.0860 (4.0126)	Top-1 acc 33.203 (30.905)	Top-5 acc 53.125 (54.331)	lr 0.00126
Warmup Train [36][3080/3239]	Time 0.320 (0.234)	Data 0.001 (0.009)	Loss 3.9833 (4.0127)	Top-1 acc 30.469 (30.903)	Top-5 acc 51.953 (54.329)	lr 0.00125
Warmup Train [36][3090/3239]	Time 0.218 (0.234)	Data 0.001 (0.009)	Loss 4.2411 (4.0127)	Top-1 acc 24.219 (30.904)	Top-5 acc 50.781 (54.332)	lr 0.00125
Warmup Train [36][3100/3239]	Time 0.204 (0.234)	Data 0.001 (0.009)	Loss 3.9772 (4.0127)	Top-1 acc 31.250 (30.906)	Top-5 acc 56.250 (54.334)	lr 0.00125
Warmup Train [36][3110/3239]	Time 0.189 (0.234)	Data 0.002 (0.009)	Loss 4.1166 (4.0125)	Top-1 acc 28.906 (30.908)	Top-5 acc 50.391 (54.338)	lr 0.00125
Warmup Train [36][3120/3239]	Time 0.222 (0.234)	Data 0.001 (0.009)	Loss 4.0673 (4.0124)	Top-1 acc 30.859 (30.911)	Top-5 acc 51.172 (54.336)	lr 0.00125
Warmup Train [36][3130/3239]	Time 0.211 (0.234)	Data 0.003 (0.009)	Loss 3.9616 (4.0121)	Top-1 acc 30.859 (30.918)	Top-5 acc 55.859 (54.341)	lr 0.00124
Warmup Train [36][3140/3239]	Time 0.201 (0.234)	Data 0.001 (0.009)	Loss 4.2105 (4.0120)	Top-1 acc 30.078 (30.919)	Top-5 acc 52.344 (54.347)	lr 0.00124
Warmup Train [36][3150/3239]	Time 0.254 (0.234)	Data 0.002 (0.009)	Loss 3.8394 (4.0120)	Top-1 acc 30.469 (30.917)	Top-5 acc 58.594 (54.350)	lr 0.00124
Warmup Train [36][3160/3239]	Time 0.213 (0.234)	Data 0.002 (0.009)	Loss 3.9834 (4.0121)	Top-1 acc 32.812 (30.918)	Top-5 acc 58.984 (54.351)	lr 0.00124
Warmup Train [36][3170/3239]	Time 0.159 (0.234)	Data 0.002 (0.009)	Loss 4.0330 (4.0121)	Top-1 acc 30.469 (30.915)	Top-5 acc 56.250 (54.352)	lr 0.00124
Warmup Train [36][3180/3239]	Time 0.155 (0.234)	Data 0.000 (0.009)	Loss 4.2099 (4.0121)	Top-1 acc 25.781 (30.913)	Top-5 acc 47.266 (54.350)	lr 0.00123
Warmup Train [36][3190/3239]	Time 0.210 (0.234)	Data 0.000 (0.009)	Loss 4.0268 (4.0120)	Top-1 acc 31.641 (30.916)	Top-5 acc 55.078 (54.354)	lr 0.00123
Warmup Train [36][3200/3239]	Time 0.212 (0.233)	Data 0.000 (0.009)	Loss 4.2212 (4.0121)	Top-1 acc 27.734 (30.914)	Top-5 acc 49.609 (54.355)	lr 0.00123
Warmup Train [36][3210/3239]	Time 0.191 (0.233)	Data 0.000 (0.009)	Loss 3.8723 (4.0120)	Top-1 acc 33.203 (30.917)	Top-5 acc 57.422 (54.356)	lr 0.00123
Warmup Train [36][3220/3239]	Time 0.140 (0.233)	Data 0.000 (0.009)	Loss 4.0189 (4.0122)	Top-1 acc 29.688 (30.914)	Top-5 acc 53.125 (54.351)	lr 0.00123
Warmup Train [36][3230/3239]	Time 0.220 (0.233)	Data 0.000 (0.009)	Loss 3.8219 (4.0122)	Top-1 acc 35.547 (30.916)	Top-5 acc 60.938 (54.354)	lr 0.00123
Warmup Train [36][3239/3239]	Time 0.180 (0.233)	Data 0.000 (0.009)	Loss 3.9973 (4.0120)	Top-1 acc 28.395 (30.919)	Top-5 acc 53.086 (54.362)	lr 0.00122
==========Warmup Valid [36/40]	loss 2.949	top-1 acc 38.555	top-5 acc 63.199	Train top-1 30.919	top-5 54.362	flops: 442.4M
Warmup Train [37][0/3239]	Time 20.435 (20.435)	Data 17.641 (17.641)	Loss 4.2939 (4.2939)	Top-1 acc 25.000 (25.000)	Top-5 acc 46.875 (46.875)	lr 0.00122
Warmup Train [37][10/3239]	Time 0.304 (2.197)	Data 0.001 (1.699)	Loss 3.9980 (4.0119)	Top-1 acc 35.938 (30.717)	Top-5 acc 55.859 (54.084)	lr 0.00122
Warmup Train [37][20/3239]	Time 0.230 (1.265)	Data 0.002 (0.891)	Loss 3.9369 (4.0081)	Top-1 acc 30.859 (30.543)	Top-5 acc 60.156 (54.613)	lr 0.00122
Warmup Train [37][30/3239]	Time 0.241 (0.928)	Data 0.001 (0.604)	Loss 3.9738 (3.9983)	Top-1 acc 32.422 (30.960)	Top-5 acc 58.203 (54.814)	lr 0.00122
Warmup Train [37][40/3239]	Time 0.307 (0.762)	Data 0.002 (0.458)	Loss 3.9802 (3.9921)	Top-1 acc 31.641 (31.441)	Top-5 acc 55.469 (55.030)	lr 0.00122
Warmup Train [37][50/3239]	Time 0.323 (0.663)	Data 0.002 (0.369)	Loss 4.1370 (3.9938)	Top-1 acc 26.172 (31.357)	Top-5 acc 51.562 (55.002)	lr 0.00121
Warmup Train [37][60/3239]	Time 0.237 (0.592)	Data 0.002 (0.308)	Loss 3.7617 (3.9927)	Top-1 acc 36.328 (31.365)	Top-5 acc 57.031 (55.059)	lr 0.00121
Warmup Train [37][70/3239]	Time 0.200 (0.541)	Data 0.001 (0.265)	Loss 4.0952 (3.9980)	Top-1 acc 30.859 (31.228)	Top-5 acc 50.000 (54.908)	lr 0.00121
Warmup Train [37][80/3239]	Time 0.173 (0.503)	Data 0.002 (0.233)	Loss 4.2773 (3.9987)	Top-1 acc 27.344 (31.308)	Top-5 acc 48.438 (54.905)	lr 0.00121
Warmup Train [37][90/3239]	Time 0.224 (0.473)	Data 0.001 (0.208)	Loss 3.9941 (3.9994)	Top-1 acc 31.250 (31.302)	Top-5 acc 55.078 (54.941)	lr 0.00121
Warmup Train [37][100/3239]	Time 0.213 (0.447)	Data 0.001 (0.187)	Loss 3.8786 (4.0057)	Top-1 acc 35.938 (31.130)	Top-5 acc 58.203 (54.811)	lr 0.00120
Warmup Train [37][110/3239]	Time 0.236 (0.428)	Data 0.001 (0.171)	Loss 4.1459 (4.0025)	Top-1 acc 25.391 (31.109)	Top-5 acc 52.344 (54.899)	lr 0.00120
Warmup Train [37][120/3239]	Time 0.210 (0.412)	Data 0.001 (0.157)	Loss 3.9652 (4.0053)	Top-1 acc 36.719 (31.127)	Top-5 acc 56.641 (54.736)	lr 0.00120
Warmup Train [37][130/3239]	Time 0.182 (0.397)	Data 0.002 (0.145)	Loss 4.0938 (4.0065)	Top-1 acc 30.078 (31.217)	Top-5 acc 53.125 (54.628)	lr 0.00120
Warmup Train [37][140/3239]	Time 0.236 (0.385)	Data 0.001 (0.135)	Loss 3.9003 (4.0068)	Top-1 acc 32.422 (31.159)	Top-5 acc 55.859 (54.588)	lr 0.00120
Warmup Train [37][150/3239]	Time 0.229 (0.376)	Data 0.001 (0.126)	Loss 3.9241 (4.0035)	Top-1 acc 32.812 (31.209)	Top-5 acc 55.469 (54.620)	lr 0.00120
Warmup Train [37][160/3239]	Time 0.305 (0.367)	Data 0.001 (0.119)	Loss 4.0226 (4.0065)	Top-1 acc 31.250 (31.274)	Top-5 acc 51.562 (54.520)	lr 0.00119
Warmup Train [37][170/3239]	Time 0.244 (0.359)	Data 0.002 (0.112)	Loss 3.9903 (4.0037)	Top-1 acc 32.812 (31.344)	Top-5 acc 55.469 (54.585)	lr 0.00119
Warmup Train [37][180/3239]	Time 0.216 (0.351)	Data 0.003 (0.106)	Loss 4.1117 (4.0010)	Top-1 acc 30.859 (31.395)	Top-5 acc 50.000 (54.614)	lr 0.00119
Warmup Train [37][190/3239]	Time 0.236 (0.345)	Data 0.001 (0.100)	Loss 3.9762 (4.0018)	Top-1 acc 33.203 (31.346)	Top-5 acc 58.203 (54.610)	lr 0.00119
Warmup Train [37][200/3239]	Time 0.274 (0.338)	Data 0.001 (0.095)	Loss 4.0220 (4.0011)	Top-1 acc 30.859 (31.275)	Top-5 acc 55.078 (54.674)	lr 0.00119
Warmup Train [37][210/3239]	Time 0.235 (0.333)	Data 0.001 (0.091)	Loss 4.1195 (4.0040)	Top-1 acc 30.078 (31.241)	Top-5 acc 51.953 (54.602)	lr 0.00118
Warmup Train [37][220/3239]	Time 0.193 (0.328)	Data 0.002 (0.087)	Loss 4.0055 (4.0047)	Top-1 acc 30.469 (31.246)	Top-5 acc 53.906 (54.604)	lr 0.00118
Warmup Train [37][230/3239]	Time 0.222 (0.323)	Data 0.002 (0.083)	Loss 3.9574 (4.0056)	Top-1 acc 32.812 (31.235)	Top-5 acc 59.375 (54.633)	lr 0.00118
Warmup Train [37][240/3239]	Time 0.178 (0.318)	Data 0.001 (0.080)	Loss 4.1735 (4.0064)	Top-1 acc 28.906 (31.237)	Top-5 acc 51.953 (54.602)	lr 0.00118
Warmup Train [37][250/3239]	Time 0.184 (0.315)	Data 0.001 (0.077)	Loss 4.0540 (4.0071)	Top-1 acc 32.422 (31.227)	Top-5 acc 53.906 (54.569)	lr 0.00118
Warmup Train [37][260/3239]	Time 0.173 (0.311)	Data 0.001 (0.074)	Loss 3.7974 (4.0092)	Top-1 acc 33.203 (31.118)	Top-5 acc 60.938 (54.515)	lr 0.00118
Warmup Train [37][270/3239]	Time 0.338 (0.308)	Data 0.001 (0.071)	Loss 4.0588 (4.0102)	Top-1 acc 29.688 (31.068)	Top-5 acc 57.812 (54.520)	lr 0.00117
Warmup Train [37][280/3239]	Time 0.202 (0.305)	Data 0.002 (0.069)	Loss 3.9432 (4.0127)	Top-1 acc 35.156 (31.043)	Top-5 acc 51.562 (54.453)	lr 0.00117
Warmup Train [37][290/3239]	Time 0.181 (0.302)	Data 0.001 (0.067)	Loss 3.9235 (4.0121)	Top-1 acc 32.031 (31.012)	Top-5 acc 58.203 (54.478)	lr 0.00117
Warmup Train [37][300/3239]	Time 0.207 (0.299)	Data 0.001 (0.065)	Loss 4.4608 (4.0125)	Top-1 acc 20.312 (30.998)	Top-5 acc 40.625 (54.486)	lr 0.00117
Warmup Train [37][310/3239]	Time 0.205 (0.297)	Data 0.001 (0.063)	Loss 4.1385 (4.0124)	Top-1 acc 26.562 (30.980)	Top-5 acc 53.516 (54.498)	lr 0.00117
Warmup Train [37][320/3239]	Time 0.205 (0.294)	Data 0.001 (0.061)	Loss 3.9846 (4.0132)	Top-1 acc 30.469 (30.947)	Top-5 acc 55.469 (54.462)	lr 0.00116
Warmup Train [37][330/3239]	Time 0.193 (0.292)	Data 0.001 (0.059)	Loss 3.8795 (4.0118)	Top-1 acc 33.984 (30.963)	Top-5 acc 53.906 (54.483)	lr 0.00116
Warmup Train [37][340/3239]	Time 0.154 (0.290)	Data 0.001 (0.057)	Loss 4.1079 (4.0093)	Top-1 acc 27.344 (31.011)	Top-5 acc 51.953 (54.517)	lr 0.00116
Warmup Train [37][350/3239]	Time 0.168 (0.288)	Data 0.001 (0.056)	Loss 4.1485 (4.0124)	Top-1 acc 25.781 (30.927)	Top-5 acc 54.688 (54.462)	lr 0.00116
Warmup Train [37][360/3239]	Time 0.234 (0.287)	Data 0.003 (0.055)	Loss 3.9941 (4.0122)	Top-1 acc 33.984 (30.923)	Top-5 acc 55.469 (54.460)	lr 0.00116
Warmup Train [37][370/3239]	Time 0.218 (0.285)	Data 0.001 (0.053)	Loss 3.9069 (4.0116)	Top-1 acc 30.859 (30.954)	Top-5 acc 57.422 (54.458)	lr 0.00116
Warmup Train [37][380/3239]	Time 0.203 (0.283)	Data 0.001 (0.052)	Loss 3.8527 (4.0100)	Top-1 acc 36.328 (30.979)	Top-5 acc 58.984 (54.494)	lr 0.00115
Warmup Train [37][390/3239]	Time 0.253 (0.282)	Data 0.001 (0.051)	Loss 3.9496 (4.0104)	Top-1 acc 30.859 (30.970)	Top-5 acc 54.688 (54.486)	lr 0.00115
Warmup Train [37][400/3239]	Time 0.258 (0.281)	Data 0.001 (0.049)	Loss 4.1375 (4.0106)	Top-1 acc 29.688 (30.979)	Top-5 acc 53.906 (54.496)	lr 0.00115
Warmup Train [37][410/3239]	Time 0.188 (0.279)	Data 0.002 (0.048)	Loss 3.9080 (4.0101)	Top-1 acc 28.516 (30.992)	Top-5 acc 59.766 (54.533)	lr 0.00115
Warmup Train [37][420/3239]	Time 0.241 (0.277)	Data 0.001 (0.047)	Loss 4.1462 (4.0101)	Top-1 acc 26.562 (30.966)	Top-5 acc 48.828 (54.519)	lr 0.00115
Warmup Train [37][430/3239]	Time 0.201 (0.276)	Data 0.001 (0.046)	Loss 3.9284 (4.0104)	Top-1 acc 31.641 (30.975)	Top-5 acc 59.766 (54.525)	lr 0.00114
Warmup Train [37][440/3239]	Time 0.159 (0.275)	Data 0.001 (0.045)	Loss 4.1257 (4.0105)	Top-1 acc 27.734 (30.990)	Top-5 acc 50.391 (54.538)	lr 0.00114
Warmup Train [37][450/3239]	Time 0.200 (0.274)	Data 0.001 (0.044)	Loss 4.0725 (4.0100)	Top-1 acc 30.469 (30.979)	Top-5 acc 52.344 (54.525)	lr 0.00114
Warmup Train [37][460/3239]	Time 0.182 (0.273)	Data 0.001 (0.044)	Loss 4.0081 (4.0103)	Top-1 acc 33.594 (30.960)	Top-5 acc 55.469 (54.539)	lr 0.00114
Warmup Train [37][470/3239]	Time 0.281 (0.271)	Data 0.002 (0.043)	Loss 4.0372 (4.0110)	Top-1 acc 30.078 (30.965)	Top-5 acc 54.297 (54.508)	lr 0.00114
Warmup Train [37][480/3239]	Time 0.222 (0.270)	Data 0.001 (0.042)	Loss 3.9304 (4.0098)	Top-1 acc 32.422 (30.980)	Top-5 acc 58.594 (54.554)	lr 0.00114
Warmup Train [37][490/3239]	Time 0.306 (0.270)	Data 0.001 (0.041)	Loss 4.0457 (4.0098)	Top-1 acc 31.250 (30.986)	Top-5 acc 50.781 (54.548)	lr 0.00113
Warmup Train [37][500/3239]	Time 0.214 (0.268)	Data 0.001 (0.040)	Loss 4.1040 (4.0096)	Top-1 acc 28.516 (30.985)	Top-5 acc 53.125 (54.558)	lr 0.00113
Warmup Train [37][510/3239]	Time 0.240 (0.268)	Data 0.001 (0.040)	Loss 4.0797 (4.0099)	Top-1 acc 30.469 (30.986)	Top-5 acc 53.516 (54.544)	lr 0.00113
Warmup Train [37][520/3239]	Time 0.271 (0.267)	Data 0.002 (0.039)	Loss 3.8343 (4.0083)	Top-1 acc 36.328 (31.021)	Top-5 acc 57.812 (54.578)	lr 0.00113
Warmup Train [37][530/3239]	Time 0.154 (0.266)	Data 0.003 (0.038)	Loss 3.8399 (4.0084)	Top-1 acc 33.984 (31.003)	Top-5 acc 57.422 (54.576)	lr 0.00113
Warmup Train [37][540/3239]	Time 0.232 (0.266)	Data 0.001 (0.038)	Loss 3.8439 (4.0076)	Top-1 acc 33.594 (31.013)	Top-5 acc 58.984 (54.578)	lr 0.00112
Warmup Train [37][550/3239]	Time 0.184 (0.265)	Data 0.001 (0.037)	Loss 4.1582 (4.0076)	Top-1 acc 27.734 (31.010)	Top-5 acc 51.172 (54.603)	lr 0.00112
Warmup Train [37][560/3239]	Time 0.205 (0.264)	Data 0.001 (0.036)	Loss 3.9939 (4.0070)	Top-1 acc 35.156 (31.037)	Top-5 acc 55.469 (54.600)	lr 0.00112
Warmup Train [37][570/3239]	Time 0.174 (0.263)	Data 0.003 (0.036)	Loss 4.1175 (4.0080)	Top-1 acc 29.297 (31.022)	Top-5 acc 51.562 (54.568)	lr 0.00112
Warmup Train [37][580/3239]	Time 0.216 (0.262)	Data 0.001 (0.035)	Loss 3.9379 (4.0074)	Top-1 acc 32.422 (31.028)	Top-5 acc 53.906 (54.587)	lr 0.00112
Warmup Train [37][590/3239]	Time 0.279 (0.262)	Data 0.003 (0.035)	Loss 3.8811 (4.0070)	Top-1 acc 33.203 (31.019)	Top-5 acc 57.422 (54.598)	lr 0.00112
Warmup Train [37][600/3239]	Time 0.238 (0.261)	Data 0.002 (0.034)	Loss 3.9985 (4.0078)	Top-1 acc 28.516 (30.982)	Top-5 acc 54.688 (54.574)	lr 0.00111
Warmup Train [37][610/3239]	Time 0.297 (0.260)	Data 0.001 (0.034)	Loss 4.0349 (4.0077)	Top-1 acc 31.641 (30.986)	Top-5 acc 53.906 (54.571)	lr 0.00111
Warmup Train [37][620/3239]	Time 0.146 (0.260)	Data 0.001 (0.033)	Loss 3.8538 (4.0081)	Top-1 acc 31.641 (30.980)	Top-5 acc 58.203 (54.557)	lr 0.00111
Warmup Train [37][630/3239]	Time 0.288 (0.259)	Data 0.002 (0.033)	Loss 3.9823 (4.0085)	Top-1 acc 33.984 (30.976)	Top-5 acc 53.125 (54.553)	lr 0.00111
Warmup Train [37][640/3239]	Time 0.209 (0.259)	Data 0.002 (0.032)	Loss 3.9347 (4.0081)	Top-1 acc 33.203 (30.989)	Top-5 acc 59.375 (54.557)	lr 0.00111
Warmup Train [37][650/3239]	Time 0.251 (0.258)	Data 0.001 (0.032)	Loss 3.9934 (4.0075)	Top-1 acc 28.906 (31.006)	Top-5 acc 56.250 (54.564)	lr 0.00110
Warmup Train [37][660/3239]	Time 0.194 (0.257)	Data 0.002 (0.031)	Loss 4.0505 (4.0083)	Top-1 acc 26.172 (30.999)	Top-5 acc 55.469 (54.556)	lr 0.00110
Warmup Train [37][670/3239]	Time 0.212 (0.257)	Data 0.001 (0.031)	Loss 4.1626 (4.0093)	Top-1 acc 28.125 (30.991)	Top-5 acc 47.266 (54.518)	lr 0.00110
Warmup Train [37][680/3239]	Time 0.300 (0.256)	Data 0.001 (0.030)	Loss 3.7978 (4.0086)	Top-1 acc 37.109 (31.007)	Top-5 acc 59.375 (54.536)	lr 0.00110
Warmup Train [37][690/3239]	Time 0.255 (0.256)	Data 0.024 (0.030)	Loss 3.8410 (4.0092)	Top-1 acc 31.641 (30.985)	Top-5 acc 56.250 (54.519)	lr 0.00110
Warmup Train [37][700/3239]	Time 0.251 (0.255)	Data 0.002 (0.030)	Loss 4.1940 (4.0100)	Top-1 acc 28.906 (30.967)	Top-5 acc 49.609 (54.493)	lr 0.00110
Warmup Train [37][710/3239]	Time 0.191 (0.255)	Data 0.002 (0.029)	Loss 4.3295 (4.0104)	Top-1 acc 25.000 (30.962)	Top-5 acc 47.266 (54.473)	lr 0.00109
Warmup Train [37][720/3239]	Time 0.307 (0.254)	Data 0.001 (0.029)	Loss 3.8246 (4.0104)	Top-1 acc 32.031 (30.953)	Top-5 acc 58.594 (54.456)	lr 0.00109
Warmup Train [37][730/3239]	Time 0.233 (0.254)	Data 0.001 (0.028)	Loss 4.0583 (4.0099)	Top-1 acc 30.469 (30.960)	Top-5 acc 53.906 (54.477)	lr 0.00109
Warmup Train [37][740/3239]	Time 0.145 (0.253)	Data 0.001 (0.028)	Loss 4.0320 (4.0099)	Top-1 acc 29.688 (30.974)	Top-5 acc 54.688 (54.478)	lr 0.00109
Warmup Train [37][750/3239]	Time 0.227 (0.253)	Data 0.001 (0.028)	Loss 3.8818 (4.0105)	Top-1 acc 33.984 (30.976)	Top-5 acc 58.203 (54.464)	lr 0.00109
Warmup Train [37][760/3239]	Time 0.317 (0.252)	Data 0.001 (0.027)	Loss 3.9992 (4.0114)	Top-1 acc 30.469 (30.952)	Top-5 acc 54.297 (54.448)	lr 0.00109
Warmup Train [37][770/3239]	Time 0.175 (0.252)	Data 0.002 (0.027)	Loss 3.9555 (4.0118)	Top-1 acc 30.469 (30.932)	Top-5 acc 56.641 (54.444)	lr 0.00108
Warmup Train [37][780/3239]	Time 0.142 (0.252)	Data 0.001 (0.027)	Loss 4.0937 (4.0115)	Top-1 acc 29.297 (30.941)	Top-5 acc 54.688 (54.460)	lr 0.00108
Warmup Train [37][790/3239]	Time 0.228 (0.251)	Data 0.001 (0.027)	Loss 3.9382 (4.0119)	Top-1 acc 29.688 (30.919)	Top-5 acc 54.297 (54.447)	lr 0.00108
Warmup Train [37][800/3239]	Time 0.246 (0.251)	Data 0.001 (0.026)	Loss 4.0236 (4.0124)	Top-1 acc 30.859 (30.911)	Top-5 acc 56.641 (54.431)	lr 0.00108
Warmup Train [37][810/3239]	Time 0.171 (0.250)	Data 0.001 (0.026)	Loss 4.0532 (4.0126)	Top-1 acc 30.078 (30.913)	Top-5 acc 53.516 (54.418)	lr 0.00108
Warmup Train [37][820/3239]	Time 0.318 (0.250)	Data 0.001 (0.026)	Loss 3.8624 (4.0128)	Top-1 acc 32.422 (30.909)	Top-5 acc 56.641 (54.413)	lr 0.00107
Warmup Train [37][830/3239]	Time 0.210 (0.250)	Data 0.001 (0.025)	Loss 4.1236 (4.0128)	Top-1 acc 25.391 (30.902)	Top-5 acc 54.688 (54.414)	lr 0.00107
Warmup Train [37][840/3239]	Time 0.195 (0.249)	Data 0.001 (0.025)	Loss 4.0387 (4.0131)	Top-1 acc 32.031 (30.889)	Top-5 acc 53.516 (54.410)	lr 0.00107
Warmup Train [37][850/3239]	Time 0.142 (0.249)	Data 0.001 (0.025)	Loss 4.0807 (4.0139)	Top-1 acc 27.734 (30.887)	Top-5 acc 52.734 (54.384)	lr 0.00107
Warmup Train [37][860/3239]	Time 0.207 (0.249)	Data 0.043 (0.025)	Loss 3.7667 (4.0134)	Top-1 acc 37.109 (30.886)	Top-5 acc 60.156 (54.389)	lr 0.00107
Warmup Train [37][870/3239]	Time 0.224 (0.248)	Data 0.001 (0.024)	Loss 4.0050 (4.0131)	Top-1 acc 32.812 (30.890)	Top-5 acc 57.422 (54.405)	lr 0.00107
Warmup Train [37][880/3239]	Time 0.239 (0.248)	Data 0.001 (0.024)	Loss 3.8891 (4.0126)	Top-1 acc 32.812 (30.908)	Top-5 acc 54.297 (54.415)	lr 0.00106
Warmup Train [37][890/3239]	Time 0.164 (0.248)	Data 0.001 (0.024)	Loss 4.3402 (4.0135)	Top-1 acc 27.344 (30.886)	Top-5 acc 48.047 (54.389)	lr 0.00106
Warmup Train [37][900/3239]	Time 0.273 (0.247)	Data 0.001 (0.024)	Loss 4.1257 (4.0140)	Top-1 acc 30.469 (30.884)	Top-5 acc 50.781 (54.389)	lr 0.00106
Warmup Train [37][910/3239]	Time 0.256 (0.247)	Data 0.001 (0.023)	Loss 3.8587 (4.0136)	Top-1 acc 33.984 (30.896)	Top-5 acc 57.812 (54.403)	lr 0.00106
Warmup Train [37][920/3239]	Time 0.227 (0.247)	Data 0.001 (0.023)	Loss 4.0635 (4.0136)	Top-1 acc 32.422 (30.901)	Top-5 acc 53.906 (54.407)	lr 0.00106
Warmup Train [37][930/3239]	Time 0.314 (0.247)	Data 0.002 (0.023)	Loss 3.8086 (4.0129)	Top-1 acc 35.938 (30.915)	Top-5 acc 57.422 (54.421)	lr 0.00106
Warmup Train [37][940/3239]	Time 0.196 (0.246)	Data 0.001 (0.023)	Loss 4.0233 (4.0127)	Top-1 acc 30.859 (30.923)	Top-5 acc 55.859 (54.430)	lr 0.00105
Warmup Train [37][950/3239]	Time 0.217 (0.246)	Data 0.001 (0.023)	Loss 4.1532 (4.0132)	Top-1 acc 30.078 (30.902)	Top-5 acc 50.000 (54.418)	lr 0.00105
Warmup Train [37][960/3239]	Time 0.165 (0.245)	Data 0.001 (0.022)	Loss 3.9645 (4.0128)	Top-1 acc 32.812 (30.906)	Top-5 acc 55.078 (54.430)	lr 0.00105
Warmup Train [37][970/3239]	Time 0.260 (0.245)	Data 0.001 (0.022)	Loss 3.8266 (4.0123)	Top-1 acc 32.812 (30.906)	Top-5 acc 55.469 (54.424)	lr 0.00105
Warmup Train [37][980/3239]	Time 0.233 (0.245)	Data 0.001 (0.022)	Loss 3.9198 (4.0124)	Top-1 acc 33.203 (30.905)	Top-5 acc 53.906 (54.422)	lr 0.00105
Warmup Train [37][990/3239]	Time 0.230 (0.245)	Data 0.001 (0.022)	Loss 3.7837 (4.0120)	Top-1 acc 37.109 (30.907)	Top-5 acc 58.594 (54.431)	lr 0.00105
Warmup Train [37][1000/3239]	Time 0.214 (0.244)	Data 0.001 (0.022)	Loss 3.9381 (4.0128)	Top-1 acc 30.469 (30.884)	Top-5 acc 55.859 (54.406)	lr 0.00104
Warmup Train [37][1010/3239]	Time 0.213 (0.244)	Data 0.002 (0.021)	Loss 4.1433 (4.0124)	Top-1 acc 30.859 (30.906)	Top-5 acc 52.344 (54.410)	lr 0.00104
Warmup Train [37][1020/3239]	Time 0.149 (0.244)	Data 0.002 (0.021)	Loss 3.9091 (4.0119)	Top-1 acc 32.812 (30.909)	Top-5 acc 55.469 (54.419)	lr 0.00104
Warmup Train [37][1030/3239]	Time 0.291 (0.244)	Data 0.001 (0.021)	Loss 3.8675 (4.0122)	Top-1 acc 30.859 (30.903)	Top-5 acc 61.328 (54.426)	lr 0.00104
Warmup Train [37][1040/3239]	Time 0.217 (0.243)	Data 0.001 (0.021)	Loss 4.1223 (4.0120)	Top-1 acc 24.219 (30.913)	Top-5 acc 56.641 (54.437)	lr 0.00104
Warmup Train [37][1050/3239]	Time 0.304 (0.243)	Data 0.001 (0.021)	Loss 3.9289 (4.0120)	Top-1 acc 32.422 (30.915)	Top-5 acc 56.250 (54.431)	lr 0.00103
Warmup Train [37][1060/3239]	Time 0.208 (0.243)	Data 0.001 (0.020)	Loss 3.9928 (4.0119)	Top-1 acc 30.469 (30.925)	Top-5 acc 56.250 (54.434)	lr 0.00103
Warmup Train [37][1070/3239]	Time 0.230 (0.243)	Data 0.002 (0.020)	Loss 4.1088 (4.0120)	Top-1 acc 26.562 (30.917)	Top-5 acc 50.781 (54.431)	lr 0.00103
Warmup Train [37][1080/3239]	Time 0.262 (0.243)	Data 0.001 (0.020)	Loss 3.9630 (4.0122)	Top-1 acc 29.688 (30.910)	Top-5 acc 56.641 (54.424)	lr 0.00103
Warmup Train [37][1090/3239]	Time 0.139 (0.242)	Data 0.001 (0.020)	Loss 3.7924 (4.0117)	Top-1 acc 36.719 (30.915)	Top-5 acc 60.156 (54.439)	lr 0.00103
Warmup Train [37][1100/3239]	Time 0.173 (0.242)	Data 0.002 (0.020)	Loss 3.8555 (4.0114)	Top-1 acc 31.641 (30.918)	Top-5 acc 57.812 (54.440)	lr 0.00103
Warmup Train [37][1110/3239]	Time 0.159 (0.242)	Data 0.001 (0.020)	Loss 4.0308 (4.0112)	Top-1 acc 33.203 (30.928)	Top-5 acc 52.734 (54.442)	lr 0.00102
Warmup Train [37][1120/3239]	Time 0.212 (0.242)	Data 0.001 (0.020)	Loss 4.0666 (4.0113)	Top-1 acc 30.859 (30.924)	Top-5 acc 51.562 (54.438)	lr 0.00102
Warmup Train [37][1130/3239]	Time 0.215 (0.242)	Data 0.002 (0.019)	Loss 4.1578 (4.0116)	Top-1 acc 27.734 (30.914)	Top-5 acc 48.438 (54.431)	lr 0.00102
Warmup Train [37][1140/3239]	Time 0.367 (0.242)	Data 0.003 (0.019)	Loss 3.9521 (4.0109)	Top-1 acc 32.031 (30.929)	Top-5 acc 58.984 (54.448)	lr 0.00102
Warmup Train [37][1150/3239]	Time 0.259 (0.241)	Data 0.001 (0.019)	Loss 3.9440 (4.0110)	Top-1 acc 32.422 (30.933)	Top-5 acc 55.469 (54.451)	lr 0.00102
Warmup Train [37][1160/3239]	Time 0.169 (0.241)	Data 0.002 (0.019)	Loss 3.9723 (4.0107)	Top-1 acc 32.422 (30.940)	Top-5 acc 55.859 (54.460)	lr 0.00102
Warmup Train [37][1170/3239]	Time 0.187 (0.241)	Data 0.001 (0.019)	Loss 3.9485 (4.0108)	Top-1 acc 33.984 (30.936)	Top-5 acc 57.031 (54.461)	lr 0.00101
Warmup Train [37][1180/3239]	Time 0.156 (0.241)	Data 0.002 (0.019)	Loss 4.2012 (4.0106)	Top-1 acc 25.391 (30.935)	Top-5 acc 49.219 (54.463)	lr 0.00101
Warmup Train [37][1190/3239]	Time 0.263 (0.241)	Data 0.001 (0.019)	Loss 4.0827 (4.0106)	Top-1 acc 25.781 (30.925)	Top-5 acc 52.344 (54.462)	lr 0.00101
Warmup Train [37][1200/3239]	Time 0.280 (0.241)	Data 0.001 (0.019)	Loss 4.0398 (4.0110)	Top-1 acc 31.250 (30.922)	Top-5 acc 53.516 (54.453)	lr 0.00101
Warmup Train [37][1210/3239]	Time 0.156 (0.241)	Data 0.001 (0.018)	Loss 4.3001 (4.0112)	Top-1 acc 24.219 (30.913)	Top-5 acc 50.000 (54.448)	lr 0.00101
Warmup Train [37][1220/3239]	Time 0.277 (0.241)	Data 0.001 (0.018)	Loss 3.9547 (4.0110)	Top-1 acc 32.422 (30.922)	Top-5 acc 54.297 (54.448)	lr 0.00101
Warmup Train [37][1230/3239]	Time 0.228 (0.241)	Data 0.001 (0.018)	Loss 4.0658 (4.0108)	Top-1 acc 30.078 (30.931)	Top-5 acc 53.906 (54.450)	lr 0.00100
Warmup Train [37][1240/3239]	Time 0.300 (0.241)	Data 0.001 (0.018)	Loss 4.1670 (4.0106)	Top-1 acc 28.125 (30.935)	Top-5 acc 50.781 (54.449)	lr 0.00100
Warmup Train [37][1250/3239]	Time 0.187 (0.240)	Data 0.001 (0.018)	Loss 4.2160 (4.0106)	Top-1 acc 23.438 (30.926)	Top-5 acc 51.172 (54.446)	lr 0.00100
Warmup Train [37][1260/3239]	Time 0.160 (0.240)	Data 0.001 (0.018)	Loss 4.0002 (4.0103)	Top-1 acc 30.469 (30.927)	Top-5 acc 55.469 (54.451)	lr 0.00100
Warmup Train [37][1270/3239]	Time 0.197 (0.240)	Data 0.001 (0.018)	Loss 4.0594 (4.0100)	Top-1 acc 33.984 (30.941)	Top-5 acc 52.734 (54.464)	lr 0.00100
Warmup Train [37][1280/3239]	Time 0.233 (0.240)	Data 0.001 (0.018)	Loss 3.8458 (4.0097)	Top-1 acc 35.938 (30.936)	Top-5 acc 58.984 (54.482)	lr 0.00100
Warmup Train [37][1290/3239]	Time 0.271 (0.240)	Data 0.001 (0.017)	Loss 3.8782 (4.0095)	Top-1 acc 35.156 (30.937)	Top-5 acc 58.984 (54.487)	lr 0.00099
Warmup Train [37][1300/3239]	Time 0.214 (0.240)	Data 0.002 (0.017)	Loss 3.9457 (4.0090)	Top-1 acc 32.812 (30.949)	Top-5 acc 57.031 (54.501)	lr 0.00099
Warmup Train [37][1310/3239]	Time 0.157 (0.239)	Data 0.001 (0.017)	Loss 3.9602 (4.0090)	Top-1 acc 30.859 (30.940)	Top-5 acc 58.203 (54.498)	lr 0.00099
Warmup Train [37][1320/3239]	Time 0.181 (0.239)	Data 0.001 (0.017)	Loss 3.9842 (4.0089)	Top-1 acc 34.375 (30.953)	Top-5 acc 54.688 (54.502)	lr 0.00099
Warmup Train [37][1330/3239]	Time 0.153 (0.239)	Data 0.001 (0.017)	Loss 4.0659 (4.0094)	Top-1 acc 28.906 (30.954)	Top-5 acc 51.172 (54.488)	lr 0.00099
Warmup Train [37][1340/3239]	Time 0.148 (0.239)	Data 0.001 (0.017)	Loss 4.1392 (4.0094)	Top-1 acc 30.078 (30.958)	Top-5 acc 48.047 (54.489)	lr 0.00099
Warmup Train [37][1350/3239]	Time 0.166 (0.239)	Data 0.003 (0.017)	Loss 4.1590 (4.0093)	Top-1 acc 25.000 (30.964)	Top-5 acc 51.562 (54.495)	lr 0.00098
Warmup Train [37][1360/3239]	Time 0.156 (0.239)	Data 0.001 (0.017)	Loss 3.9874 (4.0090)	Top-1 acc 29.688 (30.977)	Top-5 acc 59.375 (54.502)	lr 0.00098
Warmup Train [37][1370/3239]	Time 0.210 (0.239)	Data 0.001 (0.017)	Loss 3.9232 (4.0088)	Top-1 acc 33.594 (30.990)	Top-5 acc 53.125 (54.503)	lr 0.00098
Warmup Train [37][1380/3239]	Time 0.197 (0.239)	Data 0.001 (0.017)	Loss 4.0115 (4.0085)	Top-1 acc 29.297 (30.999)	Top-5 acc 51.562 (54.507)	lr 0.00098
Warmup Train [37][1390/3239]	Time 0.171 (0.239)	Data 0.001 (0.016)	Loss 3.8959 (4.0084)	Top-1 acc 34.375 (31.010)	Top-5 acc 57.812 (54.509)	lr 0.00098
Warmup Train [37][1400/3239]	Time 0.214 (0.239)	Data 0.001 (0.016)	Loss 3.9355 (4.0082)	Top-1 acc 35.156 (31.016)	Top-5 acc 56.250 (54.514)	lr 0.00098
Warmup Train [37][1410/3239]	Time 0.217 (0.239)	Data 0.002 (0.016)	Loss 3.9729 (4.0084)	Top-1 acc 31.250 (31.015)	Top-5 acc 53.516 (54.504)	lr 0.00097
Warmup Train [37][1420/3239]	Time 0.176 (0.239)	Data 0.002 (0.016)	Loss 4.0837 (4.0085)	Top-1 acc 29.688 (31.013)	Top-5 acc 51.953 (54.503)	lr 0.00097
Warmup Train [37][1430/3239]	Time 0.232 (0.239)	Data 0.003 (0.016)	Loss 3.9693 (4.0085)	Top-1 acc 32.031 (31.007)	Top-5 acc 55.859 (54.503)	lr 0.00097
Warmup Train [37][1440/3239]	Time 0.293 (0.239)	Data 0.001 (0.016)	Loss 4.0237 (4.0087)	Top-1 acc 29.688 (30.999)	Top-5 acc 53.906 (54.498)	lr 0.00097
Warmup Train [37][1450/3239]	Time 0.333 (0.239)	Data 0.001 (0.016)	Loss 3.9609 (4.0087)	Top-1 acc 30.859 (31.000)	Top-5 acc 56.641 (54.501)	lr 0.00097
Warmup Train [37][1460/3239]	Time 0.166 (0.238)	Data 0.001 (0.016)	Loss 3.8769 (4.0083)	Top-1 acc 37.109 (31.013)	Top-5 acc 57.422 (54.511)	lr 0.00097
Warmup Train [37][1470/3239]	Time 0.214 (0.238)	Data 0.001 (0.016)	Loss 4.0097 (4.0083)	Top-1 acc 33.203 (31.026)	Top-5 acc 56.250 (54.515)	lr 0.00096
Warmup Train [37][1480/3239]	Time 0.258 (0.238)	Data 0.001 (0.016)	Loss 4.0609 (4.0081)	Top-1 acc 30.469 (31.023)	Top-5 acc 54.688 (54.518)	lr 0.00096
Warmup Train [37][1490/3239]	Time 0.269 (0.238)	Data 0.001 (0.016)	Loss 3.8595 (4.0079)	Top-1 acc 32.812 (31.026)	Top-5 acc 54.297 (54.519)	lr 0.00096
Warmup Train [37][1500/3239]	Time 0.284 (0.238)	Data 0.001 (0.016)	Loss 3.9148 (4.0079)	Top-1 acc 29.688 (31.026)	Top-5 acc 57.812 (54.523)	lr 0.00096
Warmup Train [37][1510/3239]	Time 0.226 (0.238)	Data 0.001 (0.015)	Loss 3.9437 (4.0077)	Top-1 acc 32.031 (31.028)	Top-5 acc 54.688 (54.526)	lr 0.00096
Warmup Train [37][1520/3239]	Time 0.263 (0.238)	Data 0.001 (0.015)	Loss 4.0627 (4.0079)	Top-1 acc 31.250 (31.028)	Top-5 acc 53.516 (54.519)	lr 0.00096
Warmup Train [37][1530/3239]	Time 0.309 (0.238)	Data 0.001 (0.015)	Loss 3.9596 (4.0078)	Top-1 acc 33.203 (31.034)	Top-5 acc 54.297 (54.522)	lr 0.00095
Warmup Train [37][1540/3239]	Time 0.247 (0.238)	Data 0.001 (0.015)	Loss 4.0239 (4.0077)	Top-1 acc 30.469 (31.040)	Top-5 acc 53.516 (54.522)	lr 0.00095
Warmup Train [37][1550/3239]	Time 0.207 (0.238)	Data 0.001 (0.015)	Loss 3.9240 (4.0078)	Top-1 acc 31.250 (31.030)	Top-5 acc 54.688 (54.521)	lr 0.00095
Warmup Train [37][1560/3239]	Time 0.177 (0.238)	Data 0.001 (0.015)	Loss 3.9781 (4.0074)	Top-1 acc 30.859 (31.030)	Top-5 acc 52.344 (54.529)	lr 0.00095
Warmup Train [37][1570/3239]	Time 0.267 (0.238)	Data 0.002 (0.015)	Loss 3.9751 (4.0074)	Top-1 acc 31.250 (31.024)	Top-5 acc 55.078 (54.523)	lr 0.00095
Warmup Train [37][1580/3239]	Time 0.202 (0.237)	Data 0.001 (0.015)	Loss 3.9155 (4.0069)	Top-1 acc 32.422 (31.032)	Top-5 acc 55.078 (54.531)	lr 0.00095
Warmup Train [37][1590/3239]	Time 0.229 (0.237)	Data 0.002 (0.015)	Loss 4.1452 (4.0071)	Top-1 acc 28.906 (31.032)	Top-5 acc 50.391 (54.523)	lr 0.00094
Warmup Train [37][1600/3239]	Time 0.244 (0.237)	Data 0.001 (0.015)	Loss 3.8859 (4.0073)	Top-1 acc 32.422 (31.027)	Top-5 acc 55.469 (54.525)	lr 0.00094
Warmup Train [37][1610/3239]	Time 0.178 (0.237)	Data 0.001 (0.015)	Loss 3.9594 (4.0073)	Top-1 acc 26.562 (31.029)	Top-5 acc 55.078 (54.523)	lr 0.00094
Warmup Train [37][1620/3239]	Time 0.286 (0.237)	Data 0.001 (0.015)	Loss 4.2020 (4.0072)	Top-1 acc 26.172 (31.029)	Top-5 acc 49.609 (54.526)	lr 0.00094
Warmup Train [37][1630/3239]	Time 0.223 (0.237)	Data 0.023 (0.014)	Loss 4.0755 (4.0073)	Top-1 acc 27.734 (31.024)	Top-5 acc 53.516 (54.522)	lr 0.00094
Warmup Train [37][1640/3239]	Time 0.246 (0.237)	Data 0.001 (0.014)	Loss 3.8603 (4.0072)	Top-1 acc 31.250 (31.028)	Top-5 acc 57.031 (54.527)	lr 0.00094
Warmup Train [37][1650/3239]	Time 0.181 (0.237)	Data 0.001 (0.014)	Loss 3.9844 (4.0075)	Top-1 acc 33.984 (31.028)	Top-5 acc 55.859 (54.521)	lr 0.00093
Warmup Train [37][1660/3239]	Time 0.212 (0.237)	Data 0.001 (0.014)	Loss 4.0362 (4.0072)	Top-1 acc 28.516 (31.033)	Top-5 acc 50.781 (54.525)	lr 0.00093
Warmup Train [37][1670/3239]	Time 0.170 (0.237)	Data 0.001 (0.014)	Loss 4.0101 (4.0074)	Top-1 acc 30.078 (31.034)	Top-5 acc 54.688 (54.521)	lr 0.00093
Warmup Train [37][1680/3239]	Time 0.192 (0.237)	Data 0.001 (0.014)	Loss 3.7868 (4.0071)	Top-1 acc 38.672 (31.040)	Top-5 acc 60.938 (54.529)	lr 0.00093
Warmup Train [37][1690/3239]	Time 0.193 (0.237)	Data 0.001 (0.014)	Loss 4.1550 (4.0074)	Top-1 acc 28.516 (31.039)	Top-5 acc 49.609 (54.517)	lr 0.00093
Warmup Train [37][1700/3239]	Time 0.173 (0.237)	Data 0.002 (0.014)	Loss 4.1670 (4.0078)	Top-1 acc 28.125 (31.036)	Top-5 acc 51.562 (54.512)	lr 0.00093
Warmup Train [37][1710/3239]	Time 0.266 (0.237)	Data 0.001 (0.014)	Loss 3.8063 (4.0079)	Top-1 acc 37.891 (31.038)	Top-5 acc 59.375 (54.507)	lr 0.00092
Warmup Train [37][1720/3239]	Time 0.283 (0.237)	Data 0.002 (0.014)	Loss 3.9883 (4.0078)	Top-1 acc 34.375 (31.048)	Top-5 acc 56.641 (54.511)	lr 0.00092
Warmup Train [37][1730/3239]	Time 0.198 (0.236)	Data 0.002 (0.014)	Loss 3.9772 (4.0078)	Top-1 acc 33.203 (31.051)	Top-5 acc 57.031 (54.512)	lr 0.00092
Warmup Train [37][1740/3239]	Time 0.159 (0.236)	Data 0.002 (0.014)	Loss 3.9908 (4.0079)	Top-1 acc 32.031 (31.054)	Top-5 acc 56.250 (54.514)	lr 0.00092
Warmup Train [37][1750/3239]	Time 0.238 (0.236)	Data 0.002 (0.014)	Loss 4.0819 (4.0078)	Top-1 acc 27.734 (31.046)	Top-5 acc 53.906 (54.515)	lr 0.00092
Warmup Train [37][1760/3239]	Time 0.216 (0.236)	Data 0.002 (0.014)	Loss 4.1006 (4.0083)	Top-1 acc 32.422 (31.041)	Top-5 acc 52.734 (54.503)	lr 0.00092
Warmup Train [37][1770/3239]	Time 0.230 (0.236)	Data 0.024 (0.014)	Loss 3.9273 (4.0084)	Top-1 acc 33.984 (31.041)	Top-5 acc 56.250 (54.502)	lr 0.00091
Warmup Train [37][1780/3239]	Time 0.239 (0.236)	Data 0.003 (0.014)	Loss 4.2135 (4.0085)	Top-1 acc 29.297 (31.048)	Top-5 acc 51.562 (54.507)	lr 0.00091
Warmup Train [37][1790/3239]	Time 0.212 (0.236)	Data 0.001 (0.013)	Loss 3.7998 (4.0083)	Top-1 acc 34.766 (31.047)	Top-5 acc 59.375 (54.508)	lr 0.00091
Warmup Train [37][1800/3239]	Time 0.188 (0.236)	Data 0.001 (0.013)	Loss 3.9156 (4.0080)	Top-1 acc 30.469 (31.050)	Top-5 acc 59.766 (54.518)	lr 0.00091
Warmup Train [37][1810/3239]	Time 0.284 (0.236)	Data 0.001 (0.013)	Loss 4.0818 (4.0081)	Top-1 acc 27.344 (31.048)	Top-5 acc 55.469 (54.521)	lr 0.00091
Warmup Train [37][1820/3239]	Time 0.194 (0.236)	Data 0.001 (0.013)	Loss 3.9620 (4.0081)	Top-1 acc 31.641 (31.045)	Top-5 acc 55.859 (54.511)	lr 0.00091
Warmup Train [37][1830/3239]	Time 0.320 (0.236)	Data 0.001 (0.013)	Loss 3.9085 (4.0080)	Top-1 acc 31.641 (31.049)	Top-5 acc 60.547 (54.516)	lr 0.00090
Warmup Train [37][1840/3239]	Time 0.189 (0.236)	Data 0.002 (0.013)	Loss 4.0133 (4.0081)	Top-1 acc 31.641 (31.045)	Top-5 acc 57.031 (54.516)	lr 0.00090
Warmup Train [37][1850/3239]	Time 0.135 (0.235)	Data 0.001 (0.013)	Loss 4.0911 (4.0084)	Top-1 acc 28.906 (31.041)	Top-5 acc 51.172 (54.510)	lr 0.00090
Warmup Train [37][1860/3239]	Time 0.254 (0.235)	Data 0.001 (0.013)	Loss 3.8505 (4.0086)	Top-1 acc 29.297 (31.037)	Top-5 acc 61.719 (54.509)	lr 0.00090
Warmup Train [37][1870/3239]	Time 0.223 (0.235)	Data 0.001 (0.013)	Loss 3.8634 (4.0082)	Top-1 acc 38.672 (31.050)	Top-5 acc 57.031 (54.515)	lr 0.00090
Warmup Train [37][1880/3239]	Time 0.163 (0.235)	Data 0.001 (0.013)	Loss 4.0617 (4.0080)	Top-1 acc 28.906 (31.053)	Top-5 acc 55.469 (54.520)	lr 0.00090
Warmup Train [37][1890/3239]	Time 0.202 (0.235)	Data 0.001 (0.013)	Loss 3.8616 (4.0078)	Top-1 acc 30.859 (31.052)	Top-5 acc 55.859 (54.526)	lr 0.00089
Warmup Train [37][1900/3239]	Time 0.230 (0.235)	Data 0.002 (0.013)	Loss 4.0511 (4.0080)	Top-1 acc 30.078 (31.045)	Top-5 acc 54.688 (54.521)	lr 0.00089
Warmup Train [37][1910/3239]	Time 0.226 (0.235)	Data 0.001 (0.013)	Loss 3.8078 (4.0076)	Top-1 acc 34.766 (31.046)	Top-5 acc 60.547 (54.526)	lr 0.00089
Warmup Train [37][1920/3239]	Time 0.172 (0.235)	Data 0.002 (0.013)	Loss 4.0615 (4.0079)	Top-1 acc 31.250 (31.043)	Top-5 acc 53.516 (54.525)	lr 0.00089
Warmup Train [37][1930/3239]	Time 0.243 (0.235)	Data 0.001 (0.013)	Loss 4.0076 (4.0076)	Top-1 acc 30.469 (31.050)	Top-5 acc 56.250 (54.531)	lr 0.00089
Warmup Train [37][1940/3239]	Time 0.147 (0.235)	Data 0.002 (0.013)	Loss 4.0793 (4.0074)	Top-1 acc 25.391 (31.053)	Top-5 acc 51.953 (54.536)	lr 0.00089
Warmup Train [37][1950/3239]	Time 0.125 (0.235)	Data 0.001 (0.013)	Loss 4.1007 (4.0075)	Top-1 acc 29.297 (31.050)	Top-5 acc 52.734 (54.535)	lr 0.00089
Warmup Train [37][1960/3239]	Time 0.214 (0.235)	Data 0.001 (0.013)	Loss 4.1128 (4.0078)	Top-1 acc 28.125 (31.049)	Top-5 acc 53.125 (54.526)	lr 0.00088
Warmup Train [37][1970/3239]	Time 0.222 (0.235)	Data 0.001 (0.013)	Loss 3.8830 (4.0075)	Top-1 acc 38.281 (31.057)	Top-5 acc 56.250 (54.535)	lr 0.00088
Warmup Train [37][1980/3239]	Time 0.237 (0.235)	Data 0.002 (0.012)	Loss 3.7833 (4.0076)	Top-1 acc 33.984 (31.056)	Top-5 acc 60.547 (54.537)	lr 0.00088
Warmup Train [37][1990/3239]	Time 0.193 (0.235)	Data 0.001 (0.012)	Loss 4.1348 (4.0078)	Top-1 acc 27.344 (31.056)	Top-5 acc 52.734 (54.532)	lr 0.00088
Warmup Train [37][2000/3239]	Time 0.268 (0.235)	Data 0.003 (0.012)	Loss 3.9808 (4.0079)	Top-1 acc 33.594 (31.055)	Top-5 acc 54.297 (54.530)	lr 0.00088
Warmup Train [37][2010/3239]	Time 0.319 (0.235)	Data 0.001 (0.012)	Loss 4.0527 (4.0079)	Top-1 acc 28.125 (31.055)	Top-5 acc 53.125 (54.527)	lr 0.00088
Warmup Train [37][2020/3239]	Time 0.178 (0.235)	Data 0.001 (0.012)	Loss 3.8545 (4.0074)	Top-1 acc 33.203 (31.064)	Top-5 acc 58.203 (54.538)	lr 0.00087
Warmup Train [37][2030/3239]	Time 0.186 (0.235)	Data 0.001 (0.012)	Loss 3.9829 (4.0073)	Top-1 acc 29.297 (31.060)	Top-5 acc 53.906 (54.535)	lr 0.00087
Warmup Train [37][2040/3239]	Time 0.232 (0.234)	Data 0.002 (0.012)	Loss 3.8915 (4.0075)	Top-1 acc 32.812 (31.057)	Top-5 acc 53.516 (54.528)	lr 0.00087
Warmup Train [37][2050/3239]	Time 0.187 (0.234)	Data 0.001 (0.012)	Loss 3.9678 (4.0074)	Top-1 acc 31.250 (31.058)	Top-5 acc 55.859 (54.536)	lr 0.00087
Warmup Train [37][2060/3239]	Time 0.261 (0.234)	Data 0.001 (0.012)	Loss 3.9268 (4.0072)	Top-1 acc 32.031 (31.057)	Top-5 acc 56.641 (54.538)	lr 0.00087
Warmup Train [37][2070/3239]	Time 0.150 (0.234)	Data 0.001 (0.012)	Loss 3.9481 (4.0069)	Top-1 acc 33.203 (31.071)	Top-5 acc 55.469 (54.548)	lr 0.00087
Warmup Train [37][2080/3239]	Time 0.208 (0.234)	Data 0.001 (0.012)	Loss 4.0575 (4.0069)	Top-1 acc 30.859 (31.072)	Top-5 acc 58.203 (54.551)	lr 0.00086
Warmup Train [37][2090/3239]	Time 0.169 (0.234)	Data 0.001 (0.012)	Loss 4.0053 (4.0066)	Top-1 acc 28.516 (31.075)	Top-5 acc 52.344 (54.557)	lr 0.00086
Warmup Train [37][2100/3239]	Time 0.231 (0.234)	Data 0.002 (0.012)	Loss 3.9449 (4.0064)	Top-1 acc 34.375 (31.077)	Top-5 acc 54.688 (54.561)	lr 0.00086
Warmup Train [37][2110/3239]	Time 0.376 (0.234)	Data 0.002 (0.012)	Loss 3.9284 (4.0062)	Top-1 acc 30.078 (31.077)	Top-5 acc 58.203 (54.564)	lr 0.00086
Warmup Train [37][2120/3239]	Time 0.193 (0.234)	Data 0.002 (0.012)	Loss 3.8389 (4.0065)	Top-1 acc 36.328 (31.069)	Top-5 acc 55.859 (54.555)	lr 0.00086
Warmup Train [37][2130/3239]	Time 0.225 (0.234)	Data 0.002 (0.012)	Loss 3.8767 (4.0063)	Top-1 acc 33.984 (31.071)	Top-5 acc 56.641 (54.558)	lr 0.00086
Warmup Train [37][2140/3239]	Time 0.250 (0.234)	Data 0.001 (0.012)	Loss 3.9012 (4.0063)	Top-1 acc 28.125 (31.071)	Top-5 acc 55.469 (54.554)	lr 0.00085
Warmup Train [37][2150/3239]	Time 0.186 (0.234)	Data 0.001 (0.012)	Loss 4.1630 (4.0063)	Top-1 acc 26.953 (31.071)	Top-5 acc 50.000 (54.556)	lr 0.00085
Warmup Train [37][2160/3239]	Time 0.197 (0.234)	Data 0.001 (0.012)	Loss 4.0355 (4.0064)	Top-1 acc 31.641 (31.074)	Top-5 acc 53.125 (54.552)	lr 0.00085
Warmup Train [37][2170/3239]	Time 0.226 (0.234)	Data 0.001 (0.012)	Loss 3.9902 (4.0067)	Top-1 acc 33.984 (31.073)	Top-5 acc 52.734 (54.542)	lr 0.00085
Warmup Train [37][2180/3239]	Time 0.254 (0.234)	Data 0.001 (0.012)	Loss 3.9361 (4.0070)	Top-1 acc 33.594 (31.069)	Top-5 acc 57.422 (54.539)	lr 0.00085
Warmup Train [37][2190/3239]	Time 0.180 (0.234)	Data 0.001 (0.011)	Loss 4.0572 (4.0069)	Top-1 acc 31.250 (31.068)	Top-5 acc 53.125 (54.540)	lr 0.00085
Warmup Train [37][2200/3239]	Time 0.338 (0.234)	Data 0.001 (0.011)	Loss 4.0578 (4.0068)	Top-1 acc 29.297 (31.064)	Top-5 acc 52.344 (54.542)	lr 0.00085
Warmup Train [37][2210/3239]	Time 0.240 (0.234)	Data 0.001 (0.011)	Loss 4.0828 (4.0066)	Top-1 acc 27.344 (31.065)	Top-5 acc 53.906 (54.544)	lr 0.00084
Warmup Train [37][2220/3239]	Time 0.213 (0.234)	Data 0.001 (0.011)	Loss 4.0022 (4.0065)	Top-1 acc 32.812 (31.069)	Top-5 acc 52.344 (54.548)	lr 0.00084
Warmup Train [37][2230/3239]	Time 0.250 (0.234)	Data 0.001 (0.011)	Loss 3.9237 (4.0066)	Top-1 acc 32.031 (31.069)	Top-5 acc 55.078 (54.548)	lr 0.00084
Warmup Train [37][2240/3239]	Time 0.156 (0.234)	Data 0.001 (0.011)	Loss 4.1097 (4.0064)	Top-1 acc 28.906 (31.073)	Top-5 acc 52.344 (54.551)	lr 0.00084
Warmup Train [37][2250/3239]	Time 0.243 (0.234)	Data 0.001 (0.011)	Loss 3.9056 (4.0062)	Top-1 acc 32.031 (31.078)	Top-5 acc 56.250 (54.554)	lr 0.00084
Warmup Train [37][2260/3239]	Time 0.223 (0.234)	Data 0.001 (0.011)	Loss 3.8434 (4.0064)	Top-1 acc 35.547 (31.075)	Top-5 acc 55.469 (54.550)	lr 0.00084
Warmup Train [37][2270/3239]	Time 0.176 (0.234)	Data 0.001 (0.011)	Loss 4.2586 (4.0064)	Top-1 acc 26.172 (31.077)	Top-5 acc 47.266 (54.548)	lr 0.00083
Warmup Train [37][2280/3239]	Time 0.303 (0.234)	Data 0.001 (0.011)	Loss 4.0839 (4.0067)	Top-1 acc 31.641 (31.077)	Top-5 acc 51.172 (54.539)	lr 0.00083
Warmup Train [37][2290/3239]	Time 0.144 (0.233)	Data 0.001 (0.011)	Loss 4.0440 (4.0065)	Top-1 acc 30.469 (31.082)	Top-5 acc 55.469 (54.545)	lr 0.00083
Warmup Train [37][2300/3239]	Time 0.304 (0.233)	Data 0.001 (0.011)	Loss 3.8011 (4.0067)	Top-1 acc 35.547 (31.087)	Top-5 acc 57.422 (54.543)	lr 0.00083
Warmup Train [37][2310/3239]	Time 0.217 (0.233)	Data 0.002 (0.011)	Loss 4.1270 (4.0067)	Top-1 acc 28.125 (31.084)	Top-5 acc 51.953 (54.543)	lr 0.00083
Warmup Train [37][2320/3239]	Time 0.231 (0.233)	Data 0.001 (0.011)	Loss 3.8286 (4.0068)	Top-1 acc 35.938 (31.084)	Top-5 acc 53.516 (54.540)	lr 0.00083
Warmup Train [37][2330/3239]	Time 0.204 (0.233)	Data 0.001 (0.011)	Loss 4.3456 (4.0069)	Top-1 acc 27.344 (31.084)	Top-5 acc 46.875 (54.534)	lr 0.00083
Warmup Train [37][2340/3239]	Time 0.219 (0.233)	Data 0.001 (0.011)	Loss 3.9623 (4.0068)	Top-1 acc 32.031 (31.086)	Top-5 acc 56.250 (54.536)	lr 0.00082
Warmup Train [37][2350/3239]	Time 0.203 (0.233)	Data 0.001 (0.011)	Loss 4.0511 (4.0068)	Top-1 acc 28.125 (31.089)	Top-5 acc 54.297 (54.537)	lr 0.00082
Warmup Train [37][2360/3239]	Time 0.222 (0.233)	Data 0.001 (0.011)	Loss 4.3086 (4.0069)	Top-1 acc 26.953 (31.087)	Top-5 acc 47.266 (54.532)	lr 0.00082
Warmup Train [37][2370/3239]	Time 0.225 (0.233)	Data 0.001 (0.011)	Loss 4.0181 (4.0069)	Top-1 acc 30.078 (31.090)	Top-5 acc 52.734 (54.529)	lr 0.00082
Warmup Train [37][2380/3239]	Time 0.163 (0.233)	Data 0.001 (0.011)	Loss 4.0886 (4.0070)	Top-1 acc 32.031 (31.090)	Top-5 acc 52.344 (54.527)	lr 0.00082
Warmup Train [37][2390/3239]	Time 0.192 (0.233)	Data 0.001 (0.011)	Loss 3.9305 (4.0070)	Top-1 acc 32.812 (31.090)	Top-5 acc 55.078 (54.525)	lr 0.00082
Warmup Train [37][2400/3239]	Time 0.240 (0.233)	Data 0.002 (0.011)	Loss 3.9503 (4.0066)	Top-1 acc 32.031 (31.096)	Top-5 acc 53.906 (54.530)	lr 0.00081
Warmup Train [37][2410/3239]	Time 0.243 (0.233)	Data 0.001 (0.011)	Loss 4.0620 (4.0064)	Top-1 acc 29.688 (31.101)	Top-5 acc 58.203 (54.538)	lr 0.00081
Warmup Train [37][2420/3239]	Time 0.145 (0.233)	Data 0.001 (0.011)	Loss 4.1092 (4.0062)	Top-1 acc 32.812 (31.102)	Top-5 acc 54.297 (54.544)	lr 0.00081
Warmup Train [37][2430/3239]	Time 0.252 (0.233)	Data 0.002 (0.011)	Loss 4.0985 (4.0061)	Top-1 acc 32.031 (31.108)	Top-5 acc 56.250 (54.549)	lr 0.00081
Warmup Train [37][2440/3239]	Time 0.204 (0.233)	Data 0.001 (0.011)	Loss 4.0056 (4.0060)	Top-1 acc 32.031 (31.115)	Top-5 acc 52.734 (54.554)	lr 0.00081
Warmup Train [37][2450/3239]	Time 0.183 (0.233)	Data 0.001 (0.011)	Loss 3.9199 (4.0059)	Top-1 acc 31.250 (31.115)	Top-5 acc 55.469 (54.557)	lr 0.00081
Warmup Train [37][2460/3239]	Time 0.219 (0.233)	Data 0.002 (0.011)	Loss 3.8211 (4.0055)	Top-1 acc 34.375 (31.119)	Top-5 acc 60.938 (54.569)	lr 0.00081
Warmup Train [37][2470/3239]	Time 0.211 (0.233)	Data 0.001 (0.011)	Loss 4.0882 (4.0056)	Top-1 acc 29.688 (31.116)	Top-5 acc 51.172 (54.567)	lr 0.00080
Warmup Train [37][2480/3239]	Time 0.196 (0.233)	Data 0.001 (0.010)	Loss 4.0331 (4.0058)	Top-1 acc 26.953 (31.114)	Top-5 acc 52.344 (54.561)	lr 0.00080
Warmup Train [37][2490/3239]	Time 0.184 (0.233)	Data 0.001 (0.010)	Loss 4.1446 (4.0057)	Top-1 acc 25.781 (31.112)	Top-5 acc 50.000 (54.559)	lr 0.00080
Warmup Train [37][2500/3239]	Time 0.218 (0.233)	Data 0.001 (0.010)	Loss 3.9366 (4.0058)	Top-1 acc 28.906 (31.107)	Top-5 acc 55.859 (54.557)	lr 0.00080
Warmup Train [37][2510/3239]	Time 0.166 (0.233)	Data 0.002 (0.010)	Loss 4.0850 (4.0059)	Top-1 acc 35.547 (31.108)	Top-5 acc 53.906 (54.553)	lr 0.00080
Warmup Train [37][2520/3239]	Time 0.328 (0.233)	Data 0.001 (0.010)	Loss 3.8174 (4.0058)	Top-1 acc 35.547 (31.114)	Top-5 acc 60.938 (54.559)	lr 0.00080
Warmup Train [37][2530/3239]	Time 0.134 (0.233)	Data 0.001 (0.010)	Loss 3.9707 (4.0060)	Top-1 acc 30.859 (31.108)	Top-5 acc 53.516 (54.555)	lr 0.00079
Warmup Train [37][2540/3239]	Time 0.243 (0.233)	Data 0.001 (0.010)	Loss 3.9376 (4.0061)	Top-1 acc 30.078 (31.104)	Top-5 acc 54.688 (54.550)	lr 0.00079
Warmup Train [37][2550/3239]	Time 0.172 (0.233)	Data 0.002 (0.010)	Loss 4.0822 (4.0061)	Top-1 acc 31.250 (31.104)	Top-5 acc 51.562 (54.547)	lr 0.00079
Warmup Train [37][2560/3239]	Time 0.261 (0.233)	Data 0.001 (0.010)	Loss 4.1848 (4.0059)	Top-1 acc 25.000 (31.105)	Top-5 acc 54.297 (54.552)	lr 0.00079
Warmup Train [37][2570/3239]	Time 0.183 (0.233)	Data 0.001 (0.010)	Loss 4.0279 (4.0060)	Top-1 acc 28.516 (31.105)	Top-5 acc 56.641 (54.550)	lr 0.00079
Warmup Train [37][2580/3239]	Time 0.198 (0.232)	Data 0.002 (0.010)	Loss 4.1785 (4.0062)	Top-1 acc 26.562 (31.102)	Top-5 acc 48.047 (54.545)	lr 0.00079
Warmup Train [37][2590/3239]	Time 0.223 (0.232)	Data 0.001 (0.010)	Loss 3.9455 (4.0063)	Top-1 acc 29.297 (31.100)	Top-5 acc 55.859 (54.545)	lr 0.00079
Warmup Train [37][2600/3239]	Time 0.258 (0.232)	Data 0.001 (0.010)	Loss 4.0533 (4.0063)	Top-1 acc 28.516 (31.104)	Top-5 acc 52.344 (54.547)	lr 0.00078
Warmup Train [37][2610/3239]	Time 0.231 (0.232)	Data 0.002 (0.010)	Loss 3.8670 (4.0062)	Top-1 acc 33.594 (31.101)	Top-5 acc 55.859 (54.551)	lr 0.00078
Warmup Train [37][2620/3239]	Time 0.168 (0.232)	Data 0.001 (0.010)	Loss 4.1597 (4.0063)	Top-1 acc 30.078 (31.100)	Top-5 acc 51.562 (54.551)	lr 0.00078
Warmup Train [37][2630/3239]	Time 0.302 (0.232)	Data 0.001 (0.010)	Loss 4.0460 (4.0064)	Top-1 acc 28.516 (31.097)	Top-5 acc 55.469 (54.546)	lr 0.00078
Warmup Train [37][2640/3239]	Time 0.347 (0.232)	Data 0.001 (0.010)	Loss 3.8319 (4.0060)	Top-1 acc 35.547 (31.103)	Top-5 acc 55.859 (54.558)	lr 0.00078
Warmup Train [37][2650/3239]	Time 0.214 (0.232)	Data 0.001 (0.010)	Loss 4.0420 (4.0059)	Top-1 acc 35.547 (31.107)	Top-5 acc 52.734 (54.560)	lr 0.00078
Warmup Train [37][2660/3239]	Time 0.143 (0.232)	Data 0.001 (0.010)	Loss 4.3057 (4.0061)	Top-1 acc 26.172 (31.102)	Top-5 acc 46.875 (54.553)	lr 0.00078
Warmup Train [37][2670/3239]	Time 0.196 (0.232)	Data 0.001 (0.010)	Loss 4.0971 (4.0061)	Top-1 acc 28.516 (31.098)	Top-5 acc 51.562 (54.553)	lr 0.00077
Warmup Train [37][2680/3239]	Time 0.194 (0.232)	Data 0.001 (0.010)	Loss 3.9015 (4.0061)	Top-1 acc 34.375 (31.097)	Top-5 acc 56.250 (54.555)	lr 0.00077
Warmup Train [37][2690/3239]	Time 0.175 (0.232)	Data 0.001 (0.010)	Loss 3.9905 (4.0062)	Top-1 acc 29.297 (31.095)	Top-5 acc 51.562 (54.553)	lr 0.00077
Warmup Train [37][2700/3239]	Time 0.229 (0.232)	Data 0.001 (0.010)	Loss 3.9079 (4.0061)	Top-1 acc 33.203 (31.096)	Top-5 acc 58.594 (54.555)	lr 0.00077
Warmup Train [37][2710/3239]	Time 0.188 (0.232)	Data 0.001 (0.010)	Loss 4.2317 (4.0062)	Top-1 acc 26.953 (31.096)	Top-5 acc 46.484 (54.555)	lr 0.00077
Warmup Train [37][2720/3239]	Time 0.251 (0.232)	Data 0.001 (0.010)	Loss 3.9500 (4.0062)	Top-1 acc 32.422 (31.095)	Top-5 acc 52.734 (54.557)	lr 0.00077
Warmup Train [37][2730/3239]	Time 0.226 (0.232)	Data 0.001 (0.010)	Loss 3.9641 (4.0062)	Top-1 acc 28.516 (31.092)	Top-5 acc 58.203 (54.554)	lr 0.00076
Warmup Train [37][2740/3239]	Time 0.329 (0.232)	Data 0.002 (0.010)	Loss 3.8032 (4.0061)	Top-1 acc 33.203 (31.093)	Top-5 acc 59.766 (54.555)	lr 0.00076
Warmup Train [37][2750/3239]	Time 0.251 (0.232)	Data 0.001 (0.010)	Loss 3.9785 (4.0063)	Top-1 acc 34.766 (31.090)	Top-5 acc 57.812 (54.552)	lr 0.00076
Warmup Train [37][2760/3239]	Time 0.213 (0.232)	Data 0.001 (0.010)	Loss 3.9663 (4.0063)	Top-1 acc 30.469 (31.091)	Top-5 acc 53.125 (54.551)	lr 0.00076
Warmup Train [37][2770/3239]	Time 0.229 (0.232)	Data 0.001 (0.010)	Loss 4.1371 (4.0064)	Top-1 acc 24.609 (31.087)	Top-5 acc 52.344 (54.545)	lr 0.00076
Warmup Train [37][2780/3239]	Time 0.261 (0.232)	Data 0.001 (0.010)	Loss 4.1213 (4.0065)	Top-1 acc 30.859 (31.083)	Top-5 acc 53.125 (54.541)	lr 0.00076
Warmup Train [37][2790/3239]	Time 0.217 (0.232)	Data 0.001 (0.010)	Loss 3.9816 (4.0064)	Top-1 acc 34.375 (31.086)	Top-5 acc 54.297 (54.541)	lr 0.00076
Warmup Train [37][2800/3239]	Time 0.269 (0.232)	Data 0.001 (0.010)	Loss 3.8839 (4.0062)	Top-1 acc 33.594 (31.088)	Top-5 acc 58.594 (54.545)	lr 0.00075
Warmup Train [37][2810/3239]	Time 0.201 (0.232)	Data 0.002 (0.010)	Loss 4.0135 (4.0062)	Top-1 acc 26.953 (31.088)	Top-5 acc 54.688 (54.545)	lr 0.00075
Warmup Train [37][2820/3239]	Time 0.215 (0.232)	Data 0.001 (0.010)	Loss 3.9647 (4.0061)	Top-1 acc 32.812 (31.086)	Top-5 acc 48.828 (54.543)	lr 0.00075
Warmup Train [37][2830/3239]	Time 0.245 (0.231)	Data 0.001 (0.010)	Loss 4.0166 (4.0064)	Top-1 acc 29.688 (31.081)	Top-5 acc 51.953 (54.541)	lr 0.00075
Warmup Train [37][2840/3239]	Time 0.198 (0.231)	Data 0.002 (0.010)	Loss 4.0723 (4.0063)	Top-1 acc 29.688 (31.083)	Top-5 acc 49.609 (54.542)	lr 0.00075
Warmup Train [37][2850/3239]	Time 0.309 (0.231)	Data 0.001 (0.010)	Loss 4.2422 (4.0063)	Top-1 acc 28.516 (31.086)	Top-5 acc 48.828 (54.544)	lr 0.00075
Warmup Train [37][2860/3239]	Time 0.315 (0.231)	Data 0.001 (0.010)	Loss 3.9475 (4.0065)	Top-1 acc 33.594 (31.086)	Top-5 acc 54.297 (54.535)	lr 0.00075
Warmup Train [37][2870/3239]	Time 0.179 (0.231)	Data 0.001 (0.010)	Loss 4.0199 (4.0066)	Top-1 acc 29.688 (31.085)	Top-5 acc 55.078 (54.528)	lr 0.00074
Warmup Train [37][2880/3239]	Time 0.236 (0.231)	Data 0.001 (0.010)	Loss 3.9092 (4.0064)	Top-1 acc 32.812 (31.087)	Top-5 acc 55.469 (54.533)	lr 0.00074
Warmup Train [37][2890/3239]	Time 0.243 (0.231)	Data 0.001 (0.009)	Loss 3.9549 (4.0066)	Top-1 acc 28.906 (31.085)	Top-5 acc 57.031 (54.529)	lr 0.00074
Warmup Train [37][2900/3239]	Time 0.198 (0.231)	Data 0.001 (0.009)	Loss 4.0754 (4.0066)	Top-1 acc 27.734 (31.085)	Top-5 acc 51.953 (54.530)	lr 0.00074
Warmup Train [37][2910/3239]	Time 0.162 (0.231)	Data 0.002 (0.009)	Loss 4.3026 (4.0070)	Top-1 acc 28.516 (31.082)	Top-5 acc 48.438 (54.522)	lr 0.00074
Warmup Train [37][2920/3239]	Time 0.257 (0.231)	Data 0.001 (0.009)	Loss 3.9523 (4.0068)	Top-1 acc 35.938 (31.085)	Top-5 acc 53.906 (54.523)	lr 0.00074
Warmup Train [37][2930/3239]	Time 0.207 (0.231)	Data 0.001 (0.009)	Loss 4.0305 (4.0067)	Top-1 acc 26.953 (31.085)	Top-5 acc 55.078 (54.524)	lr 0.00074
Warmup Train [37][2940/3239]	Time 0.186 (0.231)	Data 0.001 (0.009)	Loss 3.9597 (4.0063)	Top-1 acc 34.375 (31.090)	Top-5 acc 55.859 (54.532)	lr 0.00073
Warmup Train [37][2950/3239]	Time 0.308 (0.231)	Data 0.001 (0.009)	Loss 3.9033 (4.0063)	Top-1 acc 33.203 (31.091)	Top-5 acc 56.641 (54.533)	lr 0.00073
Warmup Train [37][2960/3239]	Time 0.247 (0.231)	Data 0.001 (0.009)	Loss 3.9476 (4.0065)	Top-1 acc 30.859 (31.086)	Top-5 acc 55.078 (54.527)	lr 0.00073
Warmup Train [37][2970/3239]	Time 0.164 (0.231)	Data 0.001 (0.009)	Loss 4.0268 (4.0065)	Top-1 acc 29.688 (31.087)	Top-5 acc 51.953 (54.525)	lr 0.00073
Warmup Train [37][2980/3239]	Time 0.236 (0.231)	Data 0.001 (0.009)	Loss 3.9823 (4.0066)	Top-1 acc 31.641 (31.084)	Top-5 acc 55.469 (54.526)	lr 0.00073
Warmup Train [37][2990/3239]	Time 0.161 (0.231)	Data 0.001 (0.009)	Loss 3.8145 (4.0065)	Top-1 acc 35.938 (31.088)	Top-5 acc 61.328 (54.528)	lr 0.00073
Warmup Train [37][3000/3239]	Time 0.253 (0.231)	Data 0.001 (0.009)	Loss 4.0332 (4.0064)	Top-1 acc 30.078 (31.089)	Top-5 acc 57.812 (54.530)	lr 0.00073
Warmup Train [37][3010/3239]	Time 0.213 (0.231)	Data 0.001 (0.009)	Loss 4.2524 (4.0065)	Top-1 acc 29.297 (31.087)	Top-5 acc 47.656 (54.528)	lr 0.00072
Warmup Train [37][3020/3239]	Time 0.236 (0.231)	Data 0.001 (0.009)	Loss 3.9791 (4.0065)	Top-1 acc 32.422 (31.087)	Top-5 acc 56.641 (54.530)	lr 0.00072
Warmup Train [37][3030/3239]	Time 0.133 (0.231)	Data 0.001 (0.009)	Loss 3.9782 (4.0065)	Top-1 acc 29.688 (31.086)	Top-5 acc 56.641 (54.530)	lr 0.00072
Warmup Train [37][3040/3239]	Time 0.223 (0.231)	Data 0.003 (0.009)	Loss 3.9944 (4.0063)	Top-1 acc 29.297 (31.087)	Top-5 acc 56.250 (54.536)	lr 0.00072
Warmup Train [37][3050/3239]	Time 0.196 (0.231)	Data 0.001 (0.009)	Loss 3.9455 (4.0065)	Top-1 acc 33.594 (31.083)	Top-5 acc 53.125 (54.531)	lr 0.00072
Warmup Train [37][3060/3239]	Time 0.281 (0.231)	Data 0.002 (0.009)	Loss 4.2908 (4.0065)	Top-1 acc 25.781 (31.079)	Top-5 acc 48.047 (54.531)	lr 0.00072
Warmup Train [37][3070/3239]	Time 0.324 (0.231)	Data 0.001 (0.009)	Loss 4.0544 (4.0064)	Top-1 acc 28.906 (31.078)	Top-5 acc 55.078 (54.535)	lr 0.00072
Warmup Train [37][3080/3239]	Time 0.179 (0.231)	Data 0.001 (0.009)	Loss 4.1161 (4.0064)	Top-1 acc 30.078 (31.079)	Top-5 acc 54.297 (54.536)	lr 0.00071
Warmup Train [37][3090/3239]	Time 0.190 (0.231)	Data 0.001 (0.009)	Loss 4.0306 (4.0065)	Top-1 acc 31.250 (31.077)	Top-5 acc 52.734 (54.531)	lr 0.00071
Warmup Train [37][3100/3239]	Time 0.254 (0.231)	Data 0.001 (0.009)	Loss 3.8936 (4.0066)	Top-1 acc 32.812 (31.077)	Top-5 acc 55.859 (54.533)	lr 0.00071
Warmup Train [37][3110/3239]	Time 0.192 (0.230)	Data 0.001 (0.009)	Loss 4.1311 (4.0065)	Top-1 acc 26.172 (31.077)	Top-5 acc 52.344 (54.533)	lr 0.00071
Warmup Train [37][3120/3239]	Time 0.268 (0.230)	Data 0.001 (0.009)	Loss 3.7248 (4.0062)	Top-1 acc 34.375 (31.080)	Top-5 acc 58.984 (54.539)	lr 0.00071
Warmup Train [37][3130/3239]	Time 0.231 (0.230)	Data 0.002 (0.009)	Loss 4.1495 (4.0062)	Top-1 acc 27.344 (31.080)	Top-5 acc 51.172 (54.539)	lr 0.00071
Warmup Train [37][3140/3239]	Time 0.245 (0.230)	Data 0.001 (0.009)	Loss 3.8322 (4.0059)	Top-1 acc 35.156 (31.086)	Top-5 acc 59.766 (54.545)	lr 0.00070
Warmup Train [37][3150/3239]	Time 0.319 (0.230)	Data 0.001 (0.009)	Loss 4.0148 (4.0059)	Top-1 acc 32.031 (31.087)	Top-5 acc 55.469 (54.544)	lr 0.00070
Warmup Train [37][3160/3239]	Time 0.308 (0.230)	Data 0.002 (0.009)	Loss 4.0546 (4.0059)	Top-1 acc 29.688 (31.089)	Top-5 acc 52.734 (54.542)	lr 0.00070
Warmup Train [37][3170/3239]	Time 0.223 (0.230)	Data 0.001 (0.009)	Loss 4.0160 (4.0060)	Top-1 acc 31.641 (31.086)	Top-5 acc 53.125 (54.536)	lr 0.00070
Warmup Train [37][3180/3239]	Time 0.221 (0.230)	Data 0.000 (0.009)	Loss 4.0245 (4.0059)	Top-1 acc 23.828 (31.083)	Top-5 acc 53.516 (54.540)	lr 0.00070
Warmup Train [37][3190/3239]	Time 0.173 (0.230)	Data 0.000 (0.009)	Loss 4.0727 (4.0061)	Top-1 acc 27.344 (31.082)	Top-5 acc 51.172 (54.534)	lr 0.00070
Warmup Train [37][3200/3239]	Time 0.214 (0.230)	Data 0.000 (0.009)	Loss 3.7569 (4.0061)	Top-1 acc 37.109 (31.082)	Top-5 acc 62.109 (54.532)	lr 0.00070
Warmup Train [37][3210/3239]	Time 0.181 (0.230)	Data 0.000 (0.009)	Loss 3.9584 (4.0061)	Top-1 acc 33.594 (31.083)	Top-5 acc 54.688 (54.532)	lr 0.00070
Warmup Train [37][3220/3239]	Time 0.213 (0.230)	Data 0.000 (0.009)	Loss 3.9578 (4.0059)	Top-1 acc 30.859 (31.087)	Top-5 acc 55.078 (54.536)	lr 0.00069
Warmup Train [37][3230/3239]	Time 0.222 (0.230)	Data 0.000 (0.009)	Loss 4.0072 (4.0059)	Top-1 acc 28.906 (31.085)	Top-5 acc 52.344 (54.536)	lr 0.00069
Warmup Train [37][3239/3239]	Time 0.150 (0.230)	Data 0.000 (0.009)	Loss 4.0969 (4.0058)	Top-1 acc 25.926 (31.087)	Top-5 acc 50.617 (54.537)	lr 0.00069
==========Warmup Valid [37/40]	loss 2.946	top-1 acc 38.561	top-5 acc 63.252	Train top-1 31.087	top-5 54.537	flops: 442.4M
Warmup Train [38][0/3239]	Time 18.634 (18.634)	Data 16.903 (16.903)	Loss 3.9904 (3.9904)	Top-1 acc 30.078 (30.078)	Top-5 acc 54.297 (54.297)	lr 0.00069
Warmup Train [38][10/3239]	Time 0.302 (2.097)	Data 0.001 (1.645)	Loss 3.8494 (4.0261)	Top-1 acc 34.766 (30.504)	Top-5 acc 60.547 (54.474)	lr 0.00069
Warmup Train [38][20/3239]	Time 0.200 (1.215)	Data 0.001 (0.863)	Loss 3.7612 (4.0152)	Top-1 acc 36.328 (31.548)	Top-5 acc 58.203 (54.725)	lr 0.00069
Warmup Train [38][30/3239]	Time 0.189 (0.893)	Data 0.002 (0.585)	Loss 3.9567 (4.0152)	Top-1 acc 30.469 (31.641)	Top-5 acc 57.422 (54.688)	lr 0.00069
Warmup Train [38][40/3239]	Time 0.201 (0.734)	Data 0.001 (0.444)	Loss 3.7170 (4.0316)	Top-1 acc 35.547 (31.212)	Top-5 acc 62.500 (54.087)	lr 0.00069
Warmup Train [38][50/3239]	Time 0.222 (0.634)	Data 0.002 (0.357)	Loss 3.9562 (4.0271)	Top-1 acc 32.031 (31.066)	Top-5 acc 55.469 (53.891)	lr 0.00068
Warmup Train [38][60/3239]	Time 0.400 (0.570)	Data 0.001 (0.299)	Loss 3.9454 (4.0288)	Top-1 acc 32.422 (30.943)	Top-5 acc 57.812 (53.938)	lr 0.00068
Warmup Train [38][70/3239]	Time 0.153 (0.522)	Data 0.001 (0.257)	Loss 4.0713 (4.0262)	Top-1 acc 29.688 (30.914)	Top-5 acc 54.688 (53.939)	lr 0.00068
Warmup Train [38][80/3239]	Time 0.180 (0.486)	Data 0.001 (0.226)	Loss 3.9321 (4.0212)	Top-1 acc 33.594 (31.019)	Top-5 acc 56.250 (54.027)	lr 0.00068
Warmup Train [38][90/3239]	Time 0.286 (0.457)	Data 0.001 (0.201)	Loss 4.0274 (4.0265)	Top-1 acc 31.641 (30.988)	Top-5 acc 56.250 (53.962)	lr 0.00068
Warmup Train [38][100/3239]	Time 0.194 (0.436)	Data 0.002 (0.181)	Loss 4.0965 (4.0299)	Top-1 acc 28.906 (30.956)	Top-5 acc 52.344 (53.891)	lr 0.00068
Warmup Train [38][110/3239]	Time 0.296 (0.418)	Data 0.001 (0.165)	Loss 3.9607 (4.0262)	Top-1 acc 30.859 (31.007)	Top-5 acc 55.859 (53.980)	lr 0.00068
Warmup Train [38][120/3239]	Time 0.245 (0.402)	Data 0.001 (0.152)	Loss 4.2145 (4.0265)	Top-1 acc 25.000 (30.917)	Top-5 acc 48.047 (53.932)	lr 0.00067
Warmup Train [38][130/3239]	Time 0.236 (0.388)	Data 0.001 (0.140)	Loss 3.9222 (4.0270)	Top-1 acc 32.031 (30.925)	Top-5 acc 57.422 (53.957)	lr 0.00067
Warmup Train [38][140/3239]	Time 0.179 (0.377)	Data 0.001 (0.130)	Loss 3.8782 (4.0274)	Top-1 acc 31.641 (30.937)	Top-5 acc 55.859 (53.892)	lr 0.00067
Warmup Train [38][150/3239]	Time 0.265 (0.367)	Data 0.001 (0.122)	Loss 3.9598 (4.0215)	Top-1 acc 31.641 (31.069)	Top-5 acc 57.031 (54.005)	lr 0.00067
Warmup Train [38][160/3239]	Time 0.231 (0.358)	Data 0.001 (0.115)	Loss 3.7034 (4.0224)	Top-1 acc 38.281 (31.039)	Top-5 acc 64.844 (54.025)	lr 0.00067
Warmup Train [38][170/3239]	Time 0.349 (0.351)	Data 0.001 (0.108)	Loss 3.9813 (4.0218)	Top-1 acc 31.250 (31.028)	Top-5 acc 57.812 (54.059)	lr 0.00067
Warmup Train [38][180/3239]	Time 0.243 (0.343)	Data 0.001 (0.102)	Loss 3.8257 (4.0209)	Top-1 acc 37.109 (31.010)	Top-5 acc 60.156 (54.103)	lr 0.00067
Warmup Train [38][190/3239]	Time 0.216 (0.336)	Data 0.001 (0.097)	Loss 4.2092 (4.0238)	Top-1 acc 26.953 (30.923)	Top-5 acc 48.828 (54.060)	lr 0.00066
Warmup Train [38][200/3239]	Time 0.215 (0.331)	Data 0.002 (0.092)	Loss 3.9906 (4.0218)	Top-1 acc 30.859 (30.972)	Top-5 acc 54.688 (54.147)	lr 0.00066
Warmup Train [38][210/3239]	Time 0.225 (0.326)	Data 0.002 (0.088)	Loss 4.0500 (4.0223)	Top-1 acc 31.641 (30.995)	Top-5 acc 56.641 (54.115)	lr 0.00066
Warmup Train [38][220/3239]	Time 0.195 (0.321)	Data 0.001 (0.084)	Loss 3.9327 (4.0212)	Top-1 acc 29.688 (30.950)	Top-5 acc 57.422 (54.124)	lr 0.00066
Warmup Train [38][230/3239]	Time 0.268 (0.316)	Data 0.001 (0.081)	Loss 4.2850 (4.0235)	Top-1 acc 25.781 (30.890)	Top-5 acc 49.609 (54.064)	lr 0.00066
Warmup Train [38][240/3239]	Time 0.186 (0.312)	Data 0.001 (0.077)	Loss 4.0344 (4.0246)	Top-1 acc 32.031 (30.845)	Top-5 acc 54.297 (54.057)	lr 0.00066
Warmup Train [38][250/3239]	Time 0.199 (0.309)	Data 0.001 (0.074)	Loss 3.8585 (4.0201)	Top-1 acc 35.156 (30.939)	Top-5 acc 57.812 (54.191)	lr 0.00066
Warmup Train [38][260/3239]	Time 0.246 (0.305)	Data 0.001 (0.072)	Loss 3.9584 (4.0191)	Top-1 acc 29.297 (30.946)	Top-5 acc 53.125 (54.203)	lr 0.00065
Warmup Train [38][270/3239]	Time 0.308 (0.302)	Data 0.002 (0.069)	Loss 3.7880 (4.0187)	Top-1 acc 37.500 (30.960)	Top-5 acc 58.594 (54.220)	lr 0.00065
Warmup Train [38][280/3239]	Time 0.305 (0.300)	Data 0.001 (0.067)	Loss 3.9430 (4.0181)	Top-1 acc 34.375 (30.983)	Top-5 acc 58.203 (54.252)	lr 0.00065
Warmup Train [38][290/3239]	Time 0.179 (0.297)	Data 0.001 (0.064)	Loss 3.9476 (4.0159)	Top-1 acc 30.469 (31.011)	Top-5 acc 55.078 (54.290)	lr 0.00065
Warmup Train [38][300/3239]	Time 0.166 (0.294)	Data 0.001 (0.062)	Loss 3.8825 (4.0155)	Top-1 acc 34.766 (31.003)	Top-5 acc 54.688 (54.303)	lr 0.00065
Warmup Train [38][310/3239]	Time 0.266 (0.292)	Data 0.001 (0.061)	Loss 3.9861 (4.0156)	Top-1 acc 31.641 (31.030)	Top-5 acc 55.469 (54.325)	lr 0.00065
Warmup Train [38][320/3239]	Time 0.235 (0.290)	Data 0.001 (0.059)	Loss 4.0009 (4.0161)	Top-1 acc 33.594 (31.033)	Top-5 acc 53.125 (54.313)	lr 0.00065
Warmup Train [38][330/3239]	Time 0.181 (0.288)	Data 0.001 (0.057)	Loss 4.1328 (4.0174)	Top-1 acc 25.391 (30.983)	Top-5 acc 49.609 (54.276)	lr 0.00064
Warmup Train [38][340/3239]	Time 0.172 (0.285)	Data 0.001 (0.055)	Loss 3.9641 (4.0184)	Top-1 acc 30.469 (30.967)	Top-5 acc 58.594 (54.251)	lr 0.00064
Warmup Train [38][350/3239]	Time 0.220 (0.284)	Data 0.001 (0.054)	Loss 3.9311 (4.0170)	Top-1 acc 33.203 (30.977)	Top-5 acc 55.469 (54.259)	lr 0.00064
Warmup Train [38][360/3239]	Time 0.200 (0.282)	Data 0.001 (0.053)	Loss 3.9370 (4.0168)	Top-1 acc 33.594 (30.992)	Top-5 acc 54.688 (54.260)	lr 0.00064
Warmup Train [38][370/3239]	Time 0.224 (0.281)	Data 0.002 (0.051)	Loss 4.1136 (4.0176)	Top-1 acc 30.078 (30.969)	Top-5 acc 53.516 (54.256)	lr 0.00064
Warmup Train [38][380/3239]	Time 0.152 (0.280)	Data 0.002 (0.050)	Loss 4.0279 (4.0174)	Top-1 acc 31.250 (30.980)	Top-5 acc 53.516 (54.268)	lr 0.00064
Warmup Train [38][390/3239]	Time 0.314 (0.278)	Data 0.002 (0.049)	Loss 3.9086 (4.0154)	Top-1 acc 34.375 (31.030)	Top-5 acc 55.859 (54.308)	lr 0.00064
Warmup Train [38][400/3239]	Time 0.204 (0.277)	Data 0.001 (0.048)	Loss 3.9762 (4.0151)	Top-1 acc 30.859 (31.020)	Top-5 acc 55.078 (54.304)	lr 0.00064
Warmup Train [38][410/3239]	Time 0.230 (0.276)	Data 0.001 (0.047)	Loss 4.1408 (4.0147)	Top-1 acc 29.297 (31.054)	Top-5 acc 49.219 (54.305)	lr 0.00063
Warmup Train [38][420/3239]	Time 0.242 (0.274)	Data 0.001 (0.045)	Loss 3.8828 (4.0141)	Top-1 acc 33.594 (31.057)	Top-5 acc 59.766 (54.331)	lr 0.00063
Warmup Train [38][430/3239]	Time 0.221 (0.273)	Data 0.001 (0.044)	Loss 3.9064 (4.0142)	Top-1 acc 33.594 (31.051)	Top-5 acc 55.859 (54.330)	lr 0.00063
Warmup Train [38][440/3239]	Time 0.184 (0.271)	Data 0.001 (0.044)	Loss 3.9399 (4.0127)	Top-1 acc 32.422 (31.076)	Top-5 acc 53.906 (54.354)	lr 0.00063
Warmup Train [38][450/3239]	Time 0.172 (0.270)	Data 0.001 (0.043)	Loss 4.0245 (4.0121)	Top-1 acc 31.250 (31.077)	Top-5 acc 52.344 (54.370)	lr 0.00063
Warmup Train [38][460/3239]	Time 0.184 (0.269)	Data 0.001 (0.042)	Loss 4.0352 (4.0116)	Top-1 acc 28.906 (31.066)	Top-5 acc 51.172 (54.378)	lr 0.00063
Warmup Train [38][470/3239]	Time 0.181 (0.268)	Data 0.001 (0.041)	Loss 3.9369 (4.0106)	Top-1 acc 32.031 (31.078)	Top-5 acc 55.859 (54.398)	lr 0.00063
Warmup Train [38][480/3239]	Time 0.224 (0.267)	Data 0.001 (0.040)	Loss 4.0561 (4.0112)	Top-1 acc 29.688 (31.045)	Top-5 acc 52.344 (54.383)	lr 0.00062
Warmup Train [38][490/3239]	Time 0.312 (0.267)	Data 0.001 (0.039)	Loss 4.0611 (4.0097)	Top-1 acc 31.641 (31.077)	Top-5 acc 51.953 (54.414)	lr 0.00062
Warmup Train [38][500/3239]	Time 0.221 (0.266)	Data 0.002 (0.039)	Loss 3.8778 (4.0098)	Top-1 acc 29.297 (31.052)	Top-5 acc 58.203 (54.405)	lr 0.00062
Warmup Train [38][510/3239]	Time 0.229 (0.266)	Data 0.001 (0.038)	Loss 4.1237 (4.0105)	Top-1 acc 29.297 (31.045)	Top-5 acc 52.344 (54.394)	lr 0.00062
Warmup Train [38][520/3239]	Time 0.242 (0.265)	Data 0.030 (0.037)	Loss 3.9458 (4.0098)	Top-1 acc 30.859 (31.054)	Top-5 acc 55.859 (54.412)	lr 0.00062
Warmup Train [38][530/3239]	Time 0.216 (0.264)	Data 0.001 (0.037)	Loss 4.1084 (4.0091)	Top-1 acc 28.125 (31.061)	Top-5 acc 52.734 (54.437)	lr 0.00062
Warmup Train [38][540/3239]	Time 0.245 (0.263)	Data 0.001 (0.036)	Loss 4.0862 (4.0083)	Top-1 acc 28.516 (31.064)	Top-5 acc 55.469 (54.472)	lr 0.00062
Warmup Train [38][550/3239]	Time 0.198 (0.262)	Data 0.001 (0.035)	Loss 4.0733 (4.0081)	Top-1 acc 30.469 (31.074)	Top-5 acc 52.344 (54.474)	lr 0.00062
Warmup Train [38][560/3239]	Time 0.313 (0.262)	Data 0.001 (0.035)	Loss 4.1409 (4.0082)	Top-1 acc 25.000 (31.081)	Top-5 acc 47.656 (54.456)	lr 0.00061
Warmup Train [38][570/3239]	Time 0.211 (0.261)	Data 0.001 (0.034)	Loss 4.1045 (4.0085)	Top-1 acc 31.641 (31.080)	Top-5 acc 51.562 (54.458)	lr 0.00061
Warmup Train [38][580/3239]	Time 0.306 (0.261)	Data 0.001 (0.034)	Loss 3.8305 (4.0074)	Top-1 acc 35.547 (31.095)	Top-5 acc 60.156 (54.483)	lr 0.00061
Warmup Train [38][590/3239]	Time 0.335 (0.260)	Data 0.001 (0.033)	Loss 4.1036 (4.0076)	Top-1 acc 26.172 (31.089)	Top-5 acc 53.125 (54.496)	lr 0.00061
Warmup Train [38][600/3239]	Time 0.191 (0.259)	Data 0.001 (0.033)	Loss 4.0747 (4.0069)	Top-1 acc 27.344 (31.070)	Top-5 acc 53.125 (54.519)	lr 0.00061
Warmup Train [38][610/3239]	Time 0.206 (0.259)	Data 0.001 (0.032)	Loss 3.9531 (4.0063)	Top-1 acc 31.250 (31.093)	Top-5 acc 59.375 (54.549)	lr 0.00061
Warmup Train [38][620/3239]	Time 0.224 (0.258)	Data 0.001 (0.032)	Loss 3.9025 (4.0072)	Top-1 acc 34.375 (31.086)	Top-5 acc 54.297 (54.525)	lr 0.00061
Warmup Train [38][630/3239]	Time 0.246 (0.257)	Data 0.001 (0.031)	Loss 4.0388 (4.0081)	Top-1 acc 30.078 (31.072)	Top-5 acc 55.469 (54.512)	lr 0.00060
Warmup Train [38][640/3239]	Time 0.150 (0.257)	Data 0.001 (0.031)	Loss 4.0444 (4.0076)	Top-1 acc 29.297 (31.073)	Top-5 acc 50.391 (54.516)	lr 0.00060
Warmup Train [38][650/3239]	Time 0.177 (0.256)	Data 0.002 (0.031)	Loss 4.0072 (4.0075)	Top-1 acc 26.562 (31.054)	Top-5 acc 53.906 (54.528)	lr 0.00060
Warmup Train [38][660/3239]	Time 0.226 (0.256)	Data 0.001 (0.030)	Loss 3.9854 (4.0067)	Top-1 acc 31.250 (31.061)	Top-5 acc 56.641 (54.548)	lr 0.00060
Warmup Train [38][670/3239]	Time 0.234 (0.255)	Data 0.001 (0.030)	Loss 4.0749 (4.0064)	Top-1 acc 32.812 (31.071)	Top-5 acc 52.344 (54.550)	lr 0.00060
Warmup Train [38][680/3239]	Time 0.190 (0.255)	Data 0.002 (0.029)	Loss 4.0362 (4.0061)	Top-1 acc 33.203 (31.073)	Top-5 acc 53.125 (54.562)	lr 0.00060
Warmup Train [38][690/3239]	Time 0.368 (0.255)	Data 0.003 (0.029)	Loss 3.9015 (4.0053)	Top-1 acc 33.203 (31.076)	Top-5 acc 58.984 (54.580)	lr 0.00060
Warmup Train [38][700/3239]	Time 0.220 (0.254)	Data 0.001 (0.029)	Loss 4.0045 (4.0048)	Top-1 acc 30.469 (31.100)	Top-5 acc 55.469 (54.600)	lr 0.00060
Warmup Train [38][710/3239]	Time 0.280 (0.254)	Data 0.001 (0.028)	Loss 3.9852 (4.0048)	Top-1 acc 28.125 (31.087)	Top-5 acc 50.000 (54.599)	lr 0.00059
Warmup Train [38][720/3239]	Time 0.184 (0.253)	Data 0.001 (0.028)	Loss 3.8084 (4.0043)	Top-1 acc 37.109 (31.087)	Top-5 acc 57.031 (54.600)	lr 0.00059
Warmup Train [38][730/3239]	Time 0.259 (0.253)	Data 0.001 (0.027)	Loss 3.9153 (4.0043)	Top-1 acc 30.078 (31.093)	Top-5 acc 57.812 (54.603)	lr 0.00059
Warmup Train [38][740/3239]	Time 0.187 (0.253)	Data 0.001 (0.027)	Loss 4.1165 (4.0047)	Top-1 acc 26.172 (31.088)	Top-5 acc 53.906 (54.592)	lr 0.00059
Warmup Train [38][750/3239]	Time 0.228 (0.253)	Data 0.001 (0.027)	Loss 3.9416 (4.0050)	Top-1 acc 33.984 (31.084)	Top-5 acc 54.688 (54.579)	lr 0.00059
Warmup Train [38][760/3239]	Time 0.191 (0.252)	Data 0.001 (0.027)	Loss 4.0487 (4.0045)	Top-1 acc 31.250 (31.102)	Top-5 acc 54.688 (54.593)	lr 0.00059
Warmup Train [38][770/3239]	Time 0.132 (0.252)	Data 0.002 (0.026)	Loss 4.0155 (4.0047)	Top-1 acc 30.078 (31.111)	Top-5 acc 51.172 (54.579)	lr 0.00059
Warmup Train [38][780/3239]	Time 0.219 (0.251)	Data 0.001 (0.026)	Loss 3.8821 (4.0040)	Top-1 acc 32.812 (31.127)	Top-5 acc 57.812 (54.601)	lr 0.00058
Warmup Train [38][790/3239]	Time 0.329 (0.251)	Data 0.001 (0.026)	Loss 4.1395 (4.0050)	Top-1 acc 27.344 (31.106)	Top-5 acc 55.859 (54.577)	lr 0.00058
Warmup Train [38][800/3239]	Time 0.213 (0.251)	Data 0.002 (0.025)	Loss 3.7511 (4.0047)	Top-1 acc 33.984 (31.097)	Top-5 acc 59.766 (54.564)	lr 0.00058
Warmup Train [38][810/3239]	Time 0.176 (0.250)	Data 0.002 (0.025)	Loss 3.8303 (4.0037)	Top-1 acc 34.766 (31.113)	Top-5 acc 56.641 (54.584)	lr 0.00058
Warmup Train [38][820/3239]	Time 0.176 (0.250)	Data 0.001 (0.025)	Loss 3.9705 (4.0034)	Top-1 acc 27.734 (31.099)	Top-5 acc 54.688 (54.585)	lr 0.00058
Warmup Train [38][830/3239]	Time 0.242 (0.250)	Data 0.002 (0.025)	Loss 4.1356 (4.0035)	Top-1 acc 31.641 (31.101)	Top-5 acc 53.125 (54.577)	lr 0.00058
Warmup Train [38][840/3239]	Time 0.232 (0.249)	Data 0.001 (0.024)	Loss 4.0514 (4.0030)	Top-1 acc 30.469 (31.119)	Top-5 acc 51.172 (54.593)	lr 0.00058
Warmup Train [38][850/3239]	Time 0.189 (0.249)	Data 0.002 (0.024)	Loss 3.9974 (4.0030)	Top-1 acc 30.859 (31.119)	Top-5 acc 54.688 (54.590)	lr 0.00058
Warmup Train [38][860/3239]	Time 0.225 (0.249)	Data 0.001 (0.024)	Loss 3.9459 (4.0033)	Top-1 acc 33.594 (31.123)	Top-5 acc 56.641 (54.578)	lr 0.00057
Warmup Train [38][870/3239]	Time 0.161 (0.248)	Data 0.001 (0.024)	Loss 3.7763 (4.0024)	Top-1 acc 35.156 (31.138)	Top-5 acc 58.984 (54.596)	lr 0.00057
Warmup Train [38][880/3239]	Time 0.142 (0.248)	Data 0.001 (0.023)	Loss 3.9920 (4.0026)	Top-1 acc 30.859 (31.141)	Top-5 acc 55.078 (54.595)	lr 0.00057
Warmup Train [38][890/3239]	Time 0.306 (0.248)	Data 0.002 (0.023)	Loss 4.0351 (4.0023)	Top-1 acc 29.297 (31.150)	Top-5 acc 54.297 (54.595)	lr 0.00057
Warmup Train [38][900/3239]	Time 0.298 (0.248)	Data 0.001 (0.023)	Loss 3.8656 (4.0014)	Top-1 acc 33.594 (31.179)	Top-5 acc 55.078 (54.620)	lr 0.00057
Warmup Train [38][910/3239]	Time 0.227 (0.248)	Data 0.002 (0.023)	Loss 4.0032 (4.0013)	Top-1 acc 30.078 (31.192)	Top-5 acc 55.469 (54.622)	lr 0.00057
Warmup Train [38][920/3239]	Time 0.219 (0.248)	Data 0.002 (0.022)	Loss 4.1008 (4.0015)	Top-1 acc 29.297 (31.180)	Top-5 acc 54.297 (54.614)	lr 0.00057
Warmup Train [38][930/3239]	Time 0.232 (0.247)	Data 0.001 (0.022)	Loss 3.9249 (4.0022)	Top-1 acc 30.859 (31.167)	Top-5 acc 57.812 (54.608)	lr 0.00057
Warmup Train [38][940/3239]	Time 0.219 (0.247)	Data 0.001 (0.022)	Loss 4.0247 (4.0029)	Top-1 acc 29.297 (31.152)	Top-5 acc 50.391 (54.590)	lr 0.00056
Warmup Train [38][950/3239]	Time 0.212 (0.247)	Data 0.001 (0.022)	Loss 3.8692 (4.0028)	Top-1 acc 37.500 (31.155)	Top-5 acc 61.719 (54.594)	lr 0.00056
Warmup Train [38][960/3239]	Time 0.229 (0.246)	Data 0.001 (0.022)	Loss 4.0474 (4.0032)	Top-1 acc 35.156 (31.146)	Top-5 acc 58.203 (54.582)	lr 0.00056
Warmup Train [38][970/3239]	Time 0.215 (0.246)	Data 0.001 (0.021)	Loss 3.8767 (4.0033)	Top-1 acc 32.031 (31.166)	Top-5 acc 58.203 (54.588)	lr 0.00056
Warmup Train [38][980/3239]	Time 0.356 (0.246)	Data 0.001 (0.021)	Loss 4.0699 (4.0033)	Top-1 acc 32.031 (31.165)	Top-5 acc 52.344 (54.580)	lr 0.00056
Warmup Train [38][990/3239]	Time 0.202 (0.246)	Data 0.001 (0.021)	Loss 3.8574 (4.0031)	Top-1 acc 34.766 (31.162)	Top-5 acc 58.203 (54.591)	lr 0.00056
Warmup Train [38][1000/3239]	Time 0.207 (0.246)	Data 0.001 (0.021)	Loss 4.0713 (4.0027)	Top-1 acc 28.125 (31.176)	Top-5 acc 52.344 (54.608)	lr 0.00056
Warmup Train [38][1010/3239]	Time 0.257 (0.245)	Data 0.001 (0.021)	Loss 4.0328 (4.0028)	Top-1 acc 26.953 (31.172)	Top-5 acc 50.391 (54.609)	lr 0.00056
Warmup Train [38][1020/3239]	Time 0.230 (0.245)	Data 0.002 (0.020)	Loss 3.9272 (4.0025)	Top-1 acc 32.031 (31.182)	Top-5 acc 57.812 (54.608)	lr 0.00055
Warmup Train [38][1030/3239]	Time 0.214 (0.245)	Data 0.001 (0.020)	Loss 3.9836 (4.0026)	Top-1 acc 30.859 (31.184)	Top-5 acc 53.906 (54.602)	lr 0.00055
Warmup Train [38][1040/3239]	Time 0.266 (0.245)	Data 0.001 (0.020)	Loss 4.3047 (4.0031)	Top-1 acc 26.172 (31.177)	Top-5 acc 51.562 (54.593)	lr 0.00055
Warmup Train [38][1050/3239]	Time 0.213 (0.245)	Data 0.002 (0.020)	Loss 4.0303 (4.0038)	Top-1 acc 34.766 (31.169)	Top-5 acc 57.422 (54.576)	lr 0.00055
Warmup Train [38][1060/3239]	Time 0.166 (0.244)	Data 0.002 (0.020)	Loss 4.0191 (4.0043)	Top-1 acc 32.812 (31.164)	Top-5 acc 53.125 (54.562)	lr 0.00055
Warmup Train [38][1070/3239]	Time 0.194 (0.244)	Data 0.001 (0.020)	Loss 3.9627 (4.0044)	Top-1 acc 32.422 (31.166)	Top-5 acc 54.688 (54.557)	lr 0.00055
Warmup Train [38][1080/3239]	Time 0.316 (0.244)	Data 0.001 (0.019)	Loss 3.8697 (4.0040)	Top-1 acc 35.156 (31.174)	Top-5 acc 54.297 (54.563)	lr 0.00055
Warmup Train [38][1090/3239]	Time 0.146 (0.244)	Data 0.001 (0.019)	Loss 4.0881 (4.0040)	Top-1 acc 29.688 (31.172)	Top-5 acc 52.344 (54.556)	lr 0.00055
Warmup Train [38][1100/3239]	Time 0.165 (0.244)	Data 0.001 (0.019)	Loss 4.1140 (4.0039)	Top-1 acc 30.859 (31.181)	Top-5 acc 54.297 (54.565)	lr 0.00054
Warmup Train [38][1110/3239]	Time 0.212 (0.244)	Data 0.001 (0.019)	Loss 4.1823 (4.0042)	Top-1 acc 26.562 (31.174)	Top-5 acc 48.828 (54.549)	lr 0.00054
Warmup Train [38][1120/3239]	Time 0.174 (0.243)	Data 0.001 (0.019)	Loss 3.7999 (4.0042)	Top-1 acc 32.812 (31.174)	Top-5 acc 58.594 (54.551)	lr 0.00054
Warmup Train [38][1130/3239]	Time 0.190 (0.243)	Data 0.001 (0.019)	Loss 3.8186 (4.0038)	Top-1 acc 36.719 (31.175)	Top-5 acc 60.547 (54.561)	lr 0.00054
Warmup Train [38][1140/3239]	Time 0.266 (0.243)	Data 0.001 (0.019)	Loss 3.7865 (4.0041)	Top-1 acc 32.031 (31.175)	Top-5 acc 58.984 (54.563)	lr 0.00054
Warmup Train [38][1150/3239]	Time 0.197 (0.243)	Data 0.001 (0.018)	Loss 4.0318 (4.0047)	Top-1 acc 27.734 (31.163)	Top-5 acc 54.297 (54.547)	lr 0.00054
Warmup Train [38][1160/3239]	Time 0.159 (0.243)	Data 0.002 (0.018)	Loss 4.0740 (4.0045)	Top-1 acc 24.609 (31.157)	Top-5 acc 50.391 (54.538)	lr 0.00054
Warmup Train [38][1170/3239]	Time 0.248 (0.243)	Data 0.001 (0.018)	Loss 4.0468 (4.0042)	Top-1 acc 35.547 (31.173)	Top-5 acc 52.734 (54.549)	lr 0.00054
Warmup Train [38][1180/3239]	Time 0.326 (0.243)	Data 0.001 (0.018)	Loss 3.9816 (4.0040)	Top-1 acc 33.594 (31.176)	Top-5 acc 55.859 (54.554)	lr 0.00053
Warmup Train [38][1190/3239]	Time 0.233 (0.243)	Data 0.001 (0.018)	Loss 3.6014 (4.0032)	Top-1 acc 39.453 (31.190)	Top-5 acc 64.844 (54.569)	lr 0.00053
Warmup Train [38][1200/3239]	Time 0.154 (0.242)	Data 0.001 (0.018)	Loss 3.9000 (4.0032)	Top-1 acc 32.812 (31.188)	Top-5 acc 58.594 (54.566)	lr 0.00053
Warmup Train [38][1210/3239]	Time 0.200 (0.242)	Data 0.001 (0.018)	Loss 3.9247 (4.0031)	Top-1 acc 34.766 (31.188)	Top-5 acc 58.203 (54.579)	lr 0.00053
Warmup Train [38][1220/3239]	Time 0.241 (0.242)	Data 0.001 (0.018)	Loss 3.9856 (4.0031)	Top-1 acc 31.641 (31.190)	Top-5 acc 53.516 (54.580)	lr 0.00053
Warmup Train [38][1230/3239]	Time 0.282 (0.242)	Data 0.001 (0.017)	Loss 4.0649 (4.0030)	Top-1 acc 29.688 (31.186)	Top-5 acc 53.125 (54.581)	lr 0.00053
Warmup Train [38][1240/3239]	Time 0.150 (0.242)	Data 0.001 (0.017)	Loss 3.9042 (4.0031)	Top-1 acc 32.812 (31.179)	Top-5 acc 53.906 (54.580)	lr 0.00053
Warmup Train [38][1250/3239]	Time 0.280 (0.242)	Data 0.030 (0.017)	Loss 4.3353 (4.0031)	Top-1 acc 26.172 (31.176)	Top-5 acc 47.656 (54.583)	lr 0.00053
Warmup Train [38][1260/3239]	Time 0.218 (0.242)	Data 0.001 (0.017)	Loss 3.8764 (4.0029)	Top-1 acc 36.328 (31.179)	Top-5 acc 62.109 (54.595)	lr 0.00052
Warmup Train [38][1270/3239]	Time 0.370 (0.241)	Data 0.001 (0.017)	Loss 4.0648 (4.0026)	Top-1 acc 28.906 (31.184)	Top-5 acc 50.391 (54.606)	lr 0.00052
Warmup Train [38][1280/3239]	Time 0.204 (0.241)	Data 0.001 (0.017)	Loss 4.1667 (4.0024)	Top-1 acc 27.344 (31.189)	Top-5 acc 51.172 (54.609)	lr 0.00052
Warmup Train [38][1290/3239]	Time 0.185 (0.241)	Data 0.001 (0.017)	Loss 4.0277 (4.0028)	Top-1 acc 33.594 (31.191)	Top-5 acc 55.078 (54.603)	lr 0.00052
Warmup Train [38][1300/3239]	Time 0.234 (0.241)	Data 0.001 (0.017)	Loss 3.6385 (4.0025)	Top-1 acc 41.406 (31.198)	Top-5 acc 58.984 (54.602)	lr 0.00052
Warmup Train [38][1310/3239]	Time 0.215 (0.241)	Data 0.002 (0.017)	Loss 4.0027 (4.0026)	Top-1 acc 31.641 (31.196)	Top-5 acc 51.953 (54.602)	lr 0.00052
Warmup Train [38][1320/3239]	Time 0.181 (0.241)	Data 0.002 (0.016)	Loss 3.9629 (4.0025)	Top-1 acc 26.953 (31.195)	Top-5 acc 59.375 (54.604)	lr 0.00052
Warmup Train [38][1330/3239]	Time 0.184 (0.241)	Data 0.002 (0.016)	Loss 4.2068 (4.0026)	Top-1 acc 25.781 (31.194)	Top-5 acc 51.562 (54.604)	lr 0.00052
Warmup Train [38][1340/3239]	Time 0.250 (0.241)	Data 0.001 (0.016)	Loss 4.1861 (4.0027)	Top-1 acc 31.250 (31.190)	Top-5 acc 51.562 (54.604)	lr 0.00051
Warmup Train [38][1350/3239]	Time 0.222 (0.240)	Data 0.001 (0.016)	Loss 3.7666 (4.0026)	Top-1 acc 37.500 (31.191)	Top-5 acc 60.547 (54.610)	lr 0.00051
Warmup Train [38][1360/3239]	Time 0.252 (0.240)	Data 0.001 (0.016)	Loss 4.0907 (4.0025)	Top-1 acc 28.516 (31.189)	Top-5 acc 52.734 (54.611)	lr 0.00051
Warmup Train [38][1370/3239]	Time 0.317 (0.240)	Data 0.001 (0.016)	Loss 4.0387 (4.0026)	Top-1 acc 33.203 (31.190)	Top-5 acc 53.125 (54.611)	lr 0.00051
Warmup Train [38][1380/3239]	Time 0.239 (0.240)	Data 0.002 (0.016)	Loss 3.9989 (4.0032)	Top-1 acc 31.641 (31.175)	Top-5 acc 57.422 (54.598)	lr 0.00051
Warmup Train [38][1390/3239]	Time 0.158 (0.240)	Data 0.001 (0.016)	Loss 4.1305 (4.0032)	Top-1 acc 28.906 (31.171)	Top-5 acc 54.297 (54.596)	lr 0.00051
Warmup Train [38][1400/3239]	Time 0.233 (0.240)	Data 0.001 (0.016)	Loss 4.0121 (4.0035)	Top-1 acc 28.906 (31.170)	Top-5 acc 55.078 (54.594)	lr 0.00051
Warmup Train [38][1410/3239]	Time 0.245 (0.240)	Data 0.001 (0.016)	Loss 3.9970 (4.0035)	Top-1 acc 31.250 (31.160)	Top-5 acc 57.031 (54.598)	lr 0.00051
Warmup Train [38][1420/3239]	Time 0.217 (0.240)	Data 0.001 (0.015)	Loss 4.0636 (4.0036)	Top-1 acc 33.984 (31.163)	Top-5 acc 51.172 (54.594)	lr 0.00050
Warmup Train [38][1430/3239]	Time 0.201 (0.240)	Data 0.002 (0.015)	Loss 3.9457 (4.0032)	Top-1 acc 33.203 (31.168)	Top-5 acc 58.984 (54.602)	lr 0.00050
Warmup Train [38][1440/3239]	Time 0.231 (0.240)	Data 0.001 (0.015)	Loss 3.9588 (4.0034)	Top-1 acc 33.594 (31.168)	Top-5 acc 57.422 (54.600)	lr 0.00050
Warmup Train [38][1450/3239]	Time 0.230 (0.239)	Data 0.002 (0.015)	Loss 3.8579 (4.0030)	Top-1 acc 30.469 (31.175)	Top-5 acc 56.641 (54.607)	lr 0.00050
Warmup Train [38][1460/3239]	Time 0.267 (0.239)	Data 0.001 (0.015)	Loss 3.9333 (4.0032)	Top-1 acc 31.641 (31.163)	Top-5 acc 56.641 (54.605)	lr 0.00050
Warmup Train [38][1470/3239]	Time 0.152 (0.239)	Data 0.001 (0.015)	Loss 3.9915 (4.0031)	Top-1 acc 30.469 (31.159)	Top-5 acc 54.297 (54.601)	lr 0.00050
Warmup Train [38][1480/3239]	Time 0.311 (0.239)	Data 0.001 (0.015)	Loss 4.0527 (4.0033)	Top-1 acc 28.906 (31.157)	Top-5 acc 52.734 (54.602)	lr 0.00050
Warmup Train [38][1490/3239]	Time 0.240 (0.239)	Data 0.001 (0.015)	Loss 4.0800 (4.0032)	Top-1 acc 26.172 (31.161)	Top-5 acc 51.562 (54.604)	lr 0.00050
Warmup Train [38][1500/3239]	Time 0.152 (0.239)	Data 0.001 (0.015)	Loss 4.2106 (4.0025)	Top-1 acc 27.344 (31.177)	Top-5 acc 52.344 (54.622)	lr 0.00049
Warmup Train [38][1510/3239]	Time 0.235 (0.239)	Data 0.001 (0.015)	Loss 4.0339 (4.0029)	Top-1 acc 29.688 (31.168)	Top-5 acc 57.031 (54.614)	lr 0.00049
Warmup Train [38][1520/3239]	Time 0.264 (0.239)	Data 0.002 (0.015)	Loss 4.0440 (4.0031)	Top-1 acc 23.828 (31.162)	Top-5 acc 50.391 (54.606)	lr 0.00049
Warmup Train [38][1530/3239]	Time 0.210 (0.239)	Data 0.001 (0.015)	Loss 3.9350 (4.0029)	Top-1 acc 29.297 (31.163)	Top-5 acc 54.297 (54.614)	lr 0.00049
Warmup Train [38][1540/3239]	Time 0.231 (0.238)	Data 0.001 (0.014)	Loss 4.1792 (4.0031)	Top-1 acc 30.078 (31.160)	Top-5 acc 46.875 (54.612)	lr 0.00049
Warmup Train [38][1550/3239]	Time 0.193 (0.238)	Data 0.001 (0.014)	Loss 3.8966 (4.0037)	Top-1 acc 34.375 (31.156)	Top-5 acc 57.422 (54.606)	lr 0.00049
Warmup Train [38][1560/3239]	Time 0.212 (0.238)	Data 0.001 (0.014)	Loss 3.9051 (4.0037)	Top-1 acc 29.297 (31.150)	Top-5 acc 54.297 (54.599)	lr 0.00049
Warmup Train [38][1570/3239]	Time 0.198 (0.238)	Data 0.001 (0.014)	Loss 3.9095 (4.0037)	Top-1 acc 30.859 (31.151)	Top-5 acc 55.469 (54.605)	lr 0.00049
Warmup Train [38][1580/3239]	Time 0.190 (0.238)	Data 0.001 (0.014)	Loss 4.0441 (4.0034)	Top-1 acc 30.859 (31.162)	Top-5 acc 53.906 (54.610)	lr 0.00049
Warmup Train [38][1590/3239]	Time 0.339 (0.238)	Data 0.001 (0.014)	Loss 4.2386 (4.0035)	Top-1 acc 27.344 (31.159)	Top-5 acc 49.609 (54.611)	lr 0.00048
Warmup Train [38][1600/3239]	Time 0.191 (0.238)	Data 0.001 (0.014)	Loss 4.2159 (4.0031)	Top-1 acc 29.688 (31.172)	Top-5 acc 51.953 (54.620)	lr 0.00048
Warmup Train [38][1610/3239]	Time 0.183 (0.238)	Data 0.001 (0.014)	Loss 4.0109 (4.0032)	Top-1 acc 30.859 (31.166)	Top-5 acc 54.297 (54.618)	lr 0.00048
Warmup Train [38][1620/3239]	Time 0.278 (0.238)	Data 0.001 (0.014)	Loss 3.9738 (4.0033)	Top-1 acc 30.859 (31.157)	Top-5 acc 56.250 (54.617)	lr 0.00048
Warmup Train [38][1630/3239]	Time 0.231 (0.238)	Data 0.006 (0.014)	Loss 4.0597 (4.0032)	Top-1 acc 33.594 (31.164)	Top-5 acc 55.859 (54.620)	lr 0.00048
Warmup Train [38][1640/3239]	Time 0.218 (0.238)	Data 0.001 (0.014)	Loss 3.8415 (4.0031)	Top-1 acc 39.453 (31.179)	Top-5 acc 58.594 (54.621)	lr 0.00048
Warmup Train [38][1650/3239]	Time 0.238 (0.237)	Data 0.001 (0.014)	Loss 3.8974 (4.0030)	Top-1 acc 32.812 (31.182)	Top-5 acc 53.516 (54.618)	lr 0.00048
Warmup Train [38][1660/3239]	Time 0.188 (0.237)	Data 0.002 (0.014)	Loss 4.1605 (4.0027)	Top-1 acc 26.953 (31.184)	Top-5 acc 49.219 (54.625)	lr 0.00048
Warmup Train [38][1670/3239]	Time 0.217 (0.237)	Data 0.001 (0.014)	Loss 3.7700 (4.0029)	Top-1 acc 38.672 (31.183)	Top-5 acc 61.328 (54.621)	lr 0.00047
Warmup Train [38][1680/3239]	Time 0.231 (0.237)	Data 0.002 (0.013)	Loss 3.9659 (4.0028)	Top-1 acc 33.984 (31.182)	Top-5 acc 56.250 (54.622)	lr 0.00047
Warmup Train [38][1690/3239]	Time 0.284 (0.237)	Data 0.001 (0.013)	Loss 3.8995 (4.0022)	Top-1 acc 30.469 (31.186)	Top-5 acc 57.812 (54.636)	lr 0.00047
Warmup Train [38][1700/3239]	Time 0.191 (0.237)	Data 0.001 (0.013)	Loss 4.0088 (4.0022)	Top-1 acc 31.250 (31.188)	Top-5 acc 53.906 (54.634)	lr 0.00047
Warmup Train [38][1710/3239]	Time 0.241 (0.237)	Data 0.001 (0.013)	Loss 4.0546 (4.0025)	Top-1 acc 30.469 (31.187)	Top-5 acc 53.906 (54.629)	lr 0.00047
Warmup Train [38][1720/3239]	Time 0.181 (0.237)	Data 0.001 (0.013)	Loss 3.8486 (4.0024)	Top-1 acc 32.812 (31.188)	Top-5 acc 57.031 (54.635)	lr 0.00047
Warmup Train [38][1730/3239]	Time 0.209 (0.237)	Data 0.001 (0.013)	Loss 4.1182 (4.0024)	Top-1 acc 27.344 (31.183)	Top-5 acc 51.172 (54.633)	lr 0.00047
Warmup Train [38][1740/3239]	Time 0.269 (0.237)	Data 0.001 (0.013)	Loss 3.8808 (4.0022)	Top-1 acc 31.641 (31.180)	Top-5 acc 58.984 (54.635)	lr 0.00047
Warmup Train [38][1750/3239]	Time 0.247 (0.237)	Data 0.001 (0.013)	Loss 4.0940 (4.0023)	Top-1 acc 28.906 (31.175)	Top-5 acc 53.125 (54.626)	lr 0.00047
Warmup Train [38][1760/3239]	Time 0.193 (0.237)	Data 0.002 (0.013)	Loss 4.0473 (4.0023)	Top-1 acc 32.031 (31.170)	Top-5 acc 54.297 (54.628)	lr 0.00046
Warmup Train [38][1770/3239]	Time 0.158 (0.237)	Data 0.001 (0.013)	Loss 3.9902 (4.0023)	Top-1 acc 32.422 (31.176)	Top-5 acc 53.906 (54.633)	lr 0.00046
Warmup Train [38][1780/3239]	Time 0.168 (0.236)	Data 0.001 (0.013)	Loss 4.1544 (4.0028)	Top-1 acc 27.344 (31.164)	Top-5 acc 48.047 (54.627)	lr 0.00046
Warmup Train [38][1790/3239]	Time 0.146 (0.236)	Data 0.001 (0.013)	Loss 4.0913 (4.0025)	Top-1 acc 30.469 (31.168)	Top-5 acc 50.000 (54.635)	lr 0.00046
Warmup Train [38][1800/3239]	Time 0.290 (0.236)	Data 0.001 (0.013)	Loss 3.8663 (4.0024)	Top-1 acc 35.938 (31.171)	Top-5 acc 56.641 (54.636)	lr 0.00046
Warmup Train [38][1810/3239]	Time 0.341 (0.236)	Data 0.002 (0.013)	Loss 3.7589 (4.0024)	Top-1 acc 34.375 (31.172)	Top-5 acc 60.156 (54.635)	lr 0.00046
Warmup Train [38][1820/3239]	Time 0.183 (0.236)	Data 0.001 (0.013)	Loss 4.2081 (4.0024)	Top-1 acc 27.344 (31.177)	Top-5 acc 55.078 (54.638)	lr 0.00046
Warmup Train [38][1830/3239]	Time 0.208 (0.236)	Data 0.001 (0.013)	Loss 3.9853 (4.0027)	Top-1 acc 33.203 (31.172)	Top-5 acc 54.688 (54.632)	lr 0.00046
Warmup Train [38][1840/3239]	Time 0.171 (0.236)	Data 0.002 (0.013)	Loss 4.0539 (4.0028)	Top-1 acc 28.516 (31.165)	Top-5 acc 53.906 (54.628)	lr 0.00045
Warmup Train [38][1850/3239]	Time 0.252 (0.236)	Data 0.001 (0.013)	Loss 4.0187 (4.0023)	Top-1 acc 28.906 (31.170)	Top-5 acc 50.391 (54.636)	lr 0.00045
Warmup Train [38][1860/3239]	Time 0.206 (0.236)	Data 0.001 (0.012)	Loss 4.1380 (4.0025)	Top-1 acc 28.516 (31.166)	Top-5 acc 52.344 (54.634)	lr 0.00045
Warmup Train [38][1870/3239]	Time 0.276 (0.236)	Data 0.001 (0.012)	Loss 4.2210 (4.0025)	Top-1 acc 28.906 (31.171)	Top-5 acc 46.484 (54.636)	lr 0.00045
Warmup Train [38][1880/3239]	Time 0.222 (0.236)	Data 0.001 (0.012)	Loss 4.0490 (4.0026)	Top-1 acc 32.422 (31.172)	Top-5 acc 57.031 (54.634)	lr 0.00045
Warmup Train [38][1890/3239]	Time 0.226 (0.236)	Data 0.001 (0.012)	Loss 4.0152 (4.0025)	Top-1 acc 28.516 (31.175)	Top-5 acc 53.516 (54.636)	lr 0.00045
Warmup Train [38][1900/3239]	Time 0.362 (0.236)	Data 0.001 (0.012)	Loss 4.0684 (4.0029)	Top-1 acc 34.375 (31.175)	Top-5 acc 56.641 (54.623)	lr 0.00045
Warmup Train [38][1910/3239]	Time 0.199 (0.236)	Data 0.002 (0.012)	Loss 3.9072 (4.0028)	Top-1 acc 31.250 (31.179)	Top-5 acc 57.031 (54.624)	lr 0.00045
Warmup Train [38][1920/3239]	Time 0.219 (0.236)	Data 0.001 (0.012)	Loss 3.7648 (4.0029)	Top-1 acc 32.812 (31.170)	Top-5 acc 58.594 (54.622)	lr 0.00045
Warmup Train [38][1930/3239]	Time 0.183 (0.236)	Data 0.001 (0.012)	Loss 3.8299 (4.0027)	Top-1 acc 33.594 (31.172)	Top-5 acc 59.766 (54.626)	lr 0.00044
Warmup Train [38][1940/3239]	Time 0.170 (0.236)	Data 0.001 (0.012)	Loss 4.0277 (4.0027)	Top-1 acc 34.766 (31.170)	Top-5 acc 54.688 (54.622)	lr 0.00044
Warmup Train [38][1950/3239]	Time 0.217 (0.235)	Data 0.001 (0.012)	Loss 4.1711 (4.0029)	Top-1 acc 27.344 (31.168)	Top-5 acc 51.562 (54.614)	lr 0.00044
Warmup Train [38][1960/3239]	Time 0.233 (0.235)	Data 0.001 (0.012)	Loss 4.0454 (4.0028)	Top-1 acc 26.953 (31.170)	Top-5 acc 55.078 (54.618)	lr 0.00044
Warmup Train [38][1970/3239]	Time 0.186 (0.235)	Data 0.001 (0.012)	Loss 4.1183 (4.0027)	Top-1 acc 23.828 (31.169)	Top-5 acc 50.000 (54.619)	lr 0.00044
Warmup Train [38][1980/3239]	Time 0.185 (0.235)	Data 0.001 (0.012)	Loss 3.9175 (4.0029)	Top-1 acc 35.547 (31.168)	Top-5 acc 58.203 (54.620)	lr 0.00044
Warmup Train [38][1990/3239]	Time 0.248 (0.235)	Data 0.001 (0.012)	Loss 4.0491 (4.0027)	Top-1 acc 28.906 (31.170)	Top-5 acc 55.469 (54.626)	lr 0.00044
Warmup Train [38][2000/3239]	Time 0.324 (0.235)	Data 0.003 (0.012)	Loss 3.9488 (4.0026)	Top-1 acc 29.297 (31.166)	Top-5 acc 56.641 (54.627)	lr 0.00044
Warmup Train [38][2010/3239]	Time 0.181 (0.235)	Data 0.001 (0.012)	Loss 3.7509 (4.0024)	Top-1 acc 39.453 (31.173)	Top-5 acc 61.328 (54.632)	lr 0.00044
Warmup Train [38][2020/3239]	Time 0.163 (0.235)	Data 0.001 (0.012)	Loss 3.8870 (4.0023)	Top-1 acc 30.078 (31.176)	Top-5 acc 54.688 (54.632)	lr 0.00043
Warmup Train [38][2030/3239]	Time 0.224 (0.235)	Data 0.001 (0.012)	Loss 3.9924 (4.0018)	Top-1 acc 32.812 (31.188)	Top-5 acc 55.469 (54.642)	lr 0.00043
Warmup Train [38][2040/3239]	Time 0.240 (0.235)	Data 0.001 (0.012)	Loss 3.9235 (4.0017)	Top-1 acc 34.766 (31.189)	Top-5 acc 54.688 (54.645)	lr 0.00043
Warmup Train [38][2050/3239]	Time 0.211 (0.235)	Data 0.001 (0.012)	Loss 3.9078 (4.0014)	Top-1 acc 37.500 (31.200)	Top-5 acc 53.906 (54.649)	lr 0.00043
Warmup Train [38][2060/3239]	Time 0.172 (0.235)	Data 0.001 (0.012)	Loss 4.1333 (4.0016)	Top-1 acc 30.078 (31.205)	Top-5 acc 52.344 (54.646)	lr 0.00043
Warmup Train [38][2070/3239]	Time 0.230 (0.235)	Data 0.001 (0.012)	Loss 3.9950 (4.0016)	Top-1 acc 25.781 (31.206)	Top-5 acc 55.859 (54.647)	lr 0.00043
Warmup Train [38][2080/3239]	Time 0.254 (0.235)	Data 0.001 (0.012)	Loss 3.9794 (4.0015)	Top-1 acc 30.078 (31.201)	Top-5 acc 57.031 (54.651)	lr 0.00043
Warmup Train [38][2090/3239]	Time 0.212 (0.235)	Data 0.001 (0.011)	Loss 3.9653 (4.0019)	Top-1 acc 33.984 (31.189)	Top-5 acc 55.859 (54.648)	lr 0.00043
Warmup Train [38][2100/3239]	Time 0.173 (0.234)	Data 0.001 (0.011)	Loss 4.1906 (4.0018)	Top-1 acc 33.203 (31.195)	Top-5 acc 51.562 (54.648)	lr 0.00043
Warmup Train [38][2110/3239]	Time 0.184 (0.234)	Data 0.001 (0.011)	Loss 3.8925 (4.0018)	Top-1 acc 35.156 (31.199)	Top-5 acc 58.594 (54.647)	lr 0.00042
Warmup Train [38][2120/3239]	Time 0.348 (0.234)	Data 0.001 (0.011)	Loss 4.0073 (4.0018)	Top-1 acc 34.766 (31.202)	Top-5 acc 56.250 (54.643)	lr 0.00042
Warmup Train [38][2130/3239]	Time 0.223 (0.234)	Data 0.001 (0.011)	Loss 4.0967 (4.0020)	Top-1 acc 28.125 (31.198)	Top-5 acc 51.953 (54.638)	lr 0.00042
Warmup Train [38][2140/3239]	Time 0.206 (0.234)	Data 0.001 (0.011)	Loss 3.9457 (4.0023)	Top-1 acc 33.594 (31.193)	Top-5 acc 59.766 (54.631)	lr 0.00042
Warmup Train [38][2150/3239]	Time 0.237 (0.234)	Data 0.001 (0.011)	Loss 4.1207 (4.0019)	Top-1 acc 30.078 (31.202)	Top-5 acc 49.219 (54.640)	lr 0.00042
Warmup Train [38][2160/3239]	Time 0.194 (0.234)	Data 0.001 (0.011)	Loss 3.9887 (4.0018)	Top-1 acc 31.641 (31.209)	Top-5 acc 54.297 (54.643)	lr 0.00042
Warmup Train [38][2170/3239]	Time 0.186 (0.234)	Data 0.001 (0.011)	Loss 4.2318 (4.0018)	Top-1 acc 25.781 (31.209)	Top-5 acc 50.000 (54.648)	lr 0.00042
Warmup Train [38][2180/3239]	Time 0.267 (0.234)	Data 0.001 (0.011)	Loss 3.7582 (4.0016)	Top-1 acc 34.766 (31.212)	Top-5 acc 61.328 (54.653)	lr 0.00042
Warmup Train [38][2190/3239]	Time 0.259 (0.234)	Data 0.001 (0.011)	Loss 3.9922 (4.0018)	Top-1 acc 30.078 (31.208)	Top-5 acc 56.641 (54.647)	lr 0.00042
Warmup Train [38][2200/3239]	Time 0.157 (0.234)	Data 0.001 (0.011)	Loss 3.9301 (4.0020)	Top-1 acc 33.984 (31.208)	Top-5 acc 53.906 (54.642)	lr 0.00041
Warmup Train [38][2210/3239]	Time 0.233 (0.234)	Data 0.001 (0.011)	Loss 3.9853 (4.0020)	Top-1 acc 32.422 (31.210)	Top-5 acc 55.859 (54.642)	lr 0.00041
Warmup Train [38][2220/3239]	Time 0.241 (0.234)	Data 0.002 (0.011)	Loss 3.8507 (4.0016)	Top-1 acc 36.719 (31.218)	Top-5 acc 55.859 (54.648)	lr 0.00041
Warmup Train [38][2230/3239]	Time 0.316 (0.234)	Data 0.001 (0.011)	Loss 4.0244 (4.0014)	Top-1 acc 30.859 (31.225)	Top-5 acc 55.859 (54.651)	lr 0.00041
Warmup Train [38][2240/3239]	Time 0.237 (0.234)	Data 0.001 (0.011)	Loss 3.9733 (4.0015)	Top-1 acc 29.297 (31.225)	Top-5 acc 57.422 (54.653)	lr 0.00041
Warmup Train [38][2250/3239]	Time 0.202 (0.234)	Data 0.001 (0.011)	Loss 3.9053 (4.0012)	Top-1 acc 32.422 (31.227)	Top-5 acc 56.250 (54.662)	lr 0.00041
Warmup Train [38][2260/3239]	Time 0.224 (0.233)	Data 0.001 (0.011)	Loss 4.0487 (4.0011)	Top-1 acc 30.859 (31.234)	Top-5 acc 53.906 (54.661)	lr 0.00041
Warmup Train [38][2270/3239]	Time 0.257 (0.233)	Data 0.003 (0.011)	Loss 4.0057 (4.0012)	Top-1 acc 29.688 (31.232)	Top-5 acc 56.641 (54.658)	lr 0.00041
Warmup Train [38][2280/3239]	Time 0.192 (0.233)	Data 0.002 (0.011)	Loss 3.9150 (4.0010)	Top-1 acc 34.766 (31.236)	Top-5 acc 57.422 (54.661)	lr 0.00041
Warmup Train [38][2290/3239]	Time 0.209 (0.233)	Data 0.002 (0.011)	Loss 4.0693 (4.0010)	Top-1 acc 32.031 (31.234)	Top-5 acc 53.516 (54.659)	lr 0.00040
Warmup Train [38][2300/3239]	Time 0.219 (0.233)	Data 0.002 (0.011)	Loss 4.1553 (4.0013)	Top-1 acc 27.344 (31.230)	Top-5 acc 51.172 (54.650)	lr 0.00040
Warmup Train [38][2310/3239]	Time 0.216 (0.233)	Data 0.001 (0.011)	Loss 4.2161 (4.0014)	Top-1 acc 33.203 (31.230)	Top-5 acc 48.828 (54.645)	lr 0.00040
Warmup Train [38][2320/3239]	Time 0.228 (0.233)	Data 0.001 (0.011)	Loss 3.8705 (4.0017)	Top-1 acc 33.984 (31.220)	Top-5 acc 54.297 (54.633)	lr 0.00040
Warmup Train [38][2330/3239]	Time 0.336 (0.233)	Data 0.002 (0.011)	Loss 4.1404 (4.0017)	Top-1 acc 29.688 (31.222)	Top-5 acc 51.172 (54.630)	lr 0.00040
Warmup Train [38][2340/3239]	Time 0.251 (0.233)	Data 0.001 (0.011)	Loss 3.8926 (4.0017)	Top-1 acc 33.203 (31.222)	Top-5 acc 58.203 (54.634)	lr 0.00040
Warmup Train [38][2350/3239]	Time 0.200 (0.233)	Data 0.001 (0.010)	Loss 4.2216 (4.0020)	Top-1 acc 31.250 (31.219)	Top-5 acc 47.656 (54.623)	lr 0.00040
Warmup Train [38][2360/3239]	Time 0.182 (0.233)	Data 0.001 (0.010)	Loss 4.0165 (4.0024)	Top-1 acc 32.422 (31.219)	Top-5 acc 53.906 (54.616)	lr 0.00040
Warmup Train [38][2370/3239]	Time 0.236 (0.233)	Data 0.001 (0.010)	Loss 3.8848 (4.0023)	Top-1 acc 32.812 (31.219)	Top-5 acc 55.859 (54.618)	lr 0.00040
Warmup Train [38][2380/3239]	Time 0.225 (0.233)	Data 0.001 (0.010)	Loss 4.0888 (4.0025)	Top-1 acc 30.469 (31.216)	Top-5 acc 50.000 (54.614)	lr 0.00039
Warmup Train [38][2390/3239]	Time 0.179 (0.233)	Data 0.001 (0.010)	Loss 3.9559 (4.0022)	Top-1 acc 32.422 (31.224)	Top-5 acc 54.297 (54.621)	lr 0.00039
Warmup Train [38][2400/3239]	Time 0.195 (0.233)	Data 0.001 (0.010)	Loss 4.1146 (4.0021)	Top-1 acc 30.078 (31.228)	Top-5 acc 54.297 (54.625)	lr 0.00039
Warmup Train [38][2410/3239]	Time 0.219 (0.233)	Data 0.002 (0.010)	Loss 3.8952 (4.0020)	Top-1 acc 31.250 (31.221)	Top-5 acc 58.594 (54.623)	lr 0.00039
Warmup Train [38][2420/3239]	Time 0.216 (0.233)	Data 0.001 (0.010)	Loss 3.8674 (4.0016)	Top-1 acc 33.984 (31.231)	Top-5 acc 57.812 (54.630)	lr 0.00039
Warmup Train [38][2430/3239]	Time 0.307 (0.233)	Data 0.002 (0.010)	Loss 4.0034 (4.0016)	Top-1 acc 32.031 (31.232)	Top-5 acc 53.906 (54.627)	lr 0.00039
Warmup Train [38][2440/3239]	Time 0.237 (0.233)	Data 0.001 (0.010)	Loss 3.8410 (4.0014)	Top-1 acc 34.375 (31.236)	Top-5 acc 57.422 (54.629)	lr 0.00039
Warmup Train [38][2450/3239]	Time 0.190 (0.233)	Data 0.001 (0.010)	Loss 3.9870 (4.0014)	Top-1 acc 32.422 (31.230)	Top-5 acc 55.078 (54.627)	lr 0.00039
Warmup Train [38][2460/3239]	Time 0.195 (0.233)	Data 0.002 (0.010)	Loss 3.9926 (4.0013)	Top-1 acc 31.641 (31.233)	Top-5 acc 53.125 (54.626)	lr 0.00039
Warmup Train [38][2470/3239]	Time 0.205 (0.233)	Data 0.001 (0.010)	Loss 4.2830 (4.0016)	Top-1 acc 25.000 (31.231)	Top-5 acc 46.875 (54.617)	lr 0.00039
Warmup Train [38][2480/3239]	Time 0.193 (0.233)	Data 0.001 (0.010)	Loss 4.0161 (4.0015)	Top-1 acc 29.688 (31.235)	Top-5 acc 55.469 (54.616)	lr 0.00038
Warmup Train [38][2490/3239]	Time 0.132 (0.233)	Data 0.001 (0.010)	Loss 4.0709 (4.0016)	Top-1 acc 29.688 (31.232)	Top-5 acc 54.297 (54.615)	lr 0.00038
Warmup Train [38][2500/3239]	Time 0.239 (0.233)	Data 0.001 (0.010)	Loss 3.9879 (4.0019)	Top-1 acc 32.422 (31.228)	Top-5 acc 57.031 (54.608)	lr 0.00038
Warmup Train [38][2510/3239]	Time 0.179 (0.233)	Data 0.001 (0.010)	Loss 4.3012 (4.0020)	Top-1 acc 27.344 (31.225)	Top-5 acc 50.781 (54.610)	lr 0.00038
Warmup Train [38][2520/3239]	Time 0.211 (0.233)	Data 0.001 (0.010)	Loss 3.9279 (4.0021)	Top-1 acc 31.250 (31.221)	Top-5 acc 57.031 (54.611)	lr 0.00038
Warmup Train [38][2530/3239]	Time 0.229 (0.233)	Data 0.001 (0.010)	Loss 3.9829 (4.0022)	Top-1 acc 29.688 (31.220)	Top-5 acc 57.422 (54.611)	lr 0.00038
Warmup Train [38][2540/3239]	Time 0.383 (0.233)	Data 0.001 (0.010)	Loss 3.8994 (4.0021)	Top-1 acc 37.109 (31.219)	Top-5 acc 56.250 (54.616)	lr 0.00038
Warmup Train [38][2550/3239]	Time 0.181 (0.233)	Data 0.001 (0.010)	Loss 4.0358 (4.0020)	Top-1 acc 30.078 (31.222)	Top-5 acc 52.344 (54.615)	lr 0.00038
Warmup Train [38][2560/3239]	Time 0.251 (0.232)	Data 0.001 (0.010)	Loss 4.1714 (4.0021)	Top-1 acc 26.562 (31.219)	Top-5 acc 49.609 (54.613)	lr 0.00038
Warmup Train [38][2570/3239]	Time 0.215 (0.232)	Data 0.002 (0.010)	Loss 3.8589 (4.0021)	Top-1 acc 33.203 (31.223)	Top-5 acc 57.422 (54.614)	lr 0.00037
Warmup Train [38][2580/3239]	Time 0.210 (0.232)	Data 0.002 (0.010)	Loss 4.0709 (4.0023)	Top-1 acc 32.422 (31.217)	Top-5 acc 53.125 (54.603)	lr 0.00037
Warmup Train [38][2590/3239]	Time 0.147 (0.232)	Data 0.002 (0.010)	Loss 4.0315 (4.0023)	Top-1 acc 28.906 (31.220)	Top-5 acc 52.734 (54.603)	lr 0.00037
Warmup Train [38][2600/3239]	Time 0.213 (0.232)	Data 0.001 (0.010)	Loss 4.0962 (4.0022)	Top-1 acc 29.688 (31.225)	Top-5 acc 50.391 (54.605)	lr 0.00037
Warmup Train [38][2610/3239]	Time 0.210 (0.232)	Data 0.001 (0.010)	Loss 4.0586 (4.0025)	Top-1 acc 29.688 (31.214)	Top-5 acc 53.125 (54.597)	lr 0.00037
Warmup Train [38][2620/3239]	Time 0.198 (0.232)	Data 0.001 (0.010)	Loss 3.9185 (4.0024)	Top-1 acc 30.859 (31.217)	Top-5 acc 56.641 (54.599)	lr 0.00037
Warmup Train [38][2630/3239]	Time 0.217 (0.232)	Data 0.001 (0.010)	Loss 4.0293 (4.0025)	Top-1 acc 27.344 (31.212)	Top-5 acc 55.078 (54.595)	lr 0.00037
Warmup Train [38][2640/3239]	Time 0.191 (0.232)	Data 0.001 (0.010)	Loss 3.8712 (4.0028)	Top-1 acc 32.422 (31.207)	Top-5 acc 57.031 (54.585)	lr 0.00037
Warmup Train [38][2650/3239]	Time 0.203 (0.232)	Data 0.001 (0.010)	Loss 3.9985 (4.0031)	Top-1 acc 28.906 (31.199)	Top-5 acc 55.078 (54.581)	lr 0.00037
Warmup Train [38][2660/3239]	Time 0.342 (0.232)	Data 0.001 (0.010)	Loss 3.8936 (4.0030)	Top-1 acc 32.812 (31.199)	Top-5 acc 58.984 (54.584)	lr 0.00037
Warmup Train [38][2670/3239]	Time 0.225 (0.232)	Data 0.001 (0.010)	Loss 4.0242 (4.0030)	Top-1 acc 32.031 (31.199)	Top-5 acc 52.734 (54.582)	lr 0.00036
Warmup Train [38][2680/3239]	Time 0.220 (0.232)	Data 0.001 (0.010)	Loss 3.9222 (4.0030)	Top-1 acc 30.859 (31.199)	Top-5 acc 58.984 (54.585)	lr 0.00036
Warmup Train [38][2690/3239]	Time 0.249 (0.232)	Data 0.001 (0.010)	Loss 3.8788 (4.0032)	Top-1 acc 33.984 (31.190)	Top-5 acc 56.250 (54.579)	lr 0.00036
Warmup Train [38][2700/3239]	Time 0.150 (0.232)	Data 0.001 (0.009)	Loss 4.0069 (4.0031)	Top-1 acc 30.469 (31.190)	Top-5 acc 53.125 (54.581)	lr 0.00036
Warmup Train [38][2710/3239]	Time 0.170 (0.232)	Data 0.003 (0.009)	Loss 3.8734 (4.0030)	Top-1 acc 30.469 (31.188)	Top-5 acc 54.688 (54.578)	lr 0.00036
Warmup Train [38][2720/3239]	Time 0.255 (0.232)	Data 0.001 (0.009)	Loss 3.7803 (4.0029)	Top-1 acc 35.938 (31.186)	Top-5 acc 58.594 (54.580)	lr 0.00036
Warmup Train [38][2730/3239]	Time 0.224 (0.232)	Data 0.002 (0.009)	Loss 3.9256 (4.0030)	Top-1 acc 31.641 (31.186)	Top-5 acc 55.469 (54.581)	lr 0.00036
Warmup Train [38][2740/3239]	Time 0.201 (0.232)	Data 0.001 (0.009)	Loss 4.0120 (4.0028)	Top-1 acc 31.250 (31.181)	Top-5 acc 53.125 (54.584)	lr 0.00036
Warmup Train [38][2750/3239]	Time 0.171 (0.232)	Data 0.001 (0.009)	Loss 3.9937 (4.0028)	Top-1 acc 32.812 (31.183)	Top-5 acc 54.297 (54.584)	lr 0.00036
Warmup Train [38][2760/3239]	Time 0.170 (0.232)	Data 0.001 (0.009)	Loss 3.9990 (4.0029)	Top-1 acc 34.375 (31.183)	Top-5 acc 53.516 (54.583)	lr 0.00035
Warmup Train [38][2770/3239]	Time 0.338 (0.232)	Data 0.001 (0.009)	Loss 3.8972 (4.0029)	Top-1 acc 33.203 (31.183)	Top-5 acc 56.250 (54.582)	lr 0.00035
Warmup Train [38][2780/3239]	Time 0.163 (0.232)	Data 0.002 (0.009)	Loss 3.8791 (4.0029)	Top-1 acc 34.375 (31.184)	Top-5 acc 56.641 (54.583)	lr 0.00035
Warmup Train [38][2790/3239]	Time 0.230 (0.232)	Data 0.002 (0.009)	Loss 4.1026 (4.0026)	Top-1 acc 26.562 (31.193)	Top-5 acc 50.000 (54.592)	lr 0.00035
Warmup Train [38][2800/3239]	Time 0.174 (0.232)	Data 0.001 (0.009)	Loss 4.0751 (4.0027)	Top-1 acc 34.766 (31.194)	Top-5 acc 58.203 (54.591)	lr 0.00035
Warmup Train [38][2810/3239]	Time 0.273 (0.232)	Data 0.001 (0.009)	Loss 4.0631 (4.0026)	Top-1 acc 28.516 (31.197)	Top-5 acc 55.469 (54.593)	lr 0.00035
Warmup Train [38][2820/3239]	Time 0.245 (0.232)	Data 0.001 (0.009)	Loss 4.1335 (4.0024)	Top-1 acc 29.688 (31.200)	Top-5 acc 49.219 (54.597)	lr 0.00035
Warmup Train [38][2830/3239]	Time 0.190 (0.232)	Data 0.001 (0.009)	Loss 4.1699 (4.0024)	Top-1 acc 26.562 (31.195)	Top-5 acc 47.266 (54.598)	lr 0.00035
Warmup Train [38][2840/3239]	Time 0.253 (0.232)	Data 0.001 (0.009)	Loss 3.9705 (4.0024)	Top-1 acc 30.078 (31.192)	Top-5 acc 57.422 (54.597)	lr 0.00035
Warmup Train [38][2850/3239]	Time 0.270 (0.232)	Data 0.001 (0.009)	Loss 3.9643 (4.0025)	Top-1 acc 35.547 (31.197)	Top-5 acc 55.859 (54.595)	lr 0.00035
Warmup Train [38][2860/3239]	Time 0.203 (0.232)	Data 0.001 (0.009)	Loss 4.0715 (4.0024)	Top-1 acc 31.641 (31.196)	Top-5 acc 55.078 (54.599)	lr 0.00034
Warmup Train [38][2870/3239]	Time 0.380 (0.232)	Data 0.001 (0.009)	Loss 4.0872 (4.0025)	Top-1 acc 26.953 (31.192)	Top-5 acc 51.953 (54.596)	lr 0.00034
Warmup Train [38][2880/3239]	Time 0.262 (0.232)	Data 0.001 (0.009)	Loss 3.8789 (4.0026)	Top-1 acc 34.375 (31.193)	Top-5 acc 61.328 (54.595)	lr 0.00034
Warmup Train [38][2890/3239]	Time 0.249 (0.232)	Data 0.001 (0.009)	Loss 4.0621 (4.0026)	Top-1 acc 32.031 (31.194)	Top-5 acc 51.562 (54.594)	lr 0.00034
Warmup Train [38][2900/3239]	Time 0.144 (0.232)	Data 0.002 (0.009)	Loss 3.9956 (4.0025)	Top-1 acc 29.688 (31.191)	Top-5 acc 55.078 (54.594)	lr 0.00034
Warmup Train [38][2910/3239]	Time 0.178 (0.232)	Data 0.001 (0.009)	Loss 3.7941 (4.0022)	Top-1 acc 32.031 (31.194)	Top-5 acc 61.328 (54.599)	lr 0.00034
Warmup Train [38][2920/3239]	Time 0.232 (0.232)	Data 0.001 (0.009)	Loss 4.0418 (4.0022)	Top-1 acc 30.469 (31.191)	Top-5 acc 53.906 (54.601)	lr 0.00034
Warmup Train [38][2930/3239]	Time 0.203 (0.232)	Data 0.001 (0.009)	Loss 4.1684 (4.0022)	Top-1 acc 26.953 (31.191)	Top-5 acc 48.047 (54.598)	lr 0.00034
Warmup Train [38][2940/3239]	Time 0.185 (0.232)	Data 0.002 (0.009)	Loss 4.3012 (4.0024)	Top-1 acc 26.562 (31.190)	Top-5 acc 50.781 (54.598)	lr 0.00034
Warmup Train [38][2950/3239]	Time 0.195 (0.232)	Data 0.001 (0.009)	Loss 3.9250 (4.0024)	Top-1 acc 33.984 (31.188)	Top-5 acc 57.422 (54.601)	lr 0.00034
Warmup Train [38][2960/3239]	Time 0.132 (0.231)	Data 0.001 (0.009)	Loss 3.9212 (4.0024)	Top-1 acc 32.031 (31.190)	Top-5 acc 57.812 (54.601)	lr 0.00033
Warmup Train [38][2970/3239]	Time 0.355 (0.231)	Data 0.001 (0.009)	Loss 3.9963 (4.0022)	Top-1 acc 31.641 (31.193)	Top-5 acc 53.125 (54.604)	lr 0.00033
Warmup Train [38][2980/3239]	Time 0.293 (0.231)	Data 0.001 (0.009)	Loss 4.0220 (4.0024)	Top-1 acc 32.422 (31.189)	Top-5 acc 53.125 (54.599)	lr 0.00033
Warmup Train [38][2990/3239]	Time 0.202 (0.231)	Data 0.001 (0.009)	Loss 3.8814 (4.0024)	Top-1 acc 34.766 (31.191)	Top-5 acc 55.469 (54.601)	lr 0.00033
Warmup Train [38][3000/3239]	Time 0.220 (0.231)	Data 0.001 (0.009)	Loss 3.8328 (4.0023)	Top-1 acc 30.469 (31.191)	Top-5 acc 56.641 (54.604)	lr 0.00033
Warmup Train [38][3010/3239]	Time 0.196 (0.231)	Data 0.002 (0.009)	Loss 4.2145 (4.0022)	Top-1 acc 24.219 (31.193)	Top-5 acc 50.781 (54.606)	lr 0.00033
Warmup Train [38][3020/3239]	Time 0.242 (0.231)	Data 0.001 (0.009)	Loss 3.9928 (4.0021)	Top-1 acc 29.688 (31.192)	Top-5 acc 54.297 (54.607)	lr 0.00033
Warmup Train [38][3030/3239]	Time 0.219 (0.231)	Data 0.001 (0.009)	Loss 3.8553 (4.0023)	Top-1 acc 34.766 (31.190)	Top-5 acc 58.203 (54.603)	lr 0.00033
Warmup Train [38][3040/3239]	Time 0.232 (0.231)	Data 0.003 (0.009)	Loss 3.8877 (4.0020)	Top-1 acc 29.688 (31.195)	Top-5 acc 58.594 (54.608)	lr 0.00033
Warmup Train [38][3050/3239]	Time 0.284 (0.231)	Data 0.002 (0.009)	Loss 3.8973 (4.0021)	Top-1 acc 30.859 (31.193)	Top-5 acc 57.422 (54.607)	lr 0.00033
Warmup Train [38][3060/3239]	Time 0.231 (0.231)	Data 0.001 (0.009)	Loss 3.8354 (4.0020)	Top-1 acc 38.281 (31.197)	Top-5 acc 55.078 (54.607)	lr 0.00033
Warmup Train [38][3070/3239]	Time 0.304 (0.231)	Data 0.002 (0.009)	Loss 4.0150 (4.0018)	Top-1 acc 34.375 (31.202)	Top-5 acc 58.984 (54.612)	lr 0.00032
Warmup Train [38][3080/3239]	Time 0.219 (0.231)	Data 0.001 (0.009)	Loss 3.9368 (4.0019)	Top-1 acc 37.109 (31.200)	Top-5 acc 55.859 (54.608)	lr 0.00032
Warmup Train [38][3090/3239]	Time 0.173 (0.231)	Data 0.001 (0.009)	Loss 4.0732 (4.0018)	Top-1 acc 27.344 (31.199)	Top-5 acc 51.953 (54.611)	lr 0.00032
Warmup Train [38][3100/3239]	Time 0.187 (0.231)	Data 0.001 (0.009)	Loss 3.8698 (4.0019)	Top-1 acc 35.938 (31.199)	Top-5 acc 58.984 (54.611)	lr 0.00032
Warmup Train [38][3110/3239]	Time 0.218 (0.231)	Data 0.001 (0.009)	Loss 4.1108 (4.0019)	Top-1 acc 28.516 (31.199)	Top-5 acc 51.953 (54.611)	lr 0.00032
Warmup Train [38][3120/3239]	Time 0.252 (0.231)	Data 0.024 (0.009)	Loss 4.1244 (4.0020)	Top-1 acc 30.469 (31.194)	Top-5 acc 50.781 (54.613)	lr 0.00032
Warmup Train [38][3130/3239]	Time 0.164 (0.231)	Data 0.002 (0.009)	Loss 3.9059 (4.0020)	Top-1 acc 33.203 (31.193)	Top-5 acc 57.031 (54.609)	lr 0.00032
Warmup Train [38][3140/3239]	Time 0.265 (0.231)	Data 0.001 (0.009)	Loss 4.0893 (4.0021)	Top-1 acc 31.641 (31.192)	Top-5 acc 51.562 (54.611)	lr 0.00032
Warmup Train [38][3150/3239]	Time 0.240 (0.231)	Data 0.001 (0.009)	Loss 4.1383 (4.0020)	Top-1 acc 28.906 (31.190)	Top-5 acc 51.953 (54.610)	lr 0.00032
Warmup Train [38][3160/3239]	Time 0.359 (0.231)	Data 0.001 (0.009)	Loss 3.9101 (4.0019)	Top-1 acc 35.547 (31.188)	Top-5 acc 58.203 (54.609)	lr 0.00032
Warmup Train [38][3170/3239]	Time 0.181 (0.231)	Data 0.002 (0.009)	Loss 4.0210 (4.0019)	Top-1 acc 30.859 (31.185)	Top-5 acc 52.734 (54.607)	lr 0.00031
Warmup Train [38][3180/3239]	Time 0.174 (0.231)	Data 0.000 (0.009)	Loss 4.0006 (4.0021)	Top-1 acc 33.203 (31.185)	Top-5 acc 54.688 (54.607)	lr 0.00031
Warmup Train [38][3190/3239]	Time 0.155 (0.231)	Data 0.000 (0.009)	Loss 3.9117 (4.0020)	Top-1 acc 32.422 (31.185)	Top-5 acc 56.250 (54.609)	lr 0.00031
Warmup Train [38][3200/3239]	Time 0.236 (0.231)	Data 0.000 (0.009)	Loss 4.0928 (4.0021)	Top-1 acc 29.297 (31.182)	Top-5 acc 51.562 (54.606)	lr 0.00031
Warmup Train [38][3210/3239]	Time 0.228 (0.231)	Data 0.000 (0.008)	Loss 3.8837 (4.0021)	Top-1 acc 32.812 (31.182)	Top-5 acc 57.031 (54.605)	lr 0.00031
Warmup Train [38][3220/3239]	Time 0.170 (0.231)	Data 0.000 (0.008)	Loss 3.9838 (4.0021)	Top-1 acc 31.250 (31.181)	Top-5 acc 52.734 (54.606)	lr 0.00031
Warmup Train [38][3230/3239]	Time 0.187 (0.231)	Data 0.000 (0.008)	Loss 4.3029 (4.0020)	Top-1 acc 26.172 (31.184)	Top-5 acc 49.609 (54.607)	lr 0.00031
Warmup Train [38][3239/3239]	Time 0.167 (0.231)	Data 0.000 (0.008)	Loss 3.9994 (4.0020)	Top-1 acc 35.802 (31.186)	Top-5 acc 62.963 (54.609)	lr 0.00031
==========Warmup Valid [38/40]	loss 2.941	top-1 acc 38.691	top-5 acc 63.291	Train top-1 31.186	top-5 54.609	flops: 442.4M
Warmup Train [39][0/3239]	Time 19.591 (19.591)	Data 18.647 (18.647)	Loss 4.2335 (4.2335)	Top-1 acc 28.906 (28.906)	Top-5 acc 50.391 (50.391)	lr 0.00031
Warmup Train [39][10/3239]	Time 0.299 (2.240)	Data 0.002 (1.871)	Loss 3.9426 (4.0272)	Top-1 acc 30.859 (30.256)	Top-5 acc 58.984 (55.185)	lr 0.00031
Warmup Train [39][20/3239]	Time 0.338 (1.299)	Data 0.002 (0.982)	Loss 3.9065 (3.9813)	Top-1 acc 31.641 (31.343)	Top-5 acc 55.859 (56.045)	lr 0.00031
Warmup Train [39][30/3239]	Time 0.211 (0.951)	Data 0.001 (0.666)	Loss 4.0086 (3.9834)	Top-1 acc 32.031 (31.338)	Top-5 acc 51.172 (55.481)	lr 0.00030
Warmup Train [39][40/3239]	Time 0.219 (0.776)	Data 0.001 (0.505)	Loss 3.9436 (3.9796)	Top-1 acc 31.641 (31.421)	Top-5 acc 57.031 (55.469)	lr 0.00030
Warmup Train [39][50/3239]	Time 0.296 (0.672)	Data 0.001 (0.407)	Loss 4.0635 (3.9907)	Top-1 acc 30.469 (31.380)	Top-5 acc 53.906 (55.170)	lr 0.00030
Warmup Train [39][60/3239]	Time 0.190 (0.600)	Data 0.001 (0.340)	Loss 4.0969 (3.9940)	Top-1 acc 32.031 (31.378)	Top-5 acc 54.688 (55.020)	lr 0.00030
Warmup Train [39][70/3239]	Time 0.276 (0.547)	Data 0.001 (0.293)	Loss 3.9207 (3.9912)	Top-1 acc 29.688 (31.349)	Top-5 acc 53.516 (54.902)	lr 0.00030
Warmup Train [39][80/3239]	Time 0.154 (0.506)	Data 0.001 (0.257)	Loss 4.2731 (3.9923)	Top-1 acc 27.344 (31.211)	Top-5 acc 51.562 (54.919)	lr 0.00030
Warmup Train [39][90/3239]	Time 0.182 (0.475)	Data 0.001 (0.229)	Loss 3.9927 (3.9915)	Top-1 acc 29.297 (31.160)	Top-5 acc 53.125 (54.872)	lr 0.00030
Warmup Train [39][100/3239]	Time 0.134 (0.449)	Data 0.001 (0.207)	Loss 4.0458 (3.9939)	Top-1 acc 30.469 (31.200)	Top-5 acc 53.906 (54.819)	lr 0.00030
Warmup Train [39][110/3239]	Time 0.211 (0.429)	Data 0.001 (0.189)	Loss 3.9787 (3.9942)	Top-1 acc 29.688 (31.201)	Top-5 acc 58.594 (54.811)	lr 0.00030
Warmup Train [39][120/3239]	Time 0.182 (0.412)	Data 0.001 (0.174)	Loss 3.8243 (3.9922)	Top-1 acc 34.766 (31.256)	Top-5 acc 59.375 (54.872)	lr 0.00030
Warmup Train [39][130/3239]	Time 0.226 (0.398)	Data 0.001 (0.161)	Loss 3.9412 (3.9927)	Top-1 acc 30.859 (31.331)	Top-5 acc 54.297 (54.804)	lr 0.00030
Warmup Train [39][140/3239]	Time 0.211 (0.386)	Data 0.001 (0.150)	Loss 4.2374 (3.9946)	Top-1 acc 28.516 (31.280)	Top-5 acc 51.562 (54.740)	lr 0.00029
Warmup Train [39][150/3239]	Time 0.210 (0.374)	Data 0.001 (0.140)	Loss 4.0143 (3.9926)	Top-1 acc 32.031 (31.242)	Top-5 acc 55.078 (54.770)	lr 0.00029
Warmup Train [39][160/3239]	Time 0.188 (0.365)	Data 0.001 (0.131)	Loss 4.0640 (3.9970)	Top-1 acc 28.125 (31.160)	Top-5 acc 51.953 (54.641)	lr 0.00029
Warmup Train [39][170/3239]	Time 0.202 (0.356)	Data 0.002 (0.124)	Loss 4.0387 (3.9992)	Top-1 acc 27.344 (31.161)	Top-5 acc 51.172 (54.589)	lr 0.00029
Warmup Train [39][180/3239]	Time 0.198 (0.348)	Data 0.001 (0.117)	Loss 4.1327 (4.0004)	Top-1 acc 28.906 (31.112)	Top-5 acc 50.000 (54.567)	lr 0.00029
Warmup Train [39][190/3239]	Time 0.278 (0.342)	Data 0.002 (0.111)	Loss 4.0675 (4.0049)	Top-1 acc 30.078 (30.964)	Top-5 acc 54.688 (54.550)	lr 0.00029
Warmup Train [39][200/3239]	Time 0.379 (0.336)	Data 0.002 (0.106)	Loss 3.9286 (4.0045)	Top-1 acc 29.688 (30.993)	Top-5 acc 59.375 (54.606)	lr 0.00029
Warmup Train [39][210/3239]	Time 0.235 (0.331)	Data 0.002 (0.101)	Loss 3.9073 (4.0065)	Top-1 acc 33.203 (30.932)	Top-5 acc 59.375 (54.576)	lr 0.00029
Warmup Train [39][220/3239]	Time 0.173 (0.326)	Data 0.001 (0.096)	Loss 4.1479 (4.0052)	Top-1 acc 31.641 (31.015)	Top-5 acc 50.000 (54.592)	lr 0.00029
Warmup Train [39][230/3239]	Time 0.244 (0.321)	Data 0.001 (0.092)	Loss 3.9286 (4.0065)	Top-1 acc 30.469 (30.963)	Top-5 acc 52.344 (54.532)	lr 0.00029
Warmup Train [39][240/3239]	Time 0.195 (0.318)	Data 0.001 (0.089)	Loss 3.9830 (4.0071)	Top-1 acc 30.078 (30.978)	Top-5 acc 53.125 (54.546)	lr 0.00029
Warmup Train [39][250/3239]	Time 0.164 (0.314)	Data 0.001 (0.085)	Loss 3.9384 (4.0078)	Top-1 acc 31.250 (30.948)	Top-5 acc 52.344 (54.527)	lr 0.00028
Warmup Train [39][260/3239]	Time 0.224 (0.310)	Data 0.001 (0.082)	Loss 4.0276 (4.0073)	Top-1 acc 29.688 (30.940)	Top-5 acc 53.516 (54.521)	lr 0.00028
Warmup Train [39][270/3239]	Time 0.251 (0.307)	Data 0.001 (0.079)	Loss 3.9548 (4.0061)	Top-1 acc 33.594 (30.978)	Top-5 acc 55.078 (54.553)	lr 0.00028
Warmup Train [39][280/3239]	Time 0.183 (0.304)	Data 0.002 (0.077)	Loss 3.9284 (4.0030)	Top-1 acc 31.250 (31.057)	Top-5 acc 60.156 (54.633)	lr 0.00028
Warmup Train [39][290/3239]	Time 0.203 (0.302)	Data 0.001 (0.074)	Loss 4.1248 (4.0005)	Top-1 acc 26.953 (31.133)	Top-5 acc 51.953 (54.722)	lr 0.00028
Warmup Train [39][300/3239]	Time 0.249 (0.299)	Data 0.001 (0.072)	Loss 4.0641 (4.0003)	Top-1 acc 30.469 (31.133)	Top-5 acc 53.906 (54.694)	lr 0.00028
Warmup Train [39][310/3239]	Time 0.171 (0.297)	Data 0.001 (0.069)	Loss 4.0515 (4.0010)	Top-1 acc 30.078 (31.141)	Top-5 acc 50.781 (54.675)	lr 0.00028
Warmup Train [39][320/3239]	Time 0.241 (0.294)	Data 0.001 (0.067)	Loss 4.0013 (4.0018)	Top-1 acc 30.469 (31.111)	Top-5 acc 52.734 (54.610)	lr 0.00028
Warmup Train [39][330/3239]	Time 0.185 (0.292)	Data 0.001 (0.065)	Loss 3.8858 (4.0032)	Top-1 acc 37.109 (31.097)	Top-5 acc 57.031 (54.569)	lr 0.00028
Warmup Train [39][340/3239]	Time 0.226 (0.290)	Data 0.001 (0.063)	Loss 3.8722 (4.0026)	Top-1 acc 33.984 (31.098)	Top-5 acc 57.031 (54.567)	lr 0.00028
Warmup Train [39][350/3239]	Time 0.153 (0.288)	Data 0.001 (0.062)	Loss 4.0418 (4.0025)	Top-1 acc 29.688 (31.112)	Top-5 acc 55.859 (54.598)	lr 0.00028
Warmup Train [39][360/3239]	Time 0.224 (0.286)	Data 0.001 (0.060)	Loss 4.0846 (4.0027)	Top-1 acc 31.250 (31.131)	Top-5 acc 50.781 (54.591)	lr 0.00027
Warmup Train [39][370/3239]	Time 0.158 (0.284)	Data 0.002 (0.058)	Loss 3.9027 (4.0013)	Top-1 acc 33.594 (31.166)	Top-5 acc 54.688 (54.626)	lr 0.00027
Warmup Train [39][380/3239]	Time 0.240 (0.283)	Data 0.002 (0.057)	Loss 4.0429 (4.0025)	Top-1 acc 31.250 (31.120)	Top-5 acc 55.469 (54.565)	lr 0.00027
Warmup Train [39][390/3239]	Time 0.212 (0.282)	Data 0.001 (0.056)	Loss 3.8520 (4.0013)	Top-1 acc 35.547 (31.133)	Top-5 acc 58.594 (54.585)	lr 0.00027
Warmup Train [39][400/3239]	Time 0.320 (0.280)	Data 0.002 (0.054)	Loss 4.2817 (4.0029)	Top-1 acc 26.172 (31.080)	Top-5 acc 50.000 (54.555)	lr 0.00027
Warmup Train [39][410/3239]	Time 0.209 (0.279)	Data 0.001 (0.053)	Loss 3.9685 (4.0020)	Top-1 acc 33.594 (31.110)	Top-5 acc 55.078 (54.581)	lr 0.00027
Warmup Train [39][420/3239]	Time 0.186 (0.277)	Data 0.001 (0.052)	Loss 4.1470 (4.0018)	Top-1 acc 27.344 (31.152)	Top-5 acc 51.172 (54.626)	lr 0.00027
Warmup Train [39][430/3239]	Time 0.267 (0.276)	Data 0.001 (0.051)	Loss 3.7032 (4.0009)	Top-1 acc 37.500 (31.160)	Top-5 acc 61.328 (54.641)	lr 0.00027
Warmup Train [39][440/3239]	Time 0.210 (0.275)	Data 0.001 (0.050)	Loss 3.9419 (4.0009)	Top-1 acc 31.641 (31.155)	Top-5 acc 53.906 (54.646)	lr 0.00027
Warmup Train [39][450/3239]	Time 0.207 (0.274)	Data 0.001 (0.048)	Loss 3.6542 (4.0001)	Top-1 acc 34.766 (31.165)	Top-5 acc 63.281 (54.672)	lr 0.00027
Warmup Train [39][460/3239]	Time 0.235 (0.273)	Data 0.001 (0.047)	Loss 4.1100 (3.9998)	Top-1 acc 31.250 (31.174)	Top-5 acc 50.781 (54.667)	lr 0.00027
Warmup Train [39][470/3239]	Time 0.268 (0.271)	Data 0.001 (0.046)	Loss 3.8640 (3.9993)	Top-1 acc 32.422 (31.193)	Top-5 acc 58.594 (54.686)	lr 0.00026
Warmup Train [39][480/3239]	Time 0.214 (0.271)	Data 0.001 (0.046)	Loss 3.8242 (3.9994)	Top-1 acc 35.547 (31.179)	Top-5 acc 60.938 (54.686)	lr 0.00026
Warmup Train [39][490/3239]	Time 0.328 (0.269)	Data 0.001 (0.045)	Loss 3.9913 (3.9995)	Top-1 acc 32.812 (31.185)	Top-5 acc 56.250 (54.702)	lr 0.00026
Warmup Train [39][500/3239]	Time 0.315 (0.268)	Data 0.001 (0.044)	Loss 4.0193 (3.9986)	Top-1 acc 31.250 (31.226)	Top-5 acc 51.562 (54.709)	lr 0.00026
Warmup Train [39][510/3239]	Time 0.221 (0.267)	Data 0.001 (0.043)	Loss 3.8252 (3.9978)	Top-1 acc 34.375 (31.243)	Top-5 acc 56.250 (54.728)	lr 0.00026
Warmup Train [39][520/3239]	Time 0.224 (0.267)	Data 0.001 (0.042)	Loss 4.0613 (3.9978)	Top-1 acc 29.688 (31.232)	Top-5 acc 52.734 (54.729)	lr 0.00026
Warmup Train [39][530/3239]	Time 0.138 (0.266)	Data 0.001 (0.042)	Loss 4.0459 (3.9987)	Top-1 acc 28.906 (31.221)	Top-5 acc 58.203 (54.716)	lr 0.00026
Warmup Train [39][540/3239]	Time 0.159 (0.265)	Data 0.001 (0.041)	Loss 3.9734 (3.9984)	Top-1 acc 31.250 (31.241)	Top-5 acc 55.859 (54.724)	lr 0.00026
Warmup Train [39][550/3239]	Time 0.200 (0.264)	Data 0.003 (0.040)	Loss 3.7575 (3.9976)	Top-1 acc 33.594 (31.263)	Top-5 acc 61.328 (54.738)	lr 0.00026
Warmup Train [39][560/3239]	Time 0.173 (0.264)	Data 0.001 (0.040)	Loss 4.0698 (3.9975)	Top-1 acc 27.734 (31.244)	Top-5 acc 50.391 (54.731)	lr 0.00026
Warmup Train [39][570/3239]	Time 0.194 (0.263)	Data 0.001 (0.039)	Loss 4.0411 (3.9976)	Top-1 acc 24.609 (31.206)	Top-5 acc 57.812 (54.722)	lr 0.00026
Warmup Train [39][580/3239]	Time 0.182 (0.262)	Data 0.001 (0.038)	Loss 3.9877 (3.9975)	Top-1 acc 33.984 (31.228)	Top-5 acc 51.953 (54.733)	lr 0.00026
Warmup Train [39][590/3239]	Time 0.233 (0.261)	Data 0.002 (0.038)	Loss 4.1141 (3.9976)	Top-1 acc 26.953 (31.224)	Top-5 acc 53.906 (54.737)	lr 0.00025
Warmup Train [39][600/3239]	Time 0.317 (0.261)	Data 0.001 (0.037)	Loss 3.9390 (3.9980)	Top-1 acc 33.203 (31.219)	Top-5 acc 53.516 (54.740)	lr 0.00025
Warmup Train [39][610/3239]	Time 0.320 (0.260)	Data 0.001 (0.037)	Loss 3.8628 (3.9988)	Top-1 acc 34.766 (31.196)	Top-5 acc 56.641 (54.691)	lr 0.00025
Warmup Train [39][620/3239]	Time 0.231 (0.259)	Data 0.001 (0.036)	Loss 4.1331 (3.9980)	Top-1 acc 29.297 (31.212)	Top-5 acc 50.391 (54.727)	lr 0.00025
Warmup Train [39][630/3239]	Time 0.247 (0.259)	Data 0.001 (0.036)	Loss 3.9963 (3.9989)	Top-1 acc 31.641 (31.193)	Top-5 acc 57.031 (54.700)	lr 0.00025
Warmup Train [39][640/3239]	Time 0.197 (0.258)	Data 0.001 (0.035)	Loss 4.0989 (3.9991)	Top-1 acc 29.297 (31.203)	Top-5 acc 50.391 (54.680)	lr 0.00025
Warmup Train [39][650/3239]	Time 0.214 (0.258)	Data 0.001 (0.035)	Loss 3.9611 (4.0004)	Top-1 acc 32.812 (31.163)	Top-5 acc 54.688 (54.640)	lr 0.00025
Warmup Train [39][660/3239]	Time 0.164 (0.257)	Data 0.002 (0.034)	Loss 3.9673 (4.0003)	Top-1 acc 32.031 (31.181)	Top-5 acc 55.469 (54.648)	lr 0.00025
Warmup Train [39][670/3239]	Time 0.199 (0.256)	Data 0.001 (0.034)	Loss 3.8660 (4.0003)	Top-1 acc 33.203 (31.167)	Top-5 acc 57.031 (54.639)	lr 0.00025
Warmup Train [39][680/3239]	Time 0.243 (0.256)	Data 0.001 (0.033)	Loss 4.2174 (4.0011)	Top-1 acc 25.391 (31.143)	Top-5 acc 48.828 (54.632)	lr 0.00025
Warmup Train [39][690/3239]	Time 0.240 (0.255)	Data 0.001 (0.033)	Loss 3.9953 (4.0006)	Top-1 acc 31.641 (31.163)	Top-5 acc 53.125 (54.646)	lr 0.00025
Warmup Train [39][700/3239]	Time 0.294 (0.255)	Data 0.001 (0.032)	Loss 4.0011 (4.0006)	Top-1 acc 30.859 (31.163)	Top-5 acc 55.469 (54.650)	lr 0.00024
Warmup Train [39][710/3239]	Time 0.335 (0.254)	Data 0.001 (0.032)	Loss 4.0228 (4.0003)	Top-1 acc 31.641 (31.169)	Top-5 acc 50.391 (54.656)	lr 0.00024
Warmup Train [39][720/3239]	Time 0.286 (0.254)	Data 0.001 (0.031)	Loss 3.8900 (4.0007)	Top-1 acc 35.156 (31.174)	Top-5 acc 57.422 (54.654)	lr 0.00024
Warmup Train [39][730/3239]	Time 0.244 (0.253)	Data 0.001 (0.031)	Loss 3.9694 (3.9996)	Top-1 acc 33.203 (31.198)	Top-5 acc 55.859 (54.690)	lr 0.00024
Warmup Train [39][740/3239]	Time 0.253 (0.253)	Data 0.001 (0.031)	Loss 4.1344 (4.0002)	Top-1 acc 31.250 (31.178)	Top-5 acc 52.734 (54.671)	lr 0.00024
Warmup Train [39][750/3239]	Time 0.237 (0.252)	Data 0.002 (0.030)	Loss 4.0399 (4.0009)	Top-1 acc 32.422 (31.177)	Top-5 acc 53.516 (54.652)	lr 0.00024
Warmup Train [39][760/3239]	Time 0.213 (0.252)	Data 0.001 (0.030)	Loss 3.9668 (4.0009)	Top-1 acc 33.203 (31.171)	Top-5 acc 55.078 (54.644)	lr 0.00024
Warmup Train [39][770/3239]	Time 0.136 (0.251)	Data 0.001 (0.030)	Loss 3.8986 (4.0007)	Top-1 acc 35.156 (31.192)	Top-5 acc 58.594 (54.645)	lr 0.00024
Warmup Train [39][780/3239]	Time 0.143 (0.251)	Data 0.002 (0.029)	Loss 4.0338 (4.0011)	Top-1 acc 33.594 (31.190)	Top-5 acc 57.031 (54.641)	lr 0.00024
Warmup Train [39][790/3239]	Time 0.209 (0.251)	Data 0.002 (0.029)	Loss 3.9284 (4.0010)	Top-1 acc 32.031 (31.204)	Top-5 acc 53.125 (54.627)	lr 0.00024
Warmup Train [39][800/3239]	Time 0.240 (0.250)	Data 0.002 (0.029)	Loss 4.0266 (4.0008)	Top-1 acc 27.344 (31.198)	Top-5 acc 53.125 (54.629)	lr 0.00024
Warmup Train [39][810/3239]	Time 0.305 (0.250)	Data 0.001 (0.028)	Loss 4.1617 (4.0007)	Top-1 acc 28.906 (31.209)	Top-5 acc 50.781 (54.633)	lr 0.00024
Warmup Train [39][820/3239]	Time 0.216 (0.250)	Data 0.001 (0.028)	Loss 3.8173 (4.0003)	Top-1 acc 37.500 (31.221)	Top-5 acc 61.328 (54.642)	lr 0.00023
Warmup Train [39][830/3239]	Time 0.231 (0.249)	Data 0.002 (0.028)	Loss 3.9971 (4.0005)	Top-1 acc 33.594 (31.217)	Top-5 acc 55.078 (54.636)	lr 0.00023
Warmup Train [39][840/3239]	Time 0.205 (0.249)	Data 0.001 (0.027)	Loss 3.9940 (4.0005)	Top-1 acc 33.594 (31.237)	Top-5 acc 55.078 (54.645)	lr 0.00023
Warmup Train [39][850/3239]	Time 0.220 (0.249)	Data 0.001 (0.027)	Loss 3.8008 (4.0004)	Top-1 acc 34.375 (31.247)	Top-5 acc 58.984 (54.645)	lr 0.00023
Warmup Train [39][860/3239]	Time 0.153 (0.249)	Data 0.001 (0.027)	Loss 3.9600 (3.9997)	Top-1 acc 34.375 (31.258)	Top-5 acc 59.766 (54.662)	lr 0.00023
Warmup Train [39][870/3239]	Time 0.202 (0.248)	Data 0.001 (0.026)	Loss 4.1765 (4.0003)	Top-1 acc 32.812 (31.256)	Top-5 acc 48.438 (54.648)	lr 0.00023
Warmup Train [39][880/3239]	Time 0.238 (0.248)	Data 0.001 (0.026)	Loss 3.9387 (4.0006)	Top-1 acc 30.859 (31.239)	Top-5 acc 58.984 (54.644)	lr 0.00023
Warmup Train [39][890/3239]	Time 0.194 (0.247)	Data 0.002 (0.026)	Loss 3.9075 (4.0008)	Top-1 acc 32.812 (31.239)	Top-5 acc 57.031 (54.628)	lr 0.00023
Warmup Train [39][900/3239]	Time 0.192 (0.247)	Data 0.002 (0.026)	Loss 4.1103 (4.0005)	Top-1 acc 27.344 (31.254)	Top-5 acc 53.906 (54.627)	lr 0.00023
Warmup Train [39][910/3239]	Time 0.326 (0.247)	Data 0.001 (0.025)	Loss 4.0361 (4.0003)	Top-1 acc 33.984 (31.268)	Top-5 acc 55.469 (54.632)	lr 0.00023
Warmup Train [39][920/3239]	Time 0.289 (0.247)	Data 0.001 (0.025)	Loss 3.9247 (4.0005)	Top-1 acc 32.812 (31.262)	Top-5 acc 56.641 (54.629)	lr 0.00023
Warmup Train [39][930/3239]	Time 0.189 (0.246)	Data 0.001 (0.025)	Loss 3.9939 (4.0003)	Top-1 acc 31.641 (31.279)	Top-5 acc 53.516 (54.638)	lr 0.00023
Warmup Train [39][940/3239]	Time 0.241 (0.246)	Data 0.002 (0.025)	Loss 3.9096 (4.0002)	Top-1 acc 32.422 (31.274)	Top-5 acc 56.250 (54.639)	lr 0.00023
Warmup Train [39][950/3239]	Time 0.233 (0.246)	Data 0.001 (0.024)	Loss 4.1720 (4.0005)	Top-1 acc 26.562 (31.268)	Top-5 acc 50.391 (54.631)	lr 0.00022
Warmup Train [39][960/3239]	Time 0.214 (0.246)	Data 0.001 (0.024)	Loss 3.8954 (4.0002)	Top-1 acc 32.031 (31.269)	Top-5 acc 56.641 (54.643)	lr 0.00022
Warmup Train [39][970/3239]	Time 0.177 (0.245)	Data 0.001 (0.024)	Loss 3.9874 (4.0005)	Top-1 acc 33.594 (31.255)	Top-5 acc 57.422 (54.634)	lr 0.00022
Warmup Train [39][980/3239]	Time 0.184 (0.245)	Data 0.001 (0.024)	Loss 3.9108 (4.0007)	Top-1 acc 33.203 (31.250)	Top-5 acc 58.984 (54.629)	lr 0.00022
Warmup Train [39][990/3239]	Time 0.213 (0.245)	Data 0.001 (0.024)	Loss 4.0811 (4.0011)	Top-1 acc 29.297 (31.251)	Top-5 acc 50.000 (54.617)	lr 0.00022
Warmup Train [39][1000/3239]	Time 0.232 (0.244)	Data 0.003 (0.023)	Loss 4.1447 (4.0012)	Top-1 acc 28.906 (31.238)	Top-5 acc 49.609 (54.609)	lr 0.00022
Warmup Train [39][1010/3239]	Time 0.206 (0.244)	Data 0.003 (0.023)	Loss 4.0086 (4.0009)	Top-1 acc 32.031 (31.238)	Top-5 acc 57.031 (54.615)	lr 0.00022
Warmup Train [39][1020/3239]	Time 0.301 (0.244)	Data 0.001 (0.023)	Loss 4.0082 (4.0002)	Top-1 acc 32.812 (31.263)	Top-5 acc 54.297 (54.633)	lr 0.00022
Warmup Train [39][1030/3239]	Time 0.192 (0.244)	Data 0.001 (0.023)	Loss 3.9411 (4.0006)	Top-1 acc 28.906 (31.263)	Top-5 acc 57.031 (54.628)	lr 0.00022
Warmup Train [39][1040/3239]	Time 0.198 (0.244)	Data 0.001 (0.023)	Loss 4.2210 (4.0002)	Top-1 acc 25.000 (31.265)	Top-5 acc 50.781 (54.639)	lr 0.00022
Warmup Train [39][1050/3239]	Time 0.188 (0.243)	Data 0.001 (0.022)	Loss 4.1029 (3.9999)	Top-1 acc 28.906 (31.267)	Top-5 acc 53.125 (54.647)	lr 0.00022
Warmup Train [39][1060/3239]	Time 0.254 (0.243)	Data 0.001 (0.022)	Loss 4.0521 (4.0001)	Top-1 acc 30.859 (31.269)	Top-5 acc 52.344 (54.636)	lr 0.00022
Warmup Train [39][1070/3239]	Time 0.180 (0.243)	Data 0.002 (0.022)	Loss 3.8408 (4.0000)	Top-1 acc 33.203 (31.271)	Top-5 acc 59.375 (54.639)	lr 0.00021
Warmup Train [39][1080/3239]	Time 0.284 (0.243)	Data 0.001 (0.022)	Loss 3.8956 (4.0001)	Top-1 acc 36.328 (31.278)	Top-5 acc 57.031 (54.630)	lr 0.00021
Warmup Train [39][1090/3239]	Time 0.179 (0.242)	Data 0.001 (0.022)	Loss 4.2640 (3.9999)	Top-1 acc 23.828 (31.270)	Top-5 acc 51.562 (54.633)	lr 0.00021
Warmup Train [39][1100/3239]	Time 0.136 (0.242)	Data 0.001 (0.021)	Loss 3.9721 (4.0000)	Top-1 acc 31.641 (31.271)	Top-5 acc 53.906 (54.626)	lr 0.00021
Warmup Train [39][1110/3239]	Time 0.231 (0.242)	Data 0.001 (0.021)	Loss 3.9575 (3.9999)	Top-1 acc 28.906 (31.276)	Top-5 acc 56.641 (54.635)	lr 0.00021
Warmup Train [39][1120/3239]	Time 0.199 (0.242)	Data 0.002 (0.021)	Loss 4.1170 (3.9999)	Top-1 acc 27.734 (31.278)	Top-5 acc 54.297 (54.636)	lr 0.00021
Warmup Train [39][1130/3239]	Time 0.300 (0.242)	Data 0.001 (0.021)	Loss 3.9976 (3.9998)	Top-1 acc 32.812 (31.284)	Top-5 acc 55.859 (54.635)	lr 0.00021
Warmup Train [39][1140/3239]	Time 0.193 (0.241)	Data 0.001 (0.021)	Loss 3.9596 (3.9995)	Top-1 acc 31.641 (31.291)	Top-5 acc 56.641 (54.644)	lr 0.00021
Warmup Train [39][1150/3239]	Time 0.222 (0.241)	Data 0.001 (0.021)	Loss 3.9094 (3.9996)	Top-1 acc 32.031 (31.284)	Top-5 acc 56.250 (54.643)	lr 0.00021
Warmup Train [39][1160/3239]	Time 0.195 (0.241)	Data 0.001 (0.021)	Loss 4.0400 (3.9996)	Top-1 acc 35.938 (31.292)	Top-5 acc 54.688 (54.646)	lr 0.00021
Warmup Train [39][1170/3239]	Time 0.188 (0.241)	Data 0.001 (0.020)	Loss 3.8842 (3.9992)	Top-1 acc 32.422 (31.302)	Top-5 acc 58.203 (54.651)	lr 0.00021
Warmup Train [39][1180/3239]	Time 0.138 (0.240)	Data 0.001 (0.020)	Loss 4.1836 (3.9993)	Top-1 acc 26.562 (31.303)	Top-5 acc 49.219 (54.648)	lr 0.00021
Warmup Train [39][1190/3239]	Time 0.178 (0.240)	Data 0.001 (0.020)	Loss 3.9901 (3.9993)	Top-1 acc 32.812 (31.292)	Top-5 acc 53.516 (54.652)	lr 0.00021
Warmup Train [39][1200/3239]	Time 0.199 (0.240)	Data 0.001 (0.020)	Loss 4.0452 (3.9999)	Top-1 acc 30.859 (31.273)	Top-5 acc 52.734 (54.638)	lr 0.00020
Warmup Train [39][1210/3239]	Time 0.174 (0.240)	Data 0.001 (0.020)	Loss 4.0492 (3.9998)	Top-1 acc 28.906 (31.266)	Top-5 acc 55.859 (54.643)	lr 0.00020
Warmup Train [39][1220/3239]	Time 0.196 (0.240)	Data 0.001 (0.020)	Loss 4.1817 (4.0008)	Top-1 acc 27.344 (31.247)	Top-5 acc 49.609 (54.613)	lr 0.00020
Warmup Train [39][1230/3239]	Time 0.298 (0.240)	Data 0.001 (0.020)	Loss 4.0419 (4.0008)	Top-1 acc 28.516 (31.253)	Top-5 acc 51.953 (54.609)	lr 0.00020
Warmup Train [39][1240/3239]	Time 0.200 (0.240)	Data 0.001 (0.019)	Loss 4.1413 (4.0005)	Top-1 acc 28.125 (31.254)	Top-5 acc 52.344 (54.608)	lr 0.00020
Warmup Train [39][1250/3239]	Time 0.185 (0.240)	Data 0.001 (0.019)	Loss 4.0953 (4.0006)	Top-1 acc 25.781 (31.258)	Top-5 acc 51.953 (54.607)	lr 0.00020
Warmup Train [39][1260/3239]	Time 0.161 (0.239)	Data 0.001 (0.019)	Loss 3.9662 (4.0009)	Top-1 acc 30.469 (31.257)	Top-5 acc 57.422 (54.609)	lr 0.00020
Warmup Train [39][1270/3239]	Time 0.239 (0.239)	Data 0.001 (0.019)	Loss 4.0526 (4.0008)	Top-1 acc 31.250 (31.255)	Top-5 acc 55.859 (54.613)	lr 0.00020
Warmup Train [39][1280/3239]	Time 0.210 (0.240)	Data 0.001 (0.019)	Loss 3.8509 (4.0008)	Top-1 acc 35.547 (31.267)	Top-5 acc 55.859 (54.617)	lr 0.00020
Warmup Train [39][1290/3239]	Time 0.245 (0.240)	Data 0.003 (0.019)	Loss 4.1003 (4.0000)	Top-1 acc 31.641 (31.282)	Top-5 acc 54.297 (54.631)	lr 0.00020
Warmup Train [39][1300/3239]	Time 0.180 (0.239)	Data 0.001 (0.019)	Loss 3.8056 (3.9998)	Top-1 acc 36.719 (31.288)	Top-5 acc 60.938 (54.635)	lr 0.00020
Warmup Train [39][1310/3239]	Time 0.204 (0.239)	Data 0.002 (0.019)	Loss 4.0048 (3.9999)	Top-1 acc 30.078 (31.278)	Top-5 acc 50.781 (54.631)	lr 0.00020
Warmup Train [39][1320/3239]	Time 0.152 (0.239)	Data 0.001 (0.018)	Loss 4.0268 (3.9999)	Top-1 acc 31.641 (31.281)	Top-5 acc 55.469 (54.630)	lr 0.00020
Warmup Train [39][1330/3239]	Time 0.332 (0.239)	Data 0.001 (0.018)	Loss 4.1040 (4.0003)	Top-1 acc 26.953 (31.274)	Top-5 acc 51.562 (54.620)	lr 0.00019
Warmup Train [39][1340/3239]	Time 0.176 (0.239)	Data 0.001 (0.018)	Loss 4.0419 (4.0001)	Top-1 acc 28.516 (31.280)	Top-5 acc 54.297 (54.625)	lr 0.00019
Warmup Train [39][1350/3239]	Time 0.208 (0.239)	Data 0.001 (0.018)	Loss 4.0370 (4.0000)	Top-1 acc 28.516 (31.282)	Top-5 acc 51.953 (54.625)	lr 0.00019
Warmup Train [39][1360/3239]	Time 0.207 (0.238)	Data 0.001 (0.018)	Loss 3.9110 (4.0001)	Top-1 acc 30.859 (31.286)	Top-5 acc 59.375 (54.619)	lr 0.00019
Warmup Train [39][1370/3239]	Time 0.256 (0.238)	Data 0.001 (0.018)	Loss 4.1301 (4.0000)	Top-1 acc 30.078 (31.286)	Top-5 acc 52.734 (54.626)	lr 0.00019
Warmup Train [39][1380/3239]	Time 0.330 (0.238)	Data 0.001 (0.018)	Loss 4.1051 (3.9997)	Top-1 acc 28.906 (31.289)	Top-5 acc 49.219 (54.632)	lr 0.00019
Warmup Train [39][1390/3239]	Time 0.195 (0.239)	Data 0.001 (0.018)	Loss 4.0488 (4.0001)	Top-1 acc 27.344 (31.281)	Top-5 acc 52.734 (54.630)	lr 0.00019
Warmup Train [39][1400/3239]	Time 0.202 (0.239)	Data 0.002 (0.018)	Loss 3.7238 (3.9999)	Top-1 acc 36.328 (31.283)	Top-5 acc 59.375 (54.632)	lr 0.00019
Warmup Train [39][1410/3239]	Time 0.205 (0.239)	Data 0.002 (0.018)	Loss 3.9409 (4.0000)	Top-1 acc 33.203 (31.280)	Top-5 acc 57.422 (54.634)	lr 0.00019
Warmup Train [39][1420/3239]	Time 0.342 (0.239)	Data 0.002 (0.017)	Loss 3.9813 (4.0000)	Top-1 acc 26.172 (31.271)	Top-5 acc 55.078 (54.627)	lr 0.00019
Warmup Train [39][1430/3239]	Time 0.286 (0.239)	Data 0.001 (0.017)	Loss 4.0350 (4.0002)	Top-1 acc 30.078 (31.269)	Top-5 acc 51.562 (54.620)	lr 0.00019
Warmup Train [39][1440/3239]	Time 0.163 (0.239)	Data 0.002 (0.017)	Loss 4.1425 (4.0004)	Top-1 acc 30.859 (31.262)	Top-5 acc 51.172 (54.612)	lr 0.00019
Warmup Train [39][1450/3239]	Time 0.213 (0.239)	Data 0.002 (0.017)	Loss 4.2668 (4.0006)	Top-1 acc 29.297 (31.255)	Top-5 acc 50.391 (54.608)	lr 0.00019
Warmup Train [39][1460/3239]	Time 0.210 (0.239)	Data 0.001 (0.017)	Loss 3.9682 (4.0006)	Top-1 acc 32.812 (31.257)	Top-5 acc 56.641 (54.605)	lr 0.00018
Warmup Train [39][1470/3239]	Time 0.227 (0.239)	Data 0.002 (0.017)	Loss 3.9113 (4.0000)	Top-1 acc 32.031 (31.271)	Top-5 acc 55.859 (54.617)	lr 0.00018
Warmup Train [39][1480/3239]	Time 0.220 (0.239)	Data 0.003 (0.017)	Loss 3.9404 (4.0000)	Top-1 acc 33.984 (31.278)	Top-5 acc 54.688 (54.623)	lr 0.00018
Warmup Train [39][1490/3239]	Time 0.137 (0.239)	Data 0.001 (0.017)	Loss 4.2204 (4.0001)	Top-1 acc 28.125 (31.275)	Top-5 acc 51.562 (54.627)	lr 0.00018
Warmup Train [39][1500/3239]	Time 0.145 (0.239)	Data 0.001 (0.017)	Loss 4.1135 (4.0001)	Top-1 acc 34.375 (31.276)	Top-5 acc 53.516 (54.634)	lr 0.00018
Warmup Train [39][1510/3239]	Time 0.231 (0.239)	Data 0.001 (0.017)	Loss 3.8494 (4.0001)	Top-1 acc 32.812 (31.279)	Top-5 acc 56.250 (54.633)	lr 0.00018
Warmup Train [39][1520/3239]	Time 0.244 (0.239)	Data 0.002 (0.017)	Loss 4.0731 (4.0000)	Top-1 acc 29.688 (31.275)	Top-5 acc 53.125 (54.635)	lr 0.00018
Warmup Train [39][1530/3239]	Time 0.321 (0.239)	Data 0.001 (0.016)	Loss 3.9035 (3.9998)	Top-1 acc 35.156 (31.283)	Top-5 acc 58.594 (54.642)	lr 0.00018
Warmup Train [39][1540/3239]	Time 0.203 (0.239)	Data 0.001 (0.016)	Loss 4.1906 (3.9999)	Top-1 acc 30.078 (31.284)	Top-5 acc 49.609 (54.636)	lr 0.00018
Warmup Train [39][1550/3239]	Time 0.202 (0.239)	Data 0.002 (0.016)	Loss 4.0877 (4.0003)	Top-1 acc 26.562 (31.269)	Top-5 acc 53.125 (54.624)	lr 0.00018
Warmup Train [39][1560/3239]	Time 0.211 (0.239)	Data 0.001 (0.016)	Loss 3.9634 (4.0001)	Top-1 acc 31.250 (31.270)	Top-5 acc 55.469 (54.625)	lr 0.00018
Warmup Train [39][1570/3239]	Time 0.171 (0.238)	Data 0.001 (0.016)	Loss 4.2526 (4.0003)	Top-1 acc 25.391 (31.269)	Top-5 acc 48.047 (54.623)	lr 0.00018
Warmup Train [39][1580/3239]	Time 0.242 (0.238)	Data 0.001 (0.016)	Loss 4.1373 (3.9999)	Top-1 acc 29.688 (31.280)	Top-5 acc 51.953 (54.629)	lr 0.00018
Warmup Train [39][1590/3239]	Time 0.202 (0.238)	Data 0.001 (0.016)	Loss 4.0519 (4.0000)	Top-1 acc 28.516 (31.274)	Top-5 acc 53.516 (54.630)	lr 0.00018
Warmup Train [39][1600/3239]	Time 0.186 (0.238)	Data 0.001 (0.016)	Loss 3.9555 (3.9997)	Top-1 acc 35.156 (31.282)	Top-5 acc 53.125 (54.632)	lr 0.00017
Warmup Train [39][1610/3239]	Time 0.211 (0.238)	Data 0.003 (0.016)	Loss 3.9230 (3.9999)	Top-1 acc 32.031 (31.272)	Top-5 acc 51.562 (54.624)	lr 0.00017
Warmup Train [39][1620/3239]	Time 0.204 (0.238)	Data 0.001 (0.016)	Loss 4.0485 (3.9995)	Top-1 acc 29.688 (31.276)	Top-5 acc 54.297 (54.633)	lr 0.00017
Warmup Train [39][1630/3239]	Time 0.297 (0.238)	Data 0.002 (0.016)	Loss 3.9585 (3.9996)	Top-1 acc 32.422 (31.271)	Top-5 acc 55.859 (54.631)	lr 0.00017
Warmup Train [39][1640/3239]	Time 0.229 (0.238)	Data 0.001 (0.016)	Loss 3.9683 (3.9994)	Top-1 acc 35.547 (31.274)	Top-5 acc 53.906 (54.628)	lr 0.00017
Warmup Train [39][1650/3239]	Time 0.185 (0.238)	Data 0.001 (0.015)	Loss 4.0589 (3.9994)	Top-1 acc 28.125 (31.266)	Top-5 acc 51.953 (54.625)	lr 0.00017
Warmup Train [39][1660/3239]	Time 0.149 (0.237)	Data 0.002 (0.015)	Loss 4.0841 (3.9996)	Top-1 acc 33.203 (31.261)	Top-5 acc 53.516 (54.626)	lr 0.00017
Warmup Train [39][1670/3239]	Time 0.182 (0.237)	Data 0.001 (0.015)	Loss 4.0131 (4.0000)	Top-1 acc 28.516 (31.250)	Top-5 acc 53.906 (54.618)	lr 0.00017
Warmup Train [39][1680/3239]	Time 0.229 (0.237)	Data 0.001 (0.015)	Loss 3.7835 (4.0000)	Top-1 acc 35.156 (31.249)	Top-5 acc 54.297 (54.616)	lr 0.00017
Warmup Train [39][1690/3239]	Time 0.186 (0.237)	Data 0.001 (0.015)	Loss 4.0193 (4.0003)	Top-1 acc 30.078 (31.238)	Top-5 acc 54.297 (54.611)	lr 0.00017
Warmup Train [39][1700/3239]	Time 0.206 (0.237)	Data 0.001 (0.015)	Loss 3.9763 (4.0002)	Top-1 acc 32.812 (31.242)	Top-5 acc 56.250 (54.611)	lr 0.00017
Warmup Train [39][1710/3239]	Time 0.227 (0.237)	Data 0.001 (0.015)	Loss 4.0419 (4.0002)	Top-1 acc 28.516 (31.244)	Top-5 acc 54.297 (54.611)	lr 0.00017
Warmup Train [39][1720/3239]	Time 0.231 (0.237)	Data 0.002 (0.015)	Loss 3.9981 (4.0001)	Top-1 acc 31.250 (31.243)	Top-5 acc 53.125 (54.617)	lr 0.00017
Warmup Train [39][1730/3239]	Time 0.340 (0.237)	Data 0.001 (0.015)	Loss 3.9632 (3.9999)	Top-1 acc 32.422 (31.245)	Top-5 acc 51.172 (54.618)	lr 0.00017
Warmup Train [39][1740/3239]	Time 0.254 (0.237)	Data 0.001 (0.015)	Loss 3.9732 (3.9998)	Top-1 acc 32.812 (31.243)	Top-5 acc 55.469 (54.621)	lr 0.00016
Warmup Train [39][1750/3239]	Time 0.164 (0.237)	Data 0.001 (0.015)	Loss 4.0810 (3.9998)	Top-1 acc 32.031 (31.244)	Top-5 acc 53.516 (54.619)	lr 0.00016
Warmup Train [39][1760/3239]	Time 0.196 (0.237)	Data 0.001 (0.015)	Loss 3.9822 (3.9995)	Top-1 acc 32.031 (31.248)	Top-5 acc 57.031 (54.622)	lr 0.00016
Warmup Train [39][1770/3239]	Time 0.274 (0.237)	Data 0.001 (0.015)	Loss 4.0744 (3.9995)	Top-1 acc 26.562 (31.244)	Top-5 acc 54.297 (54.628)	lr 0.00016
Warmup Train [39][1780/3239]	Time 0.181 (0.237)	Data 0.001 (0.015)	Loss 3.8965 (3.9997)	Top-1 acc 34.375 (31.241)	Top-5 acc 53.516 (54.625)	lr 0.00016
Warmup Train [39][1790/3239]	Time 0.225 (0.236)	Data 0.001 (0.014)	Loss 4.0212 (3.9998)	Top-1 acc 32.031 (31.240)	Top-5 acc 54.688 (54.622)	lr 0.00016
Warmup Train [39][1800/3239]	Time 0.208 (0.236)	Data 0.002 (0.014)	Loss 4.1136 (4.0000)	Top-1 acc 28.125 (31.235)	Top-5 acc 51.953 (54.617)	lr 0.00016
Warmup Train [39][1810/3239]	Time 0.201 (0.236)	Data 0.001 (0.014)	Loss 3.8648 (4.0000)	Top-1 acc 33.984 (31.239)	Top-5 acc 54.297 (54.614)	lr 0.00016
Warmup Train [39][1820/3239]	Time 0.298 (0.236)	Data 0.001 (0.014)	Loss 4.2215 (4.0003)	Top-1 acc 28.125 (31.232)	Top-5 acc 51.953 (54.608)	lr 0.00016
Warmup Train [39][1830/3239]	Time 0.186 (0.236)	Data 0.001 (0.014)	Loss 3.9605 (4.0002)	Top-1 acc 30.469 (31.234)	Top-5 acc 53.516 (54.609)	lr 0.00016
Warmup Train [39][1840/3239]	Time 0.299 (0.236)	Data 0.001 (0.014)	Loss 4.0460 (4.0006)	Top-1 acc 26.953 (31.229)	Top-5 acc 51.562 (54.606)	lr 0.00016
Warmup Train [39][1850/3239]	Time 0.320 (0.236)	Data 0.001 (0.014)	Loss 3.9051 (4.0005)	Top-1 acc 33.984 (31.232)	Top-5 acc 55.078 (54.613)	lr 0.00016
Warmup Train [39][1860/3239]	Time 0.206 (0.236)	Data 0.001 (0.014)	Loss 3.9161 (4.0003)	Top-1 acc 30.859 (31.228)	Top-5 acc 58.984 (54.617)	lr 0.00016
Warmup Train [39][1870/3239]	Time 0.257 (0.236)	Data 0.001 (0.014)	Loss 4.0393 (4.0001)	Top-1 acc 30.859 (31.229)	Top-5 acc 50.781 (54.624)	lr 0.00016
Warmup Train [39][1880/3239]	Time 0.211 (0.236)	Data 0.001 (0.014)	Loss 4.0544 (3.9998)	Top-1 acc 32.422 (31.233)	Top-5 acc 53.516 (54.629)	lr 0.00016
Warmup Train [39][1890/3239]	Time 0.239 (0.236)	Data 0.001 (0.014)	Loss 3.8910 (4.0001)	Top-1 acc 31.641 (31.226)	Top-5 acc 57.031 (54.618)	lr 0.00015
Warmup Train [39][1900/3239]	Time 0.201 (0.236)	Data 0.001 (0.014)	Loss 4.0124 (3.9999)	Top-1 acc 29.297 (31.237)	Top-5 acc 52.734 (54.625)	lr 0.00015
Warmup Train [39][1910/3239]	Time 0.171 (0.235)	Data 0.002 (0.014)	Loss 4.1093 (3.9995)	Top-1 acc 29.297 (31.242)	Top-5 acc 51.562 (54.631)	lr 0.00015
Warmup Train [39][1920/3239]	Time 0.246 (0.235)	Data 0.001 (0.014)	Loss 3.9309 (3.9993)	Top-1 acc 29.297 (31.243)	Top-5 acc 55.469 (54.639)	lr 0.00015
Warmup Train [39][1930/3239]	Time 0.213 (0.235)	Data 0.001 (0.014)	Loss 4.1920 (3.9997)	Top-1 acc 28.516 (31.241)	Top-5 acc 51.562 (54.628)	lr 0.00015
Warmup Train [39][1940/3239]	Time 0.193 (0.235)	Data 0.001 (0.014)	Loss 4.1266 (3.9998)	Top-1 acc 31.641 (31.241)	Top-5 acc 51.172 (54.623)	lr 0.00015
Warmup Train [39][1950/3239]	Time 0.329 (0.235)	Data 0.027 (0.014)	Loss 4.1830 (3.9996)	Top-1 acc 28.125 (31.240)	Top-5 acc 51.562 (54.632)	lr 0.00015
Warmup Train [39][1960/3239]	Time 0.296 (0.235)	Data 0.001 (0.013)	Loss 3.9077 (3.9996)	Top-1 acc 31.641 (31.244)	Top-5 acc 56.250 (54.634)	lr 0.00015
Warmup Train [39][1970/3239]	Time 0.131 (0.235)	Data 0.001 (0.013)	Loss 3.9104 (3.9994)	Top-1 acc 33.984 (31.248)	Top-5 acc 54.688 (54.638)	lr 0.00015
Warmup Train [39][1980/3239]	Time 0.219 (0.235)	Data 0.001 (0.013)	Loss 3.9629 (3.9993)	Top-1 acc 33.203 (31.244)	Top-5 acc 57.031 (54.639)	lr 0.00015
Warmup Train [39][1990/3239]	Time 0.242 (0.235)	Data 0.001 (0.013)	Loss 3.9853 (3.9996)	Top-1 acc 32.422 (31.244)	Top-5 acc 56.250 (54.638)	lr 0.00015
Warmup Train [39][2000/3239]	Time 0.233 (0.235)	Data 0.001 (0.013)	Loss 4.0317 (3.9999)	Top-1 acc 33.203 (31.241)	Top-5 acc 54.688 (54.632)	lr 0.00015
Warmup Train [39][2010/3239]	Time 0.194 (0.235)	Data 0.001 (0.013)	Loss 4.0732 (3.9998)	Top-1 acc 29.297 (31.240)	Top-5 acc 55.469 (54.636)	lr 0.00015
Warmup Train [39][2020/3239]	Time 0.208 (0.235)	Data 0.001 (0.013)	Loss 3.8248 (3.9997)	Top-1 acc 33.984 (31.245)	Top-5 acc 59.375 (54.634)	lr 0.00015
Warmup Train [39][2030/3239]	Time 0.239 (0.235)	Data 0.001 (0.013)	Loss 3.8760 (3.9999)	Top-1 acc 35.938 (31.244)	Top-5 acc 54.688 (54.627)	lr 0.00015
Warmup Train [39][2040/3239]	Time 0.211 (0.235)	Data 0.001 (0.013)	Loss 4.1298 (3.9999)	Top-1 acc 25.781 (31.246)	Top-5 acc 49.609 (54.627)	lr 0.00014
Warmup Train [39][2050/3239]	Time 0.193 (0.235)	Data 0.001 (0.013)	Loss 3.9180 (3.9997)	Top-1 acc 31.250 (31.251)	Top-5 acc 54.688 (54.626)	lr 0.00014
Warmup Train [39][2060/3239]	Time 0.336 (0.235)	Data 0.001 (0.013)	Loss 4.0075 (3.9998)	Top-1 acc 31.641 (31.247)	Top-5 acc 55.078 (54.619)	lr 0.00014
Warmup Train [39][2070/3239]	Time 0.189 (0.234)	Data 0.001 (0.013)	Loss 3.9722 (3.9995)	Top-1 acc 34.375 (31.257)	Top-5 acc 56.641 (54.627)	lr 0.00014
Warmup Train [39][2080/3239]	Time 0.268 (0.234)	Data 0.002 (0.013)	Loss 3.9977 (3.9997)	Top-1 acc 33.594 (31.258)	Top-5 acc 54.688 (54.625)	lr 0.00014
Warmup Train [39][2090/3239]	Time 0.143 (0.234)	Data 0.001 (0.013)	Loss 3.9925 (3.9998)	Top-1 acc 31.641 (31.256)	Top-5 acc 55.469 (54.619)	lr 0.00014
Warmup Train [39][2100/3239]	Time 0.276 (0.234)	Data 0.001 (0.013)	Loss 3.8579 (3.9997)	Top-1 acc 33.203 (31.259)	Top-5 acc 56.641 (54.621)	lr 0.00014
Warmup Train [39][2110/3239]	Time 0.160 (0.234)	Data 0.001 (0.013)	Loss 4.0863 (3.9998)	Top-1 acc 27.344 (31.253)	Top-5 acc 49.219 (54.617)	lr 0.00014
Warmup Train [39][2120/3239]	Time 0.236 (0.234)	Data 0.001 (0.013)	Loss 3.9673 (3.9997)	Top-1 acc 33.594 (31.258)	Top-5 acc 54.297 (54.617)	lr 0.00014
Warmup Train [39][2130/3239]	Time 0.210 (0.234)	Data 0.025 (0.013)	Loss 4.0333 (3.9997)	Top-1 acc 27.344 (31.253)	Top-5 acc 51.953 (54.615)	lr 0.00014
Warmup Train [39][2140/3239]	Time 0.201 (0.234)	Data 0.001 (0.013)	Loss 3.9139 (3.9999)	Top-1 acc 35.156 (31.252)	Top-5 acc 57.422 (54.610)	lr 0.00014
Warmup Train [39][2150/3239]	Time 0.287 (0.234)	Data 0.001 (0.013)	Loss 3.9510 (4.0000)	Top-1 acc 30.469 (31.245)	Top-5 acc 56.250 (54.609)	lr 0.00014
Warmup Train [39][2160/3239]	Time 0.318 (0.234)	Data 0.001 (0.012)	Loss 4.0694 (4.0000)	Top-1 acc 29.297 (31.242)	Top-5 acc 52.734 (54.612)	lr 0.00014
Warmup Train [39][2170/3239]	Time 0.172 (0.234)	Data 0.001 (0.012)	Loss 3.9716 (4.0001)	Top-1 acc 32.031 (31.243)	Top-5 acc 55.469 (54.609)	lr 0.00014
Warmup Train [39][2180/3239]	Time 0.152 (0.234)	Data 0.002 (0.012)	Loss 3.8601 (4.0002)	Top-1 acc 29.688 (31.239)	Top-5 acc 55.859 (54.605)	lr 0.00014
Warmup Train [39][2190/3239]	Time 0.241 (0.234)	Data 0.001 (0.012)	Loss 4.0431 (3.9998)	Top-1 acc 29.297 (31.249)	Top-5 acc 53.906 (54.612)	lr 0.00014
Warmup Train [39][2200/3239]	Time 0.237 (0.234)	Data 0.001 (0.012)	Loss 3.9685 (3.9996)	Top-1 acc 33.203 (31.254)	Top-5 acc 56.641 (54.623)	lr 0.00013
Warmup Train [39][2210/3239]	Time 0.173 (0.234)	Data 0.001 (0.012)	Loss 4.0015 (3.9997)	Top-1 acc 28.125 (31.255)	Top-5 acc 59.766 (54.619)	lr 0.00013
Warmup Train [39][2220/3239]	Time 0.271 (0.234)	Data 0.001 (0.012)	Loss 4.1341 (3.9998)	Top-1 acc 31.641 (31.255)	Top-5 acc 53.906 (54.619)	lr 0.00013
Warmup Train [39][2230/3239]	Time 0.247 (0.234)	Data 0.001 (0.012)	Loss 3.9264 (3.9998)	Top-1 acc 29.688 (31.255)	Top-5 acc 57.422 (54.620)	lr 0.00013
Warmup Train [39][2240/3239]	Time 0.215 (0.233)	Data 0.001 (0.012)	Loss 3.8846 (3.9995)	Top-1 acc 31.641 (31.259)	Top-5 acc 57.031 (54.626)	lr 0.00013
Warmup Train [39][2250/3239]	Time 0.171 (0.233)	Data 0.001 (0.012)	Loss 4.1409 (4.0000)	Top-1 acc 28.906 (31.251)	Top-5 acc 49.609 (54.610)	lr 0.00013
Warmup Train [39][2260/3239]	Time 0.206 (0.233)	Data 0.001 (0.012)	Loss 3.7407 (3.9997)	Top-1 acc 33.984 (31.253)	Top-5 acc 57.422 (54.620)	lr 0.00013
Warmup Train [39][2270/3239]	Time 0.316 (0.233)	Data 0.001 (0.012)	Loss 3.9911 (3.9996)	Top-1 acc 30.859 (31.259)	Top-5 acc 53.906 (54.627)	lr 0.00013
Warmup Train [39][2280/3239]	Time 0.168 (0.233)	Data 0.001 (0.012)	Loss 3.9206 (3.9996)	Top-1 acc 30.859 (31.263)	Top-5 acc 56.250 (54.631)	lr 0.00013
Warmup Train [39][2290/3239]	Time 0.224 (0.233)	Data 0.001 (0.012)	Loss 4.0260 (3.9997)	Top-1 acc 30.859 (31.261)	Top-5 acc 55.078 (54.630)	lr 0.00013
Warmup Train [39][2300/3239]	Time 0.225 (0.233)	Data 0.002 (0.012)	Loss 4.0229 (3.9997)	Top-1 acc 33.594 (31.258)	Top-5 acc 52.344 (54.627)	lr 0.00013
Warmup Train [39][2310/3239]	Time 0.187 (0.233)	Data 0.002 (0.012)	Loss 4.1591 (4.0000)	Top-1 acc 29.297 (31.252)	Top-5 acc 51.953 (54.623)	lr 0.00013
Warmup Train [39][2320/3239]	Time 0.185 (0.233)	Data 0.001 (0.012)	Loss 3.8891 (3.9998)	Top-1 acc 30.859 (31.252)	Top-5 acc 60.156 (54.631)	lr 0.00013
Warmup Train [39][2330/3239]	Time 0.214 (0.233)	Data 0.001 (0.012)	Loss 3.9355 (4.0000)	Top-1 acc 32.031 (31.246)	Top-5 acc 57.031 (54.628)	lr 0.00013
Warmup Train [39][2340/3239]	Time 0.194 (0.233)	Data 0.001 (0.012)	Loss 3.8111 (4.0001)	Top-1 acc 37.891 (31.240)	Top-5 acc 59.766 (54.624)	lr 0.00013
Warmup Train [39][2350/3239]	Time 0.223 (0.233)	Data 0.001 (0.012)	Loss 4.2178 (3.9998)	Top-1 acc 26.172 (31.243)	Top-5 acc 51.172 (54.632)	lr 0.00013
Warmup Train [39][2360/3239]	Time 0.166 (0.233)	Data 0.001 (0.012)	Loss 4.0162 (3.9997)	Top-1 acc 29.688 (31.240)	Top-5 acc 56.641 (54.629)	lr 0.00012
Warmup Train [39][2370/3239]	Time 0.214 (0.233)	Data 0.001 (0.012)	Loss 3.9633 (3.9997)	Top-1 acc 30.469 (31.241)	Top-5 acc 56.250 (54.631)	lr 0.00012
Warmup Train [39][2380/3239]	Time 0.325 (0.233)	Data 0.002 (0.012)	Loss 4.0233 (3.9996)	Top-1 acc 30.859 (31.242)	Top-5 acc 55.859 (54.636)	lr 0.00012
Warmup Train [39][2390/3239]	Time 0.146 (0.233)	Data 0.001 (0.012)	Loss 4.0851 (3.9998)	Top-1 acc 25.781 (31.236)	Top-5 acc 51.953 (54.633)	lr 0.00012
Warmup Train [39][2400/3239]	Time 0.136 (0.233)	Data 0.001 (0.012)	Loss 4.2286 (3.9996)	Top-1 acc 25.000 (31.242)	Top-5 acc 51.953 (54.638)	lr 0.00012
Warmup Train [39][2410/3239]	Time 0.233 (0.233)	Data 0.001 (0.012)	Loss 4.0774 (3.9995)	Top-1 acc 26.562 (31.243)	Top-5 acc 51.562 (54.637)	lr 0.00012
Warmup Train [39][2420/3239]	Time 0.187 (0.233)	Data 0.001 (0.011)	Loss 4.0803 (3.9995)	Top-1 acc 30.078 (31.239)	Top-5 acc 52.344 (54.638)	lr 0.00012
Warmup Train [39][2430/3239]	Time 0.188 (0.233)	Data 0.002 (0.011)	Loss 3.8972 (3.9995)	Top-1 acc 29.297 (31.234)	Top-5 acc 57.031 (54.640)	lr 0.00012
Warmup Train [39][2440/3239]	Time 0.126 (0.232)	Data 0.002 (0.011)	Loss 4.1126 (3.9998)	Top-1 acc 29.297 (31.226)	Top-5 acc 50.781 (54.632)	lr 0.00012
Warmup Train [39][2450/3239]	Time 0.139 (0.232)	Data 0.001 (0.011)	Loss 4.1054 (4.0000)	Top-1 acc 29.297 (31.223)	Top-5 acc 52.344 (54.627)	lr 0.00012
Warmup Train [39][2460/3239]	Time 0.265 (0.232)	Data 0.001 (0.011)	Loss 3.9440 (4.0000)	Top-1 acc 30.859 (31.223)	Top-5 acc 55.469 (54.627)	lr 0.00012
Warmup Train [39][2470/3239]	Time 0.383 (0.232)	Data 0.001 (0.011)	Loss 3.9811 (4.0001)	Top-1 acc 34.375 (31.225)	Top-5 acc 58.984 (54.627)	lr 0.00012
Warmup Train [39][2480/3239]	Time 0.202 (0.232)	Data 0.001 (0.011)	Loss 4.0375 (4.0000)	Top-1 acc 30.078 (31.229)	Top-5 acc 53.906 (54.631)	lr 0.00012
Warmup Train [39][2490/3239]	Time 0.232 (0.232)	Data 0.001 (0.011)	Loss 3.9748 (3.9998)	Top-1 acc 33.203 (31.231)	Top-5 acc 58.984 (54.636)	lr 0.00012
Warmup Train [39][2500/3239]	Time 0.132 (0.232)	Data 0.001 (0.011)	Loss 3.8771 (3.9995)	Top-1 acc 33.984 (31.236)	Top-5 acc 56.250 (54.644)	lr 0.00012
Warmup Train [39][2510/3239]	Time 0.207 (0.232)	Data 0.001 (0.011)	Loss 4.0193 (3.9996)	Top-1 acc 28.516 (31.233)	Top-5 acc 56.250 (54.643)	lr 0.00012
Warmup Train [39][2520/3239]	Time 0.175 (0.232)	Data 0.001 (0.011)	Loss 3.6925 (3.9996)	Top-1 acc 34.375 (31.234)	Top-5 acc 59.375 (54.643)	lr 0.00012
Warmup Train [39][2530/3239]	Time 0.173 (0.232)	Data 0.002 (0.011)	Loss 4.0151 (3.9998)	Top-1 acc 30.859 (31.229)	Top-5 acc 53.906 (54.637)	lr 0.00011
Warmup Train [39][2540/3239]	Time 0.200 (0.232)	Data 0.002 (0.011)	Loss 3.9570 (3.9998)	Top-1 acc 32.031 (31.229)	Top-5 acc 55.078 (54.638)	lr 0.00011
Warmup Train [39][2550/3239]	Time 0.229 (0.232)	Data 0.001 (0.011)	Loss 3.9426 (3.9997)	Top-1 acc 32.031 (31.233)	Top-5 acc 54.688 (54.640)	lr 0.00011
Warmup Train [39][2560/3239]	Time 0.227 (0.232)	Data 0.001 (0.011)	Loss 4.1091 (3.9997)	Top-1 acc 32.812 (31.235)	Top-5 acc 48.828 (54.640)	lr 0.00011
Warmup Train [39][2570/3239]	Time 0.261 (0.232)	Data 0.001 (0.011)	Loss 4.0378 (3.9995)	Top-1 acc 30.078 (31.241)	Top-5 acc 53.125 (54.642)	lr 0.00011
Warmup Train [39][2580/3239]	Time 0.294 (0.232)	Data 0.001 (0.011)	Loss 3.8933 (3.9992)	Top-1 acc 32.812 (31.248)	Top-5 acc 56.641 (54.647)	lr 0.00011
Warmup Train [39][2590/3239]	Time 0.183 (0.232)	Data 0.001 (0.011)	Loss 4.0635 (3.9991)	Top-1 acc 28.125 (31.249)	Top-5 acc 57.031 (54.649)	lr 0.00011
Warmup Train [39][2600/3239]	Time 0.191 (0.232)	Data 0.002 (0.011)	Loss 4.0394 (3.9992)	Top-1 acc 30.078 (31.244)	Top-5 acc 51.562 (54.647)	lr 0.00011
Warmup Train [39][2610/3239]	Time 0.180 (0.232)	Data 0.001 (0.011)	Loss 3.7427 (3.9990)	Top-1 acc 36.719 (31.248)	Top-5 acc 61.328 (54.652)	lr 0.00011
Warmup Train [39][2620/3239]	Time 0.292 (0.232)	Data 0.001 (0.011)	Loss 3.9126 (3.9986)	Top-1 acc 31.250 (31.256)	Top-5 acc 57.031 (54.659)	lr 0.00011
Warmup Train [39][2630/3239]	Time 0.175 (0.232)	Data 0.001 (0.011)	Loss 4.0053 (3.9988)	Top-1 acc 28.516 (31.253)	Top-5 acc 52.344 (54.654)	lr 0.00011
Warmup Train [39][2640/3239]	Time 0.232 (0.232)	Data 0.001 (0.011)	Loss 4.0999 (3.9988)	Top-1 acc 29.688 (31.252)	Top-5 acc 50.391 (54.649)	lr 0.00011
Warmup Train [39][2650/3239]	Time 0.199 (0.232)	Data 0.001 (0.011)	Loss 4.0662 (3.9988)	Top-1 acc 25.391 (31.247)	Top-5 acc 51.172 (54.649)	lr 0.00011
Warmup Train [39][2660/3239]	Time 0.203 (0.232)	Data 0.001 (0.011)	Loss 4.1242 (3.9988)	Top-1 acc 29.688 (31.250)	Top-5 acc 49.609 (54.645)	lr 0.00011
Warmup Train [39][2670/3239]	Time 0.167 (0.231)	Data 0.001 (0.011)	Loss 4.0594 (3.9988)	Top-1 acc 29.297 (31.251)	Top-5 acc 53.906 (54.648)	lr 0.00011
Warmup Train [39][2680/3239]	Time 0.280 (0.231)	Data 0.001 (0.011)	Loss 4.0845 (3.9987)	Top-1 acc 26.953 (31.248)	Top-5 acc 55.859 (54.653)	lr 0.00011
Warmup Train [39][2690/3239]	Time 0.176 (0.231)	Data 0.001 (0.011)	Loss 3.8657 (3.9988)	Top-1 acc 36.328 (31.249)	Top-5 acc 60.156 (54.654)	lr 0.00011
Warmup Train [39][2700/3239]	Time 0.133 (0.231)	Data 0.002 (0.011)	Loss 4.0651 (3.9988)	Top-1 acc 32.031 (31.251)	Top-5 acc 53.125 (54.657)	lr 0.00010
Warmup Train [39][2710/3239]	Time 0.250 (0.231)	Data 0.001 (0.011)	Loss 3.8386 (3.9986)	Top-1 acc 38.672 (31.255)	Top-5 acc 59.375 (54.661)	lr 0.00010
Warmup Train [39][2720/3239]	Time 0.192 (0.231)	Data 0.002 (0.011)	Loss 4.1064 (3.9990)	Top-1 acc 26.562 (31.248)	Top-5 acc 50.000 (54.653)	lr 0.00010
Warmup Train [39][2730/3239]	Time 0.208 (0.231)	Data 0.001 (0.011)	Loss 3.9788 (3.9991)	Top-1 acc 34.375 (31.244)	Top-5 acc 57.031 (54.650)	lr 0.00010
Warmup Train [39][2740/3239]	Time 0.205 (0.231)	Data 0.001 (0.011)	Loss 3.9514 (3.9990)	Top-1 acc 32.812 (31.249)	Top-5 acc 57.031 (54.656)	lr 0.00010
Warmup Train [39][2750/3239]	Time 0.226 (0.231)	Data 0.001 (0.010)	Loss 3.9486 (3.9988)	Top-1 acc 33.594 (31.255)	Top-5 acc 54.688 (54.659)	lr 0.00010
Warmup Train [39][2760/3239]	Time 0.195 (0.231)	Data 0.001 (0.010)	Loss 4.1072 (3.9987)	Top-1 acc 27.734 (31.259)	Top-5 acc 53.125 (54.660)	lr 0.00010
Warmup Train [39][2770/3239]	Time 0.347 (0.231)	Data 0.001 (0.010)	Loss 3.7818 (3.9987)	Top-1 acc 35.547 (31.259)	Top-5 acc 61.328 (54.661)	lr 0.00010
Warmup Train [39][2780/3239]	Time 0.214 (0.231)	Data 0.001 (0.010)	Loss 3.9244 (3.9989)	Top-1 acc 32.812 (31.259)	Top-5 acc 53.906 (54.658)	lr 0.00010
Warmup Train [39][2790/3239]	Time 0.208 (0.231)	Data 0.001 (0.010)	Loss 3.8556 (3.9986)	Top-1 acc 32.422 (31.264)	Top-5 acc 57.422 (54.666)	lr 0.00010
Warmup Train [39][2800/3239]	Time 0.191 (0.231)	Data 0.001 (0.010)	Loss 3.9132 (3.9985)	Top-1 acc 29.688 (31.268)	Top-5 acc 53.516 (54.668)	lr 0.00010
Warmup Train [39][2810/3239]	Time 0.189 (0.231)	Data 0.001 (0.010)	Loss 3.8934 (3.9986)	Top-1 acc 32.812 (31.265)	Top-5 acc 55.859 (54.662)	lr 0.00010
Warmup Train [39][2820/3239]	Time 0.198 (0.231)	Data 0.001 (0.010)	Loss 4.1923 (3.9988)	Top-1 acc 27.734 (31.262)	Top-5 acc 50.781 (54.656)	lr 0.00010
Warmup Train [39][2830/3239]	Time 0.217 (0.231)	Data 0.002 (0.010)	Loss 4.2399 (3.9988)	Top-1 acc 27.734 (31.260)	Top-5 acc 47.266 (54.657)	lr 0.00010
Warmup Train [39][2840/3239]	Time 0.173 (0.231)	Data 0.002 (0.010)	Loss 3.9519 (3.9986)	Top-1 acc 29.297 (31.261)	Top-5 acc 59.375 (54.661)	lr 0.00010
Warmup Train [39][2850/3239]	Time 0.283 (0.231)	Data 0.002 (0.010)	Loss 3.9940 (3.9984)	Top-1 acc 30.469 (31.267)	Top-5 acc 51.953 (54.663)	lr 0.00010
Warmup Train [39][2860/3239]	Time 0.177 (0.231)	Data 0.001 (0.010)	Loss 4.0673 (3.9985)	Top-1 acc 27.344 (31.265)	Top-5 acc 53.906 (54.659)	lr 0.00010
Warmup Train [39][2870/3239]	Time 0.273 (0.231)	Data 0.001 (0.010)	Loss 3.9091 (3.9986)	Top-1 acc 33.594 (31.266)	Top-5 acc 56.641 (54.656)	lr 0.00010
Warmup Train [39][2880/3239]	Time 0.366 (0.231)	Data 0.001 (0.010)	Loss 3.9983 (3.9988)	Top-1 acc 33.203 (31.263)	Top-5 acc 56.641 (54.654)	lr 0.00010
Warmup Train [39][2890/3239]	Time 0.202 (0.231)	Data 0.001 (0.010)	Loss 3.9962 (3.9990)	Top-1 acc 28.906 (31.255)	Top-5 acc 58.594 (54.650)	lr 0.00009
Warmup Train [39][2900/3239]	Time 0.160 (0.231)	Data 0.001 (0.010)	Loss 3.9461 (3.9990)	Top-1 acc 32.422 (31.260)	Top-5 acc 56.641 (54.651)	lr 0.00009
Warmup Train [39][2910/3239]	Time 0.223 (0.231)	Data 0.001 (0.010)	Loss 3.9890 (3.9990)	Top-1 acc 30.078 (31.258)	Top-5 acc 57.812 (54.649)	lr 0.00009
Warmup Train [39][2920/3239]	Time 0.172 (0.231)	Data 0.001 (0.010)	Loss 3.8519 (3.9989)	Top-1 acc 34.766 (31.261)	Top-5 acc 58.984 (54.652)	lr 0.00009
Warmup Train [39][2930/3239]	Time 0.264 (0.231)	Data 0.001 (0.010)	Loss 4.0765 (3.9991)	Top-1 acc 28.906 (31.255)	Top-5 acc 53.125 (54.649)	lr 0.00009
Warmup Train [39][2940/3239]	Time 0.198 (0.231)	Data 0.001 (0.010)	Loss 3.8711 (3.9989)	Top-1 acc 30.859 (31.257)	Top-5 acc 57.422 (54.651)	lr 0.00009
Warmup Train [39][2950/3239]	Time 0.200 (0.231)	Data 0.001 (0.010)	Loss 3.7259 (3.9991)	Top-1 acc 37.891 (31.255)	Top-5 acc 64.844 (54.652)	lr 0.00009
Warmup Train [39][2960/3239]	Time 0.263 (0.231)	Data 0.001 (0.010)	Loss 3.7903 (3.9991)	Top-1 acc 34.375 (31.254)	Top-5 acc 56.250 (54.653)	lr 0.00009
Warmup Train [39][2970/3239]	Time 0.150 (0.231)	Data 0.001 (0.010)	Loss 3.8715 (3.9992)	Top-1 acc 34.375 (31.254)	Top-5 acc 55.859 (54.654)	lr 0.00009
Warmup Train [39][2980/3239]	Time 0.192 (0.231)	Data 0.001 (0.010)	Loss 4.1902 (3.9994)	Top-1 acc 27.734 (31.250)	Top-5 acc 49.609 (54.648)	lr 0.00009
Warmup Train [39][2990/3239]	Time 0.374 (0.231)	Data 0.001 (0.010)	Loss 3.8254 (3.9994)	Top-1 acc 35.938 (31.247)	Top-5 acc 60.156 (54.650)	lr 0.00009
Warmup Train [39][3000/3239]	Time 0.198 (0.231)	Data 0.001 (0.010)	Loss 3.9081 (3.9994)	Top-1 acc 34.375 (31.250)	Top-5 acc 57.422 (54.649)	lr 0.00009
Warmup Train [39][3010/3239]	Time 0.246 (0.231)	Data 0.001 (0.010)	Loss 4.0954 (3.9993)	Top-1 acc 27.344 (31.251)	Top-5 acc 55.078 (54.650)	lr 0.00009
Warmup Train [39][3020/3239]	Time 0.207 (0.231)	Data 0.001 (0.010)	Loss 4.0433 (3.9993)	Top-1 acc 32.422 (31.253)	Top-5 acc 56.250 (54.650)	lr 0.00009
Warmup Train [39][3030/3239]	Time 0.218 (0.231)	Data 0.001 (0.010)	Loss 3.9589 (3.9991)	Top-1 acc 33.203 (31.257)	Top-5 acc 56.641 (54.651)	lr 0.00009
Warmup Train [39][3040/3239]	Time 0.257 (0.231)	Data 0.001 (0.010)	Loss 3.8891 (3.9990)	Top-1 acc 37.500 (31.262)	Top-5 acc 57.422 (54.657)	lr 0.00009
Warmup Train [39][3050/3239]	Time 0.175 (0.231)	Data 0.002 (0.010)	Loss 4.1504 (3.9991)	Top-1 acc 29.297 (31.261)	Top-5 acc 51.953 (54.653)	lr 0.00009
Warmup Train [39][3060/3239]	Time 0.215 (0.230)	Data 0.001 (0.010)	Loss 4.1494 (3.9992)	Top-1 acc 29.688 (31.257)	Top-5 acc 53.906 (54.652)	lr 0.00009
Warmup Train [39][3070/3239]	Time 0.198 (0.230)	Data 0.001 (0.010)	Loss 3.9766 (3.9991)	Top-1 acc 33.203 (31.255)	Top-5 acc 54.688 (54.649)	lr 0.00009
Warmup Train [39][3080/3239]	Time 0.202 (0.230)	Data 0.001 (0.010)	Loss 3.9878 (3.9992)	Top-1 acc 29.688 (31.254)	Top-5 acc 55.078 (54.648)	lr 0.00008
Warmup Train [39][3090/3239]	Time 0.160 (0.230)	Data 0.002 (0.010)	Loss 3.9667 (3.9992)	Top-1 acc 33.203 (31.252)	Top-5 acc 55.469 (54.649)	lr 0.00008
Warmup Train [39][3100/3239]	Time 0.211 (0.230)	Data 0.001 (0.010)	Loss 3.9148 (3.9989)	Top-1 acc 32.812 (31.257)	Top-5 acc 56.641 (54.657)	lr 0.00008
Warmup Train [39][3110/3239]	Time 0.331 (0.230)	Data 0.001 (0.010)	Loss 3.9357 (3.9991)	Top-1 acc 33.203 (31.253)	Top-5 acc 55.859 (54.652)	lr 0.00008
Warmup Train [39][3120/3239]	Time 0.200 (0.230)	Data 0.002 (0.010)	Loss 4.1024 (3.9990)	Top-1 acc 28.516 (31.256)	Top-5 acc 53.516 (54.653)	lr 0.00008
Warmup Train [39][3130/3239]	Time 0.195 (0.230)	Data 0.001 (0.010)	Loss 4.0274 (3.9990)	Top-1 acc 28.516 (31.254)	Top-5 acc 53.516 (54.653)	lr 0.00008
Warmup Train [39][3140/3239]	Time 0.194 (0.230)	Data 0.001 (0.010)	Loss 3.9779 (3.9990)	Top-1 acc 35.156 (31.257)	Top-5 acc 54.688 (54.652)	lr 0.00008
Warmup Train [39][3150/3239]	Time 0.203 (0.230)	Data 0.003 (0.010)	Loss 3.8059 (3.9989)	Top-1 acc 33.984 (31.257)	Top-5 acc 58.984 (54.653)	lr 0.00008
Warmup Train [39][3160/3239]	Time 0.193 (0.230)	Data 0.001 (0.009)	Loss 4.1439 (3.9989)	Top-1 acc 30.078 (31.255)	Top-5 acc 50.000 (54.655)	lr 0.00008
Warmup Train [39][3170/3239]	Time 0.187 (0.230)	Data 0.001 (0.009)	Loss 4.1082 (3.9990)	Top-1 acc 30.469 (31.253)	Top-5 acc 52.734 (54.653)	lr 0.00008
Warmup Train [39][3180/3239]	Time 0.248 (0.230)	Data 0.000 (0.009)	Loss 3.9246 (3.9990)	Top-1 acc 34.375 (31.253)	Top-5 acc 57.031 (54.654)	lr 0.00008
Warmup Train [39][3190/3239]	Time 0.206 (0.230)	Data 0.000 (0.009)	Loss 3.8715 (3.9990)	Top-1 acc 33.594 (31.249)	Top-5 acc 55.469 (54.653)	lr 0.00008
Warmup Train [39][3200/3239]	Time 0.164 (0.230)	Data 0.000 (0.009)	Loss 3.8130 (3.9990)	Top-1 acc 30.469 (31.244)	Top-5 acc 58.984 (54.653)	lr 0.00008
Warmup Train [39][3210/3239]	Time 0.324 (0.230)	Data 0.000 (0.009)	Loss 4.0385 (3.9990)	Top-1 acc 31.641 (31.243)	Top-5 acc 55.469 (54.651)	lr 0.00008
Warmup Train [39][3220/3239]	Time 0.344 (0.230)	Data 0.000 (0.009)	Loss 3.8012 (3.9990)	Top-1 acc 35.547 (31.248)	Top-5 acc 57.031 (54.651)	lr 0.00008
Warmup Train [39][3230/3239]	Time 0.207 (0.230)	Data 0.000 (0.009)	Loss 3.9812 (3.9989)	Top-1 acc 27.344 (31.247)	Top-5 acc 56.250 (54.652)	lr 0.00008
Warmup Train [39][3239/3239]	Time 0.158 (0.230)	Data 0.000 (0.009)	Loss 4.3166 (3.9990)	Top-1 acc 25.926 (31.247)	Top-5 acc 48.148 (54.650)	lr 0.00008
==========Warmup Valid [39/40]	loss 2.941	top-1 acc 38.626	top-5 acc 63.588	Train top-1 31.247	top-5 54.650	flops: 442.4M
Warmup Train [40][0/3239]	Time 18.818 (18.818)	Data 17.252 (17.252)	Loss 4.0302 (4.0302)	Top-1 acc 28.516 (28.516)	Top-5 acc 49.609 (49.609)	lr 0.00008
Warmup Train [40][10/3239]	Time 0.402 (2.081)	Data 0.002 (1.626)	Loss 3.9316 (3.9910)	Top-1 acc 31.641 (31.143)	Top-5 acc 55.078 (54.190)	lr 0.00008
Warmup Train [40][20/3239]	Time 0.208 (1.212)	Data 0.002 (0.854)	Loss 4.1288 (3.9960)	Top-1 acc 31.641 (30.971)	Top-5 acc 51.562 (54.390)	lr 0.00008
Warmup Train [40][30/3239]	Time 0.159 (0.892)	Data 0.002 (0.580)	Loss 3.9655 (3.9993)	Top-1 acc 33.203 (31.225)	Top-5 acc 55.469 (54.637)	lr 0.00008
Warmup Train [40][40/3239]	Time 0.178 (0.732)	Data 0.001 (0.440)	Loss 4.0086 (4.0000)	Top-1 acc 30.078 (31.364)	Top-5 acc 55.078 (54.745)	lr 0.00008
Warmup Train [40][50/3239]	Time 0.270 (0.633)	Data 0.001 (0.355)	Loss 4.1586 (4.0112)	Top-1 acc 28.125 (31.028)	Top-5 acc 54.688 (54.603)	lr 0.00007
Warmup Train [40][60/3239]	Time 0.206 (0.567)	Data 0.002 (0.297)	Loss 4.0006 (4.0086)	Top-1 acc 32.031 (31.154)	Top-5 acc 52.734 (54.611)	lr 0.00007
Warmup Train [40][70/3239]	Time 0.237 (0.517)	Data 0.001 (0.256)	Loss 3.8940 (4.0033)	Top-1 acc 33.984 (31.333)	Top-5 acc 57.812 (54.610)	lr 0.00007
Warmup Train [40][80/3239]	Time 0.156 (0.482)	Data 0.001 (0.224)	Loss 4.1016 (4.0089)	Top-1 acc 30.859 (31.091)	Top-5 acc 52.734 (54.384)	lr 0.00007
Warmup Train [40][90/3239]	Time 0.135 (0.454)	Data 0.001 (0.200)	Loss 3.8973 (4.0088)	Top-1 acc 32.812 (31.104)	Top-5 acc 55.469 (54.361)	lr 0.00007
Warmup Train [40][100/3239]	Time 0.193 (0.432)	Data 0.002 (0.181)	Loss 3.8284 (4.0088)	Top-1 acc 30.469 (30.999)	Top-5 acc 59.375 (54.452)	lr 0.00007
Warmup Train [40][110/3239]	Time 0.324 (0.416)	Data 0.001 (0.166)	Loss 4.0427 (4.0097)	Top-1 acc 31.250 (31.025)	Top-5 acc 55.078 (54.445)	lr 0.00007
Warmup Train [40][120/3239]	Time 0.224 (0.401)	Data 0.022 (0.153)	Loss 4.0546 (4.0044)	Top-1 acc 30.859 (31.076)	Top-5 acc 54.297 (54.545)	lr 0.00007
Warmup Train [40][130/3239]	Time 0.148 (0.387)	Data 0.001 (0.141)	Loss 3.8464 (4.0027)	Top-1 acc 37.109 (31.083)	Top-5 acc 57.812 (54.586)	lr 0.00007
Warmup Train [40][140/3239]	Time 0.186 (0.376)	Data 0.001 (0.131)	Loss 4.2233 (4.0053)	Top-1 acc 28.125 (31.037)	Top-5 acc 49.609 (54.538)	lr 0.00007
Warmup Train [40][150/3239]	Time 0.139 (0.366)	Data 0.002 (0.123)	Loss 4.0361 (4.0022)	Top-1 acc 33.203 (31.126)	Top-5 acc 53.125 (54.607)	lr 0.00007
Warmup Train [40][160/3239]	Time 0.227 (0.357)	Data 0.001 (0.115)	Loss 3.8983 (3.9993)	Top-1 acc 32.812 (31.206)	Top-5 acc 53.516 (54.632)	lr 0.00007
Warmup Train [40][170/3239]	Time 0.234 (0.348)	Data 0.001 (0.109)	Loss 3.7060 (3.9988)	Top-1 acc 38.672 (31.200)	Top-5 acc 60.547 (54.678)	lr 0.00007
Warmup Train [40][180/3239]	Time 0.187 (0.341)	Data 0.001 (0.103)	Loss 3.7185 (3.9971)	Top-1 acc 33.984 (31.218)	Top-5 acc 59.766 (54.659)	lr 0.00007
Warmup Train [40][190/3239]	Time 0.186 (0.335)	Data 0.002 (0.098)	Loss 4.0950 (3.9985)	Top-1 acc 30.859 (31.254)	Top-5 acc 53.125 (54.665)	lr 0.00007
Warmup Train [40][200/3239]	Time 0.212 (0.330)	Data 0.001 (0.093)	Loss 3.9708 (3.9963)	Top-1 acc 33.984 (31.306)	Top-5 acc 55.859 (54.730)	lr 0.00007
Warmup Train [40][210/3239]	Time 0.277 (0.326)	Data 0.001 (0.089)	Loss 4.0795 (3.9963)	Top-1 acc 27.734 (31.304)	Top-5 acc 50.391 (54.747)	lr 0.00007
Warmup Train [40][220/3239]	Time 0.139 (0.320)	Data 0.001 (0.085)	Loss 3.9891 (3.9942)	Top-1 acc 33.594 (31.375)	Top-5 acc 52.734 (54.825)	lr 0.00007
Warmup Train [40][230/3239]	Time 0.205 (0.316)	Data 0.001 (0.081)	Loss 4.1013 (3.9942)	Top-1 acc 29.297 (31.406)	Top-5 acc 48.047 (54.791)	lr 0.00007
Warmup Train [40][240/3239]	Time 0.269 (0.312)	Data 0.002 (0.078)	Loss 3.8521 (3.9919)	Top-1 acc 35.938 (31.464)	Top-5 acc 58.203 (54.861)	lr 0.00007
Warmup Train [40][250/3239]	Time 0.223 (0.309)	Data 0.002 (0.075)	Loss 3.9089 (3.9949)	Top-1 acc 32.812 (31.384)	Top-5 acc 57.031 (54.742)	lr 0.00007
Warmup Train [40][260/3239]	Time 0.180 (0.305)	Data 0.001 (0.072)	Loss 3.9289 (3.9950)	Top-1 acc 32.031 (31.386)	Top-5 acc 52.344 (54.750)	lr 0.00007
Warmup Train [40][270/3239]	Time 0.225 (0.302)	Data 0.001 (0.070)	Loss 3.9938 (3.9950)	Top-1 acc 32.031 (31.367)	Top-5 acc 53.125 (54.768)	lr 0.00006
Warmup Train [40][280/3239]	Time 0.284 (0.299)	Data 0.001 (0.067)	Loss 4.0295 (3.9961)	Top-1 acc 34.766 (31.350)	Top-5 acc 54.297 (54.743)	lr 0.00006
Warmup Train [40][290/3239]	Time 0.250 (0.296)	Data 0.002 (0.065)	Loss 4.0590 (3.9951)	Top-1 acc 27.344 (31.360)	Top-5 acc 56.250 (54.773)	lr 0.00006
Warmup Train [40][300/3239]	Time 0.198 (0.293)	Data 0.001 (0.063)	Loss 3.9952 (3.9959)	Top-1 acc 33.984 (31.359)	Top-5 acc 55.469 (54.771)	lr 0.00006
Warmup Train [40][310/3239]	Time 0.294 (0.292)	Data 0.001 (0.061)	Loss 3.9929 (3.9936)	Top-1 acc 30.859 (31.372)	Top-5 acc 55.078 (54.822)	lr 0.00006
Warmup Train [40][320/3239]	Time 0.213 (0.289)	Data 0.001 (0.059)	Loss 3.8903 (3.9919)	Top-1 acc 32.422 (31.426)	Top-5 acc 56.250 (54.844)	lr 0.00006
Warmup Train [40][330/3239]	Time 0.235 (0.288)	Data 0.001 (0.058)	Loss 3.7847 (3.9917)	Top-1 acc 34.766 (31.410)	Top-5 acc 61.328 (54.857)	lr 0.00006
Warmup Train [40][340/3239]	Time 0.159 (0.286)	Data 0.001 (0.056)	Loss 3.9548 (3.9922)	Top-1 acc 33.203 (31.387)	Top-5 acc 52.734 (54.834)	lr 0.00006
Warmup Train [40][350/3239]	Time 0.193 (0.284)	Data 0.001 (0.054)	Loss 4.0675 (3.9920)	Top-1 acc 28.516 (31.377)	Top-5 acc 54.297 (54.879)	lr 0.00006
Warmup Train [40][360/3239]	Time 0.268 (0.282)	Data 0.002 (0.053)	Loss 3.8805 (3.9927)	Top-1 acc 33.984 (31.376)	Top-5 acc 51.172 (54.810)	lr 0.00006
Warmup Train [40][370/3239]	Time 0.211 (0.280)	Data 0.001 (0.052)	Loss 4.0757 (3.9927)	Top-1 acc 30.859 (31.358)	Top-5 acc 50.781 (54.783)	lr 0.00006
Warmup Train [40][380/3239]	Time 0.236 (0.279)	Data 0.001 (0.050)	Loss 4.1946 (3.9936)	Top-1 acc 27.344 (31.337)	Top-5 acc 50.391 (54.746)	lr 0.00006
Warmup Train [40][390/3239]	Time 0.159 (0.277)	Data 0.001 (0.049)	Loss 4.0037 (3.9942)	Top-1 acc 33.984 (31.353)	Top-5 acc 56.250 (54.756)	lr 0.00006
Warmup Train [40][400/3239]	Time 0.236 (0.276)	Data 0.002 (0.048)	Loss 3.9424 (3.9934)	Top-1 acc 30.859 (31.378)	Top-5 acc 53.906 (54.772)	lr 0.00006
Warmup Train [40][410/3239]	Time 0.279 (0.275)	Data 0.001 (0.047)	Loss 3.8389 (3.9938)	Top-1 acc 39.844 (31.373)	Top-5 acc 58.984 (54.769)	lr 0.00006
Warmup Train [40][420/3239]	Time 0.148 (0.273)	Data 0.001 (0.046)	Loss 3.8439 (3.9941)	Top-1 acc 34.375 (31.377)	Top-5 acc 58.203 (54.751)	lr 0.00006
Warmup Train [40][430/3239]	Time 0.177 (0.272)	Data 0.001 (0.045)	Loss 3.9999 (3.9948)	Top-1 acc 32.422 (31.356)	Top-5 acc 56.641 (54.726)	lr 0.00006
Warmup Train [40][440/3239]	Time 0.228 (0.271)	Data 0.002 (0.044)	Loss 4.1013 (3.9941)	Top-1 acc 32.422 (31.384)	Top-5 acc 50.391 (54.734)	lr 0.00006
Warmup Train [40][450/3239]	Time 0.179 (0.270)	Data 0.001 (0.043)	Loss 4.0810 (3.9945)	Top-1 acc 29.297 (31.367)	Top-5 acc 50.781 (54.720)	lr 0.00006
Warmup Train [40][460/3239]	Time 0.270 (0.269)	Data 0.001 (0.042)	Loss 3.8278 (3.9942)	Top-1 acc 30.859 (31.370)	Top-5 acc 53.906 (54.732)	lr 0.00006
Warmup Train [40][470/3239]	Time 0.152 (0.268)	Data 0.001 (0.041)	Loss 3.9433 (3.9948)	Top-1 acc 35.156 (31.361)	Top-5 acc 54.297 (54.718)	lr 0.00006
Warmup Train [40][480/3239]	Time 0.199 (0.267)	Data 0.002 (0.041)	Loss 4.1464 (3.9958)	Top-1 acc 28.516 (31.339)	Top-5 acc 51.562 (54.694)	lr 0.00006
Warmup Train [40][490/3239]	Time 0.181 (0.266)	Data 0.001 (0.040)	Loss 3.9231 (3.9952)	Top-1 acc 34.375 (31.335)	Top-5 acc 57.031 (54.695)	lr 0.00006
Warmup Train [40][500/3239]	Time 0.144 (0.265)	Data 0.001 (0.039)	Loss 3.9517 (3.9944)	Top-1 acc 29.688 (31.355)	Top-5 acc 54.688 (54.718)	lr 0.00006
Warmup Train [40][510/3239]	Time 0.236 (0.264)	Data 0.002 (0.038)	Loss 3.9401 (3.9956)	Top-1 acc 28.906 (31.317)	Top-5 acc 55.078 (54.696)	lr 0.00005
Warmup Train [40][520/3239]	Time 0.150 (0.264)	Data 0.001 (0.038)	Loss 4.1009 (3.9953)	Top-1 acc 30.078 (31.318)	Top-5 acc 53.906 (54.715)	lr 0.00005
Warmup Train [40][530/3239]	Time 0.242 (0.263)	Data 0.001 (0.037)	Loss 3.9595 (3.9952)	Top-1 acc 33.203 (31.333)	Top-5 acc 56.641 (54.723)	lr 0.00005
Warmup Train [40][540/3239]	Time 0.174 (0.262)	Data 0.001 (0.037)	Loss 3.9191 (3.9954)	Top-1 acc 32.031 (31.345)	Top-5 acc 57.422 (54.708)	lr 0.00005
Warmup Train [40][550/3239]	Time 0.156 (0.261)	Data 0.001 (0.036)	Loss 4.0375 (3.9953)	Top-1 acc 27.734 (31.358)	Top-5 acc 52.734 (54.712)	lr 0.00005
Warmup Train [40][560/3239]	Time 0.152 (0.261)	Data 0.002 (0.035)	Loss 3.9768 (3.9960)	Top-1 acc 30.078 (31.352)	Top-5 acc 55.469 (54.696)	lr 0.00005
Warmup Train [40][570/3239]	Time 0.201 (0.260)	Data 0.001 (0.035)	Loss 3.9955 (3.9954)	Top-1 acc 29.297 (31.354)	Top-5 acc 54.297 (54.709)	lr 0.00005
Warmup Train [40][580/3239]	Time 0.264 (0.260)	Data 0.001 (0.034)	Loss 3.9947 (3.9959)	Top-1 acc 32.422 (31.337)	Top-5 acc 53.125 (54.677)	lr 0.00005
Warmup Train [40][590/3239]	Time 0.199 (0.259)	Data 0.001 (0.034)	Loss 4.1624 (3.9964)	Top-1 acc 31.250 (31.347)	Top-5 acc 52.734 (54.671)	lr 0.00005
Warmup Train [40][600/3239]	Time 0.363 (0.259)	Data 0.002 (0.033)	Loss 3.8207 (3.9957)	Top-1 acc 33.594 (31.351)	Top-5 acc 60.547 (54.698)	lr 0.00005
Warmup Train [40][610/3239]	Time 0.240 (0.258)	Data 0.001 (0.033)	Loss 3.9234 (3.9951)	Top-1 acc 29.688 (31.352)	Top-5 acc 55.078 (54.694)	lr 0.00005
Warmup Train [40][620/3239]	Time 0.156 (0.258)	Data 0.002 (0.032)	Loss 3.8819 (3.9951)	Top-1 acc 31.641 (31.342)	Top-5 acc 58.203 (54.695)	lr 0.00005
Warmup Train [40][630/3239]	Time 0.172 (0.257)	Data 0.001 (0.032)	Loss 4.0030 (3.9965)	Top-1 acc 31.250 (31.322)	Top-5 acc 53.516 (54.661)	lr 0.00005
Warmup Train [40][640/3239]	Time 0.254 (0.256)	Data 0.001 (0.031)	Loss 3.9818 (3.9972)	Top-1 acc 30.469 (31.296)	Top-5 acc 52.734 (54.647)	lr 0.00005
Warmup Train [40][650/3239]	Time 0.198 (0.256)	Data 0.001 (0.031)	Loss 4.0210 (3.9969)	Top-1 acc 25.391 (31.293)	Top-5 acc 53.125 (54.653)	lr 0.00005
Warmup Train [40][660/3239]	Time 0.160 (0.255)	Data 0.001 (0.031)	Loss 3.9156 (3.9974)	Top-1 acc 31.250 (31.298)	Top-5 acc 56.641 (54.657)	lr 0.00005
Warmup Train [40][670/3239]	Time 0.248 (0.254)	Data 0.001 (0.030)	Loss 4.0545 (3.9982)	Top-1 acc 28.906 (31.281)	Top-5 acc 54.688 (54.646)	lr 0.00005
Warmup Train [40][680/3239]	Time 0.137 (0.253)	Data 0.001 (0.030)	Loss 3.8688 (3.9984)	Top-1 acc 33.594 (31.269)	Top-5 acc 60.156 (54.636)	lr 0.00005
Warmup Train [40][690/3239]	Time 0.243 (0.253)	Data 0.002 (0.029)	Loss 4.2207 (3.9980)	Top-1 acc 28.125 (31.263)	Top-5 acc 47.266 (54.621)	lr 0.00005
Warmup Train [40][700/3239]	Time 0.264 (0.253)	Data 0.001 (0.029)	Loss 3.9167 (3.9972)	Top-1 acc 32.422 (31.290)	Top-5 acc 57.422 (54.637)	lr 0.00005
Warmup Train [40][710/3239]	Time 0.305 (0.253)	Data 0.002 (0.029)	Loss 4.2189 (3.9978)	Top-1 acc 29.297 (31.276)	Top-5 acc 50.000 (54.630)	lr 0.00005
Warmup Train [40][720/3239]	Time 0.146 (0.252)	Data 0.001 (0.028)	Loss 3.9352 (3.9974)	Top-1 acc 29.688 (31.289)	Top-5 acc 57.812 (54.637)	lr 0.00005
Warmup Train [40][730/3239]	Time 0.187 (0.252)	Data 0.001 (0.028)	Loss 4.0949 (3.9981)	Top-1 acc 29.688 (31.270)	Top-5 acc 48.828 (54.618)	lr 0.00005
Warmup Train [40][740/3239]	Time 0.225 (0.251)	Data 0.001 (0.028)	Loss 3.8985 (3.9972)	Top-1 acc 35.938 (31.298)	Top-5 acc 53.906 (54.635)	lr 0.00005
Warmup Train [40][750/3239]	Time 0.302 (0.251)	Data 0.002 (0.027)	Loss 4.0971 (3.9978)	Top-1 acc 30.859 (31.286)	Top-5 acc 51.172 (54.611)	lr 0.00005
Warmup Train [40][760/3239]	Time 0.161 (0.251)	Data 0.001 (0.027)	Loss 3.8617 (3.9979)	Top-1 acc 31.641 (31.286)	Top-5 acc 58.594 (54.613)	lr 0.00005
Warmup Train [40][770/3239]	Time 0.174 (0.250)	Data 0.003 (0.027)	Loss 4.1520 (3.9972)	Top-1 acc 28.906 (31.303)	Top-5 acc 50.391 (54.620)	lr 0.00004
Warmup Train [40][780/3239]	Time 0.234 (0.250)	Data 0.001 (0.027)	Loss 3.8692 (3.9971)	Top-1 acc 34.766 (31.306)	Top-5 acc 56.641 (54.619)	lr 0.00004
Warmup Train [40][790/3239]	Time 0.195 (0.249)	Data 0.023 (0.026)	Loss 3.9859 (3.9970)	Top-1 acc 33.594 (31.302)	Top-5 acc 53.516 (54.631)	lr 0.00004
Warmup Train [40][800/3239]	Time 0.223 (0.249)	Data 0.001 (0.026)	Loss 4.0037 (3.9971)	Top-1 acc 32.422 (31.309)	Top-5 acc 53.125 (54.628)	lr 0.00004
Warmup Train [40][810/3239]	Time 0.316 (0.249)	Data 0.001 (0.026)	Loss 3.9550 (3.9970)	Top-1 acc 29.688 (31.313)	Top-5 acc 57.031 (54.638)	lr 0.00004
Warmup Train [40][820/3239]	Time 0.170 (0.248)	Data 0.002 (0.025)	Loss 4.0568 (3.9961)	Top-1 acc 25.391 (31.337)	Top-5 acc 51.953 (54.658)	lr 0.00004
Warmup Train [40][830/3239]	Time 0.198 (0.248)	Data 0.001 (0.025)	Loss 4.0731 (3.9971)	Top-1 acc 31.641 (31.316)	Top-5 acc 53.906 (54.655)	lr 0.00004
Warmup Train [40][840/3239]	Time 0.219 (0.247)	Data 0.001 (0.025)	Loss 3.8156 (3.9976)	Top-1 acc 37.891 (31.321)	Top-5 acc 60.938 (54.652)	lr 0.00004
Warmup Train [40][850/3239]	Time 0.173 (0.247)	Data 0.001 (0.025)	Loss 3.9248 (3.9979)	Top-1 acc 32.812 (31.323)	Top-5 acc 57.422 (54.654)	lr 0.00004
Warmup Train [40][860/3239]	Time 0.229 (0.247)	Data 0.001 (0.024)	Loss 3.9586 (3.9977)	Top-1 acc 30.078 (31.320)	Top-5 acc 57.031 (54.669)	lr 0.00004
Warmup Train [40][870/3239]	Time 0.248 (0.247)	Data 0.001 (0.024)	Loss 3.9010 (3.9975)	Top-1 acc 33.984 (31.328)	Top-5 acc 57.031 (54.676)	lr 0.00004
Warmup Train [40][880/3239]	Time 0.200 (0.246)	Data 0.001 (0.024)	Loss 4.0348 (3.9979)	Top-1 acc 29.688 (31.324)	Top-5 acc 55.078 (54.664)	lr 0.00004
Warmup Train [40][890/3239]	Time 0.238 (0.246)	Data 0.002 (0.024)	Loss 3.9231 (3.9974)	Top-1 acc 33.203 (31.334)	Top-5 acc 56.641 (54.664)	lr 0.00004
Warmup Train [40][900/3239]	Time 0.227 (0.246)	Data 0.002 (0.023)	Loss 4.1047 (3.9980)	Top-1 acc 31.641 (31.319)	Top-5 acc 48.047 (54.642)	lr 0.00004
Warmup Train [40][910/3239]	Time 0.196 (0.246)	Data 0.001 (0.023)	Loss 3.7473 (3.9978)	Top-1 acc 35.547 (31.331)	Top-5 acc 61.328 (54.649)	lr 0.00004
Warmup Train [40][920/3239]	Time 0.348 (0.245)	Data 0.001 (0.023)	Loss 4.0205 (3.9979)	Top-1 acc 34.375 (31.336)	Top-5 acc 55.859 (54.647)	lr 0.00004
Warmup Train [40][930/3239]	Time 0.222 (0.245)	Data 0.004 (0.023)	Loss 3.9312 (3.9984)	Top-1 acc 32.031 (31.335)	Top-5 acc 54.297 (54.642)	lr 0.00004
Warmup Train [40][940/3239]	Time 0.207 (0.245)	Data 0.001 (0.023)	Loss 3.8696 (3.9983)	Top-1 acc 35.156 (31.323)	Top-5 acc 56.250 (54.638)	lr 0.00004
Warmup Train [40][950/3239]	Time 0.161 (0.244)	Data 0.001 (0.022)	Loss 3.8068 (3.9979)	Top-1 acc 33.203 (31.324)	Top-5 acc 59.766 (54.649)	lr 0.00004
Warmup Train [40][960/3239]	Time 0.189 (0.244)	Data 0.002 (0.022)	Loss 4.3467 (3.9985)	Top-1 acc 26.562 (31.306)	Top-5 acc 43.750 (54.620)	lr 0.00004
Warmup Train [40][970/3239]	Time 0.271 (0.244)	Data 0.001 (0.022)	Loss 3.9207 (3.9988)	Top-1 acc 36.719 (31.299)	Top-5 acc 55.859 (54.612)	lr 0.00004
Warmup Train [40][980/3239]	Time 0.216 (0.244)	Data 0.001 (0.022)	Loss 3.7714 (3.9984)	Top-1 acc 36.328 (31.309)	Top-5 acc 60.156 (54.624)	lr 0.00004
Warmup Train [40][990/3239]	Time 0.208 (0.243)	Data 0.001 (0.022)	Loss 3.6454 (3.9981)	Top-1 acc 38.281 (31.298)	Top-5 acc 62.891 (54.631)	lr 0.00004
Warmup Train [40][1000/3239]	Time 0.191 (0.243)	Data 0.001 (0.021)	Loss 3.9184 (3.9977)	Top-1 acc 31.641 (31.303)	Top-5 acc 55.469 (54.634)	lr 0.00004
Warmup Train [40][1010/3239]	Time 0.210 (0.243)	Data 0.001 (0.021)	Loss 4.0518 (3.9978)	Top-1 acc 30.078 (31.304)	Top-5 acc 54.688 (54.624)	lr 0.00004
Warmup Train [40][1020/3239]	Time 0.163 (0.243)	Data 0.002 (0.021)	Loss 4.1243 (3.9980)	Top-1 acc 29.297 (31.299)	Top-5 acc 50.391 (54.621)	lr 0.00004
Warmup Train [40][1030/3239]	Time 0.321 (0.242)	Data 0.001 (0.021)	Loss 3.8876 (3.9974)	Top-1 acc 33.984 (31.307)	Top-5 acc 57.422 (54.627)	lr 0.00004
Warmup Train [40][1040/3239]	Time 0.274 (0.242)	Data 0.001 (0.021)	Loss 3.8006 (3.9966)	Top-1 acc 38.281 (31.312)	Top-5 acc 57.031 (54.642)	lr 0.00004
Warmup Train [40][1050/3239]	Time 0.233 (0.242)	Data 0.001 (0.020)	Loss 3.8890 (3.9964)	Top-1 acc 30.859 (31.319)	Top-5 acc 59.766 (54.661)	lr 0.00004
Warmup Train [40][1060/3239]	Time 0.238 (0.242)	Data 0.001 (0.020)	Loss 4.0313 (3.9966)	Top-1 acc 28.516 (31.317)	Top-5 acc 56.250 (54.655)	lr 0.00003
Warmup Train [40][1070/3239]	Time 0.278 (0.242)	Data 0.001 (0.020)	Loss 4.0053 (3.9965)	Top-1 acc 32.031 (31.326)	Top-5 acc 55.469 (54.652)	lr 0.00003
Warmup Train [40][1080/3239]	Time 0.237 (0.242)	Data 0.002 (0.020)	Loss 4.1133 (3.9965)	Top-1 acc 32.422 (31.324)	Top-5 acc 53.125 (54.654)	lr 0.00003
Warmup Train [40][1090/3239]	Time 0.202 (0.241)	Data 0.001 (0.020)	Loss 4.1117 (3.9966)	Top-1 acc 31.250 (31.324)	Top-5 acc 51.953 (54.661)	lr 0.00003
Warmup Train [40][1100/3239]	Time 0.206 (0.241)	Data 0.001 (0.020)	Loss 4.0997 (3.9963)	Top-1 acc 28.516 (31.323)	Top-5 acc 53.906 (54.665)	lr 0.00003
Warmup Train [40][1110/3239]	Time 0.211 (0.241)	Data 0.001 (0.019)	Loss 3.9113 (3.9963)	Top-1 acc 32.812 (31.326)	Top-5 acc 56.641 (54.652)	lr 0.00003
Warmup Train [40][1120/3239]	Time 0.236 (0.241)	Data 0.001 (0.019)	Loss 4.1207 (3.9970)	Top-1 acc 25.781 (31.306)	Top-5 acc 55.078 (54.643)	lr 0.00003
Warmup Train [40][1130/3239]	Time 0.248 (0.241)	Data 0.002 (0.019)	Loss 4.0177 (3.9967)	Top-1 acc 32.422 (31.320)	Top-5 acc 53.516 (54.655)	lr 0.00003
Warmup Train [40][1140/3239]	Time 0.190 (0.240)	Data 0.001 (0.019)	Loss 3.9512 (3.9964)	Top-1 acc 33.203 (31.336)	Top-5 acc 50.781 (54.655)	lr 0.00003
Warmup Train [40][1150/3239]	Time 0.171 (0.240)	Data 0.001 (0.019)	Loss 4.0639 (3.9967)	Top-1 acc 29.688 (31.323)	Top-5 acc 52.734 (54.639)	lr 0.00003
Warmup Train [40][1160/3239]	Time 0.243 (0.240)	Data 0.001 (0.019)	Loss 3.8295 (3.9965)	Top-1 acc 33.984 (31.324)	Top-5 acc 60.547 (54.641)	lr 0.00003
Warmup Train [40][1170/3239]	Time 0.189 (0.240)	Data 0.001 (0.019)	Loss 3.8263 (3.9960)	Top-1 acc 34.375 (31.329)	Top-5 acc 59.766 (54.646)	lr 0.00003
Warmup Train [40][1180/3239]	Time 0.229 (0.240)	Data 0.001 (0.018)	Loss 4.0325 (3.9961)	Top-1 acc 31.250 (31.324)	Top-5 acc 54.688 (54.645)	lr 0.00003
Warmup Train [40][1190/3239]	Time 0.195 (0.240)	Data 0.001 (0.018)	Loss 4.1599 (3.9965)	Top-1 acc 29.297 (31.315)	Top-5 acc 50.391 (54.638)	lr 0.00003
Warmup Train [40][1200/3239]	Time 0.209 (0.239)	Data 0.001 (0.018)	Loss 4.0270 (3.9958)	Top-1 acc 34.766 (31.325)	Top-5 acc 53.516 (54.646)	lr 0.00003
Warmup Train [40][1210/3239]	Time 0.251 (0.239)	Data 0.002 (0.018)	Loss 3.9300 (3.9955)	Top-1 acc 31.641 (31.328)	Top-5 acc 56.641 (54.659)	lr 0.00003
Warmup Train [40][1220/3239]	Time 0.140 (0.239)	Data 0.001 (0.018)	Loss 4.0098 (3.9955)	Top-1 acc 33.594 (31.336)	Top-5 acc 53.516 (54.656)	lr 0.00003
Warmup Train [40][1230/3239]	Time 0.267 (0.239)	Data 0.002 (0.018)	Loss 3.8096 (3.9961)	Top-1 acc 34.766 (31.332)	Top-5 acc 59.375 (54.654)	lr 0.00003
Warmup Train [40][1240/3239]	Time 0.195 (0.239)	Data 0.002 (0.018)	Loss 4.0527 (3.9960)	Top-1 acc 32.031 (31.331)	Top-5 acc 57.812 (54.659)	lr 0.00003
Warmup Train [40][1250/3239]	Time 0.296 (0.239)	Data 0.002 (0.018)	Loss 3.9616 (3.9961)	Top-1 acc 32.812 (31.323)	Top-5 acc 56.250 (54.665)	lr 0.00003
Warmup Train [40][1260/3239]	Time 0.237 (0.239)	Data 0.030 (0.018)	Loss 3.9652 (3.9963)	Top-1 acc 33.984 (31.314)	Top-5 acc 51.562 (54.653)	lr 0.00003
Warmup Train [40][1270/3239]	Time 0.210 (0.239)	Data 0.002 (0.017)	Loss 3.9464 (3.9965)	Top-1 acc 35.156 (31.307)	Top-5 acc 57.031 (54.643)	lr 0.00003
Warmup Train [40][1280/3239]	Time 0.180 (0.238)	Data 0.001 (0.017)	Loss 3.9673 (3.9964)	Top-1 acc 30.469 (31.305)	Top-5 acc 57.422 (54.645)	lr 0.00003
Warmup Train [40][1290/3239]	Time 0.251 (0.238)	Data 0.001 (0.017)	Loss 3.9855 (3.9960)	Top-1 acc 33.203 (31.319)	Top-5 acc 57.812 (54.658)	lr 0.00003
Warmup Train [40][1300/3239]	Time 0.281 (0.238)	Data 0.001 (0.017)	Loss 3.7494 (3.9961)	Top-1 acc 37.891 (31.317)	Top-5 acc 60.156 (54.655)	lr 0.00003
Warmup Train [40][1310/3239]	Time 0.293 (0.238)	Data 0.001 (0.017)	Loss 4.0757 (3.9959)	Top-1 acc 28.906 (31.321)	Top-5 acc 52.344 (54.652)	lr 0.00003
Warmup Train [40][1320/3239]	Time 0.246 (0.238)	Data 0.001 (0.017)	Loss 4.1058 (3.9965)	Top-1 acc 26.953 (31.308)	Top-5 acc 55.078 (54.645)	lr 0.00003
Warmup Train [40][1330/3239]	Time 0.225 (0.238)	Data 0.001 (0.017)	Loss 3.9221 (3.9964)	Top-1 acc 32.422 (31.318)	Top-5 acc 53.125 (54.641)	lr 0.00003
Warmup Train [40][1340/3239]	Time 0.290 (0.238)	Data 0.002 (0.017)	Loss 3.9219 (3.9963)	Top-1 acc 33.203 (31.318)	Top-5 acc 55.078 (54.649)	lr 0.00003
Warmup Train [40][1350/3239]	Time 0.292 (0.238)	Data 0.002 (0.017)	Loss 4.2141 (3.9968)	Top-1 acc 29.688 (31.307)	Top-5 acc 50.000 (54.637)	lr 0.00003
Warmup Train [40][1360/3239]	Time 0.226 (0.238)	Data 0.001 (0.016)	Loss 3.9773 (3.9968)	Top-1 acc 35.156 (31.309)	Top-5 acc 56.250 (54.636)	lr 0.00003
Warmup Train [40][1370/3239]	Time 0.224 (0.238)	Data 0.001 (0.016)	Loss 4.0055 (3.9969)	Top-1 acc 30.078 (31.301)	Top-5 acc 54.297 (54.634)	lr 0.00003
Warmup Train [40][1380/3239]	Time 0.263 (0.238)	Data 0.001 (0.016)	Loss 4.0003 (3.9965)	Top-1 acc 33.203 (31.311)	Top-5 acc 53.906 (54.645)	lr 0.00003
Warmup Train [40][1390/3239]	Time 0.169 (0.238)	Data 0.002 (0.016)	Loss 4.2983 (3.9970)	Top-1 acc 23.047 (31.297)	Top-5 acc 48.047 (54.631)	lr 0.00003
Warmup Train [40][1400/3239]	Time 0.192 (0.237)	Data 0.001 (0.016)	Loss 3.9252 (3.9968)	Top-1 acc 31.641 (31.298)	Top-5 acc 57.812 (54.634)	lr 0.00002
Warmup Train [40][1410/3239]	Time 0.288 (0.237)	Data 0.001 (0.016)	Loss 3.8566 (3.9963)	Top-1 acc 35.547 (31.314)	Top-5 acc 58.594 (54.649)	lr 0.00002
Warmup Train [40][1420/3239]	Time 0.196 (0.237)	Data 0.001 (0.016)	Loss 4.0430 (3.9967)	Top-1 acc 30.469 (31.307)	Top-5 acc 50.781 (54.648)	lr 0.00002
Warmup Train [40][1430/3239]	Time 0.183 (0.237)	Data 0.001 (0.016)	Loss 4.0207 (3.9968)	Top-1 acc 31.641 (31.301)	Top-5 acc 55.078 (54.649)	lr 0.00002
Warmup Train [40][1440/3239]	Time 0.231 (0.237)	Data 0.001 (0.016)	Loss 4.2032 (3.9972)	Top-1 acc 26.172 (31.292)	Top-5 acc 48.438 (54.642)	lr 0.00002
Warmup Train [40][1450/3239]	Time 0.305 (0.237)	Data 0.001 (0.016)	Loss 4.0211 (3.9978)	Top-1 acc 29.297 (31.281)	Top-5 acc 56.250 (54.630)	lr 0.00002
Warmup Train [40][1460/3239]	Time 0.210 (0.237)	Data 0.002 (0.015)	Loss 3.8823 (3.9981)	Top-1 acc 29.297 (31.268)	Top-5 acc 57.031 (54.617)	lr 0.00002
Warmup Train [40][1470/3239]	Time 0.223 (0.237)	Data 0.002 (0.015)	Loss 3.9412 (3.9983)	Top-1 acc 32.031 (31.264)	Top-5 acc 57.422 (54.611)	lr 0.00002
Warmup Train [40][1480/3239]	Time 0.190 (0.237)	Data 0.001 (0.015)	Loss 3.7986 (3.9982)	Top-1 acc 35.156 (31.267)	Top-5 acc 56.250 (54.614)	lr 0.00002
Warmup Train [40][1490/3239]	Time 0.226 (0.236)	Data 0.002 (0.015)	Loss 4.0689 (3.9985)	Top-1 acc 29.297 (31.263)	Top-5 acc 53.906 (54.603)	lr 0.00002
Warmup Train [40][1500/3239]	Time 0.198 (0.236)	Data 0.001 (0.015)	Loss 4.2101 (3.9985)	Top-1 acc 27.344 (31.262)	Top-5 acc 49.609 (54.607)	lr 0.00002
Warmup Train [40][1510/3239]	Time 0.222 (0.236)	Data 0.002 (0.015)	Loss 4.0588 (3.9986)	Top-1 acc 28.906 (31.263)	Top-5 acc 53.516 (54.600)	lr 0.00002
Warmup Train [40][1520/3239]	Time 0.146 (0.236)	Data 0.001 (0.015)	Loss 4.0793 (3.9987)	Top-1 acc 28.516 (31.266)	Top-5 acc 53.906 (54.594)	lr 0.00002
Warmup Train [40][1530/3239]	Time 0.245 (0.236)	Data 0.001 (0.015)	Loss 4.1860 (3.9983)	Top-1 acc 27.734 (31.270)	Top-5 acc 51.172 (54.605)	lr 0.00002
Warmup Train [40][1540/3239]	Time 0.284 (0.236)	Data 0.001 (0.015)	Loss 3.9494 (3.9980)	Top-1 acc 32.031 (31.281)	Top-5 acc 57.422 (54.613)	lr 0.00002
Warmup Train [40][1550/3239]	Time 0.304 (0.236)	Data 0.001 (0.015)	Loss 4.0587 (3.9980)	Top-1 acc 31.250 (31.280)	Top-5 acc 52.734 (54.603)	lr 0.00002
Warmup Train [40][1560/3239]	Time 0.197 (0.236)	Data 0.001 (0.015)	Loss 4.1049 (3.9977)	Top-1 acc 27.344 (31.282)	Top-5 acc 51.562 (54.613)	lr 0.00002
Warmup Train [40][1570/3239]	Time 0.258 (0.236)	Data 0.001 (0.015)	Loss 4.1072 (3.9979)	Top-1 acc 32.422 (31.272)	Top-5 acc 56.250 (54.613)	lr 0.00002
Warmup Train [40][1580/3239]	Time 0.251 (0.236)	Data 0.001 (0.015)	Loss 3.9300 (3.9977)	Top-1 acc 35.547 (31.276)	Top-5 acc 57.422 (54.623)	lr 0.00002
Warmup Train [40][1590/3239]	Time 0.206 (0.236)	Data 0.001 (0.014)	Loss 3.9980 (3.9978)	Top-1 acc 32.422 (31.275)	Top-5 acc 54.297 (54.620)	lr 0.00002
Warmup Train [40][1600/3239]	Time 0.201 (0.236)	Data 0.001 (0.014)	Loss 4.2332 (3.9977)	Top-1 acc 26.953 (31.281)	Top-5 acc 47.656 (54.625)	lr 0.00002
Warmup Train [40][1610/3239]	Time 0.244 (0.235)	Data 0.001 (0.014)	Loss 4.2050 (3.9979)	Top-1 acc 26.562 (31.274)	Top-5 acc 50.000 (54.618)	lr 0.00002
Warmup Train [40][1620/3239]	Time 0.219 (0.235)	Data 0.001 (0.014)	Loss 4.1087 (3.9978)	Top-1 acc 30.078 (31.273)	Top-5 acc 49.609 (54.622)	lr 0.00002
Warmup Train [40][1630/3239]	Time 0.239 (0.235)	Data 0.002 (0.014)	Loss 3.9463 (3.9977)	Top-1 acc 32.812 (31.277)	Top-5 acc 58.203 (54.631)	lr 0.00002
Warmup Train [40][1640/3239]	Time 0.301 (0.235)	Data 0.001 (0.014)	Loss 3.8374 (3.9977)	Top-1 acc 38.672 (31.281)	Top-5 acc 59.766 (54.637)	lr 0.00002
Warmup Train [40][1650/3239]	Time 0.315 (0.235)	Data 0.001 (0.014)	Loss 4.1060 (3.9977)	Top-1 acc 29.297 (31.276)	Top-5 acc 55.078 (54.644)	lr 0.00002
Warmup Train [40][1660/3239]	Time 0.205 (0.235)	Data 0.001 (0.014)	Loss 3.7976 (3.9973)	Top-1 acc 33.984 (31.287)	Top-5 acc 57.422 (54.655)	lr 0.00002
Warmup Train [40][1670/3239]	Time 0.197 (0.235)	Data 0.001 (0.014)	Loss 4.1451 (3.9974)	Top-1 acc 28.516 (31.283)	Top-5 acc 49.609 (54.655)	lr 0.00002
Warmup Train [40][1680/3239]	Time 0.184 (0.235)	Data 0.002 (0.014)	Loss 3.9406 (3.9975)	Top-1 acc 29.297 (31.277)	Top-5 acc 57.031 (54.654)	lr 0.00002
Warmup Train [40][1690/3239]	Time 0.188 (0.235)	Data 0.001 (0.014)	Loss 4.0361 (3.9972)	Top-1 acc 29.297 (31.279)	Top-5 acc 53.906 (54.664)	lr 0.00002
Warmup Train [40][1700/3239]	Time 0.177 (0.234)	Data 0.001 (0.014)	Loss 4.0771 (3.9972)	Top-1 acc 28.125 (31.271)	Top-5 acc 49.219 (54.660)	lr 0.00002
Warmup Train [40][1710/3239]	Time 0.183 (0.234)	Data 0.001 (0.014)	Loss 4.0342 (3.9973)	Top-1 acc 28.516 (31.271)	Top-5 acc 53.906 (54.657)	lr 0.00002
Warmup Train [40][1720/3239]	Time 0.232 (0.234)	Data 0.001 (0.013)	Loss 3.9326 (3.9973)	Top-1 acc 32.812 (31.271)	Top-5 acc 57.031 (54.653)	lr 0.00002
Warmup Train [40][1730/3239]	Time 0.145 (0.234)	Data 0.001 (0.013)	Loss 4.1758 (3.9974)	Top-1 acc 26.953 (31.268)	Top-5 acc 48.438 (54.650)	lr 0.00002
Warmup Train [40][1740/3239]	Time 0.188 (0.234)	Data 0.001 (0.013)	Loss 4.1099 (3.9972)	Top-1 acc 28.906 (31.269)	Top-5 acc 52.734 (54.659)	lr 0.00002
Warmup Train [40][1750/3239]	Time 0.221 (0.234)	Data 0.001 (0.013)	Loss 4.1427 (3.9972)	Top-1 acc 28.906 (31.268)	Top-5 acc 52.734 (54.660)	lr 0.00002
Warmup Train [40][1760/3239]	Time 0.126 (0.234)	Data 0.001 (0.013)	Loss 3.9691 (3.9972)	Top-1 acc 33.594 (31.263)	Top-5 acc 55.859 (54.664)	lr 0.00002
Warmup Train [40][1770/3239]	Time 0.343 (0.234)	Data 0.001 (0.013)	Loss 3.9731 (3.9970)	Top-1 acc 31.250 (31.261)	Top-5 acc 57.031 (54.673)	lr 0.00002
Warmup Train [40][1780/3239]	Time 0.370 (0.234)	Data 0.001 (0.013)	Loss 3.8774 (3.9967)	Top-1 acc 35.156 (31.263)	Top-5 acc 58.203 (54.675)	lr 0.00002
Warmup Train [40][1790/3239]	Time 0.259 (0.234)	Data 0.001 (0.013)	Loss 3.9389 (3.9967)	Top-1 acc 29.297 (31.262)	Top-5 acc 56.250 (54.674)	lr 0.00002
Warmup Train [40][1800/3239]	Time 0.208 (0.234)	Data 0.001 (0.013)	Loss 3.7895 (3.9964)	Top-1 acc 36.328 (31.266)	Top-5 acc 60.938 (54.679)	lr 0.00002
Warmup Train [40][1810/3239]	Time 0.160 (0.234)	Data 0.001 (0.013)	Loss 3.9211 (3.9963)	Top-1 acc 33.984 (31.270)	Top-5 acc 57.812 (54.678)	lr 0.00002
Warmup Train [40][1820/3239]	Time 0.232 (0.234)	Data 0.001 (0.013)	Loss 3.9344 (3.9960)	Top-1 acc 31.641 (31.276)	Top-5 acc 58.203 (54.683)	lr 0.00001
Warmup Train [40][1830/3239]	Time 0.153 (0.234)	Data 0.001 (0.013)	Loss 4.1460 (3.9963)	Top-1 acc 24.609 (31.267)	Top-5 acc 49.219 (54.677)	lr 0.00001
Warmup Train [40][1840/3239]	Time 0.196 (0.233)	Data 0.002 (0.013)	Loss 3.9414 (3.9962)	Top-1 acc 33.594 (31.274)	Top-5 acc 55.859 (54.679)	lr 0.00001
Warmup Train [40][1850/3239]	Time 0.239 (0.233)	Data 0.002 (0.013)	Loss 3.8885 (3.9959)	Top-1 acc 26.953 (31.278)	Top-5 acc 55.078 (54.686)	lr 0.00001
Warmup Train [40][1860/3239]	Time 0.211 (0.233)	Data 0.001 (0.013)	Loss 3.8840 (3.9959)	Top-1 acc 33.203 (31.276)	Top-5 acc 53.906 (54.681)	lr 0.00001
Warmup Train [40][1870/3239]	Time 0.213 (0.233)	Data 0.001 (0.013)	Loss 3.8289 (3.9960)	Top-1 acc 34.375 (31.272)	Top-5 acc 57.422 (54.677)	lr 0.00001
Warmup Train [40][1880/3239]	Time 0.190 (0.233)	Data 0.002 (0.013)	Loss 4.0536 (3.9959)	Top-1 acc 28.516 (31.274)	Top-5 acc 53.906 (54.676)	lr 0.00001
Warmup Train [40][1890/3239]	Time 0.278 (0.233)	Data 0.001 (0.013)	Loss 4.1328 (3.9956)	Top-1 acc 27.734 (31.275)	Top-5 acc 51.953 (54.680)	lr 0.00001
Warmup Train [40][1900/3239]	Time 0.239 (0.233)	Data 0.001 (0.013)	Loss 3.9420 (3.9959)	Top-1 acc 32.422 (31.272)	Top-5 acc 57.031 (54.676)	lr 0.00001
Warmup Train [40][1910/3239]	Time 0.216 (0.233)	Data 0.002 (0.012)	Loss 3.8282 (3.9954)	Top-1 acc 36.719 (31.280)	Top-5 acc 59.766 (54.687)	lr 0.00001
Warmup Train [40][1920/3239]	Time 0.219 (0.233)	Data 0.001 (0.012)	Loss 3.9456 (3.9954)	Top-1 acc 32.812 (31.280)	Top-5 acc 54.688 (54.691)	lr 0.00001
Warmup Train [40][1930/3239]	Time 0.273 (0.233)	Data 0.001 (0.012)	Loss 3.9606 (3.9953)	Top-1 acc 32.422 (31.283)	Top-5 acc 56.250 (54.694)	lr 0.00001
Warmup Train [40][1940/3239]	Time 0.240 (0.233)	Data 0.001 (0.012)	Loss 3.9515 (3.9956)	Top-1 acc 33.203 (31.281)	Top-5 acc 57.812 (54.683)	lr 0.00001
Warmup Train [40][1950/3239]	Time 0.270 (0.233)	Data 0.001 (0.012)	Loss 4.0857 (3.9957)	Top-1 acc 27.734 (31.279)	Top-5 acc 50.391 (54.678)	lr 0.00001
Warmup Train [40][1960/3239]	Time 0.202 (0.233)	Data 0.001 (0.012)	Loss 4.0482 (3.9958)	Top-1 acc 32.812 (31.273)	Top-5 acc 57.812 (54.674)	lr 0.00001
Warmup Train [40][1970/3239]	Time 0.236 (0.233)	Data 0.001 (0.012)	Loss 4.0450 (3.9957)	Top-1 acc 32.422 (31.274)	Top-5 acc 53.125 (54.673)	lr 0.00001
Warmup Train [40][1980/3239]	Time 0.230 (0.233)	Data 0.001 (0.012)	Loss 3.9143 (3.9956)	Top-1 acc 31.641 (31.279)	Top-5 acc 56.250 (54.673)	lr 0.00001
Warmup Train [40][1990/3239]	Time 0.316 (0.233)	Data 0.001 (0.012)	Loss 3.9981 (3.9955)	Top-1 acc 29.297 (31.282)	Top-5 acc 54.297 (54.675)	lr 0.00001
Warmup Train [40][2000/3239]	Time 0.293 (0.233)	Data 0.001 (0.012)	Loss 4.1357 (3.9953)	Top-1 acc 31.250 (31.287)	Top-5 acc 51.172 (54.681)	lr 0.00001
Warmup Train [40][2010/3239]	Time 0.215 (0.233)	Data 0.001 (0.012)	Loss 4.0768 (3.9950)	Top-1 acc 29.297 (31.295)	Top-5 acc 54.297 (54.695)	lr 0.00001
Warmup Train [40][2020/3239]	Time 0.136 (0.233)	Data 0.002 (0.012)	Loss 3.9336 (3.9951)	Top-1 acc 33.203 (31.291)	Top-5 acc 56.250 (54.693)	lr 0.00001
Warmup Train [40][2030/3239]	Time 0.194 (0.233)	Data 0.002 (0.012)	Loss 3.9675 (3.9951)	Top-1 acc 31.641 (31.294)	Top-5 acc 55.078 (54.689)	lr 0.00001
Warmup Train [40][2040/3239]	Time 0.189 (0.233)	Data 0.002 (0.012)	Loss 4.2280 (3.9951)	Top-1 acc 26.562 (31.301)	Top-5 acc 51.562 (54.690)	lr 0.00001
Warmup Train [40][2050/3239]	Time 0.192 (0.233)	Data 0.001 (0.012)	Loss 3.9028 (3.9949)	Top-1 acc 30.469 (31.304)	Top-5 acc 57.422 (54.697)	lr 0.00001
Warmup Train [40][2060/3239]	Time 0.220 (0.233)	Data 0.002 (0.012)	Loss 4.2445 (3.9952)	Top-1 acc 29.688 (31.301)	Top-5 acc 49.609 (54.694)	lr 0.00001
Warmup Train [40][2070/3239]	Time 0.270 (0.233)	Data 0.001 (0.012)	Loss 4.0976 (3.9953)	Top-1 acc 30.078 (31.298)	Top-5 acc 52.734 (54.690)	lr 0.00001
Warmup Train [40][2080/3239]	Time 0.226 (0.233)	Data 0.001 (0.012)	Loss 4.0150 (3.9955)	Top-1 acc 29.297 (31.294)	Top-5 acc 58.984 (54.692)	lr 0.00001
Warmup Train [40][2090/3239]	Time 0.207 (0.232)	Data 0.001 (0.012)	Loss 3.9701 (3.9956)	Top-1 acc 30.078 (31.291)	Top-5 acc 55.078 (54.695)	lr 0.00001
Warmup Train [40][2100/3239]	Time 0.286 (0.233)	Data 0.001 (0.012)	Loss 3.6242 (3.9957)	Top-1 acc 35.156 (31.287)	Top-5 acc 62.109 (54.692)	lr 0.00001
Warmup Train [40][2110/3239]	Time 0.200 (0.232)	Data 0.001 (0.012)	Loss 4.1256 (3.9957)	Top-1 acc 30.078 (31.292)	Top-5 acc 50.391 (54.690)	lr 0.00001
Warmup Train [40][2120/3239]	Time 0.240 (0.232)	Data 0.001 (0.012)	Loss 3.9412 (3.9957)	Top-1 acc 30.078 (31.288)	Top-5 acc 58.203 (54.691)	lr 0.00001
Warmup Train [40][2130/3239]	Time 0.273 (0.232)	Data 0.001 (0.011)	Loss 3.9055 (3.9959)	Top-1 acc 34.766 (31.290)	Top-5 acc 56.250 (54.687)	lr 0.00001
Warmup Train [40][2140/3239]	Time 0.203 (0.232)	Data 0.002 (0.011)	Loss 4.0250 (3.9959)	Top-1 acc 32.031 (31.285)	Top-5 acc 51.172 (54.688)	lr 0.00001
Warmup Train [40][2150/3239]	Time 0.233 (0.232)	Data 0.001 (0.011)	Loss 3.9611 (3.9961)	Top-1 acc 32.422 (31.284)	Top-5 acc 54.688 (54.683)	lr 0.00001
Warmup Train [40][2160/3239]	Time 0.226 (0.232)	Data 0.002 (0.011)	Loss 3.8036 (3.9959)	Top-1 acc 34.766 (31.289)	Top-5 acc 60.938 (54.685)	lr 0.00001
Warmup Train [40][2170/3239]	Time 0.215 (0.232)	Data 0.001 (0.011)	Loss 4.1243 (3.9960)	Top-1 acc 27.344 (31.287)	Top-5 acc 50.781 (54.680)	lr 0.00001
Warmup Train [40][2180/3239]	Time 0.225 (0.232)	Data 0.001 (0.011)	Loss 4.1172 (3.9962)	Top-1 acc 28.125 (31.281)	Top-5 acc 50.000 (54.670)	lr 0.00001
Warmup Train [40][2190/3239]	Time 0.274 (0.232)	Data 0.001 (0.011)	Loss 4.0848 (3.9966)	Top-1 acc 25.391 (31.273)	Top-5 acc 53.906 (54.662)	lr 0.00001
Warmup Train [40][2200/3239]	Time 0.170 (0.232)	Data 0.001 (0.011)	Loss 3.9913 (3.9967)	Top-1 acc 35.156 (31.277)	Top-5 acc 58.203 (54.663)	lr 0.00001
Warmup Train [40][2210/3239]	Time 0.302 (0.232)	Data 0.002 (0.011)	Loss 4.0456 (3.9966)	Top-1 acc 31.250 (31.276)	Top-5 acc 54.688 (54.670)	lr 0.00001
Warmup Train [40][2220/3239]	Time 0.177 (0.232)	Data 0.001 (0.011)	Loss 3.8405 (3.9965)	Top-1 acc 33.203 (31.285)	Top-5 acc 56.250 (54.673)	lr 0.00001
Warmup Train [40][2230/3239]	Time 0.181 (0.232)	Data 0.001 (0.011)	Loss 3.9805 (3.9967)	Top-1 acc 27.344 (31.278)	Top-5 acc 52.344 (54.669)	lr 0.00001
Warmup Train [40][2240/3239]	Time 0.220 (0.232)	Data 0.002 (0.011)	Loss 4.0402 (3.9964)	Top-1 acc 30.469 (31.283)	Top-5 acc 55.859 (54.673)	lr 0.00001
Warmup Train [40][2250/3239]	Time 0.198 (0.232)	Data 0.001 (0.011)	Loss 4.0409 (3.9963)	Top-1 acc 30.859 (31.285)	Top-5 acc 51.953 (54.674)	lr 0.00001
Warmup Train [40][2260/3239]	Time 0.200 (0.232)	Data 0.001 (0.011)	Loss 4.2265 (3.9964)	Top-1 acc 27.344 (31.279)	Top-5 acc 51.562 (54.673)	lr 0.00001
Warmup Train [40][2270/3239]	Time 0.206 (0.232)	Data 0.001 (0.011)	Loss 3.8812 (3.9963)	Top-1 acc 39.062 (31.288)	Top-5 acc 59.375 (54.678)	lr 0.00001
Warmup Train [40][2280/3239]	Time 0.212 (0.232)	Data 0.001 (0.011)	Loss 4.1547 (3.9964)	Top-1 acc 24.609 (31.283)	Top-5 acc 51.953 (54.678)	lr 0.00001
Warmup Train [40][2290/3239]	Time 0.169 (0.232)	Data 0.001 (0.011)	Loss 3.9671 (3.9963)	Top-1 acc 33.594 (31.288)	Top-5 acc 57.812 (54.681)	lr 0.00001
Warmup Train [40][2300/3239]	Time 0.335 (0.232)	Data 0.001 (0.011)	Loss 3.7850 (3.9962)	Top-1 acc 36.719 (31.291)	Top-5 acc 61.719 (54.686)	lr 0.00001
Warmup Train [40][2310/3239]	Time 0.293 (0.232)	Data 0.002 (0.011)	Loss 4.0498 (3.9964)	Top-1 acc 29.297 (31.287)	Top-5 acc 57.031 (54.687)	lr 0.00001
Warmup Train [40][2320/3239]	Time 0.168 (0.232)	Data 0.001 (0.011)	Loss 3.8734 (3.9961)	Top-1 acc 31.641 (31.292)	Top-5 acc 54.297 (54.689)	lr 0.00001
Warmup Train [40][2330/3239]	Time 0.170 (0.232)	Data 0.001 (0.011)	Loss 4.0450 (3.9960)	Top-1 acc 26.953 (31.290)	Top-5 acc 55.469 (54.688)	lr 0.00001
Warmup Train [40][2340/3239]	Time 0.212 (0.232)	Data 0.001 (0.011)	Loss 3.6959 (3.9958)	Top-1 acc 39.453 (31.290)	Top-5 acc 60.547 (54.693)	lr 0.00001
Warmup Train [40][2350/3239]	Time 0.199 (0.231)	Data 0.001 (0.011)	Loss 3.6913 (3.9956)	Top-1 acc 33.594 (31.291)	Top-5 acc 61.328 (54.702)	lr 0.00001
Warmup Train [40][2360/3239]	Time 0.215 (0.231)	Data 0.001 (0.011)	Loss 3.7582 (3.9953)	Top-1 acc 35.938 (31.292)	Top-5 acc 60.938 (54.710)	lr 0.00001
Warmup Train [40][2370/3239]	Time 0.202 (0.231)	Data 0.001 (0.011)	Loss 3.9930 (3.9952)	Top-1 acc 32.031 (31.290)	Top-5 acc 53.516 (54.714)	lr 0.00001
Warmup Train [40][2380/3239]	Time 0.233 (0.231)	Data 0.001 (0.011)	Loss 3.9756 (3.9951)	Top-1 acc 32.031 (31.292)	Top-5 acc 55.078 (54.718)	lr 0.00001
Warmup Train [40][2390/3239]	Time 0.135 (0.231)	Data 0.001 (0.011)	Loss 3.9443 (3.9953)	Top-1 acc 30.078 (31.287)	Top-5 acc 56.641 (54.716)	lr 0.00001
Warmup Train [40][2400/3239]	Time 0.217 (0.231)	Data 0.003 (0.011)	Loss 4.0705 (3.9957)	Top-1 acc 31.641 (31.281)	Top-5 acc 55.469 (54.708)	lr 0.00001
Warmup Train [40][2410/3239]	Time 0.215 (0.231)	Data 0.001 (0.010)	Loss 3.8404 (3.9956)	Top-1 acc 31.250 (31.276)	Top-5 acc 55.469 (54.709)	lr 0.00001
Warmup Train [40][2420/3239]	Time 0.138 (0.231)	Data 0.001 (0.010)	Loss 4.0606 (3.9957)	Top-1 acc 28.906 (31.275)	Top-5 acc 53.516 (54.709)	lr 0.00000
Warmup Train [40][2430/3239]	Time 0.406 (0.231)	Data 0.001 (0.010)	Loss 4.2048 (3.9957)	Top-1 acc 24.219 (31.275)	Top-5 acc 49.609 (54.707)	lr 0.00000
Warmup Train [40][2440/3239]	Time 0.228 (0.231)	Data 0.001 (0.010)	Loss 3.7992 (3.9957)	Top-1 acc 30.859 (31.274)	Top-5 acc 54.297 (54.708)	lr 0.00000
Warmup Train [40][2450/3239]	Time 0.234 (0.231)	Data 0.001 (0.010)	Loss 3.9222 (3.9958)	Top-1 acc 32.422 (31.273)	Top-5 acc 53.906 (54.703)	lr 0.00000
Warmup Train [40][2460/3239]	Time 0.189 (0.231)	Data 0.001 (0.010)	Loss 3.9322 (3.9957)	Top-1 acc 34.375 (31.279)	Top-5 acc 57.031 (54.708)	lr 0.00000
Warmup Train [40][2470/3239]	Time 0.269 (0.231)	Data 0.001 (0.010)	Loss 3.9313 (3.9958)	Top-1 acc 34.766 (31.276)	Top-5 acc 57.422 (54.709)	lr 0.00000
Warmup Train [40][2480/3239]	Time 0.186 (0.231)	Data 0.002 (0.010)	Loss 3.9450 (3.9957)	Top-1 acc 31.250 (31.276)	Top-5 acc 54.688 (54.709)	lr 0.00000
Warmup Train [40][2490/3239]	Time 0.260 (0.231)	Data 0.001 (0.010)	Loss 4.1130 (3.9957)	Top-1 acc 28.906 (31.275)	Top-5 acc 51.562 (54.708)	lr 0.00000
Warmup Train [40][2500/3239]	Time 0.236 (0.231)	Data 0.001 (0.010)	Loss 3.9429 (3.9956)	Top-1 acc 32.422 (31.276)	Top-5 acc 58.203 (54.708)	lr 0.00000
Warmup Train [40][2510/3239]	Time 0.221 (0.231)	Data 0.001 (0.010)	Loss 3.9639 (3.9958)	Top-1 acc 32.031 (31.273)	Top-5 acc 57.031 (54.707)	lr 0.00000
Warmup Train [40][2520/3239]	Time 0.181 (0.231)	Data 0.001 (0.010)	Loss 4.0650 (3.9957)	Top-1 acc 29.688 (31.272)	Top-5 acc 53.125 (54.711)	lr 0.00000
Warmup Train [40][2530/3239]	Time 0.270 (0.231)	Data 0.003 (0.010)	Loss 3.9938 (3.9953)	Top-1 acc 32.031 (31.281)	Top-5 acc 53.516 (54.719)	lr 0.00000
Warmup Train [40][2540/3239]	Time 0.253 (0.231)	Data 0.001 (0.010)	Loss 4.2612 (3.9956)	Top-1 acc 30.078 (31.274)	Top-5 acc 47.266 (54.714)	lr 0.00000
Warmup Train [40][2550/3239]	Time 0.191 (0.231)	Data 0.001 (0.010)	Loss 4.2148 (3.9956)	Top-1 acc 28.906 (31.272)	Top-5 acc 50.391 (54.713)	lr 0.00000
Warmup Train [40][2560/3239]	Time 0.230 (0.231)	Data 0.001 (0.010)	Loss 4.0657 (3.9956)	Top-1 acc 32.422 (31.277)	Top-5 acc 56.250 (54.714)	lr 0.00000
Warmup Train [40][2570/3239]	Time 0.264 (0.231)	Data 0.001 (0.010)	Loss 4.0924 (3.9959)	Top-1 acc 26.953 (31.266)	Top-5 acc 54.688 (54.705)	lr 0.00000
Warmup Train [40][2580/3239]	Time 0.199 (0.231)	Data 0.001 (0.010)	Loss 3.7621 (3.9958)	Top-1 acc 35.547 (31.269)	Top-5 acc 60.156 (54.707)	lr 0.00000
Warmup Train [40][2590/3239]	Time 0.184 (0.231)	Data 0.002 (0.010)	Loss 3.7911 (3.9957)	Top-1 acc 35.938 (31.276)	Top-5 acc 57.812 (54.709)	lr 0.00000
Warmup Train [40][2600/3239]	Time 0.220 (0.231)	Data 0.001 (0.010)	Loss 3.9765 (3.9957)	Top-1 acc 33.594 (31.276)	Top-5 acc 51.953 (54.705)	lr 0.00000
Warmup Train [40][2610/3239]	Time 0.265 (0.231)	Data 0.001 (0.010)	Loss 4.0437 (3.9959)	Top-1 acc 29.688 (31.270)	Top-5 acc 53.516 (54.698)	lr 0.00000
Warmup Train [40][2620/3239]	Time 0.244 (0.231)	Data 0.001 (0.010)	Loss 3.9397 (3.9958)	Top-1 acc 33.203 (31.271)	Top-5 acc 55.859 (54.701)	lr 0.00000
Warmup Train [40][2630/3239]	Time 0.168 (0.231)	Data 0.001 (0.010)	Loss 3.8766 (3.9959)	Top-1 acc 32.812 (31.268)	Top-5 acc 57.812 (54.698)	lr 0.00000
Warmup Train [40][2640/3239]	Time 0.252 (0.231)	Data 0.002 (0.010)	Loss 3.8632 (3.9958)	Top-1 acc 37.891 (31.268)	Top-5 acc 58.594 (54.704)	lr 0.00000
Warmup Train [40][2650/3239]	Time 0.225 (0.231)	Data 0.001 (0.010)	Loss 4.1684 (3.9957)	Top-1 acc 27.734 (31.272)	Top-5 acc 50.781 (54.708)	lr 0.00000
Warmup Train [40][2660/3239]	Time 0.133 (0.230)	Data 0.001 (0.010)	Loss 3.9786 (3.9960)	Top-1 acc 31.250 (31.268)	Top-5 acc 53.906 (54.704)	lr 0.00000
Warmup Train [40][2670/3239]	Time 0.250 (0.230)	Data 0.001 (0.010)	Loss 4.0243 (3.9958)	Top-1 acc 30.859 (31.271)	Top-5 acc 56.641 (54.709)	lr 0.00000
Warmup Train [40][2680/3239]	Time 0.170 (0.230)	Data 0.001 (0.010)	Loss 4.0260 (3.9956)	Top-1 acc 34.375 (31.278)	Top-5 acc 54.688 (54.710)	lr 0.00000
Warmup Train [40][2690/3239]	Time 0.208 (0.230)	Data 0.001 (0.010)	Loss 3.9145 (3.9957)	Top-1 acc 31.250 (31.279)	Top-5 acc 53.516 (54.709)	lr 0.00000
Warmup Train [40][2700/3239]	Time 0.174 (0.230)	Data 0.002 (0.010)	Loss 4.1517 (3.9957)	Top-1 acc 30.469 (31.280)	Top-5 acc 50.391 (54.707)	lr 0.00000
Warmup Train [40][2710/3239]	Time 0.230 (0.230)	Data 0.001 (0.010)	Loss 4.0579 (3.9956)	Top-1 acc 28.516 (31.280)	Top-5 acc 52.344 (54.707)	lr 0.00000
Warmup Train [40][2720/3239]	Time 0.188 (0.230)	Data 0.002 (0.010)	Loss 3.9491 (3.9958)	Top-1 acc 33.203 (31.276)	Top-5 acc 57.422 (54.705)	lr 0.00000
Warmup Train [40][2730/3239]	Time 0.178 (0.230)	Data 0.001 (0.010)	Loss 4.0488 (3.9958)	Top-1 acc 30.078 (31.284)	Top-5 acc 55.859 (54.705)	lr 0.00000
Warmup Train [40][2740/3239]	Time 0.270 (0.230)	Data 0.001 (0.010)	Loss 3.9135 (3.9957)	Top-1 acc 34.375 (31.287)	Top-5 acc 54.688 (54.704)	lr 0.00000
Warmup Train [40][2750/3239]	Time 0.189 (0.230)	Data 0.001 (0.010)	Loss 4.0889 (3.9957)	Top-1 acc 26.172 (31.283)	Top-5 acc 49.219 (54.705)	lr 0.00000
Warmup Train [40][2760/3239]	Time 0.192 (0.230)	Data 0.001 (0.010)	Loss 3.7877 (3.9955)	Top-1 acc 39.062 (31.285)	Top-5 acc 58.984 (54.708)	lr 0.00000
Warmup Train [40][2770/3239]	Time 0.134 (0.230)	Data 0.002 (0.010)	Loss 3.9769 (3.9955)	Top-1 acc 29.297 (31.284)	Top-5 acc 54.688 (54.706)	lr 0.00000
Warmup Train [40][2780/3239]	Time 0.315 (0.230)	Data 0.001 (0.010)	Loss 3.9011 (3.9954)	Top-1 acc 33.203 (31.288)	Top-5 acc 57.031 (54.709)	lr 0.00000
Warmup Train [40][2790/3239]	Time 0.177 (0.230)	Data 0.001 (0.009)	Loss 4.1444 (3.9954)	Top-1 acc 27.734 (31.288)	Top-5 acc 54.297 (54.709)	lr 0.00000
Warmup Train [40][2800/3239]	Time 0.212 (0.230)	Data 0.001 (0.009)	Loss 3.9926 (3.9955)	Top-1 acc 35.938 (31.286)	Top-5 acc 54.688 (54.707)	lr 0.00000
Warmup Train [40][2810/3239]	Time 0.228 (0.230)	Data 0.001 (0.009)	Loss 4.1440 (3.9957)	Top-1 acc 25.391 (31.283)	Top-5 acc 50.781 (54.703)	lr 0.00000
Warmup Train [40][2820/3239]	Time 0.237 (0.230)	Data 0.002 (0.009)	Loss 4.1053 (3.9957)	Top-1 acc 31.250 (31.283)	Top-5 acc 51.172 (54.704)	lr 0.00000
Warmup Train [40][2830/3239]	Time 0.233 (0.230)	Data 0.001 (0.009)	Loss 4.1873 (3.9958)	Top-1 acc 25.781 (31.277)	Top-5 acc 54.297 (54.705)	lr 0.00000
Warmup Train [40][2840/3239]	Time 0.213 (0.230)	Data 0.002 (0.009)	Loss 3.9570 (3.9960)	Top-1 acc 30.469 (31.272)	Top-5 acc 58.203 (54.702)	lr 0.00000
Warmup Train [40][2850/3239]	Time 0.268 (0.230)	Data 0.001 (0.009)	Loss 4.1027 (3.9958)	Top-1 acc 28.125 (31.274)	Top-5 acc 52.734 (54.707)	lr 0.00000
Warmup Train [40][2860/3239]	Time 0.302 (0.230)	Data 0.002 (0.009)	Loss 4.0850 (3.9958)	Top-1 acc 29.688 (31.271)	Top-5 acc 51.562 (54.708)	lr 0.00000
Warmup Train [40][2870/3239]	Time 0.225 (0.230)	Data 0.001 (0.009)	Loss 4.0748 (3.9957)	Top-1 acc 30.859 (31.269)	Top-5 acc 50.781 (54.708)	lr 0.00000
Warmup Train [40][2880/3239]	Time 0.184 (0.230)	Data 0.001 (0.009)	Loss 4.1624 (3.9959)	Top-1 acc 28.906 (31.263)	Top-5 acc 49.609 (54.704)	lr 0.00000
Warmup Train [40][2890/3239]	Time 0.204 (0.230)	Data 0.001 (0.009)	Loss 3.9629 (3.9959)	Top-1 acc 32.422 (31.266)	Top-5 acc 55.469 (54.710)	lr 0.00000
Warmup Train [40][2900/3239]	Time 0.219 (0.230)	Data 0.002 (0.009)	Loss 4.0137 (3.9959)	Top-1 acc 29.297 (31.263)	Top-5 acc 53.906 (54.710)	lr 0.00000
Warmup Train [40][2910/3239]	Time 0.211 (0.230)	Data 0.001 (0.009)	Loss 4.0771 (3.9959)	Top-1 acc 30.469 (31.261)	Top-5 acc 50.781 (54.709)	lr 0.00000
Warmup Train [40][2920/3239]	Time 0.196 (0.230)	Data 0.001 (0.009)	Loss 3.9023 (3.9959)	Top-1 acc 31.250 (31.262)	Top-5 acc 55.078 (54.710)	lr 0.00000
Warmup Train [40][2930/3239]	Time 0.210 (0.230)	Data 0.001 (0.009)	Loss 4.0226 (3.9962)	Top-1 acc 31.250 (31.257)	Top-5 acc 52.344 (54.703)	lr 0.00000
Warmup Train [40][2940/3239]	Time 0.189 (0.230)	Data 0.001 (0.009)	Loss 4.0764 (3.9963)	Top-1 acc 30.469 (31.255)	Top-5 acc 53.516 (54.699)	lr 0.00000
Warmup Train [40][2950/3239]	Time 0.291 (0.230)	Data 0.002 (0.009)	Loss 3.8757 (3.9962)	Top-1 acc 34.766 (31.258)	Top-5 acc 58.203 (54.703)	lr 0.00000
Warmup Train [40][2960/3239]	Time 0.434 (0.230)	Data 0.001 (0.009)	Loss 4.1013 (3.9964)	Top-1 acc 29.297 (31.253)	Top-5 acc 51.562 (54.700)	lr 0.00000
Warmup Train [40][2970/3239]	Time 0.205 (0.230)	Data 0.002 (0.009)	Loss 3.9803 (3.9964)	Top-1 acc 29.688 (31.253)	Top-5 acc 57.031 (54.702)	lr 0.00000
Warmup Train [40][2980/3239]	Time 0.206 (0.230)	Data 0.002 (0.009)	Loss 4.1962 (3.9964)	Top-1 acc 28.906 (31.252)	Top-5 acc 50.000 (54.698)	lr 0.00000
Warmup Train [40][2990/3239]	Time 0.288 (0.230)	Data 0.001 (0.009)	Loss 4.0925 (3.9964)	Top-1 acc 34.375 (31.259)	Top-5 acc 53.125 (54.701)	lr 0.00000
Warmup Train [40][3000/3239]	Time 0.179 (0.230)	Data 0.001 (0.009)	Loss 3.9774 (3.9964)	Top-1 acc 34.766 (31.263)	Top-5 acc 57.031 (54.703)	lr 0.00000
Warmup Train [40][3010/3239]	Time 0.157 (0.230)	Data 0.001 (0.009)	Loss 3.9813 (3.9964)	Top-1 acc 27.344 (31.265)	Top-5 acc 51.953 (54.698)	lr 0.00000
Warmup Train [40][3020/3239]	Time 0.278 (0.230)	Data 0.003 (0.009)	Loss 4.0668 (3.9966)	Top-1 acc 32.812 (31.266)	Top-5 acc 53.906 (54.695)	lr 0.00000
Warmup Train [40][3030/3239]	Time 0.221 (0.231)	Data 0.003 (0.009)	Loss 4.2318 (3.9966)	Top-1 acc 28.516 (31.265)	Top-5 acc 53.516 (54.695)	lr 0.00000
Warmup Train [40][3040/3239]	Time 0.254 (0.231)	Data 0.002 (0.009)	Loss 3.8931 (3.9968)	Top-1 acc 30.859 (31.260)	Top-5 acc 55.859 (54.691)	lr 0.00000
Warmup Train [40][3050/3239]	Time 0.296 (0.231)	Data 0.001 (0.009)	Loss 3.8070 (3.9968)	Top-1 acc 35.547 (31.258)	Top-5 acc 60.938 (54.690)	lr 0.00000
Warmup Train [40][3060/3239]	Time 0.171 (0.231)	Data 0.001 (0.009)	Loss 4.1119 (3.9969)	Top-1 acc 30.859 (31.257)	Top-5 acc 51.562 (54.687)	lr 0.00000
Warmup Train [40][3070/3239]	Time 0.392 (0.231)	Data 0.001 (0.009)	Loss 3.7382 (3.9969)	Top-1 acc 37.891 (31.256)	Top-5 acc 60.156 (54.682)	lr 0.00000
Warmup Train [40][3080/3239]	Time 0.231 (0.231)	Data 0.002 (0.009)	Loss 4.0410 (3.9969)	Top-1 acc 31.641 (31.254)	Top-5 acc 57.812 (54.685)	lr 0.00000
Warmup Train [40][3090/3239]	Time 0.238 (0.231)	Data 0.001 (0.009)	Loss 4.1579 (3.9968)	Top-1 acc 28.906 (31.254)	Top-5 acc 51.562 (54.687)	lr 0.00000
Warmup Train [40][3100/3239]	Time 0.181 (0.231)	Data 0.001 (0.009)	Loss 3.9649 (3.9968)	Top-1 acc 28.906 (31.256)	Top-5 acc 56.250 (54.689)	lr 0.00000
Warmup Train [40][3110/3239]	Time 0.245 (0.231)	Data 0.001 (0.009)	Loss 3.9511 (3.9968)	Top-1 acc 30.469 (31.252)	Top-5 acc 57.031 (54.688)	lr 0.00000
Warmup Train [40][3120/3239]	Time 0.238 (0.231)	Data 0.001 (0.009)	Loss 3.9693 (3.9967)	Top-1 acc 34.375 (31.255)	Top-5 acc 55.469 (54.687)	lr 0.00000
Warmup Train [40][3130/3239]	Time 0.225 (0.231)	Data 0.001 (0.009)	Loss 3.9720 (3.9970)	Top-1 acc 30.859 (31.247)	Top-5 acc 55.859 (54.684)	lr 0.00000
Warmup Train [40][3140/3239]	Time 0.257 (0.231)	Data 0.001 (0.009)	Loss 3.9569 (3.9971)	Top-1 acc 34.766 (31.248)	Top-5 acc 54.688 (54.686)	lr 0.00000
Warmup Train [40][3150/3239]	Time 0.213 (0.231)	Data 0.002 (0.009)	Loss 4.1285 (3.9972)	Top-1 acc 27.344 (31.249)	Top-5 acc 51.172 (54.683)	lr 0.00000
Warmup Train [40][3160/3239]	Time 0.218 (0.231)	Data 0.001 (0.009)	Loss 3.8641 (3.9973)	Top-1 acc 32.422 (31.242)	Top-5 acc 58.203 (54.681)	lr 0.00000
Warmup Train [40][3170/3239]	Time 0.325 (0.231)	Data 0.002 (0.009)	Loss 4.0787 (3.9973)	Top-1 acc 30.859 (31.246)	Top-5 acc 49.609 (54.679)	lr 0.00000
Warmup Train [40][3180/3239]	Time 0.226 (0.231)	Data 0.000 (0.009)	Loss 4.1294 (3.9973)	Top-1 acc 28.906 (31.246)	Top-5 acc 51.172 (54.679)	lr 0.00000
Warmup Train [40][3190/3239]	Time 0.212 (0.231)	Data 0.000 (0.009)	Loss 3.9884 (3.9973)	Top-1 acc 29.688 (31.250)	Top-5 acc 52.734 (54.679)	lr 0.00000
Warmup Train [40][3200/3239]	Time 0.157 (0.231)	Data 0.000 (0.009)	Loss 4.3230 (3.9974)	Top-1 acc 25.781 (31.247)	Top-5 acc 47.266 (54.677)	lr 0.00000
Warmup Train [40][3210/3239]	Time 0.221 (0.231)	Data 0.000 (0.009)	Loss 3.9018 (3.9974)	Top-1 acc 35.938 (31.254)	Top-5 acc 58.984 (54.680)	lr 0.00000
Warmup Train [40][3220/3239]	Time 0.195 (0.231)	Data 0.000 (0.009)	Loss 4.1934 (3.9975)	Top-1 acc 25.391 (31.247)	Top-5 acc 48.828 (54.676)	lr 0.00000
Warmup Train [40][3230/3239]	Time 0.202 (0.231)	Data 0.000 (0.009)	Loss 4.0255 (3.9975)	Top-1 acc 26.562 (31.248)	Top-5 acc 55.078 (54.680)	lr 0.00000
Warmup Train [40][3239/3239]	Time 0.133 (0.231)	Data 0.000 (0.009)	Loss 4.4508 (3.9975)	Top-1 acc 27.160 (31.249)	Top-5 acc 40.741 (54.682)	lr 0.00000
==========Warmup Valid [40/40]	loss 2.937	top-1 acc 38.593	top-5 acc 63.516	Train top-1 31.249	top-5 54.682	flops: 442.4M
Train [1][0/3239]	Time 18.098 (18.098)	Data Time 16.613 (16.613)	Loss 4.0061 (4.0061)	Entropy 1.90187 (1.90187)	Top-1 acc 29.688 (29.688)	Top-5 acc 50.391 (50.391)	lr 0.02500
Train [1][10/3239]	Time 0.335 (2.065)	Data Time 0.002 (1.651)	Loss 4.0560 (4.0629)	Entropy 1.90187 (1.90187)	Top-1 acc 29.688 (30.682)	Top-5 acc 52.734 (52.699)	lr 0.02500
Train [1][20/3239]	Time 0.258 (1.205)	Data Time 0.001 (0.866)	Loss 4.1004 (4.0818)	Entropy 1.90187 (1.90187)	Top-1 acc 29.688 (29.836)	Top-5 acc 47.656 (52.139)	lr 0.02500
Train [1][30/3239]	Time 0.254 (0.894)	Data Time 0.001 (0.587)	Loss 3.9811 (4.0766)	Entropy 1.90187 (1.90187)	Top-1 acc 33.984 (29.650)	Top-5 acc 51.953 (52.533)	lr 0.02500
Train [1][40/3239]	Time 0.290 (0.727)	Data Time 0.001 (0.444)	Loss 4.1125 (4.0674)	Entropy 1.90187 (1.90187)	Top-1 acc 27.344 (29.516)	Top-5 acc 54.688 (53.030)	lr 0.02500
Train [1][50/3239]	Time 0.164 (0.633)	Data Time 0.001 (0.358)	Loss 4.0235 (4.0621)	Entropy 1.90187 (1.90187)	Top-1 acc 28.906 (29.603)	Top-5 acc 54.688 (53.064)	lr 0.02500
Train [1][60/3239]	Time 0.200 (0.566)	Data Time 0.002 (0.300)	Loss 4.1058 (4.0534)	Entropy 1.90187 (1.90187)	Top-1 acc 34.766 (29.988)	Top-5 acc 48.828 (53.234)	lr 0.02500
Train [1][70/3239]	Time 0.336 (0.520)	Data Time 0.001 (0.258)	Loss 4.2431 (4.0588)	Entropy 1.90187 (1.90187)	Top-1 acc 27.344 (29.935)	Top-5 acc 49.219 (53.136)	lr 0.02500
Train [1][80/3239]	Time 0.268 (0.485)	Data Time 0.002 (0.227)	Loss 4.2019 (4.0619)	Entropy 1.90187 (1.90187)	Top-1 acc 29.297 (29.885)	Top-5 acc 55.469 (53.144)	lr 0.02500
Train [1][90/3239]	Time 0.277 (0.456)	Data Time 0.001 (0.202)	Loss 3.8196 (4.0653)	Entropy 1.90187 (1.90187)	Top-1 acc 33.984 (29.881)	Top-5 acc 59.766 (53.121)	lr 0.02500
Train [1][100/3239]	Time 0.220 (0.433)	Data Time 0.001 (0.182)	Loss 4.1424 (4.0648)	Entropy 1.90187 (1.90187)	Top-1 acc 28.125 (29.865)	Top-5 acc 51.953 (53.260)	lr 0.02500
Train [1][110/3239]	Time 0.209 (0.414)	Data Time 0.002 (0.166)	Loss 4.0445 (4.0615)	Entropy 1.90187 (1.90187)	Top-1 acc 34.766 (29.853)	Top-5 acc 52.734 (53.238)	lr 0.02500
Train [1][120/3239]	Time 0.183 (0.398)	Data Time 0.002 (0.152)	Loss 4.0613 (4.0590)	Entropy 1.90187 (1.90187)	Top-1 acc 28.516 (29.762)	Top-5 acc 52.734 (53.248)	lr 0.02500
Train [1][130/3239]	Time 0.281 (0.385)	Data Time 0.002 (0.141)	Loss 4.1316 (4.0628)	Entropy 1.90187 (1.90187)	Top-1 acc 30.078 (29.661)	Top-5 acc 52.344 (53.119)	lr 0.02500
Train [1][140/3239]	Time 0.145 (0.375)	Data Time 0.001 (0.132)	Loss 4.1600 (4.0681)	Entropy 1.90187 (1.90187)	Top-1 acc 26.562 (29.607)	Top-5 acc 48.828 (52.939)	lr 0.02500
Train [1][150/3239]	Time 0.298 (0.365)	Data Time 0.001 (0.123)	Loss 4.0506 (4.0722)	Entropy 1.90187 (1.90187)	Top-1 acc 32.422 (29.587)	Top-5 acc 51.953 (52.874)	lr 0.02500
Train [1][160/3239]	Time 0.218 (0.356)	Data Time 0.001 (0.116)	Loss 4.1366 (4.0718)	Entropy 1.90187 (1.90187)	Top-1 acc 23.438 (29.479)	Top-5 acc 52.344 (52.882)	lr 0.02500
Train [1][170/3239]	Time 0.154 (0.349)	Data Time 0.001 (0.109)	Loss 3.9497 (4.0673)	Entropy 1.90187 (1.90187)	Top-1 acc 30.859 (29.594)	Top-5 acc 57.031 (53.040)	lr 0.02500
Train [1][180/3239]	Time 0.185 (0.342)	Data Time 0.001 (0.104)	Loss 4.1094 (4.0680)	Entropy 1.90187 (1.90187)	Top-1 acc 29.688 (29.612)	Top-5 acc 50.000 (53.004)	lr 0.02500
Train [1][190/3239]	Time 0.167 (0.336)	Data Time 0.001 (0.098)	Loss 4.0356 (4.0704)	Entropy 1.90187 (1.90187)	Top-1 acc 29.297 (29.542)	Top-5 acc 54.297 (52.912)	lr 0.02500
Train [1][200/3239]	Time 0.139 (0.330)	Data Time 0.001 (0.094)	Loss 4.3250 (4.0713)	Entropy 1.90187 (1.90187)	Top-1 acc 29.297 (29.555)	Top-5 acc 44.531 (52.898)	lr 0.02500
Train [1][210/3239]	Time 0.193 (0.325)	Data Time 0.001 (0.089)	Loss 4.1843 (4.0721)	Entropy 1.90187 (1.90187)	Top-1 acc 29.688 (29.551)	Top-5 acc 50.000 (52.851)	lr 0.02500
Train [1][220/3239]	Time 0.269 (0.320)	Data Time 0.001 (0.086)	Loss 4.1294 (4.0720)	Entropy 1.90187 (1.90187)	Top-1 acc 31.250 (29.604)	Top-5 acc 55.859 (52.888)	lr 0.02500
Train [1][230/3239]	Time 0.230 (0.316)	Data Time 0.001 (0.082)	Loss 3.9131 (4.0699)	Entropy 1.90187 (1.90187)	Top-1 acc 32.031 (29.600)	Top-5 acc 54.688 (52.927)	lr 0.02500
Train [1][240/3239]	Time 0.252 (0.312)	Data Time 0.002 (0.079)	Loss 4.1412 (4.0704)	Entropy 1.90187 (1.90187)	Top-1 acc 28.125 (29.608)	Top-5 acc 52.344 (52.914)	lr 0.02500
Train [1][250/3239]	Time 0.228 (0.308)	Data Time 0.001 (0.076)	Loss 3.9729 (4.0699)	Entropy 1.90187 (1.90187)	Top-1 acc 29.688 (29.591)	Top-5 acc 54.688 (52.913)	lr 0.02500
Train [1][260/3239]	Time 0.221 (0.304)	Data Time 0.001 (0.073)	Loss 4.2032 (4.0720)	Entropy 1.90187 (1.90187)	Top-1 acc 26.562 (29.571)	Top-5 acc 51.953 (52.868)	lr 0.02500
Train [1][270/3239]	Time 0.211 (0.302)	Data Time 0.001 (0.070)	Loss 4.3709 (4.0749)	Entropy 1.90187 (1.90187)	Top-1 acc 25.391 (29.561)	Top-5 acc 44.531 (52.808)	lr 0.02500
Train [1][280/3239]	Time 0.335 (0.299)	Data Time 0.001 (0.068)	Loss 4.2211 (4.0752)	Entropy 1.90187 (1.90187)	Top-1 acc 23.828 (29.517)	Top-5 acc 52.344 (52.807)	lr 0.02500
Train [1][290/3239]	Time 0.211 (0.297)	Data Time 0.001 (0.065)	Loss 3.9956 (4.0759)	Entropy 1.90187 (1.90187)	Top-1 acc 26.562 (29.483)	Top-5 acc 52.734 (52.771)	lr 0.02500
Train [1][300/3239]	Time 0.221 (0.295)	Data Time 0.001 (0.063)	Loss 4.1711 (4.0760)	Entropy 1.90187 (1.90187)	Top-1 acc 26.172 (29.462)	Top-5 acc 49.219 (52.734)	lr 0.02500
Train [1][310/3239]	Time 0.215 (0.292)	Data Time 0.001 (0.061)	Loss 4.0915 (4.0774)	Entropy 1.90187 (1.90187)	Top-1 acc 26.953 (29.428)	Top-5 acc 49.609 (52.717)	lr 0.02500
Train [1][320/3239]	Time 0.225 (0.290)	Data Time 0.001 (0.059)	Loss 3.9913 (4.0794)	Entropy 1.90187 (1.90187)	Top-1 acc 32.812 (29.367)	Top-5 acc 58.203 (52.728)	lr 0.02500
Train [1][330/3239]	Time 0.264 (0.289)	Data Time 0.001 (0.058)	Loss 4.0939 (4.0806)	Entropy 1.90187 (1.90187)	Top-1 acc 28.906 (29.325)	Top-5 acc 51.172 (52.686)	lr 0.02500
Train [1][340/3239]	Time 0.147 (0.287)	Data Time 0.002 (0.056)	Loss 4.1098 (4.0798)	Entropy 1.90187 (1.90187)	Top-1 acc 25.391 (29.348)	Top-5 acc 52.344 (52.706)	lr 0.02500
Train [1][350/3239]	Time 0.157 (0.285)	Data Time 0.001 (0.055)	Loss 4.2870 (4.0792)	Entropy 1.90187 (1.90187)	Top-1 acc 25.391 (29.377)	Top-5 acc 46.875 (52.692)	lr 0.02500
Train [1][360/3239]	Time 0.204 (0.283)	Data Time 0.001 (0.053)	Loss 4.0643 (4.0803)	Entropy 1.90187 (1.90187)	Top-1 acc 28.906 (29.347)	Top-5 acc 53.906 (52.689)	lr 0.02500
Train [1][370/3239]	Time 0.286 (0.282)	Data Time 0.001 (0.052)	Loss 3.9820 (4.0800)	Entropy 1.90187 (1.90187)	Top-1 acc 29.297 (29.358)	Top-5 acc 58.203 (52.706)	lr 0.02500
Train [1][380/3239]	Time 0.314 (0.280)	Data Time 0.001 (0.051)	Loss 4.1125 (4.0803)	Entropy 1.90187 (1.90187)	Top-1 acc 28.125 (29.347)	Top-5 acc 53.125 (52.715)	lr 0.02500
Train [1][390/3239]	Time 0.206 (0.279)	Data Time 0.030 (0.049)	Loss 4.1893 (4.0818)	Entropy 1.90187 (1.90187)	Top-1 acc 29.297 (29.342)	Top-5 acc 48.828 (52.692)	lr 0.02500
Train [1][400/3239]	Time 0.165 (0.277)	Data Time 0.001 (0.048)	Loss 4.0834 (4.0815)	Entropy 1.90187 (1.90187)	Top-1 acc 23.828 (29.309)	Top-5 acc 52.734 (52.698)	lr 0.02500
Train [1][410/3239]	Time 0.177 (0.275)	Data Time 0.001 (0.047)	Loss 4.2394 (4.0831)	Entropy 1.90187 (1.90187)	Top-1 acc 25.781 (29.267)	Top-5 acc 48.828 (52.667)	lr 0.02500
Train [1][420/3239]	Time 0.216 (0.274)	Data Time 0.001 (0.046)	Loss 4.2384 (4.0848)	Entropy 1.90187 (1.90187)	Top-1 acc 25.781 (29.212)	Top-5 acc 50.781 (52.630)	lr 0.02500
Train [1][430/3239]	Time 0.237 (0.273)	Data Time 0.001 (0.045)	Loss 4.2515 (4.0872)	Entropy 1.90187 (1.90187)	Top-1 acc 26.172 (29.150)	Top-5 acc 47.656 (52.595)	lr 0.02500
Train [1][440/3239]	Time 0.198 (0.272)	Data Time 0.001 (0.044)	Loss 4.1775 (4.0897)	Entropy 1.90187 (1.90187)	Top-1 acc 28.906 (29.107)	Top-5 acc 51.953 (52.539)	lr 0.02500
Train [1][450/3239]	Time 0.228 (0.271)	Data Time 0.001 (0.043)	Loss 4.0056 (4.0903)	Entropy 1.90187 (1.90187)	Top-1 acc 33.203 (29.090)	Top-5 acc 54.688 (52.527)	lr 0.02500
Train [1][460/3239]	Time 0.163 (0.270)	Data Time 0.001 (0.042)	Loss 4.1538 (4.0906)	Entropy 1.90187 (1.90187)	Top-1 acc 31.250 (29.087)	Top-5 acc 48.047 (52.510)	lr 0.02500
Train [1][470/3239]	Time 0.210 (0.269)	Data Time 0.001 (0.041)	Loss 4.1212 (4.0914)	Entropy 1.90187 (1.90187)	Top-1 acc 28.125 (29.090)	Top-5 acc 50.781 (52.505)	lr 0.02500
Train [1][480/3239]	Time 0.294 (0.268)	Data Time 0.002 (0.041)	Loss 4.1128 (4.0931)	Entropy 1.90187 (1.90187)	Top-1 acc 29.297 (29.084)	Top-5 acc 53.516 (52.481)	lr 0.02500
Train [1][490/3239]	Time 0.366 (0.267)	Data Time 0.003 (0.040)	Loss 3.9548 (4.0941)	Entropy 1.90187 (1.90187)	Top-1 acc 28.516 (29.083)	Top-5 acc 53.516 (52.462)	lr 0.02500
Train [1][500/3239]	Time 0.345 (0.267)	Data Time 0.001 (0.039)	Loss 4.1741 (4.0956)	Entropy 1.90187 (1.90187)	Top-1 acc 26.172 (29.058)	Top-5 acc 48.047 (52.426)	lr 0.02500
Train [1][510/3239]	Time 0.246 (0.266)	Data Time 0.002 (0.038)	Loss 4.1691 (4.0967)	Entropy 1.90187 (1.90187)	Top-1 acc 25.000 (29.023)	Top-5 acc 50.781 (52.389)	lr 0.02500
Train [1][520/3239]	Time 0.238 (0.265)	Data Time 0.002 (0.038)	Loss 4.0208 (4.0966)	Entropy 1.90187 (1.90187)	Top-1 acc 28.125 (29.010)	Top-5 acc 53.125 (52.392)	lr 0.02500
Train [1][530/3239]	Time 0.229 (0.264)	Data Time 0.001 (0.037)	Loss 4.2319 (4.0973)	Entropy 1.90187 (1.90187)	Top-1 acc 28.125 (29.029)	Top-5 acc 47.266 (52.389)	lr 0.02500
Train [1][540/3239]	Time 0.189 (0.263)	Data Time 0.001 (0.036)	Loss 4.1495 (4.0978)	Entropy 1.90187 (1.90187)	Top-1 acc 27.344 (29.010)	Top-5 acc 51.562 (52.376)	lr 0.02500
Train [1][550/3239]	Time 0.226 (0.262)	Data Time 0.001 (0.036)	Loss 3.8935 (4.0972)	Entropy 1.90187 (1.90187)	Top-1 acc 30.859 (29.017)	Top-5 acc 57.812 (52.388)	lr 0.02500
Train [1][560/3239]	Time 0.198 (0.262)	Data Time 0.001 (0.035)	Loss 4.1777 (4.0985)	Entropy 1.90187 (1.90187)	Top-1 acc 25.000 (28.959)	Top-5 acc 48.828 (52.358)	lr 0.02500
Train [1][570/3239]	Time 0.228 (0.261)	Data Time 0.001 (0.035)	Loss 4.2991 (4.0989)	Entropy 1.90187 (1.90187)	Top-1 acc 25.781 (28.956)	Top-5 acc 50.000 (52.344)	lr 0.02500
Train [1][580/3239]	Time 0.203 (0.261)	Data Time 0.001 (0.034)	Loss 4.0591 (4.0989)	Entropy 1.90187 (1.90187)	Top-1 acc 30.469 (28.972)	Top-5 acc 47.656 (52.330)	lr 0.02500
Train [1][590/3239]	Time 0.382 (0.261)	Data Time 0.001 (0.034)	Loss 4.0335 (4.0980)	Entropy 1.90187 (1.90187)	Top-1 acc 28.516 (28.977)	Top-5 acc 51.953 (52.336)	lr 0.02500
Train [1][600/3239]	Time 0.273 (0.260)	Data Time 0.001 (0.033)	Loss 4.1290 (4.0989)	Entropy 1.90187 (1.90187)	Top-1 acc 29.688 (28.956)	Top-5 acc 50.781 (52.304)	lr 0.02500
Train [1][610/3239]	Time 0.198 (0.260)	Data Time 0.001 (0.032)	Loss 4.1997 (4.0985)	Entropy 1.90187 (1.90187)	Top-1 acc 30.859 (28.965)	Top-5 acc 52.344 (52.319)	lr 0.02500
Train [1][620/3239]	Time 0.172 (0.259)	Data Time 0.001 (0.032)	Loss 4.2414 (4.0988)	Entropy 1.90187 (1.90187)	Top-1 acc 27.734 (28.962)	Top-5 acc 48.438 (52.314)	lr 0.02500
Train [1][630/3239]	Time 0.191 (0.258)	Data Time 0.001 (0.032)	Loss 4.0988 (4.0993)	Entropy 1.90187 (1.90187)	Top-1 acc 32.422 (28.947)	Top-5 acc 53.516 (52.307)	lr 0.02500
Train [1][640/3239]	Time 0.206 (0.258)	Data Time 0.002 (0.031)	Loss 4.1335 (4.0994)	Entropy 1.90187 (1.90187)	Top-1 acc 29.297 (28.947)	Top-5 acc 53.516 (52.310)	lr 0.02500
Train [1][650/3239]	Time 0.232 (0.257)	Data Time 0.001 (0.031)	Loss 4.1859 (4.0999)	Entropy 1.90187 (1.90187)	Top-1 acc 25.781 (28.947)	Top-5 acc 48.828 (52.309)	lr 0.02500
Train [1][660/3239]	Time 0.207 (0.257)	Data Time 0.001 (0.030)	Loss 4.0892 (4.1000)	Entropy 1.90187 (1.90187)	Top-1 acc 28.906 (28.930)	Top-5 acc 51.562 (52.294)	lr 0.02500
Train [1][670/3239]	Time 0.140 (0.256)	Data Time 0.002 (0.030)	Loss 4.2597 (4.1013)	Entropy 1.90187 (1.90187)	Top-1 acc 25.391 (28.914)	Top-5 acc 49.219 (52.258)	lr 0.02500
Train [1][680/3239]	Time 0.139 (0.256)	Data Time 0.001 (0.029)	Loss 4.1389 (4.1009)	Entropy 1.90187 (1.90187)	Top-1 acc 25.391 (28.915)	Top-5 acc 50.000 (52.258)	lr 0.02500
Train [1][690/3239]	Time 0.355 (0.255)	Data Time 0.001 (0.029)	Loss 4.3019 (4.1020)	Entropy 1.90187 (1.90187)	Top-1 acc 26.562 (28.898)	Top-5 acc 46.094 (52.248)	lr 0.02500
Train [1][700/3239]	Time 0.305 (0.255)	Data Time 0.001 (0.029)	Loss 4.2382 (4.1032)	Entropy 1.90187 (1.90187)	Top-1 acc 26.953 (28.876)	Top-5 acc 47.656 (52.221)	lr 0.02500
Train [1][710/3239]	Time 0.192 (0.254)	Data Time 0.001 (0.028)	Loss 4.0103 (4.1035)	Entropy 1.90187 (1.90187)	Top-1 acc 32.422 (28.861)	Top-5 acc 54.688 (52.201)	lr 0.02500
Train [1][720/3239]	Time 0.171 (0.254)	Data Time 0.001 (0.028)	Loss 4.2100 (4.1046)	Entropy 1.90187 (1.90187)	Top-1 acc 31.250 (28.846)	Top-5 acc 51.562 (52.201)	lr 0.02500
Train [1][730/3239]	Time 0.280 (0.254)	Data Time 0.002 (0.028)	Loss 4.0994 (4.1048)	Entropy 1.90187 (1.90187)	Top-1 acc 30.469 (28.844)	Top-5 acc 51.953 (52.193)	lr 0.02500
Train [1][740/3239]	Time 0.234 (0.253)	Data Time 0.001 (0.027)	Loss 4.2487 (4.1050)	Entropy 1.90187 (1.90187)	Top-1 acc 27.344 (28.831)	Top-5 acc 46.484 (52.188)	lr 0.02500
Train [1][750/3239]	Time 0.221 (0.253)	Data Time 0.001 (0.027)	Loss 4.2711 (4.1048)	Entropy 1.90187 (1.90187)	Top-1 acc 25.000 (28.834)	Top-5 acc 50.391 (52.198)	lr 0.02500
Train [1][760/3239]	Time 0.174 (0.252)	Data Time 0.001 (0.027)	Loss 4.0703 (4.1046)	Entropy 1.90187 (1.90187)	Top-1 acc 30.469 (28.829)	Top-5 acc 51.953 (52.185)	lr 0.02500
Train [1][770/3239]	Time 0.175 (0.252)	Data Time 0.001 (0.026)	Loss 4.2475 (4.1050)	Entropy 1.90187 (1.90187)	Top-1 acc 26.172 (28.812)	Top-5 acc 48.828 (52.174)	lr 0.02500
Train [1][780/3239]	Time 0.261 (0.252)	Data Time 0.002 (0.026)	Loss 4.0625 (4.1057)	Entropy 1.90187 (1.90187)	Top-1 acc 30.078 (28.792)	Top-5 acc 54.688 (52.160)	lr 0.02500
Train [1][790/3239]	Time 0.243 (0.251)	Data Time 0.001 (0.026)	Loss 4.3404 (4.1064)	Entropy 1.90187 (1.90187)	Top-1 acc 27.734 (28.775)	Top-5 acc 47.656 (52.144)	lr 0.02500
Train [1][800/3239]	Time 0.171 (0.251)	Data Time 0.001 (0.026)	Loss 4.1418 (4.1062)	Entropy 1.90187 (1.90187)	Top-1 acc 30.469 (28.790)	Top-5 acc 49.609 (52.151)	lr 0.02500
Train [1][810/3239]	Time 0.265 (0.251)	Data Time 0.001 (0.025)	Loss 3.9662 (4.1064)	Entropy 1.90187 (1.90187)	Top-1 acc 31.250 (28.784)	Top-5 acc 54.297 (52.145)	lr 0.02500
Train [1][820/3239]	Time 0.239 (0.250)	Data Time 0.001 (0.025)	Loss 4.1707 (4.1067)	Entropy 1.90187 (1.90187)	Top-1 acc 27.734 (28.783)	Top-5 acc 50.391 (52.131)	lr 0.02500
Train [1][830/3239]	Time 0.204 (0.250)	Data Time 0.001 (0.025)	Loss 4.2481 (4.1076)	Entropy 1.90187 (1.90187)	Top-1 acc 25.000 (28.766)	Top-5 acc 48.828 (52.114)	lr 0.02500
Train [1][840/3239]	Time 0.205 (0.250)	Data Time 0.001 (0.025)	Loss 4.1945 (4.1081)	Entropy 1.90187 (1.90187)	Top-1 acc 24.219 (28.765)	Top-5 acc 51.562 (52.110)	lr 0.02500
Train [1][850/3239]	Time 0.258 (0.249)	Data Time 0.001 (0.024)	Loss 4.1720 (4.1083)	Entropy 1.90187 (1.90187)	Top-1 acc 27.344 (28.766)	Top-5 acc 51.953 (52.120)	lr 0.02500
Train [1][860/3239]	Time 0.310 (0.249)	Data Time 0.001 (0.024)	Loss 4.0461 (4.1088)	Entropy 1.90187 (1.90187)	Top-1 acc 28.906 (28.747)	Top-5 acc 52.344 (52.107)	lr 0.02500
Train [1][870/3239]	Time 0.270 (0.249)	Data Time 0.001 (0.024)	Loss 4.1858 (4.1089)	Entropy 1.90187 (1.90187)	Top-1 acc 26.172 (28.742)	Top-5 acc 51.562 (52.107)	lr 0.02500
Train [1][880/3239]	Time 0.183 (0.249)	Data Time 0.002 (0.024)	Loss 4.0824 (4.1090)	Entropy 1.90187 (1.90187)	Top-1 acc 30.469 (28.739)	Top-5 acc 51.562 (52.099)	lr 0.02500
Train [1][890/3239]	Time 0.167 (0.249)	Data Time 0.001 (0.023)	Loss 4.3790 (4.1096)	Entropy 1.90187 (1.90187)	Top-1 acc 23.438 (28.730)	Top-5 acc 47.656 (52.089)	lr 0.02500
Train [1][900/3239]	Time 0.205 (0.249)	Data Time 0.001 (0.023)	Loss 4.3825 (4.1098)	Entropy 1.90187 (1.90187)	Top-1 acc 25.391 (28.726)	Top-5 acc 44.141 (52.071)	lr 0.02500
Train [1][910/3239]	Time 0.127 (0.248)	Data Time 0.001 (0.023)	Loss 4.2143 (4.1105)	Entropy 1.90187 (1.90187)	Top-1 acc 26.172 (28.712)	Top-5 acc 50.391 (52.053)	lr 0.02500
Train [1][920/3239]	Time 0.242 (0.248)	Data Time 0.002 (0.023)	Loss 4.0548 (4.1108)	Entropy 1.90187 (1.90187)	Top-1 acc 29.297 (28.711)	Top-5 acc 53.125 (52.036)	lr 0.02500
Train [1][930/3239]	Time 0.281 (0.248)	Data Time 0.002 (0.023)	Loss 4.0872 (4.1110)	Entropy 1.90187 (1.90187)	Top-1 acc 28.125 (28.706)	Top-5 acc 51.953 (52.028)	lr 0.02500
Train [1][940/3239]	Time 0.235 (0.247)	Data Time 0.001 (0.022)	Loss 4.1278 (4.1111)	Entropy 1.90187 (1.90187)	Top-1 acc 26.953 (28.709)	Top-5 acc 51.172 (52.024)	lr 0.02500
Train [1][950/3239]	Time 0.244 (0.247)	Data Time 0.002 (0.022)	Loss 4.1927 (4.1116)	Entropy 1.90187 (1.90187)	Top-1 acc 31.250 (28.703)	Top-5 acc 48.438 (52.008)	lr 0.02500
Train [1][960/3239]	Time 0.176 (0.247)	Data Time 0.002 (0.022)	Loss 4.1703 (4.1122)	Entropy 1.90187 (1.90187)	Top-1 acc 26.562 (28.694)	Top-5 acc 47.656 (51.985)	lr 0.02500
Train [1][970/3239]	Time 0.189 (0.247)	Data Time 0.002 (0.022)	Loss 4.3552 (4.1117)	Entropy 1.90187 (1.90187)	Top-1 acc 22.266 (28.696)	Top-5 acc 46.484 (51.991)	lr 0.02500
Train [1][980/3239]	Time 0.241 (0.247)	Data Time 0.001 (0.022)	Loss 4.2166 (4.1120)	Entropy 1.90187 (1.90187)	Top-1 acc 27.344 (28.692)	Top-5 acc 48.828 (51.988)	lr 0.02500
Train [1][990/3239]	Time 0.144 (0.246)	Data Time 0.001 (0.021)	Loss 4.0362 (4.1126)	Entropy 1.90187 (1.90187)	Top-1 acc 30.469 (28.687)	Top-5 acc 51.172 (51.966)	lr 0.02500
Train [1][1000/3239]	Time 0.166 (0.246)	Data Time 0.001 (0.021)	Loss 4.2916 (4.1126)	Entropy 1.90187 (1.90187)	Top-1 acc 28.906 (28.692)	Top-5 acc 48.438 (51.967)	lr 0.02500
Train [1][1010/3239]	Time 0.331 (0.246)	Data Time 0.002 (0.021)	Loss 4.2758 (4.1133)	Entropy 1.90187 (1.90187)	Top-1 acc 23.438 (28.680)	Top-5 acc 50.391 (51.968)	lr 0.02500
Train [1][1020/3239]	Time 0.219 (0.246)	Data Time 0.002 (0.021)	Loss 4.1103 (4.1134)	Entropy 1.90187 (1.90187)	Top-1 acc 31.250 (28.678)	Top-5 acc 49.219 (51.960)	lr 0.02500
Train [1][1030/3239]	Time 0.217 (0.246)	Data Time 0.001 (0.021)	Loss 4.2012 (4.1132)	Entropy 1.90187 (1.90187)	Top-1 acc 26.953 (28.681)	Top-5 acc 51.562 (51.963)	lr 0.02500
Train [1][1040/3239]	Time 0.205 (0.246)	Data Time 0.002 (0.020)	Loss 4.0380 (4.1132)	Entropy 1.90187 (1.90187)	Top-1 acc 28.125 (28.679)	Top-5 acc 56.250 (51.966)	lr 0.02500
Train [1][1050/3239]	Time 0.204 (0.246)	Data Time 0.001 (0.020)	Loss 4.0876 (4.1136)	Entropy 1.90187 (1.90187)	Top-1 acc 27.344 (28.674)	Top-5 acc 51.562 (51.956)	lr 0.02500
Train [1][1060/3239]	Time 0.150 (0.245)	Data Time 0.001 (0.020)	Loss 4.1126 (4.1133)	Entropy 1.90187 (1.90187)	Top-1 acc 30.469 (28.688)	Top-5 acc 53.125 (51.961)	lr 0.02500
Train [1][1070/3239]	Time 0.259 (0.245)	Data Time 0.027 (0.020)	Loss 4.2633 (4.1136)	Entropy 1.90187 (1.90187)	Top-1 acc 25.391 (28.683)	Top-5 acc 48.438 (51.958)	lr 0.02500
Train [1][1080/3239]	Time 0.212 (0.245)	Data Time 0.001 (0.020)	Loss 4.3653 (4.1137)	Entropy 1.90187 (1.90187)	Top-1 acc 25.781 (28.686)	Top-5 acc 46.094 (51.968)	lr 0.02500
Train [1][1090/3239]	Time 0.236 (0.245)	Data Time 0.001 (0.020)	Loss 4.1609 (4.1135)	Entropy 1.90187 (1.90187)	Top-1 acc 26.562 (28.691)	Top-5 acc 48.438 (51.975)	lr 0.02500
Train [1][1100/3239]	Time 0.278 (0.245)	Data Time 0.002 (0.020)	Loss 4.0462 (4.1136)	Entropy 1.90187 (1.90187)	Top-1 acc 30.859 (28.694)	Top-5 acc 50.781 (51.964)	lr 0.02500
Train [1][1110/3239]	Time 0.229 (0.245)	Data Time 0.002 (0.019)	Loss 4.0249 (4.1135)	Entropy 1.90187 (1.90187)	Top-1 acc 28.516 (28.692)	Top-5 acc 51.562 (51.961)	lr 0.02500
Train [1][1120/3239]	Time 0.364 (0.244)	Data Time 0.001 (0.019)	Loss 4.1679 (4.1143)	Entropy 1.90187 (1.90187)	Top-1 acc 30.078 (28.691)	Top-5 acc 51.172 (51.946)	lr 0.02500
Train [1][1130/3239]	Time 0.205 (0.244)	Data Time 0.001 (0.019)	Loss 4.1139 (4.1145)	Entropy 1.90187 (1.90187)	Top-1 acc 26.172 (28.685)	Top-5 acc 53.516 (51.938)	lr 0.02500
Train [1][1140/3239]	Time 0.207 (0.244)	Data Time 0.001 (0.019)	Loss 4.1040 (4.1142)	Entropy 1.90187 (1.90187)	Top-1 acc 29.297 (28.694)	Top-5 acc 53.516 (51.946)	lr 0.02500
Train [1][1150/3239]	Time 0.196 (0.244)	Data Time 0.001 (0.019)	Loss 4.1496 (4.1145)	Entropy 1.90187 (1.90187)	Top-1 acc 28.516 (28.692)	Top-5 acc 52.344 (51.947)	lr 0.02500
Train [1][1160/3239]	Time 0.212 (0.244)	Data Time 0.001 (0.019)	Loss 4.3790 (4.1155)	Entropy 1.90187 (1.90187)	Top-1 acc 20.703 (28.671)	Top-5 acc 43.750 (51.930)	lr 0.02500
Train [1][1170/3239]	Time 0.179 (0.243)	Data Time 0.001 (0.019)	Loss 4.1530 (4.1155)	Entropy 1.90187 (1.90187)	Top-1 acc 25.781 (28.663)	Top-5 acc 53.125 (51.937)	lr 0.02500
Train [1][1180/3239]	Time 0.217 (0.243)	Data Time 0.001 (0.019)	Loss 4.2835 (4.1159)	Entropy 1.90187 (1.90187)	Top-1 acc 26.172 (28.670)	Top-5 acc 46.484 (51.935)	lr 0.02500
Train [1][1190/3239]	Time 0.296 (0.243)	Data Time 0.001 (0.018)	Loss 4.4542 (4.1161)	Entropy 1.90187 (1.90187)	Top-1 acc 22.266 (28.660)	Top-5 acc 42.969 (51.922)	lr 0.02500
Train [1][1200/3239]	Time 0.221 (0.243)	Data Time 0.001 (0.018)	Loss 4.2705 (4.1166)	Entropy 1.90187 (1.90187)	Top-1 acc 27.344 (28.647)	Top-5 acc 51.562 (51.918)	lr 0.02500
Train [1][1210/3239]	Time 0.200 (0.243)	Data Time 0.001 (0.018)	Loss 4.0089 (4.1168)	Entropy 1.90187 (1.90187)	Top-1 acc 32.031 (28.650)	Top-5 acc 53.906 (51.915)	lr 0.02500
Train [1][1220/3239]	Time 0.349 (0.243)	Data Time 0.002 (0.018)	Loss 4.0915 (4.1174)	Entropy 1.90187 (1.90187)	Top-1 acc 28.906 (28.637)	Top-5 acc 53.516 (51.907)	lr 0.02500
Train [1][1230/3239]	Time 0.216 (0.243)	Data Time 0.001 (0.018)	Loss 3.8009 (4.1174)	Entropy 1.90187 (1.90187)	Top-1 acc 33.203 (28.640)	Top-5 acc 63.281 (51.905)	lr 0.02500
Train [1][1240/3239]	Time 0.212 (0.243)	Data Time 0.001 (0.018)	Loss 4.0936 (4.1177)	Entropy 1.90187 (1.90187)	Top-1 acc 32.031 (28.651)	Top-5 acc 51.562 (51.908)	lr 0.02500
Train [1][1250/3239]	Time 0.236 (0.243)	Data Time 0.001 (0.018)	Loss 4.1347 (4.1183)	Entropy 1.90187 (1.90187)	Top-1 acc 34.766 (28.640)	Top-5 acc 50.781 (51.888)	lr 0.02500
Train [1][1260/3239]	Time 0.213 (0.243)	Data Time 0.001 (0.018)	Loss 4.0281 (4.1181)	Entropy 1.90187 (1.90187)	Top-1 acc 25.000 (28.636)	Top-5 acc 51.562 (51.891)	lr 0.02500
Train [1][1270/3239]	Time 0.210 (0.242)	Data Time 0.001 (0.017)	Loss 4.0009 (4.1182)	Entropy 1.90187 (1.90187)	Top-1 acc 33.984 (28.635)	Top-5 acc 53.906 (51.896)	lr 0.02500
Train [1][1280/3239]	Time 0.268 (0.242)	Data Time 0.001 (0.017)	Loss 4.0756 (4.1181)	Entropy 1.90187 (1.90187)	Top-1 acc 29.688 (28.642)	Top-5 acc 51.953 (51.898)	lr 0.02500
Train [1][1290/3239]	Time 0.202 (0.242)	Data Time 0.001 (0.017)	Loss 4.1125 (4.1184)	Entropy 1.90187 (1.90187)	Top-1 acc 30.469 (28.637)	Top-5 acc 52.344 (51.890)	lr 0.02500
Train [1][1300/3239]	Time 0.191 (0.242)	Data Time 0.002 (0.017)	Loss 4.3107 (4.1183)	Entropy 1.90187 (1.90187)	Top-1 acc 25.000 (28.638)	Top-5 acc 44.922 (51.896)	lr 0.02500
Train [1][1310/3239]	Time 0.194 (0.242)	Data Time 0.001 (0.017)	Loss 4.2445 (4.1181)	Entropy 1.90187 (1.90187)	Top-1 acc 26.562 (28.639)	Top-5 acc 48.438 (51.892)	lr 0.02500
Train [1][1320/3239]	Time 0.311 (0.242)	Data Time 0.001 (0.017)	Loss 4.0051 (4.1183)	Entropy 1.90187 (1.90187)	Top-1 acc 34.766 (28.636)	Top-5 acc 55.859 (51.885)	lr 0.02500
Train [1][1330/3239]	Time 0.247 (0.242)	Data Time 0.001 (0.017)	Loss 4.0609 (4.1185)	Entropy 1.90187 (1.90187)	Top-1 acc 27.734 (28.626)	Top-5 acc 55.078 (51.882)	lr 0.02500
Train [1][1340/3239]	Time 0.201 (0.242)	Data Time 0.001 (0.017)	Loss 4.0785 (4.1183)	Entropy 1.90187 (1.90187)	Top-1 acc 27.344 (28.629)	Top-5 acc 51.172 (51.883)	lr 0.02500
Train [1][1350/3239]	Time 0.174 (0.241)	Data Time 0.001 (0.017)	Loss 4.0321 (4.1186)	Entropy 1.90187 (1.90187)	Top-1 acc 33.984 (28.626)	Top-5 acc 56.250 (51.882)	lr 0.02500
Train [1][1360/3239]	Time 0.191 (0.241)	Data Time 0.001 (0.017)	Loss 4.1983 (4.1184)	Entropy 1.90187 (1.90187)	Top-1 acc 27.734 (28.620)	Top-5 acc 50.391 (51.891)	lr 0.02500
Train [1][1370/3239]	Time 0.209 (0.241)	Data Time 0.001 (0.016)	Loss 4.0326 (4.1186)	Entropy 1.90187 (1.90187)	Top-1 acc 29.688 (28.616)	Top-5 acc 54.297 (51.888)	lr 0.02500
Train [1][1380/3239]	Time 0.173 (0.241)	Data Time 0.001 (0.016)	Loss 4.0351 (4.1182)	Entropy 1.90187 (1.90187)	Top-1 acc 25.781 (28.622)	Top-5 acc 53.516 (51.892)	lr 0.02500
Train [1][1390/3239]	Time 0.201 (0.241)	Data Time 0.001 (0.016)	Loss 4.1045 (4.1179)	Entropy 1.90187 (1.90187)	Top-1 acc 28.906 (28.624)	Top-5 acc 55.859 (51.897)	lr 0.02500
Train [1][1400/3239]	Time 0.198 (0.241)	Data Time 0.001 (0.016)	Loss 4.2832 (4.1180)	Entropy 1.90187 (1.90187)	Top-1 acc 27.734 (28.625)	Top-5 acc 46.875 (51.894)	lr 0.02500
Train [1][1410/3239]	Time 0.324 (0.241)	Data Time 0.001 (0.016)	Loss 4.1503 (4.1182)	Entropy 1.90187 (1.90187)	Top-1 acc 29.297 (28.623)	Top-5 acc 51.562 (51.884)	lr 0.02500
Train [1][1420/3239]	Time 0.302 (0.241)	Data Time 0.001 (0.016)	Loss 4.0885 (4.1184)	Entropy 1.90187 (1.90187)	Top-1 acc 26.562 (28.620)	Top-5 acc 52.344 (51.881)	lr 0.02500
Train [1][1430/3239]	Time 0.249 (0.240)	Data Time 0.002 (0.016)	Loss 4.0570 (4.1182)	Entropy 1.90187 (1.90187)	Top-1 acc 25.000 (28.622)	Top-5 acc 57.031 (51.892)	lr 0.02500
Train [1][1440/3239]	Time 0.194 (0.240)	Data Time 0.001 (0.016)	Loss 4.1101 (4.1186)	Entropy 1.90187 (1.90187)	Top-1 acc 28.906 (28.612)	Top-5 acc 54.297 (51.887)	lr 0.02500
Train [1][1450/3239]	Time 0.255 (0.240)	Data Time 0.001 (0.016)	Loss 4.1508 (4.1185)	Entropy 1.90187 (1.90187)	Top-1 acc 26.172 (28.609)	Top-5 acc 51.953 (51.888)	lr 0.02500
Train [1][1460/3239]	Time 0.234 (0.240)	Data Time 0.001 (0.016)	Loss 4.1281 (4.1183)	Entropy 1.90187 (1.90187)	Top-1 acc 30.469 (28.616)	Top-5 acc 50.781 (51.896)	lr 0.02500
Train [1][1470/3239]	Time 0.255 (0.240)	Data Time 0.001 (0.016)	Loss 4.1874 (4.1185)	Entropy 1.90187 (1.90187)	Top-1 acc 27.734 (28.611)	Top-5 acc 48.438 (51.890)	lr 0.02500
Train [1][1480/3239]	Time 0.206 (0.240)	Data Time 0.001 (0.015)	Loss 4.2406 (4.1190)	Entropy 1.90187 (1.90187)	Top-1 acc 26.172 (28.601)	Top-5 acc 48.047 (51.879)	lr 0.02500
Train [1][1490/3239]	Time 0.218 (0.240)	Data Time 0.001 (0.015)	Loss 4.2540 (4.1193)	Entropy 1.90187 (1.90187)	Top-1 acc 26.953 (28.597)	Top-5 acc 48.438 (51.872)	lr 0.02500
Train [1][1500/3239]	Time 0.150 (0.240)	Data Time 0.001 (0.015)	Loss 4.1012 (4.1195)	Entropy 1.90187 (1.90187)	Top-1 acc 32.031 (28.589)	Top-5 acc 50.781 (51.859)	lr 0.02500
Train [1][1510/3239]	Time 0.176 (0.240)	Data Time 0.001 (0.015)	Loss 4.0488 (4.1201)	Entropy 1.90187 (1.90187)	Top-1 acc 33.203 (28.579)	Top-5 acc 49.219 (51.839)	lr 0.02500
Train [1][1520/3239]	Time 0.332 (0.240)	Data Time 0.001 (0.015)	Loss 4.2653 (4.1203)	Entropy 1.90187 (1.90187)	Top-1 acc 24.609 (28.578)	Top-5 acc 48.047 (51.836)	lr 0.02500
Train [1][1530/3239]	Time 0.202 (0.240)	Data Time 0.001 (0.015)	Loss 4.0384 (4.1201)	Entropy 1.90187 (1.90187)	Top-1 acc 33.594 (28.588)	Top-5 acc 55.078 (51.841)	lr 0.02500
Train [1][1540/3239]	Time 0.279 (0.240)	Data Time 0.001 (0.015)	Loss 4.1846 (4.1204)	Entropy 1.90187 (1.90187)	Top-1 acc 25.391 (28.584)	Top-5 acc 50.781 (51.839)	lr 0.02500
Train [1][1550/3239]	Time 0.166 (0.240)	Data Time 0.001 (0.015)	Loss 4.1466 (4.1206)	Entropy 1.90187 (1.90187)	Top-1 acc 24.609 (28.576)	Top-5 acc 53.516 (51.838)	lr 0.02500
Train [1][1560/3239]	Time 0.211 (0.239)	Data Time 0.002 (0.015)	Loss 4.2244 (4.1207)	Entropy 1.90187 (1.90187)	Top-1 acc 28.125 (28.569)	Top-5 acc 50.391 (51.829)	lr 0.02500
Train [1][1570/3239]	Time 0.225 (0.239)	Data Time 0.001 (0.015)	Loss 4.1555 (4.1207)	Entropy 1.90187 (1.90187)	Top-1 acc 28.906 (28.570)	Top-5 acc 49.219 (51.828)	lr 0.02500
Train [1][1580/3239]	Time 0.279 (0.239)	Data Time 0.002 (0.015)	Loss 3.9038 (4.1209)	Entropy 1.90187 (1.90187)	Top-1 acc 30.078 (28.563)	Top-5 acc 57.422 (51.824)	lr 0.02500
Train [1][1590/3239]	Time 0.217 (0.239)	Data Time 0.001 (0.015)	Loss 4.1732 (4.1210)	Entropy 1.90187 (1.90187)	Top-1 acc 30.859 (28.570)	Top-5 acc 55.078 (51.825)	lr 0.02500
Train [1][1600/3239]	Time 0.274 (0.239)	Data Time 0.002 (0.015)	Loss 4.1039 (4.1209)	Entropy 1.90187 (1.90187)	Top-1 acc 33.594 (28.573)	Top-5 acc 53.906 (51.830)	lr 0.02500
Train [1][1610/3239]	Time 0.253 (0.239)	Data Time 0.001 (0.014)	Loss 4.2092 (4.1210)	Entropy 1.90187 (1.90187)	Top-1 acc 25.391 (28.571)	Top-5 acc 46.484 (51.831)	lr 0.02500
Train [1][1620/3239]	Time 0.314 (0.239)	Data Time 0.001 (0.014)	Loss 3.9765 (4.1210)	Entropy 1.90187 (1.90187)	Top-1 acc 32.031 (28.574)	Top-5 acc 55.078 (51.829)	lr 0.02500
Train [1][1630/3239]	Time 0.257 (0.239)	Data Time 0.002 (0.014)	Loss 4.0304 (4.1212)	Entropy 1.90187 (1.90187)	Top-1 acc 30.859 (28.566)	Top-5 acc 55.469 (51.827)	lr 0.02500
Train [1][1640/3239]	Time 0.145 (0.239)	Data Time 0.001 (0.014)	Loss 4.1999 (4.1213)	Entropy 1.90187 (1.90187)	Top-1 acc 26.172 (28.564)	Top-5 acc 51.562 (51.820)	lr 0.02500
Train [1][1650/3239]	Time 0.191 (0.239)	Data Time 0.001 (0.014)	Loss 3.9506 (4.1211)	Entropy 1.90187 (1.90187)	Top-1 acc 33.203 (28.567)	Top-5 acc 55.078 (51.821)	lr 0.02500
Train [1][1660/3239]	Time 0.169 (0.238)	Data Time 0.001 (0.014)	Loss 4.0296 (4.1215)	Entropy 1.90187 (1.90187)	Top-1 acc 32.422 (28.564)	Top-5 acc 54.688 (51.811)	lr 0.02500
Train [1][1670/3239]	Time 0.203 (0.238)	Data Time 0.001 (0.014)	Loss 4.0125 (4.1214)	Entropy 1.90187 (1.90187)	Top-1 acc 31.641 (28.572)	Top-5 acc 51.562 (51.815)	lr 0.02500
Train [1][1680/3239]	Time 0.202 (0.238)	Data Time 0.001 (0.014)	Loss 3.9932 (4.1215)	Entropy 1.90187 (1.90187)	Top-1 acc 32.812 (28.567)	Top-5 acc 55.859 (51.806)	lr 0.02500
Train [1][1690/3239]	Time 0.222 (0.238)	Data Time 0.001 (0.014)	Loss 4.3316 (4.1213)	Entropy 1.90187 (1.90187)	Top-1 acc 23.438 (28.563)	Top-5 acc 46.875 (51.812)	lr 0.02500
Train [1][1700/3239]	Time 0.239 (0.238)	Data Time 0.001 (0.014)	Loss 4.1827 (4.1217)	Entropy 1.90187 (1.90187)	Top-1 acc 26.172 (28.556)	Top-5 acc 52.734 (51.796)	lr 0.02500
Train [1][1710/3239]	Time 0.209 (0.238)	Data Time 0.001 (0.014)	Loss 3.9393 (4.1216)	Entropy 1.90187 (1.90187)	Top-1 acc 31.250 (28.554)	Top-5 acc 55.859 (51.802)	lr 0.02500
Train [1][1720/3239]	Time 0.199 (0.238)	Data Time 0.001 (0.014)	Loss 3.9666 (4.1219)	Entropy 1.90187 (1.90187)	Top-1 acc 34.375 (28.554)	Top-5 acc 53.906 (51.796)	lr 0.02500
Train [1][1730/3239]	Time 0.184 (0.238)	Data Time 0.002 (0.014)	Loss 4.2184 (4.1218)	Entropy 1.90187 (1.90187)	Top-1 acc 23.828 (28.558)	Top-5 acc 48.047 (51.797)	lr 0.02500
Train [1][1740/3239]	Time 0.280 (0.238)	Data Time 0.001 (0.014)	Loss 4.3056 (4.1219)	Entropy 1.90187 (1.90187)	Top-1 acc 23.828 (28.551)	Top-5 acc 43.750 (51.789)	lr 0.02500
Train [1][1750/3239]	Time 0.166 (0.238)	Data Time 0.001 (0.013)	Loss 4.1254 (4.1223)	Entropy 1.90187 (1.90187)	Top-1 acc 32.031 (28.546)	Top-5 acc 50.781 (51.780)	lr 0.02500
Train [1][1760/3239]	Time 0.174 (0.237)	Data Time 0.001 (0.013)	Loss 4.2456 (4.1222)	Entropy 1.90187 (1.90187)	Top-1 acc 23.828 (28.548)	Top-5 acc 48.828 (51.786)	lr 0.02500
Train [1][1770/3239]	Time 0.148 (0.237)	Data Time 0.001 (0.013)	Loss 4.1173 (4.1225)	Entropy 1.90187 (1.90187)	Top-1 acc 30.859 (28.545)	Top-5 acc 50.000 (51.777)	lr 0.02500
Train [1][1780/3239]	Time 0.213 (0.237)	Data Time 0.001 (0.013)	Loss 4.1408 (4.1228)	Entropy 1.90187 (1.90187)	Top-1 acc 29.688 (28.542)	Top-5 acc 53.516 (51.770)	lr 0.02500
Train [1][1790/3239]	Time 0.180 (0.237)	Data Time 0.001 (0.013)	Loss 3.9971 (4.1230)	Entropy 1.90187 (1.90187)	Top-1 acc 27.344 (28.535)	Top-5 acc 52.734 (51.761)	lr 0.02500
Train [1][1800/3239]	Time 0.170 (0.237)	Data Time 0.003 (0.013)	Loss 4.1142 (4.1231)	Entropy 1.90187 (1.90187)	Top-1 acc 27.734 (28.531)	Top-5 acc 51.953 (51.761)	lr 0.02500
Train [1][1810/3239]	Time 0.272 (0.237)	Data Time 0.001 (0.013)	Loss 4.1879 (4.1231)	Entropy 1.90187 (1.90187)	Top-1 acc 25.781 (28.537)	Top-5 acc 50.391 (51.758)	lr 0.02500
Train [1][1820/3239]	Time 0.248 (0.237)	Data Time 0.001 (0.013)	Loss 4.3031 (4.1231)	Entropy 1.90187 (1.90187)	Top-1 acc 27.344 (28.539)	Top-5 acc 48.047 (51.758)	lr 0.02500
Train [1][1830/3239]	Time 0.174 (0.237)	Data Time 0.001 (0.013)	Loss 4.2771 (4.1235)	Entropy 1.90187 (1.90187)	Top-1 acc 26.562 (28.532)	Top-5 acc 48.438 (51.746)	lr 0.02500
Train [1][1840/3239]	Time 0.276 (0.237)	Data Time 0.003 (0.013)	Loss 3.9675 (4.1235)	Entropy 1.90187 (1.90187)	Top-1 acc 33.984 (28.533)	Top-5 acc 55.469 (51.748)	lr 0.02500
Train [1][1850/3239]	Time 0.175 (0.237)	Data Time 0.001 (0.013)	Loss 4.0036 (4.1235)	Entropy 1.90187 (1.90187)	Top-1 acc 30.078 (28.536)	Top-5 acc 55.859 (51.755)	lr 0.02500
Train [1][1860/3239]	Time 0.284 (0.237)	Data Time 0.001 (0.013)	Loss 4.1172 (4.1235)	Entropy 1.90187 (1.90187)	Top-1 acc 30.078 (28.536)	Top-5 acc 48.828 (51.752)	lr 0.02500
Train [1][1870/3239]	Time 0.240 (0.237)	Data Time 0.001 (0.013)	Loss 4.2072 (4.1240)	Entropy 1.90187 (1.90187)	Top-1 acc 28.516 (28.531)	Top-5 acc 48.438 (51.739)	lr 0.02500
Train [1][1880/3239]	Time 0.201 (0.237)	Data Time 0.001 (0.013)	Loss 4.1167 (4.1241)	Entropy 1.90187 (1.90187)	Top-1 acc 32.031 (28.536)	Top-5 acc 55.078 (51.738)	lr 0.02500
Train [1][1890/3239]	Time 0.205 (0.237)	Data Time 0.001 (0.013)	Loss 4.0491 (4.1240)	Entropy 1.90187 (1.90187)	Top-1 acc 31.250 (28.540)	Top-5 acc 53.516 (51.743)	lr 0.02500
Train [1][1900/3239]	Time 0.283 (0.237)	Data Time 0.003 (0.013)	Loss 4.1104 (4.1240)	Entropy 1.90187 (1.90187)	Top-1 acc 26.562 (28.543)	Top-5 acc 49.609 (51.742)	lr 0.02500
Train [1][1910/3239]	Time 0.256 (0.237)	Data Time 0.001 (0.013)	Loss 4.1189 (4.1240)	Entropy 1.90187 (1.90187)	Top-1 acc 27.344 (28.540)	Top-5 acc 51.562 (51.741)	lr 0.02500
Train [1][1920/3239]	Time 0.219 (0.236)	Data Time 0.001 (0.013)	Loss 4.1565 (4.1241)	Entropy 1.90187 (1.90187)	Top-1 acc 29.688 (28.538)	Top-5 acc 51.562 (51.738)	lr 0.02500
Train [1][1930/3239]	Time 0.251 (0.236)	Data Time 0.001 (0.013)	Loss 4.2426 (4.1245)	Entropy 1.90187 (1.90187)	Top-1 acc 24.219 (28.530)	Top-5 acc 46.875 (51.724)	lr 0.02500
Train [1][1940/3239]	Time 0.194 (0.236)	Data Time 0.001 (0.012)	Loss 3.9403 (4.1245)	Entropy 1.90187 (1.90187)	Top-1 acc 31.641 (28.530)	Top-5 acc 56.250 (51.719)	lr 0.02500
Train [1][1950/3239]	Time 0.223 (0.236)	Data Time 0.001 (0.012)	Loss 4.1954 (4.1248)	Entropy 1.90187 (1.90187)	Top-1 acc 28.125 (28.526)	Top-5 acc 50.781 (51.712)	lr 0.02500
Train [1][1960/3239]	Time 0.253 (0.236)	Data Time 0.002 (0.012)	Loss 4.2076 (4.1249)	Entropy 1.90187 (1.90187)	Top-1 acc 27.344 (28.523)	Top-5 acc 46.094 (51.706)	lr 0.02500
Train [1][1970/3239]	Time 0.331 (0.236)	Data Time 0.001 (0.012)	Loss 4.2176 (4.1249)	Entropy 1.90187 (1.90187)	Top-1 acc 25.781 (28.528)	Top-5 acc 50.781 (51.707)	lr 0.02500
Train [1][1980/3239]	Time 0.195 (0.236)	Data Time 0.001 (0.012)	Loss 4.1383 (4.1249)	Entropy 1.90187 (1.90187)	Top-1 acc 27.344 (28.530)	Top-5 acc 52.344 (51.711)	lr 0.02500
Train [1][1990/3239]	Time 0.224 (0.236)	Data Time 0.001 (0.012)	Loss 4.2118 (4.1252)	Entropy 1.90187 (1.90187)	Top-1 acc 24.609 (28.524)	Top-5 acc 51.953 (51.708)	lr 0.02500
Train [1][2000/3239]	Time 0.205 (0.236)	Data Time 0.001 (0.012)	Loss 4.1664 (4.1251)	Entropy 1.90187 (1.90187)	Top-1 acc 28.125 (28.532)	Top-5 acc 52.734 (51.715)	lr 0.02500
Train [1][2010/3239]	Time 0.227 (0.236)	Data Time 0.002 (0.012)	Loss 4.2285 (4.1253)	Entropy 1.90187 (1.90187)	Top-1 acc 25.781 (28.529)	Top-5 acc 51.172 (51.709)	lr 0.02500
Train [1][2020/3239]	Time 0.233 (0.236)	Data Time 0.001 (0.012)	Loss 4.2878 (4.1255)	Entropy 1.90187 (1.90187)	Top-1 acc 21.875 (28.519)	Top-5 acc 48.828 (51.706)	lr 0.02500
Train [1][2030/3239]	Time 0.237 (0.236)	Data Time 0.002 (0.012)	Loss 4.1457 (4.1258)	Entropy 1.90187 (1.90187)	Top-1 acc 28.906 (28.515)	Top-5 acc 49.609 (51.698)	lr 0.02500
Train [1][2040/3239]	Time 0.187 (0.236)	Data Time 0.001 (0.012)	Loss 4.1376 (4.1259)	Entropy 1.90187 (1.90187)	Top-1 acc 28.906 (28.518)	Top-5 acc 50.781 (51.698)	lr 0.02500
Train [1][2050/3239]	Time 0.171 (0.236)	Data Time 0.001 (0.012)	Loss 4.2027 (4.1263)	Entropy 1.90187 (1.90187)	Top-1 acc 27.344 (28.509)	Top-5 acc 49.609 (51.688)	lr 0.02500
Train [1][2060/3239]	Time 0.212 (0.236)	Data Time 0.001 (0.012)	Loss 4.1943 (4.1264)	Entropy 1.90187 (1.90187)	Top-1 acc 28.125 (28.513)	Top-5 acc 50.391 (51.685)	lr 0.02500
Train [1][2070/3239]	Time 0.305 (0.236)	Data Time 0.001 (0.012)	Loss 3.9437 (4.1263)	Entropy 1.90187 (1.90187)	Top-1 acc 32.031 (28.516)	Top-5 acc 56.641 (51.683)	lr 0.02500
Train [1][2080/3239]	Time 0.218 (0.236)	Data Time 0.001 (0.012)	Loss 4.1645 (4.1264)	Entropy 1.90187 (1.90187)	Top-1 acc 25.781 (28.510)	Top-5 acc 51.562 (51.685)	lr 0.02500
Train [1][2090/3239]	Time 0.277 (0.236)	Data Time 0.001 (0.012)	Loss 4.2984 (4.1264)	Entropy 1.90187 (1.90187)	Top-1 acc 25.000 (28.507)	Top-5 acc 49.219 (51.683)	lr 0.02500
Train [1][2100/3239]	Time 0.232 (0.236)	Data Time 0.001 (0.012)	Loss 4.0955 (4.1265)	Entropy 1.90187 (1.90187)	Top-1 acc 25.391 (28.504)	Top-5 acc 53.906 (51.681)	lr 0.02500
Train [1][2110/3239]	Time 0.145 (0.235)	Data Time 0.001 (0.012)	Loss 4.1525 (4.1265)	Entropy 1.90187 (1.90187)	Top-1 acc 30.078 (28.507)	Top-5 acc 48.438 (51.681)	lr 0.02500
Train [1][2120/3239]	Time 0.219 (0.235)	Data Time 0.001 (0.012)	Loss 4.0496 (4.1262)	Entropy 1.90187 (1.90187)	Top-1 acc 28.516 (28.512)	Top-5 acc 56.641 (51.689)	lr 0.02500
Train [1][2130/3239]	Time 0.259 (0.235)	Data Time 0.003 (0.012)	Loss 4.1927 (4.1265)	Entropy 1.90187 (1.90187)	Top-1 acc 28.125 (28.507)	Top-5 acc 49.219 (51.685)	lr 0.02500
Train [1][2140/3239]	Time 0.245 (0.235)	Data Time 0.001 (0.012)	Loss 4.1717 (4.1267)	Entropy 1.90187 (1.90187)	Top-1 acc 30.859 (28.507)	Top-5 acc 50.391 (51.679)	lr 0.02500
Train [1][2150/3239]	Time 0.168 (0.235)	Data Time 0.002 (0.012)	Loss 4.2610 (4.1265)	Entropy 1.90187 (1.90187)	Top-1 acc 26.953 (28.515)	Top-5 acc 50.391 (51.686)	lr 0.02500
Train [1][2160/3239]	Time 0.236 (0.235)	Data Time 0.001 (0.012)	Loss 4.1728 (4.1265)	Entropy 1.90187 (1.90187)	Top-1 acc 28.125 (28.514)	Top-5 acc 53.516 (51.688)	lr 0.02500
Train [1][2170/3239]	Time 0.224 (0.235)	Data Time 0.002 (0.012)	Loss 4.2217 (4.1264)	Entropy 1.90187 (1.90187)	Top-1 acc 24.609 (28.520)	Top-5 acc 48.047 (51.692)	lr 0.02500
Train [1][2180/3239]	Time 0.281 (0.235)	Data Time 0.001 (0.011)	Loss 4.0137 (4.1263)	Entropy 1.90187 (1.90187)	Top-1 acc 28.125 (28.522)	Top-5 acc 51.953 (51.693)	lr 0.02500
Train [1][2190/3239]	Time 0.156 (0.235)	Data Time 0.001 (0.011)	Loss 4.0761 (4.1266)	Entropy 1.90187 (1.90187)	Top-1 acc 31.641 (28.519)	Top-5 acc 51.172 (51.688)	lr 0.02500
Train [1][2200/3239]	Time 0.208 (0.235)	Data Time 0.001 (0.011)	Loss 4.1168 (4.1268)	Entropy 1.90187 (1.90187)	Top-1 acc 26.172 (28.511)	Top-5 acc 52.344 (51.687)	lr 0.02500
Train [1][2210/3239]	Time 0.163 (0.235)	Data Time 0.001 (0.011)	Loss 4.1221 (4.1268)	Entropy 1.90187 (1.90187)	Top-1 acc 28.125 (28.511)	Top-5 acc 52.734 (51.689)	lr 0.02500
Train [1][2220/3239]	Time 0.229 (0.235)	Data Time 0.001 (0.011)	Loss 4.2015 (4.1268)	Entropy 1.90187 (1.90187)	Top-1 acc 26.953 (28.510)	Top-5 acc 48.828 (51.688)	lr 0.02500
Train [1][2230/3239]	Time 0.189 (0.235)	Data Time 0.001 (0.011)	Loss 4.1997 (4.1266)	Entropy 1.90187 (1.90187)	Top-1 acc 26.172 (28.510)	Top-5 acc 53.516 (51.691)	lr 0.02500
Train [1][2240/3239]	Time 0.214 (0.235)	Data Time 0.001 (0.011)	Loss 4.0252 (4.1265)	Entropy 1.90187 (1.90187)	Top-1 acc 33.984 (28.515)	Top-5 acc 54.688 (51.694)	lr 0.02500
Train [1][2250/3239]	Time 0.283 (0.235)	Data Time 0.001 (0.011)	Loss 4.3109 (4.1264)	Entropy 1.90187 (1.90187)	Top-1 acc 25.391 (28.519)	Top-5 acc 47.656 (51.696)	lr 0.02500
Train [1][2260/3239]	Time 0.264 (0.235)	Data Time 0.001 (0.011)	Loss 3.7746 (4.1260)	Entropy 1.90187 (1.90187)	Top-1 acc 36.719 (28.523)	Top-5 acc 60.938 (51.702)	lr 0.02500
Train [1][2270/3239]	Time 0.240 (0.235)	Data Time 0.001 (0.011)	Loss 4.0785 (4.1261)	Entropy 1.90187 (1.90187)	Top-1 acc 31.250 (28.523)	Top-5 acc 55.469 (51.701)	lr 0.02500
Train [1][2280/3239]	Time 0.288 (0.235)	Data Time 0.001 (0.011)	Loss 4.2715 (4.1262)	Entropy 1.90187 (1.90187)	Top-1 acc 26.172 (28.523)	Top-5 acc 44.922 (51.696)	lr 0.02500
Train [1][2290/3239]	Time 0.182 (0.235)	Data Time 0.001 (0.011)	Loss 3.9797 (4.1260)	Entropy 1.90187 (1.90187)	Top-1 acc 28.516 (28.527)	Top-5 acc 55.078 (51.703)	lr 0.02500
Train [1][2300/3239]	Time 0.151 (0.235)	Data Time 0.002 (0.011)	Loss 3.8603 (4.1259)	Entropy 1.90187 (1.90187)	Top-1 acc 28.516 (28.528)	Top-5 acc 58.203 (51.707)	lr 0.02500
Train [1][2310/3239]	Time 0.254 (0.235)	Data Time 0.001 (0.011)	Loss 4.0142 (4.1259)	Entropy 1.90187 (1.90187)	Top-1 acc 28.906 (28.526)	Top-5 acc 56.250 (51.704)	lr 0.02500
Train [1][2320/3239]	Time 0.233 (0.235)	Data Time 0.001 (0.011)	Loss 4.2298 (4.1258)	Entropy 1.90187 (1.90187)	Top-1 acc 25.391 (28.524)	Top-5 acc 47.656 (51.707)	lr 0.02500
Train [1][2330/3239]	Time 0.239 (0.235)	Data Time 0.001 (0.011)	Loss 4.1699 (4.1260)	Entropy 1.90187 (1.90187)	Top-1 acc 29.297 (28.521)	Top-5 acc 46.875 (51.699)	lr 0.02500
Train [1][2340/3239]	Time 0.200 (0.235)	Data Time 0.001 (0.011)	Loss 4.1642 (4.1259)	Entropy 1.90187 (1.90187)	Top-1 acc 28.125 (28.521)	Top-5 acc 50.781 (51.702)	lr 0.02500
Train [1][2350/3239]	Time 0.165 (0.234)	Data Time 0.001 (0.011)	Loss 4.3708 (4.1266)	Entropy 1.90187 (1.90187)	Top-1 acc 18.750 (28.507)	Top-5 acc 42.578 (51.683)	lr 0.02500
Train [1][2360/3239]	Time 0.227 (0.234)	Data Time 0.001 (0.011)	Loss 4.0479 (4.1265)	Entropy 1.90187 (1.90187)	Top-1 acc 26.953 (28.515)	Top-5 acc 54.297 (51.682)	lr 0.02500
Train [1][2370/3239]	Time 0.175 (0.234)	Data Time 0.002 (0.011)	Loss 3.9825 (4.1264)	Entropy 1.90187 (1.90187)	Top-1 acc 32.031 (28.519)	Top-5 acc 56.250 (51.680)	lr 0.02500
Train [1][2380/3239]	Time 0.361 (0.234)	Data Time 0.001 (0.011)	Loss 4.0030 (4.1263)	Entropy 1.90187 (1.90187)	Top-1 acc 32.031 (28.520)	Top-5 acc 57.422 (51.686)	lr 0.02500
Train [1][2390/3239]	Time 0.287 (0.234)	Data Time 0.001 (0.011)	Loss 3.9610 (4.1263)	Entropy 1.90187 (1.90187)	Top-1 acc 32.422 (28.518)	Top-5 acc 53.906 (51.684)	lr 0.02500
Train [1][2400/3239]	Time 0.191 (0.234)	Data Time 0.001 (0.011)	Loss 4.0566 (4.1265)	Entropy 1.90187 (1.90187)	Top-1 acc 32.812 (28.515)	Top-5 acc 55.078 (51.678)	lr 0.02500
Train [1][2410/3239]	Time 0.220 (0.234)	Data Time 0.001 (0.011)	Loss 4.1140 (4.1266)	Entropy 1.90187 (1.90187)	Top-1 acc 30.078 (28.508)	Top-5 acc 51.172 (51.673)	lr 0.02500
Train [1][2420/3239]	Time 0.226 (0.234)	Data Time 0.001 (0.011)	Loss 4.0428 (4.1266)	Entropy 1.90187 (1.90187)	Top-1 acc 31.641 (28.510)	Top-5 acc 55.078 (51.668)	lr 0.02500
Train [1][2430/3239]	Time 0.198 (0.234)	Data Time 0.001 (0.011)	Loss 4.1657 (4.1267)	Entropy 1.90187 (1.90187)	Top-1 acc 26.172 (28.507)	Top-5 acc 50.781 (51.663)	lr 0.02500
Train [1][2440/3239]	Time 0.217 (0.234)	Data Time 0.001 (0.011)	Loss 4.1622 (4.1267)	Entropy 1.90187 (1.90187)	Top-1 acc 26.953 (28.507)	Top-5 acc 51.562 (51.660)	lr 0.02500
Train [1][2450/3239]	Time 0.223 (0.234)	Data Time 0.001 (0.011)	Loss 4.1050 (4.1265)	Entropy 1.90187 (1.90187)	Top-1 acc 31.641 (28.514)	Top-5 acc 50.000 (51.668)	lr 0.02500
Train [1][2460/3239]	Time 0.158 (0.234)	Data Time 0.001 (0.011)	Loss 4.2104 (4.1266)	Entropy 1.90187 (1.90187)	Top-1 acc 27.734 (28.512)	Top-5 acc 51.562 (51.666)	lr 0.02500
Train [1][2470/3239]	Time 0.229 (0.234)	Data Time 0.001 (0.010)	Loss 4.1591 (4.1267)	Entropy 1.90187 (1.90187)	Top-1 acc 27.344 (28.509)	Top-5 acc 51.953 (51.666)	lr 0.02500
Train [1][2480/3239]	Time 0.296 (0.234)	Data Time 0.001 (0.010)	Loss 4.2746 (4.1266)	Entropy 1.90187 (1.90187)	Top-1 acc 29.688 (28.510)	Top-5 acc 47.266 (51.664)	lr 0.02500
Train [1][2490/3239]	Time 0.199 (0.234)	Data Time 0.001 (0.010)	Loss 4.1686 (4.1269)	Entropy 1.90187 (1.90187)	Top-1 acc 26.953 (28.505)	Top-5 acc 50.000 (51.659)	lr 0.02500
Train [1][2500/3239]	Time 0.155 (0.234)	Data Time 0.001 (0.010)	Loss 3.9881 (4.1272)	Entropy 1.90187 (1.90187)	Top-1 acc 30.859 (28.501)	Top-5 acc 57.422 (51.656)	lr 0.02500
Train [1][2510/3239]	Time 0.254 (0.234)	Data Time 0.001 (0.010)	Loss 4.2235 (4.1273)	Entropy 1.90187 (1.90187)	Top-1 acc 26.172 (28.496)	Top-5 acc 48.828 (51.656)	lr 0.02500
Train [1][2520/3239]	Time 0.198 (0.234)	Data Time 0.002 (0.010)	Loss 4.3254 (4.1273)	Entropy 1.90187 (1.90187)	Top-1 acc 25.000 (28.495)	Top-5 acc 45.703 (51.651)	lr 0.02500
Train [1][2530/3239]	Time 0.180 (0.234)	Data Time 0.002 (0.010)	Loss 4.0959 (4.1270)	Entropy 1.90187 (1.90187)	Top-1 acc 29.688 (28.499)	Top-5 acc 53.516 (51.661)	lr 0.02500
Train [1][2540/3239]	Time 0.302 (0.234)	Data Time 0.001 (0.010)	Loss 4.0837 (4.1270)	Entropy 1.90187 (1.90187)	Top-1 acc 28.516 (28.501)	Top-5 acc 53.516 (51.661)	lr 0.02500
Train [1][2550/3239]	Time 0.201 (0.234)	Data Time 0.001 (0.010)	Loss 4.1237 (4.1271)	Entropy 1.90187 (1.90187)	Top-1 acc 27.734 (28.504)	Top-5 acc 53.516 (51.659)	lr 0.02500
Train [1][2560/3239]	Time 0.148 (0.234)	Data Time 0.001 (0.010)	Loss 4.0684 (4.1271)	Entropy 1.90187 (1.90187)	Top-1 acc 30.078 (28.504)	Top-5 acc 52.734 (51.659)	lr 0.02500
Train [1][2570/3239]	Time 0.239 (0.234)	Data Time 0.001 (0.010)	Loss 4.0432 (4.1272)	Entropy 1.90187 (1.90187)	Top-1 acc 28.516 (28.498)	Top-5 acc 54.297 (51.654)	lr 0.02500
Train [1][2580/3239]	Time 0.181 (0.234)	Data Time 0.001 (0.010)	Loss 4.1905 (4.1273)	Entropy 1.90187 (1.90187)	Top-1 acc 27.734 (28.498)	Top-5 acc 49.609 (51.651)	lr 0.02500
Train [1][2590/3239]	Time 0.343 (0.234)	Data Time 0.001 (0.010)	Loss 4.1433 (4.1273)	Entropy 1.90187 (1.90187)	Top-1 acc 32.031 (28.503)	Top-5 acc 50.000 (51.649)	lr 0.02500
Train [1][2600/3239]	Time 0.242 (0.234)	Data Time 0.001 (0.010)	Loss 4.1607 (4.1273)	Entropy 1.90187 (1.90187)	Top-1 acc 29.688 (28.504)	Top-5 acc 48.828 (51.649)	lr 0.02500
Train [1][2610/3239]	Time 0.209 (0.234)	Data Time 0.001 (0.010)	Loss 4.3177 (4.1275)	Entropy 1.90187 (1.90187)	Top-1 acc 26.562 (28.501)	Top-5 acc 48.438 (51.644)	lr 0.02500
Train [1][2620/3239]	Time 0.189 (0.234)	Data Time 0.001 (0.010)	Loss 4.1881 (4.1277)	Entropy 1.90187 (1.90187)	Top-1 acc 28.125 (28.495)	Top-5 acc 49.219 (51.639)	lr 0.02500
Train [1][2630/3239]	Time 0.188 (0.234)	Data Time 0.002 (0.010)	Loss 4.0106 (4.1276)	Entropy 1.90187 (1.90187)	Top-1 acc 30.078 (28.497)	Top-5 acc 55.078 (51.640)	lr 0.02500
Train [1][2640/3239]	Time 0.177 (0.234)	Data Time 0.001 (0.010)	Loss 3.9470 (4.1275)	Entropy 1.90187 (1.90187)	Top-1 acc 34.375 (28.503)	Top-5 acc 55.469 (51.648)	lr 0.02500
Train [1][2650/3239]	Time 0.217 (0.234)	Data Time 0.002 (0.010)	Loss 3.9877 (4.1275)	Entropy 1.90187 (1.90187)	Top-1 acc 29.688 (28.500)	Top-5 acc 55.469 (51.648)	lr 0.02500
Train [1][2660/3239]	Time 0.228 (0.234)	Data Time 0.001 (0.010)	Loss 4.2785 (4.1276)	Entropy 1.90187 (1.90187)	Top-1 acc 28.516 (28.498)	Top-5 acc 48.047 (51.646)	lr 0.02500
Train [1][2670/3239]	Time 0.258 (0.234)	Data Time 0.001 (0.010)	Loss 3.9525 (4.1273)	Entropy 1.90187 (1.90187)	Top-1 acc 30.469 (28.501)	Top-5 acc 56.641 (51.653)	lr 0.02500
Train [1][2680/3239]	Time 0.170 (0.234)	Data Time 0.002 (0.010)	Loss 4.2125 (4.1274)	Entropy 1.90187 (1.90187)	Top-1 acc 22.656 (28.498)	Top-5 acc 49.609 (51.653)	lr 0.02500
Train [1][2690/3239]	Time 0.159 (0.233)	Data Time 0.001 (0.010)	Loss 4.4304 (4.1277)	Entropy 1.90187 (1.90187)	Top-1 acc 23.047 (28.492)	Top-5 acc 44.141 (51.646)	lr 0.02500
Train [1][2700/3239]	Time 0.327 (0.233)	Data Time 0.001 (0.010)	Loss 4.0903 (4.1278)	Entropy 1.90187 (1.90187)	Top-1 acc 32.422 (28.486)	Top-5 acc 53.516 (51.639)	lr 0.02500
Train [1][2710/3239]	Time 0.262 (0.233)	Data Time 0.001 (0.010)	Loss 4.2209 (4.1278)	Entropy 1.90187 (1.90187)	Top-1 acc 28.125 (28.487)	Top-5 acc 51.172 (51.638)	lr 0.02500
Train [1][2720/3239]	Time 0.171 (0.233)	Data Time 0.001 (0.010)	Loss 4.3105 (4.1280)	Entropy 1.90187 (1.90187)	Top-1 acc 23.438 (28.480)	Top-5 acc 46.875 (51.633)	lr 0.02500
Train [1][2730/3239]	Time 0.160 (0.233)	Data Time 0.001 (0.010)	Loss 4.2665 (4.1282)	Entropy 1.90187 (1.90187)	Top-1 acc 26.172 (28.478)	Top-5 acc 46.875 (51.627)	lr 0.02500
Train [1][2740/3239]	Time 0.276 (0.233)	Data Time 0.001 (0.010)	Loss 4.1033 (4.1281)	Entropy 1.90187 (1.90187)	Top-1 acc 25.000 (28.480)	Top-5 acc 53.125 (51.629)	lr 0.02500
Train [1][2750/3239]	Time 0.242 (0.233)	Data Time 0.001 (0.010)	Loss 4.1730 (4.1281)	Entropy 1.90187 (1.90187)	Top-1 acc 32.031 (28.479)	Top-5 acc 53.516 (51.633)	lr 0.02500
Train [1][2760/3239]	Time 0.203 (0.233)	Data Time 0.001 (0.010)	Loss 4.3266 (4.1283)	Entropy 1.90187 (1.90187)	Top-1 acc 28.125 (28.476)	Top-5 acc 47.266 (51.627)	lr 0.02500
Train [1][2770/3239]	Time 0.270 (0.233)	Data Time 0.001 (0.010)	Loss 4.1270 (4.1285)	Entropy 1.90187 (1.90187)	Top-1 acc 28.125 (28.475)	Top-5 acc 49.219 (51.623)	lr 0.02500
Train [1][2780/3239]	Time 0.247 (0.233)	Data Time 0.001 (0.010)	Loss 4.0619 (4.1285)	Entropy 1.90187 (1.90187)	Top-1 acc 29.297 (28.477)	Top-5 acc 53.906 (51.623)	lr 0.02500
Train [1][2790/3239]	Time 0.231 (0.233)	Data Time 0.001 (0.010)	Loss 4.2182 (4.1287)	Entropy 1.90187 (1.90187)	Top-1 acc 28.125 (28.472)	Top-5 acc 50.000 (51.619)	lr 0.02500
Train [1][2800/3239]	Time 0.322 (0.233)	Data Time 0.001 (0.010)	Loss 3.9971 (4.1288)	Entropy 1.90187 (1.90187)	Top-1 acc 29.297 (28.469)	Top-5 acc 55.469 (51.617)	lr 0.02500
Train [1][2810/3239]	Time 0.227 (0.233)	Data Time 0.001 (0.010)	Loss 4.0530 (4.1287)	Entropy 1.90187 (1.90187)	Top-1 acc 30.469 (28.467)	Top-5 acc 51.562 (51.616)	lr 0.02500
Train [1][2820/3239]	Time 0.225 (0.233)	Data Time 0.001 (0.010)	Loss 4.0410 (4.1287)	Entropy 1.90187 (1.90187)	Top-1 acc 27.344 (28.468)	Top-5 acc 53.516 (51.617)	lr 0.02500
Train [1][2830/3239]	Time 0.230 (0.233)	Data Time 0.001 (0.010)	Loss 4.2300 (4.1289)	Entropy 1.90187 (1.90187)	Top-1 acc 23.438 (28.464)	Top-5 acc 50.391 (51.611)	lr 0.02500
Train [1][2840/3239]	Time 0.183 (0.233)	Data Time 0.001 (0.010)	Loss 4.3840 (4.1289)	Entropy 1.90187 (1.90187)	Top-1 acc 22.656 (28.461)	Top-5 acc 44.141 (51.612)	lr 0.02500
Train [1][2850/3239]	Time 0.206 (0.233)	Data Time 0.001 (0.010)	Loss 4.0163 (4.1289)	Entropy 1.90187 (1.90187)	Top-1 acc 28.516 (28.459)	Top-5 acc 56.250 (51.612)	lr 0.02500
Train [1][2860/3239]	Time 0.214 (0.233)	Data Time 0.001 (0.010)	Loss 4.2006 (4.1290)	Entropy 1.90187 (1.90187)	Top-1 acc 24.219 (28.457)	Top-5 acc 49.219 (51.606)	lr 0.02500
Train [1][2870/3239]	Time 0.223 (0.233)	Data Time 0.001 (0.010)	Loss 4.1107 (4.1291)	Entropy 1.90187 (1.90187)	Top-1 acc 28.906 (28.454)	Top-5 acc 51.562 (51.603)	lr 0.02500
Train [1][2880/3239]	Time 0.199 (0.233)	Data Time 0.001 (0.009)	Loss 3.8377 (4.1290)	Entropy 1.90187 (1.90187)	Top-1 acc 32.812 (28.454)	Top-5 acc 58.594 (51.603)	lr 0.02500
Train [1][2890/3239]	Time 0.263 (0.233)	Data Time 0.001 (0.009)	Loss 4.0687 (4.1289)	Entropy 1.90187 (1.90187)	Top-1 acc 26.953 (28.453)	Top-5 acc 52.344 (51.605)	lr 0.02500
Train [1][2900/3239]	Time 0.258 (0.233)	Data Time 0.003 (0.009)	Loss 3.9705 (4.1287)	Entropy 1.90187 (1.90187)	Top-1 acc 31.641 (28.458)	Top-5 acc 52.344 (51.611)	lr 0.02500
Train [1][2910/3239]	Time 0.195 (0.233)	Data Time 0.002 (0.009)	Loss 4.1743 (4.1290)	Entropy 1.90187 (1.90187)	Top-1 acc 23.438 (28.454)	Top-5 acc 50.781 (51.603)	lr 0.02500
Train [1][2920/3239]	Time 0.177 (0.233)	Data Time 0.001 (0.009)	Loss 4.2476 (4.1290)	Entropy 1.90187 (1.90187)	Top-1 acc 23.047 (28.452)	Top-5 acc 48.047 (51.601)	lr 0.02500
Train [1][2930/3239]	Time 0.194 (0.233)	Data Time 0.001 (0.009)	Loss 4.0087 (4.1289)	Entropy 1.90187 (1.90187)	Top-1 acc 31.641 (28.454)	Top-5 acc 55.078 (51.603)	lr 0.02500
Train [1][2940/3239]	Time 0.239 (0.233)	Data Time 0.001 (0.009)	Loss 3.9145 (4.1289)	Entropy 1.90187 (1.90187)	Top-1 acc 32.422 (28.452)	Top-5 acc 55.469 (51.603)	lr 0.02500
Train [1][2950/3239]	Time 0.143 (0.233)	Data Time 0.002 (0.009)	Loss 4.2244 (4.1289)	Entropy 1.90187 (1.90187)	Top-1 acc 29.688 (28.453)	Top-5 acc 49.609 (51.603)	lr 0.02500
Train [1][2960/3239]	Time 0.208 (0.233)	Data Time 0.001 (0.009)	Loss 4.0592 (4.1286)	Entropy 1.90187 (1.90187)	Top-1 acc 27.734 (28.457)	Top-5 acc 51.172 (51.606)	lr 0.02500
Train [1][2970/3239]	Time 0.146 (0.233)	Data Time 0.001 (0.009)	Loss 4.0252 (4.1287)	Entropy 1.90187 (1.90187)	Top-1 acc 31.250 (28.456)	Top-5 acc 53.516 (51.605)	lr 0.02500
Train [1][2980/3239]	Time 0.186 (0.233)	Data Time 0.001 (0.009)	Loss 4.3095 (4.1288)	Entropy 1.90187 (1.90187)	Top-1 acc 23.047 (28.451)	Top-5 acc 49.219 (51.603)	lr 0.02500
Train [1][2990/3239]	Time 0.200 (0.233)	Data Time 0.001 (0.009)	Loss 3.9742 (4.1288)	Entropy 1.90187 (1.90187)	Top-1 acc 34.766 (28.451)	Top-5 acc 55.078 (51.602)	lr 0.02500
Train [1][3000/3239]	Time 0.229 (0.233)	Data Time 0.001 (0.009)	Loss 4.1425 (4.1286)	Entropy 1.90187 (1.90187)	Top-1 acc 25.781 (28.453)	Top-5 acc 53.906 (51.608)	lr 0.02500
Train [1][3010/3239]	Time 0.341 (0.233)	Data Time 0.001 (0.009)	Loss 4.2453 (4.1288)	Entropy 1.90187 (1.90187)	Top-1 acc 28.516 (28.448)	Top-5 acc 50.391 (51.603)	lr 0.02500
Train [1][3020/3239]	Time 0.221 (0.233)	Data Time 0.001 (0.009)	Loss 4.2366 (4.1288)	Entropy 1.90187 (1.90187)	Top-1 acc 27.344 (28.448)	Top-5 acc 51.172 (51.606)	lr 0.02500
Train [1][3030/3239]	Time 0.241 (0.233)	Data Time 0.002 (0.009)	Loss 4.0547 (4.1286)	Entropy 1.90187 (1.90187)	Top-1 acc 28.906 (28.446)	Top-5 acc 53.125 (51.610)	lr 0.02500
Train [1][3040/3239]	Time 0.230 (0.233)	Data Time 0.001 (0.009)	Loss 4.0913 (4.1285)	Entropy 1.90187 (1.90187)	Top-1 acc 33.203 (28.448)	Top-5 acc 53.516 (51.612)	lr 0.02500
Train [1][3050/3239]	Time 0.244 (0.233)	Data Time 0.001 (0.009)	Loss 4.0014 (4.1284)	Entropy 1.90187 (1.90187)	Top-1 acc 32.422 (28.451)	Top-5 acc 51.562 (51.615)	lr 0.02500
Train [1][3060/3239]	Time 0.223 (0.233)	Data Time 0.001 (0.009)	Loss 4.0752 (4.1284)	Entropy 1.90187 (1.90187)	Top-1 acc 32.422 (28.454)	Top-5 acc 57.812 (51.619)	lr 0.02500
Train [1][3070/3239]	Time 0.209 (0.233)	Data Time 0.001 (0.009)	Loss 4.1667 (4.1285)	Entropy 1.90187 (1.90187)	Top-1 acc 28.125 (28.453)	Top-5 acc 51.172 (51.619)	lr 0.02500
Train [1][3080/3239]	Time 0.229 (0.233)	Data Time 0.002 (0.009)	Loss 4.2420 (4.1285)	Entropy 1.90187 (1.90187)	Top-1 acc 25.781 (28.453)	Top-5 acc 49.609 (51.617)	lr 0.02500
Train [1][3090/3239]	Time 0.222 (0.233)	Data Time 0.002 (0.009)	Loss 4.1022 (4.1284)	Entropy 1.90187 (1.90187)	Top-1 acc 26.953 (28.457)	Top-5 acc 52.344 (51.621)	lr 0.02500
Train [1][3100/3239]	Time 0.218 (0.233)	Data Time 0.001 (0.009)	Loss 4.1498 (4.1284)	Entropy 1.90187 (1.90187)	Top-1 acc 26.172 (28.457)	Top-5 acc 48.047 (51.620)	lr 0.02500
Train [1][3110/3239]	Time 0.364 (0.233)	Data Time 0.001 (0.009)	Loss 4.2037 (4.1284)	Entropy 1.90187 (1.90187)	Top-1 acc 26.172 (28.456)	Top-5 acc 48.438 (51.618)	lr 0.02500
Train [1][3120/3239]	Time 0.227 (0.233)	Data Time 0.001 (0.009)	Loss 4.1192 (4.1283)	Entropy 1.90187 (1.90187)	Top-1 acc 30.469 (28.459)	Top-5 acc 51.172 (51.619)	lr 0.02500
Train [1][3130/3239]	Time 0.202 (0.233)	Data Time 0.001 (0.009)	Loss 4.1815 (4.1282)	Entropy 1.90187 (1.90187)	Top-1 acc 26.172 (28.461)	Top-5 acc 52.344 (51.622)	lr 0.02500
Train [1][3140/3239]	Time 0.279 (0.233)	Data Time 0.001 (0.009)	Loss 4.2952 (4.1280)	Entropy 1.90187 (1.90187)	Top-1 acc 23.828 (28.466)	Top-5 acc 45.703 (51.623)	lr 0.02500
Train [1][3150/3239]	Time 0.277 (0.233)	Data Time 0.001 (0.009)	Loss 4.1211 (4.1282)	Entropy 1.90187 (1.90187)	Top-1 acc 28.906 (28.463)	Top-5 acc 51.953 (51.621)	lr 0.02500
Train [1][3160/3239]	Time 0.172 (0.233)	Data Time 0.001 (0.009)	Loss 4.0843 (4.1284)	Entropy 1.90187 (1.90187)	Top-1 acc 28.906 (28.459)	Top-5 acc 52.344 (51.615)	lr 0.02500
Train [1][3170/3239]	Time 0.253 (0.233)	Data Time 0.002 (0.009)	Loss 4.2039 (4.1284)	Entropy 1.90187 (1.90187)	Top-1 acc 25.391 (28.460)	Top-5 acc 50.781 (51.615)	lr 0.02500
Train [1][3180/3239]	Time 0.162 (0.232)	Data Time 0.000 (0.009)	Loss 4.2311 (4.1283)	Entropy 1.90187 (1.90187)	Top-1 acc 27.344 (28.462)	Top-5 acc 49.219 (51.621)	lr 0.02500
Train [1][3190/3239]	Time 0.176 (0.232)	Data Time 0.000 (0.009)	Loss 4.2935 (4.1284)	Entropy 1.90187 (1.90187)	Top-1 acc 26.562 (28.462)	Top-5 acc 48.047 (51.623)	lr 0.02500
Train [1][3200/3239]	Time 0.216 (0.232)	Data Time 0.000 (0.009)	Loss 4.0872 (4.1282)	Entropy 1.90187 (1.90187)	Top-1 acc 29.688 (28.465)	Top-5 acc 51.562 (51.628)	lr 0.02500
Train [1][3210/3239]	Time 0.161 (0.232)	Data Time 0.000 (0.009)	Loss 4.1815 (4.1281)	Entropy 1.90187 (1.90187)	Top-1 acc 27.344 (28.465)	Top-5 acc 51.562 (51.629)	lr 0.02500
Train [1][3220/3239]	Time 0.240 (0.232)	Data Time 0.000 (0.009)	Loss 4.0606 (4.1281)	Entropy 1.90187 (1.90187)	Top-1 acc 27.734 (28.465)	Top-5 acc 51.562 (51.630)	lr 0.02500
Train [1][3230/3239]	Time 0.185 (0.232)	Data Time 0.000 (0.009)	Loss 4.2286 (4.1281)	Entropy 1.90187 (1.90187)	Top-1 acc 27.734 (28.465)	Top-5 acc 48.047 (51.628)	lr 0.02500
Train [1][3239/3239]	Time 0.164 (0.232)	Data Time 0.000 (0.009)	Loss 4.2899 (4.1280)	Entropy 1.90187 (1.90187)	Top-1 acc 28.395 (28.468)	Top-5 acc 48.148 (51.631)	lr 0.02500
==========Valid [1/120]	loss 3.069	top-1 acc 36.190 (36.190)	top-5 acc 61.163	Train top-1 28.468	top-5 51.631	Entropy 1.90187	Latency-None: 0.000ms	Flops: 442.44M
Train [2][0/3239]	Time 22.757 (22.757)	Data Time 20.110 (20.110)	Loss 4.0097 (4.0097)	Entropy 1.90187 (1.90187)	Top-1 acc 31.641 (31.641)	Top-5 acc 53.516 (53.516)	lr 0.02500
Train [2][10/3239]	Time 30.389 (5.119)	Data Time 0.001 (1.883)	Loss 4.1405 (4.1583)	Entropy 1.90187 (1.90187)	Top-1 acc 30.469 (28.942)	Top-5 acc 54.688 (51.030)	lr 0.02500
Train [2][20/3239]	Time 0.689 (2.941)	Data Time 0.002 (0.995)	Loss 3.9635 (4.1063)	Entropy 1.90187 (1.90187)	Top-1 acc 32.031 (29.483)	Top-5 acc 56.641 (52.548)	lr 0.02500
Train [2][30/3239]	Time 0.189 (2.162)	Data Time 0.001 (0.675)	Loss 4.1122 (4.0933)	Entropy 1.90187 (1.90187)	Top-1 acc 28.906 (29.990)	Top-5 acc 51.562 (52.608)	lr 0.02500
Train [2][40/3239]	Time 0.220 (1.739)	Data Time 0.002 (0.511)	Loss 4.0415 (4.0950)	Entropy 1.90187 (1.90187)	Top-1 acc 28.906 (29.592)	Top-5 acc 50.781 (52.658)	lr 0.02500
Train [2][50/3239]	Time 0.191 (1.477)	Data Time 0.001 (0.411)	Loss 4.0256 (4.0845)	Entropy 1.90186 (1.90187)	Top-1 acc 28.906 (29.779)	Top-5 acc 54.688 (53.010)	lr 0.02500
Train [2][60/3239]	Time 0.209 (1.300)	Data Time 0.001 (0.344)	Loss 4.0967 (4.0858)	Entropy 1.90186 (1.90187)	Top-1 acc 30.078 (29.636)	Top-5 acc 52.734 (52.997)	lr 0.02500
Train [2][70/3239]	Time 0.220 (1.174)	Data Time 0.001 (0.296)	Loss 4.2028 (4.0968)	Entropy 1.90186 (1.90187)	Top-1 acc 23.047 (29.302)	Top-5 acc 51.562 (52.635)	lr 0.02500
Train [2][80/3239]	Time 0.236 (1.077)	Data Time 0.002 (0.260)	Loss 4.0779 (4.0962)	Entropy 1.90186 (1.90187)	Top-1 acc 24.219 (29.302)	Top-5 acc 53.516 (52.537)	lr 0.02500
Train [2][90/3239]	Time 0.192 (1.003)	Data Time 0.002 (0.231)	Loss 3.9885 (4.0945)	Entropy 1.90186 (1.90186)	Top-1 acc 30.469 (29.271)	Top-5 acc 53.906 (52.511)	lr 0.02500
Train [2][100/3239]	Time 0.187 (0.945)	Data Time 0.001 (0.209)	Loss 3.9197 (4.0957)	Entropy 1.90186 (1.90186)	Top-1 acc 30.859 (29.262)	Top-5 acc 57.422 (52.580)	lr 0.02500
Train [2][110/3239]	Time 0.134 (0.896)	Data Time 0.001 (0.190)	Loss 4.2136 (4.0952)	Entropy 1.90186 (1.90186)	Top-1 acc 24.609 (29.230)	Top-5 acc 47.266 (52.534)	lr 0.02500
Train [2][120/3239]	Time 2.156 (0.855)	Data Time 0.001 (0.175)	Loss 4.0660 (4.0985)	Entropy 1.90186 (1.90186)	Top-1 acc 30.469 (29.229)	Top-5 acc 53.125 (52.405)	lr 0.02500
Train [2][130/3239]	Time 0.250 (0.806)	Data Time 0.001 (0.162)	Loss 4.0268 (4.0988)	Entropy 1.90186 (1.90186)	Top-1 acc 30.078 (29.195)	Top-5 acc 53.125 (52.353)	lr 0.02500
Train [2][140/3239]	Time 0.299 (0.777)	Data Time 0.002 (0.150)	Loss 4.0041 (4.0970)	Entropy 1.90186 (1.90186)	Top-1 acc 33.203 (29.255)	Top-5 acc 53.125 (52.366)	lr 0.02500
Train [2][150/3239]	Time 0.289 (0.750)	Data Time 0.002 (0.140)	Loss 4.2200 (4.0995)	Entropy 1.90186 (1.90186)	Top-1 acc 27.344 (29.175)	Top-5 acc 48.047 (52.331)	lr 0.02500
Train [2][160/3239]	Time 0.279 (0.728)	Data Time 0.001 (0.132)	Loss 3.8301 (4.0982)	Entropy 1.90186 (1.90186)	Top-1 acc 33.594 (29.156)	Top-5 acc 60.938 (52.346)	lr 0.02500
Train [2][170/3239]	Time 0.157 (0.709)	Data Time 0.001 (0.124)	Loss 4.2167 (4.1011)	Entropy 1.90186 (1.90186)	Top-1 acc 26.172 (29.057)	Top-5 acc 51.562 (52.284)	lr 0.02500
Train [2][180/3239]	Time 0.134 (0.691)	Data Time 0.001 (0.118)	Loss 4.3362 (4.1053)	Entropy 1.90186 (1.90186)	Top-1 acc 21.094 (28.958)	Top-5 acc 46.484 (52.154)	lr 0.02500
Train [2][190/3239]	Time 0.158 (0.675)	Data Time 0.001 (0.112)	Loss 4.1365 (4.1047)	Entropy 1.90186 (1.90186)	Top-1 acc 27.734 (28.996)	Top-5 acc 50.000 (52.092)	lr 0.02500
Train [2][200/3239]	Time 0.226 (0.661)	Data Time 0.001 (0.107)	Loss 3.9458 (4.1056)	Entropy 1.90186 (1.90186)	Top-1 acc 33.594 (28.980)	Top-5 acc 57.812 (52.091)	lr 0.02500
Train [2][210/3239]	Time 0.193 (0.649)	Data Time 0.001 (0.102)	Loss 4.1516 (4.1067)	Entropy 1.90186 (1.90186)	Top-1 acc 28.906 (28.956)	Top-5 acc 51.562 (52.090)	lr 0.02500
Train [2][220/3239]	Time 0.198 (0.638)	Data Time 0.001 (0.097)	Loss 3.9492 (4.1049)	Entropy 1.90186 (1.90186)	Top-1 acc 33.203 (28.952)	Top-5 acc 54.297 (52.142)	lr 0.02500
Train [2][230/3239]	Time 2.110 (0.627)	Data Time 0.001 (0.093)	Loss 4.3709 (4.1067)	Entropy 1.90186 (1.90186)	Top-1 acc 25.781 (28.903)	Top-5 acc 46.094 (52.104)	lr 0.02500
Train [2][240/3239]	Time 0.219 (0.610)	Data Time 0.001 (0.089)	Loss 4.0617 (4.1097)	Entropy 1.90185 (1.90186)	Top-1 acc 29.688 (28.851)	Top-5 acc 50.391 (52.020)	lr 0.02500
Train [2][250/3239]	Time 0.177 (0.602)	Data Time 0.002 (0.086)	Loss 4.2258 (4.1104)	Entropy 1.90185 (1.90186)	Top-1 acc 25.391 (28.824)	Top-5 acc 45.703 (52.004)	lr 0.02500
Train [2][260/3239]	Time 0.142 (0.593)	Data Time 0.001 (0.083)	Loss 3.9550 (4.1098)	Entropy 1.90185 (1.90186)	Top-1 acc 32.812 (28.849)	Top-5 acc 55.078 (52.044)	lr 0.02500
Train [2][270/3239]	Time 0.234 (0.586)	Data Time 0.001 (0.080)	Loss 4.2184 (4.1109)	Entropy 1.90185 (1.90186)	Top-1 acc 27.344 (28.856)	Top-5 acc 51.953 (52.032)	lr 0.02499
Train [2][280/3239]	Time 0.165 (0.579)	Data Time 0.002 (0.077)	Loss 3.9349 (4.1118)	Entropy 1.90185 (1.90186)	Top-1 acc 32.422 (28.862)	Top-5 acc 58.984 (52.007)	lr 0.02499
Train [2][290/3239]	Time 0.193 (0.573)	Data Time 0.002 (0.075)	Loss 4.3086 (4.1128)	Entropy 1.90185 (1.90186)	Top-1 acc 26.562 (28.858)	Top-5 acc 42.969 (51.961)	lr 0.02499
Train [2][300/3239]	Time 0.199 (0.567)	Data Time 0.002 (0.072)	Loss 4.1468 (4.1146)	Entropy 1.90185 (1.90186)	Top-1 acc 25.781 (28.783)	Top-5 acc 50.391 (51.918)	lr 0.02499
Train [2][310/3239]	Time 0.197 (0.562)	Data Time 0.001 (0.070)	Loss 4.1026 (4.1152)	Entropy 1.90185 (1.90186)	Top-1 acc 27.734 (28.787)	Top-5 acc 55.078 (51.897)	lr 0.02499
Train [2][320/3239]	Time 0.202 (0.557)	Data Time 0.001 (0.068)	Loss 4.1966 (4.1162)	Entropy 1.90185 (1.90186)	Top-1 acc 25.000 (28.740)	Top-5 acc 51.562 (51.848)	lr 0.02499
Train [2][330/3239]	Time 0.313 (0.552)	Data Time 0.001 (0.066)	Loss 4.3400 (4.1173)	Entropy 1.90185 (1.90186)	Top-1 acc 23.828 (28.726)	Top-5 acc 48.047 (51.839)	lr 0.02499
Train [2][340/3239]	Time 2.233 (0.547)	Data Time 0.001 (0.064)	Loss 4.1081 (4.1178)	Entropy 1.90185 (1.90186)	Top-1 acc 26.953 (28.729)	Top-5 acc 49.219 (51.829)	lr 0.02499
Train [2][350/3239]	Time 0.159 (0.537)	Data Time 0.001 (0.062)	Loss 4.1308 (4.1165)	Entropy 1.90184 (1.90186)	Top-1 acc 28.125 (28.747)	Top-5 acc 49.609 (51.855)	lr 0.02499
Train [2][360/3239]	Time 0.234 (0.532)	Data Time 0.001 (0.061)	Loss 4.3219 (4.1183)	Entropy 1.90184 (1.90186)	Top-1 acc 25.000 (28.742)	Top-5 acc 47.656 (51.824)	lr 0.02499
Train [2][370/3239]	Time 0.214 (0.530)	Data Time 0.001 (0.059)	Loss 4.1570 (4.1177)	Entropy 1.90184 (1.90186)	Top-1 acc 28.125 (28.744)	Top-5 acc 50.391 (51.840)	lr 0.02499
Train [2][380/3239]	Time 0.168 (0.608)	Data Time 0.002 (0.058)	Loss 4.0124 (4.1174)	Entropy 1.90184 (1.90186)	Top-1 acc 33.594 (28.763)	Top-5 acc 55.469 (51.861)	lr 0.02499
Train [2][390/3239]	Time 0.196 (0.603)	Data Time 0.002 (0.056)	Loss 4.1687 (4.1149)	Entropy 1.90184 (1.90186)	Top-1 acc 28.125 (28.769)	Top-5 acc 51.172 (51.892)	lr 0.02499
Train [2][400/3239]	Time 0.195 (0.598)	Data Time 0.001 (0.055)	Loss 4.1261 (4.1159)	Entropy 1.90184 (1.90186)	Top-1 acc 30.859 (28.768)	Top-5 acc 50.781 (51.890)	lr 0.02499
Train [2][410/3239]	Time 0.178 (0.593)	Data Time 0.001 (0.054)	Loss 4.0899 (4.1153)	Entropy 1.90184 (1.90185)	Top-1 acc 28.125 (28.763)	Top-5 acc 50.391 (51.902)	lr 0.02499
Train [2][420/3239]	Time 0.192 (0.588)	Data Time 0.001 (0.052)	Loss 4.0776 (4.1150)	Entropy 1.90184 (1.90185)	Top-1 acc 26.562 (28.758)	Top-5 acc 53.906 (51.914)	lr 0.02499
Train [2][430/3239]	Time 0.219 (0.583)	Data Time 0.001 (0.051)	Loss 4.2078 (4.1150)	Entropy 1.90183 (1.90185)	Top-1 acc 24.609 (28.753)	Top-5 acc 50.781 (51.924)	lr 0.02499
Train [2][440/3239]	Time 0.208 (0.579)	Data Time 0.002 (0.050)	Loss 4.2048 (4.1142)	Entropy 1.90183 (1.90185)	Top-1 acc 22.656 (28.756)	Top-5 acc 48.828 (51.940)	lr 0.02499
Train [2][450/3239]	Time 2.170 (0.575)	Data Time 0.001 (0.049)	Loss 4.1387 (4.1140)	Entropy 1.90183 (1.90185)	Top-1 acc 29.688 (28.745)	Top-5 acc 50.781 (51.923)	lr 0.02499
Train [2][460/3239]	Time 0.243 (0.567)	Data Time 0.002 (0.048)	Loss 4.1549 (4.1133)	Entropy 1.90183 (1.90185)	Top-1 acc 32.031 (28.763)	Top-5 acc 52.344 (51.937)	lr 0.02499
Train [2][470/3239]	Time 0.202 (0.564)	Data Time 0.001 (0.047)	Loss 3.8746 (4.1130)	Entropy 1.90183 (1.90185)	Top-1 acc 33.203 (28.774)	Top-5 acc 56.250 (51.946)	lr 0.02499
Train [2][480/3239]	Time 0.138 (0.560)	Data Time 0.002 (0.046)	Loss 4.1478 (4.1138)	Entropy 1.90183 (1.90185)	Top-1 acc 28.906 (28.741)	Top-5 acc 53.516 (51.925)	lr 0.02499
Train [2][490/3239]	Time 0.195 (0.557)	Data Time 0.001 (0.045)	Loss 4.0321 (4.1140)	Entropy 1.90183 (1.90185)	Top-1 acc 29.688 (28.732)	Top-5 acc 56.250 (51.923)	lr 0.02499
Train [2][500/3239]	Time 0.163 (0.554)	Data Time 0.001 (0.044)	Loss 4.0806 (4.1156)	Entropy 1.90183 (1.90185)	Top-1 acc 29.297 (28.692)	Top-5 acc 47.656 (51.892)	lr 0.02499
Train [2][510/3239]	Time 0.187 (0.551)	Data Time 0.001 (0.044)	Loss 4.1928 (4.1155)	Entropy 1.90182 (1.90185)	Top-1 acc 28.906 (28.678)	Top-5 acc 50.000 (51.899)	lr 0.02499
Train [2][520/3239]	Time 0.196 (0.548)	Data Time 0.001 (0.043)	Loss 4.0381 (4.1156)	Entropy 1.90182 (1.90185)	Top-1 acc 26.562 (28.674)	Top-5 acc 52.734 (51.882)	lr 0.02499
Train [2][530/3239]	Time 0.198 (0.546)	Data Time 0.001 (0.042)	Loss 4.1203 (4.1151)	Entropy 1.90182 (1.90185)	Top-1 acc 27.344 (28.683)	Top-5 acc 51.172 (51.909)	lr 0.02499
Train [2][540/3239]	Time 0.290 (0.543)	Data Time 0.001 (0.041)	Loss 4.0928 (4.1156)	Entropy 1.90182 (1.90185)	Top-1 acc 31.641 (28.675)	Top-5 acc 52.734 (51.902)	lr 0.02499
Train [2][550/3239]	Time 0.240 (0.541)	Data Time 0.001 (0.041)	Loss 4.1336 (4.1153)	Entropy 1.90182 (1.90185)	Top-1 acc 28.516 (28.669)	Top-5 acc 51.172 (51.902)	lr 0.02499
Train [2][560/3239]	Time 2.174 (0.538)	Data Time 0.001 (0.040)	Loss 4.1625 (4.1150)	Entropy 1.90182 (1.90185)	Top-1 acc 28.906 (28.672)	Top-5 acc 50.391 (51.898)	lr 0.02499
Train [2][570/3239]	Time 0.239 (0.532)	Data Time 0.001 (0.039)	Loss 4.0310 (4.1156)	Entropy 1.90182 (1.90185)	Top-1 acc 33.984 (28.678)	Top-5 acc 52.734 (51.882)	lr 0.02499
Train [2][580/3239]	Time 0.208 (0.530)	Data Time 0.001 (0.039)	Loss 3.9743 (4.1143)	Entropy 1.90182 (1.90185)	Top-1 acc 30.469 (28.691)	Top-5 acc 54.688 (51.928)	lr 0.02499
Train [2][590/3239]	Time 0.193 (0.528)	Data Time 0.001 (0.038)	Loss 4.2101 (4.1148)	Entropy 1.90182 (1.90185)	Top-1 acc 26.953 (28.680)	Top-5 acc 50.000 (51.933)	lr 0.02499
Train [2][600/3239]	Time 0.142 (0.526)	Data Time 0.001 (0.037)	Loss 4.1163 (4.1153)	Entropy 1.90182 (1.90185)	Top-1 acc 29.688 (28.700)	Top-5 acc 50.781 (51.928)	lr 0.02499
Train [2][610/3239]	Time 0.226 (0.524)	Data Time 0.001 (0.037)	Loss 4.1603 (4.1144)	Entropy 1.90182 (1.90185)	Top-1 acc 30.078 (28.721)	Top-5 acc 50.781 (51.952)	lr 0.02499
Train [2][620/3239]	Time 0.159 (0.523)	Data Time 0.001 (0.036)	Loss 4.0054 (4.1145)	Entropy 1.90182 (1.90184)	Top-1 acc 27.734 (28.710)	Top-5 acc 54.297 (51.942)	lr 0.02499
Train [2][630/3239]	Time 0.197 (0.521)	Data Time 0.001 (0.036)	Loss 4.1985 (4.1142)	Entropy 1.90181 (1.90184)	Top-1 acc 28.516 (28.716)	Top-5 acc 51.172 (51.950)	lr 0.02499
Train [2][640/3239]	Time 0.193 (0.519)	Data Time 0.001 (0.035)	Loss 4.3045 (4.1150)	Entropy 1.90181 (1.90184)	Top-1 acc 22.656 (28.698)	Top-5 acc 47.656 (51.947)	lr 0.02499
Train [2][650/3239]	Time 0.174 (0.517)	Data Time 0.001 (0.035)	Loss 4.0494 (4.1145)	Entropy 1.90181 (1.90184)	Top-1 acc 29.297 (28.715)	Top-5 acc 53.125 (51.954)	lr 0.02499
Train [2][660/3239]	Time 0.155 (0.515)	Data Time 0.001 (0.034)	Loss 4.1158 (4.1140)	Entropy 1.90181 (1.90184)	Top-1 acc 32.422 (28.724)	Top-5 acc 53.125 (51.971)	lr 0.02499
Train [2][670/3239]	Time 2.355 (0.514)	Data Time 0.001 (0.034)	Loss 4.0626 (4.1142)	Entropy 1.90181 (1.90184)	Top-1 acc 26.562 (28.714)	Top-5 acc 55.859 (51.972)	lr 0.02499
Train [2][680/3239]	Time 0.299 (0.509)	Data Time 0.001 (0.033)	Loss 4.1455 (4.1143)	Entropy 1.90181 (1.90184)	Top-1 acc 25.781 (28.694)	Top-5 acc 50.000 (51.962)	lr 0.02499
Train [2][690/3239]	Time 0.138 (0.508)	Data Time 0.001 (0.033)	Loss 4.5067 (4.1147)	Entropy 1.90180 (1.90184)	Top-1 acc 17.969 (28.689)	Top-5 acc 40.234 (51.964)	lr 0.02499
Train [2][700/3239]	Time 0.137 (0.506)	Data Time 0.001 (0.033)	Loss 4.2903 (4.1138)	Entropy 1.90180 (1.90184)	Top-1 acc 23.828 (28.700)	Top-5 acc 48.438 (51.994)	lr 0.02499
Train [2][710/3239]	Time 0.202 (0.505)	Data Time 0.001 (0.032)	Loss 4.1645 (4.1130)	Entropy 1.90180 (1.90184)	Top-1 acc 26.953 (28.716)	Top-5 acc 51.562 (52.007)	lr 0.02499
Train [2][720/3239]	Time 0.234 (0.503)	Data Time 0.001 (0.032)	Loss 4.1427 (4.1134)	Entropy 1.90180 (1.90184)	Top-1 acc 26.172 (28.725)	Top-5 acc 49.219 (51.994)	lr 0.02499
Train [2][730/3239]	Time 0.256 (0.502)	Data Time 0.001 (0.031)	Loss 4.0536 (4.1133)	Entropy 1.90180 (1.90184)	Top-1 acc 32.031 (28.738)	Top-5 acc 52.734 (52.004)	lr 0.02499
Train [2][740/3239]	Time 0.274 (0.540)	Data Time 0.004 (0.031)	Loss 4.1194 (4.1136)	Entropy 1.90180 (1.90184)	Top-1 acc 30.078 (28.728)	Top-5 acc 51.562 (51.984)	lr 0.02499
Train [2][750/3239]	Time 0.207 (0.540)	Data Time 0.002 (0.031)	Loss 4.2761 (4.1136)	Entropy 1.90180 (1.90184)	Top-1 acc 24.609 (28.727)	Top-5 acc 47.266 (51.994)	lr 0.02499
Train [2][760/3239]	Time 0.142 (0.538)	Data Time 0.001 (0.030)	Loss 4.2861 (4.1135)	Entropy 1.90180 (1.90184)	Top-1 acc 24.219 (28.723)	Top-5 acc 46.875 (52.000)	lr 0.02499
Train [2][770/3239]	Time 0.224 (0.536)	Data Time 0.001 (0.030)	Loss 4.0848 (4.1126)	Entropy 1.90179 (1.90184)	Top-1 acc 32.812 (28.727)	Top-5 acc 52.344 (52.020)	lr 0.02499
Train [2][780/3239]	Time 2.339 (0.535)	Data Time 0.001 (0.030)	Loss 4.0315 (4.1124)	Entropy 1.90179 (1.90184)	Top-1 acc 27.344 (28.732)	Top-5 acc 51.172 (52.023)	lr 0.02499
Train [2][790/3239]	Time 0.224 (0.531)	Data Time 0.001 (0.029)	Loss 4.1210 (4.1120)	Entropy 1.90179 (1.90184)	Top-1 acc 27.734 (28.739)	Top-5 acc 53.516 (52.049)	lr 0.02499
Train [2][800/3239]	Time 0.180 (0.529)	Data Time 0.001 (0.029)	Loss 4.1030 (4.1114)	Entropy 1.90179 (1.90184)	Top-1 acc 28.125 (28.746)	Top-5 acc 52.344 (52.067)	lr 0.02499
Train [2][810/3239]	Time 0.211 (0.527)	Data Time 0.001 (0.029)	Loss 4.1236 (4.1111)	Entropy 1.90179 (1.90183)	Top-1 acc 26.953 (28.747)	Top-5 acc 50.391 (52.082)	lr 0.02499
Train [2][820/3239]	Time 0.322 (0.526)	Data Time 0.002 (0.028)	Loss 3.9944 (4.1109)	Entropy 1.90179 (1.90183)	Top-1 acc 32.031 (28.746)	Top-5 acc 55.469 (52.086)	lr 0.02499
Train [2][830/3239]	Time 0.333 (0.524)	Data Time 0.001 (0.028)	Loss 4.2039 (4.1106)	Entropy 1.90179 (1.90183)	Top-1 acc 29.688 (28.766)	Top-5 acc 50.781 (52.094)	lr 0.02499
Train [2][840/3239]	Time 0.211 (0.523)	Data Time 0.001 (0.028)	Loss 4.0056 (4.1104)	Entropy 1.90178 (1.90183)	Top-1 acc 28.125 (28.783)	Top-5 acc 53.516 (52.110)	lr 0.02499
Train [2][850/3239]	Time 0.145 (0.521)	Data Time 0.001 (0.027)	Loss 4.1466 (4.1103)	Entropy 1.90178 (1.90183)	Top-1 acc 25.391 (28.779)	Top-5 acc 51.562 (52.100)	lr 0.02499
Train [2][860/3239]	Time 0.220 (0.520)	Data Time 0.022 (0.027)	Loss 3.9422 (4.1106)	Entropy 1.90178 (1.90183)	Top-1 acc 32.422 (28.767)	Top-5 acc 57.031 (52.095)	lr 0.02499
Train [2][870/3239]	Time 0.193 (0.519)	Data Time 0.001 (0.027)	Loss 4.1593 (4.1107)	Entropy 1.90178 (1.90183)	Top-1 acc 25.391 (28.758)	Top-5 acc 49.609 (52.089)	lr 0.02499
Train [2][880/3239]	Time 0.238 (0.517)	Data Time 0.001 (0.027)	Loss 3.9668 (4.1106)	Entropy 1.90178 (1.90183)	Top-1 acc 33.594 (28.769)	Top-5 acc 56.641 (52.095)	lr 0.02499
Train [2][890/3239]	Time 2.166 (0.516)	Data Time 0.001 (0.026)	Loss 3.8636 (4.1100)	Entropy 1.90178 (1.90183)	Top-1 acc 33.594 (28.783)	Top-5 acc 58.984 (52.108)	lr 0.02499
Train [2][900/3239]	Time 0.146 (0.513)	Data Time 0.001 (0.026)	Loss 4.1578 (4.1103)	Entropy 1.90177 (1.90183)	Top-1 acc 29.688 (28.783)	Top-5 acc 49.609 (52.099)	lr 0.02499
Train [2][910/3239]	Time 0.175 (0.511)	Data Time 0.001 (0.026)	Loss 4.1623 (4.1101)	Entropy 1.90177 (1.90183)	Top-1 acc 29.688 (28.778)	Top-5 acc 51.562 (52.100)	lr 0.02499
Train [2][920/3239]	Time 0.233 (0.510)	Data Time 0.001 (0.026)	Loss 4.1359 (4.1099)	Entropy 1.90177 (1.90183)	Top-1 acc 26.953 (28.766)	Top-5 acc 51.172 (52.094)	lr 0.02499
Train [2][930/3239]	Time 0.157 (0.509)	Data Time 0.001 (0.025)	Loss 3.9250 (4.1097)	Entropy 1.90177 (1.90183)	Top-1 acc 35.547 (28.774)	Top-5 acc 58.984 (52.108)	lr 0.02499
Train [2][940/3239]	Time 0.131 (0.508)	Data Time 0.001 (0.025)	Loss 4.0424 (4.1096)	Entropy 1.90177 (1.90183)	Top-1 acc 29.688 (28.775)	Top-5 acc 55.469 (52.111)	lr 0.02499
Train [2][950/3239]	Time 0.218 (0.506)	Data Time 0.001 (0.025)	Loss 4.1015 (4.1099)	Entropy 1.90177 (1.90183)	Top-1 acc 28.906 (28.765)	Top-5 acc 50.391 (52.103)	lr 0.02499
Train [2][960/3239]	Time 0.170 (0.505)	Data Time 0.001 (0.025)	Loss 4.1753 (4.1103)	Entropy 1.90177 (1.90183)	Top-1 acc 28.516 (28.762)	Top-5 acc 49.609 (52.093)	lr 0.02499
Train [2][970/3239]	Time 0.227 (0.504)	Data Time 0.001 (0.024)	Loss 4.3160 (4.1109)	Entropy 1.90177 (1.90183)	Top-1 acc 26.172 (28.752)	Top-5 acc 45.703 (52.069)	lr 0.02499
Train [2][980/3239]	Time 0.202 (0.503)	Data Time 0.002 (0.024)	Loss 4.1921 (4.1111)	Entropy 1.90176 (1.90182)	Top-1 acc 27.344 (28.749)	Top-5 acc 49.609 (52.062)	lr 0.02499
Train [2][990/3239]	Time 0.198 (0.502)	Data Time 0.001 (0.024)	Loss 4.1536 (4.1111)	Entropy 1.90176 (1.90182)	Top-1 acc 30.859 (28.745)	Top-5 acc 51.562 (52.069)	lr 0.02499
Train [2][1000/3239]	Time 2.102 (0.501)	Data Time 0.001 (0.024)	Loss 4.0127 (4.1110)	Entropy 1.90176 (1.90182)	Top-1 acc 29.297 (28.748)	Top-5 acc 54.688 (52.065)	lr 0.02499
Train [2][1010/3239]	Time 0.242 (0.498)	Data Time 0.001 (0.024)	Loss 4.3729 (4.1112)	Entropy 1.90176 (1.90182)	Top-1 acc 24.219 (28.737)	Top-5 acc 45.312 (52.056)	lr 0.02499
Train [2][1020/3239]	Time 0.343 (0.497)	Data Time 0.002 (0.023)	Loss 4.0873 (4.1110)	Entropy 1.90175 (1.90182)	Top-1 acc 29.688 (28.739)	Top-5 acc 51.172 (52.060)	lr 0.02499
Train [2][1030/3239]	Time 0.280 (0.496)	Data Time 0.001 (0.023)	Loss 4.1033 (4.1112)	Entropy 1.90175 (1.90182)	Top-1 acc 26.953 (28.724)	Top-5 acc 54.297 (52.064)	lr 0.02499
Train [2][1040/3239]	Time 0.199 (0.495)	Data Time 0.001 (0.023)	Loss 4.1160 (4.1114)	Entropy 1.90175 (1.90182)	Top-1 acc 30.469 (28.713)	Top-5 acc 50.781 (52.059)	lr 0.02499
Train [2][1050/3239]	Time 0.246 (0.494)	Data Time 0.001 (0.023)	Loss 4.1133 (4.1113)	Entropy 1.90174 (1.90182)	Top-1 acc 30.078 (28.719)	Top-5 acc 53.516 (52.066)	lr 0.02499
Train [2][1060/3239]	Time 0.148 (0.493)	Data Time 0.001 (0.023)	Loss 4.0055 (4.1117)	Entropy 1.90174 (1.90182)	Top-1 acc 32.031 (28.712)	Top-5 acc 51.172 (52.060)	lr 0.02499
Train [2][1070/3239]	Time 0.223 (0.493)	Data Time 0.001 (0.022)	Loss 4.0984 (4.1114)	Entropy 1.90174 (1.90182)	Top-1 acc 29.688 (28.717)	Top-5 acc 53.125 (52.063)	lr 0.02499
Train [2][1080/3239]	Time 0.233 (0.492)	Data Time 0.001 (0.022)	Loss 4.2081 (4.1116)	Entropy 1.90173 (1.90182)	Top-1 acc 26.562 (28.723)	Top-5 acc 50.000 (52.057)	lr 0.02499
Train [2][1090/3239]	Time 0.204 (0.491)	Data Time 0.002 (0.022)	Loss 4.1609 (4.1115)	Entropy 1.90173 (1.90182)	Top-1 acc 26.172 (28.725)	Top-5 acc 46.875 (52.063)	lr 0.02499
Train [2][1100/3239]	Time 0.269 (0.516)	Data Time 0.004 (0.022)	Loss 4.1776 (4.1115)	Entropy 1.90173 (1.90182)	Top-1 acc 27.734 (28.729)	Top-5 acc 52.734 (52.068)	lr 0.02499
Train [2][1110/3239]	Time 2.685 (0.516)	Data Time 0.002 (0.022)	Loss 4.0035 (4.1114)	Entropy 1.90173 (1.90182)	Top-1 acc 30.859 (28.748)	Top-5 acc 53.516 (52.071)	lr 0.02499
Train [2][1120/3239]	Time 0.190 (0.513)	Data Time 0.002 (0.021)	Loss 3.8216 (4.1114)	Entropy 1.90173 (1.90181)	Top-1 acc 35.547 (28.747)	Top-5 acc 56.250 (52.068)	lr 0.02499
Train [2][1130/3239]	Time 0.230 (0.512)	Data Time 0.001 (0.021)	Loss 4.1423 (4.1118)	Entropy 1.90173 (1.90181)	Top-1 acc 29.297 (28.733)	Top-5 acc 55.078 (52.067)	lr 0.02499
Train [2][1140/3239]	Time 0.197 (0.511)	Data Time 0.001 (0.021)	Loss 3.9352 (4.1119)	Entropy 1.90173 (1.90181)	Top-1 acc 32.812 (28.732)	Top-5 acc 57.812 (52.068)	lr 0.02499
Train [2][1150/3239]	Time 0.208 (0.510)	Data Time 0.001 (0.021)	Loss 4.0940 (4.1118)	Entropy 1.90172 (1.90181)	Top-1 acc 28.516 (28.731)	Top-5 acc 51.953 (52.068)	lr 0.02499
Train [2][1160/3239]	Time 0.170 (0.509)	Data Time 0.001 (0.021)	Loss 4.1625 (4.1122)	Entropy 1.90172 (1.90181)	Top-1 acc 29.688 (28.728)	Top-5 acc 48.438 (52.054)	lr 0.02499
Train [2][1170/3239]	Time 0.276 (0.508)	Data Time 0.001 (0.021)	Loss 4.0545 (4.1122)	Entropy 1.90172 (1.90181)	Top-1 acc 24.609 (28.724)	Top-5 acc 49.609 (52.042)	lr 0.02499
Train [2][1180/3239]	Time 0.279 (0.508)	Data Time 0.001 (0.020)	Loss 4.1833 (4.1122)	Entropy 1.90171 (1.90181)	Top-1 acc 27.734 (28.722)	Top-5 acc 51.562 (52.044)	lr 0.02499
Train [2][1190/3239]	Time 0.235 (0.507)	Data Time 0.001 (0.020)	Loss 4.0042 (4.1123)	Entropy 1.90171 (1.90181)	Top-1 acc 30.469 (28.715)	Top-5 acc 54.688 (52.038)	lr 0.02499
Train [2][1200/3239]	Time 0.331 (0.506)	Data Time 0.001 (0.020)	Loss 4.2461 (4.1124)	Entropy 1.90171 (1.90181)	Top-1 acc 22.266 (28.717)	Top-5 acc 48.828 (52.042)	lr 0.02499
Train [2][1210/3239]	Time 0.141 (0.505)	Data Time 0.002 (0.020)	Loss 4.0388 (4.1116)	Entropy 1.90170 (1.90181)	Top-1 acc 29.297 (28.738)	Top-5 acc 55.469 (52.064)	lr 0.02499
Train [2][1220/3239]	Time 2.158 (0.504)	Data Time 0.001 (0.020)	Loss 4.0812 (4.1111)	Entropy 1.90170 (1.90181)	Top-1 acc 30.078 (28.745)	Top-5 acc 53.516 (52.075)	lr 0.02499
Train [2][1230/3239]	Time 0.191 (0.502)	Data Time 0.001 (0.020)	Loss 3.8714 (4.1108)	Entropy 1.90170 (1.90181)	Top-1 acc 35.156 (28.750)	Top-5 acc 54.297 (52.081)	lr 0.02499
Train [2][1240/3239]	Time 0.205 (0.501)	Data Time 0.001 (0.020)	Loss 4.2670 (4.1108)	Entropy 1.90170 (1.90181)	Top-1 acc 24.219 (28.749)	Top-5 acc 46.094 (52.082)	lr 0.02499
Train [2][1250/3239]	Time 0.216 (0.500)	Data Time 0.001 (0.020)	Loss 4.0512 (4.1107)	Entropy 1.90170 (1.90180)	Top-1 acc 28.906 (28.744)	Top-5 acc 54.688 (52.083)	lr 0.02499
Train [2][1260/3239]	Time 0.262 (0.499)	Data Time 0.001 (0.019)	Loss 4.1926 (4.1106)	Entropy 1.90170 (1.90180)	Top-1 acc 28.906 (28.745)	Top-5 acc 49.609 (52.086)	lr 0.02499
Train [2][1270/3239]	Time 0.242 (0.498)	Data Time 0.001 (0.019)	Loss 4.0272 (4.1105)	Entropy 1.90169 (1.90180)	Top-1 acc 29.688 (28.745)	Top-5 acc 55.469 (52.083)	lr 0.02499
Train [2][1280/3239]	Time 0.184 (0.498)	Data Time 0.001 (0.019)	Loss 4.2585 (4.1111)	Entropy 1.90169 (1.90180)	Top-1 acc 28.125 (28.733)	Top-5 acc 46.875 (52.071)	lr 0.02499
Train [2][1290/3239]	Time 0.142 (0.497)	Data Time 0.001 (0.019)	Loss 4.0270 (4.1112)	Entropy 1.90169 (1.90180)	Top-1 acc 28.906 (28.727)	Top-5 acc 55.469 (52.074)	lr 0.02499
Train [2][1300/3239]	Time 0.234 (0.496)	Data Time 0.001 (0.019)	Loss 3.8582 (4.1101)	Entropy 1.90169 (1.90180)	Top-1 acc 30.859 (28.750)	Top-5 acc 59.375 (52.103)	lr 0.02499
Train [2][1310/3239]	Time 0.186 (0.495)	Data Time 0.001 (0.019)	Loss 3.9573 (4.1101)	Entropy 1.90169 (1.90180)	Top-1 acc 29.688 (28.745)	Top-5 acc 53.906 (52.095)	lr 0.02499
Train [2][1320/3239]	Time 0.205 (0.495)	Data Time 0.001 (0.019)	Loss 4.2636 (4.1105)	Entropy 1.90168 (1.90180)	Top-1 acc 23.828 (28.734)	Top-5 acc 48.047 (52.083)	lr 0.02499
Train [2][1330/3239]	Time 2.154 (0.494)	Data Time 0.001 (0.018)	Loss 4.1208 (4.1104)	Entropy 1.90168 (1.90180)	Top-1 acc 27.734 (28.728)	Top-5 acc 55.078 (52.090)	lr 0.02499
Train [2][1340/3239]	Time 0.185 (0.492)	Data Time 0.001 (0.018)	Loss 4.2512 (4.1105)	Entropy 1.90168 (1.90180)	Top-1 acc 26.953 (28.724)	Top-5 acc 48.438 (52.091)	lr 0.02499
Train [2][1350/3239]	Time 0.252 (0.491)	Data Time 0.001 (0.018)	Loss 4.2188 (4.1107)	Entropy 1.90168 (1.90180)	Top-1 acc 26.953 (28.722)	Top-5 acc 51.562 (52.089)	lr 0.02499
Train [2][1360/3239]	Time 0.308 (0.490)	Data Time 0.001 (0.018)	Loss 4.0014 (4.1108)	Entropy 1.90167 (1.90179)	Top-1 acc 28.906 (28.725)	Top-5 acc 51.172 (52.091)	lr 0.02499
Train [2][1370/3239]	Time 0.176 (0.490)	Data Time 0.001 (0.018)	Loss 4.1964 (4.1104)	Entropy 1.90167 (1.90179)	Top-1 acc 28.125 (28.730)	Top-5 acc 49.609 (52.094)	lr 0.02499
Train [2][1380/3239]	Time 0.211 (0.489)	Data Time 0.001 (0.018)	Loss 3.9568 (4.1101)	Entropy 1.90167 (1.90179)	Top-1 acc 32.422 (28.734)	Top-5 acc 53.516 (52.105)	lr 0.02499
Train [2][1390/3239]	Time 0.195 (0.489)	Data Time 0.001 (0.018)	Loss 4.0827 (4.1103)	Entropy 1.90167 (1.90179)	Top-1 acc 27.344 (28.725)	Top-5 acc 51.172 (52.097)	lr 0.02499
Train [2][1400/3239]	Time 0.161 (0.488)	Data Time 0.001 (0.018)	Loss 4.0089 (4.1103)	Entropy 1.90166 (1.90179)	Top-1 acc 30.078 (28.728)	Top-5 acc 52.344 (52.096)	lr 0.02499
Train [2][1410/3239]	Time 0.185 (0.488)	Data Time 0.001 (0.018)	Loss 4.1554 (4.1104)	Entropy 1.90166 (1.90179)	Top-1 acc 26.953 (28.732)	Top-5 acc 51.562 (52.095)	lr 0.02499
Train [2][1420/3239]	Time 0.184 (0.487)	Data Time 0.001 (0.017)	Loss 4.0379 (4.1104)	Entropy 1.90166 (1.90179)	Top-1 acc 32.812 (28.736)	Top-5 acc 55.469 (52.098)	lr 0.02499
Train [2][1430/3239]	Time 0.196 (0.486)	Data Time 0.001 (0.017)	Loss 4.1164 (4.1102)	Entropy 1.90166 (1.90179)	Top-1 acc 30.859 (28.741)	Top-5 acc 54.688 (52.113)	lr 0.02499
Train [2][1440/3239]	Time 2.163 (0.486)	Data Time 0.001 (0.017)	Loss 3.9122 (4.1098)	Entropy 1.90166 (1.90179)	Top-1 acc 32.812 (28.741)	Top-5 acc 57.422 (52.115)	lr 0.02499
Train [2][1450/3239]	Time 0.217 (0.484)	Data Time 0.001 (0.017)	Loss 4.0983 (4.1099)	Entropy 1.90165 (1.90179)	Top-1 acc 27.734 (28.734)	Top-5 acc 50.391 (52.108)	lr 0.02499
Train [2][1460/3239]	Time 0.215 (0.483)	Data Time 0.001 (0.017)	Loss 4.3222 (4.1098)	Entropy 1.90165 (1.90179)	Top-1 acc 25.781 (28.733)	Top-5 acc 46.484 (52.104)	lr 0.02499
Train [2][1470/3239]	Time 0.256 (0.505)	Data Time 0.002 (0.017)	Loss 3.9108 (4.1097)	Entropy 1.90165 (1.90178)	Top-1 acc 30.469 (28.730)	Top-5 acc 55.078 (52.107)	lr 0.02499
Train [2][1480/3239]	Time 0.199 (0.505)	Data Time 0.002 (0.017)	Loss 3.8978 (4.1100)	Entropy 1.90166 (1.90178)	Top-1 acc 37.109 (28.727)	Top-5 acc 56.641 (52.095)	lr 0.02499
Train [2][1490/3239]	Time 0.189 (0.504)	Data Time 0.001 (0.017)	Loss 4.0985 (4.1100)	Entropy 1.90165 (1.90178)	Top-1 acc 28.516 (28.722)	Top-5 acc 48.828 (52.091)	lr 0.02499
Train [2][1500/3239]	Time 0.305 (0.503)	Data Time 0.002 (0.017)	Loss 4.1449 (4.1099)	Entropy 1.90165 (1.90178)	Top-1 acc 30.859 (28.726)	Top-5 acc 52.734 (52.091)	lr 0.02499
Train [2][1510/3239]	Time 0.179 (0.503)	Data Time 0.002 (0.017)	Loss 4.0135 (4.1100)	Entropy 1.90165 (1.90178)	Top-1 acc 29.297 (28.730)	Top-5 acc 52.344 (52.095)	lr 0.02499
Train [2][1520/3239]	Time 0.194 (0.502)	Data Time 0.001 (0.016)	Loss 4.0764 (4.1102)	Entropy 1.90164 (1.90178)	Top-1 acc 31.641 (28.728)	Top-5 acc 54.297 (52.097)	lr 0.02499
Train [2][1530/3239]	Time 0.187 (0.502)	Data Time 0.001 (0.016)	Loss 4.1016 (4.1105)	Entropy 1.90164 (1.90178)	Top-1 acc 27.734 (28.721)	Top-5 acc 48.828 (52.094)	lr 0.02499
Train [2][1540/3239]	Time 0.168 (0.501)	Data Time 0.001 (0.016)	Loss 3.9389 (4.1102)	Entropy 1.90163 (1.90178)	Top-1 acc 36.328 (28.732)	Top-5 acc 57.812 (52.105)	lr 0.02499
Train [2][1550/3239]	Time 2.079 (0.500)	Data Time 0.001 (0.016)	Loss 4.1753 (4.1102)	Entropy 1.90163 (1.90178)	Top-1 acc 29.688 (28.735)	Top-5 acc 50.391 (52.106)	lr 0.02499
Train [2][1560/3239]	Time 0.221 (0.498)	Data Time 0.001 (0.016)	Loss 4.0400 (4.1105)	Entropy 1.90163 (1.90178)	Top-1 acc 28.125 (28.738)	Top-5 acc 51.953 (52.096)	lr 0.02499
Train [2][1570/3239]	Time 0.169 (0.498)	Data Time 0.001 (0.016)	Loss 3.9770 (4.1101)	Entropy 1.90163 (1.90178)	Top-1 acc 30.859 (28.746)	Top-5 acc 54.297 (52.101)	lr 0.02499
Train [2][1580/3239]	Time 0.185 (0.497)	Data Time 0.001 (0.016)	Loss 4.2026 (4.1102)	Entropy 1.90163 (1.90177)	Top-1 acc 28.125 (28.740)	Top-5 acc 48.438 (52.096)	lr 0.02499
Train [2][1590/3239]	Time 0.206 (0.497)	Data Time 0.001 (0.016)	Loss 4.1821 (4.1101)	Entropy 1.90162 (1.90177)	Top-1 acc 28.516 (28.748)	Top-5 acc 51.562 (52.099)	lr 0.02499
Train [2][1600/3239]	Time 0.154 (0.496)	Data Time 0.001 (0.016)	Loss 4.0297 (4.1102)	Entropy 1.90162 (1.90177)	Top-1 acc 33.203 (28.753)	Top-5 acc 53.906 (52.095)	lr 0.02499
Train [2][1610/3239]	Time 0.161 (0.495)	Data Time 0.001 (0.016)	Loss 4.1249 (4.1101)	Entropy 1.90162 (1.90177)	Top-1 acc 28.906 (28.751)	Top-5 acc 54.688 (52.096)	lr 0.02499
Train [2][1620/3239]	Time 0.187 (0.495)	Data Time 0.002 (0.016)	Loss 3.8895 (4.1098)	Entropy 1.90162 (1.90177)	Top-1 acc 32.812 (28.760)	Top-5 acc 56.250 (52.104)	lr 0.02499
Train [2][1630/3239]	Time 0.191 (0.494)	Data Time 0.001 (0.016)	Loss 4.2873 (4.1100)	Entropy 1.90162 (1.90177)	Top-1 acc 24.609 (28.757)	Top-5 acc 47.656 (52.095)	lr 0.02499
Train [2][1640/3239]	Time 0.176 (0.494)	Data Time 0.001 (0.016)	Loss 4.0527 (4.1096)	Entropy 1.90161 (1.90177)	Top-1 acc 29.297 (28.765)	Top-5 acc 48.828 (52.098)	lr 0.02499
Train [2][1650/3239]	Time 0.223 (0.493)	Data Time 0.002 (0.015)	Loss 4.1225 (4.1092)	Entropy 1.90162 (1.90177)	Top-1 acc 30.469 (28.773)	Top-5 acc 52.344 (52.105)	lr 0.02499
Train [2][1660/3239]	Time 2.179 (0.493)	Data Time 0.001 (0.015)	Loss 4.2740 (4.1089)	Entropy 1.90162 (1.90177)	Top-1 acc 25.000 (28.787)	Top-5 acc 48.828 (52.109)	lr 0.02499
Train [2][1670/3239]	Time 0.163 (0.491)	Data Time 0.001 (0.015)	Loss 4.0733 (4.1090)	Entropy 1.90161 (1.90177)	Top-1 acc 30.469 (28.791)	Top-5 acc 55.078 (52.109)	lr 0.02499
Train [2][1680/3239]	Time 0.285 (0.490)	Data Time 0.001 (0.015)	Loss 4.1078 (4.1089)	Entropy 1.90161 (1.90177)	Top-1 acc 27.344 (28.792)	Top-5 acc 53.906 (52.111)	lr 0.02499
Train [2][1690/3239]	Time 0.225 (0.490)	Data Time 0.001 (0.015)	Loss 4.0550 (4.1089)	Entropy 1.90161 (1.90176)	Top-1 acc 31.641 (28.789)	Top-5 acc 53.906 (52.112)	lr 0.02499
Train [2][1700/3239]	Time 0.199 (0.490)	Data Time 0.001 (0.015)	Loss 4.0545 (4.1088)	Entropy 1.90160 (1.90176)	Top-1 acc 28.516 (28.789)	Top-5 acc 51.172 (52.114)	lr 0.02499
Train [2][1710/3239]	Time 0.225 (0.489)	Data Time 0.001 (0.015)	Loss 4.0456 (4.1086)	Entropy 1.90160 (1.90176)	Top-1 acc 27.344 (28.791)	Top-5 acc 54.688 (52.117)	lr 0.02499
Train [2][1720/3239]	Time 0.227 (0.489)	Data Time 0.001 (0.015)	Loss 4.3360 (4.1087)	Entropy 1.90160 (1.90176)	Top-1 acc 25.391 (28.784)	Top-5 acc 45.312 (52.112)	lr 0.02499
Train [2][1730/3239]	Time 0.188 (0.488)	Data Time 0.001 (0.015)	Loss 4.2639 (4.1086)	Entropy 1.90160 (1.90176)	Top-1 acc 26.562 (28.788)	Top-5 acc 50.000 (52.116)	lr 0.02499
Train [2][1740/3239]	Time 0.184 (0.487)	Data Time 0.001 (0.015)	Loss 4.2310 (4.1089)	Entropy 1.90159 (1.90176)	Top-1 acc 27.344 (28.783)	Top-5 acc 47.266 (52.105)	lr 0.02499
Train [2][1750/3239]	Time 0.272 (0.487)	Data Time 0.001 (0.015)	Loss 4.2151 (4.1089)	Entropy 1.90159 (1.90176)	Top-1 acc 28.125 (28.783)	Top-5 acc 50.391 (52.109)	lr 0.02499
Train [2][1760/3239]	Time 0.251 (0.486)	Data Time 0.001 (0.015)	Loss 4.1111 (4.1087)	Entropy 1.90159 (1.90176)	Top-1 acc 27.344 (28.785)	Top-5 acc 55.078 (52.118)	lr 0.02499
Train [2][1770/3239]	Time 2.153 (0.486)	Data Time 0.001 (0.015)	Loss 4.2942 (4.1086)	Entropy 1.90159 (1.90176)	Top-1 acc 26.953 (28.786)	Top-5 acc 48.438 (52.126)	lr 0.02499
Train [2][1780/3239]	Time 0.159 (0.484)	Data Time 0.001 (0.015)	Loss 3.9647 (4.1085)	Entropy 1.90159 (1.90176)	Top-1 acc 28.125 (28.789)	Top-5 acc 56.641 (52.129)	lr 0.02499
Train [2][1790/3239]	Time 0.154 (0.484)	Data Time 0.002 (0.014)	Loss 4.1527 (4.1085)	Entropy 1.90159 (1.90176)	Top-1 acc 29.297 (28.787)	Top-5 acc 50.781 (52.135)	lr 0.02499
Train [2][1800/3239]	Time 0.185 (0.483)	Data Time 0.001 (0.014)	Loss 4.1692 (4.1088)	Entropy 1.90159 (1.90175)	Top-1 acc 26.562 (28.777)	Top-5 acc 52.344 (52.127)	lr 0.02499
Train [2][1810/3239]	Time 0.240 (0.483)	Data Time 0.001 (0.014)	Loss 4.1489 (4.1089)	Entropy 1.90158 (1.90175)	Top-1 acc 27.734 (28.771)	Top-5 acc 51.953 (52.123)	lr 0.02499
Train [2][1820/3239]	Time 0.210 (0.483)	Data Time 0.001 (0.014)	Loss 4.2435 (4.1091)	Entropy 1.90158 (1.90175)	Top-1 acc 25.781 (28.767)	Top-5 acc 46.875 (52.119)	lr 0.02499
Train [2][1830/3239]	Time 0.298 (0.498)	Data Time 0.002 (0.014)	Loss 4.0634 (4.1095)	Entropy 1.90158 (1.90175)	Top-1 acc 29.688 (28.758)	Top-5 acc 51.953 (52.105)	lr 0.02499
Train [2][1840/3239]	Time 0.160 (0.498)	Data Time 0.002 (0.014)	Loss 4.0419 (4.1097)	Entropy 1.90158 (1.90175)	Top-1 acc 30.078 (28.754)	Top-5 acc 53.516 (52.102)	lr 0.02499
Train [2][1850/3239]	Time 0.174 (0.498)	Data Time 0.002 (0.014)	Loss 4.1297 (4.1093)	Entropy 1.90157 (1.90175)	Top-1 acc 28.516 (28.762)	Top-5 acc 52.344 (52.114)	lr 0.02499
Train [2][1860/3239]	Time 0.230 (0.497)	Data Time 0.002 (0.014)	Loss 4.1042 (4.1095)	Entropy 1.90157 (1.90175)	Top-1 acc 27.344 (28.758)	Top-5 acc 53.125 (52.114)	lr 0.02499
Train [2][1870/3239]	Time 0.240 (0.497)	Data Time 0.002 (0.014)	Loss 4.0679 (4.1094)	Entropy 1.90156 (1.90175)	Top-1 acc 29.297 (28.758)	Top-5 acc 56.641 (52.118)	lr 0.02499
Train [2][1880/3239]	Time 2.141 (0.496)	Data Time 0.002 (0.014)	Loss 4.2059 (4.1091)	Entropy 1.90156 (1.90175)	Top-1 acc 28.906 (28.758)	Top-5 acc 48.438 (52.124)	lr 0.02499
Train [2][1890/3239]	Time 0.180 (0.495)	Data Time 0.001 (0.014)	Loss 4.1800 (4.1093)	Entropy 1.90156 (1.90175)	Top-1 acc 26.953 (28.762)	Top-5 acc 49.609 (52.119)	lr 0.02499
Train [2][1900/3239]	Time 0.275 (0.494)	Data Time 0.002 (0.014)	Loss 4.0916 (4.1095)	Entropy 1.90156 (1.90174)	Top-1 acc 25.781 (28.760)	Top-5 acc 52.344 (52.115)	lr 0.02499
Train [2][1910/3239]	Time 0.141 (0.494)	Data Time 0.002 (0.014)	Loss 4.1892 (4.1095)	Entropy 1.90156 (1.90174)	Top-1 acc 25.000 (28.761)	Top-5 acc 49.219 (52.114)	lr 0.02499
Train [2][1920/3239]	Time 0.195 (0.493)	Data Time 0.001 (0.014)	Loss 3.9451 (4.1094)	Entropy 1.90155 (1.90174)	Top-1 acc 30.859 (28.759)	Top-5 acc 54.297 (52.112)	lr 0.02499
Train [2][1930/3239]	Time 0.189 (0.493)	Data Time 0.001 (0.014)	Loss 4.1139 (4.1096)	Entropy 1.90155 (1.90174)	Top-1 acc 28.125 (28.763)	Top-5 acc 55.469 (52.107)	lr 0.02499
Train [2][1940/3239]	Time 0.174 (0.493)	Data Time 0.001 (0.014)	Loss 4.3186 (4.1097)	Entropy 1.90155 (1.90174)	Top-1 acc 25.781 (28.766)	Top-5 acc 50.000 (52.115)	lr 0.02499
Train [2][1950/3239]	Time 0.133 (0.492)	Data Time 0.001 (0.014)	Loss 4.1480 (4.1095)	Entropy 1.90154 (1.90174)	Top-1 acc 25.781 (28.774)	Top-5 acc 53.516 (52.121)	lr 0.02499
Train [2][1960/3239]	Time 0.190 (0.492)	Data Time 0.001 (0.014)	Loss 4.0641 (4.1096)	Entropy 1.90154 (1.90174)	Top-1 acc 29.688 (28.775)	Top-5 acc 57.031 (52.119)	lr 0.02499
Train [2][1970/3239]	Time 0.295 (0.491)	Data Time 0.001 (0.014)	Loss 4.2037 (4.1095)	Entropy 1.90153 (1.90174)	Top-1 acc 28.516 (28.784)	Top-5 acc 48.828 (52.120)	lr 0.02499
Train [2][1980/3239]	Time 0.223 (0.491)	Data Time 0.001 (0.013)	Loss 3.9085 (4.1091)	Entropy 1.90153 (1.90174)	Top-1 acc 30.859 (28.793)	Top-5 acc 59.766 (52.130)	lr 0.02499
Train [2][1990/3239]	Time 2.138 (0.490)	Data Time 0.001 (0.013)	Loss 4.2200 (4.1092)	Entropy 1.90153 (1.90174)	Top-1 acc 25.781 (28.791)	Top-5 acc 54.688 (52.131)	lr 0.02499
Train [2][2000/3239]	Time 0.190 (0.489)	Data Time 0.001 (0.013)	Loss 4.2472 (4.1089)	Entropy 1.90153 (1.90173)	Top-1 acc 29.297 (28.801)	Top-5 acc 51.562 (52.137)	lr 0.02499
Train [2][2010/3239]	Time 0.151 (0.488)	Data Time 0.001 (0.013)	Loss 4.0110 (4.1088)	Entropy 1.90152 (1.90173)	Top-1 acc 28.125 (28.801)	Top-5 acc 55.859 (52.139)	lr 0.02499
Train [2][2020/3239]	Time 0.215 (0.488)	Data Time 0.001 (0.013)	Loss 4.1529 (4.1089)	Entropy 1.90152 (1.90173)	Top-1 acc 27.344 (28.803)	Top-5 acc 49.219 (52.137)	lr 0.02499
Train [2][2030/3239]	Time 0.168 (0.488)	Data Time 0.001 (0.013)	Loss 4.2271 (4.1088)	Entropy 1.90152 (1.90173)	Top-1 acc 26.562 (28.797)	Top-5 acc 47.656 (52.132)	lr 0.02499
Train [2][2040/3239]	Time 0.218 (0.487)	Data Time 0.001 (0.013)	Loss 4.1458 (4.1088)	Entropy 1.90152 (1.90173)	Top-1 acc 26.953 (28.793)	Top-5 acc 51.562 (52.130)	lr 0.02499
Train [2][2050/3239]	Time 0.207 (0.487)	Data Time 0.001 (0.013)	Loss 3.9830 (4.1088)	Entropy 1.90151 (1.90173)	Top-1 acc 31.250 (28.794)	Top-5 acc 53.516 (52.129)	lr 0.02499
Train [2][2060/3239]	Time 0.231 (0.487)	Data Time 0.001 (0.013)	Loss 4.0233 (4.1086)	Entropy 1.90151 (1.90173)	Top-1 acc 28.906 (28.795)	Top-5 acc 53.125 (52.128)	lr 0.02499
Train [2][2070/3239]	Time 0.214 (0.486)	Data Time 0.002 (0.013)	Loss 3.9098 (4.1087)	Entropy 1.90150 (1.90173)	Top-1 acc 34.766 (28.794)	Top-5 acc 56.641 (52.127)	lr 0.02499
Train [2][2080/3239]	Time 0.137 (0.486)	Data Time 0.002 (0.013)	Loss 4.2742 (4.1085)	Entropy 1.90150 (1.90173)	Top-1 acc 27.344 (28.796)	Top-5 acc 48.438 (52.136)	lr 0.02499
Train [2][2090/3239]	Time 0.226 (0.485)	Data Time 0.001 (0.013)	Loss 4.0992 (4.1084)	Entropy 1.90150 (1.90172)	Top-1 acc 30.078 (28.797)	Top-5 acc 53.906 (52.135)	lr 0.02499
Train [2][2100/3239]	Time 2.155 (0.485)	Data Time 0.001 (0.013)	Loss 4.0820 (4.1083)	Entropy 1.90150 (1.90172)	Top-1 acc 26.562 (28.792)	Top-5 acc 54.297 (52.134)	lr 0.02499
Train [2][2110/3239]	Time 0.148 (0.484)	Data Time 0.003 (0.013)	Loss 4.0693 (4.1083)	Entropy 1.90149 (1.90172)	Top-1 acc 27.344 (28.794)	Top-5 acc 54.297 (52.137)	lr 0.02499
Train [2][2120/3239]	Time 0.197 (0.483)	Data Time 0.001 (0.013)	Loss 3.9915 (4.1084)	Entropy 1.90149 (1.90172)	Top-1 acc 32.031 (28.795)	Top-5 acc 55.469 (52.134)	lr 0.02499
Train [2][2130/3239]	Time 0.320 (0.483)	Data Time 0.001 (0.013)	Loss 4.1014 (4.1081)	Entropy 1.90149 (1.90172)	Top-1 acc 27.734 (28.800)	Top-5 acc 50.391 (52.143)	lr 0.02499
Train [2][2140/3239]	Time 0.207 (0.483)	Data Time 0.001 (0.013)	Loss 4.1158 (4.1080)	Entropy 1.90149 (1.90172)	Top-1 acc 26.953 (28.803)	Top-5 acc 51.953 (52.144)	lr 0.02499
Train [2][2150/3239]	Time 0.167 (0.482)	Data Time 0.002 (0.013)	Loss 4.0385 (4.1077)	Entropy 1.90148 (1.90172)	Top-1 acc 31.250 (28.811)	Top-5 acc 50.781 (52.148)	lr 0.02499
Train [2][2160/3239]	Time 0.185 (0.482)	Data Time 0.001 (0.013)	Loss 4.2157 (4.1076)	Entropy 1.90148 (1.90172)	Top-1 acc 25.000 (28.815)	Top-5 acc 50.391 (52.153)	lr 0.02499
Train [2][2170/3239]	Time 0.210 (0.482)	Data Time 0.001 (0.013)	Loss 4.2895 (4.1073)	Entropy 1.90148 (1.90172)	Top-1 acc 28.906 (28.818)	Top-5 acc 51.172 (52.162)	lr 0.02499
Train [2][2180/3239]	Time 0.199 (0.481)	Data Time 0.001 (0.012)	Loss 4.1901 (4.1075)	Entropy 1.90148 (1.90172)	Top-1 acc 28.906 (28.810)	Top-5 acc 50.391 (52.159)	lr 0.02499
Train [2][2190/3239]	Time 0.254 (0.494)	Data Time 0.002 (0.012)	Loss 3.9898 (4.1072)	Entropy 1.90147 (1.90171)	Top-1 acc 32.422 (28.817)	Top-5 acc 53.125 (52.164)	lr 0.02499
Train [2][2200/3239]	Time 0.203 (0.494)	Data Time 0.002 (0.012)	Loss 4.3036 (4.1072)	Entropy 1.90146 (1.90171)	Top-1 acc 23.828 (28.814)	Top-5 acc 46.094 (52.163)	lr 0.02499
Train [2][2210/3239]	Time 2.232 (0.494)	Data Time 0.001 (0.012)	Loss 4.0049 (4.1072)	Entropy 1.90146 (1.90171)	Top-1 acc 27.734 (28.815)	Top-5 acc 55.469 (52.163)	lr 0.02499
Train [2][2220/3239]	Time 0.192 (0.493)	Data Time 0.002 (0.012)	Loss 4.0393 (4.1074)	Entropy 1.90146 (1.90171)	Top-1 acc 30.859 (28.810)	Top-5 acc 53.516 (52.159)	lr 0.02499
Train [2][2230/3239]	Time 0.238 (0.492)	Data Time 0.001 (0.012)	Loss 4.0913 (4.1074)	Entropy 1.90145 (1.90171)	Top-1 acc 27.344 (28.808)	Top-5 acc 51.953 (52.161)	lr 0.02499
Train [2][2240/3239]	Time 0.190 (0.492)	Data Time 0.001 (0.012)	Loss 4.1242 (4.1074)	Entropy 1.90145 (1.90171)	Top-1 acc 26.953 (28.804)	Top-5 acc 48.438 (52.156)	lr 0.02499
Train [2][2250/3239]	Time 0.181 (0.491)	Data Time 0.001 (0.012)	Loss 4.2297 (4.1076)	Entropy 1.90145 (1.90171)	Top-1 acc 25.000 (28.803)	Top-5 acc 47.656 (52.154)	lr 0.02499
Train [2][2260/3239]	Time 0.244 (0.491)	Data Time 0.001 (0.012)	Loss 4.0292 (4.1073)	Entropy 1.90145 (1.90171)	Top-1 acc 28.125 (28.806)	Top-5 acc 49.609 (52.161)	lr 0.02499
Train [2][2270/3239]	Time 0.302 (0.491)	Data Time 0.001 (0.012)	Loss 3.9783 (4.1070)	Entropy 1.90145 (1.90170)	Top-1 acc 28.125 (28.809)	Top-5 acc 52.734 (52.168)	lr 0.02499
Train [2][2280/3239]	Time 0.344 (0.490)	Data Time 0.001 (0.012)	Loss 4.0621 (4.1070)	Entropy 1.90145 (1.90170)	Top-1 acc 32.812 (28.810)	Top-5 acc 52.734 (52.165)	lr 0.02499
Train [2][2290/3239]	Time 0.142 (0.490)	Data Time 0.001 (0.012)	Loss 3.8511 (4.1069)	Entropy 1.90144 (1.90170)	Top-1 acc 33.984 (28.813)	Top-5 acc 56.641 (52.167)	lr 0.02499
Train [2][2300/3239]	Time 0.186 (0.489)	Data Time 0.001 (0.012)	Loss 4.2456 (4.1067)	Entropy 1.90144 (1.90170)	Top-1 acc 22.266 (28.816)	Top-5 acc 48.047 (52.172)	lr 0.02499
Train [2][2310/3239]	Time 0.198 (0.489)	Data Time 0.001 (0.012)	Loss 4.1564 (4.1068)	Entropy 1.90141 (1.90170)	Top-1 acc 29.297 (28.816)	Top-5 acc 51.172 (52.173)	lr 0.02499
Train [2][2320/3239]	Time 2.236 (0.489)	Data Time 0.002 (0.012)	Loss 4.0484 (4.1068)	Entropy 1.90141 (1.90170)	Top-1 acc 28.906 (28.817)	Top-5 acc 53.906 (52.170)	lr 0.02499
Train [2][2330/3239]	Time 0.188 (0.487)	Data Time 0.001 (0.012)	Loss 3.8364 (4.1068)	Entropy 1.90140 (1.90170)	Top-1 acc 32.031 (28.821)	Top-5 acc 59.766 (52.175)	lr 0.02499
Train [2][2340/3239]	Time 0.190 (0.487)	Data Time 0.001 (0.012)	Loss 4.2181 (4.1069)	Entropy 1.90140 (1.90170)	Top-1 acc 27.734 (28.820)	Top-5 acc 51.953 (52.172)	lr 0.02499
Train [2][2350/3239]	Time 0.200 (0.487)	Data Time 0.001 (0.012)	Loss 4.1276 (4.1068)	Entropy 1.90140 (1.90170)	Top-1 acc 30.469 (28.826)	Top-5 acc 55.859 (52.178)	lr 0.02499
Train [2][2360/3239]	Time 0.239 (0.486)	Data Time 0.001 (0.012)	Loss 4.0157 (4.1070)	Entropy 1.90139 (1.90169)	Top-1 acc 28.125 (28.822)	Top-5 acc 55.078 (52.173)	lr 0.02499
Train [2][2370/3239]	Time 0.140 (0.486)	Data Time 0.001 (0.012)	Loss 4.1842 (4.1071)	Entropy 1.90139 (1.90169)	Top-1 acc 28.125 (28.824)	Top-5 acc 53.906 (52.172)	lr 0.02499
Train [2][2380/3239]	Time 0.212 (0.485)	Data Time 0.001 (0.012)	Loss 3.9946 (4.1070)	Entropy 1.90139 (1.90169)	Top-1 acc 34.766 (28.827)	Top-5 acc 53.906 (52.171)	lr 0.02499
Train [2][2390/3239]	Time 0.132 (0.485)	Data Time 0.001 (0.012)	Loss 4.4906 (4.1071)	Entropy 1.90139 (1.90169)	Top-1 acc 23.047 (28.829)	Top-5 acc 43.750 (52.170)	lr 0.02499
Train [2][2400/3239]	Time 0.191 (0.485)	Data Time 0.001 (0.012)	Loss 4.1224 (4.1068)	Entropy 1.90138 (1.90169)	Top-1 acc 31.641 (28.839)	Top-5 acc 52.734 (52.176)	lr 0.02499
Train [2][2410/3239]	Time 0.229 (0.484)	Data Time 0.002 (0.012)	Loss 4.0836 (4.1069)	Entropy 1.90138 (1.90169)	Top-1 acc 31.250 (28.844)	Top-5 acc 53.125 (52.177)	lr 0.02499
Train [2][2420/3239]	Time 0.247 (0.484)	Data Time 0.001 (0.011)	Loss 3.8687 (4.1065)	Entropy 1.90138 (1.90169)	Top-1 acc 36.328 (28.848)	Top-5 acc 55.859 (52.186)	lr 0.02499
Train [2][2430/3239]	Time 2.225 (0.484)	Data Time 0.001 (0.011)	Loss 4.0526 (4.1063)	Entropy 1.90138 (1.90169)	Top-1 acc 31.250 (28.854)	Top-5 acc 55.078 (52.187)	lr 0.02499
Train [2][2440/3239]	Time 0.186 (0.483)	Data Time 0.001 (0.011)	Loss 3.8561 (4.1061)	Entropy 1.90138 (1.90168)	Top-1 acc 33.203 (28.856)	Top-5 acc 55.078 (52.189)	lr 0.02499
Train [2][2450/3239]	Time 0.219 (0.482)	Data Time 0.001 (0.011)	Loss 4.1148 (4.1061)	Entropy 1.90137 (1.90168)	Top-1 acc 26.562 (28.862)	Top-5 acc 51.562 (52.189)	lr 0.02499
Train [2][2460/3239]	Time 0.170 (0.482)	Data Time 0.001 (0.011)	Loss 4.0645 (4.1058)	Entropy 1.90137 (1.90168)	Top-1 acc 32.812 (28.870)	Top-5 acc 55.469 (52.196)	lr 0.02499
Train [2][2470/3239]	Time 0.199 (0.482)	Data Time 0.001 (0.011)	Loss 4.3137 (4.1056)	Entropy 1.90137 (1.90168)	Top-1 acc 26.562 (28.876)	Top-5 acc 46.875 (52.200)	lr 0.02499
Train [2][2480/3239]	Time 0.185 (0.481)	Data Time 0.001 (0.011)	Loss 4.2357 (4.1054)	Entropy 1.90136 (1.90168)	Top-1 acc 28.516 (28.882)	Top-5 acc 45.703 (52.205)	lr 0.02499
Train [2][2490/3239]	Time 0.235 (0.481)	Data Time 0.001 (0.011)	Loss 4.0177 (4.1053)	Entropy 1.90136 (1.90168)	Top-1 acc 30.469 (28.884)	Top-5 acc 54.297 (52.202)	lr 0.02499
Train [2][2500/3239]	Time 0.199 (0.481)	Data Time 0.002 (0.011)	Loss 4.0834 (4.1052)	Entropy 1.90136 (1.90168)	Top-1 acc 29.297 (28.887)	Top-5 acc 51.562 (52.203)	lr 0.02499
Train [2][2510/3239]	Time 0.187 (0.480)	Data Time 0.001 (0.011)	Loss 4.0419 (4.1052)	Entropy 1.90135 (1.90167)	Top-1 acc 30.078 (28.886)	Top-5 acc 51.953 (52.202)	lr 0.02499
Train [2][2520/3239]	Time 0.270 (0.480)	Data Time 0.001 (0.011)	Loss 3.9390 (4.1051)	Entropy 1.90135 (1.90167)	Top-1 acc 33.203 (28.894)	Top-5 acc 58.203 (52.209)	lr 0.02499
Train [2][2530/3239]	Time 0.211 (0.480)	Data Time 0.001 (0.011)	Loss 4.1635 (4.1050)	Entropy 1.90135 (1.90167)	Top-1 acc 23.828 (28.898)	Top-5 acc 47.656 (52.209)	lr 0.02499
Train [2][2540/3239]	Time 2.197 (0.479)	Data Time 0.001 (0.011)	Loss 4.1239 (4.1051)	Entropy 1.90135 (1.90167)	Top-1 acc 26.953 (28.894)	Top-5 acc 50.000 (52.205)	lr 0.02499
Train [2][2550/3239]	Time 0.229 (0.478)	Data Time 0.001 (0.011)	Loss 4.1147 (4.1051)	Entropy 1.90134 (1.90167)	Top-1 acc 30.859 (28.894)	Top-5 acc 52.734 (52.207)	lr 0.02499
Train [2][2560/3239]	Time 0.215 (0.491)	Data Time 0.002 (0.011)	Loss 4.1787 (4.1052)	Entropy 1.90133 (1.90167)	Top-1 acc 30.078 (28.894)	Top-5 acc 53.125 (52.208)	lr 0.02499
Train [2][2570/3239]	Time 0.307 (0.491)	Data Time 0.002 (0.011)	Loss 3.8124 (4.1050)	Entropy 1.90133 (1.90167)	Top-1 acc 37.109 (28.902)	Top-5 acc 55.859 (52.216)	lr 0.02499
Train [2][2580/3239]	Time 0.253 (0.490)	Data Time 0.001 (0.011)	Loss 4.0531 (4.1052)	Entropy 1.90133 (1.90167)	Top-1 acc 27.344 (28.895)	Top-5 acc 51.562 (52.210)	lr 0.02499
Train [2][2590/3239]	Time 0.170 (0.490)	Data Time 0.001 (0.011)	Loss 4.2857 (4.1052)	Entropy 1.90133 (1.90166)	Top-1 acc 26.562 (28.900)	Top-5 acc 48.047 (52.215)	lr 0.02499
Train [2][2600/3239]	Time 0.173 (0.490)	Data Time 0.001 (0.011)	Loss 4.2783 (4.1055)	Entropy 1.90132 (1.90166)	Top-1 acc 24.219 (28.896)	Top-5 acc 51.172 (52.210)	lr 0.02499
Train [2][2610/3239]	Time 0.191 (0.489)	Data Time 0.001 (0.011)	Loss 4.1301 (4.1054)	Entropy 1.90132 (1.90166)	Top-1 acc 26.562 (28.899)	Top-5 acc 53.125 (52.214)	lr 0.02499
Train [2][2620/3239]	Time 0.197 (0.489)	Data Time 0.002 (0.011)	Loss 4.0541 (4.1054)	Entropy 1.90131 (1.90166)	Top-1 acc 28.906 (28.901)	Top-5 acc 56.641 (52.219)	lr 0.02499
Train [2][2630/3239]	Time 0.193 (0.489)	Data Time 0.001 (0.011)	Loss 4.1005 (4.1053)	Entropy 1.90131 (1.90166)	Top-1 acc 25.000 (28.902)	Top-5 acc 53.125 (52.219)	lr 0.02499
Train [2][2640/3239]	Time 0.227 (0.489)	Data Time 0.001 (0.011)	Loss 4.0349 (4.1051)	Entropy 1.90130 (1.90166)	Top-1 acc 33.203 (28.907)	Top-5 acc 53.516 (52.220)	lr 0.02499
Train [2][2650/3239]	Time 0.230 (0.488)	Data Time 0.001 (0.011)	Loss 4.0387 (4.1049)	Entropy 1.90130 (1.90166)	Top-1 acc 33.203 (28.905)	Top-5 acc 51.953 (52.225)	lr 0.02499
Train [2][2660/3239]	Time 0.236 (0.488)	Data Time 0.001 (0.011)	Loss 3.7813 (4.1046)	Entropy 1.90129 (1.90166)	Top-1 acc 37.109 (28.914)	Top-5 acc 59.766 (52.233)	lr 0.02499
Train [2][2670/3239]	Time 0.145 (0.488)	Data Time 0.001 (0.011)	Loss 3.8453 (4.1045)	Entropy 1.90129 (1.90165)	Top-1 acc 33.984 (28.919)	Top-5 acc 55.859 (52.238)	lr 0.02499
Train [2][2680/3239]	Time 0.222 (0.487)	Data Time 0.001 (0.011)	Loss 4.2236 (4.1047)	Entropy 1.90128 (1.90165)	Top-1 acc 25.391 (28.913)	Top-5 acc 49.219 (52.234)	lr 0.02499
Train [2][2690/3239]	Time 0.191 (0.487)	Data Time 0.001 (0.011)	Loss 4.0149 (4.1047)	Entropy 1.90128 (1.90165)	Top-1 acc 31.250 (28.912)	Top-5 acc 53.516 (52.237)	lr 0.02499
Train [2][2700/3239]	Time 0.177 (0.487)	Data Time 0.001 (0.011)	Loss 3.9865 (4.1045)	Entropy 1.90128 (1.90165)	Top-1 acc 29.297 (28.910)	Top-5 acc 58.594 (52.239)	lr 0.02499
Train [2][2710/3239]	Time 0.242 (0.487)	Data Time 0.001 (0.011)	Loss 3.8835 (4.1040)	Entropy 1.90128 (1.90165)	Top-1 acc 32.812 (28.917)	Top-5 acc 55.078 (52.254)	lr 0.02499
Train [2][2720/3239]	Time 0.188 (0.486)	Data Time 0.001 (0.011)	Loss 4.0139 (4.1041)	Entropy 1.90127 (1.90165)	Top-1 acc 27.344 (28.918)	Top-5 acc 60.547 (52.257)	lr 0.02499
Train [2][2730/3239]	Time 0.285 (0.486)	Data Time 0.001 (0.010)	Loss 4.0011 (4.1041)	Entropy 1.90127 (1.90165)	Top-1 acc 28.906 (28.916)	Top-5 acc 52.344 (52.253)	lr 0.02499
Train [2][2740/3239]	Time 0.182 (0.486)	Data Time 0.001 (0.010)	Loss 4.0839 (4.1043)	Entropy 1.90127 (1.90164)	Top-1 acc 30.078 (28.914)	Top-5 acc 51.562 (52.252)	lr 0.02499
Train [2][2750/3239]	Time 0.229 (0.485)	Data Time 0.001 (0.010)	Loss 4.1211 (4.1043)	Entropy 1.90127 (1.90164)	Top-1 acc 28.125 (28.916)	Top-5 acc 52.344 (52.252)	lr 0.02499
Train [2][2760/3239]	Time 0.212 (0.485)	Data Time 0.001 (0.010)	Loss 4.1355 (4.1044)	Entropy 1.90126 (1.90164)	Top-1 acc 28.906 (28.916)	Top-5 acc 50.000 (52.251)	lr 0.02499
Train [2][2770/3239]	Time 0.363 (0.485)	Data Time 0.001 (0.010)	Loss 4.0264 (4.1043)	Entropy 1.90126 (1.90164)	Top-1 acc 29.688 (28.921)	Top-5 acc 54.297 (52.253)	lr 0.02499
Train [2][2780/3239]	Time 0.286 (0.485)	Data Time 0.002 (0.010)	Loss 4.1217 (4.1043)	Entropy 1.90126 (1.90164)	Top-1 acc 28.516 (28.920)	Top-5 acc 51.953 (52.254)	lr 0.02499
Train [2][2790/3239]	Time 0.248 (0.484)	Data Time 0.001 (0.010)	Loss 4.2052 (4.1043)	Entropy 1.90126 (1.90164)	Top-1 acc 25.781 (28.922)	Top-5 acc 50.781 (52.256)	lr 0.02499
Train [2][2800/3239]	Time 0.258 (0.484)	Data Time 0.001 (0.010)	Loss 4.1614 (4.1044)	Entropy 1.90126 (1.90164)	Top-1 acc 29.297 (28.919)	Top-5 acc 48.438 (52.253)	lr 0.02499
Train [2][2810/3239]	Time 0.165 (0.484)	Data Time 0.001 (0.010)	Loss 4.0817 (4.1044)	Entropy 1.90125 (1.90163)	Top-1 acc 26.562 (28.918)	Top-5 acc 50.000 (52.254)	lr 0.02499
Train [2][2820/3239]	Time 0.211 (0.483)	Data Time 0.002 (0.010)	Loss 4.0427 (4.1042)	Entropy 1.90125 (1.90163)	Top-1 acc 26.172 (28.923)	Top-5 acc 57.422 (52.256)	lr 0.02499
Train [2][2830/3239]	Time 0.237 (0.483)	Data Time 0.001 (0.010)	Loss 4.0043 (4.1040)	Entropy 1.90125 (1.90163)	Top-1 acc 30.859 (28.927)	Top-5 acc 54.297 (52.262)	lr 0.02498
Train [2][2840/3239]	Time 0.229 (0.483)	Data Time 0.001 (0.010)	Loss 3.9958 (4.1040)	Entropy 1.90124 (1.90163)	Top-1 acc 28.906 (28.928)	Top-5 acc 54.688 (52.259)	lr 0.02498
Train [2][2850/3239]	Time 0.226 (0.483)	Data Time 0.001 (0.010)	Loss 3.9883 (4.1038)	Entropy 1.90123 (1.90163)	Top-1 acc 29.688 (28.931)	Top-5 acc 57.031 (52.266)	lr 0.02498
Train [2][2860/3239]	Time 0.226 (0.482)	Data Time 0.001 (0.010)	Loss 3.8790 (4.1037)	Entropy 1.90123 (1.90163)	Top-1 acc 34.375 (28.936)	Top-5 acc 59.766 (52.269)	lr 0.02498
Train [2][2870/3239]	Time 0.208 (0.482)	Data Time 0.002 (0.010)	Loss 4.3897 (4.1039)	Entropy 1.90122 (1.90163)	Top-1 acc 26.953 (28.931)	Top-5 acc 46.094 (52.261)	lr 0.02498
Train [2][2880/3239]	Time 0.201 (0.482)	Data Time 0.001 (0.010)	Loss 4.1583 (4.1038)	Entropy 1.90122 (1.90163)	Top-1 acc 25.391 (28.934)	Top-5 acc 51.562 (52.266)	lr 0.02498
Train [2][2890/3239]	Time 0.248 (0.491)	Data Time 0.003 (0.010)	Loss 3.9308 (4.1036)	Entropy 1.90122 (1.90162)	Top-1 acc 33.594 (28.936)	Top-5 acc 56.641 (52.270)	lr 0.02498
Train [2][2900/3239]	Time 0.163 (0.492)	Data Time 0.002 (0.010)	Loss 3.9873 (4.1035)	Entropy 1.90122 (1.90162)	Top-1 acc 31.250 (28.937)	Top-5 acc 56.641 (52.270)	lr 0.02498
Train [2][2910/3239]	Time 0.293 (0.491)	Data Time 0.002 (0.010)	Loss 4.0051 (4.1034)	Entropy 1.90121 (1.90162)	Top-1 acc 29.297 (28.941)	Top-5 acc 55.078 (52.274)	lr 0.02498
Train [2][2920/3239]	Time 0.280 (0.491)	Data Time 0.001 (0.010)	Loss 4.0618 (4.1032)	Entropy 1.90121 (1.90162)	Top-1 acc 25.391 (28.939)	Top-5 acc 53.906 (52.283)	lr 0.02498
Train [2][2930/3239]	Time 0.140 (0.491)	Data Time 0.001 (0.010)	Loss 3.8673 (4.1030)	Entropy 1.90121 (1.90162)	Top-1 acc 34.375 (28.943)	Top-5 acc 58.203 (52.286)	lr 0.02498
Train [2][2940/3239]	Time 0.146 (0.490)	Data Time 0.002 (0.010)	Loss 4.0990 (4.1030)	Entropy 1.90121 (1.90162)	Top-1 acc 26.172 (28.942)	Top-5 acc 52.734 (52.283)	lr 0.02498
Train [2][2950/3239]	Time 0.229 (0.490)	Data Time 0.001 (0.010)	Loss 4.2378 (4.1029)	Entropy 1.90120 (1.90162)	Top-1 acc 25.000 (28.943)	Top-5 acc 48.438 (52.283)	lr 0.02498
Train [2][2960/3239]	Time 0.192 (0.490)	Data Time 0.001 (0.010)	Loss 3.9885 (4.1027)	Entropy 1.90120 (1.90161)	Top-1 acc 27.344 (28.942)	Top-5 acc 57.812 (52.286)	lr 0.02498
Train [2][2970/3239]	Time 0.188 (0.490)	Data Time 0.001 (0.010)	Loss 4.0108 (4.1027)	Entropy 1.90119 (1.90161)	Top-1 acc 30.859 (28.945)	Top-5 acc 53.906 (52.288)	lr 0.02498
Train [2][2980/3239]	Time 0.251 (0.489)	Data Time 0.001 (0.010)	Loss 4.0996 (4.1026)	Entropy 1.90119 (1.90161)	Top-1 acc 28.125 (28.946)	Top-5 acc 51.562 (52.287)	lr 0.02498
Train [2][2990/3239]	Time 0.223 (0.489)	Data Time 0.001 (0.010)	Loss 4.1264 (4.1026)	Entropy 1.90119 (1.90161)	Top-1 acc 24.609 (28.949)	Top-5 acc 50.391 (52.285)	lr 0.02498
Train [2][3000/3239]	Time 0.198 (0.489)	Data Time 0.001 (0.010)	Loss 3.8709 (4.1024)	Entropy 1.90118 (1.90161)	Top-1 acc 29.297 (28.951)	Top-5 acc 53.516 (52.286)	lr 0.02498
Train [2][3010/3239]	Time 0.163 (0.489)	Data Time 0.001 (0.010)	Loss 4.1568 (4.1024)	Entropy 1.90118 (1.90161)	Top-1 acc 31.641 (28.950)	Top-5 acc 53.516 (52.288)	lr 0.02498
Train [2][3020/3239]	Time 0.177 (0.489)	Data Time 0.001 (0.010)	Loss 4.2789 (4.1023)	Entropy 1.90117 (1.90161)	Top-1 acc 29.297 (28.951)	Top-5 acc 51.562 (52.291)	lr 0.02498
Train [2][3030/3239]	Time 0.278 (0.488)	Data Time 0.001 (0.010)	Loss 4.0189 (4.1020)	Entropy 1.90116 (1.90160)	Top-1 acc 32.422 (28.958)	Top-5 acc 55.469 (52.298)	lr 0.02498
Train [2][3040/3239]	Time 0.290 (0.488)	Data Time 0.002 (0.010)	Loss 3.9735 (4.1019)	Entropy 1.90116 (1.90160)	Top-1 acc 31.250 (28.962)	Top-5 acc 51.953 (52.298)	lr 0.02498
Train [2][3050/3239]	Time 0.310 (0.488)	Data Time 0.001 (0.010)	Loss 3.9419 (4.1019)	Entropy 1.90115 (1.90160)	Top-1 acc 33.984 (28.963)	Top-5 acc 58.594 (52.299)	lr 0.02498
Train [2][3060/3239]	Time 0.322 (0.488)	Data Time 0.002 (0.010)	Loss 4.2167 (4.1018)	Entropy 1.90115 (1.90160)	Top-1 acc 29.688 (28.968)	Top-5 acc 50.391 (52.299)	lr 0.02498
Train [2][3070/3239]	Time 0.207 (0.488)	Data Time 0.001 (0.010)	Loss 4.0945 (4.1018)	Entropy 1.90115 (1.90160)	Top-1 acc 32.422 (28.970)	Top-5 acc 55.469 (52.302)	lr 0.02498
Train [2][3080/3239]	Time 0.285 (0.487)	Data Time 0.001 (0.010)	Loss 4.2074 (4.1015)	Entropy 1.90114 (1.90160)	Top-1 acc 26.172 (28.972)	Top-5 acc 51.172 (52.309)	lr 0.02498
Train [2][3090/3239]	Time 0.209 (0.487)	Data Time 0.001 (0.010)	Loss 4.2265 (4.1016)	Entropy 1.90112 (1.90160)	Top-1 acc 26.562 (28.967)	Top-5 acc 50.000 (52.307)	lr 0.02498
Train [2][3100/3239]	Time 0.213 (0.487)	Data Time 0.001 (0.010)	Loss 3.9092 (4.1014)	Entropy 1.90112 (1.90159)	Top-1 acc 32.422 (28.971)	Top-5 acc 55.859 (52.310)	lr 0.02498
Train [2][3110/3239]	Time 0.181 (0.487)	Data Time 0.001 (0.010)	Loss 4.0064 (4.1015)	Entropy 1.90112 (1.90159)	Top-1 acc 30.469 (28.970)	Top-5 acc 56.641 (52.311)	lr 0.02498
Train [2][3120/3239]	Time 0.229 (0.486)	Data Time 0.001 (0.010)	Loss 3.9292 (4.1016)	Entropy 1.90111 (1.90159)	Top-1 acc 30.859 (28.967)	Top-5 acc 59.766 (52.309)	lr 0.02498
Train [2][3130/3239]	Time 0.259 (0.486)	Data Time 0.001 (0.010)	Loss 4.2011 (4.1013)	Entropy 1.90110 (1.90159)	Top-1 acc 32.812 (28.976)	Top-5 acc 52.344 (52.316)	lr 0.02498
Train [2][3140/3239]	Time 0.178 (0.486)	Data Time 0.001 (0.009)	Loss 4.3047 (4.1015)	Entropy 1.90110 (1.90159)	Top-1 acc 28.516 (28.979)	Top-5 acc 47.656 (52.316)	lr 0.02498
Train [2][3150/3239]	Time 0.218 (0.485)	Data Time 0.001 (0.009)	Loss 4.0980 (4.1014)	Entropy 1.90109 (1.90159)	Top-1 acc 30.078 (28.982)	Top-5 acc 49.609 (52.316)	lr 0.02498
Train [2][3160/3239]	Time 0.245 (0.485)	Data Time 0.001 (0.009)	Loss 4.0119 (4.1014)	Entropy 1.90108 (1.90158)	Top-1 acc 30.859 (28.982)	Top-5 acc 53.125 (52.315)	lr 0.02498
Train [2][3170/3239]	Time 0.219 (0.485)	Data Time 0.029 (0.009)	Loss 4.1026 (4.1015)	Entropy 1.90108 (1.90158)	Top-1 acc 29.688 (28.982)	Top-5 acc 52.344 (52.313)	lr 0.02498
Train [2][3180/3239]	Time 0.199 (0.485)	Data Time 0.000 (0.009)	Loss 4.0781 (4.1016)	Entropy 1.90108 (1.90158)	Top-1 acc 30.078 (28.981)	Top-5 acc 50.781 (52.313)	lr 0.02498
Train [2][3190/3239]	Time 0.194 (0.484)	Data Time 0.000 (0.009)	Loss 4.1715 (4.1016)	Entropy 1.90108 (1.90158)	Top-1 acc 29.297 (28.981)	Top-5 acc 49.609 (52.313)	lr 0.02498
Train [2][3200/3239]	Time 0.284 (0.484)	Data Time 0.000 (0.009)	Loss 4.3312 (4.1016)	Entropy 1.90107 (1.90158)	Top-1 acc 26.172 (28.983)	Top-5 acc 45.312 (52.317)	lr 0.02498
Train [2][3210/3239]	Time 0.226 (0.484)	Data Time 0.000 (0.009)	Loss 4.1616 (4.1017)	Entropy 1.90107 (1.90158)	Top-1 acc 29.297 (28.981)	Top-5 acc 52.344 (52.314)	lr 0.02498
Train [2][3220/3239]	Time 0.293 (0.494)	Data Time 0.000 (0.009)	Loss 3.9897 (4.1016)	Entropy 1.90107 (1.90158)	Top-1 acc 29.297 (28.982)	Top-5 acc 53.516 (52.316)	lr 0.02498
Train [2][3230/3239]	Time 0.177 (0.494)	Data Time 0.000 (0.009)	Loss 4.0722 (4.1014)	Entropy 1.90106 (1.90157)	Top-1 acc 31.250 (28.983)	Top-5 acc 50.000 (52.316)	lr 0.02498
Train [2][3239/3239]	Time 1.972 (0.493)	Data Time 0.000 (0.009)	Loss 4.4472 (4.1013)	Entropy 1.90106 (1.90157)	Top-1 acc 27.160 (28.983)	Top-5 acc 43.210 (52.318)	lr 0.02498
==========Valid [2/120]	loss 2.995	top-1 acc 37.484 (37.484)	top-5 acc 62.368	Train top-1 28.983	top-5 52.318	Entropy 1.90106	Latency-None: 0.000ms	Flops: 493.93M
Train [3][0/3239]	Time 21.628 (21.628)	Data Time 19.642 (19.642)	Loss 3.9020 (3.9020)	Entropy 1.90106 (1.90106)	Top-1 acc 32.031 (32.031)	Top-5 acc 55.859 (55.859)	lr 0.02498
Train [3][10/3239]	Time 2.313 (2.430)	Data Time 0.002 (1.787)	Loss 3.9011 (4.0793)	Entropy 1.90106 (1.90106)	Top-1 acc 30.859 (28.267)	Top-5 acc 58.984 (52.663)	lr 0.02498
Train [3][20/3239]	Time 0.261 (1.374)	Data Time 0.002 (0.937)	Loss 3.9724 (4.0787)	Entropy 1.90106 (1.90106)	Top-1 acc 30.859 (28.367)	Top-5 acc 55.078 (53.051)	lr 0.02498
Train [3][30/3239]	Time 0.209 (1.061)	Data Time 0.002 (0.635)	Loss 3.9945 (4.0748)	Entropy 1.90106 (1.90106)	Top-1 acc 28.516 (28.957)	Top-5 acc 53.125 (52.873)	lr 0.02498
Train [3][40/3239]	Time 0.237 (0.903)	Data Time 0.001 (0.481)	Loss 4.0736 (4.0659)	Entropy 1.90106 (1.90106)	Top-1 acc 27.734 (29.163)	Top-5 acc 53.906 (53.306)	lr 0.02498
Train [3][50/3239]	Time 0.199 (0.804)	Data Time 0.001 (0.387)	Loss 4.0125 (4.0773)	Entropy 1.90106 (1.90106)	Top-1 acc 30.859 (28.906)	Top-5 acc 53.516 (52.956)	lr 0.02498
Train [3][60/3239]	Time 0.238 (0.734)	Data Time 0.001 (0.324)	Loss 3.8853 (4.0736)	Entropy 1.90106 (1.90106)	Top-1 acc 32.031 (28.932)	Top-5 acc 55.078 (52.971)	lr 0.02498
Train [3][70/3239]	Time 0.143 (0.686)	Data Time 0.001 (0.278)	Loss 4.2257 (4.0771)	Entropy 1.90105 (1.90106)	Top-1 acc 26.172 (28.994)	Top-5 acc 50.781 (52.811)	lr 0.02498
Train [3][80/3239]	Time 0.196 (0.652)	Data Time 0.001 (0.245)	Loss 4.0426 (4.0749)	Entropy 1.90105 (1.90106)	Top-1 acc 26.953 (29.099)	Top-5 acc 53.906 (52.778)	lr 0.02498
Train [3][90/3239]	Time 0.214 (0.625)	Data Time 0.001 (0.218)	Loss 4.0365 (4.0776)	Entropy 1.90104 (1.90106)	Top-1 acc 30.859 (29.228)	Top-5 acc 54.297 (52.837)	lr 0.02498
Train [3][100/3239]	Time 0.196 (0.604)	Data Time 0.001 (0.196)	Loss 4.1358 (4.0771)	Entropy 1.90104 (1.90105)	Top-1 acc 26.562 (29.212)	Top-5 acc 50.000 (52.893)	lr 0.02498
Train [3][110/3239]	Time 0.288 (0.587)	Data Time 0.001 (0.179)	Loss 4.2179 (4.0755)	Entropy 1.90104 (1.90105)	Top-1 acc 29.688 (29.378)	Top-5 acc 49.219 (52.872)	lr 0.02498
Train [3][120/3239]	Time 2.136 (0.571)	Data Time 0.001 (0.164)	Loss 3.9304 (4.0731)	Entropy 1.90104 (1.90105)	Top-1 acc 28.125 (29.381)	Top-5 acc 51.562 (52.825)	lr 0.02498
Train [3][130/3239]	Time 0.240 (0.545)	Data Time 0.001 (0.152)	Loss 3.8911 (4.0730)	Entropy 1.90102 (1.90105)	Top-1 acc 35.938 (29.374)	Top-5 acc 57.031 (52.785)	lr 0.02498
Train [3][140/3239]	Time 0.242 (0.534)	Data Time 0.001 (0.141)	Loss 4.2011 (4.0715)	Entropy 1.90102 (1.90105)	Top-1 acc 30.078 (29.471)	Top-5 acc 51.953 (52.848)	lr 0.02498
Train [3][150/3239]	Time 0.291 (0.525)	Data Time 0.001 (0.132)	Loss 3.8501 (4.0730)	Entropy 1.90101 (1.90104)	Top-1 acc 32.031 (29.367)	Top-5 acc 60.156 (52.817)	lr 0.02498
Train [3][160/3239]	Time 0.224 (0.517)	Data Time 0.001 (0.124)	Loss 4.2665 (4.0751)	Entropy 1.90100 (1.90104)	Top-1 acc 25.391 (29.319)	Top-5 acc 47.656 (52.768)	lr 0.02498
Train [3][170/3239]	Time 0.206 (0.511)	Data Time 0.001 (0.117)	Loss 4.2315 (4.0745)	Entropy 1.90100 (1.90104)	Top-1 acc 27.344 (29.404)	Top-5 acc 48.828 (52.741)	lr 0.02498
Train [3][180/3239]	Time 0.213 (0.506)	Data Time 0.002 (0.111)	Loss 4.2205 (4.0737)	Entropy 1.90100 (1.90104)	Top-1 acc 26.562 (29.381)	Top-5 acc 48.438 (52.762)	lr 0.02498
Train [3][190/3239]	Time 0.235 (0.501)	Data Time 0.001 (0.105)	Loss 3.9857 (4.0762)	Entropy 1.90100 (1.90104)	Top-1 acc 34.375 (29.356)	Top-5 acc 56.250 (52.689)	lr 0.02498
Train [3][200/3239]	Time 0.174 (0.495)	Data Time 0.001 (0.100)	Loss 3.9962 (4.0757)	Entropy 1.90100 (1.90103)	Top-1 acc 29.297 (29.351)	Top-5 acc 53.516 (52.713)	lr 0.02498
Train [3][210/3239]	Time 0.204 (0.491)	Data Time 0.001 (0.095)	Loss 3.9178 (4.0732)	Entropy 1.90099 (1.90103)	Top-1 acc 34.375 (29.438)	Top-5 acc 57.422 (52.797)	lr 0.02498
Train [3][220/3239]	Time 0.178 (0.486)	Data Time 0.001 (0.091)	Loss 3.9718 (4.0726)	Entropy 1.90099 (1.90103)	Top-1 acc 29.297 (29.431)	Top-5 acc 60.547 (52.839)	lr 0.02498
Train [3][230/3239]	Time 2.131 (0.482)	Data Time 0.001 (0.087)	Loss 3.9386 (4.0717)	Entropy 1.90099 (1.90103)	Top-1 acc 33.594 (29.466)	Top-5 acc 53.906 (52.866)	lr 0.02498
Train [3][240/3239]	Time 0.228 (0.471)	Data Time 0.001 (0.083)	Loss 4.1438 (4.0726)	Entropy 1.90098 (1.90103)	Top-1 acc 29.688 (29.464)	Top-5 acc 51.172 (52.836)	lr 0.02498
Train [3][250/3239]	Time 0.201 (0.468)	Data Time 0.001 (0.080)	Loss 3.9874 (4.0725)	Entropy 1.90096 (1.90102)	Top-1 acc 33.203 (29.460)	Top-5 acc 54.688 (52.836)	lr 0.02498
Train [3][260/3239]	Time 0.192 (0.465)	Data Time 0.002 (0.077)	Loss 4.1826 (4.0732)	Entropy 1.90096 (1.90102)	Top-1 acc 26.172 (29.406)	Top-5 acc 54.297 (52.859)	lr 0.02498
Train [3][270/3239]	Time 0.188 (0.463)	Data Time 0.001 (0.075)	Loss 4.0515 (4.0728)	Entropy 1.90096 (1.90102)	Top-1 acc 30.078 (29.393)	Top-5 acc 52.344 (52.897)	lr 0.02498
Train [3][280/3239]	Time 0.154 (0.461)	Data Time 0.001 (0.072)	Loss 4.2199 (4.0731)	Entropy 1.90095 (1.90102)	Top-1 acc 26.953 (29.369)	Top-5 acc 50.781 (52.904)	lr 0.02498
Train [3][290/3239]	Time 0.290 (0.459)	Data Time 0.001 (0.070)	Loss 4.1144 (4.0730)	Entropy 1.90095 (1.90101)	Top-1 acc 25.391 (29.338)	Top-5 acc 53.125 (52.917)	lr 0.02498
Train [3][300/3239]	Time 0.200 (0.457)	Data Time 0.001 (0.067)	Loss 4.0812 (4.0724)	Entropy 1.90094 (1.90101)	Top-1 acc 28.125 (29.342)	Top-5 acc 53.906 (52.950)	lr 0.02498
Train [3][310/3239]	Time 0.203 (0.456)	Data Time 0.001 (0.065)	Loss 4.1837 (4.0749)	Entropy 1.90094 (1.90101)	Top-1 acc 24.609 (29.294)	Top-5 acc 51.172 (52.880)	lr 0.02498
Train [3][320/3239]	Time 0.248 (0.454)	Data Time 0.002 (0.063)	Loss 4.1974 (4.0742)	Entropy 1.90093 (1.90101)	Top-1 acc 26.562 (29.290)	Top-5 acc 50.000 (52.877)	lr 0.02498
Train [3][330/3239]	Time 0.220 (0.453)	Data Time 0.001 (0.062)	Loss 4.0676 (4.0737)	Entropy 1.90092 (1.90101)	Top-1 acc 28.516 (29.292)	Top-5 acc 51.562 (52.881)	lr 0.02498
Train [3][340/3239]	Time 34.944 (0.548)	Data Time 0.001 (0.060)	Loss 4.0548 (4.0734)	Entropy 1.90092 (1.90100)	Top-1 acc 29.688 (29.313)	Top-5 acc 55.469 (52.900)	lr 0.02498
Train [3][350/3239]	Time 0.161 (0.539)	Data Time 0.002 (0.058)	Loss 4.1918 (4.0742)	Entropy 1.90091 (1.90100)	Top-1 acc 26.953 (29.310)	Top-5 acc 49.609 (52.899)	lr 0.02498
Train [3][360/3239]	Time 0.228 (0.536)	Data Time 0.001 (0.057)	Loss 4.0457 (4.0743)	Entropy 1.90091 (1.90100)	Top-1 acc 32.031 (29.343)	Top-5 acc 50.781 (52.922)	lr 0.02498
Train [3][370/3239]	Time 0.236 (0.532)	Data Time 0.001 (0.055)	Loss 4.2075 (4.0732)	Entropy 1.90090 (1.90100)	Top-1 acc 25.000 (29.353)	Top-5 acc 48.438 (52.942)	lr 0.02498
Train [3][380/3239]	Time 0.186 (0.529)	Data Time 0.001 (0.054)	Loss 4.0713 (4.0735)	Entropy 1.90090 (1.90099)	Top-1 acc 31.250 (29.378)	Top-5 acc 54.688 (52.957)	lr 0.02498
Train [3][390/3239]	Time 0.247 (0.526)	Data Time 0.001 (0.052)	Loss 4.1333 (4.0750)	Entropy 1.90090 (1.90099)	Top-1 acc 27.344 (29.368)	Top-5 acc 50.391 (52.904)	lr 0.02498
Train [3][400/3239]	Time 0.219 (0.523)	Data Time 0.001 (0.051)	Loss 4.2347 (4.0743)	Entropy 1.90089 (1.90099)	Top-1 acc 24.219 (29.377)	Top-5 acc 48.438 (52.912)	lr 0.02498
Train [3][410/3239]	Time 0.190 (0.520)	Data Time 0.002 (0.050)	Loss 3.9869 (4.0747)	Entropy 1.90089 (1.90099)	Top-1 acc 28.906 (29.355)	Top-5 acc 53.516 (52.903)	lr 0.02498
Train [3][420/3239]	Time 0.375 (0.517)	Data Time 0.001 (0.049)	Loss 4.0768 (4.0752)	Entropy 1.90088 (1.90098)	Top-1 acc 29.688 (29.353)	Top-5 acc 53.125 (52.924)	lr 0.02498
Train [3][430/3239]	Time 0.232 (0.515)	Data Time 0.001 (0.048)	Loss 3.9741 (4.0730)	Entropy 1.90088 (1.90098)	Top-1 acc 32.422 (29.418)	Top-5 acc 53.516 (52.986)	lr 0.02498
Train [3][440/3239]	Time 0.215 (0.512)	Data Time 0.001 (0.047)	Loss 4.0581 (4.0722)	Entropy 1.90088 (1.90098)	Top-1 acc 26.953 (29.427)	Top-5 acc 50.000 (52.989)	lr 0.02498
Train [3][450/3239]	Time 2.125 (0.510)	Data Time 0.002 (0.046)	Loss 4.0446 (4.0718)	Entropy 1.90088 (1.90098)	Top-1 acc 31.250 (29.402)	Top-5 acc 52.734 (52.957)	lr 0.02498
Train [3][460/3239]	Time 0.224 (0.503)	Data Time 0.001 (0.045)	Loss 3.9930 (4.0724)	Entropy 1.90088 (1.90097)	Top-1 acc 32.812 (29.403)	Top-5 acc 53.516 (52.950)	lr 0.02498
Train [3][470/3239]	Time 0.163 (0.501)	Data Time 0.001 (0.044)	Loss 4.0702 (4.0728)	Entropy 1.90087 (1.90097)	Top-1 acc 30.469 (29.405)	Top-5 acc 53.906 (52.947)	lr 0.02498
Train [3][480/3239]	Time 0.196 (0.499)	Data Time 0.001 (0.043)	Loss 3.9122 (4.0731)	Entropy 1.90087 (1.90097)	Top-1 acc 34.375 (29.389)	Top-5 acc 55.859 (52.928)	lr 0.02498
Train [3][490/3239]	Time 0.210 (0.497)	Data Time 0.001 (0.042)	Loss 4.0307 (4.0734)	Entropy 1.90086 (1.90097)	Top-1 acc 30.078 (29.378)	Top-5 acc 52.344 (52.944)	lr 0.02498
Train [3][500/3239]	Time 0.232 (0.495)	Data Time 0.002 (0.041)	Loss 4.1115 (4.0745)	Entropy 1.90086 (1.90097)	Top-1 acc 30.859 (29.369)	Top-5 acc 53.516 (52.922)	lr 0.02498
Train [3][510/3239]	Time 0.247 (0.494)	Data Time 0.001 (0.041)	Loss 4.0404 (4.0747)	Entropy 1.90086 (1.90096)	Top-1 acc 30.469 (29.376)	Top-5 acc 54.688 (52.917)	lr 0.02498
Train [3][520/3239]	Time 0.178 (0.492)	Data Time 0.001 (0.040)	Loss 4.0790 (4.0750)	Entropy 1.90085 (1.90096)	Top-1 acc 31.250 (29.377)	Top-5 acc 53.125 (52.896)	lr 0.02498
Train [3][530/3239]	Time 0.202 (0.490)	Data Time 0.001 (0.039)	Loss 3.9505 (4.0749)	Entropy 1.90085 (1.90096)	Top-1 acc 31.250 (29.373)	Top-5 acc 55.859 (52.903)	lr 0.02498
Train [3][540/3239]	Time 0.316 (0.489)	Data Time 0.001 (0.038)	Loss 4.2253 (4.0751)	Entropy 1.90084 (1.90096)	Top-1 acc 26.172 (29.373)	Top-5 acc 51.172 (52.907)	lr 0.02498
Train [3][550/3239]	Time 0.306 (0.487)	Data Time 0.001 (0.038)	Loss 4.0704 (4.0743)	Entropy 1.90084 (1.90096)	Top-1 acc 26.172 (29.386)	Top-5 acc 53.516 (52.929)	lr 0.02498
Train [3][560/3239]	Time 2.149 (0.486)	Data Time 0.001 (0.037)	Loss 4.2410 (4.0732)	Entropy 1.90084 (1.90095)	Top-1 acc 22.656 (29.403)	Top-5 acc 50.000 (52.956)	lr 0.02498
Train [3][570/3239]	Time 0.223 (0.481)	Data Time 0.001 (0.037)	Loss 4.0599 (4.0726)	Entropy 1.90083 (1.90095)	Top-1 acc 30.859 (29.408)	Top-5 acc 53.125 (52.972)	lr 0.02498
Train [3][580/3239]	Time 0.214 (0.480)	Data Time 0.001 (0.036)	Loss 3.8658 (4.0718)	Entropy 1.90083 (1.90095)	Top-1 acc 31.250 (29.430)	Top-5 acc 57.031 (52.988)	lr 0.02498
Train [3][590/3239]	Time 0.154 (0.479)	Data Time 0.001 (0.035)	Loss 4.0341 (4.0711)	Entropy 1.90083 (1.90095)	Top-1 acc 29.688 (29.440)	Top-5 acc 56.250 (53.011)	lr 0.02498
Train [3][600/3239]	Time 0.162 (0.478)	Data Time 0.001 (0.035)	Loss 3.9822 (4.0714)	Entropy 1.90083 (1.90095)	Top-1 acc 30.078 (29.446)	Top-5 acc 57.031 (53.005)	lr 0.02498
Train [3][610/3239]	Time 0.243 (0.477)	Data Time 0.001 (0.035)	Loss 4.2651 (4.0720)	Entropy 1.90082 (1.90094)	Top-1 acc 27.734 (29.452)	Top-5 acc 50.000 (52.982)	lr 0.02498
Train [3][620/3239]	Time 0.162 (0.475)	Data Time 0.001 (0.034)	Loss 3.9876 (4.0723)	Entropy 1.90081 (1.90094)	Top-1 acc 28.125 (29.435)	Top-5 acc 53.516 (52.963)	lr 0.02498
Train [3][630/3239]	Time 0.155 (0.474)	Data Time 0.001 (0.034)	Loss 3.9020 (4.0722)	Entropy 1.90080 (1.90094)	Top-1 acc 32.812 (29.444)	Top-5 acc 57.812 (52.981)	lr 0.02498
Train [3][640/3239]	Time 0.195 (0.473)	Data Time 0.001 (0.033)	Loss 4.3489 (4.0730)	Entropy 1.90079 (1.90094)	Top-1 acc 25.391 (29.442)	Top-5 acc 48.438 (52.964)	lr 0.02498
Train [3][650/3239]	Time 0.237 (0.472)	Data Time 0.002 (0.033)	Loss 4.2179 (4.0740)	Entropy 1.90078 (1.90093)	Top-1 acc 28.516 (29.432)	Top-5 acc 49.219 (52.942)	lr 0.02498
Train [3][660/3239]	Time 0.224 (0.470)	Data Time 0.001 (0.032)	Loss 3.9778 (4.0733)	Entropy 1.90077 (1.90093)	Top-1 acc 26.562 (29.439)	Top-5 acc 52.734 (52.954)	lr 0.02498
Train [3][670/3239]	Time 2.247 (0.469)	Data Time 0.001 (0.032)	Loss 4.1644 (4.0738)	Entropy 1.90077 (1.90093)	Top-1 acc 26.172 (29.414)	Top-5 acc 48.828 (52.932)	lr 0.02498
Train [3][680/3239]	Time 0.153 (0.466)	Data Time 0.001 (0.031)	Loss 4.0897 (4.0733)	Entropy 1.90076 (1.90093)	Top-1 acc 29.297 (29.423)	Top-5 acc 49.219 (52.932)	lr 0.02498
Train [3][690/3239]	Time 0.216 (0.465)	Data Time 0.001 (0.031)	Loss 4.2414 (4.0741)	Entropy 1.90075 (1.90092)	Top-1 acc 28.125 (29.410)	Top-5 acc 51.953 (52.917)	lr 0.02498
Train [3][700/3239]	Time 0.204 (0.464)	Data Time 0.001 (0.031)	Loss 4.1105 (4.0733)	Entropy 1.90075 (1.90092)	Top-1 acc 30.859 (29.426)	Top-5 acc 47.656 (52.935)	lr 0.02498
Train [3][710/3239]	Time 0.307 (0.511)	Data Time 0.002 (0.030)	Loss 3.9201 (4.0729)	Entropy 1.90074 (1.90092)	Top-1 acc 30.859 (29.431)	Top-5 acc 55.859 (52.932)	lr 0.02498
Train [3][720/3239]	Time 0.324 (0.510)	Data Time 0.001 (0.030)	Loss 4.0777 (4.0726)	Entropy 1.90074 (1.90092)	Top-1 acc 33.594 (29.432)	Top-5 acc 52.344 (52.940)	lr 0.02498
Train [3][730/3239]	Time 0.191 (0.508)	Data Time 0.002 (0.029)	Loss 4.0349 (4.0720)	Entropy 1.90074 (1.90091)	Top-1 acc 30.078 (29.453)	Top-5 acc 53.906 (52.951)	lr 0.02498
Train [3][740/3239]	Time 0.255 (0.507)	Data Time 0.002 (0.029)	Loss 3.9614 (4.0724)	Entropy 1.90073 (1.90091)	Top-1 acc 30.078 (29.447)	Top-5 acc 51.953 (52.936)	lr 0.02498
Train [3][750/3239]	Time 0.250 (0.506)	Data Time 0.002 (0.029)	Loss 4.0072 (4.0719)	Entropy 1.90073 (1.90091)	Top-1 acc 33.594 (29.469)	Top-5 acc 53.125 (52.934)	lr 0.02498
Train [3][760/3239]	Time 0.209 (0.505)	Data Time 0.001 (0.028)	Loss 4.0221 (4.0721)	Entropy 1.90072 (1.90091)	Top-1 acc 33.594 (29.463)	Top-5 acc 55.078 (52.934)	lr 0.02498
Train [3][770/3239]	Time 0.199 (0.503)	Data Time 0.001 (0.028)	Loss 4.1617 (4.0720)	Entropy 1.90072 (1.90091)	Top-1 acc 28.516 (29.466)	Top-5 acc 52.344 (52.938)	lr 0.02498
Train [3][780/3239]	Time 2.206 (0.502)	Data Time 0.001 (0.028)	Loss 3.9758 (4.0718)	Entropy 1.90072 (1.90090)	Top-1 acc 31.250 (29.462)	Top-5 acc 54.688 (52.936)	lr 0.02498
Train [3][790/3239]	Time 0.146 (0.498)	Data Time 0.001 (0.027)	Loss 4.2521 (4.0716)	Entropy 1.90072 (1.90090)	Top-1 acc 27.734 (29.476)	Top-5 acc 49.609 (52.940)	lr 0.02498
Train [3][800/3239]	Time 0.163 (0.497)	Data Time 0.001 (0.027)	Loss 4.0419 (4.0716)	Entropy 1.90071 (1.90090)	Top-1 acc 32.422 (29.472)	Top-5 acc 51.172 (52.933)	lr 0.02498
Train [3][810/3239]	Time 0.211 (0.496)	Data Time 0.001 (0.027)	Loss 4.0091 (4.0713)	Entropy 1.90071 (1.90090)	Top-1 acc 31.250 (29.482)	Top-5 acc 52.734 (52.936)	lr 0.02498
Train [3][820/3239]	Time 0.237 (0.495)	Data Time 0.001 (0.026)	Loss 4.0210 (4.0719)	Entropy 1.90071 (1.90089)	Top-1 acc 27.344 (29.467)	Top-5 acc 55.859 (52.926)	lr 0.02498
Train [3][830/3239]	Time 0.205 (0.494)	Data Time 0.001 (0.026)	Loss 4.0901 (4.0717)	Entropy 1.90070 (1.90089)	Top-1 acc 28.125 (29.470)	Top-5 acc 58.594 (52.936)	lr 0.02498
Train [3][840/3239]	Time 0.197 (0.493)	Data Time 0.002 (0.026)	Loss 3.9615 (4.0715)	Entropy 1.90069 (1.90089)	Top-1 acc 29.688 (29.467)	Top-5 acc 53.125 (52.937)	lr 0.02498
Train [3][850/3239]	Time 0.246 (0.492)	Data Time 0.002 (0.026)	Loss 4.0666 (4.0711)	Entropy 1.90069 (1.90089)	Top-1 acc 29.297 (29.483)	Top-5 acc 56.250 (52.952)	lr 0.02498
Train [3][860/3239]	Time 0.210 (0.491)	Data Time 0.001 (0.025)	Loss 4.0200 (4.0702)	Entropy 1.90068 (1.90088)	Top-1 acc 28.906 (29.494)	Top-5 acc 57.031 (52.975)	lr 0.02498
Train [3][870/3239]	Time 0.156 (0.490)	Data Time 0.001 (0.025)	Loss 4.2149 (4.0707)	Entropy 1.90067 (1.90088)	Top-1 acc 26.953 (29.494)	Top-5 acc 49.609 (52.971)	lr 0.02498
Train [3][880/3239]	Time 0.168 (0.489)	Data Time 0.001 (0.025)	Loss 4.0947 (4.0706)	Entropy 1.90067 (1.90088)	Top-1 acc 27.734 (29.490)	Top-5 acc 49.609 (52.975)	lr 0.02498
Train [3][890/3239]	Time 2.202 (0.488)	Data Time 0.001 (0.025)	Loss 4.0769 (4.0699)	Entropy 1.90067 (1.90088)	Top-1 acc 31.250 (29.509)	Top-5 acc 53.516 (52.997)	lr 0.02498
Train [3][900/3239]	Time 0.213 (0.485)	Data Time 0.001 (0.024)	Loss 3.9590 (4.0700)	Entropy 1.90066 (1.90087)	Top-1 acc 33.984 (29.505)	Top-5 acc 53.906 (52.982)	lr 0.02498
Train [3][910/3239]	Time 0.182 (0.485)	Data Time 0.001 (0.024)	Loss 4.1128 (4.0702)	Entropy 1.90065 (1.90087)	Top-1 acc 28.516 (29.506)	Top-5 acc 51.562 (52.971)	lr 0.02498
Train [3][920/3239]	Time 0.224 (0.484)	Data Time 0.001 (0.024)	Loss 4.0664 (4.0700)	Entropy 1.90064 (1.90087)	Top-1 acc 28.906 (29.517)	Top-5 acc 53.125 (52.976)	lr 0.02498
Train [3][930/3239]	Time 0.193 (0.483)	Data Time 0.001 (0.024)	Loss 4.0174 (4.0704)	Entropy 1.90063 (1.90087)	Top-1 acc 30.078 (29.510)	Top-5 acc 54.297 (52.952)	lr 0.02498
Train [3][940/3239]	Time 0.197 (0.482)	Data Time 0.001 (0.023)	Loss 3.9661 (4.0701)	Entropy 1.90063 (1.90086)	Top-1 acc 32.031 (29.528)	Top-5 acc 51.172 (52.966)	lr 0.02498
Train [3][950/3239]	Time 0.218 (0.481)	Data Time 0.001 (0.023)	Loss 3.9983 (4.0697)	Entropy 1.90062 (1.90086)	Top-1 acc 33.594 (29.531)	Top-5 acc 53.125 (52.967)	lr 0.02498
Train [3][960/3239]	Time 0.188 (0.480)	Data Time 0.001 (0.023)	Loss 4.1682 (4.0694)	Entropy 1.90061 (1.90086)	Top-1 acc 25.781 (29.533)	Top-5 acc 51.562 (52.984)	lr 0.02498
Train [3][970/3239]	Time 0.153 (0.479)	Data Time 0.001 (0.023)	Loss 4.1883 (4.0700)	Entropy 1.90060 (1.90086)	Top-1 acc 28.125 (29.527)	Top-5 acc 50.781 (52.966)	lr 0.02498
Train [3][980/3239]	Time 0.271 (0.479)	Data Time 0.001 (0.023)	Loss 4.0254 (4.0707)	Entropy 1.90059 (1.90085)	Top-1 acc 28.516 (29.509)	Top-5 acc 53.906 (52.952)	lr 0.02498
Train [3][990/3239]	Time 0.269 (0.478)	Data Time 0.001 (0.022)	Loss 3.9158 (4.0699)	Entropy 1.90058 (1.90085)	Top-1 acc 35.156 (29.526)	Top-5 acc 58.203 (52.976)	lr 0.02498
Train [3][1000/3239]	Time 2.274 (0.477)	Data Time 0.001 (0.022)	Loss 4.0885 (4.0698)	Entropy 1.90058 (1.90085)	Top-1 acc 29.688 (29.517)	Top-5 acc 51.953 (52.976)	lr 0.02498
Train [3][1010/3239]	Time 0.247 (0.475)	Data Time 0.001 (0.022)	Loss 4.1935 (4.0709)	Entropy 1.90058 (1.90085)	Top-1 acc 27.734 (29.499)	Top-5 acc 51.562 (52.959)	lr 0.02498
Train [3][1020/3239]	Time 0.205 (0.474)	Data Time 0.001 (0.022)	Loss 4.1063 (4.0707)	Entropy 1.90057 (1.90084)	Top-1 acc 32.812 (29.515)	Top-5 acc 51.953 (52.967)	lr 0.02498
Train [3][1030/3239]	Time 0.191 (0.473)	Data Time 0.001 (0.022)	Loss 4.0042 (4.0704)	Entropy 1.90056 (1.90084)	Top-1 acc 30.469 (29.519)	Top-5 acc 57.031 (52.985)	lr 0.02498
Train [3][1040/3239]	Time 0.205 (0.473)	Data Time 0.001 (0.021)	Loss 4.1349 (4.0704)	Entropy 1.90056 (1.90084)	Top-1 acc 28.125 (29.509)	Top-5 acc 50.391 (52.982)	lr 0.02498
Train [3][1050/3239]	Time 0.197 (0.472)	Data Time 0.001 (0.021)	Loss 3.9552 (4.0703)	Entropy 1.90055 (1.90084)	Top-1 acc 31.250 (29.507)	Top-5 acc 57.031 (52.981)	lr 0.02498
Train [3][1060/3239]	Time 0.198 (0.471)	Data Time 0.001 (0.021)	Loss 3.6835 (4.0703)	Entropy 1.90055 (1.90083)	Top-1 acc 36.719 (29.504)	Top-5 acc 61.719 (52.973)	lr 0.02498
Train [3][1070/3239]	Time 0.303 (0.500)	Data Time 0.003 (0.021)	Loss 4.0246 (4.0706)	Entropy 1.90055 (1.90083)	Top-1 acc 27.344 (29.508)	Top-5 acc 53.125 (52.965)	lr 0.02498
Train [3][1080/3239]	Time 0.187 (0.500)	Data Time 0.002 (0.021)	Loss 3.9009 (4.0706)	Entropy 1.90053 (1.90083)	Top-1 acc 32.812 (29.509)	Top-5 acc 53.516 (52.964)	lr 0.02498
Train [3][1090/3239]	Time 0.186 (0.499)	Data Time 0.001 (0.021)	Loss 3.9869 (4.0705)	Entropy 1.90053 (1.90082)	Top-1 acc 30.469 (29.514)	Top-5 acc 53.906 (52.969)	lr 0.02498
Train [3][1100/3239]	Time 0.242 (0.499)	Data Time 0.001 (0.021)	Loss 3.8915 (4.0694)	Entropy 1.90053 (1.90082)	Top-1 acc 35.938 (29.537)	Top-5 acc 56.250 (52.986)	lr 0.02498
Train [3][1110/3239]	Time 2.148 (0.498)	Data Time 0.002 (0.020)	Loss 4.1248 (4.0688)	Entropy 1.90053 (1.90082)	Top-1 acc 29.297 (29.556)	Top-5 acc 51.562 (52.990)	lr 0.02498
Train [3][1120/3239]	Time 0.208 (0.495)	Data Time 0.001 (0.020)	Loss 4.0714 (4.0681)	Entropy 1.90053 (1.90082)	Top-1 acc 30.078 (29.566)	Top-5 acc 52.344 (52.998)	lr 0.02498
Train [3][1130/3239]	Time 0.187 (0.494)	Data Time 0.002 (0.020)	Loss 3.9772 (4.0680)	Entropy 1.90052 (1.90081)	Top-1 acc 33.203 (29.577)	Top-5 acc 50.781 (53.000)	lr 0.02498
Train [3][1140/3239]	Time 0.185 (0.494)	Data Time 0.001 (0.020)	Loss 4.0940 (4.0681)	Entropy 1.90051 (1.90081)	Top-1 acc 29.297 (29.570)	Top-5 acc 51.953 (52.998)	lr 0.02498
Train [3][1150/3239]	Time 0.309 (0.493)	Data Time 0.001 (0.020)	Loss 3.9496 (4.0682)	Entropy 1.90050 (1.90081)	Top-1 acc 33.203 (29.562)	Top-5 acc 55.469 (52.995)	lr 0.02498
Train [3][1160/3239]	Time 0.213 (0.492)	Data Time 0.001 (0.020)	Loss 3.7343 (4.0682)	Entropy 1.90049 (1.90081)	Top-1 acc 37.891 (29.567)	Top-5 acc 63.281 (52.991)	lr 0.02498
Train [3][1170/3239]	Time 0.215 (0.492)	Data Time 0.001 (0.019)	Loss 3.9894 (4.0682)	Entropy 1.90047 (1.90080)	Top-1 acc 28.125 (29.556)	Top-5 acc 53.125 (52.989)	lr 0.02498
Train [3][1180/3239]	Time 0.192 (0.491)	Data Time 0.002 (0.019)	Loss 4.2366 (4.0685)	Entropy 1.90047 (1.90080)	Top-1 acc 26.172 (29.550)	Top-5 acc 50.000 (52.980)	lr 0.02498
Train [3][1190/3239]	Time 0.154 (0.490)	Data Time 0.001 (0.019)	Loss 4.2393 (4.0684)	Entropy 1.90047 (1.90080)	Top-1 acc 27.344 (29.557)	Top-5 acc 48.047 (52.978)	lr 0.02498
Train [3][1200/3239]	Time 0.171 (0.489)	Data Time 0.001 (0.019)	Loss 4.0770 (4.0682)	Entropy 1.90046 (1.90080)	Top-1 acc 33.203 (29.571)	Top-5 acc 53.906 (52.984)	lr 0.02498
Train [3][1210/3239]	Time 0.163 (0.489)	Data Time 0.001 (0.019)	Loss 3.8442 (4.0676)	Entropy 1.90046 (1.90079)	Top-1 acc 30.859 (29.573)	Top-5 acc 58.594 (52.997)	lr 0.02498
Train [3][1220/3239]	Time 2.061 (0.488)	Data Time 0.001 (0.019)	Loss 4.0506 (4.0672)	Entropy 1.90046 (1.90079)	Top-1 acc 32.812 (29.585)	Top-5 acc 53.516 (53.011)	lr 0.02498
Train [3][1230/3239]	Time 0.223 (0.486)	Data Time 0.002 (0.019)	Loss 3.9340 (4.0671)	Entropy 1.90045 (1.90079)	Top-1 acc 30.469 (29.588)	Top-5 acc 55.859 (53.020)	lr 0.02498
Train [3][1240/3239]	Time 0.150 (0.485)	Data Time 0.002 (0.019)	Loss 4.0961 (4.0668)	Entropy 1.90045 (1.90078)	Top-1 acc 27.344 (29.596)	Top-5 acc 52.734 (53.030)	lr 0.02498
Train [3][1250/3239]	Time 0.171 (0.484)	Data Time 0.001 (0.018)	Loss 4.1639 (4.0668)	Entropy 1.90044 (1.90078)	Top-1 acc 28.516 (29.596)	Top-5 acc 50.781 (53.036)	lr 0.02498
Train [3][1260/3239]	Time 0.212 (0.484)	Data Time 0.001 (0.018)	Loss 4.0894 (4.0665)	Entropy 1.90044 (1.90078)	Top-1 acc 27.734 (29.603)	Top-5 acc 50.781 (53.038)	lr 0.02498
Train [3][1270/3239]	Time 0.194 (0.483)	Data Time 0.001 (0.018)	Loss 3.8720 (4.0661)	Entropy 1.90043 (1.90078)	Top-1 acc 30.859 (29.609)	Top-5 acc 57.812 (53.051)	lr 0.02498
Train [3][1280/3239]	Time 0.197 (0.482)	Data Time 0.001 (0.018)	Loss 4.0772 (4.0660)	Entropy 1.90042 (1.90077)	Top-1 acc 32.422 (29.611)	Top-5 acc 53.125 (53.057)	lr 0.02498
Train [3][1290/3239]	Time 0.284 (0.482)	Data Time 0.001 (0.018)	Loss 4.0730 (4.0663)	Entropy 1.90042 (1.90077)	Top-1 acc 28.125 (29.610)	Top-5 acc 51.562 (53.045)	lr 0.02498
Train [3][1300/3239]	Time 0.221 (0.481)	Data Time 0.001 (0.018)	Loss 4.1927 (4.0665)	Entropy 1.90041 (1.90077)	Top-1 acc 30.859 (29.598)	Top-5 acc 54.688 (53.040)	lr 0.02498
Train [3][1310/3239]	Time 0.200 (0.481)	Data Time 0.001 (0.018)	Loss 3.9970 (4.0664)	Entropy 1.90041 (1.90076)	Top-1 acc 33.203 (29.602)	Top-5 acc 51.562 (53.038)	lr 0.02498
Train [3][1320/3239]	Time 0.191 (0.480)	Data Time 0.002 (0.018)	Loss 3.9518 (4.0666)	Entropy 1.90041 (1.90076)	Top-1 acc 36.328 (29.606)	Top-5 acc 53.125 (53.036)	lr 0.02498
Train [3][1330/3239]	Time 2.148 (0.479)	Data Time 0.001 (0.017)	Loss 4.0844 (4.0666)	Entropy 1.90041 (1.90076)	Top-1 acc 32.812 (29.599)	Top-5 acc 54.297 (53.038)	lr 0.02498
Train [3][1340/3239]	Time 0.186 (0.477)	Data Time 0.001 (0.017)	Loss 4.0979 (4.0664)	Entropy 1.90040 (1.90076)	Top-1 acc 27.734 (29.609)	Top-5 acc 53.906 (53.048)	lr 0.02498
Train [3][1350/3239]	Time 0.189 (0.477)	Data Time 0.001 (0.017)	Loss 4.0517 (4.0668)	Entropy 1.90040 (1.90075)	Top-1 acc 28.125 (29.596)	Top-5 acc 52.344 (53.041)	lr 0.02497
Train [3][1360/3239]	Time 0.148 (0.476)	Data Time 0.001 (0.017)	Loss 4.1131 (4.0669)	Entropy 1.90039 (1.90075)	Top-1 acc 28.906 (29.596)	Top-5 acc 52.734 (53.047)	lr 0.02497
Train [3][1370/3239]	Time 0.222 (0.476)	Data Time 0.001 (0.017)	Loss 4.1382 (4.0673)	Entropy 1.90038 (1.90075)	Top-1 acc 27.734 (29.598)	Top-5 acc 49.219 (53.038)	lr 0.02497
Train [3][1380/3239]	Time 0.189 (0.475)	Data Time 0.001 (0.017)	Loss 4.0374 (4.0675)	Entropy 1.90038 (1.90075)	Top-1 acc 33.203 (29.601)	Top-5 acc 56.641 (53.036)	lr 0.02497
Train [3][1390/3239]	Time 0.214 (0.474)	Data Time 0.001 (0.017)	Loss 4.0184 (4.0676)	Entropy 1.90037 (1.90074)	Top-1 acc 32.812 (29.597)	Top-5 acc 56.641 (53.039)	lr 0.02497
Train [3][1400/3239]	Time 0.197 (0.474)	Data Time 0.001 (0.017)	Loss 4.0492 (4.0676)	Entropy 1.90037 (1.90074)	Top-1 acc 32.031 (29.592)	Top-5 acc 51.562 (53.038)	lr 0.02497
Train [3][1410/3239]	Time 0.203 (0.473)	Data Time 0.001 (0.017)	Loss 4.2316 (4.0678)	Entropy 1.90036 (1.90074)	Top-1 acc 27.734 (29.581)	Top-5 acc 50.000 (53.030)	lr 0.02497
Train [3][1420/3239]	Time 0.192 (0.473)	Data Time 0.001 (0.017)	Loss 4.0444 (4.0680)	Entropy 1.90036 (1.90074)	Top-1 acc 26.562 (29.579)	Top-5 acc 54.297 (53.028)	lr 0.02497
Train [3][1430/3239]	Time 0.273 (0.493)	Data Time 0.004 (0.016)	Loss 3.9098 (4.0675)	Entropy 1.90035 (1.90073)	Top-1 acc 32.031 (29.593)	Top-5 acc 58.594 (53.041)	lr 0.02497
Train [3][1440/3239]	Time 2.791 (0.494)	Data Time 0.002 (0.016)	Loss 4.0248 (4.0671)	Entropy 1.90035 (1.90073)	Top-1 acc 30.078 (29.593)	Top-5 acc 56.641 (53.051)	lr 0.02497
Train [3][1450/3239]	Time 0.308 (0.492)	Data Time 0.002 (0.016)	Loss 3.7766 (4.0671)	Entropy 1.90035 (1.90073)	Top-1 acc 35.938 (29.592)	Top-5 acc 57.422 (53.045)	lr 0.02497
Train [3][1460/3239]	Time 0.224 (0.491)	Data Time 0.001 (0.016)	Loss 4.1855 (4.0671)	Entropy 1.90035 (1.90073)	Top-1 acc 25.391 (29.586)	Top-5 acc 50.781 (53.047)	lr 0.02497
Train [3][1470/3239]	Time 0.152 (0.490)	Data Time 0.001 (0.016)	Loss 4.0132 (4.0670)	Entropy 1.90034 (1.90072)	Top-1 acc 31.641 (29.588)	Top-5 acc 48.828 (53.038)	lr 0.02497
Train [3][1480/3239]	Time 0.203 (0.490)	Data Time 0.001 (0.016)	Loss 4.0164 (4.0666)	Entropy 1.90034 (1.90072)	Top-1 acc 28.516 (29.589)	Top-5 acc 55.469 (53.050)	lr 0.02497
Train [3][1490/3239]	Time 0.182 (0.489)	Data Time 0.001 (0.016)	Loss 3.8359 (4.0667)	Entropy 1.90034 (1.90072)	Top-1 acc 33.203 (29.590)	Top-5 acc 57.812 (53.056)	lr 0.02497
Train [3][1500/3239]	Time 0.221 (0.489)	Data Time 0.001 (0.016)	Loss 3.9299 (4.0669)	Entropy 1.90033 (1.90071)	Top-1 acc 29.688 (29.590)	Top-5 acc 55.078 (53.050)	lr 0.02497
Train [3][1510/3239]	Time 0.155 (0.488)	Data Time 0.001 (0.016)	Loss 3.8361 (4.0669)	Entropy 1.90032 (1.90071)	Top-1 acc 33.984 (29.602)	Top-5 acc 58.203 (53.052)	lr 0.02497
Train [3][1520/3239]	Time 0.139 (0.488)	Data Time 0.001 (0.016)	Loss 3.9922 (4.0666)	Entropy 1.90032 (1.90071)	Top-1 acc 32.031 (29.610)	Top-5 acc 51.172 (53.059)	lr 0.02497
Train [3][1530/3239]	Time 0.230 (0.487)	Data Time 0.002 (0.016)	Loss 4.1200 (4.0666)	Entropy 1.90030 (1.90071)	Top-1 acc 27.734 (29.610)	Top-5 acc 48.828 (53.060)	lr 0.02497
Train [3][1540/3239]	Time 0.218 (0.487)	Data Time 0.002 (0.016)	Loss 3.9503 (4.0662)	Entropy 1.90030 (1.90070)	Top-1 acc 32.031 (29.620)	Top-5 acc 53.906 (53.070)	lr 0.02497
Train [3][1550/3239]	Time 2.112 (0.486)	Data Time 0.001 (0.015)	Loss 4.0014 (4.0663)	Entropy 1.90030 (1.90070)	Top-1 acc 32.031 (29.614)	Top-5 acc 54.297 (53.066)	lr 0.02497
Train [3][1560/3239]	Time 0.195 (0.484)	Data Time 0.001 (0.015)	Loss 4.1195 (4.0660)	Entropy 1.90029 (1.90070)	Top-1 acc 30.469 (29.618)	Top-5 acc 50.000 (53.070)	lr 0.02497
Train [3][1570/3239]	Time 0.148 (0.484)	Data Time 0.001 (0.015)	Loss 4.0672 (4.0662)	Entropy 1.90028 (1.90070)	Top-1 acc 35.547 (29.624)	Top-5 acc 51.172 (53.064)	lr 0.02497
Train [3][1580/3239]	Time 0.288 (0.483)	Data Time 0.001 (0.015)	Loss 3.9903 (4.0660)	Entropy 1.90028 (1.90069)	Top-1 acc 30.078 (29.630)	Top-5 acc 57.422 (53.078)	lr 0.02497
Train [3][1590/3239]	Time 0.232 (0.483)	Data Time 0.001 (0.015)	Loss 4.3206 (4.0659)	Entropy 1.90027 (1.90069)	Top-1 acc 22.656 (29.635)	Top-5 acc 47.656 (53.081)	lr 0.02497
Train [3][1600/3239]	Time 0.182 (0.482)	Data Time 0.001 (0.015)	Loss 4.1027 (4.0656)	Entropy 1.90026 (1.90069)	Top-1 acc 27.344 (29.638)	Top-5 acc 53.125 (53.088)	lr 0.02497
Train [3][1610/3239]	Time 0.246 (0.482)	Data Time 0.001 (0.015)	Loss 3.9832 (4.0656)	Entropy 1.90025 (1.90069)	Top-1 acc 32.031 (29.645)	Top-5 acc 52.734 (53.094)	lr 0.02497
Train [3][1620/3239]	Time 0.202 (0.481)	Data Time 0.001 (0.015)	Loss 3.8941 (4.0656)	Entropy 1.90023 (1.90068)	Top-1 acc 35.156 (29.643)	Top-5 acc 55.078 (53.095)	lr 0.02497
Train [3][1630/3239]	Time 0.169 (0.481)	Data Time 0.001 (0.015)	Loss 4.0219 (4.0655)	Entropy 1.90023 (1.90068)	Top-1 acc 31.641 (29.646)	Top-5 acc 50.391 (53.092)	lr 0.02497
Train [3][1640/3239]	Time 0.231 (0.480)	Data Time 0.001 (0.015)	Loss 3.9976 (4.0655)	Entropy 1.90022 (1.90068)	Top-1 acc 31.250 (29.645)	Top-5 acc 55.859 (53.090)	lr 0.02497
Train [3][1650/3239]	Time 0.195 (0.480)	Data Time 0.001 (0.015)	Loss 4.1113 (4.0658)	Entropy 1.90022 (1.90067)	Top-1 acc 26.172 (29.639)	Top-5 acc 53.516 (53.084)	lr 0.02497
Train [3][1660/3239]	Time 2.138 (0.479)	Data Time 0.001 (0.015)	Loss 4.0758 (4.0657)	Entropy 1.90022 (1.90067)	Top-1 acc 28.516 (29.642)	Top-5 acc 55.859 (53.088)	lr 0.02497
Train [3][1670/3239]	Time 0.203 (0.478)	Data Time 0.001 (0.015)	Loss 3.9226 (4.0657)	Entropy 1.90021 (1.90067)	Top-1 acc 34.766 (29.641)	Top-5 acc 55.078 (53.084)	lr 0.02497
Train [3][1680/3239]	Time 0.159 (0.477)	Data Time 0.001 (0.014)	Loss 3.9419 (4.0657)	Entropy 1.90020 (1.90067)	Top-1 acc 26.172 (29.642)	Top-5 acc 54.688 (53.085)	lr 0.02497
Train [3][1690/3239]	Time 0.167 (0.477)	Data Time 0.001 (0.014)	Loss 4.0710 (4.0656)	Entropy 1.90019 (1.90066)	Top-1 acc 32.031 (29.645)	Top-5 acc 51.562 (53.087)	lr 0.02497
Train [3][1700/3239]	Time 0.164 (0.477)	Data Time 0.001 (0.014)	Loss 4.1626 (4.0658)	Entropy 1.90019 (1.90066)	Top-1 acc 23.438 (29.635)	Top-5 acc 51.172 (53.082)	lr 0.02497
Train [3][1710/3239]	Time 0.331 (0.476)	Data Time 0.001 (0.014)	Loss 3.9755 (4.0664)	Entropy 1.90019 (1.90066)	Top-1 acc 33.594 (29.625)	Top-5 acc 56.641 (53.070)	lr 0.02497
Train [3][1720/3239]	Time 0.147 (0.476)	Data Time 0.001 (0.014)	Loss 4.1917 (4.0663)	Entropy 1.90017 (1.90066)	Top-1 acc 29.297 (29.624)	Top-5 acc 51.172 (53.069)	lr 0.02497
Train [3][1730/3239]	Time 0.189 (0.475)	Data Time 0.001 (0.014)	Loss 4.2512 (4.0662)	Entropy 1.90016 (1.90065)	Top-1 acc 31.250 (29.626)	Top-5 acc 50.000 (53.069)	lr 0.02497
Train [3][1740/3239]	Time 0.234 (0.475)	Data Time 0.001 (0.014)	Loss 4.1772 (4.0662)	Entropy 1.90015 (1.90065)	Top-1 acc 25.391 (29.626)	Top-5 acc 50.391 (53.069)	lr 0.02497
Train [3][1750/3239]	Time 0.188 (0.474)	Data Time 0.001 (0.014)	Loss 3.9758 (4.0661)	Entropy 1.90015 (1.90065)	Top-1 acc 30.469 (29.629)	Top-5 acc 57.031 (53.070)	lr 0.02497
Train [3][1760/3239]	Time 0.241 (0.474)	Data Time 0.001 (0.014)	Loss 4.1556 (4.0662)	Entropy 1.90013 (1.90064)	Top-1 acc 27.344 (29.630)	Top-5 acc 48.828 (53.066)	lr 0.02497
Train [3][1770/3239]	Time 2.257 (0.474)	Data Time 0.001 (0.014)	Loss 4.0304 (4.0662)	Entropy 1.90013 (1.90064)	Top-1 acc 27.734 (29.622)	Top-5 acc 51.562 (53.059)	lr 0.02497
Train [3][1780/3239]	Time 0.235 (0.472)	Data Time 0.001 (0.014)	Loss 3.9305 (4.0663)	Entropy 1.90012 (1.90064)	Top-1 acc 30.469 (29.617)	Top-5 acc 58.594 (53.059)	lr 0.02497
Train [3][1790/3239]	Time 0.133 (0.472)	Data Time 0.001 (0.014)	Loss 3.9540 (4.0663)	Entropy 1.90012 (1.90064)	Top-1 acc 33.203 (29.621)	Top-5 acc 57.812 (53.057)	lr 0.02497
Train [3][1800/3239]	Time 0.237 (0.490)	Data Time 0.002 (0.014)	Loss 4.1845 (4.0666)	Entropy 1.90012 (1.90063)	Top-1 acc 27.344 (29.616)	Top-5 acc 46.094 (53.048)	lr 0.02497
Train [3][1810/3239]	Time 0.223 (0.489)	Data Time 0.002 (0.014)	Loss 3.9310 (4.0664)	Entropy 1.90011 (1.90063)	Top-1 acc 34.375 (29.621)	Top-5 acc 57.422 (53.051)	lr 0.02497
Train [3][1820/3239]	Time 0.200 (0.489)	Data Time 0.002 (0.014)	Loss 4.2459 (4.0666)	Entropy 1.90010 (1.90063)	Top-1 acc 28.906 (29.612)	Top-5 acc 51.172 (53.052)	lr 0.02497
Train [3][1830/3239]	Time 0.202 (0.488)	Data Time 0.001 (0.014)	Loss 4.0591 (4.0662)	Entropy 1.90010 (1.90062)	Top-1 acc 28.125 (29.617)	Top-5 acc 52.344 (53.057)	lr 0.02497
Train [3][1840/3239]	Time 0.299 (0.488)	Data Time 0.001 (0.014)	Loss 4.1551 (4.0663)	Entropy 1.90010 (1.90062)	Top-1 acc 29.688 (29.611)	Top-5 acc 50.391 (53.052)	lr 0.02497
Train [3][1850/3239]	Time 0.179 (0.488)	Data Time 0.001 (0.013)	Loss 4.0165 (4.0665)	Entropy 1.90009 (1.90062)	Top-1 acc 31.641 (29.611)	Top-5 acc 53.125 (53.047)	lr 0.02497
Train [3][1860/3239]	Time 0.210 (0.487)	Data Time 0.001 (0.013)	Loss 4.4289 (4.0671)	Entropy 1.90008 (1.90062)	Top-1 acc 20.703 (29.599)	Top-5 acc 44.922 (53.034)	lr 0.02497
Train [3][1870/3239]	Time 0.250 (0.487)	Data Time 0.001 (0.013)	Loss 4.0363 (4.0669)	Entropy 1.90007 (1.90061)	Top-1 acc 30.469 (29.601)	Top-5 acc 55.859 (53.038)	lr 0.02497
Train [3][1880/3239]	Time 2.508 (0.487)	Data Time 0.001 (0.013)	Loss 3.9127 (4.0666)	Entropy 1.90007 (1.90061)	Top-1 acc 32.812 (29.604)	Top-5 acc 58.203 (53.044)	lr 0.02497
Train [3][1890/3239]	Time 0.262 (0.485)	Data Time 0.003 (0.013)	Loss 4.0313 (4.0667)	Entropy 1.90006 (1.90061)	Top-1 acc 29.688 (29.600)	Top-5 acc 53.906 (53.040)	lr 0.02497
Train [3][1900/3239]	Time 0.236 (0.485)	Data Time 0.001 (0.013)	Loss 4.3356 (4.0668)	Entropy 1.90005 (1.90060)	Top-1 acc 29.688 (29.601)	Top-5 acc 46.094 (53.038)	lr 0.02497
Train [3][1910/3239]	Time 0.233 (0.485)	Data Time 0.001 (0.013)	Loss 4.0823 (4.0666)	Entropy 1.90005 (1.90060)	Top-1 acc 26.562 (29.603)	Top-5 acc 53.125 (53.040)	lr 0.02497
Train [3][1920/3239]	Time 0.201 (0.484)	Data Time 0.001 (0.013)	Loss 3.9814 (4.0662)	Entropy 1.90005 (1.90060)	Top-1 acc 34.375 (29.614)	Top-5 acc 55.859 (53.043)	lr 0.02497
Train [3][1930/3239]	Time 0.191 (0.484)	Data Time 0.001 (0.013)	Loss 3.8604 (4.0660)	Entropy 1.90004 (1.90060)	Top-1 acc 31.641 (29.614)	Top-5 acc 59.375 (53.053)	lr 0.02497
Train [3][1940/3239]	Time 0.238 (0.484)	Data Time 0.001 (0.013)	Loss 4.0936 (4.0661)	Entropy 1.90002 (1.90059)	Top-1 acc 29.297 (29.611)	Top-5 acc 54.297 (53.048)	lr 0.02497
Train [3][1950/3239]	Time 0.144 (0.483)	Data Time 0.001 (0.013)	Loss 4.0634 (4.0659)	Entropy 1.90002 (1.90059)	Top-1 acc 31.641 (29.615)	Top-5 acc 52.734 (53.060)	lr 0.02497
Train [3][1960/3239]	Time 0.224 (0.483)	Data Time 0.001 (0.013)	Loss 3.9423 (4.0657)	Entropy 1.90001 (1.90059)	Top-1 acc 29.688 (29.618)	Top-5 acc 56.641 (53.066)	lr 0.02497
Train [3][1970/3239]	Time 0.203 (0.482)	Data Time 0.001 (0.013)	Loss 4.0801 (4.0655)	Entropy 1.90000 (1.90058)	Top-1 acc 30.469 (29.625)	Top-5 acc 53.125 (53.072)	lr 0.02497
Train [3][1980/3239]	Time 0.214 (0.482)	Data Time 0.001 (0.013)	Loss 3.8864 (4.0655)	Entropy 1.89999 (1.90058)	Top-1 acc 32.422 (29.621)	Top-5 acc 57.812 (53.073)	lr 0.02497
Train [3][1990/3239]	Time 2.390 (0.482)	Data Time 0.002 (0.013)	Loss 4.0946 (4.0655)	Entropy 1.89999 (1.90058)	Top-1 acc 32.031 (29.618)	Top-5 acc 53.516 (53.071)	lr 0.02497
Train [3][2000/3239]	Time 0.160 (0.481)	Data Time 0.001 (0.013)	Loss 4.1262 (4.0656)	Entropy 1.89999 (1.90057)	Top-1 acc 29.297 (29.619)	Top-5 acc 50.000 (53.068)	lr 0.02497
Train [3][2010/3239]	Time 0.222 (0.480)	Data Time 0.002 (0.013)	Loss 4.1282 (4.0658)	Entropy 1.89998 (1.90057)	Top-1 acc 23.438 (29.610)	Top-5 acc 48.438 (53.065)	lr 0.02497
Train [3][2020/3239]	Time 0.197 (0.480)	Data Time 0.001 (0.013)	Loss 4.1730 (4.0657)	Entropy 1.89998 (1.90057)	Top-1 acc 26.172 (29.603)	Top-5 acc 52.344 (53.067)	lr 0.02497
Train [3][2030/3239]	Time 0.230 (0.480)	Data Time 0.001 (0.012)	Loss 3.9796 (4.0656)	Entropy 1.89997 (1.90057)	Top-1 acc 29.297 (29.607)	Top-5 acc 59.766 (53.072)	lr 0.02497
Train [3][2040/3239]	Time 0.216 (0.480)	Data Time 0.002 (0.012)	Loss 3.9711 (4.0653)	Entropy 1.89996 (1.90056)	Top-1 acc 29.297 (29.609)	Top-5 acc 54.297 (53.080)	lr 0.02497
Train [3][2050/3239]	Time 0.203 (0.479)	Data Time 0.001 (0.012)	Loss 3.9762 (4.0652)	Entropy 1.89995 (1.90056)	Top-1 acc 31.250 (29.611)	Top-5 acc 53.125 (53.083)	lr 0.02497
Train [3][2060/3239]	Time 0.236 (0.479)	Data Time 0.001 (0.012)	Loss 3.9592 (4.0652)	Entropy 1.89994 (1.90056)	Top-1 acc 33.203 (29.615)	Top-5 acc 53.516 (53.082)	lr 0.02497
Train [3][2070/3239]	Time 0.235 (0.479)	Data Time 0.001 (0.012)	Loss 4.1835 (4.0649)	Entropy 1.89994 (1.90055)	Top-1 acc 26.172 (29.625)	Top-5 acc 50.391 (53.087)	lr 0.02497
Train [3][2080/3239]	Time 0.176 (0.478)	Data Time 0.001 (0.012)	Loss 4.0215 (4.0649)	Entropy 1.89993 (1.90055)	Top-1 acc 31.250 (29.632)	Top-5 acc 55.078 (53.088)	lr 0.02497
Train [3][2090/3239]	Time 0.207 (0.478)	Data Time 0.001 (0.012)	Loss 3.9269 (4.0647)	Entropy 1.89991 (1.90055)	Top-1 acc 32.422 (29.639)	Top-5 acc 57.031 (53.092)	lr 0.02497
Train [3][2100/3239]	Time 2.163 (0.477)	Data Time 0.001 (0.012)	Loss 4.1572 (4.0648)	Entropy 1.89991 (1.90054)	Top-1 acc 26.172 (29.635)	Top-5 acc 50.391 (53.087)	lr 0.02497
Train [3][2110/3239]	Time 0.183 (0.476)	Data Time 0.001 (0.012)	Loss 4.0427 (4.0650)	Entropy 1.89990 (1.90054)	Top-1 acc 25.391 (29.629)	Top-5 acc 55.078 (53.079)	lr 0.02497
Train [3][2120/3239]	Time 0.257 (0.476)	Data Time 0.001 (0.012)	Loss 4.0036 (4.0651)	Entropy 1.89989 (1.90054)	Top-1 acc 27.734 (29.625)	Top-5 acc 55.859 (53.078)	lr 0.02497
Train [3][2130/3239]	Time 0.178 (0.475)	Data Time 0.001 (0.012)	Loss 3.9136 (4.0651)	Entropy 1.89989 (1.90054)	Top-1 acc 34.375 (29.627)	Top-5 acc 57.422 (53.078)	lr 0.02497
Train [3][2140/3239]	Time 0.252 (0.475)	Data Time 0.001 (0.012)	Loss 3.7192 (4.0650)	Entropy 1.89988 (1.90053)	Top-1 acc 35.938 (29.633)	Top-5 acc 62.109 (53.076)	lr 0.02497
Train [3][2150/3239]	Time 0.287 (0.475)	Data Time 0.001 (0.012)	Loss 4.0476 (4.0654)	Entropy 1.89987 (1.90053)	Top-1 acc 29.688 (29.628)	Top-5 acc 50.391 (53.066)	lr 0.02497
Train [3][2160/3239]	Time 0.252 (0.490)	Data Time 0.002 (0.012)	Loss 3.8562 (4.0654)	Entropy 1.89986 (1.90053)	Top-1 acc 35.938 (29.629)	Top-5 acc 60.156 (53.070)	lr 0.02497
Train [3][2170/3239]	Time 0.190 (0.490)	Data Time 0.002 (0.012)	Loss 3.9384 (4.0652)	Entropy 1.89985 (1.90052)	Top-1 acc 33.203 (29.637)	Top-5 acc 55.859 (53.078)	lr 0.02497
Train [3][2180/3239]	Time 0.203 (0.489)	Data Time 0.001 (0.012)	Loss 3.8375 (4.0650)	Entropy 1.89985 (1.90052)	Top-1 acc 36.719 (29.642)	Top-5 acc 58.203 (53.085)	lr 0.02497
Train [3][2190/3239]	Time 0.201 (0.489)	Data Time 0.001 (0.012)	Loss 4.2687 (4.0653)	Entropy 1.89985 (1.90052)	Top-1 acc 26.172 (29.638)	Top-5 acc 47.656 (53.076)	lr 0.02497
Train [3][2200/3239]	Time 0.208 (0.488)	Data Time 0.001 (0.012)	Loss 4.0669 (4.0651)	Entropy 1.89985 (1.90051)	Top-1 acc 28.125 (29.636)	Top-5 acc 53.906 (53.083)	lr 0.02497
Train [3][2210/3239]	Time 2.233 (0.488)	Data Time 0.002 (0.012)	Loss 3.9398 (4.0649)	Entropy 1.89985 (1.90051)	Top-1 acc 30.078 (29.638)	Top-5 acc 55.469 (53.086)	lr 0.02497
Train [3][2220/3239]	Time 0.131 (0.487)	Data Time 0.001 (0.012)	Loss 4.0680 (4.0647)	Entropy 1.89983 (1.90051)	Top-1 acc 28.906 (29.640)	Top-5 acc 51.953 (53.090)	lr 0.02497
Train [3][2230/3239]	Time 0.190 (0.486)	Data Time 0.001 (0.012)	Loss 4.1621 (4.0646)	Entropy 1.89982 (1.90051)	Top-1 acc 26.172 (29.642)	Top-5 acc 52.344 (53.094)	lr 0.02497
Train [3][2240/3239]	Time 0.131 (0.486)	Data Time 0.001 (0.012)	Loss 4.1746 (4.0644)	Entropy 1.89982 (1.90050)	Top-1 acc 27.344 (29.647)	Top-5 acc 52.344 (53.099)	lr 0.02497
Train [3][2250/3239]	Time 0.129 (0.485)	Data Time 0.001 (0.012)	Loss 3.9044 (4.0644)	Entropy 1.89981 (1.90050)	Top-1 acc 37.109 (29.645)	Top-5 acc 57.812 (53.103)	lr 0.02497
Train [3][2260/3239]	Time 0.169 (0.485)	Data Time 0.001 (0.011)	Loss 4.1985 (4.0646)	Entropy 1.89980 (1.90050)	Top-1 acc 24.609 (29.641)	Top-5 acc 47.656 (53.092)	lr 0.02497
Train [3][2270/3239]	Time 0.211 (0.485)	Data Time 0.002 (0.011)	Loss 4.0865 (4.0648)	Entropy 1.89980 (1.90049)	Top-1 acc 28.906 (29.635)	Top-5 acc 53.906 (53.087)	lr 0.02497
Train [3][2280/3239]	Time 0.197 (0.484)	Data Time 0.001 (0.011)	Loss 4.0911 (4.0647)	Entropy 1.89979 (1.90049)	Top-1 acc 31.250 (29.635)	Top-5 acc 52.734 (53.087)	lr 0.02497
Train [3][2290/3239]	Time 0.243 (0.484)	Data Time 0.001 (0.011)	Loss 4.1999 (4.0647)	Entropy 1.89978 (1.90049)	Top-1 acc 24.609 (29.637)	Top-5 acc 51.172 (53.091)	lr 0.02497
Train [3][2300/3239]	Time 0.353 (0.484)	Data Time 0.001 (0.011)	Loss 3.9266 (4.0647)	Entropy 1.89978 (1.90048)	Top-1 acc 31.250 (29.637)	Top-5 acc 54.297 (53.091)	lr 0.02497
Train [3][2310/3239]	Time 0.173 (0.483)	Data Time 0.001 (0.011)	Loss 4.0392 (4.0646)	Entropy 1.89975 (1.90048)	Top-1 acc 32.031 (29.638)	Top-5 acc 54.297 (53.094)	lr 0.02497
Train [3][2320/3239]	Time 2.156 (0.483)	Data Time 0.001 (0.011)	Loss 3.8900 (4.0645)	Entropy 1.89975 (1.90048)	Top-1 acc 35.156 (29.639)	Top-5 acc 55.469 (53.095)	lr 0.02497
Train [3][2330/3239]	Time 0.208 (0.482)	Data Time 0.001 (0.011)	Loss 4.1990 (4.0647)	Entropy 1.89973 (1.90047)	Top-1 acc 28.516 (29.636)	Top-5 acc 50.781 (53.090)	lr 0.02497
Train [3][2340/3239]	Time 0.176 (0.481)	Data Time 0.001 (0.011)	Loss 3.9763 (4.0644)	Entropy 1.89972 (1.90047)	Top-1 acc 33.203 (29.645)	Top-5 acc 54.297 (53.096)	lr 0.02497
Train [3][2350/3239]	Time 0.194 (0.481)	Data Time 0.001 (0.011)	Loss 4.0959 (4.0643)	Entropy 1.89970 (1.90047)	Top-1 acc 26.953 (29.650)	Top-5 acc 49.609 (53.094)	lr 0.02497
Train [3][2360/3239]	Time 0.185 (0.481)	Data Time 0.001 (0.011)	Loss 3.9320 (4.0639)	Entropy 1.89969 (1.90046)	Top-1 acc 30.078 (29.656)	Top-5 acc 56.250 (53.106)	lr 0.02497
Train [3][2370/3239]	Time 0.157 (0.480)	Data Time 0.001 (0.011)	Loss 4.0780 (4.0637)	Entropy 1.89968 (1.90046)	Top-1 acc 29.688 (29.662)	Top-5 acc 47.266 (53.105)	lr 0.02497
Train [3][2380/3239]	Time 0.202 (0.480)	Data Time 0.001 (0.011)	Loss 4.0544 (4.0637)	Entropy 1.89968 (1.90046)	Top-1 acc 29.297 (29.665)	Top-5 acc 53.516 (53.107)	lr 0.02497
Train [3][2390/3239]	Time 0.237 (0.480)	Data Time 0.002 (0.011)	Loss 3.8012 (4.0639)	Entropy 1.89967 (1.90045)	Top-1 acc 31.641 (29.663)	Top-5 acc 58.984 (53.102)	lr 0.02497
Train [3][2400/3239]	Time 0.219 (0.480)	Data Time 0.001 (0.011)	Loss 4.0031 (4.0639)	Entropy 1.89966 (1.90045)	Top-1 acc 34.375 (29.667)	Top-5 acc 54.688 (53.101)	lr 0.02497
Train [3][2410/3239]	Time 0.193 (0.479)	Data Time 0.001 (0.011)	Loss 3.9792 (4.0639)	Entropy 1.89965 (1.90045)	Top-1 acc 31.641 (29.673)	Top-5 acc 55.469 (53.106)	lr 0.02497
Train [3][2420/3239]	Time 0.227 (0.479)	Data Time 0.002 (0.011)	Loss 3.9023 (4.0639)	Entropy 1.89964 (1.90044)	Top-1 acc 26.953 (29.672)	Top-5 acc 57.812 (53.104)	lr 0.02497
Train [3][2430/3239]	Time 2.151 (0.479)	Data Time 0.001 (0.011)	Loss 3.8818 (4.0641)	Entropy 1.89964 (1.90044)	Top-1 acc 34.375 (29.666)	Top-5 acc 55.859 (53.099)	lr 0.02497
Train [3][2440/3239]	Time 0.214 (0.478)	Data Time 0.001 (0.011)	Loss 4.1164 (4.0640)	Entropy 1.89964 (1.90044)	Top-1 acc 32.031 (29.667)	Top-5 acc 53.125 (53.102)	lr 0.02497
Train [3][2450/3239]	Time 0.208 (0.477)	Data Time 0.001 (0.011)	Loss 4.2185 (4.0639)	Entropy 1.89963 (1.90043)	Top-1 acc 27.344 (29.667)	Top-5 acc 48.047 (53.100)	lr 0.02497
Train [3][2460/3239]	Time 0.223 (0.477)	Data Time 0.001 (0.011)	Loss 3.9856 (4.0638)	Entropy 1.89962 (1.90043)	Top-1 acc 32.422 (29.665)	Top-5 acc 53.125 (53.099)	lr 0.02497
Train [3][2470/3239]	Time 0.176 (0.477)	Data Time 0.001 (0.011)	Loss 4.1301 (4.0637)	Entropy 1.89959 (1.90043)	Top-1 acc 29.688 (29.670)	Top-5 acc 50.391 (53.099)	lr 0.02497
Train [3][2480/3239]	Time 0.260 (0.476)	Data Time 0.001 (0.011)	Loss 4.1730 (4.0634)	Entropy 1.89958 (1.90042)	Top-1 acc 30.469 (29.680)	Top-5 acc 51.562 (53.104)	lr 0.02497
Train [3][2490/3239]	Time 0.211 (0.476)	Data Time 0.001 (0.011)	Loss 4.0470 (4.0636)	Entropy 1.89958 (1.90042)	Top-1 acc 27.344 (29.679)	Top-5 acc 53.516 (53.103)	lr 0.02497
Train [3][2500/3239]	Time 0.212 (0.476)	Data Time 0.002 (0.011)	Loss 4.1392 (4.0637)	Entropy 1.89956 (1.90042)	Top-1 acc 28.125 (29.677)	Top-5 acc 51.953 (53.101)	lr 0.02497
Train [3][2510/3239]	Time 0.150 (0.476)	Data Time 0.002 (0.011)	Loss 4.1523 (4.0637)	Entropy 1.89955 (1.90041)	Top-1 acc 28.516 (29.674)	Top-5 acc 52.344 (53.103)	lr 0.02497
Train [3][2520/3239]	Time 0.292 (0.487)	Data Time 0.003 (0.011)	Loss 4.0738 (4.0635)	Entropy 1.89955 (1.90041)	Top-1 acc 32.812 (29.679)	Top-5 acc 58.594 (53.107)	lr 0.02497
Train [3][2530/3239]	Time 0.197 (0.487)	Data Time 0.002 (0.011)	Loss 4.0477 (4.0635)	Entropy 1.89954 (1.90041)	Top-1 acc 26.562 (29.675)	Top-5 acc 55.078 (53.106)	lr 0.02497
Train [3][2540/3239]	Time 2.204 (0.487)	Data Time 0.002 (0.011)	Loss 3.9826 (4.0636)	Entropy 1.89954 (1.90040)	Top-1 acc 32.812 (29.678)	Top-5 acc 54.688 (53.107)	lr 0.02497
Train [3][2550/3239]	Time 0.205 (0.486)	Data Time 0.002 (0.011)	Loss 4.0786 (4.0635)	Entropy 1.89954 (1.90040)	Top-1 acc 33.984 (29.682)	Top-5 acc 53.906 (53.109)	lr 0.02497
Train [3][2560/3239]	Time 0.209 (0.486)	Data Time 0.001 (0.010)	Loss 3.9502 (4.0634)	Entropy 1.89954 (1.90040)	Top-1 acc 32.812 (29.688)	Top-5 acc 53.516 (53.110)	lr 0.02497
Train [3][2570/3239]	Time 0.252 (0.485)	Data Time 0.001 (0.010)	Loss 4.0138 (4.0633)	Entropy 1.89952 (1.90039)	Top-1 acc 30.078 (29.686)	Top-5 acc 54.297 (53.109)	lr 0.02497
Train [3][2580/3239]	Time 0.148 (0.485)	Data Time 0.001 (0.010)	Loss 4.2516 (4.0637)	Entropy 1.89952 (1.90039)	Top-1 acc 26.562 (29.680)	Top-5 acc 47.266 (53.094)	lr 0.02497
Train [3][2590/3239]	Time 0.195 (0.485)	Data Time 0.001 (0.010)	Loss 4.0330 (4.0638)	Entropy 1.89950 (1.90039)	Top-1 acc 34.375 (29.682)	Top-5 acc 54.688 (53.092)	lr 0.02497
Train [3][2600/3239]	Time 0.155 (0.484)	Data Time 0.001 (0.010)	Loss 4.2494 (4.0639)	Entropy 1.89950 (1.90038)	Top-1 acc 26.953 (29.685)	Top-5 acc 49.609 (53.089)	lr 0.02497
Train [3][2610/3239]	Time 0.308 (0.484)	Data Time 0.001 (0.010)	Loss 4.0775 (4.0637)	Entropy 1.89949 (1.90038)	Top-1 acc 32.812 (29.690)	Top-5 acc 52.734 (53.096)	lr 0.02497
Train [3][2620/3239]	Time 0.191 (0.484)	Data Time 0.001 (0.010)	Loss 4.1880 (4.0636)	Entropy 1.89947 (1.90038)	Top-1 acc 27.344 (29.691)	Top-5 acc 52.734 (53.095)	lr 0.02497
Train [3][2630/3239]	Time 0.133 (0.484)	Data Time 0.002 (0.010)	Loss 4.2582 (4.0640)	Entropy 1.89947 (1.90037)	Top-1 acc 27.344 (29.687)	Top-5 acc 45.703 (53.085)	lr 0.02497
Train [3][2640/3239]	Time 0.209 (0.483)	Data Time 0.002 (0.010)	Loss 4.0106 (4.0640)	Entropy 1.89945 (1.90037)	Top-1 acc 30.859 (29.684)	Top-5 acc 53.125 (53.086)	lr 0.02497
Train [3][2650/3239]	Time 0.188 (0.483)	Data Time 0.001 (0.010)	Loss 3.9015 (4.0640)	Entropy 1.89944 (1.90037)	Top-1 acc 29.297 (29.685)	Top-5 acc 59.766 (53.088)	lr 0.02497
Train [3][2660/3239]	Time 0.208 (0.483)	Data Time 0.001 (0.010)	Loss 3.8936 (4.0637)	Entropy 1.89943 (1.90036)	Top-1 acc 32.031 (29.690)	Top-5 acc 55.469 (53.091)	lr 0.02497
Train [3][2670/3239]	Time 0.222 (0.482)	Data Time 0.001 (0.010)	Loss 4.0184 (4.0638)	Entropy 1.89942 (1.90036)	Top-1 acc 32.812 (29.692)	Top-5 acc 54.297 (53.090)	lr 0.02497
Train [3][2680/3239]	Time 0.151 (0.482)	Data Time 0.002 (0.010)	Loss 4.3318 (4.0638)	Entropy 1.89942 (1.90036)	Top-1 acc 25.781 (29.688)	Top-5 acc 46.094 (53.091)	lr 0.02497
Train [3][2690/3239]	Time 0.224 (0.482)	Data Time 0.001 (0.010)	Loss 4.0519 (4.0639)	Entropy 1.89941 (1.90035)	Top-1 acc 32.031 (29.686)	Top-5 acc 54.297 (53.083)	lr 0.02497
Train [3][2700/3239]	Time 0.258 (0.482)	Data Time 0.002 (0.010)	Loss 3.8453 (4.0638)	Entropy 1.89941 (1.90035)	Top-1 acc 37.109 (29.689)	Top-5 acc 56.641 (53.085)	lr 0.02497
Train [3][2710/3239]	Time 0.226 (0.481)	Data Time 0.001 (0.010)	Loss 3.8847 (4.0638)	Entropy 1.89938 (1.90035)	Top-1 acc 34.375 (29.689)	Top-5 acc 58.594 (53.087)	lr 0.02497
Train [3][2720/3239]	Time 0.235 (0.481)	Data Time 0.001 (0.010)	Loss 3.9361 (4.0635)	Entropy 1.89937 (1.90034)	Top-1 acc 34.766 (29.694)	Top-5 acc 56.641 (53.095)	lr 0.02497
Train [3][2730/3239]	Time 0.152 (0.481)	Data Time 0.001 (0.010)	Loss 3.9556 (4.0634)	Entropy 1.89936 (1.90034)	Top-1 acc 31.250 (29.693)	Top-5 acc 55.469 (53.092)	lr 0.02497
Train [3][2740/3239]	Time 0.261 (0.480)	Data Time 0.001 (0.010)	Loss 3.8593 (4.0633)	Entropy 1.89936 (1.90034)	Top-1 acc 35.547 (29.693)	Top-5 acc 57.812 (53.095)	lr 0.02497
Train [3][2750/3239]	Time 0.154 (0.480)	Data Time 0.001 (0.010)	Loss 4.0782 (4.0633)	Entropy 1.89936 (1.90033)	Top-1 acc 25.781 (29.692)	Top-5 acc 50.391 (53.097)	lr 0.02497
Train [3][2760/3239]	Time 0.309 (0.480)	Data Time 0.001 (0.010)	Loss 4.0085 (4.0630)	Entropy 1.89935 (1.90033)	Top-1 acc 35.156 (29.696)	Top-5 acc 52.344 (53.106)	lr 0.02497
Train [3][2770/3239]	Time 0.191 (0.480)	Data Time 0.001 (0.010)	Loss 4.2352 (4.0631)	Entropy 1.89935 (1.90032)	Top-1 acc 24.219 (29.695)	Top-5 acc 51.953 (53.108)	lr 0.02497
Train [3][2780/3239]	Time 0.192 (0.479)	Data Time 0.001 (0.010)	Loss 4.0486 (4.0628)	Entropy 1.89935 (1.90032)	Top-1 acc 26.953 (29.703)	Top-5 acc 55.859 (53.114)	lr 0.02497
Train [3][2790/3239]	Time 0.216 (0.479)	Data Time 0.001 (0.010)	Loss 3.9704 (4.0629)	Entropy 1.89933 (1.90032)	Top-1 acc 30.078 (29.701)	Top-5 acc 53.906 (53.112)	lr 0.02496
Train [3][2800/3239]	Time 0.153 (0.479)	Data Time 0.001 (0.010)	Loss 4.1542 (4.0631)	Entropy 1.89933 (1.90031)	Top-1 acc 25.000 (29.698)	Top-5 acc 50.781 (53.107)	lr 0.02496
Train [3][2810/3239]	Time 0.158 (0.478)	Data Time 0.002 (0.010)	Loss 4.1187 (4.0632)	Entropy 1.89931 (1.90031)	Top-1 acc 28.125 (29.700)	Top-5 acc 48.828 (53.105)	lr 0.02496
Train [3][2820/3239]	Time 0.233 (0.478)	Data Time 0.001 (0.010)	Loss 4.1176 (4.0633)	Entropy 1.89926 (1.90031)	Top-1 acc 31.641 (29.702)	Top-5 acc 48.828 (53.101)	lr 0.02496
Train [3][2830/3239]	Time 0.234 (0.478)	Data Time 0.001 (0.010)	Loss 3.9948 (4.0633)	Entropy 1.89926 (1.90030)	Top-1 acc 30.078 (29.706)	Top-5 acc 52.344 (53.102)	lr 0.02496
Train [3][2840/3239]	Time 0.217 (0.478)	Data Time 0.001 (0.010)	Loss 4.0453 (4.0634)	Entropy 1.89925 (1.90030)	Top-1 acc 29.688 (29.702)	Top-5 acc 53.125 (53.098)	lr 0.02496
Train [3][2850/3239]	Time 0.204 (0.477)	Data Time 0.001 (0.010)	Loss 3.9993 (4.0635)	Entropy 1.89925 (1.90030)	Top-1 acc 30.078 (29.701)	Top-5 acc 54.297 (53.099)	lr 0.02496
Train [3][2860/3239]	Time 0.298 (0.489)	Data Time 0.095 (0.010)	Loss 3.9939 (4.0636)	Entropy 1.89925 (1.90029)	Top-1 acc 28.125 (29.699)	Top-5 acc 53.906 (53.098)	lr 0.02496
Train [3][2870/3239]	Time 0.232 (0.489)	Data Time 0.002 (0.010)	Loss 4.0217 (4.0636)	Entropy 1.89925 (1.90029)	Top-1 acc 28.516 (29.699)	Top-5 acc 55.859 (53.101)	lr 0.02496
Train [3][2880/3239]	Time 0.224 (0.488)	Data Time 0.002 (0.010)	Loss 4.0295 (4.0639)	Entropy 1.89924 (1.90028)	Top-1 acc 25.781 (29.692)	Top-5 acc 53.125 (53.093)	lr 0.02496
Train [3][2890/3239]	Time 0.166 (0.488)	Data Time 0.002 (0.010)	Loss 4.0977 (4.0638)	Entropy 1.89923 (1.90028)	Top-1 acc 28.516 (29.690)	Top-5 acc 48.438 (53.092)	lr 0.02496
Train [3][2900/3239]	Time 0.285 (0.488)	Data Time 0.001 (0.010)	Loss 3.8799 (4.0633)	Entropy 1.89922 (1.90028)	Top-1 acc 32.031 (29.697)	Top-5 acc 58.594 (53.104)	lr 0.02496
Train [3][2910/3239]	Time 0.296 (0.488)	Data Time 0.001 (0.010)	Loss 4.0306 (4.0635)	Entropy 1.89920 (1.90027)	Top-1 acc 29.297 (29.696)	Top-5 acc 56.250 (53.100)	lr 0.02496
Train [3][2920/3239]	Time 0.232 (0.487)	Data Time 0.001 (0.010)	Loss 3.9229 (4.0633)	Entropy 1.89919 (1.90027)	Top-1 acc 35.938 (29.702)	Top-5 acc 55.078 (53.109)	lr 0.02496
Train [3][2930/3239]	Time 0.183 (0.487)	Data Time 0.001 (0.010)	Loss 4.0835 (4.0631)	Entropy 1.89917 (1.90027)	Top-1 acc 29.297 (29.706)	Top-5 acc 52.734 (53.114)	lr 0.02496
Train [3][2940/3239]	Time 0.195 (0.487)	Data Time 0.001 (0.010)	Loss 4.0467 (4.0628)	Entropy 1.89917 (1.90026)	Top-1 acc 28.516 (29.712)	Top-5 acc 51.562 (53.121)	lr 0.02496
Train [3][2950/3239]	Time 0.203 (0.487)	Data Time 0.001 (0.010)	Loss 3.9649 (4.0628)	Entropy 1.89916 (1.90026)	Top-1 acc 28.516 (29.709)	Top-5 acc 55.469 (53.125)	lr 0.02496
Train [3][2960/3239]	Time 0.199 (0.486)	Data Time 0.001 (0.010)	Loss 4.2124 (4.0628)	Entropy 1.89915 (1.90026)	Top-1 acc 25.391 (29.709)	Top-5 acc 51.172 (53.123)	lr 0.02496
Train [3][2970/3239]	Time 0.222 (0.486)	Data Time 0.002 (0.010)	Loss 3.9987 (4.0626)	Entropy 1.89912 (1.90025)	Top-1 acc 30.078 (29.714)	Top-5 acc 52.734 (53.126)	lr 0.02496
Train [3][2980/3239]	Time 0.171 (0.486)	Data Time 0.003 (0.009)	Loss 3.9874 (4.0626)	Entropy 1.89911 (1.90025)	Top-1 acc 32.031 (29.716)	Top-5 acc 56.250 (53.131)	lr 0.02496
Train [3][2990/3239]	Time 0.158 (0.485)	Data Time 0.001 (0.009)	Loss 4.2006 (4.0628)	Entropy 1.89910 (1.90024)	Top-1 acc 30.859 (29.713)	Top-5 acc 50.781 (53.128)	lr 0.02496
Train [3][3000/3239]	Time 0.249 (0.485)	Data Time 0.001 (0.009)	Loss 4.0075 (4.0626)	Entropy 1.89909 (1.90024)	Top-1 acc 33.203 (29.715)	Top-5 acc 55.859 (53.132)	lr 0.02496
Train [3][3010/3239]	Time 0.211 (0.485)	Data Time 0.002 (0.009)	Loss 3.9722 (4.0624)	Entropy 1.89909 (1.90024)	Top-1 acc 30.859 (29.718)	Top-5 acc 52.734 (53.136)	lr 0.02496
Train [3][3020/3239]	Time 0.189 (0.485)	Data Time 0.001 (0.009)	Loss 4.1105 (4.0623)	Entropy 1.89909 (1.90023)	Top-1 acc 31.250 (29.721)	Top-5 acc 52.344 (53.142)	lr 0.02496
Train [3][3030/3239]	Time 0.213 (0.484)	Data Time 0.001 (0.009)	Loss 4.1724 (4.0623)	Entropy 1.89907 (1.90023)	Top-1 acc 26.953 (29.719)	Top-5 acc 51.953 (53.144)	lr 0.02496
Train [3][3040/3239]	Time 0.321 (0.484)	Data Time 0.001 (0.009)	Loss 3.9146 (4.0623)	Entropy 1.89906 (1.90023)	Top-1 acc 30.078 (29.718)	Top-5 acc 55.859 (53.142)	lr 0.02496
Train [3][3050/3239]	Time 0.304 (0.484)	Data Time 0.001 (0.009)	Loss 4.0839 (4.0622)	Entropy 1.89905 (1.90022)	Top-1 acc 27.734 (29.721)	Top-5 acc 53.906 (53.147)	lr 0.02496
Train [3][3060/3239]	Time 0.138 (0.484)	Data Time 0.001 (0.009)	Loss 4.2286 (4.0622)	Entropy 1.89905 (1.90022)	Top-1 acc 28.125 (29.723)	Top-5 acc 50.000 (53.145)	lr 0.02496
Train [3][3070/3239]	Time 0.183 (0.484)	Data Time 0.001 (0.009)	Loss 3.9134 (4.0623)	Entropy 1.89904 (1.90021)	Top-1 acc 30.469 (29.719)	Top-5 acc 54.688 (53.142)	lr 0.02496
Train [3][3080/3239]	Time 0.205 (0.483)	Data Time 0.001 (0.009)	Loss 3.9785 (4.0622)	Entropy 1.89904 (1.90021)	Top-1 acc 31.250 (29.723)	Top-5 acc 58.203 (53.147)	lr 0.02496
Train [3][3090/3239]	Time 0.139 (0.483)	Data Time 0.001 (0.009)	Loss 4.3072 (4.0620)	Entropy 1.89903 (1.90021)	Top-1 acc 27.344 (29.728)	Top-5 acc 48.438 (53.150)	lr 0.02496
Train [3][3100/3239]	Time 0.272 (0.483)	Data Time 0.001 (0.009)	Loss 4.0054 (4.0619)	Entropy 1.89903 (1.90020)	Top-1 acc 33.594 (29.728)	Top-5 acc 55.078 (53.152)	lr 0.02496
Train [3][3110/3239]	Time 0.198 (0.482)	Data Time 0.002 (0.009)	Loss 3.9600 (4.0617)	Entropy 1.89903 (1.90020)	Top-1 acc 30.859 (29.732)	Top-5 acc 55.469 (53.158)	lr 0.02496
Train [3][3120/3239]	Time 0.219 (0.482)	Data Time 0.001 (0.009)	Loss 3.9465 (4.0616)	Entropy 1.89902 (1.90019)	Top-1 acc 29.688 (29.732)	Top-5 acc 56.641 (53.159)	lr 0.02496
Train [3][3130/3239]	Time 0.205 (0.482)	Data Time 0.001 (0.009)	Loss 3.9319 (4.0615)	Entropy 1.89901 (1.90019)	Top-1 acc 32.812 (29.736)	Top-5 acc 55.859 (53.160)	lr 0.02496
Train [3][3140/3239]	Time 0.182 (0.482)	Data Time 0.029 (0.009)	Loss 4.1945 (4.0616)	Entropy 1.89900 (1.90019)	Top-1 acc 28.516 (29.731)	Top-5 acc 48.047 (53.156)	lr 0.02496
Train [3][3150/3239]	Time 0.147 (0.482)	Data Time 0.001 (0.009)	Loss 3.9708 (4.0614)	Entropy 1.89898 (1.90018)	Top-1 acc 29.297 (29.734)	Top-5 acc 53.516 (53.163)	lr 0.02496
Train [3][3160/3239]	Time 0.226 (0.481)	Data Time 0.001 (0.009)	Loss 4.1315 (4.0613)	Entropy 1.89898 (1.90018)	Top-1 acc 30.469 (29.732)	Top-5 acc 50.391 (53.164)	lr 0.02496
Train [3][3170/3239]	Time 0.296 (0.481)	Data Time 0.002 (0.009)	Loss 4.1097 (4.0613)	Entropy 1.89896 (1.90018)	Top-1 acc 27.734 (29.731)	Top-5 acc 52.734 (53.163)	lr 0.02496
Train [3][3180/3239]	Time 0.253 (0.481)	Data Time 0.000 (0.009)	Loss 4.0033 (4.0611)	Entropy 1.89894 (1.90017)	Top-1 acc 28.906 (29.734)	Top-5 acc 56.641 (53.168)	lr 0.02496
Train [3][3190/3239]	Time 0.311 (0.491)	Data Time 0.000 (0.009)	Loss 3.9137 (4.0610)	Entropy 1.89894 (1.90017)	Top-1 acc 32.812 (29.738)	Top-5 acc 58.594 (53.171)	lr 0.02496
Train [3][3200/3239]	Time 0.181 (0.491)	Data Time 0.000 (0.009)	Loss 3.9320 (4.0610)	Entropy 1.89892 (1.90016)	Top-1 acc 33.984 (29.737)	Top-5 acc 55.859 (53.171)	lr 0.02496
Train [3][3210/3239]	Time 0.209 (0.490)	Data Time 0.000 (0.009)	Loss 3.9556 (4.0610)	Entropy 1.89891 (1.90016)	Top-1 acc 30.469 (29.738)	Top-5 acc 54.688 (53.169)	lr 0.02496
Train [3][3220/3239]	Time 0.210 (0.490)	Data Time 0.000 (0.009)	Loss 4.0622 (4.0609)	Entropy 1.89890 (1.90016)	Top-1 acc 27.734 (29.741)	Top-5 acc 51.172 (53.168)	lr 0.02496
Train [3][3230/3239]	Time 0.159 (0.490)	Data Time 0.000 (0.009)	Loss 4.1581 (4.0607)	Entropy 1.89889 (1.90015)	Top-1 acc 27.734 (29.747)	Top-5 acc 50.000 (53.177)	lr 0.02496
Train [3][3239/3239]	Time 2.010 (0.489)	Data Time 0.000 (0.009)	Loss 4.2708 (4.0606)	Entropy 1.89889 (1.90015)	Top-1 acc 23.457 (29.748)	Top-5 acc 50.617 (53.179)	lr 0.02496
==========Valid [3/120]	loss 2.957	top-1 acc 38.297 (38.297)	top-5 acc 63.341	Train top-1 29.748	top-5 53.179	Entropy 1.89889	Latency-None: 0.000ms	Flops: 490.69M
Train [4][0/3239]	Time 20.679 (20.679)	Data Time 20.115 (20.115)	Loss 3.9526 (3.9526)	Entropy 1.89889 (1.89889)	Top-1 acc 33.984 (33.984)	Top-5 acc 57.031 (57.031)	lr 0.02496
Train [4][10/3239]	Time 2.298 (2.432)	Data Time 0.003 (1.907)	Loss 3.8346 (4.0608)	Entropy 1.89889 (1.89889)	Top-1 acc 35.156 (30.824)	Top-5 acc 59.375 (53.622)	lr 0.02496
Train [4][20/3239]	Time 0.189 (1.375)	Data Time 0.001 (1.000)	Loss 4.0849 (4.0707)	Entropy 1.89888 (1.89889)	Top-1 acc 25.781 (30.097)	Top-5 acc 51.953 (52.995)	lr 0.02496
Train [4][30/3239]	Time 0.189 (1.059)	Data Time 0.001 (0.678)	Loss 4.0835 (4.0611)	Entropy 1.89887 (1.89888)	Top-1 acc 29.297 (30.355)	Top-5 acc 51.172 (53.100)	lr 0.02496
Train [4][40/3239]	Time 0.171 (0.900)	Data Time 0.001 (0.513)	Loss 3.7593 (4.0341)	Entropy 1.89885 (1.89887)	Top-1 acc 37.109 (30.831)	Top-5 acc 60.156 (53.582)	lr 0.02496
Train [4][50/3239]	Time 0.153 (0.801)	Data Time 0.001 (0.413)	Loss 4.0779 (4.0503)	Entropy 1.89884 (1.89887)	Top-1 acc 33.984 (30.607)	Top-5 acc 51.953 (53.117)	lr 0.02496
Train [4][60/3239]	Time 0.197 (0.737)	Data Time 0.001 (0.347)	Loss 3.7949 (4.0313)	Entropy 1.89884 (1.89886)	Top-1 acc 36.328 (30.911)	Top-5 acc 60.156 (53.650)	lr 0.02496
Train [4][70/3239]	Time 0.232 (0.689)	Data Time 0.001 (0.298)	Loss 4.1512 (4.0343)	Entropy 1.89883 (1.89886)	Top-1 acc 28.516 (30.848)	Top-5 acc 52.734 (53.538)	lr 0.02496
Train [4][80/3239]	Time 0.321 (0.654)	Data Time 0.001 (0.261)	Loss 4.0280 (4.0371)	Entropy 1.89880 (1.89885)	Top-1 acc 33.203 (30.671)	Top-5 acc 54.297 (53.448)	lr 0.02496
Train [4][90/3239]	Time 0.222 (0.627)	Data Time 0.001 (0.233)	Loss 4.0665 (4.0335)	Entropy 1.89879 (1.89885)	Top-1 acc 30.078 (30.640)	Top-5 acc 54.297 (53.546)	lr 0.02496
Train [4][100/3239]	Time 0.202 (0.606)	Data Time 0.001 (0.210)	Loss 4.0757 (4.0371)	Entropy 1.89878 (1.89884)	Top-1 acc 30.469 (30.562)	Top-5 acc 51.562 (53.508)	lr 0.02496
Train [4][110/3239]	Time 0.190 (0.589)	Data Time 0.001 (0.191)	Loss 3.8630 (4.0326)	Entropy 1.89878 (1.89884)	Top-1 acc 30.078 (30.715)	Top-5 acc 58.594 (53.653)	lr 0.02496
Train [4][120/3239]	Time 2.234 (0.574)	Data Time 0.001 (0.176)	Loss 4.0834 (4.0343)	Entropy 1.89878 (1.89883)	Top-1 acc 28.516 (30.601)	Top-5 acc 51.953 (53.622)	lr 0.02496
Train [4][130/3239]	Time 0.218 (0.546)	Data Time 0.001 (0.162)	Loss 4.1384 (4.0347)	Entropy 1.89878 (1.89883)	Top-1 acc 28.516 (30.549)	Top-5 acc 53.906 (53.620)	lr 0.02496
Train [4][140/3239]	Time 0.204 (0.537)	Data Time 0.002 (0.151)	Loss 4.1516 (4.0359)	Entropy 1.89877 (1.89882)	Top-1 acc 27.734 (30.474)	Top-5 acc 51.562 (53.593)	lr 0.02496
Train [4][150/3239]	Time 0.235 (0.528)	Data Time 0.001 (0.141)	Loss 4.0330 (4.0400)	Entropy 1.89876 (1.89882)	Top-1 acc 30.078 (30.345)	Top-5 acc 54.297 (53.495)	lr 0.02496
Train [4][160/3239]	Time 0.150 (0.520)	Data Time 0.001 (0.133)	Loss 3.8159 (4.0394)	Entropy 1.89875 (1.89882)	Top-1 acc 34.375 (30.316)	Top-5 acc 55.469 (53.503)	lr 0.02496
Train [4][170/3239]	Time 0.143 (0.513)	Data Time 0.001 (0.125)	Loss 3.9859 (4.0397)	Entropy 1.89873 (1.89881)	Top-1 acc 29.297 (30.300)	Top-5 acc 54.688 (53.502)	lr 0.02496
Train [4][180/3239]	Time 0.194 (0.507)	Data Time 0.001 (0.118)	Loss 4.2283 (4.0415)	Entropy 1.89872 (1.89881)	Top-1 acc 24.219 (30.238)	Top-5 acc 49.219 (53.481)	lr 0.02496
Train [4][190/3239]	Time 0.199 (0.502)	Data Time 0.002 (0.112)	Loss 3.9923 (4.0409)	Entropy 1.89871 (1.89880)	Top-1 acc 33.984 (30.315)	Top-5 acc 57.422 (53.516)	lr 0.02496
Train [4][200/3239]	Time 0.154 (0.497)	Data Time 0.001 (0.107)	Loss 3.8893 (4.0382)	Entropy 1.89870 (1.89880)	Top-1 acc 32.422 (30.356)	Top-5 acc 55.469 (53.564)	lr 0.02496
Train [4][210/3239]	Time 0.291 (0.493)	Data Time 0.001 (0.102)	Loss 4.1550 (4.0342)	Entropy 1.89870 (1.89879)	Top-1 acc 28.516 (30.410)	Top-5 acc 52.734 (53.667)	lr 0.02496
Train [4][220/3239]	Time 0.192 (0.489)	Data Time 0.003 (0.098)	Loss 4.0756 (4.0351)	Entropy 1.89868 (1.89879)	Top-1 acc 31.250 (30.391)	Top-5 acc 54.297 (53.682)	lr 0.02496
Train [4][230/3239]	Time 2.026 (0.485)	Data Time 0.001 (0.093)	Loss 4.3409 (4.0347)	Entropy 1.89868 (1.89878)	Top-1 acc 23.047 (30.391)	Top-5 acc 44.922 (53.703)	lr 0.02496
Train [4][240/3239]	Time 0.147 (0.474)	Data Time 0.001 (0.090)	Loss 3.9591 (4.0338)	Entropy 1.89867 (1.89878)	Top-1 acc 32.422 (30.412)	Top-5 acc 55.859 (53.747)	lr 0.02496
Train [4][250/3239]	Time 0.155 (0.471)	Data Time 0.001 (0.086)	Loss 4.0082 (4.0335)	Entropy 1.89866 (1.89877)	Top-1 acc 30.078 (30.450)	Top-5 acc 51.953 (53.779)	lr 0.02496
Train [4][260/3239]	Time 0.208 (0.469)	Data Time 0.001 (0.083)	Loss 4.0663 (4.0329)	Entropy 1.89865 (1.89877)	Top-1 acc 30.859 (30.482)	Top-5 acc 50.391 (53.773)	lr 0.02496
Train [4][270/3239]	Time 0.203 (0.467)	Data Time 0.001 (0.080)	Loss 4.0698 (4.0356)	Entropy 1.89865 (1.89876)	Top-1 acc 29.297 (30.414)	Top-5 acc 55.078 (53.710)	lr 0.02496
Train [4][280/3239]	Time 0.150 (0.465)	Data Time 0.002 (0.077)	Loss 4.0050 (4.0346)	Entropy 1.89864 (1.89876)	Top-1 acc 31.250 (30.395)	Top-5 acc 53.125 (53.742)	lr 0.02496
Train [4][290/3239]	Time 0.161 (0.462)	Data Time 0.002 (0.075)	Loss 3.9849 (4.0323)	Entropy 1.89863 (1.89876)	Top-1 acc 30.469 (30.386)	Top-5 acc 55.078 (53.836)	lr 0.02496
Train [4][300/3239]	Time 0.202 (0.461)	Data Time 0.001 (0.073)	Loss 4.0020 (4.0326)	Entropy 1.89863 (1.89875)	Top-1 acc 30.078 (30.348)	Top-5 acc 53.516 (53.831)	lr 0.02496
Train [4][310/3239]	Time 0.226 (0.577)	Data Time 0.003 (0.070)	Loss 4.1096 (4.0344)	Entropy 1.89863 (1.89875)	Top-1 acc 28.516 (30.337)	Top-5 acc 51.562 (53.768)	lr 0.02496
Train [4][320/3239]	Time 0.211 (0.572)	Data Time 0.002 (0.068)	Loss 4.2098 (4.0365)	Entropy 1.89861 (1.89874)	Top-1 acc 27.344 (30.285)	Top-5 acc 50.391 (53.712)	lr 0.02496
Train [4][330/3239]	Time 0.222 (0.567)	Data Time 0.002 (0.066)	Loss 4.0503 (4.0376)	Entropy 1.89860 (1.89874)	Top-1 acc 28.125 (30.246)	Top-5 acc 54.297 (53.709)	lr 0.02496
Train [4][340/3239]	Time 2.182 (0.564)	Data Time 0.001 (0.064)	Loss 4.0054 (4.0365)	Entropy 1.89860 (1.89874)	Top-1 acc 29.688 (30.243)	Top-5 acc 53.516 (53.726)	lr 0.02496
Train [4][350/3239]	Time 0.219 (0.553)	Data Time 0.002 (0.063)	Loss 3.9514 (4.0384)	Entropy 1.89860 (1.89873)	Top-1 acc 27.344 (30.196)	Top-5 acc 53.516 (53.698)	lr 0.02496
Train [4][360/3239]	Time 0.195 (0.550)	Data Time 0.001 (0.061)	Loss 3.7506 (4.0362)	Entropy 1.89858 (1.89873)	Top-1 acc 38.672 (30.256)	Top-5 acc 60.547 (53.740)	lr 0.02496
Train [4][370/3239]	Time 0.170 (0.546)	Data Time 0.002 (0.059)	Loss 4.0887 (4.0368)	Entropy 1.89856 (1.89872)	Top-1 acc 29.297 (30.241)	Top-5 acc 51.953 (53.711)	lr 0.02496
Train [4][380/3239]	Time 0.212 (0.543)	Data Time 0.001 (0.058)	Loss 4.1858 (4.0364)	Entropy 1.89855 (1.89872)	Top-1 acc 30.469 (30.244)	Top-5 acc 51.172 (53.722)	lr 0.02496
Train [4][390/3239]	Time 0.232 (0.540)	Data Time 0.001 (0.056)	Loss 4.0932 (4.0349)	Entropy 1.89854 (1.89871)	Top-1 acc 27.734 (30.271)	Top-5 acc 53.516 (53.764)	lr 0.02496
Train [4][400/3239]	Time 0.140 (0.537)	Data Time 0.001 (0.055)	Loss 4.2175 (4.0358)	Entropy 1.89853 (1.89871)	Top-1 acc 30.469 (30.218)	Top-5 acc 50.000 (53.725)	lr 0.02496
Train [4][410/3239]	Time 0.193 (0.533)	Data Time 0.001 (0.054)	Loss 3.9711 (4.0353)	Entropy 1.89852 (1.89871)	Top-1 acc 28.516 (30.223)	Top-5 acc 53.125 (53.728)	lr 0.02496
Train [4][420/3239]	Time 0.253 (0.530)	Data Time 0.001 (0.053)	Loss 3.8947 (4.0355)	Entropy 1.89850 (1.89870)	Top-1 acc 36.328 (30.227)	Top-5 acc 57.031 (53.718)	lr 0.02496
Train [4][430/3239]	Time 0.144 (0.528)	Data Time 0.001 (0.051)	Loss 4.2606 (4.0357)	Entropy 1.89850 (1.89870)	Top-1 acc 25.391 (30.220)	Top-5 acc 43.750 (53.701)	lr 0.02496
Train [4][440/3239]	Time 0.208 (0.525)	Data Time 0.001 (0.050)	Loss 4.0303 (4.0352)	Entropy 1.89849 (1.89869)	Top-1 acc 27.344 (30.215)	Top-5 acc 53.516 (53.712)	lr 0.02496
Train [4][450/3239]	Time 2.308 (0.522)	Data Time 0.002 (0.049)	Loss 4.0879 (4.0364)	Entropy 1.89849 (1.89869)	Top-1 acc 30.469 (30.190)	Top-5 acc 50.781 (53.668)	lr 0.02496
Train [4][460/3239]	Time 0.354 (0.516)	Data Time 0.002 (0.048)	Loss 3.9959 (4.0351)	Entropy 1.89849 (1.89868)	Top-1 acc 33.984 (30.213)	Top-5 acc 51.953 (53.689)	lr 0.02496
Train [4][470/3239]	Time 0.234 (0.514)	Data Time 0.001 (0.047)	Loss 3.9899 (4.0346)	Entropy 1.89847 (1.89868)	Top-1 acc 28.125 (30.235)	Top-5 acc 54.688 (53.711)	lr 0.02496
Train [4][480/3239]	Time 0.240 (0.511)	Data Time 0.001 (0.046)	Loss 4.0641 (4.0346)	Entropy 1.89846 (1.89867)	Top-1 acc 31.250 (30.238)	Top-5 acc 54.688 (53.711)	lr 0.02496
Train [4][490/3239]	Time 0.209 (0.509)	Data Time 0.001 (0.046)	Loss 4.1669 (4.0346)	Entropy 1.89844 (1.89867)	Top-1 acc 25.781 (30.221)	Top-5 acc 50.391 (53.721)	lr 0.02496
Train [4][500/3239]	Time 0.192 (0.507)	Data Time 0.002 (0.045)	Loss 3.9881 (4.0358)	Entropy 1.89842 (1.89866)	Top-1 acc 30.859 (30.211)	Top-5 acc 54.297 (53.703)	lr 0.02496
Train [4][510/3239]	Time 0.199 (0.506)	Data Time 0.001 (0.044)	Loss 4.0289 (4.0347)	Entropy 1.89841 (1.89866)	Top-1 acc 30.469 (30.239)	Top-5 acc 53.906 (53.731)	lr 0.02496
Train [4][520/3239]	Time 0.209 (0.504)	Data Time 0.002 (0.043)	Loss 4.1764 (4.0334)	Entropy 1.89840 (1.89865)	Top-1 acc 28.516 (30.260)	Top-5 acc 50.781 (53.772)	lr 0.02496
Train [4][530/3239]	Time 0.206 (0.502)	Data Time 0.001 (0.042)	Loss 4.0924 (4.0343)	Entropy 1.89839 (1.89865)	Top-1 acc 28.516 (30.243)	Top-5 acc 53.125 (53.730)	lr 0.02496
Train [4][540/3239]	Time 0.203 (0.500)	Data Time 0.001 (0.042)	Loss 3.9602 (4.0335)	Entropy 1.89838 (1.89864)	Top-1 acc 35.938 (30.261)	Top-5 acc 55.469 (53.752)	lr 0.02496
Train [4][550/3239]	Time 0.205 (0.499)	Data Time 0.001 (0.041)	Loss 3.8979 (4.0341)	Entropy 1.89836 (1.89864)	Top-1 acc 34.375 (30.248)	Top-5 acc 56.641 (53.734)	lr 0.02496
Train [4][560/3239]	Time 2.209 (0.497)	Data Time 0.002 (0.040)	Loss 4.2090 (4.0344)	Entropy 1.89836 (1.89863)	Top-1 acc 32.031 (30.247)	Top-5 acc 51.953 (53.754)	lr 0.02496
Train [4][570/3239]	Time 0.368 (0.492)	Data Time 0.001 (0.039)	Loss 4.0402 (4.0340)	Entropy 1.89833 (1.89863)	Top-1 acc 32.812 (30.241)	Top-5 acc 53.906 (53.767)	lr 0.02496
Train [4][580/3239]	Time 0.252 (0.491)	Data Time 0.001 (0.039)	Loss 3.8553 (4.0338)	Entropy 1.89833 (1.89862)	Top-1 acc 34.375 (30.245)	Top-5 acc 58.984 (53.782)	lr 0.02496
Train [4][590/3239]	Time 0.194 (0.490)	Data Time 0.001 (0.038)	Loss 3.8959 (4.0347)	Entropy 1.89832 (1.89862)	Top-1 acc 31.250 (30.220)	Top-5 acc 54.688 (53.752)	lr 0.02496
Train [4][600/3239]	Time 0.199 (0.488)	Data Time 0.001 (0.038)	Loss 4.0463 (4.0343)	Entropy 1.89832 (1.89861)	Top-1 acc 30.469 (30.222)	Top-5 acc 52.344 (53.772)	lr 0.02496
Train [4][610/3239]	Time 0.196 (0.487)	Data Time 0.001 (0.037)	Loss 3.8388 (4.0331)	Entropy 1.89831 (1.89861)	Top-1 acc 30.859 (30.232)	Top-5 acc 56.250 (53.794)	lr 0.02496
Train [4][620/3239]	Time 0.192 (0.485)	Data Time 0.001 (0.036)	Loss 4.0740 (4.0316)	Entropy 1.89831 (1.89860)	Top-1 acc 28.516 (30.251)	Top-5 acc 52.734 (53.826)	lr 0.02496
Train [4][630/3239]	Time 0.223 (0.484)	Data Time 0.001 (0.036)	Loss 3.9516 (4.0315)	Entropy 1.89830 (1.89860)	Top-1 acc 31.250 (30.250)	Top-5 acc 52.344 (53.832)	lr 0.02496
Train [4][640/3239]	Time 0.211 (0.483)	Data Time 0.001 (0.035)	Loss 4.1754 (4.0315)	Entropy 1.89829 (1.89860)	Top-1 acc 25.391 (30.240)	Top-5 acc 51.562 (53.831)	lr 0.02496
Train [4][650/3239]	Time 0.220 (0.482)	Data Time 0.001 (0.035)	Loss 4.1156 (4.0306)	Entropy 1.89828 (1.89859)	Top-1 acc 30.859 (30.271)	Top-5 acc 51.953 (53.841)	lr 0.02496
Train [4][660/3239]	Time 0.239 (0.481)	Data Time 0.001 (0.034)	Loss 3.9288 (4.0303)	Entropy 1.89827 (1.89859)	Top-1 acc 30.859 (30.280)	Top-5 acc 52.734 (53.842)	lr 0.02496
Train [4][670/3239]	Time 37.857 (0.533)	Data Time 0.001 (0.034)	Loss 3.9240 (4.0295)	Entropy 1.89827 (1.89858)	Top-1 acc 32.422 (30.307)	Top-5 acc 56.250 (53.863)	lr 0.02496
Train [4][680/3239]	Time 0.284 (0.528)	Data Time 0.002 (0.033)	Loss 4.2208 (4.0304)	Entropy 1.89826 (1.89858)	Top-1 acc 25.000 (30.290)	Top-5 acc 48.828 (53.851)	lr 0.02496
Train [4][690/3239]	Time 0.353 (0.527)	Data Time 0.002 (0.033)	Loss 4.0853 (4.0297)	Entropy 1.89825 (1.89857)	Top-1 acc 29.688 (30.293)	Top-5 acc 51.562 (53.868)	lr 0.02496
Train [4][700/3239]	Time 0.164 (0.525)	Data Time 0.001 (0.032)	Loss 4.1213 (4.0298)	Entropy 1.89824 (1.89857)	Top-1 acc 26.562 (30.276)	Top-5 acc 52.734 (53.862)	lr 0.02496
Train [4][710/3239]	Time 0.188 (0.524)	Data Time 0.001 (0.032)	Loss 4.0342 (4.0283)	Entropy 1.89822 (1.89856)	Top-1 acc 29.297 (30.301)	Top-5 acc 55.078 (53.897)	lr 0.02496
Train [4][720/3239]	Time 0.204 (0.522)	Data Time 0.001 (0.032)	Loss 3.9912 (4.0286)	Entropy 1.89820 (1.89856)	Top-1 acc 28.516 (30.290)	Top-5 acc 53.125 (53.894)	lr 0.02496
Train [4][730/3239]	Time 0.210 (0.520)	Data Time 0.001 (0.031)	Loss 3.9925 (4.0286)	Entropy 1.89821 (1.89855)	Top-1 acc 30.469 (30.288)	Top-5 acc 56.641 (53.897)	lr 0.02496
Train [4][740/3239]	Time 0.179 (0.519)	Data Time 0.001 (0.031)	Loss 3.9317 (4.0285)	Entropy 1.89820 (1.89855)	Top-1 acc 30.469 (30.284)	Top-5 acc 53.516 (53.885)	lr 0.02496
Train [4][750/3239]	Time 0.246 (0.517)	Data Time 0.001 (0.031)	Loss 3.8546 (4.0282)	Entropy 1.89819 (1.89854)	Top-1 acc 35.938 (30.299)	Top-5 acc 61.719 (53.904)	lr 0.02496
Train [4][760/3239]	Time 0.137 (0.516)	Data Time 0.001 (0.030)	Loss 4.0323 (4.0282)	Entropy 1.89818 (1.89854)	Top-1 acc 31.250 (30.293)	Top-5 acc 54.297 (53.903)	lr 0.02496
Train [4][770/3239]	Time 0.182 (0.514)	Data Time 0.001 (0.030)	Loss 3.7353 (4.0276)	Entropy 1.89817 (1.89853)	Top-1 acc 37.109 (30.306)	Top-5 acc 62.109 (53.924)	lr 0.02496
Train [4][780/3239]	Time 2.034 (0.512)	Data Time 0.001 (0.029)	Loss 3.9335 (4.0276)	Entropy 1.89817 (1.89853)	Top-1 acc 28.516 (30.309)	Top-5 acc 57.031 (53.926)	lr 0.02496
Train [4][790/3239]	Time 0.198 (0.508)	Data Time 0.001 (0.029)	Loss 4.0869 (4.0275)	Entropy 1.89816 (1.89852)	Top-1 acc 30.859 (30.307)	Top-5 acc 51.953 (53.938)	lr 0.02495
Train [4][800/3239]	Time 0.202 (0.507)	Data Time 0.001 (0.029)	Loss 3.9907 (4.0270)	Entropy 1.89815 (1.89852)	Top-1 acc 31.250 (30.308)	Top-5 acc 55.859 (53.956)	lr 0.02495
Train [4][810/3239]	Time 0.249 (0.506)	Data Time 0.002 (0.028)	Loss 4.1118 (4.0275)	Entropy 1.89815 (1.89852)	Top-1 acc 27.344 (30.301)	Top-5 acc 53.125 (53.947)	lr 0.02495
Train [4][820/3239]	Time 0.293 (0.505)	Data Time 0.001 (0.028)	Loss 4.0313 (4.0274)	Entropy 1.89815 (1.89851)	Top-1 acc 30.078 (30.307)	Top-5 acc 51.953 (53.934)	lr 0.02495
Train [4][830/3239]	Time 0.279 (0.504)	Data Time 0.001 (0.028)	Loss 3.8907 (4.0278)	Entropy 1.89814 (1.89851)	Top-1 acc 34.766 (30.306)	Top-5 acc 57.812 (53.918)	lr 0.02495
Train [4][840/3239]	Time 0.230 (0.503)	Data Time 0.001 (0.028)	Loss 4.0459 (4.0279)	Entropy 1.89813 (1.89850)	Top-1 acc 32.031 (30.321)	Top-5 acc 54.688 (53.921)	lr 0.02495
Train [4][850/3239]	Time 0.164 (0.502)	Data Time 0.001 (0.027)	Loss 4.1304 (4.0278)	Entropy 1.89812 (1.89850)	Top-1 acc 32.422 (30.332)	Top-5 acc 52.734 (53.919)	lr 0.02495
Train [4][860/3239]	Time 0.138 (0.501)	Data Time 0.001 (0.027)	Loss 3.9879 (4.0286)	Entropy 1.89811 (1.89849)	Top-1 acc 34.766 (30.325)	Top-5 acc 55.078 (53.896)	lr 0.02495
Train [4][870/3239]	Time 0.236 (0.500)	Data Time 0.001 (0.027)	Loss 4.0471 (4.0293)	Entropy 1.89811 (1.89849)	Top-1 acc 31.641 (30.306)	Top-5 acc 52.344 (53.880)	lr 0.02495
Train [4][880/3239]	Time 0.195 (0.499)	Data Time 0.001 (0.026)	Loss 3.9733 (4.0295)	Entropy 1.89808 (1.89848)	Top-1 acc 28.125 (30.293)	Top-5 acc 53.125 (53.867)	lr 0.02495
Train [4][890/3239]	Time 2.242 (0.498)	Data Time 0.001 (0.026)	Loss 3.9251 (4.0290)	Entropy 1.89808 (1.89848)	Top-1 acc 33.594 (30.307)	Top-5 acc 57.422 (53.879)	lr 0.02495
Train [4][900/3239]	Time 0.170 (0.495)	Data Time 0.001 (0.026)	Loss 4.0979 (4.0287)	Entropy 1.89807 (1.89848)	Top-1 acc 28.906 (30.314)	Top-5 acc 52.344 (53.884)	lr 0.02495
Train [4][910/3239]	Time 0.212 (0.493)	Data Time 0.001 (0.026)	Loss 3.9370 (4.0280)	Entropy 1.89806 (1.89847)	Top-1 acc 32.031 (30.335)	Top-5 acc 53.516 (53.890)	lr 0.02495
Train [4][920/3239]	Time 0.252 (0.493)	Data Time 0.001 (0.025)	Loss 4.0026 (4.0281)	Entropy 1.89806 (1.89847)	Top-1 acc 29.688 (30.326)	Top-5 acc 55.078 (53.891)	lr 0.02495
Train [4][930/3239]	Time 0.168 (0.492)	Data Time 0.002 (0.025)	Loss 4.0590 (4.0276)	Entropy 1.89805 (1.89846)	Top-1 acc 28.125 (30.338)	Top-5 acc 53.125 (53.895)	lr 0.02495
Train [4][940/3239]	Time 0.252 (0.491)	Data Time 0.001 (0.025)	Loss 3.6901 (4.0270)	Entropy 1.89804 (1.89846)	Top-1 acc 35.938 (30.362)	Top-5 acc 63.281 (53.910)	lr 0.02495
Train [4][950/3239]	Time 0.291 (0.491)	Data Time 0.001 (0.025)	Loss 3.8132 (4.0267)	Entropy 1.89803 (1.89845)	Top-1 acc 36.328 (30.357)	Top-5 acc 58.984 (53.919)	lr 0.02495
Train [4][960/3239]	Time 0.188 (0.490)	Data Time 0.001 (0.024)	Loss 4.1221 (4.0273)	Entropy 1.89801 (1.89845)	Top-1 acc 28.125 (30.341)	Top-5 acc 51.172 (53.897)	lr 0.02495
Train [4][970/3239]	Time 0.235 (0.489)	Data Time 0.001 (0.024)	Loss 4.0923 (4.0274)	Entropy 1.89801 (1.89844)	Top-1 acc 29.297 (30.352)	Top-5 acc 54.297 (53.897)	lr 0.02495
Train [4][980/3239]	Time 0.168 (0.489)	Data Time 0.002 (0.024)	Loss 4.0037 (4.0272)	Entropy 1.89800 (1.89844)	Top-1 acc 30.859 (30.358)	Top-5 acc 54.297 (53.887)	lr 0.02495
Train [4][990/3239]	Time 0.243 (0.488)	Data Time 0.001 (0.024)	Loss 4.2031 (4.0272)	Entropy 1.89799 (1.89844)	Top-1 acc 25.000 (30.356)	Top-5 acc 48.047 (53.884)	lr 0.02495
Train [4][1000/3239]	Time 2.342 (0.487)	Data Time 0.001 (0.024)	Loss 4.0301 (4.0271)	Entropy 1.89799 (1.89843)	Top-1 acc 30.859 (30.357)	Top-5 acc 55.859 (53.880)	lr 0.02495
Train [4][1010/3239]	Time 0.150 (0.485)	Data Time 0.002 (0.023)	Loss 4.0279 (4.0272)	Entropy 1.89799 (1.89843)	Top-1 acc 28.516 (30.356)	Top-5 acc 54.297 (53.877)	lr 0.02495
Train [4][1020/3239]	Time 0.178 (0.484)	Data Time 0.001 (0.023)	Loss 3.9755 (4.0271)	Entropy 1.89798 (1.89842)	Top-1 acc 30.859 (30.354)	Top-5 acc 55.859 (53.879)	lr 0.02495
Train [4][1030/3239]	Time 0.221 (0.483)	Data Time 0.001 (0.023)	Loss 3.9541 (4.0273)	Entropy 1.89797 (1.89842)	Top-1 acc 33.203 (30.364)	Top-5 acc 57.422 (53.890)	lr 0.02495
Train [4][1040/3239]	Time 0.215 (0.516)	Data Time 0.002 (0.023)	Loss 3.8873 (4.0266)	Entropy 1.89797 (1.89841)	Top-1 acc 33.984 (30.382)	Top-5 acc 58.203 (53.915)	lr 0.02495
Train [4][1050/3239]	Time 0.224 (0.515)	Data Time 0.002 (0.023)	Loss 3.9188 (4.0267)	Entropy 1.89797 (1.89841)	Top-1 acc 35.156 (30.396)	Top-5 acc 56.641 (53.918)	lr 0.02495
Train [4][1060/3239]	Time 0.192 (0.514)	Data Time 0.001 (0.023)	Loss 4.2295 (4.0270)	Entropy 1.89796 (1.89840)	Top-1 acc 25.781 (30.384)	Top-5 acc 53.516 (53.913)	lr 0.02495
Train [4][1070/3239]	Time 0.234 (0.513)	Data Time 0.001 (0.022)	Loss 3.9584 (4.0272)	Entropy 1.89796 (1.89840)	Top-1 acc 30.469 (30.375)	Top-5 acc 54.688 (53.911)	lr 0.02495
Train [4][1080/3239]	Time 0.158 (0.512)	Data Time 0.001 (0.022)	Loss 3.8260 (4.0270)	Entropy 1.89796 (1.89840)	Top-1 acc 34.766 (30.376)	Top-5 acc 61.328 (53.917)	lr 0.02495
Train [4][1090/3239]	Time 0.198 (0.511)	Data Time 0.001 (0.022)	Loss 4.0605 (4.0272)	Entropy 1.89794 (1.89839)	Top-1 acc 31.641 (30.381)	Top-5 acc 54.688 (53.907)	lr 0.02495
Train [4][1100/3239]	Time 0.207 (0.510)	Data Time 0.001 (0.022)	Loss 3.9768 (4.0264)	Entropy 1.89792 (1.89839)	Top-1 acc 33.203 (30.403)	Top-5 acc 57.031 (53.925)	lr 0.02495
Train [4][1110/3239]	Time 2.283 (0.509)	Data Time 0.001 (0.022)	Loss 4.0268 (4.0270)	Entropy 1.89792 (1.89838)	Top-1 acc 30.078 (30.389)	Top-5 acc 52.344 (53.905)	lr 0.02495
Train [4][1120/3239]	Time 0.225 (0.507)	Data Time 0.001 (0.022)	Loss 4.0205 (4.0270)	Entropy 1.89792 (1.89838)	Top-1 acc 32.422 (30.390)	Top-5 acc 54.688 (53.902)	lr 0.02495
Train [4][1130/3239]	Time 0.228 (0.506)	Data Time 0.001 (0.021)	Loss 3.8900 (4.0268)	Entropy 1.89790 (1.89838)	Top-1 acc 35.156 (30.395)	Top-5 acc 58.984 (53.904)	lr 0.02495
Train [4][1140/3239]	Time 0.202 (0.505)	Data Time 0.002 (0.021)	Loss 4.1922 (4.0271)	Entropy 1.89789 (1.89837)	Top-1 acc 26.562 (30.388)	Top-5 acc 50.000 (53.892)	lr 0.02495
Train [4][1150/3239]	Time 0.186 (0.504)	Data Time 0.001 (0.021)	Loss 4.3174 (4.0277)	Entropy 1.89788 (1.89837)	Top-1 acc 27.344 (30.385)	Top-5 acc 46.875 (53.879)	lr 0.02495
Train [4][1160/3239]	Time 0.199 (0.503)	Data Time 0.001 (0.021)	Loss 4.0994 (4.0271)	Entropy 1.89787 (1.89836)	Top-1 acc 31.641 (30.405)	Top-5 acc 51.953 (53.890)	lr 0.02495
Train [4][1170/3239]	Time 0.214 (0.503)	Data Time 0.001 (0.021)	Loss 4.2794 (4.0273)	Entropy 1.89787 (1.89836)	Top-1 acc 26.172 (30.410)	Top-5 acc 51.172 (53.890)	lr 0.02495
Train [4][1180/3239]	Time 0.217 (0.502)	Data Time 0.001 (0.021)	Loss 3.8964 (4.0272)	Entropy 1.89786 (1.89835)	Top-1 acc 34.375 (30.413)	Top-5 acc 58.203 (53.888)	lr 0.02495
Train [4][1190/3239]	Time 0.201 (0.501)	Data Time 0.001 (0.020)	Loss 3.8457 (4.0273)	Entropy 1.89784 (1.89835)	Top-1 acc 34.766 (30.415)	Top-5 acc 59.766 (53.880)	lr 0.02495
Train [4][1200/3239]	Time 0.209 (0.500)	Data Time 0.002 (0.020)	Loss 4.0920 (4.0278)	Entropy 1.89784 (1.89835)	Top-1 acc 23.438 (30.396)	Top-5 acc 50.391 (53.871)	lr 0.02495
Train [4][1210/3239]	Time 0.207 (0.499)	Data Time 0.001 (0.020)	Loss 4.0741 (4.0278)	Entropy 1.89783 (1.89834)	Top-1 acc 29.688 (30.394)	Top-5 acc 53.516 (53.875)	lr 0.02495
Train [4][1220/3239]	Time 2.544 (0.499)	Data Time 0.002 (0.020)	Loss 4.0037 (4.0278)	Entropy 1.89783 (1.89834)	Top-1 acc 30.469 (30.396)	Top-5 acc 53.516 (53.874)	lr 0.02495
Train [4][1230/3239]	Time 0.199 (0.497)	Data Time 0.001 (0.020)	Loss 3.9492 (4.0275)	Entropy 1.89782 (1.89833)	Top-1 acc 30.859 (30.409)	Top-5 acc 55.078 (53.884)	lr 0.02495
Train [4][1240/3239]	Time 0.208 (0.496)	Data Time 0.001 (0.020)	Loss 4.0622 (4.0271)	Entropy 1.89780 (1.89833)	Top-1 acc 31.250 (30.421)	Top-5 acc 52.344 (53.893)	lr 0.02495
Train [4][1250/3239]	Time 0.192 (0.495)	Data Time 0.001 (0.020)	Loss 4.0563 (4.0275)	Entropy 1.89779 (1.89833)	Top-1 acc 30.078 (30.411)	Top-5 acc 56.250 (53.886)	lr 0.02495
Train [4][1260/3239]	Time 0.192 (0.495)	Data Time 0.001 (0.019)	Loss 4.0059 (4.0279)	Entropy 1.89778 (1.89832)	Top-1 acc 28.516 (30.394)	Top-5 acc 55.078 (53.879)	lr 0.02495
Train [4][1270/3239]	Time 0.239 (0.494)	Data Time 0.001 (0.019)	Loss 4.0719 (4.0284)	Entropy 1.89777 (1.89832)	Top-1 acc 29.297 (30.387)	Top-5 acc 55.469 (53.866)	lr 0.02495
Train [4][1280/3239]	Time 0.213 (0.493)	Data Time 0.001 (0.019)	Loss 4.1037 (4.0285)	Entropy 1.89776 (1.89831)	Top-1 acc 23.438 (30.386)	Top-5 acc 52.734 (53.863)	lr 0.02495
Train [4][1290/3239]	Time 0.184 (0.493)	Data Time 0.001 (0.019)	Loss 4.0224 (4.0285)	Entropy 1.89774 (1.89831)	Top-1 acc 30.078 (30.384)	Top-5 acc 51.562 (53.861)	lr 0.02495
Train [4][1300/3239]	Time 0.220 (0.492)	Data Time 0.001 (0.019)	Loss 4.1396 (4.0286)	Entropy 1.89773 (1.89830)	Top-1 acc 33.203 (30.377)	Top-5 acc 52.344 (53.862)	lr 0.02495
Train [4][1310/3239]	Time 0.165 (0.492)	Data Time 0.001 (0.019)	Loss 4.0203 (4.0290)	Entropy 1.89769 (1.89830)	Top-1 acc 30.859 (30.369)	Top-5 acc 52.344 (53.848)	lr 0.02495
Train [4][1320/3239]	Time 0.183 (0.491)	Data Time 0.001 (0.019)	Loss 4.0162 (4.0283)	Entropy 1.89767 (1.89829)	Top-1 acc 30.078 (30.380)	Top-5 acc 55.469 (53.867)	lr 0.02495
Train [4][1330/3239]	Time 2.228 (0.490)	Data Time 0.001 (0.019)	Loss 4.1269 (4.0284)	Entropy 1.89767 (1.89829)	Top-1 acc 27.344 (30.379)	Top-5 acc 49.609 (53.851)	lr 0.02495
Train [4][1340/3239]	Time 0.182 (0.488)	Data Time 0.001 (0.018)	Loss 3.8359 (4.0288)	Entropy 1.89767 (1.89829)	Top-1 acc 31.250 (30.376)	Top-5 acc 59.766 (53.849)	lr 0.02495
Train [4][1350/3239]	Time 0.191 (0.488)	Data Time 0.001 (0.018)	Loss 3.9986 (4.0283)	Entropy 1.89766 (1.89828)	Top-1 acc 31.250 (30.390)	Top-5 acc 51.172 (53.853)	lr 0.02495
Train [4][1360/3239]	Time 0.207 (0.487)	Data Time 0.001 (0.018)	Loss 3.9988 (4.0280)	Entropy 1.89764 (1.89828)	Top-1 acc 29.297 (30.388)	Top-5 acc 51.953 (53.853)	lr 0.02495
Train [4][1370/3239]	Time 0.205 (0.487)	Data Time 0.001 (0.018)	Loss 4.1792 (4.0279)	Entropy 1.89762 (1.89827)	Top-1 acc 27.734 (30.390)	Top-5 acc 52.344 (53.853)	lr 0.02495
Train [4][1380/3239]	Time 0.159 (0.486)	Data Time 0.001 (0.018)	Loss 3.9993 (4.0280)	Entropy 1.89760 (1.89827)	Top-1 acc 29.297 (30.390)	Top-5 acc 54.297 (53.858)	lr 0.02495
Train [4][1390/3239]	Time 0.224 (0.486)	Data Time 0.001 (0.018)	Loss 3.9958 (4.0278)	Entropy 1.89759 (1.89826)	Top-1 acc 33.594 (30.384)	Top-5 acc 55.469 (53.865)	lr 0.02495
Train [4][1400/3239]	Time 0.198 (0.510)	Data Time 0.002 (0.018)	Loss 4.1046 (4.0276)	Entropy 1.89758 (1.89826)	Top-1 acc 29.688 (30.393)	Top-5 acc 53.516 (53.869)	lr 0.02495
Train [4][1410/3239]	Time 0.188 (0.510)	Data Time 0.002 (0.018)	Loss 4.1212 (4.0275)	Entropy 1.89758 (1.89825)	Top-1 acc 27.344 (30.401)	Top-5 acc 51.562 (53.875)	lr 0.02495
Train [4][1420/3239]	Time 0.247 (0.509)	Data Time 0.002 (0.018)	Loss 4.0099 (4.0275)	Entropy 1.89757 (1.89825)	Top-1 acc 35.938 (30.402)	Top-5 acc 53.906 (53.873)	lr 0.02495
Train [4][1430/3239]	Time 0.144 (0.508)	Data Time 0.001 (0.017)	Loss 4.0994 (4.0275)	Entropy 1.89755 (1.89824)	Top-1 acc 30.469 (30.402)	Top-5 acc 50.781 (53.878)	lr 0.02495
Train [4][1440/3239]	Time 2.209 (0.508)	Data Time 0.002 (0.017)	Loss 4.0806 (4.0273)	Entropy 1.89755 (1.89824)	Top-1 acc 29.688 (30.399)	Top-5 acc 51.562 (53.886)	lr 0.02495
Train [4][1450/3239]	Time 0.201 (0.506)	Data Time 0.001 (0.017)	Loss 4.0713 (4.0271)	Entropy 1.89754 (1.89823)	Top-1 acc 30.859 (30.405)	Top-5 acc 55.859 (53.894)	lr 0.02495
Train [4][1460/3239]	Time 0.207 (0.505)	Data Time 0.002 (0.017)	Loss 4.1557 (4.0271)	Entropy 1.89753 (1.89823)	Top-1 acc 26.562 (30.408)	Top-5 acc 53.516 (53.901)	lr 0.02495
Train [4][1470/3239]	Time 0.294 (0.505)	Data Time 0.001 (0.017)	Loss 4.0265 (4.0266)	Entropy 1.89752 (1.89822)	Top-1 acc 30.859 (30.421)	Top-5 acc 57.422 (53.917)	lr 0.02495
Train [4][1480/3239]	Time 0.224 (0.504)	Data Time 0.001 (0.017)	Loss 4.0562 (4.0264)	Entropy 1.89751 (1.89822)	Top-1 acc 29.297 (30.424)	Top-5 acc 55.078 (53.926)	lr 0.02495
Train [4][1490/3239]	Time 0.201 (0.503)	Data Time 0.001 (0.017)	Loss 3.9919 (4.0263)	Entropy 1.89751 (1.89821)	Top-1 acc 28.125 (30.422)	Top-5 acc 53.906 (53.924)	lr 0.02495
Train [4][1500/3239]	Time 0.196 (0.503)	Data Time 0.001 (0.017)	Loss 4.1292 (4.0263)	Entropy 1.89750 (1.89821)	Top-1 acc 28.516 (30.427)	Top-5 acc 51.562 (53.924)	lr 0.02495
Train [4][1510/3239]	Time 0.250 (0.502)	Data Time 0.001 (0.017)	Loss 3.9984 (4.0259)	Entropy 1.89749 (1.89820)	Top-1 acc 34.766 (30.438)	Top-5 acc 54.297 (53.935)	lr 0.02495
Train [4][1520/3239]	Time 0.207 (0.502)	Data Time 0.001 (0.017)	Loss 4.0932 (4.0263)	Entropy 1.89748 (1.89820)	Top-1 acc 29.297 (30.425)	Top-5 acc 53.125 (53.924)	lr 0.02495
Train [4][1530/3239]	Time 0.213 (0.501)	Data Time 0.001 (0.016)	Loss 4.1469 (4.0260)	Entropy 1.89746 (1.89819)	Top-1 acc 27.344 (30.428)	Top-5 acc 50.391 (53.933)	lr 0.02495
Train [4][1540/3239]	Time 0.219 (0.500)	Data Time 0.001 (0.016)	Loss 3.8859 (4.0264)	Entropy 1.89746 (1.89819)	Top-1 acc 34.375 (30.416)	Top-5 acc 55.078 (53.925)	lr 0.02495
Train [4][1550/3239]	Time 2.237 (0.500)	Data Time 0.001 (0.016)	Loss 4.2526 (4.0271)	Entropy 1.89746 (1.89819)	Top-1 acc 24.609 (30.402)	Top-5 acc 50.781 (53.913)	lr 0.02495
Train [4][1560/3239]	Time 0.188 (0.498)	Data Time 0.001 (0.016)	Loss 3.8253 (4.0271)	Entropy 1.89745 (1.89818)	Top-1 acc 35.938 (30.404)	Top-5 acc 57.031 (53.914)	lr 0.02495
Train [4][1570/3239]	Time 0.179 (0.497)	Data Time 0.001 (0.016)	Loss 3.9607 (4.0272)	Entropy 1.89744 (1.89818)	Top-1 acc 31.641 (30.410)	Top-5 acc 55.469 (53.909)	lr 0.02495
Train [4][1580/3239]	Time 0.203 (0.497)	Data Time 0.001 (0.016)	Loss 4.0381 (4.0271)	Entropy 1.89744 (1.89817)	Top-1 acc 28.516 (30.409)	Top-5 acc 50.781 (53.913)	lr 0.02495
Train [4][1590/3239]	Time 0.294 (0.496)	Data Time 0.001 (0.016)	Loss 4.1411 (4.0270)	Entropy 1.89743 (1.89817)	Top-1 acc 28.906 (30.412)	Top-5 acc 52.734 (53.919)	lr 0.02495
Train [4][1600/3239]	Time 0.248 (0.496)	Data Time 0.001 (0.016)	Loss 3.7897 (4.0269)	Entropy 1.89741 (1.89816)	Top-1 acc 35.938 (30.416)	Top-5 acc 57.422 (53.925)	lr 0.02495
Train [4][1610/3239]	Time 0.185 (0.495)	Data Time 0.001 (0.016)	Loss 4.0859 (4.0270)	Entropy 1.89741 (1.89816)	Top-1 acc 29.688 (30.420)	Top-5 acc 51.172 (53.922)	lr 0.02495
Train [4][1620/3239]	Time 0.189 (0.495)	Data Time 0.001 (0.016)	Loss 4.0173 (4.0269)	Entropy 1.89739 (1.89815)	Top-1 acc 30.859 (30.428)	Top-5 acc 55.859 (53.920)	lr 0.02495
Train [4][1630/3239]	Time 0.236 (0.494)	Data Time 0.001 (0.016)	Loss 3.9644 (4.0265)	Entropy 1.89738 (1.89815)	Top-1 acc 30.859 (30.428)	Top-5 acc 54.297 (53.931)	lr 0.02495
Train [4][1640/3239]	Time 0.232 (0.494)	Data Time 0.001 (0.015)	Loss 4.0006 (4.0266)	Entropy 1.89735 (1.89814)	Top-1 acc 29.297 (30.428)	Top-5 acc 51.562 (53.921)	lr 0.02495
Train [4][1650/3239]	Time 0.230 (0.493)	Data Time 0.001 (0.015)	Loss 3.9423 (4.0261)	Entropy 1.89733 (1.89814)	Top-1 acc 32.031 (30.428)	Top-5 acc 55.859 (53.929)	lr 0.02495
Train [4][1660/3239]	Time 2.260 (0.493)	Data Time 0.001 (0.015)	Loss 4.2222 (4.0262)	Entropy 1.89733 (1.89813)	Top-1 acc 26.953 (30.425)	Top-5 acc 48.047 (53.931)	lr 0.02495
Train [4][1670/3239]	Time 0.207 (0.491)	Data Time 0.002 (0.015)	Loss 3.9610 (4.0263)	Entropy 1.89733 (1.89813)	Top-1 acc 30.078 (30.418)	Top-5 acc 54.688 (53.929)	lr 0.02495
Train [4][1680/3239]	Time 0.167 (0.491)	Data Time 0.001 (0.015)	Loss 4.2354 (4.0262)	Entropy 1.89732 (1.89812)	Top-1 acc 29.297 (30.418)	Top-5 acc 50.391 (53.930)	lr 0.02495
Train [4][1690/3239]	Time 0.212 (0.490)	Data Time 0.001 (0.015)	Loss 4.0013 (4.0262)	Entropy 1.89731 (1.89812)	Top-1 acc 30.078 (30.415)	Top-5 acc 55.859 (53.936)	lr 0.02495
Train [4][1700/3239]	Time 0.173 (0.490)	Data Time 0.001 (0.015)	Loss 3.9669 (4.0259)	Entropy 1.89729 (1.89811)	Top-1 acc 29.688 (30.422)	Top-5 acc 54.688 (53.943)	lr 0.02495
Train [4][1710/3239]	Time 0.315 (0.489)	Data Time 0.001 (0.015)	Loss 3.9956 (4.0258)	Entropy 1.89728 (1.89811)	Top-1 acc 30.078 (30.421)	Top-5 acc 54.297 (53.943)	lr 0.02495
Train [4][1720/3239]	Time 0.300 (0.489)	Data Time 0.001 (0.015)	Loss 4.0132 (4.0260)	Entropy 1.89727 (1.89810)	Top-1 acc 30.078 (30.411)	Top-5 acc 55.078 (53.944)	lr 0.02495
Train [4][1730/3239]	Time 0.204 (0.489)	Data Time 0.001 (0.015)	Loss 4.0586 (4.0262)	Entropy 1.89725 (1.89810)	Top-1 acc 28.125 (30.400)	Top-5 acc 51.172 (53.935)	lr 0.02495
Train [4][1740/3239]	Time 0.160 (0.488)	Data Time 0.001 (0.015)	Loss 4.0888 (4.0263)	Entropy 1.89724 (1.89809)	Top-1 acc 28.906 (30.402)	Top-5 acc 53.516 (53.937)	lr 0.02495
Train [4][1750/3239]	Time 0.209 (0.488)	Data Time 0.001 (0.015)	Loss 3.9751 (4.0261)	Entropy 1.89722 (1.89809)	Top-1 acc 31.250 (30.401)	Top-5 acc 55.469 (53.940)	lr 0.02495
Train [4][1760/3239]	Time 0.367 (0.507)	Data Time 0.004 (0.015)	Loss 3.9921 (4.0263)	Entropy 1.89721 (1.89808)	Top-1 acc 29.297 (30.401)	Top-5 acc 56.250 (53.942)	lr 0.02495
Train [4][1770/3239]	Time 2.242 (0.506)	Data Time 0.002 (0.015)	Loss 3.9163 (4.0264)	Entropy 1.89721 (1.89808)	Top-1 acc 35.156 (30.403)	Top-5 acc 57.031 (53.943)	lr 0.02495
Train [4][1780/3239]	Time 0.186 (0.505)	Data Time 0.002 (0.014)	Loss 4.0610 (4.0264)	Entropy 1.89720 (1.89808)	Top-1 acc 30.469 (30.409)	Top-5 acc 53.906 (53.940)	lr 0.02495
Train [4][1790/3239]	Time 0.199 (0.504)	Data Time 0.002 (0.014)	Loss 4.1135 (4.0264)	Entropy 1.89718 (1.89807)	Top-1 acc 27.344 (30.404)	Top-5 acc 50.781 (53.933)	lr 0.02495
Train [4][1800/3239]	Time 0.224 (0.504)	Data Time 0.001 (0.014)	Loss 4.0353 (4.0263)	Entropy 1.89717 (1.89807)	Top-1 acc 31.641 (30.411)	Top-5 acc 52.344 (53.937)	lr 0.02495
Train [4][1810/3239]	Time 0.205 (0.503)	Data Time 0.001 (0.014)	Loss 4.0280 (4.0263)	Entropy 1.89715 (1.89806)	Top-1 acc 27.734 (30.409)	Top-5 acc 53.516 (53.937)	lr 0.02495
Train [4][1820/3239]	Time 0.213 (0.503)	Data Time 0.001 (0.014)	Loss 4.0933 (4.0263)	Entropy 1.89710 (1.89805)	Top-1 acc 28.516 (30.412)	Top-5 acc 51.562 (53.935)	lr 0.02495
Train [4][1830/3239]	Time 0.226 (0.502)	Data Time 0.001 (0.014)	Loss 3.9570 (4.0265)	Entropy 1.89708 (1.89805)	Top-1 acc 29.688 (30.413)	Top-5 acc 53.906 (53.931)	lr 0.02495
Train [4][1840/3239]	Time 0.208 (0.502)	Data Time 0.001 (0.014)	Loss 3.9756 (4.0260)	Entropy 1.89708 (1.89804)	Top-1 acc 27.734 (30.423)	Top-5 acc 58.594 (53.942)	lr 0.02495
Train [4][1850/3239]	Time 0.233 (0.501)	Data Time 0.001 (0.014)	Loss 4.0322 (4.0261)	Entropy 1.89707 (1.89804)	Top-1 acc 30.078 (30.422)	Top-5 acc 52.734 (53.938)	lr 0.02495
Train [4][1860/3239]	Time 0.268 (0.501)	Data Time 0.001 (0.014)	Loss 3.9983 (4.0262)	Entropy 1.89706 (1.89803)	Top-1 acc 30.469 (30.423)	Top-5 acc 58.203 (53.939)	lr 0.02495
Train [4][1870/3239]	Time 0.249 (0.500)	Data Time 0.001 (0.014)	Loss 4.1287 (4.0263)	Entropy 1.89705 (1.89803)	Top-1 acc 27.734 (30.428)	Top-5 acc 48.828 (53.931)	lr 0.02495
Train [4][1880/3239]	Time 2.177 (0.500)	Data Time 0.001 (0.014)	Loss 4.0815 (4.0264)	Entropy 1.89705 (1.89802)	Top-1 acc 29.688 (30.424)	Top-5 acc 55.469 (53.932)	lr 0.02495
Train [4][1890/3239]	Time 0.142 (0.498)	Data Time 0.001 (0.014)	Loss 3.9586 (4.0263)	Entropy 1.89704 (1.89802)	Top-1 acc 30.078 (30.420)	Top-5 acc 53.906 (53.932)	lr 0.02495
Train [4][1900/3239]	Time 0.183 (0.498)	Data Time 0.002 (0.014)	Loss 3.9453 (4.0265)	Entropy 1.89701 (1.89801)	Top-1 acc 31.641 (30.410)	Top-5 acc 54.297 (53.927)	lr 0.02494
Train [4][1910/3239]	Time 0.166 (0.497)	Data Time 0.001 (0.014)	Loss 3.9188 (4.0262)	Entropy 1.89698 (1.89801)	Top-1 acc 34.766 (30.420)	Top-5 acc 54.688 (53.928)	lr 0.02494
Train [4][1920/3239]	Time 0.193 (0.497)	Data Time 0.001 (0.014)	Loss 3.8945 (4.0259)	Entropy 1.89699 (1.89800)	Top-1 acc 32.422 (30.426)	Top-5 acc 56.641 (53.933)	lr 0.02494
Train [4][1930/3239]	Time 0.182 (0.496)	Data Time 0.001 (0.014)	Loss 3.8607 (4.0261)	Entropy 1.89698 (1.89800)	Top-1 acc 32.031 (30.419)	Top-5 acc 59.375 (53.929)	lr 0.02494
Train [4][1940/3239]	Time 0.223 (0.496)	Data Time 0.001 (0.014)	Loss 3.9906 (4.0261)	Entropy 1.89696 (1.89799)	Top-1 acc 30.469 (30.420)	Top-5 acc 52.344 (53.927)	lr 0.02494
Train [4][1950/3239]	Time 0.223 (0.495)	Data Time 0.001 (0.013)	Loss 3.8337 (4.0260)	Entropy 1.89695 (1.89799)	Top-1 acc 33.203 (30.419)	Top-5 acc 56.641 (53.924)	lr 0.02494
Train [4][1960/3239]	Time 0.152 (0.495)	Data Time 0.001 (0.013)	Loss 3.9982 (4.0258)	Entropy 1.89692 (1.89798)	Top-1 acc 30.469 (30.421)	Top-5 acc 53.516 (53.936)	lr 0.02494
Train [4][1970/3239]	Time 0.207 (0.495)	Data Time 0.001 (0.013)	Loss 3.8367 (4.0258)	Entropy 1.89692 (1.89798)	Top-1 acc 37.109 (30.427)	Top-5 acc 59.766 (53.941)	lr 0.02494
Train [4][1980/3239]	Time 0.180 (0.494)	Data Time 0.001 (0.013)	Loss 4.0341 (4.0257)	Entropy 1.89690 (1.89797)	Top-1 acc 33.594 (30.439)	Top-5 acc 53.906 (53.947)	lr 0.02494
Train [4][1990/3239]	Time 2.351 (0.494)	Data Time 0.001 (0.013)	Loss 3.9667 (4.0259)	Entropy 1.89690 (1.89797)	Top-1 acc 33.984 (30.438)	Top-5 acc 57.031 (53.941)	lr 0.02494
Train [4][2000/3239]	Time 0.216 (0.492)	Data Time 0.001 (0.013)	Loss 4.0639 (4.0261)	Entropy 1.89689 (1.89796)	Top-1 acc 30.859 (30.435)	Top-5 acc 53.125 (53.932)	lr 0.02494
Train [4][2010/3239]	Time 0.198 (0.492)	Data Time 0.001 (0.013)	Loss 3.9044 (4.0260)	Entropy 1.89689 (1.89795)	Top-1 acc 35.156 (30.440)	Top-5 acc 57.422 (53.931)	lr 0.02494
Train [4][2020/3239]	Time 0.238 (0.492)	Data Time 0.001 (0.013)	Loss 4.0289 (4.0259)	Entropy 1.89688 (1.89795)	Top-1 acc 33.594 (30.444)	Top-5 acc 50.000 (53.928)	lr 0.02494
Train [4][2030/3239]	Time 0.218 (0.491)	Data Time 0.001 (0.013)	Loss 3.9908 (4.0260)	Entropy 1.89686 (1.89794)	Top-1 acc 32.812 (30.445)	Top-5 acc 53.516 (53.926)	lr 0.02494
Train [4][2040/3239]	Time 0.195 (0.491)	Data Time 0.001 (0.013)	Loss 4.3824 (4.0260)	Entropy 1.89685 (1.89794)	Top-1 acc 24.609 (30.444)	Top-5 acc 44.531 (53.929)	lr 0.02494
Train [4][2050/3239]	Time 0.197 (0.490)	Data Time 0.001 (0.013)	Loss 4.0810 (4.0263)	Entropy 1.89682 (1.89793)	Top-1 acc 26.172 (30.440)	Top-5 acc 49.219 (53.921)	lr 0.02494
Train [4][2060/3239]	Time 0.186 (0.490)	Data Time 0.001 (0.013)	Loss 4.1278 (4.0260)	Entropy 1.89680 (1.89793)	Top-1 acc 30.859 (30.448)	Top-5 acc 55.078 (53.929)	lr 0.02494
Train [4][2070/3239]	Time 0.254 (0.490)	Data Time 0.001 (0.013)	Loss 3.9612 (4.0259)	Entropy 1.89680 (1.89792)	Top-1 acc 33.203 (30.447)	Top-5 acc 55.078 (53.930)	lr 0.02494
Train [4][2080/3239]	Time 0.205 (0.489)	Data Time 0.002 (0.013)	Loss 4.2130 (4.0261)	Entropy 1.89676 (1.89792)	Top-1 acc 25.000 (30.435)	Top-5 acc 48.828 (53.929)	lr 0.02494
Train [4][2090/3239]	Time 0.206 (0.489)	Data Time 0.001 (0.013)	Loss 4.1648 (4.0259)	Entropy 1.89673 (1.89791)	Top-1 acc 26.953 (30.439)	Top-5 acc 51.562 (53.930)	lr 0.02494
Train [4][2100/3239]	Time 2.253 (0.489)	Data Time 0.001 (0.013)	Loss 4.3026 (4.0259)	Entropy 1.89673 (1.89791)	Top-1 acc 26.953 (30.443)	Top-5 acc 46.875 (53.935)	lr 0.02494
Train [4][2110/3239]	Time 0.183 (0.487)	Data Time 0.002 (0.013)	Loss 4.0910 (4.0259)	Entropy 1.89671 (1.89790)	Top-1 acc 31.641 (30.450)	Top-5 acc 52.344 (53.934)	lr 0.02494
Train [4][2120/3239]	Time 0.172 (0.487)	Data Time 0.002 (0.013)	Loss 3.8770 (4.0257)	Entropy 1.89669 (1.89789)	Top-1 acc 30.078 (30.453)	Top-5 acc 59.766 (53.942)	lr 0.02494
Train [4][2130/3239]	Time 0.244 (0.503)	Data Time 0.002 (0.013)	Loss 4.1652 (4.0259)	Entropy 1.89667 (1.89789)	Top-1 acc 32.031 (30.448)	Top-5 acc 55.859 (53.940)	lr 0.02494
Train [4][2140/3239]	Time 0.216 (0.503)	Data Time 0.002 (0.013)	Loss 4.1641 (4.0260)	Entropy 1.89666 (1.89788)	Top-1 acc 30.078 (30.446)	Top-5 acc 53.516 (53.939)	lr 0.02494
Train [4][2150/3239]	Time 0.226 (0.502)	Data Time 0.001 (0.012)	Loss 4.0999 (4.0258)	Entropy 1.89666 (1.89788)	Top-1 acc 34.375 (30.454)	Top-5 acc 53.516 (53.941)	lr 0.02494
Train [4][2160/3239]	Time 0.183 (0.502)	Data Time 0.001 (0.012)	Loss 3.9059 (4.0260)	Entropy 1.89665 (1.89787)	Top-1 acc 32.422 (30.451)	Top-5 acc 55.469 (53.937)	lr 0.02494
Train [4][2170/3239]	Time 0.206 (0.501)	Data Time 0.001 (0.012)	Loss 3.9223 (4.0260)	Entropy 1.89663 (1.89787)	Top-1 acc 30.859 (30.446)	Top-5 acc 53.906 (53.934)	lr 0.02494
Train [4][2180/3239]	Time 0.202 (0.501)	Data Time 0.001 (0.012)	Loss 3.9160 (4.0260)	Entropy 1.89663 (1.89786)	Top-1 acc 34.766 (30.446)	Top-5 acc 57.422 (53.935)	lr 0.02494
Train [4][2190/3239]	Time 0.221 (0.500)	Data Time 0.001 (0.012)	Loss 3.9086 (4.0258)	Entropy 1.89662 (1.89785)	Top-1 acc 30.469 (30.448)	Top-5 acc 57.031 (53.939)	lr 0.02494
Train [4][2200/3239]	Time 0.217 (0.500)	Data Time 0.001 (0.012)	Loss 3.9425 (4.0259)	Entropy 1.89662 (1.89785)	Top-1 acc 35.938 (30.449)	Top-5 acc 54.297 (53.933)	lr 0.02494
Train [4][2210/3239]	Time 2.034 (0.500)	Data Time 0.001 (0.012)	Loss 4.1458 (4.0258)	Entropy 1.89662 (1.89784)	Top-1 acc 28.906 (30.451)	Top-5 acc 50.391 (53.936)	lr 0.02494
Train [4][2220/3239]	Time 0.288 (0.498)	Data Time 0.001 (0.012)	Loss 3.9765 (4.0258)	Entropy 1.89659 (1.89784)	Top-1 acc 30.469 (30.451)	Top-5 acc 51.562 (53.936)	lr 0.02494
Train [4][2230/3239]	Time 0.323 (0.498)	Data Time 0.002 (0.012)	Loss 4.1555 (4.0255)	Entropy 1.89659 (1.89783)	Top-1 acc 32.812 (30.461)	Top-5 acc 51.172 (53.943)	lr 0.02494
Train [4][2240/3239]	Time 0.178 (0.498)	Data Time 0.001 (0.012)	Loss 4.1882 (4.0254)	Entropy 1.89659 (1.89783)	Top-1 acc 28.906 (30.458)	Top-5 acc 49.609 (53.943)	lr 0.02494
Train [4][2250/3239]	Time 0.138 (0.497)	Data Time 0.001 (0.012)	Loss 3.9297 (4.0255)	Entropy 1.89655 (1.89782)	Top-1 acc 33.203 (30.456)	Top-5 acc 53.125 (53.937)	lr 0.02494
Train [4][2260/3239]	Time 0.194 (0.497)	Data Time 0.001 (0.012)	Loss 4.0715 (4.0256)	Entropy 1.89653 (1.89782)	Top-1 acc 29.297 (30.456)	Top-5 acc 52.734 (53.938)	lr 0.02494
Train [4][2270/3239]	Time 0.194 (0.496)	Data Time 0.001 (0.012)	Loss 3.9574 (4.0255)	Entropy 1.89652 (1.89781)	Top-1 acc 27.344 (30.459)	Top-5 acc 55.859 (53.939)	lr 0.02494
Train [4][2280/3239]	Time 0.223 (0.496)	Data Time 0.001 (0.012)	Loss 3.9950 (4.0252)	Entropy 1.89650 (1.89780)	Top-1 acc 30.078 (30.462)	Top-5 acc 56.250 (53.943)	lr 0.02494
Train [4][2290/3239]	Time 0.200 (0.496)	Data Time 0.001 (0.012)	Loss 3.9437 (4.0250)	Entropy 1.89649 (1.89780)	Top-1 acc 28.125 (30.464)	Top-5 acc 57.031 (53.943)	lr 0.02494
Train [4][2300/3239]	Time 0.207 (0.495)	Data Time 0.001 (0.012)	Loss 4.0291 (4.0252)	Entropy 1.89645 (1.89779)	Top-1 acc 32.031 (30.463)	Top-5 acc 51.953 (53.941)	lr 0.02494
Train [4][2310/3239]	Time 0.203 (0.495)	Data Time 0.001 (0.012)	Loss 4.0550 (4.0251)	Entropy 1.89644 (1.89779)	Top-1 acc 29.688 (30.463)	Top-5 acc 53.125 (53.942)	lr 0.02494
Train [4][2320/3239]	Time 2.073 (0.494)	Data Time 0.001 (0.012)	Loss 3.9011 (4.0250)	Entropy 1.89644 (1.89778)	Top-1 acc 31.250 (30.465)	Top-5 acc 57.812 (53.946)	lr 0.02494
Train [4][2330/3239]	Time 0.228 (0.493)	Data Time 0.001 (0.012)	Loss 4.0364 (4.0250)	Entropy 1.89642 (1.89777)	Top-1 acc 31.641 (30.462)	Top-5 acc 52.344 (53.944)	lr 0.02494
Train [4][2340/3239]	Time 0.244 (0.493)	Data Time 0.001 (0.012)	Loss 4.0648 (4.0249)	Entropy 1.89641 (1.89777)	Top-1 acc 25.781 (30.461)	Top-5 acc 53.125 (53.944)	lr 0.02494
Train [4][2350/3239]	Time 0.232 (0.492)	Data Time 0.001 (0.012)	Loss 3.9832 (4.0248)	Entropy 1.89641 (1.89776)	Top-1 acc 30.859 (30.466)	Top-5 acc 55.859 (53.946)	lr 0.02494
Train [4][2360/3239]	Time 0.303 (0.492)	Data Time 0.001 (0.012)	Loss 4.0685 (4.0249)	Entropy 1.89640 (1.89776)	Top-1 acc 28.125 (30.466)	Top-5 acc 54.297 (53.942)	lr 0.02494
Train [4][2370/3239]	Time 0.296 (0.492)	Data Time 0.001 (0.011)	Loss 3.8741 (4.0247)	Entropy 1.89639 (1.89775)	Top-1 acc 34.375 (30.465)	Top-5 acc 57.031 (53.946)	lr 0.02494
Train [4][2380/3239]	Time 0.218 (0.491)	Data Time 0.001 (0.011)	Loss 4.0630 (4.0245)	Entropy 1.89639 (1.89775)	Top-1 acc 28.516 (30.472)	Top-5 acc 49.609 (53.952)	lr 0.02494
Train [4][2390/3239]	Time 0.183 (0.491)	Data Time 0.001 (0.011)	Loss 3.6915 (4.0242)	Entropy 1.89639 (1.89774)	Top-1 acc 39.453 (30.478)	Top-5 acc 64.062 (53.959)	lr 0.02494
Train [4][2400/3239]	Time 0.220 (0.491)	Data Time 0.002 (0.011)	Loss 3.8969 (4.0242)	Entropy 1.89638 (1.89773)	Top-1 acc 32.031 (30.478)	Top-5 acc 58.203 (53.958)	lr 0.02494
Train [4][2410/3239]	Time 0.256 (0.490)	Data Time 0.001 (0.011)	Loss 4.1625 (4.0239)	Entropy 1.89637 (1.89773)	Top-1 acc 23.438 (30.478)	Top-5 acc 51.172 (53.963)	lr 0.02494
Train [4][2420/3239]	Time 0.245 (0.490)	Data Time 0.001 (0.011)	Loss 3.8222 (4.0238)	Entropy 1.89636 (1.89772)	Top-1 acc 33.203 (30.477)	Top-5 acc 59.766 (53.965)	lr 0.02494
Train [4][2430/3239]	Time 2.347 (0.490)	Data Time 0.001 (0.011)	Loss 3.9692 (4.0236)	Entropy 1.89636 (1.89772)	Top-1 acc 32.812 (30.481)	Top-5 acc 53.906 (53.974)	lr 0.02494
Train [4][2440/3239]	Time 0.240 (0.489)	Data Time 0.002 (0.011)	Loss 3.8158 (4.0234)	Entropy 1.89636 (1.89771)	Top-1 acc 31.250 (30.483)	Top-5 acc 61.719 (53.977)	lr 0.02494
Train [4][2450/3239]	Time 0.191 (0.488)	Data Time 0.001 (0.011)	Loss 4.0971 (4.0234)	Entropy 1.89636 (1.89771)	Top-1 acc 30.859 (30.486)	Top-5 acc 53.125 (53.974)	lr 0.02494
Train [4][2460/3239]	Time 0.188 (0.488)	Data Time 0.001 (0.011)	Loss 4.0848 (4.0235)	Entropy 1.89635 (1.89770)	Top-1 acc 26.562 (30.485)	Top-5 acc 53.125 (53.974)	lr 0.02494
Train [4][2470/3239]	Time 0.187 (0.488)	Data Time 0.001 (0.011)	Loss 4.0825 (4.0235)	Entropy 1.89634 (1.89770)	Top-1 acc 32.812 (30.489)	Top-5 acc 52.344 (53.973)	lr 0.02494
Train [4][2480/3239]	Time 0.207 (0.487)	Data Time 0.001 (0.011)	Loss 4.2975 (4.0233)	Entropy 1.89630 (1.89769)	Top-1 acc 25.000 (30.490)	Top-5 acc 45.312 (53.974)	lr 0.02494
Train [4][2490/3239]	Time 0.360 (0.501)	Data Time 0.003 (0.011)	Loss 3.9753 (4.0233)	Entropy 1.89629 (1.89768)	Top-1 acc 32.031 (30.491)	Top-5 acc 53.516 (53.977)	lr 0.02494
Train [4][2500/3239]	Time 0.209 (0.500)	Data Time 0.002 (0.011)	Loss 3.9776 (4.0231)	Entropy 1.89627 (1.89768)	Top-1 acc 30.859 (30.489)	Top-5 acc 54.688 (53.980)	lr 0.02494
Train [4][2510/3239]	Time 0.216 (0.500)	Data Time 0.001 (0.011)	Loss 4.0872 (4.0230)	Entropy 1.89627 (1.89767)	Top-1 acc 34.375 (30.493)	Top-5 acc 52.734 (53.984)	lr 0.02494
Train [4][2520/3239]	Time 0.191 (0.500)	Data Time 0.002 (0.011)	Loss 3.9245 (4.0230)	Entropy 1.89624 (1.89767)	Top-1 acc 29.297 (30.493)	Top-5 acc 55.078 (53.983)	lr 0.02494
Train [4][2530/3239]	Time 0.208 (0.499)	Data Time 0.002 (0.011)	Loss 4.1190 (4.0233)	Entropy 1.89623 (1.89766)	Top-1 acc 31.250 (30.490)	Top-5 acc 51.172 (53.972)	lr 0.02494
Train [4][2540/3239]	Time 2.124 (0.499)	Data Time 0.002 (0.011)	Loss 3.8132 (4.0230)	Entropy 1.89623 (1.89766)	Top-1 acc 32.031 (30.492)	Top-5 acc 57.031 (53.984)	lr 0.02494
Train [4][2550/3239]	Time 0.208 (0.498)	Data Time 0.001 (0.011)	Loss 3.8671 (4.0225)	Entropy 1.89623 (1.89765)	Top-1 acc 30.469 (30.500)	Top-5 acc 55.859 (53.990)	lr 0.02494
Train [4][2560/3239]	Time 0.150 (0.497)	Data Time 0.001 (0.011)	Loss 3.8831 (4.0223)	Entropy 1.89620 (1.89765)	Top-1 acc 31.641 (30.501)	Top-5 acc 53.906 (53.991)	lr 0.02494
Train [4][2570/3239]	Time 0.224 (0.497)	Data Time 0.001 (0.011)	Loss 4.0046 (4.0222)	Entropy 1.89620 (1.89764)	Top-1 acc 29.688 (30.504)	Top-5 acc 56.641 (53.996)	lr 0.02494
Train [4][2580/3239]	Time 0.329 (0.497)	Data Time 0.001 (0.011)	Loss 3.9010 (4.0224)	Entropy 1.89617 (1.89763)	Top-1 acc 30.859 (30.501)	Top-5 acc 53.516 (53.993)	lr 0.02494
Train [4][2590/3239]	Time 0.308 (0.497)	Data Time 0.002 (0.011)	Loss 3.9679 (4.0224)	Entropy 1.89615 (1.89763)	Top-1 acc 32.031 (30.499)	Top-5 acc 55.859 (53.997)	lr 0.02494
Train [4][2600/3239]	Time 0.186 (0.496)	Data Time 0.001 (0.011)	Loss 3.9495 (4.0225)	Entropy 1.89612 (1.89762)	Top-1 acc 32.812 (30.499)	Top-5 acc 55.469 (53.992)	lr 0.02494
Train [4][2610/3239]	Time 0.200 (0.496)	Data Time 0.001 (0.011)	Loss 3.9187 (4.0225)	Entropy 1.89611 (1.89762)	Top-1 acc 32.031 (30.494)	Top-5 acc 57.422 (53.993)	lr 0.02494
Train [4][2620/3239]	Time 0.162 (0.496)	Data Time 0.001 (0.011)	Loss 3.9591 (4.0224)	Entropy 1.89609 (1.89761)	Top-1 acc 32.031 (30.495)	Top-5 acc 51.953 (53.996)	lr 0.02494
Train [4][2630/3239]	Time 0.207 (0.495)	Data Time 0.001 (0.011)	Loss 4.0941 (4.0222)	Entropy 1.89608 (1.89761)	Top-1 acc 25.391 (30.503)	Top-5 acc 51.172 (53.999)	lr 0.02494
Train [4][2640/3239]	Time 0.217 (0.495)	Data Time 0.002 (0.011)	Loss 4.0744 (4.0221)	Entropy 1.89606 (1.89760)	Top-1 acc 28.516 (30.504)	Top-5 acc 51.562 (53.999)	lr 0.02494
Train [4][2650/3239]	Time 0.244 (0.495)	Data Time 0.001 (0.011)	Loss 4.0797 (4.0224)	Entropy 1.89604 (1.89759)	Top-1 acc 26.562 (30.497)	Top-5 acc 50.000 (53.993)	lr 0.02494
Train [4][2660/3239]	Time 0.247 (0.494)	Data Time 0.001 (0.010)	Loss 4.1778 (4.0224)	Entropy 1.89600 (1.89759)	Top-1 acc 26.172 (30.493)	Top-5 acc 47.266 (53.989)	lr 0.02494
Train [4][2670/3239]	Time 0.163 (0.494)	Data Time 0.002 (0.010)	Loss 4.1501 (4.0225)	Entropy 1.89598 (1.89758)	Top-1 acc 28.516 (30.492)	Top-5 acc 53.906 (53.987)	lr 0.02494
Train [4][2680/3239]	Time 0.198 (0.494)	Data Time 0.001 (0.010)	Loss 4.3308 (4.0228)	Entropy 1.89597 (1.89758)	Top-1 acc 25.391 (30.492)	Top-5 acc 51.562 (53.986)	lr 0.02494
Train [4][2690/3239]	Time 0.208 (0.493)	Data Time 0.001 (0.010)	Loss 4.1079 (4.0229)	Entropy 1.89595 (1.89757)	Top-1 acc 28.516 (30.490)	Top-5 acc 53.516 (53.982)	lr 0.02494
Train [4][2700/3239]	Time 0.179 (0.493)	Data Time 0.001 (0.010)	Loss 4.1606 (4.0231)	Entropy 1.89592 (1.89756)	Top-1 acc 27.734 (30.489)	Top-5 acc 51.562 (53.979)	lr 0.02494
Train [4][2710/3239]	Time 0.316 (0.493)	Data Time 0.001 (0.010)	Loss 3.8773 (4.0232)	Entropy 1.89590 (1.89756)	Top-1 acc 32.031 (30.483)	Top-5 acc 57.422 (53.978)	lr 0.02494
Train [4][2720/3239]	Time 0.241 (0.492)	Data Time 0.001 (0.010)	Loss 4.0832 (4.0234)	Entropy 1.89588 (1.89755)	Top-1 acc 30.469 (30.484)	Top-5 acc 49.609 (53.977)	lr 0.02494
Train [4][2730/3239]	Time 0.200 (0.492)	Data Time 0.001 (0.010)	Loss 3.8353 (4.0231)	Entropy 1.89589 (1.89755)	Top-1 acc 34.375 (30.486)	Top-5 acc 58.203 (53.979)	lr 0.02494
Train [4][2740/3239]	Time 0.163 (0.492)	Data Time 0.001 (0.010)	Loss 4.0699 (4.0232)	Entropy 1.89587 (1.89754)	Top-1 acc 28.516 (30.487)	Top-5 acc 54.297 (53.976)	lr 0.02494
Train [4][2750/3239]	Time 0.189 (0.491)	Data Time 0.001 (0.010)	Loss 3.8956 (4.0232)	Entropy 1.89587 (1.89753)	Top-1 acc 29.688 (30.490)	Top-5 acc 57.812 (53.978)	lr 0.02494
Train [4][2760/3239]	Time 0.233 (0.491)	Data Time 0.001 (0.010)	Loss 3.9970 (4.0232)	Entropy 1.89586 (1.89753)	Top-1 acc 33.203 (30.488)	Top-5 acc 54.297 (53.977)	lr 0.02494
Train [4][2770/3239]	Time 0.201 (0.491)	Data Time 0.001 (0.010)	Loss 3.9032 (4.0230)	Entropy 1.89584 (1.89752)	Top-1 acc 36.328 (30.496)	Top-5 acc 55.859 (53.985)	lr 0.02494
Train [4][2780/3239]	Time 0.248 (0.491)	Data Time 0.001 (0.010)	Loss 3.9976 (4.0227)	Entropy 1.89584 (1.89752)	Top-1 acc 33.594 (30.500)	Top-5 acc 51.172 (53.991)	lr 0.02494
Train [4][2790/3239]	Time 0.239 (0.490)	Data Time 0.001 (0.010)	Loss 4.0191 (4.0227)	Entropy 1.89583 (1.89751)	Top-1 acc 33.984 (30.503)	Top-5 acc 55.469 (53.993)	lr 0.02494
Train [4][2800/3239]	Time 0.241 (0.490)	Data Time 0.001 (0.010)	Loss 3.9010 (4.0224)	Entropy 1.89582 (1.89750)	Top-1 acc 34.375 (30.507)	Top-5 acc 57.812 (54.004)	lr 0.02494
Train [4][2810/3239]	Time 0.176 (0.490)	Data Time 0.001 (0.010)	Loss 3.9743 (4.0224)	Entropy 1.89580 (1.89750)	Top-1 acc 31.250 (30.511)	Top-5 acc 55.469 (54.003)	lr 0.02494
Train [4][2820/3239]	Time 0.254 (0.489)	Data Time 0.001 (0.010)	Loss 4.1179 (4.0226)	Entropy 1.89579 (1.89749)	Top-1 acc 29.297 (30.507)	Top-5 acc 51.172 (54.000)	lr 0.02494
Train [4][2830/3239]	Time 0.415 (0.501)	Data Time 0.005 (0.010)	Loss 4.0337 (4.0227)	Entropy 1.89578 (1.89749)	Top-1 acc 31.641 (30.505)	Top-5 acc 53.125 (54.000)	lr 0.02494
Train [4][2840/3239]	Time 0.162 (0.501)	Data Time 0.002 (0.010)	Loss 4.3021 (4.0226)	Entropy 1.89578 (1.89748)	Top-1 acc 26.172 (30.507)	Top-5 acc 48.047 (54.002)	lr 0.02494
Train [4][2850/3239]	Time 0.189 (0.500)	Data Time 0.001 (0.010)	Loss 3.9847 (4.0226)	Entropy 1.89577 (1.89747)	Top-1 acc 31.641 (30.508)	Top-5 acc 55.469 (54.004)	lr 0.02494
Train [4][2860/3239]	Time 0.171 (0.500)	Data Time 0.001 (0.010)	Loss 3.8408 (4.0225)	Entropy 1.89577 (1.89747)	Top-1 acc 34.375 (30.507)	Top-5 acc 57.422 (54.011)	lr 0.02494
Train [4][2870/3239]	Time 0.257 (0.500)	Data Time 0.001 (0.010)	Loss 4.1143 (4.0227)	Entropy 1.89576 (1.89746)	Top-1 acc 25.391 (30.501)	Top-5 acc 50.391 (54.007)	lr 0.02494
Train [4][2880/3239]	Time 0.213 (0.499)	Data Time 0.001 (0.010)	Loss 4.0535 (4.0226)	Entropy 1.89576 (1.89746)	Top-1 acc 32.031 (30.503)	Top-5 acc 51.172 (54.011)	lr 0.02494
Train [4][2890/3239]	Time 0.186 (0.499)	Data Time 0.001 (0.010)	Loss 3.7967 (4.0226)	Entropy 1.89574 (1.89745)	Top-1 acc 34.375 (30.500)	Top-5 acc 61.328 (54.011)	lr 0.02494
Train [4][2900/3239]	Time 0.256 (0.499)	Data Time 0.003 (0.010)	Loss 3.9948 (4.0226)	Entropy 1.89574 (1.89744)	Top-1 acc 32.812 (30.501)	Top-5 acc 56.641 (54.010)	lr 0.02494
Train [4][2910/3239]	Time 0.158 (0.498)	Data Time 0.001 (0.010)	Loss 3.8853 (4.0224)	Entropy 1.89573 (1.89744)	Top-1 acc 32.812 (30.504)	Top-5 acc 60.156 (54.015)	lr 0.02493
Train [4][2920/3239]	Time 0.224 (0.498)	Data Time 0.001 (0.010)	Loss 4.0742 (4.0224)	Entropy 1.89573 (1.89743)	Top-1 acc 28.125 (30.506)	Top-5 acc 52.344 (54.014)	lr 0.02493
Train [4][2930/3239]	Time 0.209 (0.498)	Data Time 0.001 (0.010)	Loss 4.0952 (4.0227)	Entropy 1.89572 (1.89743)	Top-1 acc 30.859 (30.504)	Top-5 acc 53.125 (54.013)	lr 0.02493
Train [4][2940/3239]	Time 0.202 (0.497)	Data Time 0.001 (0.010)	Loss 3.8426 (4.0226)	Entropy 1.89569 (1.89742)	Top-1 acc 30.859 (30.503)	Top-5 acc 58.203 (54.014)	lr 0.02493
Train [4][2950/3239]	Time 0.150 (0.497)	Data Time 0.001 (0.010)	Loss 3.9693 (4.0226)	Entropy 1.89568 (1.89741)	Top-1 acc 33.203 (30.502)	Top-5 acc 53.906 (54.016)	lr 0.02493
Train [4][2960/3239]	Time 0.273 (0.497)	Data Time 0.001 (0.010)	Loss 3.9136 (4.0224)	Entropy 1.89566 (1.89741)	Top-1 acc 31.641 (30.507)	Top-5 acc 58.594 (54.020)	lr 0.02493
Train [4][2970/3239]	Time 0.370 (0.496)	Data Time 0.001 (0.010)	Loss 4.0612 (4.0225)	Entropy 1.89565 (1.89740)	Top-1 acc 28.516 (30.501)	Top-5 acc 51.172 (54.016)	lr 0.02493
Train [4][2980/3239]	Time 0.231 (0.496)	Data Time 0.001 (0.010)	Loss 3.9409 (4.0224)	Entropy 1.89562 (1.89740)	Top-1 acc 32.422 (30.505)	Top-5 acc 55.078 (54.020)	lr 0.02493
Train [4][2990/3239]	Time 0.189 (0.496)	Data Time 0.001 (0.010)	Loss 3.8443 (4.0220)	Entropy 1.89559 (1.89739)	Top-1 acc 35.156 (30.512)	Top-5 acc 55.859 (54.027)	lr 0.02493
Train [4][3000/3239]	Time 0.182 (0.496)	Data Time 0.001 (0.010)	Loss 3.9421 (4.0221)	Entropy 1.89559 (1.89739)	Top-1 acc 32.031 (30.511)	Top-5 acc 53.906 (54.024)	lr 0.02493
Train [4][3010/3239]	Time 0.152 (0.495)	Data Time 0.001 (0.010)	Loss 3.8802 (4.0220)	Entropy 1.89558 (1.89738)	Top-1 acc 36.328 (30.514)	Top-5 acc 59.375 (54.027)	lr 0.02493
Train [4][3020/3239]	Time 0.201 (0.495)	Data Time 0.001 (0.010)	Loss 4.1888 (4.0220)	Entropy 1.89555 (1.89737)	Top-1 acc 31.250 (30.516)	Top-5 acc 48.438 (54.026)	lr 0.02493
Train [4][3030/3239]	Time 0.172 (0.495)	Data Time 0.001 (0.010)	Loss 3.9174 (4.0218)	Entropy 1.89553 (1.89737)	Top-1 acc 30.078 (30.517)	Top-5 acc 55.078 (54.032)	lr 0.02493
Train [4][3040/3239]	Time 0.211 (0.494)	Data Time 0.001 (0.010)	Loss 4.1776 (4.0219)	Entropy 1.89551 (1.89736)	Top-1 acc 25.391 (30.516)	Top-5 acc 52.344 (54.029)	lr 0.02493
Train [4][3050/3239]	Time 0.147 (0.494)	Data Time 0.001 (0.010)	Loss 3.8933 (4.0218)	Entropy 1.89549 (1.89735)	Top-1 acc 32.422 (30.520)	Top-5 acc 57.031 (54.032)	lr 0.02493
Train [4][3060/3239]	Time 0.145 (0.494)	Data Time 0.001 (0.009)	Loss 4.1694 (4.0221)	Entropy 1.89548 (1.89735)	Top-1 acc 25.391 (30.514)	Top-5 acc 50.391 (54.028)	lr 0.02493
Train [4][3070/3239]	Time 0.207 (0.493)	Data Time 0.001 (0.009)	Loss 3.9696 (4.0222)	Entropy 1.89547 (1.89734)	Top-1 acc 32.812 (30.514)	Top-5 acc 53.906 (54.029)	lr 0.02493
Train [4][3080/3239]	Time 0.188 (0.493)	Data Time 0.001 (0.009)	Loss 4.1750 (4.0222)	Entropy 1.89544 (1.89734)	Top-1 acc 28.516 (30.515)	Top-5 acc 48.438 (54.030)	lr 0.02493
Train [4][3090/3239]	Time 0.210 (0.493)	Data Time 0.001 (0.009)	Loss 3.9765 (4.0221)	Entropy 1.89544 (1.89733)	Top-1 acc 30.469 (30.515)	Top-5 acc 57.031 (54.033)	lr 0.02493
Train [4][3100/3239]	Time 0.182 (0.493)	Data Time 0.001 (0.009)	Loss 3.8809 (4.0220)	Entropy 1.89543 (1.89732)	Top-1 acc 35.156 (30.523)	Top-5 acc 59.375 (54.039)	lr 0.02493
Train [4][3110/3239]	Time 0.266 (0.492)	Data Time 0.002 (0.009)	Loss 4.0260 (4.0219)	Entropy 1.89544 (1.89732)	Top-1 acc 26.953 (30.525)	Top-5 acc 55.078 (54.039)	lr 0.02493
Train [4][3120/3239]	Time 0.300 (0.492)	Data Time 0.001 (0.009)	Loss 4.0330 (4.0216)	Entropy 1.89542 (1.89731)	Top-1 acc 30.469 (30.524)	Top-5 acc 54.297 (54.046)	lr 0.02493
Train [4][3130/3239]	Time 0.189 (0.492)	Data Time 0.001 (0.009)	Loss 3.9709 (4.0217)	Entropy 1.89539 (1.89731)	Top-1 acc 26.953 (30.522)	Top-5 acc 54.688 (54.047)	lr 0.02493
Train [4][3140/3239]	Time 0.192 (0.492)	Data Time 0.001 (0.009)	Loss 4.2046 (4.0218)	Entropy 1.89538 (1.89730)	Top-1 acc 23.828 (30.520)	Top-5 acc 50.000 (54.049)	lr 0.02493
Train [4][3150/3239]	Time 0.236 (0.491)	Data Time 0.001 (0.009)	Loss 3.9138 (4.0216)	Entropy 1.89537 (1.89729)	Top-1 acc 32.422 (30.524)	Top-5 acc 56.250 (54.056)	lr 0.02493
Train [4][3160/3239]	Time 0.160 (0.503)	Data Time 0.003 (0.009)	Loss 4.0816 (4.0215)	Entropy 1.89534 (1.89729)	Top-1 acc 30.469 (30.528)	Top-5 acc 52.734 (54.057)	lr 0.02493
Train [4][3170/3239]	Time 0.228 (0.502)	Data Time 0.002 (0.009)	Loss 3.8822 (4.0213)	Entropy 1.89532 (1.89728)	Top-1 acc 33.203 (30.531)	Top-5 acc 58.594 (54.063)	lr 0.02493
Train [4][3180/3239]	Time 0.202 (0.502)	Data Time 0.000 (0.009)	Loss 4.0522 (4.0213)	Entropy 1.89531 (1.89728)	Top-1 acc 30.469 (30.533)	Top-5 acc 51.562 (54.060)	lr 0.02493
Train [4][3190/3239]	Time 0.204 (0.502)	Data Time 0.000 (0.009)	Loss 3.9386 (4.0215)	Entropy 1.89531 (1.89727)	Top-1 acc 33.203 (30.529)	Top-5 acc 53.516 (54.054)	lr 0.02493
Train [4][3200/3239]	Time 0.191 (0.501)	Data Time 0.000 (0.009)	Loss 4.1722 (4.0212)	Entropy 1.89530 (1.89726)	Top-1 acc 27.344 (30.534)	Top-5 acc 50.391 (54.059)	lr 0.02493
Train [4][3210/3239]	Time 0.218 (0.501)	Data Time 0.000 (0.009)	Loss 4.0202 (4.0212)	Entropy 1.89529 (1.89726)	Top-1 acc 29.297 (30.534)	Top-5 acc 55.078 (54.062)	lr 0.02493
Train [4][3220/3239]	Time 0.138 (0.501)	Data Time 0.000 (0.009)	Loss 4.0566 (4.0211)	Entropy 1.89527 (1.89725)	Top-1 acc 32.031 (30.538)	Top-5 acc 51.172 (54.065)	lr 0.02493
Train [4][3230/3239]	Time 0.181 (0.500)	Data Time 0.000 (0.009)	Loss 4.0105 (4.0213)	Entropy 1.89526 (1.89725)	Top-1 acc 30.078 (30.533)	Top-5 acc 55.078 (54.061)	lr 0.02493
Train [4][3239/3239]	Time 1.992 (0.500)	Data Time 0.000 (0.009)	Loss 4.2518 (4.0212)	Entropy 1.89526 (1.89724)	Top-1 acc 27.160 (30.535)	Top-5 acc 48.148 (54.063)	lr 0.02493
==========Valid [4/120]	loss 2.897	top-1 acc 39.300 (39.300)	top-5 acc 64.199	Train top-1 30.535	top-5 54.063	Entropy 1.89526	Latency-None: 0.000ms	Flops: 514.96M
Train [5][0/3239]	Time 21.357 (21.357)	Data Time 20.315 (20.315)	Loss 3.7550 (3.7550)	Entropy 1.89524 (1.89524)	Top-1 acc 37.891 (37.891)	Top-5 acc 58.203 (58.203)	lr 0.02493
Train [5][10/3239]	Time 2.853 (2.535)	Data Time 0.003 (1.885)	Loss 4.0442 (3.9332)	Entropy 1.89524 (1.89524)	Top-1 acc 29.688 (32.457)	Top-5 acc 50.781 (55.043)	lr 0.02493
Train [5][20/3239]	Time 0.195 (1.432)	Data Time 0.001 (0.988)	Loss 3.9761 (3.9744)	Entropy 1.89524 (1.89524)	Top-1 acc 27.734 (31.622)	Top-5 acc 55.859 (54.483)	lr 0.02493
Train [5][30/3239]	Time 0.253 (1.106)	Data Time 0.001 (0.670)	Loss 3.9750 (3.9944)	Entropy 1.89520 (1.89523)	Top-1 acc 34.766 (31.351)	Top-5 acc 54.297 (54.335)	lr 0.02493
Train [5][40/3239]	Time 0.192 (0.938)	Data Time 0.001 (0.507)	Loss 4.0021 (3.9995)	Entropy 1.89519 (1.89522)	Top-1 acc 29.297 (30.955)	Top-5 acc 54.297 (54.154)	lr 0.02493
Train [5][50/3239]	Time 0.263 (0.834)	Data Time 0.001 (0.408)	Loss 4.0779 (4.0045)	Entropy 1.89517 (1.89521)	Top-1 acc 28.125 (30.829)	Top-5 acc 55.859 (53.929)	lr 0.02493
Train [5][60/3239]	Time 0.167 (0.764)	Data Time 0.002 (0.341)	Loss 3.7545 (4.0079)	Entropy 1.89516 (1.89520)	Top-1 acc 37.891 (30.565)	Top-5 acc 58.203 (53.893)	lr 0.02493
Train [5][70/3239]	Time 0.187 (0.714)	Data Time 0.001 (0.294)	Loss 4.0645 (4.0031)	Entropy 1.89515 (1.89520)	Top-1 acc 29.688 (30.683)	Top-5 acc 53.125 (54.033)	lr 0.02493
Train [5][80/3239]	Time 0.162 (0.677)	Data Time 0.002 (0.258)	Loss 3.8551 (4.0027)	Entropy 1.89514 (1.89519)	Top-1 acc 32.422 (30.594)	Top-5 acc 55.469 (54.022)	lr 0.02493
Train [5][90/3239]	Time 0.206 (0.648)	Data Time 0.001 (0.230)	Loss 3.9521 (4.0015)	Entropy 1.89512 (1.89518)	Top-1 acc 34.766 (30.615)	Top-5 acc 58.594 (54.142)	lr 0.02493
Train [5][100/3239]	Time 0.260 (0.625)	Data Time 0.001 (0.207)	Loss 3.9888 (3.9948)	Entropy 1.89510 (1.89518)	Top-1 acc 27.344 (30.743)	Top-5 acc 53.516 (54.289)	lr 0.02493
Train [5][110/3239]	Time 0.330 (0.606)	Data Time 0.001 (0.189)	Loss 3.9786 (4.0000)	Entropy 1.89510 (1.89517)	Top-1 acc 30.469 (30.655)	Top-5 acc 58.984 (54.174)	lr 0.02493
Train [5][120/3239]	Time 2.204 (0.589)	Data Time 0.001 (0.173)	Loss 4.0449 (4.0018)	Entropy 1.89510 (1.89516)	Top-1 acc 29.688 (30.646)	Top-5 acc 54.297 (54.106)	lr 0.02493
Train [5][130/3239]	Time 0.213 (0.560)	Data Time 0.001 (0.160)	Loss 4.0115 (3.9990)	Entropy 1.89511 (1.89516)	Top-1 acc 34.766 (30.752)	Top-5 acc 56.641 (54.276)	lr 0.02493
Train [5][140/3239]	Time 0.197 (0.549)	Data Time 0.001 (0.149)	Loss 3.8206 (4.0000)	Entropy 1.89511 (1.89516)	Top-1 acc 32.031 (30.693)	Top-5 acc 56.250 (54.319)	lr 0.02493
Train [5][150/3239]	Time 0.185 (0.540)	Data Time 0.001 (0.139)	Loss 4.1317 (4.0009)	Entropy 1.89510 (1.89515)	Top-1 acc 27.344 (30.663)	Top-5 acc 53.125 (54.331)	lr 0.02493
Train [5][160/3239]	Time 0.216 (0.531)	Data Time 0.001 (0.131)	Loss 4.3063 (4.0003)	Entropy 1.89509 (1.89515)	Top-1 acc 27.734 (30.724)	Top-5 acc 49.219 (54.372)	lr 0.02493
Train [5][170/3239]	Time 0.237 (0.522)	Data Time 0.002 (0.123)	Loss 4.0961 (3.9982)	Entropy 1.89505 (1.89514)	Top-1 acc 28.516 (30.814)	Top-5 acc 53.516 (54.413)	lr 0.02493
Train [5][180/3239]	Time 0.149 (0.516)	Data Time 0.001 (0.116)	Loss 3.9784 (3.9932)	Entropy 1.89503 (1.89514)	Top-1 acc 28.125 (30.877)	Top-5 acc 55.469 (54.536)	lr 0.02493
Train [5][190/3239]	Time 0.179 (0.510)	Data Time 0.001 (0.111)	Loss 3.8668 (3.9944)	Entropy 1.89502 (1.89513)	Top-1 acc 32.422 (30.847)	Top-5 acc 57.812 (54.561)	lr 0.02493
Train [5][200/3239]	Time 0.210 (0.506)	Data Time 0.002 (0.105)	Loss 3.9436 (3.9921)	Entropy 1.89500 (1.89513)	Top-1 acc 32.812 (30.912)	Top-5 acc 57.422 (54.600)	lr 0.02493
Train [5][210/3239]	Time 0.275 (0.501)	Data Time 0.001 (0.101)	Loss 3.9630 (3.9909)	Entropy 1.89498 (1.89512)	Top-1 acc 35.547 (30.920)	Top-5 acc 55.469 (54.645)	lr 0.02493
Train [5][220/3239]	Time 0.309 (0.497)	Data Time 0.001 (0.096)	Loss 3.8564 (3.9928)	Entropy 1.89496 (1.89511)	Top-1 acc 32.422 (30.921)	Top-5 acc 55.469 (54.615)	lr 0.02493
Train [5][230/3239]	Time 2.067 (0.492)	Data Time 0.001 (0.092)	Loss 3.9378 (3.9894)	Entropy 1.89496 (1.89511)	Top-1 acc 32.422 (30.957)	Top-5 acc 53.906 (54.691)	lr 0.02493
Train [5][240/3239]	Time 0.193 (0.480)	Data Time 0.002 (0.088)	Loss 3.9221 (3.9881)	Entropy 1.89494 (1.89510)	Top-1 acc 33.984 (31.013)	Top-5 acc 57.812 (54.735)	lr 0.02493
Train [5][250/3239]	Time 0.247 (0.478)	Data Time 0.001 (0.085)	Loss 4.0557 (3.9871)	Entropy 1.89491 (1.89509)	Top-1 acc 30.469 (31.015)	Top-5 acc 52.344 (54.720)	lr 0.02493
Train [5][260/3239]	Time 0.173 (0.474)	Data Time 0.001 (0.082)	Loss 3.9913 (3.9843)	Entropy 1.89488 (1.89508)	Top-1 acc 28.906 (31.076)	Top-5 acc 57.422 (54.798)	lr 0.02493
Train [5][270/3239]	Time 0.142 (0.472)	Data Time 0.001 (0.079)	Loss 3.8913 (3.9853)	Entropy 1.89487 (1.89508)	Top-1 acc 31.250 (31.041)	Top-5 acc 56.641 (54.790)	lr 0.02493
Train [5][280/3239]	Time 0.265 (0.581)	Data Time 0.003 (0.076)	Loss 3.9231 (3.9868)	Entropy 1.89486 (1.89507)	Top-1 acc 30.859 (30.996)	Top-5 acc 56.250 (54.736)	lr 0.02493
Train [5][290/3239]	Time 0.166 (0.579)	Data Time 0.002 (0.074)	Loss 4.1568 (3.9890)	Entropy 1.89481 (1.89506)	Top-1 acc 25.781 (30.990)	Top-5 acc 50.391 (54.677)	lr 0.02493
Train [5][300/3239]	Time 0.182 (0.573)	Data Time 0.002 (0.072)	Loss 4.1415 (3.9899)	Entropy 1.89479 (1.89505)	Top-1 acc 29.688 (30.989)	Top-5 acc 51.562 (54.660)	lr 0.02493
Train [5][310/3239]	Time 0.204 (0.568)	Data Time 0.001 (0.069)	Loss 4.0389 (3.9888)	Entropy 1.89477 (1.89504)	Top-1 acc 33.594 (31.043)	Top-5 acc 54.297 (54.719)	lr 0.02493
Train [5][320/3239]	Time 0.177 (0.563)	Data Time 0.001 (0.067)	Loss 3.9727 (3.9875)	Entropy 1.89473 (1.89504)	Top-1 acc 33.203 (31.121)	Top-5 acc 54.688 (54.765)	lr 0.02493
Train [5][330/3239]	Time 0.208 (0.558)	Data Time 0.002 (0.065)	Loss 4.1826 (3.9868)	Entropy 1.89472 (1.89503)	Top-1 acc 26.953 (31.088)	Top-5 acc 46.875 (54.781)	lr 0.02493
Train [5][340/3239]	Time 2.293 (0.554)	Data Time 0.001 (0.064)	Loss 3.7013 (3.9853)	Entropy 1.89472 (1.89502)	Top-1 acc 32.812 (31.124)	Top-5 acc 63.281 (54.831)	lr 0.02493
Train [5][350/3239]	Time 0.199 (0.544)	Data Time 0.001 (0.062)	Loss 3.9686 (3.9872)	Entropy 1.89471 (1.89501)	Top-1 acc 26.953 (31.100)	Top-5 acc 55.078 (54.778)	lr 0.02493
Train [5][360/3239]	Time 0.191 (0.541)	Data Time 0.001 (0.060)	Loss 3.9690 (3.9861)	Entropy 1.89466 (1.89500)	Top-1 acc 33.594 (31.108)	Top-5 acc 56.250 (54.796)	lr 0.02493
Train [5][370/3239]	Time 0.275 (0.537)	Data Time 0.001 (0.059)	Loss 4.2206 (3.9877)	Entropy 1.89465 (1.89499)	Top-1 acc 29.297 (31.095)	Top-5 acc 49.609 (54.766)	lr 0.02493
Train [5][380/3239]	Time 0.297 (0.534)	Data Time 0.002 (0.057)	Loss 3.9651 (3.9877)	Entropy 1.89463 (1.89498)	Top-1 acc 32.422 (31.073)	Top-5 acc 56.641 (54.772)	lr 0.02493
Train [5][390/3239]	Time 0.191 (0.531)	Data Time 0.002 (0.056)	Loss 3.9889 (3.9883)	Entropy 1.89461 (1.89497)	Top-1 acc 29.297 (31.023)	Top-5 acc 54.688 (54.760)	lr 0.02493
Train [5][400/3239]	Time 0.151 (0.528)	Data Time 0.001 (0.055)	Loss 4.1936 (3.9899)	Entropy 1.89459 (1.89496)	Top-1 acc 25.781 (30.981)	Top-5 acc 50.781 (54.746)	lr 0.02493
Train [5][410/3239]	Time 0.178 (0.525)	Data Time 0.001 (0.053)	Loss 3.8114 (3.9895)	Entropy 1.89458 (1.89495)	Top-1 acc 30.078 (31.009)	Top-5 acc 60.156 (54.751)	lr 0.02493
Train [5][420/3239]	Time 0.146 (0.522)	Data Time 0.001 (0.052)	Loss 4.0209 (3.9900)	Entropy 1.89457 (1.89494)	Top-1 acc 30.859 (30.998)	Top-5 acc 52.344 (54.757)	lr 0.02493
Train [5][430/3239]	Time 0.195 (0.519)	Data Time 0.001 (0.051)	Loss 4.0787 (3.9896)	Entropy 1.89455 (1.89493)	Top-1 acc 30.859 (31.013)	Top-5 acc 55.859 (54.765)	lr 0.02493
Train [5][440/3239]	Time 0.139 (0.517)	Data Time 0.001 (0.050)	Loss 3.8241 (3.9886)	Entropy 1.89452 (1.89493)	Top-1 acc 35.156 (31.025)	Top-5 acc 58.203 (54.794)	lr 0.02493
Train [5][450/3239]	Time 2.170 (0.514)	Data Time 0.001 (0.049)	Loss 4.1357 (3.9891)	Entropy 1.89452 (1.89492)	Top-1 acc 30.469 (31.026)	Top-5 acc 51.953 (54.785)	lr 0.02493
Train [5][460/3239]	Time 0.210 (0.507)	Data Time 0.001 (0.048)	Loss 3.9575 (3.9900)	Entropy 1.89452 (1.89491)	Top-1 acc 32.422 (30.998)	Top-5 acc 55.078 (54.738)	lr 0.02493
Train [5][470/3239]	Time 0.183 (0.505)	Data Time 0.001 (0.047)	Loss 4.1212 (3.9904)	Entropy 1.89448 (1.89490)	Top-1 acc 32.422 (30.985)	Top-5 acc 51.562 (54.726)	lr 0.02493
Train [5][480/3239]	Time 0.184 (0.503)	Data Time 0.002 (0.046)	Loss 4.0500 (3.9901)	Entropy 1.89445 (1.89489)	Top-1 acc 29.297 (30.987)	Top-5 acc 55.469 (54.737)	lr 0.02493
Train [5][490/3239]	Time 0.216 (0.501)	Data Time 0.002 (0.045)	Loss 4.0068 (3.9896)	Entropy 1.89443 (1.89488)	Top-1 acc 31.641 (31.012)	Top-5 acc 53.906 (54.769)	lr 0.02493
Train [5][500/3239]	Time 0.311 (0.500)	Data Time 0.001 (0.044)	Loss 3.9732 (3.9888)	Entropy 1.89443 (1.89487)	Top-1 acc 32.812 (31.041)	Top-5 acc 53.125 (54.783)	lr 0.02493
Train [5][510/3239]	Time 0.215 (0.498)	Data Time 0.001 (0.043)	Loss 4.0402 (3.9886)	Entropy 1.89441 (1.89486)	Top-1 acc 33.594 (31.057)	Top-5 acc 54.297 (54.778)	lr 0.02493
Train [5][520/3239]	Time 0.201 (0.496)	Data Time 0.001 (0.043)	Loss 3.9244 (3.9893)	Entropy 1.89441 (1.89485)	Top-1 acc 32.422 (31.027)	Top-5 acc 55.469 (54.768)	lr 0.02493
Train [5][530/3239]	Time 0.219 (0.494)	Data Time 0.001 (0.042)	Loss 3.9365 (3.9893)	Entropy 1.89439 (1.89485)	Top-1 acc 33.984 (31.043)	Top-5 acc 55.469 (54.755)	lr 0.02493
Train [5][540/3239]	Time 0.201 (0.493)	Data Time 0.001 (0.041)	Loss 3.8799 (3.9892)	Entropy 1.89437 (1.89484)	Top-1 acc 33.984 (31.051)	Top-5 acc 58.984 (54.745)	lr 0.02493
Train [5][550/3239]	Time 0.172 (0.491)	Data Time 0.001 (0.040)	Loss 4.0784 (3.9893)	Entropy 1.89435 (1.89483)	Top-1 acc 28.906 (31.053)	Top-5 acc 52.734 (54.756)	lr 0.02493
Train [5][560/3239]	Time 2.312 (0.490)	Data Time 0.001 (0.040)	Loss 3.8657 (3.9892)	Entropy 1.89435 (1.89482)	Top-1 acc 32.422 (31.051)	Top-5 acc 59.375 (54.748)	lr 0.02493
Train [5][570/3239]	Time 0.207 (0.485)	Data Time 0.001 (0.039)	Loss 3.7952 (3.9890)	Entropy 1.89434 (1.89481)	Top-1 acc 34.375 (31.054)	Top-5 acc 56.641 (54.733)	lr 0.02493
Train [5][580/3239]	Time 0.212 (0.484)	Data Time 0.001 (0.038)	Loss 3.9115 (3.9895)	Entropy 1.89431 (1.89480)	Top-1 acc 31.250 (31.034)	Top-5 acc 58.203 (54.716)	lr 0.02493
Train [5][590/3239]	Time 0.169 (0.482)	Data Time 0.001 (0.038)	Loss 4.1513 (3.9896)	Entropy 1.89430 (1.89480)	Top-1 acc 29.688 (31.038)	Top-5 acc 49.609 (54.722)	lr 0.02493
Train [5][600/3239]	Time 0.222 (0.481)	Data Time 0.002 (0.037)	Loss 3.8121 (3.9890)	Entropy 1.89429 (1.89479)	Top-1 acc 35.156 (31.062)	Top-5 acc 58.594 (54.741)	lr 0.02493
Train [5][610/3239]	Time 0.207 (0.480)	Data Time 0.001 (0.037)	Loss 3.8726 (3.9881)	Entropy 1.89428 (1.89478)	Top-1 acc 32.422 (31.074)	Top-5 acc 60.156 (54.776)	lr 0.02492
Train [5][620/3239]	Time 0.187 (0.479)	Data Time 0.001 (0.036)	Loss 4.0315 (3.9888)	Entropy 1.89427 (1.89477)	Top-1 acc 28.516 (31.042)	Top-5 acc 55.078 (54.762)	lr 0.02492
Train [5][630/3239]	Time 0.214 (0.478)	Data Time 0.002 (0.036)	Loss 4.2209 (3.9889)	Entropy 1.89426 (1.89476)	Top-1 acc 28.516 (31.040)	Top-5 acc 50.391 (54.765)	lr 0.02492
Train [5][640/3239]	Time 0.304 (0.531)	Data Time 0.003 (0.035)	Loss 4.0670 (3.9898)	Entropy 1.89425 (1.89475)	Top-1 acc 30.078 (31.029)	Top-5 acc 55.078 (54.748)	lr 0.02492
Train [5][650/3239]	Time 0.178 (0.529)	Data Time 0.002 (0.035)	Loss 3.9783 (3.9886)	Entropy 1.89422 (1.89475)	Top-1 acc 28.516 (31.050)	Top-5 acc 55.859 (54.782)	lr 0.02492
Train [5][660/3239]	Time 0.202 (0.528)	Data Time 0.001 (0.034)	Loss 3.9876 (3.9889)	Entropy 1.89422 (1.89474)	Top-1 acc 33.203 (31.043)	Top-5 acc 54.688 (54.768)	lr 0.02492
Train [5][670/3239]	Time 2.163 (0.526)	Data Time 0.001 (0.034)	Loss 4.2466 (3.9890)	Entropy 1.89422 (1.89473)	Top-1 acc 24.219 (31.047)	Top-5 acc 49.609 (54.766)	lr 0.02492
Train [5][680/3239]	Time 0.199 (0.521)	Data Time 0.001 (0.033)	Loss 4.1610 (3.9895)	Entropy 1.89420 (1.89472)	Top-1 acc 24.609 (31.031)	Top-5 acc 49.219 (54.748)	lr 0.02492
Train [5][690/3239]	Time 0.212 (0.520)	Data Time 0.001 (0.033)	Loss 3.9201 (3.9895)	Entropy 1.89420 (1.89472)	Top-1 acc 29.297 (31.010)	Top-5 acc 57.031 (54.764)	lr 0.02492
Train [5][700/3239]	Time 0.141 (0.518)	Data Time 0.001 (0.032)	Loss 4.0103 (3.9890)	Entropy 1.89418 (1.89471)	Top-1 acc 32.422 (31.014)	Top-5 acc 53.906 (54.765)	lr 0.02492
Train [5][710/3239]	Time 0.220 (0.517)	Data Time 0.001 (0.032)	Loss 4.1812 (3.9889)	Entropy 1.89416 (1.89470)	Top-1 acc 28.906 (31.034)	Top-5 acc 49.219 (54.769)	lr 0.02492
Train [5][720/3239]	Time 0.211 (0.515)	Data Time 0.001 (0.032)	Loss 3.8939 (3.9894)	Entropy 1.89413 (1.89469)	Top-1 acc 35.938 (31.033)	Top-5 acc 57.812 (54.770)	lr 0.02492
Train [5][730/3239]	Time 0.198 (0.514)	Data Time 0.001 (0.031)	Loss 4.0217 (3.9896)	Entropy 1.89413 (1.89468)	Top-1 acc 28.906 (31.030)	Top-5 acc 52.344 (54.770)	lr 0.02492
Train [5][740/3239]	Time 0.221 (0.512)	Data Time 0.001 (0.031)	Loss 3.8225 (3.9887)	Entropy 1.89412 (1.89468)	Top-1 acc 35.938 (31.045)	Top-5 acc 58.594 (54.791)	lr 0.02492
Train [5][750/3239]	Time 0.188 (0.511)	Data Time 0.001 (0.030)	Loss 3.8754 (3.9895)	Entropy 1.89411 (1.89467)	Top-1 acc 35.156 (31.033)	Top-5 acc 57.812 (54.774)	lr 0.02492
Train [5][760/3239]	Time 0.314 (0.509)	Data Time 0.001 (0.030)	Loss 4.0474 (3.9893)	Entropy 1.89411 (1.89466)	Top-1 acc 34.375 (31.037)	Top-5 acc 53.516 (54.778)	lr 0.02492
Train [5][770/3239]	Time 0.372 (0.508)	Data Time 0.001 (0.030)	Loss 4.0015 (3.9896)	Entropy 1.89408 (1.89466)	Top-1 acc 28.906 (31.049)	Top-5 acc 53.906 (54.771)	lr 0.02492
Train [5][780/3239]	Time 2.238 (0.507)	Data Time 0.001 (0.029)	Loss 4.0829 (3.9893)	Entropy 1.89408 (1.89465)	Top-1 acc 28.125 (31.054)	Top-5 acc 52.344 (54.776)	lr 0.02492
Train [5][790/3239]	Time 0.225 (0.503)	Data Time 0.001 (0.029)	Loss 4.1230 (3.9886)	Entropy 1.89406 (1.89464)	Top-1 acc 28.516 (31.064)	Top-5 acc 50.391 (54.791)	lr 0.02492
Train [5][800/3239]	Time 0.234 (0.502)	Data Time 0.001 (0.029)	Loss 3.8797 (3.9881)	Entropy 1.89405 (1.89463)	Top-1 acc 32.031 (31.078)	Top-5 acc 56.641 (54.809)	lr 0.02492
Train [5][810/3239]	Time 0.159 (0.501)	Data Time 0.001 (0.028)	Loss 3.8658 (3.9879)	Entropy 1.89404 (1.89463)	Top-1 acc 35.156 (31.082)	Top-5 acc 55.469 (54.816)	lr 0.02492
Train [5][820/3239]	Time 0.199 (0.500)	Data Time 0.001 (0.028)	Loss 3.9888 (3.9888)	Entropy 1.89401 (1.89462)	Top-1 acc 27.344 (31.070)	Top-5 acc 55.469 (54.790)	lr 0.02492
Train [5][830/3239]	Time 0.187 (0.499)	Data Time 0.001 (0.028)	Loss 3.8573 (3.9878)	Entropy 1.89400 (1.89461)	Top-1 acc 35.938 (31.095)	Top-5 acc 57.422 (54.812)	lr 0.02492
Train [5][840/3239]	Time 0.141 (0.498)	Data Time 0.001 (0.027)	Loss 3.7984 (3.9872)	Entropy 1.89399 (1.89460)	Top-1 acc 32.422 (31.101)	Top-5 acc 58.203 (54.830)	lr 0.02492
Train [5][850/3239]	Time 0.133 (0.497)	Data Time 0.001 (0.027)	Loss 3.9830 (3.9872)	Entropy 1.89396 (1.89460)	Top-1 acc 32.422 (31.097)	Top-5 acc 57.422 (54.829)	lr 0.02492
Train [5][860/3239]	Time 0.188 (0.496)	Data Time 0.001 (0.027)	Loss 4.1575 (3.9880)	Entropy 1.89395 (1.89459)	Top-1 acc 29.297 (31.088)	Top-5 acc 50.000 (54.816)	lr 0.02492
Train [5][870/3239]	Time 0.265 (0.495)	Data Time 0.005 (0.027)	Loss 3.9623 (3.9881)	Entropy 1.89393 (1.89458)	Top-1 acc 29.297 (31.085)	Top-5 acc 57.812 (54.835)	lr 0.02492
Train [5][880/3239]	Time 0.291 (0.494)	Data Time 0.001 (0.026)	Loss 4.0809 (3.9887)	Entropy 1.89391 (1.89457)	Top-1 acc 25.000 (31.066)	Top-5 acc 51.562 (54.817)	lr 0.02492
Train [5][890/3239]	Time 2.154 (0.493)	Data Time 0.001 (0.026)	Loss 3.9420 (3.9882)	Entropy 1.89391 (1.89457)	Top-1 acc 34.375 (31.084)	Top-5 acc 56.250 (54.830)	lr 0.02492
Train [5][900/3239]	Time 0.200 (0.490)	Data Time 0.001 (0.026)	Loss 3.9769 (3.9880)	Entropy 1.89390 (1.89456)	Top-1 acc 30.469 (31.080)	Top-5 acc 54.688 (54.837)	lr 0.02492
Train [5][910/3239]	Time 0.221 (0.489)	Data Time 0.002 (0.026)	Loss 4.0001 (3.9882)	Entropy 1.89388 (1.89455)	Top-1 acc 29.688 (31.087)	Top-5 acc 54.297 (54.841)	lr 0.02492
Train [5][920/3239]	Time 0.159 (0.488)	Data Time 0.001 (0.025)	Loss 3.9565 (3.9884)	Entropy 1.89386 (1.89454)	Top-1 acc 33.203 (31.089)	Top-5 acc 55.469 (54.831)	lr 0.02492
Train [5][930/3239]	Time 0.183 (0.487)	Data Time 0.001 (0.025)	Loss 4.0423 (3.9887)	Entropy 1.89385 (1.89454)	Top-1 acc 30.469 (31.102)	Top-5 acc 53.125 (54.826)	lr 0.02492
Train [5][940/3239]	Time 0.221 (0.487)	Data Time 0.002 (0.025)	Loss 4.0150 (3.9888)	Entropy 1.89381 (1.89453)	Top-1 acc 30.469 (31.100)	Top-5 acc 53.906 (54.823)	lr 0.02492
Train [5][950/3239]	Time 0.224 (0.486)	Data Time 0.002 (0.025)	Loss 3.7788 (3.9890)	Entropy 1.89376 (1.89452)	Top-1 acc 37.500 (31.104)	Top-5 acc 62.891 (54.821)	lr 0.02492
Train [5][960/3239]	Time 0.155 (0.485)	Data Time 0.001 (0.024)	Loss 4.0407 (3.9891)	Entropy 1.89374 (1.89451)	Top-1 acc 28.906 (31.097)	Top-5 acc 55.859 (54.825)	lr 0.02492
Train [5][970/3239]	Time 0.149 (0.484)	Data Time 0.001 (0.024)	Loss 3.8594 (3.9893)	Entropy 1.89373 (1.89451)	Top-1 acc 31.250 (31.095)	Top-5 acc 58.594 (54.818)	lr 0.02492
Train [5][980/3239]	Time 0.158 (0.483)	Data Time 0.001 (0.024)	Loss 4.1966 (3.9899)	Entropy 1.89370 (1.89450)	Top-1 acc 25.781 (31.075)	Top-5 acc 51.172 (54.811)	lr 0.02492
Train [5][990/3239]	Time 0.225 (0.482)	Data Time 0.002 (0.024)	Loss 4.0192 (3.9903)	Entropy 1.89367 (1.89449)	Top-1 acc 32.031 (31.086)	Top-5 acc 56.641 (54.803)	lr 0.02492
Train [5][1000/3239]	Time 32.852 (0.512)	Data Time 0.002 (0.024)	Loss 3.8106 (3.9895)	Entropy 1.89367 (1.89448)	Top-1 acc 35.547 (31.109)	Top-5 acc 56.250 (54.817)	lr 0.02492
Train [5][1010/3239]	Time 0.324 (0.510)	Data Time 0.003 (0.023)	Loss 4.0217 (3.9888)	Entropy 1.89366 (1.89447)	Top-1 acc 29.688 (31.126)	Top-5 acc 54.297 (54.831)	lr 0.02492
Train [5][1020/3239]	Time 0.191 (0.510)	Data Time 0.002 (0.023)	Loss 3.9061 (3.9882)	Entropy 1.89363 (1.89447)	Top-1 acc 37.500 (31.148)	Top-5 acc 56.250 (54.849)	lr 0.02492
Train [5][1030/3239]	Time 0.196 (0.509)	Data Time 0.002 (0.023)	Loss 4.0166 (3.9889)	Entropy 1.89360 (1.89446)	Top-1 acc 26.562 (31.137)	Top-5 acc 53.516 (54.831)	lr 0.02492
Train [5][1040/3239]	Time 0.241 (0.508)	Data Time 0.001 (0.023)	Loss 3.8768 (3.9880)	Entropy 1.89359 (1.89445)	Top-1 acc 34.375 (31.159)	Top-5 acc 56.641 (54.842)	lr 0.02492
Train [5][1050/3239]	Time 0.252 (0.508)	Data Time 0.001 (0.023)	Loss 3.9767 (3.9881)	Entropy 1.89357 (1.89444)	Top-1 acc 32.422 (31.159)	Top-5 acc 53.125 (54.832)	lr 0.02492
Train [5][1060/3239]	Time 0.182 (0.507)	Data Time 0.001 (0.022)	Loss 3.9027 (3.9888)	Entropy 1.89355 (1.89443)	Top-1 acc 34.375 (31.155)	Top-5 acc 54.297 (54.819)	lr 0.02492
Train [5][1070/3239]	Time 0.231 (0.506)	Data Time 0.001 (0.022)	Loss 3.9591 (3.9886)	Entropy 1.89354 (1.89442)	Top-1 acc 29.688 (31.156)	Top-5 acc 51.562 (54.830)	lr 0.02492
Train [5][1080/3239]	Time 0.237 (0.505)	Data Time 0.001 (0.022)	Loss 3.8528 (3.9886)	Entropy 1.89352 (1.89442)	Top-1 acc 35.547 (31.165)	Top-5 acc 57.422 (54.824)	lr 0.02492
Train [5][1090/3239]	Time 0.199 (0.504)	Data Time 0.001 (0.022)	Loss 4.2064 (3.9884)	Entropy 1.89350 (1.89441)	Top-1 acc 25.000 (31.163)	Top-5 acc 50.391 (54.824)	lr 0.02492
Train [5][1100/3239]	Time 0.268 (0.503)	Data Time 0.001 (0.022)	Loss 4.0577 (3.9889)	Entropy 1.89349 (1.89440)	Top-1 acc 25.781 (31.147)	Top-5 acc 52.344 (54.812)	lr 0.02492
Train [5][1110/3239]	Time 2.382 (0.502)	Data Time 0.001 (0.021)	Loss 3.9885 (3.9892)	Entropy 1.89349 (1.89439)	Top-1 acc 28.906 (31.136)	Top-5 acc 53.125 (54.805)	lr 0.02492
Train [5][1120/3239]	Time 0.290 (0.500)	Data Time 0.001 (0.021)	Loss 4.0700 (3.9889)	Entropy 1.89349 (1.89438)	Top-1 acc 28.125 (31.141)	Top-5 acc 53.906 (54.814)	lr 0.02492
Train [5][1130/3239]	Time 0.211 (0.499)	Data Time 0.001 (0.021)	Loss 4.3091 (3.9889)	Entropy 1.89350 (1.89437)	Top-1 acc 26.953 (31.144)	Top-5 acc 46.875 (54.808)	lr 0.02492
Train [5][1140/3239]	Time 0.268 (0.498)	Data Time 0.001 (0.021)	Loss 3.7577 (3.9881)	Entropy 1.89349 (1.89437)	Top-1 acc 35.156 (31.156)	Top-5 acc 57.422 (54.826)	lr 0.02492
Train [5][1150/3239]	Time 0.141 (0.497)	Data Time 0.001 (0.021)	Loss 3.9232 (3.9878)	Entropy 1.89348 (1.89436)	Top-1 acc 32.812 (31.166)	Top-5 acc 54.688 (54.835)	lr 0.02492
Train [5][1160/3239]	Time 0.164 (0.497)	Data Time 0.001 (0.021)	Loss 4.0159 (3.9881)	Entropy 1.89344 (1.89435)	Top-1 acc 28.125 (31.163)	Top-5 acc 51.953 (54.826)	lr 0.02492
Train [5][1170/3239]	Time 0.172 (0.496)	Data Time 0.001 (0.021)	Loss 4.0786 (3.9875)	Entropy 1.89340 (1.89434)	Top-1 acc 31.250 (31.167)	Top-5 acc 50.000 (54.841)	lr 0.02492
Train [5][1180/3239]	Time 0.237 (0.495)	Data Time 0.001 (0.020)	Loss 3.8434 (3.9869)	Entropy 1.89338 (1.89434)	Top-1 acc 34.766 (31.172)	Top-5 acc 56.250 (54.848)	lr 0.02492
Train [5][1190/3239]	Time 0.206 (0.494)	Data Time 0.001 (0.020)	Loss 3.9202 (3.9867)	Entropy 1.89338 (1.89433)	Top-1 acc 33.594 (31.170)	Top-5 acc 57.422 (54.850)	lr 0.02492
Train [5][1200/3239]	Time 0.185 (0.493)	Data Time 0.001 (0.020)	Loss 3.9326 (3.9864)	Entropy 1.89333 (1.89432)	Top-1 acc 30.859 (31.164)	Top-5 acc 55.859 (54.858)	lr 0.02492
Train [5][1210/3239]	Time 0.140 (0.493)	Data Time 0.001 (0.020)	Loss 4.0407 (3.9863)	Entropy 1.89330 (1.89431)	Top-1 acc 27.734 (31.166)	Top-5 acc 53.125 (54.853)	lr 0.02492
Train [5][1220/3239]	Time 2.265 (0.492)	Data Time 0.001 (0.020)	Loss 3.9012 (3.9863)	Entropy 1.89330 (1.89430)	Top-1 acc 33.984 (31.174)	Top-5 acc 55.859 (54.851)	lr 0.02492
Train [5][1230/3239]	Time 0.341 (0.490)	Data Time 0.001 (0.020)	Loss 3.8847 (3.9862)	Entropy 1.89331 (1.89430)	Top-1 acc 32.812 (31.168)	Top-5 acc 59.766 (54.854)	lr 0.02492
Train [5][1240/3239]	Time 0.196 (0.489)	Data Time 0.001 (0.019)	Loss 3.9721 (3.9863)	Entropy 1.89327 (1.89429)	Top-1 acc 31.641 (31.171)	Top-5 acc 55.078 (54.851)	lr 0.02492
Train [5][1250/3239]	Time 0.141 (0.489)	Data Time 0.001 (0.019)	Loss 4.0376 (3.9863)	Entropy 1.89325 (1.89428)	Top-1 acc 29.297 (31.171)	Top-5 acc 50.000 (54.851)	lr 0.02492
Train [5][1260/3239]	Time 0.230 (0.488)	Data Time 0.002 (0.019)	Loss 3.8857 (3.9863)	Entropy 1.89322 (1.89427)	Top-1 acc 31.250 (31.171)	Top-5 acc 56.250 (54.846)	lr 0.02492
Train [5][1270/3239]	Time 0.221 (0.487)	Data Time 0.001 (0.019)	Loss 3.8860 (3.9861)	Entropy 1.89318 (1.89426)	Top-1 acc 29.297 (31.176)	Top-5 acc 57.812 (54.848)	lr 0.02492
Train [5][1280/3239]	Time 0.203 (0.487)	Data Time 0.001 (0.019)	Loss 4.0782 (3.9867)	Entropy 1.89315 (1.89425)	Top-1 acc 30.469 (31.175)	Top-5 acc 50.000 (54.839)	lr 0.02492
Train [5][1290/3239]	Time 0.187 (0.486)	Data Time 0.001 (0.019)	Loss 3.8746 (3.9865)	Entropy 1.89313 (1.89424)	Top-1 acc 30.859 (31.175)	Top-5 acc 57.422 (54.842)	lr 0.02492
Train [5][1300/3239]	Time 0.144 (0.486)	Data Time 0.001 (0.019)	Loss 4.0478 (3.9867)	Entropy 1.89313 (1.89424)	Top-1 acc 33.203 (31.173)	Top-5 acc 56.250 (54.839)	lr 0.02492
Train [5][1310/3239]	Time 0.222 (0.485)	Data Time 0.002 (0.019)	Loss 3.8694 (3.9865)	Entropy 1.89311 (1.89423)	Top-1 acc 34.766 (31.175)	Top-5 acc 57.812 (54.839)	lr 0.02492
Train [5][1320/3239]	Time 0.212 (0.484)	Data Time 0.001 (0.018)	Loss 3.8515 (3.9865)	Entropy 1.89309 (1.89422)	Top-1 acc 36.719 (31.182)	Top-5 acc 60.938 (54.848)	lr 0.02492
Train [5][1330/3239]	Time 2.271 (0.484)	Data Time 0.001 (0.018)	Loss 3.9311 (3.9868)	Entropy 1.89309 (1.89421)	Top-1 acc 31.250 (31.180)	Top-5 acc 53.906 (54.840)	lr 0.02492
Train [5][1340/3239]	Time 0.235 (0.482)	Data Time 0.001 (0.018)	Loss 3.9846 (3.9866)	Entropy 1.89305 (1.89420)	Top-1 acc 34.375 (31.190)	Top-5 acc 55.469 (54.847)	lr 0.02492
Train [5][1350/3239]	Time 0.291 (0.481)	Data Time 0.001 (0.018)	Loss 3.9345 (3.9866)	Entropy 1.89301 (1.89419)	Top-1 acc 31.641 (31.191)	Top-5 acc 55.859 (54.843)	lr 0.02492
Train [5][1360/3239]	Time 0.196 (0.481)	Data Time 0.001 (0.018)	Loss 4.0647 (3.9868)	Entropy 1.89301 (1.89418)	Top-1 acc 32.812 (31.190)	Top-5 acc 54.688 (54.842)	lr 0.02492
Train [5][1370/3239]	Time 0.205 (0.506)	Data Time 0.002 (0.018)	Loss 4.0883 (3.9867)	Entropy 1.89296 (1.89418)	Top-1 acc 29.297 (31.192)	Top-5 acc 50.000 (54.842)	lr 0.02492
Train [5][1380/3239]	Time 0.227 (0.505)	Data Time 0.002 (0.018)	Loss 3.7340 (3.9863)	Entropy 1.89294 (1.89417)	Top-1 acc 37.891 (31.198)	Top-5 acc 62.500 (54.849)	lr 0.02492
Train [5][1390/3239]	Time 0.213 (0.504)	Data Time 0.002 (0.018)	Loss 3.8243 (3.9863)	Entropy 1.89291 (1.89416)	Top-1 acc 33.203 (31.190)	Top-5 acc 58.594 (54.844)	lr 0.02492
Train [5][1400/3239]	Time 0.211 (0.504)	Data Time 0.001 (0.017)	Loss 3.9800 (3.9858)	Entropy 1.89289 (1.89415)	Top-1 acc 28.906 (31.201)	Top-5 acc 55.078 (54.860)	lr 0.02492
Train [5][1410/3239]	Time 0.233 (0.503)	Data Time 0.001 (0.017)	Loss 4.0456 (3.9854)	Entropy 1.89287 (1.89414)	Top-1 acc 33.594 (31.211)	Top-5 acc 57.031 (54.870)	lr 0.02492
Train [5][1420/3239]	Time 0.221 (0.503)	Data Time 0.001 (0.017)	Loss 4.0012 (3.9853)	Entropy 1.89285 (1.89413)	Top-1 acc 32.422 (31.213)	Top-5 acc 56.250 (54.870)	lr 0.02492
Train [5][1430/3239]	Time 0.194 (0.502)	Data Time 0.002 (0.017)	Loss 3.8685 (3.9853)	Entropy 1.89282 (1.89412)	Top-1 acc 34.375 (31.210)	Top-5 acc 57.031 (54.867)	lr 0.02492
Train [5][1440/3239]	Time 2.292 (0.502)	Data Time 0.001 (0.017)	Loss 3.8373 (3.9853)	Entropy 1.89282 (1.89411)	Top-1 acc 36.328 (31.212)	Top-5 acc 57.812 (54.866)	lr 0.02492
Train [5][1450/3239]	Time 0.156 (0.500)	Data Time 0.001 (0.017)	Loss 3.9156 (3.9850)	Entropy 1.89283 (1.89410)	Top-1 acc 36.328 (31.214)	Top-5 acc 57.031 (54.868)	lr 0.02492
Train [5][1460/3239]	Time 0.328 (0.499)	Data Time 0.001 (0.017)	Loss 4.0146 (3.9853)	Entropy 1.89279 (1.89410)	Top-1 acc 34.766 (31.212)	Top-5 acc 53.125 (54.866)	lr 0.02492
Train [5][1470/3239]	Time 0.306 (0.498)	Data Time 0.001 (0.017)	Loss 3.7245 (3.9852)	Entropy 1.89278 (1.89409)	Top-1 acc 38.672 (31.214)	Top-5 acc 58.594 (54.867)	lr 0.02492
Train [5][1480/3239]	Time 0.185 (0.498)	Data Time 0.001 (0.017)	Loss 4.0048 (3.9851)	Entropy 1.89275 (1.89408)	Top-1 acc 29.297 (31.213)	Top-5 acc 54.688 (54.873)	lr 0.02492
Train [5][1490/3239]	Time 0.191 (0.497)	Data Time 0.001 (0.017)	Loss 4.0343 (3.9852)	Entropy 1.89274 (1.89407)	Top-1 acc 28.125 (31.211)	Top-5 acc 53.906 (54.865)	lr 0.02491
Train [5][1500/3239]	Time 0.195 (0.497)	Data Time 0.002 (0.016)	Loss 3.8995 (3.9847)	Entropy 1.89274 (1.89406)	Top-1 acc 31.250 (31.222)	Top-5 acc 57.031 (54.877)	lr 0.02491
Train [5][1510/3239]	Time 0.152 (0.496)	Data Time 0.001 (0.016)	Loss 4.3791 (3.9849)	Entropy 1.89273 (1.89405)	Top-1 acc 22.656 (31.220)	Top-5 acc 46.094 (54.871)	lr 0.02491
Train [5][1520/3239]	Time 0.184 (0.496)	Data Time 0.002 (0.016)	Loss 3.9592 (3.9847)	Entropy 1.89272 (1.89404)	Top-1 acc 31.250 (31.221)	Top-5 acc 56.641 (54.876)	lr 0.02491
Train [5][1530/3239]	Time 0.166 (0.495)	Data Time 0.001 (0.016)	Loss 3.8688 (3.9847)	Entropy 1.89271 (1.89403)	Top-1 acc 33.594 (31.220)	Top-5 acc 58.984 (54.880)	lr 0.02491
Train [5][1540/3239]	Time 0.247 (0.494)	Data Time 0.001 (0.016)	Loss 3.8809 (3.9841)	Entropy 1.89270 (1.89402)	Top-1 acc 31.641 (31.229)	Top-5 acc 57.422 (54.896)	lr 0.02491
Train [5][1550/3239]	Time 2.121 (0.494)	Data Time 0.001 (0.016)	Loss 4.0998 (3.9840)	Entropy 1.89270 (1.89402)	Top-1 acc 25.781 (31.227)	Top-5 acc 52.734 (54.904)	lr 0.02491
Train [5][1560/3239]	Time 0.188 (0.492)	Data Time 0.001 (0.016)	Loss 4.0877 (3.9839)	Entropy 1.89267 (1.89401)	Top-1 acc 28.516 (31.227)	Top-5 acc 51.953 (54.907)	lr 0.02491
Train [5][1570/3239]	Time 0.203 (0.492)	Data Time 0.001 (0.016)	Loss 3.9149 (3.9837)	Entropy 1.89265 (1.89400)	Top-1 acc 28.516 (31.227)	Top-5 acc 59.766 (54.912)	lr 0.02491
Train [5][1580/3239]	Time 0.212 (0.491)	Data Time 0.001 (0.016)	Loss 4.0254 (3.9839)	Entropy 1.89262 (1.89399)	Top-1 acc 27.734 (31.219)	Top-5 acc 51.953 (54.903)	lr 0.02491
Train [5][1590/3239]	Time 0.196 (0.490)	Data Time 0.001 (0.016)	Loss 3.9446 (3.9839)	Entropy 1.89259 (1.89398)	Top-1 acc 30.859 (31.217)	Top-5 acc 58.203 (54.904)	lr 0.02491
Train [5][1600/3239]	Time 0.138 (0.490)	Data Time 0.001 (0.016)	Loss 3.8016 (3.9836)	Entropy 1.89257 (1.89397)	Top-1 acc 34.375 (31.221)	Top-5 acc 56.641 (54.911)	lr 0.02491
Train [5][1610/3239]	Time 0.315 (0.490)	Data Time 0.001 (0.016)	Loss 4.0280 (3.9839)	Entropy 1.89254 (1.89396)	Top-1 acc 30.859 (31.219)	Top-5 acc 50.391 (54.904)	lr 0.02491
Train [5][1620/3239]	Time 0.196 (0.489)	Data Time 0.001 (0.015)	Loss 3.8707 (3.9838)	Entropy 1.89251 (1.89396)	Top-1 acc 34.766 (31.224)	Top-5 acc 55.469 (54.906)	lr 0.02491
Train [5][1630/3239]	Time 0.199 (0.489)	Data Time 0.001 (0.015)	Loss 3.9676 (3.9842)	Entropy 1.89249 (1.89395)	Top-1 acc 34.766 (31.215)	Top-5 acc 56.250 (54.901)	lr 0.02491
Train [5][1640/3239]	Time 0.227 (0.488)	Data Time 0.001 (0.015)	Loss 3.8441 (3.9844)	Entropy 1.89247 (1.89394)	Top-1 acc 33.594 (31.209)	Top-5 acc 59.375 (54.901)	lr 0.02491
Train [5][1650/3239]	Time 0.185 (0.488)	Data Time 0.002 (0.015)	Loss 4.0002 (3.9847)	Entropy 1.89247 (1.89393)	Top-1 acc 30.078 (31.207)	Top-5 acc 57.031 (54.900)	lr 0.02491
Train [5][1660/3239]	Time 2.360 (0.487)	Data Time 0.001 (0.015)	Loss 3.8428 (3.9845)	Entropy 1.89247 (1.89392)	Top-1 acc 31.250 (31.203)	Top-5 acc 59.766 (54.905)	lr 0.02491
Train [5][1670/3239]	Time 0.159 (0.486)	Data Time 0.001 (0.015)	Loss 4.1092 (3.9847)	Entropy 1.89243 (1.89391)	Top-1 acc 27.344 (31.200)	Top-5 acc 52.344 (54.900)	lr 0.02491
Train [5][1680/3239]	Time 0.202 (0.485)	Data Time 0.001 (0.015)	Loss 3.8629 (3.9846)	Entropy 1.89242 (1.89390)	Top-1 acc 36.719 (31.206)	Top-5 acc 57.422 (54.905)	lr 0.02491
Train [5][1690/3239]	Time 0.228 (0.485)	Data Time 0.001 (0.015)	Loss 4.0100 (3.9849)	Entropy 1.89237 (1.89389)	Top-1 acc 34.766 (31.203)	Top-5 acc 55.859 (54.898)	lr 0.02491
Train [5][1700/3239]	Time 0.190 (0.484)	Data Time 0.001 (0.015)	Loss 3.9932 (3.9852)	Entropy 1.89234 (1.89388)	Top-1 acc 29.297 (31.201)	Top-5 acc 55.078 (54.897)	lr 0.02491
Train [5][1710/3239]	Time 0.288 (0.484)	Data Time 0.001 (0.015)	Loss 4.0566 (3.9851)	Entropy 1.89232 (1.89387)	Top-1 acc 30.078 (31.204)	Top-5 acc 55.078 (54.903)	lr 0.02491
Train [5][1720/3239]	Time 0.305 (0.483)	Data Time 0.001 (0.015)	Loss 4.1752 (3.9854)	Entropy 1.89231 (1.89387)	Top-1 acc 25.781 (31.203)	Top-5 acc 53.125 (54.895)	lr 0.02491
Train [5][1730/3239]	Time 0.310 (0.502)	Data Time 0.002 (0.015)	Loss 3.9601 (3.9852)	Entropy 1.89232 (1.89386)	Top-1 acc 33.594 (31.215)	Top-5 acc 55.078 (54.902)	lr 0.02491
Train [5][1740/3239]	Time 0.199 (0.502)	Data Time 0.002 (0.015)	Loss 3.9623 (3.9853)	Entropy 1.89230 (1.89385)	Top-1 acc 34.766 (31.217)	Top-5 acc 55.859 (54.898)	lr 0.02491
Train [5][1750/3239]	Time 0.281 (0.501)	Data Time 0.002 (0.015)	Loss 3.9824 (3.9850)	Entropy 1.89229 (1.89384)	Top-1 acc 32.031 (31.225)	Top-5 acc 57.422 (54.906)	lr 0.02491
Train [5][1760/3239]	Time 0.215 (0.501)	Data Time 0.001 (0.014)	Loss 3.9111 (3.9853)	Entropy 1.89228 (1.89383)	Top-1 acc 33.203 (31.217)	Top-5 acc 57.031 (54.898)	lr 0.02491
Train [5][1770/3239]	Time 2.101 (0.500)	Data Time 0.001 (0.014)	Loss 4.0547 (3.9850)	Entropy 1.89228 (1.89382)	Top-1 acc 29.297 (31.220)	Top-5 acc 50.781 (54.899)	lr 0.02491
Train [5][1780/3239]	Time 0.146 (0.499)	Data Time 0.001 (0.014)	Loss 3.8521 (3.9852)	Entropy 1.89227 (1.89381)	Top-1 acc 30.859 (31.218)	Top-5 acc 58.594 (54.894)	lr 0.02491
Train [5][1790/3239]	Time 0.178 (0.498)	Data Time 0.002 (0.014)	Loss 4.0181 (3.9850)	Entropy 1.89224 (1.89380)	Top-1 acc 30.078 (31.220)	Top-5 acc 51.953 (54.894)	lr 0.02491
Train [5][1800/3239]	Time 0.228 (0.498)	Data Time 0.001 (0.014)	Loss 3.9925 (3.9852)	Entropy 1.89222 (1.89380)	Top-1 acc 30.469 (31.226)	Top-5 acc 56.641 (54.890)	lr 0.02491
Train [5][1810/3239]	Time 0.217 (0.497)	Data Time 0.001 (0.014)	Loss 4.1076 (3.9851)	Entropy 1.89217 (1.89379)	Top-1 acc 28.516 (31.227)	Top-5 acc 51.953 (54.894)	lr 0.02491
Train [5][1820/3239]	Time 0.162 (0.497)	Data Time 0.001 (0.014)	Loss 3.8497 (3.9847)	Entropy 1.89211 (1.89378)	Top-1 acc 33.203 (31.229)	Top-5 acc 57.812 (54.902)	lr 0.02491
Train [5][1830/3239]	Time 0.353 (0.496)	Data Time 0.001 (0.014)	Loss 4.0284 (3.9848)	Entropy 1.89211 (1.89377)	Top-1 acc 26.172 (31.226)	Top-5 acc 51.172 (54.899)	lr 0.02491
Train [5][1840/3239]	Time 0.268 (0.496)	Data Time 0.002 (0.014)	Loss 3.9409 (3.9848)	Entropy 1.89211 (1.89376)	Top-1 acc 30.078 (31.228)	Top-5 acc 54.297 (54.899)	lr 0.02491
Train [5][1850/3239]	Time 0.192 (0.496)	Data Time 0.001 (0.014)	Loss 4.0980 (3.9848)	Entropy 1.89211 (1.89375)	Top-1 acc 30.078 (31.224)	Top-5 acc 50.391 (54.899)	lr 0.02491
Train [5][1860/3239]	Time 0.203 (0.495)	Data Time 0.001 (0.014)	Loss 4.2368 (3.9853)	Entropy 1.89208 (1.89374)	Top-1 acc 28.125 (31.219)	Top-5 acc 48.047 (54.888)	lr 0.02491
Train [5][1870/3239]	Time 0.177 (0.495)	Data Time 0.001 (0.014)	Loss 4.1544 (3.9856)	Entropy 1.89206 (1.89373)	Top-1 acc 32.031 (31.220)	Top-5 acc 50.781 (54.886)	lr 0.02491
Train [5][1880/3239]	Time 2.173 (0.494)	Data Time 0.002 (0.014)	Loss 3.9885 (3.9857)	Entropy 1.89206 (1.89372)	Top-1 acc 29.297 (31.217)	Top-5 acc 53.125 (54.885)	lr 0.02491
Train [5][1890/3239]	Time 0.221 (0.493)	Data Time 0.002 (0.014)	Loss 4.0012 (3.9863)	Entropy 1.89205 (1.89371)	Top-1 acc 26.172 (31.203)	Top-5 acc 54.688 (54.874)	lr 0.02491
Train [5][1900/3239]	Time 0.233 (0.492)	Data Time 0.001 (0.014)	Loss 3.9497 (3.9864)	Entropy 1.89203 (1.89371)	Top-1 acc 31.250 (31.204)	Top-5 acc 56.641 (54.874)	lr 0.02491
Train [5][1910/3239]	Time 0.253 (0.492)	Data Time 0.002 (0.014)	Loss 3.8669 (3.9861)	Entropy 1.89201 (1.89370)	Top-1 acc 32.422 (31.215)	Top-5 acc 57.031 (54.885)	lr 0.02491
Train [5][1920/3239]	Time 0.221 (0.491)	Data Time 0.001 (0.013)	Loss 3.8477 (3.9860)	Entropy 1.89200 (1.89369)	Top-1 acc 31.641 (31.217)	Top-5 acc 58.203 (54.880)	lr 0.02491
Train [5][1930/3239]	Time 0.215 (0.491)	Data Time 0.001 (0.013)	Loss 4.0177 (3.9860)	Entropy 1.89198 (1.89368)	Top-1 acc 30.469 (31.215)	Top-5 acc 55.469 (54.885)	lr 0.02491
Train [5][1940/3239]	Time 0.217 (0.491)	Data Time 0.002 (0.013)	Loss 3.9363 (3.9859)	Entropy 1.89196 (1.89367)	Top-1 acc 27.734 (31.217)	Top-5 acc 56.641 (54.888)	lr 0.02491
Train [5][1950/3239]	Time 0.311 (0.490)	Data Time 0.001 (0.013)	Loss 3.8887 (3.9858)	Entropy 1.89191 (1.89366)	Top-1 acc 30.469 (31.215)	Top-5 acc 58.594 (54.893)	lr 0.02491
Train [5][1960/3239]	Time 0.217 (0.490)	Data Time 0.001 (0.013)	Loss 3.8734 (3.9859)	Entropy 1.89187 (1.89365)	Top-1 acc 34.766 (31.210)	Top-5 acc 53.906 (54.890)	lr 0.02491
Train [5][1970/3239]	Time 0.203 (0.489)	Data Time 0.001 (0.013)	Loss 3.8647 (3.9857)	Entropy 1.89184 (1.89364)	Top-1 acc 35.156 (31.213)	Top-5 acc 58.594 (54.894)	lr 0.02491
Train [5][1980/3239]	Time 0.159 (0.489)	Data Time 0.002 (0.013)	Loss 4.0736 (3.9861)	Entropy 1.89177 (1.89363)	Top-1 acc 27.734 (31.208)	Top-5 acc 51.172 (54.884)	lr 0.02491
Train [5][1990/3239]	Time 2.160 (0.489)	Data Time 0.002 (0.013)	Loss 3.8391 (3.9861)	Entropy 1.89177 (1.89362)	Top-1 acc 31.641 (31.212)	Top-5 acc 58.984 (54.886)	lr 0.02491
Train [5][2000/3239]	Time 0.240 (0.487)	Data Time 0.002 (0.013)	Loss 3.9690 (3.9861)	Entropy 1.89178 (1.89362)	Top-1 acc 31.641 (31.211)	Top-5 acc 53.125 (54.880)	lr 0.02491
Train [5][2010/3239]	Time 0.261 (0.487)	Data Time 0.002 (0.013)	Loss 4.0006 (3.9861)	Entropy 1.89177 (1.89361)	Top-1 acc 32.812 (31.217)	Top-5 acc 55.469 (54.878)	lr 0.02491
Train [5][2020/3239]	Time 0.201 (0.487)	Data Time 0.001 (0.013)	Loss 3.8977 (3.9863)	Entropy 1.89177 (1.89360)	Top-1 acc 34.766 (31.219)	Top-5 acc 55.469 (54.873)	lr 0.02491
Train [5][2030/3239]	Time 0.151 (0.486)	Data Time 0.001 (0.013)	Loss 3.9730 (3.9863)	Entropy 1.89171 (1.89359)	Top-1 acc 33.594 (31.224)	Top-5 acc 55.859 (54.873)	lr 0.02491
Train [5][2040/3239]	Time 0.181 (0.486)	Data Time 0.001 (0.013)	Loss 3.9027 (3.9860)	Entropy 1.89170 (1.89358)	Top-1 acc 31.641 (31.227)	Top-5 acc 57.812 (54.878)	lr 0.02491
Train [5][2050/3239]	Time 0.162 (0.485)	Data Time 0.001 (0.013)	Loss 3.9905 (3.9858)	Entropy 1.89168 (1.89357)	Top-1 acc 29.688 (31.227)	Top-5 acc 60.156 (54.881)	lr 0.02491
Train [5][2060/3239]	Time 0.306 (0.485)	Data Time 0.002 (0.013)	Loss 3.9508 (3.9858)	Entropy 1.89165 (1.89356)	Top-1 acc 30.469 (31.228)	Top-5 acc 54.688 (54.877)	lr 0.02491
Train [5][2070/3239]	Time 0.276 (0.485)	Data Time 0.001 (0.013)	Loss 4.1146 (3.9858)	Entropy 1.89164 (1.89355)	Top-1 acc 30.078 (31.230)	Top-5 acc 56.250 (54.883)	lr 0.02491
Train [5][2080/3239]	Time 0.264 (0.484)	Data Time 0.001 (0.013)	Loss 3.8564 (3.9856)	Entropy 1.89164 (1.89354)	Top-1 acc 35.156 (31.235)	Top-5 acc 55.469 (54.886)	lr 0.02491
Train [5][2090/3239]	Time 0.289 (0.501)	Data Time 0.004 (0.013)	Loss 3.9010 (3.9853)	Entropy 1.89161 (1.89353)	Top-1 acc 33.203 (31.236)	Top-5 acc 54.297 (54.890)	lr 0.02491
Train [5][2100/3239]	Time 2.365 (0.501)	Data Time 0.002 (0.013)	Loss 4.0854 (3.9856)	Entropy 1.89161 (1.89352)	Top-1 acc 29.688 (31.230)	Top-5 acc 53.516 (54.882)	lr 0.02491
Train [5][2110/3239]	Time 0.213 (0.500)	Data Time 0.001 (0.013)	Loss 3.8884 (3.9856)	Entropy 1.89158 (1.89351)	Top-1 acc 35.938 (31.232)	Top-5 acc 56.641 (54.885)	lr 0.02491
Train [5][2120/3239]	Time 0.209 (0.499)	Data Time 0.002 (0.012)	Loss 3.8729 (3.9855)	Entropy 1.89154 (1.89351)	Top-1 acc 35.156 (31.235)	Top-5 acc 56.641 (54.884)	lr 0.02491
Train [5][2130/3239]	Time 0.252 (0.499)	Data Time 0.001 (0.012)	Loss 3.8178 (3.9857)	Entropy 1.89152 (1.89350)	Top-1 acc 35.156 (31.230)	Top-5 acc 55.078 (54.879)	lr 0.02491
Train [5][2140/3239]	Time 0.214 (0.499)	Data Time 0.002 (0.012)	Loss 3.9205 (3.9857)	Entropy 1.89152 (1.89349)	Top-1 acc 33.203 (31.232)	Top-5 acc 53.906 (54.878)	lr 0.02491
Train [5][2150/3239]	Time 0.180 (0.499)	Data Time 0.003 (0.012)	Loss 4.3409 (3.9862)	Entropy 1.89149 (1.89348)	Top-1 acc 23.828 (31.221)	Top-5 acc 46.875 (54.868)	lr 0.02491
Train [5][2160/3239]	Time 0.416 (0.499)	Data Time 0.002 (0.012)	Loss 3.9586 (3.9863)	Entropy 1.89147 (1.89347)	Top-1 acc 31.250 (31.215)	Top-5 acc 55.078 (54.865)	lr 0.02491
Train [5][2170/3239]	Time 0.194 (0.498)	Data Time 0.001 (0.012)	Loss 3.8130 (3.9857)	Entropy 1.89146 (1.89346)	Top-1 acc 33.594 (31.225)	Top-5 acc 59.766 (54.879)	lr 0.02491
Train [5][2180/3239]	Time 0.157 (0.498)	Data Time 0.001 (0.012)	Loss 3.9715 (3.9854)	Entropy 1.89142 (1.89345)	Top-1 acc 33.984 (31.231)	Top-5 acc 54.688 (54.886)	lr 0.02491
Train [5][2190/3239]	Time 0.198 (0.498)	Data Time 0.002 (0.012)	Loss 3.9831 (3.9853)	Entropy 1.89137 (1.89344)	Top-1 acc 33.203 (31.237)	Top-5 acc 51.953 (54.888)	lr 0.02491
Train [5][2200/3239]	Time 0.188 (0.498)	Data Time 0.001 (0.012)	Loss 3.9866 (3.9854)	Entropy 1.89136 (1.89343)	Top-1 acc 30.078 (31.238)	Top-5 acc 57.422 (54.881)	lr 0.02491
Train [5][2210/3239]	Time 2.175 (0.497)	Data Time 0.001 (0.012)	Loss 3.9971 (3.9853)	Entropy 1.89136 (1.89342)	Top-1 acc 32.812 (31.243)	Top-5 acc 59.375 (54.885)	lr 0.02491
Train [5][2220/3239]	Time 0.214 (0.496)	Data Time 0.001 (0.012)	Loss 4.0061 (3.9853)	Entropy 1.89131 (1.89341)	Top-1 acc 30.469 (31.242)	Top-5 acc 53.906 (54.885)	lr 0.02491
Train [5][2230/3239]	Time 0.143 (0.496)	Data Time 0.001 (0.012)	Loss 3.8510 (3.9854)	Entropy 1.89128 (1.89340)	Top-1 acc 32.422 (31.244)	Top-5 acc 58.594 (54.881)	lr 0.02491
Train [5][2240/3239]	Time 0.202 (0.495)	Data Time 0.002 (0.012)	Loss 3.8520 (3.9851)	Entropy 1.89128 (1.89339)	Top-1 acc 34.375 (31.248)	Top-5 acc 59.375 (54.886)	lr 0.02491
Train [5][2250/3239]	Time 0.173 (0.495)	Data Time 0.001 (0.012)	Loss 4.0251 (3.9847)	Entropy 1.89128 (1.89338)	Top-1 acc 31.641 (31.257)	Top-5 acc 51.953 (54.897)	lr 0.02491
Train [5][2260/3239]	Time 0.257 (0.494)	Data Time 0.001 (0.012)	Loss 4.0291 (3.9850)	Entropy 1.89126 (1.89337)	Top-1 acc 30.469 (31.248)	Top-5 acc 54.688 (54.895)	lr 0.02491
Train [5][2270/3239]	Time 0.167 (0.494)	Data Time 0.001 (0.012)	Loss 4.2717 (3.9850)	Entropy 1.89125 (1.89337)	Top-1 acc 25.781 (31.245)	Top-5 acc 50.781 (54.894)	lr 0.02491
Train [5][2280/3239]	Time 0.275 (0.494)	Data Time 0.001 (0.012)	Loss 3.9945 (3.9849)	Entropy 1.89124 (1.89336)	Top-1 acc 32.812 (31.247)	Top-5 acc 56.641 (54.891)	lr 0.02491
Train [5][2290/3239]	Time 0.198 (0.493)	Data Time 0.002 (0.012)	Loss 3.9140 (3.9847)	Entropy 1.89122 (1.89335)	Top-1 acc 30.469 (31.249)	Top-5 acc 59.375 (54.904)	lr 0.02491
Train [5][2300/3239]	Time 0.202 (0.493)	Data Time 0.002 (0.012)	Loss 4.1111 (3.9849)	Entropy 1.89118 (1.89334)	Top-1 acc 30.859 (31.246)	Top-5 acc 54.688 (54.897)	lr 0.02491
Train [5][2310/3239]	Time 0.158 (0.493)	Data Time 0.001 (0.012)	Loss 4.0047 (3.9847)	Entropy 1.89118 (1.89333)	Top-1 acc 31.250 (31.243)	Top-5 acc 53.906 (54.900)	lr 0.02490
Train [5][2320/3239]	Time 2.238 (0.492)	Data Time 0.001 (0.012)	Loss 4.0982 (3.9848)	Entropy 1.89118 (1.89332)	Top-1 acc 30.859 (31.245)	Top-5 acc 51.953 (54.898)	lr 0.02490
Train [5][2330/3239]	Time 0.215 (0.491)	Data Time 0.001 (0.012)	Loss 4.0329 (3.9846)	Entropy 1.89117 (1.89331)	Top-1 acc 29.688 (31.251)	Top-5 acc 53.516 (54.901)	lr 0.02490
Train [5][2340/3239]	Time 0.183 (0.491)	Data Time 0.001 (0.012)	Loss 4.0384 (3.9844)	Entropy 1.89115 (1.89330)	Top-1 acc 27.344 (31.252)	Top-5 acc 53.516 (54.904)	lr 0.02490
Train [5][2350/3239]	Time 0.162 (0.490)	Data Time 0.001 (0.012)	Loss 4.0398 (3.9842)	Entropy 1.89111 (1.89329)	Top-1 acc 30.859 (31.258)	Top-5 acc 53.906 (54.910)	lr 0.02490
Train [5][2360/3239]	Time 0.139 (0.490)	Data Time 0.001 (0.011)	Loss 4.1725 (3.9845)	Entropy 1.89110 (1.89328)	Top-1 acc 26.953 (31.253)	Top-5 acc 51.562 (54.907)	lr 0.02490
Train [5][2370/3239]	Time 0.145 (0.490)	Data Time 0.001 (0.011)	Loss 4.0747 (3.9847)	Entropy 1.89109 (1.89327)	Top-1 acc 30.859 (31.248)	Top-5 acc 53.906 (54.907)	lr 0.02490
Train [5][2380/3239]	Time 0.193 (0.489)	Data Time 0.001 (0.011)	Loss 4.1190 (3.9848)	Entropy 1.89106 (1.89326)	Top-1 acc 28.906 (31.243)	Top-5 acc 53.125 (54.902)	lr 0.02490
Train [5][2390/3239]	Time 0.182 (0.489)	Data Time 0.001 (0.011)	Loss 4.0970 (3.9849)	Entropy 1.89103 (1.89325)	Top-1 acc 33.203 (31.246)	Top-5 acc 54.297 (54.903)	lr 0.02490
Train [5][2400/3239]	Time 0.330 (0.489)	Data Time 0.001 (0.011)	Loss 3.9869 (3.9850)	Entropy 1.89102 (1.89324)	Top-1 acc 27.734 (31.245)	Top-5 acc 58.984 (54.899)	lr 0.02490
Train [5][2410/3239]	Time 0.166 (0.488)	Data Time 0.001 (0.011)	Loss 4.0983 (3.9851)	Entropy 1.89102 (1.89324)	Top-1 acc 27.344 (31.245)	Top-5 acc 51.953 (54.897)	lr 0.02490
Train [5][2420/3239]	Time 0.163 (0.488)	Data Time 0.002 (0.011)	Loss 4.2586 (3.9851)	Entropy 1.89101 (1.89323)	Top-1 acc 28.516 (31.250)	Top-5 acc 50.781 (54.899)	lr 0.02490
Train [5][2430/3239]	Time 2.144 (0.488)	Data Time 0.001 (0.011)	Loss 4.0787 (3.9851)	Entropy 1.89101 (1.89322)	Top-1 acc 32.422 (31.251)	Top-5 acc 53.516 (54.901)	lr 0.02490
Train [5][2440/3239]	Time 0.220 (0.486)	Data Time 0.001 (0.011)	Loss 3.9222 (3.9852)	Entropy 1.89101 (1.89321)	Top-1 acc 31.250 (31.250)	Top-5 acc 55.078 (54.896)	lr 0.02490
Train [5][2450/3239]	Time 0.196 (0.486)	Data Time 0.001 (0.011)	Loss 3.8373 (3.9852)	Entropy 1.89099 (1.89320)	Top-1 acc 31.641 (31.250)	Top-5 acc 58.984 (54.893)	lr 0.02490
Train [5][2460/3239]	Time 0.228 (0.501)	Data Time 0.003 (0.011)	Loss 3.8829 (3.9851)	Entropy 1.89099 (1.89319)	Top-1 acc 33.203 (31.249)	Top-5 acc 60.156 (54.892)	lr 0.02490
Train [5][2470/3239]	Time 0.206 (0.500)	Data Time 0.002 (0.011)	Loss 4.0754 (3.9853)	Entropy 1.89098 (1.89318)	Top-1 acc 30.469 (31.245)	Top-5 acc 51.953 (54.890)	lr 0.02490
Train [5][2480/3239]	Time 0.156 (0.500)	Data Time 0.002 (0.011)	Loss 3.9860 (3.9856)	Entropy 1.89096 (1.89317)	Top-1 acc 32.812 (31.243)	Top-5 acc 53.125 (54.883)	lr 0.02490
Train [5][2490/3239]	Time 0.201 (0.500)	Data Time 0.001 (0.011)	Loss 4.2365 (3.9856)	Entropy 1.89093 (1.89316)	Top-1 acc 27.344 (31.242)	Top-5 acc 48.828 (54.882)	lr 0.02490
Train [5][2500/3239]	Time 0.184 (0.500)	Data Time 0.002 (0.011)	Loss 3.9621 (3.9855)	Entropy 1.89090 (1.89315)	Top-1 acc 26.562 (31.237)	Top-5 acc 53.906 (54.882)	lr 0.02490
Train [5][2510/3239]	Time 0.254 (0.499)	Data Time 0.001 (0.011)	Loss 3.9757 (3.9855)	Entropy 1.89087 (1.89315)	Top-1 acc 31.250 (31.237)	Top-5 acc 52.734 (54.884)	lr 0.02490
Train [5][2520/3239]	Time 0.219 (0.499)	Data Time 0.001 (0.011)	Loss 3.9874 (3.9853)	Entropy 1.89083 (1.89314)	Top-1 acc 32.031 (31.244)	Top-5 acc 54.297 (54.889)	lr 0.02490
Train [5][2530/3239]	Time 0.145 (0.499)	Data Time 0.002 (0.011)	Loss 3.9371 (3.9852)	Entropy 1.89081 (1.89313)	Top-1 acc 29.297 (31.246)	Top-5 acc 58.594 (54.895)	lr 0.02490
Train [5][2540/3239]	Time 2.051 (0.498)	Data Time 0.002 (0.011)	Loss 4.0699 (3.9853)	Entropy 1.89081 (1.89312)	Top-1 acc 30.859 (31.242)	Top-5 acc 51.562 (54.894)	lr 0.02490
Train [5][2550/3239]	Time 0.170 (0.497)	Data Time 0.001 (0.011)	Loss 3.7299 (3.9850)	Entropy 1.89077 (1.89311)	Top-1 acc 37.500 (31.246)	Top-5 acc 58.203 (54.895)	lr 0.02490
Train [5][2560/3239]	Time 0.228 (0.497)	Data Time 0.001 (0.011)	Loss 3.9221 (3.9851)	Entropy 1.89072 (1.89310)	Top-1 acc 31.250 (31.245)	Top-5 acc 55.469 (54.892)	lr 0.02490
Train [5][2570/3239]	Time 0.216 (0.496)	Data Time 0.001 (0.011)	Loss 4.0844 (3.9851)	Entropy 1.89069 (1.89309)	Top-1 acc 25.781 (31.248)	Top-5 acc 55.078 (54.892)	lr 0.02490
Train [5][2580/3239]	Time 0.235 (0.496)	Data Time 0.002 (0.011)	Loss 3.9512 (3.9850)	Entropy 1.89069 (1.89308)	Top-1 acc 32.812 (31.249)	Top-5 acc 53.516 (54.892)	lr 0.02490
Train [5][2590/3239]	Time 0.208 (0.496)	Data Time 0.001 (0.011)	Loss 4.0282 (3.9847)	Entropy 1.89068 (1.89307)	Top-1 acc 28.125 (31.256)	Top-5 acc 53.125 (54.899)	lr 0.02490
Train [5][2600/3239]	Time 0.197 (0.495)	Data Time 0.001 (0.011)	Loss 3.8958 (3.9847)	Entropy 1.89065 (1.89306)	Top-1 acc 31.250 (31.256)	Top-5 acc 57.812 (54.899)	lr 0.02490
Train [5][2610/3239]	Time 0.293 (0.495)	Data Time 0.006 (0.011)	Loss 3.9536 (3.9845)	Entropy 1.89064 (1.89305)	Top-1 acc 26.562 (31.259)	Top-5 acc 55.469 (54.901)	lr 0.02490
Train [5][2620/3239]	Time 0.246 (0.495)	Data Time 0.001 (0.011)	Loss 3.8884 (3.9844)	Entropy 1.89064 (1.89304)	Top-1 acc 35.938 (31.261)	Top-5 acc 58.984 (54.899)	lr 0.02490
Train [5][2630/3239]	Time 0.366 (0.494)	Data Time 0.001 (0.011)	Loss 3.8899 (3.9845)	Entropy 1.89060 (1.89303)	Top-1 acc 33.594 (31.261)	Top-5 acc 57.812 (54.896)	lr 0.02490
Train [5][2640/3239]	Time 0.218 (0.494)	Data Time 0.001 (0.011)	Loss 3.9610 (3.9844)	Entropy 1.89053 (1.89303)	Top-1 acc 31.250 (31.261)	Top-5 acc 52.734 (54.902)	lr 0.02490
Train [5][2650/3239]	Time 0.211 (0.494)	Data Time 0.001 (0.011)	Loss 4.0860 (3.9844)	Entropy 1.89053 (1.89302)	Top-1 acc 29.688 (31.263)	Top-5 acc 51.562 (54.900)	lr 0.02490
Train [5][2660/3239]	Time 0.153 (0.493)	Data Time 0.001 (0.011)	Loss 3.7311 (3.9841)	Entropy 1.89052 (1.89301)	Top-1 acc 37.891 (31.271)	Top-5 acc 59.375 (54.904)	lr 0.02490
Train [5][2670/3239]	Time 0.251 (0.493)	Data Time 0.002 (0.011)	Loss 3.9256 (3.9840)	Entropy 1.89050 (1.89300)	Top-1 acc 32.031 (31.273)	Top-5 acc 55.469 (54.905)	lr 0.02490
Train [5][2680/3239]	Time 0.204 (0.493)	Data Time 0.001 (0.011)	Loss 3.9297 (3.9839)	Entropy 1.89047 (1.89299)	Top-1 acc 34.375 (31.275)	Top-5 acc 56.250 (54.904)	lr 0.02490
Train [5][2690/3239]	Time 0.220 (0.492)	Data Time 0.001 (0.010)	Loss 4.1003 (3.9839)	Entropy 1.89046 (1.89298)	Top-1 acc 29.297 (31.274)	Top-5 acc 51.562 (54.903)	lr 0.02490
Train [5][2700/3239]	Time 0.217 (0.492)	Data Time 0.001 (0.010)	Loss 3.9015 (3.9836)	Entropy 1.89045 (1.89297)	Top-1 acc 32.812 (31.279)	Top-5 acc 53.906 (54.906)	lr 0.02490
Train [5][2710/3239]	Time 0.212 (0.492)	Data Time 0.001 (0.010)	Loss 3.8963 (3.9835)	Entropy 1.89041 (1.89296)	Top-1 acc 33.594 (31.278)	Top-5 acc 55.078 (54.904)	lr 0.02490
Train [5][2720/3239]	Time 0.222 (0.491)	Data Time 0.001 (0.010)	Loss 3.8295 (3.9835)	Entropy 1.89040 (1.89295)	Top-1 acc 33.594 (31.281)	Top-5 acc 60.547 (54.906)	lr 0.02490
Train [5][2730/3239]	Time 0.248 (0.491)	Data Time 0.001 (0.010)	Loss 3.9697 (3.9836)	Entropy 1.89036 (1.89294)	Top-1 acc 33.984 (31.285)	Top-5 acc 55.078 (54.902)	lr 0.02490
Train [5][2740/3239]	Time 0.193 (0.491)	Data Time 0.001 (0.010)	Loss 4.1824 (3.9835)	Entropy 1.89035 (1.89293)	Top-1 acc 26.562 (31.289)	Top-5 acc 51.953 (54.905)	lr 0.02490
Train [5][2750/3239]	Time 0.191 (0.491)	Data Time 0.002 (0.010)	Loss 4.1019 (3.9838)	Entropy 1.89031 (1.89292)	Top-1 acc 28.906 (31.288)	Top-5 acc 50.000 (54.896)	lr 0.02490
Train [5][2760/3239]	Time 0.311 (0.490)	Data Time 0.002 (0.010)	Loss 3.7673 (3.9835)	Entropy 1.89030 (1.89291)	Top-1 acc 35.547 (31.295)	Top-5 acc 57.812 (54.899)	lr 0.02490
Train [5][2770/3239]	Time 0.283 (0.490)	Data Time 0.001 (0.010)	Loss 3.9855 (3.9833)	Entropy 1.89025 (1.89290)	Top-1 acc 30.859 (31.296)	Top-5 acc 53.516 (54.904)	lr 0.02490
Train [5][2780/3239]	Time 0.200 (0.490)	Data Time 0.001 (0.010)	Loss 4.2458 (3.9833)	Entropy 1.89025 (1.89289)	Top-1 acc 28.906 (31.298)	Top-5 acc 48.828 (54.905)	lr 0.02490
Train [5][2790/3239]	Time 0.193 (0.489)	Data Time 0.001 (0.010)	Loss 4.1286 (3.9831)	Entropy 1.89023 (1.89288)	Top-1 acc 30.859 (31.305)	Top-5 acc 53.125 (54.913)	lr 0.02490
Train [5][2800/3239]	Time 0.180 (0.502)	Data Time 0.003 (0.010)	Loss 4.2025 (3.9831)	Entropy 1.89022 (1.89288)	Top-1 acc 27.734 (31.302)	Top-5 acc 52.344 (54.913)	lr 0.02490
Train [5][2810/3239]	Time 0.149 (0.502)	Data Time 0.002 (0.010)	Loss 4.0081 (3.9830)	Entropy 1.89020 (1.89287)	Top-1 acc 31.250 (31.302)	Top-5 acc 53.125 (54.915)	lr 0.02490
Train [5][2820/3239]	Time 0.237 (0.501)	Data Time 0.002 (0.010)	Loss 3.8851 (3.9828)	Entropy 1.89018 (1.89286)	Top-1 acc 34.766 (31.303)	Top-5 acc 60.156 (54.919)	lr 0.02490
Train [5][2830/3239]	Time 0.226 (0.501)	Data Time 0.002 (0.010)	Loss 3.8745 (3.9830)	Entropy 1.89015 (1.89285)	Top-1 acc 33.984 (31.300)	Top-5 acc 55.859 (54.915)	lr 0.02490
Train [5][2840/3239]	Time 0.208 (0.501)	Data Time 0.001 (0.010)	Loss 3.9299 (3.9828)	Entropy 1.89014 (1.89284)	Top-1 acc 36.719 (31.307)	Top-5 acc 57.031 (54.917)	lr 0.02490
Train [5][2850/3239]	Time 0.207 (0.500)	Data Time 0.003 (0.010)	Loss 3.9998 (3.9829)	Entropy 1.89012 (1.89283)	Top-1 acc 30.469 (31.305)	Top-5 acc 53.906 (54.916)	lr 0.02490
Train [5][2860/3239]	Time 0.184 (0.500)	Data Time 0.001 (0.010)	Loss 4.0285 (3.9828)	Entropy 1.89010 (1.89282)	Top-1 acc 31.641 (31.308)	Top-5 acc 53.906 (54.921)	lr 0.02490
Train [5][2870/3239]	Time 0.186 (0.500)	Data Time 0.001 (0.010)	Loss 3.9848 (3.9828)	Entropy 1.89008 (1.89281)	Top-1 acc 32.031 (31.307)	Top-5 acc 53.906 (54.921)	lr 0.02490
Train [5][2880/3239]	Time 0.206 (0.500)	Data Time 0.001 (0.010)	Loss 3.8679 (3.9829)	Entropy 1.89009 (1.89280)	Top-1 acc 34.375 (31.307)	Top-5 acc 59.375 (54.918)	lr 0.02490
Train [5][2890/3239]	Time 0.255 (0.499)	Data Time 0.001 (0.010)	Loss 3.9620 (3.9826)	Entropy 1.89007 (1.89279)	Top-1 acc 32.031 (31.315)	Top-5 acc 53.125 (54.927)	lr 0.02490
Train [5][2900/3239]	Time 0.358 (0.499)	Data Time 0.002 (0.010)	Loss 3.9953 (3.9824)	Entropy 1.89005 (1.89278)	Top-1 acc 33.594 (31.318)	Top-5 acc 55.469 (54.930)	lr 0.02490
Train [5][2910/3239]	Time 0.235 (0.499)	Data Time 0.001 (0.010)	Loss 3.9099 (3.9824)	Entropy 1.89001 (1.89277)	Top-1 acc 28.125 (31.319)	Top-5 acc 55.469 (54.931)	lr 0.02490
Train [5][2920/3239]	Time 0.227 (0.498)	Data Time 0.001 (0.010)	Loss 4.0794 (3.9826)	Entropy 1.88998 (1.89276)	Top-1 acc 28.516 (31.316)	Top-5 acc 49.609 (54.927)	lr 0.02490
Train [5][2930/3239]	Time 0.191 (0.498)	Data Time 0.002 (0.010)	Loss 4.1006 (3.9824)	Entropy 1.88997 (1.89275)	Top-1 acc 30.859 (31.324)	Top-5 acc 51.562 (54.931)	lr 0.02490
Train [5][2940/3239]	Time 0.203 (0.498)	Data Time 0.002 (0.010)	Loss 4.0445 (3.9825)	Entropy 1.88994 (1.89274)	Top-1 acc 27.344 (31.321)	Top-5 acc 53.125 (54.929)	lr 0.02490
Train [5][2950/3239]	Time 0.235 (0.497)	Data Time 0.001 (0.010)	Loss 4.0597 (3.9824)	Entropy 1.88995 (1.89273)	Top-1 acc 32.031 (31.323)	Top-5 acc 50.000 (54.929)	lr 0.02490
Train [5][2960/3239]	Time 0.236 (0.497)	Data Time 0.001 (0.010)	Loss 3.9760 (3.9826)	Entropy 1.88994 (1.89272)	Top-1 acc 32.812 (31.322)	Top-5 acc 55.078 (54.925)	lr 0.02490
Train [5][2970/3239]	Time 0.187 (0.497)	Data Time 0.001 (0.010)	Loss 4.1037 (3.9825)	Entropy 1.88989 (1.89271)	Top-1 acc 28.906 (31.323)	Top-5 acc 52.344 (54.929)	lr 0.02490
Train [5][2980/3239]	Time 0.230 (0.497)	Data Time 0.002 (0.010)	Loss 3.9594 (3.9825)	Entropy 1.88987 (1.89271)	Top-1 acc 31.250 (31.325)	Top-5 acc 54.688 (54.930)	lr 0.02490
Train [5][2990/3239]	Time 0.184 (0.496)	Data Time 0.002 (0.010)	Loss 3.8375 (3.9824)	Entropy 1.88981 (1.89270)	Top-1 acc 34.766 (31.332)	Top-5 acc 59.766 (54.934)	lr 0.02490
Train [5][3000/3239]	Time 0.152 (0.496)	Data Time 0.001 (0.010)	Loss 3.9673 (3.9822)	Entropy 1.88978 (1.89269)	Top-1 acc 31.250 (31.339)	Top-5 acc 55.078 (54.938)	lr 0.02490
Train [5][3010/3239]	Time 0.374 (0.496)	Data Time 0.001 (0.010)	Loss 3.9457 (3.9822)	Entropy 1.88976 (1.89268)	Top-1 acc 30.078 (31.336)	Top-5 acc 58.594 (54.938)	lr 0.02490
Train [5][3020/3239]	Time 0.178 (0.496)	Data Time 0.001 (0.010)	Loss 4.0844 (3.9821)	Entropy 1.88972 (1.89267)	Top-1 acc 28.125 (31.336)	Top-5 acc 46.875 (54.936)	lr 0.02490
Train [5][3030/3239]	Time 0.217 (0.495)	Data Time 0.001 (0.010)	Loss 3.7997 (3.9818)	Entropy 1.88972 (1.89266)	Top-1 acc 33.984 (31.343)	Top-5 acc 60.547 (54.945)	lr 0.02490
Train [5][3040/3239]	Time 0.200 (0.495)	Data Time 0.001 (0.010)	Loss 4.0960 (3.9819)	Entropy 1.88969 (1.89265)	Top-1 acc 33.984 (31.342)	Top-5 acc 53.125 (54.941)	lr 0.02490
Train [5][3050/3239]	Time 0.221 (0.495)	Data Time 0.002 (0.010)	Loss 3.9108 (3.9819)	Entropy 1.88968 (1.89264)	Top-1 acc 36.719 (31.344)	Top-5 acc 55.859 (54.939)	lr 0.02490
Train [5][3060/3239]	Time 0.204 (0.494)	Data Time 0.001 (0.010)	Loss 4.2354 (3.9820)	Entropy 1.88965 (1.89263)	Top-1 acc 27.344 (31.345)	Top-5 acc 51.953 (54.939)	lr 0.02490
Train [5][3070/3239]	Time 0.151 (0.494)	Data Time 0.001 (0.010)	Loss 4.0039 (3.9819)	Entropy 1.88963 (1.89262)	Top-1 acc 28.516 (31.343)	Top-5 acc 52.734 (54.939)	lr 0.02490
Train [5][3080/3239]	Time 0.247 (0.494)	Data Time 0.001 (0.010)	Loss 4.0859 (3.9818)	Entropy 1.88958 (1.89261)	Top-1 acc 33.203 (31.345)	Top-5 acc 53.906 (54.941)	lr 0.02490
Train [5][3090/3239]	Time 0.229 (0.494)	Data Time 0.001 (0.009)	Loss 3.9205 (3.9816)	Entropy 1.88957 (1.89260)	Top-1 acc 32.812 (31.351)	Top-5 acc 55.859 (54.948)	lr 0.02490
Train [5][3100/3239]	Time 0.199 (0.493)	Data Time 0.001 (0.009)	Loss 3.8354 (3.9816)	Entropy 1.88953 (1.89259)	Top-1 acc 28.516 (31.349)	Top-5 acc 58.984 (54.950)	lr 0.02489
Train [5][3110/3239]	Time 0.227 (0.493)	Data Time 0.001 (0.009)	Loss 3.7859 (3.9814)	Entropy 1.88952 (1.89258)	Top-1 acc 33.203 (31.351)	Top-5 acc 58.203 (54.956)	lr 0.02489
Train [5][3120/3239]	Time 0.264 (0.493)	Data Time 0.001 (0.009)	Loss 3.9564 (3.9816)	Entropy 1.88950 (1.89257)	Top-1 acc 30.469 (31.348)	Top-5 acc 58.594 (54.956)	lr 0.02489
Train [5][3130/3239]	Time 0.302 (0.504)	Data Time 0.003 (0.009)	Loss 4.1303 (3.9816)	Entropy 1.88948 (1.89256)	Top-1 acc 27.344 (31.347)	Top-5 acc 55.078 (54.955)	lr 0.02489
Train [5][3140/3239]	Time 0.207 (0.504)	Data Time 0.002 (0.009)	Loss 3.9713 (3.9817)	Entropy 1.88945 (1.89255)	Top-1 acc 29.297 (31.345)	Top-5 acc 57.031 (54.957)	lr 0.02489
Train [5][3150/3239]	Time 0.213 (0.504)	Data Time 0.002 (0.009)	Loss 3.8735 (3.9816)	Entropy 1.88938 (1.89254)	Top-1 acc 33.594 (31.347)	Top-5 acc 57.031 (54.955)	lr 0.02489
Train [5][3160/3239]	Time 0.215 (0.504)	Data Time 0.002 (0.009)	Loss 3.8999 (3.9817)	Entropy 1.88936 (1.89253)	Top-1 acc 35.938 (31.344)	Top-5 acc 57.812 (54.948)	lr 0.02489
Train [5][3170/3239]	Time 0.186 (0.503)	Data Time 0.002 (0.009)	Loss 4.0241 (3.9815)	Entropy 1.88931 (1.89252)	Top-1 acc 29.297 (31.347)	Top-5 acc 53.125 (54.953)	lr 0.02489
Train [5][3180/3239]	Time 0.135 (0.503)	Data Time 0.000 (0.009)	Loss 3.8914 (3.9814)	Entropy 1.88931 (1.89251)	Top-1 acc 32.422 (31.348)	Top-5 acc 57.422 (54.955)	lr 0.02489
Train [5][3190/3239]	Time 0.138 (0.503)	Data Time 0.000 (0.009)	Loss 4.0659 (3.9815)	Entropy 1.88931 (1.89250)	Top-1 acc 28.906 (31.346)	Top-5 acc 51.953 (54.952)	lr 0.02489
Train [5][3200/3239]	Time 0.211 (0.502)	Data Time 0.000 (0.009)	Loss 3.8185 (3.9813)	Entropy 1.88926 (1.89249)	Top-1 acc 33.203 (31.354)	Top-5 acc 57.031 (54.954)	lr 0.02489
Train [5][3210/3239]	Time 0.195 (0.502)	Data Time 0.000 (0.009)	Loss 3.9437 (3.9814)	Entropy 1.88926 (1.89248)	Top-1 acc 32.812 (31.357)	Top-5 acc 54.688 (54.953)	lr 0.02489
Train [5][3220/3239]	Time 0.175 (0.501)	Data Time 0.000 (0.009)	Loss 4.0082 (3.9815)	Entropy 1.88921 (1.89247)	Top-1 acc 30.859 (31.355)	Top-5 acc 53.906 (54.953)	lr 0.02489
Train [5][3230/3239]	Time 0.206 (0.501)	Data Time 0.000 (0.009)	Loss 3.8596 (3.9813)	Entropy 1.88920 (1.89246)	Top-1 acc 35.547 (31.362)	Top-5 acc 59.766 (54.957)	lr 0.02489
Train [5][3239/3239]	Time 1.955 (0.501)	Data Time 0.000 (0.009)	Loss 4.3800 (3.9813)	Entropy 1.88920 (1.89245)	Top-1 acc 29.630 (31.364)	Top-5 acc 51.852 (54.956)	lr 0.02489
==========Valid [5/120]	loss 2.811	top-1 acc 40.890 (40.890)	top-5 acc 65.368	Train top-1 31.364	top-5 54.956	Entropy 1.88920	Latency-None: 0.000ms	Flops: 521.58M
Train [6][0/3239]	Time 23.007 (23.007)	Data Time 22.406 (22.406)	Loss 3.8922 (3.8922)	Entropy 1.88918 (1.88918)	Top-1 acc 32.812 (32.812)	Top-5 acc 53.516 (53.516)	lr 0.02489
Train [6][10/3239]	Time 2.497 (2.629)	Data Time 0.002 (2.057)	Loss 4.0395 (3.8756)	Entropy 1.88918 (1.88918)	Top-1 acc 31.250 (33.274)	Top-5 acc 54.297 (56.783)	lr 0.02489
Train [6][20/3239]	Time 0.200 (1.485)	Data Time 0.001 (1.078)	Loss 3.8745 (3.9003)	Entropy 1.88914 (1.88916)	Top-1 acc 35.156 (33.129)	Top-5 acc 56.250 (56.157)	lr 0.02489
Train [6][30/3239]	Time 0.231 (1.142)	Data Time 0.001 (0.731)	Loss 3.7700 (3.9185)	Entropy 1.88913 (1.88915)	Top-1 acc 33.984 (32.472)	Top-5 acc 57.422 (56.074)	lr 0.02489
Train [6][40/3239]	Time 0.207 (0.965)	Data Time 0.001 (0.553)	Loss 3.7315 (3.9051)	Entropy 1.88909 (1.88914)	Top-1 acc 36.328 (32.498)	Top-5 acc 61.328 (56.393)	lr 0.02489
Train [6][50/3239]	Time 0.152 (0.855)	Data Time 0.001 (0.445)	Loss 3.9212 (3.9038)	Entropy 1.88907 (1.88913)	Top-1 acc 33.203 (32.652)	Top-5 acc 55.469 (56.503)	lr 0.02489
Train [6][60/3239]	Time 0.275 (0.783)	Data Time 0.001 (0.373)	Loss 3.9378 (3.9152)	Entropy 1.88905 (1.88912)	Top-1 acc 29.297 (32.371)	Top-5 acc 58.984 (56.116)	lr 0.02489
Train [6][70/3239]	Time 0.211 (0.733)	Data Time 0.002 (0.321)	Loss 3.9948 (3.9228)	Entropy 1.88904 (1.88911)	Top-1 acc 32.422 (32.174)	Top-5 acc 52.734 (55.887)	lr 0.02489
Train [6][80/3239]	Time 0.251 (0.693)	Data Time 0.001 (0.281)	Loss 3.8776 (3.9339)	Entropy 1.88903 (1.88910)	Top-1 acc 30.859 (31.978)	Top-5 acc 57.812 (55.652)	lr 0.02489
Train [6][90/3239]	Time 0.198 (0.661)	Data Time 0.001 (0.251)	Loss 3.9784 (3.9353)	Entropy 1.88902 (1.88909)	Top-1 acc 35.938 (31.971)	Top-5 acc 53.906 (55.636)	lr 0.02489
Train [6][100/3239]	Time 0.167 (0.635)	Data Time 0.001 (0.226)	Loss 3.9390 (3.9332)	Entropy 1.88901 (1.88908)	Top-1 acc 39.062 (32.174)	Top-5 acc 55.078 (55.654)	lr 0.02489
Train [6][110/3239]	Time 0.320 (0.618)	Data Time 0.001 (0.206)	Loss 3.9279 (3.9327)	Entropy 1.88899 (1.88908)	Top-1 acc 30.469 (32.112)	Top-5 acc 56.641 (55.673)	lr 0.02489
Train [6][120/3239]	Time 2.248 (0.601)	Data Time 0.001 (0.189)	Loss 3.8634 (3.9289)	Entropy 1.88899 (1.88907)	Top-1 acc 37.891 (32.277)	Top-5 acc 57.812 (55.779)	lr 0.02489
Train [6][130/3239]	Time 0.211 (0.571)	Data Time 0.001 (0.175)	Loss 3.9392 (3.9328)	Entropy 1.88894 (1.88906)	Top-1 acc 33.203 (32.240)	Top-5 acc 53.516 (55.654)	lr 0.02489
Train [6][140/3239]	Time 0.174 (0.559)	Data Time 0.001 (0.162)	Loss 3.7687 (3.9328)	Entropy 1.88891 (1.88905)	Top-1 acc 37.500 (32.270)	Top-5 acc 58.594 (55.638)	lr 0.02489
Train [6][150/3239]	Time 0.140 (0.549)	Data Time 0.001 (0.152)	Loss 4.0310 (3.9290)	Entropy 1.88885 (1.88904)	Top-1 acc 29.688 (32.427)	Top-5 acc 53.906 (55.779)	lr 0.02489
Train [6][160/3239]	Time 0.199 (0.540)	Data Time 0.001 (0.143)	Loss 3.9651 (3.9317)	Entropy 1.88883 (1.88902)	Top-1 acc 32.812 (32.378)	Top-5 acc 53.516 (55.724)	lr 0.02489
Train [6][170/3239]	Time 0.200 (0.533)	Data Time 0.001 (0.135)	Loss 4.1375 (3.9338)	Entropy 1.88880 (1.88901)	Top-1 acc 27.734 (32.310)	Top-5 acc 50.391 (55.670)	lr 0.02489
Train [6][180/3239]	Time 0.185 (0.526)	Data Time 0.001 (0.127)	Loss 3.8064 (3.9314)	Entropy 1.88880 (1.88900)	Top-1 acc 34.766 (32.338)	Top-5 acc 58.984 (55.788)	lr 0.02489
Train [6][190/3239]	Time 0.233 (0.519)	Data Time 0.001 (0.121)	Loss 4.0775 (3.9355)	Entropy 1.88877 (1.88899)	Top-1 acc 31.250 (32.299)	Top-5 acc 53.516 (55.741)	lr 0.02489
Train [6][200/3239]	Time 0.191 (0.514)	Data Time 0.001 (0.115)	Loss 3.8836 (3.9367)	Entropy 1.88878 (1.88898)	Top-1 acc 32.812 (32.268)	Top-5 acc 53.906 (55.665)	lr 0.02489
Train [6][210/3239]	Time 0.191 (0.509)	Data Time 0.001 (0.109)	Loss 3.7612 (3.9376)	Entropy 1.88876 (1.88897)	Top-1 acc 36.719 (32.266)	Top-5 acc 60.938 (55.665)	lr 0.02489
Train [6][220/3239]	Time 0.311 (0.505)	Data Time 0.001 (0.105)	Loss 3.9462 (3.9357)	Entropy 1.88874 (1.88896)	Top-1 acc 35.938 (32.305)	Top-5 acc 55.859 (55.723)	lr 0.02489
Train [6][230/3239]	Time 2.281 (0.501)	Data Time 0.001 (0.100)	Loss 4.0467 (3.9345)	Entropy 1.88874 (1.88895)	Top-1 acc 30.859 (32.317)	Top-5 acc 55.469 (55.827)	lr 0.02489
Train [6][240/3239]	Time 0.153 (0.489)	Data Time 0.001 (0.096)	Loss 4.0842 (3.9338)	Entropy 1.88868 (1.88894)	Top-1 acc 29.688 (32.302)	Top-5 acc 51.172 (55.858)	lr 0.02489
Train [6][250/3239]	Time 0.431 (0.624)	Data Time 0.112 (0.093)	Loss 3.9077 (3.9336)	Entropy 1.88865 (1.88893)	Top-1 acc 33.594 (32.335)	Top-5 acc 59.766 (55.873)	lr 0.02489
Train [6][260/3239]	Time 0.197 (0.616)	Data Time 0.002 (0.089)	Loss 3.9406 (3.9343)	Entropy 1.88863 (1.88892)	Top-1 acc 32.422 (32.326)	Top-5 acc 52.344 (55.846)	lr 0.02489
Train [6][270/3239]	Time 0.140 (0.608)	Data Time 0.001 (0.086)	Loss 3.9478 (3.9361)	Entropy 1.88862 (1.88890)	Top-1 acc 30.469 (32.315)	Top-5 acc 56.641 (55.875)	lr 0.02489
Train [6][280/3239]	Time 0.159 (0.601)	Data Time 0.001 (0.083)	Loss 4.0046 (3.9335)	Entropy 1.88860 (1.88889)	Top-1 acc 33.594 (32.377)	Top-5 acc 55.078 (55.939)	lr 0.02489
Train [6][290/3239]	Time 0.226 (0.595)	Data Time 0.001 (0.081)	Loss 4.1368 (3.9378)	Entropy 1.88859 (1.88888)	Top-1 acc 28.906 (32.292)	Top-5 acc 53.516 (55.853)	lr 0.02489
Train [6][300/3239]	Time 0.203 (0.589)	Data Time 0.001 (0.078)	Loss 4.1142 (3.9386)	Entropy 1.88858 (1.88887)	Top-1 acc 28.516 (32.241)	Top-5 acc 49.609 (55.840)	lr 0.02489
Train [6][310/3239]	Time 0.199 (0.583)	Data Time 0.001 (0.076)	Loss 3.9724 (3.9389)	Entropy 1.88857 (1.88886)	Top-1 acc 30.859 (32.246)	Top-5 acc 51.562 (55.839)	lr 0.02489
Train [6][320/3239]	Time 0.347 (0.578)	Data Time 0.001 (0.073)	Loss 4.0172 (3.9398)	Entropy 1.88855 (1.88886)	Top-1 acc 31.641 (32.228)	Top-5 acc 53.125 (55.817)	lr 0.02489
Train [6][330/3239]	Time 0.329 (0.573)	Data Time 0.001 (0.071)	Loss 4.0108 (3.9391)	Entropy 1.88850 (1.88885)	Top-1 acc 31.250 (32.254)	Top-5 acc 53.906 (55.800)	lr 0.02489
Train [6][340/3239]	Time 2.195 (0.568)	Data Time 0.001 (0.069)	Loss 4.0618 (3.9382)	Entropy 1.88850 (1.88884)	Top-1 acc 26.172 (32.231)	Top-5 acc 51.172 (55.819)	lr 0.02489
Train [6][350/3239]	Time 0.219 (0.558)	Data Time 0.001 (0.067)	Loss 3.6620 (3.9363)	Entropy 1.88848 (1.88883)	Top-1 acc 35.547 (32.269)	Top-5 acc 60.938 (55.863)	lr 0.02489
Train [6][360/3239]	Time 0.149 (0.554)	Data Time 0.001 (0.065)	Loss 4.0110 (3.9380)	Entropy 1.88847 (1.88882)	Top-1 acc 30.078 (32.238)	Top-5 acc 50.391 (55.791)	lr 0.02489
Train [6][370/3239]	Time 0.182 (0.550)	Data Time 0.001 (0.064)	Loss 4.0188 (3.9384)	Entropy 1.88844 (1.88881)	Top-1 acc 28.516 (32.213)	Top-5 acc 58.203 (55.781)	lr 0.02489
Train [6][380/3239]	Time 0.191 (0.547)	Data Time 0.001 (0.062)	Loss 3.9248 (3.9390)	Entropy 1.88841 (1.88880)	Top-1 acc 33.203 (32.228)	Top-5 acc 55.078 (55.758)	lr 0.02489
Train [6][390/3239]	Time 0.201 (0.543)	Data Time 0.001 (0.061)	Loss 3.9641 (3.9373)	Entropy 1.88839 (1.88879)	Top-1 acc 32.031 (32.254)	Top-5 acc 53.906 (55.813)	lr 0.02489
Train [6][400/3239]	Time 0.213 (0.540)	Data Time 0.001 (0.059)	Loss 3.8527 (3.9365)	Entropy 1.88836 (1.88878)	Top-1 acc 27.344 (32.285)	Top-5 acc 57.422 (55.849)	lr 0.02489
Train [6][410/3239]	Time 0.208 (0.537)	Data Time 0.001 (0.058)	Loss 3.8255 (3.9363)	Entropy 1.88833 (1.88876)	Top-1 acc 34.766 (32.287)	Top-5 acc 58.203 (55.874)	lr 0.02489
Train [6][420/3239]	Time 0.151 (0.534)	Data Time 0.001 (0.056)	Loss 3.8790 (3.9364)	Entropy 1.88832 (1.88875)	Top-1 acc 32.031 (32.271)	Top-5 acc 60.156 (55.871)	lr 0.02489
Train [6][430/3239]	Time 0.208 (0.531)	Data Time 0.001 (0.055)	Loss 3.7594 (3.9365)	Entropy 1.88828 (1.88874)	Top-1 acc 31.641 (32.268)	Top-5 acc 58.984 (55.884)	lr 0.02489
Train [6][440/3239]	Time 0.298 (0.529)	Data Time 0.002 (0.054)	Loss 3.8980 (3.9382)	Entropy 1.88826 (1.88873)	Top-1 acc 30.859 (32.230)	Top-5 acc 59.375 (55.854)	lr 0.02489
Train [6][450/3239]	Time 2.127 (0.526)	Data Time 0.002 (0.053)	Loss 3.9205 (3.9403)	Entropy 1.88826 (1.88872)	Top-1 acc 32.031 (32.172)	Top-5 acc 52.344 (55.804)	lr 0.02489
Train [6][460/3239]	Time 0.195 (0.519)	Data Time 0.001 (0.052)	Loss 3.9225 (3.9403)	Entropy 1.88825 (1.88871)	Top-1 acc 33.594 (32.176)	Top-5 acc 57.031 (55.806)	lr 0.02489
Train [6][470/3239]	Time 0.183 (0.517)	Data Time 0.001 (0.051)	Loss 3.8553 (3.9410)	Entropy 1.88823 (1.88870)	Top-1 acc 37.500 (32.190)	Top-5 acc 55.859 (55.786)	lr 0.02489
Train [6][480/3239]	Time 0.198 (0.514)	Data Time 0.001 (0.050)	Loss 4.0663 (3.9414)	Entropy 1.88822 (1.88869)	Top-1 acc 28.906 (32.161)	Top-5 acc 49.219 (55.777)	lr 0.02489
Train [6][490/3239]	Time 0.186 (0.512)	Data Time 0.002 (0.049)	Loss 4.0688 (3.9424)	Entropy 1.88815 (1.88868)	Top-1 acc 32.031 (32.143)	Top-5 acc 55.078 (55.774)	lr 0.02489
Train [6][500/3239]	Time 0.223 (0.510)	Data Time 0.001 (0.048)	Loss 3.8814 (3.9436)	Entropy 1.88811 (1.88867)	Top-1 acc 31.641 (32.109)	Top-5 acc 59.375 (55.753)	lr 0.02489
Train [6][510/3239]	Time 0.207 (0.508)	Data Time 0.001 (0.047)	Loss 3.8501 (3.9435)	Entropy 1.88809 (1.88866)	Top-1 acc 34.375 (32.115)	Top-5 acc 57.031 (55.741)	lr 0.02489
Train [6][520/3239]	Time 0.241 (0.506)	Data Time 0.001 (0.046)	Loss 3.8441 (3.9429)	Entropy 1.88802 (1.88865)	Top-1 acc 34.375 (32.138)	Top-5 acc 56.641 (55.744)	lr 0.02489
Train [6][530/3239]	Time 0.194 (0.504)	Data Time 0.001 (0.045)	Loss 3.7739 (3.9412)	Entropy 1.88801 (1.88864)	Top-1 acc 37.500 (32.179)	Top-5 acc 60.156 (55.801)	lr 0.02489
Train [6][540/3239]	Time 0.213 (0.502)	Data Time 0.001 (0.044)	Loss 3.8370 (3.9401)	Entropy 1.88800 (1.88862)	Top-1 acc 31.250 (32.181)	Top-5 acc 57.031 (55.807)	lr 0.02489
Train [6][550/3239]	Time 0.191 (0.501)	Data Time 0.001 (0.044)	Loss 4.1289 (3.9396)	Entropy 1.88797 (1.88861)	Top-1 acc 30.078 (32.204)	Top-5 acc 53.125 (55.813)	lr 0.02489
Train [6][560/3239]	Time 2.156 (0.500)	Data Time 0.001 (0.043)	Loss 3.9692 (3.9406)	Entropy 1.88797 (1.88860)	Top-1 acc 29.297 (32.180)	Top-5 acc 55.469 (55.802)	lr 0.02489
Train [6][570/3239]	Time 0.194 (0.494)	Data Time 0.001 (0.042)	Loss 4.1473 (3.9412)	Entropy 1.88795 (1.88859)	Top-1 acc 30.469 (32.174)	Top-5 acc 52.734 (55.793)	lr 0.02489
Train [6][580/3239]	Time 0.178 (0.493)	Data Time 0.001 (0.041)	Loss 3.8251 (3.9412)	Entropy 1.88792 (1.88858)	Top-1 acc 35.156 (32.174)	Top-5 acc 60.547 (55.788)	lr 0.02489
Train [6][590/3239]	Time 0.246 (0.491)	Data Time 0.001 (0.041)	Loss 3.8097 (3.9407)	Entropy 1.88789 (1.88857)	Top-1 acc 32.422 (32.167)	Top-5 acc 57.422 (55.789)	lr 0.02489
Train [6][600/3239]	Time 0.203 (0.490)	Data Time 0.001 (0.040)	Loss 3.8086 (3.9405)	Entropy 1.88787 (1.88856)	Top-1 acc 34.375 (32.151)	Top-5 acc 57.422 (55.787)	lr 0.02489
Train [6][610/3239]	Time 0.327 (0.540)	Data Time 0.002 (0.039)	Loss 4.0404 (3.9410)	Entropy 1.88782 (1.88854)	Top-1 acc 28.906 (32.139)	Top-5 acc 53.125 (55.768)	lr 0.02488
Train [6][620/3239]	Time 0.129 (0.540)	Data Time 0.002 (0.039)	Loss 4.0640 (3.9414)	Entropy 1.88780 (1.88853)	Top-1 acc 29.688 (32.118)	Top-5 acc 56.641 (55.756)	lr 0.02488
Train [6][630/3239]	Time 0.135 (0.538)	Data Time 0.001 (0.038)	Loss 3.8231 (3.9413)	Entropy 1.88776 (1.88852)	Top-1 acc 35.547 (32.131)	Top-5 acc 59.375 (55.771)	lr 0.02488
Train [6][640/3239]	Time 0.194 (0.537)	Data Time 0.001 (0.038)	Loss 3.9133 (3.9409)	Entropy 1.88775 (1.88851)	Top-1 acc 37.891 (32.165)	Top-5 acc 55.078 (55.780)	lr 0.02488
Train [6][650/3239]	Time 0.223 (0.535)	Data Time 0.002 (0.037)	Loss 3.9377 (3.9411)	Entropy 1.88773 (1.88850)	Top-1 acc 32.031 (32.149)	Top-5 acc 57.031 (55.777)	lr 0.02488
Train [6][660/3239]	Time 0.249 (0.533)	Data Time 0.001 (0.037)	Loss 4.0524 (3.9415)	Entropy 1.88768 (1.88849)	Top-1 acc 27.734 (32.125)	Top-5 acc 55.859 (55.775)	lr 0.02488
Train [6][670/3239]	Time 2.144 (0.531)	Data Time 0.001 (0.036)	Loss 3.9196 (3.9413)	Entropy 1.88768 (1.88847)	Top-1 acc 32.031 (32.129)	Top-5 acc 55.078 (55.759)	lr 0.02488
Train [6][680/3239]	Time 0.324 (0.527)	Data Time 0.002 (0.036)	Loss 3.9503 (3.9412)	Entropy 1.88765 (1.88846)	Top-1 acc 32.812 (32.136)	Top-5 acc 55.859 (55.773)	lr 0.02488
Train [6][690/3239]	Time 0.149 (0.525)	Data Time 0.001 (0.035)	Loss 3.8854 (3.9417)	Entropy 1.88764 (1.88845)	Top-1 acc 32.812 (32.119)	Top-5 acc 54.297 (55.768)	lr 0.02488
Train [6][700/3239]	Time 0.219 (0.523)	Data Time 0.001 (0.035)	Loss 3.9639 (3.9417)	Entropy 1.88758 (1.88844)	Top-1 acc 31.250 (32.113)	Top-5 acc 54.297 (55.752)	lr 0.02488
Train [6][710/3239]	Time 0.157 (0.521)	Data Time 0.001 (0.034)	Loss 3.7365 (3.9416)	Entropy 1.88756 (1.88843)	Top-1 acc 35.156 (32.104)	Top-5 acc 63.281 (55.754)	lr 0.02488
Train [6][720/3239]	Time 0.208 (0.520)	Data Time 0.001 (0.034)	Loss 3.9377 (3.9421)	Entropy 1.88752 (1.88841)	Top-1 acc 29.297 (32.095)	Top-5 acc 50.391 (55.752)	lr 0.02488
Train [6][730/3239]	Time 0.203 (0.518)	Data Time 0.001 (0.034)	Loss 4.1118 (3.9421)	Entropy 1.88748 (1.88840)	Top-1 acc 31.250 (32.099)	Top-5 acc 51.953 (55.756)	lr 0.02488
Train [6][740/3239]	Time 0.210 (0.517)	Data Time 0.001 (0.033)	Loss 3.8676 (3.9423)	Entropy 1.88747 (1.88839)	Top-1 acc 32.031 (32.107)	Top-5 acc 56.641 (55.752)	lr 0.02488
Train [6][750/3239]	Time 0.191 (0.516)	Data Time 0.002 (0.033)	Loss 3.9743 (3.9425)	Entropy 1.88744 (1.88838)	Top-1 acc 32.812 (32.107)	Top-5 acc 53.516 (55.750)	lr 0.02488
Train [6][760/3239]	Time 0.243 (0.514)	Data Time 0.001 (0.032)	Loss 3.8922 (3.9424)	Entropy 1.88742 (1.88836)	Top-1 acc 33.984 (32.102)	Top-5 acc 57.422 (55.759)	lr 0.02488
Train [6][770/3239]	Time 0.237 (0.513)	Data Time 0.002 (0.032)	Loss 3.9632 (3.9431)	Entropy 1.88740 (1.88835)	Top-1 acc 29.688 (32.091)	Top-5 acc 57.422 (55.743)	lr 0.02488
Train [6][780/3239]	Time 2.307 (0.512)	Data Time 0.002 (0.032)	Loss 3.9783 (3.9435)	Entropy 1.88740 (1.88834)	Top-1 acc 33.984 (32.084)	Top-5 acc 56.641 (55.729)	lr 0.02488
Train [6][790/3239]	Time 0.207 (0.508)	Data Time 0.001 (0.031)	Loss 3.8540 (3.9432)	Entropy 1.88734 (1.88833)	Top-1 acc 33.984 (32.083)	Top-5 acc 57.812 (55.736)	lr 0.02488
Train [6][800/3239]	Time 0.188 (0.507)	Data Time 0.001 (0.031)	Loss 3.8931 (3.9430)	Entropy 1.88732 (1.88831)	Top-1 acc 30.469 (32.082)	Top-5 acc 57.422 (55.736)	lr 0.02488
Train [6][810/3239]	Time 0.196 (0.505)	Data Time 0.001 (0.030)	Loss 3.8576 (3.9423)	Entropy 1.88729 (1.88830)	Top-1 acc 34.766 (32.103)	Top-5 acc 59.375 (55.757)	lr 0.02488
Train [6][820/3239]	Time 0.218 (0.504)	Data Time 0.002 (0.030)	Loss 3.9539 (3.9426)	Entropy 1.88729 (1.88829)	Top-1 acc 32.031 (32.085)	Top-5 acc 53.906 (55.745)	lr 0.02488
Train [6][830/3239]	Time 0.196 (0.503)	Data Time 0.001 (0.030)	Loss 3.8749 (3.9426)	Entropy 1.88728 (1.88828)	Top-1 acc 35.938 (32.087)	Top-5 acc 57.031 (55.744)	lr 0.02488
Train [6][840/3239]	Time 0.262 (0.502)	Data Time 0.002 (0.029)	Loss 3.9130 (3.9423)	Entropy 1.88725 (1.88826)	Top-1 acc 35.156 (32.087)	Top-5 acc 57.812 (55.747)	lr 0.02488
Train [6][850/3239]	Time 0.154 (0.501)	Data Time 0.001 (0.029)	Loss 3.8578 (3.9423)	Entropy 1.88722 (1.88825)	Top-1 acc 32.812 (32.086)	Top-5 acc 58.984 (55.755)	lr 0.02488
Train [6][860/3239]	Time 0.156 (0.500)	Data Time 0.001 (0.029)	Loss 3.9043 (3.9422)	Entropy 1.88715 (1.88824)	Top-1 acc 30.469 (32.089)	Top-5 acc 60.547 (55.757)	lr 0.02488
Train [6][870/3239]	Time 0.185 (0.499)	Data Time 0.001 (0.029)	Loss 4.0356 (3.9421)	Entropy 1.88715 (1.88823)	Top-1 acc 30.469 (32.100)	Top-5 acc 55.078 (55.761)	lr 0.02488
Train [6][880/3239]	Time 0.163 (0.498)	Data Time 0.001 (0.028)	Loss 3.9095 (3.9420)	Entropy 1.88714 (1.88822)	Top-1 acc 30.078 (32.103)	Top-5 acc 57.812 (55.762)	lr 0.02488
Train [6][890/3239]	Time 2.061 (0.497)	Data Time 0.001 (0.028)	Loss 3.9878 (3.9419)	Entropy 1.88714 (1.88820)	Top-1 acc 30.859 (32.110)	Top-5 acc 52.734 (55.766)	lr 0.02488
Train [6][900/3239]	Time 0.196 (0.494)	Data Time 0.001 (0.028)	Loss 3.9525 (3.9421)	Entropy 1.88710 (1.88819)	Top-1 acc 32.812 (32.104)	Top-5 acc 56.250 (55.765)	lr 0.02488
Train [6][910/3239]	Time 0.296 (0.493)	Data Time 0.001 (0.027)	Loss 3.9584 (3.9416)	Entropy 1.88709 (1.88818)	Top-1 acc 28.125 (32.118)	Top-5 acc 55.078 (55.773)	lr 0.02488
Train [6][920/3239]	Time 0.193 (0.492)	Data Time 0.001 (0.027)	Loss 3.6959 (3.9413)	Entropy 1.88701 (1.88817)	Top-1 acc 32.031 (32.112)	Top-5 acc 64.453 (55.788)	lr 0.02488
Train [6][930/3239]	Time 0.199 (0.491)	Data Time 0.001 (0.027)	Loss 4.1940 (3.9412)	Entropy 1.88701 (1.88815)	Top-1 acc 28.125 (32.106)	Top-5 acc 46.484 (55.783)	lr 0.02488
Train [6][940/3239]	Time 0.202 (0.490)	Data Time 0.001 (0.027)	Loss 3.8565 (3.9410)	Entropy 1.88698 (1.88814)	Top-1 acc 33.594 (32.110)	Top-5 acc 58.203 (55.799)	lr 0.02488
Train [6][950/3239]	Time 0.196 (0.489)	Data Time 0.001 (0.026)	Loss 3.9694 (3.9409)	Entropy 1.88697 (1.88813)	Top-1 acc 29.688 (32.106)	Top-5 acc 55.078 (55.799)	lr 0.02488
Train [6][960/3239]	Time 0.200 (0.488)	Data Time 0.001 (0.026)	Loss 3.9623 (3.9409)	Entropy 1.88694 (1.88812)	Top-1 acc 30.078 (32.108)	Top-5 acc 55.469 (55.809)	lr 0.02488
Train [6][970/3239]	Time 0.369 (0.520)	Data Time 0.088 (0.026)	Loss 4.1037 (3.9412)	Entropy 1.88693 (1.88811)	Top-1 acc 30.078 (32.099)	Top-5 acc 50.000 (55.798)	lr 0.02488
Train [6][980/3239]	Time 0.199 (0.521)	Data Time 0.002 (0.026)	Loss 3.9823 (3.9417)	Entropy 1.88692 (1.88809)	Top-1 acc 27.344 (32.083)	Top-5 acc 54.688 (55.787)	lr 0.02488
Train [6][990/3239]	Time 0.198 (0.520)	Data Time 0.002 (0.025)	Loss 4.0590 (3.9419)	Entropy 1.88688 (1.88808)	Top-1 acc 31.250 (32.076)	Top-5 acc 54.688 (55.785)	lr 0.02488
Train [6][1000/3239]	Time 2.157 (0.519)	Data Time 0.002 (0.025)	Loss 4.0041 (3.9427)	Entropy 1.88688 (1.88807)	Top-1 acc 28.125 (32.064)	Top-5 acc 54.688 (55.768)	lr 0.02488
Train [6][1010/3239]	Time 0.188 (0.516)	Data Time 0.002 (0.025)	Loss 4.1026 (3.9421)	Entropy 1.88682 (1.88806)	Top-1 acc 28.516 (32.068)	Top-5 acc 52.344 (55.769)	lr 0.02488
Train [6][1020/3239]	Time 0.350 (0.515)	Data Time 0.002 (0.025)	Loss 4.1480 (3.9429)	Entropy 1.88680 (1.88804)	Top-1 acc 28.516 (32.061)	Top-5 acc 52.734 (55.752)	lr 0.02488
Train [6][1030/3239]	Time 0.203 (0.514)	Data Time 0.002 (0.025)	Loss 4.0014 (3.9426)	Entropy 1.88680 (1.88803)	Top-1 acc 32.812 (32.067)	Top-5 acc 52.734 (55.753)	lr 0.02488
Train [6][1040/3239]	Time 0.212 (0.513)	Data Time 0.001 (0.024)	Loss 3.8584 (3.9426)	Entropy 1.88679 (1.88802)	Top-1 acc 36.328 (32.078)	Top-5 acc 58.984 (55.754)	lr 0.02488
Train [6][1050/3239]	Time 0.199 (0.512)	Data Time 0.001 (0.024)	Loss 3.8507 (3.9422)	Entropy 1.88676 (1.88801)	Top-1 acc 32.812 (32.079)	Top-5 acc 57.812 (55.753)	lr 0.02488
Train [6][1060/3239]	Time 0.203 (0.511)	Data Time 0.001 (0.024)	Loss 3.9487 (3.9420)	Entropy 1.88670 (1.88800)	Top-1 acc 33.203 (32.080)	Top-5 acc 56.250 (55.762)	lr 0.02488
Train [6][1070/3239]	Time 0.187 (0.510)	Data Time 0.001 (0.024)	Loss 3.9341 (3.9421)	Entropy 1.88662 (1.88798)	Top-1 acc 29.688 (32.080)	Top-5 acc 57.812 (55.762)	lr 0.02488
Train [6][1080/3239]	Time 0.188 (0.509)	Data Time 0.001 (0.024)	Loss 4.0976 (3.9420)	Entropy 1.88661 (1.88797)	Top-1 acc 28.516 (32.083)	Top-5 acc 52.344 (55.770)	lr 0.02488
Train [6][1090/3239]	Time 0.221 (0.508)	Data Time 0.001 (0.023)	Loss 3.9261 (3.9421)	Entropy 1.88659 (1.88796)	Top-1 acc 30.859 (32.081)	Top-5 acc 57.812 (55.774)	lr 0.02488
Train [6][1100/3239]	Time 0.199 (0.507)	Data Time 0.001 (0.023)	Loss 4.0771 (3.9424)	Entropy 1.88659 (1.88795)	Top-1 acc 30.859 (32.079)	Top-5 acc 50.781 (55.764)	lr 0.02488
Train [6][1110/3239]	Time 2.213 (0.507)	Data Time 0.001 (0.023)	Loss 4.0925 (3.9424)	Entropy 1.88659 (1.88793)	Top-1 acc 33.203 (32.080)	Top-5 acc 51.172 (55.774)	lr 0.02488
Train [6][1120/3239]	Time 0.215 (0.504)	Data Time 0.002 (0.023)	Loss 3.9122 (3.9423)	Entropy 1.88656 (1.88792)	Top-1 acc 31.250 (32.082)	Top-5 acc 60.156 (55.777)	lr 0.02488
Train [6][1130/3239]	Time 0.205 (0.503)	Data Time 0.001 (0.023)	Loss 3.9919 (3.9420)	Entropy 1.88655 (1.88791)	Top-1 acc 33.594 (32.092)	Top-5 acc 54.688 (55.789)	lr 0.02488
Train [6][1140/3239]	Time 0.299 (0.503)	Data Time 0.001 (0.022)	Loss 3.9328 (3.9422)	Entropy 1.88650 (1.88790)	Top-1 acc 29.688 (32.094)	Top-5 acc 55.859 (55.794)	lr 0.02488
Train [6][1150/3239]	Time 0.208 (0.502)	Data Time 0.001 (0.022)	Loss 3.9832 (3.9424)	Entropy 1.88648 (1.88789)	Top-1 acc 30.859 (32.092)	Top-5 acc 53.516 (55.789)	lr 0.02488
Train [6][1160/3239]	Time 0.167 (0.501)	Data Time 0.001 (0.022)	Loss 4.0061 (3.9426)	Entropy 1.88646 (1.88787)	Top-1 acc 26.953 (32.084)	Top-5 acc 52.344 (55.779)	lr 0.02488
Train [6][1170/3239]	Time 0.150 (0.501)	Data Time 0.001 (0.022)	Loss 3.9285 (3.9420)	Entropy 1.88645 (1.88786)	Top-1 acc 28.516 (32.095)	Top-5 acc 58.203 (55.795)	lr 0.02488
Train [6][1180/3239]	Time 0.209 (0.500)	Data Time 0.002 (0.022)	Loss 3.8779 (3.9419)	Entropy 1.88645 (1.88785)	Top-1 acc 29.688 (32.094)	Top-5 acc 55.859 (55.797)	lr 0.02488
Train [6][1190/3239]	Time 0.224 (0.499)	Data Time 0.001 (0.022)	Loss 3.8143 (3.9412)	Entropy 1.88643 (1.88784)	Top-1 acc 32.812 (32.110)	Top-5 acc 59.766 (55.823)	lr 0.02488
Train [6][1200/3239]	Time 0.193 (0.498)	Data Time 0.001 (0.021)	Loss 3.9402 (3.9410)	Entropy 1.88642 (1.88783)	Top-1 acc 32.422 (32.125)	Top-5 acc 53.516 (55.826)	lr 0.02488
Train [6][1210/3239]	Time 0.236 (0.498)	Data Time 0.001 (0.021)	Loss 3.9367 (3.9410)	Entropy 1.88641 (1.88781)	Top-1 acc 31.250 (32.121)	Top-5 acc 55.859 (55.818)	lr 0.02488
Train [6][1220/3239]	Time 2.165 (0.497)	Data Time 0.001 (0.021)	Loss 3.7844 (3.9406)	Entropy 1.88641 (1.88780)	Top-1 acc 35.547 (32.129)	Top-5 acc 60.547 (55.820)	lr 0.02488
Train [6][1230/3239]	Time 0.210 (0.494)	Data Time 0.001 (0.021)	Loss 4.1947 (3.9405)	Entropy 1.88637 (1.88779)	Top-1 acc 26.172 (32.133)	Top-5 acc 50.000 (55.828)	lr 0.02488
Train [6][1240/3239]	Time 0.306 (0.494)	Data Time 0.001 (0.021)	Loss 3.8534 (3.9401)	Entropy 1.88634 (1.88778)	Top-1 acc 34.766 (32.140)	Top-5 acc 58.984 (55.832)	lr 0.02488
Train [6][1250/3239]	Time 0.198 (0.493)	Data Time 0.001 (0.021)	Loss 3.9012 (3.9404)	Entropy 1.88634 (1.88777)	Top-1 acc 35.938 (32.137)	Top-5 acc 55.078 (55.820)	lr 0.02488
Train [6][1260/3239]	Time 0.185 (0.492)	Data Time 0.001 (0.020)	Loss 3.8163 (3.9400)	Entropy 1.88631 (1.88776)	Top-1 acc 35.938 (32.145)	Top-5 acc 59.375 (55.833)	lr 0.02488
Train [6][1270/3239]	Time 0.195 (0.492)	Data Time 0.001 (0.020)	Loss 3.7444 (3.9396)	Entropy 1.88628 (1.88775)	Top-1 acc 35.938 (32.152)	Top-5 acc 58.594 (55.842)	lr 0.02488
Train [6][1280/3239]	Time 0.210 (0.491)	Data Time 0.001 (0.020)	Loss 3.8063 (3.9394)	Entropy 1.88626 (1.88773)	Top-1 acc 37.109 (32.150)	Top-5 acc 59.766 (55.853)	lr 0.02488
Train [6][1290/3239]	Time 0.215 (0.491)	Data Time 0.001 (0.020)	Loss 3.9589 (3.9399)	Entropy 1.88625 (1.88772)	Top-1 acc 29.688 (32.136)	Top-5 acc 55.078 (55.833)	lr 0.02488
Train [6][1300/3239]	Time 0.194 (0.490)	Data Time 0.001 (0.020)	Loss 3.8357 (3.9394)	Entropy 1.88623 (1.88771)	Top-1 acc 31.250 (32.145)	Top-5 acc 58.203 (55.842)	lr 0.02488
Train [6][1310/3239]	Time 0.157 (0.489)	Data Time 0.001 (0.020)	Loss 3.9699 (3.9393)	Entropy 1.88618 (1.88770)	Top-1 acc 32.422 (32.147)	Top-5 acc 55.859 (55.844)	lr 0.02488
Train [6][1320/3239]	Time 0.221 (0.489)	Data Time 0.001 (0.020)	Loss 3.8902 (3.9392)	Entropy 1.88615 (1.88769)	Top-1 acc 32.031 (32.144)	Top-5 acc 55.469 (55.844)	lr 0.02487
Train [6][1330/3239]	Time 36.669 (0.514)	Data Time 0.001 (0.020)	Loss 3.9709 (3.9393)	Entropy 1.88615 (1.88768)	Top-1 acc 31.641 (32.140)	Top-5 acc 51.562 (55.835)	lr 0.02487
Train [6][1340/3239]	Time 0.343 (0.513)	Data Time 0.002 (0.019)	Loss 3.9899 (3.9389)	Entropy 1.88610 (1.88766)	Top-1 acc 31.250 (32.138)	Top-5 acc 55.078 (55.838)	lr 0.02487
Train [6][1350/3239]	Time 0.337 (0.512)	Data Time 0.001 (0.019)	Loss 4.1082 (3.9394)	Entropy 1.88609 (1.88765)	Top-1 acc 28.906 (32.116)	Top-5 acc 50.781 (55.826)	lr 0.02487
Train [6][1360/3239]	Time 0.196 (0.511)	Data Time 0.001 (0.019)	Loss 3.8358 (3.9393)	Entropy 1.88607 (1.88764)	Top-1 acc 30.859 (32.116)	Top-5 acc 58.984 (55.830)	lr 0.02487
Train [6][1370/3239]	Time 0.195 (0.510)	Data Time 0.001 (0.019)	Loss 3.9575 (3.9393)	Entropy 1.88604 (1.88763)	Top-1 acc 31.641 (32.124)	Top-5 acc 55.078 (55.829)	lr 0.02487
Train [6][1380/3239]	Time 0.182 (0.510)	Data Time 0.001 (0.019)	Loss 4.0869 (3.9396)	Entropy 1.88604 (1.88762)	Top-1 acc 30.469 (32.121)	Top-5 acc 55.859 (55.828)	lr 0.02487
Train [6][1390/3239]	Time 0.247 (0.509)	Data Time 0.001 (0.019)	Loss 4.1905 (3.9402)	Entropy 1.88597 (1.88761)	Top-1 acc 25.781 (32.101)	Top-5 acc 52.344 (55.813)	lr 0.02487
Train [6][1400/3239]	Time 0.185 (0.508)	Data Time 0.001 (0.019)	Loss 4.0714 (3.9405)	Entropy 1.88596 (1.88759)	Top-1 acc 30.078 (32.091)	Top-5 acc 52.734 (55.804)	lr 0.02487
Train [6][1410/3239]	Time 0.166 (0.508)	Data Time 0.001 (0.019)	Loss 4.0078 (3.9408)	Entropy 1.88594 (1.88758)	Top-1 acc 30.469 (32.087)	Top-5 acc 51.562 (55.794)	lr 0.02487
Train [6][1420/3239]	Time 0.206 (0.507)	Data Time 0.001 (0.018)	Loss 4.0137 (3.9408)	Entropy 1.88590 (1.88757)	Top-1 acc 32.422 (32.095)	Top-5 acc 53.125 (55.793)	lr 0.02487
Train [6][1430/3239]	Time 0.228 (0.506)	Data Time 0.002 (0.018)	Loss 3.8852 (3.9408)	Entropy 1.88587 (1.88756)	Top-1 acc 31.250 (32.098)	Top-5 acc 57.031 (55.798)	lr 0.02487
Train [6][1440/3239]	Time 2.047 (0.505)	Data Time 0.001 (0.018)	Loss 3.8790 (3.9407)	Entropy 1.88587 (1.88755)	Top-1 acc 33.594 (32.094)	Top-5 acc 58.594 (55.804)	lr 0.02487
Train [6][1450/3239]	Time 0.237 (0.503)	Data Time 0.001 (0.018)	Loss 3.9897 (3.9407)	Entropy 1.88586 (1.88754)	Top-1 acc 33.984 (32.089)	Top-5 acc 54.297 (55.800)	lr 0.02487
Train [6][1460/3239]	Time 0.203 (0.503)	Data Time 0.001 (0.018)	Loss 3.7397 (3.9406)	Entropy 1.88584 (1.88753)	Top-1 acc 34.766 (32.081)	Top-5 acc 59.766 (55.794)	lr 0.02487
Train [6][1470/3239]	Time 0.287 (0.502)	Data Time 0.001 (0.018)	Loss 3.8405 (3.9403)	Entropy 1.88583 (1.88751)	Top-1 acc 34.766 (32.085)	Top-5 acc 57.031 (55.800)	lr 0.02487
Train [6][1480/3239]	Time 0.215 (0.501)	Data Time 0.001 (0.018)	Loss 4.0018 (3.9407)	Entropy 1.88582 (1.88750)	Top-1 acc 30.859 (32.072)	Top-5 acc 56.250 (55.788)	lr 0.02487
Train [6][1490/3239]	Time 0.238 (0.501)	Data Time 0.001 (0.018)	Loss 4.1964 (3.9411)	Entropy 1.88574 (1.88749)	Top-1 acc 26.562 (32.066)	Top-5 acc 49.219 (55.770)	lr 0.02487
Train [6][1500/3239]	Time 0.215 (0.500)	Data Time 0.001 (0.018)	Loss 4.0621 (3.9411)	Entropy 1.88574 (1.88748)	Top-1 acc 29.297 (32.073)	Top-5 acc 51.562 (55.762)	lr 0.02487
Train [6][1510/3239]	Time 0.144 (0.500)	Data Time 0.001 (0.017)	Loss 3.7921 (3.9410)	Entropy 1.88567 (1.88747)	Top-1 acc 31.641 (32.072)	Top-5 acc 57.422 (55.758)	lr 0.02487
Train [6][1520/3239]	Time 0.191 (0.499)	Data Time 0.001 (0.017)	Loss 3.9252 (3.9406)	Entropy 1.88564 (1.88746)	Top-1 acc 28.516 (32.068)	Top-5 acc 56.641 (55.771)	lr 0.02487
Train [6][1530/3239]	Time 0.202 (0.499)	Data Time 0.001 (0.017)	Loss 4.0193 (3.9404)	Entropy 1.88560 (1.88744)	Top-1 acc 29.297 (32.078)	Top-5 acc 57.031 (55.780)	lr 0.02487
Train [6][1540/3239]	Time 0.238 (0.498)	Data Time 0.001 (0.017)	Loss 3.8746 (3.9408)	Entropy 1.88559 (1.88743)	Top-1 acc 35.938 (32.070)	Top-5 acc 53.516 (55.775)	lr 0.02487
Train [6][1550/3239]	Time 2.330 (0.498)	Data Time 0.001 (0.017)	Loss 3.8447 (3.9412)	Entropy 1.88559 (1.88742)	Top-1 acc 32.031 (32.056)	Top-5 acc 56.250 (55.760)	lr 0.02487
Train [6][1560/3239]	Time 0.162 (0.496)	Data Time 0.001 (0.017)	Loss 3.9517 (3.9407)	Entropy 1.88557 (1.88741)	Top-1 acc 28.516 (32.065)	Top-5 acc 57.031 (55.770)	lr 0.02487
Train [6][1570/3239]	Time 0.196 (0.495)	Data Time 0.001 (0.017)	Loss 3.9555 (3.9408)	Entropy 1.88556 (1.88740)	Top-1 acc 35.156 (32.070)	Top-5 acc 52.734 (55.770)	lr 0.02487
Train [6][1580/3239]	Time 0.307 (0.495)	Data Time 0.001 (0.017)	Loss 3.7604 (3.9405)	Entropy 1.88550 (1.88738)	Top-1 acc 36.328 (32.069)	Top-5 acc 58.984 (55.771)	lr 0.02487
Train [6][1590/3239]	Time 0.200 (0.494)	Data Time 0.001 (0.017)	Loss 3.7267 (3.9406)	Entropy 1.88549 (1.88737)	Top-1 acc 39.453 (32.065)	Top-5 acc 58.984 (55.767)	lr 0.02487
Train [6][1600/3239]	Time 0.209 (0.494)	Data Time 0.001 (0.017)	Loss 3.7307 (3.9405)	Entropy 1.88545 (1.88736)	Top-1 acc 36.328 (32.072)	Top-5 acc 61.719 (55.768)	lr 0.02487
Train [6][1610/3239]	Time 0.203 (0.493)	Data Time 0.001 (0.016)	Loss 3.9410 (3.9404)	Entropy 1.88544 (1.88735)	Top-1 acc 32.812 (32.071)	Top-5 acc 55.859 (55.769)	lr 0.02487
Train [6][1620/3239]	Time 0.200 (0.493)	Data Time 0.001 (0.016)	Loss 4.0115 (3.9400)	Entropy 1.88541 (1.88734)	Top-1 acc 25.391 (32.075)	Top-5 acc 54.688 (55.779)	lr 0.02487
Train [6][1630/3239]	Time 0.165 (0.492)	Data Time 0.001 (0.016)	Loss 3.9747 (3.9404)	Entropy 1.88536 (1.88732)	Top-1 acc 32.812 (32.071)	Top-5 acc 56.641 (55.769)	lr 0.02487
Train [6][1640/3239]	Time 0.173 (0.492)	Data Time 0.001 (0.016)	Loss 4.0639 (3.9404)	Entropy 1.88534 (1.88731)	Top-1 acc 27.734 (32.070)	Top-5 acc 49.219 (55.768)	lr 0.02487
Train [6][1650/3239]	Time 0.185 (0.491)	Data Time 0.001 (0.016)	Loss 3.9344 (3.9408)	Entropy 1.88531 (1.88730)	Top-1 acc 31.250 (32.056)	Top-5 acc 52.344 (55.758)	lr 0.02487
Train [6][1660/3239]	Time 2.162 (0.491)	Data Time 0.001 (0.016)	Loss 3.8710 (3.9410)	Entropy 1.88531 (1.88729)	Top-1 acc 35.547 (32.054)	Top-5 acc 59.766 (55.754)	lr 0.02487
Train [6][1670/3239]	Time 0.209 (0.489)	Data Time 0.001 (0.016)	Loss 3.6396 (3.9406)	Entropy 1.88527 (1.88728)	Top-1 acc 38.672 (32.062)	Top-5 acc 61.328 (55.766)	lr 0.02487
Train [6][1680/3239]	Time 0.299 (0.488)	Data Time 0.001 (0.016)	Loss 3.8479 (3.9411)	Entropy 1.88528 (1.88726)	Top-1 acc 33.984 (32.050)	Top-5 acc 61.328 (55.752)	lr 0.02487
Train [6][1690/3239]	Time 0.341 (0.488)	Data Time 0.001 (0.016)	Loss 3.7671 (3.9410)	Entropy 1.88525 (1.88725)	Top-1 acc 30.469 (32.050)	Top-5 acc 56.641 (55.750)	lr 0.02487
Train [6][1700/3239]	Time 0.417 (0.507)	Data Time 0.002 (0.016)	Loss 4.0228 (3.9409)	Entropy 1.88521 (1.88724)	Top-1 acc 36.328 (32.050)	Top-5 acc 54.297 (55.749)	lr 0.02487
Train [6][1710/3239]	Time 0.213 (0.507)	Data Time 0.002 (0.016)	Loss 4.0572 (3.9411)	Entropy 1.88516 (1.88723)	Top-1 acc 28.125 (32.043)	Top-5 acc 53.125 (55.739)	lr 0.02487
Train [6][1720/3239]	Time 0.153 (0.506)	Data Time 0.001 (0.016)	Loss 4.0541 (3.9413)	Entropy 1.88512 (1.88722)	Top-1 acc 30.859 (32.045)	Top-5 acc 53.906 (55.737)	lr 0.02487
Train [6][1730/3239]	Time 0.198 (0.506)	Data Time 0.001 (0.015)	Loss 3.9435 (3.9411)	Entropy 1.88510 (1.88720)	Top-1 acc 31.250 (32.050)	Top-5 acc 53.125 (55.745)	lr 0.02487
Train [6][1740/3239]	Time 0.207 (0.505)	Data Time 0.001 (0.015)	Loss 3.8698 (3.9409)	Entropy 1.88502 (1.88719)	Top-1 acc 34.375 (32.055)	Top-5 acc 53.516 (55.751)	lr 0.02487
Train [6][1750/3239]	Time 0.219 (0.505)	Data Time 0.001 (0.015)	Loss 3.7848 (3.9404)	Entropy 1.88501 (1.88718)	Top-1 acc 39.844 (32.068)	Top-5 acc 59.766 (55.763)	lr 0.02487
Train [6][1760/3239]	Time 0.215 (0.504)	Data Time 0.001 (0.015)	Loss 3.8002 (3.9403)	Entropy 1.88500 (1.88717)	Top-1 acc 36.328 (32.075)	Top-5 acc 57.422 (55.766)	lr 0.02487
Train [6][1770/3239]	Time 2.303 (0.504)	Data Time 0.001 (0.015)	Loss 3.8701 (3.9406)	Entropy 1.88500 (1.88716)	Top-1 acc 32.812 (32.070)	Top-5 acc 59.766 (55.761)	lr 0.02487
Train [6][1780/3239]	Time 0.278 (0.502)	Data Time 0.001 (0.015)	Loss 3.8986 (3.9404)	Entropy 1.88499 (1.88714)	Top-1 acc 32.812 (32.072)	Top-5 acc 57.812 (55.768)	lr 0.02487
Train [6][1790/3239]	Time 0.202 (0.502)	Data Time 0.001 (0.015)	Loss 3.8963 (3.9403)	Entropy 1.88498 (1.88713)	Top-1 acc 31.250 (32.072)	Top-5 acc 58.594 (55.775)	lr 0.02487
Train [6][1800/3239]	Time 0.154 (0.501)	Data Time 0.001 (0.015)	Loss 3.8908 (3.9404)	Entropy 1.88494 (1.88712)	Top-1 acc 36.328 (32.072)	Top-5 acc 58.203 (55.773)	lr 0.02487
Train [6][1810/3239]	Time 0.187 (0.500)	Data Time 0.001 (0.015)	Loss 3.8457 (3.9404)	Entropy 1.88491 (1.88711)	Top-1 acc 33.594 (32.073)	Top-5 acc 56.250 (55.774)	lr 0.02487
Train [6][1820/3239]	Time 0.237 (0.500)	Data Time 0.002 (0.015)	Loss 4.0683 (3.9405)	Entropy 1.88489 (1.88709)	Top-1 acc 26.953 (32.067)	Top-5 acc 54.688 (55.777)	lr 0.02487
Train [6][1830/3239]	Time 0.190 (0.499)	Data Time 0.001 (0.015)	Loss 4.0702 (3.9404)	Entropy 1.88488 (1.88708)	Top-1 acc 33.594 (32.068)	Top-5 acc 52.734 (55.782)	lr 0.02487
Train [6][1840/3239]	Time 0.211 (0.499)	Data Time 0.001 (0.015)	Loss 3.9967 (3.9404)	Entropy 1.88481 (1.88707)	Top-1 acc 29.688 (32.065)	Top-5 acc 56.641 (55.783)	lr 0.02487
Train [6][1850/3239]	Time 0.183 (0.498)	Data Time 0.001 (0.015)	Loss 4.0882 (3.9403)	Entropy 1.88478 (1.88706)	Top-1 acc 33.203 (32.068)	Top-5 acc 51.953 (55.785)	lr 0.02487
Train [6][1860/3239]	Time 0.190 (0.498)	Data Time 0.001 (0.015)	Loss 3.8333 (3.9401)	Entropy 1.88472 (1.88705)	Top-1 acc 33.594 (32.074)	Top-5 acc 60.156 (55.799)	lr 0.02487
Train [6][1870/3239]	Time 0.196 (0.497)	Data Time 0.001 (0.015)	Loss 3.8355 (3.9398)	Entropy 1.88468 (1.88703)	Top-1 acc 32.812 (32.079)	Top-5 acc 62.109 (55.809)	lr 0.02487
Train [6][1880/3239]	Time 2.089 (0.497)	Data Time 0.001 (0.014)	Loss 3.9682 (3.9396)	Entropy 1.88468 (1.88702)	Top-1 acc 34.766 (32.088)	Top-5 acc 52.734 (55.812)	lr 0.02487
Train [6][1890/3239]	Time 0.212 (0.495)	Data Time 0.001 (0.014)	Loss 4.0288 (3.9395)	Entropy 1.88464 (1.88701)	Top-1 acc 31.641 (32.092)	Top-5 acc 50.391 (55.815)	lr 0.02487
Train [6][1900/3239]	Time 0.303 (0.495)	Data Time 0.001 (0.014)	Loss 3.9725 (3.9395)	Entropy 1.88457 (1.88700)	Top-1 acc 25.781 (32.088)	Top-5 acc 53.516 (55.802)	lr 0.02487
Train [6][1910/3239]	Time 0.197 (0.495)	Data Time 0.001 (0.014)	Loss 3.9700 (3.9395)	Entropy 1.88456 (1.88698)	Top-1 acc 30.469 (32.086)	Top-5 acc 53.125 (55.802)	lr 0.02487
Train [6][1920/3239]	Time 0.196 (0.494)	Data Time 0.001 (0.014)	Loss 4.1691 (3.9399)	Entropy 1.88450 (1.88697)	Top-1 acc 29.297 (32.089)	Top-5 acc 49.609 (55.794)	lr 0.02487
Train [6][1930/3239]	Time 0.211 (0.494)	Data Time 0.001 (0.014)	Loss 3.9731 (3.9398)	Entropy 1.88446 (1.88696)	Top-1 acc 32.031 (32.092)	Top-5 acc 57.031 (55.799)	lr 0.02487
Train [6][1940/3239]	Time 0.220 (0.493)	Data Time 0.001 (0.014)	Loss 4.0064 (3.9397)	Entropy 1.88446 (1.88694)	Top-1 acc 29.688 (32.088)	Top-5 acc 56.641 (55.805)	lr 0.02487
Train [6][1950/3239]	Time 0.157 (0.493)	Data Time 0.001 (0.014)	Loss 3.8214 (3.9395)	Entropy 1.88445 (1.88693)	Top-1 acc 33.203 (32.093)	Top-5 acc 57.031 (55.803)	lr 0.02487
Train [6][1960/3239]	Time 0.180 (0.492)	Data Time 0.001 (0.014)	Loss 4.1264 (3.9396)	Entropy 1.88443 (1.88692)	Top-1 acc 29.297 (32.089)	Top-5 acc 53.125 (55.800)	lr 0.02487
Train [6][1970/3239]	Time 0.146 (0.492)	Data Time 0.001 (0.014)	Loss 4.0253 (3.9393)	Entropy 1.88442 (1.88691)	Top-1 acc 33.594 (32.098)	Top-5 acc 56.641 (55.807)	lr 0.02487
Train [6][1980/3239]	Time 0.216 (0.491)	Data Time 0.001 (0.014)	Loss 4.0113 (3.9391)	Entropy 1.88434 (1.88689)	Top-1 acc 29.297 (32.106)	Top-5 acc 53.125 (55.810)	lr 0.02487
Train [6][1990/3239]	Time 2.203 (0.491)	Data Time 0.001 (0.014)	Loss 3.7693 (3.9393)	Entropy 1.88434 (1.88688)	Top-1 acc 34.766 (32.103)	Top-5 acc 58.594 (55.807)	lr 0.02487
Train [6][2000/3239]	Time 0.226 (0.490)	Data Time 0.001 (0.014)	Loss 3.9479 (3.9392)	Entropy 1.88432 (1.88687)	Top-1 acc 31.250 (32.107)	Top-5 acc 57.031 (55.810)	lr 0.02487
Train [6][2010/3239]	Time 0.152 (0.489)	Data Time 0.002 (0.014)	Loss 4.0136 (3.9390)	Entropy 1.88429 (1.88685)	Top-1 acc 31.641 (32.109)	Top-5 acc 55.859 (55.814)	lr 0.02486
Train [6][2020/3239]	Time 0.202 (0.489)	Data Time 0.001 (0.014)	Loss 3.7540 (3.9389)	Entropy 1.88429 (1.88684)	Top-1 acc 35.547 (32.114)	Top-5 acc 59.375 (55.819)	lr 0.02486
Train [6][2030/3239]	Time 0.131 (0.489)	Data Time 0.001 (0.014)	Loss 3.9352 (3.9391)	Entropy 1.88428 (1.88683)	Top-1 acc 30.078 (32.110)	Top-5 acc 52.344 (55.813)	lr 0.02486
Train [6][2040/3239]	Time 0.193 (0.488)	Data Time 0.001 (0.014)	Loss 3.7393 (3.9391)	Entropy 1.88422 (1.88682)	Top-1 acc 39.062 (32.112)	Top-5 acc 63.281 (55.811)	lr 0.02486
Train [6][2050/3239]	Time 0.171 (0.488)	Data Time 0.001 (0.014)	Loss 3.9138 (3.9390)	Entropy 1.88417 (1.88680)	Top-1 acc 32.031 (32.113)	Top-5 acc 55.469 (55.819)	lr 0.02486
Train [6][2060/3239]	Time 0.251 (0.504)	Data Time 0.004 (0.013)	Loss 3.5704 (3.9387)	Entropy 1.88412 (1.88679)	Top-1 acc 39.844 (32.119)	Top-5 acc 63.281 (55.827)	lr 0.02486
Train [6][2070/3239]	Time 0.239 (0.504)	Data Time 0.002 (0.013)	Loss 3.9882 (3.9390)	Entropy 1.88409 (1.88678)	Top-1 acc 32.422 (32.116)	Top-5 acc 54.688 (55.821)	lr 0.02486
Train [6][2080/3239]	Time 0.230 (0.504)	Data Time 0.003 (0.013)	Loss 4.1144 (3.9392)	Entropy 1.88404 (1.88677)	Top-1 acc 26.172 (32.111)	Top-5 acc 51.562 (55.818)	lr 0.02486
Train [6][2090/3239]	Time 0.183 (0.503)	Data Time 0.001 (0.013)	Loss 3.9914 (3.9391)	Entropy 1.88401 (1.88675)	Top-1 acc 28.906 (32.114)	Top-5 acc 53.125 (55.820)	lr 0.02486
Train [6][2100/3239]	Time 2.252 (0.503)	Data Time 0.001 (0.013)	Loss 4.0264 (3.9390)	Entropy 1.88401 (1.88674)	Top-1 acc 32.422 (32.116)	Top-5 acc 55.469 (55.825)	lr 0.02486
Train [6][2110/3239]	Time 0.251 (0.501)	Data Time 0.002 (0.013)	Loss 3.8636 (3.9389)	Entropy 1.88399 (1.88673)	Top-1 acc 33.203 (32.122)	Top-5 acc 56.250 (55.832)	lr 0.02486
Train [6][2120/3239]	Time 0.321 (0.501)	Data Time 0.002 (0.013)	Loss 4.2010 (3.9391)	Entropy 1.88399 (1.88671)	Top-1 acc 26.953 (32.120)	Top-5 acc 49.219 (55.822)	lr 0.02486
Train [6][2130/3239]	Time 0.161 (0.501)	Data Time 0.002 (0.013)	Loss 4.0661 (3.9391)	Entropy 1.88398 (1.88670)	Top-1 acc 28.906 (32.123)	Top-5 acc 51.562 (55.825)	lr 0.02486
Train [6][2140/3239]	Time 0.228 (0.500)	Data Time 0.001 (0.013)	Loss 3.8028 (3.9390)	Entropy 1.88397 (1.88669)	Top-1 acc 38.281 (32.127)	Top-5 acc 57.031 (55.822)	lr 0.02486
Train [6][2150/3239]	Time 0.170 (0.500)	Data Time 0.001 (0.013)	Loss 3.8148 (3.9387)	Entropy 1.88395 (1.88668)	Top-1 acc 38.281 (32.131)	Top-5 acc 61.719 (55.828)	lr 0.02486
Train [6][2160/3239]	Time 0.139 (0.500)	Data Time 0.001 (0.013)	Loss 3.9159 (3.9385)	Entropy 1.88393 (1.88666)	Top-1 acc 30.859 (32.137)	Top-5 acc 54.297 (55.832)	lr 0.02486
Train [6][2170/3239]	Time 0.236 (0.499)	Data Time 0.001 (0.013)	Loss 3.9481 (3.9384)	Entropy 1.88384 (1.88665)	Top-1 acc 37.109 (32.135)	Top-5 acc 57.031 (55.829)	lr 0.02486
Train [6][2180/3239]	Time 0.195 (0.499)	Data Time 0.001 (0.013)	Loss 3.9457 (3.9385)	Entropy 1.88385 (1.88664)	Top-1 acc 30.078 (32.128)	Top-5 acc 53.125 (55.823)	lr 0.02486
Train [6][2190/3239]	Time 0.183 (0.499)	Data Time 0.001 (0.013)	Loss 4.1186 (3.9385)	Entropy 1.88381 (1.88662)	Top-1 acc 33.203 (32.141)	Top-5 acc 54.297 (55.824)	lr 0.02486
Train [6][2200/3239]	Time 0.210 (0.498)	Data Time 0.001 (0.013)	Loss 3.8442 (3.9382)	Entropy 1.88378 (1.88661)	Top-1 acc 31.641 (32.149)	Top-5 acc 61.719 (55.830)	lr 0.02486
Train [6][2210/3239]	Time 2.092 (0.498)	Data Time 0.001 (0.013)	Loss 3.8888 (3.9381)	Entropy 1.88378 (1.88660)	Top-1 acc 30.078 (32.153)	Top-5 acc 55.859 (55.835)	lr 0.02486
Train [6][2220/3239]	Time 0.295 (0.496)	Data Time 0.002 (0.013)	Loss 4.0066 (3.9382)	Entropy 1.88369 (1.88659)	Top-1 acc 30.078 (32.153)	Top-5 acc 52.734 (55.835)	lr 0.02486
Train [6][2230/3239]	Time 0.325 (0.496)	Data Time 0.002 (0.013)	Loss 3.7758 (3.9382)	Entropy 1.88364 (1.88657)	Top-1 acc 35.156 (32.154)	Top-5 acc 61.328 (55.835)	lr 0.02486
Train [6][2240/3239]	Time 0.205 (0.496)	Data Time 0.001 (0.013)	Loss 3.7214 (3.9382)	Entropy 1.88362 (1.88656)	Top-1 acc 37.891 (32.154)	Top-5 acc 64.844 (55.834)	lr 0.02486
Train [6][2250/3239]	Time 0.208 (0.495)	Data Time 0.001 (0.013)	Loss 3.8448 (3.9382)	Entropy 1.88360 (1.88655)	Top-1 acc 33.594 (32.157)	Top-5 acc 60.156 (55.835)	lr 0.02486
Train [6][2260/3239]	Time 0.237 (0.495)	Data Time 0.001 (0.013)	Loss 3.8329 (3.9382)	Entropy 1.88356 (1.88653)	Top-1 acc 30.859 (32.155)	Top-5 acc 60.547 (55.835)	lr 0.02486
Train [6][2270/3239]	Time 0.195 (0.495)	Data Time 0.002 (0.013)	Loss 3.8944 (3.9379)	Entropy 1.88352 (1.88652)	Top-1 acc 36.328 (32.165)	Top-5 acc 56.250 (55.840)	lr 0.02486
Train [6][2280/3239]	Time 0.200 (0.494)	Data Time 0.001 (0.012)	Loss 3.9965 (3.9380)	Entropy 1.88350 (1.88651)	Top-1 acc 35.156 (32.166)	Top-5 acc 56.250 (55.839)	lr 0.02486
Train [6][2290/3239]	Time 0.222 (0.494)	Data Time 0.001 (0.012)	Loss 3.9351 (3.9382)	Entropy 1.88349 (1.88649)	Top-1 acc 32.031 (32.163)	Top-5 acc 57.031 (55.832)	lr 0.02486
Train [6][2300/3239]	Time 0.192 (0.494)	Data Time 0.001 (0.012)	Loss 4.0300 (3.9383)	Entropy 1.88343 (1.88648)	Top-1 acc 26.172 (32.157)	Top-5 acc 52.734 (55.827)	lr 0.02486
Train [6][2310/3239]	Time 0.229 (0.493)	Data Time 0.001 (0.012)	Loss 3.9482 (3.9384)	Entropy 1.88342 (1.88647)	Top-1 acc 31.250 (32.153)	Top-5 acc 53.516 (55.822)	lr 0.02486
Train [6][2320/3239]	Time 2.156 (0.493)	Data Time 0.001 (0.012)	Loss 3.9590 (3.9385)	Entropy 1.88342 (1.88645)	Top-1 acc 33.594 (32.147)	Top-5 acc 54.688 (55.820)	lr 0.02486
Train [6][2330/3239]	Time 0.296 (0.492)	Data Time 0.001 (0.012)	Loss 3.8733 (3.9385)	Entropy 1.88336 (1.88644)	Top-1 acc 33.203 (32.147)	Top-5 acc 57.812 (55.822)	lr 0.02486
Train [6][2340/3239]	Time 0.309 (0.492)	Data Time 0.001 (0.012)	Loss 3.9731 (3.9386)	Entropy 1.88334 (1.88643)	Top-1 acc 30.469 (32.148)	Top-5 acc 56.250 (55.824)	lr 0.02486
Train [6][2350/3239]	Time 0.195 (0.491)	Data Time 0.001 (0.012)	Loss 4.0371 (3.9386)	Entropy 1.88333 (1.88641)	Top-1 acc 33.203 (32.156)	Top-5 acc 54.688 (55.820)	lr 0.02486
Train [6][2360/3239]	Time 0.232 (0.491)	Data Time 0.001 (0.012)	Loss 4.0694 (3.9386)	Entropy 1.88332 (1.88640)	Top-1 acc 28.125 (32.156)	Top-5 acc 52.344 (55.821)	lr 0.02486
Train [6][2370/3239]	Time 0.213 (0.491)	Data Time 0.001 (0.012)	Loss 3.7829 (3.9385)	Entropy 1.88328 (1.88639)	Top-1 acc 34.766 (32.154)	Top-5 acc 54.688 (55.820)	lr 0.02486
Train [6][2380/3239]	Time 0.207 (0.490)	Data Time 0.001 (0.012)	Loss 3.7307 (3.9385)	Entropy 1.88326 (1.88637)	Top-1 acc 34.375 (32.153)	Top-5 acc 57.812 (55.818)	lr 0.02486
Train [6][2390/3239]	Time 0.240 (0.490)	Data Time 0.001 (0.012)	Loss 3.8908 (3.9386)	Entropy 1.88321 (1.88636)	Top-1 acc 32.031 (32.151)	Top-5 acc 54.688 (55.816)	lr 0.02486
Train [6][2400/3239]	Time 0.204 (0.490)	Data Time 0.001 (0.012)	Loss 3.9421 (3.9387)	Entropy 1.88318 (1.88635)	Top-1 acc 30.859 (32.153)	Top-5 acc 54.297 (55.816)	lr 0.02486
Train [6][2410/3239]	Time 0.225 (0.489)	Data Time 0.001 (0.012)	Loss 3.9617 (3.9385)	Entropy 1.88314 (1.88634)	Top-1 acc 31.641 (32.157)	Top-5 acc 56.250 (55.819)	lr 0.02486
Train [6][2420/3239]	Time 0.373 (0.502)	Data Time 0.006 (0.012)	Loss 3.9538 (3.9384)	Entropy 1.88311 (1.88632)	Top-1 acc 32.812 (32.162)	Top-5 acc 54.688 (55.822)	lr 0.02486
Train [6][2430/3239]	Time 2.436 (0.503)	Data Time 0.002 (0.012)	Loss 3.8791 (3.9383)	Entropy 1.88311 (1.88631)	Top-1 acc 33.594 (32.163)	Top-5 acc 59.375 (55.826)	lr 0.02486
Train [6][2440/3239]	Time 0.143 (0.501)	Data Time 0.002 (0.012)	Loss 3.8602 (3.9382)	Entropy 1.88309 (1.88630)	Top-1 acc 32.812 (32.166)	Top-5 acc 58.203 (55.828)	lr 0.02486
Train [6][2450/3239]	Time 0.331 (0.501)	Data Time 0.002 (0.012)	Loss 3.9296 (3.9380)	Entropy 1.88307 (1.88628)	Top-1 acc 30.469 (32.167)	Top-5 acc 55.469 (55.836)	lr 0.02486
Train [6][2460/3239]	Time 0.144 (0.501)	Data Time 0.001 (0.012)	Loss 4.1358 (3.9381)	Entropy 1.88305 (1.88627)	Top-1 acc 28.125 (32.163)	Top-5 acc 51.953 (55.833)	lr 0.02486
Train [6][2470/3239]	Time 0.219 (0.501)	Data Time 0.001 (0.012)	Loss 3.7624 (3.9381)	Entropy 1.88304 (1.88626)	Top-1 acc 34.375 (32.165)	Top-5 acc 60.938 (55.832)	lr 0.02486
Train [6][2480/3239]	Time 0.140 (0.500)	Data Time 0.001 (0.012)	Loss 3.8163 (3.9378)	Entropy 1.88297 (1.88624)	Top-1 acc 34.766 (32.167)	Top-5 acc 58.594 (55.837)	lr 0.02486
Train [6][2490/3239]	Time 0.190 (0.500)	Data Time 0.001 (0.012)	Loss 3.8641 (3.9378)	Entropy 1.88292 (1.88623)	Top-1 acc 37.500 (32.167)	Top-5 acc 58.984 (55.838)	lr 0.02486
Train [6][2500/3239]	Time 0.206 (0.499)	Data Time 0.001 (0.012)	Loss 3.8287 (3.9378)	Entropy 1.88291 (1.88622)	Top-1 acc 32.812 (32.169)	Top-5 acc 58.594 (55.843)	lr 0.02486
Train [6][2510/3239]	Time 0.177 (0.499)	Data Time 0.001 (0.012)	Loss 4.0632 (3.9383)	Entropy 1.88288 (1.88620)	Top-1 acc 31.250 (32.163)	Top-5 acc 55.469 (55.830)	lr 0.02486
Train [6][2520/3239]	Time 0.211 (0.499)	Data Time 0.001 (0.012)	Loss 3.8019 (3.9385)	Entropy 1.88285 (1.88619)	Top-1 acc 29.688 (32.156)	Top-5 acc 54.688 (55.826)	lr 0.02486
Train [6][2530/3239]	Time 0.185 (0.498)	Data Time 0.001 (0.011)	Loss 4.1201 (3.9386)	Entropy 1.88281 (1.88618)	Top-1 acc 28.516 (32.155)	Top-5 acc 52.734 (55.823)	lr 0.02486
Train [6][2540/3239]	Time 2.151 (0.498)	Data Time 0.001 (0.011)	Loss 4.1205 (3.9387)	Entropy 1.88281 (1.88616)	Top-1 acc 26.172 (32.150)	Top-5 acc 51.562 (55.821)	lr 0.02486
Train [6][2550/3239]	Time 0.213 (0.497)	Data Time 0.001 (0.011)	Loss 3.7803 (3.9387)	Entropy 1.88280 (1.88615)	Top-1 acc 33.984 (32.149)	Top-5 acc 61.328 (55.822)	lr 0.02486
Train [6][2560/3239]	Time 0.252 (0.497)	Data Time 0.001 (0.011)	Loss 3.9001 (3.9387)	Entropy 1.88278 (1.88614)	Top-1 acc 35.547 (32.149)	Top-5 acc 55.469 (55.821)	lr 0.02486
Train [6][2570/3239]	Time 0.195 (0.496)	Data Time 0.001 (0.011)	Loss 3.8169 (3.9387)	Entropy 1.88273 (1.88612)	Top-1 acc 35.547 (32.150)	Top-5 acc 58.594 (55.822)	lr 0.02486
Train [6][2580/3239]	Time 0.177 (0.496)	Data Time 0.001 (0.011)	Loss 3.7990 (3.9385)	Entropy 1.88273 (1.88611)	Top-1 acc 35.547 (32.155)	Top-5 acc 60.156 (55.830)	lr 0.02486
Train [6][2590/3239]	Time 0.185 (0.496)	Data Time 0.001 (0.011)	Loss 3.8474 (3.9382)	Entropy 1.88267 (1.88610)	Top-1 acc 35.156 (32.163)	Top-5 acc 57.422 (55.837)	lr 0.02486
Train [6][2600/3239]	Time 0.236 (0.495)	Data Time 0.001 (0.011)	Loss 3.9199 (3.9383)	Entropy 1.88264 (1.88609)	Top-1 acc 34.766 (32.161)	Top-5 acc 56.641 (55.832)	lr 0.02486
Train [6][2610/3239]	Time 0.174 (0.495)	Data Time 0.001 (0.011)	Loss 3.9839 (3.9385)	Entropy 1.88261 (1.88607)	Top-1 acc 33.203 (32.161)	Top-5 acc 56.250 (55.831)	lr 0.02486
Train [6][2620/3239]	Time 0.215 (0.495)	Data Time 0.001 (0.011)	Loss 4.0915 (3.9387)	Entropy 1.88259 (1.88606)	Top-1 acc 30.469 (32.159)	Top-5 acc 51.172 (55.831)	lr 0.02486
Train [6][2630/3239]	Time 0.188 (0.494)	Data Time 0.001 (0.011)	Loss 3.6649 (3.9388)	Entropy 1.88248 (1.88605)	Top-1 acc 36.719 (32.157)	Top-5 acc 64.453 (55.833)	lr 0.02486
Train [6][2640/3239]	Time 0.229 (0.494)	Data Time 0.001 (0.011)	Loss 3.8121 (3.9386)	Entropy 1.88246 (1.88603)	Top-1 acc 32.422 (32.159)	Top-5 acc 58.594 (55.837)	lr 0.02486
Train [6][2650/3239]	Time 0.172 (0.494)	Data Time 0.001 (0.011)	Loss 4.0517 (3.9386)	Entropy 1.88241 (1.88602)	Top-1 acc 28.125 (32.160)	Top-5 acc 50.391 (55.837)	lr 0.02486
Train [6][2660/3239]	Time 0.198 (0.493)	Data Time 0.001 (0.011)	Loss 4.1022 (3.9385)	Entropy 1.88238 (1.88600)	Top-1 acc 29.297 (32.158)	Top-5 acc 52.734 (55.837)	lr 0.02486
Train [6][2670/3239]	Time 0.190 (0.493)	Data Time 0.001 (0.011)	Loss 4.0947 (3.9386)	Entropy 1.88235 (1.88599)	Top-1 acc 29.688 (32.159)	Top-5 acc 52.344 (55.838)	lr 0.02485
Train [6][2680/3239]	Time 0.141 (0.493)	Data Time 0.001 (0.011)	Loss 4.0597 (3.9387)	Entropy 1.88231 (1.88598)	Top-1 acc 26.562 (32.153)	Top-5 acc 52.734 (55.836)	lr 0.02485
Train [6][2690/3239]	Time 0.327 (0.493)	Data Time 0.001 (0.011)	Loss 3.8467 (3.9388)	Entropy 1.88230 (1.88596)	Top-1 acc 32.812 (32.153)	Top-5 acc 57.031 (55.834)	lr 0.02485
Train [6][2700/3239]	Time 0.280 (0.492)	Data Time 0.029 (0.011)	Loss 3.9928 (3.9385)	Entropy 1.88225 (1.88595)	Top-1 acc 32.422 (32.162)	Top-5 acc 51.562 (55.840)	lr 0.02485
Train [6][2710/3239]	Time 0.238 (0.492)	Data Time 0.001 (0.011)	Loss 3.9117 (3.9386)	Entropy 1.88223 (1.88594)	Top-1 acc 28.516 (32.161)	Top-5 acc 57.812 (55.840)	lr 0.02485
Train [6][2720/3239]	Time 0.199 (0.492)	Data Time 0.001 (0.011)	Loss 3.8807 (3.9381)	Entropy 1.88221 (1.88592)	Top-1 acc 34.766 (32.172)	Top-5 acc 55.859 (55.848)	lr 0.02485
Train [6][2730/3239]	Time 0.221 (0.491)	Data Time 0.002 (0.011)	Loss 3.8653 (3.9382)	Entropy 1.88219 (1.88591)	Top-1 acc 34.375 (32.174)	Top-5 acc 59.766 (55.845)	lr 0.02485
Train [6][2740/3239]	Time 0.228 (0.491)	Data Time 0.001 (0.011)	Loss 3.8818 (3.9382)	Entropy 1.88216 (1.88590)	Top-1 acc 32.422 (32.174)	Top-5 acc 59.375 (55.849)	lr 0.02485
Train [6][2750/3239]	Time 0.173 (0.491)	Data Time 0.001 (0.011)	Loss 4.0252 (3.9380)	Entropy 1.88215 (1.88588)	Top-1 acc 32.422 (32.178)	Top-5 acc 53.516 (55.855)	lr 0.02485
Train [6][2760/3239]	Time 0.206 (0.491)	Data Time 0.001 (0.011)	Loss 3.9640 (3.9378)	Entropy 1.88210 (1.88587)	Top-1 acc 30.859 (32.179)	Top-5 acc 55.859 (55.860)	lr 0.02485
Train [6][2770/3239]	Time 0.256 (0.503)	Data Time 0.004 (0.011)	Loss 3.9367 (3.9376)	Entropy 1.88202 (1.88586)	Top-1 acc 33.984 (32.186)	Top-5 acc 57.031 (55.864)	lr 0.02485
Train [6][2780/3239]	Time 0.220 (0.503)	Data Time 0.002 (0.011)	Loss 4.0347 (3.9376)	Entropy 1.88200 (1.88584)	Top-1 acc 29.688 (32.189)	Top-5 acc 52.734 (55.866)	lr 0.02485
Train [6][2790/3239]	Time 0.243 (0.502)	Data Time 0.002 (0.011)	Loss 3.7958 (3.9376)	Entropy 1.88199 (1.88583)	Top-1 acc 35.547 (32.195)	Top-5 acc 58.594 (55.867)	lr 0.02485
Train [6][2800/3239]	Time 0.316 (0.502)	Data Time 0.001 (0.011)	Loss 3.7698 (3.9373)	Entropy 1.88199 (1.88581)	Top-1 acc 38.281 (32.200)	Top-5 acc 60.156 (55.874)	lr 0.02485
Train [6][2810/3239]	Time 0.269 (0.502)	Data Time 0.002 (0.011)	Loss 3.8804 (3.9372)	Entropy 1.88197 (1.88580)	Top-1 acc 33.203 (32.201)	Top-5 acc 55.078 (55.877)	lr 0.02485
Train [6][2820/3239]	Time 0.173 (0.501)	Data Time 0.002 (0.011)	Loss 3.8831 (3.9373)	Entropy 1.88188 (1.88579)	Top-1 acc 32.812 (32.199)	Top-5 acc 59.766 (55.881)	lr 0.02485
Train [6][2830/3239]	Time 0.257 (0.501)	Data Time 0.001 (0.011)	Loss 3.8354 (3.9371)	Entropy 1.88183 (1.88577)	Top-1 acc 31.641 (32.202)	Top-5 acc 55.469 (55.886)	lr 0.02485
Train [6][2840/3239]	Time 0.201 (0.501)	Data Time 0.001 (0.011)	Loss 3.9279 (3.9370)	Entropy 1.88183 (1.88576)	Top-1 acc 28.906 (32.199)	Top-5 acc 57.031 (55.887)	lr 0.02485
Train [6][2850/3239]	Time 0.232 (0.500)	Data Time 0.001 (0.011)	Loss 3.7805 (3.9366)	Entropy 1.88178 (1.88575)	Top-1 acc 33.984 (32.210)	Top-5 acc 59.375 (55.897)	lr 0.02485
Train [6][2860/3239]	Time 0.209 (0.500)	Data Time 0.001 (0.011)	Loss 3.9045 (3.9366)	Entropy 1.88172 (1.88573)	Top-1 acc 36.328 (32.213)	Top-5 acc 53.125 (55.897)	lr 0.02485
Train [6][2870/3239]	Time 0.183 (0.500)	Data Time 0.001 (0.010)	Loss 3.8684 (3.9363)	Entropy 1.88171 (1.88572)	Top-1 acc 33.984 (32.220)	Top-5 acc 57.812 (55.904)	lr 0.02485
Train [6][2880/3239]	Time 0.248 (0.500)	Data Time 0.001 (0.010)	Loss 4.0036 (3.9361)	Entropy 1.88170 (1.88570)	Top-1 acc 27.734 (32.224)	Top-5 acc 54.688 (55.907)	lr 0.02485
Train [6][2890/3239]	Time 0.212 (0.499)	Data Time 0.002 (0.010)	Loss 3.7623 (3.9361)	Entropy 1.88168 (1.88569)	Top-1 acc 33.203 (32.220)	Top-5 acc 58.594 (55.908)	lr 0.02485
Train [6][2900/3239]	Time 0.210 (0.499)	Data Time 0.002 (0.010)	Loss 3.9442 (3.9360)	Entropy 1.88166 (1.88568)	Top-1 acc 29.297 (32.222)	Top-5 acc 53.906 (55.910)	lr 0.02485
Train [6][2910/3239]	Time 0.385 (0.499)	Data Time 0.001 (0.010)	Loss 4.0397 (3.9360)	Entropy 1.88161 (1.88566)	Top-1 acc 26.953 (32.217)	Top-5 acc 52.344 (55.909)	lr 0.02485
Train [6][2920/3239]	Time 0.320 (0.499)	Data Time 0.002 (0.010)	Loss 3.9076 (3.9358)	Entropy 1.88162 (1.88565)	Top-1 acc 34.766 (32.219)	Top-5 acc 56.250 (55.914)	lr 0.02485
Train [6][2930/3239]	Time 0.204 (0.498)	Data Time 0.001 (0.010)	Loss 3.6958 (3.9360)	Entropy 1.88157 (1.88563)	Top-1 acc 38.281 (32.215)	Top-5 acc 63.281 (55.914)	lr 0.02485
Train [6][2940/3239]	Time 0.228 (0.498)	Data Time 0.001 (0.010)	Loss 3.8589 (3.9360)	Entropy 1.88155 (1.88562)	Top-1 acc 35.156 (32.214)	Top-5 acc 54.688 (55.913)	lr 0.02485
Train [6][2950/3239]	Time 0.210 (0.498)	Data Time 0.001 (0.010)	Loss 3.9379 (3.9360)	Entropy 1.88154 (1.88561)	Top-1 acc 31.250 (32.216)	Top-5 acc 55.078 (55.914)	lr 0.02485
Train [6][2960/3239]	Time 0.199 (0.498)	Data Time 0.001 (0.010)	Loss 4.0262 (3.9360)	Entropy 1.88149 (1.88559)	Top-1 acc 28.125 (32.209)	Top-5 acc 52.344 (55.914)	lr 0.02485
Train [6][2970/3239]	Time 0.154 (0.497)	Data Time 0.001 (0.010)	Loss 3.8489 (3.9360)	Entropy 1.88144 (1.88558)	Top-1 acc 30.469 (32.208)	Top-5 acc 56.250 (55.915)	lr 0.02485
Train [6][2980/3239]	Time 0.200 (0.497)	Data Time 0.001 (0.010)	Loss 3.8769 (3.9358)	Entropy 1.88140 (1.88557)	Top-1 acc 37.500 (32.214)	Top-5 acc 57.031 (55.916)	lr 0.02485
Train [6][2990/3239]	Time 0.257 (0.497)	Data Time 0.001 (0.010)	Loss 3.7394 (3.9359)	Entropy 1.88139 (1.88555)	Top-1 acc 34.766 (32.212)	Top-5 acc 60.156 (55.914)	lr 0.02485
Train [6][3000/3239]	Time 0.239 (0.496)	Data Time 0.001 (0.010)	Loss 4.0429 (3.9359)	Entropy 1.88138 (1.88554)	Top-1 acc 29.297 (32.214)	Top-5 acc 51.562 (55.914)	lr 0.02485
Train [6][3010/3239]	Time 0.249 (0.496)	Data Time 0.001 (0.010)	Loss 3.9482 (3.9358)	Entropy 1.88134 (1.88552)	Top-1 acc 35.938 (32.215)	Top-5 acc 55.859 (55.915)	lr 0.02485
Train [6][3020/3239]	Time 0.305 (0.496)	Data Time 0.001 (0.010)	Loss 3.8730 (3.9357)	Entropy 1.88131 (1.88551)	Top-1 acc 33.594 (32.222)	Top-5 acc 57.031 (55.920)	lr 0.02485
Train [6][3030/3239]	Time 0.263 (0.496)	Data Time 0.002 (0.010)	Loss 4.1400 (3.9359)	Entropy 1.88130 (1.88550)	Top-1 acc 26.953 (32.217)	Top-5 acc 49.609 (55.916)	lr 0.02485
Train [6][3040/3239]	Time 0.218 (0.495)	Data Time 0.001 (0.010)	Loss 3.9120 (3.9359)	Entropy 1.88125 (1.88548)	Top-1 acc 32.422 (32.214)	Top-5 acc 57.031 (55.915)	lr 0.02485
Train [6][3050/3239]	Time 0.227 (0.495)	Data Time 0.001 (0.010)	Loss 3.7735 (3.9358)	Entropy 1.88124 (1.88547)	Top-1 acc 35.547 (32.215)	Top-5 acc 60.547 (55.923)	lr 0.02485
Train [6][3060/3239]	Time 0.205 (0.495)	Data Time 0.009 (0.010)	Loss 3.9494 (3.9358)	Entropy 1.88122 (1.88545)	Top-1 acc 32.812 (32.214)	Top-5 acc 57.422 (55.925)	lr 0.02485
Train [6][3070/3239]	Time 0.183 (0.495)	Data Time 0.001 (0.010)	Loss 3.9862 (3.9356)	Entropy 1.88121 (1.88544)	Top-1 acc 28.125 (32.218)	Top-5 acc 54.297 (55.929)	lr 0.02485
Train [6][3080/3239]	Time 0.273 (0.494)	Data Time 0.001 (0.010)	Loss 3.9978 (3.9356)	Entropy 1.88121 (1.88543)	Top-1 acc 31.641 (32.216)	Top-5 acc 56.641 (55.929)	lr 0.02485
Train [6][3090/3239]	Time 0.187 (0.494)	Data Time 0.001 (0.010)	Loss 4.0621 (3.9356)	Entropy 1.88117 (1.88541)	Top-1 acc 30.469 (32.218)	Top-5 acc 53.906 (55.930)	lr 0.02485
Train [6][3100/3239]	Time 0.201 (0.506)	Data Time 0.003 (0.010)	Loss 3.7802 (3.9356)	Entropy 1.88113 (1.88540)	Top-1 acc 35.938 (32.220)	Top-5 acc 60.547 (55.930)	lr 0.02485
Train [6][3110/3239]	Time 0.356 (0.506)	Data Time 0.002 (0.010)	Loss 4.2138 (3.9354)	Entropy 1.88112 (1.88539)	Top-1 acc 25.391 (32.222)	Top-5 acc 51.172 (55.932)	lr 0.02485
Train [6][3120/3239]	Time 0.204 (0.505)	Data Time 0.001 (0.010)	Loss 3.8604 (3.9354)	Entropy 1.88111 (1.88537)	Top-1 acc 36.328 (32.226)	Top-5 acc 56.250 (55.934)	lr 0.02485
Train [6][3130/3239]	Time 0.240 (0.505)	Data Time 0.001 (0.010)	Loss 3.9132 (3.9353)	Entropy 1.88108 (1.88536)	Top-1 acc 37.891 (32.229)	Top-5 acc 53.906 (55.931)	lr 0.02485
Train [6][3140/3239]	Time 0.194 (0.505)	Data Time 0.001 (0.010)	Loss 4.1315 (3.9353)	Entropy 1.88103 (1.88534)	Top-1 acc 27.344 (32.229)	Top-5 acc 53.125 (55.935)	lr 0.02485
Train [6][3150/3239]	Time 0.223 (0.504)	Data Time 0.001 (0.010)	Loss 3.7416 (3.9352)	Entropy 1.88097 (1.88533)	Top-1 acc 35.156 (32.230)	Top-5 acc 60.938 (55.936)	lr 0.02485
Train [6][3160/3239]	Time 0.265 (0.504)	Data Time 0.001 (0.010)	Loss 3.9106 (3.9352)	Entropy 1.88090 (1.88532)	Top-1 acc 31.641 (32.231)	Top-5 acc 54.688 (55.935)	lr 0.02485
Train [6][3170/3239]	Time 0.228 (0.504)	Data Time 0.001 (0.010)	Loss 3.9718 (3.9353)	Entropy 1.88083 (1.88530)	Top-1 acc 33.594 (32.230)	Top-5 acc 55.469 (55.936)	lr 0.02485
Train [6][3180/3239]	Time 0.191 (0.503)	Data Time 0.000 (0.010)	Loss 3.8767 (3.9352)	Entropy 1.88081 (1.88529)	Top-1 acc 37.109 (32.235)	Top-5 acc 58.984 (55.942)	lr 0.02485
Train [6][3190/3239]	Time 0.184 (0.503)	Data Time 0.000 (0.010)	Loss 3.9769 (3.9351)	Entropy 1.88081 (1.88528)	Top-1 acc 32.422 (32.239)	Top-5 acc 54.297 (55.941)	lr 0.02485
Train [6][3200/3239]	Time 0.136 (0.503)	Data Time 0.000 (0.010)	Loss 3.9891 (3.9349)	Entropy 1.88076 (1.88526)	Top-1 acc 32.031 (32.243)	Top-5 acc 52.344 (55.945)	lr 0.02485
Train [6][3210/3239]	Time 0.201 (0.502)	Data Time 0.000 (0.010)	Loss 3.8384 (3.9348)	Entropy 1.88073 (1.88525)	Top-1 acc 36.719 (32.248)	Top-5 acc 59.766 (55.951)	lr 0.02485
Train [6][3220/3239]	Time 0.125 (0.502)	Data Time 0.000 (0.010)	Loss 3.9287 (3.9345)	Entropy 1.88072 (1.88523)	Top-1 acc 30.469 (32.249)	Top-5 acc 55.078 (55.953)	lr 0.02485
Train [6][3230/3239]	Time 0.271 (0.501)	Data Time 0.000 (0.010)	Loss 3.8537 (3.9345)	Entropy 1.88064 (1.88522)	Top-1 acc 32.812 (32.248)	Top-5 acc 57.422 (55.952)	lr 0.02485
Train [6][3239/3239]	Time 2.039 (0.501)	Data Time 0.000 (0.010)	Loss 4.1240 (3.9345)	Entropy 1.88064 (1.88521)	Top-1 acc 29.630 (32.250)	Top-5 acc 53.086 (55.953)	lr 0.02485
==========Valid [6/120]	loss 2.773	top-1 acc 41.552 (41.552)	top-5 acc 66.282	Train top-1 32.250	top-5 55.953	Entropy 1.88064	Latency-None: 0.000ms	Flops: 510.67M
Train [7][0/3239]	Time 21.949 (21.949)	Data Time 21.255 (21.255)	Loss 4.0232 (4.0232)	Entropy 1.88059 (1.88059)	Top-1 acc 33.203 (33.203)	Top-5 acc 56.641 (56.641)	lr 0.02485
Train [7][10/3239]	Time 2.350 (2.682)	Data Time 0.003 (2.120)	Loss 4.0884 (3.9051)	Entropy 1.88059 (1.88059)	Top-1 acc 30.469 (32.493)	Top-5 acc 53.125 (56.321)	lr 0.02485
Train [7][20/3239]	Time 0.245 (1.502)	Data Time 0.001 (1.111)	Loss 3.9805 (3.9276)	Entropy 1.88058 (1.88058)	Top-1 acc 32.422 (32.422)	Top-5 acc 52.734 (56.231)	lr 0.02485
Train [7][30/3239]	Time 0.206 (1.153)	Data Time 0.001 (0.753)	Loss 3.6937 (3.9041)	Entropy 1.88056 (1.88058)	Top-1 acc 37.891 (32.863)	Top-5 acc 62.500 (57.031)	lr 0.02485
Train [7][40/3239]	Time 0.190 (0.975)	Data Time 0.001 (0.570)	Loss 3.8541 (3.9249)	Entropy 1.88053 (1.88057)	Top-1 acc 34.375 (32.222)	Top-5 acc 58.984 (56.364)	lr 0.02485
Train [7][50/3239]	Time 0.167 (0.865)	Data Time 0.001 (0.459)	Loss 3.9522 (3.9237)	Entropy 1.88047 (1.88055)	Top-1 acc 33.594 (32.475)	Top-5 acc 54.688 (56.434)	lr 0.02485
Train [7][60/3239]	Time 0.157 (0.793)	Data Time 0.001 (0.384)	Loss 3.8649 (3.9247)	Entropy 1.88045 (1.88054)	Top-1 acc 30.859 (32.351)	Top-5 acc 62.500 (56.532)	lr 0.02485
Train [7][70/3239]	Time 0.208 (0.738)	Data Time 0.001 (0.331)	Loss 3.9021 (3.9300)	Entropy 1.88041 (1.88052)	Top-1 acc 30.859 (32.312)	Top-5 acc 57.812 (56.388)	lr 0.02484
Train [7][80/3239]	Time 0.204 (0.697)	Data Time 0.001 (0.290)	Loss 3.9631 (3.9296)	Entropy 1.88038 (1.88050)	Top-1 acc 36.328 (32.369)	Top-5 acc 57.812 (56.390)	lr 0.02484
Train [7][90/3239]	Time 0.216 (0.667)	Data Time 0.002 (0.258)	Loss 3.9295 (3.9211)	Entropy 1.88038 (1.88049)	Top-1 acc 34.375 (32.508)	Top-5 acc 57.422 (56.452)	lr 0.02484
Train [7][100/3239]	Time 0.216 (0.643)	Data Time 0.002 (0.233)	Loss 4.0499 (3.9168)	Entropy 1.88036 (1.88048)	Top-1 acc 31.250 (32.611)	Top-5 acc 53.906 (56.501)	lr 0.02484
Train [7][110/3239]	Time 0.286 (0.623)	Data Time 0.001 (0.212)	Loss 3.9333 (3.9176)	Entropy 1.88033 (1.88047)	Top-1 acc 35.938 (32.686)	Top-5 acc 58.594 (56.419)	lr 0.02484
Train [7][120/3239]	Time 2.279 (0.606)	Data Time 0.002 (0.195)	Loss 3.7381 (3.9150)	Entropy 1.88033 (1.88046)	Top-1 acc 36.328 (32.690)	Top-5 acc 59.375 (56.440)	lr 0.02484
Train [7][130/3239]	Time 0.159 (0.575)	Data Time 0.001 (0.180)	Loss 3.7919 (3.9120)	Entropy 1.88030 (1.88045)	Top-1 acc 34.766 (32.651)	Top-5 acc 57.422 (56.497)	lr 0.02484
Train [7][140/3239]	Time 0.156 (0.564)	Data Time 0.001 (0.167)	Loss 3.9634 (3.9122)	Entropy 1.88028 (1.88043)	Top-1 acc 33.203 (32.685)	Top-5 acc 59.375 (56.535)	lr 0.02484
Train [7][150/3239]	Time 0.226 (0.553)	Data Time 0.001 (0.157)	Loss 3.8101 (3.9087)	Entropy 1.88027 (1.88042)	Top-1 acc 33.594 (32.737)	Top-5 acc 56.641 (56.620)	lr 0.02484
Train [7][160/3239]	Time 0.132 (0.544)	Data Time 0.001 (0.147)	Loss 3.9349 (3.9103)	Entropy 1.88025 (1.88041)	Top-1 acc 30.859 (32.672)	Top-5 acc 56.250 (56.592)	lr 0.02484
Train [7][170/3239]	Time 0.142 (0.536)	Data Time 0.001 (0.139)	Loss 4.0227 (3.9095)	Entropy 1.88023 (1.88040)	Top-1 acc 28.906 (32.632)	Top-5 acc 52.344 (56.570)	lr 0.02484
Train [7][180/3239]	Time 0.191 (0.528)	Data Time 0.001 (0.132)	Loss 4.0302 (3.9100)	Entropy 1.88021 (1.88039)	Top-1 acc 28.125 (32.597)	Top-5 acc 55.859 (56.582)	lr 0.02484
Train [7][190/3239]	Time 0.170 (0.522)	Data Time 0.001 (0.125)	Loss 3.6224 (3.9066)	Entropy 1.88019 (1.88038)	Top-1 acc 37.891 (32.686)	Top-5 acc 60.156 (56.657)	lr 0.02484
Train [7][200/3239]	Time 0.215 (0.517)	Data Time 0.001 (0.119)	Loss 3.7703 (3.9048)	Entropy 1.88017 (1.88037)	Top-1 acc 35.547 (32.704)	Top-5 acc 60.938 (56.718)	lr 0.02484
Train [7][210/3239]	Time 0.288 (0.690)	Data Time 0.002 (0.113)	Loss 3.8163 (3.9049)	Entropy 1.88014 (1.88036)	Top-1 acc 32.812 (32.688)	Top-5 acc 57.812 (56.680)	lr 0.02484
Train [7][220/3239]	Time 0.202 (0.679)	Data Time 0.002 (0.108)	Loss 3.6349 (3.9041)	Entropy 1.88009 (1.88035)	Top-1 acc 39.062 (32.673)	Top-5 acc 62.891 (56.699)	lr 0.02484
Train [7][230/3239]	Time 2.157 (0.668)	Data Time 0.002 (0.104)	Loss 3.8995 (3.9034)	Entropy 1.88009 (1.88034)	Top-1 acc 34.375 (32.696)	Top-5 acc 55.078 (56.700)	lr 0.02484
Train [7][240/3239]	Time 0.159 (0.649)	Data Time 0.001 (0.099)	Loss 3.8134 (3.9032)	Entropy 1.88004 (1.88033)	Top-1 acc 32.812 (32.707)	Top-5 acc 61.719 (56.725)	lr 0.02484
Train [7][250/3239]	Time 0.253 (0.639)	Data Time 0.001 (0.096)	Loss 3.8801 (3.9016)	Entropy 1.88000 (1.88032)	Top-1 acc 33.594 (32.774)	Top-5 acc 55.078 (56.756)	lr 0.02484
Train [7][260/3239]	Time 0.203 (0.630)	Data Time 0.001 (0.092)	Loss 4.0468 (3.9015)	Entropy 1.87999 (1.88030)	Top-1 acc 30.469 (32.783)	Top-5 acc 51.953 (56.772)	lr 0.02484
Train [7][270/3239]	Time 0.231 (0.622)	Data Time 0.001 (0.089)	Loss 3.8999 (3.9000)	Entropy 1.87995 (1.88029)	Top-1 acc 33.984 (32.828)	Top-5 acc 59.375 (56.811)	lr 0.02484
Train [7][280/3239]	Time 0.219 (0.615)	Data Time 0.002 (0.086)	Loss 3.8242 (3.9016)	Entropy 1.87994 (1.88028)	Top-1 acc 37.109 (32.822)	Top-5 acc 56.250 (56.739)	lr 0.02484
Train [7][290/3239]	Time 0.211 (0.608)	Data Time 0.001 (0.083)	Loss 3.8920 (3.9019)	Entropy 1.87992 (1.88027)	Top-1 acc 29.688 (32.817)	Top-5 acc 55.469 (56.740)	lr 0.02484
Train [7][300/3239]	Time 0.242 (0.601)	Data Time 0.001 (0.080)	Loss 3.8181 (3.9049)	Entropy 1.87988 (1.88025)	Top-1 acc 34.375 (32.797)	Top-5 acc 55.078 (56.651)	lr 0.02484
Train [7][310/3239]	Time 0.245 (0.594)	Data Time 0.001 (0.077)	Loss 3.9571 (3.9063)	Entropy 1.87985 (1.88024)	Top-1 acc 32.422 (32.750)	Top-5 acc 56.250 (56.589)	lr 0.02484
Train [7][320/3239]	Time 0.281 (0.589)	Data Time 0.001 (0.075)	Loss 3.7952 (3.9068)	Entropy 1.87981 (1.88023)	Top-1 acc 35.547 (32.772)	Top-5 acc 57.812 (56.605)	lr 0.02484
Train [7][330/3239]	Time 0.295 (0.584)	Data Time 0.001 (0.073)	Loss 3.9526 (3.9076)	Entropy 1.87981 (1.88022)	Top-1 acc 33.984 (32.769)	Top-5 acc 59.766 (56.599)	lr 0.02484
Train [7][340/3239]	Time 2.183 (0.579)	Data Time 0.001 (0.071)	Loss 3.9912 (3.9082)	Entropy 1.87981 (1.88020)	Top-1 acc 30.078 (32.758)	Top-5 acc 53.906 (56.549)	lr 0.02484
Train [7][350/3239]	Time 0.201 (0.569)	Data Time 0.001 (0.069)	Loss 3.9239 (3.9081)	Entropy 1.87979 (1.88019)	Top-1 acc 32.031 (32.778)	Top-5 acc 55.469 (56.566)	lr 0.02484
Train [7][360/3239]	Time 0.187 (0.565)	Data Time 0.001 (0.067)	Loss 3.7880 (3.9069)	Entropy 1.87979 (1.88018)	Top-1 acc 36.328 (32.791)	Top-5 acc 59.375 (56.615)	lr 0.02484
Train [7][370/3239]	Time 0.128 (0.560)	Data Time 0.001 (0.065)	Loss 3.8904 (3.9069)	Entropy 1.87973 (1.88017)	Top-1 acc 30.859 (32.795)	Top-5 acc 56.250 (56.590)	lr 0.02484
Train [7][380/3239]	Time 0.185 (0.557)	Data Time 0.001 (0.064)	Loss 3.7648 (3.9071)	Entropy 1.87970 (1.88016)	Top-1 acc 35.938 (32.779)	Top-5 acc 55.078 (56.567)	lr 0.02484
Train [7][390/3239]	Time 0.190 (0.553)	Data Time 0.001 (0.062)	Loss 3.6389 (3.9059)	Entropy 1.87968 (1.88015)	Top-1 acc 40.625 (32.788)	Top-5 acc 60.156 (56.575)	lr 0.02484
Train [7][400/3239]	Time 0.173 (0.549)	Data Time 0.001 (0.061)	Loss 3.9133 (3.9053)	Entropy 1.87963 (1.88013)	Top-1 acc 33.203 (32.807)	Top-5 acc 56.250 (56.598)	lr 0.02484
Train [7][410/3239]	Time 0.149 (0.546)	Data Time 0.001 (0.059)	Loss 3.7967 (3.9042)	Entropy 1.87956 (1.88012)	Top-1 acc 33.203 (32.843)	Top-5 acc 57.812 (56.638)	lr 0.02484
Train [7][420/3239]	Time 0.225 (0.543)	Data Time 0.001 (0.058)	Loss 3.7735 (3.9041)	Entropy 1.87950 (1.88011)	Top-1 acc 38.281 (32.844)	Top-5 acc 60.938 (56.640)	lr 0.02484
Train [7][430/3239]	Time 0.226 (0.540)	Data Time 0.001 (0.057)	Loss 4.0729 (3.9061)	Entropy 1.87944 (1.88009)	Top-1 acc 30.859 (32.801)	Top-5 acc 52.734 (56.600)	lr 0.02484
Train [7][440/3239]	Time 0.319 (0.537)	Data Time 0.001 (0.055)	Loss 3.8912 (3.9059)	Entropy 1.87939 (1.88008)	Top-1 acc 34.375 (32.812)	Top-5 acc 57.812 (56.596)	lr 0.02484
Train [7][450/3239]	Time 2.080 (0.534)	Data Time 0.001 (0.054)	Loss 3.9267 (3.9050)	Entropy 1.87939 (1.88006)	Top-1 acc 32.422 (32.829)	Top-5 acc 58.594 (56.645)	lr 0.02484
Train [7][460/3239]	Time 0.196 (0.527)	Data Time 0.001 (0.053)	Loss 3.9811 (3.9059)	Entropy 1.87938 (1.88005)	Top-1 acc 30.859 (32.807)	Top-5 acc 57.031 (56.626)	lr 0.02484
Train [7][470/3239]	Time 0.160 (0.524)	Data Time 0.001 (0.052)	Loss 3.8549 (3.9058)	Entropy 1.87937 (1.88003)	Top-1 acc 33.203 (32.823)	Top-5 acc 57.422 (56.636)	lr 0.02484
Train [7][480/3239]	Time 0.294 (0.522)	Data Time 0.002 (0.051)	Loss 3.7227 (3.9053)	Entropy 1.87936 (1.88002)	Top-1 acc 34.766 (32.825)	Top-5 acc 61.328 (56.653)	lr 0.02484
Train [7][490/3239]	Time 0.190 (0.520)	Data Time 0.001 (0.050)	Loss 4.0500 (3.9052)	Entropy 1.87933 (1.88001)	Top-1 acc 30.469 (32.835)	Top-5 acc 53.516 (56.668)	lr 0.02484
Train [7][500/3239]	Time 0.163 (0.518)	Data Time 0.001 (0.049)	Loss 3.7587 (3.9058)	Entropy 1.87929 (1.87999)	Top-1 acc 40.234 (32.841)	Top-5 acc 60.156 (56.655)	lr 0.02484
Train [7][510/3239]	Time 0.157 (0.516)	Data Time 0.001 (0.048)	Loss 3.9515 (3.9060)	Entropy 1.87926 (1.87998)	Top-1 acc 32.422 (32.833)	Top-5 acc 55.078 (56.635)	lr 0.02484
Train [7][520/3239]	Time 0.201 (0.515)	Data Time 0.002 (0.047)	Loss 3.9086 (3.9069)	Entropy 1.87918 (1.87996)	Top-1 acc 29.297 (32.817)	Top-5 acc 58.203 (56.608)	lr 0.02484
Train [7][530/3239]	Time 0.210 (0.513)	Data Time 0.001 (0.047)	Loss 3.8682 (3.9071)	Entropy 1.87917 (1.87995)	Top-1 acc 33.984 (32.818)	Top-5 acc 59.375 (56.610)	lr 0.02484
Train [7][540/3239]	Time 0.214 (0.511)	Data Time 0.001 (0.046)	Loss 3.7349 (3.9071)	Entropy 1.87914 (1.87993)	Top-1 acc 40.234 (32.807)	Top-5 acc 60.938 (56.604)	lr 0.02484
Train [7][550/3239]	Time 0.274 (0.510)	Data Time 0.002 (0.045)	Loss 4.1622 (3.9071)	Entropy 1.87909 (1.87992)	Top-1 acc 28.125 (32.818)	Top-5 acc 51.172 (56.596)	lr 0.02484
Train [7][560/3239]	Time 2.221 (0.508)	Data Time 0.001 (0.044)	Loss 4.0029 (3.9066)	Entropy 1.87909 (1.87990)	Top-1 acc 28.516 (32.824)	Top-5 acc 53.125 (56.609)	lr 0.02484
Train [7][570/3239]	Time 0.191 (0.503)	Data Time 0.001 (0.043)	Loss 3.7935 (3.9057)	Entropy 1.87909 (1.87989)	Top-1 acc 36.328 (32.859)	Top-5 acc 59.375 (56.630)	lr 0.02484
Train [7][580/3239]	Time 0.227 (0.564)	Data Time 0.002 (0.043)	Loss 3.8213 (3.9059)	Entropy 1.87902 (1.87988)	Top-1 acc 33.203 (32.870)	Top-5 acc 58.984 (56.632)	lr 0.02484
Train [7][590/3239]	Time 0.169 (0.562)	Data Time 0.001 (0.042)	Loss 3.7512 (3.9053)	Entropy 1.87895 (1.87986)	Top-1 acc 33.594 (32.873)	Top-5 acc 60.547 (56.643)	lr 0.02484
Train [7][600/3239]	Time 0.227 (0.560)	Data Time 0.002 (0.042)	Loss 3.8338 (3.9049)	Entropy 1.87890 (1.87984)	Top-1 acc 32.422 (32.884)	Top-5 acc 58.984 (56.648)	lr 0.02484
Train [7][610/3239]	Time 0.262 (0.557)	Data Time 0.001 (0.041)	Loss 3.8448 (3.9050)	Entropy 1.87887 (1.87983)	Top-1 acc 34.375 (32.880)	Top-5 acc 59.375 (56.660)	lr 0.02484
Train [7][620/3239]	Time 0.160 (0.555)	Data Time 0.001 (0.040)	Loss 3.7889 (3.9054)	Entropy 1.87885 (1.87981)	Top-1 acc 34.766 (32.867)	Top-5 acc 61.328 (56.663)	lr 0.02484
Train [7][630/3239]	Time 0.209 (0.553)	Data Time 0.001 (0.040)	Loss 3.6945 (3.9050)	Entropy 1.87883 (1.87980)	Top-1 acc 30.469 (32.852)	Top-5 acc 62.500 (56.671)	lr 0.02484
Train [7][640/3239]	Time 0.181 (0.551)	Data Time 0.001 (0.039)	Loss 3.8837 (3.9048)	Entropy 1.87884 (1.87978)	Top-1 acc 33.984 (32.831)	Top-5 acc 56.641 (56.675)	lr 0.02484
Train [7][650/3239]	Time 0.206 (0.549)	Data Time 0.001 (0.039)	Loss 3.9689 (3.9040)	Entropy 1.87881 (1.87977)	Top-1 acc 29.688 (32.832)	Top-5 acc 53.516 (56.675)	lr 0.02484
Train [7][660/3239]	Time 0.238 (0.547)	Data Time 0.001 (0.038)	Loss 3.8924 (3.9038)	Entropy 1.87879 (1.87975)	Top-1 acc 33.594 (32.831)	Top-5 acc 54.688 (56.670)	lr 0.02484
Train [7][670/3239]	Time 2.148 (0.545)	Data Time 0.001 (0.038)	Loss 3.8726 (3.9031)	Entropy 1.87879 (1.87974)	Top-1 acc 34.766 (32.843)	Top-5 acc 53.906 (56.674)	lr 0.02484
Train [7][680/3239]	Time 0.243 (0.540)	Data Time 0.001 (0.037)	Loss 3.7996 (3.9032)	Entropy 1.87873 (1.87972)	Top-1 acc 32.031 (32.834)	Top-5 acc 62.109 (56.682)	lr 0.02484
Train [7][690/3239]	Time 0.241 (0.538)	Data Time 0.001 (0.036)	Loss 3.9311 (3.9036)	Entropy 1.87868 (1.87971)	Top-1 acc 31.641 (32.833)	Top-5 acc 57.422 (56.667)	lr 0.02484
Train [7][700/3239]	Time 0.229 (0.536)	Data Time 0.001 (0.036)	Loss 3.9949 (3.9036)	Entropy 1.87865 (1.87969)	Top-1 acc 31.641 (32.825)	Top-5 acc 54.297 (56.667)	lr 0.02483
Train [7][710/3239]	Time 0.232 (0.534)	Data Time 0.002 (0.035)	Loss 4.0995 (3.9045)	Entropy 1.87862 (1.87968)	Top-1 acc 28.516 (32.814)	Top-5 acc 53.516 (56.646)	lr 0.02483
Train [7][720/3239]	Time 0.185 (0.532)	Data Time 0.001 (0.035)	Loss 4.0240 (3.9052)	Entropy 1.87858 (1.87966)	Top-1 acc 31.250 (32.799)	Top-5 acc 52.344 (56.637)	lr 0.02483
Train [7][730/3239]	Time 0.268 (0.531)	Data Time 0.001 (0.035)	Loss 3.8715 (3.9049)	Entropy 1.87856 (1.87965)	Top-1 acc 36.328 (32.805)	Top-5 acc 58.203 (56.643)	lr 0.02483
Train [7][740/3239]	Time 0.210 (0.529)	Data Time 0.001 (0.034)	Loss 3.8784 (3.9051)	Entropy 1.87854 (1.87963)	Top-1 acc 34.766 (32.823)	Top-5 acc 57.422 (56.624)	lr 0.02483
Train [7][750/3239]	Time 0.198 (0.528)	Data Time 0.001 (0.034)	Loss 4.0291 (3.9040)	Entropy 1.87853 (1.87962)	Top-1 acc 30.859 (32.843)	Top-5 acc 50.781 (56.642)	lr 0.02483
Train [7][760/3239]	Time 0.148 (0.526)	Data Time 0.002 (0.033)	Loss 3.8436 (3.9039)	Entropy 1.87845 (1.87961)	Top-1 acc 32.031 (32.840)	Top-5 acc 56.250 (56.647)	lr 0.02483
Train [7][770/3239]	Time 0.187 (0.525)	Data Time 0.001 (0.033)	Loss 3.7827 (3.9036)	Entropy 1.87842 (1.87959)	Top-1 acc 36.328 (32.829)	Top-5 acc 60.156 (56.649)	lr 0.02483
Train [7][780/3239]	Time 2.231 (0.523)	Data Time 0.002 (0.033)	Loss 3.9133 (3.9026)	Entropy 1.87842 (1.87958)	Top-1 acc 33.594 (32.839)	Top-5 acc 54.688 (56.680)	lr 0.02483
Train [7][790/3239]	Time 0.193 (0.519)	Data Time 0.001 (0.032)	Loss 3.8143 (3.9021)	Entropy 1.87841 (1.87956)	Top-1 acc 36.719 (32.853)	Top-5 acc 63.281 (56.691)	lr 0.02483
Train [7][800/3239]	Time 0.182 (0.518)	Data Time 0.001 (0.032)	Loss 3.8781 (3.9018)	Entropy 1.87837 (1.87955)	Top-1 acc 34.766 (32.866)	Top-5 acc 59.375 (56.699)	lr 0.02483
Train [7][810/3239]	Time 0.206 (0.517)	Data Time 0.001 (0.031)	Loss 3.6874 (3.9014)	Entropy 1.87835 (1.87953)	Top-1 acc 37.891 (32.876)	Top-5 acc 60.938 (56.705)	lr 0.02483
Train [7][820/3239]	Time 0.185 (0.515)	Data Time 0.001 (0.031)	Loss 3.8231 (3.9016)	Entropy 1.87834 (1.87952)	Top-1 acc 34.766 (32.888)	Top-5 acc 57.812 (56.701)	lr 0.02483
Train [7][830/3239]	Time 0.206 (0.514)	Data Time 0.001 (0.031)	Loss 3.9407 (3.9017)	Entropy 1.87831 (1.87950)	Top-1 acc 31.641 (32.887)	Top-5 acc 53.125 (56.696)	lr 0.02483
Train [7][840/3239]	Time 0.301 (0.513)	Data Time 0.001 (0.030)	Loss 3.9173 (3.9026)	Entropy 1.87831 (1.87949)	Top-1 acc 30.859 (32.861)	Top-5 acc 54.297 (56.682)	lr 0.02483
Train [7][850/3239]	Time 0.192 (0.512)	Data Time 0.001 (0.030)	Loss 3.8215 (3.9021)	Entropy 1.87828 (1.87947)	Top-1 acc 38.281 (32.868)	Top-5 acc 58.594 (56.698)	lr 0.02483
Train [7][860/3239]	Time 0.198 (0.511)	Data Time 0.002 (0.030)	Loss 3.9175 (3.9023)	Entropy 1.87826 (1.87946)	Top-1 acc 36.328 (32.875)	Top-5 acc 59.375 (56.695)	lr 0.02483
Train [7][870/3239]	Time 0.185 (0.510)	Data Time 0.001 (0.029)	Loss 4.0270 (3.9021)	Entropy 1.87824 (1.87945)	Top-1 acc 30.859 (32.883)	Top-5 acc 55.078 (56.701)	lr 0.02483
Train [7][880/3239]	Time 0.199 (0.509)	Data Time 0.001 (0.029)	Loss 3.7437 (3.9029)	Entropy 1.87820 (1.87943)	Top-1 acc 35.938 (32.865)	Top-5 acc 62.891 (56.692)	lr 0.02483
Train [7][890/3239]	Time 2.271 (0.508)	Data Time 0.002 (0.029)	Loss 4.0911 (3.9032)	Entropy 1.87820 (1.87942)	Top-1 acc 33.203 (32.864)	Top-5 acc 52.344 (56.682)	lr 0.02483
Train [7][900/3239]	Time 0.184 (0.505)	Data Time 0.002 (0.029)	Loss 4.1378 (3.9033)	Entropy 1.87819 (1.87940)	Top-1 acc 26.953 (32.858)	Top-5 acc 50.391 (56.679)	lr 0.02483
Train [7][910/3239]	Time 0.205 (0.504)	Data Time 0.001 (0.028)	Loss 3.8822 (3.9036)	Entropy 1.87817 (1.87939)	Top-1 acc 35.547 (32.857)	Top-5 acc 56.250 (56.674)	lr 0.02483
Train [7][920/3239]	Time 0.155 (0.503)	Data Time 0.001 (0.028)	Loss 3.8346 (3.9029)	Entropy 1.87810 (1.87938)	Top-1 acc 34.375 (32.868)	Top-5 acc 59.766 (56.685)	lr 0.02483
Train [7][930/3239]	Time 0.232 (0.502)	Data Time 0.001 (0.028)	Loss 3.9127 (3.9026)	Entropy 1.87808 (1.87936)	Top-1 acc 32.812 (32.878)	Top-5 acc 55.078 (56.689)	lr 0.02483
Train [7][940/3239]	Time 0.283 (0.537)	Data Time 0.003 (0.028)	Loss 3.8864 (3.9030)	Entropy 1.87803 (1.87935)	Top-1 acc 30.469 (32.878)	Top-5 acc 60.547 (56.687)	lr 0.02483
Train [7][950/3239]	Time 0.206 (0.536)	Data Time 0.002 (0.027)	Loss 3.9273 (3.9030)	Entropy 1.87801 (1.87934)	Top-1 acc 28.125 (32.870)	Top-5 acc 57.422 (56.692)	lr 0.02483
Train [7][960/3239]	Time 0.183 (0.535)	Data Time 0.002 (0.027)	Loss 3.9645 (3.9030)	Entropy 1.87796 (1.87932)	Top-1 acc 32.031 (32.870)	Top-5 acc 53.516 (56.686)	lr 0.02483
Train [7][970/3239]	Time 0.196 (0.534)	Data Time 0.002 (0.027)	Loss 3.8424 (3.9029)	Entropy 1.87792 (1.87931)	Top-1 acc 35.156 (32.878)	Top-5 acc 55.078 (56.686)	lr 0.02483
Train [7][980/3239]	Time 0.184 (0.533)	Data Time 0.001 (0.027)	Loss 4.0115 (3.9027)	Entropy 1.87788 (1.87929)	Top-1 acc 28.906 (32.887)	Top-5 acc 53.906 (56.691)	lr 0.02483
Train [7][990/3239]	Time 0.207 (0.532)	Data Time 0.002 (0.026)	Loss 3.8703 (3.9023)	Entropy 1.87784 (1.87928)	Top-1 acc 35.547 (32.894)	Top-5 acc 58.203 (56.697)	lr 0.02483
Train [7][1000/3239]	Time 2.232 (0.531)	Data Time 0.001 (0.026)	Loss 3.9021 (3.9023)	Entropy 1.87784 (1.87926)	Top-1 acc 31.250 (32.889)	Top-5 acc 52.734 (56.682)	lr 0.02483
Train [7][1010/3239]	Time 0.208 (0.527)	Data Time 0.002 (0.026)	Loss 3.7891 (3.9018)	Entropy 1.87781 (1.87925)	Top-1 acc 36.719 (32.897)	Top-5 acc 59.375 (56.687)	lr 0.02483
Train [7][1020/3239]	Time 0.291 (0.527)	Data Time 0.001 (0.026)	Loss 3.9549 (3.9020)	Entropy 1.87781 (1.87924)	Top-1 acc 35.938 (32.900)	Top-5 acc 56.250 (56.681)	lr 0.02483
Train [7][1030/3239]	Time 0.286 (0.526)	Data Time 0.001 (0.025)	Loss 4.0110 (3.9019)	Entropy 1.87776 (1.87922)	Top-1 acc 31.250 (32.899)	Top-5 acc 53.516 (56.676)	lr 0.02483
Train [7][1040/3239]	Time 0.210 (0.524)	Data Time 0.001 (0.025)	Loss 3.6773 (3.9018)	Entropy 1.87776 (1.87921)	Top-1 acc 35.547 (32.900)	Top-5 acc 62.500 (56.679)	lr 0.02483
Train [7][1050/3239]	Time 0.194 (0.523)	Data Time 0.001 (0.025)	Loss 4.0054 (3.9027)	Entropy 1.87773 (1.87919)	Top-1 acc 31.250 (32.886)	Top-5 acc 53.906 (56.665)	lr 0.02483
Train [7][1060/3239]	Time 0.144 (0.522)	Data Time 0.001 (0.025)	Loss 4.0179 (3.9022)	Entropy 1.87769 (1.87918)	Top-1 acc 31.641 (32.887)	Top-5 acc 53.125 (56.675)	lr 0.02483
Train [7][1070/3239]	Time 0.230 (0.521)	Data Time 0.001 (0.025)	Loss 3.5781 (3.9024)	Entropy 1.87768 (1.87917)	Top-1 acc 37.109 (32.872)	Top-5 acc 62.109 (56.669)	lr 0.02483
Train [7][1080/3239]	Time 0.177 (0.520)	Data Time 0.001 (0.024)	Loss 3.9538 (3.9024)	Entropy 1.87765 (1.87915)	Top-1 acc 27.734 (32.865)	Top-5 acc 51.953 (56.660)	lr 0.02483
Train [7][1090/3239]	Time 0.196 (0.519)	Data Time 0.001 (0.024)	Loss 4.0799 (3.9022)	Entropy 1.87764 (1.87914)	Top-1 acc 28.516 (32.859)	Top-5 acc 53.125 (56.657)	lr 0.02483
Train [7][1100/3239]	Time 0.208 (0.518)	Data Time 0.001 (0.024)	Loss 3.8651 (3.9023)	Entropy 1.87763 (1.87912)	Top-1 acc 35.938 (32.862)	Top-5 acc 55.469 (56.658)	lr 0.02483
Train [7][1110/3239]	Time 2.168 (0.517)	Data Time 0.001 (0.024)	Loss 3.7598 (3.9019)	Entropy 1.87763 (1.87911)	Top-1 acc 36.719 (32.869)	Top-5 acc 60.938 (56.673)	lr 0.02483
Train [7][1120/3239]	Time 0.291 (0.514)	Data Time 0.001 (0.024)	Loss 3.8840 (3.9024)	Entropy 1.87760 (1.87910)	Top-1 acc 32.422 (32.859)	Top-5 acc 58.594 (56.665)	lr 0.02483
Train [7][1130/3239]	Time 0.209 (0.514)	Data Time 0.001 (0.023)	Loss 3.7070 (3.9022)	Entropy 1.87754 (1.87908)	Top-1 acc 38.281 (32.865)	Top-5 acc 60.547 (56.666)	lr 0.02483
Train [7][1140/3239]	Time 0.219 (0.513)	Data Time 0.002 (0.023)	Loss 3.9893 (3.9024)	Entropy 1.87752 (1.87907)	Top-1 acc 32.422 (32.863)	Top-5 acc 54.297 (56.663)	lr 0.02483
Train [7][1150/3239]	Time 0.220 (0.512)	Data Time 0.001 (0.023)	Loss 3.8716 (3.9022)	Entropy 1.87750 (1.87906)	Top-1 acc 34.766 (32.867)	Top-5 acc 54.297 (56.667)	lr 0.02483
Train [7][1160/3239]	Time 0.235 (0.511)	Data Time 0.001 (0.023)	Loss 3.9671 (3.9029)	Entropy 1.87747 (1.87904)	Top-1 acc 27.344 (32.855)	Top-5 acc 50.391 (56.648)	lr 0.02483
Train [7][1170/3239]	Time 0.152 (0.510)	Data Time 0.001 (0.023)	Loss 4.2033 (3.9031)	Entropy 1.87745 (1.87903)	Top-1 acc 29.688 (32.848)	Top-5 acc 49.609 (56.638)	lr 0.02483
Train [7][1180/3239]	Time 0.203 (0.509)	Data Time 0.001 (0.023)	Loss 3.7585 (3.9030)	Entropy 1.87741 (1.87902)	Top-1 acc 37.500 (32.846)	Top-5 acc 61.328 (56.641)	lr 0.02483
Train [7][1190/3239]	Time 0.250 (0.508)	Data Time 0.001 (0.022)	Loss 3.8144 (3.9026)	Entropy 1.87734 (1.87900)	Top-1 acc 32.812 (32.844)	Top-5 acc 58.594 (56.648)	lr 0.02483
Train [7][1200/3239]	Time 0.238 (0.508)	Data Time 0.001 (0.022)	Loss 3.9278 (3.9031)	Entropy 1.87731 (1.87899)	Top-1 acc 29.297 (32.829)	Top-5 acc 57.031 (56.633)	lr 0.02483
Train [7][1210/3239]	Time 0.194 (0.507)	Data Time 0.001 (0.022)	Loss 3.7479 (3.9030)	Entropy 1.87725 (1.87897)	Top-1 acc 34.766 (32.832)	Top-5 acc 60.938 (56.637)	lr 0.02483
Train [7][1220/3239]	Time 2.201 (0.506)	Data Time 0.002 (0.022)	Loss 3.9955 (3.9030)	Entropy 1.87725 (1.87896)	Top-1 acc 31.250 (32.828)	Top-5 acc 56.250 (56.636)	lr 0.02483
Train [7][1230/3239]	Time 0.185 (0.504)	Data Time 0.001 (0.022)	Loss 3.8080 (3.9026)	Entropy 1.87723 (1.87895)	Top-1 acc 32.422 (32.838)	Top-5 acc 59.375 (56.645)	lr 0.02483
Train [7][1240/3239]	Time 0.188 (0.503)	Data Time 0.001 (0.022)	Loss 3.6895 (3.9029)	Entropy 1.87717 (1.87893)	Top-1 acc 34.375 (32.829)	Top-5 acc 61.328 (56.634)	lr 0.02483
Train [7][1250/3239]	Time 0.169 (0.502)	Data Time 0.001 (0.021)	Loss 3.9880 (3.9031)	Entropy 1.87715 (1.87892)	Top-1 acc 26.953 (32.832)	Top-5 acc 53.125 (56.636)	lr 0.02483
Train [7][1260/3239]	Time 0.157 (0.502)	Data Time 0.001 (0.021)	Loss 3.9716 (3.9031)	Entropy 1.87711 (1.87890)	Top-1 acc 26.953 (32.824)	Top-5 acc 55.078 (56.634)	lr 0.02483
Train [7][1270/3239]	Time 0.206 (0.501)	Data Time 0.001 (0.021)	Loss 3.8987 (3.9030)	Entropy 1.87710 (1.87889)	Top-1 acc 34.375 (32.820)	Top-5 acc 55.859 (56.628)	lr 0.02483
Train [7][1280/3239]	Time 0.254 (0.501)	Data Time 0.001 (0.021)	Loss 3.8289 (3.9030)	Entropy 1.87708 (1.87888)	Top-1 acc 29.688 (32.817)	Top-5 acc 58.594 (56.628)	lr 0.02483
Train [7][1290/3239]	Time 0.177 (0.500)	Data Time 0.001 (0.021)	Loss 4.0296 (3.9032)	Entropy 1.87707 (1.87886)	Top-1 acc 30.469 (32.807)	Top-5 acc 51.562 (56.625)	lr 0.02483
Train [7][1300/3239]	Time 0.206 (0.528)	Data Time 0.002 (0.021)	Loss 4.0201 (3.9039)	Entropy 1.87707 (1.87885)	Top-1 acc 28.125 (32.787)	Top-5 acc 53.125 (56.604)	lr 0.02482
Train [7][1310/3239]	Time 0.259 (0.528)	Data Time 0.003 (0.021)	Loss 3.8981 (3.9038)	Entropy 1.87706 (1.87883)	Top-1 acc 29.688 (32.782)	Top-5 acc 55.078 (56.609)	lr 0.02482
Train [7][1320/3239]	Time 0.272 (0.527)	Data Time 0.002 (0.020)	Loss 3.9691 (3.9033)	Entropy 1.87706 (1.87882)	Top-1 acc 31.641 (32.794)	Top-5 acc 55.859 (56.613)	lr 0.02482
Train [7][1330/3239]	Time 2.169 (0.526)	Data Time 0.001 (0.020)	Loss 3.9115 (3.9036)	Entropy 1.87706 (1.87881)	Top-1 acc 32.422 (32.789)	Top-5 acc 57.031 (56.607)	lr 0.02482
Train [7][1340/3239]	Time 0.251 (0.523)	Data Time 0.002 (0.020)	Loss 3.6631 (3.9034)	Entropy 1.87702 (1.87879)	Top-1 acc 35.938 (32.797)	Top-5 acc 60.547 (56.604)	lr 0.02482
Train [7][1350/3239]	Time 0.196 (0.523)	Data Time 0.001 (0.020)	Loss 3.9564 (3.9027)	Entropy 1.87699 (1.87878)	Top-1 acc 31.641 (32.810)	Top-5 acc 55.469 (56.614)	lr 0.02482
Train [7][1360/3239]	Time 0.141 (0.522)	Data Time 0.001 (0.020)	Loss 3.9265 (3.9030)	Entropy 1.87696 (1.87877)	Top-1 acc 32.422 (32.809)	Top-5 acc 58.203 (56.614)	lr 0.02482
Train [7][1370/3239]	Time 0.197 (0.521)	Data Time 0.001 (0.020)	Loss 3.7999 (3.9028)	Entropy 1.87690 (1.87875)	Top-1 acc 37.500 (32.813)	Top-5 acc 58.594 (56.620)	lr 0.02482
Train [7][1380/3239]	Time 0.231 (0.520)	Data Time 0.001 (0.020)	Loss 3.7158 (3.9027)	Entropy 1.87685 (1.87874)	Top-1 acc 35.156 (32.815)	Top-5 acc 60.547 (56.628)	lr 0.02482
Train [7][1390/3239]	Time 0.157 (0.519)	Data Time 0.001 (0.020)	Loss 4.2442 (3.9034)	Entropy 1.87681 (1.87873)	Top-1 acc 31.250 (32.808)	Top-5 acc 49.219 (56.606)	lr 0.02482
Train [7][1400/3239]	Time 0.218 (0.519)	Data Time 0.001 (0.019)	Loss 3.8265 (3.9030)	Entropy 1.87679 (1.87871)	Top-1 acc 35.938 (32.814)	Top-5 acc 56.641 (56.615)	lr 0.02482
Train [7][1410/3239]	Time 0.203 (0.518)	Data Time 0.001 (0.019)	Loss 3.9136 (3.9032)	Entropy 1.87676 (1.87870)	Top-1 acc 32.422 (32.811)	Top-5 acc 56.641 (56.605)	lr 0.02482
Train [7][1420/3239]	Time 0.262 (0.517)	Data Time 0.001 (0.019)	Loss 3.9405 (3.9033)	Entropy 1.87670 (1.87869)	Top-1 acc 35.938 (32.812)	Top-5 acc 57.031 (56.604)	lr 0.02482
Train [7][1430/3239]	Time 0.378 (0.517)	Data Time 0.001 (0.019)	Loss 3.9256 (3.9037)	Entropy 1.87667 (1.87867)	Top-1 acc 30.859 (32.813)	Top-5 acc 54.688 (56.596)	lr 0.02482
Train [7][1440/3239]	Time 2.143 (0.516)	Data Time 0.001 (0.019)	Loss 3.9641 (3.9034)	Entropy 1.87667 (1.87866)	Top-1 acc 31.641 (32.816)	Top-5 acc 50.781 (56.594)	lr 0.02482
Train [7][1450/3239]	Time 0.215 (0.514)	Data Time 0.001 (0.019)	Loss 3.8940 (3.9033)	Entropy 1.87659 (1.87864)	Top-1 acc 34.766 (32.812)	Top-5 acc 57.031 (56.601)	lr 0.02482
Train [7][1460/3239]	Time 0.264 (0.513)	Data Time 0.001 (0.019)	Loss 3.8710 (3.9038)	Entropy 1.87657 (1.87863)	Top-1 acc 33.594 (32.807)	Top-5 acc 58.984 (56.594)	lr 0.02482
Train [7][1470/3239]	Time 0.211 (0.512)	Data Time 0.001 (0.019)	Loss 3.6526 (3.9033)	Entropy 1.87650 (1.87861)	Top-1 acc 36.328 (32.819)	Top-5 acc 62.109 (56.604)	lr 0.02482
Train [7][1480/3239]	Time 0.251 (0.512)	Data Time 0.001 (0.018)	Loss 3.8510 (3.9033)	Entropy 1.87648 (1.87860)	Top-1 acc 30.859 (32.806)	Top-5 acc 59.766 (56.607)	lr 0.02482
Train [7][1490/3239]	Time 0.237 (0.511)	Data Time 0.002 (0.018)	Loss 4.0174 (3.9030)	Entropy 1.87644 (1.87859)	Top-1 acc 32.031 (32.815)	Top-5 acc 54.297 (56.617)	lr 0.02482
Train [7][1500/3239]	Time 0.200 (0.510)	Data Time 0.001 (0.018)	Loss 3.7630 (3.9030)	Entropy 1.87643 (1.87857)	Top-1 acc 36.719 (32.812)	Top-5 acc 60.547 (56.616)	lr 0.02482
Train [7][1510/3239]	Time 0.238 (0.510)	Data Time 0.002 (0.018)	Loss 3.9637 (3.9026)	Entropy 1.87640 (1.87856)	Top-1 acc 32.812 (32.822)	Top-5 acc 52.344 (56.620)	lr 0.02482
Train [7][1520/3239]	Time 0.232 (0.509)	Data Time 0.001 (0.018)	Loss 4.1037 (3.9028)	Entropy 1.87639 (1.87854)	Top-1 acc 28.125 (32.815)	Top-5 acc 49.219 (56.613)	lr 0.02482
Train [7][1530/3239]	Time 0.236 (0.509)	Data Time 0.001 (0.018)	Loss 3.6651 (3.9025)	Entropy 1.87639 (1.87853)	Top-1 acc 36.719 (32.820)	Top-5 acc 62.500 (56.617)	lr 0.02482
Train [7][1540/3239]	Time 0.187 (0.508)	Data Time 0.001 (0.018)	Loss 3.9758 (3.9024)	Entropy 1.87635 (1.87852)	Top-1 acc 33.984 (32.818)	Top-5 acc 56.250 (56.618)	lr 0.02482
Train [7][1550/3239]	Time 2.181 (0.507)	Data Time 0.001 (0.018)	Loss 4.0216 (3.9027)	Entropy 1.87635 (1.87850)	Top-1 acc 31.641 (32.811)	Top-5 acc 54.688 (56.616)	lr 0.02482
Train [7][1560/3239]	Time 0.190 (0.506)	Data Time 0.001 (0.018)	Loss 3.6698 (3.9024)	Entropy 1.87628 (1.87849)	Top-1 acc 32.031 (32.811)	Top-5 acc 63.672 (56.628)	lr 0.02482
Train [7][1570/3239]	Time 0.207 (0.505)	Data Time 0.001 (0.018)	Loss 3.9341 (3.9024)	Entropy 1.87624 (1.87847)	Top-1 acc 33.203 (32.807)	Top-5 acc 58.203 (56.626)	lr 0.02482
Train [7][1580/3239]	Time 0.194 (0.504)	Data Time 0.001 (0.017)	Loss 3.9371 (3.9023)	Entropy 1.87623 (1.87846)	Top-1 acc 33.594 (32.801)	Top-5 acc 55.859 (56.625)	lr 0.02482
Train [7][1590/3239]	Time 0.253 (0.504)	Data Time 0.001 (0.017)	Loss 3.5558 (3.9020)	Entropy 1.87622 (1.87844)	Top-1 acc 38.672 (32.804)	Top-5 acc 67.578 (56.636)	lr 0.02482
Train [7][1600/3239]	Time 0.181 (0.503)	Data Time 0.001 (0.017)	Loss 4.0768 (3.9019)	Entropy 1.87617 (1.87843)	Top-1 acc 32.031 (32.807)	Top-5 acc 52.344 (56.640)	lr 0.02482
Train [7][1610/3239]	Time 0.205 (0.503)	Data Time 0.001 (0.017)	Loss 3.8940 (3.9019)	Entropy 1.87611 (1.87842)	Top-1 acc 37.109 (32.808)	Top-5 acc 57.031 (56.639)	lr 0.02482
Train [7][1620/3239]	Time 0.212 (0.502)	Data Time 0.001 (0.017)	Loss 4.0570 (3.9023)	Entropy 1.87605 (1.87840)	Top-1 acc 32.812 (32.807)	Top-5 acc 54.688 (56.632)	lr 0.02482
Train [7][1630/3239]	Time 0.183 (0.502)	Data Time 0.001 (0.017)	Loss 4.0220 (3.9023)	Entropy 1.87603 (1.87839)	Top-1 acc 27.734 (32.810)	Top-5 acc 54.688 (56.633)	lr 0.02482
Train [7][1640/3239]	Time 0.306 (0.501)	Data Time 0.001 (0.017)	Loss 3.7024 (3.9021)	Entropy 1.87594 (1.87837)	Top-1 acc 39.844 (32.819)	Top-5 acc 64.453 (56.640)	lr 0.02482
Train [7][1650/3239]	Time 0.225 (0.501)	Data Time 0.001 (0.017)	Loss 3.8176 (3.9021)	Entropy 1.87589 (1.87836)	Top-1 acc 38.281 (32.821)	Top-5 acc 59.375 (56.632)	lr 0.02482
Train [7][1660/3239]	Time 38.935 (0.522)	Data Time 0.001 (0.017)	Loss 3.8518 (3.9019)	Entropy 1.87589 (1.87834)	Top-1 acc 32.812 (32.823)	Top-5 acc 57.812 (56.641)	lr 0.02482
Train [7][1670/3239]	Time 0.217 (0.520)	Data Time 0.002 (0.017)	Loss 3.7133 (3.9016)	Entropy 1.87585 (1.87833)	Top-1 acc 37.500 (32.832)	Top-5 acc 60.547 (56.643)	lr 0.02482
Train [7][1680/3239]	Time 0.148 (0.520)	Data Time 0.001 (0.017)	Loss 3.8993 (3.9014)	Entropy 1.87576 (1.87831)	Top-1 acc 32.812 (32.834)	Top-5 acc 58.594 (56.651)	lr 0.02482
Train [7][1690/3239]	Time 0.229 (0.519)	Data Time 0.001 (0.016)	Loss 3.9126 (3.9013)	Entropy 1.87572 (1.87830)	Top-1 acc 33.594 (32.840)	Top-5 acc 58.594 (56.654)	lr 0.02482
Train [7][1700/3239]	Time 0.225 (0.518)	Data Time 0.002 (0.016)	Loss 3.9058 (3.9009)	Entropy 1.87570 (1.87828)	Top-1 acc 35.156 (32.851)	Top-5 acc 57.812 (56.666)	lr 0.02482
Train [7][1710/3239]	Time 0.202 (0.518)	Data Time 0.001 (0.016)	Loss 3.9438 (3.9009)	Entropy 1.87566 (1.87827)	Top-1 acc 32.812 (32.849)	Top-5 acc 57.031 (56.663)	lr 0.02482
Train [7][1720/3239]	Time 0.212 (0.517)	Data Time 0.001 (0.016)	Loss 3.8304 (3.9007)	Entropy 1.87563 (1.87825)	Top-1 acc 35.547 (32.852)	Top-5 acc 59.766 (56.671)	lr 0.02482
Train [7][1730/3239]	Time 0.326 (0.517)	Data Time 0.002 (0.016)	Loss 3.9549 (3.9008)	Entropy 1.87559 (1.87824)	Top-1 acc 29.688 (32.852)	Top-5 acc 57.422 (56.671)	lr 0.02482
Train [7][1740/3239]	Time 0.188 (0.516)	Data Time 0.002 (0.016)	Loss 4.0152 (3.9008)	Entropy 1.87559 (1.87822)	Top-1 acc 31.250 (32.858)	Top-5 acc 54.297 (56.673)	lr 0.02482
Train [7][1750/3239]	Time 0.195 (0.516)	Data Time 0.002 (0.016)	Loss 3.9240 (3.9008)	Entropy 1.87557 (1.87821)	Top-1 acc 34.766 (32.853)	Top-5 acc 57.812 (56.670)	lr 0.02482
Train [7][1760/3239]	Time 0.204 (0.515)	Data Time 0.001 (0.016)	Loss 3.6488 (3.9009)	Entropy 1.87552 (1.87819)	Top-1 acc 34.766 (32.845)	Top-5 acc 62.109 (56.670)	lr 0.02482
Train [7][1770/3239]	Time 2.219 (0.514)	Data Time 0.001 (0.016)	Loss 3.6663 (3.9009)	Entropy 1.87552 (1.87818)	Top-1 acc 38.281 (32.849)	Top-5 acc 58.594 (56.672)	lr 0.02482
Train [7][1780/3239]	Time 0.203 (0.513)	Data Time 0.001 (0.016)	Loss 3.9423 (3.9004)	Entropy 1.87547 (1.87816)	Top-1 acc 35.547 (32.861)	Top-5 acc 58.984 (56.689)	lr 0.02482
Train [7][1790/3239]	Time 0.216 (0.512)	Data Time 0.001 (0.016)	Loss 3.7405 (3.9002)	Entropy 1.87539 (1.87815)	Top-1 acc 34.766 (32.868)	Top-5 acc 54.688 (56.692)	lr 0.02482
Train [7][1800/3239]	Time 0.204 (0.512)	Data Time 0.001 (0.016)	Loss 4.0176 (3.9002)	Entropy 1.87533 (1.87813)	Top-1 acc 31.250 (32.872)	Top-5 acc 53.516 (56.690)	lr 0.02482
Train [7][1810/3239]	Time 0.333 (0.511)	Data Time 0.001 (0.016)	Loss 3.6530 (3.8999)	Entropy 1.87531 (1.87812)	Top-1 acc 38.281 (32.867)	Top-5 acc 62.109 (56.698)	lr 0.02482
Train [7][1820/3239]	Time 0.296 (0.511)	Data Time 0.001 (0.015)	Loss 3.9653 (3.8998)	Entropy 1.87531 (1.87810)	Top-1 acc 34.375 (32.869)	Top-5 acc 59.375 (56.706)	lr 0.02482
Train [7][1830/3239]	Time 0.207 (0.510)	Data Time 0.001 (0.015)	Loss 3.7784 (3.8996)	Entropy 1.87524 (1.87808)	Top-1 acc 39.062 (32.881)	Top-5 acc 57.812 (56.713)	lr 0.02482
Train [7][1840/3239]	Time 0.182 (0.510)	Data Time 0.001 (0.015)	Loss 3.7334 (3.8992)	Entropy 1.87524 (1.87807)	Top-1 acc 35.547 (32.881)	Top-5 acc 56.641 (56.715)	lr 0.02482
Train [7][1850/3239]	Time 0.206 (0.509)	Data Time 0.001 (0.015)	Loss 3.8034 (3.8990)	Entropy 1.87521 (1.87805)	Top-1 acc 35.156 (32.885)	Top-5 acc 59.766 (56.723)	lr 0.02482
Train [7][1860/3239]	Time 0.156 (0.508)	Data Time 0.001 (0.015)	Loss 3.7660 (3.8990)	Entropy 1.87521 (1.87804)	Top-1 acc 35.547 (32.886)	Top-5 acc 60.547 (56.725)	lr 0.02482
Train [7][1870/3239]	Time 0.231 (0.508)	Data Time 0.001 (0.015)	Loss 3.9802 (3.8993)	Entropy 1.87520 (1.87802)	Top-1 acc 33.984 (32.887)	Top-5 acc 53.125 (56.727)	lr 0.02482
Train [7][1880/3239]	Time 2.299 (0.508)	Data Time 0.001 (0.015)	Loss 3.8409 (3.8985)	Entropy 1.87520 (1.87801)	Top-1 acc 33.984 (32.907)	Top-5 acc 59.375 (56.739)	lr 0.02481
Train [7][1890/3239]	Time 0.158 (0.506)	Data Time 0.001 (0.015)	Loss 3.7917 (3.8982)	Entropy 1.87510 (1.87799)	Top-1 acc 37.500 (32.914)	Top-5 acc 58.594 (56.744)	lr 0.02481
Train [7][1900/3239]	Time 0.186 (0.506)	Data Time 0.001 (0.015)	Loss 3.9354 (3.8986)	Entropy 1.87507 (1.87798)	Top-1 acc 32.031 (32.909)	Top-5 acc 56.641 (56.736)	lr 0.02481
Train [7][1910/3239]	Time 0.235 (0.505)	Data Time 0.001 (0.015)	Loss 3.9351 (3.8980)	Entropy 1.87501 (1.87796)	Top-1 acc 32.031 (32.919)	Top-5 acc 57.812 (56.751)	lr 0.02481
Train [7][1920/3239]	Time 0.334 (0.505)	Data Time 0.001 (0.015)	Loss 3.8011 (3.8978)	Entropy 1.87497 (1.87795)	Top-1 acc 34.766 (32.919)	Top-5 acc 61.328 (56.754)	lr 0.02481
Train [7][1930/3239]	Time 0.198 (0.504)	Data Time 0.001 (0.015)	Loss 3.9278 (3.8980)	Entropy 1.87498 (1.87793)	Top-1 acc 32.422 (32.914)	Top-5 acc 51.953 (56.745)	lr 0.02481
Train [7][1940/3239]	Time 0.207 (0.504)	Data Time 0.001 (0.015)	Loss 3.6558 (3.8974)	Entropy 1.87495 (1.87792)	Top-1 acc 36.328 (32.931)	Top-5 acc 62.500 (56.758)	lr 0.02481
Train [7][1950/3239]	Time 0.203 (0.503)	Data Time 0.001 (0.015)	Loss 3.8630 (3.8973)	Entropy 1.87495 (1.87790)	Top-1 acc 34.766 (32.928)	Top-5 acc 57.422 (56.760)	lr 0.02481
Train [7][1960/3239]	Time 0.192 (0.503)	Data Time 0.001 (0.015)	Loss 3.8960 (3.8976)	Entropy 1.87492 (1.87789)	Top-1 acc 35.156 (32.923)	Top-5 acc 57.422 (56.756)	lr 0.02481
Train [7][1970/3239]	Time 0.140 (0.502)	Data Time 0.001 (0.014)	Loss 4.0371 (3.8977)	Entropy 1.87485 (1.87787)	Top-1 acc 37.109 (32.927)	Top-5 acc 55.859 (56.754)	lr 0.02481
Train [7][1980/3239]	Time 0.254 (0.502)	Data Time 0.001 (0.014)	Loss 3.6801 (3.8973)	Entropy 1.87481 (1.87785)	Top-1 acc 39.844 (32.939)	Top-5 acc 61.328 (56.763)	lr 0.02481
Train [7][1990/3239]	Time 2.149 (0.501)	Data Time 0.001 (0.014)	Loss 4.0241 (3.8973)	Entropy 1.87481 (1.87784)	Top-1 acc 26.953 (32.938)	Top-5 acc 56.250 (56.767)	lr 0.02481
Train [7][2000/3239]	Time 0.206 (0.500)	Data Time 0.001 (0.014)	Loss 3.7791 (3.8972)	Entropy 1.87480 (1.87782)	Top-1 acc 34.766 (32.934)	Top-5 acc 61.719 (56.771)	lr 0.02481
Train [7][2010/3239]	Time 0.279 (0.500)	Data Time 0.001 (0.014)	Loss 3.8967 (3.8971)	Entropy 1.87478 (1.87781)	Top-1 acc 35.938 (32.939)	Top-5 acc 57.812 (56.769)	lr 0.02481
Train [7][2020/3239]	Time 0.195 (0.499)	Data Time 0.001 (0.014)	Loss 3.8659 (3.8972)	Entropy 1.87477 (1.87779)	Top-1 acc 34.766 (32.944)	Top-5 acc 57.812 (56.769)	lr 0.02481
Train [7][2030/3239]	Time 0.378 (0.515)	Data Time 0.003 (0.014)	Loss 3.7415 (3.8971)	Entropy 1.87473 (1.87778)	Top-1 acc 35.156 (32.947)	Top-5 acc 61.328 (56.774)	lr 0.02481
Train [7][2040/3239]	Time 0.190 (0.515)	Data Time 0.002 (0.014)	Loss 3.7952 (3.8967)	Entropy 1.87471 (1.87776)	Top-1 acc 37.891 (32.954)	Top-5 acc 59.766 (56.783)	lr 0.02481
Train [7][2050/3239]	Time 0.207 (0.514)	Data Time 0.001 (0.014)	Loss 3.9017 (3.8966)	Entropy 1.87464 (1.87775)	Top-1 acc 31.641 (32.956)	Top-5 acc 56.641 (56.785)	lr 0.02481
Train [7][2060/3239]	Time 0.201 (0.514)	Data Time 0.002 (0.014)	Loss 3.9032 (3.8964)	Entropy 1.87459 (1.87773)	Top-1 acc 35.156 (32.962)	Top-5 acc 57.422 (56.793)	lr 0.02481
Train [7][2070/3239]	Time 0.167 (0.513)	Data Time 0.001 (0.014)	Loss 3.7500 (3.8960)	Entropy 1.87457 (1.87772)	Top-1 acc 37.500 (32.969)	Top-5 acc 59.766 (56.807)	lr 0.02481
Train [7][2080/3239]	Time 0.217 (0.513)	Data Time 0.001 (0.014)	Loss 4.0465 (3.8960)	Entropy 1.87454 (1.87770)	Top-1 acc 27.344 (32.972)	Top-5 acc 58.203 (56.810)	lr 0.02481
Train [7][2090/3239]	Time 0.177 (0.512)	Data Time 0.001 (0.014)	Loss 3.8366 (3.8955)	Entropy 1.87450 (1.87769)	Top-1 acc 36.328 (32.985)	Top-5 acc 62.500 (56.823)	lr 0.02481
Train [7][2100/3239]	Time 2.330 (0.512)	Data Time 0.001 (0.014)	Loss 3.9296 (3.8956)	Entropy 1.87450 (1.87767)	Top-1 acc 34.375 (32.987)	Top-5 acc 55.859 (56.819)	lr 0.02481
Train [7][2110/3239]	Time 0.203 (0.510)	Data Time 0.001 (0.014)	Loss 3.7428 (3.8954)	Entropy 1.87446 (1.87766)	Top-1 acc 32.422 (32.989)	Top-5 acc 58.984 (56.823)	lr 0.02481
Train [7][2120/3239]	Time 0.207 (0.510)	Data Time 0.001 (0.014)	Loss 3.8875 (3.8955)	Entropy 1.87442 (1.87764)	Top-1 acc 35.547 (32.989)	Top-5 acc 57.422 (56.826)	lr 0.02481
Train [7][2130/3239]	Time 0.151 (0.509)	Data Time 0.001 (0.014)	Loss 3.7194 (3.8956)	Entropy 1.87435 (1.87763)	Top-1 acc 31.641 (32.987)	Top-5 acc 57.031 (56.820)	lr 0.02481
Train [7][2140/3239]	Time 0.191 (0.509)	Data Time 0.001 (0.014)	Loss 3.7008 (3.8953)	Entropy 1.87429 (1.87761)	Top-1 acc 35.547 (32.987)	Top-5 acc 62.891 (56.828)	lr 0.02481
Train [7][2150/3239]	Time 0.317 (0.508)	Data Time 0.001 (0.014)	Loss 3.9602 (3.8955)	Entropy 1.87429 (1.87760)	Top-1 acc 30.469 (32.982)	Top-5 acc 57.812 (56.827)	lr 0.02481
Train [7][2160/3239]	Time 0.207 (0.508)	Data Time 0.001 (0.013)	Loss 3.8209 (3.8954)	Entropy 1.87425 (1.87758)	Top-1 acc 33.984 (32.987)	Top-5 acc 60.156 (56.834)	lr 0.02481
Train [7][2170/3239]	Time 0.168 (0.508)	Data Time 0.001 (0.013)	Loss 3.8642 (3.8952)	Entropy 1.87424 (1.87757)	Top-1 acc 34.766 (32.994)	Top-5 acc 58.984 (56.837)	lr 0.02481
Train [7][2180/3239]	Time 0.244 (0.507)	Data Time 0.001 (0.013)	Loss 3.9382 (3.8952)	Entropy 1.87423 (1.87755)	Top-1 acc 33.984 (32.995)	Top-5 acc 57.422 (56.837)	lr 0.02481
Train [7][2190/3239]	Time 0.201 (0.507)	Data Time 0.001 (0.013)	Loss 3.9058 (3.8956)	Entropy 1.87418 (1.87754)	Top-1 acc 33.984 (32.992)	Top-5 acc 54.297 (56.829)	lr 0.02481
Train [7][2200/3239]	Time 0.198 (0.506)	Data Time 0.001 (0.013)	Loss 3.8804 (3.8954)	Entropy 1.87415 (1.87752)	Top-1 acc 35.547 (32.995)	Top-5 acc 59.375 (56.831)	lr 0.02481
Train [7][2210/3239]	Time 2.127 (0.506)	Data Time 0.001 (0.013)	Loss 4.0152 (3.8954)	Entropy 1.87415 (1.87750)	Top-1 acc 29.688 (32.998)	Top-5 acc 50.000 (56.827)	lr 0.02481
Train [7][2220/3239]	Time 0.217 (0.504)	Data Time 0.001 (0.013)	Loss 3.8227 (3.8956)	Entropy 1.87408 (1.87749)	Top-1 acc 31.641 (32.994)	Top-5 acc 60.547 (56.822)	lr 0.02481
Train [7][2230/3239]	Time 0.317 (0.504)	Data Time 0.001 (0.013)	Loss 3.7529 (3.8955)	Entropy 1.87401 (1.87747)	Top-1 acc 39.844 (32.995)	Top-5 acc 59.375 (56.827)	lr 0.02481
Train [7][2240/3239]	Time 0.164 (0.504)	Data Time 0.001 (0.013)	Loss 3.9512 (3.8953)	Entropy 1.87397 (1.87746)	Top-1 acc 28.906 (32.999)	Top-5 acc 54.297 (56.832)	lr 0.02481
Train [7][2250/3239]	Time 0.229 (0.503)	Data Time 0.002 (0.013)	Loss 4.0086 (3.8953)	Entropy 1.87395 (1.87744)	Top-1 acc 30.859 (32.999)	Top-5 acc 54.297 (56.832)	lr 0.02481
Train [7][2260/3239]	Time 0.135 (0.503)	Data Time 0.001 (0.013)	Loss 3.8645 (3.8952)	Entropy 1.87390 (1.87743)	Top-1 acc 30.859 (33.000)	Top-5 acc 54.688 (56.833)	lr 0.02481
Train [7][2270/3239]	Time 0.210 (0.502)	Data Time 0.001 (0.013)	Loss 3.9996 (3.8952)	Entropy 1.87389 (1.87741)	Top-1 acc 35.156 (33.005)	Top-5 acc 55.859 (56.837)	lr 0.02481
Train [7][2280/3239]	Time 0.240 (0.502)	Data Time 0.001 (0.013)	Loss 3.9299 (3.8951)	Entropy 1.87386 (1.87740)	Top-1 acc 26.172 (33.005)	Top-5 acc 56.250 (56.842)	lr 0.02481
Train [7][2290/3239]	Time 0.219 (0.502)	Data Time 0.001 (0.013)	Loss 3.7702 (3.8948)	Entropy 1.87383 (1.87738)	Top-1 acc 33.984 (33.011)	Top-5 acc 62.500 (56.848)	lr 0.02481
Train [7][2300/3239]	Time 0.170 (0.501)	Data Time 0.001 (0.013)	Loss 3.6920 (3.8946)	Entropy 1.87382 (1.87737)	Top-1 acc 36.719 (33.019)	Top-5 acc 63.281 (56.856)	lr 0.02481
Train [7][2310/3239]	Time 0.167 (0.501)	Data Time 0.002 (0.013)	Loss 3.7594 (3.8944)	Entropy 1.87380 (1.87735)	Top-1 acc 34.766 (33.021)	Top-5 acc 58.594 (56.862)	lr 0.02481
Train [7][2320/3239]	Time 2.209 (0.501)	Data Time 0.001 (0.013)	Loss 3.9446 (3.8946)	Entropy 1.87380 (1.87733)	Top-1 acc 33.984 (33.015)	Top-5 acc 57.422 (56.856)	lr 0.02481
Train [7][2330/3239]	Time 0.232 (0.499)	Data Time 0.001 (0.013)	Loss 3.8989 (3.8946)	Entropy 1.87376 (1.87732)	Top-1 acc 33.594 (33.014)	Top-5 acc 58.594 (56.856)	lr 0.02481
Train [7][2340/3239]	Time 0.246 (0.499)	Data Time 0.001 (0.013)	Loss 3.9541 (3.8945)	Entropy 1.87373 (1.87730)	Top-1 acc 32.031 (33.016)	Top-5 acc 54.297 (56.855)	lr 0.02481
Train [7][2350/3239]	Time 0.153 (0.499)	Data Time 0.001 (0.013)	Loss 3.8841 (3.8943)	Entropy 1.87371 (1.87729)	Top-1 acc 29.297 (33.019)	Top-5 acc 56.250 (56.859)	lr 0.02481
Train [7][2360/3239]	Time 0.219 (0.498)	Data Time 0.001 (0.013)	Loss 3.7018 (3.8944)	Entropy 1.87366 (1.87727)	Top-1 acc 37.109 (33.015)	Top-5 acc 61.719 (56.857)	lr 0.02481
Train [7][2370/3239]	Time 0.188 (0.498)	Data Time 0.001 (0.013)	Loss 3.6714 (3.8945)	Entropy 1.87363 (1.87726)	Top-1 acc 37.109 (33.013)	Top-5 acc 60.156 (56.853)	lr 0.02481
Train [7][2380/3239]	Time 0.189 (0.497)	Data Time 0.001 (0.013)	Loss 3.8302 (3.8944)	Entropy 1.87359 (1.87724)	Top-1 acc 36.328 (33.016)	Top-5 acc 56.641 (56.858)	lr 0.02481
Train [7][2390/3239]	Time 0.377 (0.511)	Data Time 0.003 (0.012)	Loss 3.8176 (3.8942)	Entropy 1.87356 (1.87723)	Top-1 acc 33.594 (33.021)	Top-5 acc 58.984 (56.863)	lr 0.02481
Train [7][2400/3239]	Time 0.279 (0.511)	Data Time 0.002 (0.012)	Loss 3.8813 (3.8939)	Entropy 1.87354 (1.87721)	Top-1 acc 31.250 (33.032)	Top-5 acc 55.859 (56.868)	lr 0.02481
Train [7][2410/3239]	Time 0.264 (0.511)	Data Time 0.002 (0.012)	Loss 3.9464 (3.8936)	Entropy 1.87350 (1.87720)	Top-1 acc 30.859 (33.041)	Top-5 acc 56.641 (56.873)	lr 0.02481
Train [7][2420/3239]	Time 0.177 (0.511)	Data Time 0.002 (0.012)	Loss 3.9614 (3.8934)	Entropy 1.87345 (1.87718)	Top-1 acc 28.125 (33.040)	Top-5 acc 50.391 (56.874)	lr 0.02481
Train [7][2430/3239]	Time 2.346 (0.511)	Data Time 0.001 (0.012)	Loss 3.8001 (3.8933)	Entropy 1.87345 (1.87717)	Top-1 acc 33.203 (33.043)	Top-5 acc 63.281 (56.877)	lr 0.02481
Train [7][2440/3239]	Time 0.282 (0.509)	Data Time 0.001 (0.012)	Loss 3.9422 (3.8934)	Entropy 1.87339 (1.87715)	Top-1 acc 31.641 (33.037)	Top-5 acc 57.031 (56.875)	lr 0.02481
Train [7][2450/3239]	Time 0.287 (0.509)	Data Time 0.001 (0.012)	Loss 4.0788 (3.8934)	Entropy 1.87332 (1.87713)	Top-1 acc 29.297 (33.037)	Top-5 acc 56.250 (56.877)	lr 0.02480
Train [7][2460/3239]	Time 0.204 (0.509)	Data Time 0.001 (0.012)	Loss 3.8230 (3.8935)	Entropy 1.87330 (1.87712)	Top-1 acc 36.719 (33.038)	Top-5 acc 56.641 (56.875)	lr 0.02480
Train [7][2470/3239]	Time 0.163 (0.508)	Data Time 0.002 (0.012)	Loss 3.6776 (3.8934)	Entropy 1.87329 (1.87710)	Top-1 acc 37.109 (33.040)	Top-5 acc 60.547 (56.879)	lr 0.02480
Train [7][2480/3239]	Time 0.205 (0.508)	Data Time 0.001 (0.012)	Loss 4.1548 (3.8935)	Entropy 1.87327 (1.87709)	Top-1 acc 28.516 (33.036)	Top-5 acc 48.438 (56.872)	lr 0.02480
Train [7][2490/3239]	Time 0.196 (0.508)	Data Time 0.002 (0.012)	Loss 3.9985 (3.8935)	Entropy 1.87322 (1.87707)	Top-1 acc 31.250 (33.035)	Top-5 acc 53.906 (56.871)	lr 0.02480
Train [7][2500/3239]	Time 0.257 (0.507)	Data Time 0.001 (0.012)	Loss 3.8554 (3.8935)	Entropy 1.87318 (1.87706)	Top-1 acc 31.250 (33.031)	Top-5 acc 59.766 (56.874)	lr 0.02480
Train [7][2510/3239]	Time 0.200 (0.507)	Data Time 0.001 (0.012)	Loss 3.6520 (3.8934)	Entropy 1.87314 (1.87704)	Top-1 acc 39.844 (33.033)	Top-5 acc 62.109 (56.873)	lr 0.02480
Train [7][2520/3239]	Time 0.207 (0.507)	Data Time 0.001 (0.012)	Loss 3.9611 (3.8935)	Entropy 1.87312 (1.87703)	Top-1 acc 28.516 (33.027)	Top-5 acc 55.469 (56.865)	lr 0.02480
Train [7][2530/3239]	Time 0.244 (0.506)	Data Time 0.001 (0.012)	Loss 4.1654 (3.8935)	Entropy 1.87308 (1.87701)	Top-1 acc 26.953 (33.032)	Top-5 acc 47.656 (56.864)	lr 0.02480
Train [7][2540/3239]	Time 2.396 (0.506)	Data Time 0.001 (0.012)	Loss 3.8160 (3.8934)	Entropy 1.87308 (1.87700)	Top-1 acc 34.375 (33.037)	Top-5 acc 58.203 (56.869)	lr 0.02480
Train [7][2550/3239]	Time 0.166 (0.505)	Data Time 0.001 (0.012)	Loss 3.7538 (3.8932)	Entropy 1.87301 (1.87698)	Top-1 acc 35.547 (33.040)	Top-5 acc 61.719 (56.875)	lr 0.02480
Train [7][2560/3239]	Time 0.170 (0.504)	Data Time 0.001 (0.012)	Loss 3.8084 (3.8931)	Entropy 1.87298 (1.87696)	Top-1 acc 35.156 (33.046)	Top-5 acc 58.984 (56.877)	lr 0.02480
Train [7][2570/3239]	Time 0.173 (0.504)	Data Time 0.001 (0.012)	Loss 3.8393 (3.8932)	Entropy 1.87296 (1.87695)	Top-1 acc 32.422 (33.042)	Top-5 acc 57.031 (56.874)	lr 0.02480
Train [7][2580/3239]	Time 0.164 (0.504)	Data Time 0.001 (0.012)	Loss 3.9445 (3.8932)	Entropy 1.87290 (1.87693)	Top-1 acc 32.031 (33.043)	Top-5 acc 55.859 (56.875)	lr 0.02480
Train [7][2590/3239]	Time 0.191 (0.503)	Data Time 0.001 (0.012)	Loss 4.1750 (3.8932)	Entropy 1.87290 (1.87692)	Top-1 acc 25.000 (33.040)	Top-5 acc 50.781 (56.878)	lr 0.02480
Train [7][2600/3239]	Time 0.199 (0.503)	Data Time 0.001 (0.012)	Loss 3.7962 (3.8931)	Entropy 1.87285 (1.87690)	Top-1 acc 36.719 (33.045)	Top-5 acc 58.203 (56.878)	lr 0.02480
Train [7][2610/3239]	Time 0.222 (0.503)	Data Time 0.001 (0.012)	Loss 3.8844 (3.8930)	Entropy 1.87281 (1.87689)	Top-1 acc 31.641 (33.054)	Top-5 acc 57.031 (56.880)	lr 0.02480
Train [7][2620/3239]	Time 0.215 (0.502)	Data Time 0.001 (0.012)	Loss 3.9483 (3.8928)	Entropy 1.87276 (1.87687)	Top-1 acc 33.203 (33.057)	Top-5 acc 55.469 (56.885)	lr 0.02480
Train [7][2630/3239]	Time 0.199 (0.502)	Data Time 0.001 (0.012)	Loss 4.0847 (3.8929)	Entropy 1.87274 (1.87686)	Top-1 acc 24.219 (33.051)	Top-5 acc 53.125 (56.887)	lr 0.02480
Train [7][2640/3239]	Time 0.253 (0.502)	Data Time 0.001 (0.012)	Loss 4.0464 (3.8930)	Entropy 1.87271 (1.87684)	Top-1 acc 29.297 (33.046)	Top-5 acc 53.516 (56.888)	lr 0.02480
Train [7][2650/3239]	Time 0.281 (0.501)	Data Time 0.001 (0.012)	Loss 3.7243 (3.8930)	Entropy 1.87268 (1.87682)	Top-1 acc 38.672 (33.051)	Top-5 acc 58.594 (56.889)	lr 0.02480
Train [7][2660/3239]	Time 0.231 (0.501)	Data Time 0.001 (0.011)	Loss 4.0661 (3.8931)	Entropy 1.87262 (1.87681)	Top-1 acc 29.297 (33.044)	Top-5 acc 51.953 (56.882)	lr 0.02480
Train [7][2670/3239]	Time 0.217 (0.501)	Data Time 0.001 (0.011)	Loss 4.0494 (3.8932)	Entropy 1.87256 (1.87679)	Top-1 acc 28.906 (33.037)	Top-5 acc 52.734 (56.881)	lr 0.02480
Train [7][2680/3239]	Time 0.212 (0.500)	Data Time 0.001 (0.011)	Loss 3.7453 (3.8934)	Entropy 1.87252 (1.87678)	Top-1 acc 37.109 (33.034)	Top-5 acc 59.375 (56.874)	lr 0.02480
Train [7][2690/3239]	Time 0.171 (0.500)	Data Time 0.001 (0.011)	Loss 3.9250 (3.8936)	Entropy 1.87250 (1.87676)	Top-1 acc 30.078 (33.032)	Top-5 acc 56.641 (56.869)	lr 0.02480
Train [7][2700/3239]	Time 0.151 (0.500)	Data Time 0.001 (0.011)	Loss 4.1158 (3.8937)	Entropy 1.87250 (1.87675)	Top-1 acc 28.125 (33.026)	Top-5 acc 50.000 (56.865)	lr 0.02480
Train [7][2710/3239]	Time 0.210 (0.499)	Data Time 0.001 (0.011)	Loss 4.0494 (3.8939)	Entropy 1.87247 (1.87673)	Top-1 acc 33.203 (33.023)	Top-5 acc 54.297 (56.864)	lr 0.02480
Train [7][2720/3239]	Time 0.287 (0.499)	Data Time 0.001 (0.011)	Loss 3.7100 (3.8938)	Entropy 1.87244 (1.87671)	Top-1 acc 36.719 (33.025)	Top-5 acc 62.891 (56.867)	lr 0.02480
Train [7][2730/3239]	Time 0.199 (0.499)	Data Time 0.005 (0.011)	Loss 3.9808 (3.8937)	Entropy 1.87238 (1.87670)	Top-1 acc 31.250 (33.026)	Top-5 acc 57.812 (56.870)	lr 0.02480
Train [7][2740/3239]	Time 0.529 (0.511)	Data Time 0.004 (0.011)	Loss 3.7603 (3.8937)	Entropy 1.87235 (1.87668)	Top-1 acc 37.109 (33.031)	Top-5 acc 60.547 (56.872)	lr 0.02480
Train [7][2750/3239]	Time 0.148 (0.511)	Data Time 0.002 (0.011)	Loss 3.9175 (3.8935)	Entropy 1.87231 (1.87667)	Top-1 acc 30.469 (33.034)	Top-5 acc 55.078 (56.876)	lr 0.02480
Train [7][2760/3239]	Time 0.219 (0.510)	Data Time 0.001 (0.011)	Loss 3.9656 (3.8935)	Entropy 1.87228 (1.87665)	Top-1 acc 34.375 (33.033)	Top-5 acc 58.203 (56.877)	lr 0.02480
Train [7][2770/3239]	Time 0.242 (0.510)	Data Time 0.001 (0.011)	Loss 3.9652 (3.8934)	Entropy 1.87224 (1.87664)	Top-1 acc 32.422 (33.035)	Top-5 acc 58.203 (56.881)	lr 0.02480
Train [7][2780/3239]	Time 0.225 (0.510)	Data Time 0.001 (0.011)	Loss 3.7871 (3.8935)	Entropy 1.87216 (1.87662)	Top-1 acc 35.547 (33.033)	Top-5 acc 57.812 (56.875)	lr 0.02480
Train [7][2790/3239]	Time 0.211 (0.509)	Data Time 0.001 (0.011)	Loss 3.7350 (3.8935)	Entropy 1.87213 (1.87660)	Top-1 acc 40.234 (33.037)	Top-5 acc 59.375 (56.877)	lr 0.02480
Train [7][2800/3239]	Time 0.235 (0.509)	Data Time 0.001 (0.011)	Loss 3.9032 (3.8937)	Entropy 1.87205 (1.87659)	Top-1 acc 30.859 (33.030)	Top-5 acc 55.469 (56.871)	lr 0.02480
Train [7][2810/3239]	Time 0.221 (0.509)	Data Time 0.001 (0.011)	Loss 3.7451 (3.8936)	Entropy 1.87203 (1.87657)	Top-1 acc 39.062 (33.034)	Top-5 acc 59.766 (56.869)	lr 0.02480
Train [7][2820/3239]	Time 0.236 (0.509)	Data Time 0.001 (0.011)	Loss 3.8847 (3.8933)	Entropy 1.87200 (1.87656)	Top-1 acc 32.031 (33.042)	Top-5 acc 53.516 (56.879)	lr 0.02480
Train [7][2830/3239]	Time 0.242 (0.508)	Data Time 0.002 (0.011)	Loss 4.0397 (3.8933)	Entropy 1.87197 (1.87654)	Top-1 acc 33.594 (33.040)	Top-5 acc 53.906 (56.879)	lr 0.02480
Train [7][2840/3239]	Time 0.207 (0.508)	Data Time 0.001 (0.011)	Loss 3.8900 (3.8931)	Entropy 1.87193 (1.87652)	Top-1 acc 33.203 (33.044)	Top-5 acc 59.375 (56.885)	lr 0.02480
Train [7][2850/3239]	Time 0.228 (0.508)	Data Time 0.001 (0.011)	Loss 4.2381 (3.8932)	Entropy 1.87191 (1.87651)	Top-1 acc 26.562 (33.043)	Top-5 acc 48.828 (56.884)	lr 0.02480
Train [7][2860/3239]	Time 0.283 (0.507)	Data Time 0.001 (0.011)	Loss 3.8505 (3.8932)	Entropy 1.87191 (1.87649)	Top-1 acc 37.109 (33.047)	Top-5 acc 58.594 (56.885)	lr 0.02480
Train [7][2870/3239]	Time 0.206 (0.507)	Data Time 0.001 (0.011)	Loss 4.0516 (3.8933)	Entropy 1.87183 (1.87647)	Top-1 acc 33.203 (33.046)	Top-5 acc 52.734 (56.882)	lr 0.02480
Train [7][2880/3239]	Time 0.215 (0.507)	Data Time 0.001 (0.011)	Loss 3.9198 (3.8934)	Entropy 1.87176 (1.87646)	Top-1 acc 31.250 (33.041)	Top-5 acc 60.938 (56.883)	lr 0.02480
Train [7][2890/3239]	Time 0.214 (0.506)	Data Time 0.002 (0.011)	Loss 4.2062 (3.8937)	Entropy 1.87172 (1.87644)	Top-1 acc 26.953 (33.040)	Top-5 acc 47.266 (56.880)	lr 0.02480
Train [7][2900/3239]	Time 0.223 (0.506)	Data Time 0.002 (0.011)	Loss 3.7956 (3.8935)	Entropy 1.87171 (1.87643)	Top-1 acc 35.156 (33.045)	Top-5 acc 60.156 (56.881)	lr 0.02480
Train [7][2910/3239]	Time 0.201 (0.506)	Data Time 0.001 (0.011)	Loss 3.8789 (3.8934)	Entropy 1.87165 (1.87641)	Top-1 acc 33.594 (33.049)	Top-5 acc 59.375 (56.885)	lr 0.02480
Train [7][2920/3239]	Time 0.248 (0.505)	Data Time 0.001 (0.011)	Loss 3.9425 (3.8933)	Entropy 1.87165 (1.87639)	Top-1 acc 30.859 (33.050)	Top-5 acc 55.078 (56.886)	lr 0.02480
Train [7][2930/3239]	Time 0.317 (0.505)	Data Time 0.001 (0.011)	Loss 3.9702 (3.8932)	Entropy 1.87165 (1.87638)	Top-1 acc 32.031 (33.052)	Top-5 acc 52.734 (56.884)	lr 0.02480
Train [7][2940/3239]	Time 0.205 (0.505)	Data Time 0.001 (0.011)	Loss 3.7437 (3.8932)	Entropy 1.87162 (1.87636)	Top-1 acc 37.891 (33.052)	Top-5 acc 60.938 (56.883)	lr 0.02480
Train [7][2950/3239]	Time 0.240 (0.505)	Data Time 0.002 (0.011)	Loss 3.8638 (3.8932)	Entropy 1.87153 (1.87635)	Top-1 acc 33.594 (33.052)	Top-5 acc 57.812 (56.884)	lr 0.02480
Train [7][2960/3239]	Time 0.246 (0.504)	Data Time 0.001 (0.011)	Loss 3.7716 (3.8933)	Entropy 1.87143 (1.87633)	Top-1 acc 37.109 (33.053)	Top-5 acc 59.766 (56.883)	lr 0.02480
Train [7][2970/3239]	Time 0.215 (0.504)	Data Time 0.001 (0.011)	Loss 4.0137 (3.8931)	Entropy 1.87140 (1.87631)	Top-1 acc 29.688 (33.053)	Top-5 acc 55.078 (56.884)	lr 0.02480
Train [7][2980/3239]	Time 0.222 (0.504)	Data Time 0.001 (0.010)	Loss 3.9035 (3.8930)	Entropy 1.87140 (1.87630)	Top-1 acc 33.984 (33.060)	Top-5 acc 56.250 (56.890)	lr 0.02480
Train [7][2990/3239]	Time 0.193 (0.503)	Data Time 0.001 (0.010)	Loss 3.6840 (3.8928)	Entropy 1.87132 (1.87628)	Top-1 acc 37.500 (33.065)	Top-5 acc 62.109 (56.893)	lr 0.02480
Train [7][3000/3239]	Time 0.188 (0.503)	Data Time 0.001 (0.010)	Loss 3.8159 (3.8927)	Entropy 1.87130 (1.87626)	Top-1 acc 35.156 (33.068)	Top-5 acc 58.984 (56.895)	lr 0.02480
Train [7][3010/3239]	Time 0.193 (0.503)	Data Time 0.001 (0.010)	Loss 3.8198 (3.8927)	Entropy 1.87128 (1.87625)	Top-1 acc 32.422 (33.073)	Top-5 acc 57.031 (56.896)	lr 0.02479
Train [7][3020/3239]	Time 0.188 (0.502)	Data Time 0.001 (0.010)	Loss 3.8023 (3.8928)	Entropy 1.87119 (1.87623)	Top-1 acc 31.641 (33.071)	Top-5 acc 55.859 (56.894)	lr 0.02479
Train [7][3030/3239]	Time 0.237 (0.502)	Data Time 0.001 (0.010)	Loss 3.7473 (3.8928)	Entropy 1.87108 (1.87621)	Top-1 acc 35.938 (33.071)	Top-5 acc 59.375 (56.893)	lr 0.02479
Train [7][3040/3239]	Time 0.293 (0.502)	Data Time 0.001 (0.010)	Loss 3.9807 (3.8927)	Entropy 1.87106 (1.87620)	Top-1 acc 32.812 (33.071)	Top-5 acc 54.688 (56.893)	lr 0.02479
Train [7][3050/3239]	Time 0.209 (0.502)	Data Time 0.001 (0.010)	Loss 3.9313 (3.8926)	Entropy 1.87100 (1.87618)	Top-1 acc 30.859 (33.074)	Top-5 acc 53.516 (56.891)	lr 0.02479
Train [7][3060/3239]	Time 0.194 (0.501)	Data Time 0.001 (0.010)	Loss 3.9102 (3.8926)	Entropy 1.87095 (1.87616)	Top-1 acc 33.984 (33.073)	Top-5 acc 56.250 (56.894)	lr 0.02479
Train [7][3070/3239]	Time 0.404 (0.511)	Data Time 0.114 (0.010)	Loss 3.7792 (3.8926)	Entropy 1.87088 (1.87615)	Top-1 acc 33.594 (33.075)	Top-5 acc 60.938 (56.898)	lr 0.02479
Train [7][3080/3239]	Time 0.166 (0.512)	Data Time 0.002 (0.010)	Loss 3.8616 (3.8926)	Entropy 1.87082 (1.87613)	Top-1 acc 33.984 (33.072)	Top-5 acc 57.812 (56.895)	lr 0.02479
Train [7][3090/3239]	Time 0.180 (0.511)	Data Time 0.002 (0.010)	Loss 3.9926 (3.8925)	Entropy 1.87078 (1.87611)	Top-1 acc 33.203 (33.075)	Top-5 acc 52.344 (56.894)	lr 0.02479
Train [7][3100/3239]	Time 0.198 (0.511)	Data Time 0.002 (0.010)	Loss 4.0130 (3.8922)	Entropy 1.87076 (1.87609)	Top-1 acc 31.250 (33.079)	Top-5 acc 53.516 (56.899)	lr 0.02479
Train [7][3110/3239]	Time 0.182 (0.511)	Data Time 0.002 (0.010)	Loss 3.8622 (3.8923)	Entropy 1.87071 (1.87608)	Top-1 acc 33.594 (33.078)	Top-5 acc 57.031 (56.898)	lr 0.02479
Train [7][3120/3239]	Time 0.210 (0.511)	Data Time 0.001 (0.010)	Loss 4.0705 (3.8924)	Entropy 1.87065 (1.87606)	Top-1 acc 28.516 (33.078)	Top-5 acc 52.734 (56.895)	lr 0.02479
Train [7][3130/3239]	Time 0.191 (0.510)	Data Time 0.001 (0.010)	Loss 4.0845 (3.8922)	Entropy 1.87064 (1.87604)	Top-1 acc 28.516 (33.082)	Top-5 acc 50.391 (56.895)	lr 0.02479
Train [7][3140/3239]	Time 0.273 (0.510)	Data Time 0.001 (0.010)	Loss 3.7787 (3.8922)	Entropy 1.87059 (1.87603)	Top-1 acc 36.328 (33.082)	Top-5 acc 56.641 (56.895)	lr 0.02479
Train [7][3150/3239]	Time 0.172 (0.510)	Data Time 0.002 (0.010)	Loss 3.8794 (3.8921)	Entropy 1.87057 (1.87601)	Top-1 acc 35.156 (33.082)	Top-5 acc 57.031 (56.900)	lr 0.02479
Train [7][3160/3239]	Time 0.230 (0.509)	Data Time 0.001 (0.010)	Loss 3.8892 (3.8923)	Entropy 1.87056 (1.87599)	Top-1 acc 30.859 (33.080)	Top-5 acc 54.688 (56.898)	lr 0.02479
Train [7][3170/3239]	Time 0.217 (0.509)	Data Time 0.001 (0.010)	Loss 3.9249 (3.8922)	Entropy 1.87050 (1.87597)	Top-1 acc 32.422 (33.085)	Top-5 acc 55.859 (56.901)	lr 0.02479
Train [7][3180/3239]	Time 0.207 (0.509)	Data Time 0.000 (0.010)	Loss 3.9196 (3.8922)	Entropy 1.87045 (1.87596)	Top-1 acc 33.594 (33.083)	Top-5 acc 55.078 (56.900)	lr 0.02479
Train [7][3190/3239]	Time 0.194 (0.508)	Data Time 0.000 (0.010)	Loss 3.8429 (3.8921)	Entropy 1.87040 (1.87594)	Top-1 acc 32.422 (33.082)	Top-5 acc 57.422 (56.901)	lr 0.02479
Train [7][3200/3239]	Time 0.143 (0.508)	Data Time 0.000 (0.010)	Loss 3.9443 (3.8921)	Entropy 1.87031 (1.87592)	Top-1 acc 33.594 (33.084)	Top-5 acc 58.203 (56.903)	lr 0.02479
Train [7][3210/3239]	Time 0.186 (0.508)	Data Time 0.000 (0.010)	Loss 3.9927 (3.8921)	Entropy 1.87029 (1.87590)	Top-1 acc 30.078 (33.085)	Top-5 acc 53.906 (56.906)	lr 0.02479
Train [7][3220/3239]	Time 0.211 (0.507)	Data Time 0.000 (0.010)	Loss 3.8929 (3.8918)	Entropy 1.87025 (1.87589)	Top-1 acc 31.641 (33.091)	Top-5 acc 57.422 (56.910)	lr 0.02479
Train [7][3230/3239]	Time 0.194 (0.507)	Data Time 0.000 (0.010)	Loss 3.7865 (3.8918)	Entropy 1.87020 (1.87587)	Top-1 acc 38.281 (33.091)	Top-5 acc 57.812 (56.909)	lr 0.02479
Train [7][3239/3239]	Time 1.996 (0.507)	Data Time 0.000 (0.010)	Loss 3.8794 (3.8918)	Entropy 1.87020 (1.87585)	Top-1 acc 27.160 (33.093)	Top-5 acc 55.556 (56.907)	lr 0.02479
==========Valid [7/120]	loss 2.723	top-1 acc 42.344 (42.344)	top-5 acc 67.377	Train top-1 33.093	top-5 56.907	Entropy 1.87020	Latency-None: 0.000ms	Flops: 516.09M
Train [8][0/3239]	Time 24.165 (24.165)	Data Time 22.854 (22.854)	Loss 3.4336 (3.4336)	Entropy 1.87016 (1.87016)	Top-1 acc 39.062 (39.062)	Top-5 acc 65.625 (65.625)	lr 0.02479
Train [8][10/3239]	Time 2.488 (2.697)	Data Time 0.002 (2.079)	Loss 4.0547 (3.8708)	Entropy 1.87016 (1.87016)	Top-1 acc 31.250 (33.026)	Top-5 acc 51.953 (56.747)	lr 0.02479
Train [8][20/3239]	Time 0.239 (1.513)	Data Time 0.001 (1.090)	Loss 3.8239 (3.8878)	Entropy 1.87016 (1.87016)	Top-1 acc 33.594 (32.924)	Top-5 acc 57.422 (56.864)	lr 0.02479
Train [8][30/3239]	Time 0.159 (1.157)	Data Time 0.001 (0.739)	Loss 3.7943 (3.8986)	Entropy 1.87012 (1.87015)	Top-1 acc 39.453 (32.951)	Top-5 acc 60.547 (56.552)	lr 0.02479
Train [8][40/3239]	Time 0.233 (0.975)	Data Time 0.001 (0.560)	Loss 3.8709 (3.8794)	Entropy 1.87009 (1.87014)	Top-1 acc 35.156 (33.394)	Top-5 acc 57.031 (57.012)	lr 0.02479
Train [8][50/3239]	Time 0.184 (0.861)	Data Time 0.001 (0.450)	Loss 4.1060 (3.8747)	Entropy 1.87003 (1.87012)	Top-1 acc 30.469 (33.318)	Top-5 acc 51.172 (57.192)	lr 0.02479
Train [8][60/3239]	Time 0.224 (0.786)	Data Time 0.001 (0.377)	Loss 4.0689 (3.8742)	Entropy 1.86990 (1.87009)	Top-1 acc 31.250 (33.511)	Top-5 acc 53.125 (57.179)	lr 0.02479
Train [8][70/3239]	Time 0.209 (0.732)	Data Time 0.001 (0.324)	Loss 3.6735 (3.8690)	Entropy 1.86986 (1.87006)	Top-1 acc 39.062 (33.720)	Top-5 acc 62.891 (57.488)	lr 0.02479
Train [8][80/3239]	Time 0.201 (0.694)	Data Time 0.001 (0.284)	Loss 3.7593 (3.8618)	Entropy 1.86983 (1.87003)	Top-1 acc 35.547 (33.767)	Top-5 acc 60.547 (57.509)	lr 0.02479
Train [8][90/3239]	Time 0.188 (0.665)	Data Time 0.001 (0.253)	Loss 3.9120 (3.8613)	Entropy 1.86980 (1.87001)	Top-1 acc 34.375 (33.868)	Top-5 acc 57.812 (57.533)	lr 0.02479
Train [8][100/3239]	Time 0.236 (0.639)	Data Time 0.024 (0.229)	Loss 4.0510 (3.8675)	Entropy 1.86978 (1.86999)	Top-1 acc 34.375 (33.756)	Top-5 acc 54.688 (57.519)	lr 0.02479
Train [8][110/3239]	Time 0.307 (0.619)	Data Time 0.002 (0.208)	Loss 3.9610 (3.8655)	Entropy 1.86974 (1.86997)	Top-1 acc 33.594 (33.773)	Top-5 acc 52.344 (57.566)	lr 0.02479
Train [8][120/3239]	Time 2.245 (0.602)	Data Time 0.001 (0.191)	Loss 3.9304 (3.8646)	Entropy 1.86974 (1.86995)	Top-1 acc 32.031 (33.752)	Top-5 acc 55.469 (57.574)	lr 0.02479
Train [8][130/3239]	Time 0.225 (0.572)	Data Time 0.001 (0.177)	Loss 3.9469 (3.8639)	Entropy 1.86967 (1.86993)	Top-1 acc 30.469 (33.677)	Top-5 acc 51.562 (57.547)	lr 0.02479
Train [8][140/3239]	Time 0.275 (0.562)	Data Time 0.001 (0.164)	Loss 3.5916 (3.8635)	Entropy 1.86961 (1.86991)	Top-1 acc 38.672 (33.702)	Top-5 acc 62.500 (57.535)	lr 0.02479
Train [8][150/3239]	Time 0.196 (0.551)	Data Time 0.001 (0.153)	Loss 3.8206 (3.8579)	Entropy 1.86958 (1.86989)	Top-1 acc 32.812 (33.834)	Top-5 acc 60.156 (57.701)	lr 0.02479
Train [8][160/3239]	Time 0.152 (0.543)	Data Time 0.001 (0.144)	Loss 3.7956 (3.8569)	Entropy 1.86957 (1.86987)	Top-1 acc 37.109 (33.849)	Top-5 acc 58.594 (57.796)	lr 0.02479
Train [8][170/3239]	Time 0.238 (0.536)	Data Time 0.002 (0.136)	Loss 3.9043 (3.8559)	Entropy 1.86953 (1.86985)	Top-1 acc 31.250 (33.868)	Top-5 acc 56.641 (57.872)	lr 0.02479
Train [8][180/3239]	Time 0.273 (0.723)	Data Time 0.003 (0.128)	Loss 3.9515 (3.8560)	Entropy 1.86948 (1.86983)	Top-1 acc 30.469 (33.820)	Top-5 acc 53.516 (57.830)	lr 0.02479
Train [8][190/3239]	Time 0.258 (0.712)	Data Time 0.002 (0.122)	Loss 3.9110 (3.8557)	Entropy 1.86941 (1.86981)	Top-1 acc 35.156 (33.843)	Top-5 acc 56.250 (57.874)	lr 0.02479
Train [8][200/3239]	Time 0.258 (0.698)	Data Time 0.002 (0.116)	Loss 3.9064 (3.8528)	Entropy 1.86939 (1.86979)	Top-1 acc 34.766 (33.833)	Top-5 acc 57.422 (57.890)	lr 0.02479
Train [8][210/3239]	Time 0.250 (0.685)	Data Time 0.002 (0.110)	Loss 4.1004 (3.8522)	Entropy 1.86937 (1.86977)	Top-1 acc 27.734 (33.851)	Top-5 acc 51.562 (57.874)	lr 0.02479
Train [8][220/3239]	Time 0.203 (0.673)	Data Time 0.001 (0.106)	Loss 3.8095 (3.8490)	Entropy 1.86930 (1.86975)	Top-1 acc 30.859 (33.877)	Top-5 acc 60.156 (57.919)	lr 0.02479
Train [8][230/3239]	Time 2.151 (0.661)	Data Time 0.001 (0.101)	Loss 3.8862 (3.8490)	Entropy 1.86930 (1.86973)	Top-1 acc 30.469 (33.854)	Top-5 acc 56.641 (57.922)	lr 0.02479
Train [8][240/3239]	Time 0.195 (0.642)	Data Time 0.001 (0.097)	Loss 3.9307 (3.8510)	Entropy 1.86926 (1.86971)	Top-1 acc 32.812 (33.848)	Top-5 acc 56.250 (57.881)	lr 0.02479
Train [8][250/3239]	Time 0.187 (0.632)	Data Time 0.001 (0.093)	Loss 3.7756 (3.8525)	Entropy 1.86915 (1.86969)	Top-1 acc 35.938 (33.807)	Top-5 acc 57.031 (57.823)	lr 0.02479
Train [8][260/3239]	Time 0.244 (0.624)	Data Time 0.001 (0.090)	Loss 3.7096 (3.8534)	Entropy 1.86904 (1.86967)	Top-1 acc 35.547 (33.803)	Top-5 acc 60.156 (57.836)	lr 0.02479
Train [8][270/3239]	Time 0.258 (0.616)	Data Time 0.001 (0.087)	Loss 4.0046 (3.8525)	Entropy 1.86897 (1.86964)	Top-1 acc 30.078 (33.814)	Top-5 acc 52.344 (57.834)	lr 0.02479
Train [8][280/3239]	Time 0.138 (0.609)	Data Time 0.001 (0.083)	Loss 4.0335 (3.8545)	Entropy 1.86893 (1.86962)	Top-1 acc 32.031 (33.794)	Top-5 acc 50.391 (57.758)	lr 0.02479
Train [8][290/3239]	Time 0.218 (0.602)	Data Time 0.001 (0.081)	Loss 3.7052 (3.8523)	Entropy 1.86889 (1.86959)	Top-1 acc 34.766 (33.845)	Top-5 acc 62.891 (57.811)	lr 0.02479
Train [8][300/3239]	Time 0.182 (0.596)	Data Time 0.001 (0.078)	Loss 3.7585 (3.8531)	Entropy 1.86878 (1.86957)	Top-1 acc 33.594 (33.825)	Top-5 acc 61.328 (57.794)	lr 0.02479
Train [8][310/3239]	Time 0.183 (0.591)	Data Time 0.001 (0.076)	Loss 3.7517 (3.8525)	Entropy 1.86878 (1.86954)	Top-1 acc 36.719 (33.807)	Top-5 acc 60.547 (57.792)	lr 0.02478
Train [8][320/3239]	Time 0.277 (0.585)	Data Time 0.001 (0.073)	Loss 3.9793 (3.8523)	Entropy 1.86872 (1.86952)	Top-1 acc 30.859 (33.821)	Top-5 acc 52.734 (57.797)	lr 0.02478
Train [8][330/3239]	Time 0.289 (0.579)	Data Time 0.001 (0.071)	Loss 3.6900 (3.8519)	Entropy 1.86871 (1.86949)	Top-1 acc 40.625 (33.834)	Top-5 acc 61.719 (57.824)	lr 0.02478
Train [8][340/3239]	Time 2.224 (0.574)	Data Time 0.001 (0.069)	Loss 3.8729 (3.8509)	Entropy 1.86871 (1.86947)	Top-1 acc 28.125 (33.849)	Top-5 acc 58.594 (57.830)	lr 0.02478
Train [8][350/3239]	Time 0.171 (0.564)	Data Time 0.001 (0.067)	Loss 3.8310 (3.8518)	Entropy 1.86864 (1.86945)	Top-1 acc 33.984 (33.832)	Top-5 acc 58.203 (57.802)	lr 0.02478
Train [8][360/3239]	Time 0.196 (0.560)	Data Time 0.001 (0.066)	Loss 3.9747 (3.8537)	Entropy 1.86860 (1.86942)	Top-1 acc 32.812 (33.820)	Top-5 acc 57.031 (57.766)	lr 0.02478
Train [8][370/3239]	Time 0.205 (0.556)	Data Time 0.001 (0.064)	Loss 3.9275 (3.8524)	Entropy 1.86862 (1.86940)	Top-1 acc 31.641 (33.836)	Top-5 acc 54.297 (57.776)	lr 0.02478
Train [8][380/3239]	Time 0.160 (0.553)	Data Time 0.001 (0.062)	Loss 3.9426 (3.8512)	Entropy 1.86861 (1.86938)	Top-1 acc 32.031 (33.852)	Top-5 acc 54.688 (57.816)	lr 0.02478
Train [8][390/3239]	Time 0.162 (0.549)	Data Time 0.001 (0.061)	Loss 3.8082 (3.8523)	Entropy 1.86857 (1.86936)	Top-1 acc 33.594 (33.835)	Top-5 acc 58.203 (57.794)	lr 0.02478
Train [8][400/3239]	Time 0.217 (0.546)	Data Time 0.001 (0.059)	Loss 3.8279 (3.8538)	Entropy 1.86854 (1.86934)	Top-1 acc 36.719 (33.788)	Top-5 acc 55.078 (57.774)	lr 0.02478
Train [8][410/3239]	Time 0.198 (0.543)	Data Time 0.001 (0.058)	Loss 3.9072 (3.8526)	Entropy 1.86853 (1.86932)	Top-1 acc 34.766 (33.809)	Top-5 acc 52.734 (57.786)	lr 0.02478
Train [8][420/3239]	Time 0.308 (0.540)	Data Time 0.001 (0.057)	Loss 3.8986 (3.8533)	Entropy 1.86853 (1.86930)	Top-1 acc 31.250 (33.806)	Top-5 acc 58.594 (57.757)	lr 0.02478
Train [8][430/3239]	Time 0.180 (0.537)	Data Time 0.001 (0.055)	Loss 3.9412 (3.8541)	Entropy 1.86850 (1.86928)	Top-1 acc 32.031 (33.813)	Top-5 acc 50.000 (57.745)	lr 0.02478
Train [8][440/3239]	Time 0.208 (0.534)	Data Time 0.001 (0.054)	Loss 3.8193 (3.8546)	Entropy 1.86842 (1.86927)	Top-1 acc 37.109 (33.816)	Top-5 acc 59.375 (57.739)	lr 0.02478
Train [8][450/3239]	Time 2.261 (0.532)	Data Time 0.001 (0.053)	Loss 3.8681 (3.8548)	Entropy 1.86842 (1.86925)	Top-1 acc 33.203 (33.799)	Top-5 acc 59.766 (57.761)	lr 0.02478
Train [8][460/3239]	Time 0.229 (0.525)	Data Time 0.001 (0.052)	Loss 3.8129 (3.8549)	Entropy 1.86838 (1.86923)	Top-1 acc 37.109 (33.796)	Top-5 acc 61.328 (57.759)	lr 0.02478
Train [8][470/3239]	Time 0.186 (0.523)	Data Time 0.001 (0.051)	Loss 4.0741 (3.8547)	Entropy 1.86832 (1.86921)	Top-1 acc 28.125 (33.806)	Top-5 acc 54.297 (57.769)	lr 0.02478
Train [8][480/3239]	Time 0.190 (0.521)	Data Time 0.001 (0.050)	Loss 3.7018 (3.8563)	Entropy 1.86830 (1.86919)	Top-1 acc 33.594 (33.771)	Top-5 acc 59.766 (57.730)	lr 0.02478
Train [8][490/3239]	Time 0.188 (0.518)	Data Time 0.001 (0.049)	Loss 3.9147 (3.8560)	Entropy 1.86827 (1.86917)	Top-1 acc 36.328 (33.774)	Top-5 acc 55.469 (57.726)	lr 0.02478
Train [8][500/3239]	Time 0.194 (0.516)	Data Time 0.001 (0.048)	Loss 3.9949 (3.8549)	Entropy 1.86821 (1.86915)	Top-1 acc 30.859 (33.789)	Top-5 acc 56.250 (57.743)	lr 0.02478
Train [8][510/3239]	Time 0.219 (0.514)	Data Time 0.001 (0.047)	Loss 3.9088 (3.8553)	Entropy 1.86819 (1.86914)	Top-1 acc 31.250 (33.791)	Top-5 acc 52.344 (57.738)	lr 0.02478
Train [8][520/3239]	Time 0.182 (0.512)	Data Time 0.001 (0.046)	Loss 3.8818 (3.8562)	Entropy 1.86819 (1.86912)	Top-1 acc 36.328 (33.790)	Top-5 acc 55.078 (57.714)	lr 0.02478
Train [8][530/3239]	Time 0.135 (0.510)	Data Time 0.001 (0.046)	Loss 3.7438 (3.8555)	Entropy 1.86813 (1.86910)	Top-1 acc 37.109 (33.802)	Top-5 acc 61.328 (57.723)	lr 0.02478
Train [8][540/3239]	Time 0.287 (0.575)	Data Time 0.004 (0.045)	Loss 3.7208 (3.8555)	Entropy 1.86811 (1.86908)	Top-1 acc 39.453 (33.797)	Top-5 acc 58.203 (57.718)	lr 0.02478
Train [8][550/3239]	Time 0.231 (0.573)	Data Time 0.002 (0.044)	Loss 3.7434 (3.8558)	Entropy 1.86807 (1.86906)	Top-1 acc 33.984 (33.789)	Top-5 acc 60.547 (57.717)	lr 0.02478
Train [8][560/3239]	Time 2.184 (0.570)	Data Time 0.001 (0.043)	Loss 3.9999 (3.8560)	Entropy 1.86807 (1.86905)	Top-1 acc 32.422 (33.798)	Top-5 acc 55.078 (57.703)	lr 0.02478
Train [8][570/3239]	Time 0.238 (0.564)	Data Time 0.001 (0.043)	Loss 4.0819 (3.8569)	Entropy 1.86805 (1.86903)	Top-1 acc 30.859 (33.776)	Top-5 acc 51.953 (57.685)	lr 0.02478
Train [8][580/3239]	Time 0.216 (0.562)	Data Time 0.001 (0.042)	Loss 3.8046 (3.8561)	Entropy 1.86802 (1.86901)	Top-1 acc 34.375 (33.806)	Top-5 acc 58.203 (57.696)	lr 0.02478
Train [8][590/3239]	Time 0.134 (0.559)	Data Time 0.001 (0.041)	Loss 3.9793 (3.8555)	Entropy 1.86801 (1.86899)	Top-1 acc 32.422 (33.836)	Top-5 acc 53.906 (57.721)	lr 0.02478
Train [8][600/3239]	Time 0.348 (0.557)	Data Time 0.003 (0.041)	Loss 3.8369 (3.8553)	Entropy 1.86798 (1.86898)	Top-1 acc 32.031 (33.817)	Top-5 acc 62.500 (57.727)	lr 0.02478
Train [8][610/3239]	Time 0.203 (0.555)	Data Time 0.001 (0.040)	Loss 3.8507 (3.8556)	Entropy 1.86793 (1.86896)	Top-1 acc 33.594 (33.814)	Top-5 acc 57.812 (57.719)	lr 0.02478
Train [8][620/3239]	Time 0.185 (0.552)	Data Time 0.001 (0.039)	Loss 4.0652 (3.8557)	Entropy 1.86785 (1.86894)	Top-1 acc 29.688 (33.826)	Top-5 acc 55.859 (57.711)	lr 0.02478
Train [8][630/3239]	Time 0.176 (0.550)	Data Time 0.001 (0.039)	Loss 4.1200 (3.8555)	Entropy 1.86784 (1.86893)	Top-1 acc 29.297 (33.817)	Top-5 acc 50.781 (57.718)	lr 0.02478
Train [8][640/3239]	Time 0.165 (0.548)	Data Time 0.001 (0.038)	Loss 3.5836 (3.8560)	Entropy 1.86784 (1.86891)	Top-1 acc 39.844 (33.794)	Top-5 acc 64.453 (57.705)	lr 0.02478
Train [8][650/3239]	Time 0.256 (0.546)	Data Time 0.001 (0.038)	Loss 3.9597 (3.8560)	Entropy 1.86775 (1.86889)	Top-1 acc 32.031 (33.804)	Top-5 acc 57.422 (57.716)	lr 0.02478
Train [8][660/3239]	Time 0.153 (0.544)	Data Time 0.001 (0.037)	Loss 3.8991 (3.8559)	Entropy 1.86774 (1.86887)	Top-1 acc 31.250 (33.804)	Top-5 acc 57.812 (57.734)	lr 0.02478
Train [8][670/3239]	Time 2.246 (0.542)	Data Time 0.001 (0.037)	Loss 3.9288 (3.8564)	Entropy 1.86774 (1.86886)	Top-1 acc 32.422 (33.789)	Top-5 acc 54.688 (57.736)	lr 0.02478
Train [8][680/3239]	Time 0.212 (0.537)	Data Time 0.001 (0.036)	Loss 3.9746 (3.8575)	Entropy 1.86773 (1.86884)	Top-1 acc 32.031 (33.769)	Top-5 acc 54.688 (57.705)	lr 0.02478
Train [8][690/3239]	Time 0.194 (0.535)	Data Time 0.001 (0.036)	Loss 3.8323 (3.8579)	Entropy 1.86767 (1.86882)	Top-1 acc 34.766 (33.762)	Top-5 acc 59.375 (57.705)	lr 0.02478
Train [8][700/3239]	Time 0.263 (0.533)	Data Time 0.001 (0.035)	Loss 3.7022 (3.8588)	Entropy 1.86760 (1.86881)	Top-1 acc 39.453 (33.744)	Top-5 acc 57.812 (57.687)	lr 0.02478
Train [8][710/3239]	Time 0.326 (0.532)	Data Time 0.001 (0.035)	Loss 3.8247 (3.8589)	Entropy 1.86754 (1.86879)	Top-1 acc 34.766 (33.739)	Top-5 acc 60.156 (57.687)	lr 0.02478
Train [8][720/3239]	Time 0.188 (0.530)	Data Time 0.002 (0.034)	Loss 4.0044 (3.8591)	Entropy 1.86750 (1.86877)	Top-1 acc 33.984 (33.739)	Top-5 acc 57.422 (57.693)	lr 0.02478
Train [8][730/3239]	Time 0.246 (0.529)	Data Time 0.002 (0.034)	Loss 3.8454 (3.8592)	Entropy 1.86747 (1.86875)	Top-1 acc 36.719 (33.736)	Top-5 acc 60.547 (57.698)	lr 0.02478
Train [8][740/3239]	Time 0.186 (0.527)	Data Time 0.002 (0.033)	Loss 3.7812 (3.8593)	Entropy 1.86740 (1.86874)	Top-1 acc 40.234 (33.744)	Top-5 acc 57.031 (57.689)	lr 0.02478
Train [8][750/3239]	Time 0.197 (0.526)	Data Time 0.001 (0.033)	Loss 3.9446 (3.8593)	Entropy 1.86735 (1.86872)	Top-1 acc 32.812 (33.725)	Top-5 acc 57.031 (57.687)	lr 0.02478
Train [8][760/3239]	Time 0.197 (0.524)	Data Time 0.001 (0.033)	Loss 4.0393 (3.8596)	Entropy 1.86729 (1.86870)	Top-1 acc 26.172 (33.714)	Top-5 acc 51.562 (57.673)	lr 0.02478
Train [8][770/3239]	Time 0.243 (0.523)	Data Time 0.001 (0.032)	Loss 3.9726 (3.8594)	Entropy 1.86722 (1.86868)	Top-1 acc 29.297 (33.708)	Top-5 acc 54.688 (57.672)	lr 0.02478
Train [8][780/3239]	Time 2.264 (0.521)	Data Time 0.001 (0.032)	Loss 3.7608 (3.8593)	Entropy 1.86722 (1.86866)	Top-1 acc 32.031 (33.708)	Top-5 acc 61.719 (57.679)	lr 0.02478
Train [8][790/3239]	Time 0.242 (0.517)	Data Time 0.001 (0.032)	Loss 3.8844 (3.8599)	Entropy 1.86719 (1.86864)	Top-1 acc 33.984 (33.705)	Top-5 acc 54.297 (57.663)	lr 0.02478
Train [8][800/3239]	Time 0.201 (0.516)	Data Time 0.001 (0.031)	Loss 3.9937 (3.8600)	Entropy 1.86716 (1.86863)	Top-1 acc 28.516 (33.694)	Top-5 acc 53.125 (57.657)	lr 0.02478
Train [8][810/3239]	Time 0.158 (0.514)	Data Time 0.001 (0.031)	Loss 3.9844 (3.8604)	Entropy 1.86713 (1.86861)	Top-1 acc 30.078 (33.677)	Top-5 acc 53.516 (57.640)	lr 0.02478
Train [8][820/3239]	Time 0.297 (0.513)	Data Time 0.001 (0.030)	Loss 3.8411 (3.8597)	Entropy 1.86704 (1.86859)	Top-1 acc 34.375 (33.688)	Top-5 acc 55.469 (57.651)	lr 0.02478
Train [8][830/3239]	Time 0.210 (0.512)	Data Time 0.001 (0.030)	Loss 3.9975 (3.8596)	Entropy 1.86699 (1.86857)	Top-1 acc 31.641 (33.685)	Top-5 acc 57.031 (57.657)	lr 0.02478
Train [8][840/3239]	Time 0.227 (0.511)	Data Time 0.002 (0.030)	Loss 3.7070 (3.8588)	Entropy 1.86699 (1.86855)	Top-1 acc 33.984 (33.700)	Top-5 acc 58.984 (57.669)	lr 0.02477
Train [8][850/3239]	Time 0.152 (0.510)	Data Time 0.002 (0.029)	Loss 3.8908 (3.8590)	Entropy 1.86695 (1.86853)	Top-1 acc 35.938 (33.692)	Top-5 acc 56.250 (57.660)	lr 0.02477
Train [8][860/3239]	Time 0.219 (0.509)	Data Time 0.002 (0.029)	Loss 3.6610 (3.8587)	Entropy 1.86692 (1.86851)	Top-1 acc 32.031 (33.695)	Top-5 acc 60.156 (57.669)	lr 0.02477
Train [8][870/3239]	Time 0.186 (0.508)	Data Time 0.001 (0.029)	Loss 4.0660 (3.8585)	Entropy 1.86692 (1.86850)	Top-1 acc 28.906 (33.709)	Top-5 acc 52.734 (57.660)	lr 0.02477
Train [8][880/3239]	Time 0.144 (0.506)	Data Time 0.001 (0.029)	Loss 3.9542 (3.8583)	Entropy 1.86690 (1.86848)	Top-1 acc 28.906 (33.716)	Top-5 acc 52.344 (57.654)	lr 0.02477
Train [8][890/3239]	Time 2.315 (0.506)	Data Time 0.001 (0.028)	Loss 3.7817 (3.8584)	Entropy 1.86690 (1.86846)	Top-1 acc 34.766 (33.720)	Top-5 acc 60.938 (57.652)	lr 0.02477
Train [8][900/3239]	Time 0.261 (0.503)	Data Time 0.002 (0.028)	Loss 3.7605 (3.8584)	Entropy 1.86687 (1.86844)	Top-1 acc 31.641 (33.703)	Top-5 acc 65.234 (57.665)	lr 0.02477
Train [8][910/3239]	Time 0.319 (0.542)	Data Time 0.002 (0.028)	Loss 3.8586 (3.8583)	Entropy 1.86685 (1.86843)	Top-1 acc 35.938 (33.713)	Top-5 acc 58.203 (57.668)	lr 0.02477
Train [8][920/3239]	Time 0.161 (0.540)	Data Time 0.002 (0.028)	Loss 3.9220 (3.8579)	Entropy 1.86681 (1.86841)	Top-1 acc 30.469 (33.717)	Top-5 acc 54.297 (57.680)	lr 0.02477
Train [8][930/3239]	Time 0.201 (0.539)	Data Time 0.002 (0.027)	Loss 3.7538 (3.8577)	Entropy 1.86676 (1.86839)	Top-1 acc 36.328 (33.723)	Top-5 acc 60.547 (57.676)	lr 0.02477
Train [8][940/3239]	Time 0.212 (0.538)	Data Time 0.001 (0.027)	Loss 3.9427 (3.8586)	Entropy 1.86672 (1.86837)	Top-1 acc 30.078 (33.695)	Top-5 acc 53.906 (57.646)	lr 0.02477
Train [8][950/3239]	Time 0.210 (0.536)	Data Time 0.001 (0.027)	Loss 3.8725 (3.8581)	Entropy 1.86670 (1.86836)	Top-1 acc 30.859 (33.698)	Top-5 acc 59.766 (57.660)	lr 0.02477
Train [8][960/3239]	Time 0.239 (0.535)	Data Time 0.001 (0.026)	Loss 3.9416 (3.8582)	Entropy 1.86663 (1.86834)	Top-1 acc 35.156 (33.688)	Top-5 acc 56.250 (57.651)	lr 0.02477
Train [8][970/3239]	Time 0.204 (0.534)	Data Time 0.001 (0.026)	Loss 4.0760 (3.8586)	Entropy 1.86656 (1.86832)	Top-1 acc 27.734 (33.694)	Top-5 acc 54.688 (57.647)	lr 0.02477
Train [8][980/3239]	Time 0.193 (0.532)	Data Time 0.001 (0.026)	Loss 3.7413 (3.8585)	Entropy 1.86650 (1.86830)	Top-1 acc 37.500 (33.704)	Top-5 acc 60.938 (57.650)	lr 0.02477
Train [8][990/3239]	Time 0.193 (0.531)	Data Time 0.001 (0.026)	Loss 3.8867 (3.8585)	Entropy 1.86644 (1.86828)	Top-1 acc 32.812 (33.706)	Top-5 acc 53.906 (57.644)	lr 0.02477
Train [8][1000/3239]	Time 2.208 (0.530)	Data Time 0.001 (0.026)	Loss 3.7955 (3.8583)	Entropy 1.86644 (1.86827)	Top-1 acc 33.203 (33.708)	Top-5 acc 58.203 (57.639)	lr 0.02477
Train [8][1010/3239]	Time 0.206 (0.527)	Data Time 0.001 (0.025)	Loss 3.8936 (3.8587)	Entropy 1.86641 (1.86825)	Top-1 acc 32.422 (33.700)	Top-5 acc 56.250 (57.633)	lr 0.02477
Train [8][1020/3239]	Time 0.214 (0.526)	Data Time 0.002 (0.025)	Loss 3.6714 (3.8582)	Entropy 1.86638 (1.86823)	Top-1 acc 35.938 (33.715)	Top-5 acc 62.500 (57.647)	lr 0.02477
Train [8][1030/3239]	Time 0.205 (0.525)	Data Time 0.001 (0.025)	Loss 3.9543 (3.8583)	Entropy 1.86634 (1.86821)	Top-1 acc 29.688 (33.711)	Top-5 acc 53.906 (57.639)	lr 0.02477
Train [8][1040/3239]	Time 0.219 (0.524)	Data Time 0.002 (0.025)	Loss 3.8532 (3.8581)	Entropy 1.86626 (1.86819)	Top-1 acc 31.641 (33.710)	Top-5 acc 58.203 (57.647)	lr 0.02477
Train [8][1050/3239]	Time 0.218 (0.523)	Data Time 0.001 (0.024)	Loss 3.8551 (3.8587)	Entropy 1.86625 (1.86817)	Top-1 acc 31.250 (33.699)	Top-5 acc 56.250 (57.633)	lr 0.02477
Train [8][1060/3239]	Time 0.248 (0.521)	Data Time 0.001 (0.024)	Loss 3.9857 (3.8589)	Entropy 1.86621 (1.86816)	Top-1 acc 32.031 (33.697)	Top-5 acc 53.906 (57.623)	lr 0.02477
Train [8][1070/3239]	Time 0.178 (0.520)	Data Time 0.001 (0.024)	Loss 3.6625 (3.8583)	Entropy 1.86615 (1.86814)	Top-1 acc 39.453 (33.718)	Top-5 acc 65.625 (57.641)	lr 0.02477
Train [8][1080/3239]	Time 0.227 (0.519)	Data Time 0.001 (0.024)	Loss 3.9731 (3.8584)	Entropy 1.86609 (1.86812)	Top-1 acc 30.078 (33.713)	Top-5 acc 52.734 (57.640)	lr 0.02477
Train [8][1090/3239]	Time 0.231 (0.518)	Data Time 0.001 (0.024)	Loss 3.9887 (3.8584)	Entropy 1.86606 (1.86810)	Top-1 acc 29.297 (33.718)	Top-5 acc 53.125 (57.640)	lr 0.02477
Train [8][1100/3239]	Time 0.334 (0.517)	Data Time 0.001 (0.023)	Loss 3.9260 (3.8580)	Entropy 1.86602 (1.86808)	Top-1 acc 31.641 (33.725)	Top-5 acc 57.422 (57.650)	lr 0.02477
Train [8][1110/3239]	Time 2.236 (0.516)	Data Time 0.001 (0.023)	Loss 3.8799 (3.8578)	Entropy 1.86602 (1.86806)	Top-1 acc 33.594 (33.746)	Top-5 acc 53.516 (57.654)	lr 0.02477
Train [8][1120/3239]	Time 0.156 (0.514)	Data Time 0.001 (0.023)	Loss 3.8547 (3.8582)	Entropy 1.86600 (1.86804)	Top-1 acc 35.156 (33.747)	Top-5 acc 59.766 (57.650)	lr 0.02477
Train [8][1130/3239]	Time 0.192 (0.513)	Data Time 0.001 (0.023)	Loss 3.7789 (3.8580)	Entropy 1.86597 (1.86803)	Top-1 acc 35.156 (33.756)	Top-5 acc 60.547 (57.649)	lr 0.02477
Train [8][1140/3239]	Time 0.232 (0.512)	Data Time 0.001 (0.023)	Loss 3.9063 (3.8585)	Entropy 1.86596 (1.86801)	Top-1 acc 31.641 (33.746)	Top-5 acc 55.469 (57.639)	lr 0.02477
Train [8][1150/3239]	Time 0.156 (0.511)	Data Time 0.001 (0.022)	Loss 3.7638 (3.8580)	Entropy 1.86591 (1.86799)	Top-1 acc 32.812 (33.756)	Top-5 acc 60.938 (57.648)	lr 0.02477
Train [8][1160/3239]	Time 0.237 (0.510)	Data Time 0.001 (0.022)	Loss 3.8671 (3.8579)	Entropy 1.86589 (1.86797)	Top-1 acc 32.812 (33.761)	Top-5 acc 59.375 (57.655)	lr 0.02477
Train [8][1170/3239]	Time 0.208 (0.510)	Data Time 0.001 (0.022)	Loss 4.0221 (3.8576)	Entropy 1.86587 (1.86795)	Top-1 acc 29.688 (33.760)	Top-5 acc 53.906 (57.661)	lr 0.02477
Train [8][1180/3239]	Time 0.231 (0.509)	Data Time 0.002 (0.022)	Loss 4.0514 (3.8576)	Entropy 1.86586 (1.86794)	Top-1 acc 32.031 (33.756)	Top-5 acc 52.344 (57.661)	lr 0.02477
Train [8][1190/3239]	Time 0.273 (0.508)	Data Time 0.001 (0.022)	Loss 3.8197 (3.8572)	Entropy 1.86585 (1.86792)	Top-1 acc 33.203 (33.757)	Top-5 acc 55.859 (57.663)	lr 0.02477
Train [8][1200/3239]	Time 0.222 (0.507)	Data Time 0.001 (0.022)	Loss 3.9637 (3.8573)	Entropy 1.86581 (1.86790)	Top-1 acc 31.641 (33.756)	Top-5 acc 52.734 (57.656)	lr 0.02477
Train [8][1210/3239]	Time 0.195 (0.507)	Data Time 0.001 (0.021)	Loss 4.0233 (3.8580)	Entropy 1.86578 (1.86788)	Top-1 acc 30.078 (33.747)	Top-5 acc 53.125 (57.640)	lr 0.02477
Train [8][1220/3239]	Time 2.159 (0.506)	Data Time 0.001 (0.021)	Loss 3.9442 (3.8584)	Entropy 1.86578 (1.86787)	Top-1 acc 33.203 (33.737)	Top-5 acc 53.516 (57.629)	lr 0.02477
Train [8][1230/3239]	Time 0.203 (0.504)	Data Time 0.001 (0.021)	Loss 3.7943 (3.8580)	Entropy 1.86574 (1.86785)	Top-1 acc 37.109 (33.739)	Top-5 acc 58.594 (57.637)	lr 0.02477
Train [8][1240/3239]	Time 0.204 (0.503)	Data Time 0.001 (0.021)	Loss 3.8483 (3.8582)	Entropy 1.86573 (1.86783)	Top-1 acc 35.156 (33.738)	Top-5 acc 57.812 (57.637)	lr 0.02477
Train [8][1250/3239]	Time 0.160 (0.502)	Data Time 0.001 (0.021)	Loss 4.0554 (3.8585)	Entropy 1.86567 (1.86782)	Top-1 acc 28.906 (33.731)	Top-5 acc 53.906 (57.629)	lr 0.02477
Train [8][1260/3239]	Time 0.213 (0.502)	Data Time 0.001 (0.021)	Loss 3.9167 (3.8584)	Entropy 1.86565 (1.86780)	Top-1 acc 27.344 (33.730)	Top-5 acc 53.906 (57.631)	lr 0.02477
Train [8][1270/3239]	Time 0.214 (0.531)	Data Time 0.002 (0.021)	Loss 3.8800 (3.8587)	Entropy 1.86566 (1.86778)	Top-1 acc 33.203 (33.732)	Top-5 acc 55.469 (57.635)	lr 0.02477
Train [8][1280/3239]	Time 0.304 (0.531)	Data Time 0.002 (0.020)	Loss 4.0376 (3.8590)	Entropy 1.86565 (1.86776)	Top-1 acc 33.203 (33.721)	Top-5 acc 53.906 (57.631)	lr 0.02477
Train [8][1290/3239]	Time 0.154 (0.530)	Data Time 0.001 (0.020)	Loss 3.6288 (3.8590)	Entropy 1.86559 (1.86775)	Top-1 acc 39.844 (33.724)	Top-5 acc 61.328 (57.625)	lr 0.02477
Train [8][1300/3239]	Time 0.218 (0.529)	Data Time 0.001 (0.020)	Loss 4.1150 (3.8591)	Entropy 1.86557 (1.86773)	Top-1 acc 32.031 (33.722)	Top-5 acc 51.953 (57.621)	lr 0.02477
Train [8][1310/3239]	Time 0.237 (0.528)	Data Time 0.001 (0.020)	Loss 3.7007 (3.8590)	Entropy 1.86549 (1.86771)	Top-1 acc 38.281 (33.727)	Top-5 acc 59.375 (57.621)	lr 0.02477
Train [8][1320/3239]	Time 0.228 (0.527)	Data Time 0.001 (0.020)	Loss 3.9377 (3.8590)	Entropy 1.86543 (1.86770)	Top-1 acc 33.203 (33.729)	Top-5 acc 54.688 (57.618)	lr 0.02477
Train [8][1330/3239]	Time 2.235 (0.526)	Data Time 0.001 (0.020)	Loss 3.8365 (3.8592)	Entropy 1.86543 (1.86768)	Top-1 acc 38.281 (33.729)	Top-5 acc 56.641 (57.609)	lr 0.02477
Train [8][1340/3239]	Time 0.258 (0.524)	Data Time 0.002 (0.020)	Loss 3.7770 (3.8592)	Entropy 1.86541 (1.86766)	Top-1 acc 34.375 (33.725)	Top-5 acc 58.203 (57.607)	lr 0.02477
Train [8][1350/3239]	Time 0.192 (0.523)	Data Time 0.002 (0.019)	Loss 4.0288 (3.8590)	Entropy 1.86538 (1.86765)	Top-1 acc 30.469 (33.729)	Top-5 acc 55.469 (57.613)	lr 0.02477
Train [8][1360/3239]	Time 0.221 (0.522)	Data Time 0.001 (0.019)	Loss 3.6602 (3.8591)	Entropy 1.86531 (1.86763)	Top-1 acc 37.891 (33.726)	Top-5 acc 64.062 (57.607)	lr 0.02476
Train [8][1370/3239]	Time 0.305 (0.522)	Data Time 0.001 (0.019)	Loss 3.7263 (3.8589)	Entropy 1.86524 (1.86761)	Top-1 acc 34.766 (33.733)	Top-5 acc 59.375 (57.608)	lr 0.02476
Train [8][1380/3239]	Time 0.230 (0.521)	Data Time 0.001 (0.019)	Loss 3.7447 (3.8588)	Entropy 1.86517 (1.86760)	Top-1 acc 33.594 (33.728)	Top-5 acc 60.156 (57.603)	lr 0.02476
Train [8][1390/3239]	Time 0.154 (0.520)	Data Time 0.001 (0.019)	Loss 3.9225 (3.8584)	Entropy 1.86513 (1.86758)	Top-1 acc 32.031 (33.734)	Top-5 acc 58.594 (57.608)	lr 0.02476
Train [8][1400/3239]	Time 0.194 (0.519)	Data Time 0.001 (0.019)	Loss 3.8246 (3.8578)	Entropy 1.86509 (1.86756)	Top-1 acc 32.031 (33.747)	Top-5 acc 58.984 (57.621)	lr 0.02476
Train [8][1410/3239]	Time 0.158 (0.518)	Data Time 0.001 (0.019)	Loss 3.8914 (3.8579)	Entropy 1.86502 (1.86754)	Top-1 acc 32.031 (33.744)	Top-5 acc 56.641 (57.619)	lr 0.02476
Train [8][1420/3239]	Time 0.266 (0.518)	Data Time 0.001 (0.019)	Loss 3.9604 (3.8574)	Entropy 1.86493 (1.86752)	Top-1 acc 31.250 (33.758)	Top-5 acc 55.078 (57.635)	lr 0.02476
Train [8][1430/3239]	Time 0.207 (0.517)	Data Time 0.001 (0.019)	Loss 4.0227 (3.8577)	Entropy 1.86487 (1.86751)	Top-1 acc 29.688 (33.746)	Top-5 acc 54.297 (57.625)	lr 0.02476
Train [8][1440/3239]	Time 2.230 (0.516)	Data Time 0.001 (0.018)	Loss 3.9632 (3.8577)	Entropy 1.86487 (1.86749)	Top-1 acc 31.641 (33.746)	Top-5 acc 57.031 (57.623)	lr 0.02476
Train [8][1450/3239]	Time 0.209 (0.514)	Data Time 0.001 (0.018)	Loss 3.8991 (3.8577)	Entropy 1.86478 (1.86747)	Top-1 acc 34.766 (33.746)	Top-5 acc 55.469 (57.627)	lr 0.02476
Train [8][1460/3239]	Time 0.215 (0.514)	Data Time 0.002 (0.018)	Loss 4.1719 (3.8579)	Entropy 1.86471 (1.86745)	Top-1 acc 25.391 (33.749)	Top-5 acc 52.734 (57.628)	lr 0.02476
Train [8][1470/3239]	Time 0.168 (0.513)	Data Time 0.001 (0.018)	Loss 4.0359 (3.8578)	Entropy 1.86466 (1.86743)	Top-1 acc 32.812 (33.753)	Top-5 acc 57.031 (57.627)	lr 0.02476
Train [8][1480/3239]	Time 0.327 (0.512)	Data Time 0.001 (0.018)	Loss 3.8254 (3.8578)	Entropy 1.86461 (1.86741)	Top-1 acc 35.547 (33.755)	Top-5 acc 58.203 (57.627)	lr 0.02476
Train [8][1490/3239]	Time 0.239 (0.512)	Data Time 0.001 (0.018)	Loss 3.7247 (3.8575)	Entropy 1.86455 (1.86739)	Top-1 acc 37.891 (33.764)	Top-5 acc 58.594 (57.634)	lr 0.02476
Train [8][1500/3239]	Time 0.235 (0.511)	Data Time 0.001 (0.018)	Loss 3.9041 (3.8574)	Entropy 1.86454 (1.86737)	Top-1 acc 33.594 (33.765)	Top-5 acc 58.984 (57.634)	lr 0.02476
Train [8][1510/3239]	Time 0.199 (0.510)	Data Time 0.001 (0.018)	Loss 3.8540 (3.8574)	Entropy 1.86448 (1.86736)	Top-1 acc 36.328 (33.766)	Top-5 acc 59.375 (57.630)	lr 0.02476
Train [8][1520/3239]	Time 0.213 (0.510)	Data Time 0.001 (0.018)	Loss 3.8894 (3.8572)	Entropy 1.86441 (1.86734)	Top-1 acc 32.031 (33.766)	Top-5 acc 55.859 (57.634)	lr 0.02476
Train [8][1530/3239]	Time 0.234 (0.509)	Data Time 0.001 (0.017)	Loss 3.9400 (3.8569)	Entropy 1.86429 (1.86732)	Top-1 acc 31.250 (33.773)	Top-5 acc 56.641 (57.642)	lr 0.02476
Train [8][1540/3239]	Time 0.239 (0.509)	Data Time 0.001 (0.017)	Loss 3.9784 (3.8566)	Entropy 1.86428 (1.86730)	Top-1 acc 30.469 (33.768)	Top-5 acc 54.688 (57.646)	lr 0.02476
Train [8][1550/3239]	Time 2.273 (0.508)	Data Time 0.001 (0.017)	Loss 3.7915 (3.8569)	Entropy 1.86428 (1.86728)	Top-1 acc 38.672 (33.764)	Top-5 acc 57.422 (57.637)	lr 0.02476
Train [8][1560/3239]	Time 0.232 (0.506)	Data Time 0.001 (0.017)	Loss 3.8153 (3.8572)	Entropy 1.86424 (1.86726)	Top-1 acc 37.109 (33.757)	Top-5 acc 57.031 (57.629)	lr 0.02476
Train [8][1570/3239]	Time 0.175 (0.505)	Data Time 0.001 (0.017)	Loss 3.8268 (3.8570)	Entropy 1.86422 (1.86724)	Top-1 acc 33.984 (33.756)	Top-5 acc 56.641 (57.629)	lr 0.02476
Train [8][1580/3239]	Time 0.213 (0.505)	Data Time 0.001 (0.017)	Loss 3.8267 (3.8568)	Entropy 1.86422 (1.86722)	Top-1 acc 32.031 (33.757)	Top-5 acc 60.938 (57.634)	lr 0.02476
Train [8][1590/3239]	Time 0.267 (0.505)	Data Time 0.001 (0.017)	Loss 3.7351 (3.8567)	Entropy 1.86415 (1.86720)	Top-1 acc 37.500 (33.757)	Top-5 acc 61.719 (57.636)	lr 0.02476
Train [8][1600/3239]	Time 0.190 (0.504)	Data Time 0.001 (0.017)	Loss 3.9034 (3.8568)	Entropy 1.86404 (1.86718)	Top-1 acc 33.203 (33.761)	Top-5 acc 56.250 (57.631)	lr 0.02476
Train [8][1610/3239]	Time 0.211 (0.503)	Data Time 0.001 (0.017)	Loss 3.6359 (3.8562)	Entropy 1.86400 (1.86716)	Top-1 acc 38.672 (33.771)	Top-5 acc 59.766 (57.644)	lr 0.02476
Train [8][1620/3239]	Time 0.213 (0.503)	Data Time 0.001 (0.017)	Loss 3.7385 (3.8563)	Entropy 1.86392 (1.86714)	Top-1 acc 39.453 (33.772)	Top-5 acc 61.328 (57.644)	lr 0.02476
Train [8][1630/3239]	Time 0.393 (0.525)	Data Time 0.003 (0.017)	Loss 3.8336 (3.8560)	Entropy 1.86393 (1.86712)	Top-1 acc 35.938 (33.776)	Top-5 acc 59.766 (57.645)	lr 0.02476
Train [8][1640/3239]	Time 0.181 (0.524)	Data Time 0.002 (0.016)	Loss 3.8404 (3.8559)	Entropy 1.86392 (1.86710)	Top-1 acc 33.594 (33.778)	Top-5 acc 57.031 (57.639)	lr 0.02476
Train [8][1650/3239]	Time 0.205 (0.524)	Data Time 0.002 (0.016)	Loss 3.8142 (3.8558)	Entropy 1.86389 (1.86708)	Top-1 acc 32.422 (33.771)	Top-5 acc 58.984 (57.647)	lr 0.02476
Train [8][1660/3239]	Time 2.161 (0.523)	Data Time 0.001 (0.016)	Loss 3.9403 (3.8559)	Entropy 1.86389 (1.86706)	Top-1 acc 31.250 (33.767)	Top-5 acc 56.250 (57.646)	lr 0.02476
Train [8][1670/3239]	Time 0.260 (0.521)	Data Time 0.002 (0.016)	Loss 3.8985 (3.8561)	Entropy 1.86382 (1.86705)	Top-1 acc 30.859 (33.764)	Top-5 acc 55.469 (57.646)	lr 0.02476
Train [8][1680/3239]	Time 0.229 (0.520)	Data Time 0.001 (0.016)	Loss 3.8714 (3.8564)	Entropy 1.86379 (1.86703)	Top-1 acc 34.766 (33.757)	Top-5 acc 53.125 (57.637)	lr 0.02476
Train [8][1690/3239]	Time 0.228 (0.520)	Data Time 0.002 (0.016)	Loss 3.8706 (3.8561)	Entropy 1.86372 (1.86701)	Top-1 acc 35.156 (33.771)	Top-5 acc 56.641 (57.641)	lr 0.02476
Train [8][1700/3239]	Time 0.193 (0.519)	Data Time 0.001 (0.016)	Loss 3.5483 (3.8559)	Entropy 1.86369 (1.86699)	Top-1 acc 41.406 (33.781)	Top-5 acc 64.844 (57.642)	lr 0.02476
Train [8][1710/3239]	Time 0.184 (0.519)	Data Time 0.002 (0.016)	Loss 3.9559 (3.8557)	Entropy 1.86365 (1.86697)	Top-1 acc 34.375 (33.788)	Top-5 acc 55.469 (57.647)	lr 0.02476
Train [8][1720/3239]	Time 0.149 (0.518)	Data Time 0.001 (0.016)	Loss 3.7789 (3.8556)	Entropy 1.86358 (1.86695)	Top-1 acc 36.719 (33.792)	Top-5 acc 58.594 (57.647)	lr 0.02476
Train [8][1730/3239]	Time 0.184 (0.518)	Data Time 0.001 (0.016)	Loss 3.7347 (3.8553)	Entropy 1.86352 (1.86693)	Top-1 acc 36.328 (33.796)	Top-5 acc 57.031 (57.651)	lr 0.02476
Train [8][1740/3239]	Time 0.224 (0.517)	Data Time 0.001 (0.016)	Loss 3.9086 (3.8552)	Entropy 1.86346 (1.86691)	Top-1 acc 33.203 (33.798)	Top-5 acc 57.422 (57.652)	lr 0.02476
Train [8][1750/3239]	Time 0.199 (0.516)	Data Time 0.002 (0.016)	Loss 3.8265 (3.8551)	Entropy 1.86341 (1.86689)	Top-1 acc 37.500 (33.803)	Top-5 acc 56.250 (57.653)	lr 0.02476
Train [8][1760/3239]	Time 0.359 (0.516)	Data Time 0.001 (0.016)	Loss 3.6888 (3.8549)	Entropy 1.86330 (1.86687)	Top-1 acc 36.328 (33.809)	Top-5 acc 60.547 (57.658)	lr 0.02476
Train [8][1770/3239]	Time 2.236 (0.515)	Data Time 0.001 (0.015)	Loss 3.9431 (3.8550)	Entropy 1.86330 (1.86685)	Top-1 acc 30.078 (33.810)	Top-5 acc 54.297 (57.651)	lr 0.02476
Train [8][1780/3239]	Time 0.251 (0.514)	Data Time 0.001 (0.015)	Loss 3.9068 (3.8547)	Entropy 1.86328 (1.86683)	Top-1 acc 35.938 (33.824)	Top-5 acc 58.984 (57.656)	lr 0.02476
Train [8][1790/3239]	Time 0.204 (0.513)	Data Time 0.001 (0.015)	Loss 3.8374 (3.8547)	Entropy 1.86319 (1.86681)	Top-1 acc 35.156 (33.821)	Top-5 acc 59.375 (57.653)	lr 0.02476
Train [8][1800/3239]	Time 0.211 (0.513)	Data Time 0.001 (0.015)	Loss 3.8314 (3.8548)	Entropy 1.86314 (1.86679)	Top-1 acc 37.109 (33.826)	Top-5 acc 60.156 (57.646)	lr 0.02476
Train [8][1810/3239]	Time 0.187 (0.512)	Data Time 0.001 (0.015)	Loss 3.7709 (3.8548)	Entropy 1.86309 (1.86677)	Top-1 acc 36.328 (33.828)	Top-5 acc 56.250 (57.643)	lr 0.02476
Train [8][1820/3239]	Time 0.197 (0.511)	Data Time 0.001 (0.015)	Loss 3.7711 (3.8546)	Entropy 1.86301 (1.86675)	Top-1 acc 35.938 (33.831)	Top-5 acc 60.156 (57.649)	lr 0.02476
Train [8][1830/3239]	Time 0.212 (0.511)	Data Time 0.001 (0.015)	Loss 3.9742 (3.8548)	Entropy 1.86299 (1.86673)	Top-1 acc 30.469 (33.822)	Top-5 acc 55.859 (57.644)	lr 0.02476
Train [8][1840/3239]	Time 0.213 (0.510)	Data Time 0.001 (0.015)	Loss 3.7764 (3.8545)	Entropy 1.86297 (1.86671)	Top-1 acc 34.375 (33.827)	Top-5 acc 58.984 (57.650)	lr 0.02476
Train [8][1850/3239]	Time 0.266 (0.510)	Data Time 0.001 (0.015)	Loss 3.6814 (3.8546)	Entropy 1.86297 (1.86669)	Top-1 acc 33.984 (33.825)	Top-5 acc 62.500 (57.646)	lr 0.02476
Train [8][1860/3239]	Time 0.198 (0.510)	Data Time 0.001 (0.015)	Loss 3.7348 (3.8546)	Entropy 1.86287 (1.86667)	Top-1 acc 40.625 (33.830)	Top-5 acc 60.156 (57.644)	lr 0.02476
Train [8][1870/3239]	Time 0.205 (0.509)	Data Time 0.001 (0.015)	Loss 4.1360 (3.8548)	Entropy 1.86284 (1.86665)	Top-1 acc 31.250 (33.830)	Top-5 acc 53.125 (57.639)	lr 0.02475
Train [8][1880/3239]	Time 2.304 (0.509)	Data Time 0.001 (0.015)	Loss 3.8608 (3.8550)	Entropy 1.86284 (1.86663)	Top-1 acc 32.422 (33.829)	Top-5 acc 58.984 (57.635)	lr 0.02475
Train [8][1890/3239]	Time 0.201 (0.507)	Data Time 0.001 (0.015)	Loss 3.8696 (3.8548)	Entropy 1.86282 (1.86661)	Top-1 acc 33.984 (33.829)	Top-5 acc 57.422 (57.635)	lr 0.02475
Train [8][1900/3239]	Time 0.205 (0.507)	Data Time 0.001 (0.015)	Loss 3.7268 (3.8550)	Entropy 1.86279 (1.86659)	Top-1 acc 33.594 (33.824)	Top-5 acc 60.938 (57.635)	lr 0.02475
Train [8][1910/3239]	Time 0.212 (0.506)	Data Time 0.002 (0.014)	Loss 3.8017 (3.8551)	Entropy 1.86279 (1.86657)	Top-1 acc 35.938 (33.816)	Top-5 acc 58.984 (57.633)	lr 0.02475
Train [8][1920/3239]	Time 0.207 (0.506)	Data Time 0.002 (0.014)	Loss 3.7794 (3.8549)	Entropy 1.86273 (1.86655)	Top-1 acc 38.672 (33.825)	Top-5 acc 58.984 (57.633)	lr 0.02475
Train [8][1930/3239]	Time 0.282 (0.505)	Data Time 0.001 (0.014)	Loss 3.8877 (3.8553)	Entropy 1.86266 (1.86653)	Top-1 acc 32.031 (33.819)	Top-5 acc 55.859 (57.628)	lr 0.02475
Train [8][1940/3239]	Time 0.285 (0.505)	Data Time 0.001 (0.014)	Loss 3.7809 (3.8554)	Entropy 1.86263 (1.86651)	Top-1 acc 35.547 (33.818)	Top-5 acc 58.984 (57.618)	lr 0.02475
Train [8][1950/3239]	Time 0.211 (0.504)	Data Time 0.001 (0.014)	Loss 3.9664 (3.8555)	Entropy 1.86258 (1.86649)	Top-1 acc 26.172 (33.816)	Top-5 acc 56.641 (57.616)	lr 0.02475
Train [8][1960/3239]	Time 0.142 (0.504)	Data Time 0.001 (0.014)	Loss 3.7889 (3.8551)	Entropy 1.86256 (1.86647)	Top-1 acc 37.500 (33.824)	Top-5 acc 57.031 (57.620)	lr 0.02475
Train [8][1970/3239]	Time 0.170 (0.503)	Data Time 0.001 (0.014)	Loss 3.7886 (3.8548)	Entropy 1.86253 (1.86645)	Top-1 acc 35.938 (33.825)	Top-5 acc 58.594 (57.627)	lr 0.02475
Train [8][1980/3239]	Time 0.187 (0.503)	Data Time 0.001 (0.014)	Loss 3.7661 (3.8548)	Entropy 1.86247 (1.86643)	Top-1 acc 36.719 (33.824)	Top-5 acc 60.156 (57.626)	lr 0.02475
Train [8][1990/3239]	Time 37.548 (0.520)	Data Time 0.001 (0.014)	Loss 3.8328 (3.8549)	Entropy 1.86247 (1.86641)	Top-1 acc 33.984 (33.826)	Top-5 acc 57.422 (57.621)	lr 0.02475
Train [8][2000/3239]	Time 0.219 (0.519)	Data Time 0.002 (0.014)	Loss 3.6914 (3.8550)	Entropy 1.86246 (1.86639)	Top-1 acc 37.500 (33.826)	Top-5 acc 59.766 (57.620)	lr 0.02475
Train [8][2010/3239]	Time 0.197 (0.519)	Data Time 0.002 (0.014)	Loss 4.0600 (3.8549)	Entropy 1.86242 (1.86637)	Top-1 acc 28.906 (33.826)	Top-5 acc 54.297 (57.624)	lr 0.02475
Train [8][2020/3239]	Time 0.218 (0.518)	Data Time 0.002 (0.014)	Loss 3.8255 (3.8548)	Entropy 1.86240 (1.86635)	Top-1 acc 33.594 (33.827)	Top-5 acc 57.812 (57.629)	lr 0.02475
Train [8][2030/3239]	Time 0.300 (0.518)	Data Time 0.001 (0.014)	Loss 3.9249 (3.8549)	Entropy 1.86237 (1.86633)	Top-1 acc 34.766 (33.828)	Top-5 acc 57.812 (57.626)	lr 0.02475
Train [8][2040/3239]	Time 0.329 (0.517)	Data Time 0.001 (0.014)	Loss 3.8393 (3.8547)	Entropy 1.86238 (1.86631)	Top-1 acc 35.547 (33.833)	Top-5 acc 61.328 (57.632)	lr 0.02475
Train [8][2050/3239]	Time 0.212 (0.517)	Data Time 0.001 (0.014)	Loss 3.8286 (3.8544)	Entropy 1.86233 (1.86629)	Top-1 acc 33.984 (33.838)	Top-5 acc 59.766 (57.634)	lr 0.02475
Train [8][2060/3239]	Time 0.230 (0.516)	Data Time 0.001 (0.014)	Loss 3.9631 (3.8543)	Entropy 1.86230 (1.86627)	Top-1 acc 33.203 (33.847)	Top-5 acc 53.906 (57.635)	lr 0.02475
Train [8][2070/3239]	Time 0.145 (0.516)	Data Time 0.002 (0.013)	Loss 3.8135 (3.8540)	Entropy 1.86226 (1.86625)	Top-1 acc 32.812 (33.851)	Top-5 acc 57.422 (57.639)	lr 0.02475
Train [8][2080/3239]	Time 0.180 (0.515)	Data Time 0.002 (0.013)	Loss 3.8701 (3.8539)	Entropy 1.86222 (1.86623)	Top-1 acc 34.766 (33.856)	Top-5 acc 53.516 (57.640)	lr 0.02475
Train [8][2090/3239]	Time 0.225 (0.515)	Data Time 0.001 (0.013)	Loss 3.7933 (3.8540)	Entropy 1.86219 (1.86621)	Top-1 acc 32.812 (33.850)	Top-5 acc 55.469 (57.639)	lr 0.02475
Train [8][2100/3239]	Time 2.214 (0.514)	Data Time 0.001 (0.013)	Loss 3.7409 (3.8537)	Entropy 1.86219 (1.86619)	Top-1 acc 36.719 (33.855)	Top-5 acc 61.328 (57.642)	lr 0.02475
Train [8][2110/3239]	Time 0.182 (0.513)	Data Time 0.002 (0.013)	Loss 3.9174 (3.8540)	Entropy 1.86211 (1.86617)	Top-1 acc 30.078 (33.849)	Top-5 acc 56.641 (57.636)	lr 0.02475
Train [8][2120/3239]	Time 0.233 (0.512)	Data Time 0.001 (0.013)	Loss 3.9120 (3.8541)	Entropy 1.86200 (1.86615)	Top-1 acc 29.688 (33.844)	Top-5 acc 56.250 (57.629)	lr 0.02475
Train [8][2130/3239]	Time 0.198 (0.512)	Data Time 0.001 (0.013)	Loss 3.7828 (3.8541)	Entropy 1.86194 (1.86613)	Top-1 acc 35.547 (33.843)	Top-5 acc 57.812 (57.626)	lr 0.02475
Train [8][2140/3239]	Time 0.226 (0.512)	Data Time 0.001 (0.013)	Loss 3.7316 (3.8540)	Entropy 1.86189 (1.86612)	Top-1 acc 38.281 (33.847)	Top-5 acc 62.500 (57.633)	lr 0.02475
Train [8][2150/3239]	Time 0.326 (0.511)	Data Time 0.001 (0.013)	Loss 3.7093 (3.8538)	Entropy 1.86183 (1.86610)	Top-1 acc 34.375 (33.847)	Top-5 acc 60.938 (57.637)	lr 0.02475
Train [8][2160/3239]	Time 0.202 (0.511)	Data Time 0.001 (0.013)	Loss 3.7443 (3.8538)	Entropy 1.86178 (1.86608)	Top-1 acc 38.672 (33.848)	Top-5 acc 57.031 (57.636)	lr 0.02475
Train [8][2170/3239]	Time 0.207 (0.510)	Data Time 0.001 (0.013)	Loss 3.8015 (3.8540)	Entropy 1.86176 (1.86606)	Top-1 acc 36.328 (33.847)	Top-5 acc 58.594 (57.632)	lr 0.02475
Train [8][2180/3239]	Time 0.212 (0.510)	Data Time 0.001 (0.013)	Loss 3.8401 (3.8538)	Entropy 1.86172 (1.86604)	Top-1 acc 33.203 (33.847)	Top-5 acc 58.594 (57.635)	lr 0.02475
Train [8][2190/3239]	Time 0.176 (0.510)	Data Time 0.001 (0.013)	Loss 3.9648 (3.8536)	Entropy 1.86166 (1.86602)	Top-1 acc 27.734 (33.848)	Top-5 acc 54.297 (57.642)	lr 0.02475
Train [8][2200/3239]	Time 0.204 (0.509)	Data Time 0.001 (0.013)	Loss 3.7979 (3.8538)	Entropy 1.86163 (1.86600)	Top-1 acc 35.547 (33.846)	Top-5 acc 60.547 (57.641)	lr 0.02475
Train [8][2210/3239]	Time 2.296 (0.509)	Data Time 0.001 (0.013)	Loss 3.9778 (3.8538)	Entropy 1.86163 (1.86598)	Top-1 acc 32.812 (33.845)	Top-5 acc 53.125 (57.640)	lr 0.02475
Train [8][2220/3239]	Time 0.190 (0.507)	Data Time 0.001 (0.013)	Loss 3.7774 (3.8537)	Entropy 1.86162 (1.86596)	Top-1 acc 31.641 (33.844)	Top-5 acc 62.500 (57.644)	lr 0.02475
Train [8][2230/3239]	Time 0.197 (0.507)	Data Time 0.002 (0.013)	Loss 3.8669 (3.8535)	Entropy 1.86160 (1.86594)	Top-1 acc 32.812 (33.851)	Top-5 acc 55.078 (57.648)	lr 0.02475
Train [8][2240/3239]	Time 0.247 (0.506)	Data Time 0.001 (0.013)	Loss 3.9733 (3.8536)	Entropy 1.86155 (1.86592)	Top-1 acc 31.250 (33.853)	Top-5 acc 57.422 (57.647)	lr 0.02475
Train [8][2250/3239]	Time 0.220 (0.506)	Data Time 0.001 (0.013)	Loss 3.7625 (3.8535)	Entropy 1.86151 (1.86590)	Top-1 acc 32.812 (33.854)	Top-5 acc 58.984 (57.651)	lr 0.02475
Train [8][2260/3239]	Time 0.228 (0.506)	Data Time 0.001 (0.013)	Loss 3.7720 (3.8535)	Entropy 1.86141 (1.86588)	Top-1 acc 30.859 (33.853)	Top-5 acc 55.078 (57.653)	lr 0.02475
Train [8][2270/3239]	Time 0.242 (0.505)	Data Time 0.001 (0.013)	Loss 3.9430 (3.8536)	Entropy 1.86134 (1.86586)	Top-1 acc 35.938 (33.852)	Top-5 acc 55.078 (57.656)	lr 0.02475
Train [8][2280/3239]	Time 0.232 (0.505)	Data Time 0.001 (0.012)	Loss 3.8463 (3.8536)	Entropy 1.86128 (1.86584)	Top-1 acc 33.594 (33.856)	Top-5 acc 56.641 (57.656)	lr 0.02475
Train [8][2290/3239]	Time 0.225 (0.504)	Data Time 0.001 (0.012)	Loss 3.8983 (3.8533)	Entropy 1.86122 (1.86582)	Top-1 acc 38.672 (33.863)	Top-5 acc 55.078 (57.662)	lr 0.02475
Train [8][2300/3239]	Time 0.195 (0.504)	Data Time 0.001 (0.012)	Loss 4.0131 (3.8534)	Entropy 1.86120 (1.86580)	Top-1 acc 32.812 (33.861)	Top-5 acc 52.734 (57.657)	lr 0.02475
Train [8][2310/3239]	Time 0.225 (0.504)	Data Time 0.001 (0.012)	Loss 3.9024 (3.8533)	Entropy 1.86117 (1.86578)	Top-1 acc 32.031 (33.864)	Top-5 acc 54.297 (57.656)	lr 0.02475
Train [8][2320/3239]	Time 2.223 (0.503)	Data Time 0.001 (0.012)	Loss 3.8640 (3.8532)	Entropy 1.86117 (1.86576)	Top-1 acc 31.641 (33.866)	Top-5 acc 60.156 (57.660)	lr 0.02475
Train [8][2330/3239]	Time 0.167 (0.502)	Data Time 0.001 (0.012)	Loss 3.9497 (3.8531)	Entropy 1.86112 (1.86574)	Top-1 acc 35.156 (33.872)	Top-5 acc 59.766 (57.668)	lr 0.02475
Train [8][2340/3239]	Time 0.346 (0.502)	Data Time 0.001 (0.012)	Loss 3.7564 (3.8530)	Entropy 1.86112 (1.86572)	Top-1 acc 36.719 (33.874)	Top-5 acc 59.766 (57.676)	lr 0.02475
Train [8][2350/3239]	Time 0.204 (0.502)	Data Time 0.001 (0.012)	Loss 3.7135 (3.8530)	Entropy 1.86107 (1.86570)	Top-1 acc 33.594 (33.877)	Top-5 acc 61.328 (57.678)	lr 0.02475
Train [8][2360/3239]	Time 0.277 (0.515)	Data Time 0.005 (0.012)	Loss 3.6152 (3.8528)	Entropy 1.86102 (1.86568)	Top-1 acc 37.500 (33.886)	Top-5 acc 61.328 (57.680)	lr 0.02475
Train [8][2370/3239]	Time 0.194 (0.515)	Data Time 0.002 (0.012)	Loss 3.7890 (3.8531)	Entropy 1.86096 (1.86566)	Top-1 acc 34.375 (33.883)	Top-5 acc 59.766 (57.674)	lr 0.02474
Train [8][2380/3239]	Time 0.160 (0.515)	Data Time 0.001 (0.012)	Loss 3.8314 (3.8532)	Entropy 1.86093 (1.86564)	Top-1 acc 33.594 (33.882)	Top-5 acc 56.250 (57.674)	lr 0.02474
Train [8][2390/3239]	Time 0.200 (0.514)	Data Time 0.002 (0.012)	Loss 3.8823 (3.8532)	Entropy 1.86090 (1.86562)	Top-1 acc 37.500 (33.885)	Top-5 acc 57.031 (57.677)	lr 0.02474
Train [8][2400/3239]	Time 0.224 (0.514)	Data Time 0.001 (0.012)	Loss 3.7713 (3.8530)	Entropy 1.86086 (1.86560)	Top-1 acc 36.328 (33.891)	Top-5 acc 57.031 (57.680)	lr 0.02474
Train [8][2410/3239]	Time 0.198 (0.514)	Data Time 0.001 (0.012)	Loss 3.7314 (3.8526)	Entropy 1.86082 (1.86558)	Top-1 acc 36.719 (33.900)	Top-5 acc 58.594 (57.684)	lr 0.02474
Train [8][2420/3239]	Time 0.147 (0.513)	Data Time 0.001 (0.012)	Loss 3.8681 (3.8524)	Entropy 1.86075 (1.86556)	Top-1 acc 34.766 (33.902)	Top-5 acc 57.812 (57.685)	lr 0.02474
Train [8][2430/3239]	Time 2.152 (0.513)	Data Time 0.002 (0.012)	Loss 3.6838 (3.8527)	Entropy 1.86075 (1.86554)	Top-1 acc 32.422 (33.895)	Top-5 acc 61.719 (57.680)	lr 0.02474
Train [8][2440/3239]	Time 0.293 (0.511)	Data Time 0.001 (0.012)	Loss 3.7563 (3.8523)	Entropy 1.86074 (1.86552)	Top-1 acc 38.281 (33.905)	Top-5 acc 57.422 (57.687)	lr 0.02474
Train [8][2450/3239]	Time 0.285 (0.511)	Data Time 0.001 (0.012)	Loss 3.9485 (3.8522)	Entropy 1.86071 (1.86550)	Top-1 acc 31.250 (33.913)	Top-5 acc 52.344 (57.691)	lr 0.02474
Train [8][2460/3239]	Time 0.209 (0.511)	Data Time 0.001 (0.012)	Loss 3.7745 (3.8521)	Entropy 1.86070 (1.86548)	Top-1 acc 31.641 (33.912)	Top-5 acc 58.594 (57.694)	lr 0.02474
Train [8][2470/3239]	Time 0.247 (0.510)	Data Time 0.001 (0.012)	Loss 3.8399 (3.8520)	Entropy 1.86069 (1.86546)	Top-1 acc 35.547 (33.912)	Top-5 acc 59.375 (57.694)	lr 0.02474
Train [8][2480/3239]	Time 0.208 (0.510)	Data Time 0.001 (0.012)	Loss 3.7989 (3.8521)	Entropy 1.86066 (1.86544)	Top-1 acc 35.938 (33.908)	Top-5 acc 58.984 (57.694)	lr 0.02474
Train [8][2490/3239]	Time 0.190 (0.510)	Data Time 0.001 (0.012)	Loss 3.6554 (3.8517)	Entropy 1.86062 (1.86542)	Top-1 acc 37.109 (33.913)	Top-5 acc 63.672 (57.705)	lr 0.02474
Train [8][2500/3239]	Time 0.234 (0.509)	Data Time 0.001 (0.012)	Loss 3.6311 (3.8516)	Entropy 1.86055 (1.86541)	Top-1 acc 38.672 (33.914)	Top-5 acc 61.328 (57.713)	lr 0.02474
Train [8][2510/3239]	Time 0.198 (0.509)	Data Time 0.001 (0.012)	Loss 3.7907 (3.8512)	Entropy 1.86050 (1.86539)	Top-1 acc 35.156 (33.919)	Top-5 acc 58.594 (57.722)	lr 0.02474
Train [8][2520/3239]	Time 0.217 (0.509)	Data Time 0.002 (0.012)	Loss 3.7822 (3.8513)	Entropy 1.86045 (1.86537)	Top-1 acc 35.938 (33.918)	Top-5 acc 59.375 (57.721)	lr 0.02474
Train [8][2530/3239]	Time 0.244 (0.508)	Data Time 0.001 (0.012)	Loss 3.8567 (3.8514)	Entropy 1.86041 (1.86535)	Top-1 acc 35.156 (33.915)	Top-5 acc 58.594 (57.720)	lr 0.02474
Train [8][2540/3239]	Time 2.250 (0.508)	Data Time 0.001 (0.011)	Loss 4.0198 (3.8515)	Entropy 1.86041 (1.86533)	Top-1 acc 30.078 (33.912)	Top-5 acc 54.297 (57.717)	lr 0.02474
Train [8][2550/3239]	Time 0.198 (0.507)	Data Time 0.001 (0.011)	Loss 3.7303 (3.8514)	Entropy 1.86037 (1.86531)	Top-1 acc 35.547 (33.917)	Top-5 acc 62.500 (57.720)	lr 0.02474
Train [8][2560/3239]	Time 0.162 (0.506)	Data Time 0.001 (0.011)	Loss 3.9805 (3.8509)	Entropy 1.86032 (1.86529)	Top-1 acc 30.859 (33.925)	Top-5 acc 51.562 (57.730)	lr 0.02474
Train [8][2570/3239]	Time 0.196 (0.506)	Data Time 0.001 (0.011)	Loss 3.8163 (3.8510)	Entropy 1.86025 (1.86527)	Top-1 acc 35.156 (33.921)	Top-5 acc 57.812 (57.728)	lr 0.02474
Train [8][2580/3239]	Time 0.191 (0.506)	Data Time 0.001 (0.011)	Loss 4.0555 (3.8510)	Entropy 1.86018 (1.86525)	Top-1 acc 30.469 (33.916)	Top-5 acc 53.906 (57.730)	lr 0.02474
Train [8][2590/3239]	Time 0.211 (0.505)	Data Time 0.001 (0.011)	Loss 3.8502 (3.8512)	Entropy 1.86016 (1.86523)	Top-1 acc 33.984 (33.911)	Top-5 acc 58.984 (57.730)	lr 0.02474
Train [8][2600/3239]	Time 0.205 (0.505)	Data Time 0.001 (0.011)	Loss 3.7994 (3.8510)	Entropy 1.86008 (1.86521)	Top-1 acc 32.422 (33.909)	Top-5 acc 57.422 (57.732)	lr 0.02474
Train [8][2610/3239]	Time 0.229 (0.505)	Data Time 0.002 (0.011)	Loss 4.0101 (3.8510)	Entropy 1.86002 (1.86519)	Top-1 acc 32.422 (33.909)	Top-5 acc 57.031 (57.734)	lr 0.02474
Train [8][2620/3239]	Time 0.194 (0.504)	Data Time 0.001 (0.011)	Loss 3.6695 (3.8509)	Entropy 1.86000 (1.86517)	Top-1 acc 39.062 (33.913)	Top-5 acc 63.281 (57.734)	lr 0.02474
Train [8][2630/3239]	Time 0.243 (0.504)	Data Time 0.001 (0.011)	Loss 3.7900 (3.8508)	Entropy 1.86000 (1.86515)	Top-1 acc 35.938 (33.914)	Top-5 acc 61.328 (57.737)	lr 0.02474
Train [8][2640/3239]	Time 0.228 (0.504)	Data Time 0.003 (0.011)	Loss 3.8469 (3.8506)	Entropy 1.85997 (1.86513)	Top-1 acc 34.766 (33.916)	Top-5 acc 56.641 (57.739)	lr 0.02474
Train [8][2650/3239]	Time 0.327 (0.503)	Data Time 0.001 (0.011)	Loss 3.7563 (3.8506)	Entropy 1.85997 (1.86511)	Top-1 acc 34.375 (33.918)	Top-5 acc 59.766 (57.740)	lr 0.02474
Train [8][2660/3239]	Time 0.205 (0.503)	Data Time 0.001 (0.011)	Loss 3.9103 (3.8508)	Entropy 1.85991 (1.86509)	Top-1 acc 33.984 (33.914)	Top-5 acc 55.469 (57.737)	lr 0.02474
Train [8][2670/3239]	Time 0.245 (0.503)	Data Time 0.001 (0.011)	Loss 3.7530 (3.8509)	Entropy 1.85989 (1.86507)	Top-1 acc 33.594 (33.912)	Top-5 acc 56.250 (57.734)	lr 0.02474
Train [8][2680/3239]	Time 0.224 (0.502)	Data Time 0.001 (0.011)	Loss 4.0620 (3.8510)	Entropy 1.85986 (1.86505)	Top-1 acc 33.203 (33.912)	Top-5 acc 55.859 (57.733)	lr 0.02474
Train [8][2690/3239]	Time 0.228 (0.502)	Data Time 0.001 (0.011)	Loss 3.8308 (3.8510)	Entropy 1.85985 (1.86504)	Top-1 acc 38.281 (33.923)	Top-5 acc 61.328 (57.736)	lr 0.02474
Train [8][2700/3239]	Time 0.220 (0.502)	Data Time 0.001 (0.011)	Loss 3.4635 (3.8505)	Entropy 1.85972 (1.86502)	Top-1 acc 42.188 (33.930)	Top-5 acc 67.578 (57.747)	lr 0.02474
Train [8][2710/3239]	Time 0.266 (0.514)	Data Time 0.004 (0.011)	Loss 3.7683 (3.8506)	Entropy 1.85970 (1.86500)	Top-1 acc 40.625 (33.935)	Top-5 acc 59.375 (57.747)	lr 0.02474
Train [8][2720/3239]	Time 0.191 (0.514)	Data Time 0.002 (0.011)	Loss 3.8710 (3.8506)	Entropy 1.85969 (1.86498)	Top-1 acc 30.078 (33.936)	Top-5 acc 58.594 (57.749)	lr 0.02474
Train [8][2730/3239]	Time 0.273 (0.513)	Data Time 0.005 (0.011)	Loss 3.7376 (3.8505)	Entropy 1.85960 (1.86496)	Top-1 acc 33.203 (33.937)	Top-5 acc 60.547 (57.750)	lr 0.02474
Train [8][2740/3239]	Time 0.183 (0.513)	Data Time 0.002 (0.011)	Loss 3.7000 (3.8503)	Entropy 1.85957 (1.86494)	Top-1 acc 39.453 (33.941)	Top-5 acc 60.938 (57.755)	lr 0.02474
Train [8][2750/3239]	Time 0.212 (0.513)	Data Time 0.001 (0.011)	Loss 3.7949 (3.8505)	Entropy 1.85953 (1.86492)	Top-1 acc 35.547 (33.941)	Top-5 acc 61.719 (57.750)	lr 0.02474
Train [8][2760/3239]	Time 0.203 (0.512)	Data Time 0.001 (0.011)	Loss 4.0815 (3.8506)	Entropy 1.85948 (1.86490)	Top-1 acc 30.078 (33.937)	Top-5 acc 53.125 (57.750)	lr 0.02474
Train [8][2770/3239]	Time 0.263 (0.512)	Data Time 0.001 (0.011)	Loss 3.7772 (3.8505)	Entropy 1.85945 (1.86488)	Top-1 acc 32.031 (33.940)	Top-5 acc 59.375 (57.754)	lr 0.02474
Train [8][2780/3239]	Time 0.192 (0.512)	Data Time 0.001 (0.011)	Loss 3.7138 (3.8504)	Entropy 1.85934 (1.86486)	Top-1 acc 35.938 (33.940)	Top-5 acc 62.891 (57.756)	lr 0.02474
Train [8][2790/3239]	Time 0.219 (0.511)	Data Time 0.001 (0.011)	Loss 3.5581 (3.8501)	Entropy 1.85933 (1.86484)	Top-1 acc 39.453 (33.945)	Top-5 acc 66.016 (57.765)	lr 0.02474
Train [8][2800/3239]	Time 0.203 (0.511)	Data Time 0.001 (0.011)	Loss 3.7659 (3.8501)	Entropy 1.85932 (1.86482)	Top-1 acc 32.812 (33.944)	Top-5 acc 59.766 (57.764)	lr 0.02474
Train [8][2810/3239]	Time 0.178 (0.511)	Data Time 0.001 (0.011)	Loss 3.6324 (3.8500)	Entropy 1.85930 (1.86480)	Top-1 acc 39.844 (33.949)	Top-5 acc 60.547 (57.766)	lr 0.02474
Train [8][2820/3239]	Time 0.188 (0.510)	Data Time 0.001 (0.011)	Loss 3.8537 (3.8498)	Entropy 1.85925 (1.86478)	Top-1 acc 32.422 (33.956)	Top-5 acc 58.203 (57.770)	lr 0.02474
Train [8][2830/3239]	Time 0.239 (0.510)	Data Time 0.001 (0.011)	Loss 3.9145 (3.8497)	Entropy 1.85917 (1.86476)	Top-1 acc 33.594 (33.959)	Top-5 acc 52.734 (57.769)	lr 0.02474
Train [8][2840/3239]	Time 0.256 (0.510)	Data Time 0.001 (0.011)	Loss 3.6407 (3.8497)	Entropy 1.85909 (1.86474)	Top-1 acc 37.109 (33.957)	Top-5 acc 62.500 (57.772)	lr 0.02474
Train [8][2850/3239]	Time 0.321 (0.510)	Data Time 0.001 (0.010)	Loss 3.8439 (3.8497)	Entropy 1.85909 (1.86472)	Top-1 acc 34.766 (33.958)	Top-5 acc 57.422 (57.771)	lr 0.02473
Train [8][2860/3239]	Time 0.191 (0.509)	Data Time 0.001 (0.010)	Loss 3.8458 (3.8495)	Entropy 1.85905 (1.86470)	Top-1 acc 33.984 (33.963)	Top-5 acc 58.594 (57.773)	lr 0.02473
Train [8][2870/3239]	Time 0.257 (0.509)	Data Time 0.001 (0.010)	Loss 3.9344 (3.8496)	Entropy 1.85905 (1.86468)	Top-1 acc 33.594 (33.964)	Top-5 acc 52.734 (57.769)	lr 0.02473
Train [8][2880/3239]	Time 0.180 (0.509)	Data Time 0.001 (0.010)	Loss 4.0037 (3.8496)	Entropy 1.85898 (1.86466)	Top-1 acc 31.250 (33.969)	Top-5 acc 55.469 (57.770)	lr 0.02473
Train [8][2890/3239]	Time 0.211 (0.508)	Data Time 0.001 (0.010)	Loss 3.6611 (3.8495)	Entropy 1.85895 (1.86464)	Top-1 acc 34.766 (33.968)	Top-5 acc 59.766 (57.767)	lr 0.02473
Train [8][2900/3239]	Time 0.244 (0.508)	Data Time 0.001 (0.010)	Loss 3.8144 (3.8496)	Entropy 1.85889 (1.86462)	Top-1 acc 34.766 (33.965)	Top-5 acc 58.594 (57.764)	lr 0.02473
Train [8][2910/3239]	Time 0.191 (0.508)	Data Time 0.001 (0.010)	Loss 3.8805 (3.8494)	Entropy 1.85879 (1.86460)	Top-1 acc 30.859 (33.968)	Top-5 acc 60.547 (57.772)	lr 0.02473
Train [8][2920/3239]	Time 0.216 (0.507)	Data Time 0.001 (0.010)	Loss 3.7604 (3.8494)	Entropy 1.85878 (1.86458)	Top-1 acc 33.984 (33.968)	Top-5 acc 60.547 (57.770)	lr 0.02473
Train [8][2930/3239]	Time 0.201 (0.507)	Data Time 0.002 (0.010)	Loss 3.7027 (3.8494)	Entropy 1.85876 (1.86456)	Top-1 acc 34.375 (33.970)	Top-5 acc 61.328 (57.769)	lr 0.02473
Train [8][2940/3239]	Time 0.204 (0.507)	Data Time 0.001 (0.010)	Loss 3.8620 (3.8492)	Entropy 1.85870 (1.86454)	Top-1 acc 31.641 (33.974)	Top-5 acc 55.078 (57.774)	lr 0.02473
Train [8][2950/3239]	Time 0.273 (0.506)	Data Time 0.001 (0.010)	Loss 3.8665 (3.8491)	Entropy 1.85863 (1.86452)	Top-1 acc 31.641 (33.974)	Top-5 acc 55.469 (57.773)	lr 0.02473
Train [8][2960/3239]	Time 0.213 (0.506)	Data Time 0.001 (0.010)	Loss 3.7830 (3.8490)	Entropy 1.85860 (1.86450)	Top-1 acc 35.547 (33.976)	Top-5 acc 58.203 (57.774)	lr 0.02473
Train [8][2970/3239]	Time 0.339 (0.506)	Data Time 0.001 (0.010)	Loss 3.8854 (3.8489)	Entropy 1.85858 (1.86448)	Top-1 acc 32.812 (33.979)	Top-5 acc 52.734 (57.773)	lr 0.02473
Train [8][2980/3239]	Time 0.142 (0.505)	Data Time 0.001 (0.010)	Loss 3.7534 (3.8489)	Entropy 1.85850 (1.86447)	Top-1 acc 37.500 (33.979)	Top-5 acc 60.156 (57.775)	lr 0.02473
Train [8][2990/3239]	Time 0.202 (0.505)	Data Time 0.002 (0.010)	Loss 3.9690 (3.8490)	Entropy 1.85846 (1.86445)	Top-1 acc 35.547 (33.979)	Top-5 acc 54.688 (57.770)	lr 0.02473
Train [8][3000/3239]	Time 0.213 (0.505)	Data Time 0.001 (0.010)	Loss 3.6267 (3.8490)	Entropy 1.85844 (1.86443)	Top-1 acc 39.453 (33.980)	Top-5 acc 62.109 (57.768)	lr 0.02473
Train [8][3010/3239]	Time 0.195 (0.505)	Data Time 0.001 (0.010)	Loss 3.6646 (3.8488)	Entropy 1.85838 (1.86441)	Top-1 acc 38.281 (33.984)	Top-5 acc 60.156 (57.770)	lr 0.02473
Train [8][3020/3239]	Time 0.236 (0.504)	Data Time 0.001 (0.010)	Loss 3.6206 (3.8487)	Entropy 1.85833 (1.86439)	Top-1 acc 36.328 (33.982)	Top-5 acc 61.328 (57.769)	lr 0.02473
Train [8][3030/3239]	Time 0.221 (0.504)	Data Time 0.002 (0.010)	Loss 3.6003 (3.8486)	Entropy 1.85826 (1.86437)	Top-1 acc 39.844 (33.983)	Top-5 acc 63.672 (57.772)	lr 0.02473
Train [8][3040/3239]	Time 0.302 (0.515)	Data Time 0.004 (0.010)	Loss 3.8635 (3.8487)	Entropy 1.85820 (1.86435)	Top-1 acc 32.031 (33.982)	Top-5 acc 57.812 (57.773)	lr 0.02473
Train [8][3050/3239]	Time 0.152 (0.515)	Data Time 0.002 (0.010)	Loss 3.8809 (3.8488)	Entropy 1.85816 (1.86433)	Top-1 acc 35.547 (33.982)	Top-5 acc 56.250 (57.769)	lr 0.02473
Train [8][3060/3239]	Time 0.237 (0.515)	Data Time 0.002 (0.010)	Loss 3.8292 (3.8486)	Entropy 1.85812 (1.86430)	Top-1 acc 35.938 (33.983)	Top-5 acc 60.156 (57.772)	lr 0.02473
Train [8][3070/3239]	Time 0.223 (0.514)	Data Time 0.001 (0.010)	Loss 4.0855 (3.8487)	Entropy 1.85811 (1.86428)	Top-1 acc 33.203 (33.983)	Top-5 acc 50.391 (57.772)	lr 0.02473
Train [8][3080/3239]	Time 0.344 (0.514)	Data Time 0.001 (0.010)	Loss 3.8340 (3.8486)	Entropy 1.85809 (1.86426)	Top-1 acc 37.500 (33.985)	Top-5 acc 59.375 (57.773)	lr 0.02473
Train [8][3090/3239]	Time 0.226 (0.514)	Data Time 0.001 (0.010)	Loss 3.8490 (3.8485)	Entropy 1.85803 (1.86424)	Top-1 acc 34.766 (33.989)	Top-5 acc 57.422 (57.775)	lr 0.02473
Train [8][3100/3239]	Time 0.258 (0.513)	Data Time 0.001 (0.010)	Loss 3.9116 (3.8485)	Entropy 1.85799 (1.86422)	Top-1 acc 32.422 (33.991)	Top-5 acc 59.375 (57.777)	lr 0.02473
Train [8][3110/3239]	Time 0.244 (0.513)	Data Time 0.002 (0.010)	Loss 3.7004 (3.8483)	Entropy 1.85795 (1.86420)	Top-1 acc 36.719 (33.996)	Top-5 acc 60.547 (57.783)	lr 0.02473
Train [8][3120/3239]	Time 0.218 (0.513)	Data Time 0.001 (0.010)	Loss 3.8917 (3.8483)	Entropy 1.85793 (1.86418)	Top-1 acc 32.031 (33.997)	Top-5 acc 58.203 (57.782)	lr 0.02473
Train [8][3130/3239]	Time 0.250 (0.513)	Data Time 0.001 (0.010)	Loss 3.9143 (3.8484)	Entropy 1.85790 (1.86416)	Top-1 acc 33.594 (33.999)	Top-5 acc 57.812 (57.781)	lr 0.02473
Train [8][3140/3239]	Time 0.140 (0.512)	Data Time 0.001 (0.010)	Loss 4.0418 (3.8485)	Entropy 1.85786 (1.86414)	Top-1 acc 28.125 (33.995)	Top-5 acc 53.906 (57.780)	lr 0.02473
Train [8][3150/3239]	Time 0.246 (0.512)	Data Time 0.001 (0.010)	Loss 3.8170 (3.8483)	Entropy 1.85784 (1.86412)	Top-1 acc 35.156 (33.998)	Top-5 acc 54.688 (57.783)	lr 0.02473
Train [8][3160/3239]	Time 0.198 (0.512)	Data Time 0.001 (0.010)	Loss 3.7555 (3.8483)	Entropy 1.85779 (1.86410)	Top-1 acc 35.156 (33.998)	Top-5 acc 62.109 (57.785)	lr 0.02473
Train [8][3170/3239]	Time 0.193 (0.511)	Data Time 0.001 (0.010)	Loss 3.7673 (3.8484)	Entropy 1.85776 (1.86408)	Top-1 acc 33.984 (33.995)	Top-5 acc 61.719 (57.785)	lr 0.02473
Train [8][3180/3239]	Time 0.297 (0.511)	Data Time 0.000 (0.010)	Loss 3.9031 (3.8484)	Entropy 1.85772 (1.86406)	Top-1 acc 30.859 (33.993)	Top-5 acc 56.641 (57.783)	lr 0.02473
Train [8][3190/3239]	Time 0.202 (0.511)	Data Time 0.000 (0.010)	Loss 3.8192 (3.8482)	Entropy 1.85769 (1.86405)	Top-1 acc 35.938 (33.998)	Top-5 acc 57.031 (57.787)	lr 0.02473
Train [8][3200/3239]	Time 0.179 (0.510)	Data Time 0.000 (0.010)	Loss 3.9763 (3.8481)	Entropy 1.85758 (1.86403)	Top-1 acc 29.688 (34.003)	Top-5 acc 54.688 (57.788)	lr 0.02473
Train [8][3210/3239]	Time 0.165 (0.510)	Data Time 0.000 (0.010)	Loss 3.8091 (3.8481)	Entropy 1.85753 (1.86401)	Top-1 acc 34.766 (34.003)	Top-5 acc 57.422 (57.788)	lr 0.02473
Train [8][3220/3239]	Time 0.202 (0.510)	Data Time 0.000 (0.010)	Loss 3.8716 (3.8478)	Entropy 1.85750 (1.86398)	Top-1 acc 37.109 (34.013)	Top-5 acc 54.297 (57.796)	lr 0.02473
Train [8][3230/3239]	Time 0.200 (0.509)	Data Time 0.000 (0.010)	Loss 3.8485 (3.8477)	Entropy 1.85742 (1.86396)	Top-1 acc 32.422 (34.014)	Top-5 acc 58.984 (57.799)	lr 0.02473
Train [8][3239/3239]	Time 2.023 (0.509)	Data Time 0.000 (0.009)	Loss 4.0078 (3.8479)	Entropy 1.85742 (1.86395)	Top-1 acc 25.926 (34.012)	Top-5 acc 50.617 (57.797)	lr 0.02473
==========Valid [8/120]	loss 2.695	top-1 acc 42.507 (42.507)	top-5 acc 67.525	Train top-1 34.012	top-5 57.797	Entropy 1.85742	Latency-None: 0.000ms	Flops: 527.05M
Train [9][0/3239]	Time 26.592 (26.592)	Data Time 25.385 (25.385)	Loss 3.6986 (3.6986)	Entropy 1.85730 (1.85730)	Top-1 acc 35.547 (35.547)	Top-5 acc 58.594 (58.594)	lr 0.02473
Train [9][10/3239]	Time 2.563 (2.985)	Data Time 0.001 (2.342)	Loss 3.6494 (3.7696)	Entropy 1.85730 (1.85730)	Top-1 acc 36.719 (35.156)	Top-5 acc 62.109 (58.523)	lr 0.02473
Train [9][20/3239]	Time 0.180 (1.669)	Data Time 0.001 (1.227)	Loss 3.8319 (3.7659)	Entropy 1.85726 (1.85728)	Top-1 acc 33.203 (35.454)	Top-5 acc 58.203 (59.115)	lr 0.02473
Train [9][30/3239]	Time 0.157 (1.273)	Data Time 0.001 (0.833)	Loss 3.8942 (3.7803)	Entropy 1.85721 (1.85726)	Top-1 acc 33.203 (35.761)	Top-5 acc 56.641 (58.947)	lr 0.02473
Train [9][40/3239]	Time 0.248 (1.071)	Data Time 0.002 (0.631)	Loss 3.9601 (3.7983)	Entropy 1.85712 (1.85723)	Top-1 acc 33.594 (35.156)	Top-5 acc 58.203 (58.794)	lr 0.02473
Train [9][50/3239]	Time 0.212 (0.942)	Data Time 0.001 (0.507)	Loss 3.8334 (3.8034)	Entropy 1.85711 (1.85721)	Top-1 acc 32.031 (34.873)	Top-5 acc 60.156 (58.594)	lr 0.02473
Train [9][60/3239]	Time 0.221 (0.859)	Data Time 0.001 (0.425)	Loss 3.8573 (3.7970)	Entropy 1.85708 (1.85719)	Top-1 acc 35.156 (35.150)	Top-5 acc 62.500 (58.850)	lr 0.02473
Train [9][70/3239]	Time 0.184 (0.795)	Data Time 0.001 (0.365)	Loss 3.8004 (3.7947)	Entropy 1.85705 (1.85717)	Top-1 acc 33.203 (35.228)	Top-5 acc 57.812 (58.841)	lr 0.02473
Train [9][80/3239]	Time 0.193 (0.747)	Data Time 0.001 (0.320)	Loss 3.8894 (3.8002)	Entropy 1.85701 (1.85715)	Top-1 acc 31.250 (35.089)	Top-5 acc 58.984 (58.796)	lr 0.02473
Train [9][90/3239]	Time 0.241 (0.710)	Data Time 0.001 (0.286)	Loss 3.8555 (3.8043)	Entropy 1.85696 (1.85714)	Top-1 acc 29.688 (35.122)	Top-5 acc 57.812 (58.692)	lr 0.02472
Train [9][100/3239]	Time 0.222 (0.681)	Data Time 0.001 (0.258)	Loss 3.6982 (3.7986)	Entropy 1.85693 (1.85712)	Top-1 acc 38.281 (35.207)	Top-5 acc 61.328 (58.834)	lr 0.02472
Train [9][110/3239]	Time 0.210 (0.658)	Data Time 0.001 (0.235)	Loss 4.0251 (3.8035)	Entropy 1.85690 (1.85710)	Top-1 acc 29.297 (35.044)	Top-5 acc 55.078 (58.738)	lr 0.02472
Train [9][120/3239]	Time 2.204 (0.638)	Data Time 0.001 (0.216)	Loss 3.5602 (3.7988)	Entropy 1.85690 (1.85709)	Top-1 acc 37.891 (35.111)	Top-5 acc 66.797 (58.865)	lr 0.02472
Train [9][130/3239]	Time 0.252 (0.606)	Data Time 0.002 (0.199)	Loss 3.8330 (3.8041)	Entropy 1.85686 (1.85707)	Top-1 acc 31.250 (35.046)	Top-5 acc 57.812 (58.776)	lr 0.02472
Train [9][140/3239]	Time 0.311 (0.595)	Data Time 0.001 (0.185)	Loss 4.1091 (3.8066)	Entropy 1.85683 (1.85705)	Top-1 acc 27.344 (35.015)	Top-5 acc 51.172 (58.699)	lr 0.02472
Train [9][150/3239]	Time 0.247 (0.834)	Data Time 0.003 (0.173)	Loss 3.9009 (3.8066)	Entropy 1.85678 (1.85703)	Top-1 acc 31.250 (34.947)	Top-5 acc 57.031 (58.651)	lr 0.02472
Train [9][160/3239]	Time 0.199 (0.808)	Data Time 0.002 (0.163)	Loss 3.8378 (3.8085)	Entropy 1.85673 (1.85702)	Top-1 acc 32.812 (34.892)	Top-5 acc 55.859 (58.557)	lr 0.02472
Train [9][170/3239]	Time 0.223 (0.786)	Data Time 0.002 (0.153)	Loss 3.8153 (3.8070)	Entropy 1.85660 (1.85699)	Top-1 acc 31.641 (34.875)	Top-5 acc 56.641 (58.596)	lr 0.02472
Train [9][180/3239]	Time 0.237 (0.766)	Data Time 0.002 (0.145)	Loss 3.7065 (3.8057)	Entropy 1.85649 (1.85697)	Top-1 acc 37.891 (34.962)	Top-5 acc 61.719 (58.551)	lr 0.02472
Train [9][190/3239]	Time 0.211 (0.749)	Data Time 0.001 (0.137)	Loss 3.8326 (3.8060)	Entropy 1.85640 (1.85694)	Top-1 acc 32.031 (34.970)	Top-5 acc 55.859 (58.549)	lr 0.02472
Train [9][200/3239]	Time 0.213 (0.732)	Data Time 0.001 (0.131)	Loss 3.7630 (3.8097)	Entropy 1.85637 (1.85692)	Top-1 acc 35.156 (34.806)	Top-5 acc 57.812 (58.454)	lr 0.02472
Train [9][210/3239]	Time 0.247 (0.717)	Data Time 0.001 (0.125)	Loss 3.7349 (3.8088)	Entropy 1.85633 (1.85689)	Top-1 acc 37.500 (34.793)	Top-5 acc 60.156 (58.473)	lr 0.02472
Train [9][220/3239]	Time 0.194 (0.703)	Data Time 0.001 (0.119)	Loss 3.7695 (3.8079)	Entropy 1.85632 (1.85686)	Top-1 acc 37.891 (34.849)	Top-5 acc 58.984 (58.493)	lr 0.02472
Train [9][230/3239]	Time 2.314 (0.691)	Data Time 0.001 (0.114)	Loss 3.8444 (3.8074)	Entropy 1.85632 (1.85684)	Top-1 acc 34.766 (34.843)	Top-5 acc 56.250 (58.492)	lr 0.02472
Train [9][240/3239]	Time 0.216 (0.672)	Data Time 0.001 (0.109)	Loss 3.8633 (3.8087)	Entropy 1.85629 (1.85682)	Top-1 acc 34.766 (34.814)	Top-5 acc 58.984 (58.475)	lr 0.02472
Train [9][250/3239]	Time 0.195 (0.661)	Data Time 0.001 (0.105)	Loss 3.6599 (3.8065)	Entropy 1.85626 (1.85680)	Top-1 acc 37.109 (34.826)	Top-5 acc 65.625 (58.546)	lr 0.02472
Train [9][260/3239]	Time 0.193 (0.651)	Data Time 0.001 (0.101)	Loss 3.8155 (3.8080)	Entropy 1.85618 (1.85677)	Top-1 acc 36.328 (34.814)	Top-5 acc 56.641 (58.502)	lr 0.02472
Train [9][270/3239]	Time 0.204 (0.642)	Data Time 0.001 (0.098)	Loss 3.9804 (3.8074)	Entropy 1.85610 (1.85675)	Top-1 acc 33.203 (34.777)	Top-5 acc 54.297 (58.516)	lr 0.02472
Train [9][280/3239]	Time 0.184 (0.634)	Data Time 0.001 (0.094)	Loss 3.6288 (3.8044)	Entropy 1.85602 (1.85672)	Top-1 acc 35.547 (34.831)	Top-5 acc 59.766 (58.573)	lr 0.02472
Train [9][290/3239]	Time 0.141 (0.627)	Data Time 0.001 (0.091)	Loss 3.9163 (3.8043)	Entropy 1.85595 (1.85670)	Top-1 acc 31.250 (34.830)	Top-5 acc 55.078 (58.596)	lr 0.02472
Train [9][300/3239]	Time 0.246 (0.620)	Data Time 0.001 (0.088)	Loss 3.8890 (3.8038)	Entropy 1.85589 (1.85667)	Top-1 acc 35.156 (34.821)	Top-5 acc 56.641 (58.594)	lr 0.02472
Train [9][310/3239]	Time 0.207 (0.614)	Data Time 0.001 (0.085)	Loss 3.9211 (3.8056)	Entropy 1.85585 (1.85665)	Top-1 acc 35.547 (34.761)	Top-5 acc 53.516 (58.535)	lr 0.02472
Train [9][320/3239]	Time 0.246 (0.608)	Data Time 0.001 (0.083)	Loss 3.8902 (3.8070)	Entropy 1.85581 (1.85662)	Top-1 acc 33.203 (34.722)	Top-5 acc 55.078 (58.493)	lr 0.02472
Train [9][330/3239]	Time 0.229 (0.603)	Data Time 0.001 (0.080)	Loss 3.8776 (3.8084)	Entropy 1.85581 (1.85660)	Top-1 acc 33.984 (34.703)	Top-5 acc 56.641 (58.467)	lr 0.02472
Train [9][340/3239]	Time 2.337 (0.597)	Data Time 0.001 (0.078)	Loss 3.8136 (3.8091)	Entropy 1.85581 (1.85657)	Top-1 acc 32.031 (34.681)	Top-5 acc 57.422 (58.472)	lr 0.02472
Train [9][350/3239]	Time 0.218 (0.587)	Data Time 0.001 (0.076)	Loss 3.8106 (3.8093)	Entropy 1.85576 (1.85655)	Top-1 acc 33.984 (34.678)	Top-5 acc 57.812 (58.478)	lr 0.02472
Train [9][360/3239]	Time 0.247 (0.583)	Data Time 0.002 (0.074)	Loss 3.8066 (3.8090)	Entropy 1.85576 (1.85653)	Top-1 acc 33.594 (34.687)	Top-5 acc 60.938 (58.501)	lr 0.02472
Train [9][370/3239]	Time 0.218 (0.578)	Data Time 0.001 (0.072)	Loss 3.8936 (3.8097)	Entropy 1.85566 (1.85651)	Top-1 acc 28.125 (34.652)	Top-5 acc 56.250 (58.500)	lr 0.02472
Train [9][380/3239]	Time 0.240 (0.574)	Data Time 0.002 (0.070)	Loss 3.7666 (3.8099)	Entropy 1.85564 (1.85648)	Top-1 acc 34.766 (34.656)	Top-5 acc 58.594 (58.518)	lr 0.02472
Train [9][390/3239]	Time 0.205 (0.570)	Data Time 0.001 (0.068)	Loss 3.7469 (3.8095)	Entropy 1.85562 (1.85646)	Top-1 acc 35.156 (34.681)	Top-5 acc 61.328 (58.536)	lr 0.02472
Train [9][400/3239]	Time 0.243 (0.566)	Data Time 0.001 (0.067)	Loss 3.8994 (3.8120)	Entropy 1.85559 (1.85644)	Top-1 acc 30.469 (34.641)	Top-5 acc 56.641 (58.494)	lr 0.02472
Train [9][410/3239]	Time 0.297 (0.563)	Data Time 0.001 (0.065)	Loss 3.7026 (3.8123)	Entropy 1.85553 (1.85642)	Top-1 acc 35.547 (34.593)	Top-5 acc 59.375 (58.484)	lr 0.02472
Train [9][420/3239]	Time 0.188 (0.559)	Data Time 0.001 (0.064)	Loss 3.9492 (3.8130)	Entropy 1.85550 (1.85640)	Top-1 acc 33.203 (34.576)	Top-5 acc 56.641 (58.466)	lr 0.02472
Train [9][430/3239]	Time 0.193 (0.557)	Data Time 0.001 (0.062)	Loss 3.8601 (3.8122)	Entropy 1.85547 (1.85638)	Top-1 acc 32.812 (34.604)	Top-5 acc 57.422 (58.480)	lr 0.02472
Train [9][440/3239]	Time 0.247 (0.553)	Data Time 0.001 (0.061)	Loss 3.8708 (3.8135)	Entropy 1.85542 (1.85636)	Top-1 acc 32.031 (34.588)	Top-5 acc 55.469 (58.474)	lr 0.02472
Train [9][450/3239]	Time 2.172 (0.550)	Data Time 0.001 (0.059)	Loss 3.7876 (3.8140)	Entropy 1.85542 (1.85634)	Top-1 acc 37.500 (34.605)	Top-5 acc 56.250 (58.441)	lr 0.02472
Train [9][460/3239]	Time 0.193 (0.543)	Data Time 0.001 (0.058)	Loss 3.5986 (3.8124)	Entropy 1.85541 (1.85632)	Top-1 acc 42.188 (34.642)	Top-5 acc 64.062 (58.473)	lr 0.02472
Train [9][470/3239]	Time 0.228 (0.540)	Data Time 0.001 (0.057)	Loss 3.8355 (3.8117)	Entropy 1.85535 (1.85630)	Top-1 acc 36.328 (34.661)	Top-5 acc 55.859 (58.499)	lr 0.02472
Train [9][480/3239]	Time 0.189 (0.538)	Data Time 0.002 (0.056)	Loss 3.7998 (3.8115)	Entropy 1.85527 (1.85627)	Top-1 acc 33.203 (34.650)	Top-5 acc 58.594 (58.522)	lr 0.02472
Train [9][490/3239]	Time 0.224 (0.535)	Data Time 0.001 (0.055)	Loss 3.8290 (3.8109)	Entropy 1.85527 (1.85625)	Top-1 acc 33.594 (34.652)	Top-5 acc 57.031 (58.547)	lr 0.02472
Train [9][500/3239]	Time 0.153 (0.533)	Data Time 0.001 (0.054)	Loss 3.7792 (3.8110)	Entropy 1.85522 (1.85623)	Top-1 acc 34.375 (34.660)	Top-5 acc 57.422 (58.524)	lr 0.02472
Train [9][510/3239]	Time 0.279 (0.599)	Data Time 0.004 (0.053)	Loss 3.8469 (3.8110)	Entropy 1.85518 (1.85621)	Top-1 acc 36.719 (34.671)	Top-5 acc 58.984 (58.523)	lr 0.02472
Train [9][520/3239]	Time 0.166 (0.598)	Data Time 0.002 (0.052)	Loss 3.9807 (3.8115)	Entropy 1.85514 (1.85619)	Top-1 acc 31.250 (34.677)	Top-5 acc 55.859 (58.508)	lr 0.02472
Train [9][530/3239]	Time 0.258 (0.594)	Data Time 0.002 (0.051)	Loss 3.7782 (3.8123)	Entropy 1.85508 (1.85617)	Top-1 acc 35.156 (34.621)	Top-5 acc 60.938 (58.510)	lr 0.02472
Train [9][540/3239]	Time 0.224 (0.591)	Data Time 0.025 (0.050)	Loss 3.7786 (3.8108)	Entropy 1.85492 (1.85615)	Top-1 acc 32.812 (34.646)	Top-5 acc 58.984 (58.540)	lr 0.02472
Train [9][550/3239]	Time 0.252 (0.588)	Data Time 0.001 (0.049)	Loss 3.7736 (3.8119)	Entropy 1.85486 (1.85613)	Top-1 acc 33.594 (34.630)	Top-5 acc 63.672 (58.538)	lr 0.02472
Train [9][560/3239]	Time 2.212 (0.585)	Data Time 0.002 (0.048)	Loss 3.9201 (3.8116)	Entropy 1.85486 (1.85611)	Top-1 acc 33.594 (34.647)	Top-5 acc 56.641 (58.545)	lr 0.02471
Train [9][570/3239]	Time 0.241 (0.579)	Data Time 0.002 (0.048)	Loss 3.9279 (3.8122)	Entropy 1.85484 (1.85609)	Top-1 acc 33.594 (34.604)	Top-5 acc 55.078 (58.538)	lr 0.02471
Train [9][580/3239]	Time 0.211 (0.576)	Data Time 0.001 (0.047)	Loss 3.6835 (3.8120)	Entropy 1.85478 (1.85606)	Top-1 acc 37.109 (34.604)	Top-5 acc 62.891 (58.541)	lr 0.02471
Train [9][590/3239]	Time 0.220 (0.574)	Data Time 0.001 (0.046)	Loss 3.7525 (3.8118)	Entropy 1.85478 (1.85604)	Top-1 acc 39.062 (34.606)	Top-5 acc 59.375 (58.546)	lr 0.02471
Train [9][600/3239]	Time 0.236 (0.572)	Data Time 0.001 (0.045)	Loss 3.8115 (3.8115)	Entropy 1.85466 (1.85602)	Top-1 acc 32.031 (34.606)	Top-5 acc 60.547 (58.554)	lr 0.02471
Train [9][610/3239]	Time 0.220 (0.569)	Data Time 0.001 (0.045)	Loss 3.8571 (3.8118)	Entropy 1.85465 (1.85600)	Top-1 acc 28.906 (34.585)	Top-5 acc 55.469 (58.548)	lr 0.02471
Train [9][620/3239]	Time 0.163 (0.567)	Data Time 0.001 (0.044)	Loss 4.0528 (3.8133)	Entropy 1.85456 (1.85597)	Top-1 acc 30.078 (34.571)	Top-5 acc 55.078 (58.516)	lr 0.02471
Train [9][630/3239]	Time 0.208 (0.564)	Data Time 0.002 (0.043)	Loss 3.8192 (3.8138)	Entropy 1.85449 (1.85595)	Top-1 acc 33.594 (34.566)	Top-5 acc 57.422 (58.488)	lr 0.02471
Train [9][640/3239]	Time 0.214 (0.562)	Data Time 0.001 (0.043)	Loss 3.8559 (3.8130)	Entropy 1.85444 (1.85593)	Top-1 acc 34.375 (34.593)	Top-5 acc 55.469 (58.498)	lr 0.02471
Train [9][650/3239]	Time 0.196 (0.560)	Data Time 0.001 (0.042)	Loss 3.9762 (3.8135)	Entropy 1.85434 (1.85591)	Top-1 acc 28.906 (34.575)	Top-5 acc 56.250 (58.485)	lr 0.02471
Train [9][660/3239]	Time 0.246 (0.558)	Data Time 0.001 (0.042)	Loss 3.8578 (3.8130)	Entropy 1.85429 (1.85588)	Top-1 acc 35.938 (34.578)	Top-5 acc 58.594 (58.490)	lr 0.02471
Train [9][670/3239]	Time 2.294 (0.556)	Data Time 0.002 (0.041)	Loss 3.6634 (3.8134)	Entropy 1.85429 (1.85586)	Top-1 acc 39.453 (34.559)	Top-5 acc 60.938 (58.467)	lr 0.02471
Train [9][680/3239]	Time 0.323 (0.551)	Data Time 0.001 (0.040)	Loss 3.7443 (3.8127)	Entropy 1.85416 (1.85583)	Top-1 acc 42.578 (34.604)	Top-5 acc 58.203 (58.491)	lr 0.02471
Train [9][690/3239]	Time 0.206 (0.549)	Data Time 0.001 (0.040)	Loss 3.9537 (3.8132)	Entropy 1.85411 (1.85581)	Top-1 acc 31.641 (34.607)	Top-5 acc 52.734 (58.472)	lr 0.02471
Train [9][700/3239]	Time 0.187 (0.547)	Data Time 0.001 (0.039)	Loss 3.9248 (3.8135)	Entropy 1.85402 (1.85578)	Top-1 acc 31.250 (34.603)	Top-5 acc 55.078 (58.453)	lr 0.02471
Train [9][710/3239]	Time 0.202 (0.545)	Data Time 0.001 (0.039)	Loss 3.8676 (3.8120)	Entropy 1.85397 (1.85576)	Top-1 acc 34.766 (34.645)	Top-5 acc 57.031 (58.483)	lr 0.02471
Train [9][720/3239]	Time 0.221 (0.543)	Data Time 0.001 (0.038)	Loss 3.6692 (3.8118)	Entropy 1.85396 (1.85573)	Top-1 acc 38.281 (34.663)	Top-5 acc 58.594 (58.488)	lr 0.02471
Train [9][730/3239]	Time 0.182 (0.541)	Data Time 0.001 (0.038)	Loss 3.8340 (3.8126)	Entropy 1.85386 (1.85571)	Top-1 acc 33.984 (34.662)	Top-5 acc 58.203 (58.463)	lr 0.02471
Train [9][740/3239]	Time 0.204 (0.540)	Data Time 0.001 (0.037)	Loss 3.7661 (3.8119)	Entropy 1.85382 (1.85568)	Top-1 acc 31.250 (34.680)	Top-5 acc 58.984 (58.480)	lr 0.02471
Train [9][750/3239]	Time 0.138 (0.538)	Data Time 0.001 (0.037)	Loss 3.9415 (3.8124)	Entropy 1.85379 (1.85566)	Top-1 acc 30.469 (34.673)	Top-5 acc 57.812 (58.483)	lr 0.02471
Train [9][760/3239]	Time 0.241 (0.536)	Data Time 0.001 (0.036)	Loss 3.9042 (3.8125)	Entropy 1.85374 (1.85563)	Top-1 acc 30.078 (34.661)	Top-5 acc 55.859 (58.464)	lr 0.02471
Train [9][770/3239]	Time 0.158 (0.535)	Data Time 0.002 (0.036)	Loss 3.8298 (3.8120)	Entropy 1.85379 (1.85561)	Top-1 acc 30.469 (34.667)	Top-5 acc 55.469 (58.478)	lr 0.02471
Train [9][780/3239]	Time 2.284 (0.533)	Data Time 0.002 (0.036)	Loss 3.5491 (3.8111)	Entropy 1.85379 (1.85558)	Top-1 acc 39.453 (34.686)	Top-5 acc 62.500 (58.492)	lr 0.02471
Train [9][790/3239]	Time 0.192 (0.529)	Data Time 0.001 (0.035)	Loss 3.7916 (3.8107)	Entropy 1.85374 (1.85556)	Top-1 acc 31.641 (34.689)	Top-5 acc 57.422 (58.490)	lr 0.02471
Train [9][800/3239]	Time 0.199 (0.528)	Data Time 0.001 (0.035)	Loss 3.7854 (3.8109)	Entropy 1.85365 (1.85554)	Top-1 acc 37.891 (34.691)	Top-5 acc 62.500 (58.491)	lr 0.02471
Train [9][810/3239]	Time 0.215 (0.526)	Data Time 0.001 (0.034)	Loss 3.6160 (3.8099)	Entropy 1.85360 (1.85551)	Top-1 acc 37.500 (34.701)	Top-5 acc 62.500 (58.505)	lr 0.02471
Train [9][820/3239]	Time 0.150 (0.525)	Data Time 0.001 (0.034)	Loss 3.8802 (3.8096)	Entropy 1.85359 (1.85549)	Top-1 acc 35.156 (34.714)	Top-5 acc 56.641 (58.508)	lr 0.02471
Train [9][830/3239]	Time 0.200 (0.524)	Data Time 0.001 (0.034)	Loss 3.6155 (3.8090)	Entropy 1.85356 (1.85547)	Top-1 acc 38.281 (34.720)	Top-5 acc 62.891 (58.518)	lr 0.02471
Train [9][840/3239]	Time 0.301 (0.523)	Data Time 0.001 (0.033)	Loss 3.9052 (3.8092)	Entropy 1.85354 (1.85544)	Top-1 acc 35.938 (34.713)	Top-5 acc 55.078 (58.515)	lr 0.02471
Train [9][850/3239]	Time 0.222 (0.522)	Data Time 0.001 (0.033)	Loss 3.8139 (3.8092)	Entropy 1.85351 (1.85542)	Top-1 acc 35.156 (34.711)	Top-5 acc 58.594 (58.522)	lr 0.02471
Train [9][860/3239]	Time 0.208 (0.521)	Data Time 0.001 (0.032)	Loss 4.0863 (3.8093)	Entropy 1.85352 (1.85540)	Top-1 acc 29.297 (34.706)	Top-5 acc 53.125 (58.525)	lr 0.02471
Train [9][870/3239]	Time 0.335 (0.563)	Data Time 0.003 (0.032)	Loss 3.7472 (3.8090)	Entropy 1.85347 (1.85538)	Top-1 acc 36.328 (34.716)	Top-5 acc 62.109 (58.532)	lr 0.02471
Train [9][880/3239]	Time 0.255 (0.561)	Data Time 0.002 (0.032)	Loss 3.8353 (3.8095)	Entropy 1.85346 (1.85536)	Top-1 acc 35.547 (34.715)	Top-5 acc 53.516 (58.509)	lr 0.02471
Train [9][890/3239]	Time 2.191 (0.559)	Data Time 0.002 (0.031)	Loss 3.6968 (3.8095)	Entropy 1.85346 (1.85534)	Top-1 acc 36.328 (34.713)	Top-5 acc 59.766 (58.503)	lr 0.02471
Train [9][900/3239]	Time 0.168 (0.556)	Data Time 0.002 (0.031)	Loss 3.7571 (3.8101)	Entropy 1.85344 (1.85531)	Top-1 acc 35.938 (34.705)	Top-5 acc 57.031 (58.481)	lr 0.02471
Train [9][910/3239]	Time 0.143 (0.554)	Data Time 0.001 (0.031)	Loss 3.8666 (3.8102)	Entropy 1.85336 (1.85529)	Top-1 acc 34.375 (34.706)	Top-5 acc 55.469 (58.475)	lr 0.02471
Train [9][920/3239]	Time 0.200 (0.552)	Data Time 0.001 (0.031)	Loss 3.7263 (3.8100)	Entropy 1.85329 (1.85527)	Top-1 acc 35.156 (34.710)	Top-5 acc 60.156 (58.488)	lr 0.02471
Train [9][930/3239]	Time 0.222 (0.551)	Data Time 0.001 (0.030)	Loss 3.7050 (3.8103)	Entropy 1.85321 (1.85525)	Top-1 acc 32.812 (34.702)	Top-5 acc 60.156 (58.478)	lr 0.02471
Train [9][940/3239]	Time 0.197 (0.549)	Data Time 0.001 (0.030)	Loss 3.9497 (3.8104)	Entropy 1.85312 (1.85523)	Top-1 acc 31.641 (34.693)	Top-5 acc 57.422 (58.469)	lr 0.02471
Train [9][950/3239]	Time 0.209 (0.548)	Data Time 0.001 (0.030)	Loss 3.6834 (3.8098)	Entropy 1.85298 (1.85520)	Top-1 acc 34.766 (34.707)	Top-5 acc 61.328 (58.483)	lr 0.02471
Train [9][960/3239]	Time 0.185 (0.547)	Data Time 0.001 (0.029)	Loss 3.7823 (3.8097)	Entropy 1.85289 (1.85518)	Top-1 acc 37.109 (34.710)	Top-5 acc 60.156 (58.493)	lr 0.02471
Train [9][970/3239]	Time 0.214 (0.545)	Data Time 0.001 (0.029)	Loss 4.1106 (3.8099)	Entropy 1.85277 (1.85516)	Top-1 acc 28.125 (34.702)	Top-5 acc 53.125 (58.492)	lr 0.02471
Train [9][980/3239]	Time 0.159 (0.544)	Data Time 0.001 (0.029)	Loss 3.7064 (3.8102)	Entropy 1.85274 (1.85513)	Top-1 acc 37.500 (34.697)	Top-5 acc 60.938 (58.490)	lr 0.02471
Train [9][990/3239]	Time 0.185 (0.542)	Data Time 0.002 (0.029)	Loss 3.6730 (3.8098)	Entropy 1.85267 (1.85511)	Top-1 acc 35.156 (34.695)	Top-5 acc 58.984 (58.496)	lr 0.02471
Train [9][1000/3239]	Time 2.230 (0.541)	Data Time 0.001 (0.028)	Loss 3.8417 (3.8101)	Entropy 1.85267 (1.85508)	Top-1 acc 34.766 (34.691)	Top-5 acc 55.469 (58.484)	lr 0.02471
Train [9][1010/3239]	Time 0.178 (0.538)	Data Time 0.001 (0.028)	Loss 3.7780 (3.8106)	Entropy 1.85263 (1.85506)	Top-1 acc 35.938 (34.686)	Top-5 acc 58.203 (58.477)	lr 0.02471
Train [9][1020/3239]	Time 0.195 (0.537)	Data Time 0.001 (0.028)	Loss 3.6918 (3.8105)	Entropy 1.85260 (1.85504)	Top-1 acc 38.281 (34.686)	Top-5 acc 62.109 (58.485)	lr 0.02471
Train [9][1030/3239]	Time 0.196 (0.536)	Data Time 0.001 (0.028)	Loss 3.9845 (3.8102)	Entropy 1.85257 (1.85501)	Top-1 acc 36.719 (34.693)	Top-5 acc 56.641 (58.489)	lr 0.02470
Train [9][1040/3239]	Time 0.203 (0.535)	Data Time 0.001 (0.027)	Loss 3.8832 (3.8106)	Entropy 1.85256 (1.85499)	Top-1 acc 32.031 (34.685)	Top-5 acc 56.250 (58.489)	lr 0.02470
Train [9][1050/3239]	Time 0.223 (0.534)	Data Time 0.002 (0.027)	Loss 3.9035 (3.8108)	Entropy 1.85250 (1.85496)	Top-1 acc 31.250 (34.680)	Top-5 acc 58.984 (58.478)	lr 0.02470
Train [9][1060/3239]	Time 0.196 (0.533)	Data Time 0.001 (0.027)	Loss 3.8885 (3.8112)	Entropy 1.85243 (1.85494)	Top-1 acc 31.641 (34.674)	Top-5 acc 56.250 (58.468)	lr 0.02470
Train [9][1070/3239]	Time 0.193 (0.531)	Data Time 0.001 (0.027)	Loss 3.8823 (3.8111)	Entropy 1.85235 (1.85492)	Top-1 acc 32.031 (34.667)	Top-5 acc 54.297 (58.473)	lr 0.02470
Train [9][1080/3239]	Time 0.140 (0.530)	Data Time 0.001 (0.026)	Loss 3.9893 (3.8107)	Entropy 1.85232 (1.85489)	Top-1 acc 28.906 (34.679)	Top-5 acc 54.688 (58.484)	lr 0.02470
Train [9][1090/3239]	Time 0.214 (0.529)	Data Time 0.001 (0.026)	Loss 3.6435 (3.8106)	Entropy 1.85230 (1.85487)	Top-1 acc 36.328 (34.687)	Top-5 acc 58.984 (58.483)	lr 0.02470
Train [9][1100/3239]	Time 0.196 (0.528)	Data Time 0.001 (0.026)	Loss 4.0252 (3.8107)	Entropy 1.85225 (1.85485)	Top-1 acc 32.031 (34.677)	Top-5 acc 53.516 (58.479)	lr 0.02470
Train [9][1110/3239]	Time 2.139 (0.527)	Data Time 0.001 (0.026)	Loss 3.6785 (3.8099)	Entropy 1.85225 (1.85482)	Top-1 acc 39.844 (34.702)	Top-5 acc 64.453 (58.499)	lr 0.02470
Train [9][1120/3239]	Time 0.216 (0.525)	Data Time 0.001 (0.026)	Loss 3.8990 (3.8098)	Entropy 1.85214 (1.85480)	Top-1 acc 30.078 (34.708)	Top-5 acc 55.078 (58.504)	lr 0.02470
Train [9][1130/3239]	Time 0.238 (0.524)	Data Time 0.001 (0.025)	Loss 3.6897 (3.8100)	Entropy 1.85211 (1.85478)	Top-1 acc 39.453 (34.702)	Top-5 acc 65.625 (58.503)	lr 0.02470
Train [9][1140/3239]	Time 0.209 (0.523)	Data Time 0.001 (0.025)	Loss 3.8406 (3.8100)	Entropy 1.85205 (1.85475)	Top-1 acc 32.031 (34.695)	Top-5 acc 57.812 (58.509)	lr 0.02470
Train [9][1150/3239]	Time 0.204 (0.522)	Data Time 0.001 (0.025)	Loss 3.7574 (3.8101)	Entropy 1.85198 (1.85473)	Top-1 acc 33.594 (34.691)	Top-5 acc 60.547 (58.510)	lr 0.02470
Train [9][1160/3239]	Time 0.220 (0.521)	Data Time 0.001 (0.025)	Loss 3.9196 (3.8100)	Entropy 1.85195 (1.85470)	Top-1 acc 29.297 (34.681)	Top-5 acc 55.078 (58.513)	lr 0.02470
Train [9][1170/3239]	Time 0.153 (0.520)	Data Time 0.001 (0.025)	Loss 3.7220 (3.8101)	Entropy 1.85189 (1.85468)	Top-1 acc 33.203 (34.663)	Top-5 acc 60.547 (58.509)	lr 0.02470
Train [9][1180/3239]	Time 0.194 (0.519)	Data Time 0.001 (0.024)	Loss 3.8212 (3.8103)	Entropy 1.85184 (1.85466)	Top-1 acc 34.375 (34.667)	Top-5 acc 59.375 (58.509)	lr 0.02470
Train [9][1190/3239]	Time 0.204 (0.518)	Data Time 0.001 (0.024)	Loss 3.7413 (3.8099)	Entropy 1.85177 (1.85463)	Top-1 acc 33.594 (34.673)	Top-5 acc 63.281 (58.519)	lr 0.02470
Train [9][1200/3239]	Time 0.301 (0.517)	Data Time 0.001 (0.024)	Loss 3.7579 (3.8094)	Entropy 1.85170 (1.85461)	Top-1 acc 38.281 (34.687)	Top-5 acc 60.156 (58.527)	lr 0.02470
Train [9][1210/3239]	Time 0.232 (0.516)	Data Time 0.001 (0.024)	Loss 3.7949 (3.8093)	Entropy 1.85161 (1.85458)	Top-1 acc 35.938 (34.684)	Top-5 acc 58.203 (58.531)	lr 0.02470
Train [9][1220/3239]	Time 2.291 (0.516)	Data Time 0.001 (0.024)	Loss 3.9314 (3.8098)	Entropy 1.85161 (1.85456)	Top-1 acc 29.297 (34.675)	Top-5 acc 56.250 (58.522)	lr 0.02470
Train [9][1230/3239]	Time 0.190 (0.513)	Data Time 0.001 (0.023)	Loss 3.5537 (3.8093)	Entropy 1.85157 (1.85454)	Top-1 acc 37.109 (34.691)	Top-5 acc 63.672 (58.531)	lr 0.02470
Train [9][1240/3239]	Time 0.216 (0.543)	Data Time 0.002 (0.023)	Loss 3.6683 (3.8093)	Entropy 1.85156 (1.85451)	Top-1 acc 37.891 (34.696)	Top-5 acc 61.328 (58.540)	lr 0.02470
Train [9][1250/3239]	Time 0.226 (0.542)	Data Time 0.002 (0.023)	Loss 3.9628 (3.8091)	Entropy 1.85149 (1.85449)	Top-1 acc 35.547 (34.703)	Top-5 acc 54.297 (58.550)	lr 0.02470
Train [9][1260/3239]	Time 0.144 (0.541)	Data Time 0.001 (0.023)	Loss 3.7627 (3.8088)	Entropy 1.85142 (1.85446)	Top-1 acc 40.234 (34.716)	Top-5 acc 61.719 (58.557)	lr 0.02470
Train [9][1270/3239]	Time 0.150 (0.540)	Data Time 0.001 (0.023)	Loss 3.9253 (3.8092)	Entropy 1.85140 (1.85444)	Top-1 acc 35.156 (34.706)	Top-5 acc 53.516 (58.545)	lr 0.02470
Train [9][1280/3239]	Time 0.131 (0.539)	Data Time 0.001 (0.023)	Loss 3.9844 (3.8096)	Entropy 1.85137 (1.85442)	Top-1 acc 29.297 (34.701)	Top-5 acc 58.203 (58.541)	lr 0.02470
Train [9][1290/3239]	Time 0.324 (0.538)	Data Time 0.001 (0.023)	Loss 3.6959 (3.8100)	Entropy 1.85136 (1.85439)	Top-1 acc 35.156 (34.693)	Top-5 acc 61.328 (58.540)	lr 0.02470
Train [9][1300/3239]	Time 0.198 (0.537)	Data Time 0.001 (0.023)	Loss 3.6809 (3.8099)	Entropy 1.85132 (1.85437)	Top-1 acc 38.281 (34.692)	Top-5 acc 58.984 (58.543)	lr 0.02470
Train [9][1310/3239]	Time 0.237 (0.537)	Data Time 0.001 (0.022)	Loss 3.6696 (3.8096)	Entropy 1.85128 (1.85435)	Top-1 acc 38.672 (34.697)	Top-5 acc 62.500 (58.553)	lr 0.02470
Train [9][1320/3239]	Time 0.241 (0.536)	Data Time 0.001 (0.022)	Loss 3.8664 (3.8096)	Entropy 1.85121 (1.85432)	Top-1 acc 33.594 (34.698)	Top-5 acc 57.812 (58.549)	lr 0.02470
Train [9][1330/3239]	Time 2.079 (0.535)	Data Time 0.001 (0.022)	Loss 3.8231 (3.8097)	Entropy 1.85121 (1.85430)	Top-1 acc 37.891 (34.696)	Top-5 acc 60.156 (58.549)	lr 0.02470
Train [9][1340/3239]	Time 0.248 (0.532)	Data Time 0.001 (0.022)	Loss 3.7381 (3.8103)	Entropy 1.85114 (1.85428)	Top-1 acc 30.859 (34.677)	Top-5 acc 58.984 (58.530)	lr 0.02470
Train [9][1350/3239]	Time 0.209 (0.532)	Data Time 0.002 (0.022)	Loss 3.6806 (3.8095)	Entropy 1.85112 (1.85425)	Top-1 acc 37.109 (34.691)	Top-5 acc 63.281 (58.545)	lr 0.02470
Train [9][1360/3239]	Time 0.207 (0.531)	Data Time 0.001 (0.022)	Loss 3.8871 (3.8096)	Entropy 1.85109 (1.85423)	Top-1 acc 34.375 (34.692)	Top-5 acc 55.859 (58.541)	lr 0.02470
Train [9][1370/3239]	Time 0.194 (0.530)	Data Time 0.002 (0.021)	Loss 3.7215 (3.8095)	Entropy 1.85098 (1.85421)	Top-1 acc 35.938 (34.693)	Top-5 acc 58.984 (58.548)	lr 0.02470
Train [9][1380/3239]	Time 0.239 (0.529)	Data Time 0.001 (0.021)	Loss 3.8067 (3.8093)	Entropy 1.85093 (1.85418)	Top-1 acc 35.156 (34.702)	Top-5 acc 56.250 (58.555)	lr 0.02470
Train [9][1390/3239]	Time 0.239 (0.528)	Data Time 0.001 (0.021)	Loss 4.0476 (3.8089)	Entropy 1.85084 (1.85416)	Top-1 acc 31.641 (34.716)	Top-5 acc 52.734 (58.560)	lr 0.02470
Train [9][1400/3239]	Time 0.298 (0.528)	Data Time 0.001 (0.021)	Loss 3.7727 (3.8088)	Entropy 1.85081 (1.85413)	Top-1 acc 35.938 (34.721)	Top-5 acc 58.594 (58.566)	lr 0.02470
Train [9][1410/3239]	Time 0.156 (0.527)	Data Time 0.001 (0.021)	Loss 3.8748 (3.8091)	Entropy 1.85080 (1.85411)	Top-1 acc 32.031 (34.713)	Top-5 acc 56.641 (58.560)	lr 0.02470
Train [9][1420/3239]	Time 0.129 (0.526)	Data Time 0.001 (0.021)	Loss 3.7559 (3.8096)	Entropy 1.85078 (1.85409)	Top-1 acc 35.938 (34.703)	Top-5 acc 57.422 (58.546)	lr 0.02470
Train [9][1430/3239]	Time 0.254 (0.525)	Data Time 0.001 (0.021)	Loss 3.8449 (3.8093)	Entropy 1.85073 (1.85406)	Top-1 acc 35.938 (34.705)	Top-5 acc 57.031 (58.552)	lr 0.02470
Train [9][1440/3239]	Time 2.214 (0.524)	Data Time 0.001 (0.021)	Loss 3.9266 (3.8095)	Entropy 1.85073 (1.85404)	Top-1 acc 34.375 (34.703)	Top-5 acc 55.078 (58.538)	lr 0.02470
Train [9][1450/3239]	Time 0.204 (0.522)	Data Time 0.002 (0.020)	Loss 3.8029 (3.8095)	Entropy 1.85065 (1.85402)	Top-1 acc 35.156 (34.705)	Top-5 acc 61.328 (58.538)	lr 0.02470
Train [9][1460/3239]	Time 0.210 (0.521)	Data Time 0.001 (0.020)	Loss 3.8568 (3.8099)	Entropy 1.85058 (1.85399)	Top-1 acc 36.328 (34.701)	Top-5 acc 60.547 (58.527)	lr 0.02470
Train [9][1470/3239]	Time 0.203 (0.521)	Data Time 0.001 (0.020)	Loss 3.8316 (3.8101)	Entropy 1.85045 (1.85397)	Top-1 acc 32.812 (34.688)	Top-5 acc 56.250 (58.526)	lr 0.02470
Train [9][1480/3239]	Time 0.137 (0.520)	Data Time 0.001 (0.020)	Loss 4.0107 (3.8106)	Entropy 1.85039 (1.85395)	Top-1 acc 33.203 (34.684)	Top-5 acc 53.125 (58.517)	lr 0.02469
Train [9][1490/3239]	Time 0.211 (0.519)	Data Time 0.001 (0.020)	Loss 3.7587 (3.8108)	Entropy 1.85035 (1.85392)	Top-1 acc 31.641 (34.676)	Top-5 acc 55.859 (58.507)	lr 0.02469
Train [9][1500/3239]	Time 0.326 (0.519)	Data Time 0.001 (0.020)	Loss 3.8491 (3.8107)	Entropy 1.85034 (1.85390)	Top-1 acc 30.859 (34.666)	Top-5 acc 55.078 (58.513)	lr 0.02469
Train [9][1510/3239]	Time 0.155 (0.518)	Data Time 0.001 (0.020)	Loss 3.8052 (3.8102)	Entropy 1.85031 (1.85387)	Top-1 acc 37.891 (34.678)	Top-5 acc 58.594 (58.527)	lr 0.02469
Train [9][1520/3239]	Time 0.195 (0.517)	Data Time 0.001 (0.020)	Loss 3.8792 (3.8101)	Entropy 1.85013 (1.85385)	Top-1 acc 31.250 (34.683)	Top-5 acc 57.422 (58.534)	lr 0.02469
Train [9][1530/3239]	Time 0.222 (0.517)	Data Time 0.002 (0.019)	Loss 3.9615 (3.8102)	Entropy 1.85011 (1.85383)	Top-1 acc 30.469 (34.678)	Top-5 acc 54.297 (58.531)	lr 0.02469
Train [9][1540/3239]	Time 0.188 (0.516)	Data Time 0.001 (0.019)	Loss 3.8536 (3.8101)	Entropy 1.85004 (1.85380)	Top-1 acc 34.375 (34.682)	Top-5 acc 54.688 (58.535)	lr 0.02469
Train [9][1550/3239]	Time 2.250 (0.515)	Data Time 0.001 (0.019)	Loss 3.7719 (3.8101)	Entropy 1.85004 (1.85378)	Top-1 acc 37.891 (34.685)	Top-5 acc 59.375 (58.529)	lr 0.02469
Train [9][1560/3239]	Time 0.182 (0.513)	Data Time 0.001 (0.019)	Loss 3.8646 (3.8097)	Entropy 1.84999 (1.85375)	Top-1 acc 32.812 (34.686)	Top-5 acc 55.469 (58.540)	lr 0.02469
Train [9][1570/3239]	Time 0.213 (0.513)	Data Time 0.001 (0.019)	Loss 3.6302 (3.8096)	Entropy 1.84997 (1.85373)	Top-1 acc 40.234 (34.687)	Top-5 acc 61.328 (58.542)	lr 0.02469
Train [9][1580/3239]	Time 0.206 (0.512)	Data Time 0.001 (0.019)	Loss 3.8373 (3.8095)	Entropy 1.84989 (1.85371)	Top-1 acc 36.328 (34.685)	Top-5 acc 58.984 (58.543)	lr 0.02469
Train [9][1590/3239]	Time 0.295 (0.512)	Data Time 0.001 (0.019)	Loss 3.9200 (3.8096)	Entropy 1.84983 (1.85368)	Top-1 acc 32.812 (34.684)	Top-5 acc 58.594 (58.539)	lr 0.02469
Train [9][1600/3239]	Time 0.273 (0.534)	Data Time 0.004 (0.019)	Loss 3.9950 (3.8099)	Entropy 1.84977 (1.85366)	Top-1 acc 33.203 (34.679)	Top-5 acc 54.688 (58.529)	lr 0.02469
Train [9][1610/3239]	Time 0.182 (0.533)	Data Time 0.002 (0.019)	Loss 3.7058 (3.8099)	Entropy 1.84968 (1.85363)	Top-1 acc 38.281 (34.685)	Top-5 acc 61.328 (58.534)	lr 0.02469
Train [9][1620/3239]	Time 0.222 (0.532)	Data Time 0.002 (0.019)	Loss 3.8901 (3.8100)	Entropy 1.84965 (1.85361)	Top-1 acc 32.031 (34.691)	Top-5 acc 57.031 (58.534)	lr 0.02469
Train [9][1630/3239]	Time 0.152 (0.532)	Data Time 0.002 (0.018)	Loss 3.7570 (3.8099)	Entropy 1.84962 (1.85358)	Top-1 acc 33.984 (34.688)	Top-5 acc 58.203 (58.535)	lr 0.02469
Train [9][1640/3239]	Time 0.169 (0.531)	Data Time 0.002 (0.018)	Loss 3.6639 (3.8102)	Entropy 1.84956 (1.85356)	Top-1 acc 36.328 (34.681)	Top-5 acc 62.500 (58.533)	lr 0.02469
Train [9][1650/3239]	Time 0.195 (0.530)	Data Time 0.002 (0.018)	Loss 3.8625 (3.8100)	Entropy 1.84950 (1.85354)	Top-1 acc 34.766 (34.681)	Top-5 acc 58.594 (58.541)	lr 0.02469
Train [9][1660/3239]	Time 2.142 (0.529)	Data Time 0.001 (0.018)	Loss 3.8963 (3.8101)	Entropy 1.84950 (1.85351)	Top-1 acc 35.547 (34.681)	Top-5 acc 58.594 (58.544)	lr 0.02469
Train [9][1670/3239]	Time 0.252 (0.527)	Data Time 0.002 (0.018)	Loss 3.8492 (3.8099)	Entropy 1.84944 (1.85349)	Top-1 acc 33.984 (34.684)	Top-5 acc 61.328 (58.550)	lr 0.02469
Train [9][1680/3239]	Time 0.253 (0.527)	Data Time 0.001 (0.018)	Loss 3.7327 (3.8098)	Entropy 1.84941 (1.85346)	Top-1 acc 33.984 (34.686)	Top-5 acc 60.156 (58.549)	lr 0.02469
Train [9][1690/3239]	Time 0.201 (0.526)	Data Time 0.001 (0.018)	Loss 3.9057 (3.8100)	Entropy 1.84934 (1.85344)	Top-1 acc 32.812 (34.680)	Top-5 acc 53.906 (58.536)	lr 0.02469
Train [9][1700/3239]	Time 0.205 (0.526)	Data Time 0.001 (0.018)	Loss 3.9100 (3.8097)	Entropy 1.84930 (1.85341)	Top-1 acc 33.984 (34.691)	Top-5 acc 55.859 (58.543)	lr 0.02469
Train [9][1710/3239]	Time 0.180 (0.525)	Data Time 0.002 (0.018)	Loss 3.7643 (3.8098)	Entropy 1.84929 (1.85339)	Top-1 acc 38.281 (34.687)	Top-5 acc 59.375 (58.541)	lr 0.02469
Train [9][1720/3239]	Time 0.202 (0.524)	Data Time 0.001 (0.018)	Loss 3.7617 (3.8096)	Entropy 1.84919 (1.85337)	Top-1 acc 33.984 (34.689)	Top-5 acc 62.891 (58.546)	lr 0.02469
Train [9][1730/3239]	Time 0.197 (0.524)	Data Time 0.001 (0.018)	Loss 3.6487 (3.8099)	Entropy 1.84912 (1.85334)	Top-1 acc 37.109 (34.690)	Top-5 acc 60.547 (58.537)	lr 0.02469
Train [9][1740/3239]	Time 0.240 (0.523)	Data Time 0.001 (0.018)	Loss 3.6665 (3.8103)	Entropy 1.84906 (1.85332)	Top-1 acc 35.547 (34.688)	Top-5 acc 60.938 (58.533)	lr 0.02469
Train [9][1750/3239]	Time 0.200 (0.522)	Data Time 0.002 (0.017)	Loss 3.7497 (3.8102)	Entropy 1.84901 (1.85329)	Top-1 acc 34.375 (34.689)	Top-5 acc 64.062 (58.537)	lr 0.02469
Train [9][1760/3239]	Time 0.253 (0.522)	Data Time 0.001 (0.017)	Loss 3.9112 (3.8101)	Entropy 1.84896 (1.85327)	Top-1 acc 30.469 (34.685)	Top-5 acc 56.641 (58.535)	lr 0.02469
Train [9][1770/3239]	Time 2.286 (0.521)	Data Time 0.001 (0.017)	Loss 4.0429 (3.8103)	Entropy 1.84896 (1.85324)	Top-1 acc 31.641 (34.687)	Top-5 acc 55.078 (58.530)	lr 0.02469
Train [9][1780/3239]	Time 0.320 (0.519)	Data Time 0.001 (0.017)	Loss 3.7891 (3.8105)	Entropy 1.84896 (1.85322)	Top-1 acc 35.938 (34.684)	Top-5 acc 59.375 (58.532)	lr 0.02469
Train [9][1790/3239]	Time 0.157 (0.519)	Data Time 0.001 (0.017)	Loss 3.9123 (3.8106)	Entropy 1.84893 (1.85320)	Top-1 acc 29.297 (34.684)	Top-5 acc 55.469 (58.526)	lr 0.02469
Train [9][1800/3239]	Time 0.206 (0.518)	Data Time 0.002 (0.017)	Loss 3.8235 (3.8106)	Entropy 1.84888 (1.85317)	Top-1 acc 34.375 (34.682)	Top-5 acc 57.422 (58.527)	lr 0.02469
Train [9][1810/3239]	Time 0.227 (0.518)	Data Time 0.001 (0.017)	Loss 3.6208 (3.8106)	Entropy 1.84886 (1.85315)	Top-1 acc 36.328 (34.683)	Top-5 acc 62.109 (58.525)	lr 0.02469
Train [9][1820/3239]	Time 0.227 (0.517)	Data Time 0.001 (0.017)	Loss 3.6920 (3.8106)	Entropy 1.84881 (1.85312)	Top-1 acc 37.500 (34.684)	Top-5 acc 62.500 (58.523)	lr 0.02469
Train [9][1830/3239]	Time 0.212 (0.517)	Data Time 0.001 (0.017)	Loss 4.0060 (3.8107)	Entropy 1.84872 (1.85310)	Top-1 acc 31.641 (34.682)	Top-5 acc 53.906 (58.518)	lr 0.02469
Train [9][1840/3239]	Time 0.218 (0.516)	Data Time 0.001 (0.017)	Loss 3.8238 (3.8107)	Entropy 1.84868 (1.85308)	Top-1 acc 37.500 (34.684)	Top-5 acc 56.250 (58.512)	lr 0.02469
Train [9][1850/3239]	Time 0.206 (0.515)	Data Time 0.001 (0.017)	Loss 3.7987 (3.8105)	Entropy 1.84862 (1.85305)	Top-1 acc 35.938 (34.691)	Top-5 acc 59.766 (58.515)	lr 0.02469
Train [9][1860/3239]	Time 0.258 (0.515)	Data Time 0.001 (0.017)	Loss 3.7134 (3.8110)	Entropy 1.84854 (1.85303)	Top-1 acc 39.453 (34.687)	Top-5 acc 60.547 (58.512)	lr 0.02469
Train [9][1870/3239]	Time 0.318 (0.514)	Data Time 0.001 (0.016)	Loss 3.6046 (3.8108)	Entropy 1.84852 (1.85301)	Top-1 acc 35.938 (34.686)	Top-5 acc 65.234 (58.515)	lr 0.02469
Train [9][1880/3239]	Time 2.072 (0.514)	Data Time 0.001 (0.016)	Loss 3.7336 (3.8107)	Entropy 1.84852 (1.85298)	Top-1 acc 38.281 (34.692)	Top-5 acc 58.594 (58.513)	lr 0.02469
Train [9][1890/3239]	Time 0.223 (0.512)	Data Time 0.001 (0.016)	Loss 3.5247 (3.8107)	Entropy 1.84848 (1.85296)	Top-1 acc 39.062 (34.686)	Top-5 acc 64.062 (58.519)	lr 0.02469
Train [9][1900/3239]	Time 0.219 (0.512)	Data Time 0.001 (0.016)	Loss 3.7575 (3.8108)	Entropy 1.84846 (1.85293)	Top-1 acc 38.672 (34.683)	Top-5 acc 59.375 (58.519)	lr 0.02469
Train [9][1910/3239]	Time 0.206 (0.511)	Data Time 0.001 (0.016)	Loss 3.8000 (3.8107)	Entropy 1.84846 (1.85291)	Top-1 acc 34.766 (34.690)	Top-5 acc 57.031 (58.519)	lr 0.02469
Train [9][1920/3239]	Time 0.241 (0.511)	Data Time 0.002 (0.016)	Loss 3.8033 (3.8108)	Entropy 1.84844 (1.85289)	Top-1 acc 35.547 (34.685)	Top-5 acc 62.891 (58.521)	lr 0.02469
Train [9][1930/3239]	Time 0.215 (0.510)	Data Time 0.001 (0.016)	Loss 3.8331 (3.8108)	Entropy 1.84839 (1.85286)	Top-1 acc 31.641 (34.682)	Top-5 acc 59.375 (58.525)	lr 0.02468
Train [9][1940/3239]	Time 0.151 (0.510)	Data Time 0.001 (0.016)	Loss 3.8599 (3.8106)	Entropy 1.84837 (1.85284)	Top-1 acc 30.859 (34.683)	Top-5 acc 55.859 (58.527)	lr 0.02468
Train [9][1950/3239]	Time 0.223 (0.509)	Data Time 0.001 (0.016)	Loss 3.7333 (3.8102)	Entropy 1.84832 (1.85282)	Top-1 acc 35.156 (34.686)	Top-5 acc 59.375 (58.539)	lr 0.02468
Train [9][1960/3239]	Time 0.334 (0.528)	Data Time 0.003 (0.016)	Loss 3.7240 (3.8099)	Entropy 1.84829 (1.85280)	Top-1 acc 35.156 (34.694)	Top-5 acc 60.156 (58.547)	lr 0.02468
Train [9][1970/3239]	Time 0.195 (0.528)	Data Time 0.002 (0.016)	Loss 4.0115 (3.8102)	Entropy 1.84824 (1.85277)	Top-1 acc 31.641 (34.687)	Top-5 acc 54.297 (58.539)	lr 0.02468
Train [9][1980/3239]	Time 0.161 (0.527)	Data Time 0.001 (0.016)	Loss 4.0223 (3.8103)	Entropy 1.84820 (1.85275)	Top-1 acc 33.984 (34.689)	Top-5 acc 52.734 (58.532)	lr 0.02468
Train [9][1990/3239]	Time 2.212 (0.527)	Data Time 0.001 (0.016)	Loss 3.8338 (3.8104)	Entropy 1.84820 (1.85273)	Top-1 acc 38.672 (34.686)	Top-5 acc 58.594 (58.528)	lr 0.02468
Train [9][2000/3239]	Time 0.205 (0.525)	Data Time 0.001 (0.015)	Loss 3.7856 (3.8105)	Entropy 1.84809 (1.85270)	Top-1 acc 37.109 (34.683)	Top-5 acc 57.812 (58.522)	lr 0.02468
Train [9][2010/3239]	Time 0.194 (0.524)	Data Time 0.001 (0.015)	Loss 3.7753 (3.8104)	Entropy 1.84802 (1.85268)	Top-1 acc 36.328 (34.685)	Top-5 acc 60.156 (58.520)	lr 0.02468
Train [9][2020/3239]	Time 0.219 (0.524)	Data Time 0.002 (0.015)	Loss 4.0072 (3.8102)	Entropy 1.84802 (1.85266)	Top-1 acc 27.734 (34.685)	Top-5 acc 55.859 (58.521)	lr 0.02468
Train [9][2030/3239]	Time 0.207 (0.523)	Data Time 0.003 (0.015)	Loss 3.6619 (3.8099)	Entropy 1.84800 (1.85263)	Top-1 acc 37.500 (34.693)	Top-5 acc 62.109 (58.529)	lr 0.02468
Train [9][2040/3239]	Time 0.278 (0.523)	Data Time 0.001 (0.015)	Loss 4.0171 (3.8103)	Entropy 1.84791 (1.85261)	Top-1 acc 31.250 (34.692)	Top-5 acc 54.297 (58.525)	lr 0.02468
Train [9][2050/3239]	Time 0.190 (0.522)	Data Time 0.001 (0.015)	Loss 3.7854 (3.8101)	Entropy 1.84787 (1.85259)	Top-1 acc 35.938 (34.693)	Top-5 acc 60.156 (58.529)	lr 0.02468
Train [9][2060/3239]	Time 0.200 (0.522)	Data Time 0.001 (0.015)	Loss 3.6644 (3.8101)	Entropy 1.84784 (1.85257)	Top-1 acc 34.766 (34.689)	Top-5 acc 60.938 (58.529)	lr 0.02468
Train [9][2070/3239]	Time 0.200 (0.521)	Data Time 0.001 (0.015)	Loss 3.7740 (3.8101)	Entropy 1.84778 (1.85254)	Top-1 acc 36.719 (34.691)	Top-5 acc 58.203 (58.528)	lr 0.02468
Train [9][2080/3239]	Time 0.202 (0.521)	Data Time 0.001 (0.015)	Loss 4.0287 (3.8104)	Entropy 1.84777 (1.85252)	Top-1 acc 30.859 (34.680)	Top-5 acc 50.391 (58.520)	lr 0.02468
Train [9][2090/3239]	Time 0.232 (0.520)	Data Time 0.001 (0.015)	Loss 3.8021 (3.8106)	Entropy 1.84763 (1.85250)	Top-1 acc 35.938 (34.681)	Top-5 acc 56.250 (58.515)	lr 0.02468
Train [9][2100/3239]	Time 2.178 (0.520)	Data Time 0.001 (0.015)	Loss 3.7287 (3.8106)	Entropy 1.84763 (1.85247)	Top-1 acc 38.672 (34.682)	Top-5 acc 59.375 (58.516)	lr 0.02468
Train [9][2110/3239]	Time 0.243 (0.518)	Data Time 0.002 (0.015)	Loss 3.5294 (3.8102)	Entropy 1.84756 (1.85245)	Top-1 acc 39.844 (34.692)	Top-5 acc 63.672 (58.527)	lr 0.02468
Train [9][2120/3239]	Time 0.192 (0.518)	Data Time 0.001 (0.015)	Loss 3.9563 (3.8103)	Entropy 1.84748 (1.85243)	Top-1 acc 32.031 (34.691)	Top-5 acc 54.297 (58.528)	lr 0.02468
Train [9][2130/3239]	Time 0.204 (0.518)	Data Time 0.001 (0.015)	Loss 3.9663 (3.8100)	Entropy 1.84745 (1.85240)	Top-1 acc 30.859 (34.697)	Top-5 acc 56.641 (58.532)	lr 0.02468
Train [9][2140/3239]	Time 0.215 (0.517)	Data Time 0.001 (0.015)	Loss 3.6872 (3.8100)	Entropy 1.84740 (1.85238)	Top-1 acc 37.500 (34.701)	Top-5 acc 62.500 (58.535)	lr 0.02468
Train [9][2150/3239]	Time 0.279 (0.516)	Data Time 0.001 (0.015)	Loss 3.6924 (3.8103)	Entropy 1.84730 (1.85236)	Top-1 acc 36.328 (34.691)	Top-5 acc 60.938 (58.531)	lr 0.02468
Train [9][2160/3239]	Time 0.220 (0.516)	Data Time 0.001 (0.014)	Loss 3.6120 (3.8100)	Entropy 1.84724 (1.85233)	Top-1 acc 41.797 (34.707)	Top-5 acc 62.109 (58.540)	lr 0.02468
Train [9][2170/3239]	Time 0.173 (0.516)	Data Time 0.001 (0.014)	Loss 3.8624 (3.8101)	Entropy 1.84720 (1.85231)	Top-1 acc 33.203 (34.713)	Top-5 acc 54.688 (58.544)	lr 0.02468
Train [9][2180/3239]	Time 0.195 (0.515)	Data Time 0.001 (0.014)	Loss 3.6796 (3.8101)	Entropy 1.84717 (1.85229)	Top-1 acc 37.109 (34.714)	Top-5 acc 59.766 (58.547)	lr 0.02468
Train [9][2190/3239]	Time 0.203 (0.515)	Data Time 0.001 (0.014)	Loss 3.6480 (3.8100)	Entropy 1.84714 (1.85226)	Top-1 acc 35.156 (34.706)	Top-5 acc 63.281 (58.547)	lr 0.02468
Train [9][2200/3239]	Time 0.283 (0.514)	Data Time 0.001 (0.014)	Loss 3.9121 (3.8098)	Entropy 1.84706 (1.85224)	Top-1 acc 31.250 (34.706)	Top-5 acc 56.250 (58.555)	lr 0.02468
Train [9][2210/3239]	Time 2.086 (0.514)	Data Time 0.001 (0.014)	Loss 3.9120 (3.8096)	Entropy 1.84706 (1.85222)	Top-1 acc 31.250 (34.715)	Top-5 acc 58.594 (58.563)	lr 0.02468
Train [9][2220/3239]	Time 0.345 (0.513)	Data Time 0.001 (0.014)	Loss 4.1840 (3.8094)	Entropy 1.84697 (1.85219)	Top-1 acc 29.688 (34.720)	Top-5 acc 52.344 (58.570)	lr 0.02468
Train [9][2230/3239]	Time 0.194 (0.512)	Data Time 0.001 (0.014)	Loss 3.9759 (3.8094)	Entropy 1.84694 (1.85217)	Top-1 acc 32.422 (34.719)	Top-5 acc 54.688 (58.568)	lr 0.02468
Train [9][2240/3239]	Time 0.245 (0.512)	Data Time 0.001 (0.014)	Loss 3.7152 (3.8093)	Entropy 1.84692 (1.85215)	Top-1 acc 36.719 (34.720)	Top-5 acc 60.938 (58.574)	lr 0.02468
Train [9][2250/3239]	Time 0.209 (0.512)	Data Time 0.001 (0.014)	Loss 3.8512 (3.8093)	Entropy 1.84686 (1.85212)	Top-1 acc 31.250 (34.719)	Top-5 acc 57.422 (58.571)	lr 0.02468
Train [9][2260/3239]	Time 0.227 (0.511)	Data Time 0.002 (0.014)	Loss 3.6698 (3.8089)	Entropy 1.84680 (1.85210)	Top-1 acc 37.500 (34.729)	Top-5 acc 58.984 (58.578)	lr 0.02468
Train [9][2270/3239]	Time 0.180 (0.511)	Data Time 0.002 (0.014)	Loss 4.0060 (3.8091)	Entropy 1.84669 (1.85208)	Top-1 acc 30.469 (34.724)	Top-5 acc 55.859 (58.578)	lr 0.02468
Train [9][2280/3239]	Time 0.206 (0.511)	Data Time 0.001 (0.014)	Loss 3.6416 (3.8088)	Entropy 1.84663 (1.85205)	Top-1 acc 35.938 (34.725)	Top-5 acc 63.281 (58.581)	lr 0.02468
Train [9][2290/3239]	Time 0.284 (0.511)	Data Time 0.002 (0.014)	Loss 3.7858 (3.8086)	Entropy 1.84655 (1.85203)	Top-1 acc 36.328 (34.732)	Top-5 acc 58.984 (58.587)	lr 0.02468
Train [9][2300/3239]	Time 0.236 (0.510)	Data Time 0.001 (0.014)	Loss 3.8742 (3.8086)	Entropy 1.84652 (1.85200)	Top-1 acc 35.156 (34.732)	Top-5 acc 58.203 (58.588)	lr 0.02468
Train [9][2310/3239]	Time 0.371 (0.510)	Data Time 0.001 (0.014)	Loss 3.9083 (3.8085)	Entropy 1.84651 (1.85198)	Top-1 acc 33.594 (34.734)	Top-5 acc 60.156 (58.594)	lr 0.02468
Train [9][2320/3239]	Time 41.919 (0.527)	Data Time 0.001 (0.014)	Loss 3.8798 (3.8083)	Entropy 1.84651 (1.85196)	Top-1 acc 30.859 (34.737)	Top-5 acc 55.859 (58.599)	lr 0.02468
Train [9][2330/3239]	Time 0.156 (0.526)	Data Time 0.002 (0.014)	Loss 3.8637 (3.8084)	Entropy 1.84648 (1.85193)	Top-1 acc 33.203 (34.741)	Top-5 acc 55.859 (58.598)	lr 0.02468
Train [9][2340/3239]	Time 0.181 (0.525)	Data Time 0.002 (0.014)	Loss 3.7641 (3.8081)	Entropy 1.84638 (1.85191)	Top-1 acc 34.375 (34.744)	Top-5 acc 61.719 (58.606)	lr 0.02468
Train [9][2350/3239]	Time 0.207 (0.525)	Data Time 0.001 (0.014)	Loss 3.9441 (3.8085)	Entropy 1.84635 (1.85189)	Top-1 acc 33.594 (34.734)	Top-5 acc 57.422 (58.595)	lr 0.02468
Train [9][2360/3239]	Time 0.210 (0.524)	Data Time 0.001 (0.013)	Loss 3.7786 (3.8084)	Entropy 1.84623 (1.85186)	Top-1 acc 33.984 (34.738)	Top-5 acc 60.938 (58.596)	lr 0.02468
Train [9][2370/3239]	Time 0.185 (0.524)	Data Time 0.001 (0.013)	Loss 3.6958 (3.8084)	Entropy 1.84610 (1.85184)	Top-1 acc 37.500 (34.736)	Top-5 acc 57.422 (58.589)	lr 0.02467
Train [9][2380/3239]	Time 0.137 (0.523)	Data Time 0.001 (0.013)	Loss 3.7796 (3.8082)	Entropy 1.84610 (1.85181)	Top-1 acc 35.156 (34.740)	Top-5 acc 60.156 (58.593)	lr 0.02467
Train [9][2390/3239]	Time 0.186 (0.523)	Data Time 0.001 (0.013)	Loss 3.6406 (3.8078)	Entropy 1.84605 (1.85179)	Top-1 acc 39.062 (34.747)	Top-5 acc 60.547 (58.602)	lr 0.02467
Train [9][2400/3239]	Time 0.272 (0.523)	Data Time 0.002 (0.013)	Loss 3.7802 (3.8078)	Entropy 1.84601 (1.85177)	Top-1 acc 36.328 (34.749)	Top-5 acc 59.375 (58.602)	lr 0.02467
Train [9][2410/3239]	Time 0.251 (0.522)	Data Time 0.001 (0.013)	Loss 3.7844 (3.8081)	Entropy 1.84598 (1.85174)	Top-1 acc 31.641 (34.744)	Top-5 acc 59.375 (58.595)	lr 0.02467
Train [9][2420/3239]	Time 0.268 (0.522)	Data Time 0.001 (0.013)	Loss 3.8624 (3.8078)	Entropy 1.84596 (1.85172)	Top-1 acc 37.891 (34.748)	Top-5 acc 58.203 (58.601)	lr 0.02467
Train [9][2430/3239]	Time 2.204 (0.521)	Data Time 0.001 (0.013)	Loss 3.8939 (3.8078)	Entropy 1.84596 (1.85169)	Top-1 acc 33.203 (34.750)	Top-5 acc 53.906 (58.601)	lr 0.02467
Train [9][2440/3239]	Time 0.252 (0.520)	Data Time 0.001 (0.013)	Loss 3.8968 (3.8078)	Entropy 1.84586 (1.85167)	Top-1 acc 32.031 (34.748)	Top-5 acc 57.422 (58.602)	lr 0.02467
Train [9][2450/3239]	Time 0.203 (0.520)	Data Time 0.001 (0.013)	Loss 3.7323 (3.8078)	Entropy 1.84584 (1.85165)	Top-1 acc 33.984 (34.751)	Top-5 acc 59.375 (58.605)	lr 0.02467
Train [9][2460/3239]	Time 0.211 (0.519)	Data Time 0.001 (0.013)	Loss 3.8010 (3.8077)	Entropy 1.84578 (1.85162)	Top-1 acc 36.328 (34.753)	Top-5 acc 59.766 (58.609)	lr 0.02467
Train [9][2470/3239]	Time 0.236 (0.519)	Data Time 0.002 (0.013)	Loss 3.5926 (3.8077)	Entropy 1.84576 (1.85160)	Top-1 acc 36.328 (34.749)	Top-5 acc 60.156 (58.607)	lr 0.02467
Train [9][2480/3239]	Time 0.188 (0.518)	Data Time 0.001 (0.013)	Loss 3.6842 (3.8077)	Entropy 1.84573 (1.85158)	Top-1 acc 34.375 (34.747)	Top-5 acc 59.766 (58.606)	lr 0.02467
Train [9][2490/3239]	Time 0.268 (0.518)	Data Time 0.001 (0.013)	Loss 3.8405 (3.8076)	Entropy 1.84571 (1.85155)	Top-1 acc 32.422 (34.746)	Top-5 acc 57.812 (58.606)	lr 0.02467
Train [9][2500/3239]	Time 0.347 (0.518)	Data Time 0.001 (0.013)	Loss 3.8464 (3.8076)	Entropy 1.84570 (1.85153)	Top-1 acc 33.984 (34.741)	Top-5 acc 59.766 (58.610)	lr 0.02467
Train [9][2510/3239]	Time 0.134 (0.517)	Data Time 0.001 (0.013)	Loss 3.7796 (3.8075)	Entropy 1.84568 (1.85151)	Top-1 acc 35.156 (34.741)	Top-5 acc 58.984 (58.614)	lr 0.02467
Train [9][2520/3239]	Time 0.256 (0.517)	Data Time 0.001 (0.013)	Loss 3.8312 (3.8076)	Entropy 1.84563 (1.85148)	Top-1 acc 32.422 (34.737)	Top-5 acc 57.031 (58.615)	lr 0.02467
Train [9][2530/3239]	Time 0.144 (0.517)	Data Time 0.001 (0.013)	Loss 4.0208 (3.8075)	Entropy 1.84558 (1.85146)	Top-1 acc 30.078 (34.736)	Top-5 acc 53.516 (58.614)	lr 0.02467
Train [9][2540/3239]	Time 2.204 (0.516)	Data Time 0.001 (0.013)	Loss 3.6658 (3.8073)	Entropy 1.84558 (1.85144)	Top-1 acc 37.109 (34.735)	Top-5 acc 60.938 (58.619)	lr 0.02467
Train [9][2550/3239]	Time 0.220 (0.515)	Data Time 0.002 (0.013)	Loss 3.7403 (3.8071)	Entropy 1.84555 (1.85141)	Top-1 acc 32.422 (34.736)	Top-5 acc 61.719 (58.623)	lr 0.02467
Train [9][2560/3239]	Time 0.198 (0.515)	Data Time 0.001 (0.013)	Loss 3.7250 (3.8071)	Entropy 1.84548 (1.85139)	Top-1 acc 35.156 (34.739)	Top-5 acc 60.938 (58.624)	lr 0.02467
Train [9][2570/3239]	Time 0.235 (0.514)	Data Time 0.001 (0.013)	Loss 3.9888 (3.8072)	Entropy 1.84546 (1.85137)	Top-1 acc 33.594 (34.735)	Top-5 acc 54.297 (58.620)	lr 0.02467
Train [9][2580/3239]	Time 0.215 (0.514)	Data Time 0.001 (0.013)	Loss 3.6103 (3.8069)	Entropy 1.84546 (1.85134)	Top-1 acc 44.141 (34.742)	Top-5 acc 64.453 (58.629)	lr 0.02467
Train [9][2590/3239]	Time 0.271 (0.513)	Data Time 0.001 (0.013)	Loss 3.7797 (3.8068)	Entropy 1.84529 (1.85132)	Top-1 acc 33.203 (34.743)	Top-5 acc 57.812 (58.629)	lr 0.02467
Train [9][2600/3239]	Time 0.195 (0.513)	Data Time 0.001 (0.012)	Loss 3.7062 (3.8067)	Entropy 1.84526 (1.85130)	Top-1 acc 39.062 (34.747)	Top-5 acc 62.500 (58.631)	lr 0.02467
Train [9][2610/3239]	Time 0.192 (0.513)	Data Time 0.001 (0.012)	Loss 3.6174 (3.8065)	Entropy 1.84524 (1.85127)	Top-1 acc 41.797 (34.755)	Top-5 acc 62.500 (58.636)	lr 0.02467
Train [9][2620/3239]	Time 0.230 (0.512)	Data Time 0.002 (0.012)	Loss 3.8318 (3.8065)	Entropy 1.84519 (1.85125)	Top-1 acc 32.812 (34.754)	Top-5 acc 58.984 (58.635)	lr 0.02467
Train [9][2630/3239]	Time 0.230 (0.512)	Data Time 0.001 (0.012)	Loss 3.6571 (3.8066)	Entropy 1.84515 (1.85123)	Top-1 acc 35.938 (34.754)	Top-5 acc 60.938 (58.633)	lr 0.02467
Train [9][2640/3239]	Time 0.179 (0.512)	Data Time 0.001 (0.012)	Loss 3.7940 (3.8066)	Entropy 1.84510 (1.85121)	Top-1 acc 37.891 (34.751)	Top-5 acc 58.594 (58.636)	lr 0.02467
Train [9][2650/3239]	Time 0.212 (0.511)	Data Time 0.001 (0.012)	Loss 3.7511 (3.8064)	Entropy 1.84502 (1.85118)	Top-1 acc 37.891 (34.759)	Top-5 acc 60.156 (58.638)	lr 0.02467
Train [9][2660/3239]	Time 0.199 (0.511)	Data Time 0.001 (0.012)	Loss 3.7491 (3.8063)	Entropy 1.84500 (1.85116)	Top-1 acc 37.109 (34.760)	Top-5 acc 60.547 (58.641)	lr 0.02467
Train [9][2670/3239]	Time 0.164 (0.510)	Data Time 0.001 (0.012)	Loss 3.6747 (3.8063)	Entropy 1.84496 (1.85114)	Top-1 acc 39.844 (34.761)	Top-5 acc 63.672 (58.645)	lr 0.02467
Train [9][2680/3239]	Time 0.333 (0.525)	Data Time 0.003 (0.012)	Loss 3.8163 (3.8066)	Entropy 1.84490 (1.85111)	Top-1 acc 32.812 (34.755)	Top-5 acc 54.688 (58.639)	lr 0.02467
Train [9][2690/3239]	Time 0.193 (0.524)	Data Time 0.002 (0.012)	Loss 3.7260 (3.8066)	Entropy 1.84488 (1.85109)	Top-1 acc 35.938 (34.756)	Top-5 acc 61.719 (58.640)	lr 0.02467
Train [9][2700/3239]	Time 0.255 (0.524)	Data Time 0.001 (0.012)	Loss 3.7259 (3.8064)	Entropy 1.84474 (1.85107)	Top-1 acc 35.547 (34.757)	Top-5 acc 58.594 (58.642)	lr 0.02467
Train [9][2710/3239]	Time 0.211 (0.524)	Data Time 0.002 (0.012)	Loss 3.7176 (3.8066)	Entropy 1.84470 (1.85104)	Top-1 acc 39.453 (34.751)	Top-5 acc 61.328 (58.638)	lr 0.02467
Train [9][2720/3239]	Time 0.224 (0.523)	Data Time 0.001 (0.012)	Loss 3.9793 (3.8067)	Entropy 1.84469 (1.85102)	Top-1 acc 30.469 (34.748)	Top-5 acc 57.422 (58.635)	lr 0.02467
Train [9][2730/3239]	Time 0.139 (0.523)	Data Time 0.001 (0.012)	Loss 4.0423 (3.8068)	Entropy 1.84454 (1.85100)	Top-1 acc 31.250 (34.746)	Top-5 acc 51.953 (58.633)	lr 0.02467
Train [9][2740/3239]	Time 0.220 (0.522)	Data Time 0.001 (0.012)	Loss 3.6938 (3.8067)	Entropy 1.84453 (1.85097)	Top-1 acc 38.281 (34.748)	Top-5 acc 59.375 (58.633)	lr 0.02467
Train [9][2750/3239]	Time 0.213 (0.522)	Data Time 0.001 (0.012)	Loss 3.8548 (3.8067)	Entropy 1.84451 (1.85095)	Top-1 acc 33.984 (34.748)	Top-5 acc 58.984 (58.632)	lr 0.02467
Train [9][2760/3239]	Time 0.213 (0.522)	Data Time 0.001 (0.012)	Loss 3.6352 (3.8066)	Entropy 1.84449 (1.85093)	Top-1 acc 39.844 (34.754)	Top-5 acc 60.156 (58.635)	lr 0.02467
Train [9][2770/3239]	Time 0.212 (0.521)	Data Time 0.001 (0.012)	Loss 3.7957 (3.8065)	Entropy 1.84447 (1.85090)	Top-1 acc 33.984 (34.754)	Top-5 acc 59.375 (58.640)	lr 0.02467
Train [9][2780/3239]	Time 0.339 (0.521)	Data Time 0.001 (0.012)	Loss 3.6731 (3.8063)	Entropy 1.84442 (1.85088)	Top-1 acc 41.016 (34.761)	Top-5 acc 62.891 (58.645)	lr 0.02467
Train [9][2790/3239]	Time 0.307 (0.520)	Data Time 0.001 (0.012)	Loss 3.8575 (3.8061)	Entropy 1.84439 (1.85086)	Top-1 acc 37.891 (34.764)	Top-5 acc 57.812 (58.649)	lr 0.02467
Train [9][2800/3239]	Time 0.188 (0.520)	Data Time 0.001 (0.012)	Loss 4.0386 (3.8063)	Entropy 1.84430 (1.85083)	Top-1 acc 29.688 (34.762)	Top-5 acc 55.859 (58.646)	lr 0.02466
Train [9][2810/3239]	Time 0.240 (0.520)	Data Time 0.002 (0.012)	Loss 3.5674 (3.8061)	Entropy 1.84421 (1.85081)	Top-1 acc 38.281 (34.767)	Top-5 acc 66.797 (58.655)	lr 0.02466
Train [9][2820/3239]	Time 0.230 (0.519)	Data Time 0.001 (0.012)	Loss 3.6903 (3.8061)	Entropy 1.84415 (1.85079)	Top-1 acc 39.062 (34.768)	Top-5 acc 61.719 (58.656)	lr 0.02466
Train [9][2830/3239]	Time 0.215 (0.519)	Data Time 0.001 (0.012)	Loss 3.8690 (3.8062)	Entropy 1.84409 (1.85076)	Top-1 acc 35.156 (34.769)	Top-5 acc 55.859 (58.654)	lr 0.02466
Train [9][2840/3239]	Time 0.215 (0.519)	Data Time 0.001 (0.012)	Loss 3.9323 (3.8060)	Entropy 1.84404 (1.85074)	Top-1 acc 31.641 (34.769)	Top-5 acc 54.688 (58.658)	lr 0.02466
Train [9][2850/3239]	Time 0.194 (0.518)	Data Time 0.001 (0.012)	Loss 3.8796 (3.8061)	Entropy 1.84400 (1.85072)	Top-1 acc 32.031 (34.770)	Top-5 acc 57.031 (58.656)	lr 0.02466
Train [9][2860/3239]	Time 0.222 (0.518)	Data Time 0.001 (0.012)	Loss 3.9187 (3.8062)	Entropy 1.84386 (1.85069)	Top-1 acc 34.766 (34.767)	Top-5 acc 56.250 (58.654)	lr 0.02466
Train [9][2870/3239]	Time 0.147 (0.518)	Data Time 0.001 (0.012)	Loss 3.7398 (3.8062)	Entropy 1.84380 (1.85067)	Top-1 acc 37.500 (34.767)	Top-5 acc 58.594 (58.652)	lr 0.02466
Train [9][2880/3239]	Time 0.164 (0.517)	Data Time 0.001 (0.011)	Loss 4.0068 (3.8062)	Entropy 1.84379 (1.85065)	Top-1 acc 30.078 (34.767)	Top-5 acc 53.906 (58.651)	lr 0.02466
Train [9][2890/3239]	Time 0.282 (0.517)	Data Time 0.001 (0.011)	Loss 3.8631 (3.8060)	Entropy 1.84373 (1.85062)	Top-1 acc 35.156 (34.773)	Top-5 acc 57.812 (58.658)	lr 0.02466
Train [9][2900/3239]	Time 0.162 (0.517)	Data Time 0.001 (0.011)	Loss 3.9532 (3.8061)	Entropy 1.84369 (1.85060)	Top-1 acc 33.594 (34.767)	Top-5 acc 55.078 (58.657)	lr 0.02466
Train [9][2910/3239]	Time 0.244 (0.516)	Data Time 0.001 (0.011)	Loss 3.6621 (3.8061)	Entropy 1.84366 (1.85057)	Top-1 acc 35.156 (34.762)	Top-5 acc 62.109 (58.655)	lr 0.02466
Train [9][2920/3239]	Time 0.157 (0.516)	Data Time 0.001 (0.011)	Loss 3.9638 (3.8061)	Entropy 1.84359 (1.85055)	Top-1 acc 30.859 (34.762)	Top-5 acc 50.000 (58.655)	lr 0.02466
Train [9][2930/3239]	Time 0.267 (0.516)	Data Time 0.001 (0.011)	Loss 3.6186 (3.8061)	Entropy 1.84348 (1.85053)	Top-1 acc 41.797 (34.761)	Top-5 acc 63.281 (58.657)	lr 0.02466
Train [9][2940/3239]	Time 0.234 (0.515)	Data Time 0.002 (0.011)	Loss 3.8575 (3.8060)	Entropy 1.84340 (1.85050)	Top-1 acc 30.469 (34.764)	Top-5 acc 55.469 (58.660)	lr 0.02466
Train [9][2950/3239]	Time 0.220 (0.515)	Data Time 0.001 (0.011)	Loss 3.6193 (3.8058)	Entropy 1.84331 (1.85048)	Top-1 acc 40.625 (34.771)	Top-5 acc 61.719 (58.664)	lr 0.02466
Train [9][2960/3239]	Time 0.242 (0.515)	Data Time 0.001 (0.011)	Loss 3.8053 (3.8056)	Entropy 1.84324 (1.85045)	Top-1 acc 39.453 (34.778)	Top-5 acc 57.422 (58.670)	lr 0.02466
Train [9][2970/3239]	Time 0.319 (0.514)	Data Time 0.001 (0.011)	Loss 3.6500 (3.8056)	Entropy 1.84320 (1.85043)	Top-1 acc 37.891 (34.779)	Top-5 acc 60.938 (58.669)	lr 0.02466
Train [9][2980/3239]	Time 0.201 (0.514)	Data Time 0.001 (0.011)	Loss 3.8264 (3.8053)	Entropy 1.84320 (1.85041)	Top-1 acc 30.859 (34.785)	Top-5 acc 57.812 (58.673)	lr 0.02466
Train [9][2990/3239]	Time 0.200 (0.514)	Data Time 0.001 (0.011)	Loss 3.6935 (3.8054)	Entropy 1.84318 (1.85038)	Top-1 acc 35.547 (34.784)	Top-5 acc 63.281 (58.672)	lr 0.02466
Train [9][3000/3239]	Time 0.239 (0.513)	Data Time 0.001 (0.011)	Loss 3.9253 (3.8055)	Entropy 1.84315 (1.85036)	Top-1 acc 33.594 (34.781)	Top-5 acc 55.859 (58.672)	lr 0.02466
Train [9][3010/3239]	Time 0.265 (0.524)	Data Time 0.004 (0.011)	Loss 3.9717 (3.8053)	Entropy 1.84305 (1.85033)	Top-1 acc 35.156 (34.786)	Top-5 acc 58.984 (58.678)	lr 0.02466
Train [9][3020/3239]	Time 0.211 (0.524)	Data Time 0.002 (0.011)	Loss 3.9775 (3.8053)	Entropy 1.84304 (1.85031)	Top-1 acc 29.297 (34.787)	Top-5 acc 51.953 (58.680)	lr 0.02466
Train [9][3030/3239]	Time 0.235 (0.524)	Data Time 0.002 (0.011)	Loss 3.7767 (3.8052)	Entropy 1.84299 (1.85029)	Top-1 acc 33.984 (34.790)	Top-5 acc 58.594 (58.680)	lr 0.02466
Train [9][3040/3239]	Time 0.266 (0.523)	Data Time 0.002 (0.011)	Loss 3.7947 (3.8050)	Entropy 1.84299 (1.85026)	Top-1 acc 34.375 (34.793)	Top-5 acc 59.375 (58.684)	lr 0.02466
Train [9][3050/3239]	Time 0.255 (0.523)	Data Time 0.001 (0.011)	Loss 3.9669 (3.8050)	Entropy 1.84297 (1.85024)	Top-1 acc 31.250 (34.792)	Top-5 acc 54.688 (58.685)	lr 0.02466
Train [9][3060/3239]	Time 0.220 (0.523)	Data Time 0.001 (0.011)	Loss 3.8646 (3.8049)	Entropy 1.84293 (1.85021)	Top-1 acc 30.859 (34.791)	Top-5 acc 56.641 (58.687)	lr 0.02466
Train [9][3070/3239]	Time 0.336 (0.522)	Data Time 0.001 (0.011)	Loss 3.8653 (3.8050)	Entropy 1.84286 (1.85019)	Top-1 acc 29.688 (34.787)	Top-5 acc 56.641 (58.687)	lr 0.02466
Train [9][3080/3239]	Time 0.182 (0.522)	Data Time 0.001 (0.011)	Loss 3.5183 (3.8049)	Entropy 1.84281 (1.85017)	Top-1 acc 41.016 (34.787)	Top-5 acc 63.281 (58.689)	lr 0.02466
Train [9][3090/3239]	Time 0.192 (0.522)	Data Time 0.001 (0.011)	Loss 3.7980 (3.8048)	Entropy 1.84274 (1.85014)	Top-1 acc 38.672 (34.790)	Top-5 acc 58.984 (58.689)	lr 0.02466
Train [9][3100/3239]	Time 0.223 (0.521)	Data Time 0.001 (0.011)	Loss 3.8232 (3.8047)	Entropy 1.84266 (1.85012)	Top-1 acc 35.547 (34.790)	Top-5 acc 60.938 (58.692)	lr 0.02466
Train [9][3110/3239]	Time 0.234 (0.521)	Data Time 0.001 (0.011)	Loss 3.7546 (3.8048)	Entropy 1.84261 (1.85009)	Top-1 acc 34.375 (34.790)	Top-5 acc 63.672 (58.693)	lr 0.02466
Train [9][3120/3239]	Time 0.251 (0.521)	Data Time 0.001 (0.011)	Loss 3.8212 (3.8046)	Entropy 1.84255 (1.85007)	Top-1 acc 38.672 (34.792)	Top-5 acc 60.547 (58.698)	lr 0.02466
Train [9][3130/3239]	Time 0.161 (0.520)	Data Time 0.001 (0.011)	Loss 3.7549 (3.8043)	Entropy 1.84245 (1.85005)	Top-1 acc 36.719 (34.801)	Top-5 acc 59.766 (58.703)	lr 0.02466
Train [9][3140/3239]	Time 0.312 (0.520)	Data Time 0.001 (0.011)	Loss 3.9276 (3.8043)	Entropy 1.84238 (1.85002)	Top-1 acc 32.031 (34.800)	Top-5 acc 57.812 (58.702)	lr 0.02466
Train [9][3150/3239]	Time 0.309 (0.520)	Data Time 0.001 (0.011)	Loss 3.6480 (3.8044)	Entropy 1.84230 (1.85000)	Top-1 acc 36.719 (34.796)	Top-5 acc 60.156 (58.701)	lr 0.02466
Train [9][3160/3239]	Time 0.228 (0.519)	Data Time 0.002 (0.011)	Loss 3.8249 (3.8043)	Entropy 1.84208 (1.84997)	Top-1 acc 35.547 (34.801)	Top-5 acc 58.594 (58.701)	lr 0.02466
Train [9][3170/3239]	Time 0.256 (0.519)	Data Time 0.002 (0.011)	Loss 3.6148 (3.8042)	Entropy 1.84199 (1.84995)	Top-1 acc 37.500 (34.800)	Top-5 acc 62.891 (58.707)	lr 0.02466
Train [9][3180/3239]	Time 0.191 (0.519)	Data Time 0.000 (0.011)	Loss 3.8647 (3.8044)	Entropy 1.84194 (1.84992)	Top-1 acc 34.766 (34.798)	Top-5 acc 57.812 (58.705)	lr 0.02466
Train [9][3190/3239]	Time 0.152 (0.518)	Data Time 0.000 (0.011)	Loss 3.8630 (3.8043)	Entropy 1.84190 (1.84990)	Top-1 acc 33.203 (34.800)	Top-5 acc 58.984 (58.709)	lr 0.02466
Train [9][3200/3239]	Time 0.195 (0.518)	Data Time 0.000 (0.011)	Loss 3.9553 (3.8042)	Entropy 1.84189 (1.84987)	Top-1 acc 30.469 (34.803)	Top-5 acc 56.250 (58.711)	lr 0.02466
Train [9][3210/3239]	Time 0.210 (0.518)	Data Time 0.000 (0.011)	Loss 3.6931 (3.8042)	Entropy 1.84184 (1.84985)	Top-1 acc 37.500 (34.805)	Top-5 acc 59.375 (58.710)	lr 0.02466
Train [9][3220/3239]	Time 0.190 (0.517)	Data Time 0.000 (0.010)	Loss 3.7330 (3.8042)	Entropy 1.84180 (1.84982)	Top-1 acc 37.500 (34.808)	Top-5 acc 58.984 (58.710)	lr 0.02466
Train [9][3230/3239]	Time 0.176 (0.517)	Data Time 0.000 (0.010)	Loss 3.8584 (3.8041)	Entropy 1.84180 (1.84980)	Top-1 acc 35.547 (34.809)	Top-5 acc 58.203 (58.713)	lr 0.02465
Train [9][3239/3239]	Time 2.007 (0.517)	Data Time 0.000 (0.010)	Loss 3.6521 (3.8041)	Entropy 1.84180 (1.84978)	Top-1 acc 33.333 (34.809)	Top-5 acc 62.963 (58.713)	lr 0.02465
==========Valid [9/120]	loss 2.649	top-1 acc 43.941 (43.941)	top-5 acc 68.418	Train top-1 34.809	top-5 58.713	Entropy 1.84180	Latency-None: 0.000ms	Flops: 536.88M
Train [10][0/3239]	Time 27.000 (27.000)	Data Time 24.486 (24.486)	Loss 3.7421 (3.7421)	Entropy 1.84172 (1.84172)	Top-1 acc 34.766 (34.766)	Top-5 acc 58.203 (58.203)	lr 0.02465
Train [10][10/3239]	Time 2.315 (2.922)	Data Time 0.003 (2.228)	Loss 3.8200 (3.7675)	Entropy 1.84172 (1.84172)	Top-1 acc 40.234 (35.831)	Top-5 acc 58.203 (59.837)	lr 0.02465
Train [10][20/3239]	Time 0.158 (1.629)	Data Time 0.001 (1.168)	Loss 3.6568 (3.7233)	Entropy 1.84172 (1.84172)	Top-1 acc 41.406 (37.072)	Top-5 acc 64.062 (60.379)	lr 0.02465
Train [10][30/3239]	Time 0.219 (1.247)	Data Time 0.001 (0.793)	Loss 3.6699 (3.7344)	Entropy 1.84161 (1.84169)	Top-1 acc 40.234 (36.593)	Top-5 acc 60.547 (60.207)	lr 0.02465
Train [10][40/3239]	Time 0.253 (1.043)	Data Time 0.002 (0.600)	Loss 3.9321 (3.7367)	Entropy 1.84160 (1.84167)	Top-1 acc 32.812 (36.385)	Top-5 acc 57.422 (60.147)	lr 0.02465
Train [10][50/3239]	Time 0.225 (0.922)	Data Time 0.002 (0.483)	Loss 4.0196 (3.7499)	Entropy 1.84151 (1.84164)	Top-1 acc 28.906 (36.175)	Top-5 acc 57.422 (60.057)	lr 0.02465
Train [10][60/3239]	Time 0.205 (0.837)	Data Time 0.001 (0.404)	Loss 3.8073 (3.7575)	Entropy 1.84146 (1.84162)	Top-1 acc 33.594 (36.034)	Top-5 acc 56.250 (59.836)	lr 0.02465
Train [10][70/3239]	Time 0.197 (0.777)	Data Time 0.001 (0.347)	Loss 3.7743 (3.7594)	Entropy 1.84140 (1.84159)	Top-1 acc 33.594 (35.877)	Top-5 acc 59.375 (59.887)	lr 0.02465
Train [10][80/3239]	Time 0.201 (0.733)	Data Time 0.001 (0.305)	Loss 3.6208 (3.7617)	Entropy 1.84133 (1.84156)	Top-1 acc 36.328 (35.745)	Top-5 acc 62.500 (59.713)	lr 0.02465
Train [10][90/3239]	Time 0.207 (0.700)	Data Time 0.001 (0.271)	Loss 3.6936 (3.7674)	Entropy 1.84129 (1.84154)	Top-1 acc 34.766 (35.598)	Top-5 acc 62.500 (59.564)	lr 0.02465
Train [10][100/3239]	Time 0.343 (0.674)	Data Time 0.001 (0.245)	Loss 3.7064 (3.7654)	Entropy 1.84123 (1.84151)	Top-1 acc 32.422 (35.647)	Top-5 acc 63.281 (59.580)	lr 0.02465
Train [10][110/3239]	Time 0.185 (1.008)	Data Time 0.003 (0.223)	Loss 3.6866 (3.7693)	Entropy 1.84112 (1.84148)	Top-1 acc 37.500 (35.659)	Top-5 acc 58.984 (59.519)	lr 0.02465
Train [10][120/3239]	Time 2.382 (0.960)	Data Time 0.002 (0.204)	Loss 3.5788 (3.7673)	Entropy 1.84112 (1.84145)	Top-1 acc 39.062 (35.653)	Top-5 acc 64.453 (59.549)	lr 0.02465
Train [10][130/3239]	Time 0.204 (0.903)	Data Time 0.002 (0.189)	Loss 3.6887 (3.7620)	Entropy 1.84106 (1.84142)	Top-1 acc 41.406 (35.806)	Top-5 acc 62.891 (59.643)	lr 0.02465
Train [10][140/3239]	Time 0.195 (0.868)	Data Time 0.002 (0.176)	Loss 3.5990 (3.7647)	Entropy 1.84100 (1.84139)	Top-1 acc 36.328 (35.777)	Top-5 acc 66.406 (59.608)	lr 0.02465
Train [10][150/3239]	Time 0.239 (0.838)	Data Time 0.002 (0.164)	Loss 3.8709 (3.7662)	Entropy 1.84097 (1.84137)	Top-1 acc 34.766 (35.637)	Top-5 acc 56.641 (59.572)	lr 0.02465
Train [10][160/3239]	Time 0.187 (0.812)	Data Time 0.001 (0.154)	Loss 3.8582 (3.7703)	Entropy 1.84096 (1.84134)	Top-1 acc 33.594 (35.646)	Top-5 acc 56.250 (59.523)	lr 0.02465
Train [10][170/3239]	Time 0.157 (0.789)	Data Time 0.001 (0.145)	Loss 3.7704 (3.7695)	Entropy 1.84086 (1.84132)	Top-1 acc 36.328 (35.695)	Top-5 acc 58.203 (59.512)	lr 0.02465
Train [10][180/3239]	Time 0.277 (0.769)	Data Time 0.001 (0.138)	Loss 3.5795 (3.7670)	Entropy 1.84081 (1.84129)	Top-1 acc 36.719 (35.748)	Top-5 acc 63.281 (59.545)	lr 0.02465
Train [10][190/3239]	Time 0.269 (0.751)	Data Time 0.001 (0.130)	Loss 3.7841 (3.7679)	Entropy 1.84074 (1.84126)	Top-1 acc 37.500 (35.729)	Top-5 acc 58.203 (59.526)	lr 0.02465
Train [10][200/3239]	Time 0.215 (0.733)	Data Time 0.001 (0.124)	Loss 3.6409 (3.7672)	Entropy 1.84071 (1.84124)	Top-1 acc 35.156 (35.702)	Top-5 acc 61.719 (59.536)	lr 0.02465
Train [10][210/3239]	Time 0.238 (0.718)	Data Time 0.002 (0.118)	Loss 3.8898 (3.7676)	Entropy 1.84065 (1.84121)	Top-1 acc 34.375 (35.702)	Top-5 acc 57.031 (59.534)	lr 0.02465
Train [10][220/3239]	Time 0.264 (0.706)	Data Time 0.001 (0.113)	Loss 3.7276 (3.7673)	Entropy 1.84059 (1.84119)	Top-1 acc 32.031 (35.741)	Top-5 acc 58.594 (59.561)	lr 0.02465
Train [10][230/3239]	Time 2.196 (0.693)	Data Time 0.001 (0.108)	Loss 3.6687 (3.7627)	Entropy 1.84059 (1.84116)	Top-1 acc 35.156 (35.819)	Top-5 acc 66.016 (59.664)	lr 0.02465
Train [10][240/3239]	Time 0.209 (0.674)	Data Time 0.001 (0.104)	Loss 3.8292 (3.7637)	Entropy 1.84056 (1.84113)	Top-1 acc 39.453 (35.832)	Top-5 acc 59.766 (59.639)	lr 0.02465
Train [10][250/3239]	Time 0.194 (0.663)	Data Time 0.001 (0.100)	Loss 3.6180 (3.7616)	Entropy 1.84048 (1.84111)	Top-1 acc 34.766 (35.891)	Top-5 acc 65.625 (59.694)	lr 0.02465
Train [10][260/3239]	Time 0.206 (0.655)	Data Time 0.001 (0.096)	Loss 3.7282 (3.7626)	Entropy 1.84041 (1.84108)	Top-1 acc 37.500 (35.902)	Top-5 acc 58.594 (59.665)	lr 0.02465
Train [10][270/3239]	Time 0.292 (0.646)	Data Time 0.001 (0.092)	Loss 3.5991 (3.7629)	Entropy 1.84033 (1.84106)	Top-1 acc 40.625 (35.904)	Top-5 acc 66.797 (59.658)	lr 0.02465
Train [10][280/3239]	Time 0.184 (0.638)	Data Time 0.001 (0.089)	Loss 3.8477 (3.7653)	Entropy 1.84027 (1.84103)	Top-1 acc 34.766 (35.860)	Top-5 acc 62.109 (59.600)	lr 0.02465
Train [10][290/3239]	Time 0.196 (0.631)	Data Time 0.001 (0.086)	Loss 3.7762 (3.7648)	Entropy 1.84021 (1.84100)	Top-1 acc 36.328 (35.858)	Top-5 acc 58.594 (59.613)	lr 0.02465
Train [10][300/3239]	Time 0.200 (0.624)	Data Time 0.001 (0.083)	Loss 3.8693 (3.7638)	Entropy 1.84014 (1.84097)	Top-1 acc 35.938 (35.865)	Top-5 acc 60.547 (59.650)	lr 0.02465
Train [10][310/3239]	Time 0.222 (0.617)	Data Time 0.001 (0.081)	Loss 3.7305 (3.7626)	Entropy 1.84014 (1.84095)	Top-1 acc 36.719 (35.930)	Top-5 acc 60.547 (59.668)	lr 0.02465
Train [10][320/3239]	Time 0.195 (0.611)	Data Time 0.001 (0.078)	Loss 3.9346 (3.7631)	Entropy 1.84011 (1.84092)	Top-1 acc 31.250 (35.891)	Top-5 acc 53.906 (59.639)	lr 0.02465
Train [10][330/3239]	Time 0.240 (0.606)	Data Time 0.001 (0.076)	Loss 3.7247 (3.7611)	Entropy 1.84005 (1.84090)	Top-1 acc 38.281 (35.887)	Top-5 acc 60.547 (59.697)	lr 0.02465
Train [10][340/3239]	Time 2.312 (0.600)	Data Time 0.001 (0.074)	Loss 3.6862 (3.7606)	Entropy 1.84005 (1.84087)	Top-1 acc 35.156 (35.915)	Top-5 acc 58.984 (59.709)	lr 0.02465
Train [10][350/3239]	Time 0.212 (0.589)	Data Time 0.001 (0.072)	Loss 3.7113 (3.7619)	Entropy 1.84001 (1.84085)	Top-1 acc 39.062 (35.865)	Top-5 acc 56.641 (59.670)	lr 0.02465
Train [10][360/3239]	Time 0.209 (0.585)	Data Time 0.001 (0.070)	Loss 3.7091 (3.7617)	Entropy 1.83997 (1.84082)	Top-1 acc 38.281 (35.874)	Top-5 acc 62.891 (59.667)	lr 0.02465
Train [10][370/3239]	Time 0.219 (0.580)	Data Time 0.001 (0.068)	Loss 3.7595 (3.7614)	Entropy 1.83992 (1.84080)	Top-1 acc 34.375 (35.863)	Top-5 acc 55.859 (59.656)	lr 0.02465
Train [10][380/3239]	Time 0.198 (0.576)	Data Time 0.001 (0.066)	Loss 3.8612 (3.7606)	Entropy 1.83982 (1.84077)	Top-1 acc 35.938 (35.887)	Top-5 acc 57.422 (59.669)	lr 0.02465
Train [10][390/3239]	Time 0.258 (0.572)	Data Time 0.001 (0.065)	Loss 3.9178 (3.7609)	Entropy 1.83979 (1.84075)	Top-1 acc 35.547 (35.876)	Top-5 acc 55.078 (59.653)	lr 0.02465
Train [10][400/3239]	Time 0.207 (0.568)	Data Time 0.001 (0.063)	Loss 3.6267 (3.7607)	Entropy 1.83978 (1.84073)	Top-1 acc 41.016 (35.862)	Top-5 acc 64.453 (59.687)	lr 0.02465
Train [10][410/3239]	Time 0.174 (0.565)	Data Time 0.001 (0.061)	Loss 3.5486 (3.7607)	Entropy 1.83971 (1.84070)	Top-1 acc 42.578 (35.879)	Top-5 acc 59.766 (59.672)	lr 0.02464
Train [10][420/3239]	Time 0.160 (0.561)	Data Time 0.002 (0.060)	Loss 3.5103 (3.7593)	Entropy 1.83966 (1.84068)	Top-1 acc 41.406 (35.889)	Top-5 acc 66.406 (59.690)	lr 0.02464
Train [10][430/3239]	Time 0.226 (0.557)	Data Time 0.001 (0.059)	Loss 3.9834 (3.7596)	Entropy 1.83960 (1.84065)	Top-1 acc 31.250 (35.897)	Top-5 acc 53.516 (59.672)	lr 0.02464
Train [10][440/3239]	Time 0.245 (0.554)	Data Time 0.001 (0.058)	Loss 3.8238 (3.7595)	Entropy 1.83958 (1.84063)	Top-1 acc 33.203 (35.905)	Top-5 acc 58.203 (59.658)	lr 0.02464
Train [10][450/3239]	Time 2.306 (0.552)	Data Time 0.001 (0.056)	Loss 3.8367 (3.7603)	Entropy 1.83958 (1.84061)	Top-1 acc 31.250 (35.895)	Top-5 acc 59.766 (59.620)	lr 0.02464
Train [10][460/3239]	Time 0.213 (0.544)	Data Time 0.002 (0.055)	Loss 3.8386 (3.7610)	Entropy 1.83946 (1.84058)	Top-1 acc 35.156 (35.900)	Top-5 acc 56.641 (59.604)	lr 0.02464
Train [10][470/3239]	Time 0.253 (0.542)	Data Time 0.001 (0.054)	Loss 3.8141 (3.7617)	Entropy 1.83942 (1.84056)	Top-1 acc 35.156 (35.892)	Top-5 acc 56.641 (59.601)	lr 0.02464
Train [10][480/3239]	Time 0.270 (0.616)	Data Time 0.004 (0.053)	Loss 3.9622 (3.7622)	Entropy 1.83938 (1.84053)	Top-1 acc 33.984 (35.869)	Top-5 acc 56.641 (59.589)	lr 0.02464
Train [10][490/3239]	Time 0.206 (0.612)	Data Time 0.002 (0.052)	Loss 3.6201 (3.7630)	Entropy 1.83930 (1.84051)	Top-1 acc 41.797 (35.864)	Top-5 acc 63.672 (59.562)	lr 0.02464
Train [10][500/3239]	Time 0.197 (0.609)	Data Time 0.001 (0.051)	Loss 3.8648 (3.7628)	Entropy 1.83922 (1.84048)	Top-1 acc 34.766 (35.891)	Top-5 acc 57.422 (59.560)	lr 0.02464
Train [10][510/3239]	Time 0.249 (0.605)	Data Time 0.001 (0.050)	Loss 3.8141 (3.7620)	Entropy 1.83911 (1.84046)	Top-1 acc 34.766 (35.902)	Top-5 acc 59.375 (59.581)	lr 0.02464
Train [10][520/3239]	Time 0.185 (0.601)	Data Time 0.002 (0.049)	Loss 3.8295 (3.7632)	Entropy 1.83898 (1.84043)	Top-1 acc 33.203 (35.866)	Top-5 acc 58.203 (59.540)	lr 0.02464
Train [10][530/3239]	Time 0.195 (0.598)	Data Time 0.002 (0.048)	Loss 3.7930 (3.7620)	Entropy 1.83893 (1.84040)	Top-1 acc 35.156 (35.912)	Top-5 acc 59.766 (59.565)	lr 0.02464
Train [10][540/3239]	Time 0.304 (0.595)	Data Time 0.001 (0.047)	Loss 3.5736 (3.7618)	Entropy 1.83891 (1.84038)	Top-1 acc 41.406 (35.930)	Top-5 acc 65.234 (59.575)	lr 0.02464
Train [10][550/3239]	Time 0.226 (0.592)	Data Time 0.001 (0.047)	Loss 3.7205 (3.7621)	Entropy 1.83886 (1.84035)	Top-1 acc 36.328 (35.922)	Top-5 acc 62.500 (59.567)	lr 0.02464
Train [10][560/3239]	Time 2.257 (0.589)	Data Time 0.002 (0.046)	Loss 3.7199 (3.7619)	Entropy 1.83886 (1.84032)	Top-1 acc 38.281 (35.926)	Top-5 acc 61.328 (59.574)	lr 0.02464
Train [10][570/3239]	Time 0.159 (0.582)	Data Time 0.001 (0.045)	Loss 3.6684 (3.7614)	Entropy 1.83877 (1.84030)	Top-1 acc 35.938 (35.931)	Top-5 acc 62.109 (59.592)	lr 0.02464
Train [10][580/3239]	Time 0.157 (0.579)	Data Time 0.001 (0.044)	Loss 3.7496 (3.7620)	Entropy 1.83871 (1.84027)	Top-1 acc 37.891 (35.912)	Top-5 acc 60.547 (59.589)	lr 0.02464
Train [10][590/3239]	Time 0.200 (0.577)	Data Time 0.002 (0.044)	Loss 3.7210 (3.7617)	Entropy 1.83863 (1.84024)	Top-1 acc 35.547 (35.910)	Top-5 acc 55.859 (59.576)	lr 0.02464
Train [10][600/3239]	Time 0.140 (0.574)	Data Time 0.001 (0.043)	Loss 3.5461 (3.7609)	Entropy 1.83860 (1.84021)	Top-1 acc 40.234 (35.900)	Top-5 acc 66.406 (59.604)	lr 0.02464
Train [10][610/3239]	Time 0.203 (0.572)	Data Time 0.001 (0.042)	Loss 3.6834 (3.7619)	Entropy 1.83855 (1.84019)	Top-1 acc 41.797 (35.892)	Top-5 acc 57.812 (59.592)	lr 0.02464
Train [10][620/3239]	Time 0.204 (0.569)	Data Time 0.001 (0.042)	Loss 3.8936 (3.7620)	Entropy 1.83849 (1.84016)	Top-1 acc 35.938 (35.878)	Top-5 acc 56.641 (59.591)	lr 0.02464
Train [10][630/3239]	Time 0.140 (0.567)	Data Time 0.001 (0.041)	Loss 3.8211 (3.7622)	Entropy 1.83843 (1.84013)	Top-1 acc 34.375 (35.869)	Top-5 acc 56.641 (59.593)	lr 0.02464
Train [10][640/3239]	Time 0.184 (0.564)	Data Time 0.002 (0.041)	Loss 3.4792 (3.7616)	Entropy 1.83838 (1.84011)	Top-1 acc 42.969 (35.869)	Top-5 acc 66.406 (59.604)	lr 0.02464
Train [10][650/3239]	Time 0.209 (0.562)	Data Time 0.002 (0.040)	Loss 3.6773 (3.7616)	Entropy 1.83832 (1.84008)	Top-1 acc 37.891 (35.891)	Top-5 acc 60.547 (59.610)	lr 0.02464
Train [10][660/3239]	Time 0.225 (0.560)	Data Time 0.001 (0.039)	Loss 3.5894 (3.7611)	Entropy 1.83825 (1.84005)	Top-1 acc 40.234 (35.894)	Top-5 acc 64.453 (59.621)	lr 0.02464
Train [10][670/3239]	Time 2.191 (0.558)	Data Time 0.002 (0.039)	Loss 3.8609 (3.7613)	Entropy 1.83825 (1.84003)	Top-1 acc 31.641 (35.885)	Top-5 acc 54.297 (59.611)	lr 0.02464
Train [10][680/3239]	Time 0.150 (0.553)	Data Time 0.001 (0.038)	Loss 3.6091 (3.7602)	Entropy 1.83824 (1.84000)	Top-1 acc 39.062 (35.905)	Top-5 acc 63.672 (59.646)	lr 0.02464
Train [10][690/3239]	Time 0.203 (0.551)	Data Time 0.001 (0.038)	Loss 3.6246 (3.7591)	Entropy 1.83819 (1.83997)	Top-1 acc 37.500 (35.921)	Top-5 acc 62.109 (59.668)	lr 0.02464
Train [10][700/3239]	Time 0.189 (0.549)	Data Time 0.001 (0.037)	Loss 3.7687 (3.7586)	Entropy 1.83813 (1.83995)	Top-1 acc 33.203 (35.921)	Top-5 acc 59.766 (59.683)	lr 0.02464
Train [10][710/3239]	Time 0.193 (0.547)	Data Time 0.001 (0.037)	Loss 3.7797 (3.7584)	Entropy 1.83809 (1.83992)	Top-1 acc 33.984 (35.919)	Top-5 acc 58.203 (59.710)	lr 0.02464
Train [10][720/3239]	Time 0.217 (0.545)	Data Time 0.001 (0.036)	Loss 3.8095 (3.7589)	Entropy 1.83811 (1.83990)	Top-1 acc 37.500 (35.909)	Top-5 acc 60.547 (59.687)	lr 0.02464
Train [10][730/3239]	Time 0.200 (0.544)	Data Time 0.001 (0.036)	Loss 3.6333 (3.7584)	Entropy 1.83806 (1.83987)	Top-1 acc 37.500 (35.924)	Top-5 acc 63.672 (59.711)	lr 0.02464
Train [10][740/3239]	Time 0.217 (0.542)	Data Time 0.001 (0.036)	Loss 3.7068 (3.7577)	Entropy 1.83797 (1.83985)	Top-1 acc 34.766 (35.926)	Top-5 acc 62.109 (59.730)	lr 0.02464
Train [10][750/3239]	Time 0.228 (0.541)	Data Time 0.001 (0.035)	Loss 3.8046 (3.7582)	Entropy 1.83791 (1.83982)	Top-1 acc 31.250 (35.891)	Top-5 acc 57.031 (59.716)	lr 0.02464
Train [10][760/3239]	Time 0.174 (0.539)	Data Time 0.001 (0.035)	Loss 3.7728 (3.7588)	Entropy 1.83787 (1.83980)	Top-1 acc 36.328 (35.884)	Top-5 acc 58.594 (59.695)	lr 0.02464
Train [10][770/3239]	Time 0.206 (0.537)	Data Time 0.001 (0.034)	Loss 3.8460 (3.7592)	Entropy 1.83782 (1.83977)	Top-1 acc 33.203 (35.888)	Top-5 acc 59.766 (59.698)	lr 0.02464
Train [10][780/3239]	Time 2.264 (0.536)	Data Time 0.002 (0.034)	Loss 3.8592 (3.7596)	Entropy 1.83782 (1.83975)	Top-1 acc 27.344 (35.861)	Top-5 acc 56.250 (59.675)	lr 0.02464
Train [10][790/3239]	Time 0.195 (0.532)	Data Time 0.001 (0.033)	Loss 3.9173 (3.7603)	Entropy 1.83776 (1.83972)	Top-1 acc 32.812 (35.838)	Top-5 acc 56.250 (59.660)	lr 0.02464
Train [10][800/3239]	Time 0.294 (0.531)	Data Time 0.001 (0.033)	Loss 3.6471 (3.7600)	Entropy 1.83774 (1.83970)	Top-1 acc 34.375 (35.837)	Top-5 acc 62.109 (59.673)	lr 0.02464
Train [10][810/3239]	Time 0.192 (0.529)	Data Time 0.001 (0.033)	Loss 3.7560 (3.7595)	Entropy 1.83768 (1.83967)	Top-1 acc 35.547 (35.841)	Top-5 acc 61.328 (59.681)	lr 0.02464
Train [10][820/3239]	Time 0.232 (0.528)	Data Time 0.001 (0.032)	Loss 3.8205 (3.7595)	Entropy 1.83766 (1.83965)	Top-1 acc 38.281 (35.840)	Top-5 acc 60.156 (59.684)	lr 0.02464
Train [10][830/3239]	Time 0.239 (0.527)	Data Time 0.001 (0.032)	Loss 3.9905 (3.7597)	Entropy 1.83759 (1.83962)	Top-1 acc 28.516 (35.838)	Top-5 acc 51.172 (59.672)	lr 0.02463
Train [10][840/3239]	Time 0.207 (0.574)	Data Time 0.002 (0.032)	Loss 3.7123 (3.7599)	Entropy 1.83756 (1.83960)	Top-1 acc 40.234 (35.843)	Top-5 acc 60.156 (59.665)	lr 0.02463
Train [10][850/3239]	Time 0.202 (0.572)	Data Time 0.002 (0.031)	Loss 3.7388 (3.7598)	Entropy 1.83756 (1.83957)	Top-1 acc 34.375 (35.842)	Top-5 acc 59.766 (59.668)	lr 0.02463
Train [10][860/3239]	Time 0.204 (0.571)	Data Time 0.006 (0.031)	Loss 3.6116 (3.7592)	Entropy 1.83752 (1.83955)	Top-1 acc 35.938 (35.851)	Top-5 acc 61.719 (59.691)	lr 0.02463
Train [10][870/3239]	Time 0.167 (0.569)	Data Time 0.001 (0.031)	Loss 3.7540 (3.7594)	Entropy 1.83745 (1.83953)	Top-1 acc 39.453 (35.857)	Top-5 acc 59.766 (59.696)	lr 0.02463
Train [10][880/3239]	Time 0.303 (0.567)	Data Time 0.001 (0.030)	Loss 3.4944 (3.7588)	Entropy 1.83741 (1.83950)	Top-1 acc 42.578 (35.865)	Top-5 acc 63.672 (59.709)	lr 0.02463
Train [10][890/3239]	Time 2.188 (0.565)	Data Time 0.001 (0.030)	Loss 3.8004 (3.7598)	Entropy 1.83741 (1.83948)	Top-1 acc 35.938 (35.845)	Top-5 acc 58.203 (59.685)	lr 0.02463
Train [10][900/3239]	Time 0.206 (0.561)	Data Time 0.001 (0.030)	Loss 3.6453 (3.7596)	Entropy 1.83739 (1.83946)	Top-1 acc 36.719 (35.840)	Top-5 acc 62.109 (59.692)	lr 0.02463
Train [10][910/3239]	Time 0.223 (0.560)	Data Time 0.002 (0.029)	Loss 3.7302 (3.7597)	Entropy 1.83736 (1.83943)	Top-1 acc 39.062 (35.832)	Top-5 acc 60.156 (59.697)	lr 0.02463
Train [10][920/3239]	Time 0.212 (0.558)	Data Time 0.001 (0.029)	Loss 3.7449 (3.7601)	Entropy 1.83733 (1.83941)	Top-1 acc 35.547 (35.826)	Top-5 acc 59.375 (59.700)	lr 0.02463
Train [10][930/3239]	Time 0.219 (0.557)	Data Time 0.001 (0.029)	Loss 3.6807 (3.7609)	Entropy 1.83729 (1.83939)	Top-1 acc 36.328 (35.799)	Top-5 acc 60.156 (59.676)	lr 0.02463
Train [10][940/3239]	Time 0.205 (0.555)	Data Time 0.001 (0.029)	Loss 3.5869 (3.7614)	Entropy 1.83719 (1.83937)	Top-1 acc 37.891 (35.794)	Top-5 acc 61.328 (59.661)	lr 0.02463
Train [10][950/3239]	Time 0.195 (0.554)	Data Time 0.001 (0.028)	Loss 3.8652 (3.7615)	Entropy 1.83717 (1.83934)	Top-1 acc 32.031 (35.797)	Top-5 acc 55.859 (59.658)	lr 0.02463
Train [10][960/3239]	Time 0.310 (0.553)	Data Time 0.001 (0.028)	Loss 3.7765 (3.7620)	Entropy 1.83711 (1.83932)	Top-1 acc 33.594 (35.781)	Top-5 acc 57.812 (59.644)	lr 0.02463
Train [10][970/3239]	Time 0.144 (0.551)	Data Time 0.001 (0.028)	Loss 3.8710 (3.7622)	Entropy 1.83709 (1.83930)	Top-1 acc 34.375 (35.777)	Top-5 acc 57.031 (59.642)	lr 0.02463
Train [10][980/3239]	Time 0.237 (0.550)	Data Time 0.002 (0.027)	Loss 3.7175 (3.7624)	Entropy 1.83709 (1.83927)	Top-1 acc 34.766 (35.779)	Top-5 acc 57.031 (59.627)	lr 0.02463
Train [10][990/3239]	Time 0.214 (0.549)	Data Time 0.001 (0.027)	Loss 3.8263 (3.7624)	Entropy 1.83709 (1.83925)	Top-1 acc 31.641 (35.765)	Top-5 acc 55.469 (59.629)	lr 0.02463
Train [10][1000/3239]	Time 2.168 (0.548)	Data Time 0.001 (0.027)	Loss 3.6279 (3.7623)	Entropy 1.83709 (1.83923)	Top-1 acc 35.156 (35.754)	Top-5 acc 65.234 (59.637)	lr 0.02463
Train [10][1010/3239]	Time 0.234 (0.545)	Data Time 0.001 (0.027)	Loss 3.7431 (3.7634)	Entropy 1.83704 (1.83921)	Top-1 acc 35.938 (35.733)	Top-5 acc 61.328 (59.611)	lr 0.02463
Train [10][1020/3239]	Time 0.232 (0.543)	Data Time 0.001 (0.026)	Loss 3.8728 (3.7640)	Entropy 1.83702 (1.83919)	Top-1 acc 31.250 (35.711)	Top-5 acc 56.641 (59.596)	lr 0.02463
Train [10][1030/3239]	Time 0.199 (0.542)	Data Time 0.001 (0.026)	Loss 3.9467 (3.7645)	Entropy 1.83703 (1.83917)	Top-1 acc 34.375 (35.704)	Top-5 acc 57.812 (59.593)	lr 0.02463
Train [10][1040/3239]	Time 0.215 (0.541)	Data Time 0.002 (0.026)	Loss 3.7402 (3.7649)	Entropy 1.83700 (1.83915)	Top-1 acc 37.891 (35.701)	Top-5 acc 61.328 (59.586)	lr 0.02463
Train [10][1050/3239]	Time 0.385 (0.540)	Data Time 0.002 (0.026)	Loss 3.8706 (3.7642)	Entropy 1.83694 (1.83913)	Top-1 acc 33.203 (35.711)	Top-5 acc 59.766 (59.600)	lr 0.02463
Train [10][1060/3239]	Time 0.219 (0.540)	Data Time 0.001 (0.026)	Loss 3.6978 (3.7637)	Entropy 1.83693 (1.83910)	Top-1 acc 38.672 (35.720)	Top-5 acc 58.203 (59.616)	lr 0.02463
Train [10][1070/3239]	Time 0.245 (0.539)	Data Time 0.002 (0.025)	Loss 3.7923 (3.7640)	Entropy 1.83687 (1.83908)	Top-1 acc 34.375 (35.715)	Top-5 acc 58.203 (59.610)	lr 0.02463
Train [10][1080/3239]	Time 0.332 (0.539)	Data Time 0.001 (0.025)	Loss 3.5280 (3.7636)	Entropy 1.83676 (1.83906)	Top-1 acc 42.188 (35.728)	Top-5 acc 62.109 (59.620)	lr 0.02463
Train [10][1090/3239]	Time 0.210 (0.539)	Data Time 0.001 (0.025)	Loss 3.9653 (3.7637)	Entropy 1.83670 (1.83904)	Top-1 acc 36.328 (35.732)	Top-5 acc 56.250 (59.622)	lr 0.02463
Train [10][1100/3239]	Time 0.285 (0.538)	Data Time 0.001 (0.025)	Loss 3.6996 (3.7636)	Entropy 1.83670 (1.83902)	Top-1 acc 35.547 (35.732)	Top-5 acc 60.156 (59.619)	lr 0.02463
Train [10][1110/3239]	Time 2.230 (0.537)	Data Time 0.001 (0.025)	Loss 3.8320 (3.7637)	Entropy 1.83670 (1.83900)	Top-1 acc 32.422 (35.736)	Top-5 acc 58.984 (59.621)	lr 0.02463
Train [10][1120/3239]	Time 0.214 (0.534)	Data Time 0.002 (0.024)	Loss 3.6179 (3.7642)	Entropy 1.83661 (1.83898)	Top-1 acc 38.281 (35.720)	Top-5 acc 66.406 (59.620)	lr 0.02463
Train [10][1130/3239]	Time 0.253 (0.533)	Data Time 0.001 (0.024)	Loss 3.6316 (3.7647)	Entropy 1.83653 (1.83896)	Top-1 acc 37.500 (35.705)	Top-5 acc 62.891 (59.604)	lr 0.02463
Train [10][1140/3239]	Time 0.201 (0.532)	Data Time 0.001 (0.024)	Loss 4.0167 (3.7648)	Entropy 1.83649 (1.83894)	Top-1 acc 30.469 (35.698)	Top-5 acc 56.250 (59.607)	lr 0.02463
Train [10][1150/3239]	Time 0.235 (0.531)	Data Time 0.001 (0.024)	Loss 3.6438 (3.7645)	Entropy 1.83641 (1.83891)	Top-1 acc 41.016 (35.699)	Top-5 acc 63.281 (59.619)	lr 0.02463
Train [10][1160/3239]	Time 0.225 (0.530)	Data Time 0.002 (0.024)	Loss 3.9418 (3.7649)	Entropy 1.83640 (1.83889)	Top-1 acc 33.594 (35.694)	Top-5 acc 55.078 (59.611)	lr 0.02463
Train [10][1170/3239]	Time 0.216 (0.529)	Data Time 0.001 (0.023)	Loss 3.7729 (3.7655)	Entropy 1.83633 (1.83887)	Top-1 acc 39.844 (35.696)	Top-5 acc 59.375 (59.599)	lr 0.02463
Train [10][1180/3239]	Time 0.215 (0.529)	Data Time 0.001 (0.023)	Loss 3.7559 (3.7661)	Entropy 1.83622 (1.83885)	Top-1 acc 36.719 (35.676)	Top-5 acc 59.766 (59.579)	lr 0.02463
Train [10][1190/3239]	Time 0.223 (0.528)	Data Time 0.002 (0.023)	Loss 3.8009 (3.7661)	Entropy 1.83616 (1.83883)	Top-1 acc 31.641 (35.667)	Top-5 acc 57.812 (59.571)	lr 0.02463
Train [10][1200/3239]	Time 0.317 (0.558)	Data Time 0.003 (0.023)	Loss 3.7219 (3.7659)	Entropy 1.83613 (1.83880)	Top-1 acc 34.766 (35.662)	Top-5 acc 60.547 (59.582)	lr 0.02463
Train [10][1210/3239]	Time 0.278 (0.558)	Data Time 0.002 (0.023)	Loss 3.6162 (3.7659)	Entropy 1.83612 (1.83878)	Top-1 acc 38.672 (35.656)	Top-5 acc 59.766 (59.573)	lr 0.02463
Train [10][1220/3239]	Time 2.188 (0.556)	Data Time 0.002 (0.023)	Loss 3.6883 (3.7666)	Entropy 1.83612 (1.83876)	Top-1 acc 34.766 (35.645)	Top-5 acc 60.547 (59.553)	lr 0.02463
Train [10][1230/3239]	Time 0.330 (0.554)	Data Time 0.002 (0.022)	Loss 3.8064 (3.7666)	Entropy 1.83607 (1.83874)	Top-1 acc 35.156 (35.644)	Top-5 acc 60.156 (59.550)	lr 0.02463
Train [10][1240/3239]	Time 0.221 (0.553)	Data Time 0.001 (0.022)	Loss 3.6839 (3.7668)	Entropy 1.83605 (1.83872)	Top-1 acc 38.281 (35.645)	Top-5 acc 62.109 (59.544)	lr 0.02462
Train [10][1250/3239]	Time 0.199 (0.552)	Data Time 0.001 (0.022)	Loss 3.7314 (3.7667)	Entropy 1.83603 (1.83870)	Top-1 acc 38.672 (35.653)	Top-5 acc 61.328 (59.542)	lr 0.02462
Train [10][1260/3239]	Time 0.201 (0.550)	Data Time 0.001 (0.022)	Loss 3.7975 (3.7667)	Entropy 1.83597 (1.83867)	Top-1 acc 33.984 (35.657)	Top-5 acc 55.859 (59.534)	lr 0.02462
Train [10][1270/3239]	Time 0.204 (0.549)	Data Time 0.001 (0.022)	Loss 3.7238 (3.7668)	Entropy 1.83594 (1.83865)	Top-1 acc 37.891 (35.654)	Top-5 acc 59.375 (59.531)	lr 0.02462
Train [10][1280/3239]	Time 0.210 (0.548)	Data Time 0.001 (0.022)	Loss 3.6413 (3.7666)	Entropy 1.83585 (1.83863)	Top-1 acc 39.453 (35.655)	Top-5 acc 62.109 (59.534)	lr 0.02462
Train [10][1290/3239]	Time 0.128 (0.547)	Data Time 0.001 (0.021)	Loss 3.9465 (3.7666)	Entropy 1.83582 (1.83861)	Top-1 acc 29.297 (35.661)	Top-5 acc 56.641 (59.531)	lr 0.02462
Train [10][1300/3239]	Time 0.200 (0.546)	Data Time 0.002 (0.021)	Loss 3.8700 (3.7662)	Entropy 1.83578 (1.83859)	Top-1 acc 33.984 (35.662)	Top-5 acc 59.375 (59.546)	lr 0.02462
Train [10][1310/3239]	Time 0.214 (0.546)	Data Time 0.002 (0.021)	Loss 3.6362 (3.7664)	Entropy 1.83573 (1.83857)	Top-1 acc 35.938 (35.652)	Top-5 acc 60.938 (59.536)	lr 0.02462
Train [10][1320/3239]	Time 0.319 (0.545)	Data Time 0.001 (0.021)	Loss 3.7905 (3.7666)	Entropy 1.83560 (1.83854)	Top-1 acc 34.766 (35.654)	Top-5 acc 58.594 (59.530)	lr 0.02462
Train [10][1330/3239]	Time 2.276 (0.544)	Data Time 0.002 (0.021)	Loss 3.6569 (3.7668)	Entropy 1.83560 (1.83852)	Top-1 acc 39.062 (35.653)	Top-5 acc 60.156 (59.526)	lr 0.02462
Train [10][1340/3239]	Time 0.197 (0.541)	Data Time 0.001 (0.021)	Loss 3.5898 (3.7666)	Entropy 1.83555 (1.83850)	Top-1 acc 39.453 (35.663)	Top-5 acc 59.375 (59.525)	lr 0.02462
Train [10][1350/3239]	Time 0.139 (0.540)	Data Time 0.001 (0.021)	Loss 3.9098 (3.7663)	Entropy 1.83555 (1.83848)	Top-1 acc 32.422 (35.663)	Top-5 acc 56.250 (59.525)	lr 0.02462
Train [10][1360/3239]	Time 0.218 (0.539)	Data Time 0.001 (0.021)	Loss 3.5327 (3.7660)	Entropy 1.83533 (1.83846)	Top-1 acc 43.359 (35.675)	Top-5 acc 66.797 (59.532)	lr 0.02462
Train [10][1370/3239]	Time 0.187 (0.538)	Data Time 0.001 (0.020)	Loss 3.7884 (3.7664)	Entropy 1.83531 (1.83843)	Top-1 acc 34.766 (35.669)	Top-5 acc 58.594 (59.529)	lr 0.02462
Train [10][1380/3239]	Time 0.214 (0.538)	Data Time 0.001 (0.020)	Loss 3.8245 (3.7664)	Entropy 1.83525 (1.83841)	Top-1 acc 32.031 (35.667)	Top-5 acc 57.031 (59.528)	lr 0.02462
Train [10][1390/3239]	Time 0.201 (0.537)	Data Time 0.001 (0.020)	Loss 3.6956 (3.7663)	Entropy 1.83519 (1.83839)	Top-1 acc 37.500 (35.667)	Top-5 acc 59.766 (59.532)	lr 0.02462
Train [10][1400/3239]	Time 0.225 (0.536)	Data Time 0.001 (0.020)	Loss 3.7149 (3.7662)	Entropy 1.83509 (1.83836)	Top-1 acc 36.328 (35.663)	Top-5 acc 60.156 (59.535)	lr 0.02462
Train [10][1410/3239]	Time 0.306 (0.535)	Data Time 0.001 (0.020)	Loss 3.7547 (3.7659)	Entropy 1.83506 (1.83834)	Top-1 acc 37.500 (35.666)	Top-5 acc 58.203 (59.538)	lr 0.02462
Train [10][1420/3239]	Time 0.240 (0.534)	Data Time 0.005 (0.020)	Loss 3.7986 (3.7655)	Entropy 1.83505 (1.83832)	Top-1 acc 33.594 (35.673)	Top-5 acc 56.641 (59.544)	lr 0.02462
Train [10][1430/3239]	Time 0.200 (0.533)	Data Time 0.001 (0.020)	Loss 3.7302 (3.7649)	Entropy 1.83499 (1.83829)	Top-1 acc 36.328 (35.686)	Top-5 acc 60.547 (59.556)	lr 0.02462
Train [10][1440/3239]	Time 2.294 (0.533)	Data Time 0.001 (0.019)	Loss 3.6300 (3.7652)	Entropy 1.83499 (1.83827)	Top-1 acc 39.844 (35.685)	Top-5 acc 64.453 (59.552)	lr 0.02462
Train [10][1450/3239]	Time 0.247 (0.530)	Data Time 0.001 (0.019)	Loss 3.8494 (3.7651)	Entropy 1.83492 (1.83825)	Top-1 acc 33.984 (35.681)	Top-5 acc 54.688 (59.551)	lr 0.02462
Train [10][1460/3239]	Time 0.208 (0.530)	Data Time 0.001 (0.019)	Loss 3.7214 (3.7652)	Entropy 1.83491 (1.83823)	Top-1 acc 35.156 (35.673)	Top-5 acc 61.328 (59.550)	lr 0.02462
Train [10][1470/3239]	Time 0.241 (0.529)	Data Time 0.002 (0.019)	Loss 3.7257 (3.7652)	Entropy 1.83486 (1.83820)	Top-1 acc 35.938 (35.675)	Top-5 acc 61.328 (59.552)	lr 0.02462
Train [10][1480/3239]	Time 0.227 (0.528)	Data Time 0.001 (0.019)	Loss 3.9475 (3.7653)	Entropy 1.83482 (1.83818)	Top-1 acc 31.641 (35.669)	Top-5 acc 53.516 (59.555)	lr 0.02462
Train [10][1490/3239]	Time 0.237 (0.528)	Data Time 0.005 (0.019)	Loss 3.6883 (3.7653)	Entropy 1.83475 (1.83816)	Top-1 acc 36.719 (35.667)	Top-5 acc 62.500 (59.552)	lr 0.02462
Train [10][1500/3239]	Time 0.194 (0.527)	Data Time 0.005 (0.019)	Loss 3.6810 (3.7657)	Entropy 1.83465 (1.83813)	Top-1 acc 35.156 (35.659)	Top-5 acc 59.766 (59.542)	lr 0.02462
Train [10][1510/3239]	Time 0.300 (0.526)	Data Time 0.001 (0.019)	Loss 3.8216 (3.7663)	Entropy 1.83461 (1.83811)	Top-1 acc 33.984 (35.643)	Top-5 acc 58.203 (59.526)	lr 0.02462
Train [10][1520/3239]	Time 0.184 (0.526)	Data Time 0.001 (0.019)	Loss 3.8990 (3.7667)	Entropy 1.83457 (1.83809)	Top-1 acc 34.766 (35.637)	Top-5 acc 57.422 (59.517)	lr 0.02462
Train [10][1530/3239]	Time 0.233 (0.525)	Data Time 0.001 (0.018)	Loss 4.0684 (3.7666)	Entropy 1.83453 (1.83807)	Top-1 acc 31.250 (35.643)	Top-5 acc 51.562 (59.518)	lr 0.02462
Train [10][1540/3239]	Time 0.195 (0.524)	Data Time 0.006 (0.018)	Loss 3.7561 (3.7663)	Entropy 1.83451 (1.83804)	Top-1 acc 35.938 (35.654)	Top-5 acc 60.156 (59.523)	lr 0.02462
Train [10][1550/3239]	Time 2.239 (0.523)	Data Time 0.001 (0.018)	Loss 3.5919 (3.7662)	Entropy 1.83451 (1.83802)	Top-1 acc 37.891 (35.655)	Top-5 acc 64.453 (59.529)	lr 0.02462
Train [10][1560/3239]	Time 0.197 (0.521)	Data Time 0.001 (0.018)	Loss 3.5187 (3.7655)	Entropy 1.83446 (1.83800)	Top-1 acc 41.406 (35.672)	Top-5 acc 64.844 (59.542)	lr 0.02462
Train [10][1570/3239]	Time 0.402 (0.543)	Data Time 0.004 (0.018)	Loss 3.9751 (3.7657)	Entropy 1.83440 (1.83797)	Top-1 acc 30.078 (35.664)	Top-5 acc 57.031 (59.546)	lr 0.02462
Train [10][1580/3239]	Time 0.213 (0.543)	Data Time 0.002 (0.018)	Loss 3.6875 (3.7658)	Entropy 1.83439 (1.83795)	Top-1 acc 40.625 (35.662)	Top-5 acc 60.938 (59.536)	lr 0.02462
Train [10][1590/3239]	Time 0.272 (0.542)	Data Time 0.002 (0.018)	Loss 3.7988 (3.7659)	Entropy 1.83432 (1.83793)	Top-1 acc 32.812 (35.658)	Top-5 acc 61.719 (59.540)	lr 0.02462
Train [10][1600/3239]	Time 0.204 (0.541)	Data Time 0.001 (0.018)	Loss 3.7898 (3.7659)	Entropy 1.83427 (1.83791)	Top-1 acc 39.844 (35.657)	Top-5 acc 58.984 (59.543)	lr 0.02462
Train [10][1610/3239]	Time 0.187 (0.541)	Data Time 0.001 (0.018)	Loss 3.7453 (3.7657)	Entropy 1.83426 (1.83788)	Top-1 acc 32.812 (35.663)	Top-5 acc 58.594 (59.545)	lr 0.02462
Train [10][1620/3239]	Time 0.190 (0.540)	Data Time 0.001 (0.018)	Loss 3.6733 (3.7657)	Entropy 1.83419 (1.83786)	Top-1 acc 39.453 (35.660)	Top-5 acc 60.156 (59.550)	lr 0.02462
Train [10][1630/3239]	Time 0.206 (0.539)	Data Time 0.001 (0.017)	Loss 3.7566 (3.7653)	Entropy 1.83410 (1.83784)	Top-1 acc 33.594 (35.662)	Top-5 acc 58.594 (59.560)	lr 0.02462
Train [10][1640/3239]	Time 0.184 (0.538)	Data Time 0.001 (0.017)	Loss 3.9168 (3.7653)	Entropy 1.83407 (1.83782)	Top-1 acc 35.156 (35.658)	Top-5 acc 55.469 (59.562)	lr 0.02461
Train [10][1650/3239]	Time 0.178 (0.537)	Data Time 0.001 (0.017)	Loss 3.8622 (3.7652)	Entropy 1.83402 (1.83779)	Top-1 acc 31.641 (35.661)	Top-5 acc 57.031 (59.567)	lr 0.02461
Train [10][1660/3239]	Time 2.193 (0.537)	Data Time 0.001 (0.017)	Loss 3.7711 (3.7649)	Entropy 1.83402 (1.83777)	Top-1 acc 35.156 (35.664)	Top-5 acc 58.984 (59.573)	lr 0.02461
Train [10][1670/3239]	Time 0.297 (0.535)	Data Time 0.001 (0.017)	Loss 3.8156 (3.7651)	Entropy 1.83401 (1.83775)	Top-1 acc 34.375 (35.658)	Top-5 acc 60.938 (59.569)	lr 0.02461
Train [10][1680/3239]	Time 0.215 (0.534)	Data Time 0.001 (0.017)	Loss 3.9126 (3.7653)	Entropy 1.83398 (1.83772)	Top-1 acc 32.031 (35.650)	Top-5 acc 54.297 (59.562)	lr 0.02461
Train [10][1690/3239]	Time 0.235 (0.533)	Data Time 0.001 (0.017)	Loss 3.7394 (3.7655)	Entropy 1.83390 (1.83770)	Top-1 acc 35.547 (35.650)	Top-5 acc 61.719 (59.557)	lr 0.02461
Train [10][1700/3239]	Time 0.213 (0.533)	Data Time 0.001 (0.017)	Loss 3.6516 (3.7655)	Entropy 1.83385 (1.83768)	Top-1 acc 38.281 (35.644)	Top-5 acc 58.984 (59.553)	lr 0.02461
Train [10][1710/3239]	Time 0.202 (0.532)	Data Time 0.001 (0.017)	Loss 3.7495 (3.7656)	Entropy 1.83386 (1.83766)	Top-1 acc 36.719 (35.642)	Top-5 acc 58.984 (59.549)	lr 0.02461
Train [10][1720/3239]	Time 0.151 (0.531)	Data Time 0.001 (0.017)	Loss 3.9295 (3.7657)	Entropy 1.83387 (1.83764)	Top-1 acc 33.984 (35.642)	Top-5 acc 56.641 (59.550)	lr 0.02461
Train [10][1730/3239]	Time 0.207 (0.531)	Data Time 0.001 (0.017)	Loss 3.7559 (3.7655)	Entropy 1.83384 (1.83761)	Top-1 acc 32.422 (35.641)	Top-5 acc 58.984 (59.558)	lr 0.02461
Train [10][1740/3239]	Time 0.206 (0.530)	Data Time 0.002 (0.016)	Loss 3.8817 (3.7655)	Entropy 1.83375 (1.83759)	Top-1 acc 36.328 (35.639)	Top-5 acc 57.812 (59.555)	lr 0.02461
Train [10][1750/3239]	Time 0.199 (0.529)	Data Time 0.001 (0.016)	Loss 3.8407 (3.7656)	Entropy 1.83370 (1.83757)	Top-1 acc 34.375 (35.637)	Top-5 acc 58.984 (59.553)	lr 0.02461
Train [10][1760/3239]	Time 0.326 (0.529)	Data Time 0.001 (0.016)	Loss 3.8016 (3.7655)	Entropy 1.83365 (1.83755)	Top-1 acc 37.109 (35.634)	Top-5 acc 62.500 (59.556)	lr 0.02461
Train [10][1770/3239]	Time 2.270 (0.528)	Data Time 0.002 (0.016)	Loss 3.8319 (3.7654)	Entropy 1.83365 (1.83753)	Top-1 acc 32.031 (35.635)	Top-5 acc 58.594 (59.559)	lr 0.02461
Train [10][1780/3239]	Time 0.211 (0.526)	Data Time 0.001 (0.016)	Loss 3.6547 (3.7654)	Entropy 1.83360 (1.83750)	Top-1 acc 35.156 (35.634)	Top-5 acc 64.062 (59.562)	lr 0.02461
Train [10][1790/3239]	Time 0.207 (0.526)	Data Time 0.001 (0.016)	Loss 3.6793 (3.7655)	Entropy 1.83346 (1.83748)	Top-1 acc 32.812 (35.630)	Top-5 acc 57.812 (59.560)	lr 0.02461
Train [10][1800/3239]	Time 0.217 (0.525)	Data Time 0.001 (0.016)	Loss 3.5564 (3.7651)	Entropy 1.83337 (1.83746)	Top-1 acc 42.578 (35.638)	Top-5 acc 65.625 (59.571)	lr 0.02461
Train [10][1810/3239]	Time 0.227 (0.525)	Data Time 0.001 (0.016)	Loss 3.8478 (3.7650)	Entropy 1.83324 (1.83744)	Top-1 acc 34.766 (35.639)	Top-5 acc 56.250 (59.577)	lr 0.02461
Train [10][1820/3239]	Time 0.220 (0.524)	Data Time 0.001 (0.016)	Loss 3.9202 (3.7651)	Entropy 1.83324 (1.83741)	Top-1 acc 31.250 (35.637)	Top-5 acc 57.422 (59.572)	lr 0.02461
Train [10][1830/3239]	Time 0.135 (0.524)	Data Time 0.001 (0.016)	Loss 3.8018 (3.7653)	Entropy 1.83318 (1.83739)	Top-1 acc 30.859 (35.638)	Top-5 acc 60.547 (59.566)	lr 0.02461
Train [10][1840/3239]	Time 0.240 (0.523)	Data Time 0.002 (0.016)	Loss 3.6610 (3.7655)	Entropy 1.83313 (1.83737)	Top-1 acc 38.672 (35.639)	Top-5 acc 58.984 (59.560)	lr 0.02461
Train [10][1850/3239]	Time 0.200 (0.523)	Data Time 0.001 (0.016)	Loss 3.7890 (3.7653)	Entropy 1.83307 (1.83734)	Top-1 acc 39.062 (35.648)	Top-5 acc 58.594 (59.569)	lr 0.02461
Train [10][1860/3239]	Time 0.215 (0.522)	Data Time 0.001 (0.016)	Loss 3.7963 (3.7650)	Entropy 1.83303 (1.83732)	Top-1 acc 35.156 (35.658)	Top-5 acc 59.375 (59.570)	lr 0.02461
Train [10][1870/3239]	Time 0.160 (0.522)	Data Time 0.001 (0.015)	Loss 3.9323 (3.7650)	Entropy 1.83294 (1.83730)	Top-1 acc 32.422 (35.658)	Top-5 acc 56.250 (59.570)	lr 0.02461
Train [10][1880/3239]	Time 2.222 (0.521)	Data Time 0.001 (0.015)	Loss 3.8875 (3.7651)	Entropy 1.83294 (1.83727)	Top-1 acc 34.766 (35.651)	Top-5 acc 57.031 (59.565)	lr 0.02461
Train [10][1890/3239]	Time 0.165 (0.519)	Data Time 0.001 (0.015)	Loss 3.7324 (3.7649)	Entropy 1.83287 (1.83725)	Top-1 acc 37.891 (35.660)	Top-5 acc 62.109 (59.575)	lr 0.02461
Train [10][1900/3239]	Time 0.223 (0.519)	Data Time 0.001 (0.015)	Loss 3.6521 (3.7646)	Entropy 1.83284 (1.83723)	Top-1 acc 37.500 (35.665)	Top-5 acc 62.500 (59.586)	lr 0.02461
Train [10][1910/3239]	Time 0.186 (0.518)	Data Time 0.001 (0.015)	Loss 3.8097 (3.7643)	Entropy 1.83270 (1.83720)	Top-1 acc 32.422 (35.670)	Top-5 acc 57.422 (59.592)	lr 0.02461
Train [10][1920/3239]	Time 0.205 (0.518)	Data Time 0.001 (0.015)	Loss 3.8104 (3.7642)	Entropy 1.83260 (1.83718)	Top-1 acc 38.281 (35.666)	Top-5 acc 58.984 (59.595)	lr 0.02461
Train [10][1930/3239]	Time 0.315 (0.537)	Data Time 0.002 (0.015)	Loss 3.8981 (3.7645)	Entropy 1.83254 (1.83716)	Top-1 acc 33.984 (35.658)	Top-5 acc 57.422 (59.590)	lr 0.02461
Train [10][1940/3239]	Time 0.191 (0.537)	Data Time 0.002 (0.015)	Loss 3.6216 (3.7646)	Entropy 1.83253 (1.83713)	Top-1 acc 39.453 (35.653)	Top-5 acc 61.328 (59.587)	lr 0.02461
Train [10][1950/3239]	Time 0.186 (0.536)	Data Time 0.001 (0.015)	Loss 3.8087 (3.7646)	Entropy 1.83245 (1.83711)	Top-1 acc 34.766 (35.650)	Top-5 acc 57.812 (59.586)	lr 0.02461
Train [10][1960/3239]	Time 0.227 (0.536)	Data Time 0.001 (0.015)	Loss 3.6789 (3.7645)	Entropy 1.83242 (1.83709)	Top-1 acc 37.109 (35.652)	Top-5 acc 62.109 (59.591)	lr 0.02461
Train [10][1970/3239]	Time 0.161 (0.535)	Data Time 0.002 (0.015)	Loss 3.6698 (3.7643)	Entropy 1.83232 (1.83706)	Top-1 acc 37.109 (35.656)	Top-5 acc 59.375 (59.594)	lr 0.02461
Train [10][1980/3239]	Time 0.209 (0.534)	Data Time 0.001 (0.015)	Loss 3.6634 (3.7643)	Entropy 1.83231 (1.83704)	Top-1 acc 35.156 (35.658)	Top-5 acc 59.766 (59.602)	lr 0.02461
Train [10][1990/3239]	Time 2.277 (0.534)	Data Time 0.001 (0.015)	Loss 3.6187 (3.7640)	Entropy 1.83231 (1.83701)	Top-1 acc 37.500 (35.657)	Top-5 acc 60.547 (59.606)	lr 0.02461
Train [10][2000/3239]	Time 0.211 (0.532)	Data Time 0.001 (0.015)	Loss 3.7564 (3.7639)	Entropy 1.83225 (1.83699)	Top-1 acc 39.062 (35.663)	Top-5 acc 61.328 (59.605)	lr 0.02461
Train [10][2010/3239]	Time 0.174 (0.532)	Data Time 0.001 (0.015)	Loss 3.6010 (3.7638)	Entropy 1.83220 (1.83697)	Top-1 acc 42.188 (35.672)	Top-5 acc 61.719 (59.607)	lr 0.02461
Train [10][2020/3239]	Time 0.303 (0.531)	Data Time 0.002 (0.015)	Loss 3.5954 (3.7637)	Entropy 1.83214 (1.83694)	Top-1 acc 39.453 (35.672)	Top-5 acc 62.109 (59.606)	lr 0.02461
Train [10][2030/3239]	Time 0.326 (0.531)	Data Time 0.001 (0.014)	Loss 3.5772 (3.7636)	Entropy 1.83190 (1.83692)	Top-1 acc 38.281 (35.671)	Top-5 acc 64.844 (59.601)	lr 0.02461
Train [10][2040/3239]	Time 0.164 (0.530)	Data Time 0.001 (0.014)	Loss 3.7734 (3.7636)	Entropy 1.83187 (1.83689)	Top-1 acc 33.984 (35.671)	Top-5 acc 59.375 (59.600)	lr 0.02460
Train [10][2050/3239]	Time 0.221 (0.530)	Data Time 0.001 (0.014)	Loss 3.6652 (3.7637)	Entropy 1.83179 (1.83687)	Top-1 acc 39.062 (35.674)	Top-5 acc 60.547 (59.596)	lr 0.02460
Train [10][2060/3239]	Time 0.228 (0.529)	Data Time 0.001 (0.014)	Loss 3.6727 (3.7635)	Entropy 1.83172 (1.83684)	Top-1 acc 36.719 (35.675)	Top-5 acc 60.156 (59.601)	lr 0.02460
Train [10][2070/3239]	Time 0.227 (0.529)	Data Time 0.001 (0.014)	Loss 3.6822 (3.7635)	Entropy 1.83166 (1.83682)	Top-1 acc 39.062 (35.674)	Top-5 acc 62.109 (59.600)	lr 0.02460
Train [10][2080/3239]	Time 0.147 (0.528)	Data Time 0.001 (0.014)	Loss 3.8860 (3.7634)	Entropy 1.83164 (1.83679)	Top-1 acc 35.156 (35.678)	Top-5 acc 58.203 (59.603)	lr 0.02460
Train [10][2090/3239]	Time 0.265 (0.528)	Data Time 0.001 (0.014)	Loss 3.7597 (3.7632)	Entropy 1.83160 (1.83677)	Top-1 acc 35.156 (35.680)	Top-5 acc 58.203 (59.605)	lr 0.02460
Train [10][2100/3239]	Time 2.339 (0.527)	Data Time 0.001 (0.014)	Loss 3.7669 (3.7635)	Entropy 1.83160 (1.83674)	Top-1 acc 34.766 (35.676)	Top-5 acc 61.719 (59.600)	lr 0.02460
Train [10][2110/3239]	Time 0.326 (0.526)	Data Time 0.001 (0.014)	Loss 3.9889 (3.7636)	Entropy 1.83160 (1.83672)	Top-1 acc 31.250 (35.675)	Top-5 acc 54.297 (59.597)	lr 0.02460
Train [10][2120/3239]	Time 0.181 (0.525)	Data Time 0.002 (0.014)	Loss 3.7336 (3.7636)	Entropy 1.83156 (1.83670)	Top-1 acc 35.938 (35.675)	Top-5 acc 60.547 (59.597)	lr 0.02460
Train [10][2130/3239]	Time 0.252 (0.525)	Data Time 0.001 (0.014)	Loss 3.8177 (3.7636)	Entropy 1.83151 (1.83667)	Top-1 acc 35.156 (35.672)	Top-5 acc 60.547 (59.596)	lr 0.02460
Train [10][2140/3239]	Time 0.242 (0.524)	Data Time 0.002 (0.014)	Loss 3.9024 (3.7639)	Entropy 1.83146 (1.83665)	Top-1 acc 32.812 (35.664)	Top-5 acc 56.250 (59.593)	lr 0.02460
Train [10][2150/3239]	Time 0.197 (0.524)	Data Time 0.001 (0.014)	Loss 3.7773 (3.7640)	Entropy 1.83136 (1.83662)	Top-1 acc 33.594 (35.662)	Top-5 acc 59.766 (59.592)	lr 0.02460
Train [10][2160/3239]	Time 0.191 (0.524)	Data Time 0.001 (0.014)	Loss 3.7616 (3.7637)	Entropy 1.83132 (1.83660)	Top-1 acc 37.891 (35.670)	Top-5 acc 58.984 (59.600)	lr 0.02460
Train [10][2170/3239]	Time 0.236 (0.523)	Data Time 0.001 (0.014)	Loss 3.6750 (3.7635)	Entropy 1.83129 (1.83657)	Top-1 acc 37.891 (35.676)	Top-5 acc 62.891 (59.607)	lr 0.02460
Train [10][2180/3239]	Time 0.198 (0.523)	Data Time 0.002 (0.014)	Loss 3.7291 (3.7635)	Entropy 1.83127 (1.83655)	Top-1 acc 37.891 (35.675)	Top-5 acc 58.594 (59.611)	lr 0.02460
Train [10][2190/3239]	Time 0.267 (0.522)	Data Time 0.002 (0.014)	Loss 3.7383 (3.7634)	Entropy 1.83124 (1.83653)	Top-1 acc 37.500 (35.678)	Top-5 acc 58.984 (59.618)	lr 0.02460
Train [10][2200/3239]	Time 0.283 (0.522)	Data Time 0.001 (0.014)	Loss 3.7015 (3.7633)	Entropy 1.83119 (1.83650)	Top-1 acc 36.719 (35.681)	Top-5 acc 58.594 (59.617)	lr 0.02460
Train [10][2210/3239]	Time 2.251 (0.521)	Data Time 0.001 (0.014)	Loss 3.6017 (3.7630)	Entropy 1.83119 (1.83648)	Top-1 acc 37.500 (35.686)	Top-5 acc 62.500 (59.625)	lr 0.02460
Train [10][2220/3239]	Time 0.258 (0.520)	Data Time 0.001 (0.013)	Loss 3.8596 (3.7631)	Entropy 1.83116 (1.83645)	Top-1 acc 35.156 (35.685)	Top-5 acc 59.375 (59.628)	lr 0.02460
Train [10][2230/3239]	Time 0.222 (0.520)	Data Time 0.002 (0.013)	Loss 3.8713 (3.7632)	Entropy 1.83116 (1.83643)	Top-1 acc 35.156 (35.680)	Top-5 acc 56.250 (59.625)	lr 0.02460
Train [10][2240/3239]	Time 0.226 (0.519)	Data Time 0.001 (0.013)	Loss 3.7303 (3.7634)	Entropy 1.83110 (1.83641)	Top-1 acc 37.109 (35.676)	Top-5 acc 58.203 (59.617)	lr 0.02460
Train [10][2250/3239]	Time 0.206 (0.519)	Data Time 0.001 (0.013)	Loss 3.7655 (3.7631)	Entropy 1.83106 (1.83638)	Top-1 acc 34.766 (35.686)	Top-5 acc 58.594 (59.624)	lr 0.02460
Train [10][2260/3239]	Time 0.151 (0.518)	Data Time 0.001 (0.013)	Loss 3.6906 (3.7632)	Entropy 1.83099 (1.83636)	Top-1 acc 37.109 (35.683)	Top-5 acc 60.547 (59.622)	lr 0.02460
Train [10][2270/3239]	Time 0.174 (0.518)	Data Time 0.001 (0.013)	Loss 3.7326 (3.7631)	Entropy 1.83094 (1.83634)	Top-1 acc 37.109 (35.691)	Top-5 acc 60.156 (59.623)	lr 0.02460
Train [10][2280/3239]	Time 0.212 (0.518)	Data Time 0.001 (0.013)	Loss 3.8907 (3.7633)	Entropy 1.83092 (1.83631)	Top-1 acc 34.766 (35.686)	Top-5 acc 57.422 (59.620)	lr 0.02460
Train [10][2290/3239]	Time 0.203 (0.534)	Data Time 0.002 (0.013)	Loss 4.0133 (3.7633)	Entropy 1.83090 (1.83629)	Top-1 acc 29.688 (35.687)	Top-5 acc 54.688 (59.616)	lr 0.02460
Train [10][2300/3239]	Time 0.287 (0.533)	Data Time 0.002 (0.013)	Loss 3.7605 (3.7632)	Entropy 1.83088 (1.83626)	Top-1 acc 34.375 (35.690)	Top-5 acc 59.375 (59.622)	lr 0.02460
Train [10][2310/3239]	Time 0.241 (0.533)	Data Time 0.003 (0.013)	Loss 3.7380 (3.7629)	Entropy 1.83085 (1.83624)	Top-1 acc 37.891 (35.697)	Top-5 acc 57.812 (59.625)	lr 0.02460
Train [10][2320/3239]	Time 2.311 (0.533)	Data Time 0.002 (0.013)	Loss 3.8661 (3.7633)	Entropy 1.83085 (1.83622)	Top-1 acc 32.422 (35.687)	Top-5 acc 59.375 (59.616)	lr 0.02460
Train [10][2330/3239]	Time 0.230 (0.531)	Data Time 0.001 (0.013)	Loss 3.8366 (3.7635)	Entropy 1.83082 (1.83620)	Top-1 acc 38.281 (35.685)	Top-5 acc 59.375 (59.609)	lr 0.02460
Train [10][2340/3239]	Time 0.208 (0.531)	Data Time 0.001 (0.013)	Loss 3.4164 (3.7630)	Entropy 1.83072 (1.83617)	Top-1 acc 45.312 (35.698)	Top-5 acc 67.188 (59.620)	lr 0.02460
Train [10][2350/3239]	Time 0.169 (0.530)	Data Time 0.001 (0.013)	Loss 3.6589 (3.7630)	Entropy 1.83063 (1.83615)	Top-1 acc 39.453 (35.695)	Top-5 acc 61.328 (59.621)	lr 0.02460
Train [10][2360/3239]	Time 0.191 (0.530)	Data Time 0.001 (0.013)	Loss 3.9438 (3.7630)	Entropy 1.83058 (1.83612)	Top-1 acc 34.375 (35.698)	Top-5 acc 55.859 (59.620)	lr 0.02460
Train [10][2370/3239]	Time 0.142 (0.529)	Data Time 0.001 (0.013)	Loss 4.1429 (3.7630)	Entropy 1.83053 (1.83610)	Top-1 acc 29.688 (35.692)	Top-5 acc 51.172 (59.619)	lr 0.02460
Train [10][2380/3239]	Time 0.331 (0.529)	Data Time 0.001 (0.013)	Loss 3.6654 (3.7634)	Entropy 1.83043 (1.83608)	Top-1 acc 39.844 (35.685)	Top-5 acc 62.109 (59.610)	lr 0.02460
Train [10][2390/3239]	Time 0.134 (0.528)	Data Time 0.001 (0.013)	Loss 4.0521 (3.7635)	Entropy 1.83037 (1.83605)	Top-1 acc 32.422 (35.683)	Top-5 acc 54.297 (59.604)	lr 0.02460
Train [10][2400/3239]	Time 0.156 (0.528)	Data Time 0.001 (0.013)	Loss 4.0503 (3.7637)	Entropy 1.83034 (1.83603)	Top-1 acc 26.172 (35.674)	Top-5 acc 56.250 (59.605)	lr 0.02460
Train [10][2410/3239]	Time 0.212 (0.528)	Data Time 0.001 (0.013)	Loss 3.7189 (3.7638)	Entropy 1.83029 (1.83601)	Top-1 acc 35.938 (35.672)	Top-5 acc 58.594 (59.602)	lr 0.02460
Train [10][2420/3239]	Time 0.215 (0.527)	Data Time 0.001 (0.013)	Loss 3.8308 (3.7636)	Entropy 1.83023 (1.83598)	Top-1 acc 34.375 (35.674)	Top-5 acc 58.594 (59.608)	lr 0.02460
Train [10][2430/3239]	Time 2.336 (0.527)	Data Time 0.001 (0.013)	Loss 3.5847 (3.7637)	Entropy 1.83023 (1.83596)	Top-1 acc 39.844 (35.670)	Top-5 acc 63.281 (59.606)	lr 0.02459
Train [10][2440/3239]	Time 0.239 (0.526)	Data Time 0.001 (0.012)	Loss 3.7226 (3.7636)	Entropy 1.83016 (1.83594)	Top-1 acc 37.500 (35.676)	Top-5 acc 63.672 (59.611)	lr 0.02459
Train [10][2450/3239]	Time 0.187 (0.525)	Data Time 0.001 (0.012)	Loss 3.8975 (3.7636)	Entropy 1.83014 (1.83591)	Top-1 acc 31.641 (35.681)	Top-5 acc 56.250 (59.609)	lr 0.02459
Train [10][2460/3239]	Time 0.197 (0.525)	Data Time 0.002 (0.012)	Loss 3.8663 (3.7633)	Entropy 1.83013 (1.83589)	Top-1 acc 35.156 (35.689)	Top-5 acc 58.203 (59.616)	lr 0.02459
Train [10][2470/3239]	Time 0.313 (0.524)	Data Time 0.001 (0.012)	Loss 3.7239 (3.7631)	Entropy 1.83007 (1.83586)	Top-1 acc 33.203 (35.688)	Top-5 acc 58.984 (59.624)	lr 0.02459
Train [10][2480/3239]	Time 0.204 (0.524)	Data Time 0.001 (0.012)	Loss 3.7952 (3.7630)	Entropy 1.83000 (1.83584)	Top-1 acc 37.109 (35.685)	Top-5 acc 56.641 (59.623)	lr 0.02459
Train [10][2490/3239]	Time 0.169 (0.523)	Data Time 0.001 (0.012)	Loss 3.7036 (3.7629)	Entropy 1.82997 (1.83582)	Top-1 acc 33.594 (35.685)	Top-5 acc 60.547 (59.626)	lr 0.02459
Train [10][2500/3239]	Time 0.179 (0.523)	Data Time 0.002 (0.012)	Loss 3.6040 (3.7629)	Entropy 1.82986 (1.83579)	Top-1 acc 39.062 (35.688)	Top-5 acc 62.891 (59.623)	lr 0.02459
Train [10][2510/3239]	Time 0.228 (0.523)	Data Time 0.001 (0.012)	Loss 3.5721 (3.7625)	Entropy 1.82979 (1.83577)	Top-1 acc 36.719 (35.699)	Top-5 acc 63.672 (59.633)	lr 0.02459
Train [10][2520/3239]	Time 0.188 (0.522)	Data Time 0.002 (0.012)	Loss 3.8654 (3.7623)	Entropy 1.82976 (1.83575)	Top-1 acc 29.688 (35.699)	Top-5 acc 53.906 (59.636)	lr 0.02459
Train [10][2530/3239]	Time 0.221 (0.522)	Data Time 0.001 (0.012)	Loss 3.8581 (3.7622)	Entropy 1.82974 (1.83572)	Top-1 acc 34.375 (35.699)	Top-5 acc 58.594 (59.636)	lr 0.02459
Train [10][2540/3239]	Time 2.247 (0.522)	Data Time 0.001 (0.012)	Loss 3.7387 (3.7621)	Entropy 1.82974 (1.83570)	Top-1 acc 35.156 (35.704)	Top-5 acc 59.766 (59.642)	lr 0.02459
Train [10][2550/3239]	Time 0.292 (0.520)	Data Time 0.001 (0.012)	Loss 3.8470 (3.7618)	Entropy 1.82962 (1.83568)	Top-1 acc 33.594 (35.710)	Top-5 acc 60.547 (59.649)	lr 0.02459
Train [10][2560/3239]	Time 0.193 (0.520)	Data Time 0.001 (0.012)	Loss 3.6090 (3.7616)	Entropy 1.82959 (1.83565)	Top-1 acc 35.547 (35.711)	Top-5 acc 63.672 (59.650)	lr 0.02459
Train [10][2570/3239]	Time 0.207 (0.520)	Data Time 0.001 (0.012)	Loss 3.5420 (3.7615)	Entropy 1.82952 (1.83563)	Top-1 acc 39.844 (35.714)	Top-5 acc 64.062 (59.650)	lr 0.02459
Train [10][2580/3239]	Time 0.190 (0.519)	Data Time 0.001 (0.012)	Loss 3.9215 (3.7615)	Entropy 1.82951 (1.83560)	Top-1 acc 30.469 (35.715)	Top-5 acc 57.422 (59.652)	lr 0.02459
Train [10][2590/3239]	Time 0.199 (0.519)	Data Time 0.001 (0.012)	Loss 3.5607 (3.7615)	Entropy 1.82942 (1.83558)	Top-1 acc 41.016 (35.717)	Top-5 acc 61.719 (59.652)	lr 0.02459
Train [10][2600/3239]	Time 0.239 (0.518)	Data Time 0.001 (0.012)	Loss 4.0152 (3.7615)	Entropy 1.82937 (1.83556)	Top-1 acc 30.078 (35.717)	Top-5 acc 55.469 (59.648)	lr 0.02459
Train [10][2610/3239]	Time 0.201 (0.518)	Data Time 0.001 (0.012)	Loss 3.8638 (3.7617)	Entropy 1.82935 (1.83553)	Top-1 acc 28.125 (35.712)	Top-5 acc 59.375 (59.648)	lr 0.02459
Train [10][2620/3239]	Time 0.211 (0.518)	Data Time 0.001 (0.012)	Loss 3.6888 (3.7617)	Entropy 1.82934 (1.83551)	Top-1 acc 40.234 (35.714)	Top-5 acc 62.891 (59.646)	lr 0.02459
Train [10][2630/3239]	Time 0.319 (0.517)	Data Time 0.002 (0.012)	Loss 3.5299 (3.7614)	Entropy 1.82934 (1.83549)	Top-1 acc 40.625 (35.721)	Top-5 acc 64.062 (59.655)	lr 0.02459
Train [10][2640/3239]	Time 0.222 (0.517)	Data Time 0.001 (0.012)	Loss 3.5454 (3.7612)	Entropy 1.82929 (1.83546)	Top-1 acc 38.281 (35.730)	Top-5 acc 64.062 (59.660)	lr 0.02459
Train [10][2650/3239]	Time 0.334 (0.529)	Data Time 0.004 (0.012)	Loss 3.8005 (3.7613)	Entropy 1.82926 (1.83544)	Top-1 acc 33.984 (35.728)	Top-5 acc 57.812 (59.660)	lr 0.02459
Train [10][2660/3239]	Time 0.301 (0.529)	Data Time 0.002 (0.012)	Loss 3.8991 (3.7611)	Entropy 1.82922 (1.83542)	Top-1 acc 33.594 (35.731)	Top-5 acc 55.859 (59.664)	lr 0.02459
Train [10][2670/3239]	Time 0.274 (0.529)	Data Time 0.002 (0.012)	Loss 3.8613 (3.7612)	Entropy 1.82919 (1.83539)	Top-1 acc 35.547 (35.729)	Top-5 acc 57.031 (59.660)	lr 0.02459
Train [10][2680/3239]	Time 0.179 (0.528)	Data Time 0.001 (0.012)	Loss 3.5836 (3.7613)	Entropy 1.82910 (1.83537)	Top-1 acc 38.672 (35.726)	Top-5 acc 62.891 (59.659)	lr 0.02459
Train [10][2690/3239]	Time 0.267 (0.528)	Data Time 0.001 (0.012)	Loss 3.7643 (3.7611)	Entropy 1.82900 (1.83535)	Top-1 acc 37.500 (35.723)	Top-5 acc 59.375 (59.667)	lr 0.02459
Train [10][2700/3239]	Time 0.210 (0.528)	Data Time 0.002 (0.012)	Loss 3.8024 (3.7611)	Entropy 1.82890 (1.83532)	Top-1 acc 34.375 (35.726)	Top-5 acc 59.375 (59.665)	lr 0.02459
Train [10][2710/3239]	Time 0.312 (0.527)	Data Time 0.001 (0.011)	Loss 3.9044 (3.7609)	Entropy 1.82883 (1.83530)	Top-1 acc 34.766 (35.736)	Top-5 acc 58.984 (59.674)	lr 0.02459
Train [10][2720/3239]	Time 0.208 (0.527)	Data Time 0.001 (0.011)	Loss 3.9269 (3.7608)	Entropy 1.82881 (1.83528)	Top-1 acc 29.297 (35.737)	Top-5 acc 54.297 (59.676)	lr 0.02459
Train [10][2730/3239]	Time 0.231 (0.526)	Data Time 0.001 (0.011)	Loss 3.8488 (3.7608)	Entropy 1.82879 (1.83525)	Top-1 acc 35.547 (35.737)	Top-5 acc 55.859 (59.681)	lr 0.02459
Train [10][2740/3239]	Time 0.237 (0.526)	Data Time 0.001 (0.011)	Loss 3.8910 (3.7608)	Entropy 1.82877 (1.83523)	Top-1 acc 34.766 (35.736)	Top-5 acc 55.469 (59.680)	lr 0.02459
Train [10][2750/3239]	Time 0.202 (0.526)	Data Time 0.001 (0.011)	Loss 3.8823 (3.7607)	Entropy 1.82868 (1.83520)	Top-1 acc 30.469 (35.739)	Top-5 acc 57.422 (59.682)	lr 0.02459
Train [10][2760/3239]	Time 0.270 (0.525)	Data Time 0.001 (0.011)	Loss 3.7084 (3.7605)	Entropy 1.82867 (1.83518)	Top-1 acc 38.672 (35.747)	Top-5 acc 63.281 (59.689)	lr 0.02459
Train [10][2770/3239]	Time 0.238 (0.525)	Data Time 0.001 (0.011)	Loss 3.8044 (3.7603)	Entropy 1.82865 (1.83516)	Top-1 acc 33.594 (35.754)	Top-5 acc 57.031 (59.693)	lr 0.02459
Train [10][2780/3239]	Time 0.229 (0.525)	Data Time 0.001 (0.011)	Loss 3.6713 (3.7601)	Entropy 1.82862 (1.83513)	Top-1 acc 38.672 (35.762)	Top-5 acc 61.719 (59.699)	lr 0.02459
Train [10][2790/3239]	Time 0.191 (0.524)	Data Time 0.001 (0.011)	Loss 3.8507 (3.7603)	Entropy 1.82860 (1.83511)	Top-1 acc 34.375 (35.754)	Top-5 acc 60.156 (59.695)	lr 0.02459
Train [10][2800/3239]	Time 0.321 (0.524)	Data Time 0.001 (0.011)	Loss 3.5854 (3.7603)	Entropy 1.82856 (1.83509)	Top-1 acc 37.109 (35.753)	Top-5 acc 64.844 (59.698)	lr 0.02459
Train [10][2810/3239]	Time 0.224 (0.523)	Data Time 0.001 (0.011)	Loss 3.8436 (3.7604)	Entropy 1.82854 (1.83506)	Top-1 acc 31.641 (35.751)	Top-5 acc 57.031 (59.698)	lr 0.02459
Train [10][2820/3239]	Time 0.184 (0.523)	Data Time 0.001 (0.011)	Loss 3.6262 (3.7602)	Entropy 1.82850 (1.83504)	Top-1 acc 38.672 (35.752)	Top-5 acc 61.719 (59.697)	lr 0.02458
Train [10][2830/3239]	Time 0.154 (0.523)	Data Time 0.001 (0.011)	Loss 4.0587 (3.7603)	Entropy 1.82843 (1.83502)	Top-1 acc 26.562 (35.754)	Top-5 acc 54.297 (59.692)	lr 0.02458
Train [10][2840/3239]	Time 0.190 (0.522)	Data Time 0.001 (0.011)	Loss 3.9019 (3.7602)	Entropy 1.82838 (1.83500)	Top-1 acc 37.109 (35.757)	Top-5 acc 57.031 (59.692)	lr 0.02458
Train [10][2850/3239]	Time 0.229 (0.522)	Data Time 0.001 (0.011)	Loss 3.5320 (3.7598)	Entropy 1.82836 (1.83497)	Top-1 acc 41.406 (35.764)	Top-5 acc 62.109 (59.702)	lr 0.02458
Train [10][2860/3239]	Time 0.203 (0.522)	Data Time 0.002 (0.011)	Loss 3.4725 (3.7596)	Entropy 1.82828 (1.83495)	Top-1 acc 43.750 (35.774)	Top-5 acc 65.234 (59.708)	lr 0.02458
Train [10][2870/3239]	Time 0.194 (0.521)	Data Time 0.001 (0.011)	Loss 3.8630 (3.7596)	Entropy 1.82829 (1.83493)	Top-1 acc 35.156 (35.778)	Top-5 acc 56.250 (59.712)	lr 0.02458
Train [10][2880/3239]	Time 0.151 (0.521)	Data Time 0.001 (0.011)	Loss 3.6513 (3.7597)	Entropy 1.82824 (1.83490)	Top-1 acc 38.672 (35.776)	Top-5 acc 59.766 (59.708)	lr 0.02458
Train [10][2890/3239]	Time 0.208 (0.521)	Data Time 0.001 (0.011)	Loss 3.8991 (3.7599)	Entropy 1.82819 (1.83488)	Top-1 acc 36.719 (35.774)	Top-5 acc 57.422 (59.705)	lr 0.02458
Train [10][2900/3239]	Time 0.300 (0.520)	Data Time 0.001 (0.011)	Loss 3.7242 (3.7597)	Entropy 1.82815 (1.83486)	Top-1 acc 39.453 (35.779)	Top-5 acc 63.672 (59.711)	lr 0.02458
Train [10][2910/3239]	Time 0.227 (0.520)	Data Time 0.001 (0.011)	Loss 3.8362 (3.7597)	Entropy 1.82812 (1.83483)	Top-1 acc 35.156 (35.780)	Top-5 acc 58.203 (59.710)	lr 0.02458
Train [10][2920/3239]	Time 0.233 (0.520)	Data Time 0.001 (0.011)	Loss 3.6649 (3.7595)	Entropy 1.82807 (1.83481)	Top-1 acc 37.891 (35.784)	Top-5 acc 59.766 (59.714)	lr 0.02458
Train [10][2930/3239]	Time 0.245 (0.519)	Data Time 0.001 (0.011)	Loss 3.7662 (3.7594)	Entropy 1.82796 (1.83479)	Top-1 acc 35.938 (35.788)	Top-5 acc 58.203 (59.714)	lr 0.02458
Train [10][2940/3239]	Time 0.216 (0.519)	Data Time 0.001 (0.011)	Loss 3.7940 (3.7592)	Entropy 1.82790 (1.83476)	Top-1 acc 37.500 (35.794)	Top-5 acc 59.766 (59.719)	lr 0.02458
Train [10][2950/3239]	Time 0.214 (0.519)	Data Time 0.001 (0.011)	Loss 3.5825 (3.7592)	Entropy 1.82786 (1.83474)	Top-1 acc 34.766 (35.798)	Top-5 acc 66.016 (59.720)	lr 0.02458
Train [10][2960/3239]	Time 0.191 (0.518)	Data Time 0.001 (0.011)	Loss 3.6791 (3.7592)	Entropy 1.82778 (1.83472)	Top-1 acc 35.547 (35.800)	Top-5 acc 60.156 (59.718)	lr 0.02458
Train [10][2970/3239]	Time 0.237 (0.518)	Data Time 0.001 (0.011)	Loss 3.7715 (3.7591)	Entropy 1.82777 (1.83469)	Top-1 acc 33.203 (35.802)	Top-5 acc 58.984 (59.721)	lr 0.02458
Train [10][2980/3239]	Time 0.261 (0.529)	Data Time 0.004 (0.011)	Loss 3.6904 (3.7592)	Entropy 1.82775 (1.83467)	Top-1 acc 35.938 (35.800)	Top-5 acc 61.328 (59.718)	lr 0.02458
Train [10][2990/3239]	Time 0.308 (0.529)	Data Time 0.002 (0.011)	Loss 3.7643 (3.7595)	Entropy 1.82768 (1.83465)	Top-1 acc 34.766 (35.793)	Top-5 acc 57.812 (59.712)	lr 0.02458
Train [10][3000/3239]	Time 0.226 (0.529)	Data Time 0.001 (0.011)	Loss 3.9269 (3.7595)	Entropy 1.82764 (1.83462)	Top-1 acc 39.453 (35.796)	Top-5 acc 56.250 (59.710)	lr 0.02458
Train [10][3010/3239]	Time 0.226 (0.529)	Data Time 0.002 (0.011)	Loss 3.7089 (3.7596)	Entropy 1.82755 (1.83460)	Top-1 acc 35.938 (35.797)	Top-5 acc 61.719 (59.709)	lr 0.02458
Train [10][3020/3239]	Time 0.289 (0.528)	Data Time 0.001 (0.011)	Loss 3.7095 (3.7597)	Entropy 1.82750 (1.83458)	Top-1 acc 36.328 (35.794)	Top-5 acc 60.547 (59.708)	lr 0.02458
Train [10][3030/3239]	Time 0.210 (0.528)	Data Time 0.001 (0.011)	Loss 3.7737 (3.7596)	Entropy 1.82747 (1.83455)	Top-1 acc 33.203 (35.796)	Top-5 acc 59.375 (59.709)	lr 0.02458
Train [10][3040/3239]	Time 0.230 (0.528)	Data Time 0.001 (0.010)	Loss 3.5468 (3.7592)	Entropy 1.82743 (1.83453)	Top-1 acc 42.578 (35.810)	Top-5 acc 66.016 (59.718)	lr 0.02458
Train [10][3050/3239]	Time 0.183 (0.527)	Data Time 0.001 (0.010)	Loss 3.6418 (3.7591)	Entropy 1.82737 (1.83451)	Top-1 acc 36.719 (35.813)	Top-5 acc 61.328 (59.720)	lr 0.02458
Train [10][3060/3239]	Time 0.257 (0.527)	Data Time 0.001 (0.010)	Loss 3.9138 (3.7591)	Entropy 1.82735 (1.83448)	Top-1 acc 30.469 (35.811)	Top-5 acc 55.859 (59.719)	lr 0.02458
Train [10][3070/3239]	Time 0.316 (0.527)	Data Time 0.001 (0.010)	Loss 3.7028 (3.7587)	Entropy 1.82725 (1.83446)	Top-1 acc 40.234 (35.819)	Top-5 acc 63.672 (59.728)	lr 0.02458
Train [10][3080/3239]	Time 0.240 (0.526)	Data Time 0.001 (0.010)	Loss 3.8335 (3.7587)	Entropy 1.82717 (1.83444)	Top-1 acc 33.984 (35.819)	Top-5 acc 58.594 (59.732)	lr 0.02458
Train [10][3090/3239]	Time 0.246 (0.526)	Data Time 0.001 (0.010)	Loss 3.7178 (3.7587)	Entropy 1.82713 (1.83441)	Top-1 acc 37.891 (35.818)	Top-5 acc 56.250 (59.727)	lr 0.02458
Train [10][3100/3239]	Time 0.205 (0.526)	Data Time 0.002 (0.010)	Loss 3.5561 (3.7584)	Entropy 1.82705 (1.83439)	Top-1 acc 42.578 (35.825)	Top-5 acc 61.328 (59.736)	lr 0.02458
Train [10][3110/3239]	Time 0.251 (0.525)	Data Time 0.001 (0.010)	Loss 3.6619 (3.7586)	Entropy 1.82699 (1.83437)	Top-1 acc 38.672 (35.824)	Top-5 acc 60.547 (59.736)	lr 0.02458
Train [10][3120/3239]	Time 0.223 (0.525)	Data Time 0.001 (0.010)	Loss 3.7806 (3.7585)	Entropy 1.82699 (1.83434)	Top-1 acc 31.641 (35.825)	Top-5 acc 56.641 (59.735)	lr 0.02458
Train [10][3130/3239]	Time 0.253 (0.524)	Data Time 0.001 (0.010)	Loss 3.8999 (3.7584)	Entropy 1.82698 (1.83432)	Top-1 acc 36.328 (35.827)	Top-5 acc 58.203 (59.734)	lr 0.02458
Train [10][3140/3239]	Time 0.261 (0.524)	Data Time 0.001 (0.010)	Loss 3.6395 (3.7585)	Entropy 1.82693 (1.83430)	Top-1 acc 35.938 (35.828)	Top-5 acc 61.719 (59.732)	lr 0.02458
Train [10][3150/3239]	Time 0.359 (0.524)	Data Time 0.001 (0.010)	Loss 3.6393 (3.7582)	Entropy 1.82693 (1.83427)	Top-1 acc 37.109 (35.834)	Top-5 acc 63.281 (59.739)	lr 0.02458
Train [10][3160/3239]	Time 0.217 (0.524)	Data Time 0.001 (0.010)	Loss 3.8203 (3.7582)	Entropy 1.82692 (1.83425)	Top-1 acc 36.328 (35.835)	Top-5 acc 58.594 (59.742)	lr 0.02458
Train [10][3170/3239]	Time 0.221 (0.523)	Data Time 0.001 (0.010)	Loss 3.6088 (3.7580)	Entropy 1.82687 (1.83423)	Top-1 acc 38.281 (35.838)	Top-5 acc 61.328 (59.741)	lr 0.02458
Train [10][3180/3239]	Time 0.203 (0.523)	Data Time 0.000 (0.010)	Loss 3.6568 (3.7579)	Entropy 1.82681 (1.83420)	Top-1 acc 36.719 (35.839)	Top-5 acc 61.719 (59.746)	lr 0.02458
Train [10][3190/3239]	Time 0.187 (0.522)	Data Time 0.000 (0.010)	Loss 3.9411 (3.7580)	Entropy 1.82680 (1.83418)	Top-1 acc 32.422 (35.838)	Top-5 acc 57.031 (59.745)	lr 0.02458
Train [10][3200/3239]	Time 0.217 (0.522)	Data Time 0.000 (0.010)	Loss 3.4757 (3.7578)	Entropy 1.82679 (1.83416)	Top-1 acc 42.188 (35.844)	Top-5 acc 64.844 (59.748)	lr 0.02458
Train [10][3210/3239]	Time 0.212 (0.522)	Data Time 0.000 (0.010)	Loss 3.6705 (3.7576)	Entropy 1.82673 (1.83413)	Top-1 acc 38.281 (35.850)	Top-5 acc 60.156 (59.751)	lr 0.02457
Train [10][3220/3239]	Time 0.217 (0.521)	Data Time 0.000 (0.010)	Loss 3.8141 (3.7575)	Entropy 1.82669 (1.83411)	Top-1 acc 35.547 (35.856)	Top-5 acc 57.422 (59.752)	lr 0.02457
Train [10][3230/3239]	Time 0.296 (0.521)	Data Time 0.000 (0.010)	Loss 3.7832 (3.7575)	Entropy 1.82662 (1.83409)	Top-1 acc 35.156 (35.856)	Top-5 acc 58.203 (59.749)	lr 0.02457
Train [10][3239/3239]	Time 2.090 (0.521)	Data Time 0.000 (0.010)	Loss 4.0201 (3.7576)	Entropy 1.82662 (1.83407)	Top-1 acc 38.272 (35.857)	Top-5 acc 51.852 (59.748)	lr 0.02457
==========Valid [10/120]	loss 2.575	top-1 acc 45.033 (45.033)	top-5 acc 69.774	Train top-1 35.857	top-5 59.748	Entropy 1.82662	Latency-None: 0.000ms	Flops: 535.07M
Train [11][0/3239]	Time 25.258 (25.258)	Data Time 23.438 (23.438)	Loss 3.4811 (3.4811)	Entropy 1.82658 (1.82658)	Top-1 acc 41.797 (41.797)	Top-5 acc 64.844 (64.844)	lr 0.02457
Train [11][10/3239]	Time 2.263 (2.827)	Data Time 0.001 (2.195)	Loss 3.6834 (3.6941)	Entropy 1.82658 (1.82658)	Top-1 acc 37.109 (36.683)	Top-5 acc 61.719 (61.719)	lr 0.02457
Train [11][20/3239]	Time 0.182 (1.583)	Data Time 0.002 (1.150)	Loss 3.7234 (3.7261)	Entropy 1.82648 (1.82653)	Top-1 acc 37.500 (36.124)	Top-5 acc 64.453 (60.993)	lr 0.02457
Train [11][30/3239]	Time 0.214 (1.207)	Data Time 0.001 (0.780)	Loss 3.6363 (3.7021)	Entropy 1.82649 (1.82652)	Top-1 acc 39.453 (36.807)	Top-5 acc 60.547 (61.656)	lr 0.02457
Train [11][40/3239]	Time 0.246 (1.014)	Data Time 0.002 (0.590)	Loss 3.7759 (3.7198)	Entropy 1.82638 (1.82649)	Top-1 acc 35.547 (36.566)	Top-5 acc 57.031 (60.909)	lr 0.02457
Train [11][50/3239]	Time 0.211 (0.896)	Data Time 0.001 (0.475)	Loss 3.5813 (3.7162)	Entropy 1.82632 (1.82646)	Top-1 acc 37.891 (36.573)	Top-5 acc 59.766 (60.861)	lr 0.02457
Train [11][60/3239]	Time 0.229 (0.818)	Data Time 0.001 (0.397)	Loss 3.6038 (3.7149)	Entropy 1.82629 (1.82643)	Top-1 acc 37.109 (36.623)	Top-5 acc 59.766 (60.765)	lr 0.02457
Train [11][70/3239]	Time 0.298 (0.762)	Data Time 0.001 (0.341)	Loss 3.6881 (3.7187)	Entropy 1.82622 (1.82641)	Top-1 acc 37.891 (36.488)	Top-5 acc 61.328 (60.662)	lr 0.02457
Train [11][80/3239]	Time 0.227 (1.250)	Data Time 0.002 (0.299)	Loss 3.9080 (3.7225)	Entropy 1.82622 (1.82639)	Top-1 acc 35.156 (36.560)	Top-5 acc 61.328 (60.672)	lr 0.02457
Train [11][90/3239]	Time 0.236 (1.161)	Data Time 0.002 (0.267)	Loss 3.8686 (3.7295)	Entropy 1.82622 (1.82637)	Top-1 acc 33.984 (36.362)	Top-5 acc 58.594 (60.500)	lr 0.02457
Train [11][100/3239]	Time 0.182 (1.088)	Data Time 0.001 (0.241)	Loss 3.7538 (3.7321)	Entropy 1.82615 (1.82635)	Top-1 acc 34.375 (36.413)	Top-5 acc 58.203 (60.500)	lr 0.02457
Train [11][110/3239]	Time 0.180 (1.027)	Data Time 0.001 (0.220)	Loss 3.4914 (3.7229)	Entropy 1.82610 (1.82633)	Top-1 acc 41.016 (36.617)	Top-5 acc 64.844 (60.638)	lr 0.02457
Train [11][120/3239]	Time 2.189 (0.976)	Data Time 0.001 (0.202)	Loss 3.7231 (3.7141)	Entropy 1.82610 (1.82631)	Top-1 acc 38.281 (36.793)	Top-5 acc 61.719 (60.799)	lr 0.02457
Train [11][130/3239]	Time 0.206 (0.918)	Data Time 0.001 (0.186)	Loss 3.7654 (3.7125)	Entropy 1.82610 (1.82630)	Top-1 acc 34.766 (36.784)	Top-5 acc 64.844 (60.872)	lr 0.02457
Train [11][140/3239]	Time 0.207 (0.883)	Data Time 0.001 (0.173)	Loss 3.6686 (3.7131)	Entropy 1.82605 (1.82628)	Top-1 acc 34.766 (36.680)	Top-5 acc 59.766 (60.760)	lr 0.02457
Train [11][150/3239]	Time 0.203 (0.851)	Data Time 0.001 (0.162)	Loss 3.9278 (3.7147)	Entropy 1.82596 (1.82626)	Top-1 acc 34.766 (36.701)	Top-5 acc 58.594 (60.746)	lr 0.02457
Train [11][160/3239]	Time 0.279 (0.825)	Data Time 0.001 (0.152)	Loss 3.8526 (3.7177)	Entropy 1.82595 (1.82624)	Top-1 acc 33.203 (36.663)	Top-5 acc 57.422 (60.656)	lr 0.02457
Train [11][170/3239]	Time 0.248 (0.800)	Data Time 0.001 (0.143)	Loss 3.6873 (3.7170)	Entropy 1.82583 (1.82622)	Top-1 acc 37.891 (36.712)	Top-5 acc 60.547 (60.679)	lr 0.02457
Train [11][180/3239]	Time 0.219 (0.779)	Data Time 0.001 (0.136)	Loss 3.5383 (3.7156)	Entropy 1.82581 (1.82620)	Top-1 acc 42.578 (36.745)	Top-5 acc 65.234 (60.717)	lr 0.02457
Train [11][190/3239]	Time 0.203 (0.761)	Data Time 0.001 (0.129)	Loss 3.5886 (3.7163)	Entropy 1.82575 (1.82618)	Top-1 acc 42.969 (36.721)	Top-5 acc 62.109 (60.694)	lr 0.02457
Train [11][200/3239]	Time 0.157 (0.743)	Data Time 0.001 (0.122)	Loss 3.7706 (3.7180)	Entropy 1.82573 (1.82616)	Top-1 acc 36.328 (36.728)	Top-5 acc 58.594 (60.611)	lr 0.02457
Train [11][210/3239]	Time 0.199 (0.728)	Data Time 0.001 (0.117)	Loss 3.5742 (3.7179)	Entropy 1.82564 (1.82613)	Top-1 acc 40.625 (36.769)	Top-5 acc 64.453 (60.601)	lr 0.02457
Train [11][220/3239]	Time 0.228 (0.713)	Data Time 0.002 (0.112)	Loss 3.7417 (3.7179)	Entropy 1.82556 (1.82611)	Top-1 acc 34.766 (36.775)	Top-5 acc 62.500 (60.584)	lr 0.02457
Train [11][230/3239]	Time 2.347 (0.700)	Data Time 0.001 (0.107)	Loss 3.5996 (3.7212)	Entropy 1.82556 (1.82609)	Top-1 acc 38.281 (36.722)	Top-5 acc 62.109 (60.510)	lr 0.02457
Train [11][240/3239]	Time 0.205 (0.680)	Data Time 0.001 (0.102)	Loss 3.6549 (3.7191)	Entropy 1.82553 (1.82606)	Top-1 acc 41.016 (36.733)	Top-5 acc 59.766 (60.550)	lr 0.02457
Train [11][250/3239]	Time 0.360 (0.670)	Data Time 0.001 (0.098)	Loss 3.6024 (3.7197)	Entropy 1.82548 (1.82604)	Top-1 acc 37.500 (36.716)	Top-5 acc 62.109 (60.503)	lr 0.02457
Train [11][260/3239]	Time 0.368 (0.660)	Data Time 0.001 (0.095)	Loss 3.5044 (3.7160)	Entropy 1.82544 (1.82602)	Top-1 acc 38.672 (36.825)	Top-5 acc 63.672 (60.620)	lr 0.02457
Train [11][270/3239]	Time 0.180 (0.651)	Data Time 0.001 (0.091)	Loss 3.7895 (3.7163)	Entropy 1.82541 (1.82600)	Top-1 acc 35.547 (36.784)	Top-5 acc 56.641 (60.597)	lr 0.02457
Train [11][280/3239]	Time 0.217 (0.643)	Data Time 0.001 (0.088)	Loss 3.8530 (3.7164)	Entropy 1.82537 (1.82597)	Top-1 acc 37.891 (36.799)	Top-5 acc 58.594 (60.587)	lr 0.02457
Train [11][290/3239]	Time 0.208 (0.635)	Data Time 0.001 (0.085)	Loss 3.5493 (3.7144)	Entropy 1.82532 (1.82595)	Top-1 acc 39.453 (36.806)	Top-5 acc 66.016 (60.649)	lr 0.02457
Train [11][300/3239]	Time 0.200 (0.628)	Data Time 0.001 (0.082)	Loss 3.7950 (3.7135)	Entropy 1.82525 (1.82593)	Top-1 acc 38.672 (36.824)	Top-5 acc 57.812 (60.691)	lr 0.02457
Train [11][310/3239]	Time 0.240 (0.621)	Data Time 0.001 (0.080)	Loss 3.6712 (3.7117)	Entropy 1.82524 (1.82591)	Top-1 acc 40.625 (36.859)	Top-5 acc 59.375 (60.703)	lr 0.02457
Train [11][320/3239]	Time 0.243 (0.615)	Data Time 0.001 (0.077)	Loss 3.7149 (3.7094)	Entropy 1.82522 (1.82589)	Top-1 acc 36.328 (36.906)	Top-5 acc 62.109 (60.742)	lr 0.02457
Train [11][330/3239]	Time 0.216 (0.609)	Data Time 0.001 (0.075)	Loss 3.4805 (3.7065)	Entropy 1.82513 (1.82587)	Top-1 acc 41.406 (36.978)	Top-5 acc 67.578 (60.810)	lr 0.02457
Train [11][340/3239]	Time 2.256 (0.604)	Data Time 0.001 (0.073)	Loss 3.6887 (3.7074)	Entropy 1.82513 (1.82585)	Top-1 acc 37.109 (36.954)	Top-5 acc 64.844 (60.779)	lr 0.02457
Train [11][350/3239]	Time 0.223 (0.593)	Data Time 0.001 (0.071)	Loss 3.9171 (3.7067)	Entropy 1.82508 (1.82582)	Top-1 acc 35.156 (36.955)	Top-5 acc 55.469 (60.815)	lr 0.02456
Train [11][360/3239]	Time 0.155 (0.588)	Data Time 0.001 (0.069)	Loss 3.6997 (3.7064)	Entropy 1.82500 (1.82580)	Top-1 acc 36.328 (36.990)	Top-5 acc 61.719 (60.816)	lr 0.02456
Train [11][370/3239]	Time 0.156 (0.583)	Data Time 0.001 (0.067)	Loss 3.6968 (3.7080)	Entropy 1.82498 (1.82578)	Top-1 acc 37.891 (36.936)	Top-5 acc 63.281 (60.781)	lr 0.02456
Train [11][380/3239]	Time 0.189 (0.579)	Data Time 0.001 (0.066)	Loss 3.4699 (3.7065)	Entropy 1.82494 (1.82576)	Top-1 acc 41.797 (36.980)	Top-5 acc 67.969 (60.810)	lr 0.02456
Train [11][390/3239]	Time 0.199 (0.575)	Data Time 0.001 (0.064)	Loss 3.8975 (3.7068)	Entropy 1.82484 (1.82574)	Top-1 acc 32.812 (36.949)	Top-5 acc 56.250 (60.795)	lr 0.02456
Train [11][400/3239]	Time 0.156 (0.571)	Data Time 0.001 (0.062)	Loss 3.8749 (3.7059)	Entropy 1.82484 (1.82571)	Top-1 acc 35.156 (36.962)	Top-5 acc 58.594 (60.827)	lr 0.02456
Train [11][410/3239]	Time 0.185 (0.567)	Data Time 0.001 (0.061)	Loss 3.7948 (3.7060)	Entropy 1.82476 (1.82569)	Top-1 acc 35.156 (36.934)	Top-5 acc 61.328 (60.842)	lr 0.02456
Train [11][420/3239]	Time 0.305 (0.564)	Data Time 0.001 (0.060)	Loss 3.7557 (3.7073)	Entropy 1.82475 (1.82567)	Top-1 acc 37.500 (36.902)	Top-5 acc 60.156 (60.824)	lr 0.02456
Train [11][430/3239]	Time 0.323 (0.560)	Data Time 0.002 (0.058)	Loss 3.5048 (3.7076)	Entropy 1.82473 (1.82565)	Top-1 acc 42.969 (36.891)	Top-5 acc 66.406 (60.815)	lr 0.02456
Train [11][440/3239]	Time 0.390 (0.639)	Data Time 0.004 (0.057)	Loss 3.8848 (3.7095)	Entropy 1.82473 (1.82563)	Top-1 acc 35.156 (36.864)	Top-5 acc 58.203 (60.759)	lr 0.02456
Train [11][450/3239]	Time 2.397 (0.636)	Data Time 0.002 (0.056)	Loss 3.6496 (3.7105)	Entropy 1.82473 (1.82561)	Top-1 acc 35.547 (36.836)	Top-5 acc 62.109 (60.752)	lr 0.02456
Train [11][460/3239]	Time 0.221 (0.627)	Data Time 0.002 (0.055)	Loss 3.6165 (3.7108)	Entropy 1.82461 (1.82559)	Top-1 acc 39.844 (36.830)	Top-5 acc 62.500 (60.735)	lr 0.02456
Train [11][470/3239]	Time 0.236 (0.623)	Data Time 0.002 (0.053)	Loss 3.9044 (3.7121)	Entropy 1.82458 (1.82556)	Top-1 acc 32.422 (36.810)	Top-5 acc 57.812 (60.719)	lr 0.02456
Train [11][480/3239]	Time 0.196 (0.619)	Data Time 0.002 (0.052)	Loss 3.6037 (3.7128)	Entropy 1.82453 (1.82554)	Top-1 acc 33.594 (36.793)	Top-5 acc 64.062 (60.704)	lr 0.02456
Train [11][490/3239]	Time 0.205 (0.615)	Data Time 0.001 (0.051)	Loss 3.5885 (3.7131)	Entropy 1.82452 (1.82552)	Top-1 acc 38.672 (36.791)	Top-5 acc 63.672 (60.685)	lr 0.02456
Train [11][500/3239]	Time 0.327 (0.611)	Data Time 0.001 (0.050)	Loss 3.7176 (3.7116)	Entropy 1.82444 (1.82550)	Top-1 acc 36.719 (36.822)	Top-5 acc 62.109 (60.717)	lr 0.02456
Train [11][510/3239]	Time 0.210 (0.607)	Data Time 0.001 (0.049)	Loss 3.7883 (3.7110)	Entropy 1.82442 (1.82548)	Top-1 acc 36.328 (36.843)	Top-5 acc 56.641 (60.734)	lr 0.02456
Train [11][520/3239]	Time 0.224 (0.603)	Data Time 0.001 (0.049)	Loss 3.7683 (3.7119)	Entropy 1.82440 (1.82546)	Top-1 acc 40.234 (36.857)	Top-5 acc 60.156 (60.710)	lr 0.02456
Train [11][530/3239]	Time 0.237 (0.600)	Data Time 0.002 (0.048)	Loss 3.5175 (3.7111)	Entropy 1.82435 (1.82544)	Top-1 acc 39.453 (36.862)	Top-5 acc 65.625 (60.735)	lr 0.02456
Train [11][540/3239]	Time 0.235 (0.596)	Data Time 0.001 (0.047)	Loss 3.7027 (3.7116)	Entropy 1.82435 (1.82542)	Top-1 acc 36.719 (36.843)	Top-5 acc 58.594 (60.719)	lr 0.02456
Train [11][550/3239]	Time 0.207 (0.593)	Data Time 0.001 (0.046)	Loss 3.7413 (3.7119)	Entropy 1.82430 (1.82540)	Top-1 acc 37.109 (36.840)	Top-5 acc 59.375 (60.717)	lr 0.02456
Train [11][560/3239]	Time 2.187 (0.590)	Data Time 0.001 (0.045)	Loss 3.6286 (3.7109)	Entropy 1.82430 (1.82538)	Top-1 acc 41.016 (36.877)	Top-5 acc 62.109 (60.753)	lr 0.02456
Train [11][570/3239]	Time 0.317 (0.583)	Data Time 0.001 (0.044)	Loss 3.7037 (3.7121)	Entropy 1.82422 (1.82536)	Top-1 acc 35.547 (36.821)	Top-5 acc 64.062 (60.732)	lr 0.02456
Train [11][580/3239]	Time 0.319 (0.580)	Data Time 0.001 (0.044)	Loss 3.7758 (3.7126)	Entropy 1.82412 (1.82534)	Top-1 acc 35.547 (36.810)	Top-5 acc 57.812 (60.726)	lr 0.02456
Train [11][590/3239]	Time 0.218 (0.577)	Data Time 0.001 (0.043)	Loss 3.7802 (3.7133)	Entropy 1.82400 (1.82532)	Top-1 acc 32.812 (36.802)	Top-5 acc 60.156 (60.702)	lr 0.02456
Train [11][600/3239]	Time 0.207 (0.574)	Data Time 0.001 (0.042)	Loss 3.7713 (3.7138)	Entropy 1.82399 (1.82529)	Top-1 acc 36.328 (36.771)	Top-5 acc 62.500 (60.704)	lr 0.02456
Train [11][610/3239]	Time 0.169 (0.572)	Data Time 0.001 (0.042)	Loss 3.6572 (3.7147)	Entropy 1.82393 (1.82527)	Top-1 acc 35.938 (36.746)	Top-5 acc 61.328 (60.688)	lr 0.02456
Train [11][620/3239]	Time 0.211 (0.569)	Data Time 0.001 (0.041)	Loss 3.7128 (3.7152)	Entropy 1.82375 (1.82525)	Top-1 acc 35.938 (36.746)	Top-5 acc 59.375 (60.669)	lr 0.02456
Train [11][630/3239]	Time 0.147 (0.567)	Data Time 0.001 (0.040)	Loss 3.8481 (3.7155)	Entropy 1.82370 (1.82523)	Top-1 acc 35.547 (36.740)	Top-5 acc 55.469 (60.666)	lr 0.02456
Train [11][640/3239]	Time 0.171 (0.565)	Data Time 0.001 (0.040)	Loss 3.7218 (3.7151)	Entropy 1.82369 (1.82520)	Top-1 acc 38.281 (36.742)	Top-5 acc 59.375 (60.665)	lr 0.02456
Train [11][650/3239]	Time 0.228 (0.562)	Data Time 0.001 (0.039)	Loss 3.7265 (3.7139)	Entropy 1.82366 (1.82518)	Top-1 acc 37.500 (36.741)	Top-5 acc 62.109 (60.700)	lr 0.02456
Train [11][660/3239]	Time 0.260 (0.560)	Data Time 0.001 (0.039)	Loss 3.7352 (3.7142)	Entropy 1.82361 (1.82516)	Top-1 acc 33.203 (36.742)	Top-5 acc 55.469 (60.683)	lr 0.02456
Train [11][670/3239]	Time 2.244 (0.558)	Data Time 0.001 (0.038)	Loss 3.5266 (3.7134)	Entropy 1.82361 (1.82513)	Top-1 acc 42.188 (36.755)	Top-5 acc 64.062 (60.705)	lr 0.02456
Train [11][680/3239]	Time 0.254 (0.553)	Data Time 0.001 (0.038)	Loss 3.5695 (3.7142)	Entropy 1.82358 (1.82511)	Top-1 acc 37.891 (36.747)	Top-5 acc 63.672 (60.689)	lr 0.02456
Train [11][690/3239]	Time 0.225 (0.551)	Data Time 0.001 (0.037)	Loss 4.0592 (3.7143)	Entropy 1.82355 (1.82509)	Top-1 acc 30.469 (36.745)	Top-5 acc 53.516 (60.686)	lr 0.02456
Train [11][700/3239]	Time 0.217 (0.550)	Data Time 0.001 (0.037)	Loss 3.7856 (3.7146)	Entropy 1.82353 (1.82507)	Top-1 acc 39.062 (36.726)	Top-5 acc 60.938 (60.685)	lr 0.02456
Train [11][710/3239]	Time 0.174 (0.548)	Data Time 0.001 (0.036)	Loss 3.9741 (3.7159)	Entropy 1.82348 (1.82504)	Top-1 acc 27.734 (36.682)	Top-5 acc 53.125 (60.650)	lr 0.02456
Train [11][720/3239]	Time 0.260 (0.546)	Data Time 0.002 (0.036)	Loss 3.6142 (3.7165)	Entropy 1.82336 (1.82502)	Top-1 acc 35.156 (36.668)	Top-5 acc 64.453 (60.647)	lr 0.02456
Train [11][730/3239]	Time 0.188 (0.545)	Data Time 0.001 (0.035)	Loss 3.6651 (3.7166)	Entropy 1.82332 (1.82500)	Top-1 acc 35.547 (36.680)	Top-5 acc 62.109 (60.647)	lr 0.02455
Train [11][740/3239]	Time 0.215 (0.543)	Data Time 0.001 (0.035)	Loss 3.6814 (3.7170)	Entropy 1.82324 (1.82497)	Top-1 acc 40.234 (36.680)	Top-5 acc 60.156 (60.640)	lr 0.02455
Train [11][750/3239]	Time 0.299 (0.541)	Data Time 0.001 (0.035)	Loss 3.6004 (3.7166)	Entropy 1.82322 (1.82495)	Top-1 acc 37.109 (36.683)	Top-5 acc 63.281 (60.649)	lr 0.02455
Train [11][760/3239]	Time 0.268 (0.540)	Data Time 0.001 (0.034)	Loss 3.8349 (3.7165)	Entropy 1.82317 (1.82493)	Top-1 acc 34.375 (36.695)	Top-5 acc 57.031 (60.648)	lr 0.02455
Train [11][770/3239]	Time 0.195 (0.538)	Data Time 0.001 (0.034)	Loss 3.5956 (3.7166)	Entropy 1.82308 (1.82491)	Top-1 acc 37.500 (36.691)	Top-5 acc 61.719 (60.646)	lr 0.02455
Train [11][780/3239]	Time 2.125 (0.537)	Data Time 0.001 (0.033)	Loss 3.7350 (3.7165)	Entropy 1.82308 (1.82488)	Top-1 acc 37.109 (36.677)	Top-5 acc 60.938 (60.648)	lr 0.02455
Train [11][790/3239]	Time 0.158 (0.533)	Data Time 0.001 (0.033)	Loss 3.7578 (3.7162)	Entropy 1.82301 (1.82486)	Top-1 acc 35.547 (36.677)	Top-5 acc 58.203 (60.656)	lr 0.02455
Train [11][800/3239]	Time 0.209 (0.531)	Data Time 0.001 (0.033)	Loss 3.8182 (3.7173)	Entropy 1.82292 (1.82483)	Top-1 acc 36.328 (36.660)	Top-5 acc 57.812 (60.633)	lr 0.02455
Train [11][810/3239]	Time 0.207 (0.580)	Data Time 0.002 (0.032)	Loss 3.5999 (3.7164)	Entropy 1.82288 (1.82481)	Top-1 acc 35.547 (36.681)	Top-5 acc 62.891 (60.642)	lr 0.02455
Train [11][820/3239]	Time 0.206 (0.578)	Data Time 0.003 (0.032)	Loss 3.6780 (3.7168)	Entropy 1.82284 (1.82479)	Top-1 acc 39.844 (36.665)	Top-5 acc 61.328 (60.633)	lr 0.02455
Train [11][830/3239]	Time 0.308 (0.576)	Data Time 0.002 (0.031)	Loss 3.7567 (3.7174)	Entropy 1.82277 (1.82476)	Top-1 acc 36.328 (36.638)	Top-5 acc 61.719 (60.623)	lr 0.02455
Train [11][840/3239]	Time 0.219 (0.574)	Data Time 0.002 (0.031)	Loss 3.7159 (3.7182)	Entropy 1.82275 (1.82474)	Top-1 acc 39.062 (36.637)	Top-5 acc 60.547 (60.608)	lr 0.02455
Train [11][850/3239]	Time 0.204 (0.572)	Data Time 0.002 (0.031)	Loss 3.5210 (3.7177)	Entropy 1.82272 (1.82472)	Top-1 acc 40.234 (36.649)	Top-5 acc 65.234 (60.625)	lr 0.02455
Train [11][860/3239]	Time 0.207 (0.571)	Data Time 0.001 (0.030)	Loss 3.6674 (3.7178)	Entropy 1.82268 (1.82469)	Top-1 acc 37.500 (36.642)	Top-5 acc 62.500 (60.623)	lr 0.02455
Train [11][870/3239]	Time 0.181 (0.569)	Data Time 0.001 (0.030)	Loss 3.6748 (3.7180)	Entropy 1.82259 (1.82467)	Top-1 acc 40.625 (36.651)	Top-5 acc 61.328 (60.615)	lr 0.02455
Train [11][880/3239]	Time 0.195 (0.567)	Data Time 0.002 (0.030)	Loss 3.9792 (3.7173)	Entropy 1.82252 (1.82465)	Top-1 acc 31.250 (36.668)	Top-5 acc 53.125 (60.623)	lr 0.02455
Train [11][890/3239]	Time 2.142 (0.565)	Data Time 0.002 (0.029)	Loss 3.7118 (3.7172)	Entropy 1.82252 (1.82462)	Top-1 acc 38.672 (36.668)	Top-5 acc 57.422 (60.628)	lr 0.02455
Train [11][900/3239]	Time 0.226 (0.561)	Data Time 0.002 (0.029)	Loss 3.8747 (3.7173)	Entropy 1.82244 (1.82460)	Top-1 acc 29.297 (36.661)	Top-5 acc 56.641 (60.615)	lr 0.02455
Train [11][910/3239]	Time 0.199 (0.560)	Data Time 0.001 (0.029)	Loss 3.8521 (3.7171)	Entropy 1.82241 (1.82457)	Top-1 acc 33.594 (36.667)	Top-5 acc 57.031 (60.614)	lr 0.02455
Train [11][920/3239]	Time 0.301 (0.559)	Data Time 0.001 (0.029)	Loss 3.7048 (3.7169)	Entropy 1.82227 (1.82455)	Top-1 acc 36.328 (36.667)	Top-5 acc 62.500 (60.613)	lr 0.02455
Train [11][930/3239]	Time 0.279 (0.557)	Data Time 0.001 (0.028)	Loss 3.6616 (3.7172)	Entropy 1.82214 (1.82452)	Top-1 acc 37.500 (36.646)	Top-5 acc 60.938 (60.612)	lr 0.02455
Train [11][940/3239]	Time 0.222 (0.556)	Data Time 0.001 (0.028)	Loss 3.6526 (3.7173)	Entropy 1.82210 (1.82450)	Top-1 acc 37.891 (36.648)	Top-5 acc 62.891 (60.619)	lr 0.02455
Train [11][950/3239]	Time 0.201 (0.554)	Data Time 0.001 (0.028)	Loss 3.6005 (3.7172)	Entropy 1.82205 (1.82447)	Top-1 acc 39.062 (36.645)	Top-5 acc 62.891 (60.622)	lr 0.02455
Train [11][960/3239]	Time 0.203 (0.553)	Data Time 0.001 (0.028)	Loss 3.7398 (3.7176)	Entropy 1.82202 (1.82445)	Top-1 acc 36.719 (36.631)	Top-5 acc 59.375 (60.612)	lr 0.02455
Train [11][970/3239]	Time 0.211 (0.552)	Data Time 0.001 (0.027)	Loss 3.7369 (3.7180)	Entropy 1.82181 (1.82442)	Top-1 acc 33.984 (36.619)	Top-5 acc 61.328 (60.612)	lr 0.02455
Train [11][980/3239]	Time 0.207 (0.550)	Data Time 0.001 (0.027)	Loss 3.7406 (3.7181)	Entropy 1.82177 (1.82439)	Top-1 acc 37.891 (36.632)	Top-5 acc 60.156 (60.619)	lr 0.02455
Train [11][990/3239]	Time 0.201 (0.549)	Data Time 0.002 (0.027)	Loss 3.9197 (3.7182)	Entropy 1.82176 (1.82437)	Top-1 acc 28.516 (36.626)	Top-5 acc 58.203 (60.618)	lr 0.02455
Train [11][1000/3239]	Time 2.251 (0.548)	Data Time 0.001 (0.026)	Loss 3.8362 (3.7179)	Entropy 1.82176 (1.82434)	Top-1 acc 37.500 (36.643)	Top-5 acc 60.938 (60.639)	lr 0.02455
Train [11][1010/3239]	Time 0.203 (0.545)	Data Time 0.001 (0.026)	Loss 3.6942 (3.7185)	Entropy 1.82173 (1.82432)	Top-1 acc 32.812 (36.623)	Top-5 acc 60.156 (60.621)	lr 0.02455
Train [11][1020/3239]	Time 0.208 (0.544)	Data Time 0.001 (0.026)	Loss 3.6426 (3.7190)	Entropy 1.82165 (1.82429)	Top-1 acc 42.188 (36.619)	Top-5 acc 61.328 (60.607)	lr 0.02455
Train [11][1030/3239]	Time 0.152 (0.542)	Data Time 0.001 (0.026)	Loss 3.6997 (3.7186)	Entropy 1.82160 (1.82426)	Top-1 acc 38.281 (36.637)	Top-5 acc 63.672 (60.620)	lr 0.02455
Train [11][1040/3239]	Time 0.220 (0.541)	Data Time 0.001 (0.026)	Loss 3.7070 (3.7190)	Entropy 1.82158 (1.82424)	Top-1 acc 33.594 (36.620)	Top-5 acc 61.328 (60.610)	lr 0.02455
Train [11][1050/3239]	Time 0.205 (0.540)	Data Time 0.001 (0.025)	Loss 3.7005 (3.7191)	Entropy 1.82158 (1.82421)	Top-1 acc 37.500 (36.622)	Top-5 acc 59.375 (60.593)	lr 0.02455
Train [11][1060/3239]	Time 0.207 (0.539)	Data Time 0.001 (0.025)	Loss 3.7903 (3.7188)	Entropy 1.82157 (1.82419)	Top-1 acc 35.547 (36.621)	Top-5 acc 59.766 (60.605)	lr 0.02455
Train [11][1070/3239]	Time 0.205 (0.538)	Data Time 0.001 (0.025)	Loss 3.7049 (3.7185)	Entropy 1.82155 (1.82416)	Top-1 acc 38.672 (36.626)	Top-5 acc 61.328 (60.613)	lr 0.02455
Train [11][1080/3239]	Time 0.288 (0.537)	Data Time 0.001 (0.025)	Loss 3.5689 (3.7179)	Entropy 1.82152 (1.82414)	Top-1 acc 38.281 (36.640)	Top-5 acc 62.109 (60.620)	lr 0.02455
Train [11][1090/3239]	Time 0.210 (0.536)	Data Time 0.001 (0.024)	Loss 3.4289 (3.7177)	Entropy 1.82142 (1.82412)	Top-1 acc 41.406 (36.634)	Top-5 acc 67.188 (60.630)	lr 0.02455
Train [11][1100/3239]	Time 0.146 (0.535)	Data Time 0.001 (0.024)	Loss 3.6606 (3.7178)	Entropy 1.82140 (1.82409)	Top-1 acc 40.625 (36.635)	Top-5 acc 62.891 (60.632)	lr 0.02454
Train [11][1110/3239]	Time 2.272 (0.534)	Data Time 0.001 (0.024)	Loss 3.7140 (3.7177)	Entropy 1.82140 (1.82407)	Top-1 acc 36.719 (36.630)	Top-5 acc 62.891 (60.641)	lr 0.02454
Train [11][1120/3239]	Time 0.198 (0.531)	Data Time 0.001 (0.024)	Loss 3.6730 (3.7180)	Entropy 1.82138 (1.82404)	Top-1 acc 40.234 (36.627)	Top-5 acc 62.500 (60.635)	lr 0.02454
Train [11][1130/3239]	Time 0.208 (0.530)	Data Time 0.001 (0.024)	Loss 3.7284 (3.7182)	Entropy 1.82134 (1.82402)	Top-1 acc 37.109 (36.615)	Top-5 acc 59.766 (60.629)	lr 0.02454
Train [11][1140/3239]	Time 0.211 (0.529)	Data Time 0.001 (0.023)	Loss 3.9453 (3.7184)	Entropy 1.82123 (1.82399)	Top-1 acc 33.984 (36.612)	Top-5 acc 55.469 (60.626)	lr 0.02454
Train [11][1150/3239]	Time 0.271 (0.528)	Data Time 0.002 (0.023)	Loss 3.6343 (3.7183)	Entropy 1.82120 (1.82397)	Top-1 acc 37.109 (36.609)	Top-5 acc 60.156 (60.630)	lr 0.02454
Train [11][1160/3239]	Time 0.237 (0.527)	Data Time 0.001 (0.023)	Loss 3.7947 (3.7188)	Entropy 1.82116 (1.82395)	Top-1 acc 36.328 (36.593)	Top-5 acc 58.594 (60.628)	lr 0.02454
Train [11][1170/3239]	Time 0.171 (0.559)	Data Time 0.002 (0.023)	Loss 3.5433 (3.7186)	Entropy 1.82111 (1.82392)	Top-1 acc 38.672 (36.600)	Top-5 acc 64.844 (60.631)	lr 0.02454
Train [11][1180/3239]	Time 0.307 (0.558)	Data Time 0.002 (0.023)	Loss 3.8041 (3.7188)	Entropy 1.82107 (1.82390)	Top-1 acc 32.812 (36.600)	Top-5 acc 58.203 (60.623)	lr 0.02454
Train [11][1190/3239]	Time 0.223 (0.557)	Data Time 0.001 (0.023)	Loss 3.7706 (3.7192)	Entropy 1.82103 (1.82387)	Top-1 acc 35.938 (36.603)	Top-5 acc 61.719 (60.606)	lr 0.02454
Train [11][1200/3239]	Time 0.207 (0.556)	Data Time 0.001 (0.022)	Loss 3.4956 (3.7185)	Entropy 1.82100 (1.82385)	Top-1 acc 39.062 (36.611)	Top-5 acc 67.188 (60.624)	lr 0.02454
Train [11][1210/3239]	Time 0.171 (0.555)	Data Time 0.001 (0.022)	Loss 3.7146 (3.7183)	Entropy 1.82095 (1.82383)	Top-1 acc 35.938 (36.614)	Top-5 acc 61.328 (60.630)	lr 0.02454
Train [11][1220/3239]	Time 2.223 (0.553)	Data Time 0.001 (0.022)	Loss 3.9861 (3.7186)	Entropy 1.82095 (1.82380)	Top-1 acc 31.250 (36.613)	Top-5 acc 54.297 (60.623)	lr 0.02454
Train [11][1230/3239]	Time 0.252 (0.551)	Data Time 0.001 (0.022)	Loss 3.6459 (3.7182)	Entropy 1.82089 (1.82378)	Top-1 acc 41.016 (36.623)	Top-5 acc 62.500 (60.633)	lr 0.02454
Train [11][1240/3239]	Time 0.203 (0.550)	Data Time 0.001 (0.022)	Loss 3.5758 (3.7189)	Entropy 1.82089 (1.82376)	Top-1 acc 38.281 (36.594)	Top-5 acc 63.672 (60.613)	lr 0.02454
Train [11][1250/3239]	Time 0.249 (0.549)	Data Time 0.001 (0.022)	Loss 3.5194 (3.7191)	Entropy 1.82084 (1.82373)	Top-1 acc 39.453 (36.591)	Top-5 acc 64.062 (60.612)	lr 0.02454
Train [11][1260/3239]	Time 0.213 (0.548)	Data Time 0.001 (0.021)	Loss 3.7140 (3.7190)	Entropy 1.82084 (1.82371)	Top-1 acc 33.984 (36.578)	Top-5 acc 59.766 (60.616)	lr 0.02454
Train [11][1270/3239]	Time 0.204 (0.547)	Data Time 0.001 (0.021)	Loss 3.6463 (3.7184)	Entropy 1.82078 (1.82369)	Top-1 acc 37.500 (36.589)	Top-5 acc 59.766 (60.628)	lr 0.02454
Train [11][1280/3239]	Time 0.189 (0.546)	Data Time 0.001 (0.021)	Loss 3.7967 (3.7184)	Entropy 1.82076 (1.82366)	Top-1 acc 38.281 (36.594)	Top-5 acc 58.984 (60.631)	lr 0.02454
Train [11][1290/3239]	Time 0.210 (0.545)	Data Time 0.001 (0.021)	Loss 3.8188 (3.7186)	Entropy 1.82072 (1.82364)	Top-1 acc 35.156 (36.598)	Top-5 acc 56.250 (60.625)	lr 0.02454
Train [11][1300/3239]	Time 0.217 (0.544)	Data Time 0.002 (0.021)	Loss 3.4995 (3.7186)	Entropy 1.82069 (1.82362)	Top-1 acc 37.500 (36.597)	Top-5 acc 64.844 (60.623)	lr 0.02454
Train [11][1310/3239]	Time 0.223 (0.543)	Data Time 0.001 (0.021)	Loss 3.5346 (3.7182)	Entropy 1.82061 (1.82360)	Top-1 acc 38.672 (36.612)	Top-5 acc 66.016 (60.637)	lr 0.02454
Train [11][1320/3239]	Time 0.188 (0.542)	Data Time 0.001 (0.021)	Loss 3.5521 (3.7182)	Entropy 1.82054 (1.82357)	Top-1 acc 41.406 (36.617)	Top-5 acc 63.672 (60.640)	lr 0.02454
Train [11][1330/3239]	Time 2.351 (0.541)	Data Time 0.001 (0.020)	Loss 3.6441 (3.7177)	Entropy 1.82054 (1.82355)	Top-1 acc 37.109 (36.620)	Top-5 acc 59.766 (60.650)	lr 0.02454
Train [11][1340/3239]	Time 0.202 (0.539)	Data Time 0.001 (0.020)	Loss 3.6997 (3.7180)	Entropy 1.82052 (1.82353)	Top-1 acc 35.156 (36.605)	Top-5 acc 58.594 (60.634)	lr 0.02454
Train [11][1350/3239]	Time 0.259 (0.538)	Data Time 0.001 (0.020)	Loss 3.8163 (3.7180)	Entropy 1.82047 (1.82351)	Top-1 acc 35.156 (36.597)	Top-5 acc 56.250 (60.639)	lr 0.02454
Train [11][1360/3239]	Time 0.200 (0.537)	Data Time 0.001 (0.020)	Loss 3.6991 (3.7178)	Entropy 1.82046 (1.82348)	Top-1 acc 38.672 (36.602)	Top-5 acc 61.328 (60.640)	lr 0.02454
Train [11][1370/3239]	Time 0.209 (0.536)	Data Time 0.001 (0.020)	Loss 3.6010 (3.7177)	Entropy 1.82037 (1.82346)	Top-1 acc 35.938 (36.597)	Top-5 acc 62.891 (60.635)	lr 0.02454
Train [11][1380/3239]	Time 0.153 (0.536)	Data Time 0.001 (0.020)	Loss 3.6815 (3.7177)	Entropy 1.82025 (1.82344)	Top-1 acc 41.016 (36.589)	Top-5 acc 60.938 (60.635)	lr 0.02454
Train [11][1390/3239]	Time 0.185 (0.535)	Data Time 0.001 (0.020)	Loss 3.7465 (3.7174)	Entropy 1.82023 (1.82342)	Top-1 acc 39.062 (36.593)	Top-5 acc 62.891 (60.640)	lr 0.02454
Train [11][1400/3239]	Time 0.201 (0.534)	Data Time 0.001 (0.020)	Loss 3.6177 (3.7180)	Entropy 1.82021 (1.82339)	Top-1 acc 38.281 (36.587)	Top-5 acc 63.672 (60.630)	lr 0.02454
Train [11][1410/3239]	Time 0.192 (0.533)	Data Time 0.001 (0.019)	Loss 3.8087 (3.7183)	Entropy 1.82020 (1.82337)	Top-1 acc 35.547 (36.574)	Top-5 acc 57.422 (60.621)	lr 0.02454
Train [11][1420/3239]	Time 0.357 (0.532)	Data Time 0.001 (0.019)	Loss 3.8373 (3.7185)	Entropy 1.82013 (1.82335)	Top-1 acc 31.250 (36.576)	Top-5 acc 57.031 (60.621)	lr 0.02454
Train [11][1430/3239]	Time 0.202 (0.532)	Data Time 0.001 (0.019)	Loss 3.6206 (3.7185)	Entropy 1.82012 (1.82333)	Top-1 acc 34.375 (36.574)	Top-5 acc 61.719 (60.614)	lr 0.02454
Train [11][1440/3239]	Time 2.264 (0.531)	Data Time 0.001 (0.019)	Loss 3.7544 (3.7188)	Entropy 1.82012 (1.82330)	Top-1 acc 33.594 (36.565)	Top-5 acc 61.719 (60.605)	lr 0.02454
Train [11][1450/3239]	Time 0.229 (0.529)	Data Time 0.001 (0.019)	Loss 3.7461 (3.7185)	Entropy 1.82009 (1.82328)	Top-1 acc 34.766 (36.575)	Top-5 acc 60.547 (60.609)	lr 0.02454
Train [11][1460/3239]	Time 0.202 (0.528)	Data Time 0.001 (0.019)	Loss 3.7787 (3.7185)	Entropy 1.81999 (1.82326)	Top-1 acc 39.062 (36.590)	Top-5 acc 58.203 (60.605)	lr 0.02454
Train [11][1470/3239]	Time 0.205 (0.527)	Data Time 0.001 (0.019)	Loss 3.8795 (3.7186)	Entropy 1.81991 (1.82324)	Top-1 acc 35.156 (36.593)	Top-5 acc 57.422 (60.604)	lr 0.02453
Train [11][1480/3239]	Time 0.167 (0.526)	Data Time 0.001 (0.019)	Loss 3.9513 (3.7190)	Entropy 1.81990 (1.82321)	Top-1 acc 35.156 (36.589)	Top-5 acc 56.641 (60.602)	lr 0.02453
Train [11][1490/3239]	Time 0.203 (0.526)	Data Time 0.001 (0.018)	Loss 3.6741 (3.7188)	Entropy 1.81989 (1.82319)	Top-1 acc 39.844 (36.588)	Top-5 acc 59.766 (60.605)	lr 0.02453
Train [11][1500/3239]	Time 0.271 (0.525)	Data Time 0.001 (0.018)	Loss 3.5961 (3.7190)	Entropy 1.81984 (1.82317)	Top-1 acc 42.188 (36.581)	Top-5 acc 62.500 (60.600)	lr 0.02453
Train [11][1510/3239]	Time 0.221 (0.525)	Data Time 0.001 (0.018)	Loss 4.0567 (3.7195)	Entropy 1.81978 (1.82315)	Top-1 acc 30.078 (36.572)	Top-5 acc 52.344 (60.589)	lr 0.02453
Train [11][1520/3239]	Time 0.197 (0.524)	Data Time 0.002 (0.018)	Loss 3.6283 (3.7194)	Entropy 1.81968 (1.82312)	Top-1 acc 36.719 (36.571)	Top-5 acc 59.766 (60.589)	lr 0.02453
Train [11][1530/3239]	Time 0.192 (0.549)	Data Time 0.002 (0.018)	Loss 3.7068 (3.7192)	Entropy 1.81962 (1.82310)	Top-1 acc 36.719 (36.574)	Top-5 acc 60.938 (60.594)	lr 0.02453
Train [11][1540/3239]	Time 0.243 (0.548)	Data Time 0.004 (0.018)	Loss 3.8662 (3.7194)	Entropy 1.81961 (1.82308)	Top-1 acc 36.328 (36.571)	Top-5 acc 54.688 (60.582)	lr 0.02453
Train [11][1550/3239]	Time 2.118 (0.547)	Data Time 0.002 (0.018)	Loss 3.6683 (3.7193)	Entropy 1.81961 (1.82306)	Top-1 acc 37.109 (36.573)	Top-5 acc 63.672 (60.583)	lr 0.02453
Train [11][1560/3239]	Time 0.197 (0.545)	Data Time 0.001 (0.018)	Loss 3.6183 (3.7191)	Entropy 1.81956 (1.82303)	Top-1 acc 38.281 (36.580)	Top-5 acc 63.281 (60.586)	lr 0.02453
Train [11][1570/3239]	Time 0.164 (0.544)	Data Time 0.001 (0.018)	Loss 3.7561 (3.7190)	Entropy 1.81957 (1.82301)	Top-1 acc 33.203 (36.580)	Top-5 acc 57.422 (60.587)	lr 0.02453
Train [11][1580/3239]	Time 0.287 (0.544)	Data Time 0.002 (0.018)	Loss 3.7639 (3.7189)	Entropy 1.81951 (1.82299)	Top-1 acc 36.719 (36.577)	Top-5 acc 58.984 (60.586)	lr 0.02453
Train [11][1590/3239]	Time 0.199 (0.543)	Data Time 0.001 (0.017)	Loss 3.7192 (3.7188)	Entropy 1.81948 (1.82297)	Top-1 acc 37.109 (36.587)	Top-5 acc 62.500 (60.588)	lr 0.02453
Train [11][1600/3239]	Time 0.186 (0.542)	Data Time 0.001 (0.017)	Loss 3.6048 (3.7191)	Entropy 1.81947 (1.82295)	Top-1 acc 39.453 (36.582)	Top-5 acc 65.234 (60.582)	lr 0.02453
Train [11][1610/3239]	Time 0.221 (0.541)	Data Time 0.001 (0.017)	Loss 3.5603 (3.7190)	Entropy 1.81943 (1.82292)	Top-1 acc 37.500 (36.585)	Top-5 acc 64.062 (60.582)	lr 0.02453
Train [11][1620/3239]	Time 0.203 (0.541)	Data Time 0.001 (0.017)	Loss 3.8455 (3.7189)	Entropy 1.81934 (1.82290)	Top-1 acc 34.766 (36.585)	Top-5 acc 58.984 (60.587)	lr 0.02453
Train [11][1630/3239]	Time 0.137 (0.540)	Data Time 0.001 (0.017)	Loss 3.8839 (3.7192)	Entropy 1.81930 (1.82288)	Top-1 acc 35.938 (36.589)	Top-5 acc 58.984 (60.581)	lr 0.02453
Train [11][1640/3239]	Time 0.263 (0.539)	Data Time 0.001 (0.017)	Loss 3.8142 (3.7191)	Entropy 1.81925 (1.82286)	Top-1 acc 37.891 (36.592)	Top-5 acc 57.812 (60.579)	lr 0.02453
Train [11][1650/3239]	Time 0.211 (0.538)	Data Time 0.001 (0.017)	Loss 3.6620 (3.7194)	Entropy 1.81921 (1.82284)	Top-1 acc 36.328 (36.587)	Top-5 acc 60.547 (60.569)	lr 0.02453
Train [11][1660/3239]	Time 2.422 (0.538)	Data Time 0.002 (0.017)	Loss 3.5339 (3.7190)	Entropy 1.81921 (1.82282)	Top-1 acc 40.625 (36.595)	Top-5 acc 65.625 (60.580)	lr 0.02453
Train [11][1670/3239]	Time 0.204 (0.536)	Data Time 0.001 (0.017)	Loss 3.3466 (3.7191)	Entropy 1.81913 (1.82279)	Top-1 acc 44.922 (36.590)	Top-5 acc 67.969 (60.580)	lr 0.02453
Train [11][1680/3239]	Time 0.199 (0.535)	Data Time 0.001 (0.017)	Loss 3.8693 (3.7188)	Entropy 1.81905 (1.82277)	Top-1 acc 32.812 (36.588)	Top-5 acc 54.688 (60.584)	lr 0.02453
Train [11][1690/3239]	Time 0.228 (0.534)	Data Time 0.001 (0.017)	Loss 3.6452 (3.7189)	Entropy 1.81902 (1.82275)	Top-1 acc 36.328 (36.584)	Top-5 acc 62.109 (60.582)	lr 0.02453
Train [11][1700/3239]	Time 0.196 (0.534)	Data Time 0.001 (0.016)	Loss 4.0080 (3.7188)	Entropy 1.81898 (1.82273)	Top-1 acc 31.641 (36.583)	Top-5 acc 54.688 (60.585)	lr 0.02453
Train [11][1710/3239]	Time 0.204 (0.533)	Data Time 0.001 (0.016)	Loss 3.7741 (3.7190)	Entropy 1.81890 (1.82270)	Top-1 acc 33.203 (36.581)	Top-5 acc 60.156 (60.586)	lr 0.02453
Train [11][1720/3239]	Time 0.211 (0.532)	Data Time 0.001 (0.016)	Loss 3.7103 (3.7188)	Entropy 1.81888 (1.82268)	Top-1 acc 34.766 (36.588)	Top-5 acc 60.938 (60.587)	lr 0.02453
Train [11][1730/3239]	Time 0.151 (0.532)	Data Time 0.001 (0.016)	Loss 3.9372 (3.7189)	Entropy 1.81881 (1.82266)	Top-1 acc 32.422 (36.588)	Top-5 acc 59.766 (60.592)	lr 0.02453
Train [11][1740/3239]	Time 0.230 (0.531)	Data Time 0.001 (0.016)	Loss 3.9300 (3.7188)	Entropy 1.81878 (1.82264)	Top-1 acc 30.859 (36.584)	Top-5 acc 55.859 (60.595)	lr 0.02453
Train [11][1750/3239]	Time 0.354 (0.530)	Data Time 0.001 (0.016)	Loss 3.8191 (3.7188)	Entropy 1.81875 (1.82262)	Top-1 acc 34.375 (36.588)	Top-5 acc 58.984 (60.599)	lr 0.02453
Train [11][1760/3239]	Time 0.208 (0.530)	Data Time 0.001 (0.016)	Loss 3.6585 (3.7185)	Entropy 1.81868 (1.82259)	Top-1 acc 37.891 (36.591)	Top-5 acc 58.984 (60.601)	lr 0.02453
Train [11][1770/3239]	Time 2.278 (0.529)	Data Time 0.002 (0.016)	Loss 3.7564 (3.7185)	Entropy 1.81868 (1.82257)	Top-1 acc 34.766 (36.594)	Top-5 acc 60.547 (60.600)	lr 0.02453
Train [11][1780/3239]	Time 0.218 (0.527)	Data Time 0.001 (0.016)	Loss 3.7230 (3.7186)	Entropy 1.81858 (1.82255)	Top-1 acc 38.281 (36.590)	Top-5 acc 64.062 (60.600)	lr 0.02453
Train [11][1790/3239]	Time 0.226 (0.527)	Data Time 0.002 (0.016)	Loss 3.5564 (3.7187)	Entropy 1.81854 (1.82253)	Top-1 acc 39.062 (36.592)	Top-5 acc 65.625 (60.596)	lr 0.02453
Train [11][1800/3239]	Time 0.206 (0.526)	Data Time 0.001 (0.016)	Loss 3.7774 (3.7189)	Entropy 1.81845 (1.82250)	Top-1 acc 37.891 (36.588)	Top-5 acc 61.328 (60.595)	lr 0.02453
Train [11][1810/3239]	Time 0.188 (0.526)	Data Time 0.001 (0.016)	Loss 3.7299 (3.7186)	Entropy 1.81839 (1.82248)	Top-1 acc 34.375 (36.596)	Top-5 acc 60.938 (60.600)	lr 0.02453
Train [11][1820/3239]	Time 0.217 (0.525)	Data Time 0.001 (0.016)	Loss 3.7216 (3.7189)	Entropy 1.81831 (1.82246)	Top-1 acc 33.594 (36.594)	Top-5 acc 60.938 (60.590)	lr 0.02453
Train [11][1830/3239]	Time 0.198 (0.524)	Data Time 0.001 (0.015)	Loss 3.5444 (3.7186)	Entropy 1.81827 (1.82244)	Top-1 acc 39.453 (36.603)	Top-5 acc 66.406 (60.591)	lr 0.02452
Train [11][1840/3239]	Time 0.338 (0.524)	Data Time 0.001 (0.015)	Loss 3.8246 (3.7188)	Entropy 1.81822 (1.82241)	Top-1 acc 34.766 (36.605)	Top-5 acc 58.984 (60.585)	lr 0.02452
Train [11][1850/3239]	Time 0.183 (0.524)	Data Time 0.001 (0.015)	Loss 3.9286 (3.7190)	Entropy 1.81820 (1.82239)	Top-1 acc 31.641 (36.604)	Top-5 acc 53.125 (60.581)	lr 0.02452
Train [11][1860/3239]	Time 0.199 (0.523)	Data Time 0.001 (0.015)	Loss 3.5401 (3.7190)	Entropy 1.81822 (1.82237)	Top-1 acc 42.188 (36.612)	Top-5 acc 66.016 (60.579)	lr 0.02452
Train [11][1870/3239]	Time 0.180 (0.522)	Data Time 0.001 (0.015)	Loss 3.9271 (3.7191)	Entropy 1.81805 (1.82235)	Top-1 acc 33.203 (36.605)	Top-5 acc 55.078 (60.573)	lr 0.02452
Train [11][1880/3239]	Time 2.216 (0.522)	Data Time 0.001 (0.015)	Loss 3.6785 (3.7194)	Entropy 1.81805 (1.82232)	Top-1 acc 37.891 (36.602)	Top-5 acc 61.328 (60.566)	lr 0.02452
Train [11][1890/3239]	Time 0.255 (0.520)	Data Time 0.001 (0.015)	Loss 3.6792 (3.7193)	Entropy 1.81797 (1.82230)	Top-1 acc 41.016 (36.610)	Top-5 acc 60.938 (60.570)	lr 0.02452
Train [11][1900/3239]	Time 0.273 (0.538)	Data Time 0.003 (0.015)	Loss 3.9811 (3.7193)	Entropy 1.81796 (1.82228)	Top-1 acc 31.250 (36.605)	Top-5 acc 58.594 (60.572)	lr 0.02452
Train [11][1910/3239]	Time 0.247 (0.538)	Data Time 0.002 (0.015)	Loss 3.6880 (3.7193)	Entropy 1.81790 (1.82225)	Top-1 acc 34.766 (36.599)	Top-5 acc 62.109 (60.572)	lr 0.02452
Train [11][1920/3239]	Time 0.291 (0.537)	Data Time 0.001 (0.015)	Loss 3.5259 (3.7192)	Entropy 1.81788 (1.82223)	Top-1 acc 38.281 (36.601)	Top-5 acc 66.016 (60.572)	lr 0.02452
Train [11][1930/3239]	Time 0.198 (0.537)	Data Time 0.001 (0.015)	Loss 4.0003 (3.7196)	Entropy 1.81785 (1.82221)	Top-1 acc 33.984 (36.595)	Top-5 acc 58.203 (60.561)	lr 0.02452
Train [11][1940/3239]	Time 0.181 (0.536)	Data Time 0.001 (0.015)	Loss 3.6246 (3.7194)	Entropy 1.81778 (1.82219)	Top-1 acc 39.062 (36.600)	Top-5 acc 60.938 (60.560)	lr 0.02452
Train [11][1950/3239]	Time 0.224 (0.536)	Data Time 0.001 (0.015)	Loss 3.6073 (3.7194)	Entropy 1.81775 (1.82216)	Top-1 acc 39.062 (36.600)	Top-5 acc 64.062 (60.558)	lr 0.02452
Train [11][1960/3239]	Time 0.209 (0.535)	Data Time 0.001 (0.015)	Loss 3.5635 (3.7190)	Entropy 1.81772 (1.82214)	Top-1 acc 34.766 (36.601)	Top-5 acc 65.234 (60.567)	lr 0.02452
Train [11][1970/3239]	Time 0.199 (0.534)	Data Time 0.001 (0.014)	Loss 3.8293 (3.7192)	Entropy 1.81765 (1.82212)	Top-1 acc 32.812 (36.598)	Top-5 acc 57.422 (60.564)	lr 0.02452
Train [11][1980/3239]	Time 0.223 (0.534)	Data Time 0.001 (0.014)	Loss 3.6593 (3.7190)	Entropy 1.81755 (1.82210)	Top-1 acc 39.844 (36.597)	Top-5 acc 60.547 (60.565)	lr 0.02452
Train [11][1990/3239]	Time 2.216 (0.533)	Data Time 0.002 (0.014)	Loss 3.8894 (3.7193)	Entropy 1.81755 (1.82207)	Top-1 acc 34.375 (36.587)	Top-5 acc 53.906 (60.557)	lr 0.02452
Train [11][2000/3239]	Time 0.357 (0.532)	Data Time 0.001 (0.014)	Loss 3.6994 (3.7194)	Entropy 1.81745 (1.82205)	Top-1 acc 38.281 (36.584)	Top-5 acc 60.547 (60.551)	lr 0.02452
Train [11][2010/3239]	Time 0.225 (0.531)	Data Time 0.001 (0.014)	Loss 3.7426 (3.7194)	Entropy 1.81732 (1.82203)	Top-1 acc 37.500 (36.585)	Top-5 acc 55.469 (60.550)	lr 0.02452
Train [11][2020/3239]	Time 0.254 (0.531)	Data Time 0.001 (0.014)	Loss 3.7512 (3.7193)	Entropy 1.81727 (1.82200)	Top-1 acc 37.891 (36.588)	Top-5 acc 59.375 (60.552)	lr 0.02452
Train [11][2030/3239]	Time 0.208 (0.530)	Data Time 0.001 (0.014)	Loss 3.6140 (3.7190)	Entropy 1.81720 (1.82198)	Top-1 acc 40.234 (36.592)	Top-5 acc 61.719 (60.554)	lr 0.02452
Train [11][2040/3239]	Time 0.206 (0.530)	Data Time 0.001 (0.014)	Loss 3.6221 (3.7191)	Entropy 1.81717 (1.82196)	Top-1 acc 40.234 (36.590)	Top-5 acc 62.891 (60.549)	lr 0.02452
Train [11][2050/3239]	Time 0.164 (0.529)	Data Time 0.001 (0.014)	Loss 3.8273 (3.7194)	Entropy 1.81704 (1.82193)	Top-1 acc 33.984 (36.587)	Top-5 acc 58.984 (60.545)	lr 0.02452
Train [11][2060/3239]	Time 0.182 (0.529)	Data Time 0.001 (0.014)	Loss 3.7440 (3.7190)	Entropy 1.81702 (1.82191)	Top-1 acc 38.281 (36.597)	Top-5 acc 57.422 (60.550)	lr 0.02452
Train [11][2070/3239]	Time 0.331 (0.528)	Data Time 0.001 (0.014)	Loss 3.7228 (3.7192)	Entropy 1.81687 (1.82189)	Top-1 acc 35.547 (36.594)	Top-5 acc 58.984 (60.547)	lr 0.02452
Train [11][2080/3239]	Time 0.132 (0.528)	Data Time 0.001 (0.014)	Loss 3.6702 (3.7191)	Entropy 1.81681 (1.82186)	Top-1 acc 37.500 (36.592)	Top-5 acc 60.938 (60.545)	lr 0.02452
Train [11][2090/3239]	Time 0.143 (0.527)	Data Time 0.001 (0.014)	Loss 3.6408 (3.7191)	Entropy 1.81679 (1.82184)	Top-1 acc 41.016 (36.584)	Top-5 acc 64.453 (60.546)	lr 0.02452
Train [11][2100/3239]	Time 2.408 (0.527)	Data Time 0.001 (0.014)	Loss 3.6988 (3.7188)	Entropy 1.81679 (1.82181)	Top-1 acc 35.938 (36.586)	Top-5 acc 59.766 (60.547)	lr 0.02452
Train [11][2110/3239]	Time 0.204 (0.525)	Data Time 0.001 (0.014)	Loss 3.7792 (3.7186)	Entropy 1.81675 (1.82179)	Top-1 acc 33.594 (36.587)	Top-5 acc 58.594 (60.550)	lr 0.02452
Train [11][2120/3239]	Time 0.233 (0.525)	Data Time 0.002 (0.014)	Loss 3.7518 (3.7187)	Entropy 1.81674 (1.82177)	Top-1 acc 36.328 (36.588)	Top-5 acc 60.547 (60.549)	lr 0.02452
Train [11][2130/3239]	Time 0.209 (0.524)	Data Time 0.001 (0.014)	Loss 3.6071 (3.7190)	Entropy 1.81659 (1.82174)	Top-1 acc 38.672 (36.582)	Top-5 acc 65.234 (60.543)	lr 0.02452
Train [11][2140/3239]	Time 0.240 (0.524)	Data Time 0.001 (0.014)	Loss 3.8014 (3.7188)	Entropy 1.81654 (1.82172)	Top-1 acc 33.203 (36.582)	Top-5 acc 57.812 (60.543)	lr 0.02452
Train [11][2150/3239]	Time 0.209 (0.523)	Data Time 0.001 (0.013)	Loss 3.6923 (3.7186)	Entropy 1.81651 (1.82169)	Top-1 acc 39.453 (36.584)	Top-5 acc 60.938 (60.549)	lr 0.02452
Train [11][2160/3239]	Time 0.357 (0.523)	Data Time 0.001 (0.013)	Loss 3.8948 (3.7188)	Entropy 1.81646 (1.82167)	Top-1 acc 32.031 (36.580)	Top-5 acc 55.859 (60.544)	lr 0.02452
Train [11][2170/3239]	Time 0.163 (0.523)	Data Time 0.001 (0.013)	Loss 3.5505 (3.7187)	Entropy 1.81638 (1.82164)	Top-1 acc 39.844 (36.581)	Top-5 acc 67.578 (60.545)	lr 0.02452
Train [11][2180/3239]	Time 0.202 (0.522)	Data Time 0.001 (0.013)	Loss 3.7493 (3.7187)	Entropy 1.81635 (1.82162)	Top-1 acc 35.938 (36.582)	Top-5 acc 62.891 (60.543)	lr 0.02452
Train [11][2190/3239]	Time 0.214 (0.522)	Data Time 0.001 (0.013)	Loss 3.6301 (3.7186)	Entropy 1.81630 (1.82160)	Top-1 acc 40.234 (36.588)	Top-5 acc 60.547 (60.547)	lr 0.02451
Train [11][2200/3239]	Time 0.204 (0.521)	Data Time 0.001 (0.013)	Loss 3.7995 (3.7185)	Entropy 1.81626 (1.82157)	Top-1 acc 37.109 (36.592)	Top-5 acc 60.938 (60.545)	lr 0.02451
Train [11][2210/3239]	Time 2.227 (0.521)	Data Time 0.001 (0.013)	Loss 3.5752 (3.7182)	Entropy 1.81626 (1.82155)	Top-1 acc 35.547 (36.596)	Top-5 acc 63.281 (60.555)	lr 0.02451
Train [11][2220/3239]	Time 0.214 (0.520)	Data Time 0.001 (0.013)	Loss 3.8601 (3.7183)	Entropy 1.81618 (1.82152)	Top-1 acc 35.547 (36.591)	Top-5 acc 58.984 (60.553)	lr 0.02451
Train [11][2230/3239]	Time 0.225 (0.519)	Data Time 0.001 (0.013)	Loss 3.9374 (3.7185)	Entropy 1.81603 (1.82150)	Top-1 acc 31.250 (36.588)	Top-5 acc 57.422 (60.552)	lr 0.02451
Train [11][2240/3239]	Time 0.341 (0.519)	Data Time 0.001 (0.013)	Loss 3.8815 (3.7184)	Entropy 1.81600 (1.82147)	Top-1 acc 32.422 (36.589)	Top-5 acc 54.297 (60.554)	lr 0.02451
Train [11][2250/3239]	Time 0.223 (0.518)	Data Time 0.002 (0.013)	Loss 3.6917 (3.7185)	Entropy 1.81595 (1.82145)	Top-1 acc 38.281 (36.588)	Top-5 acc 58.984 (60.550)	lr 0.02451
Train [11][2260/3239]	Time 0.350 (0.534)	Data Time 0.003 (0.013)	Loss 3.6587 (3.7186)	Entropy 1.81586 (1.82143)	Top-1 acc 40.625 (36.580)	Top-5 acc 64.062 (60.545)	lr 0.02451
Train [11][2270/3239]	Time 0.230 (0.534)	Data Time 0.002 (0.013)	Loss 3.4931 (3.7183)	Entropy 1.81582 (1.82140)	Top-1 acc 40.234 (36.582)	Top-5 acc 64.453 (60.553)	lr 0.02451
Train [11][2280/3239]	Time 0.194 (0.533)	Data Time 0.002 (0.013)	Loss 3.7390 (3.7184)	Entropy 1.81574 (1.82138)	Top-1 acc 38.672 (36.580)	Top-5 acc 62.109 (60.552)	lr 0.02451
Train [11][2290/3239]	Time 0.221 (0.533)	Data Time 0.001 (0.013)	Loss 3.7173 (3.7186)	Entropy 1.81570 (1.82135)	Top-1 acc 39.062 (36.580)	Top-5 acc 62.109 (60.551)	lr 0.02451
Train [11][2300/3239]	Time 0.204 (0.532)	Data Time 0.001 (0.013)	Loss 3.7927 (3.7188)	Entropy 1.81558 (1.82133)	Top-1 acc 35.156 (36.573)	Top-5 acc 62.109 (60.548)	lr 0.02451
Train [11][2310/3239]	Time 0.211 (0.532)	Data Time 0.001 (0.013)	Loss 3.6113 (3.7187)	Entropy 1.81555 (1.82130)	Top-1 acc 42.188 (36.573)	Top-5 acc 60.938 (60.549)	lr 0.02451
Train [11][2320/3239]	Time 2.203 (0.531)	Data Time 0.002 (0.013)	Loss 3.4399 (3.7188)	Entropy 1.81555 (1.82128)	Top-1 acc 39.453 (36.574)	Top-5 acc 65.625 (60.549)	lr 0.02451
Train [11][2330/3239]	Time 0.187 (0.530)	Data Time 0.001 (0.013)	Loss 4.0273 (3.7187)	Entropy 1.81552 (1.82125)	Top-1 acc 28.906 (36.579)	Top-5 acc 54.297 (60.551)	lr 0.02451
Train [11][2340/3239]	Time 0.314 (0.529)	Data Time 0.001 (0.013)	Loss 3.7724 (3.7192)	Entropy 1.81550 (1.82123)	Top-1 acc 35.938 (36.568)	Top-5 acc 60.156 (60.542)	lr 0.02451
Train [11][2350/3239]	Time 0.189 (0.529)	Data Time 0.001 (0.012)	Loss 3.5229 (3.7188)	Entropy 1.81548 (1.82120)	Top-1 acc 40.625 (36.575)	Top-5 acc 64.062 (60.550)	lr 0.02451
Train [11][2360/3239]	Time 0.222 (0.528)	Data Time 0.001 (0.012)	Loss 3.6324 (3.7188)	Entropy 1.81544 (1.82118)	Top-1 acc 36.719 (36.573)	Top-5 acc 63.281 (60.551)	lr 0.02451
Train [11][2370/3239]	Time 0.212 (0.528)	Data Time 0.002 (0.012)	Loss 3.6190 (3.7185)	Entropy 1.81541 (1.82116)	Top-1 acc 37.500 (36.579)	Top-5 acc 62.891 (60.554)	lr 0.02451
Train [11][2380/3239]	Time 0.199 (0.527)	Data Time 0.001 (0.012)	Loss 3.5936 (3.7185)	Entropy 1.81539 (1.82113)	Top-1 acc 39.844 (36.580)	Top-5 acc 64.453 (60.558)	lr 0.02451
Train [11][2390/3239]	Time 0.197 (0.527)	Data Time 0.001 (0.012)	Loss 3.8425 (3.7185)	Entropy 1.81533 (1.82111)	Top-1 acc 35.547 (36.579)	Top-5 acc 57.812 (60.559)	lr 0.02451
Train [11][2400/3239]	Time 0.214 (0.527)	Data Time 0.001 (0.012)	Loss 3.4272 (3.7184)	Entropy 1.81530 (1.82108)	Top-1 acc 41.016 (36.581)	Top-5 acc 65.625 (60.566)	lr 0.02451
Train [11][2410/3239]	Time 0.263 (0.526)	Data Time 0.001 (0.012)	Loss 3.4641 (3.7183)	Entropy 1.81527 (1.82106)	Top-1 acc 40.625 (36.580)	Top-5 acc 63.281 (60.571)	lr 0.02451
Train [11][2420/3239]	Time 0.230 (0.526)	Data Time 0.002 (0.012)	Loss 3.6972 (3.7181)	Entropy 1.81521 (1.82103)	Top-1 acc 32.031 (36.580)	Top-5 acc 62.109 (60.573)	lr 0.02451
Train [11][2430/3239]	Time 2.376 (0.526)	Data Time 0.001 (0.012)	Loss 3.8511 (3.7180)	Entropy 1.81521 (1.82101)	Top-1 acc 33.594 (36.578)	Top-5 acc 60.156 (60.575)	lr 0.02451
Train [11][2440/3239]	Time 0.176 (0.524)	Data Time 0.001 (0.012)	Loss 3.8768 (3.7181)	Entropy 1.81521 (1.82099)	Top-1 acc 32.812 (36.579)	Top-5 acc 57.422 (60.573)	lr 0.02451
Train [11][2450/3239]	Time 0.156 (0.524)	Data Time 0.001 (0.012)	Loss 3.7534 (3.7182)	Entropy 1.81520 (1.82096)	Top-1 acc 35.938 (36.577)	Top-5 acc 56.641 (60.571)	lr 0.02451
Train [11][2460/3239]	Time 0.218 (0.523)	Data Time 0.001 (0.012)	Loss 3.6785 (3.7179)	Entropy 1.81513 (1.82094)	Top-1 acc 37.500 (36.582)	Top-5 acc 62.500 (60.578)	lr 0.02451
Train [11][2470/3239]	Time 0.222 (0.523)	Data Time 0.001 (0.012)	Loss 3.7206 (3.7180)	Entropy 1.81509 (1.82092)	Top-1 acc 38.281 (36.581)	Top-5 acc 60.156 (60.576)	lr 0.02451
Train [11][2480/3239]	Time 0.187 (0.523)	Data Time 0.001 (0.012)	Loss 3.9466 (3.7180)	Entropy 1.81509 (1.82089)	Top-1 acc 32.812 (36.581)	Top-5 acc 56.250 (60.577)	lr 0.02451
Train [11][2490/3239]	Time 0.293 (0.522)	Data Time 0.001 (0.012)	Loss 3.9312 (3.7183)	Entropy 1.81505 (1.82087)	Top-1 acc 35.547 (36.576)	Top-5 acc 54.688 (60.571)	lr 0.02451
Train [11][2500/3239]	Time 0.174 (0.522)	Data Time 0.001 (0.012)	Loss 3.8064 (3.7181)	Entropy 1.81502 (1.82085)	Top-1 acc 35.938 (36.581)	Top-5 acc 57.812 (60.576)	lr 0.02451
Train [11][2510/3239]	Time 0.226 (0.521)	Data Time 0.001 (0.012)	Loss 3.6039 (3.7179)	Entropy 1.81495 (1.82082)	Top-1 acc 36.719 (36.585)	Top-5 acc 65.234 (60.581)	lr 0.02451
Train [11][2520/3239]	Time 0.220 (0.521)	Data Time 0.001 (0.012)	Loss 3.6439 (3.7180)	Entropy 1.81495 (1.82080)	Top-1 acc 37.500 (36.585)	Top-5 acc 60.938 (60.579)	lr 0.02451
Train [11][2530/3239]	Time 0.217 (0.521)	Data Time 0.001 (0.012)	Loss 3.5922 (3.7179)	Entropy 1.81486 (1.82078)	Top-1 acc 41.016 (36.587)	Top-5 acc 65.234 (60.580)	lr 0.02451
Train [11][2540/3239]	Time 2.243 (0.520)	Data Time 0.002 (0.012)	Loss 3.8338 (3.7177)	Entropy 1.81486 (1.82075)	Top-1 acc 36.328 (36.587)	Top-5 acc 57.812 (60.581)	lr 0.02451
Train [11][2550/3239]	Time 0.147 (0.519)	Data Time 0.002 (0.012)	Loss 3.5998 (3.7178)	Entropy 1.81479 (1.82073)	Top-1 acc 41.406 (36.587)	Top-5 acc 63.672 (60.581)	lr 0.02450
Train [11][2560/3239]	Time 0.191 (0.519)	Data Time 0.001 (0.012)	Loss 3.6474 (3.7176)	Entropy 1.81468 (1.82071)	Top-1 acc 36.328 (36.590)	Top-5 acc 64.062 (60.582)	lr 0.02450
Train [11][2570/3239]	Time 0.262 (0.518)	Data Time 0.001 (0.012)	Loss 3.6857 (3.7176)	Entropy 1.81462 (1.82068)	Top-1 acc 37.109 (36.590)	Top-5 acc 61.719 (60.581)	lr 0.02450
Train [11][2580/3239]	Time 0.339 (0.518)	Data Time 0.001 (0.012)	Loss 3.5254 (3.7171)	Entropy 1.81456 (1.82066)	Top-1 acc 41.797 (36.601)	Top-5 acc 65.234 (60.594)	lr 0.02450
Train [11][2590/3239]	Time 0.219 (0.518)	Data Time 0.001 (0.012)	Loss 3.6823 (3.7170)	Entropy 1.81452 (1.82064)	Top-1 acc 35.156 (36.602)	Top-5 acc 60.938 (60.595)	lr 0.02450
Train [11][2600/3239]	Time 0.208 (0.517)	Data Time 0.001 (0.012)	Loss 3.5052 (3.7168)	Entropy 1.81448 (1.82061)	Top-1 acc 37.891 (36.607)	Top-5 acc 62.109 (60.598)	lr 0.02450
Train [11][2610/3239]	Time 0.214 (0.517)	Data Time 0.001 (0.011)	Loss 3.7494 (3.7168)	Entropy 1.81446 (1.82059)	Top-1 acc 35.938 (36.605)	Top-5 acc 58.594 (60.597)	lr 0.02450
Train [11][2620/3239]	Time 0.182 (0.531)	Data Time 0.002 (0.011)	Loss 3.9928 (3.7170)	Entropy 1.81440 (1.82056)	Top-1 acc 33.984 (36.606)	Top-5 acc 51.562 (60.591)	lr 0.02450
Train [11][2630/3239]	Time 0.219 (0.531)	Data Time 0.003 (0.011)	Loss 3.8006 (3.7168)	Entropy 1.81438 (1.82054)	Top-1 acc 32.812 (36.612)	Top-5 acc 60.156 (60.595)	lr 0.02450
Train [11][2640/3239]	Time 0.222 (0.531)	Data Time 0.003 (0.011)	Loss 3.6433 (3.7168)	Entropy 1.81424 (1.82052)	Top-1 acc 38.281 (36.611)	Top-5 acc 61.719 (60.595)	lr 0.02450
Train [11][2650/3239]	Time 0.242 (0.530)	Data Time 0.001 (0.011)	Loss 3.8726 (3.7170)	Entropy 1.81421 (1.82049)	Top-1 acc 33.594 (36.609)	Top-5 acc 56.641 (60.594)	lr 0.02450
Train [11][2660/3239]	Time 0.204 (0.530)	Data Time 0.002 (0.011)	Loss 3.6978 (3.7170)	Entropy 1.81410 (1.82047)	Top-1 acc 35.547 (36.607)	Top-5 acc 60.547 (60.593)	lr 0.02450
Train [11][2670/3239]	Time 0.242 (0.530)	Data Time 0.001 (0.011)	Loss 3.4405 (3.7168)	Entropy 1.81406 (1.82045)	Top-1 acc 46.875 (36.609)	Top-5 acc 67.578 (60.596)	lr 0.02450
Train [11][2680/3239]	Time 0.209 (0.529)	Data Time 0.001 (0.011)	Loss 3.8086 (3.7168)	Entropy 1.81402 (1.82042)	Top-1 acc 37.891 (36.610)	Top-5 acc 58.203 (60.597)	lr 0.02450
Train [11][2690/3239]	Time 0.207 (0.529)	Data Time 0.001 (0.011)	Loss 3.9639 (3.7168)	Entropy 1.81398 (1.82040)	Top-1 acc 33.203 (36.614)	Top-5 acc 57.422 (60.600)	lr 0.02450
Train [11][2700/3239]	Time 0.231 (0.528)	Data Time 0.001 (0.011)	Loss 3.7683 (3.7167)	Entropy 1.81391 (1.82038)	Top-1 acc 35.156 (36.616)	Top-5 acc 58.594 (60.601)	lr 0.02450
Train [11][2710/3239]	Time 0.184 (0.528)	Data Time 0.002 (0.011)	Loss 3.5082 (3.7164)	Entropy 1.81392 (1.82035)	Top-1 acc 40.625 (36.624)	Top-5 acc 61.719 (60.604)	lr 0.02450
Train [11][2720/3239]	Time 0.262 (0.528)	Data Time 0.001 (0.011)	Loss 3.4847 (3.7162)	Entropy 1.81386 (1.82033)	Top-1 acc 41.797 (36.625)	Top-5 acc 67.188 (60.611)	lr 0.02450
Train [11][2730/3239]	Time 0.355 (0.527)	Data Time 0.001 (0.011)	Loss 3.7239 (3.7161)	Entropy 1.81374 (1.82030)	Top-1 acc 36.328 (36.627)	Top-5 acc 60.547 (60.613)	lr 0.02450
Train [11][2740/3239]	Time 0.200 (0.527)	Data Time 0.001 (0.011)	Loss 3.8764 (3.7160)	Entropy 1.81370 (1.82028)	Top-1 acc 34.375 (36.631)	Top-5 acc 57.422 (60.620)	lr 0.02450
Train [11][2750/3239]	Time 0.178 (0.526)	Data Time 0.001 (0.011)	Loss 3.8517 (3.7159)	Entropy 1.81362 (1.82026)	Top-1 acc 33.203 (36.638)	Top-5 acc 60.156 (60.624)	lr 0.02450
Train [11][2760/3239]	Time 0.228 (0.526)	Data Time 0.001 (0.011)	Loss 3.7715 (3.7157)	Entropy 1.81352 (1.82023)	Top-1 acc 32.812 (36.641)	Top-5 acc 60.156 (60.629)	lr 0.02450
Train [11][2770/3239]	Time 0.227 (0.526)	Data Time 0.001 (0.011)	Loss 3.9227 (3.7158)	Entropy 1.81347 (1.82021)	Top-1 acc 33.984 (36.639)	Top-5 acc 57.812 (60.630)	lr 0.02450
Train [11][2780/3239]	Time 0.250 (0.525)	Data Time 0.001 (0.011)	Loss 3.8121 (3.7156)	Entropy 1.81344 (1.82018)	Top-1 acc 33.594 (36.641)	Top-5 acc 58.594 (60.635)	lr 0.02450
Train [11][2790/3239]	Time 0.238 (0.525)	Data Time 0.001 (0.011)	Loss 3.7852 (3.7152)	Entropy 1.81340 (1.82016)	Top-1 acc 33.984 (36.655)	Top-5 acc 55.859 (60.639)	lr 0.02450
Train [11][2800/3239]	Time 0.261 (0.524)	Data Time 0.001 (0.011)	Loss 3.9498 (3.7155)	Entropy 1.81340 (1.82014)	Top-1 acc 30.078 (36.646)	Top-5 acc 55.469 (60.632)	lr 0.02450
Train [11][2810/3239]	Time 0.312 (0.524)	Data Time 0.001 (0.011)	Loss 3.5040 (3.7152)	Entropy 1.81330 (1.82011)	Top-1 acc 37.500 (36.647)	Top-5 acc 64.844 (60.638)	lr 0.02450
Train [11][2820/3239]	Time 0.253 (0.524)	Data Time 0.001 (0.011)	Loss 3.8707 (3.7152)	Entropy 1.81330 (1.82009)	Top-1 acc 36.328 (36.649)	Top-5 acc 58.984 (60.639)	lr 0.02450
Train [11][2830/3239]	Time 0.209 (0.523)	Data Time 0.001 (0.011)	Loss 3.7013 (3.7153)	Entropy 1.81330 (1.82006)	Top-1 acc 42.969 (36.649)	Top-5 acc 59.766 (60.637)	lr 0.02450
Train [11][2840/3239]	Time 0.221 (0.523)	Data Time 0.002 (0.011)	Loss 3.8024 (3.7154)	Entropy 1.81326 (1.82004)	Top-1 acc 35.156 (36.645)	Top-5 acc 57.812 (60.634)	lr 0.02450
Train [11][2850/3239]	Time 0.196 (0.523)	Data Time 0.001 (0.011)	Loss 3.7458 (3.7152)	Entropy 1.81326 (1.82002)	Top-1 acc 37.500 (36.648)	Top-5 acc 61.719 (60.640)	lr 0.02450
Train [11][2860/3239]	Time 0.222 (0.522)	Data Time 0.001 (0.011)	Loss 3.6414 (3.7153)	Entropy 1.81321 (1.81999)	Top-1 acc 37.891 (36.650)	Top-5 acc 66.016 (60.641)	lr 0.02450
Train [11][2870/3239]	Time 0.200 (0.522)	Data Time 0.002 (0.011)	Loss 3.7917 (3.7152)	Entropy 1.81317 (1.81997)	Top-1 acc 35.156 (36.654)	Top-5 acc 60.547 (60.641)	lr 0.02450
Train [11][2880/3239]	Time 0.221 (0.522)	Data Time 0.002 (0.011)	Loss 3.4690 (3.7151)	Entropy 1.81309 (1.81994)	Top-1 acc 39.844 (36.658)	Top-5 acc 66.797 (60.643)	lr 0.02450
Train [11][2890/3239]	Time 0.331 (0.521)	Data Time 0.001 (0.011)	Loss 3.7247 (3.7150)	Entropy 1.81307 (1.81992)	Top-1 acc 37.109 (36.656)	Top-5 acc 61.719 (60.642)	lr 0.02450
Train [11][2900/3239]	Time 0.206 (0.521)	Data Time 0.002 (0.011)	Loss 3.5415 (3.7151)	Entropy 1.81301 (1.81990)	Top-1 acc 42.188 (36.653)	Top-5 acc 64.453 (60.640)	lr 0.02449
Train [11][2910/3239]	Time 0.226 (0.521)	Data Time 0.001 (0.011)	Loss 3.5132 (3.7148)	Entropy 1.81297 (1.81987)	Top-1 acc 40.625 (36.661)	Top-5 acc 62.891 (60.641)	lr 0.02449
Train [11][2920/3239]	Time 0.262 (0.520)	Data Time 0.001 (0.010)	Loss 3.5642 (3.7146)	Entropy 1.81296 (1.81985)	Top-1 acc 39.062 (36.668)	Top-5 acc 61.719 (60.648)	lr 0.02449
Train [11][2930/3239]	Time 0.207 (0.520)	Data Time 0.001 (0.010)	Loss 3.7063 (3.7147)	Entropy 1.81293 (1.81983)	Top-1 acc 38.281 (36.661)	Top-5 acc 58.594 (60.645)	lr 0.02449
Train [11][2940/3239]	Time 0.214 (0.520)	Data Time 0.001 (0.010)	Loss 3.4535 (3.7147)	Entropy 1.81289 (1.81980)	Top-1 acc 43.750 (36.663)	Top-5 acc 67.578 (60.648)	lr 0.02449
Train [11][2950/3239]	Time 0.265 (0.531)	Data Time 0.004 (0.010)	Loss 3.5329 (3.7148)	Entropy 1.81289 (1.81978)	Top-1 acc 41.016 (36.658)	Top-5 acc 65.234 (60.646)	lr 0.02449
Train [11][2960/3239]	Time 0.200 (0.532)	Data Time 0.002 (0.010)	Loss 3.6046 (3.7146)	Entropy 1.81280 (1.81976)	Top-1 acc 39.062 (36.659)	Top-5 acc 66.406 (60.652)	lr 0.02449
Train [11][2970/3239]	Time 0.247 (0.531)	Data Time 0.002 (0.010)	Loss 3.6546 (3.7146)	Entropy 1.81273 (1.81973)	Top-1 acc 32.812 (36.659)	Top-5 acc 61.719 (60.653)	lr 0.02449
Train [11][2980/3239]	Time 0.361 (0.531)	Data Time 0.002 (0.010)	Loss 3.7361 (3.7146)	Entropy 1.81274 (1.81971)	Top-1 acc 41.016 (36.662)	Top-5 acc 60.938 (60.655)	lr 0.02449
Train [11][2990/3239]	Time 0.211 (0.531)	Data Time 0.001 (0.010)	Loss 3.6420 (3.7145)	Entropy 1.81273 (1.81969)	Top-1 acc 39.062 (36.664)	Top-5 acc 60.938 (60.654)	lr 0.02449
Train [11][3000/3239]	Time 0.246 (0.530)	Data Time 0.001 (0.010)	Loss 3.8861 (3.7144)	Entropy 1.81264 (1.81966)	Top-1 acc 37.109 (36.666)	Top-5 acc 59.766 (60.654)	lr 0.02449
Train [11][3010/3239]	Time 0.222 (0.530)	Data Time 0.001 (0.010)	Loss 3.8038 (3.7145)	Entropy 1.81260 (1.81964)	Top-1 acc 35.938 (36.665)	Top-5 acc 57.812 (60.650)	lr 0.02449
Train [11][3020/3239]	Time 0.194 (0.530)	Data Time 0.001 (0.010)	Loss 3.6647 (3.7145)	Entropy 1.81260 (1.81962)	Top-1 acc 39.062 (36.665)	Top-5 acc 58.203 (60.647)	lr 0.02449
Train [11][3030/3239]	Time 0.217 (0.529)	Data Time 0.001 (0.010)	Loss 3.6107 (3.7144)	Entropy 1.81255 (1.81959)	Top-1 acc 37.891 (36.667)	Top-5 acc 64.062 (60.649)	lr 0.02449
Train [11][3040/3239]	Time 0.208 (0.529)	Data Time 0.001 (0.010)	Loss 3.7539 (3.7144)	Entropy 1.81251 (1.81957)	Top-1 acc 35.156 (36.670)	Top-5 acc 56.250 (60.649)	lr 0.02449
Train [11][3050/3239]	Time 0.291 (0.529)	Data Time 0.002 (0.010)	Loss 3.6437 (3.7144)	Entropy 1.81243 (1.81955)	Top-1 acc 37.891 (36.669)	Top-5 acc 61.328 (60.648)	lr 0.02449
Train [11][3060/3239]	Time 0.223 (0.528)	Data Time 0.001 (0.010)	Loss 3.7525 (3.7145)	Entropy 1.81235 (1.81952)	Top-1 acc 37.109 (36.668)	Top-5 acc 61.719 (60.648)	lr 0.02449
Train [11][3070/3239]	Time 0.230 (0.528)	Data Time 0.001 (0.010)	Loss 3.7576 (3.7143)	Entropy 1.81233 (1.81950)	Top-1 acc 37.891 (36.669)	Top-5 acc 60.938 (60.652)	lr 0.02449
Train [11][3080/3239]	Time 0.220 (0.528)	Data Time 0.001 (0.010)	Loss 3.7194 (3.7141)	Entropy 1.81226 (1.81948)	Top-1 acc 36.328 (36.675)	Top-5 acc 63.281 (60.660)	lr 0.02449
Train [11][3090/3239]	Time 0.229 (0.527)	Data Time 0.001 (0.010)	Loss 3.8375 (3.7140)	Entropy 1.81224 (1.81945)	Top-1 acc 33.203 (36.679)	Top-5 acc 57.422 (60.662)	lr 0.02449
Train [11][3100/3239]	Time 0.235 (0.527)	Data Time 0.003 (0.010)	Loss 3.7187 (3.7140)	Entropy 1.81220 (1.81943)	Top-1 acc 33.984 (36.681)	Top-5 acc 62.109 (60.663)	lr 0.02449
Train [11][3110/3239]	Time 0.230 (0.527)	Data Time 0.001 (0.010)	Loss 3.3816 (3.7137)	Entropy 1.81211 (1.81941)	Top-1 acc 43.750 (36.684)	Top-5 acc 66.406 (60.672)	lr 0.02449
Train [11][3120/3239]	Time 0.301 (0.526)	Data Time 0.002 (0.010)	Loss 3.8335 (3.7135)	Entropy 1.81204 (1.81938)	Top-1 acc 35.938 (36.688)	Top-5 acc 56.641 (60.675)	lr 0.02449
Train [11][3130/3239]	Time 0.245 (0.526)	Data Time 0.002 (0.010)	Loss 3.7109 (3.7136)	Entropy 1.81200 (1.81936)	Top-1 acc 35.938 (36.689)	Top-5 acc 64.062 (60.677)	lr 0.02449
Train [11][3140/3239]	Time 0.236 (0.526)	Data Time 0.001 (0.010)	Loss 3.7790 (3.7136)	Entropy 1.81197 (1.81934)	Top-1 acc 34.375 (36.687)	Top-5 acc 58.594 (60.680)	lr 0.02449
Train [11][3150/3239]	Time 0.218 (0.525)	Data Time 0.001 (0.010)	Loss 3.6681 (3.7135)	Entropy 1.81191 (1.81931)	Top-1 acc 42.969 (36.689)	Top-5 acc 59.375 (60.678)	lr 0.02449
Train [11][3160/3239]	Time 0.247 (0.525)	Data Time 0.001 (0.010)	Loss 3.6405 (3.7133)	Entropy 1.81190 (1.81929)	Top-1 acc 33.594 (36.690)	Top-5 acc 62.500 (60.683)	lr 0.02449
Train [11][3170/3239]	Time 0.236 (0.525)	Data Time 0.001 (0.010)	Loss 4.0545 (3.7134)	Entropy 1.81189 (1.81927)	Top-1 acc 32.422 (36.689)	Top-5 acc 53.516 (60.685)	lr 0.02449
Train [11][3180/3239]	Time 0.219 (0.524)	Data Time 0.000 (0.010)	Loss 3.5689 (3.7133)	Entropy 1.81183 (1.81924)	Top-1 acc 41.016 (36.690)	Top-5 acc 64.062 (60.685)	lr 0.02449
Train [11][3190/3239]	Time 0.153 (0.524)	Data Time 0.000 (0.010)	Loss 3.9222 (3.7133)	Entropy 1.81178 (1.81922)	Top-1 acc 32.422 (36.689)	Top-5 acc 53.125 (60.685)	lr 0.02449
Train [11][3200/3239]	Time 0.218 (0.524)	Data Time 0.000 (0.010)	Loss 3.7642 (3.7132)	Entropy 1.81173 (1.81920)	Top-1 acc 34.766 (36.691)	Top-5 acc 60.938 (60.689)	lr 0.02449
Train [11][3210/3239]	Time 0.261 (0.523)	Data Time 0.000 (0.010)	Loss 3.7740 (3.7133)	Entropy 1.81165 (1.81917)	Top-1 acc 37.500 (36.692)	Top-5 acc 61.719 (60.689)	lr 0.02449
Train [11][3220/3239]	Time 0.183 (0.523)	Data Time 0.000 (0.010)	Loss 3.4955 (3.7133)	Entropy 1.81157 (1.81915)	Top-1 acc 43.750 (36.696)	Top-5 acc 64.844 (60.691)	lr 0.02449
Train [11][3230/3239]	Time 0.205 (0.523)	Data Time 0.000 (0.010)	Loss 3.7531 (3.7132)	Entropy 1.81150 (1.81913)	Top-1 acc 34.766 (36.697)	Top-5 acc 59.375 (60.691)	lr 0.02449
Train [11][3239/3239]	Time 2.008 (0.522)	Data Time 0.000 (0.010)	Loss 4.2040 (3.7132)	Entropy 1.81150 (1.81911)	Top-1 acc 34.568 (36.699)	Top-5 acc 51.852 (60.691)	lr 0.02449
==========Valid [11/120]	loss 2.549	top-1 acc 45.635 (45.635)	top-5 acc 70.193	Train top-1 36.699	top-5 60.691	Entropy 1.81150	Latency-None: 0.000ms	Flops: 535.07M
Train [12][0/3239]	Time 26.185 (26.185)	Data Time 25.274 (25.274)	Loss 3.5639 (3.5639)	Entropy 1.81141 (1.81141)	Top-1 acc 38.672 (38.672)	Top-5 acc 65.625 (65.625)	lr 0.02449
Train [12][10/3239]	Time 2.487 (2.912)	Data Time 0.002 (2.339)	Loss 3.5880 (3.6919)	Entropy 1.81141 (1.81141)	Top-1 acc 37.500 (36.186)	Top-5 acc 60.938 (61.612)	lr 0.02448
Train [12][20/3239]	Time 0.258 (1.629)	Data Time 0.002 (1.226)	Loss 4.0040 (3.7297)	Entropy 1.81135 (1.81138)	Top-1 acc 29.688 (35.379)	Top-5 acc 55.078 (60.565)	lr 0.02448
Train [12][30/3239]	Time 0.162 (1.236)	Data Time 0.001 (0.831)	Loss 3.7519 (3.7164)	Entropy 1.81136 (1.81138)	Top-1 acc 37.109 (35.698)	Top-5 acc 59.375 (60.774)	lr 0.02448
Train [12][40/3239]	Time 0.200 (1.041)	Data Time 0.001 (0.629)	Loss 3.6842 (3.6911)	Entropy 1.81130 (1.81136)	Top-1 acc 37.891 (36.242)	Top-5 acc 62.891 (61.319)	lr 0.02448
Train [12][50/3239]	Time 0.356 (1.699)	Data Time 0.003 (0.507)	Loss 3.7900 (3.7078)	Entropy 1.81128 (1.81135)	Top-1 acc 35.547 (35.976)	Top-5 acc 61.328 (61.022)	lr 0.02448
Train [12][60/3239]	Time 0.217 (1.493)	Data Time 0.002 (0.424)	Loss 3.5961 (3.7036)	Entropy 1.81126 (1.81133)	Top-1 acc 37.500 (36.168)	Top-5 acc 61.328 (61.110)	lr 0.02448
Train [12][70/3239]	Time 0.313 (1.342)	Data Time 0.002 (0.364)	Loss 3.7869 (3.7083)	Entropy 1.81118 (1.81132)	Top-1 acc 33.594 (36.196)	Top-5 acc 61.719 (61.092)	lr 0.02448
Train [12][80/3239]	Time 0.212 (1.231)	Data Time 0.002 (0.320)	Loss 3.6151 (3.7073)	Entropy 1.81110 (1.81130)	Top-1 acc 37.500 (36.135)	Top-5 acc 63.672 (61.270)	lr 0.02448
Train [12][90/3239]	Time 0.212 (1.143)	Data Time 0.002 (0.285)	Loss 3.6995 (3.7073)	Entropy 1.81108 (1.81127)	Top-1 acc 37.500 (36.208)	Top-5 acc 60.156 (61.238)	lr 0.02448
Train [12][100/3239]	Time 0.239 (1.074)	Data Time 0.002 (0.257)	Loss 3.6548 (3.7044)	Entropy 1.81101 (1.81125)	Top-1 acc 37.500 (36.293)	Top-5 acc 60.938 (61.336)	lr 0.02448
Train [12][110/3239]	Time 0.206 (1.016)	Data Time 0.001 (0.234)	Loss 3.5123 (3.7046)	Entropy 1.81099 (1.81123)	Top-1 acc 41.016 (36.427)	Top-5 acc 65.234 (61.307)	lr 0.02448
Train [12][120/3239]	Time 2.247 (0.968)	Data Time 0.002 (0.215)	Loss 3.6860 (3.7000)	Entropy 1.81099 (1.81121)	Top-1 acc 40.625 (36.538)	Top-5 acc 62.109 (61.406)	lr 0.02448
Train [12][130/3239]	Time 0.161 (0.909)	Data Time 0.002 (0.199)	Loss 3.5741 (3.6999)	Entropy 1.81094 (1.81119)	Top-1 acc 39.062 (36.588)	Top-5 acc 65.625 (61.432)	lr 0.02448
Train [12][140/3239]	Time 0.201 (0.876)	Data Time 0.001 (0.185)	Loss 3.6486 (3.7009)	Entropy 1.81085 (1.81117)	Top-1 acc 35.547 (36.544)	Top-5 acc 65.625 (61.408)	lr 0.02448
Train [12][150/3239]	Time 0.160 (0.848)	Data Time 0.001 (0.173)	Loss 3.5663 (3.7010)	Entropy 1.81083 (1.81115)	Top-1 acc 40.234 (36.620)	Top-5 acc 64.453 (61.341)	lr 0.02448
Train [12][160/3239]	Time 0.200 (0.821)	Data Time 0.001 (0.163)	Loss 3.4734 (3.7020)	Entropy 1.81075 (1.81112)	Top-1 acc 40.234 (36.612)	Top-5 acc 65.234 (61.270)	lr 0.02448
Train [12][170/3239]	Time 0.213 (0.798)	Data Time 0.001 (0.153)	Loss 3.3455 (3.6998)	Entropy 1.81072 (1.81110)	Top-1 acc 41.406 (36.682)	Top-5 acc 72.266 (61.264)	lr 0.02448
Train [12][180/3239]	Time 0.197 (0.777)	Data Time 0.001 (0.145)	Loss 3.6447 (3.7010)	Entropy 1.81066 (1.81108)	Top-1 acc 41.406 (36.762)	Top-5 acc 60.938 (61.173)	lr 0.02448
Train [12][190/3239]	Time 0.180 (0.758)	Data Time 0.001 (0.137)	Loss 3.6617 (3.7021)	Entropy 1.81046 (1.81105)	Top-1 acc 36.328 (36.690)	Top-5 acc 64.844 (61.185)	lr 0.02448
Train [12][200/3239]	Time 0.156 (0.742)	Data Time 0.001 (0.131)	Loss 3.5585 (3.7004)	Entropy 1.81040 (1.81102)	Top-1 acc 39.062 (36.769)	Top-5 acc 63.672 (61.250)	lr 0.02448
Train [12][210/3239]	Time 0.254 (0.726)	Data Time 0.001 (0.125)	Loss 3.8184 (3.7014)	Entropy 1.81034 (1.81099)	Top-1 acc 33.203 (36.748)	Top-5 acc 58.594 (61.211)	lr 0.02448
Train [12][220/3239]	Time 0.248 (0.712)	Data Time 0.001 (0.119)	Loss 3.6124 (3.6989)	Entropy 1.81030 (1.81096)	Top-1 acc 38.281 (36.793)	Top-5 acc 63.672 (61.259)	lr 0.02448
Train [12][230/3239]	Time 2.290 (0.700)	Data Time 0.002 (0.114)	Loss 3.7080 (3.6968)	Entropy 1.81030 (1.81093)	Top-1 acc 39.453 (36.844)	Top-5 acc 60.547 (61.313)	lr 0.02448
Train [12][240/3239]	Time 0.250 (0.679)	Data Time 0.001 (0.109)	Loss 4.0901 (3.6956)	Entropy 1.81029 (1.81091)	Top-1 acc 31.641 (36.907)	Top-5 acc 51.953 (61.331)	lr 0.02448
Train [12][250/3239]	Time 0.225 (0.669)	Data Time 0.001 (0.105)	Loss 3.4419 (3.6917)	Entropy 1.81022 (1.81088)	Top-1 acc 40.625 (36.972)	Top-5 acc 68.750 (61.392)	lr 0.02448
Train [12][260/3239]	Time 0.216 (0.659)	Data Time 0.001 (0.101)	Loss 3.6205 (3.6920)	Entropy 1.81020 (1.81085)	Top-1 acc 37.500 (37.002)	Top-5 acc 59.766 (61.360)	lr 0.02448
Train [12][270/3239]	Time 0.206 (0.651)	Data Time 0.001 (0.097)	Loss 3.6549 (3.6912)	Entropy 1.81014 (1.81083)	Top-1 acc 36.328 (37.019)	Top-5 acc 61.328 (61.364)	lr 0.02448
Train [12][280/3239]	Time 0.209 (0.643)	Data Time 0.001 (0.094)	Loss 3.5890 (3.6902)	Entropy 1.81012 (1.81080)	Top-1 acc 36.719 (37.023)	Top-5 acc 66.016 (61.403)	lr 0.02448
Train [12][290/3239]	Time 0.208 (0.636)	Data Time 0.001 (0.091)	Loss 3.5697 (3.6893)	Entropy 1.81010 (1.81078)	Top-1 acc 37.891 (37.029)	Top-5 acc 62.500 (61.426)	lr 0.02448
Train [12][300/3239]	Time 0.292 (0.628)	Data Time 0.001 (0.088)	Loss 3.8477 (3.6909)	Entropy 1.81005 (1.81076)	Top-1 acc 32.812 (37.012)	Top-5 acc 59.766 (61.405)	lr 0.02448
Train [12][310/3239]	Time 0.238 (0.622)	Data Time 0.001 (0.085)	Loss 3.4144 (3.6895)	Entropy 1.81004 (1.81073)	Top-1 acc 43.359 (37.014)	Top-5 acc 64.453 (61.412)	lr 0.02448
Train [12][320/3239]	Time 0.196 (0.616)	Data Time 0.001 (0.083)	Loss 3.8096 (3.6891)	Entropy 1.81004 (1.81071)	Top-1 acc 37.109 (37.039)	Top-5 acc 58.594 (61.385)	lr 0.02448
Train [12][330/3239]	Time 0.189 (0.610)	Data Time 0.001 (0.080)	Loss 3.9061 (3.6885)	Entropy 1.81006 (1.81069)	Top-1 acc 33.594 (37.043)	Top-5 acc 55.078 (61.392)	lr 0.02448
Train [12][340/3239]	Time 2.140 (0.604)	Data Time 0.001 (0.078)	Loss 3.6568 (3.6877)	Entropy 1.81006 (1.81067)	Top-1 acc 35.938 (37.038)	Top-5 acc 64.062 (61.398)	lr 0.02448
Train [12][350/3239]	Time 0.223 (0.593)	Data Time 0.001 (0.076)	Loss 3.7830 (3.6877)	Entropy 1.80997 (1.81065)	Top-1 acc 34.766 (37.041)	Top-5 acc 61.719 (61.398)	lr 0.02448
Train [12][360/3239]	Time 0.205 (0.588)	Data Time 0.001 (0.074)	Loss 3.8361 (3.6891)	Entropy 1.80995 (1.81063)	Top-1 acc 37.109 (37.042)	Top-5 acc 57.812 (61.352)	lr 0.02447
Train [12][370/3239]	Time 0.219 (0.584)	Data Time 0.002 (0.072)	Loss 3.6141 (3.6893)	Entropy 1.80990 (1.81061)	Top-1 acc 39.453 (37.039)	Top-5 acc 62.109 (61.346)	lr 0.02447
Train [12][380/3239]	Time 0.232 (0.579)	Data Time 0.001 (0.070)	Loss 3.6300 (3.6903)	Entropy 1.80981 (1.81059)	Top-1 acc 38.672 (37.058)	Top-5 acc 61.719 (61.311)	lr 0.02447
Train [12][390/3239]	Time 0.219 (0.576)	Data Time 0.001 (0.068)	Loss 3.7757 (3.6901)	Entropy 1.80965 (1.81057)	Top-1 acc 37.109 (37.086)	Top-5 acc 59.766 (61.316)	lr 0.02447
Train [12][400/3239]	Time 0.209 (0.572)	Data Time 0.001 (0.066)	Loss 3.6358 (3.6904)	Entropy 1.80963 (1.81055)	Top-1 acc 39.844 (37.091)	Top-5 acc 59.375 (61.263)	lr 0.02447
Train [12][410/3239]	Time 0.288 (0.659)	Data Time 0.003 (0.065)	Loss 3.6377 (3.6904)	Entropy 1.80962 (1.81053)	Top-1 acc 39.062 (37.111)	Top-5 acc 64.453 (61.274)	lr 0.02447
Train [12][420/3239]	Time 0.213 (0.656)	Data Time 0.002 (0.063)	Loss 3.6049 (3.6899)	Entropy 1.80963 (1.81050)	Top-1 acc 39.844 (37.120)	Top-5 acc 64.844 (61.297)	lr 0.02447
Train [12][430/3239]	Time 0.258 (0.650)	Data Time 0.007 (0.062)	Loss 3.5885 (3.6881)	Entropy 1.80954 (1.81048)	Top-1 acc 38.281 (37.125)	Top-5 acc 67.578 (61.337)	lr 0.02447
Train [12][440/3239]	Time 0.223 (0.645)	Data Time 0.001 (0.061)	Loss 3.9545 (3.6895)	Entropy 1.80952 (1.81046)	Top-1 acc 32.812 (37.107)	Top-5 acc 56.250 (61.306)	lr 0.02447
Train [12][450/3239]	Time 2.240 (0.640)	Data Time 0.002 (0.059)	Loss 3.3703 (3.6871)	Entropy 1.80952 (1.81044)	Top-1 acc 46.094 (37.160)	Top-5 acc 67.188 (61.347)	lr 0.02447
Train [12][460/3239]	Time 0.201 (0.631)	Data Time 0.001 (0.058)	Loss 3.6507 (3.6878)	Entropy 1.80947 (1.81042)	Top-1 acc 38.281 (37.124)	Top-5 acc 62.891 (61.345)	lr 0.02447
Train [12][470/3239]	Time 0.259 (0.627)	Data Time 0.001 (0.057)	Loss 3.7046 (3.6880)	Entropy 1.80944 (1.81040)	Top-1 acc 38.672 (37.140)	Top-5 acc 64.062 (61.330)	lr 0.02447
Train [12][480/3239]	Time 0.225 (0.622)	Data Time 0.001 (0.056)	Loss 3.8173 (3.6874)	Entropy 1.80942 (1.81038)	Top-1 acc 35.938 (37.162)	Top-5 acc 59.375 (61.346)	lr 0.02447
Train [12][490/3239]	Time 0.227 (0.618)	Data Time 0.001 (0.055)	Loss 3.6750 (3.6863)	Entropy 1.80943 (1.81036)	Top-1 acc 35.156 (37.160)	Top-5 acc 60.938 (61.374)	lr 0.02447
Train [12][500/3239]	Time 0.221 (0.614)	Data Time 0.001 (0.054)	Loss 3.5106 (3.6861)	Entropy 1.80938 (1.81034)	Top-1 acc 41.016 (37.160)	Top-5 acc 64.844 (61.383)	lr 0.02447
Train [12][510/3239]	Time 0.229 (0.610)	Data Time 0.001 (0.053)	Loss 3.4103 (3.6855)	Entropy 1.80932 (1.81032)	Top-1 acc 41.406 (37.172)	Top-5 acc 67.578 (61.408)	lr 0.02447
Train [12][520/3239]	Time 0.199 (0.607)	Data Time 0.001 (0.052)	Loss 3.5806 (3.6852)	Entropy 1.80928 (1.81030)	Top-1 acc 44.141 (37.199)	Top-5 acc 62.500 (61.413)	lr 0.02447
Train [12][530/3239]	Time 0.206 (0.603)	Data Time 0.001 (0.051)	Loss 3.7927 (3.6854)	Entropy 1.80925 (1.81028)	Top-1 acc 37.109 (37.189)	Top-5 acc 60.938 (61.419)	lr 0.02447
Train [12][540/3239]	Time 0.314 (0.600)	Data Time 0.001 (0.050)	Loss 4.0340 (3.6867)	Entropy 1.80925 (1.81026)	Top-1 acc 29.688 (37.177)	Top-5 acc 55.078 (61.404)	lr 0.02447
Train [12][550/3239]	Time 0.211 (0.597)	Data Time 0.001 (0.049)	Loss 3.6097 (3.6858)	Entropy 1.80921 (1.81024)	Top-1 acc 37.891 (37.192)	Top-5 acc 64.062 (61.410)	lr 0.02447
Train [12][560/3239]	Time 2.238 (0.594)	Data Time 0.001 (0.048)	Loss 3.3826 (3.6856)	Entropy 1.80921 (1.81023)	Top-1 acc 42.188 (37.192)	Top-5 acc 70.312 (61.417)	lr 0.02447
Train [12][570/3239]	Time 0.241 (0.587)	Data Time 0.001 (0.047)	Loss 3.6370 (3.6859)	Entropy 1.80920 (1.81021)	Top-1 acc 38.672 (37.194)	Top-5 acc 62.891 (61.416)	lr 0.02447
Train [12][580/3239]	Time 0.199 (0.584)	Data Time 0.001 (0.046)	Loss 3.8478 (3.6855)	Entropy 1.80918 (1.81019)	Top-1 acc 31.641 (37.201)	Top-5 acc 56.250 (61.415)	lr 0.02447
Train [12][590/3239]	Time 0.208 (0.581)	Data Time 0.001 (0.046)	Loss 3.8045 (3.6853)	Entropy 1.80914 (1.81017)	Top-1 acc 35.156 (37.209)	Top-5 acc 57.422 (61.421)	lr 0.02447
Train [12][600/3239]	Time 0.213 (0.579)	Data Time 0.001 (0.045)	Loss 3.6554 (3.6853)	Entropy 1.80908 (1.81015)	Top-1 acc 37.500 (37.218)	Top-5 acc 59.375 (61.410)	lr 0.02447
Train [12][610/3239]	Time 0.262 (0.576)	Data Time 0.001 (0.044)	Loss 3.9038 (3.6865)	Entropy 1.80891 (1.81014)	Top-1 acc 30.078 (37.195)	Top-5 acc 59.766 (61.391)	lr 0.02447
Train [12][620/3239]	Time 0.204 (0.574)	Data Time 0.002 (0.044)	Loss 3.4474 (3.6847)	Entropy 1.80890 (1.81012)	Top-1 acc 42.578 (37.247)	Top-5 acc 66.797 (61.422)	lr 0.02447
Train [12][630/3239]	Time 0.172 (0.571)	Data Time 0.001 (0.043)	Loss 3.3958 (3.6847)	Entropy 1.80886 (1.81010)	Top-1 acc 41.406 (37.257)	Top-5 acc 69.531 (61.409)	lr 0.02447
Train [12][640/3239]	Time 0.261 (0.569)	Data Time 0.001 (0.042)	Loss 3.7605 (3.6833)	Entropy 1.80881 (1.81008)	Top-1 acc 35.547 (37.278)	Top-5 acc 61.719 (61.446)	lr 0.02447
Train [12][650/3239]	Time 0.212 (0.567)	Data Time 0.001 (0.042)	Loss 3.5815 (3.6841)	Entropy 1.80875 (1.81006)	Top-1 acc 39.453 (37.270)	Top-5 acc 63.672 (61.418)	lr 0.02447
Train [12][660/3239]	Time 0.202 (0.565)	Data Time 0.001 (0.041)	Loss 3.7990 (3.6849)	Entropy 1.80876 (1.81004)	Top-1 acc 35.938 (37.259)	Top-5 acc 57.422 (61.385)	lr 0.02447
Train [12][670/3239]	Time 2.217 (0.563)	Data Time 0.001 (0.041)	Loss 3.7341 (3.6849)	Entropy 1.80876 (1.81002)	Top-1 acc 39.062 (37.272)	Top-5 acc 62.500 (61.389)	lr 0.02447
Train [12][680/3239]	Time 0.191 (0.558)	Data Time 0.001 (0.040)	Loss 3.3966 (3.6837)	Entropy 1.80870 (1.81000)	Top-1 acc 41.406 (37.306)	Top-5 acc 64.453 (61.410)	lr 0.02447
Train [12][690/3239]	Time 0.184 (0.556)	Data Time 0.001 (0.039)	Loss 3.7760 (3.6836)	Entropy 1.80871 (1.80998)	Top-1 acc 34.375 (37.295)	Top-5 acc 59.375 (61.407)	lr 0.02447
Train [12][700/3239]	Time 0.283 (0.554)	Data Time 0.001 (0.039)	Loss 3.6102 (3.6836)	Entropy 1.80859 (1.80996)	Top-1 acc 35.938 (37.297)	Top-5 acc 59.766 (61.412)	lr 0.02446
Train [12][710/3239]	Time 0.238 (0.552)	Data Time 0.001 (0.038)	Loss 3.5855 (3.6838)	Entropy 1.80853 (1.80994)	Top-1 acc 37.109 (37.307)	Top-5 acc 61.719 (61.400)	lr 0.02446
Train [12][720/3239]	Time 0.237 (0.550)	Data Time 0.001 (0.038)	Loss 3.7326 (3.6832)	Entropy 1.80843 (1.80992)	Top-1 acc 39.062 (37.331)	Top-5 acc 59.375 (61.406)	lr 0.02446
Train [12][730/3239]	Time 0.151 (0.548)	Data Time 0.001 (0.037)	Loss 3.4910 (3.6828)	Entropy 1.80839 (1.80990)	Top-1 acc 43.359 (37.328)	Top-5 acc 64.062 (61.411)	lr 0.02446
Train [12][740/3239]	Time 0.233 (0.547)	Data Time 0.001 (0.037)	Loss 3.5701 (3.6823)	Entropy 1.80834 (1.80988)	Top-1 acc 39.453 (37.330)	Top-5 acc 64.062 (61.423)	lr 0.02446
Train [12][750/3239]	Time 0.225 (0.545)	Data Time 0.001 (0.036)	Loss 3.6314 (3.6827)	Entropy 1.80826 (1.80986)	Top-1 acc 38.672 (37.312)	Top-5 acc 60.547 (61.411)	lr 0.02446
Train [12][760/3239]	Time 0.301 (0.543)	Data Time 0.001 (0.036)	Loss 3.6952 (3.6832)	Entropy 1.80821 (1.80984)	Top-1 acc 37.109 (37.304)	Top-5 acc 59.766 (61.397)	lr 0.02446
Train [12][770/3239]	Time 0.272 (0.587)	Data Time 0.004 (0.036)	Loss 3.6902 (3.6829)	Entropy 1.80815 (1.80982)	Top-1 acc 34.766 (37.301)	Top-5 acc 59.375 (61.400)	lr 0.02446
Train [12][780/3239]	Time 3.026 (0.587)	Data Time 0.004 (0.035)	Loss 3.8221 (3.6835)	Entropy 1.80815 (1.80979)	Top-1 acc 34.766 (37.287)	Top-5 acc 58.594 (61.382)	lr 0.02446
Train [12][790/3239]	Time 0.203 (0.582)	Data Time 0.002 (0.035)	Loss 3.8541 (3.6844)	Entropy 1.80814 (1.80977)	Top-1 acc 36.328 (37.273)	Top-5 acc 58.984 (61.363)	lr 0.02446
Train [12][800/3239]	Time 0.293 (0.581)	Data Time 0.002 (0.034)	Loss 3.6114 (3.6834)	Entropy 1.80813 (1.80975)	Top-1 acc 35.938 (37.283)	Top-5 acc 62.500 (61.380)	lr 0.02446
Train [12][810/3239]	Time 0.224 (0.579)	Data Time 0.001 (0.034)	Loss 3.7740 (3.6827)	Entropy 1.80811 (1.80973)	Top-1 acc 33.594 (37.298)	Top-5 acc 58.594 (61.392)	lr 0.02446
Train [12][820/3239]	Time 0.277 (0.578)	Data Time 0.002 (0.034)	Loss 3.6338 (3.6823)	Entropy 1.80806 (1.80971)	Top-1 acc 40.234 (37.312)	Top-5 acc 63.281 (61.409)	lr 0.02446
Train [12][830/3239]	Time 0.198 (0.576)	Data Time 0.001 (0.033)	Loss 3.5902 (3.6820)	Entropy 1.80805 (1.80969)	Top-1 acc 37.500 (37.319)	Top-5 acc 63.672 (61.414)	lr 0.02446
Train [12][840/3239]	Time 0.186 (0.574)	Data Time 0.001 (0.033)	Loss 3.8965 (3.6828)	Entropy 1.80801 (1.80967)	Top-1 acc 32.422 (37.305)	Top-5 acc 55.859 (61.394)	lr 0.02446
Train [12][850/3239]	Time 0.219 (0.573)	Data Time 0.002 (0.033)	Loss 3.6676 (3.6830)	Entropy 1.80797 (1.80965)	Top-1 acc 36.328 (37.306)	Top-5 acc 61.719 (61.389)	lr 0.02446
Train [12][860/3239]	Time 0.191 (0.571)	Data Time 0.003 (0.032)	Loss 3.6894 (3.6830)	Entropy 1.80791 (1.80963)	Top-1 acc 35.156 (37.313)	Top-5 acc 62.500 (61.381)	lr 0.02446
Train [12][870/3239]	Time 0.392 (0.570)	Data Time 0.002 (0.032)	Loss 3.6489 (3.6829)	Entropy 1.80776 (1.80961)	Top-1 acc 37.500 (37.317)	Top-5 acc 58.984 (61.368)	lr 0.02446
Train [12][880/3239]	Time 0.205 (0.569)	Data Time 0.002 (0.031)	Loss 3.6181 (3.6829)	Entropy 1.80771 (1.80959)	Top-1 acc 39.062 (37.316)	Top-5 acc 64.062 (61.379)	lr 0.02446
Train [12][890/3239]	Time 2.309 (0.568)	Data Time 0.002 (0.031)	Loss 3.3970 (3.6828)	Entropy 1.80771 (1.80957)	Top-1 acc 44.141 (37.301)	Top-5 acc 66.797 (61.381)	lr 0.02446
Train [12][900/3239]	Time 0.251 (0.564)	Data Time 0.002 (0.031)	Loss 3.6859 (3.6830)	Entropy 1.80767 (1.80955)	Top-1 acc 32.812 (37.284)	Top-5 acc 62.500 (61.374)	lr 0.02446
Train [12][910/3239]	Time 0.270 (0.563)	Data Time 0.002 (0.031)	Loss 3.6349 (3.6824)	Entropy 1.80757 (1.80953)	Top-1 acc 39.844 (37.307)	Top-5 acc 64.453 (61.387)	lr 0.02446
Train [12][920/3239]	Time 0.216 (0.562)	Data Time 0.002 (0.030)	Loss 3.6082 (3.6823)	Entropy 1.80750 (1.80951)	Top-1 acc 40.234 (37.297)	Top-5 acc 63.281 (61.391)	lr 0.02446
Train [12][930/3239]	Time 0.168 (0.561)	Data Time 0.001 (0.030)	Loss 3.7791 (3.6826)	Entropy 1.80748 (1.80949)	Top-1 acc 32.812 (37.297)	Top-5 acc 59.375 (61.385)	lr 0.02446
Train [12][940/3239]	Time 0.286 (0.559)	Data Time 0.001 (0.030)	Loss 3.6641 (3.6825)	Entropy 1.80744 (1.80946)	Top-1 acc 37.500 (37.298)	Top-5 acc 60.547 (61.387)	lr 0.02446
Train [12][950/3239]	Time 0.224 (0.558)	Data Time 0.001 (0.029)	Loss 3.6333 (3.6823)	Entropy 1.80742 (1.80944)	Top-1 acc 37.109 (37.305)	Top-5 acc 58.984 (61.384)	lr 0.02446
Train [12][960/3239]	Time 0.200 (0.557)	Data Time 0.001 (0.029)	Loss 3.7310 (3.6823)	Entropy 1.80737 (1.80942)	Top-1 acc 35.938 (37.307)	Top-5 acc 58.203 (61.381)	lr 0.02446
Train [12][970/3239]	Time 0.247 (0.555)	Data Time 0.001 (0.029)	Loss 3.4058 (3.6823)	Entropy 1.80736 (1.80940)	Top-1 acc 39.844 (37.301)	Top-5 acc 69.141 (61.386)	lr 0.02446
Train [12][980/3239]	Time 0.210 (0.554)	Data Time 0.002 (0.028)	Loss 3.6128 (3.6822)	Entropy 1.80733 (1.80938)	Top-1 acc 36.328 (37.301)	Top-5 acc 61.719 (61.381)	lr 0.02446
Train [12][990/3239]	Time 0.266 (0.553)	Data Time 0.002 (0.028)	Loss 3.6547 (3.6819)	Entropy 1.80730 (1.80936)	Top-1 acc 37.891 (37.295)	Top-5 acc 60.547 (61.378)	lr 0.02446
Train [12][1000/3239]	Time 2.256 (0.551)	Data Time 0.001 (0.028)	Loss 4.0379 (3.6819)	Entropy 1.80730 (1.80934)	Top-1 acc 31.250 (37.294)	Top-5 acc 55.469 (61.379)	lr 0.02446
Train [12][1010/3239]	Time 0.220 (0.548)	Data Time 0.001 (0.028)	Loss 3.7564 (3.6823)	Entropy 1.80727 (1.80932)	Top-1 acc 37.109 (37.287)	Top-5 acc 61.328 (61.368)	lr 0.02446
Train [12][1020/3239]	Time 0.198 (0.547)	Data Time 0.001 (0.027)	Loss 3.7124 (3.6827)	Entropy 1.80724 (1.80930)	Top-1 acc 33.594 (37.275)	Top-5 acc 58.203 (61.356)	lr 0.02446
Train [12][1030/3239]	Time 0.230 (0.545)	Data Time 0.001 (0.027)	Loss 3.9278 (3.6829)	Entropy 1.80721 (1.80928)	Top-1 acc 32.422 (37.275)	Top-5 acc 57.812 (61.357)	lr 0.02446
Train [12][1040/3239]	Time 0.225 (0.544)	Data Time 0.001 (0.027)	Loss 3.6349 (3.6822)	Entropy 1.80713 (1.80926)	Top-1 acc 37.891 (37.290)	Top-5 acc 62.891 (61.370)	lr 0.02445
Train [12][1050/3239]	Time 0.200 (0.543)	Data Time 0.001 (0.027)	Loss 3.5005 (3.6825)	Entropy 1.80707 (1.80924)	Top-1 acc 39.453 (37.288)	Top-5 acc 67.578 (61.368)	lr 0.02445
Train [12][1060/3239]	Time 0.199 (0.542)	Data Time 0.001 (0.027)	Loss 3.7334 (3.6821)	Entropy 1.80704 (1.80922)	Top-1 acc 36.328 (37.300)	Top-5 acc 59.766 (61.383)	lr 0.02445
Train [12][1070/3239]	Time 0.210 (0.541)	Data Time 0.001 (0.026)	Loss 3.7477 (3.6821)	Entropy 1.80695 (1.80919)	Top-1 acc 34.375 (37.291)	Top-5 acc 57.812 (61.373)	lr 0.02445
Train [12][1080/3239]	Time 0.186 (0.540)	Data Time 0.001 (0.026)	Loss 3.7383 (3.6821)	Entropy 1.80692 (1.80917)	Top-1 acc 33.594 (37.293)	Top-5 acc 60.938 (61.370)	lr 0.02445
Train [12][1090/3239]	Time 0.227 (0.539)	Data Time 0.001 (0.026)	Loss 3.8315 (3.6816)	Entropy 1.80692 (1.80915)	Top-1 acc 37.109 (37.308)	Top-5 acc 56.641 (61.383)	lr 0.02445
Train [12][1100/3239]	Time 0.213 (0.538)	Data Time 0.002 (0.026)	Loss 3.7397 (3.6829)	Entropy 1.80687 (1.80913)	Top-1 acc 34.766 (37.288)	Top-5 acc 58.203 (61.355)	lr 0.02445
Train [12][1110/3239]	Time 2.237 (0.537)	Data Time 0.001 (0.025)	Loss 3.7011 (3.6825)	Entropy 1.80687 (1.80911)	Top-1 acc 36.719 (37.288)	Top-5 acc 60.156 (61.360)	lr 0.02445
Train [12][1120/3239]	Time 0.215 (0.534)	Data Time 0.001 (0.025)	Loss 3.5340 (3.6833)	Entropy 1.80679 (1.80909)	Top-1 acc 39.844 (37.269)	Top-5 acc 65.625 (61.337)	lr 0.02445
Train [12][1130/3239]	Time 0.217 (0.533)	Data Time 0.001 (0.025)	Loss 3.8919 (3.6832)	Entropy 1.80674 (1.80907)	Top-1 acc 33.984 (37.258)	Top-5 acc 59.375 (61.335)	lr 0.02445
Train [12][1140/3239]	Time 0.264 (0.567)	Data Time 0.002 (0.025)	Loss 3.7156 (3.6835)	Entropy 1.80669 (1.80905)	Top-1 acc 36.719 (37.256)	Top-5 acc 60.938 (61.334)	lr 0.02445
Train [12][1150/3239]	Time 0.212 (0.566)	Data Time 0.006 (0.025)	Loss 3.6817 (3.6843)	Entropy 1.80666 (1.80903)	Top-1 acc 35.938 (37.237)	Top-5 acc 61.719 (61.319)	lr 0.02445
Train [12][1160/3239]	Time 0.385 (0.565)	Data Time 0.002 (0.024)	Loss 3.7342 (3.6842)	Entropy 1.80662 (1.80901)	Top-1 acc 33.203 (37.237)	Top-5 acc 58.984 (61.314)	lr 0.02445
Train [12][1170/3239]	Time 0.220 (0.564)	Data Time 0.001 (0.024)	Loss 3.6015 (3.6840)	Entropy 1.80660 (1.80899)	Top-1 acc 39.062 (37.242)	Top-5 acc 63.672 (61.315)	lr 0.02445
Train [12][1180/3239]	Time 0.216 (0.563)	Data Time 0.001 (0.024)	Loss 3.5769 (3.6840)	Entropy 1.80657 (1.80897)	Top-1 acc 40.234 (37.250)	Top-5 acc 63.281 (61.318)	lr 0.02445
Train [12][1190/3239]	Time 0.204 (0.561)	Data Time 0.002 (0.024)	Loss 3.7089 (3.6841)	Entropy 1.80645 (1.80895)	Top-1 acc 38.281 (37.247)	Top-5 acc 63.672 (61.322)	lr 0.02445
Train [12][1200/3239]	Time 0.195 (0.560)	Data Time 0.002 (0.024)	Loss 3.6654 (3.6836)	Entropy 1.80640 (1.80893)	Top-1 acc 36.719 (37.258)	Top-5 acc 64.062 (61.339)	lr 0.02445
Train [12][1210/3239]	Time 0.217 (0.559)	Data Time 0.001 (0.024)	Loss 3.7014 (3.6829)	Entropy 1.80639 (1.80891)	Top-1 acc 39.453 (37.277)	Top-5 acc 61.328 (61.351)	lr 0.02445
Train [12][1220/3239]	Time 2.417 (0.558)	Data Time 0.001 (0.023)	Loss 3.6068 (3.6828)	Entropy 1.80639 (1.80889)	Top-1 acc 40.625 (37.287)	Top-5 acc 62.891 (61.355)	lr 0.02445
Train [12][1230/3239]	Time 0.241 (0.555)	Data Time 0.002 (0.023)	Loss 3.6588 (3.6828)	Entropy 1.80633 (1.80886)	Top-1 acc 39.844 (37.288)	Top-5 acc 59.375 (61.351)	lr 0.02445
Train [12][1240/3239]	Time 0.304 (0.554)	Data Time 0.001 (0.023)	Loss 3.6537 (3.6826)	Entropy 1.80627 (1.80884)	Top-1 acc 33.594 (37.287)	Top-5 acc 62.109 (61.351)	lr 0.02445
Train [12][1250/3239]	Time 0.193 (0.553)	Data Time 0.001 (0.023)	Loss 3.7932 (3.6829)	Entropy 1.80625 (1.80882)	Top-1 acc 31.250 (37.271)	Top-5 acc 59.766 (61.347)	lr 0.02445
Train [12][1260/3239]	Time 0.245 (0.552)	Data Time 0.001 (0.023)	Loss 3.6882 (3.6829)	Entropy 1.80618 (1.80880)	Top-1 acc 38.672 (37.275)	Top-5 acc 62.500 (61.346)	lr 0.02445
Train [12][1270/3239]	Time 0.252 (0.552)	Data Time 0.001 (0.022)	Loss 3.8128 (3.6832)	Entropy 1.80615 (1.80878)	Top-1 acc 34.375 (37.267)	Top-5 acc 58.594 (61.339)	lr 0.02445
Train [12][1280/3239]	Time 0.163 (0.551)	Data Time 0.001 (0.022)	Loss 3.8585 (3.6829)	Entropy 1.80611 (1.80876)	Top-1 acc 36.328 (37.270)	Top-5 acc 55.078 (61.339)	lr 0.02445
Train [12][1290/3239]	Time 0.202 (0.550)	Data Time 0.001 (0.022)	Loss 3.8090 (3.6831)	Entropy 1.80603 (1.80874)	Top-1 acc 33.594 (37.262)	Top-5 acc 56.641 (61.331)	lr 0.02445
Train [12][1300/3239]	Time 0.243 (0.549)	Data Time 0.002 (0.022)	Loss 3.6714 (3.6831)	Entropy 1.80600 (1.80872)	Top-1 acc 37.500 (37.254)	Top-5 acc 60.156 (61.328)	lr 0.02445
Train [12][1310/3239]	Time 0.196 (0.548)	Data Time 0.001 (0.022)	Loss 3.4160 (3.6822)	Entropy 1.80598 (1.80870)	Top-1 acc 41.406 (37.269)	Top-5 acc 66.406 (61.350)	lr 0.02445
Train [12][1320/3239]	Time 0.313 (0.547)	Data Time 0.002 (0.022)	Loss 3.4425 (3.6814)	Entropy 1.80594 (1.80868)	Top-1 acc 44.531 (37.288)	Top-5 acc 66.016 (61.368)	lr 0.02445
Train [12][1330/3239]	Time 2.112 (0.546)	Data Time 0.001 (0.022)	Loss 3.5842 (3.6813)	Entropy 1.80594 (1.80866)	Top-1 acc 41.797 (37.283)	Top-5 acc 61.328 (61.372)	lr 0.02445
Train [12][1340/3239]	Time 0.257 (0.543)	Data Time 0.001 (0.021)	Loss 3.6892 (3.6811)	Entropy 1.80590 (1.80864)	Top-1 acc 39.453 (37.289)	Top-5 acc 64.062 (61.382)	lr 0.02445
Train [12][1350/3239]	Time 0.242 (0.542)	Data Time 0.001 (0.021)	Loss 3.5583 (3.6810)	Entropy 1.80587 (1.80862)	Top-1 acc 39.844 (37.290)	Top-5 acc 60.938 (61.380)	lr 0.02445
Train [12][1360/3239]	Time 0.222 (0.542)	Data Time 0.001 (0.021)	Loss 3.6483 (3.6806)	Entropy 1.80580 (1.80860)	Top-1 acc 36.328 (37.303)	Top-5 acc 63.281 (61.388)	lr 0.02445
Train [12][1370/3239]	Time 0.217 (0.541)	Data Time 0.002 (0.021)	Loss 3.6922 (3.6802)	Entropy 1.80575 (1.80857)	Top-1 acc 39.844 (37.309)	Top-5 acc 61.719 (61.399)	lr 0.02445
Train [12][1380/3239]	Time 0.208 (0.540)	Data Time 0.001 (0.021)	Loss 3.6023 (3.6800)	Entropy 1.80571 (1.80855)	Top-1 acc 39.844 (37.314)	Top-5 acc 62.109 (61.400)	lr 0.02444
Train [12][1390/3239]	Time 0.194 (0.539)	Data Time 0.001 (0.021)	Loss 3.8133 (3.6801)	Entropy 1.80565 (1.80853)	Top-1 acc 35.156 (37.304)	Top-5 acc 57.812 (61.393)	lr 0.02444
Train [12][1400/3239]	Time 0.247 (0.539)	Data Time 0.001 (0.021)	Loss 3.5879 (3.6802)	Entropy 1.80561 (1.80851)	Top-1 acc 43.359 (37.306)	Top-5 acc 67.969 (61.393)	lr 0.02444
Train [12][1410/3239]	Time 0.324 (0.538)	Data Time 0.001 (0.020)	Loss 3.6860 (3.6800)	Entropy 1.80553 (1.80849)	Top-1 acc 35.547 (37.308)	Top-5 acc 60.938 (61.396)	lr 0.02444
Train [12][1420/3239]	Time 0.253 (0.537)	Data Time 0.002 (0.020)	Loss 3.7720 (3.6802)	Entropy 1.80549 (1.80847)	Top-1 acc 33.594 (37.305)	Top-5 acc 59.766 (61.385)	lr 0.02444
Train [12][1430/3239]	Time 0.223 (0.536)	Data Time 0.001 (0.020)	Loss 3.4489 (3.6797)	Entropy 1.80541 (1.80845)	Top-1 acc 44.141 (37.323)	Top-5 acc 67.188 (61.393)	lr 0.02444
Train [12][1440/3239]	Time 2.319 (0.535)	Data Time 0.001 (0.020)	Loss 3.7278 (3.6798)	Entropy 1.80541 (1.80843)	Top-1 acc 36.719 (37.322)	Top-5 acc 60.156 (61.393)	lr 0.02444
Train [12][1450/3239]	Time 0.214 (0.533)	Data Time 0.001 (0.020)	Loss 3.7787 (3.6796)	Entropy 1.80539 (1.80841)	Top-1 acc 33.984 (37.331)	Top-5 acc 60.547 (61.394)	lr 0.02444
Train [12][1460/3239]	Time 0.171 (0.532)	Data Time 0.001 (0.020)	Loss 3.6103 (3.6798)	Entropy 1.80534 (1.80839)	Top-1 acc 34.766 (37.321)	Top-5 acc 64.453 (61.394)	lr 0.02444
Train [12][1470/3239]	Time 0.195 (0.532)	Data Time 0.002 (0.020)	Loss 3.4613 (3.6797)	Entropy 1.80530 (1.80837)	Top-1 acc 38.672 (37.322)	Top-5 acc 66.406 (61.398)	lr 0.02444
Train [12][1480/3239]	Time 0.220 (0.531)	Data Time 0.001 (0.020)	Loss 3.7555 (3.6799)	Entropy 1.80523 (1.80835)	Top-1 acc 33.203 (37.321)	Top-5 acc 61.719 (61.396)	lr 0.02444
Train [12][1490/3239]	Time 0.285 (0.530)	Data Time 0.001 (0.019)	Loss 3.4469 (3.6799)	Entropy 1.80521 (1.80832)	Top-1 acc 46.484 (37.329)	Top-5 acc 66.016 (61.393)	lr 0.02444
Train [12][1500/3239]	Time 0.288 (0.556)	Data Time 0.003 (0.019)	Loss 3.6602 (3.6798)	Entropy 1.80507 (1.80830)	Top-1 acc 37.109 (37.337)	Top-5 acc 63.672 (61.394)	lr 0.02444
Train [12][1510/3239]	Time 0.214 (0.555)	Data Time 0.002 (0.019)	Loss 3.7120 (3.6800)	Entropy 1.80507 (1.80828)	Top-1 acc 36.719 (37.325)	Top-5 acc 63.672 (61.391)	lr 0.02444
Train [12][1520/3239]	Time 0.211 (0.554)	Data Time 0.001 (0.019)	Loss 3.5953 (3.6803)	Entropy 1.80500 (1.80826)	Top-1 acc 40.625 (37.319)	Top-5 acc 63.672 (61.392)	lr 0.02444
Train [12][1530/3239]	Time 0.207 (0.553)	Data Time 0.001 (0.019)	Loss 3.8661 (3.6803)	Entropy 1.80491 (1.80824)	Top-1 acc 34.766 (37.320)	Top-5 acc 58.984 (61.390)	lr 0.02444
Train [12][1540/3239]	Time 0.230 (0.553)	Data Time 0.001 (0.019)	Loss 3.6170 (3.6804)	Entropy 1.80490 (1.80822)	Top-1 acc 39.453 (37.316)	Top-5 acc 61.328 (61.386)	lr 0.02444
Train [12][1550/3239]	Time 2.262 (0.552)	Data Time 0.001 (0.019)	Loss 3.6588 (3.6807)	Entropy 1.80490 (1.80820)	Top-1 acc 38.672 (37.312)	Top-5 acc 59.766 (61.380)	lr 0.02444
Train [12][1560/3239]	Time 0.233 (0.549)	Data Time 0.001 (0.019)	Loss 3.5071 (3.6805)	Entropy 1.80486 (1.80817)	Top-1 acc 38.281 (37.313)	Top-5 acc 68.750 (61.387)	lr 0.02444
Train [12][1570/3239]	Time 0.256 (0.549)	Data Time 0.001 (0.019)	Loss 4.1474 (3.6808)	Entropy 1.80482 (1.80815)	Top-1 acc 31.641 (37.306)	Top-5 acc 51.953 (61.377)	lr 0.02444
Train [12][1580/3239]	Time 0.154 (0.548)	Data Time 0.001 (0.019)	Loss 3.6088 (3.6805)	Entropy 1.80470 (1.80813)	Top-1 acc 37.109 (37.314)	Top-5 acc 66.016 (61.384)	lr 0.02444
Train [12][1590/3239]	Time 0.215 (0.547)	Data Time 0.001 (0.018)	Loss 3.6460 (3.6805)	Entropy 1.80464 (1.80811)	Top-1 acc 34.375 (37.309)	Top-5 acc 59.375 (61.382)	lr 0.02444
Train [12][1600/3239]	Time 0.173 (0.547)	Data Time 0.001 (0.018)	Loss 3.8734 (3.6804)	Entropy 1.80458 (1.80809)	Top-1 acc 37.500 (37.318)	Top-5 acc 58.203 (61.386)	lr 0.02444
Train [12][1610/3239]	Time 0.202 (0.546)	Data Time 0.001 (0.018)	Loss 3.7610 (3.6807)	Entropy 1.80456 (1.80807)	Top-1 acc 33.203 (37.309)	Top-5 acc 58.594 (61.378)	lr 0.02444
Train [12][1620/3239]	Time 0.223 (0.545)	Data Time 0.001 (0.018)	Loss 3.7292 (3.6810)	Entropy 1.80444 (1.80804)	Top-1 acc 32.812 (37.298)	Top-5 acc 61.328 (61.374)	lr 0.02444
Train [12][1630/3239]	Time 0.211 (0.544)	Data Time 0.001 (0.018)	Loss 3.7452 (3.6815)	Entropy 1.80436 (1.80802)	Top-1 acc 38.281 (37.293)	Top-5 acc 58.984 (61.367)	lr 0.02444
Train [12][1640/3239]	Time 0.197 (0.544)	Data Time 0.001 (0.018)	Loss 3.6674 (3.6813)	Entropy 1.80430 (1.80800)	Top-1 acc 38.672 (37.295)	Top-5 acc 60.938 (61.371)	lr 0.02444
Train [12][1650/3239]	Time 0.265 (0.543)	Data Time 0.002 (0.018)	Loss 3.5064 (3.6809)	Entropy 1.80428 (1.80798)	Top-1 acc 46.875 (37.310)	Top-5 acc 64.062 (61.380)	lr 0.02444
Train [12][1660/3239]	Time 2.282 (0.542)	Data Time 0.001 (0.018)	Loss 3.5561 (3.6809)	Entropy 1.80428 (1.80795)	Top-1 acc 37.891 (37.307)	Top-5 acc 60.938 (61.380)	lr 0.02444
Train [12][1670/3239]	Time 0.234 (0.540)	Data Time 0.001 (0.018)	Loss 3.6376 (3.6809)	Entropy 1.80420 (1.80793)	Top-1 acc 41.406 (37.314)	Top-5 acc 64.844 (61.381)	lr 0.02444
Train [12][1680/3239]	Time 0.201 (0.540)	Data Time 0.001 (0.018)	Loss 3.7290 (3.6809)	Entropy 1.80414 (1.80791)	Top-1 acc 35.156 (37.313)	Top-5 acc 61.328 (61.387)	lr 0.02444
Train [12][1690/3239]	Time 0.174 (0.539)	Data Time 0.001 (0.017)	Loss 3.5268 (3.6809)	Entropy 1.80411 (1.80789)	Top-1 acc 40.234 (37.319)	Top-5 acc 66.797 (61.390)	lr 0.02444
Train [12][1700/3239]	Time 0.212 (0.538)	Data Time 0.001 (0.017)	Loss 3.5407 (3.6806)	Entropy 1.80404 (1.80786)	Top-1 acc 42.969 (37.325)	Top-5 acc 64.062 (61.396)	lr 0.02444
Train [12][1710/3239]	Time 0.266 (0.538)	Data Time 0.001 (0.017)	Loss 3.6137 (3.6807)	Entropy 1.80399 (1.80784)	Top-1 acc 37.891 (37.325)	Top-5 acc 62.500 (61.390)	lr 0.02444
Train [12][1720/3239]	Time 0.258 (0.537)	Data Time 0.001 (0.017)	Loss 3.4668 (3.6807)	Entropy 1.80397 (1.80782)	Top-1 acc 39.453 (37.322)	Top-5 acc 64.844 (61.385)	lr 0.02443
Train [12][1730/3239]	Time 0.160 (0.536)	Data Time 0.001 (0.017)	Loss 3.6617 (3.6803)	Entropy 1.80382 (1.80780)	Top-1 acc 38.281 (37.336)	Top-5 acc 63.281 (61.394)	lr 0.02443
Train [12][1740/3239]	Time 0.249 (0.536)	Data Time 0.001 (0.017)	Loss 3.6684 (3.6806)	Entropy 1.80380 (1.80777)	Top-1 acc 41.406 (37.327)	Top-5 acc 62.891 (61.386)	lr 0.02443
Train [12][1750/3239]	Time 0.231 (0.535)	Data Time 0.001 (0.017)	Loss 3.6746 (3.6802)	Entropy 1.80377 (1.80775)	Top-1 acc 38.672 (37.334)	Top-5 acc 63.281 (61.394)	lr 0.02443
Train [12][1760/3239]	Time 0.276 (0.534)	Data Time 0.001 (0.017)	Loss 3.4363 (3.6796)	Entropy 1.80375 (1.80773)	Top-1 acc 42.578 (37.340)	Top-5 acc 66.406 (61.406)	lr 0.02443
Train [12][1770/3239]	Time 2.263 (0.534)	Data Time 0.001 (0.017)	Loss 3.6978 (3.6797)	Entropy 1.80375 (1.80771)	Top-1 acc 35.547 (37.327)	Top-5 acc 61.328 (61.401)	lr 0.02443
Train [12][1780/3239]	Time 0.190 (0.532)	Data Time 0.002 (0.017)	Loss 3.6981 (3.6796)	Entropy 1.80369 (1.80768)	Top-1 acc 35.547 (37.326)	Top-5 acc 58.203 (61.397)	lr 0.02443
Train [12][1790/3239]	Time 0.310 (0.532)	Data Time 0.002 (0.017)	Loss 3.6759 (3.6795)	Entropy 1.80366 (1.80766)	Top-1 acc 33.594 (37.325)	Top-5 acc 61.328 (61.399)	lr 0.02443
Train [12][1800/3239]	Time 0.216 (0.531)	Data Time 0.001 (0.017)	Loss 3.7228 (3.6797)	Entropy 1.80360 (1.80764)	Top-1 acc 37.109 (37.323)	Top-5 acc 58.984 (61.397)	lr 0.02443
Train [12][1810/3239]	Time 0.194 (0.530)	Data Time 0.001 (0.016)	Loss 3.5303 (3.6796)	Entropy 1.80349 (1.80762)	Top-1 acc 40.234 (37.323)	Top-5 acc 62.891 (61.398)	lr 0.02443
Train [12][1820/3239]	Time 0.231 (0.530)	Data Time 0.001 (0.016)	Loss 3.6592 (3.6794)	Entropy 1.80344 (1.80759)	Top-1 acc 37.109 (37.326)	Top-5 acc 59.766 (61.397)	lr 0.02443
Train [12][1830/3239]	Time 0.208 (0.529)	Data Time 0.001 (0.016)	Loss 3.7534 (3.6797)	Entropy 1.80339 (1.80757)	Top-1 acc 35.938 (37.315)	Top-5 acc 58.594 (61.392)	lr 0.02443
Train [12][1840/3239]	Time 0.214 (0.529)	Data Time 0.001 (0.016)	Loss 3.9112 (3.6800)	Entropy 1.80335 (1.80755)	Top-1 acc 33.594 (37.305)	Top-5 acc 55.078 (61.386)	lr 0.02443
Train [12][1850/3239]	Time 0.166 (0.528)	Data Time 0.001 (0.016)	Loss 3.5123 (3.6802)	Entropy 1.80333 (1.80753)	Top-1 acc 39.844 (37.307)	Top-5 acc 66.016 (61.380)	lr 0.02443
Train [12][1860/3239]	Time 0.300 (0.547)	Data Time 0.002 (0.016)	Loss 3.5692 (3.6800)	Entropy 1.80323 (1.80750)	Top-1 acc 36.719 (37.306)	Top-5 acc 63.672 (61.378)	lr 0.02443
Train [12][1870/3239]	Time 0.320 (0.547)	Data Time 0.002 (0.016)	Loss 3.5652 (3.6800)	Entropy 1.80319 (1.80748)	Top-1 acc 38.281 (37.306)	Top-5 acc 64.062 (61.379)	lr 0.02443
Train [12][1880/3239]	Time 2.219 (0.546)	Data Time 0.002 (0.016)	Loss 3.7137 (3.6800)	Entropy 1.80319 (1.80746)	Top-1 acc 35.938 (37.304)	Top-5 acc 58.984 (61.382)	lr 0.02443
Train [12][1890/3239]	Time 0.219 (0.545)	Data Time 0.001 (0.016)	Loss 3.7731 (3.6799)	Entropy 1.80319 (1.80743)	Top-1 acc 37.109 (37.309)	Top-5 acc 61.328 (61.383)	lr 0.02443
Train [12][1900/3239]	Time 0.193 (0.544)	Data Time 0.001 (0.016)	Loss 3.6252 (3.6795)	Entropy 1.80313 (1.80741)	Top-1 acc 38.672 (37.319)	Top-5 acc 64.844 (61.393)	lr 0.02443
Train [12][1910/3239]	Time 0.166 (0.543)	Data Time 0.001 (0.016)	Loss 4.0136 (3.6801)	Entropy 1.80313 (1.80739)	Top-1 acc 29.688 (37.312)	Top-5 acc 49.609 (61.381)	lr 0.02443
Train [12][1920/3239]	Time 0.218 (0.542)	Data Time 0.001 (0.016)	Loss 3.7557 (3.6801)	Entropy 1.80307 (1.80737)	Top-1 acc 38.672 (37.314)	Top-5 acc 64.062 (61.382)	lr 0.02443
Train [12][1930/3239]	Time 0.200 (0.542)	Data Time 0.001 (0.016)	Loss 3.6646 (3.6800)	Entropy 1.80306 (1.80734)	Top-1 acc 37.109 (37.317)	Top-5 acc 58.984 (61.379)	lr 0.02443
Train [12][1940/3239]	Time 0.151 (0.541)	Data Time 0.001 (0.016)	Loss 3.4814 (3.6801)	Entropy 1.80300 (1.80732)	Top-1 acc 41.797 (37.320)	Top-5 acc 65.234 (61.375)	lr 0.02443
Train [12][1950/3239]	Time 0.334 (0.541)	Data Time 0.001 (0.016)	Loss 3.7514 (3.6802)	Entropy 1.80299 (1.80730)	Top-1 acc 38.672 (37.316)	Top-5 acc 58.984 (61.373)	lr 0.02443
Train [12][1960/3239]	Time 0.159 (0.540)	Data Time 0.001 (0.015)	Loss 3.5315 (3.6802)	Entropy 1.80299 (1.80728)	Top-1 acc 39.453 (37.309)	Top-5 acc 62.891 (61.374)	lr 0.02443
Train [12][1970/3239]	Time 0.200 (0.539)	Data Time 0.001 (0.015)	Loss 3.6453 (3.6799)	Entropy 1.80299 (1.80726)	Top-1 acc 37.109 (37.306)	Top-5 acc 60.938 (61.381)	lr 0.02443
Train [12][1980/3239]	Time 0.299 (0.539)	Data Time 0.002 (0.015)	Loss 3.6211 (3.6800)	Entropy 1.80293 (1.80723)	Top-1 acc 35.156 (37.305)	Top-5 acc 64.453 (61.377)	lr 0.02443
Train [12][1990/3239]	Time 2.179 (0.538)	Data Time 0.002 (0.015)	Loss 3.4370 (3.6800)	Entropy 1.80293 (1.80721)	Top-1 acc 43.359 (37.304)	Top-5 acc 68.750 (61.376)	lr 0.02443
Train [12][2000/3239]	Time 0.193 (0.537)	Data Time 0.001 (0.015)	Loss 3.5432 (3.6800)	Entropy 1.80281 (1.80719)	Top-1 acc 40.234 (37.303)	Top-5 acc 64.844 (61.375)	lr 0.02443
Train [12][2010/3239]	Time 0.206 (0.536)	Data Time 0.001 (0.015)	Loss 3.3944 (3.6796)	Entropy 1.80271 (1.80717)	Top-1 acc 44.922 (37.305)	Top-5 acc 69.531 (61.384)	lr 0.02443
Train [12][2020/3239]	Time 0.193 (0.536)	Data Time 0.001 (0.015)	Loss 3.7267 (3.6795)	Entropy 1.80266 (1.80715)	Top-1 acc 32.422 (37.308)	Top-5 acc 62.500 (61.386)	lr 0.02443
Train [12][2030/3239]	Time 0.297 (0.535)	Data Time 0.001 (0.015)	Loss 3.7055 (3.6793)	Entropy 1.80265 (1.80712)	Top-1 acc 40.625 (37.313)	Top-5 acc 58.984 (61.389)	lr 0.02443
Train [12][2040/3239]	Time 0.215 (0.534)	Data Time 0.001 (0.015)	Loss 3.6736 (3.6789)	Entropy 1.80257 (1.80710)	Top-1 acc 34.766 (37.318)	Top-5 acc 58.984 (61.393)	lr 0.02443
Train [12][2050/3239]	Time 0.217 (0.534)	Data Time 0.001 (0.015)	Loss 3.6621 (3.6789)	Entropy 1.80253 (1.80708)	Top-1 acc 35.938 (37.313)	Top-5 acc 62.109 (61.398)	lr 0.02442
Train [12][2060/3239]	Time 0.200 (0.533)	Data Time 0.001 (0.015)	Loss 3.7548 (3.6789)	Entropy 1.80250 (1.80706)	Top-1 acc 35.156 (37.310)	Top-5 acc 62.500 (61.391)	lr 0.02442
Train [12][2070/3239]	Time 0.214 (0.533)	Data Time 0.001 (0.015)	Loss 3.6194 (3.6787)	Entropy 1.80247 (1.80704)	Top-1 acc 33.984 (37.314)	Top-5 acc 58.984 (61.391)	lr 0.02442
Train [12][2080/3239]	Time 0.243 (0.532)	Data Time 0.001 (0.015)	Loss 3.6869 (3.6785)	Entropy 1.80241 (1.80701)	Top-1 acc 33.203 (37.314)	Top-5 acc 61.719 (61.396)	lr 0.02442
Train [12][2090/3239]	Time 0.194 (0.532)	Data Time 0.002 (0.015)	Loss 3.7977 (3.6783)	Entropy 1.80230 (1.80699)	Top-1 acc 39.062 (37.322)	Top-5 acc 58.594 (61.398)	lr 0.02442
Train [12][2100/3239]	Time 2.308 (0.531)	Data Time 0.002 (0.015)	Loss 3.6096 (3.6785)	Entropy 1.80230 (1.80697)	Top-1 acc 39.062 (37.321)	Top-5 acc 62.500 (61.393)	lr 0.02442
Train [12][2110/3239]	Time 0.264 (0.530)	Data Time 0.001 (0.014)	Loss 3.4847 (3.6788)	Entropy 1.80229 (1.80695)	Top-1 acc 43.359 (37.319)	Top-5 acc 64.062 (61.388)	lr 0.02442
Train [12][2120/3239]	Time 0.190 (0.529)	Data Time 0.001 (0.014)	Loss 3.8481 (3.6789)	Entropy 1.80225 (1.80693)	Top-1 acc 33.594 (37.317)	Top-5 acc 60.547 (61.383)	lr 0.02442
Train [12][2130/3239]	Time 0.218 (0.529)	Data Time 0.001 (0.014)	Loss 3.5729 (3.6786)	Entropy 1.80222 (1.80690)	Top-1 acc 41.406 (37.324)	Top-5 acc 66.797 (61.395)	lr 0.02442
Train [12][2140/3239]	Time 0.187 (0.528)	Data Time 0.001 (0.014)	Loss 3.7747 (3.6785)	Entropy 1.80219 (1.80688)	Top-1 acc 35.156 (37.324)	Top-5 acc 58.203 (61.396)	lr 0.02442
Train [12][2150/3239]	Time 0.225 (0.528)	Data Time 0.001 (0.014)	Loss 3.5507 (3.6785)	Entropy 1.80218 (1.80686)	Top-1 acc 36.719 (37.322)	Top-5 acc 61.328 (61.393)	lr 0.02442
Train [12][2160/3239]	Time 0.201 (0.528)	Data Time 0.001 (0.014)	Loss 3.9812 (3.6787)	Entropy 1.80215 (1.80684)	Top-1 acc 30.859 (37.321)	Top-5 acc 56.250 (61.390)	lr 0.02442
Train [12][2170/3239]	Time 0.225 (0.527)	Data Time 0.001 (0.014)	Loss 3.7983 (3.6785)	Entropy 1.80212 (1.80682)	Top-1 acc 37.891 (37.327)	Top-5 acc 57.031 (61.393)	lr 0.02442
Train [12][2180/3239]	Time 0.212 (0.527)	Data Time 0.001 (0.014)	Loss 3.7250 (3.6784)	Entropy 1.80207 (1.80679)	Top-1 acc 35.938 (37.329)	Top-5 acc 61.328 (61.395)	lr 0.02442
Train [12][2190/3239]	Time 0.294 (0.526)	Data Time 0.001 (0.014)	Loss 3.6102 (3.6784)	Entropy 1.80205 (1.80677)	Top-1 acc 39.453 (37.329)	Top-5 acc 64.453 (61.396)	lr 0.02442
Train [12][2200/3239]	Time 0.198 (0.526)	Data Time 0.001 (0.014)	Loss 3.5208 (3.6783)	Entropy 1.80199 (1.80675)	Top-1 acc 42.969 (37.338)	Top-5 acc 66.797 (61.395)	lr 0.02442
Train [12][2210/3239]	Time 2.420 (0.525)	Data Time 0.001 (0.014)	Loss 3.8672 (3.6782)	Entropy 1.80199 (1.80673)	Top-1 acc 35.938 (37.339)	Top-5 acc 57.422 (61.399)	lr 0.02442
Train [12][2220/3239]	Time 0.211 (0.524)	Data Time 0.001 (0.014)	Loss 3.8298 (3.6785)	Entropy 1.80195 (1.80671)	Top-1 acc 33.203 (37.334)	Top-5 acc 57.031 (61.397)	lr 0.02442
Train [12][2230/3239]	Time 0.295 (0.540)	Data Time 0.002 (0.014)	Loss 3.6566 (3.6784)	Entropy 1.80192 (1.80669)	Top-1 acc 35.156 (37.332)	Top-5 acc 62.500 (61.399)	lr 0.02442
Train [12][2240/3239]	Time 0.214 (0.540)	Data Time 0.002 (0.014)	Loss 3.7747 (3.6784)	Entropy 1.80187 (1.80667)	Top-1 acc 35.547 (37.330)	Top-5 acc 58.984 (61.398)	lr 0.02442
Train [12][2250/3239]	Time 0.196 (0.539)	Data Time 0.001 (0.014)	Loss 3.7192 (3.6785)	Entropy 1.80185 (1.80664)	Top-1 acc 33.594 (37.332)	Top-5 acc 62.891 (61.396)	lr 0.02442
Train [12][2260/3239]	Time 0.224 (0.539)	Data Time 0.001 (0.014)	Loss 3.6739 (3.6783)	Entropy 1.80184 (1.80662)	Top-1 acc 38.672 (37.340)	Top-5 acc 59.766 (61.402)	lr 0.02442
Train [12][2270/3239]	Time 0.300 (0.538)	Data Time 0.001 (0.014)	Loss 3.7310 (3.6783)	Entropy 1.80181 (1.80660)	Top-1 acc 33.594 (37.341)	Top-5 acc 60.156 (61.405)	lr 0.02442
Train [12][2280/3239]	Time 0.215 (0.538)	Data Time 0.001 (0.014)	Loss 3.6360 (3.6782)	Entropy 1.80166 (1.80658)	Top-1 acc 39.062 (37.340)	Top-5 acc 62.891 (61.409)	lr 0.02442
Train [12][2290/3239]	Time 0.180 (0.537)	Data Time 0.001 (0.014)	Loss 3.7077 (3.6782)	Entropy 1.80165 (1.80656)	Top-1 acc 35.156 (37.338)	Top-5 acc 61.719 (61.407)	lr 0.02442
Train [12][2300/3239]	Time 0.227 (0.537)	Data Time 0.001 (0.013)	Loss 3.6482 (3.6782)	Entropy 1.80161 (1.80654)	Top-1 acc 36.719 (37.337)	Top-5 acc 61.328 (61.408)	lr 0.02442
Train [12][2310/3239]	Time 0.246 (0.536)	Data Time 0.001 (0.013)	Loss 3.8127 (3.6780)	Entropy 1.80163 (1.80652)	Top-1 acc 37.891 (37.343)	Top-5 acc 59.375 (61.411)	lr 0.02442
Train [12][2320/3239]	Time 2.392 (0.536)	Data Time 0.001 (0.013)	Loss 3.7391 (3.6781)	Entropy 1.80163 (1.80650)	Top-1 acc 36.719 (37.338)	Top-5 acc 61.328 (61.409)	lr 0.02442
Train [12][2330/3239]	Time 0.167 (0.534)	Data Time 0.002 (0.013)	Loss 3.9839 (3.6781)	Entropy 1.80153 (1.80647)	Top-1 acc 33.203 (37.336)	Top-5 acc 54.297 (61.410)	lr 0.02442
Train [12][2340/3239]	Time 0.184 (0.534)	Data Time 0.001 (0.013)	Loss 3.5645 (3.6785)	Entropy 1.80153 (1.80645)	Top-1 acc 37.109 (37.330)	Top-5 acc 64.453 (61.401)	lr 0.02442
Train [12][2350/3239]	Time 0.244 (0.533)	Data Time 0.001 (0.013)	Loss 3.8683 (3.6787)	Entropy 1.80154 (1.80643)	Top-1 acc 32.422 (37.330)	Top-5 acc 58.594 (61.395)	lr 0.02442
Train [12][2360/3239]	Time 0.211 (0.533)	Data Time 0.001 (0.013)	Loss 3.5816 (3.6785)	Entropy 1.80143 (1.80641)	Top-1 acc 38.281 (37.327)	Top-5 acc 62.500 (61.399)	lr 0.02442
Train [12][2370/3239]	Time 0.277 (0.533)	Data Time 0.002 (0.013)	Loss 3.6976 (3.6782)	Entropy 1.80143 (1.80639)	Top-1 acc 35.547 (37.334)	Top-5 acc 59.375 (61.407)	lr 0.02442
Train [12][2380/3239]	Time 0.204 (0.532)	Data Time 0.001 (0.013)	Loss 3.6904 (3.6783)	Entropy 1.80139 (1.80637)	Top-1 acc 38.281 (37.329)	Top-5 acc 59.375 (61.407)	lr 0.02441
Train [12][2390/3239]	Time 0.199 (0.531)	Data Time 0.001 (0.013)	Loss 3.7445 (3.6782)	Entropy 1.80130 (1.80635)	Top-1 acc 36.328 (37.330)	Top-5 acc 57.812 (61.409)	lr 0.02441
Train [12][2400/3239]	Time 0.208 (0.531)	Data Time 0.001 (0.013)	Loss 3.6161 (3.6780)	Entropy 1.80127 (1.80633)	Top-1 acc 35.547 (37.338)	Top-5 acc 59.766 (61.413)	lr 0.02441
Train [12][2410/3239]	Time 0.165 (0.531)	Data Time 0.001 (0.013)	Loss 3.5878 (3.6781)	Entropy 1.80124 (1.80631)	Top-1 acc 39.844 (37.338)	Top-5 acc 62.891 (61.411)	lr 0.02441
Train [12][2420/3239]	Time 0.242 (0.530)	Data Time 0.001 (0.013)	Loss 3.6376 (3.6781)	Entropy 1.80118 (1.80629)	Top-1 acc 36.328 (37.333)	Top-5 acc 60.938 (61.409)	lr 0.02441
Train [12][2430/3239]	Time 2.368 (0.530)	Data Time 0.001 (0.013)	Loss 3.6444 (3.6780)	Entropy 1.80118 (1.80626)	Top-1 acc 33.594 (37.331)	Top-5 acc 62.109 (61.412)	lr 0.02441
Train [12][2440/3239]	Time 0.217 (0.529)	Data Time 0.002 (0.013)	Loss 3.8390 (3.6781)	Entropy 1.80100 (1.80624)	Top-1 acc 37.500 (37.330)	Top-5 acc 55.859 (61.410)	lr 0.02441
Train [12][2450/3239]	Time 0.233 (0.528)	Data Time 0.001 (0.013)	Loss 3.6029 (3.6783)	Entropy 1.80097 (1.80622)	Top-1 acc 41.016 (37.323)	Top-5 acc 61.719 (61.405)	lr 0.02441
Train [12][2460/3239]	Time 0.144 (0.528)	Data Time 0.001 (0.013)	Loss 3.7941 (3.6784)	Entropy 1.80092 (1.80620)	Top-1 acc 35.156 (37.321)	Top-5 acc 58.203 (61.400)	lr 0.02441
Train [12][2470/3239]	Time 0.219 (0.527)	Data Time 0.001 (0.013)	Loss 3.5259 (3.6785)	Entropy 1.80090 (1.80618)	Top-1 acc 42.188 (37.322)	Top-5 acc 65.625 (61.397)	lr 0.02441
Train [12][2480/3239]	Time 0.199 (0.527)	Data Time 0.001 (0.013)	Loss 3.5948 (3.6787)	Entropy 1.80087 (1.80616)	Top-1 acc 37.891 (37.316)	Top-5 acc 62.891 (61.393)	lr 0.02441
Train [12][2490/3239]	Time 0.210 (0.527)	Data Time 0.001 (0.013)	Loss 3.7036 (3.6788)	Entropy 1.80079 (1.80614)	Top-1 acc 36.719 (37.313)	Top-5 acc 60.938 (61.397)	lr 0.02441
Train [12][2500/3239]	Time 0.298 (0.526)	Data Time 0.001 (0.013)	Loss 3.8189 (3.6785)	Entropy 1.80076 (1.80611)	Top-1 acc 32.031 (37.319)	Top-5 acc 57.031 (61.406)	lr 0.02441
Train [12][2510/3239]	Time 0.218 (0.526)	Data Time 0.001 (0.013)	Loss 3.5676 (3.6783)	Entropy 1.80075 (1.80609)	Top-1 acc 41.016 (37.327)	Top-5 acc 60.547 (61.410)	lr 0.02441
Train [12][2520/3239]	Time 0.195 (0.525)	Data Time 0.001 (0.013)	Loss 3.5971 (3.6784)	Entropy 1.80062 (1.80607)	Top-1 acc 38.281 (37.327)	Top-5 acc 61.719 (61.406)	lr 0.02441
Train [12][2530/3239]	Time 0.258 (0.525)	Data Time 0.001 (0.012)	Loss 3.5550 (3.6784)	Entropy 1.80055 (1.80605)	Top-1 acc 34.375 (37.321)	Top-5 acc 69.141 (61.403)	lr 0.02441
Train [12][2540/3239]	Time 2.185 (0.525)	Data Time 0.001 (0.012)	Loss 3.7707 (3.6785)	Entropy 1.80055 (1.80603)	Top-1 acc 33.594 (37.320)	Top-5 acc 57.812 (61.399)	lr 0.02441
Train [12][2550/3239]	Time 0.199 (0.523)	Data Time 0.001 (0.012)	Loss 3.5854 (3.6782)	Entropy 1.80052 (1.80601)	Top-1 acc 35.938 (37.327)	Top-5 acc 64.062 (61.406)	lr 0.02441
Train [12][2560/3239]	Time 0.234 (0.523)	Data Time 0.001 (0.012)	Loss 3.7745 (3.6783)	Entropy 1.80045 (1.80598)	Top-1 acc 35.156 (37.329)	Top-5 acc 60.156 (61.409)	lr 0.02441
Train [12][2570/3239]	Time 0.335 (0.523)	Data Time 0.001 (0.012)	Loss 3.6047 (3.6781)	Entropy 1.80038 (1.80596)	Top-1 acc 38.281 (37.331)	Top-5 acc 65.234 (61.413)	lr 0.02441
Train [12][2580/3239]	Time 0.256 (0.522)	Data Time 0.001 (0.012)	Loss 3.8002 (3.6782)	Entropy 1.80033 (1.80594)	Top-1 acc 33.984 (37.332)	Top-5 acc 57.812 (61.413)	lr 0.02441
Train [12][2590/3239]	Time 0.212 (0.537)	Data Time 0.002 (0.012)	Loss 3.6646 (3.6782)	Entropy 1.80033 (1.80592)	Top-1 acc 37.109 (37.334)	Top-5 acc 59.766 (61.410)	lr 0.02441
Train [12][2600/3239]	Time 0.241 (0.537)	Data Time 0.002 (0.012)	Loss 3.6204 (3.6781)	Entropy 1.80035 (1.80590)	Top-1 acc 36.328 (37.338)	Top-5 acc 62.500 (61.413)	lr 0.02441
Train [12][2610/3239]	Time 0.223 (0.536)	Data Time 0.002 (0.012)	Loss 3.8900 (3.6782)	Entropy 1.80026 (1.80588)	Top-1 acc 32.031 (37.332)	Top-5 acc 59.375 (61.410)	lr 0.02441
Train [12][2620/3239]	Time 0.222 (0.536)	Data Time 0.002 (0.012)	Loss 3.9273 (3.6780)	Entropy 1.80025 (1.80586)	Top-1 acc 31.250 (37.341)	Top-5 acc 50.391 (61.411)	lr 0.02441
Train [12][2630/3239]	Time 0.207 (0.535)	Data Time 0.001 (0.012)	Loss 3.8832 (3.6780)	Entropy 1.80021 (1.80583)	Top-1 acc 30.859 (37.344)	Top-5 acc 57.422 (61.412)	lr 0.02441
Train [12][2640/3239]	Time 0.210 (0.535)	Data Time 0.001 (0.012)	Loss 3.7158 (3.6780)	Entropy 1.80009 (1.80581)	Top-1 acc 35.938 (37.349)	Top-5 acc 60.156 (61.413)	lr 0.02441
Train [12][2650/3239]	Time 0.219 (0.535)	Data Time 0.001 (0.012)	Loss 3.6888 (3.6779)	Entropy 1.80005 (1.80579)	Top-1 acc 37.891 (37.351)	Top-5 acc 64.453 (61.417)	lr 0.02441
Train [12][2660/3239]	Time 0.358 (0.534)	Data Time 0.001 (0.012)	Loss 3.6605 (3.6780)	Entropy 1.80001 (1.80577)	Top-1 acc 42.188 (37.352)	Top-5 acc 60.547 (61.416)	lr 0.02441
Train [12][2670/3239]	Time 0.232 (0.534)	Data Time 0.001 (0.012)	Loss 3.6641 (3.6778)	Entropy 1.79997 (1.80575)	Top-1 acc 39.453 (37.356)	Top-5 acc 62.109 (61.423)	lr 0.02441
Train [12][2680/3239]	Time 0.211 (0.533)	Data Time 0.001 (0.012)	Loss 3.6052 (3.6778)	Entropy 1.79990 (1.80573)	Top-1 acc 34.375 (37.360)	Top-5 acc 62.891 (61.422)	lr 0.02441
Train [12][2690/3239]	Time 0.184 (0.533)	Data Time 0.001 (0.012)	Loss 3.7673 (3.6774)	Entropy 1.79983 (1.80570)	Top-1 acc 35.938 (37.372)	Top-5 acc 59.375 (61.430)	lr 0.02441
Train [12][2700/3239]	Time 0.189 (0.533)	Data Time 0.001 (0.012)	Loss 3.7556 (3.6775)	Entropy 1.79974 (1.80568)	Top-1 acc 36.719 (37.371)	Top-5 acc 57.422 (61.424)	lr 0.02440
Train [12][2710/3239]	Time 0.262 (0.532)	Data Time 0.001 (0.012)	Loss 3.7774 (3.6775)	Entropy 1.79968 (1.80566)	Top-1 acc 36.719 (37.367)	Top-5 acc 58.203 (61.423)	lr 0.02440
Train [12][2720/3239]	Time 0.233 (0.532)	Data Time 0.001 (0.012)	Loss 3.5064 (3.6776)	Entropy 1.79959 (1.80564)	Top-1 acc 39.062 (37.364)	Top-5 acc 65.625 (61.422)	lr 0.02440
Train [12][2730/3239]	Time 0.204 (0.531)	Data Time 0.001 (0.012)	Loss 3.6359 (3.6774)	Entropy 1.79954 (1.80562)	Top-1 acc 41.797 (37.370)	Top-5 acc 64.062 (61.426)	lr 0.02440
Train [12][2740/3239]	Time 0.200 (0.531)	Data Time 0.001 (0.012)	Loss 3.6943 (3.6776)	Entropy 1.79953 (1.80559)	Top-1 acc 34.766 (37.370)	Top-5 acc 63.672 (61.426)	lr 0.02440
Train [12][2750/3239]	Time 0.228 (0.531)	Data Time 0.002 (0.012)	Loss 3.6284 (3.6775)	Entropy 1.79950 (1.80557)	Top-1 acc 38.281 (37.374)	Top-5 acc 62.891 (61.429)	lr 0.02440
Train [12][2760/3239]	Time 0.211 (0.530)	Data Time 0.001 (0.012)	Loss 3.5014 (3.6773)	Entropy 1.79944 (1.80555)	Top-1 acc 43.359 (37.380)	Top-5 acc 66.406 (61.432)	lr 0.02440
Train [12][2770/3239]	Time 0.196 (0.530)	Data Time 0.001 (0.012)	Loss 3.6692 (3.6773)	Entropy 1.79943 (1.80553)	Top-1 acc 36.719 (37.380)	Top-5 acc 60.938 (61.433)	lr 0.02440
Train [12][2780/3239]	Time 0.230 (0.529)	Data Time 0.001 (0.012)	Loss 3.5973 (3.6773)	Entropy 1.79945 (1.80551)	Top-1 acc 39.062 (37.385)	Top-5 acc 62.500 (61.436)	lr 0.02440
Train [12][2790/3239]	Time 0.165 (0.529)	Data Time 0.001 (0.012)	Loss 3.8100 (3.6773)	Entropy 1.79940 (1.80548)	Top-1 acc 33.984 (37.383)	Top-5 acc 57.031 (61.439)	lr 0.02440
Train [12][2800/3239]	Time 0.181 (0.529)	Data Time 0.001 (0.011)	Loss 3.8425 (3.6771)	Entropy 1.79938 (1.80546)	Top-1 acc 35.156 (37.386)	Top-5 acc 57.422 (61.442)	lr 0.02440
Train [12][2810/3239]	Time 0.228 (0.528)	Data Time 0.001 (0.011)	Loss 3.5506 (3.6771)	Entropy 1.79938 (1.80544)	Top-1 acc 41.406 (37.389)	Top-5 acc 64.062 (61.443)	lr 0.02440
Train [12][2820/3239]	Time 0.217 (0.528)	Data Time 0.001 (0.011)	Loss 3.5629 (3.6769)	Entropy 1.79931 (1.80542)	Top-1 acc 37.500 (37.391)	Top-5 acc 65.234 (61.448)	lr 0.02440
Train [12][2830/3239]	Time 0.222 (0.528)	Data Time 0.001 (0.011)	Loss 3.4954 (3.6768)	Entropy 1.79924 (1.80540)	Top-1 acc 39.844 (37.391)	Top-5 acc 62.500 (61.451)	lr 0.02440
Train [12][2840/3239]	Time 0.230 (0.527)	Data Time 0.001 (0.011)	Loss 3.4069 (3.6767)	Entropy 1.79921 (1.80538)	Top-1 acc 42.969 (37.392)	Top-5 acc 64.844 (61.454)	lr 0.02440
Train [12][2850/3239]	Time 0.239 (0.527)	Data Time 0.001 (0.011)	Loss 3.6446 (3.6766)	Entropy 1.79913 (1.80535)	Top-1 acc 39.062 (37.394)	Top-5 acc 64.062 (61.458)	lr 0.02440
Train [12][2860/3239]	Time 0.262 (0.527)	Data Time 0.002 (0.011)	Loss 3.7247 (3.6764)	Entropy 1.79909 (1.80533)	Top-1 acc 35.938 (37.398)	Top-5 acc 62.109 (61.465)	lr 0.02440
Train [12][2870/3239]	Time 0.232 (0.526)	Data Time 0.001 (0.011)	Loss 3.6645 (3.6764)	Entropy 1.79907 (1.80531)	Top-1 acc 37.891 (37.400)	Top-5 acc 61.328 (61.463)	lr 0.02440
Train [12][2880/3239]	Time 0.184 (0.526)	Data Time 0.001 (0.011)	Loss 3.7709 (3.6764)	Entropy 1.79906 (1.80529)	Top-1 acc 34.766 (37.400)	Top-5 acc 60.547 (61.463)	lr 0.02440
Train [12][2890/3239]	Time 0.219 (0.525)	Data Time 0.001 (0.011)	Loss 3.9505 (3.6763)	Entropy 1.79904 (1.80527)	Top-1 acc 31.250 (37.402)	Top-5 acc 53.516 (61.463)	lr 0.02440
Train [12][2900/3239]	Time 0.186 (0.525)	Data Time 0.001 (0.011)	Loss 3.8787 (3.6764)	Entropy 1.79902 (1.80525)	Top-1 acc 32.031 (37.400)	Top-5 acc 56.641 (61.461)	lr 0.02440
Train [12][2910/3239]	Time 0.227 (0.525)	Data Time 0.001 (0.011)	Loss 3.5078 (3.6765)	Entropy 1.79900 (1.80523)	Top-1 acc 42.188 (37.399)	Top-5 acc 63.281 (61.457)	lr 0.02440
Train [12][2920/3239]	Time 0.171 (0.538)	Data Time 0.003 (0.011)	Loss 3.6725 (3.6766)	Entropy 1.79894 (1.80520)	Top-1 acc 36.719 (37.397)	Top-5 acc 59.375 (61.454)	lr 0.02440
Train [12][2930/3239]	Time 0.243 (0.538)	Data Time 0.002 (0.011)	Loss 3.6632 (3.6767)	Entropy 1.79892 (1.80518)	Top-1 acc 39.453 (37.397)	Top-5 acc 62.500 (61.448)	lr 0.02440
Train [12][2940/3239]	Time 0.192 (0.538)	Data Time 0.002 (0.011)	Loss 3.7574 (3.6768)	Entropy 1.79886 (1.80516)	Top-1 acc 38.281 (37.400)	Top-5 acc 57.812 (61.444)	lr 0.02440
Train [12][2950/3239]	Time 0.211 (0.537)	Data Time 0.002 (0.011)	Loss 3.6937 (3.6769)	Entropy 1.79882 (1.80514)	Top-1 acc 39.844 (37.399)	Top-5 acc 60.156 (61.445)	lr 0.02440
Train [12][2960/3239]	Time 0.149 (0.537)	Data Time 0.001 (0.011)	Loss 3.8373 (3.6770)	Entropy 1.79877 (1.80512)	Top-1 acc 32.422 (37.396)	Top-5 acc 55.469 (61.443)	lr 0.02440
Train [12][2970/3239]	Time 0.237 (0.536)	Data Time 0.001 (0.011)	Loss 3.4850 (3.6771)	Entropy 1.79871 (1.80510)	Top-1 acc 44.922 (37.396)	Top-5 acc 64.453 (61.441)	lr 0.02440
Train [12][2980/3239]	Time 0.200 (0.536)	Data Time 0.001 (0.011)	Loss 3.8881 (3.6771)	Entropy 1.79869 (1.80508)	Top-1 acc 31.250 (37.398)	Top-5 acc 57.031 (61.444)	lr 0.02440
Train [12][2990/3239]	Time 0.207 (0.536)	Data Time 0.001 (0.011)	Loss 3.6691 (3.6771)	Entropy 1.79867 (1.80505)	Top-1 acc 34.766 (37.397)	Top-5 acc 60.547 (61.443)	lr 0.02440
Train [12][3000/3239]	Time 0.259 (0.536)	Data Time 0.002 (0.011)	Loss 3.7040 (3.6770)	Entropy 1.79868 (1.80503)	Top-1 acc 38.672 (37.400)	Top-5 acc 58.594 (61.443)	lr 0.02440
Train [12][3010/3239]	Time 0.252 (0.536)	Data Time 0.002 (0.011)	Loss 3.4979 (3.6767)	Entropy 1.79865 (1.80501)	Top-1 acc 38.281 (37.405)	Top-5 acc 67.188 (61.447)	lr 0.02440
Train [12][3020/3239]	Time 0.262 (0.536)	Data Time 0.001 (0.011)	Loss 3.5269 (3.6765)	Entropy 1.79856 (1.80499)	Top-1 acc 43.359 (37.408)	Top-5 acc 64.453 (61.452)	lr 0.02440
Train [12][3030/3239]	Time 0.254 (0.535)	Data Time 0.001 (0.011)	Loss 3.4835 (3.6768)	Entropy 1.79854 (1.80497)	Top-1 acc 44.531 (37.405)	Top-5 acc 66.406 (61.444)	lr 0.02439
Train [12][3040/3239]	Time 0.215 (0.535)	Data Time 0.002 (0.011)	Loss 3.6359 (3.6766)	Entropy 1.79844 (1.80495)	Top-1 acc 39.062 (37.408)	Top-5 acc 58.984 (61.446)	lr 0.02439
Train [12][3050/3239]	Time 0.247 (0.535)	Data Time 0.001 (0.011)	Loss 3.8158 (3.6765)	Entropy 1.79839 (1.80493)	Top-1 acc 29.688 (37.409)	Top-5 acc 59.766 (61.449)	lr 0.02439
Train [12][3060/3239]	Time 0.249 (0.534)	Data Time 0.001 (0.011)	Loss 3.7385 (3.6765)	Entropy 1.79831 (1.80491)	Top-1 acc 39.453 (37.411)	Top-5 acc 61.328 (61.448)	lr 0.02439
Train [12][3070/3239]	Time 0.162 (0.534)	Data Time 0.001 (0.011)	Loss 3.6931 (3.6766)	Entropy 1.79827 (1.80488)	Top-1 acc 31.250 (37.407)	Top-5 acc 58.594 (61.447)	lr 0.02439
Train [12][3080/3239]	Time 0.182 (0.534)	Data Time 0.002 (0.011)	Loss 3.6139 (3.6763)	Entropy 1.79826 (1.80486)	Top-1 acc 39.062 (37.416)	Top-5 acc 62.891 (61.451)	lr 0.02439
Train [12][3090/3239]	Time 0.222 (0.533)	Data Time 0.001 (0.011)	Loss 3.5549 (3.6762)	Entropy 1.79823 (1.80484)	Top-1 acc 44.141 (37.419)	Top-5 acc 64.453 (61.453)	lr 0.02439
Train [12][3100/3239]	Time 0.217 (0.533)	Data Time 0.001 (0.011)	Loss 3.5113 (3.6762)	Entropy 1.79808 (1.80482)	Top-1 acc 41.406 (37.417)	Top-5 acc 61.719 (61.450)	lr 0.02439
Train [12][3110/3239]	Time 0.167 (0.533)	Data Time 0.001 (0.011)	Loss 3.9137 (3.6762)	Entropy 1.79807 (1.80480)	Top-1 acc 31.250 (37.418)	Top-5 acc 56.250 (61.451)	lr 0.02439
Train [12][3120/3239]	Time 0.277 (0.533)	Data Time 0.001 (0.011)	Loss 3.6905 (3.6760)	Entropy 1.79801 (1.80478)	Top-1 acc 36.328 (37.422)	Top-5 acc 60.156 (61.457)	lr 0.02439
Train [12][3130/3239]	Time 0.386 (0.532)	Data Time 0.001 (0.011)	Loss 3.5109 (3.6759)	Entropy 1.79797 (1.80476)	Top-1 acc 38.672 (37.419)	Top-5 acc 66.797 (61.459)	lr 0.02439
Train [12][3140/3239]	Time 0.201 (0.532)	Data Time 0.001 (0.011)	Loss 3.6151 (3.6756)	Entropy 1.79790 (1.80473)	Top-1 acc 34.766 (37.430)	Top-5 acc 64.062 (61.468)	lr 0.02439
Train [12][3150/3239]	Time 0.188 (0.532)	Data Time 0.001 (0.011)	Loss 3.6868 (3.6755)	Entropy 1.79782 (1.80471)	Top-1 acc 35.938 (37.436)	Top-5 acc 64.453 (61.471)	lr 0.02439
Train [12][3160/3239]	Time 0.134 (0.531)	Data Time 0.001 (0.010)	Loss 3.8019 (3.6755)	Entropy 1.79779 (1.80469)	Top-1 acc 35.938 (37.435)	Top-5 acc 58.984 (61.470)	lr 0.02439
Train [12][3170/3239]	Time 0.218 (0.531)	Data Time 0.002 (0.010)	Loss 3.4953 (3.6755)	Entropy 1.79776 (1.80467)	Top-1 acc 39.062 (37.434)	Top-5 acc 67.969 (61.469)	lr 0.02439
Train [12][3180/3239]	Time 0.156 (0.531)	Data Time 0.000 (0.010)	Loss 3.7024 (3.6755)	Entropy 1.79776 (1.80465)	Top-1 acc 38.672 (37.433)	Top-5 acc 62.891 (61.470)	lr 0.02439
Train [12][3190/3239]	Time 0.157 (0.530)	Data Time 0.000 (0.010)	Loss 3.6265 (3.6755)	Entropy 1.79771 (1.80463)	Top-1 acc 36.719 (37.431)	Top-5 acc 62.109 (61.473)	lr 0.02439
Train [12][3200/3239]	Time 0.266 (0.530)	Data Time 0.000 (0.010)	Loss 3.6148 (3.6755)	Entropy 1.79770 (1.80460)	Top-1 acc 39.453 (37.435)	Top-5 acc 62.500 (61.474)	lr 0.02439
Train [12][3210/3239]	Time 0.202 (0.529)	Data Time 0.000 (0.010)	Loss 3.5980 (3.6754)	Entropy 1.79761 (1.80458)	Top-1 acc 39.453 (37.436)	Top-5 acc 62.891 (61.473)	lr 0.02439
Train [12][3220/3239]	Time 0.209 (0.529)	Data Time 0.000 (0.010)	Loss 3.5502 (3.6753)	Entropy 1.79761 (1.80456)	Top-1 acc 41.406 (37.438)	Top-5 acc 60.156 (61.473)	lr 0.02439
Train [12][3230/3239]	Time 0.150 (0.529)	Data Time 0.000 (0.010)	Loss 3.7661 (3.6753)	Entropy 1.79749 (1.80454)	Top-1 acc 33.984 (37.437)	Top-5 acc 60.156 (61.474)	lr 0.02439
Train [12][3239/3239]	Time 2.028 (0.528)	Data Time 0.000 (0.010)	Loss 3.8294 (3.6753)	Entropy 1.79749 (1.80452)	Top-1 acc 38.272 (37.436)	Top-5 acc 58.025 (61.473)	lr 0.02439
==========Valid [12/120]	loss 2.488	top-1 acc 46.828 (46.828)	top-5 acc 71.036	Train top-1 37.436	top-5 61.473	Entropy 1.79749	Latency-None: 0.000ms	Flops: 513.10M
Train [13][0/3239]	Time 30.355 (30.355)	Data Time 26.108 (26.108)	Loss 3.6330 (3.6330)	Entropy 1.79744 (1.79744)	Top-1 acc 36.719 (36.719)	Top-5 acc 61.328 (61.328)	lr 0.02439
Train [13][10/3239]	Time 45.891 (7.169)	Data Time 0.001 (2.380)	Loss 3.8859 (3.6442)	Entropy 1.79744 (1.79744)	Top-1 acc 33.203 (37.642)	Top-5 acc 55.469 (61.612)	lr 0.02439
Train [13][20/3239]	Time 0.212 (3.865)	Data Time 0.002 (1.252)	Loss 3.5704 (3.6319)	Entropy 1.79738 (1.79741)	Top-1 acc 38.281 (38.300)	Top-5 acc 67.188 (62.333)	lr 0.02439
Train [13][30/3239]	Time 0.160 (2.753)	Data Time 0.002 (0.849)	Loss 3.7310 (3.6298)	Entropy 1.79723 (1.79736)	Top-1 acc 38.281 (38.571)	Top-5 acc 58.984 (62.550)	lr 0.02439
Train [13][40/3239]	Time 0.195 (2.189)	Data Time 0.001 (0.643)	Loss 3.5565 (3.6464)	Entropy 1.79723 (1.79733)	Top-1 acc 44.141 (38.510)	Top-5 acc 66.406 (62.376)	lr 0.02439
Train [13][50/3239]	Time 0.316 (1.846)	Data Time 0.002 (0.517)	Loss 3.5140 (3.6356)	Entropy 1.79716 (1.79730)	Top-1 acc 41.797 (38.595)	Top-5 acc 65.234 (62.676)	lr 0.02439
Train [13][60/3239]	Time 0.253 (1.614)	Data Time 0.001 (0.433)	Loss 3.5576 (3.6280)	Entropy 1.79710 (1.79727)	Top-1 acc 39.062 (38.518)	Top-5 acc 64.844 (62.961)	lr 0.02439
Train [13][70/3239]	Time 0.223 (1.448)	Data Time 0.001 (0.372)	Loss 3.7265 (3.6295)	Entropy 1.79706 (1.79724)	Top-1 acc 35.156 (38.375)	Top-5 acc 58.594 (62.825)	lr 0.02439
Train [13][80/3239]	Time 0.210 (1.323)	Data Time 0.001 (0.326)	Loss 3.4191 (3.6317)	Entropy 1.79706 (1.79722)	Top-1 acc 45.312 (38.373)	Top-5 acc 66.406 (62.703)	lr 0.02439
Train [13][90/3239]	Time 0.195 (1.224)	Data Time 0.001 (0.291)	Loss 3.8335 (3.6349)	Entropy 1.79695 (1.79720)	Top-1 acc 36.328 (38.268)	Top-5 acc 59.375 (62.612)	lr 0.02439
Train [13][100/3239]	Time 0.221 (1.145)	Data Time 0.005 (0.262)	Loss 3.6125 (3.6314)	Entropy 1.79694 (1.79718)	Top-1 acc 34.375 (38.304)	Top-5 acc 63.672 (62.786)	lr 0.02439
Train [13][110/3239]	Time 0.264 (1.080)	Data Time 0.002 (0.239)	Loss 3.4711 (3.6301)	Entropy 1.79682 (1.79715)	Top-1 acc 43.359 (38.415)	Top-5 acc 66.797 (62.746)	lr 0.02438
Train [13][120/3239]	Time 2.411 (1.028)	Data Time 0.003 (0.219)	Loss 3.8654 (3.6371)	Entropy 1.79682 (1.79713)	Top-1 acc 35.156 (38.355)	Top-5 acc 56.250 (62.552)	lr 0.02438
Train [13][130/3239]	Time 0.196 (0.965)	Data Time 0.001 (0.202)	Loss 3.8402 (3.6390)	Entropy 1.79674 (1.79710)	Top-1 acc 33.203 (38.350)	Top-5 acc 59.766 (62.503)	lr 0.02438
Train [13][140/3239]	Time 0.206 (0.926)	Data Time 0.001 (0.188)	Loss 3.6154 (3.6415)	Entropy 1.79669 (1.79707)	Top-1 acc 41.016 (38.339)	Top-5 acc 64.062 (62.470)	lr 0.02438
Train [13][150/3239]	Time 0.163 (0.893)	Data Time 0.001 (0.176)	Loss 3.5212 (3.6446)	Entropy 1.79669 (1.79704)	Top-1 acc 42.969 (38.204)	Top-5 acc 65.625 (62.402)	lr 0.02438
Train [13][160/3239]	Time 0.200 (0.865)	Data Time 0.001 (0.165)	Loss 3.7031 (3.6439)	Entropy 1.79660 (1.79702)	Top-1 acc 32.812 (38.245)	Top-5 acc 62.109 (62.405)	lr 0.02438
Train [13][170/3239]	Time 0.241 (0.840)	Data Time 0.001 (0.156)	Loss 3.6510 (3.6380)	Entropy 1.79656 (1.79699)	Top-1 acc 33.594 (38.306)	Top-5 acc 63.281 (62.523)	lr 0.02438
Train [13][180/3239]	Time 0.165 (0.818)	Data Time 0.001 (0.147)	Loss 3.7549 (3.6356)	Entropy 1.79655 (1.79697)	Top-1 acc 37.891 (38.393)	Top-5 acc 60.156 (62.513)	lr 0.02438
Train [13][190/3239]	Time 0.227 (0.797)	Data Time 0.001 (0.140)	Loss 3.5231 (3.6327)	Entropy 1.79648 (1.79694)	Top-1 acc 41.797 (38.441)	Top-5 acc 64.062 (62.555)	lr 0.02438
Train [13][200/3239]	Time 0.301 (0.779)	Data Time 0.001 (0.133)	Loss 3.6076 (3.6331)	Entropy 1.79638 (1.79692)	Top-1 acc 37.500 (38.417)	Top-5 acc 64.062 (62.496)	lr 0.02438
Train [13][210/3239]	Time 0.213 (0.762)	Data Time 0.001 (0.127)	Loss 3.8380 (3.6346)	Entropy 1.79632 (1.79689)	Top-1 acc 39.453 (38.422)	Top-5 acc 57.812 (62.419)	lr 0.02438
Train [13][220/3239]	Time 0.193 (0.747)	Data Time 0.001 (0.121)	Loss 3.4085 (3.6320)	Entropy 1.79616 (1.79687)	Top-1 acc 43.750 (38.497)	Top-5 acc 64.844 (62.481)	lr 0.02438
Train [13][230/3239]	Time 2.243 (0.732)	Data Time 0.002 (0.116)	Loss 3.7468 (3.6315)	Entropy 1.79616 (1.79684)	Top-1 acc 34.766 (38.452)	Top-5 acc 62.500 (62.458)	lr 0.02438
Train [13][240/3239]	Time 0.247 (0.711)	Data Time 0.001 (0.111)	Loss 3.5731 (3.6302)	Entropy 1.79621 (1.79681)	Top-1 acc 41.016 (38.451)	Top-5 acc 64.453 (62.487)	lr 0.02438
Train [13][250/3239]	Time 0.235 (0.700)	Data Time 0.001 (0.107)	Loss 3.8005 (3.6305)	Entropy 1.79620 (1.79679)	Top-1 acc 37.891 (38.448)	Top-5 acc 61.719 (62.491)	lr 0.02438
Train [13][260/3239]	Time 0.219 (0.689)	Data Time 0.001 (0.103)	Loss 3.5954 (3.6290)	Entropy 1.79616 (1.79676)	Top-1 acc 37.891 (38.455)	Top-5 acc 66.016 (62.522)	lr 0.02438
Train [13][270/3239]	Time 0.303 (0.680)	Data Time 0.001 (0.099)	Loss 3.4306 (3.6292)	Entropy 1.79612 (1.79674)	Top-1 acc 42.188 (38.430)	Top-5 acc 68.359 (62.487)	lr 0.02438
Train [13][280/3239]	Time 0.182 (0.671)	Data Time 0.001 (0.096)	Loss 3.6311 (3.6292)	Entropy 1.79604 (1.79671)	Top-1 acc 38.281 (38.459)	Top-5 acc 63.672 (62.483)	lr 0.02438
Train [13][290/3239]	Time 0.199 (0.663)	Data Time 0.001 (0.092)	Loss 3.7430 (3.6280)	Entropy 1.79604 (1.79669)	Top-1 acc 33.594 (38.452)	Top-5 acc 58.203 (62.484)	lr 0.02438
Train [13][300/3239]	Time 0.201 (0.655)	Data Time 0.001 (0.089)	Loss 3.5325 (3.6281)	Entropy 1.79601 (1.79667)	Top-1 acc 40.625 (38.411)	Top-5 acc 64.062 (62.469)	lr 0.02438
Train [13][310/3239]	Time 0.236 (0.648)	Data Time 0.002 (0.086)	Loss 3.7395 (3.6273)	Entropy 1.79596 (1.79665)	Top-1 acc 34.766 (38.436)	Top-5 acc 56.641 (62.503)	lr 0.02438
Train [13][320/3239]	Time 0.237 (0.641)	Data Time 0.001 (0.084)	Loss 3.4938 (3.6270)	Entropy 1.79593 (1.79663)	Top-1 acc 46.875 (38.467)	Top-5 acc 64.453 (62.527)	lr 0.02438
Train [13][330/3239]	Time 0.232 (0.634)	Data Time 0.001 (0.081)	Loss 3.6943 (3.6264)	Entropy 1.79593 (1.79661)	Top-1 acc 37.500 (38.504)	Top-5 acc 59.766 (62.532)	lr 0.02438
Train [13][340/3239]	Time 2.390 (0.629)	Data Time 0.001 (0.079)	Loss 3.7489 (3.6250)	Entropy 1.79593 (1.79659)	Top-1 acc 35.156 (38.515)	Top-5 acc 59.375 (62.579)	lr 0.02438
Train [13][350/3239]	Time 0.199 (0.617)	Data Time 0.001 (0.077)	Loss 3.8500 (3.6267)	Entropy 1.79586 (1.79656)	Top-1 acc 33.203 (38.497)	Top-5 acc 56.250 (62.542)	lr 0.02438
Train [13][360/3239]	Time 0.215 (0.612)	Data Time 0.002 (0.075)	Loss 3.7772 (3.6262)	Entropy 1.79583 (1.79654)	Top-1 acc 35.156 (38.505)	Top-5 acc 56.250 (62.548)	lr 0.02438
Train [13][370/3239]	Time 0.155 (0.606)	Data Time 0.001 (0.073)	Loss 3.8738 (3.6274)	Entropy 1.79582 (1.79653)	Top-1 acc 33.984 (38.501)	Top-5 acc 57.031 (62.520)	lr 0.02438
Train [13][380/3239]	Time 0.223 (0.711)	Data Time 0.002 (0.071)	Loss 3.4701 (3.6260)	Entropy 1.79575 (1.79651)	Top-1 acc 41.406 (38.546)	Top-5 acc 65.625 (62.550)	lr 0.02438
Train [13][390/3239]	Time 0.207 (0.705)	Data Time 0.002 (0.070)	Loss 3.5468 (3.6252)	Entropy 1.79567 (1.79649)	Top-1 acc 42.578 (38.564)	Top-5 acc 65.625 (62.564)	lr 0.02438
Train [13][400/3239]	Time 0.203 (0.698)	Data Time 0.001 (0.068)	Loss 3.5407 (3.6249)	Entropy 1.79559 (1.79646)	Top-1 acc 38.281 (38.557)	Top-5 acc 65.234 (62.570)	lr 0.02438
Train [13][410/3239]	Time 0.305 (0.691)	Data Time 0.001 (0.066)	Loss 3.7103 (3.6267)	Entropy 1.79553 (1.79644)	Top-1 acc 35.156 (38.510)	Top-5 acc 60.547 (62.543)	lr 0.02438
Train [13][420/3239]	Time 0.197 (0.685)	Data Time 0.001 (0.065)	Loss 3.6030 (3.6276)	Entropy 1.79551 (1.79642)	Top-1 acc 40.234 (38.506)	Top-5 acc 62.500 (62.515)	lr 0.02438
Train [13][430/3239]	Time 0.211 (0.679)	Data Time 0.001 (0.063)	Loss 3.6246 (3.6280)	Entropy 1.79549 (1.79640)	Top-1 acc 39.844 (38.506)	Top-5 acc 60.156 (62.514)	lr 0.02437
Train [13][440/3239]	Time 0.196 (0.673)	Data Time 0.001 (0.062)	Loss 3.5051 (3.6276)	Entropy 1.79547 (1.79638)	Top-1 acc 38.281 (38.497)	Top-5 acc 68.750 (62.544)	lr 0.02437
Train [13][450/3239]	Time 2.352 (0.668)	Data Time 0.002 (0.061)	Loss 3.5604 (3.6278)	Entropy 1.79547 (1.79636)	Top-1 acc 36.328 (38.498)	Top-5 acc 63.281 (62.528)	lr 0.02437
Train [13][460/3239]	Time 0.228 (0.658)	Data Time 0.002 (0.059)	Loss 3.3759 (3.6270)	Entropy 1.79534 (1.79634)	Top-1 acc 43.750 (38.504)	Top-5 acc 69.922 (62.566)	lr 0.02437
Train [13][470/3239]	Time 0.219 (0.654)	Data Time 0.001 (0.058)	Loss 3.6089 (3.6270)	Entropy 1.79525 (1.79631)	Top-1 acc 39.062 (38.508)	Top-5 acc 63.281 (62.573)	lr 0.02437
Train [13][480/3239]	Time 0.293 (0.649)	Data Time 0.001 (0.057)	Loss 3.6757 (3.6285)	Entropy 1.79524 (1.79629)	Top-1 acc 42.188 (38.510)	Top-5 acc 60.547 (62.550)	lr 0.02437
Train [13][490/3239]	Time 0.191 (0.644)	Data Time 0.001 (0.056)	Loss 3.6920 (3.6285)	Entropy 1.79524 (1.79627)	Top-1 acc 37.109 (38.515)	Top-5 acc 60.156 (62.552)	lr 0.02437
Train [13][500/3239]	Time 0.170 (0.640)	Data Time 0.001 (0.055)	Loss 3.6205 (3.6285)	Entropy 1.79515 (1.79625)	Top-1 acc 36.328 (38.500)	Top-5 acc 61.328 (62.534)	lr 0.02437
Train [13][510/3239]	Time 0.209 (0.636)	Data Time 0.001 (0.054)	Loss 3.7566 (3.6292)	Entropy 1.79514 (1.79623)	Top-1 acc 40.625 (38.494)	Top-5 acc 59.766 (62.522)	lr 0.02437
Train [13][520/3239]	Time 0.250 (0.632)	Data Time 0.001 (0.053)	Loss 3.4917 (3.6304)	Entropy 1.79517 (1.79621)	Top-1 acc 41.797 (38.456)	Top-5 acc 64.844 (62.514)	lr 0.02437
Train [13][530/3239]	Time 0.155 (0.628)	Data Time 0.001 (0.052)	Loss 3.7534 (3.6310)	Entropy 1.79518 (1.79619)	Top-1 acc 37.500 (38.453)	Top-5 acc 60.938 (62.501)	lr 0.02437
Train [13][540/3239]	Time 0.219 (0.624)	Data Time 0.001 (0.051)	Loss 3.5658 (3.6324)	Entropy 1.79511 (1.79617)	Top-1 acc 41.406 (38.433)	Top-5 acc 61.719 (62.471)	lr 0.02437
Train [13][550/3239]	Time 0.233 (0.621)	Data Time 0.001 (0.050)	Loss 3.5982 (3.6336)	Entropy 1.79511 (1.79615)	Top-1 acc 39.453 (38.403)	Top-5 acc 65.625 (62.444)	lr 0.02437
Train [13][560/3239]	Time 2.003 (0.617)	Data Time 0.001 (0.049)	Loss 3.5874 (3.6324)	Entropy 1.79511 (1.79613)	Top-1 acc 40.234 (38.421)	Top-5 acc 64.453 (62.468)	lr 0.02437
Train [13][570/3239]	Time 0.316 (0.610)	Data Time 0.001 (0.048)	Loss 3.4632 (3.6309)	Entropy 1.79510 (1.79611)	Top-1 acc 36.719 (38.441)	Top-5 acc 66.016 (62.518)	lr 0.02437
Train [13][580/3239]	Time 0.226 (0.606)	Data Time 0.001 (0.048)	Loss 3.6507 (3.6315)	Entropy 1.79507 (1.79609)	Top-1 acc 37.891 (38.410)	Top-5 acc 60.938 (62.495)	lr 0.02437
Train [13][590/3239]	Time 0.230 (0.603)	Data Time 0.001 (0.047)	Loss 3.5820 (3.6310)	Entropy 1.79500 (1.79608)	Top-1 acc 41.797 (38.415)	Top-5 acc 67.188 (62.507)	lr 0.02437
Train [13][600/3239]	Time 0.226 (0.600)	Data Time 0.001 (0.046)	Loss 3.7346 (3.6314)	Entropy 1.79494 (1.79606)	Top-1 acc 34.375 (38.409)	Top-5 acc 57.812 (62.490)	lr 0.02437
Train [13][610/3239]	Time 0.193 (0.598)	Data Time 0.002 (0.045)	Loss 3.7135 (3.6321)	Entropy 1.79479 (1.79604)	Top-1 acc 34.766 (38.399)	Top-5 acc 62.500 (62.480)	lr 0.02437
Train [13][620/3239]	Time 0.185 (0.595)	Data Time 0.001 (0.045)	Loss 3.7857 (3.6323)	Entropy 1.79475 (1.79602)	Top-1 acc 34.375 (38.396)	Top-5 acc 60.156 (62.488)	lr 0.02437
Train [13][630/3239]	Time 0.233 (0.592)	Data Time 0.001 (0.044)	Loss 3.6050 (3.6318)	Entropy 1.79470 (1.79600)	Top-1 acc 38.281 (38.402)	Top-5 acc 63.672 (62.502)	lr 0.02437
Train [13][640/3239]	Time 0.280 (0.590)	Data Time 0.001 (0.043)	Loss 3.6061 (3.6331)	Entropy 1.79467 (1.79598)	Top-1 acc 39.844 (38.379)	Top-5 acc 62.891 (62.479)	lr 0.02437
Train [13][650/3239]	Time 0.193 (0.587)	Data Time 0.002 (0.043)	Loss 3.6356 (3.6333)	Entropy 1.79466 (1.79596)	Top-1 acc 41.797 (38.366)	Top-5 acc 62.500 (62.490)	lr 0.02437
Train [13][660/3239]	Time 0.209 (0.585)	Data Time 0.001 (0.042)	Loss 3.5246 (3.6326)	Entropy 1.79462 (1.79594)	Top-1 acc 39.844 (38.378)	Top-5 acc 63.672 (62.502)	lr 0.02437
Train [13][670/3239]	Time 2.227 (0.582)	Data Time 0.001 (0.041)	Loss 3.6666 (3.6332)	Entropy 1.79462 (1.79592)	Top-1 acc 35.938 (38.373)	Top-5 acc 62.109 (62.480)	lr 0.02437
Train [13][680/3239]	Time 0.214 (0.577)	Data Time 0.001 (0.041)	Loss 3.7450 (3.6326)	Entropy 1.79463 (1.79590)	Top-1 acc 35.547 (38.376)	Top-5 acc 58.594 (62.483)	lr 0.02437
Train [13][690/3239]	Time 0.206 (0.575)	Data Time 0.002 (0.040)	Loss 3.8411 (3.6326)	Entropy 1.79458 (1.79588)	Top-1 acc 34.766 (38.378)	Top-5 acc 58.203 (62.489)	lr 0.02437
Train [13][700/3239]	Time 0.158 (0.573)	Data Time 0.002 (0.040)	Loss 3.6929 (3.6317)	Entropy 1.79448 (1.79586)	Top-1 acc 35.938 (38.393)	Top-5 acc 58.984 (62.501)	lr 0.02437
Train [13][710/3239]	Time 0.253 (0.571)	Data Time 0.001 (0.039)	Loss 3.6069 (3.6315)	Entropy 1.79434 (1.79584)	Top-1 acc 42.969 (38.399)	Top-5 acc 64.453 (62.513)	lr 0.02437
Train [13][720/3239]	Time 0.246 (0.569)	Data Time 0.001 (0.039)	Loss 3.8141 (3.6320)	Entropy 1.79434 (1.79582)	Top-1 acc 33.594 (38.379)	Top-5 acc 62.109 (62.519)	lr 0.02437
Train [13][730/3239]	Time 0.215 (0.568)	Data Time 0.001 (0.038)	Loss 3.6494 (3.6316)	Entropy 1.79433 (1.79580)	Top-1 acc 37.109 (38.388)	Top-5 acc 61.328 (62.540)	lr 0.02437
Train [13][740/3239]	Time 0.306 (0.614)	Data Time 0.007 (0.038)	Loss 3.7197 (3.6330)	Entropy 1.79428 (1.79578)	Top-1 acc 37.891 (38.346)	Top-5 acc 60.547 (62.511)	lr 0.02436
Train [13][750/3239]	Time 0.242 (0.614)	Data Time 0.002 (0.037)	Loss 3.7077 (3.6336)	Entropy 1.79417 (1.79576)	Top-1 acc 37.500 (38.339)	Top-5 acc 65.234 (62.499)	lr 0.02436
Train [13][760/3239]	Time 0.186 (0.611)	Data Time 0.002 (0.037)	Loss 3.7643 (3.6337)	Entropy 1.79407 (1.79574)	Top-1 acc 34.375 (38.340)	Top-5 acc 62.891 (62.495)	lr 0.02436
Train [13][770/3239]	Time 0.248 (0.609)	Data Time 0.001 (0.036)	Loss 3.4277 (3.6341)	Entropy 1.79401 (1.79571)	Top-1 acc 44.531 (38.347)	Top-5 acc 67.188 (62.484)	lr 0.02436
Train [13][780/3239]	Time 2.120 (0.606)	Data Time 0.001 (0.036)	Loss 3.6748 (3.6345)	Entropy 1.79401 (1.79569)	Top-1 acc 38.672 (38.336)	Top-5 acc 60.547 (62.470)	lr 0.02436
Train [13][790/3239]	Time 0.258 (0.601)	Data Time 0.002 (0.036)	Loss 3.7314 (3.6349)	Entropy 1.79397 (1.79567)	Top-1 acc 37.891 (38.339)	Top-5 acc 62.891 (62.461)	lr 0.02436
Train [13][800/3239]	Time 0.219 (0.599)	Data Time 0.001 (0.035)	Loss 3.6215 (3.6342)	Entropy 1.79393 (1.79565)	Top-1 acc 36.328 (38.340)	Top-5 acc 66.016 (62.480)	lr 0.02436
Train [13][810/3239]	Time 0.238 (0.597)	Data Time 0.001 (0.035)	Loss 3.7575 (3.6343)	Entropy 1.79393 (1.79563)	Top-1 acc 35.156 (38.334)	Top-5 acc 58.594 (62.462)	lr 0.02436
Train [13][820/3239]	Time 0.217 (0.595)	Data Time 0.001 (0.034)	Loss 3.6080 (3.6343)	Entropy 1.79384 (1.79561)	Top-1 acc 35.156 (38.319)	Top-5 acc 63.281 (62.465)	lr 0.02436
Train [13][830/3239]	Time 0.204 (0.593)	Data Time 0.001 (0.034)	Loss 3.6813 (3.6342)	Entropy 1.79381 (1.79558)	Top-1 acc 37.891 (38.328)	Top-5 acc 63.281 (62.458)	lr 0.02436
Train [13][840/3239]	Time 0.214 (0.591)	Data Time 0.001 (0.034)	Loss 3.5670 (3.6337)	Entropy 1.79376 (1.79556)	Top-1 acc 38.672 (38.340)	Top-5 acc 65.234 (62.467)	lr 0.02436
Train [13][850/3239]	Time 0.144 (0.589)	Data Time 0.001 (0.033)	Loss 3.7233 (3.6337)	Entropy 1.79364 (1.79554)	Top-1 acc 38.281 (38.345)	Top-5 acc 61.719 (62.458)	lr 0.02436
Train [13][860/3239]	Time 0.271 (0.587)	Data Time 0.001 (0.033)	Loss 3.5497 (3.6334)	Entropy 1.79357 (1.79552)	Top-1 acc 41.016 (38.355)	Top-5 acc 64.453 (62.472)	lr 0.02436
Train [13][870/3239]	Time 0.329 (0.585)	Data Time 0.001 (0.033)	Loss 3.5545 (3.6337)	Entropy 1.79356 (1.79550)	Top-1 acc 43.359 (38.366)	Top-5 acc 60.547 (62.461)	lr 0.02436
Train [13][880/3239]	Time 0.198 (0.583)	Data Time 0.001 (0.032)	Loss 3.8305 (3.6342)	Entropy 1.79352 (1.79547)	Top-1 acc 34.375 (38.363)	Top-5 acc 58.594 (62.461)	lr 0.02436
Train [13][890/3239]	Time 2.276 (0.581)	Data Time 0.001 (0.032)	Loss 3.7435 (3.6349)	Entropy 1.79352 (1.79545)	Top-1 acc 33.203 (38.345)	Top-5 acc 59.375 (62.444)	lr 0.02436
Train [13][900/3239]	Time 0.262 (0.577)	Data Time 0.001 (0.032)	Loss 3.5246 (3.6343)	Entropy 1.79351 (1.79543)	Top-1 acc 37.500 (38.356)	Top-5 acc 66.797 (62.450)	lr 0.02436
Train [13][910/3239]	Time 0.201 (0.575)	Data Time 0.001 (0.031)	Loss 3.6323 (3.6342)	Entropy 1.79344 (1.79541)	Top-1 acc 41.797 (38.362)	Top-5 acc 59.766 (62.443)	lr 0.02436
Train [13][920/3239]	Time 0.207 (0.574)	Data Time 0.001 (0.031)	Loss 3.8466 (3.6350)	Entropy 1.79337 (1.79539)	Top-1 acc 33.594 (38.349)	Top-5 acc 59.375 (62.419)	lr 0.02436
Train [13][930/3239]	Time 0.194 (0.572)	Data Time 0.001 (0.031)	Loss 3.6586 (3.6347)	Entropy 1.79334 (1.79537)	Top-1 acc 35.156 (38.354)	Top-5 acc 62.891 (62.427)	lr 0.02436
Train [13][940/3239]	Time 0.243 (0.571)	Data Time 0.001 (0.030)	Loss 3.6872 (3.6339)	Entropy 1.79329 (1.79534)	Top-1 acc 40.625 (38.379)	Top-5 acc 63.281 (62.449)	lr 0.02436
Train [13][950/3239]	Time 0.212 (0.569)	Data Time 0.001 (0.030)	Loss 3.5870 (3.6344)	Entropy 1.79325 (1.79532)	Top-1 acc 37.109 (38.368)	Top-5 acc 62.500 (62.440)	lr 0.02436
Train [13][960/3239]	Time 0.287 (0.568)	Data Time 0.001 (0.030)	Loss 3.5970 (3.6331)	Entropy 1.79324 (1.79530)	Top-1 acc 39.844 (38.397)	Top-5 acc 62.109 (62.465)	lr 0.02436
Train [13][970/3239]	Time 0.223 (0.566)	Data Time 0.001 (0.029)	Loss 3.7365 (3.6331)	Entropy 1.79321 (1.79528)	Top-1 acc 36.719 (38.396)	Top-5 acc 58.203 (62.469)	lr 0.02436
Train [13][980/3239]	Time 0.206 (0.565)	Data Time 0.001 (0.029)	Loss 3.6045 (3.6334)	Entropy 1.79317 (1.79526)	Top-1 acc 41.406 (38.386)	Top-5 acc 64.062 (62.462)	lr 0.02436
Train [13][990/3239]	Time 0.268 (0.563)	Data Time 0.001 (0.029)	Loss 3.5212 (3.6338)	Entropy 1.79315 (1.79524)	Top-1 acc 38.672 (38.381)	Top-5 acc 66.406 (62.456)	lr 0.02436
Train [13][1000/3239]	Time 2.248 (0.562)	Data Time 0.002 (0.029)	Loss 3.5501 (3.6332)	Entropy 1.79315 (1.79522)	Top-1 acc 37.891 (38.392)	Top-5 acc 60.938 (62.464)	lr 0.02436
Train [13][1010/3239]	Time 0.146 (0.559)	Data Time 0.001 (0.028)	Loss 3.7739 (3.6331)	Entropy 1.79314 (1.79520)	Top-1 acc 37.109 (38.389)	Top-5 acc 57.812 (62.461)	lr 0.02436
Train [13][1020/3239]	Time 0.187 (0.557)	Data Time 0.001 (0.028)	Loss 3.7121 (3.6329)	Entropy 1.79311 (1.79517)	Top-1 acc 38.672 (38.395)	Top-5 acc 63.281 (62.463)	lr 0.02436
Train [13][1030/3239]	Time 0.245 (0.556)	Data Time 0.006 (0.028)	Loss 3.5287 (3.6327)	Entropy 1.79295 (1.79515)	Top-1 acc 39.453 (38.394)	Top-5 acc 65.234 (62.456)	lr 0.02436
Train [13][1040/3239]	Time 0.181 (0.555)	Data Time 0.001 (0.028)	Loss 3.7069 (3.6335)	Entropy 1.79293 (1.79513)	Top-1 acc 36.328 (38.380)	Top-5 acc 57.812 (62.434)	lr 0.02436
Train [13][1050/3239]	Time 0.175 (0.554)	Data Time 0.002 (0.027)	Loss 3.7044 (3.6333)	Entropy 1.79293 (1.79511)	Top-1 acc 37.891 (38.383)	Top-5 acc 60.938 (62.438)	lr 0.02436
Train [13][1060/3239]	Time 0.159 (0.552)	Data Time 0.001 (0.027)	Loss 3.8657 (3.6335)	Entropy 1.79281 (1.79509)	Top-1 acc 31.641 (38.374)	Top-5 acc 56.641 (62.431)	lr 0.02435
Train [13][1070/3239]	Time 0.146 (0.551)	Data Time 0.001 (0.027)	Loss 3.3816 (3.6331)	Entropy 1.79278 (1.79507)	Top-1 acc 44.531 (38.384)	Top-5 acc 67.578 (62.436)	lr 0.02435
Train [13][1080/3239]	Time 0.182 (0.550)	Data Time 0.001 (0.027)	Loss 3.4891 (3.6326)	Entropy 1.79272 (1.79505)	Top-1 acc 38.672 (38.387)	Top-5 acc 62.109 (62.435)	lr 0.02435
Train [13][1090/3239]	Time 0.228 (0.549)	Data Time 0.001 (0.027)	Loss 3.6290 (3.6322)	Entropy 1.79267 (1.79503)	Top-1 acc 36.719 (38.390)	Top-5 acc 61.719 (62.443)	lr 0.02435
Train [13][1100/3239]	Time 0.259 (0.585)	Data Time 0.003 (0.026)	Loss 3.5661 (3.6324)	Entropy 1.79251 (1.79500)	Top-1 acc 40.234 (38.387)	Top-5 acc 62.109 (62.438)	lr 0.02435
Train [13][1110/3239]	Time 2.464 (0.583)	Data Time 0.002 (0.026)	Loss 3.5716 (3.6326)	Entropy 1.79251 (1.79498)	Top-1 acc 37.109 (38.376)	Top-5 acc 62.500 (62.426)	lr 0.02435
Train [13][1120/3239]	Time 0.201 (0.580)	Data Time 0.001 (0.026)	Loss 3.8668 (3.6328)	Entropy 1.79245 (1.79496)	Top-1 acc 35.938 (38.383)	Top-5 acc 60.156 (62.429)	lr 0.02435
Train [13][1130/3239]	Time 0.204 (0.579)	Data Time 0.001 (0.026)	Loss 3.6186 (3.6325)	Entropy 1.79236 (1.79494)	Top-1 acc 35.156 (38.392)	Top-5 acc 62.891 (62.436)	lr 0.02435
Train [13][1140/3239]	Time 0.221 (0.577)	Data Time 0.001 (0.026)	Loss 3.5967 (3.6325)	Entropy 1.79230 (1.79491)	Top-1 acc 39.453 (38.380)	Top-5 acc 63.672 (62.435)	lr 0.02435
Train [13][1150/3239]	Time 0.219 (0.576)	Data Time 0.001 (0.025)	Loss 3.6800 (3.6324)	Entropy 1.79229 (1.79489)	Top-1 acc 33.203 (38.388)	Top-5 acc 62.891 (62.438)	lr 0.02435
Train [13][1160/3239]	Time 0.185 (0.575)	Data Time 0.001 (0.025)	Loss 3.5915 (3.6332)	Entropy 1.79229 (1.79487)	Top-1 acc 39.453 (38.372)	Top-5 acc 63.281 (62.422)	lr 0.02435
Train [13][1170/3239]	Time 0.172 (0.573)	Data Time 0.001 (0.025)	Loss 3.6163 (3.6337)	Entropy 1.79224 (1.79485)	Top-1 acc 38.281 (38.357)	Top-5 acc 62.109 (62.410)	lr 0.02435
Train [13][1180/3239]	Time 0.268 (0.572)	Data Time 0.001 (0.025)	Loss 3.5015 (3.6342)	Entropy 1.79219 (1.79482)	Top-1 acc 40.625 (38.353)	Top-5 acc 64.844 (62.406)	lr 0.02435
Train [13][1190/3239]	Time 0.143 (0.571)	Data Time 0.002 (0.025)	Loss 3.6201 (3.6339)	Entropy 1.79216 (1.79480)	Top-1 acc 41.016 (38.366)	Top-5 acc 63.281 (62.410)	lr 0.02435
Train [13][1200/3239]	Time 0.198 (0.570)	Data Time 0.001 (0.024)	Loss 3.7617 (3.6347)	Entropy 1.79210 (1.79478)	Top-1 acc 36.328 (38.349)	Top-5 acc 60.547 (62.387)	lr 0.02435
Train [13][1210/3239]	Time 0.235 (0.569)	Data Time 0.001 (0.024)	Loss 3.6067 (3.6343)	Entropy 1.79205 (1.79476)	Top-1 acc 41.016 (38.364)	Top-5 acc 62.891 (62.403)	lr 0.02435
Train [13][1220/3239]	Time 2.296 (0.567)	Data Time 0.001 (0.024)	Loss 3.7619 (3.6343)	Entropy 1.79205 (1.79474)	Top-1 acc 38.672 (38.365)	Top-5 acc 57.031 (62.400)	lr 0.02435
Train [13][1230/3239]	Time 0.228 (0.565)	Data Time 0.001 (0.024)	Loss 3.7391 (3.6346)	Entropy 1.79206 (1.79471)	Top-1 acc 33.984 (38.356)	Top-5 acc 60.547 (62.396)	lr 0.02435
Train [13][1240/3239]	Time 0.211 (0.563)	Data Time 0.001 (0.024)	Loss 3.5854 (3.6349)	Entropy 1.79200 (1.79469)	Top-1 acc 37.891 (38.346)	Top-5 acc 64.062 (62.387)	lr 0.02435
Train [13][1250/3239]	Time 0.231 (0.562)	Data Time 0.001 (0.024)	Loss 3.4458 (3.6344)	Entropy 1.79193 (1.79467)	Top-1 acc 40.625 (38.360)	Top-5 acc 65.234 (62.393)	lr 0.02435
Train [13][1260/3239]	Time 0.269 (0.561)	Data Time 0.001 (0.023)	Loss 3.7575 (3.6344)	Entropy 1.79193 (1.79465)	Top-1 acc 38.281 (38.357)	Top-5 acc 59.375 (62.397)	lr 0.02435
Train [13][1270/3239]	Time 0.164 (0.560)	Data Time 0.001 (0.023)	Loss 3.8042 (3.6346)	Entropy 1.79184 (1.79463)	Top-1 acc 32.812 (38.355)	Top-5 acc 55.078 (62.386)	lr 0.02435
Train [13][1280/3239]	Time 0.207 (0.559)	Data Time 0.001 (0.023)	Loss 3.5047 (3.6342)	Entropy 1.79180 (1.79460)	Top-1 acc 41.016 (38.363)	Top-5 acc 62.500 (62.392)	lr 0.02435
Train [13][1290/3239]	Time 0.225 (0.558)	Data Time 0.001 (0.023)	Loss 3.4529 (3.6337)	Entropy 1.79167 (1.79458)	Top-1 acc 42.969 (38.367)	Top-5 acc 66.797 (62.410)	lr 0.02435
Train [13][1300/3239]	Time 0.162 (0.557)	Data Time 0.002 (0.023)	Loss 3.6639 (3.6342)	Entropy 1.79163 (1.79456)	Top-1 acc 38.281 (38.351)	Top-5 acc 60.938 (62.392)	lr 0.02435
Train [13][1310/3239]	Time 0.224 (0.556)	Data Time 0.001 (0.023)	Loss 3.6245 (3.6341)	Entropy 1.79155 (1.79454)	Top-1 acc 38.281 (38.353)	Top-5 acc 61.719 (62.400)	lr 0.02435
Train [13][1320/3239]	Time 0.178 (0.555)	Data Time 0.001 (0.022)	Loss 3.5606 (3.6341)	Entropy 1.79155 (1.79451)	Top-1 acc 41.016 (38.357)	Top-5 acc 66.016 (62.398)	lr 0.02435
Train [13][1330/3239]	Time 2.330 (0.554)	Data Time 0.001 (0.022)	Loss 3.7332 (3.6339)	Entropy 1.79155 (1.79449)	Top-1 acc 33.594 (38.352)	Top-5 acc 60.938 (62.403)	lr 0.02435
Train [13][1340/3239]	Time 0.279 (0.552)	Data Time 0.001 (0.022)	Loss 3.4896 (3.6339)	Entropy 1.79153 (1.79447)	Top-1 acc 39.844 (38.357)	Top-5 acc 66.016 (62.404)	lr 0.02435
Train [13][1350/3239]	Time 0.186 (0.551)	Data Time 0.001 (0.022)	Loss 3.5872 (3.6342)	Entropy 1.79145 (1.79445)	Top-1 acc 41.797 (38.348)	Top-5 acc 66.797 (62.398)	lr 0.02435
Train [13][1360/3239]	Time 0.195 (0.550)	Data Time 0.001 (0.022)	Loss 3.7747 (3.6341)	Entropy 1.79143 (1.79443)	Top-1 acc 34.375 (38.352)	Top-5 acc 57.031 (62.395)	lr 0.02435
Train [13][1370/3239]	Time 0.198 (0.549)	Data Time 0.001 (0.022)	Loss 3.5790 (3.6338)	Entropy 1.79142 (1.79440)	Top-1 acc 37.891 (38.358)	Top-5 acc 60.547 (62.399)	lr 0.02434
Train [13][1380/3239]	Time 0.196 (0.548)	Data Time 0.002 (0.022)	Loss 3.6909 (3.6337)	Entropy 1.79133 (1.79438)	Top-1 acc 37.109 (38.361)	Top-5 acc 58.594 (62.392)	lr 0.02434
Train [13][1390/3239]	Time 0.271 (0.547)	Data Time 0.001 (0.022)	Loss 3.8599 (3.6343)	Entropy 1.79134 (1.79436)	Top-1 acc 32.812 (38.338)	Top-5 acc 59.766 (62.386)	lr 0.02434
Train [13][1400/3239]	Time 0.240 (0.546)	Data Time 0.001 (0.021)	Loss 3.5601 (3.6341)	Entropy 1.79119 (1.79434)	Top-1 acc 39.062 (38.344)	Top-5 acc 64.844 (62.388)	lr 0.02434
Train [13][1410/3239]	Time 0.283 (0.545)	Data Time 0.001 (0.021)	Loss 3.4903 (3.6342)	Entropy 1.79113 (1.79432)	Top-1 acc 43.359 (38.345)	Top-5 acc 63.281 (62.390)	lr 0.02434
Train [13][1420/3239]	Time 0.195 (0.544)	Data Time 0.001 (0.021)	Loss 3.7094 (3.6348)	Entropy 1.79103 (1.79429)	Top-1 acc 35.547 (38.334)	Top-5 acc 63.281 (62.377)	lr 0.02434
Train [13][1430/3239]	Time 0.199 (0.543)	Data Time 0.001 (0.021)	Loss 3.4681 (3.6346)	Entropy 1.79097 (1.79427)	Top-1 acc 42.969 (38.344)	Top-5 acc 66.797 (62.388)	lr 0.02434
Train [13][1440/3239]	Time 2.224 (0.542)	Data Time 0.001 (0.021)	Loss 3.7165 (3.6347)	Entropy 1.79097 (1.79425)	Top-1 acc 33.594 (38.347)	Top-5 acc 59.375 (62.388)	lr 0.02434
Train [13][1450/3239]	Time 0.224 (0.540)	Data Time 0.002 (0.021)	Loss 3.6709 (3.6348)	Entropy 1.79095 (1.79422)	Top-1 acc 34.375 (38.341)	Top-5 acc 62.891 (62.384)	lr 0.02434
Train [13][1460/3239]	Time 0.178 (0.539)	Data Time 0.001 (0.021)	Loss 3.7537 (3.6350)	Entropy 1.79081 (1.79420)	Top-1 acc 36.719 (38.343)	Top-5 acc 58.984 (62.377)	lr 0.02434
Train [13][1470/3239]	Time 0.202 (0.565)	Data Time 0.002 (0.020)	Loss 3.4820 (3.6348)	Entropy 1.79077 (1.79418)	Top-1 acc 39.453 (38.344)	Top-5 acc 67.188 (62.379)	lr 0.02434
Train [13][1480/3239]	Time 0.217 (0.564)	Data Time 0.002 (0.020)	Loss 3.6228 (3.6347)	Entropy 1.79074 (1.79415)	Top-1 acc 36.328 (38.340)	Top-5 acc 64.453 (62.383)	lr 0.02434
Train [13][1490/3239]	Time 0.212 (0.563)	Data Time 0.001 (0.020)	Loss 3.7926 (3.6350)	Entropy 1.79068 (1.79413)	Top-1 acc 37.109 (38.333)	Top-5 acc 60.547 (62.382)	lr 0.02434
Train [13][1500/3239]	Time 0.214 (0.562)	Data Time 0.001 (0.020)	Loss 3.7142 (3.6350)	Entropy 1.79058 (1.79411)	Top-1 acc 36.719 (38.339)	Top-5 acc 64.062 (62.386)	lr 0.02434
Train [13][1510/3239]	Time 0.217 (0.561)	Data Time 0.002 (0.020)	Loss 3.6657 (3.6348)	Entropy 1.79055 (1.79408)	Top-1 acc 34.766 (38.340)	Top-5 acc 61.719 (62.395)	lr 0.02434
Train [13][1520/3239]	Time 0.198 (0.560)	Data Time 0.001 (0.020)	Loss 3.6448 (3.6345)	Entropy 1.79049 (1.79406)	Top-1 acc 41.016 (38.348)	Top-5 acc 60.156 (62.399)	lr 0.02434
Train [13][1530/3239]	Time 0.224 (0.560)	Data Time 0.001 (0.020)	Loss 3.6613 (3.6343)	Entropy 1.79047 (1.79404)	Top-1 acc 35.938 (38.353)	Top-5 acc 62.500 (62.411)	lr 0.02434
Train [13][1540/3239]	Time 0.282 (0.559)	Data Time 0.001 (0.020)	Loss 3.7370 (3.6346)	Entropy 1.79038 (1.79401)	Top-1 acc 36.719 (38.343)	Top-5 acc 62.109 (62.405)	lr 0.02434
Train [13][1550/3239]	Time 2.254 (0.558)	Data Time 0.002 (0.019)	Loss 3.6382 (3.6347)	Entropy 1.79038 (1.79399)	Top-1 acc 39.453 (38.344)	Top-5 acc 64.453 (62.403)	lr 0.02434
Train [13][1560/3239]	Time 0.219 (0.556)	Data Time 0.001 (0.019)	Loss 3.6790 (3.6348)	Entropy 1.79032 (1.79397)	Top-1 acc 38.281 (38.344)	Top-5 acc 62.891 (62.401)	lr 0.02434
Train [13][1570/3239]	Time 0.321 (0.555)	Data Time 0.001 (0.019)	Loss 3.7263 (3.6345)	Entropy 1.79022 (1.79394)	Top-1 acc 37.500 (38.348)	Top-5 acc 60.547 (62.406)	lr 0.02434
Train [13][1580/3239]	Time 0.181 (0.554)	Data Time 0.001 (0.019)	Loss 3.6947 (3.6348)	Entropy 1.79014 (1.79392)	Top-1 acc 38.281 (38.356)	Top-5 acc 60.547 (62.397)	lr 0.02434
Train [13][1590/3239]	Time 0.173 (0.553)	Data Time 0.001 (0.019)	Loss 3.7826 (3.6348)	Entropy 1.79011 (1.79390)	Top-1 acc 36.328 (38.353)	Top-5 acc 60.156 (62.393)	lr 0.02434
Train [13][1600/3239]	Time 0.156 (0.552)	Data Time 0.001 (0.019)	Loss 3.4927 (3.6349)	Entropy 1.79011 (1.79387)	Top-1 acc 38.672 (38.354)	Top-5 acc 64.844 (62.395)	lr 0.02434
Train [13][1610/3239]	Time 0.168 (0.552)	Data Time 0.001 (0.019)	Loss 3.4442 (3.6349)	Entropy 1.79005 (1.79385)	Top-1 acc 44.531 (38.352)	Top-5 acc 70.703 (62.396)	lr 0.02434
Train [13][1620/3239]	Time 0.216 (0.551)	Data Time 0.001 (0.019)	Loss 3.5873 (3.6350)	Entropy 1.78992 (1.79383)	Top-1 acc 39.453 (38.353)	Top-5 acc 64.453 (62.401)	lr 0.02434
Train [13][1630/3239]	Time 0.186 (0.550)	Data Time 0.001 (0.019)	Loss 3.4021 (3.6345)	Entropy 1.78991 (1.79380)	Top-1 acc 44.922 (38.365)	Top-5 acc 66.406 (62.409)	lr 0.02434
Train [13][1640/3239]	Time 0.212 (0.549)	Data Time 0.001 (0.019)	Loss 3.8395 (3.6342)	Entropy 1.78992 (1.79378)	Top-1 acc 36.328 (38.369)	Top-5 acc 56.250 (62.417)	lr 0.02434
Train [13][1650/3239]	Time 0.221 (0.549)	Data Time 0.001 (0.018)	Loss 3.7711 (3.6344)	Entropy 1.78991 (1.79375)	Top-1 acc 32.031 (38.366)	Top-5 acc 58.203 (62.414)	lr 0.02434
Train [13][1660/3239]	Time 2.422 (0.548)	Data Time 0.001 (0.018)	Loss 3.6871 (3.6341)	Entropy 1.78991 (1.79373)	Top-1 acc 36.719 (38.361)	Top-5 acc 62.891 (62.419)	lr 0.02434
Train [13][1670/3239]	Time 0.216 (0.546)	Data Time 0.002 (0.018)	Loss 3.5287 (3.6336)	Entropy 1.78989 (1.79371)	Top-1 acc 41.016 (38.370)	Top-5 acc 60.547 (62.426)	lr 0.02434
Train [13][1680/3239]	Time 0.232 (0.545)	Data Time 0.001 (0.018)	Loss 3.7168 (3.6338)	Entropy 1.78985 (1.79369)	Top-1 acc 34.766 (38.360)	Top-5 acc 60.156 (62.416)	lr 0.02433
Train [13][1690/3239]	Time 0.193 (0.544)	Data Time 0.001 (0.018)	Loss 3.4692 (3.6339)	Entropy 1.78988 (1.79366)	Top-1 acc 38.281 (38.355)	Top-5 acc 66.797 (62.412)	lr 0.02433
Train [13][1700/3239]	Time 0.205 (0.544)	Data Time 0.001 (0.018)	Loss 3.4915 (3.6339)	Entropy 1.78984 (1.79364)	Top-1 acc 42.578 (38.352)	Top-5 acc 66.797 (62.414)	lr 0.02433
Train [13][1710/3239]	Time 0.204 (0.543)	Data Time 0.001 (0.018)	Loss 3.8044 (3.6341)	Entropy 1.78983 (1.79362)	Top-1 acc 38.672 (38.352)	Top-5 acc 60.938 (62.415)	lr 0.02433
Train [13][1720/3239]	Time 0.202 (0.542)	Data Time 0.001 (0.018)	Loss 3.6555 (3.6339)	Entropy 1.78982 (1.79360)	Top-1 acc 36.328 (38.350)	Top-5 acc 59.766 (62.417)	lr 0.02433
Train [13][1730/3239]	Time 0.160 (0.542)	Data Time 0.001 (0.018)	Loss 3.7572 (3.6339)	Entropy 1.78973 (1.79357)	Top-1 acc 37.500 (38.353)	Top-5 acc 62.109 (62.417)	lr 0.02433
Train [13][1740/3239]	Time 0.291 (0.541)	Data Time 0.001 (0.018)	Loss 3.4989 (3.6337)	Entropy 1.78969 (1.79355)	Top-1 acc 42.578 (38.357)	Top-5 acc 65.234 (62.423)	lr 0.02433
Train [13][1750/3239]	Time 0.219 (0.540)	Data Time 0.001 (0.018)	Loss 3.3704 (3.6334)	Entropy 1.78968 (1.79353)	Top-1 acc 45.312 (38.367)	Top-5 acc 69.531 (62.432)	lr 0.02433
Train [13][1760/3239]	Time 0.168 (0.539)	Data Time 0.001 (0.017)	Loss 3.8193 (3.6339)	Entropy 1.78964 (1.79351)	Top-1 acc 39.062 (38.365)	Top-5 acc 60.547 (62.422)	lr 0.02433
Train [13][1770/3239]	Time 2.242 (0.539)	Data Time 0.001 (0.017)	Loss 3.6793 (3.6341)	Entropy 1.78964 (1.79349)	Top-1 acc 35.547 (38.367)	Top-5 acc 61.719 (62.418)	lr 0.02433
Train [13][1780/3239]	Time 0.204 (0.537)	Data Time 0.001 (0.017)	Loss 3.8179 (3.6339)	Entropy 1.78960 (1.79346)	Top-1 acc 35.156 (38.369)	Top-5 acc 57.812 (62.418)	lr 0.02433
Train [13][1790/3239]	Time 0.186 (0.536)	Data Time 0.001 (0.017)	Loss 3.7649 (3.6339)	Entropy 1.78955 (1.79344)	Top-1 acc 33.984 (38.366)	Top-5 acc 59.766 (62.418)	lr 0.02433
Train [13][1800/3239]	Time 0.156 (0.536)	Data Time 0.001 (0.017)	Loss 3.8230 (3.6342)	Entropy 1.78958 (1.79342)	Top-1 acc 33.203 (38.364)	Top-5 acc 56.641 (62.407)	lr 0.02433
Train [13][1810/3239]	Time 0.211 (0.535)	Data Time 0.001 (0.017)	Loss 3.6886 (3.6341)	Entropy 1.78952 (1.79340)	Top-1 acc 37.500 (38.364)	Top-5 acc 60.156 (62.411)	lr 0.02433
Train [13][1820/3239]	Time 0.308 (0.534)	Data Time 0.001 (0.017)	Loss 3.5530 (3.6340)	Entropy 1.78946 (1.79338)	Top-1 acc 42.188 (38.366)	Top-5 acc 64.453 (62.413)	lr 0.02433
Train [13][1830/3239]	Time 0.328 (0.555)	Data Time 0.003 (0.017)	Loss 3.7037 (3.6338)	Entropy 1.78944 (1.79336)	Top-1 acc 35.547 (38.365)	Top-5 acc 58.984 (62.413)	lr 0.02433
Train [13][1840/3239]	Time 0.216 (0.555)	Data Time 0.002 (0.017)	Loss 3.7477 (3.6337)	Entropy 1.78940 (1.79334)	Top-1 acc 33.203 (38.365)	Top-5 acc 61.719 (62.414)	lr 0.02433
Train [13][1850/3239]	Time 0.218 (0.554)	Data Time 0.002 (0.017)	Loss 3.6569 (3.6338)	Entropy 1.78940 (1.79331)	Top-1 acc 35.156 (38.356)	Top-5 acc 61.328 (62.408)	lr 0.02433
Train [13][1860/3239]	Time 0.222 (0.554)	Data Time 0.002 (0.017)	Loss 3.5651 (3.6339)	Entropy 1.78937 (1.79329)	Top-1 acc 37.500 (38.350)	Top-5 acc 62.109 (62.405)	lr 0.02433
Train [13][1870/3239]	Time 0.240 (0.553)	Data Time 0.001 (0.017)	Loss 3.6598 (3.6339)	Entropy 1.78936 (1.79327)	Top-1 acc 40.625 (38.351)	Top-5 acc 65.234 (62.402)	lr 0.02433
Train [13][1880/3239]	Time 2.323 (0.553)	Data Time 0.001 (0.016)	Loss 3.6738 (3.6339)	Entropy 1.78936 (1.79325)	Top-1 acc 38.281 (38.354)	Top-5 acc 60.547 (62.403)	lr 0.02433
Train [13][1890/3239]	Time 0.231 (0.551)	Data Time 0.002 (0.016)	Loss 3.6362 (3.6342)	Entropy 1.78933 (1.79323)	Top-1 acc 36.328 (38.348)	Top-5 acc 60.547 (62.391)	lr 0.02433
Train [13][1900/3239]	Time 0.378 (0.550)	Data Time 0.001 (0.016)	Loss 3.6596 (3.6343)	Entropy 1.78930 (1.79321)	Top-1 acc 38.281 (38.347)	Top-5 acc 62.891 (62.387)	lr 0.02433
Train [13][1910/3239]	Time 0.225 (0.550)	Data Time 0.001 (0.016)	Loss 3.5712 (3.6343)	Entropy 1.78928 (1.79319)	Top-1 acc 39.844 (38.347)	Top-5 acc 65.625 (62.389)	lr 0.02433
Train [13][1920/3239]	Time 0.216 (0.549)	Data Time 0.001 (0.016)	Loss 3.6651 (3.6343)	Entropy 1.78923 (1.79317)	Top-1 acc 41.016 (38.349)	Top-5 acc 62.500 (62.391)	lr 0.02433
Train [13][1930/3239]	Time 0.245 (0.548)	Data Time 0.001 (0.016)	Loss 3.7549 (3.6345)	Entropy 1.78921 (1.79315)	Top-1 acc 34.375 (38.343)	Top-5 acc 62.109 (62.387)	lr 0.02433
Train [13][1940/3239]	Time 0.211 (0.548)	Data Time 0.001 (0.016)	Loss 3.8211 (3.6346)	Entropy 1.78919 (1.79313)	Top-1 acc 32.812 (38.333)	Top-5 acc 58.594 (62.387)	lr 0.02433
Train [13][1950/3239]	Time 0.221 (0.547)	Data Time 0.002 (0.016)	Loss 3.6578 (3.6347)	Entropy 1.78912 (1.79311)	Top-1 acc 35.938 (38.334)	Top-5 acc 62.891 (62.383)	lr 0.02433
Train [13][1960/3239]	Time 0.220 (0.547)	Data Time 0.001 (0.016)	Loss 3.3981 (3.6345)	Entropy 1.78908 (1.79309)	Top-1 acc 43.359 (38.336)	Top-5 acc 66.797 (62.390)	lr 0.02433
Train [13][1970/3239]	Time 0.299 (0.546)	Data Time 0.001 (0.016)	Loss 3.6218 (3.6343)	Entropy 1.78898 (1.79307)	Top-1 acc 41.797 (38.339)	Top-5 acc 61.328 (62.392)	lr 0.02433
Train [13][1980/3239]	Time 0.249 (0.546)	Data Time 0.001 (0.016)	Loss 3.6620 (3.6342)	Entropy 1.78886 (1.79305)	Top-1 acc 39.062 (38.341)	Top-5 acc 62.109 (62.395)	lr 0.02432
Train [13][1990/3239]	Time 2.260 (0.545)	Data Time 0.001 (0.016)	Loss 3.6932 (3.6343)	Entropy 1.78886 (1.79302)	Top-1 acc 33.984 (38.336)	Top-5 acc 57.422 (62.387)	lr 0.02432
Train [13][2000/3239]	Time 0.195 (0.543)	Data Time 0.001 (0.016)	Loss 3.9450 (3.6347)	Entropy 1.78874 (1.79300)	Top-1 acc 36.328 (38.330)	Top-5 acc 55.469 (62.384)	lr 0.02432
Train [13][2010/3239]	Time 0.190 (0.543)	Data Time 0.001 (0.016)	Loss 3.5063 (3.6345)	Entropy 1.78871 (1.79298)	Top-1 acc 39.453 (38.332)	Top-5 acc 66.016 (62.387)	lr 0.02432
Train [13][2020/3239]	Time 0.188 (0.542)	Data Time 0.001 (0.015)	Loss 3.7825 (3.6345)	Entropy 1.78869 (1.79296)	Top-1 acc 30.859 (38.331)	Top-5 acc 58.594 (62.386)	lr 0.02432
Train [13][2030/3239]	Time 0.213 (0.541)	Data Time 0.001 (0.015)	Loss 3.5781 (3.6347)	Entropy 1.78868 (1.79294)	Top-1 acc 40.625 (38.330)	Top-5 acc 64.062 (62.383)	lr 0.02432
Train [13][2040/3239]	Time 0.201 (0.541)	Data Time 0.001 (0.015)	Loss 3.6238 (3.6346)	Entropy 1.78864 (1.79292)	Top-1 acc 34.766 (38.334)	Top-5 acc 61.328 (62.382)	lr 0.02432
Train [13][2050/3239]	Time 0.139 (0.540)	Data Time 0.001 (0.015)	Loss 3.6945 (3.6345)	Entropy 1.78856 (1.79290)	Top-1 acc 37.891 (38.335)	Top-5 acc 60.156 (62.379)	lr 0.02432
Train [13][2060/3239]	Time 0.230 (0.540)	Data Time 0.001 (0.015)	Loss 3.5076 (3.6344)	Entropy 1.78843 (1.79288)	Top-1 acc 41.406 (38.339)	Top-5 acc 66.016 (62.385)	lr 0.02432
Train [13][2070/3239]	Time 0.237 (0.539)	Data Time 0.001 (0.015)	Loss 3.9587 (3.6344)	Entropy 1.78842 (1.79285)	Top-1 acc 32.812 (38.337)	Top-5 acc 53.125 (62.384)	lr 0.02432
Train [13][2080/3239]	Time 0.223 (0.539)	Data Time 0.001 (0.015)	Loss 3.8013 (3.6345)	Entropy 1.78840 (1.79283)	Top-1 acc 35.547 (38.332)	Top-5 acc 58.984 (62.382)	lr 0.02432
Train [13][2090/3239]	Time 0.234 (0.538)	Data Time 0.001 (0.015)	Loss 3.5924 (3.6348)	Entropy 1.78837 (1.79281)	Top-1 acc 42.578 (38.325)	Top-5 acc 63.281 (62.372)	lr 0.02432
Train [13][2100/3239]	Time 2.344 (0.537)	Data Time 0.002 (0.015)	Loss 3.4685 (3.6346)	Entropy 1.78837 (1.79279)	Top-1 acc 42.578 (38.328)	Top-5 acc 65.234 (62.375)	lr 0.02432
Train [13][2110/3239]	Time 0.162 (0.536)	Data Time 0.001 (0.015)	Loss 3.6584 (3.6344)	Entropy 1.78836 (1.79277)	Top-1 acc 39.453 (38.332)	Top-5 acc 64.062 (62.381)	lr 0.02432
Train [13][2120/3239]	Time 0.212 (0.535)	Data Time 0.002 (0.015)	Loss 3.6070 (3.6345)	Entropy 1.78819 (1.79275)	Top-1 acc 40.625 (38.334)	Top-5 acc 63.672 (62.385)	lr 0.02432
Train [13][2130/3239]	Time 0.324 (0.535)	Data Time 0.001 (0.015)	Loss 3.6086 (3.6346)	Entropy 1.78817 (1.79273)	Top-1 acc 41.016 (38.327)	Top-5 acc 59.766 (62.383)	lr 0.02432
Train [13][2140/3239]	Time 0.198 (0.534)	Data Time 0.002 (0.015)	Loss 3.4819 (3.6344)	Entropy 1.78816 (1.79271)	Top-1 acc 39.453 (38.327)	Top-5 acc 64.844 (62.388)	lr 0.02432
Train [13][2150/3239]	Time 0.243 (0.534)	Data Time 0.001 (0.015)	Loss 3.5324 (3.6345)	Entropy 1.78814 (1.79268)	Top-1 acc 36.328 (38.326)	Top-5 acc 66.406 (62.384)	lr 0.02432
Train [13][2160/3239]	Time 0.211 (0.533)	Data Time 0.001 (0.015)	Loss 3.6082 (3.6346)	Entropy 1.78809 (1.79266)	Top-1 acc 36.719 (38.323)	Top-5 acc 62.891 (62.378)	lr 0.02432
Train [13][2170/3239]	Time 0.161 (0.533)	Data Time 0.001 (0.015)	Loss 3.5140 (3.6346)	Entropy 1.78806 (1.79264)	Top-1 acc 35.938 (38.317)	Top-5 acc 69.141 (62.376)	lr 0.02432
Train [13][2180/3239]	Time 0.199 (0.532)	Data Time 0.001 (0.015)	Loss 3.6659 (3.6346)	Entropy 1.78795 (1.79262)	Top-1 acc 37.500 (38.319)	Top-5 acc 63.281 (62.378)	lr 0.02432
Train [13][2190/3239]	Time 0.223 (0.551)	Data Time 0.002 (0.014)	Loss 3.5473 (3.6344)	Entropy 1.78790 (1.79260)	Top-1 acc 41.406 (38.328)	Top-5 acc 65.234 (62.384)	lr 0.02432
Train [13][2200/3239]	Time 0.355 (0.551)	Data Time 0.002 (0.014)	Loss 3.8140 (3.6344)	Entropy 1.78780 (1.79258)	Top-1 acc 30.078 (38.326)	Top-5 acc 58.984 (62.388)	lr 0.02432
Train [13][2210/3239]	Time 2.301 (0.550)	Data Time 0.002 (0.014)	Loss 3.4279 (3.6343)	Entropy 1.78780 (1.79256)	Top-1 acc 42.578 (38.331)	Top-5 acc 71.875 (62.395)	lr 0.02432
Train [13][2220/3239]	Time 0.192 (0.549)	Data Time 0.001 (0.014)	Loss 3.7249 (3.6343)	Entropy 1.78774 (1.79254)	Top-1 acc 35.547 (38.328)	Top-5 acc 61.328 (62.395)	lr 0.02432
Train [13][2230/3239]	Time 0.200 (0.548)	Data Time 0.001 (0.014)	Loss 3.5044 (3.6342)	Entropy 1.78769 (1.79251)	Top-1 acc 41.016 (38.326)	Top-5 acc 64.062 (62.394)	lr 0.02432
Train [13][2240/3239]	Time 0.191 (0.547)	Data Time 0.001 (0.014)	Loss 3.8522 (3.6344)	Entropy 1.78760 (1.79249)	Top-1 acc 38.672 (38.325)	Top-5 acc 55.859 (62.385)	lr 0.02432
Train [13][2250/3239]	Time 0.199 (0.547)	Data Time 0.001 (0.014)	Loss 3.9080 (3.6346)	Entropy 1.78758 (1.79247)	Top-1 acc 33.203 (38.324)	Top-5 acc 55.859 (62.379)	lr 0.02432
Train [13][2260/3239]	Time 0.207 (0.546)	Data Time 0.001 (0.014)	Loss 3.7427 (3.6347)	Entropy 1.78750 (1.79245)	Top-1 acc 38.672 (38.325)	Top-5 acc 56.250 (62.377)	lr 0.02432
Train [13][2270/3239]	Time 0.315 (0.546)	Data Time 0.002 (0.014)	Loss 3.6755 (3.6349)	Entropy 1.78740 (1.79243)	Top-1 acc 33.203 (38.319)	Top-5 acc 64.844 (62.373)	lr 0.02432
Train [13][2280/3239]	Time 0.228 (0.545)	Data Time 0.001 (0.014)	Loss 3.7627 (3.6351)	Entropy 1.78739 (1.79240)	Top-1 acc 33.203 (38.317)	Top-5 acc 56.250 (62.372)	lr 0.02432
Train [13][2290/3239]	Time 0.164 (0.545)	Data Time 0.001 (0.014)	Loss 3.7545 (3.6352)	Entropy 1.78735 (1.79238)	Top-1 acc 34.375 (38.311)	Top-5 acc 62.109 (62.371)	lr 0.02431
Train [13][2300/3239]	Time 0.172 (0.544)	Data Time 0.001 (0.014)	Loss 3.7715 (3.6353)	Entropy 1.78729 (1.79236)	Top-1 acc 38.281 (38.312)	Top-5 acc 59.766 (62.369)	lr 0.02431
Train [13][2310/3239]	Time 0.255 (0.544)	Data Time 0.001 (0.014)	Loss 3.9598 (3.6358)	Entropy 1.78726 (1.79234)	Top-1 acc 33.203 (38.303)	Top-5 acc 54.688 (62.360)	lr 0.02431
Train [13][2320/3239]	Time 2.378 (0.543)	Data Time 0.001 (0.014)	Loss 3.6843 (3.6360)	Entropy 1.78726 (1.79232)	Top-1 acc 34.375 (38.303)	Top-5 acc 62.109 (62.361)	lr 0.02431
Train [13][2330/3239]	Time 0.194 (0.542)	Data Time 0.001 (0.014)	Loss 3.3923 (3.6359)	Entropy 1.78720 (1.79229)	Top-1 acc 48.047 (38.307)	Top-5 acc 69.141 (62.362)	lr 0.02431
Train [13][2340/3239]	Time 0.239 (0.541)	Data Time 0.001 (0.014)	Loss 3.5227 (3.6358)	Entropy 1.78717 (1.79227)	Top-1 acc 37.500 (38.306)	Top-5 acc 64.844 (62.365)	lr 0.02431
Train [13][2350/3239]	Time 0.310 (0.541)	Data Time 0.001 (0.014)	Loss 3.5909 (3.6359)	Entropy 1.78703 (1.79225)	Top-1 acc 40.625 (38.306)	Top-5 acc 64.062 (62.362)	lr 0.02431
Train [13][2360/3239]	Time 0.146 (0.540)	Data Time 0.001 (0.014)	Loss 3.9672 (3.6361)	Entropy 1.78702 (1.79223)	Top-1 acc 31.641 (38.296)	Top-5 acc 55.469 (62.357)	lr 0.02431
Train [13][2370/3239]	Time 0.208 (0.540)	Data Time 0.001 (0.014)	Loss 3.5873 (3.6361)	Entropy 1.78693 (1.79221)	Top-1 acc 38.672 (38.295)	Top-5 acc 62.891 (62.358)	lr 0.02431
Train [13][2380/3239]	Time 0.225 (0.539)	Data Time 0.001 (0.014)	Loss 3.6759 (3.6362)	Entropy 1.78692 (1.79218)	Top-1 acc 37.109 (38.297)	Top-5 acc 58.984 (62.352)	lr 0.02431
Train [13][2390/3239]	Time 0.257 (0.539)	Data Time 0.001 (0.013)	Loss 3.6370 (3.6360)	Entropy 1.78693 (1.79216)	Top-1 acc 41.016 (38.305)	Top-5 acc 60.156 (62.357)	lr 0.02431
Train [13][2400/3239]	Time 0.219 (0.539)	Data Time 0.001 (0.013)	Loss 3.5372 (3.6357)	Entropy 1.78684 (1.79214)	Top-1 acc 43.750 (38.312)	Top-5 acc 64.453 (62.366)	lr 0.02431
Train [13][2410/3239]	Time 0.235 (0.538)	Data Time 0.002 (0.013)	Loss 3.8032 (3.6357)	Entropy 1.78679 (1.79212)	Top-1 acc 38.281 (38.315)	Top-5 acc 59.766 (62.365)	lr 0.02431
Train [13][2420/3239]	Time 0.220 (0.538)	Data Time 0.001 (0.013)	Loss 3.4373 (3.6353)	Entropy 1.78677 (1.79210)	Top-1 acc 46.094 (38.322)	Top-5 acc 68.359 (62.374)	lr 0.02431
Train [13][2430/3239]	Time 2.370 (0.537)	Data Time 0.001 (0.013)	Loss 3.5493 (3.6354)	Entropy 1.78677 (1.79207)	Top-1 acc 38.672 (38.319)	Top-5 acc 64.844 (62.373)	lr 0.02431
Train [13][2440/3239]	Time 0.140 (0.536)	Data Time 0.001 (0.013)	Loss 3.6419 (3.6354)	Entropy 1.78674 (1.79205)	Top-1 acc 37.891 (38.315)	Top-5 acc 63.281 (62.373)	lr 0.02431
Train [13][2450/3239]	Time 0.228 (0.535)	Data Time 0.001 (0.013)	Loss 3.4494 (3.6355)	Entropy 1.78666 (1.79203)	Top-1 acc 40.234 (38.315)	Top-5 acc 67.188 (62.373)	lr 0.02431
Train [13][2460/3239]	Time 0.220 (0.535)	Data Time 0.001 (0.013)	Loss 3.6341 (3.6352)	Entropy 1.78662 (1.79201)	Top-1 acc 37.500 (38.320)	Top-5 acc 61.328 (62.375)	lr 0.02431
Train [13][2470/3239]	Time 0.208 (0.535)	Data Time 0.001 (0.013)	Loss 3.7425 (3.6355)	Entropy 1.78661 (1.79199)	Top-1 acc 33.203 (38.307)	Top-5 acc 57.422 (62.368)	lr 0.02431
Train [13][2480/3239]	Time 0.218 (0.534)	Data Time 0.001 (0.013)	Loss 3.4494 (3.6353)	Entropy 1.78656 (1.79196)	Top-1 acc 42.188 (38.311)	Top-5 acc 67.188 (62.370)	lr 0.02431
Train [13][2490/3239]	Time 0.201 (0.534)	Data Time 0.001 (0.013)	Loss 3.5706 (3.6352)	Entropy 1.78651 (1.79194)	Top-1 acc 39.844 (38.312)	Top-5 acc 65.625 (62.376)	lr 0.02431
Train [13][2500/3239]	Time 0.207 (0.533)	Data Time 0.002 (0.013)	Loss 3.5935 (3.6353)	Entropy 1.78649 (1.79192)	Top-1 acc 39.844 (38.306)	Top-5 acc 62.891 (62.376)	lr 0.02431
Train [13][2510/3239]	Time 0.277 (0.533)	Data Time 0.001 (0.013)	Loss 3.6189 (3.6355)	Entropy 1.78644 (1.79190)	Top-1 acc 41.016 (38.306)	Top-5 acc 62.891 (62.372)	lr 0.02431
Train [13][2520/3239]	Time 0.194 (0.532)	Data Time 0.001 (0.013)	Loss 3.6690 (3.6354)	Entropy 1.78642 (1.79188)	Top-1 acc 34.375 (38.303)	Top-5 acc 61.719 (62.372)	lr 0.02431
Train [13][2530/3239]	Time 0.225 (0.532)	Data Time 0.001 (0.013)	Loss 3.5540 (3.6354)	Entropy 1.78640 (1.79186)	Top-1 acc 35.156 (38.301)	Top-5 acc 61.719 (62.374)	lr 0.02431
Train [13][2540/3239]	Time 2.301 (0.532)	Data Time 0.001 (0.013)	Loss 3.6574 (3.6351)	Entropy 1.78640 (1.79183)	Top-1 acc 39.844 (38.305)	Top-5 acc 62.500 (62.381)	lr 0.02431
Train [13][2550/3239]	Time 0.222 (0.530)	Data Time 0.001 (0.013)	Loss 3.5480 (3.6351)	Entropy 1.78637 (1.79181)	Top-1 acc 39.844 (38.309)	Top-5 acc 64.844 (62.380)	lr 0.02431
Train [13][2560/3239]	Time 0.172 (0.546)	Data Time 0.003 (0.013)	Loss 3.6710 (3.6352)	Entropy 1.78631 (1.79179)	Top-1 acc 37.891 (38.305)	Top-5 acc 61.719 (62.380)	lr 0.02431
Train [13][2570/3239]	Time 0.212 (0.545)	Data Time 0.001 (0.013)	Loss 3.6641 (3.6356)	Entropy 1.78626 (1.79177)	Top-1 acc 37.500 (38.300)	Top-5 acc 59.766 (62.371)	lr 0.02431
Train [13][2580/3239]	Time 0.295 (0.545)	Data Time 0.002 (0.013)	Loss 3.4557 (3.6355)	Entropy 1.78608 (1.79175)	Top-1 acc 45.312 (38.301)	Top-5 acc 67.969 (62.369)	lr 0.02431
Train [13][2590/3239]	Time 0.155 (0.544)	Data Time 0.001 (0.013)	Loss 3.6327 (3.6356)	Entropy 1.78606 (1.79173)	Top-1 acc 40.234 (38.300)	Top-5 acc 67.188 (62.369)	lr 0.02430
Train [13][2600/3239]	Time 0.201 (0.544)	Data Time 0.001 (0.013)	Loss 3.5639 (3.6355)	Entropy 1.78606 (1.79170)	Top-1 acc 40.625 (38.301)	Top-5 acc 65.234 (62.369)	lr 0.02430
Train [13][2610/3239]	Time 0.211 (0.544)	Data Time 0.001 (0.013)	Loss 3.5662 (3.6355)	Entropy 1.78608 (1.79168)	Top-1 acc 40.234 (38.299)	Top-5 acc 64.453 (62.370)	lr 0.02430
Train [13][2620/3239]	Time 0.270 (0.543)	Data Time 0.001 (0.012)	Loss 3.6542 (3.6358)	Entropy 1.78607 (1.79166)	Top-1 acc 39.453 (38.290)	Top-5 acc 63.281 (62.367)	lr 0.02430
Train [13][2630/3239]	Time 0.239 (0.543)	Data Time 0.002 (0.012)	Loss 3.4925 (3.6359)	Entropy 1.78605 (1.79164)	Top-1 acc 41.016 (38.289)	Top-5 acc 64.844 (62.365)	lr 0.02430
Train [13][2640/3239]	Time 0.235 (0.542)	Data Time 0.001 (0.012)	Loss 3.5570 (3.6356)	Entropy 1.78602 (1.79162)	Top-1 acc 43.359 (38.296)	Top-5 acc 65.625 (62.371)	lr 0.02430
Train [13][2650/3239]	Time 0.335 (0.542)	Data Time 0.001 (0.012)	Loss 3.6292 (3.6356)	Entropy 1.78600 (1.79160)	Top-1 acc 39.062 (38.299)	Top-5 acc 62.500 (62.374)	lr 0.02430
Train [13][2660/3239]	Time 0.232 (0.541)	Data Time 0.001 (0.012)	Loss 3.5597 (3.6354)	Entropy 1.78589 (1.79158)	Top-1 acc 38.672 (38.299)	Top-5 acc 63.281 (62.369)	lr 0.02430
Train [13][2670/3239]	Time 0.200 (0.541)	Data Time 0.001 (0.012)	Loss 3.5545 (3.6352)	Entropy 1.78578 (1.79156)	Top-1 acc 39.844 (38.305)	Top-5 acc 64.844 (62.373)	lr 0.02430
Train [13][2680/3239]	Time 0.203 (0.540)	Data Time 0.001 (0.012)	Loss 3.6021 (3.6352)	Entropy 1.78573 (1.79153)	Top-1 acc 36.328 (38.303)	Top-5 acc 63.281 (62.375)	lr 0.02430
Train [13][2690/3239]	Time 0.207 (0.540)	Data Time 0.001 (0.012)	Loss 3.3834 (3.6352)	Entropy 1.78568 (1.79151)	Top-1 acc 43.359 (38.305)	Top-5 acc 69.141 (62.374)	lr 0.02430
Train [13][2700/3239]	Time 0.237 (0.539)	Data Time 0.001 (0.012)	Loss 3.7288 (3.6351)	Entropy 1.78569 (1.79149)	Top-1 acc 35.547 (38.309)	Top-5 acc 57.031 (62.374)	lr 0.02430
Train [13][2710/3239]	Time 0.212 (0.539)	Data Time 0.001 (0.012)	Loss 3.5634 (3.6350)	Entropy 1.78561 (1.79147)	Top-1 acc 36.328 (38.312)	Top-5 acc 64.453 (62.376)	lr 0.02430
Train [13][2720/3239]	Time 0.218 (0.539)	Data Time 0.002 (0.012)	Loss 3.5787 (3.6349)	Entropy 1.78554 (1.79145)	Top-1 acc 40.234 (38.312)	Top-5 acc 66.797 (62.378)	lr 0.02430
Train [13][2730/3239]	Time 0.281 (0.538)	Data Time 0.001 (0.012)	Loss 3.6147 (3.6351)	Entropy 1.78548 (1.79143)	Top-1 acc 39.062 (38.306)	Top-5 acc 60.938 (62.373)	lr 0.02430
Train [13][2740/3239]	Time 0.294 (0.538)	Data Time 0.001 (0.012)	Loss 3.7395 (3.6351)	Entropy 1.78543 (1.79140)	Top-1 acc 38.672 (38.301)	Top-5 acc 60.938 (62.371)	lr 0.02430
Train [13][2750/3239]	Time 0.231 (0.537)	Data Time 0.001 (0.012)	Loss 3.7972 (3.6352)	Entropy 1.78542 (1.79138)	Top-1 acc 35.547 (38.301)	Top-5 acc 58.984 (62.370)	lr 0.02430
Train [13][2760/3239]	Time 0.204 (0.537)	Data Time 0.001 (0.012)	Loss 3.5982 (3.6350)	Entropy 1.78539 (1.79136)	Top-1 acc 41.797 (38.307)	Top-5 acc 62.500 (62.374)	lr 0.02430
Train [13][2770/3239]	Time 0.206 (0.536)	Data Time 0.001 (0.012)	Loss 3.7240 (3.6348)	Entropy 1.78536 (1.79134)	Top-1 acc 34.375 (38.309)	Top-5 acc 59.766 (62.376)	lr 0.02430
Train [13][2780/3239]	Time 0.248 (0.536)	Data Time 0.001 (0.012)	Loss 3.6664 (3.6349)	Entropy 1.78533 (1.79132)	Top-1 acc 39.062 (38.311)	Top-5 acc 63.281 (62.377)	lr 0.02430
Train [13][2790/3239]	Time 0.185 (0.536)	Data Time 0.001 (0.012)	Loss 3.4833 (3.6346)	Entropy 1.78530 (1.79130)	Top-1 acc 42.969 (38.317)	Top-5 acc 66.797 (62.383)	lr 0.02430
Train [13][2800/3239]	Time 0.202 (0.535)	Data Time 0.001 (0.012)	Loss 3.7303 (3.6346)	Entropy 1.78522 (1.79128)	Top-1 acc 36.328 (38.317)	Top-5 acc 60.156 (62.381)	lr 0.02430
Train [13][2810/3239]	Time 0.326 (0.535)	Data Time 0.001 (0.012)	Loss 3.5312 (3.6347)	Entropy 1.78516 (1.79125)	Top-1 acc 42.578 (38.316)	Top-5 acc 64.062 (62.382)	lr 0.02430
Train [13][2820/3239]	Time 0.168 (0.535)	Data Time 0.002 (0.012)	Loss 3.6364 (3.6344)	Entropy 1.78509 (1.79123)	Top-1 acc 37.891 (38.320)	Top-5 acc 66.797 (62.386)	lr 0.02430
Train [13][2830/3239]	Time 0.140 (0.534)	Data Time 0.001 (0.012)	Loss 3.6494 (3.6343)	Entropy 1.78511 (1.79121)	Top-1 acc 38.281 (38.321)	Top-5 acc 62.109 (62.387)	lr 0.02430
Train [13][2840/3239]	Time 0.199 (0.534)	Data Time 0.001 (0.012)	Loss 3.5807 (3.6344)	Entropy 1.78511 (1.79119)	Top-1 acc 38.281 (38.319)	Top-5 acc 65.625 (62.387)	lr 0.02430
Train [13][2850/3239]	Time 0.250 (0.533)	Data Time 0.002 (0.012)	Loss 3.6181 (3.6343)	Entropy 1.78512 (1.79117)	Top-1 acc 37.109 (38.317)	Top-5 acc 62.500 (62.390)	lr 0.02430
Train [13][2860/3239]	Time 0.194 (0.533)	Data Time 0.001 (0.012)	Loss 3.6645 (3.6342)	Entropy 1.78508 (1.79115)	Top-1 acc 36.719 (38.314)	Top-5 acc 62.891 (62.392)	lr 0.02430
Train [13][2870/3239]	Time 0.253 (0.533)	Data Time 0.002 (0.012)	Loss 3.4053 (3.6340)	Entropy 1.78500 (1.79113)	Top-1 acc 42.969 (38.320)	Top-5 acc 68.750 (62.396)	lr 0.02430
Train [13][2880/3239]	Time 0.209 (0.532)	Data Time 0.001 (0.012)	Loss 3.5888 (3.6339)	Entropy 1.78494 (1.79110)	Top-1 acc 37.109 (38.318)	Top-5 acc 61.719 (62.394)	lr 0.02430
Train [13][2890/3239]	Time 0.414 (0.545)	Data Time 0.004 (0.012)	Loss 3.5447 (3.6339)	Entropy 1.78487 (1.79108)	Top-1 acc 36.328 (38.323)	Top-5 acc 64.844 (62.396)	lr 0.02429
Train [13][2900/3239]	Time 0.201 (0.545)	Data Time 0.002 (0.012)	Loss 3.4306 (3.6339)	Entropy 1.78479 (1.79106)	Top-1 acc 43.359 (38.322)	Top-5 acc 68.359 (62.397)	lr 0.02429
Train [13][2910/3239]	Time 0.219 (0.545)	Data Time 0.002 (0.011)	Loss 3.6157 (3.6338)	Entropy 1.78475 (1.79104)	Top-1 acc 39.453 (38.321)	Top-5 acc 65.234 (62.397)	lr 0.02429
Train [13][2920/3239]	Time 0.190 (0.544)	Data Time 0.002 (0.011)	Loss 3.5677 (3.6336)	Entropy 1.78477 (1.79102)	Top-1 acc 37.500 (38.326)	Top-5 acc 62.109 (62.397)	lr 0.02429
Train [13][2930/3239]	Time 0.256 (0.544)	Data Time 0.001 (0.011)	Loss 3.5322 (3.6336)	Entropy 1.78464 (1.79100)	Top-1 acc 41.016 (38.326)	Top-5 acc 65.234 (62.400)	lr 0.02429
Train [13][2940/3239]	Time 0.215 (0.544)	Data Time 0.001 (0.011)	Loss 3.6123 (3.6336)	Entropy 1.78463 (1.79098)	Top-1 acc 40.234 (38.328)	Top-5 acc 63.672 (62.397)	lr 0.02429
Train [13][2950/3239]	Time 0.208 (0.543)	Data Time 0.002 (0.011)	Loss 3.4622 (3.6335)	Entropy 1.78460 (1.79095)	Top-1 acc 41.016 (38.330)	Top-5 acc 64.453 (62.403)	lr 0.02429
Train [13][2960/3239]	Time 0.235 (0.543)	Data Time 0.002 (0.011)	Loss 3.8417 (3.6336)	Entropy 1.78452 (1.79093)	Top-1 acc 36.719 (38.330)	Top-5 acc 59.375 (62.401)	lr 0.02429
Train [13][2970/3239]	Time 0.217 (0.542)	Data Time 0.001 (0.011)	Loss 3.5427 (3.6337)	Entropy 1.78444 (1.79091)	Top-1 acc 40.625 (38.331)	Top-5 acc 66.797 (62.401)	lr 0.02429
Train [13][2980/3239]	Time 0.190 (0.542)	Data Time 0.001 (0.011)	Loss 3.7074 (3.6336)	Entropy 1.78429 (1.79089)	Top-1 acc 39.844 (38.333)	Top-5 acc 59.766 (62.399)	lr 0.02429
Train [13][2990/3239]	Time 0.233 (0.541)	Data Time 0.001 (0.011)	Loss 3.5575 (3.6335)	Entropy 1.78426 (1.79087)	Top-1 acc 41.797 (38.331)	Top-5 acc 65.234 (62.400)	lr 0.02429
Train [13][3000/3239]	Time 0.226 (0.541)	Data Time 0.001 (0.011)	Loss 3.6900 (3.6335)	Entropy 1.78422 (1.79084)	Top-1 acc 36.328 (38.329)	Top-5 acc 63.281 (62.402)	lr 0.02429
Train [13][3010/3239]	Time 0.202 (0.541)	Data Time 0.001 (0.011)	Loss 3.8790 (3.6336)	Entropy 1.78418 (1.79082)	Top-1 acc 37.891 (38.330)	Top-5 acc 59.766 (62.399)	lr 0.02429
Train [13][3020/3239]	Time 0.202 (0.540)	Data Time 0.001 (0.011)	Loss 3.5264 (3.6335)	Entropy 1.78412 (1.79080)	Top-1 acc 42.578 (38.333)	Top-5 acc 61.719 (62.402)	lr 0.02429
Train [13][3030/3239]	Time 0.147 (0.540)	Data Time 0.001 (0.011)	Loss 3.4752 (3.6334)	Entropy 1.78408 (1.79078)	Top-1 acc 45.703 (38.338)	Top-5 acc 67.188 (62.405)	lr 0.02429
Train [13][3040/3239]	Time 0.386 (0.540)	Data Time 0.001 (0.011)	Loss 3.6760 (3.6334)	Entropy 1.78405 (1.79076)	Top-1 acc 37.500 (38.338)	Top-5 acc 60.156 (62.403)	lr 0.02429
Train [13][3050/3239]	Time 0.185 (0.539)	Data Time 0.001 (0.011)	Loss 3.7864 (3.6334)	Entropy 1.78401 (1.79073)	Top-1 acc 35.547 (38.338)	Top-5 acc 62.500 (62.404)	lr 0.02429
Train [13][3060/3239]	Time 0.251 (0.539)	Data Time 0.002 (0.011)	Loss 3.4373 (3.6333)	Entropy 1.78392 (1.79071)	Top-1 acc 45.312 (38.340)	Top-5 acc 65.234 (62.406)	lr 0.02429
Train [13][3070/3239]	Time 0.207 (0.538)	Data Time 0.001 (0.011)	Loss 3.8755 (3.6334)	Entropy 1.78389 (1.79069)	Top-1 acc 32.422 (38.339)	Top-5 acc 58.203 (62.403)	lr 0.02429
Train [13][3080/3239]	Time 0.254 (0.538)	Data Time 0.002 (0.011)	Loss 3.4821 (3.6332)	Entropy 1.78387 (1.79067)	Top-1 acc 43.359 (38.342)	Top-5 acc 65.234 (62.405)	lr 0.02429
Train [13][3090/3239]	Time 0.207 (0.538)	Data Time 0.001 (0.011)	Loss 3.9104 (3.6332)	Entropy 1.78385 (1.79065)	Top-1 acc 32.812 (38.345)	Top-5 acc 56.250 (62.404)	lr 0.02429
Train [13][3100/3239]	Time 0.230 (0.537)	Data Time 0.001 (0.011)	Loss 3.5315 (3.6331)	Entropy 1.78383 (1.79062)	Top-1 acc 43.359 (38.347)	Top-5 acc 66.406 (62.406)	lr 0.02429
Train [13][3110/3239]	Time 0.189 (0.537)	Data Time 0.001 (0.011)	Loss 3.7088 (3.6333)	Entropy 1.78376 (1.79060)	Top-1 acc 39.062 (38.342)	Top-5 acc 62.109 (62.403)	lr 0.02429
Train [13][3120/3239]	Time 0.207 (0.537)	Data Time 0.001 (0.011)	Loss 3.7376 (3.6334)	Entropy 1.78371 (1.79058)	Top-1 acc 35.938 (38.337)	Top-5 acc 59.766 (62.398)	lr 0.02429
Train [13][3130/3239]	Time 0.214 (0.536)	Data Time 0.001 (0.011)	Loss 3.5783 (3.6334)	Entropy 1.78368 (1.79056)	Top-1 acc 41.406 (38.342)	Top-5 acc 63.672 (62.397)	lr 0.02429
Train [13][3140/3239]	Time 0.227 (0.536)	Data Time 0.001 (0.011)	Loss 3.6317 (3.6334)	Entropy 1.78359 (1.79054)	Top-1 acc 42.188 (38.342)	Top-5 acc 62.891 (62.396)	lr 0.02429
Train [13][3150/3239]	Time 0.262 (0.536)	Data Time 0.001 (0.011)	Loss 3.6213 (3.6334)	Entropy 1.78357 (1.79051)	Top-1 acc 39.453 (38.341)	Top-5 acc 62.109 (62.396)	lr 0.02429
Train [13][3160/3239]	Time 0.254 (0.535)	Data Time 0.001 (0.011)	Loss 3.3740 (3.6333)	Entropy 1.78354 (1.79049)	Top-1 acc 41.797 (38.341)	Top-5 acc 67.578 (62.397)	lr 0.02429
Train [13][3170/3239]	Time 0.263 (0.535)	Data Time 0.001 (0.011)	Loss 3.7221 (3.6333)	Entropy 1.78356 (1.79047)	Top-1 acc 39.844 (38.339)	Top-5 acc 62.109 (62.399)	lr 0.02429
Train [13][3180/3239]	Time 0.206 (0.535)	Data Time 0.000 (0.011)	Loss 3.4187 (3.6331)	Entropy 1.78347 (1.79045)	Top-1 acc 43.750 (38.343)	Top-5 acc 68.359 (62.404)	lr 0.02429
Train [13][3190/3239]	Time 0.197 (0.534)	Data Time 0.000 (0.011)	Loss 3.6956 (3.6330)	Entropy 1.78336 (1.79043)	Top-1 acc 34.375 (38.340)	Top-5 acc 58.594 (62.402)	lr 0.02428
Train [13][3200/3239]	Time 0.196 (0.534)	Data Time 0.000 (0.011)	Loss 3.7057 (3.6331)	Entropy 1.78336 (1.79041)	Top-1 acc 39.062 (38.340)	Top-5 acc 60.938 (62.400)	lr 0.02428
Train [13][3210/3239]	Time 0.205 (0.533)	Data Time 0.000 (0.011)	Loss 3.8067 (3.6331)	Entropy 1.78329 (1.79038)	Top-1 acc 34.375 (38.339)	Top-5 acc 57.812 (62.400)	lr 0.02428
Train [13][3220/3239]	Time 0.223 (0.546)	Data Time 0.000 (0.011)	Loss 3.6691 (3.6329)	Entropy 1.78327 (1.79036)	Top-1 acc 38.281 (38.344)	Top-5 acc 61.328 (62.407)	lr 0.02428
Train [13][3230/3239]	Time 0.201 (0.546)	Data Time 0.000 (0.011)	Loss 3.4141 (3.6328)	Entropy 1.78328 (1.79034)	Top-1 acc 41.016 (38.347)	Top-5 acc 66.797 (62.409)	lr 0.02428
Train [13][3239/3239]	Time 2.116 (0.545)	Data Time 0.000 (0.010)	Loss 3.7890 (3.6329)	Entropy 1.78328 (1.79032)	Top-1 acc 29.630 (38.348)	Top-5 acc 60.494 (62.406)	lr 0.02428
==========Valid [13/120]	loss 2.447	top-1 acc 47.540 (47.540)	top-5 acc 71.789	Train top-1 38.348	top-5 62.406	Entropy 1.78328	Latency-None: 0.000ms	Flops: 522.98M
Train [14][0/3239]	Time 27.459 (27.459)	Data Time 26.603 (26.603)	Loss 3.6252 (3.6252)	Entropy 1.78324 (1.78324)	Top-1 acc 37.500 (37.500)	Top-5 acc 63.672 (63.672)	lr 0.02428
Train [14][10/3239]	Time 2.251 (2.974)	Data Time 0.001 (2.421)	Loss 3.6934 (3.6092)	Entropy 1.78324 (1.78324)	Top-1 acc 37.891 (39.986)	Top-5 acc 62.109 (62.749)	lr 0.02428
Train [14][20/3239]	Time 0.195 (1.658)	Data Time 0.001 (1.268)	Loss 3.7107 (3.6352)	Entropy 1.78318 (1.78321)	Top-1 acc 36.328 (39.174)	Top-5 acc 60.938 (62.426)	lr 0.02428
Train [14][30/3239]	Time 0.337 (1.264)	Data Time 0.001 (0.860)	Loss 3.5581 (3.6386)	Entropy 1.78313 (1.78319)	Top-1 acc 38.672 (38.722)	Top-5 acc 63.672 (62.273)	lr 0.02428
Train [14][40/3239]	Time 0.233 (1.058)	Data Time 0.001 (0.650)	Loss 3.8996 (3.6422)	Entropy 1.78309 (1.78316)	Top-1 acc 34.766 (38.739)	Top-5 acc 55.859 (62.138)	lr 0.02428
Train [14][50/3239]	Time 0.254 (0.932)	Data Time 0.001 (0.523)	Loss 3.4598 (3.6241)	Entropy 1.78308 (1.78315)	Top-1 acc 40.625 (38.971)	Top-5 acc 65.234 (62.423)	lr 0.02428
Train [14][60/3239]	Time 0.202 (0.849)	Data Time 0.001 (0.438)	Loss 3.5102 (3.6093)	Entropy 1.78303 (1.78313)	Top-1 acc 37.500 (39.050)	Top-5 acc 61.328 (62.775)	lr 0.02428
Train [14][70/3239]	Time 0.206 (0.791)	Data Time 0.001 (0.376)	Loss 3.6586 (3.6057)	Entropy 1.78303 (1.78312)	Top-1 acc 37.109 (39.140)	Top-5 acc 61.719 (62.830)	lr 0.02428
Train [14][80/3239]	Time 0.198 (0.745)	Data Time 0.001 (0.330)	Loss 3.2927 (3.6045)	Entropy 1.78294 (1.78310)	Top-1 acc 44.531 (39.241)	Top-5 acc 70.703 (62.876)	lr 0.02428
Train [14][90/3239]	Time 0.153 (0.707)	Data Time 0.002 (0.294)	Loss 3.6704 (3.6026)	Entropy 1.78286 (1.78308)	Top-1 acc 34.766 (39.196)	Top-5 acc 60.547 (63.007)	lr 0.02428
Train [14][100/3239]	Time 0.208 (0.682)	Data Time 0.002 (0.266)	Loss 3.5927 (3.5958)	Entropy 1.78274 (1.78306)	Top-1 acc 38.281 (39.341)	Top-5 acc 62.500 (63.212)	lr 0.02428
Train [14][110/3239]	Time 0.232 (0.660)	Data Time 0.002 (0.242)	Loss 3.3543 (3.5924)	Entropy 1.78269 (1.78303)	Top-1 acc 41.016 (39.341)	Top-5 acc 68.750 (63.309)	lr 0.02428
Train [14][120/3239]	Time 2.311 (0.641)	Data Time 0.001 (0.222)	Loss 3.5204 (3.5925)	Entropy 1.78269 (1.78300)	Top-1 acc 42.578 (39.408)	Top-5 acc 66.406 (63.294)	lr 0.02428
Train [14][130/3239]	Time 0.163 (0.609)	Data Time 0.001 (0.205)	Loss 3.5567 (3.5932)	Entropy 1.78268 (1.78298)	Top-1 acc 40.234 (39.447)	Top-5 acc 64.062 (63.260)	lr 0.02428
Train [14][140/3239]	Time 0.197 (0.596)	Data Time 0.001 (0.191)	Loss 3.7995 (3.5982)	Entropy 1.78264 (1.78295)	Top-1 acc 34.375 (39.323)	Top-5 acc 53.906 (63.126)	lr 0.02428
Train [14][150/3239]	Time 0.229 (0.585)	Data Time 0.001 (0.178)	Loss 3.5550 (3.5971)	Entropy 1.78261 (1.78293)	Top-1 acc 39.844 (39.319)	Top-5 acc 60.938 (63.095)	lr 0.02428
Train [14][160/3239]	Time 0.176 (0.574)	Data Time 0.001 (0.167)	Loss 3.3858 (3.5982)	Entropy 1.78258 (1.78291)	Top-1 acc 44.141 (39.300)	Top-5 acc 66.406 (63.034)	lr 0.02428
Train [14][170/3239]	Time 0.212 (0.566)	Data Time 0.001 (0.158)	Loss 3.6283 (3.6000)	Entropy 1.78255 (1.78289)	Top-1 acc 37.109 (39.197)	Top-5 acc 60.938 (63.019)	lr 0.02428
Train [14][180/3239]	Time 0.214 (0.558)	Data Time 0.001 (0.149)	Loss 3.6711 (3.6031)	Entropy 1.78256 (1.78287)	Top-1 acc 37.891 (39.170)	Top-5 acc 60.547 (62.973)	lr 0.02428
Train [14][190/3239]	Time 0.181 (0.552)	Data Time 0.001 (0.141)	Loss 3.3987 (3.5970)	Entropy 1.78254 (1.78285)	Top-1 acc 44.141 (39.318)	Top-5 acc 66.406 (63.116)	lr 0.02428
Train [14][200/3239]	Time 0.197 (0.546)	Data Time 0.001 (0.135)	Loss 3.5415 (3.5959)	Entropy 1.78249 (1.78284)	Top-1 acc 39.062 (39.270)	Top-5 acc 62.109 (63.145)	lr 0.02428
Train [14][210/3239]	Time 0.249 (0.539)	Data Time 0.001 (0.128)	Loss 3.4219 (3.5927)	Entropy 1.78245 (1.78282)	Top-1 acc 42.188 (39.290)	Top-5 acc 64.844 (63.168)	lr 0.02428
Train [14][220/3239]	Time 0.205 (0.533)	Data Time 0.001 (0.123)	Loss 3.7088 (3.5937)	Entropy 1.78246 (1.78280)	Top-1 acc 32.812 (39.269)	Top-5 acc 60.938 (63.177)	lr 0.02428
Train [14][230/3239]	Time 2.194 (0.529)	Data Time 0.001 (0.117)	Loss 3.4563 (3.5927)	Entropy 1.78246 (1.78279)	Top-1 acc 38.281 (39.255)	Top-5 acc 66.016 (63.197)	lr 0.02428
Train [14][240/3239]	Time 0.219 (0.515)	Data Time 0.001 (0.113)	Loss 3.6835 (3.5946)	Entropy 1.78245 (1.78278)	Top-1 acc 37.109 (39.234)	Top-5 acc 64.062 (63.137)	lr 0.02427
Train [14][250/3239]	Time 0.177 (0.511)	Data Time 0.001 (0.108)	Loss 3.6980 (3.5957)	Entropy 1.78244 (1.78276)	Top-1 acc 40.234 (39.221)	Top-5 acc 61.719 (63.096)	lr 0.02427
Train [14][260/3239]	Time 0.237 (0.508)	Data Time 0.001 (0.104)	Loss 3.2471 (3.5921)	Entropy 1.78239 (1.78275)	Top-1 acc 47.266 (39.266)	Top-5 acc 68.750 (63.156)	lr 0.02427
Train [14][270/3239]	Time 0.299 (0.505)	Data Time 0.001 (0.100)	Loss 3.3702 (3.5884)	Entropy 1.78233 (1.78273)	Top-1 acc 40.625 (39.333)	Top-5 acc 66.406 (63.252)	lr 0.02427
Train [14][280/3239]	Time 0.210 (0.502)	Data Time 0.001 (0.097)	Loss 3.5471 (3.5872)	Entropy 1.78229 (1.78272)	Top-1 acc 41.016 (39.386)	Top-5 acc 64.453 (63.288)	lr 0.02427
Train [14][290/3239]	Time 0.196 (0.499)	Data Time 0.001 (0.093)	Loss 3.6192 (3.5867)	Entropy 1.78228 (1.78270)	Top-1 acc 40.625 (39.385)	Top-5 acc 63.672 (63.295)	lr 0.02427
Train [14][300/3239]	Time 0.178 (0.496)	Data Time 0.001 (0.090)	Loss 3.6750 (3.5846)	Entropy 1.78214 (1.78269)	Top-1 acc 43.750 (39.457)	Top-5 acc 62.891 (63.357)	lr 0.02427
Train [14][310/3239]	Time 0.212 (0.494)	Data Time 0.001 (0.088)	Loss 3.5449 (3.5854)	Entropy 1.78210 (1.78267)	Top-1 acc 40.234 (39.449)	Top-5 acc 65.625 (63.321)	lr 0.02427
Train [14][320/3239]	Time 0.288 (0.491)	Data Time 0.001 (0.085)	Loss 3.5809 (3.5847)	Entropy 1.78209 (1.78265)	Top-1 acc 41.406 (39.436)	Top-5 acc 67.969 (63.355)	lr 0.02427
Train [14][330/3239]	Time 0.212 (0.489)	Data Time 0.001 (0.082)	Loss 3.6000 (3.5851)	Entropy 1.78211 (1.78263)	Top-1 acc 38.672 (39.427)	Top-5 acc 61.328 (63.327)	lr 0.02427
Train [14][340/3239]	Time 45.889 (0.615)	Data Time 0.001 (0.080)	Loss 3.5565 (3.5838)	Entropy 1.78211 (1.78262)	Top-1 acc 41.016 (39.441)	Top-5 acc 62.891 (63.361)	lr 0.02427
Train [14][350/3239]	Time 0.217 (0.604)	Data Time 0.002 (0.078)	Loss 3.4875 (3.5828)	Entropy 1.78211 (1.78260)	Top-1 acc 43.750 (39.462)	Top-5 acc 64.062 (63.384)	lr 0.02427
Train [14][360/3239]	Time 0.219 (0.599)	Data Time 0.002 (0.076)	Loss 3.6661 (3.5835)	Entropy 1.78207 (1.78259)	Top-1 acc 34.375 (39.412)	Top-5 acc 62.109 (63.360)	lr 0.02427
Train [14][370/3239]	Time 0.212 (0.595)	Data Time 0.001 (0.074)	Loss 3.5222 (3.5836)	Entropy 1.78201 (1.78257)	Top-1 acc 44.141 (39.434)	Top-5 acc 63.281 (63.344)	lr 0.02427
Train [14][380/3239]	Time 0.218 (0.590)	Data Time 0.001 (0.072)	Loss 3.5777 (3.5843)	Entropy 1.78196 (1.78256)	Top-1 acc 41.016 (39.419)	Top-5 acc 62.500 (63.330)	lr 0.02427
Train [14][390/3239]	Time 0.183 (0.586)	Data Time 0.001 (0.070)	Loss 3.6521 (3.5835)	Entropy 1.78191 (1.78254)	Top-1 acc 37.891 (39.440)	Top-5 acc 61.719 (63.344)	lr 0.02427
Train [14][400/3239]	Time 0.205 (0.581)	Data Time 0.001 (0.068)	Loss 3.6454 (3.5831)	Entropy 1.78190 (1.78253)	Top-1 acc 40.625 (39.439)	Top-5 acc 62.500 (63.355)	lr 0.02427
Train [14][410/3239]	Time 0.218 (0.578)	Data Time 0.001 (0.067)	Loss 3.7408 (3.5868)	Entropy 1.78189 (1.78251)	Top-1 acc 34.375 (39.366)	Top-5 acc 58.984 (63.257)	lr 0.02427
Train [14][420/3239]	Time 0.193 (0.574)	Data Time 0.001 (0.065)	Loss 3.6019 (3.5878)	Entropy 1.78184 (1.78250)	Top-1 acc 39.844 (39.351)	Top-5 acc 63.281 (63.251)	lr 0.02427
Train [14][430/3239]	Time 0.203 (0.571)	Data Time 0.002 (0.064)	Loss 3.6048 (3.5883)	Entropy 1.78171 (1.78248)	Top-1 acc 39.453 (39.358)	Top-5 acc 64.453 (63.234)	lr 0.02427
Train [14][440/3239]	Time 0.212 (0.568)	Data Time 0.002 (0.062)	Loss 3.6366 (3.5887)	Entropy 1.78160 (1.78246)	Top-1 acc 38.281 (39.332)	Top-5 acc 62.109 (63.251)	lr 0.02427
Train [14][450/3239]	Time 2.248 (0.565)	Data Time 0.001 (0.061)	Loss 3.8099 (3.5902)	Entropy 1.78160 (1.78244)	Top-1 acc 34.375 (39.293)	Top-5 acc 59.375 (63.225)	lr 0.02427
Train [14][460/3239]	Time 0.172 (0.557)	Data Time 0.001 (0.060)	Loss 3.5841 (3.5903)	Entropy 1.78156 (1.78242)	Top-1 acc 37.891 (39.261)	Top-5 acc 60.547 (63.218)	lr 0.02427
Train [14][470/3239]	Time 0.249 (0.554)	Data Time 0.001 (0.058)	Loss 3.5262 (3.5906)	Entropy 1.78152 (1.78241)	Top-1 acc 38.672 (39.248)	Top-5 acc 65.234 (63.196)	lr 0.02427
Train [14][480/3239]	Time 0.208 (0.551)	Data Time 0.001 (0.057)	Loss 3.5060 (3.5901)	Entropy 1.78139 (1.78238)	Top-1 acc 42.188 (39.257)	Top-5 acc 65.234 (63.215)	lr 0.02427
Train [14][490/3239]	Time 0.220 (0.548)	Data Time 0.001 (0.056)	Loss 3.4775 (3.5908)	Entropy 1.78141 (1.78236)	Top-1 acc 42.578 (39.242)	Top-5 acc 65.234 (63.212)	lr 0.02427
Train [14][500/3239]	Time 0.284 (0.546)	Data Time 0.001 (0.055)	Loss 3.7608 (3.5911)	Entropy 1.78134 (1.78234)	Top-1 acc 33.203 (39.222)	Top-5 acc 56.641 (63.211)	lr 0.02427
Train [14][510/3239]	Time 0.196 (0.543)	Data Time 0.001 (0.054)	Loss 3.8398 (3.5910)	Entropy 1.78126 (1.78232)	Top-1 acc 38.672 (39.241)	Top-5 acc 57.422 (63.206)	lr 0.02427
Train [14][520/3239]	Time 0.208 (0.541)	Data Time 0.001 (0.053)	Loss 3.4304 (3.5910)	Entropy 1.78118 (1.78230)	Top-1 acc 43.750 (39.248)	Top-5 acc 66.016 (63.207)	lr 0.02427
Train [14][530/3239]	Time 0.272 (0.539)	Data Time 0.001 (0.052)	Loss 3.6354 (3.5915)	Entropy 1.78116 (1.78228)	Top-1 acc 39.453 (39.229)	Top-5 acc 58.594 (63.192)	lr 0.02427
Train [14][540/3239]	Time 0.229 (0.537)	Data Time 0.005 (0.051)	Loss 3.5505 (3.5911)	Entropy 1.78114 (1.78226)	Top-1 acc 39.062 (39.224)	Top-5 acc 66.016 (63.192)	lr 0.02426
Train [14][550/3239]	Time 0.225 (0.535)	Data Time 0.001 (0.050)	Loss 3.3907 (3.5908)	Entropy 1.78112 (1.78224)	Top-1 acc 44.141 (39.234)	Top-5 acc 66.797 (63.196)	lr 0.02426
Train [14][560/3239]	Time 2.461 (0.533)	Data Time 0.003 (0.049)	Loss 3.5440 (3.5909)	Entropy 1.78112 (1.78222)	Top-1 acc 35.938 (39.207)	Top-5 acc 66.797 (63.207)	lr 0.02426
Train [14][570/3239]	Time 0.198 (0.527)	Data Time 0.001 (0.048)	Loss 3.6506 (3.5919)	Entropy 1.78108 (1.78220)	Top-1 acc 37.500 (39.175)	Top-5 acc 64.453 (63.199)	lr 0.02426
Train [14][580/3239]	Time 0.314 (0.526)	Data Time 0.001 (0.048)	Loss 3.4735 (3.5922)	Entropy 1.78106 (1.78218)	Top-1 acc 41.406 (39.156)	Top-5 acc 64.844 (63.200)	lr 0.02426
Train [14][590/3239]	Time 0.163 (0.524)	Data Time 0.001 (0.047)	Loss 3.5825 (3.5928)	Entropy 1.78102 (1.78216)	Top-1 acc 40.234 (39.149)	Top-5 acc 62.500 (63.194)	lr 0.02426
Train [14][600/3239]	Time 0.224 (0.523)	Data Time 0.001 (0.046)	Loss 3.6623 (3.5919)	Entropy 1.78086 (1.78214)	Top-1 acc 41.016 (39.194)	Top-5 acc 60.156 (63.206)	lr 0.02426
Train [14][610/3239]	Time 0.135 (0.521)	Data Time 0.001 (0.046)	Loss 3.5990 (3.5918)	Entropy 1.78081 (1.78212)	Top-1 acc 41.016 (39.206)	Top-5 acc 62.109 (63.198)	lr 0.02426
Train [14][620/3239]	Time 0.191 (0.519)	Data Time 0.001 (0.045)	Loss 3.7238 (3.5915)	Entropy 1.78077 (1.78210)	Top-1 acc 36.328 (39.206)	Top-5 acc 62.500 (63.220)	lr 0.02426
Train [14][630/3239]	Time 0.205 (0.518)	Data Time 0.001 (0.044)	Loss 3.4151 (3.5912)	Entropy 1.78065 (1.78208)	Top-1 acc 44.531 (39.211)	Top-5 acc 67.578 (63.225)	lr 0.02426
Train [14][640/3239]	Time 0.248 (0.517)	Data Time 0.001 (0.044)	Loss 3.6627 (3.5913)	Entropy 1.78055 (1.78205)	Top-1 acc 35.547 (39.198)	Top-5 acc 60.938 (63.231)	lr 0.02426
Train [14][650/3239]	Time 0.227 (0.516)	Data Time 0.001 (0.043)	Loss 3.4932 (3.5925)	Entropy 1.78053 (1.78203)	Top-1 acc 42.188 (39.176)	Top-5 acc 67.969 (63.219)	lr 0.02426
Train [14][660/3239]	Time 0.331 (0.515)	Data Time 0.001 (0.042)	Loss 3.3932 (3.5928)	Entropy 1.78049 (1.78201)	Top-1 acc 40.234 (39.174)	Top-5 acc 69.922 (63.220)	lr 0.02426
Train [14][670/3239]	Time 2.378 (0.514)	Data Time 0.002 (0.042)	Loss 3.6463 (3.5929)	Entropy 1.78049 (1.78199)	Top-1 acc 38.281 (39.170)	Top-5 acc 61.719 (63.228)	lr 0.02426
Train [14][680/3239]	Time 0.227 (0.510)	Data Time 0.001 (0.041)	Loss 3.5401 (3.5942)	Entropy 1.78047 (1.78196)	Top-1 acc 44.531 (39.144)	Top-5 acc 62.109 (63.197)	lr 0.02426
Train [14][690/3239]	Time 0.202 (0.509)	Data Time 0.001 (0.041)	Loss 3.3849 (3.5946)	Entropy 1.78043 (1.78194)	Top-1 acc 41.406 (39.129)	Top-5 acc 66.797 (63.178)	lr 0.02426
Train [14][700/3239]	Time 0.229 (0.507)	Data Time 0.001 (0.040)	Loss 3.5094 (3.5945)	Entropy 1.78036 (1.78192)	Top-1 acc 39.844 (39.142)	Top-5 acc 65.625 (63.178)	lr 0.02426
Train [14][710/3239]	Time 0.251 (0.557)	Data Time 0.002 (0.039)	Loss 3.5114 (3.5949)	Entropy 1.78032 (1.78190)	Top-1 acc 43.359 (39.138)	Top-5 acc 65.234 (63.159)	lr 0.02426
Train [14][720/3239]	Time 0.216 (0.557)	Data Time 0.001 (0.039)	Loss 3.6532 (3.5952)	Entropy 1.78028 (1.78187)	Top-1 acc 35.156 (39.115)	Top-5 acc 60.156 (63.147)	lr 0.02426
Train [14][730/3239]	Time 0.245 (0.555)	Data Time 0.001 (0.039)	Loss 3.3880 (3.5954)	Entropy 1.78026 (1.78185)	Top-1 acc 43.359 (39.100)	Top-5 acc 64.844 (63.134)	lr 0.02426
Train [14][740/3239]	Time 0.222 (0.553)	Data Time 0.001 (0.038)	Loss 3.5475 (3.5946)	Entropy 1.78020 (1.78183)	Top-1 acc 37.500 (39.104)	Top-5 acc 67.188 (63.154)	lr 0.02426
Train [14][750/3239]	Time 0.222 (0.552)	Data Time 0.001 (0.038)	Loss 3.6189 (3.5948)	Entropy 1.78010 (1.78181)	Top-1 acc 40.234 (39.104)	Top-5 acc 62.891 (63.156)	lr 0.02426
Train [14][760/3239]	Time 0.207 (0.550)	Data Time 0.001 (0.037)	Loss 3.5749 (3.5952)	Entropy 1.78006 (1.78179)	Top-1 acc 41.406 (39.104)	Top-5 acc 65.625 (63.143)	lr 0.02426
Train [14][770/3239]	Time 0.209 (0.548)	Data Time 0.001 (0.037)	Loss 3.4804 (3.5946)	Entropy 1.77999 (1.78176)	Top-1 acc 41.797 (39.121)	Top-5 acc 66.016 (63.155)	lr 0.02426
Train [14][780/3239]	Time 2.464 (0.547)	Data Time 0.001 (0.036)	Loss 3.5004 (3.5940)	Entropy 1.77999 (1.78174)	Top-1 acc 44.922 (39.140)	Top-5 acc 63.281 (63.155)	lr 0.02426
Train [14][790/3239]	Time 0.188 (0.543)	Data Time 0.001 (0.036)	Loss 3.6692 (3.5933)	Entropy 1.77994 (1.78172)	Top-1 acc 36.719 (39.146)	Top-5 acc 63.281 (63.171)	lr 0.02426
Train [14][800/3239]	Time 0.197 (0.541)	Data Time 0.001 (0.036)	Loss 3.4443 (3.5940)	Entropy 1.77986 (1.78169)	Top-1 acc 44.531 (39.138)	Top-5 acc 67.188 (63.158)	lr 0.02426
Train [14][810/3239]	Time 0.315 (0.540)	Data Time 0.001 (0.035)	Loss 3.6205 (3.5944)	Entropy 1.77980 (1.78167)	Top-1 acc 39.453 (39.150)	Top-5 acc 59.375 (63.151)	lr 0.02426
Train [14][820/3239]	Time 0.228 (0.539)	Data Time 0.002 (0.035)	Loss 3.8331 (3.5945)	Entropy 1.77976 (1.78165)	Top-1 acc 30.469 (39.141)	Top-5 acc 58.203 (63.138)	lr 0.02426
Train [14][830/3239]	Time 0.121 (0.537)	Data Time 0.001 (0.034)	Loss 3.9701 (3.5954)	Entropy 1.77976 (1.78163)	Top-1 acc 29.688 (39.129)	Top-5 acc 51.172 (63.123)	lr 0.02425
Train [14][840/3239]	Time 0.200 (0.536)	Data Time 0.001 (0.034)	Loss 3.4157 (3.5950)	Entropy 1.77972 (1.78160)	Top-1 acc 42.578 (39.141)	Top-5 acc 66.406 (63.144)	lr 0.02425
Train [14][850/3239]	Time 0.217 (0.534)	Data Time 0.001 (0.034)	Loss 3.5818 (3.5944)	Entropy 1.77967 (1.78158)	Top-1 acc 40.625 (39.157)	Top-5 acc 66.406 (63.163)	lr 0.02425
Train [14][860/3239]	Time 0.245 (0.533)	Data Time 0.001 (0.033)	Loss 3.6365 (3.5945)	Entropy 1.77964 (1.78156)	Top-1 acc 41.406 (39.166)	Top-5 acc 61.719 (63.155)	lr 0.02425
Train [14][870/3239]	Time 0.236 (0.532)	Data Time 0.001 (0.033)	Loss 3.5485 (3.5945)	Entropy 1.77959 (1.78154)	Top-1 acc 37.500 (39.167)	Top-5 acc 65.234 (63.148)	lr 0.02425
Train [14][880/3239]	Time 0.317 (0.531)	Data Time 0.001 (0.033)	Loss 3.5169 (3.5949)	Entropy 1.77947 (1.78151)	Top-1 acc 42.969 (39.152)	Top-5 acc 64.844 (63.146)	lr 0.02425
Train [14][890/3239]	Time 2.391 (0.530)	Data Time 0.001 (0.032)	Loss 3.5578 (3.5959)	Entropy 1.77947 (1.78149)	Top-1 acc 37.891 (39.130)	Top-5 acc 67.578 (63.133)	lr 0.02425
Train [14][900/3239]	Time 0.216 (0.527)	Data Time 0.001 (0.032)	Loss 3.6200 (3.5958)	Entropy 1.77928 (1.78147)	Top-1 acc 38.281 (39.141)	Top-5 acc 62.891 (63.135)	lr 0.02425
Train [14][910/3239]	Time 0.201 (0.526)	Data Time 0.001 (0.031)	Loss 3.7496 (3.5952)	Entropy 1.77927 (1.78144)	Top-1 acc 38.281 (39.151)	Top-5 acc 61.719 (63.136)	lr 0.02425
Train [14][920/3239]	Time 0.236 (0.525)	Data Time 0.001 (0.031)	Loss 3.6029 (3.5957)	Entropy 1.77921 (1.78142)	Top-1 acc 40.625 (39.138)	Top-5 acc 62.500 (63.127)	lr 0.02425
Train [14][930/3239]	Time 0.236 (0.523)	Data Time 0.001 (0.031)	Loss 3.6904 (3.5957)	Entropy 1.77919 (1.78139)	Top-1 acc 38.672 (39.140)	Top-5 acc 62.500 (63.138)	lr 0.02425
Train [14][940/3239]	Time 0.200 (0.522)	Data Time 0.001 (0.031)	Loss 3.4976 (3.5951)	Entropy 1.77920 (1.78137)	Top-1 acc 37.109 (39.148)	Top-5 acc 64.844 (63.148)	lr 0.02425
Train [14][950/3239]	Time 0.266 (0.521)	Data Time 0.001 (0.030)	Loss 3.6623 (3.5950)	Entropy 1.77919 (1.78135)	Top-1 acc 37.500 (39.147)	Top-5 acc 62.500 (63.153)	lr 0.02425
Train [14][960/3239]	Time 0.214 (0.521)	Data Time 0.001 (0.030)	Loss 3.5918 (3.5947)	Entropy 1.77917 (1.78133)	Top-1 acc 37.500 (39.151)	Top-5 acc 64.062 (63.159)	lr 0.02425
Train [14][970/3239]	Time 0.211 (0.520)	Data Time 0.001 (0.030)	Loss 3.5838 (3.5947)	Entropy 1.77918 (1.78130)	Top-1 acc 41.797 (39.149)	Top-5 acc 65.625 (63.162)	lr 0.02425
Train [14][980/3239]	Time 0.183 (0.519)	Data Time 0.001 (0.029)	Loss 3.6827 (3.5953)	Entropy 1.77910 (1.78128)	Top-1 acc 37.109 (39.138)	Top-5 acc 63.281 (63.151)	lr 0.02425
Train [14][990/3239]	Time 0.183 (0.517)	Data Time 0.001 (0.029)	Loss 3.3464 (3.5949)	Entropy 1.77903 (1.78126)	Top-1 acc 41.797 (39.151)	Top-5 acc 67.188 (63.156)	lr 0.02425
Train [14][1000/3239]	Time 2.360 (0.517)	Data Time 0.001 (0.029)	Loss 3.5649 (3.5950)	Entropy 1.77903 (1.78124)	Top-1 acc 37.891 (39.150)	Top-5 acc 66.406 (63.164)	lr 0.02425
Train [14][1010/3239]	Time 0.201 (0.514)	Data Time 0.001 (0.029)	Loss 3.5239 (3.5947)	Entropy 1.77893 (1.78121)	Top-1 acc 37.500 (39.164)	Top-5 acc 65.234 (63.177)	lr 0.02425
Train [14][1020/3239]	Time 0.284 (0.513)	Data Time 0.003 (0.028)	Loss 3.5508 (3.5947)	Entropy 1.77892 (1.78119)	Top-1 acc 38.281 (39.164)	Top-5 acc 62.891 (63.176)	lr 0.02425
Train [14][1030/3239]	Time 0.242 (0.512)	Data Time 0.001 (0.028)	Loss 3.6440 (3.5944)	Entropy 1.77891 (1.78117)	Top-1 acc 40.625 (39.171)	Top-5 acc 57.422 (63.174)	lr 0.02425
Train [14][1040/3239]	Time 0.169 (0.512)	Data Time 0.001 (0.028)	Loss 3.4637 (3.5950)	Entropy 1.77888 (1.78115)	Top-1 acc 40.625 (39.161)	Top-5 acc 66.016 (63.160)	lr 0.02425
Train [14][1050/3239]	Time 0.195 (0.511)	Data Time 0.001 (0.028)	Loss 3.6011 (3.5952)	Entropy 1.77881 (1.78113)	Top-1 acc 40.625 (39.148)	Top-5 acc 68.750 (63.154)	lr 0.02425
Train [14][1060/3239]	Time 0.197 (0.510)	Data Time 0.002 (0.027)	Loss 3.4740 (3.5949)	Entropy 1.77875 (1.78110)	Top-1 acc 42.188 (39.153)	Top-5 acc 69.141 (63.164)	lr 0.02425
Train [14][1070/3239]	Time 0.299 (0.547)	Data Time 0.003 (0.027)	Loss 3.7249 (3.5946)	Entropy 1.77872 (1.78108)	Top-1 acc 37.500 (39.155)	Top-5 acc 63.672 (63.174)	lr 0.02425
Train [14][1080/3239]	Time 0.235 (0.546)	Data Time 0.002 (0.027)	Loss 3.5005 (3.5943)	Entropy 1.77863 (1.78106)	Top-1 acc 40.234 (39.159)	Top-5 acc 68.359 (63.182)	lr 0.02425
Train [14][1090/3239]	Time 0.255 (0.545)	Data Time 0.002 (0.027)	Loss 3.5146 (3.5945)	Entropy 1.77861 (1.78104)	Top-1 acc 43.359 (39.149)	Top-5 acc 66.797 (63.184)	lr 0.02425
Train [14][1100/3239]	Time 0.245 (0.544)	Data Time 0.002 (0.026)	Loss 3.4542 (3.5941)	Entropy 1.77854 (1.78102)	Top-1 acc 42.969 (39.161)	Top-5 acc 66.016 (63.190)	lr 0.02425
Train [14][1110/3239]	Time 2.283 (0.543)	Data Time 0.001 (0.026)	Loss 3.6655 (3.5941)	Entropy 1.77854 (1.78099)	Top-1 acc 39.453 (39.167)	Top-5 acc 62.891 (63.186)	lr 0.02425
Train [14][1120/3239]	Time 0.215 (0.540)	Data Time 0.001 (0.026)	Loss 3.7556 (3.5940)	Entropy 1.77839 (1.78097)	Top-1 acc 38.281 (39.165)	Top-5 acc 57.812 (63.186)	lr 0.02424
Train [14][1130/3239]	Time 0.209 (0.539)	Data Time 0.002 (0.026)	Loss 3.5856 (3.5941)	Entropy 1.77838 (1.78095)	Top-1 acc 41.016 (39.164)	Top-5 acc 62.891 (63.184)	lr 0.02424
Train [14][1140/3239]	Time 0.193 (0.538)	Data Time 0.001 (0.026)	Loss 3.4145 (3.5939)	Entropy 1.77839 (1.78092)	Top-1 acc 43.750 (39.173)	Top-5 acc 66.406 (63.182)	lr 0.02424
Train [14][1150/3239]	Time 0.196 (0.537)	Data Time 0.001 (0.025)	Loss 3.4795 (3.5943)	Entropy 1.77831 (1.78090)	Top-1 acc 41.797 (39.163)	Top-5 acc 64.844 (63.168)	lr 0.02424
Train [14][1160/3239]	Time 0.362 (0.536)	Data Time 0.001 (0.025)	Loss 3.5108 (3.5942)	Entropy 1.77823 (1.78088)	Top-1 acc 42.188 (39.156)	Top-5 acc 65.234 (63.171)	lr 0.02424
Train [14][1170/3239]	Time 0.218 (0.536)	Data Time 0.001 (0.025)	Loss 3.4416 (3.5945)	Entropy 1.77818 (1.78086)	Top-1 acc 41.797 (39.143)	Top-5 acc 66.406 (63.169)	lr 0.02424
Train [14][1180/3239]	Time 0.208 (0.535)	Data Time 0.001 (0.025)	Loss 3.6930 (3.5944)	Entropy 1.77810 (1.78083)	Top-1 acc 37.109 (39.143)	Top-5 acc 61.719 (63.176)	lr 0.02424
Train [14][1190/3239]	Time 0.249 (0.534)	Data Time 0.001 (0.025)	Loss 3.4629 (3.5938)	Entropy 1.77810 (1.78081)	Top-1 acc 44.922 (39.151)	Top-5 acc 67.578 (63.191)	lr 0.02424
Train [14][1200/3239]	Time 0.214 (0.533)	Data Time 0.001 (0.024)	Loss 3.5358 (3.5938)	Entropy 1.77806 (1.78079)	Top-1 acc 39.062 (39.160)	Top-5 acc 66.016 (63.186)	lr 0.02424
Train [14][1210/3239]	Time 0.196 (0.532)	Data Time 0.001 (0.024)	Loss 3.5425 (3.5937)	Entropy 1.77801 (1.78077)	Top-1 acc 40.625 (39.159)	Top-5 acc 66.406 (63.187)	lr 0.02424
Train [14][1220/3239]	Time 2.246 (0.532)	Data Time 0.001 (0.024)	Loss 3.6985 (3.5940)	Entropy 1.77801 (1.78074)	Top-1 acc 39.062 (39.156)	Top-5 acc 59.375 (63.187)	lr 0.02424
Train [14][1230/3239]	Time 0.308 (0.529)	Data Time 0.001 (0.024)	Loss 3.7967 (3.5940)	Entropy 1.77795 (1.78072)	Top-1 acc 36.719 (39.161)	Top-5 acc 60.547 (63.190)	lr 0.02424
Train [14][1240/3239]	Time 0.209 (0.528)	Data Time 0.001 (0.024)	Loss 3.6165 (3.5945)	Entropy 1.77792 (1.78070)	Top-1 acc 40.234 (39.153)	Top-5 acc 63.672 (63.180)	lr 0.02424
Train [14][1250/3239]	Time 0.229 (0.528)	Data Time 0.001 (0.024)	Loss 3.5336 (3.5949)	Entropy 1.77790 (1.78068)	Top-1 acc 40.234 (39.143)	Top-5 acc 64.062 (63.170)	lr 0.02424
Train [14][1260/3239]	Time 0.254 (0.527)	Data Time 0.001 (0.023)	Loss 3.5089 (3.5947)	Entropy 1.77782 (1.78065)	Top-1 acc 39.062 (39.143)	Top-5 acc 64.453 (63.178)	lr 0.02424
Train [14][1270/3239]	Time 0.208 (0.526)	Data Time 0.002 (0.023)	Loss 3.4057 (3.5945)	Entropy 1.77776 (1.78063)	Top-1 acc 41.797 (39.141)	Top-5 acc 67.969 (63.179)	lr 0.02424
Train [14][1280/3239]	Time 0.207 (0.525)	Data Time 0.001 (0.023)	Loss 3.5084 (3.5946)	Entropy 1.77770 (1.78061)	Top-1 acc 41.406 (39.146)	Top-5 acc 64.844 (63.173)	lr 0.02424
Train [14][1290/3239]	Time 0.203 (0.524)	Data Time 0.001 (0.023)	Loss 3.5063 (3.5947)	Entropy 1.77766 (1.78058)	Top-1 acc 40.234 (39.137)	Top-5 acc 64.844 (63.171)	lr 0.02424
Train [14][1300/3239]	Time 0.210 (0.524)	Data Time 0.001 (0.023)	Loss 3.8609 (3.5950)	Entropy 1.77763 (1.78056)	Top-1 acc 34.766 (39.136)	Top-5 acc 60.547 (63.172)	lr 0.02424
Train [14][1310/3239]	Time 0.260 (0.523)	Data Time 0.002 (0.023)	Loss 3.5448 (3.5946)	Entropy 1.77762 (1.78054)	Top-1 acc 40.625 (39.137)	Top-5 acc 62.891 (63.180)	lr 0.02424
Train [14][1320/3239]	Time 0.224 (0.522)	Data Time 0.001 (0.022)	Loss 3.5807 (3.5942)	Entropy 1.77761 (1.78052)	Top-1 acc 43.359 (39.141)	Top-5 acc 61.719 (63.189)	lr 0.02424
Train [14][1330/3239]	Time 2.344 (0.521)	Data Time 0.001 (0.022)	Loss 3.5408 (3.5941)	Entropy 1.77761 (1.78050)	Top-1 acc 38.281 (39.133)	Top-5 acc 62.891 (63.188)	lr 0.02424
Train [14][1340/3239]	Time 0.230 (0.519)	Data Time 0.002 (0.022)	Loss 3.4703 (3.5943)	Entropy 1.77758 (1.78047)	Top-1 acc 42.969 (39.136)	Top-5 acc 67.188 (63.187)	lr 0.02424
Train [14][1350/3239]	Time 0.212 (0.518)	Data Time 0.001 (0.022)	Loss 3.4115 (3.5940)	Entropy 1.77754 (1.78045)	Top-1 acc 44.922 (39.147)	Top-5 acc 65.625 (63.196)	lr 0.02424
Train [14][1360/3239]	Time 0.249 (0.518)	Data Time 0.001 (0.022)	Loss 3.3786 (3.5936)	Entropy 1.77743 (1.78043)	Top-1 acc 44.531 (39.147)	Top-5 acc 69.531 (63.205)	lr 0.02424
Train [14][1370/3239]	Time 0.212 (0.517)	Data Time 0.001 (0.022)	Loss 3.3710 (3.5938)	Entropy 1.77742 (1.78041)	Top-1 acc 44.141 (39.148)	Top-5 acc 66.406 (63.203)	lr 0.02424
Train [14][1380/3239]	Time 0.305 (0.516)	Data Time 0.001 (0.021)	Loss 3.4962 (3.5940)	Entropy 1.77740 (1.78039)	Top-1 acc 39.844 (39.145)	Top-5 acc 66.406 (63.203)	lr 0.02424
Train [14][1390/3239]	Time 0.190 (0.516)	Data Time 0.001 (0.021)	Loss 3.6938 (3.5936)	Entropy 1.77733 (1.78037)	Top-1 acc 37.500 (39.155)	Top-5 acc 62.891 (63.216)	lr 0.02424
Train [14][1400/3239]	Time 0.216 (0.515)	Data Time 0.001 (0.021)	Loss 3.4892 (3.5936)	Entropy 1.77734 (1.78034)	Top-1 acc 40.625 (39.155)	Top-5 acc 63.672 (63.217)	lr 0.02424
Train [14][1410/3239]	Time 0.218 (0.515)	Data Time 0.002 (0.021)	Loss 3.5538 (3.5938)	Entropy 1.77731 (1.78032)	Top-1 acc 39.062 (39.151)	Top-5 acc 63.672 (63.213)	lr 0.02423
Train [14][1420/3239]	Time 0.187 (0.514)	Data Time 0.001 (0.021)	Loss 3.4592 (3.5942)	Entropy 1.77730 (1.78030)	Top-1 acc 39.453 (39.144)	Top-5 acc 67.188 (63.203)	lr 0.02423
Train [14][1430/3239]	Time 0.335 (0.540)	Data Time 0.004 (0.021)	Loss 3.6950 (3.5946)	Entropy 1.77722 (1.78028)	Top-1 acc 37.500 (39.131)	Top-5 acc 59.375 (63.196)	lr 0.02423
Train [14][1440/3239]	Time 2.400 (0.540)	Data Time 0.002 (0.021)	Loss 3.6638 (3.5948)	Entropy 1.77722 (1.78026)	Top-1 acc 39.062 (39.124)	Top-5 acc 59.375 (63.185)	lr 0.02423
Train [14][1450/3239]	Time 0.312 (0.538)	Data Time 0.001 (0.020)	Loss 3.4516 (3.5945)	Entropy 1.77714 (1.78024)	Top-1 acc 40.625 (39.127)	Top-5 acc 66.797 (63.194)	lr 0.02423
Train [14][1460/3239]	Time 0.217 (0.537)	Data Time 0.001 (0.020)	Loss 3.5743 (3.5946)	Entropy 1.77713 (1.78022)	Top-1 acc 41.016 (39.125)	Top-5 acc 65.625 (63.192)	lr 0.02423
Train [14][1470/3239]	Time 0.220 (0.536)	Data Time 0.001 (0.020)	Loss 3.5869 (3.5944)	Entropy 1.77710 (1.78019)	Top-1 acc 42.578 (39.131)	Top-5 acc 64.453 (63.196)	lr 0.02423
Train [14][1480/3239]	Time 0.198 (0.535)	Data Time 0.001 (0.020)	Loss 3.6254 (3.5937)	Entropy 1.77711 (1.78017)	Top-1 acc 39.062 (39.149)	Top-5 acc 64.844 (63.212)	lr 0.02423
Train [14][1490/3239]	Time 0.201 (0.535)	Data Time 0.001 (0.020)	Loss 3.5533 (3.5938)	Entropy 1.77706 (1.78015)	Top-1 acc 37.500 (39.148)	Top-5 acc 63.281 (63.209)	lr 0.02423
Train [14][1500/3239]	Time 0.202 (0.534)	Data Time 0.001 (0.020)	Loss 3.5658 (3.5940)	Entropy 1.77699 (1.78013)	Top-1 acc 41.016 (39.141)	Top-5 acc 62.109 (63.203)	lr 0.02423
Train [14][1510/3239]	Time 0.232 (0.533)	Data Time 0.001 (0.020)	Loss 3.4688 (3.5937)	Entropy 1.77690 (1.78011)	Top-1 acc 41.406 (39.149)	Top-5 acc 64.062 (63.210)	lr 0.02423
Train [14][1520/3239]	Time 0.289 (0.532)	Data Time 0.001 (0.020)	Loss 3.6461 (3.5937)	Entropy 1.77689 (1.78009)	Top-1 acc 37.109 (39.139)	Top-5 acc 62.109 (63.211)	lr 0.02423
Train [14][1530/3239]	Time 0.259 (0.532)	Data Time 0.001 (0.020)	Loss 3.6835 (3.5940)	Entropy 1.77686 (1.78007)	Top-1 acc 38.281 (39.135)	Top-5 acc 60.547 (63.198)	lr 0.02423
Train [14][1540/3239]	Time 0.133 (0.531)	Data Time 0.001 (0.019)	Loss 3.7730 (3.5942)	Entropy 1.77684 (1.78005)	Top-1 acc 33.203 (39.127)	Top-5 acc 59.375 (63.191)	lr 0.02423
Train [14][1550/3239]	Time 2.192 (0.530)	Data Time 0.001 (0.019)	Loss 3.8163 (3.5945)	Entropy 1.77684 (1.78003)	Top-1 acc 37.109 (39.135)	Top-5 acc 58.203 (63.187)	lr 0.02423
Train [14][1560/3239]	Time 0.201 (0.528)	Data Time 0.001 (0.019)	Loss 3.6149 (3.5949)	Entropy 1.77682 (1.78001)	Top-1 acc 36.328 (39.132)	Top-5 acc 63.281 (63.177)	lr 0.02423
Train [14][1570/3239]	Time 0.234 (0.528)	Data Time 0.001 (0.019)	Loss 3.6837 (3.5952)	Entropy 1.77680 (1.77999)	Top-1 acc 34.766 (39.124)	Top-5 acc 61.328 (63.168)	lr 0.02423
Train [14][1580/3239]	Time 0.208 (0.527)	Data Time 0.001 (0.019)	Loss 3.6378 (3.5956)	Entropy 1.77682 (1.77997)	Top-1 acc 33.984 (39.114)	Top-5 acc 58.984 (63.161)	lr 0.02423
Train [14][1590/3239]	Time 0.221 (0.526)	Data Time 0.002 (0.019)	Loss 3.7829 (3.5955)	Entropy 1.77675 (1.77995)	Top-1 acc 35.938 (39.118)	Top-5 acc 60.547 (63.169)	lr 0.02423
Train [14][1600/3239]	Time 0.301 (0.526)	Data Time 0.001 (0.019)	Loss 3.6960 (3.5951)	Entropy 1.77674 (1.77993)	Top-1 acc 34.375 (39.121)	Top-5 acc 64.062 (63.179)	lr 0.02423
Train [14][1610/3239]	Time 0.210 (0.525)	Data Time 0.001 (0.019)	Loss 3.5675 (3.5948)	Entropy 1.77671 (1.77991)	Top-1 acc 37.500 (39.126)	Top-5 acc 64.453 (63.186)	lr 0.02423
Train [14][1620/3239]	Time 0.220 (0.525)	Data Time 0.001 (0.019)	Loss 3.6000 (3.5946)	Entropy 1.77667 (1.77989)	Top-1 acc 38.281 (39.136)	Top-5 acc 60.156 (63.195)	lr 0.02423
Train [14][1630/3239]	Time 0.239 (0.524)	Data Time 0.001 (0.018)	Loss 3.6577 (3.5946)	Entropy 1.77666 (1.77987)	Top-1 acc 35.547 (39.137)	Top-5 acc 61.719 (63.188)	lr 0.02423
Train [14][1640/3239]	Time 0.203 (0.523)	Data Time 0.002 (0.018)	Loss 3.7638 (3.5948)	Entropy 1.77665 (1.77985)	Top-1 acc 36.719 (39.130)	Top-5 acc 58.203 (63.181)	lr 0.02423
Train [14][1650/3239]	Time 0.261 (0.523)	Data Time 0.001 (0.018)	Loss 3.5134 (3.5947)	Entropy 1.77656 (1.77983)	Top-1 acc 39.844 (39.128)	Top-5 acc 66.797 (63.183)	lr 0.02423
Train [14][1660/3239]	Time 2.320 (0.522)	Data Time 0.002 (0.018)	Loss 3.4731 (3.5944)	Entropy 1.77656 (1.77981)	Top-1 acc 42.578 (39.140)	Top-5 acc 66.797 (63.191)	lr 0.02423
Train [14][1670/3239]	Time 0.192 (0.520)	Data Time 0.001 (0.018)	Loss 3.7320 (3.5945)	Entropy 1.77653 (1.77979)	Top-1 acc 40.234 (39.145)	Top-5 acc 58.594 (63.190)	lr 0.02423
Train [14][1680/3239]	Time 0.327 (0.520)	Data Time 0.001 (0.018)	Loss 3.6874 (3.5949)	Entropy 1.77653 (1.77977)	Top-1 acc 39.844 (39.144)	Top-5 acc 59.766 (63.179)	lr 0.02423
Train [14][1690/3239]	Time 0.201 (0.519)	Data Time 0.001 (0.018)	Loss 3.5289 (3.5947)	Entropy 1.77649 (1.77975)	Top-1 acc 41.016 (39.148)	Top-5 acc 63.672 (63.185)	lr 0.02422
Train [14][1700/3239]	Time 0.222 (0.519)	Data Time 0.001 (0.018)	Loss 3.5280 (3.5948)	Entropy 1.77644 (1.77973)	Top-1 acc 42.578 (39.141)	Top-5 acc 64.062 (63.186)	lr 0.02422
Train [14][1710/3239]	Time 0.211 (0.518)	Data Time 0.001 (0.018)	Loss 3.6371 (3.5950)	Entropy 1.77644 (1.77971)	Top-1 acc 37.891 (39.133)	Top-5 acc 61.328 (63.180)	lr 0.02422
Train [14][1720/3239]	Time 0.223 (0.518)	Data Time 0.001 (0.018)	Loss 3.4926 (3.5948)	Entropy 1.77640 (1.77969)	Top-1 acc 41.406 (39.140)	Top-5 acc 65.234 (63.188)	lr 0.02422
Train [14][1730/3239]	Time 0.218 (0.517)	Data Time 0.001 (0.018)	Loss 3.5060 (3.5946)	Entropy 1.77630 (1.77967)	Top-1 acc 42.188 (39.141)	Top-5 acc 62.500 (63.189)	lr 0.02422
Train [14][1740/3239]	Time 0.208 (0.517)	Data Time 0.002 (0.017)	Loss 3.6355 (3.5945)	Entropy 1.77617 (1.77965)	Top-1 acc 38.281 (39.145)	Top-5 acc 61.719 (63.188)	lr 0.02422
Train [14][1750/3239]	Time 0.207 (0.516)	Data Time 0.001 (0.017)	Loss 3.6163 (3.5941)	Entropy 1.77615 (1.77963)	Top-1 acc 38.281 (39.154)	Top-5 acc 63.672 (63.195)	lr 0.02422
Train [14][1760/3239]	Time 0.238 (0.516)	Data Time 0.001 (0.017)	Loss 3.5608 (3.5942)	Entropy 1.77608 (1.77961)	Top-1 acc 37.891 (39.155)	Top-5 acc 62.500 (63.188)	lr 0.02422
Train [14][1770/3239]	Time 2.329 (0.515)	Data Time 0.001 (0.017)	Loss 3.5995 (3.5945)	Entropy 1.77608 (1.77959)	Top-1 acc 36.719 (39.146)	Top-5 acc 61.328 (63.180)	lr 0.02422
Train [14][1780/3239]	Time 0.247 (0.514)	Data Time 0.001 (0.017)	Loss 3.4927 (3.5944)	Entropy 1.77593 (1.77957)	Top-1 acc 38.672 (39.139)	Top-5 acc 62.109 (63.179)	lr 0.02422
Train [14][1790/3239]	Time 0.229 (0.513)	Data Time 0.001 (0.017)	Loss 3.6804 (3.5948)	Entropy 1.77597 (1.77955)	Top-1 acc 34.766 (39.128)	Top-5 acc 61.328 (63.176)	lr 0.02422
Train [14][1800/3239]	Time 0.242 (0.536)	Data Time 0.002 (0.017)	Loss 3.4054 (3.5949)	Entropy 1.77592 (1.77953)	Top-1 acc 39.062 (39.123)	Top-5 acc 68.359 (63.175)	lr 0.02422
Train [14][1810/3239]	Time 0.180 (0.535)	Data Time 0.002 (0.017)	Loss 3.6634 (3.5945)	Entropy 1.77589 (1.77951)	Top-1 acc 37.891 (39.127)	Top-5 acc 60.938 (63.182)	lr 0.02422
Train [14][1820/3239]	Time 0.188 (0.535)	Data Time 0.001 (0.017)	Loss 3.5829 (3.5946)	Entropy 1.77588 (1.77949)	Top-1 acc 41.016 (39.128)	Top-5 acc 63.281 (63.180)	lr 0.02422
Train [14][1830/3239]	Time 0.234 (0.534)	Data Time 0.001 (0.017)	Loss 3.4969 (3.5944)	Entropy 1.77587 (1.77947)	Top-1 acc 39.844 (39.136)	Top-5 acc 63.281 (63.179)	lr 0.02422
Train [14][1840/3239]	Time 0.204 (0.533)	Data Time 0.001 (0.017)	Loss 3.5748 (3.5943)	Entropy 1.77583 (1.77945)	Top-1 acc 37.500 (39.136)	Top-5 acc 64.453 (63.188)	lr 0.02422
Train [14][1850/3239]	Time 0.229 (0.533)	Data Time 0.001 (0.017)	Loss 3.8276 (3.5940)	Entropy 1.77582 (1.77943)	Top-1 acc 34.766 (39.132)	Top-5 acc 62.109 (63.193)	lr 0.02422
Train [14][1860/3239]	Time 0.198 (0.532)	Data Time 0.001 (0.016)	Loss 3.3850 (3.5936)	Entropy 1.77559 (1.77941)	Top-1 acc 45.703 (39.143)	Top-5 acc 67.188 (63.204)	lr 0.02422
Train [14][1870/3239]	Time 0.239 (0.532)	Data Time 0.001 (0.016)	Loss 3.5095 (3.5935)	Entropy 1.77554 (1.77939)	Top-1 acc 41.797 (39.146)	Top-5 acc 64.453 (63.208)	lr 0.02422
Train [14][1880/3239]	Time 2.285 (0.531)	Data Time 0.001 (0.016)	Loss 3.4878 (3.5934)	Entropy 1.77554 (1.77937)	Top-1 acc 38.672 (39.149)	Top-5 acc 67.578 (63.205)	lr 0.02422
Train [14][1890/3239]	Time 0.269 (0.529)	Data Time 0.002 (0.016)	Loss 3.6874 (3.5935)	Entropy 1.77545 (1.77935)	Top-1 acc 37.500 (39.144)	Top-5 acc 61.719 (63.205)	lr 0.02422
Train [14][1900/3239]	Time 0.238 (0.529)	Data Time 0.001 (0.016)	Loss 3.5221 (3.5933)	Entropy 1.77543 (1.77933)	Top-1 acc 36.719 (39.144)	Top-5 acc 65.234 (63.211)	lr 0.02422
Train [14][1910/3239]	Time 0.283 (0.528)	Data Time 0.001 (0.016)	Loss 3.6144 (3.5934)	Entropy 1.77540 (1.77931)	Top-1 acc 41.016 (39.143)	Top-5 acc 62.891 (63.211)	lr 0.02422
Train [14][1920/3239]	Time 0.237 (0.528)	Data Time 0.001 (0.016)	Loss 3.6399 (3.5937)	Entropy 1.77536 (1.77929)	Top-1 acc 38.281 (39.135)	Top-5 acc 63.672 (63.201)	lr 0.02422
Train [14][1930/3239]	Time 0.227 (0.527)	Data Time 0.001 (0.016)	Loss 3.3790 (3.5934)	Entropy 1.77536 (1.77927)	Top-1 acc 44.922 (39.143)	Top-5 acc 68.750 (63.210)	lr 0.02422
Train [14][1940/3239]	Time 0.214 (0.527)	Data Time 0.001 (0.016)	Loss 3.4809 (3.5934)	Entropy 1.77528 (1.77925)	Top-1 acc 44.141 (39.140)	Top-5 acc 66.406 (63.211)	lr 0.02422
Train [14][1950/3239]	Time 0.214 (0.527)	Data Time 0.001 (0.016)	Loss 3.4519 (3.5930)	Entropy 1.77518 (1.77923)	Top-1 acc 45.312 (39.150)	Top-5 acc 68.359 (63.221)	lr 0.02422
Train [14][1960/3239]	Time 0.204 (0.526)	Data Time 0.001 (0.016)	Loss 3.4032 (3.5926)	Entropy 1.77514 (1.77921)	Top-1 acc 39.062 (39.162)	Top-5 acc 67.578 (63.228)	lr 0.02422
Train [14][1970/3239]	Time 0.211 (0.526)	Data Time 0.001 (0.016)	Loss 3.6674 (3.5928)	Entropy 1.77514 (1.77919)	Top-1 acc 37.500 (39.158)	Top-5 acc 62.500 (63.225)	lr 0.02422
Train [14][1980/3239]	Time 0.416 (0.525)	Data Time 0.001 (0.016)	Loss 3.4276 (3.5927)	Entropy 1.77508 (1.77917)	Top-1 acc 42.188 (39.159)	Top-5 acc 65.625 (63.224)	lr 0.02421
Train [14][1990/3239]	Time 2.231 (0.525)	Data Time 0.001 (0.016)	Loss 3.7289 (3.5924)	Entropy 1.77508 (1.77915)	Top-1 acc 39.453 (39.165)	Top-5 acc 60.156 (63.236)	lr 0.02421
Train [14][2000/3239]	Time 0.213 (0.523)	Data Time 0.001 (0.015)	Loss 3.6745 (3.5926)	Entropy 1.77507 (1.77913)	Top-1 acc 37.109 (39.162)	Top-5 acc 62.500 (63.225)	lr 0.02421
Train [14][2010/3239]	Time 0.191 (0.523)	Data Time 0.001 (0.015)	Loss 3.5551 (3.5926)	Entropy 1.77506 (1.77911)	Top-1 acc 39.453 (39.164)	Top-5 acc 63.281 (63.226)	lr 0.02421
Train [14][2020/3239]	Time 0.203 (0.522)	Data Time 0.001 (0.015)	Loss 3.5724 (3.5925)	Entropy 1.77504 (1.77909)	Top-1 acc 35.547 (39.166)	Top-5 acc 62.500 (63.227)	lr 0.02421
Train [14][2030/3239]	Time 0.213 (0.522)	Data Time 0.002 (0.015)	Loss 3.5691 (3.5926)	Entropy 1.77504 (1.77907)	Top-1 acc 39.062 (39.159)	Top-5 acc 60.938 (63.225)	lr 0.02421
Train [14][2040/3239]	Time 0.202 (0.521)	Data Time 0.001 (0.015)	Loss 3.4771 (3.5925)	Entropy 1.77500 (1.77905)	Top-1 acc 42.188 (39.162)	Top-5 acc 64.844 (63.230)	lr 0.02421
Train [14][2050/3239]	Time 0.246 (0.521)	Data Time 0.001 (0.015)	Loss 3.5185 (3.5924)	Entropy 1.77495 (1.77903)	Top-1 acc 35.938 (39.162)	Top-5 acc 62.891 (63.229)	lr 0.02421
Train [14][2060/3239]	Time 0.308 (0.520)	Data Time 0.001 (0.015)	Loss 3.6262 (3.5925)	Entropy 1.77495 (1.77901)	Top-1 acc 38.672 (39.155)	Top-5 acc 62.891 (63.228)	lr 0.02421
Train [14][2070/3239]	Time 0.188 (0.520)	Data Time 0.001 (0.015)	Loss 3.7062 (3.5927)	Entropy 1.77491 (1.77899)	Top-1 acc 37.500 (39.152)	Top-5 acc 62.109 (63.225)	lr 0.02421
Train [14][2080/3239]	Time 0.197 (0.520)	Data Time 0.001 (0.015)	Loss 3.6276 (3.5925)	Entropy 1.77490 (1.77897)	Top-1 acc 43.359 (39.157)	Top-5 acc 66.016 (63.228)	lr 0.02421
Train [14][2090/3239]	Time 0.221 (0.519)	Data Time 0.001 (0.015)	Loss 3.6503 (3.5927)	Entropy 1.77488 (1.77895)	Top-1 acc 36.328 (39.157)	Top-5 acc 64.062 (63.227)	lr 0.02421
Train [14][2100/3239]	Time 2.375 (0.519)	Data Time 0.001 (0.015)	Loss 3.5726 (3.5930)	Entropy 1.77488 (1.77893)	Top-1 acc 37.500 (39.151)	Top-5 acc 64.062 (63.223)	lr 0.02421
Train [14][2110/3239]	Time 0.213 (0.517)	Data Time 0.001 (0.015)	Loss 3.6657 (3.5928)	Entropy 1.77485 (1.77891)	Top-1 acc 37.109 (39.154)	Top-5 acc 61.328 (63.227)	lr 0.02421
Train [14][2120/3239]	Time 0.202 (0.517)	Data Time 0.001 (0.015)	Loss 3.7064 (3.5929)	Entropy 1.77484 (1.77889)	Top-1 acc 34.375 (39.156)	Top-5 acc 60.938 (63.223)	lr 0.02421
Train [14][2130/3239]	Time 0.307 (0.516)	Data Time 0.001 (0.015)	Loss 3.5143 (3.5931)	Entropy 1.77471 (1.77887)	Top-1 acc 41.797 (39.156)	Top-5 acc 64.062 (63.224)	lr 0.02421
Train [14][2140/3239]	Time 0.221 (0.516)	Data Time 0.001 (0.015)	Loss 3.9020 (3.5931)	Entropy 1.77457 (1.77885)	Top-1 acc 33.984 (39.157)	Top-5 acc 54.688 (63.217)	lr 0.02421
Train [14][2150/3239]	Time 0.208 (0.516)	Data Time 0.002 (0.015)	Loss 3.6936 (3.5933)	Entropy 1.77451 (1.77883)	Top-1 acc 35.547 (39.156)	Top-5 acc 60.938 (63.214)	lr 0.02421
Train [14][2160/3239]	Time 0.421 (0.533)	Data Time 0.002 (0.014)	Loss 3.4597 (3.5930)	Entropy 1.77446 (1.77881)	Top-1 acc 40.234 (39.160)	Top-5 acc 65.625 (63.221)	lr 0.02421
Train [14][2170/3239]	Time 0.209 (0.532)	Data Time 0.002 (0.014)	Loss 3.5246 (3.5931)	Entropy 1.77437 (1.77879)	Top-1 acc 37.891 (39.159)	Top-5 acc 66.406 (63.220)	lr 0.02421
Train [14][2180/3239]	Time 0.190 (0.532)	Data Time 0.001 (0.014)	Loss 3.7148 (3.5932)	Entropy 1.77430 (1.77877)	Top-1 acc 33.594 (39.161)	Top-5 acc 60.547 (63.219)	lr 0.02421
Train [14][2190/3239]	Time 0.200 (0.532)	Data Time 0.002 (0.014)	Loss 3.6611 (3.5930)	Entropy 1.77430 (1.77875)	Top-1 acc 37.500 (39.164)	Top-5 acc 64.062 (63.222)	lr 0.02421
Train [14][2200/3239]	Time 0.232 (0.531)	Data Time 0.001 (0.014)	Loss 3.7200 (3.5934)	Entropy 1.77426 (1.77873)	Top-1 acc 38.672 (39.163)	Top-5 acc 63.281 (63.216)	lr 0.02421
Train [14][2210/3239]	Time 2.283 (0.531)	Data Time 0.001 (0.014)	Loss 3.5657 (3.5932)	Entropy 1.77426 (1.77871)	Top-1 acc 42.578 (39.174)	Top-5 acc 64.062 (63.219)	lr 0.02421
Train [14][2220/3239]	Time 0.151 (0.529)	Data Time 0.001 (0.014)	Loss 3.8759 (3.5933)	Entropy 1.77419 (1.77869)	Top-1 acc 34.375 (39.174)	Top-5 acc 57.031 (63.215)	lr 0.02421
Train [14][2230/3239]	Time 0.190 (0.529)	Data Time 0.001 (0.014)	Loss 4.1563 (3.5934)	Entropy 1.77422 (1.77867)	Top-1 acc 30.469 (39.165)	Top-5 acc 51.172 (63.211)	lr 0.02421
Train [14][2240/3239]	Time 0.204 (0.528)	Data Time 0.001 (0.014)	Loss 3.4863 (3.5936)	Entropy 1.77414 (1.77865)	Top-1 acc 39.844 (39.164)	Top-5 acc 64.453 (63.204)	lr 0.02421
Train [14][2250/3239]	Time 0.237 (0.528)	Data Time 0.001 (0.014)	Loss 3.6314 (3.5937)	Entropy 1.77414 (1.77863)	Top-1 acc 36.719 (39.168)	Top-5 acc 62.500 (63.202)	lr 0.02421
Train [14][2260/3239]	Time 0.198 (0.528)	Data Time 0.001 (0.014)	Loss 3.5148 (3.5939)	Entropy 1.77412 (1.77861)	Top-1 acc 42.969 (39.163)	Top-5 acc 61.328 (63.199)	lr 0.02420
Train [14][2270/3239]	Time 0.217 (0.527)	Data Time 0.001 (0.014)	Loss 3.4303 (3.5939)	Entropy 1.77405 (1.77859)	Top-1 acc 42.969 (39.158)	Top-5 acc 67.578 (63.198)	lr 0.02420
Train [14][2280/3239]	Time 0.216 (0.527)	Data Time 0.001 (0.014)	Loss 3.4753 (3.5943)	Entropy 1.77404 (1.77857)	Top-1 acc 42.188 (39.151)	Top-5 acc 65.234 (63.190)	lr 0.02420
Train [14][2290/3239]	Time 0.200 (0.526)	Data Time 0.001 (0.014)	Loss 3.4221 (3.5941)	Entropy 1.77396 (1.77855)	Top-1 acc 44.531 (39.161)	Top-5 acc 66.797 (63.191)	lr 0.02420
Train [14][2300/3239]	Time 0.226 (0.526)	Data Time 0.001 (0.014)	Loss 3.5762 (3.5941)	Entropy 1.77389 (1.77853)	Top-1 acc 34.375 (39.159)	Top-5 acc 62.891 (63.191)	lr 0.02420
Train [14][2310/3239]	Time 0.249 (0.525)	Data Time 0.001 (0.014)	Loss 3.5857 (3.5940)	Entropy 1.77390 (1.77851)	Top-1 acc 41.406 (39.156)	Top-5 acc 60.547 (63.194)	lr 0.02420
Train [14][2320/3239]	Time 2.274 (0.525)	Data Time 0.001 (0.014)	Loss 3.5630 (3.5938)	Entropy 1.77390 (1.77849)	Top-1 acc 38.672 (39.157)	Top-5 acc 63.281 (63.198)	lr 0.02420
Train [14][2330/3239]	Time 0.220 (0.524)	Data Time 0.001 (0.014)	Loss 3.6325 (3.5938)	Entropy 1.77386 (1.77847)	Top-1 acc 38.672 (39.155)	Top-5 acc 62.891 (63.196)	lr 0.02420
Train [14][2340/3239]	Time 0.215 (0.523)	Data Time 0.001 (0.014)	Loss 3.4230 (3.5937)	Entropy 1.77384 (1.77845)	Top-1 acc 45.703 (39.168)	Top-5 acc 66.016 (63.198)	lr 0.02420
Train [14][2350/3239]	Time 0.197 (0.523)	Data Time 0.001 (0.013)	Loss 3.8066 (3.5941)	Entropy 1.77382 (1.77843)	Top-1 acc 36.328 (39.161)	Top-5 acc 59.375 (63.188)	lr 0.02420
Train [14][2360/3239]	Time 0.366 (0.522)	Data Time 0.001 (0.013)	Loss 3.6160 (3.5939)	Entropy 1.77380 (1.77841)	Top-1 acc 39.453 (39.165)	Top-5 acc 62.891 (63.191)	lr 0.02420
Train [14][2370/3239]	Time 0.214 (0.522)	Data Time 0.001 (0.013)	Loss 3.3116 (3.5935)	Entropy 1.77374 (1.77839)	Top-1 acc 44.141 (39.172)	Top-5 acc 70.312 (63.196)	lr 0.02420
Train [14][2380/3239]	Time 0.196 (0.522)	Data Time 0.001 (0.013)	Loss 3.6258 (3.5937)	Entropy 1.77372 (1.77837)	Top-1 acc 35.547 (39.172)	Top-5 acc 64.844 (63.192)	lr 0.02420
Train [14][2390/3239]	Time 0.192 (0.521)	Data Time 0.001 (0.013)	Loss 3.5824 (3.5938)	Entropy 1.77371 (1.77835)	Top-1 acc 39.062 (39.170)	Top-5 acc 62.500 (63.191)	lr 0.02420
Train [14][2400/3239]	Time 0.215 (0.521)	Data Time 0.001 (0.013)	Loss 3.4213 (3.5936)	Entropy 1.77365 (1.77833)	Top-1 acc 44.531 (39.177)	Top-5 acc 68.750 (63.195)	lr 0.02420
Train [14][2410/3239]	Time 0.220 (0.520)	Data Time 0.001 (0.013)	Loss 3.4544 (3.5932)	Entropy 1.77358 (1.77831)	Top-1 acc 42.969 (39.187)	Top-5 acc 65.234 (63.204)	lr 0.02420
Train [14][2420/3239]	Time 0.208 (0.520)	Data Time 0.001 (0.013)	Loss 3.6302 (3.5932)	Entropy 1.77353 (1.77829)	Top-1 acc 37.109 (39.187)	Top-5 acc 62.500 (63.202)	lr 0.02420
Train [14][2430/3239]	Time 2.326 (0.520)	Data Time 0.001 (0.013)	Loss 3.5107 (3.5933)	Entropy 1.77353 (1.77827)	Top-1 acc 39.453 (39.184)	Top-5 acc 62.109 (63.200)	lr 0.02420
Train [14][2440/3239]	Time 0.311 (0.518)	Data Time 0.001 (0.013)	Loss 3.5796 (3.5933)	Entropy 1.77349 (1.77825)	Top-1 acc 38.281 (39.185)	Top-5 acc 63.281 (63.201)	lr 0.02420
Train [14][2450/3239]	Time 0.141 (0.518)	Data Time 0.001 (0.013)	Loss 3.7180 (3.5934)	Entropy 1.77344 (1.77823)	Top-1 acc 38.672 (39.182)	Top-5 acc 62.500 (63.198)	lr 0.02420
Train [14][2460/3239]	Time 0.215 (0.518)	Data Time 0.001 (0.013)	Loss 3.5631 (3.5931)	Entropy 1.77342 (1.77822)	Top-1 acc 35.547 (39.187)	Top-5 acc 63.672 (63.202)	lr 0.02420
Train [14][2470/3239]	Time 0.239 (0.517)	Data Time 0.001 (0.013)	Loss 3.7199 (3.5933)	Entropy 1.77343 (1.77820)	Top-1 acc 38.672 (39.187)	Top-5 acc 61.328 (63.199)	lr 0.02420
Train [14][2480/3239]	Time 0.189 (0.517)	Data Time 0.001 (0.013)	Loss 3.5582 (3.5933)	Entropy 1.77331 (1.77818)	Top-1 acc 42.969 (39.188)	Top-5 acc 63.672 (63.200)	lr 0.02420
Train [14][2490/3239]	Time 0.151 (0.517)	Data Time 0.001 (0.013)	Loss 3.7126 (3.5934)	Entropy 1.77330 (1.77816)	Top-1 acc 38.672 (39.183)	Top-5 acc 61.328 (63.197)	lr 0.02420
Train [14][2500/3239]	Time 0.225 (0.516)	Data Time 0.001 (0.013)	Loss 3.7423 (3.5934)	Entropy 1.77325 (1.77814)	Top-1 acc 37.500 (39.187)	Top-5 acc 59.375 (63.196)	lr 0.02420
Train [14][2510/3239]	Time 0.293 (0.516)	Data Time 0.001 (0.013)	Loss 3.5241 (3.5934)	Entropy 1.77322 (1.77812)	Top-1 acc 40.625 (39.189)	Top-5 acc 65.625 (63.198)	lr 0.02420
Train [14][2520/3239]	Time 0.210 (0.532)	Data Time 0.002 (0.013)	Loss 3.4879 (3.5934)	Entropy 1.77316 (1.77810)	Top-1 acc 44.531 (39.189)	Top-5 acc 67.188 (63.202)	lr 0.02420
Train [14][2530/3239]	Time 0.252 (0.531)	Data Time 0.002 (0.013)	Loss 3.4601 (3.5935)	Entropy 1.77305 (1.77808)	Top-1 acc 40.234 (39.189)	Top-5 acc 67.188 (63.200)	lr 0.02420
Train [14][2540/3239]	Time 2.300 (0.531)	Data Time 0.001 (0.013)	Loss 3.5552 (3.5934)	Entropy 1.77305 (1.77806)	Top-1 acc 40.625 (39.193)	Top-5 acc 63.281 (63.200)	lr 0.02419
Train [14][2550/3239]	Time 0.238 (0.530)	Data Time 0.002 (0.013)	Loss 3.4468 (3.5936)	Entropy 1.77302 (1.77804)	Top-1 acc 43.359 (39.189)	Top-5 acc 66.016 (63.193)	lr 0.02419
Train [14][2560/3239]	Time 0.267 (0.529)	Data Time 0.001 (0.013)	Loss 3.3996 (3.5935)	Entropy 1.77292 (1.77802)	Top-1 acc 44.531 (39.190)	Top-5 acc 64.453 (63.196)	lr 0.02419
Train [14][2570/3239]	Time 0.259 (0.529)	Data Time 0.001 (0.012)	Loss 3.5450 (3.5937)	Entropy 1.77292 (1.77800)	Top-1 acc 40.234 (39.186)	Top-5 acc 62.109 (63.193)	lr 0.02419
Train [14][2580/3239]	Time 0.318 (0.528)	Data Time 0.001 (0.012)	Loss 3.4215 (3.5935)	Entropy 1.77289 (1.77798)	Top-1 acc 43.750 (39.189)	Top-5 acc 67.969 (63.198)	lr 0.02419
Train [14][2590/3239]	Time 0.274 (0.528)	Data Time 0.002 (0.012)	Loss 3.5832 (3.5935)	Entropy 1.77280 (1.77796)	Top-1 acc 38.281 (39.187)	Top-5 acc 62.500 (63.194)	lr 0.02419
Train [14][2600/3239]	Time 0.149 (0.528)	Data Time 0.001 (0.012)	Loss 3.5747 (3.5935)	Entropy 1.77274 (1.77794)	Top-1 acc 40.234 (39.185)	Top-5 acc 63.672 (63.194)	lr 0.02419
Train [14][2610/3239]	Time 0.205 (0.527)	Data Time 0.001 (0.012)	Loss 3.5655 (3.5933)	Entropy 1.77267 (1.77792)	Top-1 acc 39.453 (39.190)	Top-5 acc 61.328 (63.199)	lr 0.02419
Train [14][2620/3239]	Time 0.229 (0.527)	Data Time 0.001 (0.012)	Loss 3.5986 (3.5933)	Entropy 1.77250 (1.77790)	Top-1 acc 37.891 (39.197)	Top-5 acc 62.109 (63.199)	lr 0.02419
Train [14][2630/3239]	Time 0.240 (0.527)	Data Time 0.001 (0.012)	Loss 3.7365 (3.5934)	Entropy 1.77249 (1.77788)	Top-1 acc 37.891 (39.197)	Top-5 acc 57.812 (63.192)	lr 0.02419
Train [14][2640/3239]	Time 0.200 (0.526)	Data Time 0.001 (0.012)	Loss 3.4509 (3.5932)	Entropy 1.77243 (1.77786)	Top-1 acc 44.922 (39.201)	Top-5 acc 68.359 (63.195)	lr 0.02419
Train [14][2650/3239]	Time 0.224 (0.526)	Data Time 0.001 (0.012)	Loss 3.5874 (3.5930)	Entropy 1.77238 (1.77784)	Top-1 acc 40.234 (39.207)	Top-5 acc 60.938 (63.201)	lr 0.02419
Train [14][2660/3239]	Time 0.357 (0.525)	Data Time 0.001 (0.012)	Loss 3.7289 (3.5932)	Entropy 1.77231 (1.77782)	Top-1 acc 38.281 (39.204)	Top-5 acc 58.203 (63.198)	lr 0.02419
Train [14][2670/3239]	Time 0.265 (0.525)	Data Time 0.001 (0.012)	Loss 3.6175 (3.5934)	Entropy 1.77227 (1.77780)	Top-1 acc 42.578 (39.205)	Top-5 acc 62.891 (63.191)	lr 0.02419
Train [14][2680/3239]	Time 0.211 (0.525)	Data Time 0.001 (0.012)	Loss 3.7405 (3.5933)	Entropy 1.77225 (1.77778)	Top-1 acc 38.672 (39.208)	Top-5 acc 59.375 (63.193)	lr 0.02419
Train [14][2690/3239]	Time 0.204 (0.524)	Data Time 0.001 (0.012)	Loss 3.3998 (3.5931)	Entropy 1.77219 (1.77776)	Top-1 acc 42.578 (39.216)	Top-5 acc 69.141 (63.202)	lr 0.02419
Train [14][2700/3239]	Time 0.231 (0.524)	Data Time 0.002 (0.012)	Loss 3.6830 (3.5930)	Entropy 1.77209 (1.77774)	Top-1 acc 44.141 (39.219)	Top-5 acc 61.719 (63.205)	lr 0.02419
Train [14][2710/3239]	Time 0.223 (0.523)	Data Time 0.001 (0.012)	Loss 3.6754 (3.5932)	Entropy 1.77202 (1.77771)	Top-1 acc 39.844 (39.215)	Top-5 acc 62.109 (63.199)	lr 0.02419
Train [14][2720/3239]	Time 0.248 (0.523)	Data Time 0.001 (0.012)	Loss 3.5061 (3.5929)	Entropy 1.77198 (1.77769)	Top-1 acc 38.672 (39.219)	Top-5 acc 64.062 (63.202)	lr 0.02419
Train [14][2730/3239]	Time 0.213 (0.522)	Data Time 0.001 (0.012)	Loss 3.7082 (3.5933)	Entropy 1.77193 (1.77767)	Top-1 acc 33.984 (39.210)	Top-5 acc 62.891 (63.196)	lr 0.02419
Train [14][2740/3239]	Time 0.323 (0.522)	Data Time 0.001 (0.012)	Loss 3.8377 (3.5934)	Entropy 1.77180 (1.77765)	Top-1 acc 36.719 (39.210)	Top-5 acc 55.859 (63.192)	lr 0.02419
Train [14][2750/3239]	Time 0.231 (0.522)	Data Time 0.024 (0.012)	Loss 3.5821 (3.5933)	Entropy 1.77178 (1.77763)	Top-1 acc 39.453 (39.211)	Top-5 acc 62.891 (63.194)	lr 0.02419
Train [14][2760/3239]	Time 0.212 (0.521)	Data Time 0.001 (0.012)	Loss 3.4299 (3.5930)	Entropy 1.77179 (1.77761)	Top-1 acc 43.750 (39.220)	Top-5 acc 64.844 (63.197)	lr 0.02419
Train [14][2770/3239]	Time 0.220 (0.521)	Data Time 0.001 (0.012)	Loss 3.5517 (3.5930)	Entropy 1.77182 (1.77759)	Top-1 acc 41.016 (39.219)	Top-5 acc 66.406 (63.199)	lr 0.02419
Train [14][2780/3239]	Time 0.209 (0.521)	Data Time 0.001 (0.012)	Loss 3.6583 (3.5930)	Entropy 1.77165 (1.77757)	Top-1 acc 39.062 (39.224)	Top-5 acc 65.234 (63.200)	lr 0.02419
Train [14][2790/3239]	Time 0.145 (0.520)	Data Time 0.001 (0.012)	Loss 3.8472 (3.5933)	Entropy 1.77159 (1.77755)	Top-1 acc 32.031 (39.217)	Top-5 acc 56.250 (63.193)	lr 0.02419
Train [14][2800/3239]	Time 0.212 (0.520)	Data Time 0.001 (0.012)	Loss 3.5641 (3.5932)	Entropy 1.77154 (1.77752)	Top-1 acc 39.453 (39.217)	Top-5 acc 63.281 (63.191)	lr 0.02419
Train [14][2810/3239]	Time 0.361 (0.520)	Data Time 0.001 (0.012)	Loss 3.5440 (3.5932)	Entropy 1.77145 (1.77750)	Top-1 acc 41.016 (39.219)	Top-5 acc 61.328 (63.191)	lr 0.02419
Train [14][2820/3239]	Time 0.186 (0.519)	Data Time 0.002 (0.012)	Loss 3.8457 (3.5935)	Entropy 1.77128 (1.77748)	Top-1 acc 33.203 (39.209)	Top-5 acc 57.422 (63.185)	lr 0.02418
Train [14][2830/3239]	Time 0.186 (0.519)	Data Time 0.001 (0.012)	Loss 3.5968 (3.5935)	Entropy 1.77125 (1.77746)	Top-1 acc 40.234 (39.209)	Top-5 acc 63.281 (63.187)	lr 0.02418
Train [14][2840/3239]	Time 0.261 (0.519)	Data Time 0.001 (0.012)	Loss 3.6433 (3.5935)	Entropy 1.77121 (1.77744)	Top-1 acc 34.375 (39.209)	Top-5 acc 63.672 (63.188)	lr 0.02418
Train [14][2850/3239]	Time 0.214 (0.518)	Data Time 0.001 (0.011)	Loss 3.4085 (3.5936)	Entropy 1.77121 (1.77742)	Top-1 acc 41.406 (39.208)	Top-5 acc 67.969 (63.188)	lr 0.02418
Train [14][2860/3239]	Time 0.314 (0.531)	Data Time 0.004 (0.011)	Loss 3.5603 (3.5935)	Entropy 1.77109 (1.77739)	Top-1 acc 40.234 (39.209)	Top-5 acc 64.453 (63.189)	lr 0.02418
Train [14][2870/3239]	Time 0.244 (0.531)	Data Time 0.002 (0.011)	Loss 3.5950 (3.5935)	Entropy 1.77108 (1.77737)	Top-1 acc 37.500 (39.208)	Top-5 acc 64.453 (63.188)	lr 0.02418
Train [14][2880/3239]	Time 0.332 (0.531)	Data Time 0.002 (0.011)	Loss 3.5583 (3.5935)	Entropy 1.77100 (1.77735)	Top-1 acc 41.016 (39.213)	Top-5 acc 63.672 (63.185)	lr 0.02418
Train [14][2890/3239]	Time 0.206 (0.530)	Data Time 0.001 (0.011)	Loss 3.5019 (3.5934)	Entropy 1.77096 (1.77733)	Top-1 acc 38.672 (39.215)	Top-5 acc 64.844 (63.187)	lr 0.02418
Train [14][2900/3239]	Time 0.194 (0.530)	Data Time 0.001 (0.011)	Loss 3.5594 (3.5933)	Entropy 1.77089 (1.77731)	Top-1 acc 39.453 (39.223)	Top-5 acc 63.281 (63.189)	lr 0.02418
Train [14][2910/3239]	Time 0.197 (0.530)	Data Time 0.001 (0.011)	Loss 3.6707 (3.5931)	Entropy 1.77087 (1.77728)	Top-1 acc 36.328 (39.227)	Top-5 acc 60.938 (63.190)	lr 0.02418
Train [14][2920/3239]	Time 0.243 (0.530)	Data Time 0.002 (0.011)	Loss 3.4659 (3.5930)	Entropy 1.77084 (1.77726)	Top-1 acc 40.625 (39.231)	Top-5 acc 64.062 (63.192)	lr 0.02418
Train [14][2930/3239]	Time 0.264 (0.529)	Data Time 0.001 (0.011)	Loss 3.5299 (3.5929)	Entropy 1.77082 (1.77724)	Top-1 acc 41.406 (39.235)	Top-5 acc 64.453 (63.192)	lr 0.02418
Train [14][2940/3239]	Time 0.212 (0.529)	Data Time 0.001 (0.011)	Loss 3.5755 (3.5931)	Entropy 1.77075 (1.77722)	Top-1 acc 38.672 (39.229)	Top-5 acc 66.016 (63.192)	lr 0.02418
Train [14][2950/3239]	Time 0.148 (0.528)	Data Time 0.002 (0.011)	Loss 3.8187 (3.5932)	Entropy 1.77073 (1.77720)	Top-1 acc 37.109 (39.230)	Top-5 acc 59.375 (63.193)	lr 0.02418
Train [14][2960/3239]	Time 0.214 (0.528)	Data Time 0.001 (0.011)	Loss 3.6631 (3.5932)	Entropy 1.77065 (1.77718)	Top-1 acc 38.672 (39.231)	Top-5 acc 60.156 (63.192)	lr 0.02418
Train [14][2970/3239]	Time 0.271 (0.528)	Data Time 0.001 (0.011)	Loss 3.5901 (3.5933)	Entropy 1.77061 (1.77715)	Top-1 acc 39.453 (39.233)	Top-5 acc 62.109 (63.188)	lr 0.02418
Train [14][2980/3239]	Time 0.220 (0.527)	Data Time 0.001 (0.011)	Loss 3.4218 (3.5933)	Entropy 1.77047 (1.77713)	Top-1 acc 42.188 (39.230)	Top-5 acc 66.406 (63.189)	lr 0.02418
Train [14][2990/3239]	Time 0.230 (0.527)	Data Time 0.001 (0.011)	Loss 3.6095 (3.5934)	Entropy 1.77045 (1.77711)	Top-1 acc 41.016 (39.230)	Top-5 acc 66.406 (63.188)	lr 0.02418
Train [14][3000/3239]	Time 0.200 (0.527)	Data Time 0.001 (0.011)	Loss 3.3937 (3.5933)	Entropy 1.77038 (1.77709)	Top-1 acc 43.359 (39.228)	Top-5 acc 67.969 (63.189)	lr 0.02418
Train [14][3010/3239]	Time 0.207 (0.526)	Data Time 0.001 (0.011)	Loss 3.5646 (3.5931)	Entropy 1.77032 (1.77706)	Top-1 acc 39.062 (39.229)	Top-5 acc 60.938 (63.192)	lr 0.02418
Train [14][3020/3239]	Time 0.342 (0.526)	Data Time 0.001 (0.011)	Loss 3.5054 (3.5930)	Entropy 1.77017 (1.77704)	Top-1 acc 38.672 (39.229)	Top-5 acc 67.188 (63.197)	lr 0.02418
Train [14][3030/3239]	Time 0.227 (0.526)	Data Time 0.001 (0.011)	Loss 3.5061 (3.5929)	Entropy 1.77017 (1.77702)	Top-1 acc 42.969 (39.229)	Top-5 acc 66.406 (63.199)	lr 0.02418
Train [14][3040/3239]	Time 0.230 (0.525)	Data Time 0.001 (0.011)	Loss 3.5316 (3.5928)	Entropy 1.77011 (1.77700)	Top-1 acc 42.188 (39.231)	Top-5 acc 64.453 (63.201)	lr 0.02418
Train [14][3050/3239]	Time 0.206 (0.525)	Data Time 0.001 (0.011)	Loss 3.6345 (3.5930)	Entropy 1.77006 (1.77697)	Top-1 acc 37.500 (39.228)	Top-5 acc 61.719 (63.195)	lr 0.02418
Train [14][3060/3239]	Time 0.244 (0.525)	Data Time 0.001 (0.011)	Loss 3.5311 (3.5928)	Entropy 1.77003 (1.77695)	Top-1 acc 39.453 (39.229)	Top-5 acc 65.625 (63.195)	lr 0.02418
Train [14][3070/3239]	Time 0.224 (0.524)	Data Time 0.001 (0.011)	Loss 3.6593 (3.5928)	Entropy 1.77001 (1.77693)	Top-1 acc 37.500 (39.233)	Top-5 acc 61.328 (63.194)	lr 0.02418
Train [14][3080/3239]	Time 0.226 (0.524)	Data Time 0.002 (0.011)	Loss 3.6754 (3.5928)	Entropy 1.76998 (1.77691)	Top-1 acc 39.453 (39.230)	Top-5 acc 64.844 (63.195)	lr 0.02418
Train [14][3090/3239]	Time 0.146 (0.524)	Data Time 0.001 (0.011)	Loss 3.5144 (3.5929)	Entropy 1.76988 (1.77688)	Top-1 acc 42.188 (39.231)	Top-5 acc 66.406 (63.194)	lr 0.02418
Train [14][3100/3239]	Time 0.355 (0.523)	Data Time 0.001 (0.011)	Loss 3.4328 (3.5928)	Entropy 1.76985 (1.77686)	Top-1 acc 42.188 (39.232)	Top-5 acc 66.406 (63.191)	lr 0.02417
Train [14][3110/3239]	Time 0.220 (0.523)	Data Time 0.001 (0.011)	Loss 3.5945 (3.5928)	Entropy 1.76981 (1.77684)	Top-1 acc 38.672 (39.230)	Top-5 acc 60.156 (63.193)	lr 0.02417
Train [14][3120/3239]	Time 0.236 (0.523)	Data Time 0.001 (0.011)	Loss 3.6053 (3.5928)	Entropy 1.76971 (1.77682)	Top-1 acc 38.281 (39.228)	Top-5 acc 62.500 (63.193)	lr 0.02417
Train [14][3130/3239]	Time 0.211 (0.522)	Data Time 0.001 (0.011)	Loss 3.6511 (3.5928)	Entropy 1.76966 (1.77679)	Top-1 acc 38.672 (39.226)	Top-5 acc 66.406 (63.196)	lr 0.02417
Train [14][3140/3239]	Time 0.219 (0.522)	Data Time 0.001 (0.011)	Loss 3.5889 (3.5926)	Entropy 1.76966 (1.77677)	Top-1 acc 39.844 (39.230)	Top-5 acc 59.766 (63.197)	lr 0.02417
Train [14][3150/3239]	Time 0.261 (0.522)	Data Time 0.001 (0.011)	Loss 3.6807 (3.5927)	Entropy 1.76965 (1.77675)	Top-1 acc 38.672 (39.231)	Top-5 acc 59.375 (63.197)	lr 0.02417
Train [14][3160/3239]	Time 0.277 (0.522)	Data Time 0.001 (0.011)	Loss 3.7994 (3.5927)	Entropy 1.76962 (1.77673)	Top-1 acc 33.203 (39.232)	Top-5 acc 57.031 (63.196)	lr 0.02417
Train [14][3170/3239]	Time 0.225 (0.521)	Data Time 0.001 (0.011)	Loss 3.5560 (3.5930)	Entropy 1.76965 (1.77670)	Top-1 acc 42.188 (39.223)	Top-5 acc 65.625 (63.188)	lr 0.02417
Train [14][3180/3239]	Time 0.337 (0.521)	Data Time 0.000 (0.010)	Loss 3.5560 (3.5926)	Entropy 1.76960 (1.77668)	Top-1 acc 41.016 (39.230)	Top-5 acc 63.672 (63.194)	lr 0.02417
Train [14][3190/3239]	Time 0.252 (0.532)	Data Time 0.000 (0.010)	Loss 3.6913 (3.5928)	Entropy 1.76954 (1.77666)	Top-1 acc 33.203 (39.231)	Top-5 acc 58.594 (63.191)	lr 0.02417
Train [14][3200/3239]	Time 0.209 (0.532)	Data Time 0.000 (0.010)	Loss 3.6393 (3.5929)	Entropy 1.76955 (1.77664)	Top-1 acc 39.453 (39.229)	Top-5 acc 62.891 (63.189)	lr 0.02417
Train [14][3210/3239]	Time 0.190 (0.531)	Data Time 0.000 (0.010)	Loss 3.5134 (3.5931)	Entropy 1.76949 (1.77661)	Top-1 acc 42.188 (39.223)	Top-5 acc 65.234 (63.188)	lr 0.02417
Train [14][3220/3239]	Time 0.147 (0.531)	Data Time 0.000 (0.010)	Loss 3.5721 (3.5930)	Entropy 1.76948 (1.77659)	Top-1 acc 37.500 (39.229)	Top-5 acc 61.719 (63.189)	lr 0.02417
Train [14][3230/3239]	Time 0.209 (0.531)	Data Time 0.000 (0.010)	Loss 3.6474 (3.5931)	Entropy 1.76943 (1.77657)	Top-1 acc 38.672 (39.226)	Top-5 acc 59.766 (63.184)	lr 0.02417
Train [14][3239/3239]	Time 2.154 (0.530)	Data Time 0.000 (0.010)	Loss 3.9957 (3.5931)	Entropy 1.76943 (1.77655)	Top-1 acc 32.099 (39.228)	Top-5 acc 59.259 (63.186)	lr 0.02417
==========Valid [14/120]	loss 2.404	top-1 acc 48.000 (48.000)	top-5 acc 72.309	Train top-1 39.228	top-5 63.186	Entropy 1.76943	Latency-None: 0.000ms	Flops: 535.12M
Train [15][0/3239]	Time 25.350 (25.350)	Data Time 24.448 (24.448)	Loss 3.4472 (3.4472)	Entropy 1.76944 (1.76944)	Top-1 acc 46.875 (46.875)	Top-5 acc 64.844 (64.844)	lr 0.02417
Train [15][10/3239]	Time 2.382 (2.959)	Data Time 0.001 (2.391)	Loss 3.5685 (3.5268)	Entropy 1.76944 (1.76944)	Top-1 acc 37.891 (40.803)	Top-5 acc 64.844 (64.631)	lr 0.02417
Train [15][20/3239]	Time 0.335 (1.665)	Data Time 0.001 (1.253)	Loss 3.6497 (3.5480)	Entropy 1.76940 (1.76942)	Top-1 acc 38.281 (39.900)	Top-5 acc 60.547 (64.174)	lr 0.02417
Train [15][30/3239]	Time 0.207 (1.270)	Data Time 0.001 (0.849)	Loss 3.5491 (3.5498)	Entropy 1.76940 (1.76941)	Top-1 acc 37.891 (39.957)	Top-5 acc 64.453 (64.352)	lr 0.02417
Train [15][40/3239]	Time 0.210 (1.066)	Data Time 0.001 (0.643)	Loss 3.3597 (3.5513)	Entropy 1.76937 (1.76940)	Top-1 acc 44.141 (40.072)	Top-5 acc 69.531 (64.110)	lr 0.02417
Train [15][50/3239]	Time 0.255 (0.940)	Data Time 0.001 (0.517)	Loss 3.6561 (3.5474)	Entropy 1.76931 (1.76939)	Top-1 acc 41.016 (40.043)	Top-5 acc 63.672 (64.262)	lr 0.02417
Train [15][60/3239]	Time 0.184 (0.853)	Data Time 0.001 (0.432)	Loss 3.4384 (3.5443)	Entropy 1.76931 (1.76938)	Top-1 acc 42.578 (40.074)	Top-5 acc 67.969 (64.248)	lr 0.02417
Train [15][70/3239]	Time 0.193 (0.791)	Data Time 0.002 (0.372)	Loss 3.4542 (3.5466)	Entropy 1.76925 (1.76936)	Top-1 acc 41.406 (39.954)	Top-5 acc 67.578 (64.250)	lr 0.02417
Train [15][80/3239]	Time 0.208 (0.747)	Data Time 0.001 (0.326)	Loss 3.5632 (3.5493)	Entropy 1.76920 (1.76935)	Top-1 acc 39.844 (39.950)	Top-5 acc 63.281 (64.091)	lr 0.02417
Train [15][90/3239]	Time 0.190 (0.711)	Data Time 0.001 (0.290)	Loss 3.5073 (3.5500)	Entropy 1.76918 (1.76933)	Top-1 acc 46.094 (39.882)	Top-5 acc 67.188 (64.144)	lr 0.02417
Train [15][100/3239]	Time 0.296 (0.683)	Data Time 0.001 (0.262)	Loss 3.5133 (3.5551)	Entropy 1.76916 (1.76932)	Top-1 acc 39.062 (39.766)	Top-5 acc 66.016 (64.043)	lr 0.02417
Train [15][110/3239]	Time 0.242 (0.661)	Data Time 0.001 (0.238)	Loss 3.5959 (3.5564)	Entropy 1.76912 (1.76930)	Top-1 acc 39.062 (39.745)	Top-5 acc 61.328 (63.932)	lr 0.02417
Train [15][120/3239]	Time 2.404 (0.643)	Data Time 0.001 (0.219)	Loss 3.5245 (3.5544)	Entropy 1.76912 (1.76929)	Top-1 acc 39.453 (39.757)	Top-5 acc 65.625 (63.972)	lr 0.02417
Train [15][130/3239]	Time 0.222 (0.610)	Data Time 0.001 (0.202)	Loss 3.7135 (3.5589)	Entropy 1.76909 (1.76927)	Top-1 acc 42.969 (39.784)	Top-5 acc 61.328 (63.827)	lr 0.02417
Train [15][140/3239]	Time 0.199 (0.597)	Data Time 0.001 (0.188)	Loss 3.5320 (3.5576)	Entropy 1.76904 (1.76926)	Top-1 acc 41.406 (39.794)	Top-5 acc 65.234 (63.885)	lr 0.02416
Train [15][150/3239]	Time 0.221 (0.586)	Data Time 0.001 (0.176)	Loss 3.5877 (3.5571)	Entropy 1.76907 (1.76924)	Top-1 acc 36.719 (39.769)	Top-5 acc 63.672 (63.915)	lr 0.02416
Train [15][160/3239]	Time 0.240 (0.577)	Data Time 0.001 (0.165)	Loss 3.5350 (3.5560)	Entropy 1.76897 (1.76923)	Top-1 acc 43.750 (39.807)	Top-5 acc 66.406 (63.922)	lr 0.02416
Train [15][170/3239]	Time 0.337 (0.569)	Data Time 0.001 (0.155)	Loss 3.5526 (3.5534)	Entropy 1.76889 (1.76921)	Top-1 acc 44.141 (39.876)	Top-5 acc 66.797 (64.005)	lr 0.02416
Train [15][180/3239]	Time 0.227 (0.562)	Data Time 0.001 (0.147)	Loss 3.3831 (3.5516)	Entropy 1.76886 (1.76919)	Top-1 acc 41.406 (39.930)	Top-5 acc 67.969 (64.058)	lr 0.02416
Train [15][190/3239]	Time 0.219 (0.556)	Data Time 0.001 (0.139)	Loss 3.4658 (3.5491)	Entropy 1.76883 (1.76917)	Top-1 acc 41.016 (39.966)	Top-5 acc 61.719 (64.054)	lr 0.02416
Train [15][200/3239]	Time 0.194 (0.549)	Data Time 0.001 (0.132)	Loss 3.7531 (3.5521)	Entropy 1.76874 (1.76916)	Top-1 acc 37.109 (39.951)	Top-5 acc 60.547 (64.033)	lr 0.02416
Train [15][210/3239]	Time 0.211 (0.543)	Data Time 0.001 (0.126)	Loss 3.6039 (3.5550)	Entropy 1.76875 (1.76914)	Top-1 acc 40.625 (39.931)	Top-5 acc 62.109 (63.974)	lr 0.02416
Train [15][220/3239]	Time 0.233 (0.537)	Data Time 0.001 (0.120)	Loss 3.6517 (3.5570)	Entropy 1.76873 (1.76912)	Top-1 acc 40.625 (39.929)	Top-5 acc 63.672 (63.907)	lr 0.02416
Train [15][230/3239]	Time 2.337 (0.533)	Data Time 0.001 (0.115)	Loss 3.6278 (3.5579)	Entropy 1.76873 (1.76910)	Top-1 acc 37.500 (39.862)	Top-5 acc 61.719 (63.907)	lr 0.02416
Train [15][240/3239]	Time 0.284 (0.520)	Data Time 0.001 (0.111)	Loss 3.6274 (3.5606)	Entropy 1.76869 (1.76908)	Top-1 acc 33.984 (39.828)	Top-5 acc 64.453 (63.886)	lr 0.02416
Train [15][250/3239]	Time 0.191 (0.516)	Data Time 0.001 (0.106)	Loss 3.5838 (3.5591)	Entropy 1.76864 (1.76907)	Top-1 acc 44.141 (39.875)	Top-5 acc 63.672 (63.949)	lr 0.02416
Train [15][260/3239]	Time 0.140 (0.512)	Data Time 0.001 (0.102)	Loss 3.8849 (3.5593)	Entropy 1.76860 (1.76905)	Top-1 acc 33.594 (39.877)	Top-5 acc 53.906 (63.919)	lr 0.02416
Train [15][270/3239]	Time 0.210 (0.510)	Data Time 0.001 (0.099)	Loss 3.5047 (3.5568)	Entropy 1.76848 (1.76903)	Top-1 acc 42.969 (39.956)	Top-5 acc 63.672 (63.969)	lr 0.02416
Train [15][280/3239]	Time 0.215 (0.507)	Data Time 0.001 (0.095)	Loss 3.4641 (3.5566)	Entropy 1.76847 (1.76901)	Top-1 acc 42.969 (39.984)	Top-5 acc 68.359 (63.987)	lr 0.02416
Train [15][290/3239]	Time 0.209 (0.504)	Data Time 0.001 (0.092)	Loss 3.3770 (3.5550)	Entropy 1.76847 (1.76899)	Top-1 acc 46.094 (40.038)	Top-5 acc 64.844 (64.021)	lr 0.02416
Train [15][300/3239]	Time 0.197 (0.502)	Data Time 0.001 (0.089)	Loss 3.5830 (3.5554)	Entropy 1.76830 (1.76897)	Top-1 acc 38.672 (40.073)	Top-5 acc 66.797 (64.011)	lr 0.02416
Train [15][310/3239]	Time 0.219 (0.635)	Data Time 0.002 (0.086)	Loss 3.4421 (3.5558)	Entropy 1.76825 (1.76895)	Top-1 acc 41.797 (40.061)	Top-5 acc 69.141 (64.007)	lr 0.02416
Train [15][320/3239]	Time 0.159 (0.629)	Data Time 0.002 (0.084)	Loss 3.5221 (3.5557)	Entropy 1.76821 (1.76893)	Top-1 acc 40.625 (40.054)	Top-5 acc 67.578 (63.999)	lr 0.02416
Train [15][330/3239]	Time 0.203 (0.623)	Data Time 0.002 (0.081)	Loss 3.5436 (3.5566)	Entropy 1.76815 (1.76891)	Top-1 acc 39.844 (40.060)	Top-5 acc 65.625 (64.013)	lr 0.02416
Train [15][340/3239]	Time 2.197 (0.616)	Data Time 0.001 (0.079)	Loss 3.7066 (3.5562)	Entropy 1.76815 (1.76888)	Top-1 acc 32.031 (40.053)	Top-5 acc 59.766 (64.016)	lr 0.02416
Train [15][350/3239]	Time 0.145 (0.605)	Data Time 0.001 (0.077)	Loss 3.3045 (3.5569)	Entropy 1.76807 (1.76886)	Top-1 acc 47.266 (40.065)	Top-5 acc 70.703 (63.998)	lr 0.02416
Train [15][360/3239]	Time 0.165 (0.599)	Data Time 0.002 (0.075)	Loss 3.6344 (3.5583)	Entropy 1.76804 (1.76884)	Top-1 acc 37.109 (40.009)	Top-5 acc 60.938 (63.975)	lr 0.02416
Train [15][370/3239]	Time 0.218 (0.595)	Data Time 0.001 (0.073)	Loss 3.4540 (3.5574)	Entropy 1.76801 (1.76882)	Top-1 acc 41.016 (40.026)	Top-5 acc 65.625 (63.996)	lr 0.02416
Train [15][380/3239]	Time 0.230 (0.591)	Data Time 0.001 (0.071)	Loss 3.4890 (3.5570)	Entropy 1.76796 (1.76879)	Top-1 acc 37.891 (40.028)	Top-5 acc 64.844 (63.989)	lr 0.02416
Train [15][390/3239]	Time 0.324 (0.587)	Data Time 0.001 (0.069)	Loss 3.8048 (3.5582)	Entropy 1.76789 (1.76877)	Top-1 acc 34.766 (40.006)	Top-5 acc 55.078 (63.947)	lr 0.02416
Train [15][400/3239]	Time 0.186 (0.583)	Data Time 0.001 (0.068)	Loss 3.5317 (3.5588)	Entropy 1.76780 (1.76875)	Top-1 acc 39.453 (39.993)	Top-5 acc 65.625 (63.912)	lr 0.02416
Train [15][410/3239]	Time 0.162 (0.579)	Data Time 0.001 (0.066)	Loss 3.5136 (3.5596)	Entropy 1.76777 (1.76872)	Top-1 acc 39.844 (39.977)	Top-5 acc 63.672 (63.881)	lr 0.02415
Train [15][420/3239]	Time 0.150 (0.575)	Data Time 0.001 (0.065)	Loss 3.3999 (3.5562)	Entropy 1.76772 (1.76870)	Top-1 acc 44.141 (40.028)	Top-5 acc 66.797 (63.952)	lr 0.02415
Train [15][430/3239]	Time 0.204 (0.572)	Data Time 0.002 (0.063)	Loss 3.8249 (3.5587)	Entropy 1.76771 (1.76868)	Top-1 acc 33.594 (39.995)	Top-5 acc 58.984 (63.903)	lr 0.02415
Train [15][440/3239]	Time 0.208 (0.570)	Data Time 0.001 (0.062)	Loss 3.6384 (3.5586)	Entropy 1.76771 (1.76866)	Top-1 acc 41.016 (40.012)	Top-5 acc 62.891 (63.908)	lr 0.02415
Train [15][450/3239]	Time 2.274 (0.566)	Data Time 0.001 (0.061)	Loss 3.6678 (3.5592)	Entropy 1.76771 (1.76864)	Top-1 acc 35.547 (39.978)	Top-5 acc 61.719 (63.896)	lr 0.02415
Train [15][460/3239]	Time 0.194 (0.559)	Data Time 0.001 (0.059)	Loss 3.4761 (3.5587)	Entropy 1.76768 (1.76861)	Top-1 acc 37.891 (39.986)	Top-5 acc 66.797 (63.900)	lr 0.02415
Train [15][470/3239]	Time 0.344 (0.556)	Data Time 0.001 (0.058)	Loss 3.6698 (3.5577)	Entropy 1.76768 (1.76860)	Top-1 acc 36.719 (40.030)	Top-5 acc 64.453 (63.908)	lr 0.02415
Train [15][480/3239]	Time 0.173 (0.554)	Data Time 0.001 (0.057)	Loss 3.5527 (3.5583)	Entropy 1.76756 (1.76857)	Top-1 acc 39.062 (40.010)	Top-5 acc 62.500 (63.890)	lr 0.02415
Train [15][490/3239]	Time 0.212 (0.552)	Data Time 0.001 (0.056)	Loss 3.5752 (3.5586)	Entropy 1.76755 (1.76855)	Top-1 acc 38.281 (40.005)	Top-5 acc 63.672 (63.889)	lr 0.02415
Train [15][500/3239]	Time 0.207 (0.549)	Data Time 0.002 (0.055)	Loss 3.6790 (3.5599)	Entropy 1.76753 (1.76853)	Top-1 acc 33.594 (39.974)	Top-5 acc 62.109 (63.863)	lr 0.02415
Train [15][510/3239]	Time 0.218 (0.547)	Data Time 0.001 (0.054)	Loss 3.6287 (3.5594)	Entropy 1.76747 (1.76851)	Top-1 acc 41.797 (39.991)	Top-5 acc 63.672 (63.878)	lr 0.02415
Train [15][520/3239]	Time 0.222 (0.545)	Data Time 0.001 (0.053)	Loss 3.7053 (3.5592)	Entropy 1.76740 (1.76849)	Top-1 acc 39.453 (40.009)	Top-5 acc 60.156 (63.878)	lr 0.02415
Train [15][530/3239]	Time 0.220 (0.543)	Data Time 0.002 (0.052)	Loss 3.3775 (3.5592)	Entropy 1.76741 (1.76847)	Top-1 acc 40.234 (40.000)	Top-5 acc 66.797 (63.868)	lr 0.02415
Train [15][540/3239]	Time 0.318 (0.541)	Data Time 0.001 (0.051)	Loss 3.5794 (3.5606)	Entropy 1.76739 (1.76845)	Top-1 acc 41.016 (39.984)	Top-5 acc 62.109 (63.820)	lr 0.02415
Train [15][550/3239]	Time 0.246 (0.539)	Data Time 0.001 (0.050)	Loss 3.6228 (3.5609)	Entropy 1.76735 (1.76843)	Top-1 acc 42.969 (39.984)	Top-5 acc 64.844 (63.820)	lr 0.02415
Train [15][560/3239]	Time 2.312 (0.537)	Data Time 0.001 (0.049)	Loss 3.8196 (3.5619)	Entropy 1.76735 (1.76841)	Top-1 acc 36.328 (39.941)	Top-5 acc 58.984 (63.802)	lr 0.02415
Train [15][570/3239]	Time 0.217 (0.531)	Data Time 0.001 (0.048)	Loss 3.7120 (3.5623)	Entropy 1.76729 (1.76839)	Top-1 acc 36.328 (39.946)	Top-5 acc 61.719 (63.797)	lr 0.02415
Train [15][580/3239]	Time 0.230 (0.530)	Data Time 0.002 (0.047)	Loss 3.5922 (3.5627)	Entropy 1.76728 (1.76837)	Top-1 acc 41.406 (39.949)	Top-5 acc 62.109 (63.777)	lr 0.02415
Train [15][590/3239]	Time 0.238 (0.528)	Data Time 0.001 (0.047)	Loss 3.6584 (3.5636)	Entropy 1.76728 (1.76836)	Top-1 acc 36.328 (39.924)	Top-5 acc 62.891 (63.760)	lr 0.02415
Train [15][600/3239]	Time 0.225 (0.527)	Data Time 0.001 (0.046)	Loss 3.5578 (3.5634)	Entropy 1.76723 (1.76834)	Top-1 acc 37.109 (39.921)	Top-5 acc 66.016 (63.767)	lr 0.02415
Train [15][610/3239]	Time 0.306 (0.525)	Data Time 0.002 (0.045)	Loss 3.4835 (3.5631)	Entropy 1.76719 (1.76832)	Top-1 acc 39.062 (39.912)	Top-5 acc 66.797 (63.776)	lr 0.02415
Train [15][620/3239]	Time 0.170 (0.524)	Data Time 0.001 (0.044)	Loss 3.5448 (3.5630)	Entropy 1.76716 (1.76830)	Top-1 acc 40.234 (39.925)	Top-5 acc 65.625 (63.766)	lr 0.02415
Train [15][630/3239]	Time 0.231 (0.522)	Data Time 0.002 (0.044)	Loss 3.6575 (3.5630)	Entropy 1.76711 (1.76828)	Top-1 acc 37.891 (39.922)	Top-5 acc 62.500 (63.783)	lr 0.02415
Train [15][640/3239]	Time 0.232 (0.521)	Data Time 0.001 (0.043)	Loss 3.4681 (3.5630)	Entropy 1.76697 (1.76826)	Top-1 acc 43.359 (39.939)	Top-5 acc 64.062 (63.784)	lr 0.02415
Train [15][650/3239]	Time 0.259 (0.519)	Data Time 0.001 (0.043)	Loss 3.5898 (3.5633)	Entropy 1.76695 (1.76824)	Top-1 acc 38.672 (39.914)	Top-5 acc 59.766 (63.775)	lr 0.02415
Train [15][660/3239]	Time 0.171 (0.518)	Data Time 0.001 (0.042)	Loss 3.5346 (3.5633)	Entropy 1.76693 (1.76822)	Top-1 acc 38.281 (39.893)	Top-5 acc 61.328 (63.768)	lr 0.02415
Train [15][670/3239]	Time 41.583 (0.575)	Data Time 0.001 (0.041)	Loss 3.6006 (3.5646)	Entropy 1.76693 (1.76820)	Top-1 acc 39.062 (39.865)	Top-5 acc 63.672 (63.738)	lr 0.02415
Train [15][680/3239]	Time 0.309 (0.571)	Data Time 0.002 (0.041)	Loss 3.4864 (3.5649)	Entropy 1.76686 (1.76818)	Top-1 acc 42.969 (39.859)	Top-5 acc 66.016 (63.744)	lr 0.02414
Train [15][690/3239]	Time 0.154 (0.569)	Data Time 0.001 (0.040)	Loss 3.6461 (3.5650)	Entropy 1.76685 (1.76817)	Top-1 acc 35.547 (39.854)	Top-5 acc 61.328 (63.735)	lr 0.02414
Train [15][700/3239]	Time 0.211 (0.567)	Data Time 0.002 (0.040)	Loss 3.5466 (3.5652)	Entropy 1.76676 (1.76815)	Top-1 acc 37.891 (39.844)	Top-5 acc 66.016 (63.732)	lr 0.02414
Train [15][710/3239]	Time 0.184 (0.565)	Data Time 0.002 (0.039)	Loss 3.6442 (3.5652)	Entropy 1.76672 (1.76813)	Top-1 acc 35.938 (39.844)	Top-5 acc 62.891 (63.749)	lr 0.02414
Train [15][720/3239]	Time 0.205 (0.563)	Data Time 0.001 (0.039)	Loss 3.7873 (3.5652)	Entropy 1.76669 (1.76811)	Top-1 acc 35.547 (39.829)	Top-5 acc 60.156 (63.746)	lr 0.02414
Train [15][730/3239]	Time 0.145 (0.561)	Data Time 0.001 (0.038)	Loss 3.5213 (3.5648)	Entropy 1.76670 (1.76809)	Top-1 acc 39.062 (39.823)	Top-5 acc 66.016 (63.756)	lr 0.02414
Train [15][740/3239]	Time 0.191 (0.559)	Data Time 0.001 (0.038)	Loss 3.7683 (3.5660)	Entropy 1.76662 (1.76807)	Top-1 acc 34.766 (39.797)	Top-5 acc 58.984 (63.729)	lr 0.02414
Train [15][750/3239]	Time 0.193 (0.557)	Data Time 0.001 (0.037)	Loss 3.6602 (3.5662)	Entropy 1.76661 (1.76805)	Top-1 acc 35.547 (39.791)	Top-5 acc 60.156 (63.719)	lr 0.02414
Train [15][760/3239]	Time 0.208 (0.556)	Data Time 0.002 (0.037)	Loss 3.5665 (3.5656)	Entropy 1.76655 (1.76803)	Top-1 acc 41.797 (39.806)	Top-5 acc 63.672 (63.739)	lr 0.02414
Train [15][770/3239]	Time 0.233 (0.554)	Data Time 0.001 (0.037)	Loss 3.5401 (3.5653)	Entropy 1.76652 (1.76801)	Top-1 acc 42.188 (39.816)	Top-5 acc 63.281 (63.737)	lr 0.02414
Train [15][780/3239]	Time 2.155 (0.552)	Data Time 0.001 (0.036)	Loss 3.4586 (3.5653)	Entropy 1.76652 (1.76799)	Top-1 acc 41.797 (39.822)	Top-5 acc 67.969 (63.735)	lr 0.02414
Train [15][790/3239]	Time 0.269 (0.548)	Data Time 0.001 (0.036)	Loss 3.4434 (3.5654)	Entropy 1.76643 (1.76797)	Top-1 acc 46.484 (39.810)	Top-5 acc 68.750 (63.727)	lr 0.02414
Train [15][800/3239]	Time 0.213 (0.547)	Data Time 0.001 (0.035)	Loss 3.4670 (3.5645)	Entropy 1.76640 (1.76795)	Top-1 acc 40.625 (39.833)	Top-5 acc 64.062 (63.738)	lr 0.02414
Train [15][810/3239]	Time 0.195 (0.545)	Data Time 0.001 (0.035)	Loss 3.5080 (3.5638)	Entropy 1.76639 (1.76793)	Top-1 acc 39.453 (39.863)	Top-5 acc 64.062 (63.765)	lr 0.02414
Train [15][820/3239]	Time 0.227 (0.544)	Data Time 0.002 (0.034)	Loss 3.5701 (3.5639)	Entropy 1.76642 (1.76791)	Top-1 acc 42.578 (39.855)	Top-5 acc 65.234 (63.774)	lr 0.02414
Train [15][830/3239]	Time 0.323 (0.543)	Data Time 0.001 (0.034)	Loss 3.4252 (3.5635)	Entropy 1.76641 (1.76790)	Top-1 acc 40.625 (39.874)	Top-5 acc 66.406 (63.784)	lr 0.02414
Train [15][840/3239]	Time 0.161 (0.541)	Data Time 0.001 (0.034)	Loss 3.9808 (3.5633)	Entropy 1.76635 (1.76788)	Top-1 acc 27.344 (39.876)	Top-5 acc 55.469 (63.788)	lr 0.02414
Train [15][850/3239]	Time 0.210 (0.540)	Data Time 0.001 (0.033)	Loss 3.3132 (3.5629)	Entropy 1.76625 (1.76786)	Top-1 acc 48.047 (39.889)	Top-5 acc 70.703 (63.794)	lr 0.02414
Train [15][860/3239]	Time 0.219 (0.539)	Data Time 0.002 (0.033)	Loss 3.4101 (3.5627)	Entropy 1.76614 (1.76784)	Top-1 acc 44.922 (39.881)	Top-5 acc 66.016 (63.793)	lr 0.02414
Train [15][870/3239]	Time 0.225 (0.537)	Data Time 0.001 (0.033)	Loss 3.6734 (3.5620)	Entropy 1.76608 (1.76782)	Top-1 acc 35.938 (39.897)	Top-5 acc 64.062 (63.823)	lr 0.02414
Train [15][880/3239]	Time 0.189 (0.536)	Data Time 0.001 (0.032)	Loss 3.6082 (3.5623)	Entropy 1.76610 (1.76780)	Top-1 acc 40.625 (39.890)	Top-5 acc 62.500 (63.818)	lr 0.02414
Train [15][890/3239]	Time 2.357 (0.535)	Data Time 0.001 (0.032)	Loss 3.2107 (3.5616)	Entropy 1.76610 (1.76778)	Top-1 acc 45.312 (39.899)	Top-5 acc 71.484 (63.833)	lr 0.02414
Train [15][900/3239]	Time 0.229 (0.531)	Data Time 0.001 (0.032)	Loss 3.5061 (3.5617)	Entropy 1.76602 (1.76776)	Top-1 acc 39.453 (39.898)	Top-5 acc 67.188 (63.842)	lr 0.02414
Train [15][910/3239]	Time 0.343 (0.530)	Data Time 0.001 (0.031)	Loss 3.5123 (3.5622)	Entropy 1.76599 (1.76774)	Top-1 acc 40.234 (39.881)	Top-5 acc 61.328 (63.826)	lr 0.02414
Train [15][920/3239]	Time 0.208 (0.529)	Data Time 0.001 (0.031)	Loss 3.4432 (3.5626)	Entropy 1.76589 (1.76772)	Top-1 acc 44.922 (39.874)	Top-5 acc 67.188 (63.819)	lr 0.02414
Train [15][930/3239]	Time 0.187 (0.528)	Data Time 0.001 (0.031)	Loss 3.6640 (3.5631)	Entropy 1.76585 (1.76770)	Top-1 acc 36.328 (39.853)	Top-5 acc 60.547 (63.807)	lr 0.02414
Train [15][940/3239]	Time 0.198 (0.527)	Data Time 0.001 (0.030)	Loss 3.4651 (3.5636)	Entropy 1.76583 (1.76768)	Top-1 acc 40.234 (39.855)	Top-5 acc 67.188 (63.800)	lr 0.02414
Train [15][950/3239]	Time 0.199 (0.526)	Data Time 0.001 (0.030)	Loss 3.5728 (3.5638)	Entropy 1.76580 (1.76766)	Top-1 acc 41.797 (39.858)	Top-5 acc 66.406 (63.798)	lr 0.02414
Train [15][960/3239]	Time 0.205 (0.525)	Data Time 0.001 (0.030)	Loss 3.3586 (3.5637)	Entropy 1.76578 (1.76764)	Top-1 acc 41.016 (39.852)	Top-5 acc 73.438 (63.798)	lr 0.02413
Train [15][970/3239]	Time 0.192 (0.524)	Data Time 0.001 (0.029)	Loss 3.5095 (3.5634)	Entropy 1.76580 (1.76762)	Top-1 acc 38.281 (39.857)	Top-5 acc 63.672 (63.800)	lr 0.02413
Train [15][980/3239]	Time 0.156 (0.523)	Data Time 0.001 (0.029)	Loss 3.4578 (3.5630)	Entropy 1.76577 (1.76761)	Top-1 acc 42.188 (39.862)	Top-5 acc 67.969 (63.803)	lr 0.02413
Train [15][990/3239]	Time 0.281 (0.522)	Data Time 0.001 (0.029)	Loss 3.4579 (3.5626)	Entropy 1.76576 (1.76759)	Top-1 acc 43.750 (39.869)	Top-5 acc 65.234 (63.806)	lr 0.02413
Train [15][1000/3239]	Time 2.322 (0.521)	Data Time 0.001 (0.029)	Loss 3.5096 (3.5617)	Entropy 1.76576 (1.76757)	Top-1 acc 41.016 (39.883)	Top-5 acc 65.234 (63.826)	lr 0.02413
Train [15][1010/3239]	Time 0.202 (0.518)	Data Time 0.001 (0.028)	Loss 3.8445 (3.5625)	Entropy 1.76570 (1.76755)	Top-1 acc 30.078 (39.873)	Top-5 acc 58.984 (63.809)	lr 0.02413
Train [15][1020/3239]	Time 0.220 (0.517)	Data Time 0.001 (0.028)	Loss 3.7908 (3.5629)	Entropy 1.76568 (1.76753)	Top-1 acc 36.328 (39.863)	Top-5 acc 57.812 (63.799)	lr 0.02413
Train [15][1030/3239]	Time 0.238 (0.516)	Data Time 0.001 (0.028)	Loss 3.4994 (3.5623)	Entropy 1.76566 (1.76751)	Top-1 acc 39.062 (39.874)	Top-5 acc 64.453 (63.810)	lr 0.02413
Train [15][1040/3239]	Time 0.252 (0.550)	Data Time 0.002 (0.028)	Loss 3.6474 (3.5626)	Entropy 1.76563 (1.76750)	Top-1 acc 39.062 (39.866)	Top-5 acc 61.719 (63.799)	lr 0.02413
Train [15][1050/3239]	Time 0.304 (0.550)	Data Time 0.002 (0.027)	Loss 3.3920 (3.5626)	Entropy 1.76555 (1.76748)	Top-1 acc 41.797 (39.866)	Top-5 acc 67.188 (63.797)	lr 0.02413
Train [15][1060/3239]	Time 0.153 (0.549)	Data Time 0.001 (0.027)	Loss 3.5238 (3.5622)	Entropy 1.76555 (1.76746)	Top-1 acc 39.844 (39.876)	Top-5 acc 65.625 (63.811)	lr 0.02413
Train [15][1070/3239]	Time 0.216 (0.548)	Data Time 0.001 (0.027)	Loss 3.4602 (3.5623)	Entropy 1.76549 (1.76744)	Top-1 acc 41.797 (39.863)	Top-5 acc 66.406 (63.810)	lr 0.02413
Train [15][1080/3239]	Time 0.188 (0.546)	Data Time 0.001 (0.027)	Loss 3.5027 (3.5619)	Entropy 1.76548 (1.76742)	Top-1 acc 42.578 (39.867)	Top-5 acc 64.062 (63.815)	lr 0.02413
Train [15][1090/3239]	Time 0.211 (0.545)	Data Time 0.002 (0.026)	Loss 3.6639 (3.5618)	Entropy 1.76545 (1.76741)	Top-1 acc 38.281 (39.868)	Top-5 acc 60.156 (63.820)	lr 0.02413
Train [15][1100/3239]	Time 0.222 (0.544)	Data Time 0.001 (0.026)	Loss 3.4208 (3.5610)	Entropy 1.76547 (1.76739)	Top-1 acc 46.875 (39.888)	Top-5 acc 64.844 (63.829)	lr 0.02413
Train [15][1110/3239]	Time 2.161 (0.543)	Data Time 0.002 (0.026)	Loss 3.7513 (3.5604)	Entropy 1.76547 (1.76737)	Top-1 acc 39.062 (39.913)	Top-5 acc 60.156 (63.845)	lr 0.02413
Train [15][1120/3239]	Time 0.191 (0.540)	Data Time 0.001 (0.026)	Loss 3.8158 (3.5607)	Entropy 1.76537 (1.76735)	Top-1 acc 32.422 (39.900)	Top-5 acc 59.766 (63.841)	lr 0.02413
Train [15][1130/3239]	Time 0.339 (0.539)	Data Time 0.001 (0.026)	Loss 3.5280 (3.5605)	Entropy 1.76531 (1.76734)	Top-1 acc 41.406 (39.906)	Top-5 acc 61.328 (63.843)	lr 0.02413
Train [15][1140/3239]	Time 0.174 (0.538)	Data Time 0.001 (0.025)	Loss 3.7455 (3.5615)	Entropy 1.76528 (1.76732)	Top-1 acc 36.719 (39.885)	Top-5 acc 60.938 (63.827)	lr 0.02413
Train [15][1150/3239]	Time 0.225 (0.537)	Data Time 0.001 (0.025)	Loss 3.6726 (3.5607)	Entropy 1.76524 (1.76730)	Top-1 acc 35.938 (39.891)	Top-5 acc 57.812 (63.841)	lr 0.02413
Train [15][1160/3239]	Time 0.201 (0.536)	Data Time 0.001 (0.025)	Loss 3.7463 (3.5610)	Entropy 1.76513 (1.76728)	Top-1 acc 35.938 (39.886)	Top-5 acc 61.719 (63.835)	lr 0.02413
Train [15][1170/3239]	Time 0.199 (0.535)	Data Time 0.001 (0.025)	Loss 3.6266 (3.5612)	Entropy 1.76510 (1.76726)	Top-1 acc 38.672 (39.879)	Top-5 acc 63.281 (63.827)	lr 0.02413
Train [15][1180/3239]	Time 0.229 (0.534)	Data Time 0.001 (0.025)	Loss 3.6495 (3.5608)	Entropy 1.76502 (1.76724)	Top-1 acc 36.719 (39.891)	Top-5 acc 61.328 (63.833)	lr 0.02413
Train [15][1190/3239]	Time 0.213 (0.533)	Data Time 0.001 (0.024)	Loss 3.5808 (3.5601)	Entropy 1.76497 (1.76723)	Top-1 acc 37.500 (39.895)	Top-5 acc 65.234 (63.844)	lr 0.02413
Train [15][1200/3239]	Time 0.194 (0.532)	Data Time 0.001 (0.024)	Loss 3.6581 (3.5603)	Entropy 1.76490 (1.76721)	Top-1 acc 33.984 (39.893)	Top-5 acc 66.797 (63.853)	lr 0.02413
Train [15][1210/3239]	Time 0.233 (0.531)	Data Time 0.001 (0.024)	Loss 3.5584 (3.5596)	Entropy 1.76486 (1.76719)	Top-1 acc 40.625 (39.909)	Top-5 acc 64.062 (63.865)	lr 0.02413
Train [15][1220/3239]	Time 2.254 (0.531)	Data Time 0.001 (0.024)	Loss 3.4449 (3.5596)	Entropy 1.76486 (1.76717)	Top-1 acc 44.531 (39.910)	Top-5 acc 68.750 (63.872)	lr 0.02413
Train [15][1230/3239]	Time 0.229 (0.528)	Data Time 0.001 (0.024)	Loss 3.4389 (3.5597)	Entropy 1.76483 (1.76715)	Top-1 acc 42.188 (39.917)	Top-5 acc 64.844 (63.864)	lr 0.02412
Train [15][1240/3239]	Time 0.195 (0.527)	Data Time 0.001 (0.024)	Loss 3.6182 (3.5596)	Entropy 1.76478 (1.76713)	Top-1 acc 39.062 (39.914)	Top-5 acc 60.547 (63.866)	lr 0.02412
Train [15][1250/3239]	Time 0.205 (0.526)	Data Time 0.001 (0.023)	Loss 3.7356 (3.5604)	Entropy 1.76470 (1.76711)	Top-1 acc 36.328 (39.897)	Top-5 acc 58.594 (63.852)	lr 0.02412
Train [15][1260/3239]	Time 0.217 (0.526)	Data Time 0.001 (0.023)	Loss 3.4750 (3.5605)	Entropy 1.76467 (1.76709)	Top-1 acc 42.969 (39.897)	Top-5 acc 66.797 (63.842)	lr 0.02412
Train [15][1270/3239]	Time 0.216 (0.525)	Data Time 0.001 (0.023)	Loss 3.5480 (3.5602)	Entropy 1.76461 (1.76707)	Top-1 acc 41.797 (39.908)	Top-5 acc 62.109 (63.842)	lr 0.02412
Train [15][1280/3239]	Time 0.198 (0.524)	Data Time 0.001 (0.023)	Loss 3.6279 (3.5607)	Entropy 1.76456 (1.76705)	Top-1 acc 35.156 (39.902)	Top-5 acc 60.156 (63.835)	lr 0.02412
Train [15][1290/3239]	Time 0.181 (0.523)	Data Time 0.001 (0.023)	Loss 3.4412 (3.5604)	Entropy 1.76447 (1.76703)	Top-1 acc 36.719 (39.910)	Top-5 acc 66.797 (63.853)	lr 0.02412
Train [15][1300/3239]	Time 0.305 (0.522)	Data Time 0.001 (0.023)	Loss 3.4634 (3.5604)	Entropy 1.76430 (1.76701)	Top-1 acc 41.797 (39.918)	Top-5 acc 67.578 (63.855)	lr 0.02412
Train [15][1310/3239]	Time 0.227 (0.522)	Data Time 0.001 (0.022)	Loss 3.6276 (3.5606)	Entropy 1.76423 (1.76699)	Top-1 acc 41.406 (39.918)	Top-5 acc 60.547 (63.851)	lr 0.02412
Train [15][1320/3239]	Time 0.183 (0.521)	Data Time 0.001 (0.022)	Loss 3.8086 (3.5609)	Entropy 1.76421 (1.76697)	Top-1 acc 35.156 (39.904)	Top-5 acc 59.766 (63.844)	lr 0.02412
Train [15][1330/3239]	Time 2.280 (0.520)	Data Time 0.001 (0.022)	Loss 3.5621 (3.5605)	Entropy 1.76421 (1.76695)	Top-1 acc 39.453 (39.911)	Top-5 acc 64.062 (63.856)	lr 0.02412
Train [15][1340/3239]	Time 0.145 (0.518)	Data Time 0.001 (0.022)	Loss 3.4796 (3.5608)	Entropy 1.76421 (1.76693)	Top-1 acc 42.969 (39.912)	Top-5 acc 67.578 (63.854)	lr 0.02412
Train [15][1350/3239]	Time 0.210 (0.517)	Data Time 0.001 (0.022)	Loss 3.9716 (3.5606)	Entropy 1.76421 (1.76691)	Top-1 acc 33.984 (39.916)	Top-5 acc 54.688 (63.858)	lr 0.02412
Train [15][1360/3239]	Time 0.209 (0.517)	Data Time 0.001 (0.022)	Loss 3.5118 (3.5614)	Entropy 1.76418 (1.76689)	Top-1 acc 39.453 (39.904)	Top-5 acc 63.281 (63.841)	lr 0.02412
Train [15][1370/3239]	Time 0.202 (0.516)	Data Time 0.001 (0.022)	Loss 3.3905 (3.5610)	Entropy 1.76409 (1.76687)	Top-1 acc 44.141 (39.908)	Top-5 acc 67.188 (63.849)	lr 0.02412
Train [15][1380/3239]	Time 0.339 (0.516)	Data Time 0.001 (0.021)	Loss 3.5407 (3.5616)	Entropy 1.76410 (1.76685)	Top-1 acc 41.016 (39.897)	Top-5 acc 63.672 (63.831)	lr 0.02412
Train [15][1390/3239]	Time 0.247 (0.515)	Data Time 0.001 (0.021)	Loss 3.4037 (3.5615)	Entropy 1.76408 (1.76683)	Top-1 acc 45.312 (39.900)	Top-5 acc 63.281 (63.838)	lr 0.02412
Train [15][1400/3239]	Time 0.210 (0.545)	Data Time 0.002 (0.021)	Loss 3.5147 (3.5610)	Entropy 1.76403 (1.76681)	Top-1 acc 42.188 (39.912)	Top-5 acc 66.016 (63.852)	lr 0.02412
Train [15][1410/3239]	Time 0.201 (0.544)	Data Time 0.002 (0.021)	Loss 3.6319 (3.5613)	Entropy 1.76404 (1.76679)	Top-1 acc 37.891 (39.902)	Top-5 acc 58.203 (63.844)	lr 0.02412
Train [15][1420/3239]	Time 0.135 (0.543)	Data Time 0.002 (0.021)	Loss 3.7684 (3.5621)	Entropy 1.76403 (1.76677)	Top-1 acc 38.281 (39.891)	Top-5 acc 60.156 (63.831)	lr 0.02412
Train [15][1430/3239]	Time 0.215 (0.542)	Data Time 0.001 (0.021)	Loss 3.3490 (3.5618)	Entropy 1.76399 (1.76675)	Top-1 acc 44.922 (39.903)	Top-5 acc 68.359 (63.838)	lr 0.02412
Train [15][1440/3239]	Time 2.435 (0.542)	Data Time 0.002 (0.021)	Loss 3.6122 (3.5621)	Entropy 1.76399 (1.76673)	Top-1 acc 37.109 (39.894)	Top-5 acc 61.328 (63.823)	lr 0.02412
Train [15][1450/3239]	Time 0.209 (0.539)	Data Time 0.001 (0.020)	Loss 3.3295 (3.5618)	Entropy 1.76391 (1.76671)	Top-1 acc 44.531 (39.906)	Top-5 acc 66.016 (63.831)	lr 0.02412
Train [15][1460/3239]	Time 0.207 (0.539)	Data Time 0.001 (0.020)	Loss 3.5363 (3.5613)	Entropy 1.76391 (1.76669)	Top-1 acc 40.234 (39.911)	Top-5 acc 64.844 (63.837)	lr 0.02412
Train [15][1470/3239]	Time 0.146 (0.538)	Data Time 0.001 (0.020)	Loss 3.6174 (3.5612)	Entropy 1.76389 (1.76667)	Top-1 acc 39.062 (39.916)	Top-5 acc 62.500 (63.835)	lr 0.02412
Train [15][1480/3239]	Time 0.246 (0.537)	Data Time 0.001 (0.020)	Loss 3.7705 (3.5614)	Entropy 1.76382 (1.76666)	Top-1 acc 37.109 (39.918)	Top-5 acc 56.250 (63.827)	lr 0.02412
Train [15][1490/3239]	Time 0.220 (0.536)	Data Time 0.001 (0.020)	Loss 3.5088 (3.5611)	Entropy 1.76375 (1.76664)	Top-1 acc 42.578 (39.924)	Top-5 acc 64.062 (63.825)	lr 0.02411
Train [15][1500/3239]	Time 0.219 (0.536)	Data Time 0.001 (0.020)	Loss 3.6154 (3.5612)	Entropy 1.76368 (1.76662)	Top-1 acc 38.672 (39.921)	Top-5 acc 61.719 (63.824)	lr 0.02411
Train [15][1510/3239]	Time 0.259 (0.535)	Data Time 0.002 (0.020)	Loss 3.5524 (3.5613)	Entropy 1.76357 (1.76660)	Top-1 acc 42.969 (39.917)	Top-5 acc 65.625 (63.826)	lr 0.02411
Train [15][1520/3239]	Time 0.345 (0.534)	Data Time 0.001 (0.020)	Loss 3.6268 (3.5613)	Entropy 1.76356 (1.76658)	Top-1 acc 39.453 (39.907)	Top-5 acc 62.109 (63.830)	lr 0.02411
Train [15][1530/3239]	Time 0.212 (0.534)	Data Time 0.002 (0.020)	Loss 3.6845 (3.5615)	Entropy 1.76360 (1.76656)	Top-1 acc 35.938 (39.902)	Top-5 acc 62.500 (63.822)	lr 0.02411
Train [15][1540/3239]	Time 0.257 (0.533)	Data Time 0.003 (0.019)	Loss 3.6246 (3.5615)	Entropy 1.76349 (1.76654)	Top-1 acc 44.141 (39.906)	Top-5 acc 66.016 (63.822)	lr 0.02411
Train [15][1550/3239]	Time 2.196 (0.532)	Data Time 0.001 (0.019)	Loss 3.5783 (3.5616)	Entropy 1.76349 (1.76652)	Top-1 acc 39.844 (39.898)	Top-5 acc 67.188 (63.821)	lr 0.02411
Train [15][1560/3239]	Time 0.215 (0.530)	Data Time 0.001 (0.019)	Loss 3.4378 (3.5619)	Entropy 1.76342 (1.76650)	Top-1 acc 38.281 (39.890)	Top-5 acc 66.797 (63.817)	lr 0.02411
Train [15][1570/3239]	Time 0.160 (0.530)	Data Time 0.001 (0.019)	Loss 3.6511 (3.5619)	Entropy 1.76336 (1.76648)	Top-1 acc 36.328 (39.889)	Top-5 acc 64.062 (63.817)	lr 0.02411
Train [15][1580/3239]	Time 0.305 (0.529)	Data Time 0.001 (0.019)	Loss 3.7369 (3.5621)	Entropy 1.76335 (1.76646)	Top-1 acc 35.156 (39.883)	Top-5 acc 59.766 (63.818)	lr 0.02411
Train [15][1590/3239]	Time 0.213 (0.528)	Data Time 0.001 (0.019)	Loss 3.5413 (3.5624)	Entropy 1.76331 (1.76644)	Top-1 acc 39.062 (39.878)	Top-5 acc 62.891 (63.812)	lr 0.02411
Train [15][1600/3239]	Time 0.204 (0.528)	Data Time 0.001 (0.019)	Loss 3.3459 (3.5622)	Entropy 1.76327 (1.76642)	Top-1 acc 44.141 (39.881)	Top-5 acc 67.578 (63.813)	lr 0.02411
Train [15][1610/3239]	Time 0.159 (0.527)	Data Time 0.001 (0.019)	Loss 3.3265 (3.5620)	Entropy 1.76311 (1.76640)	Top-1 acc 45.703 (39.892)	Top-5 acc 67.969 (63.821)	lr 0.02411
Train [15][1620/3239]	Time 0.212 (0.527)	Data Time 0.001 (0.019)	Loss 3.5382 (3.5618)	Entropy 1.76309 (1.76638)	Top-1 acc 36.328 (39.896)	Top-5 acc 65.625 (63.824)	lr 0.02411
Train [15][1630/3239]	Time 0.198 (0.526)	Data Time 0.001 (0.018)	Loss 3.7052 (3.5620)	Entropy 1.76305 (1.76636)	Top-1 acc 39.062 (39.903)	Top-5 acc 62.500 (63.817)	lr 0.02411
Train [15][1640/3239]	Time 0.198 (0.526)	Data Time 0.001 (0.018)	Loss 3.5581 (3.5619)	Entropy 1.76296 (1.76634)	Top-1 acc 42.188 (39.906)	Top-5 acc 65.234 (63.820)	lr 0.02411
Train [15][1650/3239]	Time 0.376 (0.525)	Data Time 0.001 (0.018)	Loss 3.4635 (3.5616)	Entropy 1.76291 (1.76632)	Top-1 acc 41.406 (39.910)	Top-5 acc 65.234 (63.828)	lr 0.02411
Train [15][1660/3239]	Time 2.402 (0.525)	Data Time 0.001 (0.018)	Loss 3.4321 (3.5615)	Entropy 1.76291 (1.76630)	Top-1 acc 42.578 (39.912)	Top-5 acc 67.188 (63.831)	lr 0.02411
Train [15][1670/3239]	Time 0.277 (0.523)	Data Time 0.003 (0.018)	Loss 3.5728 (3.5613)	Entropy 1.76288 (1.76628)	Top-1 acc 39.844 (39.917)	Top-5 acc 64.453 (63.833)	lr 0.02411
Train [15][1680/3239]	Time 0.193 (0.523)	Data Time 0.002 (0.018)	Loss 3.5816 (3.5610)	Entropy 1.76281 (1.76626)	Top-1 acc 39.062 (39.923)	Top-5 acc 64.453 (63.838)	lr 0.02411
Train [15][1690/3239]	Time 0.167 (0.522)	Data Time 0.001 (0.018)	Loss 3.4113 (3.5607)	Entropy 1.76275 (1.76624)	Top-1 acc 46.094 (39.927)	Top-5 acc 66.797 (63.848)	lr 0.02411
Train [15][1700/3239]	Time 0.205 (0.522)	Data Time 0.001 (0.018)	Loss 3.5671 (3.5605)	Entropy 1.76273 (1.76622)	Top-1 acc 40.234 (39.927)	Top-5 acc 62.891 (63.857)	lr 0.02411
Train [15][1710/3239]	Time 0.211 (0.521)	Data Time 0.001 (0.018)	Loss 3.4979 (3.5604)	Entropy 1.76272 (1.76620)	Top-1 acc 41.016 (39.930)	Top-5 acc 66.797 (63.864)	lr 0.02411
Train [15][1720/3239]	Time 0.210 (0.521)	Data Time 0.001 (0.018)	Loss 3.6218 (3.5604)	Entropy 1.76267 (1.76617)	Top-1 acc 41.406 (39.930)	Top-5 acc 62.500 (63.868)	lr 0.02411
Train [15][1730/3239]	Time 0.223 (0.520)	Data Time 0.001 (0.018)	Loss 3.7118 (3.5604)	Entropy 1.76266 (1.76615)	Top-1 acc 39.453 (39.930)	Top-5 acc 58.203 (63.866)	lr 0.02411
Train [15][1740/3239]	Time 0.205 (0.519)	Data Time 0.001 (0.017)	Loss 3.3199 (3.5601)	Entropy 1.76266 (1.76613)	Top-1 acc 46.484 (39.933)	Top-5 acc 67.969 (63.873)	lr 0.02411
Train [15][1750/3239]	Time 0.178 (0.519)	Data Time 0.001 (0.017)	Loss 3.6408 (3.5600)	Entropy 1.76269 (1.76611)	Top-1 acc 37.500 (39.937)	Top-5 acc 63.672 (63.878)	lr 0.02411
Train [15][1760/3239]	Time 0.267 (0.541)	Data Time 0.004 (0.017)	Loss 3.9654 (3.5606)	Entropy 1.76263 (1.76610)	Top-1 acc 32.422 (39.922)	Top-5 acc 54.297 (63.861)	lr 0.02410
Train [15][1770/3239]	Time 2.444 (0.541)	Data Time 0.002 (0.017)	Loss 3.4264 (3.5607)	Entropy 1.76263 (1.76608)	Top-1 acc 40.234 (39.917)	Top-5 acc 64.844 (63.854)	lr 0.02410
Train [15][1780/3239]	Time 0.245 (0.539)	Data Time 0.002 (0.017)	Loss 3.6188 (3.5606)	Entropy 1.76264 (1.76606)	Top-1 acc 40.625 (39.917)	Top-5 acc 61.719 (63.856)	lr 0.02410
Train [15][1790/3239]	Time 0.192 (0.538)	Data Time 0.002 (0.017)	Loss 3.7041 (3.5611)	Entropy 1.76261 (1.76604)	Top-1 acc 38.672 (39.907)	Top-5 acc 60.938 (63.847)	lr 0.02410
Train [15][1800/3239]	Time 0.284 (0.538)	Data Time 0.001 (0.017)	Loss 3.4857 (3.5612)	Entropy 1.76253 (1.76602)	Top-1 acc 42.969 (39.906)	Top-5 acc 61.719 (63.842)	lr 0.02410
Train [15][1810/3239]	Time 0.170 (0.537)	Data Time 0.001 (0.017)	Loss 3.4435 (3.5612)	Entropy 1.76246 (1.76600)	Top-1 acc 44.141 (39.903)	Top-5 acc 66.016 (63.846)	lr 0.02410
Train [15][1820/3239]	Time 0.230 (0.537)	Data Time 0.001 (0.017)	Loss 3.8154 (3.5611)	Entropy 1.76244 (1.76598)	Top-1 acc 35.938 (39.906)	Top-5 acc 57.812 (63.846)	lr 0.02410
Train [15][1830/3239]	Time 0.199 (0.536)	Data Time 0.001 (0.017)	Loss 3.6194 (3.5609)	Entropy 1.76241 (1.76596)	Top-1 acc 40.625 (39.915)	Top-5 acc 66.016 (63.853)	lr 0.02410
Train [15][1840/3239]	Time 0.174 (0.536)	Data Time 0.003 (0.017)	Loss 3.5628 (3.5609)	Entropy 1.76238 (1.76594)	Top-1 acc 41.406 (39.919)	Top-5 acc 61.719 (63.853)	lr 0.02410
Train [15][1850/3239]	Time 0.211 (0.535)	Data Time 0.001 (0.017)	Loss 3.5217 (3.5607)	Entropy 1.76226 (1.76592)	Top-1 acc 41.016 (39.922)	Top-5 acc 64.844 (63.858)	lr 0.02410
Train [15][1860/3239]	Time 0.215 (0.534)	Data Time 0.001 (0.017)	Loss 3.5402 (3.5605)	Entropy 1.76216 (1.76590)	Top-1 acc 37.109 (39.920)	Top-5 acc 65.234 (63.866)	lr 0.02410
Train [15][1870/3239]	Time 0.228 (0.534)	Data Time 0.001 (0.016)	Loss 3.5707 (3.5605)	Entropy 1.76203 (1.76588)	Top-1 acc 37.891 (39.918)	Top-5 acc 65.234 (63.865)	lr 0.02410
Train [15][1880/3239]	Time 2.270 (0.533)	Data Time 0.001 (0.016)	Loss 3.6431 (3.5603)	Entropy 1.76203 (1.76586)	Top-1 acc 40.625 (39.926)	Top-5 acc 62.500 (63.874)	lr 0.02410
Train [15][1890/3239]	Time 0.217 (0.532)	Data Time 0.001 (0.016)	Loss 3.4919 (3.5599)	Entropy 1.76199 (1.76584)	Top-1 acc 43.750 (39.938)	Top-5 acc 69.531 (63.885)	lr 0.02410
Train [15][1900/3239]	Time 0.219 (0.531)	Data Time 0.001 (0.016)	Loss 3.5359 (3.5597)	Entropy 1.76193 (1.76582)	Top-1 acc 39.844 (39.943)	Top-5 acc 64.844 (63.888)	lr 0.02410
Train [15][1910/3239]	Time 0.221 (0.531)	Data Time 0.001 (0.016)	Loss 3.5926 (3.5594)	Entropy 1.76191 (1.76580)	Top-1 acc 42.578 (39.954)	Top-5 acc 62.109 (63.898)	lr 0.02410
Train [15][1920/3239]	Time 0.164 (0.530)	Data Time 0.001 (0.016)	Loss 3.5860 (3.5596)	Entropy 1.76189 (1.76578)	Top-1 acc 39.062 (39.953)	Top-5 acc 63.281 (63.899)	lr 0.02410
Train [15][1930/3239]	Time 0.241 (0.529)	Data Time 0.001 (0.016)	Loss 3.6910 (3.5597)	Entropy 1.76183 (1.76576)	Top-1 acc 37.500 (39.948)	Top-5 acc 60.938 (63.899)	lr 0.02410
Train [15][1940/3239]	Time 0.197 (0.529)	Data Time 0.001 (0.016)	Loss 3.4315 (3.5596)	Entropy 1.76180 (1.76574)	Top-1 acc 43.750 (39.953)	Top-5 acc 65.625 (63.900)	lr 0.02410
Train [15][1950/3239]	Time 0.297 (0.528)	Data Time 0.001 (0.016)	Loss 3.5346 (3.5602)	Entropy 1.76178 (1.76572)	Top-1 acc 42.188 (39.944)	Top-5 acc 65.234 (63.884)	lr 0.02410
Train [15][1960/3239]	Time 0.235 (0.528)	Data Time 0.001 (0.016)	Loss 3.6004 (3.5604)	Entropy 1.76175 (1.76570)	Top-1 acc 37.891 (39.941)	Top-5 acc 64.453 (63.883)	lr 0.02410
Train [15][1970/3239]	Time 0.223 (0.527)	Data Time 0.001 (0.016)	Loss 3.4967 (3.5605)	Entropy 1.76172 (1.76568)	Top-1 acc 39.453 (39.940)	Top-5 acc 66.406 (63.883)	lr 0.02410
Train [15][1980/3239]	Time 0.159 (0.527)	Data Time 0.001 (0.016)	Loss 3.5823 (3.5603)	Entropy 1.76167 (1.76566)	Top-1 acc 41.016 (39.940)	Top-5 acc 63.281 (63.882)	lr 0.02410
Train [15][1990/3239]	Time 2.300 (0.527)	Data Time 0.001 (0.016)	Loss 3.5616 (3.5601)	Entropy 1.76167 (1.76564)	Top-1 acc 40.234 (39.945)	Top-5 acc 65.625 (63.891)	lr 0.02410
Train [15][2000/3239]	Time 0.170 (0.525)	Data Time 0.001 (0.016)	Loss 3.6969 (3.5600)	Entropy 1.76166 (1.76562)	Top-1 acc 39.453 (39.945)	Top-5 acc 59.766 (63.890)	lr 0.02410
Train [15][2010/3239]	Time 0.153 (0.524)	Data Time 0.001 (0.016)	Loss 3.5790 (3.5601)	Entropy 1.76156 (1.76560)	Top-1 acc 39.844 (39.939)	Top-5 acc 61.719 (63.890)	lr 0.02410
Train [15][2020/3239]	Time 0.157 (0.524)	Data Time 0.001 (0.015)	Loss 3.4447 (3.5601)	Entropy 1.76155 (1.76558)	Top-1 acc 39.844 (39.938)	Top-5 acc 65.625 (63.897)	lr 0.02410
Train [15][2030/3239]	Time 0.192 (0.523)	Data Time 0.002 (0.015)	Loss 3.5042 (3.5599)	Entropy 1.76152 (1.76556)	Top-1 acc 41.016 (39.946)	Top-5 acc 62.500 (63.897)	lr 0.02409
Train [15][2040/3239]	Time 0.315 (0.523)	Data Time 0.001 (0.015)	Loss 3.6222 (3.5598)	Entropy 1.76146 (1.76554)	Top-1 acc 41.016 (39.949)	Top-5 acc 64.453 (63.898)	lr 0.02409
Train [15][2050/3239]	Time 0.222 (0.523)	Data Time 0.002 (0.015)	Loss 3.4526 (3.5598)	Entropy 1.76145 (1.76552)	Top-1 acc 42.578 (39.946)	Top-5 acc 65.234 (63.901)	lr 0.02409
Train [15][2060/3239]	Time 0.199 (0.522)	Data Time 0.001 (0.015)	Loss 3.6556 (3.5598)	Entropy 1.76139 (1.76550)	Top-1 acc 37.109 (39.941)	Top-5 acc 58.594 (63.900)	lr 0.02409
Train [15][2070/3239]	Time 0.226 (0.522)	Data Time 0.001 (0.015)	Loss 3.5518 (3.5600)	Entropy 1.76130 (1.76548)	Top-1 acc 39.062 (39.941)	Top-5 acc 66.797 (63.898)	lr 0.02409
Train [15][2080/3239]	Time 0.192 (0.521)	Data Time 0.001 (0.015)	Loss 3.5537 (3.5600)	Entropy 1.76128 (1.76546)	Top-1 acc 37.891 (39.937)	Top-5 acc 64.453 (63.896)	lr 0.02409
Train [15][2090/3239]	Time 0.276 (0.521)	Data Time 0.001 (0.015)	Loss 3.4832 (3.5602)	Entropy 1.76126 (1.76544)	Top-1 acc 42.578 (39.931)	Top-5 acc 64.453 (63.890)	lr 0.02409
Train [15][2100/3239]	Time 2.227 (0.520)	Data Time 0.001 (0.015)	Loss 3.6907 (3.5603)	Entropy 1.76126 (1.76542)	Top-1 acc 40.625 (39.930)	Top-5 acc 58.984 (63.887)	lr 0.02409
Train [15][2110/3239]	Time 0.305 (0.519)	Data Time 0.001 (0.015)	Loss 3.5690 (3.5602)	Entropy 1.76119 (1.76540)	Top-1 acc 40.234 (39.936)	Top-5 acc 65.234 (63.893)	lr 0.02409
Train [15][2120/3239]	Time 0.166 (0.519)	Data Time 0.001 (0.015)	Loss 3.5117 (3.5598)	Entropy 1.76115 (1.76538)	Top-1 acc 39.844 (39.942)	Top-5 acc 64.453 (63.900)	lr 0.02409
Train [15][2130/3239]	Time 0.543 (0.535)	Data Time 0.006 (0.015)	Loss 3.6397 (3.5601)	Entropy 1.76113 (1.76536)	Top-1 acc 38.672 (39.938)	Top-5 acc 60.938 (63.894)	lr 0.02409
Train [15][2140/3239]	Time 0.166 (0.535)	Data Time 0.002 (0.015)	Loss 3.5584 (3.5603)	Entropy 1.76107 (1.76534)	Top-1 acc 38.672 (39.934)	Top-5 acc 62.891 (63.889)	lr 0.02409
Train [15][2150/3239]	Time 0.213 (0.535)	Data Time 0.001 (0.015)	Loss 3.6115 (3.5600)	Entropy 1.76105 (1.76532)	Top-1 acc 41.016 (39.939)	Top-5 acc 64.844 (63.895)	lr 0.02409
Train [15][2160/3239]	Time 0.226 (0.534)	Data Time 0.002 (0.015)	Loss 3.2884 (3.5599)	Entropy 1.76101 (1.76530)	Top-1 acc 47.266 (39.945)	Top-5 acc 69.922 (63.899)	lr 0.02409
Train [15][2170/3239]	Time 0.254 (0.534)	Data Time 0.001 (0.015)	Loss 3.4889 (3.5600)	Entropy 1.76102 (1.76528)	Top-1 acc 41.406 (39.942)	Top-5 acc 62.109 (63.893)	lr 0.02409
Train [15][2180/3239]	Time 0.326 (0.533)	Data Time 0.001 (0.015)	Loss 3.7218 (3.5598)	Entropy 1.76097 (1.76526)	Top-1 acc 33.594 (39.948)	Top-5 acc 60.156 (63.897)	lr 0.02409
Train [15][2190/3239]	Time 0.219 (0.533)	Data Time 0.001 (0.014)	Loss 3.7412 (3.5598)	Entropy 1.76088 (1.76524)	Top-1 acc 37.500 (39.946)	Top-5 acc 58.594 (63.896)	lr 0.02409
Train [15][2200/3239]	Time 0.181 (0.532)	Data Time 0.001 (0.014)	Loss 3.4863 (3.5599)	Entropy 1.76085 (1.76522)	Top-1 acc 40.625 (39.945)	Top-5 acc 66.406 (63.894)	lr 0.02409
Train [15][2210/3239]	Time 2.185 (0.532)	Data Time 0.001 (0.014)	Loss 3.4493 (3.5598)	Entropy 1.76085 (1.76520)	Top-1 acc 41.797 (39.942)	Top-5 acc 65.625 (63.895)	lr 0.02409
Train [15][2220/3239]	Time 0.228 (0.530)	Data Time 0.001 (0.014)	Loss 3.6998 (3.5596)	Entropy 1.76091 (1.76518)	Top-1 acc 37.891 (39.942)	Top-5 acc 62.109 (63.899)	lr 0.02409
Train [15][2230/3239]	Time 0.144 (0.530)	Data Time 0.001 (0.014)	Loss 3.4809 (3.5595)	Entropy 1.76086 (1.76516)	Top-1 acc 37.500 (39.938)	Top-5 acc 69.141 (63.902)	lr 0.02409
Train [15][2240/3239]	Time 0.198 (0.529)	Data Time 0.002 (0.014)	Loss 3.5248 (3.5595)	Entropy 1.76077 (1.76514)	Top-1 acc 45.703 (39.940)	Top-5 acc 65.625 (63.903)	lr 0.02409
Train [15][2250/3239]	Time 0.226 (0.529)	Data Time 0.001 (0.014)	Loss 3.4639 (3.5594)	Entropy 1.76076 (1.76512)	Top-1 acc 42.188 (39.945)	Top-5 acc 65.625 (63.906)	lr 0.02409
Train [15][2260/3239]	Time 0.217 (0.528)	Data Time 0.001 (0.014)	Loss 3.5260 (3.5593)	Entropy 1.76070 (1.76510)	Top-1 acc 40.625 (39.947)	Top-5 acc 65.625 (63.905)	lr 0.02409
Train [15][2270/3239]	Time 0.263 (0.528)	Data Time 0.001 (0.014)	Loss 3.4400 (3.5594)	Entropy 1.76070 (1.76508)	Top-1 acc 46.094 (39.945)	Top-5 acc 66.016 (63.900)	lr 0.02409
Train [15][2280/3239]	Time 0.251 (0.528)	Data Time 0.001 (0.014)	Loss 3.7929 (3.5595)	Entropy 1.76067 (1.76506)	Top-1 acc 30.078 (39.943)	Top-5 acc 59.375 (63.898)	lr 0.02409
Train [15][2290/3239]	Time 0.152 (0.527)	Data Time 0.001 (0.014)	Loss 3.4217 (3.5591)	Entropy 1.76062 (1.76504)	Top-1 acc 42.578 (39.951)	Top-5 acc 67.969 (63.909)	lr 0.02408
Train [15][2300/3239]	Time 0.232 (0.527)	Data Time 0.001 (0.014)	Loss 3.4530 (3.5589)	Entropy 1.76055 (1.76503)	Top-1 acc 42.188 (39.962)	Top-5 acc 67.188 (63.914)	lr 0.02408
Train [15][2310/3239]	Time 0.198 (0.526)	Data Time 0.001 (0.014)	Loss 3.4937 (3.5588)	Entropy 1.76054 (1.76501)	Top-1 acc 42.578 (39.965)	Top-5 acc 64.062 (63.918)	lr 0.02408
Train [15][2320/3239]	Time 2.222 (0.526)	Data Time 0.001 (0.014)	Loss 3.6594 (3.5591)	Entropy 1.76054 (1.76499)	Top-1 acc 38.672 (39.954)	Top-5 acc 60.938 (63.912)	lr 0.02408
Train [15][2330/3239]	Time 0.142 (0.524)	Data Time 0.001 (0.014)	Loss 3.7115 (3.5592)	Entropy 1.76051 (1.76497)	Top-1 acc 35.156 (39.952)	Top-5 acc 58.984 (63.906)	lr 0.02408
Train [15][2340/3239]	Time 0.310 (0.524)	Data Time 0.001 (0.014)	Loss 3.4482 (3.5591)	Entropy 1.76048 (1.76495)	Top-1 acc 42.578 (39.954)	Top-5 acc 67.578 (63.909)	lr 0.02408
Train [15][2350/3239]	Time 0.201 (0.524)	Data Time 0.001 (0.014)	Loss 3.4924 (3.5591)	Entropy 1.76047 (1.76493)	Top-1 acc 44.922 (39.954)	Top-5 acc 63.281 (63.908)	lr 0.02408
Train [15][2360/3239]	Time 0.173 (0.523)	Data Time 0.002 (0.014)	Loss 3.4698 (3.5591)	Entropy 1.76043 (1.76491)	Top-1 acc 38.672 (39.954)	Top-5 acc 68.359 (63.907)	lr 0.02408
Train [15][2370/3239]	Time 0.241 (0.523)	Data Time 0.001 (0.014)	Loss 3.5035 (3.5590)	Entropy 1.76028 (1.76489)	Top-1 acc 39.453 (39.959)	Top-5 acc 66.797 (63.909)	lr 0.02408
Train [15][2380/3239]	Time 0.197 (0.523)	Data Time 0.001 (0.014)	Loss 3.5066 (3.5590)	Entropy 1.76020 (1.76487)	Top-1 acc 40.234 (39.953)	Top-5 acc 66.016 (63.907)	lr 0.02408
Train [15][2390/3239]	Time 0.216 (0.522)	Data Time 0.001 (0.013)	Loss 3.4751 (3.5590)	Entropy 1.76012 (1.76485)	Top-1 acc 38.672 (39.952)	Top-5 acc 66.016 (63.908)	lr 0.02408
Train [15][2400/3239]	Time 0.221 (0.522)	Data Time 0.002 (0.013)	Loss 3.4913 (3.5591)	Entropy 1.76007 (1.76483)	Top-1 acc 39.844 (39.952)	Top-5 acc 66.797 (63.907)	lr 0.02408
Train [15][2410/3239]	Time 0.199 (0.521)	Data Time 0.001 (0.013)	Loss 3.7206 (3.5590)	Entropy 1.76001 (1.76481)	Top-1 acc 34.375 (39.952)	Top-5 acc 58.203 (63.908)	lr 0.02408
Train [15][2420/3239]	Time 0.324 (0.521)	Data Time 0.001 (0.013)	Loss 3.2718 (3.5588)	Entropy 1.75993 (1.76479)	Top-1 acc 48.047 (39.960)	Top-5 acc 71.484 (63.913)	lr 0.02408
Train [15][2430/3239]	Time 2.274 (0.521)	Data Time 0.001 (0.013)	Loss 3.3336 (3.5585)	Entropy 1.75993 (1.76477)	Top-1 acc 43.750 (39.965)	Top-5 acc 70.312 (63.920)	lr 0.02408
Train [15][2440/3239]	Time 0.162 (0.519)	Data Time 0.001 (0.013)	Loss 3.6450 (3.5585)	Entropy 1.75987 (1.76475)	Top-1 acc 39.453 (39.963)	Top-5 acc 60.156 (63.921)	lr 0.02408
Train [15][2450/3239]	Time 0.230 (0.519)	Data Time 0.001 (0.013)	Loss 3.2870 (3.5584)	Entropy 1.75981 (1.76473)	Top-1 acc 43.359 (39.962)	Top-5 acc 67.188 (63.922)	lr 0.02408
Train [15][2460/3239]	Time 0.205 (0.519)	Data Time 0.001 (0.013)	Loss 3.5745 (3.5582)	Entropy 1.75981 (1.76471)	Top-1 acc 42.188 (39.961)	Top-5 acc 65.625 (63.926)	lr 0.02408
Train [15][2470/3239]	Time 0.199 (0.518)	Data Time 0.001 (0.013)	Loss 3.4821 (3.5583)	Entropy 1.75977 (1.76469)	Top-1 acc 42.969 (39.965)	Top-5 acc 65.234 (63.923)	lr 0.02408
Train [15][2480/3239]	Time 0.190 (0.518)	Data Time 0.001 (0.013)	Loss 3.7816 (3.5582)	Entropy 1.75972 (1.76467)	Top-1 acc 36.328 (39.968)	Top-5 acc 62.109 (63.928)	lr 0.02408
Train [15][2490/3239]	Time 0.563 (0.533)	Data Time 0.003 (0.013)	Loss 3.6403 (3.5581)	Entropy 1.75964 (1.76465)	Top-1 acc 37.109 (39.967)	Top-5 acc 60.547 (63.929)	lr 0.02408
Train [15][2500/3239]	Time 0.201 (0.532)	Data Time 0.002 (0.013)	Loss 3.4908 (3.5582)	Entropy 1.75958 (1.76463)	Top-1 acc 45.703 (39.965)	Top-5 acc 67.969 (63.929)	lr 0.02408
Train [15][2510/3239]	Time 0.211 (0.532)	Data Time 0.001 (0.013)	Loss 3.4873 (3.5583)	Entropy 1.75954 (1.76461)	Top-1 acc 39.844 (39.967)	Top-5 acc 63.672 (63.926)	lr 0.02408
Train [15][2520/3239]	Time 0.200 (0.531)	Data Time 0.001 (0.013)	Loss 3.4899 (3.5582)	Entropy 1.75953 (1.76459)	Top-1 acc 39.844 (39.971)	Top-5 acc 64.453 (63.928)	lr 0.02408
Train [15][2530/3239]	Time 0.234 (0.531)	Data Time 0.001 (0.013)	Loss 3.5067 (3.5580)	Entropy 1.75946 (1.76457)	Top-1 acc 38.672 (39.974)	Top-5 acc 64.453 (63.931)	lr 0.02408
Train [15][2540/3239]	Time 2.479 (0.531)	Data Time 0.001 (0.013)	Loss 3.3870 (3.5582)	Entropy 1.75946 (1.76455)	Top-1 acc 44.531 (39.973)	Top-5 acc 67.969 (63.927)	lr 0.02408
Train [15][2550/3239]	Time 0.198 (0.530)	Data Time 0.001 (0.013)	Loss 3.5909 (3.5582)	Entropy 1.75941 (1.76453)	Top-1 acc 38.281 (39.975)	Top-5 acc 65.625 (63.925)	lr 0.02407
Train [15][2560/3239]	Time 0.219 (0.529)	Data Time 0.001 (0.013)	Loss 3.4805 (3.5582)	Entropy 1.75938 (1.76451)	Top-1 acc 39.844 (39.979)	Top-5 acc 66.406 (63.925)	lr 0.02407
Train [15][2570/3239]	Time 0.340 (0.529)	Data Time 0.001 (0.013)	Loss 3.4836 (3.5582)	Entropy 1.75935 (1.76449)	Top-1 acc 44.141 (39.976)	Top-5 acc 64.844 (63.924)	lr 0.02407
Train [15][2580/3239]	Time 0.236 (0.528)	Data Time 0.001 (0.013)	Loss 3.2778 (3.5581)	Entropy 1.75931 (1.76447)	Top-1 acc 43.750 (39.978)	Top-5 acc 71.094 (63.926)	lr 0.02407
Train [15][2590/3239]	Time 0.193 (0.528)	Data Time 0.001 (0.013)	Loss 3.4128 (3.5580)	Entropy 1.75921 (1.76445)	Top-1 acc 44.922 (39.978)	Top-5 acc 67.578 (63.929)	lr 0.02407
Train [15][2600/3239]	Time 0.242 (0.528)	Data Time 0.001 (0.013)	Loss 3.4129 (3.5580)	Entropy 1.75919 (1.76443)	Top-1 acc 43.750 (39.977)	Top-5 acc 67.969 (63.926)	lr 0.02407
Train [15][2610/3239]	Time 0.155 (0.527)	Data Time 0.001 (0.013)	Loss 3.5288 (3.5579)	Entropy 1.75915 (1.76441)	Top-1 acc 41.406 (39.984)	Top-5 acc 67.969 (63.933)	lr 0.02407
Train [15][2620/3239]	Time 0.239 (0.527)	Data Time 0.001 (0.013)	Loss 3.5759 (3.5578)	Entropy 1.75904 (1.76439)	Top-1 acc 39.062 (39.983)	Top-5 acc 65.234 (63.931)	lr 0.02407
Train [15][2630/3239]	Time 0.243 (0.527)	Data Time 0.001 (0.012)	Loss 3.4758 (3.5578)	Entropy 1.75899 (1.76437)	Top-1 acc 39.453 (39.980)	Top-5 acc 64.844 (63.929)	lr 0.02407
Train [15][2640/3239]	Time 0.335 (0.526)	Data Time 0.001 (0.012)	Loss 3.6653 (3.5577)	Entropy 1.75894 (1.76435)	Top-1 acc 37.109 (39.981)	Top-5 acc 62.109 (63.930)	lr 0.02407
Train [15][2650/3239]	Time 0.195 (0.526)	Data Time 0.001 (0.012)	Loss 3.6358 (3.5575)	Entropy 1.75898 (1.76433)	Top-1 acc 40.234 (39.985)	Top-5 acc 60.547 (63.935)	lr 0.02407
Train [15][2660/3239]	Time 0.238 (0.525)	Data Time 0.001 (0.012)	Loss 3.4465 (3.5571)	Entropy 1.75896 (1.76431)	Top-1 acc 44.141 (39.990)	Top-5 acc 66.016 (63.942)	lr 0.02407
Train [15][2670/3239]	Time 0.209 (0.525)	Data Time 0.001 (0.012)	Loss 3.5164 (3.5572)	Entropy 1.75892 (1.76429)	Top-1 acc 42.188 (39.987)	Top-5 acc 64.844 (63.937)	lr 0.02407
Train [15][2680/3239]	Time 0.237 (0.525)	Data Time 0.001 (0.012)	Loss 3.3130 (3.5570)	Entropy 1.75888 (1.76427)	Top-1 acc 43.750 (39.994)	Top-5 acc 71.094 (63.944)	lr 0.02407
Train [15][2690/3239]	Time 0.200 (0.524)	Data Time 0.001 (0.012)	Loss 3.9282 (3.5572)	Entropy 1.75885 (1.76425)	Top-1 acc 32.812 (39.992)	Top-5 acc 55.469 (63.943)	lr 0.02407
Train [15][2700/3239]	Time 0.201 (0.524)	Data Time 0.001 (0.012)	Loss 3.4906 (3.5571)	Entropy 1.75881 (1.76423)	Top-1 acc 41.797 (39.995)	Top-5 acc 63.672 (63.944)	lr 0.02407
Train [15][2710/3239]	Time 0.333 (0.524)	Data Time 0.002 (0.012)	Loss 3.6909 (3.5574)	Entropy 1.75879 (1.76421)	Top-1 acc 36.328 (39.990)	Top-5 acc 57.812 (63.939)	lr 0.02407
Train [15][2720/3239]	Time 0.228 (0.523)	Data Time 0.001 (0.012)	Loss 3.4729 (3.5572)	Entropy 1.75875 (1.76419)	Top-1 acc 42.969 (39.994)	Top-5 acc 61.719 (63.941)	lr 0.02407
Train [15][2730/3239]	Time 0.225 (0.523)	Data Time 0.001 (0.012)	Loss 3.5151 (3.5574)	Entropy 1.75873 (1.76417)	Top-1 acc 41.797 (39.991)	Top-5 acc 63.672 (63.934)	lr 0.02407
Train [15][2740/3239]	Time 0.255 (0.522)	Data Time 0.001 (0.012)	Loss 3.9208 (3.5578)	Entropy 1.75866 (1.76415)	Top-1 acc 31.641 (39.985)	Top-5 acc 55.859 (63.930)	lr 0.02407
Train [15][2750/3239]	Time 0.155 (0.522)	Data Time 0.001 (0.012)	Loss 3.5057 (3.5576)	Entropy 1.75862 (1.76413)	Top-1 acc 40.234 (39.988)	Top-5 acc 64.062 (63.932)	lr 0.02407
Train [15][2760/3239]	Time 0.199 (0.522)	Data Time 0.001 (0.012)	Loss 3.5439 (3.5577)	Entropy 1.75850 (1.76411)	Top-1 acc 39.453 (39.986)	Top-5 acc 65.234 (63.926)	lr 0.02407
Train [15][2770/3239]	Time 0.242 (0.521)	Data Time 0.001 (0.012)	Loss 3.3609 (3.5578)	Entropy 1.75840 (1.76409)	Top-1 acc 40.234 (39.981)	Top-5 acc 69.141 (63.929)	lr 0.02407
Train [15][2780/3239]	Time 0.205 (0.521)	Data Time 0.002 (0.012)	Loss 3.4017 (3.5575)	Entropy 1.75836 (1.76407)	Top-1 acc 45.703 (39.987)	Top-5 acc 70.312 (63.935)	lr 0.02407
Train [15][2790/3239]	Time 0.319 (0.521)	Data Time 0.001 (0.012)	Loss 3.5642 (3.5575)	Entropy 1.75835 (1.76405)	Top-1 acc 39.453 (39.990)	Top-5 acc 64.062 (63.936)	lr 0.02407
Train [15][2800/3239]	Time 0.204 (0.520)	Data Time 0.001 (0.012)	Loss 3.5269 (3.5572)	Entropy 1.75830 (1.76403)	Top-1 acc 41.797 (39.997)	Top-5 acc 60.938 (63.940)	lr 0.02407
Train [15][2810/3239]	Time 0.155 (0.520)	Data Time 0.001 (0.012)	Loss 3.4738 (3.5572)	Entropy 1.75835 (1.76401)	Top-1 acc 45.312 (39.999)	Top-5 acc 63.281 (63.935)	lr 0.02407
Train [15][2820/3239]	Time 0.196 (0.520)	Data Time 0.001 (0.012)	Loss 3.4029 (3.5571)	Entropy 1.75833 (1.76399)	Top-1 acc 39.844 (40.002)	Top-5 acc 68.359 (63.940)	lr 0.02406
Train [15][2830/3239]	Time 0.285 (0.533)	Data Time 0.004 (0.012)	Loss 3.4387 (3.5572)	Entropy 1.75830 (1.76397)	Top-1 acc 42.578 (39.997)	Top-5 acc 66.016 (63.937)	lr 0.02406
Train [15][2840/3239]	Time 0.199 (0.533)	Data Time 0.002 (0.012)	Loss 3.4621 (3.5573)	Entropy 1.75825 (1.76395)	Top-1 acc 40.234 (39.998)	Top-5 acc 65.234 (63.934)	lr 0.02406
Train [15][2850/3239]	Time 0.254 (0.532)	Data Time 0.002 (0.012)	Loss 3.4588 (3.5573)	Entropy 1.75825 (1.76393)	Top-1 acc 41.797 (39.998)	Top-5 acc 64.062 (63.936)	lr 0.02406
Train [15][2860/3239]	Time 0.270 (0.532)	Data Time 0.002 (0.012)	Loss 3.8123 (3.5572)	Entropy 1.75817 (1.76391)	Top-1 acc 37.109 (40.003)	Top-5 acc 54.297 (63.936)	lr 0.02406
Train [15][2870/3239]	Time 0.383 (0.532)	Data Time 0.001 (0.012)	Loss 3.5651 (3.5571)	Entropy 1.75815 (1.76389)	Top-1 acc 41.406 (40.002)	Top-5 acc 62.500 (63.935)	lr 0.02406
Train [15][2880/3239]	Time 0.208 (0.531)	Data Time 0.001 (0.012)	Loss 3.7098 (3.5573)	Entropy 1.75813 (1.76387)	Top-1 acc 33.203 (39.998)	Top-5 acc 60.547 (63.933)	lr 0.02406
Train [15][2890/3239]	Time 0.168 (0.531)	Data Time 0.001 (0.012)	Loss 3.5533 (3.5573)	Entropy 1.75800 (1.76385)	Top-1 acc 39.062 (39.994)	Top-5 acc 64.453 (63.931)	lr 0.02406
Train [15][2900/3239]	Time 0.234 (0.531)	Data Time 0.002 (0.012)	Loss 3.4670 (3.5572)	Entropy 1.75793 (1.76383)	Top-1 acc 41.797 (39.993)	Top-5 acc 62.109 (63.931)	lr 0.02406
Train [15][2910/3239]	Time 0.226 (0.530)	Data Time 0.002 (0.011)	Loss 3.5019 (3.5570)	Entropy 1.75790 (1.76381)	Top-1 acc 41.406 (40.000)	Top-5 acc 62.891 (63.938)	lr 0.02406
Train [15][2920/3239]	Time 0.248 (0.530)	Data Time 0.001 (0.011)	Loss 3.5777 (3.5571)	Entropy 1.75784 (1.76379)	Top-1 acc 42.188 (39.998)	Top-5 acc 62.500 (63.933)	lr 0.02406
Train [15][2930/3239]	Time 0.228 (0.530)	Data Time 0.001 (0.011)	Loss 3.3439 (3.5575)	Entropy 1.75779 (1.76377)	Top-1 acc 46.484 (39.993)	Top-5 acc 67.578 (63.923)	lr 0.02406
Train [15][2940/3239]	Time 0.319 (0.529)	Data Time 0.001 (0.011)	Loss 3.8839 (3.5575)	Entropy 1.75770 (1.76375)	Top-1 acc 32.031 (39.994)	Top-5 acc 56.641 (63.924)	lr 0.02406
Train [15][2950/3239]	Time 0.271 (0.529)	Data Time 0.001 (0.011)	Loss 3.4032 (3.5573)	Entropy 1.75762 (1.76373)	Top-1 acc 44.531 (39.995)	Top-5 acc 65.625 (63.926)	lr 0.02406
Train [15][2960/3239]	Time 0.254 (0.529)	Data Time 0.002 (0.011)	Loss 3.5198 (3.5573)	Entropy 1.75763 (1.76371)	Top-1 acc 38.672 (39.995)	Top-5 acc 66.016 (63.930)	lr 0.02406
Train [15][2970/3239]	Time 0.225 (0.528)	Data Time 0.002 (0.011)	Loss 3.5600 (3.5572)	Entropy 1.75759 (1.76368)	Top-1 acc 38.672 (39.998)	Top-5 acc 64.062 (63.930)	lr 0.02406
Train [15][2980/3239]	Time 0.217 (0.528)	Data Time 0.001 (0.011)	Loss 3.5351 (3.5572)	Entropy 1.75754 (1.76366)	Top-1 acc 40.234 (39.998)	Top-5 acc 62.109 (63.927)	lr 0.02406
Train [15][2990/3239]	Time 0.219 (0.528)	Data Time 0.001 (0.011)	Loss 3.5079 (3.5572)	Entropy 1.75733 (1.76364)	Top-1 acc 37.109 (39.998)	Top-5 acc 61.719 (63.923)	lr 0.02406
Train [15][3000/3239]	Time 0.222 (0.527)	Data Time 0.001 (0.011)	Loss 3.7998 (3.5573)	Entropy 1.75724 (1.76362)	Top-1 acc 33.203 (39.996)	Top-5 acc 57.031 (63.922)	lr 0.02406
Train [15][3010/3239]	Time 0.310 (0.527)	Data Time 0.001 (0.011)	Loss 3.4477 (3.5572)	Entropy 1.75723 (1.76360)	Top-1 acc 41.797 (40.001)	Top-5 acc 62.891 (63.923)	lr 0.02406
Train [15][3020/3239]	Time 0.159 (0.527)	Data Time 0.001 (0.011)	Loss 3.7304 (3.5573)	Entropy 1.75721 (1.76358)	Top-1 acc 35.938 (40.000)	Top-5 acc 62.109 (63.919)	lr 0.02406
Train [15][3030/3239]	Time 0.186 (0.526)	Data Time 0.001 (0.011)	Loss 3.5149 (3.5573)	Entropy 1.75715 (1.76356)	Top-1 acc 40.625 (39.997)	Top-5 acc 66.016 (63.921)	lr 0.02406
Train [15][3040/3239]	Time 0.150 (0.526)	Data Time 0.001 (0.011)	Loss 3.4711 (3.5574)	Entropy 1.75709 (1.76354)	Top-1 acc 44.141 (39.993)	Top-5 acc 63.672 (63.919)	lr 0.02406
Train [15][3050/3239]	Time 0.223 (0.526)	Data Time 0.002 (0.011)	Loss 3.7528 (3.5575)	Entropy 1.75708 (1.76352)	Top-1 acc 39.453 (39.991)	Top-5 acc 62.891 (63.918)	lr 0.02406
Train [15][3060/3239]	Time 0.232 (0.526)	Data Time 0.001 (0.011)	Loss 3.5124 (3.5574)	Entropy 1.75702 (1.76350)	Top-1 acc 38.672 (39.991)	Top-5 acc 65.625 (63.923)	lr 0.02406
Train [15][3070/3239]	Time 0.238 (0.525)	Data Time 0.001 (0.011)	Loss 3.5226 (3.5572)	Entropy 1.75703 (1.76348)	Top-1 acc 41.016 (39.997)	Top-5 acc 64.844 (63.926)	lr 0.02406
Train [15][3080/3239]	Time 0.306 (0.525)	Data Time 0.001 (0.011)	Loss 3.4623 (3.5573)	Entropy 1.75701 (1.76345)	Top-1 acc 41.406 (39.995)	Top-5 acc 64.062 (63.926)	lr 0.02405
Train [15][3090/3239]	Time 0.185 (0.525)	Data Time 0.002 (0.011)	Loss 3.6236 (3.5573)	Entropy 1.75703 (1.76343)	Top-1 acc 34.375 (39.996)	Top-5 acc 62.109 (63.926)	lr 0.02405
Train [15][3100/3239]	Time 0.165 (0.524)	Data Time 0.001 (0.011)	Loss 3.5679 (3.5573)	Entropy 1.75707 (1.76341)	Top-1 acc 36.719 (39.996)	Top-5 acc 63.672 (63.928)	lr 0.02405
Train [15][3110/3239]	Time 0.179 (0.524)	Data Time 0.001 (0.011)	Loss 3.6755 (3.5573)	Entropy 1.75704 (1.76339)	Top-1 acc 39.453 (39.993)	Top-5 acc 62.109 (63.930)	lr 0.02405
Train [15][3120/3239]	Time 0.194 (0.524)	Data Time 0.001 (0.011)	Loss 3.5818 (3.5574)	Entropy 1.75703 (1.76337)	Top-1 acc 40.234 (39.991)	Top-5 acc 59.375 (63.928)	lr 0.02405
Train [15][3130/3239]	Time 0.268 (0.523)	Data Time 0.001 (0.011)	Loss 3.7288 (3.5575)	Entropy 1.75699 (1.76335)	Top-1 acc 37.891 (39.988)	Top-5 acc 63.672 (63.925)	lr 0.02405
Train [15][3140/3239]	Time 0.223 (0.523)	Data Time 0.001 (0.011)	Loss 3.3916 (3.5576)	Entropy 1.75696 (1.76333)	Top-1 acc 44.141 (39.987)	Top-5 acc 66.406 (63.923)	lr 0.02405
Train [15][3150/3239]	Time 0.323 (0.523)	Data Time 0.001 (0.011)	Loss 3.5552 (3.5576)	Entropy 1.75697 (1.76331)	Top-1 acc 39.453 (39.985)	Top-5 acc 63.281 (63.921)	lr 0.02405
Train [15][3160/3239]	Time 0.219 (0.535)	Data Time 0.003 (0.011)	Loss 3.6539 (3.5578)	Entropy 1.75694 (1.76329)	Top-1 acc 37.500 (39.980)	Top-5 acc 60.547 (63.919)	lr 0.02405
Train [15][3170/3239]	Time 0.256 (0.535)	Data Time 0.002 (0.011)	Loss 3.6003 (3.5578)	Entropy 1.75691 (1.76327)	Top-1 acc 39.062 (39.980)	Top-5 acc 62.109 (63.917)	lr 0.02405
Train [15][3180/3239]	Time 0.201 (0.534)	Data Time 0.000 (0.011)	Loss 3.8285 (3.5580)	Entropy 1.75686 (1.76325)	Top-1 acc 35.156 (39.978)	Top-5 acc 59.766 (63.913)	lr 0.02405
Train [15][3190/3239]	Time 0.201 (0.534)	Data Time 0.000 (0.011)	Loss 3.5141 (3.5579)	Entropy 1.75684 (1.76323)	Top-1 acc 42.188 (39.979)	Top-5 acc 64.844 (63.914)	lr 0.02405
Train [15][3200/3239]	Time 0.205 (0.534)	Data Time 0.000 (0.011)	Loss 3.6814 (3.5579)	Entropy 1.75680 (1.76321)	Top-1 acc 39.062 (39.977)	Top-5 acc 59.766 (63.914)	lr 0.02405
Train [15][3210/3239]	Time 0.184 (0.533)	Data Time 0.000 (0.011)	Loss 3.5664 (3.5579)	Entropy 1.75680 (1.76319)	Top-1 acc 38.281 (39.975)	Top-5 acc 62.891 (63.908)	lr 0.02405
Train [15][3220/3239]	Time 0.326 (0.533)	Data Time 0.000 (0.011)	Loss 3.2810 (3.5577)	Entropy 1.75682 (1.76317)	Top-1 acc 46.875 (39.980)	Top-5 acc 70.312 (63.912)	lr 0.02405
Train [15][3230/3239]	Time 0.164 (0.532)	Data Time 0.000 (0.011)	Loss 3.8230 (3.5578)	Entropy 1.75683 (1.76315)	Top-1 acc 35.938 (39.977)	Top-5 acc 61.328 (63.911)	lr 0.02405
Train [15][3239/3239]	Time 2.157 (0.532)	Data Time 0.000 (0.011)	Loss 3.7263 (3.5580)	Entropy 1.75683 (1.76313)	Top-1 acc 39.506 (39.975)	Top-5 acc 61.728 (63.910)	lr 0.02405
==========Valid [15/120]	loss 2.377	top-1 acc 48.715 (48.715)	top-5 acc 72.837	Train top-1 39.975	top-5 63.910	Entropy 1.75683	Latency-None: 0.000ms	Flops: 533.31M
Train [16][0/3239]	Time 27.242 (27.242)	Data Time 24.484 (24.484)	Loss 3.4944 (3.4944)	Entropy 1.75683 (1.75683)	Top-1 acc 42.578 (42.578)	Top-5 acc 64.062 (64.062)	lr 0.02405
Train [16][10/3239]	Time 2.619 (3.009)	Data Time 0.001 (2.228)	Loss 3.6482 (3.6174)	Entropy 1.75683 (1.75683)	Top-1 acc 37.500 (38.991)	Top-5 acc 56.641 (62.322)	lr 0.02405
Train [16][20/3239]	Time 0.209 (1.687)	Data Time 0.001 (1.168)	Loss 3.4137 (3.5886)	Entropy 1.75673 (1.75678)	Top-1 acc 41.797 (39.472)	Top-5 acc 69.531 (63.077)	lr 0.02405
Train [16][30/3239]	Time 0.185 (1.285)	Data Time 0.002 (0.792)	Loss 3.4081 (3.5497)	Entropy 1.75672 (1.75676)	Top-1 acc 45.312 (40.549)	Top-5 acc 66.797 (63.936)	lr 0.02405
Train [16][40/3239]	Time 0.213 (1.080)	Data Time 0.001 (0.600)	Loss 3.4998 (3.5520)	Entropy 1.75669 (1.75675)	Top-1 acc 40.234 (40.320)	Top-5 acc 64.453 (63.853)	lr 0.02405
Train [16][50/3239]	Time 0.219 (0.952)	Data Time 0.001 (0.483)	Loss 3.6147 (3.5417)	Entropy 1.75663 (1.75673)	Top-1 acc 37.500 (40.579)	Top-5 acc 59.766 (63.863)	lr 0.02405
Train [16][60/3239]	Time 0.159 (0.867)	Data Time 0.001 (0.404)	Loss 3.6591 (3.5443)	Entropy 1.75658 (1.75671)	Top-1 acc 40.234 (40.548)	Top-5 acc 61.719 (63.883)	lr 0.02405
Train [16][70/3239]	Time 0.155 (0.805)	Data Time 0.001 (0.348)	Loss 3.4852 (3.5404)	Entropy 1.75656 (1.75669)	Top-1 acc 40.625 (40.697)	Top-5 acc 64.844 (64.051)	lr 0.02405
Train [16][80/3239]	Time 0.193 (0.758)	Data Time 0.001 (0.306)	Loss 3.3093 (3.5354)	Entropy 1.75649 (1.75667)	Top-1 acc 43.359 (40.760)	Top-5 acc 72.266 (64.275)	lr 0.02405
Train [16][90/3239]	Time 0.196 (0.722)	Data Time 0.001 (0.272)	Loss 3.5568 (3.5354)	Entropy 1.75647 (1.75665)	Top-1 acc 39.062 (40.655)	Top-5 acc 58.984 (64.247)	lr 0.02405
Train [16][100/3239]	Time 0.248 (0.694)	Data Time 0.001 (0.245)	Loss 3.4949 (3.5283)	Entropy 1.75645 (1.75663)	Top-1 acc 39.062 (40.729)	Top-5 acc 68.359 (64.411)	lr 0.02404
Train [16][110/3239]	Time 0.234 (0.669)	Data Time 0.001 (0.223)	Loss 3.6438 (3.5326)	Entropy 1.75638 (1.75661)	Top-1 acc 38.281 (40.671)	Top-5 acc 60.156 (64.397)	lr 0.02404
Train [16][120/3239]	Time 2.328 (0.648)	Data Time 0.001 (0.205)	Loss 3.5806 (3.5315)	Entropy 1.75638 (1.75659)	Top-1 acc 43.750 (40.780)	Top-5 acc 60.547 (64.356)	lr 0.02404
Train [16][130/3239]	Time 0.176 (0.615)	Data Time 0.001 (0.189)	Loss 3.4971 (3.5282)	Entropy 1.75633 (1.75657)	Top-1 acc 45.312 (40.843)	Top-5 acc 66.797 (64.382)	lr 0.02404
Train [16][140/3239]	Time 0.297 (0.602)	Data Time 0.001 (0.176)	Loss 3.6879 (3.5310)	Entropy 1.75626 (1.75655)	Top-1 acc 38.281 (40.852)	Top-5 acc 60.156 (64.320)	lr 0.02404
Train [16][150/3239]	Time 0.250 (0.590)	Data Time 0.001 (0.165)	Loss 3.8265 (3.5347)	Entropy 1.75620 (1.75653)	Top-1 acc 35.156 (40.796)	Top-5 acc 60.547 (64.290)	lr 0.02404
Train [16][160/3239]	Time 0.191 (0.580)	Data Time 0.001 (0.155)	Loss 3.5693 (3.5377)	Entropy 1.75611 (1.75651)	Top-1 acc 35.156 (40.746)	Top-5 acc 60.938 (64.206)	lr 0.02404
Train [16][170/3239]	Time 0.230 (0.571)	Data Time 0.001 (0.146)	Loss 3.4991 (3.5362)	Entropy 1.75609 (1.75648)	Top-1 acc 41.406 (40.785)	Top-5 acc 63.281 (64.184)	lr 0.02404
Train [16][180/3239]	Time 0.214 (0.564)	Data Time 0.001 (0.138)	Loss 3.5462 (3.5361)	Entropy 1.75608 (1.75646)	Top-1 acc 40.234 (40.793)	Top-5 acc 64.453 (64.179)	lr 0.02404
Train [16][190/3239]	Time 0.205 (0.556)	Data Time 0.001 (0.131)	Loss 3.5809 (3.5346)	Entropy 1.75607 (1.75644)	Top-1 acc 37.891 (40.844)	Top-5 acc 64.453 (64.240)	lr 0.02404
Train [16][200/3239]	Time 0.227 (0.550)	Data Time 0.001 (0.124)	Loss 3.3661 (3.5349)	Entropy 1.75603 (1.75642)	Top-1 acc 47.266 (40.845)	Top-5 acc 69.141 (64.191)	lr 0.02404
Train [16][210/3239]	Time 0.224 (0.543)	Data Time 0.001 (0.118)	Loss 3.3555 (3.5340)	Entropy 1.75594 (1.75640)	Top-1 acc 46.484 (40.880)	Top-5 acc 67.578 (64.238)	lr 0.02404
Train [16][220/3239]	Time 0.206 (0.540)	Data Time 0.002 (0.113)	Loss 3.5763 (3.5329)	Entropy 1.75583 (1.75638)	Top-1 acc 40.234 (40.887)	Top-5 acc 63.672 (64.305)	lr 0.02404
Train [16][230/3239]	Time 2.377 (0.535)	Data Time 0.001 (0.108)	Loss 3.4422 (3.5313)	Entropy 1.75583 (1.75636)	Top-1 acc 39.062 (40.897)	Top-5 acc 64.844 (64.345)	lr 0.02404
Train [16][240/3239]	Time 0.179 (0.522)	Data Time 0.002 (0.104)	Loss 3.3819 (3.5267)	Entropy 1.75575 (1.75633)	Top-1 acc 41.797 (40.957)	Top-5 acc 64.062 (64.455)	lr 0.02404
Train [16][250/3239]	Time 0.208 (0.518)	Data Time 0.001 (0.100)	Loss 3.5719 (3.5263)	Entropy 1.75557 (1.75630)	Top-1 acc 43.359 (40.953)	Top-5 acc 64.844 (64.458)	lr 0.02404
Train [16][260/3239]	Time 0.213 (0.515)	Data Time 0.001 (0.096)	Loss 3.4602 (3.5268)	Entropy 1.75554 (1.75627)	Top-1 acc 44.922 (40.932)	Top-5 acc 69.141 (64.438)	lr 0.02404
Train [16][270/3239]	Time 0.215 (0.512)	Data Time 0.001 (0.093)	Loss 3.6722 (3.5266)	Entropy 1.75540 (1.75624)	Top-1 acc 40.234 (40.941)	Top-5 acc 60.156 (64.442)	lr 0.02404
Train [16][280/3239]	Time 0.554 (0.654)	Data Time 0.003 (0.090)	Loss 3.4038 (3.5287)	Entropy 1.75535 (1.75621)	Top-1 acc 42.969 (40.893)	Top-5 acc 66.797 (64.423)	lr 0.02404
Train [16][290/3239]	Time 0.250 (0.651)	Data Time 0.002 (0.087)	Loss 3.5587 (3.5274)	Entropy 1.75538 (1.75618)	Top-1 acc 43.359 (40.912)	Top-5 acc 65.625 (64.477)	lr 0.02404
Train [16][300/3239]	Time 0.233 (0.645)	Data Time 0.002 (0.084)	Loss 3.6217 (3.5275)	Entropy 1.75532 (1.75615)	Top-1 acc 42.188 (40.935)	Top-5 acc 64.062 (64.486)	lr 0.02404
Train [16][310/3239]	Time 0.162 (0.638)	Data Time 0.001 (0.081)	Loss 3.3506 (3.5288)	Entropy 1.75526 (1.75613)	Top-1 acc 45.703 (40.898)	Top-5 acc 66.016 (64.434)	lr 0.02404
Train [16][320/3239]	Time 0.223 (0.631)	Data Time 0.001 (0.079)	Loss 3.3555 (3.5300)	Entropy 1.75524 (1.75610)	Top-1 acc 47.656 (40.860)	Top-5 acc 66.016 (64.397)	lr 0.02404
Train [16][330/3239]	Time 0.208 (0.625)	Data Time 0.001 (0.076)	Loss 3.7504 (3.5299)	Entropy 1.75524 (1.75607)	Top-1 acc 35.156 (40.853)	Top-5 acc 57.422 (64.414)	lr 0.02404
Train [16][340/3239]	Time 2.390 (0.620)	Data Time 0.001 (0.074)	Loss 3.6414 (3.5309)	Entropy 1.75524 (1.75605)	Top-1 acc 39.453 (40.780)	Top-5 acc 61.328 (64.387)	lr 0.02404
Train [16][350/3239]	Time 0.289 (0.609)	Data Time 0.001 (0.072)	Loss 3.9513 (3.5314)	Entropy 1.75526 (1.75603)	Top-1 acc 35.156 (40.750)	Top-5 acc 53.125 (64.375)	lr 0.02403
Train [16][360/3239]	Time 0.240 (0.604)	Data Time 0.001 (0.070)	Loss 3.5409 (3.5322)	Entropy 1.75519 (1.75600)	Top-1 acc 41.797 (40.717)	Top-5 acc 63.281 (64.360)	lr 0.02403
Train [16][370/3239]	Time 0.214 (0.599)	Data Time 0.001 (0.068)	Loss 3.7959 (3.5316)	Entropy 1.75518 (1.75598)	Top-1 acc 37.109 (40.770)	Top-5 acc 58.984 (64.380)	lr 0.02403
Train [16][380/3239]	Time 0.230 (0.595)	Data Time 0.001 (0.067)	Loss 3.3658 (3.5307)	Entropy 1.75512 (1.75596)	Top-1 acc 41.016 (40.790)	Top-5 acc 67.969 (64.405)	lr 0.02403
Train [16][390/3239]	Time 0.216 (0.592)	Data Time 0.001 (0.065)	Loss 3.5961 (3.5309)	Entropy 1.75506 (1.75594)	Top-1 acc 39.844 (40.769)	Top-5 acc 62.500 (64.377)	lr 0.02403
Train [16][400/3239]	Time 0.198 (0.588)	Data Time 0.001 (0.063)	Loss 3.3206 (3.5307)	Entropy 1.75497 (1.75591)	Top-1 acc 43.359 (40.767)	Top-5 acc 71.875 (64.398)	lr 0.02403
Train [16][410/3239]	Time 0.233 (0.584)	Data Time 0.001 (0.062)	Loss 3.5533 (3.5290)	Entropy 1.75496 (1.75589)	Top-1 acc 44.531 (40.795)	Top-5 acc 65.625 (64.416)	lr 0.02403
Train [16][420/3239]	Time 0.194 (0.580)	Data Time 0.001 (0.061)	Loss 3.3794 (3.5278)	Entropy 1.75497 (1.75587)	Top-1 acc 43.359 (40.837)	Top-5 acc 69.922 (64.447)	lr 0.02403
Train [16][430/3239]	Time 0.235 (0.577)	Data Time 0.001 (0.059)	Loss 3.4272 (3.5263)	Entropy 1.75492 (1.75585)	Top-1 acc 42.969 (40.847)	Top-5 acc 67.188 (64.492)	lr 0.02403
Train [16][440/3239]	Time 0.183 (0.573)	Data Time 0.001 (0.058)	Loss 3.5617 (3.5273)	Entropy 1.75490 (1.75583)	Top-1 acc 40.625 (40.814)	Top-5 acc 61.719 (64.472)	lr 0.02403
Train [16][450/3239]	Time 2.275 (0.570)	Data Time 0.001 (0.057)	Loss 3.4803 (3.5273)	Entropy 1.75490 (1.75581)	Top-1 acc 39.453 (40.824)	Top-5 acc 66.016 (64.474)	lr 0.02403
Train [16][460/3239]	Time 0.239 (0.563)	Data Time 0.001 (0.055)	Loss 3.2953 (3.5263)	Entropy 1.75487 (1.75579)	Top-1 acc 42.188 (40.842)	Top-5 acc 67.578 (64.495)	lr 0.02403
Train [16][470/3239]	Time 0.196 (0.560)	Data Time 0.001 (0.054)	Loss 3.6092 (3.5270)	Entropy 1.75482 (1.75576)	Top-1 acc 39.453 (40.829)	Top-5 acc 62.500 (64.482)	lr 0.02403
Train [16][480/3239]	Time 0.213 (0.557)	Data Time 0.001 (0.053)	Loss 3.4640 (3.5273)	Entropy 1.75481 (1.75575)	Top-1 acc 43.750 (40.820)	Top-5 acc 66.406 (64.481)	lr 0.02403
Train [16][490/3239]	Time 0.214 (0.555)	Data Time 0.001 (0.052)	Loss 3.6050 (3.5268)	Entropy 1.75476 (1.75573)	Top-1 acc 38.281 (40.818)	Top-5 acc 65.625 (64.500)	lr 0.02403
Train [16][500/3239]	Time 0.155 (0.552)	Data Time 0.001 (0.051)	Loss 3.2339 (3.5266)	Entropy 1.75473 (1.75571)	Top-1 acc 45.703 (40.798)	Top-5 acc 69.141 (64.512)	lr 0.02403
Train [16][510/3239]	Time 0.202 (0.549)	Data Time 0.001 (0.050)	Loss 3.5450 (3.5269)	Entropy 1.75465 (1.75569)	Top-1 acc 42.188 (40.792)	Top-5 acc 65.625 (64.527)	lr 0.02403
Train [16][520/3239]	Time 0.219 (0.547)	Data Time 0.001 (0.049)	Loss 3.5140 (3.5287)	Entropy 1.75456 (1.75567)	Top-1 acc 41.797 (40.761)	Top-5 acc 65.234 (64.473)	lr 0.02403
Train [16][530/3239]	Time 0.209 (0.545)	Data Time 0.001 (0.048)	Loss 3.7443 (3.5299)	Entropy 1.75461 (1.75565)	Top-1 acc 36.328 (40.735)	Top-5 acc 60.938 (64.452)	lr 0.02403
Train [16][540/3239]	Time 0.197 (0.543)	Data Time 0.001 (0.048)	Loss 3.4650 (3.5293)	Entropy 1.75455 (1.75563)	Top-1 acc 39.453 (40.746)	Top-5 acc 67.578 (64.470)	lr 0.02403
Train [16][550/3239]	Time 0.185 (0.541)	Data Time 0.001 (0.047)	Loss 3.3846 (3.5289)	Entropy 1.75451 (1.75561)	Top-1 acc 44.531 (40.765)	Top-5 acc 65.625 (64.474)	lr 0.02403
Train [16][560/3239]	Time 2.289 (0.538)	Data Time 0.001 (0.046)	Loss 3.6552 (3.5305)	Entropy 1.75451 (1.75559)	Top-1 acc 38.281 (40.727)	Top-5 acc 61.328 (64.437)	lr 0.02403
Train [16][570/3239]	Time 0.227 (0.533)	Data Time 0.001 (0.045)	Loss 3.4984 (3.5301)	Entropy 1.75447 (1.75557)	Top-1 acc 44.141 (40.748)	Top-5 acc 63.281 (64.428)	lr 0.02403
Train [16][580/3239]	Time 0.213 (0.531)	Data Time 0.001 (0.044)	Loss 3.6513 (3.5303)	Entropy 1.75446 (1.75555)	Top-1 acc 37.109 (40.764)	Top-5 acc 64.453 (64.430)	lr 0.02403
Train [16][590/3239]	Time 0.196 (0.529)	Data Time 0.001 (0.044)	Loss 3.7559 (3.5306)	Entropy 1.75443 (1.75553)	Top-1 acc 33.203 (40.764)	Top-5 acc 58.984 (64.421)	lr 0.02403
Train [16][600/3239]	Time 0.234 (0.527)	Data Time 0.001 (0.043)	Loss 3.4980 (3.5299)	Entropy 1.75444 (1.75551)	Top-1 acc 44.531 (40.780)	Top-5 acc 65.234 (64.423)	lr 0.02403
Train [16][610/3239]	Time 0.148 (0.526)	Data Time 0.001 (0.042)	Loss 3.4865 (3.5302)	Entropy 1.75442 (1.75549)	Top-1 acc 36.328 (40.750)	Top-5 acc 62.891 (64.430)	lr 0.02402
Train [16][620/3239]	Time 0.213 (0.524)	Data Time 0.001 (0.042)	Loss 3.6316 (3.5308)	Entropy 1.75439 (1.75548)	Top-1 acc 40.234 (40.731)	Top-5 acc 62.500 (64.430)	lr 0.02402
Train [16][630/3239]	Time 0.164 (0.523)	Data Time 0.001 (0.041)	Loss 3.5351 (3.5305)	Entropy 1.75433 (1.75546)	Top-1 acc 41.016 (40.738)	Top-5 acc 60.547 (64.408)	lr 0.02402
Train [16][640/3239]	Time 0.174 (0.587)	Data Time 0.002 (0.040)	Loss 3.6067 (3.5311)	Entropy 1.75433 (1.75544)	Top-1 acc 34.375 (40.710)	Top-5 acc 61.719 (64.396)	lr 0.02402
Train [16][650/3239]	Time 0.328 (0.585)	Data Time 0.002 (0.040)	Loss 3.3964 (3.5311)	Entropy 1.75425 (1.75542)	Top-1 acc 42.578 (40.719)	Top-5 acc 68.359 (64.380)	lr 0.02402
Train [16][660/3239]	Time 0.268 (0.582)	Data Time 0.001 (0.039)	Loss 3.4013 (3.5312)	Entropy 1.75424 (1.75541)	Top-1 acc 44.922 (40.717)	Top-5 acc 65.234 (64.367)	lr 0.02402
Train [16][670/3239]	Time 2.410 (0.580)	Data Time 0.002 (0.039)	Loss 3.6099 (3.5311)	Entropy 1.75424 (1.75539)	Top-1 acc 40.625 (40.705)	Top-5 acc 60.547 (64.363)	lr 0.02402
Train [16][680/3239]	Time 0.169 (0.575)	Data Time 0.001 (0.038)	Loss 3.5809 (3.5318)	Entropy 1.75420 (1.75537)	Top-1 acc 42.188 (40.684)	Top-5 acc 63.281 (64.351)	lr 0.02402
Train [16][690/3239]	Time 0.203 (0.573)	Data Time 0.001 (0.038)	Loss 3.5095 (3.5311)	Entropy 1.75417 (1.75535)	Top-1 acc 42.969 (40.702)	Top-5 acc 63.281 (64.353)	lr 0.02402
Train [16][700/3239]	Time 0.216 (0.571)	Data Time 0.001 (0.037)	Loss 3.6638 (3.5310)	Entropy 1.75402 (1.75533)	Top-1 acc 36.328 (40.691)	Top-5 acc 62.500 (64.349)	lr 0.02402
Train [16][710/3239]	Time 0.211 (0.569)	Data Time 0.001 (0.037)	Loss 3.5716 (3.5309)	Entropy 1.75401 (1.75532)	Top-1 acc 40.625 (40.705)	Top-5 acc 61.719 (64.358)	lr 0.02402
Train [16][720/3239]	Time 0.168 (0.567)	Data Time 0.001 (0.036)	Loss 3.5182 (3.5311)	Entropy 1.75393 (1.75530)	Top-1 acc 36.719 (40.701)	Top-5 acc 65.234 (64.357)	lr 0.02402
Train [16][730/3239]	Time 0.291 (0.565)	Data Time 0.002 (0.036)	Loss 3.5508 (3.5314)	Entropy 1.75394 (1.75528)	Top-1 acc 39.062 (40.683)	Top-5 acc 65.625 (64.342)	lr 0.02402
Train [16][740/3239]	Time 0.207 (0.564)	Data Time 0.001 (0.035)	Loss 3.4204 (3.5307)	Entropy 1.75390 (1.75526)	Top-1 acc 40.234 (40.700)	Top-5 acc 67.969 (64.356)	lr 0.02402
Train [16][750/3239]	Time 0.229 (0.562)	Data Time 0.001 (0.035)	Loss 3.5361 (3.5304)	Entropy 1.75389 (1.75524)	Top-1 acc 37.500 (40.704)	Top-5 acc 63.672 (64.361)	lr 0.02402
Train [16][760/3239]	Time 0.225 (0.560)	Data Time 0.001 (0.035)	Loss 3.3806 (3.5302)	Entropy 1.75385 (1.75522)	Top-1 acc 41.797 (40.708)	Top-5 acc 67.969 (64.365)	lr 0.02402
Train [16][770/3239]	Time 0.288 (0.559)	Data Time 0.001 (0.034)	Loss 3.4065 (3.5297)	Entropy 1.75374 (1.75521)	Top-1 acc 45.703 (40.730)	Top-5 acc 68.750 (64.381)	lr 0.02402
Train [16][780/3239]	Time 2.340 (0.557)	Data Time 0.001 (0.034)	Loss 3.2730 (3.5295)	Entropy 1.75374 (1.75519)	Top-1 acc 48.047 (40.734)	Top-5 acc 68.359 (64.385)	lr 0.02402
Train [16][790/3239]	Time 0.326 (0.553)	Data Time 0.001 (0.033)	Loss 3.4050 (3.5297)	Entropy 1.75369 (1.75517)	Top-1 acc 45.312 (40.727)	Top-5 acc 66.406 (64.389)	lr 0.02402
Train [16][800/3239]	Time 0.198 (0.552)	Data Time 0.001 (0.033)	Loss 3.4524 (3.5292)	Entropy 1.75366 (1.75515)	Top-1 acc 42.188 (40.745)	Top-5 acc 65.625 (64.386)	lr 0.02402
Train [16][810/3239]	Time 0.206 (0.550)	Data Time 0.002 (0.033)	Loss 3.4657 (3.5294)	Entropy 1.75367 (1.75513)	Top-1 acc 39.453 (40.731)	Top-5 acc 65.625 (64.386)	lr 0.02402
Train [16][820/3239]	Time 0.229 (0.549)	Data Time 0.001 (0.032)	Loss 3.6109 (3.5302)	Entropy 1.75366 (1.75511)	Top-1 acc 36.328 (40.722)	Top-5 acc 64.453 (64.359)	lr 0.02402
Train [16][830/3239]	Time 0.215 (0.547)	Data Time 0.001 (0.032)	Loss 3.5712 (3.5305)	Entropy 1.75358 (1.75510)	Top-1 acc 39.453 (40.711)	Top-5 acc 63.281 (64.357)	lr 0.02402
Train [16][840/3239]	Time 0.186 (0.546)	Data Time 0.002 (0.031)	Loss 3.4521 (3.5306)	Entropy 1.75356 (1.75508)	Top-1 acc 39.453 (40.708)	Top-5 acc 66.797 (64.367)	lr 0.02402
Train [16][850/3239]	Time 0.207 (0.544)	Data Time 0.001 (0.031)	Loss 3.5436 (3.5307)	Entropy 1.75352 (1.75506)	Top-1 acc 37.109 (40.692)	Top-5 acc 65.625 (64.375)	lr 0.02402
Train [16][860/3239]	Time 0.211 (0.543)	Data Time 0.001 (0.031)	Loss 3.5645 (3.5295)	Entropy 1.75343 (1.75504)	Top-1 acc 41.016 (40.708)	Top-5 acc 64.062 (64.397)	lr 0.02401
Train [16][870/3239]	Time 0.366 (0.542)	Data Time 0.001 (0.030)	Loss 3.4425 (3.5296)	Entropy 1.75337 (1.75502)	Top-1 acc 42.969 (40.714)	Top-5 acc 65.234 (64.400)	lr 0.02401
Train [16][880/3239]	Time 0.286 (0.541)	Data Time 0.001 (0.030)	Loss 3.5554 (3.5290)	Entropy 1.75343 (1.75500)	Top-1 acc 39.844 (40.730)	Top-5 acc 63.672 (64.402)	lr 0.02401
Train [16][890/3239]	Time 2.358 (0.539)	Data Time 0.001 (0.030)	Loss 3.4786 (3.5289)	Entropy 1.75343 (1.75499)	Top-1 acc 41.016 (40.729)	Top-5 acc 65.625 (64.409)	lr 0.02401
Train [16][900/3239]	Time 0.196 (0.536)	Data Time 0.001 (0.029)	Loss 3.5702 (3.5285)	Entropy 1.75340 (1.75497)	Top-1 acc 36.328 (40.737)	Top-5 acc 64.062 (64.426)	lr 0.02401
Train [16][910/3239]	Time 0.215 (0.535)	Data Time 0.001 (0.029)	Loss 3.4789 (3.5292)	Entropy 1.75330 (1.75495)	Top-1 acc 37.109 (40.717)	Top-5 acc 66.406 (64.420)	lr 0.02401
Train [16][920/3239]	Time 0.202 (0.533)	Data Time 0.001 (0.029)	Loss 3.5916 (3.5298)	Entropy 1.75326 (1.75493)	Top-1 acc 42.578 (40.703)	Top-5 acc 66.016 (64.411)	lr 0.02401
Train [16][930/3239]	Time 0.270 (0.532)	Data Time 0.001 (0.029)	Loss 3.3056 (3.5290)	Entropy 1.75323 (1.75491)	Top-1 acc 41.406 (40.719)	Top-5 acc 69.922 (64.434)	lr 0.02401
Train [16][940/3239]	Time 0.195 (0.531)	Data Time 0.001 (0.028)	Loss 3.6344 (3.5292)	Entropy 1.75321 (1.75490)	Top-1 acc 39.453 (40.700)	Top-5 acc 60.547 (64.434)	lr 0.02401
Train [16][950/3239]	Time 0.224 (0.530)	Data Time 0.001 (0.028)	Loss 3.5731 (3.5292)	Entropy 1.75311 (1.75488)	Top-1 acc 39.062 (40.700)	Top-5 acc 65.234 (64.427)	lr 0.02401
Train [16][960/3239]	Time 0.216 (0.529)	Data Time 0.001 (0.028)	Loss 3.4362 (3.5293)	Entropy 1.75305 (1.75486)	Top-1 acc 42.578 (40.697)	Top-5 acc 62.891 (64.419)	lr 0.02401
Train [16][970/3239]	Time 0.196 (0.528)	Data Time 0.001 (0.027)	Loss 3.3594 (3.5287)	Entropy 1.75304 (1.75484)	Top-1 acc 41.406 (40.707)	Top-5 acc 70.312 (64.434)	lr 0.02401
Train [16][980/3239]	Time 0.207 (0.527)	Data Time 0.002 (0.027)	Loss 3.4079 (3.5280)	Entropy 1.75303 (1.75482)	Top-1 acc 40.234 (40.712)	Top-5 acc 67.188 (64.445)	lr 0.02401
Train [16][990/3239]	Time 0.218 (0.526)	Data Time 0.001 (0.027)	Loss 3.5028 (3.5280)	Entropy 1.75297 (1.75480)	Top-1 acc 37.891 (40.715)	Top-5 acc 65.625 (64.447)	lr 0.02401
Train [16][1000/3239]	Time 44.842 (0.567)	Data Time 0.001 (0.027)	Loss 3.3863 (3.5277)	Entropy 1.75297 (1.75479)	Top-1 acc 42.969 (40.711)	Top-5 acc 65.234 (64.455)	lr 0.02401
Train [16][1010/3239]	Time 0.386 (0.564)	Data Time 0.003 (0.026)	Loss 3.4796 (3.5278)	Entropy 1.75288 (1.75477)	Top-1 acc 42.969 (40.709)	Top-5 acc 65.234 (64.451)	lr 0.02401
Train [16][1020/3239]	Time 0.202 (0.563)	Data Time 0.002 (0.026)	Loss 3.5990 (3.5286)	Entropy 1.75285 (1.75475)	Top-1 acc 39.062 (40.689)	Top-5 acc 61.328 (64.436)	lr 0.02401
Train [16][1030/3239]	Time 0.192 (0.561)	Data Time 0.002 (0.026)	Loss 3.5834 (3.5280)	Entropy 1.75280 (1.75473)	Top-1 acc 39.062 (40.692)	Top-5 acc 64.844 (64.458)	lr 0.02401
Train [16][1040/3239]	Time 0.251 (0.560)	Data Time 0.002 (0.026)	Loss 3.4242 (3.5283)	Entropy 1.75275 (1.75471)	Top-1 acc 41.797 (40.683)	Top-5 acc 64.844 (64.447)	lr 0.02401
Train [16][1050/3239]	Time 0.209 (0.559)	Data Time 0.001 (0.026)	Loss 3.4242 (3.5279)	Entropy 1.75272 (1.75469)	Top-1 acc 46.094 (40.692)	Top-5 acc 67.969 (64.462)	lr 0.02401
Train [16][1060/3239]	Time 0.226 (0.558)	Data Time 0.001 (0.025)	Loss 3.4015 (3.5281)	Entropy 1.75271 (1.75467)	Top-1 acc 44.531 (40.693)	Top-5 acc 64.453 (64.459)	lr 0.02401
Train [16][1070/3239]	Time 0.190 (0.557)	Data Time 0.002 (0.025)	Loss 3.3577 (3.5279)	Entropy 1.75267 (1.75465)	Top-1 acc 39.062 (40.697)	Top-5 acc 67.969 (64.467)	lr 0.02401
Train [16][1080/3239]	Time 0.238 (0.556)	Data Time 0.002 (0.025)	Loss 3.7353 (3.5282)	Entropy 1.75258 (1.75464)	Top-1 acc 37.891 (40.695)	Top-5 acc 59.375 (64.465)	lr 0.02401
Train [16][1090/3239]	Time 0.207 (0.555)	Data Time 0.002 (0.025)	Loss 3.3438 (3.5284)	Entropy 1.75249 (1.75462)	Top-1 acc 47.266 (40.683)	Top-5 acc 69.141 (64.459)	lr 0.02401
Train [16][1100/3239]	Time 0.207 (0.553)	Data Time 0.001 (0.024)	Loss 3.3353 (3.5275)	Entropy 1.75242 (1.75460)	Top-1 acc 44.922 (40.711)	Top-5 acc 70.312 (64.488)	lr 0.02401
Train [16][1110/3239]	Time 2.221 (0.552)	Data Time 0.001 (0.024)	Loss 3.6563 (3.5277)	Entropy 1.75242 (1.75458)	Top-1 acc 38.672 (40.709)	Top-5 acc 62.891 (64.493)	lr 0.02401
Train [16][1120/3239]	Time 0.204 (0.549)	Data Time 0.001 (0.024)	Loss 3.6372 (3.5273)	Entropy 1.75234 (1.75456)	Top-1 acc 39.844 (40.713)	Top-5 acc 60.547 (64.515)	lr 0.02400
Train [16][1130/3239]	Time 0.273 (0.548)	Data Time 0.002 (0.024)	Loss 3.3965 (3.5272)	Entropy 1.75226 (1.75454)	Top-1 acc 40.625 (40.711)	Top-5 acc 68.359 (64.520)	lr 0.02400
Train [16][1140/3239]	Time 0.158 (0.547)	Data Time 0.001 (0.024)	Loss 3.3797 (3.5271)	Entropy 1.75222 (1.75452)	Top-1 acc 41.016 (40.711)	Top-5 acc 67.969 (64.525)	lr 0.02400
Train [16][1150/3239]	Time 0.203 (0.546)	Data Time 0.001 (0.024)	Loss 3.6484 (3.5265)	Entropy 1.75215 (1.75450)	Top-1 acc 39.453 (40.727)	Top-5 acc 62.500 (64.537)	lr 0.02400
Train [16][1160/3239]	Time 0.207 (0.545)	Data Time 0.001 (0.023)	Loss 3.5352 (3.5263)	Entropy 1.75211 (1.75448)	Top-1 acc 40.625 (40.727)	Top-5 acc 63.672 (64.544)	lr 0.02400
Train [16][1170/3239]	Time 0.217 (0.544)	Data Time 0.001 (0.023)	Loss 3.5481 (3.5259)	Entropy 1.75206 (1.75446)	Top-1 acc 37.109 (40.736)	Top-5 acc 63.672 (64.555)	lr 0.02400
Train [16][1180/3239]	Time 0.228 (0.543)	Data Time 0.001 (0.023)	Loss 3.5432 (3.5269)	Entropy 1.75197 (1.75444)	Top-1 acc 40.234 (40.708)	Top-5 acc 66.016 (64.536)	lr 0.02400
Train [16][1190/3239]	Time 0.255 (0.543)	Data Time 0.001 (0.023)	Loss 3.5517 (3.5263)	Entropy 1.75201 (1.75442)	Top-1 acc 39.453 (40.723)	Top-5 acc 64.453 (64.547)	lr 0.02400
Train [16][1200/3239]	Time 0.226 (0.541)	Data Time 0.001 (0.023)	Loss 3.6627 (3.5265)	Entropy 1.75197 (1.75440)	Top-1 acc 35.156 (40.710)	Top-5 acc 62.500 (64.552)	lr 0.02400
Train [16][1210/3239]	Time 0.239 (0.541)	Data Time 0.001 (0.022)	Loss 3.3427 (3.5256)	Entropy 1.75191 (1.75438)	Top-1 acc 43.750 (40.730)	Top-5 acc 68.750 (64.568)	lr 0.02400
Train [16][1220/3239]	Time 2.170 (0.539)	Data Time 0.001 (0.022)	Loss 3.5640 (3.5254)	Entropy 1.75191 (1.75435)	Top-1 acc 38.672 (40.727)	Top-5 acc 65.625 (64.579)	lr 0.02400
Train [16][1230/3239]	Time 0.232 (0.537)	Data Time 0.001 (0.022)	Loss 3.4413 (3.5260)	Entropy 1.75181 (1.75433)	Top-1 acc 39.062 (40.712)	Top-5 acc 65.625 (64.568)	lr 0.02400
Train [16][1240/3239]	Time 0.212 (0.536)	Data Time 0.001 (0.022)	Loss 3.3400 (3.5258)	Entropy 1.75173 (1.75431)	Top-1 acc 44.531 (40.711)	Top-5 acc 66.016 (64.569)	lr 0.02400
Train [16][1250/3239]	Time 0.239 (0.536)	Data Time 0.001 (0.022)	Loss 3.3381 (3.5258)	Entropy 1.75168 (1.75429)	Top-1 acc 42.578 (40.711)	Top-5 acc 70.312 (64.575)	lr 0.02400
Train [16][1260/3239]	Time 0.244 (0.535)	Data Time 0.001 (0.022)	Loss 3.7405 (3.5262)	Entropy 1.75165 (1.75427)	Top-1 acc 36.328 (40.703)	Top-5 acc 62.891 (64.568)	lr 0.02400
Train [16][1270/3239]	Time 0.207 (0.534)	Data Time 0.002 (0.021)	Loss 3.5653 (3.5269)	Entropy 1.75162 (1.75425)	Top-1 acc 38.672 (40.691)	Top-5 acc 60.547 (64.555)	lr 0.02400
Train [16][1280/3239]	Time 0.228 (0.533)	Data Time 0.001 (0.021)	Loss 3.6114 (3.5276)	Entropy 1.75160 (1.75423)	Top-1 acc 37.891 (40.677)	Top-5 acc 62.109 (64.543)	lr 0.02400
Train [16][1290/3239]	Time 0.255 (0.532)	Data Time 0.001 (0.021)	Loss 3.3175 (3.5273)	Entropy 1.75154 (1.75421)	Top-1 acc 44.531 (40.686)	Top-5 acc 67.578 (64.550)	lr 0.02400
Train [16][1300/3239]	Time 0.236 (0.531)	Data Time 0.001 (0.021)	Loss 3.3932 (3.5268)	Entropy 1.75154 (1.75419)	Top-1 acc 42.578 (40.692)	Top-5 acc 69.141 (64.561)	lr 0.02400
Train [16][1310/3239]	Time 0.173 (0.531)	Data Time 0.001 (0.021)	Loss 3.3763 (3.5263)	Entropy 1.75148 (1.75417)	Top-1 acc 46.094 (40.709)	Top-5 acc 66.016 (64.567)	lr 0.02400
Train [16][1320/3239]	Time 0.320 (0.530)	Data Time 0.001 (0.021)	Loss 3.4782 (3.5262)	Entropy 1.75143 (1.75415)	Top-1 acc 39.453 (40.700)	Top-5 acc 63.672 (64.565)	lr 0.02400
Train [16][1330/3239]	Time 2.337 (0.529)	Data Time 0.001 (0.021)	Loss 3.4810 (3.5266)	Entropy 1.75143 (1.75413)	Top-1 acc 42.578 (40.694)	Top-5 acc 65.625 (64.561)	lr 0.02400
Train [16][1340/3239]	Time 0.196 (0.527)	Data Time 0.001 (0.020)	Loss 3.4081 (3.5266)	Entropy 1.75132 (1.75411)	Top-1 acc 41.406 (40.700)	Top-5 acc 70.703 (64.564)	lr 0.02400
Train [16][1350/3239]	Time 0.202 (0.526)	Data Time 0.001 (0.020)	Loss 3.4773 (3.5268)	Entropy 1.75118 (1.75409)	Top-1 acc 42.969 (40.697)	Top-5 acc 64.062 (64.555)	lr 0.02400
Train [16][1360/3239]	Time 0.217 (0.525)	Data Time 0.001 (0.020)	Loss 3.4747 (3.5273)	Entropy 1.75110 (1.75406)	Top-1 acc 44.141 (40.686)	Top-5 acc 66.406 (64.539)	lr 0.02400
Train [16][1370/3239]	Time 0.208 (0.555)	Data Time 0.002 (0.020)	Loss 3.3096 (3.5273)	Entropy 1.75101 (1.75404)	Top-1 acc 41.406 (40.681)	Top-5 acc 67.188 (64.534)	lr 0.02399
Train [16][1380/3239]	Time 0.246 (0.554)	Data Time 0.002 (0.020)	Loss 3.6253 (3.5275)	Entropy 1.75110 (1.75402)	Top-1 acc 36.719 (40.672)	Top-5 acc 60.547 (64.528)	lr 0.02399
Train [16][1390/3239]	Time 0.247 (0.554)	Data Time 0.001 (0.020)	Loss 3.3289 (3.5272)	Entropy 1.75108 (1.75400)	Top-1 acc 45.312 (40.674)	Top-5 acc 68.750 (64.538)	lr 0.02399
Train [16][1400/3239]	Time 0.197 (0.553)	Data Time 0.001 (0.020)	Loss 3.5226 (3.5271)	Entropy 1.75110 (1.75398)	Top-1 acc 41.406 (40.669)	Top-5 acc 64.844 (64.540)	lr 0.02399
Train [16][1410/3239]	Time 0.208 (0.552)	Data Time 0.001 (0.019)	Loss 3.5598 (3.5271)	Entropy 1.75094 (1.75396)	Top-1 acc 41.406 (40.676)	Top-5 acc 62.891 (64.544)	lr 0.02399
Train [16][1420/3239]	Time 0.204 (0.551)	Data Time 0.001 (0.019)	Loss 3.6390 (3.5267)	Entropy 1.75084 (1.75394)	Top-1 acc 38.672 (40.682)	Top-5 acc 64.062 (64.556)	lr 0.02399
Train [16][1430/3239]	Time 0.182 (0.550)	Data Time 0.001 (0.019)	Loss 3.6355 (3.5267)	Entropy 1.75077 (1.75391)	Top-1 acc 38.281 (40.678)	Top-5 acc 62.109 (64.555)	lr 0.02399
Train [16][1440/3239]	Time 2.422 (0.549)	Data Time 0.002 (0.019)	Loss 3.4840 (3.5264)	Entropy 1.75077 (1.75389)	Top-1 acc 42.969 (40.687)	Top-5 acc 65.625 (64.559)	lr 0.02399
Train [16][1450/3239]	Time 0.218 (0.547)	Data Time 0.001 (0.019)	Loss 3.4408 (3.5263)	Entropy 1.75076 (1.75387)	Top-1 acc 42.188 (40.690)	Top-5 acc 66.406 (64.565)	lr 0.02399
Train [16][1460/3239]	Time 0.313 (0.546)	Data Time 0.001 (0.019)	Loss 3.6299 (3.5264)	Entropy 1.75075 (1.75385)	Top-1 acc 35.547 (40.694)	Top-5 acc 61.719 (64.565)	lr 0.02399
Train [16][1470/3239]	Time 0.210 (0.545)	Data Time 0.001 (0.019)	Loss 3.6574 (3.5263)	Entropy 1.75068 (1.75383)	Top-1 acc 34.375 (40.699)	Top-5 acc 63.281 (64.559)	lr 0.02399
Train [16][1480/3239]	Time 0.209 (0.545)	Data Time 0.001 (0.019)	Loss 3.3332 (3.5261)	Entropy 1.75068 (1.75381)	Top-1 acc 41.797 (40.706)	Top-5 acc 70.312 (64.568)	lr 0.02399
Train [16][1490/3239]	Time 0.197 (0.544)	Data Time 0.001 (0.019)	Loss 3.5322 (3.5260)	Entropy 1.75060 (1.75379)	Top-1 acc 42.188 (40.708)	Top-5 acc 62.891 (64.568)	lr 0.02399
Train [16][1500/3239]	Time 0.201 (0.543)	Data Time 0.001 (0.018)	Loss 3.4694 (3.5260)	Entropy 1.75052 (1.75376)	Top-1 acc 41.406 (40.709)	Top-5 acc 66.406 (64.568)	lr 0.02399
Train [16][1510/3239]	Time 0.225 (0.542)	Data Time 0.002 (0.018)	Loss 3.5056 (3.5257)	Entropy 1.75047 (1.75374)	Top-1 acc 39.062 (40.721)	Top-5 acc 65.234 (64.576)	lr 0.02399
Train [16][1520/3239]	Time 0.176 (0.542)	Data Time 0.001 (0.018)	Loss 3.2497 (3.5255)	Entropy 1.75046 (1.75372)	Top-1 acc 43.750 (40.721)	Top-5 acc 69.141 (64.577)	lr 0.02399
Train [16][1530/3239]	Time 0.311 (0.541)	Data Time 0.001 (0.018)	Loss 3.6692 (3.5253)	Entropy 1.75035 (1.75370)	Top-1 acc 35.938 (40.723)	Top-5 acc 61.719 (64.577)	lr 0.02399
Train [16][1540/3239]	Time 0.250 (0.540)	Data Time 0.001 (0.018)	Loss 3.2950 (3.5247)	Entropy 1.75036 (1.75368)	Top-1 acc 46.875 (40.736)	Top-5 acc 68.750 (64.591)	lr 0.02399
Train [16][1550/3239]	Time 2.179 (0.539)	Data Time 0.001 (0.018)	Loss 3.6317 (3.5249)	Entropy 1.75036 (1.75366)	Top-1 acc 39.844 (40.730)	Top-5 acc 63.672 (64.589)	lr 0.02399
Train [16][1560/3239]	Time 0.201 (0.537)	Data Time 0.001 (0.018)	Loss 3.7185 (3.5249)	Entropy 1.75033 (1.75363)	Top-1 acc 33.594 (40.726)	Top-5 acc 61.328 (64.590)	lr 0.02399
Train [16][1570/3239]	Time 0.206 (0.536)	Data Time 0.001 (0.018)	Loss 3.6348 (3.5248)	Entropy 1.75007 (1.75361)	Top-1 acc 38.672 (40.728)	Top-5 acc 65.625 (64.598)	lr 0.02399
Train [16][1580/3239]	Time 0.232 (0.536)	Data Time 0.001 (0.018)	Loss 3.7351 (3.5254)	Entropy 1.75002 (1.75359)	Top-1 acc 35.547 (40.709)	Top-5 acc 60.547 (64.584)	lr 0.02399
Train [16][1590/3239]	Time 0.206 (0.535)	Data Time 0.001 (0.017)	Loss 3.5972 (3.5253)	Entropy 1.75001 (1.75357)	Top-1 acc 39.062 (40.706)	Top-5 acc 63.672 (64.587)	lr 0.02399
Train [16][1600/3239]	Time 0.313 (0.535)	Data Time 0.001 (0.017)	Loss 3.4342 (3.5251)	Entropy 1.74995 (1.75354)	Top-1 acc 43.750 (40.702)	Top-5 acc 66.406 (64.587)	lr 0.02399
Train [16][1610/3239]	Time 0.204 (0.534)	Data Time 0.001 (0.017)	Loss 3.5213 (3.5251)	Entropy 1.74991 (1.75352)	Top-1 acc 42.578 (40.708)	Top-5 acc 65.234 (64.587)	lr 0.02399
Train [16][1620/3239]	Time 0.243 (0.533)	Data Time 0.001 (0.017)	Loss 3.8181 (3.5253)	Entropy 1.74989 (1.75350)	Top-1 acc 33.594 (40.699)	Top-5 acc 55.469 (64.578)	lr 0.02398
Train [16][1630/3239]	Time 0.228 (0.533)	Data Time 0.001 (0.017)	Loss 3.5452 (3.5254)	Entropy 1.74989 (1.75348)	Top-1 acc 44.531 (40.695)	Top-5 acc 65.625 (64.574)	lr 0.02398
Train [16][1640/3239]	Time 0.202 (0.532)	Data Time 0.001 (0.017)	Loss 3.5831 (3.5253)	Entropy 1.74981 (1.75346)	Top-1 acc 39.453 (40.696)	Top-5 acc 64.062 (64.579)	lr 0.02398
Train [16][1650/3239]	Time 0.202 (0.531)	Data Time 0.001 (0.017)	Loss 3.6568 (3.5259)	Entropy 1.74974 (1.75343)	Top-1 acc 35.938 (40.679)	Top-5 acc 61.719 (64.568)	lr 0.02398
Train [16][1660/3239]	Time 2.266 (0.531)	Data Time 0.001 (0.017)	Loss 3.6407 (3.5260)	Entropy 1.74974 (1.75341)	Top-1 acc 36.719 (40.676)	Top-5 acc 61.328 (64.570)	lr 0.02398
Train [16][1670/3239]	Time 0.275 (0.529)	Data Time 0.001 (0.017)	Loss 3.6103 (3.5257)	Entropy 1.74965 (1.75339)	Top-1 acc 38.281 (40.683)	Top-5 acc 60.547 (64.577)	lr 0.02398
Train [16][1680/3239]	Time 0.203 (0.528)	Data Time 0.002 (0.017)	Loss 3.6660 (3.5257)	Entropy 1.74955 (1.75337)	Top-1 acc 35.938 (40.679)	Top-5 acc 60.938 (64.579)	lr 0.02398
Train [16][1690/3239]	Time 0.237 (0.528)	Data Time 0.001 (0.017)	Loss 3.5152 (3.5259)	Entropy 1.74943 (1.75334)	Top-1 acc 41.406 (40.681)	Top-5 acc 64.844 (64.576)	lr 0.02398
Train [16][1700/3239]	Time 0.193 (0.527)	Data Time 0.001 (0.016)	Loss 3.4678 (3.5256)	Entropy 1.74942 (1.75332)	Top-1 acc 41.406 (40.686)	Top-5 acc 66.797 (64.581)	lr 0.02398
Train [16][1710/3239]	Time 0.236 (0.527)	Data Time 0.001 (0.016)	Loss 3.3476 (3.5256)	Entropy 1.74928 (1.75330)	Top-1 acc 44.141 (40.686)	Top-5 acc 68.359 (64.581)	lr 0.02398
Train [16][1720/3239]	Time 0.166 (0.526)	Data Time 0.001 (0.016)	Loss 3.3571 (3.5256)	Entropy 1.74913 (1.75327)	Top-1 acc 40.234 (40.685)	Top-5 acc 67.969 (64.582)	lr 0.02398
Train [16][1730/3239]	Time 0.238 (0.549)	Data Time 0.002 (0.016)	Loss 3.5429 (3.5254)	Entropy 1.74908 (1.75325)	Top-1 acc 37.500 (40.682)	Top-5 acc 66.406 (64.587)	lr 0.02398
Train [16][1740/3239]	Time 0.197 (0.549)	Data Time 0.002 (0.016)	Loss 3.5750 (3.5256)	Entropy 1.74902 (1.75323)	Top-1 acc 41.797 (40.677)	Top-5 acc 60.938 (64.579)	lr 0.02398
Train [16][1750/3239]	Time 0.395 (0.548)	Data Time 0.002 (0.016)	Loss 3.4554 (3.5253)	Entropy 1.74898 (1.75320)	Top-1 acc 41.797 (40.683)	Top-5 acc 66.016 (64.583)	lr 0.02398
Train [16][1760/3239]	Time 0.197 (0.548)	Data Time 0.002 (0.016)	Loss 3.6963 (3.5254)	Entropy 1.74892 (1.75318)	Top-1 acc 39.844 (40.679)	Top-5 acc 62.891 (64.585)	lr 0.02398
Train [16][1770/3239]	Time 2.244 (0.547)	Data Time 0.001 (0.016)	Loss 3.4641 (3.5252)	Entropy 1.74892 (1.75315)	Top-1 acc 40.234 (40.682)	Top-5 acc 66.406 (64.589)	lr 0.02398
Train [16][1780/3239]	Time 0.203 (0.545)	Data Time 0.001 (0.016)	Loss 3.4890 (3.5251)	Entropy 1.74885 (1.75313)	Top-1 acc 42.578 (40.687)	Top-5 acc 66.797 (64.591)	lr 0.02398
Train [16][1790/3239]	Time 0.157 (0.544)	Data Time 0.001 (0.016)	Loss 3.4412 (3.5248)	Entropy 1.74880 (1.75310)	Top-1 acc 40.625 (40.691)	Top-5 acc 65.625 (64.595)	lr 0.02398
Train [16][1800/3239]	Time 0.159 (0.544)	Data Time 0.001 (0.016)	Loss 3.8766 (3.5249)	Entropy 1.74873 (1.75308)	Top-1 acc 35.547 (40.693)	Top-5 acc 59.766 (64.598)	lr 0.02398
Train [16][1810/3239]	Time 0.225 (0.543)	Data Time 0.001 (0.016)	Loss 3.4739 (3.5247)	Entropy 1.74873 (1.75306)	Top-1 acc 41.016 (40.700)	Top-5 acc 67.578 (64.605)	lr 0.02398
Train [16][1820/3239]	Time 0.326 (0.543)	Data Time 0.001 (0.016)	Loss 3.4993 (3.5247)	Entropy 1.74867 (1.75303)	Top-1 acc 41.016 (40.699)	Top-5 acc 67.578 (64.605)	lr 0.02398
Train [16][1830/3239]	Time 0.220 (0.542)	Data Time 0.002 (0.015)	Loss 3.4467 (3.5244)	Entropy 1.74856 (1.75301)	Top-1 acc 44.141 (40.703)	Top-5 acc 67.578 (64.612)	lr 0.02398
Train [16][1840/3239]	Time 0.157 (0.542)	Data Time 0.001 (0.015)	Loss 3.4202 (3.5243)	Entropy 1.74850 (1.75298)	Top-1 acc 41.406 (40.707)	Top-5 acc 67.188 (64.615)	lr 0.02398
Train [16][1850/3239]	Time 0.218 (0.541)	Data Time 0.001 (0.015)	Loss 3.3770 (3.5240)	Entropy 1.74846 (1.75296)	Top-1 acc 41.406 (40.709)	Top-5 acc 67.578 (64.623)	lr 0.02398
Train [16][1860/3239]	Time 0.214 (0.541)	Data Time 0.001 (0.015)	Loss 3.2085 (3.5240)	Entropy 1.74841 (1.75294)	Top-1 acc 42.578 (40.698)	Top-5 acc 71.094 (64.629)	lr 0.02398
Train [16][1870/3239]	Time 0.264 (0.540)	Data Time 0.001 (0.015)	Loss 3.4199 (3.5240)	Entropy 1.74837 (1.75291)	Top-1 acc 41.406 (40.698)	Top-5 acc 67.969 (64.631)	lr 0.02397
Train [16][1880/3239]	Time 2.205 (0.539)	Data Time 0.001 (0.015)	Loss 3.6383 (3.5242)	Entropy 1.74837 (1.75289)	Top-1 acc 37.891 (40.698)	Top-5 acc 63.672 (64.629)	lr 0.02397
Train [16][1890/3239]	Time 0.318 (0.538)	Data Time 0.001 (0.015)	Loss 3.3477 (3.5238)	Entropy 1.74839 (1.75286)	Top-1 acc 42.578 (40.701)	Top-5 acc 68.750 (64.634)	lr 0.02397
Train [16][1900/3239]	Time 0.195 (0.537)	Data Time 0.002 (0.015)	Loss 3.3263 (3.5243)	Entropy 1.74837 (1.75284)	Top-1 acc 42.578 (40.690)	Top-5 acc 70.312 (64.623)	lr 0.02397
Train [16][1910/3239]	Time 0.201 (0.536)	Data Time 0.001 (0.015)	Loss 3.4465 (3.5242)	Entropy 1.74832 (1.75282)	Top-1 acc 41.406 (40.682)	Top-5 acc 64.453 (64.625)	lr 0.02397
Train [16][1920/3239]	Time 0.208 (0.536)	Data Time 0.001 (0.015)	Loss 3.4283 (3.5244)	Entropy 1.74825 (1.75279)	Top-1 acc 41.797 (40.683)	Top-5 acc 66.016 (64.619)	lr 0.02397
Train [16][1930/3239]	Time 0.193 (0.535)	Data Time 0.001 (0.015)	Loss 3.5429 (3.5245)	Entropy 1.74823 (1.75277)	Top-1 acc 42.578 (40.685)	Top-5 acc 62.109 (64.609)	lr 0.02397
Train [16][1940/3239]	Time 0.202 (0.535)	Data Time 0.001 (0.015)	Loss 3.7076 (3.5250)	Entropy 1.74815 (1.75275)	Top-1 acc 37.891 (40.673)	Top-5 acc 59.766 (64.601)	lr 0.02397
Train [16][1950/3239]	Time 0.213 (0.534)	Data Time 0.001 (0.015)	Loss 3.5662 (3.5247)	Entropy 1.74809 (1.75272)	Top-1 acc 39.844 (40.675)	Top-5 acc 64.062 (64.605)	lr 0.02397
Train [16][1960/3239]	Time 0.235 (0.534)	Data Time 0.001 (0.015)	Loss 3.5257 (3.5246)	Entropy 1.74802 (1.75270)	Top-1 acc 40.625 (40.677)	Top-5 acc 63.672 (64.606)	lr 0.02397
Train [16][1970/3239]	Time 0.167 (0.533)	Data Time 0.001 (0.015)	Loss 3.5544 (3.5246)	Entropy 1.74803 (1.75267)	Top-1 acc 39.453 (40.676)	Top-5 acc 61.328 (64.604)	lr 0.02397
Train [16][1980/3239]	Time 0.137 (0.533)	Data Time 0.001 (0.014)	Loss 3.6092 (3.5246)	Entropy 1.74796 (1.75265)	Top-1 acc 42.969 (40.673)	Top-5 acc 62.500 (64.602)	lr 0.02397
Train [16][1990/3239]	Time 2.313 (0.532)	Data Time 0.001 (0.014)	Loss 3.4544 (3.5243)	Entropy 1.74796 (1.75263)	Top-1 acc 40.234 (40.677)	Top-5 acc 63.672 (64.601)	lr 0.02397
Train [16][2000/3239]	Time 0.210 (0.531)	Data Time 0.001 (0.014)	Loss 3.4501 (3.5245)	Entropy 1.74799 (1.75260)	Top-1 acc 41.406 (40.674)	Top-5 acc 68.359 (64.598)	lr 0.02397
Train [16][2010/3239]	Time 0.227 (0.530)	Data Time 0.001 (0.014)	Loss 3.6046 (3.5246)	Entropy 1.74794 (1.75258)	Top-1 acc 40.234 (40.669)	Top-5 acc 62.891 (64.601)	lr 0.02397
Train [16][2020/3239]	Time 0.216 (0.529)	Data Time 0.001 (0.014)	Loss 3.3674 (3.5245)	Entropy 1.74784 (1.75256)	Top-1 acc 43.750 (40.674)	Top-5 acc 68.359 (64.604)	lr 0.02397
Train [16][2030/3239]	Time 0.196 (0.529)	Data Time 0.001 (0.014)	Loss 3.6295 (3.5245)	Entropy 1.74780 (1.75253)	Top-1 acc 39.453 (40.672)	Top-5 acc 62.891 (64.604)	lr 0.02397
Train [16][2040/3239]	Time 0.321 (0.528)	Data Time 0.001 (0.014)	Loss 3.3902 (3.5243)	Entropy 1.74778 (1.75251)	Top-1 acc 39.453 (40.677)	Top-5 acc 66.406 (64.611)	lr 0.02397
Train [16][2050/3239]	Time 0.207 (0.528)	Data Time 0.001 (0.014)	Loss 3.3523 (3.5241)	Entropy 1.74776 (1.75249)	Top-1 acc 46.094 (40.680)	Top-5 acc 68.359 (64.613)	lr 0.02397
Train [16][2060/3239]	Time 0.213 (0.527)	Data Time 0.001 (0.014)	Loss 3.4020 (3.5237)	Entropy 1.74775 (1.75246)	Top-1 acc 45.703 (40.688)	Top-5 acc 67.969 (64.627)	lr 0.02397
Train [16][2070/3239]	Time 0.190 (0.527)	Data Time 0.001 (0.014)	Loss 3.4698 (3.5237)	Entropy 1.74775 (1.75244)	Top-1 acc 40.234 (40.693)	Top-5 acc 63.672 (64.622)	lr 0.02397
Train [16][2080/3239]	Time 0.195 (0.526)	Data Time 0.001 (0.014)	Loss 3.5174 (3.5239)	Entropy 1.74766 (1.75242)	Top-1 acc 42.578 (40.695)	Top-5 acc 64.453 (64.617)	lr 0.02397
Train [16][2090/3239]	Time 0.236 (0.546)	Data Time 0.003 (0.014)	Loss 3.4315 (3.5237)	Entropy 1.74766 (1.75240)	Top-1 acc 41.016 (40.694)	Top-5 acc 64.844 (64.618)	lr 0.02397
Train [16][2100/3239]	Time 2.306 (0.546)	Data Time 0.002 (0.014)	Loss 3.4302 (3.5242)	Entropy 1.74766 (1.75237)	Top-1 acc 44.141 (40.686)	Top-5 acc 65.625 (64.608)	lr 0.02397
Train [16][2110/3239]	Time 0.235 (0.544)	Data Time 0.002 (0.014)	Loss 3.6330 (3.5245)	Entropy 1.74762 (1.75235)	Top-1 acc 41.406 (40.677)	Top-5 acc 66.016 (64.604)	lr 0.02397
Train [16][2120/3239]	Time 0.191 (0.544)	Data Time 0.005 (0.014)	Loss 3.6743 (3.5242)	Entropy 1.74754 (1.75233)	Top-1 acc 41.406 (40.690)	Top-5 acc 63.281 (64.615)	lr 0.02396
Train [16][2130/3239]	Time 0.222 (0.543)	Data Time 0.001 (0.014)	Loss 3.6490 (3.5241)	Entropy 1.74750 (1.75231)	Top-1 acc 38.281 (40.687)	Top-5 acc 61.719 (64.620)	lr 0.02396
Train [16][2140/3239]	Time 0.206 (0.543)	Data Time 0.001 (0.014)	Loss 3.6462 (3.5242)	Entropy 1.74731 (1.75228)	Top-1 acc 39.844 (40.685)	Top-5 acc 62.109 (64.616)	lr 0.02396
Train [16][2150/3239]	Time 0.210 (0.542)	Data Time 0.001 (0.013)	Loss 3.3794 (3.5241)	Entropy 1.74726 (1.75226)	Top-1 acc 45.703 (40.687)	Top-5 acc 67.969 (64.616)	lr 0.02396
Train [16][2160/3239]	Time 0.227 (0.542)	Data Time 0.001 (0.013)	Loss 3.5830 (3.5241)	Entropy 1.74725 (1.75224)	Top-1 acc 37.500 (40.692)	Top-5 acc 63.281 (64.616)	lr 0.02396
Train [16][2170/3239]	Time 0.214 (0.541)	Data Time 0.001 (0.013)	Loss 3.5151 (3.5243)	Entropy 1.74724 (1.75221)	Top-1 acc 40.234 (40.692)	Top-5 acc 66.016 (64.608)	lr 0.02396
Train [16][2180/3239]	Time 0.214 (0.541)	Data Time 0.001 (0.013)	Loss 3.7228 (3.5243)	Entropy 1.74721 (1.75219)	Top-1 acc 38.281 (40.696)	Top-5 acc 60.547 (64.611)	lr 0.02396
Train [16][2190/3239]	Time 0.236 (0.540)	Data Time 0.001 (0.013)	Loss 3.4097 (3.5243)	Entropy 1.74717 (1.75217)	Top-1 acc 41.797 (40.695)	Top-5 acc 64.844 (64.608)	lr 0.02396
Train [16][2200/3239]	Time 0.205 (0.540)	Data Time 0.001 (0.013)	Loss 3.5254 (3.5244)	Entropy 1.74713 (1.75215)	Top-1 acc 44.141 (40.689)	Top-5 acc 63.281 (64.606)	lr 0.02396
Train [16][2210/3239]	Time 2.356 (0.539)	Data Time 0.001 (0.013)	Loss 3.5195 (3.5244)	Entropy 1.74713 (1.75212)	Top-1 acc 41.016 (40.688)	Top-5 acc 64.062 (64.605)	lr 0.02396
Train [16][2220/3239]	Time 0.191 (0.538)	Data Time 0.001 (0.013)	Loss 3.4731 (3.5245)	Entropy 1.74696 (1.75210)	Top-1 acc 39.062 (40.686)	Top-5 acc 65.234 (64.600)	lr 0.02396
Train [16][2230/3239]	Time 0.214 (0.537)	Data Time 0.001 (0.013)	Loss 3.5121 (3.5246)	Entropy 1.74695 (1.75208)	Top-1 acc 41.016 (40.682)	Top-5 acc 62.500 (64.601)	lr 0.02396
Train [16][2240/3239]	Time 0.316 (0.537)	Data Time 0.001 (0.013)	Loss 3.4543 (3.5245)	Entropy 1.74688 (1.75205)	Top-1 acc 42.578 (40.691)	Top-5 acc 66.797 (64.605)	lr 0.02396
Train [16][2250/3239]	Time 0.217 (0.536)	Data Time 0.001 (0.013)	Loss 3.5998 (3.5248)	Entropy 1.74684 (1.75203)	Top-1 acc 37.500 (40.686)	Top-5 acc 62.109 (64.598)	lr 0.02396
Train [16][2260/3239]	Time 0.209 (0.536)	Data Time 0.001 (0.013)	Loss 3.5395 (3.5246)	Entropy 1.74680 (1.75201)	Top-1 acc 38.281 (40.692)	Top-5 acc 65.625 (64.605)	lr 0.02396
Train [16][2270/3239]	Time 0.218 (0.535)	Data Time 0.001 (0.013)	Loss 3.2560 (3.5243)	Entropy 1.74675 (1.75198)	Top-1 acc 44.922 (40.694)	Top-5 acc 69.531 (64.609)	lr 0.02396
Train [16][2280/3239]	Time 0.179 (0.535)	Data Time 0.001 (0.013)	Loss 3.6977 (3.5245)	Entropy 1.74676 (1.75196)	Top-1 acc 37.891 (40.688)	Top-5 acc 60.547 (64.604)	lr 0.02396
Train [16][2290/3239]	Time 0.216 (0.534)	Data Time 0.002 (0.013)	Loss 3.8904 (3.5245)	Entropy 1.74664 (1.75194)	Top-1 acc 33.984 (40.690)	Top-5 acc 58.594 (64.605)	lr 0.02396
Train [16][2300/3239]	Time 0.213 (0.534)	Data Time 0.001 (0.013)	Loss 3.7170 (3.5247)	Entropy 1.74659 (1.75192)	Top-1 acc 36.328 (40.688)	Top-5 acc 61.328 (64.599)	lr 0.02396
Train [16][2310/3239]	Time 0.367 (0.533)	Data Time 0.001 (0.013)	Loss 3.6407 (3.5251)	Entropy 1.74654 (1.75189)	Top-1 acc 33.203 (40.679)	Top-5 acc 63.672 (64.594)	lr 0.02396
Train [16][2320/3239]	Time 2.232 (0.533)	Data Time 0.001 (0.013)	Loss 3.4602 (3.5247)	Entropy 1.74654 (1.75187)	Top-1 acc 43.359 (40.685)	Top-5 acc 64.844 (64.603)	lr 0.02396
Train [16][2330/3239]	Time 0.227 (0.532)	Data Time 0.001 (0.013)	Loss 3.3565 (3.5248)	Entropy 1.74645 (1.75185)	Top-1 acc 45.312 (40.681)	Top-5 acc 68.359 (64.606)	lr 0.02396
Train [16][2340/3239]	Time 0.197 (0.531)	Data Time 0.001 (0.013)	Loss 3.4509 (3.5247)	Entropy 1.74628 (1.75182)	Top-1 acc 42.578 (40.685)	Top-5 acc 64.453 (64.607)	lr 0.02396
Train [16][2350/3239]	Time 0.218 (0.531)	Data Time 0.001 (0.012)	Loss 3.5616 (3.5248)	Entropy 1.74623 (1.75180)	Top-1 acc 42.188 (40.682)	Top-5 acc 62.500 (64.608)	lr 0.02396
Train [16][2360/3239]	Time 0.218 (0.530)	Data Time 0.001 (0.012)	Loss 3.5430 (3.5248)	Entropy 1.74610 (1.75177)	Top-1 acc 42.188 (40.680)	Top-5 acc 64.453 (64.610)	lr 0.02396
Train [16][2370/3239]	Time 0.218 (0.530)	Data Time 0.002 (0.012)	Loss 3.6576 (3.5249)	Entropy 1.74601 (1.75175)	Top-1 acc 35.547 (40.674)	Top-5 acc 66.016 (64.609)	lr 0.02395
Train [16][2380/3239]	Time 0.312 (0.530)	Data Time 0.001 (0.012)	Loss 3.4892 (3.5247)	Entropy 1.74591 (1.75173)	Top-1 acc 41.016 (40.686)	Top-5 acc 67.578 (64.614)	lr 0.02395
Train [16][2390/3239]	Time 0.225 (0.529)	Data Time 0.001 (0.012)	Loss 3.5486 (3.5245)	Entropy 1.74582 (1.75170)	Top-1 acc 40.234 (40.690)	Top-5 acc 63.672 (64.619)	lr 0.02395
Train [16][2400/3239]	Time 0.212 (0.529)	Data Time 0.001 (0.012)	Loss 3.5231 (3.5246)	Entropy 1.74580 (1.75168)	Top-1 acc 42.578 (40.695)	Top-5 acc 63.672 (64.618)	lr 0.02395
Train [16][2410/3239]	Time 0.192 (0.528)	Data Time 0.001 (0.012)	Loss 3.4306 (3.5243)	Entropy 1.74569 (1.75165)	Top-1 acc 39.062 (40.698)	Top-5 acc 66.797 (64.629)	lr 0.02395
Train [16][2420/3239]	Time 0.172 (0.528)	Data Time 0.001 (0.012)	Loss 3.6403 (3.5243)	Entropy 1.74565 (1.75163)	Top-1 acc 36.719 (40.694)	Top-5 acc 62.891 (64.624)	lr 0.02395
Train [16][2430/3239]	Time 2.288 (0.527)	Data Time 0.001 (0.012)	Loss 3.5152 (3.5243)	Entropy 1.74565 (1.75160)	Top-1 acc 42.578 (40.694)	Top-5 acc 64.062 (64.625)	lr 0.02395
Train [16][2440/3239]	Time 0.196 (0.526)	Data Time 0.001 (0.012)	Loss 3.5222 (3.5242)	Entropy 1.74561 (1.75158)	Top-1 acc 40.625 (40.697)	Top-5 acc 66.797 (64.626)	lr 0.02395
Train [16][2450/3239]	Time 0.324 (0.526)	Data Time 0.001 (0.012)	Loss 3.6731 (3.5242)	Entropy 1.74558 (1.75155)	Top-1 acc 35.938 (40.699)	Top-5 acc 59.375 (64.623)	lr 0.02395
Train [16][2460/3239]	Time 0.178 (0.543)	Data Time 0.002 (0.012)	Loss 3.4849 (3.5242)	Entropy 1.74555 (1.75153)	Top-1 acc 42.188 (40.699)	Top-5 acc 63.281 (64.624)	lr 0.02395
Train [16][2470/3239]	Time 0.179 (0.543)	Data Time 0.002 (0.012)	Loss 3.5046 (3.5243)	Entropy 1.74557 (1.75151)	Top-1 acc 36.719 (40.694)	Top-5 acc 65.234 (64.624)	lr 0.02395
Train [16][2480/3239]	Time 0.207 (0.542)	Data Time 0.002 (0.012)	Loss 3.5822 (3.5242)	Entropy 1.74554 (1.75148)	Top-1 acc 40.234 (40.692)	Top-5 acc 62.500 (64.625)	lr 0.02395
Train [16][2490/3239]	Time 0.181 (0.541)	Data Time 0.001 (0.012)	Loss 3.6783 (3.5243)	Entropy 1.74542 (1.75146)	Top-1 acc 35.938 (40.691)	Top-5 acc 59.766 (64.619)	lr 0.02395
Train [16][2500/3239]	Time 0.164 (0.541)	Data Time 0.001 (0.012)	Loss 3.5062 (3.5241)	Entropy 1.74538 (1.75143)	Top-1 acc 39.453 (40.695)	Top-5 acc 64.844 (64.622)	lr 0.02395
Train [16][2510/3239]	Time 0.251 (0.541)	Data Time 0.002 (0.012)	Loss 3.6305 (3.5242)	Entropy 1.74537 (1.75141)	Top-1 acc 37.109 (40.689)	Top-5 acc 64.844 (64.619)	lr 0.02395
Train [16][2520/3239]	Time 0.200 (0.540)	Data Time 0.003 (0.012)	Loss 3.5960 (3.5242)	Entropy 1.74535 (1.75139)	Top-1 acc 37.891 (40.687)	Top-5 acc 62.891 (64.621)	lr 0.02395
Train [16][2530/3239]	Time 0.348 (0.540)	Data Time 0.001 (0.012)	Loss 3.4606 (3.5241)	Entropy 1.74532 (1.75136)	Top-1 acc 44.922 (40.691)	Top-5 acc 63.281 (64.620)	lr 0.02395
Train [16][2540/3239]	Time 2.510 (0.540)	Data Time 0.001 (0.012)	Loss 3.5787 (3.5240)	Entropy 1.74532 (1.75134)	Top-1 acc 39.062 (40.691)	Top-5 acc 64.062 (64.620)	lr 0.02395
Train [16][2550/3239]	Time 0.271 (0.539)	Data Time 0.002 (0.012)	Loss 3.7052 (3.5240)	Entropy 1.74528 (1.75131)	Top-1 acc 35.938 (40.691)	Top-5 acc 61.719 (64.622)	lr 0.02395
Train [16][2560/3239]	Time 0.230 (0.538)	Data Time 0.001 (0.012)	Loss 3.5911 (3.5241)	Entropy 1.74527 (1.75129)	Top-1 acc 43.359 (40.690)	Top-5 acc 66.016 (64.622)	lr 0.02395
Train [16][2570/3239]	Time 0.176 (0.538)	Data Time 0.001 (0.012)	Loss 3.4209 (3.5241)	Entropy 1.74517 (1.75127)	Top-1 acc 43.750 (40.690)	Top-5 acc 64.844 (64.620)	lr 0.02395
Train [16][2580/3239]	Time 0.217 (0.538)	Data Time 0.002 (0.012)	Loss 3.6088 (3.5240)	Entropy 1.74513 (1.75124)	Top-1 acc 41.797 (40.691)	Top-5 acc 60.156 (64.620)	lr 0.02395
Train [16][2590/3239]	Time 0.322 (0.537)	Data Time 0.002 (0.012)	Loss 3.4396 (3.5238)	Entropy 1.74506 (1.75122)	Top-1 acc 39.844 (40.692)	Top-5 acc 66.016 (64.626)	lr 0.02395
Train [16][2600/3239]	Time 0.201 (0.537)	Data Time 0.001 (0.012)	Loss 3.5767 (3.5236)	Entropy 1.74499 (1.75120)	Top-1 acc 38.672 (40.698)	Top-5 acc 65.625 (64.627)	lr 0.02395
Train [16][2610/3239]	Time 0.293 (0.536)	Data Time 0.001 (0.012)	Loss 3.5139 (3.5235)	Entropy 1.74501 (1.75117)	Top-1 acc 41.016 (40.702)	Top-5 acc 65.234 (64.628)	lr 0.02395
Train [16][2620/3239]	Time 0.221 (0.536)	Data Time 0.001 (0.011)	Loss 3.4550 (3.5235)	Entropy 1.74485 (1.75115)	Top-1 acc 42.578 (40.702)	Top-5 acc 66.016 (64.627)	lr 0.02394
Train [16][2630/3239]	Time 0.199 (0.536)	Data Time 0.001 (0.011)	Loss 3.5037 (3.5235)	Entropy 1.74486 (1.75112)	Top-1 acc 44.531 (40.704)	Top-5 acc 65.234 (64.628)	lr 0.02394
Train [16][2640/3239]	Time 0.192 (0.535)	Data Time 0.001 (0.011)	Loss 3.5473 (3.5236)	Entropy 1.74482 (1.75110)	Top-1 acc 41.797 (40.703)	Top-5 acc 64.062 (64.629)	lr 0.02394
Train [16][2650/3239]	Time 0.234 (0.535)	Data Time 0.001 (0.011)	Loss 3.5187 (3.5235)	Entropy 1.74478 (1.75108)	Top-1 acc 42.188 (40.706)	Top-5 acc 64.844 (64.628)	lr 0.02394
Train [16][2660/3239]	Time 0.253 (0.534)	Data Time 0.001 (0.011)	Loss 3.4098 (3.5234)	Entropy 1.74465 (1.75105)	Top-1 acc 37.500 (40.702)	Top-5 acc 67.578 (64.631)	lr 0.02394
Train [16][2670/3239]	Time 0.216 (0.534)	Data Time 0.001 (0.011)	Loss 3.4677 (3.5232)	Entropy 1.74464 (1.75103)	Top-1 acc 40.234 (40.706)	Top-5 acc 67.188 (64.632)	lr 0.02394
Train [16][2680/3239]	Time 0.399 (0.534)	Data Time 0.001 (0.011)	Loss 3.4022 (3.5233)	Entropy 1.74461 (1.75100)	Top-1 acc 39.844 (40.704)	Top-5 acc 66.797 (64.631)	lr 0.02394
Train [16][2690/3239]	Time 0.240 (0.533)	Data Time 0.001 (0.011)	Loss 3.7782 (3.5232)	Entropy 1.74459 (1.75098)	Top-1 acc 36.328 (40.704)	Top-5 acc 61.328 (64.633)	lr 0.02394
Train [16][2700/3239]	Time 0.224 (0.533)	Data Time 0.001 (0.011)	Loss 3.3912 (3.5230)	Entropy 1.74433 (1.75096)	Top-1 acc 46.484 (40.711)	Top-5 acc 67.578 (64.641)	lr 0.02394
Train [16][2710/3239]	Time 0.250 (0.533)	Data Time 0.002 (0.011)	Loss 3.3801 (3.5230)	Entropy 1.74428 (1.75093)	Top-1 acc 39.453 (40.707)	Top-5 acc 69.141 (64.644)	lr 0.02394
Train [16][2720/3239]	Time 0.193 (0.532)	Data Time 0.001 (0.011)	Loss 3.6493 (3.5232)	Entropy 1.74418 (1.75091)	Top-1 acc 41.406 (40.707)	Top-5 acc 59.375 (64.639)	lr 0.02394
Train [16][2730/3239]	Time 0.197 (0.532)	Data Time 0.001 (0.011)	Loss 3.5143 (3.5232)	Entropy 1.74406 (1.75088)	Top-1 acc 42.578 (40.711)	Top-5 acc 64.453 (64.640)	lr 0.02394
Train [16][2740/3239]	Time 0.206 (0.531)	Data Time 0.001 (0.011)	Loss 3.5991 (3.5235)	Entropy 1.74413 (1.75086)	Top-1 acc 37.500 (40.704)	Top-5 acc 67.188 (64.634)	lr 0.02394
Train [16][2750/3239]	Time 0.203 (0.531)	Data Time 0.001 (0.011)	Loss 3.3665 (3.5234)	Entropy 1.74409 (1.75083)	Top-1 acc 40.625 (40.706)	Top-5 acc 69.922 (64.636)	lr 0.02394
Train [16][2760/3239]	Time 0.356 (0.531)	Data Time 0.001 (0.011)	Loss 3.4971 (3.5234)	Entropy 1.74403 (1.75081)	Top-1 acc 38.281 (40.707)	Top-5 acc 65.234 (64.638)	lr 0.02394
Train [16][2770/3239]	Time 0.242 (0.530)	Data Time 0.002 (0.011)	Loss 3.5158 (3.5234)	Entropy 1.74398 (1.75079)	Top-1 acc 39.062 (40.707)	Top-5 acc 62.500 (64.635)	lr 0.02394
Train [16][2780/3239]	Time 0.227 (0.530)	Data Time 0.002 (0.011)	Loss 3.6055 (3.5234)	Entropy 1.74400 (1.75076)	Top-1 acc 38.672 (40.709)	Top-5 acc 63.281 (64.636)	lr 0.02394
Train [16][2790/3239]	Time 0.194 (0.530)	Data Time 0.001 (0.011)	Loss 3.4829 (3.5233)	Entropy 1.74394 (1.75074)	Top-1 acc 43.359 (40.708)	Top-5 acc 65.625 (64.637)	lr 0.02394
Train [16][2800/3239]	Time 0.276 (0.543)	Data Time 0.004 (0.011)	Loss 3.5735 (3.5232)	Entropy 1.74384 (1.75071)	Top-1 acc 44.922 (40.710)	Top-5 acc 62.891 (64.638)	lr 0.02394
Train [16][2810/3239]	Time 0.210 (0.543)	Data Time 0.002 (0.011)	Loss 3.3857 (3.5230)	Entropy 1.74381 (1.75069)	Top-1 acc 42.969 (40.716)	Top-5 acc 67.188 (64.643)	lr 0.02394
Train [16][2820/3239]	Time 0.408 (0.543)	Data Time 0.002 (0.011)	Loss 3.3434 (3.5230)	Entropy 1.74370 (1.75066)	Top-1 acc 44.531 (40.716)	Top-5 acc 66.406 (64.642)	lr 0.02394
Train [16][2830/3239]	Time 0.235 (0.543)	Data Time 0.002 (0.011)	Loss 3.4775 (3.5228)	Entropy 1.74358 (1.75064)	Top-1 acc 42.188 (40.718)	Top-5 acc 68.750 (64.650)	lr 0.02394
Train [16][2840/3239]	Time 0.245 (0.542)	Data Time 0.001 (0.011)	Loss 3.3330 (3.5227)	Entropy 1.74350 (1.75061)	Top-1 acc 48.047 (40.723)	Top-5 acc 69.922 (64.650)	lr 0.02394
Train [16][2850/3239]	Time 0.259 (0.542)	Data Time 0.001 (0.011)	Loss 3.7358 (3.5228)	Entropy 1.74348 (1.75059)	Top-1 acc 39.062 (40.725)	Top-5 acc 58.203 (64.648)	lr 0.02394
Train [16][2860/3239]	Time 0.234 (0.542)	Data Time 0.008 (0.011)	Loss 3.1540 (3.5230)	Entropy 1.74346 (1.75056)	Top-1 acc 48.438 (40.721)	Top-5 acc 72.266 (64.643)	lr 0.02393
Train [16][2870/3239]	Time 0.225 (0.541)	Data Time 0.001 (0.011)	Loss 3.4435 (3.5228)	Entropy 1.74333 (1.75054)	Top-1 acc 40.234 (40.724)	Top-5 acc 66.797 (64.646)	lr 0.02393
Train [16][2880/3239]	Time 0.249 (0.541)	Data Time 0.001 (0.011)	Loss 3.4193 (3.5227)	Entropy 1.74330 (1.75051)	Top-1 acc 42.578 (40.727)	Top-5 acc 65.625 (64.650)	lr 0.02393
Train [16][2890/3239]	Time 0.325 (0.541)	Data Time 0.001 (0.011)	Loss 3.5181 (3.5228)	Entropy 1.74329 (1.75049)	Top-1 acc 39.453 (40.722)	Top-5 acc 67.578 (64.648)	lr 0.02393
Train [16][2900/3239]	Time 0.245 (0.540)	Data Time 0.001 (0.011)	Loss 3.5266 (3.5228)	Entropy 1.74321 (1.75046)	Top-1 acc 42.188 (40.726)	Top-5 acc 64.453 (64.649)	lr 0.02393
Train [16][2910/3239]	Time 0.262 (0.540)	Data Time 0.001 (0.011)	Loss 3.6171 (3.5229)	Entropy 1.74319 (1.75044)	Top-1 acc 42.188 (40.723)	Top-5 acc 62.891 (64.648)	lr 0.02393
Train [16][2920/3239]	Time 0.218 (0.540)	Data Time 0.001 (0.010)	Loss 3.5243 (3.5229)	Entropy 1.74314 (1.75041)	Top-1 acc 38.672 (40.721)	Top-5 acc 63.672 (64.650)	lr 0.02393
Train [16][2930/3239]	Time 0.151 (0.539)	Data Time 0.001 (0.010)	Loss 3.4933 (3.5229)	Entropy 1.74314 (1.75039)	Top-1 acc 42.578 (40.721)	Top-5 acc 65.234 (64.646)	lr 0.02393
Train [16][2940/3239]	Time 0.250 (0.539)	Data Time 0.002 (0.010)	Loss 3.5065 (3.5230)	Entropy 1.74313 (1.75037)	Top-1 acc 39.844 (40.719)	Top-5 acc 65.234 (64.643)	lr 0.02393
Train [16][2950/3239]	Time 0.271 (0.538)	Data Time 0.001 (0.010)	Loss 3.4050 (3.5233)	Entropy 1.74312 (1.75034)	Top-1 acc 42.578 (40.710)	Top-5 acc 66.016 (64.641)	lr 0.02393
Train [16][2960/3239]	Time 0.207 (0.538)	Data Time 0.001 (0.010)	Loss 3.5018 (3.5232)	Entropy 1.74304 (1.75032)	Top-1 acc 40.234 (40.710)	Top-5 acc 66.406 (64.644)	lr 0.02393
Train [16][2970/3239]	Time 0.241 (0.538)	Data Time 0.001 (0.010)	Loss 3.4761 (3.5232)	Entropy 1.74304 (1.75029)	Top-1 acc 42.188 (40.709)	Top-5 acc 64.062 (64.641)	lr 0.02393
Train [16][2980/3239]	Time 0.180 (0.537)	Data Time 0.001 (0.010)	Loss 3.7968 (3.5233)	Entropy 1.74294 (1.75027)	Top-1 acc 36.719 (40.703)	Top-5 acc 56.641 (64.637)	lr 0.02393
Train [16][2990/3239]	Time 0.240 (0.537)	Data Time 0.001 (0.010)	Loss 3.2224 (3.5229)	Entropy 1.74290 (1.75024)	Top-1 acc 43.750 (40.708)	Top-5 acc 69.531 (64.643)	lr 0.02393
Train [16][3000/3239]	Time 0.266 (0.537)	Data Time 0.001 (0.010)	Loss 3.7747 (3.5228)	Entropy 1.74287 (1.75022)	Top-1 acc 38.281 (40.713)	Top-5 acc 56.641 (64.645)	lr 0.02393
Train [16][3010/3239]	Time 0.217 (0.536)	Data Time 0.001 (0.010)	Loss 3.3740 (3.5228)	Entropy 1.74285 (1.75019)	Top-1 acc 41.016 (40.714)	Top-5 acc 66.797 (64.646)	lr 0.02393
Train [16][3020/3239]	Time 0.156 (0.536)	Data Time 0.001 (0.010)	Loss 3.5833 (3.5226)	Entropy 1.74278 (1.75017)	Top-1 acc 42.188 (40.716)	Top-5 acc 61.719 (64.650)	lr 0.02393
Train [16][3030/3239]	Time 0.246 (0.536)	Data Time 0.001 (0.010)	Loss 3.3399 (3.5224)	Entropy 1.74267 (1.75015)	Top-1 acc 40.234 (40.719)	Top-5 acc 65.625 (64.653)	lr 0.02393
Train [16][3040/3239]	Time 0.278 (0.535)	Data Time 0.001 (0.010)	Loss 3.7667 (3.5223)	Entropy 1.74263 (1.75012)	Top-1 acc 36.328 (40.723)	Top-5 acc 58.594 (64.655)	lr 0.02393
Train [16][3050/3239]	Time 0.252 (0.535)	Data Time 0.002 (0.010)	Loss 3.5025 (3.5224)	Entropy 1.74242 (1.75010)	Top-1 acc 40.625 (40.721)	Top-5 acc 66.016 (64.655)	lr 0.02393
Train [16][3060/3239]	Time 0.230 (0.535)	Data Time 0.002 (0.010)	Loss 3.3533 (3.5221)	Entropy 1.74242 (1.75007)	Top-1 acc 41.016 (40.727)	Top-5 acc 68.750 (64.663)	lr 0.02393
Train [16][3070/3239]	Time 0.147 (0.535)	Data Time 0.001 (0.010)	Loss 3.7399 (3.5223)	Entropy 1.74239 (1.75005)	Top-1 acc 35.547 (40.719)	Top-5 acc 59.766 (64.658)	lr 0.02393
Train [16][3080/3239]	Time 0.217 (0.534)	Data Time 0.001 (0.010)	Loss 3.5036 (3.5223)	Entropy 1.74233 (1.75002)	Top-1 acc 41.406 (40.722)	Top-5 acc 64.062 (64.654)	lr 0.02393
Train [16][3090/3239]	Time 0.230 (0.534)	Data Time 0.001 (0.010)	Loss 3.4839 (3.5222)	Entropy 1.74230 (1.75000)	Top-1 acc 36.719 (40.719)	Top-5 acc 62.500 (64.654)	lr 0.02393
Train [16][3100/3239]	Time 0.237 (0.534)	Data Time 0.002 (0.010)	Loss 3.5549 (3.5221)	Entropy 1.74227 (1.74997)	Top-1 acc 43.359 (40.721)	Top-5 acc 62.891 (64.658)	lr 0.02393
Train [16][3110/3239]	Time 0.346 (0.533)	Data Time 0.001 (0.010)	Loss 3.4490 (3.5222)	Entropy 1.74225 (1.74995)	Top-1 acc 39.062 (40.720)	Top-5 acc 64.453 (64.656)	lr 0.02392
Train [16][3120/3239]	Time 0.173 (0.533)	Data Time 0.001 (0.010)	Loss 3.4371 (3.5222)	Entropy 1.74221 (1.74992)	Top-1 acc 42.188 (40.718)	Top-5 acc 67.188 (64.657)	lr 0.02392
Train [16][3130/3239]	Time 0.274 (0.545)	Data Time 0.004 (0.010)	Loss 3.5147 (3.5222)	Entropy 1.74216 (1.74990)	Top-1 acc 40.625 (40.714)	Top-5 acc 67.188 (64.656)	lr 0.02392
Train [16][3140/3239]	Time 0.275 (0.545)	Data Time 0.002 (0.010)	Loss 3.4672 (3.5223)	Entropy 1.74212 (1.74987)	Top-1 acc 42.188 (40.713)	Top-5 acc 67.969 (64.652)	lr 0.02392
Train [16][3150/3239]	Time 0.213 (0.545)	Data Time 0.001 (0.010)	Loss 3.5260 (3.5223)	Entropy 1.74211 (1.74985)	Top-1 acc 42.969 (40.715)	Top-5 acc 62.500 (64.654)	lr 0.02392
Train [16][3160/3239]	Time 0.239 (0.544)	Data Time 0.003 (0.010)	Loss 3.5502 (3.5222)	Entropy 1.74202 (1.74982)	Top-1 acc 40.625 (40.716)	Top-5 acc 63.281 (64.653)	lr 0.02392
Train [16][3170/3239]	Time 0.235 (0.544)	Data Time 0.002 (0.010)	Loss 3.4915 (3.5220)	Entropy 1.74195 (1.74980)	Top-1 acc 39.453 (40.720)	Top-5 acc 67.188 (64.659)	lr 0.02392
Train [16][3180/3239]	Time 0.304 (0.544)	Data Time 0.000 (0.010)	Loss 3.4129 (3.5220)	Entropy 1.74189 (1.74977)	Top-1 acc 41.016 (40.721)	Top-5 acc 66.797 (64.661)	lr 0.02392
Train [16][3190/3239]	Time 0.188 (0.543)	Data Time 0.000 (0.010)	Loss 3.6613 (3.5218)	Entropy 1.74190 (1.74975)	Top-1 acc 41.016 (40.726)	Top-5 acc 62.891 (64.664)	lr 0.02392
Train [16][3200/3239]	Time 0.179 (0.543)	Data Time 0.000 (0.010)	Loss 3.5722 (3.5217)	Entropy 1.74189 (1.74973)	Top-1 acc 38.281 (40.727)	Top-5 acc 64.844 (64.670)	lr 0.02392
Train [16][3210/3239]	Time 0.235 (0.543)	Data Time 0.000 (0.010)	Loss 3.4215 (3.5217)	Entropy 1.74188 (1.74970)	Top-1 acc 42.578 (40.729)	Top-5 acc 66.406 (64.670)	lr 0.02392
Train [16][3220/3239]	Time 0.204 (0.542)	Data Time 0.000 (0.010)	Loss 3.6267 (3.5218)	Entropy 1.74186 (1.74968)	Top-1 acc 34.375 (40.723)	Top-5 acc 60.938 (64.664)	lr 0.02392
Train [16][3230/3239]	Time 0.152 (0.542)	Data Time 0.000 (0.010)	Loss 3.8313 (3.5219)	Entropy 1.74176 (1.74965)	Top-1 acc 35.156 (40.719)	Top-5 acc 55.469 (64.662)	lr 0.02392
Train [16][3239/3239]	Time 2.091 (0.542)	Data Time 0.000 (0.010)	Loss 4.1791 (3.5219)	Entropy 1.74176 (1.74963)	Top-1 acc 27.160 (40.720)	Top-5 acc 55.556 (64.663)	lr 0.02392
==========Valid [16/120]	loss 2.326	top-1 acc 49.680 (49.680)	top-5 acc 73.677	Train top-1 40.720	top-5 64.663	Entropy 1.74176	Latency-None: 0.000ms	Flops: 514.85M
Train [17][0/3239]	Time 27.496 (27.496)	Data Time 26.798 (26.798)	Loss 3.5164 (3.5164)	Entropy 1.74171 (1.74171)	Top-1 acc 37.109 (37.109)	Top-5 acc 66.797 (66.797)	lr 0.02392
Train [17][10/3239]	Time 2.700 (3.149)	Data Time 0.002 (2.473)	Loss 3.5165 (3.4615)	Entropy 1.74171 (1.74171)	Top-1 acc 40.234 (41.619)	Top-5 acc 64.844 (66.548)	lr 0.02392
Train [17][20/3239]	Time 0.311 (1.758)	Data Time 0.001 (1.296)	Loss 3.4428 (3.4732)	Entropy 1.74163 (1.74167)	Top-1 acc 40.625 (41.071)	Top-5 acc 65.625 (65.960)	lr 0.02392
Train [17][30/3239]	Time 0.223 (1.333)	Data Time 0.001 (0.879)	Loss 3.5853 (3.4968)	Entropy 1.74149 (1.74162)	Top-1 acc 39.062 (40.953)	Top-5 acc 63.281 (65.512)	lr 0.02392
Train [17][40/3239]	Time 0.202 (1.120)	Data Time 0.001 (0.665)	Loss 3.4630 (3.4948)	Entropy 1.74142 (1.74157)	Top-1 acc 40.234 (40.949)	Top-5 acc 65.625 (65.253)	lr 0.02392
Train [17][50/3239]	Time 0.198 (0.987)	Data Time 0.001 (0.535)	Loss 3.6847 (3.4965)	Entropy 1.74140 (1.74154)	Top-1 acc 37.500 (41.008)	Top-5 acc 62.500 (65.150)	lr 0.02392
Train [17][60/3239]	Time 0.216 (0.899)	Data Time 0.003 (0.447)	Loss 3.4143 (3.4890)	Entropy 1.74138 (1.74152)	Top-1 acc 43.750 (41.329)	Top-5 acc 68.359 (65.266)	lr 0.02392
Train [17][70/3239]	Time 0.203 (0.833)	Data Time 0.001 (0.385)	Loss 3.5456 (3.4925)	Entropy 1.74130 (1.74149)	Top-1 acc 36.719 (41.098)	Top-5 acc 63.672 (65.251)	lr 0.02392
Train [17][80/3239]	Time 0.302 (0.783)	Data Time 0.001 (0.338)	Loss 3.4963 (3.4920)	Entropy 1.74122 (1.74146)	Top-1 acc 41.016 (41.223)	Top-5 acc 66.016 (65.287)	lr 0.02392
Train [17][90/3239]	Time 0.379 (0.744)	Data Time 0.001 (0.301)	Loss 3.1667 (3.4875)	Entropy 1.74120 (1.74144)	Top-1 acc 51.562 (41.393)	Top-5 acc 72.656 (65.316)	lr 0.02392
Train [17][100/3239]	Time 0.224 (0.714)	Data Time 0.001 (0.271)	Loss 3.3350 (3.4876)	Entropy 1.74109 (1.74141)	Top-1 acc 43.359 (41.348)	Top-5 acc 67.188 (65.319)	lr 0.02392
Train [17][110/3239]	Time 0.227 (0.687)	Data Time 0.002 (0.247)	Loss 3.5088 (3.4907)	Entropy 1.74102 (1.74138)	Top-1 acc 41.016 (41.262)	Top-5 acc 64.062 (65.206)	lr 0.02391
Train [17][120/3239]	Time 2.265 (0.665)	Data Time 0.001 (0.227)	Loss 3.5679 (3.4958)	Entropy 1.74102 (1.74135)	Top-1 acc 38.672 (41.164)	Top-5 acc 62.891 (65.060)	lr 0.02391
Train [17][130/3239]	Time 0.206 (0.631)	Data Time 0.002 (0.209)	Loss 3.2054 (3.4902)	Entropy 1.74086 (1.74131)	Top-1 acc 48.438 (41.326)	Top-5 acc 72.266 (65.273)	lr 0.02391
Train [17][140/3239]	Time 0.230 (0.617)	Data Time 0.002 (0.195)	Loss 3.4139 (3.4902)	Entropy 1.74073 (1.74127)	Top-1 acc 43.750 (41.343)	Top-5 acc 65.625 (65.254)	lr 0.02391
Train [17][150/3239]	Time 0.291 (0.605)	Data Time 0.001 (0.182)	Loss 3.2927 (3.4889)	Entropy 1.74070 (1.74124)	Top-1 acc 44.922 (41.300)	Top-5 acc 69.922 (65.271)	lr 0.02391
Train [17][160/3239]	Time 0.214 (0.594)	Data Time 0.001 (0.171)	Loss 3.6040 (3.4863)	Entropy 1.74068 (1.74120)	Top-1 acc 39.453 (41.363)	Top-5 acc 62.109 (65.307)	lr 0.02391
Train [17][170/3239]	Time 0.171 (0.583)	Data Time 0.001 (0.161)	Loss 3.6374 (3.4890)	Entropy 1.74062 (1.74117)	Top-1 acc 37.891 (41.374)	Top-5 acc 63.281 (65.239)	lr 0.02391
Train [17][180/3239]	Time 0.202 (0.574)	Data Time 0.001 (0.152)	Loss 3.4985 (3.4921)	Entropy 1.74061 (1.74114)	Top-1 acc 39.453 (41.307)	Top-5 acc 65.625 (65.228)	lr 0.02391
Train [17][190/3239]	Time 0.223 (0.567)	Data Time 0.001 (0.144)	Loss 3.4787 (3.4904)	Entropy 1.74053 (1.74111)	Top-1 acc 42.188 (41.337)	Top-5 acc 62.891 (65.263)	lr 0.02391
Train [17][200/3239]	Time 0.185 (0.560)	Data Time 0.001 (0.137)	Loss 3.6159 (3.4914)	Entropy 1.74051 (1.74108)	Top-1 acc 38.281 (41.327)	Top-5 acc 62.891 (65.211)	lr 0.02391
Train [17][210/3239]	Time 0.226 (0.553)	Data Time 0.002 (0.131)	Loss 3.5669 (3.4875)	Entropy 1.74058 (1.74105)	Top-1 acc 39.062 (41.404)	Top-5 acc 63.672 (65.238)	lr 0.02391
Train [17][220/3239]	Time 0.146 (0.547)	Data Time 0.001 (0.125)	Loss 3.5060 (3.4846)	Entropy 1.74055 (1.74103)	Top-1 acc 37.891 (41.436)	Top-5 acc 66.016 (65.303)	lr 0.02391
Train [17][230/3239]	Time 2.361 (0.542)	Data Time 0.002 (0.120)	Loss 3.3876 (3.4870)	Entropy 1.74055 (1.74101)	Top-1 acc 44.531 (41.369)	Top-5 acc 65.625 (65.278)	lr 0.02391
Train [17][240/3239]	Time 0.234 (0.529)	Data Time 0.002 (0.115)	Loss 3.3945 (3.4887)	Entropy 1.74051 (1.74099)	Top-1 acc 45.312 (41.354)	Top-5 acc 66.406 (65.255)	lr 0.02391
Train [17][250/3239]	Time 0.254 (0.703)	Data Time 0.002 (0.110)	Loss 3.3163 (3.4881)	Entropy 1.74051 (1.74097)	Top-1 acc 47.656 (41.377)	Top-5 acc 71.094 (65.328)	lr 0.02391
Train [17][260/3239]	Time 0.217 (0.693)	Data Time 0.002 (0.106)	Loss 3.3280 (3.4860)	Entropy 1.74049 (1.74095)	Top-1 acc 44.922 (41.438)	Top-5 acc 66.406 (65.329)	lr 0.02391
Train [17][270/3239]	Time 0.220 (0.683)	Data Time 0.001 (0.102)	Loss 3.4834 (3.4847)	Entropy 1.74046 (1.74094)	Top-1 acc 41.406 (41.477)	Top-5 acc 64.844 (65.357)	lr 0.02391
Train [17][280/3239]	Time 0.231 (0.674)	Data Time 0.001 (0.099)	Loss 3.3928 (3.4846)	Entropy 1.74041 (1.74092)	Top-1 acc 46.094 (41.508)	Top-5 acc 64.062 (65.387)	lr 0.02391
Train [17][290/3239]	Time 0.221 (0.666)	Data Time 0.001 (0.095)	Loss 3.5428 (3.4835)	Entropy 1.74043 (1.74090)	Top-1 acc 37.500 (41.489)	Top-5 acc 61.719 (65.410)	lr 0.02391
Train [17][300/3239]	Time 0.309 (0.658)	Data Time 0.001 (0.092)	Loss 3.4792 (3.4844)	Entropy 1.74041 (1.74088)	Top-1 acc 44.141 (41.505)	Top-5 acc 67.188 (65.399)	lr 0.02391
Train [17][310/3239]	Time 0.200 (0.651)	Data Time 0.001 (0.089)	Loss 3.5126 (3.4847)	Entropy 1.74038 (1.74087)	Top-1 acc 42.188 (41.484)	Top-5 acc 67.188 (65.415)	lr 0.02391
Train [17][320/3239]	Time 0.217 (0.644)	Data Time 0.001 (0.087)	Loss 3.5547 (3.4820)	Entropy 1.74030 (1.74085)	Top-1 acc 42.969 (41.549)	Top-5 acc 66.406 (65.487)	lr 0.02391
Train [17][330/3239]	Time 0.208 (0.637)	Data Time 0.001 (0.084)	Loss 3.4504 (3.4822)	Entropy 1.74019 (1.74084)	Top-1 acc 41.016 (41.556)	Top-5 acc 64.453 (65.480)	lr 0.02391
Train [17][340/3239]	Time 2.283 (0.631)	Data Time 0.001 (0.082)	Loss 3.6906 (3.4822)	Entropy 1.74019 (1.74082)	Top-1 acc 37.109 (41.554)	Top-5 acc 62.500 (65.475)	lr 0.02391
Train [17][350/3239]	Time 0.156 (0.619)	Data Time 0.001 (0.079)	Loss 3.4387 (3.4830)	Entropy 1.74013 (1.74080)	Top-1 acc 40.625 (41.505)	Top-5 acc 65.625 (65.473)	lr 0.02390
Train [17][360/3239]	Time 0.182 (0.613)	Data Time 0.001 (0.077)	Loss 3.6503 (3.4835)	Entropy 1.74007 (1.74078)	Top-1 acc 39.453 (41.491)	Top-5 acc 60.547 (65.440)	lr 0.02390
Train [17][370/3239]	Time 0.289 (0.608)	Data Time 0.001 (0.075)	Loss 3.4459 (3.4822)	Entropy 1.73990 (1.74075)	Top-1 acc 39.453 (41.500)	Top-5 acc 64.453 (65.460)	lr 0.02390
Train [17][380/3239]	Time 0.203 (0.604)	Data Time 0.001 (0.073)	Loss 3.4531 (3.4832)	Entropy 1.73983 (1.74073)	Top-1 acc 41.797 (41.501)	Top-5 acc 64.844 (65.446)	lr 0.02390
Train [17][390/3239]	Time 0.220 (0.600)	Data Time 0.001 (0.072)	Loss 3.6369 (3.4855)	Entropy 1.73975 (1.74071)	Top-1 acc 37.500 (41.432)	Top-5 acc 62.109 (65.400)	lr 0.02390
Train [17][400/3239]	Time 0.207 (0.595)	Data Time 0.001 (0.070)	Loss 3.3852 (3.4852)	Entropy 1.73976 (1.74068)	Top-1 acc 48.438 (41.464)	Top-5 acc 67.188 (65.412)	lr 0.02390
Train [17][410/3239]	Time 0.191 (0.591)	Data Time 0.001 (0.068)	Loss 3.4895 (3.4843)	Entropy 1.73972 (1.74066)	Top-1 acc 44.531 (41.465)	Top-5 acc 65.625 (65.440)	lr 0.02390
Train [17][420/3239]	Time 0.237 (0.587)	Data Time 0.001 (0.067)	Loss 3.3506 (3.4828)	Entropy 1.73965 (1.74064)	Top-1 acc 44.141 (41.480)	Top-5 acc 66.406 (65.454)	lr 0.02390
Train [17][430/3239]	Time 0.209 (0.583)	Data Time 0.002 (0.065)	Loss 3.3647 (3.4856)	Entropy 1.73958 (1.74061)	Top-1 acc 47.656 (41.413)	Top-5 acc 67.188 (65.420)	lr 0.02390
Train [17][440/3239]	Time 0.227 (0.580)	Data Time 0.001 (0.064)	Loss 3.5602 (3.4863)	Entropy 1.73956 (1.74059)	Top-1 acc 41.406 (41.403)	Top-5 acc 64.453 (65.404)	lr 0.02390
Train [17][450/3239]	Time 2.284 (0.576)	Data Time 0.001 (0.062)	Loss 4.0618 (3.4874)	Entropy 1.73956 (1.74057)	Top-1 acc 31.641 (41.367)	Top-5 acc 52.344 (65.376)	lr 0.02390
Train [17][460/3239]	Time 0.228 (0.568)	Data Time 0.001 (0.061)	Loss 3.4721 (3.4873)	Entropy 1.73936 (1.74054)	Top-1 acc 36.719 (41.355)	Top-5 acc 64.062 (65.363)	lr 0.02390
Train [17][470/3239]	Time 0.206 (0.565)	Data Time 0.001 (0.060)	Loss 3.3132 (3.4868)	Entropy 1.73934 (1.74052)	Top-1 acc 41.797 (41.352)	Top-5 acc 67.188 (65.355)	lr 0.02390
Train [17][480/3239]	Time 0.154 (0.563)	Data Time 0.001 (0.058)	Loss 3.5308 (3.4879)	Entropy 1.73926 (1.74049)	Top-1 acc 42.188 (41.353)	Top-5 acc 62.500 (65.336)	lr 0.02390
Train [17][490/3239]	Time 0.231 (0.560)	Data Time 0.002 (0.057)	Loss 3.5191 (3.4886)	Entropy 1.73923 (1.74046)	Top-1 acc 41.406 (41.361)	Top-5 acc 65.234 (65.316)	lr 0.02390
Train [17][500/3239]	Time 0.202 (0.558)	Data Time 0.001 (0.056)	Loss 3.4997 (3.4879)	Entropy 1.73913 (1.74044)	Top-1 acc 43.750 (41.366)	Top-5 acc 64.453 (65.327)	lr 0.02390
Train [17][510/3239]	Time 0.153 (0.555)	Data Time 0.001 (0.055)	Loss 3.4765 (3.4878)	Entropy 1.73907 (1.74041)	Top-1 acc 41.797 (41.346)	Top-5 acc 66.406 (65.342)	lr 0.02390
Train [17][520/3239]	Time 0.210 (0.553)	Data Time 0.001 (0.054)	Loss 3.5239 (3.4874)	Entropy 1.73895 (1.74039)	Top-1 acc 39.844 (41.384)	Top-5 acc 66.797 (65.365)	lr 0.02390
Train [17][530/3239]	Time 0.184 (0.550)	Data Time 0.001 (0.053)	Loss 3.4749 (3.4866)	Entropy 1.73885 (1.74036)	Top-1 acc 40.234 (41.406)	Top-5 acc 60.938 (65.384)	lr 0.02390
Train [17][540/3239]	Time 0.258 (0.548)	Data Time 0.001 (0.052)	Loss 3.4507 (3.4870)	Entropy 1.73881 (1.74033)	Top-1 acc 42.188 (41.424)	Top-5 acc 64.453 (65.362)	lr 0.02390
Train [17][550/3239]	Time 0.284 (0.546)	Data Time 0.001 (0.051)	Loss 3.5282 (3.4862)	Entropy 1.73878 (1.74030)	Top-1 acc 39.062 (41.423)	Top-5 acc 62.891 (65.367)	lr 0.02390
Train [17][560/3239]	Time 2.294 (0.544)	Data Time 0.001 (0.051)	Loss 3.4831 (3.4878)	Entropy 1.73878 (1.74028)	Top-1 acc 38.281 (41.391)	Top-5 acc 63.672 (65.323)	lr 0.02390
Train [17][570/3239]	Time 0.148 (0.538)	Data Time 0.001 (0.050)	Loss 3.6184 (3.4898)	Entropy 1.73857 (1.74025)	Top-1 acc 39.844 (41.335)	Top-5 acc 62.500 (65.290)	lr 0.02390
Train [17][580/3239]	Time 0.214 (0.536)	Data Time 0.001 (0.049)	Loss 3.4866 (3.4902)	Entropy 1.73849 (1.74022)	Top-1 acc 41.797 (41.326)	Top-5 acc 68.359 (65.281)	lr 0.02390
Train [17][590/3239]	Time 0.177 (0.535)	Data Time 0.001 (0.048)	Loss 3.6094 (3.4907)	Entropy 1.73845 (1.74019)	Top-1 acc 40.234 (41.305)	Top-5 acc 61.328 (65.256)	lr 0.02389
Train [17][600/3239]	Time 0.217 (0.533)	Data Time 0.001 (0.047)	Loss 3.5344 (3.4897)	Entropy 1.73830 (1.74016)	Top-1 acc 43.359 (41.334)	Top-5 acc 62.109 (65.288)	lr 0.02389
Train [17][610/3239]	Time 0.314 (0.597)	Data Time 0.005 (0.047)	Loss 3.5866 (3.4905)	Entropy 1.73822 (1.74012)	Top-1 acc 37.109 (41.316)	Top-5 acc 64.062 (65.289)	lr 0.02389
Train [17][620/3239]	Time 0.204 (0.596)	Data Time 0.002 (0.046)	Loss 3.7563 (3.4907)	Entropy 1.73815 (1.74009)	Top-1 acc 36.328 (41.308)	Top-5 acc 59.766 (65.294)	lr 0.02389
Train [17][630/3239]	Time 0.240 (0.593)	Data Time 0.002 (0.045)	Loss 3.5425 (3.4905)	Entropy 1.73811 (1.74006)	Top-1 acc 37.500 (41.292)	Top-5 acc 65.625 (65.307)	lr 0.02389
Train [17][640/3239]	Time 0.257 (0.591)	Data Time 0.002 (0.045)	Loss 3.3425 (3.4888)	Entropy 1.73809 (1.74003)	Top-1 acc 47.266 (41.333)	Top-5 acc 70.312 (65.350)	lr 0.02389
Train [17][650/3239]	Time 0.222 (0.588)	Data Time 0.002 (0.044)	Loss 3.6836 (3.4888)	Entropy 1.73802 (1.74000)	Top-1 acc 37.500 (41.335)	Top-5 acc 62.109 (65.356)	lr 0.02389
Train [17][660/3239]	Time 0.333 (0.586)	Data Time 0.002 (0.043)	Loss 3.5346 (3.4893)	Entropy 1.73796 (1.73997)	Top-1 acc 40.234 (41.320)	Top-5 acc 62.891 (65.334)	lr 0.02389
Train [17][670/3239]	Time 2.406 (0.584)	Data Time 0.002 (0.043)	Loss 3.4839 (3.4898)	Entropy 1.73796 (1.73994)	Top-1 acc 42.578 (41.310)	Top-5 acc 69.141 (65.326)	lr 0.02389
Train [17][680/3239]	Time 0.227 (0.579)	Data Time 0.001 (0.042)	Loss 3.5414 (3.4891)	Entropy 1.73784 (1.73991)	Top-1 acc 39.453 (41.337)	Top-5 acc 64.844 (65.351)	lr 0.02389
Train [17][690/3239]	Time 0.219 (0.577)	Data Time 0.001 (0.042)	Loss 3.4161 (3.4880)	Entropy 1.73772 (1.73988)	Top-1 acc 37.500 (41.343)	Top-5 acc 68.359 (65.385)	lr 0.02389
Train [17][700/3239]	Time 0.210 (0.575)	Data Time 0.002 (0.041)	Loss 3.4684 (3.4876)	Entropy 1.73756 (1.73985)	Top-1 acc 40.625 (41.347)	Top-5 acc 62.500 (65.385)	lr 0.02389
Train [17][710/3239]	Time 0.214 (0.573)	Data Time 0.001 (0.040)	Loss 3.3848 (3.4873)	Entropy 1.73751 (1.73981)	Top-1 acc 40.234 (41.344)	Top-5 acc 65.625 (65.378)	lr 0.02389
Train [17][720/3239]	Time 0.158 (0.571)	Data Time 0.001 (0.040)	Loss 3.4410 (3.4882)	Entropy 1.73741 (1.73978)	Top-1 acc 39.062 (41.318)	Top-5 acc 68.750 (65.371)	lr 0.02389
Train [17][730/3239]	Time 0.298 (0.569)	Data Time 0.001 (0.039)	Loss 3.7172 (3.4883)	Entropy 1.73721 (1.73975)	Top-1 acc 39.453 (41.323)	Top-5 acc 60.156 (65.363)	lr 0.02389
Train [17][740/3239]	Time 0.237 (0.567)	Data Time 0.002 (0.039)	Loss 3.5665 (3.4894)	Entropy 1.73707 (1.73971)	Top-1 acc 41.016 (41.306)	Top-5 acc 61.719 (65.339)	lr 0.02389
Train [17][750/3239]	Time 0.262 (0.565)	Data Time 0.001 (0.038)	Loss 3.6642 (3.4890)	Entropy 1.73702 (1.73968)	Top-1 acc 37.891 (41.322)	Top-5 acc 60.547 (65.345)	lr 0.02389
Train [17][760/3239]	Time 0.205 (0.563)	Data Time 0.001 (0.038)	Loss 3.3851 (3.4889)	Entropy 1.73694 (1.73964)	Top-1 acc 46.875 (41.323)	Top-5 acc 66.406 (65.343)	lr 0.02389
Train [17][770/3239]	Time 0.286 (0.561)	Data Time 0.001 (0.037)	Loss 3.4843 (3.4894)	Entropy 1.73690 (1.73961)	Top-1 acc 41.406 (41.323)	Top-5 acc 67.188 (65.338)	lr 0.02389
Train [17][780/3239]	Time 2.201 (0.559)	Data Time 0.001 (0.037)	Loss 3.3799 (3.4898)	Entropy 1.73690 (1.73957)	Top-1 acc 48.047 (41.316)	Top-5 acc 67.969 (65.320)	lr 0.02389
Train [17][790/3239]	Time 0.205 (0.555)	Data Time 0.002 (0.037)	Loss 3.4938 (3.4898)	Entropy 1.73683 (1.73954)	Top-1 acc 36.719 (41.290)	Top-5 acc 66.016 (65.321)	lr 0.02389
Train [17][800/3239]	Time 0.204 (0.553)	Data Time 0.001 (0.036)	Loss 3.4943 (3.4902)	Entropy 1.73679 (1.73950)	Top-1 acc 37.109 (41.270)	Top-5 acc 66.406 (65.327)	lr 0.02389
Train [17][810/3239]	Time 0.322 (0.552)	Data Time 0.001 (0.036)	Loss 3.7557 (3.4902)	Entropy 1.73665 (1.73947)	Top-1 acc 38.281 (41.288)	Top-5 acc 59.375 (65.322)	lr 0.02389
Train [17][820/3239]	Time 0.189 (0.550)	Data Time 0.002 (0.035)	Loss 3.6736 (3.4904)	Entropy 1.73651 (1.73943)	Top-1 acc 39.844 (41.304)	Top-5 acc 62.109 (65.321)	lr 0.02389
Train [17][830/3239]	Time 0.223 (0.549)	Data Time 0.002 (0.035)	Loss 3.5813 (3.4910)	Entropy 1.73647 (1.73940)	Top-1 acc 40.625 (41.291)	Top-5 acc 65.625 (65.316)	lr 0.02388
Train [17][840/3239]	Time 0.232 (0.547)	Data Time 0.001 (0.034)	Loss 3.4312 (3.4905)	Entropy 1.73650 (1.73936)	Top-1 acc 40.625 (41.297)	Top-5 acc 65.234 (65.327)	lr 0.02388
Train [17][850/3239]	Time 0.206 (0.546)	Data Time 0.001 (0.034)	Loss 3.2945 (3.4908)	Entropy 1.73650 (1.73933)	Top-1 acc 47.656 (41.290)	Top-5 acc 73.438 (65.324)	lr 0.02388
Train [17][860/3239]	Time 0.225 (0.545)	Data Time 0.001 (0.034)	Loss 3.3000 (3.4904)	Entropy 1.73639 (1.73930)	Top-1 acc 45.312 (41.293)	Top-5 acc 71.875 (65.340)	lr 0.02388
Train [17][870/3239]	Time 0.203 (0.543)	Data Time 0.001 (0.033)	Loss 3.7208 (3.4908)	Entropy 1.73633 (1.73926)	Top-1 acc 33.984 (41.294)	Top-5 acc 59.766 (65.323)	lr 0.02388
Train [17][880/3239]	Time 0.179 (0.542)	Data Time 0.001 (0.033)	Loss 3.1985 (3.4908)	Entropy 1.73624 (1.73923)	Top-1 acc 50.781 (41.298)	Top-5 acc 72.266 (65.323)	lr 0.02388
Train [17][890/3239]	Time 2.187 (0.541)	Data Time 0.001 (0.033)	Loss 3.4150 (3.4912)	Entropy 1.73624 (1.73920)	Top-1 acc 43.750 (41.284)	Top-5 acc 66.406 (65.316)	lr 0.02388
Train [17][900/3239]	Time 0.195 (0.537)	Data Time 0.001 (0.032)	Loss 3.4901 (3.4919)	Entropy 1.73620 (1.73916)	Top-1 acc 42.969 (41.275)	Top-5 acc 66.016 (65.310)	lr 0.02388
Train [17][910/3239]	Time 0.219 (0.536)	Data Time 0.001 (0.032)	Loss 3.3434 (3.4920)	Entropy 1.73617 (1.73913)	Top-1 acc 48.438 (41.268)	Top-5 acc 66.406 (65.297)	lr 0.02388
Train [17][920/3239]	Time 0.248 (0.535)	Data Time 0.002 (0.032)	Loss 3.3574 (3.4922)	Entropy 1.73614 (1.73910)	Top-1 acc 43.750 (41.253)	Top-5 acc 69.531 (65.294)	lr 0.02388
Train [17][930/3239]	Time 0.202 (0.534)	Data Time 0.001 (0.031)	Loss 3.3385 (3.4922)	Entropy 1.73613 (1.73906)	Top-1 acc 42.578 (41.251)	Top-5 acc 72.656 (65.304)	lr 0.02388
Train [17][940/3239]	Time 0.231 (0.532)	Data Time 0.001 (0.031)	Loss 3.5519 (3.4923)	Entropy 1.73605 (1.73903)	Top-1 acc 40.625 (41.237)	Top-5 acc 62.500 (65.294)	lr 0.02388
Train [17][950/3239]	Time 0.213 (0.532)	Data Time 0.001 (0.031)	Loss 3.4158 (3.4925)	Entropy 1.73596 (1.73900)	Top-1 acc 44.531 (41.237)	Top-5 acc 69.141 (65.285)	lr 0.02388
Train [17][960/3239]	Time 0.202 (0.531)	Data Time 0.001 (0.030)	Loss 3.5967 (3.4924)	Entropy 1.73587 (1.73897)	Top-1 acc 39.844 (41.244)	Top-5 acc 65.234 (65.282)	lr 0.02388
Train [17][970/3239]	Time 0.345 (0.571)	Data Time 0.003 (0.030)	Loss 3.5674 (3.4927)	Entropy 1.73582 (1.73894)	Top-1 acc 36.719 (41.243)	Top-5 acc 63.672 (65.278)	lr 0.02388
Train [17][980/3239]	Time 0.217 (0.571)	Data Time 0.002 (0.030)	Loss 3.4631 (3.4938)	Entropy 1.73576 (1.73891)	Top-1 acc 39.844 (41.225)	Top-5 acc 65.234 (65.252)	lr 0.02388
Train [17][990/3239]	Time 0.266 (0.569)	Data Time 0.002 (0.030)	Loss 3.5917 (3.4939)	Entropy 1.73572 (1.73887)	Top-1 acc 40.234 (41.226)	Top-5 acc 62.500 (65.252)	lr 0.02388
Train [17][1000/3239]	Time 2.395 (0.568)	Data Time 0.002 (0.029)	Loss 3.3999 (3.4940)	Entropy 1.73572 (1.73884)	Top-1 acc 43.359 (41.221)	Top-5 acc 65.234 (65.251)	lr 0.02388
Train [17][1010/3239]	Time 0.302 (0.565)	Data Time 0.001 (0.029)	Loss 3.3029 (3.4932)	Entropy 1.73565 (1.73881)	Top-1 acc 43.359 (41.230)	Top-5 acc 71.094 (65.273)	lr 0.02388
Train [17][1020/3239]	Time 0.221 (0.563)	Data Time 0.002 (0.029)	Loss 3.3635 (3.4935)	Entropy 1.73564 (1.73878)	Top-1 acc 46.484 (41.238)	Top-5 acc 66.406 (65.268)	lr 0.02388
Train [17][1030/3239]	Time 0.163 (0.562)	Data Time 0.001 (0.029)	Loss 3.5002 (3.4941)	Entropy 1.73558 (1.73875)	Top-1 acc 40.234 (41.224)	Top-5 acc 67.969 (65.265)	lr 0.02388
Train [17][1040/3239]	Time 0.139 (0.561)	Data Time 0.001 (0.028)	Loss 3.5468 (3.4940)	Entropy 1.73553 (1.73872)	Top-1 acc 39.062 (41.221)	Top-5 acc 63.672 (65.261)	lr 0.02388
Train [17][1050/3239]	Time 0.216 (0.559)	Data Time 0.001 (0.028)	Loss 3.2865 (3.4948)	Entropy 1.73550 (1.73869)	Top-1 acc 46.094 (41.200)	Top-5 acc 69.922 (65.243)	lr 0.02388
Train [17][1060/3239]	Time 0.208 (0.558)	Data Time 0.001 (0.028)	Loss 3.4715 (3.4948)	Entropy 1.73548 (1.73866)	Top-1 acc 39.844 (41.197)	Top-5 acc 65.234 (65.241)	lr 0.02388
Train [17][1070/3239]	Time 0.213 (0.557)	Data Time 0.001 (0.028)	Loss 3.4987 (3.4945)	Entropy 1.73537 (1.73863)	Top-1 acc 40.625 (41.200)	Top-5 acc 61.328 (65.249)	lr 0.02387
Train [17][1080/3239]	Time 0.297 (0.556)	Data Time 0.001 (0.027)	Loss 3.4553 (3.4941)	Entropy 1.73534 (1.73860)	Top-1 acc 44.141 (41.210)	Top-5 acc 63.672 (65.257)	lr 0.02387
Train [17][1090/3239]	Time 0.210 (0.555)	Data Time 0.001 (0.027)	Loss 3.5057 (3.4936)	Entropy 1.73527 (1.73857)	Top-1 acc 37.891 (41.226)	Top-5 acc 61.719 (65.270)	lr 0.02387
Train [17][1100/3239]	Time 0.262 (0.554)	Data Time 0.001 (0.027)	Loss 3.4419 (3.4940)	Entropy 1.73523 (1.73854)	Top-1 acc 45.312 (41.227)	Top-5 acc 67.578 (65.264)	lr 0.02387
Train [17][1110/3239]	Time 2.269 (0.553)	Data Time 0.001 (0.027)	Loss 3.4449 (3.4942)	Entropy 1.73523 (1.73851)	Top-1 acc 37.500 (41.221)	Top-5 acc 62.891 (65.252)	lr 0.02387
Train [17][1120/3239]	Time 0.175 (0.550)	Data Time 0.001 (0.026)	Loss 3.5948 (3.4941)	Entropy 1.73522 (1.73848)	Top-1 acc 39.453 (41.242)	Top-5 acc 64.453 (65.254)	lr 0.02387
Train [17][1130/3239]	Time 0.155 (0.548)	Data Time 0.001 (0.026)	Loss 3.3760 (3.4936)	Entropy 1.73521 (1.73845)	Top-1 acc 44.531 (41.254)	Top-5 acc 62.891 (65.266)	lr 0.02387
Train [17][1140/3239]	Time 0.251 (0.547)	Data Time 0.001 (0.026)	Loss 3.7307 (3.4938)	Entropy 1.73519 (1.73842)	Top-1 acc 38.281 (41.247)	Top-5 acc 58.984 (65.257)	lr 0.02387
Train [17][1150/3239]	Time 0.218 (0.546)	Data Time 0.002 (0.026)	Loss 3.4728 (3.4936)	Entropy 1.73515 (1.73839)	Top-1 acc 41.406 (41.253)	Top-5 acc 66.016 (65.262)	lr 0.02387
Train [17][1160/3239]	Time 0.193 (0.545)	Data Time 0.001 (0.026)	Loss 3.5317 (3.4938)	Entropy 1.73515 (1.73836)	Top-1 acc 40.234 (41.248)	Top-5 acc 67.578 (65.267)	lr 0.02387
Train [17][1170/3239]	Time 0.172 (0.544)	Data Time 0.001 (0.025)	Loss 3.3261 (3.4934)	Entropy 1.73503 (1.73834)	Top-1 acc 41.797 (41.261)	Top-5 acc 67.578 (65.277)	lr 0.02387
Train [17][1180/3239]	Time 0.191 (0.543)	Data Time 0.001 (0.025)	Loss 3.5165 (3.4937)	Entropy 1.73516 (1.73831)	Top-1 acc 39.844 (41.253)	Top-5 acc 64.453 (65.271)	lr 0.02387
Train [17][1190/3239]	Time 0.210 (0.542)	Data Time 0.001 (0.025)	Loss 3.4435 (3.4932)	Entropy 1.73512 (1.73828)	Top-1 acc 41.797 (41.262)	Top-5 acc 67.578 (65.279)	lr 0.02387
Train [17][1200/3239]	Time 0.249 (0.541)	Data Time 0.001 (0.025)	Loss 3.1882 (3.4929)	Entropy 1.73510 (1.73826)	Top-1 acc 43.750 (41.265)	Top-5 acc 69.141 (65.286)	lr 0.02387
Train [17][1210/3239]	Time 0.224 (0.541)	Data Time 0.001 (0.025)	Loss 3.4527 (3.4930)	Entropy 1.73497 (1.73823)	Top-1 acc 41.406 (41.267)	Top-5 acc 62.500 (65.279)	lr 0.02387
Train [17][1220/3239]	Time 2.642 (0.540)	Data Time 0.001 (0.025)	Loss 3.4771 (3.4930)	Entropy 1.73497 (1.73820)	Top-1 acc 39.062 (41.265)	Top-5 acc 64.453 (65.279)	lr 0.02387
Train [17][1230/3239]	Time 0.269 (0.538)	Data Time 0.002 (0.024)	Loss 3.2490 (3.4926)	Entropy 1.73490 (1.73818)	Top-1 acc 48.047 (41.282)	Top-5 acc 69.922 (65.285)	lr 0.02387
Train [17][1240/3239]	Time 0.188 (0.537)	Data Time 0.001 (0.024)	Loss 3.5745 (3.4925)	Entropy 1.73486 (1.73815)	Top-1 acc 40.625 (41.279)	Top-5 acc 62.891 (65.282)	lr 0.02387
Train [17][1250/3239]	Time 0.215 (0.536)	Data Time 0.001 (0.024)	Loss 3.4973 (3.4921)	Entropy 1.73483 (1.73812)	Top-1 acc 41.016 (41.288)	Top-5 acc 64.844 (65.286)	lr 0.02387
Train [17][1260/3239]	Time 0.245 (0.535)	Data Time 0.001 (0.024)	Loss 3.2577 (3.4920)	Entropy 1.73481 (1.73810)	Top-1 acc 44.531 (41.293)	Top-5 acc 73.828 (65.293)	lr 0.02387
Train [17][1270/3239]	Time 0.219 (0.534)	Data Time 0.001 (0.024)	Loss 3.5585 (3.4923)	Entropy 1.73479 (1.73807)	Top-1 acc 41.406 (41.287)	Top-5 acc 66.797 (65.285)	lr 0.02387
Train [17][1280/3239]	Time 0.201 (0.534)	Data Time 0.001 (0.023)	Loss 3.4310 (3.4924)	Entropy 1.73476 (1.73804)	Top-1 acc 40.625 (41.279)	Top-5 acc 67.188 (65.284)	lr 0.02387
Train [17][1290/3239]	Time 0.278 (0.533)	Data Time 0.001 (0.023)	Loss 3.2383 (3.4920)	Entropy 1.73470 (1.73802)	Top-1 acc 42.969 (41.277)	Top-5 acc 67.969 (65.291)	lr 0.02387
Train [17][1300/3239]	Time 0.318 (0.533)	Data Time 0.001 (0.023)	Loss 3.4499 (3.4919)	Entropy 1.73462 (1.73799)	Top-1 acc 41.406 (41.279)	Top-5 acc 62.891 (65.289)	lr 0.02387
Train [17][1310/3239]	Time 0.193 (0.532)	Data Time 0.001 (0.023)	Loss 3.4870 (3.4917)	Entropy 1.73452 (1.73797)	Top-1 acc 40.234 (41.282)	Top-5 acc 66.797 (65.299)	lr 0.02386
Train [17][1320/3239]	Time 0.199 (0.532)	Data Time 0.001 (0.023)	Loss 3.2914 (3.4915)	Entropy 1.73444 (1.73794)	Top-1 acc 46.484 (41.301)	Top-5 acc 70.312 (65.302)	lr 0.02386
Train [17][1330/3239]	Time 47.181 (0.565)	Data Time 0.002 (0.023)	Loss 3.5430 (3.4913)	Entropy 1.73444 (1.73792)	Top-1 acc 41.016 (41.299)	Top-5 acc 60.547 (65.302)	lr 0.02386
Train [17][1340/3239]	Time 0.209 (0.562)	Data Time 0.002 (0.023)	Loss 3.8612 (3.4917)	Entropy 1.73449 (1.73789)	Top-1 acc 32.812 (41.293)	Top-5 acc 57.031 (65.290)	lr 0.02386
Train [17][1350/3239]	Time 0.161 (0.561)	Data Time 0.002 (0.022)	Loss 3.3701 (3.4917)	Entropy 1.73448 (1.73786)	Top-1 acc 41.016 (41.283)	Top-5 acc 70.703 (65.288)	lr 0.02386
Train [17][1360/3239]	Time 0.193 (0.560)	Data Time 0.001 (0.022)	Loss 3.4521 (3.4912)	Entropy 1.73447 (1.73784)	Top-1 acc 46.484 (41.296)	Top-5 acc 65.234 (65.299)	lr 0.02386
Train [17][1370/3239]	Time 0.186 (0.560)	Data Time 0.001 (0.022)	Loss 3.2155 (3.4914)	Entropy 1.73446 (1.73781)	Top-1 acc 45.312 (41.287)	Top-5 acc 71.484 (65.295)	lr 0.02386
Train [17][1380/3239]	Time 0.229 (0.559)	Data Time 0.001 (0.022)	Loss 3.6000 (3.4916)	Entropy 1.73444 (1.73779)	Top-1 acc 36.719 (41.283)	Top-5 acc 64.844 (65.300)	lr 0.02386
Train [17][1390/3239]	Time 0.201 (0.558)	Data Time 0.001 (0.022)	Loss 3.6664 (3.4915)	Entropy 1.73436 (1.73777)	Top-1 acc 41.406 (41.285)	Top-5 acc 60.156 (65.294)	lr 0.02386
Train [17][1400/3239]	Time 0.245 (0.557)	Data Time 0.002 (0.022)	Loss 3.5375 (3.4916)	Entropy 1.73429 (1.73774)	Top-1 acc 39.062 (41.279)	Top-5 acc 66.797 (65.297)	lr 0.02386
Train [17][1410/3239]	Time 0.168 (0.556)	Data Time 0.001 (0.022)	Loss 3.3244 (3.4914)	Entropy 1.73416 (1.73772)	Top-1 acc 48.438 (41.290)	Top-5 acc 68.750 (65.298)	lr 0.02386
Train [17][1420/3239]	Time 0.203 (0.555)	Data Time 0.002 (0.021)	Loss 3.2269 (3.4912)	Entropy 1.73412 (1.73769)	Top-1 acc 47.656 (41.290)	Top-5 acc 69.922 (65.303)	lr 0.02386
Train [17][1430/3239]	Time 0.310 (0.554)	Data Time 0.001 (0.021)	Loss 3.4837 (3.4907)	Entropy 1.73405 (1.73767)	Top-1 acc 40.234 (41.295)	Top-5 acc 64.062 (65.306)	lr 0.02386
Train [17][1440/3239]	Time 2.271 (0.554)	Data Time 0.001 (0.021)	Loss 3.6735 (3.4910)	Entropy 1.73405 (1.73764)	Top-1 acc 37.109 (41.290)	Top-5 acc 62.500 (65.309)	lr 0.02386
Train [17][1450/3239]	Time 0.259 (0.551)	Data Time 0.001 (0.021)	Loss 3.3973 (3.4914)	Entropy 1.73398 (1.73762)	Top-1 acc 41.797 (41.276)	Top-5 acc 67.969 (65.299)	lr 0.02386
Train [17][1460/3239]	Time 0.170 (0.550)	Data Time 0.001 (0.021)	Loss 3.6820 (3.4914)	Entropy 1.73388 (1.73759)	Top-1 acc 41.406 (41.283)	Top-5 acc 63.281 (65.297)	lr 0.02386
Train [17][1470/3239]	Time 0.213 (0.550)	Data Time 0.001 (0.021)	Loss 3.3616 (3.4912)	Entropy 1.73381 (1.73757)	Top-1 acc 43.750 (41.288)	Top-5 acc 67.578 (65.298)	lr 0.02386
Train [17][1480/3239]	Time 0.201 (0.549)	Data Time 0.001 (0.021)	Loss 3.6021 (3.4914)	Entropy 1.73382 (1.73754)	Top-1 acc 39.453 (41.278)	Top-5 acc 62.109 (65.294)	lr 0.02386
Train [17][1490/3239]	Time 0.211 (0.548)	Data Time 0.001 (0.021)	Loss 3.4423 (3.4913)	Entropy 1.73375 (1.73751)	Top-1 acc 39.453 (41.280)	Top-5 acc 65.625 (65.291)	lr 0.02386
Train [17][1500/3239]	Time 0.342 (0.547)	Data Time 0.001 (0.020)	Loss 3.4741 (3.4915)	Entropy 1.73363 (1.73749)	Top-1 acc 44.922 (41.277)	Top-5 acc 69.141 (65.287)	lr 0.02386
Train [17][1510/3239]	Time 0.224 (0.547)	Data Time 0.001 (0.020)	Loss 3.3228 (3.4915)	Entropy 1.73351 (1.73746)	Top-1 acc 46.875 (41.281)	Top-5 acc 68.750 (65.288)	lr 0.02386
Train [17][1520/3239]	Time 0.212 (0.546)	Data Time 0.001 (0.020)	Loss 3.4607 (3.4914)	Entropy 1.73344 (1.73744)	Top-1 acc 40.625 (41.283)	Top-5 acc 65.625 (65.288)	lr 0.02386
Train [17][1530/3239]	Time 0.213 (0.545)	Data Time 0.001 (0.020)	Loss 3.5293 (3.4916)	Entropy 1.73342 (1.73741)	Top-1 acc 41.016 (41.282)	Top-5 acc 65.625 (65.283)	lr 0.02386
Train [17][1540/3239]	Time 0.207 (0.544)	Data Time 0.001 (0.020)	Loss 3.3382 (3.4913)	Entropy 1.73331 (1.73739)	Top-1 acc 41.797 (41.288)	Top-5 acc 66.016 (65.286)	lr 0.02386
Train [17][1550/3239]	Time 2.381 (0.544)	Data Time 0.001 (0.020)	Loss 3.4777 (3.4913)	Entropy 1.73331 (1.73736)	Top-1 acc 39.062 (41.293)	Top-5 acc 65.625 (65.286)	lr 0.02385
Train [17][1560/3239]	Time 0.287 (0.542)	Data Time 0.001 (0.020)	Loss 3.6719 (3.4916)	Entropy 1.73332 (1.73733)	Top-1 acc 39.062 (41.283)	Top-5 acc 60.547 (65.276)	lr 0.02385
Train [17][1570/3239]	Time 0.211 (0.541)	Data Time 0.001 (0.020)	Loss 3.3851 (3.4917)	Entropy 1.73332 (1.73731)	Top-1 acc 39.062 (41.276)	Top-5 acc 67.578 (65.274)	lr 0.02385
Train [17][1580/3239]	Time 0.203 (0.540)	Data Time 0.001 (0.020)	Loss 3.5160 (3.4916)	Entropy 1.73325 (1.73728)	Top-1 acc 43.359 (41.275)	Top-5 acc 65.234 (65.271)	lr 0.02385
Train [17][1590/3239]	Time 0.188 (0.540)	Data Time 0.001 (0.019)	Loss 3.5644 (3.4918)	Entropy 1.73318 (1.73726)	Top-1 acc 42.578 (41.270)	Top-5 acc 66.016 (65.269)	lr 0.02385
Train [17][1600/3239]	Time 0.222 (0.539)	Data Time 0.001 (0.019)	Loss 3.4339 (3.4918)	Entropy 1.73319 (1.73723)	Top-1 acc 41.016 (41.271)	Top-5 acc 66.016 (65.264)	lr 0.02385
Train [17][1610/3239]	Time 0.205 (0.538)	Data Time 0.002 (0.019)	Loss 3.4175 (3.4913)	Entropy 1.73315 (1.73721)	Top-1 acc 43.359 (41.278)	Top-5 acc 66.406 (65.272)	lr 0.02385
Train [17][1620/3239]	Time 0.208 (0.538)	Data Time 0.001 (0.019)	Loss 3.5764 (3.4920)	Entropy 1.73312 (1.73718)	Top-1 acc 36.328 (41.269)	Top-5 acc 61.719 (65.258)	lr 0.02385
Train [17][1630/3239]	Time 0.290 (0.537)	Data Time 0.001 (0.019)	Loss 3.6348 (3.4915)	Entropy 1.73312 (1.73716)	Top-1 acc 41.016 (41.286)	Top-5 acc 64.062 (65.272)	lr 0.02385
Train [17][1640/3239]	Time 0.154 (0.537)	Data Time 0.001 (0.019)	Loss 3.5762 (3.4916)	Entropy 1.73299 (1.73713)	Top-1 acc 41.797 (41.282)	Top-5 acc 64.062 (65.271)	lr 0.02385
Train [17][1650/3239]	Time 0.255 (0.536)	Data Time 0.002 (0.019)	Loss 3.4546 (3.4918)	Entropy 1.73293 (1.73711)	Top-1 acc 44.141 (41.275)	Top-5 acc 65.234 (65.266)	lr 0.02385
Train [17][1660/3239]	Time 2.282 (0.535)	Data Time 0.001 (0.019)	Loss 3.3422 (3.4915)	Entropy 1.73293 (1.73708)	Top-1 acc 45.703 (41.286)	Top-5 acc 67.188 (65.264)	lr 0.02385
Train [17][1670/3239]	Time 0.187 (0.533)	Data Time 0.001 (0.019)	Loss 3.2664 (3.4916)	Entropy 1.73289 (1.73706)	Top-1 acc 48.047 (41.290)	Top-5 acc 69.531 (65.260)	lr 0.02385
Train [17][1680/3239]	Time 0.215 (0.533)	Data Time 0.001 (0.019)	Loss 3.6694 (3.4919)	Entropy 1.73289 (1.73703)	Top-1 acc 39.844 (41.281)	Top-5 acc 61.719 (65.256)	lr 0.02385
Train [17][1690/3239]	Time 0.234 (0.532)	Data Time 0.001 (0.018)	Loss 3.2558 (3.4914)	Entropy 1.73281 (1.73701)	Top-1 acc 52.734 (41.294)	Top-5 acc 70.312 (65.272)	lr 0.02385
Train [17][1700/3239]	Time 0.551 (0.554)	Data Time 0.003 (0.018)	Loss 3.4182 (3.4917)	Entropy 1.73273 (1.73698)	Top-1 acc 44.141 (41.296)	Top-5 acc 66.016 (65.266)	lr 0.02385
Train [17][1710/3239]	Time 0.218 (0.554)	Data Time 0.002 (0.018)	Loss 3.2714 (3.4916)	Entropy 1.73273 (1.73696)	Top-1 acc 44.922 (41.299)	Top-5 acc 68.359 (65.270)	lr 0.02385
Train [17][1720/3239]	Time 0.223 (0.554)	Data Time 0.001 (0.018)	Loss 3.6382 (3.4914)	Entropy 1.73268 (1.73693)	Top-1 acc 41.797 (41.308)	Top-5 acc 61.328 (65.279)	lr 0.02385
Train [17][1730/3239]	Time 0.219 (0.553)	Data Time 0.001 (0.018)	Loss 3.5221 (3.4916)	Entropy 1.73258 (1.73691)	Top-1 acc 40.234 (41.306)	Top-5 acc 66.406 (65.275)	lr 0.02385
Train [17][1740/3239]	Time 0.139 (0.552)	Data Time 0.001 (0.018)	Loss 3.5290 (3.4914)	Entropy 1.73253 (1.73688)	Top-1 acc 37.500 (41.306)	Top-5 acc 65.234 (65.282)	lr 0.02385
Train [17][1750/3239]	Time 0.222 (0.551)	Data Time 0.001 (0.018)	Loss 3.2798 (3.4911)	Entropy 1.73247 (1.73686)	Top-1 acc 46.094 (41.309)	Top-5 acc 69.922 (65.294)	lr 0.02385
Train [17][1760/3239]	Time 0.194 (0.551)	Data Time 0.001 (0.018)	Loss 3.3998 (3.4906)	Entropy 1.73245 (1.73683)	Top-1 acc 41.797 (41.316)	Top-5 acc 68.750 (65.300)	lr 0.02385
Train [17][1770/3239]	Time 2.278 (0.550)	Data Time 0.001 (0.018)	Loss 3.6741 (3.4907)	Entropy 1.73245 (1.73681)	Top-1 acc 41.016 (41.314)	Top-5 acc 57.031 (65.292)	lr 0.02385
Train [17][1780/3239]	Time 0.228 (0.548)	Data Time 0.001 (0.018)	Loss 3.4420 (3.4905)	Entropy 1.73241 (1.73678)	Top-1 acc 45.312 (41.320)	Top-5 acc 64.453 (65.299)	lr 0.02385
Train [17][1790/3239]	Time 0.205 (0.548)	Data Time 0.001 (0.018)	Loss 3.4706 (3.4904)	Entropy 1.73238 (1.73676)	Top-1 acc 43.750 (41.321)	Top-5 acc 63.672 (65.300)	lr 0.02384
Train [17][1800/3239]	Time 0.179 (0.547)	Data Time 0.001 (0.017)	Loss 3.5240 (3.4905)	Entropy 1.73231 (1.73673)	Top-1 acc 41.406 (41.325)	Top-5 acc 60.938 (65.294)	lr 0.02384
Train [17][1810/3239]	Time 0.205 (0.546)	Data Time 0.001 (0.017)	Loss 3.4314 (3.4908)	Entropy 1.73224 (1.73671)	Top-1 acc 46.484 (41.323)	Top-5 acc 66.797 (65.287)	lr 0.02384
Train [17][1820/3239]	Time 0.243 (0.546)	Data Time 0.001 (0.017)	Loss 3.5371 (3.4905)	Entropy 1.73219 (1.73668)	Top-1 acc 41.797 (41.331)	Top-5 acc 67.188 (65.296)	lr 0.02384
Train [17][1830/3239]	Time 0.222 (0.545)	Data Time 0.001 (0.017)	Loss 3.5924 (3.4906)	Entropy 1.73219 (1.73666)	Top-1 acc 37.109 (41.337)	Top-5 acc 58.984 (65.294)	lr 0.02384
Train [17][1840/3239]	Time 0.193 (0.545)	Data Time 0.001 (0.017)	Loss 3.4993 (3.4906)	Entropy 1.73210 (1.73663)	Top-1 acc 39.844 (41.339)	Top-5 acc 64.453 (65.292)	lr 0.02384
Train [17][1850/3239]	Time 0.236 (0.544)	Data Time 0.001 (0.017)	Loss 3.4631 (3.4902)	Entropy 1.73218 (1.73661)	Top-1 acc 43.750 (41.347)	Top-5 acc 65.625 (65.299)	lr 0.02384
Train [17][1860/3239]	Time 0.207 (0.543)	Data Time 0.001 (0.017)	Loss 3.4425 (3.4902)	Entropy 1.73211 (1.73659)	Top-1 acc 39.844 (41.337)	Top-5 acc 67.188 (65.296)	lr 0.02384
Train [17][1870/3239]	Time 0.205 (0.543)	Data Time 0.001 (0.017)	Loss 3.5451 (3.4903)	Entropy 1.73207 (1.73656)	Top-1 acc 40.625 (41.333)	Top-5 acc 60.547 (65.295)	lr 0.02384
Train [17][1880/3239]	Time 2.324 (0.542)	Data Time 0.001 (0.017)	Loss 3.4153 (3.4905)	Entropy 1.73207 (1.73654)	Top-1 acc 41.406 (41.330)	Top-5 acc 65.234 (65.293)	lr 0.02384
Train [17][1890/3239]	Time 0.266 (0.541)	Data Time 0.001 (0.017)	Loss 3.3989 (3.4906)	Entropy 1.73199 (1.73651)	Top-1 acc 40.234 (41.326)	Top-5 acc 66.016 (65.289)	lr 0.02384
Train [17][1900/3239]	Time 0.176 (0.540)	Data Time 0.001 (0.017)	Loss 3.5134 (3.4904)	Entropy 1.73186 (1.73649)	Top-1 acc 41.797 (41.328)	Top-5 acc 64.453 (65.299)	lr 0.02384
Train [17][1910/3239]	Time 0.325 (0.539)	Data Time 0.001 (0.017)	Loss 3.5144 (3.4906)	Entropy 1.73181 (1.73647)	Top-1 acc 39.453 (41.324)	Top-5 acc 64.453 (65.295)	lr 0.02384
Train [17][1920/3239]	Time 0.215 (0.539)	Data Time 0.001 (0.016)	Loss 3.3930 (3.4906)	Entropy 1.73172 (1.73644)	Top-1 acc 44.922 (41.328)	Top-5 acc 65.625 (65.293)	lr 0.02384
Train [17][1930/3239]	Time 0.209 (0.538)	Data Time 0.001 (0.016)	Loss 3.4064 (3.4909)	Entropy 1.73156 (1.73642)	Top-1 acc 42.969 (41.320)	Top-5 acc 66.016 (65.280)	lr 0.02384
Train [17][1940/3239]	Time 0.249 (0.538)	Data Time 0.001 (0.016)	Loss 3.5084 (3.4910)	Entropy 1.73137 (1.73639)	Top-1 acc 40.234 (41.319)	Top-5 acc 66.406 (65.277)	lr 0.02384
Train [17][1950/3239]	Time 0.189 (0.537)	Data Time 0.001 (0.016)	Loss 3.4950 (3.4908)	Entropy 1.73128 (1.73636)	Top-1 acc 41.406 (41.318)	Top-5 acc 67.578 (65.284)	lr 0.02384
Train [17][1960/3239]	Time 0.210 (0.537)	Data Time 0.001 (0.016)	Loss 3.6190 (3.4908)	Entropy 1.73127 (1.73634)	Top-1 acc 39.062 (41.316)	Top-5 acc 61.719 (65.281)	lr 0.02384
Train [17][1970/3239]	Time 0.191 (0.536)	Data Time 0.001 (0.016)	Loss 3.3725 (3.4906)	Entropy 1.73120 (1.73631)	Top-1 acc 45.312 (41.315)	Top-5 acc 67.188 (65.284)	lr 0.02384
Train [17][1980/3239]	Time 0.337 (0.536)	Data Time 0.001 (0.016)	Loss 3.2078 (3.4903)	Entropy 1.73116 (1.73629)	Top-1 acc 47.656 (41.321)	Top-5 acc 71.484 (65.294)	lr 0.02384
Train [17][1990/3239]	Time 2.324 (0.535)	Data Time 0.001 (0.016)	Loss 3.4710 (3.4899)	Entropy 1.73116 (1.73626)	Top-1 acc 41.797 (41.327)	Top-5 acc 67.188 (65.303)	lr 0.02384
Train [17][2000/3239]	Time 0.192 (0.534)	Data Time 0.001 (0.016)	Loss 3.4145 (3.4900)	Entropy 1.73114 (1.73624)	Top-1 acc 41.016 (41.322)	Top-5 acc 66.016 (65.297)	lr 0.02384
Train [17][2010/3239]	Time 0.196 (0.533)	Data Time 0.001 (0.016)	Loss 3.6903 (3.4901)	Entropy 1.73113 (1.73621)	Top-1 acc 35.938 (41.320)	Top-5 acc 62.891 (65.298)	lr 0.02384
Train [17][2020/3239]	Time 0.207 (0.533)	Data Time 0.002 (0.016)	Loss 3.4510 (3.4903)	Entropy 1.73102 (1.73619)	Top-1 acc 44.531 (41.315)	Top-5 acc 67.969 (65.294)	lr 0.02383
Train [17][2030/3239]	Time 0.209 (0.532)	Data Time 0.001 (0.016)	Loss 3.6460 (3.4905)	Entropy 1.73090 (1.73616)	Top-1 acc 37.891 (41.309)	Top-5 acc 62.109 (65.289)	lr 0.02383
Train [17][2040/3239]	Time 0.150 (0.532)	Data Time 0.001 (0.016)	Loss 3.6092 (3.4906)	Entropy 1.73086 (1.73613)	Top-1 acc 39.844 (41.308)	Top-5 acc 62.891 (65.286)	lr 0.02383
Train [17][2050/3239]	Time 0.197 (0.531)	Data Time 0.001 (0.016)	Loss 3.6208 (3.4904)	Entropy 1.73074 (1.73611)	Top-1 acc 41.016 (41.312)	Top-5 acc 59.766 (65.292)	lr 0.02383
Train [17][2060/3239]	Time 0.498 (0.551)	Data Time 0.002 (0.015)	Loss 3.2867 (3.4903)	Entropy 1.73069 (1.73608)	Top-1 acc 47.266 (41.319)	Top-5 acc 70.312 (65.296)	lr 0.02383
Train [17][2070/3239]	Time 0.239 (0.550)	Data Time 0.002 (0.015)	Loss 3.6287 (3.4902)	Entropy 1.73069 (1.73606)	Top-1 acc 40.625 (41.326)	Top-5 acc 61.328 (65.300)	lr 0.02383
Train [17][2080/3239]	Time 0.173 (0.550)	Data Time 0.002 (0.015)	Loss 3.4959 (3.4899)	Entropy 1.73062 (1.73603)	Top-1 acc 41.016 (41.332)	Top-5 acc 66.016 (65.305)	lr 0.02383
Train [17][2090/3239]	Time 0.234 (0.549)	Data Time 0.001 (0.015)	Loss 3.4548 (3.4900)	Entropy 1.73051 (1.73600)	Top-1 acc 41.797 (41.325)	Top-5 acc 66.797 (65.304)	lr 0.02383
Train [17][2100/3239]	Time 2.310 (0.549)	Data Time 0.001 (0.015)	Loss 3.5057 (3.4898)	Entropy 1.73051 (1.73598)	Top-1 acc 40.625 (41.328)	Top-5 acc 67.188 (65.308)	lr 0.02383
Train [17][2110/3239]	Time 0.216 (0.547)	Data Time 0.002 (0.015)	Loss 3.3759 (3.4900)	Entropy 1.73052 (1.73595)	Top-1 acc 42.969 (41.324)	Top-5 acc 69.922 (65.308)	lr 0.02383
Train [17][2120/3239]	Time 0.231 (0.547)	Data Time 0.001 (0.015)	Loss 3.2736 (3.4898)	Entropy 1.73046 (1.73593)	Top-1 acc 47.656 (41.326)	Top-5 acc 69.922 (65.312)	lr 0.02383
Train [17][2130/3239]	Time 0.204 (0.546)	Data Time 0.001 (0.015)	Loss 3.6251 (3.4898)	Entropy 1.73046 (1.73590)	Top-1 acc 41.406 (41.328)	Top-5 acc 65.625 (65.313)	lr 0.02383
Train [17][2140/3239]	Time 0.291 (0.545)	Data Time 0.001 (0.015)	Loss 3.4577 (3.4897)	Entropy 1.73044 (1.73587)	Top-1 acc 42.188 (41.328)	Top-5 acc 66.797 (65.316)	lr 0.02383
Train [17][2150/3239]	Time 0.209 (0.545)	Data Time 0.001 (0.015)	Loss 3.4541 (3.4893)	Entropy 1.73043 (1.73585)	Top-1 acc 42.578 (41.331)	Top-5 acc 67.578 (65.326)	lr 0.02383
Train [17][2160/3239]	Time 0.216 (0.544)	Data Time 0.001 (0.015)	Loss 3.3615 (3.4892)	Entropy 1.73030 (1.73582)	Top-1 acc 43.750 (41.335)	Top-5 acc 67.578 (65.328)	lr 0.02383
Train [17][2170/3239]	Time 0.233 (0.544)	Data Time 0.002 (0.015)	Loss 3.5476 (3.4890)	Entropy 1.73027 (1.73580)	Top-1 acc 38.672 (41.341)	Top-5 acc 63.281 (65.333)	lr 0.02383
Train [17][2180/3239]	Time 0.214 (0.543)	Data Time 0.001 (0.015)	Loss 3.4527 (3.4886)	Entropy 1.73020 (1.73577)	Top-1 acc 40.625 (41.353)	Top-5 acc 66.797 (65.342)	lr 0.02383
Train [17][2190/3239]	Time 0.202 (0.543)	Data Time 0.001 (0.015)	Loss 3.4463 (3.4886)	Entropy 1.73010 (1.73575)	Top-1 acc 40.234 (41.357)	Top-5 acc 68.359 (65.347)	lr 0.02383
Train [17][2200/3239]	Time 0.199 (0.542)	Data Time 0.001 (0.015)	Loss 3.6702 (3.4889)	Entropy 1.73008 (1.73572)	Top-1 acc 38.281 (41.349)	Top-5 acc 59.766 (65.338)	lr 0.02383
Train [17][2210/3239]	Time 2.380 (0.542)	Data Time 0.001 (0.015)	Loss 3.4907 (3.4890)	Entropy 1.73008 (1.73570)	Top-1 acc 42.188 (41.344)	Top-5 acc 65.625 (65.332)	lr 0.02383
Train [17][2220/3239]	Time 0.212 (0.540)	Data Time 0.001 (0.015)	Loss 3.2904 (3.4890)	Entropy 1.73001 (1.73567)	Top-1 acc 45.703 (41.344)	Top-5 acc 68.750 (65.332)	lr 0.02383
Train [17][2230/3239]	Time 0.231 (0.540)	Data Time 0.001 (0.014)	Loss 3.2307 (3.4889)	Entropy 1.72997 (1.73564)	Top-1 acc 44.141 (41.347)	Top-5 acc 68.750 (65.335)	lr 0.02383
Train [17][2240/3239]	Time 0.197 (0.539)	Data Time 0.001 (0.014)	Loss 3.5087 (3.4886)	Entropy 1.72990 (1.73562)	Top-1 acc 42.969 (41.357)	Top-5 acc 65.625 (65.341)	lr 0.02383
Train [17][2250/3239]	Time 0.168 (0.539)	Data Time 0.001 (0.014)	Loss 3.4745 (3.4889)	Entropy 1.72987 (1.73559)	Top-1 acc 42.188 (41.352)	Top-5 acc 68.359 (65.337)	lr 0.02383
Train [17][2260/3239]	Time 0.210 (0.538)	Data Time 0.002 (0.014)	Loss 3.5086 (3.4888)	Entropy 1.72982 (1.73557)	Top-1 acc 40.625 (41.355)	Top-5 acc 64.453 (65.332)	lr 0.02382
Train [17][2270/3239]	Time 0.265 (0.538)	Data Time 0.002 (0.014)	Loss 3.2967 (3.4888)	Entropy 1.72976 (1.73554)	Top-1 acc 46.875 (41.359)	Top-5 acc 69.141 (65.335)	lr 0.02382
Train [17][2280/3239]	Time 0.203 (0.538)	Data Time 0.002 (0.014)	Loss 3.6856 (3.4889)	Entropy 1.72968 (1.73552)	Top-1 acc 39.844 (41.359)	Top-5 acc 63.281 (65.330)	lr 0.02382
Train [17][2290/3239]	Time 0.288 (0.537)	Data Time 0.001 (0.014)	Loss 3.4786 (3.4887)	Entropy 1.72964 (1.73549)	Top-1 acc 43.750 (41.363)	Top-5 acc 64.844 (65.333)	lr 0.02382
Train [17][2300/3239]	Time 0.236 (0.537)	Data Time 0.001 (0.014)	Loss 3.3570 (3.4885)	Entropy 1.72958 (1.73547)	Top-1 acc 44.531 (41.372)	Top-5 acc 68.750 (65.336)	lr 0.02382
Train [17][2310/3239]	Time 0.230 (0.536)	Data Time 0.001 (0.014)	Loss 3.3508 (3.4884)	Entropy 1.72954 (1.73544)	Top-1 acc 45.703 (41.369)	Top-5 acc 73.438 (65.339)	lr 0.02382
Train [17][2320/3239]	Time 2.216 (0.536)	Data Time 0.001 (0.014)	Loss 3.6099 (3.4884)	Entropy 1.72954 (1.73542)	Top-1 acc 41.797 (41.374)	Top-5 acc 60.547 (65.338)	lr 0.02382
Train [17][2330/3239]	Time 0.230 (0.534)	Data Time 0.001 (0.014)	Loss 3.6201 (3.4885)	Entropy 1.72946 (1.73539)	Top-1 acc 37.500 (41.371)	Top-5 acc 62.109 (65.333)	lr 0.02382
Train [17][2340/3239]	Time 0.220 (0.534)	Data Time 0.001 (0.014)	Loss 3.4012 (3.4884)	Entropy 1.72944 (1.73536)	Top-1 acc 41.797 (41.378)	Top-5 acc 68.359 (65.334)	lr 0.02382
Train [17][2350/3239]	Time 0.211 (0.533)	Data Time 0.001 (0.014)	Loss 3.5400 (3.4885)	Entropy 1.72932 (1.73534)	Top-1 acc 41.797 (41.376)	Top-5 acc 62.109 (65.330)	lr 0.02382
Train [17][2360/3239]	Time 0.274 (0.533)	Data Time 0.001 (0.014)	Loss 3.4855 (3.4885)	Entropy 1.72925 (1.73531)	Top-1 acc 43.359 (41.374)	Top-5 acc 65.625 (65.327)	lr 0.02382
Train [17][2370/3239]	Time 0.209 (0.533)	Data Time 0.001 (0.014)	Loss 3.7032 (3.4886)	Entropy 1.72918 (1.73529)	Top-1 acc 35.547 (41.376)	Top-5 acc 60.938 (65.327)	lr 0.02382
Train [17][2380/3239]	Time 0.196 (0.532)	Data Time 0.001 (0.014)	Loss 3.3027 (3.4885)	Entropy 1.72912 (1.73526)	Top-1 acc 46.094 (41.382)	Top-5 acc 66.016 (65.327)	lr 0.02382
Train [17][2390/3239]	Time 0.192 (0.532)	Data Time 0.001 (0.014)	Loss 3.4465 (3.4882)	Entropy 1.72907 (1.73524)	Top-1 acc 40.234 (41.391)	Top-5 acc 66.406 (65.333)	lr 0.02382
Train [17][2400/3239]	Time 0.162 (0.531)	Data Time 0.001 (0.014)	Loss 3.8929 (3.4881)	Entropy 1.72900 (1.73521)	Top-1 acc 35.156 (41.391)	Top-5 acc 57.422 (65.335)	lr 0.02382
Train [17][2410/3239]	Time 0.208 (0.531)	Data Time 0.001 (0.014)	Loss 3.3361 (3.4880)	Entropy 1.72899 (1.73518)	Top-1 acc 40.625 (41.392)	Top-5 acc 68.359 (65.338)	lr 0.02382
Train [17][2420/3239]	Time 0.486 (0.547)	Data Time 0.005 (0.014)	Loss 3.4852 (3.4879)	Entropy 1.72895 (1.73516)	Top-1 acc 42.188 (41.393)	Top-5 acc 64.453 (65.342)	lr 0.02382
Train [17][2430/3239]	Time 2.420 (0.547)	Data Time 0.002 (0.013)	Loss 3.5670 (3.4879)	Entropy 1.72895 (1.73513)	Top-1 acc 39.844 (41.395)	Top-5 acc 62.109 (65.341)	lr 0.02382
Train [17][2440/3239]	Time 0.154 (0.546)	Data Time 0.002 (0.013)	Loss 3.5622 (3.4876)	Entropy 1.72890 (1.73511)	Top-1 acc 40.625 (41.404)	Top-5 acc 64.062 (65.348)	lr 0.02382
Train [17][2450/3239]	Time 0.276 (0.545)	Data Time 0.002 (0.013)	Loss 3.3910 (3.4875)	Entropy 1.72884 (1.73508)	Top-1 acc 41.797 (41.403)	Top-5 acc 65.234 (65.349)	lr 0.02382
Train [17][2460/3239]	Time 0.230 (0.545)	Data Time 0.001 (0.013)	Loss 3.6400 (3.4874)	Entropy 1.72882 (1.73506)	Top-1 acc 39.062 (41.402)	Top-5 acc 60.547 (65.349)	lr 0.02382
Train [17][2470/3239]	Time 0.176 (0.544)	Data Time 0.002 (0.013)	Loss 3.5171 (3.4876)	Entropy 1.72880 (1.73503)	Top-1 acc 37.891 (41.400)	Top-5 acc 65.234 (65.344)	lr 0.02382
Train [17][2480/3239]	Time 0.208 (0.544)	Data Time 0.001 (0.013)	Loss 3.6435 (3.4876)	Entropy 1.72884 (1.73501)	Top-1 acc 39.844 (41.398)	Top-5 acc 62.109 (65.342)	lr 0.02382
Train [17][2490/3239]	Time 0.294 (0.544)	Data Time 0.001 (0.013)	Loss 3.4113 (3.4879)	Entropy 1.72879 (1.73498)	Top-1 acc 44.141 (41.392)	Top-5 acc 68.359 (65.334)	lr 0.02381
Train [17][2500/3239]	Time 0.213 (0.543)	Data Time 0.001 (0.013)	Loss 3.5334 (3.4877)	Entropy 1.72870 (1.73496)	Top-1 acc 42.969 (41.400)	Top-5 acc 65.234 (65.341)	lr 0.02381
Train [17][2510/3239]	Time 0.208 (0.543)	Data Time 0.001 (0.013)	Loss 3.5479 (3.4878)	Entropy 1.72861 (1.73493)	Top-1 acc 42.969 (41.399)	Top-5 acc 64.844 (65.339)	lr 0.02381
Train [17][2520/3239]	Time 0.224 (0.542)	Data Time 0.001 (0.013)	Loss 3.3416 (3.4879)	Entropy 1.72860 (1.73491)	Top-1 acc 43.750 (41.396)	Top-5 acc 69.141 (65.336)	lr 0.02381
Train [17][2530/3239]	Time 0.258 (0.542)	Data Time 0.001 (0.013)	Loss 3.8320 (3.4883)	Entropy 1.72857 (1.73488)	Top-1 acc 33.594 (41.384)	Top-5 acc 58.203 (65.326)	lr 0.02381
Train [17][2540/3239]	Time 2.146 (0.541)	Data Time 0.001 (0.013)	Loss 3.3965 (3.4885)	Entropy 1.72857 (1.73486)	Top-1 acc 41.406 (41.381)	Top-5 acc 66.797 (65.322)	lr 0.02381
Train [17][2550/3239]	Time 0.313 (0.540)	Data Time 0.002 (0.013)	Loss 3.3322 (3.4884)	Entropy 1.72854 (1.73483)	Top-1 acc 41.797 (41.382)	Top-5 acc 70.312 (65.322)	lr 0.02381
Train [17][2560/3239]	Time 0.215 (0.540)	Data Time 0.001 (0.013)	Loss 3.3698 (3.4882)	Entropy 1.72849 (1.73481)	Top-1 acc 44.922 (41.387)	Top-5 acc 66.016 (65.324)	lr 0.02381
Train [17][2570/3239]	Time 0.168 (0.539)	Data Time 0.001 (0.013)	Loss 3.6756 (3.4883)	Entropy 1.72837 (1.73478)	Top-1 acc 37.109 (41.387)	Top-5 acc 62.891 (65.323)	lr 0.02381
Train [17][2580/3239]	Time 0.205 (0.539)	Data Time 0.001 (0.013)	Loss 3.5433 (3.4884)	Entropy 1.72830 (1.73476)	Top-1 acc 37.891 (41.385)	Top-5 acc 63.281 (65.325)	lr 0.02381
Train [17][2590/3239]	Time 0.177 (0.538)	Data Time 0.001 (0.013)	Loss 3.5128 (3.4885)	Entropy 1.72826 (1.73473)	Top-1 acc 37.891 (41.387)	Top-5 acc 69.922 (65.325)	lr 0.02381
Train [17][2600/3239]	Time 0.206 (0.538)	Data Time 0.001 (0.013)	Loss 3.6165 (3.4886)	Entropy 1.72818 (1.73471)	Top-1 acc 38.281 (41.384)	Top-5 acc 62.500 (65.320)	lr 0.02381
Train [17][2610/3239]	Time 0.227 (0.538)	Data Time 0.001 (0.013)	Loss 3.5238 (3.4885)	Entropy 1.72813 (1.73468)	Top-1 acc 42.578 (41.387)	Top-5 acc 62.891 (65.323)	lr 0.02381
Train [17][2620/3239]	Time 0.214 (0.537)	Data Time 0.001 (0.013)	Loss 3.5423 (3.4885)	Entropy 1.72809 (1.73466)	Top-1 acc 39.453 (41.383)	Top-5 acc 66.016 (65.327)	lr 0.02381
Train [17][2630/3239]	Time 0.194 (0.537)	Data Time 0.001 (0.013)	Loss 3.5504 (3.4885)	Entropy 1.72796 (1.73463)	Top-1 acc 39.453 (41.380)	Top-5 acc 65.234 (65.324)	lr 0.02381
Train [17][2640/3239]	Time 0.184 (0.536)	Data Time 0.001 (0.013)	Loss 3.5472 (3.4887)	Entropy 1.72793 (1.73461)	Top-1 acc 35.156 (41.378)	Top-5 acc 62.891 (65.323)	lr 0.02381
Train [17][2650/3239]	Time 0.248 (0.536)	Data Time 0.001 (0.013)	Loss 3.3298 (3.4888)	Entropy 1.72787 (1.73458)	Top-1 acc 46.875 (41.378)	Top-5 acc 71.094 (65.321)	lr 0.02381
Train [17][2660/3239]	Time 0.187 (0.535)	Data Time 0.001 (0.013)	Loss 3.5072 (3.4888)	Entropy 1.72781 (1.73456)	Top-1 acc 39.453 (41.377)	Top-5 acc 65.234 (65.322)	lr 0.02381
Train [17][2670/3239]	Time 0.186 (0.535)	Data Time 0.001 (0.012)	Loss 3.3199 (3.4886)	Entropy 1.72777 (1.73453)	Top-1 acc 40.625 (41.376)	Top-5 acc 68.750 (65.325)	lr 0.02381
Train [17][2680/3239]	Time 0.229 (0.535)	Data Time 0.001 (0.012)	Loss 3.5546 (3.4888)	Entropy 1.72772 (1.73451)	Top-1 acc 38.672 (41.375)	Top-5 acc 66.406 (65.322)	lr 0.02381
Train [17][2690/3239]	Time 0.321 (0.534)	Data Time 0.001 (0.012)	Loss 3.3683 (3.4883)	Entropy 1.72762 (1.73448)	Top-1 acc 44.922 (41.387)	Top-5 acc 65.625 (65.333)	lr 0.02381
Train [17][2700/3239]	Time 0.246 (0.534)	Data Time 0.001 (0.012)	Loss 3.6133 (3.4887)	Entropy 1.72760 (1.73446)	Top-1 acc 38.672 (41.379)	Top-5 acc 61.719 (65.323)	lr 0.02381
Train [17][2710/3239]	Time 0.201 (0.534)	Data Time 0.001 (0.012)	Loss 3.4787 (3.4888)	Entropy 1.72759 (1.73443)	Top-1 acc 39.453 (41.374)	Top-5 acc 66.406 (65.318)	lr 0.02381
Train [17][2720/3239]	Time 0.221 (0.533)	Data Time 0.001 (0.012)	Loss 3.7374 (3.4891)	Entropy 1.72758 (1.73440)	Top-1 acc 33.984 (41.371)	Top-5 acc 57.422 (65.313)	lr 0.02380
Train [17][2730/3239]	Time 0.260 (0.533)	Data Time 0.001 (0.012)	Loss 3.4493 (3.4891)	Entropy 1.72750 (1.73438)	Top-1 acc 41.797 (41.372)	Top-5 acc 63.672 (65.312)	lr 0.02380
Train [17][2740/3239]	Time 0.280 (0.532)	Data Time 0.001 (0.012)	Loss 3.6236 (3.4890)	Entropy 1.72733 (1.73435)	Top-1 acc 38.281 (41.377)	Top-5 acc 62.500 (65.314)	lr 0.02380
Train [17][2750/3239]	Time 0.234 (0.532)	Data Time 0.002 (0.012)	Loss 3.4485 (3.4889)	Entropy 1.72726 (1.73433)	Top-1 acc 41.016 (41.378)	Top-5 acc 67.188 (65.314)	lr 0.02380
Train [17][2760/3239]	Time 0.215 (0.532)	Data Time 0.001 (0.012)	Loss 3.6295 (3.4890)	Entropy 1.72717 (1.73430)	Top-1 acc 39.453 (41.372)	Top-5 acc 61.719 (65.312)	lr 0.02380
Train [17][2770/3239]	Time 0.494 (0.545)	Data Time 0.006 (0.012)	Loss 3.3659 (3.4889)	Entropy 1.72713 (1.73428)	Top-1 acc 44.531 (41.374)	Top-5 acc 68.750 (65.319)	lr 0.02380
Train [17][2780/3239]	Time 0.223 (0.545)	Data Time 0.002 (0.012)	Loss 3.6591 (3.4888)	Entropy 1.72703 (1.73425)	Top-1 acc 38.281 (41.372)	Top-5 acc 60.547 (65.321)	lr 0.02380
Train [17][2790/3239]	Time 0.209 (0.545)	Data Time 0.002 (0.012)	Loss 3.4009 (3.4890)	Entropy 1.72698 (1.73423)	Top-1 acc 45.703 (41.368)	Top-5 acc 66.797 (65.313)	lr 0.02380
Train [17][2800/3239]	Time 0.222 (0.545)	Data Time 0.003 (0.012)	Loss 3.5337 (3.4889)	Entropy 1.72697 (1.73420)	Top-1 acc 40.234 (41.371)	Top-5 acc 66.016 (65.316)	lr 0.02380
Train [17][2810/3239]	Time 0.240 (0.544)	Data Time 0.001 (0.012)	Loss 3.3710 (3.4890)	Entropy 1.72696 (1.73417)	Top-1 acc 44.141 (41.369)	Top-5 acc 67.188 (65.317)	lr 0.02380
Train [17][2820/3239]	Time 0.189 (0.544)	Data Time 0.001 (0.012)	Loss 3.5346 (3.4889)	Entropy 1.72692 (1.73415)	Top-1 acc 36.719 (41.372)	Top-5 acc 63.281 (65.318)	lr 0.02380
Train [17][2830/3239]	Time 0.236 (0.544)	Data Time 0.002 (0.012)	Loss 3.5121 (3.4888)	Entropy 1.72691 (1.73412)	Top-1 acc 41.406 (41.373)	Top-5 acc 66.797 (65.319)	lr 0.02380
Train [17][2840/3239]	Time 0.291 (0.543)	Data Time 0.001 (0.012)	Loss 3.2118 (3.4886)	Entropy 1.72687 (1.73410)	Top-1 acc 50.000 (41.382)	Top-5 acc 71.484 (65.324)	lr 0.02380
Train [17][2850/3239]	Time 0.270 (0.543)	Data Time 0.001 (0.012)	Loss 3.4813 (3.4889)	Entropy 1.72684 (1.73407)	Top-1 acc 41.797 (41.376)	Top-5 acc 67.969 (65.317)	lr 0.02380
Train [17][2860/3239]	Time 0.185 (0.542)	Data Time 0.001 (0.012)	Loss 3.4375 (3.4888)	Entropy 1.72678 (1.73405)	Top-1 acc 43.750 (41.377)	Top-5 acc 65.234 (65.317)	lr 0.02380
Train [17][2870/3239]	Time 0.136 (0.542)	Data Time 0.001 (0.012)	Loss 3.4069 (3.4888)	Entropy 1.72679 (1.73402)	Top-1 acc 44.922 (41.380)	Top-5 acc 65.234 (65.318)	lr 0.02380
Train [17][2880/3239]	Time 0.240 (0.542)	Data Time 0.001 (0.012)	Loss 3.4460 (3.4888)	Entropy 1.72684 (1.73400)	Top-1 acc 43.359 (41.382)	Top-5 acc 67.188 (65.320)	lr 0.02380
Train [17][2890/3239]	Time 0.199 (0.541)	Data Time 0.001 (0.012)	Loss 3.7055 (3.4886)	Entropy 1.72677 (1.73397)	Top-1 acc 37.891 (41.387)	Top-5 acc 61.328 (65.320)	lr 0.02380
Train [17][2900/3239]	Time 0.218 (0.541)	Data Time 0.001 (0.012)	Loss 3.4277 (3.4887)	Entropy 1.72673 (1.73395)	Top-1 acc 42.578 (41.388)	Top-5 acc 67.969 (65.318)	lr 0.02380
Train [17][2910/3239]	Time 0.182 (0.540)	Data Time 0.001 (0.012)	Loss 3.5981 (3.4888)	Entropy 1.72666 (1.73392)	Top-1 acc 43.359 (41.391)	Top-5 acc 67.969 (65.316)	lr 0.02380
Train [17][2920/3239]	Time 0.197 (0.540)	Data Time 0.001 (0.012)	Loss 3.4493 (3.4888)	Entropy 1.72659 (1.73390)	Top-1 acc 45.703 (41.388)	Top-5 acc 68.359 (65.314)	lr 0.02380
Train [17][2930/3239]	Time 0.212 (0.540)	Data Time 0.001 (0.012)	Loss 3.2696 (3.4888)	Entropy 1.72653 (1.73387)	Top-1 acc 47.266 (41.393)	Top-5 acc 70.703 (65.316)	lr 0.02380
Train [17][2940/3239]	Time 0.143 (0.539)	Data Time 0.001 (0.012)	Loss 3.7194 (3.4888)	Entropy 1.72649 (1.73385)	Top-1 acc 34.766 (41.389)	Top-5 acc 56.250 (65.313)	lr 0.02380
Train [17][2950/3239]	Time 0.205 (0.539)	Data Time 0.001 (0.011)	Loss 3.3395 (3.4886)	Entropy 1.72650 (1.73382)	Top-1 acc 48.438 (41.395)	Top-5 acc 65.625 (65.314)	lr 0.02379
Train [17][2960/3239]	Time 0.197 (0.538)	Data Time 0.001 (0.011)	Loss 3.4192 (3.4885)	Entropy 1.72648 (1.73380)	Top-1 acc 40.234 (41.394)	Top-5 acc 68.750 (65.316)	lr 0.02379
Train [17][2970/3239]	Time 0.264 (0.538)	Data Time 0.002 (0.011)	Loss 3.6423 (3.4887)	Entropy 1.72640 (1.73377)	Top-1 acc 37.109 (41.386)	Top-5 acc 62.891 (65.308)	lr 0.02379
Train [17][2980/3239]	Time 0.235 (0.538)	Data Time 0.002 (0.011)	Loss 3.6255 (3.4889)	Entropy 1.72635 (1.73375)	Top-1 acc 38.281 (41.384)	Top-5 acc 62.500 (65.303)	lr 0.02379
Train [17][2990/3239]	Time 0.237 (0.537)	Data Time 0.001 (0.011)	Loss 3.5770 (3.4890)	Entropy 1.72625 (1.73372)	Top-1 acc 42.188 (41.389)	Top-5 acc 64.453 (65.302)	lr 0.02379
Train [17][3000/3239]	Time 0.210 (0.537)	Data Time 0.001 (0.011)	Loss 3.3457 (3.4889)	Entropy 1.72621 (1.73370)	Top-1 acc 41.797 (41.387)	Top-5 acc 69.922 (65.305)	lr 0.02379
Train [17][3010/3239]	Time 0.248 (0.537)	Data Time 0.001 (0.011)	Loss 3.3058 (3.4888)	Entropy 1.72622 (1.73367)	Top-1 acc 43.359 (41.389)	Top-5 acc 69.141 (65.309)	lr 0.02379
Train [17][3020/3239]	Time 0.260 (0.536)	Data Time 0.001 (0.011)	Loss 3.3197 (3.4888)	Entropy 1.72614 (1.73365)	Top-1 acc 43.750 (41.393)	Top-5 acc 68.359 (65.307)	lr 0.02379
Train [17][3030/3239]	Time 0.235 (0.536)	Data Time 0.001 (0.011)	Loss 3.6040 (3.4889)	Entropy 1.72615 (1.73362)	Top-1 acc 35.938 (41.391)	Top-5 acc 65.625 (65.305)	lr 0.02379
Train [17][3040/3239]	Time 0.196 (0.536)	Data Time 0.001 (0.011)	Loss 3.5184 (3.4890)	Entropy 1.72616 (1.73360)	Top-1 acc 41.797 (41.392)	Top-5 acc 64.062 (65.305)	lr 0.02379
Train [17][3050/3239]	Time 0.228 (0.535)	Data Time 0.002 (0.011)	Loss 3.3913 (3.4889)	Entropy 1.72611 (1.73358)	Top-1 acc 41.406 (41.395)	Top-5 acc 65.234 (65.303)	lr 0.02379
Train [17][3060/3239]	Time 0.172 (0.535)	Data Time 0.001 (0.011)	Loss 3.4034 (3.4888)	Entropy 1.72598 (1.73355)	Top-1 acc 39.062 (41.390)	Top-5 acc 66.797 (65.302)	lr 0.02379
Train [17][3070/3239]	Time 0.270 (0.535)	Data Time 0.001 (0.011)	Loss 3.5427 (3.4888)	Entropy 1.72582 (1.73353)	Top-1 acc 42.188 (41.393)	Top-5 acc 67.578 (65.304)	lr 0.02379
Train [17][3080/3239]	Time 0.234 (0.534)	Data Time 0.001 (0.011)	Loss 3.5231 (3.4890)	Entropy 1.72583 (1.73350)	Top-1 acc 40.234 (41.391)	Top-5 acc 66.016 (65.298)	lr 0.02379
Train [17][3090/3239]	Time 0.213 (0.534)	Data Time 0.001 (0.011)	Loss 3.5340 (3.4891)	Entropy 1.72575 (1.73348)	Top-1 acc 41.797 (41.393)	Top-5 acc 64.062 (65.298)	lr 0.02379
Train [17][3100/3239]	Time 0.291 (0.547)	Data Time 0.004 (0.011)	Loss 3.6368 (3.4888)	Entropy 1.72571 (1.73345)	Top-1 acc 37.891 (41.401)	Top-5 acc 62.109 (65.304)	lr 0.02379
Train [17][3110/3239]	Time 0.261 (0.547)	Data Time 0.002 (0.011)	Loss 3.5707 (3.4890)	Entropy 1.72557 (1.73343)	Top-1 acc 39.062 (41.395)	Top-5 acc 64.062 (65.299)	lr 0.02379
Train [17][3120/3239]	Time 0.343 (0.547)	Data Time 0.003 (0.011)	Loss 3.4576 (3.4888)	Entropy 1.72557 (1.73340)	Top-1 acc 46.484 (41.401)	Top-5 acc 65.625 (65.302)	lr 0.02379
Train [17][3130/3239]	Time 0.208 (0.546)	Data Time 0.001 (0.011)	Loss 3.3443 (3.4888)	Entropy 1.72548 (1.73338)	Top-1 acc 44.141 (41.402)	Top-5 acc 69.531 (65.303)	lr 0.02379
Train [17][3140/3239]	Time 0.244 (0.546)	Data Time 0.001 (0.011)	Loss 3.4318 (3.4887)	Entropy 1.72540 (1.73335)	Top-1 acc 40.625 (41.402)	Top-5 acc 65.625 (65.303)	lr 0.02379
Train [17][3150/3239]	Time 0.205 (0.546)	Data Time 0.001 (0.011)	Loss 3.4728 (3.4887)	Entropy 1.72534 (1.73333)	Top-1 acc 43.750 (41.405)	Top-5 acc 65.625 (65.306)	lr 0.02379
Train [17][3160/3239]	Time 0.265 (0.545)	Data Time 0.001 (0.011)	Loss 3.3035 (3.4884)	Entropy 1.72532 (1.73330)	Top-1 acc 49.609 (41.413)	Top-5 acc 68.750 (65.315)	lr 0.02379
Train [17][3170/3239]	Time 0.229 (0.545)	Data Time 0.001 (0.011)	Loss 3.4402 (3.4885)	Entropy 1.72527 (1.73328)	Top-1 acc 39.844 (41.409)	Top-5 acc 67.188 (65.314)	lr 0.02379
Train [17][3180/3239]	Time 0.200 (0.545)	Data Time 0.000 (0.011)	Loss 3.4872 (3.4883)	Entropy 1.72521 (1.73325)	Top-1 acc 41.406 (41.412)	Top-5 acc 66.406 (65.318)	lr 0.02378
Train [17][3190/3239]	Time 0.215 (0.544)	Data Time 0.000 (0.011)	Loss 3.3694 (3.4881)	Entropy 1.72516 (1.73323)	Top-1 acc 39.062 (41.411)	Top-5 acc 69.531 (65.321)	lr 0.02378
Train [17][3200/3239]	Time 0.215 (0.544)	Data Time 0.000 (0.011)	Loss 3.4639 (3.4883)	Entropy 1.72508 (1.73320)	Top-1 acc 40.625 (41.410)	Top-5 acc 62.891 (65.318)	lr 0.02378
Train [17][3210/3239]	Time 0.193 (0.544)	Data Time 0.000 (0.011)	Loss 3.5507 (3.4883)	Entropy 1.72504 (1.73317)	Top-1 acc 41.406 (41.413)	Top-5 acc 66.016 (65.317)	lr 0.02378
Train [17][3220/3239]	Time 0.210 (0.543)	Data Time 0.000 (0.011)	Loss 3.3498 (3.4882)	Entropy 1.72497 (1.73315)	Top-1 acc 46.484 (41.418)	Top-5 acc 66.797 (65.320)	lr 0.02378
Train [17][3230/3239]	Time 0.213 (0.543)	Data Time 0.003 (0.011)	Loss 3.4568 (3.4880)	Entropy 1.72488 (1.73312)	Top-1 acc 43.750 (41.423)	Top-5 acc 63.672 (65.328)	lr 0.02378
Train [17][3239/3239]	Time 2.070 (0.542)	Data Time 0.000 (0.011)	Loss 3.4266 (3.4880)	Entropy 1.72488 (1.73310)	Top-1 acc 44.444 (41.426)	Top-5 acc 67.901 (65.327)	lr 0.02378
==========Valid [17/120]	loss 2.267	top-1 acc 50.350 (50.350)	top-5 acc 74.303	Train top-1 41.426	top-5 65.327	Entropy 1.72488	Latency-None: 0.000ms	Flops: 530.60M
Train [18][0/3239]	Time 28.855 (28.855)	Data Time 28.004 (28.004)	Loss 3.6542 (3.6542)	Entropy 1.72482 (1.72482)	Top-1 acc 40.234 (40.234)	Top-5 acc 64.062 (64.062)	lr 0.02378
Train [18][10/3239]	Time 2.514 (3.189)	Data Time 0.002 (2.576)	Loss 3.6294 (3.4592)	Entropy 1.72482 (1.72482)	Top-1 acc 37.500 (41.584)	Top-5 acc 60.156 (66.193)	lr 0.02378
Train [18][20/3239]	Time 0.255 (1.782)	Data Time 0.002 (1.350)	Loss 3.2918 (3.4343)	Entropy 1.72475 (1.72478)	Top-1 acc 46.484 (43.099)	Top-5 acc 69.531 (66.611)	lr 0.02378
Train [18][30/3239]	Time 0.334 (1.353)	Data Time 0.001 (0.916)	Loss 3.3448 (3.4269)	Entropy 1.72469 (1.72476)	Top-1 acc 41.797 (42.931)	Top-5 acc 69.141 (66.835)	lr 0.02378
Train [18][40/3239]	Time 0.221 (1.130)	Data Time 0.001 (0.693)	Loss 3.4803 (3.4436)	Entropy 1.72464 (1.72473)	Top-1 acc 45.312 (42.511)	Top-5 acc 64.844 (66.168)	lr 0.02378
Train [18][50/3239]	Time 0.199 (1.001)	Data Time 0.001 (0.558)	Loss 3.4674 (3.4429)	Entropy 1.72463 (1.72471)	Top-1 acc 40.234 (42.563)	Top-5 acc 66.797 (66.345)	lr 0.02378
Train [18][60/3239]	Time 0.204 (0.909)	Data Time 0.001 (0.467)	Loss 3.8431 (3.4537)	Entropy 1.72462 (1.72470)	Top-1 acc 37.500 (42.495)	Top-5 acc 58.594 (66.124)	lr 0.02378
Train [18][70/3239]	Time 0.199 (0.841)	Data Time 0.002 (0.401)	Loss 3.4593 (3.4586)	Entropy 1.72457 (1.72468)	Top-1 acc 45.312 (42.589)	Top-5 acc 65.625 (66.005)	lr 0.02378
Train [18][80/3239]	Time 0.189 (0.791)	Data Time 0.001 (0.352)	Loss 3.4747 (3.4634)	Entropy 1.72451 (1.72467)	Top-1 acc 43.750 (42.376)	Top-5 acc 64.453 (65.890)	lr 0.02378
Train [18][90/3239]	Time 0.259 (0.752)	Data Time 0.001 (0.314)	Loss 3.6841 (3.4633)	Entropy 1.72445 (1.72465)	Top-1 acc 36.719 (42.291)	Top-5 acc 61.328 (65.891)	lr 0.02378
Train [18][100/3239]	Time 0.284 (0.722)	Data Time 0.002 (0.283)	Loss 3.2954 (3.4569)	Entropy 1.72442 (1.72463)	Top-1 acc 43.359 (42.362)	Top-5 acc 70.703 (66.035)	lr 0.02378
Train [18][110/3239]	Time 0.292 (0.696)	Data Time 0.001 (0.258)	Loss 3.3373 (3.4544)	Entropy 1.72443 (1.72461)	Top-1 acc 43.750 (42.367)	Top-5 acc 68.750 (66.093)	lr 0.02378
Train [18][120/3239]	Time 2.313 (0.675)	Data Time 0.001 (0.236)	Loss 3.2939 (3.4510)	Entropy 1.72443 (1.72460)	Top-1 acc 44.922 (42.365)	Top-5 acc 69.922 (66.190)	lr 0.02378
Train [18][130/3239]	Time 0.224 (0.640)	Data Time 0.001 (0.218)	Loss 3.3718 (3.4473)	Entropy 1.72429 (1.72457)	Top-1 acc 43.750 (42.441)	Top-5 acc 69.531 (66.248)	lr 0.02378
Train [18][140/3239]	Time 0.201 (0.626)	Data Time 0.001 (0.203)	Loss 3.4246 (3.4483)	Entropy 1.72417 (1.72454)	Top-1 acc 41.016 (42.395)	Top-5 acc 65.625 (66.223)	lr 0.02378
Train [18][150/3239]	Time 0.221 (0.612)	Data Time 0.001 (0.190)	Loss 3.4034 (3.4473)	Entropy 1.72409 (1.72452)	Top-1 acc 43.750 (42.309)	Top-5 acc 66.406 (66.189)	lr 0.02378
Train [18][160/3239]	Time 0.231 (0.601)	Data Time 0.001 (0.178)	Loss 3.2872 (3.4465)	Entropy 1.72422 (1.72449)	Top-1 acc 42.578 (42.241)	Top-5 acc 67.578 (66.166)	lr 0.02378
Train [18][170/3239]	Time 0.308 (0.592)	Data Time 0.001 (0.168)	Loss 3.4809 (3.4474)	Entropy 1.72419 (1.72448)	Top-1 acc 44.141 (42.247)	Top-5 acc 64.062 (66.128)	lr 0.02377
Train [18][180/3239]	Time 0.200 (0.582)	Data Time 0.001 (0.159)	Loss 3.3253 (3.4459)	Entropy 1.72418 (1.72446)	Top-1 acc 46.094 (42.330)	Top-5 acc 67.969 (66.171)	lr 0.02377
Train [18][190/3239]	Time 0.201 (0.574)	Data Time 0.001 (0.150)	Loss 3.2834 (3.4433)	Entropy 1.72413 (1.72445)	Top-1 acc 44.531 (42.370)	Top-5 acc 70.312 (66.237)	lr 0.02377
Train [18][200/3239]	Time 0.207 (0.567)	Data Time 0.001 (0.143)	Loss 3.3899 (3.4426)	Entropy 1.72408 (1.72443)	Top-1 acc 42.188 (42.357)	Top-5 acc 64.844 (66.214)	lr 0.02377
Train [18][210/3239]	Time 0.253 (0.769)	Data Time 0.003 (0.136)	Loss 3.3291 (3.4430)	Entropy 1.72405 (1.72441)	Top-1 acc 44.141 (42.373)	Top-5 acc 71.875 (66.230)	lr 0.02377
Train [18][220/3239]	Time 0.219 (0.755)	Data Time 0.002 (0.130)	Loss 3.6635 (3.4429)	Entropy 1.72391 (1.72439)	Top-1 acc 36.719 (42.357)	Top-5 acc 62.891 (66.235)	lr 0.02377
Train [18][230/3239]	Time 2.317 (0.741)	Data Time 0.002 (0.125)	Loss 3.5149 (3.4463)	Entropy 1.72391 (1.72437)	Top-1 acc 43.750 (42.291)	Top-5 acc 66.406 (66.153)	lr 0.02377
Train [18][240/3239]	Time 0.182 (0.719)	Data Time 0.002 (0.120)	Loss 3.4688 (3.4453)	Entropy 1.72389 (1.72435)	Top-1 acc 40.625 (42.306)	Top-5 acc 66.016 (66.186)	lr 0.02377
Train [18][250/3239]	Time 0.298 (0.708)	Data Time 0.001 (0.115)	Loss 3.4686 (3.4470)	Entropy 1.72386 (1.72433)	Top-1 acc 38.281 (42.248)	Top-5 acc 67.578 (66.114)	lr 0.02377
Train [18][260/3239]	Time 0.218 (0.697)	Data Time 0.001 (0.111)	Loss 3.5239 (3.4467)	Entropy 1.72383 (1.72431)	Top-1 acc 42.578 (42.282)	Top-5 acc 62.500 (66.072)	lr 0.02377
Train [18][270/3239]	Time 0.210 (0.687)	Data Time 0.002 (0.107)	Loss 3.3883 (3.4479)	Entropy 1.72382 (1.72430)	Top-1 acc 44.141 (42.273)	Top-5 acc 67.578 (66.046)	lr 0.02377
Train [18][280/3239]	Time 0.222 (0.678)	Data Time 0.002 (0.103)	Loss 3.4232 (3.4508)	Entropy 1.72374 (1.72428)	Top-1 acc 42.578 (42.211)	Top-5 acc 63.672 (65.992)	lr 0.02377
Train [18][290/3239]	Time 0.161 (0.669)	Data Time 0.001 (0.100)	Loss 3.4412 (3.4506)	Entropy 1.72370 (1.72426)	Top-1 acc 42.578 (42.237)	Top-5 acc 71.094 (66.038)	lr 0.02377
Train [18][300/3239]	Time 0.204 (0.661)	Data Time 0.001 (0.097)	Loss 3.4164 (3.4515)	Entropy 1.72359 (1.72424)	Top-1 acc 41.406 (42.202)	Top-5 acc 67.969 (66.005)	lr 0.02377
Train [18][310/3239]	Time 0.314 (0.654)	Data Time 0.001 (0.093)	Loss 3.4453 (3.4508)	Entropy 1.72354 (1.72422)	Top-1 acc 39.844 (42.224)	Top-5 acc 68.750 (66.018)	lr 0.02377
Train [18][320/3239]	Time 0.213 (0.647)	Data Time 0.001 (0.091)	Loss 3.7002 (3.4543)	Entropy 1.72352 (1.72420)	Top-1 acc 39.453 (42.195)	Top-5 acc 60.547 (65.939)	lr 0.02377
Train [18][330/3239]	Time 0.207 (0.641)	Data Time 0.001 (0.088)	Loss 3.5017 (3.4530)	Entropy 1.72347 (1.72418)	Top-1 acc 41.406 (42.218)	Top-5 acc 62.500 (65.940)	lr 0.02377
Train [18][340/3239]	Time 2.375 (0.635)	Data Time 0.001 (0.085)	Loss 3.5553 (3.4532)	Entropy 1.72347 (1.72416)	Top-1 acc 39.062 (42.225)	Top-5 acc 62.109 (65.921)	lr 0.02377
Train [18][350/3239]	Time 0.188 (0.623)	Data Time 0.002 (0.083)	Loss 3.4334 (3.4526)	Entropy 1.72345 (1.72413)	Top-1 acc 44.922 (42.208)	Top-5 acc 70.703 (65.956)	lr 0.02377
Train [18][360/3239]	Time 0.233 (0.618)	Data Time 0.002 (0.081)	Loss 3.5404 (3.4508)	Entropy 1.72340 (1.72411)	Top-1 acc 39.062 (42.246)	Top-5 acc 66.406 (65.984)	lr 0.02377
Train [18][370/3239]	Time 0.200 (0.613)	Data Time 0.001 (0.079)	Loss 3.3409 (3.4502)	Entropy 1.72337 (1.72409)	Top-1 acc 47.656 (42.278)	Top-5 acc 68.359 (65.992)	lr 0.02377
Train [18][380/3239]	Time 0.295 (0.608)	Data Time 0.001 (0.077)	Loss 3.3818 (3.4508)	Entropy 1.72333 (1.72408)	Top-1 acc 41.797 (42.246)	Top-5 acc 70.312 (65.977)	lr 0.02377
Train [18][390/3239]	Time 0.217 (0.604)	Data Time 0.001 (0.075)	Loss 3.2360 (3.4498)	Entropy 1.72329 (1.72406)	Top-1 acc 44.531 (42.237)	Top-5 acc 69.531 (66.009)	lr 0.02377
Train [18][400/3239]	Time 0.231 (0.600)	Data Time 0.001 (0.073)	Loss 3.2224 (3.4490)	Entropy 1.72323 (1.72404)	Top-1 acc 48.047 (42.268)	Top-5 acc 69.141 (66.000)	lr 0.02376
Train [18][410/3239]	Time 0.208 (0.596)	Data Time 0.001 (0.071)	Loss 3.4187 (3.4476)	Entropy 1.72319 (1.72402)	Top-1 acc 42.578 (42.303)	Top-5 acc 67.969 (66.032)	lr 0.02376
Train [18][420/3239]	Time 0.228 (0.592)	Data Time 0.001 (0.069)	Loss 3.4909 (3.4493)	Entropy 1.72317 (1.72400)	Top-1 acc 46.094 (42.269)	Top-5 acc 64.062 (66.016)	lr 0.02376
Train [18][430/3239]	Time 0.223 (0.588)	Data Time 0.001 (0.068)	Loss 3.3062 (3.4484)	Entropy 1.72316 (1.72398)	Top-1 acc 44.531 (42.282)	Top-5 acc 68.359 (66.038)	lr 0.02376
Train [18][440/3239]	Time 0.282 (0.584)	Data Time 0.001 (0.066)	Loss 3.5209 (3.4491)	Entropy 1.72312 (1.72396)	Top-1 acc 43.750 (42.278)	Top-5 acc 66.016 (66.033)	lr 0.02376
Train [18][450/3239]	Time 2.262 (0.581)	Data Time 0.001 (0.065)	Loss 3.3444 (3.4510)	Entropy 1.72312 (1.72394)	Top-1 acc 44.531 (42.233)	Top-5 acc 69.531 (66.005)	lr 0.02376
Train [18][460/3239]	Time 0.219 (0.573)	Data Time 0.001 (0.064)	Loss 3.4969 (3.4510)	Entropy 1.72311 (1.72392)	Top-1 acc 41.797 (42.236)	Top-5 acc 66.797 (66.025)	lr 0.02376
Train [18][470/3239]	Time 0.192 (0.570)	Data Time 0.001 (0.062)	Loss 3.3713 (3.4517)	Entropy 1.72303 (1.72390)	Top-1 acc 42.578 (42.228)	Top-5 acc 69.922 (66.023)	lr 0.02376
Train [18][480/3239]	Time 0.216 (0.567)	Data Time 0.001 (0.061)	Loss 3.4906 (3.4516)	Entropy 1.72293 (1.72388)	Top-1 acc 39.453 (42.229)	Top-5 acc 63.672 (66.019)	lr 0.02376
Train [18][490/3239]	Time 0.196 (0.565)	Data Time 0.001 (0.060)	Loss 3.4706 (3.4519)	Entropy 1.72291 (1.72386)	Top-1 acc 38.672 (42.214)	Top-5 acc 67.188 (66.003)	lr 0.02376
Train [18][500/3239]	Time 0.236 (0.562)	Data Time 0.001 (0.059)	Loss 3.4657 (3.4521)	Entropy 1.72287 (1.72384)	Top-1 acc 44.922 (42.217)	Top-5 acc 66.016 (66.004)	lr 0.02376
Train [18][510/3239]	Time 0.192 (0.560)	Data Time 0.001 (0.058)	Loss 3.3399 (3.4517)	Entropy 1.72280 (1.72382)	Top-1 acc 44.141 (42.223)	Top-5 acc 69.922 (66.027)	lr 0.02376
Train [18][520/3239]	Time 0.310 (0.557)	Data Time 0.001 (0.057)	Loss 3.4323 (3.4519)	Entropy 1.72277 (1.72380)	Top-1 acc 41.016 (42.231)	Top-5 acc 66.406 (66.021)	lr 0.02376
Train [18][530/3239]	Time 0.225 (0.554)	Data Time 0.001 (0.056)	Loss 3.5350 (3.4525)	Entropy 1.72279 (1.72379)	Top-1 acc 44.922 (42.204)	Top-5 acc 66.406 (66.021)	lr 0.02376
Train [18][540/3239]	Time 0.226 (0.552)	Data Time 0.002 (0.055)	Loss 3.4015 (3.4525)	Entropy 1.72274 (1.72377)	Top-1 acc 42.188 (42.203)	Top-5 acc 64.844 (66.022)	lr 0.02376
Train [18][550/3239]	Time 0.244 (0.550)	Data Time 0.001 (0.054)	Loss 3.5631 (3.4539)	Entropy 1.72272 (1.72375)	Top-1 acc 39.453 (42.188)	Top-5 acc 61.719 (65.993)	lr 0.02376
Train [18][560/3239]	Time 2.304 (0.548)	Data Time 0.001 (0.053)	Loss 3.4951 (3.4541)	Entropy 1.72272 (1.72373)	Top-1 acc 42.188 (42.183)	Top-5 acc 63.281 (65.975)	lr 0.02376
Train [18][570/3239]	Time 0.210 (0.542)	Data Time 0.002 (0.052)	Loss 3.4681 (3.4541)	Entropy 1.72261 (1.72371)	Top-1 acc 46.094 (42.186)	Top-5 acc 63.672 (65.970)	lr 0.02376
Train [18][580/3239]	Time 0.359 (0.611)	Data Time 0.003 (0.051)	Loss 3.3797 (3.4531)	Entropy 1.72261 (1.72369)	Top-1 acc 47.656 (42.214)	Top-5 acc 69.531 (65.987)	lr 0.02376
Train [18][590/3239]	Time 0.344 (0.609)	Data Time 0.004 (0.050)	Loss 3.3831 (3.4532)	Entropy 1.72252 (1.72367)	Top-1 acc 44.141 (42.228)	Top-5 acc 64.062 (65.988)	lr 0.02376
Train [18][600/3239]	Time 0.214 (0.606)	Data Time 0.001 (0.049)	Loss 3.4314 (3.4537)	Entropy 1.72247 (1.72365)	Top-1 acc 45.312 (42.228)	Top-5 acc 68.359 (65.979)	lr 0.02376
Train [18][610/3239]	Time 0.175 (0.603)	Data Time 0.001 (0.049)	Loss 3.5082 (3.4537)	Entropy 1.72247 (1.72363)	Top-1 acc 42.578 (42.239)	Top-5 acc 62.500 (65.981)	lr 0.02376
Train [18][620/3239]	Time 0.233 (0.600)	Data Time 0.001 (0.048)	Loss 3.2880 (3.4525)	Entropy 1.72242 (1.72361)	Top-1 acc 44.531 (42.267)	Top-5 acc 70.703 (66.010)	lr 0.02376
Train [18][630/3239]	Time 0.212 (0.598)	Data Time 0.001 (0.047)	Loss 3.3532 (3.4524)	Entropy 1.72237 (1.72359)	Top-1 acc 44.141 (42.262)	Top-5 acc 71.484 (66.026)	lr 0.02375
Train [18][640/3239]	Time 0.218 (0.595)	Data Time 0.001 (0.046)	Loss 3.4694 (3.4522)	Entropy 1.72227 (1.72357)	Top-1 acc 39.062 (42.252)	Top-5 acc 64.844 (66.022)	lr 0.02375
Train [18][650/3239]	Time 0.310 (0.593)	Data Time 0.001 (0.046)	Loss 3.4236 (3.4525)	Entropy 1.72225 (1.72355)	Top-1 acc 42.578 (42.236)	Top-5 acc 64.062 (66.016)	lr 0.02375
Train [18][660/3239]	Time 0.280 (0.590)	Data Time 0.002 (0.045)	Loss 3.4289 (3.4523)	Entropy 1.72215 (1.72353)	Top-1 acc 40.625 (42.238)	Top-5 acc 65.625 (66.022)	lr 0.02375
Train [18][670/3239]	Time 2.253 (0.588)	Data Time 0.001 (0.044)	Loss 3.4825 (3.4525)	Entropy 1.72215 (1.72351)	Top-1 acc 39.844 (42.225)	Top-5 acc 66.016 (66.034)	lr 0.02375
Train [18][680/3239]	Time 0.222 (0.582)	Data Time 0.001 (0.044)	Loss 3.2572 (3.4520)	Entropy 1.72213 (1.72349)	Top-1 acc 44.922 (42.232)	Top-5 acc 69.141 (66.042)	lr 0.02375
Train [18][690/3239]	Time 0.198 (0.580)	Data Time 0.002 (0.043)	Loss 3.4526 (3.4522)	Entropy 1.72202 (1.72347)	Top-1 acc 42.578 (42.237)	Top-5 acc 64.844 (66.039)	lr 0.02375
Train [18][700/3239]	Time 0.198 (0.578)	Data Time 0.001 (0.043)	Loss 3.3490 (3.4514)	Entropy 1.72193 (1.72345)	Top-1 acc 48.047 (42.258)	Top-5 acc 65.625 (66.051)	lr 0.02375
Train [18][710/3239]	Time 0.218 (0.576)	Data Time 0.002 (0.042)	Loss 3.3022 (3.4510)	Entropy 1.72184 (1.72343)	Top-1 acc 48.047 (42.261)	Top-5 acc 69.141 (66.069)	lr 0.02375
Train [18][720/3239]	Time 0.272 (0.574)	Data Time 0.001 (0.042)	Loss 3.4038 (3.4513)	Entropy 1.72177 (1.72341)	Top-1 acc 43.359 (42.254)	Top-5 acc 66.016 (66.045)	lr 0.02375
Train [18][730/3239]	Time 0.202 (0.572)	Data Time 0.002 (0.041)	Loss 3.3465 (3.4513)	Entropy 1.72171 (1.72338)	Top-1 acc 46.484 (42.266)	Top-5 acc 67.188 (66.048)	lr 0.02375
Train [18][740/3239]	Time 0.198 (0.570)	Data Time 0.001 (0.040)	Loss 3.5098 (3.4517)	Entropy 1.72152 (1.72336)	Top-1 acc 35.938 (42.250)	Top-5 acc 62.891 (66.035)	lr 0.02375
Train [18][750/3239]	Time 0.232 (0.568)	Data Time 0.001 (0.040)	Loss 3.5184 (3.4520)	Entropy 1.72143 (1.72333)	Top-1 acc 41.406 (42.225)	Top-5 acc 67.188 (66.029)	lr 0.02375
Train [18][760/3239]	Time 0.223 (0.567)	Data Time 0.002 (0.039)	Loss 3.4201 (3.4524)	Entropy 1.72141 (1.72331)	Top-1 acc 40.625 (42.205)	Top-5 acc 65.625 (66.012)	lr 0.02375
Train [18][770/3239]	Time 0.244 (0.565)	Data Time 0.002 (0.039)	Loss 3.3297 (3.4519)	Entropy 1.72130 (1.72328)	Top-1 acc 43.359 (42.210)	Top-5 acc 70.703 (66.022)	lr 0.02375
Train [18][780/3239]	Time 2.276 (0.563)	Data Time 0.001 (0.039)	Loss 3.3393 (3.4529)	Entropy 1.72130 (1.72326)	Top-1 acc 44.141 (42.197)	Top-5 acc 69.141 (66.005)	lr 0.02375
Train [18][790/3239]	Time 0.177 (0.559)	Data Time 0.001 (0.038)	Loss 3.6278 (3.4529)	Entropy 1.72130 (1.72323)	Top-1 acc 41.406 (42.212)	Top-5 acc 62.891 (65.994)	lr 0.02375
Train [18][800/3239]	Time 0.357 (0.557)	Data Time 0.001 (0.038)	Loss 3.3035 (3.4527)	Entropy 1.72124 (1.72321)	Top-1 acc 42.578 (42.219)	Top-5 acc 69.531 (65.997)	lr 0.02375
Train [18][810/3239]	Time 0.203 (0.556)	Data Time 0.001 (0.037)	Loss 3.3686 (3.4527)	Entropy 1.72126 (1.72319)	Top-1 acc 42.578 (42.213)	Top-5 acc 68.359 (66.004)	lr 0.02375
Train [18][820/3239]	Time 0.243 (0.554)	Data Time 0.001 (0.037)	Loss 3.4384 (3.4529)	Entropy 1.72122 (1.72316)	Top-1 acc 40.625 (42.211)	Top-5 acc 64.844 (65.986)	lr 0.02375
Train [18][830/3239]	Time 0.223 (0.553)	Data Time 0.001 (0.036)	Loss 3.6000 (3.4525)	Entropy 1.72132 (1.72314)	Top-1 acc 41.406 (42.229)	Top-5 acc 62.500 (65.986)	lr 0.02375
Train [18][840/3239]	Time 0.237 (0.551)	Data Time 0.001 (0.036)	Loss 3.3086 (3.4526)	Entropy 1.72128 (1.72312)	Top-1 acc 44.922 (42.214)	Top-5 acc 67.578 (65.979)	lr 0.02375
Train [18][850/3239]	Time 0.245 (0.550)	Data Time 0.002 (0.035)	Loss 3.6570 (3.4531)	Entropy 1.72117 (1.72310)	Top-1 acc 40.625 (42.210)	Top-5 acc 63.281 (65.967)	lr 0.02375
Train [18][860/3239]	Time 0.150 (0.548)	Data Time 0.001 (0.035)	Loss 3.4372 (3.4533)	Entropy 1.72112 (1.72307)	Top-1 acc 42.188 (42.216)	Top-5 acc 69.531 (65.977)	lr 0.02374
Train [18][870/3239]	Time 0.224 (0.547)	Data Time 0.001 (0.035)	Loss 3.5111 (3.4530)	Entropy 1.72108 (1.72305)	Top-1 acc 41.797 (42.231)	Top-5 acc 62.891 (65.986)	lr 0.02374
Train [18][880/3239]	Time 0.143 (0.546)	Data Time 0.001 (0.034)	Loss 3.5221 (3.4539)	Entropy 1.72106 (1.72303)	Top-1 acc 39.453 (42.212)	Top-5 acc 67.188 (65.972)	lr 0.02374
Train [18][890/3239]	Time 2.237 (0.545)	Data Time 0.001 (0.034)	Loss 3.4457 (3.4539)	Entropy 1.72106 (1.72301)	Top-1 acc 46.094 (42.211)	Top-5 acc 64.062 (65.972)	lr 0.02374
Train [18][900/3239]	Time 0.188 (0.541)	Data Time 0.001 (0.034)	Loss 3.5279 (3.4540)	Entropy 1.72099 (1.72298)	Top-1 acc 39.062 (42.213)	Top-5 acc 65.234 (65.977)	lr 0.02374
Train [18][910/3239]	Time 0.227 (0.540)	Data Time 0.001 (0.033)	Loss 3.8326 (3.4549)	Entropy 1.72098 (1.72296)	Top-1 acc 35.938 (42.198)	Top-5 acc 59.766 (65.965)	lr 0.02374
Train [18][920/3239]	Time 0.193 (0.539)	Data Time 0.001 (0.033)	Loss 3.4251 (3.4551)	Entropy 1.72085 (1.72294)	Top-1 acc 39.062 (42.182)	Top-5 acc 67.969 (65.960)	lr 0.02374
Train [18][930/3239]	Time 0.212 (0.538)	Data Time 0.001 (0.033)	Loss 3.3632 (3.4553)	Entropy 1.72080 (1.72292)	Top-1 acc 41.797 (42.173)	Top-5 acc 68.359 (65.960)	lr 0.02374
Train [18][940/3239]	Time 0.357 (0.577)	Data Time 0.003 (0.032)	Loss 3.4086 (3.4556)	Entropy 1.72077 (1.72289)	Top-1 acc 42.188 (42.169)	Top-5 acc 66.797 (65.946)	lr 0.02374
Train [18][950/3239]	Time 0.205 (0.577)	Data Time 0.002 (0.032)	Loss 3.3915 (3.4558)	Entropy 1.72072 (1.72287)	Top-1 acc 43.750 (42.162)	Top-5 acc 67.969 (65.943)	lr 0.02374
Train [18][960/3239]	Time 0.225 (0.576)	Data Time 0.001 (0.032)	Loss 3.5688 (3.4557)	Entropy 1.72069 (1.72285)	Top-1 acc 38.672 (42.166)	Top-5 acc 62.500 (65.942)	lr 0.02374
Train [18][970/3239]	Time 0.204 (0.574)	Data Time 0.001 (0.031)	Loss 3.5501 (3.4565)	Entropy 1.72062 (1.72283)	Top-1 acc 38.672 (42.154)	Top-5 acc 63.672 (65.916)	lr 0.02374
Train [18][980/3239]	Time 0.248 (0.573)	Data Time 0.030 (0.031)	Loss 3.3083 (3.4566)	Entropy 1.72059 (1.72280)	Top-1 acc 43.359 (42.151)	Top-5 acc 68.359 (65.913)	lr 0.02374
Train [18][990/3239]	Time 0.248 (0.571)	Data Time 0.002 (0.031)	Loss 3.3537 (3.4564)	Entropy 1.72053 (1.72278)	Top-1 acc 48.438 (42.156)	Top-5 acc 67.578 (65.915)	lr 0.02374
Train [18][1000/3239]	Time 2.384 (0.570)	Data Time 0.001 (0.031)	Loss 3.5817 (3.4566)	Entropy 1.72053 (1.72276)	Top-1 acc 36.328 (42.141)	Top-5 acc 66.406 (65.909)	lr 0.02374
Train [18][1010/3239]	Time 0.209 (0.567)	Data Time 0.001 (0.030)	Loss 3.4860 (3.4564)	Entropy 1.72049 (1.72274)	Top-1 acc 40.625 (42.149)	Top-5 acc 66.406 (65.911)	lr 0.02374
Train [18][1020/3239]	Time 0.203 (0.565)	Data Time 0.001 (0.030)	Loss 3.4323 (3.4565)	Entropy 1.72022 (1.72271)	Top-1 acc 40.234 (42.145)	Top-5 acc 66.406 (65.904)	lr 0.02374
Train [18][1030/3239]	Time 0.205 (0.564)	Data Time 0.001 (0.030)	Loss 3.2784 (3.4556)	Entropy 1.72011 (1.72269)	Top-1 acc 43.359 (42.159)	Top-5 acc 69.141 (65.930)	lr 0.02374
Train [18][1040/3239]	Time 0.209 (0.563)	Data Time 0.001 (0.029)	Loss 3.5199 (3.4561)	Entropy 1.72007 (1.72266)	Top-1 acc 43.359 (42.150)	Top-5 acc 62.891 (65.912)	lr 0.02374
Train [18][1050/3239]	Time 0.205 (0.561)	Data Time 0.001 (0.029)	Loss 3.2939 (3.4552)	Entropy 1.71987 (1.72264)	Top-1 acc 44.922 (42.171)	Top-5 acc 68.750 (65.929)	lr 0.02374
Train [18][1060/3239]	Time 0.217 (0.560)	Data Time 0.002 (0.029)	Loss 3.7889 (3.4560)	Entropy 1.71985 (1.72261)	Top-1 acc 35.938 (42.145)	Top-5 acc 62.109 (65.915)	lr 0.02374
Train [18][1070/3239]	Time 0.325 (0.559)	Data Time 0.001 (0.029)	Loss 3.2115 (3.4564)	Entropy 1.71976 (1.72258)	Top-1 acc 46.875 (42.143)	Top-5 acc 72.266 (65.906)	lr 0.02374
Train [18][1080/3239]	Time 0.236 (0.558)	Data Time 0.001 (0.028)	Loss 3.5432 (3.4565)	Entropy 1.71968 (1.72256)	Top-1 acc 41.797 (42.133)	Top-5 acc 65.625 (65.908)	lr 0.02373
Train [18][1090/3239]	Time 0.143 (0.556)	Data Time 0.001 (0.028)	Loss 3.6276 (3.4578)	Entropy 1.71965 (1.72253)	Top-1 acc 39.844 (42.103)	Top-5 acc 62.500 (65.881)	lr 0.02373
Train [18][1100/3239]	Time 0.157 (0.555)	Data Time 0.001 (0.028)	Loss 3.3012 (3.4569)	Entropy 1.71957 (1.72250)	Top-1 acc 43.359 (42.116)	Top-5 acc 71.875 (65.902)	lr 0.02373
Train [18][1110/3239]	Time 2.312 (0.554)	Data Time 0.001 (0.028)	Loss 3.5119 (3.4568)	Entropy 1.71957 (1.72248)	Top-1 acc 44.141 (42.127)	Top-5 acc 62.891 (65.907)	lr 0.02373
Train [18][1120/3239]	Time 0.209 (0.551)	Data Time 0.001 (0.028)	Loss 3.5907 (3.4566)	Entropy 1.71944 (1.72245)	Top-1 acc 39.062 (42.129)	Top-5 acc 60.938 (65.920)	lr 0.02373
Train [18][1130/3239]	Time 0.214 (0.550)	Data Time 0.001 (0.027)	Loss 3.3445 (3.4562)	Entropy 1.71949 (1.72242)	Top-1 acc 43.750 (42.133)	Top-5 acc 66.797 (65.932)	lr 0.02373
Train [18][1140/3239]	Time 0.212 (0.549)	Data Time 0.002 (0.027)	Loss 3.7069 (3.4568)	Entropy 1.71942 (1.72240)	Top-1 acc 39.844 (42.123)	Top-5 acc 60.547 (65.923)	lr 0.02373
Train [18][1150/3239]	Time 0.313 (0.548)	Data Time 0.001 (0.027)	Loss 3.4518 (3.4566)	Entropy 1.71938 (1.72237)	Top-1 acc 40.625 (42.134)	Top-5 acc 67.188 (65.923)	lr 0.02373
Train [18][1160/3239]	Time 0.259 (0.547)	Data Time 0.001 (0.027)	Loss 3.5973 (3.4563)	Entropy 1.71926 (1.72235)	Top-1 acc 44.141 (42.138)	Top-5 acc 64.844 (65.932)	lr 0.02373
Train [18][1170/3239]	Time 0.254 (0.546)	Data Time 0.001 (0.026)	Loss 3.2023 (3.4562)	Entropy 1.71914 (1.72232)	Top-1 acc 45.312 (42.133)	Top-5 acc 68.750 (65.928)	lr 0.02373
Train [18][1180/3239]	Time 0.206 (0.546)	Data Time 0.001 (0.026)	Loss 3.2660 (3.4559)	Entropy 1.71917 (1.72229)	Top-1 acc 44.141 (42.139)	Top-5 acc 70.312 (65.928)	lr 0.02373
Train [18][1190/3239]	Time 0.229 (0.545)	Data Time 0.001 (0.026)	Loss 3.4865 (3.4562)	Entropy 1.71923 (1.72227)	Top-1 acc 39.453 (42.116)	Top-5 acc 64.062 (65.923)	lr 0.02373
Train [18][1200/3239]	Time 0.206 (0.544)	Data Time 0.001 (0.026)	Loss 3.5941 (3.4562)	Entropy 1.71919 (1.72224)	Top-1 acc 41.016 (42.121)	Top-5 acc 62.500 (65.921)	lr 0.02373
Train [18][1210/3239]	Time 0.228 (0.543)	Data Time 0.001 (0.026)	Loss 3.4570 (3.4565)	Entropy 1.71916 (1.72222)	Top-1 acc 41.797 (42.109)	Top-5 acc 64.453 (65.915)	lr 0.02373
Train [18][1220/3239]	Time 2.297 (0.542)	Data Time 0.001 (0.025)	Loss 3.3165 (3.4563)	Entropy 1.71916 (1.72219)	Top-1 acc 44.922 (42.110)	Top-5 acc 67.969 (65.923)	lr 0.02373
Train [18][1230/3239]	Time 0.232 (0.539)	Data Time 0.001 (0.025)	Loss 3.4617 (3.4561)	Entropy 1.71908 (1.72217)	Top-1 acc 41.406 (42.112)	Top-5 acc 67.578 (65.931)	lr 0.02373
Train [18][1240/3239]	Time 0.211 (0.538)	Data Time 0.001 (0.025)	Loss 3.1512 (3.4559)	Entropy 1.71901 (1.72214)	Top-1 acc 48.047 (42.114)	Top-5 acc 71.094 (65.931)	lr 0.02373
Train [18][1250/3239]	Time 0.218 (0.537)	Data Time 0.001 (0.025)	Loss 3.4271 (3.4561)	Entropy 1.71898 (1.72211)	Top-1 acc 40.234 (42.106)	Top-5 acc 69.141 (65.928)	lr 0.02373
Train [18][1260/3239]	Time 0.201 (0.536)	Data Time 0.001 (0.025)	Loss 3.3608 (3.4562)	Entropy 1.71891 (1.72209)	Top-1 acc 42.578 (42.103)	Top-5 acc 67.969 (65.926)	lr 0.02373
Train [18][1270/3239]	Time 0.239 (0.536)	Data Time 0.002 (0.025)	Loss 3.2750 (3.4560)	Entropy 1.71889 (1.72206)	Top-1 acc 46.094 (42.111)	Top-5 acc 70.703 (65.923)	lr 0.02373
Train [18][1280/3239]	Time 0.232 (0.535)	Data Time 0.001 (0.024)	Loss 3.4375 (3.4561)	Entropy 1.71888 (1.72204)	Top-1 acc 40.625 (42.110)	Top-5 acc 66.016 (65.920)	lr 0.02373
Train [18][1290/3239]	Time 0.307 (0.534)	Data Time 0.002 (0.024)	Loss 3.3501 (3.4565)	Entropy 1.71878 (1.72201)	Top-1 acc 45.312 (42.099)	Top-5 acc 68.750 (65.910)	lr 0.02373
Train [18][1300/3239]	Time 0.398 (0.564)	Data Time 0.003 (0.024)	Loss 3.4779 (3.4560)	Entropy 1.71868 (1.72199)	Top-1 acc 42.578 (42.109)	Top-5 acc 63.281 (65.921)	lr 0.02373
Train [18][1310/3239]	Time 0.193 (0.564)	Data Time 0.003 (0.024)	Loss 3.6019 (3.4560)	Entropy 1.71859 (1.72196)	Top-1 acc 40.625 (42.112)	Top-5 acc 63.672 (65.923)	lr 0.02372
Train [18][1320/3239]	Time 0.225 (0.562)	Data Time 0.002 (0.024)	Loss 3.4026 (3.4560)	Entropy 1.71852 (1.72194)	Top-1 acc 41.016 (42.104)	Top-5 acc 69.922 (65.926)	lr 0.02372
Train [18][1330/3239]	Time 2.247 (0.561)	Data Time 0.001 (0.024)	Loss 3.5013 (3.4556)	Entropy 1.71852 (1.72191)	Top-1 acc 42.578 (42.114)	Top-5 acc 65.234 (65.938)	lr 0.02372
Train [18][1340/3239]	Time 0.258 (0.559)	Data Time 0.001 (0.023)	Loss 3.5792 (3.4558)	Entropy 1.71848 (1.72189)	Top-1 acc 40.625 (42.106)	Top-5 acc 64.453 (65.929)	lr 0.02372
Train [18][1350/3239]	Time 0.199 (0.558)	Data Time 0.001 (0.023)	Loss 3.5982 (3.4564)	Entropy 1.71844 (1.72186)	Top-1 acc 38.672 (42.093)	Top-5 acc 66.406 (65.922)	lr 0.02372
Train [18][1360/3239]	Time 0.213 (0.557)	Data Time 0.001 (0.023)	Loss 3.4862 (3.4560)	Entropy 1.71845 (1.72184)	Top-1 acc 40.625 (42.093)	Top-5 acc 65.625 (65.933)	lr 0.02372
Train [18][1370/3239]	Time 0.217 (0.556)	Data Time 0.001 (0.023)	Loss 3.5644 (3.4564)	Entropy 1.71838 (1.72181)	Top-1 acc 40.625 (42.087)	Top-5 acc 61.328 (65.922)	lr 0.02372
Train [18][1380/3239]	Time 0.224 (0.555)	Data Time 0.001 (0.023)	Loss 3.4614 (3.4558)	Entropy 1.71844 (1.72179)	Top-1 acc 41.016 (42.094)	Top-5 acc 64.453 (65.932)	lr 0.02372
Train [18][1390/3239]	Time 0.219 (0.555)	Data Time 0.001 (0.023)	Loss 3.4887 (3.4555)	Entropy 1.71838 (1.72176)	Top-1 acc 38.672 (42.101)	Top-5 acc 64.453 (65.942)	lr 0.02372
Train [18][1400/3239]	Time 0.150 (0.554)	Data Time 0.001 (0.022)	Loss 3.6093 (3.4553)	Entropy 1.71833 (1.72174)	Top-1 acc 39.844 (42.107)	Top-5 acc 62.109 (65.949)	lr 0.02372
Train [18][1410/3239]	Time 0.214 (0.553)	Data Time 0.001 (0.022)	Loss 3.4242 (3.4553)	Entropy 1.71827 (1.72171)	Top-1 acc 40.234 (42.107)	Top-5 acc 67.969 (65.949)	lr 0.02372
Train [18][1420/3239]	Time 0.213 (0.552)	Data Time 0.001 (0.022)	Loss 3.8260 (3.4555)	Entropy 1.71820 (1.72169)	Top-1 acc 33.594 (42.105)	Top-5 acc 61.328 (65.947)	lr 0.02372
Train [18][1430/3239]	Time 0.227 (0.551)	Data Time 0.001 (0.022)	Loss 3.3229 (3.4555)	Entropy 1.71803 (1.72167)	Top-1 acc 45.312 (42.100)	Top-5 acc 69.922 (65.949)	lr 0.02372
Train [18][1440/3239]	Time 2.272 (0.550)	Data Time 0.001 (0.022)	Loss 3.5143 (3.4560)	Entropy 1.71803 (1.72164)	Top-1 acc 38.672 (42.090)	Top-5 acc 65.625 (65.941)	lr 0.02372
Train [18][1450/3239]	Time 0.232 (0.548)	Data Time 0.004 (0.022)	Loss 3.3978 (3.4560)	Entropy 1.71799 (1.72162)	Top-1 acc 41.797 (42.090)	Top-5 acc 67.188 (65.943)	lr 0.02372
Train [18][1460/3239]	Time 0.208 (0.547)	Data Time 0.001 (0.022)	Loss 3.2625 (3.4557)	Entropy 1.71791 (1.72159)	Top-1 acc 45.312 (42.094)	Top-5 acc 70.312 (65.953)	lr 0.02372
Train [18][1470/3239]	Time 0.234 (0.546)	Data Time 0.001 (0.021)	Loss 3.5427 (3.4559)	Entropy 1.71779 (1.72156)	Top-1 acc 41.016 (42.086)	Top-5 acc 66.797 (65.958)	lr 0.02372
Train [18][1480/3239]	Time 0.194 (0.546)	Data Time 0.001 (0.021)	Loss 3.4772 (3.4557)	Entropy 1.71771 (1.72154)	Top-1 acc 39.062 (42.098)	Top-5 acc 67.969 (65.965)	lr 0.02372
Train [18][1490/3239]	Time 0.230 (0.545)	Data Time 0.001 (0.021)	Loss 3.5205 (3.4558)	Entropy 1.71761 (1.72151)	Top-1 acc 37.500 (42.091)	Top-5 acc 66.016 (65.965)	lr 0.02372
Train [18][1500/3239]	Time 0.266 (0.544)	Data Time 0.001 (0.021)	Loss 3.5921 (3.4557)	Entropy 1.71759 (1.72149)	Top-1 acc 42.188 (42.091)	Top-5 acc 61.328 (65.964)	lr 0.02372
Train [18][1510/3239]	Time 0.200 (0.544)	Data Time 0.001 (0.021)	Loss 3.5604 (3.4558)	Entropy 1.71757 (1.72146)	Top-1 acc 40.625 (42.089)	Top-5 acc 62.891 (65.961)	lr 0.02372
Train [18][1520/3239]	Time 0.185 (0.543)	Data Time 0.001 (0.021)	Loss 3.5169 (3.4564)	Entropy 1.71751 (1.72143)	Top-1 acc 38.672 (42.081)	Top-5 acc 62.500 (65.952)	lr 0.02372
Train [18][1530/3239]	Time 0.218 (0.542)	Data Time 0.014 (0.021)	Loss 3.4053 (3.4563)	Entropy 1.71749 (1.72141)	Top-1 acc 39.062 (42.075)	Top-5 acc 66.797 (65.950)	lr 0.02371
Train [18][1540/3239]	Time 0.250 (0.542)	Data Time 0.001 (0.021)	Loss 3.4606 (3.4565)	Entropy 1.71739 (1.72138)	Top-1 acc 39.453 (42.065)	Top-5 acc 64.062 (65.944)	lr 0.02371
Train [18][1550/3239]	Time 2.420 (0.541)	Data Time 0.002 (0.020)	Loss 3.5283 (3.4569)	Entropy 1.71739 (1.72136)	Top-1 acc 39.844 (42.063)	Top-5 acc 66.406 (65.938)	lr 0.02371
Train [18][1560/3239]	Time 0.247 (0.539)	Data Time 0.001 (0.020)	Loss 3.5055 (3.4571)	Entropy 1.71740 (1.72133)	Top-1 acc 39.453 (42.054)	Top-5 acc 65.234 (65.937)	lr 0.02371
Train [18][1570/3239]	Time 0.258 (0.538)	Data Time 0.001 (0.020)	Loss 3.4099 (3.4569)	Entropy 1.71734 (1.72131)	Top-1 acc 41.406 (42.062)	Top-5 acc 65.625 (65.941)	lr 0.02371
Train [18][1580/3239]	Time 0.209 (0.538)	Data Time 0.002 (0.020)	Loss 3.5368 (3.4573)	Entropy 1.71731 (1.72128)	Top-1 acc 41.797 (42.055)	Top-5 acc 66.406 (65.935)	lr 0.02371
Train [18][1590/3239]	Time 0.208 (0.537)	Data Time 0.001 (0.020)	Loss 3.6644 (3.4578)	Entropy 1.71721 (1.72126)	Top-1 acc 36.328 (42.045)	Top-5 acc 60.547 (65.922)	lr 0.02371
Train [18][1600/3239]	Time 0.212 (0.536)	Data Time 0.001 (0.020)	Loss 3.3747 (3.4577)	Entropy 1.71714 (1.72123)	Top-1 acc 43.359 (42.046)	Top-5 acc 66.406 (65.925)	lr 0.02371
Train [18][1610/3239]	Time 0.217 (0.536)	Data Time 0.001 (0.020)	Loss 3.5196 (3.4577)	Entropy 1.71712 (1.72121)	Top-1 acc 39.453 (42.044)	Top-5 acc 67.969 (65.928)	lr 0.02371
Train [18][1620/3239]	Time 0.213 (0.535)	Data Time 0.001 (0.020)	Loss 3.4528 (3.4576)	Entropy 1.71705 (1.72118)	Top-1 acc 41.797 (42.048)	Top-5 acc 66.406 (65.929)	lr 0.02371
Train [18][1630/3239]	Time 0.198 (0.535)	Data Time 0.001 (0.020)	Loss 3.8405 (3.4578)	Entropy 1.71703 (1.72115)	Top-1 acc 37.500 (42.049)	Top-5 acc 58.594 (65.928)	lr 0.02371
Train [18][1640/3239]	Time 0.154 (0.534)	Data Time 0.001 (0.019)	Loss 3.6237 (3.4580)	Entropy 1.71692 (1.72113)	Top-1 acc 39.453 (42.051)	Top-5 acc 64.844 (65.926)	lr 0.02371
Train [18][1650/3239]	Time 0.190 (0.533)	Data Time 0.001 (0.019)	Loss 3.3325 (3.4579)	Entropy 1.71691 (1.72110)	Top-1 acc 44.922 (42.055)	Top-5 acc 69.531 (65.926)	lr 0.02371
Train [18][1660/3239]	Time 43.490 (0.557)	Data Time 0.001 (0.019)	Loss 3.5411 (3.4581)	Entropy 1.71691 (1.72108)	Top-1 acc 42.578 (42.056)	Top-5 acc 63.672 (65.926)	lr 0.02371
Train [18][1670/3239]	Time 0.326 (0.556)	Data Time 0.002 (0.019)	Loss 3.4440 (3.4577)	Entropy 1.71691 (1.72105)	Top-1 acc 40.625 (42.065)	Top-5 acc 66.406 (65.934)	lr 0.02371
Train [18][1680/3239]	Time 0.236 (0.555)	Data Time 0.003 (0.019)	Loss 3.3386 (3.4574)	Entropy 1.71690 (1.72103)	Top-1 acc 47.266 (42.071)	Top-5 acc 70.703 (65.938)	lr 0.02371
Train [18][1690/3239]	Time 0.220 (0.555)	Data Time 0.001 (0.019)	Loss 3.3867 (3.4572)	Entropy 1.71684 (1.72100)	Top-1 acc 43.750 (42.076)	Top-5 acc 67.188 (65.946)	lr 0.02371
Train [18][1700/3239]	Time 0.239 (0.554)	Data Time 0.001 (0.019)	Loss 3.5718 (3.4571)	Entropy 1.71681 (1.72098)	Top-1 acc 42.188 (42.077)	Top-5 acc 61.328 (65.944)	lr 0.02371
Train [18][1710/3239]	Time 0.229 (0.553)	Data Time 0.002 (0.019)	Loss 3.5041 (3.4576)	Entropy 1.71672 (1.72095)	Top-1 acc 40.234 (42.068)	Top-5 acc 67.578 (65.937)	lr 0.02371
Train [18][1720/3239]	Time 0.236 (0.553)	Data Time 0.001 (0.019)	Loss 3.2146 (3.4574)	Entropy 1.71670 (1.72093)	Top-1 acc 45.703 (42.072)	Top-5 acc 71.484 (65.946)	lr 0.02371
Train [18][1730/3239]	Time 0.208 (0.552)	Data Time 0.001 (0.019)	Loss 3.7132 (3.4579)	Entropy 1.71665 (1.72091)	Top-1 acc 39.453 (42.063)	Top-5 acc 61.719 (65.940)	lr 0.02371
Train [18][1740/3239]	Time 0.201 (0.551)	Data Time 0.001 (0.018)	Loss 3.3748 (3.4580)	Entropy 1.71663 (1.72088)	Top-1 acc 47.656 (42.058)	Top-5 acc 67.969 (65.940)	lr 0.02371
Train [18][1750/3239]	Time 0.221 (0.551)	Data Time 0.001 (0.018)	Loss 3.4088 (3.4581)	Entropy 1.71663 (1.72086)	Top-1 acc 43.359 (42.055)	Top-5 acc 68.359 (65.941)	lr 0.02371
Train [18][1760/3239]	Time 0.265 (0.550)	Data Time 0.001 (0.018)	Loss 3.4053 (3.4583)	Entropy 1.71665 (1.72083)	Top-1 acc 41.797 (42.048)	Top-5 acc 66.797 (65.930)	lr 0.02370
Train [18][1770/3239]	Time 2.505 (0.549)	Data Time 0.001 (0.018)	Loss 3.4533 (3.4584)	Entropy 1.71665 (1.72081)	Top-1 acc 44.531 (42.049)	Top-5 acc 68.359 (65.936)	lr 0.02370
Train [18][1780/3239]	Time 0.261 (0.548)	Data Time 0.001 (0.018)	Loss 3.3412 (3.4587)	Entropy 1.71646 (1.72078)	Top-1 acc 47.266 (42.048)	Top-5 acc 69.922 (65.936)	lr 0.02370
Train [18][1790/3239]	Time 0.240 (0.547)	Data Time 0.001 (0.018)	Loss 3.5487 (3.4587)	Entropy 1.71627 (1.72076)	Top-1 acc 41.797 (42.050)	Top-5 acc 63.281 (65.936)	lr 0.02370
Train [18][1800/3239]	Time 0.253 (0.546)	Data Time 0.001 (0.018)	Loss 3.4945 (3.4587)	Entropy 1.71620 (1.72073)	Top-1 acc 39.453 (42.051)	Top-5 acc 66.797 (65.936)	lr 0.02370
Train [18][1810/3239]	Time 0.197 (0.546)	Data Time 0.001 (0.018)	Loss 3.6092 (3.4588)	Entropy 1.71609 (1.72071)	Top-1 acc 41.016 (42.046)	Top-5 acc 62.500 (65.933)	lr 0.02370
Train [18][1820/3239]	Time 0.220 (0.545)	Data Time 0.001 (0.018)	Loss 3.5397 (3.4587)	Entropy 1.71598 (1.72068)	Top-1 acc 42.969 (42.050)	Top-5 acc 64.062 (65.936)	lr 0.02370
Train [18][1830/3239]	Time 0.212 (0.544)	Data Time 0.001 (0.018)	Loss 3.4109 (3.4584)	Entropy 1.71588 (1.72066)	Top-1 acc 39.453 (42.055)	Top-5 acc 67.188 (65.942)	lr 0.02370
Train [18][1840/3239]	Time 0.218 (0.544)	Data Time 0.001 (0.018)	Loss 3.6205 (3.4585)	Entropy 1.71583 (1.72063)	Top-1 acc 40.234 (42.057)	Top-5 acc 64.844 (65.945)	lr 0.02370
Train [18][1850/3239]	Time 0.317 (0.543)	Data Time 0.001 (0.017)	Loss 3.3385 (3.4580)	Entropy 1.71582 (1.72061)	Top-1 acc 48.047 (42.069)	Top-5 acc 66.797 (65.951)	lr 0.02370
Train [18][1860/3239]	Time 0.211 (0.543)	Data Time 0.001 (0.017)	Loss 3.3674 (3.4580)	Entropy 1.71581 (1.72058)	Top-1 acc 40.234 (42.065)	Top-5 acc 68.359 (65.953)	lr 0.02370
Train [18][1870/3239]	Time 0.226 (0.542)	Data Time 0.002 (0.017)	Loss 3.2958 (3.4579)	Entropy 1.71567 (1.72055)	Top-1 acc 48.047 (42.077)	Top-5 acc 64.062 (65.956)	lr 0.02370
Train [18][1880/3239]	Time 2.383 (0.542)	Data Time 0.002 (0.017)	Loss 3.5074 (3.4581)	Entropy 1.71567 (1.72053)	Top-1 acc 42.188 (42.071)	Top-5 acc 66.406 (65.953)	lr 0.02370
Train [18][1890/3239]	Time 0.199 (0.540)	Data Time 0.002 (0.017)	Loss 3.5004 (3.4580)	Entropy 1.71567 (1.72050)	Top-1 acc 44.531 (42.074)	Top-5 acc 64.844 (65.953)	lr 0.02370
Train [18][1900/3239]	Time 0.279 (0.539)	Data Time 0.002 (0.017)	Loss 3.5512 (3.4579)	Entropy 1.71565 (1.72048)	Top-1 acc 45.703 (42.077)	Top-5 acc 63.672 (65.950)	lr 0.02370
Train [18][1910/3239]	Time 0.317 (0.539)	Data Time 0.001 (0.017)	Loss 3.4506 (3.4578)	Entropy 1.71578 (1.72045)	Top-1 acc 43.750 (42.078)	Top-5 acc 66.797 (65.950)	lr 0.02370
Train [18][1920/3239]	Time 0.217 (0.538)	Data Time 0.001 (0.017)	Loss 3.3266 (3.4578)	Entropy 1.71575 (1.72043)	Top-1 acc 46.094 (42.074)	Top-5 acc 68.359 (65.951)	lr 0.02370
Train [18][1930/3239]	Time 0.200 (0.538)	Data Time 0.001 (0.017)	Loss 3.5121 (3.4577)	Entropy 1.71571 (1.72040)	Top-1 acc 41.797 (42.076)	Top-5 acc 67.969 (65.956)	lr 0.02370
Train [18][1940/3239]	Time 0.243 (0.537)	Data Time 0.001 (0.017)	Loss 3.6092 (3.4575)	Entropy 1.71563 (1.72038)	Top-1 acc 41.406 (42.084)	Top-5 acc 65.234 (65.957)	lr 0.02370
Train [18][1950/3239]	Time 0.200 (0.537)	Data Time 0.002 (0.017)	Loss 3.5444 (3.4576)	Entropy 1.71562 (1.72035)	Top-1 acc 40.234 (42.082)	Top-5 acc 65.234 (65.957)	lr 0.02370
Train [18][1960/3239]	Time 0.228 (0.536)	Data Time 0.002 (0.017)	Loss 3.3776 (3.4579)	Entropy 1.71558 (1.72033)	Top-1 acc 43.359 (42.078)	Top-5 acc 66.016 (65.951)	lr 0.02370
Train [18][1970/3239]	Time 0.305 (0.536)	Data Time 0.002 (0.016)	Loss 3.4062 (3.4576)	Entropy 1.71556 (1.72031)	Top-1 acc 40.234 (42.084)	Top-5 acc 66.797 (65.957)	lr 0.02370
Train [18][1980/3239]	Time 0.238 (0.535)	Data Time 0.001 (0.016)	Loss 3.4098 (3.4576)	Entropy 1.71550 (1.72028)	Top-1 acc 41.797 (42.087)	Top-5 acc 64.453 (65.954)	lr 0.02369
Train [18][1990/3239]	Time 2.215 (0.535)	Data Time 0.001 (0.016)	Loss 3.5268 (3.4577)	Entropy 1.71550 (1.72026)	Top-1 acc 40.625 (42.090)	Top-5 acc 62.891 (65.950)	lr 0.02369
Train [18][2000/3239]	Time 0.233 (0.533)	Data Time 0.001 (0.016)	Loss 3.4134 (3.4579)	Entropy 1.71549 (1.72023)	Top-1 acc 41.797 (42.091)	Top-5 acc 67.188 (65.946)	lr 0.02369
Train [18][2010/3239]	Time 0.188 (0.533)	Data Time 0.001 (0.016)	Loss 3.5444 (3.4581)	Entropy 1.71548 (1.72021)	Top-1 acc 39.453 (42.093)	Top-5 acc 60.156 (65.940)	lr 0.02369
Train [18][2020/3239]	Time 0.204 (0.532)	Data Time 0.001 (0.016)	Loss 3.4466 (3.4579)	Entropy 1.71529 (1.72019)	Top-1 acc 41.016 (42.097)	Top-5 acc 68.359 (65.944)	lr 0.02369
Train [18][2030/3239]	Time 0.444 (0.550)	Data Time 0.003 (0.016)	Loss 3.4381 (3.4580)	Entropy 1.71524 (1.72016)	Top-1 acc 40.625 (42.094)	Top-5 acc 63.281 (65.945)	lr 0.02369
Train [18][2040/3239]	Time 0.334 (0.550)	Data Time 0.002 (0.016)	Loss 3.5481 (3.4581)	Entropy 1.71520 (1.72014)	Top-1 acc 38.672 (42.092)	Top-5 acc 65.234 (65.944)	lr 0.02369
Train [18][2050/3239]	Time 0.240 (0.550)	Data Time 0.002 (0.016)	Loss 3.4838 (3.4579)	Entropy 1.71517 (1.72011)	Top-1 acc 40.234 (42.097)	Top-5 acc 64.844 (65.952)	lr 0.02369
Train [18][2060/3239]	Time 0.318 (0.550)	Data Time 0.002 (0.016)	Loss 3.4302 (3.4580)	Entropy 1.71509 (1.72009)	Top-1 acc 45.703 (42.099)	Top-5 acc 66.406 (65.952)	lr 0.02369
Train [18][2070/3239]	Time 0.224 (0.549)	Data Time 0.001 (0.016)	Loss 3.3088 (3.4583)	Entropy 1.71506 (1.72007)	Top-1 acc 45.312 (42.088)	Top-5 acc 69.531 (65.942)	lr 0.02369
Train [18][2080/3239]	Time 0.199 (0.549)	Data Time 0.001 (0.016)	Loss 3.4158 (3.4583)	Entropy 1.71503 (1.72004)	Top-1 acc 42.969 (42.086)	Top-5 acc 67.578 (65.950)	lr 0.02369
Train [18][2090/3239]	Time 0.184 (0.548)	Data Time 0.001 (0.016)	Loss 3.7046 (3.4584)	Entropy 1.71500 (1.72002)	Top-1 acc 35.547 (42.082)	Top-5 acc 60.156 (65.948)	lr 0.02369
Train [18][2100/3239]	Time 2.260 (0.548)	Data Time 0.001 (0.016)	Loss 3.3854 (3.4583)	Entropy 1.71500 (1.71999)	Top-1 acc 42.578 (42.085)	Top-5 acc 67.969 (65.954)	lr 0.02369
Train [18][2110/3239]	Time 0.254 (0.546)	Data Time 0.001 (0.016)	Loss 3.4004 (3.4584)	Entropy 1.71495 (1.71997)	Top-1 acc 43.750 (42.090)	Top-5 acc 68.359 (65.952)	lr 0.02369
Train [18][2120/3239]	Time 0.242 (0.546)	Data Time 0.001 (0.016)	Loss 3.3403 (3.4583)	Entropy 1.71495 (1.71995)	Top-1 acc 45.312 (42.092)	Top-5 acc 69.922 (65.952)	lr 0.02369
Train [18][2130/3239]	Time 0.225 (0.545)	Data Time 0.001 (0.015)	Loss 3.4636 (3.4585)	Entropy 1.71485 (1.71992)	Top-1 acc 42.188 (42.091)	Top-5 acc 64.844 (65.946)	lr 0.02369
Train [18][2140/3239]	Time 0.155 (0.545)	Data Time 0.001 (0.015)	Loss 3.5771 (3.4582)	Entropy 1.71482 (1.71990)	Top-1 acc 39.062 (42.096)	Top-5 acc 64.844 (65.954)	lr 0.02369
Train [18][2150/3239]	Time 0.251 (0.544)	Data Time 0.002 (0.015)	Loss 3.4538 (3.4580)	Entropy 1.71478 (1.71987)	Top-1 acc 39.062 (42.103)	Top-5 acc 67.188 (65.958)	lr 0.02369
Train [18][2160/3239]	Time 0.190 (0.544)	Data Time 0.001 (0.015)	Loss 3.4899 (3.4578)	Entropy 1.71456 (1.71985)	Top-1 acc 43.359 (42.108)	Top-5 acc 65.234 (65.964)	lr 0.02369
Train [18][2170/3239]	Time 0.316 (0.544)	Data Time 0.001 (0.015)	Loss 3.5486 (3.4578)	Entropy 1.71454 (1.71983)	Top-1 acc 42.969 (42.107)	Top-5 acc 63.672 (65.964)	lr 0.02369
Train [18][2180/3239]	Time 0.201 (0.543)	Data Time 0.001 (0.015)	Loss 3.4487 (3.4575)	Entropy 1.71444 (1.71980)	Top-1 acc 43.750 (42.111)	Top-5 acc 67.578 (65.970)	lr 0.02369
Train [18][2190/3239]	Time 0.185 (0.542)	Data Time 0.001 (0.015)	Loss 3.5036 (3.4573)	Entropy 1.71439 (1.71978)	Top-1 acc 38.672 (42.108)	Top-5 acc 63.672 (65.970)	lr 0.02369
Train [18][2200/3239]	Time 0.229 (0.542)	Data Time 0.001 (0.015)	Loss 3.5164 (3.4572)	Entropy 1.71433 (1.71975)	Top-1 acc 39.453 (42.109)	Top-5 acc 62.109 (65.967)	lr 0.02368
Train [18][2210/3239]	Time 2.221 (0.541)	Data Time 0.001 (0.015)	Loss 3.3914 (3.4571)	Entropy 1.71433 (1.71973)	Top-1 acc 46.484 (42.112)	Top-5 acc 63.672 (65.966)	lr 0.02368
Train [18][2220/3239]	Time 0.182 (0.540)	Data Time 0.001 (0.015)	Loss 3.4241 (3.4569)	Entropy 1.71424 (1.71970)	Top-1 acc 44.141 (42.117)	Top-5 acc 66.016 (65.969)	lr 0.02368
Train [18][2230/3239]	Time 0.166 (0.539)	Data Time 0.001 (0.015)	Loss 3.3839 (3.4569)	Entropy 1.71422 (1.71968)	Top-1 acc 41.797 (42.122)	Top-5 acc 67.578 (65.968)	lr 0.02368
Train [18][2240/3239]	Time 0.192 (0.539)	Data Time 0.001 (0.015)	Loss 3.3577 (3.4566)	Entropy 1.71421 (1.71965)	Top-1 acc 44.922 (42.126)	Top-5 acc 68.750 (65.973)	lr 0.02368
Train [18][2250/3239]	Time 0.300 (0.538)	Data Time 0.001 (0.015)	Loss 3.4624 (3.4567)	Entropy 1.71416 (1.71963)	Top-1 acc 43.359 (42.121)	Top-5 acc 66.016 (65.975)	lr 0.02368
Train [18][2260/3239]	Time 0.217 (0.538)	Data Time 0.001 (0.015)	Loss 3.3551 (3.4565)	Entropy 1.71408 (1.71961)	Top-1 acc 43.750 (42.121)	Top-5 acc 69.531 (65.976)	lr 0.02368
Train [18][2270/3239]	Time 0.192 (0.538)	Data Time 0.001 (0.015)	Loss 3.5757 (3.4566)	Entropy 1.71399 (1.71958)	Top-1 acc 40.625 (42.122)	Top-5 acc 62.500 (65.972)	lr 0.02368
Train [18][2280/3239]	Time 0.182 (0.537)	Data Time 0.001 (0.015)	Loss 3.4519 (3.4564)	Entropy 1.71392 (1.71956)	Top-1 acc 39.453 (42.121)	Top-5 acc 66.797 (65.975)	lr 0.02368
Train [18][2290/3239]	Time 0.218 (0.537)	Data Time 0.001 (0.015)	Loss 3.5544 (3.4563)	Entropy 1.71386 (1.71953)	Top-1 acc 42.969 (42.124)	Top-5 acc 66.797 (65.979)	lr 0.02368
Train [18][2300/3239]	Time 0.215 (0.536)	Data Time 0.002 (0.014)	Loss 3.4734 (3.4564)	Entropy 1.71384 (1.71951)	Top-1 acc 40.625 (42.121)	Top-5 acc 65.625 (65.978)	lr 0.02368
Train [18][2310/3239]	Time 0.330 (0.536)	Data Time 0.001 (0.014)	Loss 3.2542 (3.4560)	Entropy 1.71374 (1.71948)	Top-1 acc 43.359 (42.127)	Top-5 acc 68.750 (65.988)	lr 0.02368
Train [18][2320/3239]	Time 2.261 (0.535)	Data Time 0.001 (0.014)	Loss 3.6250 (3.4561)	Entropy 1.71374 (1.71946)	Top-1 acc 39.453 (42.128)	Top-5 acc 62.109 (65.987)	lr 0.02368
Train [18][2330/3239]	Time 0.215 (0.534)	Data Time 0.001 (0.014)	Loss 3.6225 (3.4559)	Entropy 1.71361 (1.71943)	Top-1 acc 38.281 (42.140)	Top-5 acc 59.375 (65.989)	lr 0.02368
Train [18][2340/3239]	Time 0.204 (0.534)	Data Time 0.001 (0.014)	Loss 3.5103 (3.4559)	Entropy 1.71356 (1.71941)	Top-1 acc 41.016 (42.134)	Top-5 acc 63.281 (65.986)	lr 0.02368
Train [18][2350/3239]	Time 0.213 (0.533)	Data Time 0.001 (0.014)	Loss 3.5523 (3.4559)	Entropy 1.71348 (1.71938)	Top-1 acc 42.188 (42.140)	Top-5 acc 65.234 (65.990)	lr 0.02368
Train [18][2360/3239]	Time 0.190 (0.533)	Data Time 0.001 (0.014)	Loss 3.4822 (3.4557)	Entropy 1.71345 (1.71936)	Top-1 acc 44.141 (42.147)	Top-5 acc 64.062 (65.994)	lr 0.02368
Train [18][2370/3239]	Time 0.230 (0.532)	Data Time 0.002 (0.014)	Loss 3.4442 (3.4558)	Entropy 1.71341 (1.71933)	Top-1 acc 44.141 (42.148)	Top-5 acc 64.453 (65.990)	lr 0.02368
Train [18][2380/3239]	Time 0.211 (0.532)	Data Time 0.001 (0.014)	Loss 3.3554 (3.4556)	Entropy 1.71331 (1.71931)	Top-1 acc 46.094 (42.150)	Top-5 acc 65.234 (65.994)	lr 0.02368
Train [18][2390/3239]	Time 0.327 (0.549)	Data Time 0.003 (0.014)	Loss 3.3201 (3.4557)	Entropy 1.71331 (1.71928)	Top-1 acc 42.188 (42.147)	Top-5 acc 67.969 (65.988)	lr 0.02368
Train [18][2400/3239]	Time 0.221 (0.548)	Data Time 0.002 (0.014)	Loss 3.3769 (3.4559)	Entropy 1.71326 (1.71926)	Top-1 acc 42.188 (42.143)	Top-5 acc 66.797 (65.985)	lr 0.02368
Train [18][2410/3239]	Time 0.143 (0.548)	Data Time 0.001 (0.014)	Loss 3.4070 (3.4557)	Entropy 1.71324 (1.71923)	Top-1 acc 41.016 (42.144)	Top-5 acc 67.969 (65.990)	lr 0.02368
Train [18][2420/3239]	Time 0.264 (0.547)	Data Time 0.002 (0.014)	Loss 3.1607 (3.4556)	Entropy 1.71319 (1.71921)	Top-1 acc 51.562 (42.145)	Top-5 acc 74.219 (65.993)	lr 0.02367
Train [18][2430/3239]	Time 2.443 (0.547)	Data Time 0.001 (0.014)	Loss 3.4905 (3.4555)	Entropy 1.71319 (1.71918)	Top-1 acc 41.406 (42.146)	Top-5 acc 68.359 (66.000)	lr 0.02367
Train [18][2440/3239]	Time 0.226 (0.546)	Data Time 0.002 (0.014)	Loss 3.3663 (3.4554)	Entropy 1.71312 (1.71916)	Top-1 acc 42.969 (42.146)	Top-5 acc 70.703 (66.003)	lr 0.02367
Train [18][2450/3239]	Time 0.228 (0.545)	Data Time 0.001 (0.014)	Loss 3.2483 (3.4553)	Entropy 1.71309 (1.71913)	Top-1 acc 48.438 (42.150)	Top-5 acc 69.922 (66.005)	lr 0.02367
Train [18][2460/3239]	Time 0.196 (0.545)	Data Time 0.001 (0.014)	Loss 3.3759 (3.4552)	Entropy 1.71304 (1.71911)	Top-1 acc 44.922 (42.152)	Top-5 acc 69.922 (66.006)	lr 0.02367
Train [18][2470/3239]	Time 0.220 (0.544)	Data Time 0.002 (0.014)	Loss 3.4008 (3.4550)	Entropy 1.71302 (1.71908)	Top-1 acc 42.969 (42.152)	Top-5 acc 68.750 (66.011)	lr 0.02367
Train [18][2480/3239]	Time 0.198 (0.544)	Data Time 0.001 (0.014)	Loss 3.7319 (3.4549)	Entropy 1.71294 (1.71906)	Top-1 acc 33.984 (42.150)	Top-5 acc 56.250 (66.012)	lr 0.02367
Train [18][2490/3239]	Time 0.227 (0.543)	Data Time 0.001 (0.014)	Loss 3.2820 (3.4545)	Entropy 1.71271 (1.71903)	Top-1 acc 46.875 (42.159)	Top-5 acc 70.703 (66.022)	lr 0.02367
Train [18][2500/3239]	Time 0.307 (0.543)	Data Time 0.001 (0.014)	Loss 3.4309 (3.4545)	Entropy 1.71268 (1.71901)	Top-1 acc 41.016 (42.158)	Top-5 acc 66.406 (66.021)	lr 0.02367
Train [18][2510/3239]	Time 0.223 (0.543)	Data Time 0.001 (0.013)	Loss 3.5652 (3.4545)	Entropy 1.71257 (1.71898)	Top-1 acc 38.672 (42.161)	Top-5 acc 64.453 (66.021)	lr 0.02367
Train [18][2520/3239]	Time 0.209 (0.542)	Data Time 0.002 (0.013)	Loss 3.4942 (3.4546)	Entropy 1.71247 (1.71896)	Top-1 acc 40.625 (42.157)	Top-5 acc 68.359 (66.022)	lr 0.02367
Train [18][2530/3239]	Time 0.160 (0.542)	Data Time 0.001 (0.013)	Loss 3.5062 (3.4545)	Entropy 1.71234 (1.71893)	Top-1 acc 41.797 (42.156)	Top-5 acc 64.062 (66.025)	lr 0.02367
Train [18][2540/3239]	Time 2.345 (0.541)	Data Time 0.001 (0.013)	Loss 3.6205 (3.4546)	Entropy 1.71234 (1.71891)	Top-1 acc 36.328 (42.152)	Top-5 acc 62.500 (66.020)	lr 0.02367
Train [18][2550/3239]	Time 0.205 (0.540)	Data Time 0.001 (0.013)	Loss 3.3984 (3.4549)	Entropy 1.71232 (1.71888)	Top-1 acc 44.922 (42.151)	Top-5 acc 68.359 (66.015)	lr 0.02367
Train [18][2560/3239]	Time 0.194 (0.539)	Data Time 0.001 (0.013)	Loss 3.7083 (3.4551)	Entropy 1.71224 (1.71886)	Top-1 acc 37.500 (42.146)	Top-5 acc 61.328 (66.007)	lr 0.02367
Train [18][2570/3239]	Time 0.326 (0.539)	Data Time 0.001 (0.013)	Loss 3.5010 (3.4551)	Entropy 1.71219 (1.71883)	Top-1 acc 42.969 (42.151)	Top-5 acc 63.672 (66.009)	lr 0.02367
Train [18][2580/3239]	Time 0.210 (0.539)	Data Time 0.002 (0.013)	Loss 3.5031 (3.4552)	Entropy 1.71217 (1.71880)	Top-1 acc 39.062 (42.149)	Top-5 acc 64.844 (66.007)	lr 0.02367
Train [18][2590/3239]	Time 0.141 (0.538)	Data Time 0.001 (0.013)	Loss 3.4290 (3.4554)	Entropy 1.71211 (1.71878)	Top-1 acc 43.750 (42.145)	Top-5 acc 64.844 (66.000)	lr 0.02367
Train [18][2600/3239]	Time 0.266 (0.538)	Data Time 0.001 (0.013)	Loss 3.3081 (3.4553)	Entropy 1.71192 (1.71875)	Top-1 acc 48.047 (42.147)	Top-5 acc 67.578 (65.999)	lr 0.02367
Train [18][2610/3239]	Time 0.271 (0.537)	Data Time 0.001 (0.013)	Loss 3.4984 (3.4554)	Entropy 1.71187 (1.71873)	Top-1 acc 39.062 (42.147)	Top-5 acc 67.188 (65.999)	lr 0.02367
Train [18][2620/3239]	Time 0.163 (0.537)	Data Time 0.001 (0.013)	Loss 3.6351 (3.4554)	Entropy 1.71186 (1.71870)	Top-1 acc 41.016 (42.149)	Top-5 acc 65.234 (65.995)	lr 0.02367
Train [18][2630/3239]	Time 0.213 (0.537)	Data Time 0.001 (0.013)	Loss 3.5422 (3.4554)	Entropy 1.71182 (1.71867)	Top-1 acc 42.578 (42.148)	Top-5 acc 62.500 (65.995)	lr 0.02367
Train [18][2640/3239]	Time 0.165 (0.536)	Data Time 0.001 (0.013)	Loss 3.6375 (3.4556)	Entropy 1.71182 (1.71865)	Top-1 acc 39.453 (42.144)	Top-5 acc 59.375 (65.994)	lr 0.02366
Train [18][2650/3239]	Time 0.203 (0.536)	Data Time 0.001 (0.013)	Loss 3.5034 (3.4555)	Entropy 1.71180 (1.71862)	Top-1 acc 43.750 (42.143)	Top-5 acc 64.062 (65.996)	lr 0.02366
Train [18][2660/3239]	Time 0.253 (0.536)	Data Time 0.001 (0.013)	Loss 3.3746 (3.4555)	Entropy 1.71182 (1.71860)	Top-1 acc 40.234 (42.143)	Top-5 acc 66.797 (65.997)	lr 0.02366
Train [18][2670/3239]	Time 0.260 (0.535)	Data Time 0.002 (0.013)	Loss 3.1995 (3.4553)	Entropy 1.71176 (1.71857)	Top-1 acc 51.172 (42.148)	Top-5 acc 73.047 (66.000)	lr 0.02366
Train [18][2680/3239]	Time 0.171 (0.535)	Data Time 0.001 (0.013)	Loss 3.2035 (3.4553)	Entropy 1.71153 (1.71854)	Top-1 acc 46.094 (42.147)	Top-5 acc 73.438 (66.001)	lr 0.02366
Train [18][2690/3239]	Time 0.232 (0.535)	Data Time 0.001 (0.013)	Loss 3.2967 (3.4552)	Entropy 1.71160 (1.71852)	Top-1 acc 46.484 (42.149)	Top-5 acc 69.531 (66.002)	lr 0.02366
Train [18][2700/3239]	Time 0.214 (0.534)	Data Time 0.001 (0.013)	Loss 3.6183 (3.4553)	Entropy 1.71156 (1.71849)	Top-1 acc 39.453 (42.146)	Top-5 acc 62.500 (65.998)	lr 0.02366
Train [18][2710/3239]	Time 0.348 (0.534)	Data Time 0.001 (0.013)	Loss 3.4709 (3.4554)	Entropy 1.71165 (1.71847)	Top-1 acc 44.141 (42.146)	Top-5 acc 66.016 (65.996)	lr 0.02366
Train [18][2720/3239]	Time 0.219 (0.533)	Data Time 0.001 (0.013)	Loss 3.5598 (3.4555)	Entropy 1.71166 (1.71844)	Top-1 acc 41.406 (42.146)	Top-5 acc 61.719 (65.996)	lr 0.02366
Train [18][2730/3239]	Time 0.225 (0.533)	Data Time 0.001 (0.013)	Loss 3.4308 (3.4554)	Entropy 1.71167 (1.71842)	Top-1 acc 40.625 (42.145)	Top-5 acc 64.844 (66.000)	lr 0.02366
Train [18][2740/3239]	Time 0.286 (0.548)	Data Time 0.004 (0.013)	Loss 3.4186 (3.4556)	Entropy 1.71148 (1.71839)	Top-1 acc 39.844 (42.144)	Top-5 acc 63.281 (65.994)	lr 0.02366
Train [18][2750/3239]	Time 0.233 (0.548)	Data Time 0.002 (0.013)	Loss 3.5201 (3.4553)	Entropy 1.71131 (1.71837)	Top-1 acc 43.750 (42.153)	Top-5 acc 64.062 (66.001)	lr 0.02366
Train [18][2760/3239]	Time 0.229 (0.547)	Data Time 0.002 (0.012)	Loss 3.4644 (3.4553)	Entropy 1.71126 (1.71834)	Top-1 acc 37.500 (42.147)	Top-5 acc 65.234 (66.005)	lr 0.02366
Train [18][2770/3239]	Time 0.243 (0.547)	Data Time 0.002 (0.012)	Loss 3.5249 (3.4551)	Entropy 1.71124 (1.71832)	Top-1 acc 39.844 (42.150)	Top-5 acc 63.672 (66.008)	lr 0.02366
Train [18][2780/3239]	Time 0.383 (0.546)	Data Time 0.002 (0.012)	Loss 3.9287 (3.4551)	Entropy 1.71104 (1.71829)	Top-1 acc 34.375 (42.151)	Top-5 acc 56.250 (66.006)	lr 0.02366
Train [18][2790/3239]	Time 0.207 (0.546)	Data Time 0.001 (0.012)	Loss 3.4790 (3.4551)	Entropy 1.71101 (1.71827)	Top-1 acc 40.625 (42.153)	Top-5 acc 66.406 (66.008)	lr 0.02366
Train [18][2800/3239]	Time 0.235 (0.546)	Data Time 0.001 (0.012)	Loss 3.3425 (3.4550)	Entropy 1.71098 (1.71824)	Top-1 acc 42.188 (42.154)	Top-5 acc 68.750 (66.008)	lr 0.02366
Train [18][2810/3239]	Time 0.216 (0.545)	Data Time 0.002 (0.012)	Loss 3.2643 (3.4550)	Entropy 1.71091 (1.71821)	Top-1 acc 46.484 (42.157)	Top-5 acc 69.141 (66.012)	lr 0.02366
Train [18][2820/3239]	Time 0.221 (0.545)	Data Time 0.001 (0.012)	Loss 3.3620 (3.4548)	Entropy 1.71090 (1.71819)	Top-1 acc 44.141 (42.156)	Top-5 acc 66.406 (66.016)	lr 0.02366
Train [18][2830/3239]	Time 0.217 (0.544)	Data Time 0.001 (0.012)	Loss 3.5026 (3.4548)	Entropy 1.71080 (1.71816)	Top-1 acc 41.406 (42.153)	Top-5 acc 64.062 (66.017)	lr 0.02366
Train [18][2840/3239]	Time 0.227 (0.544)	Data Time 0.001 (0.012)	Loss 3.5833 (3.4548)	Entropy 1.71073 (1.71814)	Top-1 acc 38.281 (42.153)	Top-5 acc 66.016 (66.018)	lr 0.02366
Train [18][2850/3239]	Time 0.345 (0.544)	Data Time 0.001 (0.012)	Loss 3.4885 (3.4548)	Entropy 1.71056 (1.71811)	Top-1 acc 42.188 (42.152)	Top-5 acc 67.969 (66.019)	lr 0.02366
Train [18][2860/3239]	Time 0.232 (0.543)	Data Time 0.001 (0.012)	Loss 3.3468 (3.4548)	Entropy 1.71050 (1.71808)	Top-1 acc 44.922 (42.157)	Top-5 acc 69.922 (66.019)	lr 0.02365
Train [18][2870/3239]	Time 0.157 (0.543)	Data Time 0.001 (0.012)	Loss 3.4764 (3.4548)	Entropy 1.71049 (1.71806)	Top-1 acc 43.359 (42.151)	Top-5 acc 67.188 (66.017)	lr 0.02365
Train [18][2880/3239]	Time 0.261 (0.543)	Data Time 0.001 (0.012)	Loss 3.4233 (3.4548)	Entropy 1.71029 (1.71803)	Top-1 acc 42.188 (42.152)	Top-5 acc 67.188 (66.015)	lr 0.02365
Train [18][2890/3239]	Time 0.235 (0.542)	Data Time 0.001 (0.012)	Loss 3.3027 (3.4547)	Entropy 1.71024 (1.71800)	Top-1 acc 50.000 (42.154)	Top-5 acc 67.969 (66.015)	lr 0.02365
Train [18][2900/3239]	Time 0.215 (0.542)	Data Time 0.001 (0.012)	Loss 3.6168 (3.4550)	Entropy 1.70992 (1.71798)	Top-1 acc 40.625 (42.149)	Top-5 acc 65.234 (66.009)	lr 0.02365
Train [18][2910/3239]	Time 0.321 (0.542)	Data Time 0.001 (0.012)	Loss 3.6658 (3.4550)	Entropy 1.70992 (1.71795)	Top-1 acc 36.719 (42.149)	Top-5 acc 62.891 (66.010)	lr 0.02365
Train [18][2920/3239]	Time 0.195 (0.541)	Data Time 0.001 (0.012)	Loss 3.3293 (3.4549)	Entropy 1.70989 (1.71792)	Top-1 acc 46.094 (42.151)	Top-5 acc 69.922 (66.015)	lr 0.02365
Train [18][2930/3239]	Time 0.239 (0.541)	Data Time 0.001 (0.012)	Loss 3.4839 (3.4549)	Entropy 1.70979 (1.71789)	Top-1 acc 38.672 (42.151)	Top-5 acc 62.891 (66.015)	lr 0.02365
Train [18][2940/3239]	Time 0.248 (0.540)	Data Time 0.001 (0.012)	Loss 3.5654 (3.4549)	Entropy 1.70971 (1.71787)	Top-1 acc 42.969 (42.150)	Top-5 acc 65.625 (66.017)	lr 0.02365
Train [18][2950/3239]	Time 0.224 (0.540)	Data Time 0.001 (0.012)	Loss 3.5809 (3.4550)	Entropy 1.70959 (1.71784)	Top-1 acc 39.062 (42.151)	Top-5 acc 63.672 (66.017)	lr 0.02365
Train [18][2960/3239]	Time 0.244 (0.540)	Data Time 0.001 (0.012)	Loss 3.2978 (3.4550)	Entropy 1.70952 (1.71781)	Top-1 acc 43.359 (42.152)	Top-5 acc 69.922 (66.016)	lr 0.02365
Train [18][2970/3239]	Time 0.241 (0.539)	Data Time 0.001 (0.012)	Loss 3.4789 (3.4550)	Entropy 1.70941 (1.71778)	Top-1 acc 42.188 (42.150)	Top-5 acc 66.406 (66.014)	lr 0.02365
Train [18][2980/3239]	Time 0.341 (0.539)	Data Time 0.001 (0.012)	Loss 3.3414 (3.4551)	Entropy 1.70940 (1.71776)	Top-1 acc 43.750 (42.147)	Top-5 acc 71.094 (66.013)	lr 0.02365
Train [18][2990/3239]	Time 0.217 (0.539)	Data Time 0.001 (0.012)	Loss 3.6109 (3.4552)	Entropy 1.70932 (1.71773)	Top-1 acc 38.281 (42.143)	Top-5 acc 60.938 (66.011)	lr 0.02365
Train [18][3000/3239]	Time 0.213 (0.538)	Data Time 0.001 (0.012)	Loss 3.3188 (3.4551)	Entropy 1.70920 (1.71770)	Top-1 acc 44.922 (42.141)	Top-5 acc 69.531 (66.011)	lr 0.02365
Train [18][3010/3239]	Time 0.224 (0.538)	Data Time 0.001 (0.012)	Loss 3.6143 (3.4551)	Entropy 1.70917 (1.71767)	Top-1 acc 38.672 (42.143)	Top-5 acc 62.891 (66.012)	lr 0.02365
Train [18][3020/3239]	Time 0.247 (0.538)	Data Time 0.001 (0.012)	Loss 3.4730 (3.4550)	Entropy 1.70908 (1.71764)	Top-1 acc 41.797 (42.144)	Top-5 acc 64.062 (66.014)	lr 0.02365
Train [18][3030/3239]	Time 0.268 (0.537)	Data Time 0.001 (0.012)	Loss 3.4633 (3.4550)	Entropy 1.70906 (1.71761)	Top-1 acc 43.750 (42.144)	Top-5 acc 67.969 (66.013)	lr 0.02365
Train [18][3040/3239]	Time 0.201 (0.537)	Data Time 0.001 (0.012)	Loss 3.7321 (3.4550)	Entropy 1.70900 (1.71759)	Top-1 acc 32.422 (42.138)	Top-5 acc 58.203 (66.013)	lr 0.02365
Train [18][3050/3239]	Time 0.351 (0.536)	Data Time 0.002 (0.011)	Loss 3.4851 (3.4550)	Entropy 1.70896 (1.71756)	Top-1 acc 42.578 (42.138)	Top-5 acc 62.109 (66.012)	lr 0.02365
Train [18][3060/3239]	Time 0.230 (0.536)	Data Time 0.002 (0.011)	Loss 3.5011 (3.4551)	Entropy 1.70889 (1.71753)	Top-1 acc 37.891 (42.135)	Top-5 acc 62.109 (66.011)	lr 0.02365
Train [18][3070/3239]	Time 0.278 (0.550)	Data Time 0.004 (0.011)	Loss 3.5738 (3.4553)	Entropy 1.70884 (1.71750)	Top-1 acc 40.625 (42.133)	Top-5 acc 58.984 (66.005)	lr 0.02365
Train [18][3080/3239]	Time 0.229 (0.549)	Data Time 0.002 (0.011)	Loss 3.4503 (3.4553)	Entropy 1.70872 (1.71747)	Top-1 acc 41.016 (42.133)	Top-5 acc 64.453 (66.005)	lr 0.02364
Train [18][3090/3239]	Time 0.234 (0.549)	Data Time 0.002 (0.011)	Loss 3.5249 (3.4552)	Entropy 1.70864 (1.71745)	Top-1 acc 39.062 (42.135)	Top-5 acc 64.062 (66.008)	lr 0.02364
Train [18][3100/3239]	Time 0.230 (0.549)	Data Time 0.002 (0.011)	Loss 3.3374 (3.4551)	Entropy 1.70865 (1.71742)	Top-1 acc 44.531 (42.133)	Top-5 acc 69.922 (66.012)	lr 0.02364
Train [18][3110/3239]	Time 0.200 (0.548)	Data Time 0.001 (0.011)	Loss 3.4200 (3.4550)	Entropy 1.70833 (1.71739)	Top-1 acc 39.844 (42.135)	Top-5 acc 66.016 (66.013)	lr 0.02364
Train [18][3120/3239]	Time 0.264 (0.548)	Data Time 0.001 (0.011)	Loss 3.5541 (3.4550)	Entropy 1.70828 (1.71736)	Top-1 acc 39.844 (42.136)	Top-5 acc 67.578 (66.017)	lr 0.02364
Train [18][3130/3239]	Time 0.231 (0.547)	Data Time 0.002 (0.011)	Loss 3.5032 (3.4552)	Entropy 1.70827 (1.71733)	Top-1 acc 39.453 (42.131)	Top-5 acc 65.625 (66.014)	lr 0.02364
Train [18][3140/3239]	Time 0.244 (0.547)	Data Time 0.001 (0.011)	Loss 3.4226 (3.4550)	Entropy 1.70812 (1.71730)	Top-1 acc 43.750 (42.136)	Top-5 acc 66.797 (66.017)	lr 0.02364
Train [18][3150/3239]	Time 0.208 (0.547)	Data Time 0.001 (0.011)	Loss 3.4191 (3.4549)	Entropy 1.70789 (1.71727)	Top-1 acc 42.578 (42.140)	Top-5 acc 64.453 (66.022)	lr 0.02364
Train [18][3160/3239]	Time 0.205 (0.546)	Data Time 0.001 (0.011)	Loss 3.5405 (3.4550)	Entropy 1.70785 (1.71724)	Top-1 acc 38.672 (42.138)	Top-5 acc 61.719 (66.017)	lr 0.02364
Train [18][3170/3239]	Time 0.250 (0.546)	Data Time 0.001 (0.011)	Loss 3.4112 (3.4549)	Entropy 1.70782 (1.71721)	Top-1 acc 42.969 (42.139)	Top-5 acc 68.750 (66.022)	lr 0.02364
Train [18][3180/3239]	Time 0.215 (0.546)	Data Time 0.000 (0.011)	Loss 3.4371 (3.4549)	Entropy 1.70766 (1.71718)	Top-1 acc 47.656 (42.137)	Top-5 acc 66.406 (66.022)	lr 0.02364
Train [18][3190/3239]	Time 0.252 (0.545)	Data Time 0.000 (0.011)	Loss 3.4099 (3.4549)	Entropy 1.70753 (1.71715)	Top-1 acc 38.672 (42.138)	Top-5 acc 67.578 (66.022)	lr 0.02364
Train [18][3200/3239]	Time 0.217 (0.545)	Data Time 0.000 (0.011)	Loss 3.6198 (3.4549)	Entropy 1.70739 (1.71712)	Top-1 acc 41.016 (42.136)	Top-5 acc 60.938 (66.024)	lr 0.02364
Train [18][3210/3239]	Time 0.217 (0.544)	Data Time 0.000 (0.011)	Loss 3.4340 (3.4549)	Entropy 1.70734 (1.71709)	Top-1 acc 41.797 (42.138)	Top-5 acc 65.625 (66.025)	lr 0.02364
Train [18][3220/3239]	Time 0.211 (0.544)	Data Time 0.000 (0.011)	Loss 3.6051 (3.4549)	Entropy 1.70736 (1.71706)	Top-1 acc 37.109 (42.139)	Top-5 acc 65.234 (66.023)	lr 0.02364
Train [18][3230/3239]	Time 0.209 (0.544)	Data Time 0.000 (0.011)	Loss 3.4082 (3.4549)	Entropy 1.70743 (1.71703)	Top-1 acc 45.703 (42.139)	Top-5 acc 66.016 (66.023)	lr 0.02364
Train [18][3239/3239]	Time 2.105 (0.543)	Data Time 0.000 (0.011)	Loss 3.4188 (3.4548)	Entropy 1.70743 (1.71701)	Top-1 acc 41.975 (42.142)	Top-5 acc 64.198 (66.024)	lr 0.02364
==========Valid [18/120]	loss 2.235	top-1 acc 51.415 (51.415)	top-5 acc 75.113	Train top-1 42.142	top-5 66.024	Entropy 1.70743	Latency-None: 0.000ms	Flops: 567.75M
Train [19][0/3239]	Time 29.606 (29.606)	Data Time 27.935 (27.935)	Loss 3.4459 (3.4459)	Entropy 1.70741 (1.70741)	Top-1 acc 39.844 (39.844)	Top-5 acc 65.234 (65.234)	lr 0.02364
Train [19][10/3239]	Time 2.739 (3.237)	Data Time 0.034 (2.544)	Loss 3.4229 (3.3677)	Entropy 1.70741 (1.70741)	Top-1 acc 41.016 (43.288)	Top-5 acc 67.578 (67.116)	lr 0.02364
Train [19][20/3239]	Time 0.257 (1.805)	Data Time 0.001 (1.333)	Loss 3.5466 (3.3972)	Entropy 1.70735 (1.70738)	Top-1 acc 41.016 (42.615)	Top-5 acc 65.234 (66.797)	lr 0.02364
Train [19][30/3239]	Time 0.313 (1.365)	Data Time 0.001 (0.904)	Loss 3.3526 (3.4274)	Entropy 1.70733 (1.70737)	Top-1 acc 48.047 (42.162)	Top-5 acc 69.141 (66.331)	lr 0.02364
Train [19][40/3239]	Time 0.220 (1.137)	Data Time 0.001 (0.684)	Loss 3.5747 (3.4368)	Entropy 1.70727 (1.70734)	Top-1 acc 39.062 (42.006)	Top-5 acc 63.672 (66.368)	lr 0.02364
Train [19][50/3239]	Time 0.216 (0.999)	Data Time 0.001 (0.550)	Loss 3.5587 (3.4311)	Entropy 1.70722 (1.70732)	Top-1 acc 38.281 (42.134)	Top-5 acc 66.016 (66.491)	lr 0.02364
Train [19][60/3239]	Time 0.266 (0.909)	Data Time 0.001 (0.460)	Loss 3.4157 (3.4334)	Entropy 1.70721 (1.70730)	Top-1 acc 40.234 (42.239)	Top-5 acc 67.188 (66.374)	lr 0.02363
Train [19][70/3239]	Time 0.209 (0.843)	Data Time 0.001 (0.396)	Loss 3.4966 (3.4383)	Entropy 1.70707 (1.70728)	Top-1 acc 39.844 (42.143)	Top-5 acc 64.844 (66.318)	lr 0.02363
Train [19][80/3239]	Time 0.220 (0.794)	Data Time 0.001 (0.347)	Loss 3.0977 (3.4346)	Entropy 1.70704 (1.70725)	Top-1 acc 52.734 (42.168)	Top-5 acc 75.000 (66.426)	lr 0.02363
Train [19][90/3239]	Time 0.222 (0.756)	Data Time 0.001 (0.309)	Loss 3.2971 (3.4248)	Entropy 1.70701 (1.70723)	Top-1 acc 44.141 (42.462)	Top-5 acc 69.141 (66.608)	lr 0.02363
Train [19][100/3239]	Time 0.251 (0.725)	Data Time 0.001 (0.279)	Loss 3.1773 (3.4276)	Entropy 1.70700 (1.70721)	Top-1 acc 45.312 (42.567)	Top-5 acc 75.391 (66.603)	lr 0.02363
Train [19][110/3239]	Time 0.256 (0.700)	Data Time 0.001 (0.254)	Loss 3.3699 (3.4251)	Entropy 1.70688 (1.70719)	Top-1 acc 48.047 (42.775)	Top-5 acc 68.750 (66.639)	lr 0.02363
Train [19][120/3239]	Time 2.310 (0.679)	Data Time 0.002 (0.233)	Loss 3.4153 (3.4249)	Entropy 1.70688 (1.70716)	Top-1 acc 45.312 (42.765)	Top-5 acc 64.062 (66.645)	lr 0.02363
Train [19][130/3239]	Time 0.222 (0.644)	Data Time 0.001 (0.216)	Loss 3.4939 (3.4254)	Entropy 1.70689 (1.70714)	Top-1 acc 42.969 (42.808)	Top-5 acc 66.016 (66.704)	lr 0.02363
Train [19][140/3239]	Time 0.211 (0.630)	Data Time 0.001 (0.201)	Loss 3.5311 (3.4253)	Entropy 1.70678 (1.70712)	Top-1 acc 41.797 (42.833)	Top-5 acc 66.016 (66.766)	lr 0.02363
Train [19][150/3239]	Time 0.214 (0.618)	Data Time 0.001 (0.187)	Loss 3.0997 (3.4241)	Entropy 1.70668 (1.70709)	Top-1 acc 50.000 (42.834)	Top-5 acc 71.094 (66.758)	lr 0.02363
Train [19][160/3239]	Time 0.226 (0.608)	Data Time 0.001 (0.176)	Loss 3.5094 (3.4202)	Entropy 1.70664 (1.70706)	Top-1 acc 39.844 (42.913)	Top-5 acc 66.797 (66.799)	lr 0.02363
Train [19][170/3239]	Time 0.211 (0.598)	Data Time 0.001 (0.166)	Loss 3.4937 (3.4191)	Entropy 1.70655 (1.70703)	Top-1 acc 42.969 (42.941)	Top-5 acc 65.625 (66.836)	lr 0.02363
Train [19][180/3239]	Time 0.322 (0.811)	Data Time 0.003 (0.157)	Loss 3.7622 (3.4200)	Entropy 1.70653 (1.70701)	Top-1 acc 32.422 (42.865)	Top-5 acc 60.156 (66.818)	lr 0.02363
Train [19][190/3239]	Time 0.245 (0.795)	Data Time 0.002 (0.149)	Loss 3.6488 (3.4184)	Entropy 1.70638 (1.70698)	Top-1 acc 40.234 (42.946)	Top-5 acc 61.328 (66.791)	lr 0.02363
Train [19][200/3239]	Time 0.256 (0.778)	Data Time 0.002 (0.141)	Loss 3.4372 (3.4179)	Entropy 1.70630 (1.70695)	Top-1 acc 44.141 (42.961)	Top-5 acc 67.969 (66.799)	lr 0.02363
Train [19][210/3239]	Time 0.239 (0.761)	Data Time 0.002 (0.135)	Loss 3.5510 (3.4184)	Entropy 1.70628 (1.70692)	Top-1 acc 39.453 (42.943)	Top-5 acc 63.672 (66.801)	lr 0.02363
Train [19][220/3239]	Time 0.210 (0.747)	Data Time 0.003 (0.129)	Loss 3.3547 (3.4165)	Entropy 1.70634 (1.70689)	Top-1 acc 41.406 (42.956)	Top-5 acc 71.094 (66.852)	lr 0.02363
Train [19][230/3239]	Time 2.341 (0.734)	Data Time 0.002 (0.123)	Loss 3.3485 (3.4141)	Entropy 1.70634 (1.70686)	Top-1 acc 45.312 (43.016)	Top-5 acc 68.750 (66.917)	lr 0.02363
Train [19][240/3239]	Time 0.310 (0.713)	Data Time 0.001 (0.118)	Loss 3.4889 (3.4134)	Entropy 1.70631 (1.70684)	Top-1 acc 44.922 (43.072)	Top-5 acc 66.797 (66.881)	lr 0.02363
Train [19][250/3239]	Time 0.243 (0.702)	Data Time 0.001 (0.114)	Loss 3.4065 (3.4140)	Entropy 1.70624 (1.70682)	Top-1 acc 45.312 (43.061)	Top-5 acc 68.750 (66.882)	lr 0.02363
Train [19][260/3239]	Time 0.267 (0.692)	Data Time 0.002 (0.109)	Loss 3.5074 (3.4145)	Entropy 1.70625 (1.70679)	Top-1 acc 40.625 (43.081)	Top-5 acc 64.844 (66.851)	lr 0.02363
Train [19][270/3239]	Time 0.187 (0.683)	Data Time 0.001 (0.105)	Loss 3.3330 (3.4154)	Entropy 1.70620 (1.70677)	Top-1 acc 41.406 (43.060)	Top-5 acc 71.484 (66.850)	lr 0.02363
Train [19][280/3239]	Time 0.215 (0.674)	Data Time 0.001 (0.102)	Loss 3.3390 (3.4138)	Entropy 1.70617 (1.70675)	Top-1 acc 44.141 (43.081)	Top-5 acc 70.703 (66.918)	lr 0.02362
Train [19][290/3239]	Time 0.233 (0.666)	Data Time 0.002 (0.098)	Loss 3.4071 (3.4156)	Entropy 1.70605 (1.70673)	Top-1 acc 41.406 (43.013)	Top-5 acc 66.016 (66.867)	lr 0.02362
Train [19][300/3239]	Time 0.293 (0.658)	Data Time 0.001 (0.095)	Loss 3.4596 (3.4164)	Entropy 1.70600 (1.70671)	Top-1 acc 39.453 (42.983)	Top-5 acc 66.016 (66.867)	lr 0.02362
Train [19][310/3239]	Time 0.186 (0.651)	Data Time 0.001 (0.092)	Loss 3.3344 (3.4161)	Entropy 1.70599 (1.70668)	Top-1 acc 47.266 (42.981)	Top-5 acc 70.312 (66.902)	lr 0.02362
Train [19][320/3239]	Time 0.229 (0.645)	Data Time 0.001 (0.089)	Loss 3.1797 (3.4147)	Entropy 1.70597 (1.70666)	Top-1 acc 46.484 (43.015)	Top-5 acc 69.922 (66.908)	lr 0.02362
Train [19][330/3239]	Time 0.240 (0.638)	Data Time 0.001 (0.087)	Loss 3.4237 (3.4157)	Entropy 1.70590 (1.70664)	Top-1 acc 39.453 (42.976)	Top-5 acc 64.453 (66.888)	lr 0.02362
Train [19][340/3239]	Time 2.349 (0.632)	Data Time 0.001 (0.084)	Loss 3.2541 (3.4146)	Entropy 1.70590 (1.70662)	Top-1 acc 47.266 (43.005)	Top-5 acc 69.531 (66.876)	lr 0.02362
Train [19][350/3239]	Time 0.172 (0.620)	Data Time 0.001 (0.082)	Loss 3.4657 (3.4131)	Entropy 1.70581 (1.70660)	Top-1 acc 40.625 (43.021)	Top-5 acc 62.891 (66.895)	lr 0.02362
Train [19][360/3239]	Time 0.255 (0.615)	Data Time 0.001 (0.080)	Loss 3.4089 (3.4113)	Entropy 1.70571 (1.70657)	Top-1 acc 40.625 (43.067)	Top-5 acc 69.531 (66.969)	lr 0.02362
Train [19][370/3239]	Time 0.225 (0.610)	Data Time 0.001 (0.078)	Loss 3.4480 (3.4121)	Entropy 1.70561 (1.70655)	Top-1 acc 39.062 (43.026)	Top-5 acc 68.359 (66.948)	lr 0.02362
Train [19][380/3239]	Time 0.169 (0.606)	Data Time 0.001 (0.076)	Loss 3.4437 (3.4123)	Entropy 1.70553 (1.70652)	Top-1 acc 37.891 (42.994)	Top-5 acc 66.406 (66.936)	lr 0.02362
Train [19][390/3239]	Time 0.218 (0.601)	Data Time 0.001 (0.074)	Loss 3.3835 (3.4136)	Entropy 1.70551 (1.70649)	Top-1 acc 43.750 (42.981)	Top-5 acc 66.406 (66.927)	lr 0.02362
Train [19][400/3239]	Time 0.175 (0.597)	Data Time 0.002 (0.072)	Loss 3.3230 (3.4130)	Entropy 1.70550 (1.70647)	Top-1 acc 42.969 (43.013)	Top-5 acc 70.312 (66.941)	lr 0.02362
Train [19][410/3239]	Time 0.208 (0.594)	Data Time 0.001 (0.070)	Loss 3.2193 (3.4124)	Entropy 1.70539 (1.70645)	Top-1 acc 45.312 (43.022)	Top-5 acc 71.094 (66.949)	lr 0.02362
Train [19][420/3239]	Time 0.208 (0.590)	Data Time 0.001 (0.069)	Loss 3.2854 (3.4130)	Entropy 1.70514 (1.70642)	Top-1 acc 47.266 (42.997)	Top-5 acc 68.750 (66.932)	lr 0.02362
Train [19][430/3239]	Time 0.206 (0.586)	Data Time 0.001 (0.067)	Loss 3.4938 (3.4131)	Entropy 1.70506 (1.70639)	Top-1 acc 41.406 (43.020)	Top-5 acc 65.625 (66.916)	lr 0.02362
Train [19][440/3239]	Time 0.311 (0.583)	Data Time 0.002 (0.066)	Loss 3.7780 (3.4152)	Entropy 1.70503 (1.70636)	Top-1 acc 35.938 (42.983)	Top-5 acc 62.109 (66.870)	lr 0.02362
Train [19][450/3239]	Time 2.316 (0.580)	Data Time 0.002 (0.064)	Loss 3.5224 (3.4130)	Entropy 1.70503 (1.70633)	Top-1 acc 39.844 (43.019)	Top-5 acc 66.406 (66.905)	lr 0.02362
Train [19][460/3239]	Time 0.168 (0.572)	Data Time 0.002 (0.063)	Loss 3.4730 (3.4120)	Entropy 1.70494 (1.70630)	Top-1 acc 42.188 (43.036)	Top-5 acc 64.453 (66.916)	lr 0.02362
Train [19][470/3239]	Time 0.169 (0.569)	Data Time 0.001 (0.062)	Loss 3.4613 (3.4137)	Entropy 1.70490 (1.70627)	Top-1 acc 39.844 (42.985)	Top-5 acc 62.891 (66.876)	lr 0.02362
Train [19][480/3239]	Time 0.247 (0.566)	Data Time 0.001 (0.061)	Loss 3.5046 (3.4153)	Entropy 1.70487 (1.70624)	Top-1 acc 41.016 (42.954)	Top-5 acc 66.406 (66.850)	lr 0.02362
Train [19][490/3239]	Time 0.213 (0.564)	Data Time 0.001 (0.059)	Loss 3.2599 (3.4155)	Entropy 1.70484 (1.70621)	Top-1 acc 44.922 (42.962)	Top-5 acc 71.484 (66.825)	lr 0.02362
Train [19][500/3239]	Time 0.166 (0.562)	Data Time 0.001 (0.058)	Loss 3.4625 (3.4160)	Entropy 1.70474 (1.70618)	Top-1 acc 37.891 (42.952)	Top-5 acc 64.844 (66.823)	lr 0.02361
Train [19][510/3239]	Time 0.157 (0.559)	Data Time 0.001 (0.057)	Loss 3.3211 (3.4154)	Entropy 1.70467 (1.70615)	Top-1 acc 44.922 (42.968)	Top-5 acc 66.797 (66.843)	lr 0.02361
Train [19][520/3239]	Time 0.217 (0.557)	Data Time 0.001 (0.056)	Loss 3.2833 (3.4158)	Entropy 1.70467 (1.70613)	Top-1 acc 44.531 (42.967)	Top-5 acc 70.703 (66.828)	lr 0.02361
Train [19][530/3239]	Time 0.194 (0.554)	Data Time 0.001 (0.055)	Loss 3.5134 (3.4155)	Entropy 1.70462 (1.70610)	Top-1 acc 42.188 (42.982)	Top-5 acc 64.844 (66.834)	lr 0.02361
Train [19][540/3239]	Time 0.328 (0.626)	Data Time 0.002 (0.054)	Loss 3.4470 (3.4161)	Entropy 1.70449 (1.70607)	Top-1 acc 40.234 (42.959)	Top-5 acc 63.281 (66.804)	lr 0.02361
Train [19][550/3239]	Time 0.220 (0.624)	Data Time 0.002 (0.053)	Loss 3.4355 (3.4166)	Entropy 1.70447 (1.70604)	Top-1 acc 43.359 (42.950)	Top-5 acc 65.234 (66.778)	lr 0.02361
Train [19][560/3239]	Time 2.423 (0.620)	Data Time 0.002 (0.052)	Loss 3.2900 (3.4173)	Entropy 1.70447 (1.70601)	Top-1 acc 46.875 (42.962)	Top-5 acc 71.094 (66.773)	lr 0.02361
Train [19][570/3239]	Time 0.208 (0.613)	Data Time 0.001 (0.051)	Loss 3.4749 (3.4179)	Entropy 1.70441 (1.70599)	Top-1 acc 43.359 (42.939)	Top-5 acc 67.578 (66.754)	lr 0.02361
Train [19][580/3239]	Time 0.254 (0.610)	Data Time 0.002 (0.051)	Loss 3.4812 (3.4183)	Entropy 1.70441 (1.70596)	Top-1 acc 41.797 (42.924)	Top-5 acc 67.188 (66.746)	lr 0.02361
Train [19][590/3239]	Time 0.211 (0.607)	Data Time 0.001 (0.050)	Loss 3.3893 (3.4181)	Entropy 1.70439 (1.70593)	Top-1 acc 41.797 (42.926)	Top-5 acc 66.406 (66.744)	lr 0.02361
Train [19][600/3239]	Time 0.214 (0.604)	Data Time 0.001 (0.049)	Loss 3.2841 (3.4178)	Entropy 1.70431 (1.70590)	Top-1 acc 44.531 (42.935)	Top-5 acc 71.484 (66.744)	lr 0.02361
Train [19][610/3239]	Time 0.224 (0.602)	Data Time 0.001 (0.048)	Loss 3.5302 (3.4173)	Entropy 1.70426 (1.70588)	Top-1 acc 35.938 (42.932)	Top-5 acc 65.234 (66.759)	lr 0.02361
Train [19][620/3239]	Time 0.190 (0.599)	Data Time 0.001 (0.047)	Loss 3.2825 (3.4168)	Entropy 1.70419 (1.70585)	Top-1 acc 49.219 (42.956)	Top-5 acc 69.922 (66.769)	lr 0.02361
Train [19][630/3239]	Time 0.301 (0.597)	Data Time 0.002 (0.047)	Loss 3.4566 (3.4173)	Entropy 1.70417 (1.70583)	Top-1 acc 41.406 (42.954)	Top-5 acc 66.016 (66.765)	lr 0.02361
Train [19][640/3239]	Time 0.215 (0.594)	Data Time 0.001 (0.046)	Loss 3.4876 (3.4169)	Entropy 1.70408 (1.70580)	Top-1 acc 37.891 (42.955)	Top-5 acc 64.062 (66.763)	lr 0.02361
Train [19][650/3239]	Time 0.213 (0.592)	Data Time 0.001 (0.045)	Loss 3.3808 (3.4169)	Entropy 1.70401 (1.70577)	Top-1 acc 42.578 (42.946)	Top-5 acc 65.234 (66.750)	lr 0.02361
Train [19][660/3239]	Time 0.219 (0.589)	Data Time 0.001 (0.045)	Loss 3.5776 (3.4174)	Entropy 1.70396 (1.70575)	Top-1 acc 42.188 (42.962)	Top-5 acc 66.016 (66.742)	lr 0.02361
Train [19][670/3239]	Time 2.219 (0.586)	Data Time 0.001 (0.044)	Loss 3.3868 (3.4173)	Entropy 1.70396 (1.70572)	Top-1 acc 42.188 (42.966)	Top-5 acc 68.750 (66.738)	lr 0.02361
Train [19][680/3239]	Time 0.200 (0.581)	Data Time 0.001 (0.043)	Loss 3.4530 (3.4171)	Entropy 1.70397 (1.70569)	Top-1 acc 43.359 (42.985)	Top-5 acc 66.797 (66.754)	lr 0.02361
Train [19][690/3239]	Time 0.184 (0.579)	Data Time 0.001 (0.043)	Loss 3.3610 (3.4171)	Entropy 1.70392 (1.70567)	Top-1 acc 45.312 (43.004)	Top-5 acc 67.188 (66.745)	lr 0.02361
Train [19][700/3239]	Time 0.184 (0.577)	Data Time 0.001 (0.042)	Loss 3.6214 (3.4171)	Entropy 1.70377 (1.70564)	Top-1 acc 38.672 (43.020)	Top-5 acc 60.938 (66.741)	lr 0.02361
Train [19][710/3239]	Time 0.224 (0.575)	Data Time 0.001 (0.042)	Loss 3.4728 (3.4163)	Entropy 1.70364 (1.70561)	Top-1 acc 41.016 (43.021)	Top-5 acc 64.844 (66.757)	lr 0.02360
Train [19][720/3239]	Time 0.232 (0.572)	Data Time 0.001 (0.041)	Loss 3.4081 (3.4168)	Entropy 1.70359 (1.70559)	Top-1 acc 41.797 (43.022)	Top-5 acc 69.141 (66.748)	lr 0.02360
Train [19][730/3239]	Time 0.199 (0.571)	Data Time 0.001 (0.041)	Loss 3.6412 (3.4172)	Entropy 1.70358 (1.70556)	Top-1 acc 39.062 (42.992)	Top-5 acc 61.328 (66.733)	lr 0.02360
Train [19][740/3239]	Time 0.168 (0.569)	Data Time 0.001 (0.040)	Loss 3.4545 (3.4173)	Entropy 1.70353 (1.70553)	Top-1 acc 42.969 (42.991)	Top-5 acc 65.625 (66.730)	lr 0.02360
Train [19][750/3239]	Time 0.193 (0.567)	Data Time 0.002 (0.040)	Loss 3.5634 (3.4180)	Entropy 1.70348 (1.70550)	Top-1 acc 42.578 (42.985)	Top-5 acc 64.844 (66.715)	lr 0.02360
Train [19][760/3239]	Time 0.226 (0.565)	Data Time 0.001 (0.039)	Loss 3.2505 (3.4185)	Entropy 1.70340 (1.70548)	Top-1 acc 47.266 (42.975)	Top-5 acc 68.750 (66.710)	lr 0.02360
Train [19][770/3239]	Time 0.243 (0.563)	Data Time 0.001 (0.039)	Loss 3.3150 (3.4183)	Entropy 1.70334 (1.70545)	Top-1 acc 46.094 (42.974)	Top-5 acc 67.969 (66.714)	lr 0.02360
Train [19][780/3239]	Time 2.400 (0.561)	Data Time 0.002 (0.038)	Loss 3.3932 (3.4174)	Entropy 1.70334 (1.70542)	Top-1 acc 44.141 (42.992)	Top-5 acc 66.406 (66.741)	lr 0.02360
Train [19][790/3239]	Time 0.217 (0.557)	Data Time 0.001 (0.038)	Loss 3.5062 (3.4176)	Entropy 1.70328 (1.70540)	Top-1 acc 44.141 (42.965)	Top-5 acc 67.188 (66.746)	lr 0.02360
Train [19][800/3239]	Time 0.210 (0.555)	Data Time 0.001 (0.037)	Loss 3.4895 (3.4179)	Entropy 1.70322 (1.70537)	Top-1 acc 38.281 (42.956)	Top-5 acc 65.234 (66.735)	lr 0.02360
Train [19][810/3239]	Time 0.217 (0.554)	Data Time 0.001 (0.037)	Loss 3.4208 (3.4185)	Entropy 1.70316 (1.70534)	Top-1 acc 41.406 (42.934)	Top-5 acc 63.672 (66.726)	lr 0.02360
Train [19][820/3239]	Time 0.202 (0.553)	Data Time 0.001 (0.036)	Loss 3.5373 (3.4178)	Entropy 1.70309 (1.70532)	Top-1 acc 40.625 (42.933)	Top-5 acc 62.500 (66.736)	lr 0.02360
Train [19][830/3239]	Time 0.263 (0.551)	Data Time 0.001 (0.036)	Loss 3.4055 (3.4169)	Entropy 1.70306 (1.70529)	Top-1 acc 40.625 (42.944)	Top-5 acc 69.531 (66.764)	lr 0.02360
Train [19][840/3239]	Time 0.218 (0.550)	Data Time 0.001 (0.035)	Loss 3.4402 (3.4176)	Entropy 1.70300 (1.70526)	Top-1 acc 41.016 (42.933)	Top-5 acc 67.188 (66.747)	lr 0.02360
Train [19][850/3239]	Time 0.213 (0.549)	Data Time 0.001 (0.035)	Loss 3.4528 (3.4172)	Entropy 1.70298 (1.70523)	Top-1 acc 42.188 (42.938)	Top-5 acc 67.188 (66.758)	lr 0.02360
Train [19][860/3239]	Time 0.211 (0.547)	Data Time 0.001 (0.035)	Loss 3.6959 (3.4175)	Entropy 1.70286 (1.70521)	Top-1 acc 37.891 (42.921)	Top-5 acc 62.500 (66.756)	lr 0.02360
Train [19][870/3239]	Time 0.260 (0.546)	Data Time 0.001 (0.034)	Loss 3.3783 (3.4178)	Entropy 1.70266 (1.70518)	Top-1 acc 42.578 (42.919)	Top-5 acc 69.141 (66.751)	lr 0.02360
Train [19][880/3239]	Time 0.250 (0.546)	Data Time 0.002 (0.034)	Loss 3.4700 (3.4177)	Entropy 1.70268 (1.70515)	Top-1 acc 38.281 (42.909)	Top-5 acc 65.234 (66.744)	lr 0.02360
Train [19][890/3239]	Time 2.605 (0.545)	Data Time 0.002 (0.034)	Loss 3.3383 (3.4180)	Entropy 1.70268 (1.70512)	Top-1 acc 41.406 (42.899)	Top-5 acc 66.797 (66.741)	lr 0.02360
Train [19][900/3239]	Time 0.259 (0.542)	Data Time 0.001 (0.033)	Loss 3.4612 (3.4190)	Entropy 1.70268 (1.70510)	Top-1 acc 42.188 (42.879)	Top-5 acc 64.453 (66.714)	lr 0.02360
Train [19][910/3239]	Time 0.319 (0.587)	Data Time 0.002 (0.033)	Loss 3.4697 (3.4190)	Entropy 1.70262 (1.70507)	Top-1 acc 41.797 (42.877)	Top-5 acc 64.062 (66.722)	lr 0.02360
Train [19][920/3239]	Time 0.218 (0.586)	Data Time 0.002 (0.033)	Loss 3.2784 (3.4185)	Entropy 1.70260 (1.70504)	Top-1 acc 49.609 (42.900)	Top-5 acc 70.703 (66.746)	lr 0.02360
Train [19][930/3239]	Time 0.192 (0.584)	Data Time 0.002 (0.032)	Loss 3.4212 (3.4184)	Entropy 1.70251 (1.70502)	Top-1 acc 40.625 (42.898)	Top-5 acc 67.578 (66.747)	lr 0.02359
Train [19][940/3239]	Time 0.239 (0.583)	Data Time 0.002 (0.032)	Loss 3.3950 (3.4191)	Entropy 1.70245 (1.70499)	Top-1 acc 43.750 (42.880)	Top-5 acc 67.578 (66.722)	lr 0.02359
Train [19][950/3239]	Time 0.155 (0.581)	Data Time 0.002 (0.032)	Loss 3.4458 (3.4189)	Entropy 1.70238 (1.70496)	Top-1 acc 39.453 (42.887)	Top-5 acc 67.188 (66.732)	lr 0.02359
Train [19][960/3239]	Time 0.221 (0.580)	Data Time 0.002 (0.031)	Loss 3.5730 (3.4197)	Entropy 1.70232 (1.70494)	Top-1 acc 39.844 (42.879)	Top-5 acc 67.969 (66.726)	lr 0.02359
Train [19][970/3239]	Time 0.192 (0.578)	Data Time 0.001 (0.031)	Loss 3.3547 (3.4200)	Entropy 1.70238 (1.70491)	Top-1 acc 41.016 (42.883)	Top-5 acc 66.016 (66.725)	lr 0.02359
Train [19][980/3239]	Time 0.301 (0.577)	Data Time 0.001 (0.031)	Loss 3.4475 (3.4200)	Entropy 1.70215 (1.70488)	Top-1 acc 41.797 (42.875)	Top-5 acc 66.406 (66.735)	lr 0.02359
Train [19][990/3239]	Time 0.229 (0.575)	Data Time 0.002 (0.030)	Loss 3.4208 (3.4202)	Entropy 1.70211 (1.70485)	Top-1 acc 40.234 (42.881)	Top-5 acc 65.625 (66.729)	lr 0.02359
Train [19][1000/3239]	Time 2.407 (0.574)	Data Time 0.001 (0.030)	Loss 3.3517 (3.4203)	Entropy 1.70211 (1.70483)	Top-1 acc 42.578 (42.883)	Top-5 acc 69.141 (66.730)	lr 0.02359
Train [19][1010/3239]	Time 0.147 (0.571)	Data Time 0.001 (0.030)	Loss 3.5040 (3.4206)	Entropy 1.70203 (1.70480)	Top-1 acc 41.406 (42.879)	Top-5 acc 65.234 (66.716)	lr 0.02359
Train [19][1020/3239]	Time 0.236 (0.570)	Data Time 0.002 (0.030)	Loss 3.5452 (3.4207)	Entropy 1.70201 (1.70477)	Top-1 acc 37.109 (42.867)	Top-5 acc 64.453 (66.702)	lr 0.02359
Train [19][1030/3239]	Time 0.220 (0.568)	Data Time 0.001 (0.029)	Loss 3.4781 (3.4213)	Entropy 1.70199 (1.70475)	Top-1 acc 37.500 (42.852)	Top-5 acc 64.062 (66.698)	lr 0.02359
Train [19][1040/3239]	Time 0.175 (0.567)	Data Time 0.001 (0.029)	Loss 3.3683 (3.4216)	Entropy 1.70191 (1.70472)	Top-1 acc 44.141 (42.843)	Top-5 acc 65.234 (66.686)	lr 0.02359
Train [19][1050/3239]	Time 0.164 (0.566)	Data Time 0.001 (0.029)	Loss 3.5334 (3.4212)	Entropy 1.70180 (1.70469)	Top-1 acc 42.188 (42.855)	Top-5 acc 64.453 (66.697)	lr 0.02359
Train [19][1060/3239]	Time 0.222 (0.565)	Data Time 0.001 (0.029)	Loss 3.3038 (3.4206)	Entropy 1.70178 (1.70466)	Top-1 acc 41.406 (42.875)	Top-5 acc 73.438 (66.715)	lr 0.02359
Train [19][1070/3239]	Time 0.220 (0.563)	Data Time 0.001 (0.028)	Loss 3.5592 (3.4200)	Entropy 1.70167 (1.70464)	Top-1 acc 38.672 (42.873)	Top-5 acc 66.406 (66.738)	lr 0.02359
Train [19][1080/3239]	Time 0.208 (0.562)	Data Time 0.001 (0.028)	Loss 3.4867 (3.4198)	Entropy 1.70157 (1.70461)	Top-1 acc 37.109 (42.881)	Top-5 acc 64.453 (66.745)	lr 0.02359
Train [19][1090/3239]	Time 0.277 (0.561)	Data Time 0.001 (0.028)	Loss 3.4984 (3.4196)	Entropy 1.70156 (1.70458)	Top-1 acc 41.406 (42.896)	Top-5 acc 65.625 (66.760)	lr 0.02359
Train [19][1100/3239]	Time 0.229 (0.560)	Data Time 0.001 (0.028)	Loss 3.3813 (3.4193)	Entropy 1.70151 (1.70455)	Top-1 acc 41.797 (42.900)	Top-5 acc 69.531 (66.768)	lr 0.02359
Train [19][1110/3239]	Time 2.439 (0.559)	Data Time 0.001 (0.027)	Loss 3.2880 (3.4193)	Entropy 1.70151 (1.70453)	Top-1 acc 45.703 (42.896)	Top-5 acc 67.578 (66.770)	lr 0.02359
Train [19][1120/3239]	Time 0.381 (0.556)	Data Time 0.002 (0.027)	Loss 3.4669 (3.4193)	Entropy 1.70149 (1.70450)	Top-1 acc 35.547 (42.893)	Top-5 acc 65.625 (66.766)	lr 0.02359
Train [19][1130/3239]	Time 0.207 (0.555)	Data Time 0.001 (0.027)	Loss 3.3636 (3.4192)	Entropy 1.70127 (1.70447)	Top-1 acc 46.875 (42.897)	Top-5 acc 69.531 (66.771)	lr 0.02359
Train [19][1140/3239]	Time 0.247 (0.554)	Data Time 0.001 (0.027)	Loss 3.3911 (3.4190)	Entropy 1.70121 (1.70444)	Top-1 acc 44.531 (42.905)	Top-5 acc 69.922 (66.777)	lr 0.02358
Train [19][1150/3239]	Time 0.151 (0.553)	Data Time 0.001 (0.027)	Loss 3.3353 (3.4189)	Entropy 1.70119 (1.70441)	Top-1 acc 45.312 (42.906)	Top-5 acc 69.531 (66.777)	lr 0.02358
Train [19][1160/3239]	Time 0.190 (0.552)	Data Time 0.001 (0.026)	Loss 3.3219 (3.4192)	Entropy 1.70114 (1.70439)	Top-1 acc 41.406 (42.896)	Top-5 acc 69.141 (66.767)	lr 0.02358
Train [19][1170/3239]	Time 0.221 (0.551)	Data Time 0.001 (0.026)	Loss 3.4391 (3.4194)	Entropy 1.70111 (1.70436)	Top-1 acc 42.188 (42.903)	Top-5 acc 67.188 (66.767)	lr 0.02358
Train [19][1180/3239]	Time 0.151 (0.550)	Data Time 0.001 (0.026)	Loss 3.4141 (3.4198)	Entropy 1.70101 (1.70433)	Top-1 acc 40.625 (42.889)	Top-5 acc 66.406 (66.761)	lr 0.02358
Train [19][1190/3239]	Time 0.156 (0.549)	Data Time 0.001 (0.026)	Loss 3.2496 (3.4199)	Entropy 1.70071 (1.70430)	Top-1 acc 46.484 (42.881)	Top-5 acc 69.141 (66.758)	lr 0.02358
Train [19][1200/3239]	Time 0.211 (0.548)	Data Time 0.001 (0.026)	Loss 3.5610 (3.4201)	Entropy 1.70061 (1.70427)	Top-1 acc 39.453 (42.875)	Top-5 acc 65.234 (66.762)	lr 0.02358
Train [19][1210/3239]	Time 0.221 (0.547)	Data Time 0.001 (0.025)	Loss 3.4448 (3.4201)	Entropy 1.70060 (1.70424)	Top-1 acc 42.578 (42.864)	Top-5 acc 67.188 (66.762)	lr 0.02358
Train [19][1220/3239]	Time 2.226 (0.546)	Data Time 0.001 (0.025)	Loss 3.2513 (3.4202)	Entropy 1.70060 (1.70421)	Top-1 acc 45.312 (42.864)	Top-5 acc 73.828 (66.764)	lr 0.02358
Train [19][1230/3239]	Time 0.209 (0.543)	Data Time 0.001 (0.025)	Loss 3.6421 (3.4201)	Entropy 1.70052 (1.70418)	Top-1 acc 36.328 (42.861)	Top-5 acc 64.453 (66.772)	lr 0.02358
Train [19][1240/3239]	Time 0.187 (0.542)	Data Time 0.001 (0.025)	Loss 3.4548 (3.4204)	Entropy 1.70047 (1.70415)	Top-1 acc 39.062 (42.857)	Top-5 acc 65.625 (66.766)	lr 0.02358
Train [19][1250/3239]	Time 0.206 (0.541)	Data Time 0.002 (0.025)	Loss 3.1518 (3.4197)	Entropy 1.70043 (1.70412)	Top-1 acc 53.125 (42.873)	Top-5 acc 70.312 (66.780)	lr 0.02358
Train [19][1260/3239]	Time 0.219 (0.540)	Data Time 0.001 (0.025)	Loss 3.2692 (3.4197)	Entropy 1.70040 (1.70409)	Top-1 acc 46.094 (42.867)	Top-5 acc 70.312 (66.784)	lr 0.02358
Train [19][1270/3239]	Time 0.279 (0.568)	Data Time 0.003 (0.024)	Loss 3.4702 (3.4197)	Entropy 1.70040 (1.70406)	Top-1 acc 40.625 (42.864)	Top-5 acc 66.797 (66.786)	lr 0.02358
Train [19][1280/3239]	Time 0.242 (0.569)	Data Time 0.002 (0.024)	Loss 3.3672 (3.4202)	Entropy 1.70025 (1.70403)	Top-1 acc 46.875 (42.857)	Top-5 acc 69.922 (66.779)	lr 0.02358
Train [19][1290/3239]	Time 0.216 (0.568)	Data Time 0.003 (0.024)	Loss 3.4719 (3.4197)	Entropy 1.70013 (1.70400)	Top-1 acc 44.531 (42.870)	Top-5 acc 64.453 (66.781)	lr 0.02358
Train [19][1300/3239]	Time 0.228 (0.567)	Data Time 0.001 (0.024)	Loss 3.5270 (3.4198)	Entropy 1.70004 (1.70397)	Top-1 acc 39.453 (42.874)	Top-5 acc 62.891 (66.772)	lr 0.02358
Train [19][1310/3239]	Time 0.200 (0.566)	Data Time 0.001 (0.024)	Loss 3.2746 (3.4195)	Entropy 1.70003 (1.70394)	Top-1 acc 45.703 (42.876)	Top-5 acc 68.359 (66.779)	lr 0.02358
Train [19][1320/3239]	Time 0.167 (0.565)	Data Time 0.001 (0.024)	Loss 3.2540 (3.4196)	Entropy 1.70002 (1.70391)	Top-1 acc 45.312 (42.875)	Top-5 acc 70.312 (66.777)	lr 0.02358
Train [19][1330/3239]	Time 2.344 (0.564)	Data Time 0.001 (0.023)	Loss 3.5154 (3.4198)	Entropy 1.70002 (1.70389)	Top-1 acc 43.359 (42.877)	Top-5 acc 64.844 (66.770)	lr 0.02358
Train [19][1340/3239]	Time 0.180 (0.561)	Data Time 0.001 (0.023)	Loss 3.4915 (3.4198)	Entropy 1.69997 (1.70386)	Top-1 acc 46.484 (42.878)	Top-5 acc 60.938 (66.767)	lr 0.02358
Train [19][1350/3239]	Time 0.252 (0.560)	Data Time 0.001 (0.023)	Loss 3.5569 (3.4202)	Entropy 1.69992 (1.70383)	Top-1 acc 41.797 (42.871)	Top-5 acc 67.969 (66.757)	lr 0.02358
Train [19][1360/3239]	Time 0.220 (0.559)	Data Time 0.001 (0.023)	Loss 3.2591 (3.4202)	Entropy 1.69989 (1.70380)	Top-1 acc 48.047 (42.860)	Top-5 acc 68.750 (66.750)	lr 0.02357
Train [19][1370/3239]	Time 0.203 (0.558)	Data Time 0.001 (0.023)	Loss 3.5268 (3.4203)	Entropy 1.69985 (1.70377)	Top-1 acc 41.406 (42.863)	Top-5 acc 65.625 (66.759)	lr 0.02357
Train [19][1380/3239]	Time 0.194 (0.557)	Data Time 0.001 (0.023)	Loss 3.4295 (3.4202)	Entropy 1.69977 (1.70374)	Top-1 acc 45.312 (42.869)	Top-5 acc 67.188 (66.765)	lr 0.02357
Train [19][1390/3239]	Time 0.241 (0.557)	Data Time 0.001 (0.022)	Loss 3.4049 (3.4206)	Entropy 1.69978 (1.70371)	Top-1 acc 46.484 (42.872)	Top-5 acc 66.406 (66.755)	lr 0.02357
Train [19][1400/3239]	Time 0.205 (0.556)	Data Time 0.001 (0.022)	Loss 3.4319 (3.4208)	Entropy 1.69978 (1.70368)	Top-1 acc 41.406 (42.869)	Top-5 acc 64.453 (66.752)	lr 0.02357
Train [19][1410/3239]	Time 0.160 (0.555)	Data Time 0.001 (0.022)	Loss 3.3588 (3.4211)	Entropy 1.69974 (1.70366)	Top-1 acc 43.750 (42.864)	Top-5 acc 67.578 (66.746)	lr 0.02357
Train [19][1420/3239]	Time 0.251 (0.554)	Data Time 0.001 (0.022)	Loss 3.6222 (3.4212)	Entropy 1.69971 (1.70363)	Top-1 acc 39.062 (42.862)	Top-5 acc 63.672 (66.739)	lr 0.02357
Train [19][1430/3239]	Time 0.226 (0.553)	Data Time 0.001 (0.022)	Loss 3.5394 (3.4213)	Entropy 1.69968 (1.70360)	Top-1 acc 39.453 (42.869)	Top-5 acc 65.234 (66.738)	lr 0.02357
Train [19][1440/3239]	Time 2.435 (0.553)	Data Time 0.001 (0.022)	Loss 3.3747 (3.4209)	Entropy 1.69968 (1.70357)	Top-1 acc 43.359 (42.879)	Top-5 acc 68.750 (66.745)	lr 0.02357
Train [19][1450/3239]	Time 0.245 (0.550)	Data Time 0.001 (0.022)	Loss 3.1215 (3.4210)	Entropy 1.69966 (1.70355)	Top-1 acc 50.000 (42.877)	Top-5 acc 71.875 (66.742)	lr 0.02357
Train [19][1460/3239]	Time 0.316 (0.549)	Data Time 0.001 (0.021)	Loss 3.4108 (3.4213)	Entropy 1.69961 (1.70352)	Top-1 acc 43.750 (42.878)	Top-5 acc 66.016 (66.733)	lr 0.02357
Train [19][1470/3239]	Time 0.202 (0.549)	Data Time 0.001 (0.021)	Loss 3.6571 (3.4215)	Entropy 1.69959 (1.70349)	Top-1 acc 42.188 (42.876)	Top-5 acc 61.328 (66.723)	lr 0.02357
Train [19][1480/3239]	Time 0.243 (0.548)	Data Time 0.002 (0.021)	Loss 3.4708 (3.4213)	Entropy 1.69951 (1.70347)	Top-1 acc 41.797 (42.878)	Top-5 acc 64.062 (66.722)	lr 0.02357
Train [19][1490/3239]	Time 0.218 (0.547)	Data Time 0.001 (0.021)	Loss 3.3675 (3.4216)	Entropy 1.69941 (1.70344)	Top-1 acc 42.969 (42.873)	Top-5 acc 66.016 (66.712)	lr 0.02357
Train [19][1500/3239]	Time 0.194 (0.546)	Data Time 0.001 (0.021)	Loss 3.5568 (3.4211)	Entropy 1.69934 (1.70341)	Top-1 acc 38.672 (42.883)	Top-5 acc 64.062 (66.723)	lr 0.02357
Train [19][1510/3239]	Time 0.208 (0.546)	Data Time 0.001 (0.021)	Loss 3.4947 (3.4208)	Entropy 1.69927 (1.70339)	Top-1 acc 45.312 (42.898)	Top-5 acc 67.188 (66.728)	lr 0.02357
Train [19][1520/3239]	Time 0.211 (0.545)	Data Time 0.001 (0.021)	Loss 3.4859 (3.4209)	Entropy 1.69924 (1.70336)	Top-1 acc 44.922 (42.898)	Top-5 acc 67.969 (66.728)	lr 0.02357
Train [19][1530/3239]	Time 0.315 (0.544)	Data Time 0.001 (0.021)	Loss 3.4034 (3.4210)	Entropy 1.69921 (1.70333)	Top-1 acc 42.969 (42.896)	Top-5 acc 66.406 (66.721)	lr 0.02357
Train [19][1540/3239]	Time 0.276 (0.544)	Data Time 0.001 (0.020)	Loss 3.4144 (3.4209)	Entropy 1.69916 (1.70330)	Top-1 acc 44.531 (42.900)	Top-5 acc 65.625 (66.722)	lr 0.02357
Train [19][1550/3239]	Time 2.344 (0.543)	Data Time 0.002 (0.020)	Loss 3.4950 (3.4203)	Entropy 1.69916 (1.70328)	Top-1 acc 41.797 (42.908)	Top-5 acc 64.844 (66.737)	lr 0.02357
Train [19][1560/3239]	Time 0.207 (0.541)	Data Time 0.002 (0.020)	Loss 3.5765 (3.4205)	Entropy 1.69911 (1.70325)	Top-1 acc 39.453 (42.905)	Top-5 acc 63.672 (66.739)	lr 0.02357
Train [19][1570/3239]	Time 0.165 (0.540)	Data Time 0.001 (0.020)	Loss 3.4965 (3.4204)	Entropy 1.69912 (1.70323)	Top-1 acc 41.406 (42.911)	Top-5 acc 66.016 (66.744)	lr 0.02356
Train [19][1580/3239]	Time 0.140 (0.540)	Data Time 0.001 (0.020)	Loss 3.4939 (3.4207)	Entropy 1.69912 (1.70320)	Top-1 acc 39.453 (42.902)	Top-5 acc 67.578 (66.742)	lr 0.02356
Train [19][1590/3239]	Time 0.213 (0.539)	Data Time 0.001 (0.020)	Loss 3.5245 (3.4208)	Entropy 1.69909 (1.70317)	Top-1 acc 41.797 (42.897)	Top-5 acc 65.234 (66.738)	lr 0.02356
Train [19][1600/3239]	Time 0.295 (0.538)	Data Time 0.001 (0.020)	Loss 3.3443 (3.4205)	Entropy 1.69902 (1.70315)	Top-1 acc 46.094 (42.907)	Top-5 acc 68.750 (66.744)	lr 0.02356
Train [19][1610/3239]	Time 0.209 (0.538)	Data Time 0.001 (0.020)	Loss 3.4932 (3.4203)	Entropy 1.69898 (1.70312)	Top-1 acc 38.281 (42.915)	Top-5 acc 63.281 (66.742)	lr 0.02356
Train [19][1620/3239]	Time 0.203 (0.537)	Data Time 0.001 (0.020)	Loss 3.3003 (3.4201)	Entropy 1.69894 (1.70310)	Top-1 acc 47.266 (42.921)	Top-5 acc 66.016 (66.746)	lr 0.02356
Train [19][1630/3239]	Time 0.279 (0.558)	Data Time 0.002 (0.019)	Loss 3.5023 (3.4203)	Entropy 1.69847 (1.70307)	Top-1 acc 39.453 (42.919)	Top-5 acc 65.234 (66.744)	lr 0.02356
Train [19][1640/3239]	Time 0.222 (0.559)	Data Time 0.006 (0.019)	Loss 3.3044 (3.4202)	Entropy 1.69846 (1.70304)	Top-1 acc 44.922 (42.921)	Top-5 acc 69.922 (66.749)	lr 0.02356
Train [19][1650/3239]	Time 0.238 (0.558)	Data Time 0.001 (0.019)	Loss 3.3118 (3.4200)	Entropy 1.69837 (1.70301)	Top-1 acc 45.312 (42.924)	Top-5 acc 67.578 (66.751)	lr 0.02356
Train [19][1660/3239]	Time 2.279 (0.557)	Data Time 0.002 (0.019)	Loss 3.4223 (3.4198)	Entropy 1.69837 (1.70299)	Top-1 acc 41.797 (42.932)	Top-5 acc 67.188 (66.754)	lr 0.02356
Train [19][1670/3239]	Time 0.220 (0.556)	Data Time 0.001 (0.019)	Loss 3.4644 (3.4195)	Entropy 1.69825 (1.70296)	Top-1 acc 42.969 (42.936)	Top-5 acc 65.625 (66.762)	lr 0.02356
Train [19][1680/3239]	Time 0.205 (0.555)	Data Time 0.001 (0.019)	Loss 3.3449 (3.4194)	Entropy 1.69822 (1.70293)	Top-1 acc 41.797 (42.934)	Top-5 acc 67.188 (66.764)	lr 0.02356
Train [19][1690/3239]	Time 0.210 (0.554)	Data Time 0.001 (0.019)	Loss 3.6414 (3.4194)	Entropy 1.69804 (1.70290)	Top-1 acc 38.281 (42.937)	Top-5 acc 60.938 (66.763)	lr 0.02356
Train [19][1700/3239]	Time 0.178 (0.553)	Data Time 0.001 (0.019)	Loss 3.3968 (3.4194)	Entropy 1.69796 (1.70287)	Top-1 acc 42.969 (42.924)	Top-5 acc 66.406 (66.760)	lr 0.02356
Train [19][1710/3239]	Time 0.211 (0.553)	Data Time 0.001 (0.019)	Loss 3.4125 (3.4195)	Entropy 1.69793 (1.70284)	Top-1 acc 44.141 (42.926)	Top-5 acc 67.188 (66.757)	lr 0.02356
Train [19][1720/3239]	Time 0.197 (0.552)	Data Time 0.001 (0.019)	Loss 3.4128 (3.4193)	Entropy 1.69783 (1.70281)	Top-1 acc 41.406 (42.930)	Top-5 acc 69.922 (66.766)	lr 0.02356
Train [19][1730/3239]	Time 0.324 (0.551)	Data Time 0.001 (0.018)	Loss 3.4213 (3.4192)	Entropy 1.69786 (1.70279)	Top-1 acc 41.406 (42.930)	Top-5 acc 64.844 (66.762)	lr 0.02356
Train [19][1740/3239]	Time 0.191 (0.551)	Data Time 0.001 (0.018)	Loss 3.4435 (3.4189)	Entropy 1.69781 (1.70276)	Top-1 acc 42.188 (42.937)	Top-5 acc 66.406 (66.771)	lr 0.02356
Train [19][1750/3239]	Time 0.238 (0.550)	Data Time 0.001 (0.018)	Loss 3.4028 (3.4190)	Entropy 1.69761 (1.70273)	Top-1 acc 45.312 (42.933)	Top-5 acc 66.016 (66.773)	lr 0.02356
Train [19][1760/3239]	Time 0.237 (0.549)	Data Time 0.001 (0.018)	Loss 3.4970 (3.4184)	Entropy 1.69756 (1.70270)	Top-1 acc 39.453 (42.939)	Top-5 acc 66.016 (66.786)	lr 0.02356
Train [19][1770/3239]	Time 2.296 (0.549)	Data Time 0.001 (0.018)	Loss 3.4313 (3.4185)	Entropy 1.69756 (1.70267)	Top-1 acc 43.750 (42.935)	Top-5 acc 67.188 (66.794)	lr 0.02356
Train [19][1780/3239]	Time 0.204 (0.547)	Data Time 0.001 (0.018)	Loss 3.4920 (3.4185)	Entropy 1.69754 (1.70264)	Top-1 acc 40.234 (42.937)	Top-5 acc 64.453 (66.792)	lr 0.02355
Train [19][1790/3239]	Time 0.201 (0.546)	Data Time 0.001 (0.018)	Loss 3.3615 (3.4181)	Entropy 1.69752 (1.70261)	Top-1 acc 45.312 (42.946)	Top-5 acc 65.234 (66.801)	lr 0.02355
Train [19][1800/3239]	Time 0.216 (0.546)	Data Time 0.001 (0.018)	Loss 3.5077 (3.4182)	Entropy 1.69750 (1.70259)	Top-1 acc 38.281 (42.938)	Top-5 acc 62.500 (66.800)	lr 0.02355
Train [19][1810/3239]	Time 0.212 (0.545)	Data Time 0.001 (0.018)	Loss 3.2932 (3.4178)	Entropy 1.69747 (1.70256)	Top-1 acc 48.828 (42.953)	Top-5 acc 67.188 (66.809)	lr 0.02355
Train [19][1820/3239]	Time 0.188 (0.544)	Data Time 0.001 (0.018)	Loss 3.3852 (3.4178)	Entropy 1.69744 (1.70253)	Top-1 acc 42.188 (42.952)	Top-5 acc 65.625 (66.809)	lr 0.02355
Train [19][1830/3239]	Time 0.231 (0.544)	Data Time 0.001 (0.018)	Loss 3.5690 (3.4179)	Entropy 1.69736 (1.70250)	Top-1 acc 38.281 (42.949)	Top-5 acc 63.281 (66.811)	lr 0.02355
Train [19][1840/3239]	Time 0.202 (0.543)	Data Time 0.001 (0.017)	Loss 3.5916 (3.4181)	Entropy 1.69714 (1.70247)	Top-1 acc 39.062 (42.939)	Top-5 acc 64.844 (66.804)	lr 0.02355
Train [19][1850/3239]	Time 0.197 (0.542)	Data Time 0.001 (0.017)	Loss 3.5432 (3.4183)	Entropy 1.69712 (1.70244)	Top-1 acc 38.672 (42.934)	Top-5 acc 60.156 (66.790)	lr 0.02355
Train [19][1860/3239]	Time 0.282 (0.542)	Data Time 0.002 (0.017)	Loss 3.5375 (3.4184)	Entropy 1.69711 (1.70241)	Top-1 acc 42.969 (42.935)	Top-5 acc 63.672 (66.790)	lr 0.02355
Train [19][1870/3239]	Time 0.316 (0.541)	Data Time 0.001 (0.017)	Loss 3.4441 (3.4181)	Entropy 1.69704 (1.70239)	Top-1 acc 46.484 (42.943)	Top-5 acc 67.188 (66.793)	lr 0.02355
Train [19][1880/3239]	Time 2.301 (0.541)	Data Time 0.002 (0.017)	Loss 3.3923 (3.4180)	Entropy 1.69704 (1.70236)	Top-1 acc 42.969 (42.946)	Top-5 acc 66.406 (66.801)	lr 0.02355
Train [19][1890/3239]	Time 0.189 (0.539)	Data Time 0.001 (0.017)	Loss 3.5027 (3.4185)	Entropy 1.69704 (1.70233)	Top-1 acc 39.453 (42.938)	Top-5 acc 66.406 (66.791)	lr 0.02355
Train [19][1900/3239]	Time 0.212 (0.539)	Data Time 0.001 (0.017)	Loss 3.3174 (3.4184)	Entropy 1.69697 (1.70230)	Top-1 acc 45.703 (42.946)	Top-5 acc 68.359 (66.793)	lr 0.02355
Train [19][1910/3239]	Time 0.196 (0.538)	Data Time 0.001 (0.017)	Loss 3.3615 (3.4182)	Entropy 1.69693 (1.70227)	Top-1 acc 43.750 (42.951)	Top-5 acc 68.750 (66.799)	lr 0.02355
Train [19][1920/3239]	Time 0.210 (0.538)	Data Time 0.001 (0.017)	Loss 3.3885 (3.4182)	Entropy 1.69692 (1.70225)	Top-1 acc 42.969 (42.950)	Top-5 acc 66.016 (66.800)	lr 0.02355
Train [19][1930/3239]	Time 0.253 (0.537)	Data Time 0.001 (0.017)	Loss 3.6026 (3.4185)	Entropy 1.69690 (1.70222)	Top-1 acc 45.703 (42.948)	Top-5 acc 64.062 (66.794)	lr 0.02355
Train [19][1940/3239]	Time 0.212 (0.537)	Data Time 0.001 (0.017)	Loss 3.4829 (3.4187)	Entropy 1.69675 (1.70219)	Top-1 acc 41.016 (42.944)	Top-5 acc 64.453 (66.789)	lr 0.02355
Train [19][1950/3239]	Time 0.195 (0.536)	Data Time 0.001 (0.017)	Loss 3.4559 (3.4188)	Entropy 1.69669 (1.70216)	Top-1 acc 42.969 (42.944)	Top-5 acc 65.625 (66.790)	lr 0.02355
Train [19][1960/3239]	Time 0.214 (0.536)	Data Time 0.001 (0.017)	Loss 3.2643 (3.4188)	Entropy 1.69669 (1.70213)	Top-1 acc 42.969 (42.938)	Top-5 acc 69.141 (66.789)	lr 0.02355
Train [19][1970/3239]	Time 0.210 (0.535)	Data Time 0.001 (0.016)	Loss 3.5978 (3.4192)	Entropy 1.69664 (1.70211)	Top-1 acc 44.141 (42.931)	Top-5 acc 62.500 (66.782)	lr 0.02355
Train [19][1980/3239]	Time 0.289 (0.535)	Data Time 0.003 (0.016)	Loss 3.3953 (3.4189)	Entropy 1.69656 (1.70208)	Top-1 acc 44.531 (42.934)	Top-5 acc 66.016 (66.788)	lr 0.02355
Train [19][1990/3239]	Time 44.037 (0.555)	Data Time 0.002 (0.016)	Loss 3.1698 (3.4189)	Entropy 1.69656 (1.70205)	Top-1 acc 48.047 (42.930)	Top-5 acc 72.656 (66.785)	lr 0.02354
Train [19][2000/3239]	Time 0.295 (0.554)	Data Time 0.002 (0.016)	Loss 3.4251 (3.4190)	Entropy 1.69654 (1.70202)	Top-1 acc 42.578 (42.926)	Top-5 acc 68.359 (66.785)	lr 0.02354
Train [19][2010/3239]	Time 0.194 (0.553)	Data Time 0.002 (0.016)	Loss 3.4729 (3.4193)	Entropy 1.69634 (1.70200)	Top-1 acc 40.234 (42.916)	Top-5 acc 62.891 (66.778)	lr 0.02354
Train [19][2020/3239]	Time 0.213 (0.553)	Data Time 0.002 (0.016)	Loss 3.6165 (3.4191)	Entropy 1.69631 (1.70197)	Top-1 acc 42.188 (42.921)	Top-5 acc 64.062 (66.784)	lr 0.02354
Train [19][2030/3239]	Time 0.230 (0.552)	Data Time 0.001 (0.016)	Loss 3.4069 (3.4191)	Entropy 1.69629 (1.70194)	Top-1 acc 46.875 (42.925)	Top-5 acc 64.062 (66.786)	lr 0.02354
Train [19][2040/3239]	Time 0.209 (0.551)	Data Time 0.001 (0.016)	Loss 3.4508 (3.4191)	Entropy 1.69626 (1.70191)	Top-1 acc 41.797 (42.920)	Top-5 acc 67.188 (66.784)	lr 0.02354
Train [19][2050/3239]	Time 0.137 (0.551)	Data Time 0.001 (0.016)	Loss 3.4283 (3.4191)	Entropy 1.69615 (1.70188)	Top-1 acc 40.625 (42.912)	Top-5 acc 70.312 (66.784)	lr 0.02354
Train [19][2060/3239]	Time 0.210 (0.550)	Data Time 0.001 (0.016)	Loss 3.4553 (3.4192)	Entropy 1.69611 (1.70186)	Top-1 acc 42.969 (42.915)	Top-5 acc 64.844 (66.778)	lr 0.02354
Train [19][2070/3239]	Time 0.348 (0.550)	Data Time 0.001 (0.016)	Loss 3.3240 (3.4194)	Entropy 1.69604 (1.70183)	Top-1 acc 44.922 (42.913)	Top-5 acc 71.094 (66.775)	lr 0.02354
Train [19][2080/3239]	Time 0.289 (0.549)	Data Time 0.001 (0.016)	Loss 3.2676 (3.4193)	Entropy 1.69593 (1.70180)	Top-1 acc 46.484 (42.918)	Top-5 acc 69.531 (66.776)	lr 0.02354
Train [19][2090/3239]	Time 0.190 (0.549)	Data Time 0.001 (0.016)	Loss 3.3541 (3.4194)	Entropy 1.69582 (1.70177)	Top-1 acc 46.875 (42.911)	Top-5 acc 66.016 (66.772)	lr 0.02354
Train [19][2100/3239]	Time 2.282 (0.548)	Data Time 0.001 (0.016)	Loss 3.3843 (3.4196)	Entropy 1.69582 (1.70174)	Top-1 acc 44.922 (42.909)	Top-5 acc 66.016 (66.767)	lr 0.02354
Train [19][2110/3239]	Time 0.239 (0.547)	Data Time 0.001 (0.015)	Loss 3.4444 (3.4198)	Entropy 1.69574 (1.70172)	Top-1 acc 44.141 (42.904)	Top-5 acc 66.406 (66.758)	lr 0.02354
Train [19][2120/3239]	Time 0.214 (0.546)	Data Time 0.001 (0.015)	Loss 3.3312 (3.4197)	Entropy 1.69569 (1.70169)	Top-1 acc 42.578 (42.901)	Top-5 acc 69.922 (66.759)	lr 0.02354
Train [19][2130/3239]	Time 0.265 (0.546)	Data Time 0.001 (0.015)	Loss 3.4040 (3.4199)	Entropy 1.69547 (1.70166)	Top-1 acc 40.625 (42.899)	Top-5 acc 69.141 (66.757)	lr 0.02354
Train [19][2140/3239]	Time 0.222 (0.545)	Data Time 0.002 (0.015)	Loss 3.3542 (3.4200)	Entropy 1.69545 (1.70163)	Top-1 acc 44.922 (42.895)	Top-5 acc 67.578 (66.755)	lr 0.02354
Train [19][2150/3239]	Time 0.218 (0.545)	Data Time 0.002 (0.015)	Loss 3.6259 (3.4201)	Entropy 1.69543 (1.70160)	Top-1 acc 39.844 (42.899)	Top-5 acc 64.844 (66.754)	lr 0.02354
Train [19][2160/3239]	Time 0.217 (0.544)	Data Time 0.001 (0.015)	Loss 3.3945 (3.4196)	Entropy 1.69540 (1.70157)	Top-1 acc 43.359 (42.907)	Top-5 acc 67.188 (66.762)	lr 0.02354
Train [19][2170/3239]	Time 0.188 (0.544)	Data Time 0.002 (0.015)	Loss 3.6407 (3.4198)	Entropy 1.69526 (1.70154)	Top-1 acc 38.672 (42.910)	Top-5 acc 62.500 (66.760)	lr 0.02354
Train [19][2180/3239]	Time 0.196 (0.543)	Data Time 0.001 (0.015)	Loss 3.2748 (3.4197)	Entropy 1.69527 (1.70151)	Top-1 acc 45.312 (42.912)	Top-5 acc 68.750 (66.759)	lr 0.02354
Train [19][2190/3239]	Time 0.219 (0.543)	Data Time 0.001 (0.015)	Loss 3.2825 (3.4196)	Entropy 1.69526 (1.70149)	Top-1 acc 45.312 (42.907)	Top-5 acc 72.656 (66.761)	lr 0.02354
Train [19][2200/3239]	Time 0.320 (0.542)	Data Time 0.001 (0.015)	Loss 3.4553 (3.4196)	Entropy 1.69522 (1.70146)	Top-1 acc 39.844 (42.906)	Top-5 acc 66.016 (66.766)	lr 0.02353
Train [19][2210/3239]	Time 2.401 (0.542)	Data Time 0.002 (0.015)	Loss 3.4711 (3.4195)	Entropy 1.69522 (1.70143)	Top-1 acc 41.797 (42.906)	Top-5 acc 63.672 (66.770)	lr 0.02353
Train [19][2220/3239]	Time 0.229 (0.540)	Data Time 0.001 (0.015)	Loss 3.3216 (3.4192)	Entropy 1.69509 (1.70140)	Top-1 acc 46.875 (42.910)	Top-5 acc 69.922 (66.776)	lr 0.02353
Train [19][2230/3239]	Time 0.222 (0.540)	Data Time 0.001 (0.015)	Loss 3.3315 (3.4193)	Entropy 1.69506 (1.70137)	Top-1 acc 44.141 (42.909)	Top-5 acc 68.359 (66.776)	lr 0.02353
Train [19][2240/3239]	Time 0.223 (0.539)	Data Time 0.001 (0.015)	Loss 3.3007 (3.4196)	Entropy 1.69502 (1.70134)	Top-1 acc 52.734 (42.905)	Top-5 acc 69.531 (66.772)	lr 0.02353
Train [19][2250/3239]	Time 0.177 (0.539)	Data Time 0.002 (0.015)	Loss 3.4571 (3.4197)	Entropy 1.69493 (1.70132)	Top-1 acc 43.359 (42.895)	Top-5 acc 64.844 (66.771)	lr 0.02353
Train [19][2260/3239]	Time 0.268 (0.538)	Data Time 0.001 (0.015)	Loss 3.6124 (3.4200)	Entropy 1.69494 (1.70129)	Top-1 acc 36.328 (42.887)	Top-5 acc 61.719 (66.762)	lr 0.02353
Train [19][2270/3239]	Time 0.240 (0.538)	Data Time 0.001 (0.015)	Loss 3.4922 (3.4203)	Entropy 1.69479 (1.70126)	Top-1 acc 41.016 (42.882)	Top-5 acc 65.625 (66.759)	lr 0.02353
Train [19][2280/3239]	Time 0.221 (0.538)	Data Time 0.001 (0.014)	Loss 3.4728 (3.4206)	Entropy 1.69466 (1.70123)	Top-1 acc 41.406 (42.870)	Top-5 acc 67.578 (66.750)	lr 0.02353
Train [19][2290/3239]	Time 0.194 (0.537)	Data Time 0.001 (0.014)	Loss 3.2287 (3.4207)	Entropy 1.69459 (1.70120)	Top-1 acc 46.484 (42.872)	Top-5 acc 68.750 (66.746)	lr 0.02353
Train [19][2300/3239]	Time 0.198 (0.537)	Data Time 0.002 (0.014)	Loss 3.3583 (3.4205)	Entropy 1.69453 (1.70117)	Top-1 acc 41.797 (42.872)	Top-5 acc 66.016 (66.748)	lr 0.02353
Train [19][2310/3239]	Time 0.229 (0.536)	Data Time 0.001 (0.014)	Loss 3.4997 (3.4207)	Entropy 1.69438 (1.70114)	Top-1 acc 39.844 (42.865)	Top-5 acc 65.625 (66.741)	lr 0.02353
Train [19][2320/3239]	Time 2.350 (0.536)	Data Time 0.001 (0.014)	Loss 3.3269 (3.4207)	Entropy 1.69438 (1.70111)	Top-1 acc 42.969 (42.867)	Top-5 acc 67.969 (66.738)	lr 0.02353
Train [19][2330/3239]	Time 0.345 (0.534)	Data Time 0.001 (0.014)	Loss 3.3298 (3.4204)	Entropy 1.69440 (1.70109)	Top-1 acc 43.750 (42.872)	Top-5 acc 66.797 (66.742)	lr 0.02353
Train [19][2340/3239]	Time 0.168 (0.534)	Data Time 0.001 (0.014)	Loss 3.3941 (3.4203)	Entropy 1.69432 (1.70106)	Top-1 acc 43.359 (42.878)	Top-5 acc 62.891 (66.740)	lr 0.02353
Train [19][2350/3239]	Time 0.199 (0.534)	Data Time 0.001 (0.014)	Loss 3.2762 (3.4201)	Entropy 1.69414 (1.70103)	Top-1 acc 48.047 (42.885)	Top-5 acc 67.188 (66.746)	lr 0.02353
Train [19][2360/3239]	Time 0.433 (0.549)	Data Time 0.003 (0.014)	Loss 3.5251 (3.4201)	Entropy 1.69404 (1.70100)	Top-1 acc 41.797 (42.887)	Top-5 acc 66.406 (66.748)	lr 0.02353
Train [19][2370/3239]	Time 0.192 (0.549)	Data Time 0.001 (0.014)	Loss 3.4279 (3.4204)	Entropy 1.69403 (1.70097)	Top-1 acc 43.750 (42.882)	Top-5 acc 67.188 (66.743)	lr 0.02353
Train [19][2380/3239]	Time 0.186 (0.548)	Data Time 0.002 (0.014)	Loss 3.3821 (3.4202)	Entropy 1.69391 (1.70094)	Top-1 acc 41.797 (42.880)	Top-5 acc 65.234 (66.743)	lr 0.02353
Train [19][2390/3239]	Time 0.199 (0.548)	Data Time 0.001 (0.014)	Loss 3.5886 (3.4200)	Entropy 1.69388 (1.70091)	Top-1 acc 41.406 (42.887)	Top-5 acc 63.281 (66.749)	lr 0.02353
Train [19][2400/3239]	Time 0.245 (0.547)	Data Time 0.001 (0.014)	Loss 3.5291 (3.4199)	Entropy 1.69383 (1.70088)	Top-1 acc 37.891 (42.891)	Top-5 acc 64.844 (66.750)	lr 0.02353
Train [19][2410/3239]	Time 0.180 (0.547)	Data Time 0.001 (0.014)	Loss 3.4592 (3.4200)	Entropy 1.69375 (1.70085)	Top-1 acc 40.625 (42.884)	Top-5 acc 68.359 (66.751)	lr 0.02352
Train [19][2420/3239]	Time 0.190 (0.547)	Data Time 0.001 (0.014)	Loss 3.3924 (3.4200)	Entropy 1.69368 (1.70082)	Top-1 acc 43.359 (42.882)	Top-5 acc 69.531 (66.746)	lr 0.02352
Train [19][2430/3239]	Time 2.405 (0.546)	Data Time 0.001 (0.014)	Loss 3.2962 (3.4198)	Entropy 1.69368 (1.70079)	Top-1 acc 44.531 (42.886)	Top-5 acc 70.312 (66.749)	lr 0.02352
Train [19][2440/3239]	Time 0.226 (0.545)	Data Time 0.001 (0.014)	Loss 3.4280 (3.4199)	Entropy 1.69365 (1.70076)	Top-1 acc 44.531 (42.886)	Top-5 acc 66.406 (66.749)	lr 0.02352
Train [19][2450/3239]	Time 0.162 (0.544)	Data Time 0.001 (0.014)	Loss 3.3545 (3.4199)	Entropy 1.69357 (1.70073)	Top-1 acc 40.234 (42.884)	Top-5 acc 67.578 (66.750)	lr 0.02352
Train [19][2460/3239]	Time 0.213 (0.544)	Data Time 0.001 (0.014)	Loss 3.7134 (3.4201)	Entropy 1.69351 (1.70071)	Top-1 acc 33.984 (42.876)	Top-5 acc 61.328 (66.746)	lr 0.02352
Train [19][2470/3239]	Time 0.330 (0.543)	Data Time 0.001 (0.014)	Loss 3.3385 (3.4201)	Entropy 1.69351 (1.70068)	Top-1 acc 44.141 (42.874)	Top-5 acc 68.359 (66.745)	lr 0.02352
Train [19][2480/3239]	Time 0.212 (0.543)	Data Time 0.001 (0.014)	Loss 3.3178 (3.4202)	Entropy 1.69343 (1.70065)	Top-1 acc 45.312 (42.875)	Top-5 acc 68.359 (66.741)	lr 0.02352
Train [19][2490/3239]	Time 0.197 (0.543)	Data Time 0.001 (0.013)	Loss 3.4785 (3.4204)	Entropy 1.69339 (1.70062)	Top-1 acc 38.672 (42.868)	Top-5 acc 62.891 (66.736)	lr 0.02352
Train [19][2500/3239]	Time 0.207 (0.542)	Data Time 0.001 (0.013)	Loss 3.5088 (3.4204)	Entropy 1.69328 (1.70059)	Top-1 acc 40.234 (42.865)	Top-5 acc 63.672 (66.737)	lr 0.02352
Train [19][2510/3239]	Time 0.207 (0.542)	Data Time 0.001 (0.013)	Loss 3.2674 (3.4205)	Entropy 1.69322 (1.70056)	Top-1 acc 48.828 (42.865)	Top-5 acc 69.141 (66.736)	lr 0.02352
Train [19][2520/3239]	Time 0.160 (0.541)	Data Time 0.001 (0.013)	Loss 3.2400 (3.4202)	Entropy 1.69326 (1.70053)	Top-1 acc 47.656 (42.875)	Top-5 acc 70.703 (66.740)	lr 0.02352
Train [19][2530/3239]	Time 0.249 (0.541)	Data Time 0.001 (0.013)	Loss 3.4693 (3.4203)	Entropy 1.69316 (1.70050)	Top-1 acc 41.406 (42.874)	Top-5 acc 64.844 (66.739)	lr 0.02352
Train [19][2540/3239]	Time 2.232 (0.540)	Data Time 0.001 (0.013)	Loss 3.2098 (3.4202)	Entropy 1.69316 (1.70047)	Top-1 acc 49.219 (42.877)	Top-5 acc 69.531 (66.740)	lr 0.02352
Train [19][2550/3239]	Time 0.210 (0.539)	Data Time 0.001 (0.013)	Loss 3.4260 (3.4201)	Entropy 1.69308 (1.70044)	Top-1 acc 45.312 (42.880)	Top-5 acc 71.094 (66.746)	lr 0.02352
Train [19][2560/3239]	Time 0.234 (0.539)	Data Time 0.002 (0.013)	Loss 3.3116 (3.4199)	Entropy 1.69300 (1.70041)	Top-1 acc 40.625 (42.881)	Top-5 acc 70.312 (66.751)	lr 0.02352
Train [19][2570/3239]	Time 0.239 (0.538)	Data Time 0.001 (0.013)	Loss 3.3665 (3.4199)	Entropy 1.69295 (1.70039)	Top-1 acc 42.969 (42.883)	Top-5 acc 66.016 (66.755)	lr 0.02352
Train [19][2580/3239]	Time 0.194 (0.538)	Data Time 0.001 (0.013)	Loss 3.5853 (3.4199)	Entropy 1.69282 (1.70036)	Top-1 acc 39.844 (42.881)	Top-5 acc 64.062 (66.756)	lr 0.02352
Train [19][2590/3239]	Time 0.187 (0.537)	Data Time 0.001 (0.013)	Loss 3.5813 (3.4202)	Entropy 1.69270 (1.70033)	Top-1 acc 36.719 (42.874)	Top-5 acc 62.500 (66.745)	lr 0.02352
Train [19][2600/3239]	Time 0.305 (0.537)	Data Time 0.001 (0.013)	Loss 3.3692 (3.4203)	Entropy 1.69267 (1.70030)	Top-1 acc 44.141 (42.877)	Top-5 acc 67.578 (66.745)	lr 0.02352
Train [19][2610/3239]	Time 0.210 (0.537)	Data Time 0.001 (0.013)	Loss 3.3412 (3.4205)	Entropy 1.69260 (1.70027)	Top-1 acc 44.531 (42.874)	Top-5 acc 66.797 (66.740)	lr 0.02352
Train [19][2620/3239]	Time 0.209 (0.536)	Data Time 0.001 (0.013)	Loss 3.4726 (3.4207)	Entropy 1.69254 (1.70024)	Top-1 acc 40.234 (42.876)	Top-5 acc 64.062 (66.734)	lr 0.02351
Train [19][2630/3239]	Time 0.226 (0.536)	Data Time 0.001 (0.013)	Loss 3.3708 (3.4208)	Entropy 1.69256 (1.70021)	Top-1 acc 41.797 (42.875)	Top-5 acc 68.359 (66.736)	lr 0.02351
Train [19][2640/3239]	Time 0.221 (0.535)	Data Time 0.001 (0.013)	Loss 3.2707 (3.4206)	Entropy 1.69256 (1.70018)	Top-1 acc 46.094 (42.875)	Top-5 acc 67.578 (66.738)	lr 0.02351
Train [19][2650/3239]	Time 0.268 (0.535)	Data Time 0.002 (0.013)	Loss 3.5052 (3.4204)	Entropy 1.69229 (1.70015)	Top-1 acc 39.062 (42.880)	Top-5 acc 67.969 (66.743)	lr 0.02351
Train [19][2660/3239]	Time 0.210 (0.535)	Data Time 0.001 (0.013)	Loss 3.5621 (3.4203)	Entropy 1.69219 (1.70012)	Top-1 acc 40.234 (42.881)	Top-5 acc 63.672 (66.744)	lr 0.02351
Train [19][2670/3239]	Time 0.330 (0.534)	Data Time 0.001 (0.013)	Loss 3.3694 (3.4204)	Entropy 1.69216 (1.70009)	Top-1 acc 42.578 (42.878)	Top-5 acc 68.750 (66.742)	lr 0.02351
Train [19][2680/3239]	Time 0.217 (0.534)	Data Time 0.001 (0.013)	Loss 3.1840 (3.4204)	Entropy 1.69216 (1.70006)	Top-1 acc 47.266 (42.875)	Top-5 acc 72.266 (66.743)	lr 0.02351
Train [19][2690/3239]	Time 0.229 (0.534)	Data Time 0.001 (0.013)	Loss 3.4335 (3.4204)	Entropy 1.69206 (1.70003)	Top-1 acc 41.406 (42.875)	Top-5 acc 67.578 (66.744)	lr 0.02351
Train [19][2700/3239]	Time 0.223 (0.533)	Data Time 0.001 (0.013)	Loss 3.4419 (3.4205)	Entropy 1.69202 (1.70000)	Top-1 acc 41.406 (42.871)	Top-5 acc 70.312 (66.739)	lr 0.02351
Train [19][2710/3239]	Time 0.311 (0.546)	Data Time 0.004 (0.013)	Loss 3.4846 (3.4204)	Entropy 1.69195 (1.69997)	Top-1 acc 40.234 (42.876)	Top-5 acc 66.016 (66.743)	lr 0.02351
Train [19][2720/3239]	Time 0.229 (0.547)	Data Time 0.002 (0.013)	Loss 3.3857 (3.4206)	Entropy 1.69183 (1.69995)	Top-1 acc 43.359 (42.872)	Top-5 acc 67.578 (66.740)	lr 0.02351
Train [19][2730/3239]	Time 0.208 (0.546)	Data Time 0.001 (0.012)	Loss 3.3615 (3.4206)	Entropy 1.69177 (1.69992)	Top-1 acc 39.844 (42.869)	Top-5 acc 68.750 (66.743)	lr 0.02351
Train [19][2740/3239]	Time 0.323 (0.546)	Data Time 0.001 (0.012)	Loss 3.3746 (3.4206)	Entropy 1.69168 (1.69989)	Top-1 acc 37.891 (42.868)	Top-5 acc 67.578 (66.743)	lr 0.02351
Train [19][2750/3239]	Time 0.187 (0.545)	Data Time 0.001 (0.012)	Loss 3.3784 (3.4207)	Entropy 1.69144 (1.69986)	Top-1 acc 41.016 (42.863)	Top-5 acc 67.188 (66.742)	lr 0.02351
Train [19][2760/3239]	Time 0.207 (0.545)	Data Time 0.001 (0.012)	Loss 3.4396 (3.4208)	Entropy 1.69139 (1.69983)	Top-1 acc 42.188 (42.865)	Top-5 acc 68.359 (66.742)	lr 0.02351
Train [19][2770/3239]	Time 0.293 (0.544)	Data Time 0.002 (0.012)	Loss 3.3992 (3.4209)	Entropy 1.69126 (1.69979)	Top-1 acc 41.406 (42.861)	Top-5 acc 67.969 (66.739)	lr 0.02351
Train [19][2780/3239]	Time 0.247 (0.544)	Data Time 0.001 (0.012)	Loss 3.2811 (3.4208)	Entropy 1.69115 (1.69976)	Top-1 acc 47.266 (42.861)	Top-5 acc 68.750 (66.740)	lr 0.02351
Train [19][2790/3239]	Time 0.244 (0.544)	Data Time 0.001 (0.012)	Loss 3.6020 (3.4208)	Entropy 1.69105 (1.69973)	Top-1 acc 39.844 (42.860)	Top-5 acc 62.500 (66.738)	lr 0.02351
Train [19][2800/3239]	Time 0.250 (0.543)	Data Time 0.001 (0.012)	Loss 3.5353 (3.4208)	Entropy 1.69100 (1.69970)	Top-1 acc 40.625 (42.858)	Top-5 acc 64.844 (66.739)	lr 0.02351
Train [19][2810/3239]	Time 0.249 (0.543)	Data Time 0.003 (0.012)	Loss 3.1531 (3.4207)	Entropy 1.69089 (1.69967)	Top-1 acc 46.875 (42.862)	Top-5 acc 72.266 (66.742)	lr 0.02351
Train [19][2820/3239]	Time 0.162 (0.543)	Data Time 0.001 (0.012)	Loss 3.2593 (3.4206)	Entropy 1.69084 (1.69964)	Top-1 acc 44.922 (42.864)	Top-5 acc 69.141 (66.743)	lr 0.02351
Train [19][2830/3239]	Time 0.240 (0.542)	Data Time 0.001 (0.012)	Loss 3.4977 (3.4207)	Entropy 1.69079 (1.69961)	Top-1 acc 40.234 (42.861)	Top-5 acc 64.844 (66.742)	lr 0.02350
Train [19][2840/3239]	Time 0.250 (0.542)	Data Time 0.002 (0.012)	Loss 3.4696 (3.4203)	Entropy 1.69071 (1.69958)	Top-1 acc 40.234 (42.871)	Top-5 acc 63.281 (66.753)	lr 0.02350
Train [19][2850/3239]	Time 0.241 (0.542)	Data Time 0.001 (0.012)	Loss 3.4135 (3.4205)	Entropy 1.69065 (1.69955)	Top-1 acc 44.141 (42.872)	Top-5 acc 64.062 (66.749)	lr 0.02350
Train [19][2860/3239]	Time 0.223 (0.541)	Data Time 0.001 (0.012)	Loss 3.5228 (3.4205)	Entropy 1.69065 (1.69952)	Top-1 acc 42.188 (42.871)	Top-5 acc 66.406 (66.754)	lr 0.02350
Train [19][2870/3239]	Time 0.233 (0.541)	Data Time 0.001 (0.012)	Loss 3.4864 (3.4204)	Entropy 1.69062 (1.69948)	Top-1 acc 41.016 (42.871)	Top-5 acc 61.719 (66.753)	lr 0.02350
Train [19][2880/3239]	Time 0.282 (0.540)	Data Time 0.001 (0.012)	Loss 3.3229 (3.4204)	Entropy 1.69050 (1.69945)	Top-1 acc 40.234 (42.869)	Top-5 acc 69.141 (66.749)	lr 0.02350
Train [19][2890/3239]	Time 0.205 (0.540)	Data Time 0.001 (0.012)	Loss 3.6187 (3.4202)	Entropy 1.69039 (1.69942)	Top-1 acc 40.234 (42.869)	Top-5 acc 62.891 (66.754)	lr 0.02350
Train [19][2900/3239]	Time 0.202 (0.540)	Data Time 0.001 (0.012)	Loss 3.3596 (3.4201)	Entropy 1.69037 (1.69939)	Top-1 acc 41.406 (42.875)	Top-5 acc 69.141 (66.757)	lr 0.02350
Train [19][2910/3239]	Time 0.238 (0.539)	Data Time 0.001 (0.012)	Loss 3.4994 (3.4205)	Entropy 1.69033 (1.69936)	Top-1 acc 41.406 (42.864)	Top-5 acc 67.578 (66.753)	lr 0.02350
Train [19][2920/3239]	Time 0.228 (0.539)	Data Time 0.001 (0.012)	Loss 3.4624 (3.4205)	Entropy 1.69029 (1.69933)	Top-1 acc 44.141 (42.864)	Top-5 acc 66.016 (66.752)	lr 0.02350
Train [19][2930/3239]	Time 0.189 (0.538)	Data Time 0.001 (0.012)	Loss 3.5473 (3.4204)	Entropy 1.69025 (1.69930)	Top-1 acc 40.234 (42.865)	Top-5 acc 64.062 (66.751)	lr 0.02350
Train [19][2940/3239]	Time 0.216 (0.538)	Data Time 0.001 (0.012)	Loss 3.3560 (3.4205)	Entropy 1.69021 (1.69927)	Top-1 acc 44.141 (42.864)	Top-5 acc 66.797 (66.748)	lr 0.02350
Train [19][2950/3239]	Time 0.211 (0.538)	Data Time 0.001 (0.012)	Loss 3.5714 (3.4209)	Entropy 1.69015 (1.69924)	Top-1 acc 41.797 (42.859)	Top-5 acc 64.062 (66.740)	lr 0.02350
Train [19][2960/3239]	Time 0.244 (0.537)	Data Time 0.001 (0.012)	Loss 3.6166 (3.4208)	Entropy 1.69008 (1.69921)	Top-1 acc 38.281 (42.857)	Top-5 acc 63.281 (66.741)	lr 0.02350
Train [19][2970/3239]	Time 0.244 (0.537)	Data Time 0.001 (0.012)	Loss 3.3503 (3.4209)	Entropy 1.69004 (1.69918)	Top-1 acc 43.359 (42.853)	Top-5 acc 67.188 (66.739)	lr 0.02350
Train [19][2980/3239]	Time 0.257 (0.537)	Data Time 0.001 (0.012)	Loss 3.0413 (3.4208)	Entropy 1.68996 (1.69915)	Top-1 acc 53.906 (42.859)	Top-5 acc 74.219 (66.738)	lr 0.02350
Train [19][2990/3239]	Time 0.258 (0.536)	Data Time 0.001 (0.012)	Loss 3.3995 (3.4208)	Entropy 1.68993 (1.69911)	Top-1 acc 41.797 (42.863)	Top-5 acc 66.797 (66.739)	lr 0.02350
Train [19][3000/3239]	Time 0.179 (0.536)	Data Time 0.001 (0.012)	Loss 3.4496 (3.4211)	Entropy 1.68990 (1.69908)	Top-1 acc 42.578 (42.859)	Top-5 acc 66.016 (66.731)	lr 0.02350
Train [19][3010/3239]	Time 0.262 (0.536)	Data Time 0.001 (0.011)	Loss 3.4751 (3.4211)	Entropy 1.68986 (1.69905)	Top-1 acc 41.406 (42.858)	Top-5 acc 65.234 (66.734)	lr 0.02350
Train [19][3020/3239]	Time 0.203 (0.535)	Data Time 0.001 (0.011)	Loss 3.4963 (3.4212)	Entropy 1.68982 (1.69902)	Top-1 acc 47.266 (42.857)	Top-5 acc 60.938 (66.730)	lr 0.02350
Train [19][3030/3239]	Time 0.295 (0.535)	Data Time 0.001 (0.011)	Loss 3.7035 (3.4210)	Entropy 1.68982 (1.69899)	Top-1 acc 39.844 (42.862)	Top-5 acc 61.328 (66.734)	lr 0.02350
Train [19][3040/3239]	Time 0.281 (0.549)	Data Time 0.003 (0.011)	Loss 3.4457 (3.4209)	Entropy 1.68981 (1.69896)	Top-1 acc 43.750 (42.864)	Top-5 acc 66.406 (66.739)	lr 0.02349
Train [19][3050/3239]	Time 0.236 (0.549)	Data Time 0.002 (0.011)	Loss 3.4332 (3.4211)	Entropy 1.68971 (1.69893)	Top-1 acc 43.359 (42.860)	Top-5 acc 67.188 (66.736)	lr 0.02349
Train [19][3060/3239]	Time 0.237 (0.548)	Data Time 0.002 (0.011)	Loss 3.3636 (3.4207)	Entropy 1.68966 (1.69890)	Top-1 acc 44.141 (42.870)	Top-5 acc 67.188 (66.744)	lr 0.02349
Train [19][3070/3239]	Time 0.264 (0.548)	Data Time 0.002 (0.011)	Loss 3.3787 (3.4207)	Entropy 1.68964 (1.69887)	Top-1 acc 44.531 (42.871)	Top-5 acc 69.141 (66.744)	lr 0.02349
Train [19][3080/3239]	Time 0.174 (0.547)	Data Time 0.001 (0.011)	Loss 3.6451 (3.4209)	Entropy 1.68964 (1.69884)	Top-1 acc 42.188 (42.871)	Top-5 acc 60.938 (66.740)	lr 0.02349
Train [19][3090/3239]	Time 0.193 (0.547)	Data Time 0.001 (0.011)	Loss 3.1495 (3.4207)	Entropy 1.68953 (1.69881)	Top-1 acc 50.000 (42.872)	Top-5 acc 75.000 (66.745)	lr 0.02349
Train [19][3100/3239]	Time 0.171 (0.547)	Data Time 0.001 (0.011)	Loss 3.3856 (3.4206)	Entropy 1.68942 (1.69878)	Top-1 acc 44.922 (42.882)	Top-5 acc 70.312 (66.750)	lr 0.02349
Train [19][3110/3239]	Time 0.236 (0.546)	Data Time 0.001 (0.011)	Loss 3.4248 (3.4204)	Entropy 1.68932 (1.69875)	Top-1 acc 44.141 (42.885)	Top-5 acc 67.188 (66.753)	lr 0.02349
Train [19][3120/3239]	Time 0.214 (0.546)	Data Time 0.001 (0.011)	Loss 3.6771 (3.4203)	Entropy 1.68931 (1.69872)	Top-1 acc 37.500 (42.891)	Top-5 acc 61.719 (66.756)	lr 0.02349
Train [19][3130/3239]	Time 0.237 (0.546)	Data Time 0.001 (0.011)	Loss 3.4478 (3.4203)	Entropy 1.68932 (1.69869)	Top-1 acc 46.875 (42.892)	Top-5 acc 66.016 (66.754)	lr 0.02349
Train [19][3140/3239]	Time 0.231 (0.545)	Data Time 0.001 (0.011)	Loss 3.8138 (3.4204)	Entropy 1.68921 (1.69866)	Top-1 acc 35.156 (42.886)	Top-5 acc 58.203 (66.750)	lr 0.02349
Train [19][3150/3239]	Time 0.386 (0.545)	Data Time 0.001 (0.011)	Loss 3.4840 (3.4203)	Entropy 1.68921 (1.69863)	Top-1 acc 42.969 (42.891)	Top-5 acc 65.625 (66.753)	lr 0.02349
Train [19][3160/3239]	Time 0.229 (0.545)	Data Time 0.001 (0.011)	Loss 3.3707 (3.4201)	Entropy 1.68920 (1.69860)	Top-1 acc 42.578 (42.895)	Top-5 acc 71.484 (66.760)	lr 0.02349
Train [19][3170/3239]	Time 0.251 (0.544)	Data Time 0.001 (0.011)	Loss 3.3778 (3.4201)	Entropy 1.68916 (1.69857)	Top-1 acc 44.141 (42.892)	Top-5 acc 67.969 (66.759)	lr 0.02349
Train [19][3180/3239]	Time 0.189 (0.544)	Data Time 0.000 (0.011)	Loss 3.2624 (3.4201)	Entropy 1.68913 (1.69854)	Top-1 acc 42.969 (42.888)	Top-5 acc 71.484 (66.757)	lr 0.02349
Train [19][3190/3239]	Time 0.240 (0.544)	Data Time 0.000 (0.011)	Loss 3.2389 (3.4201)	Entropy 1.68911 (1.69851)	Top-1 acc 47.656 (42.892)	Top-5 acc 69.141 (66.758)	lr 0.02349
Train [19][3200/3239]	Time 0.225 (0.543)	Data Time 0.000 (0.011)	Loss 3.3808 (3.4199)	Entropy 1.68906 (1.69848)	Top-1 acc 44.531 (42.893)	Top-5 acc 68.750 (66.763)	lr 0.02349
Train [19][3210/3239]	Time 0.238 (0.543)	Data Time 0.000 (0.011)	Loss 3.3310 (3.4199)	Entropy 1.68900 (1.69845)	Top-1 acc 48.438 (42.895)	Top-5 acc 67.578 (66.765)	lr 0.02349
Train [19][3220/3239]	Time 0.162 (0.543)	Data Time 0.000 (0.011)	Loss 3.4152 (3.4200)	Entropy 1.68898 (1.69843)	Top-1 acc 40.625 (42.891)	Top-5 acc 69.922 (66.763)	lr 0.02349
Train [19][3230/3239]	Time 0.221 (0.542)	Data Time 0.000 (0.011)	Loss 3.4285 (3.4199)	Entropy 1.68894 (1.69840)	Top-1 acc 43.750 (42.891)	Top-5 acc 65.625 (66.763)	lr 0.02349
Train [19][3239/3239]	Time 2.149 (0.542)	Data Time 0.000 (0.011)	Loss 3.4734 (3.4200)	Entropy 1.68894 (1.69837)	Top-1 acc 39.506 (42.889)	Top-5 acc 65.432 (66.761)	lr 0.02349
==========Valid [19/120]	loss 2.209	top-1 acc 51.822 (51.822)	top-5 acc 75.401	Train top-1 42.889	top-5 66.761	Entropy 1.68894	Latency-None: 0.000ms	Flops: 557.92M
Train [20][0/3239]	Time 26.814 (26.814)	Data Time 25.794 (25.794)	Loss 3.4247 (3.4247)	Entropy 1.68886 (1.68886)	Top-1 acc 43.750 (43.750)	Top-5 acc 68.750 (68.750)	lr 0.02349
Train [20][10/3239]	Time 2.393 (3.111)	Data Time 0.001 (2.523)	Loss 3.2432 (3.3784)	Entropy 1.68886 (1.68886)	Top-1 acc 45.703 (42.791)	Top-5 acc 70.703 (66.406)	lr 0.02348
Train [20][20/3239]	Time 0.208 (1.735)	Data Time 0.001 (1.322)	Loss 3.2700 (3.3402)	Entropy 1.68876 (1.68881)	Top-1 acc 45.312 (43.824)	Top-5 acc 73.047 (68.490)	lr 0.02348
Train [20][30/3239]	Time 0.179 (1.313)	Data Time 0.001 (0.896)	Loss 3.5943 (3.3516)	Entropy 1.68872 (1.68878)	Top-1 acc 40.234 (43.813)	Top-5 acc 62.109 (67.780)	lr 0.02348
Train [20][40/3239]	Time 0.221 (1.098)	Data Time 0.001 (0.679)	Loss 3.4898 (3.3545)	Entropy 1.68869 (1.68876)	Top-1 acc 44.141 (44.274)	Top-5 acc 67.188 (67.616)	lr 0.02348
Train [20][50/3239]	Time 0.352 (0.970)	Data Time 0.001 (0.546)	Loss 3.0809 (3.3670)	Entropy 1.68860 (1.68874)	Top-1 acc 51.172 (44.033)	Top-5 acc 71.875 (67.547)	lr 0.02348
Train [20][60/3239]	Time 0.210 (0.883)	Data Time 0.002 (0.457)	Loss 3.4635 (3.3657)	Entropy 1.68851 (1.68870)	Top-1 acc 45.312 (44.006)	Top-5 acc 67.188 (67.706)	lr 0.02348
Train [20][70/3239]	Time 0.232 (0.821)	Data Time 0.001 (0.393)	Loss 3.2613 (3.3626)	Entropy 1.68850 (1.68868)	Top-1 acc 45.703 (44.234)	Top-5 acc 68.359 (67.721)	lr 0.02348
Train [20][80/3239]	Time 0.200 (0.776)	Data Time 0.001 (0.345)	Loss 3.4939 (3.3635)	Entropy 1.68841 (1.68865)	Top-1 acc 36.328 (44.208)	Top-5 acc 64.844 (67.646)	lr 0.02348
Train [20][90/3239]	Time 0.270 (0.740)	Data Time 0.002 (0.307)	Loss 3.4550 (3.3666)	Entropy 1.68839 (1.68862)	Top-1 acc 41.016 (43.999)	Top-5 acc 66.016 (67.582)	lr 0.02348
Train [20][100/3239]	Time 0.232 (0.709)	Data Time 0.001 (0.277)	Loss 3.4630 (3.3674)	Entropy 1.68837 (1.68860)	Top-1 acc 41.406 (43.920)	Top-5 acc 64.844 (67.644)	lr 0.02348
Train [20][110/3239]	Time 0.232 (0.685)	Data Time 0.001 (0.252)	Loss 3.2380 (3.3677)	Entropy 1.68830 (1.68858)	Top-1 acc 46.094 (43.894)	Top-5 acc 69.922 (67.582)	lr 0.02348
Train [20][120/3239]	Time 2.261 (0.664)	Data Time 0.001 (0.231)	Loss 3.4743 (3.3679)	Entropy 1.68830 (1.68855)	Top-1 acc 41.797 (43.966)	Top-5 acc 66.797 (67.659)	lr 0.02348
Train [20][130/3239]	Time 0.149 (0.630)	Data Time 0.002 (0.214)	Loss 3.4821 (3.3656)	Entropy 1.68826 (1.68853)	Top-1 acc 43.359 (44.000)	Top-5 acc 64.453 (67.694)	lr 0.02348
Train [20][140/3239]	Time 0.203 (0.616)	Data Time 0.007 (0.199)	Loss 3.3294 (3.3643)	Entropy 1.68816 (1.68851)	Top-1 acc 46.094 (43.933)	Top-5 acc 65.234 (67.703)	lr 0.02348
Train [20][150/3239]	Time 0.231 (0.895)	Data Time 0.002 (0.186)	Loss 3.3998 (3.3681)	Entropy 1.68813 (1.68848)	Top-1 acc 43.359 (43.944)	Top-5 acc 66.016 (67.627)	lr 0.02348
Train [20][160/3239]	Time 0.213 (0.866)	Data Time 0.002 (0.175)	Loss 3.2995 (3.3725)	Entropy 1.68812 (1.68846)	Top-1 acc 45.312 (43.879)	Top-5 acc 69.531 (67.593)	lr 0.02348
Train [20][170/3239]	Time 0.230 (0.842)	Data Time 0.002 (0.165)	Loss 3.3377 (3.3691)	Entropy 1.68798 (1.68844)	Top-1 acc 47.656 (43.953)	Top-5 acc 66.016 (67.667)	lr 0.02348
Train [20][180/3239]	Time 0.309 (0.820)	Data Time 0.002 (0.156)	Loss 3.4280 (3.3681)	Entropy 1.68795 (1.68841)	Top-1 acc 42.969 (43.953)	Top-5 acc 70.312 (67.671)	lr 0.02348
Train [20][190/3239]	Time 0.217 (0.799)	Data Time 0.001 (0.148)	Loss 3.5785 (3.3690)	Entropy 1.68793 (1.68838)	Top-1 acc 39.453 (43.977)	Top-5 acc 65.234 (67.674)	lr 0.02348
Train [20][200/3239]	Time 0.205 (0.780)	Data Time 0.001 (0.140)	Loss 3.3055 (3.3696)	Entropy 1.68787 (1.68836)	Top-1 acc 43.750 (43.950)	Top-5 acc 69.141 (67.650)	lr 0.02348
Train [20][210/3239]	Time 0.223 (0.764)	Data Time 0.001 (0.134)	Loss 3.3082 (3.3686)	Entropy 1.68784 (1.68834)	Top-1 acc 44.531 (43.968)	Top-5 acc 67.188 (67.647)	lr 0.02348
Train [20][220/3239]	Time 0.224 (0.749)	Data Time 0.001 (0.128)	Loss 3.2797 (3.3710)	Entropy 1.68777 (1.68832)	Top-1 acc 44.922 (43.888)	Top-5 acc 69.141 (67.636)	lr 0.02347
Train [20][230/3239]	Time 2.337 (0.736)	Data Time 0.001 (0.123)	Loss 3.3415 (3.3713)	Entropy 1.68777 (1.68829)	Top-1 acc 49.219 (43.904)	Top-5 acc 66.406 (67.651)	lr 0.02347
Train [20][240/3239]	Time 0.261 (0.715)	Data Time 0.001 (0.118)	Loss 3.6313 (3.3714)	Entropy 1.68774 (1.68827)	Top-1 acc 38.281 (43.919)	Top-5 acc 60.156 (67.624)	lr 0.02347
Train [20][250/3239]	Time 0.310 (0.704)	Data Time 0.001 (0.113)	Loss 3.3612 (3.3711)	Entropy 1.68770 (1.68825)	Top-1 acc 45.312 (43.929)	Top-5 acc 69.531 (67.676)	lr 0.02347
Train [20][260/3239]	Time 0.215 (0.694)	Data Time 0.001 (0.109)	Loss 3.4482 (3.3714)	Entropy 1.68768 (1.68822)	Top-1 acc 42.969 (43.889)	Top-5 acc 67.188 (67.680)	lr 0.02347
Train [20][270/3239]	Time 0.229 (0.685)	Data Time 0.001 (0.105)	Loss 3.3538 (3.3743)	Entropy 1.68760 (1.68820)	Top-1 acc 42.578 (43.847)	Top-5 acc 66.016 (67.608)	lr 0.02347
Train [20][280/3239]	Time 0.227 (0.675)	Data Time 0.001 (0.101)	Loss 3.6181 (3.3749)	Entropy 1.68756 (1.68818)	Top-1 acc 37.891 (43.890)	Top-5 acc 62.500 (67.586)	lr 0.02347
Train [20][290/3239]	Time 0.212 (0.667)	Data Time 0.001 (0.098)	Loss 3.1824 (3.3750)	Entropy 1.68748 (1.68816)	Top-1 acc 48.438 (43.867)	Top-5 acc 69.922 (67.585)	lr 0.02347
Train [20][300/3239]	Time 0.208 (0.659)	Data Time 0.001 (0.094)	Loss 3.3057 (3.3742)	Entropy 1.68733 (1.68813)	Top-1 acc 45.703 (43.901)	Top-5 acc 69.922 (67.616)	lr 0.02347
Train [20][310/3239]	Time 0.250 (0.652)	Data Time 0.001 (0.092)	Loss 3.4532 (3.3740)	Entropy 1.68732 (1.68811)	Top-1 acc 42.188 (43.867)	Top-5 acc 66.406 (67.641)	lr 0.02347
Train [20][320/3239]	Time 0.207 (0.646)	Data Time 0.002 (0.089)	Loss 3.3511 (3.3759)	Entropy 1.68715 (1.68808)	Top-1 acc 47.266 (43.866)	Top-5 acc 66.406 (67.606)	lr 0.02347
Train [20][330/3239]	Time 0.219 (0.639)	Data Time 0.001 (0.086)	Loss 3.2251 (3.3771)	Entropy 1.68712 (1.68805)	Top-1 acc 47.656 (43.805)	Top-5 acc 68.359 (67.572)	lr 0.02347
Train [20][340/3239]	Time 2.344 (0.633)	Data Time 0.001 (0.084)	Loss 3.6240 (3.3770)	Entropy 1.68712 (1.68803)	Top-1 acc 39.844 (43.805)	Top-5 acc 62.500 (67.600)	lr 0.02347
Train [20][350/3239]	Time 0.155 (0.621)	Data Time 0.001 (0.081)	Loss 3.6280 (3.3766)	Entropy 1.68711 (1.68800)	Top-1 acc 36.719 (43.795)	Top-5 acc 60.938 (67.579)	lr 0.02347
Train [20][360/3239]	Time 0.205 (0.616)	Data Time 0.001 (0.079)	Loss 3.3172 (3.3751)	Entropy 1.68710 (1.68797)	Top-1 acc 46.875 (43.864)	Top-5 acc 68.750 (67.593)	lr 0.02347
Train [20][370/3239]	Time 0.206 (0.611)	Data Time 0.001 (0.077)	Loss 3.2792 (3.3755)	Entropy 1.68696 (1.68795)	Top-1 acc 41.406 (43.850)	Top-5 acc 70.312 (67.583)	lr 0.02347
Train [20][380/3239]	Time 0.343 (0.606)	Data Time 0.001 (0.075)	Loss 3.3185 (3.3774)	Entropy 1.68700 (1.68792)	Top-1 acc 45.703 (43.808)	Top-5 acc 69.922 (67.545)	lr 0.02347
Train [20][390/3239]	Time 0.199 (0.602)	Data Time 0.001 (0.073)	Loss 3.6380 (3.3780)	Entropy 1.68694 (1.68790)	Top-1 acc 35.156 (43.773)	Top-5 acc 59.375 (67.523)	lr 0.02347
Train [20][400/3239]	Time 0.199 (0.597)	Data Time 0.001 (0.072)	Loss 3.5219 (3.3805)	Entropy 1.68689 (1.68787)	Top-1 acc 40.234 (43.762)	Top-5 acc 64.844 (67.466)	lr 0.02347
Train [20][410/3239]	Time 0.204 (0.593)	Data Time 0.001 (0.070)	Loss 3.3270 (3.3803)	Entropy 1.68683 (1.68785)	Top-1 acc 44.922 (43.771)	Top-5 acc 67.969 (67.478)	lr 0.02347
Train [20][420/3239]	Time 0.210 (0.589)	Data Time 0.001 (0.068)	Loss 3.6197 (3.3810)	Entropy 1.68686 (1.68783)	Top-1 acc 35.547 (43.761)	Top-5 acc 62.891 (67.479)	lr 0.02346
Train [20][430/3239]	Time 0.207 (0.586)	Data Time 0.001 (0.067)	Loss 3.2562 (3.3822)	Entropy 1.68682 (1.68780)	Top-1 acc 43.750 (43.735)	Top-5 acc 69.922 (67.445)	lr 0.02346
Train [20][440/3239]	Time 0.231 (0.583)	Data Time 0.002 (0.065)	Loss 3.4663 (3.3823)	Entropy 1.68676 (1.68778)	Top-1 acc 41.797 (43.729)	Top-5 acc 64.062 (67.433)	lr 0.02346
Train [20][450/3239]	Time 2.324 (0.580)	Data Time 0.003 (0.064)	Loss 3.4629 (3.3827)	Entropy 1.68676 (1.68776)	Top-1 acc 40.625 (43.690)	Top-5 acc 68.750 (67.427)	lr 0.02346
Train [20][460/3239]	Time 0.207 (0.572)	Data Time 0.001 (0.063)	Loss 3.1734 (3.3838)	Entropy 1.68671 (1.68773)	Top-1 acc 42.969 (43.645)	Top-5 acc 70.312 (67.387)	lr 0.02346
Train [20][470/3239]	Time 0.208 (0.569)	Data Time 0.001 (0.061)	Loss 3.3876 (3.3834)	Entropy 1.68661 (1.68771)	Top-1 acc 42.578 (43.632)	Top-5 acc 65.625 (67.385)	lr 0.02346
Train [20][480/3239]	Time 0.228 (0.566)	Data Time 0.001 (0.060)	Loss 3.2822 (3.3834)	Entropy 1.68659 (1.68769)	Top-1 acc 43.359 (43.627)	Top-5 acc 71.094 (67.408)	lr 0.02346
Train [20][490/3239]	Time 0.212 (0.563)	Data Time 0.002 (0.059)	Loss 3.4394 (3.3845)	Entropy 1.68660 (1.68767)	Top-1 acc 43.750 (43.599)	Top-5 acc 67.188 (67.386)	lr 0.02346
Train [20][500/3239]	Time 0.187 (0.561)	Data Time 0.001 (0.058)	Loss 3.5451 (3.3841)	Entropy 1.68651 (1.68764)	Top-1 acc 42.578 (43.599)	Top-5 acc 63.281 (67.396)	lr 0.02346
Train [20][510/3239]	Time 0.299 (0.639)	Data Time 0.005 (0.057)	Loss 3.4300 (3.3841)	Entropy 1.68647 (1.68762)	Top-1 acc 44.141 (43.580)	Top-5 acc 66.016 (67.404)	lr 0.02346
Train [20][520/3239]	Time 0.287 (0.637)	Data Time 0.002 (0.056)	Loss 3.3725 (3.3861)	Entropy 1.68648 (1.68760)	Top-1 acc 37.891 (43.529)	Top-5 acc 67.188 (67.382)	lr 0.02346
Train [20][530/3239]	Time 0.217 (0.633)	Data Time 0.002 (0.055)	Loss 3.2062 (3.3860)	Entropy 1.68649 (1.68758)	Top-1 acc 48.828 (43.545)	Top-5 acc 70.703 (67.378)	lr 0.02346
Train [20][540/3239]	Time 0.283 (0.630)	Data Time 0.001 (0.054)	Loss 3.1815 (3.3856)	Entropy 1.68643 (1.68756)	Top-1 acc 50.391 (43.546)	Top-5 acc 72.656 (67.381)	lr 0.02346
Train [20][550/3239]	Time 0.252 (0.626)	Data Time 0.001 (0.053)	Loss 3.4108 (3.3849)	Entropy 1.68641 (1.68754)	Top-1 acc 42.188 (43.536)	Top-5 acc 68.359 (67.395)	lr 0.02346
Train [20][560/3239]	Time 2.253 (0.623)	Data Time 0.002 (0.052)	Loss 3.5112 (3.3842)	Entropy 1.68641 (1.68752)	Top-1 acc 39.844 (43.551)	Top-5 acc 65.234 (67.416)	lr 0.02346
Train [20][570/3239]	Time 0.208 (0.615)	Data Time 0.001 (0.051)	Loss 3.5197 (3.3845)	Entropy 1.68633 (1.68750)	Top-1 acc 39.844 (43.518)	Top-5 acc 65.625 (67.428)	lr 0.02346
Train [20][580/3239]	Time 0.179 (0.612)	Data Time 0.001 (0.050)	Loss 3.3320 (3.3843)	Entropy 1.68631 (1.68748)	Top-1 acc 47.656 (43.530)	Top-5 acc 67.188 (67.440)	lr 0.02346
Train [20][590/3239]	Time 0.216 (0.609)	Data Time 0.002 (0.049)	Loss 3.5919 (3.3853)	Entropy 1.68621 (1.68745)	Top-1 acc 38.672 (43.512)	Top-5 acc 64.453 (67.412)	lr 0.02346
Train [20][600/3239]	Time 0.208 (0.607)	Data Time 0.001 (0.048)	Loss 3.4592 (3.3853)	Entropy 1.68623 (1.68743)	Top-1 acc 41.406 (43.504)	Top-5 acc 67.188 (67.414)	lr 0.02346
Train [20][610/3239]	Time 0.195 (0.604)	Data Time 0.001 (0.048)	Loss 3.3347 (3.3861)	Entropy 1.68608 (1.68741)	Top-1 acc 43.359 (43.492)	Top-5 acc 69.141 (67.405)	lr 0.02346
Train [20][620/3239]	Time 0.221 (0.601)	Data Time 0.001 (0.047)	Loss 3.1728 (3.3852)	Entropy 1.68597 (1.68739)	Top-1 acc 50.000 (43.520)	Top-5 acc 73.828 (67.408)	lr 0.02346
Train [20][630/3239]	Time 0.215 (0.598)	Data Time 0.001 (0.046)	Loss 3.3684 (3.3852)	Entropy 1.68585 (1.68737)	Top-1 acc 41.016 (43.531)	Top-5 acc 69.531 (67.420)	lr 0.02345
Train [20][640/3239]	Time 0.223 (0.596)	Data Time 0.001 (0.046)	Loss 3.5188 (3.3850)	Entropy 1.68562 (1.68734)	Top-1 acc 45.703 (43.545)	Top-5 acc 64.453 (67.420)	lr 0.02345
Train [20][650/3239]	Time 0.322 (0.593)	Data Time 0.001 (0.045)	Loss 3.2692 (3.3856)	Entropy 1.68557 (1.68732)	Top-1 acc 45.703 (43.526)	Top-5 acc 71.484 (67.414)	lr 0.02345
Train [20][660/3239]	Time 0.215 (0.591)	Data Time 0.001 (0.044)	Loss 3.5335 (3.3865)	Entropy 1.68551 (1.68729)	Top-1 acc 40.234 (43.492)	Top-5 acc 61.719 (67.391)	lr 0.02345
Train [20][670/3239]	Time 2.325 (0.588)	Data Time 0.002 (0.044)	Loss 3.4520 (3.3866)	Entropy 1.68551 (1.68726)	Top-1 acc 41.016 (43.483)	Top-5 acc 68.359 (67.391)	lr 0.02345
Train [20][680/3239]	Time 0.222 (0.583)	Data Time 0.001 (0.043)	Loss 3.3454 (3.3870)	Entropy 1.68541 (1.68724)	Top-1 acc 43.750 (43.487)	Top-5 acc 68.359 (67.372)	lr 0.02345
Train [20][690/3239]	Time 0.207 (0.581)	Data Time 0.001 (0.042)	Loss 3.5046 (3.3870)	Entropy 1.68536 (1.68721)	Top-1 acc 41.016 (43.487)	Top-5 acc 66.797 (67.376)	lr 0.02345
Train [20][700/3239]	Time 0.211 (0.579)	Data Time 0.002 (0.042)	Loss 3.5107 (3.3869)	Entropy 1.68529 (1.68718)	Top-1 acc 44.922 (43.498)	Top-5 acc 65.234 (67.362)	lr 0.02345
Train [20][710/3239]	Time 0.226 (0.577)	Data Time 0.001 (0.041)	Loss 3.3450 (3.3867)	Entropy 1.68528 (1.68715)	Top-1 acc 45.703 (43.514)	Top-5 acc 67.188 (67.357)	lr 0.02345
Train [20][720/3239]	Time 0.316 (0.575)	Data Time 0.001 (0.041)	Loss 3.3746 (3.3861)	Entropy 1.68522 (1.68713)	Top-1 acc 43.750 (43.525)	Top-5 acc 68.359 (67.367)	lr 0.02345
Train [20][730/3239]	Time 0.168 (0.573)	Data Time 0.001 (0.040)	Loss 3.3466 (3.3853)	Entropy 1.68518 (1.68710)	Top-1 acc 42.188 (43.549)	Top-5 acc 69.531 (67.381)	lr 0.02345
Train [20][740/3239]	Time 0.231 (0.571)	Data Time 0.001 (0.040)	Loss 3.2957 (3.3857)	Entropy 1.68514 (1.68708)	Top-1 acc 45.312 (43.540)	Top-5 acc 69.141 (67.365)	lr 0.02345
Train [20][750/3239]	Time 0.186 (0.569)	Data Time 0.001 (0.039)	Loss 3.5597 (3.3855)	Entropy 1.68506 (1.68705)	Top-1 acc 40.625 (43.535)	Top-5 acc 67.188 (67.366)	lr 0.02345
Train [20][760/3239]	Time 0.193 (0.567)	Data Time 0.001 (0.039)	Loss 3.5035 (3.3856)	Entropy 1.68492 (1.68702)	Top-1 acc 38.281 (43.535)	Top-5 acc 64.453 (67.361)	lr 0.02345
Train [20][770/3239]	Time 0.198 (0.566)	Data Time 0.001 (0.038)	Loss 3.2054 (3.3855)	Entropy 1.68490 (1.68700)	Top-1 acc 44.141 (43.534)	Top-5 acc 73.047 (67.360)	lr 0.02345
Train [20][780/3239]	Time 2.305 (0.564)	Data Time 0.001 (0.038)	Loss 3.4566 (3.3854)	Entropy 1.68490 (1.68697)	Top-1 acc 39.062 (43.534)	Top-5 acc 66.406 (67.367)	lr 0.02345
Train [20][790/3239]	Time 0.372 (0.560)	Data Time 0.001 (0.037)	Loss 3.4769 (3.3858)	Entropy 1.68478 (1.68694)	Top-1 acc 42.969 (43.530)	Top-5 acc 66.016 (67.363)	lr 0.02345
Train [20][800/3239]	Time 0.198 (0.559)	Data Time 0.001 (0.037)	Loss 3.3517 (3.3859)	Entropy 1.68473 (1.68691)	Top-1 acc 45.312 (43.507)	Top-5 acc 67.578 (67.367)	lr 0.02345
Train [20][810/3239]	Time 0.200 (0.557)	Data Time 0.001 (0.036)	Loss 3.3346 (3.3847)	Entropy 1.68453 (1.68688)	Top-1 acc 41.797 (43.523)	Top-5 acc 68.750 (67.399)	lr 0.02345
Train [20][820/3239]	Time 0.221 (0.555)	Data Time 0.002 (0.036)	Loss 3.2133 (3.3845)	Entropy 1.68446 (1.68686)	Top-1 acc 49.609 (43.514)	Top-5 acc 70.312 (67.406)	lr 0.02345
Train [20][830/3239]	Time 0.204 (0.554)	Data Time 0.001 (0.036)	Loss 3.3791 (3.3853)	Entropy 1.68441 (1.68683)	Top-1 acc 46.094 (43.482)	Top-5 acc 67.188 (67.393)	lr 0.02344
Train [20][840/3239]	Time 0.235 (0.553)	Data Time 0.001 (0.035)	Loss 3.5575 (3.3851)	Entropy 1.68429 (1.68680)	Top-1 acc 38.281 (43.490)	Top-5 acc 62.109 (67.401)	lr 0.02344
Train [20][850/3239]	Time 0.208 (0.551)	Data Time 0.001 (0.035)	Loss 3.4470 (3.3852)	Entropy 1.68427 (1.68677)	Top-1 acc 42.188 (43.476)	Top-5 acc 68.359 (67.406)	lr 0.02344
Train [20][860/3239]	Time 0.205 (0.550)	Data Time 0.001 (0.034)	Loss 3.2969 (3.3857)	Entropy 1.68423 (1.68674)	Top-1 acc 45.703 (43.484)	Top-5 acc 70.312 (67.408)	lr 0.02344
Train [20][870/3239]	Time 0.367 (0.594)	Data Time 0.002 (0.034)	Loss 3.3348 (3.3854)	Entropy 1.68413 (1.68671)	Top-1 acc 45.703 (43.491)	Top-5 acc 69.922 (67.412)	lr 0.02344
Train [20][880/3239]	Time 0.267 (0.593)	Data Time 0.002 (0.034)	Loss 3.3257 (3.3852)	Entropy 1.68411 (1.68668)	Top-1 acc 42.969 (43.492)	Top-5 acc 69.531 (67.414)	lr 0.02344
Train [20][890/3239]	Time 2.284 (0.591)	Data Time 0.002 (0.033)	Loss 3.5270 (3.3852)	Entropy 1.68411 (1.68665)	Top-1 acc 41.406 (43.500)	Top-5 acc 64.844 (67.410)	lr 0.02344
Train [20][900/3239]	Time 0.203 (0.587)	Data Time 0.002 (0.033)	Loss 3.3715 (3.3855)	Entropy 1.68401 (1.68662)	Top-1 acc 45.703 (43.503)	Top-5 acc 65.234 (67.403)	lr 0.02344
Train [20][910/3239]	Time 0.168 (0.586)	Data Time 0.002 (0.033)	Loss 3.5030 (3.3858)	Entropy 1.68377 (1.68659)	Top-1 acc 42.188 (43.503)	Top-5 acc 65.234 (67.397)	lr 0.02344
Train [20][920/3239]	Time 0.203 (0.584)	Data Time 0.002 (0.032)	Loss 3.3496 (3.3859)	Entropy 1.68373 (1.68656)	Top-1 acc 47.656 (43.513)	Top-5 acc 69.531 (67.396)	lr 0.02344
Train [20][930/3239]	Time 0.189 (0.583)	Data Time 0.001 (0.032)	Loss 3.4761 (3.3863)	Entropy 1.68368 (1.68653)	Top-1 acc 46.875 (43.507)	Top-5 acc 66.406 (67.389)	lr 0.02344
Train [20][940/3239]	Time 0.206 (0.581)	Data Time 0.001 (0.032)	Loss 3.3413 (3.3864)	Entropy 1.68359 (1.68650)	Top-1 acc 45.703 (43.500)	Top-5 acc 67.969 (67.390)	lr 0.02344
Train [20][950/3239]	Time 0.222 (0.580)	Data Time 0.001 (0.031)	Loss 3.2522 (3.3857)	Entropy 1.68355 (1.68647)	Top-1 acc 42.188 (43.508)	Top-5 acc 71.094 (67.412)	lr 0.02344
Train [20][960/3239]	Time 0.218 (0.578)	Data Time 0.001 (0.031)	Loss 3.3892 (3.3856)	Entropy 1.68345 (1.68644)	Top-1 acc 39.062 (43.500)	Top-5 acc 66.406 (67.419)	lr 0.02344
Train [20][970/3239]	Time 0.199 (0.576)	Data Time 0.001 (0.031)	Loss 3.5416 (3.3860)	Entropy 1.68336 (1.68641)	Top-1 acc 41.797 (43.489)	Top-5 acc 62.500 (67.406)	lr 0.02344
Train [20][980/3239]	Time 0.179 (0.575)	Data Time 0.001 (0.031)	Loss 3.5048 (3.3860)	Entropy 1.68331 (1.68637)	Top-1 acc 39.844 (43.494)	Top-5 acc 63.672 (67.412)	lr 0.02344
Train [20][990/3239]	Time 0.210 (0.574)	Data Time 0.001 (0.030)	Loss 3.4438 (3.3867)	Entropy 1.68332 (1.68634)	Top-1 acc 37.109 (43.474)	Top-5 acc 64.453 (67.403)	lr 0.02344
Train [20][1000/3239]	Time 2.324 (0.572)	Data Time 0.002 (0.030)	Loss 3.4721 (3.3866)	Entropy 1.68332 (1.68631)	Top-1 acc 40.625 (43.471)	Top-5 acc 68.359 (67.417)	lr 0.02344
Train [20][1010/3239]	Time 0.208 (0.569)	Data Time 0.001 (0.030)	Loss 3.4851 (3.3860)	Entropy 1.68326 (1.68628)	Top-1 acc 40.234 (43.490)	Top-5 acc 66.406 (67.427)	lr 0.02344
Train [20][1020/3239]	Time 0.171 (0.568)	Data Time 0.001 (0.029)	Loss 3.3279 (3.3858)	Entropy 1.68305 (1.68625)	Top-1 acc 43.750 (43.492)	Top-5 acc 69.141 (67.435)	lr 0.02344
Train [20][1030/3239]	Time 0.204 (0.566)	Data Time 0.001 (0.029)	Loss 3.2849 (3.3859)	Entropy 1.68302 (1.68622)	Top-1 acc 45.312 (43.486)	Top-5 acc 69.922 (67.427)	lr 0.02344
Train [20][1040/3239]	Time 0.158 (0.565)	Data Time 0.001 (0.029)	Loss 3.4452 (3.3864)	Entropy 1.68295 (1.68619)	Top-1 acc 41.797 (43.488)	Top-5 acc 63.281 (67.416)	lr 0.02343
Train [20][1050/3239]	Time 0.256 (0.564)	Data Time 0.002 (0.029)	Loss 3.3757 (3.3862)	Entropy 1.68293 (1.68616)	Top-1 acc 40.234 (43.492)	Top-5 acc 69.531 (67.424)	lr 0.02343
Train [20][1060/3239]	Time 0.291 (0.563)	Data Time 0.001 (0.028)	Loss 3.4512 (3.3860)	Entropy 1.68294 (1.68613)	Top-1 acc 41.406 (43.499)	Top-5 acc 66.797 (67.430)	lr 0.02343
Train [20][1070/3239]	Time 0.197 (0.562)	Data Time 0.001 (0.028)	Loss 3.3693 (3.3871)	Entropy 1.68287 (1.68610)	Top-1 acc 41.016 (43.482)	Top-5 acc 68.750 (67.406)	lr 0.02343
Train [20][1080/3239]	Time 0.219 (0.560)	Data Time 0.001 (0.028)	Loss 3.2813 (3.3872)	Entropy 1.68281 (1.68607)	Top-1 acc 47.656 (43.475)	Top-5 acc 71.094 (67.405)	lr 0.02343
Train [20][1090/3239]	Time 0.232 (0.559)	Data Time 0.001 (0.028)	Loss 3.5182 (3.3877)	Entropy 1.68279 (1.68604)	Top-1 acc 43.359 (43.471)	Top-5 acc 63.672 (67.401)	lr 0.02343
Train [20][1100/3239]	Time 0.228 (0.558)	Data Time 0.001 (0.027)	Loss 3.2659 (3.3882)	Entropy 1.68273 (1.68601)	Top-1 acc 41.016 (43.460)	Top-5 acc 71.484 (67.387)	lr 0.02343
Train [20][1110/3239]	Time 2.356 (0.557)	Data Time 0.002 (0.027)	Loss 3.6350 (3.3888)	Entropy 1.68273 (1.68598)	Top-1 acc 41.406 (43.453)	Top-5 acc 61.719 (67.372)	lr 0.02343
Train [20][1120/3239]	Time 0.324 (0.554)	Data Time 0.001 (0.027)	Loss 3.3061 (3.3890)	Entropy 1.68274 (1.68595)	Top-1 acc 48.047 (43.452)	Top-5 acc 70.312 (67.371)	lr 0.02343
Train [20][1130/3239]	Time 0.221 (0.553)	Data Time 0.001 (0.027)	Loss 3.3154 (3.3893)	Entropy 1.68267 (1.68592)	Top-1 acc 48.438 (43.446)	Top-5 acc 67.578 (67.371)	lr 0.02343
Train [20][1140/3239]	Time 0.206 (0.552)	Data Time 0.001 (0.027)	Loss 3.3536 (3.3896)	Entropy 1.68254 (1.68589)	Top-1 acc 45.312 (43.443)	Top-5 acc 72.266 (67.370)	lr 0.02343
Train [20][1150/3239]	Time 0.229 (0.551)	Data Time 0.001 (0.026)	Loss 3.5564 (3.3891)	Entropy 1.68250 (1.68586)	Top-1 acc 38.672 (43.459)	Top-5 acc 63.672 (67.383)	lr 0.02343
Train [20][1160/3239]	Time 0.254 (0.550)	Data Time 0.001 (0.026)	Loss 3.2549 (3.3889)	Entropy 1.68246 (1.68583)	Top-1 acc 45.703 (43.464)	Top-5 acc 69.531 (67.382)	lr 0.02343
Train [20][1170/3239]	Time 0.210 (0.549)	Data Time 0.001 (0.026)	Loss 3.3376 (3.3887)	Entropy 1.68237 (1.68580)	Top-1 acc 44.922 (43.472)	Top-5 acc 67.578 (67.389)	lr 0.02343
Train [20][1180/3239]	Time 0.263 (0.548)	Data Time 0.001 (0.026)	Loss 3.2921 (3.3882)	Entropy 1.68229 (1.68577)	Top-1 acc 45.312 (43.477)	Top-5 acc 67.188 (67.397)	lr 0.02343
Train [20][1190/3239]	Time 0.330 (0.547)	Data Time 0.001 (0.025)	Loss 3.4041 (3.3877)	Entropy 1.68219 (1.68574)	Top-1 acc 44.922 (43.484)	Top-5 acc 65.625 (67.406)	lr 0.02343
Train [20][1200/3239]	Time 0.217 (0.546)	Data Time 0.002 (0.025)	Loss 3.3743 (3.3878)	Entropy 1.68223 (1.68572)	Top-1 acc 42.578 (43.483)	Top-5 acc 67.969 (67.391)	lr 0.02343
Train [20][1210/3239]	Time 0.241 (0.545)	Data Time 0.001 (0.025)	Loss 3.2862 (3.3872)	Entropy 1.68201 (1.68569)	Top-1 acc 47.266 (43.513)	Top-5 acc 69.531 (67.396)	lr 0.02343
Train [20][1220/3239]	Time 2.347 (0.544)	Data Time 0.001 (0.025)	Loss 3.3889 (3.3872)	Entropy 1.68201 (1.68566)	Top-1 acc 45.703 (43.507)	Top-5 acc 65.234 (67.396)	lr 0.02343
Train [20][1230/3239]	Time 0.195 (0.542)	Data Time 0.001 (0.025)	Loss 3.4765 (3.3872)	Entropy 1.68200 (1.68563)	Top-1 acc 43.359 (43.511)	Top-5 acc 66.797 (67.393)	lr 0.02343
Train [20][1240/3239]	Time 0.239 (0.576)	Data Time 0.003 (0.025)	Loss 3.2449 (3.3864)	Entropy 1.68192 (1.68560)	Top-1 acc 47.266 (43.521)	Top-5 acc 69.922 (67.407)	lr 0.02342
Train [20][1250/3239]	Time 0.330 (0.575)	Data Time 0.002 (0.024)	Loss 3.3422 (3.3866)	Entropy 1.68192 (1.68557)	Top-1 acc 45.703 (43.529)	Top-5 acc 68.359 (67.406)	lr 0.02342
Train [20][1260/3239]	Time 0.234 (0.574)	Data Time 0.005 (0.024)	Loss 3.3246 (3.3866)	Entropy 1.68192 (1.68554)	Top-1 acc 49.609 (43.529)	Top-5 acc 69.922 (67.404)	lr 0.02342
Train [20][1270/3239]	Time 0.210 (0.573)	Data Time 0.001 (0.024)	Loss 3.3708 (3.3871)	Entropy 1.68187 (1.68551)	Top-1 acc 42.969 (43.517)	Top-5 acc 65.625 (67.393)	lr 0.02342
Train [20][1280/3239]	Time 0.210 (0.572)	Data Time 0.001 (0.024)	Loss 3.3868 (3.3869)	Entropy 1.68185 (1.68548)	Top-1 acc 44.531 (43.526)	Top-5 acc 66.406 (67.395)	lr 0.02342
Train [20][1290/3239]	Time 0.206 (0.571)	Data Time 0.001 (0.024)	Loss 3.3457 (3.3864)	Entropy 1.68180 (1.68545)	Top-1 acc 42.188 (43.543)	Top-5 acc 67.578 (67.413)	lr 0.02342
Train [20][1300/3239]	Time 0.205 (0.570)	Data Time 0.001 (0.024)	Loss 3.3559 (3.3862)	Entropy 1.68172 (1.68542)	Top-1 acc 43.750 (43.551)	Top-5 acc 64.844 (67.412)	lr 0.02342
Train [20][1310/3239]	Time 0.298 (0.569)	Data Time 0.001 (0.023)	Loss 3.4564 (3.3864)	Entropy 1.68157 (1.68540)	Top-1 acc 44.531 (43.542)	Top-5 acc 68.359 (67.411)	lr 0.02342
Train [20][1320/3239]	Time 0.235 (0.568)	Data Time 0.001 (0.023)	Loss 3.2463 (3.3862)	Entropy 1.68158 (1.68537)	Top-1 acc 45.312 (43.546)	Top-5 acc 68.750 (67.415)	lr 0.02342
Train [20][1330/3239]	Time 2.223 (0.567)	Data Time 0.001 (0.023)	Loss 3.4192 (3.3860)	Entropy 1.68158 (1.68534)	Top-1 acc 41.406 (43.552)	Top-5 acc 66.406 (67.417)	lr 0.02342
Train [20][1340/3239]	Time 0.262 (0.565)	Data Time 0.001 (0.023)	Loss 3.5063 (3.3861)	Entropy 1.68150 (1.68531)	Top-1 acc 39.844 (43.545)	Top-5 acc 66.406 (67.419)	lr 0.02342
Train [20][1350/3239]	Time 0.219 (0.564)	Data Time 0.001 (0.023)	Loss 3.3239 (3.3861)	Entropy 1.68143 (1.68528)	Top-1 acc 44.141 (43.539)	Top-5 acc 67.969 (67.423)	lr 0.02342
Train [20][1360/3239]	Time 0.212 (0.563)	Data Time 0.001 (0.023)	Loss 3.4882 (3.3857)	Entropy 1.68131 (1.68525)	Top-1 acc 37.500 (43.542)	Top-5 acc 67.578 (67.427)	lr 0.02342
Train [20][1370/3239]	Time 0.238 (0.562)	Data Time 0.001 (0.022)	Loss 3.2674 (3.3859)	Entropy 1.68115 (1.68522)	Top-1 acc 46.094 (43.534)	Top-5 acc 71.094 (67.429)	lr 0.02342
Train [20][1380/3239]	Time 0.369 (0.561)	Data Time 0.001 (0.022)	Loss 3.1711 (3.3859)	Entropy 1.68112 (1.68519)	Top-1 acc 49.219 (43.538)	Top-5 acc 72.266 (67.427)	lr 0.02342
Train [20][1390/3239]	Time 0.215 (0.560)	Data Time 0.001 (0.022)	Loss 3.4402 (3.3859)	Entropy 1.68103 (1.68516)	Top-1 acc 38.672 (43.541)	Top-5 acc 65.625 (67.431)	lr 0.02342
Train [20][1400/3239]	Time 0.234 (0.559)	Data Time 0.001 (0.022)	Loss 3.4673 (3.3860)	Entropy 1.68098 (1.68513)	Top-1 acc 41.406 (43.540)	Top-5 acc 66.016 (67.434)	lr 0.02342
Train [20][1410/3239]	Time 0.136 (0.558)	Data Time 0.001 (0.022)	Loss 3.4460 (3.3858)	Entropy 1.68091 (1.68510)	Top-1 acc 44.531 (43.545)	Top-5 acc 66.406 (67.435)	lr 0.02342
Train [20][1420/3239]	Time 0.149 (0.557)	Data Time 0.001 (0.022)	Loss 3.4659 (3.3859)	Entropy 1.68091 (1.68507)	Top-1 acc 42.188 (43.552)	Top-5 acc 63.672 (67.430)	lr 0.02342
Train [20][1430/3239]	Time 0.217 (0.557)	Data Time 0.001 (0.022)	Loss 3.5165 (3.3858)	Entropy 1.68090 (1.68505)	Top-1 acc 41.406 (43.555)	Top-5 acc 64.062 (67.432)	lr 0.02342
Train [20][1440/3239]	Time 2.332 (0.556)	Data Time 0.002 (0.021)	Loss 3.4791 (3.3858)	Entropy 1.68090 (1.68502)	Top-1 acc 45.312 (43.548)	Top-5 acc 66.016 (67.432)	lr 0.02342
Train [20][1450/3239]	Time 0.240 (0.554)	Data Time 0.002 (0.021)	Loss 3.4261 (3.3859)	Entropy 1.68081 (1.68499)	Top-1 acc 38.672 (43.546)	Top-5 acc 62.500 (67.428)	lr 0.02341
Train [20][1460/3239]	Time 0.225 (0.553)	Data Time 0.001 (0.021)	Loss 3.4124 (3.3859)	Entropy 1.68073 (1.68496)	Top-1 acc 42.188 (43.545)	Top-5 acc 69.141 (67.428)	lr 0.02341
Train [20][1470/3239]	Time 0.221 (0.552)	Data Time 0.001 (0.021)	Loss 3.4502 (3.3859)	Entropy 1.68057 (1.68493)	Top-1 acc 42.188 (43.551)	Top-5 acc 64.062 (67.428)	lr 0.02341
Train [20][1480/3239]	Time 0.227 (0.551)	Data Time 0.001 (0.021)	Loss 3.8709 (3.3865)	Entropy 1.68042 (1.68490)	Top-1 acc 33.594 (43.540)	Top-5 acc 58.203 (67.415)	lr 0.02341
Train [20][1490/3239]	Time 0.223 (0.550)	Data Time 0.001 (0.021)	Loss 3.0198 (3.3863)	Entropy 1.68037 (1.68487)	Top-1 acc 51.953 (43.542)	Top-5 acc 77.344 (67.423)	lr 0.02341
Train [20][1500/3239]	Time 0.153 (0.549)	Data Time 0.001 (0.021)	Loss 3.7032 (3.3865)	Entropy 1.68030 (1.68484)	Top-1 acc 34.375 (43.532)	Top-5 acc 61.719 (67.417)	lr 0.02341
Train [20][1510/3239]	Time 0.299 (0.549)	Data Time 0.001 (0.021)	Loss 3.3667 (3.3863)	Entropy 1.68029 (1.68481)	Top-1 acc 40.234 (43.538)	Top-5 acc 68.750 (67.422)	lr 0.02341
Train [20][1520/3239]	Time 0.199 (0.548)	Data Time 0.001 (0.020)	Loss 3.2457 (3.3861)	Entropy 1.68028 (1.68478)	Top-1 acc 46.875 (43.539)	Top-5 acc 70.312 (67.426)	lr 0.02341
Train [20][1530/3239]	Time 0.207 (0.547)	Data Time 0.001 (0.020)	Loss 3.3453 (3.3857)	Entropy 1.68027 (1.68475)	Top-1 acc 48.047 (43.541)	Top-5 acc 67.969 (67.433)	lr 0.02341
Train [20][1540/3239]	Time 0.155 (0.547)	Data Time 0.001 (0.020)	Loss 3.0869 (3.3856)	Entropy 1.68021 (1.68472)	Top-1 acc 50.000 (43.536)	Top-5 acc 71.875 (67.431)	lr 0.02341
Train [20][1550/3239]	Time 2.300 (0.546)	Data Time 0.001 (0.020)	Loss 3.2629 (3.3849)	Entropy 1.68021 (1.68469)	Top-1 acc 46.484 (43.550)	Top-5 acc 71.094 (67.446)	lr 0.02341
Train [20][1560/3239]	Time 0.216 (0.544)	Data Time 0.001 (0.020)	Loss 3.5220 (3.3846)	Entropy 1.68017 (1.68466)	Top-1 acc 42.188 (43.554)	Top-5 acc 66.016 (67.454)	lr 0.02341
Train [20][1570/3239]	Time 0.175 (0.543)	Data Time 0.002 (0.020)	Loss 3.3746 (3.3848)	Entropy 1.68015 (1.68463)	Top-1 acc 45.703 (43.557)	Top-5 acc 69.922 (67.451)	lr 0.02341
Train [20][1580/3239]	Time 0.301 (0.543)	Data Time 0.001 (0.020)	Loss 3.4100 (3.3844)	Entropy 1.68007 (1.68460)	Top-1 acc 43.750 (43.566)	Top-5 acc 66.797 (67.458)	lr 0.02341
Train [20][1590/3239]	Time 0.225 (0.542)	Data Time 0.001 (0.020)	Loss 3.7968 (3.3846)	Entropy 1.67995 (1.68458)	Top-1 acc 37.891 (43.560)	Top-5 acc 62.891 (67.451)	lr 0.02341
Train [20][1600/3239]	Time 0.197 (0.567)	Data Time 0.002 (0.020)	Loss 3.5119 (3.3847)	Entropy 1.67994 (1.68455)	Top-1 acc 41.797 (43.561)	Top-5 acc 66.016 (67.450)	lr 0.02341
Train [20][1610/3239]	Time 0.217 (0.566)	Data Time 0.002 (0.019)	Loss 3.4031 (3.3844)	Entropy 1.67991 (1.68452)	Top-1 acc 43.750 (43.572)	Top-5 acc 67.578 (67.453)	lr 0.02341
Train [20][1620/3239]	Time 0.198 (0.566)	Data Time 0.001 (0.019)	Loss 3.4553 (3.3843)	Entropy 1.67987 (1.68449)	Top-1 acc 40.625 (43.576)	Top-5 acc 71.094 (67.458)	lr 0.02341
Train [20][1630/3239]	Time 0.224 (0.565)	Data Time 0.001 (0.019)	Loss 3.2946 (3.3845)	Entropy 1.67986 (1.68446)	Top-1 acc 47.266 (43.576)	Top-5 acc 67.969 (67.451)	lr 0.02341
Train [20][1640/3239]	Time 0.222 (0.564)	Data Time 0.001 (0.019)	Loss 3.3877 (3.3843)	Entropy 1.67987 (1.68443)	Top-1 acc 49.609 (43.579)	Top-5 acc 68.359 (67.458)	lr 0.02341
Train [20][1650/3239]	Time 0.314 (0.564)	Data Time 0.001 (0.019)	Loss 3.3146 (3.3848)	Entropy 1.67978 (1.68441)	Top-1 acc 42.188 (43.571)	Top-5 acc 69.922 (67.448)	lr 0.02340
Train [20][1660/3239]	Time 2.236 (0.563)	Data Time 0.002 (0.019)	Loss 3.4045 (3.3855)	Entropy 1.67978 (1.68438)	Top-1 acc 42.969 (43.570)	Top-5 acc 66.797 (67.434)	lr 0.02340
Train [20][1670/3239]	Time 0.229 (0.561)	Data Time 0.001 (0.019)	Loss 3.2706 (3.3854)	Entropy 1.67972 (1.68435)	Top-1 acc 48.047 (43.577)	Top-5 acc 72.266 (67.437)	lr 0.02340
Train [20][1680/3239]	Time 0.222 (0.560)	Data Time 0.001 (0.019)	Loss 3.2645 (3.3854)	Entropy 1.67963 (1.68432)	Top-1 acc 46.875 (43.578)	Top-5 acc 68.359 (67.436)	lr 0.02340
Train [20][1690/3239]	Time 0.217 (0.559)	Data Time 0.001 (0.019)	Loss 3.1459 (3.3855)	Entropy 1.67952 (1.68429)	Top-1 acc 49.609 (43.573)	Top-5 acc 69.922 (67.429)	lr 0.02340
Train [20][1700/3239]	Time 0.241 (0.558)	Data Time 0.001 (0.019)	Loss 3.2962 (3.3851)	Entropy 1.67934 (1.68426)	Top-1 acc 43.359 (43.580)	Top-5 acc 69.531 (67.442)	lr 0.02340
Train [20][1710/3239]	Time 0.310 (0.558)	Data Time 0.001 (0.018)	Loss 3.4541 (3.3847)	Entropy 1.67934 (1.68424)	Top-1 acc 43.750 (43.595)	Top-5 acc 64.062 (67.449)	lr 0.02340
Train [20][1720/3239]	Time 0.224 (0.557)	Data Time 0.001 (0.018)	Loss 3.5108 (3.3846)	Entropy 1.67935 (1.68421)	Top-1 acc 41.797 (43.594)	Top-5 acc 68.359 (67.450)	lr 0.02340
Train [20][1730/3239]	Time 0.227 (0.557)	Data Time 0.001 (0.018)	Loss 3.3202 (3.3844)	Entropy 1.67934 (1.68418)	Top-1 acc 45.312 (43.599)	Top-5 acc 70.703 (67.456)	lr 0.02340
Train [20][1740/3239]	Time 0.209 (0.556)	Data Time 0.001 (0.018)	Loss 3.3290 (3.3841)	Entropy 1.67927 (1.68415)	Top-1 acc 46.484 (43.605)	Top-5 acc 69.922 (67.465)	lr 0.02340
Train [20][1750/3239]	Time 0.250 (0.555)	Data Time 0.002 (0.018)	Loss 3.2772 (3.3839)	Entropy 1.67920 (1.68412)	Top-1 acc 45.312 (43.612)	Top-5 acc 70.312 (67.472)	lr 0.02340
Train [20][1760/3239]	Time 0.192 (0.555)	Data Time 0.001 (0.018)	Loss 3.5324 (3.3838)	Entropy 1.67913 (1.68410)	Top-1 acc 42.578 (43.614)	Top-5 acc 62.891 (67.473)	lr 0.02340
Train [20][1770/3239]	Time 2.286 (0.554)	Data Time 0.001 (0.018)	Loss 3.4648 (3.3838)	Entropy 1.67913 (1.68407)	Top-1 acc 41.016 (43.615)	Top-5 acc 63.281 (67.470)	lr 0.02340
Train [20][1780/3239]	Time 0.305 (0.552)	Data Time 0.001 (0.018)	Loss 3.4787 (3.3841)	Entropy 1.67901 (1.68404)	Top-1 acc 39.062 (43.607)	Top-5 acc 64.453 (67.459)	lr 0.02340
Train [20][1790/3239]	Time 0.246 (0.551)	Data Time 0.001 (0.018)	Loss 3.3900 (3.3840)	Entropy 1.67894 (1.68401)	Top-1 acc 46.094 (43.613)	Top-5 acc 66.016 (67.465)	lr 0.02340
Train [20][1800/3239]	Time 0.267 (0.551)	Data Time 0.001 (0.018)	Loss 3.4684 (3.3846)	Entropy 1.67882 (1.68398)	Top-1 acc 41.797 (43.603)	Top-5 acc 64.844 (67.451)	lr 0.02340
Train [20][1810/3239]	Time 0.259 (0.550)	Data Time 0.001 (0.017)	Loss 3.4203 (3.3846)	Entropy 1.67882 (1.68395)	Top-1 acc 43.750 (43.608)	Top-5 acc 66.797 (67.451)	lr 0.02340
Train [20][1820/3239]	Time 0.180 (0.550)	Data Time 0.001 (0.017)	Loss 3.2599 (3.3844)	Entropy 1.67877 (1.68393)	Top-1 acc 49.609 (43.619)	Top-5 acc 69.922 (67.452)	lr 0.02340
Train [20][1830/3239]	Time 0.214 (0.549)	Data Time 0.001 (0.017)	Loss 3.5136 (3.3847)	Entropy 1.67878 (1.68390)	Top-1 acc 41.406 (43.614)	Top-5 acc 63.672 (67.443)	lr 0.02340
Train [20][1840/3239]	Time 0.203 (0.549)	Data Time 0.001 (0.017)	Loss 3.5323 (3.3848)	Entropy 1.67875 (1.68387)	Top-1 acc 43.750 (43.616)	Top-5 acc 66.797 (67.439)	lr 0.02340
Train [20][1850/3239]	Time 0.205 (0.548)	Data Time 0.001 (0.017)	Loss 3.5211 (3.3848)	Entropy 1.67872 (1.68384)	Top-1 acc 44.531 (43.619)	Top-5 acc 64.453 (67.435)	lr 0.02339
Train [20][1860/3239]	Time 0.207 (0.547)	Data Time 0.001 (0.017)	Loss 3.5820 (3.3849)	Entropy 1.67860 (1.68381)	Top-1 acc 38.672 (43.609)	Top-5 acc 66.016 (67.433)	lr 0.02339
Train [20][1870/3239]	Time 0.201 (0.547)	Data Time 0.001 (0.017)	Loss 3.4283 (3.3850)	Entropy 1.67849 (1.68379)	Top-1 acc 42.188 (43.603)	Top-5 acc 66.406 (67.427)	lr 0.02339
Train [20][1880/3239]	Time 2.285 (0.546)	Data Time 0.001 (0.017)	Loss 3.3324 (3.3848)	Entropy 1.67849 (1.68376)	Top-1 acc 45.312 (43.614)	Top-5 acc 69.531 (67.429)	lr 0.02339
Train [20][1890/3239]	Time 0.211 (0.544)	Data Time 0.001 (0.017)	Loss 3.5351 (3.3849)	Entropy 1.67842 (1.68373)	Top-1 acc 42.969 (43.608)	Top-5 acc 60.938 (67.428)	lr 0.02339
Train [20][1900/3239]	Time 0.219 (0.544)	Data Time 0.001 (0.017)	Loss 3.3486 (3.3850)	Entropy 1.67827 (1.68370)	Top-1 acc 41.406 (43.604)	Top-5 acc 67.578 (67.425)	lr 0.02339
Train [20][1910/3239]	Time 0.219 (0.543)	Data Time 0.001 (0.017)	Loss 3.3565 (3.3848)	Entropy 1.67825 (1.68367)	Top-1 acc 46.094 (43.606)	Top-5 acc 68.750 (67.430)	lr 0.02339
Train [20][1920/3239]	Time 0.209 (0.543)	Data Time 0.001 (0.017)	Loss 3.4868 (3.3850)	Entropy 1.67810 (1.68364)	Top-1 acc 43.750 (43.611)	Top-5 acc 62.891 (67.424)	lr 0.02339
Train [20][1930/3239]	Time 0.197 (0.542)	Data Time 0.001 (0.017)	Loss 3.2972 (3.3852)	Entropy 1.67807 (1.68361)	Top-1 acc 46.875 (43.602)	Top-5 acc 70.312 (67.420)	lr 0.02339
Train [20][1940/3239]	Time 0.232 (0.542)	Data Time 0.001 (0.016)	Loss 3.2395 (3.3850)	Entropy 1.67804 (1.68359)	Top-1 acc 50.391 (43.607)	Top-5 acc 70.703 (67.426)	lr 0.02339
Train [20][1950/3239]	Time 0.332 (0.541)	Data Time 0.001 (0.016)	Loss 3.4775 (3.3855)	Entropy 1.67801 (1.68356)	Top-1 acc 43.359 (43.593)	Top-5 acc 66.797 (67.418)	lr 0.02339
Train [20][1960/3239]	Time 0.265 (0.560)	Data Time 0.003 (0.016)	Loss 3.3547 (3.3853)	Entropy 1.67791 (1.68353)	Top-1 acc 41.797 (43.595)	Top-5 acc 68.750 (67.419)	lr 0.02339
Train [20][1970/3239]	Time 0.224 (0.560)	Data Time 0.002 (0.016)	Loss 3.2847 (3.3853)	Entropy 1.67785 (1.68350)	Top-1 acc 48.438 (43.602)	Top-5 acc 67.578 (67.421)	lr 0.02339
Train [20][1980/3239]	Time 0.201 (0.559)	Data Time 0.001 (0.016)	Loss 3.5209 (3.3856)	Entropy 1.67779 (1.68347)	Top-1 acc 45.312 (43.594)	Top-5 acc 63.281 (67.421)	lr 0.02339
Train [20][1990/3239]	Time 2.286 (0.559)	Data Time 0.002 (0.016)	Loss 3.2934 (3.3853)	Entropy 1.67779 (1.68344)	Top-1 acc 47.656 (43.596)	Top-5 acc 68.359 (67.430)	lr 0.02339
Train [20][2000/3239]	Time 0.208 (0.557)	Data Time 0.001 (0.016)	Loss 3.4882 (3.3854)	Entropy 1.67772 (1.68341)	Top-1 acc 41.406 (43.594)	Top-5 acc 64.453 (67.432)	lr 0.02339
Train [20][2010/3239]	Time 0.320 (0.557)	Data Time 0.001 (0.016)	Loss 3.3541 (3.3857)	Entropy 1.67771 (1.68339)	Top-1 acc 50.781 (43.594)	Top-5 acc 69.141 (67.427)	lr 0.02339
Train [20][2020/3239]	Time 0.205 (0.556)	Data Time 0.001 (0.016)	Loss 3.5324 (3.3856)	Entropy 1.67768 (1.68336)	Top-1 acc 38.281 (43.593)	Top-5 acc 64.844 (67.423)	lr 0.02339
Train [20][2030/3239]	Time 0.222 (0.555)	Data Time 0.002 (0.016)	Loss 3.5501 (3.3858)	Entropy 1.67767 (1.68333)	Top-1 acc 39.062 (43.589)	Top-5 acc 62.500 (67.420)	lr 0.02339
Train [20][2040/3239]	Time 0.192 (0.555)	Data Time 0.001 (0.016)	Loss 3.3355 (3.3857)	Entropy 1.67764 (1.68330)	Top-1 acc 46.875 (43.593)	Top-5 acc 71.094 (67.423)	lr 0.02339
Train [20][2050/3239]	Time 0.199 (0.554)	Data Time 0.001 (0.016)	Loss 3.2610 (3.3856)	Entropy 1.67763 (1.68327)	Top-1 acc 49.219 (43.597)	Top-5 acc 69.531 (67.423)	lr 0.02338
Train [20][2060/3239]	Time 0.275 (0.553)	Data Time 0.001 (0.016)	Loss 3.3012 (3.3854)	Entropy 1.67761 (1.68325)	Top-1 acc 44.531 (43.602)	Top-5 acc 69.141 (67.424)	lr 0.02338
Train [20][2070/3239]	Time 0.197 (0.553)	Data Time 0.001 (0.016)	Loss 3.3957 (3.3856)	Entropy 1.67757 (1.68322)	Top-1 acc 44.531 (43.598)	Top-5 acc 65.625 (67.421)	lr 0.02338
Train [20][2080/3239]	Time 0.163 (0.552)	Data Time 0.001 (0.015)	Loss 3.4296 (3.3856)	Entropy 1.67749 (1.68319)	Top-1 acc 41.406 (43.597)	Top-5 acc 66.406 (67.416)	lr 0.02338
Train [20][2090/3239]	Time 0.380 (0.552)	Data Time 0.003 (0.015)	Loss 3.5644 (3.3856)	Entropy 1.67739 (1.68317)	Top-1 acc 38.281 (43.599)	Top-5 acc 63.672 (67.417)	lr 0.02338
Train [20][2100/3239]	Time 2.286 (0.551)	Data Time 0.001 (0.015)	Loss 3.3583 (3.3856)	Entropy 1.67739 (1.68314)	Top-1 acc 44.922 (43.601)	Top-5 acc 70.312 (67.415)	lr 0.02338
Train [20][2110/3239]	Time 0.265 (0.550)	Data Time 0.001 (0.015)	Loss 3.4645 (3.3857)	Entropy 1.67735 (1.68311)	Top-1 acc 40.625 (43.599)	Top-5 acc 66.797 (67.411)	lr 0.02338
Train [20][2120/3239]	Time 0.205 (0.549)	Data Time 0.001 (0.015)	Loss 3.3462 (3.3858)	Entropy 1.67730 (1.68308)	Top-1 acc 45.312 (43.596)	Top-5 acc 67.969 (67.412)	lr 0.02338
Train [20][2130/3239]	Time 0.216 (0.548)	Data Time 0.001 (0.015)	Loss 3.4522 (3.3858)	Entropy 1.67726 (1.68306)	Top-1 acc 41.406 (43.599)	Top-5 acc 65.625 (67.406)	lr 0.02338
Train [20][2140/3239]	Time 0.190 (0.548)	Data Time 0.001 (0.015)	Loss 3.2787 (3.3858)	Entropy 1.67721 (1.68303)	Top-1 acc 45.312 (43.602)	Top-5 acc 71.484 (67.407)	lr 0.02338
Train [20][2150/3239]	Time 0.207 (0.547)	Data Time 0.001 (0.015)	Loss 3.5443 (3.3858)	Entropy 1.67717 (1.68300)	Top-1 acc 39.062 (43.603)	Top-5 acc 64.062 (67.405)	lr 0.02338
Train [20][2160/3239]	Time 0.245 (0.547)	Data Time 0.001 (0.015)	Loss 3.5061 (3.3859)	Entropy 1.67702 (1.68297)	Top-1 acc 42.969 (43.602)	Top-5 acc 64.453 (67.403)	lr 0.02338
Train [20][2170/3239]	Time 0.274 (0.546)	Data Time 0.001 (0.015)	Loss 3.4931 (3.3858)	Entropy 1.67693 (1.68295)	Top-1 acc 41.016 (43.602)	Top-5 acc 64.062 (67.407)	lr 0.02338
Train [20][2180/3239]	Time 0.193 (0.546)	Data Time 0.001 (0.015)	Loss 3.4537 (3.3860)	Entropy 1.67681 (1.68292)	Top-1 acc 40.234 (43.602)	Top-5 acc 66.797 (67.400)	lr 0.02338
Train [20][2190/3239]	Time 0.234 (0.545)	Data Time 0.001 (0.015)	Loss 3.2813 (3.3859)	Entropy 1.67669 (1.68289)	Top-1 acc 48.828 (43.607)	Top-5 acc 66.797 (67.402)	lr 0.02338
Train [20][2200/3239]	Time 0.168 (0.545)	Data Time 0.001 (0.015)	Loss 3.1879 (3.3858)	Entropy 1.67660 (1.68286)	Top-1 acc 47.656 (43.609)	Top-5 acc 68.750 (67.404)	lr 0.02338
Train [20][2210/3239]	Time 2.387 (0.544)	Data Time 0.002 (0.015)	Loss 3.3146 (3.3858)	Entropy 1.67660 (1.68283)	Top-1 acc 46.094 (43.606)	Top-5 acc 68.750 (67.405)	lr 0.02338
Train [20][2220/3239]	Time 0.274 (0.543)	Data Time 0.001 (0.015)	Loss 3.5495 (3.3860)	Entropy 1.67654 (1.68281)	Top-1 acc 42.969 (43.600)	Top-5 acc 62.500 (67.402)	lr 0.02338
Train [20][2230/3239]	Time 0.235 (0.543)	Data Time 0.001 (0.015)	Loss 3.1927 (3.3857)	Entropy 1.67638 (1.68278)	Top-1 acc 46.484 (43.608)	Top-5 acc 71.094 (67.409)	lr 0.02338
Train [20][2240/3239]	Time 0.180 (0.542)	Data Time 0.001 (0.015)	Loss 3.4558 (3.3856)	Entropy 1.67617 (1.68275)	Top-1 acc 40.625 (43.608)	Top-5 acc 65.625 (67.406)	lr 0.02338
Train [20][2250/3239]	Time 0.228 (0.542)	Data Time 0.001 (0.015)	Loss 3.4602 (3.3855)	Entropy 1.67615 (1.68272)	Top-1 acc 39.453 (43.605)	Top-5 acc 66.797 (67.409)	lr 0.02337
Train [20][2260/3239]	Time 0.245 (0.541)	Data Time 0.001 (0.014)	Loss 3.4064 (3.3856)	Entropy 1.67606 (1.68269)	Top-1 acc 44.922 (43.608)	Top-5 acc 66.406 (67.411)	lr 0.02337
Train [20][2270/3239]	Time 0.228 (0.541)	Data Time 0.001 (0.014)	Loss 3.3727 (3.3857)	Entropy 1.67603 (1.68266)	Top-1 acc 42.969 (43.606)	Top-5 acc 69.531 (67.412)	lr 0.02337
Train [20][2280/3239]	Time 0.283 (0.540)	Data Time 0.001 (0.014)	Loss 3.5897 (3.3856)	Entropy 1.67589 (1.68263)	Top-1 acc 40.234 (43.608)	Top-5 acc 63.672 (67.411)	lr 0.02337
Train [20][2290/3239]	Time 0.196 (0.540)	Data Time 0.001 (0.014)	Loss 3.4194 (3.3855)	Entropy 1.67586 (1.68260)	Top-1 acc 46.094 (43.614)	Top-5 acc 65.625 (67.414)	lr 0.02337
Train [20][2300/3239]	Time 0.217 (0.539)	Data Time 0.001 (0.014)	Loss 3.2029 (3.3854)	Entropy 1.67582 (1.68257)	Top-1 acc 46.094 (43.614)	Top-5 acc 71.875 (67.413)	lr 0.02337
Train [20][2310/3239]	Time 0.255 (0.539)	Data Time 0.001 (0.014)	Loss 3.5009 (3.3853)	Entropy 1.67580 (1.68254)	Top-1 acc 44.922 (43.619)	Top-5 acc 67.188 (67.416)	lr 0.02337
Train [20][2320/3239]	Time 44.063 (0.556)	Data Time 0.001 (0.014)	Loss 3.3988 (3.3854)	Entropy 1.67580 (1.68251)	Top-1 acc 46.875 (43.620)	Top-5 acc 66.406 (67.417)	lr 0.02337
Train [20][2330/3239]	Time 0.242 (0.555)	Data Time 0.002 (0.014)	Loss 3.5067 (3.3856)	Entropy 1.67574 (1.68248)	Top-1 acc 41.797 (43.612)	Top-5 acc 66.016 (67.411)	lr 0.02337
Train [20][2340/3239]	Time 0.307 (0.555)	Data Time 0.001 (0.014)	Loss 3.5320 (3.3855)	Entropy 1.67551 (1.68246)	Top-1 acc 38.672 (43.614)	Top-5 acc 65.625 (67.412)	lr 0.02337
Train [20][2350/3239]	Time 0.223 (0.554)	Data Time 0.001 (0.014)	Loss 3.4241 (3.3854)	Entropy 1.67541 (1.68243)	Top-1 acc 38.281 (43.615)	Top-5 acc 70.312 (67.416)	lr 0.02337
Train [20][2360/3239]	Time 0.191 (0.554)	Data Time 0.001 (0.014)	Loss 3.2763 (3.3854)	Entropy 1.67540 (1.68240)	Top-1 acc 47.266 (43.619)	Top-5 acc 67.188 (67.414)	lr 0.02337
Train [20][2370/3239]	Time 0.190 (0.553)	Data Time 0.002 (0.014)	Loss 3.3947 (3.3855)	Entropy 1.67529 (1.68237)	Top-1 acc 44.531 (43.616)	Top-5 acc 69.531 (67.413)	lr 0.02337
Train [20][2380/3239]	Time 0.219 (0.553)	Data Time 0.001 (0.014)	Loss 3.2192 (3.3852)	Entropy 1.67524 (1.68234)	Top-1 acc 49.219 (43.623)	Top-5 acc 70.703 (67.422)	lr 0.02337
Train [20][2390/3239]	Time 0.191 (0.552)	Data Time 0.001 (0.014)	Loss 3.4446 (3.3852)	Entropy 1.67514 (1.68231)	Top-1 acc 39.453 (43.619)	Top-5 acc 68.359 (67.423)	lr 0.02337
Train [20][2400/3239]	Time 0.194 (0.552)	Data Time 0.001 (0.014)	Loss 3.2635 (3.3853)	Entropy 1.67508 (1.68228)	Top-1 acc 42.188 (43.619)	Top-5 acc 69.922 (67.421)	lr 0.02337
Train [20][2410/3239]	Time 0.217 (0.551)	Data Time 0.001 (0.014)	Loss 3.1690 (3.3856)	Entropy 1.67504 (1.68225)	Top-1 acc 46.875 (43.611)	Top-5 acc 73.047 (67.413)	lr 0.02337
Train [20][2420/3239]	Time 0.205 (0.551)	Data Time 0.001 (0.014)	Loss 3.5166 (3.3857)	Entropy 1.67503 (1.68222)	Top-1 acc 39.062 (43.608)	Top-5 acc 65.234 (67.407)	lr 0.02337
Train [20][2430/3239]	Time 2.314 (0.550)	Data Time 0.001 (0.014)	Loss 3.1158 (3.3858)	Entropy 1.67503 (1.68219)	Top-1 acc 48.828 (43.607)	Top-5 acc 72.656 (67.406)	lr 0.02337
Train [20][2440/3239]	Time 0.202 (0.549)	Data Time 0.001 (0.014)	Loss 3.2634 (3.3856)	Entropy 1.67502 (1.68216)	Top-1 acc 46.484 (43.612)	Top-5 acc 67.578 (67.411)	lr 0.02337
Train [20][2450/3239]	Time 0.168 (0.549)	Data Time 0.001 (0.013)	Loss 3.4346 (3.3855)	Entropy 1.67499 (1.68213)	Top-1 acc 40.625 (43.616)	Top-5 acc 66.406 (67.416)	lr 0.02336
Train [20][2460/3239]	Time 0.208 (0.548)	Data Time 0.001 (0.013)	Loss 3.4383 (3.3854)	Entropy 1.67497 (1.68210)	Top-1 acc 42.969 (43.617)	Top-5 acc 68.750 (67.420)	lr 0.02336
Train [20][2470/3239]	Time 0.327 (0.548)	Data Time 0.001 (0.013)	Loss 3.3896 (3.3854)	Entropy 1.67489 (1.68207)	Top-1 acc 42.188 (43.614)	Top-5 acc 66.406 (67.418)	lr 0.02336
Train [20][2480/3239]	Time 0.225 (0.547)	Data Time 0.001 (0.013)	Loss 3.4778 (3.3854)	Entropy 1.67490 (1.68204)	Top-1 acc 42.578 (43.616)	Top-5 acc 64.844 (67.418)	lr 0.02336
Train [20][2490/3239]	Time 0.193 (0.547)	Data Time 0.001 (0.013)	Loss 3.3651 (3.3852)	Entropy 1.67492 (1.68201)	Top-1 acc 45.703 (43.616)	Top-5 acc 67.578 (67.417)	lr 0.02336
Train [20][2500/3239]	Time 0.207 (0.546)	Data Time 0.001 (0.013)	Loss 3.4794 (3.3852)	Entropy 1.67484 (1.68198)	Top-1 acc 42.188 (43.616)	Top-5 acc 62.109 (67.418)	lr 0.02336
Train [20][2510/3239]	Time 0.218 (0.546)	Data Time 0.002 (0.013)	Loss 3.4760 (3.3852)	Entropy 1.67480 (1.68196)	Top-1 acc 39.453 (43.615)	Top-5 acc 63.672 (67.419)	lr 0.02336
Train [20][2520/3239]	Time 0.198 (0.546)	Data Time 0.001 (0.013)	Loss 3.5767 (3.3853)	Entropy 1.67470 (1.68193)	Top-1 acc 40.234 (43.610)	Top-5 acc 63.281 (67.417)	lr 0.02336
Train [20][2530/3239]	Time 0.346 (0.545)	Data Time 0.001 (0.013)	Loss 3.2609 (3.3854)	Entropy 1.67460 (1.68190)	Top-1 acc 43.750 (43.607)	Top-5 acc 68.750 (67.413)	lr 0.02336
Train [20][2540/3239]	Time 2.411 (0.545)	Data Time 0.001 (0.013)	Loss 3.4927 (3.3856)	Entropy 1.67460 (1.68187)	Top-1 acc 40.234 (43.603)	Top-5 acc 64.453 (67.411)	lr 0.02336
Train [20][2550/3239]	Time 0.233 (0.543)	Data Time 0.001 (0.013)	Loss 3.3942 (3.3854)	Entropy 1.67458 (1.68184)	Top-1 acc 45.312 (43.608)	Top-5 acc 68.359 (67.416)	lr 0.02336
Train [20][2560/3239]	Time 0.211 (0.543)	Data Time 0.001 (0.013)	Loss 3.3657 (3.3855)	Entropy 1.67454 (1.68181)	Top-1 acc 46.484 (43.609)	Top-5 acc 67.578 (67.416)	lr 0.02336
Train [20][2570/3239]	Time 0.204 (0.543)	Data Time 0.001 (0.013)	Loss 3.2296 (3.3856)	Entropy 1.67452 (1.68178)	Top-1 acc 47.656 (43.608)	Top-5 acc 71.094 (67.410)	lr 0.02336
Train [20][2580/3239]	Time 0.249 (0.542)	Data Time 0.001 (0.013)	Loss 3.3809 (3.3857)	Entropy 1.67446 (1.68176)	Top-1 acc 41.016 (43.610)	Top-5 acc 68.359 (67.406)	lr 0.02336
Train [20][2590/3239]	Time 0.207 (0.542)	Data Time 0.001 (0.013)	Loss 3.4235 (3.3855)	Entropy 1.67437 (1.68173)	Top-1 acc 38.672 (43.614)	Top-5 acc 70.703 (67.415)	lr 0.02336
Train [20][2600/3239]	Time 0.203 (0.541)	Data Time 0.001 (0.013)	Loss 3.4251 (3.3857)	Entropy 1.67429 (1.68170)	Top-1 acc 42.188 (43.610)	Top-5 acc 69.141 (67.412)	lr 0.02336
Train [20][2610/3239]	Time 0.237 (0.541)	Data Time 0.001 (0.013)	Loss 3.3359 (3.3857)	Entropy 1.67425 (1.68167)	Top-1 acc 42.969 (43.610)	Top-5 acc 66.406 (67.411)	lr 0.02336
Train [20][2620/3239]	Time 0.200 (0.540)	Data Time 0.001 (0.013)	Loss 3.7101 (3.3861)	Entropy 1.67413 (1.68164)	Top-1 acc 35.938 (43.608)	Top-5 acc 60.156 (67.405)	lr 0.02336
Train [20][2630/3239]	Time 0.200 (0.540)	Data Time 0.001 (0.013)	Loss 3.2463 (3.3859)	Entropy 1.67409 (1.68161)	Top-1 acc 48.438 (43.606)	Top-5 acc 70.312 (67.410)	lr 0.02336
Train [20][2640/3239]	Time 0.253 (0.540)	Data Time 0.001 (0.013)	Loss 3.1818 (3.3856)	Entropy 1.67399 (1.68159)	Top-1 acc 46.875 (43.615)	Top-5 acc 70.312 (67.413)	lr 0.02336
Train [20][2650/3239]	Time 0.158 (0.539)	Data Time 0.001 (0.013)	Loss 3.3573 (3.3854)	Entropy 1.67392 (1.68156)	Top-1 acc 42.578 (43.620)	Top-5 acc 69.531 (67.419)	lr 0.02335
Train [20][2660/3239]	Time 0.325 (0.539)	Data Time 0.001 (0.013)	Loss 3.2394 (3.3853)	Entropy 1.67383 (1.68153)	Top-1 acc 44.141 (43.616)	Top-5 acc 72.266 (67.423)	lr 0.02335
Train [20][2670/3239]	Time 0.252 (0.539)	Data Time 0.001 (0.013)	Loss 3.4787 (3.3855)	Entropy 1.67379 (1.68150)	Top-1 acc 46.094 (43.615)	Top-5 acc 63.672 (67.416)	lr 0.02335
Train [20][2680/3239]	Time 0.299 (0.552)	Data Time 0.004 (0.013)	Loss 3.3033 (3.3856)	Entropy 1.67367 (1.68147)	Top-1 acc 47.266 (43.617)	Top-5 acc 69.922 (67.413)	lr 0.02335
Train [20][2690/3239]	Time 0.210 (0.552)	Data Time 0.002 (0.012)	Loss 3.4980 (3.3855)	Entropy 1.67355 (1.68144)	Top-1 acc 38.672 (43.619)	Top-5 acc 66.406 (67.417)	lr 0.02335
Train [20][2700/3239]	Time 0.210 (0.552)	Data Time 0.002 (0.012)	Loss 3.6190 (3.3858)	Entropy 1.67345 (1.68141)	Top-1 acc 40.234 (43.612)	Top-5 acc 64.062 (67.409)	lr 0.02335
Train [20][2710/3239]	Time 0.224 (0.551)	Data Time 0.001 (0.012)	Loss 3.4601 (3.3860)	Entropy 1.67332 (1.68138)	Top-1 acc 41.797 (43.609)	Top-5 acc 65.625 (67.407)	lr 0.02335
Train [20][2720/3239]	Time 0.191 (0.551)	Data Time 0.001 (0.012)	Loss 3.3851 (3.3859)	Entropy 1.67327 (1.68135)	Top-1 acc 41.406 (43.605)	Top-5 acc 66.797 (67.404)	lr 0.02335
Train [20][2730/3239]	Time 0.204 (0.551)	Data Time 0.002 (0.012)	Loss 3.2264 (3.3860)	Entropy 1.67322 (1.68132)	Top-1 acc 47.266 (43.605)	Top-5 acc 68.750 (67.402)	lr 0.02335
Train [20][2740/3239]	Time 0.187 (0.550)	Data Time 0.001 (0.012)	Loss 3.3816 (3.3859)	Entropy 1.67303 (1.68129)	Top-1 acc 42.969 (43.604)	Top-5 acc 65.625 (67.405)	lr 0.02335
Train [20][2750/3239]	Time 0.232 (0.550)	Data Time 0.001 (0.012)	Loss 3.2432 (3.3857)	Entropy 1.67295 (1.68126)	Top-1 acc 49.219 (43.613)	Top-5 acc 69.141 (67.409)	lr 0.02335
Train [20][2760/3239]	Time 0.278 (0.549)	Data Time 0.001 (0.012)	Loss 3.3532 (3.3859)	Entropy 1.67284 (1.68123)	Top-1 acc 47.656 (43.612)	Top-5 acc 71.094 (67.406)	lr 0.02335
Train [20][2770/3239]	Time 0.199 (0.549)	Data Time 0.001 (0.012)	Loss 3.4233 (3.3859)	Entropy 1.67281 (1.68120)	Top-1 acc 44.141 (43.613)	Top-5 acc 66.406 (67.408)	lr 0.02335
Train [20][2780/3239]	Time 0.233 (0.548)	Data Time 0.001 (0.012)	Loss 3.2831 (3.3858)	Entropy 1.67271 (1.68117)	Top-1 acc 46.484 (43.614)	Top-5 acc 69.141 (67.410)	lr 0.02335
Train [20][2790/3239]	Time 0.352 (0.548)	Data Time 0.002 (0.012)	Loss 3.3940 (3.3858)	Entropy 1.67267 (1.68114)	Top-1 acc 47.266 (43.615)	Top-5 acc 65.625 (67.405)	lr 0.02335
Train [20][2800/3239]	Time 0.210 (0.548)	Data Time 0.001 (0.012)	Loss 3.5618 (3.3858)	Entropy 1.67260 (1.68111)	Top-1 acc 37.109 (43.614)	Top-5 acc 63.672 (67.406)	lr 0.02335
Train [20][2810/3239]	Time 0.182 (0.547)	Data Time 0.001 (0.012)	Loss 3.3302 (3.3858)	Entropy 1.67248 (1.68108)	Top-1 acc 46.484 (43.615)	Top-5 acc 67.578 (67.406)	lr 0.02335
Train [20][2820/3239]	Time 0.195 (0.547)	Data Time 0.001 (0.012)	Loss 3.2933 (3.3859)	Entropy 1.67231 (1.68105)	Top-1 acc 45.703 (43.610)	Top-5 acc 71.094 (67.404)	lr 0.02335
Train [20][2830/3239]	Time 0.207 (0.546)	Data Time 0.001 (0.012)	Loss 3.6518 (3.3860)	Entropy 1.67223 (1.68102)	Top-1 acc 33.984 (43.601)	Top-5 acc 60.156 (67.403)	lr 0.02335
Train [20][2840/3239]	Time 0.166 (0.546)	Data Time 0.001 (0.012)	Loss 3.2437 (3.3860)	Entropy 1.67216 (1.68099)	Top-1 acc 43.750 (43.598)	Top-5 acc 70.703 (67.405)	lr 0.02335
Train [20][2850/3239]	Time 0.295 (0.545)	Data Time 0.001 (0.012)	Loss 3.3313 (3.3862)	Entropy 1.67212 (1.68096)	Top-1 acc 43.359 (43.593)	Top-5 acc 70.703 (67.399)	lr 0.02334
Train [20][2860/3239]	Time 0.205 (0.545)	Data Time 0.001 (0.012)	Loss 3.3991 (3.3864)	Entropy 1.67218 (1.68093)	Top-1 acc 43.359 (43.588)	Top-5 acc 68.359 (67.399)	lr 0.02334
Train [20][2870/3239]	Time 0.330 (0.545)	Data Time 0.002 (0.012)	Loss 3.2382 (3.3862)	Entropy 1.67218 (1.68090)	Top-1 acc 46.094 (43.595)	Top-5 acc 73.438 (67.405)	lr 0.02334
Train [20][2880/3239]	Time 0.260 (0.544)	Data Time 0.001 (0.012)	Loss 3.2224 (3.3862)	Entropy 1.67214 (1.68087)	Top-1 acc 47.266 (43.593)	Top-5 acc 69.922 (67.404)	lr 0.02334
Train [20][2890/3239]	Time 0.149 (0.544)	Data Time 0.001 (0.012)	Loss 3.1444 (3.3863)	Entropy 1.67211 (1.68084)	Top-1 acc 47.656 (43.593)	Top-5 acc 76.953 (67.402)	lr 0.02334
Train [20][2900/3239]	Time 0.231 (0.544)	Data Time 0.001 (0.012)	Loss 3.1839 (3.3864)	Entropy 1.67202 (1.68081)	Top-1 acc 49.609 (43.588)	Top-5 acc 69.141 (67.399)	lr 0.02334
Train [20][2910/3239]	Time 0.329 (0.543)	Data Time 0.001 (0.012)	Loss 3.3488 (3.3863)	Entropy 1.67202 (1.68078)	Top-1 acc 43.750 (43.587)	Top-5 acc 69.141 (67.402)	lr 0.02334
Train [20][2920/3239]	Time 0.204 (0.543)	Data Time 0.001 (0.012)	Loss 3.3841 (3.3862)	Entropy 1.67200 (1.68075)	Top-1 acc 40.234 (43.584)	Top-5 acc 67.969 (67.405)	lr 0.02334
Train [20][2930/3239]	Time 0.201 (0.543)	Data Time 0.001 (0.012)	Loss 3.5039 (3.3862)	Entropy 1.67190 (1.68072)	Top-1 acc 45.312 (43.581)	Top-5 acc 65.625 (67.402)	lr 0.02334
Train [20][2940/3239]	Time 0.233 (0.542)	Data Time 0.001 (0.012)	Loss 3.4301 (3.3862)	Entropy 1.67183 (1.68069)	Top-1 acc 42.578 (43.581)	Top-5 acc 67.578 (67.402)	lr 0.02334
Train [20][2950/3239]	Time 0.218 (0.542)	Data Time 0.001 (0.012)	Loss 3.3617 (3.3867)	Entropy 1.67178 (1.68066)	Top-1 acc 43.359 (43.569)	Top-5 acc 69.922 (67.391)	lr 0.02334
Train [20][2960/3239]	Time 0.255 (0.541)	Data Time 0.001 (0.012)	Loss 3.3661 (3.3866)	Entropy 1.67172 (1.68063)	Top-1 acc 44.531 (43.573)	Top-5 acc 65.234 (67.394)	lr 0.02334
Train [20][2970/3239]	Time 0.254 (0.541)	Data Time 0.001 (0.012)	Loss 3.1679 (3.3864)	Entropy 1.67161 (1.68060)	Top-1 acc 50.391 (43.580)	Top-5 acc 73.438 (67.402)	lr 0.02334
Train [20][2980/3239]	Time 0.241 (0.541)	Data Time 0.001 (0.011)	Loss 3.2691 (3.3864)	Entropy 1.67149 (1.68057)	Top-1 acc 46.484 (43.584)	Top-5 acc 71.484 (67.403)	lr 0.02334
Train [20][2990/3239]	Time 0.218 (0.540)	Data Time 0.001 (0.011)	Loss 3.4818 (3.3862)	Entropy 1.67147 (1.68054)	Top-1 acc 39.062 (43.584)	Top-5 acc 66.016 (67.405)	lr 0.02334
Train [20][3000/3239]	Time 0.224 (0.540)	Data Time 0.001 (0.011)	Loss 3.3980 (3.3861)	Entropy 1.67142 (1.68051)	Top-1 acc 43.359 (43.585)	Top-5 acc 67.188 (67.409)	lr 0.02334
Train [20][3010/3239]	Time 0.271 (0.552)	Data Time 0.004 (0.011)	Loss 3.4135 (3.3859)	Entropy 1.67123 (1.68048)	Top-1 acc 43.359 (43.587)	Top-5 acc 64.844 (67.411)	lr 0.02334
Train [20][3020/3239]	Time 0.231 (0.552)	Data Time 0.002 (0.011)	Loss 3.4430 (3.3861)	Entropy 1.67096 (1.68045)	Top-1 acc 40.625 (43.583)	Top-5 acc 64.062 (67.407)	lr 0.02334
Train [20][3030/3239]	Time 0.254 (0.552)	Data Time 0.002 (0.011)	Loss 3.4571 (3.3863)	Entropy 1.67092 (1.68041)	Top-1 acc 39.453 (43.581)	Top-5 acc 67.188 (67.404)	lr 0.02334
Train [20][3040/3239]	Time 0.396 (0.552)	Data Time 0.002 (0.011)	Loss 3.4719 (3.3864)	Entropy 1.67072 (1.68038)	Top-1 acc 43.750 (43.577)	Top-5 acc 64.062 (67.400)	lr 0.02334
Train [20][3050/3239]	Time 0.173 (0.551)	Data Time 0.001 (0.011)	Loss 3.4923 (3.3864)	Entropy 1.67071 (1.68035)	Top-1 acc 39.062 (43.577)	Top-5 acc 62.109 (67.398)	lr 0.02333
Train [20][3060/3239]	Time 0.247 (0.551)	Data Time 0.001 (0.011)	Loss 3.3615 (3.3865)	Entropy 1.67063 (1.68032)	Top-1 acc 44.141 (43.575)	Top-5 acc 66.797 (67.398)	lr 0.02333
Train [20][3070/3239]	Time 0.237 (0.550)	Data Time 0.001 (0.011)	Loss 3.2830 (3.3861)	Entropy 1.67053 (1.68029)	Top-1 acc 43.359 (43.585)	Top-5 acc 66.016 (67.403)	lr 0.02333
Train [20][3080/3239]	Time 0.282 (0.550)	Data Time 0.001 (0.011)	Loss 3.3087 (3.3861)	Entropy 1.67050 (1.68026)	Top-1 acc 46.484 (43.585)	Top-5 acc 66.406 (67.404)	lr 0.02333
Train [20][3090/3239]	Time 0.220 (0.550)	Data Time 0.001 (0.011)	Loss 3.4216 (3.3861)	Entropy 1.67044 (1.68022)	Top-1 acc 40.625 (43.590)	Top-5 acc 67.578 (67.406)	lr 0.02333
Train [20][3100/3239]	Time 0.295 (0.549)	Data Time 0.001 (0.011)	Loss 3.3958 (3.3862)	Entropy 1.67036 (1.68019)	Top-1 acc 44.922 (43.589)	Top-5 acc 69.141 (67.408)	lr 0.02333
Train [20][3110/3239]	Time 0.238 (0.549)	Data Time 0.001 (0.011)	Loss 3.1141 (3.3863)	Entropy 1.67034 (1.68016)	Top-1 acc 48.047 (43.589)	Top-5 acc 72.266 (67.406)	lr 0.02333
Train [20][3120/3239]	Time 0.268 (0.549)	Data Time 0.001 (0.011)	Loss 3.4833 (3.3864)	Entropy 1.67015 (1.68013)	Top-1 acc 45.703 (43.589)	Top-5 acc 68.359 (67.404)	lr 0.02333
Train [20][3130/3239]	Time 0.198 (0.548)	Data Time 0.001 (0.011)	Loss 3.3009 (3.3863)	Entropy 1.67000 (1.68010)	Top-1 acc 47.656 (43.591)	Top-5 acc 66.797 (67.405)	lr 0.02333
Train [20][3140/3239]	Time 0.161 (0.548)	Data Time 0.001 (0.011)	Loss 3.3432 (3.3862)	Entropy 1.66992 (1.68007)	Top-1 acc 47.266 (43.596)	Top-5 acc 66.797 (67.409)	lr 0.02333
Train [20][3150/3239]	Time 0.197 (0.548)	Data Time 0.001 (0.011)	Loss 3.3206 (3.3859)	Entropy 1.66986 (1.68003)	Top-1 acc 45.312 (43.603)	Top-5 acc 69.141 (67.416)	lr 0.02333
Train [20][3160/3239]	Time 0.211 (0.547)	Data Time 0.001 (0.011)	Loss 3.3071 (3.3859)	Entropy 1.66978 (1.68000)	Top-1 acc 43.750 (43.607)	Top-5 acc 70.703 (67.416)	lr 0.02333
Train [20][3170/3239]	Time 0.333 (0.547)	Data Time 0.001 (0.011)	Loss 3.2056 (3.3858)	Entropy 1.66972 (1.67997)	Top-1 acc 45.703 (43.604)	Top-5 acc 73.438 (67.421)	lr 0.02333
Train [20][3180/3239]	Time 0.208 (0.547)	Data Time 0.000 (0.011)	Loss 3.7562 (3.3861)	Entropy 1.66969 (1.67994)	Top-1 acc 35.938 (43.595)	Top-5 acc 58.984 (67.416)	lr 0.02333
Train [20][3190/3239]	Time 0.205 (0.546)	Data Time 0.000 (0.011)	Loss 3.3464 (3.3860)	Entropy 1.66968 (1.67990)	Top-1 acc 42.188 (43.595)	Top-5 acc 70.312 (67.416)	lr 0.02333
Train [20][3200/3239]	Time 0.206 (0.546)	Data Time 0.000 (0.011)	Loss 3.3484 (3.3861)	Entropy 1.66960 (1.67987)	Top-1 acc 44.141 (43.595)	Top-5 acc 66.406 (67.415)	lr 0.02333
Train [20][3210/3239]	Time 0.208 (0.545)	Data Time 0.000 (0.011)	Loss 3.2799 (3.3857)	Entropy 1.66949 (1.67984)	Top-1 acc 48.828 (43.603)	Top-5 acc 68.359 (67.422)	lr 0.02333
Train [20][3220/3239]	Time 0.218 (0.545)	Data Time 0.000 (0.011)	Loss 3.4231 (3.3857)	Entropy 1.66939 (1.67981)	Top-1 acc 43.750 (43.605)	Top-5 acc 67.188 (67.422)	lr 0.02333
Train [20][3230/3239]	Time 0.216 (0.545)	Data Time 0.000 (0.011)	Loss 3.4226 (3.3859)	Entropy 1.66925 (1.67978)	Top-1 acc 43.359 (43.604)	Top-5 acc 67.578 (67.421)	lr 0.02333
Train [20][3239/3239]	Time 2.172 (0.544)	Data Time 0.000 (0.011)	Loss 3.2663 (3.3859)	Entropy 1.66925 (1.67975)	Top-1 acc 43.210 (43.603)	Top-5 acc 71.605 (67.422)	lr 0.02333
==========Valid [20/120]	loss 2.165	top-1 acc 52.510 (52.510)	top-5 acc 76.166	Train top-1 43.603	top-5 67.422	Entropy 1.66925	Latency-None: 0.000ms	Flops: 558.37M
Train [21][0/3239]	Time 30.981 (30.981)	Data Time 27.506 (27.506)	Loss 3.2701 (3.2701)	Entropy 1.66911 (1.66911)	Top-1 acc 47.656 (47.656)	Top-5 acc 71.875 (71.875)	lr 0.02333
Train [21][10/3239]	Time 2.448 (3.375)	Data Time 0.002 (2.603)	Loss 3.2034 (3.3193)	Entropy 1.66911 (1.66911)	Top-1 acc 48.828 (45.312)	Top-5 acc 70.312 (68.750)	lr 0.02332
Train [21][20/3239]	Time 0.216 (1.874)	Data Time 0.001 (1.364)	Loss 3.7210 (3.3467)	Entropy 1.66904 (1.66908)	Top-1 acc 38.281 (43.899)	Top-5 acc 58.594 (67.987)	lr 0.02332
Train [21][30/3239]	Time 0.247 (1.415)	Data Time 0.001 (0.924)	Loss 3.1913 (3.3389)	Entropy 1.66905 (1.66907)	Top-1 acc 52.344 (44.506)	Top-5 acc 71.875 (68.347)	lr 0.02332
Train [21][40/3239]	Time 0.226 (1.178)	Data Time 0.001 (0.699)	Loss 3.2326 (3.3417)	Entropy 1.66893 (1.66904)	Top-1 acc 50.781 (44.646)	Top-5 acc 68.750 (68.178)	lr 0.02332
Train [21][50/3239]	Time 0.216 (1.032)	Data Time 0.002 (0.563)	Loss 3.3305 (3.3479)	Entropy 1.66885 (1.66901)	Top-1 acc 45.312 (44.516)	Top-5 acc 69.922 (68.084)	lr 0.02332
Train [21][60/3239]	Time 0.242 (0.934)	Data Time 0.002 (0.471)	Loss 3.0835 (3.3413)	Entropy 1.66874 (1.66897)	Top-1 acc 52.344 (44.672)	Top-5 acc 72.656 (68.052)	lr 0.02332
Train [21][70/3239]	Time 0.371 (0.867)	Data Time 0.001 (0.405)	Loss 3.6068 (3.3454)	Entropy 1.66871 (1.66894)	Top-1 acc 39.453 (44.630)	Top-5 acc 58.984 (67.980)	lr 0.02332
Train [21][80/3239]	Time 0.217 (0.816)	Data Time 0.001 (0.355)	Loss 3.2998 (3.3530)	Entropy 1.66869 (1.66891)	Top-1 acc 43.359 (44.314)	Top-5 acc 70.312 (67.843)	lr 0.02332
Train [21][90/3239]	Time 0.221 (0.774)	Data Time 0.001 (0.316)	Loss 3.3822 (3.3573)	Entropy 1.66864 (1.66888)	Top-1 acc 45.703 (44.119)	Top-5 acc 67.188 (67.784)	lr 0.02332
Train [21][100/3239]	Time 0.227 (0.742)	Data Time 0.001 (0.285)	Loss 3.5176 (3.3520)	Entropy 1.66861 (1.66886)	Top-1 acc 41.406 (44.206)	Top-5 acc 65.234 (67.918)	lr 0.02332
Train [21][110/3239]	Time 0.332 (1.107)	Data Time 0.004 (0.259)	Loss 3.1677 (3.3451)	Entropy 1.66830 (1.66883)	Top-1 acc 46.484 (44.359)	Top-5 acc 73.438 (68.060)	lr 0.02332
Train [21][120/3239]	Time 2.399 (1.057)	Data Time 0.002 (0.238)	Loss 3.1881 (3.3510)	Entropy 1.66830 (1.66879)	Top-1 acc 49.219 (44.276)	Top-5 acc 71.094 (67.962)	lr 0.02332
Train [21][130/3239]	Time 0.329 (0.995)	Data Time 0.002 (0.220)	Loss 3.3706 (3.3507)	Entropy 1.66822 (1.66875)	Top-1 acc 39.844 (44.275)	Top-5 acc 71.875 (68.028)	lr 0.02332
Train [21][140/3239]	Time 0.219 (0.957)	Data Time 0.001 (0.205)	Loss 3.1690 (3.3527)	Entropy 1.66822 (1.66871)	Top-1 acc 47.266 (44.238)	Top-5 acc 72.266 (68.019)	lr 0.02332
Train [21][150/3239]	Time 0.192 (0.923)	Data Time 0.001 (0.191)	Loss 3.5508 (3.3525)	Entropy 1.66814 (1.66867)	Top-1 acc 42.969 (44.309)	Top-5 acc 63.672 (67.997)	lr 0.02332
Train [21][160/3239]	Time 0.274 (0.893)	Data Time 0.001 (0.179)	Loss 3.4662 (3.3542)	Entropy 1.66806 (1.66863)	Top-1 acc 40.234 (44.264)	Top-5 acc 68.750 (67.993)	lr 0.02332
Train [21][170/3239]	Time 0.210 (0.868)	Data Time 0.002 (0.169)	Loss 3.3968 (3.3565)	Entropy 1.66792 (1.66860)	Top-1 acc 39.844 (44.186)	Top-5 acc 66.797 (67.944)	lr 0.02332
Train [21][180/3239]	Time 0.196 (0.844)	Data Time 0.001 (0.160)	Loss 3.4114 (3.3570)	Entropy 1.66784 (1.66856)	Top-1 acc 41.797 (44.199)	Top-5 acc 67.188 (67.908)	lr 0.02332
Train [21][190/3239]	Time 0.206 (0.823)	Data Time 0.001 (0.152)	Loss 3.1958 (3.3548)	Entropy 1.66783 (1.66852)	Top-1 acc 45.312 (44.204)	Top-5 acc 71.094 (67.922)	lr 0.02332
Train [21][200/3239]	Time 0.209 (0.804)	Data Time 0.001 (0.144)	Loss 3.2903 (3.3544)	Entropy 1.66779 (1.66848)	Top-1 acc 43.359 (44.230)	Top-5 acc 72.266 (67.957)	lr 0.02332
Train [21][210/3239]	Time 0.264 (0.787)	Data Time 0.001 (0.137)	Loss 3.2542 (3.3528)	Entropy 1.66765 (1.66845)	Top-1 acc 45.312 (44.263)	Top-5 acc 71.094 (68.043)	lr 0.02331
Train [21][220/3239]	Time 0.192 (0.771)	Data Time 0.001 (0.131)	Loss 3.1623 (3.3509)	Entropy 1.66751 (1.66841)	Top-1 acc 49.609 (44.307)	Top-5 acc 70.703 (68.130)	lr 0.02331
Train [21][230/3239]	Time 2.346 (0.756)	Data Time 0.001 (0.126)	Loss 3.3119 (3.3484)	Entropy 1.66751 (1.66837)	Top-1 acc 45.703 (44.364)	Top-5 acc 65.625 (68.160)	lr 0.02331
Train [21][240/3239]	Time 0.211 (0.734)	Data Time 0.001 (0.121)	Loss 3.4037 (3.3498)	Entropy 1.66747 (1.66834)	Top-1 acc 41.016 (44.355)	Top-5 acc 69.141 (68.131)	lr 0.02331
Train [21][250/3239]	Time 0.242 (0.723)	Data Time 0.001 (0.116)	Loss 3.3346 (3.3490)	Entropy 1.66739 (1.66830)	Top-1 acc 50.391 (44.382)	Top-5 acc 70.703 (68.194)	lr 0.02331
Train [21][260/3239]	Time 0.291 (0.712)	Data Time 0.001 (0.111)	Loss 3.3204 (3.3483)	Entropy 1.66731 (1.66826)	Top-1 acc 44.922 (44.364)	Top-5 acc 65.625 (68.190)	lr 0.02331
Train [21][270/3239]	Time 0.215 (0.702)	Data Time 0.001 (0.107)	Loss 3.3901 (3.3476)	Entropy 1.66731 (1.66823)	Top-1 acc 41.406 (44.360)	Top-5 acc 67.188 (68.184)	lr 0.02331
Train [21][280/3239]	Time 0.207 (0.691)	Data Time 0.001 (0.104)	Loss 3.2761 (3.3487)	Entropy 1.66726 (1.66819)	Top-1 acc 45.312 (44.356)	Top-5 acc 70.703 (68.116)	lr 0.02331
Train [21][290/3239]	Time 0.237 (0.682)	Data Time 0.001 (0.100)	Loss 3.1051 (3.3482)	Entropy 1.66714 (1.66816)	Top-1 acc 51.172 (44.358)	Top-5 acc 71.875 (68.118)	lr 0.02331
Train [21][300/3239]	Time 0.249 (0.674)	Data Time 0.001 (0.097)	Loss 3.4225 (3.3496)	Entropy 1.66703 (1.66812)	Top-1 acc 43.359 (44.322)	Top-5 acc 64.062 (68.087)	lr 0.02331
Train [21][310/3239]	Time 0.185 (0.666)	Data Time 0.001 (0.094)	Loss 3.4691 (3.3494)	Entropy 1.66700 (1.66809)	Top-1 acc 42.578 (44.330)	Top-5 acc 66.797 (68.081)	lr 0.02331
Train [21][320/3239]	Time 0.276 (0.659)	Data Time 0.001 (0.091)	Loss 3.5498 (3.3504)	Entropy 1.66696 (1.66805)	Top-1 acc 41.016 (44.290)	Top-5 acc 63.281 (68.055)	lr 0.02331
Train [21][330/3239]	Time 0.193 (0.651)	Data Time 0.001 (0.088)	Loss 3.4211 (3.3510)	Entropy 1.66692 (1.66802)	Top-1 acc 42.578 (44.296)	Top-5 acc 65.234 (68.017)	lr 0.02331
Train [21][340/3239]	Time 2.320 (0.644)	Data Time 0.001 (0.086)	Loss 3.3648 (3.3510)	Entropy 1.66692 (1.66799)	Top-1 acc 44.141 (44.331)	Top-5 acc 66.797 (68.024)	lr 0.02331
Train [21][350/3239]	Time 0.228 (0.632)	Data Time 0.001 (0.083)	Loss 3.3943 (3.3517)	Entropy 1.66689 (1.66796)	Top-1 acc 42.578 (44.315)	Top-5 acc 67.188 (67.991)	lr 0.02331
Train [21][360/3239]	Time 0.217 (0.627)	Data Time 0.001 (0.081)	Loss 3.3186 (3.3529)	Entropy 1.66685 (1.66793)	Top-1 acc 44.922 (44.312)	Top-5 acc 69.922 (67.958)	lr 0.02331
Train [21][370/3239]	Time 0.213 (0.622)	Data Time 0.001 (0.079)	Loss 3.1626 (3.3525)	Entropy 1.66676 (1.66790)	Top-1 acc 47.656 (44.331)	Top-5 acc 69.922 (67.981)	lr 0.02331
Train [21][380/3239]	Time 0.225 (0.617)	Data Time 0.001 (0.077)	Loss 3.4220 (3.3529)	Entropy 1.66666 (1.66786)	Top-1 acc 41.797 (44.305)	Top-5 acc 66.797 (67.968)	lr 0.02331
Train [21][390/3239]	Time 0.355 (0.614)	Data Time 0.001 (0.075)	Loss 3.4601 (3.3524)	Entropy 1.66655 (1.66783)	Top-1 acc 44.531 (44.317)	Top-5 acc 66.406 (67.976)	lr 0.02331
Train [21][400/3239]	Time 0.194 (0.609)	Data Time 0.001 (0.073)	Loss 3.2273 (3.3509)	Entropy 1.66636 (1.66780)	Top-1 acc 45.312 (44.363)	Top-5 acc 71.484 (68.003)	lr 0.02331
Train [21][410/3239]	Time 0.205 (0.606)	Data Time 0.001 (0.071)	Loss 3.4047 (3.3508)	Entropy 1.66631 (1.66776)	Top-1 acc 43.750 (44.359)	Top-5 acc 67.969 (67.991)	lr 0.02330
Train [21][420/3239]	Time 0.227 (0.602)	Data Time 0.001 (0.070)	Loss 3.3338 (3.3516)	Entropy 1.66623 (1.66773)	Top-1 acc 45.312 (44.344)	Top-5 acc 66.016 (67.958)	lr 0.02330
Train [21][430/3239]	Time 0.195 (0.598)	Data Time 0.001 (0.068)	Loss 3.4401 (3.3516)	Entropy 1.66604 (1.66769)	Top-1 acc 41.406 (44.329)	Top-5 acc 67.578 (67.939)	lr 0.02330
Train [21][440/3239]	Time 0.216 (0.594)	Data Time 0.002 (0.067)	Loss 3.4719 (3.3533)	Entropy 1.66599 (1.66765)	Top-1 acc 45.312 (44.305)	Top-5 acc 66.797 (67.922)	lr 0.02330
Train [21][450/3239]	Time 2.594 (0.592)	Data Time 0.003 (0.065)	Loss 3.5588 (3.3534)	Entropy 1.66599 (1.66762)	Top-1 acc 40.625 (44.305)	Top-5 acc 64.062 (67.919)	lr 0.02330
Train [21][460/3239]	Time 0.256 (0.585)	Data Time 0.002 (0.064)	Loss 3.2294 (3.3518)	Entropy 1.66598 (1.66758)	Top-1 acc 46.484 (44.336)	Top-5 acc 71.875 (67.937)	lr 0.02330
Train [21][470/3239]	Time 0.261 (0.583)	Data Time 0.001 (0.063)	Loss 3.3921 (3.3521)	Entropy 1.66596 (1.66755)	Top-1 acc 42.969 (44.333)	Top-5 acc 66.797 (67.952)	lr 0.02330
Train [21][480/3239]	Time 0.219 (0.673)	Data Time 0.002 (0.061)	Loss 3.3691 (3.3541)	Entropy 1.66593 (1.66751)	Top-1 acc 40.234 (44.294)	Top-5 acc 67.969 (67.909)	lr 0.02330
Train [21][490/3239]	Time 0.211 (0.668)	Data Time 0.002 (0.060)	Loss 3.4267 (3.3540)	Entropy 1.66596 (1.66748)	Top-1 acc 45.312 (44.287)	Top-5 acc 67.188 (67.932)	lr 0.02330
Train [21][500/3239]	Time 0.167 (0.664)	Data Time 0.001 (0.059)	Loss 3.4518 (3.3533)	Entropy 1.66591 (1.66745)	Top-1 acc 37.500 (44.273)	Top-5 acc 65.234 (67.936)	lr 0.02330
Train [21][510/3239]	Time 0.242 (0.660)	Data Time 0.002 (0.058)	Loss 3.0124 (3.3520)	Entropy 1.66591 (1.66742)	Top-1 acc 50.781 (44.294)	Top-5 acc 77.344 (67.974)	lr 0.02330
Train [21][520/3239]	Time 0.215 (0.656)	Data Time 0.001 (0.057)	Loss 3.3207 (3.3508)	Entropy 1.66581 (1.66739)	Top-1 acc 47.266 (44.315)	Top-5 acc 69.922 (68.029)	lr 0.02330
Train [21][530/3239]	Time 0.230 (0.653)	Data Time 0.001 (0.056)	Loss 3.1953 (3.3495)	Entropy 1.66576 (1.66736)	Top-1 acc 49.219 (44.350)	Top-5 acc 72.656 (68.053)	lr 0.02330
Train [21][540/3239]	Time 0.135 (0.649)	Data Time 0.001 (0.055)	Loss 3.4289 (3.3515)	Entropy 1.66568 (1.66733)	Top-1 acc 41.797 (44.288)	Top-5 acc 69.141 (68.032)	lr 0.02330
Train [21][550/3239]	Time 0.198 (0.645)	Data Time 0.001 (0.054)	Loss 3.3551 (3.3515)	Entropy 1.66564 (1.66730)	Top-1 acc 42.188 (44.304)	Top-5 acc 66.406 (68.040)	lr 0.02330
Train [21][560/3239]	Time 2.308 (0.641)	Data Time 0.001 (0.053)	Loss 3.4888 (3.3515)	Entropy 1.66564 (1.66727)	Top-1 acc 41.797 (44.312)	Top-5 acc 64.453 (68.033)	lr 0.02330
Train [21][570/3239]	Time 0.225 (0.634)	Data Time 0.001 (0.052)	Loss 3.4134 (3.3518)	Entropy 1.66569 (1.66724)	Top-1 acc 46.484 (44.310)	Top-5 acc 66.406 (68.028)	lr 0.02330
Train [21][580/3239]	Time 0.345 (0.631)	Data Time 0.001 (0.051)	Loss 3.2340 (3.3506)	Entropy 1.66568 (1.66722)	Top-1 acc 47.656 (44.352)	Top-5 acc 71.875 (68.049)	lr 0.02330
Train [21][590/3239]	Time 0.249 (0.628)	Data Time 0.002 (0.050)	Loss 3.2788 (3.3503)	Entropy 1.66563 (1.66719)	Top-1 acc 44.531 (44.358)	Top-5 acc 70.703 (68.066)	lr 0.02330
Train [21][600/3239]	Time 0.239 (0.625)	Data Time 0.001 (0.050)	Loss 3.1684 (3.3503)	Entropy 1.66558 (1.66716)	Top-1 acc 46.875 (44.361)	Top-5 acc 71.875 (68.064)	lr 0.02329
Train [21][610/3239]	Time 0.228 (0.622)	Data Time 0.002 (0.049)	Loss 3.3128 (3.3503)	Entropy 1.66561 (1.66714)	Top-1 acc 40.625 (44.348)	Top-5 acc 71.094 (68.067)	lr 0.02329
Train [21][620/3239]	Time 0.212 (0.619)	Data Time 0.001 (0.048)	Loss 3.2989 (3.3498)	Entropy 1.66559 (1.66711)	Top-1 acc 45.703 (44.365)	Top-5 acc 67.969 (68.074)	lr 0.02329
Train [21][630/3239]	Time 0.255 (0.616)	Data Time 0.002 (0.047)	Loss 3.4887 (3.3493)	Entropy 1.66550 (1.66709)	Top-1 acc 39.844 (44.378)	Top-5 acc 66.016 (68.092)	lr 0.02329
Train [21][640/3239]	Time 0.310 (0.614)	Data Time 0.001 (0.047)	Loss 3.3111 (3.3505)	Entropy 1.66536 (1.66706)	Top-1 acc 45.703 (44.341)	Top-5 acc 69.922 (68.074)	lr 0.02329
Train [21][650/3239]	Time 0.266 (0.611)	Data Time 0.001 (0.046)	Loss 3.5393 (3.3515)	Entropy 1.66516 (1.66704)	Top-1 acc 39.062 (44.321)	Top-5 acc 66.016 (68.055)	lr 0.02329
Train [21][660/3239]	Time 0.198 (0.608)	Data Time 0.001 (0.045)	Loss 3.3169 (3.3524)	Entropy 1.66512 (1.66701)	Top-1 acc 44.531 (44.300)	Top-5 acc 70.703 (68.032)	lr 0.02329
Train [21][670/3239]	Time 2.289 (0.606)	Data Time 0.001 (0.045)	Loss 3.3292 (3.3526)	Entropy 1.66512 (1.66698)	Top-1 acc 44.531 (44.298)	Top-5 acc 68.750 (68.021)	lr 0.02329
Train [21][680/3239]	Time 0.202 (0.600)	Data Time 0.001 (0.044)	Loss 3.3761 (3.3532)	Entropy 1.66510 (1.66695)	Top-1 acc 42.578 (44.303)	Top-5 acc 66.016 (68.005)	lr 0.02329
Train [21][690/3239]	Time 0.227 (0.598)	Data Time 0.001 (0.043)	Loss 3.3155 (3.3534)	Entropy 1.66508 (1.66692)	Top-1 acc 46.875 (44.300)	Top-5 acc 67.188 (68.004)	lr 0.02329
Train [21][700/3239]	Time 0.229 (0.595)	Data Time 0.001 (0.043)	Loss 3.1805 (3.3532)	Entropy 1.66504 (1.66690)	Top-1 acc 47.656 (44.290)	Top-5 acc 72.656 (67.993)	lr 0.02329
Train [21][710/3239]	Time 0.375 (0.593)	Data Time 0.001 (0.042)	Loss 3.2303 (3.3530)	Entropy 1.66500 (1.66687)	Top-1 acc 48.438 (44.286)	Top-5 acc 68.750 (68.012)	lr 0.02329
Train [21][720/3239]	Time 0.224 (0.591)	Data Time 0.001 (0.042)	Loss 3.2869 (3.3531)	Entropy 1.66489 (1.66684)	Top-1 acc 48.047 (44.293)	Top-5 acc 69.922 (68.007)	lr 0.02329
Train [21][730/3239]	Time 0.171 (0.589)	Data Time 0.001 (0.041)	Loss 3.4227 (3.3521)	Entropy 1.66488 (1.66682)	Top-1 acc 41.406 (44.307)	Top-5 acc 66.406 (68.022)	lr 0.02329
Train [21][740/3239]	Time 0.210 (0.587)	Data Time 0.001 (0.041)	Loss 3.3230 (3.3525)	Entropy 1.66484 (1.66679)	Top-1 acc 47.266 (44.293)	Top-5 acc 69.141 (68.006)	lr 0.02329
Train [21][750/3239]	Time 0.210 (0.585)	Data Time 0.001 (0.040)	Loss 3.3030 (3.3535)	Entropy 1.66476 (1.66677)	Top-1 acc 41.016 (44.265)	Top-5 acc 66.797 (67.987)	lr 0.02329
Train [21][760/3239]	Time 0.164 (0.583)	Data Time 0.001 (0.040)	Loss 3.4460 (3.3525)	Entropy 1.66464 (1.66674)	Top-1 acc 46.484 (44.295)	Top-5 acc 67.188 (68.008)	lr 0.02329
Train [21][770/3239]	Time 0.238 (0.581)	Data Time 0.001 (0.039)	Loss 3.3101 (3.3527)	Entropy 1.66451 (1.66671)	Top-1 acc 42.578 (44.295)	Top-5 acc 69.531 (68.004)	lr 0.02329
Train [21][780/3239]	Time 2.289 (0.579)	Data Time 0.001 (0.039)	Loss 3.4468 (3.3525)	Entropy 1.66451 (1.66668)	Top-1 acc 41.406 (44.294)	Top-5 acc 66.406 (67.999)	lr 0.02329
Train [21][790/3239]	Time 0.196 (0.575)	Data Time 0.001 (0.038)	Loss 3.1953 (3.3529)	Entropy 1.66447 (1.66666)	Top-1 acc 44.531 (44.295)	Top-5 acc 70.703 (67.997)	lr 0.02329
Train [21][800/3239]	Time 0.220 (0.573)	Data Time 0.001 (0.038)	Loss 3.5284 (3.3537)	Entropy 1.66436 (1.66663)	Top-1 acc 42.188 (44.284)	Top-5 acc 66.797 (67.975)	lr 0.02328
Train [21][810/3239]	Time 0.210 (0.571)	Data Time 0.001 (0.037)	Loss 3.5869 (3.3539)	Entropy 1.66424 (1.66660)	Top-1 acc 39.062 (44.275)	Top-5 acc 63.672 (67.978)	lr 0.02328
Train [21][820/3239]	Time 0.171 (0.570)	Data Time 0.001 (0.037)	Loss 3.2643 (3.3544)	Entropy 1.66415 (1.66657)	Top-1 acc 47.266 (44.265)	Top-5 acc 71.875 (67.968)	lr 0.02328
Train [21][830/3239]	Time 0.170 (0.568)	Data Time 0.001 (0.036)	Loss 3.2678 (3.3544)	Entropy 1.66410 (1.66654)	Top-1 acc 46.484 (44.268)	Top-5 acc 71.094 (67.988)	lr 0.02328
Train [21][840/3239]	Time 0.287 (0.611)	Data Time 0.002 (0.036)	Loss 3.1998 (3.3537)	Entropy 1.66405 (1.66651)	Top-1 acc 46.094 (44.295)	Top-5 acc 70.312 (68.003)	lr 0.02328
Train [21][850/3239]	Time 0.203 (0.612)	Data Time 0.002 (0.036)	Loss 3.4473 (3.3536)	Entropy 1.66396 (1.66648)	Top-1 acc 39.453 (44.286)	Top-5 acc 65.234 (68.004)	lr 0.02328
Train [21][860/3239]	Time 0.218 (0.610)	Data Time 0.002 (0.035)	Loss 3.3136 (3.3539)	Entropy 1.66389 (1.66645)	Top-1 acc 42.969 (44.288)	Top-5 acc 71.094 (68.002)	lr 0.02328
Train [21][870/3239]	Time 0.233 (0.608)	Data Time 0.003 (0.035)	Loss 3.2744 (3.3540)	Entropy 1.66381 (1.66642)	Top-1 acc 47.656 (44.286)	Top-5 acc 69.141 (67.988)	lr 0.02328
Train [21][880/3239]	Time 0.252 (0.606)	Data Time 0.001 (0.035)	Loss 3.4494 (3.3547)	Entropy 1.66359 (1.66639)	Top-1 acc 41.406 (44.271)	Top-5 acc 65.234 (67.962)	lr 0.02328
Train [21][890/3239]	Time 2.433 (0.604)	Data Time 0.001 (0.034)	Loss 3.4494 (3.3547)	Entropy 1.66359 (1.66636)	Top-1 acc 41.016 (44.255)	Top-5 acc 67.578 (67.968)	lr 0.02328
Train [21][900/3239]	Time 0.276 (0.600)	Data Time 0.001 (0.034)	Loss 3.3448 (3.3552)	Entropy 1.66348 (1.66633)	Top-1 acc 47.656 (44.255)	Top-5 acc 69.531 (67.961)	lr 0.02328
Train [21][910/3239]	Time 0.253 (0.598)	Data Time 0.001 (0.034)	Loss 3.1995 (3.3545)	Entropy 1.66347 (1.66630)	Top-1 acc 50.391 (44.285)	Top-5 acc 72.266 (67.979)	lr 0.02328
Train [21][920/3239]	Time 0.204 (0.596)	Data Time 0.001 (0.033)	Loss 3.5724 (3.3542)	Entropy 1.66343 (1.66627)	Top-1 acc 38.672 (44.300)	Top-5 acc 65.625 (67.987)	lr 0.02328
Train [21][930/3239]	Time 0.166 (0.594)	Data Time 0.001 (0.033)	Loss 3.3565 (3.3541)	Entropy 1.66337 (1.66623)	Top-1 acc 43.750 (44.317)	Top-5 acc 69.531 (67.994)	lr 0.02328
Train [21][940/3239]	Time 0.234 (0.593)	Data Time 0.001 (0.033)	Loss 3.3964 (3.3534)	Entropy 1.66329 (1.66620)	Top-1 acc 45.312 (44.325)	Top-5 acc 68.359 (68.006)	lr 0.02328
Train [21][950/3239]	Time 0.155 (0.591)	Data Time 0.001 (0.032)	Loss 3.4802 (3.3536)	Entropy 1.66319 (1.66617)	Top-1 acc 41.797 (44.321)	Top-5 acc 64.062 (68.001)	lr 0.02328
Train [21][960/3239]	Time 0.166 (0.589)	Data Time 0.001 (0.032)	Loss 3.5062 (3.3530)	Entropy 1.66304 (1.66614)	Top-1 acc 41.406 (44.332)	Top-5 acc 65.234 (68.009)	lr 0.02328
Train [21][970/3239]	Time 0.191 (0.588)	Data Time 0.001 (0.032)	Loss 3.2544 (3.3535)	Entropy 1.66293 (1.66611)	Top-1 acc 45.312 (44.316)	Top-5 acc 69.922 (67.999)	lr 0.02328
Train [21][980/3239]	Time 0.277 (0.586)	Data Time 0.001 (0.031)	Loss 3.4619 (3.3533)	Entropy 1.66276 (1.66608)	Top-1 acc 43.359 (44.308)	Top-5 acc 66.016 (68.008)	lr 0.02328
Train [21][990/3239]	Time 0.250 (0.585)	Data Time 0.001 (0.031)	Loss 3.4977 (3.3535)	Entropy 1.66274 (1.66604)	Top-1 acc 44.141 (44.307)	Top-5 acc 64.453 (67.997)	lr 0.02327
Train [21][1000/3239]	Time 2.358 (0.583)	Data Time 0.001 (0.031)	Loss 3.3623 (3.3543)	Entropy 1.66274 (1.66601)	Top-1 acc 43.750 (44.287)	Top-5 acc 67.188 (67.979)	lr 0.02327
Train [21][1010/3239]	Time 0.212 (0.580)	Data Time 0.001 (0.030)	Loss 3.2629 (3.3543)	Entropy 1.66269 (1.66598)	Top-1 acc 47.266 (44.289)	Top-5 acc 73.438 (67.984)	lr 0.02327
Train [21][1020/3239]	Time 0.214 (0.579)	Data Time 0.002 (0.030)	Loss 3.3607 (3.3547)	Entropy 1.66267 (1.66594)	Top-1 acc 43.750 (44.283)	Top-5 acc 69.922 (67.978)	lr 0.02327
Train [21][1030/3239]	Time 0.223 (0.577)	Data Time 0.002 (0.030)	Loss 3.3550 (3.3548)	Entropy 1.66265 (1.66591)	Top-1 acc 46.094 (44.287)	Top-5 acc 67.578 (67.971)	lr 0.02327
Train [21][1040/3239]	Time 0.311 (0.576)	Data Time 0.002 (0.030)	Loss 3.0972 (3.3547)	Entropy 1.66258 (1.66588)	Top-1 acc 51.172 (44.303)	Top-5 acc 74.609 (67.971)	lr 0.02327
Train [21][1050/3239]	Time 0.215 (0.575)	Data Time 0.001 (0.029)	Loss 3.2287 (3.3552)	Entropy 1.66253 (1.66585)	Top-1 acc 49.609 (44.287)	Top-5 acc 70.312 (67.966)	lr 0.02327
Train [21][1060/3239]	Time 0.226 (0.573)	Data Time 0.001 (0.029)	Loss 3.5492 (3.3548)	Entropy 1.66245 (1.66582)	Top-1 acc 39.062 (44.299)	Top-5 acc 65.234 (67.972)	lr 0.02327
Train [21][1070/3239]	Time 0.197 (0.572)	Data Time 0.001 (0.029)	Loss 3.1807 (3.3547)	Entropy 1.66248 (1.66579)	Top-1 acc 47.656 (44.292)	Top-5 acc 72.656 (67.977)	lr 0.02327
Train [21][1080/3239]	Time 0.209 (0.571)	Data Time 0.001 (0.029)	Loss 3.2626 (3.3549)	Entropy 1.66242 (1.66575)	Top-1 acc 42.578 (44.289)	Top-5 acc 71.484 (67.979)	lr 0.02327
Train [21][1090/3239]	Time 0.262 (0.570)	Data Time 0.001 (0.028)	Loss 3.1376 (3.3551)	Entropy 1.66242 (1.66572)	Top-1 acc 52.344 (44.289)	Top-5 acc 73.047 (67.982)	lr 0.02327
Train [21][1100/3239]	Time 0.299 (0.568)	Data Time 0.001 (0.028)	Loss 3.3273 (3.3555)	Entropy 1.66238 (1.66569)	Top-1 acc 45.312 (44.284)	Top-5 acc 69.922 (67.981)	lr 0.02327
Train [21][1110/3239]	Time 2.438 (0.567)	Data Time 0.002 (0.028)	Loss 3.3265 (3.3555)	Entropy 1.66238 (1.66566)	Top-1 acc 42.578 (44.277)	Top-5 acc 69.531 (67.981)	lr 0.02327
Train [21][1120/3239]	Time 0.230 (0.564)	Data Time 0.002 (0.028)	Loss 3.1471 (3.3559)	Entropy 1.66239 (1.66564)	Top-1 acc 50.000 (44.273)	Top-5 acc 70.703 (67.976)	lr 0.02327
Train [21][1130/3239]	Time 0.215 (0.563)	Data Time 0.001 (0.027)	Loss 3.3803 (3.3561)	Entropy 1.66229 (1.66561)	Top-1 acc 46.484 (44.277)	Top-5 acc 68.750 (67.969)	lr 0.02327
Train [21][1140/3239]	Time 0.199 (0.562)	Data Time 0.001 (0.027)	Loss 3.2907 (3.3564)	Entropy 1.66223 (1.66558)	Top-1 acc 44.531 (44.270)	Top-5 acc 69.531 (67.960)	lr 0.02327
Train [21][1150/3239]	Time 0.208 (0.561)	Data Time 0.001 (0.027)	Loss 3.3190 (3.3560)	Entropy 1.66201 (1.66555)	Top-1 acc 45.312 (44.266)	Top-5 acc 69.141 (67.967)	lr 0.02327
Train [21][1160/3239]	Time 0.199 (0.560)	Data Time 0.001 (0.027)	Loss 3.4699 (3.3558)	Entropy 1.66196 (1.66551)	Top-1 acc 44.922 (44.276)	Top-5 acc 66.016 (67.978)	lr 0.02327
Train [21][1170/3239]	Time 0.288 (0.559)	Data Time 0.001 (0.027)	Loss 3.3398 (3.3560)	Entropy 1.66194 (1.66548)	Top-1 acc 45.703 (44.265)	Top-5 acc 68.359 (67.983)	lr 0.02327
Train [21][1180/3239]	Time 0.206 (0.558)	Data Time 0.001 (0.026)	Loss 3.4201 (3.3562)	Entropy 1.66200 (1.66545)	Top-1 acc 42.188 (44.262)	Top-5 acc 68.359 (67.980)	lr 0.02327
Train [21][1190/3239]	Time 0.198 (0.557)	Data Time 0.001 (0.026)	Loss 3.5912 (3.3566)	Entropy 1.66190 (1.66543)	Top-1 acc 39.453 (44.261)	Top-5 acc 62.891 (67.975)	lr 0.02326
Train [21][1200/3239]	Time 0.224 (0.593)	Data Time 0.002 (0.026)	Loss 3.4598 (3.3570)	Entropy 1.66177 (1.66540)	Top-1 acc 41.016 (44.246)	Top-5 acc 65.625 (67.966)	lr 0.02326
Train [21][1210/3239]	Time 0.194 (0.592)	Data Time 0.002 (0.026)	Loss 3.7405 (3.3574)	Entropy 1.66175 (1.66537)	Top-1 acc 35.938 (44.235)	Top-5 acc 62.109 (67.955)	lr 0.02326
Train [21][1220/3239]	Time 2.226 (0.591)	Data Time 0.001 (0.025)	Loss 3.6226 (3.3577)	Entropy 1.66175 (1.66534)	Top-1 acc 38.672 (44.230)	Top-5 acc 62.109 (67.951)	lr 0.02326
Train [21][1230/3239]	Time 0.209 (0.588)	Data Time 0.002 (0.025)	Loss 3.2387 (3.3580)	Entropy 1.66173 (1.66531)	Top-1 acc 43.750 (44.220)	Top-5 acc 71.094 (67.944)	lr 0.02326
Train [21][1240/3239]	Time 0.221 (0.586)	Data Time 0.001 (0.025)	Loss 3.3807 (3.3587)	Entropy 1.66160 (1.66528)	Top-1 acc 41.406 (44.212)	Top-5 acc 68.359 (67.923)	lr 0.02326
Train [21][1250/3239]	Time 0.216 (0.585)	Data Time 0.001 (0.025)	Loss 3.4239 (3.3595)	Entropy 1.66152 (1.66525)	Top-1 acc 42.578 (44.203)	Top-5 acc 67.578 (67.910)	lr 0.02326
Train [21][1260/3239]	Time 0.200 (0.584)	Data Time 0.001 (0.025)	Loss 3.3447 (3.3592)	Entropy 1.66142 (1.66522)	Top-1 acc 42.188 (44.211)	Top-5 acc 69.141 (67.919)	lr 0.02326
Train [21][1270/3239]	Time 0.211 (0.583)	Data Time 0.001 (0.025)	Loss 3.2211 (3.3593)	Entropy 1.66137 (1.66519)	Top-1 acc 48.047 (44.212)	Top-5 acc 71.094 (67.912)	lr 0.02326
Train [21][1280/3239]	Time 0.169 (0.582)	Data Time 0.001 (0.024)	Loss 3.3630 (3.3594)	Entropy 1.66130 (1.66516)	Top-1 acc 40.234 (44.206)	Top-5 acc 65.625 (67.906)	lr 0.02326
Train [21][1290/3239]	Time 0.208 (0.581)	Data Time 0.001 (0.024)	Loss 3.4630 (3.3596)	Entropy 1.66139 (1.66513)	Top-1 acc 45.312 (44.201)	Top-5 acc 63.281 (67.891)	lr 0.02326
Train [21][1300/3239]	Time 0.257 (0.579)	Data Time 0.001 (0.024)	Loss 3.3512 (3.3591)	Entropy 1.66133 (1.66510)	Top-1 acc 43.359 (44.205)	Top-5 acc 67.578 (67.901)	lr 0.02326
Train [21][1310/3239]	Time 0.215 (0.578)	Data Time 0.001 (0.024)	Loss 3.3541 (3.3597)	Entropy 1.66119 (1.66507)	Top-1 acc 47.266 (44.198)	Top-5 acc 70.312 (67.891)	lr 0.02326
Train [21][1320/3239]	Time 0.215 (0.577)	Data Time 0.001 (0.024)	Loss 3.3614 (3.3597)	Entropy 1.66114 (1.66504)	Top-1 acc 42.578 (44.191)	Top-5 acc 70.312 (67.892)	lr 0.02326
Train [21][1330/3239]	Time 2.392 (0.576)	Data Time 0.001 (0.024)	Loss 3.3354 (3.3604)	Entropy 1.66114 (1.66501)	Top-1 acc 44.141 (44.170)	Top-5 acc 67.969 (67.879)	lr 0.02326
Train [21][1340/3239]	Time 0.218 (0.574)	Data Time 0.001 (0.023)	Loss 3.1974 (3.3599)	Entropy 1.66107 (1.66498)	Top-1 acc 44.922 (44.172)	Top-5 acc 71.875 (67.888)	lr 0.02326
Train [21][1350/3239]	Time 0.224 (0.573)	Data Time 0.001 (0.023)	Loss 3.4964 (3.3601)	Entropy 1.66097 (1.66495)	Top-1 acc 43.750 (44.174)	Top-5 acc 65.625 (67.887)	lr 0.02326
Train [21][1360/3239]	Time 0.201 (0.572)	Data Time 0.001 (0.023)	Loss 3.4477 (3.3601)	Entropy 1.66095 (1.66492)	Top-1 acc 42.188 (44.183)	Top-5 acc 62.500 (67.881)	lr 0.02326
Train [21][1370/3239]	Time 0.222 (0.571)	Data Time 0.001 (0.023)	Loss 3.1693 (3.3600)	Entropy 1.66091 (1.66489)	Top-1 acc 48.438 (44.188)	Top-5 acc 70.703 (67.877)	lr 0.02326
Train [21][1380/3239]	Time 0.241 (0.570)	Data Time 0.001 (0.023)	Loss 3.4166 (3.3599)	Entropy 1.66089 (1.66486)	Top-1 acc 42.188 (44.195)	Top-5 acc 67.188 (67.886)	lr 0.02325
Train [21][1390/3239]	Time 0.211 (0.569)	Data Time 0.001 (0.023)	Loss 3.4214 (3.3598)	Entropy 1.66083 (1.66484)	Top-1 acc 42.969 (44.197)	Top-5 acc 66.406 (67.888)	lr 0.02325
Train [21][1400/3239]	Time 0.207 (0.568)	Data Time 0.003 (0.022)	Loss 3.2820 (3.3598)	Entropy 1.66080 (1.66481)	Top-1 acc 46.875 (44.197)	Top-5 acc 71.484 (67.888)	lr 0.02325
Train [21][1410/3239]	Time 0.143 (0.567)	Data Time 0.002 (0.022)	Loss 3.4285 (3.3601)	Entropy 1.66062 (1.66478)	Top-1 acc 45.703 (44.190)	Top-5 acc 67.188 (67.888)	lr 0.02325
Train [21][1420/3239]	Time 0.344 (0.566)	Data Time 0.001 (0.022)	Loss 3.3345 (3.3601)	Entropy 1.66055 (1.66475)	Top-1 acc 46.484 (44.188)	Top-5 acc 67.969 (67.886)	lr 0.02325
Train [21][1430/3239]	Time 0.199 (0.565)	Data Time 0.001 (0.022)	Loss 3.4996 (3.3599)	Entropy 1.66044 (1.66472)	Top-1 acc 42.188 (44.194)	Top-5 acc 65.234 (67.889)	lr 0.02325
Train [21][1440/3239]	Time 2.351 (0.564)	Data Time 0.001 (0.022)	Loss 3.4772 (3.3596)	Entropy 1.66044 (1.66469)	Top-1 acc 41.016 (44.204)	Top-5 acc 65.234 (67.900)	lr 0.02325
Train [21][1450/3239]	Time 0.209 (0.562)	Data Time 0.001 (0.022)	Loss 3.2999 (3.3594)	Entropy 1.66035 (1.66466)	Top-1 acc 50.391 (44.215)	Top-5 acc 67.578 (67.902)	lr 0.02325
Train [21][1460/3239]	Time 0.257 (0.561)	Data Time 0.001 (0.022)	Loss 3.5773 (3.3597)	Entropy 1.66025 (1.66463)	Top-1 acc 39.453 (44.205)	Top-5 acc 63.672 (67.892)	lr 0.02325
Train [21][1470/3239]	Time 0.278 (0.560)	Data Time 0.001 (0.022)	Loss 3.4156 (3.3597)	Entropy 1.66007 (1.66460)	Top-1 acc 43.359 (44.207)	Top-5 acc 65.234 (67.893)	lr 0.02325
Train [21][1480/3239]	Time 0.343 (0.560)	Data Time 0.002 (0.021)	Loss 3.3530 (3.3599)	Entropy 1.65999 (1.66457)	Top-1 acc 43.750 (44.205)	Top-5 acc 68.750 (67.890)	lr 0.02325
Train [21][1490/3239]	Time 0.210 (0.559)	Data Time 0.001 (0.021)	Loss 3.3399 (3.3599)	Entropy 1.65985 (1.66454)	Top-1 acc 42.578 (44.203)	Top-5 acc 69.531 (67.893)	lr 0.02325
Train [21][1500/3239]	Time 0.212 (0.558)	Data Time 0.001 (0.021)	Loss 3.3201 (3.3600)	Entropy 1.65978 (1.66451)	Top-1 acc 43.359 (44.199)	Top-5 acc 66.016 (67.889)	lr 0.02325
Train [21][1510/3239]	Time 0.224 (0.557)	Data Time 0.001 (0.021)	Loss 3.2053 (3.3598)	Entropy 1.65965 (1.66447)	Top-1 acc 48.828 (44.202)	Top-5 acc 73.047 (67.894)	lr 0.02325
Train [21][1520/3239]	Time 0.198 (0.556)	Data Time 0.001 (0.021)	Loss 3.2712 (3.3594)	Entropy 1.65968 (1.66444)	Top-1 acc 42.969 (44.215)	Top-5 acc 70.703 (67.900)	lr 0.02325
Train [21][1530/3239]	Time 0.221 (0.556)	Data Time 0.001 (0.021)	Loss 3.4995 (3.3593)	Entropy 1.65959 (1.66441)	Top-1 acc 38.672 (44.213)	Top-5 acc 65.625 (67.909)	lr 0.02325
Train [21][1540/3239]	Time 0.251 (0.555)	Data Time 0.001 (0.021)	Loss 3.3627 (3.3592)	Entropy 1.65962 (1.66438)	Top-1 acc 41.797 (44.212)	Top-5 acc 67.969 (67.915)	lr 0.02325
Train [21][1550/3239]	Time 2.513 (0.554)	Data Time 0.001 (0.020)	Loss 3.4137 (3.3597)	Entropy 1.65962 (1.66435)	Top-1 acc 46.484 (44.201)	Top-5 acc 64.453 (67.906)	lr 0.02325
Train [21][1560/3239]	Time 0.228 (0.552)	Data Time 0.001 (0.020)	Loss 3.2106 (3.3597)	Entropy 1.65954 (1.66432)	Top-1 acc 45.312 (44.201)	Top-5 acc 72.656 (67.904)	lr 0.02325
Train [21][1570/3239]	Time 0.319 (0.576)	Data Time 0.002 (0.020)	Loss 3.4087 (3.3599)	Entropy 1.65938 (1.66429)	Top-1 acc 42.969 (44.196)	Top-5 acc 68.359 (67.904)	lr 0.02325
Train [21][1580/3239]	Time 0.227 (0.576)	Data Time 0.002 (0.020)	Loss 3.5037 (3.3599)	Entropy 1.65928 (1.66426)	Top-1 acc 41.016 (44.204)	Top-5 acc 64.062 (67.906)	lr 0.02324
Train [21][1590/3239]	Time 0.206 (0.575)	Data Time 0.001 (0.020)	Loss 3.2734 (3.3598)	Entropy 1.65919 (1.66422)	Top-1 acc 42.969 (44.201)	Top-5 acc 70.312 (67.908)	lr 0.02324
Train [21][1600/3239]	Time 0.216 (0.574)	Data Time 0.001 (0.020)	Loss 3.4083 (3.3604)	Entropy 1.65914 (1.66419)	Top-1 acc 43.359 (44.189)	Top-5 acc 66.406 (67.896)	lr 0.02324
Train [21][1610/3239]	Time 0.258 (0.574)	Data Time 0.001 (0.020)	Loss 3.4330 (3.3608)	Entropy 1.65906 (1.66416)	Top-1 acc 39.062 (44.182)	Top-5 acc 65.234 (67.889)	lr 0.02324
Train [21][1620/3239]	Time 0.196 (0.573)	Data Time 0.001 (0.020)	Loss 3.5185 (3.3607)	Entropy 1.65904 (1.66413)	Top-1 acc 43.359 (44.183)	Top-5 acc 63.672 (67.894)	lr 0.02324
Train [21][1630/3239]	Time 0.231 (0.572)	Data Time 0.001 (0.020)	Loss 3.3291 (3.3606)	Entropy 1.65901 (1.66410)	Top-1 acc 45.703 (44.184)	Top-5 acc 69.141 (67.897)	lr 0.02324
Train [21][1640/3239]	Time 0.215 (0.571)	Data Time 0.001 (0.019)	Loss 3.4384 (3.3606)	Entropy 1.65892 (1.66407)	Top-1 acc 41.406 (44.185)	Top-5 acc 66.797 (67.898)	lr 0.02324
Train [21][1650/3239]	Time 0.225 (0.570)	Data Time 0.001 (0.019)	Loss 3.7702 (3.3609)	Entropy 1.65872 (1.66403)	Top-1 acc 36.719 (44.179)	Top-5 acc 62.109 (67.889)	lr 0.02324
Train [21][1660/3239]	Time 2.254 (0.569)	Data Time 0.001 (0.019)	Loss 3.3420 (3.3607)	Entropy 1.65872 (1.66400)	Top-1 acc 44.141 (44.184)	Top-5 acc 69.141 (67.890)	lr 0.02324
Train [21][1670/3239]	Time 0.254 (0.567)	Data Time 0.001 (0.019)	Loss 3.3448 (3.3610)	Entropy 1.65871 (1.66397)	Top-1 acc 44.141 (44.177)	Top-5 acc 66.797 (67.886)	lr 0.02324
Train [21][1680/3239]	Time 0.228 (0.567)	Data Time 0.001 (0.019)	Loss 3.1479 (3.3609)	Entropy 1.65871 (1.66394)	Top-1 acc 51.562 (44.173)	Top-5 acc 69.922 (67.886)	lr 0.02324
Train [21][1690/3239]	Time 0.235 (0.566)	Data Time 0.001 (0.019)	Loss 3.3240 (3.3608)	Entropy 1.65841 (1.66391)	Top-1 acc 46.875 (44.176)	Top-5 acc 67.188 (67.888)	lr 0.02324
Train [21][1700/3239]	Time 0.193 (0.565)	Data Time 0.001 (0.019)	Loss 3.4507 (3.3611)	Entropy 1.65837 (1.66388)	Top-1 acc 41.797 (44.169)	Top-5 acc 65.625 (67.881)	lr 0.02324
Train [21][1710/3239]	Time 0.213 (0.564)	Data Time 0.001 (0.019)	Loss 3.4280 (3.3612)	Entropy 1.65833 (1.66384)	Top-1 acc 44.141 (44.170)	Top-5 acc 67.578 (67.878)	lr 0.02324
Train [21][1720/3239]	Time 0.225 (0.563)	Data Time 0.001 (0.019)	Loss 3.3949 (3.3613)	Entropy 1.65829 (1.66381)	Top-1 acc 44.531 (44.168)	Top-5 acc 65.625 (67.870)	lr 0.02324
Train [21][1730/3239]	Time 0.231 (0.563)	Data Time 0.001 (0.019)	Loss 3.4873 (3.3613)	Entropy 1.65813 (1.66378)	Top-1 acc 40.234 (44.170)	Top-5 acc 67.578 (67.871)	lr 0.02324
Train [21][1740/3239]	Time 0.321 (0.562)	Data Time 0.001 (0.018)	Loss 3.4992 (3.3612)	Entropy 1.65806 (1.66375)	Top-1 acc 42.578 (44.179)	Top-5 acc 64.844 (67.876)	lr 0.02324
Train [21][1750/3239]	Time 0.227 (0.561)	Data Time 0.001 (0.018)	Loss 3.3341 (3.3615)	Entropy 1.65800 (1.66371)	Top-1 acc 43.359 (44.173)	Top-5 acc 68.359 (67.868)	lr 0.02324
Train [21][1760/3239]	Time 0.228 (0.561)	Data Time 0.001 (0.018)	Loss 3.3980 (3.3617)	Entropy 1.65791 (1.66368)	Top-1 acc 42.578 (44.163)	Top-5 acc 67.578 (67.862)	lr 0.02324
Train [21][1770/3239]	Time 2.364 (0.560)	Data Time 0.001 (0.018)	Loss 3.4982 (3.3617)	Entropy 1.65791 (1.66365)	Top-1 acc 41.797 (44.165)	Top-5 acc 66.016 (67.869)	lr 0.02323
Train [21][1780/3239]	Time 0.220 (0.558)	Data Time 0.014 (0.018)	Loss 3.3168 (3.3617)	Entropy 1.65785 (1.66362)	Top-1 acc 48.047 (44.166)	Top-5 acc 70.312 (67.868)	lr 0.02323
Train [21][1790/3239]	Time 0.229 (0.557)	Data Time 0.001 (0.018)	Loss 3.2925 (3.3618)	Entropy 1.65771 (1.66358)	Top-1 acc 44.922 (44.160)	Top-5 acc 68.359 (67.868)	lr 0.02323
Train [21][1800/3239]	Time 0.214 (0.557)	Data Time 0.001 (0.018)	Loss 3.4324 (3.3618)	Entropy 1.65766 (1.66355)	Top-1 acc 41.016 (44.164)	Top-5 acc 62.109 (67.864)	lr 0.02323
Train [21][1810/3239]	Time 0.295 (0.556)	Data Time 0.001 (0.018)	Loss 3.2127 (3.3620)	Entropy 1.65752 (1.66352)	Top-1 acc 48.438 (44.159)	Top-5 acc 71.875 (67.858)	lr 0.02323
Train [21][1820/3239]	Time 0.229 (0.556)	Data Time 0.001 (0.018)	Loss 3.2744 (3.3620)	Entropy 1.65742 (1.66348)	Top-1 acc 44.531 (44.158)	Top-5 acc 69.922 (67.861)	lr 0.02323
Train [21][1830/3239]	Time 0.206 (0.555)	Data Time 0.001 (0.018)	Loss 3.3383 (3.3617)	Entropy 1.65729 (1.66345)	Top-1 acc 44.531 (44.162)	Top-5 acc 69.922 (67.869)	lr 0.02323
Train [21][1840/3239]	Time 0.263 (0.554)	Data Time 0.001 (0.018)	Loss 3.3394 (3.3616)	Entropy 1.65722 (1.66342)	Top-1 acc 46.875 (44.165)	Top-5 acc 69.531 (67.870)	lr 0.02323
Train [21][1850/3239]	Time 0.154 (0.554)	Data Time 0.001 (0.017)	Loss 3.2917 (3.3619)	Entropy 1.65717 (1.66338)	Top-1 acc 46.094 (44.164)	Top-5 acc 68.359 (67.866)	lr 0.02323
Train [21][1860/3239]	Time 0.244 (0.553)	Data Time 0.001 (0.017)	Loss 3.3406 (3.3616)	Entropy 1.65721 (1.66335)	Top-1 acc 44.922 (44.168)	Top-5 acc 69.531 (67.868)	lr 0.02323
Train [21][1870/3239]	Time 0.323 (0.553)	Data Time 0.001 (0.017)	Loss 3.2102 (3.3618)	Entropy 1.65707 (1.66332)	Top-1 acc 49.219 (44.172)	Top-5 acc 69.531 (67.862)	lr 0.02323
Train [21][1880/3239]	Time 2.292 (0.552)	Data Time 0.001 (0.017)	Loss 3.4670 (3.3618)	Entropy 1.65707 (1.66328)	Top-1 acc 40.234 (44.163)	Top-5 acc 69.141 (67.860)	lr 0.02323
Train [21][1890/3239]	Time 0.242 (0.550)	Data Time 0.001 (0.017)	Loss 3.4959 (3.3617)	Entropy 1.65697 (1.66325)	Top-1 acc 45.312 (44.165)	Top-5 acc 66.016 (67.860)	lr 0.02323
Train [21][1900/3239]	Time 0.209 (0.550)	Data Time 0.001 (0.017)	Loss 3.5402 (3.3616)	Entropy 1.65688 (1.66322)	Top-1 acc 41.016 (44.169)	Top-5 acc 62.500 (67.857)	lr 0.02323
Train [21][1910/3239]	Time 0.241 (0.549)	Data Time 0.001 (0.017)	Loss 3.4273 (3.3616)	Entropy 1.65687 (1.66318)	Top-1 acc 46.094 (44.170)	Top-5 acc 69.531 (67.860)	lr 0.02323
Train [21][1920/3239]	Time 0.203 (0.549)	Data Time 0.001 (0.017)	Loss 3.3000 (3.3615)	Entropy 1.65686 (1.66315)	Top-1 acc 46.094 (44.171)	Top-5 acc 67.969 (67.864)	lr 0.02323
Train [21][1930/3239]	Time 0.257 (0.569)	Data Time 0.003 (0.017)	Loss 3.3201 (3.3613)	Entropy 1.65682 (1.66312)	Top-1 acc 45.312 (44.171)	Top-5 acc 70.312 (67.872)	lr 0.02323
Train [21][1940/3239]	Time 0.336 (0.569)	Data Time 0.001 (0.017)	Loss 3.4475 (3.3615)	Entropy 1.65677 (1.66309)	Top-1 acc 42.188 (44.168)	Top-5 acc 64.844 (67.868)	lr 0.02323
Train [21][1950/3239]	Time 0.210 (0.568)	Data Time 0.001 (0.017)	Loss 3.5095 (3.3616)	Entropy 1.65670 (1.66305)	Top-1 acc 44.531 (44.173)	Top-5 acc 66.797 (67.867)	lr 0.02323
Train [21][1960/3239]	Time 0.204 (0.568)	Data Time 0.001 (0.017)	Loss 3.7487 (3.3612)	Entropy 1.65667 (1.66302)	Top-1 acc 32.031 (44.178)	Top-5 acc 60.938 (67.877)	lr 0.02322
Train [21][1970/3239]	Time 0.203 (0.567)	Data Time 0.002 (0.017)	Loss 3.3413 (3.3613)	Entropy 1.65663 (1.66299)	Top-1 acc 42.188 (44.174)	Top-5 acc 67.969 (67.874)	lr 0.02322
Train [21][1980/3239]	Time 0.227 (0.566)	Data Time 0.001 (0.016)	Loss 3.4181 (3.3613)	Entropy 1.65656 (1.66296)	Top-1 acc 40.234 (44.177)	Top-5 acc 66.797 (67.875)	lr 0.02322
Train [21][1990/3239]	Time 2.363 (0.566)	Data Time 0.001 (0.016)	Loss 3.2049 (3.3612)	Entropy 1.65656 (1.66292)	Top-1 acc 50.000 (44.179)	Top-5 acc 71.875 (67.876)	lr 0.02322
Train [21][2000/3239]	Time 0.341 (0.564)	Data Time 0.001 (0.016)	Loss 3.4141 (3.3613)	Entropy 1.65650 (1.66289)	Top-1 acc 43.750 (44.178)	Top-5 acc 66.016 (67.869)	lr 0.02322
Train [21][2010/3239]	Time 0.214 (0.563)	Data Time 0.001 (0.016)	Loss 3.2092 (3.3614)	Entropy 1.65647 (1.66286)	Top-1 acc 45.312 (44.174)	Top-5 acc 71.094 (67.868)	lr 0.02322
Train [21][2020/3239]	Time 0.222 (0.563)	Data Time 0.001 (0.016)	Loss 3.4018 (3.3612)	Entropy 1.65637 (1.66283)	Top-1 acc 42.969 (44.179)	Top-5 acc 69.531 (67.877)	lr 0.02322
Train [21][2030/3239]	Time 0.224 (0.562)	Data Time 0.001 (0.016)	Loss 3.3018 (3.3609)	Entropy 1.65632 (1.66280)	Top-1 acc 44.922 (44.180)	Top-5 acc 70.703 (67.884)	lr 0.02322
Train [21][2040/3239]	Time 0.199 (0.561)	Data Time 0.001 (0.016)	Loss 3.4161 (3.3610)	Entropy 1.65623 (1.66276)	Top-1 acc 46.484 (44.174)	Top-5 acc 67.578 (67.889)	lr 0.02322
Train [21][2050/3239]	Time 0.209 (0.561)	Data Time 0.002 (0.016)	Loss 3.8190 (3.3615)	Entropy 1.65618 (1.66273)	Top-1 acc 33.594 (44.162)	Top-5 acc 60.938 (67.884)	lr 0.02322
Train [21][2060/3239]	Time 0.210 (0.560)	Data Time 0.001 (0.016)	Loss 3.6015 (3.3618)	Entropy 1.65612 (1.66270)	Top-1 acc 39.453 (44.161)	Top-5 acc 64.453 (67.872)	lr 0.02322
Train [21][2070/3239]	Time 0.216 (0.560)	Data Time 0.001 (0.016)	Loss 3.3502 (3.3616)	Entropy 1.65610 (1.66267)	Top-1 acc 45.703 (44.174)	Top-5 acc 68.359 (67.875)	lr 0.02322
Train [21][2080/3239]	Time 0.208 (0.559)	Data Time 0.001 (0.016)	Loss 3.2655 (3.3618)	Entropy 1.65603 (1.66264)	Top-1 acc 44.922 (44.173)	Top-5 acc 70.703 (67.872)	lr 0.02322
Train [21][2090/3239]	Time 0.284 (0.559)	Data Time 0.001 (0.016)	Loss 3.5607 (3.3617)	Entropy 1.65591 (1.66260)	Top-1 acc 39.062 (44.173)	Top-5 acc 63.672 (67.875)	lr 0.02322
Train [21][2100/3239]	Time 2.266 (0.558)	Data Time 0.001 (0.016)	Loss 3.3440 (3.3614)	Entropy 1.65591 (1.66257)	Top-1 acc 42.969 (44.177)	Top-5 acc 69.922 (67.882)	lr 0.02322
Train [21][2110/3239]	Time 0.231 (0.556)	Data Time 0.001 (0.016)	Loss 3.2324 (3.3610)	Entropy 1.65589 (1.66254)	Top-1 acc 46.875 (44.185)	Top-5 acc 68.359 (67.889)	lr 0.02322
Train [21][2120/3239]	Time 0.308 (0.556)	Data Time 0.001 (0.016)	Loss 3.5120 (3.3612)	Entropy 1.65572 (1.66251)	Top-1 acc 39.062 (44.182)	Top-5 acc 63.672 (67.886)	lr 0.02322
Train [21][2130/3239]	Time 0.229 (0.555)	Data Time 0.001 (0.015)	Loss 3.1289 (3.3608)	Entropy 1.65560 (1.66248)	Top-1 acc 46.875 (44.182)	Top-5 acc 71.484 (67.889)	lr 0.02322
Train [21][2140/3239]	Time 0.203 (0.555)	Data Time 0.001 (0.015)	Loss 3.4254 (3.3608)	Entropy 1.65554 (1.66244)	Top-1 acc 43.750 (44.183)	Top-5 acc 67.969 (67.887)	lr 0.02322
Train [21][2150/3239]	Time 0.200 (0.554)	Data Time 0.001 (0.015)	Loss 3.5183 (3.3607)	Entropy 1.65542 (1.66241)	Top-1 acc 41.016 (44.186)	Top-5 acc 62.500 (67.890)	lr 0.02322
Train [21][2160/3239]	Time 0.225 (0.554)	Data Time 0.001 (0.015)	Loss 3.2590 (3.3608)	Entropy 1.65537 (1.66238)	Top-1 acc 47.656 (44.186)	Top-5 acc 68.750 (67.886)	lr 0.02321
Train [21][2170/3239]	Time 0.207 (0.553)	Data Time 0.001 (0.015)	Loss 3.4468 (3.3609)	Entropy 1.65527 (1.66235)	Top-1 acc 41.797 (44.185)	Top-5 acc 64.453 (67.887)	lr 0.02321
Train [21][2180/3239]	Time 0.223 (0.553)	Data Time 0.001 (0.015)	Loss 3.3762 (3.3606)	Entropy 1.65520 (1.66231)	Top-1 acc 48.047 (44.193)	Top-5 acc 67.969 (67.891)	lr 0.02321
Train [21][2190/3239]	Time 0.423 (0.552)	Data Time 0.001 (0.015)	Loss 3.1758 (3.3603)	Entropy 1.65518 (1.66228)	Top-1 acc 49.219 (44.199)	Top-5 acc 69.531 (67.895)	lr 0.02321
Train [21][2200/3239]	Time 0.206 (0.552)	Data Time 0.001 (0.015)	Loss 3.5223 (3.3604)	Entropy 1.65499 (1.66225)	Top-1 acc 40.625 (44.194)	Top-5 acc 64.062 (67.889)	lr 0.02321
Train [21][2210/3239]	Time 2.341 (0.551)	Data Time 0.002 (0.015)	Loss 3.3770 (3.3603)	Entropy 1.65499 (1.66222)	Top-1 acc 47.266 (44.194)	Top-5 acc 69.141 (67.895)	lr 0.02321
Train [21][2220/3239]	Time 0.208 (0.550)	Data Time 0.001 (0.015)	Loss 3.4603 (3.3601)	Entropy 1.65500 (1.66218)	Top-1 acc 41.797 (44.194)	Top-5 acc 65.625 (67.901)	lr 0.02321
Train [21][2230/3239]	Time 0.236 (0.549)	Data Time 0.001 (0.015)	Loss 3.3982 (3.3601)	Entropy 1.65488 (1.66215)	Top-1 acc 41.016 (44.196)	Top-5 acc 66.016 (67.900)	lr 0.02321
Train [21][2240/3239]	Time 0.229 (0.549)	Data Time 0.001 (0.015)	Loss 3.4574 (3.3603)	Entropy 1.65485 (1.66212)	Top-1 acc 41.016 (44.193)	Top-5 acc 66.016 (67.893)	lr 0.02321
Train [21][2250/3239]	Time 0.318 (0.548)	Data Time 0.001 (0.015)	Loss 3.1178 (3.3602)	Entropy 1.65477 (1.66209)	Top-1 acc 49.609 (44.197)	Top-5 acc 73.438 (67.894)	lr 0.02321
Train [21][2260/3239]	Time 0.211 (0.548)	Data Time 0.001 (0.015)	Loss 3.2597 (3.3601)	Entropy 1.65465 (1.66205)	Top-1 acc 42.188 (44.205)	Top-5 acc 70.312 (67.899)	lr 0.02321
Train [21][2270/3239]	Time 0.223 (0.548)	Data Time 0.001 (0.015)	Loss 3.3965 (3.3599)	Entropy 1.65466 (1.66202)	Top-1 acc 42.188 (44.204)	Top-5 acc 67.578 (67.902)	lr 0.02321
Train [21][2280/3239]	Time 0.220 (0.547)	Data Time 0.002 (0.015)	Loss 3.6635 (3.3601)	Entropy 1.65462 (1.66199)	Top-1 acc 36.719 (44.201)	Top-5 acc 62.109 (67.897)	lr 0.02321
Train [21][2290/3239]	Time 0.217 (0.566)	Data Time 0.002 (0.014)	Loss 3.2017 (3.3600)	Entropy 1.65450 (1.66196)	Top-1 acc 47.266 (44.201)	Top-5 acc 70.312 (67.898)	lr 0.02321
Train [21][2300/3239]	Time 0.255 (0.566)	Data Time 0.002 (0.014)	Loss 3.1383 (3.3598)	Entropy 1.65448 (1.66192)	Top-1 acc 48.047 (44.202)	Top-5 acc 74.219 (67.902)	lr 0.02321
Train [21][2310/3239]	Time 0.374 (0.565)	Data Time 0.002 (0.014)	Loss 3.2070 (3.3596)	Entropy 1.65429 (1.66189)	Top-1 acc 46.875 (44.202)	Top-5 acc 71.094 (67.902)	lr 0.02321
Train [21][2320/3239]	Time 2.300 (0.565)	Data Time 0.001 (0.014)	Loss 3.4419 (3.3596)	Entropy 1.65429 (1.66186)	Top-1 acc 43.750 (44.205)	Top-5 acc 66.797 (67.901)	lr 0.02321
Train [21][2330/3239]	Time 0.249 (0.563)	Data Time 0.002 (0.014)	Loss 3.3320 (3.3596)	Entropy 1.65425 (1.66183)	Top-1 acc 45.703 (44.204)	Top-5 acc 67.969 (67.903)	lr 0.02321
Train [21][2340/3239]	Time 0.218 (0.563)	Data Time 0.001 (0.014)	Loss 3.2205 (3.3597)	Entropy 1.65416 (1.66179)	Top-1 acc 46.484 (44.200)	Top-5 acc 67.578 (67.902)	lr 0.02321
Train [21][2350/3239]	Time 0.207 (0.562)	Data Time 0.002 (0.014)	Loss 3.2367 (3.3595)	Entropy 1.65411 (1.66176)	Top-1 acc 47.656 (44.209)	Top-5 acc 71.875 (67.905)	lr 0.02320
Train [21][2360/3239]	Time 0.209 (0.562)	Data Time 0.001 (0.014)	Loss 3.4895 (3.3595)	Entropy 1.65406 (1.66173)	Top-1 acc 42.578 (44.210)	Top-5 acc 65.625 (67.907)	lr 0.02320
Train [21][2370/3239]	Time 0.240 (0.561)	Data Time 0.001 (0.014)	Loss 3.3712 (3.3595)	Entropy 1.65400 (1.66170)	Top-1 acc 42.969 (44.215)	Top-5 acc 66.406 (67.910)	lr 0.02320
Train [21][2380/3239]	Time 0.143 (0.561)	Data Time 0.001 (0.014)	Loss 3.3457 (3.3597)	Entropy 1.65390 (1.66166)	Top-1 acc 46.094 (44.210)	Top-5 acc 66.797 (67.904)	lr 0.02320
Train [21][2390/3239]	Time 0.163 (0.560)	Data Time 0.001 (0.014)	Loss 3.5336 (3.3597)	Entropy 1.65398 (1.66163)	Top-1 acc 41.016 (44.213)	Top-5 acc 67.188 (67.905)	lr 0.02320
Train [21][2400/3239]	Time 0.195 (0.560)	Data Time 0.001 (0.014)	Loss 3.3665 (3.3598)	Entropy 1.65396 (1.66160)	Top-1 acc 47.266 (44.214)	Top-5 acc 69.141 (67.902)	lr 0.02320
Train [21][2410/3239]	Time 0.225 (0.559)	Data Time 0.001 (0.014)	Loss 3.2310 (3.3598)	Entropy 1.65383 (1.66157)	Top-1 acc 48.047 (44.218)	Top-5 acc 68.359 (67.901)	lr 0.02320
Train [21][2420/3239]	Time 0.215 (0.558)	Data Time 0.001 (0.014)	Loss 3.2363 (3.3597)	Entropy 1.65365 (1.66154)	Top-1 acc 48.047 (44.220)	Top-5 acc 73.047 (67.905)	lr 0.02320
Train [21][2430/3239]	Time 2.304 (0.558)	Data Time 0.001 (0.014)	Loss 3.2819 (3.3597)	Entropy 1.65365 (1.66150)	Top-1 acc 48.438 (44.218)	Top-5 acc 67.969 (67.903)	lr 0.02320
Train [21][2440/3239]	Time 0.301 (0.557)	Data Time 0.001 (0.014)	Loss 3.4381 (3.3596)	Entropy 1.65372 (1.66147)	Top-1 acc 46.484 (44.221)	Top-5 acc 67.969 (67.908)	lr 0.02320
Train [21][2450/3239]	Time 0.206 (0.556)	Data Time 0.001 (0.014)	Loss 3.3238 (3.3595)	Entropy 1.65363 (1.66144)	Top-1 acc 46.094 (44.228)	Top-5 acc 66.797 (67.910)	lr 0.02320
Train [21][2460/3239]	Time 0.221 (0.556)	Data Time 0.001 (0.014)	Loss 3.2441 (3.3594)	Entropy 1.65354 (1.66141)	Top-1 acc 44.922 (44.231)	Top-5 acc 69.141 (67.910)	lr 0.02320
Train [21][2470/3239]	Time 0.219 (0.555)	Data Time 0.001 (0.014)	Loss 3.3280 (3.3593)	Entropy 1.65343 (1.66138)	Top-1 acc 41.406 (44.233)	Top-5 acc 65.234 (67.915)	lr 0.02320
Train [21][2480/3239]	Time 0.219 (0.555)	Data Time 0.001 (0.014)	Loss 3.2192 (3.3593)	Entropy 1.65341 (1.66134)	Top-1 acc 47.266 (44.234)	Top-5 acc 71.484 (67.915)	lr 0.02320
Train [21][2490/3239]	Time 0.238 (0.554)	Data Time 0.001 (0.013)	Loss 3.1561 (3.3593)	Entropy 1.65325 (1.66131)	Top-1 acc 49.609 (44.237)	Top-5 acc 70.703 (67.911)	lr 0.02320
Train [21][2500/3239]	Time 0.200 (0.554)	Data Time 0.001 (0.013)	Loss 3.3583 (3.3590)	Entropy 1.65323 (1.66128)	Top-1 acc 48.047 (44.242)	Top-5 acc 67.578 (67.916)	lr 0.02320
Train [21][2510/3239]	Time 0.241 (0.553)	Data Time 0.001 (0.013)	Loss 3.2952 (3.3590)	Entropy 1.65318 (1.66125)	Top-1 acc 45.703 (44.242)	Top-5 acc 68.750 (67.918)	lr 0.02320
Train [21][2520/3239]	Time 0.211 (0.553)	Data Time 0.001 (0.013)	Loss 3.2377 (3.3589)	Entropy 1.65312 (1.66121)	Top-1 acc 51.172 (44.244)	Top-5 acc 69.141 (67.920)	lr 0.02320
Train [21][2530/3239]	Time 0.250 (0.552)	Data Time 0.001 (0.013)	Loss 3.4939 (3.3589)	Entropy 1.65294 (1.66118)	Top-1 acc 38.281 (44.241)	Top-5 acc 66.016 (67.920)	lr 0.02320
Train [21][2540/3239]	Time 2.245 (0.552)	Data Time 0.001 (0.013)	Loss 3.1844 (3.3588)	Entropy 1.65294 (1.66115)	Top-1 acc 49.219 (44.245)	Top-5 acc 70.312 (67.916)	lr 0.02319
Train [21][2550/3239]	Time 0.209 (0.551)	Data Time 0.001 (0.013)	Loss 3.5514 (3.3589)	Entropy 1.65291 (1.66112)	Top-1 acc 46.094 (44.247)	Top-5 acc 63.281 (67.919)	lr 0.02319
Train [21][2560/3239]	Time 0.256 (0.550)	Data Time 0.002 (0.013)	Loss 3.2458 (3.3588)	Entropy 1.65290 (1.66109)	Top-1 acc 49.219 (44.255)	Top-5 acc 66.016 (67.919)	lr 0.02319
Train [21][2570/3239]	Time 0.324 (0.550)	Data Time 0.001 (0.013)	Loss 3.3487 (3.3590)	Entropy 1.65261 (1.66105)	Top-1 acc 44.922 (44.251)	Top-5 acc 66.016 (67.913)	lr 0.02319
Train [21][2580/3239]	Time 0.229 (0.549)	Data Time 0.001 (0.013)	Loss 3.1214 (3.3591)	Entropy 1.65253 (1.66102)	Top-1 acc 49.219 (44.249)	Top-5 acc 69.141 (67.906)	lr 0.02319
Train [21][2590/3239]	Time 0.220 (0.549)	Data Time 0.002 (0.013)	Loss 3.4061 (3.3588)	Entropy 1.65252 (1.66099)	Top-1 acc 45.703 (44.257)	Top-5 acc 67.188 (67.911)	lr 0.02319
Train [21][2600/3239]	Time 0.221 (0.549)	Data Time 0.001 (0.013)	Loss 3.4070 (3.3586)	Entropy 1.65245 (1.66095)	Top-1 acc 38.672 (44.256)	Top-5 acc 67.578 (67.911)	lr 0.02319
Train [21][2610/3239]	Time 0.211 (0.548)	Data Time 0.001 (0.013)	Loss 3.3719 (3.3585)	Entropy 1.65231 (1.66092)	Top-1 acc 46.875 (44.262)	Top-5 acc 67.578 (67.913)	lr 0.02319
Train [21][2620/3239]	Time 0.219 (0.548)	Data Time 0.001 (0.013)	Loss 3.4334 (3.3587)	Entropy 1.65221 (1.66089)	Top-1 acc 39.844 (44.258)	Top-5 acc 64.062 (67.909)	lr 0.02319
Train [21][2630/3239]	Time 0.212 (0.547)	Data Time 0.001 (0.013)	Loss 3.2027 (3.3588)	Entropy 1.65215 (1.66086)	Top-1 acc 44.922 (44.258)	Top-5 acc 72.656 (67.905)	lr 0.02319
Train [21][2640/3239]	Time 0.363 (0.547)	Data Time 0.001 (0.013)	Loss 3.2927 (3.3588)	Entropy 1.65209 (1.66082)	Top-1 acc 44.141 (44.257)	Top-5 acc 69.531 (67.905)	lr 0.02319
Train [21][2650/3239]	Time 0.424 (0.562)	Data Time 0.004 (0.013)	Loss 3.4990 (3.3590)	Entropy 1.65199 (1.66079)	Top-1 acc 38.672 (44.253)	Top-5 acc 61.719 (67.897)	lr 0.02319
Train [21][2660/3239]	Time 0.242 (0.562)	Data Time 0.002 (0.013)	Loss 3.2689 (3.3588)	Entropy 1.65193 (1.66076)	Top-1 acc 45.312 (44.257)	Top-5 acc 68.359 (67.905)	lr 0.02319
Train [21][2670/3239]	Time 0.230 (0.561)	Data Time 0.002 (0.013)	Loss 3.3071 (3.3588)	Entropy 1.65191 (1.66072)	Top-1 acc 46.875 (44.256)	Top-5 acc 67.969 (67.907)	lr 0.02319
Train [21][2680/3239]	Time 0.235 (0.561)	Data Time 0.001 (0.013)	Loss 3.3955 (3.3584)	Entropy 1.65187 (1.66069)	Top-1 acc 38.672 (44.263)	Top-5 acc 67.578 (67.915)	lr 0.02319
Train [21][2690/3239]	Time 0.225 (0.560)	Data Time 0.001 (0.013)	Loss 3.4417 (3.3585)	Entropy 1.65179 (1.66066)	Top-1 acc 42.969 (44.259)	Top-5 acc 67.578 (67.913)	lr 0.02319
Train [21][2700/3239]	Time 0.302 (0.560)	Data Time 0.001 (0.013)	Loss 3.2380 (3.3584)	Entropy 1.65173 (1.66063)	Top-1 acc 45.312 (44.264)	Top-5 acc 70.312 (67.919)	lr 0.02319
Train [21][2710/3239]	Time 0.220 (0.559)	Data Time 0.001 (0.013)	Loss 3.6999 (3.3584)	Entropy 1.65164 (1.66059)	Top-1 acc 36.719 (44.267)	Top-5 acc 62.500 (67.919)	lr 0.02319
Train [21][2720/3239]	Time 0.237 (0.559)	Data Time 0.001 (0.012)	Loss 3.2666 (3.3581)	Entropy 1.65158 (1.66056)	Top-1 acc 48.828 (44.269)	Top-5 acc 71.484 (67.928)	lr 0.02319
Train [21][2730/3239]	Time 0.280 (0.558)	Data Time 0.001 (0.012)	Loss 3.5312 (3.3582)	Entropy 1.65161 (1.66053)	Top-1 acc 46.094 (44.266)	Top-5 acc 63.672 (67.924)	lr 0.02318
Train [21][2740/3239]	Time 0.243 (0.558)	Data Time 0.001 (0.012)	Loss 3.6296 (3.3583)	Entropy 1.65162 (1.66049)	Top-1 acc 32.422 (44.259)	Top-5 acc 58.203 (67.921)	lr 0.02318
Train [21][2750/3239]	Time 0.238 (0.558)	Data Time 0.001 (0.012)	Loss 3.4002 (3.3583)	Entropy 1.65157 (1.66046)	Top-1 acc 42.578 (44.258)	Top-5 acc 66.406 (67.923)	lr 0.02318
Train [21][2760/3239]	Time 0.332 (0.557)	Data Time 0.001 (0.012)	Loss 3.3093 (3.3582)	Entropy 1.65156 (1.66043)	Top-1 acc 43.359 (44.259)	Top-5 acc 69.141 (67.923)	lr 0.02318
Train [21][2770/3239]	Time 0.199 (0.557)	Data Time 0.001 (0.012)	Loss 3.5620 (3.3584)	Entropy 1.65151 (1.66040)	Top-1 acc 37.109 (44.248)	Top-5 acc 65.625 (67.923)	lr 0.02318
Train [21][2780/3239]	Time 0.234 (0.556)	Data Time 0.001 (0.012)	Loss 3.5527 (3.3584)	Entropy 1.65147 (1.66037)	Top-1 acc 42.188 (44.250)	Top-5 acc 64.844 (67.925)	lr 0.02318
Train [21][2790/3239]	Time 0.208 (0.556)	Data Time 0.001 (0.012)	Loss 3.1433 (3.3584)	Entropy 1.65143 (1.66033)	Top-1 acc 50.000 (44.253)	Top-5 acc 73.047 (67.925)	lr 0.02318
Train [21][2800/3239]	Time 0.233 (0.555)	Data Time 0.001 (0.012)	Loss 3.2153 (3.3583)	Entropy 1.65145 (1.66030)	Top-1 acc 45.703 (44.253)	Top-5 acc 71.875 (67.926)	lr 0.02318
Train [21][2810/3239]	Time 0.193 (0.555)	Data Time 0.001 (0.012)	Loss 3.3356 (3.3582)	Entropy 1.65141 (1.66027)	Top-1 acc 42.578 (44.255)	Top-5 acc 70.312 (67.928)	lr 0.02318
Train [21][2820/3239]	Time 0.291 (0.555)	Data Time 0.001 (0.012)	Loss 3.3298 (3.3581)	Entropy 1.65135 (1.66024)	Top-1 acc 44.531 (44.256)	Top-5 acc 68.359 (67.925)	lr 0.02318
Train [21][2830/3239]	Time 0.263 (0.554)	Data Time 0.001 (0.012)	Loss 3.0333 (3.3580)	Entropy 1.65131 (1.66021)	Top-1 acc 51.562 (44.260)	Top-5 acc 74.219 (67.930)	lr 0.02318
Train [21][2840/3239]	Time 0.281 (0.554)	Data Time 0.001 (0.012)	Loss 3.2159 (3.3579)	Entropy 1.65128 (1.66018)	Top-1 acc 45.312 (44.258)	Top-5 acc 69.141 (67.929)	lr 0.02318
Train [21][2850/3239]	Time 0.236 (0.553)	Data Time 0.001 (0.012)	Loss 3.4471 (3.3576)	Entropy 1.65125 (1.66014)	Top-1 acc 43.750 (44.267)	Top-5 acc 65.234 (67.937)	lr 0.02318
Train [21][2860/3239]	Time 0.210 (0.553)	Data Time 0.001 (0.012)	Loss 3.2707 (3.3574)	Entropy 1.65115 (1.66011)	Top-1 acc 45.703 (44.273)	Top-5 acc 69.922 (67.940)	lr 0.02318
Train [21][2870/3239]	Time 0.171 (0.553)	Data Time 0.001 (0.012)	Loss 3.3695 (3.3573)	Entropy 1.65111 (1.66008)	Top-1 acc 48.047 (44.276)	Top-5 acc 70.312 (67.942)	lr 0.02318
Train [21][2880/3239]	Time 0.269 (0.552)	Data Time 0.001 (0.012)	Loss 3.3050 (3.3574)	Entropy 1.65111 (1.66005)	Top-1 acc 42.188 (44.269)	Top-5 acc 71.094 (67.943)	lr 0.02318
Train [21][2890/3239]	Time 0.198 (0.552)	Data Time 0.001 (0.012)	Loss 3.2567 (3.3574)	Entropy 1.65077 (1.66002)	Top-1 acc 48.047 (44.270)	Top-5 acc 68.359 (67.942)	lr 0.02318
Train [21][2900/3239]	Time 0.210 (0.551)	Data Time 0.001 (0.012)	Loss 3.2224 (3.3572)	Entropy 1.65070 (1.65999)	Top-1 acc 47.656 (44.273)	Top-5 acc 71.094 (67.945)	lr 0.02318
Train [21][2910/3239]	Time 0.206 (0.551)	Data Time 0.001 (0.012)	Loss 3.3499 (3.3570)	Entropy 1.65062 (1.65996)	Top-1 acc 45.312 (44.275)	Top-5 acc 68.359 (67.948)	lr 0.02318
Train [21][2920/3239]	Time 0.160 (0.551)	Data Time 0.001 (0.012)	Loss 3.3760 (3.3569)	Entropy 1.65055 (1.65992)	Top-1 acc 43.750 (44.276)	Top-5 acc 67.969 (67.954)	lr 0.02317
Train [21][2930/3239]	Time 0.242 (0.550)	Data Time 0.001 (0.012)	Loss 3.5805 (3.3567)	Entropy 1.65049 (1.65989)	Top-1 acc 41.797 (44.277)	Top-5 acc 63.281 (67.957)	lr 0.02317
Train [21][2940/3239]	Time 0.234 (0.550)	Data Time 0.001 (0.012)	Loss 3.3894 (3.3569)	Entropy 1.65033 (1.65986)	Top-1 acc 43.750 (44.273)	Top-5 acc 66.406 (67.955)	lr 0.02317
Train [21][2950/3239]	Time 0.195 (0.549)	Data Time 0.001 (0.012)	Loss 3.2443 (3.3567)	Entropy 1.65020 (1.65983)	Top-1 acc 50.000 (44.278)	Top-5 acc 67.188 (67.957)	lr 0.02317
Train [21][2960/3239]	Time 0.217 (0.549)	Data Time 0.001 (0.012)	Loss 3.3674 (3.3566)	Entropy 1.65020 (1.65980)	Top-1 acc 44.531 (44.285)	Top-5 acc 70.312 (67.961)	lr 0.02317
Train [21][2970/3239]	Time 0.257 (0.549)	Data Time 0.002 (0.012)	Loss 3.3328 (3.3564)	Entropy 1.65004 (1.65976)	Top-1 acc 46.875 (44.293)	Top-5 acc 66.797 (67.966)	lr 0.02317
Train [21][2980/3239]	Time 0.312 (0.561)	Data Time 0.004 (0.012)	Loss 3.4115 (3.3565)	Entropy 1.64998 (1.65973)	Top-1 acc 43.750 (44.292)	Top-5 acc 66.797 (67.965)	lr 0.02317
Train [21][2990/3239]	Time 0.153 (0.561)	Data Time 0.002 (0.012)	Loss 3.1665 (3.3563)	Entropy 1.64989 (1.65970)	Top-1 acc 50.781 (44.293)	Top-5 acc 74.219 (67.970)	lr 0.02317
Train [21][3000/3239]	Time 0.189 (0.561)	Data Time 0.001 (0.012)	Loss 3.2700 (3.3563)	Entropy 1.64968 (1.65967)	Top-1 acc 44.922 (44.294)	Top-5 acc 68.359 (67.972)	lr 0.02317
Train [21][3010/3239]	Time 0.377 (0.560)	Data Time 0.001 (0.011)	Loss 3.4927 (3.3566)	Entropy 1.64960 (1.65963)	Top-1 acc 40.234 (44.287)	Top-5 acc 64.844 (67.965)	lr 0.02317
Train [21][3020/3239]	Time 0.216 (0.560)	Data Time 0.001 (0.011)	Loss 3.2309 (3.3564)	Entropy 1.64957 (1.65960)	Top-1 acc 48.438 (44.294)	Top-5 acc 71.875 (67.968)	lr 0.02317
Train [21][3030/3239]	Time 0.221 (0.560)	Data Time 0.001 (0.011)	Loss 3.3097 (3.3563)	Entropy 1.64948 (1.65957)	Top-1 acc 43.359 (44.296)	Top-5 acc 69.531 (67.971)	lr 0.02317
Train [21][3040/3239]	Time 0.212 (0.559)	Data Time 0.001 (0.011)	Loss 3.5661 (3.3562)	Entropy 1.64946 (1.65953)	Top-1 acc 39.453 (44.299)	Top-5 acc 63.281 (67.973)	lr 0.02317
Train [21][3050/3239]	Time 0.224 (0.559)	Data Time 0.001 (0.011)	Loss 3.1682 (3.3561)	Entropy 1.64938 (1.65950)	Top-1 acc 48.438 (44.303)	Top-5 acc 70.312 (67.975)	lr 0.02317
Train [21][3060/3239]	Time 0.186 (0.559)	Data Time 0.001 (0.011)	Loss 3.1717 (3.3560)	Entropy 1.64928 (1.65947)	Top-1 acc 48.828 (44.302)	Top-5 acc 70.703 (67.975)	lr 0.02317
Train [21][3070/3239]	Time 0.214 (0.558)	Data Time 0.002 (0.011)	Loss 3.1584 (3.3557)	Entropy 1.64923 (1.65943)	Top-1 acc 46.094 (44.310)	Top-5 acc 72.656 (67.982)	lr 0.02317
Train [21][3080/3239]	Time 0.250 (0.558)	Data Time 0.001 (0.011)	Loss 3.3961 (3.3556)	Entropy 1.64920 (1.65940)	Top-1 acc 44.141 (44.308)	Top-5 acc 69.141 (67.982)	lr 0.02317
Train [21][3090/3239]	Time 0.231 (0.557)	Data Time 0.001 (0.011)	Loss 3.4888 (3.3558)	Entropy 1.64914 (1.65937)	Top-1 acc 44.141 (44.304)	Top-5 acc 66.406 (67.979)	lr 0.02317
Train [21][3100/3239]	Time 0.196 (0.557)	Data Time 0.001 (0.011)	Loss 3.2558 (3.3557)	Entropy 1.64908 (1.65933)	Top-1 acc 43.750 (44.305)	Top-5 acc 69.141 (67.982)	lr 0.02317
Train [21][3110/3239]	Time 0.162 (0.557)	Data Time 0.001 (0.011)	Loss 3.3219 (3.3557)	Entropy 1.64898 (1.65930)	Top-1 acc 46.875 (44.305)	Top-5 acc 69.922 (67.982)	lr 0.02316
Train [21][3120/3239]	Time 0.236 (0.556)	Data Time 0.001 (0.011)	Loss 3.3575 (3.3555)	Entropy 1.64896 (1.65927)	Top-1 acc 44.141 (44.308)	Top-5 acc 66.406 (67.985)	lr 0.02316
Train [21][3130/3239]	Time 0.249 (0.556)	Data Time 0.001 (0.011)	Loss 3.1675 (3.3554)	Entropy 1.64890 (1.65923)	Top-1 acc 46.875 (44.312)	Top-5 acc 73.438 (67.989)	lr 0.02316
Train [21][3140/3239]	Time 0.195 (0.556)	Data Time 0.001 (0.011)	Loss 3.4421 (3.3556)	Entropy 1.64888 (1.65920)	Top-1 acc 43.359 (44.305)	Top-5 acc 64.062 (67.984)	lr 0.02316
Train [21][3150/3239]	Time 0.213 (0.555)	Data Time 0.001 (0.011)	Loss 3.5354 (3.3558)	Entropy 1.64895 (1.65917)	Top-1 acc 41.797 (44.300)	Top-5 acc 63.672 (67.977)	lr 0.02316
Train [21][3160/3239]	Time 0.225 (0.555)	Data Time 0.001 (0.011)	Loss 3.3206 (3.3557)	Entropy 1.64883 (1.65914)	Top-1 acc 45.312 (44.303)	Top-5 acc 67.969 (67.980)	lr 0.02316
Train [21][3170/3239]	Time 0.242 (0.555)	Data Time 0.001 (0.011)	Loss 3.1696 (3.3560)	Entropy 1.64877 (1.65910)	Top-1 acc 51.562 (44.296)	Top-5 acc 72.266 (67.971)	lr 0.02316
Train [21][3180/3239]	Time 0.222 (0.554)	Data Time 0.000 (0.011)	Loss 3.2383 (3.3559)	Entropy 1.64875 (1.65907)	Top-1 acc 49.219 (44.301)	Top-5 acc 70.312 (67.974)	lr 0.02316
Train [21][3190/3239]	Time 0.297 (0.554)	Data Time 0.000 (0.011)	Loss 3.4747 (3.3557)	Entropy 1.64865 (1.65904)	Top-1 acc 39.062 (44.306)	Top-5 acc 66.016 (67.978)	lr 0.02316
Train [21][3200/3239]	Time 0.237 (0.553)	Data Time 0.000 (0.011)	Loss 3.2474 (3.3555)	Entropy 1.64864 (1.65901)	Top-1 acc 44.531 (44.309)	Top-5 acc 71.484 (67.985)	lr 0.02316
Train [21][3210/3239]	Time 0.204 (0.553)	Data Time 0.000 (0.011)	Loss 3.1483 (3.3554)	Entropy 1.64857 (1.65897)	Top-1 acc 46.875 (44.309)	Top-5 acc 72.266 (67.989)	lr 0.02316
Train [21][3220/3239]	Time 0.156 (0.553)	Data Time 0.000 (0.011)	Loss 3.3244 (3.3553)	Entropy 1.64842 (1.65894)	Top-1 acc 44.922 (44.312)	Top-5 acc 73.438 (67.994)	lr 0.02316
Train [21][3230/3239]	Time 0.209 (0.552)	Data Time 0.000 (0.011)	Loss 3.4182 (3.3552)	Entropy 1.64834 (1.65891)	Top-1 acc 40.625 (44.313)	Top-5 acc 67.188 (67.996)	lr 0.02316
Train [21][3239/3239]	Time 2.119 (0.552)	Data Time 0.000 (0.011)	Loss 3.0749 (3.3551)	Entropy 1.64834 (1.65888)	Top-1 acc 49.383 (44.315)	Top-5 acc 76.543 (67.997)	lr 0.02316
==========Valid [21/120]	loss 2.144	top-1 acc 53.045 (53.045)	top-5 acc 76.641	Train top-1 44.315	top-5 67.997	Entropy 1.64834	Latency-None: 0.000ms	Flops: 558.37M
Train [22][0/3239]	Time 31.235 (31.235)	Data Time 29.618 (29.618)	Loss 3.3302 (3.3302)	Entropy 1.64833 (1.64833)	Top-1 acc 41.016 (41.016)	Top-5 acc 68.359 (68.359)	lr 0.02316
Train [22][10/3239]	Time 2.673 (3.358)	Data Time 0.002 (2.694)	Loss 3.5885 (3.3232)	Entropy 1.64833 (1.64833)	Top-1 acc 42.969 (45.170)	Top-5 acc 63.281 (68.075)	lr 0.02316
Train [22][20/3239]	Time 0.320 (1.870)	Data Time 0.001 (1.412)	Loss 3.2008 (3.3158)	Entropy 1.64828 (1.64831)	Top-1 acc 49.609 (44.829)	Top-5 acc 70.703 (68.229)	lr 0.02316
Train [22][30/3239]	Time 0.233 (1.411)	Data Time 0.001 (0.957)	Loss 3.3204 (3.3259)	Entropy 1.64819 (1.64827)	Top-1 acc 44.922 (44.745)	Top-5 acc 69.531 (68.208)	lr 0.02316
Train [22][40/3239]	Time 0.199 (1.172)	Data Time 0.001 (0.724)	Loss 3.5569 (3.3253)	Entropy 1.64814 (1.64824)	Top-1 acc 42.188 (44.836)	Top-5 acc 62.109 (68.331)	lr 0.02316
Train [22][50/3239]	Time 0.206 (1.027)	Data Time 0.001 (0.582)	Loss 3.2375 (3.3220)	Entropy 1.64804 (1.64821)	Top-1 acc 45.703 (44.723)	Top-5 acc 67.969 (68.482)	lr 0.02316
Train [22][60/3239]	Time 0.192 (0.930)	Data Time 0.001 (0.487)	Loss 3.4824 (3.3187)	Entropy 1.64793 (1.64817)	Top-1 acc 40.625 (44.871)	Top-5 acc 64.453 (68.532)	lr 0.02315
Train [22][70/3239]	Time 0.209 (0.858)	Data Time 0.001 (0.419)	Loss 3.1362 (3.3082)	Entropy 1.64788 (1.64813)	Top-1 acc 50.000 (45.164)	Top-5 acc 72.266 (68.706)	lr 0.02315
Train [22][80/3239]	Time 0.308 (1.361)	Data Time 0.002 (0.367)	Loss 3.1733 (3.3042)	Entropy 1.64774 (1.64810)	Top-1 acc 48.047 (45.288)	Top-5 acc 73.828 (68.890)	lr 0.02315
Train [22][90/3239]	Time 0.198 (1.262)	Data Time 0.002 (0.327)	Loss 3.3961 (3.3035)	Entropy 1.64772 (1.64806)	Top-1 acc 44.531 (45.244)	Top-5 acc 68.750 (68.969)	lr 0.02315
Train [22][100/3239]	Time 0.212 (1.181)	Data Time 0.001 (0.295)	Loss 3.4481 (3.3008)	Entropy 1.64756 (1.64802)	Top-1 acc 38.672 (45.289)	Top-5 acc 67.969 (69.114)	lr 0.02315
Train [22][110/3239]	Time 0.234 (1.114)	Data Time 0.001 (0.269)	Loss 3.2361 (3.3012)	Entropy 1.64740 (1.64798)	Top-1 acc 44.531 (45.298)	Top-5 acc 67.578 (69.091)	lr 0.02315
Train [22][120/3239]	Time 2.309 (1.058)	Data Time 0.001 (0.247)	Loss 3.2640 (3.3019)	Entropy 1.64740 (1.64793)	Top-1 acc 50.781 (45.293)	Top-5 acc 67.578 (69.050)	lr 0.02315
Train [22][130/3239]	Time 0.211 (0.994)	Data Time 0.001 (0.228)	Loss 3.5223 (3.3032)	Entropy 1.64723 (1.64788)	Top-1 acc 37.891 (45.277)	Top-5 acc 66.016 (69.060)	lr 0.02315
Train [22][140/3239]	Time 0.208 (0.954)	Data Time 0.001 (0.212)	Loss 3.2148 (3.3046)	Entropy 1.64718 (1.64783)	Top-1 acc 45.312 (45.185)	Top-5 acc 71.094 (69.102)	lr 0.02315
Train [22][150/3239]	Time 0.348 (0.919)	Data Time 0.001 (0.198)	Loss 3.2284 (3.3079)	Entropy 1.64705 (1.64778)	Top-1 acc 46.875 (45.090)	Top-5 acc 70.703 (69.040)	lr 0.02315
Train [22][160/3239]	Time 0.237 (0.889)	Data Time 0.001 (0.186)	Loss 3.3670 (3.3082)	Entropy 1.64692 (1.64773)	Top-1 acc 46.875 (45.116)	Top-5 acc 67.578 (69.080)	lr 0.02315
Train [22][170/3239]	Time 0.190 (0.863)	Data Time 0.001 (0.175)	Loss 3.2190 (3.3088)	Entropy 1.64687 (1.64768)	Top-1 acc 45.703 (45.107)	Top-5 acc 71.094 (69.056)	lr 0.02315
Train [22][180/3239]	Time 0.220 (0.840)	Data Time 0.001 (0.165)	Loss 3.3752 (3.3124)	Entropy 1.64679 (1.64763)	Top-1 acc 44.922 (45.008)	Top-5 acc 70.312 (68.987)	lr 0.02315
Train [22][190/3239]	Time 0.208 (0.819)	Data Time 0.001 (0.157)	Loss 3.2983 (3.3132)	Entropy 1.64675 (1.64759)	Top-1 acc 45.312 (44.944)	Top-5 acc 68.750 (68.991)	lr 0.02315
Train [22][200/3239]	Time 0.186 (0.800)	Data Time 0.001 (0.149)	Loss 3.5617 (3.3108)	Entropy 1.64664 (1.64754)	Top-1 acc 41.016 (45.068)	Top-5 acc 66.406 (68.989)	lr 0.02315
Train [22][210/3239]	Time 0.242 (0.782)	Data Time 0.001 (0.142)	Loss 3.2668 (3.3122)	Entropy 1.64663 (1.64750)	Top-1 acc 46.875 (45.051)	Top-5 acc 71.484 (68.955)	lr 0.02315
Train [22][220/3239]	Time 0.137 (0.766)	Data Time 0.001 (0.136)	Loss 3.9502 (3.3146)	Entropy 1.64655 (1.64746)	Top-1 acc 32.031 (45.035)	Top-5 acc 54.297 (68.900)	lr 0.02315
Train [22][230/3239]	Time 2.347 (0.752)	Data Time 0.001 (0.130)	Loss 3.2598 (3.3123)	Entropy 1.64655 (1.64742)	Top-1 acc 46.484 (45.103)	Top-5 acc 69.141 (68.933)	lr 0.02315
Train [22][240/3239]	Time 0.220 (0.730)	Data Time 0.001 (0.125)	Loss 3.3494 (3.3110)	Entropy 1.64649 (1.64738)	Top-1 acc 47.656 (45.141)	Top-5 acc 65.625 (68.966)	lr 0.02315
Train [22][250/3239]	Time 0.223 (0.718)	Data Time 0.001 (0.120)	Loss 3.2142 (3.3108)	Entropy 1.64646 (1.64735)	Top-1 acc 48.828 (45.166)	Top-5 acc 70.312 (69.005)	lr 0.02314
Train [22][260/3239]	Time 0.214 (0.707)	Data Time 0.001 (0.115)	Loss 3.3227 (3.3130)	Entropy 1.64646 (1.64731)	Top-1 acc 44.141 (45.088)	Top-5 acc 66.016 (68.927)	lr 0.02314
Train [22][270/3239]	Time 0.216 (0.697)	Data Time 0.001 (0.111)	Loss 3.1747 (3.3122)	Entropy 1.64638 (1.64728)	Top-1 acc 46.875 (45.131)	Top-5 acc 73.438 (68.953)	lr 0.02314
Train [22][280/3239]	Time 0.222 (0.688)	Data Time 0.001 (0.107)	Loss 3.2999 (3.3153)	Entropy 1.64637 (1.64725)	Top-1 acc 46.484 (45.087)	Top-5 acc 69.141 (68.885)	lr 0.02314
Train [22][290/3239]	Time 0.207 (0.679)	Data Time 0.001 (0.104)	Loss 3.5306 (3.3163)	Entropy 1.64631 (1.64721)	Top-1 acc 41.016 (45.060)	Top-5 acc 62.891 (68.832)	lr 0.02314
Train [22][300/3239]	Time 0.225 (0.671)	Data Time 0.001 (0.100)	Loss 3.3105 (3.3159)	Entropy 1.64621 (1.64718)	Top-1 acc 46.875 (45.079)	Top-5 acc 70.703 (68.856)	lr 0.02314
Train [22][310/3239]	Time 0.206 (0.663)	Data Time 0.001 (0.097)	Loss 3.2416 (3.3156)	Entropy 1.64631 (1.64715)	Top-1 acc 44.531 (45.093)	Top-5 acc 68.359 (68.853)	lr 0.02314
Train [22][320/3239]	Time 0.209 (0.656)	Data Time 0.001 (0.094)	Loss 3.4228 (3.3165)	Entropy 1.64624 (1.64713)	Top-1 acc 44.922 (45.103)	Top-5 acc 67.969 (68.830)	lr 0.02314
Train [22][330/3239]	Time 0.227 (0.649)	Data Time 0.001 (0.091)	Loss 3.3594 (3.3169)	Entropy 1.64615 (1.64710)	Top-1 acc 46.484 (45.079)	Top-5 acc 67.188 (68.817)	lr 0.02314
Train [22][340/3239]	Time 2.484 (0.643)	Data Time 0.001 (0.089)	Loss 3.3385 (3.3200)	Entropy 1.64615 (1.64707)	Top-1 acc 44.922 (45.050)	Top-5 acc 67.969 (68.753)	lr 0.02314
Train [22][350/3239]	Time 0.272 (0.631)	Data Time 0.002 (0.086)	Loss 3.4773 (3.3214)	Entropy 1.64607 (1.64704)	Top-1 acc 43.359 (45.015)	Top-5 acc 65.625 (68.722)	lr 0.02314
Train [22][360/3239]	Time 0.190 (0.626)	Data Time 0.001 (0.084)	Loss 3.5354 (3.3205)	Entropy 1.64603 (1.64701)	Top-1 acc 37.891 (45.023)	Top-5 acc 65.625 (68.738)	lr 0.02314
Train [22][370/3239]	Time 0.198 (0.621)	Data Time 0.001 (0.082)	Loss 3.4222 (3.3209)	Entropy 1.64593 (1.64699)	Top-1 acc 43.750 (45.016)	Top-5 acc 66.406 (68.727)	lr 0.02314
Train [22][380/3239]	Time 0.210 (0.616)	Data Time 0.001 (0.079)	Loss 3.2145 (3.3174)	Entropy 1.64584 (1.64696)	Top-1 acc 44.922 (45.047)	Top-5 acc 71.094 (68.812)	lr 0.02314
Train [22][390/3239]	Time 0.174 (0.611)	Data Time 0.001 (0.077)	Loss 3.3220 (3.3164)	Entropy 1.64587 (1.64693)	Top-1 acc 42.188 (45.062)	Top-5 acc 67.578 (68.807)	lr 0.02314
Train [22][400/3239]	Time 0.317 (0.607)	Data Time 0.001 (0.076)	Loss 3.3583 (3.3154)	Entropy 1.64580 (1.64690)	Top-1 acc 44.531 (45.079)	Top-5 acc 69.141 (68.829)	lr 0.02314
Train [22][410/3239]	Time 0.211 (0.603)	Data Time 0.001 (0.074)	Loss 3.2061 (3.3154)	Entropy 1.64570 (1.64687)	Top-1 acc 49.609 (45.087)	Top-5 acc 71.875 (68.842)	lr 0.02314
Train [22][420/3239]	Time 0.146 (0.599)	Data Time 0.002 (0.072)	Loss 3.3688 (3.3147)	Entropy 1.64571 (1.64685)	Top-1 acc 45.312 (45.122)	Top-5 acc 69.141 (68.860)	lr 0.02314
Train [22][430/3239]	Time 0.199 (0.595)	Data Time 0.001 (0.071)	Loss 3.2687 (3.3153)	Entropy 1.64564 (1.64682)	Top-1 acc 47.266 (45.094)	Top-5 acc 66.797 (68.842)	lr 0.02314
Train [22][440/3239]	Time 0.319 (0.683)	Data Time 0.004 (0.069)	Loss 3.2813 (3.3158)	Entropy 1.64556 (1.64679)	Top-1 acc 46.875 (45.064)	Top-5 acc 68.750 (68.832)	lr 0.02313
Train [22][450/3239]	Time 2.409 (0.679)	Data Time 0.002 (0.068)	Loss 3.1201 (3.3145)	Entropy 1.64556 (1.64677)	Top-1 acc 50.000 (45.084)	Top-5 acc 70.703 (68.851)	lr 0.02313
Train [22][460/3239]	Time 0.219 (0.669)	Data Time 0.001 (0.066)	Loss 3.2162 (3.3153)	Entropy 1.64545 (1.64674)	Top-1 acc 45.703 (45.049)	Top-5 acc 71.094 (68.824)	lr 0.02313
Train [22][470/3239]	Time 0.349 (0.665)	Data Time 0.002 (0.065)	Loss 3.2548 (3.3150)	Entropy 1.64542 (1.64671)	Top-1 acc 46.094 (45.022)	Top-5 acc 66.406 (68.826)	lr 0.02313
Train [22][480/3239]	Time 0.208 (0.660)	Data Time 0.001 (0.064)	Loss 3.4368 (3.3160)	Entropy 1.64538 (1.64668)	Top-1 acc 43.359 (45.011)	Top-5 acc 67.188 (68.805)	lr 0.02313
Train [22][490/3239]	Time 0.216 (0.655)	Data Time 0.001 (0.062)	Loss 3.3368 (3.3159)	Entropy 1.64533 (1.64665)	Top-1 acc 47.656 (45.021)	Top-5 acc 68.359 (68.802)	lr 0.02313
Train [22][500/3239]	Time 0.214 (0.651)	Data Time 0.001 (0.061)	Loss 3.2022 (3.3147)	Entropy 1.64524 (1.64663)	Top-1 acc 47.266 (45.065)	Top-5 acc 74.219 (68.823)	lr 0.02313
Train [22][510/3239]	Time 0.191 (0.647)	Data Time 0.001 (0.060)	Loss 3.3728 (3.3160)	Entropy 1.64519 (1.64660)	Top-1 acc 42.578 (45.027)	Top-5 acc 66.406 (68.784)	lr 0.02313
Train [22][520/3239]	Time 0.230 (0.643)	Data Time 0.001 (0.059)	Loss 3.3986 (3.3186)	Entropy 1.64507 (1.64657)	Top-1 acc 42.578 (44.978)	Top-5 acc 66.016 (68.730)	lr 0.02313
Train [22][530/3239]	Time 0.305 (0.639)	Data Time 0.001 (0.058)	Loss 3.3710 (3.3182)	Entropy 1.64506 (1.64654)	Top-1 acc 40.625 (44.981)	Top-5 acc 66.016 (68.743)	lr 0.02313
Train [22][540/3239]	Time 0.213 (0.635)	Data Time 0.001 (0.057)	Loss 3.3374 (3.3192)	Entropy 1.64495 (1.64651)	Top-1 acc 43.750 (44.962)	Top-5 acc 67.969 (68.725)	lr 0.02313
Train [22][550/3239]	Time 0.264 (0.631)	Data Time 0.001 (0.056)	Loss 3.2332 (3.3177)	Entropy 1.64491 (1.64649)	Top-1 acc 48.047 (45.006)	Top-5 acc 73.828 (68.746)	lr 0.02313
Train [22][560/3239]	Time 2.290 (0.628)	Data Time 0.001 (0.055)	Loss 3.3255 (3.3182)	Entropy 1.64491 (1.64646)	Top-1 acc 45.312 (44.987)	Top-5 acc 67.188 (68.751)	lr 0.02313
Train [22][570/3239]	Time 0.226 (0.621)	Data Time 0.001 (0.054)	Loss 3.7856 (3.3195)	Entropy 1.64455 (1.64642)	Top-1 acc 39.062 (44.961)	Top-5 acc 63.672 (68.726)	lr 0.02313
Train [22][580/3239]	Time 0.155 (0.617)	Data Time 0.002 (0.053)	Loss 3.1704 (3.3198)	Entropy 1.64444 (1.64639)	Top-1 acc 49.609 (44.972)	Top-5 acc 72.266 (68.729)	lr 0.02313
Train [22][590/3239]	Time 0.248 (0.614)	Data Time 0.001 (0.052)	Loss 3.5962 (3.3207)	Entropy 1.64439 (1.64636)	Top-1 acc 37.500 (44.957)	Top-5 acc 65.625 (68.722)	lr 0.02313
Train [22][600/3239]	Time 0.197 (0.611)	Data Time 0.001 (0.051)	Loss 3.4006 (3.3201)	Entropy 1.64430 (1.64632)	Top-1 acc 46.484 (44.956)	Top-5 acc 66.797 (68.733)	lr 0.02313
Train [22][610/3239]	Time 0.222 (0.608)	Data Time 0.002 (0.050)	Loss 3.2900 (3.3202)	Entropy 1.64428 (1.64629)	Top-1 acc 47.266 (44.968)	Top-5 acc 71.484 (68.735)	lr 0.02313
Train [22][620/3239]	Time 0.231 (0.605)	Data Time 0.001 (0.050)	Loss 3.3583 (3.3207)	Entropy 1.64419 (1.64626)	Top-1 acc 46.875 (44.965)	Top-5 acc 69.922 (68.717)	lr 0.02313
Train [22][630/3239]	Time 0.215 (0.603)	Data Time 0.001 (0.049)	Loss 3.4945 (3.3204)	Entropy 1.64411 (1.64622)	Top-1 acc 44.531 (44.973)	Top-5 acc 66.016 (68.708)	lr 0.02312
Train [22][640/3239]	Time 0.267 (0.600)	Data Time 0.001 (0.048)	Loss 3.2687 (3.3212)	Entropy 1.64409 (1.64619)	Top-1 acc 46.484 (44.964)	Top-5 acc 70.703 (68.701)	lr 0.02312
Train [22][650/3239]	Time 0.219 (0.597)	Data Time 0.001 (0.047)	Loss 3.1972 (3.3204)	Entropy 1.64408 (1.64616)	Top-1 acc 48.438 (44.987)	Top-5 acc 71.094 (68.717)	lr 0.02312
Train [22][660/3239]	Time 0.376 (0.595)	Data Time 0.001 (0.047)	Loss 3.1966 (3.3196)	Entropy 1.64389 (1.64613)	Top-1 acc 44.141 (44.995)	Top-5 acc 71.094 (68.736)	lr 0.02312
Train [22][670/3239]	Time 2.351 (0.593)	Data Time 0.001 (0.046)	Loss 3.1941 (3.3201)	Entropy 1.64389 (1.64609)	Top-1 acc 48.047 (44.996)	Top-5 acc 68.750 (68.717)	lr 0.02312
Train [22][680/3239]	Time 0.226 (0.587)	Data Time 0.002 (0.045)	Loss 3.4284 (3.3195)	Entropy 1.64386 (1.64606)	Top-1 acc 43.750 (45.000)	Top-5 acc 66.406 (68.737)	lr 0.02312
Train [22][690/3239]	Time 0.218 (0.585)	Data Time 0.001 (0.045)	Loss 3.4635 (3.3204)	Entropy 1.64385 (1.64603)	Top-1 acc 42.578 (44.984)	Top-5 acc 63.672 (68.713)	lr 0.02312
Train [22][700/3239]	Time 0.208 (0.583)	Data Time 0.001 (0.044)	Loss 3.3691 (3.3204)	Entropy 1.64380 (1.64600)	Top-1 acc 40.625 (44.984)	Top-5 acc 69.531 (68.705)	lr 0.02312
Train [22][710/3239]	Time 0.218 (0.581)	Data Time 0.001 (0.044)	Loss 3.0897 (3.3196)	Entropy 1.64378 (1.64597)	Top-1 acc 47.266 (45.014)	Top-5 acc 71.484 (68.730)	lr 0.02312
Train [22][720/3239]	Time 0.306 (0.579)	Data Time 0.001 (0.043)	Loss 3.3316 (3.3195)	Entropy 1.64359 (1.64593)	Top-1 acc 41.406 (45.026)	Top-5 acc 69.141 (68.743)	lr 0.02312
Train [22][730/3239]	Time 0.223 (0.577)	Data Time 0.001 (0.042)	Loss 3.2179 (3.3192)	Entropy 1.64357 (1.64590)	Top-1 acc 47.266 (45.030)	Top-5 acc 69.531 (68.745)	lr 0.02312
Train [22][740/3239]	Time 0.195 (0.575)	Data Time 0.001 (0.042)	Loss 3.4709 (3.3192)	Entropy 1.64360 (1.64587)	Top-1 acc 39.844 (45.029)	Top-5 acc 66.016 (68.747)	lr 0.02312
Train [22][750/3239]	Time 0.220 (0.574)	Data Time 0.001 (0.041)	Loss 3.2940 (3.3191)	Entropy 1.64359 (1.64584)	Top-1 acc 45.312 (45.030)	Top-5 acc 69.141 (68.748)	lr 0.02312
Train [22][760/3239]	Time 0.207 (0.572)	Data Time 0.001 (0.041)	Loss 3.5031 (3.3200)	Entropy 1.64355 (1.64581)	Top-1 acc 41.797 (45.024)	Top-5 acc 64.062 (68.732)	lr 0.02312
Train [22][770/3239]	Time 0.219 (0.570)	Data Time 0.001 (0.040)	Loss 3.1261 (3.3198)	Entropy 1.64352 (1.64578)	Top-1 acc 46.875 (45.026)	Top-5 acc 72.656 (68.731)	lr 0.02312
Train [22][780/3239]	Time 2.238 (0.568)	Data Time 0.002 (0.040)	Loss 3.4014 (3.3197)	Entropy 1.64352 (1.64575)	Top-1 acc 46.094 (45.032)	Top-5 acc 66.797 (68.726)	lr 0.02312
Train [22][790/3239]	Time 0.199 (0.564)	Data Time 0.001 (0.039)	Loss 3.3022 (3.3197)	Entropy 1.64348 (1.64572)	Top-1 acc 42.969 (45.046)	Top-5 acc 69.141 (68.733)	lr 0.02312
Train [22][800/3239]	Time 0.165 (0.563)	Data Time 0.001 (0.039)	Loss 3.5021 (3.3199)	Entropy 1.64344 (1.64569)	Top-1 acc 41.797 (45.049)	Top-5 acc 66.406 (68.732)	lr 0.02312
Train [22][810/3239]	Time 0.217 (0.613)	Data Time 0.002 (0.038)	Loss 3.3245 (3.3211)	Entropy 1.64331 (1.64567)	Top-1 acc 45.703 (45.021)	Top-5 acc 68.750 (68.710)	lr 0.02312
Train [22][820/3239]	Time 0.226 (0.611)	Data Time 0.002 (0.038)	Loss 3.4518 (3.3214)	Entropy 1.64317 (1.64564)	Top-1 acc 44.922 (45.012)	Top-5 acc 66.016 (68.699)	lr 0.02311
Train [22][830/3239]	Time 0.218 (0.609)	Data Time 0.001 (0.038)	Loss 3.2374 (3.3209)	Entropy 1.64303 (1.64561)	Top-1 acc 46.094 (45.019)	Top-5 acc 69.922 (68.706)	lr 0.02311
Train [22][840/3239]	Time 0.237 (0.607)	Data Time 0.001 (0.037)	Loss 3.3945 (3.3211)	Entropy 1.64297 (1.64557)	Top-1 acc 40.625 (44.990)	Top-5 acc 68.359 (68.716)	lr 0.02311
Train [22][850/3239]	Time 0.204 (0.605)	Data Time 0.001 (0.037)	Loss 3.1418 (3.3211)	Entropy 1.64287 (1.64554)	Top-1 acc 46.875 (44.985)	Top-5 acc 74.219 (68.723)	lr 0.02311
Train [22][860/3239]	Time 0.214 (0.603)	Data Time 0.001 (0.036)	Loss 3.4086 (3.3207)	Entropy 1.64282 (1.64551)	Top-1 acc 41.797 (45.016)	Top-5 acc 68.359 (68.735)	lr 0.02311
Train [22][870/3239]	Time 0.285 (0.601)	Data Time 0.001 (0.036)	Loss 3.1062 (3.3201)	Entropy 1.64275 (1.64548)	Top-1 acc 48.047 (45.033)	Top-5 acc 73.828 (68.746)	lr 0.02311
Train [22][880/3239]	Time 0.265 (0.599)	Data Time 0.001 (0.036)	Loss 3.5237 (3.3203)	Entropy 1.64264 (1.64545)	Top-1 acc 41.016 (45.035)	Top-5 acc 63.672 (68.741)	lr 0.02311
Train [22][890/3239]	Time 2.326 (0.598)	Data Time 0.001 (0.035)	Loss 3.5284 (3.3202)	Entropy 1.64264 (1.64542)	Top-1 acc 41.016 (45.037)	Top-5 acc 66.016 (68.739)	lr 0.02311
Train [22][900/3239]	Time 0.208 (0.593)	Data Time 0.001 (0.035)	Loss 3.4858 (3.3208)	Entropy 1.64259 (1.64539)	Top-1 acc 40.234 (45.041)	Top-5 acc 67.969 (68.735)	lr 0.02311
Train [22][910/3239]	Time 0.305 (0.592)	Data Time 0.001 (0.034)	Loss 3.1657 (3.3214)	Entropy 1.64256 (1.64536)	Top-1 acc 44.531 (45.023)	Top-5 acc 73.047 (68.725)	lr 0.02311
Train [22][920/3239]	Time 0.219 (0.590)	Data Time 0.001 (0.034)	Loss 3.1103 (3.3211)	Entropy 1.64255 (1.64533)	Top-1 acc 51.172 (45.034)	Top-5 acc 73.047 (68.724)	lr 0.02311
Train [22][930/3239]	Time 0.201 (0.589)	Data Time 0.001 (0.034)	Loss 3.1964 (3.3211)	Entropy 1.64236 (1.64529)	Top-1 acc 45.703 (45.031)	Top-5 acc 72.266 (68.726)	lr 0.02311
Train [22][940/3239]	Time 0.219 (0.587)	Data Time 0.001 (0.033)	Loss 3.2050 (3.3215)	Entropy 1.64237 (1.64526)	Top-1 acc 45.312 (45.017)	Top-5 acc 73.047 (68.711)	lr 0.02311
Train [22][950/3239]	Time 0.215 (0.586)	Data Time 0.001 (0.033)	Loss 3.1283 (3.3211)	Entropy 1.64220 (1.64523)	Top-1 acc 49.609 (45.023)	Top-5 acc 73.828 (68.724)	lr 0.02311
Train [22][960/3239]	Time 0.201 (0.584)	Data Time 0.002 (0.033)	Loss 3.3009 (3.3212)	Entropy 1.64212 (1.64520)	Top-1 acc 48.438 (45.028)	Top-5 acc 71.094 (68.730)	lr 0.02311
Train [22][970/3239]	Time 0.295 (0.583)	Data Time 0.001 (0.032)	Loss 3.5182 (3.3213)	Entropy 1.64213 (1.64517)	Top-1 acc 46.094 (45.016)	Top-5 acc 65.234 (68.723)	lr 0.02311
Train [22][980/3239]	Time 0.205 (0.581)	Data Time 0.001 (0.032)	Loss 3.2796 (3.3213)	Entropy 1.64200 (1.64514)	Top-1 acc 41.797 (45.019)	Top-5 acc 71.094 (68.733)	lr 0.02311
Train [22][990/3239]	Time 0.190 (0.580)	Data Time 0.001 (0.032)	Loss 3.4627 (3.3218)	Entropy 1.64191 (1.64510)	Top-1 acc 40.625 (45.010)	Top-5 acc 61.328 (68.719)	lr 0.02311
Train [22][1000/3239]	Time 2.380 (0.578)	Data Time 0.001 (0.032)	Loss 3.2815 (3.3223)	Entropy 1.64191 (1.64507)	Top-1 acc 50.781 (45.000)	Top-5 acc 70.703 (68.716)	lr 0.02310
Train [22][1010/3239]	Time 0.211 (0.575)	Data Time 0.001 (0.031)	Loss 3.8029 (3.3226)	Entropy 1.64187 (1.64504)	Top-1 acc 35.547 (44.983)	Top-5 acc 57.812 (68.709)	lr 0.02310
Train [22][1020/3239]	Time 0.263 (0.573)	Data Time 0.001 (0.031)	Loss 3.0658 (3.3222)	Entropy 1.64184 (1.64501)	Top-1 acc 48.438 (44.989)	Top-5 acc 72.656 (68.713)	lr 0.02310
Train [22][1030/3239]	Time 0.214 (0.572)	Data Time 0.001 (0.031)	Loss 3.2874 (3.3219)	Entropy 1.64168 (1.64498)	Top-1 acc 43.359 (44.993)	Top-5 acc 70.703 (68.719)	lr 0.02310
Train [22][1040/3239]	Time 0.381 (0.571)	Data Time 0.001 (0.030)	Loss 3.2850 (3.3219)	Entropy 1.64165 (1.64495)	Top-1 acc 44.922 (44.986)	Top-5 acc 73.047 (68.727)	lr 0.02310
Train [22][1050/3239]	Time 0.155 (0.569)	Data Time 0.001 (0.030)	Loss 3.2806 (3.3223)	Entropy 1.64159 (1.64491)	Top-1 acc 44.922 (44.968)	Top-5 acc 70.703 (68.709)	lr 0.02310
Train [22][1060/3239]	Time 0.200 (0.568)	Data Time 0.001 (0.030)	Loss 3.1430 (3.3220)	Entropy 1.64152 (1.64488)	Top-1 acc 49.219 (44.975)	Top-5 acc 71.875 (68.720)	lr 0.02310
Train [22][1070/3239]	Time 0.194 (0.567)	Data Time 0.001 (0.030)	Loss 3.2826 (3.3217)	Entropy 1.64144 (1.64485)	Top-1 acc 46.484 (44.982)	Top-5 acc 71.094 (68.720)	lr 0.02310
Train [22][1080/3239]	Time 0.205 (0.566)	Data Time 0.002 (0.029)	Loss 3.3621 (3.3221)	Entropy 1.64131 (1.64482)	Top-1 acc 46.875 (44.983)	Top-5 acc 67.578 (68.714)	lr 0.02310
Train [22][1090/3239]	Time 0.208 (0.565)	Data Time 0.001 (0.029)	Loss 3.3933 (3.3221)	Entropy 1.64120 (1.64479)	Top-1 acc 46.094 (44.976)	Top-5 acc 67.188 (68.714)	lr 0.02310
Train [22][1100/3239]	Time 0.206 (0.563)	Data Time 0.002 (0.029)	Loss 3.7823 (3.3217)	Entropy 1.64116 (1.64475)	Top-1 acc 33.594 (44.981)	Top-5 acc 57.422 (68.718)	lr 0.02310
Train [22][1110/3239]	Time 2.329 (0.562)	Data Time 0.001 (0.029)	Loss 3.2145 (3.3213)	Entropy 1.64116 (1.64472)	Top-1 acc 44.141 (44.991)	Top-5 acc 70.312 (68.722)	lr 0.02310
Train [22][1120/3239]	Time 0.219 (0.559)	Data Time 0.001 (0.028)	Loss 3.2132 (3.3214)	Entropy 1.64109 (1.64469)	Top-1 acc 44.922 (44.992)	Top-5 acc 67.969 (68.724)	lr 0.02310
Train [22][1130/3239]	Time 0.209 (0.558)	Data Time 0.001 (0.028)	Loss 3.2150 (3.3214)	Entropy 1.64097 (1.64466)	Top-1 acc 44.141 (44.993)	Top-5 acc 69.922 (68.720)	lr 0.02310
Train [22][1140/3239]	Time 0.275 (0.557)	Data Time 0.001 (0.028)	Loss 3.3433 (3.3212)	Entropy 1.64085 (1.64462)	Top-1 acc 45.312 (44.999)	Top-5 acc 68.359 (68.727)	lr 0.02310
Train [22][1150/3239]	Time 0.212 (0.556)	Data Time 0.001 (0.028)	Loss 3.3444 (3.3209)	Entropy 1.64079 (1.64459)	Top-1 acc 41.406 (45.000)	Top-5 acc 66.797 (68.730)	lr 0.02310
Train [22][1160/3239]	Time 0.202 (0.555)	Data Time 0.001 (0.027)	Loss 3.5264 (3.3215)	Entropy 1.64076 (1.64456)	Top-1 acc 40.625 (44.986)	Top-5 acc 63.281 (68.717)	lr 0.02310
Train [22][1170/3239]	Time 0.221 (0.591)	Data Time 0.002 (0.027)	Loss 3.3670 (3.3223)	Entropy 1.64070 (1.64452)	Top-1 acc 40.234 (44.962)	Top-5 acc 63.672 (68.702)	lr 0.02310
Train [22][1180/3239]	Time 0.168 (0.590)	Data Time 0.002 (0.027)	Loss 3.3366 (3.3223)	Entropy 1.64068 (1.64449)	Top-1 acc 44.922 (44.960)	Top-5 acc 69.141 (68.705)	lr 0.02310
Train [22][1190/3239]	Time 0.215 (0.589)	Data Time 0.002 (0.027)	Loss 3.1581 (3.3226)	Entropy 1.64066 (1.64446)	Top-1 acc 47.656 (44.953)	Top-5 acc 69.922 (68.695)	lr 0.02309
Train [22][1200/3239]	Time 0.275 (0.588)	Data Time 0.002 (0.027)	Loss 3.3217 (3.3227)	Entropy 1.64067 (1.64443)	Top-1 acc 46.484 (44.940)	Top-5 acc 70.703 (68.698)	lr 0.02309
Train [22][1210/3239]	Time 0.267 (0.587)	Data Time 0.001 (0.026)	Loss 3.2837 (3.3228)	Entropy 1.64058 (1.64440)	Top-1 acc 46.094 (44.938)	Top-5 acc 67.969 (68.692)	lr 0.02309
Train [22][1220/3239]	Time 2.556 (0.586)	Data Time 0.002 (0.026)	Loss 3.2395 (3.3225)	Entropy 1.64058 (1.64437)	Top-1 acc 47.266 (44.942)	Top-5 acc 69.141 (68.697)	lr 0.02309
Train [22][1230/3239]	Time 0.164 (0.583)	Data Time 0.001 (0.026)	Loss 3.4075 (3.3222)	Entropy 1.64047 (1.64433)	Top-1 acc 40.625 (44.952)	Top-5 acc 67.969 (68.701)	lr 0.02309
Train [22][1240/3239]	Time 0.251 (0.583)	Data Time 0.001 (0.026)	Loss 3.5397 (3.3222)	Entropy 1.64038 (1.64430)	Top-1 acc 41.797 (44.948)	Top-5 acc 64.062 (68.693)	lr 0.02309
Train [22][1250/3239]	Time 0.223 (0.582)	Data Time 0.001 (0.026)	Loss 3.0997 (3.3220)	Entropy 1.64034 (1.64427)	Top-1 acc 48.438 (44.951)	Top-5 acc 73.828 (68.695)	lr 0.02309
Train [22][1260/3239]	Time 0.214 (0.581)	Data Time 0.001 (0.025)	Loss 3.2904 (3.3216)	Entropy 1.64029 (1.64424)	Top-1 acc 47.266 (44.963)	Top-5 acc 69.531 (68.699)	lr 0.02309
Train [22][1270/3239]	Time 0.226 (0.580)	Data Time 0.001 (0.025)	Loss 3.3574 (3.3216)	Entropy 1.64025 (1.64421)	Top-1 acc 42.969 (44.965)	Top-5 acc 65.625 (68.696)	lr 0.02309
Train [22][1280/3239]	Time 0.284 (0.579)	Data Time 0.001 (0.025)	Loss 3.3097 (3.3216)	Entropy 1.64037 (1.64418)	Top-1 acc 45.312 (44.972)	Top-5 acc 69.531 (68.695)	lr 0.02309
Train [22][1290/3239]	Time 0.308 (0.578)	Data Time 0.001 (0.025)	Loss 3.2056 (3.3213)	Entropy 1.64031 (1.64415)	Top-1 acc 44.531 (44.978)	Top-5 acc 69.922 (68.706)	lr 0.02309
Train [22][1300/3239]	Time 0.231 (0.577)	Data Time 0.001 (0.025)	Loss 3.3404 (3.3210)	Entropy 1.64028 (1.64412)	Top-1 acc 43.750 (44.986)	Top-5 acc 65.234 (68.708)	lr 0.02309
Train [22][1310/3239]	Time 0.210 (0.576)	Data Time 0.001 (0.025)	Loss 3.4472 (3.3214)	Entropy 1.64025 (1.64409)	Top-1 acc 40.234 (44.973)	Top-5 acc 65.234 (68.703)	lr 0.02309
Train [22][1320/3239]	Time 0.224 (0.575)	Data Time 0.001 (0.024)	Loss 3.3410 (3.3217)	Entropy 1.64019 (1.64406)	Top-1 acc 42.188 (44.969)	Top-5 acc 69.922 (68.695)	lr 0.02309
Train [22][1330/3239]	Time 2.318 (0.574)	Data Time 0.001 (0.024)	Loss 3.1734 (3.3215)	Entropy 1.64019 (1.64403)	Top-1 acc 45.312 (44.965)	Top-5 acc 73.438 (68.703)	lr 0.02309
Train [22][1340/3239]	Time 0.284 (0.571)	Data Time 0.001 (0.024)	Loss 3.2094 (3.3219)	Entropy 1.64013 (1.64400)	Top-1 acc 48.047 (44.963)	Top-5 acc 72.266 (68.702)	lr 0.02309
Train [22][1350/3239]	Time 0.302 (0.570)	Data Time 0.001 (0.024)	Loss 3.4828 (3.3219)	Entropy 1.64007 (1.64397)	Top-1 acc 41.016 (44.964)	Top-5 acc 66.797 (68.702)	lr 0.02309
Train [22][1360/3239]	Time 0.222 (0.569)	Data Time 0.001 (0.024)	Loss 3.5696 (3.3217)	Entropy 1.63998 (1.64394)	Top-1 acc 40.234 (44.971)	Top-5 acc 62.891 (68.699)	lr 0.02309
Train [22][1370/3239]	Time 0.228 (0.568)	Data Time 0.001 (0.024)	Loss 3.3668 (3.3216)	Entropy 1.63993 (1.64391)	Top-1 acc 46.484 (44.976)	Top-5 acc 65.625 (68.695)	lr 0.02309
Train [22][1380/3239]	Time 0.239 (0.568)	Data Time 0.001 (0.023)	Loss 3.3768 (3.3220)	Entropy 1.63986 (1.64389)	Top-1 acc 41.797 (44.964)	Top-5 acc 68.359 (68.687)	lr 0.02308
Train [22][1390/3239]	Time 0.152 (0.567)	Data Time 0.001 (0.023)	Loss 3.2328 (3.3219)	Entropy 1.63977 (1.64386)	Top-1 acc 48.828 (44.972)	Top-5 acc 72.266 (68.694)	lr 0.02308
Train [22][1400/3239]	Time 0.203 (0.566)	Data Time 0.001 (0.023)	Loss 3.4476 (3.3218)	Entropy 1.63969 (1.64383)	Top-1 acc 41.797 (44.975)	Top-5 acc 68.750 (68.703)	lr 0.02308
Train [22][1410/3239]	Time 0.349 (0.565)	Data Time 0.001 (0.023)	Loss 3.1664 (3.3221)	Entropy 1.63961 (1.64380)	Top-1 acc 51.953 (44.975)	Top-5 acc 71.484 (68.690)	lr 0.02308
Train [22][1420/3239]	Time 0.214 (0.564)	Data Time 0.001 (0.023)	Loss 3.0165 (3.3212)	Entropy 1.63956 (1.64377)	Top-1 acc 48.828 (44.991)	Top-5 acc 76.562 (68.710)	lr 0.02308
Train [22][1430/3239]	Time 0.221 (0.563)	Data Time 0.001 (0.023)	Loss 3.3701 (3.3212)	Entropy 1.63950 (1.64374)	Top-1 acc 41.406 (44.997)	Top-5 acc 66.406 (68.710)	lr 0.02308
Train [22][1440/3239]	Time 2.355 (0.562)	Data Time 0.002 (0.022)	Loss 3.3077 (3.3214)	Entropy 1.63950 (1.64371)	Top-1 acc 44.141 (44.989)	Top-5 acc 71.094 (68.702)	lr 0.02308
Train [22][1450/3239]	Time 0.166 (0.560)	Data Time 0.001 (0.022)	Loss 3.3071 (3.3212)	Entropy 1.63941 (1.64368)	Top-1 acc 44.141 (44.987)	Top-5 acc 69.141 (68.709)	lr 0.02308
Train [22][1460/3239]	Time 0.221 (0.559)	Data Time 0.001 (0.022)	Loss 3.1554 (3.3210)	Entropy 1.63940 (1.64365)	Top-1 acc 44.141 (44.993)	Top-5 acc 71.094 (68.714)	lr 0.02308
Train [22][1470/3239]	Time 0.219 (0.558)	Data Time 0.001 (0.022)	Loss 3.2665 (3.3206)	Entropy 1.63928 (1.64362)	Top-1 acc 46.094 (44.996)	Top-5 acc 70.703 (68.724)	lr 0.02308
Train [22][1480/3239]	Time 0.213 (0.558)	Data Time 0.001 (0.022)	Loss 3.3484 (3.3205)	Entropy 1.63934 (1.64359)	Top-1 acc 45.312 (45.006)	Top-5 acc 65.625 (68.725)	lr 0.02308
Train [22][1490/3239]	Time 0.197 (0.557)	Data Time 0.001 (0.022)	Loss 3.3557 (3.3206)	Entropy 1.63930 (1.64356)	Top-1 acc 42.969 (45.003)	Top-5 acc 67.188 (68.725)	lr 0.02308
Train [22][1500/3239]	Time 0.227 (0.556)	Data Time 0.001 (0.022)	Loss 3.2306 (3.3210)	Entropy 1.63923 (1.64353)	Top-1 acc 46.094 (44.987)	Top-5 acc 72.656 (68.715)	lr 0.02308
Train [22][1510/3239]	Time 0.205 (0.555)	Data Time 0.001 (0.022)	Loss 3.3268 (3.3208)	Entropy 1.63921 (1.64351)	Top-1 acc 44.531 (44.995)	Top-5 acc 69.922 (68.724)	lr 0.02308
Train [22][1520/3239]	Time 0.213 (0.555)	Data Time 0.001 (0.021)	Loss 3.4454 (3.3204)	Entropy 1.63915 (1.64348)	Top-1 acc 42.969 (44.998)	Top-5 acc 65.234 (68.731)	lr 0.02308
Train [22][1530/3239]	Time 0.270 (0.579)	Data Time 0.002 (0.021)	Loss 3.2125 (3.3204)	Entropy 1.63910 (1.64345)	Top-1 acc 50.391 (45.007)	Top-5 acc 69.141 (68.729)	lr 0.02308
Train [22][1540/3239]	Time 0.311 (0.580)	Data Time 0.002 (0.021)	Loss 3.3726 (3.3205)	Entropy 1.63910 (1.64342)	Top-1 acc 42.969 (45.008)	Top-5 acc 69.922 (68.732)	lr 0.02308
Train [22][1550/3239]	Time 2.378 (0.579)	Data Time 0.002 (0.021)	Loss 3.3059 (3.3210)	Entropy 1.63910 (1.64339)	Top-1 acc 46.875 (45.001)	Top-5 acc 69.922 (68.719)	lr 0.02308
Train [22][1560/3239]	Time 0.246 (0.576)	Data Time 0.001 (0.021)	Loss 3.5025 (3.3208)	Entropy 1.63906 (1.64337)	Top-1 acc 39.844 (44.999)	Top-5 acc 67.578 (68.722)	lr 0.02307
Train [22][1570/3239]	Time 0.236 (0.575)	Data Time 0.002 (0.021)	Loss 3.5120 (3.3213)	Entropy 1.63893 (1.64334)	Top-1 acc 41.406 (44.984)	Top-5 acc 67.969 (68.712)	lr 0.02307
Train [22][1580/3239]	Time 0.186 (0.575)	Data Time 0.001 (0.021)	Loss 3.4419 (3.3215)	Entropy 1.63887 (1.64331)	Top-1 acc 41.406 (44.981)	Top-5 acc 66.797 (68.708)	lr 0.02307
Train [22][1590/3239]	Time 0.225 (0.574)	Data Time 0.001 (0.021)	Loss 3.4066 (3.3216)	Entropy 1.63878 (1.64328)	Top-1 acc 39.844 (44.979)	Top-5 acc 69.141 (68.709)	lr 0.02307
Train [22][1600/3239]	Time 0.323 (0.573)	Data Time 0.001 (0.020)	Loss 3.1998 (3.3216)	Entropy 1.63874 (1.64325)	Top-1 acc 51.562 (44.980)	Top-5 acc 73.828 (68.713)	lr 0.02307
Train [22][1610/3239]	Time 0.181 (0.572)	Data Time 0.001 (0.020)	Loss 3.3010 (3.3213)	Entropy 1.63871 (1.64322)	Top-1 acc 46.875 (44.983)	Top-5 acc 67.969 (68.719)	lr 0.02307
Train [22][1620/3239]	Time 0.219 (0.571)	Data Time 0.004 (0.020)	Loss 3.3580 (3.3210)	Entropy 1.63863 (1.64320)	Top-1 acc 42.578 (44.985)	Top-5 acc 67.578 (68.722)	lr 0.02307
Train [22][1630/3239]	Time 0.287 (0.571)	Data Time 0.001 (0.020)	Loss 3.2402 (3.3215)	Entropy 1.63862 (1.64317)	Top-1 acc 42.578 (44.972)	Top-5 acc 71.484 (68.713)	lr 0.02307
Train [22][1640/3239]	Time 0.207 (0.570)	Data Time 0.001 (0.020)	Loss 3.3705 (3.3216)	Entropy 1.63862 (1.64314)	Top-1 acc 41.016 (44.972)	Top-5 acc 66.797 (68.713)	lr 0.02307
Train [22][1650/3239]	Time 0.212 (0.569)	Data Time 0.001 (0.020)	Loss 3.3220 (3.3221)	Entropy 1.63862 (1.64311)	Top-1 acc 48.438 (44.969)	Top-5 acc 68.359 (68.706)	lr 0.02307
Train [22][1660/3239]	Time 2.433 (0.568)	Data Time 0.002 (0.020)	Loss 3.4782 (3.3223)	Entropy 1.63862 (1.64309)	Top-1 acc 42.969 (44.961)	Top-5 acc 66.797 (68.700)	lr 0.02307
Train [22][1670/3239]	Time 0.326 (0.566)	Data Time 0.001 (0.020)	Loss 3.6084 (3.3227)	Entropy 1.63852 (1.64306)	Top-1 acc 39.062 (44.955)	Top-5 acc 62.109 (68.691)	lr 0.02307
Train [22][1680/3239]	Time 0.182 (0.566)	Data Time 0.001 (0.020)	Loss 3.0524 (3.3229)	Entropy 1.63846 (1.64303)	Top-1 acc 51.172 (44.950)	Top-5 acc 72.656 (68.688)	lr 0.02307
Train [22][1690/3239]	Time 0.209 (0.565)	Data Time 0.001 (0.019)	Loss 3.1872 (3.3229)	Entropy 1.63848 (1.64300)	Top-1 acc 44.922 (44.947)	Top-5 acc 70.312 (68.683)	lr 0.02307
Train [22][1700/3239]	Time 0.216 (0.564)	Data Time 0.001 (0.019)	Loss 3.2778 (3.3230)	Entropy 1.63850 (1.64298)	Top-1 acc 46.875 (44.943)	Top-5 acc 72.656 (68.684)	lr 0.02307
Train [22][1710/3239]	Time 0.230 (0.563)	Data Time 0.001 (0.019)	Loss 3.3671 (3.3229)	Entropy 1.63844 (1.64295)	Top-1 acc 46.094 (44.946)	Top-5 acc 70.703 (68.691)	lr 0.02307
Train [22][1720/3239]	Time 0.247 (0.563)	Data Time 0.001 (0.019)	Loss 3.2069 (3.3227)	Entropy 1.63835 (1.64293)	Top-1 acc 42.578 (44.942)	Top-5 acc 73.438 (68.697)	lr 0.02307
Train [22][1730/3239]	Time 0.341 (0.562)	Data Time 0.001 (0.019)	Loss 3.1713 (3.3233)	Entropy 1.63824 (1.64290)	Top-1 acc 50.781 (44.930)	Top-5 acc 72.656 (68.688)	lr 0.02307
Train [22][1740/3239]	Time 0.224 (0.562)	Data Time 0.002 (0.019)	Loss 3.2792 (3.3233)	Entropy 1.63823 (1.64287)	Top-1 acc 46.484 (44.922)	Top-5 acc 69.531 (68.687)	lr 0.02307
Train [22][1750/3239]	Time 0.229 (0.561)	Data Time 0.002 (0.019)	Loss 3.3665 (3.3238)	Entropy 1.63825 (1.64285)	Top-1 acc 44.531 (44.912)	Top-5 acc 66.797 (68.675)	lr 0.02306
Train [22][1760/3239]	Time 0.232 (0.560)	Data Time 0.002 (0.019)	Loss 3.3200 (3.3238)	Entropy 1.63823 (1.64282)	Top-1 acc 46.484 (44.915)	Top-5 acc 68.750 (68.675)	lr 0.02306
Train [22][1770/3239]	Time 2.558 (0.560)	Data Time 0.001 (0.019)	Loss 3.3836 (3.3237)	Entropy 1.63823 (1.64279)	Top-1 acc 42.188 (44.916)	Top-5 acc 67.969 (68.683)	lr 0.02306
Train [22][1780/3239]	Time 0.259 (0.558)	Data Time 0.002 (0.019)	Loss 3.2093 (3.3237)	Entropy 1.63816 (1.64277)	Top-1 acc 44.922 (44.918)	Top-5 acc 69.531 (68.678)	lr 0.02306
Train [22][1790/3239]	Time 0.222 (0.557)	Data Time 0.001 (0.019)	Loss 3.4607 (3.3233)	Entropy 1.63826 (1.64274)	Top-1 acc 42.969 (44.933)	Top-5 acc 64.453 (68.688)	lr 0.02306
Train [22][1800/3239]	Time 0.354 (0.557)	Data Time 0.001 (0.018)	Loss 3.3018 (3.3232)	Entropy 1.63825 (1.64272)	Top-1 acc 44.531 (44.939)	Top-5 acc 69.922 (68.694)	lr 0.02306
Train [22][1810/3239]	Time 0.198 (0.556)	Data Time 0.001 (0.018)	Loss 3.5620 (3.3236)	Entropy 1.63824 (1.64269)	Top-1 acc 42.969 (44.929)	Top-5 acc 62.500 (68.690)	lr 0.02306
Train [22][1820/3239]	Time 0.216 (0.555)	Data Time 0.001 (0.018)	Loss 3.4311 (3.3239)	Entropy 1.63818 (1.64267)	Top-1 acc 43.750 (44.926)	Top-5 acc 67.578 (68.682)	lr 0.02306
Train [22][1830/3239]	Time 0.229 (0.555)	Data Time 0.001 (0.018)	Loss 3.2569 (3.3239)	Entropy 1.63807 (1.64264)	Top-1 acc 47.656 (44.923)	Top-5 acc 69.531 (68.683)	lr 0.02306
Train [22][1840/3239]	Time 0.165 (0.554)	Data Time 0.001 (0.018)	Loss 3.2386 (3.3237)	Entropy 1.63803 (1.64262)	Top-1 acc 48.047 (44.931)	Top-5 acc 69.141 (68.689)	lr 0.02306
Train [22][1850/3239]	Time 0.210 (0.554)	Data Time 0.001 (0.018)	Loss 3.2568 (3.3237)	Entropy 1.63788 (1.64259)	Top-1 acc 46.094 (44.931)	Top-5 acc 72.656 (68.689)	lr 0.02306
Train [22][1860/3239]	Time 0.228 (0.553)	Data Time 0.001 (0.018)	Loss 3.5926 (3.3237)	Entropy 1.63783 (1.64257)	Top-1 acc 38.672 (44.936)	Top-5 acc 63.672 (68.688)	lr 0.02306
Train [22][1870/3239]	Time 0.263 (0.552)	Data Time 0.001 (0.018)	Loss 3.2842 (3.3235)	Entropy 1.63778 (1.64254)	Top-1 acc 48.438 (44.942)	Top-5 acc 69.141 (68.692)	lr 0.02306
Train [22][1880/3239]	Time 2.281 (0.552)	Data Time 0.001 (0.018)	Loss 3.3527 (3.3237)	Entropy 1.63778 (1.64252)	Top-1 acc 44.141 (44.938)	Top-5 acc 67.188 (68.688)	lr 0.02306
Train [22][1890/3239]	Time 0.222 (0.550)	Data Time 0.001 (0.018)	Loss 3.2742 (3.3234)	Entropy 1.63769 (1.64249)	Top-1 acc 43.359 (44.941)	Top-5 acc 72.266 (68.698)	lr 0.02306
Train [22][1900/3239]	Time 0.243 (0.572)	Data Time 0.002 (0.018)	Loss 3.3436 (3.3236)	Entropy 1.63763 (1.64247)	Top-1 acc 44.531 (44.932)	Top-5 acc 67.578 (68.691)	lr 0.02306
Train [22][1910/3239]	Time 0.257 (0.571)	Data Time 0.002 (0.017)	Loss 3.2990 (3.3236)	Entropy 1.63743 (1.64244)	Top-1 acc 46.094 (44.934)	Top-5 acc 70.312 (68.689)	lr 0.02306
Train [22][1920/3239]	Time 0.168 (0.571)	Data Time 0.001 (0.017)	Loss 3.3371 (3.3234)	Entropy 1.63737 (1.64241)	Top-1 acc 44.141 (44.937)	Top-5 acc 68.750 (68.691)	lr 0.02306
Train [22][1930/3239]	Time 0.300 (0.570)	Data Time 0.001 (0.017)	Loss 3.2024 (3.3232)	Entropy 1.63726 (1.64239)	Top-1 acc 50.391 (44.942)	Top-5 acc 67.969 (68.692)	lr 0.02305
Train [22][1940/3239]	Time 0.217 (0.569)	Data Time 0.001 (0.017)	Loss 3.3885 (3.3231)	Entropy 1.63716 (1.64236)	Top-1 acc 47.266 (44.945)	Top-5 acc 69.141 (68.687)	lr 0.02305
Train [22][1950/3239]	Time 0.209 (0.569)	Data Time 0.001 (0.017)	Loss 3.3846 (3.3231)	Entropy 1.63711 (1.64233)	Top-1 acc 44.531 (44.943)	Top-5 acc 65.234 (68.689)	lr 0.02305
Train [22][1960/3239]	Time 0.198 (0.568)	Data Time 0.001 (0.017)	Loss 3.2164 (3.3226)	Entropy 1.63703 (1.64231)	Top-1 acc 44.141 (44.954)	Top-5 acc 73.047 (68.700)	lr 0.02305
Train [22][1970/3239]	Time 0.172 (0.567)	Data Time 0.001 (0.017)	Loss 3.1665 (3.3225)	Entropy 1.63704 (1.64228)	Top-1 acc 47.266 (44.957)	Top-5 acc 73.047 (68.707)	lr 0.02305
Train [22][1980/3239]	Time 0.231 (0.567)	Data Time 0.001 (0.017)	Loss 3.3246 (3.3227)	Entropy 1.63703 (1.64225)	Top-1 acc 45.312 (44.953)	Top-5 acc 69.141 (68.704)	lr 0.02305
Train [22][1990/3239]	Time 2.209 (0.566)	Data Time 0.001 (0.017)	Loss 3.3033 (3.3226)	Entropy 1.63703 (1.64223)	Top-1 acc 45.312 (44.960)	Top-5 acc 67.578 (68.704)	lr 0.02305
Train [22][2000/3239]	Time 0.308 (0.565)	Data Time 0.001 (0.017)	Loss 3.2228 (3.3227)	Entropy 1.63695 (1.64220)	Top-1 acc 46.484 (44.950)	Top-5 acc 70.703 (68.705)	lr 0.02305
Train [22][2010/3239]	Time 0.223 (0.564)	Data Time 0.001 (0.017)	Loss 3.2378 (3.3228)	Entropy 1.63676 (1.64217)	Top-1 acc 44.922 (44.947)	Top-5 acc 72.656 (68.702)	lr 0.02305
Train [22][2020/3239]	Time 0.221 (0.564)	Data Time 0.001 (0.017)	Loss 3.5136 (3.3229)	Entropy 1.63675 (1.64215)	Top-1 acc 39.844 (44.944)	Top-5 acc 62.109 (68.697)	lr 0.02305
Train [22][2030/3239]	Time 0.219 (0.563)	Data Time 0.001 (0.017)	Loss 3.1949 (3.3228)	Entropy 1.63669 (1.64212)	Top-1 acc 47.266 (44.947)	Top-5 acc 70.312 (68.697)	lr 0.02305
Train [22][2040/3239]	Time 0.185 (0.562)	Data Time 0.002 (0.017)	Loss 3.5217 (3.3227)	Entropy 1.63670 (1.64209)	Top-1 acc 41.406 (44.947)	Top-5 acc 66.797 (68.697)	lr 0.02305
Train [22][2050/3239]	Time 0.224 (0.562)	Data Time 0.001 (0.016)	Loss 3.3153 (3.3230)	Entropy 1.63663 (1.64207)	Top-1 acc 46.094 (44.940)	Top-5 acc 67.969 (68.689)	lr 0.02305
Train [22][2060/3239]	Time 0.262 (0.561)	Data Time 0.001 (0.016)	Loss 3.3112 (3.3232)	Entropy 1.63655 (1.64204)	Top-1 acc 45.703 (44.936)	Top-5 acc 69.141 (68.681)	lr 0.02305
Train [22][2070/3239]	Time 0.199 (0.561)	Data Time 0.001 (0.016)	Loss 3.3436 (3.3236)	Entropy 1.63652 (1.64201)	Top-1 acc 42.188 (44.927)	Top-5 acc 66.797 (68.675)	lr 0.02305
Train [22][2080/3239]	Time 0.218 (0.560)	Data Time 0.001 (0.016)	Loss 3.1399 (3.3237)	Entropy 1.63647 (1.64199)	Top-1 acc 48.828 (44.933)	Top-5 acc 72.266 (68.677)	lr 0.02305
Train [22][2090/3239]	Time 0.253 (0.560)	Data Time 0.001 (0.016)	Loss 3.3883 (3.3235)	Entropy 1.63634 (1.64196)	Top-1 acc 46.875 (44.943)	Top-5 acc 68.359 (68.683)	lr 0.02305
Train [22][2100/3239]	Time 2.284 (0.559)	Data Time 0.002 (0.016)	Loss 3.2938 (3.3236)	Entropy 1.63634 (1.64193)	Top-1 acc 46.875 (44.945)	Top-5 acc 67.969 (68.681)	lr 0.02305
Train [22][2110/3239]	Time 0.286 (0.557)	Data Time 0.001 (0.016)	Loss 3.4653 (3.3235)	Entropy 1.63620 (1.64191)	Top-1 acc 41.406 (44.943)	Top-5 acc 64.453 (68.679)	lr 0.02305
Train [22][2120/3239]	Time 0.360 (0.557)	Data Time 0.001 (0.016)	Loss 3.4104 (3.3235)	Entropy 1.63600 (1.64188)	Top-1 acc 41.797 (44.941)	Top-5 acc 66.406 (68.679)	lr 0.02304
Train [22][2130/3239]	Time 0.218 (0.556)	Data Time 0.001 (0.016)	Loss 3.4151 (3.3234)	Entropy 1.63591 (1.64185)	Top-1 acc 47.266 (44.948)	Top-5 acc 67.188 (68.681)	lr 0.02304
Train [22][2140/3239]	Time 0.197 (0.556)	Data Time 0.002 (0.016)	Loss 3.3100 (3.3234)	Entropy 1.63591 (1.64182)	Top-1 acc 46.094 (44.953)	Top-5 acc 70.312 (68.682)	lr 0.02304
Train [22][2150/3239]	Time 0.235 (0.555)	Data Time 0.001 (0.016)	Loss 3.3911 (3.3234)	Entropy 1.63582 (1.64180)	Top-1 acc 44.141 (44.961)	Top-5 acc 67.969 (68.681)	lr 0.02304
Train [22][2160/3239]	Time 0.227 (0.555)	Data Time 0.001 (0.016)	Loss 3.3400 (3.3233)	Entropy 1.63570 (1.64177)	Top-1 acc 45.312 (44.967)	Top-5 acc 67.969 (68.684)	lr 0.02304
Train [22][2170/3239]	Time 0.203 (0.554)	Data Time 0.001 (0.016)	Loss 3.2458 (3.3232)	Entropy 1.63569 (1.64174)	Top-1 acc 46.875 (44.969)	Top-5 acc 71.484 (68.691)	lr 0.02304
Train [22][2180/3239]	Time 0.253 (0.554)	Data Time 0.001 (0.016)	Loss 3.2682 (3.3228)	Entropy 1.63558 (1.64171)	Top-1 acc 47.266 (44.981)	Top-5 acc 70.703 (68.701)	lr 0.02304
Train [22][2190/3239]	Time 0.243 (0.553)	Data Time 0.001 (0.016)	Loss 3.2024 (3.3228)	Entropy 1.63553 (1.64168)	Top-1 acc 45.703 (44.979)	Top-5 acc 71.484 (68.698)	lr 0.02304
Train [22][2200/3239]	Time 0.220 (0.553)	Data Time 0.001 (0.015)	Loss 3.3636 (3.3227)	Entropy 1.63540 (1.64166)	Top-1 acc 42.188 (44.982)	Top-5 acc 69.922 (68.698)	lr 0.02304
Train [22][2210/3239]	Time 2.249 (0.552)	Data Time 0.001 (0.015)	Loss 3.1978 (3.3226)	Entropy 1.63540 (1.64163)	Top-1 acc 47.266 (44.985)	Top-5 acc 71.484 (68.700)	lr 0.02304
Train [22][2220/3239]	Time 0.225 (0.551)	Data Time 0.001 (0.015)	Loss 3.3000 (3.3225)	Entropy 1.63530 (1.64160)	Top-1 acc 43.750 (44.990)	Top-5 acc 68.750 (68.700)	lr 0.02304
Train [22][2230/3239]	Time 0.234 (0.550)	Data Time 0.002 (0.015)	Loss 3.2756 (3.3225)	Entropy 1.63522 (1.64157)	Top-1 acc 48.438 (44.996)	Top-5 acc 67.188 (68.696)	lr 0.02304
Train [22][2240/3239]	Time 0.225 (0.550)	Data Time 0.001 (0.015)	Loss 3.0680 (3.3224)	Entropy 1.63515 (1.64154)	Top-1 acc 50.391 (44.997)	Top-5 acc 75.000 (68.699)	lr 0.02304
Train [22][2250/3239]	Time 0.352 (0.549)	Data Time 0.001 (0.015)	Loss 3.5220 (3.3228)	Entropy 1.63508 (1.64151)	Top-1 acc 40.625 (44.987)	Top-5 acc 64.062 (68.691)	lr 0.02304
Train [22][2260/3239]	Time 0.298 (0.568)	Data Time 0.003 (0.015)	Loss 3.2572 (3.3229)	Entropy 1.63506 (1.64149)	Top-1 acc 42.188 (44.983)	Top-5 acc 71.484 (68.689)	lr 0.02304
Train [22][2270/3239]	Time 0.254 (0.567)	Data Time 0.003 (0.015)	Loss 3.2913 (3.3229)	Entropy 1.63502 (1.64146)	Top-1 acc 46.875 (44.982)	Top-5 acc 71.875 (68.689)	lr 0.02304
Train [22][2280/3239]	Time 0.222 (0.567)	Data Time 0.002 (0.015)	Loss 3.2495 (3.3230)	Entropy 1.63455 (1.64143)	Top-1 acc 47.266 (44.985)	Top-5 acc 69.141 (68.686)	lr 0.02304
Train [22][2290/3239]	Time 0.234 (0.566)	Data Time 0.002 (0.015)	Loss 3.3594 (3.3228)	Entropy 1.63452 (1.64140)	Top-1 acc 43.359 (44.995)	Top-5 acc 65.625 (68.685)	lr 0.02304
Train [22][2300/3239]	Time 0.228 (0.566)	Data Time 0.001 (0.015)	Loss 3.2158 (3.3227)	Entropy 1.63450 (1.64137)	Top-1 acc 48.828 (44.998)	Top-5 acc 67.188 (68.689)	lr 0.02303
Train [22][2310/3239]	Time 0.216 (0.565)	Data Time 0.001 (0.015)	Loss 3.2347 (3.3228)	Entropy 1.63438 (1.64134)	Top-1 acc 47.656 (44.997)	Top-5 acc 69.922 (68.685)	lr 0.02303
Train [22][2320/3239]	Time 2.449 (0.564)	Data Time 0.001 (0.015)	Loss 3.2571 (3.3226)	Entropy 1.63438 (1.64131)	Top-1 acc 45.703 (45.003)	Top-5 acc 72.266 (68.690)	lr 0.02303
Train [22][2330/3239]	Time 0.225 (0.563)	Data Time 0.001 (0.015)	Loss 3.3214 (3.3224)	Entropy 1.63433 (1.64128)	Top-1 acc 45.703 (45.011)	Top-5 acc 68.359 (68.696)	lr 0.02303
Train [22][2340/3239]	Time 0.220 (0.562)	Data Time 0.001 (0.015)	Loss 3.4013 (3.3223)	Entropy 1.63406 (1.64125)	Top-1 acc 44.531 (45.010)	Top-5 acc 65.234 (68.694)	lr 0.02303
Train [22][2350/3239]	Time 0.219 (0.562)	Data Time 0.001 (0.015)	Loss 3.3328 (3.3223)	Entropy 1.63403 (1.64122)	Top-1 acc 45.312 (45.007)	Top-5 acc 67.578 (68.697)	lr 0.02303
Train [22][2360/3239]	Time 0.181 (0.562)	Data Time 0.002 (0.015)	Loss 3.4627 (3.3225)	Entropy 1.63401 (1.64119)	Top-1 acc 41.797 (45.004)	Top-5 acc 64.844 (68.691)	lr 0.02303
Train [22][2370/3239]	Time 0.207 (0.561)	Data Time 0.001 (0.014)	Loss 3.2504 (3.3226)	Entropy 1.63389 (1.64116)	Top-1 acc 45.703 (45.004)	Top-5 acc 69.922 (68.688)	lr 0.02303
Train [22][2380/3239]	Time 0.182 (0.561)	Data Time 0.001 (0.014)	Loss 3.2623 (3.3226)	Entropy 1.63374 (1.64112)	Top-1 acc 45.703 (45.000)	Top-5 acc 66.797 (68.688)	lr 0.02303
Train [22][2390/3239]	Time 0.218 (0.560)	Data Time 0.001 (0.014)	Loss 3.2457 (3.3225)	Entropy 1.63367 (1.64109)	Top-1 acc 47.656 (45.005)	Top-5 acc 69.922 (68.687)	lr 0.02303
Train [22][2400/3239]	Time 0.233 (0.560)	Data Time 0.001 (0.014)	Loss 3.3220 (3.3225)	Entropy 1.63362 (1.64106)	Top-1 acc 46.484 (45.002)	Top-5 acc 69.141 (68.686)	lr 0.02303
Train [22][2410/3239]	Time 0.230 (0.559)	Data Time 0.001 (0.014)	Loss 3.2312 (3.3223)	Entropy 1.63354 (1.64103)	Top-1 acc 48.828 (45.006)	Top-5 acc 69.922 (68.690)	lr 0.02303
Train [22][2420/3239]	Time 0.215 (0.559)	Data Time 0.001 (0.014)	Loss 3.4599 (3.3224)	Entropy 1.63349 (1.64100)	Top-1 acc 39.844 (45.004)	Top-5 acc 66.016 (68.689)	lr 0.02303
Train [22][2430/3239]	Time 2.404 (0.558)	Data Time 0.001 (0.014)	Loss 3.2162 (3.3222)	Entropy 1.63349 (1.64097)	Top-1 acc 48.047 (45.007)	Top-5 acc 70.703 (68.690)	lr 0.02303
Train [22][2440/3239]	Time 0.347 (0.557)	Data Time 0.001 (0.014)	Loss 3.2733 (3.3223)	Entropy 1.63339 (1.64094)	Top-1 acc 42.969 (45.006)	Top-5 acc 72.656 (68.694)	lr 0.02303
Train [22][2450/3239]	Time 0.249 (0.557)	Data Time 0.001 (0.014)	Loss 3.3295 (3.3225)	Entropy 1.63332 (1.64091)	Top-1 acc 45.703 (45.000)	Top-5 acc 67.578 (68.687)	lr 0.02303
Train [22][2460/3239]	Time 0.199 (0.556)	Data Time 0.001 (0.014)	Loss 3.3528 (3.3226)	Entropy 1.63326 (1.64088)	Top-1 acc 45.703 (45.002)	Top-5 acc 68.750 (68.681)	lr 0.02303
Train [22][2470/3239]	Time 0.209 (0.556)	Data Time 0.001 (0.014)	Loss 3.1891 (3.3225)	Entropy 1.63314 (1.64085)	Top-1 acc 45.703 (45.006)	Top-5 acc 73.438 (68.683)	lr 0.02303
Train [22][2480/3239]	Time 0.229 (0.555)	Data Time 0.002 (0.014)	Loss 3.2612 (3.3227)	Entropy 1.63307 (1.64081)	Top-1 acc 47.656 (45.003)	Top-5 acc 71.484 (68.682)	lr 0.02302
Train [22][2490/3239]	Time 0.187 (0.555)	Data Time 0.001 (0.014)	Loss 3.2277 (3.3228)	Entropy 1.63300 (1.64078)	Top-1 acc 47.266 (45.002)	Top-5 acc 70.312 (68.680)	lr 0.02302
Train [22][2500/3239]	Time 0.258 (0.554)	Data Time 0.001 (0.014)	Loss 3.3513 (3.3227)	Entropy 1.63288 (1.64075)	Top-1 acc 44.531 (45.003)	Top-5 acc 64.844 (68.681)	lr 0.02302
Train [22][2510/3239]	Time 0.177 (0.554)	Data Time 0.001 (0.014)	Loss 3.3935 (3.3228)	Entropy 1.63278 (1.64072)	Top-1 acc 41.797 (45.004)	Top-5 acc 66.406 (68.679)	lr 0.02302
Train [22][2520/3239]	Time 0.207 (0.553)	Data Time 0.001 (0.014)	Loss 3.1789 (3.3226)	Entropy 1.63286 (1.64069)	Top-1 acc 50.391 (45.007)	Top-5 acc 68.359 (68.680)	lr 0.02302
Train [22][2530/3239]	Time 0.212 (0.553)	Data Time 0.001 (0.014)	Loss 3.3617 (3.3230)	Entropy 1.63284 (1.64066)	Top-1 acc 45.312 (44.999)	Top-5 acc 67.578 (68.672)	lr 0.02302
Train [22][2540/3239]	Time 2.357 (0.552)	Data Time 0.001 (0.014)	Loss 3.4027 (3.3228)	Entropy 1.63284 (1.64063)	Top-1 acc 46.484 (45.006)	Top-5 acc 66.406 (68.675)	lr 0.02302
Train [22][2550/3239]	Time 0.210 (0.551)	Data Time 0.001 (0.014)	Loss 3.1771 (3.3229)	Entropy 1.63275 (1.64060)	Top-1 acc 46.875 (45.002)	Top-5 acc 71.094 (68.677)	lr 0.02302
Train [22][2560/3239]	Time 0.199 (0.551)	Data Time 0.001 (0.014)	Loss 3.2546 (3.3231)	Entropy 1.63267 (1.64057)	Top-1 acc 47.656 (44.996)	Top-5 acc 72.266 (68.672)	lr 0.02302
Train [22][2570/3239]	Time 0.210 (0.550)	Data Time 0.001 (0.014)	Loss 3.6136 (3.3231)	Entropy 1.63268 (1.64053)	Top-1 acc 42.188 (45.001)	Top-5 acc 63.281 (68.675)	lr 0.02302
Train [22][2580/3239]	Time 0.194 (0.550)	Data Time 0.001 (0.013)	Loss 3.2762 (3.3231)	Entropy 1.63253 (1.64050)	Top-1 acc 44.922 (45.000)	Top-5 acc 69.531 (68.676)	lr 0.02302
Train [22][2590/3239]	Time 0.221 (0.549)	Data Time 0.001 (0.013)	Loss 3.1393 (3.3231)	Entropy 1.63250 (1.64047)	Top-1 acc 47.656 (44.999)	Top-5 acc 71.484 (68.676)	lr 0.02302
Train [22][2600/3239]	Time 0.221 (0.549)	Data Time 0.001 (0.013)	Loss 3.3814 (3.3230)	Entropy 1.63245 (1.64044)	Top-1 acc 42.969 (45.004)	Top-5 acc 66.406 (68.678)	lr 0.02302
Train [22][2610/3239]	Time 0.203 (0.549)	Data Time 0.001 (0.013)	Loss 3.3150 (3.3231)	Entropy 1.63239 (1.64041)	Top-1 acc 42.188 (45.001)	Top-5 acc 69.531 (68.678)	lr 0.02302
Train [22][2620/3239]	Time 0.253 (0.564)	Data Time 0.006 (0.013)	Loss 3.3873 (3.3233)	Entropy 1.63223 (1.64038)	Top-1 acc 44.531 (44.992)	Top-5 acc 69.141 (68.675)	lr 0.02302
Train [22][2630/3239]	Time 0.207 (0.564)	Data Time 0.002 (0.013)	Loss 3.4535 (3.3234)	Entropy 1.63206 (1.64035)	Top-1 acc 41.797 (44.986)	Top-5 acc 65.625 (68.672)	lr 0.02302
Train [22][2640/3239]	Time 0.347 (0.563)	Data Time 0.001 (0.013)	Loss 3.1376 (3.3234)	Entropy 1.63195 (1.64032)	Top-1 acc 48.438 (44.988)	Top-5 acc 70.312 (68.671)	lr 0.02302
Train [22][2650/3239]	Time 0.236 (0.563)	Data Time 0.002 (0.013)	Loss 3.3018 (3.3231)	Entropy 1.63195 (1.64029)	Top-1 acc 42.578 (44.998)	Top-5 acc 67.969 (68.680)	lr 0.02302
Train [22][2660/3239]	Time 0.212 (0.562)	Data Time 0.001 (0.013)	Loss 3.5307 (3.3235)	Entropy 1.63190 (1.64026)	Top-1 acc 39.844 (44.990)	Top-5 acc 64.062 (68.672)	lr 0.02302
Train [22][2670/3239]	Time 0.193 (0.562)	Data Time 0.001 (0.013)	Loss 3.1757 (3.3236)	Entropy 1.63190 (1.64022)	Top-1 acc 46.484 (44.989)	Top-5 acc 70.703 (68.670)	lr 0.02301
Train [22][2680/3239]	Time 0.209 (0.561)	Data Time 0.001 (0.013)	Loss 3.0707 (3.3235)	Entropy 1.63185 (1.64019)	Top-1 acc 48.047 (44.992)	Top-5 acc 74.609 (68.670)	lr 0.02301
Train [22][2690/3239]	Time 0.258 (0.561)	Data Time 0.002 (0.013)	Loss 3.2851 (3.3235)	Entropy 1.63184 (1.64016)	Top-1 acc 45.703 (44.996)	Top-5 acc 67.578 (68.668)	lr 0.02301
Train [22][2700/3239]	Time 0.366 (0.560)	Data Time 0.001 (0.013)	Loss 3.4492 (3.3236)	Entropy 1.63178 (1.64013)	Top-1 acc 42.969 (44.995)	Top-5 acc 65.625 (68.667)	lr 0.02301
Train [22][2710/3239]	Time 0.225 (0.560)	Data Time 0.001 (0.013)	Loss 3.1254 (3.3236)	Entropy 1.63179 (1.64010)	Top-1 acc 47.266 (44.992)	Top-5 acc 73.438 (68.668)	lr 0.02301
Train [22][2720/3239]	Time 0.221 (0.559)	Data Time 0.001 (0.013)	Loss 3.3236 (3.3236)	Entropy 1.63172 (1.64007)	Top-1 acc 44.531 (44.993)	Top-5 acc 70.312 (68.670)	lr 0.02301
Train [22][2730/3239]	Time 0.238 (0.559)	Data Time 0.001 (0.013)	Loss 3.2868 (3.3234)	Entropy 1.63154 (1.64004)	Top-1 acc 44.922 (44.996)	Top-5 acc 70.703 (68.675)	lr 0.02301
Train [22][2740/3239]	Time 0.204 (0.559)	Data Time 0.001 (0.013)	Loss 3.1843 (3.3233)	Entropy 1.63152 (1.64001)	Top-1 acc 47.656 (44.996)	Top-5 acc 71.875 (68.675)	lr 0.02301
Train [22][2750/3239]	Time 0.264 (0.558)	Data Time 0.002 (0.013)	Loss 3.2596 (3.3231)	Entropy 1.63140 (1.63998)	Top-1 acc 43.750 (44.997)	Top-5 acc 69.141 (68.680)	lr 0.02301
Train [22][2760/3239]	Time 0.252 (0.558)	Data Time 0.001 (0.013)	Loss 3.3130 (3.3231)	Entropy 1.63138 (1.63995)	Top-1 acc 45.703 (44.998)	Top-5 acc 68.359 (68.680)	lr 0.02301
Train [22][2770/3239]	Time 0.339 (0.557)	Data Time 0.001 (0.013)	Loss 3.3980 (3.3234)	Entropy 1.63134 (1.63992)	Top-1 acc 41.406 (44.993)	Top-5 acc 67.188 (68.671)	lr 0.02301
Train [22][2780/3239]	Time 0.292 (0.557)	Data Time 0.001 (0.013)	Loss 3.2635 (3.3234)	Entropy 1.63125 (1.63988)	Top-1 acc 48.438 (44.992)	Top-5 acc 71.094 (68.670)	lr 0.02301
Train [22][2790/3239]	Time 0.155 (0.556)	Data Time 0.001 (0.013)	Loss 3.2176 (3.3232)	Entropy 1.63120 (1.63985)	Top-1 acc 44.531 (44.991)	Top-5 acc 70.703 (68.672)	lr 0.02301
Train [22][2800/3239]	Time 0.223 (0.556)	Data Time 0.001 (0.013)	Loss 3.1900 (3.3232)	Entropy 1.63111 (1.63982)	Top-1 acc 50.391 (44.993)	Top-5 acc 67.969 (68.671)	lr 0.02301
Train [22][2810/3239]	Time 0.259 (0.556)	Data Time 0.002 (0.013)	Loss 3.3779 (3.3232)	Entropy 1.63096 (1.63979)	Top-1 acc 46.875 (44.988)	Top-5 acc 65.625 (68.670)	lr 0.02301
Train [22][2820/3239]	Time 0.210 (0.555)	Data Time 0.001 (0.013)	Loss 3.1298 (3.3233)	Entropy 1.63088 (1.63976)	Top-1 acc 51.953 (44.987)	Top-5 acc 73.047 (68.667)	lr 0.02301
Train [22][2830/3239]	Time 0.351 (0.555)	Data Time 0.001 (0.012)	Loss 3.5164 (3.3233)	Entropy 1.63074 (1.63973)	Top-1 acc 37.109 (44.987)	Top-5 acc 68.359 (68.668)	lr 0.02301
Train [22][2840/3239]	Time 0.266 (0.554)	Data Time 0.001 (0.012)	Loss 3.2732 (3.3233)	Entropy 1.63068 (1.63970)	Top-1 acc 48.047 (44.988)	Top-5 acc 71.094 (68.667)	lr 0.02301
Train [22][2850/3239]	Time 0.244 (0.554)	Data Time 0.001 (0.012)	Loss 3.2181 (3.3235)	Entropy 1.63063 (1.63967)	Top-1 acc 48.438 (44.985)	Top-5 acc 74.609 (68.665)	lr 0.02300
Train [22][2860/3239]	Time 0.256 (0.554)	Data Time 0.001 (0.012)	Loss 3.0683 (3.3235)	Entropy 1.63057 (1.63963)	Top-1 acc 50.000 (44.986)	Top-5 acc 74.219 (68.660)	lr 0.02300
Train [22][2870/3239]	Time 0.199 (0.553)	Data Time 0.002 (0.012)	Loss 3.8660 (3.3239)	Entropy 1.63054 (1.63960)	Top-1 acc 33.203 (44.978)	Top-5 acc 54.297 (68.651)	lr 0.02300
Train [22][2880/3239]	Time 0.253 (0.553)	Data Time 0.001 (0.012)	Loss 3.0674 (3.3234)	Entropy 1.63038 (1.63957)	Top-1 acc 47.656 (44.982)	Top-5 acc 74.219 (68.662)	lr 0.02300
Train [22][2890/3239]	Time 0.197 (0.552)	Data Time 0.001 (0.012)	Loss 3.3182 (3.3236)	Entropy 1.63034 (1.63954)	Top-1 acc 46.094 (44.983)	Top-5 acc 67.188 (68.657)	lr 0.02300
Train [22][2900/3239]	Time 0.247 (0.552)	Data Time 0.001 (0.012)	Loss 3.4507 (3.3237)	Entropy 1.63015 (1.63951)	Top-1 acc 42.578 (44.978)	Top-5 acc 62.891 (68.655)	lr 0.02300
Train [22][2910/3239]	Time 0.310 (0.552)	Data Time 0.002 (0.012)	Loss 3.5072 (3.3237)	Entropy 1.63006 (1.63948)	Top-1 acc 40.234 (44.974)	Top-5 acc 64.062 (68.656)	lr 0.02300
Train [22][2920/3239]	Time 0.257 (0.551)	Data Time 0.001 (0.012)	Loss 3.3259 (3.3237)	Entropy 1.63005 (1.63944)	Top-1 acc 42.578 (44.975)	Top-5 acc 69.531 (68.659)	lr 0.02300
Train [22][2930/3239]	Time 0.275 (0.551)	Data Time 0.001 (0.012)	Loss 3.3027 (3.3238)	Entropy 1.62999 (1.63941)	Top-1 acc 46.875 (44.975)	Top-5 acc 70.312 (68.658)	lr 0.02300
Train [22][2940/3239]	Time 0.227 (0.551)	Data Time 0.001 (0.012)	Loss 3.3590 (3.3238)	Entropy 1.62993 (1.63938)	Top-1 acc 42.969 (44.972)	Top-5 acc 67.969 (68.657)	lr 0.02300
Train [22][2950/3239]	Time 0.283 (0.563)	Data Time 0.004 (0.012)	Loss 3.5088 (3.3237)	Entropy 1.62979 (1.63935)	Top-1 acc 42.969 (44.975)	Top-5 acc 65.234 (68.661)	lr 0.02300
Train [22][2960/3239]	Time 0.213 (0.563)	Data Time 0.002 (0.012)	Loss 3.1851 (3.3237)	Entropy 1.62973 (1.63931)	Top-1 acc 49.609 (44.975)	Top-5 acc 71.094 (68.660)	lr 0.02300
Train [22][2970/3239]	Time 0.309 (0.563)	Data Time 0.002 (0.012)	Loss 3.2513 (3.3236)	Entropy 1.62972 (1.63928)	Top-1 acc 46.484 (44.975)	Top-5 acc 71.094 (68.658)	lr 0.02300
Train [22][2980/3239]	Time 0.216 (0.562)	Data Time 0.001 (0.012)	Loss 3.3536 (3.3237)	Entropy 1.62971 (1.63925)	Top-1 acc 41.797 (44.972)	Top-5 acc 72.266 (68.659)	lr 0.02300
Train [22][2990/3239]	Time 0.196 (0.562)	Data Time 0.001 (0.012)	Loss 3.2890 (3.3237)	Entropy 1.62963 (1.63922)	Top-1 acc 50.000 (44.973)	Top-5 acc 68.359 (68.660)	lr 0.02300
Train [22][3000/3239]	Time 0.240 (0.562)	Data Time 0.001 (0.012)	Loss 3.2782 (3.3235)	Entropy 1.62962 (1.63919)	Top-1 acc 47.266 (44.979)	Top-5 acc 68.359 (68.663)	lr 0.02300
Train [22][3010/3239]	Time 0.199 (0.561)	Data Time 0.001 (0.012)	Loss 3.3530 (3.3234)	Entropy 1.62955 (1.63915)	Top-1 acc 47.266 (44.979)	Top-5 acc 68.359 (68.666)	lr 0.02300
Train [22][3020/3239]	Time 0.242 (0.561)	Data Time 0.001 (0.012)	Loss 3.1362 (3.3234)	Entropy 1.62947 (1.63912)	Top-1 acc 50.000 (44.978)	Top-5 acc 72.266 (68.664)	lr 0.02300
Train [22][3030/3239]	Time 0.319 (0.560)	Data Time 0.002 (0.012)	Loss 3.4790 (3.3235)	Entropy 1.62933 (1.63909)	Top-1 acc 41.406 (44.976)	Top-5 acc 66.016 (68.664)	lr 0.02299
Train [22][3040/3239]	Time 0.221 (0.560)	Data Time 0.001 (0.012)	Loss 3.2727 (3.3234)	Entropy 1.62927 (1.63906)	Top-1 acc 45.703 (44.979)	Top-5 acc 71.094 (68.668)	lr 0.02299
Train [22][3050/3239]	Time 0.211 (0.560)	Data Time 0.001 (0.012)	Loss 3.4423 (3.3233)	Entropy 1.62909 (1.63903)	Top-1 acc 43.359 (44.979)	Top-5 acc 67.188 (68.668)	lr 0.02299
Train [22][3060/3239]	Time 0.223 (0.559)	Data Time 0.001 (0.012)	Loss 3.2720 (3.3232)	Entropy 1.62893 (1.63899)	Top-1 acc 48.047 (44.985)	Top-5 acc 69.922 (68.670)	lr 0.02299
Train [22][3070/3239]	Time 0.200 (0.559)	Data Time 0.001 (0.012)	Loss 3.5256 (3.3232)	Entropy 1.62889 (1.63896)	Top-1 acc 41.406 (44.985)	Top-5 acc 66.016 (68.672)	lr 0.02299
Train [22][3080/3239]	Time 0.252 (0.558)	Data Time 0.001 (0.012)	Loss 3.1631 (3.3232)	Entropy 1.62893 (1.63893)	Top-1 acc 49.609 (44.987)	Top-5 acc 71.875 (68.671)	lr 0.02299
Train [22][3090/3239]	Time 0.236 (0.558)	Data Time 0.001 (0.012)	Loss 3.3108 (3.3232)	Entropy 1.62885 (1.63890)	Top-1 acc 45.312 (44.988)	Top-5 acc 65.625 (68.669)	lr 0.02299
Train [22][3100/3239]	Time 0.248 (0.558)	Data Time 0.001 (0.012)	Loss 3.2368 (3.3232)	Entropy 1.62881 (1.63886)	Top-1 acc 45.703 (44.988)	Top-5 acc 70.703 (68.671)	lr 0.02299
Train [22][3110/3239]	Time 0.232 (0.557)	Data Time 0.001 (0.012)	Loss 3.4863 (3.3234)	Entropy 1.62863 (1.63883)	Top-1 acc 37.891 (44.982)	Top-5 acc 65.234 (68.667)	lr 0.02299
Train [22][3120/3239]	Time 0.202 (0.557)	Data Time 0.001 (0.012)	Loss 3.2551 (3.3234)	Entropy 1.62860 (1.63880)	Top-1 acc 47.656 (44.984)	Top-5 acc 70.312 (68.669)	lr 0.02299
Train [22][3130/3239]	Time 0.240 (0.557)	Data Time 0.001 (0.012)	Loss 3.4922 (3.3235)	Entropy 1.62857 (1.63877)	Top-1 acc 43.750 (44.986)	Top-5 acc 65.625 (68.667)	lr 0.02299
Train [22][3140/3239]	Time 0.209 (0.556)	Data Time 0.001 (0.011)	Loss 3.3127 (3.3235)	Entropy 1.62858 (1.63873)	Top-1 acc 43.359 (44.989)	Top-5 acc 66.406 (68.667)	lr 0.02299
Train [22][3150/3239]	Time 0.190 (0.556)	Data Time 0.001 (0.011)	Loss 3.3814 (3.3235)	Entropy 1.62852 (1.63870)	Top-1 acc 41.797 (44.985)	Top-5 acc 68.359 (68.668)	lr 0.02299
Train [22][3160/3239]	Time 0.219 (0.555)	Data Time 0.001 (0.011)	Loss 3.3280 (3.3236)	Entropy 1.62840 (1.63867)	Top-1 acc 41.406 (44.986)	Top-5 acc 67.969 (68.666)	lr 0.02299
Train [22][3170/3239]	Time 0.203 (0.555)	Data Time 0.002 (0.011)	Loss 3.2645 (3.3235)	Entropy 1.62834 (1.63864)	Top-1 acc 47.266 (44.990)	Top-5 acc 69.922 (68.668)	lr 0.02299
Train [22][3180/3239]	Time 0.213 (0.555)	Data Time 0.000 (0.011)	Loss 3.4472 (3.3235)	Entropy 1.62821 (1.63860)	Top-1 acc 44.141 (44.985)	Top-5 acc 67.578 (68.666)	lr 0.02299
Train [22][3190/3239]	Time 0.227 (0.554)	Data Time 0.000 (0.011)	Loss 3.2683 (3.3235)	Entropy 1.62821 (1.63857)	Top-1 acc 49.219 (44.986)	Top-5 acc 67.578 (68.663)	lr 0.02299
Train [22][3200/3239]	Time 0.212 (0.554)	Data Time 0.000 (0.011)	Loss 3.3223 (3.3234)	Entropy 1.62815 (1.63854)	Top-1 acc 48.828 (44.984)	Top-5 acc 67.188 (68.661)	lr 0.02299
Train [22][3210/3239]	Time 0.213 (0.554)	Data Time 0.000 (0.011)	Loss 3.1386 (3.3233)	Entropy 1.62810 (1.63851)	Top-1 acc 53.125 (44.989)	Top-5 acc 71.875 (68.664)	lr 0.02299
Train [22][3220/3239]	Time 0.295 (0.553)	Data Time 0.000 (0.011)	Loss 3.2857 (3.3230)	Entropy 1.62816 (1.63847)	Top-1 acc 48.438 (44.995)	Top-5 acc 71.484 (68.669)	lr 0.02298
Train [22][3230/3239]	Time 0.207 (0.553)	Data Time 0.000 (0.011)	Loss 3.3433 (3.3228)	Entropy 1.62811 (1.63844)	Top-1 acc 44.531 (44.998)	Top-5 acc 69.531 (68.672)	lr 0.02298
Train [22][3239/3239]	Time 2.106 (0.552)	Data Time 0.000 (0.011)	Loss 3.2883 (3.3230)	Entropy 1.62811 (1.63841)	Top-1 acc 38.272 (44.993)	Top-5 acc 71.605 (68.669)	lr 0.02298
==========Valid [22/120]	loss 2.107	top-1 acc 53.528 (53.528)	top-5 acc 77.163	Train top-1 44.993	top-5 68.669	Entropy 1.62811	Latency-None: 0.000ms	Flops: 570.46M
Train [23][0/3239]	Time 30.279 (30.279)	Data Time 28.231 (28.231)	Loss 3.2561 (3.2561)	Entropy 1.62808 (1.62808)	Top-1 acc 45.312 (45.312)	Top-5 acc 71.094 (71.094)	lr 0.02298
Train [23][10/3239]	Time 2.519 (3.281)	Data Time 0.002 (2.575)	Loss 3.2158 (3.2836)	Entropy 1.62808 (1.62808)	Top-1 acc 47.656 (45.952)	Top-5 acc 70.703 (69.354)	lr 0.02298
Train [23][20/3239]	Time 0.231 (1.831)	Data Time 0.001 (1.349)	Loss 3.3359 (3.2827)	Entropy 1.62805 (1.62806)	Top-1 acc 47.266 (46.168)	Top-5 acc 69.141 (69.289)	lr 0.02298
Train [23][30/3239]	Time 0.223 (1.385)	Data Time 0.001 (0.915)	Loss 3.3939 (3.2774)	Entropy 1.62796 (1.62803)	Top-1 acc 44.531 (46.220)	Top-5 acc 67.578 (69.229)	lr 0.02298
Train [23][40/3239]	Time 0.212 (1.157)	Data Time 0.001 (0.692)	Loss 3.2430 (3.2895)	Entropy 1.62793 (1.62801)	Top-1 acc 46.875 (45.789)	Top-5 acc 68.359 (69.045)	lr 0.02298
Train [23][50/3239]	Time 0.428 (1.813)	Data Time 0.002 (0.557)	Loss 3.1968 (3.2924)	Entropy 1.62787 (1.62798)	Top-1 acc 50.781 (45.810)	Top-5 acc 71.484 (68.972)	lr 0.02298
Train [23][60/3239]	Time 0.221 (1.605)	Data Time 0.001 (0.467)	Loss 3.5950 (3.2937)	Entropy 1.62767 (1.62795)	Top-1 acc 36.328 (45.703)	Top-5 acc 61.719 (69.051)	lr 0.02298
Train [23][70/3239]	Time 0.193 (1.440)	Data Time 0.001 (0.402)	Loss 3.2984 (3.2872)	Entropy 1.62758 (1.62790)	Top-1 acc 46.484 (45.835)	Top-5 acc 70.312 (69.218)	lr 0.02298
Train [23][80/3239]	Time 0.195 (1.316)	Data Time 0.001 (0.352)	Loss 3.3564 (3.2848)	Entropy 1.62745 (1.62785)	Top-1 acc 46.875 (46.021)	Top-5 acc 68.750 (69.290)	lr 0.02298
Train [23][90/3239]	Time 0.222 (1.222)	Data Time 0.001 (0.314)	Loss 3.2557 (3.2829)	Entropy 1.62734 (1.62781)	Top-1 acc 43.750 (45.986)	Top-5 acc 68.750 (69.394)	lr 0.02298
Train [23][100/3239]	Time 0.292 (1.146)	Data Time 0.002 (0.283)	Loss 3.1776 (3.2868)	Entropy 1.62717 (1.62776)	Top-1 acc 46.094 (45.885)	Top-5 acc 70.312 (69.311)	lr 0.02298
Train [23][110/3239]	Time 0.190 (1.082)	Data Time 0.001 (0.258)	Loss 3.2671 (3.2849)	Entropy 1.62705 (1.62770)	Top-1 acc 46.875 (45.921)	Top-5 acc 67.188 (69.306)	lr 0.02298
Train [23][120/3239]	Time 2.437 (1.030)	Data Time 0.001 (0.237)	Loss 3.5724 (3.2864)	Entropy 1.62705 (1.62765)	Top-1 acc 37.891 (45.877)	Top-5 acc 64.062 (69.308)	lr 0.02298
Train [23][130/3239]	Time 0.239 (0.969)	Data Time 0.002 (0.219)	Loss 3.3637 (3.2894)	Entropy 1.62696 (1.62760)	Top-1 acc 42.188 (45.757)	Top-5 acc 67.578 (69.269)	lr 0.02298
Train [23][140/3239]	Time 0.220 (0.931)	Data Time 0.001 (0.203)	Loss 3.3050 (3.2870)	Entropy 1.62689 (1.62755)	Top-1 acc 46.094 (45.844)	Top-5 acc 69.141 (69.271)	lr 0.02298
Train [23][150/3239]	Time 0.233 (0.898)	Data Time 0.002 (0.190)	Loss 3.2982 (3.2856)	Entropy 1.62686 (1.62750)	Top-1 acc 44.531 (45.926)	Top-5 acc 69.922 (69.304)	lr 0.02298
Train [23][160/3239]	Time 0.256 (0.870)	Data Time 0.001 (0.178)	Loss 3.1493 (3.2853)	Entropy 1.62682 (1.62746)	Top-1 acc 47.656 (45.934)	Top-5 acc 71.094 (69.298)	lr 0.02297
Train [23][170/3239]	Time 0.242 (0.845)	Data Time 0.001 (0.168)	Loss 3.2636 (3.2842)	Entropy 1.62672 (1.62742)	Top-1 acc 48.047 (45.996)	Top-5 acc 68.750 (69.317)	lr 0.02297
Train [23][180/3239]	Time 0.212 (0.822)	Data Time 0.001 (0.159)	Loss 3.1127 (3.2828)	Entropy 1.62666 (1.62738)	Top-1 acc 50.391 (46.016)	Top-5 acc 71.484 (69.378)	lr 0.02297
Train [23][190/3239]	Time 0.196 (0.802)	Data Time 0.001 (0.151)	Loss 3.4255 (3.2842)	Entropy 1.62640 (1.62734)	Top-1 acc 43.750 (45.975)	Top-5 acc 65.234 (69.341)	lr 0.02297
Train [23][200/3239]	Time 0.224 (0.784)	Data Time 0.001 (0.143)	Loss 3.0758 (3.2814)	Entropy 1.62633 (1.62729)	Top-1 acc 46.875 (46.084)	Top-5 acc 75.781 (69.382)	lr 0.02297
Train [23][210/3239]	Time 0.251 (0.767)	Data Time 0.001 (0.137)	Loss 3.2023 (3.2802)	Entropy 1.62628 (1.62724)	Top-1 acc 44.531 (46.084)	Top-5 acc 73.438 (69.433)	lr 0.02297
Train [23][220/3239]	Time 0.183 (0.751)	Data Time 0.001 (0.131)	Loss 3.1455 (3.2813)	Entropy 1.62622 (1.62720)	Top-1 acc 50.391 (46.083)	Top-5 acc 71.094 (69.429)	lr 0.02297
Train [23][230/3239]	Time 2.465 (0.738)	Data Time 0.001 (0.125)	Loss 3.4448 (3.2812)	Entropy 1.62622 (1.62716)	Top-1 acc 42.578 (46.104)	Top-5 acc 66.016 (69.437)	lr 0.02297
Train [23][240/3239]	Time 0.226 (0.717)	Data Time 0.001 (0.120)	Loss 3.5039 (3.2808)	Entropy 1.62617 (1.62711)	Top-1 acc 39.453 (46.069)	Top-5 acc 66.016 (69.470)	lr 0.02297
Train [23][250/3239]	Time 0.211 (0.706)	Data Time 0.001 (0.115)	Loss 3.1971 (3.2800)	Entropy 1.62610 (1.62707)	Top-1 acc 47.266 (46.106)	Top-5 acc 75.000 (69.516)	lr 0.02297
Train [23][260/3239]	Time 0.230 (0.696)	Data Time 0.001 (0.111)	Loss 3.3978 (3.2797)	Entropy 1.62606 (1.62704)	Top-1 acc 46.484 (46.091)	Top-5 acc 65.234 (69.522)	lr 0.02297
Train [23][270/3239]	Time 0.252 (0.686)	Data Time 0.001 (0.107)	Loss 3.4469 (3.2799)	Entropy 1.62600 (1.62700)	Top-1 acc 42.969 (46.059)	Top-5 acc 64.453 (69.488)	lr 0.02297
Train [23][280/3239]	Time 0.222 (0.677)	Data Time 0.001 (0.103)	Loss 3.2819 (3.2801)	Entropy 1.62583 (1.62696)	Top-1 acc 43.359 (46.041)	Top-5 acc 69.531 (69.496)	lr 0.02297
Train [23][290/3239]	Time 0.307 (0.670)	Data Time 0.001 (0.099)	Loss 3.1903 (3.2786)	Entropy 1.62585 (1.62692)	Top-1 acc 45.312 (46.033)	Top-5 acc 71.875 (69.541)	lr 0.02297
Train [23][300/3239]	Time 0.213 (0.662)	Data Time 0.001 (0.096)	Loss 3.3802 (3.2787)	Entropy 1.62579 (1.62688)	Top-1 acc 44.141 (46.061)	Top-5 acc 66.797 (69.533)	lr 0.02297
Train [23][310/3239]	Time 0.210 (0.655)	Data Time 0.001 (0.093)	Loss 3.2407 (3.2786)	Entropy 1.62571 (1.62685)	Top-1 acc 50.781 (46.090)	Top-5 acc 68.359 (69.536)	lr 0.02297
Train [23][320/3239]	Time 0.242 (0.648)	Data Time 0.001 (0.090)	Loss 3.1587 (3.2791)	Entropy 1.62580 (1.62681)	Top-1 acc 46.094 (46.077)	Top-5 acc 73.828 (69.530)	lr 0.02297
Train [23][330/3239]	Time 0.232 (0.642)	Data Time 0.001 (0.088)	Loss 3.2944 (3.2792)	Entropy 1.62571 (1.62678)	Top-1 acc 45.703 (46.055)	Top-5 acc 66.016 (69.535)	lr 0.02297
Train [23][340/3239]	Time 2.263 (0.635)	Data Time 0.001 (0.085)	Loss 3.5123 (3.2787)	Entropy 1.62571 (1.62675)	Top-1 acc 41.016 (46.071)	Top-5 acc 60.938 (69.534)	lr 0.02296
Train [23][350/3239]	Time 0.313 (0.624)	Data Time 0.003 (0.083)	Loss 3.3357 (3.2786)	Entropy 1.62563 (1.62672)	Top-1 acc 47.656 (46.046)	Top-5 acc 69.141 (69.557)	lr 0.02296
Train [23][360/3239]	Time 0.205 (0.619)	Data Time 0.001 (0.080)	Loss 3.3595 (3.2785)	Entropy 1.62560 (1.62669)	Top-1 acc 39.844 (46.040)	Top-5 acc 69.141 (69.565)	lr 0.02296
Train [23][370/3239]	Time 0.215 (0.614)	Data Time 0.001 (0.078)	Loss 3.2502 (3.2797)	Entropy 1.62547 (1.62666)	Top-1 acc 42.969 (46.021)	Top-5 acc 67.188 (69.512)	lr 0.02296
Train [23][380/3239]	Time 0.198 (0.609)	Data Time 0.001 (0.076)	Loss 3.1408 (3.2789)	Entropy 1.62541 (1.62662)	Top-1 acc 50.781 (46.035)	Top-5 acc 73.047 (69.529)	lr 0.02296
Train [23][390/3239]	Time 0.224 (0.604)	Data Time 0.001 (0.074)	Loss 3.2856 (3.2792)	Entropy 1.62538 (1.62659)	Top-1 acc 47.266 (46.038)	Top-5 acc 66.797 (69.526)	lr 0.02296
Train [23][400/3239]	Time 0.220 (0.600)	Data Time 0.002 (0.073)	Loss 3.3853 (3.2783)	Entropy 1.62533 (1.62656)	Top-1 acc 41.406 (46.050)	Top-5 acc 64.453 (69.558)	lr 0.02296
Train [23][410/3239]	Time 0.225 (0.702)	Data Time 0.002 (0.071)	Loss 3.5705 (3.2780)	Entropy 1.62526 (1.62653)	Top-1 acc 39.062 (46.028)	Top-5 acc 64.844 (69.546)	lr 0.02296
Train [23][420/3239]	Time 0.306 (0.696)	Data Time 0.002 (0.069)	Loss 3.7713 (3.2791)	Entropy 1.62521 (1.62650)	Top-1 acc 38.281 (46.008)	Top-5 acc 60.547 (69.534)	lr 0.02296
Train [23][430/3239]	Time 0.217 (0.690)	Data Time 0.001 (0.068)	Loss 3.2530 (3.2784)	Entropy 1.62516 (1.62647)	Top-1 acc 47.656 (46.019)	Top-5 acc 68.359 (69.573)	lr 0.02296
Train [23][440/3239]	Time 0.189 (0.684)	Data Time 0.001 (0.066)	Loss 3.2106 (3.2785)	Entropy 1.62499 (1.62644)	Top-1 acc 46.484 (46.027)	Top-5 acc 70.312 (69.574)	lr 0.02296
Train [23][450/3239]	Time 2.196 (0.678)	Data Time 0.001 (0.065)	Loss 3.2038 (3.2787)	Entropy 1.62499 (1.62641)	Top-1 acc 46.094 (45.998)	Top-5 acc 71.875 (69.588)	lr 0.02296
Train [23][460/3239]	Time 0.219 (0.669)	Data Time 0.002 (0.063)	Loss 3.3934 (3.2792)	Entropy 1.62498 (1.62638)	Top-1 acc 42.188 (45.990)	Top-5 acc 69.922 (69.571)	lr 0.02296
Train [23][470/3239]	Time 0.224 (0.664)	Data Time 0.001 (0.062)	Loss 3.3243 (3.2784)	Entropy 1.62497 (1.62635)	Top-1 acc 42.188 (45.994)	Top-5 acc 68.359 (69.586)	lr 0.02296
Train [23][480/3239]	Time 0.312 (0.659)	Data Time 0.001 (0.061)	Loss 3.2700 (3.2780)	Entropy 1.62494 (1.62632)	Top-1 acc 45.703 (46.003)	Top-5 acc 71.484 (69.594)	lr 0.02296
Train [23][490/3239]	Time 0.211 (0.655)	Data Time 0.001 (0.060)	Loss 3.1918 (3.2780)	Entropy 1.62489 (1.62629)	Top-1 acc 46.484 (45.986)	Top-5 acc 72.266 (69.601)	lr 0.02296
Train [23][500/3239]	Time 0.206 (0.650)	Data Time 0.001 (0.058)	Loss 3.5072 (3.2777)	Entropy 1.62481 (1.62626)	Top-1 acc 43.750 (45.988)	Top-5 acc 67.188 (69.605)	lr 0.02296
Train [23][510/3239]	Time 0.196 (0.646)	Data Time 0.001 (0.057)	Loss 3.1155 (3.2766)	Entropy 1.62476 (1.62623)	Top-1 acc 48.828 (46.016)	Top-5 acc 72.266 (69.622)	lr 0.02296
Train [23][520/3239]	Time 0.232 (0.642)	Data Time 0.001 (0.056)	Loss 3.1684 (3.2762)	Entropy 1.62471 (1.62620)	Top-1 acc 47.266 (46.043)	Top-5 acc 75.000 (69.627)	lr 0.02295
Train [23][530/3239]	Time 0.231 (0.638)	Data Time 0.001 (0.055)	Loss 3.3860 (3.2758)	Entropy 1.62468 (1.62617)	Top-1 acc 42.578 (46.055)	Top-5 acc 68.359 (69.611)	lr 0.02295
Train [23][540/3239]	Time 0.293 (0.635)	Data Time 0.001 (0.054)	Loss 3.1645 (3.2754)	Entropy 1.62433 (1.62615)	Top-1 acc 50.391 (46.063)	Top-5 acc 71.484 (69.603)	lr 0.02295
Train [23][550/3239]	Time 0.233 (0.631)	Data Time 0.001 (0.053)	Loss 3.3038 (3.2764)	Entropy 1.62433 (1.62611)	Top-1 acc 49.609 (46.036)	Top-5 acc 69.531 (69.584)	lr 0.02295
Train [23][560/3239]	Time 2.448 (0.628)	Data Time 0.001 (0.052)	Loss 3.3076 (3.2773)	Entropy 1.62433 (1.62608)	Top-1 acc 44.922 (46.026)	Top-5 acc 70.703 (69.563)	lr 0.02295
Train [23][570/3239]	Time 0.220 (0.620)	Data Time 0.001 (0.052)	Loss 3.2863 (3.2772)	Entropy 1.62422 (1.62605)	Top-1 acc 47.266 (46.032)	Top-5 acc 71.094 (69.551)	lr 0.02295
Train [23][580/3239]	Time 0.161 (0.617)	Data Time 0.001 (0.051)	Loss 3.0785 (3.2777)	Entropy 1.62411 (1.62602)	Top-1 acc 48.047 (46.010)	Top-5 acc 74.219 (69.542)	lr 0.02295
Train [23][590/3239]	Time 0.217 (0.614)	Data Time 0.001 (0.050)	Loss 3.4345 (3.2784)	Entropy 1.62400 (1.62598)	Top-1 acc 40.625 (45.989)	Top-5 acc 66.016 (69.531)	lr 0.02295
Train [23][600/3239]	Time 0.226 (0.611)	Data Time 0.001 (0.049)	Loss 3.3069 (3.2781)	Entropy 1.62392 (1.62595)	Top-1 acc 41.797 (46.000)	Top-5 acc 69.922 (69.548)	lr 0.02295
Train [23][610/3239]	Time 0.216 (0.608)	Data Time 0.001 (0.048)	Loss 3.1070 (3.2775)	Entropy 1.62376 (1.62591)	Top-1 acc 48.047 (46.014)	Top-5 acc 70.703 (69.558)	lr 0.02295
Train [23][620/3239]	Time 0.217 (0.606)	Data Time 0.001 (0.048)	Loss 3.2816 (3.2773)	Entropy 1.62367 (1.62588)	Top-1 acc 43.750 (46.020)	Top-5 acc 70.312 (69.555)	lr 0.02295
Train [23][630/3239]	Time 0.242 (0.603)	Data Time 0.001 (0.047)	Loss 3.2441 (3.2779)	Entropy 1.62362 (1.62584)	Top-1 acc 44.141 (46.001)	Top-5 acc 71.875 (69.541)	lr 0.02295
Train [23][640/3239]	Time 0.218 (0.600)	Data Time 0.001 (0.046)	Loss 3.0921 (3.2775)	Entropy 1.62357 (1.62581)	Top-1 acc 50.391 (46.015)	Top-5 acc 76.172 (69.561)	lr 0.02295
Train [23][650/3239]	Time 0.206 (0.598)	Data Time 0.001 (0.045)	Loss 3.3348 (3.2779)	Entropy 1.62346 (1.62577)	Top-1 acc 44.141 (46.008)	Top-5 acc 66.016 (69.547)	lr 0.02295
Train [23][660/3239]	Time 0.247 (0.595)	Data Time 0.001 (0.045)	Loss 3.4112 (3.2786)	Entropy 1.62342 (1.62574)	Top-1 acc 41.797 (46.000)	Top-5 acc 66.797 (69.528)	lr 0.02295
Train [23][670/3239]	Time 2.242 (0.593)	Data Time 0.001 (0.044)	Loss 3.2339 (3.2791)	Entropy 1.62342 (1.62570)	Top-1 acc 44.531 (45.999)	Top-5 acc 68.359 (69.507)	lr 0.02295
Train [23][680/3239]	Time 0.213 (0.587)	Data Time 0.001 (0.043)	Loss 3.4884 (3.2795)	Entropy 1.62334 (1.62567)	Top-1 acc 42.969 (46.000)	Top-5 acc 66.406 (69.493)	lr 0.02295
Train [23][690/3239]	Time 0.210 (0.585)	Data Time 0.001 (0.043)	Loss 3.3054 (3.2800)	Entropy 1.62330 (1.62563)	Top-1 acc 47.266 (45.990)	Top-5 acc 70.312 (69.492)	lr 0.02295
Train [23][700/3239]	Time 0.200 (0.583)	Data Time 0.001 (0.042)	Loss 3.3427 (3.2802)	Entropy 1.62330 (1.62560)	Top-1 acc 43.750 (45.990)	Top-5 acc 68.359 (69.484)	lr 0.02294
Train [23][710/3239]	Time 0.216 (0.581)	Data Time 0.001 (0.042)	Loss 3.3019 (3.2800)	Entropy 1.62323 (1.62557)	Top-1 acc 49.219 (45.990)	Top-5 acc 69.141 (69.481)	lr 0.02294
Train [23][720/3239]	Time 0.208 (0.579)	Data Time 0.001 (0.041)	Loss 3.4534 (3.2803)	Entropy 1.62315 (1.62553)	Top-1 acc 38.672 (45.982)	Top-5 acc 67.578 (69.487)	lr 0.02294
Train [23][730/3239]	Time 0.224 (0.577)	Data Time 0.001 (0.041)	Loss 3.3987 (3.2808)	Entropy 1.62276 (1.62550)	Top-1 acc 47.656 (45.991)	Top-5 acc 67.578 (69.477)	lr 0.02294
Train [23][740/3239]	Time 0.224 (0.575)	Data Time 0.001 (0.040)	Loss 3.3764 (3.2819)	Entropy 1.62264 (1.62546)	Top-1 acc 42.969 (45.966)	Top-5 acc 65.625 (69.452)	lr 0.02294
Train [23][750/3239]	Time 0.211 (0.574)	Data Time 0.001 (0.040)	Loss 3.2457 (3.2818)	Entropy 1.62254 (1.62542)	Top-1 acc 47.266 (45.954)	Top-5 acc 71.094 (69.448)	lr 0.02294
Train [23][760/3239]	Time 0.208 (0.572)	Data Time 0.001 (0.039)	Loss 3.1121 (3.2811)	Entropy 1.62243 (1.62539)	Top-1 acc 50.000 (45.981)	Top-5 acc 72.656 (69.454)	lr 0.02294
Train [23][770/3239]	Time 0.214 (0.628)	Data Time 0.003 (0.039)	Loss 3.4634 (3.2816)	Entropy 1.62235 (1.62535)	Top-1 acc 46.094 (45.962)	Top-5 acc 62.109 (69.436)	lr 0.02294
Train [23][780/3239]	Time 2.423 (0.625)	Data Time 0.002 (0.038)	Loss 3.4608 (3.2813)	Entropy 1.62235 (1.62531)	Top-1 acc 42.188 (45.967)	Top-5 acc 69.531 (69.450)	lr 0.02294
Train [23][790/3239]	Time 0.367 (0.620)	Data Time 0.002 (0.038)	Loss 3.3050 (3.2807)	Entropy 1.62231 (1.62527)	Top-1 acc 46.484 (45.981)	Top-5 acc 70.703 (69.463)	lr 0.02294
Train [23][800/3239]	Time 0.200 (0.618)	Data Time 0.001 (0.037)	Loss 3.1247 (3.2803)	Entropy 1.62229 (1.62523)	Top-1 acc 50.391 (45.974)	Top-5 acc 71.094 (69.468)	lr 0.02294
Train [23][810/3239]	Time 0.162 (0.616)	Data Time 0.001 (0.037)	Loss 3.3048 (3.2811)	Entropy 1.62214 (1.62520)	Top-1 acc 45.312 (45.949)	Top-5 acc 69.922 (69.459)	lr 0.02294
Train [23][820/3239]	Time 0.215 (0.614)	Data Time 0.001 (0.036)	Loss 3.3916 (3.2815)	Entropy 1.62202 (1.62516)	Top-1 acc 41.016 (45.941)	Top-5 acc 66.016 (69.448)	lr 0.02294
Train [23][830/3239]	Time 0.226 (0.612)	Data Time 0.001 (0.036)	Loss 3.2944 (3.2820)	Entropy 1.62194 (1.62512)	Top-1 acc 44.531 (45.941)	Top-5 acc 70.703 (69.435)	lr 0.02294
Train [23][840/3239]	Time 0.202 (0.609)	Data Time 0.001 (0.036)	Loss 3.1905 (3.2823)	Entropy 1.62200 (1.62508)	Top-1 acc 47.656 (45.927)	Top-5 acc 72.266 (69.428)	lr 0.02294
Train [23][850/3239]	Time 0.220 (0.607)	Data Time 0.001 (0.035)	Loss 3.1469 (3.2822)	Entropy 1.62192 (1.62505)	Top-1 acc 44.922 (45.917)	Top-5 acc 73.047 (69.437)	lr 0.02294
Train [23][860/3239]	Time 0.293 (0.606)	Data Time 0.001 (0.035)	Loss 3.3531 (3.2835)	Entropy 1.62187 (1.62501)	Top-1 acc 42.578 (45.884)	Top-5 acc 67.578 (69.407)	lr 0.02294
Train [23][870/3239]	Time 0.187 (0.604)	Data Time 0.001 (0.034)	Loss 3.1508 (3.2833)	Entropy 1.62199 (1.62497)	Top-1 acc 49.219 (45.901)	Top-5 acc 73.047 (69.420)	lr 0.02294
Train [23][880/3239]	Time 0.242 (0.601)	Data Time 0.001 (0.034)	Loss 3.3397 (3.2837)	Entropy 1.62190 (1.62494)	Top-1 acc 43.750 (45.900)	Top-5 acc 69.531 (69.404)	lr 0.02293
Train [23][890/3239]	Time 2.460 (0.600)	Data Time 0.001 (0.034)	Loss 3.4358 (3.2836)	Entropy 1.62190 (1.62491)	Top-1 acc 41.797 (45.909)	Top-5 acc 65.234 (69.409)	lr 0.02293
Train [23][900/3239]	Time 0.259 (0.596)	Data Time 0.001 (0.033)	Loss 3.3089 (3.2838)	Entropy 1.62177 (1.62487)	Top-1 acc 39.844 (45.898)	Top-5 acc 70.312 (69.404)	lr 0.02293
Train [23][910/3239]	Time 0.165 (0.594)	Data Time 0.001 (0.033)	Loss 3.3809 (3.2841)	Entropy 1.62175 (1.62484)	Top-1 acc 46.875 (45.897)	Top-5 acc 68.750 (69.396)	lr 0.02293
Train [23][920/3239]	Time 0.303 (0.592)	Data Time 0.001 (0.033)	Loss 3.4390 (3.2841)	Entropy 1.62170 (1.62480)	Top-1 acc 43.750 (45.901)	Top-5 acc 64.844 (69.396)	lr 0.02293
Train [23][930/3239]	Time 0.223 (0.591)	Data Time 0.001 (0.032)	Loss 3.3831 (3.2840)	Entropy 1.62165 (1.62477)	Top-1 acc 43.359 (45.897)	Top-5 acc 66.406 (69.393)	lr 0.02293
Train [23][940/3239]	Time 0.228 (0.589)	Data Time 0.001 (0.032)	Loss 3.1477 (3.2840)	Entropy 1.62166 (1.62474)	Top-1 acc 45.703 (45.895)	Top-5 acc 70.703 (69.388)	lr 0.02293
Train [23][950/3239]	Time 0.203 (0.588)	Data Time 0.001 (0.032)	Loss 3.4558 (3.2839)	Entropy 1.62172 (1.62470)	Top-1 acc 39.453 (45.904)	Top-5 acc 65.625 (69.385)	lr 0.02293
Train [23][960/3239]	Time 0.207 (0.586)	Data Time 0.001 (0.031)	Loss 3.3290 (3.2842)	Entropy 1.62160 (1.62467)	Top-1 acc 42.578 (45.903)	Top-5 acc 67.188 (69.372)	lr 0.02293
Train [23][970/3239]	Time 0.220 (0.585)	Data Time 0.001 (0.031)	Loss 3.2418 (3.2839)	Entropy 1.62158 (1.62464)	Top-1 acc 49.219 (45.917)	Top-5 acc 71.094 (69.382)	lr 0.02293
Train [23][980/3239]	Time 0.254 (0.583)	Data Time 0.001 (0.031)	Loss 3.3147 (3.2841)	Entropy 1.62156 (1.62461)	Top-1 acc 46.875 (45.922)	Top-5 acc 66.797 (69.378)	lr 0.02293
Train [23][990/3239]	Time 0.236 (0.582)	Data Time 0.002 (0.030)	Loss 3.1696 (3.2842)	Entropy 1.62153 (1.62458)	Top-1 acc 46.094 (45.926)	Top-5 acc 73.828 (69.390)	lr 0.02293
Train [23][1000/3239]	Time 2.236 (0.580)	Data Time 0.001 (0.030)	Loss 3.4341 (3.2844)	Entropy 1.62153 (1.62455)	Top-1 acc 38.281 (45.915)	Top-5 acc 66.016 (69.376)	lr 0.02293
Train [23][1010/3239]	Time 0.209 (0.577)	Data Time 0.001 (0.030)	Loss 3.2190 (3.2839)	Entropy 1.62148 (1.62452)	Top-1 acc 46.875 (45.925)	Top-5 acc 69.531 (69.393)	lr 0.02293
Train [23][1020/3239]	Time 0.225 (0.575)	Data Time 0.001 (0.030)	Loss 3.5751 (3.2845)	Entropy 1.62144 (1.62449)	Top-1 acc 36.328 (45.907)	Top-5 acc 61.328 (69.379)	lr 0.02293
Train [23][1030/3239]	Time 0.233 (0.574)	Data Time 0.001 (0.029)	Loss 3.3635 (3.2843)	Entropy 1.62140 (1.62446)	Top-1 acc 46.484 (45.910)	Top-5 acc 67.969 (69.382)	lr 0.02293
Train [23][1040/3239]	Time 0.372 (0.573)	Data Time 0.002 (0.029)	Loss 3.3095 (3.2847)	Entropy 1.62129 (1.62443)	Top-1 acc 47.656 (45.894)	Top-5 acc 69.531 (69.377)	lr 0.02293
Train [23][1050/3239]	Time 0.230 (0.572)	Data Time 0.001 (0.029)	Loss 3.3707 (3.2853)	Entropy 1.62123 (1.62440)	Top-1 acc 42.578 (45.872)	Top-5 acc 67.578 (69.370)	lr 0.02293
Train [23][1060/3239]	Time 0.220 (0.570)	Data Time 0.001 (0.029)	Loss 3.4614 (3.2858)	Entropy 1.62116 (1.62437)	Top-1 acc 40.625 (45.871)	Top-5 acc 65.234 (69.353)	lr 0.02292
Train [23][1070/3239]	Time 0.206 (0.569)	Data Time 0.001 (0.028)	Loss 3.3319 (3.2858)	Entropy 1.62106 (1.62434)	Top-1 acc 44.922 (45.856)	Top-5 acc 69.531 (69.360)	lr 0.02292
Train [23][1080/3239]	Time 0.189 (0.568)	Data Time 0.001 (0.028)	Loss 3.3630 (3.2857)	Entropy 1.62100 (1.62431)	Top-1 acc 42.969 (45.867)	Top-5 acc 67.969 (69.371)	lr 0.02292
Train [23][1090/3239]	Time 0.222 (0.567)	Data Time 0.003 (0.028)	Loss 3.1927 (3.2855)	Entropy 1.62099 (1.62428)	Top-1 acc 48.828 (45.868)	Top-5 acc 74.219 (69.376)	lr 0.02292
Train [23][1100/3239]	Time 0.231 (0.565)	Data Time 0.001 (0.028)	Loss 3.3038 (3.2856)	Entropy 1.62094 (1.62425)	Top-1 acc 46.484 (45.865)	Top-5 acc 68.359 (69.374)	lr 0.02292
Train [23][1110/3239]	Time 2.388 (0.564)	Data Time 0.001 (0.027)	Loss 3.1152 (3.2850)	Entropy 1.62094 (1.62422)	Top-1 acc 49.609 (45.876)	Top-5 acc 73.047 (69.389)	lr 0.02292
Train [23][1120/3239]	Time 0.219 (0.561)	Data Time 0.001 (0.027)	Loss 3.4131 (3.2852)	Entropy 1.62092 (1.62419)	Top-1 acc 44.531 (45.875)	Top-5 acc 68.750 (69.379)	lr 0.02292
Train [23][1130/3239]	Time 0.211 (0.560)	Data Time 0.001 (0.027)	Loss 3.3400 (3.2853)	Entropy 1.62095 (1.62416)	Top-1 acc 46.484 (45.873)	Top-5 acc 67.969 (69.379)	lr 0.02292
Train [23][1140/3239]	Time 0.202 (0.597)	Data Time 0.002 (0.027)	Loss 3.3549 (3.2860)	Entropy 1.62090 (1.62413)	Top-1 acc 45.312 (45.859)	Top-5 acc 67.578 (69.363)	lr 0.02292
Train [23][1150/3239]	Time 0.221 (0.596)	Data Time 0.002 (0.026)	Loss 3.4049 (3.2864)	Entropy 1.62084 (1.62410)	Top-1 acc 42.578 (45.853)	Top-5 acc 68.359 (69.352)	lr 0.02292
Train [23][1160/3239]	Time 0.239 (0.594)	Data Time 0.001 (0.026)	Loss 3.3178 (3.2861)	Entropy 1.62083 (1.62407)	Top-1 acc 48.047 (45.858)	Top-5 acc 67.969 (69.356)	lr 0.02292
Train [23][1170/3239]	Time 0.229 (0.593)	Data Time 0.001 (0.026)	Loss 3.3130 (3.2863)	Entropy 1.62073 (1.62404)	Top-1 acc 45.703 (45.855)	Top-5 acc 69.141 (69.349)	lr 0.02292
Train [23][1180/3239]	Time 0.221 (0.592)	Data Time 0.001 (0.026)	Loss 3.2776 (3.2866)	Entropy 1.62073 (1.62402)	Top-1 acc 46.094 (45.859)	Top-5 acc 71.484 (69.349)	lr 0.02292
Train [23][1190/3239]	Time 0.223 (0.591)	Data Time 0.001 (0.026)	Loss 3.4990 (3.2871)	Entropy 1.62060 (1.62399)	Top-1 acc 43.359 (45.845)	Top-5 acc 63.672 (69.350)	lr 0.02292
Train [23][1200/3239]	Time 0.224 (0.589)	Data Time 0.002 (0.025)	Loss 3.2598 (3.2871)	Entropy 1.62048 (1.62396)	Top-1 acc 48.047 (45.841)	Top-5 acc 69.531 (69.334)	lr 0.02292
Train [23][1210/3239]	Time 0.217 (0.588)	Data Time 0.002 (0.025)	Loss 3.1310 (3.2872)	Entropy 1.62044 (1.62393)	Top-1 acc 50.781 (45.835)	Top-5 acc 72.656 (69.341)	lr 0.02292
Train [23][1220/3239]	Time 2.384 (0.587)	Data Time 0.002 (0.025)	Loss 3.3003 (3.2873)	Entropy 1.62044 (1.62390)	Top-1 acc 44.531 (45.823)	Top-5 acc 68.359 (69.337)	lr 0.02292
Train [23][1230/3239]	Time 0.229 (0.584)	Data Time 0.001 (0.025)	Loss 3.2407 (3.2870)	Entropy 1.62037 (1.62387)	Top-1 acc 48.047 (45.826)	Top-5 acc 67.578 (69.336)	lr 0.02292
Train [23][1240/3239]	Time 0.234 (0.583)	Data Time 0.001 (0.025)	Loss 3.3236 (3.2872)	Entropy 1.62040 (1.62385)	Top-1 acc 44.141 (45.816)	Top-5 acc 69.141 (69.334)	lr 0.02291
Train [23][1250/3239]	Time 0.179 (0.582)	Data Time 0.001 (0.024)	Loss 3.5689 (3.2881)	Entropy 1.62037 (1.62382)	Top-1 acc 39.844 (45.791)	Top-5 acc 62.891 (69.305)	lr 0.02291
Train [23][1260/3239]	Time 0.221 (0.581)	Data Time 0.001 (0.024)	Loss 3.3947 (3.2884)	Entropy 1.62024 (1.62379)	Top-1 acc 42.578 (45.792)	Top-5 acc 65.625 (69.301)	lr 0.02291
Train [23][1270/3239]	Time 0.234 (0.580)	Data Time 0.001 (0.024)	Loss 3.3035 (3.2886)	Entropy 1.62023 (1.62376)	Top-1 acc 45.312 (45.785)	Top-5 acc 70.312 (69.294)	lr 0.02291
Train [23][1280/3239]	Time 0.205 (0.579)	Data Time 0.001 (0.024)	Loss 3.1685 (3.2883)	Entropy 1.62020 (1.62373)	Top-1 acc 48.047 (45.793)	Top-5 acc 73.047 (69.304)	lr 0.02291
Train [23][1290/3239]	Time 0.305 (0.578)	Data Time 0.001 (0.024)	Loss 3.3808 (3.2881)	Entropy 1.62023 (1.62371)	Top-1 acc 44.531 (45.799)	Top-5 acc 66.016 (69.299)	lr 0.02291
Train [23][1300/3239]	Time 0.220 (0.576)	Data Time 0.001 (0.024)	Loss 3.2963 (3.2887)	Entropy 1.62014 (1.62368)	Top-1 acc 48.438 (45.784)	Top-5 acc 69.922 (69.288)	lr 0.02291
Train [23][1310/3239]	Time 0.210 (0.575)	Data Time 0.001 (0.023)	Loss 3.1599 (3.2888)	Entropy 1.62009 (1.62365)	Top-1 acc 47.266 (45.787)	Top-5 acc 71.094 (69.282)	lr 0.02291
Train [23][1320/3239]	Time 0.226 (0.574)	Data Time 0.001 (0.023)	Loss 3.3194 (3.2886)	Entropy 1.62000 (1.62363)	Top-1 acc 44.141 (45.793)	Top-5 acc 71.094 (69.284)	lr 0.02291
Train [23][1330/3239]	Time 2.283 (0.573)	Data Time 0.001 (0.023)	Loss 3.3617 (3.2888)	Entropy 1.62000 (1.62360)	Top-1 acc 46.094 (45.793)	Top-5 acc 66.797 (69.281)	lr 0.02291
Train [23][1340/3239]	Time 0.200 (0.571)	Data Time 0.001 (0.023)	Loss 3.0657 (3.2883)	Entropy 1.61994 (1.62357)	Top-1 acc 53.516 (45.801)	Top-5 acc 72.656 (69.291)	lr 0.02291
Train [23][1350/3239]	Time 0.256 (0.570)	Data Time 0.001 (0.023)	Loss 3.2567 (3.2882)	Entropy 1.61986 (1.62354)	Top-1 acc 46.875 (45.810)	Top-5 acc 67.969 (69.284)	lr 0.02291
Train [23][1360/3239]	Time 0.212 (0.569)	Data Time 0.001 (0.023)	Loss 3.2003 (3.2884)	Entropy 1.61972 (1.62352)	Top-1 acc 48.047 (45.807)	Top-5 acc 71.094 (69.275)	lr 0.02291
Train [23][1370/3239]	Time 0.202 (0.568)	Data Time 0.001 (0.023)	Loss 3.2151 (3.2883)	Entropy 1.61970 (1.62349)	Top-1 acc 46.094 (45.801)	Top-5 acc 69.922 (69.273)	lr 0.02291
Train [23][1380/3239]	Time 0.211 (0.567)	Data Time 0.001 (0.022)	Loss 3.0517 (3.2883)	Entropy 1.61960 (1.62346)	Top-1 acc 45.312 (45.794)	Top-5 acc 74.219 (69.269)	lr 0.02291
Train [23][1390/3239]	Time 0.224 (0.566)	Data Time 0.001 (0.022)	Loss 3.1146 (3.2890)	Entropy 1.61958 (1.62343)	Top-1 acc 51.953 (45.781)	Top-5 acc 73.438 (69.259)	lr 0.02291
Train [23][1400/3239]	Time 0.234 (0.565)	Data Time 0.001 (0.022)	Loss 3.5442 (3.2892)	Entropy 1.61953 (1.62341)	Top-1 acc 42.578 (45.778)	Top-5 acc 66.016 (69.254)	lr 0.02291
Train [23][1410/3239]	Time 0.246 (0.564)	Data Time 0.001 (0.022)	Loss 3.0201 (3.2894)	Entropy 1.61944 (1.62338)	Top-1 acc 53.516 (45.781)	Top-5 acc 74.609 (69.254)	lr 0.02291
Train [23][1420/3239]	Time 0.209 (0.563)	Data Time 0.001 (0.022)	Loss 3.2823 (3.2895)	Entropy 1.61942 (1.62335)	Top-1 acc 45.312 (45.778)	Top-5 acc 70.703 (69.256)	lr 0.02290
Train [23][1430/3239]	Time 0.239 (0.562)	Data Time 0.001 (0.022)	Loss 3.2598 (3.2895)	Entropy 1.61928 (1.62332)	Top-1 acc 46.094 (45.777)	Top-5 acc 72.266 (69.257)	lr 0.02290
Train [23][1440/3239]	Time 2.349 (0.562)	Data Time 0.001 (0.022)	Loss 3.2067 (3.2892)	Entropy 1.61928 (1.62329)	Top-1 acc 45.703 (45.777)	Top-5 acc 68.359 (69.257)	lr 0.02290
Train [23][1450/3239]	Time 0.145 (0.559)	Data Time 0.001 (0.021)	Loss 3.5611 (3.2887)	Entropy 1.61918 (1.62327)	Top-1 acc 38.672 (45.788)	Top-5 acc 62.891 (69.268)	lr 0.02290
Train [23][1460/3239]	Time 0.299 (0.558)	Data Time 0.001 (0.021)	Loss 3.2479 (3.2887)	Entropy 1.61910 (1.62324)	Top-1 acc 47.656 (45.796)	Top-5 acc 69.922 (69.265)	lr 0.02290
Train [23][1470/3239]	Time 0.219 (0.558)	Data Time 0.001 (0.021)	Loss 3.2709 (3.2887)	Entropy 1.61908 (1.62321)	Top-1 acc 45.312 (45.798)	Top-5 acc 70.312 (69.263)	lr 0.02290
Train [23][1480/3239]	Time 0.216 (0.557)	Data Time 0.001 (0.021)	Loss 3.2366 (3.2886)	Entropy 1.61904 (1.62318)	Top-1 acc 50.000 (45.798)	Top-5 acc 72.266 (69.261)	lr 0.02290
Train [23][1490/3239]	Time 0.231 (0.556)	Data Time 0.001 (0.021)	Loss 3.3245 (3.2888)	Entropy 1.61890 (1.62315)	Top-1 acc 44.531 (45.793)	Top-5 acc 69.922 (69.258)	lr 0.02290
Train [23][1500/3239]	Time 0.308 (0.581)	Data Time 0.003 (0.021)	Loss 3.1993 (3.2888)	Entropy 1.61884 (1.62312)	Top-1 acc 46.484 (45.787)	Top-5 acc 73.047 (69.258)	lr 0.02290
Train [23][1510/3239]	Time 0.239 (0.581)	Data Time 0.002 (0.021)	Loss 3.3149 (3.2891)	Entropy 1.61881 (1.62310)	Top-1 acc 46.484 (45.779)	Top-5 acc 68.359 (69.252)	lr 0.02290
Train [23][1520/3239]	Time 0.308 (0.580)	Data Time 0.001 (0.021)	Loss 3.2042 (3.2890)	Entropy 1.61869 (1.62307)	Top-1 acc 47.266 (45.779)	Top-5 acc 71.875 (69.263)	lr 0.02290
Train [23][1530/3239]	Time 0.211 (0.579)	Data Time 0.001 (0.020)	Loss 3.0491 (3.2889)	Entropy 1.61862 (1.62304)	Top-1 acc 48.047 (45.776)	Top-5 acc 74.609 (69.265)	lr 0.02290
Train [23][1540/3239]	Time 0.240 (0.579)	Data Time 0.001 (0.020)	Loss 3.3440 (3.2889)	Entropy 1.61863 (1.62301)	Top-1 acc 46.875 (45.785)	Top-5 acc 68.750 (69.268)	lr 0.02290
Train [23][1550/3239]	Time 2.448 (0.578)	Data Time 0.002 (0.020)	Loss 3.4156 (3.2887)	Entropy 1.61863 (1.62298)	Top-1 acc 44.141 (45.790)	Top-5 acc 64.453 (69.272)	lr 0.02290
Train [23][1560/3239]	Time 0.217 (0.576)	Data Time 0.001 (0.020)	Loss 3.1530 (3.2883)	Entropy 1.61853 (1.62295)	Top-1 acc 46.094 (45.806)	Top-5 acc 73.047 (69.285)	lr 0.02290
Train [23][1570/3239]	Time 0.203 (0.575)	Data Time 0.001 (0.020)	Loss 3.2928 (3.2888)	Entropy 1.61856 (1.62293)	Top-1 acc 44.531 (45.799)	Top-5 acc 70.312 (69.282)	lr 0.02290
Train [23][1580/3239]	Time 0.260 (0.574)	Data Time 0.001 (0.020)	Loss 3.2944 (3.2890)	Entropy 1.61850 (1.62290)	Top-1 acc 46.094 (45.796)	Top-5 acc 68.750 (69.276)	lr 0.02290
Train [23][1590/3239]	Time 0.216 (0.573)	Data Time 0.001 (0.020)	Loss 3.0924 (3.2891)	Entropy 1.61846 (1.62287)	Top-1 acc 52.344 (45.791)	Top-5 acc 71.875 (69.274)	lr 0.02290
Train [23][1600/3239]	Time 0.210 (0.572)	Data Time 0.001 (0.020)	Loss 3.1031 (3.2890)	Entropy 1.61833 (1.62284)	Top-1 acc 48.438 (45.795)	Top-5 acc 73.828 (69.278)	lr 0.02289
Train [23][1610/3239]	Time 0.212 (0.571)	Data Time 0.001 (0.019)	Loss 3.3837 (3.2890)	Entropy 1.61832 (1.62281)	Top-1 acc 39.844 (45.792)	Top-5 acc 67.969 (69.278)	lr 0.02289
Train [23][1620/3239]	Time 0.160 (0.571)	Data Time 0.001 (0.019)	Loss 3.3168 (3.2889)	Entropy 1.61840 (1.62279)	Top-1 acc 46.094 (45.797)	Top-5 acc 69.922 (69.278)	lr 0.02289
Train [23][1630/3239]	Time 0.224 (0.570)	Data Time 0.001 (0.019)	Loss 3.2641 (3.2889)	Entropy 1.61850 (1.62276)	Top-1 acc 42.578 (45.790)	Top-5 acc 73.047 (69.285)	lr 0.02289
Train [23][1640/3239]	Time 0.209 (0.569)	Data Time 0.001 (0.019)	Loss 3.3958 (3.2885)	Entropy 1.61841 (1.62273)	Top-1 acc 46.484 (45.800)	Top-5 acc 65.234 (69.293)	lr 0.02289
Train [23][1650/3239]	Time 0.313 (0.568)	Data Time 0.001 (0.019)	Loss 3.4347 (3.2887)	Entropy 1.61843 (1.62271)	Top-1 acc 40.625 (45.795)	Top-5 acc 66.797 (69.292)	lr 0.02289
Train [23][1660/3239]	Time 2.391 (0.568)	Data Time 0.001 (0.019)	Loss 3.2013 (3.2885)	Entropy 1.61843 (1.62268)	Top-1 acc 46.094 (45.799)	Top-5 acc 71.875 (69.296)	lr 0.02289
Train [23][1670/3239]	Time 0.213 (0.565)	Data Time 0.001 (0.019)	Loss 3.3307 (3.2882)	Entropy 1.61836 (1.62266)	Top-1 acc 46.875 (45.813)	Top-5 acc 67.969 (69.298)	lr 0.02289
Train [23][1680/3239]	Time 0.210 (0.565)	Data Time 0.001 (0.019)	Loss 3.4420 (3.2885)	Entropy 1.61830 (1.62263)	Top-1 acc 44.922 (45.805)	Top-5 acc 66.406 (69.288)	lr 0.02289
Train [23][1690/3239]	Time 0.220 (0.564)	Data Time 0.001 (0.019)	Loss 3.1436 (3.2887)	Entropy 1.61819 (1.62260)	Top-1 acc 46.094 (45.804)	Top-5 acc 76.172 (69.286)	lr 0.02289
Train [23][1700/3239]	Time 0.218 (0.563)	Data Time 0.001 (0.019)	Loss 3.4434 (3.2888)	Entropy 1.61813 (1.62258)	Top-1 acc 44.531 (45.803)	Top-5 acc 67.578 (69.284)	lr 0.02289
Train [23][1710/3239]	Time 0.208 (0.563)	Data Time 0.001 (0.018)	Loss 3.3648 (3.2887)	Entropy 1.61810 (1.62255)	Top-1 acc 42.188 (45.800)	Top-5 acc 67.969 (69.286)	lr 0.02289
Train [23][1720/3239]	Time 0.308 (0.562)	Data Time 0.001 (0.018)	Loss 3.3118 (3.2884)	Entropy 1.61795 (1.62253)	Top-1 acc 46.484 (45.810)	Top-5 acc 69.141 (69.293)	lr 0.02289
Train [23][1730/3239]	Time 0.265 (0.561)	Data Time 0.001 (0.018)	Loss 3.2307 (3.2887)	Entropy 1.61780 (1.62250)	Top-1 acc 48.828 (45.805)	Top-5 acc 71.875 (69.285)	lr 0.02289
Train [23][1740/3239]	Time 0.226 (0.560)	Data Time 0.001 (0.018)	Loss 3.2342 (3.2886)	Entropy 1.61774 (1.62247)	Top-1 acc 45.312 (45.811)	Top-5 acc 73.828 (69.288)	lr 0.02289
Train [23][1750/3239]	Time 0.247 (0.560)	Data Time 0.001 (0.018)	Loss 3.5194 (3.2885)	Entropy 1.61769 (1.62244)	Top-1 acc 42.578 (45.817)	Top-5 acc 63.672 (69.288)	lr 0.02289
Train [23][1760/3239]	Time 0.223 (0.559)	Data Time 0.001 (0.018)	Loss 3.3465 (3.2890)	Entropy 1.61761 (1.62242)	Top-1 acc 40.625 (45.804)	Top-5 acc 69.141 (69.279)	lr 0.02289
Train [23][1770/3239]	Time 2.349 (0.558)	Data Time 0.001 (0.018)	Loss 3.1789 (3.2885)	Entropy 1.61761 (1.62239)	Top-1 acc 47.656 (45.814)	Top-5 acc 70.703 (69.289)	lr 0.02288
Train [23][1780/3239]	Time 0.227 (0.557)	Data Time 0.002 (0.018)	Loss 3.3348 (3.2888)	Entropy 1.61746 (1.62236)	Top-1 acc 42.578 (45.807)	Top-5 acc 66.406 (69.284)	lr 0.02288
Train [23][1790/3239]	Time 0.219 (0.556)	Data Time 0.001 (0.018)	Loss 3.2105 (3.2888)	Entropy 1.61742 (1.62233)	Top-1 acc 46.875 (45.798)	Top-5 acc 70.703 (69.285)	lr 0.02288
Train [23][1800/3239]	Time 0.227 (0.555)	Data Time 0.002 (0.018)	Loss 3.2584 (3.2886)	Entropy 1.61728 (1.62231)	Top-1 acc 46.875 (45.806)	Top-5 acc 72.266 (69.290)	lr 0.02288
Train [23][1810/3239]	Time 0.206 (0.555)	Data Time 0.001 (0.018)	Loss 3.2876 (3.2886)	Entropy 1.61728 (1.62228)	Top-1 acc 47.266 (45.808)	Top-5 acc 68.750 (69.291)	lr 0.02288
Train [23][1820/3239]	Time 0.211 (0.554)	Data Time 0.001 (0.017)	Loss 3.4200 (3.2887)	Entropy 1.61728 (1.62225)	Top-1 acc 45.703 (45.805)	Top-5 acc 64.453 (69.286)	lr 0.02288
Train [23][1830/3239]	Time 0.305 (0.554)	Data Time 0.001 (0.017)	Loss 3.0999 (3.2886)	Entropy 1.61724 (1.62222)	Top-1 acc 51.562 (45.803)	Top-5 acc 73.047 (69.292)	lr 0.02288
Train [23][1840/3239]	Time 0.220 (0.553)	Data Time 0.001 (0.017)	Loss 3.2706 (3.2889)	Entropy 1.61713 (1.62220)	Top-1 acc 48.047 (45.804)	Top-5 acc 69.531 (69.288)	lr 0.02288
Train [23][1850/3239]	Time 0.216 (0.552)	Data Time 0.001 (0.017)	Loss 3.2552 (3.2889)	Entropy 1.61705 (1.62217)	Top-1 acc 44.922 (45.805)	Top-5 acc 70.703 (69.284)	lr 0.02288
Train [23][1860/3239]	Time 0.271 (0.573)	Data Time 0.004 (0.017)	Loss 3.6470 (3.2892)	Entropy 1.61698 (1.62214)	Top-1 acc 40.234 (45.803)	Top-5 acc 63.672 (69.277)	lr 0.02288
Train [23][1870/3239]	Time 0.193 (0.573)	Data Time 0.002 (0.017)	Loss 3.3836 (3.2892)	Entropy 1.61688 (1.62211)	Top-1 acc 44.141 (45.800)	Top-5 acc 67.578 (69.277)	lr 0.02288
Train [23][1880/3239]	Time 2.271 (0.572)	Data Time 0.001 (0.017)	Loss 3.4460 (3.2893)	Entropy 1.61688 (1.62209)	Top-1 acc 43.359 (45.795)	Top-5 acc 66.406 (69.277)	lr 0.02288
Train [23][1890/3239]	Time 0.338 (0.570)	Data Time 0.002 (0.017)	Loss 3.4111 (3.2892)	Entropy 1.61674 (1.62206)	Top-1 acc 44.922 (45.800)	Top-5 acc 66.797 (69.283)	lr 0.02288
Train [23][1900/3239]	Time 0.251 (0.569)	Data Time 0.002 (0.017)	Loss 3.3170 (3.2891)	Entropy 1.61671 (1.62203)	Top-1 acc 44.922 (45.804)	Top-5 acc 68.359 (69.284)	lr 0.02288
Train [23][1910/3239]	Time 0.201 (0.569)	Data Time 0.001 (0.017)	Loss 3.1541 (3.2892)	Entropy 1.61673 (1.62200)	Top-1 acc 50.391 (45.802)	Top-5 acc 71.875 (69.284)	lr 0.02288
Train [23][1920/3239]	Time 0.215 (0.568)	Data Time 0.001 (0.017)	Loss 3.4451 (3.2894)	Entropy 1.61666 (1.62197)	Top-1 acc 42.969 (45.799)	Top-5 acc 65.234 (69.279)	lr 0.02288
Train [23][1930/3239]	Time 0.219 (0.567)	Data Time 0.001 (0.017)	Loss 3.0640 (3.2889)	Entropy 1.61653 (1.62195)	Top-1 acc 53.906 (45.808)	Top-5 acc 73.438 (69.286)	lr 0.02288
Train [23][1940/3239]	Time 0.208 (0.567)	Data Time 0.001 (0.016)	Loss 3.3309 (3.2892)	Entropy 1.61649 (1.62192)	Top-1 acc 42.969 (45.804)	Top-5 acc 67.969 (69.278)	lr 0.02288
Train [23][1950/3239]	Time 0.297 (0.566)	Data Time 0.001 (0.016)	Loss 3.5267 (3.2890)	Entropy 1.61640 (1.62189)	Top-1 acc 39.453 (45.805)	Top-5 acc 66.016 (69.284)	lr 0.02287
Train [23][1960/3239]	Time 0.220 (0.565)	Data Time 0.002 (0.016)	Loss 3.3571 (3.2887)	Entropy 1.61626 (1.62186)	Top-1 acc 46.875 (45.811)	Top-5 acc 64.844 (69.292)	lr 0.02287
Train [23][1970/3239]	Time 0.172 (0.565)	Data Time 0.001 (0.016)	Loss 3.4182 (3.2887)	Entropy 1.61609 (1.62183)	Top-1 acc 41.016 (45.805)	Top-5 acc 68.359 (69.296)	lr 0.02287
Train [23][1980/3239]	Time 0.282 (0.564)	Data Time 0.001 (0.016)	Loss 3.1848 (3.2887)	Entropy 1.61610 (1.62181)	Top-1 acc 46.875 (45.809)	Top-5 acc 70.312 (69.296)	lr 0.02287
Train [23][1990/3239]	Time 2.270 (0.564)	Data Time 0.001 (0.016)	Loss 3.3356 (3.2887)	Entropy 1.61610 (1.62178)	Top-1 acc 47.656 (45.807)	Top-5 acc 67.969 (69.294)	lr 0.02287
Train [23][2000/3239]	Time 0.226 (0.562)	Data Time 0.001 (0.016)	Loss 3.2588 (3.2886)	Entropy 1.61605 (1.62175)	Top-1 acc 43.750 (45.804)	Top-5 acc 70.703 (69.300)	lr 0.02287
Train [23][2010/3239]	Time 0.222 (0.561)	Data Time 0.001 (0.016)	Loss 3.1872 (3.2886)	Entropy 1.61583 (1.62172)	Top-1 acc 45.312 (45.797)	Top-5 acc 71.484 (69.300)	lr 0.02287
Train [23][2020/3239]	Time 0.316 (0.561)	Data Time 0.001 (0.016)	Loss 3.7132 (3.2887)	Entropy 1.61572 (1.62169)	Top-1 acc 38.672 (45.795)	Top-5 acc 63.672 (69.297)	lr 0.02287
Train [23][2030/3239]	Time 0.213 (0.560)	Data Time 0.001 (0.016)	Loss 3.3178 (3.2886)	Entropy 1.61566 (1.62166)	Top-1 acc 40.625 (45.794)	Top-5 acc 69.922 (69.299)	lr 0.02287
Train [23][2040/3239]	Time 0.216 (0.559)	Data Time 0.001 (0.016)	Loss 3.1192 (3.2887)	Entropy 1.61528 (1.62163)	Top-1 acc 48.047 (45.787)	Top-5 acc 73.047 (69.304)	lr 0.02287
Train [23][2050/3239]	Time 0.207 (0.559)	Data Time 0.001 (0.016)	Loss 3.4828 (3.2890)	Entropy 1.61533 (1.62160)	Top-1 acc 39.453 (45.776)	Top-5 acc 64.844 (69.295)	lr 0.02287
Train [23][2060/3239]	Time 0.217 (0.558)	Data Time 0.001 (0.016)	Loss 3.3134 (3.2890)	Entropy 1.61525 (1.62157)	Top-1 acc 44.531 (45.775)	Top-5 acc 66.797 (69.294)	lr 0.02287
Train [23][2070/3239]	Time 0.211 (0.558)	Data Time 0.001 (0.016)	Loss 3.2476 (3.2891)	Entropy 1.61523 (1.62154)	Top-1 acc 48.047 (45.765)	Top-5 acc 72.656 (69.292)	lr 0.02287
Train [23][2080/3239]	Time 0.338 (0.557)	Data Time 0.025 (0.016)	Loss 3.2306 (3.2890)	Entropy 1.61523 (1.62151)	Top-1 acc 49.609 (45.772)	Top-5 acc 69.922 (69.292)	lr 0.02287
Train [23][2090/3239]	Time 0.202 (0.557)	Data Time 0.001 (0.015)	Loss 3.4178 (3.2892)	Entropy 1.61497 (1.62148)	Top-1 acc 44.922 (45.771)	Top-5 acc 67.578 (69.285)	lr 0.02287
Train [23][2100/3239]	Time 2.343 (0.556)	Data Time 0.001 (0.015)	Loss 3.0275 (3.2888)	Entropy 1.61497 (1.62145)	Top-1 acc 51.172 (45.782)	Top-5 acc 75.000 (69.296)	lr 0.02287
Train [23][2110/3239]	Time 0.204 (0.555)	Data Time 0.001 (0.015)	Loss 3.1733 (3.2891)	Entropy 1.61488 (1.62141)	Top-1 acc 51.953 (45.778)	Top-5 acc 73.438 (69.289)	lr 0.02287
Train [23][2120/3239]	Time 0.227 (0.554)	Data Time 0.002 (0.015)	Loss 3.1313 (3.2891)	Entropy 1.61469 (1.62138)	Top-1 acc 48.047 (45.776)	Top-5 acc 72.656 (69.289)	lr 0.02287
Train [23][2130/3239]	Time 0.162 (0.554)	Data Time 0.001 (0.015)	Loss 3.3687 (3.2888)	Entropy 1.61471 (1.62135)	Top-1 acc 41.797 (45.784)	Top-5 acc 68.359 (69.295)	lr 0.02286
Train [23][2140/3239]	Time 0.196 (0.553)	Data Time 0.001 (0.015)	Loss 3.1758 (3.2887)	Entropy 1.61469 (1.62132)	Top-1 acc 51.953 (45.788)	Top-5 acc 72.656 (69.297)	lr 0.02286
Train [23][2150/3239]	Time 0.214 (0.553)	Data Time 0.001 (0.015)	Loss 3.4151 (3.2885)	Entropy 1.61468 (1.62129)	Top-1 acc 43.359 (45.791)	Top-5 acc 67.578 (69.301)	lr 0.02286
Train [23][2160/3239]	Time 0.201 (0.552)	Data Time 0.001 (0.015)	Loss 3.3483 (3.2885)	Entropy 1.61462 (1.62126)	Top-1 acc 48.047 (45.793)	Top-5 acc 67.969 (69.298)	lr 0.02286
Train [23][2170/3239]	Time 0.204 (0.552)	Data Time 0.001 (0.015)	Loss 3.1543 (3.2881)	Entropy 1.61460 (1.62123)	Top-1 acc 48.047 (45.799)	Top-5 acc 73.047 (69.301)	lr 0.02286
Train [23][2180/3239]	Time 0.226 (0.551)	Data Time 0.001 (0.015)	Loss 3.3571 (3.2880)	Entropy 1.61451 (1.62120)	Top-1 acc 43.750 (45.797)	Top-5 acc 67.188 (69.304)	lr 0.02286
Train [23][2190/3239]	Time 0.184 (0.551)	Data Time 0.001 (0.015)	Loss 3.4129 (3.2881)	Entropy 1.61441 (1.62117)	Top-1 acc 42.188 (45.794)	Top-5 acc 67.969 (69.307)	lr 0.02286
Train [23][2200/3239]	Time 0.132 (0.550)	Data Time 0.001 (0.015)	Loss 3.4502 (3.2880)	Entropy 1.61437 (1.62114)	Top-1 acc 42.188 (45.795)	Top-5 acc 68.359 (69.309)	lr 0.02286
Train [23][2210/3239]	Time 2.355 (0.550)	Data Time 0.001 (0.015)	Loss 3.2522 (3.2879)	Entropy 1.61437 (1.62111)	Top-1 acc 46.875 (45.800)	Top-5 acc 69.922 (69.314)	lr 0.02286
Train [23][2220/3239]	Time 0.182 (0.548)	Data Time 0.002 (0.015)	Loss 3.3774 (3.2879)	Entropy 1.61428 (1.62107)	Top-1 acc 38.672 (45.796)	Top-5 acc 66.797 (69.318)	lr 0.02286
Train [23][2230/3239]	Time 0.206 (0.568)	Data Time 0.002 (0.015)	Loss 3.4137 (3.2881)	Entropy 1.61425 (1.62104)	Top-1 acc 43.359 (45.787)	Top-5 acc 66.016 (69.311)	lr 0.02286
Train [23][2240/3239]	Time 0.194 (0.567)	Data Time 0.002 (0.015)	Loss 3.3858 (3.2880)	Entropy 1.61418 (1.62101)	Top-1 acc 42.578 (45.795)	Top-5 acc 66.797 (69.313)	lr 0.02286
Train [23][2250/3239]	Time 0.211 (0.567)	Data Time 0.002 (0.015)	Loss 3.0614 (3.2877)	Entropy 1.61417 (1.62098)	Top-1 acc 52.344 (45.800)	Top-5 acc 73.438 (69.320)	lr 0.02286
Train [23][2260/3239]	Time 0.204 (0.566)	Data Time 0.001 (0.014)	Loss 2.9722 (3.2877)	Entropy 1.61415 (1.62095)	Top-1 acc 53.516 (45.801)	Top-5 acc 76.953 (69.320)	lr 0.02286
Train [23][2270/3239]	Time 0.251 (0.566)	Data Time 0.002 (0.014)	Loss 3.2682 (3.2877)	Entropy 1.61412 (1.62092)	Top-1 acc 46.094 (45.804)	Top-5 acc 69.141 (69.323)	lr 0.02286
Train [23][2280/3239]	Time 0.186 (0.565)	Data Time 0.001 (0.014)	Loss 3.2675 (3.2876)	Entropy 1.61405 (1.62089)	Top-1 acc 45.703 (45.801)	Top-5 acc 68.359 (69.321)	lr 0.02286
Train [23][2290/3239]	Time 0.246 (0.565)	Data Time 0.001 (0.014)	Loss 3.3114 (3.2876)	Entropy 1.61397 (1.62086)	Top-1 acc 48.047 (45.802)	Top-5 acc 69.922 (69.323)	lr 0.02286
Train [23][2300/3239]	Time 0.215 (0.564)	Data Time 0.001 (0.014)	Loss 3.2736 (3.2876)	Entropy 1.61384 (1.62083)	Top-1 acc 47.656 (45.804)	Top-5 acc 71.094 (69.322)	lr 0.02286
Train [23][2310/3239]	Time 0.219 (0.564)	Data Time 0.001 (0.014)	Loss 3.1048 (3.2876)	Entropy 1.61383 (1.62080)	Top-1 acc 48.828 (45.806)	Top-5 acc 71.484 (69.325)	lr 0.02285
Train [23][2320/3239]	Time 2.366 (0.563)	Data Time 0.001 (0.014)	Loss 3.4397 (3.2879)	Entropy 1.61383 (1.62077)	Top-1 acc 40.625 (45.798)	Top-5 acc 65.234 (69.317)	lr 0.02285
Train [23][2330/3239]	Time 0.203 (0.562)	Data Time 0.001 (0.014)	Loss 3.2071 (3.2879)	Entropy 1.61378 (1.62074)	Top-1 acc 52.344 (45.803)	Top-5 acc 71.875 (69.320)	lr 0.02285
Train [23][2340/3239]	Time 0.232 (0.561)	Data Time 0.001 (0.014)	Loss 3.2026 (3.2879)	Entropy 1.61380 (1.62071)	Top-1 acc 47.656 (45.800)	Top-5 acc 70.703 (69.321)	lr 0.02285
Train [23][2350/3239]	Time 0.218 (0.561)	Data Time 0.001 (0.014)	Loss 3.2735 (3.2879)	Entropy 1.61369 (1.62068)	Top-1 acc 44.922 (45.800)	Top-5 acc 70.703 (69.320)	lr 0.02285
Train [23][2360/3239]	Time 0.196 (0.560)	Data Time 0.001 (0.014)	Loss 3.3726 (3.2880)	Entropy 1.61368 (1.62065)	Top-1 acc 43.750 (45.799)	Top-5 acc 66.797 (69.314)	lr 0.02285
Train [23][2370/3239]	Time 0.212 (0.560)	Data Time 0.001 (0.014)	Loss 3.3896 (3.2880)	Entropy 1.61347 (1.62062)	Top-1 acc 40.625 (45.800)	Top-5 acc 67.969 (69.314)	lr 0.02285
Train [23][2380/3239]	Time 0.280 (0.559)	Data Time 0.001 (0.014)	Loss 3.2520 (3.2878)	Entropy 1.61335 (1.62059)	Top-1 acc 46.875 (45.805)	Top-5 acc 68.750 (69.317)	lr 0.02285
Train [23][2390/3239]	Time 0.198 (0.559)	Data Time 0.002 (0.014)	Loss 3.3307 (3.2877)	Entropy 1.61323 (1.62056)	Top-1 acc 43.750 (45.806)	Top-5 acc 69.141 (69.320)	lr 0.02285
Train [23][2400/3239]	Time 0.188 (0.558)	Data Time 0.001 (0.014)	Loss 3.4048 (3.2878)	Entropy 1.61319 (1.62053)	Top-1 acc 44.922 (45.801)	Top-5 acc 68.750 (69.321)	lr 0.02285
Train [23][2410/3239]	Time 0.217 (0.558)	Data Time 0.001 (0.014)	Loss 3.3267 (3.2877)	Entropy 1.61314 (1.62050)	Top-1 acc 44.141 (45.800)	Top-5 acc 69.531 (69.318)	lr 0.02285
Train [23][2420/3239]	Time 0.234 (0.557)	Data Time 0.001 (0.014)	Loss 3.2389 (3.2876)	Entropy 1.61309 (1.62047)	Top-1 acc 44.922 (45.801)	Top-5 acc 71.484 (69.321)	lr 0.02285
Train [23][2430/3239]	Time 2.300 (0.557)	Data Time 0.001 (0.014)	Loss 3.0655 (3.2875)	Entropy 1.61309 (1.62044)	Top-1 acc 52.734 (45.806)	Top-5 acc 72.656 (69.329)	lr 0.02285
Train [23][2440/3239]	Time 0.214 (0.555)	Data Time 0.005 (0.014)	Loss 3.2163 (3.2875)	Entropy 1.61304 (1.62041)	Top-1 acc 48.047 (45.807)	Top-5 acc 73.438 (69.329)	lr 0.02285
Train [23][2450/3239]	Time 0.209 (0.555)	Data Time 0.001 (0.014)	Loss 3.2184 (3.2875)	Entropy 1.61301 (1.62038)	Top-1 acc 46.484 (45.803)	Top-5 acc 70.312 (69.327)	lr 0.02285
Train [23][2460/3239]	Time 0.207 (0.554)	Data Time 0.001 (0.013)	Loss 3.4799 (3.2876)	Entropy 1.61292 (1.62035)	Top-1 acc 41.797 (45.802)	Top-5 acc 66.406 (69.329)	lr 0.02285
Train [23][2470/3239]	Time 0.209 (0.554)	Data Time 0.001 (0.013)	Loss 3.4311 (3.2878)	Entropy 1.61276 (1.62032)	Top-1 acc 40.625 (45.801)	Top-5 acc 67.969 (69.326)	lr 0.02285
Train [23][2480/3239]	Time 0.233 (0.553)	Data Time 0.001 (0.013)	Loss 3.4761 (3.2879)	Entropy 1.61271 (1.62029)	Top-1 acc 40.234 (45.796)	Top-5 acc 65.625 (69.324)	lr 0.02284
Train [23][2490/3239]	Time 0.229 (0.553)	Data Time 0.001 (0.013)	Loss 3.3117 (3.2880)	Entropy 1.61252 (1.62026)	Top-1 acc 45.703 (45.798)	Top-5 acc 69.141 (69.324)	lr 0.02284
Train [23][2500/3239]	Time 0.212 (0.553)	Data Time 0.001 (0.013)	Loss 3.2160 (3.2883)	Entropy 1.61258 (1.62023)	Top-1 acc 42.188 (45.791)	Top-5 acc 69.531 (69.321)	lr 0.02284
Train [23][2510/3239]	Time 0.248 (0.552)	Data Time 0.001 (0.013)	Loss 3.2450 (3.2883)	Entropy 1.61237 (1.62020)	Top-1 acc 48.438 (45.792)	Top-5 acc 68.359 (69.325)	lr 0.02284
Train [23][2520/3239]	Time 0.256 (0.552)	Data Time 0.001 (0.013)	Loss 3.3785 (3.2885)	Entropy 1.61222 (1.62017)	Top-1 acc 43.359 (45.789)	Top-5 acc 66.406 (69.320)	lr 0.02284
Train [23][2530/3239]	Time 0.232 (0.551)	Data Time 0.001 (0.013)	Loss 3.2591 (3.2884)	Entropy 1.61216 (1.62013)	Top-1 acc 47.656 (45.791)	Top-5 acc 67.969 (69.322)	lr 0.02284
Train [23][2540/3239]	Time 2.352 (0.551)	Data Time 0.001 (0.013)	Loss 3.2276 (3.2883)	Entropy 1.61216 (1.62010)	Top-1 acc 45.703 (45.788)	Top-5 acc 69.531 (69.324)	lr 0.02284
Train [23][2550/3239]	Time 0.261 (0.550)	Data Time 0.001 (0.013)	Loss 3.2247 (3.2887)	Entropy 1.61207 (1.62007)	Top-1 acc 47.656 (45.782)	Top-5 acc 69.141 (69.315)	lr 0.02284
Train [23][2560/3239]	Time 0.195 (0.549)	Data Time 0.001 (0.013)	Loss 3.1611 (3.2889)	Entropy 1.61196 (1.62004)	Top-1 acc 46.875 (45.779)	Top-5 acc 72.656 (69.312)	lr 0.02284
Train [23][2570/3239]	Time 0.341 (0.549)	Data Time 0.001 (0.013)	Loss 3.2679 (3.2889)	Entropy 1.61180 (1.62001)	Top-1 acc 46.875 (45.780)	Top-5 acc 69.531 (69.311)	lr 0.02284
Train [23][2580/3239]	Time 0.210 (0.548)	Data Time 0.001 (0.013)	Loss 3.1948 (3.2887)	Entropy 1.61171 (1.61998)	Top-1 acc 45.703 (45.782)	Top-5 acc 71.094 (69.317)	lr 0.02284
Train [23][2590/3239]	Time 0.167 (0.565)	Data Time 0.002 (0.013)	Loss 3.3008 (3.2891)	Entropy 1.61172 (1.61994)	Top-1 acc 42.969 (45.772)	Top-5 acc 70.703 (69.308)	lr 0.02284
Train [23][2600/3239]	Time 0.147 (0.564)	Data Time 0.002 (0.013)	Loss 3.2418 (3.2890)	Entropy 1.61169 (1.61991)	Top-1 acc 48.828 (45.777)	Top-5 acc 72.656 (69.310)	lr 0.02284
Train [23][2610/3239]	Time 0.223 (0.564)	Data Time 0.002 (0.013)	Loss 3.2477 (3.2891)	Entropy 1.61152 (1.61988)	Top-1 acc 50.000 (45.781)	Top-5 acc 70.703 (69.310)	lr 0.02284
Train [23][2620/3239]	Time 0.222 (0.563)	Data Time 0.001 (0.013)	Loss 3.3485 (3.2891)	Entropy 1.61147 (1.61985)	Top-1 acc 42.969 (45.776)	Top-5 acc 65.234 (69.309)	lr 0.02284
Train [23][2630/3239]	Time 0.293 (0.563)	Data Time 0.001 (0.013)	Loss 3.2427 (3.2888)	Entropy 1.61143 (1.61982)	Top-1 acc 47.656 (45.783)	Top-5 acc 70.312 (69.313)	lr 0.02284
Train [23][2640/3239]	Time 0.239 (0.563)	Data Time 0.001 (0.013)	Loss 3.3136 (3.2891)	Entropy 1.61141 (1.61978)	Top-1 acc 44.922 (45.777)	Top-5 acc 66.406 (69.305)	lr 0.02284
Train [23][2650/3239]	Time 0.197 (0.562)	Data Time 0.001 (0.013)	Loss 3.2057 (3.2890)	Entropy 1.61137 (1.61975)	Top-1 acc 44.531 (45.779)	Top-5 acc 73.438 (69.307)	lr 0.02284
Train [23][2660/3239]	Time 0.228 (0.562)	Data Time 0.002 (0.013)	Loss 3.1238 (3.2891)	Entropy 1.61127 (1.61972)	Top-1 acc 50.000 (45.778)	Top-5 acc 73.047 (69.306)	lr 0.02283
Train [23][2670/3239]	Time 0.221 (0.561)	Data Time 0.001 (0.013)	Loss 3.1808 (3.2892)	Entropy 1.61129 (1.61969)	Top-1 acc 48.438 (45.782)	Top-5 acc 70.703 (69.302)	lr 0.02283
Train [23][2680/3239]	Time 0.217 (0.561)	Data Time 0.001 (0.013)	Loss 3.1874 (3.2890)	Entropy 1.61125 (1.61966)	Top-1 acc 46.094 (45.784)	Top-5 acc 71.875 (69.308)	lr 0.02283
Train [23][2690/3239]	Time 0.200 (0.560)	Data Time 0.001 (0.012)	Loss 3.5893 (3.2893)	Entropy 1.61122 (1.61963)	Top-1 acc 41.406 (45.779)	Top-5 acc 62.891 (69.303)	lr 0.02283
Train [23][2700/3239]	Time 0.217 (0.560)	Data Time 0.001 (0.012)	Loss 3.2497 (3.2893)	Entropy 1.61107 (1.61960)	Top-1 acc 48.438 (45.782)	Top-5 acc 70.312 (69.300)	lr 0.02283
Train [23][2710/3239]	Time 0.213 (0.559)	Data Time 0.001 (0.012)	Loss 3.3061 (3.2892)	Entropy 1.61101 (1.61956)	Top-1 acc 46.484 (45.784)	Top-5 acc 69.141 (69.299)	lr 0.02283
Train [23][2720/3239]	Time 0.261 (0.559)	Data Time 0.001 (0.012)	Loss 3.3693 (3.2891)	Entropy 1.61092 (1.61953)	Top-1 acc 41.797 (45.783)	Top-5 acc 72.266 (69.304)	lr 0.02283
Train [23][2730/3239]	Time 0.314 (0.559)	Data Time 0.002 (0.012)	Loss 3.1212 (3.2889)	Entropy 1.61085 (1.61950)	Top-1 acc 48.438 (45.788)	Top-5 acc 70.703 (69.311)	lr 0.02283
Train [23][2740/3239]	Time 0.246 (0.558)	Data Time 0.001 (0.012)	Loss 3.1532 (3.2887)	Entropy 1.61077 (1.61947)	Top-1 acc 47.656 (45.793)	Top-5 acc 71.484 (69.315)	lr 0.02283
Train [23][2750/3239]	Time 0.355 (0.558)	Data Time 0.001 (0.012)	Loss 3.4365 (3.2890)	Entropy 1.61076 (1.61944)	Top-1 acc 45.703 (45.789)	Top-5 acc 66.797 (69.308)	lr 0.02283
Train [23][2760/3239]	Time 0.207 (0.557)	Data Time 0.001 (0.012)	Loss 3.2370 (3.2889)	Entropy 1.61074 (1.61941)	Top-1 acc 46.875 (45.792)	Top-5 acc 70.703 (69.310)	lr 0.02283
Train [23][2770/3239]	Time 0.240 (0.557)	Data Time 0.001 (0.012)	Loss 3.4539 (3.2890)	Entropy 1.61066 (1.61938)	Top-1 acc 43.359 (45.793)	Top-5 acc 67.188 (69.310)	lr 0.02283
Train [23][2780/3239]	Time 0.243 (0.556)	Data Time 0.001 (0.012)	Loss 3.4502 (3.2891)	Entropy 1.61067 (1.61934)	Top-1 acc 39.453 (45.794)	Top-5 acc 64.844 (69.310)	lr 0.02283
Train [23][2790/3239]	Time 0.319 (0.556)	Data Time 0.002 (0.012)	Loss 3.4724 (3.2892)	Entropy 1.61039 (1.61931)	Top-1 acc 43.750 (45.790)	Top-5 acc 64.453 (69.307)	lr 0.02283
Train [23][2800/3239]	Time 0.239 (0.556)	Data Time 0.001 (0.012)	Loss 3.3006 (3.2893)	Entropy 1.61036 (1.61928)	Top-1 acc 48.047 (45.788)	Top-5 acc 69.531 (69.302)	lr 0.02283
Train [23][2810/3239]	Time 0.339 (0.555)	Data Time 0.001 (0.012)	Loss 3.1277 (3.2895)	Entropy 1.61018 (1.61925)	Top-1 acc 47.656 (45.782)	Top-5 acc 72.266 (69.299)	lr 0.02283
Train [23][2820/3239]	Time 0.211 (0.555)	Data Time 0.001 (0.012)	Loss 3.3172 (3.2896)	Entropy 1.61010 (1.61922)	Top-1 acc 42.578 (45.773)	Top-5 acc 70.312 (69.296)	lr 0.02283
Train [23][2830/3239]	Time 0.279 (0.555)	Data Time 0.001 (0.012)	Loss 3.2358 (3.2899)	Entropy 1.61006 (1.61919)	Top-1 acc 44.531 (45.768)	Top-5 acc 70.312 (69.289)	lr 0.02282
Train [23][2840/3239]	Time 0.189 (0.554)	Data Time 0.001 (0.012)	Loss 3.2014 (3.2898)	Entropy 1.61003 (1.61915)	Top-1 acc 46.094 (45.769)	Top-5 acc 70.312 (69.293)	lr 0.02282
Train [23][2850/3239]	Time 0.224 (0.554)	Data Time 0.001 (0.012)	Loss 3.2934 (3.2900)	Entropy 1.60992 (1.61912)	Top-1 acc 48.438 (45.768)	Top-5 acc 67.578 (69.290)	lr 0.02282
Train [23][2860/3239]	Time 0.210 (0.553)	Data Time 0.001 (0.012)	Loss 3.2323 (3.2901)	Entropy 1.60983 (1.61909)	Top-1 acc 46.484 (45.767)	Top-5 acc 69.531 (69.289)	lr 0.02282
Train [23][2870/3239]	Time 0.277 (0.553)	Data Time 0.001 (0.012)	Loss 3.2348 (3.2900)	Entropy 1.60978 (1.61906)	Top-1 acc 48.438 (45.767)	Top-5 acc 71.484 (69.293)	lr 0.02282
Train [23][2880/3239]	Time 0.206 (0.553)	Data Time 0.001 (0.012)	Loss 3.1121 (3.2898)	Entropy 1.60962 (1.61902)	Top-1 acc 52.344 (45.771)	Top-5 acc 69.141 (69.298)	lr 0.02282
Train [23][2890/3239]	Time 0.205 (0.552)	Data Time 0.001 (0.012)	Loss 3.2909 (3.2898)	Entropy 1.60935 (1.61899)	Top-1 acc 45.312 (45.772)	Top-5 acc 67.969 (69.300)	lr 0.02282
Train [23][2900/3239]	Time 0.247 (0.552)	Data Time 0.001 (0.012)	Loss 3.4490 (3.2899)	Entropy 1.60930 (1.61896)	Top-1 acc 42.969 (45.774)	Top-5 acc 64.453 (69.299)	lr 0.02282
Train [23][2910/3239]	Time 0.273 (0.551)	Data Time 0.001 (0.012)	Loss 3.0495 (3.2900)	Entropy 1.60922 (1.61893)	Top-1 acc 47.266 (45.772)	Top-5 acc 72.656 (69.295)	lr 0.02282
Train [23][2920/3239]	Time 0.296 (0.564)	Data Time 0.004 (0.012)	Loss 3.1959 (3.2900)	Entropy 1.60919 (1.61889)	Top-1 acc 45.703 (45.773)	Top-5 acc 71.484 (69.298)	lr 0.02282
Train [23][2930/3239]	Time 0.205 (0.564)	Data Time 0.002 (0.012)	Loss 3.3130 (3.2900)	Entropy 1.60925 (1.61886)	Top-1 acc 41.406 (45.775)	Top-5 acc 66.406 (69.300)	lr 0.02282
Train [23][2940/3239]	Time 0.216 (0.564)	Data Time 0.001 (0.012)	Loss 3.4384 (3.2899)	Entropy 1.60918 (1.61883)	Top-1 acc 44.531 (45.777)	Top-5 acc 65.234 (69.301)	lr 0.02282
Train [23][2950/3239]	Time 0.159 (0.563)	Data Time 0.001 (0.012)	Loss 3.3447 (3.2902)	Entropy 1.60915 (1.61879)	Top-1 acc 46.484 (45.771)	Top-5 acc 67.578 (69.294)	lr 0.02282
Train [23][2960/3239]	Time 0.207 (0.563)	Data Time 0.001 (0.012)	Loss 3.2935 (3.2901)	Entropy 1.60908 (1.61876)	Top-1 acc 48.047 (45.773)	Top-5 acc 70.312 (69.299)	lr 0.02282
Train [23][2970/3239]	Time 0.208 (0.562)	Data Time 0.001 (0.011)	Loss 3.2974 (3.2901)	Entropy 1.60900 (1.61873)	Top-1 acc 45.703 (45.777)	Top-5 acc 67.969 (69.303)	lr 0.02282
Train [23][2980/3239]	Time 0.311 (0.562)	Data Time 0.002 (0.011)	Loss 3.1420 (3.2900)	Entropy 1.60891 (1.61870)	Top-1 acc 52.734 (45.778)	Top-5 acc 74.609 (69.302)	lr 0.02282
Train [23][2990/3239]	Time 0.234 (0.561)	Data Time 0.001 (0.011)	Loss 3.4139 (3.2902)	Entropy 1.60879 (1.61866)	Top-1 acc 42.578 (45.774)	Top-5 acc 67.188 (69.300)	lr 0.02282
Train [23][3000/3239]	Time 0.223 (0.561)	Data Time 0.001 (0.011)	Loss 3.3991 (3.2902)	Entropy 1.60865 (1.61863)	Top-1 acc 41.797 (45.773)	Top-5 acc 70.312 (69.302)	lr 0.02282
Train [23][3010/3239]	Time 0.222 (0.561)	Data Time 0.001 (0.011)	Loss 3.1740 (3.2902)	Entropy 1.60861 (1.61860)	Top-1 acc 50.781 (45.771)	Top-5 acc 71.875 (69.302)	lr 0.02281
Train [23][3020/3239]	Time 0.227 (0.560)	Data Time 0.001 (0.011)	Loss 3.3927 (3.2900)	Entropy 1.60848 (1.61856)	Top-1 acc 44.531 (45.776)	Top-5 acc 67.578 (69.306)	lr 0.02281
Train [23][3030/3239]	Time 0.243 (0.560)	Data Time 0.029 (0.011)	Loss 3.3750 (3.2901)	Entropy 1.60843 (1.61853)	Top-1 acc 45.703 (45.777)	Top-5 acc 68.359 (69.307)	lr 0.02281
Train [23][3040/3239]	Time 0.349 (0.559)	Data Time 0.001 (0.011)	Loss 3.1231 (3.2904)	Entropy 1.60835 (1.61850)	Top-1 acc 47.656 (45.769)	Top-5 acc 72.656 (69.300)	lr 0.02281
Train [23][3050/3239]	Time 0.213 (0.559)	Data Time 0.002 (0.011)	Loss 3.3432 (3.2904)	Entropy 1.60824 (1.61846)	Top-1 acc 47.656 (45.769)	Top-5 acc 66.016 (69.298)	lr 0.02281
Train [23][3060/3239]	Time 0.258 (0.559)	Data Time 0.001 (0.011)	Loss 3.4183 (3.2903)	Entropy 1.60825 (1.61843)	Top-1 acc 40.625 (45.767)	Top-5 acc 66.016 (69.302)	lr 0.02281
Train [23][3070/3239]	Time 0.220 (0.558)	Data Time 0.001 (0.011)	Loss 3.3878 (3.2903)	Entropy 1.60818 (1.61840)	Top-1 acc 43.359 (45.770)	Top-5 acc 64.453 (69.301)	lr 0.02281
Train [23][3080/3239]	Time 0.166 (0.558)	Data Time 0.001 (0.011)	Loss 3.3153 (3.2905)	Entropy 1.60818 (1.61836)	Top-1 acc 46.484 (45.765)	Top-5 acc 67.578 (69.297)	lr 0.02281
Train [23][3090/3239]	Time 0.262 (0.558)	Data Time 0.001 (0.011)	Loss 3.4142 (3.2907)	Entropy 1.60819 (1.61833)	Top-1 acc 42.578 (45.762)	Top-5 acc 67.578 (69.292)	lr 0.02281
Train [23][3100/3239]	Time 0.201 (0.557)	Data Time 0.001 (0.011)	Loss 3.5162 (3.2906)	Entropy 1.60803 (1.61830)	Top-1 acc 43.750 (45.770)	Top-5 acc 64.844 (69.296)	lr 0.02281
Train [23][3110/3239]	Time 0.215 (0.557)	Data Time 0.001 (0.011)	Loss 3.0947 (3.2905)	Entropy 1.60785 (1.61827)	Top-1 acc 53.125 (45.772)	Top-5 acc 76.953 (69.294)	lr 0.02281
Train [23][3120/3239]	Time 0.215 (0.556)	Data Time 0.001 (0.011)	Loss 3.2934 (3.2904)	Entropy 1.60784 (1.61823)	Top-1 acc 44.141 (45.775)	Top-5 acc 68.750 (69.296)	lr 0.02281
Train [23][3130/3239]	Time 0.201 (0.556)	Data Time 0.001 (0.011)	Loss 3.3027 (3.2903)	Entropy 1.60781 (1.61820)	Top-1 acc 42.188 (45.774)	Top-5 acc 66.797 (69.296)	lr 0.02281
Train [23][3140/3239]	Time 0.255 (0.556)	Data Time 0.001 (0.011)	Loss 3.3760 (3.2906)	Entropy 1.60765 (1.61817)	Top-1 acc 44.141 (45.770)	Top-5 acc 64.844 (69.293)	lr 0.02281
Train [23][3150/3239]	Time 0.206 (0.555)	Data Time 0.001 (0.011)	Loss 3.3193 (3.2907)	Entropy 1.60760 (1.61813)	Top-1 acc 43.359 (45.769)	Top-5 acc 69.922 (69.292)	lr 0.02281
Train [23][3160/3239]	Time 0.359 (0.555)	Data Time 0.001 (0.011)	Loss 3.3121 (3.2906)	Entropy 1.60761 (1.61810)	Top-1 acc 46.875 (45.771)	Top-5 acc 67.969 (69.293)	lr 0.02281
Train [23][3170/3239]	Time 0.211 (0.555)	Data Time 0.001 (0.011)	Loss 3.1482 (3.2904)	Entropy 1.60753 (1.61807)	Top-1 acc 49.219 (45.774)	Top-5 acc 71.875 (69.295)	lr 0.02281
Train [23][3180/3239]	Time 0.225 (0.554)	Data Time 0.000 (0.011)	Loss 3.0448 (3.2904)	Entropy 1.60750 (1.61803)	Top-1 acc 47.266 (45.771)	Top-5 acc 75.781 (69.297)	lr 0.02281
Train [23][3190/3239]	Time 0.212 (0.554)	Data Time 0.000 (0.011)	Loss 3.2573 (3.2903)	Entropy 1.60740 (1.61800)	Top-1 acc 47.266 (45.772)	Top-5 acc 69.922 (69.299)	lr 0.02280
Train [23][3200/3239]	Time 0.235 (0.553)	Data Time 0.000 (0.011)	Loss 3.1334 (3.2902)	Entropy 1.60735 (1.61797)	Top-1 acc 51.562 (45.775)	Top-5 acc 72.266 (69.302)	lr 0.02280
Train [23][3210/3239]	Time 0.219 (0.553)	Data Time 0.000 (0.011)	Loss 3.3069 (3.2904)	Entropy 1.60724 (1.61793)	Top-1 acc 43.359 (45.773)	Top-5 acc 68.359 (69.297)	lr 0.02280
Train [23][3220/3239]	Time 0.299 (0.553)	Data Time 0.000 (0.011)	Loss 3.3543 (3.2904)	Entropy 1.60711 (1.61790)	Top-1 acc 44.922 (45.771)	Top-5 acc 67.969 (69.300)	lr 0.02280
Train [23][3230/3239]	Time 0.185 (0.552)	Data Time 0.000 (0.011)	Loss 3.3927 (3.2903)	Entropy 1.60701 (1.61787)	Top-1 acc 42.188 (45.771)	Top-5 acc 67.188 (69.303)	lr 0.02280
Train [23][3239/3239]	Time 2.119 (0.552)	Data Time 0.000 (0.011)	Loss 3.3395 (3.2902)	Entropy 1.60701 (1.61784)	Top-1 acc 48.148 (45.775)	Top-5 acc 69.136 (69.304)	lr 0.02280
==========Valid [23/120]	loss 2.069	top-1 acc 54.323 (54.323)	top-5 acc 77.751	Train top-1 45.775	top-5 69.304	Entropy 1.60701	Latency-None: 0.000ms	Flops: 560.63M
Train [24][0/3239]	Time 30.898 (30.898)	Data Time 28.912 (28.912)	Loss 3.1189 (3.1189)	Entropy 1.60700 (1.60700)	Top-1 acc 53.516 (53.516)	Top-5 acc 73.438 (73.438)	lr 0.02280
Train [24][10/3239]	Time 44.883 (7.138)	Data Time 0.027 (2.633)	Loss 3.1396 (3.2580)	Entropy 1.60700 (1.60700)	Top-1 acc 45.703 (46.342)	Top-5 acc 71.484 (69.709)	lr 0.02280
Train [24][20/3239]	Time 0.202 (3.901)	Data Time 0.002 (1.381)	Loss 3.1425 (3.2617)	Entropy 1.60698 (1.60699)	Top-1 acc 48.438 (46.168)	Top-5 acc 73.438 (69.587)	lr 0.02280
Train [24][30/3239]	Time 0.230 (2.791)	Data Time 0.002 (0.936)	Loss 3.3020 (3.2881)	Entropy 1.60693 (1.60697)	Top-1 acc 44.141 (45.678)	Top-5 acc 65.625 (68.926)	lr 0.02280
Train [24][40/3239]	Time 0.217 (2.217)	Data Time 0.001 (0.709)	Loss 3.1231 (3.2720)	Entropy 1.60673 (1.60692)	Top-1 acc 46.875 (45.989)	Top-5 acc 70.312 (69.484)	lr 0.02280
Train [24][50/3239]	Time 0.298 (1.870)	Data Time 0.001 (0.570)	Loss 3.0617 (3.2739)	Entropy 1.60672 (1.60688)	Top-1 acc 50.781 (45.841)	Top-5 acc 75.781 (69.616)	lr 0.02280
Train [24][60/3239]	Time 0.190 (1.637)	Data Time 0.001 (0.477)	Loss 3.2397 (3.2645)	Entropy 1.60666 (1.60685)	Top-1 acc 47.656 (46.030)	Top-5 acc 73.438 (69.839)	lr 0.02280
Train [24][70/3239]	Time 0.218 (1.468)	Data Time 0.001 (0.410)	Loss 3.3286 (3.2723)	Entropy 1.60663 (1.60682)	Top-1 acc 47.266 (45.934)	Top-5 acc 68.750 (69.691)	lr 0.02280
Train [24][80/3239]	Time 0.225 (1.341)	Data Time 0.001 (0.360)	Loss 3.0649 (3.2766)	Entropy 1.60651 (1.60679)	Top-1 acc 51.172 (45.906)	Top-5 acc 73.047 (69.613)	lr 0.02280
Train [24][90/3239]	Time 0.232 (1.241)	Data Time 0.002 (0.320)	Loss 3.2918 (3.2701)	Entropy 1.60644 (1.60676)	Top-1 acc 45.703 (46.012)	Top-5 acc 70.312 (69.707)	lr 0.02280
Train [24][100/3239]	Time 0.231 (1.161)	Data Time 0.001 (0.289)	Loss 3.1656 (3.2627)	Entropy 1.60637 (1.60673)	Top-1 acc 48.438 (46.167)	Top-5 acc 69.922 (69.860)	lr 0.02280
Train [24][110/3239]	Time 0.281 (1.095)	Data Time 0.001 (0.263)	Loss 3.3339 (3.2637)	Entropy 1.60623 (1.60669)	Top-1 acc 44.531 (46.094)	Top-5 acc 68.750 (69.880)	lr 0.02280
Train [24][120/3239]	Time 2.434 (1.043)	Data Time 0.001 (0.241)	Loss 3.4727 (3.2644)	Entropy 1.60623 (1.60665)	Top-1 acc 43.359 (46.120)	Top-5 acc 66.406 (69.857)	lr 0.02279
Train [24][130/3239]	Time 0.222 (0.981)	Data Time 0.001 (0.223)	Loss 3.4249 (3.2626)	Entropy 1.60607 (1.60661)	Top-1 acc 46.094 (46.252)	Top-5 acc 67.188 (69.895)	lr 0.02279
Train [24][140/3239]	Time 0.200 (0.942)	Data Time 0.002 (0.207)	Loss 3.5487 (3.2675)	Entropy 1.60593 (1.60656)	Top-1 acc 37.891 (46.149)	Top-5 acc 62.891 (69.781)	lr 0.02279
Train [24][150/3239]	Time 0.201 (0.909)	Data Time 0.001 (0.194)	Loss 3.4173 (3.2695)	Entropy 1.60588 (1.60652)	Top-1 acc 42.578 (46.065)	Top-5 acc 64.062 (69.655)	lr 0.02279
Train [24][160/3239]	Time 0.203 (0.881)	Data Time 0.001 (0.182)	Loss 3.3275 (3.2683)	Entropy 1.60583 (1.60648)	Top-1 acc 46.094 (46.036)	Top-5 acc 68.359 (69.728)	lr 0.02279
Train [24][170/3239]	Time 0.317 (0.855)	Data Time 0.002 (0.171)	Loss 3.1720 (3.2668)	Entropy 1.60577 (1.60644)	Top-1 acc 45.312 (46.053)	Top-5 acc 71.875 (69.794)	lr 0.02279
Train [24][180/3239]	Time 0.196 (0.832)	Data Time 0.001 (0.162)	Loss 3.3299 (3.2655)	Entropy 1.60570 (1.60640)	Top-1 acc 41.406 (46.051)	Top-5 acc 66.797 (69.833)	lr 0.02279
Train [24][190/3239]	Time 0.171 (0.812)	Data Time 0.001 (0.154)	Loss 3.2965 (3.2667)	Entropy 1.60562 (1.60636)	Top-1 acc 46.875 (46.047)	Top-5 acc 67.578 (69.836)	lr 0.02279
Train [24][200/3239]	Time 0.192 (0.793)	Data Time 0.001 (0.146)	Loss 3.5296 (3.2673)	Entropy 1.60551 (1.60632)	Top-1 acc 42.578 (46.086)	Top-5 acc 65.234 (69.801)	lr 0.02279
Train [24][210/3239]	Time 0.241 (0.776)	Data Time 0.001 (0.139)	Loss 3.1837 (3.2663)	Entropy 1.60531 (1.60628)	Top-1 acc 46.094 (46.168)	Top-5 acc 73.438 (69.842)	lr 0.02279
Train [24][220/3239]	Time 0.234 (0.761)	Data Time 0.001 (0.133)	Loss 3.1968 (3.2620)	Entropy 1.60525 (1.60624)	Top-1 acc 48.828 (46.267)	Top-5 acc 70.703 (69.901)	lr 0.02279
Train [24][230/3239]	Time 2.507 (0.748)	Data Time 0.001 (0.127)	Loss 3.3903 (3.2632)	Entropy 1.60525 (1.60619)	Top-1 acc 42.969 (46.200)	Top-5 acc 67.969 (69.856)	lr 0.02279
Train [24][240/3239]	Time 0.267 (0.727)	Data Time 0.002 (0.122)	Loss 3.2747 (3.2628)	Entropy 1.60526 (1.60615)	Top-1 acc 43.359 (46.214)	Top-5 acc 71.094 (69.857)	lr 0.02279
Train [24][250/3239]	Time 0.225 (0.715)	Data Time 0.001 (0.117)	Loss 3.1789 (3.2627)	Entropy 1.60524 (1.60612)	Top-1 acc 48.828 (46.229)	Top-5 acc 71.094 (69.861)	lr 0.02279
Train [24][260/3239]	Time 0.205 (0.704)	Data Time 0.001 (0.113)	Loss 2.9662 (3.2609)	Entropy 1.60512 (1.60608)	Top-1 acc 53.516 (46.297)	Top-5 acc 76.953 (69.889)	lr 0.02279
Train [24][270/3239]	Time 0.207 (0.695)	Data Time 0.001 (0.109)	Loss 3.2961 (3.2621)	Entropy 1.60470 (1.60603)	Top-1 acc 44.922 (46.226)	Top-5 acc 69.922 (69.845)	lr 0.02279
Train [24][280/3239]	Time 0.206 (0.686)	Data Time 0.001 (0.105)	Loss 3.2304 (3.2622)	Entropy 1.60465 (1.60599)	Top-1 acc 49.609 (46.220)	Top-5 acc 72.656 (69.834)	lr 0.02279
Train [24][290/3239]	Time 0.311 (0.678)	Data Time 0.001 (0.101)	Loss 3.4237 (3.2631)	Entropy 1.60449 (1.60594)	Top-1 acc 41.406 (46.160)	Top-5 acc 67.188 (69.837)	lr 0.02278
Train [24][300/3239]	Time 0.215 (0.670)	Data Time 0.001 (0.098)	Loss 3.1050 (3.2609)	Entropy 1.60445 (1.60589)	Top-1 acc 51.953 (46.221)	Top-5 acc 70.312 (69.897)	lr 0.02278
Train [24][310/3239]	Time 0.212 (0.662)	Data Time 0.001 (0.095)	Loss 3.2241 (3.2609)	Entropy 1.60456 (1.60584)	Top-1 acc 48.047 (46.243)	Top-5 acc 71.484 (69.901)	lr 0.02278
Train [24][320/3239]	Time 0.245 (0.655)	Data Time 0.001 (0.092)	Loss 3.1600 (3.2618)	Entropy 1.60453 (1.60580)	Top-1 acc 48.047 (46.203)	Top-5 acc 70.703 (69.876)	lr 0.02278
Train [24][330/3239]	Time 0.199 (0.649)	Data Time 0.001 (0.089)	Loss 3.2762 (3.2636)	Entropy 1.60420 (1.60576)	Top-1 acc 44.922 (46.150)	Top-5 acc 66.797 (69.809)	lr 0.02278
Train [24][340/3239]	Time 2.349 (0.643)	Data Time 0.001 (0.087)	Loss 3.2565 (3.2629)	Entropy 1.60420 (1.60572)	Top-1 acc 52.734 (46.181)	Top-5 acc 69.531 (69.842)	lr 0.02278
Train [24][350/3239]	Time 0.249 (0.632)	Data Time 0.001 (0.084)	Loss 3.2374 (3.2631)	Entropy 1.60412 (1.60567)	Top-1 acc 49.609 (46.172)	Top-5 acc 70.703 (69.842)	lr 0.02278
Train [24][360/3239]	Time 0.218 (0.626)	Data Time 0.001 (0.082)	Loss 3.1665 (3.2623)	Entropy 1.60411 (1.60563)	Top-1 acc 51.172 (46.232)	Top-5 acc 73.828 (69.874)	lr 0.02278
Train [24][370/3239]	Time 0.220 (0.621)	Data Time 0.001 (0.080)	Loss 3.1337 (3.2598)	Entropy 1.60408 (1.60559)	Top-1 acc 48.047 (46.287)	Top-5 acc 75.391 (69.941)	lr 0.02278
Train [24][380/3239]	Time 0.338 (0.726)	Data Time 0.002 (0.078)	Loss 3.1487 (3.2583)	Entropy 1.60404 (1.60555)	Top-1 acc 50.000 (46.321)	Top-5 acc 72.266 (69.970)	lr 0.02278
Train [24][390/3239]	Time 0.187 (0.720)	Data Time 0.002 (0.076)	Loss 3.3429 (3.2588)	Entropy 1.60386 (1.60551)	Top-1 acc 46.875 (46.352)	Top-5 acc 65.625 (69.936)	lr 0.02278
Train [24][400/3239]	Time 0.254 (0.713)	Data Time 0.001 (0.074)	Loss 3.1702 (3.2589)	Entropy 1.60387 (1.60547)	Top-1 acc 49.219 (46.362)	Top-5 acc 72.656 (69.939)	lr 0.02278
Train [24][410/3239]	Time 0.222 (0.706)	Data Time 0.001 (0.072)	Loss 3.1791 (3.2580)	Entropy 1.60383 (1.60543)	Top-1 acc 49.219 (46.405)	Top-5 acc 71.875 (69.960)	lr 0.02278
Train [24][420/3239]	Time 0.218 (0.700)	Data Time 0.001 (0.071)	Loss 3.1665 (3.2583)	Entropy 1.60380 (1.60539)	Top-1 acc 50.391 (46.403)	Top-5 acc 71.875 (69.950)	lr 0.02278
Train [24][430/3239]	Time 0.206 (0.694)	Data Time 0.001 (0.069)	Loss 3.1642 (3.2571)	Entropy 1.60373 (1.60535)	Top-1 acc 50.781 (46.450)	Top-5 acc 72.656 (69.992)	lr 0.02278
Train [24][440/3239]	Time 0.245 (0.688)	Data Time 0.001 (0.068)	Loss 3.3458 (3.2563)	Entropy 1.60370 (1.60531)	Top-1 acc 43.359 (46.470)	Top-5 acc 65.234 (70.002)	lr 0.02278
Train [24][450/3239]	Time 2.430 (0.682)	Data Time 0.001 (0.066)	Loss 3.3023 (3.2572)	Entropy 1.60370 (1.60528)	Top-1 acc 44.531 (46.442)	Top-5 acc 68.359 (70.000)	lr 0.02278
Train [24][460/3239]	Time 0.216 (0.672)	Data Time 0.001 (0.065)	Loss 3.2250 (3.2570)	Entropy 1.60351 (1.60524)	Top-1 acc 48.828 (46.469)	Top-5 acc 71.875 (70.009)	lr 0.02278
Train [24][470/3239]	Time 0.255 (0.667)	Data Time 0.002 (0.063)	Loss 3.4005 (3.2561)	Entropy 1.60343 (1.60520)	Top-1 acc 42.969 (46.489)	Top-5 acc 66.016 (70.016)	lr 0.02277
Train [24][480/3239]	Time 0.204 (0.663)	Data Time 0.001 (0.062)	Loss 3.3388 (3.2573)	Entropy 1.60348 (1.60517)	Top-1 acc 44.531 (46.466)	Top-5 acc 67.188 (69.981)	lr 0.02277
Train [24][490/3239]	Time 0.234 (0.658)	Data Time 0.001 (0.061)	Loss 3.5813 (3.2577)	Entropy 1.60345 (1.60513)	Top-1 acc 39.062 (46.463)	Top-5 acc 64.844 (69.966)	lr 0.02277
Train [24][500/3239]	Time 0.175 (0.654)	Data Time 0.001 (0.060)	Loss 3.1863 (3.2567)	Entropy 1.60345 (1.60510)	Top-1 acc 48.047 (46.491)	Top-5 acc 71.875 (69.988)	lr 0.02277
Train [24][510/3239]	Time 0.220 (0.650)	Data Time 0.001 (0.059)	Loss 3.2712 (3.2569)	Entropy 1.60343 (1.60506)	Top-1 acc 44.141 (46.468)	Top-5 acc 69.922 (69.985)	lr 0.02277
Train [24][520/3239]	Time 0.224 (0.646)	Data Time 0.001 (0.058)	Loss 3.6062 (3.2583)	Entropy 1.60335 (1.60503)	Top-1 acc 40.625 (46.443)	Top-5 acc 63.672 (69.952)	lr 0.02277
Train [24][530/3239]	Time 0.209 (0.642)	Data Time 0.002 (0.057)	Loss 3.3083 (3.2585)	Entropy 1.60329 (1.60500)	Top-1 acc 44.141 (46.434)	Top-5 acc 69.531 (69.954)	lr 0.02277
Train [24][540/3239]	Time 0.230 (0.639)	Data Time 0.001 (0.056)	Loss 3.2164 (3.2584)	Entropy 1.60323 (1.60497)	Top-1 acc 45.703 (46.424)	Top-5 acc 70.703 (69.952)	lr 0.02277
Train [24][550/3239]	Time 0.236 (0.635)	Data Time 0.001 (0.055)	Loss 3.2410 (3.2588)	Entropy 1.60313 (1.60494)	Top-1 acc 47.656 (46.416)	Top-5 acc 69.141 (69.940)	lr 0.02277
Train [24][560/3239]	Time 2.315 (0.631)	Data Time 0.001 (0.054)	Loss 3.1598 (3.2585)	Entropy 1.60313 (1.60490)	Top-1 acc 48.438 (46.424)	Top-5 acc 71.875 (69.945)	lr 0.02277
Train [24][570/3239]	Time 0.234 (0.624)	Data Time 0.001 (0.053)	Loss 3.4776 (3.2587)	Entropy 1.60306 (1.60487)	Top-1 acc 43.750 (46.439)	Top-5 acc 66.016 (69.948)	lr 0.02277
Train [24][580/3239]	Time 0.166 (0.621)	Data Time 0.001 (0.052)	Loss 3.2605 (3.2579)	Entropy 1.60302 (1.60484)	Top-1 acc 48.438 (46.472)	Top-5 acc 67.578 (69.971)	lr 0.02277
Train [24][590/3239]	Time 0.278 (0.618)	Data Time 0.001 (0.051)	Loss 3.0505 (3.2576)	Entropy 1.60296 (1.60481)	Top-1 acc 54.688 (46.482)	Top-5 acc 78.906 (69.969)	lr 0.02277
Train [24][600/3239]	Time 0.222 (0.615)	Data Time 0.001 (0.050)	Loss 3.1758 (3.2562)	Entropy 1.60295 (1.60478)	Top-1 acc 45.312 (46.509)	Top-5 acc 69.531 (69.989)	lr 0.02277
Train [24][610/3239]	Time 0.204 (0.612)	Data Time 0.001 (0.049)	Loss 3.2126 (3.2561)	Entropy 1.60285 (1.60475)	Top-1 acc 48.047 (46.505)	Top-5 acc 71.484 (69.994)	lr 0.02277
Train [24][620/3239]	Time 0.213 (0.609)	Data Time 0.001 (0.049)	Loss 3.1815 (3.2568)	Entropy 1.60287 (1.60472)	Top-1 acc 47.656 (46.504)	Top-5 acc 72.656 (69.983)	lr 0.02277
Train [24][630/3239]	Time 0.209 (0.606)	Data Time 0.001 (0.048)	Loss 3.1347 (3.2562)	Entropy 1.60288 (1.60469)	Top-1 acc 49.609 (46.510)	Top-5 acc 70.703 (69.998)	lr 0.02277
Train [24][640/3239]	Time 0.207 (0.604)	Data Time 0.001 (0.047)	Loss 3.1651 (3.2567)	Entropy 1.60285 (1.60466)	Top-1 acc 48.047 (46.500)	Top-5 acc 70.703 (69.982)	lr 0.02276
Train [24][650/3239]	Time 0.340 (0.601)	Data Time 0.001 (0.046)	Loss 3.2098 (3.2569)	Entropy 1.60279 (1.60463)	Top-1 acc 46.875 (46.493)	Top-5 acc 72.656 (69.973)	lr 0.02276
Train [24][660/3239]	Time 0.215 (0.599)	Data Time 0.001 (0.046)	Loss 3.4220 (3.2573)	Entropy 1.60270 (1.60460)	Top-1 acc 42.969 (46.499)	Top-5 acc 67.188 (69.958)	lr 0.02276
Train [24][670/3239]	Time 2.326 (0.597)	Data Time 0.001 (0.045)	Loss 3.3280 (3.2584)	Entropy 1.60270 (1.60458)	Top-1 acc 41.797 (46.456)	Top-5 acc 67.969 (69.937)	lr 0.02276
Train [24][680/3239]	Time 0.231 (0.591)	Data Time 0.001 (0.045)	Loss 3.2522 (3.2575)	Entropy 1.60262 (1.60455)	Top-1 acc 46.875 (46.472)	Top-5 acc 68.359 (69.955)	lr 0.02276
Train [24][690/3239]	Time 0.202 (0.589)	Data Time 0.001 (0.044)	Loss 3.2443 (3.2576)	Entropy 1.60254 (1.60452)	Top-1 acc 51.172 (46.489)	Top-5 acc 67.578 (69.940)	lr 0.02276
Train [24][700/3239]	Time 0.207 (0.587)	Data Time 0.001 (0.043)	Loss 3.2036 (3.2564)	Entropy 1.60249 (1.60449)	Top-1 acc 47.656 (46.519)	Top-5 acc 73.047 (69.963)	lr 0.02276
Train [24][710/3239]	Time 0.321 (0.585)	Data Time 0.001 (0.043)	Loss 3.2287 (3.2554)	Entropy 1.60246 (1.60446)	Top-1 acc 46.875 (46.521)	Top-5 acc 65.625 (69.976)	lr 0.02276
Train [24][720/3239]	Time 0.273 (0.583)	Data Time 0.001 (0.042)	Loss 3.1828 (3.2558)	Entropy 1.60242 (1.60443)	Top-1 acc 48.438 (46.502)	Top-5 acc 72.266 (69.975)	lr 0.02276
Train [24][730/3239]	Time 0.196 (0.581)	Data Time 0.001 (0.042)	Loss 3.1925 (3.2562)	Entropy 1.60232 (1.60440)	Top-1 acc 44.922 (46.484)	Top-5 acc 71.484 (69.973)	lr 0.02276
Train [24][740/3239]	Time 0.297 (0.637)	Data Time 0.003 (0.041)	Loss 3.1856 (3.2564)	Entropy 1.60232 (1.60438)	Top-1 acc 51.562 (46.489)	Top-5 acc 71.094 (69.966)	lr 0.02276
Train [24][750/3239]	Time 0.202 (0.635)	Data Time 0.002 (0.041)	Loss 3.5002 (3.2567)	Entropy 1.60227 (1.60435)	Top-1 acc 42.578 (46.477)	Top-5 acc 65.234 (69.958)	lr 0.02276
Train [24][760/3239]	Time 0.188 (0.632)	Data Time 0.002 (0.040)	Loss 3.1911 (3.2565)	Entropy 1.60226 (1.60432)	Top-1 acc 46.484 (46.485)	Top-5 acc 73.047 (69.969)	lr 0.02276
Train [24][770/3239]	Time 0.343 (0.630)	Data Time 0.001 (0.040)	Loss 3.0726 (3.2568)	Entropy 1.60213 (1.60429)	Top-1 acc 52.734 (46.490)	Top-5 acc 72.656 (69.952)	lr 0.02276
Train [24][780/3239]	Time 2.207 (0.627)	Data Time 0.001 (0.039)	Loss 3.4402 (3.2567)	Entropy 1.60213 (1.60427)	Top-1 acc 43.750 (46.487)	Top-5 acc 65.234 (69.955)	lr 0.02276
Train [24][790/3239]	Time 0.165 (0.622)	Data Time 0.001 (0.039)	Loss 3.3152 (3.2568)	Entropy 1.60206 (1.60424)	Top-1 acc 42.969 (46.466)	Top-5 acc 71.094 (69.947)	lr 0.02276
Train [24][800/3239]	Time 0.228 (0.620)	Data Time 0.001 (0.038)	Loss 3.5023 (3.2572)	Entropy 1.60200 (1.60421)	Top-1 acc 39.453 (46.457)	Top-5 acc 64.453 (69.939)	lr 0.02276
Train [24][810/3239]	Time 0.214 (0.617)	Data Time 0.001 (0.038)	Loss 3.2597 (3.2581)	Entropy 1.60193 (1.60418)	Top-1 acc 49.219 (46.451)	Top-5 acc 69.531 (69.912)	lr 0.02276
Train [24][820/3239]	Time 0.240 (0.615)	Data Time 0.001 (0.037)	Loss 3.2376 (3.2576)	Entropy 1.60188 (1.60415)	Top-1 acc 46.875 (46.466)	Top-5 acc 70.703 (69.929)	lr 0.02275
Train [24][830/3239]	Time 0.203 (0.613)	Data Time 0.002 (0.037)	Loss 3.4262 (3.2582)	Entropy 1.60183 (1.60413)	Top-1 acc 43.750 (46.451)	Top-5 acc 64.062 (69.909)	lr 0.02275
Train [24][840/3239]	Time 0.209 (0.611)	Data Time 0.001 (0.036)	Loss 3.2054 (3.2591)	Entropy 1.60184 (1.60410)	Top-1 acc 43.750 (46.424)	Top-5 acc 71.875 (69.901)	lr 0.02275
Train [24][850/3239]	Time 0.208 (0.609)	Data Time 0.001 (0.036)	Loss 3.2755 (3.2586)	Entropy 1.60184 (1.60407)	Top-1 acc 44.141 (46.438)	Top-5 acc 69.141 (69.909)	lr 0.02275
Train [24][860/3239]	Time 0.201 (0.607)	Data Time 0.001 (0.036)	Loss 3.2526 (3.2585)	Entropy 1.60183 (1.60405)	Top-1 acc 41.797 (46.419)	Top-5 acc 69.531 (69.905)	lr 0.02275
Train [24][870/3239]	Time 0.223 (0.605)	Data Time 0.001 (0.035)	Loss 3.3457 (3.2579)	Entropy 1.60168 (1.60402)	Top-1 acc 41.797 (46.416)	Top-5 acc 67.578 (69.920)	lr 0.02275
Train [24][880/3239]	Time 0.240 (0.603)	Data Time 0.002 (0.035)	Loss 3.4313 (3.2581)	Entropy 1.60163 (1.60400)	Top-1 acc 43.750 (46.392)	Top-5 acc 68.750 (69.913)	lr 0.02275
Train [24][890/3239]	Time 2.468 (0.602)	Data Time 0.001 (0.035)	Loss 3.2571 (3.2580)	Entropy 1.60163 (1.60397)	Top-1 acc 45.703 (46.395)	Top-5 acc 68.750 (69.902)	lr 0.02275
Train [24][900/3239]	Time 0.264 (0.598)	Data Time 0.001 (0.034)	Loss 3.3899 (3.2581)	Entropy 1.60154 (1.60394)	Top-1 acc 40.625 (46.383)	Top-5 acc 66.016 (69.891)	lr 0.02275
Train [24][910/3239]	Time 0.265 (0.596)	Data Time 0.001 (0.034)	Loss 3.1170 (3.2587)	Entropy 1.60155 (1.60392)	Top-1 acc 50.000 (46.370)	Top-5 acc 73.438 (69.866)	lr 0.02275
Train [24][920/3239]	Time 0.208 (0.594)	Data Time 0.001 (0.033)	Loss 3.4003 (3.2590)	Entropy 1.60156 (1.60389)	Top-1 acc 44.922 (46.370)	Top-5 acc 66.797 (69.856)	lr 0.02275
Train [24][930/3239]	Time 0.202 (0.593)	Data Time 0.001 (0.033)	Loss 3.4730 (3.2597)	Entropy 1.60145 (1.60386)	Top-1 acc 39.844 (46.352)	Top-5 acc 65.625 (69.851)	lr 0.02275
Train [24][940/3239]	Time 0.265 (0.591)	Data Time 0.002 (0.033)	Loss 3.3104 (3.2601)	Entropy 1.60136 (1.60384)	Top-1 acc 42.969 (46.345)	Top-5 acc 69.141 (69.841)	lr 0.02275
Train [24][950/3239]	Time 0.203 (0.589)	Data Time 0.001 (0.032)	Loss 3.3695 (3.2601)	Entropy 1.60131 (1.60381)	Top-1 acc 45.312 (46.332)	Top-5 acc 67.578 (69.843)	lr 0.02275
Train [24][960/3239]	Time 0.201 (0.588)	Data Time 0.001 (0.032)	Loss 3.0742 (3.2598)	Entropy 1.60115 (1.60378)	Top-1 acc 52.734 (46.344)	Top-5 acc 72.266 (69.850)	lr 0.02275
Train [24][970/3239]	Time 0.223 (0.586)	Data Time 0.002 (0.032)	Loss 3.1236 (3.2593)	Entropy 1.60106 (1.60376)	Top-1 acc 48.438 (46.354)	Top-5 acc 73.828 (69.866)	lr 0.02275
Train [24][980/3239]	Time 0.225 (0.585)	Data Time 0.001 (0.032)	Loss 3.1757 (3.2598)	Entropy 1.60124 (1.60373)	Top-1 acc 49.609 (46.339)	Top-5 acc 73.828 (69.861)	lr 0.02275
Train [24][990/3239]	Time 0.245 (0.583)	Data Time 0.001 (0.031)	Loss 3.3331 (3.2600)	Entropy 1.60114 (1.60370)	Top-1 acc 47.266 (46.338)	Top-5 acc 67.578 (69.850)	lr 0.02274
Train [24][1000/3239]	Time 2.207 (0.582)	Data Time 0.001 (0.031)	Loss 3.3097 (3.2601)	Entropy 1.60114 (1.60368)	Top-1 acc 45.312 (46.349)	Top-5 acc 66.797 (69.836)	lr 0.02274
Train [24][1010/3239]	Time 0.322 (0.578)	Data Time 0.001 (0.031)	Loss 3.3384 (3.2602)	Entropy 1.60111 (1.60365)	Top-1 acc 46.094 (46.351)	Top-5 acc 66.797 (69.842)	lr 0.02274
Train [24][1020/3239]	Time 0.208 (0.577)	Data Time 0.001 (0.030)	Loss 2.7975 (3.2596)	Entropy 1.60112 (1.60363)	Top-1 acc 55.859 (46.362)	Top-5 acc 79.297 (69.856)	lr 0.02274
Train [24][1030/3239]	Time 0.209 (0.576)	Data Time 0.001 (0.030)	Loss 3.1547 (3.2594)	Entropy 1.60083 (1.60360)	Top-1 acc 45.703 (46.360)	Top-5 acc 74.219 (69.861)	lr 0.02274
Train [24][1040/3239]	Time 0.167 (0.574)	Data Time 0.001 (0.030)	Loss 3.1332 (3.2587)	Entropy 1.60080 (1.60358)	Top-1 acc 47.656 (46.376)	Top-5 acc 74.609 (69.875)	lr 0.02274
Train [24][1050/3239]	Time 0.226 (0.573)	Data Time 0.002 (0.030)	Loss 3.3843 (3.2583)	Entropy 1.60075 (1.60355)	Top-1 acc 45.312 (46.380)	Top-5 acc 66.016 (69.888)	lr 0.02274
Train [24][1060/3239]	Time 0.227 (0.572)	Data Time 0.001 (0.029)	Loss 3.0303 (3.2583)	Entropy 1.60070 (1.60352)	Top-1 acc 54.297 (46.386)	Top-5 acc 74.219 (69.901)	lr 0.02274
Train [24][1070/3239]	Time 0.271 (0.571)	Data Time 0.001 (0.029)	Loss 3.0612 (3.2579)	Entropy 1.60063 (1.60350)	Top-1 acc 54.688 (46.395)	Top-5 acc 73.828 (69.902)	lr 0.02274
Train [24][1080/3239]	Time 0.262 (0.569)	Data Time 0.001 (0.029)	Loss 3.2183 (3.2582)	Entropy 1.60059 (1.60347)	Top-1 acc 46.484 (46.388)	Top-5 acc 68.750 (69.884)	lr 0.02274
Train [24][1090/3239]	Time 0.209 (0.568)	Data Time 0.002 (0.029)	Loss 3.3288 (3.2587)	Entropy 1.60055 (1.60344)	Top-1 acc 43.359 (46.378)	Top-5 acc 71.484 (69.880)	lr 0.02274
Train [24][1100/3239]	Time 0.227 (0.606)	Data Time 0.003 (0.028)	Loss 3.1288 (3.2588)	Entropy 1.60050 (1.60342)	Top-1 acc 47.266 (46.370)	Top-5 acc 71.875 (69.878)	lr 0.02274
Train [24][1110/3239]	Time 2.416 (0.605)	Data Time 0.002 (0.028)	Loss 3.2405 (3.2586)	Entropy 1.60050 (1.60339)	Top-1 acc 45.312 (46.372)	Top-5 acc 69.141 (69.878)	lr 0.02274
Train [24][1120/3239]	Time 0.217 (0.601)	Data Time 0.002 (0.028)	Loss 3.2668 (3.2589)	Entropy 1.60046 (1.60336)	Top-1 acc 47.266 (46.368)	Top-5 acc 70.312 (69.877)	lr 0.02274
Train [24][1130/3239]	Time 0.222 (0.600)	Data Time 0.001 (0.028)	Loss 3.3423 (3.2588)	Entropy 1.60039 (1.60334)	Top-1 acc 48.438 (46.370)	Top-5 acc 71.094 (69.880)	lr 0.02274
Train [24][1140/3239]	Time 0.342 (0.599)	Data Time 0.002 (0.027)	Loss 3.3194 (3.2590)	Entropy 1.60023 (1.60331)	Top-1 acc 43.359 (46.355)	Top-5 acc 69.922 (69.872)	lr 0.02274
Train [24][1150/3239]	Time 0.217 (0.597)	Data Time 0.002 (0.027)	Loss 3.3635 (3.2589)	Entropy 1.60019 (1.60328)	Top-1 acc 43.750 (46.356)	Top-5 acc 65.234 (69.872)	lr 0.02274
Train [24][1160/3239]	Time 0.222 (0.596)	Data Time 0.001 (0.027)	Loss 3.2214 (3.2586)	Entropy 1.60014 (1.60326)	Top-1 acc 46.094 (46.347)	Top-5 acc 72.266 (69.880)	lr 0.02273
Train [24][1170/3239]	Time 0.196 (0.595)	Data Time 0.001 (0.027)	Loss 3.2221 (3.2592)	Entropy 1.60007 (1.60323)	Top-1 acc 46.094 (46.337)	Top-5 acc 71.875 (69.876)	lr 0.02273
Train [24][1180/3239]	Time 0.238 (0.593)	Data Time 0.003 (0.027)	Loss 3.1521 (3.2585)	Entropy 1.59990 (1.60320)	Top-1 acc 51.172 (46.358)	Top-5 acc 75.391 (69.892)	lr 0.02273
Train [24][1190/3239]	Time 0.272 (0.592)	Data Time 0.001 (0.026)	Loss 3.2562 (3.2582)	Entropy 1.59982 (1.60317)	Top-1 acc 44.922 (46.370)	Top-5 acc 68.750 (69.904)	lr 0.02273
Train [24][1200/3239]	Time 0.232 (0.591)	Data Time 0.001 (0.026)	Loss 3.7311 (3.2587)	Entropy 1.59959 (1.60315)	Top-1 acc 39.062 (46.362)	Top-5 acc 60.156 (69.895)	lr 0.02273
Train [24][1210/3239]	Time 0.204 (0.590)	Data Time 0.001 (0.026)	Loss 3.3456 (3.2584)	Entropy 1.59958 (1.60312)	Top-1 acc 40.625 (46.357)	Top-5 acc 67.188 (69.908)	lr 0.02273
Train [24][1220/3239]	Time 2.380 (0.589)	Data Time 0.002 (0.026)	Loss 3.2215 (3.2584)	Entropy 1.59958 (1.60309)	Top-1 acc 46.094 (46.359)	Top-5 acc 69.141 (69.906)	lr 0.02273
Train [24][1230/3239]	Time 0.212 (0.586)	Data Time 0.001 (0.026)	Loss 3.1915 (3.2584)	Entropy 1.59951 (1.60306)	Top-1 acc 48.828 (46.367)	Top-5 acc 72.266 (69.906)	lr 0.02273
Train [24][1240/3239]	Time 0.235 (0.585)	Data Time 0.001 (0.025)	Loss 3.2802 (3.2586)	Entropy 1.59936 (1.60303)	Top-1 acc 43.359 (46.364)	Top-5 acc 69.531 (69.905)	lr 0.02273
Train [24][1250/3239]	Time 0.214 (0.584)	Data Time 0.001 (0.025)	Loss 3.2140 (3.2583)	Entropy 1.59932 (1.60300)	Top-1 acc 48.047 (46.377)	Top-5 acc 72.266 (69.910)	lr 0.02273
Train [24][1260/3239]	Time 0.236 (0.583)	Data Time 0.001 (0.025)	Loss 2.9950 (3.2586)	Entropy 1.59933 (1.60297)	Top-1 acc 50.391 (46.363)	Top-5 acc 74.609 (69.898)	lr 0.02273
Train [24][1270/3239]	Time 0.363 (0.582)	Data Time 0.002 (0.025)	Loss 3.0166 (3.2585)	Entropy 1.59931 (1.60294)	Top-1 acc 49.219 (46.359)	Top-5 acc 74.219 (69.904)	lr 0.02273
Train [24][1280/3239]	Time 0.214 (0.581)	Data Time 0.001 (0.025)	Loss 3.2727 (3.2586)	Entropy 1.59927 (1.60291)	Top-1 acc 48.828 (46.363)	Top-5 acc 69.141 (69.899)	lr 0.02273
Train [24][1290/3239]	Time 0.222 (0.580)	Data Time 0.001 (0.024)	Loss 3.2780 (3.2585)	Entropy 1.59918 (1.60289)	Top-1 acc 43.359 (46.363)	Top-5 acc 69.141 (69.897)	lr 0.02273
Train [24][1300/3239]	Time 0.234 (0.579)	Data Time 0.002 (0.024)	Loss 3.2549 (3.2586)	Entropy 1.59910 (1.60286)	Top-1 acc 46.094 (46.362)	Top-5 acc 68.359 (69.899)	lr 0.02273
Train [24][1310/3239]	Time 0.215 (0.578)	Data Time 0.001 (0.024)	Loss 3.3882 (3.2583)	Entropy 1.59889 (1.60283)	Top-1 acc 42.969 (46.368)	Top-5 acc 65.625 (69.905)	lr 0.02273
Train [24][1320/3239]	Time 0.207 (0.577)	Data Time 0.001 (0.024)	Loss 3.1994 (3.2587)	Entropy 1.59883 (1.60280)	Top-1 acc 45.312 (46.356)	Top-5 acc 70.703 (69.898)	lr 0.02273
Train [24][1330/3239]	Time 2.312 (0.576)	Data Time 0.001 (0.024)	Loss 3.1813 (3.2584)	Entropy 1.59883 (1.60277)	Top-1 acc 49.609 (46.369)	Top-5 acc 73.438 (69.903)	lr 0.02272
Train [24][1340/3239]	Time 0.224 (0.573)	Data Time 0.001 (0.024)	Loss 3.4012 (3.2587)	Entropy 1.59882 (1.60274)	Top-1 acc 42.188 (46.370)	Top-5 acc 66.406 (69.898)	lr 0.02272
Train [24][1350/3239]	Time 0.250 (0.572)	Data Time 0.001 (0.023)	Loss 3.2743 (3.2587)	Entropy 1.59868 (1.60271)	Top-1 acc 45.703 (46.374)	Top-5 acc 69.922 (69.896)	lr 0.02272
Train [24][1360/3239]	Time 0.250 (0.571)	Data Time 0.001 (0.023)	Loss 3.1984 (3.2590)	Entropy 1.59865 (1.60268)	Top-1 acc 50.000 (46.371)	Top-5 acc 73.047 (69.887)	lr 0.02272
Train [24][1370/3239]	Time 0.219 (0.570)	Data Time 0.001 (0.023)	Loss 3.3557 (3.2593)	Entropy 1.59863 (1.60265)	Top-1 acc 41.797 (46.360)	Top-5 acc 67.969 (69.881)	lr 0.02272
Train [24][1380/3239]	Time 0.345 (0.570)	Data Time 0.001 (0.023)	Loss 3.5466 (3.2593)	Entropy 1.59849 (1.60262)	Top-1 acc 37.891 (46.351)	Top-5 acc 64.453 (69.887)	lr 0.02272
Train [24][1390/3239]	Time 0.260 (0.569)	Data Time 0.001 (0.023)	Loss 3.0864 (3.2595)	Entropy 1.59846 (1.60259)	Top-1 acc 50.000 (46.348)	Top-5 acc 76.172 (69.884)	lr 0.02272
Train [24][1400/3239]	Time 0.215 (0.568)	Data Time 0.001 (0.023)	Loss 3.2159 (3.2598)	Entropy 1.59845 (1.60256)	Top-1 acc 45.312 (46.339)	Top-5 acc 73.828 (69.878)	lr 0.02272
Train [24][1410/3239]	Time 0.226 (0.567)	Data Time 0.001 (0.022)	Loss 3.3274 (3.2599)	Entropy 1.59818 (1.60253)	Top-1 acc 42.969 (46.340)	Top-5 acc 69.531 (69.879)	lr 0.02272
Train [24][1420/3239]	Time 0.245 (0.566)	Data Time 0.002 (0.022)	Loss 3.3939 (3.2598)	Entropy 1.59811 (1.60250)	Top-1 acc 40.625 (46.341)	Top-5 acc 65.234 (69.880)	lr 0.02272
Train [24][1430/3239]	Time 0.242 (0.565)	Data Time 0.001 (0.022)	Loss 3.4385 (3.2600)	Entropy 1.59784 (1.60247)	Top-1 acc 38.672 (46.324)	Top-5 acc 68.750 (69.875)	lr 0.02272
Train [24][1440/3239]	Time 2.551 (0.565)	Data Time 0.002 (0.022)	Loss 3.1743 (3.2601)	Entropy 1.59784 (1.60244)	Top-1 acc 50.391 (46.321)	Top-5 acc 71.094 (69.868)	lr 0.02272
Train [24][1450/3239]	Time 0.255 (0.562)	Data Time 0.001 (0.022)	Loss 3.4580 (3.2603)	Entropy 1.59783 (1.60240)	Top-1 acc 45.312 (46.314)	Top-5 acc 65.625 (69.867)	lr 0.02272
Train [24][1460/3239]	Time 0.222 (0.562)	Data Time 0.001 (0.022)	Loss 3.1695 (3.2606)	Entropy 1.59775 (1.60237)	Top-1 acc 49.609 (46.308)	Top-5 acc 71.094 (69.861)	lr 0.02272
Train [24][1470/3239]	Time 0.374 (0.589)	Data Time 0.003 (0.022)	Loss 3.4473 (3.2605)	Entropy 1.59766 (1.60234)	Top-1 acc 45.312 (46.313)	Top-5 acc 64.453 (69.869)	lr 0.02272
Train [24][1480/3239]	Time 0.212 (0.589)	Data Time 0.002 (0.021)	Loss 3.3289 (3.2601)	Entropy 1.59764 (1.60231)	Top-1 acc 43.750 (46.325)	Top-5 acc 67.188 (69.875)	lr 0.02272
Train [24][1490/3239]	Time 0.206 (0.588)	Data Time 0.001 (0.021)	Loss 3.2503 (3.2600)	Entropy 1.59757 (1.60228)	Top-1 acc 49.219 (46.326)	Top-5 acc 66.797 (69.877)	lr 0.02272
Train [24][1500/3239]	Time 0.304 (0.587)	Data Time 0.001 (0.021)	Loss 3.2098 (3.2601)	Entropy 1.59750 (1.60225)	Top-1 acc 49.609 (46.318)	Top-5 acc 72.656 (69.878)	lr 0.02272
Train [24][1510/3239]	Time 0.229 (0.586)	Data Time 0.001 (0.021)	Loss 3.4037 (3.2604)	Entropy 1.59745 (1.60221)	Top-1 acc 43.750 (46.313)	Top-5 acc 65.625 (69.866)	lr 0.02271
Train [24][1520/3239]	Time 0.250 (0.585)	Data Time 0.002 (0.021)	Loss 3.3660 (3.2603)	Entropy 1.59737 (1.60218)	Top-1 acc 44.922 (46.313)	Top-5 acc 68.359 (69.875)	lr 0.02271
Train [24][1530/3239]	Time 0.164 (0.584)	Data Time 0.001 (0.021)	Loss 3.2521 (3.2601)	Entropy 1.59733 (1.60215)	Top-1 acc 47.266 (46.321)	Top-5 acc 69.141 (69.882)	lr 0.02271
Train [24][1540/3239]	Time 0.251 (0.583)	Data Time 0.001 (0.021)	Loss 3.2498 (3.2603)	Entropy 1.59727 (1.60212)	Top-1 acc 41.797 (46.320)	Top-5 acc 67.578 (69.878)	lr 0.02271
Train [24][1550/3239]	Time 2.289 (0.582)	Data Time 0.001 (0.021)	Loss 3.3383 (3.2601)	Entropy 1.59727 (1.60209)	Top-1 acc 43.359 (46.319)	Top-5 acc 68.750 (69.880)	lr 0.02271
Train [24][1560/3239]	Time 0.216 (0.580)	Data Time 0.001 (0.020)	Loss 3.2248 (3.2600)	Entropy 1.59716 (1.60206)	Top-1 acc 47.656 (46.319)	Top-5 acc 70.312 (69.885)	lr 0.02271
Train [24][1570/3239]	Time 0.235 (0.579)	Data Time 0.001 (0.020)	Loss 3.0427 (3.2597)	Entropy 1.59686 (1.60202)	Top-1 acc 52.344 (46.335)	Top-5 acc 74.609 (69.894)	lr 0.02271
Train [24][1580/3239]	Time 0.265 (0.578)	Data Time 0.002 (0.020)	Loss 3.1790 (3.2593)	Entropy 1.59684 (1.60199)	Top-1 acc 46.875 (46.342)	Top-5 acc 75.000 (69.906)	lr 0.02271
Train [24][1590/3239]	Time 0.216 (0.577)	Data Time 0.001 (0.020)	Loss 3.3278 (3.2592)	Entropy 1.59654 (1.60196)	Top-1 acc 46.875 (46.345)	Top-5 acc 67.578 (69.906)	lr 0.02271
Train [24][1600/3239]	Time 0.202 (0.577)	Data Time 0.001 (0.020)	Loss 3.3022 (3.2592)	Entropy 1.59640 (1.60192)	Top-1 acc 44.141 (46.341)	Top-5 acc 67.188 (69.905)	lr 0.02271
Train [24][1610/3239]	Time 0.232 (0.576)	Data Time 0.001 (0.020)	Loss 3.3118 (3.2593)	Entropy 1.59635 (1.60189)	Top-1 acc 44.141 (46.337)	Top-5 acc 69.141 (69.905)	lr 0.02271
Train [24][1620/3239]	Time 0.248 (0.575)	Data Time 0.001 (0.020)	Loss 3.1752 (3.2594)	Entropy 1.59610 (1.60185)	Top-1 acc 49.609 (46.331)	Top-5 acc 70.703 (69.902)	lr 0.02271
Train [24][1630/3239]	Time 0.320 (0.574)	Data Time 0.002 (0.020)	Loss 3.2869 (3.2595)	Entropy 1.59607 (1.60182)	Top-1 acc 48.438 (46.335)	Top-5 acc 68.359 (69.897)	lr 0.02271
Train [24][1640/3239]	Time 0.208 (0.573)	Data Time 0.002 (0.020)	Loss 3.2547 (3.2591)	Entropy 1.59596 (1.60178)	Top-1 acc 48.828 (46.340)	Top-5 acc 71.484 (69.909)	lr 0.02271
Train [24][1650/3239]	Time 0.234 (0.573)	Data Time 0.001 (0.019)	Loss 3.3170 (3.2592)	Entropy 1.59592 (1.60175)	Top-1 acc 46.484 (46.345)	Top-5 acc 70.312 (69.908)	lr 0.02271
Train [24][1660/3239]	Time 2.215 (0.572)	Data Time 0.001 (0.019)	Loss 3.0811 (3.2591)	Entropy 1.59592 (1.60171)	Top-1 acc 52.344 (46.353)	Top-5 acc 71.484 (69.912)	lr 0.02271
Train [24][1670/3239]	Time 0.234 (0.570)	Data Time 0.001 (0.019)	Loss 3.2266 (3.2591)	Entropy 1.59586 (1.60168)	Top-1 acc 52.734 (46.357)	Top-5 acc 71.094 (69.911)	lr 0.02271
Train [24][1680/3239]	Time 0.208 (0.569)	Data Time 0.001 (0.019)	Loss 3.1353 (3.2593)	Entropy 1.59578 (1.60164)	Top-1 acc 48.438 (46.351)	Top-5 acc 72.656 (69.904)	lr 0.02270
Train [24][1690/3239]	Time 0.204 (0.568)	Data Time 0.001 (0.019)	Loss 3.2318 (3.2593)	Entropy 1.59571 (1.60161)	Top-1 acc 47.266 (46.349)	Top-5 acc 66.797 (69.903)	lr 0.02270
Train [24][1700/3239]	Time 0.219 (0.567)	Data Time 0.001 (0.019)	Loss 3.0333 (3.2588)	Entropy 1.59566 (1.60157)	Top-1 acc 50.781 (46.360)	Top-5 acc 75.391 (69.919)	lr 0.02270
Train [24][1710/3239]	Time 0.224 (0.567)	Data Time 0.001 (0.019)	Loss 3.1354 (3.2586)	Entropy 1.59562 (1.60154)	Top-1 acc 47.656 (46.364)	Top-5 acc 71.484 (69.921)	lr 0.02270
Train [24][1720/3239]	Time 0.217 (0.566)	Data Time 0.001 (0.019)	Loss 3.5000 (3.2585)	Entropy 1.59555 (1.60150)	Top-1 acc 42.969 (46.364)	Top-5 acc 67.969 (69.923)	lr 0.02270
Train [24][1730/3239]	Time 0.223 (0.565)	Data Time 0.001 (0.019)	Loss 3.3142 (3.2590)	Entropy 1.59550 (1.60147)	Top-1 acc 44.922 (46.353)	Top-5 acc 66.406 (69.902)	lr 0.02270
Train [24][1740/3239]	Time 0.247 (0.565)	Data Time 0.001 (0.019)	Loss 3.2740 (3.2592)	Entropy 1.59547 (1.60144)	Top-1 acc 45.703 (46.350)	Top-5 acc 69.922 (69.899)	lr 0.02270
Train [24][1750/3239]	Time 0.301 (0.564)	Data Time 0.001 (0.018)	Loss 3.2004 (3.2592)	Entropy 1.59507 (1.60140)	Top-1 acc 46.875 (46.345)	Top-5 acc 69.141 (69.896)	lr 0.02270
Train [24][1760/3239]	Time 0.246 (0.563)	Data Time 0.001 (0.018)	Loss 3.1906 (3.2590)	Entropy 1.59494 (1.60137)	Top-1 acc 48.438 (46.350)	Top-5 acc 69.531 (69.902)	lr 0.02270
Train [24][1770/3239]	Time 2.407 (0.562)	Data Time 0.001 (0.018)	Loss 3.0617 (3.2588)	Entropy 1.59494 (1.60133)	Top-1 acc 50.391 (46.349)	Top-5 acc 73.828 (69.908)	lr 0.02270
Train [24][1780/3239]	Time 0.220 (0.561)	Data Time 0.001 (0.018)	Loss 3.4399 (3.2591)	Entropy 1.59490 (1.60129)	Top-1 acc 42.188 (46.339)	Top-5 acc 67.578 (69.905)	lr 0.02270
Train [24][1790/3239]	Time 0.236 (0.560)	Data Time 0.001 (0.018)	Loss 3.1363 (3.2591)	Entropy 1.59484 (1.60126)	Top-1 acc 46.094 (46.345)	Top-5 acc 70.703 (69.899)	lr 0.02270
Train [24][1800/3239]	Time 0.210 (0.559)	Data Time 0.001 (0.018)	Loss 3.1986 (3.2589)	Entropy 1.59483 (1.60122)	Top-1 acc 48.438 (46.352)	Top-5 acc 71.875 (69.907)	lr 0.02270
Train [24][1810/3239]	Time 0.326 (0.559)	Data Time 0.001 (0.018)	Loss 3.2266 (3.2590)	Entropy 1.59475 (1.60119)	Top-1 acc 44.141 (46.348)	Top-5 acc 69.141 (69.907)	lr 0.02270
Train [24][1820/3239]	Time 0.229 (0.558)	Data Time 0.002 (0.018)	Loss 3.3316 (3.2592)	Entropy 1.59476 (1.60115)	Top-1 acc 43.750 (46.343)	Top-5 acc 67.578 (69.903)	lr 0.02270
Train [24][1830/3239]	Time 0.256 (0.580)	Data Time 0.002 (0.018)	Loss 3.0041 (3.2590)	Entropy 1.59467 (1.60112)	Top-1 acc 48.828 (46.346)	Top-5 acc 76.562 (69.909)	lr 0.02270
Train [24][1840/3239]	Time 0.155 (0.579)	Data Time 0.002 (0.018)	Loss 3.4001 (3.2587)	Entropy 1.59462 (1.60108)	Top-1 acc 46.094 (46.359)	Top-5 acc 68.359 (69.917)	lr 0.02270
Train [24][1850/3239]	Time 0.193 (0.579)	Data Time 0.002 (0.018)	Loss 3.3146 (3.2590)	Entropy 1.59438 (1.60104)	Top-1 acc 45.703 (46.354)	Top-5 acc 68.359 (69.909)	lr 0.02269
Train [24][1860/3239]	Time 0.231 (0.578)	Data Time 0.001 (0.017)	Loss 3.2249 (3.2590)	Entropy 1.59430 (1.60101)	Top-1 acc 47.266 (46.356)	Top-5 acc 70.703 (69.912)	lr 0.02269
Train [24][1870/3239]	Time 0.355 (0.577)	Data Time 0.001 (0.017)	Loss 3.5090 (3.2591)	Entropy 1.59423 (1.60097)	Top-1 acc 41.797 (46.357)	Top-5 acc 61.719 (69.904)	lr 0.02269
Train [24][1880/3239]	Time 2.338 (0.577)	Data Time 0.002 (0.017)	Loss 3.2935 (3.2592)	Entropy 1.59423 (1.60094)	Top-1 acc 42.188 (46.355)	Top-5 acc 66.797 (69.900)	lr 0.02269
Train [24][1890/3239]	Time 0.262 (0.575)	Data Time 0.002 (0.017)	Loss 3.1390 (3.2594)	Entropy 1.59418 (1.60090)	Top-1 acc 47.266 (46.352)	Top-5 acc 72.266 (69.901)	lr 0.02269
Train [24][1900/3239]	Time 0.217 (0.574)	Data Time 0.002 (0.017)	Loss 3.0653 (3.2593)	Entropy 1.59411 (1.60087)	Top-1 acc 45.703 (46.353)	Top-5 acc 73.438 (69.906)	lr 0.02269
Train [24][1910/3239]	Time 0.219 (0.573)	Data Time 0.001 (0.017)	Loss 3.3623 (3.2591)	Entropy 1.59403 (1.60083)	Top-1 acc 46.094 (46.360)	Top-5 acc 66.797 (69.913)	lr 0.02269
Train [24][1920/3239]	Time 0.195 (0.573)	Data Time 0.001 (0.017)	Loss 3.1925 (3.2592)	Entropy 1.59395 (1.60079)	Top-1 acc 44.922 (46.361)	Top-5 acc 69.922 (69.911)	lr 0.02269
Train [24][1930/3239]	Time 0.160 (0.572)	Data Time 0.001 (0.017)	Loss 3.2373 (3.2591)	Entropy 1.59390 (1.60076)	Top-1 acc 47.266 (46.362)	Top-5 acc 67.969 (69.907)	lr 0.02269
Train [24][1940/3239]	Time 0.301 (0.572)	Data Time 0.001 (0.017)	Loss 3.2839 (3.2591)	Entropy 1.59380 (1.60072)	Top-1 acc 49.609 (46.361)	Top-5 acc 67.578 (69.905)	lr 0.02269
Train [24][1950/3239]	Time 0.205 (0.571)	Data Time 0.001 (0.017)	Loss 3.1334 (3.2591)	Entropy 1.59360 (1.60069)	Top-1 acc 46.484 (46.362)	Top-5 acc 72.266 (69.905)	lr 0.02269
Train [24][1960/3239]	Time 0.217 (0.570)	Data Time 0.001 (0.017)	Loss 3.3368 (3.2593)	Entropy 1.59360 (1.60065)	Top-1 acc 42.578 (46.355)	Top-5 acc 69.531 (69.902)	lr 0.02269
Train [24][1970/3239]	Time 0.196 (0.570)	Data Time 0.001 (0.017)	Loss 3.4188 (3.2594)	Entropy 1.59354 (1.60062)	Top-1 acc 43.359 (46.352)	Top-5 acc 66.797 (69.896)	lr 0.02269
Train [24][1980/3239]	Time 0.207 (0.569)	Data Time 0.001 (0.017)	Loss 3.0966 (3.2594)	Entropy 1.59347 (1.60058)	Top-1 acc 48.047 (46.354)	Top-5 acc 73.438 (69.899)	lr 0.02269
Train [24][1990/3239]	Time 2.382 (0.568)	Data Time 0.001 (0.016)	Loss 3.2755 (3.2592)	Entropy 1.59347 (1.60054)	Top-1 acc 45.312 (46.357)	Top-5 acc 68.359 (69.900)	lr 0.02269
Train [24][2000/3239]	Time 0.303 (0.567)	Data Time 0.001 (0.016)	Loss 3.2820 (3.2593)	Entropy 1.59343 (1.60051)	Top-1 acc 45.312 (46.356)	Top-5 acc 69.141 (69.901)	lr 0.02269
Train [24][2010/3239]	Time 0.240 (0.566)	Data Time 0.001 (0.016)	Loss 3.3509 (3.2593)	Entropy 1.59341 (1.60047)	Top-1 acc 46.875 (46.359)	Top-5 acc 66.406 (69.901)	lr 0.02269
Train [24][2020/3239]	Time 0.232 (0.566)	Data Time 0.001 (0.016)	Loss 3.5252 (3.2594)	Entropy 1.59315 (1.60044)	Top-1 acc 41.797 (46.355)	Top-5 acc 64.844 (69.898)	lr 0.02268
Train [24][2030/3239]	Time 0.200 (0.565)	Data Time 0.001 (0.016)	Loss 3.3525 (3.2593)	Entropy 1.59310 (1.60040)	Top-1 acc 44.141 (46.358)	Top-5 acc 67.578 (69.897)	lr 0.02268
Train [24][2040/3239]	Time 0.220 (0.564)	Data Time 0.001 (0.016)	Loss 3.2399 (3.2591)	Entropy 1.59299 (1.60036)	Top-1 acc 48.047 (46.366)	Top-5 acc 69.531 (69.901)	lr 0.02268
Train [24][2050/3239]	Time 0.160 (0.564)	Data Time 0.001 (0.016)	Loss 3.2347 (3.2588)	Entropy 1.59272 (1.60033)	Top-1 acc 44.141 (46.370)	Top-5 acc 71.875 (69.908)	lr 0.02268
Train [24][2060/3239]	Time 0.204 (0.563)	Data Time 0.001 (0.016)	Loss 3.2415 (3.2590)	Entropy 1.59257 (1.60029)	Top-1 acc 45.703 (46.361)	Top-5 acc 70.703 (69.902)	lr 0.02268
Train [24][2070/3239]	Time 0.309 (0.563)	Data Time 0.001 (0.016)	Loss 3.1557 (3.2586)	Entropy 1.59245 (1.60025)	Top-1 acc 50.781 (46.370)	Top-5 acc 73.047 (69.917)	lr 0.02268
Train [24][2080/3239]	Time 0.193 (0.562)	Data Time 0.001 (0.016)	Loss 3.3011 (3.2585)	Entropy 1.59232 (1.60022)	Top-1 acc 46.875 (46.374)	Top-5 acc 68.750 (69.915)	lr 0.02268
Train [24][2090/3239]	Time 0.225 (0.561)	Data Time 0.001 (0.016)	Loss 3.0625 (3.2584)	Entropy 1.59228 (1.60018)	Top-1 acc 48.047 (46.375)	Top-5 acc 72.266 (69.916)	lr 0.02268
Train [24][2100/3239]	Time 2.255 (0.561)	Data Time 0.001 (0.016)	Loss 3.2721 (3.2583)	Entropy 1.59228 (1.60014)	Top-1 acc 45.703 (46.379)	Top-5 acc 71.094 (69.916)	lr 0.02268
Train [24][2110/3239]	Time 0.208 (0.559)	Data Time 0.001 (0.016)	Loss 3.3407 (3.2584)	Entropy 1.59225 (1.60010)	Top-1 acc 49.609 (46.379)	Top-5 acc 67.578 (69.913)	lr 0.02268
Train [24][2120/3239]	Time 0.223 (0.559)	Data Time 0.001 (0.016)	Loss 3.3841 (3.2583)	Entropy 1.59222 (1.60007)	Top-1 acc 45.312 (46.386)	Top-5 acc 67.578 (69.916)	lr 0.02268
Train [24][2130/3239]	Time 0.349 (0.558)	Data Time 0.002 (0.016)	Loss 3.1806 (3.2584)	Entropy 1.59220 (1.60003)	Top-1 acc 44.922 (46.378)	Top-5 acc 74.219 (69.917)	lr 0.02268
Train [24][2140/3239]	Time 0.229 (0.558)	Data Time 0.001 (0.015)	Loss 3.2764 (3.2584)	Entropy 1.59210 (1.59999)	Top-1 acc 47.266 (46.377)	Top-5 acc 72.266 (69.923)	lr 0.02268
Train [24][2150/3239]	Time 0.194 (0.557)	Data Time 0.001 (0.015)	Loss 3.3352 (3.2583)	Entropy 1.59205 (1.59996)	Top-1 acc 43.359 (46.377)	Top-5 acc 69.922 (69.924)	lr 0.02268
Train [24][2160/3239]	Time 0.218 (0.557)	Data Time 0.002 (0.015)	Loss 3.1725 (3.2582)	Entropy 1.59190 (1.59992)	Top-1 acc 46.875 (46.377)	Top-5 acc 66.797 (69.924)	lr 0.02268
Train [24][2170/3239]	Time 0.209 (0.556)	Data Time 0.001 (0.015)	Loss 3.2344 (3.2582)	Entropy 1.59137 (1.59988)	Top-1 acc 47.266 (46.380)	Top-5 acc 69.141 (69.922)	lr 0.02268
Train [24][2180/3239]	Time 0.233 (0.555)	Data Time 0.001 (0.015)	Loss 3.2003 (3.2579)	Entropy 1.59124 (1.59984)	Top-1 acc 48.828 (46.385)	Top-5 acc 71.094 (69.927)	lr 0.02268
Train [24][2190/3239]	Time 0.349 (0.576)	Data Time 0.002 (0.015)	Loss 3.2781 (3.2580)	Entropy 1.59124 (1.59980)	Top-1 acc 46.484 (46.384)	Top-5 acc 69.922 (69.928)	lr 0.02267
Train [24][2200/3239]	Time 0.220 (0.575)	Data Time 0.002 (0.015)	Loss 3.3744 (3.2582)	Entropy 1.59117 (1.59976)	Top-1 acc 44.141 (46.380)	Top-5 acc 69.922 (69.926)	lr 0.02267
Train [24][2210/3239]	Time 2.395 (0.574)	Data Time 0.002 (0.015)	Loss 3.3347 (3.2582)	Entropy 1.59117 (1.59972)	Top-1 acc 45.312 (46.383)	Top-5 acc 70.703 (69.925)	lr 0.02267
Train [24][2220/3239]	Time 0.222 (0.573)	Data Time 0.002 (0.015)	Loss 3.2054 (3.2582)	Entropy 1.59102 (1.59969)	Top-1 acc 49.609 (46.382)	Top-5 acc 73.047 (69.929)	lr 0.02267
Train [24][2230/3239]	Time 0.219 (0.572)	Data Time 0.002 (0.015)	Loss 3.3592 (3.2582)	Entropy 1.59102 (1.59965)	Top-1 acc 44.141 (46.380)	Top-5 acc 69.531 (69.929)	lr 0.02267
Train [24][2240/3239]	Time 0.298 (0.572)	Data Time 0.001 (0.015)	Loss 3.0227 (3.2579)	Entropy 1.59099 (1.59961)	Top-1 acc 51.562 (46.385)	Top-5 acc 75.391 (69.933)	lr 0.02267
Train [24][2250/3239]	Time 0.223 (0.571)	Data Time 0.001 (0.015)	Loss 3.2389 (3.2581)	Entropy 1.59094 (1.59957)	Top-1 acc 50.000 (46.384)	Top-5 acc 73.047 (69.934)	lr 0.02267
Train [24][2260/3239]	Time 0.160 (0.571)	Data Time 0.001 (0.015)	Loss 3.1311 (3.2582)	Entropy 1.59091 (1.59953)	Top-1 acc 49.609 (46.381)	Top-5 acc 70.703 (69.931)	lr 0.02267
Train [24][2270/3239]	Time 0.224 (0.570)	Data Time 0.001 (0.015)	Loss 3.0816 (3.2581)	Entropy 1.59091 (1.59949)	Top-1 acc 50.781 (46.381)	Top-5 acc 73.438 (69.934)	lr 0.02267
Train [24][2280/3239]	Time 0.230 (0.569)	Data Time 0.001 (0.015)	Loss 3.0916 (3.2580)	Entropy 1.59079 (1.59946)	Top-1 acc 51.953 (46.383)	Top-5 acc 72.266 (69.933)	lr 0.02267
Train [24][2290/3239]	Time 0.202 (0.569)	Data Time 0.001 (0.015)	Loss 3.1026 (3.2578)	Entropy 1.59081 (1.59942)	Top-1 acc 47.656 (46.385)	Top-5 acc 74.609 (69.935)	lr 0.02267
Train [24][2300/3239]	Time 0.327 (0.568)	Data Time 0.001 (0.015)	Loss 3.2619 (3.2578)	Entropy 1.59069 (1.59938)	Top-1 acc 48.047 (46.391)	Top-5 acc 70.312 (69.936)	lr 0.02267
Train [24][2310/3239]	Time 0.220 (0.568)	Data Time 0.001 (0.014)	Loss 3.2061 (3.2577)	Entropy 1.59064 (1.59934)	Top-1 acc 54.297 (46.397)	Top-5 acc 71.094 (69.938)	lr 0.02267
Train [24][2320/3239]	Time 2.339 (0.567)	Data Time 0.001 (0.014)	Loss 3.0079 (3.2574)	Entropy 1.59064 (1.59930)	Top-1 acc 50.391 (46.403)	Top-5 acc 75.391 (69.942)	lr 0.02267
Train [24][2330/3239]	Time 0.260 (0.566)	Data Time 0.001 (0.014)	Loss 3.1595 (3.2573)	Entropy 1.59061 (1.59927)	Top-1 acc 47.656 (46.401)	Top-5 acc 70.312 (69.944)	lr 0.02267
Train [24][2340/3239]	Time 0.223 (0.565)	Data Time 0.001 (0.014)	Loss 3.0131 (3.2572)	Entropy 1.59060 (1.59923)	Top-1 acc 49.219 (46.403)	Top-5 acc 75.000 (69.948)	lr 0.02267
Train [24][2350/3239]	Time 0.227 (0.565)	Data Time 0.001 (0.014)	Loss 3.4148 (3.2573)	Entropy 1.59056 (1.59919)	Top-1 acc 46.484 (46.403)	Top-5 acc 66.406 (69.944)	lr 0.02267
Train [24][2360/3239]	Time 0.226 (0.564)	Data Time 0.002 (0.014)	Loss 2.9955 (3.2572)	Entropy 1.59063 (1.59916)	Top-1 acc 52.344 (46.403)	Top-5 acc 76.172 (69.948)	lr 0.02266
Train [24][2370/3239]	Time 0.251 (0.564)	Data Time 0.001 (0.014)	Loss 3.4737 (3.2572)	Entropy 1.59055 (1.59912)	Top-1 acc 41.797 (46.404)	Top-5 acc 66.797 (69.951)	lr 0.02266
Train [24][2380/3239]	Time 0.164 (0.563)	Data Time 0.001 (0.014)	Loss 3.3011 (3.2570)	Entropy 1.59051 (1.59908)	Top-1 acc 47.656 (46.410)	Top-5 acc 70.703 (69.954)	lr 0.02266
Train [24][2390/3239]	Time 0.210 (0.563)	Data Time 0.001 (0.014)	Loss 3.2172 (3.2570)	Entropy 1.59045 (1.59905)	Top-1 acc 48.047 (46.411)	Top-5 acc 71.094 (69.951)	lr 0.02266
Train [24][2400/3239]	Time 0.236 (0.562)	Data Time 0.001 (0.014)	Loss 3.4426 (3.2571)	Entropy 1.59039 (1.59901)	Top-1 acc 42.969 (46.412)	Top-5 acc 64.062 (69.948)	lr 0.02266
Train [24][2410/3239]	Time 0.336 (0.562)	Data Time 0.001 (0.014)	Loss 3.0725 (3.2572)	Entropy 1.59034 (1.59898)	Top-1 acc 50.781 (46.412)	Top-5 acc 75.391 (69.942)	lr 0.02266
Train [24][2420/3239]	Time 0.242 (0.561)	Data Time 0.001 (0.014)	Loss 3.4850 (3.2574)	Entropy 1.59017 (1.59894)	Top-1 acc 39.453 (46.411)	Top-5 acc 64.844 (69.939)	lr 0.02266
Train [24][2430/3239]	Time 2.452 (0.561)	Data Time 0.001 (0.014)	Loss 3.3884 (3.2577)	Entropy 1.59017 (1.59891)	Top-1 acc 42.188 (46.402)	Top-5 acc 66.797 (69.932)	lr 0.02266
Train [24][2440/3239]	Time 0.235 (0.560)	Data Time 0.001 (0.014)	Loss 3.0872 (3.2578)	Entropy 1.59014 (1.59887)	Top-1 acc 53.516 (46.402)	Top-5 acc 75.000 (69.930)	lr 0.02266
Train [24][2450/3239]	Time 0.273 (0.559)	Data Time 0.002 (0.014)	Loss 3.3842 (3.2580)	Entropy 1.59016 (1.59883)	Top-1 acc 44.922 (46.399)	Top-5 acc 69.141 (69.927)	lr 0.02266
Train [24][2460/3239]	Time 0.204 (0.559)	Data Time 0.001 (0.014)	Loss 3.2650 (3.2583)	Entropy 1.59010 (1.59880)	Top-1 acc 44.531 (46.397)	Top-5 acc 68.750 (69.921)	lr 0.02266
Train [24][2470/3239]	Time 0.313 (0.558)	Data Time 0.001 (0.014)	Loss 3.3651 (3.2585)	Entropy 1.58998 (1.59876)	Top-1 acc 44.531 (46.392)	Top-5 acc 66.016 (69.919)	lr 0.02266
Train [24][2480/3239]	Time 0.191 (0.558)	Data Time 0.001 (0.014)	Loss 3.4071 (3.2588)	Entropy 1.58986 (1.59873)	Top-1 acc 46.875 (46.392)	Top-5 acc 65.234 (69.913)	lr 0.02266
Train [24][2490/3239]	Time 0.212 (0.557)	Data Time 0.002 (0.014)	Loss 3.3672 (3.2589)	Entropy 1.58970 (1.59869)	Top-1 acc 41.797 (46.387)	Top-5 acc 66.406 (69.907)	lr 0.02266
Train [24][2500/3239]	Time 0.231 (0.557)	Data Time 0.001 (0.014)	Loss 3.4324 (3.2591)	Entropy 1.58960 (1.59866)	Top-1 acc 42.188 (46.383)	Top-5 acc 67.188 (69.904)	lr 0.02266
Train [24][2510/3239]	Time 0.212 (0.556)	Data Time 0.001 (0.013)	Loss 3.1700 (3.2589)	Entropy 1.58952 (1.59862)	Top-1 acc 45.703 (46.384)	Top-5 acc 73.438 (69.910)	lr 0.02266
Train [24][2520/3239]	Time 0.221 (0.556)	Data Time 0.001 (0.013)	Loss 3.3114 (3.2591)	Entropy 1.58941 (1.59858)	Top-1 acc 46.094 (46.381)	Top-5 acc 68.750 (69.907)	lr 0.02266
Train [24][2530/3239]	Time 0.309 (0.556)	Data Time 0.001 (0.013)	Loss 3.3214 (3.2592)	Entropy 1.58938 (1.59855)	Top-1 acc 47.266 (46.383)	Top-5 acc 69.141 (69.904)	lr 0.02265
Train [24][2540/3239]	Time 2.348 (0.555)	Data Time 0.001 (0.013)	Loss 3.2844 (3.2592)	Entropy 1.58938 (1.59851)	Top-1 acc 46.484 (46.382)	Top-5 acc 70.703 (69.902)	lr 0.02265
Train [24][2550/3239]	Time 0.196 (0.554)	Data Time 0.001 (0.013)	Loss 3.3650 (3.2593)	Entropy 1.58926 (1.59847)	Top-1 acc 48.438 (46.382)	Top-5 acc 68.750 (69.900)	lr 0.02265
Train [24][2560/3239]	Time 0.214 (0.570)	Data Time 0.002 (0.013)	Loss 3.1673 (3.2593)	Entropy 1.58924 (1.59844)	Top-1 acc 50.781 (46.384)	Top-5 acc 71.875 (69.900)	lr 0.02265
Train [24][2570/3239]	Time 0.231 (0.570)	Data Time 0.002 (0.013)	Loss 3.2766 (3.2590)	Entropy 1.58917 (1.59840)	Top-1 acc 46.094 (46.387)	Top-5 acc 70.312 (69.905)	lr 0.02265
Train [24][2580/3239]	Time 0.207 (0.569)	Data Time 0.001 (0.013)	Loss 3.3712 (3.2590)	Entropy 1.58913 (1.59837)	Top-1 acc 42.188 (46.383)	Top-5 acc 64.062 (69.902)	lr 0.02265
Train [24][2590/3239]	Time 0.249 (0.569)	Data Time 0.001 (0.013)	Loss 3.1738 (3.2591)	Entropy 1.58908 (1.59833)	Top-1 acc 46.094 (46.380)	Top-5 acc 71.875 (69.904)	lr 0.02265
Train [24][2600/3239]	Time 0.199 (0.568)	Data Time 0.001 (0.013)	Loss 3.1906 (3.2590)	Entropy 1.58903 (1.59830)	Top-1 acc 46.875 (46.385)	Top-5 acc 71.875 (69.907)	lr 0.02265
Train [24][2610/3239]	Time 0.250 (0.568)	Data Time 0.001 (0.013)	Loss 3.1890 (3.2589)	Entropy 1.58888 (1.59826)	Top-1 acc 45.703 (46.386)	Top-5 acc 70.703 (69.909)	lr 0.02265
Train [24][2620/3239]	Time 0.194 (0.567)	Data Time 0.001 (0.013)	Loss 3.2354 (3.2591)	Entropy 1.58885 (1.59822)	Top-1 acc 48.828 (46.385)	Top-5 acc 72.266 (69.905)	lr 0.02265
Train [24][2630/3239]	Time 0.239 (0.567)	Data Time 0.002 (0.013)	Loss 3.3236 (3.2590)	Entropy 1.58870 (1.59819)	Top-1 acc 44.531 (46.385)	Top-5 acc 68.359 (69.906)	lr 0.02265
Train [24][2640/3239]	Time 0.252 (0.567)	Data Time 0.001 (0.013)	Loss 3.3427 (3.2589)	Entropy 1.58869 (1.59815)	Top-1 acc 44.531 (46.389)	Top-5 acc 65.234 (69.906)	lr 0.02265
Train [24][2650/3239]	Time 0.330 (0.566)	Data Time 0.001 (0.013)	Loss 3.2443 (3.2591)	Entropy 1.58841 (1.59812)	Top-1 acc 48.047 (46.384)	Top-5 acc 68.359 (69.902)	lr 0.02265
Train [24][2660/3239]	Time 0.240 (0.566)	Data Time 0.001 (0.013)	Loss 3.0861 (3.2587)	Entropy 1.58820 (1.59808)	Top-1 acc 48.438 (46.391)	Top-5 acc 70.703 (69.911)	lr 0.02265
Train [24][2670/3239]	Time 0.252 (0.565)	Data Time 0.001 (0.013)	Loss 3.4622 (3.2587)	Entropy 1.58806 (1.59804)	Top-1 acc 43.359 (46.390)	Top-5 acc 64.844 (69.913)	lr 0.02265
Train [24][2680/3239]	Time 0.221 (0.565)	Data Time 0.002 (0.013)	Loss 3.1081 (3.2586)	Entropy 1.58803 (1.59801)	Top-1 acc 51.172 (46.397)	Top-5 acc 76.172 (69.919)	lr 0.02265
Train [24][2690/3239]	Time 0.232 (0.564)	Data Time 0.001 (0.013)	Loss 3.4779 (3.2584)	Entropy 1.58800 (1.59797)	Top-1 acc 42.578 (46.402)	Top-5 acc 63.672 (69.919)	lr 0.02265
Train [24][2700/3239]	Time 0.250 (0.564)	Data Time 0.001 (0.013)	Loss 3.4138 (3.2585)	Entropy 1.58792 (1.59793)	Top-1 acc 39.844 (46.396)	Top-5 acc 68.750 (69.918)	lr 0.02264
Train [24][2710/3239]	Time 0.317 (0.563)	Data Time 0.001 (0.013)	Loss 3.2299 (3.2585)	Entropy 1.58790 (1.59789)	Top-1 acc 42.969 (46.394)	Top-5 acc 71.875 (69.919)	lr 0.02264
Train [24][2720/3239]	Time 0.231 (0.563)	Data Time 0.001 (0.013)	Loss 3.3095 (3.2584)	Entropy 1.58789 (1.59786)	Top-1 acc 45.312 (46.393)	Top-5 acc 70.703 (69.920)	lr 0.02264
Train [24][2730/3239]	Time 0.235 (0.562)	Data Time 0.001 (0.013)	Loss 3.3308 (3.2584)	Entropy 1.58775 (1.59782)	Top-1 acc 45.703 (46.394)	Top-5 acc 67.969 (69.914)	lr 0.02264
Train [24][2740/3239]	Time 0.236 (0.562)	Data Time 0.003 (0.012)	Loss 3.2072 (3.2582)	Entropy 1.58764 (1.59778)	Top-1 acc 46.094 (46.398)	Top-5 acc 71.875 (69.920)	lr 0.02264
Train [24][2750/3239]	Time 0.248 (0.562)	Data Time 0.001 (0.012)	Loss 3.2585 (3.2581)	Entropy 1.58759 (1.59775)	Top-1 acc 46.484 (46.399)	Top-5 acc 66.406 (69.923)	lr 0.02264
Train [24][2760/3239]	Time 0.250 (0.561)	Data Time 0.001 (0.012)	Loss 3.2238 (3.2581)	Entropy 1.58746 (1.59771)	Top-1 acc 48.438 (46.405)	Top-5 acc 71.094 (69.923)	lr 0.02264
Train [24][2770/3239]	Time 0.316 (0.561)	Data Time 0.001 (0.012)	Loss 3.2755 (3.2582)	Entropy 1.58746 (1.59767)	Top-1 acc 45.703 (46.400)	Top-5 acc 71.875 (69.923)	lr 0.02264
Train [24][2780/3239]	Time 0.189 (0.560)	Data Time 0.001 (0.012)	Loss 3.3886 (3.2580)	Entropy 1.58742 (1.59764)	Top-1 acc 45.703 (46.402)	Top-5 acc 68.359 (69.926)	lr 0.02264
Train [24][2790/3239]	Time 0.204 (0.560)	Data Time 0.001 (0.012)	Loss 3.1159 (3.2580)	Entropy 1.58745 (1.59760)	Top-1 acc 51.562 (46.401)	Top-5 acc 70.703 (69.926)	lr 0.02264
Train [24][2800/3239]	Time 0.211 (0.559)	Data Time 0.001 (0.012)	Loss 3.1704 (3.2580)	Entropy 1.58729 (1.59756)	Top-1 acc 43.750 (46.397)	Top-5 acc 71.875 (69.925)	lr 0.02264
Train [24][2810/3239]	Time 0.217 (0.559)	Data Time 0.001 (0.012)	Loss 3.3438 (3.2581)	Entropy 1.58724 (1.59753)	Top-1 acc 44.922 (46.393)	Top-5 acc 67.188 (69.924)	lr 0.02264
Train [24][2820/3239]	Time 0.257 (0.559)	Data Time 0.002 (0.012)	Loss 3.1722 (3.2580)	Entropy 1.58721 (1.59749)	Top-1 acc 48.047 (46.394)	Top-5 acc 70.703 (69.924)	lr 0.02264
Train [24][2830/3239]	Time 0.314 (0.558)	Data Time 0.001 (0.012)	Loss 3.3807 (3.2581)	Entropy 1.58717 (1.59745)	Top-1 acc 40.625 (46.392)	Top-5 acc 69.922 (69.924)	lr 0.02264
Train [24][2840/3239]	Time 0.251 (0.558)	Data Time 0.001 (0.012)	Loss 3.1390 (3.2580)	Entropy 1.58715 (1.59742)	Top-1 acc 51.562 (46.392)	Top-5 acc 75.000 (69.927)	lr 0.02264
Train [24][2850/3239]	Time 0.189 (0.557)	Data Time 0.001 (0.012)	Loss 3.1826 (3.2581)	Entropy 1.58708 (1.59738)	Top-1 acc 46.484 (46.392)	Top-5 acc 70.703 (69.931)	lr 0.02264
Train [24][2860/3239]	Time 0.195 (0.557)	Data Time 0.001 (0.012)	Loss 3.0071 (3.2580)	Entropy 1.58715 (1.59735)	Top-1 acc 53.516 (46.394)	Top-5 acc 75.000 (69.927)	lr 0.02264
Train [24][2870/3239]	Time 0.202 (0.557)	Data Time 0.001 (0.012)	Loss 3.2189 (3.2581)	Entropy 1.58700 (1.59731)	Top-1 acc 50.391 (46.390)	Top-5 acc 71.484 (69.926)	lr 0.02263
Train [24][2880/3239]	Time 0.243 (0.556)	Data Time 0.001 (0.012)	Loss 3.3022 (3.2581)	Entropy 1.58695 (1.59727)	Top-1 acc 45.312 (46.391)	Top-5 acc 67.188 (69.926)	lr 0.02263
Train [24][2890/3239]	Time 0.412 (0.571)	Data Time 0.005 (0.012)	Loss 3.2287 (3.2581)	Entropy 1.58699 (1.59724)	Top-1 acc 46.875 (46.393)	Top-5 acc 70.312 (69.923)	lr 0.02263
Train [24][2900/3239]	Time 0.244 (0.570)	Data Time 0.002 (0.012)	Loss 3.2106 (3.2581)	Entropy 1.58687 (1.59720)	Top-1 acc 48.828 (46.391)	Top-5 acc 71.094 (69.923)	lr 0.02263
Train [24][2910/3239]	Time 0.225 (0.570)	Data Time 0.002 (0.012)	Loss 3.3013 (3.2581)	Entropy 1.58680 (1.59717)	Top-1 acc 44.141 (46.392)	Top-5 acc 67.969 (69.923)	lr 0.02263
Train [24][2920/3239]	Time 0.247 (0.570)	Data Time 0.003 (0.012)	Loss 3.1539 (3.2582)	Entropy 1.58682 (1.59713)	Top-1 acc 51.172 (46.389)	Top-5 acc 72.656 (69.921)	lr 0.02263
Train [24][2930/3239]	Time 0.260 (0.569)	Data Time 0.001 (0.012)	Loss 3.5018 (3.2584)	Entropy 1.58672 (1.59710)	Top-1 acc 41.797 (46.385)	Top-5 acc 62.891 (69.915)	lr 0.02263
Train [24][2940/3239]	Time 0.279 (0.569)	Data Time 0.001 (0.012)	Loss 3.2015 (3.2586)	Entropy 1.58667 (1.59706)	Top-1 acc 52.344 (46.385)	Top-5 acc 71.094 (69.910)	lr 0.02263
Train [24][2950/3239]	Time 0.356 (0.568)	Data Time 0.001 (0.012)	Loss 3.3048 (3.2587)	Entropy 1.58662 (1.59703)	Top-1 acc 42.969 (46.383)	Top-5 acc 69.531 (69.910)	lr 0.02263
Train [24][2960/3239]	Time 0.271 (0.568)	Data Time 0.001 (0.012)	Loss 3.2370 (3.2588)	Entropy 1.58657 (1.59699)	Top-1 acc 48.438 (46.384)	Top-5 acc 68.750 (69.907)	lr 0.02263
Train [24][2970/3239]	Time 0.215 (0.568)	Data Time 0.001 (0.012)	Loss 3.3454 (3.2588)	Entropy 1.58652 (1.59696)	Top-1 acc 42.578 (46.384)	Top-5 acc 69.922 (69.908)	lr 0.02263
Train [24][2980/3239]	Time 0.244 (0.567)	Data Time 0.001 (0.012)	Loss 3.3396 (3.2586)	Entropy 1.58650 (1.59692)	Top-1 acc 47.656 (46.391)	Top-5 acc 69.141 (69.913)	lr 0.02263
Train [24][2990/3239]	Time 0.274 (0.567)	Data Time 0.001 (0.012)	Loss 3.2144 (3.2584)	Entropy 1.58633 (1.59689)	Top-1 acc 46.875 (46.391)	Top-5 acc 69.141 (69.916)	lr 0.02263
Train [24][3000/3239]	Time 0.279 (0.566)	Data Time 0.004 (0.012)	Loss 3.4402 (3.2586)	Entropy 1.58626 (1.59685)	Top-1 acc 42.578 (46.385)	Top-5 acc 65.234 (69.910)	lr 0.02263
Train [24][3010/3239]	Time 0.295 (0.566)	Data Time 0.002 (0.012)	Loss 3.2123 (3.2585)	Entropy 1.58621 (1.59682)	Top-1 acc 47.656 (46.387)	Top-5 acc 69.922 (69.913)	lr 0.02263
Train [24][3020/3239]	Time 0.216 (0.566)	Data Time 0.001 (0.012)	Loss 3.1028 (3.2585)	Entropy 1.58618 (1.59678)	Top-1 acc 48.828 (46.389)	Top-5 acc 74.219 (69.912)	lr 0.02263
Train [24][3030/3239]	Time 0.238 (0.565)	Data Time 0.001 (0.011)	Loss 3.2733 (3.2585)	Entropy 1.58605 (1.59675)	Top-1 acc 46.094 (46.387)	Top-5 acc 71.094 (69.914)	lr 0.02263
Train [24][3040/3239]	Time 0.226 (0.565)	Data Time 0.001 (0.011)	Loss 3.2568 (3.2586)	Entropy 1.58590 (1.59671)	Top-1 acc 45.312 (46.386)	Top-5 acc 69.531 (69.912)	lr 0.02262
Train [24][3050/3239]	Time 0.282 (0.564)	Data Time 0.001 (0.011)	Loss 3.3064 (3.2584)	Entropy 1.58585 (1.59668)	Top-1 acc 47.266 (46.392)	Top-5 acc 68.750 (69.917)	lr 0.02262
Train [24][3060/3239]	Time 0.278 (0.564)	Data Time 0.001 (0.011)	Loss 3.3427 (3.2585)	Entropy 1.58573 (1.59664)	Top-1 acc 40.234 (46.387)	Top-5 acc 70.703 (69.917)	lr 0.02262
Train [24][3070/3239]	Time 0.206 (0.564)	Data Time 0.001 (0.011)	Loss 3.3267 (3.2588)	Entropy 1.58563 (1.59660)	Top-1 acc 42.969 (46.376)	Top-5 acc 68.359 (69.909)	lr 0.02262
Train [24][3080/3239]	Time 0.354 (0.563)	Data Time 0.002 (0.011)	Loss 3.1785 (3.2587)	Entropy 1.58547 (1.59657)	Top-1 acc 51.953 (46.380)	Top-5 acc 71.484 (69.913)	lr 0.02262
Train [24][3090/3239]	Time 0.241 (0.563)	Data Time 0.001 (0.011)	Loss 3.1183 (3.2587)	Entropy 1.58531 (1.59653)	Top-1 acc 53.516 (46.382)	Top-5 acc 75.781 (69.915)	lr 0.02262
Train [24][3100/3239]	Time 0.233 (0.562)	Data Time 0.001 (0.011)	Loss 3.1782 (3.2587)	Entropy 1.58531 (1.59650)	Top-1 acc 48.828 (46.377)	Top-5 acc 73.047 (69.914)	lr 0.02262
Train [24][3110/3239]	Time 0.223 (0.562)	Data Time 0.001 (0.011)	Loss 3.1169 (3.2587)	Entropy 1.58519 (1.59646)	Top-1 acc 50.781 (46.377)	Top-5 acc 70.703 (69.917)	lr 0.02262
Train [24][3120/3239]	Time 0.263 (0.562)	Data Time 0.001 (0.011)	Loss 3.0740 (3.2586)	Entropy 1.58511 (1.59642)	Top-1 acc 51.953 (46.380)	Top-5 acc 75.391 (69.919)	lr 0.02262
Train [24][3130/3239]	Time 0.257 (0.561)	Data Time 0.001 (0.011)	Loss 3.1859 (3.2588)	Entropy 1.58496 (1.59639)	Top-1 acc 44.922 (46.376)	Top-5 acc 73.438 (69.913)	lr 0.02262
Train [24][3140/3239]	Time 0.180 (0.561)	Data Time 0.001 (0.011)	Loss 3.2559 (3.2585)	Entropy 1.58486 (1.59635)	Top-1 acc 46.484 (46.381)	Top-5 acc 70.312 (69.920)	lr 0.02262
Train [24][3150/3239]	Time 0.205 (0.560)	Data Time 0.001 (0.011)	Loss 3.2599 (3.2585)	Entropy 1.58483 (1.59632)	Top-1 acc 46.094 (46.379)	Top-5 acc 68.750 (69.922)	lr 0.02262
Train [24][3160/3239]	Time 0.230 (0.560)	Data Time 0.002 (0.011)	Loss 3.3000 (3.2586)	Entropy 1.58474 (1.59628)	Top-1 acc 45.312 (46.377)	Top-5 acc 67.188 (69.921)	lr 0.02262
Train [24][3170/3239]	Time 0.240 (0.560)	Data Time 0.001 (0.011)	Loss 3.3865 (3.2586)	Entropy 1.58470 (1.59624)	Top-1 acc 37.891 (46.376)	Top-5 acc 66.406 (69.918)	lr 0.02262
Train [24][3180/3239]	Time 0.213 (0.559)	Data Time 0.000 (0.011)	Loss 3.1755 (3.2585)	Entropy 1.58461 (1.59621)	Top-1 acc 50.781 (46.376)	Top-5 acc 70.312 (69.919)	lr 0.02262
Train [24][3190/3239]	Time 0.223 (0.559)	Data Time 0.000 (0.011)	Loss 3.1138 (3.2586)	Entropy 1.58447 (1.59617)	Top-1 acc 49.219 (46.376)	Top-5 acc 75.000 (69.920)	lr 0.02262
Train [24][3200/3239]	Time 0.207 (0.558)	Data Time 0.000 (0.011)	Loss 3.2367 (3.2585)	Entropy 1.58445 (1.59613)	Top-1 acc 46.484 (46.378)	Top-5 acc 70.703 (69.925)	lr 0.02262
Train [24][3210/3239]	Time 0.307 (0.558)	Data Time 0.000 (0.011)	Loss 3.1279 (3.2583)	Entropy 1.58442 (1.59610)	Top-1 acc 45.703 (46.381)	Top-5 acc 70.703 (69.926)	lr 0.02261
Train [24][3220/3239]	Time 0.376 (0.571)	Data Time 0.000 (0.011)	Loss 3.1770 (3.2582)	Entropy 1.58436 (1.59606)	Top-1 acc 50.000 (46.385)	Top-5 acc 70.703 (69.930)	lr 0.02261
Train [24][3230/3239]	Time 0.211 (0.570)	Data Time 0.000 (0.011)	Loss 3.2499 (3.2583)	Entropy 1.58433 (1.59602)	Top-1 acc 47.266 (46.382)	Top-5 acc 68.359 (69.927)	lr 0.02261
Train [24][3239/3239]	Time 2.103 (0.570)	Data Time 0.000 (0.011)	Loss 3.2742 (3.2584)	Entropy 1.58433 (1.59599)	Top-1 acc 51.852 (46.378)	Top-5 acc 67.901 (69.925)	lr 0.02261
==========Valid [24/120]	loss 2.052	top-1 acc 54.766 (54.766)	top-5 acc 78.157	Train top-1 46.378	top-5 69.925	Entropy 1.58433	Latency-None: 0.000ms	Flops: 559.63M
Train [25][0/3239]	Time 32.207 (32.207)	Data Time 30.178 (30.178)	Loss 3.1809 (3.1809)	Entropy 1.58428 (1.58428)	Top-1 acc 41.406 (41.406)	Top-5 acc 73.047 (73.047)	lr 0.02261
Train [25][10/3239]	Time 2.504 (3.570)	Data Time 0.002 (2.878)	Loss 3.3298 (3.1972)	Entropy 1.58428 (1.58428)	Top-1 acc 46.094 (47.479)	Top-5 acc 67.188 (70.987)	lr 0.02261
Train [25][20/3239]	Time 0.264 (1.986)	Data Time 0.001 (1.508)	Loss 3.1537 (3.1944)	Entropy 1.58422 (1.58425)	Top-1 acc 44.531 (47.879)	Top-5 acc 69.922 (71.075)	lr 0.02261
Train [25][30/3239]	Time 0.232 (1.491)	Data Time 0.001 (1.022)	Loss 3.1489 (3.1856)	Entropy 1.58422 (1.58424)	Top-1 acc 51.562 (48.135)	Top-5 acc 72.266 (71.232)	lr 0.02261
Train [25][40/3239]	Time 0.218 (1.236)	Data Time 0.001 (0.773)	Loss 3.4414 (3.1895)	Entropy 1.58419 (1.58423)	Top-1 acc 42.188 (47.732)	Top-5 acc 68.359 (71.294)	lr 0.02261
Train [25][50/3239]	Time 0.226 (1.082)	Data Time 0.001 (0.622)	Loss 3.3008 (3.1953)	Entropy 1.58419 (1.58422)	Top-1 acc 44.141 (47.564)	Top-5 acc 67.578 (71.232)	lr 0.02261
Train [25][60/3239]	Time 0.242 (0.976)	Data Time 0.001 (0.520)	Loss 3.2437 (3.2030)	Entropy 1.58415 (1.58421)	Top-1 acc 49.219 (47.477)	Top-5 acc 71.484 (71.151)	lr 0.02261
Train [25][70/3239]	Time 0.227 (0.901)	Data Time 0.001 (0.447)	Loss 3.1316 (3.2073)	Entropy 1.58412 (1.58420)	Top-1 acc 49.609 (47.453)	Top-5 acc 72.266 (71.253)	lr 0.02261
Train [25][80/3239]	Time 0.231 (0.845)	Data Time 0.001 (0.393)	Loss 3.3792 (3.2251)	Entropy 1.58407 (1.58419)	Top-1 acc 43.750 (47.184)	Top-5 acc 66.797 (70.877)	lr 0.02261
Train [25][90/3239]	Time 0.197 (0.801)	Data Time 0.001 (0.350)	Loss 3.3492 (3.2281)	Entropy 1.58399 (1.58417)	Top-1 acc 44.531 (47.128)	Top-5 acc 69.922 (70.793)	lr 0.02261
Train [25][100/3239]	Time 0.219 (0.767)	Data Time 0.001 (0.315)	Loss 3.3536 (3.2285)	Entropy 1.58389 (1.58415)	Top-1 acc 45.312 (47.223)	Top-5 acc 69.141 (70.773)	lr 0.02261
Train [25][110/3239]	Time 0.216 (0.738)	Data Time 0.001 (0.287)	Loss 3.2054 (3.2270)	Entropy 1.58376 (1.58413)	Top-1 acc 48.828 (47.220)	Top-5 acc 66.797 (70.721)	lr 0.02261
Train [25][120/3239]	Time 2.348 (0.714)	Data Time 0.001 (0.264)	Loss 3.1987 (3.2246)	Entropy 1.58376 (1.58410)	Top-1 acc 48.047 (47.243)	Top-5 acc 74.219 (70.742)	lr 0.02261
Train [25][130/3239]	Time 0.212 (0.677)	Data Time 0.001 (0.244)	Loss 3.2598 (3.2263)	Entropy 1.58366 (1.58406)	Top-1 acc 45.312 (47.272)	Top-5 acc 67.188 (70.629)	lr 0.02260
Train [25][140/3239]	Time 0.268 (0.660)	Data Time 0.001 (0.226)	Loss 3.3049 (3.2346)	Entropy 1.58360 (1.58403)	Top-1 acc 49.609 (47.119)	Top-5 acc 67.188 (70.504)	lr 0.02260
Train [25][150/3239]	Time 0.209 (0.646)	Data Time 0.001 (0.212)	Loss 3.4381 (3.2345)	Entropy 1.58341 (1.58399)	Top-1 acc 44.141 (47.079)	Top-5 acc 66.406 (70.486)	lr 0.02260
Train [25][160/3239]	Time 0.200 (0.634)	Data Time 0.001 (0.198)	Loss 3.3459 (3.2399)	Entropy 1.58336 (1.58395)	Top-1 acc 46.094 (46.979)	Top-5 acc 66.406 (70.334)	lr 0.02260
Train [25][170/3239]	Time 0.203 (0.622)	Data Time 0.001 (0.187)	Loss 3.4115 (3.2367)	Entropy 1.58324 (1.58392)	Top-1 acc 43.359 (47.037)	Top-5 acc 65.234 (70.390)	lr 0.02260
Train [25][180/3239]	Time 0.206 (0.613)	Data Time 0.001 (0.177)	Loss 3.3431 (3.2319)	Entropy 1.58316 (1.58388)	Top-1 acc 48.828 (47.162)	Top-5 acc 67.578 (70.500)	lr 0.02260
Train [25][190/3239]	Time 0.222 (0.604)	Data Time 0.001 (0.168)	Loss 3.2894 (3.2312)	Entropy 1.58307 (1.58384)	Top-1 acc 41.406 (47.188)	Top-5 acc 67.578 (70.501)	lr 0.02260
Train [25][200/3239]	Time 0.324 (0.597)	Data Time 0.001 (0.159)	Loss 3.1626 (3.2294)	Entropy 1.58296 (1.58380)	Top-1 acc 51.172 (47.256)	Top-5 acc 73.047 (70.552)	lr 0.02260
Train [25][210/3239]	Time 0.216 (0.589)	Data Time 0.001 (0.152)	Loss 3.3512 (3.2294)	Entropy 1.58298 (1.58376)	Top-1 acc 46.094 (47.308)	Top-5 acc 64.062 (70.520)	lr 0.02260
Train [25][220/3239]	Time 0.265 (0.583)	Data Time 0.001 (0.145)	Loss 3.1439 (3.2288)	Entropy 1.58298 (1.58372)	Top-1 acc 49.219 (47.303)	Top-5 acc 73.828 (70.518)	lr 0.02260
Train [25][230/3239]	Time 2.349 (0.576)	Data Time 0.001 (0.139)	Loss 3.1099 (3.2255)	Entropy 1.58298 (1.58369)	Top-1 acc 50.391 (47.362)	Top-5 acc 70.312 (70.603)	lr 0.02260
Train [25][240/3239]	Time 0.228 (0.561)	Data Time 0.001 (0.133)	Loss 3.3208 (3.2295)	Entropy 1.58287 (1.58366)	Top-1 acc 44.141 (47.270)	Top-5 acc 71.094 (70.533)	lr 0.02260
Train [25][250/3239]	Time 0.271 (0.557)	Data Time 0.001 (0.128)	Loss 3.4928 (3.2289)	Entropy 1.58286 (1.58362)	Top-1 acc 42.578 (47.256)	Top-5 acc 65.625 (70.540)	lr 0.02260
Train [25][260/3239]	Time 0.343 (0.553)	Data Time 0.001 (0.123)	Loss 2.9682 (3.2278)	Entropy 1.58269 (1.58359)	Top-1 acc 50.391 (47.246)	Top-5 acc 76.562 (70.573)	lr 0.02260
Train [25][270/3239]	Time 0.236 (0.549)	Data Time 0.001 (0.119)	Loss 3.1727 (3.2257)	Entropy 1.58265 (1.58356)	Top-1 acc 45.703 (47.204)	Top-5 acc 71.875 (70.617)	lr 0.02260
Train [25][280/3239]	Time 0.152 (0.545)	Data Time 0.004 (0.114)	Loss 3.2377 (3.2268)	Entropy 1.58263 (1.58352)	Top-1 acc 44.922 (47.150)	Top-5 acc 70.312 (70.575)	lr 0.02260
Train [25][290/3239]	Time 0.225 (0.541)	Data Time 0.001 (0.111)	Loss 3.2637 (3.2270)	Entropy 1.58254 (1.58349)	Top-1 acc 48.828 (47.188)	Top-5 acc 72.266 (70.580)	lr 0.02260
Train [25][300/3239]	Time 0.209 (0.537)	Data Time 0.001 (0.107)	Loss 3.0372 (3.2242)	Entropy 1.58251 (1.58346)	Top-1 acc 53.516 (47.236)	Top-5 acc 75.000 (70.643)	lr 0.02259
Train [25][310/3239]	Time 0.213 (0.535)	Data Time 0.001 (0.104)	Loss 3.1025 (3.2234)	Entropy 1.58244 (1.58343)	Top-1 acc 47.266 (47.232)	Top-5 acc 74.609 (70.693)	lr 0.02259
Train [25][320/3239]	Time 0.308 (0.532)	Data Time 0.001 (0.100)	Loss 3.2972 (3.2238)	Entropy 1.58241 (1.58340)	Top-1 acc 48.828 (47.234)	Top-5 acc 66.797 (70.665)	lr 0.02259
Train [25][330/3239]	Time 0.219 (0.529)	Data Time 0.001 (0.097)	Loss 3.0270 (3.2211)	Entropy 1.58237 (1.58337)	Top-1 acc 49.219 (47.289)	Top-5 acc 76.172 (70.715)	lr 0.02259
Train [25][340/3239]	Time 43.897 (0.648)	Data Time 0.001 (0.095)	Loss 3.3473 (3.2217)	Entropy 1.58237 (1.58334)	Top-1 acc 44.531 (47.253)	Top-5 acc 67.188 (70.711)	lr 0.02259
Train [25][350/3239]	Time 0.378 (0.638)	Data Time 0.002 (0.092)	Loss 3.4087 (3.2248)	Entropy 1.58232 (1.58331)	Top-1 acc 43.359 (47.189)	Top-5 acc 66.797 (70.663)	lr 0.02259
Train [25][360/3239]	Time 0.205 (0.634)	Data Time 0.001 (0.090)	Loss 3.3473 (3.2267)	Entropy 1.58223 (1.58328)	Top-1 acc 44.531 (47.165)	Top-5 acc 69.531 (70.631)	lr 0.02259
Train [25][370/3239]	Time 0.212 (0.629)	Data Time 0.001 (0.087)	Loss 3.2258 (3.2244)	Entropy 1.58219 (1.58325)	Top-1 acc 45.312 (47.191)	Top-5 acc 71.484 (70.682)	lr 0.02259
Train [25][380/3239]	Time 0.339 (0.624)	Data Time 0.002 (0.085)	Loss 3.2591 (3.2235)	Entropy 1.58211 (1.58322)	Top-1 acc 45.312 (47.182)	Top-5 acc 68.750 (70.685)	lr 0.02259
Train [25][390/3239]	Time 0.229 (0.619)	Data Time 0.001 (0.083)	Loss 3.4025 (3.2260)	Entropy 1.58196 (1.58319)	Top-1 acc 43.359 (47.117)	Top-5 acc 69.141 (70.640)	lr 0.02259
Train [25][400/3239]	Time 0.217 (0.615)	Data Time 0.001 (0.081)	Loss 3.1180 (3.2268)	Entropy 1.58184 (1.58316)	Top-1 acc 46.875 (47.094)	Top-5 acc 73.047 (70.624)	lr 0.02259
Train [25][410/3239]	Time 0.268 (0.611)	Data Time 0.001 (0.079)	Loss 3.1171 (3.2267)	Entropy 1.58178 (1.58312)	Top-1 acc 50.781 (47.096)	Top-5 acc 71.875 (70.593)	lr 0.02259
Train [25][420/3239]	Time 0.208 (0.607)	Data Time 0.001 (0.077)	Loss 3.2488 (3.2283)	Entropy 1.58170 (1.58309)	Top-1 acc 49.609 (47.084)	Top-5 acc 70.312 (70.587)	lr 0.02259
Train [25][430/3239]	Time 0.199 (0.603)	Data Time 0.001 (0.075)	Loss 3.3795 (3.2296)	Entropy 1.58162 (1.58306)	Top-1 acc 42.188 (47.056)	Top-5 acc 66.797 (70.549)	lr 0.02259
Train [25][440/3239]	Time 0.306 (0.600)	Data Time 0.001 (0.074)	Loss 3.1591 (3.2290)	Entropy 1.58167 (1.58303)	Top-1 acc 48.828 (47.080)	Top-5 acc 73.438 (70.590)	lr 0.02259
Train [25][450/3239]	Time 2.341 (0.596)	Data Time 0.001 (0.072)	Loss 3.2148 (3.2295)	Entropy 1.58167 (1.58300)	Top-1 acc 47.656 (47.053)	Top-5 acc 69.922 (70.592)	lr 0.02259
Train [25][460/3239]	Time 0.198 (0.588)	Data Time 0.001 (0.071)	Loss 3.2849 (3.2284)	Entropy 1.58154 (1.58297)	Top-1 acc 45.703 (47.051)	Top-5 acc 70.312 (70.630)	lr 0.02259
Train [25][470/3239]	Time 0.201 (0.585)	Data Time 0.001 (0.069)	Loss 3.1409 (3.2283)	Entropy 1.58149 (1.58293)	Top-1 acc 48.047 (47.061)	Top-5 acc 73.438 (70.647)	lr 0.02258
Train [25][480/3239]	Time 0.192 (0.582)	Data Time 0.001 (0.068)	Loss 3.4868 (3.2274)	Entropy 1.58149 (1.58290)	Top-1 acc 37.891 (47.076)	Top-5 acc 66.016 (70.667)	lr 0.02258
Train [25][490/3239]	Time 0.219 (0.580)	Data Time 0.001 (0.066)	Loss 3.0023 (3.2274)	Entropy 1.58141 (1.58287)	Top-1 acc 50.781 (47.057)	Top-5 acc 72.266 (70.656)	lr 0.02258
Train [25][500/3239]	Time 0.206 (0.577)	Data Time 0.001 (0.065)	Loss 3.3945 (3.2281)	Entropy 1.58140 (1.58285)	Top-1 acc 41.406 (47.019)	Top-5 acc 66.797 (70.636)	lr 0.02258
Train [25][510/3239]	Time 0.209 (0.574)	Data Time 0.001 (0.064)	Loss 3.1612 (3.2274)	Entropy 1.58135 (1.58282)	Top-1 acc 46.094 (47.041)	Top-5 acc 72.656 (70.650)	lr 0.02258
Train [25][520/3239]	Time 0.200 (0.572)	Data Time 0.001 (0.063)	Loss 3.2072 (3.2269)	Entropy 1.58128 (1.58279)	Top-1 acc 49.609 (47.054)	Top-5 acc 70.703 (70.651)	lr 0.02258
Train [25][530/3239]	Time 0.171 (0.569)	Data Time 0.001 (0.062)	Loss 3.4101 (3.2289)	Entropy 1.58123 (1.58276)	Top-1 acc 46.094 (47.006)	Top-5 acc 68.750 (70.624)	lr 0.02258
Train [25][540/3239]	Time 0.178 (0.567)	Data Time 0.001 (0.061)	Loss 3.2932 (3.2288)	Entropy 1.58113 (1.58273)	Top-1 acc 45.703 (47.011)	Top-5 acc 71.484 (70.621)	lr 0.02258
Train [25][550/3239]	Time 0.330 (0.565)	Data Time 0.001 (0.059)	Loss 3.1560 (3.2281)	Entropy 1.58098 (1.58270)	Top-1 acc 51.953 (47.037)	Top-5 acc 73.828 (70.624)	lr 0.02258
Train [25][560/3239]	Time 2.535 (0.563)	Data Time 0.001 (0.058)	Loss 3.1390 (3.2271)	Entropy 1.58098 (1.58267)	Top-1 acc 49.219 (47.096)	Top-5 acc 71.875 (70.635)	lr 0.02258
Train [25][570/3239]	Time 0.215 (0.557)	Data Time 0.001 (0.057)	Loss 3.1917 (3.2277)	Entropy 1.58103 (1.58264)	Top-1 acc 45.703 (47.058)	Top-5 acc 68.750 (70.609)	lr 0.02258
Train [25][580/3239]	Time 0.218 (0.555)	Data Time 0.001 (0.056)	Loss 3.3075 (3.2268)	Entropy 1.58100 (1.58261)	Top-1 acc 43.359 (47.057)	Top-5 acc 67.578 (70.628)	lr 0.02258
Train [25][590/3239]	Time 0.240 (0.553)	Data Time 0.001 (0.056)	Loss 3.1995 (3.2261)	Entropy 1.58097 (1.58259)	Top-1 acc 48.047 (47.044)	Top-5 acc 70.703 (70.638)	lr 0.02258
Train [25][600/3239]	Time 0.239 (0.551)	Data Time 0.001 (0.055)	Loss 3.2256 (3.2270)	Entropy 1.58087 (1.58256)	Top-1 acc 48.047 (47.006)	Top-5 acc 72.656 (70.634)	lr 0.02258
Train [25][610/3239]	Time 0.214 (0.550)	Data Time 0.001 (0.054)	Loss 3.3950 (3.2278)	Entropy 1.58077 (1.58253)	Top-1 acc 44.531 (46.991)	Top-5 acc 66.797 (70.614)	lr 0.02258
Train [25][620/3239]	Time 0.202 (0.548)	Data Time 0.001 (0.053)	Loss 3.3414 (3.2279)	Entropy 1.58063 (1.58250)	Top-1 acc 42.578 (46.990)	Top-5 acc 65.625 (70.603)	lr 0.02258
Train [25][630/3239]	Time 0.244 (0.546)	Data Time 0.001 (0.052)	Loss 3.6514 (3.2286)	Entropy 1.58060 (1.58247)	Top-1 acc 34.766 (46.984)	Top-5 acc 64.062 (70.610)	lr 0.02258
Train [25][640/3239]	Time 0.217 (0.544)	Data Time 0.001 (0.051)	Loss 3.2083 (3.2289)	Entropy 1.58055 (1.58244)	Top-1 acc 46.484 (46.977)	Top-5 acc 69.922 (70.590)	lr 0.02257
Train [25][650/3239]	Time 0.289 (0.542)	Data Time 0.001 (0.051)	Loss 3.3016 (3.2291)	Entropy 1.58049 (1.58241)	Top-1 acc 46.484 (46.981)	Top-5 acc 67.969 (70.592)	lr 0.02257
Train [25][660/3239]	Time 0.244 (0.541)	Data Time 0.001 (0.050)	Loss 3.1708 (3.2294)	Entropy 1.58038 (1.58238)	Top-1 acc 47.656 (46.971)	Top-5 acc 69.922 (70.588)	lr 0.02257
Train [25][670/3239]	Time 2.308 (0.539)	Data Time 0.001 (0.049)	Loss 3.2646 (3.2293)	Entropy 1.58038 (1.58235)	Top-1 acc 46.484 (46.966)	Top-5 acc 72.656 (70.598)	lr 0.02257
Train [25][680/3239]	Time 0.236 (0.535)	Data Time 0.001 (0.048)	Loss 3.1942 (3.2303)	Entropy 1.58033 (1.58232)	Top-1 acc 49.219 (46.936)	Top-5 acc 68.750 (70.581)	lr 0.02257
Train [25][690/3239]	Time 0.215 (0.534)	Data Time 0.001 (0.048)	Loss 3.2646 (3.2296)	Entropy 1.58024 (1.58229)	Top-1 acc 46.875 (46.953)	Top-5 acc 70.703 (70.588)	lr 0.02257
Train [25][700/3239]	Time 0.148 (0.532)	Data Time 0.001 (0.047)	Loss 3.2877 (3.2295)	Entropy 1.58017 (1.58226)	Top-1 acc 44.141 (46.961)	Top-5 acc 68.359 (70.581)	lr 0.02257
Train [25][710/3239]	Time 0.221 (0.595)	Data Time 0.002 (0.046)	Loss 3.1626 (3.2291)	Entropy 1.58005 (1.58223)	Top-1 acc 47.656 (46.968)	Top-5 acc 71.094 (70.583)	lr 0.02257
Train [25][720/3239]	Time 0.205 (0.593)	Data Time 0.002 (0.046)	Loss 3.2423 (3.2297)	Entropy 1.57995 (1.58220)	Top-1 acc 47.266 (46.960)	Top-5 acc 70.703 (70.567)	lr 0.02257
Train [25][730/3239]	Time 0.338 (0.591)	Data Time 0.001 (0.045)	Loss 3.4141 (3.2303)	Entropy 1.57992 (1.58217)	Top-1 acc 44.922 (46.943)	Top-5 acc 66.016 (70.548)	lr 0.02257
Train [25][740/3239]	Time 0.216 (0.589)	Data Time 0.001 (0.045)	Loss 3.1214 (3.2302)	Entropy 1.57987 (1.58214)	Top-1 acc 46.484 (46.945)	Top-5 acc 71.484 (70.542)	lr 0.02257
Train [25][750/3239]	Time 0.230 (0.587)	Data Time 0.001 (0.044)	Loss 3.0827 (3.2294)	Entropy 1.57988 (1.58211)	Top-1 acc 50.781 (46.964)	Top-5 acc 73.047 (70.559)	lr 0.02257
Train [25][760/3239]	Time 0.226 (0.586)	Data Time 0.001 (0.044)	Loss 3.4082 (3.2296)	Entropy 1.57958 (1.58208)	Top-1 acc 43.750 (46.969)	Top-5 acc 67.188 (70.552)	lr 0.02257
Train [25][770/3239]	Time 0.189 (0.584)	Data Time 0.001 (0.043)	Loss 3.3478 (3.2295)	Entropy 1.57955 (1.58205)	Top-1 acc 49.219 (46.971)	Top-5 acc 69.531 (70.547)	lr 0.02257
Train [25][780/3239]	Time 2.335 (0.582)	Data Time 0.001 (0.043)	Loss 3.2668 (3.2303)	Entropy 1.57955 (1.58201)	Top-1 acc 46.875 (46.951)	Top-5 acc 70.703 (70.540)	lr 0.02257
Train [25][790/3239]	Time 0.224 (0.578)	Data Time 0.001 (0.042)	Loss 3.1268 (3.2302)	Entropy 1.57948 (1.58198)	Top-1 acc 48.438 (46.941)	Top-5 acc 74.609 (70.551)	lr 0.02257
Train [25][800/3239]	Time 0.236 (0.576)	Data Time 0.002 (0.042)	Loss 3.0767 (3.2295)	Entropy 1.57943 (1.58195)	Top-1 acc 48.828 (46.948)	Top-5 acc 74.219 (70.570)	lr 0.02257
Train [25][810/3239]	Time 0.237 (0.575)	Data Time 0.001 (0.041)	Loss 3.2004 (3.2298)	Entropy 1.57936 (1.58192)	Top-1 acc 49.609 (46.954)	Top-5 acc 71.484 (70.563)	lr 0.02256
Train [25][820/3239]	Time 0.228 (0.573)	Data Time 0.001 (0.041)	Loss 3.3089 (3.2293)	Entropy 1.57919 (1.58189)	Top-1 acc 43.750 (46.968)	Top-5 acc 70.703 (70.587)	lr 0.02256
Train [25][830/3239]	Time 0.233 (0.572)	Data Time 0.001 (0.040)	Loss 3.1762 (3.2299)	Entropy 1.57910 (1.58185)	Top-1 acc 50.000 (46.960)	Top-5 acc 74.219 (70.574)	lr 0.02256
Train [25][840/3239]	Time 0.243 (0.571)	Data Time 0.001 (0.040)	Loss 3.0609 (3.2304)	Entropy 1.57904 (1.58182)	Top-1 acc 51.562 (46.947)	Top-5 acc 73.047 (70.561)	lr 0.02256
Train [25][850/3239]	Time 0.222 (0.570)	Data Time 0.001 (0.039)	Loss 3.2837 (3.2301)	Entropy 1.57898 (1.58179)	Top-1 acc 41.797 (46.962)	Top-5 acc 71.875 (70.574)	lr 0.02256
Train [25][860/3239]	Time 0.206 (0.569)	Data Time 0.001 (0.039)	Loss 3.2728 (3.2312)	Entropy 1.57895 (1.58175)	Top-1 acc 42.578 (46.939)	Top-5 acc 71.875 (70.552)	lr 0.02256
Train [25][870/3239]	Time 0.226 (0.567)	Data Time 0.001 (0.038)	Loss 3.2973 (3.2318)	Entropy 1.57895 (1.58172)	Top-1 acc 43.359 (46.924)	Top-5 acc 67.188 (70.530)	lr 0.02256
Train [25][880/3239]	Time 0.237 (0.566)	Data Time 0.002 (0.038)	Loss 3.3050 (3.2323)	Entropy 1.57897 (1.58169)	Top-1 acc 44.531 (46.927)	Top-5 acc 67.578 (70.513)	lr 0.02256
Train [25][890/3239]	Time 2.298 (0.565)	Data Time 0.002 (0.038)	Loss 2.9983 (3.2322)	Entropy 1.57897 (1.58166)	Top-1 acc 53.125 (46.936)	Top-5 acc 76.562 (70.516)	lr 0.02256
Train [25][900/3239]	Time 0.354 (0.561)	Data Time 0.004 (0.037)	Loss 3.3132 (3.2325)	Entropy 1.57888 (1.58163)	Top-1 acc 44.922 (46.931)	Top-5 acc 70.312 (70.515)	lr 0.02256
Train [25][910/3239]	Time 0.232 (0.560)	Data Time 0.001 (0.037)	Loss 3.3211 (3.2327)	Entropy 1.57888 (1.58160)	Top-1 acc 43.359 (46.914)	Top-5 acc 66.797 (70.521)	lr 0.02256
Train [25][920/3239]	Time 0.231 (0.559)	Data Time 0.001 (0.036)	Loss 3.1135 (3.2320)	Entropy 1.57882 (1.58157)	Top-1 acc 52.344 (46.940)	Top-5 acc 70.703 (70.534)	lr 0.02256
Train [25][930/3239]	Time 0.230 (0.557)	Data Time 0.001 (0.036)	Loss 3.3540 (3.2321)	Entropy 1.57871 (1.58154)	Top-1 acc 44.141 (46.932)	Top-5 acc 67.969 (70.533)	lr 0.02256
Train [25][940/3239]	Time 0.219 (0.556)	Data Time 0.005 (0.036)	Loss 3.2776 (3.2316)	Entropy 1.57861 (1.58151)	Top-1 acc 42.188 (46.939)	Top-5 acc 69.922 (70.543)	lr 0.02256
Train [25][950/3239]	Time 0.161 (0.555)	Data Time 0.001 (0.035)	Loss 3.2681 (3.2319)	Entropy 1.57858 (1.58148)	Top-1 acc 45.703 (46.943)	Top-5 acc 66.797 (70.536)	lr 0.02256
Train [25][960/3239]	Time 0.333 (0.554)	Data Time 0.001 (0.035)	Loss 3.2387 (3.2316)	Entropy 1.57844 (1.58145)	Top-1 acc 48.438 (46.951)	Top-5 acc 73.438 (70.545)	lr 0.02256
Train [25][970/3239]	Time 0.219 (0.553)	Data Time 0.001 (0.035)	Loss 3.0937 (3.2316)	Entropy 1.57835 (1.58142)	Top-1 acc 49.219 (46.956)	Top-5 acc 71.094 (70.541)	lr 0.02255
Train [25][980/3239]	Time 0.245 (0.551)	Data Time 0.001 (0.034)	Loss 3.3457 (3.2310)	Entropy 1.57829 (1.58138)	Top-1 acc 42.188 (46.967)	Top-5 acc 66.797 (70.563)	lr 0.02255
Train [25][990/3239]	Time 0.213 (0.550)	Data Time 0.001 (0.034)	Loss 3.4909 (3.2311)	Entropy 1.57828 (1.58135)	Top-1 acc 41.406 (46.965)	Top-5 acc 62.891 (70.553)	lr 0.02255
Train [25][1000/3239]	Time 2.228 (0.549)	Data Time 0.001 (0.034)	Loss 3.1141 (3.2305)	Entropy 1.57828 (1.58132)	Top-1 acc 53.125 (46.967)	Top-5 acc 70.703 (70.549)	lr 0.02255
Train [25][1010/3239]	Time 0.230 (0.546)	Data Time 0.001 (0.033)	Loss 3.0956 (3.2299)	Entropy 1.57816 (1.58129)	Top-1 acc 50.000 (46.977)	Top-5 acc 73.438 (70.570)	lr 0.02255
Train [25][1020/3239]	Time 0.275 (0.545)	Data Time 0.001 (0.033)	Loss 3.2824 (3.2299)	Entropy 1.57811 (1.58126)	Top-1 acc 45.312 (46.968)	Top-5 acc 71.484 (70.582)	lr 0.02255
Train [25][1030/3239]	Time 0.217 (0.544)	Data Time 0.001 (0.033)	Loss 3.3145 (3.2300)	Entropy 1.57810 (1.58123)	Top-1 acc 44.922 (46.968)	Top-5 acc 67.188 (70.583)	lr 0.02255
Train [25][1040/3239]	Time 0.227 (0.543)	Data Time 0.002 (0.032)	Loss 3.4462 (3.2307)	Entropy 1.57797 (1.58120)	Top-1 acc 42.969 (46.964)	Top-5 acc 66.406 (70.566)	lr 0.02255
Train [25][1050/3239]	Time 0.203 (0.542)	Data Time 0.001 (0.032)	Loss 3.2035 (3.2314)	Entropy 1.57788 (1.58117)	Top-1 acc 53.125 (46.960)	Top-5 acc 72.266 (70.543)	lr 0.02255
Train [25][1060/3239]	Time 0.225 (0.541)	Data Time 0.001 (0.032)	Loss 3.0826 (3.2310)	Entropy 1.57774 (1.58114)	Top-1 acc 50.391 (46.964)	Top-5 acc 72.266 (70.548)	lr 0.02255
Train [25][1070/3239]	Time 0.263 (0.579)	Data Time 0.004 (0.032)	Loss 3.2468 (3.2313)	Entropy 1.57766 (1.58110)	Top-1 acc 47.266 (46.963)	Top-5 acc 72.266 (70.534)	lr 0.02255
Train [25][1080/3239]	Time 0.221 (0.578)	Data Time 0.002 (0.031)	Loss 3.0820 (3.2314)	Entropy 1.57753 (1.58107)	Top-1 acc 48.828 (46.964)	Top-5 acc 74.609 (70.531)	lr 0.02255
Train [25][1090/3239]	Time 0.212 (0.577)	Data Time 0.001 (0.031)	Loss 3.2246 (3.2316)	Entropy 1.57729 (1.58104)	Top-1 acc 46.484 (46.964)	Top-5 acc 70.703 (70.527)	lr 0.02255
Train [25][1100/3239]	Time 0.259 (0.576)	Data Time 0.001 (0.031)	Loss 3.3279 (3.2320)	Entropy 1.57719 (1.58100)	Top-1 acc 41.797 (46.952)	Top-5 acc 69.141 (70.516)	lr 0.02255
Train [25][1110/3239]	Time 2.401 (0.575)	Data Time 0.001 (0.030)	Loss 3.3277 (3.2324)	Entropy 1.57719 (1.58097)	Top-1 acc 46.484 (46.952)	Top-5 acc 68.359 (70.511)	lr 0.02255
Train [25][1120/3239]	Time 0.213 (0.572)	Data Time 0.001 (0.030)	Loss 3.2661 (3.2322)	Entropy 1.57715 (1.58094)	Top-1 acc 46.484 (46.961)	Top-5 acc 66.797 (70.518)	lr 0.02255
Train [25][1130/3239]	Time 0.216 (0.571)	Data Time 0.001 (0.030)	Loss 3.2377 (3.2329)	Entropy 1.57700 (1.58090)	Top-1 acc 48.438 (46.938)	Top-5 acc 69.531 (70.510)	lr 0.02255
Train [25][1140/3239]	Time 0.177 (0.569)	Data Time 0.001 (0.030)	Loss 3.3355 (3.2333)	Entropy 1.57693 (1.58087)	Top-1 acc 46.094 (46.937)	Top-5 acc 67.969 (70.503)	lr 0.02254
Train [25][1150/3239]	Time 0.303 (0.569)	Data Time 0.001 (0.030)	Loss 3.0089 (3.2331)	Entropy 1.57695 (1.58083)	Top-1 acc 50.391 (46.931)	Top-5 acc 73.438 (70.518)	lr 0.02254
Train [25][1160/3239]	Time 0.224 (0.567)	Data Time 0.001 (0.029)	Loss 3.3847 (3.2329)	Entropy 1.57682 (1.58080)	Top-1 acc 43.359 (46.928)	Top-5 acc 68.359 (70.524)	lr 0.02254
Train [25][1170/3239]	Time 0.235 (0.566)	Data Time 0.001 (0.029)	Loss 3.0583 (3.2335)	Entropy 1.57676 (1.58076)	Top-1 acc 50.781 (46.922)	Top-5 acc 77.344 (70.513)	lr 0.02254
Train [25][1180/3239]	Time 0.212 (0.565)	Data Time 0.001 (0.029)	Loss 3.0908 (3.2332)	Entropy 1.57674 (1.58073)	Top-1 acc 50.000 (46.919)	Top-5 acc 73.438 (70.524)	lr 0.02254
Train [25][1190/3239]	Time 0.242 (0.564)	Data Time 0.001 (0.029)	Loss 3.1603 (3.2331)	Entropy 1.57658 (1.58070)	Top-1 acc 50.781 (46.923)	Top-5 acc 73.828 (70.524)	lr 0.02254
Train [25][1200/3239]	Time 0.218 (0.563)	Data Time 0.001 (0.028)	Loss 3.2476 (3.2332)	Entropy 1.57654 (1.58066)	Top-1 acc 45.703 (46.908)	Top-5 acc 70.703 (70.527)	lr 0.02254
Train [25][1210/3239]	Time 0.195 (0.562)	Data Time 0.001 (0.028)	Loss 3.4694 (3.2333)	Entropy 1.57647 (1.58063)	Top-1 acc 43.750 (46.910)	Top-5 acc 62.500 (70.518)	lr 0.02254
Train [25][1220/3239]	Time 2.362 (0.561)	Data Time 0.001 (0.028)	Loss 3.1475 (3.2339)	Entropy 1.57647 (1.58059)	Top-1 acc 49.219 (46.899)	Top-5 acc 73.047 (70.504)	lr 0.02254
Train [25][1230/3239]	Time 0.188 (0.559)	Data Time 0.001 (0.028)	Loss 3.4405 (3.2338)	Entropy 1.57638 (1.58056)	Top-1 acc 43.750 (46.902)	Top-5 acc 66.406 (70.508)	lr 0.02254
Train [25][1240/3239]	Time 0.225 (0.558)	Data Time 0.002 (0.028)	Loss 3.3974 (3.2340)	Entropy 1.57636 (1.58053)	Top-1 acc 43.359 (46.901)	Top-5 acc 66.406 (70.501)	lr 0.02254
Train [25][1250/3239]	Time 0.244 (0.557)	Data Time 0.002 (0.027)	Loss 3.1969 (3.2338)	Entropy 1.57626 (1.58049)	Top-1 acc 44.531 (46.900)	Top-5 acc 73.828 (70.512)	lr 0.02254
Train [25][1260/3239]	Time 0.209 (0.556)	Data Time 0.001 (0.027)	Loss 3.3578 (3.2336)	Entropy 1.57624 (1.58046)	Top-1 acc 45.703 (46.903)	Top-5 acc 69.531 (70.520)	lr 0.02254
Train [25][1270/3239]	Time 0.206 (0.555)	Data Time 0.001 (0.027)	Loss 3.0324 (3.2335)	Entropy 1.57617 (1.58042)	Top-1 acc 52.734 (46.902)	Top-5 acc 73.438 (70.525)	lr 0.02254
Train [25][1280/3239]	Time 0.321 (0.554)	Data Time 0.001 (0.027)	Loss 3.2354 (3.2336)	Entropy 1.57607 (1.58039)	Top-1 acc 44.141 (46.896)	Top-5 acc 71.484 (70.522)	lr 0.02254
Train [25][1290/3239]	Time 0.209 (0.554)	Data Time 0.001 (0.027)	Loss 3.1926 (3.2335)	Entropy 1.57603 (1.58036)	Top-1 acc 45.703 (46.892)	Top-5 acc 72.656 (70.526)	lr 0.02254
Train [25][1300/3239]	Time 0.217 (0.553)	Data Time 0.001 (0.026)	Loss 3.0925 (3.2337)	Entropy 1.57597 (1.58032)	Top-1 acc 49.609 (46.890)	Top-5 acc 73.828 (70.515)	lr 0.02253
Train [25][1310/3239]	Time 0.219 (0.552)	Data Time 0.001 (0.026)	Loss 3.1918 (3.2337)	Entropy 1.57589 (1.58029)	Top-1 acc 51.172 (46.894)	Top-5 acc 71.094 (70.509)	lr 0.02253
Train [25][1320/3239]	Time 0.259 (0.551)	Data Time 0.001 (0.026)	Loss 3.4092 (3.2335)	Entropy 1.57585 (1.58026)	Top-1 acc 42.969 (46.892)	Top-5 acc 68.359 (70.504)	lr 0.02253
Train [25][1330/3239]	Time 2.454 (0.550)	Data Time 0.001 (0.026)	Loss 3.4780 (3.2335)	Entropy 1.57585 (1.58022)	Top-1 acc 41.406 (46.896)	Top-5 acc 64.453 (70.499)	lr 0.02253
Train [25][1340/3239]	Time 0.340 (0.548)	Data Time 0.001 (0.026)	Loss 3.2636 (3.2335)	Entropy 1.57576 (1.58019)	Top-1 acc 46.094 (46.897)	Top-5 acc 71.484 (70.503)	lr 0.02253
Train [25][1350/3239]	Time 0.258 (0.547)	Data Time 0.001 (0.025)	Loss 3.5136 (3.2336)	Entropy 1.57548 (1.58016)	Top-1 acc 41.016 (46.891)	Top-5 acc 64.844 (70.497)	lr 0.02253
Train [25][1360/3239]	Time 0.206 (0.546)	Data Time 0.001 (0.025)	Loss 3.4392 (3.2336)	Entropy 1.57544 (1.58012)	Top-1 acc 42.578 (46.889)	Top-5 acc 66.406 (70.500)	lr 0.02253
Train [25][1370/3239]	Time 0.197 (0.546)	Data Time 0.002 (0.025)	Loss 3.0990 (3.2335)	Entropy 1.57535 (1.58009)	Top-1 acc 50.391 (46.894)	Top-5 acc 71.484 (70.498)	lr 0.02253
Train [25][1380/3239]	Time 0.199 (0.545)	Data Time 0.001 (0.025)	Loss 3.2759 (3.2334)	Entropy 1.57521 (1.58005)	Top-1 acc 43.750 (46.895)	Top-5 acc 71.094 (70.500)	lr 0.02253
Train [25][1390/3239]	Time 0.216 (0.544)	Data Time 0.001 (0.025)	Loss 3.3915 (3.2334)	Entropy 1.57513 (1.58002)	Top-1 acc 43.750 (46.902)	Top-5 acc 66.797 (70.494)	lr 0.02253
Train [25][1400/3239]	Time 0.204 (0.543)	Data Time 0.001 (0.025)	Loss 3.1709 (3.2339)	Entropy 1.57510 (1.57998)	Top-1 acc 48.047 (46.884)	Top-5 acc 72.266 (70.482)	lr 0.02253
Train [25][1410/3239]	Time 0.249 (0.543)	Data Time 0.002 (0.024)	Loss 3.0219 (3.2341)	Entropy 1.57505 (1.57995)	Top-1 acc 52.734 (46.887)	Top-5 acc 76.953 (70.475)	lr 0.02253
Train [25][1420/3239]	Time 0.238 (0.543)	Data Time 0.002 (0.024)	Loss 3.2736 (3.2342)	Entropy 1.57496 (1.57991)	Top-1 acc 44.922 (46.891)	Top-5 acc 67.969 (70.469)	lr 0.02253
Train [25][1430/3239]	Time 0.288 (0.576)	Data Time 0.013 (0.024)	Loss 3.0845 (3.2343)	Entropy 1.57498 (1.57988)	Top-1 acc 48.828 (46.891)	Top-5 acc 75.391 (70.464)	lr 0.02253
Train [25][1440/3239]	Time 2.534 (0.575)	Data Time 0.002 (0.024)	Loss 3.0777 (3.2342)	Entropy 1.57498 (1.57984)	Top-1 acc 53.125 (46.891)	Top-5 acc 71.875 (70.464)	lr 0.02253
Train [25][1450/3239]	Time 0.199 (0.573)	Data Time 0.002 (0.024)	Loss 3.2396 (3.2340)	Entropy 1.57477 (1.57981)	Top-1 acc 48.047 (46.895)	Top-5 acc 69.531 (70.465)	lr 0.02253
Train [25][1460/3239]	Time 0.326 (0.572)	Data Time 0.001 (0.024)	Loss 3.2865 (3.2341)	Entropy 1.57475 (1.57977)	Top-1 acc 46.094 (46.892)	Top-5 acc 67.188 (70.458)	lr 0.02253
Train [25][1470/3239]	Time 0.241 (0.571)	Data Time 0.001 (0.024)	Loss 3.1932 (3.2338)	Entropy 1.57437 (1.57974)	Top-1 acc 49.609 (46.903)	Top-5 acc 71.875 (70.470)	lr 0.02252
Train [25][1480/3239]	Time 0.208 (0.570)	Data Time 0.001 (0.023)	Loss 3.4035 (3.2341)	Entropy 1.57422 (1.57970)	Top-1 acc 41.797 (46.902)	Top-5 acc 66.797 (70.465)	lr 0.02252
Train [25][1490/3239]	Time 0.211 (0.570)	Data Time 0.001 (0.023)	Loss 3.3121 (3.2343)	Entropy 1.57419 (1.57966)	Top-1 acc 43.359 (46.903)	Top-5 acc 68.750 (70.458)	lr 0.02252
Train [25][1500/3239]	Time 0.198 (0.569)	Data Time 0.001 (0.023)	Loss 3.1781 (3.2346)	Entropy 1.57413 (1.57963)	Top-1 acc 50.000 (46.886)	Top-5 acc 72.656 (70.456)	lr 0.02252
Train [25][1510/3239]	Time 0.227 (0.568)	Data Time 0.001 (0.023)	Loss 3.1326 (3.2344)	Entropy 1.57401 (1.57959)	Top-1 acc 50.391 (46.896)	Top-5 acc 73.438 (70.461)	lr 0.02252
Train [25][1520/3239]	Time 0.319 (0.567)	Data Time 0.001 (0.023)	Loss 3.3836 (3.2352)	Entropy 1.57396 (1.57955)	Top-1 acc 44.141 (46.880)	Top-5 acc 65.625 (70.450)	lr 0.02252
Train [25][1530/3239]	Time 0.214 (0.566)	Data Time 0.001 (0.023)	Loss 3.2385 (3.2348)	Entropy 1.57381 (1.57952)	Top-1 acc 47.266 (46.885)	Top-5 acc 68.750 (70.456)	lr 0.02252
Train [25][1540/3239]	Time 0.198 (0.565)	Data Time 0.001 (0.023)	Loss 3.1157 (3.2345)	Entropy 1.57381 (1.57948)	Top-1 acc 50.391 (46.888)	Top-5 acc 76.172 (70.459)	lr 0.02252
Train [25][1550/3239]	Time 2.309 (0.564)	Data Time 0.001 (0.022)	Loss 3.3099 (3.2342)	Entropy 1.57381 (1.57944)	Top-1 acc 45.703 (46.889)	Top-5 acc 67.969 (70.468)	lr 0.02252
Train [25][1560/3239]	Time 0.272 (0.562)	Data Time 0.001 (0.022)	Loss 3.2161 (3.2345)	Entropy 1.57374 (1.57941)	Top-1 acc 50.391 (46.887)	Top-5 acc 73.047 (70.461)	lr 0.02252
Train [25][1570/3239]	Time 0.206 (0.562)	Data Time 0.002 (0.022)	Loss 3.2524 (3.2350)	Entropy 1.57367 (1.57937)	Top-1 acc 46.875 (46.877)	Top-5 acc 68.750 (70.444)	lr 0.02252
Train [25][1580/3239]	Time 0.327 (0.561)	Data Time 0.002 (0.022)	Loss 3.1784 (3.2348)	Entropy 1.57355 (1.57933)	Top-1 acc 51.172 (46.885)	Top-5 acc 73.047 (70.454)	lr 0.02252
Train [25][1590/3239]	Time 0.233 (0.560)	Data Time 0.001 (0.022)	Loss 3.3869 (3.2353)	Entropy 1.57355 (1.57930)	Top-1 acc 42.188 (46.874)	Top-5 acc 67.578 (70.443)	lr 0.02252
Train [25][1600/3239]	Time 0.211 (0.560)	Data Time 0.002 (0.022)	Loss 3.1047 (3.2353)	Entropy 1.57347 (1.57926)	Top-1 acc 50.000 (46.879)	Top-5 acc 73.438 (70.441)	lr 0.02252
Train [25][1610/3239]	Time 0.220 (0.559)	Data Time 0.001 (0.022)	Loss 3.3780 (3.2354)	Entropy 1.57340 (1.57923)	Top-1 acc 45.703 (46.878)	Top-5 acc 65.625 (70.441)	lr 0.02252
Train [25][1620/3239]	Time 0.307 (0.558)	Data Time 0.001 (0.022)	Loss 3.1586 (3.2354)	Entropy 1.57340 (1.57919)	Top-1 acc 47.266 (46.875)	Top-5 acc 71.875 (70.444)	lr 0.02252
Train [25][1630/3239]	Time 0.211 (0.557)	Data Time 0.001 (0.021)	Loss 3.1903 (3.2354)	Entropy 1.57313 (1.57915)	Top-1 acc 49.219 (46.879)	Top-5 acc 72.656 (70.451)	lr 0.02252
Train [25][1640/3239]	Time 0.293 (0.557)	Data Time 0.001 (0.021)	Loss 3.1716 (3.2353)	Entropy 1.57311 (1.57912)	Top-1 acc 48.438 (46.884)	Top-5 acc 72.266 (70.452)	lr 0.02251
Train [25][1650/3239]	Time 0.191 (0.556)	Data Time 0.001 (0.021)	Loss 3.2012 (3.2354)	Entropy 1.57295 (1.57908)	Top-1 acc 48.828 (46.884)	Top-5 acc 74.609 (70.450)	lr 0.02251
Train [25][1660/3239]	Time 2.364 (0.555)	Data Time 0.001 (0.021)	Loss 3.1629 (3.2359)	Entropy 1.57295 (1.57904)	Top-1 acc 45.312 (46.872)	Top-5 acc 74.609 (70.448)	lr 0.02251
Train [25][1670/3239]	Time 0.218 (0.553)	Data Time 0.001 (0.021)	Loss 3.2881 (3.2362)	Entropy 1.57289 (1.57901)	Top-1 acc 47.266 (46.869)	Top-5 acc 67.969 (70.439)	lr 0.02251
Train [25][1680/3239]	Time 0.203 (0.553)	Data Time 0.001 (0.021)	Loss 3.2247 (3.2364)	Entropy 1.57284 (1.57897)	Top-1 acc 46.094 (46.863)	Top-5 acc 68.750 (70.433)	lr 0.02251
Train [25][1690/3239]	Time 0.231 (0.552)	Data Time 0.001 (0.021)	Loss 3.2741 (3.2363)	Entropy 1.57283 (1.57893)	Top-1 acc 46.875 (46.859)	Top-5 acc 71.484 (70.437)	lr 0.02251
Train [25][1700/3239]	Time 0.206 (0.551)	Data Time 0.001 (0.021)	Loss 3.1032 (3.2360)	Entropy 1.57274 (1.57890)	Top-1 acc 50.000 (46.865)	Top-5 acc 72.656 (70.440)	lr 0.02251
Train [25][1710/3239]	Time 0.212 (0.551)	Data Time 0.001 (0.020)	Loss 3.2605 (3.2360)	Entropy 1.57268 (1.57886)	Top-1 acc 45.312 (46.871)	Top-5 acc 70.703 (70.437)	lr 0.02251
Train [25][1720/3239]	Time 0.195 (0.550)	Data Time 0.001 (0.020)	Loss 3.2376 (3.2360)	Entropy 1.57258 (1.57883)	Top-1 acc 44.922 (46.876)	Top-5 acc 72.266 (70.441)	lr 0.02251
Train [25][1730/3239]	Time 0.245 (0.550)	Data Time 0.001 (0.020)	Loss 3.1789 (3.2358)	Entropy 1.57235 (1.57879)	Top-1 acc 51.172 (46.887)	Top-5 acc 69.922 (70.440)	lr 0.02251
Train [25][1740/3239]	Time 0.215 (0.549)	Data Time 0.001 (0.020)	Loss 3.0888 (3.2356)	Entropy 1.57218 (1.57875)	Top-1 acc 49.219 (46.885)	Top-5 acc 71.875 (70.445)	lr 0.02251
Train [25][1750/3239]	Time 0.216 (0.548)	Data Time 0.001 (0.020)	Loss 3.2188 (3.2355)	Entropy 1.57233 (1.57871)	Top-1 acc 46.094 (46.885)	Top-5 acc 69.141 (70.446)	lr 0.02251
Train [25][1760/3239]	Time 0.273 (0.548)	Data Time 0.001 (0.020)	Loss 3.1559 (3.2352)	Entropy 1.57222 (1.57868)	Top-1 acc 50.781 (46.890)	Top-5 acc 72.266 (70.451)	lr 0.02251
Train [25][1770/3239]	Time 2.207 (0.547)	Data Time 0.001 (0.020)	Loss 3.1396 (3.2354)	Entropy 1.57222 (1.57864)	Top-1 acc 50.391 (46.891)	Top-5 acc 73.438 (70.451)	lr 0.02251
Train [25][1780/3239]	Time 0.244 (0.545)	Data Time 0.001 (0.020)	Loss 3.2587 (3.2353)	Entropy 1.57219 (1.57860)	Top-1 acc 47.266 (46.892)	Top-5 acc 70.703 (70.452)	lr 0.02251
Train [25][1790/3239]	Time 0.216 (0.545)	Data Time 0.001 (0.020)	Loss 3.1092 (3.2352)	Entropy 1.57195 (1.57857)	Top-1 acc 48.828 (46.897)	Top-5 acc 74.219 (70.452)	lr 0.02251
Train [25][1800/3239]	Time 0.536 (0.567)	Data Time 0.002 (0.020)	Loss 3.0174 (3.2352)	Entropy 1.57190 (1.57853)	Top-1 acc 50.781 (46.893)	Top-5 acc 76.562 (70.449)	lr 0.02250
Train [25][1810/3239]	Time 0.226 (0.567)	Data Time 0.002 (0.019)	Loss 3.1192 (3.2354)	Entropy 1.57185 (1.57849)	Top-1 acc 47.266 (46.890)	Top-5 acc 72.266 (70.441)	lr 0.02250
Train [25][1820/3239]	Time 0.322 (0.567)	Data Time 0.001 (0.019)	Loss 3.3005 (3.2355)	Entropy 1.57174 (1.57846)	Top-1 acc 44.141 (46.883)	Top-5 acc 67.188 (70.437)	lr 0.02250
Train [25][1830/3239]	Time 0.228 (0.566)	Data Time 0.002 (0.019)	Loss 3.2746 (3.2358)	Entropy 1.57174 (1.57842)	Top-1 acc 46.875 (46.876)	Top-5 acc 74.219 (70.435)	lr 0.02250
Train [25][1840/3239]	Time 0.207 (0.565)	Data Time 0.001 (0.019)	Loss 3.5649 (3.2357)	Entropy 1.57169 (1.57838)	Top-1 acc 39.844 (46.876)	Top-5 acc 64.453 (70.435)	lr 0.02250
Train [25][1850/3239]	Time 0.251 (0.565)	Data Time 0.001 (0.019)	Loss 3.1315 (3.2355)	Entropy 1.57162 (1.57835)	Top-1 acc 48.438 (46.883)	Top-5 acc 74.219 (70.441)	lr 0.02250
Train [25][1860/3239]	Time 0.211 (0.564)	Data Time 0.001 (0.019)	Loss 3.1413 (3.2353)	Entropy 1.57154 (1.57831)	Top-1 acc 47.266 (46.891)	Top-5 acc 73.438 (70.447)	lr 0.02250
Train [25][1870/3239]	Time 0.209 (0.563)	Data Time 0.001 (0.019)	Loss 3.2018 (3.2349)	Entropy 1.57148 (1.57828)	Top-1 acc 47.266 (46.898)	Top-5 acc 69.531 (70.457)	lr 0.02250
Train [25][1880/3239]	Time 2.470 (0.563)	Data Time 0.001 (0.019)	Loss 3.4090 (3.2349)	Entropy 1.57148 (1.57824)	Top-1 acc 44.922 (46.899)	Top-5 acc 64.453 (70.456)	lr 0.02250
Train [25][1890/3239]	Time 0.189 (0.561)	Data Time 0.001 (0.019)	Loss 3.3643 (3.2347)	Entropy 1.57136 (1.57820)	Top-1 acc 42.578 (46.907)	Top-5 acc 67.188 (70.457)	lr 0.02250
Train [25][1900/3239]	Time 0.170 (0.560)	Data Time 0.001 (0.019)	Loss 3.4191 (3.2349)	Entropy 1.57129 (1.57817)	Top-1 acc 45.703 (46.906)	Top-5 acc 64.453 (70.446)	lr 0.02250
Train [25][1910/3239]	Time 0.213 (0.560)	Data Time 0.001 (0.019)	Loss 3.1255 (3.2345)	Entropy 1.57126 (1.57813)	Top-1 acc 48.047 (46.913)	Top-5 acc 74.219 (70.461)	lr 0.02250
Train [25][1920/3239]	Time 0.246 (0.559)	Data Time 0.001 (0.018)	Loss 3.1333 (3.2342)	Entropy 1.57123 (1.57809)	Top-1 acc 48.438 (46.923)	Top-5 acc 73.438 (70.465)	lr 0.02250
Train [25][1930/3239]	Time 0.211 (0.558)	Data Time 0.001 (0.018)	Loss 3.2850 (3.2343)	Entropy 1.57121 (1.57806)	Top-1 acc 45.312 (46.917)	Top-5 acc 69.531 (70.466)	lr 0.02250
Train [25][1940/3239]	Time 0.204 (0.558)	Data Time 0.001 (0.018)	Loss 3.3858 (3.2342)	Entropy 1.57111 (1.57802)	Top-1 acc 45.703 (46.926)	Top-5 acc 67.969 (70.471)	lr 0.02250
Train [25][1950/3239]	Time 0.223 (0.557)	Data Time 0.001 (0.018)	Loss 3.3711 (3.2344)	Entropy 1.57103 (1.57799)	Top-1 acc 44.922 (46.923)	Top-5 acc 69.531 (70.470)	lr 0.02250
Train [25][1960/3239]	Time 0.202 (0.557)	Data Time 0.002 (0.018)	Loss 3.3986 (3.2344)	Entropy 1.57100 (1.57795)	Top-1 acc 48.438 (46.929)	Top-5 acc 67.578 (70.473)	lr 0.02250
Train [25][1970/3239]	Time 0.218 (0.556)	Data Time 0.001 (0.018)	Loss 3.1773 (3.2342)	Entropy 1.57087 (1.57792)	Top-1 acc 48.047 (46.935)	Top-5 acc 72.266 (70.477)	lr 0.02249
Train [25][1980/3239]	Time 0.224 (0.556)	Data Time 0.001 (0.018)	Loss 3.4050 (3.2340)	Entropy 1.57075 (1.57788)	Top-1 acc 46.484 (46.941)	Top-5 acc 65.625 (70.480)	lr 0.02249
Train [25][1990/3239]	Time 2.489 (0.555)	Data Time 0.001 (0.018)	Loss 3.3240 (3.2339)	Entropy 1.57075 (1.57785)	Top-1 acc 43.750 (46.937)	Top-5 acc 70.312 (70.485)	lr 0.02249
Train [25][2000/3239]	Time 0.225 (0.554)	Data Time 0.001 (0.018)	Loss 2.9389 (3.2333)	Entropy 1.57045 (1.57781)	Top-1 acc 52.734 (46.947)	Top-5 acc 76.562 (70.496)	lr 0.02249
Train [25][2010/3239]	Time 0.230 (0.553)	Data Time 0.001 (0.018)	Loss 3.1493 (3.2333)	Entropy 1.57028 (1.57777)	Top-1 acc 51.562 (46.941)	Top-5 acc 70.312 (70.490)	lr 0.02249
Train [25][2020/3239]	Time 0.226 (0.552)	Data Time 0.001 (0.018)	Loss 3.2291 (3.2336)	Entropy 1.57025 (1.57773)	Top-1 acc 52.344 (46.934)	Top-5 acc 72.656 (70.485)	lr 0.02249
Train [25][2030/3239]	Time 0.227 (0.552)	Data Time 0.001 (0.018)	Loss 3.1367 (3.2334)	Entropy 1.57021 (1.57770)	Top-1 acc 48.828 (46.937)	Top-5 acc 72.656 (70.487)	lr 0.02249
Train [25][2040/3239]	Time 0.198 (0.551)	Data Time 0.001 (0.017)	Loss 3.1312 (3.2333)	Entropy 1.57007 (1.57766)	Top-1 acc 50.781 (46.939)	Top-5 acc 74.219 (70.490)	lr 0.02249
Train [25][2050/3239]	Time 0.299 (0.551)	Data Time 0.001 (0.017)	Loss 3.1282 (3.2333)	Entropy 1.57003 (1.57762)	Top-1 acc 52.734 (46.939)	Top-5 acc 73.047 (70.490)	lr 0.02249
Train [25][2060/3239]	Time 0.165 (0.550)	Data Time 0.001 (0.017)	Loss 3.3909 (3.2335)	Entropy 1.56991 (1.57759)	Top-1 acc 42.188 (46.934)	Top-5 acc 69.141 (70.488)	lr 0.02249
Train [25][2070/3239]	Time 0.190 (0.550)	Data Time 0.001 (0.017)	Loss 3.1801 (3.2333)	Entropy 1.56984 (1.57755)	Top-1 acc 50.000 (46.943)	Top-5 acc 71.875 (70.490)	lr 0.02249
Train [25][2080/3239]	Time 0.228 (0.549)	Data Time 0.001 (0.017)	Loss 3.3463 (3.2334)	Entropy 1.56978 (1.57751)	Top-1 acc 44.922 (46.937)	Top-5 acc 69.531 (70.490)	lr 0.02249
Train [25][2090/3239]	Time 0.216 (0.549)	Data Time 0.001 (0.017)	Loss 3.1833 (3.2335)	Entropy 1.56968 (1.57747)	Top-1 acc 47.266 (46.935)	Top-5 acc 69.531 (70.489)	lr 0.02249
Train [25][2100/3239]	Time 2.358 (0.548)	Data Time 0.001 (0.017)	Loss 3.3071 (3.2332)	Entropy 1.56968 (1.57744)	Top-1 acc 44.922 (46.942)	Top-5 acc 71.484 (70.493)	lr 0.02249
Train [25][2110/3239]	Time 0.313 (0.547)	Data Time 0.001 (0.017)	Loss 3.2697 (3.2331)	Entropy 1.56962 (1.57740)	Top-1 acc 44.922 (46.945)	Top-5 acc 67.188 (70.496)	lr 0.02249
Train [25][2120/3239]	Time 0.239 (0.546)	Data Time 0.001 (0.017)	Loss 3.3020 (3.2332)	Entropy 1.56939 (1.57736)	Top-1 acc 42.578 (46.942)	Top-5 acc 72.656 (70.499)	lr 0.02249
Train [25][2130/3239]	Time 0.233 (0.546)	Data Time 0.001 (0.017)	Loss 3.3267 (3.2332)	Entropy 1.56918 (1.57732)	Top-1 acc 47.266 (46.947)	Top-5 acc 68.750 (70.499)	lr 0.02248
Train [25][2140/3239]	Time 0.213 (0.545)	Data Time 0.001 (0.017)	Loss 3.1161 (3.2330)	Entropy 1.56907 (1.57729)	Top-1 acc 50.781 (46.948)	Top-5 acc 69.141 (70.501)	lr 0.02248
Train [25][2150/3239]	Time 0.211 (0.545)	Data Time 0.001 (0.017)	Loss 3.1070 (3.2333)	Entropy 1.56906 (1.57725)	Top-1 acc 47.266 (46.944)	Top-5 acc 73.828 (70.498)	lr 0.02248
Train [25][2160/3239]	Time 0.231 (0.565)	Data Time 0.002 (0.017)	Loss 3.0489 (3.2332)	Entropy 1.56897 (1.57721)	Top-1 acc 50.781 (46.947)	Top-5 acc 73.438 (70.497)	lr 0.02248
Train [25][2170/3239]	Time 0.306 (0.564)	Data Time 0.002 (0.017)	Loss 3.0841 (3.2330)	Entropy 1.56886 (1.57717)	Top-1 acc 50.391 (46.951)	Top-5 acc 72.266 (70.501)	lr 0.02248
Train [25][2180/3239]	Time 0.212 (0.564)	Data Time 0.001 (0.016)	Loss 3.4240 (3.2332)	Entropy 1.56883 (1.57713)	Top-1 acc 41.016 (46.948)	Top-5 acc 65.625 (70.497)	lr 0.02248
Train [25][2190/3239]	Time 0.235 (0.563)	Data Time 0.002 (0.016)	Loss 3.2361 (3.2332)	Entropy 1.56878 (1.57710)	Top-1 acc 51.953 (46.948)	Top-5 acc 71.094 (70.496)	lr 0.02248
Train [25][2200/3239]	Time 0.225 (0.562)	Data Time 0.001 (0.016)	Loss 3.0834 (3.2330)	Entropy 1.56873 (1.57706)	Top-1 acc 53.125 (46.950)	Top-5 acc 72.656 (70.498)	lr 0.02248
Train [25][2210/3239]	Time 2.399 (0.562)	Data Time 0.001 (0.016)	Loss 3.1694 (3.2328)	Entropy 1.56873 (1.57702)	Top-1 acc 46.484 (46.952)	Top-5 acc 72.266 (70.505)	lr 0.02248
Train [25][2220/3239]	Time 0.234 (0.560)	Data Time 0.002 (0.016)	Loss 3.1908 (3.2325)	Entropy 1.56862 (1.57698)	Top-1 acc 45.703 (46.960)	Top-5 acc 69.922 (70.512)	lr 0.02248
Train [25][2230/3239]	Time 0.324 (0.560)	Data Time 0.001 (0.016)	Loss 2.9895 (3.2326)	Entropy 1.56847 (1.57694)	Top-1 acc 55.078 (46.963)	Top-5 acc 76.953 (70.510)	lr 0.02248
Train [25][2240/3239]	Time 0.159 (0.559)	Data Time 0.001 (0.016)	Loss 3.2050 (3.2326)	Entropy 1.56839 (1.57691)	Top-1 acc 46.094 (46.966)	Top-5 acc 69.922 (70.507)	lr 0.02248
Train [25][2250/3239]	Time 0.202 (0.559)	Data Time 0.001 (0.016)	Loss 3.0718 (3.2329)	Entropy 1.56844 (1.57687)	Top-1 acc 51.172 (46.957)	Top-5 acc 73.828 (70.502)	lr 0.02248
Train [25][2260/3239]	Time 0.231 (0.558)	Data Time 0.001 (0.016)	Loss 3.0733 (3.2327)	Entropy 1.56842 (1.57683)	Top-1 acc 46.094 (46.957)	Top-5 acc 75.391 (70.503)	lr 0.02248
Train [25][2270/3239]	Time 0.228 (0.558)	Data Time 0.001 (0.016)	Loss 3.2488 (3.2328)	Entropy 1.56838 (1.57679)	Top-1 acc 43.359 (46.955)	Top-5 acc 68.750 (70.499)	lr 0.02248
Train [25][2280/3239]	Time 0.196 (0.557)	Data Time 0.001 (0.016)	Loss 3.1409 (3.2328)	Entropy 1.56832 (1.57676)	Top-1 acc 47.266 (46.949)	Top-5 acc 73.047 (70.498)	lr 0.02248
Train [25][2290/3239]	Time 0.237 (0.557)	Data Time 0.001 (0.016)	Loss 3.1556 (3.2329)	Entropy 1.56824 (1.57672)	Top-1 acc 46.484 (46.944)	Top-5 acc 68.750 (70.498)	lr 0.02248
Train [25][2300/3239]	Time 0.204 (0.557)	Data Time 0.001 (0.016)	Loss 3.2664 (3.2329)	Entropy 1.56818 (1.57668)	Top-1 acc 46.484 (46.938)	Top-5 acc 69.922 (70.496)	lr 0.02247
Train [25][2310/3239]	Time 0.224 (0.556)	Data Time 0.002 (0.016)	Loss 3.3240 (3.2327)	Entropy 1.56811 (1.57665)	Top-1 acc 47.656 (46.940)	Top-5 acc 70.703 (70.501)	lr 0.02247
Train [25][2320/3239]	Time 2.392 (0.556)	Data Time 0.001 (0.016)	Loss 3.2469 (3.2326)	Entropy 1.56811 (1.57661)	Top-1 acc 44.922 (46.943)	Top-5 acc 68.359 (70.499)	lr 0.02247
Train [25][2330/3239]	Time 0.238 (0.554)	Data Time 0.003 (0.016)	Loss 3.3625 (3.2328)	Entropy 1.56806 (1.57657)	Top-1 acc 41.406 (46.941)	Top-5 acc 64.844 (70.492)	lr 0.02247
Train [25][2340/3239]	Time 0.217 (0.554)	Data Time 0.001 (0.015)	Loss 3.1402 (3.2328)	Entropy 1.56801 (1.57654)	Top-1 acc 47.656 (46.938)	Top-5 acc 72.266 (70.492)	lr 0.02247
Train [25][2350/3239]	Time 0.227 (0.553)	Data Time 0.001 (0.015)	Loss 3.1022 (3.2327)	Entropy 1.56794 (1.57650)	Top-1 acc 49.609 (46.937)	Top-5 acc 73.047 (70.493)	lr 0.02247
Train [25][2360/3239]	Time 0.330 (0.553)	Data Time 0.001 (0.015)	Loss 3.2415 (3.2327)	Entropy 1.56786 (1.57646)	Top-1 acc 46.094 (46.939)	Top-5 acc 71.094 (70.494)	lr 0.02247
Train [25][2370/3239]	Time 0.229 (0.552)	Data Time 0.001 (0.015)	Loss 3.3031 (3.2328)	Entropy 1.56786 (1.57643)	Top-1 acc 43.750 (46.940)	Top-5 acc 66.797 (70.493)	lr 0.02247
Train [25][2380/3239]	Time 0.202 (0.552)	Data Time 0.001 (0.015)	Loss 3.2933 (3.2327)	Entropy 1.56781 (1.57639)	Top-1 acc 44.531 (46.941)	Top-5 acc 71.484 (70.495)	lr 0.02247
Train [25][2390/3239]	Time 0.202 (0.552)	Data Time 0.001 (0.015)	Loss 3.2319 (3.2328)	Entropy 1.56778 (1.57636)	Top-1 acc 48.047 (46.940)	Top-5 acc 70.703 (70.493)	lr 0.02247
Train [25][2400/3239]	Time 0.221 (0.551)	Data Time 0.001 (0.015)	Loss 3.2649 (3.2327)	Entropy 1.56795 (1.57632)	Top-1 acc 43.359 (46.940)	Top-5 acc 69.531 (70.494)	lr 0.02247
Train [25][2410/3239]	Time 0.200 (0.551)	Data Time 0.001 (0.015)	Loss 3.4051 (3.2325)	Entropy 1.56793 (1.57628)	Top-1 acc 44.922 (46.947)	Top-5 acc 64.062 (70.496)	lr 0.02247
Train [25][2420/3239]	Time 0.237 (0.550)	Data Time 0.001 (0.015)	Loss 3.1798 (3.2321)	Entropy 1.56792 (1.57625)	Top-1 acc 50.391 (46.956)	Top-5 acc 71.875 (70.506)	lr 0.02247
Train [25][2430/3239]	Time 2.312 (0.550)	Data Time 0.001 (0.015)	Loss 3.2220 (3.2320)	Entropy 1.56792 (1.57622)	Top-1 acc 47.656 (46.964)	Top-5 acc 67.188 (70.505)	lr 0.02247
Train [25][2440/3239]	Time 0.213 (0.548)	Data Time 0.001 (0.015)	Loss 3.1181 (3.2324)	Entropy 1.56783 (1.57618)	Top-1 acc 51.953 (46.956)	Top-5 acc 73.047 (70.497)	lr 0.02247
Train [25][2450/3239]	Time 0.221 (0.548)	Data Time 0.001 (0.015)	Loss 3.2533 (3.2322)	Entropy 1.56787 (1.57615)	Top-1 acc 48.047 (46.959)	Top-5 acc 71.484 (70.502)	lr 0.02247
Train [25][2460/3239]	Time 0.246 (0.548)	Data Time 0.002 (0.015)	Loss 3.2090 (3.2322)	Entropy 1.56786 (1.57611)	Top-1 acc 48.828 (46.962)	Top-5 acc 70.703 (70.502)	lr 0.02246
Train [25][2470/3239]	Time 0.236 (0.547)	Data Time 0.001 (0.015)	Loss 3.2349 (3.2319)	Entropy 1.56783 (1.57608)	Top-1 acc 46.484 (46.969)	Top-5 acc 72.266 (70.511)	lr 0.02246
Train [25][2480/3239]	Time 0.204 (0.547)	Data Time 0.001 (0.015)	Loss 3.1215 (3.2316)	Entropy 1.56777 (1.57605)	Top-1 acc 48.828 (46.980)	Top-5 acc 71.875 (70.520)	lr 0.02246
Train [25][2490/3239]	Time 0.216 (0.546)	Data Time 0.001 (0.015)	Loss 3.3078 (3.2316)	Entropy 1.56767 (1.57601)	Top-1 acc 43.359 (46.979)	Top-5 acc 69.141 (70.519)	lr 0.02246
Train [25][2500/3239]	Time 0.192 (0.546)	Data Time 0.001 (0.015)	Loss 3.1294 (3.2317)	Entropy 1.56767 (1.57598)	Top-1 acc 48.438 (46.978)	Top-5 acc 72.266 (70.515)	lr 0.02246
Train [25][2510/3239]	Time 0.215 (0.546)	Data Time 0.001 (0.015)	Loss 3.2899 (3.2317)	Entropy 1.56763 (1.57595)	Top-1 acc 42.578 (46.974)	Top-5 acc 70.703 (70.513)	lr 0.02246
Train [25][2520/3239]	Time 0.248 (0.562)	Data Time 0.002 (0.014)	Loss 3.2800 (3.2316)	Entropy 1.56759 (1.57591)	Top-1 acc 46.875 (46.975)	Top-5 acc 69.141 (70.517)	lr 0.02246
Train [25][2530/3239]	Time 0.231 (0.562)	Data Time 0.002 (0.014)	Loss 3.1682 (3.2316)	Entropy 1.56757 (1.57588)	Top-1 acc 46.484 (46.969)	Top-5 acc 75.000 (70.519)	lr 0.02246
Train [25][2540/3239]	Time 2.367 (0.561)	Data Time 0.002 (0.014)	Loss 3.2575 (3.2315)	Entropy 1.56757 (1.57585)	Top-1 acc 46.484 (46.970)	Top-5 acc 70.312 (70.519)	lr 0.02246
Train [25][2550/3239]	Time 0.222 (0.560)	Data Time 0.001 (0.014)	Loss 3.2046 (3.2315)	Entropy 1.56732 (1.57582)	Top-1 acc 43.750 (46.967)	Top-5 acc 70.312 (70.519)	lr 0.02246
Train [25][2560/3239]	Time 0.237 (0.560)	Data Time 0.002 (0.014)	Loss 3.0962 (3.2318)	Entropy 1.56736 (1.57578)	Top-1 acc 51.562 (46.960)	Top-5 acc 73.047 (70.511)	lr 0.02246
Train [25][2570/3239]	Time 0.219 (0.559)	Data Time 0.002 (0.014)	Loss 3.0841 (3.2315)	Entropy 1.56719 (1.57575)	Top-1 acc 50.781 (46.969)	Top-5 acc 71.875 (70.517)	lr 0.02246
Train [25][2580/3239]	Time 0.215 (0.559)	Data Time 0.001 (0.014)	Loss 3.1297 (3.2314)	Entropy 1.56718 (1.57572)	Top-1 acc 47.656 (46.973)	Top-5 acc 70.703 (70.517)	lr 0.02246
Train [25][2590/3239]	Time 0.281 (0.558)	Data Time 0.001 (0.014)	Loss 3.2567 (3.2314)	Entropy 1.56715 (1.57568)	Top-1 acc 44.922 (46.973)	Top-5 acc 71.094 (70.516)	lr 0.02246
Train [25][2600/3239]	Time 0.215 (0.558)	Data Time 0.001 (0.014)	Loss 3.3908 (3.2314)	Entropy 1.56714 (1.57565)	Top-1 acc 46.875 (46.976)	Top-5 acc 68.359 (70.517)	lr 0.02246
Train [25][2610/3239]	Time 0.206 (0.557)	Data Time 0.001 (0.014)	Loss 3.2389 (3.2312)	Entropy 1.56697 (1.57562)	Top-1 acc 48.047 (46.983)	Top-5 acc 70.312 (70.521)	lr 0.02246
Train [25][2620/3239]	Time 0.227 (0.557)	Data Time 0.002 (0.014)	Loss 3.1445 (3.2312)	Entropy 1.56691 (1.57558)	Top-1 acc 49.609 (46.985)	Top-5 acc 74.609 (70.524)	lr 0.02245
Train [25][2630/3239]	Time 0.199 (0.557)	Data Time 0.001 (0.014)	Loss 3.2421 (3.2311)	Entropy 1.56685 (1.57555)	Top-1 acc 47.266 (46.987)	Top-5 acc 71.094 (70.526)	lr 0.02245
Train [25][2640/3239]	Time 0.243 (0.556)	Data Time 0.001 (0.014)	Loss 3.2169 (3.2309)	Entropy 1.56667 (1.57552)	Top-1 acc 50.000 (46.991)	Top-5 acc 70.703 (70.531)	lr 0.02245
Train [25][2650/3239]	Time 0.326 (0.556)	Data Time 0.001 (0.014)	Loss 3.1696 (3.2309)	Entropy 1.56660 (1.57548)	Top-1 acc 48.438 (46.989)	Top-5 acc 72.266 (70.531)	lr 0.02245
Train [25][2660/3239]	Time 0.247 (0.555)	Data Time 0.001 (0.014)	Loss 3.0726 (3.2309)	Entropy 1.56659 (1.57545)	Top-1 acc 51.953 (46.986)	Top-5 acc 72.266 (70.533)	lr 0.02245
Train [25][2670/3239]	Time 0.206 (0.555)	Data Time 0.001 (0.014)	Loss 3.2393 (3.2311)	Entropy 1.56652 (1.57542)	Top-1 acc 50.781 (46.983)	Top-5 acc 71.484 (70.526)	lr 0.02245
Train [25][2680/3239]	Time 0.278 (0.554)	Data Time 0.001 (0.014)	Loss 2.9299 (3.2311)	Entropy 1.56650 (1.57538)	Top-1 acc 53.125 (46.979)	Top-5 acc 75.391 (70.524)	lr 0.02245
Train [25][2690/3239]	Time 0.236 (0.554)	Data Time 0.001 (0.014)	Loss 3.2885 (3.2313)	Entropy 1.56627 (1.57535)	Top-1 acc 49.609 (46.977)	Top-5 acc 70.312 (70.523)	lr 0.02245
Train [25][2700/3239]	Time 0.294 (0.554)	Data Time 0.001 (0.014)	Loss 3.4250 (3.2310)	Entropy 1.56626 (1.57532)	Top-1 acc 40.234 (46.980)	Top-5 acc 65.234 (70.530)	lr 0.02245
Train [25][2710/3239]	Time 0.223 (0.553)	Data Time 0.001 (0.014)	Loss 3.1126 (3.2308)	Entropy 1.56617 (1.57528)	Top-1 acc 48.828 (46.981)	Top-5 acc 73.828 (70.533)	lr 0.02245
Train [25][2720/3239]	Time 0.236 (0.553)	Data Time 0.001 (0.014)	Loss 3.1362 (3.2307)	Entropy 1.56619 (1.57525)	Top-1 acc 48.438 (46.983)	Top-5 acc 74.219 (70.535)	lr 0.02245
Train [25][2730/3239]	Time 0.203 (0.552)	Data Time 0.001 (0.013)	Loss 3.2379 (3.2306)	Entropy 1.56617 (1.57522)	Top-1 acc 44.922 (46.986)	Top-5 acc 68.359 (70.537)	lr 0.02245
Train [25][2740/3239]	Time 0.246 (0.552)	Data Time 0.001 (0.013)	Loss 3.2708 (3.2307)	Entropy 1.56616 (1.57518)	Top-1 acc 46.484 (46.984)	Top-5 acc 69.922 (70.534)	lr 0.02245
Train [25][2750/3239]	Time 0.192 (0.552)	Data Time 0.001 (0.013)	Loss 3.2148 (3.2305)	Entropy 1.56614 (1.57515)	Top-1 acc 50.391 (46.985)	Top-5 acc 71.875 (70.538)	lr 0.02245
Train [25][2760/3239]	Time 0.277 (0.551)	Data Time 0.001 (0.013)	Loss 3.1650 (3.2306)	Entropy 1.56609 (1.57512)	Top-1 acc 49.609 (46.982)	Top-5 acc 71.875 (70.536)	lr 0.02245
Train [25][2770/3239]	Time 0.225 (0.551)	Data Time 0.001 (0.013)	Loss 3.4053 (3.2308)	Entropy 1.56602 (1.57509)	Top-1 acc 42.578 (46.975)	Top-5 acc 67.578 (70.532)	lr 0.02245
Train [25][2780/3239]	Time 0.253 (0.550)	Data Time 0.001 (0.013)	Loss 3.1843 (3.2309)	Entropy 1.56589 (1.57505)	Top-1 acc 47.656 (46.970)	Top-5 acc 70.703 (70.528)	lr 0.02245
Train [25][2790/3239]	Time 0.191 (0.550)	Data Time 0.001 (0.013)	Loss 3.2720 (3.2308)	Entropy 1.56577 (1.57502)	Top-1 acc 45.703 (46.976)	Top-5 acc 71.875 (70.530)	lr 0.02244
Train [25][2800/3239]	Time 0.289 (0.550)	Data Time 0.002 (0.013)	Loss 3.2310 (3.2306)	Entropy 1.56573 (1.57499)	Top-1 acc 48.047 (46.978)	Top-5 acc 69.141 (70.534)	lr 0.02244
Train [25][2810/3239]	Time 0.231 (0.549)	Data Time 0.001 (0.013)	Loss 3.1343 (3.2305)	Entropy 1.56572 (1.57496)	Top-1 acc 50.781 (46.981)	Top-5 acc 69.922 (70.537)	lr 0.02244
Train [25][2820/3239]	Time 0.247 (0.549)	Data Time 0.001 (0.013)	Loss 3.2531 (3.2306)	Entropy 1.56557 (1.57492)	Top-1 acc 47.656 (46.979)	Top-5 acc 68.750 (70.533)	lr 0.02244
Train [25][2830/3239]	Time 0.320 (0.549)	Data Time 0.001 (0.013)	Loss 3.3490 (3.2306)	Entropy 1.56561 (1.57489)	Top-1 acc 41.016 (46.980)	Top-5 acc 67.578 (70.533)	lr 0.02244
Train [25][2840/3239]	Time 0.250 (0.548)	Data Time 0.001 (0.013)	Loss 3.1379 (3.2306)	Entropy 1.56562 (1.57486)	Top-1 acc 46.484 (46.979)	Top-5 acc 71.875 (70.532)	lr 0.02244
Train [25][2850/3239]	Time 0.219 (0.548)	Data Time 0.001 (0.013)	Loss 3.3931 (3.2306)	Entropy 1.56538 (1.57482)	Top-1 acc 44.531 (46.979)	Top-5 acc 66.016 (70.531)	lr 0.02244
Train [25][2860/3239]	Time 0.228 (0.563)	Data Time 0.004 (0.013)	Loss 3.2804 (3.2305)	Entropy 1.56536 (1.57479)	Top-1 acc 44.531 (46.979)	Top-5 acc 68.359 (70.535)	lr 0.02244
Train [25][2870/3239]	Time 0.217 (0.562)	Data Time 0.002 (0.013)	Loss 3.4581 (3.2307)	Entropy 1.56534 (1.57476)	Top-1 acc 44.141 (46.975)	Top-5 acc 63.672 (70.533)	lr 0.02244
Train [25][2880/3239]	Time 0.201 (0.562)	Data Time 0.001 (0.013)	Loss 3.1957 (3.2308)	Entropy 1.56527 (1.57473)	Top-1 acc 45.312 (46.971)	Top-5 acc 73.828 (70.533)	lr 0.02244
Train [25][2890/3239]	Time 0.260 (0.562)	Data Time 0.001 (0.013)	Loss 3.2987 (3.2310)	Entropy 1.56517 (1.57469)	Top-1 acc 46.094 (46.971)	Top-5 acc 67.188 (70.529)	lr 0.02244
Train [25][2900/3239]	Time 0.236 (0.561)	Data Time 0.001 (0.013)	Loss 3.1055 (3.2309)	Entropy 1.56511 (1.57466)	Top-1 acc 54.297 (46.971)	Top-5 acc 73.438 (70.528)	lr 0.02244
Train [25][2910/3239]	Time 0.263 (0.561)	Data Time 0.001 (0.013)	Loss 3.1209 (3.2308)	Entropy 1.56508 (1.57463)	Top-1 acc 49.219 (46.975)	Top-5 acc 70.312 (70.530)	lr 0.02244
Train [25][2920/3239]	Time 0.260 (0.560)	Data Time 0.001 (0.013)	Loss 3.0071 (3.2306)	Entropy 1.56497 (1.57459)	Top-1 acc 48.047 (46.975)	Top-5 acc 75.781 (70.533)	lr 0.02244
Train [25][2930/3239]	Time 0.224 (0.560)	Data Time 0.001 (0.013)	Loss 3.3354 (3.2309)	Entropy 1.56488 (1.57456)	Top-1 acc 44.922 (46.969)	Top-5 acc 65.625 (70.524)	lr 0.02244
Train [25][2940/3239]	Time 0.196 (0.560)	Data Time 0.001 (0.013)	Loss 3.3235 (3.2309)	Entropy 1.56508 (1.57453)	Top-1 acc 43.359 (46.968)	Top-5 acc 68.359 (70.527)	lr 0.02244
Train [25][2950/3239]	Time 0.362 (0.559)	Data Time 0.001 (0.013)	Loss 3.2236 (3.2309)	Entropy 1.56503 (1.57450)	Top-1 acc 50.391 (46.969)	Top-5 acc 68.750 (70.524)	lr 0.02243
Train [25][2960/3239]	Time 0.208 (0.559)	Data Time 0.001 (0.013)	Loss 3.2858 (3.2312)	Entropy 1.56484 (1.57446)	Top-1 acc 49.219 (46.965)	Top-5 acc 71.875 (70.521)	lr 0.02243
Train [25][2970/3239]	Time 0.196 (0.558)	Data Time 0.003 (0.013)	Loss 3.1637 (3.2313)	Entropy 1.56473 (1.57443)	Top-1 acc 50.391 (46.964)	Top-5 acc 71.875 (70.519)	lr 0.02243
Train [25][2980/3239]	Time 0.239 (0.558)	Data Time 0.001 (0.013)	Loss 3.2842 (3.2312)	Entropy 1.56468 (1.57440)	Top-1 acc 46.875 (46.969)	Top-5 acc 69.922 (70.522)	lr 0.02243
Train [25][2990/3239]	Time 0.175 (0.558)	Data Time 0.001 (0.012)	Loss 3.3297 (3.2314)	Entropy 1.56466 (1.57437)	Top-1 acc 46.094 (46.965)	Top-5 acc 66.406 (70.519)	lr 0.02243
Train [25][3000/3239]	Time 0.220 (0.557)	Data Time 0.001 (0.012)	Loss 3.0863 (3.2312)	Entropy 1.56463 (1.57433)	Top-1 acc 50.000 (46.970)	Top-5 acc 74.219 (70.519)	lr 0.02243
Train [25][3010/3239]	Time 0.329 (0.557)	Data Time 0.001 (0.012)	Loss 3.3897 (3.2312)	Entropy 1.56460 (1.57430)	Top-1 acc 41.016 (46.971)	Top-5 acc 66.797 (70.518)	lr 0.02243
Train [25][3020/3239]	Time 0.241 (0.557)	Data Time 0.001 (0.012)	Loss 3.2443 (3.2310)	Entropy 1.56439 (1.57427)	Top-1 acc 48.828 (46.975)	Top-5 acc 71.875 (70.519)	lr 0.02243
Train [25][3030/3239]	Time 0.250 (0.556)	Data Time 0.001 (0.012)	Loss 3.2010 (3.2309)	Entropy 1.56430 (1.57424)	Top-1 acc 46.094 (46.977)	Top-5 acc 70.312 (70.522)	lr 0.02243
Train [25][3040/3239]	Time 0.247 (0.556)	Data Time 0.002 (0.012)	Loss 3.3072 (3.2311)	Entropy 1.56427 (1.57421)	Top-1 acc 44.922 (46.972)	Top-5 acc 70.703 (70.518)	lr 0.02243
Train [25][3050/3239]	Time 0.238 (0.556)	Data Time 0.001 (0.012)	Loss 3.3676 (3.2313)	Entropy 1.56414 (1.57417)	Top-1 acc 44.141 (46.969)	Top-5 acc 68.359 (70.516)	lr 0.02243
Train [25][3060/3239]	Time 0.261 (0.555)	Data Time 0.001 (0.012)	Loss 3.3365 (3.2313)	Entropy 1.56411 (1.57414)	Top-1 acc 43.750 (46.972)	Top-5 acc 66.406 (70.515)	lr 0.02243
Train [25][3070/3239]	Time 0.411 (0.555)	Data Time 0.001 (0.012)	Loss 3.3923 (3.2314)	Entropy 1.56434 (1.57411)	Top-1 acc 41.797 (46.970)	Top-5 acc 66.797 (70.513)	lr 0.02243
Train [25][3080/3239]	Time 0.226 (0.555)	Data Time 0.001 (0.012)	Loss 3.0672 (3.2312)	Entropy 1.56426 (1.57408)	Top-1 acc 48.047 (46.971)	Top-5 acc 73.828 (70.516)	lr 0.02243
Train [25][3090/3239]	Time 0.207 (0.554)	Data Time 0.001 (0.012)	Loss 3.3838 (3.2311)	Entropy 1.56415 (1.57404)	Top-1 acc 44.141 (46.970)	Top-5 acc 69.531 (70.519)	lr 0.02243
Train [25][3100/3239]	Time 0.300 (0.554)	Data Time 0.001 (0.012)	Loss 3.0798 (3.2312)	Entropy 1.56412 (1.57401)	Top-1 acc 51.562 (46.971)	Top-5 acc 73.047 (70.516)	lr 0.02243
Train [25][3110/3239]	Time 0.203 (0.554)	Data Time 0.002 (0.012)	Loss 3.2622 (3.2313)	Entropy 1.56404 (1.57398)	Top-1 acc 48.047 (46.971)	Top-5 acc 69.141 (70.514)	lr 0.02242
Train [25][3120/3239]	Time 0.237 (0.553)	Data Time 0.002 (0.012)	Loss 3.3238 (3.2313)	Entropy 1.56401 (1.57395)	Top-1 acc 42.578 (46.969)	Top-5 acc 67.969 (70.514)	lr 0.02242
Train [25][3130/3239]	Time 0.347 (0.553)	Data Time 0.001 (0.012)	Loss 3.1868 (3.2314)	Entropy 1.56400 (1.57392)	Top-1 acc 47.656 (46.966)	Top-5 acc 71.484 (70.514)	lr 0.02242
Train [25][3140/3239]	Time 0.216 (0.553)	Data Time 0.001 (0.012)	Loss 3.3450 (3.2314)	Entropy 1.56401 (1.57388)	Top-1 acc 42.578 (46.967)	Top-5 acc 66.406 (70.513)	lr 0.02242
Train [25][3150/3239]	Time 0.188 (0.552)	Data Time 0.001 (0.012)	Loss 3.3609 (3.2314)	Entropy 1.56395 (1.57385)	Top-1 acc 44.922 (46.965)	Top-5 acc 69.531 (70.513)	lr 0.02242
Train [25][3160/3239]	Time 0.219 (0.552)	Data Time 0.001 (0.012)	Loss 3.0386 (3.2316)	Entropy 1.56387 (1.57382)	Top-1 acc 48.438 (46.964)	Top-5 acc 75.781 (70.512)	lr 0.02242
Train [25][3170/3239]	Time 0.207 (0.552)	Data Time 0.001 (0.012)	Loss 3.3285 (3.2316)	Entropy 1.56380 (1.57379)	Top-1 acc 46.094 (46.963)	Top-5 acc 69.922 (70.512)	lr 0.02242
Train [25][3180/3239]	Time 0.242 (0.551)	Data Time 0.000 (0.012)	Loss 3.1396 (3.2316)	Entropy 1.56380 (1.57376)	Top-1 acc 50.000 (46.966)	Top-5 acc 71.875 (70.512)	lr 0.02242
Train [25][3190/3239]	Time 0.259 (0.565)	Data Time 0.000 (0.012)	Loss 3.0994 (3.2314)	Entropy 1.56370 (1.57373)	Top-1 acc 48.438 (46.967)	Top-5 acc 74.609 (70.515)	lr 0.02242
Train [25][3200/3239]	Time 0.204 (0.565)	Data Time 0.000 (0.012)	Loss 3.3313 (3.2313)	Entropy 1.56364 (1.57370)	Top-1 acc 42.969 (46.967)	Top-5 acc 68.359 (70.515)	lr 0.02242
Train [25][3210/3239]	Time 0.234 (0.564)	Data Time 0.000 (0.012)	Loss 3.1171 (3.2313)	Entropy 1.56360 (1.57367)	Top-1 acc 51.953 (46.968)	Top-5 acc 72.656 (70.517)	lr 0.02242
Train [25][3220/3239]	Time 0.216 (0.564)	Data Time 0.000 (0.012)	Loss 3.2526 (3.2313)	Entropy 1.56358 (1.57363)	Top-1 acc 48.047 (46.966)	Top-5 acc 69.922 (70.517)	lr 0.02242
Train [25][3230/3239]	Time 0.230 (0.563)	Data Time 0.000 (0.012)	Loss 3.0580 (3.2311)	Entropy 1.56349 (1.57360)	Top-1 acc 51.562 (46.969)	Top-5 acc 73.047 (70.521)	lr 0.02242
Train [25][3239/3239]	Time 2.192 (0.563)	Data Time 0.000 (0.012)	Loss 3.9686 (3.2312)	Entropy 1.56349 (1.57357)	Top-1 acc 33.333 (46.965)	Top-5 acc 51.852 (70.519)	lr 0.02242
==========Valid [25/120]	loss 2.019	top-1 acc 55.510 (55.510)	top-5 acc 78.270	Train top-1 46.965	top-5 70.519	Entropy 1.56349	Latency-None: 0.000ms	Flops: 559.63M
Train [26][0/3239]	Time 29.119 (29.119)	Data Time 27.828 (27.828)	Loss 3.0808 (3.0808)	Entropy 1.56335 (1.56335)	Top-1 acc 50.391 (50.391)	Top-5 acc 72.656 (72.656)	lr 0.02242
Train [26][10/3239]	Time 2.555 (3.293)	Data Time 0.001 (2.654)	Loss 3.0850 (3.1988)	Entropy 1.56335 (1.56335)	Top-1 acc 53.125 (49.006)	Top-5 acc 74.219 (70.668)	lr 0.02242
Train [26][20/3239]	Time 0.226 (1.835)	Data Time 0.001 (1.391)	Loss 3.0260 (3.1717)	Entropy 1.56332 (1.56334)	Top-1 acc 50.781 (49.256)	Top-5 acc 75.391 (71.373)	lr 0.02242
Train [26][30/3239]	Time 0.223 (1.386)	Data Time 0.001 (0.943)	Loss 3.4606 (3.1790)	Entropy 1.56319 (1.56329)	Top-1 acc 40.234 (48.601)	Top-5 acc 67.188 (71.258)	lr 0.02242
Train [26][40/3239]	Time 0.221 (1.152)	Data Time 0.001 (0.713)	Loss 3.1159 (3.1958)	Entropy 1.56313 (1.56326)	Top-1 acc 50.781 (47.990)	Top-5 acc 70.312 (70.970)	lr 0.02241
Train [26][50/3239]	Time 0.203 (1.011)	Data Time 0.001 (0.574)	Loss 3.0923 (3.1900)	Entropy 1.56304 (1.56322)	Top-1 acc 50.000 (48.001)	Top-5 acc 74.219 (71.086)	lr 0.02241
Train [26][60/3239]	Time 0.325 (0.917)	Data Time 0.001 (0.480)	Loss 3.1522 (3.1839)	Entropy 1.56298 (1.56318)	Top-1 acc 49.219 (48.098)	Top-5 acc 73.047 (71.382)	lr 0.02241
Train [26][70/3239]	Time 0.269 (0.855)	Data Time 0.003 (0.412)	Loss 3.2389 (3.1882)	Entropy 1.56292 (1.56315)	Top-1 acc 47.266 (47.997)	Top-5 acc 67.969 (71.182)	lr 0.02241
Train [26][80/3239]	Time 0.211 (0.810)	Data Time 0.001 (0.362)	Loss 3.1303 (3.1897)	Entropy 1.56290 (1.56312)	Top-1 acc 50.781 (48.081)	Top-5 acc 69.922 (71.094)	lr 0.02241
Train [26][90/3239]	Time 0.253 (0.777)	Data Time 0.002 (0.322)	Loss 3.3031 (3.1861)	Entropy 1.56294 (1.56310)	Top-1 acc 45.312 (48.068)	Top-5 acc 67.578 (71.214)	lr 0.02241
Train [26][100/3239]	Time 0.164 (0.750)	Data Time 0.001 (0.291)	Loss 3.3412 (3.1908)	Entropy 1.56275 (1.56308)	Top-1 acc 46.094 (47.993)	Top-5 acc 67.578 (71.175)	lr 0.02241
Train [26][110/3239]	Time 0.347 (0.727)	Data Time 0.001 (0.265)	Loss 3.2214 (3.1890)	Entropy 1.56266 (1.56305)	Top-1 acc 47.266 (48.219)	Top-5 acc 69.922 (71.178)	lr 0.02241
Train [26][120/3239]	Time 2.457 (0.705)	Data Time 0.001 (0.243)	Loss 3.2020 (3.1886)	Entropy 1.56266 (1.56302)	Top-1 acc 48.047 (48.228)	Top-5 acc 69.141 (71.213)	lr 0.02241
Train [26][130/3239]	Time 0.249 (0.670)	Data Time 0.001 (0.225)	Loss 3.1546 (3.1877)	Entropy 1.56265 (1.56299)	Top-1 acc 50.391 (48.232)	Top-5 acc 73.828 (71.231)	lr 0.02241
Train [26][140/3239]	Time 0.204 (0.654)	Data Time 0.001 (0.209)	Loss 3.2383 (3.1887)	Entropy 1.56267 (1.56296)	Top-1 acc 48.438 (48.174)	Top-5 acc 70.312 (71.210)	lr 0.02241
Train [26][150/3239]	Time 0.237 (0.641)	Data Time 0.001 (0.195)	Loss 3.2260 (3.1906)	Entropy 1.56253 (1.56294)	Top-1 acc 45.312 (48.078)	Top-5 acc 67.969 (71.145)	lr 0.02241
Train [26][160/3239]	Time 0.146 (0.629)	Data Time 0.001 (0.183)	Loss 3.2264 (3.1901)	Entropy 1.56232 (1.56290)	Top-1 acc 47.266 (47.977)	Top-5 acc 71.484 (71.261)	lr 0.02241
Train [26][170/3239]	Time 0.346 (0.620)	Data Time 0.001 (0.173)	Loss 3.0070 (3.1902)	Entropy 1.56225 (1.56287)	Top-1 acc 51.953 (48.006)	Top-5 acc 75.781 (71.265)	lr 0.02241
Train [26][180/3239]	Time 0.215 (0.610)	Data Time 0.001 (0.164)	Loss 3.3433 (3.1909)	Entropy 1.56222 (1.56283)	Top-1 acc 42.188 (48.010)	Top-5 acc 73.047 (71.236)	lr 0.02241
Train [26][190/3239]	Time 0.227 (0.601)	Data Time 0.001 (0.155)	Loss 3.1330 (3.1940)	Entropy 1.56212 (1.56280)	Top-1 acc 48.828 (47.953)	Top-5 acc 71.875 (71.198)	lr 0.02241
Train [26][200/3239]	Time 0.207 (0.593)	Data Time 0.001 (0.147)	Loss 3.0762 (3.1931)	Entropy 1.56208 (1.56276)	Top-1 acc 47.656 (47.936)	Top-5 acc 77.344 (71.267)	lr 0.02240
Train [26][210/3239]	Time 0.226 (0.586)	Data Time 0.001 (0.141)	Loss 3.2419 (3.1939)	Entropy 1.56204 (1.56273)	Top-1 acc 46.875 (47.852)	Top-5 acc 71.875 (71.251)	lr 0.02240
Train [26][220/3239]	Time 0.199 (0.580)	Data Time 0.001 (0.134)	Loss 3.4589 (3.1972)	Entropy 1.56202 (1.56270)	Top-1 acc 43.359 (47.794)	Top-5 acc 67.969 (71.214)	lr 0.02240
Train [26][230/3239]	Time 2.407 (0.574)	Data Time 0.001 (0.128)	Loss 3.3185 (3.1970)	Entropy 1.56202 (1.56267)	Top-1 acc 43.359 (47.763)	Top-5 acc 70.312 (71.239)	lr 0.02240
Train [26][240/3239]	Time 0.214 (0.559)	Data Time 0.001 (0.123)	Loss 3.2235 (3.1956)	Entropy 1.56195 (1.56264)	Top-1 acc 42.969 (47.807)	Top-5 acc 71.875 (71.266)	lr 0.02240
Train [26][250/3239]	Time 0.207 (0.555)	Data Time 0.002 (0.118)	Loss 3.1065 (3.1946)	Entropy 1.56196 (1.56261)	Top-1 acc 48.828 (47.831)	Top-5 acc 75.391 (71.276)	lr 0.02240
Train [26][260/3239]	Time 0.207 (0.551)	Data Time 0.001 (0.114)	Loss 3.3560 (3.1958)	Entropy 1.56194 (1.56259)	Top-1 acc 44.531 (47.777)	Top-5 acc 67.578 (71.257)	lr 0.02240
Train [26][270/3239]	Time 0.188 (0.547)	Data Time 0.001 (0.110)	Loss 3.2689 (3.1954)	Entropy 1.56195 (1.56256)	Top-1 acc 46.094 (47.756)	Top-5 acc 71.484 (71.249)	lr 0.02240
Train [26][280/3239]	Time 0.221 (0.543)	Data Time 0.001 (0.106)	Loss 3.0797 (3.1948)	Entropy 1.56186 (1.56254)	Top-1 acc 46.875 (47.751)	Top-5 acc 73.438 (71.261)	lr 0.02240
Train [26][290/3239]	Time 0.303 (0.540)	Data Time 0.001 (0.102)	Loss 3.3026 (3.1950)	Entropy 1.56182 (1.56252)	Top-1 acc 43.750 (47.735)	Top-5 acc 67.578 (71.249)	lr 0.02240
Train [26][300/3239]	Time 0.204 (0.537)	Data Time 0.002 (0.099)	Loss 3.2760 (3.1950)	Entropy 1.56177 (1.56249)	Top-1 acc 44.922 (47.721)	Top-5 acc 70.703 (71.268)	lr 0.02240
Train [26][310/3239]	Time 0.271 (0.664)	Data Time 0.003 (0.096)	Loss 3.3208 (3.1942)	Entropy 1.56173 (1.56247)	Top-1 acc 41.016 (47.724)	Top-5 acc 71.094 (71.273)	lr 0.02240
Train [26][320/3239]	Time 0.219 (0.662)	Data Time 0.002 (0.093)	Loss 3.2244 (3.1942)	Entropy 1.56166 (1.56244)	Top-1 acc 44.922 (47.724)	Top-5 acc 73.828 (71.295)	lr 0.02240
Train [26][330/3239]	Time 0.251 (0.656)	Data Time 0.002 (0.090)	Loss 3.4785 (3.1950)	Entropy 1.56160 (1.56242)	Top-1 acc 39.062 (47.693)	Top-5 acc 67.188 (71.291)	lr 0.02240
Train [26][340/3239]	Time 2.336 (0.650)	Data Time 0.001 (0.088)	Loss 2.9948 (3.1952)	Entropy 1.56160 (1.56240)	Top-1 acc 51.562 (47.701)	Top-5 acc 78.906 (71.313)	lr 0.02240
Train [26][350/3239]	Time 0.255 (0.638)	Data Time 0.001 (0.085)	Loss 3.1547 (3.1950)	Entropy 1.56154 (1.56237)	Top-1 acc 47.656 (47.676)	Top-5 acc 70.703 (71.297)	lr 0.02240
Train [26][360/3239]	Time 0.221 (0.633)	Data Time 0.002 (0.083)	Loss 3.1971 (3.1958)	Entropy 1.56150 (1.56235)	Top-1 acc 44.922 (47.656)	Top-5 acc 70.703 (71.250)	lr 0.02239
Train [26][370/3239]	Time 0.244 (0.628)	Data Time 0.001 (0.081)	Loss 3.1467 (3.1954)	Entropy 1.56147 (1.56232)	Top-1 acc 48.047 (47.675)	Top-5 acc 70.703 (71.239)	lr 0.02239
Train [26][380/3239]	Time 0.217 (0.623)	Data Time 0.001 (0.079)	Loss 3.3380 (3.1974)	Entropy 1.56145 (1.56230)	Top-1 acc 43.750 (47.647)	Top-5 acc 68.359 (71.198)	lr 0.02239
Train [26][390/3239]	Time 0.239 (0.619)	Data Time 0.001 (0.077)	Loss 3.2435 (3.1980)	Entropy 1.56142 (1.56228)	Top-1 acc 42.578 (47.657)	Top-5 acc 69.531 (71.201)	lr 0.02239
Train [26][400/3239]	Time 0.211 (0.614)	Data Time 0.001 (0.075)	Loss 3.2369 (3.1970)	Entropy 1.56135 (1.56226)	Top-1 acc 44.141 (47.684)	Top-5 acc 72.266 (71.220)	lr 0.02239
Train [26][410/3239]	Time 0.301 (0.610)	Data Time 0.001 (0.073)	Loss 3.3879 (3.1989)	Entropy 1.56132 (1.56224)	Top-1 acc 42.188 (47.642)	Top-5 acc 68.750 (71.194)	lr 0.02239
Train [26][420/3239]	Time 0.208 (0.606)	Data Time 0.001 (0.071)	Loss 3.2066 (3.1999)	Entropy 1.56121 (1.56221)	Top-1 acc 51.172 (47.633)	Top-5 acc 71.484 (71.161)	lr 0.02239
Train [26][430/3239]	Time 0.220 (0.602)	Data Time 0.002 (0.070)	Loss 3.0672 (3.2005)	Entropy 1.56120 (1.56219)	Top-1 acc 51.172 (47.619)	Top-5 acc 75.391 (71.163)	lr 0.02239
Train [26][440/3239]	Time 0.215 (0.598)	Data Time 0.001 (0.068)	Loss 3.2139 (3.2001)	Entropy 1.56114 (1.56217)	Top-1 acc 48.047 (47.639)	Top-5 acc 73.047 (71.177)	lr 0.02239
Train [26][450/3239]	Time 2.445 (0.595)	Data Time 0.001 (0.067)	Loss 3.2275 (3.2000)	Entropy 1.56114 (1.56214)	Top-1 acc 44.141 (47.632)	Top-5 acc 72.266 (71.182)	lr 0.02239
Train [26][460/3239]	Time 0.260 (0.587)	Data Time 0.001 (0.065)	Loss 3.2768 (3.2003)	Entropy 1.56113 (1.56212)	Top-1 acc 44.531 (47.631)	Top-5 acc 67.969 (71.167)	lr 0.02239
Train [26][470/3239]	Time 0.316 (0.584)	Data Time 0.001 (0.064)	Loss 3.1665 (3.2007)	Entropy 1.56112 (1.56210)	Top-1 acc 45.312 (47.616)	Top-5 acc 68.359 (71.146)	lr 0.02239
Train [26][480/3239]	Time 0.259 (0.582)	Data Time 0.001 (0.063)	Loss 3.1221 (3.1999)	Entropy 1.56104 (1.56208)	Top-1 acc 49.609 (47.655)	Top-5 acc 73.828 (71.151)	lr 0.02239
Train [26][490/3239]	Time 0.231 (0.579)	Data Time 0.001 (0.061)	Loss 3.1560 (3.1993)	Entropy 1.56107 (1.56206)	Top-1 acc 47.656 (47.651)	Top-5 acc 70.703 (71.149)	lr 0.02239
Train [26][500/3239]	Time 0.239 (0.577)	Data Time 0.001 (0.060)	Loss 3.0402 (3.1981)	Entropy 1.56105 (1.56204)	Top-1 acc 55.469 (47.673)	Top-5 acc 76.562 (71.174)	lr 0.02239
Train [26][510/3239]	Time 0.201 (0.574)	Data Time 0.001 (0.059)	Loss 3.0882 (3.1979)	Entropy 1.56103 (1.56202)	Top-1 acc 50.000 (47.685)	Top-5 acc 75.000 (71.188)	lr 0.02239
Train [26][520/3239]	Time 0.229 (0.572)	Data Time 0.001 (0.058)	Loss 3.1619 (3.1977)	Entropy 1.56087 (1.56200)	Top-1 acc 44.922 (47.679)	Top-5 acc 72.266 (71.211)	lr 0.02238
Train [26][530/3239]	Time 0.262 (0.570)	Data Time 0.001 (0.057)	Loss 3.0226 (3.1971)	Entropy 1.56088 (1.56198)	Top-1 acc 51.953 (47.700)	Top-5 acc 73.047 (71.219)	lr 0.02238
Train [26][540/3239]	Time 0.207 (0.567)	Data Time 0.001 (0.056)	Loss 3.1202 (3.1960)	Entropy 1.56086 (1.56196)	Top-1 acc 46.875 (47.729)	Top-5 acc 74.609 (71.231)	lr 0.02238
Train [26][550/3239]	Time 0.216 (0.565)	Data Time 0.001 (0.055)	Loss 3.2570 (3.1965)	Entropy 1.56080 (1.56194)	Top-1 acc 42.578 (47.691)	Top-5 acc 70.703 (71.219)	lr 0.02238
Train [26][560/3239]	Time 2.386 (0.563)	Data Time 0.001 (0.054)	Loss 3.0347 (3.1966)	Entropy 1.56080 (1.56192)	Top-1 acc 51.953 (47.690)	Top-5 acc 73.047 (71.216)	lr 0.02238
Train [26][570/3239]	Time 0.207 (0.557)	Data Time 0.002 (0.053)	Loss 3.1827 (3.1971)	Entropy 1.56073 (1.56190)	Top-1 acc 48.047 (47.680)	Top-5 acc 71.484 (71.200)	lr 0.02238
Train [26][580/3239]	Time 0.239 (0.555)	Data Time 0.001 (0.052)	Loss 3.1633 (3.1972)	Entropy 1.56073 (1.56188)	Top-1 acc 50.000 (47.685)	Top-5 acc 71.875 (71.195)	lr 0.02238
Train [26][590/3239]	Time 0.288 (0.553)	Data Time 0.001 (0.051)	Loss 3.0852 (3.1969)	Entropy 1.56075 (1.56186)	Top-1 acc 51.562 (47.698)	Top-5 acc 73.047 (71.200)	lr 0.02238
Train [26][600/3239]	Time 0.206 (0.552)	Data Time 0.001 (0.050)	Loss 3.2608 (3.1973)	Entropy 1.56064 (1.56184)	Top-1 acc 44.922 (47.675)	Top-5 acc 69.531 (71.198)	lr 0.02238
Train [26][610/3239]	Time 0.222 (0.550)	Data Time 0.001 (0.050)	Loss 2.9967 (3.1977)	Entropy 1.56047 (1.56182)	Top-1 acc 50.781 (47.668)	Top-5 acc 76.953 (71.184)	lr 0.02238
Train [26][620/3239]	Time 0.146 (0.548)	Data Time 0.001 (0.049)	Loss 3.2418 (3.1977)	Entropy 1.56042 (1.56179)	Top-1 acc 50.391 (47.685)	Top-5 acc 70.312 (71.191)	lr 0.02238
Train [26][630/3239]	Time 0.171 (0.546)	Data Time 0.001 (0.048)	Loss 3.2274 (3.1982)	Entropy 1.56019 (1.56177)	Top-1 acc 49.609 (47.667)	Top-5 acc 70.312 (71.182)	lr 0.02238
Train [26][640/3239]	Time 0.209 (0.545)	Data Time 0.001 (0.048)	Loss 3.2196 (3.1991)	Entropy 1.56017 (1.56175)	Top-1 acc 46.875 (47.650)	Top-5 acc 72.656 (71.177)	lr 0.02238
Train [26][650/3239]	Time 0.218 (0.543)	Data Time 0.001 (0.047)	Loss 3.2473 (3.1994)	Entropy 1.55997 (1.56172)	Top-1 acc 44.922 (47.641)	Top-5 acc 70.703 (71.173)	lr 0.02238
Train [26][660/3239]	Time 0.201 (0.542)	Data Time 0.001 (0.046)	Loss 3.2915 (3.1995)	Entropy 1.55986 (1.56169)	Top-1 acc 44.531 (47.644)	Top-5 acc 70.703 (71.172)	lr 0.02238
Train [26][670/3239]	Time 47.124 (0.607)	Data Time 0.001 (0.046)	Loss 3.0889 (3.2000)	Entropy 1.55986 (1.56167)	Top-1 acc 50.000 (47.636)	Top-5 acc 73.438 (71.163)	lr 0.02238
Train [26][680/3239]	Time 0.214 (0.601)	Data Time 0.002 (0.045)	Loss 3.1855 (3.2001)	Entropy 1.55986 (1.56164)	Top-1 acc 49.609 (47.647)	Top-5 acc 72.266 (71.156)	lr 0.02237
Train [26][690/3239]	Time 0.238 (0.599)	Data Time 0.002 (0.044)	Loss 3.2366 (3.2005)	Entropy 1.55978 (1.56161)	Top-1 acc 46.484 (47.628)	Top-5 acc 69.141 (71.146)	lr 0.02237
Train [26][700/3239]	Time 0.224 (0.597)	Data Time 0.002 (0.044)	Loss 3.1367 (3.2012)	Entropy 1.55961 (1.56159)	Top-1 acc 49.609 (47.618)	Top-5 acc 73.438 (71.133)	lr 0.02237
Train [26][710/3239]	Time 0.295 (0.595)	Data Time 0.001 (0.043)	Loss 2.9965 (3.2018)	Entropy 1.55945 (1.56156)	Top-1 acc 53.125 (47.616)	Top-5 acc 76.953 (71.111)	lr 0.02237
Train [26][720/3239]	Time 0.144 (0.593)	Data Time 0.001 (0.042)	Loss 3.2440 (3.2023)	Entropy 1.55919 (1.56153)	Top-1 acc 44.922 (47.585)	Top-5 acc 70.703 (71.115)	lr 0.02237
Train [26][730/3239]	Time 0.205 (0.591)	Data Time 0.001 (0.042)	Loss 3.2348 (3.2026)	Entropy 1.55905 (1.56149)	Top-1 acc 46.875 (47.581)	Top-5 acc 71.094 (71.110)	lr 0.02237
Train [26][740/3239]	Time 0.241 (0.589)	Data Time 0.001 (0.041)	Loss 3.1739 (3.2020)	Entropy 1.55901 (1.56146)	Top-1 acc 50.391 (47.583)	Top-5 acc 72.266 (71.109)	lr 0.02237
Train [26][750/3239]	Time 0.230 (0.587)	Data Time 0.001 (0.041)	Loss 3.0750 (3.2027)	Entropy 1.55897 (1.56143)	Top-1 acc 48.438 (47.571)	Top-5 acc 72.656 (71.095)	lr 0.02237
Train [26][760/3239]	Time 0.224 (0.585)	Data Time 0.003 (0.040)	Loss 3.2500 (3.2021)	Entropy 1.55874 (1.56139)	Top-1 acc 45.703 (47.575)	Top-5 acc 71.094 (71.105)	lr 0.02237
Train [26][770/3239]	Time 0.301 (0.584)	Data Time 0.001 (0.040)	Loss 3.1882 (3.2023)	Entropy 1.55864 (1.56136)	Top-1 acc 48.047 (47.578)	Top-5 acc 72.656 (71.110)	lr 0.02237
Train [26][780/3239]	Time 2.481 (0.582)	Data Time 0.001 (0.039)	Loss 3.1149 (3.2025)	Entropy 1.55864 (1.56132)	Top-1 acc 50.391 (47.578)	Top-5 acc 72.266 (71.109)	lr 0.02237
Train [26][790/3239]	Time 0.211 (0.577)	Data Time 0.001 (0.039)	Loss 3.2650 (3.2021)	Entropy 1.55862 (1.56129)	Top-1 acc 39.844 (47.595)	Top-5 acc 67.188 (71.111)	lr 0.02237
Train [26][800/3239]	Time 0.223 (0.576)	Data Time 0.001 (0.038)	Loss 3.2671 (3.2019)	Entropy 1.55849 (1.56126)	Top-1 acc 44.141 (47.610)	Top-5 acc 69.922 (71.109)	lr 0.02237
Train [26][810/3239]	Time 0.211 (0.574)	Data Time 0.001 (0.038)	Loss 3.1951 (3.2020)	Entropy 1.55842 (1.56122)	Top-1 acc 49.609 (47.621)	Top-5 acc 71.875 (71.112)	lr 0.02237
Train [26][820/3239]	Time 0.225 (0.572)	Data Time 0.001 (0.038)	Loss 3.1604 (3.2018)	Entropy 1.55837 (1.56119)	Top-1 acc 50.000 (47.602)	Top-5 acc 70.703 (71.112)	lr 0.02237
Train [26][830/3239]	Time 0.238 (0.571)	Data Time 0.002 (0.037)	Loss 3.2421 (3.2023)	Entropy 1.55831 (1.56115)	Top-1 acc 45.703 (47.606)	Top-5 acc 70.312 (71.089)	lr 0.02237
Train [26][840/3239]	Time 0.203 (0.570)	Data Time 0.001 (0.037)	Loss 3.1114 (3.2016)	Entropy 1.55825 (1.56112)	Top-1 acc 51.172 (47.625)	Top-5 acc 70.312 (71.096)	lr 0.02237
Train [26][850/3239]	Time 0.222 (0.568)	Data Time 0.001 (0.036)	Loss 3.2773 (3.2014)	Entropy 1.55821 (1.56108)	Top-1 acc 44.922 (47.630)	Top-5 acc 68.750 (71.088)	lr 0.02236
Train [26][860/3239]	Time 0.210 (0.567)	Data Time 0.001 (0.036)	Loss 3.2656 (3.2018)	Entropy 1.55810 (1.56105)	Top-1 acc 47.266 (47.616)	Top-5 acc 69.141 (71.075)	lr 0.02236
Train [26][870/3239]	Time 0.199 (0.565)	Data Time 0.001 (0.036)	Loss 3.2696 (3.2018)	Entropy 1.55805 (1.56102)	Top-1 acc 47.656 (47.611)	Top-5 acc 69.531 (71.058)	lr 0.02236
Train [26][880/3239]	Time 0.218 (0.564)	Data Time 0.001 (0.035)	Loss 3.0886 (3.2023)	Entropy 1.55786 (1.56098)	Top-1 acc 51.953 (47.607)	Top-5 acc 72.266 (71.041)	lr 0.02236
Train [26][890/3239]	Time 2.424 (0.562)	Data Time 0.001 (0.035)	Loss 3.2406 (3.2017)	Entropy 1.55786 (1.56095)	Top-1 acc 45.703 (47.622)	Top-5 acc 69.141 (71.061)	lr 0.02236
Train [26][900/3239]	Time 0.213 (0.559)	Data Time 0.001 (0.034)	Loss 3.0579 (3.2009)	Entropy 1.55779 (1.56091)	Top-1 acc 50.781 (47.655)	Top-5 acc 73.828 (71.074)	lr 0.02236
Train [26][910/3239]	Time 0.202 (0.558)	Data Time 0.001 (0.034)	Loss 2.9564 (3.2005)	Entropy 1.55771 (1.56088)	Top-1 acc 51.953 (47.665)	Top-5 acc 76.953 (71.084)	lr 0.02236
Train [26][920/3239]	Time 0.249 (0.556)	Data Time 0.002 (0.034)	Loss 3.2372 (3.2004)	Entropy 1.55759 (1.56084)	Top-1 acc 44.531 (47.669)	Top-5 acc 69.141 (71.083)	lr 0.02236
Train [26][930/3239]	Time 0.215 (0.555)	Data Time 0.001 (0.033)	Loss 3.0902 (3.2005)	Entropy 1.55754 (1.56081)	Top-1 acc 46.484 (47.658)	Top-5 acc 76.562 (71.084)	lr 0.02236
Train [26][940/3239]	Time 0.227 (0.554)	Data Time 0.001 (0.033)	Loss 3.0417 (3.2005)	Entropy 1.55755 (1.56077)	Top-1 acc 51.562 (47.665)	Top-5 acc 72.656 (71.079)	lr 0.02236
Train [26][950/3239]	Time 0.219 (0.553)	Data Time 0.001 (0.033)	Loss 3.2015 (3.2008)	Entropy 1.55750 (1.56074)	Top-1 acc 47.266 (47.658)	Top-5 acc 69.531 (71.070)	lr 0.02236
Train [26][960/3239]	Time 0.162 (0.552)	Data Time 0.001 (0.032)	Loss 3.2195 (3.2004)	Entropy 1.55747 (1.56070)	Top-1 acc 48.438 (47.671)	Top-5 acc 70.312 (71.076)	lr 0.02236
Train [26][970/3239]	Time 0.202 (0.550)	Data Time 0.001 (0.032)	Loss 3.2183 (3.2008)	Entropy 1.55743 (1.56067)	Top-1 acc 46.094 (47.661)	Top-5 acc 71.875 (71.062)	lr 0.02236
Train [26][980/3239]	Time 0.221 (0.549)	Data Time 0.001 (0.032)	Loss 3.0938 (3.2009)	Entropy 1.55732 (1.56064)	Top-1 acc 52.734 (47.667)	Top-5 acc 73.828 (71.067)	lr 0.02236
Train [26][990/3239]	Time 0.229 (0.548)	Data Time 0.002 (0.032)	Loss 3.4091 (3.2008)	Entropy 1.55727 (1.56060)	Top-1 acc 44.141 (47.667)	Top-5 acc 66.797 (71.062)	lr 0.02236
Train [26][1000/3239]	Time 2.181 (0.547)	Data Time 0.001 (0.031)	Loss 3.0423 (3.2003)	Entropy 1.55727 (1.56057)	Top-1 acc 52.734 (47.677)	Top-5 acc 76.172 (71.078)	lr 0.02236
Train [26][1010/3239]	Time 0.310 (0.544)	Data Time 0.001 (0.031)	Loss 3.0413 (3.1997)	Entropy 1.55725 (1.56054)	Top-1 acc 48.438 (47.683)	Top-5 acc 76.172 (71.095)	lr 0.02235
Train [26][1020/3239]	Time 0.220 (0.543)	Data Time 0.001 (0.031)	Loss 3.2188 (3.1999)	Entropy 1.55728 (1.56051)	Top-1 acc 48.828 (47.677)	Top-5 acc 72.266 (71.088)	lr 0.02235
Train [26][1030/3239]	Time 0.224 (0.542)	Data Time 0.002 (0.030)	Loss 3.2708 (3.1997)	Entropy 1.55726 (1.56047)	Top-1 acc 48.438 (47.674)	Top-5 acc 69.922 (71.098)	lr 0.02235
Train [26][1040/3239]	Time 0.242 (0.579)	Data Time 0.002 (0.030)	Loss 3.1860 (3.1994)	Entropy 1.55724 (1.56044)	Top-1 acc 48.047 (47.685)	Top-5 acc 71.484 (71.100)	lr 0.02235
Train [26][1050/3239]	Time 0.241 (0.579)	Data Time 0.002 (0.030)	Loss 3.0641 (3.1991)	Entropy 1.55721 (1.56041)	Top-1 acc 50.000 (47.684)	Top-5 acc 71.875 (71.098)	lr 0.02235
Train [26][1060/3239]	Time 0.227 (0.578)	Data Time 0.001 (0.030)	Loss 3.0772 (3.1994)	Entropy 1.55722 (1.56038)	Top-1 acc 51.172 (47.670)	Top-5 acc 75.781 (71.098)	lr 0.02235
Train [26][1070/3239]	Time 0.324 (0.577)	Data Time 0.001 (0.029)	Loss 3.1018 (3.1989)	Entropy 1.55722 (1.56035)	Top-1 acc 48.438 (47.675)	Top-5 acc 71.484 (71.103)	lr 0.02235
Train [26][1080/3239]	Time 0.210 (0.575)	Data Time 0.001 (0.029)	Loss 3.1244 (3.1996)	Entropy 1.55715 (1.56032)	Top-1 acc 46.484 (47.658)	Top-5 acc 76.172 (71.091)	lr 0.02235
Train [26][1090/3239]	Time 0.179 (0.574)	Data Time 0.002 (0.029)	Loss 3.1789 (3.1998)	Entropy 1.55715 (1.56029)	Top-1 acc 46.484 (47.645)	Top-5 acc 72.266 (71.084)	lr 0.02235
Train [26][1100/3239]	Time 0.214 (0.573)	Data Time 0.001 (0.029)	Loss 3.1971 (3.1998)	Entropy 1.55706 (1.56027)	Top-1 acc 48.828 (47.635)	Top-5 acc 71.484 (71.081)	lr 0.02235
Train [26][1110/3239]	Time 2.446 (0.572)	Data Time 0.001 (0.028)	Loss 3.0849 (3.1993)	Entropy 1.55706 (1.56024)	Top-1 acc 50.391 (47.641)	Top-5 acc 73.047 (71.087)	lr 0.02235
Train [26][1120/3239]	Time 0.223 (0.569)	Data Time 0.001 (0.028)	Loss 3.1719 (3.1994)	Entropy 1.55697 (1.56021)	Top-1 acc 45.703 (47.625)	Top-5 acc 70.703 (71.083)	lr 0.02235
Train [26][1130/3239]	Time 0.326 (0.567)	Data Time 0.001 (0.028)	Loss 3.1625 (3.1990)	Entropy 1.55677 (1.56018)	Top-1 acc 49.609 (47.639)	Top-5 acc 72.266 (71.087)	lr 0.02235
Train [26][1140/3239]	Time 0.205 (0.566)	Data Time 0.001 (0.028)	Loss 3.2650 (3.1990)	Entropy 1.55674 (1.56015)	Top-1 acc 42.188 (47.632)	Top-5 acc 70.312 (71.093)	lr 0.02235
Train [26][1150/3239]	Time 0.244 (0.565)	Data Time 0.002 (0.027)	Loss 3.1221 (3.1990)	Entropy 1.55669 (1.56012)	Top-1 acc 44.922 (47.636)	Top-5 acc 72.656 (71.083)	lr 0.02235
Train [26][1160/3239]	Time 0.213 (0.564)	Data Time 0.001 (0.027)	Loss 3.4010 (3.1993)	Entropy 1.55670 (1.56009)	Top-1 acc 46.484 (47.637)	Top-5 acc 67.578 (71.066)	lr 0.02235
Train [26][1170/3239]	Time 0.142 (0.563)	Data Time 0.001 (0.027)	Loss 3.3224 (3.1988)	Entropy 1.55668 (1.56006)	Top-1 acc 45.312 (47.645)	Top-5 acc 68.359 (71.073)	lr 0.02234
Train [26][1180/3239]	Time 0.197 (0.562)	Data Time 0.001 (0.027)	Loss 3.3235 (3.1984)	Entropy 1.55663 (1.56003)	Top-1 acc 43.750 (47.645)	Top-5 acc 69.922 (71.072)	lr 0.02234
Train [26][1190/3239]	Time 0.202 (0.561)	Data Time 0.001 (0.027)	Loss 3.2287 (3.1984)	Entropy 1.55661 (1.56000)	Top-1 acc 48.438 (47.649)	Top-5 acc 71.875 (71.070)	lr 0.02234
Train [26][1200/3239]	Time 0.319 (0.560)	Data Time 0.001 (0.026)	Loss 3.2653 (3.1984)	Entropy 1.55646 (1.55997)	Top-1 acc 43.750 (47.653)	Top-5 acc 70.312 (71.063)	lr 0.02234
Train [26][1210/3239]	Time 0.237 (0.559)	Data Time 0.002 (0.026)	Loss 3.1729 (3.1989)	Entropy 1.55638 (1.55994)	Top-1 acc 48.047 (47.651)	Top-5 acc 73.047 (71.051)	lr 0.02234
Train [26][1220/3239]	Time 2.451 (0.558)	Data Time 0.001 (0.026)	Loss 3.1026 (3.1985)	Entropy 1.55638 (1.55991)	Top-1 acc 49.609 (47.654)	Top-5 acc 72.266 (71.064)	lr 0.02234
Train [26][1230/3239]	Time 0.203 (0.556)	Data Time 0.001 (0.026)	Loss 3.1278 (3.1981)	Entropy 1.55629 (1.55989)	Top-1 acc 47.656 (47.670)	Top-5 acc 73.828 (71.070)	lr 0.02234
Train [26][1240/3239]	Time 0.218 (0.555)	Data Time 0.001 (0.026)	Loss 3.1228 (3.1981)	Entropy 1.55629 (1.55986)	Top-1 acc 48.828 (47.677)	Top-5 acc 71.875 (71.074)	lr 0.02234
Train [26][1250/3239]	Time 0.207 (0.554)	Data Time 0.001 (0.025)	Loss 3.0093 (3.1980)	Entropy 1.55623 (1.55983)	Top-1 acc 51.953 (47.672)	Top-5 acc 75.391 (71.078)	lr 0.02234
Train [26][1260/3239]	Time 0.254 (0.553)	Data Time 0.001 (0.025)	Loss 3.2613 (3.1979)	Entropy 1.55614 (1.55980)	Top-1 acc 47.266 (47.681)	Top-5 acc 70.703 (71.077)	lr 0.02234
Train [26][1270/3239]	Time 0.211 (0.552)	Data Time 0.001 (0.025)	Loss 3.2731 (3.1978)	Entropy 1.55613 (1.55977)	Top-1 acc 45.703 (47.681)	Top-5 acc 69.141 (71.084)	lr 0.02234
Train [26][1280/3239]	Time 0.206 (0.551)	Data Time 0.001 (0.025)	Loss 3.2628 (3.1978)	Entropy 1.55605 (1.55974)	Top-1 acc 45.312 (47.683)	Top-5 acc 69.922 (71.086)	lr 0.02234
Train [26][1290/3239]	Time 0.208 (0.550)	Data Time 0.001 (0.025)	Loss 3.3902 (3.1983)	Entropy 1.55600 (1.55971)	Top-1 acc 43.750 (47.667)	Top-5 acc 67.578 (71.079)	lr 0.02234
Train [26][1300/3239]	Time 0.200 (0.549)	Data Time 0.001 (0.024)	Loss 3.0124 (3.1984)	Entropy 1.55587 (1.55968)	Top-1 acc 52.344 (47.665)	Top-5 acc 73.828 (71.078)	lr 0.02234
Train [26][1310/3239]	Time 0.229 (0.549)	Data Time 0.001 (0.024)	Loss 3.2555 (3.1984)	Entropy 1.55576 (1.55965)	Top-1 acc 47.656 (47.667)	Top-5 acc 68.750 (71.079)	lr 0.02234
Train [26][1320/3239]	Time 0.212 (0.548)	Data Time 0.001 (0.024)	Loss 3.1986 (3.1983)	Entropy 1.55575 (1.55962)	Top-1 acc 49.609 (47.667)	Top-5 acc 70.312 (71.080)	lr 0.02234
Train [26][1330/3239]	Time 2.368 (0.547)	Data Time 0.002 (0.024)	Loss 3.0052 (3.1982)	Entropy 1.55575 (1.55960)	Top-1 acc 48.047 (47.678)	Top-5 acc 76.172 (71.086)	lr 0.02233
Train [26][1340/3239]	Time 0.199 (0.544)	Data Time 0.001 (0.024)	Loss 3.1684 (3.1983)	Entropy 1.55573 (1.55957)	Top-1 acc 42.578 (47.674)	Top-5 acc 75.781 (71.087)	lr 0.02233
Train [26][1350/3239]	Time 0.225 (0.544)	Data Time 0.001 (0.024)	Loss 3.2100 (3.1985)	Entropy 1.55568 (1.55954)	Top-1 acc 47.656 (47.674)	Top-5 acc 71.484 (71.083)	lr 0.02233
Train [26][1360/3239]	Time 0.201 (0.543)	Data Time 0.001 (0.023)	Loss 3.2780 (3.1985)	Entropy 1.55546 (1.55951)	Top-1 acc 45.312 (47.677)	Top-5 acc 71.094 (71.087)	lr 0.02233
Train [26][1370/3239]	Time 0.227 (0.542)	Data Time 0.002 (0.023)	Loss 3.3051 (3.1985)	Entropy 1.55545 (1.55948)	Top-1 acc 46.875 (47.679)	Top-5 acc 68.750 (71.085)	lr 0.02233
Train [26][1380/3239]	Time 0.202 (0.541)	Data Time 0.001 (0.023)	Loss 3.2198 (3.1985)	Entropy 1.55542 (1.55945)	Top-1 acc 48.438 (47.679)	Top-5 acc 69.141 (71.080)	lr 0.02233
Train [26][1390/3239]	Time 0.240 (0.541)	Data Time 0.001 (0.023)	Loss 3.0904 (3.1980)	Entropy 1.55546 (1.55942)	Top-1 acc 49.609 (47.686)	Top-5 acc 75.391 (71.085)	lr 0.02233
Train [26][1400/3239]	Time 0.279 (0.569)	Data Time 0.002 (0.023)	Loss 3.2980 (3.1981)	Entropy 1.55544 (1.55939)	Top-1 acc 50.781 (47.688)	Top-5 acc 66.797 (71.078)	lr 0.02233
Train [26][1410/3239]	Time 0.205 (0.568)	Data Time 0.002 (0.023)	Loss 3.1738 (3.1981)	Entropy 1.55542 (1.55936)	Top-1 acc 48.828 (47.695)	Top-5 acc 73.828 (71.077)	lr 0.02233
Train [26][1420/3239]	Time 0.161 (0.568)	Data Time 0.001 (0.023)	Loss 3.2673 (3.1980)	Entropy 1.55533 (1.55934)	Top-1 acc 48.047 (47.688)	Top-5 acc 66.797 (71.070)	lr 0.02233
Train [26][1430/3239]	Time 0.305 (0.567)	Data Time 0.001 (0.022)	Loss 3.2093 (3.1980)	Entropy 1.55529 (1.55931)	Top-1 acc 47.266 (47.690)	Top-5 acc 69.141 (71.071)	lr 0.02233
Train [26][1440/3239]	Time 2.303 (0.566)	Data Time 0.001 (0.022)	Loss 3.0675 (3.1977)	Entropy 1.55529 (1.55928)	Top-1 acc 46.484 (47.696)	Top-5 acc 75.000 (71.075)	lr 0.02233
Train [26][1450/3239]	Time 0.281 (0.564)	Data Time 0.001 (0.022)	Loss 3.4023 (3.1981)	Entropy 1.55526 (1.55925)	Top-1 acc 44.922 (47.688)	Top-5 acc 66.797 (71.068)	lr 0.02233
Train [26][1460/3239]	Time 0.203 (0.563)	Data Time 0.002 (0.022)	Loss 3.2193 (3.1982)	Entropy 1.55545 (1.55923)	Top-1 acc 47.656 (47.692)	Top-5 acc 67.969 (71.069)	lr 0.02233
Train [26][1470/3239]	Time 0.209 (0.562)	Data Time 0.001 (0.022)	Loss 2.8996 (3.1982)	Entropy 1.55540 (1.55920)	Top-1 acc 55.469 (47.692)	Top-5 acc 76.953 (71.073)	lr 0.02233
Train [26][1480/3239]	Time 0.194 (0.561)	Data Time 0.001 (0.022)	Loss 3.4929 (3.1984)	Entropy 1.55531 (1.55917)	Top-1 acc 40.234 (47.688)	Top-5 acc 64.844 (71.064)	lr 0.02233
Train [26][1490/3239]	Time 0.320 (0.560)	Data Time 0.001 (0.022)	Loss 3.1657 (3.1982)	Entropy 1.55526 (1.55915)	Top-1 acc 52.344 (47.696)	Top-5 acc 72.656 (71.070)	lr 0.02232
Train [26][1500/3239]	Time 0.242 (0.559)	Data Time 0.001 (0.021)	Loss 3.0829 (3.1982)	Entropy 1.55522 (1.55912)	Top-1 acc 52.344 (47.700)	Top-5 acc 75.000 (71.070)	lr 0.02232
Train [26][1510/3239]	Time 0.207 (0.559)	Data Time 0.001 (0.021)	Loss 3.0520 (3.1979)	Entropy 1.55506 (1.55910)	Top-1 acc 51.172 (47.703)	Top-5 acc 76.172 (71.073)	lr 0.02232
Train [26][1520/3239]	Time 0.219 (0.558)	Data Time 0.001 (0.021)	Loss 3.1055 (3.1975)	Entropy 1.55505 (1.55907)	Top-1 acc 50.000 (47.709)	Top-5 acc 75.000 (71.085)	lr 0.02232
Train [26][1530/3239]	Time 0.217 (0.557)	Data Time 0.001 (0.021)	Loss 3.3076 (3.1974)	Entropy 1.55499 (1.55904)	Top-1 acc 46.484 (47.715)	Top-5 acc 71.094 (71.092)	lr 0.02232
Train [26][1540/3239]	Time 0.221 (0.556)	Data Time 0.001 (0.021)	Loss 3.0217 (3.1974)	Entropy 1.55490 (1.55902)	Top-1 acc 48.828 (47.716)	Top-5 acc 74.609 (71.092)	lr 0.02232
Train [26][1550/3239]	Time 2.552 (0.556)	Data Time 0.002 (0.021)	Loss 2.9913 (3.1972)	Entropy 1.55490 (1.55899)	Top-1 acc 55.859 (47.719)	Top-5 acc 76.562 (71.097)	lr 0.02232
Train [26][1560/3239]	Time 0.211 (0.554)	Data Time 0.001 (0.021)	Loss 3.1924 (3.1968)	Entropy 1.55488 (1.55896)	Top-1 acc 47.266 (47.720)	Top-5 acc 71.484 (71.101)	lr 0.02232
Train [26][1570/3239]	Time 0.195 (0.553)	Data Time 0.001 (0.021)	Loss 3.3308 (3.1968)	Entropy 1.55482 (1.55894)	Top-1 acc 46.484 (47.719)	Top-5 acc 67.578 (71.102)	lr 0.02232
Train [26][1580/3239]	Time 0.208 (0.552)	Data Time 0.001 (0.021)	Loss 3.1750 (3.1966)	Entropy 1.55478 (1.55891)	Top-1 acc 52.344 (47.731)	Top-5 acc 71.094 (71.104)	lr 0.02232
Train [26][1590/3239]	Time 0.223 (0.552)	Data Time 0.001 (0.020)	Loss 3.2733 (3.1963)	Entropy 1.55461 (1.55888)	Top-1 acc 45.703 (47.734)	Top-5 acc 67.578 (71.109)	lr 0.02232
Train [26][1600/3239]	Time 0.221 (0.551)	Data Time 0.001 (0.020)	Loss 3.1527 (3.1961)	Entropy 1.55453 (1.55886)	Top-1 acc 47.656 (47.741)	Top-5 acc 72.266 (71.108)	lr 0.02232
Train [26][1610/3239]	Time 0.249 (0.550)	Data Time 0.001 (0.020)	Loss 3.0406 (3.1961)	Entropy 1.55446 (1.55883)	Top-1 acc 50.391 (47.742)	Top-5 acc 75.781 (71.117)	lr 0.02232
Train [26][1620/3239]	Time 0.216 (0.550)	Data Time 0.001 (0.020)	Loss 3.2733 (3.1962)	Entropy 1.55444 (1.55880)	Top-1 acc 45.703 (47.741)	Top-5 acc 66.797 (71.113)	lr 0.02232
Train [26][1630/3239]	Time 0.216 (0.549)	Data Time 0.001 (0.020)	Loss 3.0766 (3.1960)	Entropy 1.55441 (1.55878)	Top-1 acc 48.828 (47.748)	Top-5 acc 71.484 (71.117)	lr 0.02232
Train [26][1640/3239]	Time 0.210 (0.549)	Data Time 0.001 (0.020)	Loss 3.0699 (3.1958)	Entropy 1.55430 (1.55875)	Top-1 acc 51.562 (47.750)	Top-5 acc 73.047 (71.120)	lr 0.02232
Train [26][1650/3239]	Time 0.243 (0.548)	Data Time 0.001 (0.020)	Loss 3.0570 (3.1956)	Entropy 1.55432 (1.55872)	Top-1 acc 50.781 (47.750)	Top-5 acc 75.000 (71.126)	lr 0.02231
Train [26][1660/3239]	Time 2.340 (0.547)	Data Time 0.001 (0.020)	Loss 3.0983 (3.1955)	Entropy 1.55432 (1.55870)	Top-1 acc 51.172 (47.752)	Top-5 acc 71.875 (71.126)	lr 0.02231
Train [26][1670/3239]	Time 0.202 (0.545)	Data Time 0.001 (0.019)	Loss 3.0369 (3.1959)	Entropy 1.55423 (1.55867)	Top-1 acc 51.562 (47.743)	Top-5 acc 76.953 (71.123)	lr 0.02231
Train [26][1680/3239]	Time 0.164 (0.545)	Data Time 0.001 (0.019)	Loss 3.1474 (3.1958)	Entropy 1.55435 (1.55864)	Top-1 acc 44.922 (47.735)	Top-5 acc 70.703 (71.127)	lr 0.02231
Train [26][1690/3239]	Time 0.216 (0.544)	Data Time 0.001 (0.019)	Loss 3.4139 (3.1960)	Entropy 1.55426 (1.55862)	Top-1 acc 39.453 (47.723)	Top-5 acc 67.578 (71.125)	lr 0.02231
Train [26][1700/3239]	Time 0.223 (0.543)	Data Time 0.001 (0.019)	Loss 3.1656 (3.1961)	Entropy 1.55421 (1.55859)	Top-1 acc 49.219 (47.728)	Top-5 acc 69.922 (71.120)	lr 0.02231
Train [26][1710/3239]	Time 0.207 (0.543)	Data Time 0.001 (0.019)	Loss 3.0163 (3.1963)	Entropy 1.55413 (1.55857)	Top-1 acc 49.219 (47.725)	Top-5 acc 75.000 (71.115)	lr 0.02231
Train [26][1720/3239]	Time 0.355 (0.542)	Data Time 0.001 (0.019)	Loss 3.2156 (3.1964)	Entropy 1.55417 (1.55854)	Top-1 acc 46.484 (47.719)	Top-5 acc 68.750 (71.109)	lr 0.02231
Train [26][1730/3239]	Time 0.224 (0.542)	Data Time 0.002 (0.019)	Loss 3.2633 (3.1961)	Entropy 1.55409 (1.55852)	Top-1 acc 46.484 (47.728)	Top-5 acc 69.922 (71.112)	lr 0.02231
Train [26][1740/3239]	Time 0.241 (0.541)	Data Time 0.002 (0.019)	Loss 3.1781 (3.1963)	Entropy 1.55402 (1.55849)	Top-1 acc 50.391 (47.729)	Top-5 acc 68.750 (71.106)	lr 0.02231
Train [26][1750/3239]	Time 0.168 (0.541)	Data Time 0.001 (0.019)	Loss 3.1626 (3.1966)	Entropy 1.55401 (1.55846)	Top-1 acc 47.656 (47.730)	Top-5 acc 69.531 (71.094)	lr 0.02231
Train [26][1760/3239]	Time 0.269 (0.564)	Data Time 0.004 (0.019)	Loss 3.2492 (3.1964)	Entropy 1.55397 (1.55844)	Top-1 acc 45.703 (47.736)	Top-5 acc 71.484 (71.094)	lr 0.02231
Train [26][1770/3239]	Time 2.474 (0.563)	Data Time 0.002 (0.019)	Loss 3.1613 (3.1961)	Entropy 1.55397 (1.55841)	Top-1 acc 46.094 (47.739)	Top-5 acc 70.312 (71.099)	lr 0.02231
Train [26][1780/3239]	Time 0.378 (0.561)	Data Time 0.002 (0.018)	Loss 3.2817 (3.1959)	Entropy 1.55386 (1.55839)	Top-1 acc 47.656 (47.745)	Top-5 acc 65.234 (71.104)	lr 0.02231
Train [26][1790/3239]	Time 0.176 (0.561)	Data Time 0.001 (0.018)	Loss 3.1712 (3.1962)	Entropy 1.55369 (1.55836)	Top-1 acc 49.609 (47.735)	Top-5 acc 73.828 (71.098)	lr 0.02231
Train [26][1800/3239]	Time 0.204 (0.560)	Data Time 0.001 (0.018)	Loss 3.2393 (3.1964)	Entropy 1.55360 (1.55834)	Top-1 acc 48.047 (47.730)	Top-5 acc 69.922 (71.095)	lr 0.02231
Train [26][1810/3239]	Time 0.214 (0.559)	Data Time 0.001 (0.018)	Loss 3.0624 (3.1964)	Entropy 1.55350 (1.55831)	Top-1 acc 51.562 (47.734)	Top-5 acc 73.828 (71.099)	lr 0.02230
Train [26][1820/3239]	Time 0.145 (0.559)	Data Time 0.001 (0.018)	Loss 3.2637 (3.1964)	Entropy 1.55346 (1.55828)	Top-1 acc 49.219 (47.738)	Top-5 acc 69.531 (71.098)	lr 0.02230
Train [26][1830/3239]	Time 0.231 (0.558)	Data Time 0.001 (0.018)	Loss 3.2052 (3.1964)	Entropy 1.55343 (1.55826)	Top-1 acc 48.047 (47.736)	Top-5 acc 72.266 (71.102)	lr 0.02230
Train [26][1840/3239]	Time 0.313 (0.558)	Data Time 0.001 (0.018)	Loss 3.1540 (3.1966)	Entropy 1.55337 (1.55823)	Top-1 acc 48.438 (47.733)	Top-5 acc 71.094 (71.097)	lr 0.02230
Train [26][1850/3239]	Time 0.222 (0.557)	Data Time 0.001 (0.018)	Loss 3.2718 (3.1967)	Entropy 1.55336 (1.55820)	Top-1 acc 46.094 (47.733)	Top-5 acc 72.266 (71.096)	lr 0.02230
Train [26][1860/3239]	Time 0.213 (0.556)	Data Time 0.001 (0.018)	Loss 3.1953 (3.1968)	Entropy 1.55325 (1.55818)	Top-1 acc 50.000 (47.733)	Top-5 acc 73.438 (71.096)	lr 0.02230
Train [26][1870/3239]	Time 0.234 (0.556)	Data Time 0.001 (0.018)	Loss 2.9282 (3.1966)	Entropy 1.55307 (1.55815)	Top-1 acc 51.172 (47.735)	Top-5 acc 76.172 (71.100)	lr 0.02230
Train [26][1880/3239]	Time 2.338 (0.555)	Data Time 0.001 (0.018)	Loss 2.9164 (3.1966)	Entropy 1.55307 (1.55812)	Top-1 acc 56.250 (47.735)	Top-5 acc 77.344 (71.099)	lr 0.02230
Train [26][1890/3239]	Time 0.226 (0.553)	Data Time 0.001 (0.018)	Loss 3.3368 (3.1964)	Entropy 1.55298 (1.55810)	Top-1 acc 42.188 (47.742)	Top-5 acc 67.188 (71.101)	lr 0.02230
Train [26][1900/3239]	Time 0.230 (0.553)	Data Time 0.001 (0.017)	Loss 3.2428 (3.1966)	Entropy 1.55295 (1.55807)	Top-1 acc 50.781 (47.741)	Top-5 acc 70.703 (71.100)	lr 0.02230
Train [26][1910/3239]	Time 0.221 (0.552)	Data Time 0.001 (0.017)	Loss 2.9882 (3.1965)	Entropy 1.55313 (1.55804)	Top-1 acc 51.953 (47.742)	Top-5 acc 74.609 (71.102)	lr 0.02230
Train [26][1920/3239]	Time 0.203 (0.552)	Data Time 0.001 (0.017)	Loss 3.2276 (3.1964)	Entropy 1.55313 (1.55802)	Top-1 acc 46.484 (47.746)	Top-5 acc 68.750 (71.106)	lr 0.02230
Train [26][1930/3239]	Time 0.237 (0.551)	Data Time 0.001 (0.017)	Loss 3.1452 (3.1962)	Entropy 1.55310 (1.55799)	Top-1 acc 48.438 (47.747)	Top-5 acc 72.266 (71.109)	lr 0.02230
Train [26][1940/3239]	Time 0.237 (0.551)	Data Time 0.001 (0.017)	Loss 3.1279 (3.1962)	Entropy 1.55306 (1.55797)	Top-1 acc 50.000 (47.746)	Top-5 acc 73.047 (71.108)	lr 0.02230
Train [26][1950/3239]	Time 0.245 (0.550)	Data Time 0.001 (0.017)	Loss 3.6691 (3.1964)	Entropy 1.55291 (1.55794)	Top-1 acc 36.719 (47.738)	Top-5 acc 63.672 (71.102)	lr 0.02230
Train [26][1960/3239]	Time 0.305 (0.549)	Data Time 0.001 (0.017)	Loss 3.2859 (3.1965)	Entropy 1.55281 (1.55792)	Top-1 acc 46.094 (47.741)	Top-5 acc 67.969 (71.099)	lr 0.02230
Train [26][1970/3239]	Time 0.212 (0.549)	Data Time 0.001 (0.017)	Loss 3.2378 (3.1970)	Entropy 1.55279 (1.55789)	Top-1 acc 47.656 (47.732)	Top-5 acc 71.875 (71.090)	lr 0.02229
Train [26][1980/3239]	Time 0.240 (0.548)	Data Time 0.001 (0.017)	Loss 3.2054 (3.1968)	Entropy 1.55274 (1.55786)	Top-1 acc 49.609 (47.730)	Top-5 acc 69.922 (71.095)	lr 0.02229
Train [26][1990/3239]	Time 2.480 (0.548)	Data Time 0.001 (0.017)	Loss 3.2796 (3.1968)	Entropy 1.55274 (1.55784)	Top-1 acc 45.703 (47.730)	Top-5 acc 69.531 (71.096)	lr 0.02229
Train [26][2000/3239]	Time 0.253 (0.546)	Data Time 0.001 (0.017)	Loss 3.4190 (3.1969)	Entropy 1.55266 (1.55781)	Top-1 acc 45.312 (47.723)	Top-5 acc 64.062 (71.093)	lr 0.02229
Train [26][2010/3239]	Time 0.237 (0.546)	Data Time 0.001 (0.017)	Loss 3.5260 (3.1970)	Entropy 1.55256 (1.55779)	Top-1 acc 43.359 (47.726)	Top-5 acc 63.281 (71.092)	lr 0.02229
Train [26][2020/3239]	Time 0.317 (0.545)	Data Time 0.001 (0.016)	Loss 3.2698 (3.1970)	Entropy 1.55257 (1.55776)	Top-1 acc 49.609 (47.729)	Top-5 acc 66.797 (71.090)	lr 0.02229
Train [26][2030/3239]	Time 0.221 (0.545)	Data Time 0.001 (0.016)	Loss 3.2479 (3.1971)	Entropy 1.55256 (1.55774)	Top-1 acc 45.312 (47.728)	Top-5 acc 70.312 (71.090)	lr 0.02229
Train [26][2040/3239]	Time 0.221 (0.544)	Data Time 0.001 (0.016)	Loss 3.1495 (3.1968)	Entropy 1.55259 (1.55771)	Top-1 acc 50.781 (47.732)	Top-5 acc 70.703 (71.096)	lr 0.02229
Train [26][2050/3239]	Time 0.224 (0.544)	Data Time 0.001 (0.016)	Loss 3.0678 (3.1965)	Entropy 1.55248 (1.55768)	Top-1 acc 51.172 (47.736)	Top-5 acc 74.219 (71.106)	lr 0.02229
Train [26][2060/3239]	Time 0.243 (0.544)	Data Time 0.001 (0.016)	Loss 3.2088 (3.1964)	Entropy 1.55242 (1.55766)	Top-1 acc 47.656 (47.740)	Top-5 acc 73.828 (71.110)	lr 0.02229
Train [26][2070/3239]	Time 0.209 (0.543)	Data Time 0.001 (0.016)	Loss 3.1612 (3.1965)	Entropy 1.55225 (1.55763)	Top-1 acc 49.219 (47.736)	Top-5 acc 70.703 (71.106)	lr 0.02229
Train [26][2080/3239]	Time 0.240 (0.543)	Data Time 0.001 (0.016)	Loss 3.0397 (3.1967)	Entropy 1.55220 (1.55761)	Top-1 acc 50.391 (47.735)	Top-5 acc 75.781 (71.105)	lr 0.02229
Train [26][2090/3239]	Time 0.206 (0.542)	Data Time 0.001 (0.016)	Loss 3.2515 (3.1966)	Entropy 1.55220 (1.55758)	Top-1 acc 48.047 (47.737)	Top-5 acc 70.312 (71.110)	lr 0.02229
Train [26][2100/3239]	Time 2.318 (0.542)	Data Time 0.001 (0.016)	Loss 3.2662 (3.1967)	Entropy 1.55220 (1.55756)	Top-1 acc 45.703 (47.733)	Top-5 acc 67.578 (71.103)	lr 0.02229
Train [26][2110/3239]	Time 0.245 (0.540)	Data Time 0.001 (0.016)	Loss 3.0933 (3.1968)	Entropy 1.55213 (1.55753)	Top-1 acc 48.828 (47.729)	Top-5 acc 71.094 (71.099)	lr 0.02229
Train [26][2120/3239]	Time 0.228 (0.540)	Data Time 0.001 (0.016)	Loss 3.0974 (3.1969)	Entropy 1.55204 (1.55751)	Top-1 acc 51.172 (47.727)	Top-5 acc 74.609 (71.101)	lr 0.02229
Train [26][2130/3239]	Time 0.289 (0.558)	Data Time 0.003 (0.016)	Loss 2.9690 (3.1968)	Entropy 1.55197 (1.55748)	Top-1 acc 51.172 (47.727)	Top-5 acc 75.391 (71.108)	lr 0.02228
Train [26][2140/3239]	Time 0.301 (0.558)	Data Time 0.002 (0.016)	Loss 3.2135 (3.1971)	Entropy 1.55191 (1.55745)	Top-1 acc 44.531 (47.721)	Top-5 acc 69.141 (71.102)	lr 0.02228
Train [26][2150/3239]	Time 0.217 (0.557)	Data Time 0.001 (0.016)	Loss 3.1925 (3.1973)	Entropy 1.55190 (1.55743)	Top-1 acc 46.875 (47.723)	Top-5 acc 68.750 (71.098)	lr 0.02228
Train [26][2160/3239]	Time 0.222 (0.557)	Data Time 0.001 (0.016)	Loss 3.2228 (3.1974)	Entropy 1.55177 (1.55740)	Top-1 acc 49.609 (47.723)	Top-5 acc 68.750 (71.095)	lr 0.02228
Train [26][2170/3239]	Time 0.243 (0.556)	Data Time 0.002 (0.016)	Loss 3.0351 (3.1973)	Entropy 1.55151 (1.55738)	Top-1 acc 49.609 (47.730)	Top-5 acc 74.219 (71.097)	lr 0.02228
Train [26][2180/3239]	Time 0.216 (0.555)	Data Time 0.001 (0.015)	Loss 3.3261 (3.1973)	Entropy 1.55147 (1.55735)	Top-1 acc 46.875 (47.731)	Top-5 acc 67.969 (71.098)	lr 0.02228
Train [26][2190/3239]	Time 0.235 (0.555)	Data Time 0.001 (0.015)	Loss 3.2425 (3.1973)	Entropy 1.55142 (1.55732)	Top-1 acc 46.484 (47.737)	Top-5 acc 69.531 (71.099)	lr 0.02228
Train [26][2200/3239]	Time 0.349 (0.555)	Data Time 0.001 (0.015)	Loss 3.3390 (3.1973)	Entropy 1.55142 (1.55729)	Top-1 acc 43.359 (47.740)	Top-5 acc 70.312 (71.101)	lr 0.02228
Train [26][2210/3239]	Time 2.359 (0.554)	Data Time 0.001 (0.015)	Loss 3.2776 (3.1972)	Entropy 1.55142 (1.55727)	Top-1 acc 43.359 (47.740)	Top-5 acc 69.531 (71.107)	lr 0.02228
Train [26][2220/3239]	Time 0.286 (0.553)	Data Time 0.001 (0.015)	Loss 3.2473 (3.1971)	Entropy 1.55136 (1.55724)	Top-1 acc 45.312 (47.742)	Top-5 acc 69.141 (71.107)	lr 0.02228
Train [26][2230/3239]	Time 0.204 (0.552)	Data Time 0.001 (0.015)	Loss 3.1760 (3.1973)	Entropy 1.55127 (1.55721)	Top-1 acc 46.875 (47.740)	Top-5 acc 73.828 (71.105)	lr 0.02228
Train [26][2240/3239]	Time 0.235 (0.552)	Data Time 0.001 (0.015)	Loss 3.0195 (3.1973)	Entropy 1.55115 (1.55719)	Top-1 acc 48.438 (47.740)	Top-5 acc 76.953 (71.104)	lr 0.02228
Train [26][2250/3239]	Time 0.228 (0.551)	Data Time 0.001 (0.015)	Loss 3.4442 (3.1974)	Entropy 1.55131 (1.55716)	Top-1 acc 43.359 (47.744)	Top-5 acc 66.016 (71.102)	lr 0.02228
Train [26][2260/3239]	Time 0.267 (0.551)	Data Time 0.001 (0.015)	Loss 3.1720 (3.1974)	Entropy 1.55115 (1.55714)	Top-1 acc 48.438 (47.747)	Top-5 acc 69.531 (71.102)	lr 0.02228
Train [26][2270/3239]	Time 0.241 (0.550)	Data Time 0.001 (0.015)	Loss 3.2650 (3.1973)	Entropy 1.55102 (1.55711)	Top-1 acc 46.875 (47.747)	Top-5 acc 70.312 (71.106)	lr 0.02228
Train [26][2280/3239]	Time 0.226 (0.550)	Data Time 0.001 (0.015)	Loss 3.3016 (3.1975)	Entropy 1.55098 (1.55708)	Top-1 acc 44.922 (47.741)	Top-5 acc 67.188 (71.105)	lr 0.02228
Train [26][2290/3239]	Time 0.221 (0.549)	Data Time 0.002 (0.015)	Loss 2.9665 (3.1973)	Entropy 1.55083 (1.55705)	Top-1 acc 48.828 (47.745)	Top-5 acc 76.953 (71.109)	lr 0.02227
Train [26][2300/3239]	Time 0.233 (0.549)	Data Time 0.001 (0.015)	Loss 3.0808 (3.1976)	Entropy 1.55076 (1.55703)	Top-1 acc 51.172 (47.739)	Top-5 acc 73.438 (71.107)	lr 0.02227
Train [26][2310/3239]	Time 0.203 (0.548)	Data Time 0.001 (0.015)	Loss 3.2731 (3.1975)	Entropy 1.55069 (1.55700)	Top-1 acc 45.312 (47.748)	Top-5 acc 70.703 (71.111)	lr 0.02227
Train [26][2320/3239]	Time 2.613 (0.548)	Data Time 0.001 (0.015)	Loss 3.2129 (3.1974)	Entropy 1.55069 (1.55697)	Top-1 acc 48.438 (47.748)	Top-5 acc 68.359 (71.115)	lr 0.02227
Train [26][2330/3239]	Time 0.206 (0.547)	Data Time 0.001 (0.015)	Loss 3.0594 (3.1973)	Entropy 1.55070 (1.55695)	Top-1 acc 50.000 (47.755)	Top-5 acc 75.781 (71.118)	lr 0.02227
Train [26][2340/3239]	Time 0.216 (0.546)	Data Time 0.001 (0.015)	Loss 3.2705 (3.1973)	Entropy 1.55067 (1.55692)	Top-1 acc 48.438 (47.752)	Top-5 acc 67.969 (71.115)	lr 0.02227
Train [26][2350/3239]	Time 0.235 (0.546)	Data Time 0.001 (0.014)	Loss 3.2204 (3.1972)	Entropy 1.55056 (1.55689)	Top-1 acc 49.219 (47.754)	Top-5 acc 68.750 (71.121)	lr 0.02227
Train [26][2360/3239]	Time 0.206 (0.545)	Data Time 0.001 (0.014)	Loss 3.1005 (3.1972)	Entropy 1.55048 (1.55687)	Top-1 acc 49.609 (47.756)	Top-5 acc 78.125 (71.121)	lr 0.02227
Train [26][2370/3239]	Time 0.214 (0.545)	Data Time 0.001 (0.014)	Loss 3.4249 (3.1975)	Entropy 1.55050 (1.55684)	Top-1 acc 42.969 (47.748)	Top-5 acc 67.578 (71.113)	lr 0.02227
Train [26][2380/3239]	Time 0.321 (0.545)	Data Time 0.001 (0.014)	Loss 3.1562 (3.1976)	Entropy 1.55048 (1.55681)	Top-1 acc 44.922 (47.743)	Top-5 acc 73.828 (71.111)	lr 0.02227
Train [26][2390/3239]	Time 0.197 (0.544)	Data Time 0.002 (0.014)	Loss 3.2111 (3.1975)	Entropy 1.55049 (1.55679)	Top-1 acc 45.703 (47.746)	Top-5 acc 70.703 (71.110)	lr 0.02227
Train [26][2400/3239]	Time 0.211 (0.544)	Data Time 0.001 (0.014)	Loss 3.2624 (3.1976)	Entropy 1.55038 (1.55676)	Top-1 acc 44.141 (47.743)	Top-5 acc 69.531 (71.107)	lr 0.02227
Train [26][2410/3239]	Time 0.256 (0.543)	Data Time 0.001 (0.014)	Loss 3.1489 (3.1974)	Entropy 1.55037 (1.55673)	Top-1 acc 46.875 (47.746)	Top-5 acc 73.438 (71.116)	lr 0.02227
Train [26][2420/3239]	Time 0.256 (0.543)	Data Time 0.001 (0.014)	Loss 3.1264 (3.1976)	Entropy 1.55030 (1.55671)	Top-1 acc 50.000 (47.742)	Top-5 acc 72.656 (71.110)	lr 0.02227
Train [26][2430/3239]	Time 2.334 (0.543)	Data Time 0.001 (0.014)	Loss 2.9715 (3.1975)	Entropy 1.55030 (1.55668)	Top-1 acc 53.125 (47.745)	Top-5 acc 76.172 (71.111)	lr 0.02227
Train [26][2440/3239]	Time 0.216 (0.541)	Data Time 0.001 (0.014)	Loss 3.1372 (3.1974)	Entropy 1.55021 (1.55665)	Top-1 acc 49.609 (47.750)	Top-5 acc 71.094 (71.107)	lr 0.02226
Train [26][2450/3239]	Time 0.244 (0.541)	Data Time 0.001 (0.014)	Loss 3.1754 (3.1976)	Entropy 1.55006 (1.55663)	Top-1 acc 45.703 (47.743)	Top-5 acc 73.438 (71.106)	lr 0.02226
Train [26][2460/3239]	Time 0.232 (0.540)	Data Time 0.002 (0.014)	Loss 3.0355 (3.1976)	Entropy 1.55003 (1.55660)	Top-1 acc 50.391 (47.743)	Top-5 acc 74.219 (71.101)	lr 0.02226
Train [26][2470/3239]	Time 0.228 (0.540)	Data Time 0.001 (0.014)	Loss 3.2088 (3.1977)	Entropy 1.55000 (1.55657)	Top-1 acc 46.484 (47.736)	Top-5 acc 73.047 (71.098)	lr 0.02226
Train [26][2480/3239]	Time 0.241 (0.540)	Data Time 0.002 (0.014)	Loss 3.1064 (3.1976)	Entropy 1.54987 (1.55655)	Top-1 acc 50.391 (47.740)	Top-5 acc 73.828 (71.101)	lr 0.02226
Train [26][2490/3239]	Time 0.313 (0.557)	Data Time 0.002 (0.014)	Loss 3.2037 (3.1976)	Entropy 1.54981 (1.55652)	Top-1 acc 45.703 (47.744)	Top-5 acc 74.609 (71.103)	lr 0.02226
Train [26][2500/3239]	Time 0.220 (0.557)	Data Time 0.002 (0.014)	Loss 3.1412 (3.1977)	Entropy 1.54974 (1.55649)	Top-1 acc 51.172 (47.748)	Top-5 acc 72.656 (71.096)	lr 0.02226
Train [26][2510/3239]	Time 0.227 (0.557)	Data Time 0.001 (0.014)	Loss 2.9969 (3.1977)	Entropy 1.54972 (1.55647)	Top-1 acc 53.125 (47.745)	Top-5 acc 77.344 (71.098)	lr 0.02226
Train [26][2520/3239]	Time 0.168 (0.556)	Data Time 0.001 (0.014)	Loss 3.1408 (3.1974)	Entropy 1.54961 (1.55644)	Top-1 acc 46.094 (47.752)	Top-5 acc 69.922 (71.102)	lr 0.02226
Train [26][2530/3239]	Time 0.219 (0.556)	Data Time 0.001 (0.014)	Loss 3.0372 (3.1971)	Entropy 1.54959 (1.55641)	Top-1 acc 49.609 (47.757)	Top-5 acc 73.438 (71.108)	lr 0.02226
Train [26][2540/3239]	Time 2.315 (0.555)	Data Time 0.001 (0.014)	Loss 3.3100 (3.1973)	Entropy 1.54959 (1.55639)	Top-1 acc 46.094 (47.750)	Top-5 acc 66.797 (71.103)	lr 0.02226
Train [26][2550/3239]	Time 0.326 (0.554)	Data Time 0.001 (0.013)	Loss 3.2298 (3.1975)	Entropy 1.54944 (1.55636)	Top-1 acc 48.047 (47.746)	Top-5 acc 69.531 (71.099)	lr 0.02226
Train [26][2560/3239]	Time 0.225 (0.554)	Data Time 0.001 (0.013)	Loss 3.6690 (3.1976)	Entropy 1.54936 (1.55633)	Top-1 acc 40.234 (47.747)	Top-5 acc 66.016 (71.099)	lr 0.02226
Train [26][2570/3239]	Time 0.231 (0.553)	Data Time 0.001 (0.013)	Loss 3.0999 (3.1977)	Entropy 1.54934 (1.55630)	Top-1 acc 47.266 (47.741)	Top-5 acc 73.828 (71.095)	lr 0.02226
Train [26][2580/3239]	Time 0.210 (0.553)	Data Time 0.001 (0.013)	Loss 3.1553 (3.1978)	Entropy 1.54933 (1.55628)	Top-1 acc 46.484 (47.741)	Top-5 acc 71.484 (71.092)	lr 0.02226
Train [26][2590/3239]	Time 0.217 (0.552)	Data Time 0.002 (0.013)	Loss 3.3838 (3.1975)	Entropy 1.54920 (1.55625)	Top-1 acc 44.531 (47.744)	Top-5 acc 69.141 (71.096)	lr 0.02226
Train [26][2600/3239]	Time 0.209 (0.552)	Data Time 0.001 (0.013)	Loss 3.2010 (3.1974)	Entropy 1.54916 (1.55622)	Top-1 acc 48.047 (47.746)	Top-5 acc 72.266 (71.099)	lr 0.02225
Train [26][2610/3239]	Time 0.245 (0.551)	Data Time 0.001 (0.013)	Loss 3.1793 (3.1978)	Entropy 1.54914 (1.55620)	Top-1 acc 45.703 (47.737)	Top-5 acc 69.922 (71.095)	lr 0.02225
Train [26][2620/3239]	Time 0.235 (0.551)	Data Time 0.001 (0.013)	Loss 3.1629 (3.1977)	Entropy 1.54910 (1.55617)	Top-1 acc 48.438 (47.742)	Top-5 acc 70.703 (71.096)	lr 0.02225
Train [26][2630/3239]	Time 0.216 (0.551)	Data Time 0.001 (0.013)	Loss 3.3113 (3.1977)	Entropy 1.54898 (1.55614)	Top-1 acc 45.703 (47.744)	Top-5 acc 68.750 (71.097)	lr 0.02225
Train [26][2640/3239]	Time 0.223 (0.550)	Data Time 0.001 (0.013)	Loss 3.1325 (3.1978)	Entropy 1.54892 (1.55611)	Top-1 acc 51.953 (47.748)	Top-5 acc 75.000 (71.097)	lr 0.02225
Train [26][2650/3239]	Time 0.231 (0.550)	Data Time 0.002 (0.013)	Loss 3.1537 (3.1981)	Entropy 1.54874 (1.55609)	Top-1 acc 48.438 (47.738)	Top-5 acc 69.922 (71.093)	lr 0.02225
Train [26][2660/3239]	Time 0.285 (0.550)	Data Time 0.001 (0.013)	Loss 3.2797 (3.1981)	Entropy 1.54870 (1.55606)	Top-1 acc 43.750 (47.738)	Top-5 acc 68.359 (71.090)	lr 0.02225
Train [26][2670/3239]	Time 0.214 (0.549)	Data Time 0.001 (0.013)	Loss 3.4339 (3.1982)	Entropy 1.54868 (1.55603)	Top-1 acc 43.359 (47.739)	Top-5 acc 65.234 (71.085)	lr 0.02225
Train [26][2680/3239]	Time 0.208 (0.549)	Data Time 0.001 (0.013)	Loss 3.2115 (3.1983)	Entropy 1.54861 (1.55600)	Top-1 acc 42.578 (47.731)	Top-5 acc 70.312 (71.083)	lr 0.02225
Train [26][2690/3239]	Time 0.241 (0.548)	Data Time 0.001 (0.013)	Loss 3.3390 (3.1985)	Entropy 1.54838 (1.55598)	Top-1 acc 42.969 (47.719)	Top-5 acc 67.969 (71.078)	lr 0.02225
Train [26][2700/3239]	Time 0.240 (0.548)	Data Time 0.001 (0.013)	Loss 3.0027 (3.1986)	Entropy 1.54838 (1.55595)	Top-1 acc 52.734 (47.720)	Top-5 acc 75.391 (71.078)	lr 0.02225
Train [26][2710/3239]	Time 0.218 (0.548)	Data Time 0.001 (0.013)	Loss 3.0534 (3.1985)	Entropy 1.54836 (1.55592)	Top-1 acc 50.391 (47.721)	Top-5 acc 74.219 (71.079)	lr 0.02225
Train [26][2720/3239]	Time 0.253 (0.547)	Data Time 0.001 (0.013)	Loss 3.3188 (3.1989)	Entropy 1.54829 (1.55589)	Top-1 acc 43.750 (47.713)	Top-5 acc 68.750 (71.071)	lr 0.02225
Train [26][2730/3239]	Time 0.203 (0.547)	Data Time 0.001 (0.013)	Loss 3.3475 (3.1990)	Entropy 1.54825 (1.55586)	Top-1 acc 48.047 (47.716)	Top-5 acc 67.969 (71.067)	lr 0.02225
Train [26][2740/3239]	Time 0.235 (0.546)	Data Time 0.001 (0.013)	Loss 3.1073 (3.1990)	Entropy 1.54815 (1.55584)	Top-1 acc 53.125 (47.718)	Top-5 acc 71.875 (71.066)	lr 0.02225
Train [26][2750/3239]	Time 0.254 (0.546)	Data Time 0.001 (0.013)	Loss 3.2801 (3.1991)	Entropy 1.54812 (1.55581)	Top-1 acc 42.578 (47.713)	Top-5 acc 71.094 (71.062)	lr 0.02225
Train [26][2760/3239]	Time 0.152 (0.546)	Data Time 0.002 (0.013)	Loss 3.4148 (3.1992)	Entropy 1.54811 (1.55578)	Top-1 acc 39.844 (47.712)	Top-5 acc 65.625 (71.061)	lr 0.02224
Train [26][2770/3239]	Time 0.283 (0.545)	Data Time 0.001 (0.013)	Loss 3.2597 (3.1993)	Entropy 1.54776 (1.55575)	Top-1 acc 48.047 (47.707)	Top-5 acc 72.266 (71.060)	lr 0.02224
Train [26][2780/3239]	Time 0.372 (0.545)	Data Time 0.001 (0.013)	Loss 3.0509 (3.1993)	Entropy 1.54771 (1.55572)	Top-1 acc 50.000 (47.706)	Top-5 acc 75.781 (71.063)	lr 0.02224
Train [26][2790/3239]	Time 0.258 (0.545)	Data Time 0.001 (0.012)	Loss 3.0961 (3.1993)	Entropy 1.54756 (1.55570)	Top-1 acc 48.047 (47.706)	Top-5 acc 74.219 (71.065)	lr 0.02224
Train [26][2800/3239]	Time 0.220 (0.544)	Data Time 0.001 (0.012)	Loss 3.1830 (3.1992)	Entropy 1.54747 (1.55567)	Top-1 acc 49.609 (47.709)	Top-5 acc 72.656 (71.072)	lr 0.02224
Train [26][2810/3239]	Time 0.228 (0.544)	Data Time 0.001 (0.012)	Loss 3.2822 (3.1991)	Entropy 1.54745 (1.55564)	Top-1 acc 48.828 (47.715)	Top-5 acc 71.484 (71.075)	lr 0.02224
Train [26][2820/3239]	Time 0.212 (0.543)	Data Time 0.001 (0.012)	Loss 3.2705 (3.1990)	Entropy 1.54735 (1.55561)	Top-1 acc 42.578 (47.717)	Top-5 acc 67.578 (71.078)	lr 0.02224
Train [26][2830/3239]	Time 0.243 (0.558)	Data Time 0.003 (0.012)	Loss 2.9634 (3.1989)	Entropy 1.54735 (1.55558)	Top-1 acc 52.734 (47.721)	Top-5 acc 75.391 (71.078)	lr 0.02224
Train [26][2840/3239]	Time 0.394 (0.558)	Data Time 0.002 (0.012)	Loss 3.2771 (3.1988)	Entropy 1.54726 (1.55555)	Top-1 acc 44.141 (47.721)	Top-5 acc 67.969 (71.079)	lr 0.02224
Train [26][2850/3239]	Time 0.264 (0.558)	Data Time 0.004 (0.012)	Loss 3.1005 (3.1988)	Entropy 1.54717 (1.55552)	Top-1 acc 48.438 (47.722)	Top-5 acc 75.781 (71.079)	lr 0.02224
Train [26][2860/3239]	Time 0.254 (0.557)	Data Time 0.002 (0.012)	Loss 3.0164 (3.1986)	Entropy 1.54707 (1.55549)	Top-1 acc 52.344 (47.732)	Top-5 acc 74.609 (71.084)	lr 0.02224
Train [26][2870/3239]	Time 0.224 (0.557)	Data Time 0.002 (0.012)	Loss 3.1022 (3.1985)	Entropy 1.54703 (1.55546)	Top-1 acc 52.344 (47.732)	Top-5 acc 72.266 (71.085)	lr 0.02224
Train [26][2880/3239]	Time 0.239 (0.557)	Data Time 0.001 (0.012)	Loss 3.0749 (3.1984)	Entropy 1.54693 (1.55543)	Top-1 acc 51.172 (47.733)	Top-5 acc 73.828 (71.089)	lr 0.02224
Train [26][2890/3239]	Time 0.271 (0.556)	Data Time 0.001 (0.012)	Loss 3.2818 (3.1984)	Entropy 1.54685 (1.55540)	Top-1 acc 44.531 (47.734)	Top-5 acc 70.312 (71.089)	lr 0.02224
Train [26][2900/3239]	Time 0.326 (0.556)	Data Time 0.003 (0.012)	Loss 3.3504 (3.1985)	Entropy 1.54690 (1.55537)	Top-1 acc 46.484 (47.733)	Top-5 acc 67.969 (71.089)	lr 0.02224
Train [26][2910/3239]	Time 0.220 (0.555)	Data Time 0.001 (0.012)	Loss 3.2882 (3.1984)	Entropy 1.54678 (1.55535)	Top-1 acc 43.750 (47.736)	Top-5 acc 66.406 (71.095)	lr 0.02224
Train [26][2920/3239]	Time 0.295 (0.555)	Data Time 0.001 (0.012)	Loss 3.2986 (3.1984)	Entropy 1.54676 (1.55532)	Top-1 acc 48.438 (47.735)	Top-5 acc 69.922 (71.092)	lr 0.02223
Train [26][2930/3239]	Time 0.234 (0.555)	Data Time 0.001 (0.012)	Loss 3.1687 (3.1985)	Entropy 1.54668 (1.55529)	Top-1 acc 50.391 (47.736)	Top-5 acc 71.875 (71.092)	lr 0.02223
Train [26][2940/3239]	Time 0.196 (0.554)	Data Time 0.001 (0.012)	Loss 3.3706 (3.1987)	Entropy 1.54654 (1.55526)	Top-1 acc 42.969 (47.728)	Top-5 acc 68.750 (71.089)	lr 0.02223
Train [26][2950/3239]	Time 0.272 (0.554)	Data Time 0.009 (0.012)	Loss 3.0071 (3.1988)	Entropy 1.54645 (1.55523)	Top-1 acc 53.125 (47.724)	Top-5 acc 74.219 (71.087)	lr 0.02223
Train [26][2960/3239]	Time 0.193 (0.554)	Data Time 0.001 (0.012)	Loss 3.2455 (3.1989)	Entropy 1.54644 (1.55520)	Top-1 acc 45.703 (47.724)	Top-5 acc 69.141 (71.087)	lr 0.02223
Train [26][2970/3239]	Time 0.204 (0.553)	Data Time 0.001 (0.012)	Loss 3.0009 (3.1987)	Entropy 1.54639 (1.55517)	Top-1 acc 51.172 (47.730)	Top-5 acc 76.172 (71.092)	lr 0.02223
Train [26][2980/3239]	Time 0.258 (0.553)	Data Time 0.001 (0.012)	Loss 3.1633 (3.1986)	Entropy 1.54629 (1.55514)	Top-1 acc 49.609 (47.734)	Top-5 acc 74.219 (71.095)	lr 0.02223
Train [26][2990/3239]	Time 0.251 (0.552)	Data Time 0.001 (0.012)	Loss 3.4171 (3.1987)	Entropy 1.54623 (1.55511)	Top-1 acc 42.578 (47.733)	Top-5 acc 65.625 (71.095)	lr 0.02223
Train [26][3000/3239]	Time 0.271 (0.552)	Data Time 0.001 (0.012)	Loss 3.3754 (3.1989)	Entropy 1.54621 (1.55508)	Top-1 acc 45.703 (47.731)	Top-5 acc 66.797 (71.090)	lr 0.02223
Train [26][3010/3239]	Time 0.394 (0.552)	Data Time 0.001 (0.012)	Loss 3.1849 (3.1990)	Entropy 1.54591 (1.55505)	Top-1 acc 46.094 (47.727)	Top-5 acc 71.484 (71.089)	lr 0.02223
Train [26][3020/3239]	Time 0.249 (0.551)	Data Time 0.002 (0.012)	Loss 3.2193 (3.1989)	Entropy 1.54581 (1.55502)	Top-1 acc 48.438 (47.728)	Top-5 acc 73.438 (71.093)	lr 0.02223
Train [26][3030/3239]	Time 0.249 (0.551)	Data Time 0.001 (0.012)	Loss 3.3834 (3.1990)	Entropy 1.54577 (1.55499)	Top-1 acc 47.656 (47.726)	Top-5 acc 67.578 (71.093)	lr 0.02223
Train [26][3040/3239]	Time 0.206 (0.551)	Data Time 0.001 (0.012)	Loss 3.1299 (3.1989)	Entropy 1.54572 (1.55496)	Top-1 acc 50.000 (47.730)	Top-5 acc 70.703 (71.093)	lr 0.02223
Train [26][3050/3239]	Time 0.222 (0.550)	Data Time 0.001 (0.012)	Loss 3.3258 (3.1991)	Entropy 1.54565 (1.55493)	Top-1 acc 48.438 (47.726)	Top-5 acc 66.797 (71.088)	lr 0.02223
Train [26][3060/3239]	Time 0.208 (0.550)	Data Time 0.001 (0.012)	Loss 3.1262 (3.1990)	Entropy 1.54545 (1.55490)	Top-1 acc 50.391 (47.727)	Top-5 acc 74.219 (71.094)	lr 0.02223
Train [26][3070/3239]	Time 0.240 (0.550)	Data Time 0.001 (0.012)	Loss 3.0580 (3.1991)	Entropy 1.54537 (1.55487)	Top-1 acc 48.047 (47.723)	Top-5 acc 75.000 (71.091)	lr 0.02223
Train [26][3080/3239]	Time 0.221 (0.549)	Data Time 0.001 (0.011)	Loss 3.1241 (3.1991)	Entropy 1.54522 (1.55484)	Top-1 acc 50.000 (47.721)	Top-5 acc 73.828 (71.091)	lr 0.02222
Train [26][3090/3239]	Time 0.199 (0.549)	Data Time 0.001 (0.011)	Loss 3.1664 (3.1992)	Entropy 1.54516 (1.55481)	Top-1 acc 50.000 (47.718)	Top-5 acc 72.656 (71.088)	lr 0.02222
Train [26][3100/3239]	Time 0.254 (0.548)	Data Time 0.001 (0.011)	Loss 3.1497 (3.1990)	Entropy 1.54503 (1.55477)	Top-1 acc 48.438 (47.719)	Top-5 acc 72.266 (71.091)	lr 0.02222
Train [26][3110/3239]	Time 0.198 (0.548)	Data Time 0.001 (0.011)	Loss 3.0998 (3.1991)	Entropy 1.54501 (1.55474)	Top-1 acc 48.828 (47.721)	Top-5 acc 71.484 (71.091)	lr 0.02222
Train [26][3120/3239]	Time 0.251 (0.548)	Data Time 0.001 (0.011)	Loss 3.1656 (3.1989)	Entropy 1.54498 (1.55471)	Top-1 acc 48.438 (47.724)	Top-5 acc 73.047 (71.094)	lr 0.02222
Train [26][3130/3239]	Time 0.313 (0.547)	Data Time 0.001 (0.011)	Loss 3.2380 (3.1990)	Entropy 1.54490 (1.55468)	Top-1 acc 48.047 (47.722)	Top-5 acc 70.703 (71.094)	lr 0.02222
Train [26][3140/3239]	Time 0.217 (0.547)	Data Time 0.001 (0.011)	Loss 3.1412 (3.1990)	Entropy 1.54480 (1.55465)	Top-1 acc 47.656 (47.723)	Top-5 acc 72.656 (71.097)	lr 0.02222
Train [26][3150/3239]	Time 0.244 (0.547)	Data Time 0.001 (0.011)	Loss 3.2548 (3.1990)	Entropy 1.54478 (1.55462)	Top-1 acc 46.484 (47.724)	Top-5 acc 71.094 (71.097)	lr 0.02222
Train [26][3160/3239]	Time 0.424 (0.559)	Data Time 0.004 (0.011)	Loss 2.9384 (3.1988)	Entropy 1.54474 (1.55459)	Top-1 acc 52.344 (47.727)	Top-5 acc 75.391 (71.101)	lr 0.02222
Train [26][3170/3239]	Time 0.228 (0.559)	Data Time 0.002 (0.011)	Loss 3.2526 (3.1988)	Entropy 1.54459 (1.55456)	Top-1 acc 42.969 (47.725)	Top-5 acc 71.484 (71.102)	lr 0.02222
Train [26][3180/3239]	Time 0.282 (0.558)	Data Time 0.000 (0.011)	Loss 3.0474 (3.1988)	Entropy 1.54448 (1.55453)	Top-1 acc 53.516 (47.726)	Top-5 acc 75.391 (71.100)	lr 0.02222
Train [26][3190/3239]	Time 0.222 (0.558)	Data Time 0.000 (0.011)	Loss 3.1878 (3.1988)	Entropy 1.54429 (1.55449)	Top-1 acc 44.922 (47.725)	Top-5 acc 72.266 (71.100)	lr 0.02222
Train [26][3200/3239]	Time 0.202 (0.558)	Data Time 0.000 (0.011)	Loss 3.2283 (3.1990)	Entropy 1.54420 (1.55446)	Top-1 acc 44.531 (47.723)	Top-5 acc 71.875 (71.098)	lr 0.02222
Train [26][3210/3239]	Time 0.202 (0.557)	Data Time 0.000 (0.011)	Loss 3.2081 (3.1990)	Entropy 1.54422 (1.55443)	Top-1 acc 44.141 (47.723)	Top-5 acc 71.484 (71.096)	lr 0.02222
Train [26][3220/3239]	Time 0.222 (0.557)	Data Time 0.000 (0.011)	Loss 2.9907 (3.1987)	Entropy 1.54410 (1.55440)	Top-1 acc 51.953 (47.727)	Top-5 acc 78.125 (71.102)	lr 0.02222
Train [26][3230/3239]	Time 0.204 (0.556)	Data Time 0.000 (0.011)	Loss 3.1594 (3.1987)	Entropy 1.54399 (1.55437)	Top-1 acc 52.344 (47.726)	Top-5 acc 73.438 (71.101)	lr 0.02221
Train [26][3239/3239]	Time 2.125 (0.556)	Data Time 0.000 (0.011)	Loss 3.5506 (3.1989)	Entropy 1.54399 (1.55434)	Top-1 acc 33.333 (47.721)	Top-5 acc 69.136 (71.098)	lr 0.02221
==========Valid [26/120]	loss 1.991	top-1 acc 56.077 (56.077)	top-5 acc 78.843	Train top-1 47.721	top-5 71.098	Entropy 1.54399	Latency-None: 0.000ms	Flops: 561.43M
Train [27][0/3239]	Time 30.652 (30.652)	Data Time 29.139 (29.139)	Loss 2.8897 (2.8897)	Entropy 1.54393 (1.54393)	Top-1 acc 54.688 (54.688)	Top-5 acc 76.562 (76.562)	lr 0.02221
Train [27][10/3239]	Time 2.507 (3.453)	Data Time 0.002 (2.795)	Loss 3.2844 (3.1217)	Entropy 1.54393 (1.54393)	Top-1 acc 44.922 (49.290)	Top-5 acc 69.141 (72.869)	lr 0.02221
Train [27][20/3239]	Time 0.214 (1.915)	Data Time 0.001 (1.464)	Loss 3.0826 (3.1387)	Entropy 1.54382 (1.54388)	Top-1 acc 51.172 (49.163)	Top-5 acc 73.047 (72.638)	lr 0.02221
Train [27][30/3239]	Time 0.212 (1.439)	Data Time 0.001 (0.992)	Loss 3.0212 (3.1286)	Entropy 1.54374 (1.54383)	Top-1 acc 47.656 (49.244)	Top-5 acc 76.172 (73.072)	lr 0.02221
Train [27][40/3239]	Time 0.215 (1.194)	Data Time 0.001 (0.751)	Loss 3.0695 (3.1332)	Entropy 1.54360 (1.54378)	Top-1 acc 50.000 (49.143)	Top-5 acc 73.828 (72.847)	lr 0.02221
Train [27][50/3239]	Time 0.195 (1.043)	Data Time 0.001 (0.604)	Loss 3.3173 (3.1404)	Entropy 1.54351 (1.54374)	Top-1 acc 46.484 (49.219)	Top-5 acc 67.188 (72.656)	lr 0.02221
Train [27][60/3239]	Time 0.221 (0.943)	Data Time 0.001 (0.505)	Loss 3.0317 (3.1479)	Entropy 1.54341 (1.54369)	Top-1 acc 48.438 (48.911)	Top-5 acc 73.438 (72.471)	lr 0.02221
Train [27][70/3239]	Time 0.208 (0.873)	Data Time 0.001 (0.434)	Loss 3.3370 (3.1514)	Entropy 1.54338 (1.54365)	Top-1 acc 44.531 (48.784)	Top-5 acc 67.969 (72.403)	lr 0.02221
Train [27][80/3239]	Time 0.221 (0.820)	Data Time 0.001 (0.381)	Loss 3.2930 (3.1547)	Entropy 1.54329 (1.54361)	Top-1 acc 43.750 (48.799)	Top-5 acc 65.625 (72.222)	lr 0.02221
Train [27][90/3239]	Time 0.220 (0.778)	Data Time 0.002 (0.339)	Loss 3.3531 (3.1638)	Entropy 1.54321 (1.54357)	Top-1 acc 43.359 (48.575)	Top-5 acc 67.969 (72.034)	lr 0.02221
Train [27][100/3239]	Time 0.252 (0.744)	Data Time 0.001 (0.306)	Loss 3.1248 (3.1613)	Entropy 1.54312 (1.54353)	Top-1 acc 51.562 (48.550)	Top-5 acc 69.531 (72.119)	lr 0.02221
Train [27][110/3239]	Time 0.231 (0.717)	Data Time 0.001 (0.278)	Loss 3.0384 (3.1552)	Entropy 1.54311 (1.54350)	Top-1 acc 50.391 (48.744)	Top-5 acc 75.000 (72.188)	lr 0.02221
Train [27][120/3239]	Time 2.468 (0.695)	Data Time 0.001 (0.255)	Loss 3.1883 (3.1561)	Entropy 1.54311 (1.54347)	Top-1 acc 45.703 (48.783)	Top-5 acc 69.141 (72.133)	lr 0.02221
Train [27][130/3239]	Time 0.210 (0.660)	Data Time 0.001 (0.236)	Loss 3.0014 (3.1544)	Entropy 1.54306 (1.54343)	Top-1 acc 52.344 (48.748)	Top-5 acc 76.172 (72.114)	lr 0.02221
Train [27][140/3239]	Time 0.213 (0.644)	Data Time 0.001 (0.219)	Loss 3.3382 (3.1561)	Entropy 1.54298 (1.54340)	Top-1 acc 42.188 (48.795)	Top-5 acc 66.797 (72.038)	lr 0.02221
Train [27][150/3239]	Time 0.197 (0.630)	Data Time 0.001 (0.205)	Loss 3.3971 (3.1551)	Entropy 1.54296 (1.54337)	Top-1 acc 45.312 (48.841)	Top-5 acc 67.578 (72.059)	lr 0.02220
Train [27][160/3239]	Time 0.230 (0.619)	Data Time 0.002 (0.192)	Loss 3.0426 (3.1519)	Entropy 1.54293 (1.54335)	Top-1 acc 55.859 (48.874)	Top-5 acc 75.781 (72.110)	lr 0.02220
Train [27][170/3239]	Time 0.202 (0.609)	Data Time 0.002 (0.181)	Loss 3.2120 (3.1508)	Entropy 1.54291 (1.54332)	Top-1 acc 46.094 (48.906)	Top-5 acc 70.312 (72.106)	lr 0.02220
Train [27][180/3239]	Time 0.245 (0.600)	Data Time 0.001 (0.171)	Loss 3.1256 (3.1512)	Entropy 1.54285 (1.54330)	Top-1 acc 47.656 (48.841)	Top-5 acc 69.531 (72.069)	lr 0.02220
Train [27][190/3239]	Time 0.224 (0.592)	Data Time 0.001 (0.162)	Loss 2.9642 (3.1510)	Entropy 1.54282 (1.54327)	Top-1 acc 52.344 (48.859)	Top-5 acc 76.953 (72.094)	lr 0.02220
Train [27][200/3239]	Time 0.209 (0.584)	Data Time 0.001 (0.154)	Loss 2.9652 (3.1525)	Entropy 1.54278 (1.54325)	Top-1 acc 50.391 (48.807)	Top-5 acc 74.609 (72.034)	lr 0.02220
Train [27][210/3239]	Time 0.192 (0.577)	Data Time 0.001 (0.147)	Loss 3.0890 (3.1495)	Entropy 1.54275 (1.54323)	Top-1 acc 48.438 (48.837)	Top-5 acc 75.391 (72.101)	lr 0.02220
Train [27][220/3239]	Time 0.315 (0.571)	Data Time 0.003 (0.141)	Loss 3.1101 (3.1503)	Entropy 1.54268 (1.54321)	Top-1 acc 50.781 (48.871)	Top-5 acc 73.047 (72.066)	lr 0.02220
Train [27][230/3239]	Time 2.492 (0.566)	Data Time 0.001 (0.135)	Loss 3.1890 (3.1504)	Entropy 1.54268 (1.54318)	Top-1 acc 50.391 (48.928)	Top-5 acc 74.219 (72.066)	lr 0.02220
Train [27][240/3239]	Time 0.264 (0.552)	Data Time 0.002 (0.129)	Loss 3.3401 (3.1513)	Entropy 1.54262 (1.54316)	Top-1 acc 41.797 (48.886)	Top-5 acc 67.188 (72.022)	lr 0.02220
Train [27][250/3239]	Time 0.228 (0.548)	Data Time 0.001 (0.124)	Loss 3.1706 (3.1526)	Entropy 1.54247 (1.54313)	Top-1 acc 46.484 (48.834)	Top-5 acc 69.141 (72.014)	lr 0.02220
Train [27][260/3239]	Time 0.202 (0.543)	Data Time 0.001 (0.119)	Loss 3.2224 (3.1568)	Entropy 1.54242 (1.54311)	Top-1 acc 46.094 (48.722)	Top-5 acc 70.312 (71.944)	lr 0.02220
Train [27][270/3239]	Time 0.204 (0.539)	Data Time 0.001 (0.115)	Loss 2.8936 (3.1565)	Entropy 1.54232 (1.54308)	Top-1 acc 52.734 (48.706)	Top-5 acc 75.781 (71.951)	lr 0.02220
Train [27][280/3239]	Time 0.225 (0.699)	Data Time 0.002 (0.111)	Loss 3.3325 (3.1581)	Entropy 1.54225 (1.54305)	Top-1 acc 41.406 (48.653)	Top-5 acc 68.359 (71.932)	lr 0.02220
Train [27][290/3239]	Time 0.203 (0.691)	Data Time 0.002 (0.107)	Loss 3.0634 (3.1590)	Entropy 1.54208 (1.54302)	Top-1 acc 51.172 (48.601)	Top-5 acc 71.875 (71.914)	lr 0.02220
Train [27][300/3239]	Time 0.213 (0.684)	Data Time 0.001 (0.104)	Loss 3.1839 (3.1590)	Entropy 1.54200 (1.54299)	Top-1 acc 47.656 (48.610)	Top-5 acc 70.312 (71.927)	lr 0.02220
Train [27][310/3239]	Time 0.213 (0.676)	Data Time 0.001 (0.100)	Loss 3.2423 (3.1613)	Entropy 1.54201 (1.54295)	Top-1 acc 43.750 (48.530)	Top-5 acc 71.094 (71.899)	lr 0.02219
Train [27][320/3239]	Time 0.253 (0.669)	Data Time 0.001 (0.097)	Loss 3.2068 (3.1635)	Entropy 1.54191 (1.54292)	Top-1 acc 46.875 (48.459)	Top-5 acc 69.922 (71.845)	lr 0.02219
Train [27][330/3239]	Time 0.235 (0.662)	Data Time 0.001 (0.095)	Loss 3.0935 (3.1641)	Entropy 1.54180 (1.54289)	Top-1 acc 57.812 (48.466)	Top-5 acc 72.266 (71.820)	lr 0.02219
Train [27][340/3239]	Time 2.321 (0.655)	Data Time 0.001 (0.092)	Loss 3.3681 (3.1653)	Entropy 1.54180 (1.54286)	Top-1 acc 47.656 (48.442)	Top-5 acc 67.578 (71.812)	lr 0.02219
Train [27][350/3239]	Time 0.211 (0.643)	Data Time 0.001 (0.089)	Loss 3.0189 (3.1657)	Entropy 1.54175 (1.54283)	Top-1 acc 53.906 (48.406)	Top-5 acc 75.391 (71.797)	lr 0.02219
Train [27][360/3239]	Time 0.253 (0.637)	Data Time 0.001 (0.087)	Loss 3.1564 (3.1652)	Entropy 1.54168 (1.54280)	Top-1 acc 44.141 (48.404)	Top-5 acc 73.828 (71.821)	lr 0.02219
Train [27][370/3239]	Time 0.291 (0.632)	Data Time 0.001 (0.085)	Loss 3.2537 (3.1654)	Entropy 1.54171 (1.54277)	Top-1 acc 49.219 (48.442)	Top-5 acc 68.359 (71.789)	lr 0.02219
Train [27][380/3239]	Time 0.225 (0.627)	Data Time 0.001 (0.082)	Loss 3.0866 (3.1656)	Entropy 1.54162 (1.54274)	Top-1 acc 50.781 (48.450)	Top-5 acc 74.219 (71.772)	lr 0.02219
Train [27][390/3239]	Time 0.197 (0.622)	Data Time 0.001 (0.080)	Loss 3.1341 (3.1656)	Entropy 1.54156 (1.54271)	Top-1 acc 48.828 (48.449)	Top-5 acc 73.438 (71.755)	lr 0.02219
Train [27][400/3239]	Time 0.209 (0.618)	Data Time 0.001 (0.078)	Loss 3.1618 (3.1653)	Entropy 1.54156 (1.54268)	Top-1 acc 46.094 (48.434)	Top-5 acc 71.094 (71.751)	lr 0.02219
Train [27][410/3239]	Time 0.180 (0.613)	Data Time 0.001 (0.077)	Loss 3.1830 (3.1650)	Entropy 1.54150 (1.54265)	Top-1 acc 43.359 (48.408)	Top-5 acc 70.703 (71.757)	lr 0.02219
Train [27][420/3239]	Time 0.226 (0.609)	Data Time 0.001 (0.075)	Loss 3.1459 (3.1649)	Entropy 1.54146 (1.54263)	Top-1 acc 47.656 (48.386)	Top-5 acc 72.266 (71.744)	lr 0.02219
Train [27][430/3239]	Time 0.209 (0.605)	Data Time 0.001 (0.073)	Loss 3.4172 (3.1638)	Entropy 1.54145 (1.54260)	Top-1 acc 45.703 (48.399)	Top-5 acc 66.797 (71.764)	lr 0.02219
Train [27][440/3239]	Time 0.228 (0.601)	Data Time 0.002 (0.071)	Loss 3.3034 (3.1639)	Entropy 1.54143 (1.54257)	Top-1 acc 46.875 (48.406)	Top-5 acc 70.312 (71.778)	lr 0.02219
Train [27][450/3239]	Time 2.350 (0.598)	Data Time 0.002 (0.070)	Loss 3.1325 (3.1646)	Entropy 1.54143 (1.54255)	Top-1 acc 46.094 (48.374)	Top-5 acc 75.000 (71.757)	lr 0.02219
Train [27][460/3239]	Time 0.220 (0.590)	Data Time 0.002 (0.068)	Loss 3.0267 (3.1645)	Entropy 1.54118 (1.54252)	Top-1 acc 50.391 (48.377)	Top-5 acc 76.562 (71.755)	lr 0.02219
Train [27][470/3239]	Time 0.215 (0.586)	Data Time 0.001 (0.067)	Loss 3.3502 (3.1648)	Entropy 1.54114 (1.54249)	Top-1 acc 46.484 (48.403)	Top-5 acc 66.797 (71.764)	lr 0.02218
Train [27][480/3239]	Time 0.213 (0.584)	Data Time 0.001 (0.066)	Loss 3.1748 (3.1643)	Entropy 1.54106 (1.54246)	Top-1 acc 47.266 (48.403)	Top-5 acc 71.875 (71.801)	lr 0.02218
Train [27][490/3239]	Time 0.187 (0.581)	Data Time 0.001 (0.064)	Loss 3.2858 (3.1638)	Entropy 1.54104 (1.54243)	Top-1 acc 44.141 (48.418)	Top-5 acc 71.875 (71.800)	lr 0.02218
Train [27][500/3239]	Time 0.235 (0.578)	Data Time 0.001 (0.063)	Loss 2.9204 (3.1641)	Entropy 1.54105 (1.54240)	Top-1 acc 53.516 (48.425)	Top-5 acc 76.172 (71.802)	lr 0.02218
Train [27][510/3239]	Time 0.167 (0.575)	Data Time 0.001 (0.062)	Loss 3.1783 (3.1638)	Entropy 1.54099 (1.54238)	Top-1 acc 45.703 (48.439)	Top-5 acc 72.266 (71.811)	lr 0.02218
Train [27][520/3239]	Time 0.236 (0.573)	Data Time 0.001 (0.061)	Loss 3.1540 (3.1645)	Entropy 1.54099 (1.54235)	Top-1 acc 46.875 (48.414)	Top-5 acc 74.219 (71.797)	lr 0.02218
Train [27][530/3239]	Time 0.302 (0.570)	Data Time 0.001 (0.060)	Loss 3.1308 (3.1640)	Entropy 1.54089 (1.54232)	Top-1 acc 48.828 (48.420)	Top-5 acc 72.266 (71.804)	lr 0.02218
Train [27][540/3239]	Time 0.225 (0.568)	Data Time 0.001 (0.059)	Loss 3.3195 (3.1643)	Entropy 1.54084 (1.54230)	Top-1 acc 41.016 (48.404)	Top-5 acc 68.750 (71.795)	lr 0.02218
Train [27][550/3239]	Time 0.270 (0.566)	Data Time 0.001 (0.058)	Loss 3.0753 (3.1642)	Entropy 1.54082 (1.54227)	Top-1 acc 51.562 (48.403)	Top-5 acc 74.609 (71.806)	lr 0.02218
Train [27][560/3239]	Time 2.405 (0.564)	Data Time 0.001 (0.057)	Loss 3.2881 (3.1644)	Entropy 1.54082 (1.54224)	Top-1 acc 45.312 (48.405)	Top-5 acc 69.531 (71.810)	lr 0.02218
Train [27][570/3239]	Time 0.218 (0.558)	Data Time 0.001 (0.056)	Loss 3.1767 (3.1645)	Entropy 1.54066 (1.54222)	Top-1 acc 48.047 (48.403)	Top-5 acc 71.875 (71.828)	lr 0.02218
Train [27][580/3239]	Time 0.225 (0.556)	Data Time 0.001 (0.055)	Loss 3.0689 (3.1651)	Entropy 1.54063 (1.54219)	Top-1 acc 49.609 (48.381)	Top-5 acc 69.922 (71.808)	lr 0.02218
Train [27][590/3239]	Time 0.301 (0.554)	Data Time 0.001 (0.054)	Loss 3.3241 (3.1647)	Entropy 1.54063 (1.54216)	Top-1 acc 44.922 (48.398)	Top-5 acc 70.703 (71.818)	lr 0.02218
Train [27][600/3239]	Time 0.201 (0.552)	Data Time 0.001 (0.053)	Loss 3.1578 (3.1644)	Entropy 1.54057 (1.54214)	Top-1 acc 50.000 (48.402)	Top-5 acc 69.531 (71.826)	lr 0.02218
Train [27][610/3239]	Time 0.218 (0.550)	Data Time 0.001 (0.052)	Loss 3.0378 (3.1643)	Entropy 1.54038 (1.54211)	Top-1 acc 51.562 (48.402)	Top-5 acc 72.266 (71.820)	lr 0.02218
Train [27][620/3239]	Time 0.224 (0.549)	Data Time 0.001 (0.051)	Loss 3.3935 (3.1654)	Entropy 1.54030 (1.54208)	Top-1 acc 44.531 (48.363)	Top-5 acc 68.359 (71.799)	lr 0.02217
Train [27][630/3239]	Time 0.183 (0.547)	Data Time 0.001 (0.050)	Loss 3.1617 (3.1644)	Entropy 1.54024 (1.54205)	Top-1 acc 50.000 (48.385)	Top-5 acc 70.312 (71.823)	lr 0.02217
Train [27][640/3239]	Time 0.224 (0.616)	Data Time 0.002 (0.050)	Loss 2.9641 (3.1641)	Entropy 1.54022 (1.54202)	Top-1 acc 53.125 (48.403)	Top-5 acc 73.047 (71.815)	lr 0.02217
Train [27][650/3239]	Time 0.310 (0.614)	Data Time 0.002 (0.049)	Loss 3.1029 (3.1636)	Entropy 1.54008 (1.54200)	Top-1 acc 50.391 (48.409)	Top-5 acc 71.875 (71.827)	lr 0.02217
Train [27][660/3239]	Time 0.256 (0.611)	Data Time 0.002 (0.048)	Loss 2.9840 (3.1643)	Entropy 1.53998 (1.54197)	Top-1 acc 55.078 (48.403)	Top-5 acc 77.734 (71.832)	lr 0.02217
Train [27][670/3239]	Time 2.379 (0.609)	Data Time 0.005 (0.048)	Loss 3.2680 (3.1637)	Entropy 1.53998 (1.54194)	Top-1 acc 44.922 (48.401)	Top-5 acc 68.750 (71.851)	lr 0.02217
Train [27][680/3239]	Time 0.230 (0.603)	Data Time 0.002 (0.047)	Loss 3.3295 (3.1651)	Entropy 1.53973 (1.54190)	Top-1 acc 46.484 (48.377)	Top-5 acc 69.141 (71.822)	lr 0.02217
Train [27][690/3239]	Time 0.212 (0.601)	Data Time 0.001 (0.046)	Loss 3.3273 (3.1655)	Entropy 1.53959 (1.54187)	Top-1 acc 46.875 (48.376)	Top-5 acc 66.797 (71.820)	lr 0.02217
Train [27][700/3239]	Time 0.211 (0.598)	Data Time 0.001 (0.046)	Loss 3.3135 (3.1659)	Entropy 1.53950 (1.54184)	Top-1 acc 44.922 (48.375)	Top-5 acc 68.750 (71.801)	lr 0.02217
Train [27][710/3239]	Time 0.299 (0.596)	Data Time 0.001 (0.045)	Loss 3.0147 (3.1655)	Entropy 1.53937 (1.54180)	Top-1 acc 53.516 (48.395)	Top-5 acc 76.172 (71.813)	lr 0.02217
Train [27][720/3239]	Time 0.201 (0.594)	Data Time 0.001 (0.044)	Loss 3.0787 (3.1647)	Entropy 1.53936 (1.54177)	Top-1 acc 51.562 (48.421)	Top-5 acc 75.781 (71.831)	lr 0.02217
Train [27][730/3239]	Time 0.208 (0.592)	Data Time 0.001 (0.044)	Loss 3.3795 (3.1641)	Entropy 1.53928 (1.54174)	Top-1 acc 46.094 (48.440)	Top-5 acc 67.969 (71.825)	lr 0.02217
Train [27][740/3239]	Time 0.220 (0.589)	Data Time 0.001 (0.043)	Loss 3.1787 (3.1639)	Entropy 1.53942 (1.54170)	Top-1 acc 46.875 (48.443)	Top-5 acc 73.438 (71.829)	lr 0.02217
Train [27][750/3239]	Time 0.219 (0.588)	Data Time 0.001 (0.043)	Loss 3.2952 (3.1645)	Entropy 1.53940 (1.54167)	Top-1 acc 45.312 (48.439)	Top-5 acc 67.578 (71.805)	lr 0.02217
Train [27][760/3239]	Time 0.203 (0.586)	Data Time 0.001 (0.042)	Loss 3.2719 (3.1642)	Entropy 1.53929 (1.54164)	Top-1 acc 44.531 (48.457)	Top-5 acc 68.750 (71.801)	lr 0.02217
Train [27][770/3239]	Time 0.300 (0.584)	Data Time 0.001 (0.042)	Loss 3.1563 (3.1641)	Entropy 1.53922 (1.54161)	Top-1 acc 47.656 (48.467)	Top-5 acc 71.094 (71.801)	lr 0.02217
Train [27][780/3239]	Time 2.402 (0.582)	Data Time 0.002 (0.041)	Loss 3.2808 (3.1642)	Entropy 1.53922 (1.54158)	Top-1 acc 47.266 (48.471)	Top-5 acc 69.531 (71.798)	lr 0.02216
Train [27][790/3239]	Time 0.215 (0.578)	Data Time 0.001 (0.041)	Loss 3.1373 (3.1633)	Entropy 1.53920 (1.54155)	Top-1 acc 49.609 (48.492)	Top-5 acc 73.047 (71.817)	lr 0.02216
Train [27][800/3239]	Time 0.225 (0.576)	Data Time 0.001 (0.040)	Loss 3.4558 (3.1639)	Entropy 1.53885 (1.54152)	Top-1 acc 39.453 (48.467)	Top-5 acc 66.406 (71.799)	lr 0.02216
Train [27][810/3239]	Time 0.225 (0.575)	Data Time 0.001 (0.040)	Loss 3.0625 (3.1635)	Entropy 1.53885 (1.54149)	Top-1 acc 54.688 (48.489)	Top-5 acc 73.438 (71.813)	lr 0.02216
Train [27][820/3239]	Time 0.236 (0.573)	Data Time 0.001 (0.039)	Loss 3.1363 (3.1642)	Entropy 1.53879 (1.54145)	Top-1 acc 47.656 (48.474)	Top-5 acc 71.094 (71.796)	lr 0.02216
Train [27][830/3239]	Time 0.285 (0.572)	Data Time 0.001 (0.039)	Loss 3.0795 (3.1642)	Entropy 1.53874 (1.54142)	Top-1 acc 49.609 (48.474)	Top-5 acc 70.703 (71.794)	lr 0.02216
Train [27][840/3239]	Time 0.215 (0.570)	Data Time 0.001 (0.038)	Loss 3.2636 (3.1656)	Entropy 1.53868 (1.54139)	Top-1 acc 45.703 (48.433)	Top-5 acc 70.312 (71.751)	lr 0.02216
Train [27][850/3239]	Time 0.253 (0.569)	Data Time 0.001 (0.038)	Loss 3.0374 (3.1657)	Entropy 1.53857 (1.54136)	Top-1 acc 51.953 (48.430)	Top-5 acc 74.609 (71.746)	lr 0.02216
Train [27][860/3239]	Time 0.211 (0.567)	Data Time 0.001 (0.038)	Loss 3.2455 (3.1657)	Entropy 1.53844 (1.54132)	Top-1 acc 44.531 (48.437)	Top-5 acc 70.703 (71.743)	lr 0.02216
Train [27][870/3239]	Time 0.202 (0.566)	Data Time 0.001 (0.037)	Loss 3.1767 (3.1658)	Entropy 1.53847 (1.54129)	Top-1 acc 47.656 (48.434)	Top-5 acc 71.484 (71.751)	lr 0.02216
Train [27][880/3239]	Time 0.248 (0.565)	Data Time 0.003 (0.037)	Loss 3.0923 (3.1657)	Entropy 1.53844 (1.54126)	Top-1 acc 49.609 (48.434)	Top-5 acc 73.438 (71.762)	lr 0.02216
Train [27][890/3239]	Time 2.694 (0.564)	Data Time 0.002 (0.036)	Loss 3.1631 (3.1658)	Entropy 1.53844 (1.54123)	Top-1 acc 49.609 (48.439)	Top-5 acc 72.266 (71.759)	lr 0.02216
Train [27][900/3239]	Time 0.225 (0.560)	Data Time 0.001 (0.036)	Loss 3.1135 (3.1658)	Entropy 1.53838 (1.54120)	Top-1 acc 48.828 (48.448)	Top-5 acc 71.484 (71.754)	lr 0.02216
Train [27][910/3239]	Time 0.206 (0.559)	Data Time 0.002 (0.036)	Loss 3.1157 (3.1656)	Entropy 1.53835 (1.54116)	Top-1 acc 50.781 (48.453)	Top-5 acc 73.438 (71.754)	lr 0.02216
Train [27][920/3239]	Time 0.214 (0.558)	Data Time 0.001 (0.035)	Loss 3.1203 (3.1662)	Entropy 1.53826 (1.54113)	Top-1 acc 47.266 (48.432)	Top-5 acc 76.172 (71.747)	lr 0.02216
Train [27][930/3239]	Time 0.219 (0.557)	Data Time 0.001 (0.035)	Loss 3.1313 (3.1665)	Entropy 1.53828 (1.54110)	Top-1 acc 51.172 (48.423)	Top-5 acc 71.484 (71.745)	lr 0.02215
Train [27][940/3239]	Time 0.216 (0.555)	Data Time 0.001 (0.035)	Loss 3.1121 (3.1669)	Entropy 1.53821 (1.54107)	Top-1 acc 48.828 (48.404)	Top-5 acc 73.828 (71.736)	lr 0.02215
Train [27][950/3239]	Time 0.253 (0.554)	Data Time 0.001 (0.034)	Loss 3.2143 (3.1675)	Entropy 1.53820 (1.54104)	Top-1 acc 46.875 (48.382)	Top-5 acc 68.750 (71.723)	lr 0.02215
Train [27][960/3239]	Time 0.210 (0.553)	Data Time 0.001 (0.034)	Loss 3.4107 (3.1684)	Entropy 1.53816 (1.54101)	Top-1 acc 44.141 (48.351)	Top-5 acc 67.578 (71.702)	lr 0.02215
Train [27][970/3239]	Time 0.206 (0.552)	Data Time 0.001 (0.034)	Loss 3.0053 (3.1682)	Entropy 1.53816 (1.54098)	Top-1 acc 56.250 (48.367)	Top-5 acc 75.391 (71.709)	lr 0.02215
Train [27][980/3239]	Time 0.213 (0.551)	Data Time 0.001 (0.033)	Loss 2.9830 (3.1684)	Entropy 1.53805 (1.54095)	Top-1 acc 47.266 (48.361)	Top-5 acc 76.172 (71.697)	lr 0.02215
Train [27][990/3239]	Time 0.225 (0.550)	Data Time 0.001 (0.033)	Loss 3.4274 (3.1686)	Entropy 1.53798 (1.54092)	Top-1 acc 42.969 (48.352)	Top-5 acc 66.797 (71.698)	lr 0.02215
Train [27][1000/3239]	Time 43.926 (0.590)	Data Time 0.001 (0.033)	Loss 3.3596 (3.1691)	Entropy 1.53798 (1.54089)	Top-1 acc 44.531 (48.334)	Top-5 acc 67.969 (71.681)	lr 0.02215
Train [27][1010/3239]	Time 0.392 (0.587)	Data Time 0.003 (0.032)	Loss 3.1498 (3.1692)	Entropy 1.53789 (1.54086)	Top-1 acc 49.219 (48.328)	Top-5 acc 73.438 (71.680)	lr 0.02215
Train [27][1020/3239]	Time 0.216 (0.586)	Data Time 0.002 (0.032)	Loss 3.2746 (3.1696)	Entropy 1.53768 (1.54083)	Top-1 acc 44.922 (48.323)	Top-5 acc 69.141 (71.668)	lr 0.02215
Train [27][1030/3239]	Time 0.218 (0.585)	Data Time 0.001 (0.032)	Loss 3.2144 (3.1701)	Entropy 1.53759 (1.54080)	Top-1 acc 46.875 (48.310)	Top-5 acc 68.359 (71.660)	lr 0.02215
Train [27][1040/3239]	Time 0.221 (0.583)	Data Time 0.002 (0.031)	Loss 3.0611 (3.1703)	Entropy 1.53753 (1.54077)	Top-1 acc 50.000 (48.296)	Top-5 acc 71.094 (71.659)	lr 0.02215
Train [27][1050/3239]	Time 0.206 (0.582)	Data Time 0.001 (0.031)	Loss 3.2236 (3.1706)	Entropy 1.53746 (1.54074)	Top-1 acc 50.000 (48.293)	Top-5 acc 70.703 (71.652)	lr 0.02215
Train [27][1060/3239]	Time 0.220 (0.581)	Data Time 0.001 (0.031)	Loss 3.1608 (3.1711)	Entropy 1.53734 (1.54071)	Top-1 acc 45.703 (48.277)	Top-5 acc 72.656 (71.645)	lr 0.02215
Train [27][1070/3239]	Time 0.204 (0.579)	Data Time 0.001 (0.031)	Loss 3.1198 (3.1714)	Entropy 1.53728 (1.54068)	Top-1 acc 42.578 (48.260)	Top-5 acc 72.266 (71.627)	lr 0.02215
Train [27][1080/3239]	Time 0.223 (0.578)	Data Time 0.001 (0.030)	Loss 3.2211 (3.1716)	Entropy 1.53727 (1.54065)	Top-1 acc 45.312 (48.247)	Top-5 acc 71.094 (71.622)	lr 0.02215
Train [27][1090/3239]	Time 0.284 (0.577)	Data Time 0.002 (0.030)	Loss 3.3234 (3.1718)	Entropy 1.53711 (1.54061)	Top-1 acc 43.750 (48.242)	Top-5 acc 67.188 (71.614)	lr 0.02214
Train [27][1100/3239]	Time 0.235 (0.575)	Data Time 0.001 (0.030)	Loss 3.3146 (3.1725)	Entropy 1.53695 (1.54058)	Top-1 acc 47.266 (48.232)	Top-5 acc 66.016 (71.606)	lr 0.02214
Train [27][1110/3239]	Time 2.381 (0.574)	Data Time 0.001 (0.030)	Loss 3.2803 (3.1723)	Entropy 1.53695 (1.54055)	Top-1 acc 46.094 (48.229)	Top-5 acc 69.141 (71.604)	lr 0.02214
Train [27][1120/3239]	Time 0.222 (0.571)	Data Time 0.001 (0.029)	Loss 3.2625 (3.1721)	Entropy 1.53694 (1.54052)	Top-1 acc 44.922 (48.225)	Top-5 acc 67.188 (71.602)	lr 0.02214
Train [27][1130/3239]	Time 0.377 (0.570)	Data Time 0.001 (0.029)	Loss 3.0287 (3.1721)	Entropy 1.53685 (1.54048)	Top-1 acc 51.953 (48.224)	Top-5 acc 73.047 (71.602)	lr 0.02214
Train [27][1140/3239]	Time 0.226 (0.569)	Data Time 0.001 (0.029)	Loss 3.1416 (3.1721)	Entropy 1.53682 (1.54045)	Top-1 acc 47.656 (48.225)	Top-5 acc 72.656 (71.608)	lr 0.02214
Train [27][1150/3239]	Time 0.190 (0.568)	Data Time 0.001 (0.029)	Loss 3.1568 (3.1723)	Entropy 1.53677 (1.54042)	Top-1 acc 49.609 (48.221)	Top-5 acc 72.656 (71.607)	lr 0.02214
Train [27][1160/3239]	Time 0.199 (0.567)	Data Time 0.001 (0.028)	Loss 3.1329 (3.1728)	Entropy 1.53675 (1.54039)	Top-1 acc 47.656 (48.219)	Top-5 acc 74.219 (71.601)	lr 0.02214
Train [27][1170/3239]	Time 0.220 (0.566)	Data Time 0.001 (0.028)	Loss 3.1128 (3.1728)	Entropy 1.53674 (1.54036)	Top-1 acc 44.531 (48.218)	Top-5 acc 71.484 (71.600)	lr 0.02214
Train [27][1180/3239]	Time 0.219 (0.565)	Data Time 0.001 (0.028)	Loss 3.1459 (3.1725)	Entropy 1.53657 (1.54033)	Top-1 acc 50.000 (48.228)	Top-5 acc 72.656 (71.608)	lr 0.02214
Train [27][1190/3239]	Time 0.228 (0.564)	Data Time 0.001 (0.028)	Loss 3.1192 (3.1724)	Entropy 1.53650 (1.54030)	Top-1 acc 50.391 (48.229)	Top-5 acc 71.094 (71.604)	lr 0.02214
Train [27][1200/3239]	Time 0.212 (0.563)	Data Time 0.001 (0.028)	Loss 3.2026 (3.1727)	Entropy 1.53646 (1.54026)	Top-1 acc 46.875 (48.218)	Top-5 acc 73.438 (71.600)	lr 0.02214
Train [27][1210/3239]	Time 0.209 (0.562)	Data Time 0.001 (0.027)	Loss 3.1243 (3.1727)	Entropy 1.53640 (1.54023)	Top-1 acc 53.906 (48.224)	Top-5 acc 70.312 (71.604)	lr 0.02214
Train [27][1220/3239]	Time 2.386 (0.561)	Data Time 0.001 (0.027)	Loss 3.1750 (3.1726)	Entropy 1.53640 (1.54020)	Top-1 acc 48.438 (48.226)	Top-5 acc 71.484 (71.608)	lr 0.02214
Train [27][1230/3239]	Time 0.274 (0.558)	Data Time 0.001 (0.027)	Loss 3.1861 (3.1728)	Entropy 1.53639 (1.54017)	Top-1 acc 54.688 (48.236)	Top-5 acc 72.656 (71.602)	lr 0.02214
Train [27][1240/3239]	Time 0.233 (0.557)	Data Time 0.001 (0.027)	Loss 3.0138 (3.1724)	Entropy 1.53604 (1.54014)	Top-1 acc 46.875 (48.241)	Top-5 acc 72.656 (71.609)	lr 0.02214
Train [27][1250/3239]	Time 0.226 (0.556)	Data Time 0.001 (0.026)	Loss 3.1509 (3.1724)	Entropy 1.53599 (1.54010)	Top-1 acc 51.172 (48.248)	Top-5 acc 71.094 (71.608)	lr 0.02213
Train [27][1260/3239]	Time 0.227 (0.555)	Data Time 0.001 (0.026)	Loss 3.2043 (3.1728)	Entropy 1.53596 (1.54007)	Top-1 acc 46.484 (48.241)	Top-5 acc 70.703 (71.598)	lr 0.02213
Train [27][1270/3239]	Time 0.212 (0.554)	Data Time 0.001 (0.026)	Loss 2.9398 (3.1724)	Entropy 1.53590 (1.54004)	Top-1 acc 50.781 (48.249)	Top-5 acc 76.562 (71.605)	lr 0.02213
Train [27][1280/3239]	Time 0.231 (0.554)	Data Time 0.001 (0.026)	Loss 3.0664 (3.1720)	Entropy 1.53594 (1.54001)	Top-1 acc 48.438 (48.261)	Top-5 acc 74.219 (71.616)	lr 0.02213
Train [27][1290/3239]	Time 0.220 (0.553)	Data Time 0.001 (0.026)	Loss 3.3754 (3.1720)	Entropy 1.53593 (1.53997)	Top-1 acc 42.578 (48.260)	Top-5 acc 67.578 (71.618)	lr 0.02213
Train [27][1300/3239]	Time 0.325 (0.552)	Data Time 0.001 (0.026)	Loss 3.1756 (3.1723)	Entropy 1.53585 (1.53994)	Top-1 acc 50.391 (48.252)	Top-5 acc 70.703 (71.613)	lr 0.02213
Train [27][1310/3239]	Time 0.211 (0.551)	Data Time 0.001 (0.025)	Loss 3.1880 (3.1724)	Entropy 1.53577 (1.53991)	Top-1 acc 43.750 (48.239)	Top-5 acc 71.875 (71.607)	lr 0.02213
Train [27][1320/3239]	Time 0.237 (0.551)	Data Time 0.001 (0.025)	Loss 3.2205 (3.1726)	Entropy 1.53572 (1.53988)	Top-1 acc 44.922 (48.235)	Top-5 acc 70.312 (71.604)	lr 0.02213
Train [27][1330/3239]	Time 2.489 (0.550)	Data Time 0.001 (0.025)	Loss 3.3023 (3.1731)	Entropy 1.53572 (1.53985)	Top-1 acc 45.312 (48.224)	Top-5 acc 69.531 (71.594)	lr 0.02213
Train [27][1340/3239]	Time 0.209 (0.547)	Data Time 0.001 (0.025)	Loss 3.3629 (3.1731)	Entropy 1.53572 (1.53982)	Top-1 acc 42.969 (48.225)	Top-5 acc 67.578 (71.593)	lr 0.02213
Train [27][1350/3239]	Time 0.216 (0.546)	Data Time 0.001 (0.025)	Loss 3.0784 (3.1727)	Entropy 1.53557 (1.53979)	Top-1 acc 47.656 (48.231)	Top-5 acc 73.828 (71.610)	lr 0.02213
Train [27][1360/3239]	Time 0.219 (0.546)	Data Time 0.001 (0.024)	Loss 3.1612 (3.1723)	Entropy 1.53556 (1.53976)	Top-1 acc 46.484 (48.239)	Top-5 acc 73.047 (71.621)	lr 0.02213
Train [27][1370/3239]	Time 0.305 (0.575)	Data Time 0.003 (0.024)	Loss 3.2639 (3.1724)	Entropy 1.53546 (1.53973)	Top-1 acc 44.531 (48.232)	Top-5 acc 70.703 (71.617)	lr 0.02213
Train [27][1380/3239]	Time 0.174 (0.574)	Data Time 0.002 (0.024)	Loss 3.0783 (3.1722)	Entropy 1.53541 (1.53969)	Top-1 acc 52.734 (48.241)	Top-5 acc 75.000 (71.616)	lr 0.02213
Train [27][1390/3239]	Time 0.169 (0.574)	Data Time 0.002 (0.024)	Loss 3.0214 (3.1722)	Entropy 1.53528 (1.53966)	Top-1 acc 48.828 (48.240)	Top-5 acc 75.391 (71.618)	lr 0.02213
Train [27][1400/3239]	Time 0.197 (0.573)	Data Time 0.001 (0.024)	Loss 3.1701 (3.1728)	Entropy 1.53523 (1.53963)	Top-1 acc 50.000 (48.231)	Top-5 acc 72.656 (71.605)	lr 0.02212
Train [27][1410/3239]	Time 0.246 (0.572)	Data Time 0.001 (0.024)	Loss 3.0449 (3.1729)	Entropy 1.53514 (1.53960)	Top-1 acc 50.000 (48.228)	Top-5 acc 72.656 (71.599)	lr 0.02212
Train [27][1420/3239]	Time 0.223 (0.571)	Data Time 0.001 (0.024)	Loss 3.0459 (3.1731)	Entropy 1.53492 (1.53957)	Top-1 acc 53.516 (48.228)	Top-5 acc 72.656 (71.594)	lr 0.02212
Train [27][1430/3239]	Time 0.207 (0.570)	Data Time 0.001 (0.023)	Loss 3.2669 (3.1732)	Entropy 1.53485 (1.53954)	Top-1 acc 47.266 (48.223)	Top-5 acc 67.578 (71.589)	lr 0.02212
Train [27][1440/3239]	Time 2.281 (0.569)	Data Time 0.001 (0.023)	Loss 3.0863 (3.1731)	Entropy 1.53485 (1.53950)	Top-1 acc 55.078 (48.226)	Top-5 acc 75.000 (71.587)	lr 0.02212
Train [27][1450/3239]	Time 0.221 (0.566)	Data Time 0.001 (0.023)	Loss 3.1627 (3.1730)	Entropy 1.53480 (1.53947)	Top-1 acc 46.875 (48.230)	Top-5 acc 75.000 (71.587)	lr 0.02212
Train [27][1460/3239]	Time 0.222 (0.566)	Data Time 0.001 (0.023)	Loss 3.3564 (3.1728)	Entropy 1.53471 (1.53944)	Top-1 acc 42.188 (48.240)	Top-5 acc 67.969 (71.592)	lr 0.02212
Train [27][1470/3239]	Time 0.325 (0.565)	Data Time 0.001 (0.023)	Loss 3.2282 (3.1728)	Entropy 1.53468 (1.53941)	Top-1 acc 47.266 (48.245)	Top-5 acc 67.969 (71.585)	lr 0.02212
Train [27][1480/3239]	Time 0.207 (0.564)	Data Time 0.001 (0.023)	Loss 3.1041 (3.1727)	Entropy 1.53461 (1.53937)	Top-1 acc 48.438 (48.245)	Top-5 acc 75.000 (71.589)	lr 0.02212
Train [27][1490/3239]	Time 0.234 (0.563)	Data Time 0.003 (0.023)	Loss 3.1400 (3.1729)	Entropy 1.53457 (1.53934)	Top-1 acc 46.484 (48.236)	Top-5 acc 75.000 (71.593)	lr 0.02212
Train [27][1500/3239]	Time 0.193 (0.563)	Data Time 0.001 (0.022)	Loss 3.1837 (3.1728)	Entropy 1.53453 (1.53931)	Top-1 acc 50.781 (48.236)	Top-5 acc 74.219 (71.593)	lr 0.02212
Train [27][1510/3239]	Time 0.208 (0.562)	Data Time 0.001 (0.022)	Loss 3.1506 (3.1729)	Entropy 1.53452 (1.53928)	Top-1 acc 49.609 (48.234)	Top-5 acc 71.875 (71.592)	lr 0.02212
Train [27][1520/3239]	Time 0.225 (0.561)	Data Time 0.001 (0.022)	Loss 3.3984 (3.1732)	Entropy 1.53452 (1.53925)	Top-1 acc 42.969 (48.232)	Top-5 acc 69.531 (71.586)	lr 0.02212
Train [27][1530/3239]	Time 0.326 (0.560)	Data Time 0.001 (0.022)	Loss 3.1088 (3.1729)	Entropy 1.53448 (1.53922)	Top-1 acc 50.391 (48.236)	Top-5 acc 72.266 (71.593)	lr 0.02212
Train [27][1540/3239]	Time 0.222 (0.560)	Data Time 0.001 (0.022)	Loss 3.0419 (3.1730)	Entropy 1.53448 (1.53919)	Top-1 acc 50.781 (48.232)	Top-5 acc 75.781 (71.589)	lr 0.02212
Train [27][1550/3239]	Time 2.381 (0.559)	Data Time 0.001 (0.022)	Loss 2.9329 (3.1727)	Entropy 1.53448 (1.53915)	Top-1 acc 55.078 (48.240)	Top-5 acc 78.125 (71.596)	lr 0.02212
Train [27][1560/3239]	Time 0.214 (0.557)	Data Time 0.001 (0.022)	Loss 3.3105 (3.1727)	Entropy 1.53438 (1.53912)	Top-1 acc 47.266 (48.242)	Top-5 acc 69.141 (71.595)	lr 0.02211
Train [27][1570/3239]	Time 0.220 (0.556)	Data Time 0.001 (0.021)	Loss 3.2495 (3.1726)	Entropy 1.53431 (1.53909)	Top-1 acc 47.266 (48.244)	Top-5 acc 69.922 (71.603)	lr 0.02211
Train [27][1580/3239]	Time 0.204 (0.555)	Data Time 0.001 (0.021)	Loss 3.0814 (3.1724)	Entropy 1.53429 (1.53906)	Top-1 acc 49.609 (48.240)	Top-5 acc 72.656 (71.605)	lr 0.02211
Train [27][1590/3239]	Time 0.309 (0.555)	Data Time 0.001 (0.021)	Loss 3.1604 (3.1722)	Entropy 1.53423 (1.53903)	Top-1 acc 45.703 (48.248)	Top-5 acc 72.266 (71.607)	lr 0.02211
Train [27][1600/3239]	Time 0.195 (0.554)	Data Time 0.001 (0.021)	Loss 3.1294 (3.1719)	Entropy 1.53405 (1.53900)	Top-1 acc 50.391 (48.253)	Top-5 acc 71.484 (71.614)	lr 0.02211
Train [27][1610/3239]	Time 0.221 (0.553)	Data Time 0.001 (0.021)	Loss 3.2633 (3.1719)	Entropy 1.53396 (1.53897)	Top-1 acc 46.094 (48.248)	Top-5 acc 68.359 (71.610)	lr 0.02211
Train [27][1620/3239]	Time 0.209 (0.552)	Data Time 0.001 (0.021)	Loss 3.1300 (3.1718)	Entropy 1.53390 (1.53894)	Top-1 acc 53.125 (48.249)	Top-5 acc 75.000 (71.614)	lr 0.02211
Train [27][1630/3239]	Time 0.223 (0.552)	Data Time 0.001 (0.021)	Loss 2.9802 (3.1714)	Entropy 1.53385 (1.53891)	Top-1 acc 55.078 (48.271)	Top-5 acc 75.391 (71.621)	lr 0.02211
Train [27][1640/3239]	Time 0.215 (0.551)	Data Time 0.001 (0.021)	Loss 3.0745 (3.1716)	Entropy 1.53383 (1.53888)	Top-1 acc 51.172 (48.267)	Top-5 acc 70.703 (71.615)	lr 0.02211
Train [27][1650/3239]	Time 0.312 (0.551)	Data Time 0.001 (0.021)	Loss 2.9381 (3.1713)	Entropy 1.53367 (1.53885)	Top-1 acc 55.078 (48.279)	Top-5 acc 74.219 (71.619)	lr 0.02211
Train [27][1660/3239]	Time 2.306 (0.550)	Data Time 0.002 (0.020)	Loss 3.1130 (3.1712)	Entropy 1.53367 (1.53882)	Top-1 acc 50.000 (48.286)	Top-5 acc 76.172 (71.627)	lr 0.02211
Train [27][1670/3239]	Time 0.210 (0.548)	Data Time 0.001 (0.020)	Loss 3.1262 (3.1713)	Entropy 1.53361 (1.53879)	Top-1 acc 47.266 (48.283)	Top-5 acc 71.484 (71.624)	lr 0.02211
Train [27][1680/3239]	Time 0.203 (0.547)	Data Time 0.001 (0.020)	Loss 3.1279 (3.1717)	Entropy 1.53353 (1.53875)	Top-1 acc 51.172 (48.269)	Top-5 acc 74.219 (71.617)	lr 0.02211
Train [27][1690/3239]	Time 0.197 (0.547)	Data Time 0.001 (0.020)	Loss 2.9690 (3.1709)	Entropy 1.53351 (1.53872)	Top-1 acc 53.125 (48.282)	Top-5 acc 75.000 (71.632)	lr 0.02211
Train [27][1700/3239]	Time 0.215 (0.546)	Data Time 0.001 (0.020)	Loss 3.2438 (3.1708)	Entropy 1.53337 (1.53869)	Top-1 acc 44.922 (48.286)	Top-5 acc 71.875 (71.638)	lr 0.02211
Train [27][1710/3239]	Time 0.199 (0.546)	Data Time 0.001 (0.020)	Loss 3.0497 (3.1705)	Entropy 1.53329 (1.53866)	Top-1 acc 51.172 (48.293)	Top-5 acc 71.875 (71.640)	lr 0.02210
Train [27][1720/3239]	Time 0.209 (0.545)	Data Time 0.001 (0.020)	Loss 3.1964 (3.1708)	Entropy 1.53329 (1.53863)	Top-1 acc 47.656 (48.285)	Top-5 acc 71.484 (71.635)	lr 0.02210
Train [27][1730/3239]	Time 0.268 (0.567)	Data Time 0.003 (0.020)	Loss 3.0537 (3.1707)	Entropy 1.53327 (1.53860)	Top-1 acc 49.219 (48.282)	Top-5 acc 73.828 (71.633)	lr 0.02210
Train [27][1740/3239]	Time 0.226 (0.567)	Data Time 0.002 (0.020)	Loss 3.2218 (3.1707)	Entropy 1.53319 (1.53857)	Top-1 acc 46.484 (48.281)	Top-5 acc 69.922 (71.632)	lr 0.02210
Train [27][1750/3239]	Time 0.236 (0.567)	Data Time 0.002 (0.019)	Loss 3.2049 (3.1706)	Entropy 1.53319 (1.53854)	Top-1 acc 50.391 (48.290)	Top-5 acc 71.094 (71.628)	lr 0.02210
Train [27][1760/3239]	Time 0.244 (0.566)	Data Time 0.001 (0.019)	Loss 3.2104 (3.1704)	Entropy 1.53315 (1.53851)	Top-1 acc 47.656 (48.294)	Top-5 acc 71.094 (71.632)	lr 0.02210
Train [27][1770/3239]	Time 2.522 (0.566)	Data Time 0.002 (0.019)	Loss 2.9817 (3.1705)	Entropy 1.53315 (1.53848)	Top-1 acc 52.344 (48.294)	Top-5 acc 77.344 (71.630)	lr 0.02210
Train [27][1780/3239]	Time 0.225 (0.564)	Data Time 0.001 (0.019)	Loss 3.3418 (3.1703)	Entropy 1.53303 (1.53845)	Top-1 acc 46.094 (48.296)	Top-5 acc 68.359 (71.635)	lr 0.02210
Train [27][1790/3239]	Time 0.207 (0.563)	Data Time 0.001 (0.019)	Loss 3.2814 (3.1707)	Entropy 1.53286 (1.53841)	Top-1 acc 42.188 (48.278)	Top-5 acc 68.359 (71.627)	lr 0.02210
Train [27][1800/3239]	Time 0.226 (0.562)	Data Time 0.001 (0.019)	Loss 3.0469 (3.1704)	Entropy 1.53278 (1.53838)	Top-1 acc 50.000 (48.284)	Top-5 acc 74.219 (71.634)	lr 0.02210
Train [27][1810/3239]	Time 0.221 (0.562)	Data Time 0.001 (0.019)	Loss 3.1183 (3.1701)	Entropy 1.53276 (1.53835)	Top-1 acc 47.266 (48.285)	Top-5 acc 73.438 (71.639)	lr 0.02210
Train [27][1820/3239]	Time 0.341 (0.561)	Data Time 0.003 (0.019)	Loss 3.1226 (3.1700)	Entropy 1.53267 (1.53832)	Top-1 acc 50.000 (48.283)	Top-5 acc 71.484 (71.642)	lr 0.02210
Train [27][1830/3239]	Time 0.282 (0.561)	Data Time 0.001 (0.019)	Loss 2.9928 (3.1702)	Entropy 1.53258 (1.53829)	Top-1 acc 51.562 (48.281)	Top-5 acc 76.562 (71.637)	lr 0.02210
Train [27][1840/3239]	Time 0.208 (0.560)	Data Time 0.001 (0.019)	Loss 3.0185 (3.1705)	Entropy 1.53244 (1.53826)	Top-1 acc 51.172 (48.271)	Top-5 acc 73.438 (71.629)	lr 0.02210
Train [27][1850/3239]	Time 0.209 (0.559)	Data Time 0.001 (0.018)	Loss 3.1793 (3.1704)	Entropy 1.53230 (1.53823)	Top-1 acc 46.094 (48.273)	Top-5 acc 71.875 (71.631)	lr 0.02210
Train [27][1860/3239]	Time 0.260 (0.559)	Data Time 0.001 (0.018)	Loss 3.2274 (3.1706)	Entropy 1.53223 (1.53820)	Top-1 acc 48.828 (48.268)	Top-5 acc 69.922 (71.623)	lr 0.02210
Train [27][1870/3239]	Time 0.201 (0.558)	Data Time 0.001 (0.018)	Loss 3.2420 (3.1705)	Entropy 1.53215 (1.53816)	Top-1 acc 46.484 (48.271)	Top-5 acc 70.312 (71.623)	lr 0.02209
Train [27][1880/3239]	Time 2.501 (0.558)	Data Time 0.001 (0.018)	Loss 3.1950 (3.1705)	Entropy 1.53215 (1.53813)	Top-1 acc 55.859 (48.276)	Top-5 acc 71.484 (71.623)	lr 0.02209
Train [27][1890/3239]	Time 0.212 (0.556)	Data Time 0.001 (0.018)	Loss 3.1937 (3.1704)	Entropy 1.53210 (1.53810)	Top-1 acc 47.656 (48.283)	Top-5 acc 76.172 (71.628)	lr 0.02209
Train [27][1900/3239]	Time 0.222 (0.555)	Data Time 0.001 (0.018)	Loss 3.2144 (3.1709)	Entropy 1.53212 (1.53807)	Top-1 acc 46.875 (48.277)	Top-5 acc 72.656 (71.617)	lr 0.02209
Train [27][1910/3239]	Time 0.227 (0.555)	Data Time 0.001 (0.018)	Loss 3.1276 (3.1709)	Entropy 1.53231 (1.53804)	Top-1 acc 50.000 (48.282)	Top-5 acc 73.047 (71.613)	lr 0.02209
Train [27][1920/3239]	Time 0.233 (0.554)	Data Time 0.001 (0.018)	Loss 3.2387 (3.1711)	Entropy 1.53212 (1.53801)	Top-1 acc 48.828 (48.273)	Top-5 acc 69.922 (71.607)	lr 0.02209
Train [27][1930/3239]	Time 0.281 (0.554)	Data Time 0.001 (0.018)	Loss 3.0996 (3.1710)	Entropy 1.53209 (1.53798)	Top-1 acc 48.438 (48.269)	Top-5 acc 73.438 (71.613)	lr 0.02209
Train [27][1940/3239]	Time 0.215 (0.553)	Data Time 0.001 (0.018)	Loss 3.2468 (3.1712)	Entropy 1.53203 (1.53795)	Top-1 acc 50.000 (48.266)	Top-5 acc 69.922 (71.613)	lr 0.02209
Train [27][1950/3239]	Time 0.215 (0.553)	Data Time 0.001 (0.018)	Loss 2.9921 (3.1715)	Entropy 1.53187 (1.53792)	Top-1 acc 55.859 (48.265)	Top-5 acc 75.000 (71.611)	lr 0.02209
Train [27][1960/3239]	Time 0.202 (0.552)	Data Time 0.002 (0.018)	Loss 3.1334 (3.1712)	Entropy 1.53182 (1.53788)	Top-1 acc 50.781 (48.276)	Top-5 acc 72.266 (71.616)	lr 0.02209
Train [27][1970/3239]	Time 0.205 (0.552)	Data Time 0.001 (0.017)	Loss 3.2079 (3.1714)	Entropy 1.53171 (1.53785)	Top-1 acc 43.359 (48.267)	Top-5 acc 69.531 (71.610)	lr 0.02209
Train [27][1980/3239]	Time 0.258 (0.551)	Data Time 0.001 (0.017)	Loss 3.1251 (3.1714)	Entropy 1.53161 (1.53782)	Top-1 acc 50.391 (48.272)	Top-5 acc 70.312 (71.602)	lr 0.02209
Train [27][1990/3239]	Time 2.441 (0.551)	Data Time 0.001 (0.017)	Loss 3.0693 (3.1714)	Entropy 1.53161 (1.53779)	Top-1 acc 51.562 (48.274)	Top-5 acc 73.828 (71.597)	lr 0.02209
Train [27][2000/3239]	Time 0.201 (0.549)	Data Time 0.001 (0.017)	Loss 3.1793 (3.1715)	Entropy 1.53155 (1.53776)	Top-1 acc 45.312 (48.271)	Top-5 acc 72.266 (71.597)	lr 0.02209
Train [27][2010/3239]	Time 0.214 (0.548)	Data Time 0.001 (0.017)	Loss 3.1011 (3.1716)	Entropy 1.53153 (1.53773)	Top-1 acc 53.125 (48.267)	Top-5 acc 73.047 (71.593)	lr 0.02209
Train [27][2020/3239]	Time 0.228 (0.548)	Data Time 0.001 (0.017)	Loss 3.1024 (3.1716)	Entropy 1.53142 (1.53770)	Top-1 acc 51.953 (48.267)	Top-5 acc 69.531 (71.592)	lr 0.02208
Train [27][2030/3239]	Time 0.227 (0.547)	Data Time 0.001 (0.017)	Loss 3.1237 (3.1719)	Entropy 1.53136 (1.53767)	Top-1 acc 49.609 (48.264)	Top-5 acc 73.047 (71.586)	lr 0.02208
Train [27][2040/3239]	Time 0.217 (0.547)	Data Time 0.001 (0.017)	Loss 3.3245 (3.1724)	Entropy 1.53119 (1.53764)	Top-1 acc 43.359 (48.253)	Top-5 acc 66.797 (71.571)	lr 0.02208
Train [27][2050/3239]	Time 0.228 (0.546)	Data Time 0.001 (0.017)	Loss 3.2022 (3.1726)	Entropy 1.53116 (1.53760)	Top-1 acc 46.484 (48.250)	Top-5 acc 72.656 (71.570)	lr 0.02208
Train [27][2060/3239]	Time 0.241 (0.546)	Data Time 0.001 (0.017)	Loss 2.8097 (3.1723)	Entropy 1.53109 (1.53757)	Top-1 acc 57.031 (48.257)	Top-5 acc 78.125 (71.574)	lr 0.02208
Train [27][2070/3239]	Time 0.220 (0.545)	Data Time 0.001 (0.017)	Loss 3.0628 (3.1724)	Entropy 1.53105 (1.53754)	Top-1 acc 51.562 (48.256)	Top-5 acc 72.656 (71.572)	lr 0.02208
Train [27][2080/3239]	Time 0.211 (0.545)	Data Time 0.001 (0.017)	Loss 3.1957 (3.1725)	Entropy 1.53092 (1.53751)	Top-1 acc 43.750 (48.254)	Top-5 acc 70.312 (71.574)	lr 0.02208
Train [27][2090/3239]	Time 0.292 (0.564)	Data Time 0.004 (0.017)	Loss 3.1819 (3.1724)	Entropy 1.53078 (1.53748)	Top-1 acc 47.656 (48.253)	Top-5 acc 73.047 (71.574)	lr 0.02208
Train [27][2100/3239]	Time 2.464 (0.564)	Data Time 0.003 (0.017)	Loss 3.4301 (3.1725)	Entropy 1.53078 (1.53745)	Top-1 acc 42.578 (48.253)	Top-5 acc 66.406 (71.575)	lr 0.02208
Train [27][2110/3239]	Time 0.255 (0.562)	Data Time 0.002 (0.016)	Loss 3.0054 (3.1723)	Entropy 1.53074 (1.53742)	Top-1 acc 51.172 (48.256)	Top-5 acc 75.000 (71.579)	lr 0.02208
Train [27][2120/3239]	Time 0.214 (0.562)	Data Time 0.002 (0.016)	Loss 3.1557 (3.1722)	Entropy 1.53068 (1.53738)	Top-1 acc 51.562 (48.263)	Top-5 acc 73.047 (71.585)	lr 0.02208
Train [27][2130/3239]	Time 0.251 (0.561)	Data Time 0.002 (0.016)	Loss 3.2114 (3.1724)	Entropy 1.53064 (1.53735)	Top-1 acc 46.094 (48.259)	Top-5 acc 72.656 (71.583)	lr 0.02208
Train [27][2140/3239]	Time 0.210 (0.561)	Data Time 0.001 (0.016)	Loss 3.5662 (3.1722)	Entropy 1.53064 (1.53732)	Top-1 acc 37.500 (48.266)	Top-5 acc 66.406 (71.589)	lr 0.02208
Train [27][2150/3239]	Time 0.205 (0.560)	Data Time 0.001 (0.016)	Loss 3.2737 (3.1723)	Entropy 1.53061 (1.53729)	Top-1 acc 46.094 (48.262)	Top-5 acc 69.922 (71.586)	lr 0.02208
Train [27][2160/3239]	Time 0.319 (0.560)	Data Time 0.002 (0.016)	Loss 3.2582 (3.1724)	Entropy 1.53059 (1.53726)	Top-1 acc 44.141 (48.258)	Top-5 acc 71.484 (71.583)	lr 0.02208
Train [27][2170/3239]	Time 0.223 (0.559)	Data Time 0.001 (0.016)	Loss 3.3662 (3.1726)	Entropy 1.53055 (1.53723)	Top-1 acc 37.500 (48.254)	Top-5 acc 69.141 (71.582)	lr 0.02207
Train [27][2180/3239]	Time 0.205 (0.559)	Data Time 0.001 (0.016)	Loss 3.2487 (3.1725)	Entropy 1.53050 (1.53720)	Top-1 acc 49.609 (48.252)	Top-5 acc 70.312 (71.585)	lr 0.02207
Train [27][2190/3239]	Time 0.184 (0.558)	Data Time 0.001 (0.016)	Loss 3.0599 (3.1727)	Entropy 1.53044 (1.53717)	Top-1 acc 51.172 (48.245)	Top-5 acc 72.656 (71.577)	lr 0.02207
Train [27][2200/3239]	Time 0.205 (0.558)	Data Time 0.001 (0.016)	Loss 3.2363 (3.1727)	Entropy 1.53033 (1.53714)	Top-1 acc 48.828 (48.252)	Top-5 acc 71.484 (71.573)	lr 0.02207
Train [27][2210/3239]	Time 2.389 (0.557)	Data Time 0.001 (0.016)	Loss 3.0286 (3.1724)	Entropy 1.53033 (1.53710)	Top-1 acc 52.344 (48.258)	Top-5 acc 73.047 (71.581)	lr 0.02207
Train [27][2220/3239]	Time 0.303 (0.556)	Data Time 0.001 (0.016)	Loss 3.2880 (3.1726)	Entropy 1.53023 (1.53707)	Top-1 acc 39.453 (48.252)	Top-5 acc 66.797 (71.578)	lr 0.02207
Train [27][2230/3239]	Time 0.214 (0.555)	Data Time 0.001 (0.016)	Loss 3.0948 (3.1724)	Entropy 1.53018 (1.53704)	Top-1 acc 47.266 (48.249)	Top-5 acc 75.000 (71.581)	lr 0.02207
Train [27][2240/3239]	Time 0.164 (0.555)	Data Time 0.001 (0.016)	Loss 3.2406 (3.1724)	Entropy 1.53019 (1.53701)	Top-1 acc 47.656 (48.248)	Top-5 acc 66.797 (71.581)	lr 0.02207
Train [27][2250/3239]	Time 0.210 (0.554)	Data Time 0.001 (0.016)	Loss 3.1508 (3.1724)	Entropy 1.53015 (1.53698)	Top-1 acc 50.000 (48.246)	Top-5 acc 73.047 (71.583)	lr 0.02207
Train [27][2260/3239]	Time 0.230 (0.554)	Data Time 0.001 (0.016)	Loss 3.0132 (3.1721)	Entropy 1.53016 (1.53695)	Top-1 acc 49.219 (48.249)	Top-5 acc 75.000 (71.585)	lr 0.02207
Train [27][2270/3239]	Time 0.221 (0.553)	Data Time 0.001 (0.015)	Loss 3.1806 (3.1722)	Entropy 1.53016 (1.53692)	Top-1 acc 48.828 (48.250)	Top-5 acc 68.750 (71.579)	lr 0.02207
Train [27][2280/3239]	Time 0.297 (0.553)	Data Time 0.001 (0.015)	Loss 3.0053 (3.1719)	Entropy 1.53012 (1.53689)	Top-1 acc 51.172 (48.259)	Top-5 acc 74.609 (71.585)	lr 0.02207
Train [27][2290/3239]	Time 0.204 (0.552)	Data Time 0.001 (0.015)	Loss 2.9843 (3.1717)	Entropy 1.53004 (1.53686)	Top-1 acc 50.391 (48.256)	Top-5 acc 74.609 (71.588)	lr 0.02207
Train [27][2300/3239]	Time 0.224 (0.552)	Data Time 0.002 (0.015)	Loss 3.3245 (3.1716)	Entropy 1.52969 (1.53683)	Top-1 acc 44.531 (48.261)	Top-5 acc 69.531 (71.591)	lr 0.02207
Train [27][2310/3239]	Time 0.239 (0.551)	Data Time 0.001 (0.015)	Loss 3.2315 (3.1715)	Entropy 1.52959 (1.53680)	Top-1 acc 45.312 (48.263)	Top-5 acc 72.656 (71.589)	lr 0.02207
Train [27][2320/3239]	Time 2.318 (0.551)	Data Time 0.001 (0.015)	Loss 3.2245 (3.1715)	Entropy 1.52959 (1.53677)	Top-1 acc 46.484 (48.257)	Top-5 acc 71.094 (71.592)	lr 0.02207
Train [27][2330/3239]	Time 0.236 (0.549)	Data Time 0.001 (0.015)	Loss 3.3759 (3.1720)	Entropy 1.52956 (1.53674)	Top-1 acc 40.625 (48.240)	Top-5 acc 68.750 (71.582)	lr 0.02206
Train [27][2340/3239]	Time 0.198 (0.549)	Data Time 0.001 (0.015)	Loss 3.9052 (3.1722)	Entropy 1.52951 (1.53671)	Top-1 acc 31.250 (48.236)	Top-5 acc 58.594 (71.582)	lr 0.02206
Train [27][2350/3239]	Time 0.214 (0.548)	Data Time 0.001 (0.015)	Loss 3.0540 (3.1722)	Entropy 1.52939 (1.53668)	Top-1 acc 49.219 (48.235)	Top-5 acc 76.953 (71.585)	lr 0.02206
Train [27][2360/3239]	Time 0.254 (0.548)	Data Time 0.001 (0.015)	Loss 3.2755 (3.1725)	Entropy 1.52947 (1.53665)	Top-1 acc 45.312 (48.226)	Top-5 acc 71.484 (71.580)	lr 0.02206
Train [27][2370/3239]	Time 0.199 (0.548)	Data Time 0.001 (0.015)	Loss 3.1221 (3.1727)	Entropy 1.52942 (1.53662)	Top-1 acc 47.656 (48.222)	Top-5 acc 73.828 (71.575)	lr 0.02206
Train [27][2380/3239]	Time 0.243 (0.547)	Data Time 0.001 (0.015)	Loss 3.4753 (3.1731)	Entropy 1.52940 (1.53659)	Top-1 acc 45.312 (48.217)	Top-5 acc 62.891 (71.570)	lr 0.02206
Train [27][2390/3239]	Time 0.317 (0.547)	Data Time 0.001 (0.015)	Loss 3.5359 (3.1733)	Entropy 1.52937 (1.53656)	Top-1 acc 37.891 (48.213)	Top-5 acc 60.547 (71.559)	lr 0.02206
Train [27][2400/3239]	Time 0.221 (0.546)	Data Time 0.001 (0.015)	Loss 3.1718 (3.1732)	Entropy 1.52929 (1.53653)	Top-1 acc 49.219 (48.210)	Top-5 acc 68.359 (71.560)	lr 0.02206
Train [27][2410/3239]	Time 0.171 (0.546)	Data Time 0.001 (0.015)	Loss 3.1188 (3.1736)	Entropy 1.52921 (1.53650)	Top-1 acc 48.438 (48.203)	Top-5 acc 73.438 (71.553)	lr 0.02206
Train [27][2420/3239]	Time 0.210 (0.545)	Data Time 0.001 (0.015)	Loss 3.3245 (3.1737)	Entropy 1.52891 (1.53647)	Top-1 acc 46.094 (48.203)	Top-5 acc 68.359 (71.550)	lr 0.02206
Train [27][2430/3239]	Time 2.336 (0.545)	Data Time 0.001 (0.015)	Loss 3.1947 (3.1739)	Entropy 1.52891 (1.53644)	Top-1 acc 46.484 (48.202)	Top-5 acc 69.531 (71.546)	lr 0.02206
Train [27][2440/3239]	Time 0.221 (0.544)	Data Time 0.001 (0.015)	Loss 3.0847 (3.1739)	Entropy 1.52890 (1.53640)	Top-1 acc 51.953 (48.200)	Top-5 acc 72.266 (71.542)	lr 0.02206
Train [27][2450/3239]	Time 0.381 (0.543)	Data Time 0.001 (0.014)	Loss 3.0606 (3.1738)	Entropy 1.52880 (1.53637)	Top-1 acc 48.828 (48.208)	Top-5 acc 75.000 (71.546)	lr 0.02206
Train [27][2460/3239]	Time 0.265 (0.559)	Data Time 0.002 (0.014)	Loss 3.1607 (3.1737)	Entropy 1.52877 (1.53634)	Top-1 acc 54.688 (48.210)	Top-5 acc 73.438 (71.547)	lr 0.02206
Train [27][2470/3239]	Time 0.202 (0.559)	Data Time 0.002 (0.014)	Loss 3.2279 (3.1740)	Entropy 1.52865 (1.53631)	Top-1 acc 46.094 (48.203)	Top-5 acc 70.312 (71.539)	lr 0.02206
Train [27][2480/3239]	Time 0.231 (0.558)	Data Time 0.001 (0.014)	Loss 3.2044 (3.1740)	Entropy 1.52865 (1.53628)	Top-1 acc 51.172 (48.208)	Top-5 acc 71.484 (71.542)	lr 0.02205
Train [27][2490/3239]	Time 0.212 (0.558)	Data Time 0.001 (0.014)	Loss 2.9740 (3.1740)	Entropy 1.52861 (1.53625)	Top-1 acc 56.641 (48.207)	Top-5 acc 76.953 (71.540)	lr 0.02205
Train [27][2500/3239]	Time 0.322 (0.557)	Data Time 0.001 (0.014)	Loss 3.3535 (3.1741)	Entropy 1.52852 (1.53622)	Top-1 acc 43.750 (48.206)	Top-5 acc 65.625 (71.538)	lr 0.02205
Train [27][2510/3239]	Time 0.221 (0.557)	Data Time 0.001 (0.014)	Loss 3.4177 (3.1742)	Entropy 1.52849 (1.53619)	Top-1 acc 46.094 (48.203)	Top-5 acc 66.016 (71.537)	lr 0.02205
Train [27][2520/3239]	Time 0.225 (0.557)	Data Time 0.001 (0.014)	Loss 3.1204 (3.1739)	Entropy 1.52845 (1.53616)	Top-1 acc 45.312 (48.207)	Top-5 acc 73.828 (71.544)	lr 0.02205
Train [27][2530/3239]	Time 0.240 (0.556)	Data Time 0.001 (0.014)	Loss 3.0892 (3.1737)	Entropy 1.52845 (1.53613)	Top-1 acc 52.344 (48.210)	Top-5 acc 72.656 (71.548)	lr 0.02205
Train [27][2540/3239]	Time 2.346 (0.556)	Data Time 0.001 (0.014)	Loss 3.3558 (3.1740)	Entropy 1.52845 (1.53610)	Top-1 acc 42.969 (48.204)	Top-5 acc 67.188 (71.546)	lr 0.02205
Train [27][2550/3239]	Time 0.220 (0.554)	Data Time 0.001 (0.014)	Loss 3.2932 (3.1741)	Entropy 1.52843 (1.53607)	Top-1 acc 44.922 (48.197)	Top-5 acc 68.359 (71.544)	lr 0.02205
Train [27][2560/3239]	Time 0.238 (0.554)	Data Time 0.002 (0.014)	Loss 3.0548 (3.1741)	Entropy 1.52838 (1.53604)	Top-1 acc 53.125 (48.198)	Top-5 acc 71.484 (71.545)	lr 0.02205
Train [27][2570/3239]	Time 0.206 (0.553)	Data Time 0.001 (0.014)	Loss 3.1241 (3.1741)	Entropy 1.52822 (1.53601)	Top-1 acc 51.953 (48.200)	Top-5 acc 72.266 (71.547)	lr 0.02205
Train [27][2580/3239]	Time 0.209 (0.553)	Data Time 0.001 (0.014)	Loss 2.9724 (3.1740)	Entropy 1.52816 (1.53598)	Top-1 acc 50.781 (48.202)	Top-5 acc 76.953 (71.548)	lr 0.02205
Train [27][2590/3239]	Time 0.195 (0.553)	Data Time 0.001 (0.014)	Loss 3.1762 (3.1743)	Entropy 1.52811 (1.53595)	Top-1 acc 44.922 (48.198)	Top-5 acc 71.875 (71.542)	lr 0.02205
Train [27][2600/3239]	Time 0.212 (0.552)	Data Time 0.001 (0.014)	Loss 3.0787 (3.1743)	Entropy 1.52796 (1.53592)	Top-1 acc 52.734 (48.201)	Top-5 acc 74.609 (71.539)	lr 0.02205
Train [27][2610/3239]	Time 0.222 (0.552)	Data Time 0.001 (0.014)	Loss 3.2163 (3.1744)	Entropy 1.52784 (1.53589)	Top-1 acc 49.609 (48.202)	Top-5 acc 73.047 (71.535)	lr 0.02205
Train [27][2620/3239]	Time 0.214 (0.551)	Data Time 0.001 (0.014)	Loss 3.0849 (3.1743)	Entropy 1.52782 (1.53585)	Top-1 acc 49.609 (48.202)	Top-5 acc 74.219 (71.539)	lr 0.02205
Train [27][2630/3239]	Time 0.212 (0.551)	Data Time 0.001 (0.014)	Loss 3.0997 (3.1743)	Entropy 1.52775 (1.53582)	Top-1 acc 46.484 (48.202)	Top-5 acc 72.656 (71.540)	lr 0.02204
Train [27][2640/3239]	Time 0.260 (0.551)	Data Time 0.001 (0.014)	Loss 3.0355 (3.1742)	Entropy 1.52757 (1.53579)	Top-1 acc 51.953 (48.202)	Top-5 acc 75.781 (71.542)	lr 0.02204
Train [27][2650/3239]	Time 0.280 (0.550)	Data Time 0.001 (0.014)	Loss 3.3482 (3.1744)	Entropy 1.52742 (1.53576)	Top-1 acc 42.969 (48.200)	Top-5 acc 67.188 (71.538)	lr 0.02204
Train [27][2660/3239]	Time 0.256 (0.550)	Data Time 0.001 (0.014)	Loss 3.2257 (3.1743)	Entropy 1.52738 (1.53573)	Top-1 acc 48.438 (48.203)	Top-5 acc 70.312 (71.540)	lr 0.02204
Train [27][2670/3239]	Time 0.232 (0.549)	Data Time 0.001 (0.013)	Loss 3.2692 (3.1745)	Entropy 1.52735 (1.53570)	Top-1 acc 45.703 (48.201)	Top-5 acc 69.922 (71.536)	lr 0.02204
Train [27][2680/3239]	Time 0.351 (0.549)	Data Time 0.001 (0.013)	Loss 3.1204 (3.1744)	Entropy 1.52736 (1.53567)	Top-1 acc 47.656 (48.202)	Top-5 acc 75.391 (71.539)	lr 0.02204
Train [27][2690/3239]	Time 0.202 (0.549)	Data Time 0.001 (0.013)	Loss 3.1148 (3.1742)	Entropy 1.52736 (1.53564)	Top-1 acc 50.000 (48.209)	Top-5 acc 76.172 (71.544)	lr 0.02204
Train [27][2700/3239]	Time 0.234 (0.548)	Data Time 0.001 (0.013)	Loss 3.0742 (3.1739)	Entropy 1.52732 (1.53561)	Top-1 acc 52.734 (48.219)	Top-5 acc 72.266 (71.549)	lr 0.02204
Train [27][2710/3239]	Time 0.249 (0.548)	Data Time 0.001 (0.013)	Loss 3.1332 (3.1740)	Entropy 1.52772 (1.53558)	Top-1 acc 50.000 (48.216)	Top-5 acc 70.312 (71.546)	lr 0.02204
Train [27][2720/3239]	Time 0.249 (0.547)	Data Time 0.001 (0.013)	Loss 3.1055 (3.1741)	Entropy 1.52771 (1.53555)	Top-1 acc 53.125 (48.219)	Top-5 acc 73.047 (71.542)	lr 0.02204
Train [27][2730/3239]	Time 0.246 (0.547)	Data Time 0.001 (0.013)	Loss 3.2299 (3.1741)	Entropy 1.52769 (1.53552)	Top-1 acc 46.875 (48.219)	Top-5 acc 70.703 (71.544)	lr 0.02204
Train [27][2740/3239]	Time 0.319 (0.547)	Data Time 0.002 (0.013)	Loss 3.1776 (3.1739)	Entropy 1.52764 (1.53549)	Top-1 acc 45.312 (48.227)	Top-5 acc 69.531 (71.547)	lr 0.02204
Train [27][2750/3239]	Time 0.216 (0.546)	Data Time 0.001 (0.013)	Loss 3.0953 (3.1739)	Entropy 1.52762 (1.53546)	Top-1 acc 50.391 (48.231)	Top-5 acc 69.922 (71.547)	lr 0.02204
Train [27][2760/3239]	Time 0.239 (0.546)	Data Time 0.001 (0.013)	Loss 3.2582 (3.1740)	Entropy 1.52755 (1.53543)	Top-1 acc 46.875 (48.228)	Top-5 acc 67.969 (71.544)	lr 0.02204
Train [27][2770/3239]	Time 0.217 (0.545)	Data Time 0.001 (0.013)	Loss 3.2454 (3.1742)	Entropy 1.52746 (1.53540)	Top-1 acc 46.875 (48.224)	Top-5 acc 69.141 (71.539)	lr 0.02204
Train [27][2780/3239]	Time 0.244 (0.545)	Data Time 0.001 (0.013)	Loss 3.3401 (3.1743)	Entropy 1.52732 (1.53538)	Top-1 acc 40.234 (48.220)	Top-5 acc 67.188 (71.535)	lr 0.02204
Train [27][2790/3239]	Time 0.228 (0.545)	Data Time 0.001 (0.013)	Loss 3.0629 (3.1744)	Entropy 1.52714 (1.53535)	Top-1 acc 46.094 (48.215)	Top-5 acc 75.781 (71.532)	lr 0.02203
Train [27][2800/3239]	Time 0.331 (0.558)	Data Time 0.004 (0.013)	Loss 3.1255 (3.1744)	Entropy 1.52707 (1.53532)	Top-1 acc 49.219 (48.215)	Top-5 acc 74.609 (71.533)	lr 0.02203
Train [27][2810/3239]	Time 0.249 (0.558)	Data Time 0.002 (0.013)	Loss 3.1045 (3.1745)	Entropy 1.52705 (1.53529)	Top-1 acc 50.000 (48.211)	Top-5 acc 74.609 (71.529)	lr 0.02203
Train [27][2820/3239]	Time 0.264 (0.557)	Data Time 0.001 (0.013)	Loss 3.1271 (3.1745)	Entropy 1.52694 (1.53526)	Top-1 acc 49.609 (48.216)	Top-5 acc 74.609 (71.531)	lr 0.02203
Train [27][2830/3239]	Time 0.252 (0.557)	Data Time 0.002 (0.013)	Loss 3.0571 (3.1744)	Entropy 1.52691 (1.53523)	Top-1 acc 50.391 (48.218)	Top-5 acc 73.438 (71.534)	lr 0.02203
Train [27][2840/3239]	Time 0.261 (0.557)	Data Time 0.002 (0.013)	Loss 3.1912 (3.1742)	Entropy 1.52684 (1.53520)	Top-1 acc 47.266 (48.220)	Top-5 acc 72.266 (71.535)	lr 0.02203
Train [27][2850/3239]	Time 0.423 (0.556)	Data Time 0.002 (0.013)	Loss 3.1514 (3.1744)	Entropy 1.52685 (1.53517)	Top-1 acc 49.609 (48.218)	Top-5 acc 72.656 (71.532)	lr 0.02203
Train [27][2860/3239]	Time 0.287 (0.556)	Data Time 0.002 (0.013)	Loss 3.0214 (3.1743)	Entropy 1.52683 (1.53514)	Top-1 acc 52.344 (48.220)	Top-5 acc 75.000 (71.533)	lr 0.02203
Train [27][2870/3239]	Time 0.282 (0.556)	Data Time 0.001 (0.013)	Loss 3.2333 (3.1742)	Entropy 1.52674 (1.53511)	Top-1 acc 46.094 (48.219)	Top-5 acc 70.703 (71.534)	lr 0.02203
Train [27][2880/3239]	Time 0.187 (0.556)	Data Time 0.001 (0.013)	Loss 3.5038 (3.1744)	Entropy 1.52658 (1.53508)	Top-1 acc 40.234 (48.213)	Top-5 acc 66.797 (71.533)	lr 0.02203
Train [27][2890/3239]	Time 0.262 (0.555)	Data Time 0.001 (0.013)	Loss 3.1179 (3.1745)	Entropy 1.52656 (1.53505)	Top-1 acc 47.656 (48.213)	Top-5 acc 72.266 (71.531)	lr 0.02203
Train [27][2900/3239]	Time 0.165 (0.555)	Data Time 0.001 (0.013)	Loss 3.1580 (3.1744)	Entropy 1.52637 (1.53503)	Top-1 acc 47.656 (48.217)	Top-5 acc 71.094 (71.533)	lr 0.02203
Train [27][2910/3239]	Time 0.238 (0.554)	Data Time 0.001 (0.013)	Loss 3.2578 (3.1744)	Entropy 1.52629 (1.53500)	Top-1 acc 42.188 (48.211)	Top-5 acc 73.047 (71.534)	lr 0.02203
Train [27][2920/3239]	Time 0.251 (0.554)	Data Time 0.001 (0.012)	Loss 3.2841 (3.1744)	Entropy 1.52628 (1.53497)	Top-1 acc 49.219 (48.215)	Top-5 acc 67.969 (71.536)	lr 0.02203
Train [27][2930/3239]	Time 0.204 (0.554)	Data Time 0.001 (0.012)	Loss 3.3821 (3.1743)	Entropy 1.52616 (1.53494)	Top-1 acc 43.359 (48.223)	Top-5 acc 66.797 (71.538)	lr 0.02203
Train [27][2940/3239]	Time 0.247 (0.553)	Data Time 0.001 (0.012)	Loss 3.1927 (3.1741)	Entropy 1.52628 (1.53491)	Top-1 acc 46.484 (48.225)	Top-5 acc 73.047 (71.542)	lr 0.02202
Train [27][2950/3239]	Time 0.207 (0.553)	Data Time 0.001 (0.012)	Loss 3.3707 (3.1742)	Entropy 1.52625 (1.53488)	Top-1 acc 42.969 (48.222)	Top-5 acc 66.406 (71.541)	lr 0.02202
Train [27][2960/3239]	Time 0.264 (0.553)	Data Time 0.001 (0.012)	Loss 3.0395 (3.1742)	Entropy 1.52629 (1.53485)	Top-1 acc 49.219 (48.223)	Top-5 acc 75.000 (71.543)	lr 0.02202
Train [27][2970/3239]	Time 0.334 (0.552)	Data Time 0.002 (0.012)	Loss 3.3272 (3.1744)	Entropy 1.52633 (1.53482)	Top-1 acc 43.359 (48.219)	Top-5 acc 70.312 (71.539)	lr 0.02202
Train [27][2980/3239]	Time 0.272 (0.552)	Data Time 0.030 (0.012)	Loss 3.0552 (3.1745)	Entropy 1.52629 (1.53479)	Top-1 acc 51.953 (48.216)	Top-5 acc 73.438 (71.537)	lr 0.02202
Train [27][2990/3239]	Time 0.232 (0.551)	Data Time 0.001 (0.012)	Loss 2.9328 (3.1744)	Entropy 1.52603 (1.53476)	Top-1 acc 52.344 (48.219)	Top-5 acc 77.734 (71.541)	lr 0.02202
Train [27][3000/3239]	Time 0.286 (0.551)	Data Time 0.001 (0.012)	Loss 3.1610 (3.1746)	Entropy 1.52605 (1.53473)	Top-1 acc 46.094 (48.214)	Top-5 acc 71.094 (71.536)	lr 0.02202
Train [27][3010/3239]	Time 0.217 (0.551)	Data Time 0.001 (0.012)	Loss 3.1353 (3.1747)	Entropy 1.52595 (1.53470)	Top-1 acc 48.438 (48.210)	Top-5 acc 72.656 (71.538)	lr 0.02202
Train [27][3020/3239]	Time 0.303 (0.550)	Data Time 0.002 (0.012)	Loss 3.3063 (3.1748)	Entropy 1.52592 (1.53468)	Top-1 acc 46.875 (48.208)	Top-5 acc 73.047 (71.536)	lr 0.02202
Train [27][3030/3239]	Time 0.271 (0.550)	Data Time 0.001 (0.012)	Loss 3.1297 (3.1747)	Entropy 1.52591 (1.53465)	Top-1 acc 46.484 (48.209)	Top-5 acc 73.047 (71.538)	lr 0.02202
Train [27][3040/3239]	Time 0.209 (0.550)	Data Time 0.002 (0.012)	Loss 3.0203 (3.1744)	Entropy 1.52573 (1.53462)	Top-1 acc 50.000 (48.215)	Top-5 acc 72.656 (71.544)	lr 0.02202
Train [27][3050/3239]	Time 0.244 (0.549)	Data Time 0.004 (0.012)	Loss 3.2062 (3.1745)	Entropy 1.52565 (1.53459)	Top-1 acc 49.219 (48.211)	Top-5 acc 70.703 (71.544)	lr 0.02202
Train [27][3060/3239]	Time 0.209 (0.549)	Data Time 0.001 (0.012)	Loss 3.0290 (3.1744)	Entropy 1.52560 (1.53456)	Top-1 acc 53.125 (48.211)	Top-5 acc 75.000 (71.543)	lr 0.02202
Train [27][3070/3239]	Time 0.282 (0.549)	Data Time 0.002 (0.012)	Loss 3.2770 (3.1743)	Entropy 1.52559 (1.53453)	Top-1 acc 51.562 (48.212)	Top-5 acc 70.703 (71.546)	lr 0.02202
Train [27][3080/3239]	Time 0.253 (0.548)	Data Time 0.001 (0.012)	Loss 3.0712 (3.1742)	Entropy 1.52560 (1.53450)	Top-1 acc 50.781 (48.215)	Top-5 acc 73.438 (71.548)	lr 0.02202
Train [27][3090/3239]	Time 0.283 (0.548)	Data Time 0.001 (0.012)	Loss 3.1895 (3.1742)	Entropy 1.52558 (1.53447)	Top-1 acc 46.094 (48.213)	Top-5 acc 71.484 (71.550)	lr 0.02201
Train [27][3100/3239]	Time 0.310 (0.548)	Data Time 0.001 (0.012)	Loss 3.2256 (3.1741)	Entropy 1.52557 (1.53444)	Top-1 acc 44.531 (48.216)	Top-5 acc 68.750 (71.554)	lr 0.02201
Train [27][3110/3239]	Time 0.257 (0.547)	Data Time 0.001 (0.012)	Loss 3.2827 (3.1741)	Entropy 1.52560 (1.53441)	Top-1 acc 47.266 (48.216)	Top-5 acc 71.094 (71.554)	lr 0.02201
Train [27][3120/3239]	Time 0.175 (0.547)	Data Time 0.001 (0.012)	Loss 3.1892 (3.1741)	Entropy 1.52553 (1.53439)	Top-1 acc 49.609 (48.215)	Top-5 acc 71.484 (71.552)	lr 0.02201
Train [27][3130/3239]	Time 0.260 (0.560)	Data Time 0.004 (0.012)	Loss 3.4517 (3.1743)	Entropy 1.52545 (1.53436)	Top-1 acc 41.016 (48.208)	Top-5 acc 65.234 (71.544)	lr 0.02201
Train [27][3140/3239]	Time 0.231 (0.560)	Data Time 0.002 (0.012)	Loss 2.9230 (3.1742)	Entropy 1.52540 (1.53433)	Top-1 acc 51.953 (48.212)	Top-5 acc 77.344 (71.548)	lr 0.02201
Train [27][3150/3239]	Time 0.376 (0.559)	Data Time 0.002 (0.012)	Loss 3.1112 (3.1740)	Entropy 1.52531 (1.53430)	Top-1 acc 48.828 (48.216)	Top-5 acc 70.703 (71.552)	lr 0.02201
Train [27][3160/3239]	Time 0.179 (0.559)	Data Time 0.001 (0.012)	Loss 3.2626 (3.1741)	Entropy 1.52523 (1.53427)	Top-1 acc 44.141 (48.210)	Top-5 acc 69.922 (71.549)	lr 0.02201
Train [27][3170/3239]	Time 0.212 (0.559)	Data Time 0.001 (0.012)	Loss 3.3146 (3.1742)	Entropy 1.52518 (1.53424)	Top-1 acc 45.703 (48.207)	Top-5 acc 68.359 (71.547)	lr 0.02201
Train [27][3180/3239]	Time 0.214 (0.558)	Data Time 0.000 (0.012)	Loss 3.2092 (3.1741)	Entropy 1.52516 (1.53422)	Top-1 acc 46.484 (48.209)	Top-5 acc 70.312 (71.550)	lr 0.02201
Train [27][3190/3239]	Time 0.214 (0.558)	Data Time 0.000 (0.012)	Loss 3.1322 (3.1740)	Entropy 1.52513 (1.53419)	Top-1 acc 50.781 (48.210)	Top-5 acc 71.875 (71.552)	lr 0.02201
Train [27][3200/3239]	Time 0.228 (0.557)	Data Time 0.000 (0.012)	Loss 3.2055 (3.1741)	Entropy 1.52509 (1.53416)	Top-1 acc 48.438 (48.214)	Top-5 acc 72.266 (71.548)	lr 0.02201
Train [27][3210/3239]	Time 0.313 (0.557)	Data Time 0.000 (0.012)	Loss 3.1891 (3.1743)	Entropy 1.52496 (1.53413)	Top-1 acc 49.609 (48.208)	Top-5 acc 71.094 (71.543)	lr 0.02201
Train [27][3220/3239]	Time 0.202 (0.557)	Data Time 0.000 (0.012)	Loss 3.2907 (3.1743)	Entropy 1.52473 (1.53410)	Top-1 acc 46.094 (48.209)	Top-5 acc 68.359 (71.544)	lr 0.02201
Train [27][3230/3239]	Time 0.224 (0.556)	Data Time 0.000 (0.011)	Loss 3.1422 (3.1742)	Entropy 1.52470 (1.53407)	Top-1 acc 48.828 (48.211)	Top-5 acc 71.484 (71.547)	lr 0.02201
Train [27][3239/3239]	Time 2.124 (0.556)	Data Time 0.000 (0.011)	Loss 3.4329 (3.1743)	Entropy 1.52470 (1.53405)	Top-1 acc 38.272 (48.209)	Top-5 acc 69.136 (71.544)	lr 0.02201
==========Valid [27/120]	loss 1.968	top-1 acc 56.499 (56.499)	top-5 acc 79.202	Train top-1 48.209	top-5 71.544	Entropy 1.52470	Latency-None: 0.000ms	Flops: 559.63M
Train [28][0/3239]	Time 30.935 (30.935)	Data Time 30.291 (30.291)	Loss 3.1091 (3.1091)	Entropy 1.52452 (1.52452)	Top-1 acc 45.703 (45.703)	Top-5 acc 75.781 (75.781)	lr 0.02201
Train [28][10/3239]	Time 2.358 (3.307)	Data Time 0.002 (2.758)	Loss 3.1156 (3.1914)	Entropy 1.52452 (1.52452)	Top-1 acc 47.266 (47.266)	Top-5 acc 73.047 (71.520)	lr 0.02200
Train [28][20/3239]	Time 0.240 (1.846)	Data Time 0.001 (1.445)	Loss 3.0239 (3.1667)	Entropy 1.52453 (1.52453)	Top-1 acc 57.031 (47.954)	Top-5 acc 73.438 (71.819)	lr 0.02200
Train [28][30/3239]	Time 0.325 (1.394)	Data Time 0.001 (0.980)	Loss 3.0229 (3.1500)	Entropy 1.52445 (1.52450)	Top-1 acc 46.484 (47.984)	Top-5 acc 76.562 (72.165)	lr 0.02200
Train [28][40/3239]	Time 0.227 (1.159)	Data Time 0.001 (0.741)	Loss 3.2885 (3.1424)	Entropy 1.52439 (1.52448)	Top-1 acc 44.141 (48.171)	Top-5 acc 70.703 (72.285)	lr 0.02200
Train [28][50/3239]	Time 0.209 (1.020)	Data Time 0.001 (0.596)	Loss 3.2892 (3.1354)	Entropy 1.52430 (1.52445)	Top-1 acc 41.797 (48.407)	Top-5 acc 68.359 (72.197)	lr 0.02200
Train [28][60/3239]	Time 0.212 (0.926)	Data Time 0.001 (0.499)	Loss 3.3524 (3.1412)	Entropy 1.52422 (1.52442)	Top-1 acc 46.875 (48.431)	Top-5 acc 66.406 (71.875)	lr 0.02200
Train [28][70/3239]	Time 0.228 (0.863)	Data Time 0.001 (0.429)	Loss 3.1816 (3.1458)	Entropy 1.52416 (1.52438)	Top-1 acc 48.828 (48.388)	Top-5 acc 68.750 (71.853)	lr 0.02200
Train [28][80/3239]	Time 0.159 (0.811)	Data Time 0.001 (0.376)	Loss 3.1917 (3.1416)	Entropy 1.52404 (1.52435)	Top-1 acc 50.781 (48.736)	Top-5 acc 69.922 (72.010)	lr 0.02200
Train [28][90/3239]	Time 0.224 (0.770)	Data Time 0.001 (0.335)	Loss 3.0850 (3.1452)	Entropy 1.52400 (1.52432)	Top-1 acc 48.047 (48.747)	Top-5 acc 74.609 (71.909)	lr 0.02200
Train [28][100/3239]	Time 0.230 (0.738)	Data Time 0.001 (0.302)	Loss 3.0232 (3.1432)	Entropy 1.52393 (1.52428)	Top-1 acc 51.953 (48.867)	Top-5 acc 74.219 (71.968)	lr 0.02200
Train [28][110/3239]	Time 0.222 (0.711)	Data Time 0.001 (0.275)	Loss 3.2357 (3.1455)	Entropy 1.52387 (1.52425)	Top-1 acc 47.656 (48.807)	Top-5 acc 71.875 (71.998)	lr 0.02200
Train [28][120/3239]	Time 2.534 (0.690)	Data Time 0.001 (0.253)	Loss 3.0554 (3.1488)	Entropy 1.52387 (1.52422)	Top-1 acc 47.656 (48.728)	Top-5 acc 75.781 (72.036)	lr 0.02200
Train [28][130/3239]	Time 0.220 (0.655)	Data Time 0.001 (0.234)	Loss 3.2584 (3.1557)	Entropy 1.52374 (1.52418)	Top-1 acc 46.484 (48.613)	Top-5 acc 71.094 (71.950)	lr 0.02200
Train [28][140/3239]	Time 0.375 (0.642)	Data Time 0.001 (0.217)	Loss 3.2011 (3.1548)	Entropy 1.52372 (1.52415)	Top-1 acc 44.531 (48.593)	Top-5 acc 68.750 (71.941)	lr 0.02200
Train [28][150/3239]	Time 0.206 (0.629)	Data Time 0.001 (0.203)	Loss 3.1646 (3.1513)	Entropy 1.52367 (1.52412)	Top-1 acc 44.922 (48.652)	Top-5 acc 70.703 (71.971)	lr 0.02200
Train [28][160/3239]	Time 0.199 (0.617)	Data Time 0.001 (0.190)	Loss 3.2551 (3.1512)	Entropy 1.52365 (1.52409)	Top-1 acc 40.625 (48.634)	Top-5 acc 75.391 (72.040)	lr 0.02199
Train [28][170/3239]	Time 0.149 (0.606)	Data Time 0.002 (0.179)	Loss 3.1275 (3.1498)	Entropy 1.52352 (1.52406)	Top-1 acc 48.438 (48.696)	Top-5 acc 74.219 (72.087)	lr 0.02199
Train [28][180/3239]	Time 0.199 (0.598)	Data Time 0.001 (0.170)	Loss 3.0417 (3.1477)	Entropy 1.52345 (1.52403)	Top-1 acc 54.297 (48.772)	Top-5 acc 71.484 (72.158)	lr 0.02199
Train [28][190/3239]	Time 0.237 (0.590)	Data Time 0.001 (0.161)	Loss 3.1581 (3.1496)	Entropy 1.52335 (1.52399)	Top-1 acc 49.219 (48.744)	Top-5 acc 74.609 (72.110)	lr 0.02199
Train [28][200/3239]	Time 0.322 (0.584)	Data Time 0.001 (0.153)	Loss 2.9038 (3.1478)	Entropy 1.52329 (1.52396)	Top-1 acc 57.031 (48.776)	Top-5 acc 76.953 (72.172)	lr 0.02199
Train [28][210/3239]	Time 0.200 (0.578)	Data Time 0.001 (0.146)	Loss 3.1476 (3.1496)	Entropy 1.52326 (1.52393)	Top-1 acc 49.609 (48.689)	Top-5 acc 71.875 (72.138)	lr 0.02199
Train [28][220/3239]	Time 0.277 (0.572)	Data Time 0.001 (0.139)	Loss 3.2615 (3.1475)	Entropy 1.52325 (1.52390)	Top-1 acc 45.703 (48.717)	Top-5 acc 65.625 (72.181)	lr 0.02199
Train [28][230/3239]	Time 2.347 (0.566)	Data Time 0.001 (0.133)	Loss 3.1495 (3.1439)	Entropy 1.52325 (1.52387)	Top-1 acc 50.391 (48.796)	Top-5 acc 73.828 (72.228)	lr 0.02199
Train [28][240/3239]	Time 0.219 (0.551)	Data Time 0.001 (0.128)	Loss 3.0341 (3.1439)	Entropy 1.52318 (1.52384)	Top-1 acc 49.609 (48.814)	Top-5 acc 70.703 (72.251)	lr 0.02199
Train [28][250/3239]	Time 0.245 (0.733)	Data Time 0.002 (0.123)	Loss 3.1892 (3.1442)	Entropy 1.52311 (1.52381)	Top-1 acc 50.000 (48.786)	Top-5 acc 69.922 (72.252)	lr 0.02199
Train [28][260/3239]	Time 0.322 (0.722)	Data Time 0.002 (0.118)	Loss 3.3308 (3.1460)	Entropy 1.52307 (1.52379)	Top-1 acc 44.141 (48.753)	Top-5 acc 67.969 (72.194)	lr 0.02199
Train [28][270/3239]	Time 0.199 (0.712)	Data Time 0.001 (0.114)	Loss 3.2431 (3.1449)	Entropy 1.52308 (1.52376)	Top-1 acc 47.266 (48.772)	Top-5 acc 71.094 (72.198)	lr 0.02199
Train [28][280/3239]	Time 0.219 (0.702)	Data Time 0.001 (0.110)	Loss 3.0220 (3.1462)	Entropy 1.52310 (1.52374)	Top-1 acc 53.906 (48.724)	Top-5 acc 73.047 (72.167)	lr 0.02199
Train [28][290/3239]	Time 0.252 (0.694)	Data Time 0.002 (0.106)	Loss 3.0468 (3.1466)	Entropy 1.52304 (1.52371)	Top-1 acc 53.125 (48.698)	Top-5 acc 75.781 (72.161)	lr 0.02199
Train [28][300/3239]	Time 0.217 (0.685)	Data Time 0.001 (0.103)	Loss 2.9843 (3.1476)	Entropy 1.52303 (1.52369)	Top-1 acc 53.906 (48.657)	Top-5 acc 74.219 (72.137)	lr 0.02199
Train [28][310/3239]	Time 0.200 (0.677)	Data Time 0.001 (0.100)	Loss 3.3394 (3.1453)	Entropy 1.52289 (1.52367)	Top-1 acc 44.922 (48.713)	Top-5 acc 67.969 (72.171)	lr 0.02198
Train [28][320/3239]	Time 0.228 (0.670)	Data Time 0.001 (0.097)	Loss 3.0847 (3.1450)	Entropy 1.52277 (1.52364)	Top-1 acc 52.344 (48.719)	Top-5 acc 70.703 (72.171)	lr 0.02198
Train [28][330/3239]	Time 0.251 (0.663)	Data Time 0.001 (0.094)	Loss 2.9443 (3.1440)	Entropy 1.52273 (1.52362)	Top-1 acc 57.422 (48.758)	Top-5 acc 75.391 (72.172)	lr 0.02198
Train [28][340/3239]	Time 2.356 (0.657)	Data Time 0.001 (0.091)	Loss 3.2167 (3.1422)	Entropy 1.52273 (1.52359)	Top-1 acc 51.172 (48.798)	Top-5 acc 71.875 (72.211)	lr 0.02198
Train [28][350/3239]	Time 0.214 (0.645)	Data Time 0.001 (0.088)	Loss 3.1782 (3.1427)	Entropy 1.52257 (1.52356)	Top-1 acc 50.391 (48.801)	Top-5 acc 71.484 (72.200)	lr 0.02198
Train [28][360/3239]	Time 0.238 (0.640)	Data Time 0.002 (0.086)	Loss 3.1871 (3.1424)	Entropy 1.52254 (1.52353)	Top-1 acc 47.656 (48.799)	Top-5 acc 73.438 (72.208)	lr 0.02198
Train [28][370/3239]	Time 0.305 (0.635)	Data Time 0.001 (0.084)	Loss 3.0852 (3.1413)	Entropy 1.52251 (1.52351)	Top-1 acc 50.391 (48.811)	Top-5 acc 74.609 (72.230)	lr 0.02198
Train [28][380/3239]	Time 0.217 (0.630)	Data Time 0.001 (0.082)	Loss 3.1545 (3.1403)	Entropy 1.52232 (1.52348)	Top-1 acc 51.172 (48.855)	Top-5 acc 72.656 (72.264)	lr 0.02198
Train [28][390/3239]	Time 0.213 (0.625)	Data Time 0.001 (0.080)	Loss 3.0919 (3.1407)	Entropy 1.52231 (1.52345)	Top-1 acc 49.609 (48.874)	Top-5 acc 74.219 (72.261)	lr 0.02198
Train [28][400/3239]	Time 0.156 (0.620)	Data Time 0.002 (0.078)	Loss 3.1703 (3.1406)	Entropy 1.52228 (1.52342)	Top-1 acc 48.047 (48.863)	Top-5 acc 71.875 (72.247)	lr 0.02198
Train [28][410/3239]	Time 0.204 (0.616)	Data Time 0.001 (0.076)	Loss 2.9088 (3.1394)	Entropy 1.52224 (1.52339)	Top-1 acc 53.125 (48.888)	Top-5 acc 75.391 (72.290)	lr 0.02198
Train [28][420/3239]	Time 0.241 (0.611)	Data Time 0.001 (0.074)	Loss 2.9527 (3.1398)	Entropy 1.52211 (1.52336)	Top-1 acc 55.078 (48.903)	Top-5 acc 74.219 (72.283)	lr 0.02198
Train [28][430/3239]	Time 0.289 (0.607)	Data Time 0.001 (0.072)	Loss 3.1568 (3.1392)	Entropy 1.52208 (1.52333)	Top-1 acc 50.000 (48.921)	Top-5 acc 69.922 (72.281)	lr 0.02198
Train [28][440/3239]	Time 0.216 (0.604)	Data Time 0.001 (0.071)	Loss 3.1250 (3.1392)	Entropy 1.52202 (1.52330)	Top-1 acc 47.656 (48.916)	Top-5 acc 73.047 (72.263)	lr 0.02198
Train [28][450/3239]	Time 2.414 (0.601)	Data Time 0.001 (0.069)	Loss 3.1150 (3.1394)	Entropy 1.52202 (1.52328)	Top-1 acc 50.391 (48.919)	Top-5 acc 72.266 (72.249)	lr 0.02198
Train [28][460/3239]	Time 0.257 (0.592)	Data Time 0.001 (0.068)	Loss 3.1222 (3.1399)	Entropy 1.52193 (1.52325)	Top-1 acc 50.000 (48.906)	Top-5 acc 72.266 (72.258)	lr 0.02197
Train [28][470/3239]	Time 0.199 (0.589)	Data Time 0.001 (0.066)	Loss 2.9126 (3.1390)	Entropy 1.52181 (1.52322)	Top-1 acc 53.125 (48.918)	Top-5 acc 76.172 (72.281)	lr 0.02197
Train [28][480/3239]	Time 0.220 (0.586)	Data Time 0.001 (0.065)	Loss 3.0310 (3.1401)	Entropy 1.52154 (1.52318)	Top-1 acc 51.562 (48.864)	Top-5 acc 71.484 (72.261)	lr 0.02197
Train [28][490/3239]	Time 0.205 (0.583)	Data Time 0.001 (0.064)	Loss 2.8801 (3.1384)	Entropy 1.52149 (1.52315)	Top-1 acc 54.297 (48.916)	Top-5 acc 78.125 (72.298)	lr 0.02197
Train [28][500/3239]	Time 0.230 (0.580)	Data Time 0.001 (0.063)	Loss 3.1465 (3.1390)	Entropy 1.52140 (1.52311)	Top-1 acc 46.875 (48.917)	Top-5 acc 72.266 (72.282)	lr 0.02197
Train [28][510/3239]	Time 0.227 (0.578)	Data Time 0.001 (0.061)	Loss 3.0846 (3.1397)	Entropy 1.52132 (1.52308)	Top-1 acc 50.000 (48.897)	Top-5 acc 76.172 (72.273)	lr 0.02197
Train [28][520/3239]	Time 0.204 (0.575)	Data Time 0.001 (0.060)	Loss 3.1576 (3.1407)	Entropy 1.52129 (1.52305)	Top-1 acc 48.828 (48.873)	Top-5 acc 72.266 (72.239)	lr 0.02197
Train [28][530/3239]	Time 0.209 (0.572)	Data Time 0.001 (0.059)	Loss 3.5107 (3.1416)	Entropy 1.52127 (1.52301)	Top-1 acc 42.188 (48.845)	Top-5 acc 66.016 (72.236)	lr 0.02197
Train [28][540/3239]	Time 0.358 (0.570)	Data Time 0.001 (0.058)	Loss 3.2991 (3.1427)	Entropy 1.52109 (1.52298)	Top-1 acc 43.359 (48.831)	Top-5 acc 68.359 (72.222)	lr 0.02197
Train [28][550/3239]	Time 0.244 (0.568)	Data Time 0.001 (0.057)	Loss 3.1942 (3.1431)	Entropy 1.52104 (1.52295)	Top-1 acc 48.438 (48.826)	Top-5 acc 70.703 (72.208)	lr 0.02197
Train [28][560/3239]	Time 2.361 (0.566)	Data Time 0.001 (0.056)	Loss 3.0807 (3.1423)	Entropy 1.52104 (1.52291)	Top-1 acc 49.609 (48.851)	Top-5 acc 76.172 (72.231)	lr 0.02197
Train [28][570/3239]	Time 0.248 (0.560)	Data Time 0.001 (0.055)	Loss 3.2199 (3.1424)	Entropy 1.52094 (1.52288)	Top-1 acc 44.531 (48.842)	Top-5 acc 73.438 (72.225)	lr 0.02197
Train [28][580/3239]	Time 0.168 (0.558)	Data Time 0.001 (0.054)	Loss 3.2312 (3.1430)	Entropy 1.52090 (1.52284)	Top-1 acc 45.312 (48.819)	Top-5 acc 70.703 (72.229)	lr 0.02197
Train [28][590/3239]	Time 0.201 (0.556)	Data Time 0.001 (0.053)	Loss 3.2778 (3.1443)	Entropy 1.52085 (1.52281)	Top-1 acc 49.609 (48.821)	Top-5 acc 71.484 (72.207)	lr 0.02197
Train [28][600/3239]	Time 0.234 (0.554)	Data Time 0.001 (0.053)	Loss 3.1080 (3.1454)	Entropy 1.52084 (1.52278)	Top-1 acc 50.391 (48.801)	Top-5 acc 73.047 (72.195)	lr 0.02197
Train [28][610/3239]	Time 0.208 (0.626)	Data Time 0.002 (0.052)	Loss 3.3217 (3.1458)	Entropy 1.52084 (1.52274)	Top-1 acc 46.094 (48.792)	Top-5 acc 68.359 (72.183)	lr 0.02196
Train [28][620/3239]	Time 0.227 (0.623)	Data Time 0.002 (0.051)	Loss 3.3789 (3.1462)	Entropy 1.52078 (1.52271)	Top-1 acc 39.062 (48.764)	Top-5 acc 69.531 (72.189)	lr 0.02196
Train [28][630/3239]	Time 0.197 (0.620)	Data Time 0.001 (0.050)	Loss 3.3241 (3.1467)	Entropy 1.52061 (1.52268)	Top-1 acc 42.969 (48.738)	Top-5 acc 67.578 (72.178)	lr 0.02196
Train [28][640/3239]	Time 0.196 (0.617)	Data Time 0.001 (0.049)	Loss 3.2415 (3.1475)	Entropy 1.52065 (1.52265)	Top-1 acc 44.141 (48.715)	Top-5 acc 71.875 (72.176)	lr 0.02196
Train [28][650/3239]	Time 0.166 (0.614)	Data Time 0.001 (0.049)	Loss 3.0960 (3.1474)	Entropy 1.52057 (1.52262)	Top-1 acc 45.703 (48.694)	Top-5 acc 74.609 (72.177)	lr 0.02196
Train [28][660/3239]	Time 0.292 (0.612)	Data Time 0.001 (0.048)	Loss 3.0371 (3.1463)	Entropy 1.52055 (1.52259)	Top-1 acc 51.562 (48.723)	Top-5 acc 73.828 (72.187)	lr 0.02196
Train [28][670/3239]	Time 2.325 (0.609)	Data Time 0.001 (0.047)	Loss 3.1812 (3.1475)	Entropy 1.52055 (1.52256)	Top-1 acc 50.781 (48.701)	Top-5 acc 67.578 (72.154)	lr 0.02196
Train [28][680/3239]	Time 0.250 (0.604)	Data Time 0.002 (0.047)	Loss 2.8476 (3.1462)	Entropy 1.52049 (1.52253)	Top-1 acc 51.953 (48.732)	Top-5 acc 78.125 (72.201)	lr 0.02196
Train [28][690/3239]	Time 0.269 (0.601)	Data Time 0.002 (0.046)	Loss 3.2716 (3.1464)	Entropy 1.52047 (1.52250)	Top-1 acc 47.656 (48.715)	Top-5 acc 71.094 (72.188)	lr 0.02196
Train [28][700/3239]	Time 0.174 (0.599)	Data Time 0.001 (0.045)	Loss 3.1812 (3.1461)	Entropy 1.52041 (1.52247)	Top-1 acc 46.484 (48.731)	Top-5 acc 67.188 (72.202)	lr 0.02196
Train [28][710/3239]	Time 0.226 (0.597)	Data Time 0.001 (0.045)	Loss 3.2235 (3.1457)	Entropy 1.52031 (1.52244)	Top-1 acc 44.531 (48.747)	Top-5 acc 67.969 (72.205)	lr 0.02196
Train [28][720/3239]	Time 0.308 (0.595)	Data Time 0.001 (0.044)	Loss 3.1737 (3.1467)	Entropy 1.52027 (1.52241)	Top-1 acc 47.266 (48.722)	Top-5 acc 72.656 (72.190)	lr 0.02196
Train [28][730/3239]	Time 0.232 (0.592)	Data Time 0.001 (0.044)	Loss 2.9195 (3.1459)	Entropy 1.52008 (1.52238)	Top-1 acc 57.031 (48.759)	Top-5 acc 75.781 (72.197)	lr 0.02196
Train [28][740/3239]	Time 0.207 (0.590)	Data Time 0.001 (0.043)	Loss 3.2306 (3.1458)	Entropy 1.51998 (1.52235)	Top-1 acc 46.094 (48.764)	Top-5 acc 69.141 (72.184)	lr 0.02196
Train [28][750/3239]	Time 0.214 (0.589)	Data Time 0.001 (0.043)	Loss 3.0590 (3.1455)	Entropy 1.51987 (1.52231)	Top-1 acc 53.516 (48.767)	Top-5 acc 76.172 (72.190)	lr 0.02196
Train [28][760/3239]	Time 0.227 (0.587)	Data Time 0.001 (0.042)	Loss 3.0468 (3.1455)	Entropy 1.51983 (1.52228)	Top-1 acc 49.609 (48.770)	Top-5 acc 75.391 (72.199)	lr 0.02196
Train [28][770/3239]	Time 0.220 (0.585)	Data Time 0.001 (0.042)	Loss 3.3844 (3.1463)	Entropy 1.51975 (1.52225)	Top-1 acc 44.922 (48.756)	Top-5 acc 67.578 (72.172)	lr 0.02195
Train [28][780/3239]	Time 2.581 (0.583)	Data Time 0.001 (0.041)	Loss 3.1448 (3.1470)	Entropy 1.51975 (1.52222)	Top-1 acc 50.000 (48.747)	Top-5 acc 73.828 (72.156)	lr 0.02195
Train [28][790/3239]	Time 0.238 (0.579)	Data Time 0.001 (0.041)	Loss 3.0022 (3.1468)	Entropy 1.51972 (1.52219)	Top-1 acc 53.125 (48.765)	Top-5 acc 75.391 (72.159)	lr 0.02195
Train [28][800/3239]	Time 0.214 (0.577)	Data Time 0.001 (0.040)	Loss 3.1760 (3.1457)	Entropy 1.51964 (1.52215)	Top-1 acc 49.609 (48.798)	Top-5 acc 70.703 (72.189)	lr 0.02195
Train [28][810/3239]	Time 0.206 (0.575)	Data Time 0.001 (0.040)	Loss 3.2270 (3.1451)	Entropy 1.51957 (1.52212)	Top-1 acc 47.656 (48.809)	Top-5 acc 69.531 (72.201)	lr 0.02195
Train [28][820/3239]	Time 0.230 (0.574)	Data Time 0.002 (0.039)	Loss 2.9713 (3.1453)	Entropy 1.51956 (1.52209)	Top-1 acc 51.562 (48.806)	Top-5 acc 74.609 (72.199)	lr 0.02195
Train [28][830/3239]	Time 0.318 (0.573)	Data Time 0.001 (0.039)	Loss 3.9189 (3.1463)	Entropy 1.51951 (1.52206)	Top-1 acc 34.766 (48.768)	Top-5 acc 62.109 (72.181)	lr 0.02195
Train [28][840/3239]	Time 0.207 (0.571)	Data Time 0.001 (0.038)	Loss 3.1394 (3.1466)	Entropy 1.51948 (1.52203)	Top-1 acc 48.828 (48.758)	Top-5 acc 71.094 (72.170)	lr 0.02195
Train [28][850/3239]	Time 0.198 (0.569)	Data Time 0.001 (0.038)	Loss 3.1519 (3.1469)	Entropy 1.51945 (1.52200)	Top-1 acc 43.750 (48.745)	Top-5 acc 70.312 (72.166)	lr 0.02195
Train [28][860/3239]	Time 0.239 (0.568)	Data Time 0.002 (0.037)	Loss 3.1528 (3.1475)	Entropy 1.51934 (1.52197)	Top-1 acc 52.344 (48.735)	Top-5 acc 68.750 (72.147)	lr 0.02195
Train [28][870/3239]	Time 0.215 (0.567)	Data Time 0.002 (0.037)	Loss 3.1130 (3.1473)	Entropy 1.51927 (1.52194)	Top-1 acc 48.438 (48.729)	Top-5 acc 73.438 (72.150)	lr 0.02195
Train [28][880/3239]	Time 0.235 (0.565)	Data Time 0.001 (0.037)	Loss 3.1075 (3.1470)	Entropy 1.51916 (1.52191)	Top-1 acc 51.562 (48.747)	Top-5 acc 71.484 (72.146)	lr 0.02195
Train [28][890/3239]	Time 2.401 (0.564)	Data Time 0.001 (0.036)	Loss 3.0742 (3.1474)	Entropy 1.51916 (1.52188)	Top-1 acc 53.906 (48.742)	Top-5 acc 71.484 (72.140)	lr 0.02195
Train [28][900/3239]	Time 0.229 (0.560)	Data Time 0.002 (0.036)	Loss 3.0177 (3.1470)	Entropy 1.51910 (1.52185)	Top-1 acc 50.000 (48.757)	Top-5 acc 75.781 (72.149)	lr 0.02195
Train [28][910/3239]	Time 0.222 (0.559)	Data Time 0.001 (0.035)	Loss 3.2395 (3.1468)	Entropy 1.51898 (1.52182)	Top-1 acc 46.094 (48.760)	Top-5 acc 67.188 (72.143)	lr 0.02195
Train [28][920/3239]	Time 0.176 (0.558)	Data Time 0.002 (0.035)	Loss 3.7448 (3.1478)	Entropy 1.51897 (1.52179)	Top-1 acc 38.672 (48.747)	Top-5 acc 63.672 (72.118)	lr 0.02194
Train [28][930/3239]	Time 0.195 (0.557)	Data Time 0.001 (0.035)	Loss 3.1355 (3.1483)	Entropy 1.51878 (1.52175)	Top-1 acc 51.953 (48.744)	Top-5 acc 73.438 (72.111)	lr 0.02194
Train [28][940/3239]	Time 0.317 (0.555)	Data Time 0.001 (0.034)	Loss 2.9449 (3.1480)	Entropy 1.51878 (1.52172)	Top-1 acc 50.781 (48.736)	Top-5 acc 78.516 (72.122)	lr 0.02194
Train [28][950/3239]	Time 0.250 (0.554)	Data Time 0.001 (0.034)	Loss 3.0437 (3.1479)	Entropy 1.51872 (1.52169)	Top-1 acc 48.047 (48.730)	Top-5 acc 74.609 (72.127)	lr 0.02194
Train [28][960/3239]	Time 0.246 (0.553)	Data Time 0.001 (0.034)	Loss 3.1725 (3.1472)	Entropy 1.51867 (1.52166)	Top-1 acc 49.609 (48.752)	Top-5 acc 71.484 (72.141)	lr 0.02194
Train [28][970/3239]	Time 0.342 (0.594)	Data Time 0.005 (0.033)	Loss 3.4254 (3.1475)	Entropy 1.51862 (1.52163)	Top-1 acc 39.062 (48.746)	Top-5 acc 69.141 (72.140)	lr 0.02194
Train [28][980/3239]	Time 0.217 (0.594)	Data Time 0.002 (0.033)	Loss 3.3684 (3.1471)	Entropy 1.51850 (1.52160)	Top-1 acc 48.047 (48.761)	Top-5 acc 68.750 (72.154)	lr 0.02194
Train [28][990/3239]	Time 0.204 (0.592)	Data Time 0.002 (0.033)	Loss 3.1654 (3.1474)	Entropy 1.51842 (1.52157)	Top-1 acc 47.266 (48.756)	Top-5 acc 70.703 (72.151)	lr 0.02194
Train [28][1000/3239]	Time 2.479 (0.591)	Data Time 0.002 (0.032)	Loss 3.1527 (3.1476)	Entropy 1.51842 (1.52154)	Top-1 acc 50.781 (48.747)	Top-5 acc 68.750 (72.137)	lr 0.02194
Train [28][1010/3239]	Time 0.244 (0.587)	Data Time 0.002 (0.032)	Loss 3.0634 (3.1474)	Entropy 1.51831 (1.52150)	Top-1 acc 49.219 (48.754)	Top-5 acc 75.781 (72.144)	lr 0.02194
Train [28][1020/3239]	Time 0.228 (0.586)	Data Time 0.002 (0.032)	Loss 3.0883 (3.1475)	Entropy 1.51812 (1.52147)	Top-1 acc 47.266 (48.752)	Top-5 acc 73.438 (72.139)	lr 0.02194
Train [28][1030/3239]	Time 0.203 (0.585)	Data Time 0.001 (0.032)	Loss 3.4681 (3.1476)	Entropy 1.51811 (1.52144)	Top-1 acc 43.750 (48.745)	Top-5 acc 69.141 (72.148)	lr 0.02194
Train [28][1040/3239]	Time 0.202 (0.583)	Data Time 0.001 (0.031)	Loss 3.2481 (3.1472)	Entropy 1.51805 (1.52141)	Top-1 acc 46.875 (48.762)	Top-5 acc 68.750 (72.161)	lr 0.02194
Train [28][1050/3239]	Time 0.205 (0.582)	Data Time 0.001 (0.031)	Loss 3.2176 (3.1464)	Entropy 1.51793 (1.52137)	Top-1 acc 44.141 (48.776)	Top-5 acc 72.656 (72.170)	lr 0.02194
Train [28][1060/3239]	Time 0.203 (0.580)	Data Time 0.001 (0.031)	Loss 3.1290 (3.1465)	Entropy 1.51787 (1.52134)	Top-1 acc 45.703 (48.771)	Top-5 acc 73.047 (72.175)	lr 0.02194
Train [28][1070/3239]	Time 0.189 (0.579)	Data Time 0.001 (0.030)	Loss 3.0555 (3.1465)	Entropy 1.51770 (1.52131)	Top-1 acc 52.344 (48.770)	Top-5 acc 73.047 (72.174)	lr 0.02193
Train [28][1080/3239]	Time 0.211 (0.578)	Data Time 0.001 (0.030)	Loss 3.1653 (3.1464)	Entropy 1.51765 (1.52127)	Top-1 acc 50.000 (48.780)	Top-5 acc 70.703 (72.175)	lr 0.02193
Train [28][1090/3239]	Time 0.196 (0.577)	Data Time 0.001 (0.030)	Loss 3.1660 (3.1464)	Entropy 1.51754 (1.52124)	Top-1 acc 46.875 (48.772)	Top-5 acc 70.703 (72.183)	lr 0.02193
Train [28][1100/3239]	Time 0.250 (0.575)	Data Time 0.001 (0.030)	Loss 2.9699 (3.1463)	Entropy 1.51750 (1.52121)	Top-1 acc 52.344 (48.778)	Top-5 acc 72.656 (72.186)	lr 0.02193
Train [28][1110/3239]	Time 2.370 (0.574)	Data Time 0.001 (0.029)	Loss 3.1340 (3.1468)	Entropy 1.51750 (1.52117)	Top-1 acc 48.047 (48.766)	Top-5 acc 75.000 (72.179)	lr 0.02193
Train [28][1120/3239]	Time 0.309 (0.571)	Data Time 0.001 (0.029)	Loss 3.0290 (3.1471)	Entropy 1.51742 (1.52114)	Top-1 acc 53.125 (48.771)	Top-5 acc 74.609 (72.182)	lr 0.02193
Train [28][1130/3239]	Time 0.239 (0.570)	Data Time 0.001 (0.029)	Loss 3.0238 (3.1463)	Entropy 1.51724 (1.52111)	Top-1 acc 51.172 (48.782)	Top-5 acc 76.172 (72.193)	lr 0.02193
Train [28][1140/3239]	Time 0.234 (0.569)	Data Time 0.001 (0.029)	Loss 3.3645 (3.1464)	Entropy 1.51723 (1.52107)	Top-1 acc 46.875 (48.773)	Top-5 acc 69.922 (72.194)	lr 0.02193
Train [28][1150/3239]	Time 0.219 (0.568)	Data Time 0.001 (0.028)	Loss 3.1605 (3.1461)	Entropy 1.51716 (1.52104)	Top-1 acc 49.609 (48.787)	Top-5 acc 70.312 (72.198)	lr 0.02193
Train [28][1160/3239]	Time 0.194 (0.567)	Data Time 0.001 (0.028)	Loss 3.2458 (3.1463)	Entropy 1.51709 (1.52100)	Top-1 acc 44.531 (48.792)	Top-5 acc 71.875 (72.201)	lr 0.02193
Train [28][1170/3239]	Time 0.227 (0.566)	Data Time 0.001 (0.028)	Loss 3.1460 (3.1459)	Entropy 1.51702 (1.52097)	Top-1 acc 44.531 (48.790)	Top-5 acc 71.484 (72.204)	lr 0.02193
Train [28][1180/3239]	Time 0.341 (0.565)	Data Time 0.001 (0.028)	Loss 3.2211 (3.1458)	Entropy 1.51698 (1.52094)	Top-1 acc 49.219 (48.789)	Top-5 acc 70.312 (72.208)	lr 0.02193
Train [28][1190/3239]	Time 0.218 (0.564)	Data Time 0.001 (0.028)	Loss 3.1685 (3.1461)	Entropy 1.51690 (1.52090)	Top-1 acc 50.781 (48.789)	Top-5 acc 73.047 (72.206)	lr 0.02193
Train [28][1200/3239]	Time 0.200 (0.563)	Data Time 0.001 (0.027)	Loss 3.1596 (3.1458)	Entropy 1.51665 (1.52087)	Top-1 acc 48.047 (48.798)	Top-5 acc 73.828 (72.209)	lr 0.02193
Train [28][1210/3239]	Time 0.252 (0.562)	Data Time 0.001 (0.027)	Loss 3.1789 (3.1457)	Entropy 1.51650 (1.52083)	Top-1 acc 46.875 (48.809)	Top-5 acc 71.094 (72.217)	lr 0.02193
Train [28][1220/3239]	Time 2.331 (0.561)	Data Time 0.002 (0.027)	Loss 3.1564 (3.1456)	Entropy 1.51650 (1.52080)	Top-1 acc 48.828 (48.810)	Top-5 acc 70.312 (72.218)	lr 0.02192
Train [28][1230/3239]	Time 0.225 (0.558)	Data Time 0.002 (0.027)	Loss 3.2747 (3.1463)	Entropy 1.51644 (1.52076)	Top-1 acc 45.703 (48.796)	Top-5 acc 71.875 (72.199)	lr 0.02192
Train [28][1240/3239]	Time 0.304 (0.557)	Data Time 0.001 (0.026)	Loss 2.9714 (3.1463)	Entropy 1.51625 (1.52073)	Top-1 acc 51.562 (48.801)	Top-5 acc 76.172 (72.195)	lr 0.02192
Train [28][1250/3239]	Time 0.210 (0.556)	Data Time 0.001 (0.026)	Loss 3.2213 (3.1460)	Entropy 1.51606 (1.52069)	Top-1 acc 46.484 (48.810)	Top-5 acc 65.234 (72.196)	lr 0.02192
Train [28][1260/3239]	Time 0.186 (0.555)	Data Time 0.002 (0.026)	Loss 3.2785 (3.1459)	Entropy 1.51591 (1.52065)	Top-1 acc 46.484 (48.814)	Top-5 acc 70.312 (72.205)	lr 0.02192
Train [28][1270/3239]	Time 0.213 (0.555)	Data Time 0.001 (0.026)	Loss 2.8271 (3.1457)	Entropy 1.51585 (1.52061)	Top-1 acc 55.469 (48.832)	Top-5 acc 79.688 (72.206)	lr 0.02192
Train [28][1280/3239]	Time 0.246 (0.554)	Data Time 0.001 (0.026)	Loss 3.2289 (3.1462)	Entropy 1.51573 (1.52058)	Top-1 acc 44.922 (48.813)	Top-5 acc 72.656 (72.199)	lr 0.02192
Train [28][1290/3239]	Time 0.320 (0.553)	Data Time 0.002 (0.026)	Loss 3.0252 (3.1456)	Entropy 1.51573 (1.52054)	Top-1 acc 54.688 (48.830)	Top-5 acc 74.219 (72.208)	lr 0.02192
Train [28][1300/3239]	Time 0.230 (0.552)	Data Time 0.001 (0.025)	Loss 3.1061 (3.1460)	Entropy 1.51566 (1.52050)	Top-1 acc 49.609 (48.828)	Top-5 acc 73.047 (72.200)	lr 0.02192
Train [28][1310/3239]	Time 0.291 (0.551)	Data Time 0.001 (0.025)	Loss 3.1386 (3.1458)	Entropy 1.51548 (1.52047)	Top-1 acc 46.094 (48.832)	Top-5 acc 72.656 (72.204)	lr 0.02192
Train [28][1320/3239]	Time 0.317 (0.551)	Data Time 0.001 (0.025)	Loss 3.1507 (3.1460)	Entropy 1.51533 (1.52043)	Top-1 acc 47.266 (48.823)	Top-5 acc 72.656 (72.199)	lr 0.02192
Train [28][1330/3239]	Time 47.084 (0.584)	Data Time 0.001 (0.025)	Loss 3.0820 (3.1462)	Entropy 1.51533 (1.52039)	Top-1 acc 51.953 (48.819)	Top-5 acc 73.438 (72.188)	lr 0.02192
Train [28][1340/3239]	Time 0.334 (0.581)	Data Time 0.002 (0.025)	Loss 3.2329 (3.1461)	Entropy 1.51531 (1.52035)	Top-1 acc 45.703 (48.823)	Top-5 acc 73.438 (72.186)	lr 0.02192
Train [28][1350/3239]	Time 0.210 (0.580)	Data Time 0.002 (0.024)	Loss 3.1243 (3.1460)	Entropy 1.51531 (1.52031)	Top-1 acc 47.656 (48.817)	Top-5 acc 71.094 (72.189)	lr 0.02192
Train [28][1360/3239]	Time 0.224 (0.579)	Data Time 0.001 (0.024)	Loss 3.0382 (3.1458)	Entropy 1.51523 (1.52028)	Top-1 acc 51.953 (48.815)	Top-5 acc 74.219 (72.188)	lr 0.02192
Train [28][1370/3239]	Time 0.230 (0.578)	Data Time 0.001 (0.024)	Loss 3.3388 (3.1463)	Entropy 1.51517 (1.52024)	Top-1 acc 44.922 (48.808)	Top-5 acc 67.578 (72.180)	lr 0.02191
Train [28][1380/3239]	Time 0.248 (0.577)	Data Time 0.002 (0.024)	Loss 3.3229 (3.1458)	Entropy 1.51505 (1.52020)	Top-1 acc 46.875 (48.818)	Top-5 acc 67.969 (72.188)	lr 0.02191
Train [28][1390/3239]	Time 0.245 (0.576)	Data Time 0.001 (0.024)	Loss 3.2538 (3.1454)	Entropy 1.51499 (1.52017)	Top-1 acc 49.219 (48.830)	Top-5 acc 70.703 (72.201)	lr 0.02191
Train [28][1400/3239]	Time 0.324 (0.575)	Data Time 0.001 (0.024)	Loss 3.0085 (3.1449)	Entropy 1.51488 (1.52013)	Top-1 acc 51.562 (48.842)	Top-5 acc 75.391 (72.213)	lr 0.02191
Train [28][1410/3239]	Time 0.210 (0.575)	Data Time 0.001 (0.024)	Loss 3.0558 (3.1446)	Entropy 1.51482 (1.52009)	Top-1 acc 53.516 (48.852)	Top-5 acc 73.047 (72.218)	lr 0.02191
Train [28][1420/3239]	Time 0.242 (0.574)	Data Time 0.001 (0.023)	Loss 3.1591 (3.1447)	Entropy 1.51486 (1.52005)	Top-1 acc 50.391 (48.851)	Top-5 acc 71.484 (72.212)	lr 0.02191
Train [28][1430/3239]	Time 0.212 (0.573)	Data Time 0.001 (0.023)	Loss 3.2632 (3.1445)	Entropy 1.51478 (1.52002)	Top-1 acc 44.141 (48.858)	Top-5 acc 69.141 (72.214)	lr 0.02191
Train [28][1440/3239]	Time 2.301 (0.572)	Data Time 0.001 (0.023)	Loss 3.2277 (3.1444)	Entropy 1.51478 (1.51998)	Top-1 acc 48.047 (48.854)	Top-5 acc 70.312 (72.211)	lr 0.02191
Train [28][1450/3239]	Time 0.228 (0.569)	Data Time 0.001 (0.023)	Loss 3.0392 (3.1447)	Entropy 1.51465 (1.51994)	Top-1 acc 51.953 (48.846)	Top-5 acc 74.609 (72.205)	lr 0.02191
Train [28][1460/3239]	Time 0.305 (0.568)	Data Time 0.001 (0.023)	Loss 3.0959 (3.1446)	Entropy 1.51455 (1.51991)	Top-1 acc 50.781 (48.851)	Top-5 acc 71.875 (72.201)	lr 0.02191
Train [28][1470/3239]	Time 0.216 (0.568)	Data Time 0.001 (0.023)	Loss 2.9736 (3.1447)	Entropy 1.51447 (1.51987)	Top-1 acc 53.516 (48.849)	Top-5 acc 74.609 (72.197)	lr 0.02191
Train [28][1480/3239]	Time 0.200 (0.567)	Data Time 0.001 (0.022)	Loss 3.0527 (3.1450)	Entropy 1.51438 (1.51983)	Top-1 acc 51.172 (48.841)	Top-5 acc 75.781 (72.198)	lr 0.02191
Train [28][1490/3239]	Time 0.212 (0.566)	Data Time 0.001 (0.022)	Loss 3.0525 (3.1449)	Entropy 1.51428 (1.51980)	Top-1 acc 51.172 (48.845)	Top-5 acc 76.172 (72.206)	lr 0.02191
Train [28][1500/3239]	Time 0.212 (0.565)	Data Time 0.001 (0.022)	Loss 3.3050 (3.1451)	Entropy 1.51413 (1.51976)	Top-1 acc 47.266 (48.839)	Top-5 acc 67.188 (72.207)	lr 0.02191
Train [28][1510/3239]	Time 0.194 (0.564)	Data Time 0.001 (0.022)	Loss 3.2991 (3.1453)	Entropy 1.51412 (1.51972)	Top-1 acc 44.531 (48.832)	Top-5 acc 67.188 (72.204)	lr 0.02191
Train [28][1520/3239]	Time 0.259 (0.563)	Data Time 0.001 (0.022)	Loss 3.2981 (3.1459)	Entropy 1.51399 (1.51968)	Top-1 acc 42.578 (48.813)	Top-5 acc 67.969 (72.189)	lr 0.02190
Train [28][1530/3239]	Time 0.209 (0.562)	Data Time 0.002 (0.022)	Loss 3.2317 (3.1461)	Entropy 1.51399 (1.51965)	Top-1 acc 47.656 (48.811)	Top-5 acc 71.094 (72.188)	lr 0.02190
Train [28][1540/3239]	Time 0.264 (0.561)	Data Time 0.001 (0.022)	Loss 3.2428 (3.1461)	Entropy 1.51396 (1.51961)	Top-1 acc 49.219 (48.812)	Top-5 acc 68.750 (72.181)	lr 0.02190
Train [28][1550/3239]	Time 2.512 (0.561)	Data Time 0.002 (0.022)	Loss 3.2837 (3.1461)	Entropy 1.51396 (1.51957)	Top-1 acc 46.484 (48.816)	Top-5 acc 66.016 (72.177)	lr 0.02190
Train [28][1560/3239]	Time 0.244 (0.559)	Data Time 0.002 (0.021)	Loss 3.1124 (3.1461)	Entropy 1.51394 (1.51954)	Top-1 acc 46.484 (48.821)	Top-5 acc 69.922 (72.176)	lr 0.02190
Train [28][1570/3239]	Time 0.225 (0.558)	Data Time 0.001 (0.021)	Loss 3.1595 (3.1463)	Entropy 1.51387 (1.51950)	Top-1 acc 49.609 (48.812)	Top-5 acc 70.703 (72.177)	lr 0.02190
Train [28][1580/3239]	Time 0.218 (0.557)	Data Time 0.001 (0.021)	Loss 3.2608 (3.1464)	Entropy 1.51383 (1.51947)	Top-1 acc 46.484 (48.803)	Top-5 acc 69.922 (72.171)	lr 0.02190
Train [28][1590/3239]	Time 0.292 (0.556)	Data Time 0.001 (0.021)	Loss 3.3195 (3.1467)	Entropy 1.51385 (1.51943)	Top-1 acc 43.359 (48.794)	Top-5 acc 73.438 (72.168)	lr 0.02190
Train [28][1600/3239]	Time 0.280 (0.556)	Data Time 0.001 (0.021)	Loss 3.0987 (3.1468)	Entropy 1.51380 (1.51940)	Top-1 acc 46.484 (48.792)	Top-5 acc 73.047 (72.161)	lr 0.02190
Train [28][1610/3239]	Time 0.214 (0.555)	Data Time 0.001 (0.021)	Loss 3.2108 (3.1468)	Entropy 1.51370 (1.51936)	Top-1 acc 46.094 (48.787)	Top-5 acc 70.312 (72.155)	lr 0.02190
Train [28][1620/3239]	Time 0.232 (0.554)	Data Time 0.001 (0.021)	Loss 3.3151 (3.1471)	Entropy 1.51371 (1.51933)	Top-1 acc 43.750 (48.771)	Top-5 acc 66.797 (72.144)	lr 0.02190
Train [28][1630/3239]	Time 0.194 (0.554)	Data Time 0.001 (0.021)	Loss 3.2954 (3.1470)	Entropy 1.51363 (1.51929)	Top-1 acc 43.359 (48.771)	Top-5 acc 67.578 (72.147)	lr 0.02190
Train [28][1640/3239]	Time 0.216 (0.553)	Data Time 0.001 (0.020)	Loss 3.2189 (3.1474)	Entropy 1.51367 (1.51926)	Top-1 acc 48.047 (48.761)	Top-5 acc 66.016 (72.135)	lr 0.02190
Train [28][1650/3239]	Time 0.267 (0.553)	Data Time 0.001 (0.020)	Loss 3.2314 (3.1475)	Entropy 1.51364 (1.51922)	Top-1 acc 43.750 (48.760)	Top-5 acc 73.047 (72.136)	lr 0.02190
Train [28][1660/3239]	Time 2.474 (0.552)	Data Time 0.002 (0.020)	Loss 2.9434 (3.1475)	Entropy 1.51364 (1.51919)	Top-1 acc 55.078 (48.761)	Top-5 acc 75.781 (72.134)	lr 0.02190
Train [28][1670/3239]	Time 0.165 (0.550)	Data Time 0.001 (0.020)	Loss 3.1765 (3.1475)	Entropy 1.51367 (1.51916)	Top-1 acc 50.391 (48.763)	Top-5 acc 69.922 (72.129)	lr 0.02189
Train [28][1680/3239]	Time 0.231 (0.549)	Data Time 0.001 (0.020)	Loss 3.0589 (3.1473)	Entropy 1.51365 (1.51912)	Top-1 acc 52.734 (48.776)	Top-5 acc 75.781 (72.137)	lr 0.02189
Train [28][1690/3239]	Time 0.213 (0.549)	Data Time 0.001 (0.020)	Loss 3.1802 (3.1469)	Entropy 1.51360 (1.51909)	Top-1 acc 49.219 (48.784)	Top-5 acc 71.875 (72.142)	lr 0.02189
Train [28][1700/3239]	Time 0.360 (0.574)	Data Time 0.002 (0.020)	Loss 3.1599 (3.1469)	Entropy 1.51344 (1.51906)	Top-1 acc 49.609 (48.783)	Top-5 acc 71.484 (72.141)	lr 0.02189
Train [28][1710/3239]	Time 0.212 (0.574)	Data Time 0.002 (0.020)	Loss 3.4637 (3.1471)	Entropy 1.51317 (1.51902)	Top-1 acc 41.016 (48.774)	Top-5 acc 65.625 (72.137)	lr 0.02189
Train [28][1720/3239]	Time 0.227 (0.573)	Data Time 0.002 (0.020)	Loss 3.0521 (3.1474)	Entropy 1.51312 (1.51899)	Top-1 acc 50.781 (48.769)	Top-5 acc 73.047 (72.131)	lr 0.02189
Train [28][1730/3239]	Time 0.221 (0.572)	Data Time 0.001 (0.019)	Loss 2.9091 (3.1476)	Entropy 1.51309 (1.51896)	Top-1 acc 53.906 (48.766)	Top-5 acc 77.734 (72.125)	lr 0.02189
Train [28][1740/3239]	Time 0.216 (0.572)	Data Time 0.001 (0.019)	Loss 3.2212 (3.1474)	Entropy 1.51287 (1.51892)	Top-1 acc 44.531 (48.767)	Top-5 acc 70.312 (72.129)	lr 0.02189
Train [28][1750/3239]	Time 0.183 (0.571)	Data Time 0.002 (0.019)	Loss 3.0332 (3.1472)	Entropy 1.51281 (1.51889)	Top-1 acc 51.172 (48.770)	Top-5 acc 75.391 (72.131)	lr 0.02189
Train [28][1760/3239]	Time 0.336 (0.570)	Data Time 0.001 (0.019)	Loss 3.2836 (3.1475)	Entropy 1.51271 (1.51885)	Top-1 acc 42.969 (48.757)	Top-5 acc 70.703 (72.125)	lr 0.02189
Train [28][1770/3239]	Time 2.330 (0.569)	Data Time 0.001 (0.019)	Loss 3.1129 (3.1473)	Entropy 1.51271 (1.51882)	Top-1 acc 48.047 (48.764)	Top-5 acc 74.219 (72.130)	lr 0.02189
Train [28][1780/3239]	Time 0.215 (0.568)	Data Time 0.001 (0.019)	Loss 3.0490 (3.1470)	Entropy 1.51252 (1.51878)	Top-1 acc 48.828 (48.769)	Top-5 acc 76.953 (72.139)	lr 0.02189
Train [28][1790/3239]	Time 0.216 (0.567)	Data Time 0.001 (0.019)	Loss 3.2259 (3.1473)	Entropy 1.51243 (1.51875)	Top-1 acc 47.656 (48.762)	Top-5 acc 71.484 (72.134)	lr 0.02189
Train [28][1800/3239]	Time 0.206 (0.566)	Data Time 0.001 (0.019)	Loss 3.2501 (3.1474)	Entropy 1.51241 (1.51871)	Top-1 acc 47.656 (48.763)	Top-5 acc 67.969 (72.131)	lr 0.02189
Train [28][1810/3239]	Time 0.218 (0.565)	Data Time 0.001 (0.019)	Loss 3.1334 (3.1475)	Entropy 1.51232 (1.51868)	Top-1 acc 46.875 (48.765)	Top-5 acc 69.922 (72.126)	lr 0.02189
Train [28][1820/3239]	Time 0.322 (0.565)	Data Time 0.002 (0.019)	Loss 3.0455 (3.1474)	Entropy 1.51225 (1.51864)	Top-1 acc 50.391 (48.769)	Top-5 acc 71.484 (72.127)	lr 0.02188
Train [28][1830/3239]	Time 0.205 (0.564)	Data Time 0.001 (0.019)	Loss 3.2007 (3.1475)	Entropy 1.51211 (1.51861)	Top-1 acc 46.875 (48.773)	Top-5 acc 70.312 (72.123)	lr 0.02188
Train [28][1840/3239]	Time 0.257 (0.563)	Data Time 0.001 (0.018)	Loss 3.3195 (3.1475)	Entropy 1.51208 (1.51857)	Top-1 acc 44.531 (48.779)	Top-5 acc 68.359 (72.122)	lr 0.02188
Train [28][1850/3239]	Time 0.196 (0.563)	Data Time 0.001 (0.018)	Loss 3.0999 (3.1475)	Entropy 1.51206 (1.51854)	Top-1 acc 45.703 (48.776)	Top-5 acc 72.266 (72.121)	lr 0.02188
Train [28][1860/3239]	Time 0.189 (0.562)	Data Time 0.001 (0.018)	Loss 3.1728 (3.1472)	Entropy 1.51195 (1.51850)	Top-1 acc 47.266 (48.781)	Top-5 acc 69.922 (72.126)	lr 0.02188
Train [28][1870/3239]	Time 0.214 (0.561)	Data Time 0.001 (0.018)	Loss 3.1093 (3.1470)	Entropy 1.51188 (1.51847)	Top-1 acc 46.484 (48.784)	Top-5 acc 75.000 (72.131)	lr 0.02188
Train [28][1880/3239]	Time 2.505 (0.561)	Data Time 0.001 (0.018)	Loss 3.0825 (3.1471)	Entropy 1.51188 (1.51843)	Top-1 acc 51.562 (48.785)	Top-5 acc 73.047 (72.129)	lr 0.02188
Train [28][1890/3239]	Time 0.278 (0.559)	Data Time 0.001 (0.018)	Loss 2.9799 (3.1469)	Entropy 1.51183 (1.51840)	Top-1 acc 54.688 (48.785)	Top-5 acc 73.828 (72.133)	lr 0.02188
Train [28][1900/3239]	Time 0.222 (0.558)	Data Time 0.001 (0.018)	Loss 3.4289 (3.1471)	Entropy 1.51160 (1.51836)	Top-1 acc 44.922 (48.777)	Top-5 acc 69.141 (72.132)	lr 0.02188
Train [28][1910/3239]	Time 0.208 (0.558)	Data Time 0.001 (0.018)	Loss 3.2380 (3.1469)	Entropy 1.51148 (1.51833)	Top-1 acc 46.094 (48.777)	Top-5 acc 68.750 (72.135)	lr 0.02188
Train [28][1920/3239]	Time 0.211 (0.557)	Data Time 0.001 (0.018)	Loss 3.3328 (3.1470)	Entropy 1.51141 (1.51829)	Top-1 acc 40.625 (48.776)	Top-5 acc 69.531 (72.134)	lr 0.02188
Train [28][1930/3239]	Time 0.203 (0.557)	Data Time 0.001 (0.018)	Loss 3.3703 (3.1468)	Entropy 1.51127 (1.51825)	Top-1 acc 45.312 (48.786)	Top-5 acc 68.750 (72.138)	lr 0.02188
Train [28][1940/3239]	Time 0.219 (0.556)	Data Time 0.001 (0.018)	Loss 3.0677 (3.1467)	Entropy 1.51118 (1.51822)	Top-1 acc 50.000 (48.787)	Top-5 acc 72.266 (72.138)	lr 0.02188
Train [28][1950/3239]	Time 0.220 (0.555)	Data Time 0.001 (0.018)	Loss 3.0778 (3.1466)	Entropy 1.51110 (1.51818)	Top-1 acc 47.656 (48.787)	Top-5 acc 71.094 (72.137)	lr 0.02188
Train [28][1960/3239]	Time 0.225 (0.555)	Data Time 0.001 (0.017)	Loss 3.0939 (3.1467)	Entropy 1.51097 (1.51814)	Top-1 acc 48.828 (48.783)	Top-5 acc 76.562 (72.135)	lr 0.02188
Train [28][1970/3239]	Time 0.187 (0.554)	Data Time 0.001 (0.017)	Loss 3.2007 (3.1467)	Entropy 1.51092 (1.51811)	Top-1 acc 47.266 (48.784)	Top-5 acc 71.875 (72.137)	lr 0.02187
Train [28][1980/3239]	Time 0.248 (0.554)	Data Time 0.001 (0.017)	Loss 3.2052 (3.1469)	Entropy 1.51080 (1.51807)	Top-1 acc 44.531 (48.776)	Top-5 acc 66.406 (72.135)	lr 0.02187
Train [28][1990/3239]	Time 2.357 (0.553)	Data Time 0.001 (0.017)	Loss 3.1114 (3.1467)	Entropy 1.51080 (1.51804)	Top-1 acc 49.609 (48.779)	Top-5 acc 71.875 (72.135)	lr 0.02187
Train [28][2000/3239]	Time 0.219 (0.552)	Data Time 0.001 (0.017)	Loss 3.4365 (3.1471)	Entropy 1.51061 (1.51800)	Top-1 acc 42.188 (48.767)	Top-5 acc 64.844 (72.119)	lr 0.02187
Train [28][2010/3239]	Time 0.217 (0.551)	Data Time 0.001 (0.017)	Loss 3.2072 (3.1472)	Entropy 1.51056 (1.51796)	Top-1 acc 43.750 (48.763)	Top-5 acc 70.312 (72.117)	lr 0.02187
Train [28][2020/3239]	Time 0.208 (0.550)	Data Time 0.001 (0.017)	Loss 3.1683 (3.1472)	Entropy 1.51046 (1.51792)	Top-1 acc 50.391 (48.768)	Top-5 acc 72.266 (72.120)	lr 0.02187
Train [28][2030/3239]	Time 0.201 (0.550)	Data Time 0.002 (0.017)	Loss 3.1483 (3.1470)	Entropy 1.51039 (1.51789)	Top-1 acc 48.047 (48.766)	Top-5 acc 73.047 (72.125)	lr 0.02187
Train [28][2040/3239]	Time 0.230 (0.549)	Data Time 0.001 (0.017)	Loss 3.1300 (3.1468)	Entropy 1.51029 (1.51785)	Top-1 acc 53.516 (48.776)	Top-5 acc 74.609 (72.131)	lr 0.02187
Train [28][2050/3239]	Time 0.330 (0.549)	Data Time 0.001 (0.017)	Loss 3.1052 (3.1469)	Entropy 1.51019 (1.51781)	Top-1 acc 51.562 (48.775)	Top-5 acc 73.438 (72.131)	lr 0.02187
Train [28][2060/3239]	Time 0.289 (0.567)	Data Time 0.004 (0.017)	Loss 3.1809 (3.1469)	Entropy 1.51015 (1.51778)	Top-1 acc 45.703 (48.771)	Top-5 acc 73.047 (72.130)	lr 0.02187
Train [28][2070/3239]	Time 0.205 (0.568)	Data Time 0.002 (0.017)	Loss 3.2179 (3.1469)	Entropy 1.51008 (1.51774)	Top-1 acc 49.609 (48.768)	Top-5 acc 68.359 (72.132)	lr 0.02187
Train [28][2080/3239]	Time 0.216 (0.567)	Data Time 0.001 (0.017)	Loss 3.2351 (3.1466)	Entropy 1.50993 (1.51770)	Top-1 acc 52.344 (48.775)	Top-5 acc 69.531 (72.138)	lr 0.02187
Train [28][2090/3239]	Time 0.271 (0.566)	Data Time 0.001 (0.017)	Loss 3.0870 (3.1464)	Entropy 1.50982 (1.51766)	Top-1 acc 46.094 (48.775)	Top-5 acc 78.906 (72.145)	lr 0.02187
Train [28][2100/3239]	Time 2.447 (0.566)	Data Time 0.001 (0.016)	Loss 3.1942 (3.1465)	Entropy 1.50982 (1.51763)	Top-1 acc 50.000 (48.779)	Top-5 acc 69.922 (72.141)	lr 0.02187
Train [28][2110/3239]	Time 0.275 (0.564)	Data Time 0.002 (0.016)	Loss 3.1994 (3.1464)	Entropy 1.50966 (1.51759)	Top-1 acc 50.781 (48.789)	Top-5 acc 70.312 (72.141)	lr 0.02187
Train [28][2120/3239]	Time 0.223 (0.564)	Data Time 0.001 (0.016)	Loss 3.0966 (3.1462)	Entropy 1.50961 (1.51755)	Top-1 acc 48.438 (48.791)	Top-5 acc 70.312 (72.140)	lr 0.02186
Train [28][2130/3239]	Time 0.241 (0.563)	Data Time 0.001 (0.016)	Loss 3.1909 (3.1460)	Entropy 1.50946 (1.51751)	Top-1 acc 43.750 (48.794)	Top-5 acc 70.703 (72.139)	lr 0.02186
Train [28][2140/3239]	Time 0.240 (0.563)	Data Time 0.001 (0.016)	Loss 3.2582 (3.1461)	Entropy 1.50945 (1.51748)	Top-1 acc 45.703 (48.794)	Top-5 acc 69.141 (72.137)	lr 0.02186
Train [28][2150/3239]	Time 0.217 (0.562)	Data Time 0.001 (0.016)	Loss 3.1824 (3.1460)	Entropy 1.50935 (1.51744)	Top-1 acc 50.781 (48.799)	Top-5 acc 71.875 (72.143)	lr 0.02186
Train [28][2160/3239]	Time 0.207 (0.562)	Data Time 0.001 (0.016)	Loss 3.0199 (3.1462)	Entropy 1.50931 (1.51740)	Top-1 acc 54.297 (48.798)	Top-5 acc 77.344 (72.141)	lr 0.02186
Train [28][2170/3239]	Time 0.232 (0.561)	Data Time 0.002 (0.016)	Loss 3.1178 (3.1465)	Entropy 1.50917 (1.51736)	Top-1 acc 53.125 (48.789)	Top-5 acc 72.656 (72.137)	lr 0.02186
Train [28][2180/3239]	Time 0.216 (0.561)	Data Time 0.001 (0.016)	Loss 3.0739 (3.1464)	Entropy 1.50918 (1.51733)	Top-1 acc 50.000 (48.793)	Top-5 acc 75.000 (72.143)	lr 0.02186
Train [28][2190/3239]	Time 0.166 (0.560)	Data Time 0.001 (0.016)	Loss 3.2547 (3.1466)	Entropy 1.50911 (1.51729)	Top-1 acc 46.094 (48.786)	Top-5 acc 69.531 (72.139)	lr 0.02186
Train [28][2200/3239]	Time 0.209 (0.559)	Data Time 0.001 (0.016)	Loss 3.2539 (3.1465)	Entropy 1.50910 (1.51725)	Top-1 acc 46.484 (48.787)	Top-5 acc 70.703 (72.139)	lr 0.02186
Train [28][2210/3239]	Time 2.282 (0.559)	Data Time 0.001 (0.016)	Loss 3.1851 (3.1465)	Entropy 1.50910 (1.51722)	Top-1 acc 45.312 (48.792)	Top-5 acc 72.656 (72.141)	lr 0.02186
Train [28][2220/3239]	Time 0.222 (0.557)	Data Time 0.001 (0.016)	Loss 3.3626 (3.1464)	Entropy 1.50912 (1.51718)	Top-1 acc 44.141 (48.791)	Top-5 acc 71.875 (72.145)	lr 0.02186
Train [28][2230/3239]	Time 0.304 (0.557)	Data Time 0.001 (0.016)	Loss 3.1540 (3.1466)	Entropy 1.50911 (1.51714)	Top-1 acc 41.797 (48.785)	Top-5 acc 73.828 (72.143)	lr 0.02186
Train [28][2240/3239]	Time 0.224 (0.556)	Data Time 0.001 (0.016)	Loss 3.1902 (3.1468)	Entropy 1.50901 (1.51711)	Top-1 acc 46.875 (48.787)	Top-5 acc 70.312 (72.142)	lr 0.02186
Train [28][2250/3239]	Time 0.207 (0.556)	Data Time 0.001 (0.015)	Loss 3.2318 (3.1470)	Entropy 1.50891 (1.51707)	Top-1 acc 46.094 (48.784)	Top-5 acc 71.094 (72.145)	lr 0.02186
Train [28][2260/3239]	Time 0.231 (0.555)	Data Time 0.001 (0.015)	Loss 3.1767 (3.1469)	Entropy 1.50888 (1.51703)	Top-1 acc 47.266 (48.783)	Top-5 acc 72.656 (72.146)	lr 0.02186
Train [28][2270/3239]	Time 0.211 (0.555)	Data Time 0.001 (0.015)	Loss 3.2105 (3.1472)	Entropy 1.50880 (1.51700)	Top-1 acc 45.312 (48.778)	Top-5 acc 70.312 (72.137)	lr 0.02185
Train [28][2280/3239]	Time 0.253 (0.555)	Data Time 0.001 (0.015)	Loss 3.1812 (3.1473)	Entropy 1.50883 (1.51696)	Top-1 acc 47.656 (48.778)	Top-5 acc 70.312 (72.133)	lr 0.02185
Train [28][2290/3239]	Time 0.319 (0.554)	Data Time 0.001 (0.015)	Loss 2.9965 (3.1470)	Entropy 1.50871 (1.51693)	Top-1 acc 55.859 (48.786)	Top-5 acc 73.828 (72.140)	lr 0.02185
Train [28][2300/3239]	Time 0.227 (0.554)	Data Time 0.001 (0.015)	Loss 3.0693 (3.1467)	Entropy 1.50859 (1.51689)	Top-1 acc 50.000 (48.790)	Top-5 acc 75.000 (72.146)	lr 0.02185
Train [28][2310/3239]	Time 0.211 (0.553)	Data Time 0.001 (0.015)	Loss 2.8636 (3.1463)	Entropy 1.50857 (1.51685)	Top-1 acc 55.078 (48.798)	Top-5 acc 79.297 (72.153)	lr 0.02185
Train [28][2320/3239]	Time 2.337 (0.553)	Data Time 0.002 (0.015)	Loss 3.2143 (3.1467)	Entropy 1.50857 (1.51682)	Top-1 acc 49.219 (48.793)	Top-5 acc 67.969 (72.144)	lr 0.02185
Train [28][2330/3239]	Time 0.208 (0.551)	Data Time 0.001 (0.015)	Loss 3.3545 (3.1469)	Entropy 1.50852 (1.51678)	Top-1 acc 44.531 (48.793)	Top-5 acc 68.359 (72.142)	lr 0.02185
Train [28][2340/3239]	Time 0.201 (0.551)	Data Time 0.001 (0.015)	Loss 3.1399 (3.1470)	Entropy 1.50853 (1.51675)	Top-1 acc 50.000 (48.792)	Top-5 acc 73.438 (72.144)	lr 0.02185
Train [28][2350/3239]	Time 0.237 (0.550)	Data Time 0.001 (0.015)	Loss 3.2879 (3.1469)	Entropy 1.50852 (1.51671)	Top-1 acc 46.094 (48.791)	Top-5 acc 69.922 (72.143)	lr 0.02185
Train [28][2360/3239]	Time 0.208 (0.550)	Data Time 0.001 (0.015)	Loss 3.1927 (3.1472)	Entropy 1.50846 (1.51668)	Top-1 acc 46.094 (48.787)	Top-5 acc 68.750 (72.137)	lr 0.02185
Train [28][2370/3239]	Time 0.219 (0.550)	Data Time 0.001 (0.015)	Loss 3.2218 (3.1472)	Entropy 1.50841 (1.51664)	Top-1 acc 47.266 (48.792)	Top-5 acc 69.531 (72.137)	lr 0.02185
Train [28][2380/3239]	Time 0.222 (0.549)	Data Time 0.001 (0.015)	Loss 3.0263 (3.1473)	Entropy 1.50841 (1.51661)	Top-1 acc 51.953 (48.787)	Top-5 acc 73.438 (72.131)	lr 0.02185
Train [28][2390/3239]	Time 0.225 (0.549)	Data Time 0.002 (0.015)	Loss 3.0277 (3.1472)	Entropy 1.50840 (1.51657)	Top-1 acc 53.516 (48.791)	Top-5 acc 75.781 (72.132)	lr 0.02185
Train [28][2400/3239]	Time 0.342 (0.548)	Data Time 0.001 (0.015)	Loss 3.2857 (3.1474)	Entropy 1.50831 (1.51654)	Top-1 acc 49.609 (48.790)	Top-5 acc 69.531 (72.130)	lr 0.02185
Train [28][2410/3239]	Time 0.234 (0.548)	Data Time 0.001 (0.015)	Loss 3.3062 (3.1471)	Entropy 1.50816 (1.51651)	Top-1 acc 46.484 (48.792)	Top-5 acc 68.750 (72.134)	lr 0.02185
Train [28][2420/3239]	Time 0.373 (0.565)	Data Time 0.004 (0.015)	Loss 3.1445 (3.1469)	Entropy 1.50790 (1.51647)	Top-1 acc 50.781 (48.793)	Top-5 acc 72.266 (72.138)	lr 0.02184
Train [28][2430/3239]	Time 2.366 (0.565)	Data Time 0.002 (0.014)	Loss 3.0867 (3.1470)	Entropy 1.50790 (1.51644)	Top-1 acc 51.172 (48.795)	Top-5 acc 73.828 (72.137)	lr 0.02184
Train [28][2440/3239]	Time 0.211 (0.564)	Data Time 0.001 (0.014)	Loss 2.9773 (3.1468)	Entropy 1.50787 (1.51640)	Top-1 acc 50.000 (48.803)	Top-5 acc 75.781 (72.144)	lr 0.02184
Train [28][2450/3239]	Time 0.341 (0.563)	Data Time 0.002 (0.014)	Loss 3.2126 (3.1467)	Entropy 1.50773 (1.51637)	Top-1 acc 44.531 (48.804)	Top-5 acc 73.047 (72.147)	lr 0.02184
Train [28][2460/3239]	Time 0.273 (0.563)	Data Time 0.001 (0.014)	Loss 3.2010 (3.1467)	Entropy 1.50766 (1.51633)	Top-1 acc 44.922 (48.803)	Top-5 acc 71.094 (72.146)	lr 0.02184
Train [28][2470/3239]	Time 0.211 (0.562)	Data Time 0.001 (0.014)	Loss 3.0678 (3.1467)	Entropy 1.50759 (1.51630)	Top-1 acc 50.781 (48.799)	Top-5 acc 75.391 (72.147)	lr 0.02184
Train [28][2480/3239]	Time 0.260 (0.562)	Data Time 0.001 (0.014)	Loss 3.1103 (3.1468)	Entropy 1.50762 (1.51626)	Top-1 acc 51.562 (48.799)	Top-5 acc 72.656 (72.146)	lr 0.02184
Train [28][2490/3239]	Time 0.208 (0.561)	Data Time 0.001 (0.014)	Loss 3.0180 (3.1468)	Entropy 1.50760 (1.51623)	Top-1 acc 53.125 (48.799)	Top-5 acc 77.344 (72.146)	lr 0.02184
Train [28][2500/3239]	Time 0.220 (0.561)	Data Time 0.001 (0.014)	Loss 3.2793 (3.1470)	Entropy 1.50754 (1.51619)	Top-1 acc 46.094 (48.797)	Top-5 acc 69.141 (72.139)	lr 0.02184
Train [28][2510/3239]	Time 0.341 (0.561)	Data Time 0.001 (0.014)	Loss 3.0674 (3.1467)	Entropy 1.50748 (1.51616)	Top-1 acc 52.344 (48.798)	Top-5 acc 75.781 (72.147)	lr 0.02184
Train [28][2520/3239]	Time 0.167 (0.560)	Data Time 0.001 (0.014)	Loss 3.2077 (3.1467)	Entropy 1.50748 (1.51612)	Top-1 acc 47.266 (48.797)	Top-5 acc 69.141 (72.145)	lr 0.02184
Train [28][2530/3239]	Time 0.247 (0.560)	Data Time 0.002 (0.014)	Loss 3.1770 (3.1469)	Entropy 1.50748 (1.51609)	Top-1 acc 51.953 (48.795)	Top-5 acc 72.656 (72.143)	lr 0.02184
Train [28][2540/3239]	Time 2.365 (0.559)	Data Time 0.001 (0.014)	Loss 3.1180 (3.1469)	Entropy 1.50748 (1.51605)	Top-1 acc 49.219 (48.796)	Top-5 acc 73.828 (72.141)	lr 0.02184
Train [28][2550/3239]	Time 0.209 (0.558)	Data Time 0.001 (0.014)	Loss 3.1455 (3.1470)	Entropy 1.50745 (1.51602)	Top-1 acc 48.828 (48.795)	Top-5 acc 68.750 (72.138)	lr 0.02184
Train [28][2560/3239]	Time 0.225 (0.557)	Data Time 0.001 (0.014)	Loss 3.2169 (3.1469)	Entropy 1.50741 (1.51599)	Top-1 acc 48.438 (48.798)	Top-5 acc 72.266 (72.141)	lr 0.02184
Train [28][2570/3239]	Time 0.337 (0.557)	Data Time 0.001 (0.014)	Loss 3.0786 (3.1468)	Entropy 1.50732 (1.51595)	Top-1 acc 47.656 (48.798)	Top-5 acc 73.828 (72.145)	lr 0.02183
Train [28][2580/3239]	Time 0.243 (0.557)	Data Time 0.002 (0.014)	Loss 3.0552 (3.1466)	Entropy 1.50722 (1.51592)	Top-1 acc 53.906 (48.803)	Top-5 acc 71.484 (72.153)	lr 0.02183
Train [28][2590/3239]	Time 0.213 (0.556)	Data Time 0.001 (0.014)	Loss 3.1567 (3.1466)	Entropy 1.50711 (1.51589)	Top-1 acc 48.047 (48.805)	Top-5 acc 71.484 (72.153)	lr 0.02183
Train [28][2600/3239]	Time 0.221 (0.556)	Data Time 0.002 (0.014)	Loss 3.1027 (3.1470)	Entropy 1.50698 (1.51585)	Top-1 acc 53.516 (48.799)	Top-5 acc 71.484 (72.144)	lr 0.02183
Train [28][2610/3239]	Time 0.218 (0.556)	Data Time 0.001 (0.014)	Loss 3.1458 (3.1468)	Entropy 1.50693 (1.51582)	Top-1 acc 49.609 (48.805)	Top-5 acc 71.484 (72.146)	lr 0.02183
Train [28][2620/3239]	Time 0.319 (0.555)	Data Time 0.001 (0.014)	Loss 3.0515 (3.1468)	Entropy 1.50674 (1.51578)	Top-1 acc 50.781 (48.808)	Top-5 acc 75.391 (72.148)	lr 0.02183
Train [28][2630/3239]	Time 0.213 (0.555)	Data Time 0.001 (0.013)	Loss 3.0905 (3.1467)	Entropy 1.50668 (1.51575)	Top-1 acc 50.000 (48.809)	Top-5 acc 73.828 (72.149)	lr 0.02183
Train [28][2640/3239]	Time 0.226 (0.554)	Data Time 0.001 (0.013)	Loss 3.3076 (3.1467)	Entropy 1.50656 (1.51571)	Top-1 acc 42.969 (48.812)	Top-5 acc 66.016 (72.152)	lr 0.02183
Train [28][2650/3239]	Time 0.237 (0.554)	Data Time 0.001 (0.013)	Loss 2.9957 (3.1466)	Entropy 1.50647 (1.51568)	Top-1 acc 51.953 (48.810)	Top-5 acc 75.391 (72.154)	lr 0.02183
Train [28][2660/3239]	Time 0.212 (0.553)	Data Time 0.001 (0.013)	Loss 3.2980 (3.1467)	Entropy 1.50641 (1.51565)	Top-1 acc 47.266 (48.809)	Top-5 acc 67.578 (72.153)	lr 0.02183
Train [28][2670/3239]	Time 0.212 (0.553)	Data Time 0.001 (0.013)	Loss 3.0913 (3.1468)	Entropy 1.50640 (1.51561)	Top-1 acc 51.953 (48.811)	Top-5 acc 71.484 (72.149)	lr 0.02183
Train [28][2680/3239]	Time 0.214 (0.553)	Data Time 0.001 (0.013)	Loss 3.2522 (3.1467)	Entropy 1.50631 (1.51558)	Top-1 acc 45.312 (48.810)	Top-5 acc 71.094 (72.152)	lr 0.02183
Train [28][2690/3239]	Time 0.267 (0.552)	Data Time 0.001 (0.013)	Loss 2.9240 (3.1469)	Entropy 1.50624 (1.51554)	Top-1 acc 52.734 (48.806)	Top-5 acc 80.078 (72.149)	lr 0.02183
Train [28][2700/3239]	Time 0.255 (0.552)	Data Time 0.001 (0.013)	Loss 3.0402 (3.1467)	Entropy 1.50619 (1.51551)	Top-1 acc 50.781 (48.805)	Top-5 acc 72.266 (72.150)	lr 0.02183
Train [28][2710/3239]	Time 0.206 (0.551)	Data Time 0.001 (0.013)	Loss 3.0780 (3.1466)	Entropy 1.50608 (1.51547)	Top-1 acc 51.953 (48.803)	Top-5 acc 75.391 (72.153)	lr 0.02183
Train [28][2720/3239]	Time 0.247 (0.551)	Data Time 0.001 (0.013)	Loss 3.1439 (3.1466)	Entropy 1.50596 (1.51544)	Top-1 acc 50.391 (48.804)	Top-5 acc 71.094 (72.157)	lr 0.02182
Train [28][2730/3239]	Time 0.184 (0.551)	Data Time 0.001 (0.013)	Loss 3.2582 (3.1465)	Entropy 1.50595 (1.51540)	Top-1 acc 44.531 (48.805)	Top-5 acc 70.312 (72.159)	lr 0.02182
Train [28][2740/3239]	Time 0.346 (0.550)	Data Time 0.002 (0.013)	Loss 3.1864 (3.1465)	Entropy 1.50583 (1.51537)	Top-1 acc 49.219 (48.805)	Top-5 acc 73.047 (72.162)	lr 0.02182
Train [28][2750/3239]	Time 0.191 (0.550)	Data Time 0.001 (0.013)	Loss 3.2002 (3.1464)	Entropy 1.50576 (1.51533)	Top-1 acc 47.266 (48.808)	Top-5 acc 69.141 (72.164)	lr 0.02182
Train [28][2760/3239]	Time 0.238 (0.549)	Data Time 0.001 (0.013)	Loss 3.4186 (3.1465)	Entropy 1.50572 (1.51530)	Top-1 acc 44.141 (48.805)	Top-5 acc 67.578 (72.163)	lr 0.02182
Train [28][2770/3239]	Time 0.266 (0.564)	Data Time 0.004 (0.013)	Loss 2.9611 (3.1463)	Entropy 1.50568 (1.51527)	Top-1 acc 52.344 (48.809)	Top-5 acc 75.781 (72.165)	lr 0.02182
Train [28][2780/3239]	Time 0.211 (0.564)	Data Time 0.002 (0.013)	Loss 3.2356 (3.1463)	Entropy 1.50566 (1.51523)	Top-1 acc 47.656 (48.809)	Top-5 acc 68.359 (72.164)	lr 0.02182
Train [28][2790/3239]	Time 0.211 (0.563)	Data Time 0.002 (0.013)	Loss 3.1447 (3.1465)	Entropy 1.50563 (1.51520)	Top-1 acc 48.047 (48.807)	Top-5 acc 74.609 (72.161)	lr 0.02182
Train [28][2800/3239]	Time 0.342 (0.563)	Data Time 0.001 (0.013)	Loss 2.9269 (3.1464)	Entropy 1.50559 (1.51516)	Top-1 acc 55.859 (48.811)	Top-5 acc 73.438 (72.160)	lr 0.02182
Train [28][2810/3239]	Time 0.225 (0.562)	Data Time 0.001 (0.013)	Loss 3.3629 (3.1462)	Entropy 1.50542 (1.51513)	Top-1 acc 45.312 (48.815)	Top-5 acc 65.625 (72.159)	lr 0.02182
Train [28][2820/3239]	Time 0.206 (0.562)	Data Time 0.001 (0.013)	Loss 2.9717 (3.1459)	Entropy 1.50532 (1.51509)	Top-1 acc 51.172 (48.827)	Top-5 acc 74.219 (72.162)	lr 0.02182
Train [28][2830/3239]	Time 0.220 (0.561)	Data Time 0.001 (0.013)	Loss 3.2325 (3.1457)	Entropy 1.50532 (1.51506)	Top-1 acc 47.656 (48.831)	Top-5 acc 69.531 (72.168)	lr 0.02182
Train [28][2840/3239]	Time 0.220 (0.561)	Data Time 0.001 (0.013)	Loss 3.2980 (3.1458)	Entropy 1.50526 (1.51502)	Top-1 acc 43.750 (48.830)	Top-5 acc 69.531 (72.163)	lr 0.02182
Train [28][2850/3239]	Time 0.358 (0.561)	Data Time 0.001 (0.013)	Loss 3.1148 (3.1456)	Entropy 1.50520 (1.51499)	Top-1 acc 49.609 (48.833)	Top-5 acc 71.484 (72.167)	lr 0.02182
Train [28][2860/3239]	Time 0.249 (0.560)	Data Time 0.001 (0.013)	Loss 3.1068 (3.1454)	Entropy 1.50505 (1.51496)	Top-1 acc 48.438 (48.838)	Top-5 acc 76.562 (72.172)	lr 0.02181
Train [28][2870/3239]	Time 0.250 (0.560)	Data Time 0.001 (0.013)	Loss 3.2108 (3.1456)	Entropy 1.50500 (1.51492)	Top-1 acc 48.047 (48.832)	Top-5 acc 69.531 (72.165)	lr 0.02181
Train [28][2880/3239]	Time 0.251 (0.559)	Data Time 0.001 (0.012)	Loss 3.2292 (3.1455)	Entropy 1.50507 (1.51489)	Top-1 acc 50.391 (48.835)	Top-5 acc 69.922 (72.164)	lr 0.02181
Train [28][2890/3239]	Time 0.212 (0.559)	Data Time 0.001 (0.012)	Loss 3.2394 (3.1455)	Entropy 1.50507 (1.51485)	Top-1 acc 43.750 (48.836)	Top-5 acc 71.094 (72.167)	lr 0.02181
Train [28][2900/3239]	Time 0.243 (0.559)	Data Time 0.001 (0.012)	Loss 3.2143 (3.1456)	Entropy 1.50504 (1.51482)	Top-1 acc 48.438 (48.834)	Top-5 acc 70.312 (72.165)	lr 0.02181
Train [28][2910/3239]	Time 0.369 (0.558)	Data Time 0.001 (0.012)	Loss 3.2803 (3.1456)	Entropy 1.50500 (1.51479)	Top-1 acc 46.094 (48.832)	Top-5 acc 69.531 (72.164)	lr 0.02181
Train [28][2920/3239]	Time 0.247 (0.558)	Data Time 0.001 (0.012)	Loss 3.1453 (3.1458)	Entropy 1.50495 (1.51475)	Top-1 acc 51.172 (48.828)	Top-5 acc 75.781 (72.164)	lr 0.02181
Train [28][2930/3239]	Time 0.268 (0.558)	Data Time 0.001 (0.012)	Loss 3.0065 (3.1456)	Entropy 1.50490 (1.51472)	Top-1 acc 51.562 (48.830)	Top-5 acc 75.000 (72.164)	lr 0.02181
Train [28][2940/3239]	Time 0.239 (0.557)	Data Time 0.002 (0.012)	Loss 3.2380 (3.1456)	Entropy 1.50482 (1.51469)	Top-1 acc 49.609 (48.830)	Top-5 acc 72.266 (72.166)	lr 0.02181
Train [28][2950/3239]	Time 0.243 (0.557)	Data Time 0.001 (0.012)	Loss 3.1765 (3.1455)	Entropy 1.50471 (1.51465)	Top-1 acc 49.609 (48.833)	Top-5 acc 71.484 (72.167)	lr 0.02181
Train [28][2960/3239]	Time 0.227 (0.556)	Data Time 0.001 (0.012)	Loss 3.1696 (3.1454)	Entropy 1.50455 (1.51462)	Top-1 acc 48.438 (48.837)	Top-5 acc 70.312 (72.172)	lr 0.02181
Train [28][2970/3239]	Time 0.231 (0.556)	Data Time 0.001 (0.012)	Loss 3.0740 (3.1456)	Entropy 1.50451 (1.51458)	Top-1 acc 50.000 (48.829)	Top-5 acc 71.875 (72.168)	lr 0.02181
Train [28][2980/3239]	Time 0.255 (0.556)	Data Time 0.001 (0.012)	Loss 3.2449 (3.1456)	Entropy 1.50450 (1.51455)	Top-1 acc 48.438 (48.832)	Top-5 acc 72.266 (72.168)	lr 0.02181
Train [28][2990/3239]	Time 0.210 (0.555)	Data Time 0.001 (0.012)	Loss 3.2691 (3.1457)	Entropy 1.50448 (1.51452)	Top-1 acc 46.094 (48.828)	Top-5 acc 72.656 (72.167)	lr 0.02181
Train [28][3000/3239]	Time 0.176 (0.555)	Data Time 0.001 (0.012)	Loss 3.0455 (3.1455)	Entropy 1.50443 (1.51448)	Top-1 acc 48.828 (48.829)	Top-5 acc 74.609 (72.170)	lr 0.02181
Train [28][3010/3239]	Time 0.236 (0.555)	Data Time 0.001 (0.012)	Loss 3.3368 (3.1456)	Entropy 1.50443 (1.51445)	Top-1 acc 44.141 (48.826)	Top-5 acc 68.359 (72.166)	lr 0.02180
Train [28][3020/3239]	Time 0.426 (0.554)	Data Time 0.001 (0.012)	Loss 3.3675 (3.1456)	Entropy 1.50434 (1.51442)	Top-1 acc 42.188 (48.824)	Top-5 acc 67.578 (72.166)	lr 0.02180
Train [28][3030/3239]	Time 0.256 (0.554)	Data Time 0.001 (0.012)	Loss 3.2671 (3.1456)	Entropy 1.50432 (1.51438)	Top-1 acc 46.875 (48.826)	Top-5 acc 67.188 (72.166)	lr 0.02180
Train [28][3040/3239]	Time 0.247 (0.554)	Data Time 0.001 (0.012)	Loss 2.9645 (3.1452)	Entropy 1.50428 (1.51435)	Top-1 acc 51.953 (48.833)	Top-5 acc 75.781 (72.174)	lr 0.02180
Train [28][3050/3239]	Time 0.247 (0.553)	Data Time 0.002 (0.012)	Loss 3.1607 (3.1450)	Entropy 1.50418 (1.51432)	Top-1 acc 48.047 (48.837)	Top-5 acc 70.703 (72.178)	lr 0.02180
Train [28][3060/3239]	Time 0.224 (0.553)	Data Time 0.002 (0.012)	Loss 2.8442 (3.1449)	Entropy 1.50411 (1.51428)	Top-1 acc 56.641 (48.841)	Top-5 acc 77.344 (72.180)	lr 0.02180
Train [28][3070/3239]	Time 0.227 (0.553)	Data Time 0.001 (0.012)	Loss 3.1665 (3.1449)	Entropy 1.50404 (1.51425)	Top-1 acc 47.656 (48.844)	Top-5 acc 71.875 (72.180)	lr 0.02180
Train [28][3080/3239]	Time 0.329 (0.552)	Data Time 0.002 (0.012)	Loss 3.3051 (3.1450)	Entropy 1.50394 (1.51422)	Top-1 acc 41.016 (48.840)	Top-5 acc 67.578 (72.176)	lr 0.02180
Train [28][3090/3239]	Time 0.217 (0.552)	Data Time 0.001 (0.012)	Loss 3.2132 (3.1452)	Entropy 1.50385 (1.51419)	Top-1 acc 49.609 (48.836)	Top-5 acc 69.531 (72.176)	lr 0.02180
Train [28][3100/3239]	Time 0.298 (0.564)	Data Time 0.004 (0.012)	Loss 3.3266 (3.1452)	Entropy 1.50380 (1.51415)	Top-1 acc 49.219 (48.836)	Top-5 acc 71.484 (72.175)	lr 0.02180
Train [28][3110/3239]	Time 0.425 (0.564)	Data Time 0.112 (0.012)	Loss 3.3284 (3.1452)	Entropy 1.50383 (1.51412)	Top-1 acc 41.797 (48.836)	Top-5 acc 69.141 (72.173)	lr 0.02180
Train [28][3120/3239]	Time 0.253 (0.564)	Data Time 0.002 (0.012)	Loss 3.1396 (3.1451)	Entropy 1.50379 (1.51409)	Top-1 acc 51.953 (48.841)	Top-5 acc 73.828 (72.175)	lr 0.02180
Train [28][3130/3239]	Time 0.257 (0.563)	Data Time 0.002 (0.012)	Loss 3.1812 (3.1452)	Entropy 1.50378 (1.51405)	Top-1 acc 48.047 (48.837)	Top-5 acc 70.312 (72.174)	lr 0.02180
Train [28][3140/3239]	Time 0.200 (0.563)	Data Time 0.001 (0.012)	Loss 3.1027 (3.1452)	Entropy 1.50369 (1.51402)	Top-1 acc 50.391 (48.839)	Top-5 acc 71.875 (72.174)	lr 0.02180
Train [28][3150/3239]	Time 0.218 (0.562)	Data Time 0.001 (0.012)	Loss 3.1164 (3.1452)	Entropy 1.50361 (1.51399)	Top-1 acc 49.609 (48.841)	Top-5 acc 69.531 (72.175)	lr 0.02180
Train [28][3160/3239]	Time 0.268 (0.562)	Data Time 0.001 (0.012)	Loss 3.3218 (3.1453)	Entropy 1.50358 (1.51395)	Top-1 acc 46.484 (48.840)	Top-5 acc 68.359 (72.173)	lr 0.02179
Train [28][3170/3239]	Time 0.212 (0.562)	Data Time 0.001 (0.012)	Loss 3.1431 (3.1454)	Entropy 1.50350 (1.51392)	Top-1 acc 46.484 (48.839)	Top-5 acc 73.438 (72.175)	lr 0.02179
Train [28][3180/3239]	Time 0.202 (0.561)	Data Time 0.000 (0.012)	Loss 3.2728 (3.1455)	Entropy 1.50343 (1.51389)	Top-1 acc 42.188 (48.833)	Top-5 acc 72.656 (72.173)	lr 0.02179
Train [28][3190/3239]	Time 0.211 (0.561)	Data Time 0.000 (0.012)	Loss 3.0105 (3.1453)	Entropy 1.50332 (1.51386)	Top-1 acc 53.516 (48.839)	Top-5 acc 75.000 (72.177)	lr 0.02179
Train [28][3200/3239]	Time 0.233 (0.561)	Data Time 0.000 (0.011)	Loss 3.0965 (3.1454)	Entropy 1.50334 (1.51382)	Top-1 acc 50.000 (48.838)	Top-5 acc 71.484 (72.172)	lr 0.02179
Train [28][3210/3239]	Time 0.206 (0.560)	Data Time 0.000 (0.011)	Loss 2.9128 (3.1452)	Entropy 1.50318 (1.51379)	Top-1 acc 56.250 (48.847)	Top-5 acc 78.516 (72.176)	lr 0.02179
Train [28][3220/3239]	Time 0.213 (0.560)	Data Time 0.000 (0.011)	Loss 3.1882 (3.1453)	Entropy 1.50315 (1.51376)	Top-1 acc 47.656 (48.846)	Top-5 acc 69.141 (72.172)	lr 0.02179
Train [28][3230/3239]	Time 0.218 (0.559)	Data Time 0.000 (0.011)	Loss 3.0223 (3.1453)	Entropy 1.50300 (1.51372)	Top-1 acc 48.828 (48.842)	Top-5 acc 73.828 (72.172)	lr 0.02179
Train [28][3239/3239]	Time 2.130 (0.559)	Data Time 0.000 (0.011)	Loss 3.5738 (3.1454)	Entropy 1.50300 (1.51369)	Top-1 acc 43.210 (48.838)	Top-5 acc 59.259 (72.169)	lr 0.02179
==========Valid [28/120]	loss 1.940	top-1 acc 57.104 (57.104)	top-5 acc 79.688	Train top-1 48.838	top-5 72.169	Entropy 1.50300	Latency-None: 0.000ms	Flops: 550.59M
Train [29][0/3239]	Time 30.824 (30.824)	Data Time 28.817 (28.817)	Loss 3.1037 (3.1037)	Entropy 1.50301 (1.50301)	Top-1 acc 48.047 (48.047)	Top-5 acc 71.875 (71.875)	lr 0.02179
Train [29][10/3239]	Time 2.603 (3.392)	Data Time 0.002 (2.650)	Loss 3.0032 (3.0814)	Entropy 1.50301 (1.50301)	Top-1 acc 50.000 (49.680)	Top-5 acc 75.781 (73.935)	lr 0.02179
Train [29][20/3239]	Time 0.320 (1.886)	Data Time 0.002 (1.389)	Loss 3.0125 (3.0790)	Entropy 1.50294 (1.50298)	Top-1 acc 50.781 (50.223)	Top-5 acc 76.172 (73.903)	lr 0.02179
Train [29][30/3239]	Time 0.231 (1.421)	Data Time 0.001 (0.941)	Loss 2.9974 (3.0881)	Entropy 1.50294 (1.50296)	Top-1 acc 51.953 (49.962)	Top-5 acc 73.828 (73.362)	lr 0.02179
Train [29][40/3239]	Time 0.214 (1.187)	Data Time 0.001 (0.712)	Loss 3.1298 (3.0903)	Entropy 1.50293 (1.50295)	Top-1 acc 50.781 (49.867)	Top-5 acc 72.656 (73.190)	lr 0.02179
Train [29][50/3239]	Time 0.214 (1.044)	Data Time 0.001 (0.573)	Loss 3.0460 (3.0926)	Entropy 1.50286 (1.50294)	Top-1 acc 48.438 (49.862)	Top-5 acc 72.656 (73.200)	lr 0.02179
Train [29][60/3239]	Time 0.235 (0.947)	Data Time 0.001 (0.479)	Loss 3.1550 (3.0902)	Entropy 1.50279 (1.50292)	Top-1 acc 50.781 (49.955)	Top-5 acc 71.094 (73.156)	lr 0.02179
Train [29][70/3239]	Time 0.224 (0.881)	Data Time 0.001 (0.412)	Loss 2.9824 (3.0884)	Entropy 1.50277 (1.50290)	Top-1 acc 51.172 (49.994)	Top-5 acc 75.391 (73.179)	lr 0.02178
Train [29][80/3239]	Time 0.171 (0.826)	Data Time 0.001 (0.361)	Loss 3.1393 (3.0884)	Entropy 1.50268 (1.50288)	Top-1 acc 44.922 (49.971)	Top-5 acc 73.438 (73.182)	lr 0.02178
Train [29][90/3239]	Time 0.200 (0.785)	Data Time 0.002 (0.322)	Loss 3.0599 (3.0902)	Entropy 1.50237 (1.50285)	Top-1 acc 48.438 (49.966)	Top-5 acc 73.047 (73.219)	lr 0.02178
Train [29][100/3239]	Time 0.224 (0.753)	Data Time 0.001 (0.290)	Loss 3.1924 (3.0991)	Entropy 1.50232 (1.50280)	Top-1 acc 51.562 (49.884)	Top-5 acc 71.484 (73.113)	lr 0.02178
Train [29][110/3239]	Time 0.179 (0.724)	Data Time 0.001 (0.264)	Loss 3.0012 (3.0946)	Entropy 1.50220 (1.50276)	Top-1 acc 51.562 (49.894)	Top-5 acc 76.953 (73.258)	lr 0.02178
Train [29][120/3239]	Time 2.468 (0.701)	Data Time 0.001 (0.243)	Loss 2.9818 (3.0964)	Entropy 1.50220 (1.50271)	Top-1 acc 51.562 (49.826)	Top-5 acc 76.953 (73.234)	lr 0.02178
Train [29][130/3239]	Time 0.298 (0.666)	Data Time 0.001 (0.225)	Loss 3.3244 (3.0970)	Entropy 1.50214 (1.50267)	Top-1 acc 46.094 (49.761)	Top-5 acc 68.359 (73.244)	lr 0.02178
Train [29][140/3239]	Time 0.203 (0.649)	Data Time 0.001 (0.209)	Loss 3.3388 (3.1002)	Entropy 1.50204 (1.50262)	Top-1 acc 45.312 (49.637)	Top-5 acc 67.969 (73.100)	lr 0.02178
Train [29][150/3239]	Time 0.210 (0.635)	Data Time 0.001 (0.195)	Loss 3.3886 (3.1082)	Entropy 1.50199 (1.50258)	Top-1 acc 37.891 (49.446)	Top-5 acc 69.922 (72.946)	lr 0.02178
Train [29][160/3239]	Time 0.210 (0.623)	Data Time 0.002 (0.183)	Loss 3.2269 (3.1098)	Entropy 1.50189 (1.50254)	Top-1 acc 47.656 (49.457)	Top-5 acc 71.484 (72.972)	lr 0.02178
Train [29][170/3239]	Time 0.214 (0.612)	Data Time 0.001 (0.172)	Loss 3.2772 (3.1125)	Entropy 1.50183 (1.50250)	Top-1 acc 45.703 (49.420)	Top-5 acc 70.312 (72.910)	lr 0.02178
Train [29][180/3239]	Time 0.207 (0.602)	Data Time 0.001 (0.163)	Loss 3.1444 (3.1102)	Entropy 1.50178 (1.50246)	Top-1 acc 49.219 (49.480)	Top-5 acc 69.922 (72.941)	lr 0.02178
Train [29][190/3239]	Time 0.205 (0.594)	Data Time 0.001 (0.155)	Loss 3.0878 (3.1120)	Entropy 1.50178 (1.50243)	Top-1 acc 46.875 (49.470)	Top-5 acc 72.266 (72.900)	lr 0.02178
Train [29][200/3239]	Time 0.205 (0.587)	Data Time 0.001 (0.147)	Loss 3.3079 (3.1116)	Entropy 1.50167 (1.50239)	Top-1 acc 49.219 (49.563)	Top-5 acc 68.750 (72.897)	lr 0.02178
Train [29][210/3239]	Time 0.211 (0.797)	Data Time 0.002 (0.140)	Loss 2.9699 (3.1111)	Entropy 1.50163 (1.50236)	Top-1 acc 52.734 (49.606)	Top-5 acc 77.344 (72.923)	lr 0.02178
Train [29][220/3239]	Time 0.265 (0.783)	Data Time 0.002 (0.134)	Loss 3.1141 (3.1157)	Entropy 1.50159 (1.50233)	Top-1 acc 50.391 (49.468)	Top-5 acc 70.703 (72.806)	lr 0.02177
Train [29][230/3239]	Time 2.536 (0.770)	Data Time 0.003 (0.128)	Loss 3.0673 (3.1162)	Entropy 1.50159 (1.50229)	Top-1 acc 50.000 (49.455)	Top-5 acc 72.266 (72.756)	lr 0.02177
Train [29][240/3239]	Time 0.390 (0.749)	Data Time 0.002 (0.123)	Loss 3.0341 (3.1161)	Entropy 1.50159 (1.50226)	Top-1 acc 53.125 (49.457)	Top-5 acc 75.781 (72.719)	lr 0.02177
Train [29][250/3239]	Time 0.243 (0.738)	Data Time 0.001 (0.118)	Loss 3.0973 (3.1154)	Entropy 1.50149 (1.50223)	Top-1 acc 51.172 (49.448)	Top-5 acc 73.828 (72.708)	lr 0.02177
Train [29][260/3239]	Time 0.224 (0.727)	Data Time 0.001 (0.114)	Loss 3.1034 (3.1156)	Entropy 1.50131 (1.50220)	Top-1 acc 50.781 (49.472)	Top-5 acc 71.875 (72.692)	lr 0.02177
Train [29][270/3239]	Time 0.245 (0.716)	Data Time 0.002 (0.110)	Loss 2.9918 (3.1161)	Entropy 1.50127 (1.50217)	Top-1 acc 50.391 (49.448)	Top-5 acc 75.391 (72.689)	lr 0.02177
Train [29][280/3239]	Time 0.277 (0.707)	Data Time 0.001 (0.106)	Loss 3.0534 (3.1154)	Entropy 1.50125 (1.50213)	Top-1 acc 51.953 (49.459)	Top-5 acc 74.609 (72.740)	lr 0.02177
Train [29][290/3239]	Time 0.212 (0.698)	Data Time 0.001 (0.102)	Loss 3.0413 (3.1134)	Entropy 1.50110 (1.50210)	Top-1 acc 50.781 (49.479)	Top-5 acc 75.391 (72.756)	lr 0.02177
Train [29][300/3239]	Time 0.307 (0.690)	Data Time 0.001 (0.099)	Loss 3.0319 (3.1133)	Entropy 1.50100 (1.50207)	Top-1 acc 51.172 (49.481)	Top-5 acc 76.953 (72.725)	lr 0.02177
Train [29][310/3239]	Time 0.216 (0.682)	Data Time 0.001 (0.096)	Loss 3.1280 (3.1142)	Entropy 1.50093 (1.50203)	Top-1 acc 49.609 (49.450)	Top-5 acc 70.703 (72.720)	lr 0.02177
Train [29][320/3239]	Time 0.231 (0.676)	Data Time 0.001 (0.093)	Loss 3.0667 (3.1127)	Entropy 1.50082 (1.50200)	Top-1 acc 48.047 (49.474)	Top-5 acc 74.219 (72.754)	lr 0.02177
Train [29][330/3239]	Time 0.222 (0.669)	Data Time 0.001 (0.090)	Loss 3.0594 (3.1111)	Entropy 1.50074 (1.50196)	Top-1 acc 52.734 (49.536)	Top-5 acc 70.312 (72.773)	lr 0.02177
Train [29][340/3239]	Time 2.422 (0.662)	Data Time 0.001 (0.087)	Loss 2.9140 (3.1106)	Entropy 1.50074 (1.50192)	Top-1 acc 54.297 (49.554)	Top-5 acc 77.344 (72.770)	lr 0.02177
Train [29][350/3239]	Time 0.255 (0.650)	Data Time 0.001 (0.085)	Loss 3.1067 (3.1096)	Entropy 1.50069 (1.50189)	Top-1 acc 50.000 (49.590)	Top-5 acc 71.875 (72.788)	lr 0.02177
Train [29][360/3239]	Time 0.210 (0.644)	Data Time 0.001 (0.083)	Loss 3.2439 (3.1086)	Entropy 1.50067 (1.50186)	Top-1 acc 45.703 (49.628)	Top-5 acc 71.484 (72.804)	lr 0.02176
Train [29][370/3239]	Time 0.191 (0.638)	Data Time 0.001 (0.080)	Loss 3.3122 (3.1080)	Entropy 1.50059 (1.50182)	Top-1 acc 44.531 (49.635)	Top-5 acc 66.016 (72.800)	lr 0.02176
Train [29][380/3239]	Time 0.225 (0.633)	Data Time 0.001 (0.079)	Loss 3.1295 (3.1068)	Entropy 1.50057 (1.50179)	Top-1 acc 50.000 (49.670)	Top-5 acc 68.359 (72.813)	lr 0.02176
Train [29][390/3239]	Time 0.203 (0.628)	Data Time 0.001 (0.077)	Loss 3.1228 (3.1076)	Entropy 1.50044 (1.50176)	Top-1 acc 50.000 (49.638)	Top-5 acc 72.656 (72.787)	lr 0.02176
Train [29][400/3239]	Time 0.210 (0.624)	Data Time 0.001 (0.075)	Loss 2.9239 (3.1076)	Entropy 1.50041 (1.50172)	Top-1 acc 56.641 (49.650)	Top-5 acc 78.125 (72.788)	lr 0.02176
Train [29][410/3239]	Time 0.290 (0.620)	Data Time 0.001 (0.073)	Loss 3.1137 (3.1078)	Entropy 1.50036 (1.50169)	Top-1 acc 46.484 (49.631)	Top-5 acc 75.391 (72.790)	lr 0.02176
Train [29][420/3239]	Time 0.189 (0.615)	Data Time 0.002 (0.071)	Loss 3.1343 (3.1073)	Entropy 1.50028 (1.50166)	Top-1 acc 50.781 (49.653)	Top-5 acc 73.438 (72.801)	lr 0.02176
Train [29][430/3239]	Time 0.256 (0.611)	Data Time 0.001 (0.070)	Loss 2.9558 (3.1072)	Entropy 1.50024 (1.50163)	Top-1 acc 52.734 (49.657)	Top-5 acc 75.000 (72.793)	lr 0.02176
Train [29][440/3239]	Time 0.228 (0.608)	Data Time 0.001 (0.068)	Loss 2.9437 (3.1062)	Entropy 1.50012 (1.50159)	Top-1 acc 53.125 (49.688)	Top-5 acc 75.000 (72.807)	lr 0.02176
Train [29][450/3239]	Time 2.304 (0.604)	Data Time 0.001 (0.067)	Loss 2.9997 (3.1059)	Entropy 1.50012 (1.50156)	Top-1 acc 56.250 (49.718)	Top-5 acc 75.391 (72.814)	lr 0.02176
Train [29][460/3239]	Time 0.211 (0.595)	Data Time 0.001 (0.065)	Loss 3.2319 (3.1070)	Entropy 1.50011 (1.50153)	Top-1 acc 46.484 (49.699)	Top-5 acc 69.531 (72.798)	lr 0.02176
Train [29][470/3239]	Time 0.233 (0.592)	Data Time 0.001 (0.064)	Loss 3.0079 (3.1065)	Entropy 1.50010 (1.50150)	Top-1 acc 51.172 (49.687)	Top-5 acc 76.562 (72.810)	lr 0.02176
Train [29][480/3239]	Time 0.168 (0.589)	Data Time 0.001 (0.062)	Loss 3.0282 (3.1063)	Entropy 1.49997 (1.50147)	Top-1 acc 52.344 (49.691)	Top-5 acc 72.656 (72.824)	lr 0.02176
Train [29][490/3239]	Time 0.252 (0.586)	Data Time 0.001 (0.061)	Loss 3.0351 (3.1055)	Entropy 1.49989 (1.50144)	Top-1 acc 48.438 (49.717)	Top-5 acc 73.047 (72.843)	lr 0.02176
Train [29][500/3239]	Time 0.248 (0.583)	Data Time 0.001 (0.060)	Loss 3.1048 (3.1045)	Entropy 1.49981 (1.50141)	Top-1 acc 51.172 (49.747)	Top-5 acc 72.656 (72.871)	lr 0.02176
Train [29][510/3239]	Time 0.205 (0.581)	Data Time 0.001 (0.059)	Loss 3.1318 (3.1030)	Entropy 1.49978 (1.50137)	Top-1 acc 48.828 (49.794)	Top-5 acc 72.266 (72.910)	lr 0.02175
Train [29][520/3239]	Time 0.203 (0.578)	Data Time 0.001 (0.058)	Loss 2.9853 (3.1032)	Entropy 1.49957 (1.50134)	Top-1 acc 55.859 (49.795)	Top-5 acc 73.047 (72.905)	lr 0.02175
Train [29][530/3239]	Time 0.215 (0.575)	Data Time 0.001 (0.057)	Loss 3.1675 (3.1039)	Entropy 1.49949 (1.50131)	Top-1 acc 44.922 (49.776)	Top-5 acc 72.656 (72.898)	lr 0.02175
Train [29][540/3239]	Time 0.211 (0.573)	Data Time 0.001 (0.056)	Loss 3.2233 (3.1056)	Entropy 1.49946 (1.50127)	Top-1 acc 44.141 (49.716)	Top-5 acc 69.531 (72.849)	lr 0.02175
Train [29][550/3239]	Time 0.268 (0.571)	Data Time 0.001 (0.055)	Loss 3.2086 (3.1059)	Entropy 1.49946 (1.50124)	Top-1 acc 47.656 (49.730)	Top-5 acc 71.094 (72.849)	lr 0.02175
Train [29][560/3239]	Time 2.321 (0.568)	Data Time 0.001 (0.054)	Loss 3.2418 (3.1066)	Entropy 1.49946 (1.50121)	Top-1 acc 43.359 (49.718)	Top-5 acc 69.922 (72.839)	lr 0.02175
Train [29][570/3239]	Time 0.211 (0.562)	Data Time 0.001 (0.053)	Loss 3.1236 (3.1074)	Entropy 1.49948 (1.50118)	Top-1 acc 48.047 (49.702)	Top-5 acc 72.656 (72.822)	lr 0.02175
Train [29][580/3239]	Time 0.303 (0.634)	Data Time 0.003 (0.052)	Loss 3.2126 (3.1084)	Entropy 1.49951 (1.50115)	Top-1 acc 44.922 (49.678)	Top-5 acc 72.266 (72.799)	lr 0.02175
Train [29][590/3239]	Time 0.213 (0.631)	Data Time 0.002 (0.051)	Loss 3.0497 (3.1083)	Entropy 1.49937 (1.50112)	Top-1 acc 50.391 (49.679)	Top-5 acc 76.953 (72.808)	lr 0.02175
Train [29][600/3239]	Time 0.207 (0.628)	Data Time 0.001 (0.050)	Loss 3.2353 (3.1085)	Entropy 1.49930 (1.50109)	Top-1 acc 46.875 (49.659)	Top-5 acc 70.312 (72.818)	lr 0.02175
Train [29][610/3239]	Time 0.217 (0.625)	Data Time 0.001 (0.050)	Loss 3.3218 (3.1092)	Entropy 1.49913 (1.50106)	Top-1 acc 42.969 (49.644)	Top-5 acc 70.703 (72.808)	lr 0.02175
Train [29][620/3239]	Time 0.229 (0.622)	Data Time 0.001 (0.049)	Loss 3.3049 (3.1090)	Entropy 1.49910 (1.50103)	Top-1 acc 48.828 (49.660)	Top-5 acc 64.453 (72.800)	lr 0.02175
Train [29][630/3239]	Time 0.201 (0.619)	Data Time 0.002 (0.048)	Loss 3.1594 (3.1095)	Entropy 1.49900 (1.50100)	Top-1 acc 48.047 (49.646)	Top-5 acc 71.875 (72.797)	lr 0.02175
Train [29][640/3239]	Time 0.168 (0.616)	Data Time 0.001 (0.047)	Loss 3.1386 (3.1099)	Entropy 1.49899 (1.50097)	Top-1 acc 51.172 (49.647)	Top-5 acc 72.266 (72.791)	lr 0.02175
Train [29][650/3239]	Time 0.194 (0.614)	Data Time 0.024 (0.047)	Loss 3.1713 (3.1089)	Entropy 1.49899 (1.50094)	Top-1 acc 48.828 (49.649)	Top-5 acc 70.703 (72.814)	lr 0.02175
Train [29][660/3239]	Time 0.210 (0.611)	Data Time 0.001 (0.046)	Loss 3.0557 (3.1086)	Entropy 1.49897 (1.50091)	Top-1 acc 48.438 (49.669)	Top-5 acc 75.000 (72.824)	lr 0.02174
Train [29][670/3239]	Time 2.300 (0.609)	Data Time 0.001 (0.046)	Loss 3.1352 (3.1087)	Entropy 1.49897 (1.50088)	Top-1 acc 46.484 (49.667)	Top-5 acc 69.141 (72.811)	lr 0.02174
Train [29][680/3239]	Time 0.239 (0.603)	Data Time 0.002 (0.045)	Loss 3.0502 (3.1076)	Entropy 1.49905 (1.50085)	Top-1 acc 50.391 (49.683)	Top-5 acc 71.094 (72.839)	lr 0.02174
Train [29][690/3239]	Time 0.333 (0.601)	Data Time 0.001 (0.044)	Loss 3.0244 (3.1096)	Entropy 1.49889 (1.50082)	Top-1 acc 51.172 (49.653)	Top-5 acc 77.344 (72.813)	lr 0.02174
Train [29][700/3239]	Time 0.231 (0.599)	Data Time 0.001 (0.044)	Loss 3.3054 (3.1099)	Entropy 1.49888 (1.50079)	Top-1 acc 43.750 (49.634)	Top-5 acc 67.188 (72.801)	lr 0.02174
Train [29][710/3239]	Time 0.202 (0.597)	Data Time 0.002 (0.043)	Loss 3.1896 (3.1094)	Entropy 1.49880 (1.50077)	Top-1 acc 52.344 (49.654)	Top-5 acc 69.531 (72.800)	lr 0.02174
Train [29][720/3239]	Time 0.208 (0.595)	Data Time 0.001 (0.043)	Loss 3.0799 (3.1097)	Entropy 1.49874 (1.50074)	Top-1 acc 56.250 (49.654)	Top-5 acc 75.000 (72.800)	lr 0.02174
Train [29][730/3239]	Time 0.263 (0.593)	Data Time 0.001 (0.042)	Loss 3.1222 (3.1092)	Entropy 1.49872 (1.50071)	Top-1 acc 50.781 (49.659)	Top-5 acc 72.266 (72.803)	lr 0.02174
Train [29][740/3239]	Time 0.205 (0.590)	Data Time 0.001 (0.041)	Loss 3.2323 (3.1092)	Entropy 1.49857 (1.50068)	Top-1 acc 46.484 (49.667)	Top-5 acc 69.922 (72.800)	lr 0.02174
Train [29][750/3239]	Time 0.219 (0.588)	Data Time 0.001 (0.041)	Loss 3.1576 (3.1096)	Entropy 1.49849 (1.50066)	Top-1 acc 51.953 (49.666)	Top-5 acc 73.047 (72.805)	lr 0.02174
Train [29][760/3239]	Time 0.300 (0.587)	Data Time 0.001 (0.040)	Loss 3.1417 (3.1095)	Entropy 1.49844 (1.50063)	Top-1 acc 48.438 (49.668)	Top-5 acc 75.391 (72.815)	lr 0.02174
Train [29][770/3239]	Time 0.229 (0.585)	Data Time 0.001 (0.040)	Loss 2.9634 (3.1093)	Entropy 1.49817 (1.50060)	Top-1 acc 50.391 (49.661)	Top-5 acc 76.172 (72.816)	lr 0.02174
Train [29][780/3239]	Time 2.394 (0.583)	Data Time 0.001 (0.039)	Loss 3.5467 (3.1096)	Entropy 1.49817 (1.50057)	Top-1 acc 37.891 (49.624)	Top-5 acc 63.672 (72.811)	lr 0.02174
Train [29][790/3239]	Time 0.235 (0.579)	Data Time 0.001 (0.039)	Loss 3.0099 (3.1092)	Entropy 1.49813 (1.50054)	Top-1 acc 51.562 (49.632)	Top-5 acc 75.000 (72.827)	lr 0.02174
Train [29][800/3239]	Time 0.220 (0.577)	Data Time 0.001 (0.038)	Loss 2.9418 (3.1092)	Entropy 1.49813 (1.50051)	Top-1 acc 55.469 (49.636)	Top-5 acc 77.734 (72.834)	lr 0.02174
Train [29][810/3239]	Time 0.324 (0.575)	Data Time 0.001 (0.038)	Loss 3.2051 (3.1095)	Entropy 1.49813 (1.50048)	Top-1 acc 47.656 (49.637)	Top-5 acc 69.531 (72.833)	lr 0.02173
Train [29][820/3239]	Time 0.247 (0.574)	Data Time 0.002 (0.038)	Loss 3.2152 (3.1106)	Entropy 1.49806 (1.50045)	Top-1 acc 48.438 (49.614)	Top-5 acc 73.438 (72.815)	lr 0.02173
Train [29][830/3239]	Time 0.220 (0.573)	Data Time 0.001 (0.037)	Loss 3.1790 (3.1106)	Entropy 1.49807 (1.50042)	Top-1 acc 43.359 (49.607)	Top-5 acc 71.875 (72.809)	lr 0.02173
Train [29][840/3239]	Time 0.204 (0.571)	Data Time 0.001 (0.037)	Loss 3.1425 (3.1104)	Entropy 1.49805 (1.50039)	Top-1 acc 51.172 (49.615)	Top-5 acc 71.094 (72.816)	lr 0.02173
Train [29][850/3239]	Time 0.202 (0.570)	Data Time 0.001 (0.036)	Loss 3.1064 (3.1103)	Entropy 1.49806 (1.50036)	Top-1 acc 49.219 (49.628)	Top-5 acc 73.047 (72.816)	lr 0.02173
Train [29][860/3239]	Time 0.223 (0.568)	Data Time 0.002 (0.036)	Loss 3.2577 (3.1104)	Entropy 1.49800 (1.50034)	Top-1 acc 47.266 (49.621)	Top-5 acc 69.922 (72.826)	lr 0.02173
Train [29][870/3239]	Time 0.230 (0.567)	Data Time 0.001 (0.035)	Loss 3.2834 (3.1105)	Entropy 1.49788 (1.50031)	Top-1 acc 46.094 (49.626)	Top-5 acc 69.141 (72.818)	lr 0.02173
Train [29][880/3239]	Time 0.215 (0.566)	Data Time 0.001 (0.035)	Loss 3.0030 (3.1107)	Entropy 1.49783 (1.50028)	Top-1 acc 50.781 (49.610)	Top-5 acc 75.781 (72.808)	lr 0.02173
Train [29][890/3239]	Time 2.360 (0.565)	Data Time 0.001 (0.035)	Loss 3.0454 (3.1107)	Entropy 1.49783 (1.50025)	Top-1 acc 48.438 (49.605)	Top-5 acc 75.781 (72.809)	lr 0.02173
Train [29][900/3239]	Time 0.227 (0.561)	Data Time 0.002 (0.034)	Loss 3.1085 (3.1105)	Entropy 1.49766 (1.50023)	Top-1 acc 49.219 (49.617)	Top-5 acc 74.609 (72.822)	lr 0.02173
Train [29][910/3239]	Time 0.227 (0.559)	Data Time 0.001 (0.034)	Loss 2.9798 (3.1109)	Entropy 1.49758 (1.50020)	Top-1 acc 52.734 (49.619)	Top-5 acc 75.781 (72.814)	lr 0.02173
Train [29][920/3239]	Time 0.251 (0.558)	Data Time 0.001 (0.034)	Loss 3.1661 (3.1109)	Entropy 1.49753 (1.50017)	Top-1 acc 49.219 (49.614)	Top-5 acc 72.266 (72.819)	lr 0.02173
Train [29][930/3239]	Time 0.213 (0.557)	Data Time 0.001 (0.033)	Loss 3.0580 (3.1112)	Entropy 1.49760 (1.50014)	Top-1 acc 50.391 (49.609)	Top-5 acc 72.266 (72.814)	lr 0.02173
Train [29][940/3239]	Time 0.209 (0.602)	Data Time 0.002 (0.033)	Loss 2.9536 (3.1111)	Entropy 1.49752 (1.50011)	Top-1 acc 55.859 (49.609)	Top-5 acc 78.125 (72.805)	lr 0.02173
Train [29][950/3239]	Time 0.207 (0.601)	Data Time 0.002 (0.033)	Loss 3.2355 (3.1118)	Entropy 1.49740 (1.50008)	Top-1 acc 46.484 (49.590)	Top-5 acc 69.141 (72.789)	lr 0.02172
Train [29][960/3239]	Time 0.211 (0.599)	Data Time 0.001 (0.032)	Loss 3.3049 (3.1117)	Entropy 1.49731 (1.50006)	Top-1 acc 46.484 (49.588)	Top-5 acc 69.141 (72.792)	lr 0.02172
Train [29][970/3239]	Time 0.240 (0.598)	Data Time 0.001 (0.032)	Loss 3.2909 (3.1120)	Entropy 1.49728 (1.50003)	Top-1 acc 45.703 (49.579)	Top-5 acc 68.750 (72.781)	lr 0.02172
Train [29][980/3239]	Time 0.266 (0.596)	Data Time 0.001 (0.032)	Loss 2.9661 (3.1122)	Entropy 1.49724 (1.50000)	Top-1 acc 51.172 (49.568)	Top-5 acc 76.172 (72.780)	lr 0.02172
Train [29][990/3239]	Time 0.234 (0.595)	Data Time 0.001 (0.031)	Loss 3.1992 (3.1121)	Entropy 1.49724 (1.49997)	Top-1 acc 48.438 (49.569)	Top-5 acc 70.703 (72.778)	lr 0.02172
Train [29][1000/3239]	Time 2.450 (0.593)	Data Time 0.001 (0.031)	Loss 3.1058 (3.1122)	Entropy 1.49724 (1.49994)	Top-1 acc 50.391 (49.561)	Top-5 acc 70.312 (72.774)	lr 0.02172
Train [29][1010/3239]	Time 0.227 (0.590)	Data Time 0.001 (0.031)	Loss 3.1495 (3.1126)	Entropy 1.49715 (1.49992)	Top-1 acc 50.000 (49.559)	Top-5 acc 73.047 (72.771)	lr 0.02172
Train [29][1020/3239]	Time 0.234 (0.588)	Data Time 0.001 (0.031)	Loss 3.1475 (3.1130)	Entropy 1.49711 (1.49989)	Top-1 acc 51.562 (49.557)	Top-5 acc 70.703 (72.760)	lr 0.02172
Train [29][1030/3239]	Time 0.239 (0.587)	Data Time 0.001 (0.030)	Loss 3.1335 (3.1132)	Entropy 1.49707 (1.49986)	Top-1 acc 48.438 (49.549)	Top-5 acc 72.266 (72.747)	lr 0.02172
Train [29][1040/3239]	Time 0.305 (0.586)	Data Time 0.001 (0.030)	Loss 3.0948 (3.1133)	Entropy 1.49702 (1.49983)	Top-1 acc 48.047 (49.538)	Top-5 acc 68.750 (72.739)	lr 0.02172
Train [29][1050/3239]	Time 0.198 (0.584)	Data Time 0.001 (0.030)	Loss 3.2290 (3.1140)	Entropy 1.49732 (1.49981)	Top-1 acc 43.750 (49.523)	Top-5 acc 68.750 (72.724)	lr 0.02172
Train [29][1060/3239]	Time 0.179 (0.583)	Data Time 0.001 (0.029)	Loss 3.1834 (3.1147)	Entropy 1.49723 (1.49979)	Top-1 acc 50.000 (49.510)	Top-5 acc 71.094 (72.712)	lr 0.02172
Train [29][1070/3239]	Time 0.213 (0.582)	Data Time 0.001 (0.029)	Loss 3.2349 (3.1148)	Entropy 1.49711 (1.49976)	Top-1 acc 48.438 (49.514)	Top-5 acc 70.312 (72.718)	lr 0.02172
Train [29][1080/3239]	Time 0.210 (0.580)	Data Time 0.001 (0.029)	Loss 2.9430 (3.1142)	Entropy 1.49706 (1.49974)	Top-1 acc 53.516 (49.541)	Top-5 acc 77.344 (72.723)	lr 0.02172
Train [29][1090/3239]	Time 0.212 (0.579)	Data Time 0.002 (0.029)	Loss 3.2158 (3.1145)	Entropy 1.49703 (1.49971)	Top-1 acc 45.703 (49.531)	Top-5 acc 69.141 (72.719)	lr 0.02172
Train [29][1100/3239]	Time 0.330 (0.578)	Data Time 0.001 (0.029)	Loss 3.0948 (3.1141)	Entropy 1.49688 (1.49969)	Top-1 acc 50.781 (49.534)	Top-5 acc 70.703 (72.721)	lr 0.02171
Train [29][1110/3239]	Time 2.351 (0.577)	Data Time 0.001 (0.028)	Loss 3.1516 (3.1143)	Entropy 1.49688 (1.49966)	Top-1 acc 46.484 (49.534)	Top-5 acc 71.094 (72.717)	lr 0.02171
Train [29][1120/3239]	Time 0.222 (0.574)	Data Time 0.002 (0.028)	Loss 3.0129 (3.1146)	Entropy 1.49676 (1.49964)	Top-1 acc 50.781 (49.518)	Top-5 acc 74.609 (72.711)	lr 0.02171
Train [29][1130/3239]	Time 0.189 (0.572)	Data Time 0.001 (0.028)	Loss 3.0467 (3.1143)	Entropy 1.49673 (1.49961)	Top-1 acc 53.516 (49.532)	Top-5 acc 70.703 (72.720)	lr 0.02171
Train [29][1140/3239]	Time 0.211 (0.571)	Data Time 0.001 (0.028)	Loss 3.2229 (3.1142)	Entropy 1.49668 (1.49959)	Top-1 acc 46.875 (49.531)	Top-5 acc 69.922 (72.726)	lr 0.02171
Train [29][1150/3239]	Time 0.228 (0.570)	Data Time 0.001 (0.027)	Loss 3.1512 (3.1141)	Entropy 1.49666 (1.49956)	Top-1 acc 44.531 (49.530)	Top-5 acc 73.438 (72.726)	lr 0.02171
Train [29][1160/3239]	Time 0.225 (0.569)	Data Time 0.001 (0.027)	Loss 3.0707 (3.1142)	Entropy 1.49661 (1.49953)	Top-1 acc 48.047 (49.534)	Top-5 acc 74.219 (72.726)	lr 0.02171
Train [29][1170/3239]	Time 0.203 (0.568)	Data Time 0.001 (0.027)	Loss 3.3504 (3.1148)	Entropy 1.49644 (1.49951)	Top-1 acc 44.922 (49.524)	Top-5 acc 67.969 (72.715)	lr 0.02171
Train [29][1180/3239]	Time 0.220 (0.567)	Data Time 0.001 (0.027)	Loss 3.1691 (3.1151)	Entropy 1.49637 (1.49948)	Top-1 acc 44.531 (49.519)	Top-5 acc 71.875 (72.713)	lr 0.02171
Train [29][1190/3239]	Time 0.215 (0.566)	Data Time 0.001 (0.026)	Loss 3.2326 (3.1151)	Entropy 1.49623 (1.49946)	Top-1 acc 47.266 (49.520)	Top-5 acc 67.188 (72.717)	lr 0.02171
Train [29][1200/3239]	Time 0.233 (0.565)	Data Time 0.001 (0.026)	Loss 3.1076 (3.1151)	Entropy 1.49606 (1.49943)	Top-1 acc 47.266 (49.513)	Top-5 acc 75.391 (72.723)	lr 0.02171
Train [29][1210/3239]	Time 0.344 (0.564)	Data Time 0.001 (0.026)	Loss 3.1906 (3.1159)	Entropy 1.49603 (1.49940)	Top-1 acc 47.266 (49.487)	Top-5 acc 67.969 (72.704)	lr 0.02171
Train [29][1220/3239]	Time 2.350 (0.563)	Data Time 0.001 (0.026)	Loss 3.4096 (3.1164)	Entropy 1.49603 (1.49937)	Top-1 acc 43.750 (49.480)	Top-5 acc 67.578 (72.694)	lr 0.02171
Train [29][1230/3239]	Time 0.214 (0.560)	Data Time 0.001 (0.026)	Loss 3.2212 (3.1168)	Entropy 1.49586 (1.49935)	Top-1 acc 48.438 (49.476)	Top-5 acc 70.703 (72.682)	lr 0.02171
Train [29][1240/3239]	Time 0.228 (0.560)	Data Time 0.001 (0.025)	Loss 3.1492 (3.1168)	Entropy 1.49583 (1.49932)	Top-1 acc 48.438 (49.475)	Top-5 acc 69.922 (72.676)	lr 0.02171
Train [29][1250/3239]	Time 0.230 (0.559)	Data Time 0.001 (0.025)	Loss 3.3172 (3.1169)	Entropy 1.49576 (1.49929)	Top-1 acc 44.141 (49.467)	Top-5 acc 68.359 (72.672)	lr 0.02170
Train [29][1260/3239]	Time 0.231 (0.558)	Data Time 0.001 (0.025)	Loss 3.1391 (3.1171)	Entropy 1.49561 (1.49926)	Top-1 acc 50.000 (49.464)	Top-5 acc 71.484 (72.676)	lr 0.02170
Train [29][1270/3239]	Time 0.210 (0.557)	Data Time 0.001 (0.025)	Loss 3.1565 (3.1173)	Entropy 1.49554 (1.49923)	Top-1 acc 49.219 (49.455)	Top-5 acc 71.484 (72.678)	lr 0.02170
Train [29][1280/3239]	Time 0.211 (0.556)	Data Time 0.001 (0.025)	Loss 3.0337 (3.1172)	Entropy 1.49553 (1.49920)	Top-1 acc 49.609 (49.456)	Top-5 acc 76.953 (72.680)	lr 0.02170
Train [29][1290/3239]	Time 0.218 (0.555)	Data Time 0.001 (0.025)	Loss 3.0705 (3.1171)	Entropy 1.49558 (1.49917)	Top-1 acc 50.391 (49.468)	Top-5 acc 73.438 (72.682)	lr 0.02170
Train [29][1300/3239]	Time 0.285 (0.587)	Data Time 0.002 (0.024)	Loss 3.1223 (3.1170)	Entropy 1.49551 (1.49915)	Top-1 acc 52.734 (49.478)	Top-5 acc 73.438 (72.682)	lr 0.02170
Train [29][1310/3239]	Time 0.220 (0.586)	Data Time 0.002 (0.024)	Loss 3.1393 (3.1175)	Entropy 1.49544 (1.49912)	Top-1 acc 49.609 (49.467)	Top-5 acc 73.438 (72.677)	lr 0.02170
Train [29][1320/3239]	Time 0.290 (0.585)	Data Time 0.001 (0.024)	Loss 3.2285 (3.1179)	Entropy 1.49547 (1.49909)	Top-1 acc 45.312 (49.457)	Top-5 acc 70.703 (72.670)	lr 0.02170
Train [29][1330/3239]	Time 2.493 (0.584)	Data Time 0.002 (0.024)	Loss 3.0604 (3.1179)	Entropy 1.49547 (1.49906)	Top-1 acc 45.703 (49.461)	Top-5 acc 74.219 (72.672)	lr 0.02170
Train [29][1340/3239]	Time 0.256 (0.582)	Data Time 0.001 (0.024)	Loss 3.1632 (3.1182)	Entropy 1.49544 (1.49904)	Top-1 acc 47.266 (49.458)	Top-5 acc 69.531 (72.663)	lr 0.02170
Train [29][1350/3239]	Time 0.222 (0.581)	Data Time 0.001 (0.024)	Loss 3.0938 (3.1182)	Entropy 1.49539 (1.49901)	Top-1 acc 49.219 (49.455)	Top-5 acc 71.875 (72.660)	lr 0.02170
Train [29][1360/3239]	Time 0.226 (0.580)	Data Time 0.002 (0.023)	Loss 3.1083 (3.1182)	Entropy 1.49525 (1.49898)	Top-1 acc 49.609 (49.454)	Top-5 acc 74.219 (72.659)	lr 0.02170
Train [29][1370/3239]	Time 0.248 (0.579)	Data Time 0.001 (0.023)	Loss 3.2269 (3.1177)	Entropy 1.49519 (1.49895)	Top-1 acc 51.562 (49.461)	Top-5 acc 69.141 (72.670)	lr 0.02170
Train [29][1380/3239]	Time 0.286 (0.578)	Data Time 0.001 (0.023)	Loss 3.2009 (3.1177)	Entropy 1.49515 (1.49893)	Top-1 acc 49.609 (49.462)	Top-5 acc 68.750 (72.666)	lr 0.02170
Train [29][1390/3239]	Time 0.231 (0.577)	Data Time 0.001 (0.023)	Loss 3.3000 (3.1176)	Entropy 1.49512 (1.49890)	Top-1 acc 46.875 (49.468)	Top-5 acc 70.703 (72.671)	lr 0.02169
Train [29][1400/3239]	Time 0.221 (0.576)	Data Time 0.001 (0.023)	Loss 3.1786 (3.1174)	Entropy 1.49500 (1.49887)	Top-1 acc 45.312 (49.479)	Top-5 acc 71.484 (72.675)	lr 0.02169
Train [29][1410/3239]	Time 0.210 (0.575)	Data Time 0.001 (0.023)	Loss 2.9715 (3.1176)	Entropy 1.49490 (1.49884)	Top-1 acc 53.125 (49.477)	Top-5 acc 74.219 (72.664)	lr 0.02169
Train [29][1420/3239]	Time 0.202 (0.574)	Data Time 0.001 (0.023)	Loss 2.9656 (3.1179)	Entropy 1.49486 (1.49882)	Top-1 acc 48.438 (49.475)	Top-5 acc 75.781 (72.655)	lr 0.02169
Train [29][1430/3239]	Time 0.256 (0.573)	Data Time 0.001 (0.022)	Loss 3.1073 (3.1177)	Entropy 1.49487 (1.49879)	Top-1 acc 46.875 (49.477)	Top-5 acc 71.484 (72.649)	lr 0.02169
Train [29][1440/3239]	Time 2.491 (0.572)	Data Time 0.001 (0.022)	Loss 3.0043 (3.1174)	Entropy 1.49487 (1.49876)	Top-1 acc 52.344 (49.494)	Top-5 acc 76.953 (72.657)	lr 0.02169
Train [29][1450/3239]	Time 0.232 (0.570)	Data Time 0.001 (0.022)	Loss 3.2027 (3.1175)	Entropy 1.49480 (1.49873)	Top-1 acc 47.266 (49.490)	Top-5 acc 71.875 (72.655)	lr 0.02169
Train [29][1460/3239]	Time 0.209 (0.569)	Data Time 0.001 (0.022)	Loss 2.9456 (3.1176)	Entropy 1.49472 (1.49871)	Top-1 acc 52.344 (49.490)	Top-5 acc 75.391 (72.648)	lr 0.02169
Train [29][1470/3239]	Time 0.227 (0.568)	Data Time 0.001 (0.022)	Loss 3.1032 (3.1175)	Entropy 1.49468 (1.49868)	Top-1 acc 50.781 (49.493)	Top-5 acc 72.266 (72.648)	lr 0.02169
Train [29][1480/3239]	Time 0.209 (0.567)	Data Time 0.001 (0.022)	Loss 3.0298 (3.1179)	Entropy 1.49459 (1.49865)	Top-1 acc 51.562 (49.485)	Top-5 acc 73.047 (72.643)	lr 0.02169
Train [29][1490/3239]	Time 0.222 (0.567)	Data Time 0.002 (0.022)	Loss 2.9098 (3.1176)	Entropy 1.49453 (1.49862)	Top-1 acc 57.031 (49.496)	Top-5 acc 76.562 (72.650)	lr 0.02169
Train [29][1500/3239]	Time 0.218 (0.566)	Data Time 0.002 (0.021)	Loss 3.3181 (3.1175)	Entropy 1.49448 (1.49860)	Top-1 acc 45.703 (49.501)	Top-5 acc 69.922 (72.653)	lr 0.02169
Train [29][1510/3239]	Time 0.228 (0.565)	Data Time 0.001 (0.021)	Loss 2.9755 (3.1172)	Entropy 1.49447 (1.49857)	Top-1 acc 49.219 (49.504)	Top-5 acc 75.000 (72.657)	lr 0.02169
Train [29][1520/3239]	Time 0.220 (0.564)	Data Time 0.001 (0.021)	Loss 2.9547 (3.1169)	Entropy 1.49446 (1.49854)	Top-1 acc 57.031 (49.514)	Top-5 acc 75.781 (72.664)	lr 0.02169
Train [29][1530/3239]	Time 0.155 (0.563)	Data Time 0.001 (0.021)	Loss 3.1392 (3.1165)	Entropy 1.49439 (1.49852)	Top-1 acc 48.438 (49.521)	Top-5 acc 72.656 (72.672)	lr 0.02169
Train [29][1540/3239]	Time 0.214 (0.563)	Data Time 0.001 (0.021)	Loss 3.1081 (3.1166)	Entropy 1.49436 (1.49849)	Top-1 acc 51.562 (49.512)	Top-5 acc 74.219 (72.673)	lr 0.02168
Train [29][1550/3239]	Time 2.480 (0.562)	Data Time 0.001 (0.021)	Loss 3.0277 (3.1163)	Entropy 1.49436 (1.49846)	Top-1 acc 49.219 (49.516)	Top-5 acc 74.219 (72.679)	lr 0.02168
Train [29][1560/3239]	Time 0.257 (0.560)	Data Time 0.001 (0.021)	Loss 3.0205 (3.1160)	Entropy 1.49417 (1.49844)	Top-1 acc 50.391 (49.517)	Top-5 acc 73.828 (72.684)	lr 0.02168
Train [29][1570/3239]	Time 0.216 (0.559)	Data Time 0.001 (0.021)	Loss 3.0636 (3.1163)	Entropy 1.49417 (1.49841)	Top-1 acc 48.828 (49.513)	Top-5 acc 76.172 (72.683)	lr 0.02168
Train [29][1580/3239]	Time 0.225 (0.558)	Data Time 0.001 (0.020)	Loss 3.0041 (3.1166)	Entropy 1.49414 (1.49838)	Top-1 acc 49.609 (49.507)	Top-5 acc 75.391 (72.677)	lr 0.02168
Train [29][1590/3239]	Time 0.251 (0.558)	Data Time 0.001 (0.020)	Loss 3.2540 (3.1167)	Entropy 1.49409 (1.49835)	Top-1 acc 48.047 (49.506)	Top-5 acc 72.266 (72.670)	lr 0.02168
Train [29][1600/3239]	Time 0.216 (0.557)	Data Time 0.001 (0.020)	Loss 3.0051 (3.1166)	Entropy 1.49402 (1.49833)	Top-1 acc 50.781 (49.513)	Top-5 acc 76.172 (72.671)	lr 0.02168
Train [29][1610/3239]	Time 0.361 (0.556)	Data Time 0.001 (0.020)	Loss 3.0772 (3.1170)	Entropy 1.49350 (1.49830)	Top-1 acc 53.516 (49.513)	Top-5 acc 72.656 (72.659)	lr 0.02168
Train [29][1620/3239]	Time 0.206 (0.555)	Data Time 0.001 (0.020)	Loss 3.0618 (3.1169)	Entropy 1.49347 (1.49827)	Top-1 acc 54.297 (49.521)	Top-5 acc 75.000 (72.662)	lr 0.02168
Train [29][1630/3239]	Time 0.191 (0.555)	Data Time 0.001 (0.020)	Loss 2.9848 (3.1166)	Entropy 1.49333 (1.49824)	Top-1 acc 56.641 (49.534)	Top-5 acc 74.219 (72.667)	lr 0.02168
Train [29][1640/3239]	Time 0.207 (0.554)	Data Time 0.002 (0.020)	Loss 3.1096 (3.1168)	Entropy 1.49323 (1.49821)	Top-1 acc 48.438 (49.531)	Top-5 acc 73.047 (72.661)	lr 0.02168
Train [29][1650/3239]	Time 0.271 (0.553)	Data Time 0.001 (0.020)	Loss 2.9758 (3.1165)	Entropy 1.49321 (1.49818)	Top-1 acc 52.734 (49.531)	Top-5 acc 74.609 (72.660)	lr 0.02168
Train [29][1660/3239]	Time 46.248 (0.579)	Data Time 0.001 (0.020)	Loss 3.1403 (3.1162)	Entropy 1.49321 (1.49815)	Top-1 acc 48.047 (49.540)	Top-5 acc 74.219 (72.666)	lr 0.02168
Train [29][1670/3239]	Time 0.212 (0.577)	Data Time 0.002 (0.019)	Loss 2.9363 (3.1163)	Entropy 1.49315 (1.49812)	Top-1 acc 52.344 (49.541)	Top-5 acc 75.000 (72.664)	lr 0.02168
Train [29][1680/3239]	Time 0.219 (0.576)	Data Time 0.002 (0.019)	Loss 2.9626 (3.1165)	Entropy 1.49306 (1.49809)	Top-1 acc 55.078 (49.532)	Top-5 acc 75.781 (72.659)	lr 0.02167
Train [29][1690/3239]	Time 0.224 (0.575)	Data Time 0.001 (0.019)	Loss 3.1886 (3.1164)	Entropy 1.49279 (1.49806)	Top-1 acc 48.438 (49.537)	Top-5 acc 69.531 (72.660)	lr 0.02167
Train [29][1700/3239]	Time 0.238 (0.575)	Data Time 0.001 (0.019)	Loss 3.0105 (3.1166)	Entropy 1.49277 (1.49803)	Top-1 acc 50.391 (49.533)	Top-5 acc 73.828 (72.659)	lr 0.02167
Train [29][1710/3239]	Time 0.205 (0.574)	Data Time 0.001 (0.019)	Loss 3.1399 (3.1163)	Entropy 1.49277 (1.49800)	Top-1 acc 49.219 (49.537)	Top-5 acc 71.094 (72.667)	lr 0.02167
Train [29][1720/3239]	Time 0.223 (0.573)	Data Time 0.001 (0.019)	Loss 3.0939 (3.1167)	Entropy 1.49272 (1.49797)	Top-1 acc 48.828 (49.523)	Top-5 acc 72.656 (72.661)	lr 0.02167
Train [29][1730/3239]	Time 0.330 (0.573)	Data Time 0.001 (0.019)	Loss 2.9866 (3.1166)	Entropy 1.49267 (1.49794)	Top-1 acc 50.781 (49.522)	Top-5 acc 75.781 (72.658)	lr 0.02167
Train [29][1740/3239]	Time 0.211 (0.572)	Data Time 0.001 (0.019)	Loss 3.3084 (3.1168)	Entropy 1.49252 (1.49791)	Top-1 acc 45.703 (49.517)	Top-5 acc 68.750 (72.651)	lr 0.02167
Train [29][1750/3239]	Time 0.252 (0.571)	Data Time 0.001 (0.019)	Loss 3.1083 (3.1169)	Entropy 1.49251 (1.49787)	Top-1 acc 48.828 (49.510)	Top-5 acc 73.047 (72.649)	lr 0.02167
Train [29][1760/3239]	Time 0.209 (0.570)	Data Time 0.001 (0.019)	Loss 3.1741 (3.1169)	Entropy 1.49247 (1.49784)	Top-1 acc 54.297 (49.519)	Top-5 acc 71.875 (72.650)	lr 0.02167
Train [29][1770/3239]	Time 2.268 (0.570)	Data Time 0.001 (0.018)	Loss 3.0069 (3.1165)	Entropy 1.49247 (1.49781)	Top-1 acc 50.000 (49.528)	Top-5 acc 75.781 (72.656)	lr 0.02167
Train [29][1780/3239]	Time 0.340 (0.568)	Data Time 0.001 (0.018)	Loss 3.0909 (3.1163)	Entropy 1.49243 (1.49778)	Top-1 acc 46.875 (49.529)	Top-5 acc 74.609 (72.661)	lr 0.02167
Train [29][1790/3239]	Time 0.212 (0.567)	Data Time 0.001 (0.018)	Loss 3.2138 (3.1161)	Entropy 1.49245 (1.49775)	Top-1 acc 49.609 (49.531)	Top-5 acc 69.141 (72.661)	lr 0.02167
Train [29][1800/3239]	Time 0.241 (0.566)	Data Time 0.001 (0.018)	Loss 2.9687 (3.1159)	Entropy 1.49250 (1.49772)	Top-1 acc 49.219 (49.530)	Top-5 acc 76.172 (72.671)	lr 0.02167
Train [29][1810/3239]	Time 0.206 (0.566)	Data Time 0.001 (0.018)	Loss 3.1958 (3.1162)	Entropy 1.49242 (1.49770)	Top-1 acc 45.312 (49.523)	Top-5 acc 70.312 (72.663)	lr 0.02167
Train [29][1820/3239]	Time 0.207 (0.565)	Data Time 0.001 (0.018)	Loss 3.2157 (3.1160)	Entropy 1.49233 (1.49767)	Top-1 acc 49.219 (49.529)	Top-5 acc 71.094 (72.666)	lr 0.02167
Train [29][1830/3239]	Time 0.226 (0.564)	Data Time 0.002 (0.018)	Loss 3.1663 (3.1160)	Entropy 1.49222 (1.49764)	Top-1 acc 48.828 (49.529)	Top-5 acc 72.266 (72.667)	lr 0.02166
Train [29][1840/3239]	Time 0.306 (0.564)	Data Time 0.001 (0.018)	Loss 2.9801 (3.1160)	Entropy 1.49218 (1.49761)	Top-1 acc 52.734 (49.536)	Top-5 acc 76.953 (72.667)	lr 0.02166
Train [29][1850/3239]	Time 0.222 (0.563)	Data Time 0.001 (0.018)	Loss 3.2682 (3.1158)	Entropy 1.49210 (1.49758)	Top-1 acc 51.562 (49.540)	Top-5 acc 71.875 (72.671)	lr 0.02166
Train [29][1860/3239]	Time 0.246 (0.563)	Data Time 0.001 (0.018)	Loss 2.9447 (3.1156)	Entropy 1.49208 (1.49755)	Top-1 acc 52.734 (49.545)	Top-5 acc 77.344 (72.675)	lr 0.02166
Train [29][1870/3239]	Time 0.225 (0.562)	Data Time 0.001 (0.018)	Loss 3.1681 (3.1156)	Entropy 1.49196 (1.49752)	Top-1 acc 50.391 (49.544)	Top-5 acc 74.219 (72.680)	lr 0.02166
Train [29][1880/3239]	Time 2.422 (0.561)	Data Time 0.001 (0.017)	Loss 3.2153 (3.1156)	Entropy 1.49196 (1.49749)	Top-1 acc 43.359 (49.550)	Top-5 acc 72.656 (72.678)	lr 0.02166
Train [29][1890/3239]	Time 0.222 (0.560)	Data Time 0.001 (0.017)	Loss 3.1732 (3.1157)	Entropy 1.49180 (1.49746)	Top-1 acc 48.047 (49.554)	Top-5 acc 72.266 (72.676)	lr 0.02166
Train [29][1900/3239]	Time 0.223 (0.559)	Data Time 0.001 (0.017)	Loss 3.0750 (3.1154)	Entropy 1.49153 (1.49743)	Top-1 acc 50.000 (49.558)	Top-5 acc 73.828 (72.677)	lr 0.02166
Train [29][1910/3239]	Time 0.247 (0.558)	Data Time 0.001 (0.017)	Loss 2.9370 (3.1156)	Entropy 1.49150 (1.49740)	Top-1 acc 50.391 (49.559)	Top-5 acc 77.344 (72.675)	lr 0.02166
Train [29][1920/3239]	Time 0.225 (0.558)	Data Time 0.001 (0.017)	Loss 3.0992 (3.1156)	Entropy 1.49146 (1.49737)	Top-1 acc 49.609 (49.561)	Top-5 acc 71.484 (72.676)	lr 0.02166
Train [29][1930/3239]	Time 0.241 (0.557)	Data Time 0.001 (0.017)	Loss 3.2342 (3.1159)	Entropy 1.49139 (1.49734)	Top-1 acc 45.703 (49.551)	Top-5 acc 70.703 (72.674)	lr 0.02166
Train [29][1940/3239]	Time 0.214 (0.557)	Data Time 0.001 (0.017)	Loss 3.2281 (3.1159)	Entropy 1.49126 (1.49730)	Top-1 acc 49.219 (49.551)	Top-5 acc 69.922 (72.676)	lr 0.02166
Train [29][1950/3239]	Time 0.333 (0.556)	Data Time 0.001 (0.017)	Loss 2.9196 (3.1161)	Entropy 1.49123 (1.49727)	Top-1 acc 53.125 (49.546)	Top-5 acc 75.391 (72.668)	lr 0.02166
Train [29][1960/3239]	Time 0.216 (0.556)	Data Time 0.001 (0.017)	Loss 3.1942 (3.1163)	Entropy 1.49109 (1.49724)	Top-1 acc 47.266 (49.541)	Top-5 acc 72.266 (72.666)	lr 0.02166
Train [29][1970/3239]	Time 0.217 (0.555)	Data Time 0.001 (0.017)	Loss 2.9322 (3.1159)	Entropy 1.49093 (1.49721)	Top-1 acc 55.078 (49.550)	Top-5 acc 76.562 (72.666)	lr 0.02165
Train [29][1980/3239]	Time 0.293 (0.555)	Data Time 0.001 (0.017)	Loss 2.9824 (3.1160)	Entropy 1.49089 (1.49718)	Top-1 acc 52.344 (49.545)	Top-5 acc 75.000 (72.665)	lr 0.02165
Train [29][1990/3239]	Time 2.279 (0.554)	Data Time 0.001 (0.017)	Loss 3.1399 (3.1162)	Entropy 1.49089 (1.49715)	Top-1 acc 51.562 (49.539)	Top-5 acc 71.875 (72.664)	lr 0.02165
Train [29][2000/3239]	Time 0.210 (0.552)	Data Time 0.001 (0.017)	Loss 3.0004 (3.1162)	Entropy 1.49084 (1.49712)	Top-1 acc 53.516 (49.543)	Top-5 acc 76.172 (72.666)	lr 0.02165
Train [29][2010/3239]	Time 0.221 (0.552)	Data Time 0.001 (0.016)	Loss 3.0598 (3.1165)	Entropy 1.49078 (1.49708)	Top-1 acc 51.953 (49.538)	Top-5 acc 74.219 (72.659)	lr 0.02165
Train [29][2020/3239]	Time 0.257 (0.551)	Data Time 0.001 (0.016)	Loss 3.0840 (3.1163)	Entropy 1.49078 (1.49705)	Top-1 acc 52.734 (49.547)	Top-5 acc 72.266 (72.669)	lr 0.02165
Train [29][2030/3239]	Time 0.284 (0.570)	Data Time 0.003 (0.016)	Loss 3.3560 (3.1166)	Entropy 1.49063 (1.49702)	Top-1 acc 42.969 (49.534)	Top-5 acc 67.578 (72.666)	lr 0.02165
Train [29][2040/3239]	Time 0.229 (0.570)	Data Time 0.002 (0.016)	Loss 3.2181 (3.1166)	Entropy 1.49061 (1.49699)	Top-1 acc 49.219 (49.535)	Top-5 acc 73.047 (72.659)	lr 0.02165
Train [29][2050/3239]	Time 0.170 (0.570)	Data Time 0.001 (0.016)	Loss 3.2186 (3.1169)	Entropy 1.49051 (1.49696)	Top-1 acc 46.875 (49.529)	Top-5 acc 69.922 (72.654)	lr 0.02165
Train [29][2060/3239]	Time 0.223 (0.569)	Data Time 0.001 (0.016)	Loss 3.1990 (3.1169)	Entropy 1.49030 (1.49693)	Top-1 acc 50.391 (49.529)	Top-5 acc 71.484 (72.655)	lr 0.02165
Train [29][2070/3239]	Time 0.170 (0.568)	Data Time 0.001 (0.016)	Loss 3.2151 (3.1172)	Entropy 1.49019 (1.49690)	Top-1 acc 49.219 (49.521)	Top-5 acc 69.922 (72.647)	lr 0.02165
Train [29][2080/3239]	Time 0.228 (0.568)	Data Time 0.001 (0.016)	Loss 3.0521 (3.1170)	Entropy 1.49003 (1.49686)	Top-1 acc 50.781 (49.521)	Top-5 acc 76.562 (72.647)	lr 0.02165
Train [29][2090/3239]	Time 0.234 (0.567)	Data Time 0.001 (0.016)	Loss 3.2404 (3.1166)	Entropy 1.49003 (1.49683)	Top-1 acc 48.047 (49.527)	Top-5 acc 71.094 (72.652)	lr 0.02165
Train [29][2100/3239]	Time 2.426 (0.567)	Data Time 0.002 (0.016)	Loss 3.0407 (3.1166)	Entropy 1.49003 (1.49680)	Top-1 acc 51.562 (49.526)	Top-5 acc 74.219 (72.652)	lr 0.02165
Train [29][2110/3239]	Time 0.226 (0.565)	Data Time 0.001 (0.016)	Loss 3.1960 (3.1168)	Entropy 1.49001 (1.49677)	Top-1 acc 47.266 (49.522)	Top-5 acc 70.312 (72.649)	lr 0.02165
Train [29][2120/3239]	Time 0.225 (0.564)	Data Time 0.001 (0.016)	Loss 2.9932 (3.1171)	Entropy 1.49000 (1.49673)	Top-1 acc 53.516 (49.521)	Top-5 acc 75.781 (72.640)	lr 0.02164
Train [29][2130/3239]	Time 0.280 (0.564)	Data Time 0.003 (0.016)	Loss 3.1829 (3.1172)	Entropy 1.48998 (1.49670)	Top-1 acc 47.656 (49.517)	Top-5 acc 72.266 (72.638)	lr 0.02164
Train [29][2140/3239]	Time 0.218 (0.563)	Data Time 0.001 (0.016)	Loss 3.1717 (3.1171)	Entropy 1.48992 (1.49667)	Top-1 acc 51.172 (49.515)	Top-5 acc 67.188 (72.639)	lr 0.02164
Train [29][2150/3239]	Time 0.211 (0.563)	Data Time 0.001 (0.016)	Loss 3.1151 (3.1171)	Entropy 1.48991 (1.49664)	Top-1 acc 51.562 (49.517)	Top-5 acc 74.219 (72.641)	lr 0.02164
Train [29][2160/3239]	Time 0.208 (0.562)	Data Time 0.001 (0.015)	Loss 3.2249 (3.1174)	Entropy 1.48981 (1.49661)	Top-1 acc 45.312 (49.509)	Top-5 acc 73.047 (72.636)	lr 0.02164
Train [29][2170/3239]	Time 0.207 (0.562)	Data Time 0.001 (0.015)	Loss 3.2628 (3.1180)	Entropy 1.48974 (1.49658)	Top-1 acc 47.266 (49.489)	Top-5 acc 69.531 (72.624)	lr 0.02164
Train [29][2180/3239]	Time 0.202 (0.561)	Data Time 0.001 (0.015)	Loss 3.0293 (3.1180)	Entropy 1.48963 (1.49655)	Top-1 acc 52.734 (49.491)	Top-5 acc 75.781 (72.628)	lr 0.02164
Train [29][2190/3239]	Time 0.294 (0.561)	Data Time 0.001 (0.015)	Loss 3.0762 (3.1177)	Entropy 1.48929 (1.49651)	Top-1 acc 46.484 (49.494)	Top-5 acc 72.266 (72.633)	lr 0.02164
Train [29][2200/3239]	Time 0.230 (0.560)	Data Time 0.001 (0.015)	Loss 3.1082 (3.1178)	Entropy 1.48914 (1.49648)	Top-1 acc 46.484 (49.494)	Top-5 acc 75.000 (72.630)	lr 0.02164
Train [29][2210/3239]	Time 2.219 (0.559)	Data Time 0.001 (0.015)	Loss 3.3225 (3.1177)	Entropy 1.48914 (1.49645)	Top-1 acc 43.359 (49.500)	Top-5 acc 69.531 (72.630)	lr 0.02164
Train [29][2220/3239]	Time 0.221 (0.558)	Data Time 0.001 (0.015)	Loss 3.0497 (3.1179)	Entropy 1.48901 (1.49641)	Top-1 acc 53.516 (49.494)	Top-5 acc 74.609 (72.625)	lr 0.02164
Train [29][2230/3239]	Time 0.214 (0.557)	Data Time 0.001 (0.015)	Loss 3.1694 (3.1181)	Entropy 1.48899 (1.49638)	Top-1 acc 46.875 (49.490)	Top-5 acc 68.750 (72.619)	lr 0.02164
Train [29][2240/3239]	Time 0.204 (0.557)	Data Time 0.001 (0.015)	Loss 3.0840 (3.1182)	Entropy 1.48889 (1.49635)	Top-1 acc 50.781 (49.492)	Top-5 acc 73.047 (72.615)	lr 0.02164
Train [29][2250/3239]	Time 0.265 (0.556)	Data Time 0.001 (0.015)	Loss 2.9855 (3.1180)	Entropy 1.48889 (1.49631)	Top-1 acc 54.688 (49.495)	Top-5 acc 76.172 (72.619)	lr 0.02164
Train [29][2260/3239]	Time 0.198 (0.556)	Data Time 0.001 (0.015)	Loss 3.1226 (3.1181)	Entropy 1.48879 (1.49628)	Top-1 acc 46.875 (49.494)	Top-5 acc 71.484 (72.620)	lr 0.02164
Train [29][2270/3239]	Time 0.201 (0.555)	Data Time 0.001 (0.015)	Loss 3.7156 (3.1185)	Entropy 1.48862 (1.49625)	Top-1 acc 35.938 (49.486)	Top-5 acc 66.797 (72.611)	lr 0.02163
Train [29][2280/3239]	Time 0.207 (0.555)	Data Time 0.001 (0.015)	Loss 3.1445 (3.1187)	Entropy 1.48851 (1.49621)	Top-1 acc 47.266 (49.478)	Top-5 acc 75.000 (72.612)	lr 0.02163
Train [29][2290/3239]	Time 0.233 (0.555)	Data Time 0.001 (0.015)	Loss 3.2140 (3.1186)	Entropy 1.48844 (1.49618)	Top-1 acc 42.578 (49.480)	Top-5 acc 68.359 (72.610)	lr 0.02163
Train [29][2300/3239]	Time 0.214 (0.554)	Data Time 0.001 (0.015)	Loss 3.0107 (3.1186)	Entropy 1.48840 (1.49615)	Top-1 acc 53.906 (49.482)	Top-5 acc 75.781 (72.610)	lr 0.02163
Train [29][2310/3239]	Time 0.377 (0.554)	Data Time 0.001 (0.015)	Loss 2.9294 (3.1183)	Entropy 1.48832 (1.49611)	Top-1 acc 57.422 (49.491)	Top-5 acc 75.781 (72.616)	lr 0.02163
Train [29][2320/3239]	Time 2.241 (0.553)	Data Time 0.001 (0.015)	Loss 3.0842 (3.1183)	Entropy 1.48832 (1.49608)	Top-1 acc 50.781 (49.491)	Top-5 acc 73.828 (72.616)	lr 0.02163
Train [29][2330/3239]	Time 0.227 (0.552)	Data Time 0.001 (0.014)	Loss 3.1622 (3.1183)	Entropy 1.48827 (1.49605)	Top-1 acc 46.875 (49.490)	Top-5 acc 71.484 (72.614)	lr 0.02163
Train [29][2340/3239]	Time 0.210 (0.552)	Data Time 0.001 (0.014)	Loss 3.4481 (3.1184)	Entropy 1.48812 (1.49601)	Top-1 acc 42.188 (49.492)	Top-5 acc 69.531 (72.613)	lr 0.02163
Train [29][2350/3239]	Time 0.209 (0.551)	Data Time 0.001 (0.014)	Loss 3.1492 (3.1185)	Entropy 1.48813 (1.49598)	Top-1 acc 50.391 (49.494)	Top-5 acc 71.094 (72.613)	lr 0.02163
Train [29][2360/3239]	Time 0.261 (0.550)	Data Time 0.001 (0.014)	Loss 3.0842 (3.1184)	Entropy 1.48808 (1.49595)	Top-1 acc 51.562 (49.498)	Top-5 acc 73.047 (72.616)	lr 0.02163
Train [29][2370/3239]	Time 0.316 (0.550)	Data Time 0.001 (0.014)	Loss 3.3180 (3.1186)	Entropy 1.48794 (1.49591)	Top-1 acc 50.000 (49.497)	Top-5 acc 68.750 (72.619)	lr 0.02163
Train [29][2380/3239]	Time 0.220 (0.550)	Data Time 0.001 (0.014)	Loss 3.0666 (3.1188)	Entropy 1.48797 (1.49588)	Top-1 acc 53.125 (49.496)	Top-5 acc 73.438 (72.615)	lr 0.02163
Train [29][2390/3239]	Time 0.251 (0.568)	Data Time 0.002 (0.014)	Loss 3.1723 (3.1187)	Entropy 1.48795 (1.49585)	Top-1 acc 47.656 (49.496)	Top-5 acc 68.359 (72.616)	lr 0.02163
Train [29][2400/3239]	Time 0.200 (0.567)	Data Time 0.002 (0.014)	Loss 3.0385 (3.1188)	Entropy 1.48800 (1.49581)	Top-1 acc 49.609 (49.493)	Top-5 acc 71.484 (72.615)	lr 0.02163
Train [29][2410/3239]	Time 0.200 (0.567)	Data Time 0.002 (0.014)	Loss 3.1583 (3.1190)	Entropy 1.48792 (1.49578)	Top-1 acc 49.609 (49.490)	Top-5 acc 70.703 (72.612)	lr 0.02162
Train [29][2420/3239]	Time 0.293 (0.566)	Data Time 0.002 (0.014)	Loss 3.3864 (3.1191)	Entropy 1.48791 (1.49575)	Top-1 acc 42.969 (49.490)	Top-5 acc 67.188 (72.611)	lr 0.02162
Train [29][2430/3239]	Time 2.503 (0.566)	Data Time 0.001 (0.014)	Loss 2.9931 (3.1191)	Entropy 1.48791 (1.49572)	Top-1 acc 54.297 (49.491)	Top-5 acc 75.000 (72.610)	lr 0.02162
Train [29][2440/3239]	Time 0.237 (0.564)	Data Time 0.001 (0.014)	Loss 3.1305 (3.1192)	Entropy 1.48788 (1.49568)	Top-1 acc 48.828 (49.492)	Top-5 acc 71.484 (72.610)	lr 0.02162
Train [29][2450/3239]	Time 0.204 (0.564)	Data Time 0.001 (0.014)	Loss 3.2239 (3.1192)	Entropy 1.48779 (1.49565)	Top-1 acc 44.922 (49.494)	Top-5 acc 69.531 (72.611)	lr 0.02162
Train [29][2460/3239]	Time 0.257 (0.563)	Data Time 0.001 (0.014)	Loss 3.0608 (3.1191)	Entropy 1.48778 (1.49562)	Top-1 acc 49.609 (49.494)	Top-5 acc 73.828 (72.607)	lr 0.02162
Train [29][2470/3239]	Time 0.224 (0.563)	Data Time 0.001 (0.014)	Loss 3.0568 (3.1191)	Entropy 1.48773 (1.49559)	Top-1 acc 50.000 (49.489)	Top-5 acc 74.219 (72.612)	lr 0.02162
Train [29][2480/3239]	Time 0.257 (0.562)	Data Time 0.001 (0.014)	Loss 3.1416 (3.1193)	Entropy 1.48771 (1.49556)	Top-1 acc 52.734 (49.486)	Top-5 acc 72.266 (72.607)	lr 0.02162
Train [29][2490/3239]	Time 0.240 (0.562)	Data Time 0.001 (0.014)	Loss 3.1407 (3.1193)	Entropy 1.48766 (1.49552)	Top-1 acc 48.828 (49.485)	Top-5 acc 69.531 (72.607)	lr 0.02162
Train [29][2500/3239]	Time 0.218 (0.562)	Data Time 0.001 (0.014)	Loss 3.0144 (3.1191)	Entropy 1.48762 (1.49549)	Top-1 acc 50.781 (49.487)	Top-5 acc 75.000 (72.611)	lr 0.02162
Train [29][2510/3239]	Time 0.172 (0.561)	Data Time 0.001 (0.014)	Loss 3.1784 (3.1192)	Entropy 1.48760 (1.49546)	Top-1 acc 46.094 (49.484)	Top-5 acc 73.047 (72.609)	lr 0.02162
Train [29][2520/3239]	Time 0.258 (0.561)	Data Time 0.002 (0.013)	Loss 3.0425 (3.1191)	Entropy 1.48742 (1.49543)	Top-1 acc 50.781 (49.486)	Top-5 acc 75.000 (72.613)	lr 0.02162
Train [29][2530/3239]	Time 0.221 (0.560)	Data Time 0.001 (0.013)	Loss 3.0147 (3.1189)	Entropy 1.48732 (1.49540)	Top-1 acc 51.953 (49.489)	Top-5 acc 75.000 (72.621)	lr 0.02162
Train [29][2540/3239]	Time 2.535 (0.560)	Data Time 0.002 (0.013)	Loss 3.2841 (3.1188)	Entropy 1.48732 (1.49537)	Top-1 acc 47.656 (49.491)	Top-5 acc 70.312 (72.623)	lr 0.02162
Train [29][2550/3239]	Time 0.217 (0.559)	Data Time 0.001 (0.013)	Loss 3.1326 (3.1186)	Entropy 1.48722 (1.49533)	Top-1 acc 50.391 (49.498)	Top-5 acc 70.312 (72.623)	lr 0.02162
Train [29][2560/3239]	Time 0.222 (0.558)	Data Time 0.001 (0.013)	Loss 3.0464 (3.1187)	Entropy 1.48706 (1.49530)	Top-1 acc 49.609 (49.498)	Top-5 acc 73.828 (72.622)	lr 0.02161
Train [29][2570/3239]	Time 0.212 (0.558)	Data Time 0.001 (0.013)	Loss 3.1838 (3.1184)	Entropy 1.48698 (1.49527)	Top-1 acc 47.656 (49.502)	Top-5 acc 69.141 (72.626)	lr 0.02161
Train [29][2580/3239]	Time 0.224 (0.557)	Data Time 0.002 (0.013)	Loss 3.0093 (3.1182)	Entropy 1.48681 (1.49524)	Top-1 acc 51.953 (49.506)	Top-5 acc 75.391 (72.629)	lr 0.02161
Train [29][2590/3239]	Time 0.217 (0.557)	Data Time 0.001 (0.013)	Loss 3.0162 (3.1183)	Entropy 1.48679 (1.49520)	Top-1 acc 51.562 (49.498)	Top-5 acc 76.562 (72.627)	lr 0.02161
Train [29][2600/3239]	Time 0.214 (0.557)	Data Time 0.001 (0.013)	Loss 3.1575 (3.1183)	Entropy 1.48664 (1.49517)	Top-1 acc 46.875 (49.497)	Top-5 acc 70.703 (72.630)	lr 0.02161
Train [29][2610/3239]	Time 0.217 (0.556)	Data Time 0.001 (0.013)	Loss 3.0204 (3.1181)	Entropy 1.48641 (1.49514)	Top-1 acc 50.391 (49.500)	Top-5 acc 75.000 (72.637)	lr 0.02161
Train [29][2620/3239]	Time 0.240 (0.556)	Data Time 0.001 (0.013)	Loss 2.9656 (3.1183)	Entropy 1.48635 (1.49511)	Top-1 acc 54.688 (49.497)	Top-5 acc 71.875 (72.632)	lr 0.02161
Train [29][2630/3239]	Time 0.251 (0.555)	Data Time 0.001 (0.013)	Loss 3.1070 (3.1180)	Entropy 1.48618 (1.49507)	Top-1 acc 50.000 (49.499)	Top-5 acc 70.703 (72.635)	lr 0.02161
Train [29][2640/3239]	Time 0.240 (0.555)	Data Time 0.001 (0.013)	Loss 3.2205 (3.1180)	Entropy 1.48601 (1.49504)	Top-1 acc 42.969 (49.500)	Top-5 acc 69.531 (72.633)	lr 0.02161
Train [29][2650/3239]	Time 0.269 (0.555)	Data Time 0.001 (0.013)	Loss 3.0154 (3.1179)	Entropy 1.48595 (1.49500)	Top-1 acc 49.609 (49.497)	Top-5 acc 72.656 (72.635)	lr 0.02161
Train [29][2660/3239]	Time 0.216 (0.554)	Data Time 0.001 (0.013)	Loss 3.0135 (3.1178)	Entropy 1.48598 (1.49497)	Top-1 acc 54.688 (49.504)	Top-5 acc 75.391 (72.637)	lr 0.02161
Train [29][2670/3239]	Time 0.225 (0.554)	Data Time 0.001 (0.013)	Loss 2.9964 (3.1177)	Entropy 1.48596 (1.49494)	Top-1 acc 54.297 (49.504)	Top-5 acc 76.172 (72.641)	lr 0.02161
Train [29][2680/3239]	Time 0.200 (0.553)	Data Time 0.001 (0.013)	Loss 3.1999 (3.1178)	Entropy 1.48595 (1.49490)	Top-1 acc 48.438 (49.504)	Top-5 acc 69.531 (72.637)	lr 0.02161
Train [29][2690/3239]	Time 0.267 (0.553)	Data Time 0.001 (0.013)	Loss 3.1557 (3.1179)	Entropy 1.48580 (1.49487)	Top-1 acc 47.266 (49.499)	Top-5 acc 72.656 (72.635)	lr 0.02161
Train [29][2700/3239]	Time 0.256 (0.553)	Data Time 0.001 (0.013)	Loss 3.0895 (3.1179)	Entropy 1.48578 (1.49484)	Top-1 acc 50.000 (49.496)	Top-5 acc 74.219 (72.635)	lr 0.02160
Train [29][2710/3239]	Time 0.316 (0.552)	Data Time 0.001 (0.013)	Loss 3.1103 (3.1179)	Entropy 1.48575 (1.49480)	Top-1 acc 50.781 (49.496)	Top-5 acc 75.391 (72.638)	lr 0.02160
Train [29][2720/3239]	Time 0.289 (0.552)	Data Time 0.001 (0.013)	Loss 3.0376 (3.1177)	Entropy 1.48565 (1.49477)	Top-1 acc 52.344 (49.500)	Top-5 acc 74.219 (72.641)	lr 0.02160
Train [29][2730/3239]	Time 0.211 (0.551)	Data Time 0.001 (0.013)	Loss 3.1214 (3.1181)	Entropy 1.48556 (1.49474)	Top-1 acc 48.438 (49.491)	Top-5 acc 72.656 (72.634)	lr 0.02160
Train [29][2740/3239]	Time 0.236 (0.567)	Data Time 0.003 (0.013)	Loss 3.1029 (3.1180)	Entropy 1.48548 (1.49470)	Top-1 acc 52.734 (49.492)	Top-5 acc 72.656 (72.636)	lr 0.02160
Train [29][2750/3239]	Time 0.232 (0.567)	Data Time 0.002 (0.013)	Loss 3.0375 (3.1178)	Entropy 1.48540 (1.49467)	Top-1 acc 51.562 (49.494)	Top-5 acc 76.172 (72.645)	lr 0.02160
Train [29][2760/3239]	Time 0.266 (0.566)	Data Time 0.002 (0.012)	Loss 3.0162 (3.1178)	Entropy 1.48531 (1.49464)	Top-1 acc 48.438 (49.490)	Top-5 acc 75.781 (72.644)	lr 0.02160
Train [29][2770/3239]	Time 0.228 (0.566)	Data Time 0.002 (0.012)	Loss 3.2230 (3.1179)	Entropy 1.48522 (1.49460)	Top-1 acc 46.484 (49.489)	Top-5 acc 70.312 (72.643)	lr 0.02160
Train [29][2780/3239]	Time 0.240 (0.566)	Data Time 0.002 (0.012)	Loss 3.0129 (3.1177)	Entropy 1.48514 (1.49457)	Top-1 acc 49.609 (49.491)	Top-5 acc 76.172 (72.645)	lr 0.02160
Train [29][2790/3239]	Time 0.218 (0.565)	Data Time 0.001 (0.012)	Loss 3.1089 (3.1178)	Entropy 1.48512 (1.49453)	Top-1 acc 44.531 (49.487)	Top-5 acc 74.219 (72.644)	lr 0.02160
Train [29][2800/3239]	Time 0.249 (0.565)	Data Time 0.001 (0.012)	Loss 3.1707 (3.1179)	Entropy 1.48507 (1.49450)	Top-1 acc 44.141 (49.486)	Top-5 acc 71.484 (72.643)	lr 0.02160
Train [29][2810/3239]	Time 0.294 (0.564)	Data Time 0.001 (0.012)	Loss 3.0122 (3.1178)	Entropy 1.48500 (1.49447)	Top-1 acc 48.438 (49.487)	Top-5 acc 76.172 (72.645)	lr 0.02160
Train [29][2820/3239]	Time 0.250 (0.564)	Data Time 0.001 (0.012)	Loss 3.0410 (3.1179)	Entropy 1.48492 (1.49443)	Top-1 acc 50.000 (49.486)	Top-5 acc 71.875 (72.644)	lr 0.02160
Train [29][2830/3239]	Time 0.265 (0.564)	Data Time 0.001 (0.012)	Loss 3.2848 (3.1179)	Entropy 1.48486 (1.49440)	Top-1 acc 44.531 (49.487)	Top-5 acc 69.141 (72.646)	lr 0.02160
Train [29][2840/3239]	Time 0.189 (0.563)	Data Time 0.001 (0.012)	Loss 3.2718 (3.1177)	Entropy 1.48476 (1.49437)	Top-1 acc 44.531 (49.490)	Top-5 acc 68.750 (72.650)	lr 0.02159
Train [29][2850/3239]	Time 0.244 (0.563)	Data Time 0.001 (0.012)	Loss 2.9655 (3.1177)	Entropy 1.48472 (1.49433)	Top-1 acc 52.734 (49.488)	Top-5 acc 74.609 (72.652)	lr 0.02159
Train [29][2860/3239]	Time 0.210 (0.562)	Data Time 0.001 (0.012)	Loss 3.0849 (3.1177)	Entropy 1.48464 (1.49430)	Top-1 acc 49.219 (49.489)	Top-5 acc 71.875 (72.654)	lr 0.02159
Train [29][2870/3239]	Time 0.295 (0.562)	Data Time 0.001 (0.012)	Loss 2.9884 (3.1175)	Entropy 1.48458 (1.49426)	Top-1 acc 48.438 (49.496)	Top-5 acc 74.609 (72.660)	lr 0.02159
Train [29][2880/3239]	Time 0.211 (0.561)	Data Time 0.001 (0.012)	Loss 2.9183 (3.1173)	Entropy 1.48446 (1.49423)	Top-1 acc 57.812 (49.500)	Top-5 acc 77.344 (72.662)	lr 0.02159
Train [29][2890/3239]	Time 0.205 (0.561)	Data Time 0.001 (0.012)	Loss 3.2859 (3.1174)	Entropy 1.48444 (1.49420)	Top-1 acc 47.656 (49.497)	Top-5 acc 70.703 (72.661)	lr 0.02159
Train [29][2900/3239]	Time 0.244 (0.561)	Data Time 0.001 (0.012)	Loss 3.3337 (3.1175)	Entropy 1.48435 (1.49416)	Top-1 acc 46.875 (49.496)	Top-5 acc 64.453 (72.659)	lr 0.02159
Train [29][2910/3239]	Time 0.189 (0.560)	Data Time 0.001 (0.012)	Loss 3.1033 (3.1173)	Entropy 1.48427 (1.49413)	Top-1 acc 47.266 (49.499)	Top-5 acc 72.656 (72.661)	lr 0.02159
Train [29][2920/3239]	Time 0.240 (0.560)	Data Time 0.001 (0.012)	Loss 3.0760 (3.1174)	Entropy 1.48415 (1.49410)	Top-1 acc 53.906 (49.500)	Top-5 acc 74.609 (72.661)	lr 0.02159
Train [29][2930/3239]	Time 0.328 (0.559)	Data Time 0.001 (0.012)	Loss 2.9781 (3.1173)	Entropy 1.48416 (1.49406)	Top-1 acc 51.562 (49.499)	Top-5 acc 76.172 (72.661)	lr 0.02159
Train [29][2940/3239]	Time 0.236 (0.559)	Data Time 0.001 (0.012)	Loss 2.8910 (3.1172)	Entropy 1.48410 (1.49403)	Top-1 acc 53.125 (49.504)	Top-5 acc 76.953 (72.661)	lr 0.02159
Train [29][2950/3239]	Time 0.259 (0.559)	Data Time 0.001 (0.012)	Loss 3.0004 (3.1173)	Entropy 1.48395 (1.49400)	Top-1 acc 49.609 (49.504)	Top-5 acc 75.391 (72.660)	lr 0.02159
Train [29][2960/3239]	Time 0.218 (0.558)	Data Time 0.001 (0.012)	Loss 3.0676 (3.1173)	Entropy 1.48388 (1.49396)	Top-1 acc 53.906 (49.507)	Top-5 acc 71.484 (72.657)	lr 0.02159
Train [29][2970/3239]	Time 0.244 (0.558)	Data Time 0.001 (0.012)	Loss 3.0568 (3.1171)	Entropy 1.48381 (1.49393)	Top-1 acc 52.344 (49.508)	Top-5 acc 74.609 (72.661)	lr 0.02159
Train [29][2980/3239]	Time 0.337 (0.557)	Data Time 0.001 (0.012)	Loss 3.0055 (3.1172)	Entropy 1.48368 (1.49389)	Top-1 acc 49.219 (49.503)	Top-5 acc 74.219 (72.660)	lr 0.02159
Train [29][2990/3239]	Time 0.252 (0.557)	Data Time 0.001 (0.012)	Loss 3.0476 (3.1174)	Entropy 1.48361 (1.49386)	Top-1 acc 52.734 (49.499)	Top-5 acc 73.828 (72.654)	lr 0.02158
Train [29][3000/3239]	Time 0.244 (0.557)	Data Time 0.001 (0.012)	Loss 2.9813 (3.1173)	Entropy 1.48348 (1.49382)	Top-1 acc 49.219 (49.497)	Top-5 acc 76.172 (72.657)	lr 0.02158
Train [29][3010/3239]	Time 0.210 (0.556)	Data Time 0.001 (0.012)	Loss 2.9359 (3.1173)	Entropy 1.48332 (1.49379)	Top-1 acc 54.688 (49.498)	Top-5 acc 72.656 (72.657)	lr 0.02158
Train [29][3020/3239]	Time 0.224 (0.556)	Data Time 0.001 (0.012)	Loss 3.0974 (3.1171)	Entropy 1.48329 (1.49376)	Top-1 acc 50.781 (49.500)	Top-5 acc 71.484 (72.658)	lr 0.02158
Train [29][3030/3239]	Time 0.224 (0.556)	Data Time 0.001 (0.012)	Loss 3.1017 (3.1170)	Entropy 1.48320 (1.49372)	Top-1 acc 51.172 (49.505)	Top-5 acc 73.828 (72.659)	lr 0.02158
Train [29][3040/3239]	Time 0.231 (0.555)	Data Time 0.001 (0.011)	Loss 3.1406 (3.1169)	Entropy 1.48314 (1.49369)	Top-1 acc 46.094 (49.507)	Top-5 acc 71.094 (72.660)	lr 0.02158
Train [29][3050/3239]	Time 0.221 (0.555)	Data Time 0.001 (0.011)	Loss 3.0931 (3.1168)	Entropy 1.48311 (1.49365)	Top-1 acc 50.391 (49.509)	Top-5 acc 75.391 (72.665)	lr 0.02158
Train [29][3060/3239]	Time 0.158 (0.555)	Data Time 0.001 (0.011)	Loss 3.2600 (3.1168)	Entropy 1.48299 (1.49362)	Top-1 acc 49.219 (49.510)	Top-5 acc 71.484 (72.664)	lr 0.02158
Train [29][3070/3239]	Time 0.270 (0.568)	Data Time 0.004 (0.011)	Loss 3.0534 (3.1167)	Entropy 1.48281 (1.49358)	Top-1 acc 50.391 (49.514)	Top-5 acc 73.438 (72.665)	lr 0.02158
Train [29][3080/3239]	Time 0.232 (0.568)	Data Time 0.002 (0.011)	Loss 3.2463 (3.1168)	Entropy 1.48275 (1.49355)	Top-1 acc 46.094 (49.514)	Top-5 acc 71.094 (72.662)	lr 0.02158
Train [29][3090/3239]	Time 0.237 (0.567)	Data Time 0.001 (0.011)	Loss 3.2076 (3.1168)	Entropy 1.48263 (1.49351)	Top-1 acc 48.438 (49.516)	Top-5 acc 71.484 (72.661)	lr 0.02158
Train [29][3100/3239]	Time 0.220 (0.567)	Data Time 0.001 (0.011)	Loss 3.0725 (3.1170)	Entropy 1.48258 (1.49348)	Top-1 acc 50.781 (49.512)	Top-5 acc 73.438 (72.661)	lr 0.02158
Train [29][3110/3239]	Time 0.167 (0.567)	Data Time 0.001 (0.011)	Loss 3.1897 (3.1167)	Entropy 1.48239 (1.49344)	Top-1 acc 49.219 (49.521)	Top-5 acc 71.875 (72.667)	lr 0.02158
Train [29][3120/3239]	Time 0.244 (0.566)	Data Time 0.001 (0.011)	Loss 3.1390 (3.1166)	Entropy 1.48222 (1.49341)	Top-1 acc 51.172 (49.524)	Top-5 acc 74.609 (72.672)	lr 0.02158
Train [29][3130/3239]	Time 0.204 (0.566)	Data Time 0.001 (0.011)	Loss 3.1038 (3.1166)	Entropy 1.48215 (1.49337)	Top-1 acc 51.562 (49.525)	Top-5 acc 72.266 (72.674)	lr 0.02157
Train [29][3140/3239]	Time 0.247 (0.565)	Data Time 0.001 (0.011)	Loss 3.2255 (3.1166)	Entropy 1.48202 (1.49334)	Top-1 acc 46.484 (49.524)	Top-5 acc 72.266 (72.672)	lr 0.02157
Train [29][3150/3239]	Time 0.256 (0.565)	Data Time 0.001 (0.011)	Loss 3.2396 (3.1168)	Entropy 1.48194 (1.49330)	Top-1 acc 47.656 (49.520)	Top-5 acc 67.969 (72.668)	lr 0.02157
Train [29][3160/3239]	Time 0.208 (0.565)	Data Time 0.001 (0.011)	Loss 2.9397 (3.1167)	Entropy 1.48197 (1.49326)	Top-1 acc 51.953 (49.521)	Top-5 acc 76.562 (72.670)	lr 0.02157
Train [29][3170/3239]	Time 0.297 (0.564)	Data Time 0.001 (0.011)	Loss 3.0964 (3.1167)	Entropy 1.48189 (1.49323)	Top-1 acc 47.266 (49.519)	Top-5 acc 72.656 (72.669)	lr 0.02157
Train [29][3180/3239]	Time 0.223 (0.564)	Data Time 0.000 (0.011)	Loss 3.0522 (3.1168)	Entropy 1.48184 (1.49319)	Top-1 acc 50.391 (49.516)	Top-5 acc 74.219 (72.666)	lr 0.02157
Train [29][3190/3239]	Time 0.214 (0.563)	Data Time 0.000 (0.011)	Loss 3.0121 (3.1169)	Entropy 1.48183 (1.49316)	Top-1 acc 48.828 (49.514)	Top-5 acc 76.562 (72.662)	lr 0.02157
Train [29][3200/3239]	Time 0.202 (0.563)	Data Time 0.000 (0.011)	Loss 3.0993 (3.1169)	Entropy 1.48192 (1.49312)	Top-1 acc 48.828 (49.512)	Top-5 acc 73.047 (72.663)	lr 0.02157
Train [29][3210/3239]	Time 0.330 (0.563)	Data Time 0.000 (0.011)	Loss 3.0465 (3.1169)	Entropy 1.48175 (1.49309)	Top-1 acc 50.781 (49.512)	Top-5 acc 72.656 (72.663)	lr 0.02157
Train [29][3220/3239]	Time 0.226 (0.562)	Data Time 0.000 (0.011)	Loss 3.0302 (3.1171)	Entropy 1.48170 (1.49305)	Top-1 acc 50.391 (49.506)	Top-5 acc 73.438 (72.659)	lr 0.02157
Train [29][3230/3239]	Time 0.215 (0.562)	Data Time 0.000 (0.011)	Loss 3.1988 (3.1171)	Entropy 1.48165 (1.49302)	Top-1 acc 46.875 (49.506)	Top-5 acc 68.750 (72.659)	lr 0.02157
Train [29][3239/3239]	Time 2.125 (0.561)	Data Time 0.000 (0.011)	Loss 3.0050 (3.1170)	Entropy 1.48165 (1.49298)	Top-1 acc 48.148 (49.508)	Top-5 acc 72.840 (72.658)	lr 0.02157
==========Valid [29/120]	loss 1.921	top-1 acc 57.243 (57.243)	top-5 acc 80.056	Train top-1 49.508	top-5 72.658	Entropy 1.48165	Latency-None: 0.000ms	Flops: 550.59M
Train [30][0/3239]	Time 30.675 (30.675)	Data Time 30.176 (30.176)	Loss 3.1256 (3.1256)	Entropy 1.48154 (1.48154)	Top-1 acc 51.172 (51.172)	Top-5 acc 73.438 (73.438)	lr 0.02157
Train [30][10/3239]	Time 2.944 (3.363)	Data Time 0.002 (2.745)	Loss 3.0350 (3.0529)	Entropy 1.48154 (1.48154)	Top-1 acc 53.516 (50.249)	Top-5 acc 73.047 (74.467)	lr 0.02157
Train [30][20/3239]	Time 0.179 (1.878)	Data Time 0.001 (1.440)	Loss 3.0948 (3.0693)	Entropy 1.48154 (1.48154)	Top-1 acc 46.484 (50.167)	Top-5 acc 74.609 (74.386)	lr 0.02157
Train [30][30/3239]	Time 0.215 (1.418)	Data Time 0.001 (0.977)	Loss 3.0003 (3.0813)	Entropy 1.48154 (1.48154)	Top-1 acc 48.828 (50.076)	Top-5 acc 73.828 (73.677)	lr 0.02157
Train [30][40/3239]	Time 0.216 (1.184)	Data Time 0.001 (0.739)	Loss 3.2931 (3.0807)	Entropy 1.48151 (1.48153)	Top-1 acc 45.703 (50.152)	Top-5 acc 69.531 (73.704)	lr 0.02156
Train [30][50/3239]	Time 0.243 (1.042)	Data Time 0.001 (0.595)	Loss 3.2137 (3.0846)	Entropy 1.48146 (1.48152)	Top-1 acc 48.828 (50.054)	Top-5 acc 71.484 (73.560)	lr 0.02156
Train [30][60/3239]	Time 0.197 (0.944)	Data Time 0.001 (0.498)	Loss 3.0824 (3.0910)	Entropy 1.48147 (1.48151)	Top-1 acc 53.906 (49.891)	Top-5 acc 74.219 (73.450)	lr 0.02156
Train [30][70/3239]	Time 0.206 (0.874)	Data Time 0.001 (0.428)	Loss 2.9989 (3.0825)	Entropy 1.48133 (1.48150)	Top-1 acc 50.391 (50.215)	Top-5 acc 75.781 (73.636)	lr 0.02156
Train [30][80/3239]	Time 0.195 (0.820)	Data Time 0.001 (0.375)	Loss 3.1823 (3.0804)	Entropy 1.48118 (1.48147)	Top-1 acc 48.828 (50.159)	Top-5 acc 70.312 (73.650)	lr 0.02156
Train [30][90/3239]	Time 0.307 (0.780)	Data Time 0.001 (0.334)	Loss 3.1169 (3.0834)	Entropy 1.48111 (1.48144)	Top-1 acc 48.828 (50.030)	Top-5 acc 72.266 (73.442)	lr 0.02156
Train [30][100/3239]	Time 0.227 (0.746)	Data Time 0.001 (0.301)	Loss 2.9059 (3.0788)	Entropy 1.48109 (1.48140)	Top-1 acc 51.562 (50.213)	Top-5 acc 75.781 (73.507)	lr 0.02156
Train [30][110/3239]	Time 0.216 (0.718)	Data Time 0.001 (0.274)	Loss 3.2459 (3.0798)	Entropy 1.48096 (1.48137)	Top-1 acc 48.828 (50.253)	Top-5 acc 69.922 (73.462)	lr 0.02156
Train [30][120/3239]	Time 2.408 (0.696)	Data Time 0.001 (0.252)	Loss 2.9679 (3.0764)	Entropy 1.48096 (1.48134)	Top-1 acc 52.344 (50.336)	Top-5 acc 74.219 (73.467)	lr 0.02156
Train [30][130/3239]	Time 0.221 (0.661)	Data Time 0.001 (0.233)	Loss 2.7194 (3.0776)	Entropy 1.48088 (1.48130)	Top-1 acc 60.547 (50.259)	Top-5 acc 81.641 (73.440)	lr 0.02156
Train [30][140/3239]	Time 0.200 (0.646)	Data Time 0.001 (0.216)	Loss 3.1772 (3.0777)	Entropy 1.48083 (1.48127)	Top-1 acc 50.781 (50.355)	Top-5 acc 71.094 (73.429)	lr 0.02156
Train [30][150/3239]	Time 0.310 (0.633)	Data Time 0.001 (0.202)	Loss 3.1783 (3.0795)	Entropy 1.48079 (1.48124)	Top-1 acc 50.391 (50.341)	Top-5 acc 73.828 (73.412)	lr 0.02156
Train [30][160/3239]	Time 0.235 (0.622)	Data Time 0.001 (0.190)	Loss 3.0794 (3.0764)	Entropy 1.48079 (1.48121)	Top-1 acc 51.953 (50.393)	Top-5 acc 73.828 (73.450)	lr 0.02156
Train [30][170/3239]	Time 0.210 (0.612)	Data Time 0.001 (0.179)	Loss 3.2393 (3.0802)	Entropy 1.48067 (1.48118)	Top-1 acc 48.438 (50.288)	Top-5 acc 67.188 (73.360)	lr 0.02156
Train [30][180/3239]	Time 0.272 (0.826)	Data Time 0.003 (0.169)	Loss 3.1524 (3.0797)	Entropy 1.48056 (1.48115)	Top-1 acc 51.172 (50.315)	Top-5 acc 73.438 (73.371)	lr 0.02155
Train [30][190/3239]	Time 0.208 (0.813)	Data Time 0.002 (0.160)	Loss 2.9624 (3.0800)	Entropy 1.48047 (1.48112)	Top-1 acc 53.906 (50.348)	Top-5 acc 74.609 (73.372)	lr 0.02155
Train [30][200/3239]	Time 0.245 (0.794)	Data Time 0.001 (0.152)	Loss 3.0984 (3.0820)	Entropy 1.48042 (1.48109)	Top-1 acc 54.297 (50.352)	Top-5 acc 73.047 (73.352)	lr 0.02155
Train [30][210/3239]	Time 0.230 (0.778)	Data Time 0.002 (0.145)	Loss 3.0624 (3.0832)	Entropy 1.48038 (1.48105)	Top-1 acc 48.438 (50.361)	Top-5 acc 75.391 (73.310)	lr 0.02155
Train [30][220/3239]	Time 0.214 (0.763)	Data Time 0.001 (0.139)	Loss 3.1229 (3.0842)	Entropy 1.48039 (1.48102)	Top-1 acc 45.703 (50.316)	Top-5 acc 71.094 (73.271)	lr 0.02155
Train [30][230/3239]	Time 2.421 (0.749)	Data Time 0.002 (0.133)	Loss 3.1649 (3.0857)	Entropy 1.48039 (1.48100)	Top-1 acc 51.172 (50.284)	Top-5 acc 71.875 (73.223)	lr 0.02155
Train [30][240/3239]	Time 0.156 (0.727)	Data Time 0.001 (0.127)	Loss 3.0732 (3.0835)	Entropy 1.48020 (1.48096)	Top-1 acc 49.609 (50.318)	Top-5 acc 73.047 (73.248)	lr 0.02155
Train [30][250/3239]	Time 0.238 (0.716)	Data Time 0.002 (0.123)	Loss 2.9895 (3.0824)	Entropy 1.48013 (1.48093)	Top-1 acc 53.125 (50.338)	Top-5 acc 78.906 (73.290)	lr 0.02155
Train [30][260/3239]	Time 0.240 (0.706)	Data Time 0.001 (0.118)	Loss 2.9361 (3.0809)	Entropy 1.48005 (1.48090)	Top-1 acc 54.297 (50.361)	Top-5 acc 75.000 (73.331)	lr 0.02155
Train [30][270/3239]	Time 0.334 (0.696)	Data Time 0.001 (0.114)	Loss 3.0193 (3.0814)	Entropy 1.48000 (1.48086)	Top-1 acc 49.219 (50.342)	Top-5 acc 78.516 (73.324)	lr 0.02155
Train [30][280/3239]	Time 0.207 (0.686)	Data Time 0.001 (0.110)	Loss 2.9339 (3.0794)	Entropy 1.47996 (1.48083)	Top-1 acc 53.906 (50.366)	Top-5 acc 77.344 (73.381)	lr 0.02155
Train [30][290/3239]	Time 0.217 (0.678)	Data Time 0.001 (0.106)	Loss 3.1109 (3.0791)	Entropy 1.47997 (1.48080)	Top-1 acc 49.219 (50.342)	Top-5 acc 72.656 (73.423)	lr 0.02155
Train [30][300/3239]	Time 0.228 (0.670)	Data Time 0.001 (0.103)	Loss 3.3123 (3.0795)	Entropy 1.47992 (1.48078)	Top-1 acc 45.312 (50.311)	Top-5 acc 63.672 (73.397)	lr 0.02155
Train [30][310/3239]	Time 0.192 (0.662)	Data Time 0.001 (0.099)	Loss 3.7494 (3.0812)	Entropy 1.47982 (1.48075)	Top-1 acc 37.500 (50.295)	Top-5 acc 61.719 (73.375)	lr 0.02155
Train [30][320/3239]	Time 0.233 (0.655)	Data Time 0.001 (0.096)	Loss 3.0057 (3.0802)	Entropy 1.47971 (1.48072)	Top-1 acc 55.078 (50.318)	Top-5 acc 74.609 (73.396)	lr 0.02154
Train [30][330/3239]	Time 0.246 (0.649)	Data Time 0.001 (0.094)	Loss 3.1117 (3.0797)	Entropy 1.47955 (1.48069)	Top-1 acc 50.391 (50.329)	Top-5 acc 72.266 (73.390)	lr 0.02154
Train [30][340/3239]	Time 2.434 (0.643)	Data Time 0.001 (0.091)	Loss 3.3501 (3.0795)	Entropy 1.47955 (1.48065)	Top-1 acc 42.969 (50.301)	Top-5 acc 68.750 (73.396)	lr 0.02154
Train [30][350/3239]	Time 0.284 (0.631)	Data Time 0.002 (0.088)	Loss 3.0742 (3.0816)	Entropy 1.47951 (1.48062)	Top-1 acc 51.562 (50.262)	Top-5 acc 75.000 (73.372)	lr 0.02154
Train [30][360/3239]	Time 0.245 (0.626)	Data Time 0.001 (0.086)	Loss 2.8924 (3.0824)	Entropy 1.47948 (1.48059)	Top-1 acc 53.125 (50.223)	Top-5 acc 73.438 (73.325)	lr 0.02154
Train [30][370/3239]	Time 0.239 (0.621)	Data Time 0.001 (0.084)	Loss 3.0692 (3.0835)	Entropy 1.47932 (1.48056)	Top-1 acc 53.906 (50.212)	Top-5 acc 72.266 (73.295)	lr 0.02154
Train [30][380/3239]	Time 0.222 (0.617)	Data Time 0.001 (0.081)	Loss 2.8501 (3.0814)	Entropy 1.47924 (1.48052)	Top-1 acc 57.031 (50.280)	Top-5 acc 79.297 (73.339)	lr 0.02154
Train [30][390/3239]	Time 0.218 (0.613)	Data Time 0.001 (0.079)	Loss 2.9999 (3.0820)	Entropy 1.47912 (1.48049)	Top-1 acc 50.000 (50.276)	Top-5 acc 74.219 (73.320)	lr 0.02154
Train [30][400/3239]	Time 0.267 (0.609)	Data Time 0.001 (0.077)	Loss 2.8674 (3.0812)	Entropy 1.47911 (1.48045)	Top-1 acc 56.250 (50.297)	Top-5 acc 78.125 (73.350)	lr 0.02154
Train [30][410/3239]	Time 0.221 (0.605)	Data Time 0.001 (0.076)	Loss 2.8473 (3.0793)	Entropy 1.47913 (1.48042)	Top-1 acc 52.344 (50.341)	Top-5 acc 78.906 (73.383)	lr 0.02154
Train [30][420/3239]	Time 0.219 (0.602)	Data Time 0.001 (0.074)	Loss 3.0512 (3.0792)	Entropy 1.47909 (1.48039)	Top-1 acc 50.000 (50.346)	Top-5 acc 74.609 (73.395)	lr 0.02154
Train [30][430/3239]	Time 0.244 (0.598)	Data Time 0.001 (0.072)	Loss 3.1061 (3.0787)	Entropy 1.47906 (1.48036)	Top-1 acc 46.484 (50.380)	Top-5 acc 71.875 (73.410)	lr 0.02154
Train [30][440/3239]	Time 0.252 (0.595)	Data Time 0.001 (0.071)	Loss 3.1137 (3.0781)	Entropy 1.47902 (1.48033)	Top-1 acc 46.484 (50.377)	Top-5 acc 71.875 (73.424)	lr 0.02154
Train [30][450/3239]	Time 2.411 (0.592)	Data Time 0.001 (0.069)	Loss 3.0497 (3.0772)	Entropy 1.47902 (1.48030)	Top-1 acc 51.953 (50.406)	Top-5 acc 74.219 (73.450)	lr 0.02154
Train [30][460/3239]	Time 0.221 (0.584)	Data Time 0.001 (0.068)	Loss 3.0979 (3.0768)	Entropy 1.47893 (1.48027)	Top-1 acc 48.438 (50.417)	Top-5 acc 73.438 (73.462)	lr 0.02154
Train [30][470/3239]	Time 0.238 (0.581)	Data Time 0.001 (0.066)	Loss 3.1643 (3.0766)	Entropy 1.47893 (1.48024)	Top-1 acc 51.172 (50.407)	Top-5 acc 70.312 (73.451)	lr 0.02153
Train [30][480/3239]	Time 0.219 (0.578)	Data Time 0.001 (0.065)	Loss 3.1225 (3.0758)	Entropy 1.47893 (1.48022)	Top-1 acc 49.219 (50.440)	Top-5 acc 74.219 (73.484)	lr 0.02153
Train [30][490/3239]	Time 0.357 (0.575)	Data Time 0.002 (0.064)	Loss 3.1352 (3.0762)	Entropy 1.47882 (1.48019)	Top-1 acc 49.609 (50.422)	Top-5 acc 73.047 (73.484)	lr 0.02153
Train [30][500/3239]	Time 0.207 (0.573)	Data Time 0.001 (0.062)	Loss 3.1594 (3.0782)	Entropy 1.47874 (1.48016)	Top-1 acc 47.656 (50.402)	Top-5 acc 72.656 (73.434)	lr 0.02153
Train [30][510/3239]	Time 0.226 (0.570)	Data Time 0.001 (0.061)	Loss 2.8459 (3.0776)	Entropy 1.47873 (1.48013)	Top-1 acc 58.594 (50.418)	Top-5 acc 76.953 (73.438)	lr 0.02153
Train [30][520/3239]	Time 0.217 (0.568)	Data Time 0.001 (0.060)	Loss 3.1958 (3.0793)	Entropy 1.47863 (1.48010)	Top-1 acc 44.141 (50.371)	Top-5 acc 70.703 (73.406)	lr 0.02153
Train [30][530/3239]	Time 0.227 (0.565)	Data Time 0.001 (0.059)	Loss 3.0950 (3.0798)	Entropy 1.47855 (1.48008)	Top-1 acc 49.609 (50.355)	Top-5 acc 72.656 (73.399)	lr 0.02153
Train [30][540/3239]	Time 0.509 (0.643)	Data Time 0.005 (0.058)	Loss 2.8999 (3.0791)	Entropy 1.47853 (1.48005)	Top-1 acc 52.734 (50.347)	Top-5 acc 75.391 (73.419)	lr 0.02153
Train [30][550/3239]	Time 0.188 (0.640)	Data Time 0.002 (0.057)	Loss 3.1559 (3.0788)	Entropy 1.47845 (1.48002)	Top-1 acc 51.562 (50.342)	Top-5 acc 69.922 (73.423)	lr 0.02153
Train [30][560/3239]	Time 2.333 (0.637)	Data Time 0.002 (0.056)	Loss 3.2905 (3.0792)	Entropy 1.47845 (1.47999)	Top-1 acc 46.094 (50.328)	Top-5 acc 69.922 (73.424)	lr 0.02153
Train [30][570/3239]	Time 0.202 (0.630)	Data Time 0.001 (0.055)	Loss 2.9132 (3.0794)	Entropy 1.47839 (1.47996)	Top-1 acc 54.297 (50.325)	Top-5 acc 77.344 (73.422)	lr 0.02153
Train [30][580/3239]	Time 0.190 (0.626)	Data Time 0.001 (0.054)	Loss 3.1655 (3.0784)	Entropy 1.47805 (1.47993)	Top-1 acc 48.047 (50.356)	Top-5 acc 73.047 (73.447)	lr 0.02153
Train [30][590/3239]	Time 0.215 (0.623)	Data Time 0.002 (0.053)	Loss 3.0273 (3.0783)	Entropy 1.47791 (1.47990)	Top-1 acc 51.172 (50.344)	Top-5 acc 73.828 (73.437)	lr 0.02153
Train [30][600/3239]	Time 0.324 (0.621)	Data Time 0.001 (0.052)	Loss 3.0180 (3.0784)	Entropy 1.47789 (1.47986)	Top-1 acc 52.344 (50.343)	Top-5 acc 75.391 (73.425)	lr 0.02153
Train [30][610/3239]	Time 0.235 (0.618)	Data Time 0.002 (0.051)	Loss 3.0603 (3.0791)	Entropy 1.47785 (1.47983)	Top-1 acc 50.781 (50.314)	Top-5 acc 75.391 (73.423)	lr 0.02152
Train [30][620/3239]	Time 0.216 (0.615)	Data Time 0.001 (0.051)	Loss 3.1550 (3.0801)	Entropy 1.47776 (1.47980)	Top-1 acc 51.953 (50.320)	Top-5 acc 67.578 (73.397)	lr 0.02152
Train [30][630/3239]	Time 0.201 (0.612)	Data Time 0.001 (0.050)	Loss 2.9721 (3.0808)	Entropy 1.47769 (1.47977)	Top-1 acc 53.906 (50.331)	Top-5 acc 77.734 (73.389)	lr 0.02152
Train [30][640/3239]	Time 0.211 (0.610)	Data Time 0.001 (0.049)	Loss 2.8792 (3.0798)	Entropy 1.47762 (1.47973)	Top-1 acc 55.078 (50.344)	Top-5 acc 76.953 (73.407)	lr 0.02152
Train [30][650/3239]	Time 0.232 (0.607)	Data Time 0.001 (0.048)	Loss 3.1652 (3.0795)	Entropy 1.47752 (1.47970)	Top-1 acc 48.828 (50.360)	Top-5 acc 69.531 (73.411)	lr 0.02152
Train [30][660/3239]	Time 0.348 (0.605)	Data Time 0.007 (0.048)	Loss 3.0351 (3.0790)	Entropy 1.47734 (1.47967)	Top-1 acc 50.391 (50.362)	Top-5 acc 76.172 (73.417)	lr 0.02152
Train [30][670/3239]	Time 2.357 (0.603)	Data Time 0.001 (0.047)	Loss 3.0895 (3.0794)	Entropy 1.47734 (1.47963)	Top-1 acc 52.344 (50.345)	Top-5 acc 72.266 (73.413)	lr 0.02152
Train [30][680/3239]	Time 0.226 (0.597)	Data Time 0.001 (0.046)	Loss 3.2264 (3.0803)	Entropy 1.47726 (1.47960)	Top-1 acc 45.703 (50.327)	Top-5 acc 70.703 (73.380)	lr 0.02152
Train [30][690/3239]	Time 0.219 (0.595)	Data Time 0.001 (0.046)	Loss 3.1567 (3.0801)	Entropy 1.47704 (1.47956)	Top-1 acc 50.000 (50.323)	Top-5 acc 72.656 (73.387)	lr 0.02152
Train [30][700/3239]	Time 0.268 (0.593)	Data Time 0.001 (0.045)	Loss 2.9425 (3.0804)	Entropy 1.47697 (1.47952)	Top-1 acc 48.828 (50.301)	Top-5 acc 76.953 (73.374)	lr 0.02152
Train [30][710/3239]	Time 0.279 (0.591)	Data Time 0.001 (0.044)	Loss 3.2202 (3.0815)	Entropy 1.47688 (1.47949)	Top-1 acc 51.562 (50.289)	Top-5 acc 71.875 (73.360)	lr 0.02152
Train [30][720/3239]	Time 0.307 (0.589)	Data Time 0.001 (0.044)	Loss 3.0999 (3.0812)	Entropy 1.47680 (1.47945)	Top-1 acc 50.000 (50.293)	Top-5 acc 73.047 (73.377)	lr 0.02152
Train [30][730/3239]	Time 0.224 (0.587)	Data Time 0.001 (0.043)	Loss 3.1848 (3.0821)	Entropy 1.47674 (1.47941)	Top-1 acc 48.828 (50.274)	Top-5 acc 71.094 (73.357)	lr 0.02152
Train [30][740/3239]	Time 0.221 (0.585)	Data Time 0.001 (0.043)	Loss 3.0273 (3.0827)	Entropy 1.47666 (1.47938)	Top-1 acc 53.125 (50.267)	Top-5 acc 71.484 (73.337)	lr 0.02152
Train [30][750/3239]	Time 0.204 (0.583)	Data Time 0.001 (0.042)	Loss 2.9096 (3.0823)	Entropy 1.47657 (1.47934)	Top-1 acc 55.078 (50.268)	Top-5 acc 77.734 (73.364)	lr 0.02151
Train [30][760/3239]	Time 0.168 (0.581)	Data Time 0.001 (0.042)	Loss 3.0917 (3.0828)	Entropy 1.47613 (1.47930)	Top-1 acc 50.000 (50.248)	Top-5 acc 72.656 (73.360)	lr 0.02151
Train [30][770/3239]	Time 0.228 (0.579)	Data Time 0.001 (0.041)	Loss 2.9227 (3.0825)	Entropy 1.47604 (1.47926)	Top-1 acc 52.734 (50.260)	Top-5 acc 74.219 (73.369)	lr 0.02151
Train [30][780/3239]	Time 2.268 (0.578)	Data Time 0.001 (0.041)	Loss 3.0297 (3.0828)	Entropy 1.47604 (1.47922)	Top-1 acc 51.172 (50.262)	Top-5 acc 76.562 (73.358)	lr 0.02151
Train [30][790/3239]	Time 0.260 (0.573)	Data Time 0.001 (0.040)	Loss 3.0858 (3.0830)	Entropy 1.47598 (1.47918)	Top-1 acc 48.828 (50.260)	Top-5 acc 74.219 (73.344)	lr 0.02151
Train [30][800/3239]	Time 0.225 (0.572)	Data Time 0.002 (0.040)	Loss 3.0743 (3.0833)	Entropy 1.47596 (1.47914)	Top-1 acc 49.219 (50.257)	Top-5 acc 72.656 (73.340)	lr 0.02151
Train [30][810/3239]	Time 0.213 (0.570)	Data Time 0.001 (0.039)	Loss 2.8801 (3.0826)	Entropy 1.47589 (1.47910)	Top-1 acc 50.781 (50.281)	Top-5 acc 78.125 (73.346)	lr 0.02151
Train [30][820/3239]	Time 0.215 (0.569)	Data Time 0.001 (0.039)	Loss 3.1390 (3.0825)	Entropy 1.47586 (1.47906)	Top-1 acc 49.219 (50.275)	Top-5 acc 71.484 (73.351)	lr 0.02151
Train [30][830/3239]	Time 0.187 (0.567)	Data Time 0.001 (0.038)	Loss 2.9896 (3.0824)	Entropy 1.47579 (1.47902)	Top-1 acc 52.734 (50.282)	Top-5 acc 77.734 (73.366)	lr 0.02151
Train [30][840/3239]	Time 0.203 (0.566)	Data Time 0.001 (0.038)	Loss 2.8237 (3.0822)	Entropy 1.47567 (1.47898)	Top-1 acc 54.297 (50.281)	Top-5 acc 79.688 (73.372)	lr 0.02151
Train [30][850/3239]	Time 0.218 (0.564)	Data Time 0.001 (0.037)	Loss 3.0003 (3.0826)	Entropy 1.47568 (1.47894)	Top-1 acc 49.609 (50.281)	Top-5 acc 73.828 (73.364)	lr 0.02151
Train [30][860/3239]	Time 0.209 (0.563)	Data Time 0.001 (0.037)	Loss 2.9853 (3.0822)	Entropy 1.47560 (1.47891)	Top-1 acc 53.516 (50.287)	Top-5 acc 76.172 (73.359)	lr 0.02151
Train [30][870/3239]	Time 0.225 (0.562)	Data Time 0.001 (0.037)	Loss 3.0129 (3.0824)	Entropy 1.47561 (1.47887)	Top-1 acc 51.562 (50.290)	Top-5 acc 71.094 (73.347)	lr 0.02151
Train [30][880/3239]	Time 0.212 (0.560)	Data Time 0.001 (0.036)	Loss 3.5945 (3.0824)	Entropy 1.47557 (1.47883)	Top-1 acc 40.625 (50.290)	Top-5 acc 63.281 (73.344)	lr 0.02151
Train [30][890/3239]	Time 2.396 (0.559)	Data Time 0.001 (0.036)	Loss 3.1708 (3.0828)	Entropy 1.47557 (1.47879)	Top-1 acc 47.656 (50.282)	Top-5 acc 72.266 (73.334)	lr 0.02151
Train [30][900/3239]	Time 0.204 (0.555)	Data Time 0.001 (0.035)	Loss 3.1802 (3.0830)	Entropy 1.47554 (1.47876)	Top-1 acc 48.828 (50.274)	Top-5 acc 69.531 (73.327)	lr 0.02150
Train [30][910/3239]	Time 0.290 (0.603)	Data Time 0.002 (0.035)	Loss 2.9798 (3.0829)	Entropy 1.47551 (1.47872)	Top-1 acc 53.125 (50.281)	Top-5 acc 72.656 (73.329)	lr 0.02150
Train [30][920/3239]	Time 0.229 (0.601)	Data Time 0.002 (0.035)	Loss 2.9905 (3.0832)	Entropy 1.47537 (1.47869)	Top-1 acc 48.438 (50.261)	Top-5 acc 76.562 (73.326)	lr 0.02150
Train [30][930/3239]	Time 0.213 (0.600)	Data Time 0.001 (0.034)	Loss 2.8936 (3.0826)	Entropy 1.47523 (1.47865)	Top-1 acc 54.688 (50.280)	Top-5 acc 78.125 (73.346)	lr 0.02150
Train [30][940/3239]	Time 0.198 (0.598)	Data Time 0.001 (0.034)	Loss 3.1452 (3.0825)	Entropy 1.47521 (1.47861)	Top-1 acc 50.000 (50.285)	Top-5 acc 70.703 (73.352)	lr 0.02150
Train [30][950/3239]	Time 0.342 (0.596)	Data Time 0.002 (0.034)	Loss 3.1514 (3.0826)	Entropy 1.47516 (1.47858)	Top-1 acc 50.000 (50.285)	Top-5 acc 73.047 (73.355)	lr 0.02150
Train [30][960/3239]	Time 0.210 (0.595)	Data Time 0.001 (0.033)	Loss 3.0504 (3.0823)	Entropy 1.47508 (1.47854)	Top-1 acc 54.297 (50.298)	Top-5 acc 76.562 (73.360)	lr 0.02150
Train [30][970/3239]	Time 0.196 (0.593)	Data Time 0.001 (0.033)	Loss 3.1341 (3.0826)	Entropy 1.47502 (1.47851)	Top-1 acc 49.219 (50.297)	Top-5 acc 72.656 (73.350)	lr 0.02150
Train [30][980/3239]	Time 0.218 (0.592)	Data Time 0.001 (0.033)	Loss 3.1044 (3.0828)	Entropy 1.47484 (1.47847)	Top-1 acc 48.438 (50.298)	Top-5 acc 74.609 (73.346)	lr 0.02150
Train [30][990/3239]	Time 0.269 (0.591)	Data Time 0.001 (0.032)	Loss 3.0424 (3.0829)	Entropy 1.47480 (1.47843)	Top-1 acc 48.828 (50.285)	Top-5 acc 73.828 (73.342)	lr 0.02150
Train [30][1000/3239]	Time 2.472 (0.589)	Data Time 0.001 (0.032)	Loss 3.0912 (3.0830)	Entropy 1.47480 (1.47840)	Top-1 acc 49.219 (50.281)	Top-5 acc 72.656 (73.342)	lr 0.02150
Train [30][1010/3239]	Time 0.258 (0.586)	Data Time 0.001 (0.032)	Loss 3.2228 (3.0835)	Entropy 1.47481 (1.47836)	Top-1 acc 50.000 (50.267)	Top-5 acc 71.094 (73.330)	lr 0.02150
Train [30][1020/3239]	Time 0.134 (0.584)	Data Time 0.001 (0.031)	Loss 3.0265 (3.0834)	Entropy 1.47479 (1.47833)	Top-1 acc 51.172 (50.270)	Top-5 acc 75.781 (73.331)	lr 0.02150
Train [30][1030/3239]	Time 0.199 (0.583)	Data Time 0.001 (0.031)	Loss 3.0319 (3.0830)	Entropy 1.47472 (1.47829)	Top-1 acc 49.219 (50.267)	Top-5 acc 73.438 (73.338)	lr 0.02150
Train [30][1040/3239]	Time 0.234 (0.582)	Data Time 0.001 (0.031)	Loss 3.1776 (3.0837)	Entropy 1.47457 (1.47826)	Top-1 acc 48.438 (50.259)	Top-5 acc 70.312 (73.321)	lr 0.02149
Train [30][1050/3239]	Time 0.204 (0.580)	Data Time 0.001 (0.031)	Loss 2.9849 (3.0836)	Entropy 1.47449 (1.47822)	Top-1 acc 51.562 (50.257)	Top-5 acc 76.172 (73.326)	lr 0.02149
Train [30][1060/3239]	Time 0.333 (0.579)	Data Time 0.001 (0.030)	Loss 3.1102 (3.0835)	Entropy 1.47446 (1.47818)	Top-1 acc 49.609 (50.261)	Top-5 acc 73.047 (73.324)	lr 0.02149
Train [30][1070/3239]	Time 0.213 (0.578)	Data Time 0.001 (0.030)	Loss 3.2475 (3.0834)	Entropy 1.47440 (1.47815)	Top-1 acc 46.484 (50.253)	Top-5 acc 69.531 (73.330)	lr 0.02149
Train [30][1080/3239]	Time 0.216 (0.577)	Data Time 0.001 (0.030)	Loss 3.0440 (3.0840)	Entropy 1.47436 (1.47811)	Top-1 acc 52.734 (50.240)	Top-5 acc 73.828 (73.322)	lr 0.02149
Train [30][1090/3239]	Time 0.218 (0.576)	Data Time 0.001 (0.030)	Loss 3.2063 (3.0845)	Entropy 1.47436 (1.47808)	Top-1 acc 45.703 (50.221)	Top-5 acc 71.484 (73.313)	lr 0.02149
Train [30][1100/3239]	Time 0.221 (0.574)	Data Time 0.001 (0.029)	Loss 3.0925 (3.0847)	Entropy 1.47436 (1.47805)	Top-1 acc 51.172 (50.215)	Top-5 acc 73.438 (73.308)	lr 0.02149
Train [30][1110/3239]	Time 2.352 (0.573)	Data Time 0.001 (0.029)	Loss 2.9891 (3.0847)	Entropy 1.47436 (1.47801)	Top-1 acc 50.000 (50.217)	Top-5 acc 73.828 (73.301)	lr 0.02149
Train [30][1120/3239]	Time 0.313 (0.570)	Data Time 0.001 (0.029)	Loss 3.1645 (3.0845)	Entropy 1.47434 (1.47798)	Top-1 acc 49.219 (50.222)	Top-5 acc 73.047 (73.299)	lr 0.02149
Train [30][1130/3239]	Time 0.199 (0.569)	Data Time 0.001 (0.029)	Loss 3.3222 (3.0847)	Entropy 1.47426 (1.47795)	Top-1 acc 44.141 (50.217)	Top-5 acc 67.969 (73.295)	lr 0.02149
Train [30][1140/3239]	Time 0.212 (0.568)	Data Time 0.001 (0.028)	Loss 3.3901 (3.0850)	Entropy 1.47417 (1.47792)	Top-1 acc 45.312 (50.218)	Top-5 acc 67.578 (73.292)	lr 0.02149
Train [30][1150/3239]	Time 0.213 (0.567)	Data Time 0.002 (0.028)	Loss 3.0909 (3.0849)	Entropy 1.47410 (1.47788)	Top-1 acc 50.000 (50.220)	Top-5 acc 78.125 (73.295)	lr 0.02149
Train [30][1160/3239]	Time 0.208 (0.566)	Data Time 0.001 (0.028)	Loss 3.1707 (3.0843)	Entropy 1.47404 (1.47785)	Top-1 acc 49.609 (50.235)	Top-5 acc 73.828 (73.310)	lr 0.02149
Train [30][1170/3239]	Time 0.209 (0.565)	Data Time 0.001 (0.028)	Loss 3.1811 (3.0843)	Entropy 1.47400 (1.47782)	Top-1 acc 44.531 (50.230)	Top-5 acc 71.484 (73.312)	lr 0.02149
Train [30][1180/3239]	Time 0.221 (0.564)	Data Time 0.001 (0.027)	Loss 3.1947 (3.0842)	Entropy 1.47396 (1.47778)	Top-1 acc 47.266 (50.234)	Top-5 acc 70.312 (73.309)	lr 0.02148
Train [30][1190/3239]	Time 0.182 (0.563)	Data Time 0.001 (0.027)	Loss 3.0964 (3.0840)	Entropy 1.47388 (1.47775)	Top-1 acc 49.219 (50.247)	Top-5 acc 73.438 (73.310)	lr 0.02148
Train [30][1200/3239]	Time 0.204 (0.562)	Data Time 0.001 (0.027)	Loss 3.0074 (3.0838)	Entropy 1.47385 (1.47772)	Top-1 acc 50.391 (50.256)	Top-5 acc 75.781 (73.311)	lr 0.02148
Train [30][1210/3239]	Time 0.208 (0.561)	Data Time 0.001 (0.027)	Loss 2.9721 (3.0841)	Entropy 1.47376 (1.47769)	Top-1 acc 54.297 (50.250)	Top-5 acc 75.781 (73.311)	lr 0.02148
Train [30][1220/3239]	Time 2.420 (0.560)	Data Time 0.001 (0.027)	Loss 3.0469 (3.0841)	Entropy 1.47376 (1.47766)	Top-1 acc 48.047 (50.243)	Top-5 acc 76.172 (73.311)	lr 0.02148
Train [30][1230/3239]	Time 0.284 (0.557)	Data Time 0.002 (0.026)	Loss 3.1374 (3.0847)	Entropy 1.47366 (1.47762)	Top-1 acc 49.219 (50.236)	Top-5 acc 71.875 (73.302)	lr 0.02148
Train [30][1240/3239]	Time 0.205 (0.556)	Data Time 0.001 (0.026)	Loss 3.0337 (3.0846)	Entropy 1.47364 (1.47759)	Top-1 acc 50.781 (50.242)	Top-5 acc 75.000 (73.303)	lr 0.02148
Train [30][1250/3239]	Time 0.253 (0.555)	Data Time 0.002 (0.026)	Loss 2.9403 (3.0844)	Entropy 1.47362 (1.47756)	Top-1 acc 49.609 (50.247)	Top-5 acc 78.516 (73.302)	lr 0.02148
Train [30][1260/3239]	Time 0.231 (0.554)	Data Time 0.002 (0.026)	Loss 3.0453 (3.0848)	Entropy 1.47352 (1.47753)	Top-1 acc 47.266 (50.232)	Top-5 acc 75.000 (73.297)	lr 0.02148
Train [30][1270/3239]	Time 0.253 (0.585)	Data Time 0.002 (0.026)	Loss 3.1543 (3.0846)	Entropy 1.47343 (1.47750)	Top-1 acc 51.562 (50.247)	Top-5 acc 70.312 (73.297)	lr 0.02148
Train [30][1280/3239]	Time 0.177 (0.585)	Data Time 0.002 (0.025)	Loss 2.9485 (3.0847)	Entropy 1.47340 (1.47746)	Top-1 acc 54.688 (50.238)	Top-5 acc 78.125 (73.296)	lr 0.02148
Train [30][1290/3239]	Time 0.349 (0.584)	Data Time 0.003 (0.025)	Loss 2.8465 (3.0843)	Entropy 1.47334 (1.47743)	Top-1 acc 55.078 (50.251)	Top-5 acc 76.562 (73.299)	lr 0.02148
Train [30][1300/3239]	Time 0.139 (0.583)	Data Time 0.001 (0.025)	Loss 3.0296 (3.0840)	Entropy 1.47328 (1.47740)	Top-1 acc 51.172 (50.252)	Top-5 acc 76.562 (73.306)	lr 0.02148
Train [30][1310/3239]	Time 0.223 (0.582)	Data Time 0.001 (0.025)	Loss 3.5282 (3.0841)	Entropy 1.47314 (1.47737)	Top-1 acc 43.750 (50.252)	Top-5 acc 63.672 (73.305)	lr 0.02148
Train [30][1320/3239]	Time 0.217 (0.581)	Data Time 0.001 (0.025)	Loss 3.1446 (3.0844)	Entropy 1.47304 (1.47734)	Top-1 acc 46.875 (50.250)	Top-5 acc 71.484 (73.293)	lr 0.02147
Train [30][1330/3239]	Time 2.472 (0.580)	Data Time 0.001 (0.025)	Loss 3.0672 (3.0845)	Entropy 1.47304 (1.47730)	Top-1 acc 53.516 (50.247)	Top-5 acc 74.609 (73.293)	lr 0.02147
Train [30][1340/3239]	Time 0.217 (0.577)	Data Time 0.001 (0.024)	Loss 3.0579 (3.0844)	Entropy 1.47293 (1.47727)	Top-1 acc 46.875 (50.250)	Top-5 acc 73.047 (73.294)	lr 0.02147
Train [30][1350/3239]	Time 0.309 (0.576)	Data Time 0.001 (0.024)	Loss 3.1064 (3.0852)	Entropy 1.47273 (1.47724)	Top-1 acc 48.828 (50.235)	Top-5 acc 74.609 (73.281)	lr 0.02147
Train [30][1360/3239]	Time 0.222 (0.575)	Data Time 0.001 (0.024)	Loss 3.1738 (3.0853)	Entropy 1.47270 (1.47720)	Top-1 acc 51.953 (50.240)	Top-5 acc 70.312 (73.269)	lr 0.02147
Train [30][1370/3239]	Time 0.216 (0.574)	Data Time 0.001 (0.024)	Loss 3.1799 (3.0855)	Entropy 1.47265 (1.47717)	Top-1 acc 48.438 (50.230)	Top-5 acc 72.656 (73.263)	lr 0.02147
Train [30][1380/3239]	Time 0.199 (0.573)	Data Time 0.001 (0.024)	Loss 3.2366 (3.0855)	Entropy 1.47262 (1.47714)	Top-1 acc 44.922 (50.226)	Top-5 acc 69.141 (73.264)	lr 0.02147
Train [30][1390/3239]	Time 0.216 (0.572)	Data Time 0.002 (0.024)	Loss 3.1840 (3.0859)	Entropy 1.47250 (1.47711)	Top-1 acc 48.828 (50.215)	Top-5 acc 70.703 (73.262)	lr 0.02147
Train [30][1400/3239]	Time 0.155 (0.571)	Data Time 0.001 (0.024)	Loss 3.2796 (3.0860)	Entropy 1.47243 (1.47707)	Top-1 acc 48.047 (50.209)	Top-5 acc 67.578 (73.258)	lr 0.02147
Train [30][1410/3239]	Time 0.343 (0.570)	Data Time 0.001 (0.023)	Loss 3.2223 (3.0864)	Entropy 1.47227 (1.47704)	Top-1 acc 47.266 (50.197)	Top-5 acc 69.531 (73.249)	lr 0.02147
Train [30][1420/3239]	Time 0.250 (0.569)	Data Time 0.002 (0.023)	Loss 3.1189 (3.0866)	Entropy 1.47224 (1.47701)	Top-1 acc 48.047 (50.201)	Top-5 acc 75.781 (73.247)	lr 0.02147
Train [30][1430/3239]	Time 0.245 (0.569)	Data Time 0.001 (0.023)	Loss 3.2741 (3.0866)	Entropy 1.47219 (1.47697)	Top-1 acc 48.047 (50.199)	Top-5 acc 68.359 (73.242)	lr 0.02147
Train [30][1440/3239]	Time 2.396 (0.568)	Data Time 0.001 (0.023)	Loss 3.0626 (3.0864)	Entropy 1.47219 (1.47694)	Top-1 acc 53.125 (50.205)	Top-5 acc 73.828 (73.247)	lr 0.02147
Train [30][1450/3239]	Time 0.280 (0.566)	Data Time 0.001 (0.023)	Loss 3.0238 (3.0865)	Entropy 1.47215 (1.47691)	Top-1 acc 50.781 (50.211)	Top-5 acc 73.438 (73.245)	lr 0.02147
Train [30][1460/3239]	Time 0.241 (0.565)	Data Time 0.001 (0.023)	Loss 2.8747 (3.0865)	Entropy 1.47213 (1.47687)	Top-1 acc 54.297 (50.211)	Top-5 acc 78.125 (73.244)	lr 0.02147
Train [30][1470/3239]	Time 0.208 (0.564)	Data Time 0.001 (0.022)	Loss 3.0488 (3.0861)	Entropy 1.47208 (1.47684)	Top-1 acc 50.000 (50.224)	Top-5 acc 73.047 (73.246)	lr 0.02146
Train [30][1480/3239]	Time 0.211 (0.563)	Data Time 0.001 (0.022)	Loss 3.1015 (3.0856)	Entropy 1.47202 (1.47681)	Top-1 acc 50.391 (50.237)	Top-5 acc 73.438 (73.257)	lr 0.02146
Train [30][1490/3239]	Time 0.214 (0.562)	Data Time 0.001 (0.022)	Loss 2.8151 (3.0852)	Entropy 1.47202 (1.47678)	Top-1 acc 57.422 (50.241)	Top-5 acc 78.125 (73.267)	lr 0.02146
Train [30][1500/3239]	Time 0.221 (0.562)	Data Time 0.001 (0.022)	Loss 2.9702 (3.0851)	Entropy 1.47200 (1.47674)	Top-1 acc 50.781 (50.247)	Top-5 acc 80.469 (73.275)	lr 0.02146
Train [30][1510/3239]	Time 0.254 (0.561)	Data Time 0.001 (0.022)	Loss 3.2053 (3.0856)	Entropy 1.47197 (1.47671)	Top-1 acc 46.484 (50.235)	Top-5 acc 71.875 (73.268)	lr 0.02146
Train [30][1520/3239]	Time 0.316 (0.560)	Data Time 0.001 (0.022)	Loss 3.3063 (3.0860)	Entropy 1.47193 (1.47668)	Top-1 acc 48.047 (50.228)	Top-5 acc 68.750 (73.268)	lr 0.02146
Train [30][1530/3239]	Time 0.235 (0.559)	Data Time 0.002 (0.022)	Loss 3.1857 (3.0862)	Entropy 1.47188 (1.47665)	Top-1 acc 53.516 (50.230)	Top-5 acc 71.484 (73.268)	lr 0.02146
Train [30][1540/3239]	Time 0.253 (0.559)	Data Time 0.001 (0.022)	Loss 3.1283 (3.0862)	Entropy 1.47179 (1.47662)	Top-1 acc 48.828 (50.234)	Top-5 acc 71.094 (73.268)	lr 0.02146
Train [30][1550/3239]	Time 2.415 (0.558)	Data Time 0.002 (0.021)	Loss 3.2402 (3.0863)	Entropy 1.47179 (1.47659)	Top-1 acc 43.750 (50.225)	Top-5 acc 71.484 (73.268)	lr 0.02146
Train [30][1560/3239]	Time 0.244 (0.556)	Data Time 0.002 (0.021)	Loss 3.0853 (3.0859)	Entropy 1.47166 (1.47656)	Top-1 acc 51.172 (50.231)	Top-5 acc 72.656 (73.271)	lr 0.02146
Train [30][1570/3239]	Time 0.197 (0.555)	Data Time 0.001 (0.021)	Loss 3.2642 (3.0862)	Entropy 1.47159 (1.47653)	Top-1 acc 46.875 (50.222)	Top-5 acc 68.750 (73.266)	lr 0.02146
Train [30][1580/3239]	Time 0.203 (0.554)	Data Time 0.001 (0.021)	Loss 3.2846 (3.0865)	Entropy 1.47151 (1.47649)	Top-1 acc 44.531 (50.215)	Top-5 acc 69.531 (73.255)	lr 0.02146
Train [30][1590/3239]	Time 0.220 (0.554)	Data Time 0.001 (0.021)	Loss 2.9464 (3.0863)	Entropy 1.47140 (1.47646)	Top-1 acc 50.781 (50.224)	Top-5 acc 77.734 (73.259)	lr 0.02146
Train [30][1600/3239]	Time 0.214 (0.553)	Data Time 0.001 (0.021)	Loss 3.0333 (3.0863)	Entropy 1.47135 (1.47643)	Top-1 acc 56.641 (50.224)	Top-5 acc 71.875 (73.261)	lr 0.02146
Train [30][1610/3239]	Time 0.225 (0.552)	Data Time 0.001 (0.021)	Loss 2.9962 (3.0863)	Entropy 1.47107 (1.47640)	Top-1 acc 52.344 (50.218)	Top-5 acc 77.344 (73.260)	lr 0.02145
Train [30][1620/3239]	Time 0.218 (0.552)	Data Time 0.001 (0.021)	Loss 3.2498 (3.0866)	Entropy 1.47111 (1.47637)	Top-1 acc 48.047 (50.217)	Top-5 acc 69.531 (73.252)	lr 0.02145
Train [30][1630/3239]	Time 0.407 (0.575)	Data Time 0.003 (0.020)	Loss 3.1664 (3.0868)	Entropy 1.47104 (1.47633)	Top-1 acc 50.781 (50.215)	Top-5 acc 73.047 (73.251)	lr 0.02145
Train [30][1640/3239]	Time 0.217 (0.576)	Data Time 0.002 (0.020)	Loss 3.1995 (3.0867)	Entropy 1.47102 (1.47630)	Top-1 acc 46.875 (50.217)	Top-5 acc 70.312 (73.253)	lr 0.02145
Train [30][1650/3239]	Time 0.275 (0.575)	Data Time 0.002 (0.020)	Loss 3.0272 (3.0863)	Entropy 1.47096 (1.47627)	Top-1 acc 53.906 (50.224)	Top-5 acc 76.562 (73.262)	lr 0.02145
Train [30][1660/3239]	Time 2.372 (0.574)	Data Time 0.004 (0.020)	Loss 3.1491 (3.0865)	Entropy 1.47096 (1.47624)	Top-1 acc 51.562 (50.222)	Top-5 acc 71.875 (73.261)	lr 0.02145
Train [30][1670/3239]	Time 0.214 (0.572)	Data Time 0.001 (0.020)	Loss 3.2422 (3.0867)	Entropy 1.47071 (1.47620)	Top-1 acc 46.484 (50.220)	Top-5 acc 67.969 (73.257)	lr 0.02145
Train [30][1680/3239]	Time 0.260 (0.571)	Data Time 0.001 (0.020)	Loss 3.0462 (3.0867)	Entropy 1.47069 (1.47617)	Top-1 acc 50.781 (50.218)	Top-5 acc 73.828 (73.256)	lr 0.02145
Train [30][1690/3239]	Time 0.342 (0.570)	Data Time 0.001 (0.020)	Loss 3.2115 (3.0869)	Entropy 1.47068 (1.47614)	Top-1 acc 50.000 (50.214)	Top-5 acc 71.484 (73.252)	lr 0.02145
Train [30][1700/3239]	Time 0.148 (0.570)	Data Time 0.001 (0.020)	Loss 3.1233 (3.0869)	Entropy 1.47070 (1.47611)	Top-1 acc 48.438 (50.215)	Top-5 acc 70.312 (73.250)	lr 0.02145
Train [30][1710/3239]	Time 0.237 (0.569)	Data Time 0.001 (0.020)	Loss 3.0242 (3.0866)	Entropy 1.47068 (1.47607)	Top-1 acc 49.609 (50.216)	Top-5 acc 76.562 (73.260)	lr 0.02145
Train [30][1720/3239]	Time 0.219 (0.568)	Data Time 0.001 (0.020)	Loss 3.1793 (3.0867)	Entropy 1.47061 (1.47604)	Top-1 acc 43.750 (50.211)	Top-5 acc 70.312 (73.259)	lr 0.02145
Train [30][1730/3239]	Time 0.235 (0.568)	Data Time 0.001 (0.019)	Loss 3.2867 (3.0867)	Entropy 1.47061 (1.47601)	Top-1 acc 46.094 (50.210)	Top-5 acc 69.531 (73.259)	lr 0.02145
Train [30][1740/3239]	Time 0.162 (0.567)	Data Time 0.001 (0.019)	Loss 3.1226 (3.0871)	Entropy 1.47064 (1.47598)	Top-1 acc 48.828 (50.200)	Top-5 acc 72.656 (73.246)	lr 0.02145
Train [30][1750/3239]	Time 0.343 (0.566)	Data Time 0.001 (0.019)	Loss 3.1636 (3.0870)	Entropy 1.47063 (1.47595)	Top-1 acc 50.000 (50.204)	Top-5 acc 72.266 (73.248)	lr 0.02144
Train [30][1760/3239]	Time 0.170 (0.565)	Data Time 0.001 (0.019)	Loss 2.8943 (3.0870)	Entropy 1.47061 (1.47592)	Top-1 acc 53.906 (50.203)	Top-5 acc 78.125 (73.249)	lr 0.02144
Train [30][1770/3239]	Time 2.340 (0.565)	Data Time 0.002 (0.019)	Loss 3.1140 (3.0874)	Entropy 1.47061 (1.47589)	Top-1 acc 50.781 (50.191)	Top-5 acc 71.875 (73.239)	lr 0.02144
Train [30][1780/3239]	Time 0.225 (0.563)	Data Time 0.001 (0.019)	Loss 3.2278 (3.0871)	Entropy 1.47051 (1.47586)	Top-1 acc 48.828 (50.203)	Top-5 acc 69.141 (73.242)	lr 0.02144
Train [30][1790/3239]	Time 0.245 (0.562)	Data Time 0.001 (0.019)	Loss 3.0369 (3.0871)	Entropy 1.47046 (1.47583)	Top-1 acc 48.438 (50.204)	Top-5 acc 74.219 (73.245)	lr 0.02144
Train [30][1800/3239]	Time 0.236 (0.562)	Data Time 0.001 (0.019)	Loss 3.3071 (3.0871)	Entropy 1.47036 (1.47580)	Top-1 acc 43.750 (50.201)	Top-5 acc 70.703 (73.250)	lr 0.02144
Train [30][1810/3239]	Time 0.308 (0.561)	Data Time 0.001 (0.019)	Loss 3.0814 (3.0871)	Entropy 1.47019 (1.47577)	Top-1 acc 49.609 (50.202)	Top-5 acc 76.172 (73.248)	lr 0.02144
Train [30][1820/3239]	Time 0.225 (0.560)	Data Time 0.001 (0.019)	Loss 2.9319 (3.0870)	Entropy 1.47011 (1.47574)	Top-1 acc 52.344 (50.203)	Top-5 acc 79.297 (73.255)	lr 0.02144
Train [30][1830/3239]	Time 0.228 (0.560)	Data Time 0.001 (0.018)	Loss 3.0708 (3.0872)	Entropy 1.46997 (1.47571)	Top-1 acc 49.609 (50.204)	Top-5 acc 74.219 (73.245)	lr 0.02144
Train [30][1840/3239]	Time 0.190 (0.559)	Data Time 0.001 (0.018)	Loss 3.2376 (3.0876)	Entropy 1.46993 (1.47568)	Top-1 acc 46.094 (50.200)	Top-5 acc 70.312 (73.236)	lr 0.02144
Train [30][1850/3239]	Time 0.196 (0.559)	Data Time 0.001 (0.018)	Loss 3.1255 (3.0875)	Entropy 1.46977 (1.47564)	Top-1 acc 50.391 (50.206)	Top-5 acc 71.484 (73.242)	lr 0.02144
Train [30][1860/3239]	Time 0.311 (0.558)	Data Time 0.001 (0.018)	Loss 2.8356 (3.0873)	Entropy 1.46974 (1.47561)	Top-1 acc 54.297 (50.206)	Top-5 acc 77.344 (73.249)	lr 0.02144
Train [30][1870/3239]	Time 0.217 (0.557)	Data Time 0.001 (0.018)	Loss 3.0805 (3.0873)	Entropy 1.46962 (1.47558)	Top-1 acc 50.781 (50.201)	Top-5 acc 73.047 (73.246)	lr 0.02144
Train [30][1880/3239]	Time 2.333 (0.557)	Data Time 0.001 (0.018)	Loss 2.9592 (3.0871)	Entropy 1.46962 (1.47555)	Top-1 acc 48.828 (50.205)	Top-5 acc 75.781 (73.253)	lr 0.02144
Train [30][1890/3239]	Time 0.204 (0.555)	Data Time 0.001 (0.018)	Loss 3.1038 (3.0873)	Entropy 1.46945 (1.47552)	Top-1 acc 46.875 (50.198)	Top-5 acc 72.266 (73.251)	lr 0.02143
Train [30][1900/3239]	Time 0.258 (0.554)	Data Time 0.001 (0.018)	Loss 3.2418 (3.0875)	Entropy 1.46942 (1.47549)	Top-1 acc 48.828 (50.192)	Top-5 acc 65.625 (73.242)	lr 0.02143
Train [30][1910/3239]	Time 0.211 (0.554)	Data Time 0.001 (0.018)	Loss 3.0529 (3.0881)	Entropy 1.46934 (1.47545)	Top-1 acc 52.344 (50.184)	Top-5 acc 73.047 (73.230)	lr 0.02143
Train [30][1920/3239]	Time 0.300 (0.553)	Data Time 0.001 (0.018)	Loss 3.0553 (3.0881)	Entropy 1.46924 (1.47542)	Top-1 acc 49.609 (50.185)	Top-5 acc 75.781 (73.229)	lr 0.02143
Train [30][1930/3239]	Time 0.216 (0.553)	Data Time 0.001 (0.018)	Loss 3.0975 (3.0882)	Entropy 1.46923 (1.47539)	Top-1 acc 48.438 (50.181)	Top-5 acc 75.391 (73.230)	lr 0.02143
Train [30][1940/3239]	Time 0.222 (0.552)	Data Time 0.001 (0.018)	Loss 3.1822 (3.0882)	Entropy 1.46909 (1.47536)	Top-1 acc 47.656 (50.181)	Top-5 acc 71.875 (73.229)	lr 0.02143
Train [30][1950/3239]	Time 0.188 (0.551)	Data Time 0.001 (0.017)	Loss 3.0522 (3.0884)	Entropy 1.46901 (1.47532)	Top-1 acc 48.047 (50.177)	Top-5 acc 72.656 (73.223)	lr 0.02143
Train [30][1960/3239]	Time 0.225 (0.551)	Data Time 0.001 (0.017)	Loss 3.0768 (3.0885)	Entropy 1.46884 (1.47529)	Top-1 acc 48.047 (50.172)	Top-5 acc 73.438 (73.221)	lr 0.02143
Train [30][1970/3239]	Time 0.229 (0.550)	Data Time 0.001 (0.017)	Loss 3.2509 (3.0886)	Entropy 1.46875 (1.47526)	Top-1 acc 44.922 (50.172)	Top-5 acc 66.406 (73.219)	lr 0.02143
Train [30][1980/3239]	Time 0.209 (0.550)	Data Time 0.001 (0.017)	Loss 3.2191 (3.0890)	Entropy 1.46873 (1.47523)	Top-1 acc 48.828 (50.163)	Top-5 acc 71.875 (73.212)	lr 0.02143
Train [30][1990/3239]	Time 46.330 (0.571)	Data Time 0.001 (0.017)	Loss 3.0240 (3.0889)	Entropy 1.46873 (1.47519)	Top-1 acc 54.297 (50.165)	Top-5 acc 76.953 (73.215)	lr 0.02143
Train [30][2000/3239]	Time 0.248 (0.570)	Data Time 0.002 (0.017)	Loss 3.0904 (3.0888)	Entropy 1.46871 (1.47516)	Top-1 acc 49.219 (50.170)	Top-5 acc 74.219 (73.215)	lr 0.02143
Train [30][2010/3239]	Time 0.221 (0.569)	Data Time 0.002 (0.017)	Loss 3.0001 (3.0886)	Entropy 1.46870 (1.47513)	Top-1 acc 53.906 (50.174)	Top-5 acc 73.047 (73.217)	lr 0.02143
Train [30][2020/3239]	Time 0.229 (0.569)	Data Time 0.002 (0.017)	Loss 3.1513 (3.0885)	Entropy 1.46870 (1.47510)	Top-1 acc 46.875 (50.167)	Top-5 acc 73.438 (73.222)	lr 0.02143
Train [30][2030/3239]	Time 0.302 (0.568)	Data Time 0.001 (0.017)	Loss 2.8459 (3.0885)	Entropy 1.46861 (1.47507)	Top-1 acc 55.078 (50.163)	Top-5 acc 77.344 (73.222)	lr 0.02142
Train [30][2040/3239]	Time 0.223 (0.567)	Data Time 0.001 (0.017)	Loss 3.0379 (3.0889)	Entropy 1.46858 (1.47503)	Top-1 acc 52.344 (50.157)	Top-5 acc 74.219 (73.214)	lr 0.02142
Train [30][2050/3239]	Time 0.221 (0.567)	Data Time 0.001 (0.017)	Loss 2.9773 (3.0889)	Entropy 1.46850 (1.47500)	Top-1 acc 52.344 (50.157)	Top-5 acc 76.562 (73.210)	lr 0.02142
Train [30][2060/3239]	Time 0.220 (0.566)	Data Time 0.004 (0.017)	Loss 3.1035 (3.0890)	Entropy 1.46843 (1.47497)	Top-1 acc 51.562 (50.158)	Top-5 acc 69.141 (73.207)	lr 0.02142
Train [30][2070/3239]	Time 0.200 (0.566)	Data Time 0.001 (0.017)	Loss 3.1359 (3.0888)	Entropy 1.46839 (1.47494)	Top-1 acc 46.875 (50.164)	Top-5 acc 68.750 (73.208)	lr 0.02142
Train [30][2080/3239]	Time 0.253 (0.565)	Data Time 0.002 (0.016)	Loss 3.0302 (3.0887)	Entropy 1.46837 (1.47491)	Top-1 acc 51.953 (50.163)	Top-5 acc 75.000 (73.209)	lr 0.02142
Train [30][2090/3239]	Time 0.228 (0.565)	Data Time 0.001 (0.016)	Loss 2.9021 (3.0888)	Entropy 1.46819 (1.47488)	Top-1 acc 55.469 (50.161)	Top-5 acc 77.344 (73.209)	lr 0.02142
Train [30][2100/3239]	Time 2.339 (0.564)	Data Time 0.001 (0.016)	Loss 2.9018 (3.0886)	Entropy 1.46819 (1.47484)	Top-1 acc 51.953 (50.165)	Top-5 acc 76.953 (73.208)	lr 0.02142
Train [30][2110/3239]	Time 0.211 (0.562)	Data Time 0.002 (0.016)	Loss 3.1879 (3.0886)	Entropy 1.46809 (1.47481)	Top-1 acc 44.531 (50.161)	Top-5 acc 75.000 (73.209)	lr 0.02142
Train [30][2120/3239]	Time 0.215 (0.562)	Data Time 0.001 (0.016)	Loss 3.0694 (3.0884)	Entropy 1.46789 (1.47478)	Top-1 acc 46.094 (50.158)	Top-5 acc 73.047 (73.213)	lr 0.02142
Train [30][2130/3239]	Time 0.228 (0.561)	Data Time 0.001 (0.016)	Loss 3.1351 (3.0884)	Entropy 1.46780 (1.47475)	Top-1 acc 49.609 (50.160)	Top-5 acc 70.703 (73.214)	lr 0.02142
Train [30][2140/3239]	Time 0.324 (0.561)	Data Time 0.001 (0.016)	Loss 3.0252 (3.0884)	Entropy 1.46779 (1.47471)	Top-1 acc 51.953 (50.160)	Top-5 acc 73.828 (73.214)	lr 0.02142
Train [30][2150/3239]	Time 0.211 (0.560)	Data Time 0.001 (0.016)	Loss 3.1379 (3.0885)	Entropy 1.46776 (1.47468)	Top-1 acc 49.219 (50.163)	Top-5 acc 72.656 (73.214)	lr 0.02142
Train [30][2160/3239]	Time 0.202 (0.560)	Data Time 0.001 (0.016)	Loss 2.9185 (3.0885)	Entropy 1.46766 (1.47465)	Top-1 acc 52.734 (50.163)	Top-5 acc 76.562 (73.212)	lr 0.02142
Train [30][2170/3239]	Time 0.190 (0.559)	Data Time 0.001 (0.016)	Loss 3.1937 (3.0883)	Entropy 1.46756 (1.47462)	Top-1 acc 47.266 (50.170)	Top-5 acc 69.141 (73.217)	lr 0.02141
Train [30][2180/3239]	Time 0.207 (0.559)	Data Time 0.001 (0.016)	Loss 3.2680 (3.0887)	Entropy 1.46762 (1.47459)	Top-1 acc 45.703 (50.166)	Top-5 acc 64.062 (73.203)	lr 0.02141
Train [30][2190/3239]	Time 0.229 (0.558)	Data Time 0.001 (0.016)	Loss 2.9160 (3.0883)	Entropy 1.46756 (1.47455)	Top-1 acc 53.906 (50.175)	Top-5 acc 75.391 (73.211)	lr 0.02141
Train [30][2200/3239]	Time 0.307 (0.558)	Data Time 0.001 (0.016)	Loss 3.1379 (3.0884)	Entropy 1.46750 (1.47452)	Top-1 acc 49.219 (50.170)	Top-5 acc 72.656 (73.210)	lr 0.02141
Train [30][2210/3239]	Time 2.490 (0.557)	Data Time 0.001 (0.016)	Loss 2.9473 (3.0883)	Entropy 1.46750 (1.47449)	Top-1 acc 58.594 (50.168)	Top-5 acc 73.438 (73.213)	lr 0.02141
Train [30][2220/3239]	Time 0.220 (0.556)	Data Time 0.001 (0.016)	Loss 2.9273 (3.0880)	Entropy 1.46751 (1.47446)	Top-1 acc 53.516 (50.178)	Top-5 acc 77.734 (73.222)	lr 0.02141
Train [30][2230/3239]	Time 0.215 (0.555)	Data Time 0.001 (0.015)	Loss 2.9870 (3.0881)	Entropy 1.46711 (1.47443)	Top-1 acc 53.906 (50.176)	Top-5 acc 76.953 (73.220)	lr 0.02141
Train [30][2240/3239]	Time 0.218 (0.555)	Data Time 0.002 (0.015)	Loss 3.2746 (3.0881)	Entropy 1.46701 (1.47439)	Top-1 acc 47.656 (50.178)	Top-5 acc 71.484 (73.222)	lr 0.02141
Train [30][2250/3239]	Time 0.332 (0.554)	Data Time 0.001 (0.015)	Loss 3.1748 (3.0880)	Entropy 1.46683 (1.47436)	Top-1 acc 50.000 (50.183)	Top-5 acc 71.484 (73.222)	lr 0.02141
Train [30][2260/3239]	Time 0.213 (0.554)	Data Time 0.001 (0.015)	Loss 3.1077 (3.0883)	Entropy 1.46681 (1.47433)	Top-1 acc 52.344 (50.176)	Top-5 acc 75.781 (73.217)	lr 0.02141
Train [30][2270/3239]	Time 0.243 (0.553)	Data Time 0.002 (0.015)	Loss 3.1123 (3.0882)	Entropy 1.46669 (1.47429)	Top-1 acc 50.781 (50.176)	Top-5 acc 71.875 (73.219)	lr 0.02141
Train [30][2280/3239]	Time 0.214 (0.553)	Data Time 0.001 (0.015)	Loss 3.0353 (3.0881)	Entropy 1.46665 (1.47426)	Top-1 acc 50.000 (50.179)	Top-5 acc 74.219 (73.223)	lr 0.02141
Train [30][2290/3239]	Time 0.216 (0.552)	Data Time 0.001 (0.015)	Loss 3.0675 (3.0881)	Entropy 1.46686 (1.47423)	Top-1 acc 54.688 (50.183)	Top-5 acc 74.219 (73.228)	lr 0.02141
Train [30][2300/3239]	Time 0.261 (0.552)	Data Time 0.001 (0.015)	Loss 3.1001 (3.0880)	Entropy 1.46683 (1.47419)	Top-1 acc 48.828 (50.187)	Top-5 acc 71.484 (73.227)	lr 0.02141
Train [30][2310/3239]	Time 0.350 (0.552)	Data Time 0.001 (0.015)	Loss 3.0860 (3.0880)	Entropy 1.46674 (1.47416)	Top-1 acc 48.438 (50.185)	Top-5 acc 71.484 (73.228)	lr 0.02141
Train [30][2320/3239]	Time 2.403 (0.551)	Data Time 0.001 (0.015)	Loss 3.2452 (3.0879)	Entropy 1.46674 (1.47413)	Top-1 acc 44.531 (50.183)	Top-5 acc 69.141 (73.229)	lr 0.02140
Train [30][2330/3239]	Time 0.214 (0.550)	Data Time 0.001 (0.015)	Loss 3.0548 (3.0880)	Entropy 1.46678 (1.47410)	Top-1 acc 50.391 (50.178)	Top-5 acc 74.609 (73.228)	lr 0.02140
Train [30][2340/3239]	Time 0.198 (0.549)	Data Time 0.001 (0.015)	Loss 3.0093 (3.0880)	Entropy 1.46676 (1.47407)	Top-1 acc 50.781 (50.177)	Top-5 acc 75.781 (73.230)	lr 0.02140
Train [30][2350/3239]	Time 0.217 (0.549)	Data Time 0.001 (0.015)	Loss 3.0961 (3.0883)	Entropy 1.46665 (1.47404)	Top-1 acc 50.391 (50.170)	Top-5 acc 70.312 (73.223)	lr 0.02140
Train [30][2360/3239]	Time 0.316 (0.566)	Data Time 0.003 (0.015)	Loss 3.3138 (3.0885)	Entropy 1.46666 (1.47400)	Top-1 acc 45.703 (50.167)	Top-5 acc 70.703 (73.219)	lr 0.02140
Train [30][2370/3239]	Time 0.207 (0.566)	Data Time 0.002 (0.015)	Loss 2.9860 (3.0885)	Entropy 1.46664 (1.47397)	Top-1 acc 53.125 (50.166)	Top-5 acc 78.516 (73.224)	lr 0.02140
Train [30][2380/3239]	Time 0.210 (0.565)	Data Time 0.001 (0.015)	Loss 3.0788 (3.0883)	Entropy 1.46666 (1.47394)	Top-1 acc 47.656 (50.168)	Top-5 acc 73.828 (73.223)	lr 0.02140
Train [30][2390/3239]	Time 0.221 (0.565)	Data Time 0.001 (0.015)	Loss 2.9487 (3.0885)	Entropy 1.46657 (1.47391)	Top-1 acc 52.734 (50.168)	Top-5 acc 75.000 (73.221)	lr 0.02140
Train [30][2400/3239]	Time 0.215 (0.564)	Data Time 0.002 (0.014)	Loss 3.1864 (3.0886)	Entropy 1.46656 (1.47388)	Top-1 acc 48.828 (50.168)	Top-5 acc 71.484 (73.219)	lr 0.02140
Train [30][2410/3239]	Time 0.216 (0.564)	Data Time 0.001 (0.014)	Loss 3.2658 (3.0888)	Entropy 1.46646 (1.47385)	Top-1 acc 45.703 (50.169)	Top-5 acc 67.969 (73.213)	lr 0.02140
Train [30][2420/3239]	Time 0.234 (0.564)	Data Time 0.003 (0.014)	Loss 3.3771 (3.0890)	Entropy 1.46641 (1.47382)	Top-1 acc 48.828 (50.168)	Top-5 acc 67.188 (73.209)	lr 0.02140
Train [30][2430/3239]	Time 2.397 (0.563)	Data Time 0.001 (0.014)	Loss 3.1274 (3.0891)	Entropy 1.46641 (1.47379)	Top-1 acc 49.219 (50.165)	Top-5 acc 72.656 (73.207)	lr 0.02140
Train [30][2440/3239]	Time 0.253 (0.562)	Data Time 0.001 (0.014)	Loss 3.1758 (3.0889)	Entropy 1.46631 (1.47376)	Top-1 acc 48.438 (50.165)	Top-5 acc 71.094 (73.211)	lr 0.02140
Train [30][2450/3239]	Time 0.218 (0.561)	Data Time 0.001 (0.014)	Loss 3.2071 (3.0889)	Entropy 1.46627 (1.47373)	Top-1 acc 47.656 (50.170)	Top-5 acc 69.141 (73.216)	lr 0.02140
Train [30][2460/3239]	Time 0.214 (0.561)	Data Time 0.001 (0.014)	Loss 3.0713 (3.0891)	Entropy 1.46627 (1.47370)	Top-1 acc 50.000 (50.165)	Top-5 acc 74.219 (73.211)	lr 0.02139
Train [30][2470/3239]	Time 0.306 (0.560)	Data Time 0.001 (0.014)	Loss 3.0678 (3.0892)	Entropy 1.46625 (1.47367)	Top-1 acc 49.609 (50.162)	Top-5 acc 75.000 (73.210)	lr 0.02139
Train [30][2480/3239]	Time 0.217 (0.560)	Data Time 0.001 (0.014)	Loss 3.1490 (3.0894)	Entropy 1.46606 (1.47364)	Top-1 acc 48.047 (50.160)	Top-5 acc 75.391 (73.208)	lr 0.02139
Train [30][2490/3239]	Time 0.217 (0.559)	Data Time 0.001 (0.014)	Loss 3.2794 (3.0896)	Entropy 1.46604 (1.47361)	Top-1 acc 48.047 (50.159)	Top-5 acc 67.188 (73.203)	lr 0.02139
Train [30][2500/3239]	Time 0.249 (0.559)	Data Time 0.001 (0.014)	Loss 2.8708 (3.0895)	Entropy 1.46598 (1.47358)	Top-1 acc 55.469 (50.161)	Top-5 acc 79.297 (73.206)	lr 0.02139
Train [30][2510/3239]	Time 0.250 (0.559)	Data Time 0.002 (0.014)	Loss 2.9667 (3.0895)	Entropy 1.46601 (1.47355)	Top-1 acc 53.516 (50.159)	Top-5 acc 76.562 (73.208)	lr 0.02139
Train [30][2520/3239]	Time 0.278 (0.558)	Data Time 0.001 (0.014)	Loss 3.1104 (3.0895)	Entropy 1.46597 (1.47352)	Top-1 acc 50.781 (50.159)	Top-5 acc 77.734 (73.211)	lr 0.02139
Train [30][2530/3239]	Time 0.231 (0.558)	Data Time 0.001 (0.014)	Loss 3.0175 (3.0896)	Entropy 1.46582 (1.47349)	Top-1 acc 55.078 (50.154)	Top-5 acc 75.781 (73.207)	lr 0.02139
Train [30][2540/3239]	Time 2.349 (0.557)	Data Time 0.001 (0.014)	Loss 2.9066 (3.0895)	Entropy 1.46582 (1.47346)	Top-1 acc 56.250 (50.156)	Top-5 acc 76.953 (73.210)	lr 0.02139
Train [30][2550/3239]	Time 0.224 (0.556)	Data Time 0.001 (0.014)	Loss 3.1787 (3.0896)	Entropy 1.46583 (1.47343)	Top-1 acc 49.609 (50.162)	Top-5 acc 68.750 (73.208)	lr 0.02139
Train [30][2560/3239]	Time 0.212 (0.556)	Data Time 0.001 (0.014)	Loss 2.9778 (3.0895)	Entropy 1.46573 (1.47340)	Top-1 acc 51.953 (50.164)	Top-5 acc 76.172 (73.211)	lr 0.02139
Train [30][2570/3239]	Time 0.224 (0.555)	Data Time 0.001 (0.014)	Loss 3.1843 (3.0894)	Entropy 1.46568 (1.47337)	Top-1 acc 50.391 (50.165)	Top-5 acc 71.094 (73.208)	lr 0.02139
Train [30][2580/3239]	Time 0.325 (0.555)	Data Time 0.002 (0.014)	Loss 3.0097 (3.0894)	Entropy 1.46562 (1.47334)	Top-1 acc 51.953 (50.167)	Top-5 acc 75.781 (73.209)	lr 0.02139
Train [30][2590/3239]	Time 0.255 (0.555)	Data Time 0.001 (0.014)	Loss 3.1190 (3.0895)	Entropy 1.46559 (1.47331)	Top-1 acc 48.828 (50.165)	Top-5 acc 73.047 (73.207)	lr 0.02139
Train [30][2600/3239]	Time 0.226 (0.554)	Data Time 0.001 (0.013)	Loss 3.1796 (3.0895)	Entropy 1.46545 (1.47328)	Top-1 acc 48.438 (50.163)	Top-5 acc 70.312 (73.206)	lr 0.02138
Train [30][2610/3239]	Time 0.214 (0.554)	Data Time 0.001 (0.013)	Loss 2.8551 (3.0893)	Entropy 1.46529 (1.47325)	Top-1 acc 56.641 (50.166)	Top-5 acc 75.391 (73.213)	lr 0.02138
Train [30][2620/3239]	Time 0.193 (0.553)	Data Time 0.001 (0.013)	Loss 3.0100 (3.0891)	Entropy 1.46522 (1.47322)	Top-1 acc 53.125 (50.172)	Top-5 acc 74.609 (73.215)	lr 0.02138
Train [30][2630/3239]	Time 0.203 (0.553)	Data Time 0.001 (0.013)	Loss 3.1900 (3.0890)	Entropy 1.46520 (1.47319)	Top-1 acc 46.484 (50.173)	Top-5 acc 71.484 (73.216)	lr 0.02138
Train [30][2640/3239]	Time 0.385 (0.553)	Data Time 0.002 (0.013)	Loss 3.0900 (3.0891)	Entropy 1.46519 (1.47316)	Top-1 acc 48.047 (50.173)	Top-5 acc 73.047 (73.214)	lr 0.02138
Train [30][2650/3239]	Time 0.248 (0.552)	Data Time 0.001 (0.013)	Loss 2.9667 (3.0890)	Entropy 1.46516 (1.47313)	Top-1 acc 51.562 (50.173)	Top-5 acc 73.828 (73.217)	lr 0.02138
Train [30][2660/3239]	Time 0.251 (0.552)	Data Time 0.001 (0.013)	Loss 3.0603 (3.0890)	Entropy 1.46514 (1.47310)	Top-1 acc 51.953 (50.177)	Top-5 acc 71.875 (73.217)	lr 0.02138
Train [30][2670/3239]	Time 0.213 (0.551)	Data Time 0.001 (0.013)	Loss 3.1127 (3.0890)	Entropy 1.46513 (1.47307)	Top-1 acc 50.391 (50.177)	Top-5 acc 72.266 (73.217)	lr 0.02138
Train [30][2680/3239]	Time 0.206 (0.551)	Data Time 0.001 (0.013)	Loss 3.3274 (3.0892)	Entropy 1.46506 (1.47304)	Top-1 acc 44.141 (50.174)	Top-5 acc 66.797 (73.211)	lr 0.02138
Train [30][2690/3239]	Time 0.233 (0.551)	Data Time 0.002 (0.013)	Loss 3.1407 (3.0891)	Entropy 1.46485 (1.47301)	Top-1 acc 44.531 (50.172)	Top-5 acc 74.609 (73.214)	lr 0.02138
Train [30][2700/3239]	Time 0.403 (0.550)	Data Time 0.001 (0.013)	Loss 3.2501 (3.0896)	Entropy 1.46483 (1.47298)	Top-1 acc 47.656 (50.157)	Top-5 acc 71.094 (73.206)	lr 0.02138
Train [30][2710/3239]	Time 0.427 (0.566)	Data Time 0.004 (0.013)	Loss 3.2979 (3.0894)	Entropy 1.46477 (1.47295)	Top-1 acc 44.141 (50.167)	Top-5 acc 68.359 (73.211)	lr 0.02138
Train [30][2720/3239]	Time 0.267 (0.565)	Data Time 0.002 (0.013)	Loss 3.2785 (3.0895)	Entropy 1.46457 (1.47292)	Top-1 acc 45.312 (50.164)	Top-5 acc 69.922 (73.213)	lr 0.02138
Train [30][2730/3239]	Time 0.247 (0.565)	Data Time 0.003 (0.013)	Loss 3.1483 (3.0895)	Entropy 1.46454 (1.47289)	Top-1 acc 50.391 (50.164)	Top-5 acc 71.484 (73.215)	lr 0.02138
Train [30][2740/3239]	Time 0.276 (0.565)	Data Time 0.002 (0.013)	Loss 3.0362 (3.0896)	Entropy 1.46450 (1.47286)	Top-1 acc 51.562 (50.161)	Top-5 acc 74.609 (73.213)	lr 0.02137
Train [30][2750/3239]	Time 0.295 (0.564)	Data Time 0.001 (0.013)	Loss 3.0215 (3.0897)	Entropy 1.46456 (1.47283)	Top-1 acc 52.734 (50.166)	Top-5 acc 73.438 (73.208)	lr 0.02137
Train [30][2760/3239]	Time 0.226 (0.564)	Data Time 0.001 (0.013)	Loss 2.8281 (3.0895)	Entropy 1.46446 (1.47280)	Top-1 acc 55.078 (50.171)	Top-5 acc 75.781 (73.208)	lr 0.02137
Train [30][2770/3239]	Time 0.157 (0.563)	Data Time 0.002 (0.013)	Loss 3.0856 (3.0895)	Entropy 1.46438 (1.47277)	Top-1 acc 50.781 (50.171)	Top-5 acc 74.219 (73.208)	lr 0.02137
Train [30][2780/3239]	Time 0.261 (0.563)	Data Time 0.001 (0.013)	Loss 3.0327 (3.0896)	Entropy 1.46427 (1.47274)	Top-1 acc 49.609 (50.169)	Top-5 acc 74.609 (73.206)	lr 0.02137
Train [30][2790/3239]	Time 0.202 (0.563)	Data Time 0.001 (0.013)	Loss 3.0415 (3.0897)	Entropy 1.46415 (1.47270)	Top-1 acc 57.031 (50.166)	Top-5 acc 72.656 (73.208)	lr 0.02137
Train [30][2800/3239]	Time 0.232 (0.562)	Data Time 0.001 (0.013)	Loss 2.9850 (3.0898)	Entropy 1.46412 (1.47267)	Top-1 acc 51.172 (50.165)	Top-5 acc 75.781 (73.205)	lr 0.02137
Train [30][2810/3239]	Time 0.201 (0.562)	Data Time 0.001 (0.013)	Loss 3.0446 (3.0898)	Entropy 1.46404 (1.47264)	Top-1 acc 51.172 (50.164)	Top-5 acc 73.047 (73.202)	lr 0.02137
Train [30][2820/3239]	Time 0.385 (0.561)	Data Time 0.001 (0.013)	Loss 3.1025 (3.0898)	Entropy 1.46394 (1.47261)	Top-1 acc 50.000 (50.164)	Top-5 acc 71.875 (73.200)	lr 0.02137
Train [30][2830/3239]	Time 0.208 (0.561)	Data Time 0.001 (0.013)	Loss 3.0080 (3.0898)	Entropy 1.46389 (1.47258)	Top-1 acc 53.125 (50.163)	Top-5 acc 76.172 (73.203)	lr 0.02137
Train [30][2840/3239]	Time 0.248 (0.560)	Data Time 0.001 (0.013)	Loss 3.1870 (3.0899)	Entropy 1.46386 (1.47255)	Top-1 acc 50.391 (50.162)	Top-5 acc 70.312 (73.200)	lr 0.02137
Train [30][2850/3239]	Time 0.307 (0.560)	Data Time 0.001 (0.012)	Loss 3.1527 (3.0901)	Entropy 1.46365 (1.47252)	Top-1 acc 48.438 (50.159)	Top-5 acc 73.438 (73.198)	lr 0.02137
Train [30][2860/3239]	Time 0.251 (0.560)	Data Time 0.001 (0.012)	Loss 2.9638 (3.0900)	Entropy 1.46353 (1.47249)	Top-1 acc 55.469 (50.165)	Top-5 acc 75.781 (73.202)	lr 0.02137
Train [30][2870/3239]	Time 0.306 (0.559)	Data Time 0.001 (0.012)	Loss 3.0775 (3.0902)	Entropy 1.46347 (1.47246)	Top-1 acc 48.047 (50.159)	Top-5 acc 73.047 (73.199)	lr 0.02137
Train [30][2880/3239]	Time 0.298 (0.559)	Data Time 0.001 (0.012)	Loss 2.9136 (3.0900)	Entropy 1.46339 (1.47243)	Top-1 acc 54.688 (50.163)	Top-5 acc 78.125 (73.203)	lr 0.02136
Train [30][2890/3239]	Time 0.218 (0.559)	Data Time 0.002 (0.012)	Loss 3.1690 (3.0900)	Entropy 1.46337 (1.47240)	Top-1 acc 46.875 (50.164)	Top-5 acc 71.094 (73.204)	lr 0.02136
Train [30][2900/3239]	Time 0.296 (0.558)	Data Time 0.001 (0.012)	Loss 3.0607 (3.0899)	Entropy 1.46338 (1.47237)	Top-1 acc 49.219 (50.165)	Top-5 acc 71.875 (73.205)	lr 0.02136
Train [30][2910/3239]	Time 0.215 (0.558)	Data Time 0.001 (0.012)	Loss 3.0637 (3.0898)	Entropy 1.46336 (1.47233)	Top-1 acc 52.344 (50.170)	Top-5 acc 75.781 (73.207)	lr 0.02136
Train [30][2920/3239]	Time 0.219 (0.558)	Data Time 0.001 (0.012)	Loss 3.2409 (3.0897)	Entropy 1.46327 (1.47230)	Top-1 acc 47.656 (50.168)	Top-5 acc 70.703 (73.211)	lr 0.02136
Train [30][2930/3239]	Time 0.335 (0.557)	Data Time 0.001 (0.012)	Loss 3.0114 (3.0899)	Entropy 1.46320 (1.47227)	Top-1 acc 52.734 (50.164)	Top-5 acc 73.047 (73.204)	lr 0.02136
Train [30][2940/3239]	Time 0.247 (0.557)	Data Time 0.001 (0.012)	Loss 3.1134 (3.0898)	Entropy 1.46313 (1.47224)	Top-1 acc 47.266 (50.168)	Top-5 acc 72.656 (73.206)	lr 0.02136
Train [30][2950/3239]	Time 0.236 (0.557)	Data Time 0.001 (0.012)	Loss 3.0064 (3.0898)	Entropy 1.46300 (1.47221)	Top-1 acc 49.609 (50.166)	Top-5 acc 73.828 (73.203)	lr 0.02136
Train [30][2960/3239]	Time 0.257 (0.556)	Data Time 0.001 (0.012)	Loss 3.0926 (3.0898)	Entropy 1.46296 (1.47218)	Top-1 acc 50.391 (50.165)	Top-5 acc 72.656 (73.205)	lr 0.02136
Train [30][2970/3239]	Time 0.247 (0.556)	Data Time 0.001 (0.012)	Loss 3.0200 (3.0897)	Entropy 1.46285 (1.47215)	Top-1 acc 52.344 (50.165)	Top-5 acc 74.609 (73.205)	lr 0.02136
Train [30][2980/3239]	Time 0.379 (0.556)	Data Time 0.001 (0.012)	Loss 2.9979 (3.0898)	Entropy 1.46277 (1.47212)	Top-1 acc 50.781 (50.162)	Top-5 acc 75.000 (73.200)	lr 0.02136
Train [30][2990/3239]	Time 0.261 (0.555)	Data Time 0.001 (0.012)	Loss 3.0743 (3.0899)	Entropy 1.46271 (1.47209)	Top-1 acc 43.359 (50.156)	Top-5 acc 75.000 (73.199)	lr 0.02136
Train [30][3000/3239]	Time 0.207 (0.555)	Data Time 0.001 (0.012)	Loss 3.0810 (3.0898)	Entropy 1.46269 (1.47206)	Top-1 acc 48.828 (50.162)	Top-5 acc 72.656 (73.200)	lr 0.02136
Train [30][3010/3239]	Time 0.208 (0.555)	Data Time 0.001 (0.012)	Loss 3.0374 (3.0899)	Entropy 1.46260 (1.47202)	Top-1 acc 52.344 (50.160)	Top-5 acc 76.562 (73.201)	lr 0.02136
Train [30][3020/3239]	Time 0.223 (0.554)	Data Time 0.001 (0.012)	Loss 2.9385 (3.0897)	Entropy 1.46254 (1.47199)	Top-1 acc 50.781 (50.165)	Top-5 acc 76.562 (73.207)	lr 0.02135
Train [30][3030/3239]	Time 0.263 (0.554)	Data Time 0.004 (0.012)	Loss 2.9794 (3.0896)	Entropy 1.46245 (1.47196)	Top-1 acc 50.781 (50.165)	Top-5 acc 75.000 (73.209)	lr 0.02135
Train [30][3040/3239]	Time 0.282 (0.567)	Data Time 0.004 (0.012)	Loss 3.0870 (3.0895)	Entropy 1.46247 (1.47193)	Top-1 acc 52.344 (50.167)	Top-5 acc 72.656 (73.207)	lr 0.02135
Train [30][3050/3239]	Time 0.268 (0.567)	Data Time 0.002 (0.012)	Loss 2.9781 (3.0894)	Entropy 1.46246 (1.47190)	Top-1 acc 53.906 (50.172)	Top-5 acc 76.953 (73.211)	lr 0.02135
Train [30][3060/3239]	Time 0.380 (0.567)	Data Time 0.002 (0.012)	Loss 3.1050 (3.0897)	Entropy 1.46242 (1.47187)	Top-1 acc 50.391 (50.165)	Top-5 acc 72.656 (73.204)	lr 0.02135
Train [30][3070/3239]	Time 0.242 (0.567)	Data Time 0.002 (0.012)	Loss 3.0978 (3.0897)	Entropy 1.46231 (1.47184)	Top-1 acc 49.609 (50.164)	Top-5 acc 74.609 (73.204)	lr 0.02135
Train [30][3080/3239]	Time 0.288 (0.566)	Data Time 0.002 (0.012)	Loss 3.0823 (3.0897)	Entropy 1.46223 (1.47181)	Top-1 acc 48.047 (50.166)	Top-5 acc 75.000 (73.204)	lr 0.02135
Train [30][3090/3239]	Time 0.260 (0.566)	Data Time 0.002 (0.012)	Loss 3.1108 (3.0897)	Entropy 1.46218 (1.47178)	Top-1 acc 45.703 (50.167)	Top-5 acc 72.656 (73.201)	lr 0.02135
Train [30][3100/3239]	Time 0.217 (0.566)	Data Time 0.001 (0.012)	Loss 3.1177 (3.0895)	Entropy 1.46205 (1.47175)	Top-1 acc 49.609 (50.169)	Top-5 acc 69.922 (73.205)	lr 0.02135
Train [30][3110/3239]	Time 0.294 (0.565)	Data Time 0.001 (0.012)	Loss 3.0452 (3.0896)	Entropy 1.46196 (1.47171)	Top-1 acc 49.609 (50.166)	Top-5 acc 71.484 (73.202)	lr 0.02135
Train [30][3120/3239]	Time 0.229 (0.565)	Data Time 0.002 (0.012)	Loss 3.0901 (3.0897)	Entropy 1.46185 (1.47168)	Top-1 acc 50.000 (50.171)	Top-5 acc 71.875 (73.200)	lr 0.02135
Train [30][3130/3239]	Time 0.262 (0.565)	Data Time 0.002 (0.012)	Loss 3.2858 (3.0899)	Entropy 1.46170 (1.47165)	Top-1 acc 47.266 (50.170)	Top-5 acc 68.750 (73.195)	lr 0.02135
Train [30][3140/3239]	Time 0.249 (0.564)	Data Time 0.001 (0.012)	Loss 2.8078 (3.0900)	Entropy 1.46172 (1.47162)	Top-1 acc 57.422 (50.174)	Top-5 acc 78.125 (73.194)	lr 0.02135
Train [30][3150/3239]	Time 0.347 (0.564)	Data Time 0.001 (0.011)	Loss 3.2417 (3.0900)	Entropy 1.46161 (1.47159)	Top-1 acc 47.266 (50.175)	Top-5 acc 69.141 (73.193)	lr 0.02135
Train [30][3160/3239]	Time 0.276 (0.564)	Data Time 0.001 (0.011)	Loss 2.9549 (3.0899)	Entropy 1.46163 (1.47156)	Top-1 acc 55.078 (50.177)	Top-5 acc 76.562 (73.197)	lr 0.02134
Train [30][3170/3239]	Time 0.209 (0.563)	Data Time 0.001 (0.011)	Loss 3.0924 (3.0898)	Entropy 1.46149 (1.47153)	Top-1 acc 50.391 (50.178)	Top-5 acc 75.000 (73.202)	lr 0.02134
Train [30][3180/3239]	Time 0.214 (0.563)	Data Time 0.000 (0.011)	Loss 3.0820 (3.0897)	Entropy 1.46148 (1.47149)	Top-1 acc 48.828 (50.180)	Top-5 acc 73.047 (73.203)	lr 0.02134
Train [30][3190/3239]	Time 0.218 (0.562)	Data Time 0.000 (0.011)	Loss 3.1991 (3.0895)	Entropy 1.46145 (1.47146)	Top-1 acc 46.875 (50.183)	Top-5 acc 68.359 (73.206)	lr 0.02134
Train [30][3200/3239]	Time 0.327 (0.562)	Data Time 0.000 (0.011)	Loss 3.2670 (3.0894)	Entropy 1.46142 (1.47143)	Top-1 acc 48.047 (50.185)	Top-5 acc 67.969 (73.206)	lr 0.02134
Train [30][3210/3239]	Time 0.222 (0.562)	Data Time 0.000 (0.011)	Loss 2.9378 (3.0892)	Entropy 1.46113 (1.47140)	Top-1 acc 58.594 (50.191)	Top-5 acc 78.125 (73.213)	lr 0.02134
Train [30][3220/3239]	Time 0.198 (0.561)	Data Time 0.000 (0.011)	Loss 2.9925 (3.0891)	Entropy 1.46109 (1.47137)	Top-1 acc 52.344 (50.189)	Top-5 acc 75.000 (73.215)	lr 0.02134
Train [30][3230/3239]	Time 0.212 (0.561)	Data Time 0.000 (0.011)	Loss 3.0449 (3.0889)	Entropy 1.46106 (1.47134)	Top-1 acc 51.172 (50.195)	Top-5 acc 72.656 (73.219)	lr 0.02134
Train [30][3239/3239]	Time 2.147 (0.560)	Data Time 0.000 (0.011)	Loss 3.1781 (3.0889)	Entropy 1.46106 (1.47131)	Top-1 acc 46.914 (50.197)	Top-5 acc 74.074 (73.217)	lr 0.02134
==========Valid [30/120]	loss 1.891	top-1 acc 57.938 (57.938)	top-5 acc 80.380	Train top-1 50.197	top-5 73.217	Entropy 1.46106	Latency-None: 0.000ms	Flops: 550.59M
Train [31][0/3239]	Time 33.063 (33.063)	Data Time 30.004 (30.004)	Loss 3.0552 (3.0552)	Entropy 1.46103 (1.46103)	Top-1 acc 53.125 (53.125)	Top-5 acc 73.047 (73.047)	lr 0.02134
Train [31][10/3239]	Time 2.530 (3.482)	Data Time 0.002 (2.735)	Loss 2.8360 (3.0727)	Entropy 1.46103 (1.46103)	Top-1 acc 60.156 (50.817)	Top-5 acc 79.297 (73.438)	lr 0.02134
Train [31][20/3239]	Time 0.330 (1.942)	Data Time 0.001 (1.433)	Loss 3.0401 (3.0758)	Entropy 1.46091 (1.46097)	Top-1 acc 53.516 (50.893)	Top-5 acc 74.219 (72.917)	lr 0.02134
Train [31][30/3239]	Time 0.232 (1.463)	Data Time 0.001 (0.971)	Loss 3.1081 (3.0573)	Entropy 1.46088 (1.46094)	Top-1 acc 47.656 (50.869)	Top-5 acc 75.781 (73.538)	lr 0.02134
Train [31][40/3239]	Time 0.251 (1.215)	Data Time 0.001 (0.735)	Loss 2.8936 (3.0705)	Entropy 1.46086 (1.46092)	Top-1 acc 54.688 (50.819)	Top-5 acc 76.172 (73.314)	lr 0.02134
Train [31][50/3239]	Time 0.200 (1.065)	Data Time 0.001 (0.591)	Loss 3.0728 (3.0661)	Entropy 1.46053 (1.46087)	Top-1 acc 51.172 (50.850)	Top-5 acc 72.266 (73.514)	lr 0.02134
Train [31][60/3239]	Time 0.194 (0.963)	Data Time 0.001 (0.494)	Loss 3.2332 (3.0797)	Entropy 1.46046 (1.46080)	Top-1 acc 46.875 (50.743)	Top-5 acc 66.406 (73.201)	lr 0.02133
Train [31][70/3239]	Time 0.222 (0.891)	Data Time 0.001 (0.425)	Loss 2.7766 (3.0620)	Entropy 1.46045 (1.46076)	Top-1 acc 52.734 (51.012)	Top-5 acc 81.250 (73.570)	lr 0.02133
Train [31][80/3239]	Time 0.304 (0.837)	Data Time 0.001 (0.373)	Loss 3.3331 (3.0599)	Entropy 1.46039 (1.46071)	Top-1 acc 48.047 (51.056)	Top-5 acc 68.750 (73.611)	lr 0.02133
Train [31][90/3239]	Time 0.214 (0.795)	Data Time 0.001 (0.332)	Loss 2.9465 (3.0542)	Entropy 1.46035 (1.46068)	Top-1 acc 55.469 (51.077)	Top-5 acc 75.391 (73.828)	lr 0.02133
Train [31][100/3239]	Time 0.263 (0.761)	Data Time 0.001 (0.299)	Loss 3.2655 (3.0628)	Entropy 1.46032 (1.46064)	Top-1 acc 48.047 (50.828)	Top-5 acc 71.094 (73.747)	lr 0.02133
Train [31][110/3239]	Time 0.228 (0.732)	Data Time 0.001 (0.272)	Loss 3.3349 (3.0637)	Entropy 1.45999 (1.46061)	Top-1 acc 44.531 (50.788)	Top-5 acc 66.797 (73.765)	lr 0.02133
Train [31][120/3239]	Time 2.419 (0.708)	Data Time 0.001 (0.250)	Loss 2.9455 (3.0647)	Entropy 1.45999 (1.46056)	Top-1 acc 52.344 (50.752)	Top-5 acc 76.172 (73.806)	lr 0.02133
Train [31][130/3239]	Time 0.247 (0.672)	Data Time 0.001 (0.231)	Loss 2.9952 (3.0689)	Entropy 1.45990 (1.46051)	Top-1 acc 53.516 (50.650)	Top-5 acc 77.344 (73.643)	lr 0.02133
Train [31][140/3239]	Time 0.303 (0.656)	Data Time 0.001 (0.215)	Loss 3.4314 (3.0690)	Entropy 1.45984 (1.46046)	Top-1 acc 44.141 (50.676)	Top-5 acc 63.281 (73.662)	lr 0.02133
Train [31][150/3239]	Time 0.278 (0.937)	Data Time 0.003 (0.201)	Loss 3.0632 (3.0668)	Entropy 1.45981 (1.46042)	Top-1 acc 50.391 (50.680)	Top-5 acc 74.219 (73.738)	lr 0.02133
Train [31][160/3239]	Time 0.220 (0.908)	Data Time 0.002 (0.188)	Loss 2.7289 (3.0668)	Entropy 1.45971 (1.46038)	Top-1 acc 61.328 (50.789)	Top-5 acc 81.250 (73.700)	lr 0.02133
Train [31][170/3239]	Time 0.175 (0.881)	Data Time 0.001 (0.177)	Loss 3.3328 (3.0674)	Entropy 1.45963 (1.46034)	Top-1 acc 38.672 (50.726)	Top-5 acc 69.141 (73.691)	lr 0.02133
Train [31][180/3239]	Time 0.214 (0.858)	Data Time 0.002 (0.168)	Loss 3.2740 (3.0699)	Entropy 1.45952 (1.46029)	Top-1 acc 49.609 (50.712)	Top-5 acc 71.094 (73.602)	lr 0.02133
Train [31][190/3239]	Time 0.321 (0.836)	Data Time 0.001 (0.159)	Loss 2.8553 (3.0684)	Entropy 1.45938 (1.46025)	Top-1 acc 57.422 (50.771)	Top-5 acc 77.344 (73.626)	lr 0.02133
Train [31][200/3239]	Time 0.200 (0.817)	Data Time 0.001 (0.151)	Loss 3.1194 (3.0716)	Entropy 1.45929 (1.46021)	Top-1 acc 51.953 (50.705)	Top-5 acc 70.703 (73.548)	lr 0.02132
Train [31][210/3239]	Time 0.221 (0.800)	Data Time 0.001 (0.144)	Loss 2.9768 (3.0729)	Entropy 1.45925 (1.46016)	Top-1 acc 50.000 (50.646)	Top-5 acc 75.781 (73.539)	lr 0.02132
Train [31][220/3239]	Time 0.219 (0.783)	Data Time 0.001 (0.138)	Loss 3.0805 (3.0713)	Entropy 1.45915 (1.46012)	Top-1 acc 50.781 (50.695)	Top-5 acc 72.656 (73.579)	lr 0.02132
Train [31][230/3239]	Time 2.307 (0.768)	Data Time 0.001 (0.132)	Loss 3.1954 (3.0729)	Entropy 1.45915 (1.46008)	Top-1 acc 49.219 (50.659)	Top-5 acc 70.312 (73.544)	lr 0.02132
Train [31][240/3239]	Time 0.230 (0.746)	Data Time 0.001 (0.127)	Loss 3.2063 (3.0708)	Entropy 1.45922 (1.46004)	Top-1 acc 48.047 (50.710)	Top-5 acc 68.359 (73.595)	lr 0.02132
Train [31][250/3239]	Time 0.216 (0.734)	Data Time 0.001 (0.122)	Loss 3.1037 (3.0723)	Entropy 1.45921 (1.46001)	Top-1 acc 51.172 (50.703)	Top-5 acc 72.266 (73.526)	lr 0.02132
Train [31][260/3239]	Time 0.209 (0.723)	Data Time 0.001 (0.117)	Loss 2.9919 (3.0742)	Entropy 1.45919 (1.45998)	Top-1 acc 52.344 (50.682)	Top-5 acc 77.344 (73.500)	lr 0.02132
Train [31][270/3239]	Time 0.236 (0.713)	Data Time 0.001 (0.113)	Loss 3.1483 (3.0755)	Entropy 1.45913 (1.45995)	Top-1 acc 50.781 (50.630)	Top-5 acc 74.609 (73.497)	lr 0.02132
Train [31][280/3239]	Time 0.205 (0.703)	Data Time 0.002 (0.109)	Loss 3.2109 (3.0748)	Entropy 1.45906 (1.45992)	Top-1 acc 48.828 (50.671)	Top-5 acc 70.312 (73.479)	lr 0.02132
Train [31][290/3239]	Time 0.170 (0.694)	Data Time 0.001 (0.105)	Loss 3.1581 (3.0738)	Entropy 1.45896 (1.45989)	Top-1 acc 49.609 (50.695)	Top-5 acc 71.094 (73.499)	lr 0.02132
Train [31][300/3239]	Time 0.318 (0.686)	Data Time 0.002 (0.102)	Loss 2.9351 (3.0738)	Entropy 1.45892 (1.45985)	Top-1 acc 54.688 (50.712)	Top-5 acc 75.391 (73.491)	lr 0.02132
Train [31][310/3239]	Time 0.207 (0.679)	Data Time 0.001 (0.099)	Loss 3.1325 (3.0753)	Entropy 1.45889 (1.45982)	Top-1 acc 51.562 (50.676)	Top-5 acc 73.047 (73.476)	lr 0.02132
Train [31][320/3239]	Time 0.184 (0.671)	Data Time 0.002 (0.096)	Loss 3.1886 (3.0743)	Entropy 1.45880 (1.45979)	Top-1 acc 51.172 (50.688)	Top-5 acc 70.703 (73.519)	lr 0.02132
Train [31][330/3239]	Time 0.249 (0.665)	Data Time 0.001 (0.093)	Loss 3.2985 (3.0745)	Entropy 1.45872 (1.45976)	Top-1 acc 45.312 (50.679)	Top-5 acc 67.188 (73.515)	lr 0.02132
Train [31][340/3239]	Time 2.342 (0.658)	Data Time 0.001 (0.090)	Loss 2.9216 (3.0734)	Entropy 1.45872 (1.45973)	Top-1 acc 51.172 (50.687)	Top-5 acc 75.000 (73.529)	lr 0.02131
Train [31][350/3239]	Time 0.210 (0.646)	Data Time 0.001 (0.088)	Loss 2.9006 (3.0720)	Entropy 1.45870 (1.45970)	Top-1 acc 52.734 (50.721)	Top-5 acc 78.125 (73.569)	lr 0.02131
Train [31][360/3239]	Time 0.306 (0.640)	Data Time 0.001 (0.085)	Loss 3.0144 (3.0707)	Entropy 1.45854 (1.45967)	Top-1 acc 47.656 (50.750)	Top-5 acc 74.609 (73.592)	lr 0.02131
Train [31][370/3239]	Time 0.217 (0.635)	Data Time 0.002 (0.083)	Loss 3.2225 (3.0711)	Entropy 1.45835 (1.45964)	Top-1 acc 47.266 (50.741)	Top-5 acc 69.922 (73.578)	lr 0.02131
Train [31][380/3239]	Time 0.224 (0.630)	Data Time 0.002 (0.081)	Loss 3.0841 (3.0702)	Entropy 1.45833 (1.45960)	Top-1 acc 49.219 (50.760)	Top-5 acc 72.656 (73.596)	lr 0.02131
Train [31][390/3239]	Time 0.198 (0.625)	Data Time 0.001 (0.079)	Loss 3.0225 (3.0680)	Entropy 1.45830 (1.45957)	Top-1 acc 53.125 (50.812)	Top-5 acc 73.047 (73.626)	lr 0.02131
Train [31][400/3239]	Time 0.210 (0.620)	Data Time 0.001 (0.077)	Loss 3.1291 (3.0694)	Entropy 1.45811 (1.45954)	Top-1 acc 48.438 (50.753)	Top-5 acc 75.781 (73.614)	lr 0.02131
Train [31][410/3239]	Time 0.213 (0.616)	Data Time 0.001 (0.075)	Loss 3.1026 (3.0699)	Entropy 1.45804 (1.45950)	Top-1 acc 48.828 (50.746)	Top-5 acc 72.266 (73.635)	lr 0.02131
Train [31][420/3239]	Time 0.349 (0.612)	Data Time 0.001 (0.073)	Loss 3.0049 (3.0699)	Entropy 1.45795 (1.45947)	Top-1 acc 50.000 (50.730)	Top-5 acc 76.172 (73.641)	lr 0.02131
Train [31][430/3239]	Time 0.217 (0.608)	Data Time 0.001 (0.072)	Loss 3.1185 (3.0696)	Entropy 1.45788 (1.45943)	Top-1 acc 51.172 (50.726)	Top-5 acc 75.391 (73.667)	lr 0.02131
Train [31][440/3239]	Time 0.260 (0.604)	Data Time 0.001 (0.070)	Loss 2.9677 (3.0700)	Entropy 1.45793 (1.45940)	Top-1 acc 52.344 (50.703)	Top-5 acc 75.781 (73.671)	lr 0.02131
Train [31][450/3239]	Time 2.361 (0.600)	Data Time 0.001 (0.069)	Loss 3.0365 (3.0703)	Entropy 1.45793 (1.45936)	Top-1 acc 52.734 (50.698)	Top-5 acc 73.438 (73.651)	lr 0.02131
Train [31][460/3239]	Time 0.177 (0.592)	Data Time 0.001 (0.067)	Loss 3.0928 (3.0700)	Entropy 1.45791 (1.45933)	Top-1 acc 46.875 (50.699)	Top-5 acc 71.094 (73.643)	lr 0.02131
Train [31][470/3239]	Time 0.236 (0.590)	Data Time 0.001 (0.066)	Loss 2.9644 (3.0688)	Entropy 1.45776 (1.45930)	Top-1 acc 48.828 (50.720)	Top-5 acc 75.781 (73.674)	lr 0.02131
Train [31][480/3239]	Time 0.313 (0.587)	Data Time 0.001 (0.064)	Loss 2.9540 (3.0686)	Entropy 1.45759 (1.45926)	Top-1 acc 52.344 (50.714)	Top-5 acc 73.438 (73.675)	lr 0.02130
Train [31][490/3239]	Time 0.229 (0.584)	Data Time 0.001 (0.063)	Loss 3.0921 (3.0679)	Entropy 1.45755 (1.45923)	Top-1 acc 48.047 (50.719)	Top-5 acc 72.656 (73.698)	lr 0.02130
Train [31][500/3239]	Time 0.220 (0.581)	Data Time 0.001 (0.062)	Loss 2.9185 (3.0661)	Entropy 1.45752 (1.45920)	Top-1 acc 56.641 (50.778)	Top-5 acc 72.266 (73.720)	lr 0.02130
Train [31][510/3239]	Time 0.313 (0.657)	Data Time 0.005 (0.061)	Loss 2.9598 (3.0662)	Entropy 1.45742 (1.45916)	Top-1 acc 50.781 (50.761)	Top-5 acc 76.172 (73.707)	lr 0.02130
Train [31][520/3239]	Time 0.217 (0.656)	Data Time 0.002 (0.060)	Loss 2.9993 (3.0646)	Entropy 1.45734 (1.45913)	Top-1 acc 52.734 (50.778)	Top-5 acc 74.219 (73.732)	lr 0.02130
Train [31][530/3239]	Time 0.280 (0.652)	Data Time 0.001 (0.059)	Loss 3.0725 (3.0645)	Entropy 1.45720 (1.45909)	Top-1 acc 51.562 (50.776)	Top-5 acc 74.219 (73.724)	lr 0.02130
Train [31][540/3239]	Time 0.223 (0.649)	Data Time 0.002 (0.058)	Loss 3.0867 (3.0639)	Entropy 1.45718 (1.45906)	Top-1 acc 50.781 (50.801)	Top-5 acc 73.438 (73.735)	lr 0.02130
Train [31][550/3239]	Time 0.219 (0.645)	Data Time 0.001 (0.057)	Loss 2.9822 (3.0629)	Entropy 1.45709 (1.45902)	Top-1 acc 54.688 (50.820)	Top-5 acc 75.000 (73.750)	lr 0.02130
Train [31][560/3239]	Time 2.347 (0.642)	Data Time 0.001 (0.056)	Loss 2.8822 (3.0627)	Entropy 1.45709 (1.45899)	Top-1 acc 53.906 (50.833)	Top-5 acc 76.953 (73.759)	lr 0.02130
Train [31][570/3239]	Time 0.222 (0.634)	Data Time 0.001 (0.055)	Loss 2.8684 (3.0637)	Entropy 1.45700 (1.45895)	Top-1 acc 57.812 (50.818)	Top-5 acc 78.125 (73.747)	lr 0.02130
Train [31][580/3239]	Time 0.207 (0.631)	Data Time 0.001 (0.054)	Loss 3.0551 (3.0638)	Entropy 1.45698 (1.45892)	Top-1 acc 48.438 (50.818)	Top-5 acc 75.391 (73.730)	lr 0.02130
Train [31][590/3239]	Time 0.218 (0.628)	Data Time 0.001 (0.053)	Loss 3.0185 (3.0633)	Entropy 1.45686 (1.45889)	Top-1 acc 55.078 (50.833)	Top-5 acc 71.484 (73.751)	lr 0.02130
Train [31][600/3239]	Time 0.225 (0.625)	Data Time 0.001 (0.052)	Loss 2.9773 (3.0632)	Entropy 1.45673 (1.45885)	Top-1 acc 51.562 (50.825)	Top-5 acc 76.953 (73.750)	lr 0.02130
Train [31][610/3239]	Time 0.211 (0.622)	Data Time 0.001 (0.051)	Loss 2.9661 (3.0625)	Entropy 1.45668 (1.45882)	Top-1 acc 52.734 (50.840)	Top-5 acc 78.516 (73.756)	lr 0.02130
Train [31][620/3239]	Time 0.211 (0.619)	Data Time 0.001 (0.050)	Loss 2.9887 (3.0617)	Entropy 1.45668 (1.45878)	Top-1 acc 52.734 (50.847)	Top-5 acc 75.391 (73.780)	lr 0.02129
Train [31][630/3239]	Time 0.234 (0.617)	Data Time 0.001 (0.050)	Loss 3.2114 (3.0612)	Entropy 1.45668 (1.45875)	Top-1 acc 49.219 (50.880)	Top-5 acc 71.875 (73.785)	lr 0.02129
Train [31][640/3239]	Time 0.154 (0.614)	Data Time 0.001 (0.049)	Loss 3.1677 (3.0606)	Entropy 1.45665 (1.45872)	Top-1 acc 50.391 (50.891)	Top-5 acc 69.141 (73.781)	lr 0.02129
Train [31][650/3239]	Time 0.382 (0.612)	Data Time 0.001 (0.048)	Loss 3.0982 (3.0611)	Entropy 1.45659 (1.45868)	Top-1 acc 51.953 (50.893)	Top-5 acc 71.875 (73.770)	lr 0.02129
Train [31][660/3239]	Time 0.245 (0.609)	Data Time 0.001 (0.048)	Loss 3.0448 (3.0618)	Entropy 1.45651 (1.45865)	Top-1 acc 52.734 (50.888)	Top-5 acc 73.438 (73.760)	lr 0.02129
Train [31][670/3239]	Time 2.301 (0.607)	Data Time 0.001 (0.047)	Loss 3.0435 (3.0624)	Entropy 1.45651 (1.45862)	Top-1 acc 49.609 (50.887)	Top-5 acc 74.609 (73.743)	lr 0.02129
Train [31][680/3239]	Time 0.232 (0.601)	Data Time 0.001 (0.046)	Loss 2.8853 (3.0626)	Entropy 1.45638 (1.45859)	Top-1 acc 55.859 (50.882)	Top-5 acc 76.172 (73.731)	lr 0.02129
Train [31][690/3239]	Time 0.204 (0.599)	Data Time 0.001 (0.045)	Loss 3.0965 (3.0625)	Entropy 1.45636 (1.45856)	Top-1 acc 48.828 (50.877)	Top-5 acc 70.703 (73.737)	lr 0.02129
Train [31][700/3239]	Time 0.218 (0.597)	Data Time 0.001 (0.045)	Loss 3.0365 (3.0630)	Entropy 1.45632 (1.45852)	Top-1 acc 47.656 (50.862)	Top-5 acc 73.828 (73.722)	lr 0.02129
Train [31][710/3239]	Time 0.304 (0.595)	Data Time 0.001 (0.044)	Loss 2.8667 (3.0635)	Entropy 1.45623 (1.45849)	Top-1 acc 54.688 (50.858)	Top-5 acc 76.562 (73.706)	lr 0.02129
Train [31][720/3239]	Time 0.223 (0.592)	Data Time 0.001 (0.044)	Loss 2.8903 (3.0637)	Entropy 1.45618 (1.45846)	Top-1 acc 51.953 (50.855)	Top-5 acc 76.172 (73.708)	lr 0.02129
Train [31][730/3239]	Time 0.253 (0.590)	Data Time 0.001 (0.043)	Loss 2.9006 (3.0635)	Entropy 1.45605 (1.45843)	Top-1 acc 51.562 (50.853)	Top-5 acc 80.078 (73.718)	lr 0.02129
Train [31][740/3239]	Time 0.159 (0.588)	Data Time 0.001 (0.043)	Loss 3.0957 (3.0633)	Entropy 1.45601 (1.45840)	Top-1 acc 50.781 (50.865)	Top-5 acc 74.609 (73.719)	lr 0.02129
Train [31][750/3239]	Time 0.216 (0.586)	Data Time 0.001 (0.042)	Loss 3.1782 (3.0636)	Entropy 1.45588 (1.45836)	Top-1 acc 44.531 (50.853)	Top-5 acc 73.047 (73.708)	lr 0.02129
Train [31][760/3239]	Time 0.254 (0.585)	Data Time 0.001 (0.041)	Loss 2.9527 (3.0642)	Entropy 1.45569 (1.45833)	Top-1 acc 56.250 (50.848)	Top-5 acc 75.000 (73.704)	lr 0.02128
Train [31][770/3239]	Time 0.245 (0.583)	Data Time 0.001 (0.041)	Loss 3.1144 (3.0636)	Entropy 1.45557 (1.45830)	Top-1 acc 51.562 (50.876)	Top-5 acc 71.484 (73.707)	lr 0.02128
Train [31][780/3239]	Time 2.270 (0.581)	Data Time 0.001 (0.040)	Loss 3.1009 (3.0639)	Entropy 1.45557 (1.45826)	Top-1 acc 48.828 (50.869)	Top-5 acc 73.438 (73.700)	lr 0.02128
Train [31][790/3239]	Time 0.244 (0.577)	Data Time 0.001 (0.040)	Loss 3.1410 (3.0646)	Entropy 1.45552 (1.45823)	Top-1 acc 43.750 (50.860)	Top-5 acc 73.828 (73.691)	lr 0.02128
Train [31][800/3239]	Time 0.221 (0.575)	Data Time 0.001 (0.039)	Loss 3.0717 (3.0650)	Entropy 1.45551 (1.45819)	Top-1 acc 49.609 (50.847)	Top-5 acc 72.656 (73.686)	lr 0.02128
Train [31][810/3239]	Time 0.215 (0.574)	Data Time 0.001 (0.039)	Loss 3.0238 (3.0649)	Entropy 1.45541 (1.45816)	Top-1 acc 53.906 (50.847)	Top-5 acc 72.656 (73.683)	lr 0.02128
Train [31][820/3239]	Time 0.323 (0.572)	Data Time 0.001 (0.039)	Loss 3.0545 (3.0645)	Entropy 1.45543 (1.45812)	Top-1 acc 53.906 (50.854)	Top-5 acc 74.609 (73.699)	lr 0.02128
Train [31][830/3239]	Time 0.222 (0.571)	Data Time 0.001 (0.038)	Loss 3.0298 (3.0647)	Entropy 1.45533 (1.45809)	Top-1 acc 50.781 (50.855)	Top-5 acc 74.609 (73.691)	lr 0.02128
Train [31][840/3239]	Time 0.233 (0.569)	Data Time 0.001 (0.038)	Loss 2.9720 (3.0644)	Entropy 1.45528 (1.45806)	Top-1 acc 49.219 (50.848)	Top-5 acc 75.391 (73.682)	lr 0.02128
Train [31][850/3239]	Time 0.232 (0.568)	Data Time 0.001 (0.037)	Loss 2.8982 (3.0642)	Entropy 1.45529 (1.45803)	Top-1 acc 53.125 (50.846)	Top-5 acc 76.562 (73.685)	lr 0.02128
Train [31][860/3239]	Time 0.240 (0.566)	Data Time 0.002 (0.037)	Loss 3.1742 (3.0634)	Entropy 1.45533 (1.45799)	Top-1 acc 51.953 (50.864)	Top-5 acc 73.047 (73.702)	lr 0.02128
Train [31][870/3239]	Time 0.240 (0.616)	Data Time 0.002 (0.036)	Loss 3.2932 (3.0637)	Entropy 1.45525 (1.45796)	Top-1 acc 46.875 (50.853)	Top-5 acc 67.969 (73.699)	lr 0.02128
Train [31][880/3239]	Time 0.265 (0.614)	Data Time 0.002 (0.036)	Loss 2.9067 (3.0634)	Entropy 1.45515 (1.45793)	Top-1 acc 57.812 (50.878)	Top-5 acc 75.781 (73.708)	lr 0.02128
Train [31][890/3239]	Time 2.458 (0.612)	Data Time 0.002 (0.036)	Loss 3.2076 (3.0628)	Entropy 1.45515 (1.45790)	Top-1 acc 48.047 (50.889)	Top-5 acc 71.484 (73.716)	lr 0.02128
Train [31][900/3239]	Time 0.237 (0.608)	Data Time 0.002 (0.035)	Loss 3.0406 (3.0628)	Entropy 1.45518 (1.45787)	Top-1 acc 49.219 (50.890)	Top-5 acc 75.781 (73.722)	lr 0.02127
Train [31][910/3239]	Time 0.227 (0.606)	Data Time 0.001 (0.035)	Loss 3.0337 (3.0625)	Entropy 1.45515 (1.45784)	Top-1 acc 49.609 (50.888)	Top-5 acc 75.781 (73.732)	lr 0.02127
Train [31][920/3239]	Time 0.223 (0.605)	Data Time 0.001 (0.035)	Loss 2.8918 (3.0621)	Entropy 1.45509 (1.45781)	Top-1 acc 55.078 (50.900)	Top-5 acc 76.172 (73.737)	lr 0.02127
Train [31][930/3239]	Time 0.310 (0.603)	Data Time 0.001 (0.034)	Loss 3.1203 (3.0623)	Entropy 1.45500 (1.45778)	Top-1 acc 49.609 (50.896)	Top-5 acc 68.359 (73.717)	lr 0.02127
Train [31][940/3239]	Time 0.233 (0.602)	Data Time 0.001 (0.034)	Loss 3.0832 (3.0630)	Entropy 1.45484 (1.45775)	Top-1 acc 46.875 (50.875)	Top-5 acc 73.438 (73.691)	lr 0.02127
Train [31][950/3239]	Time 0.214 (0.600)	Data Time 0.002 (0.034)	Loss 2.9668 (3.0637)	Entropy 1.45482 (1.45772)	Top-1 acc 51.953 (50.847)	Top-5 acc 76.953 (73.679)	lr 0.02127
Train [31][960/3239]	Time 0.211 (0.599)	Data Time 0.002 (0.033)	Loss 3.0310 (3.0635)	Entropy 1.45469 (1.45769)	Top-1 acc 52.734 (50.857)	Top-5 acc 72.266 (73.682)	lr 0.02127
Train [31][970/3239]	Time 0.282 (0.597)	Data Time 0.001 (0.033)	Loss 2.8979 (3.0629)	Entropy 1.45470 (1.45766)	Top-1 acc 51.562 (50.870)	Top-5 acc 78.125 (73.686)	lr 0.02127
Train [31][980/3239]	Time 0.236 (0.596)	Data Time 0.001 (0.033)	Loss 3.1691 (3.0625)	Entropy 1.45460 (1.45763)	Top-1 acc 50.000 (50.873)	Top-5 acc 72.266 (73.697)	lr 0.02127
Train [31][990/3239]	Time 0.228 (0.595)	Data Time 0.001 (0.032)	Loss 3.1166 (3.0627)	Entropy 1.45457 (1.45760)	Top-1 acc 51.172 (50.866)	Top-5 acc 73.438 (73.694)	lr 0.02127
Train [31][1000/3239]	Time 2.395 (0.593)	Data Time 0.001 (0.032)	Loss 2.7707 (3.0619)	Entropy 1.45457 (1.45757)	Top-1 acc 58.984 (50.873)	Top-5 acc 78.906 (73.711)	lr 0.02127
Train [31][1010/3239]	Time 0.238 (0.590)	Data Time 0.001 (0.032)	Loss 2.9039 (3.0621)	Entropy 1.45449 (1.45754)	Top-1 acc 54.688 (50.875)	Top-5 acc 77.734 (73.706)	lr 0.02127
Train [31][1020/3239]	Time 0.209 (0.588)	Data Time 0.001 (0.031)	Loss 2.9541 (3.0625)	Entropy 1.45433 (1.45751)	Top-1 acc 52.344 (50.861)	Top-5 acc 77.734 (73.707)	lr 0.02127
Train [31][1030/3239]	Time 0.226 (0.587)	Data Time 0.001 (0.031)	Loss 2.9153 (3.0629)	Entropy 1.45421 (1.45747)	Top-1 acc 50.000 (50.847)	Top-5 acc 77.734 (73.701)	lr 0.02126
Train [31][1040/3239]	Time 0.319 (0.586)	Data Time 0.001 (0.031)	Loss 2.9283 (3.0630)	Entropy 1.45417 (1.45744)	Top-1 acc 51.172 (50.837)	Top-5 acc 77.734 (73.704)	lr 0.02126
Train [31][1050/3239]	Time 0.223 (0.584)	Data Time 0.001 (0.031)	Loss 3.2155 (3.0629)	Entropy 1.45403 (1.45741)	Top-1 acc 47.266 (50.832)	Top-5 acc 69.141 (73.713)	lr 0.02126
Train [31][1060/3239]	Time 0.216 (0.583)	Data Time 0.001 (0.030)	Loss 3.0489 (3.0636)	Entropy 1.45401 (1.45738)	Top-1 acc 51.953 (50.813)	Top-5 acc 75.391 (73.706)	lr 0.02126
Train [31][1070/3239]	Time 0.206 (0.582)	Data Time 0.001 (0.030)	Loss 3.3169 (3.0638)	Entropy 1.45391 (1.45735)	Top-1 acc 44.531 (50.806)	Top-5 acc 67.969 (73.696)	lr 0.02126
Train [31][1080/3239]	Time 0.215 (0.580)	Data Time 0.001 (0.030)	Loss 2.9421 (3.0639)	Entropy 1.45391 (1.45732)	Top-1 acc 54.297 (50.803)	Top-5 acc 75.391 (73.699)	lr 0.02126
Train [31][1090/3239]	Time 0.280 (0.579)	Data Time 0.001 (0.029)	Loss 3.1534 (3.0640)	Entropy 1.45389 (1.45728)	Top-1 acc 50.000 (50.805)	Top-5 acc 69.531 (73.693)	lr 0.02126
Train [31][1100/3239]	Time 0.210 (0.578)	Data Time 0.001 (0.029)	Loss 2.9419 (3.0635)	Entropy 1.45391 (1.45725)	Top-1 acc 50.000 (50.816)	Top-5 acc 75.391 (73.697)	lr 0.02126
Train [31][1110/3239]	Time 2.309 (0.577)	Data Time 0.001 (0.029)	Loss 3.0634 (3.0631)	Entropy 1.45391 (1.45722)	Top-1 acc 47.656 (50.827)	Top-5 acc 74.219 (73.704)	lr 0.02126
Train [31][1120/3239]	Time 0.234 (0.574)	Data Time 0.001 (0.029)	Loss 3.2539 (3.0640)	Entropy 1.45380 (1.45719)	Top-1 acc 46.484 (50.796)	Top-5 acc 69.141 (73.685)	lr 0.02126
Train [31][1130/3239]	Time 0.229 (0.573)	Data Time 0.001 (0.028)	Loss 3.0313 (3.0642)	Entropy 1.45366 (1.45716)	Top-1 acc 50.391 (50.776)	Top-5 acc 73.047 (73.679)	lr 0.02126
Train [31][1140/3239]	Time 0.231 (0.572)	Data Time 0.001 (0.028)	Loss 2.8397 (3.0642)	Entropy 1.45364 (1.45713)	Top-1 acc 54.297 (50.770)	Top-5 acc 78.125 (73.678)	lr 0.02126
Train [31][1150/3239]	Time 0.352 (0.571)	Data Time 0.001 (0.028)	Loss 3.0980 (3.0644)	Entropy 1.45361 (1.45710)	Top-1 acc 48.438 (50.757)	Top-5 acc 75.781 (73.675)	lr 0.02126
Train [31][1160/3239]	Time 0.208 (0.570)	Data Time 0.001 (0.028)	Loss 2.9070 (3.0641)	Entropy 1.45355 (1.45707)	Top-1 acc 53.906 (50.761)	Top-5 acc 77.344 (73.689)	lr 0.02126
Train [31][1170/3239]	Time 0.213 (0.569)	Data Time 0.001 (0.028)	Loss 3.0293 (3.0641)	Entropy 1.45350 (1.45704)	Top-1 acc 51.953 (50.756)	Top-5 acc 72.656 (73.690)	lr 0.02125
Train [31][1180/3239]	Time 0.209 (0.568)	Data Time 0.001 (0.027)	Loss 3.2572 (3.0642)	Entropy 1.45347 (1.45701)	Top-1 acc 47.656 (50.751)	Top-5 acc 69.922 (73.685)	lr 0.02125
Train [31][1190/3239]	Time 0.170 (0.567)	Data Time 0.001 (0.027)	Loss 2.8913 (3.0640)	Entropy 1.45339 (1.45698)	Top-1 acc 55.469 (50.751)	Top-5 acc 77.344 (73.685)	lr 0.02125
Train [31][1200/3239]	Time 0.203 (0.566)	Data Time 0.001 (0.027)	Loss 3.1827 (3.0643)	Entropy 1.45332 (1.45695)	Top-1 acc 44.141 (50.731)	Top-5 acc 71.484 (73.679)	lr 0.02125
Train [31][1210/3239]	Time 0.209 (0.565)	Data Time 0.001 (0.027)	Loss 3.1177 (3.0648)	Entropy 1.45324 (1.45692)	Top-1 acc 51.562 (50.730)	Top-5 acc 70.312 (73.665)	lr 0.02125
Train [31][1220/3239]	Time 2.318 (0.564)	Data Time 0.001 (0.027)	Loss 2.9508 (3.0644)	Entropy 1.45324 (1.45689)	Top-1 acc 55.078 (50.738)	Top-5 acc 75.391 (73.667)	lr 0.02125
Train [31][1230/3239]	Time 0.225 (0.561)	Data Time 0.001 (0.026)	Loss 3.1042 (3.0642)	Entropy 1.45325 (1.45686)	Top-1 acc 49.219 (50.738)	Top-5 acc 72.266 (73.667)	lr 0.02125
Train [31][1240/3239]	Time 0.233 (0.596)	Data Time 0.002 (0.026)	Loss 3.1046 (3.0639)	Entropy 1.45317 (1.45683)	Top-1 acc 47.266 (50.740)	Top-5 acc 75.781 (73.674)	lr 0.02125
Train [31][1250/3239]	Time 0.227 (0.595)	Data Time 0.002 (0.026)	Loss 2.9541 (3.0640)	Entropy 1.45317 (1.45680)	Top-1 acc 53.906 (50.745)	Top-5 acc 74.609 (73.673)	lr 0.02125
Train [31][1260/3239]	Time 0.215 (0.594)	Data Time 0.001 (0.026)	Loss 3.2152 (3.0645)	Entropy 1.45313 (1.45677)	Top-1 acc 48.047 (50.730)	Top-5 acc 73.828 (73.668)	lr 0.02125
Train [31][1270/3239]	Time 0.222 (0.593)	Data Time 0.001 (0.026)	Loss 3.0111 (3.0640)	Entropy 1.45290 (1.45674)	Top-1 acc 50.000 (50.743)	Top-5 acc 74.609 (73.674)	lr 0.02125
Train [31][1280/3239]	Time 0.205 (0.592)	Data Time 0.001 (0.025)	Loss 3.2694 (3.0638)	Entropy 1.45282 (1.45671)	Top-1 acc 43.750 (50.743)	Top-5 acc 67.578 (73.679)	lr 0.02125
Train [31][1290/3239]	Time 0.218 (0.591)	Data Time 0.001 (0.025)	Loss 2.7753 (3.0631)	Entropy 1.45280 (1.45668)	Top-1 acc 60.156 (50.761)	Top-5 acc 78.906 (73.694)	lr 0.02125
Train [31][1300/3239]	Time 0.206 (0.590)	Data Time 0.001 (0.025)	Loss 3.0010 (3.0627)	Entropy 1.45275 (1.45665)	Top-1 acc 53.906 (50.770)	Top-5 acc 74.219 (73.706)	lr 0.02125
Train [31][1310/3239]	Time 0.224 (0.589)	Data Time 0.002 (0.025)	Loss 2.9620 (3.0625)	Entropy 1.45271 (1.45662)	Top-1 acc 50.000 (50.774)	Top-5 acc 77.344 (73.709)	lr 0.02124
Train [31][1320/3239]	Time 0.390 (0.588)	Data Time 0.001 (0.025)	Loss 3.3057 (3.0627)	Entropy 1.45264 (1.45659)	Top-1 acc 42.188 (50.764)	Top-5 acc 70.312 (73.703)	lr 0.02124
Train [31][1330/3239]	Time 2.452 (0.587)	Data Time 0.001 (0.024)	Loss 3.2112 (3.0625)	Entropy 1.45264 (1.45656)	Top-1 acc 49.219 (50.767)	Top-5 acc 69.531 (73.710)	lr 0.02124
Train [31][1340/3239]	Time 0.264 (0.584)	Data Time 0.001 (0.024)	Loss 3.0243 (3.0626)	Entropy 1.45264 (1.45653)	Top-1 acc 51.562 (50.768)	Top-5 acc 72.656 (73.706)	lr 0.02124
Train [31][1350/3239]	Time 0.240 (0.583)	Data Time 0.001 (0.024)	Loss 3.0988 (3.0626)	Entropy 1.45258 (1.45650)	Top-1 acc 47.656 (50.762)	Top-5 acc 70.312 (73.699)	lr 0.02124
Train [31][1360/3239]	Time 0.200 (0.582)	Data Time 0.001 (0.024)	Loss 3.4691 (3.0630)	Entropy 1.45256 (1.45648)	Top-1 acc 43.750 (50.756)	Top-5 acc 65.625 (73.692)	lr 0.02124
Train [31][1370/3239]	Time 0.372 (0.581)	Data Time 0.001 (0.024)	Loss 3.1985 (3.0632)	Entropy 1.45255 (1.45645)	Top-1 acc 48.828 (50.752)	Top-5 acc 67.578 (73.683)	lr 0.02124
Train [31][1380/3239]	Time 0.214 (0.580)	Data Time 0.001 (0.024)	Loss 2.9344 (3.0625)	Entropy 1.45249 (1.45642)	Top-1 acc 53.516 (50.769)	Top-5 acc 76.953 (73.693)	lr 0.02124
Train [31][1390/3239]	Time 0.226 (0.579)	Data Time 0.001 (0.024)	Loss 2.9419 (3.0622)	Entropy 1.45248 (1.45639)	Top-1 acc 52.734 (50.773)	Top-5 acc 75.391 (73.693)	lr 0.02124
Train [31][1400/3239]	Time 0.214 (0.578)	Data Time 0.001 (0.023)	Loss 3.1126 (3.0626)	Entropy 1.45238 (1.45636)	Top-1 acc 49.219 (50.765)	Top-5 acc 72.656 (73.690)	lr 0.02124
Train [31][1410/3239]	Time 0.233 (0.577)	Data Time 0.001 (0.023)	Loss 3.0233 (3.0624)	Entropy 1.45249 (1.45633)	Top-1 acc 52.734 (50.767)	Top-5 acc 76.562 (73.692)	lr 0.02124
Train [31][1420/3239]	Time 0.225 (0.576)	Data Time 0.002 (0.023)	Loss 2.9919 (3.0626)	Entropy 1.45248 (1.45631)	Top-1 acc 50.781 (50.761)	Top-5 acc 73.438 (73.688)	lr 0.02124
Train [31][1430/3239]	Time 0.310 (0.575)	Data Time 0.001 (0.023)	Loss 3.1469 (3.0626)	Entropy 1.45248 (1.45628)	Top-1 acc 50.391 (50.766)	Top-5 acc 72.656 (73.685)	lr 0.02124
Train [31][1440/3239]	Time 2.251 (0.574)	Data Time 0.001 (0.023)	Loss 3.1719 (3.0629)	Entropy 1.45248 (1.45625)	Top-1 acc 47.656 (50.761)	Top-5 acc 70.312 (73.676)	lr 0.02124
Train [31][1450/3239]	Time 0.218 (0.572)	Data Time 0.001 (0.023)	Loss 3.0370 (3.0630)	Entropy 1.45239 (1.45623)	Top-1 acc 52.344 (50.761)	Top-5 acc 73.047 (73.673)	lr 0.02123
Train [31][1460/3239]	Time 0.229 (0.571)	Data Time 0.001 (0.022)	Loss 3.0571 (3.0633)	Entropy 1.45232 (1.45620)	Top-1 acc 48.828 (50.749)	Top-5 acc 73.047 (73.658)	lr 0.02123
Train [31][1470/3239]	Time 0.227 (0.570)	Data Time 0.001 (0.022)	Loss 3.1431 (3.0634)	Entropy 1.45222 (1.45617)	Top-1 acc 49.609 (50.753)	Top-5 acc 72.266 (73.663)	lr 0.02123
Train [31][1480/3239]	Time 0.233 (0.570)	Data Time 0.001 (0.022)	Loss 3.0040 (3.0635)	Entropy 1.45207 (1.45615)	Top-1 acc 50.391 (50.753)	Top-5 acc 77.344 (73.660)	lr 0.02123
Train [31][1490/3239]	Time 0.234 (0.569)	Data Time 0.001 (0.022)	Loss 3.0325 (3.0637)	Entropy 1.45191 (1.45612)	Top-1 acc 50.391 (50.745)	Top-5 acc 73.828 (73.657)	lr 0.02123
Train [31][1500/3239]	Time 0.215 (0.568)	Data Time 0.001 (0.022)	Loss 3.1206 (3.0631)	Entropy 1.45187 (1.45609)	Top-1 acc 47.266 (50.754)	Top-5 acc 72.656 (73.665)	lr 0.02123
Train [31][1510/3239]	Time 0.216 (0.568)	Data Time 0.001 (0.022)	Loss 2.9151 (3.0630)	Entropy 1.45181 (1.45606)	Top-1 acc 56.250 (50.756)	Top-5 acc 74.609 (73.666)	lr 0.02123
Train [31][1520/3239]	Time 0.202 (0.567)	Data Time 0.001 (0.022)	Loss 3.1599 (3.0637)	Entropy 1.45169 (1.45603)	Top-1 acc 49.609 (50.747)	Top-5 acc 71.094 (73.656)	lr 0.02123
Train [31][1530/3239]	Time 0.224 (0.566)	Data Time 0.002 (0.022)	Loss 2.9955 (3.0643)	Entropy 1.45161 (1.45600)	Top-1 acc 54.297 (50.738)	Top-5 acc 75.781 (73.645)	lr 0.02123
Train [31][1540/3239]	Time 0.343 (0.565)	Data Time 0.002 (0.021)	Loss 3.2136 (3.0644)	Entropy 1.45137 (1.45598)	Top-1 acc 46.094 (50.739)	Top-5 acc 70.312 (73.639)	lr 0.02123
Train [31][1550/3239]	Time 2.426 (0.564)	Data Time 0.001 (0.021)	Loss 3.0010 (3.0644)	Entropy 1.45137 (1.45595)	Top-1 acc 53.906 (50.736)	Top-5 acc 74.609 (73.637)	lr 0.02123
Train [31][1560/3239]	Time 0.199 (0.562)	Data Time 0.001 (0.021)	Loss 3.0683 (3.0649)	Entropy 1.45129 (1.45592)	Top-1 acc 51.953 (50.729)	Top-5 acc 73.438 (73.634)	lr 0.02123
Train [31][1570/3239]	Time 0.234 (0.562)	Data Time 0.001 (0.021)	Loss 2.9364 (3.0651)	Entropy 1.45131 (1.45589)	Top-1 acc 49.609 (50.722)	Top-5 acc 75.000 (73.629)	lr 0.02123
Train [31][1580/3239]	Time 0.226 (0.561)	Data Time 0.001 (0.021)	Loss 3.1229 (3.0651)	Entropy 1.45126 (1.45586)	Top-1 acc 48.047 (50.720)	Top-5 acc 71.484 (73.627)	lr 0.02123
Train [31][1590/3239]	Time 0.217 (0.560)	Data Time 0.001 (0.021)	Loss 3.0053 (3.0652)	Entropy 1.45124 (1.45583)	Top-1 acc 51.172 (50.724)	Top-5 acc 74.609 (73.627)	lr 0.02122
Train [31][1600/3239]	Time 0.222 (0.587)	Data Time 0.002 (0.021)	Loss 2.9963 (3.0651)	Entropy 1.45120 (1.45580)	Top-1 acc 53.516 (50.734)	Top-5 acc 75.781 (73.628)	lr 0.02122
Train [31][1610/3239]	Time 0.238 (0.586)	Data Time 0.002 (0.021)	Loss 3.0241 (3.0648)	Entropy 1.45108 (1.45577)	Top-1 acc 49.219 (50.742)	Top-5 acc 71.875 (73.631)	lr 0.02122
Train [31][1620/3239]	Time 0.243 (0.586)	Data Time 0.002 (0.020)	Loss 3.0610 (3.0650)	Entropy 1.45097 (1.45574)	Top-1 acc 48.828 (50.736)	Top-5 acc 74.219 (73.633)	lr 0.02122
Train [31][1630/3239]	Time 0.247 (0.585)	Data Time 0.002 (0.020)	Loss 3.1698 (3.0650)	Entropy 1.45085 (1.45571)	Top-1 acc 48.438 (50.733)	Top-5 acc 70.703 (73.632)	lr 0.02122
Train [31][1640/3239]	Time 0.238 (0.584)	Data Time 0.002 (0.020)	Loss 3.0449 (3.0648)	Entropy 1.45073 (1.45568)	Top-1 acc 51.953 (50.738)	Top-5 acc 72.266 (73.636)	lr 0.02122
Train [31][1650/3239]	Time 0.224 (0.584)	Data Time 0.002 (0.020)	Loss 2.9345 (3.0649)	Entropy 1.45068 (1.45565)	Top-1 acc 57.031 (50.739)	Top-5 acc 77.344 (73.636)	lr 0.02122
Train [31][1660/3239]	Time 2.316 (0.583)	Data Time 0.001 (0.020)	Loss 3.1222 (3.0644)	Entropy 1.45068 (1.45562)	Top-1 acc 49.609 (50.749)	Top-5 acc 72.656 (73.650)	lr 0.02122
Train [31][1670/3239]	Time 0.208 (0.581)	Data Time 0.001 (0.020)	Loss 3.0380 (3.0648)	Entropy 1.45067 (1.45559)	Top-1 acc 50.000 (50.731)	Top-5 acc 74.609 (73.639)	lr 0.02122
Train [31][1680/3239]	Time 0.230 (0.580)	Data Time 0.001 (0.020)	Loss 3.2448 (3.0651)	Entropy 1.45069 (1.45556)	Top-1 acc 48.828 (50.724)	Top-5 acc 70.312 (73.633)	lr 0.02122
Train [31][1690/3239]	Time 0.234 (0.579)	Data Time 0.002 (0.020)	Loss 3.2669 (3.0651)	Entropy 1.45065 (1.45554)	Top-1 acc 45.312 (50.721)	Top-5 acc 68.750 (73.632)	lr 0.02122
Train [31][1700/3239]	Time 0.321 (0.579)	Data Time 0.001 (0.020)	Loss 3.0258 (3.0651)	Entropy 1.45057 (1.45551)	Top-1 acc 50.000 (50.722)	Top-5 acc 73.438 (73.633)	lr 0.02122
Train [31][1710/3239]	Time 0.276 (0.578)	Data Time 0.002 (0.019)	Loss 3.1276 (3.0652)	Entropy 1.45051 (1.45548)	Top-1 acc 48.047 (50.718)	Top-5 acc 73.438 (73.630)	lr 0.02122
Train [31][1720/3239]	Time 0.207 (0.578)	Data Time 0.001 (0.019)	Loss 3.2158 (3.0647)	Entropy 1.45051 (1.45545)	Top-1 acc 46.094 (50.723)	Top-5 acc 69.531 (73.638)	lr 0.02122
Train [31][1730/3239]	Time 0.232 (0.577)	Data Time 0.002 (0.019)	Loss 3.0806 (3.0646)	Entropy 1.45046 (1.45542)	Top-1 acc 47.266 (50.724)	Top-5 acc 75.000 (73.644)	lr 0.02121
Train [31][1740/3239]	Time 0.221 (0.576)	Data Time 0.002 (0.019)	Loss 3.0035 (3.0645)	Entropy 1.45040 (1.45539)	Top-1 acc 54.297 (50.731)	Top-5 acc 75.781 (73.645)	lr 0.02121
Train [31][1750/3239]	Time 0.228 (0.576)	Data Time 0.001 (0.019)	Loss 3.1349 (3.0644)	Entropy 1.45056 (1.45536)	Top-1 acc 49.219 (50.732)	Top-5 acc 71.875 (73.643)	lr 0.02121
Train [31][1760/3239]	Time 0.363 (0.575)	Data Time 0.001 (0.019)	Loss 2.8818 (3.0642)	Entropy 1.45056 (1.45534)	Top-1 acc 55.078 (50.736)	Top-5 acc 76.172 (73.648)	lr 0.02121
Train [31][1770/3239]	Time 2.420 (0.575)	Data Time 0.002 (0.019)	Loss 3.0604 (3.0642)	Entropy 1.45056 (1.45531)	Top-1 acc 51.562 (50.740)	Top-5 acc 71.875 (73.643)	lr 0.02121
Train [31][1780/3239]	Time 0.256 (0.573)	Data Time 0.002 (0.019)	Loss 3.1485 (3.0640)	Entropy 1.45049 (1.45528)	Top-1 acc 50.391 (50.749)	Top-5 acc 71.875 (73.645)	lr 0.02121
Train [31][1790/3239]	Time 0.218 (0.572)	Data Time 0.002 (0.019)	Loss 3.0204 (3.0642)	Entropy 1.45038 (1.45525)	Top-1 acc 50.391 (50.745)	Top-5 acc 74.609 (73.638)	lr 0.02121
Train [31][1800/3239]	Time 0.235 (0.571)	Data Time 0.001 (0.019)	Loss 3.1224 (3.0643)	Entropy 1.45035 (1.45523)	Top-1 acc 48.828 (50.736)	Top-5 acc 75.781 (73.636)	lr 0.02121
Train [31][1810/3239]	Time 0.234 (0.571)	Data Time 0.001 (0.018)	Loss 2.8743 (3.0640)	Entropy 1.45032 (1.45520)	Top-1 acc 54.688 (50.742)	Top-5 acc 75.000 (73.639)	lr 0.02121
Train [31][1820/3239]	Time 0.217 (0.570)	Data Time 0.001 (0.018)	Loss 3.0286 (3.0642)	Entropy 1.45024 (1.45517)	Top-1 acc 50.000 (50.738)	Top-5 acc 76.562 (73.633)	lr 0.02121
Train [31][1830/3239]	Time 0.216 (0.570)	Data Time 0.001 (0.018)	Loss 2.8440 (3.0641)	Entropy 1.45013 (1.45515)	Top-1 acc 54.297 (50.739)	Top-5 acc 77.344 (73.633)	lr 0.02121
Train [31][1840/3239]	Time 0.209 (0.569)	Data Time 0.001 (0.018)	Loss 2.9688 (3.0641)	Entropy 1.45000 (1.45512)	Top-1 acc 49.609 (50.741)	Top-5 acc 71.875 (73.631)	lr 0.02121
Train [31][1850/3239]	Time 0.207 (0.568)	Data Time 0.001 (0.018)	Loss 2.9952 (3.0641)	Entropy 1.44994 (1.45509)	Top-1 acc 52.734 (50.740)	Top-5 acc 74.609 (73.628)	lr 0.02121
Train [31][1860/3239]	Time 0.217 (0.568)	Data Time 0.002 (0.018)	Loss 2.7745 (3.0640)	Entropy 1.44994 (1.45506)	Top-1 acc 57.031 (50.735)	Top-5 acc 80.078 (73.633)	lr 0.02121
Train [31][1870/3239]	Time 0.322 (0.567)	Data Time 0.002 (0.018)	Loss 3.0055 (3.0637)	Entropy 1.44984 (1.45504)	Top-1 acc 53.906 (50.742)	Top-5 acc 74.609 (73.636)	lr 0.02120
Train [31][1880/3239]	Time 2.436 (0.567)	Data Time 0.002 (0.018)	Loss 3.0499 (3.0640)	Entropy 1.44984 (1.45501)	Top-1 acc 50.781 (50.737)	Top-5 acc 73.047 (73.628)	lr 0.02120
Train [31][1890/3239]	Time 0.205 (0.565)	Data Time 0.001 (0.018)	Loss 3.0092 (3.0644)	Entropy 1.44982 (1.45498)	Top-1 acc 52.734 (50.724)	Top-5 acc 76.172 (73.617)	lr 0.02120
Train [31][1900/3239]	Time 0.232 (0.564)	Data Time 0.001 (0.018)	Loss 3.1120 (3.0645)	Entropy 1.44970 (1.45495)	Top-1 acc 53.125 (50.725)	Top-5 acc 72.266 (73.616)	lr 0.02120
Train [31][1910/3239]	Time 0.212 (0.563)	Data Time 0.001 (0.018)	Loss 2.8269 (3.0646)	Entropy 1.44970 (1.45492)	Top-1 acc 54.688 (50.721)	Top-5 acc 76.172 (73.608)	lr 0.02120
Train [31][1920/3239]	Time 0.168 (0.563)	Data Time 0.001 (0.017)	Loss 2.8954 (3.0647)	Entropy 1.44964 (1.45490)	Top-1 acc 51.953 (50.715)	Top-5 acc 76.562 (73.607)	lr 0.02120
Train [31][1930/3239]	Time 0.230 (0.562)	Data Time 0.001 (0.017)	Loss 3.0790 (3.0645)	Entropy 1.44958 (1.45487)	Top-1 acc 53.906 (50.723)	Top-5 acc 72.656 (73.611)	lr 0.02120
Train [31][1940/3239]	Time 0.220 (0.562)	Data Time 0.001 (0.017)	Loss 2.9892 (3.0645)	Entropy 1.44953 (1.45484)	Top-1 acc 50.781 (50.722)	Top-5 acc 77.734 (73.618)	lr 0.02120
Train [31][1950/3239]	Time 0.214 (0.561)	Data Time 0.003 (0.017)	Loss 3.1900 (3.0647)	Entropy 1.44944 (1.45482)	Top-1 acc 45.703 (50.713)	Top-5 acc 72.656 (73.614)	lr 0.02120
Train [31][1960/3239]	Time 0.226 (0.584)	Data Time 0.002 (0.017)	Loss 3.0389 (3.0650)	Entropy 1.44939 (1.45479)	Top-1 acc 50.391 (50.711)	Top-5 acc 75.391 (73.607)	lr 0.02120
Train [31][1970/3239]	Time 0.203 (0.584)	Data Time 0.002 (0.017)	Loss 3.1810 (3.0653)	Entropy 1.44927 (1.45476)	Top-1 acc 45.703 (50.703)	Top-5 acc 72.656 (73.604)	lr 0.02120
Train [31][1980/3239]	Time 0.337 (0.583)	Data Time 0.002 (0.017)	Loss 3.0880 (3.0654)	Entropy 1.44924 (1.45473)	Top-1 acc 49.219 (50.701)	Top-5 acc 74.219 (73.605)	lr 0.02120
Train [31][1990/3239]	Time 2.388 (0.582)	Data Time 0.001 (0.017)	Loss 2.9546 (3.0654)	Entropy 1.44924 (1.45470)	Top-1 acc 54.297 (50.699)	Top-5 acc 75.000 (73.596)	lr 0.02120
Train [31][2000/3239]	Time 0.251 (0.581)	Data Time 0.001 (0.017)	Loss 2.8899 (3.0656)	Entropy 1.44918 (1.45468)	Top-1 acc 56.250 (50.694)	Top-5 acc 74.609 (73.591)	lr 0.02119
Train [31][2010/3239]	Time 0.226 (0.580)	Data Time 0.001 (0.017)	Loss 2.6863 (3.0653)	Entropy 1.44906 (1.45465)	Top-1 acc 55.469 (50.695)	Top-5 acc 81.250 (73.595)	lr 0.02119
Train [31][2020/3239]	Time 0.216 (0.579)	Data Time 0.001 (0.017)	Loss 3.2857 (3.0657)	Entropy 1.44894 (1.45462)	Top-1 acc 49.219 (50.696)	Top-5 acc 71.484 (73.588)	lr 0.02119
Train [31][2030/3239]	Time 0.300 (0.579)	Data Time 0.001 (0.017)	Loss 3.0381 (3.0657)	Entropy 1.44895 (1.45459)	Top-1 acc 51.172 (50.696)	Top-5 acc 73.438 (73.588)	lr 0.02119
Train [31][2040/3239]	Time 0.237 (0.578)	Data Time 0.001 (0.017)	Loss 2.8650 (3.0656)	Entropy 1.44879 (1.45457)	Top-1 acc 55.859 (50.697)	Top-5 acc 77.734 (73.590)	lr 0.02119
Train [31][2050/3239]	Time 0.172 (0.577)	Data Time 0.001 (0.017)	Loss 3.0853 (3.0657)	Entropy 1.44862 (1.45454)	Top-1 acc 51.172 (50.702)	Top-5 acc 72.266 (73.591)	lr 0.02119
Train [31][2060/3239]	Time 0.229 (0.577)	Data Time 0.001 (0.016)	Loss 3.1126 (3.0654)	Entropy 1.44850 (1.45451)	Top-1 acc 50.391 (50.706)	Top-5 acc 71.875 (73.594)	lr 0.02119
Train [31][2070/3239]	Time 0.210 (0.576)	Data Time 0.001 (0.016)	Loss 3.2711 (3.0657)	Entropy 1.44850 (1.45448)	Top-1 acc 42.969 (50.693)	Top-5 acc 71.484 (73.590)	lr 0.02119
Train [31][2080/3239]	Time 0.282 (0.576)	Data Time 0.001 (0.016)	Loss 3.1830 (3.0657)	Entropy 1.44840 (1.45445)	Top-1 acc 44.922 (50.693)	Top-5 acc 72.266 (73.590)	lr 0.02119
Train [31][2090/3239]	Time 0.242 (0.575)	Data Time 0.002 (0.016)	Loss 3.0540 (3.0658)	Entropy 1.44828 (1.45442)	Top-1 acc 51.562 (50.693)	Top-5 acc 75.781 (73.587)	lr 0.02119
Train [31][2100/3239]	Time 2.431 (0.575)	Data Time 0.001 (0.016)	Loss 3.0625 (3.0657)	Entropy 1.44828 (1.45439)	Top-1 acc 52.344 (50.696)	Top-5 acc 73.438 (73.589)	lr 0.02119
Train [31][2110/3239]	Time 0.253 (0.573)	Data Time 0.001 (0.016)	Loss 3.2565 (3.0655)	Entropy 1.44786 (1.45436)	Top-1 acc 46.484 (50.703)	Top-5 acc 71.484 (73.593)	lr 0.02119
Train [31][2120/3239]	Time 0.244 (0.572)	Data Time 0.002 (0.016)	Loss 3.2094 (3.0655)	Entropy 1.44773 (1.45433)	Top-1 acc 48.438 (50.705)	Top-5 acc 69.922 (73.593)	lr 0.02119
Train [31][2130/3239]	Time 0.219 (0.572)	Data Time 0.001 (0.016)	Loss 3.1091 (3.0656)	Entropy 1.44759 (1.45430)	Top-1 acc 49.609 (50.704)	Top-5 acc 72.656 (73.591)	lr 0.02119
Train [31][2140/3239]	Time 0.211 (0.571)	Data Time 0.001 (0.016)	Loss 3.1418 (3.0657)	Entropy 1.44757 (1.45427)	Top-1 acc 44.531 (50.698)	Top-5 acc 74.609 (73.590)	lr 0.02118
Train [31][2150/3239]	Time 0.213 (0.571)	Data Time 0.001 (0.016)	Loss 3.0334 (3.0659)	Entropy 1.44751 (1.45424)	Top-1 acc 46.484 (50.691)	Top-5 acc 74.609 (73.589)	lr 0.02118
Train [31][2160/3239]	Time 0.169 (0.570)	Data Time 0.001 (0.016)	Loss 2.9964 (3.0655)	Entropy 1.44743 (1.45420)	Top-1 acc 49.219 (50.700)	Top-5 acc 73.828 (73.599)	lr 0.02118
Train [31][2170/3239]	Time 0.236 (0.570)	Data Time 0.001 (0.016)	Loss 2.9944 (3.0654)	Entropy 1.44743 (1.45417)	Top-1 acc 51.562 (50.700)	Top-5 acc 73.828 (73.600)	lr 0.02118
Train [31][2180/3239]	Time 0.215 (0.569)	Data Time 0.001 (0.016)	Loss 3.0736 (3.0653)	Entropy 1.44733 (1.45414)	Top-1 acc 46.875 (50.699)	Top-5 acc 76.562 (73.603)	lr 0.02118
Train [31][2190/3239]	Time 0.211 (0.568)	Data Time 0.001 (0.016)	Loss 3.0809 (3.0653)	Entropy 1.44729 (1.45411)	Top-1 acc 53.125 (50.698)	Top-5 acc 71.484 (73.605)	lr 0.02118
Train [31][2200/3239]	Time 0.253 (0.568)	Data Time 0.001 (0.016)	Loss 3.1577 (3.0654)	Entropy 1.44725 (1.45408)	Top-1 acc 51.562 (50.698)	Top-5 acc 69.141 (73.602)	lr 0.02118
Train [31][2210/3239]	Time 2.374 (0.567)	Data Time 0.001 (0.015)	Loss 3.1288 (3.0652)	Entropy 1.44725 (1.45405)	Top-1 acc 49.219 (50.703)	Top-5 acc 73.828 (73.606)	lr 0.02118
Train [31][2220/3239]	Time 0.234 (0.566)	Data Time 0.001 (0.015)	Loss 3.1741 (3.0654)	Entropy 1.44718 (1.45402)	Top-1 acc 49.219 (50.701)	Top-5 acc 71.094 (73.599)	lr 0.02118
Train [31][2230/3239]	Time 0.217 (0.565)	Data Time 0.001 (0.015)	Loss 3.0693 (3.0652)	Entropy 1.44717 (1.45399)	Top-1 acc 49.609 (50.703)	Top-5 acc 73.438 (73.600)	lr 0.02118
Train [31][2240/3239]	Time 0.263 (0.565)	Data Time 0.001 (0.015)	Loss 3.0151 (3.0653)	Entropy 1.44713 (1.45396)	Top-1 acc 49.219 (50.700)	Top-5 acc 72.656 (73.594)	lr 0.02118
Train [31][2250/3239]	Time 0.214 (0.564)	Data Time 0.002 (0.015)	Loss 3.2414 (3.0655)	Entropy 1.44707 (1.45393)	Top-1 acc 45.312 (50.695)	Top-5 acc 70.703 (73.590)	lr 0.02118
Train [31][2260/3239]	Time 0.299 (0.564)	Data Time 0.001 (0.015)	Loss 3.0942 (3.0654)	Entropy 1.44701 (1.45390)	Top-1 acc 50.781 (50.694)	Top-5 acc 73.828 (73.593)	lr 0.02118
Train [31][2270/3239]	Time 0.207 (0.563)	Data Time 0.001 (0.015)	Loss 3.1570 (3.0653)	Entropy 1.44700 (1.45387)	Top-1 acc 46.094 (50.697)	Top-5 acc 73.438 (73.597)	lr 0.02118
Train [31][2280/3239]	Time 0.223 (0.563)	Data Time 0.001 (0.015)	Loss 3.0824 (3.0652)	Entropy 1.44676 (1.45383)	Top-1 acc 50.781 (50.696)	Top-5 acc 75.000 (73.598)	lr 0.02117
Train [31][2290/3239]	Time 0.202 (0.562)	Data Time 0.001 (0.015)	Loss 2.9917 (3.0652)	Entropy 1.44669 (1.45380)	Top-1 acc 48.828 (50.697)	Top-5 acc 75.391 (73.597)	lr 0.02117
Train [31][2300/3239]	Time 0.236 (0.562)	Data Time 0.001 (0.015)	Loss 3.2322 (3.0652)	Entropy 1.44663 (1.45377)	Top-1 acc 47.266 (50.700)	Top-5 acc 69.531 (73.600)	lr 0.02117
Train [31][2310/3239]	Time 0.223 (0.561)	Data Time 0.002 (0.015)	Loss 3.0154 (3.0657)	Entropy 1.44648 (1.45374)	Top-1 acc 50.000 (50.694)	Top-5 acc 74.219 (73.590)	lr 0.02117
Train [31][2320/3239]	Time 47.913 (0.580)	Data Time 0.001 (0.015)	Loss 3.1997 (3.0657)	Entropy 1.44648 (1.45371)	Top-1 acc 44.922 (50.688)	Top-5 acc 71.875 (73.589)	lr 0.02117
Train [31][2330/3239]	Time 0.221 (0.579)	Data Time 0.002 (0.015)	Loss 3.1195 (3.0657)	Entropy 1.44617 (1.45368)	Top-1 acc 50.391 (50.691)	Top-5 acc 71.484 (73.590)	lr 0.02117
Train [31][2340/3239]	Time 0.242 (0.578)	Data Time 0.002 (0.015)	Loss 3.1286 (3.0657)	Entropy 1.44616 (1.45365)	Top-1 acc 50.781 (50.694)	Top-5 acc 73.828 (73.587)	lr 0.02117
Train [31][2350/3239]	Time 0.218 (0.578)	Data Time 0.002 (0.015)	Loss 3.0986 (3.0657)	Entropy 1.44613 (1.45361)	Top-1 acc 51.562 (50.691)	Top-5 acc 76.172 (73.589)	lr 0.02117
Train [31][2360/3239]	Time 0.213 (0.577)	Data Time 0.001 (0.015)	Loss 3.3629 (3.0660)	Entropy 1.44603 (1.45358)	Top-1 acc 41.406 (50.682)	Top-5 acc 67.188 (73.584)	lr 0.02117
Train [31][2370/3239]	Time 0.329 (0.577)	Data Time 0.001 (0.015)	Loss 3.0415 (3.0659)	Entropy 1.44599 (1.45355)	Top-1 acc 51.172 (50.684)	Top-5 acc 74.219 (73.585)	lr 0.02117
Train [31][2380/3239]	Time 0.224 (0.576)	Data Time 0.001 (0.015)	Loss 3.0613 (3.0658)	Entropy 1.44588 (1.45352)	Top-1 acc 55.469 (50.688)	Top-5 acc 75.781 (73.590)	lr 0.02117
Train [31][2390/3239]	Time 0.228 (0.576)	Data Time 0.001 (0.014)	Loss 2.9439 (3.0658)	Entropy 1.44583 (1.45349)	Top-1 acc 50.781 (50.691)	Top-5 acc 77.734 (73.591)	lr 0.02117
Train [31][2400/3239]	Time 0.219 (0.575)	Data Time 0.001 (0.014)	Loss 3.2090 (3.0659)	Entropy 1.44569 (1.45345)	Top-1 acc 49.219 (50.694)	Top-5 acc 71.875 (73.586)	lr 0.02117
Train [31][2410/3239]	Time 0.198 (0.575)	Data Time 0.001 (0.014)	Loss 3.0527 (3.0660)	Entropy 1.44560 (1.45342)	Top-1 acc 53.516 (50.695)	Top-5 acc 74.219 (73.586)	lr 0.02117
Train [31][2420/3239]	Time 0.276 (0.574)	Data Time 0.001 (0.014)	Loss 3.1572 (3.0659)	Entropy 1.44560 (1.45339)	Top-1 acc 47.266 (50.699)	Top-5 acc 72.266 (73.593)	lr 0.02116
Train [31][2430/3239]	Time 2.360 (0.574)	Data Time 0.001 (0.014)	Loss 3.0597 (3.0661)	Entropy 1.44560 (1.45336)	Top-1 acc 49.219 (50.696)	Top-5 acc 71.094 (73.591)	lr 0.02116
Train [31][2440/3239]	Time 0.239 (0.572)	Data Time 0.001 (0.014)	Loss 3.0996 (3.0662)	Entropy 1.44559 (1.45333)	Top-1 acc 50.391 (50.695)	Top-5 acc 73.047 (73.589)	lr 0.02116
Train [31][2450/3239]	Time 0.200 (0.572)	Data Time 0.001 (0.014)	Loss 3.1839 (3.0660)	Entropy 1.44560 (1.45329)	Top-1 acc 48.438 (50.697)	Top-5 acc 73.828 (73.592)	lr 0.02116
Train [31][2460/3239]	Time 0.258 (0.571)	Data Time 0.002 (0.014)	Loss 3.2068 (3.0658)	Entropy 1.44561 (1.45326)	Top-1 acc 49.219 (50.700)	Top-5 acc 71.484 (73.592)	lr 0.02116
Train [31][2470/3239]	Time 0.227 (0.571)	Data Time 0.001 (0.014)	Loss 2.8998 (3.0658)	Entropy 1.44555 (1.45323)	Top-1 acc 53.516 (50.700)	Top-5 acc 76.172 (73.588)	lr 0.02116
Train [31][2480/3239]	Time 0.301 (0.571)	Data Time 0.002 (0.014)	Loss 3.0186 (3.0657)	Entropy 1.44548 (1.45320)	Top-1 acc 53.516 (50.702)	Top-5 acc 75.781 (73.591)	lr 0.02116
Train [31][2490/3239]	Time 0.216 (0.570)	Data Time 0.001 (0.014)	Loss 3.2595 (3.0657)	Entropy 1.44538 (1.45317)	Top-1 acc 46.484 (50.704)	Top-5 acc 71.094 (73.589)	lr 0.02116
Train [31][2500/3239]	Time 0.201 (0.570)	Data Time 0.001 (0.014)	Loss 3.0355 (3.0658)	Entropy 1.44533 (1.45314)	Top-1 acc 48.047 (50.702)	Top-5 acc 74.609 (73.590)	lr 0.02116
Train [31][2510/3239]	Time 0.249 (0.569)	Data Time 0.001 (0.014)	Loss 2.9583 (3.0657)	Entropy 1.44533 (1.45311)	Top-1 acc 51.172 (50.702)	Top-5 acc 74.609 (73.591)	lr 0.02116
Train [31][2520/3239]	Time 0.216 (0.569)	Data Time 0.001 (0.014)	Loss 2.9639 (3.0656)	Entropy 1.44523 (1.45308)	Top-1 acc 53.125 (50.703)	Top-5 acc 76.562 (73.594)	lr 0.02116
Train [31][2530/3239]	Time 0.210 (0.568)	Data Time 0.001 (0.014)	Loss 3.2115 (3.0658)	Entropy 1.44515 (1.45305)	Top-1 acc 46.875 (50.698)	Top-5 acc 72.266 (73.591)	lr 0.02116
Train [31][2540/3239]	Time 2.521 (0.568)	Data Time 0.001 (0.014)	Loss 3.1000 (3.0657)	Entropy 1.44515 (1.45301)	Top-1 acc 50.000 (50.697)	Top-5 acc 69.922 (73.591)	lr 0.02116
Train [31][2550/3239]	Time 0.212 (0.567)	Data Time 0.001 (0.014)	Loss 3.0204 (3.0656)	Entropy 1.44511 (1.45298)	Top-1 acc 53.906 (50.696)	Top-5 acc 77.344 (73.596)	lr 0.02115
Train [31][2560/3239]	Time 0.240 (0.566)	Data Time 0.001 (0.014)	Loss 3.1372 (3.0657)	Entropy 1.44510 (1.45295)	Top-1 acc 50.391 (50.694)	Top-5 acc 72.656 (73.596)	lr 0.02115
Train [31][2570/3239]	Time 0.203 (0.566)	Data Time 0.001 (0.014)	Loss 2.9795 (3.0658)	Entropy 1.44505 (1.45292)	Top-1 acc 49.219 (50.690)	Top-5 acc 75.781 (73.590)	lr 0.02115
Train [31][2580/3239]	Time 0.230 (0.565)	Data Time 0.001 (0.014)	Loss 2.8485 (3.0656)	Entropy 1.44488 (1.45289)	Top-1 acc 57.812 (50.695)	Top-5 acc 77.734 (73.594)	lr 0.02115
Train [31][2590/3239]	Time 0.220 (0.565)	Data Time 0.001 (0.013)	Loss 3.1915 (3.0655)	Entropy 1.44486 (1.45286)	Top-1 acc 46.094 (50.695)	Top-5 acc 71.094 (73.596)	lr 0.02115
Train [31][2600/3239]	Time 0.331 (0.565)	Data Time 0.001 (0.013)	Loss 3.0407 (3.0655)	Entropy 1.44480 (1.45283)	Top-1 acc 53.516 (50.692)	Top-5 acc 74.609 (73.597)	lr 0.02115
Train [31][2610/3239]	Time 0.225 (0.564)	Data Time 0.001 (0.013)	Loss 2.9611 (3.0654)	Entropy 1.44477 (1.45280)	Top-1 acc 51.953 (50.698)	Top-5 acc 75.391 (73.598)	lr 0.02115
Train [31][2620/3239]	Time 0.219 (0.564)	Data Time 0.001 (0.013)	Loss 3.1000 (3.0654)	Entropy 1.44470 (1.45277)	Top-1 acc 48.438 (50.695)	Top-5 acc 72.266 (73.597)	lr 0.02115
Train [31][2630/3239]	Time 0.227 (0.563)	Data Time 0.001 (0.013)	Loss 2.9720 (3.0654)	Entropy 1.44470 (1.45274)	Top-1 acc 49.609 (50.694)	Top-5 acc 76.172 (73.602)	lr 0.02115
Train [31][2640/3239]	Time 0.256 (0.563)	Data Time 0.001 (0.013)	Loss 2.8760 (3.0652)	Entropy 1.44466 (1.45271)	Top-1 acc 55.469 (50.699)	Top-5 acc 79.688 (73.605)	lr 0.02115
Train [31][2650/3239]	Time 0.232 (0.562)	Data Time 0.001 (0.013)	Loss 3.0731 (3.0653)	Entropy 1.44460 (1.45268)	Top-1 acc 49.609 (50.698)	Top-5 acc 74.219 (73.603)	lr 0.02115
Train [31][2660/3239]	Time 0.303 (0.562)	Data Time 0.001 (0.013)	Loss 3.0704 (3.0653)	Entropy 1.44450 (1.45265)	Top-1 acc 49.609 (50.703)	Top-5 acc 73.438 (73.605)	lr 0.02115
Train [31][2670/3239]	Time 0.259 (0.562)	Data Time 0.001 (0.013)	Loss 2.9067 (3.0653)	Entropy 1.44444 (1.45261)	Top-1 acc 53.125 (50.704)	Top-5 acc 75.000 (73.605)	lr 0.02115
Train [31][2680/3239]	Time 0.278 (0.576)	Data Time 0.004 (0.013)	Loss 2.9981 (3.0654)	Entropy 1.44432 (1.45258)	Top-1 acc 51.562 (50.701)	Top-5 acc 76.562 (73.604)	lr 0.02115
Train [31][2690/3239]	Time 0.227 (0.576)	Data Time 0.002 (0.013)	Loss 2.9377 (3.0653)	Entropy 1.44422 (1.45255)	Top-1 acc 52.734 (50.704)	Top-5 acc 77.734 (73.605)	lr 0.02114
Train [31][2700/3239]	Time 0.206 (0.575)	Data Time 0.001 (0.013)	Loss 3.0494 (3.0653)	Entropy 1.44416 (1.45252)	Top-1 acc 51.953 (50.705)	Top-5 acc 72.266 (73.606)	lr 0.02114
Train [31][2710/3239]	Time 0.388 (0.575)	Data Time 0.002 (0.013)	Loss 3.0218 (3.0653)	Entropy 1.44411 (1.45249)	Top-1 acc 52.344 (50.704)	Top-5 acc 71.484 (73.607)	lr 0.02114
Train [31][2720/3239]	Time 0.208 (0.575)	Data Time 0.001 (0.013)	Loss 2.9042 (3.0651)	Entropy 1.44412 (1.45246)	Top-1 acc 53.516 (50.704)	Top-5 acc 76.562 (73.609)	lr 0.02114
Train [31][2730/3239]	Time 0.236 (0.574)	Data Time 0.002 (0.013)	Loss 3.2208 (3.0654)	Entropy 1.44407 (1.45243)	Top-1 acc 49.219 (50.702)	Top-5 acc 71.094 (73.601)	lr 0.02114
Train [31][2740/3239]	Time 0.227 (0.574)	Data Time 0.001 (0.013)	Loss 3.1725 (3.0654)	Entropy 1.44393 (1.45240)	Top-1 acc 53.516 (50.704)	Top-5 acc 69.141 (73.599)	lr 0.02114
Train [31][2750/3239]	Time 0.217 (0.573)	Data Time 0.001 (0.013)	Loss 3.1167 (3.0655)	Entropy 1.44388 (1.45237)	Top-1 acc 49.219 (50.700)	Top-5 acc 71.484 (73.600)	lr 0.02114
Train [31][2760/3239]	Time 0.212 (0.573)	Data Time 0.001 (0.013)	Loss 3.1337 (3.0654)	Entropy 1.44386 (1.45234)	Top-1 acc 46.875 (50.707)	Top-5 acc 72.266 (73.600)	lr 0.02114
Train [31][2770/3239]	Time 0.294 (0.572)	Data Time 0.001 (0.013)	Loss 3.0178 (3.0654)	Entropy 1.44378 (1.45231)	Top-1 acc 51.953 (50.709)	Top-5 acc 75.781 (73.600)	lr 0.02114
Train [31][2780/3239]	Time 0.211 (0.572)	Data Time 0.001 (0.013)	Loss 2.8994 (3.0653)	Entropy 1.44379 (1.45228)	Top-1 acc 53.125 (50.708)	Top-5 acc 75.391 (73.602)	lr 0.02114
Train [31][2790/3239]	Time 0.267 (0.571)	Data Time 0.001 (0.013)	Loss 3.0205 (3.0653)	Entropy 1.44369 (1.45225)	Top-1 acc 50.781 (50.710)	Top-5 acc 72.656 (73.598)	lr 0.02114
Train [31][2800/3239]	Time 0.237 (0.571)	Data Time 0.001 (0.013)	Loss 2.8987 (3.0653)	Entropy 1.44359 (1.45222)	Top-1 acc 58.984 (50.715)	Top-5 acc 77.344 (73.600)	lr 0.02114
Train [31][2810/3239]	Time 0.244 (0.570)	Data Time 0.001 (0.013)	Loss 3.2712 (3.0654)	Entropy 1.44355 (1.45219)	Top-1 acc 43.750 (50.708)	Top-5 acc 64.453 (73.595)	lr 0.02114
Train [31][2820/3239]	Time 0.237 (0.570)	Data Time 0.001 (0.013)	Loss 3.1256 (3.0654)	Entropy 1.44345 (1.45215)	Top-1 acc 49.609 (50.705)	Top-5 acc 73.047 (73.595)	lr 0.02114
Train [31][2830/3239]	Time 0.359 (0.569)	Data Time 0.001 (0.012)	Loss 3.0929 (3.0653)	Entropy 1.44346 (1.45212)	Top-1 acc 45.703 (50.710)	Top-5 acc 72.656 (73.597)	lr 0.02113
Train [31][2840/3239]	Time 0.230 (0.569)	Data Time 0.001 (0.012)	Loss 3.1524 (3.0653)	Entropy 1.44344 (1.45209)	Top-1 acc 49.219 (50.708)	Top-5 acc 73.828 (73.598)	lr 0.02113
Train [31][2850/3239]	Time 0.200 (0.569)	Data Time 0.001 (0.012)	Loss 3.0630 (3.0652)	Entropy 1.44358 (1.45206)	Top-1 acc 48.047 (50.708)	Top-5 acc 73.047 (73.599)	lr 0.02113
Train [31][2860/3239]	Time 0.214 (0.568)	Data Time 0.001 (0.012)	Loss 2.9801 (3.0654)	Entropy 1.44353 (1.45203)	Top-1 acc 50.391 (50.707)	Top-5 acc 75.391 (73.594)	lr 0.02113
Train [31][2870/3239]	Time 0.214 (0.568)	Data Time 0.001 (0.012)	Loss 3.1535 (3.0656)	Entropy 1.44345 (1.45200)	Top-1 acc 50.000 (50.702)	Top-5 acc 71.875 (73.591)	lr 0.02113
Train [31][2880/3239]	Time 0.239 (0.567)	Data Time 0.001 (0.012)	Loss 3.0365 (3.0656)	Entropy 1.44341 (1.45197)	Top-1 acc 50.000 (50.700)	Top-5 acc 76.562 (73.593)	lr 0.02113
Train [31][2890/3239]	Time 0.286 (0.567)	Data Time 0.001 (0.012)	Loss 3.0710 (3.0656)	Entropy 1.44337 (1.45194)	Top-1 acc 50.781 (50.699)	Top-5 acc 73.438 (73.593)	lr 0.02113
Train [31][2900/3239]	Time 0.240 (0.566)	Data Time 0.001 (0.012)	Loss 3.0893 (3.0655)	Entropy 1.44331 (1.45191)	Top-1 acc 48.438 (50.700)	Top-5 acc 70.703 (73.597)	lr 0.02113
Train [31][2910/3239]	Time 0.222 (0.566)	Data Time 0.001 (0.012)	Loss 2.8193 (3.0652)	Entropy 1.44315 (1.45189)	Top-1 acc 58.594 (50.708)	Top-5 acc 76.172 (73.601)	lr 0.02113
Train [31][2920/3239]	Time 0.200 (0.566)	Data Time 0.001 (0.012)	Loss 3.0348 (3.0652)	Entropy 1.44301 (1.45186)	Top-1 acc 49.609 (50.708)	Top-5 acc 74.219 (73.603)	lr 0.02113
Train [31][2930/3239]	Time 0.207 (0.565)	Data Time 0.001 (0.012)	Loss 3.0406 (3.0651)	Entropy 1.44293 (1.45183)	Top-1 acc 51.562 (50.709)	Top-5 acc 73.047 (73.603)	lr 0.02113
Train [31][2940/3239]	Time 0.292 (0.565)	Data Time 0.001 (0.012)	Loss 3.1678 (3.0651)	Entropy 1.44291 (1.45179)	Top-1 acc 49.219 (50.707)	Top-5 acc 69.922 (73.604)	lr 0.02113
Train [31][2950/3239]	Time 0.257 (0.565)	Data Time 0.001 (0.012)	Loss 3.0972 (3.0651)	Entropy 1.44291 (1.45176)	Top-1 acc 48.047 (50.706)	Top-5 acc 73.047 (73.601)	lr 0.02113
Train [31][2960/3239]	Time 0.228 (0.564)	Data Time 0.001 (0.012)	Loss 3.1244 (3.0652)	Entropy 1.44286 (1.45173)	Top-1 acc 49.219 (50.706)	Top-5 acc 73.828 (73.599)	lr 0.02112
Train [31][2970/3239]	Time 0.213 (0.564)	Data Time 0.001 (0.012)	Loss 3.3658 (3.0651)	Entropy 1.44268 (1.45170)	Top-1 acc 44.922 (50.707)	Top-5 acc 66.016 (73.598)	lr 0.02112
Train [31][2980/3239]	Time 0.230 (0.563)	Data Time 0.001 (0.012)	Loss 3.1508 (3.0652)	Entropy 1.44261 (1.45167)	Top-1 acc 46.875 (50.706)	Top-5 acc 73.047 (73.601)	lr 0.02112
Train [31][2990/3239]	Time 0.319 (0.563)	Data Time 0.001 (0.012)	Loss 2.8916 (3.0650)	Entropy 1.44263 (1.45164)	Top-1 acc 54.297 (50.708)	Top-5 acc 77.734 (73.605)	lr 0.02112
Train [31][3000/3239]	Time 0.236 (0.563)	Data Time 0.001 (0.012)	Loss 3.1365 (3.0650)	Entropy 1.44255 (1.45161)	Top-1 acc 47.266 (50.709)	Top-5 acc 70.312 (73.604)	lr 0.02112
Train [31][3010/3239]	Time 0.330 (0.576)	Data Time 0.004 (0.012)	Loss 3.0387 (3.0650)	Entropy 1.44247 (1.45158)	Top-1 acc 48.828 (50.707)	Top-5 acc 73.828 (73.602)	lr 0.02112
Train [31][3020/3239]	Time 0.231 (0.576)	Data Time 0.002 (0.012)	Loss 2.9689 (3.0651)	Entropy 1.44244 (1.45155)	Top-1 acc 55.859 (50.709)	Top-5 acc 76.172 (73.602)	lr 0.02112
Train [31][3030/3239]	Time 0.302 (0.576)	Data Time 0.002 (0.012)	Loss 3.2102 (3.0653)	Entropy 1.44244 (1.45152)	Top-1 acc 46.094 (50.703)	Top-5 acc 70.312 (73.599)	lr 0.02112
Train [31][3040/3239]	Time 0.224 (0.575)	Data Time 0.001 (0.012)	Loss 3.1914 (3.0653)	Entropy 1.44238 (1.45149)	Top-1 acc 48.047 (50.702)	Top-5 acc 74.609 (73.603)	lr 0.02112
Train [31][3050/3239]	Time 0.356 (0.575)	Data Time 0.001 (0.012)	Loss 2.9814 (3.0653)	Entropy 1.44233 (1.45146)	Top-1 acc 54.688 (50.707)	Top-5 acc 73.828 (73.604)	lr 0.02112
Train [31][3060/3239]	Time 0.215 (0.574)	Data Time 0.001 (0.012)	Loss 2.9267 (3.0652)	Entropy 1.44218 (1.45143)	Top-1 acc 53.906 (50.709)	Top-5 acc 77.344 (73.606)	lr 0.02112
Train [31][3070/3239]	Time 0.262 (0.574)	Data Time 0.002 (0.012)	Loss 3.0004 (3.0652)	Entropy 1.44209 (1.45140)	Top-1 acc 53.125 (50.706)	Top-5 acc 73.438 (73.606)	lr 0.02112
Train [31][3080/3239]	Time 0.285 (0.574)	Data Time 0.001 (0.012)	Loss 3.1347 (3.0654)	Entropy 1.44209 (1.45137)	Top-1 acc 51.953 (50.705)	Top-5 acc 70.703 (73.603)	lr 0.02112
Train [31][3090/3239]	Time 0.204 (0.573)	Data Time 0.001 (0.012)	Loss 3.4332 (3.0655)	Entropy 1.44211 (1.45134)	Top-1 acc 42.188 (50.705)	Top-5 acc 64.844 (73.598)	lr 0.02112
Train [31][3100/3239]	Time 0.240 (0.573)	Data Time 0.001 (0.012)	Loss 3.3298 (3.0655)	Entropy 1.44196 (1.45131)	Top-1 acc 46.094 (50.704)	Top-5 acc 72.656 (73.601)	lr 0.02111
Train [31][3110/3239]	Time 0.354 (0.573)	Data Time 0.001 (0.012)	Loss 2.8111 (3.0653)	Entropy 1.44194 (1.45128)	Top-1 acc 60.156 (50.711)	Top-5 acc 75.781 (73.604)	lr 0.02111
Train [31][3120/3239]	Time 0.230 (0.572)	Data Time 0.001 (0.012)	Loss 3.1744 (3.0652)	Entropy 1.44189 (1.45125)	Top-1 acc 49.609 (50.711)	Top-5 acc 72.656 (73.605)	lr 0.02111
Train [31][3130/3239]	Time 0.248 (0.572)	Data Time 0.001 (0.011)	Loss 3.3517 (3.0653)	Entropy 1.44178 (1.45122)	Top-1 acc 41.016 (50.711)	Top-5 acc 68.359 (73.606)	lr 0.02111
Train [31][3140/3239]	Time 0.219 (0.571)	Data Time 0.001 (0.011)	Loss 3.0065 (3.0653)	Entropy 1.44161 (1.45119)	Top-1 acc 48.828 (50.705)	Top-5 acc 72.656 (73.604)	lr 0.02111
Train [31][3150/3239]	Time 0.211 (0.571)	Data Time 0.001 (0.011)	Loss 3.0359 (3.0651)	Entropy 1.44158 (1.45116)	Top-1 acc 51.172 (50.709)	Top-5 acc 77.344 (73.612)	lr 0.02111
Train [31][3160/3239]	Time 0.269 (0.570)	Data Time 0.001 (0.011)	Loss 3.6391 (3.0654)	Entropy 1.44153 (1.45113)	Top-1 acc 40.234 (50.704)	Top-5 acc 66.016 (73.609)	lr 0.02111
Train [31][3170/3239]	Time 0.219 (0.570)	Data Time 0.002 (0.011)	Loss 2.9164 (3.0652)	Entropy 1.44146 (1.45110)	Top-1 acc 55.469 (50.704)	Top-5 acc 75.781 (73.614)	lr 0.02111
Train [31][3180/3239]	Time 0.211 (0.570)	Data Time 0.000 (0.011)	Loss 2.9832 (3.0652)	Entropy 1.44136 (1.45107)	Top-1 acc 54.297 (50.702)	Top-5 acc 71.875 (73.613)	lr 0.02111
Train [31][3190/3239]	Time 0.211 (0.569)	Data Time 0.000 (0.011)	Loss 2.9212 (3.0652)	Entropy 1.44120 (1.45104)	Top-1 acc 54.297 (50.702)	Top-5 acc 76.172 (73.615)	lr 0.02111
Train [31][3200/3239]	Time 0.204 (0.569)	Data Time 0.000 (0.011)	Loss 2.9793 (3.0652)	Entropy 1.44108 (1.45101)	Top-1 acc 52.344 (50.700)	Top-5 acc 77.344 (73.615)	lr 0.02111
Train [31][3210/3239]	Time 0.215 (0.568)	Data Time 0.000 (0.011)	Loss 3.0819 (3.0648)	Entropy 1.44097 (1.45098)	Top-1 acc 50.781 (50.705)	Top-5 acc 73.047 (73.620)	lr 0.02111
Train [31][3220/3239]	Time 0.303 (0.568)	Data Time 0.000 (0.011)	Loss 3.0599 (3.0647)	Entropy 1.44092 (1.45095)	Top-1 acc 53.516 (50.708)	Top-5 acc 71.094 (73.620)	lr 0.02111
Train [31][3230/3239]	Time 0.206 (0.568)	Data Time 0.000 (0.011)	Loss 2.9481 (3.0648)	Entropy 1.44083 (1.45092)	Top-1 acc 54.297 (50.705)	Top-5 acc 76.953 (73.620)	lr 0.02111
Train [31][3239/3239]	Time 2.144 (0.567)	Data Time 0.000 (0.011)	Loss 2.9906 (3.0646)	Entropy 1.44083 (1.45089)	Top-1 acc 51.852 (50.708)	Top-5 acc 79.012 (73.623)	lr 0.02110
==========Valid [31/120]	loss 1.880	top-1 acc 58.053 (58.053)	top-5 acc 80.647	Train top-1 50.708	top-5 73.623	Entropy 1.44083	Latency-None: 0.000ms	Flops: 548.79M
Train [32][0/3239]	Time 32.854 (32.854)	Data Time 31.298 (31.298)	Loss 3.0181 (3.0181)	Entropy 1.44073 (1.44073)	Top-1 acc 52.344 (52.344)	Top-5 acc 76.562 (76.562)	lr 0.02110
Train [32][10/3239]	Time 2.444 (3.443)	Data Time 0.001 (2.847)	Loss 2.8782 (3.0026)	Entropy 1.44073 (1.44073)	Top-1 acc 56.250 (52.876)	Top-5 acc 77.734 (74.432)	lr 0.02110
Train [32][20/3239]	Time 0.264 (1.913)	Data Time 0.001 (1.492)	Loss 2.9250 (3.0216)	Entropy 1.44062 (1.44068)	Top-1 acc 53.125 (52.176)	Top-5 acc 76.953 (74.182)	lr 0.02110
Train [32][30/3239]	Time 0.221 (1.442)	Data Time 0.001 (1.011)	Loss 2.9808 (3.0320)	Entropy 1.44055 (1.44064)	Top-1 acc 55.078 (51.588)	Top-5 acc 75.000 (74.269)	lr 0.02110
Train [32][40/3239]	Time 0.219 (1.200)	Data Time 0.001 (0.765)	Loss 3.3123 (3.0371)	Entropy 1.44057 (1.44062)	Top-1 acc 44.141 (51.296)	Top-5 acc 67.578 (74.133)	lr 0.02110
Train [32][50/3239]	Time 0.220 (1.052)	Data Time 0.008 (0.615)	Loss 2.9063 (3.0478)	Entropy 1.44049 (1.44060)	Top-1 acc 53.906 (51.187)	Top-5 acc 77.734 (73.897)	lr 0.02110
Train [32][60/3239]	Time 0.218 (0.953)	Data Time 0.001 (0.515)	Loss 3.0771 (3.0394)	Entropy 1.44041 (1.44057)	Top-1 acc 53.516 (51.351)	Top-5 acc 73.047 (74.123)	lr 0.02110
Train [32][70/3239]	Time 0.202 (0.884)	Data Time 0.001 (0.442)	Loss 2.9638 (3.0573)	Entropy 1.44035 (1.44055)	Top-1 acc 55.469 (50.968)	Top-5 acc 76.953 (73.735)	lr 0.02110
Train [32][80/3239]	Time 0.209 (0.831)	Data Time 0.001 (0.388)	Loss 2.8997 (3.0524)	Entropy 1.44012 (1.44051)	Top-1 acc 55.078 (51.075)	Top-5 acc 76.172 (73.843)	lr 0.02110
Train [32][90/3239]	Time 0.215 (0.791)	Data Time 0.001 (0.346)	Loss 2.9460 (3.0508)	Entropy 1.44005 (1.44047)	Top-1 acc 50.000 (51.073)	Top-5 acc 74.609 (73.854)	lr 0.02110
Train [32][100/3239]	Time 0.213 (0.757)	Data Time 0.002 (0.311)	Loss 3.1444 (3.0475)	Entropy 1.43991 (1.44042)	Top-1 acc 50.781 (51.118)	Top-5 acc 72.656 (73.913)	lr 0.02110
Train [32][110/3239]	Time 0.261 (1.105)	Data Time 0.004 (0.284)	Loss 2.8304 (3.0425)	Entropy 1.43980 (1.44038)	Top-1 acc 54.297 (51.200)	Top-5 acc 81.641 (74.032)	lr 0.02110
Train [32][120/3239]	Time 2.843 (1.062)	Data Time 0.003 (0.260)	Loss 2.9508 (3.0458)	Entropy 1.43980 (1.44033)	Top-1 acc 52.734 (51.162)	Top-5 acc 74.609 (73.893)	lr 0.02110
Train [32][130/3239]	Time 0.250 (0.998)	Data Time 0.002 (0.241)	Loss 3.0985 (3.0458)	Entropy 1.43972 (1.44028)	Top-1 acc 54.688 (51.261)	Top-5 acc 70.703 (73.906)	lr 0.02109
Train [32][140/3239]	Time 0.265 (0.959)	Data Time 0.002 (0.224)	Loss 2.8933 (3.0448)	Entropy 1.43963 (1.44024)	Top-1 acc 55.469 (51.297)	Top-5 acc 74.609 (73.920)	lr 0.02109
Train [32][150/3239]	Time 0.207 (0.924)	Data Time 0.001 (0.209)	Loss 3.1202 (3.0424)	Entropy 1.43958 (1.44019)	Top-1 acc 49.609 (51.379)	Top-5 acc 68.750 (74.004)	lr 0.02109
Train [32][160/3239]	Time 0.224 (0.894)	Data Time 0.002 (0.196)	Loss 3.0823 (3.0459)	Entropy 1.43944 (1.44015)	Top-1 acc 50.391 (51.279)	Top-5 acc 71.875 (73.935)	lr 0.02109
Train [32][170/3239]	Time 0.231 (0.868)	Data Time 0.001 (0.185)	Loss 2.9431 (3.0467)	Entropy 1.43936 (1.44010)	Top-1 acc 49.219 (51.234)	Top-5 acc 75.391 (73.892)	lr 0.02109
Train [32][180/3239]	Time 0.214 (0.845)	Data Time 0.001 (0.175)	Loss 2.9283 (3.0444)	Entropy 1.43937 (1.44006)	Top-1 acc 51.562 (51.213)	Top-5 acc 75.391 (73.902)	lr 0.02109
Train [32][190/3239]	Time 0.213 (0.824)	Data Time 0.001 (0.166)	Loss 2.9028 (3.0411)	Entropy 1.43933 (1.44003)	Top-1 acc 54.297 (51.237)	Top-5 acc 78.516 (74.008)	lr 0.02109
Train [32][200/3239]	Time 0.270 (0.805)	Data Time 0.001 (0.157)	Loss 3.0864 (3.0419)	Entropy 1.43929 (1.43999)	Top-1 acc 53.125 (51.230)	Top-5 acc 72.266 (73.972)	lr 0.02109
Train [32][210/3239]	Time 0.179 (0.787)	Data Time 0.001 (0.150)	Loss 2.9546 (3.0400)	Entropy 1.43926 (1.43996)	Top-1 acc 54.297 (51.346)	Top-5 acc 74.219 (73.989)	lr 0.02109
Train [32][220/3239]	Time 0.158 (0.771)	Data Time 0.001 (0.144)	Loss 3.0373 (3.0420)	Entropy 1.43913 (1.43993)	Top-1 acc 50.781 (51.304)	Top-5 acc 76.172 (73.980)	lr 0.02109
Train [32][230/3239]	Time 2.313 (0.757)	Data Time 0.001 (0.138)	Loss 3.0470 (3.0412)	Entropy 1.43913 (1.43989)	Top-1 acc 51.953 (51.324)	Top-5 acc 71.094 (73.984)	lr 0.02109
Train [32][240/3239]	Time 0.192 (0.734)	Data Time 0.001 (0.132)	Loss 2.8786 (3.0412)	Entropy 1.43898 (1.43985)	Top-1 acc 55.859 (51.350)	Top-5 acc 79.297 (74.016)	lr 0.02109
Train [32][250/3239]	Time 0.231 (0.723)	Data Time 0.001 (0.127)	Loss 2.9129 (3.0426)	Entropy 1.43880 (1.43981)	Top-1 acc 53.516 (51.343)	Top-5 acc 76.562 (73.990)	lr 0.02109
Train [32][260/3239]	Time 0.312 (0.713)	Data Time 0.001 (0.122)	Loss 2.8919 (3.0409)	Entropy 1.43866 (1.43977)	Top-1 acc 53.906 (51.384)	Top-5 acc 76.562 (74.014)	lr 0.02109
Train [32][270/3239]	Time 0.218 (0.703)	Data Time 0.001 (0.118)	Loss 3.1629 (3.0407)	Entropy 1.43859 (1.43973)	Top-1 acc 50.391 (51.394)	Top-5 acc 70.703 (74.011)	lr 0.02108
Train [32][280/3239]	Time 0.195 (0.694)	Data Time 0.002 (0.114)	Loss 3.2166 (3.0387)	Entropy 1.43848 (1.43968)	Top-1 acc 45.703 (51.410)	Top-5 acc 70.312 (74.057)	lr 0.02108
Train [32][290/3239]	Time 0.208 (0.686)	Data Time 0.001 (0.110)	Loss 3.1551 (3.0382)	Entropy 1.43837 (1.43964)	Top-1 acc 50.781 (51.393)	Top-5 acc 73.438 (74.086)	lr 0.02108
Train [32][300/3239]	Time 0.188 (0.678)	Data Time 0.001 (0.106)	Loss 3.1082 (3.0386)	Entropy 1.43824 (1.43960)	Top-1 acc 51.172 (51.368)	Top-5 acc 70.703 (74.057)	lr 0.02108
Train [32][310/3239]	Time 0.190 (0.670)	Data Time 0.001 (0.103)	Loss 2.9752 (3.0379)	Entropy 1.43810 (1.43955)	Top-1 acc 52.344 (51.382)	Top-5 acc 78.516 (74.064)	lr 0.02108
Train [32][320/3239]	Time 0.302 (0.664)	Data Time 0.001 (0.100)	Loss 3.0330 (3.0377)	Entropy 1.43807 (1.43951)	Top-1 acc 52.734 (51.396)	Top-5 acc 71.484 (74.084)	lr 0.02108
Train [32][330/3239]	Time 0.205 (0.657)	Data Time 0.001 (0.097)	Loss 3.1101 (3.0372)	Entropy 1.43800 (1.43946)	Top-1 acc 51.562 (51.423)	Top-5 acc 71.875 (74.091)	lr 0.02108
Train [32][340/3239]	Time 2.578 (0.652)	Data Time 0.001 (0.094)	Loss 3.1532 (3.0370)	Entropy 1.43800 (1.43942)	Top-1 acc 48.828 (51.454)	Top-5 acc 74.219 (74.120)	lr 0.02108
Train [32][350/3239]	Time 0.250 (0.640)	Data Time 0.002 (0.091)	Loss 3.0649 (3.0379)	Entropy 1.43792 (1.43938)	Top-1 acc 45.703 (51.420)	Top-5 acc 71.094 (74.105)	lr 0.02108
Train [32][360/3239]	Time 0.225 (0.635)	Data Time 0.001 (0.089)	Loss 2.8525 (3.0369)	Entropy 1.43789 (1.43934)	Top-1 acc 55.469 (51.450)	Top-5 acc 77.734 (74.108)	lr 0.02108
Train [32][370/3239]	Time 0.345 (0.630)	Data Time 0.002 (0.086)	Loss 2.9263 (3.0389)	Entropy 1.43804 (1.43930)	Top-1 acc 53.906 (51.387)	Top-5 acc 74.609 (74.065)	lr 0.02108
Train [32][380/3239]	Time 0.262 (0.626)	Data Time 0.002 (0.084)	Loss 3.1401 (3.0377)	Entropy 1.43799 (1.43927)	Top-1 acc 50.000 (51.404)	Top-5 acc 72.266 (74.084)	lr 0.02108
Train [32][390/3239]	Time 0.206 (0.622)	Data Time 0.001 (0.082)	Loss 3.0436 (3.0383)	Entropy 1.43798 (1.43923)	Top-1 acc 52.344 (51.382)	Top-5 acc 73.438 (74.072)	lr 0.02108
Train [32][400/3239]	Time 0.206 (0.617)	Data Time 0.001 (0.080)	Loss 2.9859 (3.0369)	Entropy 1.43802 (1.43920)	Top-1 acc 53.125 (51.410)	Top-5 acc 78.516 (74.101)	lr 0.02108
Train [32][410/3239]	Time 0.206 (0.613)	Data Time 0.002 (0.078)	Loss 2.8718 (3.0382)	Entropy 1.43796 (1.43917)	Top-1 acc 54.688 (51.382)	Top-5 acc 77.344 (74.083)	lr 0.02107
Train [32][420/3239]	Time 0.212 (0.609)	Data Time 0.001 (0.076)	Loss 3.0043 (3.0373)	Entropy 1.43791 (1.43914)	Top-1 acc 52.344 (51.392)	Top-5 acc 73.438 (74.108)	lr 0.02107
Train [32][430/3239]	Time 0.229 (0.606)	Data Time 0.002 (0.075)	Loss 2.9355 (3.0371)	Entropy 1.43785 (1.43912)	Top-1 acc 50.000 (51.387)	Top-5 acc 75.000 (74.107)	lr 0.02107
Train [32][440/3239]	Time 0.204 (0.602)	Data Time 0.001 (0.073)	Loss 2.8767 (3.0366)	Entropy 1.43782 (1.43909)	Top-1 acc 55.859 (51.385)	Top-5 acc 79.688 (74.125)	lr 0.02107
Train [32][450/3239]	Time 2.433 (0.599)	Data Time 0.002 (0.071)	Loss 3.0759 (3.0355)	Entropy 1.43782 (1.43906)	Top-1 acc 51.562 (51.405)	Top-5 acc 74.609 (74.148)	lr 0.02107
Train [32][460/3239]	Time 0.215 (0.591)	Data Time 0.001 (0.070)	Loss 3.0385 (3.0344)	Entropy 1.43770 (1.43903)	Top-1 acc 50.000 (51.406)	Top-5 acc 73.828 (74.181)	lr 0.02107
Train [32][470/3239]	Time 0.208 (0.588)	Data Time 0.001 (0.068)	Loss 2.9546 (3.0338)	Entropy 1.43762 (1.43900)	Top-1 acc 50.781 (51.403)	Top-5 acc 78.516 (74.194)	lr 0.02107
Train [32][480/3239]	Time 0.363 (0.681)	Data Time 0.002 (0.067)	Loss 3.2063 (3.0349)	Entropy 1.43748 (1.43897)	Top-1 acc 49.219 (51.380)	Top-5 acc 69.141 (74.172)	lr 0.02107
Train [32][490/3239]	Time 0.222 (0.676)	Data Time 0.002 (0.066)	Loss 2.9442 (3.0360)	Entropy 1.43740 (1.43894)	Top-1 acc 51.953 (51.344)	Top-5 acc 75.781 (74.165)	lr 0.02107
Train [32][500/3239]	Time 0.217 (0.672)	Data Time 0.001 (0.064)	Loss 3.0505 (3.0368)	Entropy 1.43734 (1.43891)	Top-1 acc 52.344 (51.339)	Top-5 acc 75.391 (74.148)	lr 0.02107
Train [32][510/3239]	Time 0.241 (0.668)	Data Time 0.002 (0.063)	Loss 3.0233 (3.0370)	Entropy 1.43732 (1.43887)	Top-1 acc 52.344 (51.333)	Top-5 acc 73.828 (74.141)	lr 0.02107
Train [32][520/3239]	Time 0.231 (0.664)	Data Time 0.001 (0.062)	Loss 3.1672 (3.0364)	Entropy 1.43724 (1.43884)	Top-1 acc 50.781 (51.365)	Top-5 acc 69.141 (74.154)	lr 0.02107
Train [32][530/3239]	Time 0.294 (0.660)	Data Time 0.001 (0.061)	Loss 2.9900 (3.0362)	Entropy 1.43716 (1.43881)	Top-1 acc 53.516 (51.373)	Top-5 acc 75.000 (74.155)	lr 0.02107
Train [32][540/3239]	Time 0.208 (0.656)	Data Time 0.001 (0.060)	Loss 3.0590 (3.0375)	Entropy 1.43698 (1.43878)	Top-1 acc 50.000 (51.354)	Top-5 acc 70.703 (74.115)	lr 0.02106
Train [32][550/3239]	Time 0.208 (0.652)	Data Time 0.001 (0.059)	Loss 3.0275 (3.0392)	Entropy 1.43698 (1.43875)	Top-1 acc 51.172 (51.314)	Top-5 acc 75.781 (74.076)	lr 0.02106
Train [32][560/3239]	Time 2.582 (0.649)	Data Time 0.002 (0.058)	Loss 2.9987 (3.0388)	Entropy 1.43698 (1.43872)	Top-1 acc 53.125 (51.330)	Top-5 acc 75.781 (74.080)	lr 0.02106
Train [32][570/3239]	Time 0.227 (0.641)	Data Time 0.001 (0.057)	Loss 3.0496 (3.0376)	Entropy 1.43691 (1.43869)	Top-1 acc 51.172 (51.352)	Top-5 acc 70.703 (74.103)	lr 0.02106
Train [32][580/3239]	Time 0.260 (0.638)	Data Time 0.002 (0.056)	Loss 3.0011 (3.0369)	Entropy 1.43678 (1.43865)	Top-1 acc 49.609 (51.367)	Top-5 acc 76.953 (74.123)	lr 0.02106
Train [32][590/3239]	Time 0.326 (0.635)	Data Time 0.001 (0.055)	Loss 3.0836 (3.0367)	Entropy 1.43671 (1.43862)	Top-1 acc 51.953 (51.385)	Top-5 acc 71.484 (74.119)	lr 0.02106
Train [32][600/3239]	Time 0.224 (0.632)	Data Time 0.001 (0.054)	Loss 3.1531 (3.0367)	Entropy 1.43658 (1.43859)	Top-1 acc 47.266 (51.377)	Top-5 acc 70.312 (74.106)	lr 0.02106
Train [32][610/3239]	Time 0.215 (0.629)	Data Time 0.001 (0.053)	Loss 2.9055 (3.0366)	Entropy 1.43660 (1.43856)	Top-1 acc 55.078 (51.384)	Top-5 acc 76.953 (74.100)	lr 0.02106
Train [32][620/3239]	Time 0.232 (0.626)	Data Time 0.001 (0.052)	Loss 3.0664 (3.0363)	Entropy 1.43653 (1.43852)	Top-1 acc 49.219 (51.405)	Top-5 acc 71.484 (74.112)	lr 0.02106
Train [32][630/3239]	Time 0.222 (0.624)	Data Time 0.001 (0.052)	Loss 2.9183 (3.0361)	Entropy 1.43638 (1.43849)	Top-1 acc 54.297 (51.413)	Top-5 acc 76.562 (74.133)	lr 0.02106
Train [32][640/3239]	Time 0.218 (0.621)	Data Time 0.001 (0.051)	Loss 2.9204 (3.0354)	Entropy 1.43640 (1.43846)	Top-1 acc 51.562 (51.426)	Top-5 acc 75.391 (74.161)	lr 0.02106
Train [32][650/3239]	Time 0.303 (0.618)	Data Time 0.001 (0.050)	Loss 3.1129 (3.0352)	Entropy 1.43636 (1.43843)	Top-1 acc 51.562 (51.436)	Top-5 acc 73.438 (74.164)	lr 0.02106
Train [32][660/3239]	Time 0.250 (0.616)	Data Time 0.001 (0.049)	Loss 3.0110 (3.0347)	Entropy 1.43627 (1.43839)	Top-1 acc 55.078 (51.441)	Top-5 acc 71.484 (74.162)	lr 0.02106
Train [32][670/3239]	Time 2.551 (0.613)	Data Time 0.001 (0.049)	Loss 3.0771 (3.0341)	Entropy 1.43627 (1.43836)	Top-1 acc 45.703 (51.431)	Top-5 acc 72.266 (74.174)	lr 0.02106
Train [32][680/3239]	Time 0.228 (0.608)	Data Time 0.001 (0.048)	Loss 2.9018 (3.0340)	Entropy 1.43621 (1.43833)	Top-1 acc 53.125 (51.444)	Top-5 acc 79.297 (74.187)	lr 0.02105
Train [32][690/3239]	Time 0.220 (0.605)	Data Time 0.001 (0.047)	Loss 2.9548 (3.0341)	Entropy 1.43595 (1.43830)	Top-1 acc 53.906 (51.451)	Top-5 acc 76.953 (74.192)	lr 0.02105
Train [32][700/3239]	Time 0.258 (0.603)	Data Time 0.001 (0.047)	Loss 3.0927 (3.0340)	Entropy 1.43593 (1.43826)	Top-1 acc 50.781 (51.461)	Top-5 acc 73.438 (74.196)	lr 0.02105
Train [32][710/3239]	Time 0.218 (0.601)	Data Time 0.001 (0.046)	Loss 2.8940 (3.0337)	Entropy 1.43585 (1.43823)	Top-1 acc 55.469 (51.468)	Top-5 acc 78.516 (74.206)	lr 0.02105
Train [32][720/3239]	Time 0.237 (0.599)	Data Time 0.001 (0.045)	Loss 3.1491 (3.0341)	Entropy 1.43572 (1.43820)	Top-1 acc 48.828 (51.450)	Top-5 acc 72.656 (74.193)	lr 0.02105
Train [32][730/3239]	Time 0.238 (0.597)	Data Time 0.001 (0.045)	Loss 3.0999 (3.0339)	Entropy 1.43564 (1.43816)	Top-1 acc 51.953 (51.450)	Top-5 acc 72.656 (74.187)	lr 0.02105
Train [32][740/3239]	Time 0.195 (0.595)	Data Time 0.001 (0.044)	Loss 3.0260 (3.0341)	Entropy 1.43563 (1.43813)	Top-1 acc 48.828 (51.434)	Top-5 acc 77.344 (74.195)	lr 0.02105
Train [32][750/3239]	Time 0.224 (0.593)	Data Time 0.002 (0.044)	Loss 2.9157 (3.0327)	Entropy 1.43558 (1.43809)	Top-1 acc 53.906 (51.461)	Top-5 acc 76.172 (74.219)	lr 0.02105
Train [32][760/3239]	Time 0.326 (0.591)	Data Time 0.001 (0.043)	Loss 3.0173 (3.0328)	Entropy 1.43541 (1.43806)	Top-1 acc 50.391 (51.459)	Top-5 acc 75.000 (74.222)	lr 0.02105
Train [32][770/3239]	Time 0.208 (0.589)	Data Time 0.001 (0.042)	Loss 2.9384 (3.0324)	Entropy 1.43535 (1.43803)	Top-1 acc 53.906 (51.452)	Top-5 acc 76.562 (74.231)	lr 0.02105
Train [32][780/3239]	Time 2.447 (0.588)	Data Time 0.001 (0.042)	Loss 3.0937 (3.0321)	Entropy 1.43535 (1.43799)	Top-1 acc 49.609 (51.465)	Top-5 acc 76.953 (74.237)	lr 0.02105
Train [32][790/3239]	Time 0.222 (0.583)	Data Time 0.001 (0.041)	Loss 2.9769 (3.0317)	Entropy 1.43515 (1.43796)	Top-1 acc 50.000 (51.474)	Top-5 acc 74.219 (74.233)	lr 0.02105
Train [32][800/3239]	Time 0.230 (0.581)	Data Time 0.001 (0.041)	Loss 3.1969 (3.0321)	Entropy 1.43508 (1.43792)	Top-1 acc 47.266 (51.471)	Top-5 acc 69.141 (74.230)	lr 0.02105
Train [32][810/3239]	Time 0.256 (0.580)	Data Time 0.001 (0.040)	Loss 3.1159 (3.0319)	Entropy 1.43502 (1.43788)	Top-1 acc 50.000 (51.474)	Top-5 acc 73.047 (74.235)	lr 0.02104
Train [32][820/3239]	Time 0.211 (0.579)	Data Time 0.001 (0.040)	Loss 2.9146 (3.0316)	Entropy 1.43494 (1.43785)	Top-1 acc 53.516 (51.475)	Top-5 acc 75.000 (74.236)	lr 0.02104
Train [32][830/3239]	Time 0.213 (0.577)	Data Time 0.001 (0.040)	Loss 2.8545 (3.0306)	Entropy 1.43492 (1.43781)	Top-1 acc 53.516 (51.492)	Top-5 acc 76.562 (74.245)	lr 0.02104
Train [32][840/3239]	Time 0.299 (0.626)	Data Time 0.003 (0.039)	Loss 2.8673 (3.0303)	Entropy 1.43485 (1.43778)	Top-1 acc 57.812 (51.498)	Top-5 acc 76.172 (74.249)	lr 0.02104
Train [32][850/3239]	Time 0.206 (0.625)	Data Time 0.002 (0.039)	Loss 3.0708 (3.0298)	Entropy 1.43474 (1.43774)	Top-1 acc 50.391 (51.499)	Top-5 acc 74.609 (74.269)	lr 0.02104
Train [32][860/3239]	Time 0.218 (0.623)	Data Time 0.002 (0.038)	Loss 3.1933 (3.0298)	Entropy 1.43467 (1.43771)	Top-1 acc 47.266 (51.508)	Top-5 acc 71.484 (74.256)	lr 0.02104
Train [32][870/3239]	Time 0.202 (0.621)	Data Time 0.002 (0.038)	Loss 2.9825 (3.0307)	Entropy 1.43465 (1.43767)	Top-1 acc 50.391 (51.492)	Top-5 acc 75.000 (74.230)	lr 0.02104
Train [32][880/3239]	Time 0.250 (0.619)	Data Time 0.001 (0.037)	Loss 3.1611 (3.0311)	Entropy 1.43460 (1.43764)	Top-1 acc 51.562 (51.489)	Top-5 acc 69.141 (74.212)	lr 0.02104
Train [32][890/3239]	Time 2.406 (0.617)	Data Time 0.001 (0.037)	Loss 2.9266 (3.0314)	Entropy 1.43460 (1.43761)	Top-1 acc 48.828 (51.477)	Top-5 acc 75.781 (74.201)	lr 0.02104
Train [32][900/3239]	Time 0.213 (0.613)	Data Time 0.001 (0.037)	Loss 2.9107 (3.0319)	Entropy 1.43454 (1.43757)	Top-1 acc 53.516 (51.455)	Top-5 acc 77.734 (74.193)	lr 0.02104
Train [32][910/3239]	Time 0.203 (0.611)	Data Time 0.001 (0.036)	Loss 2.9857 (3.0316)	Entropy 1.43451 (1.43754)	Top-1 acc 51.172 (51.465)	Top-5 acc 73.828 (74.194)	lr 0.02104
Train [32][920/3239]	Time 0.297 (0.609)	Data Time 0.001 (0.036)	Loss 3.0833 (3.0313)	Entropy 1.43448 (1.43750)	Top-1 acc 50.000 (51.469)	Top-5 acc 71.875 (74.195)	lr 0.02104
Train [32][930/3239]	Time 0.230 (0.608)	Data Time 0.001 (0.035)	Loss 3.0072 (3.0308)	Entropy 1.43437 (1.43747)	Top-1 acc 54.297 (51.486)	Top-5 acc 73.828 (74.202)	lr 0.02104
Train [32][940/3239]	Time 0.239 (0.606)	Data Time 0.001 (0.035)	Loss 2.9614 (3.0308)	Entropy 1.43426 (1.43744)	Top-1 acc 54.297 (51.494)	Top-5 acc 76.953 (74.200)	lr 0.02104
Train [32][950/3239]	Time 0.225 (0.604)	Data Time 0.001 (0.035)	Loss 3.2931 (3.0312)	Entropy 1.43423 (1.43740)	Top-1 acc 41.797 (51.475)	Top-5 acc 68.359 (74.195)	lr 0.02103
Train [32][960/3239]	Time 0.205 (0.603)	Data Time 0.001 (0.034)	Loss 2.8248 (3.0307)	Entropy 1.43409 (1.43737)	Top-1 acc 53.125 (51.483)	Top-5 acc 78.125 (74.205)	lr 0.02103
Train [32][970/3239]	Time 0.205 (0.601)	Data Time 0.002 (0.034)	Loss 2.9514 (3.0299)	Entropy 1.43401 (1.43734)	Top-1 acc 52.344 (51.501)	Top-5 acc 75.391 (74.224)	lr 0.02103
Train [32][980/3239]	Time 0.345 (0.600)	Data Time 0.002 (0.034)	Loss 3.0231 (3.0297)	Entropy 1.43400 (1.43730)	Top-1 acc 50.391 (51.504)	Top-5 acc 75.391 (74.238)	lr 0.02103
Train [32][990/3239]	Time 0.256 (0.598)	Data Time 0.001 (0.033)	Loss 2.9823 (3.0300)	Entropy 1.43388 (1.43727)	Top-1 acc 52.344 (51.508)	Top-5 acc 75.781 (74.235)	lr 0.02103
Train [32][1000/3239]	Time 2.406 (0.596)	Data Time 0.001 (0.033)	Loss 2.8996 (3.0305)	Entropy 1.43388 (1.43724)	Top-1 acc 55.859 (51.499)	Top-5 acc 75.391 (74.225)	lr 0.02103
Train [32][1010/3239]	Time 0.224 (0.593)	Data Time 0.001 (0.033)	Loss 3.0268 (3.0308)	Entropy 1.43375 (1.43720)	Top-1 acc 51.172 (51.485)	Top-5 acc 72.266 (74.222)	lr 0.02103
Train [32][1020/3239]	Time 0.236 (0.592)	Data Time 0.001 (0.032)	Loss 3.1648 (3.0303)	Entropy 1.43371 (1.43717)	Top-1 acc 48.047 (51.498)	Top-5 acc 69.531 (74.237)	lr 0.02103
Train [32][1030/3239]	Time 0.306 (0.590)	Data Time 0.001 (0.032)	Loss 3.0345 (3.0299)	Entropy 1.43370 (1.43713)	Top-1 acc 50.391 (51.512)	Top-5 acc 75.000 (74.252)	lr 0.02103
Train [32][1040/3239]	Time 0.214 (0.589)	Data Time 0.002 (0.032)	Loss 2.9783 (3.0304)	Entropy 1.43362 (1.43710)	Top-1 acc 52.344 (51.508)	Top-5 acc 76.562 (74.242)	lr 0.02103
Train [32][1050/3239]	Time 0.233 (0.588)	Data Time 0.001 (0.032)	Loss 3.0198 (3.0303)	Entropy 1.43349 (1.43707)	Top-1 acc 54.688 (51.514)	Top-5 acc 75.391 (74.243)	lr 0.02103
Train [32][1060/3239]	Time 0.279 (0.586)	Data Time 0.001 (0.031)	Loss 2.9588 (3.0303)	Entropy 1.43345 (1.43703)	Top-1 acc 51.562 (51.517)	Top-5 acc 75.781 (74.243)	lr 0.02103
Train [32][1070/3239]	Time 0.220 (0.585)	Data Time 0.001 (0.031)	Loss 2.8511 (3.0298)	Entropy 1.43330 (1.43700)	Top-1 acc 55.469 (51.527)	Top-5 acc 79.688 (74.253)	lr 0.02103
Train [32][1080/3239]	Time 0.203 (0.584)	Data Time 0.001 (0.031)	Loss 3.0356 (3.0305)	Entropy 1.43333 (1.43696)	Top-1 acc 51.953 (51.509)	Top-5 acc 71.484 (74.240)	lr 0.02102
Train [32][1090/3239]	Time 0.227 (0.582)	Data Time 0.001 (0.031)	Loss 3.0791 (3.0304)	Entropy 1.43328 (1.43693)	Top-1 acc 47.656 (51.501)	Top-5 acc 73.047 (74.239)	lr 0.02102
Train [32][1100/3239]	Time 0.254 (0.581)	Data Time 0.002 (0.030)	Loss 2.9549 (3.0304)	Entropy 1.43317 (1.43690)	Top-1 acc 51.562 (51.501)	Top-5 acc 76.562 (74.247)	lr 0.02102
Train [32][1110/3239]	Time 2.413 (0.580)	Data Time 0.002 (0.030)	Loss 3.0134 (3.0311)	Entropy 1.43317 (1.43686)	Top-1 acc 52.734 (51.488)	Top-5 acc 73.438 (74.235)	lr 0.02102
Train [32][1120/3239]	Time 0.236 (0.577)	Data Time 0.001 (0.030)	Loss 3.0183 (3.0314)	Entropy 1.43314 (1.43683)	Top-1 acc 51.172 (51.475)	Top-5 acc 72.656 (74.224)	lr 0.02102
Train [32][1130/3239]	Time 0.251 (0.576)	Data Time 0.002 (0.029)	Loss 2.8622 (3.0314)	Entropy 1.43306 (1.43680)	Top-1 acc 60.547 (51.484)	Top-5 acc 76.953 (74.224)	lr 0.02102
Train [32][1140/3239]	Time 0.242 (0.575)	Data Time 0.001 (0.029)	Loss 2.9755 (3.0315)	Entropy 1.43300 (1.43676)	Top-1 acc 55.859 (51.484)	Top-5 acc 73.438 (74.225)	lr 0.02102
Train [32][1150/3239]	Time 0.212 (0.574)	Data Time 0.001 (0.029)	Loss 2.8711 (3.0318)	Entropy 1.43283 (1.43673)	Top-1 acc 52.734 (51.470)	Top-5 acc 78.125 (74.214)	lr 0.02102
Train [32][1160/3239]	Time 0.212 (0.573)	Data Time 0.001 (0.029)	Loss 3.0131 (3.0317)	Entropy 1.43269 (1.43670)	Top-1 acc 51.953 (51.468)	Top-5 acc 75.781 (74.222)	lr 0.02102
Train [32][1170/3239]	Time 0.207 (0.572)	Data Time 0.001 (0.029)	Loss 3.1922 (3.0321)	Entropy 1.43263 (1.43666)	Top-1 acc 49.219 (51.460)	Top-5 acc 72.656 (74.220)	lr 0.02102
Train [32][1180/3239]	Time 0.209 (0.571)	Data Time 0.001 (0.028)	Loss 2.9528 (3.0321)	Entropy 1.43264 (1.43663)	Top-1 acc 55.859 (51.459)	Top-5 acc 73.438 (74.218)	lr 0.02102
Train [32][1190/3239]	Time 0.136 (0.570)	Data Time 0.001 (0.028)	Loss 3.0434 (3.0328)	Entropy 1.43259 (1.43659)	Top-1 acc 49.609 (51.436)	Top-5 acc 72.266 (74.210)	lr 0.02102
Train [32][1200/3239]	Time 0.254 (0.607)	Data Time 0.002 (0.028)	Loss 2.9426 (3.0325)	Entropy 1.43255 (1.43656)	Top-1 acc 55.469 (51.433)	Top-5 acc 76.172 (74.222)	lr 0.02102
Train [32][1210/3239]	Time 0.236 (0.606)	Data Time 0.002 (0.028)	Loss 3.1772 (3.0325)	Entropy 1.43256 (1.43653)	Top-1 acc 48.828 (51.433)	Top-5 acc 71.094 (74.218)	lr 0.02102
Train [32][1220/3239]	Time 2.360 (0.604)	Data Time 0.002 (0.028)	Loss 2.8491 (3.0321)	Entropy 1.43256 (1.43649)	Top-1 acc 54.688 (51.437)	Top-5 acc 77.734 (74.224)	lr 0.02101
Train [32][1230/3239]	Time 0.224 (0.601)	Data Time 0.002 (0.027)	Loss 3.0673 (3.0321)	Entropy 1.43236 (1.43646)	Top-1 acc 48.438 (51.434)	Top-5 acc 74.219 (74.226)	lr 0.02101
Train [32][1240/3239]	Time 0.166 (0.600)	Data Time 0.001 (0.027)	Loss 2.9923 (3.0320)	Entropy 1.43232 (1.43643)	Top-1 acc 53.906 (51.432)	Top-5 acc 73.438 (74.224)	lr 0.02101
Train [32][1250/3239]	Time 0.239 (0.599)	Data Time 0.001 (0.027)	Loss 3.1662 (3.0324)	Entropy 1.43230 (1.43639)	Top-1 acc 50.391 (51.428)	Top-5 acc 73.438 (74.220)	lr 0.02101
Train [32][1260/3239]	Time 0.314 (0.598)	Data Time 0.001 (0.027)	Loss 2.9934 (3.0326)	Entropy 1.43226 (1.43636)	Top-1 acc 52.734 (51.415)	Top-5 acc 76.562 (74.218)	lr 0.02101
Train [32][1270/3239]	Time 0.210 (0.597)	Data Time 0.001 (0.027)	Loss 2.9977 (3.0329)	Entropy 1.43220 (1.43633)	Top-1 acc 51.562 (51.413)	Top-5 acc 74.219 (74.209)	lr 0.02101
Train [32][1280/3239]	Time 0.282 (0.596)	Data Time 0.002 (0.026)	Loss 3.1463 (3.0334)	Entropy 1.43220 (1.43630)	Top-1 acc 48.438 (51.407)	Top-5 acc 72.266 (74.207)	lr 0.02101
Train [32][1290/3239]	Time 0.229 (0.594)	Data Time 0.002 (0.026)	Loss 2.9887 (3.0333)	Entropy 1.43215 (1.43627)	Top-1 acc 55.469 (51.406)	Top-5 acc 74.219 (74.200)	lr 0.02101
Train [32][1300/3239]	Time 0.250 (0.593)	Data Time 0.001 (0.026)	Loss 2.9392 (3.0332)	Entropy 1.43205 (1.43623)	Top-1 acc 53.125 (51.409)	Top-5 acc 75.781 (74.201)	lr 0.02101
Train [32][1310/3239]	Time 0.168 (0.592)	Data Time 0.001 (0.026)	Loss 2.9280 (3.0332)	Entropy 1.43205 (1.43620)	Top-1 acc 55.859 (51.416)	Top-5 acc 77.734 (74.199)	lr 0.02101
Train [32][1320/3239]	Time 0.253 (0.591)	Data Time 0.001 (0.026)	Loss 3.1383 (3.0332)	Entropy 1.43203 (1.43617)	Top-1 acc 49.609 (51.411)	Top-5 acc 71.094 (74.199)	lr 0.02101
Train [32][1330/3239]	Time 2.305 (0.590)	Data Time 0.001 (0.025)	Loss 2.8900 (3.0333)	Entropy 1.43203 (1.43614)	Top-1 acc 51.562 (51.402)	Top-5 acc 78.516 (74.201)	lr 0.02101
Train [32][1340/3239]	Time 0.194 (0.588)	Data Time 0.001 (0.025)	Loss 2.9260 (3.0332)	Entropy 1.43198 (1.43611)	Top-1 acc 55.078 (51.409)	Top-5 acc 76.172 (74.203)	lr 0.02101
Train [32][1350/3239]	Time 0.224 (0.586)	Data Time 0.001 (0.025)	Loss 3.1035 (3.0333)	Entropy 1.43195 (1.43608)	Top-1 acc 50.781 (51.410)	Top-5 acc 71.094 (74.203)	lr 0.02101
Train [32][1360/3239]	Time 0.227 (0.585)	Data Time 0.001 (0.025)	Loss 3.1083 (3.0340)	Entropy 1.43182 (1.43605)	Top-1 acc 52.344 (51.385)	Top-5 acc 70.312 (74.189)	lr 0.02100
Train [32][1370/3239]	Time 0.255 (0.584)	Data Time 0.001 (0.025)	Loss 3.0142 (3.0342)	Entropy 1.43172 (1.43601)	Top-1 acc 51.953 (51.379)	Top-5 acc 73.438 (74.188)	lr 0.02100
Train [32][1380/3239]	Time 0.216 (0.583)	Data Time 0.001 (0.025)	Loss 2.8173 (3.0337)	Entropy 1.43166 (1.43598)	Top-1 acc 56.250 (51.374)	Top-5 acc 77.734 (74.195)	lr 0.02100
Train [32][1390/3239]	Time 0.218 (0.582)	Data Time 0.001 (0.024)	Loss 3.1896 (3.0338)	Entropy 1.43158 (1.43595)	Top-1 acc 48.828 (51.377)	Top-5 acc 69.531 (74.186)	lr 0.02100
Train [32][1400/3239]	Time 0.212 (0.581)	Data Time 0.001 (0.024)	Loss 2.9577 (3.0339)	Entropy 1.43155 (1.43592)	Top-1 acc 56.641 (51.378)	Top-5 acc 75.000 (74.183)	lr 0.02100
Train [32][1410/3239]	Time 0.238 (0.580)	Data Time 0.001 (0.024)	Loss 2.9432 (3.0336)	Entropy 1.43149 (1.43589)	Top-1 acc 51.172 (51.391)	Top-5 acc 75.000 (74.194)	lr 0.02100
Train [32][1420/3239]	Time 0.205 (0.579)	Data Time 0.001 (0.024)	Loss 3.0591 (3.0338)	Entropy 1.43142 (1.43586)	Top-1 acc 50.000 (51.386)	Top-5 acc 74.219 (74.185)	lr 0.02100
Train [32][1430/3239]	Time 0.361 (0.579)	Data Time 0.001 (0.024)	Loss 2.9480 (3.0341)	Entropy 1.43138 (1.43583)	Top-1 acc 54.297 (51.385)	Top-5 acc 76.953 (74.179)	lr 0.02100
Train [32][1440/3239]	Time 2.340 (0.578)	Data Time 0.001 (0.024)	Loss 2.9240 (3.0341)	Entropy 1.43138 (1.43580)	Top-1 acc 52.344 (51.381)	Top-5 acc 76.953 (74.176)	lr 0.02100
Train [32][1450/3239]	Time 0.250 (0.575)	Data Time 0.002 (0.023)	Loss 3.1140 (3.0342)	Entropy 1.43133 (1.43577)	Top-1 acc 46.094 (51.377)	Top-5 acc 71.094 (74.174)	lr 0.02100
Train [32][1460/3239]	Time 0.221 (0.575)	Data Time 0.001 (0.023)	Loss 3.0971 (3.0342)	Entropy 1.43123 (1.43573)	Top-1 acc 53.516 (51.387)	Top-5 acc 71.484 (74.177)	lr 0.02100
Train [32][1470/3239]	Time 0.273 (0.574)	Data Time 0.002 (0.023)	Loss 3.1385 (3.0338)	Entropy 1.43113 (1.43570)	Top-1 acc 46.484 (51.400)	Top-5 acc 73.047 (74.184)	lr 0.02100
Train [32][1480/3239]	Time 0.242 (0.573)	Data Time 0.001 (0.023)	Loss 3.1887 (3.0348)	Entropy 1.43111 (1.43567)	Top-1 acc 48.828 (51.371)	Top-5 acc 71.484 (74.163)	lr 0.02100
Train [32][1490/3239]	Time 0.236 (0.572)	Data Time 0.001 (0.023)	Loss 3.2308 (3.0352)	Entropy 1.43115 (1.43564)	Top-1 acc 49.219 (51.363)	Top-5 acc 69.531 (74.155)	lr 0.02099
Train [32][1500/3239]	Time 0.230 (0.571)	Data Time 0.001 (0.023)	Loss 3.2207 (3.0355)	Entropy 1.43111 (1.43561)	Top-1 acc 41.406 (51.352)	Top-5 acc 73.438 (74.155)	lr 0.02099
Train [32][1510/3239]	Time 0.244 (0.570)	Data Time 0.001 (0.023)	Loss 2.8966 (3.0349)	Entropy 1.43111 (1.43558)	Top-1 acc 52.734 (51.366)	Top-5 acc 76.953 (74.165)	lr 0.02099
Train [32][1520/3239]	Time 0.221 (0.570)	Data Time 0.001 (0.022)	Loss 3.0852 (3.0352)	Entropy 1.43105 (1.43555)	Top-1 acc 51.172 (51.358)	Top-5 acc 75.391 (74.158)	lr 0.02099
Train [32][1530/3239]	Time 0.209 (0.569)	Data Time 0.001 (0.022)	Loss 3.1224 (3.0357)	Entropy 1.43101 (1.43552)	Top-1 acc 48.828 (51.344)	Top-5 acc 75.391 (74.153)	lr 0.02099
Train [32][1540/3239]	Time 0.280 (0.568)	Data Time 0.001 (0.022)	Loss 3.0601 (3.0357)	Entropy 1.43083 (1.43549)	Top-1 acc 51.172 (51.336)	Top-5 acc 74.609 (74.157)	lr 0.02099
Train [32][1550/3239]	Time 2.374 (0.567)	Data Time 0.001 (0.022)	Loss 3.0988 (3.0355)	Entropy 1.43083 (1.43546)	Top-1 acc 47.656 (51.346)	Top-5 acc 71.484 (74.161)	lr 0.02099
Train [32][1560/3239]	Time 0.217 (0.565)	Data Time 0.002 (0.022)	Loss 2.9099 (3.0354)	Entropy 1.43077 (1.43543)	Top-1 acc 53.516 (51.345)	Top-5 acc 76.953 (74.166)	lr 0.02099
Train [32][1570/3239]	Time 0.219 (0.593)	Data Time 0.002 (0.022)	Loss 3.1025 (3.0356)	Entropy 1.43073 (1.43540)	Top-1 acc 47.656 (51.340)	Top-5 acc 70.703 (74.164)	lr 0.02099
Train [32][1580/3239]	Time 0.197 (0.592)	Data Time 0.002 (0.022)	Loss 2.8945 (3.0355)	Entropy 1.43071 (1.43537)	Top-1 acc 55.469 (51.346)	Top-5 acc 79.297 (74.164)	lr 0.02099
Train [32][1590/3239]	Time 0.236 (0.591)	Data Time 0.002 (0.022)	Loss 2.8567 (3.0355)	Entropy 1.43086 (1.43535)	Top-1 acc 52.734 (51.341)	Top-5 acc 75.781 (74.162)	lr 0.02099
Train [32][1600/3239]	Time 0.301 (0.590)	Data Time 0.001 (0.021)	Loss 2.9805 (3.0353)	Entropy 1.43050 (1.43532)	Top-1 acc 52.734 (51.342)	Top-5 acc 75.000 (74.167)	lr 0.02099
Train [32][1610/3239]	Time 0.226 (0.589)	Data Time 0.001 (0.021)	Loss 3.1244 (3.0360)	Entropy 1.43045 (1.43529)	Top-1 acc 51.172 (51.329)	Top-5 acc 70.312 (74.154)	lr 0.02099
Train [32][1620/3239]	Time 0.207 (0.588)	Data Time 0.001 (0.021)	Loss 2.9491 (3.0360)	Entropy 1.43045 (1.43526)	Top-1 acc 53.906 (51.330)	Top-5 acc 76.172 (74.155)	lr 0.02099
Train [32][1630/3239]	Time 0.216 (0.587)	Data Time 0.001 (0.021)	Loss 3.0138 (3.0358)	Entropy 1.43048 (1.43523)	Top-1 acc 50.391 (51.334)	Top-5 acc 73.047 (74.156)	lr 0.02098
Train [32][1640/3239]	Time 0.223 (0.587)	Data Time 0.002 (0.021)	Loss 3.0005 (3.0356)	Entropy 1.43045 (1.43520)	Top-1 acc 50.000 (51.338)	Top-5 acc 73.047 (74.159)	lr 0.02098
Train [32][1650/3239]	Time 0.348 (0.586)	Data Time 0.001 (0.021)	Loss 2.8127 (3.0354)	Entropy 1.43038 (1.43517)	Top-1 acc 59.375 (51.341)	Top-5 acc 78.906 (74.165)	lr 0.02098
Train [32][1660/3239]	Time 2.422 (0.585)	Data Time 0.001 (0.021)	Loss 2.8526 (3.0355)	Entropy 1.43038 (1.43514)	Top-1 acc 56.641 (51.333)	Top-5 acc 77.344 (74.162)	lr 0.02098
Train [32][1670/3239]	Time 0.204 (0.583)	Data Time 0.001 (0.021)	Loss 2.9794 (3.0355)	Entropy 1.43029 (1.43511)	Top-1 acc 52.344 (51.335)	Top-5 acc 76.172 (74.160)	lr 0.02098
Train [32][1680/3239]	Time 0.233 (0.582)	Data Time 0.002 (0.021)	Loss 3.2251 (3.0357)	Entropy 1.43018 (1.43508)	Top-1 acc 44.922 (51.319)	Top-5 acc 69.922 (74.155)	lr 0.02098
Train [32][1690/3239]	Time 0.231 (0.581)	Data Time 0.001 (0.020)	Loss 3.1567 (3.0358)	Entropy 1.43015 (1.43505)	Top-1 acc 48.828 (51.308)	Top-5 acc 71.875 (74.153)	lr 0.02098
Train [32][1700/3239]	Time 0.215 (0.581)	Data Time 0.001 (0.020)	Loss 3.0317 (3.0359)	Entropy 1.43006 (1.43502)	Top-1 acc 48.438 (51.301)	Top-5 acc 75.000 (74.157)	lr 0.02098
Train [32][1710/3239]	Time 0.311 (0.580)	Data Time 0.001 (0.020)	Loss 3.2515 (3.0361)	Entropy 1.42998 (1.43499)	Top-1 acc 46.094 (51.299)	Top-5 acc 69.922 (74.154)	lr 0.02098
Train [32][1720/3239]	Time 0.141 (0.579)	Data Time 0.001 (0.020)	Loss 2.9529 (3.0365)	Entropy 1.42994 (1.43497)	Top-1 acc 54.688 (51.294)	Top-5 acc 77.734 (74.152)	lr 0.02098
Train [32][1730/3239]	Time 0.217 (0.578)	Data Time 0.001 (0.020)	Loss 2.9635 (3.0367)	Entropy 1.42980 (1.43494)	Top-1 acc 53.125 (51.291)	Top-5 acc 76.172 (74.144)	lr 0.02098
Train [32][1740/3239]	Time 0.178 (0.578)	Data Time 0.001 (0.020)	Loss 3.2012 (3.0366)	Entropy 1.42980 (1.43491)	Top-1 acc 51.953 (51.299)	Top-5 acc 69.531 (74.145)	lr 0.02098
Train [32][1750/3239]	Time 0.191 (0.577)	Data Time 0.001 (0.020)	Loss 2.9512 (3.0367)	Entropy 1.42965 (1.43488)	Top-1 acc 54.297 (51.299)	Top-5 acc 77.344 (74.147)	lr 0.02098
Train [32][1760/3239]	Time 0.195 (0.576)	Data Time 0.001 (0.020)	Loss 3.1494 (3.0365)	Entropy 1.42962 (1.43485)	Top-1 acc 51.562 (51.302)	Top-5 acc 74.219 (74.147)	lr 0.02097
Train [32][1770/3239]	Time 2.352 (0.575)	Data Time 0.037 (0.020)	Loss 2.9581 (3.0363)	Entropy 1.42962 (1.43482)	Top-1 acc 51.953 (51.306)	Top-5 acc 78.516 (74.153)	lr 0.02097
Train [32][1780/3239]	Time 0.235 (0.573)	Data Time 0.001 (0.020)	Loss 2.8529 (3.0363)	Entropy 1.42958 (1.43479)	Top-1 acc 56.641 (51.302)	Top-5 acc 78.516 (74.157)	lr 0.02097
Train [32][1790/3239]	Time 0.225 (0.573)	Data Time 0.001 (0.019)	Loss 3.0532 (3.0362)	Entropy 1.42956 (1.43476)	Top-1 acc 55.078 (51.307)	Top-5 acc 72.656 (74.158)	lr 0.02097
Train [32][1800/3239]	Time 0.211 (0.572)	Data Time 0.005 (0.019)	Loss 3.0408 (3.0361)	Entropy 1.42955 (1.43473)	Top-1 acc 51.562 (51.309)	Top-5 acc 70.312 (74.163)	lr 0.02097
Train [32][1810/3239]	Time 0.220 (0.571)	Data Time 0.002 (0.019)	Loss 3.1081 (3.0365)	Entropy 1.42946 (1.43470)	Top-1 acc 48.047 (51.300)	Top-5 acc 73.828 (74.156)	lr 0.02097
Train [32][1820/3239]	Time 0.208 (0.571)	Data Time 0.001 (0.019)	Loss 3.0060 (3.0363)	Entropy 1.42936 (1.43467)	Top-1 acc 55.859 (51.307)	Top-5 acc 74.609 (74.161)	lr 0.02097
Train [32][1830/3239]	Time 0.228 (0.570)	Data Time 0.002 (0.019)	Loss 2.9022 (3.0360)	Entropy 1.42923 (1.43464)	Top-1 acc 55.469 (51.312)	Top-5 acc 77.734 (74.170)	lr 0.02097
Train [32][1840/3239]	Time 0.231 (0.569)	Data Time 0.001 (0.019)	Loss 2.8569 (3.0359)	Entropy 1.42919 (1.43461)	Top-1 acc 55.469 (51.317)	Top-5 acc 76.562 (74.169)	lr 0.02097
Train [32][1850/3239]	Time 0.234 (0.569)	Data Time 0.001 (0.019)	Loss 3.0909 (3.0356)	Entropy 1.42918 (1.43458)	Top-1 acc 51.172 (51.321)	Top-5 acc 74.219 (74.179)	lr 0.02097
Train [32][1860/3239]	Time 0.222 (0.568)	Data Time 0.001 (0.019)	Loss 3.0792 (3.0355)	Entropy 1.42910 (1.43456)	Top-1 acc 50.391 (51.323)	Top-5 acc 71.094 (74.179)	lr 0.02097
Train [32][1870/3239]	Time 0.346 (0.567)	Data Time 0.001 (0.019)	Loss 2.9053 (3.0355)	Entropy 1.42899 (1.43453)	Top-1 acc 52.734 (51.322)	Top-5 acc 77.734 (74.180)	lr 0.02097
Train [32][1880/3239]	Time 2.433 (0.567)	Data Time 0.001 (0.019)	Loss 2.9455 (3.0353)	Entropy 1.42899 (1.43450)	Top-1 acc 54.297 (51.329)	Top-5 acc 76.953 (74.184)	lr 0.02097
Train [32][1890/3239]	Time 0.209 (0.565)	Data Time 0.001 (0.018)	Loss 2.9691 (3.0350)	Entropy 1.42893 (1.43447)	Top-1 acc 51.172 (51.333)	Top-5 acc 76.172 (74.196)	lr 0.02096
Train [32][1900/3239]	Time 0.221 (0.564)	Data Time 0.001 (0.018)	Loss 3.1077 (3.0352)	Entropy 1.42890 (1.43444)	Top-1 acc 51.562 (51.333)	Top-5 acc 75.000 (74.193)	lr 0.02096
Train [32][1910/3239]	Time 0.220 (0.564)	Data Time 0.001 (0.018)	Loss 3.1329 (3.0357)	Entropy 1.42873 (1.43441)	Top-1 acc 53.516 (51.324)	Top-5 acc 74.219 (74.185)	lr 0.02096
Train [32][1920/3239]	Time 0.342 (0.563)	Data Time 0.001 (0.018)	Loss 2.9065 (3.0354)	Entropy 1.42864 (1.43438)	Top-1 acc 55.859 (51.329)	Top-5 acc 76.953 (74.193)	lr 0.02096
Train [32][1930/3239]	Time 0.282 (0.585)	Data Time 0.004 (0.018)	Loss 2.9286 (3.0352)	Entropy 1.42866 (1.43435)	Top-1 acc 53.516 (51.338)	Top-5 acc 73.828 (74.195)	lr 0.02096
Train [32][1940/3239]	Time 0.206 (0.584)	Data Time 0.002 (0.018)	Loss 2.8203 (3.0354)	Entropy 1.42860 (1.43432)	Top-1 acc 55.859 (51.331)	Top-5 acc 76.953 (74.187)	lr 0.02096
Train [32][1950/3239]	Time 0.191 (0.584)	Data Time 0.001 (0.018)	Loss 3.2600 (3.0357)	Entropy 1.42858 (1.43429)	Top-1 acc 44.922 (51.327)	Top-5 acc 72.266 (74.182)	lr 0.02096
Train [32][1960/3239]	Time 0.206 (0.583)	Data Time 0.001 (0.018)	Loss 2.8826 (3.0354)	Entropy 1.42851 (1.43426)	Top-1 acc 54.688 (51.335)	Top-5 acc 76.953 (74.184)	lr 0.02096
Train [32][1970/3239]	Time 0.319 (0.582)	Data Time 0.001 (0.018)	Loss 3.2521 (3.0358)	Entropy 1.42845 (1.43423)	Top-1 acc 48.047 (51.323)	Top-5 acc 71.484 (74.181)	lr 0.02096
Train [32][1980/3239]	Time 0.257 (0.582)	Data Time 0.001 (0.018)	Loss 2.9351 (3.0354)	Entropy 1.42834 (1.43420)	Top-1 acc 53.906 (51.331)	Top-5 acc 75.781 (74.192)	lr 0.02096
Train [32][1990/3239]	Time 2.287 (0.581)	Data Time 0.001 (0.018)	Loss 3.0700 (3.0357)	Entropy 1.42834 (1.43417)	Top-1 acc 46.875 (51.327)	Top-5 acc 75.781 (74.183)	lr 0.02096
Train [32][2000/3239]	Time 0.208 (0.579)	Data Time 0.001 (0.018)	Loss 2.9496 (3.0355)	Entropy 1.42810 (1.43414)	Top-1 acc 52.344 (51.332)	Top-5 acc 78.516 (74.191)	lr 0.02096
Train [32][2010/3239]	Time 0.201 (0.579)	Data Time 0.001 (0.017)	Loss 3.1259 (3.0354)	Entropy 1.42808 (1.43411)	Top-1 acc 50.000 (51.333)	Top-5 acc 72.656 (74.194)	lr 0.02096
Train [32][2020/3239]	Time 0.220 (0.578)	Data Time 0.001 (0.017)	Loss 3.0691 (3.0359)	Entropy 1.42802 (1.43408)	Top-1 acc 50.000 (51.323)	Top-5 acc 71.094 (74.185)	lr 0.02096
Train [32][2030/3239]	Time 0.309 (0.577)	Data Time 0.001 (0.017)	Loss 3.2298 (3.0361)	Entropy 1.42797 (1.43405)	Top-1 acc 45.312 (51.315)	Top-5 acc 70.312 (74.183)	lr 0.02095
Train [32][2040/3239]	Time 0.212 (0.577)	Data Time 0.002 (0.017)	Loss 3.1879 (3.0359)	Entropy 1.42794 (1.43402)	Top-1 acc 49.219 (51.318)	Top-5 acc 70.703 (74.188)	lr 0.02095
Train [32][2050/3239]	Time 0.220 (0.576)	Data Time 0.001 (0.017)	Loss 2.8967 (3.0360)	Entropy 1.42789 (1.43399)	Top-1 acc 59.766 (51.322)	Top-5 acc 78.516 (74.188)	lr 0.02095
Train [32][2060/3239]	Time 0.251 (0.575)	Data Time 0.001 (0.017)	Loss 2.8932 (3.0358)	Entropy 1.42784 (1.43396)	Top-1 acc 53.125 (51.329)	Top-5 acc 78.125 (74.194)	lr 0.02095
Train [32][2070/3239]	Time 0.223 (0.575)	Data Time 0.001 (0.017)	Loss 2.9031 (3.0359)	Entropy 1.42785 (1.43393)	Top-1 acc 52.734 (51.327)	Top-5 acc 78.516 (74.193)	lr 0.02095
Train [32][2080/3239]	Time 0.216 (0.574)	Data Time 0.001 (0.017)	Loss 3.2740 (3.0363)	Entropy 1.42778 (1.43390)	Top-1 acc 44.922 (51.314)	Top-5 acc 67.578 (74.184)	lr 0.02095
Train [32][2090/3239]	Time 0.345 (0.574)	Data Time 0.001 (0.017)	Loss 3.0140 (3.0362)	Entropy 1.42765 (1.43387)	Top-1 acc 49.219 (51.313)	Top-5 acc 77.344 (74.192)	lr 0.02095
Train [32][2100/3239]	Time 2.317 (0.573)	Data Time 0.001 (0.017)	Loss 3.1729 (3.0364)	Entropy 1.42765 (1.43384)	Top-1 acc 47.656 (51.308)	Top-5 acc 72.656 (74.192)	lr 0.02095
Train [32][2110/3239]	Time 0.209 (0.571)	Data Time 0.001 (0.017)	Loss 3.0593 (3.0362)	Entropy 1.42764 (1.43382)	Top-1 acc 51.172 (51.310)	Top-5 acc 73.828 (74.197)	lr 0.02095
Train [32][2120/3239]	Time 0.198 (0.571)	Data Time 0.001 (0.017)	Loss 2.7313 (3.0360)	Entropy 1.42763 (1.43379)	Top-1 acc 58.594 (51.310)	Top-5 acc 80.859 (74.203)	lr 0.02095
Train [32][2130/3239]	Time 0.227 (0.570)	Data Time 0.002 (0.017)	Loss 3.0916 (3.0366)	Entropy 1.42749 (1.43376)	Top-1 acc 50.391 (51.295)	Top-5 acc 73.828 (74.195)	lr 0.02095
Train [32][2140/3239]	Time 0.225 (0.570)	Data Time 0.002 (0.017)	Loss 3.0556 (3.0365)	Entropy 1.42739 (1.43373)	Top-1 acc 52.734 (51.296)	Top-5 acc 74.609 (74.197)	lr 0.02095
Train [32][2150/3239]	Time 0.203 (0.569)	Data Time 0.001 (0.016)	Loss 3.1474 (3.0365)	Entropy 1.42736 (1.43370)	Top-1 acc 47.656 (51.295)	Top-5 acc 71.875 (74.201)	lr 0.02095
Train [32][2160/3239]	Time 0.229 (0.569)	Data Time 0.001 (0.016)	Loss 3.1468 (3.0365)	Entropy 1.42731 (1.43367)	Top-1 acc 50.391 (51.297)	Top-5 acc 69.531 (74.194)	lr 0.02094
Train [32][2170/3239]	Time 0.208 (0.568)	Data Time 0.001 (0.016)	Loss 3.0100 (3.0364)	Entropy 1.42714 (1.43364)	Top-1 acc 52.734 (51.297)	Top-5 acc 74.219 (74.194)	lr 0.02094
Train [32][2180/3239]	Time 0.213 (0.567)	Data Time 0.001 (0.016)	Loss 3.0644 (3.0364)	Entropy 1.42702 (1.43361)	Top-1 acc 50.000 (51.293)	Top-5 acc 72.656 (74.191)	lr 0.02094
Train [32][2190/3239]	Time 0.294 (0.567)	Data Time 0.002 (0.016)	Loss 3.0762 (3.0366)	Entropy 1.42681 (1.43358)	Top-1 acc 48.047 (51.290)	Top-5 acc 73.047 (74.188)	lr 0.02094
Train [32][2200/3239]	Time 0.198 (0.566)	Data Time 0.001 (0.016)	Loss 3.2006 (3.0367)	Entropy 1.42664 (1.43355)	Top-1 acc 48.047 (51.291)	Top-5 acc 68.750 (74.186)	lr 0.02094
Train [32][2210/3239]	Time 2.383 (0.566)	Data Time 0.001 (0.016)	Loss 3.1596 (3.0366)	Entropy 1.42664 (1.43352)	Top-1 acc 48.438 (51.291)	Top-5 acc 69.922 (74.189)	lr 0.02094
Train [32][2220/3239]	Time 0.226 (0.564)	Data Time 0.001 (0.016)	Loss 3.1311 (3.0366)	Entropy 1.42662 (1.43349)	Top-1 acc 50.000 (51.291)	Top-5 acc 71.484 (74.189)	lr 0.02094
Train [32][2230/3239]	Time 0.230 (0.564)	Data Time 0.001 (0.016)	Loss 3.0546 (3.0364)	Entropy 1.42660 (1.43345)	Top-1 acc 50.391 (51.296)	Top-5 acc 74.609 (74.190)	lr 0.02094
Train [32][2240/3239]	Time 0.218 (0.563)	Data Time 0.001 (0.016)	Loss 3.1865 (3.0365)	Entropy 1.42661 (1.43342)	Top-1 acc 51.172 (51.294)	Top-5 acc 73.047 (74.195)	lr 0.02094
Train [32][2250/3239]	Time 0.222 (0.563)	Data Time 0.002 (0.016)	Loss 2.9558 (3.0365)	Entropy 1.42644 (1.43339)	Top-1 acc 54.688 (51.300)	Top-5 acc 73.828 (74.194)	lr 0.02094
Train [32][2260/3239]	Time 0.304 (0.562)	Data Time 0.001 (0.016)	Loss 3.0062 (3.0363)	Entropy 1.42632 (1.43336)	Top-1 acc 52.734 (51.306)	Top-5 acc 73.438 (74.195)	lr 0.02094
Train [32][2270/3239]	Time 0.208 (0.562)	Data Time 0.002 (0.016)	Loss 3.0365 (3.0363)	Entropy 1.42629 (1.43333)	Top-1 acc 51.172 (51.312)	Top-5 acc 75.391 (74.197)	lr 0.02094
Train [32][2280/3239]	Time 0.224 (0.561)	Data Time 0.001 (0.016)	Loss 2.9988 (3.0364)	Entropy 1.42617 (1.43330)	Top-1 acc 53.125 (51.311)	Top-5 acc 76.562 (74.198)	lr 0.02094
Train [32][2290/3239]	Time 0.372 (0.579)	Data Time 0.003 (0.016)	Loss 3.2392 (3.0366)	Entropy 1.42605 (1.43327)	Top-1 acc 49.609 (51.312)	Top-5 acc 69.141 (74.192)	lr 0.02094
Train [32][2300/3239]	Time 0.217 (0.579)	Data Time 0.002 (0.016)	Loss 2.9871 (3.0365)	Entropy 1.42599 (1.43324)	Top-1 acc 50.391 (51.315)	Top-5 acc 76.953 (74.196)	lr 0.02093
Train [32][2310/3239]	Time 0.336 (0.578)	Data Time 0.002 (0.015)	Loss 3.1009 (3.0366)	Entropy 1.42586 (1.43321)	Top-1 acc 46.484 (51.303)	Top-5 acc 73.828 (74.199)	lr 0.02093
Train [32][2320/3239]	Time 2.421 (0.578)	Data Time 0.001 (0.015)	Loss 3.1311 (3.0367)	Entropy 1.42586 (1.43317)	Top-1 acc 50.000 (51.301)	Top-5 acc 72.656 (74.200)	lr 0.02093
Train [32][2330/3239]	Time 0.201 (0.576)	Data Time 0.001 (0.015)	Loss 3.1142 (3.0364)	Entropy 1.42584 (1.43314)	Top-1 acc 46.484 (51.304)	Top-5 acc 73.047 (74.203)	lr 0.02093
Train [32][2340/3239]	Time 0.237 (0.576)	Data Time 0.001 (0.015)	Loss 3.0566 (3.0363)	Entropy 1.42584 (1.43311)	Top-1 acc 47.266 (51.301)	Top-5 acc 74.219 (74.209)	lr 0.02093
Train [32][2350/3239]	Time 0.226 (0.575)	Data Time 0.001 (0.015)	Loss 2.9463 (3.0364)	Entropy 1.42575 (1.43308)	Top-1 acc 53.906 (51.294)	Top-5 acc 75.391 (74.204)	lr 0.02093
Train [32][2360/3239]	Time 0.210 (0.575)	Data Time 0.002 (0.015)	Loss 3.2210 (3.0363)	Entropy 1.42562 (1.43305)	Top-1 acc 46.875 (51.296)	Top-5 acc 72.656 (74.207)	lr 0.02093
Train [32][2370/3239]	Time 0.232 (0.574)	Data Time 0.001 (0.015)	Loss 3.1630 (3.0366)	Entropy 1.42550 (1.43302)	Top-1 acc 44.531 (51.290)	Top-5 acc 73.047 (74.200)	lr 0.02093
Train [32][2380/3239]	Time 0.245 (0.574)	Data Time 0.001 (0.015)	Loss 2.7931 (3.0364)	Entropy 1.42546 (1.43299)	Top-1 acc 57.812 (51.291)	Top-5 acc 78.906 (74.204)	lr 0.02093
Train [32][2390/3239]	Time 0.220 (0.573)	Data Time 0.001 (0.015)	Loss 3.0441 (3.0366)	Entropy 1.42540 (1.43295)	Top-1 acc 51.562 (51.293)	Top-5 acc 75.000 (74.202)	lr 0.02093
Train [32][2400/3239]	Time 0.219 (0.573)	Data Time 0.001 (0.015)	Loss 2.9316 (3.0368)	Entropy 1.42523 (1.43292)	Top-1 acc 56.641 (51.293)	Top-5 acc 74.219 (74.197)	lr 0.02093
Train [32][2410/3239]	Time 0.219 (0.572)	Data Time 0.001 (0.015)	Loss 2.9399 (3.0369)	Entropy 1.42501 (1.43289)	Top-1 acc 52.344 (51.290)	Top-5 acc 77.734 (74.195)	lr 0.02093
Train [32][2420/3239]	Time 0.325 (0.572)	Data Time 0.001 (0.015)	Loss 3.0316 (3.0372)	Entropy 1.42501 (1.43286)	Top-1 acc 53.516 (51.287)	Top-5 acc 75.000 (74.189)	lr 0.02093
Train [32][2430/3239]	Time 2.420 (0.571)	Data Time 0.001 (0.015)	Loss 3.1019 (3.0373)	Entropy 1.42501 (1.43283)	Top-1 acc 49.609 (51.288)	Top-5 acc 73.828 (74.190)	lr 0.02092
Train [32][2440/3239]	Time 0.242 (0.570)	Data Time 0.001 (0.015)	Loss 3.1410 (3.0374)	Entropy 1.42503 (1.43279)	Top-1 acc 49.219 (51.290)	Top-5 acc 72.266 (74.188)	lr 0.02092
Train [32][2450/3239]	Time 0.221 (0.569)	Data Time 0.001 (0.015)	Loss 3.0529 (3.0376)	Entropy 1.42501 (1.43276)	Top-1 acc 53.906 (51.288)	Top-5 acc 73.047 (74.178)	lr 0.02092
Train [32][2460/3239]	Time 0.223 (0.569)	Data Time 0.001 (0.015)	Loss 3.0792 (3.0377)	Entropy 1.42492 (1.43273)	Top-1 acc 47.656 (51.290)	Top-5 acc 73.828 (74.178)	lr 0.02092
Train [32][2470/3239]	Time 0.314 (0.569)	Data Time 0.001 (0.015)	Loss 2.9929 (3.0377)	Entropy 1.42492 (1.43270)	Top-1 acc 53.125 (51.288)	Top-5 acc 76.562 (74.180)	lr 0.02092
Train [32][2480/3239]	Time 0.218 (0.568)	Data Time 0.001 (0.014)	Loss 2.9432 (3.0380)	Entropy 1.42487 (1.43267)	Top-1 acc 52.734 (51.283)	Top-5 acc 76.953 (74.177)	lr 0.02092
Train [32][2490/3239]	Time 0.210 (0.568)	Data Time 0.001 (0.014)	Loss 3.0761 (3.0377)	Entropy 1.42492 (1.43264)	Top-1 acc 48.047 (51.289)	Top-5 acc 73.047 (74.183)	lr 0.02092
Train [32][2500/3239]	Time 0.211 (0.567)	Data Time 0.001 (0.014)	Loss 3.1989 (3.0378)	Entropy 1.42486 (1.43260)	Top-1 acc 46.484 (51.284)	Top-5 acc 67.188 (74.176)	lr 0.02092
Train [32][2510/3239]	Time 0.215 (0.567)	Data Time 0.001 (0.014)	Loss 3.0989 (3.0377)	Entropy 1.42477 (1.43257)	Top-1 acc 50.781 (51.284)	Top-5 acc 75.000 (74.178)	lr 0.02092
Train [32][2520/3239]	Time 0.221 (0.566)	Data Time 0.001 (0.014)	Loss 2.9075 (3.0378)	Entropy 1.42467 (1.43254)	Top-1 acc 53.125 (51.287)	Top-5 acc 76.172 (74.176)	lr 0.02092
Train [32][2530/3239]	Time 0.315 (0.566)	Data Time 0.001 (0.014)	Loss 3.2800 (3.0381)	Entropy 1.42464 (1.43251)	Top-1 acc 47.266 (51.281)	Top-5 acc 67.188 (74.169)	lr 0.02092
Train [32][2540/3239]	Time 2.283 (0.565)	Data Time 0.001 (0.014)	Loss 2.9882 (3.0381)	Entropy 1.42464 (1.43248)	Top-1 acc 50.781 (51.277)	Top-5 acc 75.781 (74.171)	lr 0.02092
Train [32][2550/3239]	Time 0.239 (0.564)	Data Time 0.001 (0.014)	Loss 2.9478 (3.0381)	Entropy 1.42468 (1.43245)	Top-1 acc 51.953 (51.276)	Top-5 acc 75.000 (74.170)	lr 0.02092
Train [32][2560/3239]	Time 0.237 (0.564)	Data Time 0.002 (0.014)	Loss 3.0766 (3.0384)	Entropy 1.42464 (1.43242)	Top-1 acc 47.266 (51.268)	Top-5 acc 71.094 (74.164)	lr 0.02092
Train [32][2570/3239]	Time 0.220 (0.563)	Data Time 0.001 (0.014)	Loss 3.0340 (3.0384)	Entropy 1.42418 (1.43239)	Top-1 acc 49.219 (51.264)	Top-5 acc 70.703 (74.165)	lr 0.02091
Train [32][2580/3239]	Time 0.235 (0.563)	Data Time 0.001 (0.014)	Loss 2.9965 (3.0384)	Entropy 1.42416 (1.43236)	Top-1 acc 51.953 (51.267)	Top-5 acc 71.875 (74.166)	lr 0.02091
Train [32][2590/3239]	Time 0.331 (0.562)	Data Time 0.001 (0.014)	Loss 2.9662 (3.0385)	Entropy 1.42409 (1.43232)	Top-1 acc 56.250 (51.266)	Top-5 acc 75.391 (74.164)	lr 0.02091
Train [32][2600/3239]	Time 0.212 (0.562)	Data Time 0.001 (0.014)	Loss 2.8783 (3.0384)	Entropy 1.42405 (1.43229)	Top-1 acc 53.906 (51.268)	Top-5 acc 79.297 (74.168)	lr 0.02091
Train [32][2610/3239]	Time 0.218 (0.561)	Data Time 0.001 (0.014)	Loss 2.9940 (3.0381)	Entropy 1.42404 (1.43226)	Top-1 acc 52.344 (51.277)	Top-5 acc 75.391 (74.176)	lr 0.02091
Train [32][2620/3239]	Time 0.199 (0.561)	Data Time 0.001 (0.014)	Loss 3.0679 (3.0381)	Entropy 1.42389 (1.43223)	Top-1 acc 52.344 (51.282)	Top-5 acc 69.922 (74.173)	lr 0.02091
Train [32][2630/3239]	Time 0.223 (0.560)	Data Time 0.001 (0.014)	Loss 3.2229 (3.0380)	Entropy 1.42384 (1.43220)	Top-1 acc 46.875 (51.281)	Top-5 acc 69.141 (74.172)	lr 0.02091
Train [32][2640/3239]	Time 0.335 (0.560)	Data Time 0.002 (0.014)	Loss 2.9212 (3.0378)	Entropy 1.42380 (1.43217)	Top-1 acc 53.516 (51.279)	Top-5 acc 76.172 (74.177)	lr 0.02091
Train [32][2650/3239]	Time 0.381 (0.576)	Data Time 0.004 (0.014)	Loss 3.1884 (3.0381)	Entropy 1.42359 (1.43213)	Top-1 acc 47.656 (51.271)	Top-5 acc 70.703 (74.173)	lr 0.02091
Train [32][2660/3239]	Time 0.256 (0.575)	Data Time 0.002 (0.014)	Loss 3.0457 (3.0379)	Entropy 1.42351 (1.43210)	Top-1 acc 52.734 (51.274)	Top-5 acc 78.125 (74.182)	lr 0.02091
Train [32][2670/3239]	Time 0.221 (0.575)	Data Time 0.002 (0.014)	Loss 3.2654 (3.0381)	Entropy 1.42349 (1.43207)	Top-1 acc 46.875 (51.269)	Top-5 acc 67.969 (74.180)	lr 0.02091
Train [32][2680/3239]	Time 0.238 (0.574)	Data Time 0.003 (0.014)	Loss 3.3563 (3.0381)	Entropy 1.42342 (1.43204)	Top-1 acc 45.703 (51.272)	Top-5 acc 67.188 (74.179)	lr 0.02091
Train [32][2690/3239]	Time 0.243 (0.574)	Data Time 0.001 (0.013)	Loss 2.8306 (3.0380)	Entropy 1.42339 (1.43201)	Top-1 acc 55.469 (51.275)	Top-5 acc 78.125 (74.180)	lr 0.02091
Train [32][2700/3239]	Time 0.356 (0.573)	Data Time 0.001 (0.013)	Loss 3.0230 (3.0378)	Entropy 1.42335 (1.43197)	Top-1 acc 53.906 (51.283)	Top-5 acc 72.656 (74.183)	lr 0.02090
Train [32][2710/3239]	Time 0.238 (0.573)	Data Time 0.001 (0.013)	Loss 2.9206 (3.0377)	Entropy 1.42331 (1.43194)	Top-1 acc 50.781 (51.282)	Top-5 acc 78.125 (74.188)	lr 0.02090
Train [32][2720/3239]	Time 0.222 (0.573)	Data Time 0.001 (0.013)	Loss 3.2325 (3.0380)	Entropy 1.42328 (1.43191)	Top-1 acc 50.000 (51.273)	Top-5 acc 72.656 (74.181)	lr 0.02090
Train [32][2730/3239]	Time 0.298 (0.572)	Data Time 0.001 (0.013)	Loss 3.0578 (3.0378)	Entropy 1.42324 (1.43188)	Top-1 acc 53.516 (51.279)	Top-5 acc 72.656 (74.186)	lr 0.02090
Train [32][2740/3239]	Time 0.251 (0.572)	Data Time 0.001 (0.013)	Loss 2.9440 (3.0382)	Entropy 1.42299 (1.43185)	Top-1 acc 52.734 (51.271)	Top-5 acc 75.781 (74.177)	lr 0.02090
Train [32][2750/3239]	Time 0.269 (0.571)	Data Time 0.001 (0.013)	Loss 3.2287 (3.0384)	Entropy 1.42290 (1.43181)	Top-1 acc 49.219 (51.266)	Top-5 acc 70.312 (74.174)	lr 0.02090
Train [32][2760/3239]	Time 0.151 (0.571)	Data Time 0.001 (0.013)	Loss 3.1624 (3.0386)	Entropy 1.42294 (1.43178)	Top-1 acc 49.609 (51.265)	Top-5 acc 69.922 (74.166)	lr 0.02090
Train [32][2770/3239]	Time 0.216 (0.570)	Data Time 0.001 (0.013)	Loss 3.1325 (3.0387)	Entropy 1.42293 (1.43175)	Top-1 acc 50.391 (51.260)	Top-5 acc 73.438 (74.167)	lr 0.02090
Train [32][2780/3239]	Time 0.323 (0.570)	Data Time 0.002 (0.013)	Loss 2.9669 (3.0385)	Entropy 1.42288 (1.43172)	Top-1 acc 51.172 (51.263)	Top-5 acc 75.000 (74.173)	lr 0.02090
Train [32][2790/3239]	Time 0.211 (0.569)	Data Time 0.001 (0.013)	Loss 2.9262 (3.0383)	Entropy 1.42279 (1.43169)	Top-1 acc 55.859 (51.269)	Top-5 acc 80.859 (74.177)	lr 0.02090
Train [32][2800/3239]	Time 0.265 (0.569)	Data Time 0.001 (0.013)	Loss 2.9487 (3.0380)	Entropy 1.42277 (1.43166)	Top-1 acc 50.391 (51.280)	Top-5 acc 75.781 (74.185)	lr 0.02090
Train [32][2810/3239]	Time 0.316 (0.569)	Data Time 0.001 (0.013)	Loss 2.7986 (3.0379)	Entropy 1.42256 (1.43162)	Top-1 acc 55.469 (51.280)	Top-5 acc 79.688 (74.185)	lr 0.02090
Train [32][2820/3239]	Time 0.240 (0.568)	Data Time 0.001 (0.013)	Loss 2.9354 (3.0379)	Entropy 1.42256 (1.43159)	Top-1 acc 57.812 (51.284)	Top-5 acc 78.125 (74.188)	lr 0.02090
Train [32][2830/3239]	Time 0.254 (0.568)	Data Time 0.001 (0.013)	Loss 2.9942 (3.0379)	Entropy 1.42254 (1.43156)	Top-1 acc 55.078 (51.287)	Top-5 acc 73.438 (74.190)	lr 0.02089
Train [32][2840/3239]	Time 0.237 (0.567)	Data Time 0.001 (0.013)	Loss 2.9416 (3.0378)	Entropy 1.42249 (1.43153)	Top-1 acc 54.688 (51.291)	Top-5 acc 74.609 (74.192)	lr 0.02089
Train [32][2850/3239]	Time 0.271 (0.567)	Data Time 0.001 (0.013)	Loss 3.0382 (3.0380)	Entropy 1.42252 (1.43150)	Top-1 acc 51.953 (51.289)	Top-5 acc 73.828 (74.187)	lr 0.02089
Train [32][2860/3239]	Time 0.303 (0.566)	Data Time 0.001 (0.013)	Loss 2.9852 (3.0378)	Entropy 1.42245 (1.43146)	Top-1 acc 51.172 (51.293)	Top-5 acc 75.781 (74.193)	lr 0.02089
Train [32][2870/3239]	Time 0.217 (0.566)	Data Time 0.001 (0.013)	Loss 2.9530 (3.0380)	Entropy 1.42240 (1.43143)	Top-1 acc 53.516 (51.289)	Top-5 acc 75.391 (74.187)	lr 0.02089
Train [32][2880/3239]	Time 0.215 (0.566)	Data Time 0.001 (0.013)	Loss 3.1117 (3.0379)	Entropy 1.42227 (1.43140)	Top-1 acc 50.000 (51.292)	Top-5 acc 72.266 (74.190)	lr 0.02089
Train [32][2890/3239]	Time 0.249 (0.565)	Data Time 0.001 (0.013)	Loss 2.7871 (3.0376)	Entropy 1.42219 (1.43137)	Top-1 acc 58.203 (51.300)	Top-5 acc 81.641 (74.193)	lr 0.02089
Train [32][2900/3239]	Time 0.262 (0.565)	Data Time 0.001 (0.013)	Loss 3.0476 (3.0377)	Entropy 1.42212 (1.43134)	Top-1 acc 50.391 (51.297)	Top-5 acc 74.609 (74.190)	lr 0.02089
Train [32][2910/3239]	Time 0.219 (0.564)	Data Time 0.002 (0.013)	Loss 3.2953 (3.0380)	Entropy 1.42210 (1.43131)	Top-1 acc 45.703 (51.292)	Top-5 acc 70.312 (74.185)	lr 0.02089
Train [32][2920/3239]	Time 0.349 (0.564)	Data Time 0.001 (0.013)	Loss 2.8639 (3.0380)	Entropy 1.42201 (1.43128)	Top-1 acc 53.516 (51.291)	Top-5 acc 78.125 (74.188)	lr 0.02089
Train [32][2930/3239]	Time 0.244 (0.564)	Data Time 0.001 (0.013)	Loss 3.1821 (3.0381)	Entropy 1.42181 (1.43124)	Top-1 acc 53.125 (51.290)	Top-5 acc 71.484 (74.187)	lr 0.02089
Train [32][2940/3239]	Time 0.272 (0.563)	Data Time 0.001 (0.012)	Loss 2.9085 (3.0381)	Entropy 1.42167 (1.43121)	Top-1 acc 54.688 (51.291)	Top-5 acc 75.391 (74.187)	lr 0.02089
Train [32][2950/3239]	Time 0.203 (0.563)	Data Time 0.001 (0.012)	Loss 3.0396 (3.0381)	Entropy 1.42165 (1.43118)	Top-1 acc 53.125 (51.290)	Top-5 acc 74.219 (74.187)	lr 0.02089
Train [32][2960/3239]	Time 0.200 (0.562)	Data Time 0.001 (0.012)	Loss 2.9737 (3.0381)	Entropy 1.42150 (1.43115)	Top-1 acc 53.906 (51.290)	Top-5 acc 75.000 (74.188)	lr 0.02089
Train [32][2970/3239]	Time 0.267 (0.562)	Data Time 0.001 (0.012)	Loss 2.9497 (3.0382)	Entropy 1.42149 (1.43111)	Top-1 acc 55.078 (51.288)	Top-5 acc 75.000 (74.186)	lr 0.02088
Train [32][2980/3239]	Time 0.323 (0.575)	Data Time 0.004 (0.012)	Loss 2.9427 (3.0382)	Entropy 1.42136 (1.43108)	Top-1 acc 50.391 (51.290)	Top-5 acc 79.297 (74.191)	lr 0.02088
Train [32][2990/3239]	Time 0.214 (0.575)	Data Time 0.002 (0.012)	Loss 3.0730 (3.0384)	Entropy 1.42131 (1.43105)	Top-1 acc 51.953 (51.285)	Top-5 acc 72.266 (74.190)	lr 0.02088
Train [32][3000/3239]	Time 0.269 (0.575)	Data Time 0.002 (0.012)	Loss 3.1580 (3.0385)	Entropy 1.42121 (1.43102)	Top-1 acc 45.703 (51.281)	Top-5 acc 71.484 (74.188)	lr 0.02088
Train [32][3010/3239]	Time 0.268 (0.575)	Data Time 0.002 (0.012)	Loss 2.9779 (3.0385)	Entropy 1.42123 (1.43098)	Top-1 acc 50.781 (51.280)	Top-5 acc 74.609 (74.190)	lr 0.02088
Train [32][3020/3239]	Time 0.227 (0.574)	Data Time 0.001 (0.012)	Loss 3.0143 (3.0385)	Entropy 1.42116 (1.43095)	Top-1 acc 50.000 (51.280)	Top-5 acc 73.828 (74.190)	lr 0.02088
Train [32][3030/3239]	Time 0.367 (0.574)	Data Time 0.001 (0.012)	Loss 2.9949 (3.0384)	Entropy 1.42109 (1.43092)	Top-1 acc 52.734 (51.281)	Top-5 acc 73.438 (74.191)	lr 0.02088
Train [32][3040/3239]	Time 0.218 (0.573)	Data Time 0.001 (0.012)	Loss 3.0331 (3.0384)	Entropy 1.42105 (1.43089)	Top-1 acc 51.172 (51.284)	Top-5 acc 74.219 (74.191)	lr 0.02088
Train [32][3050/3239]	Time 0.176 (0.573)	Data Time 0.001 (0.012)	Loss 3.0761 (3.0387)	Entropy 1.42089 (1.43086)	Top-1 acc 51.562 (51.281)	Top-5 acc 74.219 (74.186)	lr 0.02088
Train [32][3060/3239]	Time 0.229 (0.573)	Data Time 0.001 (0.012)	Loss 2.9958 (3.0385)	Entropy 1.42085 (1.43082)	Top-1 acc 51.172 (51.281)	Top-5 acc 74.219 (74.189)	lr 0.02088
Train [32][3070/3239]	Time 0.197 (0.572)	Data Time 0.001 (0.012)	Loss 3.1946 (3.0386)	Entropy 1.42076 (1.43079)	Top-1 acc 48.438 (51.278)	Top-5 acc 68.359 (74.186)	lr 0.02088
Train [32][3080/3239]	Time 0.251 (0.572)	Data Time 0.001 (0.012)	Loss 2.9529 (3.0386)	Entropy 1.42066 (1.43076)	Top-1 acc 50.781 (51.278)	Top-5 acc 78.125 (74.187)	lr 0.02088
Train [32][3090/3239]	Time 0.204 (0.571)	Data Time 0.001 (0.012)	Loss 3.2315 (3.0385)	Entropy 1.42061 (1.43073)	Top-1 acc 45.312 (51.282)	Top-5 acc 69.531 (74.190)	lr 0.02088
Train [32][3100/3239]	Time 0.240 (0.571)	Data Time 0.001 (0.012)	Loss 3.0088 (3.0385)	Entropy 1.42057 (1.43069)	Top-1 acc 55.469 (51.282)	Top-5 acc 74.219 (74.190)	lr 0.02087
Train [32][3110/3239]	Time 0.241 (0.571)	Data Time 0.001 (0.012)	Loss 3.0074 (3.0386)	Entropy 1.42063 (1.43066)	Top-1 acc 55.469 (51.281)	Top-5 acc 78.125 (74.194)	lr 0.02087
Train [32][3120/3239]	Time 0.196 (0.570)	Data Time 0.001 (0.012)	Loss 3.0167 (3.0386)	Entropy 1.42047 (1.43063)	Top-1 acc 49.609 (51.282)	Top-5 acc 73.438 (74.192)	lr 0.02087
Train [32][3130/3239]	Time 0.208 (0.570)	Data Time 0.001 (0.012)	Loss 2.9456 (3.0389)	Entropy 1.42040 (1.43060)	Top-1 acc 52.734 (51.276)	Top-5 acc 77.734 (74.190)	lr 0.02087
Train [32][3140/3239]	Time 0.255 (0.569)	Data Time 0.001 (0.012)	Loss 3.3705 (3.0392)	Entropy 1.42033 (1.43056)	Top-1 acc 42.578 (51.267)	Top-5 acc 68.359 (74.187)	lr 0.02087
Train [32][3150/3239]	Time 0.234 (0.569)	Data Time 0.001 (0.012)	Loss 2.9288 (3.0392)	Entropy 1.42026 (1.43053)	Top-1 acc 56.641 (51.266)	Top-5 acc 77.344 (74.191)	lr 0.02087
Train [32][3160/3239]	Time 0.210 (0.569)	Data Time 0.001 (0.012)	Loss 2.8570 (3.0391)	Entropy 1.42022 (1.43050)	Top-1 acc 54.297 (51.269)	Top-5 acc 78.516 (74.194)	lr 0.02087
Train [32][3170/3239]	Time 0.218 (0.568)	Data Time 0.001 (0.012)	Loss 3.1085 (3.0393)	Entropy 1.42014 (1.43047)	Top-1 acc 53.906 (51.264)	Top-5 acc 73.047 (74.191)	lr 0.02087
Train [32][3180/3239]	Time 0.216 (0.568)	Data Time 0.000 (0.012)	Loss 2.9724 (3.0394)	Entropy 1.42005 (1.43043)	Top-1 acc 50.391 (51.260)	Top-5 acc 75.781 (74.188)	lr 0.02087
Train [32][3190/3239]	Time 0.199 (0.567)	Data Time 0.000 (0.012)	Loss 3.1410 (3.0393)	Entropy 1.41996 (1.43040)	Top-1 acc 49.219 (51.260)	Top-5 acc 68.750 (74.189)	lr 0.02087
Train [32][3200/3239]	Time 0.320 (0.567)	Data Time 0.000 (0.012)	Loss 2.9482 (3.0392)	Entropy 1.41983 (1.43037)	Top-1 acc 51.172 (51.261)	Top-5 acc 75.781 (74.192)	lr 0.02087
Train [32][3210/3239]	Time 0.219 (0.567)	Data Time 0.000 (0.012)	Loss 3.0797 (3.0390)	Entropy 1.41961 (1.43033)	Top-1 acc 50.000 (51.264)	Top-5 acc 73.047 (74.195)	lr 0.02087
Train [32][3220/3239]	Time 0.224 (0.566)	Data Time 0.000 (0.012)	Loss 3.1566 (3.0390)	Entropy 1.41954 (1.43030)	Top-1 acc 53.906 (51.268)	Top-5 acc 72.266 (74.196)	lr 0.02087
Train [32][3230/3239]	Time 0.216 (0.566)	Data Time 0.000 (0.012)	Loss 3.0535 (3.0391)	Entropy 1.41957 (1.43027)	Top-1 acc 52.734 (51.265)	Top-5 acc 73.828 (74.192)	lr 0.02086
Train [32][3239/3239]	Time 2.204 (0.565)	Data Time 0.000 (0.011)	Loss 3.1844 (3.0391)	Entropy 1.41957 (1.43024)	Top-1 acc 50.617 (51.267)	Top-5 acc 70.370 (74.192)	lr 0.02086
==========Valid [32/120]	loss 1.833	top-1 acc 58.955 (58.955)	top-5 acc 81.282	Train top-1 51.267	top-5 74.192	Entropy 1.41957	Latency-None: 0.000ms	Flops: 548.79M
Train [33][0/3239]	Time 31.332 (31.332)	Data Time 29.592 (29.592)	Loss 3.0121 (3.0121)	Entropy 1.41954 (1.41954)	Top-1 acc 49.609 (49.609)	Top-5 acc 73.828 (73.828)	lr 0.02086
Train [33][10/3239]	Time 2.401 (3.395)	Data Time 0.002 (2.779)	Loss 3.1262 (3.0140)	Entropy 1.41954 (1.41954)	Top-1 acc 45.703 (51.136)	Top-5 acc 73.828 (74.148)	lr 0.02086
Train [33][20/3239]	Time 0.332 (1.898)	Data Time 0.001 (1.456)	Loss 3.0396 (3.0285)	Entropy 1.41946 (1.41950)	Top-1 acc 50.391 (51.060)	Top-5 acc 73.438 (73.865)	lr 0.02086
Train [33][30/3239]	Time 0.252 (1.428)	Data Time 0.001 (0.987)	Loss 3.0020 (3.0176)	Entropy 1.41931 (1.41945)	Top-1 acc 50.781 (51.310)	Top-5 acc 74.219 (74.257)	lr 0.02086
Train [33][40/3239]	Time 0.271 (1.193)	Data Time 0.001 (0.747)	Loss 2.9178 (3.0204)	Entropy 1.41930 (1.41941)	Top-1 acc 54.297 (51.353)	Top-5 acc 76.172 (74.228)	lr 0.02086
Train [33][50/3239]	Time 0.221 (1.047)	Data Time 0.001 (0.601)	Loss 2.9626 (3.0151)	Entropy 1.41912 (1.41936)	Top-1 acc 51.953 (51.294)	Top-5 acc 73.828 (74.326)	lr 0.02086
Train [33][60/3239]	Time 0.211 (0.949)	Data Time 0.001 (0.502)	Loss 3.1472 (3.0108)	Entropy 1.41904 (1.41932)	Top-1 acc 45.703 (51.486)	Top-5 acc 69.141 (74.289)	lr 0.02086
Train [33][70/3239]	Time 0.298 (0.880)	Data Time 0.002 (0.432)	Loss 3.2492 (3.0056)	Entropy 1.41895 (1.41927)	Top-1 acc 47.656 (51.689)	Top-5 acc 71.094 (74.488)	lr 0.02086
Train [33][80/3239]	Time 0.206 (1.392)	Data Time 0.003 (0.379)	Loss 3.1405 (3.0069)	Entropy 1.41890 (1.41923)	Top-1 acc 51.953 (51.717)	Top-5 acc 69.922 (74.503)	lr 0.02086
Train [33][90/3239]	Time 0.225 (1.289)	Data Time 0.002 (0.338)	Loss 2.8991 (3.0022)	Entropy 1.41880 (1.41919)	Top-1 acc 52.734 (51.807)	Top-5 acc 75.781 (74.618)	lr 0.02086
Train [33][100/3239]	Time 0.224 (1.206)	Data Time 0.002 (0.304)	Loss 2.8874 (2.9995)	Entropy 1.41876 (1.41915)	Top-1 acc 57.422 (51.806)	Top-5 acc 77.344 (74.714)	lr 0.02086
Train [33][110/3239]	Time 0.245 (1.138)	Data Time 0.002 (0.277)	Loss 2.8424 (2.9969)	Entropy 1.41869 (1.41911)	Top-1 acc 54.297 (51.900)	Top-5 acc 76.562 (74.817)	lr 0.02086
Train [33][120/3239]	Time 2.374 (1.080)	Data Time 0.001 (0.254)	Loss 3.0570 (3.0011)	Entropy 1.41869 (1.41908)	Top-1 acc 53.906 (51.843)	Top-5 acc 72.656 (74.735)	lr 0.02086
Train [33][130/3239]	Time 0.214 (1.015)	Data Time 0.001 (0.235)	Loss 3.1435 (2.9948)	Entropy 1.41861 (1.41904)	Top-1 acc 48.047 (51.932)	Top-5 acc 69.922 (74.857)	lr 0.02085
Train [33][140/3239]	Time 0.227 (0.976)	Data Time 0.001 (0.219)	Loss 3.0129 (2.9931)	Entropy 1.41853 (1.41901)	Top-1 acc 51.172 (52.006)	Top-5 acc 75.781 (74.920)	lr 0.02085
Train [33][150/3239]	Time 0.224 (0.942)	Data Time 0.001 (0.204)	Loss 3.1397 (2.9885)	Entropy 1.41845 (1.41897)	Top-1 acc 51.953 (52.196)	Top-5 acc 71.875 (75.049)	lr 0.02085
Train [33][160/3239]	Time 0.194 (0.911)	Data Time 0.001 (0.192)	Loss 3.7637 (2.9940)	Entropy 1.41833 (1.41893)	Top-1 acc 37.500 (52.108)	Top-5 acc 61.719 (74.985)	lr 0.02085
Train [33][170/3239]	Time 0.229 (0.884)	Data Time 0.001 (0.180)	Loss 2.9084 (2.9961)	Entropy 1.41823 (1.41889)	Top-1 acc 55.469 (52.113)	Top-5 acc 75.391 (74.925)	lr 0.02085
Train [33][180/3239]	Time 0.308 (0.861)	Data Time 0.001 (0.171)	Loss 2.9203 (2.9950)	Entropy 1.41808 (1.41885)	Top-1 acc 51.953 (52.039)	Top-5 acc 76.562 (74.981)	lr 0.02085
Train [33][190/3239]	Time 0.216 (0.839)	Data Time 0.001 (0.162)	Loss 3.0915 (2.9960)	Entropy 1.41804 (1.41881)	Top-1 acc 49.219 (52.033)	Top-5 acc 73.047 (74.947)	lr 0.02085
Train [33][200/3239]	Time 0.238 (0.819)	Data Time 0.001 (0.154)	Loss 2.9283 (2.9954)	Entropy 1.41797 (1.41877)	Top-1 acc 54.297 (52.070)	Top-5 acc 73.047 (74.913)	lr 0.02085
Train [33][210/3239]	Time 0.219 (0.802)	Data Time 0.001 (0.147)	Loss 3.0921 (2.9968)	Entropy 1.41788 (1.41873)	Top-1 acc 51.953 (52.101)	Top-5 acc 73.828 (74.878)	lr 0.02085
Train [33][220/3239]	Time 0.245 (0.785)	Data Time 0.001 (0.140)	Loss 3.1169 (2.9982)	Entropy 1.41754 (1.41869)	Top-1 acc 48.047 (52.107)	Top-5 acc 73.828 (74.839)	lr 0.02085
Train [33][230/3239]	Time 2.494 (0.771)	Data Time 0.001 (0.134)	Loss 3.1365 (2.9974)	Entropy 1.41754 (1.41864)	Top-1 acc 48.438 (52.124)	Top-5 acc 69.922 (74.860)	lr 0.02085
Train [33][240/3239]	Time 0.235 (0.748)	Data Time 0.001 (0.128)	Loss 2.8649 (2.9985)	Entropy 1.41753 (1.41860)	Top-1 acc 54.297 (52.091)	Top-5 acc 76.172 (74.867)	lr 0.02085
Train [33][250/3239]	Time 0.199 (0.736)	Data Time 0.002 (0.123)	Loss 2.9739 (2.9983)	Entropy 1.41752 (1.41856)	Top-1 acc 52.734 (52.126)	Top-5 acc 76.172 (74.871)	lr 0.02085
Train [33][260/3239]	Time 0.209 (0.725)	Data Time 0.001 (0.119)	Loss 3.0127 (2.9969)	Entropy 1.41745 (1.41851)	Top-1 acc 50.781 (52.172)	Top-5 acc 73.047 (74.894)	lr 0.02084
Train [33][270/3239]	Time 0.224 (0.715)	Data Time 0.001 (0.114)	Loss 2.9392 (2.9970)	Entropy 1.41748 (1.41847)	Top-1 acc 49.609 (52.153)	Top-5 acc 78.906 (74.919)	lr 0.02084
Train [33][280/3239]	Time 0.234 (0.705)	Data Time 0.001 (0.110)	Loss 3.1735 (2.9988)	Entropy 1.41748 (1.41844)	Top-1 acc 49.219 (52.107)	Top-5 acc 71.875 (74.897)	lr 0.02084
Train [33][290/3239]	Time 0.204 (0.696)	Data Time 0.001 (0.107)	Loss 3.0267 (2.9985)	Entropy 1.41744 (1.41841)	Top-1 acc 48.828 (52.095)	Top-5 acc 74.219 (74.893)	lr 0.02084
Train [33][300/3239]	Time 0.226 (0.688)	Data Time 0.001 (0.103)	Loss 3.1643 (2.9990)	Entropy 1.41733 (1.41837)	Top-1 acc 48.047 (52.087)	Top-5 acc 71.484 (74.890)	lr 0.02084
Train [33][310/3239]	Time 0.226 (0.680)	Data Time 0.001 (0.100)	Loss 2.8656 (2.9995)	Entropy 1.41727 (1.41834)	Top-1 acc 55.859 (52.057)	Top-5 acc 79.688 (74.862)	lr 0.02084
Train [33][320/3239]	Time 0.224 (0.673)	Data Time 0.001 (0.097)	Loss 2.9811 (2.9998)	Entropy 1.41717 (1.41830)	Top-1 acc 53.125 (52.054)	Top-5 acc 75.781 (74.845)	lr 0.02084
Train [33][330/3239]	Time 0.240 (0.666)	Data Time 0.001 (0.094)	Loss 2.9952 (3.0008)	Entropy 1.41710 (1.41827)	Top-1 acc 50.781 (52.027)	Top-5 acc 72.266 (74.803)	lr 0.02084
Train [33][340/3239]	Time 2.411 (0.660)	Data Time 0.001 (0.091)	Loss 2.8896 (2.9991)	Entropy 1.41710 (1.41824)	Top-1 acc 54.688 (52.061)	Top-5 acc 76.562 (74.857)	lr 0.02084
Train [33][350/3239]	Time 0.257 (0.647)	Data Time 0.001 (0.089)	Loss 2.9376 (2.9990)	Entropy 1.41702 (1.41820)	Top-1 acc 57.422 (52.078)	Top-5 acc 75.391 (74.864)	lr 0.02084
Train [33][360/3239]	Time 0.259 (0.642)	Data Time 0.002 (0.086)	Loss 3.0271 (2.9985)	Entropy 1.41697 (1.41817)	Top-1 acc 50.000 (52.087)	Top-5 acc 73.438 (74.868)	lr 0.02084
Train [33][370/3239]	Time 0.248 (0.637)	Data Time 0.001 (0.084)	Loss 2.8378 (2.9976)	Entropy 1.41671 (1.41813)	Top-1 acc 57.031 (52.105)	Top-5 acc 75.391 (74.886)	lr 0.02084
Train [33][380/3239]	Time 0.235 (0.632)	Data Time 0.001 (0.082)	Loss 2.8871 (2.9981)	Entropy 1.41668 (1.41809)	Top-1 acc 51.953 (52.113)	Top-5 acc 80.469 (74.892)	lr 0.02084
Train [33][390/3239]	Time 0.202 (0.627)	Data Time 0.001 (0.080)	Loss 2.9583 (2.9971)	Entropy 1.41659 (1.41805)	Top-1 acc 55.469 (52.157)	Top-5 acc 75.781 (74.917)	lr 0.02083
Train [33][400/3239]	Time 0.315 (0.623)	Data Time 0.001 (0.078)	Loss 3.2162 (2.9979)	Entropy 1.41641 (1.41801)	Top-1 acc 50.391 (52.166)	Top-5 acc 69.141 (74.892)	lr 0.02083
Train [33][410/3239]	Time 0.212 (0.618)	Data Time 0.001 (0.076)	Loss 3.0415 (2.9972)	Entropy 1.41629 (1.41797)	Top-1 acc 48.438 (52.176)	Top-5 acc 73.828 (74.910)	lr 0.02083
Train [33][420/3239]	Time 0.214 (0.614)	Data Time 0.001 (0.074)	Loss 3.0587 (2.9978)	Entropy 1.41628 (1.41793)	Top-1 acc 51.953 (52.173)	Top-5 acc 75.391 (74.907)	lr 0.02083
Train [33][430/3239]	Time 0.206 (0.610)	Data Time 0.001 (0.073)	Loss 3.2293 (2.9996)	Entropy 1.41617 (1.41790)	Top-1 acc 46.094 (52.135)	Top-5 acc 68.750 (74.879)	lr 0.02083
Train [33][440/3239]	Time 0.392 (0.706)	Data Time 0.004 (0.071)	Loss 3.1885 (3.0005)	Entropy 1.41605 (1.41786)	Top-1 acc 47.656 (52.112)	Top-5 acc 70.312 (74.871)	lr 0.02083
Train [33][450/3239]	Time 2.442 (0.701)	Data Time 0.002 (0.069)	Loss 2.8626 (3.0021)	Entropy 1.41605 (1.41782)	Top-1 acc 55.078 (52.105)	Top-5 acc 79.297 (74.839)	lr 0.02083
Train [33][460/3239]	Time 0.360 (0.691)	Data Time 0.002 (0.068)	Loss 2.8941 (3.0028)	Entropy 1.41616 (1.41778)	Top-1 acc 53.906 (52.089)	Top-5 acc 76.953 (74.825)	lr 0.02083
Train [33][470/3239]	Time 0.225 (0.686)	Data Time 0.001 (0.067)	Loss 3.0833 (3.0024)	Entropy 1.41616 (1.41775)	Top-1 acc 50.000 (52.106)	Top-5 acc 73.438 (74.828)	lr 0.02083
Train [33][480/3239]	Time 0.202 (0.681)	Data Time 0.001 (0.065)	Loss 2.9200 (3.0027)	Entropy 1.41606 (1.41771)	Top-1 acc 55.078 (52.095)	Top-5 acc 74.609 (74.804)	lr 0.02083
Train [33][490/3239]	Time 0.200 (0.676)	Data Time 0.002 (0.064)	Loss 3.0123 (3.0035)	Entropy 1.41601 (1.41768)	Top-1 acc 51.172 (52.092)	Top-5 acc 75.391 (74.798)	lr 0.02083
Train [33][500/3239]	Time 0.206 (0.671)	Data Time 0.001 (0.063)	Loss 2.9809 (3.0043)	Entropy 1.41591 (1.41764)	Top-1 acc 51.562 (52.074)	Top-5 acc 74.609 (74.791)	lr 0.02083
Train [33][510/3239]	Time 0.218 (0.667)	Data Time 0.001 (0.062)	Loss 2.9238 (3.0040)	Entropy 1.41588 (1.41761)	Top-1 acc 53.906 (52.088)	Top-5 acc 76.172 (74.804)	lr 0.02083
Train [33][520/3239]	Time 0.216 (0.663)	Data Time 0.001 (0.060)	Loss 3.0438 (3.0051)	Entropy 1.41568 (1.41757)	Top-1 acc 50.391 (52.055)	Top-5 acc 73.047 (74.784)	lr 0.02083
Train [33][530/3239]	Time 0.159 (0.659)	Data Time 0.001 (0.059)	Loss 3.0670 (3.0056)	Entropy 1.41556 (1.41754)	Top-1 acc 52.734 (52.067)	Top-5 acc 73.828 (74.765)	lr 0.02082
Train [33][540/3239]	Time 0.234 (0.655)	Data Time 0.001 (0.058)	Loss 3.2130 (3.0066)	Entropy 1.41549 (1.41750)	Top-1 acc 51.562 (52.051)	Top-5 acc 71.094 (74.738)	lr 0.02082
Train [33][550/3239]	Time 0.233 (0.651)	Data Time 0.001 (0.057)	Loss 3.1312 (3.0072)	Entropy 1.41542 (1.41746)	Top-1 acc 51.172 (52.036)	Top-5 acc 71.094 (74.719)	lr 0.02082
Train [33][560/3239]	Time 2.347 (0.647)	Data Time 0.001 (0.056)	Loss 2.8092 (3.0078)	Entropy 1.41542 (1.41743)	Top-1 acc 53.906 (52.020)	Top-5 acc 79.688 (74.708)	lr 0.02082
Train [33][570/3239]	Time 0.326 (0.640)	Data Time 0.001 (0.055)	Loss 2.9684 (3.0078)	Entropy 1.41539 (1.41739)	Top-1 acc 51.953 (52.011)	Top-5 acc 76.562 (74.709)	lr 0.02082
Train [33][580/3239]	Time 0.213 (0.637)	Data Time 0.001 (0.054)	Loss 3.1856 (3.0081)	Entropy 1.41527 (1.41736)	Top-1 acc 46.094 (52.009)	Top-5 acc 72.266 (74.700)	lr 0.02082
Train [33][590/3239]	Time 0.250 (0.634)	Data Time 0.001 (0.053)	Loss 3.1350 (3.0089)	Entropy 1.41518 (1.41732)	Top-1 acc 49.219 (51.984)	Top-5 acc 71.875 (74.693)	lr 0.02082
Train [33][600/3239]	Time 0.214 (0.631)	Data Time 0.001 (0.053)	Loss 2.8032 (3.0078)	Entropy 1.41514 (1.41728)	Top-1 acc 56.641 (52.017)	Top-5 acc 78.125 (74.710)	lr 0.02082
Train [33][610/3239]	Time 0.220 (0.627)	Data Time 0.001 (0.052)	Loss 3.1052 (3.0080)	Entropy 1.41513 (1.41725)	Top-1 acc 49.609 (52.014)	Top-5 acc 75.781 (74.710)	lr 0.02082
Train [33][620/3239]	Time 0.241 (0.625)	Data Time 0.001 (0.051)	Loss 2.9939 (3.0075)	Entropy 1.41511 (1.41721)	Top-1 acc 51.172 (52.025)	Top-5 acc 77.344 (74.731)	lr 0.02082
Train [33][630/3239]	Time 0.216 (0.622)	Data Time 0.001 (0.050)	Loss 2.9075 (3.0070)	Entropy 1.41503 (1.41718)	Top-1 acc 50.781 (52.039)	Top-5 acc 79.688 (74.744)	lr 0.02082
Train [33][640/3239]	Time 0.201 (0.619)	Data Time 0.001 (0.049)	Loss 2.9868 (3.0061)	Entropy 1.41493 (1.41715)	Top-1 acc 55.469 (52.067)	Top-5 acc 74.609 (74.757)	lr 0.02082
Train [33][650/3239]	Time 0.229 (0.617)	Data Time 0.001 (0.049)	Loss 3.0888 (3.0070)	Entropy 1.41485 (1.41711)	Top-1 acc 48.828 (52.036)	Top-5 acc 74.609 (74.740)	lr 0.02082
Train [33][660/3239]	Time 0.204 (0.614)	Data Time 0.001 (0.048)	Loss 3.0478 (3.0076)	Entropy 1.41469 (1.41708)	Top-1 acc 46.484 (52.009)	Top-5 acc 76.172 (74.725)	lr 0.02081
Train [33][670/3239]	Time 2.502 (0.612)	Data Time 0.001 (0.047)	Loss 2.9727 (3.0067)	Entropy 1.41469 (1.41704)	Top-1 acc 51.172 (52.006)	Top-5 acc 77.734 (74.761)	lr 0.02081
Train [33][680/3239]	Time 0.243 (0.607)	Data Time 0.001 (0.047)	Loss 3.0505 (3.0063)	Entropy 1.41451 (1.41700)	Top-1 acc 48.828 (52.012)	Top-5 acc 73.438 (74.765)	lr 0.02081
Train [33][690/3239]	Time 0.235 (0.604)	Data Time 0.001 (0.046)	Loss 3.1667 (3.0071)	Entropy 1.41447 (1.41697)	Top-1 acc 47.656 (51.996)	Top-5 acc 73.828 (74.736)	lr 0.02081
Train [33][700/3239]	Time 0.216 (0.602)	Data Time 0.001 (0.045)	Loss 2.9734 (3.0084)	Entropy 1.41437 (1.41693)	Top-1 acc 52.344 (51.969)	Top-5 acc 75.781 (74.719)	lr 0.02081
Train [33][710/3239]	Time 0.221 (0.600)	Data Time 0.001 (0.045)	Loss 2.9809 (3.0087)	Entropy 1.41429 (1.41689)	Top-1 acc 53.125 (51.968)	Top-5 acc 75.000 (74.702)	lr 0.02081
Train [33][720/3239]	Time 0.213 (0.598)	Data Time 0.001 (0.044)	Loss 3.0815 (3.0092)	Entropy 1.41420 (1.41686)	Top-1 acc 51.562 (51.952)	Top-5 acc 72.266 (74.699)	lr 0.02081
Train [33][730/3239]	Time 0.226 (0.596)	Data Time 0.001 (0.044)	Loss 2.8485 (3.0097)	Entropy 1.41411 (1.41682)	Top-1 acc 55.469 (51.935)	Top-5 acc 77.734 (74.687)	lr 0.02081
Train [33][740/3239]	Time 0.231 (0.594)	Data Time 0.001 (0.043)	Loss 2.9799 (3.0092)	Entropy 1.41408 (1.41678)	Top-1 acc 53.906 (51.946)	Top-5 acc 73.828 (74.697)	lr 0.02081
Train [33][750/3239]	Time 0.209 (0.592)	Data Time 0.001 (0.042)	Loss 2.9942 (3.0092)	Entropy 1.41404 (1.41675)	Top-1 acc 51.953 (51.945)	Top-5 acc 73.828 (74.697)	lr 0.02081
Train [33][760/3239]	Time 0.217 (0.590)	Data Time 0.001 (0.042)	Loss 2.9040 (3.0082)	Entropy 1.41391 (1.41671)	Top-1 acc 51.172 (51.968)	Top-5 acc 76.172 (74.715)	lr 0.02081
Train [33][770/3239]	Time 0.201 (0.588)	Data Time 0.001 (0.041)	Loss 2.9103 (3.0077)	Entropy 1.41395 (1.41668)	Top-1 acc 53.516 (51.992)	Top-5 acc 78.125 (74.723)	lr 0.02081
Train [33][780/3239]	Time 2.434 (0.586)	Data Time 0.001 (0.041)	Loss 3.1961 (3.0074)	Entropy 1.41395 (1.41664)	Top-1 acc 49.219 (51.988)	Top-5 acc 70.312 (74.726)	lr 0.02081
Train [33][790/3239]	Time 0.330 (0.582)	Data Time 0.001 (0.040)	Loss 3.0858 (3.0075)	Entropy 1.41368 (1.41660)	Top-1 acc 49.609 (51.989)	Top-5 acc 73.047 (74.732)	lr 0.02080
Train [33][800/3239]	Time 0.229 (0.580)	Data Time 0.001 (0.040)	Loss 2.8605 (3.0072)	Entropy 1.41363 (1.41657)	Top-1 acc 54.297 (51.989)	Top-5 acc 80.469 (74.751)	lr 0.02080
Train [33][810/3239]	Time 0.270 (0.628)	Data Time 0.003 (0.040)	Loss 2.9766 (3.0080)	Entropy 1.41358 (1.41653)	Top-1 acc 51.953 (51.960)	Top-5 acc 76.172 (74.736)	lr 0.02080
Train [33][820/3239]	Time 0.225 (0.627)	Data Time 0.002 (0.039)	Loss 2.9371 (3.0079)	Entropy 1.41342 (1.41649)	Top-1 acc 54.297 (51.968)	Top-5 acc 77.344 (74.734)	lr 0.02080
Train [33][830/3239]	Time 0.226 (0.625)	Data Time 0.001 (0.039)	Loss 3.1140 (3.0074)	Entropy 1.41340 (1.41645)	Top-1 acc 53.125 (51.971)	Top-5 acc 73.047 (74.752)	lr 0.02080
Train [33][840/3239]	Time 0.197 (0.623)	Data Time 0.002 (0.038)	Loss 3.1299 (3.0071)	Entropy 1.41338 (1.41642)	Top-1 acc 51.562 (51.984)	Top-5 acc 68.359 (74.754)	lr 0.02080
Train [33][850/3239]	Time 0.318 (0.621)	Data Time 0.001 (0.038)	Loss 2.9714 (3.0075)	Entropy 1.41326 (1.41638)	Top-1 acc 53.125 (51.974)	Top-5 acc 74.219 (74.752)	lr 0.02080
Train [33][860/3239]	Time 0.215 (0.619)	Data Time 0.001 (0.037)	Loss 2.8482 (3.0071)	Entropy 1.41323 (1.41635)	Top-1 acc 55.078 (51.993)	Top-5 acc 78.906 (74.762)	lr 0.02080
Train [33][870/3239]	Time 0.210 (0.617)	Data Time 0.001 (0.037)	Loss 2.9104 (3.0073)	Entropy 1.41314 (1.41631)	Top-1 acc 54.297 (51.995)	Top-5 acc 75.781 (74.754)	lr 0.02080
Train [33][880/3239]	Time 0.240 (0.615)	Data Time 0.001 (0.037)	Loss 3.0640 (3.0081)	Entropy 1.41304 (1.41627)	Top-1 acc 46.094 (51.968)	Top-5 acc 73.828 (74.743)	lr 0.02080
Train [33][890/3239]	Time 2.453 (0.613)	Data Time 0.001 (0.036)	Loss 2.8888 (3.0081)	Entropy 1.41304 (1.41624)	Top-1 acc 51.172 (51.967)	Top-5 acc 77.344 (74.745)	lr 0.02080
Train [33][900/3239]	Time 0.255 (0.609)	Data Time 0.001 (0.036)	Loss 2.9324 (3.0086)	Entropy 1.41295 (1.41620)	Top-1 acc 55.078 (51.964)	Top-5 acc 77.734 (74.731)	lr 0.02080
Train [33][910/3239]	Time 0.206 (0.607)	Data Time 0.002 (0.035)	Loss 2.8920 (3.0078)	Entropy 1.41282 (1.41616)	Top-1 acc 55.469 (51.971)	Top-5 acc 76.172 (74.744)	lr 0.02080
Train [33][920/3239]	Time 0.226 (0.605)	Data Time 0.001 (0.035)	Loss 3.0178 (3.0084)	Entropy 1.41282 (1.41613)	Top-1 acc 49.219 (51.959)	Top-5 acc 75.391 (74.737)	lr 0.02079
Train [33][930/3239]	Time 0.217 (0.603)	Data Time 0.001 (0.035)	Loss 3.0569 (3.0089)	Entropy 1.41273 (1.41609)	Top-1 acc 48.828 (51.945)	Top-5 acc 73.047 (74.719)	lr 0.02079
Train [33][940/3239]	Time 0.254 (0.602)	Data Time 0.002 (0.034)	Loss 2.9929 (3.0091)	Entropy 1.41273 (1.41606)	Top-1 acc 53.906 (51.935)	Top-5 acc 73.047 (74.712)	lr 0.02079
Train [33][950/3239]	Time 0.253 (0.601)	Data Time 0.002 (0.034)	Loss 2.9055 (3.0085)	Entropy 1.41266 (1.41602)	Top-1 acc 52.344 (51.948)	Top-5 acc 76.953 (74.719)	lr 0.02079
Train [33][960/3239]	Time 0.370 (0.600)	Data Time 0.002 (0.034)	Loss 3.0598 (3.0084)	Entropy 1.41259 (1.41599)	Top-1 acc 52.344 (51.952)	Top-5 acc 74.219 (74.717)	lr 0.02079
Train [33][970/3239]	Time 0.285 (0.600)	Data Time 0.002 (0.033)	Loss 3.0455 (3.0081)	Entropy 1.41248 (1.41595)	Top-1 acc 50.000 (51.961)	Top-5 acc 75.391 (74.730)	lr 0.02079
Train [33][980/3239]	Time 0.253 (0.599)	Data Time 0.001 (0.033)	Loss 3.0574 (3.0082)	Entropy 1.41242 (1.41591)	Top-1 acc 50.391 (51.966)	Top-5 acc 77.734 (74.734)	lr 0.02079
Train [33][990/3239]	Time 0.210 (0.597)	Data Time 0.001 (0.033)	Loss 2.9776 (3.0087)	Entropy 1.41241 (1.41588)	Top-1 acc 51.562 (51.967)	Top-5 acc 75.000 (74.724)	lr 0.02079
Train [33][1000/3239]	Time 2.337 (0.596)	Data Time 0.001 (0.032)	Loss 3.1475 (3.0086)	Entropy 1.41241 (1.41584)	Top-1 acc 50.781 (51.976)	Top-5 acc 71.875 (74.732)	lr 0.02079
Train [33][1010/3239]	Time 0.301 (0.592)	Data Time 0.001 (0.032)	Loss 2.9057 (3.0089)	Entropy 1.41240 (1.41581)	Top-1 acc 56.250 (51.979)	Top-5 acc 78.906 (74.731)	lr 0.02079
Train [33][1020/3239]	Time 0.221 (0.591)	Data Time 0.001 (0.032)	Loss 3.1227 (3.0093)	Entropy 1.41234 (1.41578)	Top-1 acc 47.656 (51.976)	Top-5 acc 73.438 (74.721)	lr 0.02079
Train [33][1030/3239]	Time 0.224 (0.589)	Data Time 0.001 (0.032)	Loss 2.8839 (3.0095)	Entropy 1.41217 (1.41574)	Top-1 acc 54.688 (51.973)	Top-5 acc 79.297 (74.717)	lr 0.02079
Train [33][1040/3239]	Time 0.252 (0.588)	Data Time 0.001 (0.031)	Loss 3.0583 (3.0093)	Entropy 1.41219 (1.41571)	Top-1 acc 48.438 (51.983)	Top-5 acc 73.047 (74.722)	lr 0.02079
Train [33][1050/3239]	Time 0.213 (0.587)	Data Time 0.001 (0.031)	Loss 2.8895 (3.0088)	Entropy 1.41218 (1.41567)	Top-1 acc 55.859 (51.993)	Top-5 acc 76.562 (74.740)	lr 0.02079
Train [33][1060/3239]	Time 0.321 (0.586)	Data Time 0.001 (0.031)	Loss 3.1996 (3.0095)	Entropy 1.41204 (1.41564)	Top-1 acc 48.047 (51.975)	Top-5 acc 71.094 (74.725)	lr 0.02078
Train [33][1070/3239]	Time 0.257 (0.585)	Data Time 0.001 (0.030)	Loss 3.0630 (3.0097)	Entropy 1.41202 (1.41561)	Top-1 acc 50.000 (51.975)	Top-5 acc 75.781 (74.726)	lr 0.02078
Train [33][1080/3239]	Time 0.204 (0.584)	Data Time 0.001 (0.030)	Loss 3.1879 (3.0103)	Entropy 1.41183 (1.41557)	Top-1 acc 45.703 (51.955)	Top-5 acc 71.484 (74.712)	lr 0.02078
Train [33][1090/3239]	Time 0.218 (0.583)	Data Time 0.002 (0.030)	Loss 3.1855 (3.0104)	Entropy 1.41179 (1.41554)	Top-1 acc 45.703 (51.948)	Top-5 acc 71.094 (74.712)	lr 0.02078
Train [33][1100/3239]	Time 0.236 (0.582)	Data Time 0.001 (0.030)	Loss 2.9708 (3.0103)	Entropy 1.41178 (1.41550)	Top-1 acc 51.562 (51.946)	Top-5 acc 76.953 (74.720)	lr 0.02078
Train [33][1110/3239]	Time 2.518 (0.581)	Data Time 0.002 (0.029)	Loss 3.0812 (3.0101)	Entropy 1.41178 (1.41547)	Top-1 acc 51.172 (51.956)	Top-5 acc 71.484 (74.726)	lr 0.02078
Train [33][1120/3239]	Time 0.227 (0.578)	Data Time 0.001 (0.029)	Loss 3.2103 (3.0108)	Entropy 1.41166 (1.41544)	Top-1 acc 44.922 (51.940)	Top-5 acc 70.312 (74.703)	lr 0.02078
Train [33][1130/3239]	Time 0.250 (0.576)	Data Time 0.001 (0.029)	Loss 2.9973 (3.0105)	Entropy 1.41166 (1.41540)	Top-1 acc 55.078 (51.952)	Top-5 acc 71.094 (74.705)	lr 0.02078
Train [33][1140/3239]	Time 0.220 (0.575)	Data Time 0.001 (0.029)	Loss 3.1757 (3.0101)	Entropy 1.41159 (1.41537)	Top-1 acc 49.609 (51.964)	Top-5 acc 71.094 (74.720)	lr 0.02078
Train [33][1150/3239]	Time 0.212 (0.574)	Data Time 0.001 (0.028)	Loss 2.9094 (3.0096)	Entropy 1.41147 (1.41534)	Top-1 acc 53.516 (51.977)	Top-5 acc 78.125 (74.728)	lr 0.02078
Train [33][1160/3239]	Time 0.254 (0.573)	Data Time 0.001 (0.028)	Loss 2.8621 (3.0092)	Entropy 1.41146 (1.41530)	Top-1 acc 56.641 (51.985)	Top-5 acc 78.125 (74.740)	lr 0.02078
Train [33][1170/3239]	Time 0.401 (0.609)	Data Time 0.002 (0.028)	Loss 3.0193 (3.0089)	Entropy 1.41146 (1.41527)	Top-1 acc 51.953 (51.991)	Top-5 acc 74.219 (74.746)	lr 0.02078
Train [33][1180/3239]	Time 0.228 (0.608)	Data Time 0.002 (0.028)	Loss 3.0733 (3.0091)	Entropy 1.41144 (1.41524)	Top-1 acc 51.953 (51.995)	Top-5 acc 74.609 (74.745)	lr 0.02078
Train [33][1190/3239]	Time 0.229 (0.607)	Data Time 0.002 (0.028)	Loss 3.0798 (3.0094)	Entropy 1.41135 (1.41521)	Top-1 acc 51.172 (51.991)	Top-5 acc 73.828 (74.742)	lr 0.02077
Train [33][1200/3239]	Time 0.219 (0.605)	Data Time 0.001 (0.027)	Loss 3.0110 (3.0087)	Entropy 1.41126 (1.41517)	Top-1 acc 55.078 (52.005)	Top-5 acc 74.219 (74.759)	lr 0.02077
Train [33][1210/3239]	Time 0.228 (0.604)	Data Time 0.001 (0.027)	Loss 3.0956 (3.0093)	Entropy 1.41125 (1.41514)	Top-1 acc 50.391 (51.995)	Top-5 acc 74.609 (74.750)	lr 0.02077
Train [33][1220/3239]	Time 2.511 (0.603)	Data Time 0.002 (0.027)	Loss 3.0440 (3.0090)	Entropy 1.41125 (1.41511)	Top-1 acc 51.562 (52.000)	Top-5 acc 72.266 (74.757)	lr 0.02077
Train [33][1230/3239]	Time 0.241 (0.600)	Data Time 0.001 (0.027)	Loss 3.1555 (3.0095)	Entropy 1.41111 (1.41508)	Top-1 acc 49.219 (51.985)	Top-5 acc 73.438 (74.749)	lr 0.02077
Train [33][1240/3239]	Time 0.222 (0.599)	Data Time 0.001 (0.026)	Loss 3.0477 (3.0100)	Entropy 1.41112 (1.41505)	Top-1 acc 54.688 (51.977)	Top-5 acc 74.609 (74.732)	lr 0.02077
Train [33][1250/3239]	Time 0.239 (0.598)	Data Time 0.001 (0.026)	Loss 2.9020 (3.0103)	Entropy 1.41103 (1.41501)	Top-1 acc 54.297 (51.970)	Top-5 acc 75.000 (74.723)	lr 0.02077
Train [33][1260/3239]	Time 0.239 (0.597)	Data Time 0.001 (0.026)	Loss 3.2120 (3.0100)	Entropy 1.41103 (1.41498)	Top-1 acc 49.609 (51.980)	Top-5 acc 71.094 (74.728)	lr 0.02077
Train [33][1270/3239]	Time 0.315 (0.596)	Data Time 0.001 (0.026)	Loss 3.0435 (3.0103)	Entropy 1.41103 (1.41495)	Top-1 acc 52.344 (51.969)	Top-5 acc 75.781 (74.721)	lr 0.02077
Train [33][1280/3239]	Time 0.229 (0.595)	Data Time 0.001 (0.026)	Loss 2.9850 (3.0106)	Entropy 1.41091 (1.41492)	Top-1 acc 51.562 (51.957)	Top-5 acc 71.875 (74.710)	lr 0.02077
Train [33][1290/3239]	Time 0.176 (0.593)	Data Time 0.001 (0.026)	Loss 2.8086 (3.0103)	Entropy 1.41085 (1.41489)	Top-1 acc 54.688 (51.963)	Top-5 acc 77.344 (74.717)	lr 0.02077
Train [33][1300/3239]	Time 0.246 (0.592)	Data Time 0.001 (0.025)	Loss 2.9964 (3.0105)	Entropy 1.41085 (1.41486)	Top-1 acc 50.391 (51.957)	Top-5 acc 74.219 (74.716)	lr 0.02077
Train [33][1310/3239]	Time 0.201 (0.591)	Data Time 0.001 (0.025)	Loss 3.0906 (3.0106)	Entropy 1.41081 (1.41483)	Top-1 acc 50.781 (51.957)	Top-5 acc 72.656 (74.713)	lr 0.02077
Train [33][1320/3239]	Time 0.251 (0.590)	Data Time 0.001 (0.025)	Loss 2.8929 (3.0101)	Entropy 1.41067 (1.41480)	Top-1 acc 55.078 (51.970)	Top-5 acc 78.125 (74.722)	lr 0.02076
Train [33][1330/3239]	Time 2.445 (0.589)	Data Time 0.026 (0.025)	Loss 3.1094 (3.0105)	Entropy 1.41067 (1.41477)	Top-1 acc 48.438 (51.960)	Top-5 acc 71.484 (74.715)	lr 0.02076
Train [33][1340/3239]	Time 0.212 (0.586)	Data Time 0.001 (0.025)	Loss 3.0720 (3.0102)	Entropy 1.41052 (1.41473)	Top-1 acc 50.781 (51.964)	Top-5 acc 72.266 (74.722)	lr 0.02076
Train [33][1350/3239]	Time 0.223 (0.585)	Data Time 0.001 (0.024)	Loss 3.0994 (3.0108)	Entropy 1.41048 (1.41470)	Top-1 acc 49.609 (51.957)	Top-5 acc 73.828 (74.719)	lr 0.02076
Train [33][1360/3239]	Time 0.210 (0.584)	Data Time 0.001 (0.024)	Loss 3.0175 (3.0106)	Entropy 1.41042 (1.41467)	Top-1 acc 53.516 (51.951)	Top-5 acc 76.172 (74.724)	lr 0.02076
Train [33][1370/3239]	Time 0.217 (0.583)	Data Time 0.001 (0.024)	Loss 2.9955 (3.0109)	Entropy 1.41036 (1.41464)	Top-1 acc 50.391 (51.941)	Top-5 acc 76.172 (74.718)	lr 0.02076
Train [33][1380/3239]	Time 0.253 (0.582)	Data Time 0.002 (0.024)	Loss 3.1809 (3.0105)	Entropy 1.41029 (1.41461)	Top-1 acc 51.562 (51.945)	Top-5 acc 71.094 (74.735)	lr 0.02076
Train [33][1390/3239]	Time 0.203 (0.582)	Data Time 0.001 (0.024)	Loss 3.0867 (3.0103)	Entropy 1.41029 (1.41458)	Top-1 acc 50.000 (51.949)	Top-5 acc 74.219 (74.741)	lr 0.02076
Train [33][1400/3239]	Time 0.228 (0.581)	Data Time 0.001 (0.024)	Loss 3.1409 (3.0100)	Entropy 1.41018 (1.41455)	Top-1 acc 48.438 (51.949)	Top-5 acc 69.922 (74.741)	lr 0.02076
Train [33][1410/3239]	Time 0.226 (0.580)	Data Time 0.001 (0.024)	Loss 2.9410 (3.0103)	Entropy 1.41009 (1.41452)	Top-1 acc 54.297 (51.945)	Top-5 acc 76.953 (74.731)	lr 0.02076
Train [33][1420/3239]	Time 0.231 (0.579)	Data Time 0.002 (0.023)	Loss 2.7760 (3.0104)	Entropy 1.41001 (1.41448)	Top-1 acc 57.031 (51.944)	Top-5 acc 77.344 (74.727)	lr 0.02076
Train [33][1430/3239]	Time 0.239 (0.578)	Data Time 0.002 (0.023)	Loss 3.0984 (3.0103)	Entropy 1.40995 (1.41445)	Top-1 acc 50.781 (51.942)	Top-5 acc 72.656 (74.728)	lr 0.02076
Train [33][1440/3239]	Time 2.581 (0.577)	Data Time 0.002 (0.023)	Loss 3.0405 (3.0101)	Entropy 1.40995 (1.41442)	Top-1 acc 53.516 (51.950)	Top-5 acc 73.047 (74.733)	lr 0.02076
Train [33][1450/3239]	Time 0.257 (0.575)	Data Time 0.001 (0.023)	Loss 3.1439 (3.0105)	Entropy 1.40989 (1.41439)	Top-1 acc 48.828 (51.940)	Top-5 acc 74.219 (74.726)	lr 0.02075
Train [33][1460/3239]	Time 0.286 (0.574)	Data Time 0.001 (0.023)	Loss 3.1576 (3.0105)	Entropy 1.40984 (1.41436)	Top-1 acc 50.391 (51.938)	Top-5 acc 72.266 (74.732)	lr 0.02075
Train [33][1470/3239]	Time 0.237 (0.573)	Data Time 0.001 (0.023)	Loss 3.1525 (3.0103)	Entropy 1.40979 (1.41433)	Top-1 acc 47.266 (51.946)	Top-5 acc 71.094 (74.739)	lr 0.02075
Train [33][1480/3239]	Time 0.207 (0.572)	Data Time 0.001 (0.022)	Loss 3.2831 (3.0102)	Entropy 1.40974 (1.41430)	Top-1 acc 44.922 (51.945)	Top-5 acc 71.875 (74.736)	lr 0.02075
Train [33][1490/3239]	Time 0.228 (0.571)	Data Time 0.001 (0.022)	Loss 3.2716 (3.0104)	Entropy 1.40962 (1.41427)	Top-1 acc 47.656 (51.945)	Top-5 acc 69.141 (74.736)	lr 0.02075
Train [33][1500/3239]	Time 0.212 (0.571)	Data Time 0.001 (0.022)	Loss 2.9431 (3.0100)	Entropy 1.40944 (1.41423)	Top-1 acc 52.344 (51.957)	Top-5 acc 77.344 (74.745)	lr 0.02075
Train [33][1510/3239]	Time 0.221 (0.570)	Data Time 0.002 (0.022)	Loss 3.0390 (3.0099)	Entropy 1.40938 (1.41420)	Top-1 acc 50.781 (51.961)	Top-5 acc 74.609 (74.746)	lr 0.02075
Train [33][1520/3239]	Time 0.222 (0.569)	Data Time 0.001 (0.022)	Loss 3.0047 (3.0101)	Entropy 1.40921 (1.41417)	Top-1 acc 54.688 (51.961)	Top-5 acc 76.172 (74.743)	lr 0.02075
Train [33][1530/3239]	Time 0.226 (0.598)	Data Time 0.002 (0.022)	Loss 3.1450 (3.0100)	Entropy 1.40922 (1.41414)	Top-1 acc 50.000 (51.960)	Top-5 acc 73.047 (74.745)	lr 0.02075
Train [33][1540/3239]	Time 0.210 (0.597)	Data Time 0.002 (0.022)	Loss 3.2378 (3.0107)	Entropy 1.40917 (1.41411)	Top-1 acc 46.094 (51.931)	Top-5 acc 71.094 (74.731)	lr 0.02075
Train [33][1550/3239]	Time 2.560 (0.597)	Data Time 0.002 (0.022)	Loss 2.8951 (3.0107)	Entropy 1.40917 (1.41407)	Top-1 acc 60.938 (51.937)	Top-5 acc 74.219 (74.735)	lr 0.02075
Train [33][1560/3239]	Time 0.214 (0.594)	Data Time 0.001 (0.021)	Loss 3.0534 (3.0109)	Entropy 1.40914 (1.41404)	Top-1 acc 50.000 (51.925)	Top-5 acc 73.828 (74.731)	lr 0.02075
Train [33][1570/3239]	Time 0.200 (0.593)	Data Time 0.001 (0.021)	Loss 2.8746 (3.0105)	Entropy 1.40906 (1.41401)	Top-1 acc 53.516 (51.934)	Top-5 acc 79.297 (74.738)	lr 0.02075
Train [33][1580/3239]	Time 0.265 (0.592)	Data Time 0.002 (0.021)	Loss 3.0937 (3.0106)	Entropy 1.40901 (1.41398)	Top-1 acc 50.781 (51.924)	Top-5 acc 74.609 (74.734)	lr 0.02074
Train [33][1590/3239]	Time 0.223 (0.592)	Data Time 0.001 (0.021)	Loss 2.8957 (3.0106)	Entropy 1.40896 (1.41395)	Top-1 acc 57.812 (51.921)	Top-5 acc 76.953 (74.737)	lr 0.02074
Train [33][1600/3239]	Time 0.221 (0.591)	Data Time 0.001 (0.021)	Loss 3.1610 (3.0111)	Entropy 1.40882 (1.41392)	Top-1 acc 48.047 (51.911)	Top-5 acc 72.266 (74.725)	lr 0.02074
Train [33][1610/3239]	Time 0.296 (0.590)	Data Time 0.001 (0.021)	Loss 3.4198 (3.0116)	Entropy 1.40881 (1.41388)	Top-1 acc 38.281 (51.901)	Top-5 acc 71.094 (74.721)	lr 0.02074
Train [33][1620/3239]	Time 0.208 (0.589)	Data Time 0.001 (0.021)	Loss 2.7287 (3.0122)	Entropy 1.40883 (1.41385)	Top-1 acc 55.859 (51.889)	Top-5 acc 81.641 (74.717)	lr 0.02074
Train [33][1630/3239]	Time 0.201 (0.588)	Data Time 0.001 (0.021)	Loss 2.9782 (3.0123)	Entropy 1.40870 (1.41382)	Top-1 acc 53.906 (51.888)	Top-5 acc 77.734 (74.716)	lr 0.02074
Train [33][1640/3239]	Time 0.167 (0.587)	Data Time 0.001 (0.020)	Loss 3.1948 (3.0122)	Entropy 1.40861 (1.41379)	Top-1 acc 48.047 (51.885)	Top-5 acc 70.312 (74.725)	lr 0.02074
Train [33][1650/3239]	Time 0.211 (0.586)	Data Time 0.001 (0.020)	Loss 3.0744 (3.0123)	Entropy 1.40856 (1.41376)	Top-1 acc 49.609 (51.884)	Top-5 acc 75.000 (74.725)	lr 0.02074
Train [33][1660/3239]	Time 2.472 (0.586)	Data Time 0.001 (0.020)	Loss 3.0998 (3.0123)	Entropy 1.40856 (1.41373)	Top-1 acc 46.094 (51.879)	Top-5 acc 71.094 (74.723)	lr 0.02074
Train [33][1670/3239]	Time 0.213 (0.583)	Data Time 0.001 (0.020)	Loss 3.1207 (3.0128)	Entropy 1.40845 (1.41370)	Top-1 acc 52.344 (51.877)	Top-5 acc 71.875 (74.715)	lr 0.02074
Train [33][1680/3239]	Time 0.204 (0.583)	Data Time 0.001 (0.020)	Loss 2.8122 (3.0128)	Entropy 1.40832 (1.41367)	Top-1 acc 58.984 (51.879)	Top-5 acc 78.125 (74.716)	lr 0.02074
Train [33][1690/3239]	Time 0.219 (0.582)	Data Time 0.001 (0.020)	Loss 3.2702 (3.0129)	Entropy 1.40825 (1.41363)	Top-1 acc 41.797 (51.868)	Top-5 acc 69.141 (74.706)	lr 0.02074
Train [33][1700/3239]	Time 0.224 (0.581)	Data Time 0.001 (0.020)	Loss 3.0081 (3.0126)	Entropy 1.40820 (1.41360)	Top-1 acc 52.734 (51.876)	Top-5 acc 74.219 (74.709)	lr 0.02074
Train [33][1710/3239]	Time 0.212 (0.580)	Data Time 0.001 (0.020)	Loss 3.1632 (3.0124)	Entropy 1.40815 (1.41357)	Top-1 acc 46.484 (51.875)	Top-5 acc 71.094 (74.712)	lr 0.02073
Train [33][1720/3239]	Time 0.164 (0.579)	Data Time 0.001 (0.020)	Loss 2.9187 (3.0127)	Entropy 1.40813 (1.41354)	Top-1 acc 56.250 (51.871)	Top-5 acc 78.125 (74.704)	lr 0.02073
Train [33][1730/3239]	Time 0.219 (0.579)	Data Time 0.002 (0.020)	Loss 2.9410 (3.0125)	Entropy 1.40805 (1.41351)	Top-1 acc 55.078 (51.874)	Top-5 acc 76.562 (74.706)	lr 0.02073
Train [33][1740/3239]	Time 0.213 (0.578)	Data Time 0.001 (0.019)	Loss 2.7767 (3.0125)	Entropy 1.40798 (1.41348)	Top-1 acc 57.422 (51.872)	Top-5 acc 78.516 (74.705)	lr 0.02073
Train [33][1750/3239]	Time 0.224 (0.577)	Data Time 0.001 (0.019)	Loss 2.7996 (3.0121)	Entropy 1.40798 (1.41344)	Top-1 acc 59.375 (51.885)	Top-5 acc 76.562 (74.715)	lr 0.02073
Train [33][1760/3239]	Time 0.208 (0.576)	Data Time 0.001 (0.019)	Loss 3.0915 (3.0125)	Entropy 1.40796 (1.41341)	Top-1 acc 45.703 (51.883)	Top-5 acc 72.266 (74.709)	lr 0.02073
Train [33][1770/3239]	Time 2.266 (0.576)	Data Time 0.001 (0.019)	Loss 3.1850 (3.0126)	Entropy 1.40796 (1.41338)	Top-1 acc 45.312 (51.874)	Top-5 acc 68.359 (74.704)	lr 0.02073
Train [33][1780/3239]	Time 0.139 (0.574)	Data Time 0.001 (0.019)	Loss 3.0807 (3.0124)	Entropy 1.40791 (1.41335)	Top-1 acc 46.484 (51.876)	Top-5 acc 72.656 (74.708)	lr 0.02073
Train [33][1790/3239]	Time 0.221 (0.573)	Data Time 0.001 (0.019)	Loss 3.0953 (3.0123)	Entropy 1.40787 (1.41332)	Top-1 acc 51.953 (51.877)	Top-5 acc 73.828 (74.715)	lr 0.02073
Train [33][1800/3239]	Time 0.224 (0.572)	Data Time 0.001 (0.019)	Loss 2.7043 (3.0120)	Entropy 1.40784 (1.41329)	Top-1 acc 58.984 (51.883)	Top-5 acc 76.953 (74.717)	lr 0.02073
Train [33][1810/3239]	Time 0.232 (0.572)	Data Time 0.001 (0.019)	Loss 2.9829 (3.0119)	Entropy 1.40762 (1.41326)	Top-1 acc 53.125 (51.884)	Top-5 acc 73.828 (74.718)	lr 0.02073
Train [33][1820/3239]	Time 0.236 (0.571)	Data Time 0.001 (0.019)	Loss 2.9458 (3.0118)	Entropy 1.40756 (1.41323)	Top-1 acc 55.859 (51.886)	Top-5 acc 77.734 (74.720)	lr 0.02073
Train [33][1830/3239]	Time 0.256 (0.570)	Data Time 0.001 (0.019)	Loss 2.8524 (3.0118)	Entropy 1.40752 (1.41320)	Top-1 acc 53.906 (51.888)	Top-5 acc 80.078 (74.722)	lr 0.02073
Train [33][1840/3239]	Time 0.215 (0.569)	Data Time 0.001 (0.018)	Loss 3.2344 (3.0117)	Entropy 1.40743 (1.41317)	Top-1 acc 43.359 (51.894)	Top-5 acc 73.438 (74.728)	lr 0.02073
Train [33][1850/3239]	Time 0.224 (0.569)	Data Time 0.001 (0.018)	Loss 3.0443 (3.0119)	Entropy 1.40726 (1.41313)	Top-1 acc 51.953 (51.888)	Top-5 acc 74.219 (74.725)	lr 0.02072
Train [33][1860/3239]	Time 0.200 (0.568)	Data Time 0.001 (0.018)	Loss 3.2306 (3.0121)	Entropy 1.40719 (1.41310)	Top-1 acc 49.219 (51.876)	Top-5 acc 72.266 (74.722)	lr 0.02072
Train [33][1870/3239]	Time 0.235 (0.567)	Data Time 0.001 (0.018)	Loss 2.9384 (3.0123)	Entropy 1.40717 (1.41307)	Top-1 acc 54.688 (51.869)	Top-5 acc 76.953 (74.711)	lr 0.02072
Train [33][1880/3239]	Time 2.323 (0.567)	Data Time 0.001 (0.018)	Loss 3.0033 (3.0126)	Entropy 1.40717 (1.41304)	Top-1 acc 51.172 (51.865)	Top-5 acc 75.000 (74.705)	lr 0.02072
Train [33][1890/3239]	Time 0.328 (0.565)	Data Time 0.001 (0.018)	Loss 3.2606 (3.0127)	Entropy 1.40708 (1.41301)	Top-1 acc 43.750 (51.867)	Top-5 acc 69.141 (74.703)	lr 0.02072
Train [33][1900/3239]	Time 0.528 (0.586)	Data Time 0.003 (0.018)	Loss 3.0604 (3.0123)	Entropy 1.40684 (1.41298)	Top-1 acc 49.609 (51.875)	Top-5 acc 76.172 (74.710)	lr 0.02072
Train [33][1910/3239]	Time 0.234 (0.586)	Data Time 0.002 (0.018)	Loss 2.7436 (3.0123)	Entropy 1.40684 (1.41294)	Top-1 acc 55.859 (51.877)	Top-5 acc 79.297 (74.709)	lr 0.02072
Train [33][1920/3239]	Time 0.210 (0.585)	Data Time 0.001 (0.018)	Loss 3.1570 (3.0127)	Entropy 1.40678 (1.41291)	Top-1 acc 51.953 (51.872)	Top-5 acc 74.219 (74.703)	lr 0.02072
Train [33][1930/3239]	Time 0.201 (0.585)	Data Time 0.001 (0.018)	Loss 3.1772 (3.0127)	Entropy 1.40682 (1.41288)	Top-1 acc 50.391 (51.871)	Top-5 acc 73.047 (74.704)	lr 0.02072
Train [33][1940/3239]	Time 0.283 (0.584)	Data Time 0.001 (0.018)	Loss 2.8808 (3.0129)	Entropy 1.40677 (1.41285)	Top-1 acc 53.516 (51.865)	Top-5 acc 79.688 (74.702)	lr 0.02072
Train [33][1950/3239]	Time 0.229 (0.583)	Data Time 0.001 (0.018)	Loss 2.9354 (3.0131)	Entropy 1.40671 (1.41282)	Top-1 acc 52.734 (51.862)	Top-5 acc 75.781 (74.696)	lr 0.02072
Train [33][1960/3239]	Time 0.224 (0.582)	Data Time 0.001 (0.017)	Loss 2.8367 (3.0131)	Entropy 1.40674 (1.41279)	Top-1 acc 53.125 (51.860)	Top-5 acc 76.953 (74.696)	lr 0.02072
Train [33][1970/3239]	Time 0.206 (0.582)	Data Time 0.001 (0.017)	Loss 3.0681 (3.0130)	Entropy 1.40664 (1.41276)	Top-1 acc 49.219 (51.858)	Top-5 acc 71.094 (74.696)	lr 0.02072
Train [33][1980/3239]	Time 0.206 (0.581)	Data Time 0.001 (0.017)	Loss 3.2001 (3.0130)	Entropy 1.40655 (1.41273)	Top-1 acc 46.094 (51.854)	Top-5 acc 70.703 (74.691)	lr 0.02071
Train [33][1990/3239]	Time 2.338 (0.581)	Data Time 0.001 (0.017)	Loss 3.0393 (3.0131)	Entropy 1.40655 (1.41269)	Top-1 acc 50.000 (51.853)	Top-5 acc 73.047 (74.688)	lr 0.02071
Train [33][2000/3239]	Time 0.270 (0.579)	Data Time 0.001 (0.017)	Loss 2.9634 (3.0128)	Entropy 1.40654 (1.41266)	Top-1 acc 50.000 (51.854)	Top-5 acc 76.562 (74.694)	lr 0.02071
Train [33][2010/3239]	Time 0.248 (0.578)	Data Time 0.001 (0.017)	Loss 2.9806 (3.0131)	Entropy 1.40653 (1.41263)	Top-1 acc 50.391 (51.841)	Top-5 acc 76.172 (74.689)	lr 0.02071
Train [33][2020/3239]	Time 0.227 (0.578)	Data Time 0.001 (0.017)	Loss 3.0236 (3.0131)	Entropy 1.40640 (1.41260)	Top-1 acc 52.734 (51.839)	Top-5 acc 73.438 (74.691)	lr 0.02071
Train [33][2030/3239]	Time 0.243 (0.577)	Data Time 0.001 (0.017)	Loss 2.9377 (3.0131)	Entropy 1.40636 (1.41257)	Top-1 acc 55.859 (51.842)	Top-5 acc 75.000 (74.689)	lr 0.02071
Train [33][2040/3239]	Time 0.227 (0.576)	Data Time 0.001 (0.017)	Loss 3.0951 (3.0136)	Entropy 1.40634 (1.41254)	Top-1 acc 48.438 (51.825)	Top-5 acc 73.828 (74.681)	lr 0.02071
Train [33][2050/3239]	Time 0.224 (0.576)	Data Time 0.001 (0.017)	Loss 3.2111 (3.0141)	Entropy 1.40638 (1.41251)	Top-1 acc 45.312 (51.809)	Top-5 acc 73.047 (74.671)	lr 0.02071
Train [33][2060/3239]	Time 0.317 (0.575)	Data Time 0.002 (0.017)	Loss 2.9029 (3.0140)	Entropy 1.40631 (1.41248)	Top-1 acc 53.516 (51.813)	Top-5 acc 75.781 (74.674)	lr 0.02071
Train [33][2070/3239]	Time 0.219 (0.574)	Data Time 0.001 (0.017)	Loss 3.0407 (3.0140)	Entropy 1.40631 (1.41245)	Top-1 acc 48.828 (51.812)	Top-5 acc 73.828 (74.672)	lr 0.02071
Train [33][2080/3239]	Time 0.213 (0.574)	Data Time 0.001 (0.017)	Loss 2.8294 (3.0140)	Entropy 1.40618 (1.41242)	Top-1 acc 55.859 (51.812)	Top-5 acc 76.562 (74.669)	lr 0.02071
Train [33][2090/3239]	Time 0.231 (0.573)	Data Time 0.001 (0.016)	Loss 2.9329 (3.0142)	Entropy 1.40613 (1.41239)	Top-1 acc 53.516 (51.809)	Top-5 acc 76.953 (74.663)	lr 0.02071
Train [33][2100/3239]	Time 2.411 (0.573)	Data Time 0.001 (0.016)	Loss 3.0374 (3.0141)	Entropy 1.40613 (1.41236)	Top-1 acc 49.219 (51.807)	Top-5 acc 76.172 (74.662)	lr 0.02071
Train [33][2110/3239]	Time 0.355 (0.571)	Data Time 0.001 (0.016)	Loss 3.0298 (3.0141)	Entropy 1.40610 (1.41233)	Top-1 acc 50.781 (51.801)	Top-5 acc 76.562 (74.663)	lr 0.02070
Train [33][2120/3239]	Time 0.216 (0.571)	Data Time 0.002 (0.016)	Loss 3.0026 (3.0139)	Entropy 1.40603 (1.41230)	Top-1 acc 53.516 (51.806)	Top-5 acc 75.000 (74.666)	lr 0.02070
Train [33][2130/3239]	Time 0.229 (0.570)	Data Time 0.001 (0.016)	Loss 2.9362 (3.0139)	Entropy 1.40594 (1.41227)	Top-1 acc 51.953 (51.806)	Top-5 acc 78.516 (74.668)	lr 0.02070
Train [33][2140/3239]	Time 0.214 (0.570)	Data Time 0.001 (0.016)	Loss 2.8331 (3.0138)	Entropy 1.40593 (1.41224)	Top-1 acc 56.250 (51.810)	Top-5 acc 78.906 (74.667)	lr 0.02070
Train [33][2150/3239]	Time 0.218 (0.569)	Data Time 0.002 (0.016)	Loss 2.8727 (3.0139)	Entropy 1.40588 (1.41221)	Top-1 acc 54.688 (51.809)	Top-5 acc 78.516 (74.660)	lr 0.02070
Train [33][2160/3239]	Time 0.330 (0.569)	Data Time 0.001 (0.016)	Loss 3.0403 (3.0140)	Entropy 1.40581 (1.41218)	Top-1 acc 51.562 (51.810)	Top-5 acc 72.266 (74.655)	lr 0.02070
Train [33][2170/3239]	Time 0.221 (0.568)	Data Time 0.001 (0.016)	Loss 3.2277 (3.0139)	Entropy 1.40565 (1.41215)	Top-1 acc 46.484 (51.813)	Top-5 acc 71.094 (74.658)	lr 0.02070
Train [33][2180/3239]	Time 0.223 (0.567)	Data Time 0.001 (0.016)	Loss 3.0172 (3.0138)	Entropy 1.40549 (1.41212)	Top-1 acc 51.953 (51.817)	Top-5 acc 71.484 (74.661)	lr 0.02070
Train [33][2190/3239]	Time 0.213 (0.567)	Data Time 0.001 (0.016)	Loss 3.1482 (3.0139)	Entropy 1.40548 (1.41209)	Top-1 acc 48.828 (51.815)	Top-5 acc 74.609 (74.663)	lr 0.02070
Train [33][2200/3239]	Time 0.212 (0.566)	Data Time 0.001 (0.016)	Loss 3.1020 (3.0140)	Entropy 1.40541 (1.41206)	Top-1 acc 51.562 (51.813)	Top-5 acc 71.875 (74.661)	lr 0.02070
Train [33][2210/3239]	Time 2.464 (0.566)	Data Time 0.001 (0.016)	Loss 3.0215 (3.0142)	Entropy 1.40541 (1.41203)	Top-1 acc 50.000 (51.814)	Top-5 acc 73.047 (74.660)	lr 0.02070
Train [33][2220/3239]	Time 0.211 (0.565)	Data Time 0.002 (0.016)	Loss 2.9228 (3.0144)	Entropy 1.40535 (1.41200)	Top-1 acc 56.250 (51.810)	Top-5 acc 76.953 (74.656)	lr 0.02070
Train [33][2230/3239]	Time 0.220 (0.564)	Data Time 0.001 (0.016)	Loss 2.9179 (3.0141)	Entropy 1.40532 (1.41197)	Top-1 acc 52.734 (51.814)	Top-5 acc 75.781 (74.664)	lr 0.02070
Train [33][2240/3239]	Time 0.235 (0.563)	Data Time 0.001 (0.015)	Loss 3.2425 (3.0145)	Entropy 1.40516 (1.41194)	Top-1 acc 49.609 (51.806)	Top-5 acc 69.531 (74.658)	lr 0.02069
Train [33][2250/3239]	Time 0.224 (0.563)	Data Time 0.001 (0.015)	Loss 2.8990 (3.0147)	Entropy 1.40514 (1.41191)	Top-1 acc 54.688 (51.803)	Top-5 acc 76.562 (74.657)	lr 0.02069
Train [33][2260/3239]	Time 0.269 (0.582)	Data Time 0.002 (0.015)	Loss 2.9654 (3.0146)	Entropy 1.40498 (1.41188)	Top-1 acc 54.297 (51.804)	Top-5 acc 73.438 (74.654)	lr 0.02069
Train [33][2270/3239]	Time 0.239 (0.581)	Data Time 0.002 (0.015)	Loss 3.0110 (3.0148)	Entropy 1.40490 (1.41185)	Top-1 acc 49.219 (51.796)	Top-5 acc 74.609 (74.653)	lr 0.02069
Train [33][2280/3239]	Time 0.206 (0.581)	Data Time 0.002 (0.015)	Loss 2.9656 (3.0147)	Entropy 1.40479 (1.41182)	Top-1 acc 50.781 (51.795)	Top-5 acc 76.562 (74.653)	lr 0.02069
Train [33][2290/3239]	Time 0.223 (0.580)	Data Time 0.001 (0.015)	Loss 3.0106 (3.0149)	Entropy 1.40475 (1.41179)	Top-1 acc 50.391 (51.790)	Top-5 acc 75.781 (74.650)	lr 0.02069
Train [33][2300/3239]	Time 0.191 (0.580)	Data Time 0.001 (0.015)	Loss 3.0998 (3.0153)	Entropy 1.40463 (1.41176)	Top-1 acc 49.609 (51.779)	Top-5 acc 73.828 (74.640)	lr 0.02069
Train [33][2310/3239]	Time 0.207 (0.579)	Data Time 0.001 (0.015)	Loss 2.9571 (3.0153)	Entropy 1.40477 (1.41173)	Top-1 acc 51.562 (51.779)	Top-5 acc 75.000 (74.639)	lr 0.02069
Train [33][2320/3239]	Time 2.353 (0.578)	Data Time 0.001 (0.015)	Loss 2.8820 (3.0152)	Entropy 1.40477 (1.41170)	Top-1 acc 55.859 (51.778)	Top-5 acc 76.172 (74.639)	lr 0.02069
Train [33][2330/3239]	Time 0.263 (0.577)	Data Time 0.001 (0.015)	Loss 2.8331 (3.0149)	Entropy 1.40472 (1.41167)	Top-1 acc 55.469 (51.781)	Top-5 acc 76.953 (74.646)	lr 0.02069
Train [33][2340/3239]	Time 0.224 (0.577)	Data Time 0.001 (0.015)	Loss 2.8659 (3.0152)	Entropy 1.40457 (1.41164)	Top-1 acc 55.078 (51.775)	Top-5 acc 78.516 (74.640)	lr 0.02069
Train [33][2350/3239]	Time 0.218 (0.576)	Data Time 0.001 (0.015)	Loss 2.9621 (3.0151)	Entropy 1.40456 (1.41161)	Top-1 acc 51.562 (51.775)	Top-5 acc 76.562 (74.640)	lr 0.02069
Train [33][2360/3239]	Time 0.229 (0.575)	Data Time 0.001 (0.015)	Loss 2.9013 (3.0151)	Entropy 1.40452 (1.41158)	Top-1 acc 52.344 (51.775)	Top-5 acc 75.781 (74.639)	lr 0.02069
Train [33][2370/3239]	Time 0.228 (0.575)	Data Time 0.001 (0.015)	Loss 2.9291 (3.0150)	Entropy 1.40448 (1.41155)	Top-1 acc 55.469 (51.777)	Top-5 acc 76.953 (74.637)	lr 0.02068
Train [33][2380/3239]	Time 0.210 (0.574)	Data Time 0.001 (0.015)	Loss 2.9663 (3.0151)	Entropy 1.40445 (1.41152)	Top-1 acc 57.031 (51.778)	Top-5 acc 73.828 (74.637)	lr 0.02068
Train [33][2390/3239]	Time 0.217 (0.574)	Data Time 0.001 (0.015)	Loss 2.6214 (3.0150)	Entropy 1.40431 (1.41149)	Top-1 acc 60.938 (51.778)	Top-5 acc 79.688 (74.639)	lr 0.02068
Train [33][2400/3239]	Time 0.216 (0.574)	Data Time 0.001 (0.015)	Loss 3.0522 (3.0153)	Entropy 1.40426 (1.41146)	Top-1 acc 49.219 (51.776)	Top-5 acc 74.219 (74.634)	lr 0.02068
Train [33][2410/3239]	Time 0.210 (0.573)	Data Time 0.001 (0.015)	Loss 2.9874 (3.0152)	Entropy 1.40421 (1.41143)	Top-1 acc 49.219 (51.777)	Top-5 acc 75.000 (74.632)	lr 0.02068
Train [33][2420/3239]	Time 0.252 (0.572)	Data Time 0.001 (0.014)	Loss 3.1700 (3.0153)	Entropy 1.40420 (1.41140)	Top-1 acc 44.922 (51.776)	Top-5 acc 72.656 (74.631)	lr 0.02068
Train [33][2430/3239]	Time 2.538 (0.572)	Data Time 0.002 (0.014)	Loss 2.8261 (3.0150)	Entropy 1.40420 (1.41137)	Top-1 acc 56.250 (51.783)	Top-5 acc 74.609 (74.631)	lr 0.02068
Train [33][2440/3239]	Time 0.242 (0.571)	Data Time 0.002 (0.014)	Loss 3.0270 (3.0150)	Entropy 1.40418 (1.41134)	Top-1 acc 53.516 (51.785)	Top-5 acc 72.656 (74.632)	lr 0.02068
Train [33][2450/3239]	Time 0.225 (0.570)	Data Time 0.002 (0.014)	Loss 2.8917 (3.0154)	Entropy 1.40406 (1.41131)	Top-1 acc 56.641 (51.776)	Top-5 acc 76.953 (74.627)	lr 0.02068
Train [33][2460/3239]	Time 0.245 (0.570)	Data Time 0.001 (0.014)	Loss 2.9162 (3.0152)	Entropy 1.40403 (1.41128)	Top-1 acc 56.641 (51.781)	Top-5 acc 75.781 (74.627)	lr 0.02068
Train [33][2470/3239]	Time 0.217 (0.569)	Data Time 0.001 (0.014)	Loss 2.8263 (3.0151)	Entropy 1.40391 (1.41125)	Top-1 acc 56.641 (51.782)	Top-5 acc 77.734 (74.631)	lr 0.02068
Train [33][2480/3239]	Time 0.300 (0.569)	Data Time 0.002 (0.014)	Loss 3.1313 (3.0152)	Entropy 1.40388 (1.41122)	Top-1 acc 49.219 (51.778)	Top-5 acc 70.703 (74.629)	lr 0.02068
Train [33][2490/3239]	Time 0.233 (0.568)	Data Time 0.001 (0.014)	Loss 2.8453 (3.0149)	Entropy 1.40378 (1.41119)	Top-1 acc 58.203 (51.786)	Top-5 acc 79.297 (74.633)	lr 0.02068
Train [33][2500/3239]	Time 0.208 (0.568)	Data Time 0.001 (0.014)	Loss 2.9849 (3.0147)	Entropy 1.40349 (1.41116)	Top-1 acc 53.516 (51.789)	Top-5 acc 76.172 (74.637)	lr 0.02067
Train [33][2510/3239]	Time 0.251 (0.567)	Data Time 0.001 (0.014)	Loss 3.0478 (3.0147)	Entropy 1.40336 (1.41113)	Top-1 acc 50.391 (51.788)	Top-5 acc 74.219 (74.635)	lr 0.02067
Train [33][2520/3239]	Time 0.198 (0.567)	Data Time 0.001 (0.014)	Loss 2.9729 (3.0145)	Entropy 1.40333 (1.41110)	Top-1 acc 50.000 (51.789)	Top-5 acc 75.391 (74.640)	lr 0.02067
Train [33][2530/3239]	Time 0.288 (0.566)	Data Time 0.001 (0.014)	Loss 2.9296 (3.0146)	Entropy 1.40282 (1.41107)	Top-1 acc 54.688 (51.792)	Top-5 acc 75.000 (74.638)	lr 0.02067
Train [33][2540/3239]	Time 2.486 (0.566)	Data Time 0.001 (0.014)	Loss 3.1209 (3.0144)	Entropy 1.40282 (1.41104)	Top-1 acc 56.250 (51.791)	Top-5 acc 71.484 (74.638)	lr 0.02067
Train [33][2550/3239]	Time 0.222 (0.565)	Data Time 0.001 (0.014)	Loss 3.0823 (3.0144)	Entropy 1.40254 (1.41100)	Top-1 acc 49.219 (51.790)	Top-5 acc 73.438 (74.637)	lr 0.02067
Train [33][2560/3239]	Time 0.210 (0.564)	Data Time 0.001 (0.014)	Loss 3.1513 (3.0145)	Entropy 1.40251 (1.41097)	Top-1 acc 53.125 (51.794)	Top-5 acc 71.484 (74.637)	lr 0.02067
Train [33][2570/3239]	Time 0.231 (0.564)	Data Time 0.001 (0.014)	Loss 3.0701 (3.0146)	Entropy 1.40245 (1.41094)	Top-1 acc 52.734 (51.794)	Top-5 acc 72.266 (74.633)	lr 0.02067
Train [33][2580/3239]	Time 0.231 (0.563)	Data Time 0.001 (0.014)	Loss 3.1050 (3.0145)	Entropy 1.40238 (1.41091)	Top-1 acc 47.656 (51.794)	Top-5 acc 75.000 (74.635)	lr 0.02067
Train [33][2590/3239]	Time 0.302 (0.563)	Data Time 0.001 (0.014)	Loss 2.9907 (3.0145)	Entropy 1.40234 (1.41087)	Top-1 acc 53.516 (51.796)	Top-5 acc 74.219 (74.637)	lr 0.02067
Train [33][2600/3239]	Time 0.210 (0.563)	Data Time 0.001 (0.014)	Loss 2.9321 (3.0148)	Entropy 1.40233 (1.41084)	Top-1 acc 55.469 (51.795)	Top-5 acc 77.344 (74.632)	lr 0.02067
Train [33][2610/3239]	Time 0.217 (0.562)	Data Time 0.001 (0.014)	Loss 2.9366 (3.0146)	Entropy 1.40230 (1.41081)	Top-1 acc 54.297 (51.796)	Top-5 acc 76.172 (74.636)	lr 0.02067
Train [33][2620/3239]	Time 0.260 (0.577)	Data Time 0.002 (0.013)	Loss 2.8031 (3.0147)	Entropy 1.40229 (1.41077)	Top-1 acc 53.516 (51.795)	Top-5 acc 82.031 (74.636)	lr 0.02067
Train [33][2630/3239]	Time 0.216 (0.577)	Data Time 0.002 (0.013)	Loss 3.0026 (3.0149)	Entropy 1.40218 (1.41074)	Top-1 acc 55.469 (51.788)	Top-5 acc 71.875 (74.630)	lr 0.02066
Train [33][2640/3239]	Time 0.312 (0.577)	Data Time 0.001 (0.013)	Loss 3.1439 (3.0149)	Entropy 1.40205 (1.41071)	Top-1 acc 44.922 (51.781)	Top-5 acc 70.312 (74.629)	lr 0.02066
Train [33][2650/3239]	Time 0.218 (0.576)	Data Time 0.001 (0.013)	Loss 3.2779 (3.0152)	Entropy 1.40189 (1.41068)	Top-1 acc 48.047 (51.776)	Top-5 acc 69.922 (74.624)	lr 0.02066
Train [33][2660/3239]	Time 0.266 (0.576)	Data Time 0.001 (0.013)	Loss 2.9742 (3.0150)	Entropy 1.40189 (1.41064)	Top-1 acc 54.297 (51.780)	Top-5 acc 75.391 (74.630)	lr 0.02066
Train [33][2670/3239]	Time 0.233 (0.575)	Data Time 0.001 (0.013)	Loss 2.9661 (3.0149)	Entropy 1.40182 (1.41061)	Top-1 acc 51.953 (51.785)	Top-5 acc 76.562 (74.633)	lr 0.02066
Train [33][2680/3239]	Time 0.254 (0.575)	Data Time 0.002 (0.013)	Loss 3.1854 (3.0149)	Entropy 1.40169 (1.41058)	Top-1 acc 51.953 (51.788)	Top-5 acc 70.703 (74.632)	lr 0.02066
Train [33][2690/3239]	Time 0.252 (0.574)	Data Time 0.001 (0.013)	Loss 3.1469 (3.0153)	Entropy 1.40164 (1.41055)	Top-1 acc 46.875 (51.780)	Top-5 acc 71.875 (74.622)	lr 0.02066
Train [33][2700/3239]	Time 0.216 (0.574)	Data Time 0.001 (0.013)	Loss 3.0362 (3.0152)	Entropy 1.40157 (1.41051)	Top-1 acc 52.344 (51.785)	Top-5 acc 74.609 (74.625)	lr 0.02066
Train [33][2710/3239]	Time 0.217 (0.574)	Data Time 0.001 (0.013)	Loss 3.1900 (3.0152)	Entropy 1.40155 (1.41048)	Top-1 acc 48.047 (51.784)	Top-5 acc 69.531 (74.624)	lr 0.02066
Train [33][2720/3239]	Time 0.255 (0.573)	Data Time 0.001 (0.013)	Loss 3.2707 (3.0153)	Entropy 1.40155 (1.41045)	Top-1 acc 45.703 (51.777)	Top-5 acc 70.703 (74.622)	lr 0.02066
Train [33][2730/3239]	Time 0.330 (0.573)	Data Time 0.001 (0.013)	Loss 3.0065 (3.0152)	Entropy 1.40149 (1.41041)	Top-1 acc 51.953 (51.778)	Top-5 acc 73.828 (74.626)	lr 0.02066
Train [33][2740/3239]	Time 0.242 (0.572)	Data Time 0.002 (0.013)	Loss 3.1142 (3.0152)	Entropy 1.40146 (1.41038)	Top-1 acc 49.609 (51.779)	Top-5 acc 71.094 (74.625)	lr 0.02066
Train [33][2750/3239]	Time 0.393 (0.572)	Data Time 0.001 (0.013)	Loss 3.0542 (3.0152)	Entropy 1.40120 (1.41035)	Top-1 acc 53.125 (51.778)	Top-5 acc 72.266 (74.626)	lr 0.02066
Train [33][2760/3239]	Time 0.219 (0.571)	Data Time 0.001 (0.013)	Loss 3.0706 (3.0151)	Entropy 1.40113 (1.41032)	Top-1 acc 50.391 (51.781)	Top-5 acc 73.828 (74.624)	lr 0.02065
Train [33][2770/3239]	Time 0.210 (0.571)	Data Time 0.001 (0.013)	Loss 3.2885 (3.0152)	Entropy 1.40112 (1.41028)	Top-1 acc 47.266 (51.780)	Top-5 acc 69.531 (74.617)	lr 0.02065
Train [33][2780/3239]	Time 0.207 (0.570)	Data Time 0.001 (0.013)	Loss 2.9767 (3.0151)	Entropy 1.40105 (1.41025)	Top-1 acc 52.344 (51.783)	Top-5 acc 75.781 (74.620)	lr 0.02065
Train [33][2790/3239]	Time 0.197 (0.570)	Data Time 0.001 (0.013)	Loss 2.8858 (3.0151)	Entropy 1.40083 (1.41022)	Top-1 acc 53.906 (51.783)	Top-5 acc 77.734 (74.621)	lr 0.02065
Train [33][2800/3239]	Time 0.263 (0.570)	Data Time 0.001 (0.013)	Loss 3.1491 (3.0149)	Entropy 1.40074 (1.41018)	Top-1 acc 50.391 (51.793)	Top-5 acc 71.875 (74.625)	lr 0.02065
Train [33][2810/3239]	Time 0.226 (0.569)	Data Time 0.001 (0.013)	Loss 3.1963 (3.0148)	Entropy 1.40073 (1.41015)	Top-1 acc 45.703 (51.795)	Top-5 acc 72.266 (74.631)	lr 0.02065
Train [33][2820/3239]	Time 0.246 (0.569)	Data Time 0.001 (0.013)	Loss 2.8877 (3.0145)	Entropy 1.40068 (1.41012)	Top-1 acc 56.250 (51.801)	Top-5 acc 75.000 (74.635)	lr 0.02065
Train [33][2830/3239]	Time 0.236 (0.568)	Data Time 0.002 (0.013)	Loss 3.0088 (3.0143)	Entropy 1.40067 (1.41008)	Top-1 acc 52.344 (51.807)	Top-5 acc 71.875 (74.638)	lr 0.02065
Train [33][2840/3239]	Time 0.239 (0.568)	Data Time 0.002 (0.013)	Loss 3.0949 (3.0143)	Entropy 1.40032 (1.41005)	Top-1 acc 53.516 (51.812)	Top-5 acc 71.875 (74.637)	lr 0.02065
Train [33][2850/3239]	Time 0.250 (0.567)	Data Time 0.001 (0.013)	Loss 3.0884 (3.0145)	Entropy 1.40022 (1.41002)	Top-1 acc 50.781 (51.811)	Top-5 acc 72.266 (74.632)	lr 0.02065
Train [33][2860/3239]	Time 0.338 (0.567)	Data Time 0.001 (0.012)	Loss 2.8837 (3.0144)	Entropy 1.40010 (1.40998)	Top-1 acc 53.906 (51.808)	Top-5 acc 75.781 (74.632)	lr 0.02065
Train [33][2870/3239]	Time 0.220 (0.567)	Data Time 0.002 (0.012)	Loss 2.9954 (3.0143)	Entropy 1.40001 (1.40995)	Top-1 acc 53.125 (51.811)	Top-5 acc 76.562 (74.635)	lr 0.02065
Train [33][2880/3239]	Time 0.229 (0.566)	Data Time 0.001 (0.012)	Loss 3.2531 (3.0144)	Entropy 1.39985 (1.40991)	Top-1 acc 45.312 (51.806)	Top-5 acc 71.094 (74.635)	lr 0.02065
Train [33][2890/3239]	Time 0.276 (0.566)	Data Time 0.001 (0.012)	Loss 3.0007 (3.0143)	Entropy 1.39974 (1.40988)	Top-1 acc 54.688 (51.809)	Top-5 acc 76.953 (74.637)	lr 0.02064
Train [33][2900/3239]	Time 0.218 (0.565)	Data Time 0.001 (0.012)	Loss 2.9977 (3.0144)	Entropy 1.39974 (1.40984)	Top-1 acc 51.172 (51.809)	Top-5 acc 75.781 (74.638)	lr 0.02064
Train [33][2910/3239]	Time 0.306 (0.565)	Data Time 0.001 (0.012)	Loss 2.9211 (3.0143)	Entropy 1.39968 (1.40981)	Top-1 acc 51.953 (51.807)	Top-5 acc 77.344 (74.641)	lr 0.02064
Train [33][2920/3239]	Time 0.277 (0.565)	Data Time 0.025 (0.012)	Loss 2.9450 (3.0143)	Entropy 1.39952 (1.40977)	Top-1 acc 51.172 (51.806)	Top-5 acc 74.609 (74.642)	lr 0.02064
Train [33][2930/3239]	Time 0.253 (0.564)	Data Time 0.001 (0.012)	Loss 3.0527 (3.0142)	Entropy 1.39945 (1.40974)	Top-1 acc 47.656 (51.807)	Top-5 acc 72.656 (74.644)	lr 0.02064
Train [33][2940/3239]	Time 0.220 (0.564)	Data Time 0.001 (0.012)	Loss 3.1721 (3.0143)	Entropy 1.39944 (1.40970)	Top-1 acc 46.094 (51.802)	Top-5 acc 69.141 (74.642)	lr 0.02064
Train [33][2950/3239]	Time 0.272 (0.577)	Data Time 0.004 (0.012)	Loss 3.3591 (3.0147)	Entropy 1.39941 (1.40967)	Top-1 acc 46.875 (51.792)	Top-5 acc 69.531 (74.635)	lr 0.02064
Train [33][2960/3239]	Time 0.253 (0.577)	Data Time 0.002 (0.012)	Loss 3.1682 (3.0148)	Entropy 1.39935 (1.40963)	Top-1 acc 48.828 (51.792)	Top-5 acc 67.969 (74.632)	lr 0.02064
Train [33][2970/3239]	Time 0.277 (0.577)	Data Time 0.002 (0.012)	Loss 2.9572 (3.0148)	Entropy 1.39933 (1.40960)	Top-1 acc 53.125 (51.795)	Top-5 acc 74.609 (74.630)	lr 0.02064
Train [33][2980/3239]	Time 0.264 (0.576)	Data Time 0.001 (0.012)	Loss 3.1901 (3.0148)	Entropy 1.39928 (1.40956)	Top-1 acc 50.000 (51.790)	Top-5 acc 69.141 (74.627)	lr 0.02064
Train [33][2990/3239]	Time 0.225 (0.576)	Data Time 0.002 (0.012)	Loss 3.0156 (3.0149)	Entropy 1.39923 (1.40953)	Top-1 acc 52.344 (51.785)	Top-5 acc 73.047 (74.622)	lr 0.02064
Train [33][3000/3239]	Time 0.245 (0.576)	Data Time 0.001 (0.012)	Loss 2.8802 (3.0148)	Entropy 1.39922 (1.40950)	Top-1 acc 53.516 (51.787)	Top-5 acc 77.344 (74.624)	lr 0.02064
Train [33][3010/3239]	Time 0.218 (0.575)	Data Time 0.001 (0.012)	Loss 3.0876 (3.0148)	Entropy 1.39920 (1.40946)	Top-1 acc 50.391 (51.786)	Top-5 acc 75.000 (74.622)	lr 0.02064
Train [33][3020/3239]	Time 0.275 (0.575)	Data Time 0.001 (0.012)	Loss 3.0089 (3.0149)	Entropy 1.39916 (1.40943)	Top-1 acc 54.297 (51.783)	Top-5 acc 74.609 (74.618)	lr 0.02063
Train [33][3030/3239]	Time 0.269 (0.574)	Data Time 0.001 (0.012)	Loss 3.1450 (3.0150)	Entropy 1.39913 (1.40939)	Top-1 acc 50.391 (51.783)	Top-5 acc 71.484 (74.616)	lr 0.02063
Train [33][3040/3239]	Time 0.343 (0.574)	Data Time 0.002 (0.012)	Loss 2.9355 (3.0150)	Entropy 1.39906 (1.40936)	Top-1 acc 56.250 (51.784)	Top-5 acc 76.562 (74.615)	lr 0.02063
Train [33][3050/3239]	Time 0.265 (0.574)	Data Time 0.002 (0.012)	Loss 3.1294 (3.0150)	Entropy 1.39900 (1.40933)	Top-1 acc 50.391 (51.786)	Top-5 acc 71.875 (74.616)	lr 0.02063
Train [33][3060/3239]	Time 0.227 (0.574)	Data Time 0.003 (0.012)	Loss 3.2377 (3.0150)	Entropy 1.39895 (1.40929)	Top-1 acc 47.656 (51.787)	Top-5 acc 72.656 (74.614)	lr 0.02063
Train [33][3070/3239]	Time 0.291 (0.573)	Data Time 0.001 (0.012)	Loss 3.3609 (3.0151)	Entropy 1.39887 (1.40926)	Top-1 acc 41.406 (51.783)	Top-5 acc 65.625 (74.615)	lr 0.02063
Train [33][3080/3239]	Time 0.217 (0.573)	Data Time 0.001 (0.012)	Loss 2.8440 (3.0148)	Entropy 1.39877 (1.40922)	Top-1 acc 54.688 (51.789)	Top-5 acc 76.562 (74.619)	lr 0.02063
Train [33][3090/3239]	Time 0.222 (0.573)	Data Time 0.001 (0.012)	Loss 2.9718 (3.0149)	Entropy 1.39873 (1.40919)	Top-1 acc 53.906 (51.786)	Top-5 acc 72.266 (74.618)	lr 0.02063
Train [33][3100/3239]	Time 0.216 (0.573)	Data Time 0.001 (0.012)	Loss 3.0095 (3.0149)	Entropy 1.39870 (1.40916)	Top-1 acc 49.219 (51.785)	Top-5 acc 77.734 (74.622)	lr 0.02063
Train [33][3110/3239]	Time 0.198 (0.572)	Data Time 0.001 (0.012)	Loss 3.0709 (3.0148)	Entropy 1.39844 (1.40912)	Top-1 acc 50.000 (51.786)	Top-5 acc 71.875 (74.622)	lr 0.02063
Train [33][3120/3239]	Time 0.219 (0.572)	Data Time 0.001 (0.012)	Loss 2.9494 (3.0148)	Entropy 1.39841 (1.40909)	Top-1 acc 51.562 (51.789)	Top-5 acc 76.562 (74.621)	lr 0.02063
Train [33][3130/3239]	Time 0.366 (0.571)	Data Time 0.001 (0.012)	Loss 2.8099 (3.0147)	Entropy 1.39837 (1.40906)	Top-1 acc 60.938 (51.792)	Top-5 acc 80.078 (74.623)	lr 0.02063
Train [33][3140/3239]	Time 0.236 (0.571)	Data Time 0.001 (0.012)	Loss 3.1345 (3.0149)	Entropy 1.39820 (1.40902)	Top-1 acc 48.047 (51.787)	Top-5 acc 73.828 (74.622)	lr 0.02063
Train [33][3150/3239]	Time 0.237 (0.571)	Data Time 0.001 (0.012)	Loss 3.0282 (3.0149)	Entropy 1.39817 (1.40899)	Top-1 acc 49.219 (51.788)	Top-5 acc 74.219 (74.620)	lr 0.02063
Train [33][3160/3239]	Time 0.272 (0.570)	Data Time 0.001 (0.011)	Loss 2.8926 (3.0150)	Entropy 1.39816 (1.40895)	Top-1 acc 58.203 (51.786)	Top-5 acc 77.344 (74.617)	lr 0.02062
Train [33][3170/3239]	Time 0.217 (0.570)	Data Time 0.001 (0.011)	Loss 3.0036 (3.0151)	Entropy 1.39808 (1.40892)	Top-1 acc 51.172 (51.783)	Top-5 acc 73.828 (74.613)	lr 0.02062
Train [33][3180/3239]	Time 0.318 (0.570)	Data Time 0.000 (0.011)	Loss 2.9533 (3.0150)	Entropy 1.39837 (1.40888)	Top-1 acc 54.688 (51.786)	Top-5 acc 75.000 (74.617)	lr 0.02062
Train [33][3190/3239]	Time 0.214 (0.569)	Data Time 0.000 (0.011)	Loss 3.0247 (3.0148)	Entropy 1.39827 (1.40885)	Top-1 acc 50.391 (51.788)	Top-5 acc 75.000 (74.622)	lr 0.02062
Train [33][3200/3239]	Time 0.202 (0.569)	Data Time 0.000 (0.011)	Loss 3.0521 (3.0146)	Entropy 1.39816 (1.40882)	Top-1 acc 52.344 (51.793)	Top-5 acc 72.656 (74.626)	lr 0.02062
Train [33][3210/3239]	Time 0.208 (0.568)	Data Time 0.000 (0.011)	Loss 3.0220 (3.0145)	Entropy 1.39812 (1.40879)	Top-1 acc 49.609 (51.796)	Top-5 acc 73.047 (74.624)	lr 0.02062
Train [33][3220/3239]	Time 0.222 (0.568)	Data Time 0.000 (0.011)	Loss 3.1877 (3.0147)	Entropy 1.39813 (1.40875)	Top-1 acc 46.094 (51.791)	Top-5 acc 73.828 (74.623)	lr 0.02062
Train [33][3230/3239]	Time 0.168 (0.568)	Data Time 0.000 (0.011)	Loss 3.1959 (3.0146)	Entropy 1.39806 (1.40872)	Top-1 acc 46.094 (51.794)	Top-5 acc 69.531 (74.627)	lr 0.02062
Train [33][3239/3239]	Time 2.116 (0.567)	Data Time 0.000 (0.011)	Loss 3.5149 (3.0145)	Entropy 1.39806 (1.40869)	Top-1 acc 48.148 (51.796)	Top-5 acc 62.963 (74.628)	lr 0.02062
==========Valid [33/120]	loss 1.818	top-1 acc 59.665 (59.665)	top-5 acc 81.451	Train top-1 51.796	top-5 74.628	Entropy 1.39806	Latency-None: 0.000ms	Flops: 556.46M
Train [34][0/3239]	Time 38.275 (38.275)	Data Time 37.455 (37.455)	Loss 2.9406 (2.9406)	Entropy 1.39797 (1.39797)	Top-1 acc 51.953 (51.953)	Top-5 acc 75.391 (75.391)	lr 0.02062
Train [34][10/3239]	Time 2.370 (3.963)	Data Time 0.001 (3.407)	Loss 3.0686 (3.0378)	Entropy 1.39797 (1.39797)	Top-1 acc 48.047 (51.065)	Top-5 acc 71.484 (74.219)	lr 0.02062
Train [34][20/3239]	Time 0.202 (2.187)	Data Time 0.001 (1.785)	Loss 3.2822 (3.0571)	Entropy 1.39771 (1.39785)	Top-1 acc 46.875 (50.818)	Top-5 acc 69.531 (73.884)	lr 0.02062
Train [34][30/3239]	Time 0.223 (1.624)	Data Time 0.001 (1.210)	Loss 3.0145 (3.0519)	Entropy 1.39767 (1.39779)	Top-1 acc 52.344 (50.832)	Top-5 acc 75.391 (74.269)	lr 0.02062
Train [34][40/3239]	Time 0.212 (1.338)	Data Time 0.001 (0.915)	Loss 2.8890 (3.0479)	Entropy 1.39763 (1.39775)	Top-1 acc 54.688 (51.010)	Top-5 acc 76.953 (74.066)	lr 0.02062
Train [34][50/3239]	Time 0.231 (2.100)	Data Time 0.003 (0.736)	Loss 3.0619 (3.0295)	Entropy 1.39757 (1.39772)	Top-1 acc 53.906 (51.448)	Top-5 acc 73.438 (74.334)	lr 0.02061
Train [34][60/3239]	Time 0.345 (1.836)	Data Time 0.002 (0.616)	Loss 2.8830 (3.0180)	Entropy 1.39743 (1.39768)	Top-1 acc 55.078 (51.844)	Top-5 acc 78.516 (74.622)	lr 0.02061
Train [34][70/3239]	Time 0.243 (1.645)	Data Time 0.002 (0.530)	Loss 2.8137 (3.0102)	Entropy 1.39736 (1.39764)	Top-1 acc 57.422 (52.014)	Top-5 acc 77.734 (74.736)	lr 0.02061
Train [34][80/3239]	Time 0.257 (1.499)	Data Time 0.002 (0.465)	Loss 2.9839 (3.0017)	Entropy 1.39709 (1.39759)	Top-1 acc 55.469 (52.079)	Top-5 acc 73.047 (75.005)	lr 0.02061
Train [34][90/3239]	Time 0.232 (1.386)	Data Time 0.003 (0.414)	Loss 3.1574 (3.0025)	Entropy 1.39709 (1.39754)	Top-1 acc 53.125 (52.159)	Top-5 acc 71.875 (74.940)	lr 0.02061
Train [34][100/3239]	Time 0.225 (1.294)	Data Time 0.003 (0.373)	Loss 2.8714 (2.9956)	Entropy 1.39714 (1.39749)	Top-1 acc 53.906 (52.282)	Top-5 acc 76.172 (75.050)	lr 0.02061
Train [34][110/3239]	Time 0.323 (1.219)	Data Time 0.001 (0.340)	Loss 2.9874 (2.9962)	Entropy 1.39710 (1.39746)	Top-1 acc 53.125 (52.333)	Top-5 acc 74.609 (75.025)	lr 0.02061
Train [34][120/3239]	Time 2.390 (1.154)	Data Time 0.002 (0.312)	Loss 3.0726 (2.9996)	Entropy 1.39710 (1.39743)	Top-1 acc 50.000 (52.224)	Top-5 acc 73.438 (75.052)	lr 0.02061
Train [34][130/3239]	Time 0.158 (1.083)	Data Time 0.001 (0.288)	Loss 2.9754 (2.9983)	Entropy 1.39703 (1.39740)	Top-1 acc 51.953 (52.308)	Top-5 acc 76.562 (75.083)	lr 0.02061
Train [34][140/3239]	Time 0.232 (1.038)	Data Time 0.001 (0.268)	Loss 2.9353 (2.9954)	Entropy 1.39693 (1.39737)	Top-1 acc 53.125 (52.341)	Top-5 acc 74.609 (75.047)	lr 0.02061
Train [34][150/3239]	Time 0.268 (0.999)	Data Time 0.001 (0.251)	Loss 2.8805 (2.9945)	Entropy 1.39691 (1.39734)	Top-1 acc 53.125 (52.357)	Top-5 acc 78.516 (75.103)	lr 0.02061
Train [34][160/3239]	Time 0.196 (0.965)	Data Time 0.001 (0.235)	Loss 2.9682 (2.9890)	Entropy 1.39689 (1.39731)	Top-1 acc 52.344 (52.392)	Top-5 acc 77.734 (75.209)	lr 0.02061
Train [34][170/3239]	Time 0.327 (0.935)	Data Time 0.002 (0.221)	Loss 3.0960 (2.9908)	Entropy 1.39681 (1.39728)	Top-1 acc 48.438 (52.348)	Top-5 acc 73.047 (75.183)	lr 0.02061
Train [34][180/3239]	Time 0.229 (0.909)	Data Time 0.001 (0.209)	Loss 2.9111 (2.9888)	Entropy 1.39665 (1.39725)	Top-1 acc 50.781 (52.389)	Top-5 acc 78.516 (75.212)	lr 0.02060
Train [34][190/3239]	Time 0.221 (0.885)	Data Time 0.001 (0.198)	Loss 3.0104 (2.9894)	Entropy 1.39661 (1.39722)	Top-1 acc 53.906 (52.370)	Top-5 acc 74.219 (75.196)	lr 0.02060
Train [34][200/3239]	Time 0.234 (0.864)	Data Time 0.002 (0.189)	Loss 2.8198 (2.9891)	Entropy 1.39653 (1.39719)	Top-1 acc 54.297 (52.336)	Top-5 acc 77.734 (75.212)	lr 0.02060
Train [34][210/3239]	Time 0.204 (0.845)	Data Time 0.002 (0.180)	Loss 3.0062 (2.9875)	Entropy 1.39643 (1.39716)	Top-1 acc 49.609 (52.360)	Top-5 acc 74.219 (75.241)	lr 0.02060
Train [34][220/3239]	Time 0.392 (0.828)	Data Time 0.001 (0.172)	Loss 3.0469 (2.9880)	Entropy 1.39635 (1.39712)	Top-1 acc 53.516 (52.416)	Top-5 acc 72.656 (75.219)	lr 0.02060
Train [34][230/3239]	Time 2.267 (0.811)	Data Time 0.001 (0.165)	Loss 3.2010 (2.9919)	Entropy 1.39635 (1.39709)	Top-1 acc 48.438 (52.334)	Top-5 acc 70.312 (75.174)	lr 0.02060
Train [34][240/3239]	Time 0.211 (0.787)	Data Time 0.001 (0.158)	Loss 2.9629 (2.9903)	Entropy 1.39623 (1.39705)	Top-1 acc 50.781 (52.332)	Top-5 acc 75.781 (75.206)	lr 0.02060
Train [34][250/3239]	Time 0.239 (0.773)	Data Time 0.001 (0.152)	Loss 2.7096 (2.9877)	Entropy 1.39619 (1.39702)	Top-1 acc 58.203 (52.342)	Top-5 acc 80.469 (75.247)	lr 0.02060
Train [34][260/3239]	Time 0.213 (0.761)	Data Time 0.001 (0.146)	Loss 2.9597 (2.9906)	Entropy 1.39620 (1.39699)	Top-1 acc 51.562 (52.297)	Top-5 acc 75.781 (75.172)	lr 0.02060
Train [34][270/3239]	Time 0.249 (0.749)	Data Time 0.001 (0.140)	Loss 2.9622 (2.9870)	Entropy 1.39614 (1.39696)	Top-1 acc 53.125 (52.399)	Top-5 acc 78.906 (75.226)	lr 0.02060
Train [34][280/3239]	Time 0.307 (0.738)	Data Time 0.001 (0.135)	Loss 2.9675 (2.9893)	Entropy 1.39610 (1.39693)	Top-1 acc 53.906 (52.376)	Top-5 acc 76.953 (75.147)	lr 0.02060
Train [34][290/3239]	Time 0.221 (0.728)	Data Time 0.001 (0.131)	Loss 2.9957 (2.9889)	Entropy 1.39598 (1.39690)	Top-1 acc 52.344 (52.369)	Top-5 acc 73.438 (75.140)	lr 0.02060
Train [34][300/3239]	Time 0.207 (0.719)	Data Time 0.001 (0.127)	Loss 2.8624 (2.9887)	Entropy 1.39581 (1.39687)	Top-1 acc 55.469 (52.368)	Top-5 acc 74.219 (75.118)	lr 0.02060
Train [34][310/3239]	Time 0.208 (0.710)	Data Time 0.001 (0.123)	Loss 3.0165 (2.9894)	Entropy 1.39576 (1.39683)	Top-1 acc 51.562 (52.350)	Top-5 acc 77.734 (75.109)	lr 0.02059
Train [34][320/3239]	Time 0.214 (0.702)	Data Time 0.001 (0.119)	Loss 3.0602 (2.9889)	Entropy 1.39574 (1.39680)	Top-1 acc 50.391 (52.389)	Top-5 acc 71.484 (75.118)	lr 0.02059
Train [34][330/3239]	Time 0.261 (0.695)	Data Time 0.001 (0.115)	Loss 3.0484 (2.9852)	Entropy 1.39561 (1.39676)	Top-1 acc 52.734 (52.452)	Top-5 acc 75.781 (75.209)	lr 0.02059
Train [34][340/3239]	Time 2.286 (0.687)	Data Time 0.001 (0.112)	Loss 2.7530 (2.9851)	Entropy 1.39561 (1.39673)	Top-1 acc 56.250 (52.469)	Top-5 acc 80.078 (75.202)	lr 0.02059
Train [34][350/3239]	Time 0.217 (0.674)	Data Time 0.002 (0.109)	Loss 2.8875 (2.9850)	Entropy 1.39553 (1.39670)	Top-1 acc 56.250 (52.456)	Top-5 acc 76.172 (75.198)	lr 0.02059
Train [34][360/3239]	Time 0.146 (0.668)	Data Time 0.001 (0.106)	Loss 3.0504 (2.9852)	Entropy 1.39545 (1.39666)	Top-1 acc 52.344 (52.468)	Top-5 acc 72.266 (75.205)	lr 0.02059
Train [34][370/3239]	Time 0.199 (0.662)	Data Time 0.001 (0.103)	Loss 2.9616 (2.9857)	Entropy 1.39526 (1.39663)	Top-1 acc 52.734 (52.429)	Top-5 acc 75.391 (75.176)	lr 0.02059
Train [34][380/3239]	Time 0.209 (0.656)	Data Time 0.001 (0.100)	Loss 2.8397 (2.9892)	Entropy 1.39524 (1.39659)	Top-1 acc 55.859 (52.388)	Top-5 acc 82.031 (75.116)	lr 0.02059
Train [34][390/3239]	Time 0.185 (0.651)	Data Time 0.001 (0.098)	Loss 2.8206 (2.9889)	Entropy 1.39523 (1.39655)	Top-1 acc 55.078 (52.384)	Top-5 acc 76.562 (75.123)	lr 0.02059
Train [34][400/3239]	Time 0.228 (0.646)	Data Time 0.001 (0.096)	Loss 3.0726 (2.9896)	Entropy 1.39497 (1.39652)	Top-1 acc 51.172 (52.360)	Top-5 acc 73.828 (75.081)	lr 0.02059
Train [34][410/3239]	Time 0.267 (0.744)	Data Time 0.003 (0.093)	Loss 2.7547 (2.9885)	Entropy 1.39495 (1.39648)	Top-1 acc 60.547 (52.390)	Top-5 acc 80.078 (75.109)	lr 0.02059
Train [34][420/3239]	Time 0.222 (0.739)	Data Time 0.002 (0.091)	Loss 3.0226 (2.9878)	Entropy 1.39489 (1.39644)	Top-1 acc 51.953 (52.411)	Top-5 acc 72.266 (75.118)	lr 0.02059
Train [34][430/3239]	Time 0.214 (0.732)	Data Time 0.002 (0.089)	Loss 3.0206 (2.9876)	Entropy 1.39489 (1.39641)	Top-1 acc 53.516 (52.409)	Top-5 acc 73.438 (75.111)	lr 0.02059
Train [34][440/3239]	Time 0.223 (0.726)	Data Time 0.001 (0.087)	Loss 2.9183 (2.9863)	Entropy 1.39478 (1.39637)	Top-1 acc 54.688 (52.416)	Top-5 acc 76.172 (75.126)	lr 0.02058
Train [34][450/3239]	Time 2.321 (0.720)	Data Time 0.001 (0.085)	Loss 2.8410 (2.9854)	Entropy 1.39478 (1.39634)	Top-1 acc 55.859 (52.437)	Top-5 acc 77.734 (75.134)	lr 0.02058
Train [34][460/3239]	Time 0.204 (0.709)	Data Time 0.001 (0.083)	Loss 3.0410 (2.9867)	Entropy 1.39469 (1.39630)	Top-1 acc 55.078 (52.408)	Top-5 acc 73.828 (75.121)	lr 0.02058
Train [34][470/3239]	Time 0.227 (0.703)	Data Time 0.002 (0.082)	Loss 3.1136 (2.9870)	Entropy 1.39466 (1.39627)	Top-1 acc 53.125 (52.423)	Top-5 acc 71.875 (75.117)	lr 0.02058
Train [34][480/3239]	Time 0.223 (0.698)	Data Time 0.001 (0.080)	Loss 2.9475 (2.9869)	Entropy 1.39466 (1.39623)	Top-1 acc 53.516 (52.432)	Top-5 acc 76.953 (75.107)	lr 0.02058
Train [34][490/3239]	Time 0.326 (0.693)	Data Time 0.001 (0.078)	Loss 3.0207 (2.9877)	Entropy 1.39461 (1.39620)	Top-1 acc 54.688 (52.428)	Top-5 acc 72.656 (75.096)	lr 0.02058
Train [34][500/3239]	Time 0.245 (0.688)	Data Time 0.001 (0.077)	Loss 2.9434 (2.9866)	Entropy 1.39454 (1.39617)	Top-1 acc 51.953 (52.452)	Top-5 acc 76.562 (75.127)	lr 0.02058
Train [34][510/3239]	Time 0.232 (0.684)	Data Time 0.001 (0.075)	Loss 2.8289 (2.9872)	Entropy 1.39443 (1.39614)	Top-1 acc 54.688 (52.453)	Top-5 acc 76.953 (75.118)	lr 0.02058
Train [34][520/3239]	Time 0.258 (0.679)	Data Time 0.001 (0.074)	Loss 2.8924 (2.9870)	Entropy 1.39437 (1.39610)	Top-1 acc 57.031 (52.453)	Top-5 acc 80.859 (75.118)	lr 0.02058
Train [34][530/3239]	Time 0.203 (0.675)	Data Time 0.001 (0.073)	Loss 2.8788 (2.9864)	Entropy 1.39435 (1.39607)	Top-1 acc 56.250 (52.453)	Top-5 acc 78.125 (75.141)	lr 0.02058
Train [34][540/3239]	Time 0.211 (0.670)	Data Time 0.001 (0.071)	Loss 3.0536 (2.9872)	Entropy 1.39428 (1.39604)	Top-1 acc 53.516 (52.454)	Top-5 acc 75.000 (75.118)	lr 0.02058
Train [34][550/3239]	Time 0.375 (0.666)	Data Time 0.001 (0.070)	Loss 3.1722 (2.9874)	Entropy 1.39421 (1.39601)	Top-1 acc 45.703 (52.432)	Top-5 acc 71.094 (75.118)	lr 0.02058
Train [34][560/3239]	Time 2.316 (0.662)	Data Time 0.001 (0.069)	Loss 2.7998 (2.9874)	Entropy 1.39421 (1.39597)	Top-1 acc 61.719 (52.437)	Top-5 acc 78.125 (75.123)	lr 0.02058
Train [34][570/3239]	Time 0.232 (0.655)	Data Time 0.001 (0.068)	Loss 3.0652 (2.9867)	Entropy 1.39419 (1.39594)	Top-1 acc 51.172 (52.458)	Top-5 acc 71.875 (75.129)	lr 0.02057
Train [34][580/3239]	Time 0.200 (0.651)	Data Time 0.001 (0.066)	Loss 3.1670 (2.9861)	Entropy 1.39418 (1.39591)	Top-1 acc 47.266 (52.463)	Top-5 acc 71.484 (75.136)	lr 0.02057
Train [34][590/3239]	Time 0.203 (0.647)	Data Time 0.002 (0.065)	Loss 3.0042 (2.9858)	Entropy 1.39419 (1.39588)	Top-1 acc 51.953 (52.467)	Top-5 acc 77.344 (75.156)	lr 0.02057
Train [34][600/3239]	Time 0.261 (0.644)	Data Time 0.001 (0.064)	Loss 3.1192 (2.9855)	Entropy 1.39412 (1.39585)	Top-1 acc 51.562 (52.469)	Top-5 acc 73.047 (75.162)	lr 0.02057
Train [34][610/3239]	Time 0.215 (0.641)	Data Time 0.001 (0.063)	Loss 2.7947 (2.9856)	Entropy 1.39410 (1.39583)	Top-1 acc 54.688 (52.457)	Top-5 acc 80.078 (75.166)	lr 0.02057
Train [34][620/3239]	Time 0.211 (0.638)	Data Time 0.001 (0.062)	Loss 2.8295 (2.9852)	Entropy 1.39408 (1.39580)	Top-1 acc 55.859 (52.458)	Top-5 acc 78.906 (75.179)	lr 0.02057
Train [34][630/3239]	Time 0.210 (0.635)	Data Time 0.001 (0.061)	Loss 3.2675 (2.9853)	Entropy 1.39403 (1.39577)	Top-1 acc 51.562 (52.457)	Top-5 acc 69.531 (75.163)	lr 0.02057
Train [34][640/3239]	Time 0.234 (0.632)	Data Time 0.001 (0.060)	Loss 3.1289 (2.9856)	Entropy 1.39393 (1.39574)	Top-1 acc 47.266 (52.453)	Top-5 acc 72.266 (75.149)	lr 0.02057
Train [34][650/3239]	Time 0.222 (0.629)	Data Time 0.001 (0.060)	Loss 2.9921 (2.9848)	Entropy 1.39390 (1.39571)	Top-1 acc 55.469 (52.450)	Top-5 acc 75.391 (75.173)	lr 0.02057
Train [34][660/3239]	Time 0.331 (0.626)	Data Time 0.001 (0.059)	Loss 2.9469 (2.9849)	Entropy 1.39385 (1.39569)	Top-1 acc 55.469 (52.447)	Top-5 acc 75.391 (75.177)	lr 0.02057
Train [34][670/3239]	Time 2.308 (0.624)	Data Time 0.002 (0.058)	Loss 2.9642 (2.9848)	Entropy 1.39385 (1.39566)	Top-1 acc 53.906 (52.456)	Top-5 acc 77.344 (75.179)	lr 0.02057
Train [34][680/3239]	Time 0.214 (0.618)	Data Time 0.001 (0.057)	Loss 3.0825 (2.9852)	Entropy 1.39364 (1.39563)	Top-1 acc 46.484 (52.452)	Top-5 acc 73.047 (75.152)	lr 0.02057
Train [34][690/3239]	Time 0.219 (0.615)	Data Time 0.001 (0.056)	Loss 3.1161 (2.9849)	Entropy 1.39360 (1.39560)	Top-1 acc 48.438 (52.463)	Top-5 acc 71.094 (75.145)	lr 0.02056
Train [34][700/3239]	Time 0.240 (0.613)	Data Time 0.002 (0.055)	Loss 2.9841 (2.9844)	Entropy 1.39361 (1.39557)	Top-1 acc 55.078 (52.474)	Top-5 acc 77.734 (75.161)	lr 0.02056
Train [34][710/3239]	Time 0.212 (0.611)	Data Time 0.002 (0.055)	Loss 3.0947 (2.9847)	Entropy 1.39356 (1.39554)	Top-1 acc 52.734 (52.481)	Top-5 acc 74.219 (75.153)	lr 0.02056
Train [34][720/3239]	Time 0.209 (0.609)	Data Time 0.002 (0.054)	Loss 3.2249 (2.9844)	Entropy 1.39349 (1.39552)	Top-1 acc 50.391 (52.473)	Top-5 acc 69.922 (75.160)	lr 0.02056
Train [34][730/3239]	Time 0.214 (0.606)	Data Time 0.001 (0.053)	Loss 2.9253 (2.9841)	Entropy 1.39324 (1.39549)	Top-1 acc 55.078 (52.482)	Top-5 acc 75.000 (75.168)	lr 0.02056
Train [34][740/3239]	Time 0.212 (0.604)	Data Time 0.001 (0.053)	Loss 3.0431 (2.9833)	Entropy 1.39318 (1.39546)	Top-1 acc 53.125 (52.506)	Top-5 acc 74.219 (75.187)	lr 0.02056
Train [34][750/3239]	Time 0.218 (0.602)	Data Time 0.002 (0.052)	Loss 2.9200 (2.9838)	Entropy 1.39308 (1.39543)	Top-1 acc 57.812 (52.510)	Top-5 acc 76.562 (75.168)	lr 0.02056
Train [34][760/3239]	Time 0.211 (0.600)	Data Time 0.001 (0.051)	Loss 3.1355 (2.9841)	Entropy 1.39311 (1.39539)	Top-1 acc 51.953 (52.520)	Top-5 acc 69.922 (75.159)	lr 0.02056
Train [34][770/3239]	Time 0.269 (0.659)	Data Time 0.003 (0.051)	Loss 2.9696 (2.9837)	Entropy 1.39305 (1.39536)	Top-1 acc 52.344 (52.524)	Top-5 acc 72.656 (75.163)	lr 0.02056
Train [34][780/3239]	Time 2.427 (0.656)	Data Time 0.002 (0.050)	Loss 3.0059 (2.9841)	Entropy 1.39305 (1.39534)	Top-1 acc 52.344 (52.510)	Top-5 acc 75.000 (75.157)	lr 0.02056
Train [34][790/3239]	Time 0.262 (0.651)	Data Time 0.002 (0.049)	Loss 2.9639 (2.9840)	Entropy 1.39306 (1.39531)	Top-1 acc 55.078 (52.523)	Top-5 acc 76.172 (75.158)	lr 0.02056
Train [34][800/3239]	Time 0.226 (0.648)	Data Time 0.002 (0.049)	Loss 3.0939 (2.9849)	Entropy 1.39306 (1.39528)	Top-1 acc 48.438 (52.497)	Top-5 acc 75.781 (75.141)	lr 0.02056
Train [34][810/3239]	Time 0.236 (0.646)	Data Time 0.001 (0.048)	Loss 2.9179 (2.9847)	Entropy 1.39303 (1.39525)	Top-1 acc 51.953 (52.511)	Top-5 acc 76.172 (75.138)	lr 0.02056
Train [34][820/3239]	Time 0.234 (0.644)	Data Time 0.001 (0.048)	Loss 2.7821 (2.9847)	Entropy 1.39298 (1.39522)	Top-1 acc 57.031 (52.521)	Top-5 acc 81.641 (75.142)	lr 0.02055
Train [34][830/3239]	Time 0.218 (0.642)	Data Time 0.001 (0.047)	Loss 2.8738 (2.9847)	Entropy 1.39285 (1.39520)	Top-1 acc 57.812 (52.531)	Top-5 acc 76.953 (75.140)	lr 0.02055
Train [34][840/3239]	Time 0.216 (0.639)	Data Time 0.001 (0.047)	Loss 2.9198 (2.9843)	Entropy 1.39287 (1.39517)	Top-1 acc 55.078 (52.541)	Top-5 acc 76.953 (75.136)	lr 0.02055
Train [34][850/3239]	Time 0.215 (0.637)	Data Time 0.001 (0.046)	Loss 2.8764 (2.9839)	Entropy 1.39280 (1.39514)	Top-1 acc 52.734 (52.540)	Top-5 acc 79.297 (75.147)	lr 0.02055
Train [34][860/3239]	Time 0.211 (0.635)	Data Time 0.001 (0.045)	Loss 3.2067 (2.9846)	Entropy 1.39279 (1.39511)	Top-1 acc 48.047 (52.537)	Top-5 acc 71.094 (75.142)	lr 0.02055
Train [34][870/3239]	Time 0.311 (0.633)	Data Time 0.001 (0.045)	Loss 3.1872 (2.9842)	Entropy 1.39276 (1.39509)	Top-1 acc 45.703 (52.544)	Top-5 acc 69.531 (75.148)	lr 0.02055
Train [34][880/3239]	Time 0.249 (0.631)	Data Time 0.001 (0.044)	Loss 3.0025 (2.9847)	Entropy 1.39288 (1.39506)	Top-1 acc 51.172 (52.522)	Top-5 acc 75.781 (75.137)	lr 0.02055
Train [34][890/3239]	Time 2.396 (0.629)	Data Time 0.001 (0.044)	Loss 2.9776 (2.9849)	Entropy 1.39288 (1.39504)	Top-1 acc 53.906 (52.513)	Top-5 acc 76.953 (75.135)	lr 0.02055
Train [34][900/3239]	Time 0.222 (0.624)	Data Time 0.001 (0.044)	Loss 2.9797 (2.9859)	Entropy 1.39276 (1.39501)	Top-1 acc 51.562 (52.490)	Top-5 acc 76.172 (75.118)	lr 0.02055
Train [34][910/3239]	Time 0.214 (0.622)	Data Time 0.001 (0.043)	Loss 3.2264 (2.9860)	Entropy 1.39271 (1.39499)	Top-1 acc 42.188 (52.478)	Top-5 acc 71.484 (75.122)	lr 0.02055
Train [34][920/3239]	Time 0.273 (0.620)	Data Time 0.001 (0.043)	Loss 3.0153 (2.9863)	Entropy 1.39270 (1.39496)	Top-1 acc 48.438 (52.466)	Top-5 acc 75.000 (75.112)	lr 0.02055
Train [34][930/3239]	Time 0.327 (0.619)	Data Time 0.001 (0.042)	Loss 2.8722 (2.9863)	Entropy 1.39263 (1.39494)	Top-1 acc 53.516 (52.463)	Top-5 acc 76.953 (75.116)	lr 0.02055
Train [34][940/3239]	Time 0.218 (0.617)	Data Time 0.001 (0.042)	Loss 2.9325 (2.9861)	Entropy 1.39247 (1.39491)	Top-1 acc 53.125 (52.474)	Top-5 acc 75.781 (75.119)	lr 0.02055
Train [34][950/3239]	Time 0.201 (0.615)	Data Time 0.001 (0.041)	Loss 3.0661 (2.9863)	Entropy 1.39243 (1.39488)	Top-1 acc 51.562 (52.472)	Top-5 acc 74.609 (75.116)	lr 0.02054
Train [34][960/3239]	Time 0.217 (0.613)	Data Time 0.001 (0.041)	Loss 2.9121 (2.9862)	Entropy 1.39235 (1.39486)	Top-1 acc 50.781 (52.476)	Top-5 acc 79.297 (75.114)	lr 0.02054
Train [34][970/3239]	Time 0.227 (0.612)	Data Time 0.001 (0.041)	Loss 3.0359 (2.9861)	Entropy 1.39232 (1.39483)	Top-1 acc 52.734 (52.472)	Top-5 acc 75.781 (75.120)	lr 0.02054
Train [34][980/3239]	Time 0.267 (0.610)	Data Time 0.001 (0.040)	Loss 3.0086 (2.9866)	Entropy 1.39227 (1.39481)	Top-1 acc 50.391 (52.454)	Top-5 acc 75.781 (75.114)	lr 0.02054
Train [34][990/3239]	Time 0.218 (0.608)	Data Time 0.001 (0.040)	Loss 3.0178 (2.9863)	Entropy 1.39221 (1.39478)	Top-1 acc 49.609 (52.469)	Top-5 acc 76.562 (75.120)	lr 0.02054
Train [34][1000/3239]	Time 2.508 (0.607)	Data Time 0.002 (0.039)	Loss 3.0266 (2.9859)	Entropy 1.39221 (1.39476)	Top-1 acc 51.172 (52.481)	Top-5 acc 74.609 (75.128)	lr 0.02054
Train [34][1010/3239]	Time 0.253 (0.603)	Data Time 0.001 (0.039)	Loss 2.8787 (2.9862)	Entropy 1.39218 (1.39473)	Top-1 acc 54.688 (52.481)	Top-5 acc 74.609 (75.124)	lr 0.02054
Train [34][1020/3239]	Time 0.254 (0.602)	Data Time 0.001 (0.039)	Loss 2.8969 (2.9870)	Entropy 1.39212 (1.39470)	Top-1 acc 56.250 (52.464)	Top-5 acc 76.953 (75.112)	lr 0.02054
Train [34][1030/3239]	Time 0.228 (0.600)	Data Time 0.002 (0.038)	Loss 2.8612 (2.9870)	Entropy 1.39210 (1.39468)	Top-1 acc 55.859 (52.465)	Top-5 acc 75.781 (75.110)	lr 0.02054
Train [34][1040/3239]	Time 0.232 (0.599)	Data Time 0.001 (0.038)	Loss 3.0319 (2.9863)	Entropy 1.39209 (1.39465)	Top-1 acc 51.953 (52.483)	Top-5 acc 72.656 (75.119)	lr 0.02054
Train [34][1050/3239]	Time 0.194 (0.598)	Data Time 0.001 (0.038)	Loss 2.9980 (2.9865)	Entropy 1.39197 (1.39463)	Top-1 acc 53.906 (52.482)	Top-5 acc 73.438 (75.115)	lr 0.02054
Train [34][1060/3239]	Time 0.239 (0.596)	Data Time 0.001 (0.037)	Loss 2.8824 (2.9864)	Entropy 1.39190 (1.39460)	Top-1 acc 56.250 (52.476)	Top-5 acc 76.562 (75.123)	lr 0.02054
Train [34][1070/3239]	Time 0.218 (0.595)	Data Time 0.001 (0.037)	Loss 2.9508 (2.9865)	Entropy 1.39183 (1.39458)	Top-1 acc 53.125 (52.469)	Top-5 acc 73.828 (75.119)	lr 0.02054
Train [34][1080/3239]	Time 0.224 (0.594)	Data Time 0.001 (0.037)	Loss 3.0118 (2.9867)	Entropy 1.39178 (1.39455)	Top-1 acc 50.781 (52.470)	Top-5 acc 76.172 (75.121)	lr 0.02053
Train [34][1090/3239]	Time 0.222 (0.592)	Data Time 0.001 (0.036)	Loss 3.1189 (2.9865)	Entropy 1.39170 (1.39453)	Top-1 acc 47.656 (52.477)	Top-5 acc 71.484 (75.119)	lr 0.02053
Train [34][1100/3239]	Time 0.258 (0.591)	Data Time 0.001 (0.036)	Loss 3.1886 (2.9863)	Entropy 1.39175 (1.39450)	Top-1 acc 48.047 (52.470)	Top-5 acc 72.266 (75.128)	lr 0.02053
Train [34][1110/3239]	Time 2.301 (0.590)	Data Time 0.001 (0.036)	Loss 2.9670 (2.9866)	Entropy 1.39175 (1.39448)	Top-1 acc 55.078 (52.469)	Top-5 acc 75.000 (75.118)	lr 0.02053
Train [34][1120/3239]	Time 0.215 (0.586)	Data Time 0.001 (0.035)	Loss 2.8356 (2.9875)	Entropy 1.39157 (1.39445)	Top-1 acc 55.469 (52.441)	Top-5 acc 75.781 (75.098)	lr 0.02053
Train [34][1130/3239]	Time 0.230 (0.585)	Data Time 0.001 (0.035)	Loss 2.9562 (2.9871)	Entropy 1.39148 (1.39442)	Top-1 acc 54.297 (52.447)	Top-5 acc 75.781 (75.112)	lr 0.02053
Train [34][1140/3239]	Time 0.234 (0.623)	Data Time 0.002 (0.035)	Loss 3.0378 (2.9869)	Entropy 1.39142 (1.39440)	Top-1 acc 52.344 (52.451)	Top-5 acc 75.000 (75.117)	lr 0.02053
Train [34][1150/3239]	Time 0.228 (0.621)	Data Time 0.002 (0.034)	Loss 2.8231 (2.9868)	Entropy 1.39137 (1.39437)	Top-1 acc 53.516 (52.446)	Top-5 acc 78.125 (75.116)	lr 0.02053
Train [34][1160/3239]	Time 0.224 (0.620)	Data Time 0.002 (0.034)	Loss 3.2261 (2.9870)	Entropy 1.39151 (1.39435)	Top-1 acc 50.000 (52.450)	Top-5 acc 70.312 (75.112)	lr 0.02053
Train [34][1170/3239]	Time 0.223 (0.618)	Data Time 0.001 (0.034)	Loss 2.8732 (2.9867)	Entropy 1.39144 (1.39432)	Top-1 acc 55.078 (52.454)	Top-5 acc 80.078 (75.117)	lr 0.02053
Train [34][1180/3239]	Time 0.204 (0.617)	Data Time 0.001 (0.034)	Loss 3.0313 (2.9868)	Entropy 1.39140 (1.39430)	Top-1 acc 55.078 (52.453)	Top-5 acc 71.094 (75.109)	lr 0.02053
Train [34][1190/3239]	Time 0.309 (0.616)	Data Time 0.001 (0.033)	Loss 2.9027 (2.9870)	Entropy 1.39119 (1.39427)	Top-1 acc 53.906 (52.451)	Top-5 acc 77.344 (75.101)	lr 0.02053
Train [34][1200/3239]	Time 0.214 (0.614)	Data Time 0.001 (0.033)	Loss 3.0585 (2.9872)	Entropy 1.39115 (1.39425)	Top-1 acc 50.391 (52.448)	Top-5 acc 74.219 (75.094)	lr 0.02053
Train [34][1210/3239]	Time 0.189 (0.613)	Data Time 0.001 (0.033)	Loss 3.0168 (2.9869)	Entropy 1.39118 (1.39422)	Top-1 acc 53.125 (52.454)	Top-5 acc 75.781 (75.103)	lr 0.02052
Train [34][1220/3239]	Time 2.353 (0.611)	Data Time 0.001 (0.033)	Loss 3.0634 (2.9869)	Entropy 1.39118 (1.39420)	Top-1 acc 51.953 (52.450)	Top-5 acc 74.609 (75.110)	lr 0.02052
Train [34][1230/3239]	Time 0.230 (0.608)	Data Time 0.001 (0.032)	Loss 3.0026 (2.9868)	Entropy 1.39112 (1.39417)	Top-1 acc 47.656 (52.455)	Top-5 acc 75.781 (75.115)	lr 0.02052
Train [34][1240/3239]	Time 0.215 (0.607)	Data Time 0.001 (0.032)	Loss 2.9804 (2.9870)	Entropy 1.39085 (1.39415)	Top-1 acc 51.172 (52.450)	Top-5 acc 73.828 (75.109)	lr 0.02052
Train [34][1250/3239]	Time 0.318 (0.606)	Data Time 0.001 (0.032)	Loss 2.9801 (2.9870)	Entropy 1.39056 (1.39412)	Top-1 acc 57.812 (52.453)	Top-5 acc 73.828 (75.106)	lr 0.02052
Train [34][1260/3239]	Time 0.230 (0.605)	Data Time 0.001 (0.032)	Loss 2.9651 (2.9865)	Entropy 1.39054 (1.39409)	Top-1 acc 55.469 (52.465)	Top-5 acc 73.828 (75.120)	lr 0.02052
Train [34][1270/3239]	Time 0.216 (0.603)	Data Time 0.001 (0.031)	Loss 2.9126 (2.9868)	Entropy 1.39043 (1.39406)	Top-1 acc 53.516 (52.448)	Top-5 acc 77.344 (75.113)	lr 0.02052
Train [34][1280/3239]	Time 0.203 (0.602)	Data Time 0.001 (0.031)	Loss 2.8240 (2.9867)	Entropy 1.39031 (1.39403)	Top-1 acc 58.203 (52.452)	Top-5 acc 78.906 (75.108)	lr 0.02052
Train [34][1290/3239]	Time 0.235 (0.601)	Data Time 0.001 (0.031)	Loss 2.6871 (2.9865)	Entropy 1.39032 (1.39400)	Top-1 acc 60.938 (52.452)	Top-5 acc 82.031 (75.112)	lr 0.02052
Train [34][1300/3239]	Time 0.314 (0.600)	Data Time 0.001 (0.031)	Loss 2.8512 (2.9865)	Entropy 1.39029 (1.39397)	Top-1 acc 52.734 (52.444)	Top-5 acc 76.562 (75.108)	lr 0.02052
Train [34][1310/3239]	Time 0.238 (0.599)	Data Time 0.002 (0.030)	Loss 2.9999 (2.9865)	Entropy 1.39025 (1.39395)	Top-1 acc 52.344 (52.448)	Top-5 acc 75.781 (75.108)	lr 0.02052
Train [34][1320/3239]	Time 0.195 (0.598)	Data Time 0.001 (0.030)	Loss 2.9511 (2.9868)	Entropy 1.39024 (1.39392)	Top-1 acc 52.734 (52.441)	Top-5 acc 74.609 (75.098)	lr 0.02052
Train [34][1330/3239]	Time 2.482 (0.597)	Data Time 0.002 (0.030)	Loss 3.0561 (2.9870)	Entropy 1.39024 (1.39389)	Top-1 acc 51.172 (52.440)	Top-5 acc 76.562 (75.101)	lr 0.02052
Train [34][1340/3239]	Time 0.232 (0.594)	Data Time 0.001 (0.030)	Loss 3.1795 (2.9871)	Entropy 1.39024 (1.39386)	Top-1 acc 47.266 (52.435)	Top-5 acc 72.656 (75.096)	lr 0.02051
Train [34][1350/3239]	Time 0.208 (0.593)	Data Time 0.001 (0.030)	Loss 3.0318 (2.9872)	Entropy 1.39014 (1.39384)	Top-1 acc 46.875 (52.427)	Top-5 acc 74.219 (75.095)	lr 0.02051
Train [34][1360/3239]	Time 0.250 (0.592)	Data Time 0.001 (0.029)	Loss 3.0049 (2.9874)	Entropy 1.39010 (1.39381)	Top-1 acc 51.953 (52.420)	Top-5 acc 73.828 (75.087)	lr 0.02051
Train [34][1370/3239]	Time 0.233 (0.591)	Data Time 0.001 (0.029)	Loss 2.8498 (2.9876)	Entropy 1.39007 (1.39378)	Top-1 acc 55.078 (52.423)	Top-5 acc 76.562 (75.086)	lr 0.02051
Train [34][1380/3239]	Time 0.212 (0.589)	Data Time 0.001 (0.029)	Loss 2.8713 (2.9879)	Entropy 1.39001 (1.39375)	Top-1 acc 54.297 (52.418)	Top-5 acc 78.516 (75.079)	lr 0.02051
Train [34][1390/3239]	Time 0.204 (0.588)	Data Time 0.001 (0.029)	Loss 2.7955 (2.9874)	Entropy 1.38985 (1.39373)	Top-1 acc 58.203 (52.426)	Top-5 acc 76.953 (75.092)	lr 0.02051
Train [34][1400/3239]	Time 0.204 (0.587)	Data Time 0.001 (0.029)	Loss 2.8617 (2.9870)	Entropy 1.38929 (1.39370)	Top-1 acc 56.250 (52.437)	Top-5 acc 77.344 (75.096)	lr 0.02051
Train [34][1410/3239]	Time 0.339 (0.586)	Data Time 0.001 (0.028)	Loss 2.8522 (2.9866)	Entropy 1.38926 (1.39367)	Top-1 acc 56.641 (52.450)	Top-5 acc 79.297 (75.106)	lr 0.02051
Train [34][1420/3239]	Time 0.259 (0.586)	Data Time 0.001 (0.028)	Loss 2.9251 (2.9862)	Entropy 1.38928 (1.39364)	Top-1 acc 55.469 (52.459)	Top-5 acc 76.172 (75.108)	lr 0.02051
Train [34][1430/3239]	Time 0.251 (0.585)	Data Time 0.001 (0.028)	Loss 3.1004 (2.9866)	Entropy 1.38924 (1.39360)	Top-1 acc 48.828 (52.440)	Top-5 acc 73.438 (75.100)	lr 0.02051
Train [34][1440/3239]	Time 2.361 (0.584)	Data Time 0.002 (0.028)	Loss 2.7855 (2.9863)	Entropy 1.38924 (1.39357)	Top-1 acc 58.594 (52.455)	Top-5 acc 78.516 (75.103)	lr 0.02051
Train [34][1450/3239]	Time 0.232 (0.581)	Data Time 0.001 (0.028)	Loss 3.0503 (2.9869)	Entropy 1.38915 (1.39354)	Top-1 acc 51.172 (52.437)	Top-5 acc 73.438 (75.098)	lr 0.02051
Train [34][1460/3239]	Time 0.209 (0.580)	Data Time 0.001 (0.027)	Loss 2.9362 (2.9870)	Entropy 1.38912 (1.39351)	Top-1 acc 55.469 (52.435)	Top-5 acc 75.000 (75.099)	lr 0.02051
Train [34][1470/3239]	Time 0.219 (0.579)	Data Time 0.001 (0.027)	Loss 2.8711 (2.9870)	Entropy 1.38910 (1.39348)	Top-1 acc 54.688 (52.425)	Top-5 acc 77.344 (75.096)	lr 0.02050
Train [34][1480/3239]	Time 0.217 (0.579)	Data Time 0.001 (0.027)	Loss 2.9871 (2.9870)	Entropy 1.38911 (1.39345)	Top-1 acc 54.297 (52.425)	Top-5 acc 76.562 (75.102)	lr 0.02050
Train [34][1490/3239]	Time 0.224 (0.578)	Data Time 0.001 (0.027)	Loss 3.0588 (2.9870)	Entropy 1.38909 (1.39342)	Top-1 acc 49.609 (52.420)	Top-5 acc 72.656 (75.099)	lr 0.02050
Train [34][1500/3239]	Time 0.237 (0.606)	Data Time 0.002 (0.027)	Loss 2.7571 (2.9869)	Entropy 1.38903 (1.39340)	Top-1 acc 55.078 (52.419)	Top-5 acc 82.422 (75.101)	lr 0.02050
Train [34][1510/3239]	Time 0.159 (0.605)	Data Time 0.002 (0.027)	Loss 3.2243 (2.9868)	Entropy 1.38899 (1.39337)	Top-1 acc 51.172 (52.417)	Top-5 acc 72.656 (75.101)	lr 0.02050
Train [34][1520/3239]	Time 0.277 (0.604)	Data Time 0.001 (0.026)	Loss 3.0429 (2.9869)	Entropy 1.38896 (1.39334)	Top-1 acc 54.297 (52.415)	Top-5 acc 75.781 (75.107)	lr 0.02050
Train [34][1530/3239]	Time 0.196 (0.603)	Data Time 0.001 (0.026)	Loss 3.0647 (2.9869)	Entropy 1.38890 (1.39331)	Top-1 acc 49.219 (52.420)	Top-5 acc 76.953 (75.104)	lr 0.02050
Train [34][1540/3239]	Time 0.238 (0.602)	Data Time 0.001 (0.026)	Loss 3.0424 (2.9867)	Entropy 1.38883 (1.39328)	Top-1 acc 52.734 (52.433)	Top-5 acc 78.125 (75.111)	lr 0.02050
Train [34][1550/3239]	Time 2.425 (0.601)	Data Time 0.002 (0.026)	Loss 3.0613 (2.9868)	Entropy 1.38883 (1.39325)	Top-1 acc 54.297 (52.429)	Top-5 acc 71.484 (75.112)	lr 0.02050
Train [34][1560/3239]	Time 0.220 (0.599)	Data Time 0.001 (0.026)	Loss 2.9664 (2.9866)	Entropy 1.38881 (1.39322)	Top-1 acc 48.828 (52.429)	Top-5 acc 76.562 (75.118)	lr 0.02050
Train [34][1570/3239]	Time 0.221 (0.598)	Data Time 0.001 (0.026)	Loss 3.1567 (2.9865)	Entropy 1.38877 (1.39320)	Top-1 acc 50.781 (52.432)	Top-5 acc 74.609 (75.117)	lr 0.02050
Train [34][1580/3239]	Time 0.225 (0.597)	Data Time 0.001 (0.026)	Loss 2.9592 (2.9863)	Entropy 1.38872 (1.39317)	Top-1 acc 53.516 (52.440)	Top-5 acc 74.609 (75.115)	lr 0.02050
Train [34][1590/3239]	Time 0.225 (0.596)	Data Time 0.001 (0.025)	Loss 3.0954 (2.9865)	Entropy 1.38863 (1.39314)	Top-1 acc 46.875 (52.429)	Top-5 acc 73.828 (75.115)	lr 0.02050
Train [34][1600/3239]	Time 0.235 (0.595)	Data Time 0.001 (0.025)	Loss 2.8954 (2.9863)	Entropy 1.38851 (1.39311)	Top-1 acc 53.516 (52.435)	Top-5 acc 75.781 (75.116)	lr 0.02049
Train [34][1610/3239]	Time 0.220 (0.594)	Data Time 0.001 (0.025)	Loss 3.0038 (2.9860)	Entropy 1.38851 (1.39308)	Top-1 acc 51.953 (52.437)	Top-5 acc 77.734 (75.124)	lr 0.02049
Train [34][1620/3239]	Time 0.223 (0.594)	Data Time 0.001 (0.025)	Loss 2.9692 (2.9866)	Entropy 1.38841 (1.39305)	Top-1 acc 54.297 (52.425)	Top-5 acc 73.828 (75.114)	lr 0.02049
Train [34][1630/3239]	Time 0.295 (0.593)	Data Time 0.001 (0.025)	Loss 3.0543 (2.9871)	Entropy 1.38839 (1.39302)	Top-1 acc 50.781 (52.415)	Top-5 acc 71.875 (75.105)	lr 0.02049
Train [34][1640/3239]	Time 0.213 (0.592)	Data Time 0.001 (0.025)	Loss 2.8049 (2.9870)	Entropy 1.38835 (1.39300)	Top-1 acc 55.469 (52.410)	Top-5 acc 80.469 (75.106)	lr 0.02049
Train [34][1650/3239]	Time 0.238 (0.591)	Data Time 0.001 (0.025)	Loss 2.9356 (2.9870)	Entropy 1.38821 (1.39297)	Top-1 acc 55.859 (52.412)	Top-5 acc 76.172 (75.104)	lr 0.02049
Train [34][1660/3239]	Time 2.403 (0.590)	Data Time 0.001 (0.024)	Loss 3.0486 (2.9870)	Entropy 1.38821 (1.39294)	Top-1 acc 55.469 (52.413)	Top-5 acc 75.781 (75.108)	lr 0.02049
Train [34][1670/3239]	Time 0.256 (0.588)	Data Time 0.002 (0.024)	Loss 3.1641 (2.9871)	Entropy 1.38807 (1.39291)	Top-1 acc 46.484 (52.409)	Top-5 acc 74.219 (75.108)	lr 0.02049
Train [34][1680/3239]	Time 0.214 (0.587)	Data Time 0.001 (0.024)	Loss 2.7844 (2.9877)	Entropy 1.38797 (1.39288)	Top-1 acc 56.641 (52.395)	Top-5 acc 78.906 (75.099)	lr 0.02049
Train [34][1690/3239]	Time 0.220 (0.586)	Data Time 0.001 (0.024)	Loss 2.9883 (2.9875)	Entropy 1.38790 (1.39285)	Top-1 acc 52.344 (52.393)	Top-5 acc 75.781 (75.103)	lr 0.02049
Train [34][1700/3239]	Time 0.176 (0.586)	Data Time 0.001 (0.024)	Loss 3.1729 (2.9877)	Entropy 1.38769 (1.39282)	Top-1 acc 50.781 (52.391)	Top-5 acc 69.922 (75.097)	lr 0.02049
Train [34][1710/3239]	Time 0.229 (0.585)	Data Time 0.001 (0.024)	Loss 2.8989 (2.9874)	Entropy 1.38769 (1.39279)	Top-1 acc 58.203 (52.404)	Top-5 acc 75.000 (75.102)	lr 0.02049
Train [34][1720/3239]	Time 0.224 (0.584)	Data Time 0.002 (0.024)	Loss 3.0170 (2.9871)	Entropy 1.38758 (1.39276)	Top-1 acc 48.828 (52.405)	Top-5 acc 76.953 (75.113)	lr 0.02049
Train [34][1730/3239]	Time 0.211 (0.583)	Data Time 0.001 (0.023)	Loss 3.1211 (2.9872)	Entropy 1.38751 (1.39273)	Top-1 acc 46.094 (52.401)	Top-5 acc 71.875 (75.114)	lr 0.02048
Train [34][1740/3239]	Time 0.198 (0.583)	Data Time 0.001 (0.023)	Loss 3.0996 (2.9874)	Entropy 1.38745 (1.39270)	Top-1 acc 53.906 (52.403)	Top-5 acc 74.609 (75.114)	lr 0.02048
Train [34][1750/3239]	Time 0.243 (0.582)	Data Time 0.001 (0.023)	Loss 2.8425 (2.9871)	Entropy 1.38734 (1.39267)	Top-1 acc 58.984 (52.412)	Top-5 acc 78.125 (75.120)	lr 0.02048
Train [34][1760/3239]	Time 0.258 (0.581)	Data Time 0.001 (0.023)	Loss 2.9309 (2.9872)	Entropy 1.38726 (1.39264)	Top-1 acc 54.688 (52.412)	Top-5 acc 75.781 (75.120)	lr 0.02048
Train [34][1770/3239]	Time 2.486 (0.580)	Data Time 0.001 (0.023)	Loss 3.1722 (2.9873)	Entropy 1.38726 (1.39261)	Top-1 acc 51.172 (52.412)	Top-5 acc 71.094 (75.119)	lr 0.02048
Train [34][1780/3239]	Time 0.207 (0.579)	Data Time 0.001 (0.023)	Loss 2.8406 (2.9877)	Entropy 1.38711 (1.39258)	Top-1 acc 55.859 (52.402)	Top-5 acc 76.172 (75.116)	lr 0.02048
Train [34][1790/3239]	Time 0.217 (0.578)	Data Time 0.001 (0.023)	Loss 3.0613 (2.9883)	Entropy 1.38696 (1.39255)	Top-1 acc 45.703 (52.384)	Top-5 acc 72.266 (75.102)	lr 0.02048
Train [34][1800/3239]	Time 0.216 (0.577)	Data Time 0.001 (0.023)	Loss 2.8922 (2.9887)	Entropy 1.38696 (1.39252)	Top-1 acc 54.297 (52.375)	Top-5 acc 76.953 (75.096)	lr 0.02048
Train [34][1810/3239]	Time 0.223 (0.576)	Data Time 0.001 (0.023)	Loss 2.9427 (2.9888)	Entropy 1.38681 (1.39249)	Top-1 acc 53.906 (52.371)	Top-5 acc 77.734 (75.093)	lr 0.02048
Train [34][1820/3239]	Time 0.238 (0.576)	Data Time 0.002 (0.022)	Loss 2.8817 (2.9888)	Entropy 1.38670 (1.39245)	Top-1 acc 55.859 (52.375)	Top-5 acc 76.172 (75.091)	lr 0.02048
Train [34][1830/3239]	Time 0.232 (0.575)	Data Time 0.001 (0.022)	Loss 3.0888 (2.9892)	Entropy 1.38658 (1.39242)	Top-1 acc 50.391 (52.369)	Top-5 acc 76.562 (75.086)	lr 0.02048
Train [34][1840/3239]	Time 0.246 (0.574)	Data Time 0.001 (0.022)	Loss 2.8660 (2.9889)	Entropy 1.38649 (1.39239)	Top-1 acc 52.344 (52.374)	Top-5 acc 78.906 (75.096)	lr 0.02048
Train [34][1850/3239]	Time 0.352 (0.574)	Data Time 0.001 (0.022)	Loss 3.0578 (2.9891)	Entropy 1.38642 (1.39236)	Top-1 acc 50.781 (52.373)	Top-5 acc 72.656 (75.095)	lr 0.02048
Train [34][1860/3239]	Time 0.243 (0.598)	Data Time 0.002 (0.022)	Loss 2.9783 (2.9888)	Entropy 1.38633 (1.39233)	Top-1 acc 52.734 (52.371)	Top-5 acc 73.828 (75.101)	lr 0.02047
Train [34][1870/3239]	Time 0.249 (0.597)	Data Time 0.002 (0.022)	Loss 2.9896 (2.9888)	Entropy 1.38630 (1.39229)	Top-1 acc 51.953 (52.376)	Top-5 acc 75.000 (75.101)	lr 0.02047
Train [34][1880/3239]	Time 2.340 (0.596)	Data Time 0.002 (0.022)	Loss 2.9604 (2.9886)	Entropy 1.38630 (1.39226)	Top-1 acc 53.125 (52.380)	Top-5 acc 75.781 (75.105)	lr 0.02047
Train [34][1890/3239]	Time 0.237 (0.595)	Data Time 0.001 (0.022)	Loss 2.8908 (2.9892)	Entropy 1.38627 (1.39223)	Top-1 acc 55.859 (52.371)	Top-5 acc 77.344 (75.098)	lr 0.02047
Train [34][1900/3239]	Time 0.316 (0.594)	Data Time 0.002 (0.022)	Loss 2.9939 (2.9892)	Entropy 1.38626 (1.39220)	Top-1 acc 49.609 (52.370)	Top-5 acc 75.781 (75.100)	lr 0.02047
Train [34][1910/3239]	Time 0.237 (0.593)	Data Time 0.001 (0.021)	Loss 2.9711 (2.9897)	Entropy 1.38626 (1.39217)	Top-1 acc 53.125 (52.358)	Top-5 acc 75.391 (75.091)	lr 0.02047
Train [34][1920/3239]	Time 0.202 (0.593)	Data Time 0.002 (0.021)	Loss 3.1638 (2.9897)	Entropy 1.38622 (1.39214)	Top-1 acc 46.484 (52.356)	Top-5 acc 71.094 (75.089)	lr 0.02047
Train [34][1930/3239]	Time 0.230 (0.592)	Data Time 0.002 (0.021)	Loss 3.1815 (2.9898)	Entropy 1.38615 (1.39211)	Top-1 acc 50.391 (52.361)	Top-5 acc 70.703 (75.089)	lr 0.02047
Train [34][1940/3239]	Time 0.230 (0.591)	Data Time 0.001 (0.021)	Loss 2.8983 (2.9899)	Entropy 1.38608 (1.39208)	Top-1 acc 55.469 (52.354)	Top-5 acc 77.734 (75.088)	lr 0.02047
Train [34][1950/3239]	Time 0.230 (0.590)	Data Time 0.001 (0.021)	Loss 2.8179 (2.9898)	Entropy 1.38600 (1.39205)	Top-1 acc 53.516 (52.353)	Top-5 acc 76.953 (75.092)	lr 0.02047
Train [34][1960/3239]	Time 0.227 (0.590)	Data Time 0.001 (0.021)	Loss 3.3483 (2.9904)	Entropy 1.38599 (1.39201)	Top-1 acc 42.578 (52.341)	Top-5 acc 68.359 (75.082)	lr 0.02047
Train [34][1970/3239]	Time 0.214 (0.589)	Data Time 0.001 (0.021)	Loss 2.7944 (2.9901)	Entropy 1.38597 (1.39198)	Top-1 acc 54.297 (52.351)	Top-5 acc 78.906 (75.088)	lr 0.02047
Train [34][1980/3239]	Time 0.286 (0.588)	Data Time 0.001 (0.021)	Loss 3.0371 (2.9904)	Entropy 1.38593 (1.39195)	Top-1 acc 48.047 (52.342)	Top-5 acc 75.391 (75.082)	lr 0.02046
Train [34][1990/3239]	Time 2.387 (0.588)	Data Time 0.001 (0.021)	Loss 2.9173 (2.9908)	Entropy 1.38593 (1.39192)	Top-1 acc 56.641 (52.336)	Top-5 acc 76.172 (75.076)	lr 0.02046
Train [34][2000/3239]	Time 0.218 (0.586)	Data Time 0.001 (0.021)	Loss 3.0438 (2.9908)	Entropy 1.38590 (1.39189)	Top-1 acc 51.172 (52.333)	Top-5 acc 72.656 (75.075)	lr 0.02046
Train [34][2010/3239]	Time 0.221 (0.585)	Data Time 0.001 (0.020)	Loss 3.1266 (2.9909)	Entropy 1.38581 (1.39186)	Top-1 acc 49.609 (52.337)	Top-5 acc 73.828 (75.075)	lr 0.02046
Train [34][2020/3239]	Time 0.241 (0.585)	Data Time 0.001 (0.020)	Loss 2.9313 (2.9909)	Entropy 1.38582 (1.39183)	Top-1 acc 53.906 (52.337)	Top-5 acc 75.781 (75.074)	lr 0.02046
Train [34][2030/3239]	Time 0.218 (0.584)	Data Time 0.001 (0.020)	Loss 3.1604 (2.9909)	Entropy 1.38583 (1.39180)	Top-1 acc 46.875 (52.335)	Top-5 acc 73.438 (75.079)	lr 0.02046
Train [34][2040/3239]	Time 0.227 (0.583)	Data Time 0.001 (0.020)	Loss 3.0446 (2.9910)	Entropy 1.38581 (1.39177)	Top-1 acc 50.781 (52.333)	Top-5 acc 71.875 (75.074)	lr 0.02046
Train [34][2050/3239]	Time 0.204 (0.583)	Data Time 0.001 (0.020)	Loss 2.9684 (2.9912)	Entropy 1.38578 (1.39175)	Top-1 acc 53.906 (52.327)	Top-5 acc 71.094 (75.067)	lr 0.02046
Train [34][2060/3239]	Time 0.358 (0.582)	Data Time 0.001 (0.020)	Loss 3.0697 (2.9908)	Entropy 1.38576 (1.39172)	Top-1 acc 53.906 (52.337)	Top-5 acc 73.438 (75.075)	lr 0.02046
Train [34][2070/3239]	Time 0.213 (0.581)	Data Time 0.001 (0.020)	Loss 3.0469 (2.9912)	Entropy 1.38567 (1.39169)	Top-1 acc 50.391 (52.331)	Top-5 acc 78.125 (75.068)	lr 0.02046
Train [34][2080/3239]	Time 0.217 (0.581)	Data Time 0.002 (0.020)	Loss 3.0108 (2.9910)	Entropy 1.38568 (1.39166)	Top-1 acc 51.562 (52.338)	Top-5 acc 73.047 (75.077)	lr 0.02046
Train [34][2090/3239]	Time 0.272 (0.580)	Data Time 0.001 (0.020)	Loss 2.9613 (2.9907)	Entropy 1.38557 (1.39163)	Top-1 acc 50.391 (52.346)	Top-5 acc 78.125 (75.084)	lr 0.02046
Train [34][2100/3239]	Time 2.453 (0.580)	Data Time 0.001 (0.020)	Loss 3.0244 (2.9909)	Entropy 1.38557 (1.39160)	Top-1 acc 49.219 (52.331)	Top-5 acc 75.000 (75.078)	lr 0.02046
Train [34][2110/3239]	Time 0.241 (0.578)	Data Time 0.001 (0.020)	Loss 2.8661 (2.9908)	Entropy 1.38544 (1.39157)	Top-1 acc 56.250 (52.334)	Top-5 acc 77.734 (75.080)	lr 0.02045
Train [34][2120/3239]	Time 0.242 (0.578)	Data Time 0.002 (0.019)	Loss 3.0918 (2.9909)	Entropy 1.38533 (1.39154)	Top-1 acc 47.266 (52.331)	Top-5 acc 73.047 (75.077)	lr 0.02045
Train [34][2130/3239]	Time 0.235 (0.577)	Data Time 0.001 (0.019)	Loss 2.7810 (2.9909)	Entropy 1.38529 (1.39151)	Top-1 acc 56.641 (52.332)	Top-5 acc 80.078 (75.077)	lr 0.02045
Train [34][2140/3239]	Time 0.245 (0.576)	Data Time 0.001 (0.019)	Loss 3.0674 (2.9909)	Entropy 1.38511 (1.39148)	Top-1 acc 50.391 (52.332)	Top-5 acc 71.094 (75.078)	lr 0.02045
Train [34][2150/3239]	Time 0.202 (0.576)	Data Time 0.001 (0.019)	Loss 3.2187 (2.9908)	Entropy 1.38501 (1.39145)	Top-1 acc 46.875 (52.333)	Top-5 acc 71.484 (75.078)	lr 0.02045
Train [34][2160/3239]	Time 0.230 (0.575)	Data Time 0.001 (0.019)	Loss 3.0851 (2.9906)	Entropy 1.38492 (1.39142)	Top-1 acc 52.734 (52.334)	Top-5 acc 72.266 (75.083)	lr 0.02045
Train [34][2170/3239]	Time 0.317 (0.575)	Data Time 0.001 (0.019)	Loss 3.0274 (2.9909)	Entropy 1.38472 (1.39139)	Top-1 acc 54.297 (52.329)	Top-5 acc 73.438 (75.078)	lr 0.02045
Train [34][2180/3239]	Time 0.214 (0.574)	Data Time 0.001 (0.019)	Loss 2.8446 (2.9910)	Entropy 1.38468 (1.39136)	Top-1 acc 58.203 (52.332)	Top-5 acc 78.516 (75.074)	lr 0.02045
Train [34][2190/3239]	Time 0.243 (0.574)	Data Time 0.001 (0.019)	Loss 2.8441 (2.9907)	Entropy 1.38459 (1.39133)	Top-1 acc 55.859 (52.337)	Top-5 acc 77.734 (75.078)	lr 0.02045
Train [34][2200/3239]	Time 0.264 (0.573)	Data Time 0.001 (0.019)	Loss 3.1821 (2.9907)	Entropy 1.38454 (1.39130)	Top-1 acc 48.828 (52.336)	Top-5 acc 70.703 (75.077)	lr 0.02045
Train [34][2210/3239]	Time 2.281 (0.573)	Data Time 0.001 (0.019)	Loss 2.8994 (2.9909)	Entropy 1.38454 (1.39127)	Top-1 acc 57.031 (52.332)	Top-5 acc 77.344 (75.078)	lr 0.02045
Train [34][2220/3239]	Time 0.376 (0.571)	Data Time 0.001 (0.019)	Loss 2.9361 (2.9908)	Entropy 1.38448 (1.39124)	Top-1 acc 52.344 (52.339)	Top-5 acc 75.391 (75.082)	lr 0.02045
Train [34][2230/3239]	Time 0.252 (0.590)	Data Time 0.002 (0.019)	Loss 3.1088 (2.9909)	Entropy 1.38441 (1.39121)	Top-1 acc 50.391 (52.339)	Top-5 acc 69.922 (75.079)	lr 0.02045
Train [34][2240/3239]	Time 0.257 (0.589)	Data Time 0.002 (0.019)	Loss 3.1327 (2.9908)	Entropy 1.38435 (1.39118)	Top-1 acc 49.609 (52.343)	Top-5 acc 72.266 (75.081)	lr 0.02044
Train [34][2250/3239]	Time 0.220 (0.589)	Data Time 0.001 (0.018)	Loss 2.8722 (2.9909)	Entropy 1.38432 (1.39115)	Top-1 acc 51.562 (52.338)	Top-5 acc 78.516 (75.078)	lr 0.02044
Train [34][2260/3239]	Time 0.247 (0.588)	Data Time 0.001 (0.018)	Loss 3.0409 (2.9912)	Entropy 1.38428 (1.39112)	Top-1 acc 48.828 (52.330)	Top-5 acc 72.656 (75.075)	lr 0.02044
Train [34][2270/3239]	Time 0.226 (0.587)	Data Time 0.001 (0.018)	Loss 2.9540 (2.9910)	Entropy 1.38420 (1.39109)	Top-1 acc 57.031 (52.335)	Top-5 acc 75.781 (75.079)	lr 0.02044
Train [34][2280/3239]	Time 0.227 (0.587)	Data Time 0.001 (0.018)	Loss 3.0014 (2.9910)	Entropy 1.38419 (1.39106)	Top-1 acc 47.266 (52.336)	Top-5 acc 75.391 (75.081)	lr 0.02044
Train [34][2290/3239]	Time 0.202 (0.586)	Data Time 0.001 (0.018)	Loss 3.1675 (2.9913)	Entropy 1.38417 (1.39103)	Top-1 acc 49.219 (52.331)	Top-5 acc 74.219 (75.073)	lr 0.02044
Train [34][2300/3239]	Time 0.197 (0.586)	Data Time 0.001 (0.018)	Loss 2.9509 (2.9912)	Entropy 1.38415 (1.39100)	Top-1 acc 51.562 (52.331)	Top-5 acc 76.562 (75.076)	lr 0.02044
Train [34][2310/3239]	Time 0.225 (0.585)	Data Time 0.001 (0.018)	Loss 2.8598 (2.9917)	Entropy 1.38419 (1.39097)	Top-1 acc 55.078 (52.322)	Top-5 acc 76.172 (75.064)	lr 0.02044
Train [34][2320/3239]	Time 2.474 (0.585)	Data Time 0.001 (0.018)	Loss 2.9162 (2.9917)	Entropy 1.38419 (1.39094)	Top-1 acc 55.469 (52.325)	Top-5 acc 75.000 (75.061)	lr 0.02044
Train [34][2330/3239]	Time 0.332 (0.583)	Data Time 0.001 (0.018)	Loss 3.1174 (2.9918)	Entropy 1.38417 (1.39091)	Top-1 acc 50.781 (52.324)	Top-5 acc 75.000 (75.062)	lr 0.02044
Train [34][2340/3239]	Time 0.208 (0.583)	Data Time 0.001 (0.018)	Loss 3.0474 (2.9919)	Entropy 1.38411 (1.39088)	Top-1 acc 50.391 (52.322)	Top-5 acc 72.656 (75.061)	lr 0.02044
Train [34][2350/3239]	Time 0.223 (0.582)	Data Time 0.002 (0.018)	Loss 2.8994 (2.9918)	Entropy 1.38409 (1.39085)	Top-1 acc 53.516 (52.323)	Top-5 acc 76.172 (75.064)	lr 0.02044
Train [34][2360/3239]	Time 0.203 (0.582)	Data Time 0.001 (0.018)	Loss 3.0746 (2.9919)	Entropy 1.38408 (1.39082)	Top-1 acc 53.125 (52.324)	Top-5 acc 74.609 (75.065)	lr 0.02044
Train [34][2370/3239]	Time 0.238 (0.581)	Data Time 0.001 (0.018)	Loss 2.9814 (2.9919)	Entropy 1.38408 (1.39079)	Top-1 acc 52.734 (52.324)	Top-5 acc 75.781 (75.065)	lr 0.02043
Train [34][2380/3239]	Time 0.255 (0.580)	Data Time 0.002 (0.018)	Loss 3.0074 (2.9921)	Entropy 1.38403 (1.39077)	Top-1 acc 51.562 (52.319)	Top-5 acc 75.781 (75.058)	lr 0.02043
Train [34][2390/3239]	Time 0.258 (0.580)	Data Time 0.001 (0.017)	Loss 3.0011 (2.9919)	Entropy 1.38393 (1.39074)	Top-1 acc 55.859 (52.324)	Top-5 acc 75.000 (75.063)	lr 0.02043
Train [34][2400/3239]	Time 0.215 (0.579)	Data Time 0.002 (0.017)	Loss 2.7335 (2.9919)	Entropy 1.38395 (1.39071)	Top-1 acc 62.891 (52.328)	Top-5 acc 80.469 (75.065)	lr 0.02043
Train [34][2410/3239]	Time 0.211 (0.579)	Data Time 0.001 (0.017)	Loss 3.1397 (2.9920)	Entropy 1.38386 (1.39068)	Top-1 acc 49.609 (52.326)	Top-5 acc 73.047 (75.060)	lr 0.02043
Train [34][2420/3239]	Time 0.280 (0.578)	Data Time 0.001 (0.017)	Loss 2.9054 (2.9918)	Entropy 1.38380 (1.39065)	Top-1 acc 56.641 (52.335)	Top-5 acc 76.953 (75.063)	lr 0.02043
Train [34][2430/3239]	Time 2.381 (0.578)	Data Time 0.001 (0.017)	Loss 2.9410 (2.9917)	Entropy 1.38380 (1.39063)	Top-1 acc 53.125 (52.332)	Top-5 acc 74.219 (75.065)	lr 0.02043
Train [34][2440/3239]	Time 0.230 (0.576)	Data Time 0.001 (0.017)	Loss 3.0889 (2.9919)	Entropy 1.38380 (1.39060)	Top-1 acc 50.781 (52.329)	Top-5 acc 74.609 (75.066)	lr 0.02043
Train [34][2450/3239]	Time 0.224 (0.576)	Data Time 0.001 (0.017)	Loss 3.0198 (2.9917)	Entropy 1.38374 (1.39057)	Top-1 acc 49.609 (52.328)	Top-5 acc 74.609 (75.066)	lr 0.02043
Train [34][2460/3239]	Time 0.244 (0.575)	Data Time 0.001 (0.017)	Loss 3.1277 (2.9917)	Entropy 1.38368 (1.39054)	Top-1 acc 49.609 (52.329)	Top-5 acc 73.438 (75.065)	lr 0.02043
Train [34][2470/3239]	Time 0.218 (0.575)	Data Time 0.001 (0.017)	Loss 3.1690 (2.9920)	Entropy 1.38365 (1.39051)	Top-1 acc 50.391 (52.328)	Top-5 acc 71.094 (75.059)	lr 0.02043
Train [34][2480/3239]	Time 0.280 (0.574)	Data Time 0.001 (0.017)	Loss 3.0577 (2.9921)	Entropy 1.38336 (1.39049)	Top-1 acc 52.734 (52.325)	Top-5 acc 74.219 (75.060)	lr 0.02043
Train [34][2490/3239]	Time 0.212 (0.574)	Data Time 0.001 (0.017)	Loss 3.0757 (2.9923)	Entropy 1.38330 (1.39046)	Top-1 acc 46.875 (52.322)	Top-5 acc 73.828 (75.059)	lr 0.02043
Train [34][2500/3239]	Time 0.339 (0.573)	Data Time 0.001 (0.017)	Loss 2.9896 (2.9924)	Entropy 1.38329 (1.39043)	Top-1 acc 51.953 (52.322)	Top-5 acc 75.391 (75.056)	lr 0.02042
Train [34][2510/3239]	Time 0.223 (0.573)	Data Time 0.001 (0.017)	Loss 2.9301 (2.9922)	Entropy 1.38328 (1.39040)	Top-1 acc 52.344 (52.325)	Top-5 acc 75.781 (75.060)	lr 0.02042
Train [34][2520/3239]	Time 0.232 (0.572)	Data Time 0.001 (0.017)	Loss 3.0578 (2.9922)	Entropy 1.38320 (1.39037)	Top-1 acc 51.562 (52.324)	Top-5 acc 74.219 (75.062)	lr 0.02042
Train [34][2530/3239]	Time 0.235 (0.572)	Data Time 0.001 (0.017)	Loss 3.0817 (2.9923)	Entropy 1.38318 (1.39034)	Top-1 acc 47.656 (52.318)	Top-5 acc 73.828 (75.060)	lr 0.02042
Train [34][2540/3239]	Time 2.435 (0.571)	Data Time 0.002 (0.017)	Loss 3.0426 (2.9924)	Entropy 1.38318 (1.39031)	Top-1 acc 51.172 (52.315)	Top-5 acc 75.000 (75.062)	lr 0.02042
Train [34][2550/3239]	Time 0.229 (0.570)	Data Time 0.002 (0.016)	Loss 3.0070 (2.9924)	Entropy 1.38311 (1.39029)	Top-1 acc 54.688 (52.316)	Top-5 acc 73.438 (75.058)	lr 0.02042
Train [34][2560/3239]	Time 0.360 (0.570)	Data Time 0.001 (0.016)	Loss 2.8770 (2.9924)	Entropy 1.38303 (1.39026)	Top-1 acc 55.078 (52.318)	Top-5 acc 78.125 (75.058)	lr 0.02042
Train [34][2570/3239]	Time 0.231 (0.569)	Data Time 0.001 (0.016)	Loss 3.0193 (2.9927)	Entropy 1.38297 (1.39023)	Top-1 acc 52.344 (52.309)	Top-5 acc 76.562 (75.051)	lr 0.02042
Train [34][2580/3239]	Time 0.224 (0.569)	Data Time 0.001 (0.016)	Loss 2.9010 (2.9925)	Entropy 1.38287 (1.39020)	Top-1 acc 51.953 (52.309)	Top-5 acc 78.125 (75.057)	lr 0.02042
Train [34][2590/3239]	Time 0.370 (0.584)	Data Time 0.002 (0.016)	Loss 2.9899 (2.9923)	Entropy 1.38262 (1.39017)	Top-1 acc 51.953 (52.312)	Top-5 acc 73.438 (75.060)	lr 0.02042
Train [34][2600/3239]	Time 0.225 (0.584)	Data Time 0.002 (0.016)	Loss 3.0150 (2.9925)	Entropy 1.38255 (1.39014)	Top-1 acc 52.734 (52.305)	Top-5 acc 75.000 (75.054)	lr 0.02042
Train [34][2610/3239]	Time 0.216 (0.584)	Data Time 0.002 (0.016)	Loss 2.8724 (2.9926)	Entropy 1.38248 (1.39011)	Top-1 acc 54.688 (52.304)	Top-5 acc 76.562 (75.050)	lr 0.02042
Train [34][2620/3239]	Time 0.218 (0.583)	Data Time 0.001 (0.016)	Loss 2.9491 (2.9927)	Entropy 1.38243 (1.39009)	Top-1 acc 54.688 (52.297)	Top-5 acc 76.172 (75.047)	lr 0.02042
Train [34][2630/3239]	Time 0.219 (0.583)	Data Time 0.002 (0.016)	Loss 2.7681 (2.9925)	Entropy 1.38239 (1.39006)	Top-1 acc 60.938 (52.301)	Top-5 acc 80.859 (75.049)	lr 0.02041
Train [34][2640/3239]	Time 0.254 (0.582)	Data Time 0.001 (0.016)	Loss 3.0566 (2.9925)	Entropy 1.38224 (1.39003)	Top-1 acc 51.953 (52.300)	Top-5 acc 73.438 (75.050)	lr 0.02041
Train [34][2650/3239]	Time 0.248 (0.582)	Data Time 0.001 (0.016)	Loss 2.9084 (2.9923)	Entropy 1.38223 (1.39000)	Top-1 acc 49.609 (52.300)	Top-5 acc 78.516 (75.056)	lr 0.02041
Train [34][2660/3239]	Time 0.360 (0.581)	Data Time 0.001 (0.016)	Loss 2.8530 (2.9923)	Entropy 1.38219 (1.38997)	Top-1 acc 52.734 (52.298)	Top-5 acc 77.734 (75.059)	lr 0.02041
Train [34][2670/3239]	Time 0.281 (0.581)	Data Time 0.001 (0.016)	Loss 2.7350 (2.9923)	Entropy 1.38207 (1.38994)	Top-1 acc 62.891 (52.298)	Top-5 acc 81.641 (75.061)	lr 0.02041
Train [34][2680/3239]	Time 0.196 (0.580)	Data Time 0.001 (0.016)	Loss 3.0027 (2.9920)	Entropy 1.38208 (1.38991)	Top-1 acc 51.953 (52.301)	Top-5 acc 73.438 (75.065)	lr 0.02041
Train [34][2690/3239]	Time 0.161 (0.580)	Data Time 0.001 (0.016)	Loss 3.6601 (2.9924)	Entropy 1.38204 (1.38988)	Top-1 acc 36.719 (52.293)	Top-5 acc 60.938 (75.057)	lr 0.02041
Train [34][2700/3239]	Time 0.208 (0.579)	Data Time 0.001 (0.016)	Loss 3.0133 (2.9924)	Entropy 1.38193 (1.38985)	Top-1 acc 51.953 (52.294)	Top-5 acc 75.000 (75.057)	lr 0.02041
Train [34][2710/3239]	Time 0.228 (0.579)	Data Time 0.002 (0.016)	Loss 2.9615 (2.9924)	Entropy 1.38193 (1.38982)	Top-1 acc 53.125 (52.294)	Top-5 acc 79.297 (75.059)	lr 0.02041
Train [34][2720/3239]	Time 0.222 (0.578)	Data Time 0.001 (0.016)	Loss 3.0568 (2.9923)	Entropy 1.38192 (1.38979)	Top-1 acc 51.953 (52.290)	Top-5 acc 74.609 (75.061)	lr 0.02041
Train [34][2730/3239]	Time 0.217 (0.578)	Data Time 0.001 (0.016)	Loss 2.9238 (2.9924)	Entropy 1.38189 (1.38976)	Top-1 acc 47.656 (52.286)	Top-5 acc 75.781 (75.061)	lr 0.02041
Train [34][2740/3239]	Time 0.284 (0.577)	Data Time 0.001 (0.015)	Loss 3.0452 (2.9925)	Entropy 1.38178 (1.38974)	Top-1 acc 51.562 (52.288)	Top-5 acc 74.219 (75.060)	lr 0.02041
Train [34][2750/3239]	Time 0.260 (0.577)	Data Time 0.001 (0.015)	Loss 3.1709 (2.9924)	Entropy 1.38160 (1.38971)	Top-1 acc 49.609 (52.289)	Top-5 acc 69.531 (75.060)	lr 0.02040
Train [34][2760/3239]	Time 0.258 (0.576)	Data Time 0.001 (0.015)	Loss 3.0124 (2.9925)	Entropy 1.38152 (1.38968)	Top-1 acc 53.125 (52.287)	Top-5 acc 74.609 (75.058)	lr 0.02040
Train [34][2770/3239]	Time 0.331 (0.576)	Data Time 0.001 (0.015)	Loss 3.1120 (2.9925)	Entropy 1.38149 (1.38965)	Top-1 acc 50.391 (52.289)	Top-5 acc 73.828 (75.059)	lr 0.02040
Train [34][2780/3239]	Time 0.235 (0.576)	Data Time 0.002 (0.015)	Loss 3.0695 (2.9923)	Entropy 1.38145 (1.38962)	Top-1 acc 54.297 (52.300)	Top-5 acc 70.703 (75.063)	lr 0.02040
Train [34][2790/3239]	Time 0.213 (0.575)	Data Time 0.001 (0.015)	Loss 3.0558 (2.9921)	Entropy 1.38134 (1.38959)	Top-1 acc 48.828 (52.303)	Top-5 acc 72.656 (75.066)	lr 0.02040
Train [34][2800/3239]	Time 0.224 (0.575)	Data Time 0.001 (0.015)	Loss 3.0421 (2.9921)	Entropy 1.38130 (1.38956)	Top-1 acc 51.953 (52.302)	Top-5 acc 75.391 (75.068)	lr 0.02040
Train [34][2810/3239]	Time 0.247 (0.574)	Data Time 0.001 (0.015)	Loss 2.9661 (2.9926)	Entropy 1.38127 (1.38953)	Top-1 acc 54.688 (52.292)	Top-5 acc 78.125 (75.058)	lr 0.02040
Train [34][2820/3239]	Time 0.352 (0.574)	Data Time 0.001 (0.015)	Loss 3.0080 (2.9924)	Entropy 1.38120 (1.38950)	Top-1 acc 49.609 (52.298)	Top-5 acc 73.438 (75.061)	lr 0.02040
Train [34][2830/3239]	Time 0.241 (0.573)	Data Time 0.001 (0.015)	Loss 2.8893 (2.9923)	Entropy 1.38116 (1.38947)	Top-1 acc 56.641 (52.296)	Top-5 acc 75.781 (75.062)	lr 0.02040
Train [34][2840/3239]	Time 0.245 (0.573)	Data Time 0.001 (0.015)	Loss 2.9735 (2.9926)	Entropy 1.38113 (1.38944)	Top-1 acc 53.125 (52.291)	Top-5 acc 73.828 (75.058)	lr 0.02040
Train [34][2850/3239]	Time 0.244 (0.573)	Data Time 0.001 (0.015)	Loss 2.9575 (2.9927)	Entropy 1.38108 (1.38941)	Top-1 acc 55.469 (52.290)	Top-5 acc 75.781 (75.055)	lr 0.02040
Train [34][2860/3239]	Time 0.241 (0.572)	Data Time 0.002 (0.015)	Loss 2.8749 (2.9927)	Entropy 1.38097 (1.38938)	Top-1 acc 51.953 (52.282)	Top-5 acc 74.609 (75.054)	lr 0.02040
Train [34][2870/3239]	Time 0.314 (0.572)	Data Time 0.001 (0.015)	Loss 2.9928 (2.9926)	Entropy 1.38096 (1.38935)	Top-1 acc 54.297 (52.289)	Top-5 acc 72.656 (75.055)	lr 0.02040
Train [34][2880/3239]	Time 0.229 (0.571)	Data Time 0.001 (0.015)	Loss 3.0189 (2.9927)	Entropy 1.38089 (1.38933)	Top-1 acc 48.047 (52.288)	Top-5 acc 72.656 (75.054)	lr 0.02039
Train [34][2890/3239]	Time 0.207 (0.571)	Data Time 0.001 (0.015)	Loss 3.0737 (2.9928)	Entropy 1.38087 (1.38930)	Top-1 acc 51.953 (52.287)	Top-5 acc 74.219 (75.053)	lr 0.02039
Train [34][2900/3239]	Time 0.227 (0.570)	Data Time 0.001 (0.015)	Loss 2.9027 (2.9925)	Entropy 1.38082 (1.38927)	Top-1 acc 53.516 (52.292)	Top-5 acc 77.344 (75.057)	lr 0.02039
Train [34][2910/3239]	Time 0.247 (0.570)	Data Time 0.001 (0.015)	Loss 2.9921 (2.9924)	Entropy 1.38073 (1.38924)	Top-1 acc 52.344 (52.294)	Top-5 acc 73.828 (75.056)	lr 0.02039
Train [34][2920/3239]	Time 0.333 (0.585)	Data Time 0.003 (0.015)	Loss 2.9320 (2.9922)	Entropy 1.38063 (1.38921)	Top-1 acc 51.953 (52.296)	Top-5 acc 75.781 (75.060)	lr 0.02039
Train [34][2930/3239]	Time 0.263 (0.585)	Data Time 0.006 (0.015)	Loss 3.1328 (2.9923)	Entropy 1.38058 (1.38918)	Top-1 acc 47.656 (52.292)	Top-5 acc 72.266 (75.063)	lr 0.02039
Train [34][2940/3239]	Time 0.265 (0.584)	Data Time 0.002 (0.015)	Loss 2.9922 (2.9922)	Entropy 1.38055 (1.38915)	Top-1 acc 52.734 (52.293)	Top-5 acc 75.781 (75.064)	lr 0.02039
Train [34][2950/3239]	Time 0.216 (0.584)	Data Time 0.001 (0.014)	Loss 2.8898 (2.9923)	Entropy 1.38021 (1.38912)	Top-1 acc 54.297 (52.294)	Top-5 acc 76.953 (75.062)	lr 0.02039
Train [34][2960/3239]	Time 0.225 (0.584)	Data Time 0.003 (0.014)	Loss 3.1300 (2.9922)	Entropy 1.38021 (1.38909)	Top-1 acc 51.172 (52.302)	Top-5 acc 72.656 (75.066)	lr 0.02039
Train [34][2970/3239]	Time 0.214 (0.583)	Data Time 0.001 (0.014)	Loss 2.9935 (2.9923)	Entropy 1.38018 (1.38906)	Top-1 acc 51.562 (52.295)	Top-5 acc 75.391 (75.066)	lr 0.02039
Train [34][2980/3239]	Time 0.231 (0.583)	Data Time 0.001 (0.014)	Loss 2.9628 (2.9924)	Entropy 1.38013 (1.38903)	Top-1 acc 50.000 (52.294)	Top-5 acc 74.219 (75.067)	lr 0.02039
Train [34][2990/3239]	Time 0.244 (0.582)	Data Time 0.001 (0.014)	Loss 2.9129 (2.9922)	Entropy 1.38002 (1.38900)	Top-1 acc 53.125 (52.298)	Top-5 acc 79.297 (75.069)	lr 0.02039
Train [34][3000/3239]	Time 0.248 (0.582)	Data Time 0.001 (0.014)	Loss 3.0799 (2.9920)	Entropy 1.37998 (1.38897)	Top-1 acc 53.125 (52.305)	Top-5 acc 71.484 (75.073)	lr 0.02039
Train [34][3010/3239]	Time 0.268 (0.581)	Data Time 0.001 (0.014)	Loss 3.0707 (2.9919)	Entropy 1.37987 (1.38894)	Top-1 acc 48.828 (52.307)	Top-5 acc 75.000 (75.073)	lr 0.02038
Train [34][3020/3239]	Time 0.220 (0.581)	Data Time 0.001 (0.014)	Loss 2.9002 (2.9922)	Entropy 1.37986 (1.38891)	Top-1 acc 55.078 (52.303)	Top-5 acc 75.000 (75.069)	lr 0.02038
Train [34][3030/3239]	Time 0.316 (0.581)	Data Time 0.001 (0.014)	Loss 2.9587 (2.9921)	Entropy 1.37974 (1.38888)	Top-1 acc 55.078 (52.308)	Top-5 acc 77.344 (75.071)	lr 0.02038
Train [34][3040/3239]	Time 0.250 (0.580)	Data Time 0.001 (0.014)	Loss 2.6944 (2.9919)	Entropy 1.37968 (1.38885)	Top-1 acc 61.719 (52.310)	Top-5 acc 80.078 (75.073)	lr 0.02038
Train [34][3050/3239]	Time 0.288 (0.580)	Data Time 0.001 (0.014)	Loss 3.1943 (2.9922)	Entropy 1.37961 (1.38882)	Top-1 acc 44.531 (52.303)	Top-5 acc 72.266 (75.068)	lr 0.02038
Train [34][3060/3239]	Time 0.233 (0.579)	Data Time 0.001 (0.014)	Loss 2.9769 (2.9922)	Entropy 1.37970 (1.38879)	Top-1 acc 50.781 (52.303)	Top-5 acc 73.828 (75.070)	lr 0.02038
Train [34][3070/3239]	Time 0.270 (0.579)	Data Time 0.001 (0.014)	Loss 3.0718 (2.9922)	Entropy 1.37970 (1.38876)	Top-1 acc 51.562 (52.303)	Top-5 acc 73.438 (75.069)	lr 0.02038
Train [34][3080/3239]	Time 0.342 (0.579)	Data Time 0.001 (0.014)	Loss 3.1918 (2.9923)	Entropy 1.37953 (1.38873)	Top-1 acc 51.953 (52.300)	Top-5 acc 69.531 (75.068)	lr 0.02038
Train [34][3090/3239]	Time 0.246 (0.578)	Data Time 0.001 (0.014)	Loss 2.9414 (2.9923)	Entropy 1.37952 (1.38870)	Top-1 acc 56.250 (52.300)	Top-5 acc 75.391 (75.069)	lr 0.02038
Train [34][3100/3239]	Time 0.266 (0.578)	Data Time 0.001 (0.014)	Loss 2.9099 (2.9922)	Entropy 1.37937 (1.38867)	Top-1 acc 54.297 (52.302)	Top-5 acc 76.562 (75.067)	lr 0.02038
Train [34][3110/3239]	Time 0.270 (0.577)	Data Time 0.001 (0.014)	Loss 3.0332 (2.9919)	Entropy 1.37935 (1.38864)	Top-1 acc 50.781 (52.310)	Top-5 acc 71.875 (75.072)	lr 0.02038
Train [34][3120/3239]	Time 0.266 (0.577)	Data Time 0.001 (0.014)	Loss 3.2835 (2.9920)	Entropy 1.37930 (1.38861)	Top-1 acc 46.875 (52.309)	Top-5 acc 68.750 (75.069)	lr 0.02038
Train [34][3130/3239]	Time 0.355 (0.577)	Data Time 0.001 (0.014)	Loss 2.8997 (2.9922)	Entropy 1.37907 (1.38858)	Top-1 acc 56.641 (52.302)	Top-5 acc 75.781 (75.064)	lr 0.02038
Train [34][3140/3239]	Time 0.254 (0.576)	Data Time 0.001 (0.014)	Loss 3.2935 (2.9924)	Entropy 1.37898 (1.38855)	Top-1 acc 42.969 (52.296)	Top-5 acc 68.750 (75.061)	lr 0.02037
Train [34][3150/3239]	Time 0.316 (0.576)	Data Time 0.001 (0.014)	Loss 2.9249 (2.9924)	Entropy 1.37881 (1.38852)	Top-1 acc 51.562 (52.298)	Top-5 acc 76.172 (75.058)	lr 0.02037
Train [34][3160/3239]	Time 0.231 (0.576)	Data Time 0.001 (0.014)	Loss 2.9897 (2.9924)	Entropy 1.37881 (1.38849)	Top-1 acc 56.250 (52.300)	Top-5 acc 73.047 (75.055)	lr 0.02037
Train [34][3170/3239]	Time 0.207 (0.575)	Data Time 0.001 (0.014)	Loss 3.0908 (2.9926)	Entropy 1.37873 (1.38846)	Top-1 acc 50.391 (52.295)	Top-5 acc 75.781 (75.055)	lr 0.02037
Train [34][3180/3239]	Time 0.299 (0.575)	Data Time 0.000 (0.014)	Loss 2.9101 (2.9926)	Entropy 1.37861 (1.38843)	Top-1 acc 54.688 (52.296)	Top-5 acc 76.562 (75.055)	lr 0.02037
Train [34][3190/3239]	Time 0.248 (0.575)	Data Time 0.000 (0.014)	Loss 3.0833 (2.9926)	Entropy 1.37847 (1.38840)	Top-1 acc 50.000 (52.298)	Top-5 acc 73.047 (75.057)	lr 0.02037
Train [34][3200/3239]	Time 0.168 (0.574)	Data Time 0.000 (0.013)	Loss 2.9505 (2.9927)	Entropy 1.37835 (1.38837)	Top-1 acc 52.734 (52.294)	Top-5 acc 76.953 (75.057)	lr 0.02037
Train [34][3210/3239]	Time 0.216 (0.574)	Data Time 0.000 (0.013)	Loss 2.9568 (2.9925)	Entropy 1.37821 (1.38834)	Top-1 acc 56.250 (52.300)	Top-5 acc 76.953 (75.059)	lr 0.02037
Train [34][3220/3239]	Time 0.229 (0.573)	Data Time 0.000 (0.013)	Loss 3.0975 (2.9927)	Entropy 1.37817 (1.38831)	Top-1 acc 50.391 (52.301)	Top-5 acc 72.656 (75.054)	lr 0.02037
Train [34][3230/3239]	Time 0.217 (0.573)	Data Time 0.000 (0.013)	Loss 3.1639 (2.9930)	Entropy 1.37816 (1.38828)	Top-1 acc 45.703 (52.294)	Top-5 acc 67.578 (75.047)	lr 0.02037
Train [34][3239/3239]	Time 2.333 (0.572)	Data Time 0.000 (0.013)	Loss 2.7115 (2.9929)	Entropy 1.37816 (1.38825)	Top-1 acc 55.556 (52.295)	Top-5 acc 81.481 (75.050)	lr 0.02037
==========Valid [34/120]	loss 1.801	top-1 acc 59.964 (59.964)	top-5 acc 81.700	Train top-1 52.295	top-5 75.050	Entropy 1.37816	Latency-None: 0.000ms	Flops: 559.17M
Train [35][0/3239]	Time 29.268 (29.268)	Data Time 28.645 (28.645)	Loss 2.9958 (2.9958)	Entropy 1.37812 (1.37812)	Top-1 acc 51.953 (51.953)	Top-5 acc 74.219 (74.219)	lr 0.02037
Train [35][10/3239]	Time 49.610 (7.513)	Data Time 0.002 (2.637)	Loss 2.9645 (2.9659)	Entropy 1.37812 (1.37812)	Top-1 acc 53.516 (51.847)	Top-5 acc 76.562 (75.746)	lr 0.02037
Train [35][20/3239]	Time 0.221 (4.059)	Data Time 0.002 (1.383)	Loss 2.9850 (2.9674)	Entropy 1.37805 (1.37808)	Top-1 acc 52.734 (52.418)	Top-5 acc 75.000 (75.707)	lr 0.02036
Train [35][30/3239]	Time 0.253 (2.897)	Data Time 0.001 (0.937)	Loss 2.8916 (2.9400)	Entropy 1.37786 (1.37802)	Top-1 acc 57.422 (52.974)	Top-5 acc 75.781 (76.197)	lr 0.02036
Train [35][40/3239]	Time 0.237 (2.302)	Data Time 0.002 (0.709)	Loss 3.0537 (2.9414)	Entropy 1.37778 (1.37796)	Top-1 acc 51.172 (53.096)	Top-5 acc 72.656 (76.124)	lr 0.02036
Train [35][50/3239]	Time 0.319 (1.940)	Data Time 0.001 (0.570)	Loss 2.7914 (2.9398)	Entropy 1.37777 (1.37793)	Top-1 acc 59.375 (53.202)	Top-5 acc 78.906 (76.080)	lr 0.02036
Train [35][60/3239]	Time 0.207 (1.694)	Data Time 0.001 (0.477)	Loss 2.9709 (2.9391)	Entropy 1.37774 (1.37790)	Top-1 acc 52.734 (53.375)	Top-5 acc 75.000 (75.999)	lr 0.02036
Train [35][70/3239]	Time 0.217 (1.521)	Data Time 0.001 (0.410)	Loss 2.9091 (2.9386)	Entropy 1.37772 (1.37787)	Top-1 acc 52.734 (53.433)	Top-5 acc 78.906 (76.073)	lr 0.02036
Train [35][80/3239]	Time 0.220 (1.389)	Data Time 0.001 (0.360)	Loss 2.9289 (2.9392)	Entropy 1.37765 (1.37785)	Top-1 acc 50.000 (53.463)	Top-5 acc 76.562 (76.133)	lr 0.02036
Train [35][90/3239]	Time 0.214 (1.285)	Data Time 0.001 (0.320)	Loss 3.0157 (2.9465)	Entropy 1.37737 (1.37782)	Top-1 acc 52.344 (53.284)	Top-5 acc 73.828 (75.987)	lr 0.02036
Train [35][100/3239]	Time 0.228 (1.204)	Data Time 0.002 (0.289)	Loss 2.9257 (2.9398)	Entropy 1.37738 (1.37778)	Top-1 acc 50.781 (53.512)	Top-5 acc 74.609 (76.106)	lr 0.02036
Train [35][110/3239]	Time 0.225 (1.137)	Data Time 0.002 (0.263)	Loss 2.9302 (2.9380)	Entropy 1.37723 (1.37774)	Top-1 acc 55.078 (53.625)	Top-5 acc 76.953 (76.059)	lr 0.02036
Train [35][120/3239]	Time 2.280 (1.078)	Data Time 0.002 (0.241)	Loss 2.6989 (2.9405)	Entropy 1.37723 (1.37770)	Top-1 acc 60.938 (53.571)	Top-5 acc 78.516 (75.939)	lr 0.02036
Train [35][130/3239]	Time 0.223 (1.013)	Data Time 0.001 (0.223)	Loss 2.7521 (2.9417)	Entropy 1.37725 (1.37766)	Top-1 acc 58.594 (53.584)	Top-5 acc 82.031 (75.915)	lr 0.02036
Train [35][140/3239]	Time 0.170 (0.971)	Data Time 0.001 (0.207)	Loss 2.8993 (2.9393)	Entropy 1.37723 (1.37763)	Top-1 acc 55.469 (53.704)	Top-5 acc 77.344 (75.959)	lr 0.02036
Train [35][150/3239]	Time 0.210 (0.937)	Data Time 0.001 (0.194)	Loss 2.8344 (2.9378)	Entropy 1.37718 (1.37760)	Top-1 acc 56.250 (53.699)	Top-5 acc 78.125 (75.962)	lr 0.02035
Train [35][160/3239]	Time 0.325 (0.906)	Data Time 0.001 (0.182)	Loss 3.1351 (2.9404)	Entropy 1.37713 (1.37758)	Top-1 acc 48.438 (53.656)	Top-5 acc 70.703 (75.869)	lr 0.02035
Train [35][170/3239]	Time 0.209 (0.880)	Data Time 0.002 (0.172)	Loss 2.8600 (2.9418)	Entropy 1.37707 (1.37755)	Top-1 acc 58.594 (53.662)	Top-5 acc 78.516 (75.886)	lr 0.02035
Train [35][180/3239]	Time 0.223 (0.856)	Data Time 0.001 (0.162)	Loss 2.8203 (2.9438)	Entropy 1.37700 (1.37752)	Top-1 acc 55.469 (53.587)	Top-5 acc 78.516 (75.850)	lr 0.02035
Train [35][190/3239]	Time 0.199 (0.834)	Data Time 0.002 (0.154)	Loss 3.0698 (2.9432)	Entropy 1.37698 (1.37749)	Top-1 acc 50.000 (53.624)	Top-5 acc 73.438 (75.832)	lr 0.02035
Train [35][200/3239]	Time 0.212 (0.816)	Data Time 0.001 (0.146)	Loss 2.9312 (2.9446)	Entropy 1.37672 (1.37746)	Top-1 acc 53.516 (53.545)	Top-5 acc 75.000 (75.816)	lr 0.02035
Train [35][210/3239]	Time 0.289 (0.798)	Data Time 0.001 (0.139)	Loss 3.1712 (2.9478)	Entropy 1.37664 (1.37743)	Top-1 acc 48.828 (53.514)	Top-5 acc 70.703 (75.776)	lr 0.02035
Train [35][220/3239]	Time 0.405 (0.784)	Data Time 0.001 (0.133)	Loss 2.9478 (2.9463)	Entropy 1.37661 (1.37739)	Top-1 acc 55.078 (53.544)	Top-5 acc 76.172 (75.804)	lr 0.02035
Train [35][230/3239]	Time 2.338 (0.769)	Data Time 0.001 (0.127)	Loss 2.8762 (2.9462)	Entropy 1.37661 (1.37736)	Top-1 acc 54.688 (53.573)	Top-5 acc 80.469 (75.854)	lr 0.02035
Train [35][240/3239]	Time 0.268 (0.747)	Data Time 0.002 (0.122)	Loss 2.8206 (2.9462)	Entropy 1.37658 (1.37732)	Top-1 acc 60.156 (53.548)	Top-5 acc 77.344 (75.861)	lr 0.02035
Train [35][250/3239]	Time 0.227 (0.735)	Data Time 0.001 (0.117)	Loss 2.9557 (2.9455)	Entropy 1.37652 (1.37729)	Top-1 acc 48.828 (53.534)	Top-5 acc 75.000 (75.858)	lr 0.02035
Train [35][260/3239]	Time 0.215 (0.724)	Data Time 0.001 (0.113)	Loss 3.0388 (2.9443)	Entropy 1.37646 (1.37726)	Top-1 acc 49.219 (53.495)	Top-5 acc 74.609 (75.898)	lr 0.02035
Train [35][270/3239]	Time 0.198 (0.713)	Data Time 0.001 (0.109)	Loss 3.1631 (2.9464)	Entropy 1.37636 (1.37723)	Top-1 acc 50.000 (53.458)	Top-5 acc 69.922 (75.862)	lr 0.02035
Train [35][280/3239]	Time 0.223 (0.704)	Data Time 0.001 (0.105)	Loss 3.1428 (2.9461)	Entropy 1.37628 (1.37720)	Top-1 acc 46.875 (53.441)	Top-5 acc 73.438 (75.897)	lr 0.02034
Train [35][290/3239]	Time 0.162 (0.695)	Data Time 0.001 (0.102)	Loss 2.9231 (2.9459)	Entropy 1.37624 (1.37716)	Top-1 acc 51.953 (53.450)	Top-5 acc 77.344 (75.910)	lr 0.02034
Train [35][300/3239]	Time 0.209 (0.687)	Data Time 0.001 (0.098)	Loss 3.1394 (2.9472)	Entropy 1.37619 (1.37713)	Top-1 acc 52.734 (53.399)	Top-5 acc 70.312 (75.890)	lr 0.02034
Train [35][310/3239]	Time 0.215 (0.680)	Data Time 0.001 (0.095)	Loss 2.8918 (2.9465)	Entropy 1.37625 (1.37710)	Top-1 acc 52.734 (53.389)	Top-5 acc 77.344 (75.898)	lr 0.02034
Train [35][320/3239]	Time 0.260 (0.673)	Data Time 0.002 (0.092)	Loss 2.9160 (2.9469)	Entropy 1.37619 (1.37708)	Top-1 acc 53.906 (53.379)	Top-5 acc 76.172 (75.887)	lr 0.02034
Train [35][330/3239]	Time 0.216 (0.667)	Data Time 0.001 (0.090)	Loss 2.8283 (2.9475)	Entropy 1.37614 (1.37705)	Top-1 acc 55.469 (53.348)	Top-5 acc 78.125 (75.861)	lr 0.02034
Train [35][340/3239]	Time 2.451 (0.661)	Data Time 0.001 (0.087)	Loss 2.8555 (2.9462)	Entropy 1.37614 (1.37702)	Top-1 acc 54.688 (53.385)	Top-5 acc 76.953 (75.890)	lr 0.02034
Train [35][350/3239]	Time 0.217 (0.649)	Data Time 0.001 (0.085)	Loss 3.0486 (2.9450)	Entropy 1.37610 (1.37700)	Top-1 acc 52.344 (53.443)	Top-5 acc 74.219 (75.899)	lr 0.02034
Train [35][360/3239]	Time 0.231 (0.644)	Data Time 0.001 (0.082)	Loss 3.0040 (2.9458)	Entropy 1.37610 (1.37697)	Top-1 acc 52.734 (53.427)	Top-5 acc 76.562 (75.904)	lr 0.02034
Train [35][370/3239]	Time 0.232 (0.638)	Data Time 0.001 (0.080)	Loss 3.1089 (2.9459)	Entropy 1.37604 (1.37695)	Top-1 acc 50.781 (53.427)	Top-5 acc 72.266 (75.900)	lr 0.02034
Train [35][380/3239]	Time 0.256 (0.756)	Data Time 0.002 (0.078)	Loss 2.7647 (2.9443)	Entropy 1.37599 (1.37692)	Top-1 acc 56.641 (53.431)	Top-5 acc 83.203 (75.960)	lr 0.02034
Train [35][390/3239]	Time 0.241 (0.749)	Data Time 0.002 (0.076)	Loss 3.1208 (2.9465)	Entropy 1.37599 (1.37690)	Top-1 acc 48.047 (53.379)	Top-5 acc 74.609 (75.912)	lr 0.02034
Train [35][400/3239]	Time 0.211 (0.741)	Data Time 0.002 (0.074)	Loss 2.9691 (2.9466)	Entropy 1.37591 (1.37687)	Top-1 acc 54.688 (53.354)	Top-5 acc 75.391 (75.917)	lr 0.02034
Train [35][410/3239]	Time 0.221 (0.735)	Data Time 0.002 (0.072)	Loss 2.9666 (2.9465)	Entropy 1.37591 (1.37685)	Top-1 acc 51.172 (53.355)	Top-5 acc 75.391 (75.930)	lr 0.02033
Train [35][420/3239]	Time 0.208 (0.728)	Data Time 0.001 (0.071)	Loss 2.8815 (2.9465)	Entropy 1.37590 (1.37683)	Top-1 acc 56.641 (53.345)	Top-5 acc 77.344 (75.928)	lr 0.02033
Train [35][430/3239]	Time 0.262 (0.722)	Data Time 0.001 (0.069)	Loss 2.8333 (2.9468)	Entropy 1.37587 (1.37681)	Top-1 acc 57.031 (53.343)	Top-5 acc 78.906 (75.917)	lr 0.02033
Train [35][440/3239]	Time 0.291 (0.716)	Data Time 0.002 (0.068)	Loss 2.9708 (2.9466)	Entropy 1.37583 (1.37679)	Top-1 acc 50.781 (53.341)	Top-5 acc 76.562 (75.929)	lr 0.02033
Train [35][450/3239]	Time 2.547 (0.711)	Data Time 0.002 (0.066)	Loss 3.0466 (2.9466)	Entropy 1.37583 (1.37676)	Top-1 acc 52.344 (53.335)	Top-5 acc 76.172 (75.928)	lr 0.02033
Train [35][460/3239]	Time 0.205 (0.700)	Data Time 0.001 (0.065)	Loss 2.8667 (2.9460)	Entropy 1.37601 (1.37675)	Top-1 acc 55.078 (53.341)	Top-5 acc 77.734 (75.932)	lr 0.02033
Train [35][470/3239]	Time 0.197 (0.695)	Data Time 0.001 (0.063)	Loss 2.8299 (2.9457)	Entropy 1.37599 (1.37673)	Top-1 acc 56.250 (53.340)	Top-5 acc 78.125 (75.930)	lr 0.02033
Train [35][480/3239]	Time 0.215 (0.689)	Data Time 0.001 (0.062)	Loss 2.7819 (2.9458)	Entropy 1.37588 (1.37671)	Top-1 acc 55.469 (53.326)	Top-5 acc 78.516 (75.917)	lr 0.02033
Train [35][490/3239]	Time 0.321 (0.685)	Data Time 0.001 (0.061)	Loss 3.0045 (2.9453)	Entropy 1.37578 (1.37670)	Top-1 acc 53.125 (53.333)	Top-5 acc 76.562 (75.932)	lr 0.02033
Train [35][500/3239]	Time 0.208 (0.681)	Data Time 0.001 (0.060)	Loss 3.0510 (2.9461)	Entropy 1.37573 (1.37668)	Top-1 acc 53.125 (53.318)	Top-5 acc 74.219 (75.931)	lr 0.02033
Train [35][510/3239]	Time 0.255 (0.676)	Data Time 0.001 (0.059)	Loss 3.0306 (2.9459)	Entropy 1.37564 (1.37666)	Top-1 acc 48.438 (53.325)	Top-5 acc 72.266 (75.935)	lr 0.02033
Train [35][520/3239]	Time 0.223 (0.672)	Data Time 0.001 (0.058)	Loss 2.8831 (2.9459)	Entropy 1.37562 (1.37664)	Top-1 acc 52.734 (53.326)	Top-5 acc 79.297 (75.942)	lr 0.02033
Train [35][530/3239]	Time 0.227 (0.668)	Data Time 0.001 (0.056)	Loss 3.0824 (2.9464)	Entropy 1.37548 (1.37662)	Top-1 acc 51.172 (53.317)	Top-5 acc 73.047 (75.911)	lr 0.02032
Train [35][540/3239]	Time 0.228 (0.664)	Data Time 0.002 (0.055)	Loss 2.8806 (2.9487)	Entropy 1.37530 (1.37660)	Top-1 acc 51.562 (53.267)	Top-5 acc 76.172 (75.870)	lr 0.02032
Train [35][550/3239]	Time 0.365 (0.660)	Data Time 0.001 (0.054)	Loss 3.0063 (2.9497)	Entropy 1.37526 (1.37657)	Top-1 acc 52.344 (53.245)	Top-5 acc 74.219 (75.851)	lr 0.02032
Train [35][560/3239]	Time 2.465 (0.657)	Data Time 0.001 (0.054)	Loss 2.9732 (2.9499)	Entropy 1.37526 (1.37655)	Top-1 acc 51.562 (53.225)	Top-5 acc 74.219 (75.853)	lr 0.02032
Train [35][570/3239]	Time 0.213 (0.649)	Data Time 0.001 (0.053)	Loss 2.9241 (2.9507)	Entropy 1.37530 (1.37653)	Top-1 acc 53.125 (53.202)	Top-5 acc 77.344 (75.843)	lr 0.02032
Train [35][580/3239]	Time 0.223 (0.646)	Data Time 0.001 (0.052)	Loss 2.8287 (2.9511)	Entropy 1.37526 (1.37651)	Top-1 acc 56.250 (53.200)	Top-5 acc 77.734 (75.825)	lr 0.02032
Train [35][590/3239]	Time 0.232 (0.643)	Data Time 0.002 (0.051)	Loss 3.0272 (2.9509)	Entropy 1.37521 (1.37648)	Top-1 acc 52.344 (53.215)	Top-5 acc 73.047 (75.823)	lr 0.02032
Train [35][600/3239]	Time 0.322 (0.640)	Data Time 0.001 (0.050)	Loss 2.9111 (2.9507)	Entropy 1.37519 (1.37646)	Top-1 acc 52.734 (53.202)	Top-5 acc 74.219 (75.815)	lr 0.02032
Train [35][610/3239]	Time 0.227 (0.637)	Data Time 0.001 (0.049)	Loss 2.8486 (2.9514)	Entropy 1.37512 (1.37644)	Top-1 acc 55.078 (53.193)	Top-5 acc 76.562 (75.792)	lr 0.02032
Train [35][620/3239]	Time 0.217 (0.634)	Data Time 0.001 (0.049)	Loss 2.9158 (2.9516)	Entropy 1.37502 (1.37642)	Top-1 acc 55.469 (53.192)	Top-5 acc 79.688 (75.805)	lr 0.02032
Train [35][630/3239]	Time 0.257 (0.631)	Data Time 0.001 (0.048)	Loss 2.8017 (2.9521)	Entropy 1.37494 (1.37640)	Top-1 acc 51.953 (53.152)	Top-5 acc 79.297 (75.802)	lr 0.02032
Train [35][640/3239]	Time 0.215 (0.629)	Data Time 0.001 (0.047)	Loss 2.8889 (2.9521)	Entropy 1.37491 (1.37637)	Top-1 acc 52.734 (53.149)	Top-5 acc 78.125 (75.805)	lr 0.02032
Train [35][650/3239]	Time 0.206 (0.626)	Data Time 0.002 (0.046)	Loss 2.8920 (2.9509)	Entropy 1.37468 (1.37635)	Top-1 acc 51.953 (53.177)	Top-5 acc 77.344 (75.829)	lr 0.02032
Train [35][660/3239]	Time 0.302 (0.624)	Data Time 0.001 (0.046)	Loss 3.0703 (2.9518)	Entropy 1.37456 (1.37633)	Top-1 acc 50.391 (53.149)	Top-5 acc 72.266 (75.813)	lr 0.02031
Train [35][670/3239]	Time 2.369 (0.621)	Data Time 0.001 (0.045)	Loss 2.7671 (2.9529)	Entropy 1.37456 (1.37630)	Top-1 acc 58.984 (53.141)	Top-5 acc 78.516 (75.810)	lr 0.02031
Train [35][680/3239]	Time 0.219 (0.616)	Data Time 0.001 (0.044)	Loss 2.9160 (2.9529)	Entropy 1.37453 (1.37627)	Top-1 acc 50.391 (53.124)	Top-5 acc 75.781 (75.816)	lr 0.02031
Train [35][690/3239]	Time 0.225 (0.613)	Data Time 0.001 (0.044)	Loss 3.0338 (2.9524)	Entropy 1.37447 (1.37625)	Top-1 acc 51.562 (53.142)	Top-5 acc 73.828 (75.828)	lr 0.02031
Train [35][700/3239]	Time 0.234 (0.611)	Data Time 0.001 (0.043)	Loss 3.0079 (2.9522)	Entropy 1.37444 (1.37622)	Top-1 acc 56.641 (53.147)	Top-5 acc 76.172 (75.843)	lr 0.02031
Train [35][710/3239]	Time 0.300 (0.608)	Data Time 0.001 (0.043)	Loss 2.8023 (2.9512)	Entropy 1.37440 (1.37620)	Top-1 acc 52.344 (53.164)	Top-5 acc 80.078 (75.866)	lr 0.02031
Train [35][720/3239]	Time 0.233 (0.606)	Data Time 0.001 (0.042)	Loss 3.2293 (2.9514)	Entropy 1.37433 (1.37617)	Top-1 acc 46.094 (53.179)	Top-5 acc 71.875 (75.867)	lr 0.02031
Train [35][730/3239]	Time 0.234 (0.604)	Data Time 0.002 (0.042)	Loss 3.0203 (2.9527)	Entropy 1.37422 (1.37614)	Top-1 acc 52.344 (53.174)	Top-5 acc 74.219 (75.841)	lr 0.02031
Train [35][740/3239]	Time 0.235 (0.661)	Data Time 0.002 (0.041)	Loss 2.9378 (2.9524)	Entropy 1.37382 (1.37612)	Top-1 acc 56.641 (53.182)	Top-5 acc 75.000 (75.844)	lr 0.02031
Train [35][750/3239]	Time 0.200 (0.658)	Data Time 0.002 (0.040)	Loss 3.0567 (2.9527)	Entropy 1.37374 (1.37609)	Top-1 acc 48.438 (53.182)	Top-5 acc 71.484 (75.835)	lr 0.02031
Train [35][760/3239]	Time 0.316 (0.656)	Data Time 0.001 (0.040)	Loss 3.0357 (2.9529)	Entropy 1.37361 (1.37605)	Top-1 acc 49.609 (53.181)	Top-5 acc 77.734 (75.837)	lr 0.02031
Train [35][770/3239]	Time 0.230 (0.653)	Data Time 0.001 (0.039)	Loss 2.9062 (2.9528)	Entropy 1.37345 (1.37602)	Top-1 acc 54.688 (53.188)	Top-5 acc 75.391 (75.839)	lr 0.02031
Train [35][780/3239]	Time 2.480 (0.651)	Data Time 0.003 (0.039)	Loss 3.0141 (2.9532)	Entropy 1.37345 (1.37599)	Top-1 acc 54.688 (53.175)	Top-5 acc 76.172 (75.819)	lr 0.02031
Train [35][790/3239]	Time 0.203 (0.645)	Data Time 0.001 (0.039)	Loss 3.0522 (2.9538)	Entropy 1.37334 (1.37596)	Top-1 acc 48.047 (53.156)	Top-5 acc 71.094 (75.800)	lr 0.02030
Train [35][800/3239]	Time 0.221 (0.643)	Data Time 0.001 (0.038)	Loss 2.9627 (2.9542)	Entropy 1.37330 (1.37592)	Top-1 acc 53.516 (53.149)	Top-5 acc 76.172 (75.791)	lr 0.02030
Train [35][810/3239]	Time 0.232 (0.640)	Data Time 0.001 (0.038)	Loss 2.9204 (2.9541)	Entropy 1.37326 (1.37589)	Top-1 acc 53.516 (53.149)	Top-5 acc 76.562 (75.790)	lr 0.02030
Train [35][820/3239]	Time 0.204 (0.638)	Data Time 0.001 (0.037)	Loss 3.0077 (2.9544)	Entropy 1.37320 (1.37586)	Top-1 acc 55.078 (53.149)	Top-5 acc 75.781 (75.792)	lr 0.02030
Train [35][830/3239]	Time 0.247 (0.636)	Data Time 0.001 (0.037)	Loss 2.8784 (2.9539)	Entropy 1.37320 (1.37583)	Top-1 acc 54.688 (53.158)	Top-5 acc 78.125 (75.808)	lr 0.02030
Train [35][840/3239]	Time 0.205 (0.633)	Data Time 0.001 (0.036)	Loss 3.0094 (2.9542)	Entropy 1.37313 (1.37579)	Top-1 acc 50.781 (53.147)	Top-5 acc 73.828 (75.791)	lr 0.02030
Train [35][850/3239]	Time 0.234 (0.631)	Data Time 0.001 (0.036)	Loss 2.9825 (2.9544)	Entropy 1.37309 (1.37576)	Top-1 acc 56.250 (53.153)	Top-5 acc 71.875 (75.774)	lr 0.02030
Train [35][860/3239]	Time 0.244 (0.629)	Data Time 0.001 (0.036)	Loss 2.8529 (2.9540)	Entropy 1.37304 (1.37573)	Top-1 acc 55.859 (53.154)	Top-5 acc 77.344 (75.777)	lr 0.02030
Train [35][870/3239]	Time 0.219 (0.627)	Data Time 0.001 (0.035)	Loss 2.9878 (2.9541)	Entropy 1.37298 (1.37570)	Top-1 acc 56.641 (53.161)	Top-5 acc 73.828 (75.770)	lr 0.02030
Train [35][880/3239]	Time 0.233 (0.625)	Data Time 0.001 (0.035)	Loss 2.9686 (2.9542)	Entropy 1.37279 (1.37567)	Top-1 acc 51.953 (53.172)	Top-5 acc 75.000 (75.777)	lr 0.02030
Train [35][890/3239]	Time 2.428 (0.623)	Data Time 0.002 (0.034)	Loss 3.0757 (2.9545)	Entropy 1.37279 (1.37564)	Top-1 acc 49.609 (53.152)	Top-5 acc 71.875 (75.774)	lr 0.02030
Train [35][900/3239]	Time 0.222 (0.618)	Data Time 0.001 (0.034)	Loss 3.2621 (2.9558)	Entropy 1.37279 (1.37560)	Top-1 acc 47.266 (53.133)	Top-5 acc 71.875 (75.749)	lr 0.02030
Train [35][910/3239]	Time 0.221 (0.617)	Data Time 0.001 (0.034)	Loss 3.0921 (2.9568)	Entropy 1.37276 (1.37557)	Top-1 acc 50.391 (53.103)	Top-5 acc 72.266 (75.738)	lr 0.02029
Train [35][920/3239]	Time 0.340 (0.615)	Data Time 0.001 (0.033)	Loss 2.8217 (2.9569)	Entropy 1.37274 (1.37554)	Top-1 acc 56.250 (53.110)	Top-5 acc 78.125 (75.735)	lr 0.02029
Train [35][930/3239]	Time 0.232 (0.613)	Data Time 0.001 (0.033)	Loss 2.7659 (2.9575)	Entropy 1.37271 (1.37551)	Top-1 acc 59.375 (53.105)	Top-5 acc 80.078 (75.727)	lr 0.02029
Train [35][940/3239]	Time 0.233 (0.612)	Data Time 0.001 (0.033)	Loss 2.9905 (2.9573)	Entropy 1.37262 (1.37548)	Top-1 acc 50.391 (53.105)	Top-5 acc 75.781 (75.733)	lr 0.02029
Train [35][950/3239]	Time 0.219 (0.610)	Data Time 0.001 (0.032)	Loss 3.2553 (2.9577)	Entropy 1.37258 (1.37545)	Top-1 acc 45.312 (53.095)	Top-5 acc 70.312 (75.720)	lr 0.02029
Train [35][960/3239]	Time 0.207 (0.608)	Data Time 0.001 (0.032)	Loss 2.9742 (2.9580)	Entropy 1.37251 (1.37542)	Top-1 acc 49.609 (53.084)	Top-5 acc 76.953 (75.708)	lr 0.02029
Train [35][970/3239]	Time 0.283 (0.607)	Data Time 0.001 (0.032)	Loss 3.0913 (2.9579)	Entropy 1.37245 (1.37539)	Top-1 acc 50.000 (53.084)	Top-5 acc 73.047 (75.723)	lr 0.02029
Train [35][980/3239]	Time 0.211 (0.605)	Data Time 0.001 (0.031)	Loss 2.9223 (2.9585)	Entropy 1.37237 (1.37536)	Top-1 acc 52.344 (53.069)	Top-5 acc 80.078 (75.715)	lr 0.02029
Train [35][990/3239]	Time 0.205 (0.604)	Data Time 0.001 (0.031)	Loss 3.0428 (2.9585)	Entropy 1.37225 (1.37533)	Top-1 acc 52.344 (53.078)	Top-5 acc 73.438 (75.705)	lr 0.02029
Train [35][1000/3239]	Time 2.413 (0.602)	Data Time 0.001 (0.031)	Loss 2.8589 (2.9587)	Entropy 1.37225 (1.37530)	Top-1 acc 56.250 (53.076)	Top-5 acc 78.516 (75.704)	lr 0.02029
Train [35][1010/3239]	Time 0.209 (0.599)	Data Time 0.001 (0.030)	Loss 2.8067 (2.9584)	Entropy 1.37205 (1.37527)	Top-1 acc 58.203 (53.077)	Top-5 acc 78.516 (75.707)	lr 0.02029
Train [35][1020/3239]	Time 0.289 (0.597)	Data Time 0.001 (0.030)	Loss 3.0018 (2.9585)	Entropy 1.37203 (1.37524)	Top-1 acc 53.906 (53.083)	Top-5 acc 71.875 (75.701)	lr 0.02029
Train [35][1030/3239]	Time 0.215 (0.596)	Data Time 0.001 (0.030)	Loss 2.9356 (2.9582)	Entropy 1.37201 (1.37521)	Top-1 acc 55.859 (53.083)	Top-5 acc 75.391 (75.709)	lr 0.02029
Train [35][1040/3239]	Time 0.215 (0.594)	Data Time 0.001 (0.030)	Loss 3.0767 (2.9584)	Entropy 1.37188 (1.37517)	Top-1 acc 52.344 (53.080)	Top-5 acc 74.609 (75.708)	lr 0.02028
Train [35][1050/3239]	Time 0.221 (0.593)	Data Time 0.001 (0.029)	Loss 2.9513 (2.9584)	Entropy 1.37176 (1.37514)	Top-1 acc 51.172 (53.083)	Top-5 acc 78.906 (75.710)	lr 0.02028
Train [35][1060/3239]	Time 0.214 (0.592)	Data Time 0.001 (0.029)	Loss 3.0143 (2.9591)	Entropy 1.37165 (1.37511)	Top-1 acc 49.219 (53.058)	Top-5 acc 74.609 (75.698)	lr 0.02028
Train [35][1070/3239]	Time 0.224 (0.590)	Data Time 0.001 (0.029)	Loss 2.9927 (2.9596)	Entropy 1.37159 (1.37508)	Top-1 acc 56.250 (53.047)	Top-5 acc 75.781 (75.689)	lr 0.02028
Train [35][1080/3239]	Time 0.308 (0.589)	Data Time 0.001 (0.029)	Loss 3.1893 (2.9594)	Entropy 1.37154 (1.37504)	Top-1 acc 45.703 (53.041)	Top-5 acc 72.656 (75.691)	lr 0.02028
Train [35][1090/3239]	Time 0.205 (0.588)	Data Time 0.001 (0.028)	Loss 2.9753 (2.9589)	Entropy 1.37150 (1.37501)	Top-1 acc 51.562 (53.044)	Top-5 acc 74.609 (75.709)	lr 0.02028
Train [35][1100/3239]	Time 0.323 (0.627)	Data Time 0.004 (0.028)	Loss 2.9491 (2.9593)	Entropy 1.37132 (1.37498)	Top-1 acc 56.250 (53.044)	Top-5 acc 77.734 (75.694)	lr 0.02028
Train [35][1110/3239]	Time 2.342 (0.625)	Data Time 0.002 (0.028)	Loss 2.8795 (2.9589)	Entropy 1.37132 (1.37495)	Top-1 acc 55.078 (53.049)	Top-5 acc 74.219 (75.696)	lr 0.02028
Train [35][1120/3239]	Time 0.236 (0.622)	Data Time 0.002 (0.028)	Loss 2.9094 (2.9594)	Entropy 1.37129 (1.37491)	Top-1 acc 51.172 (53.035)	Top-5 acc 78.125 (75.687)	lr 0.02028
Train [35][1130/3239]	Time 0.210 (0.620)	Data Time 0.002 (0.027)	Loss 2.9313 (2.9598)	Entropy 1.37123 (1.37488)	Top-1 acc 53.516 (53.029)	Top-5 acc 79.688 (75.683)	lr 0.02028
Train [35][1140/3239]	Time 0.205 (0.619)	Data Time 0.001 (0.027)	Loss 3.0982 (2.9601)	Entropy 1.37116 (1.37485)	Top-1 acc 52.344 (53.024)	Top-5 acc 73.828 (75.674)	lr 0.02028
Train [35][1150/3239]	Time 0.218 (0.618)	Data Time 0.001 (0.027)	Loss 2.8664 (2.9597)	Entropy 1.37118 (1.37482)	Top-1 acc 54.688 (53.035)	Top-5 acc 78.516 (75.682)	lr 0.02028
Train [35][1160/3239]	Time 0.228 (0.616)	Data Time 0.002 (0.027)	Loss 3.2274 (2.9597)	Entropy 1.37116 (1.37479)	Top-1 acc 48.438 (53.025)	Top-5 acc 68.750 (75.687)	lr 0.02028
Train [35][1170/3239]	Time 0.196 (0.615)	Data Time 0.001 (0.027)	Loss 2.8798 (2.9591)	Entropy 1.37108 (1.37475)	Top-1 acc 52.734 (53.036)	Top-5 acc 78.906 (75.697)	lr 0.02027
Train [35][1180/3239]	Time 0.222 (0.613)	Data Time 0.001 (0.026)	Loss 3.0884 (2.9595)	Entropy 1.37095 (1.37472)	Top-1 acc 50.000 (53.019)	Top-5 acc 74.219 (75.688)	lr 0.02027
Train [35][1190/3239]	Time 0.237 (0.612)	Data Time 0.001 (0.026)	Loss 3.0426 (2.9598)	Entropy 1.37093 (1.37469)	Top-1 acc 50.391 (53.013)	Top-5 acc 73.438 (75.682)	lr 0.02027
Train [35][1200/3239]	Time 0.234 (0.611)	Data Time 0.001 (0.026)	Loss 2.8581 (2.9600)	Entropy 1.37083 (1.37466)	Top-1 acc 55.469 (52.996)	Top-5 acc 76.953 (75.670)	lr 0.02027
Train [35][1210/3239]	Time 0.232 (0.609)	Data Time 0.001 (0.026)	Loss 2.9197 (2.9603)	Entropy 1.37082 (1.37463)	Top-1 acc 53.516 (52.987)	Top-5 acc 78.125 (75.664)	lr 0.02027
Train [35][1220/3239]	Time 2.403 (0.608)	Data Time 0.001 (0.026)	Loss 2.7796 (2.9601)	Entropy 1.37082 (1.37460)	Top-1 acc 59.766 (52.992)	Top-5 acc 79.297 (75.662)	lr 0.02027
Train [35][1230/3239]	Time 0.250 (0.605)	Data Time 0.001 (0.025)	Loss 2.7965 (2.9604)	Entropy 1.37079 (1.37457)	Top-1 acc 57.422 (52.985)	Top-5 acc 77.734 (75.654)	lr 0.02027
Train [35][1240/3239]	Time 0.329 (0.604)	Data Time 0.001 (0.025)	Loss 3.0184 (2.9605)	Entropy 1.37075 (1.37454)	Top-1 acc 50.000 (52.976)	Top-5 acc 71.875 (75.657)	lr 0.02027
Train [35][1250/3239]	Time 0.226 (0.603)	Data Time 0.001 (0.025)	Loss 2.9567 (2.9609)	Entropy 1.37069 (1.37450)	Top-1 acc 50.781 (52.965)	Top-5 acc 75.000 (75.651)	lr 0.02027
Train [35][1260/3239]	Time 0.201 (0.601)	Data Time 0.001 (0.025)	Loss 3.0865 (2.9612)	Entropy 1.37068 (1.37447)	Top-1 acc 54.688 (52.961)	Top-5 acc 72.656 (75.643)	lr 0.02027
Train [35][1270/3239]	Time 0.234 (0.600)	Data Time 0.002 (0.025)	Loss 3.0909 (2.9612)	Entropy 1.37099 (1.37445)	Top-1 acc 53.125 (52.963)	Top-5 acc 71.484 (75.643)	lr 0.02027
Train [35][1280/3239]	Time 0.239 (0.599)	Data Time 0.001 (0.024)	Loss 3.0188 (2.9613)	Entropy 1.37092 (1.37442)	Top-1 acc 51.172 (52.959)	Top-5 acc 76.172 (75.646)	lr 0.02027
Train [35][1290/3239]	Time 0.226 (0.598)	Data Time 0.001 (0.024)	Loss 2.8758 (2.9615)	Entropy 1.37085 (1.37439)	Top-1 acc 54.688 (52.954)	Top-5 acc 76.172 (75.638)	lr 0.02026
Train [35][1300/3239]	Time 0.215 (0.597)	Data Time 0.001 (0.024)	Loss 3.0576 (2.9614)	Entropy 1.37080 (1.37436)	Top-1 acc 48.828 (52.964)	Top-5 acc 71.484 (75.635)	lr 0.02026
Train [35][1310/3239]	Time 0.212 (0.596)	Data Time 0.001 (0.024)	Loss 3.0095 (2.9619)	Entropy 1.37074 (1.37434)	Top-1 acc 53.125 (52.953)	Top-5 acc 74.219 (75.629)	lr 0.02026
Train [35][1320/3239]	Time 0.214 (0.595)	Data Time 0.001 (0.024)	Loss 3.1684 (2.9625)	Entropy 1.37073 (1.37431)	Top-1 acc 50.000 (52.943)	Top-5 acc 72.656 (75.618)	lr 0.02026
Train [35][1330/3239]	Time 2.360 (0.594)	Data Time 0.001 (0.024)	Loss 2.6950 (2.9623)	Entropy 1.37073 (1.37428)	Top-1 acc 57.031 (52.944)	Top-5 acc 80.859 (75.622)	lr 0.02026
Train [35][1340/3239]	Time 0.251 (0.591)	Data Time 0.002 (0.023)	Loss 2.8587 (2.9622)	Entropy 1.37071 (1.37426)	Top-1 acc 57.812 (52.946)	Top-5 acc 76.562 (75.625)	lr 0.02026
Train [35][1350/3239]	Time 0.310 (0.590)	Data Time 0.001 (0.023)	Loss 3.1441 (2.9623)	Entropy 1.37057 (1.37423)	Top-1 acc 53.125 (52.956)	Top-5 acc 72.656 (75.621)	lr 0.02026
Train [35][1360/3239]	Time 0.229 (0.589)	Data Time 0.001 (0.023)	Loss 3.0871 (2.9626)	Entropy 1.37055 (1.37420)	Top-1 acc 48.828 (52.946)	Top-5 acc 73.438 (75.618)	lr 0.02026
Train [35][1370/3239]	Time 0.274 (0.588)	Data Time 0.001 (0.023)	Loss 2.7133 (2.9624)	Entropy 1.37049 (1.37418)	Top-1 acc 60.156 (52.947)	Top-5 acc 82.031 (75.624)	lr 0.02026
Train [35][1380/3239]	Time 0.239 (0.587)	Data Time 0.001 (0.023)	Loss 2.8526 (2.9621)	Entropy 1.37044 (1.37415)	Top-1 acc 55.469 (52.956)	Top-5 acc 78.125 (75.631)	lr 0.02026
Train [35][1390/3239]	Time 0.225 (0.586)	Data Time 0.001 (0.023)	Loss 2.9340 (2.9621)	Entropy 1.37041 (1.37412)	Top-1 acc 57.422 (52.964)	Top-5 acc 76.953 (75.634)	lr 0.02026
Train [35][1400/3239]	Time 0.287 (0.585)	Data Time 0.001 (0.022)	Loss 2.9865 (2.9627)	Entropy 1.37037 (1.37409)	Top-1 acc 54.688 (52.961)	Top-5 acc 74.609 (75.621)	lr 0.02026
Train [35][1410/3239]	Time 0.238 (0.584)	Data Time 0.001 (0.022)	Loss 2.9915 (2.9628)	Entropy 1.37028 (1.37407)	Top-1 acc 57.031 (52.958)	Top-5 acc 73.047 (75.624)	lr 0.02026
Train [35][1420/3239]	Time 0.218 (0.583)	Data Time 0.001 (0.022)	Loss 2.8823 (2.9628)	Entropy 1.37026 (1.37404)	Top-1 acc 53.516 (52.955)	Top-5 acc 76.562 (75.624)	lr 0.02025
Train [35][1430/3239]	Time 0.262 (0.582)	Data Time 0.001 (0.022)	Loss 2.8500 (2.9635)	Entropy 1.37021 (1.37402)	Top-1 acc 54.688 (52.947)	Top-5 acc 79.688 (75.615)	lr 0.02025
Train [35][1440/3239]	Time 2.387 (0.581)	Data Time 0.001 (0.022)	Loss 3.0249 (2.9636)	Entropy 1.37021 (1.37399)	Top-1 acc 53.906 (52.942)	Top-5 acc 75.781 (75.621)	lr 0.02025
Train [35][1450/3239]	Time 0.293 (0.579)	Data Time 0.001 (0.022)	Loss 3.2153 (2.9638)	Entropy 1.37014 (1.37396)	Top-1 acc 44.922 (52.939)	Top-5 acc 69.531 (75.617)	lr 0.02025
Train [35][1460/3239]	Time 0.228 (0.578)	Data Time 0.001 (0.022)	Loss 2.8876 (2.9640)	Entropy 1.37009 (1.37394)	Top-1 acc 56.250 (52.929)	Top-5 acc 79.688 (75.611)	lr 0.02025
Train [35][1470/3239]	Time 0.384 (0.605)	Data Time 0.003 (0.021)	Loss 3.0485 (2.9641)	Entropy 1.37010 (1.37391)	Top-1 acc 49.609 (52.927)	Top-5 acc 73.438 (75.604)	lr 0.02025
Train [35][1480/3239]	Time 0.239 (0.605)	Data Time 0.002 (0.021)	Loss 2.9255 (2.9637)	Entropy 1.37009 (1.37388)	Top-1 acc 50.781 (52.936)	Top-5 acc 76.172 (75.610)	lr 0.02025
Train [35][1490/3239]	Time 0.224 (0.604)	Data Time 0.002 (0.021)	Loss 3.0705 (2.9640)	Entropy 1.37021 (1.37386)	Top-1 acc 53.906 (52.933)	Top-5 acc 71.875 (75.601)	lr 0.02025
Train [35][1500/3239]	Time 0.213 (0.603)	Data Time 0.002 (0.021)	Loss 2.8627 (2.9641)	Entropy 1.37012 (1.37383)	Top-1 acc 55.078 (52.929)	Top-5 acc 76.562 (75.593)	lr 0.02025
Train [35][1510/3239]	Time 0.219 (0.602)	Data Time 0.001 (0.021)	Loss 2.9453 (2.9642)	Entropy 1.37010 (1.37381)	Top-1 acc 52.734 (52.931)	Top-5 acc 76.953 (75.588)	lr 0.02025
Train [35][1520/3239]	Time 0.216 (0.601)	Data Time 0.001 (0.021)	Loss 2.9515 (2.9646)	Entropy 1.37002 (1.37379)	Top-1 acc 53.516 (52.930)	Top-5 acc 73.047 (75.584)	lr 0.02025
Train [35][1530/3239]	Time 0.232 (0.600)	Data Time 0.002 (0.021)	Loss 3.1694 (2.9649)	Entropy 1.36998 (1.37376)	Top-1 acc 46.484 (52.918)	Top-5 acc 72.266 (75.577)	lr 0.02025
Train [35][1540/3239]	Time 0.227 (0.599)	Data Time 0.001 (0.021)	Loss 2.9753 (2.9650)	Entropy 1.36988 (1.37374)	Top-1 acc 51.562 (52.913)	Top-5 acc 75.391 (75.574)	lr 0.02025
Train [35][1550/3239]	Time 2.515 (0.598)	Data Time 0.001 (0.020)	Loss 3.1278 (2.9648)	Entropy 1.36988 (1.37371)	Top-1 acc 46.484 (52.916)	Top-5 acc 72.656 (75.579)	lr 0.02024
Train [35][1560/3239]	Time 0.320 (0.596)	Data Time 0.001 (0.020)	Loss 2.9853 (2.9653)	Entropy 1.36984 (1.37369)	Top-1 acc 50.391 (52.907)	Top-5 acc 76.172 (75.567)	lr 0.02024
Train [35][1570/3239]	Time 0.213 (0.595)	Data Time 0.001 (0.020)	Loss 2.8934 (2.9652)	Entropy 1.36981 (1.37366)	Top-1 acc 56.641 (52.908)	Top-5 acc 75.781 (75.568)	lr 0.02024
Train [35][1580/3239]	Time 0.233 (0.594)	Data Time 0.001 (0.020)	Loss 3.1180 (2.9654)	Entropy 1.36978 (1.37364)	Top-1 acc 50.781 (52.901)	Top-5 acc 73.828 (75.564)	lr 0.02024
Train [35][1590/3239]	Time 0.221 (0.593)	Data Time 0.001 (0.020)	Loss 2.7162 (2.9653)	Entropy 1.36975 (1.37361)	Top-1 acc 59.766 (52.897)	Top-5 acc 79.688 (75.567)	lr 0.02024
Train [35][1600/3239]	Time 0.222 (0.592)	Data Time 0.001 (0.020)	Loss 2.8578 (2.9652)	Entropy 1.36974 (1.37359)	Top-1 acc 56.250 (52.898)	Top-5 acc 78.125 (75.571)	lr 0.02024
Train [35][1610/3239]	Time 0.310 (0.591)	Data Time 0.001 (0.020)	Loss 3.1212 (2.9651)	Entropy 1.36970 (1.37356)	Top-1 acc 50.781 (52.899)	Top-5 acc 72.266 (75.572)	lr 0.02024
Train [35][1620/3239]	Time 0.203 (0.591)	Data Time 0.001 (0.020)	Loss 2.9551 (2.9649)	Entropy 1.36963 (1.37354)	Top-1 acc 54.297 (52.894)	Top-5 acc 76.562 (75.582)	lr 0.02024
Train [35][1630/3239]	Time 0.239 (0.590)	Data Time 0.001 (0.020)	Loss 2.9232 (2.9648)	Entropy 1.36964 (1.37352)	Top-1 acc 51.172 (52.896)	Top-5 acc 75.781 (75.578)	lr 0.02024
Train [35][1640/3239]	Time 0.262 (0.589)	Data Time 0.001 (0.019)	Loss 2.7749 (2.9645)	Entropy 1.36949 (1.37349)	Top-1 acc 58.203 (52.906)	Top-5 acc 80.469 (75.582)	lr 0.02024
Train [35][1650/3239]	Time 0.235 (0.588)	Data Time 0.001 (0.019)	Loss 3.1038 (2.9650)	Entropy 1.36942 (1.37347)	Top-1 acc 47.656 (52.900)	Top-5 acc 70.703 (75.574)	lr 0.02024
Train [35][1660/3239]	Time 2.570 (0.587)	Data Time 0.002 (0.019)	Loss 2.9114 (2.9652)	Entropy 1.36942 (1.37344)	Top-1 acc 52.734 (52.891)	Top-5 acc 82.031 (75.572)	lr 0.02024
Train [35][1670/3239]	Time 0.254 (0.585)	Data Time 0.001 (0.019)	Loss 2.9511 (2.9650)	Entropy 1.36934 (1.37342)	Top-1 acc 50.391 (52.895)	Top-5 acc 77.734 (75.576)	lr 0.02023
Train [35][1680/3239]	Time 0.217 (0.585)	Data Time 0.001 (0.019)	Loss 2.9542 (2.9653)	Entropy 1.36930 (1.37339)	Top-1 acc 52.734 (52.891)	Top-5 acc 75.000 (75.572)	lr 0.02023
Train [35][1690/3239]	Time 0.221 (0.584)	Data Time 0.002 (0.019)	Loss 2.8916 (2.9652)	Entropy 1.36923 (1.37337)	Top-1 acc 57.812 (52.898)	Top-5 acc 77.344 (75.572)	lr 0.02023
Train [35][1700/3239]	Time 0.224 (0.583)	Data Time 0.001 (0.019)	Loss 3.1245 (2.9653)	Entropy 1.36926 (1.37335)	Top-1 acc 48.438 (52.891)	Top-5 acc 75.000 (75.570)	lr 0.02023
Train [35][1710/3239]	Time 0.248 (0.582)	Data Time 0.001 (0.019)	Loss 2.8336 (2.9654)	Entropy 1.36923 (1.37332)	Top-1 acc 55.859 (52.890)	Top-5 acc 78.906 (75.573)	lr 0.02023
Train [35][1720/3239]	Time 0.300 (0.581)	Data Time 0.001 (0.019)	Loss 2.9908 (2.9656)	Entropy 1.36913 (1.37330)	Top-1 acc 51.562 (52.889)	Top-5 acc 75.391 (75.567)	lr 0.02023
Train [35][1730/3239]	Time 0.229 (0.581)	Data Time 0.002 (0.019)	Loss 2.8864 (2.9656)	Entropy 1.36901 (1.37327)	Top-1 acc 53.516 (52.881)	Top-5 acc 79.688 (75.568)	lr 0.02023
Train [35][1740/3239]	Time 0.221 (0.580)	Data Time 0.001 (0.018)	Loss 3.0007 (2.9654)	Entropy 1.36894 (1.37325)	Top-1 acc 50.000 (52.885)	Top-5 acc 74.609 (75.570)	lr 0.02023
Train [35][1750/3239]	Time 0.226 (0.579)	Data Time 0.001 (0.018)	Loss 3.0571 (2.9653)	Entropy 1.36888 (1.37322)	Top-1 acc 51.953 (52.890)	Top-5 acc 75.000 (75.574)	lr 0.02023
Train [35][1760/3239]	Time 0.218 (0.578)	Data Time 0.001 (0.018)	Loss 2.9792 (2.9654)	Entropy 1.36886 (1.37320)	Top-1 acc 56.250 (52.888)	Top-5 acc 74.609 (75.568)	lr 0.02023
Train [35][1770/3239]	Time 2.573 (0.578)	Data Time 0.001 (0.018)	Loss 2.9460 (2.9656)	Entropy 1.36886 (1.37318)	Top-1 acc 50.781 (52.886)	Top-5 acc 75.781 (75.562)	lr 0.02023
Train [35][1780/3239]	Time 0.212 (0.576)	Data Time 0.001 (0.018)	Loss 2.7510 (2.9656)	Entropy 1.36878 (1.37315)	Top-1 acc 59.375 (52.887)	Top-5 acc 79.297 (75.561)	lr 0.02023
Train [35][1790/3239]	Time 0.206 (0.575)	Data Time 0.001 (0.018)	Loss 2.9965 (2.9659)	Entropy 1.36877 (1.37313)	Top-1 acc 55.078 (52.886)	Top-5 acc 75.781 (75.556)	lr 0.02023
Train [35][1800/3239]	Time 0.232 (0.574)	Data Time 0.002 (0.018)	Loss 3.0322 (2.9658)	Entropy 1.36871 (1.37310)	Top-1 acc 52.734 (52.893)	Top-5 acc 71.875 (75.553)	lr 0.02022
Train [35][1810/3239]	Time 0.222 (0.574)	Data Time 0.001 (0.018)	Loss 2.8218 (2.9657)	Entropy 1.36873 (1.37308)	Top-1 acc 52.344 (52.893)	Top-5 acc 79.297 (75.553)	lr 0.02022
Train [35][1820/3239]	Time 0.222 (0.573)	Data Time 0.001 (0.018)	Loss 2.8315 (2.9656)	Entropy 1.36869 (1.37305)	Top-1 acc 55.859 (52.897)	Top-5 acc 78.906 (75.553)	lr 0.02022
Train [35][1830/3239]	Time 0.247 (0.597)	Data Time 0.002 (0.018)	Loss 3.0370 (2.9657)	Entropy 1.36870 (1.37303)	Top-1 acc 50.000 (52.891)	Top-5 acc 72.266 (75.551)	lr 0.02022
Train [35][1840/3239]	Time 0.219 (0.596)	Data Time 0.002 (0.017)	Loss 3.0339 (2.9658)	Entropy 1.36861 (1.37301)	Top-1 acc 50.000 (52.888)	Top-5 acc 73.438 (75.551)	lr 0.02022
Train [35][1850/3239]	Time 0.230 (0.596)	Data Time 0.001 (0.017)	Loss 2.8921 (2.9659)	Entropy 1.36853 (1.37298)	Top-1 acc 53.516 (52.889)	Top-5 acc 79.688 (75.549)	lr 0.02022
Train [35][1860/3239]	Time 0.164 (0.595)	Data Time 0.001 (0.017)	Loss 2.9237 (2.9658)	Entropy 1.36838 (1.37296)	Top-1 acc 55.859 (52.887)	Top-5 acc 77.344 (75.550)	lr 0.02022
Train [35][1870/3239]	Time 0.236 (0.594)	Data Time 0.002 (0.017)	Loss 2.8687 (2.9657)	Entropy 1.36835 (1.37293)	Top-1 acc 53.125 (52.883)	Top-5 acc 76.172 (75.552)	lr 0.02022
Train [35][1880/3239]	Time 2.393 (0.594)	Data Time 0.001 (0.017)	Loss 2.9614 (2.9657)	Entropy 1.36835 (1.37291)	Top-1 acc 51.172 (52.879)	Top-5 acc 76.562 (75.553)	lr 0.02022
Train [35][1890/3239]	Time 0.211 (0.592)	Data Time 0.001 (0.017)	Loss 2.9560 (2.9656)	Entropy 1.36818 (1.37288)	Top-1 acc 51.562 (52.881)	Top-5 acc 74.219 (75.552)	lr 0.02022
Train [35][1900/3239]	Time 0.216 (0.591)	Data Time 0.001 (0.017)	Loss 3.0321 (2.9654)	Entropy 1.36816 (1.37286)	Top-1 acc 51.172 (52.883)	Top-5 acc 74.609 (75.559)	lr 0.02022
Train [35][1910/3239]	Time 0.224 (0.590)	Data Time 0.001 (0.017)	Loss 2.9613 (2.9654)	Entropy 1.36813 (1.37283)	Top-1 acc 55.469 (52.886)	Top-5 acc 74.219 (75.561)	lr 0.02022
Train [35][1920/3239]	Time 0.230 (0.590)	Data Time 0.001 (0.017)	Loss 2.9096 (2.9652)	Entropy 1.36797 (1.37281)	Top-1 acc 56.250 (52.890)	Top-5 acc 75.391 (75.573)	lr 0.02021
Train [35][1930/3239]	Time 0.312 (0.589)	Data Time 0.002 (0.017)	Loss 2.8185 (2.9650)	Entropy 1.36789 (1.37278)	Top-1 acc 58.203 (52.896)	Top-5 acc 78.516 (75.574)	lr 0.02021
Train [35][1940/3239]	Time 0.253 (0.588)	Data Time 0.001 (0.017)	Loss 3.0244 (2.9652)	Entropy 1.36776 (1.37276)	Top-1 acc 55.469 (52.893)	Top-5 acc 75.000 (75.568)	lr 0.02021
Train [35][1950/3239]	Time 0.203 (0.588)	Data Time 0.001 (0.017)	Loss 2.9253 (2.9652)	Entropy 1.36774 (1.37273)	Top-1 acc 54.297 (52.892)	Top-5 acc 75.781 (75.566)	lr 0.02021
Train [35][1960/3239]	Time 0.235 (0.587)	Data Time 0.002 (0.017)	Loss 2.8006 (2.9651)	Entropy 1.36770 (1.37271)	Top-1 acc 56.641 (52.894)	Top-5 acc 78.125 (75.569)	lr 0.02021
Train [35][1970/3239]	Time 0.194 (0.586)	Data Time 0.001 (0.016)	Loss 2.9940 (2.9651)	Entropy 1.36769 (1.37268)	Top-1 acc 52.344 (52.896)	Top-5 acc 77.734 (75.571)	lr 0.02021
Train [35][1980/3239]	Time 0.208 (0.585)	Data Time 0.001 (0.016)	Loss 3.0016 (2.9652)	Entropy 1.36762 (1.37266)	Top-1 acc 54.297 (52.894)	Top-5 acc 74.219 (75.569)	lr 0.02021
Train [35][1990/3239]	Time 2.376 (0.585)	Data Time 0.001 (0.016)	Loss 3.1431 (2.9649)	Entropy 1.36762 (1.37263)	Top-1 acc 50.000 (52.899)	Top-5 acc 73.828 (75.578)	lr 0.02021
Train [35][2000/3239]	Time 0.203 (0.583)	Data Time 0.001 (0.016)	Loss 3.2403 (2.9650)	Entropy 1.36754 (1.37261)	Top-1 acc 49.219 (52.896)	Top-5 acc 69.922 (75.577)	lr 0.02021
Train [35][2010/3239]	Time 0.219 (0.582)	Data Time 0.001 (0.016)	Loss 3.0089 (2.9659)	Entropy 1.36746 (1.37258)	Top-1 acc 51.953 (52.877)	Top-5 acc 76.172 (75.564)	lr 0.02021
Train [35][2020/3239]	Time 0.221 (0.582)	Data Time 0.001 (0.016)	Loss 2.7780 (2.9659)	Entropy 1.36744 (1.37256)	Top-1 acc 58.984 (52.874)	Top-5 acc 75.781 (75.564)	lr 0.02021
Train [35][2030/3239]	Time 0.274 (0.581)	Data Time 0.001 (0.016)	Loss 2.9660 (2.9659)	Entropy 1.36739 (1.37253)	Top-1 acc 57.031 (52.873)	Top-5 acc 76.172 (75.563)	lr 0.02021
Train [35][2040/3239]	Time 0.314 (0.580)	Data Time 0.001 (0.016)	Loss 2.8135 (2.9657)	Entropy 1.36734 (1.37250)	Top-1 acc 54.688 (52.873)	Top-5 acc 78.906 (75.566)	lr 0.02021
Train [35][2050/3239]	Time 0.219 (0.580)	Data Time 0.001 (0.016)	Loss 2.8924 (2.9656)	Entropy 1.36725 (1.37248)	Top-1 acc 57.031 (52.871)	Top-5 acc 78.125 (75.567)	lr 0.02020
Train [35][2060/3239]	Time 0.210 (0.579)	Data Time 0.001 (0.016)	Loss 2.8990 (2.9653)	Entropy 1.36711 (1.37245)	Top-1 acc 54.688 (52.879)	Top-5 acc 75.781 (75.571)	lr 0.02020
Train [35][2070/3239]	Time 0.217 (0.579)	Data Time 0.001 (0.016)	Loss 2.9438 (2.9654)	Entropy 1.36701 (1.37243)	Top-1 acc 55.469 (52.879)	Top-5 acc 78.516 (75.573)	lr 0.02020
Train [35][2080/3239]	Time 0.217 (0.578)	Data Time 0.001 (0.016)	Loss 2.9284 (2.9656)	Entropy 1.36695 (1.37240)	Top-1 acc 56.250 (52.876)	Top-5 acc 75.781 (75.570)	lr 0.02020
Train [35][2090/3239]	Time 0.368 (0.577)	Data Time 0.001 (0.016)	Loss 2.9788 (2.9655)	Entropy 1.36692 (1.37238)	Top-1 acc 54.688 (52.883)	Top-5 acc 76.953 (75.572)	lr 0.02020
Train [35][2100/3239]	Time 2.392 (0.577)	Data Time 0.001 (0.016)	Loss 2.7955 (2.9656)	Entropy 1.36692 (1.37235)	Top-1 acc 58.984 (52.886)	Top-5 acc 79.297 (75.567)	lr 0.02020
Train [35][2110/3239]	Time 0.194 (0.575)	Data Time 0.001 (0.015)	Loss 3.1619 (2.9656)	Entropy 1.36682 (1.37232)	Top-1 acc 50.000 (52.888)	Top-5 acc 70.312 (75.566)	lr 0.02020
Train [35][2120/3239]	Time 0.213 (0.575)	Data Time 0.001 (0.015)	Loss 3.0915 (2.9665)	Entropy 1.36683 (1.37230)	Top-1 acc 51.562 (52.868)	Top-5 acc 71.484 (75.546)	lr 0.02020
Train [35][2130/3239]	Time 0.213 (0.574)	Data Time 0.001 (0.015)	Loss 3.0069 (2.9670)	Entropy 1.36675 (1.37227)	Top-1 acc 52.734 (52.859)	Top-5 acc 75.781 (75.539)	lr 0.02020
Train [35][2140/3239]	Time 0.331 (0.573)	Data Time 0.001 (0.015)	Loss 3.0137 (2.9669)	Entropy 1.36676 (1.37225)	Top-1 acc 48.047 (52.862)	Top-5 acc 75.000 (75.538)	lr 0.02020
Train [35][2150/3239]	Time 0.206 (0.573)	Data Time 0.001 (0.015)	Loss 2.9396 (2.9669)	Entropy 1.36671 (1.37222)	Top-1 acc 53.906 (52.862)	Top-5 acc 78.125 (75.538)	lr 0.02020
Train [35][2160/3239]	Time 0.224 (0.572)	Data Time 0.001 (0.015)	Loss 2.8234 (2.9668)	Entropy 1.36664 (1.37219)	Top-1 acc 55.859 (52.866)	Top-5 acc 81.250 (75.544)	lr 0.02020
Train [35][2170/3239]	Time 0.222 (0.572)	Data Time 0.001 (0.015)	Loss 2.8114 (2.9666)	Entropy 1.36664 (1.37217)	Top-1 acc 55.078 (52.873)	Top-5 acc 78.906 (75.546)	lr 0.02019
Train [35][2180/3239]	Time 0.222 (0.571)	Data Time 0.001 (0.015)	Loss 3.1083 (2.9666)	Entropy 1.36661 (1.37214)	Top-1 acc 49.609 (52.871)	Top-5 acc 70.703 (75.542)	lr 0.02019
Train [35][2190/3239]	Time 0.215 (0.590)	Data Time 0.002 (0.015)	Loss 3.1573 (2.9666)	Entropy 1.36657 (1.37212)	Top-1 acc 48.438 (52.871)	Top-5 acc 71.484 (75.546)	lr 0.02019
Train [35][2200/3239]	Time 0.249 (0.590)	Data Time 0.002 (0.015)	Loss 2.9650 (2.9667)	Entropy 1.36644 (1.37209)	Top-1 acc 50.000 (52.865)	Top-5 acc 75.781 (75.542)	lr 0.02019
Train [35][2210/3239]	Time 2.466 (0.589)	Data Time 0.002 (0.015)	Loss 2.9972 (2.9666)	Entropy 1.36644 (1.37207)	Top-1 acc 52.734 (52.866)	Top-5 acc 75.000 (75.542)	lr 0.02019
Train [35][2220/3239]	Time 0.227 (0.587)	Data Time 0.001 (0.015)	Loss 2.7909 (2.9665)	Entropy 1.36640 (1.37204)	Top-1 acc 55.469 (52.864)	Top-5 acc 80.078 (75.544)	lr 0.02019
Train [35][2230/3239]	Time 0.229 (0.587)	Data Time 0.001 (0.015)	Loss 2.8762 (2.9666)	Entropy 1.36631 (1.37202)	Top-1 acc 53.906 (52.860)	Top-5 acc 76.562 (75.541)	lr 0.02019
Train [35][2240/3239]	Time 0.233 (0.586)	Data Time 0.001 (0.015)	Loss 2.8961 (2.9663)	Entropy 1.36625 (1.37199)	Top-1 acc 56.250 (52.870)	Top-5 acc 78.125 (75.549)	lr 0.02019
Train [35][2250/3239]	Time 0.206 (0.586)	Data Time 0.001 (0.015)	Loss 3.0275 (2.9664)	Entropy 1.36617 (1.37196)	Top-1 acc 53.516 (52.861)	Top-5 acc 74.219 (75.548)	lr 0.02019
Train [35][2260/3239]	Time 0.225 (0.585)	Data Time 0.001 (0.015)	Loss 2.8830 (2.9662)	Entropy 1.36613 (1.37194)	Top-1 acc 50.391 (52.859)	Top-5 acc 77.344 (75.553)	lr 0.02019
Train [35][2270/3239]	Time 0.210 (0.585)	Data Time 0.001 (0.015)	Loss 3.1248 (2.9666)	Entropy 1.36607 (1.37191)	Top-1 acc 45.312 (52.848)	Top-5 acc 69.531 (75.543)	lr 0.02019
Train [35][2280/3239]	Time 0.255 (0.584)	Data Time 0.001 (0.014)	Loss 3.1529 (2.9665)	Entropy 1.36581 (1.37189)	Top-1 acc 48.438 (52.851)	Top-5 acc 72.656 (75.545)	lr 0.02019
Train [35][2290/3239]	Time 0.210 (0.583)	Data Time 0.002 (0.014)	Loss 2.8762 (2.9664)	Entropy 1.36576 (1.37186)	Top-1 acc 57.812 (52.852)	Top-5 acc 75.391 (75.546)	lr 0.02019
Train [35][2300/3239]	Time 0.311 (0.583)	Data Time 0.001 (0.014)	Loss 3.2141 (2.9664)	Entropy 1.36569 (1.37183)	Top-1 acc 46.484 (52.852)	Top-5 acc 67.578 (75.547)	lr 0.02018
Train [35][2310/3239]	Time 0.224 (0.582)	Data Time 0.001 (0.014)	Loss 3.1372 (2.9665)	Entropy 1.36569 (1.37181)	Top-1 acc 51.172 (52.846)	Top-5 acc 71.875 (75.544)	lr 0.02018
Train [35][2320/3239]	Time 2.413 (0.582)	Data Time 0.002 (0.014)	Loss 2.9673 (2.9666)	Entropy 1.36569 (1.37178)	Top-1 acc 53.906 (52.843)	Top-5 acc 74.219 (75.541)	lr 0.02018
Train [35][2330/3239]	Time 0.270 (0.580)	Data Time 0.002 (0.014)	Loss 3.1780 (2.9667)	Entropy 1.36552 (1.37175)	Top-1 acc 50.781 (52.841)	Top-5 acc 69.531 (75.538)	lr 0.02018
Train [35][2340/3239]	Time 0.223 (0.580)	Data Time 0.001 (0.014)	Loss 3.0800 (2.9668)	Entropy 1.36544 (1.37173)	Top-1 acc 52.344 (52.842)	Top-5 acc 72.656 (75.537)	lr 0.02018
Train [35][2350/3239]	Time 0.320 (0.580)	Data Time 0.001 (0.014)	Loss 3.1924 (2.9670)	Entropy 1.36539 (1.37170)	Top-1 acc 50.391 (52.837)	Top-5 acc 71.484 (75.536)	lr 0.02018
Train [35][2360/3239]	Time 0.215 (0.579)	Data Time 0.001 (0.014)	Loss 2.7765 (2.9669)	Entropy 1.36530 (1.37167)	Top-1 acc 55.078 (52.838)	Top-5 acc 78.906 (75.541)	lr 0.02018
Train [35][2370/3239]	Time 0.267 (0.579)	Data Time 0.001 (0.014)	Loss 2.8640 (2.9668)	Entropy 1.36524 (1.37165)	Top-1 acc 52.344 (52.837)	Top-5 acc 76.953 (75.541)	lr 0.02018
Train [35][2380/3239]	Time 0.206 (0.578)	Data Time 0.001 (0.014)	Loss 3.2941 (2.9671)	Entropy 1.36518 (1.37162)	Top-1 acc 44.922 (52.830)	Top-5 acc 67.578 (75.538)	lr 0.02018
Train [35][2390/3239]	Time 0.206 (0.577)	Data Time 0.001 (0.014)	Loss 2.9515 (2.9669)	Entropy 1.36513 (1.37159)	Top-1 acc 52.734 (52.834)	Top-5 acc 75.391 (75.541)	lr 0.02018
Train [35][2400/3239]	Time 0.241 (0.577)	Data Time 0.001 (0.014)	Loss 3.0370 (2.9671)	Entropy 1.36511 (1.37156)	Top-1 acc 51.953 (52.826)	Top-5 acc 73.828 (75.533)	lr 0.02018
Train [35][2410/3239]	Time 0.266 (0.577)	Data Time 0.001 (0.014)	Loss 2.8748 (2.9670)	Entropy 1.36507 (1.37154)	Top-1 acc 55.469 (52.827)	Top-5 acc 75.000 (75.534)	lr 0.02018
Train [35][2420/3239]	Time 0.241 (0.576)	Data Time 0.001 (0.014)	Loss 2.9140 (2.9670)	Entropy 1.36507 (1.37151)	Top-1 acc 52.734 (52.821)	Top-5 acc 76.953 (75.533)	lr 0.02018
Train [35][2430/3239]	Time 2.477 (0.575)	Data Time 0.001 (0.014)	Loss 2.8406 (2.9669)	Entropy 1.36507 (1.37148)	Top-1 acc 57.031 (52.822)	Top-5 acc 79.297 (75.534)	lr 0.02017
Train [35][2440/3239]	Time 0.224 (0.574)	Data Time 0.001 (0.014)	Loss 2.9978 (2.9670)	Entropy 1.36504 (1.37146)	Top-1 acc 50.391 (52.823)	Top-5 acc 74.609 (75.535)	lr 0.02017
Train [35][2450/3239]	Time 0.324 (0.574)	Data Time 0.001 (0.014)	Loss 2.9543 (2.9671)	Entropy 1.36502 (1.37143)	Top-1 acc 52.734 (52.819)	Top-5 acc 74.219 (75.531)	lr 0.02017
Train [35][2460/3239]	Time 0.197 (0.573)	Data Time 0.001 (0.014)	Loss 2.9133 (2.9670)	Entropy 1.36498 (1.37141)	Top-1 acc 50.781 (52.820)	Top-5 acc 75.391 (75.532)	lr 0.02017
Train [35][2470/3239]	Time 0.264 (0.573)	Data Time 0.001 (0.013)	Loss 3.0095 (2.9670)	Entropy 1.36497 (1.37138)	Top-1 acc 53.125 (52.828)	Top-5 acc 72.656 (75.533)	lr 0.02017
Train [35][2480/3239]	Time 0.238 (0.572)	Data Time 0.001 (0.013)	Loss 2.9198 (2.9668)	Entropy 1.36490 (1.37135)	Top-1 acc 58.203 (52.834)	Top-5 acc 78.125 (75.534)	lr 0.02017
Train [35][2490/3239]	Time 0.216 (0.572)	Data Time 0.001 (0.013)	Loss 3.1605 (2.9670)	Entropy 1.36483 (1.37133)	Top-1 acc 47.266 (52.829)	Top-5 acc 71.484 (75.532)	lr 0.02017
Train [35][2500/3239]	Time 0.247 (0.571)	Data Time 0.001 (0.013)	Loss 2.8775 (2.9671)	Entropy 1.36472 (1.37130)	Top-1 acc 55.469 (52.828)	Top-5 acc 77.734 (75.530)	lr 0.02017
Train [35][2510/3239]	Time 0.210 (0.571)	Data Time 0.001 (0.013)	Loss 3.0880 (2.9672)	Entropy 1.36471 (1.37128)	Top-1 acc 53.906 (52.829)	Top-5 acc 71.875 (75.525)	lr 0.02017
Train [35][2520/3239]	Time 0.262 (0.570)	Data Time 0.001 (0.013)	Loss 3.0543 (2.9671)	Entropy 1.36462 (1.37125)	Top-1 acc 50.391 (52.830)	Top-5 acc 75.781 (75.527)	lr 0.02017
Train [35][2530/3239]	Time 0.209 (0.570)	Data Time 0.001 (0.013)	Loss 2.8935 (2.9670)	Entropy 1.36452 (1.37122)	Top-1 acc 54.297 (52.831)	Top-5 acc 76.562 (75.528)	lr 0.02017
Train [35][2540/3239]	Time 2.330 (0.570)	Data Time 0.002 (0.013)	Loss 2.7945 (2.9668)	Entropy 1.36452 (1.37120)	Top-1 acc 57.031 (52.836)	Top-5 acc 78.125 (75.535)	lr 0.02017
Train [35][2550/3239]	Time 0.223 (0.568)	Data Time 0.001 (0.013)	Loss 2.7788 (2.9663)	Entropy 1.36452 (1.37117)	Top-1 acc 60.938 (52.847)	Top-5 acc 80.078 (75.545)	lr 0.02016
Train [35][2560/3239]	Time 0.360 (0.585)	Data Time 0.002 (0.013)	Loss 2.7441 (2.9661)	Entropy 1.36449 (1.37114)	Top-1 acc 60.547 (52.854)	Top-5 acc 78.906 (75.552)	lr 0.02016
Train [35][2570/3239]	Time 0.215 (0.584)	Data Time 0.002 (0.013)	Loss 2.9486 (2.9658)	Entropy 1.36441 (1.37112)	Top-1 acc 51.953 (52.858)	Top-5 acc 74.609 (75.560)	lr 0.02016
Train [35][2580/3239]	Time 0.230 (0.584)	Data Time 0.001 (0.013)	Loss 3.0439 (2.9658)	Entropy 1.36437 (1.37109)	Top-1 acc 49.219 (52.856)	Top-5 acc 74.219 (75.557)	lr 0.02016
Train [35][2590/3239]	Time 0.232 (0.583)	Data Time 0.002 (0.013)	Loss 2.8736 (2.9658)	Entropy 1.36436 (1.37107)	Top-1 acc 56.641 (52.855)	Top-5 acc 77.344 (75.557)	lr 0.02016
Train [35][2600/3239]	Time 0.204 (0.583)	Data Time 0.001 (0.013)	Loss 2.9255 (2.9658)	Entropy 1.36424 (1.37104)	Top-1 acc 54.688 (52.855)	Top-5 acc 76.172 (75.555)	lr 0.02016
Train [35][2610/3239]	Time 0.242 (0.582)	Data Time 0.001 (0.013)	Loss 2.9460 (2.9657)	Entropy 1.36413 (1.37101)	Top-1 acc 55.469 (52.861)	Top-5 acc 76.172 (75.554)	lr 0.02016
Train [35][2620/3239]	Time 0.167 (0.581)	Data Time 0.001 (0.013)	Loss 3.0445 (2.9658)	Entropy 1.36404 (1.37099)	Top-1 acc 50.781 (52.859)	Top-5 acc 75.000 (75.554)	lr 0.02016
Train [35][2630/3239]	Time 0.239 (0.581)	Data Time 0.002 (0.013)	Loss 2.7735 (2.9656)	Entropy 1.36397 (1.37096)	Top-1 acc 56.641 (52.861)	Top-5 acc 76.172 (75.555)	lr 0.02016
Train [35][2640/3239]	Time 0.236 (0.581)	Data Time 0.001 (0.013)	Loss 3.0566 (2.9656)	Entropy 1.36396 (1.37094)	Top-1 acc 51.562 (52.855)	Top-5 acc 75.391 (75.559)	lr 0.02016
Train [35][2650/3239]	Time 0.249 (0.580)	Data Time 0.001 (0.013)	Loss 3.0326 (2.9657)	Entropy 1.36380 (1.37091)	Top-1 acc 52.734 (52.852)	Top-5 acc 74.609 (75.557)	lr 0.02016
Train [35][2660/3239]	Time 0.223 (0.580)	Data Time 0.002 (0.013)	Loss 2.9679 (2.9657)	Entropy 1.36374 (1.37088)	Top-1 acc 51.172 (52.847)	Top-5 acc 73.438 (75.556)	lr 0.02016
Train [35][2670/3239]	Time 0.305 (0.579)	Data Time 0.001 (0.013)	Loss 3.2058 (2.9659)	Entropy 1.36372 (1.37086)	Top-1 acc 48.438 (52.841)	Top-5 acc 70.312 (75.551)	lr 0.02016
Train [35][2680/3239]	Time 0.267 (0.579)	Data Time 0.001 (0.013)	Loss 2.8310 (2.9656)	Entropy 1.36367 (1.37083)	Top-1 acc 53.906 (52.844)	Top-5 acc 80.469 (75.559)	lr 0.02015
Train [35][2690/3239]	Time 0.221 (0.578)	Data Time 0.001 (0.013)	Loss 2.9490 (2.9654)	Entropy 1.36367 (1.37080)	Top-1 acc 53.125 (52.848)	Top-5 acc 78.125 (75.562)	lr 0.02015
Train [35][2700/3239]	Time 0.215 (0.578)	Data Time 0.002 (0.012)	Loss 2.9677 (2.9656)	Entropy 1.36364 (1.37078)	Top-1 acc 53.516 (52.843)	Top-5 acc 74.609 (75.561)	lr 0.02015
Train [35][2710/3239]	Time 0.232 (0.577)	Data Time 0.001 (0.012)	Loss 2.9560 (2.9657)	Entropy 1.36362 (1.37075)	Top-1 acc 54.297 (52.841)	Top-5 acc 73.828 (75.557)	lr 0.02015
Train [35][2720/3239]	Time 0.334 (0.577)	Data Time 0.001 (0.012)	Loss 2.8588 (2.9657)	Entropy 1.36358 (1.37072)	Top-1 acc 55.859 (52.841)	Top-5 acc 75.391 (75.555)	lr 0.02015
Train [35][2730/3239]	Time 0.244 (0.576)	Data Time 0.001 (0.012)	Loss 3.0895 (2.9657)	Entropy 1.36355 (1.37070)	Top-1 acc 48.828 (52.840)	Top-5 acc 75.000 (75.553)	lr 0.02015
Train [35][2740/3239]	Time 0.218 (0.576)	Data Time 0.001 (0.012)	Loss 3.0047 (2.9660)	Entropy 1.36353 (1.37067)	Top-1 acc 51.953 (52.836)	Top-5 acc 73.828 (75.549)	lr 0.02015
Train [35][2750/3239]	Time 0.247 (0.576)	Data Time 0.001 (0.012)	Loss 2.9112 (2.9661)	Entropy 1.36343 (1.37064)	Top-1 acc 55.469 (52.832)	Top-5 acc 78.516 (75.548)	lr 0.02015
Train [35][2760/3239]	Time 0.243 (0.575)	Data Time 0.001 (0.012)	Loss 2.8516 (2.9663)	Entropy 1.36325 (1.37062)	Top-1 acc 54.297 (52.828)	Top-5 acc 77.344 (75.542)	lr 0.02015
Train [35][2770/3239]	Time 0.227 (0.575)	Data Time 0.001 (0.012)	Loss 3.0445 (2.9667)	Entropy 1.36318 (1.37059)	Top-1 acc 51.953 (52.820)	Top-5 acc 73.828 (75.537)	lr 0.02015
Train [35][2780/3239]	Time 0.385 (0.574)	Data Time 0.001 (0.012)	Loss 2.8219 (2.9666)	Entropy 1.36316 (1.37057)	Top-1 acc 56.641 (52.824)	Top-5 acc 78.516 (75.541)	lr 0.02015
Train [35][2790/3239]	Time 0.182 (0.574)	Data Time 0.001 (0.012)	Loss 3.1826 (2.9667)	Entropy 1.36308 (1.37054)	Top-1 acc 50.000 (52.824)	Top-5 acc 69.922 (75.539)	lr 0.02015
Train [35][2800/3239]	Time 0.210 (0.573)	Data Time 0.001 (0.012)	Loss 2.7643 (2.9666)	Entropy 1.36299 (1.37051)	Top-1 acc 62.109 (52.830)	Top-5 acc 79.688 (75.540)	lr 0.02014
Train [35][2810/3239]	Time 0.213 (0.573)	Data Time 0.001 (0.012)	Loss 3.0247 (2.9665)	Entropy 1.36298 (1.37049)	Top-1 acc 51.562 (52.830)	Top-5 acc 73.828 (75.541)	lr 0.02014
Train [35][2820/3239]	Time 0.155 (0.572)	Data Time 0.001 (0.012)	Loss 3.1323 (2.9666)	Entropy 1.36294 (1.37046)	Top-1 acc 54.297 (52.831)	Top-5 acc 71.875 (75.538)	lr 0.02014
Train [35][2830/3239]	Time 0.330 (0.572)	Data Time 0.001 (0.012)	Loss 2.8796 (2.9666)	Entropy 1.36285 (1.37043)	Top-1 acc 53.125 (52.832)	Top-5 acc 78.125 (75.540)	lr 0.02014
Train [35][2840/3239]	Time 0.228 (0.572)	Data Time 0.001 (0.012)	Loss 2.9938 (2.9665)	Entropy 1.36280 (1.37041)	Top-1 acc 49.609 (52.832)	Top-5 acc 74.219 (75.540)	lr 0.02014
Train [35][2850/3239]	Time 0.214 (0.571)	Data Time 0.001 (0.012)	Loss 3.0683 (2.9664)	Entropy 1.36273 (1.37038)	Top-1 acc 50.781 (52.832)	Top-5 acc 73.047 (75.543)	lr 0.02014
Train [35][2860/3239]	Time 0.231 (0.571)	Data Time 0.001 (0.012)	Loss 3.1729 (2.9664)	Entropy 1.36268 (1.37035)	Top-1 acc 46.875 (52.830)	Top-5 acc 71.484 (75.543)	lr 0.02014
Train [35][2870/3239]	Time 0.258 (0.570)	Data Time 0.001 (0.012)	Loss 2.8780 (2.9662)	Entropy 1.36261 (1.37033)	Top-1 acc 51.562 (52.834)	Top-5 acc 79.688 (75.546)	lr 0.02014
Train [35][2880/3239]	Time 0.317 (0.570)	Data Time 0.001 (0.012)	Loss 2.7550 (2.9662)	Entropy 1.36258 (1.37030)	Top-1 acc 63.281 (52.833)	Top-5 acc 80.859 (75.546)	lr 0.02014
Train [35][2890/3239]	Time 0.328 (0.584)	Data Time 0.004 (0.012)	Loss 2.9242 (2.9664)	Entropy 1.36202 (1.37027)	Top-1 acc 53.906 (52.827)	Top-5 acc 73.828 (75.546)	lr 0.02014
Train [35][2900/3239]	Time 0.285 (0.584)	Data Time 0.002 (0.012)	Loss 2.9803 (2.9665)	Entropy 1.36194 (1.37024)	Top-1 acc 53.516 (52.828)	Top-5 acc 71.484 (75.543)	lr 0.02014
Train [35][2910/3239]	Time 0.237 (0.584)	Data Time 0.001 (0.012)	Loss 3.0131 (2.9663)	Entropy 1.36189 (1.37021)	Top-1 acc 50.000 (52.830)	Top-5 acc 75.000 (75.550)	lr 0.02014
Train [35][2920/3239]	Time 0.235 (0.583)	Data Time 0.001 (0.012)	Loss 2.9416 (2.9664)	Entropy 1.36190 (1.37019)	Top-1 acc 57.031 (52.830)	Top-5 acc 75.391 (75.550)	lr 0.02014
Train [35][2930/3239]	Time 0.304 (0.583)	Data Time 0.001 (0.012)	Loss 2.8502 (2.9663)	Entropy 1.36181 (1.37016)	Top-1 acc 56.250 (52.833)	Top-5 acc 80.469 (75.550)	lr 0.02013
Train [35][2940/3239]	Time 0.303 (0.582)	Data Time 0.004 (0.012)	Loss 3.1130 (2.9661)	Entropy 1.36170 (1.37013)	Top-1 acc 48.438 (52.836)	Top-5 acc 75.000 (75.556)	lr 0.02013
Train [35][2950/3239]	Time 0.261 (0.582)	Data Time 0.001 (0.012)	Loss 2.8014 (2.9665)	Entropy 1.36158 (1.37010)	Top-1 acc 55.469 (52.831)	Top-5 acc 77.734 (75.546)	lr 0.02013
Train [35][2960/3239]	Time 0.272 (0.581)	Data Time 0.001 (0.012)	Loss 2.8795 (2.9663)	Entropy 1.36154 (1.37007)	Top-1 acc 55.078 (52.838)	Top-5 acc 77.734 (75.548)	lr 0.02013
Train [35][2970/3239]	Time 0.274 (0.581)	Data Time 0.001 (0.012)	Loss 2.7768 (2.9663)	Entropy 1.36153 (1.37004)	Top-1 acc 54.688 (52.841)	Top-5 acc 83.594 (75.551)	lr 0.02013
Train [35][2980/3239]	Time 0.294 (0.581)	Data Time 0.001 (0.011)	Loss 3.1210 (2.9663)	Entropy 1.36148 (1.37001)	Top-1 acc 50.000 (52.846)	Top-5 acc 71.484 (75.553)	lr 0.02013
Train [35][2990/3239]	Time 0.243 (0.580)	Data Time 0.002 (0.011)	Loss 3.0977 (2.9663)	Entropy 1.36144 (1.36999)	Top-1 acc 47.266 (52.846)	Top-5 acc 76.562 (75.553)	lr 0.02013
Train [35][3000/3239]	Time 0.293 (0.580)	Data Time 0.001 (0.011)	Loss 2.6952 (2.9662)	Entropy 1.36136 (1.36996)	Top-1 acc 54.688 (52.846)	Top-5 acc 83.203 (75.555)	lr 0.02013
Train [35][3010/3239]	Time 0.205 (0.579)	Data Time 0.001 (0.011)	Loss 3.2419 (2.9663)	Entropy 1.36133 (1.36993)	Top-1 acc 47.266 (52.842)	Top-5 acc 71.484 (75.553)	lr 0.02013
Train [35][3020/3239]	Time 0.249 (0.579)	Data Time 0.001 (0.011)	Loss 2.9110 (2.9663)	Entropy 1.36127 (1.36990)	Top-1 acc 57.812 (52.846)	Top-5 acc 75.391 (75.551)	lr 0.02013
Train [35][3030/3239]	Time 0.263 (0.579)	Data Time 0.001 (0.011)	Loss 2.9111 (2.9663)	Entropy 1.36118 (1.36987)	Top-1 acc 59.375 (52.847)	Top-5 acc 78.125 (75.549)	lr 0.02013
Train [35][3040/3239]	Time 0.314 (0.578)	Data Time 0.001 (0.011)	Loss 3.0802 (2.9662)	Entropy 1.36111 (1.36984)	Top-1 acc 50.391 (52.849)	Top-5 acc 73.047 (75.552)	lr 0.02013
Train [35][3050/3239]	Time 0.247 (0.578)	Data Time 0.001 (0.011)	Loss 3.0124 (2.9664)	Entropy 1.36108 (1.36981)	Top-1 acc 49.219 (52.848)	Top-5 acc 72.656 (75.550)	lr 0.02012
Train [35][3060/3239]	Time 0.253 (0.577)	Data Time 0.001 (0.011)	Loss 3.2115 (2.9664)	Entropy 1.36105 (1.36979)	Top-1 acc 50.000 (52.855)	Top-5 acc 71.875 (75.551)	lr 0.02012
Train [35][3070/3239]	Time 0.216 (0.577)	Data Time 0.001 (0.011)	Loss 2.9849 (2.9665)	Entropy 1.36099 (1.36976)	Top-1 acc 53.125 (52.853)	Top-5 acc 74.609 (75.546)	lr 0.02012
Train [35][3080/3239]	Time 0.235 (0.577)	Data Time 0.001 (0.011)	Loss 3.1736 (2.9666)	Entropy 1.36122 (1.36973)	Top-1 acc 47.266 (52.851)	Top-5 acc 69.922 (75.541)	lr 0.02012
Train [35][3090/3239]	Time 0.347 (0.576)	Data Time 0.001 (0.011)	Loss 3.3195 (2.9667)	Entropy 1.36117 (1.36970)	Top-1 acc 44.922 (52.847)	Top-5 acc 67.188 (75.539)	lr 0.02012
Train [35][3100/3239]	Time 0.272 (0.576)	Data Time 0.002 (0.011)	Loss 3.1081 (2.9669)	Entropy 1.36114 (1.36967)	Top-1 acc 48.047 (52.840)	Top-5 acc 72.266 (75.532)	lr 0.02012
Train [35][3110/3239]	Time 0.255 (0.575)	Data Time 0.002 (0.011)	Loss 3.0716 (2.9670)	Entropy 1.36106 (1.36965)	Top-1 acc 49.219 (52.836)	Top-5 acc 70.703 (75.528)	lr 0.02012
Train [35][3120/3239]	Time 0.247 (0.575)	Data Time 0.001 (0.011)	Loss 2.8647 (2.9669)	Entropy 1.36100 (1.36962)	Top-1 acc 58.594 (52.840)	Top-5 acc 76.562 (75.529)	lr 0.02012
Train [35][3130/3239]	Time 0.311 (0.575)	Data Time 0.001 (0.011)	Loss 2.8034 (2.9668)	Entropy 1.36092 (1.36959)	Top-1 acc 58.594 (52.845)	Top-5 acc 80.859 (75.532)	lr 0.02012
Train [35][3140/3239]	Time 0.378 (0.574)	Data Time 0.004 (0.011)	Loss 3.1574 (2.9668)	Entropy 1.36091 (1.36956)	Top-1 acc 49.609 (52.844)	Top-5 acc 72.266 (75.534)	lr 0.02012
Train [35][3150/3239]	Time 0.225 (0.574)	Data Time 0.002 (0.011)	Loss 2.8325 (2.9667)	Entropy 1.36072 (1.36954)	Top-1 acc 57.031 (52.841)	Top-5 acc 75.391 (75.534)	lr 0.02012
Train [35][3160/3239]	Time 0.291 (0.574)	Data Time 0.002 (0.011)	Loss 2.7648 (2.9665)	Entropy 1.36068 (1.36951)	Top-1 acc 53.906 (52.844)	Top-5 acc 78.516 (75.541)	lr 0.02012
Train [35][3170/3239]	Time 0.220 (0.573)	Data Time 0.001 (0.011)	Loss 3.0964 (2.9665)	Entropy 1.36067 (1.36948)	Top-1 acc 50.391 (52.844)	Top-5 acc 73.047 (75.539)	lr 0.02012
Train [35][3180/3239]	Time 0.217 (0.573)	Data Time 0.000 (0.011)	Loss 2.9158 (2.9664)	Entropy 1.36064 (1.36945)	Top-1 acc 54.688 (52.844)	Top-5 acc 76.562 (75.540)	lr 0.02011
Train [35][3190/3239]	Time 0.212 (0.572)	Data Time 0.000 (0.011)	Loss 3.1074 (2.9666)	Entropy 1.36057 (1.36943)	Top-1 acc 51.172 (52.839)	Top-5 acc 74.219 (75.539)	lr 0.02011
Train [35][3200/3239]	Time 0.301 (0.572)	Data Time 0.000 (0.011)	Loss 3.0068 (2.9666)	Entropy 1.36055 (1.36940)	Top-1 acc 52.734 (52.839)	Top-5 acc 75.000 (75.539)	lr 0.02011
Train [35][3210/3239]	Time 0.214 (0.571)	Data Time 0.000 (0.011)	Loss 3.0883 (2.9666)	Entropy 1.36052 (1.36937)	Top-1 acc 50.391 (52.841)	Top-5 acc 72.656 (75.537)	lr 0.02011
Train [35][3220/3239]	Time 0.304 (0.584)	Data Time 0.000 (0.011)	Loss 3.0935 (2.9665)	Entropy 1.36038 (1.36934)	Top-1 acc 48.438 (52.845)	Top-5 acc 69.141 (75.541)	lr 0.02011
Train [35][3230/3239]	Time 0.228 (0.584)	Data Time 0.000 (0.011)	Loss 2.8255 (2.9665)	Entropy 1.36028 (1.36931)	Top-1 acc 60.156 (52.848)	Top-5 acc 80.078 (75.540)	lr 0.02011
Train [35][3239/3239]	Time 2.209 (0.583)	Data Time 0.000 (0.011)	Loss 3.2844 (2.9665)	Entropy 1.36028 (1.36929)	Top-1 acc 48.148 (52.846)	Top-5 acc 71.605 (75.537)	lr 0.02011
==========Valid [35/120]	loss 1.782	top-1 acc 60.175 (60.175)	top-5 acc 82.193	Train top-1 52.846	top-5 75.537	Entropy 1.36028	Latency-None: 0.000ms	Flops: 556.46M
Train [36][0/3239]	Time 30.886 (30.886)	Data Time 29.086 (29.086)	Loss 3.0930 (3.0930)	Entropy 1.36016 (1.36016)	Top-1 acc 50.391 (50.391)	Top-5 acc 72.266 (72.266)	lr 0.02011
Train [36][10/3239]	Time 2.685 (3.443)	Data Time 0.002 (2.761)	Loss 2.7478 (2.9504)	Entropy 1.36016 (1.36016)	Top-1 acc 59.766 (53.551)	Top-5 acc 79.688 (75.639)	lr 0.02011
Train [36][20/3239]	Time 0.333 (1.918)	Data Time 0.001 (1.447)	Loss 2.8070 (2.9380)	Entropy 1.36015 (1.36016)	Top-1 acc 55.859 (53.795)	Top-5 acc 78.125 (76.042)	lr 0.02011
Train [36][30/3239]	Time 0.235 (1.449)	Data Time 0.001 (0.980)	Loss 2.8618 (2.9336)	Entropy 1.36014 (1.36015)	Top-1 acc 55.469 (53.553)	Top-5 acc 76.172 (76.575)	lr 0.02011
Train [36][40/3239]	Time 0.236 (1.206)	Data Time 0.001 (0.742)	Loss 2.8478 (2.9370)	Entropy 1.36002 (1.36013)	Top-1 acc 55.469 (53.258)	Top-5 acc 78.906 (76.372)	lr 0.02011
Train [36][50/3239]	Time 0.235 (1.057)	Data Time 0.001 (0.597)	Loss 2.7386 (2.9252)	Entropy 1.35991 (1.36009)	Top-1 acc 57.031 (53.263)	Top-5 acc 80.469 (76.593)	lr 0.02011
Train [36][60/3239]	Time 0.226 (0.958)	Data Time 0.002 (0.499)	Loss 2.8426 (2.9247)	Entropy 1.35993 (1.36006)	Top-1 acc 52.734 (53.432)	Top-5 acc 78.516 (76.678)	lr 0.02010
Train [36][70/3239]	Time 0.206 (0.889)	Data Time 0.001 (0.429)	Loss 3.0158 (2.9205)	Entropy 1.35991 (1.36004)	Top-1 acc 51.953 (53.631)	Top-5 acc 76.172 (76.755)	lr 0.02010
Train [36][80/3239]	Time 0.267 (0.837)	Data Time 0.001 (0.376)	Loss 2.9047 (2.9193)	Entropy 1.35986 (1.36002)	Top-1 acc 54.688 (53.742)	Top-5 acc 77.734 (76.804)	lr 0.02010
Train [36][90/3239]	Time 0.226 (0.796)	Data Time 0.001 (0.336)	Loss 3.0180 (2.9304)	Entropy 1.35970 (1.36000)	Top-1 acc 50.391 (53.511)	Top-5 acc 71.484 (76.541)	lr 0.02010
Train [36][100/3239]	Time 0.230 (0.760)	Data Time 0.001 (0.303)	Loss 3.0169 (2.9387)	Entropy 1.35964 (1.35997)	Top-1 acc 53.125 (53.315)	Top-5 acc 75.781 (76.354)	lr 0.02010
Train [36][110/3239]	Time 0.257 (0.734)	Data Time 0.005 (0.275)	Loss 2.9170 (2.9405)	Entropy 1.35960 (1.35994)	Top-1 acc 55.078 (53.340)	Top-5 acc 75.391 (76.274)	lr 0.02010
Train [36][120/3239]	Time 2.496 (0.712)	Data Time 0.001 (0.253)	Loss 2.8820 (2.9431)	Entropy 1.35960 (1.35991)	Top-1 acc 55.078 (53.332)	Top-5 acc 77.344 (76.149)	lr 0.02010
Train [36][130/3239]	Time 0.313 (0.676)	Data Time 0.001 (0.234)	Loss 2.8758 (2.9411)	Entropy 1.35950 (1.35988)	Top-1 acc 55.078 (53.343)	Top-5 acc 76.953 (76.202)	lr 0.02010
Train [36][140/3239]	Time 0.228 (0.660)	Data Time 0.001 (0.217)	Loss 3.0958 (2.9437)	Entropy 1.35951 (1.35985)	Top-1 acc 52.734 (53.319)	Top-5 acc 74.609 (76.100)	lr 0.02010
Train [36][150/3239]	Time 0.214 (0.646)	Data Time 0.001 (0.203)	Loss 2.8477 (2.9434)	Entropy 1.35946 (1.35983)	Top-1 acc 54.297 (53.358)	Top-5 acc 75.391 (76.133)	lr 0.02010
Train [36][160/3239]	Time 0.223 (0.634)	Data Time 0.001 (0.190)	Loss 2.9592 (2.9461)	Entropy 1.35941 (1.35980)	Top-1 acc 52.734 (53.237)	Top-5 acc 78.906 (76.082)	lr 0.02010
Train [36][170/3239]	Time 0.238 (0.624)	Data Time 0.001 (0.179)	Loss 3.0220 (2.9462)	Entropy 1.35941 (1.35978)	Top-1 acc 55.469 (53.237)	Top-5 acc 75.000 (76.037)	lr 0.02010
Train [36][180/3239]	Time 0.242 (0.614)	Data Time 0.001 (0.170)	Loss 2.9681 (2.9451)	Entropy 1.35932 (1.35976)	Top-1 acc 53.906 (53.274)	Top-5 acc 75.391 (76.049)	lr 0.02010
Train [36][190/3239]	Time 0.195 (0.607)	Data Time 0.001 (0.161)	Loss 2.9066 (2.9444)	Entropy 1.35921 (1.35973)	Top-1 acc 53.125 (53.285)	Top-5 acc 78.125 (76.066)	lr 0.02009
Train [36][200/3239]	Time 0.216 (0.599)	Data Time 0.001 (0.153)	Loss 2.9474 (2.9452)	Entropy 1.35914 (1.35971)	Top-1 acc 51.953 (53.286)	Top-5 acc 75.781 (76.065)	lr 0.02009
Train [36][210/3239]	Time 0.260 (0.591)	Data Time 0.001 (0.146)	Loss 2.7433 (2.9433)	Entropy 1.35910 (1.35968)	Top-1 acc 57.422 (53.336)	Top-5 acc 80.859 (76.135)	lr 0.02009
Train [36][220/3239]	Time 0.243 (0.585)	Data Time 0.001 (0.139)	Loss 2.9923 (2.9435)	Entropy 1.35892 (1.35965)	Top-1 acc 48.438 (53.355)	Top-5 acc 75.000 (76.122)	lr 0.02009
Train [36][230/3239]	Time 2.440 (0.579)	Data Time 0.001 (0.133)	Loss 2.8547 (2.9420)	Entropy 1.35892 (1.35962)	Top-1 acc 55.078 (53.412)	Top-5 acc 78.516 (76.172)	lr 0.02009
Train [36][240/3239]	Time 0.319 (0.565)	Data Time 0.001 (0.128)	Loss 3.0294 (2.9416)	Entropy 1.35889 (1.35959)	Top-1 acc 50.000 (53.448)	Top-5 acc 70.703 (76.165)	lr 0.02009
Train [36][250/3239]	Time 0.201 (0.560)	Data Time 0.001 (0.123)	Loss 2.8746 (2.9392)	Entropy 1.35887 (1.35956)	Top-1 acc 57.422 (53.527)	Top-5 acc 78.906 (76.229)	lr 0.02009
Train [36][260/3239]	Time 0.218 (0.556)	Data Time 0.001 (0.118)	Loss 2.9909 (2.9408)	Entropy 1.35888 (1.35953)	Top-1 acc 51.953 (53.486)	Top-5 acc 73.828 (76.196)	lr 0.02009
Train [36][270/3239]	Time 0.219 (0.551)	Data Time 0.001 (0.114)	Loss 2.7940 (2.9388)	Entropy 1.35847 (1.35950)	Top-1 acc 60.938 (53.526)	Top-5 acc 76.953 (76.211)	lr 0.02009
Train [36][280/3239]	Time 0.202 (0.548)	Data Time 0.001 (0.110)	Loss 3.0022 (2.9381)	Entropy 1.35845 (1.35946)	Top-1 acc 49.219 (53.486)	Top-5 acc 76.172 (76.193)	lr 0.02009
Train [36][290/3239]	Time 0.285 (0.545)	Data Time 0.001 (0.106)	Loss 2.8497 (2.9373)	Entropy 1.35833 (1.35943)	Top-1 acc 55.469 (53.537)	Top-5 acc 78.516 (76.184)	lr 0.02009
Train [36][300/3239]	Time 0.225 (0.542)	Data Time 0.001 (0.103)	Loss 2.9461 (2.9373)	Entropy 1.35830 (1.35939)	Top-1 acc 53.906 (53.507)	Top-5 acc 75.781 (76.168)	lr 0.02009
Train [36][310/3239]	Time 0.213 (0.539)	Data Time 0.001 (0.099)	Loss 2.8470 (2.9378)	Entropy 1.35832 (1.35935)	Top-1 acc 54.688 (53.496)	Top-5 acc 76.172 (76.179)	lr 0.02008
Train [36][320/3239]	Time 0.225 (0.536)	Data Time 0.001 (0.096)	Loss 2.8174 (2.9368)	Entropy 1.35821 (1.35932)	Top-1 acc 57.812 (53.538)	Top-5 acc 76.562 (76.183)	lr 0.02008
Train [36][330/3239]	Time 0.258 (0.534)	Data Time 0.001 (0.094)	Loss 2.9400 (2.9376)	Entropy 1.35818 (1.35929)	Top-1 acc 55.469 (53.545)	Top-5 acc 75.781 (76.181)	lr 0.02008
Train [36][340/3239]	Time 43.650 (0.652)	Data Time 0.002 (0.091)	Loss 2.8787 (2.9381)	Entropy 1.35818 (1.35925)	Top-1 acc 53.906 (53.549)	Top-5 acc 76.172 (76.146)	lr 0.02008
Train [36][350/3239]	Time 0.374 (0.644)	Data Time 0.002 (0.088)	Loss 2.9572 (2.9377)	Entropy 1.35815 (1.35922)	Top-1 acc 52.734 (53.568)	Top-5 acc 74.219 (76.137)	lr 0.02008
Train [36][360/3239]	Time 0.226 (0.641)	Data Time 0.002 (0.086)	Loss 2.7867 (2.9371)	Entropy 1.35812 (1.35919)	Top-1 acc 56.641 (53.610)	Top-5 acc 75.391 (76.126)	lr 0.02008
Train [36][370/3239]	Time 0.213 (0.636)	Data Time 0.001 (0.084)	Loss 2.9818 (2.9378)	Entropy 1.35799 (1.35916)	Top-1 acc 50.781 (53.594)	Top-5 acc 76.172 (76.121)	lr 0.02008
Train [36][380/3239]	Time 0.241 (0.631)	Data Time 0.002 (0.082)	Loss 2.9556 (2.9389)	Entropy 1.35797 (1.35913)	Top-1 acc 53.906 (53.607)	Top-5 acc 76.172 (76.104)	lr 0.02008
Train [36][390/3239]	Time 0.237 (0.626)	Data Time 0.001 (0.079)	Loss 3.0155 (2.9379)	Entropy 1.35777 (1.35910)	Top-1 acc 48.047 (53.625)	Top-5 acc 75.391 (76.132)	lr 0.02008
Train [36][400/3239]	Time 0.332 (0.622)	Data Time 0.002 (0.078)	Loss 2.8078 (2.9383)	Entropy 1.35769 (1.35906)	Top-1 acc 57.031 (53.636)	Top-5 acc 78.906 (76.111)	lr 0.02008
Train [36][410/3239]	Time 0.222 (0.618)	Data Time 0.001 (0.076)	Loss 2.8651 (2.9381)	Entropy 1.35763 (1.35903)	Top-1 acc 55.078 (53.633)	Top-5 acc 77.344 (76.113)	lr 0.02008
Train [36][420/3239]	Time 0.221 (0.614)	Data Time 0.001 (0.074)	Loss 3.0252 (2.9376)	Entropy 1.35763 (1.35900)	Top-1 acc 51.953 (53.639)	Top-5 acc 73.438 (76.131)	lr 0.02008
Train [36][430/3239]	Time 0.221 (0.610)	Data Time 0.001 (0.072)	Loss 2.9059 (2.9381)	Entropy 1.35761 (1.35896)	Top-1 acc 53.906 (53.625)	Top-5 acc 75.391 (76.137)	lr 0.02008
Train [36][440/3239]	Time 0.209 (0.607)	Data Time 0.001 (0.071)	Loss 3.0155 (2.9388)	Entropy 1.35750 (1.35893)	Top-1 acc 52.344 (53.612)	Top-5 acc 73.828 (76.119)	lr 0.02007
Train [36][450/3239]	Time 2.507 (0.603)	Data Time 0.001 (0.069)	Loss 2.9397 (2.9381)	Entropy 1.35750 (1.35890)	Top-1 acc 54.297 (53.626)	Top-5 acc 76.953 (76.131)	lr 0.02007
Train [36][460/3239]	Time 0.216 (0.595)	Data Time 0.001 (0.068)	Loss 2.8711 (2.9389)	Entropy 1.35745 (1.35887)	Top-1 acc 57.422 (53.613)	Top-5 acc 76.562 (76.126)	lr 0.02007
Train [36][470/3239]	Time 0.235 (0.592)	Data Time 0.001 (0.066)	Loss 2.8329 (2.9401)	Entropy 1.35732 (1.35884)	Top-1 acc 58.594 (53.595)	Top-5 acc 78.906 (76.087)	lr 0.02007
Train [36][480/3239]	Time 0.251 (0.590)	Data Time 0.001 (0.065)	Loss 3.0022 (2.9408)	Entropy 1.35722 (1.35880)	Top-1 acc 51.172 (53.588)	Top-5 acc 74.219 (76.085)	lr 0.02007
Train [36][490/3239]	Time 0.242 (0.587)	Data Time 0.001 (0.064)	Loss 3.0418 (2.9413)	Entropy 1.35719 (1.35877)	Top-1 acc 48.828 (53.557)	Top-5 acc 75.000 (76.082)	lr 0.02007
Train [36][500/3239]	Time 0.325 (0.584)	Data Time 0.001 (0.062)	Loss 3.0521 (2.9417)	Entropy 1.35718 (1.35874)	Top-1 acc 52.734 (53.560)	Top-5 acc 73.438 (76.062)	lr 0.02007
Train [36][510/3239]	Time 0.223 (0.582)	Data Time 0.001 (0.061)	Loss 3.0028 (2.9413)	Entropy 1.35694 (1.35871)	Top-1 acc 53.906 (53.582)	Top-5 acc 74.609 (76.063)	lr 0.02007
Train [36][520/3239]	Time 0.219 (0.580)	Data Time 0.001 (0.060)	Loss 2.9528 (2.9415)	Entropy 1.35694 (1.35867)	Top-1 acc 51.953 (53.584)	Top-5 acc 75.000 (76.047)	lr 0.02007
Train [36][530/3239]	Time 0.214 (0.577)	Data Time 0.001 (0.059)	Loss 2.9193 (2.9408)	Entropy 1.35694 (1.35864)	Top-1 acc 51.953 (53.583)	Top-5 acc 77.344 (76.070)	lr 0.02007
Train [36][540/3239]	Time 0.218 (0.575)	Data Time 0.002 (0.058)	Loss 2.8545 (2.9407)	Entropy 1.35690 (1.35861)	Top-1 acc 55.469 (53.565)	Top-5 acc 76.953 (76.064)	lr 0.02007
Train [36][550/3239]	Time 0.221 (0.573)	Data Time 0.001 (0.057)	Loss 2.9832 (2.9413)	Entropy 1.35682 (1.35858)	Top-1 acc 50.781 (53.554)	Top-5 acc 74.609 (76.053)	lr 0.02007
Train [36][560/3239]	Time 2.350 (0.571)	Data Time 0.001 (0.056)	Loss 2.9919 (2.9404)	Entropy 1.35682 (1.35855)	Top-1 acc 49.219 (53.564)	Top-5 acc 76.562 (76.071)	lr 0.02006
Train [36][570/3239]	Time 0.219 (0.565)	Data Time 0.001 (0.055)	Loss 3.0583 (2.9407)	Entropy 1.35674 (1.35851)	Top-1 acc 47.656 (53.549)	Top-5 acc 76.562 (76.065)	lr 0.02006
Train [36][580/3239]	Time 0.261 (0.563)	Data Time 0.001 (0.054)	Loss 2.8995 (2.9408)	Entropy 1.35674 (1.35848)	Top-1 acc 53.516 (53.553)	Top-5 acc 75.781 (76.046)	lr 0.02006
Train [36][590/3239]	Time 0.235 (0.561)	Data Time 0.001 (0.053)	Loss 2.9043 (2.9411)	Entropy 1.35665 (1.35845)	Top-1 acc 53.906 (53.541)	Top-5 acc 75.391 (76.037)	lr 0.02006
Train [36][600/3239]	Time 0.231 (0.559)	Data Time 0.001 (0.052)	Loss 3.1748 (2.9417)	Entropy 1.35657 (1.35842)	Top-1 acc 50.000 (53.521)	Top-5 acc 69.922 (76.026)	lr 0.02006
Train [36][610/3239]	Time 0.228 (0.557)	Data Time 0.001 (0.051)	Loss 2.8769 (2.9416)	Entropy 1.35659 (1.35839)	Top-1 acc 58.594 (53.538)	Top-5 acc 79.688 (76.033)	lr 0.02006
Train [36][620/3239]	Time 0.220 (0.556)	Data Time 0.001 (0.051)	Loss 2.8989 (2.9413)	Entropy 1.35657 (1.35836)	Top-1 acc 57.031 (53.540)	Top-5 acc 78.125 (76.050)	lr 0.02006
Train [36][630/3239]	Time 0.211 (0.555)	Data Time 0.001 (0.050)	Loss 3.0857 (2.9419)	Entropy 1.35651 (1.35833)	Top-1 acc 54.297 (53.532)	Top-5 acc 75.781 (76.043)	lr 0.02006
Train [36][640/3239]	Time 0.223 (0.553)	Data Time 0.001 (0.049)	Loss 2.9806 (2.9431)	Entropy 1.35647 (1.35831)	Top-1 acc 53.516 (53.516)	Top-5 acc 73.828 (76.015)	lr 0.02006
Train [36][650/3239]	Time 0.201 (0.552)	Data Time 0.001 (0.048)	Loss 3.0529 (2.9430)	Entropy 1.35644 (1.35828)	Top-1 acc 49.219 (53.525)	Top-5 acc 74.219 (76.016)	lr 0.02006
Train [36][660/3239]	Time 0.263 (0.550)	Data Time 0.001 (0.048)	Loss 2.9227 (2.9431)	Entropy 1.35637 (1.35825)	Top-1 acc 54.688 (53.520)	Top-5 acc 75.781 (76.019)	lr 0.02006
Train [36][670/3239]	Time 2.364 (0.549)	Data Time 0.001 (0.047)	Loss 3.0575 (2.9430)	Entropy 1.35637 (1.35822)	Top-1 acc 50.391 (53.516)	Top-5 acc 72.656 (76.026)	lr 0.02006
Train [36][680/3239]	Time 0.257 (0.544)	Data Time 0.001 (0.046)	Loss 3.1218 (2.9431)	Entropy 1.35637 (1.35819)	Top-1 acc 48.047 (53.499)	Top-5 acc 71.484 (76.013)	lr 0.02005
Train [36][690/3239]	Time 0.236 (0.543)	Data Time 0.001 (0.046)	Loss 2.8823 (2.9441)	Entropy 1.35623 (1.35817)	Top-1 acc 57.422 (53.491)	Top-5 acc 77.734 (75.989)	lr 0.02005
Train [36][700/3239]	Time 0.235 (0.542)	Data Time 0.001 (0.045)	Loss 2.8695 (2.9436)	Entropy 1.35623 (1.35814)	Top-1 acc 55.859 (53.504)	Top-5 acc 76.172 (75.995)	lr 0.02005
Train [36][710/3239]	Time 0.350 (0.601)	Data Time 0.002 (0.044)	Loss 2.9396 (2.9426)	Entropy 1.35616 (1.35811)	Top-1 acc 54.297 (53.527)	Top-5 acc 75.781 (76.028)	lr 0.02005
Train [36][720/3239]	Time 0.262 (0.601)	Data Time 0.002 (0.044)	Loss 3.1851 (2.9440)	Entropy 1.35612 (1.35808)	Top-1 acc 48.438 (53.485)	Top-5 acc 71.484 (76.004)	lr 0.02005
Train [36][730/3239]	Time 0.203 (0.599)	Data Time 0.001 (0.043)	Loss 2.9535 (2.9437)	Entropy 1.35611 (1.35806)	Top-1 acc 57.422 (53.486)	Top-5 acc 76.562 (76.008)	lr 0.02005
Train [36][740/3239]	Time 0.258 (0.597)	Data Time 0.001 (0.043)	Loss 2.8843 (2.9436)	Entropy 1.35593 (1.35803)	Top-1 acc 53.516 (53.491)	Top-5 acc 76.953 (76.007)	lr 0.02005
Train [36][750/3239]	Time 0.226 (0.595)	Data Time 0.001 (0.042)	Loss 3.0098 (2.9432)	Entropy 1.35588 (1.35800)	Top-1 acc 50.781 (53.493)	Top-5 acc 75.391 (76.020)	lr 0.02005
Train [36][760/3239]	Time 0.210 (0.594)	Data Time 0.001 (0.042)	Loss 3.0811 (2.9439)	Entropy 1.35583 (1.35797)	Top-1 acc 52.344 (53.482)	Top-5 acc 71.875 (76.005)	lr 0.02005
Train [36][770/3239]	Time 0.285 (0.592)	Data Time 0.001 (0.041)	Loss 2.8800 (2.9439)	Entropy 1.35584 (1.35795)	Top-1 acc 58.203 (53.488)	Top-5 acc 79.297 (76.001)	lr 0.02005
Train [36][780/3239]	Time 2.408 (0.590)	Data Time 0.001 (0.041)	Loss 2.9776 (2.9446)	Entropy 1.35584 (1.35792)	Top-1 acc 51.562 (53.470)	Top-5 acc 75.000 (75.985)	lr 0.02005
Train [36][790/3239]	Time 0.216 (0.585)	Data Time 0.001 (0.040)	Loss 2.7776 (2.9440)	Entropy 1.35578 (1.35789)	Top-1 acc 56.250 (53.474)	Top-5 acc 79.688 (75.993)	lr 0.02005
Train [36][800/3239]	Time 0.225 (0.584)	Data Time 0.002 (0.040)	Loss 2.8295 (2.9441)	Entropy 1.35576 (1.35786)	Top-1 acc 55.469 (53.463)	Top-5 acc 78.906 (75.999)	lr 0.02005
Train [36][810/3239]	Time 0.223 (0.582)	Data Time 0.001 (0.039)	Loss 2.9314 (2.9439)	Entropy 1.35572 (1.35784)	Top-1 acc 53.516 (53.473)	Top-5 acc 77.734 (75.995)	lr 0.02004
Train [36][820/3239]	Time 0.219 (0.580)	Data Time 0.001 (0.039)	Loss 2.8757 (2.9435)	Entropy 1.35546 (1.35781)	Top-1 acc 55.078 (53.478)	Top-5 acc 77.344 (76.005)	lr 0.02004
Train [36][830/3239]	Time 0.222 (0.579)	Data Time 0.001 (0.038)	Loss 2.8382 (2.9437)	Entropy 1.35543 (1.35778)	Top-1 acc 57.422 (53.465)	Top-5 acc 76.953 (76.006)	lr 0.02004
Train [36][840/3239]	Time 0.210 (0.577)	Data Time 0.001 (0.038)	Loss 3.0112 (2.9440)	Entropy 1.35532 (1.35775)	Top-1 acc 51.172 (53.469)	Top-5 acc 75.391 (75.994)	lr 0.02004
Train [36][850/3239]	Time 0.237 (0.575)	Data Time 0.001 (0.037)	Loss 2.8453 (2.9435)	Entropy 1.35521 (1.35772)	Top-1 acc 55.469 (53.471)	Top-5 acc 76.953 (76.003)	lr 0.02004
Train [36][860/3239]	Time 0.211 (0.574)	Data Time 0.001 (0.037)	Loss 3.0754 (2.9436)	Entropy 1.35517 (1.35769)	Top-1 acc 48.047 (53.458)	Top-5 acc 71.484 (75.996)	lr 0.02004
Train [36][870/3239]	Time 0.219 (0.572)	Data Time 0.001 (0.037)	Loss 3.0269 (2.9434)	Entropy 1.35484 (1.35767)	Top-1 acc 50.000 (53.478)	Top-5 acc 73.828 (75.999)	lr 0.02004
Train [36][880/3239]	Time 0.220 (0.571)	Data Time 0.001 (0.036)	Loss 2.9794 (2.9440)	Entropy 1.35478 (1.35763)	Top-1 acc 49.219 (53.459)	Top-5 acc 74.219 (75.994)	lr 0.02004
Train [36][890/3239]	Time 2.665 (0.571)	Data Time 0.001 (0.036)	Loss 2.8843 (2.9436)	Entropy 1.35478 (1.35760)	Top-1 acc 53.906 (53.471)	Top-5 acc 76.953 (76.004)	lr 0.02004
Train [36][900/3239]	Time 0.245 (0.567)	Data Time 0.001 (0.035)	Loss 3.0063 (2.9438)	Entropy 1.35468 (1.35757)	Top-1 acc 52.734 (53.452)	Top-5 acc 73.438 (75.992)	lr 0.02004
Train [36][910/3239]	Time 0.217 (0.565)	Data Time 0.001 (0.035)	Loss 3.0844 (2.9449)	Entropy 1.35468 (1.35754)	Top-1 acc 50.781 (53.434)	Top-5 acc 71.875 (75.979)	lr 0.02004
Train [36][920/3239]	Time 0.227 (0.564)	Data Time 0.001 (0.035)	Loss 2.8457 (2.9449)	Entropy 1.35459 (1.35751)	Top-1 acc 54.688 (53.432)	Top-5 acc 80.469 (75.968)	lr 0.02004
Train [36][930/3239]	Time 0.320 (0.563)	Data Time 0.002 (0.034)	Loss 2.8892 (2.9445)	Entropy 1.35452 (1.35747)	Top-1 acc 54.297 (53.437)	Top-5 acc 77.344 (75.975)	lr 0.02003
Train [36][940/3239]	Time 0.236 (0.562)	Data Time 0.001 (0.034)	Loss 3.1138 (2.9445)	Entropy 1.35451 (1.35744)	Top-1 acc 44.531 (53.414)	Top-5 acc 73.047 (75.976)	lr 0.02003
Train [36][950/3239]	Time 0.228 (0.561)	Data Time 0.001 (0.034)	Loss 3.0098 (2.9446)	Entropy 1.35448 (1.35741)	Top-1 acc 49.609 (53.413)	Top-5 acc 73.047 (75.978)	lr 0.02003
Train [36][960/3239]	Time 0.222 (0.560)	Data Time 0.001 (0.033)	Loss 3.0072 (2.9446)	Entropy 1.35445 (1.35738)	Top-1 acc 53.125 (53.421)	Top-5 acc 77.344 (75.972)	lr 0.02003
Train [36][970/3239]	Time 0.234 (0.559)	Data Time 0.001 (0.033)	Loss 2.9889 (2.9446)	Entropy 1.35439 (1.35735)	Top-1 acc 51.562 (53.420)	Top-5 acc 75.781 (75.976)	lr 0.02003
Train [36][980/3239]	Time 0.215 (0.558)	Data Time 0.001 (0.033)	Loss 2.9550 (2.9453)	Entropy 1.35409 (1.35732)	Top-1 acc 51.172 (53.400)	Top-5 acc 75.000 (75.958)	lr 0.02003
Train [36][990/3239]	Time 0.369 (0.557)	Data Time 0.001 (0.032)	Loss 2.8608 (2.9449)	Entropy 1.35402 (1.35729)	Top-1 acc 53.516 (53.410)	Top-5 acc 78.906 (75.964)	lr 0.02003
Train [36][1000/3239]	Time 2.415 (0.556)	Data Time 0.002 (0.032)	Loss 2.7790 (2.9450)	Entropy 1.35402 (1.35725)	Top-1 acc 60.547 (53.412)	Top-5 acc 78.906 (75.962)	lr 0.02003
Train [36][1010/3239]	Time 0.228 (0.553)	Data Time 0.001 (0.032)	Loss 2.7983 (2.9452)	Entropy 1.35400 (1.35722)	Top-1 acc 55.078 (53.409)	Top-5 acc 78.516 (75.948)	lr 0.02003
Train [36][1020/3239]	Time 0.249 (0.552)	Data Time 0.002 (0.031)	Loss 3.0273 (2.9459)	Entropy 1.35393 (1.35719)	Top-1 acc 48.047 (53.381)	Top-5 acc 74.219 (75.942)	lr 0.02003
Train [36][1030/3239]	Time 0.223 (0.551)	Data Time 0.001 (0.031)	Loss 2.7762 (2.9461)	Entropy 1.35377 (1.35716)	Top-1 acc 55.078 (53.357)	Top-5 acc 78.906 (75.949)	lr 0.02003
Train [36][1040/3239]	Time 0.208 (0.550)	Data Time 0.001 (0.031)	Loss 3.0772 (2.9458)	Entropy 1.35369 (1.35712)	Top-1 acc 50.391 (53.366)	Top-5 acc 75.000 (75.951)	lr 0.02003
Train [36][1050/3239]	Time 0.216 (0.549)	Data Time 0.001 (0.031)	Loss 3.0499 (2.9468)	Entropy 1.35354 (1.35709)	Top-1 acc 49.219 (53.345)	Top-5 acc 71.875 (75.933)	lr 0.02003
Train [36][1060/3239]	Time 0.223 (0.548)	Data Time 0.001 (0.030)	Loss 2.8977 (2.9467)	Entropy 1.35353 (1.35706)	Top-1 acc 53.516 (53.357)	Top-5 acc 78.516 (75.938)	lr 0.02002
Train [36][1070/3239]	Time 0.220 (0.591)	Data Time 0.002 (0.030)	Loss 3.0464 (2.9464)	Entropy 1.35342 (1.35702)	Top-1 acc 50.781 (53.362)	Top-5 acc 71.094 (75.941)	lr 0.02002
Train [36][1080/3239]	Time 0.243 (0.590)	Data Time 0.002 (0.030)	Loss 2.9432 (2.9462)	Entropy 1.35338 (1.35699)	Top-1 acc 53.906 (53.367)	Top-5 acc 73.828 (75.945)	lr 0.02002
Train [36][1090/3239]	Time 0.263 (0.588)	Data Time 0.001 (0.030)	Loss 2.8388 (2.9468)	Entropy 1.35336 (1.35696)	Top-1 acc 54.688 (53.368)	Top-5 acc 76.172 (75.933)	lr 0.02002
Train [36][1100/3239]	Time 0.202 (0.587)	Data Time 0.001 (0.029)	Loss 2.9740 (2.9469)	Entropy 1.35332 (1.35692)	Top-1 acc 48.828 (53.358)	Top-5 acc 74.609 (75.930)	lr 0.02002
Train [36][1110/3239]	Time 2.461 (0.586)	Data Time 0.001 (0.029)	Loss 3.2507 (2.9466)	Entropy 1.35332 (1.35689)	Top-1 acc 45.703 (53.370)	Top-5 acc 69.531 (75.937)	lr 0.02002
Train [36][1120/3239]	Time 0.221 (0.583)	Data Time 0.001 (0.029)	Loss 3.0760 (2.9466)	Entropy 1.35320 (1.35686)	Top-1 acc 50.000 (53.382)	Top-5 acc 72.266 (75.934)	lr 0.02002
Train [36][1130/3239]	Time 0.214 (0.582)	Data Time 0.001 (0.029)	Loss 2.8557 (2.9463)	Entropy 1.35319 (1.35683)	Top-1 acc 56.250 (53.391)	Top-5 acc 78.516 (75.942)	lr 0.02002
Train [36][1140/3239]	Time 0.235 (0.581)	Data Time 0.001 (0.028)	Loss 2.9798 (2.9467)	Entropy 1.35317 (1.35679)	Top-1 acc 48.828 (53.387)	Top-5 acc 76.953 (75.941)	lr 0.02002
Train [36][1150/3239]	Time 0.311 (0.580)	Data Time 0.001 (0.028)	Loss 3.0519 (2.9464)	Entropy 1.35310 (1.35676)	Top-1 acc 46.875 (53.394)	Top-5 acc 73.438 (75.937)	lr 0.02002
Train [36][1160/3239]	Time 0.209 (0.579)	Data Time 0.001 (0.028)	Loss 2.7167 (2.9460)	Entropy 1.35296 (1.35673)	Top-1 acc 59.375 (53.406)	Top-5 acc 81.250 (75.949)	lr 0.02002
Train [36][1170/3239]	Time 0.223 (0.577)	Data Time 0.001 (0.028)	Loss 2.9677 (2.9458)	Entropy 1.35288 (1.35670)	Top-1 acc 51.562 (53.407)	Top-5 acc 74.219 (75.943)	lr 0.02002
Train [36][1180/3239]	Time 0.207 (0.576)	Data Time 0.001 (0.027)	Loss 2.7449 (2.9452)	Entropy 1.35282 (1.35666)	Top-1 acc 55.859 (53.418)	Top-5 acc 79.688 (75.955)	lr 0.02001
Train [36][1190/3239]	Time 0.232 (0.575)	Data Time 0.001 (0.027)	Loss 2.9804 (2.9456)	Entropy 1.35265 (1.35663)	Top-1 acc 53.906 (53.410)	Top-5 acc 73.438 (75.950)	lr 0.02001
Train [36][1200/3239]	Time 0.203 (0.574)	Data Time 0.001 (0.027)	Loss 2.9389 (2.9458)	Entropy 1.35263 (1.35660)	Top-1 acc 54.297 (53.408)	Top-5 acc 76.953 (75.953)	lr 0.02001
Train [36][1210/3239]	Time 0.249 (0.573)	Data Time 0.001 (0.027)	Loss 2.8295 (2.9455)	Entropy 1.35246 (1.35657)	Top-1 acc 55.469 (53.421)	Top-5 acc 76.172 (75.955)	lr 0.02001
Train [36][1220/3239]	Time 2.465 (0.572)	Data Time 0.001 (0.027)	Loss 3.0105 (2.9457)	Entropy 1.35246 (1.35653)	Top-1 acc 53.516 (53.422)	Top-5 acc 74.609 (75.948)	lr 0.02001
Train [36][1230/3239]	Time 0.225 (0.570)	Data Time 0.001 (0.026)	Loss 2.9362 (2.9457)	Entropy 1.35241 (1.35650)	Top-1 acc 51.172 (53.419)	Top-5 acc 78.906 (75.954)	lr 0.02001
Train [36][1240/3239]	Time 0.218 (0.569)	Data Time 0.001 (0.026)	Loss 2.8902 (2.9457)	Entropy 1.35241 (1.35647)	Top-1 acc 53.906 (53.424)	Top-5 acc 77.734 (75.955)	lr 0.02001
Train [36][1250/3239]	Time 0.267 (0.568)	Data Time 0.001 (0.026)	Loss 2.8244 (2.9454)	Entropy 1.35234 (1.35643)	Top-1 acc 54.688 (53.436)	Top-5 acc 79.297 (75.965)	lr 0.02001
Train [36][1260/3239]	Time 0.225 (0.567)	Data Time 0.002 (0.026)	Loss 2.8659 (2.9459)	Entropy 1.35233 (1.35640)	Top-1 acc 55.859 (53.434)	Top-5 acc 75.391 (75.953)	lr 0.02001
Train [36][1270/3239]	Time 0.196 (0.566)	Data Time 0.002 (0.026)	Loss 3.0167 (2.9461)	Entropy 1.35235 (1.35637)	Top-1 acc 51.562 (53.430)	Top-5 acc 76.172 (75.948)	lr 0.02001
Train [36][1280/3239]	Time 0.217 (0.565)	Data Time 0.002 (0.025)	Loss 2.9636 (2.9458)	Entropy 1.35223 (1.35634)	Top-1 acc 53.516 (53.432)	Top-5 acc 76.562 (75.955)	lr 0.02001
Train [36][1290/3239]	Time 0.222 (0.564)	Data Time 0.002 (0.025)	Loss 2.9438 (2.9455)	Entropy 1.35218 (1.35630)	Top-1 acc 54.688 (53.433)	Top-5 acc 75.781 (75.960)	lr 0.02001
Train [36][1300/3239]	Time 0.195 (0.563)	Data Time 0.001 (0.025)	Loss 3.2291 (2.9458)	Entropy 1.35213 (1.35627)	Top-1 acc 46.094 (53.420)	Top-5 acc 69.531 (75.957)	lr 0.02000
Train [36][1310/3239]	Time 0.220 (0.563)	Data Time 0.001 (0.025)	Loss 3.1374 (2.9461)	Entropy 1.35199 (1.35624)	Top-1 acc 52.344 (53.406)	Top-5 acc 68.359 (75.950)	lr 0.02000
Train [36][1320/3239]	Time 0.237 (0.562)	Data Time 0.001 (0.025)	Loss 3.0080 (2.9461)	Entropy 1.35200 (1.35621)	Top-1 acc 51.172 (53.408)	Top-5 acc 73.828 (75.948)	lr 0.02000
Train [36][1330/3239]	Time 2.475 (0.561)	Data Time 0.001 (0.024)	Loss 3.0954 (2.9465)	Entropy 1.35200 (1.35618)	Top-1 acc 53.906 (53.405)	Top-5 acc 71.094 (75.940)	lr 0.02000
Train [36][1340/3239]	Time 0.233 (0.558)	Data Time 0.001 (0.024)	Loss 3.1952 (2.9462)	Entropy 1.35199 (1.35615)	Top-1 acc 47.656 (53.410)	Top-5 acc 72.266 (75.942)	lr 0.02000
Train [36][1350/3239]	Time 0.234 (0.558)	Data Time 0.001 (0.024)	Loss 3.0330 (2.9466)	Entropy 1.35203 (1.35612)	Top-1 acc 54.688 (53.406)	Top-5 acc 72.656 (75.935)	lr 0.02000
Train [36][1360/3239]	Time 0.215 (0.557)	Data Time 0.001 (0.024)	Loss 3.0334 (2.9464)	Entropy 1.35185 (1.35608)	Top-1 acc 53.125 (53.410)	Top-5 acc 75.391 (75.941)	lr 0.02000
Train [36][1370/3239]	Time 0.249 (0.556)	Data Time 0.001 (0.024)	Loss 3.0371 (2.9464)	Entropy 1.35181 (1.35605)	Top-1 acc 49.219 (53.410)	Top-5 acc 75.391 (75.946)	lr 0.02000
Train [36][1380/3239]	Time 0.191 (0.555)	Data Time 0.001 (0.024)	Loss 2.9254 (2.9462)	Entropy 1.35176 (1.35602)	Top-1 acc 53.906 (53.410)	Top-5 acc 75.000 (75.940)	lr 0.02000
Train [36][1390/3239]	Time 0.219 (0.555)	Data Time 0.001 (0.024)	Loss 2.8650 (2.9461)	Entropy 1.35164 (1.35599)	Top-1 acc 54.688 (53.410)	Top-5 acc 75.781 (75.942)	lr 0.02000
Train [36][1400/3239]	Time 0.224 (0.554)	Data Time 0.001 (0.023)	Loss 3.1197 (2.9463)	Entropy 1.35163 (1.35596)	Top-1 acc 51.953 (53.406)	Top-5 acc 76.953 (75.943)	lr 0.02000
Train [36][1410/3239]	Time 0.221 (0.553)	Data Time 0.001 (0.023)	Loss 2.8430 (2.9462)	Entropy 1.35163 (1.35593)	Top-1 acc 60.156 (53.409)	Top-5 acc 76.562 (75.941)	lr 0.02000
Train [36][1420/3239]	Time 0.229 (0.552)	Data Time 0.001 (0.023)	Loss 2.8781 (2.9463)	Entropy 1.35165 (1.35590)	Top-1 acc 51.562 (53.403)	Top-5 acc 77.344 (75.938)	lr 0.02000
Train [36][1430/3239]	Time 0.302 (0.583)	Data Time 0.004 (0.023)	Loss 2.9241 (2.9462)	Entropy 1.35157 (1.35587)	Top-1 acc 55.859 (53.405)	Top-5 acc 75.391 (75.944)	lr 0.01999
Train [36][1440/3239]	Time 2.563 (0.582)	Data Time 0.002 (0.023)	Loss 2.7756 (2.9458)	Entropy 1.35157 (1.35584)	Top-1 acc 59.375 (53.408)	Top-5 acc 78.906 (75.954)	lr 0.01999
Train [36][1450/3239]	Time 0.231 (0.580)	Data Time 0.002 (0.023)	Loss 2.9847 (2.9462)	Entropy 1.35156 (1.35581)	Top-1 acc 52.734 (53.393)	Top-5 acc 76.172 (75.951)	lr 0.01999
Train [36][1460/3239]	Time 0.229 (0.579)	Data Time 0.002 (0.022)	Loss 2.7993 (2.9459)	Entropy 1.35156 (1.35578)	Top-1 acc 58.984 (53.395)	Top-5 acc 78.125 (75.957)	lr 0.01999
Train [36][1470/3239]	Time 0.229 (0.578)	Data Time 0.001 (0.022)	Loss 2.7312 (2.9456)	Entropy 1.35152 (1.35575)	Top-1 acc 58.203 (53.401)	Top-5 acc 80.469 (75.963)	lr 0.01999
Train [36][1480/3239]	Time 0.253 (0.577)	Data Time 0.001 (0.022)	Loss 3.1054 (2.9456)	Entropy 1.35150 (1.35572)	Top-1 acc 50.781 (53.400)	Top-5 acc 73.438 (75.957)	lr 0.01999
Train [36][1490/3239]	Time 0.226 (0.576)	Data Time 0.001 (0.022)	Loss 2.9769 (2.9453)	Entropy 1.35147 (1.35570)	Top-1 acc 54.688 (53.412)	Top-5 acc 73.438 (75.963)	lr 0.01999
Train [36][1500/3239]	Time 0.227 (0.576)	Data Time 0.001 (0.022)	Loss 2.6814 (2.9451)	Entropy 1.35144 (1.35567)	Top-1 acc 61.328 (53.419)	Top-5 acc 81.641 (75.967)	lr 0.01999
Train [36][1510/3239]	Time 0.210 (0.575)	Data Time 0.001 (0.022)	Loss 2.8394 (2.9447)	Entropy 1.35140 (1.35564)	Top-1 acc 52.734 (53.424)	Top-5 acc 78.125 (75.970)	lr 0.01999
Train [36][1520/3239]	Time 0.303 (0.574)	Data Time 0.001 (0.022)	Loss 3.0060 (2.9446)	Entropy 1.35125 (1.35561)	Top-1 acc 49.609 (53.424)	Top-5 acc 75.781 (75.977)	lr 0.01999
Train [36][1530/3239]	Time 0.213 (0.573)	Data Time 0.001 (0.022)	Loss 2.8248 (2.9444)	Entropy 1.35116 (1.35558)	Top-1 acc 54.297 (53.420)	Top-5 acc 80.859 (75.981)	lr 0.01999
Train [36][1540/3239]	Time 0.246 (0.572)	Data Time 0.001 (0.021)	Loss 3.1108 (2.9444)	Entropy 1.35117 (1.35555)	Top-1 acc 50.391 (53.425)	Top-5 acc 72.656 (75.976)	lr 0.01999
Train [36][1550/3239]	Time 2.451 (0.572)	Data Time 0.001 (0.021)	Loss 3.0088 (2.9443)	Entropy 1.35117 (1.35553)	Top-1 acc 53.906 (53.424)	Top-5 acc 72.266 (75.977)	lr 0.01998
Train [36][1560/3239]	Time 0.218 (0.569)	Data Time 0.001 (0.021)	Loss 2.9047 (2.9443)	Entropy 1.35112 (1.35550)	Top-1 acc 55.859 (53.421)	Top-5 acc 77.734 (75.973)	lr 0.01998
Train [36][1570/3239]	Time 0.334 (0.569)	Data Time 0.001 (0.021)	Loss 2.8546 (2.9445)	Entropy 1.35106 (1.35547)	Top-1 acc 53.125 (53.406)	Top-5 acc 76.172 (75.967)	lr 0.01998
Train [36][1580/3239]	Time 0.196 (0.568)	Data Time 0.001 (0.021)	Loss 3.0424 (2.9446)	Entropy 1.35106 (1.35544)	Top-1 acc 53.906 (53.405)	Top-5 acc 73.438 (75.963)	lr 0.01998
Train [36][1590/3239]	Time 0.215 (0.567)	Data Time 0.001 (0.021)	Loss 3.0988 (2.9442)	Entropy 1.35101 (1.35541)	Top-1 acc 50.781 (53.407)	Top-5 acc 75.781 (75.970)	lr 0.01998
Train [36][1600/3239]	Time 0.219 (0.567)	Data Time 0.001 (0.021)	Loss 3.0822 (2.9443)	Entropy 1.35094 (1.35539)	Top-1 acc 50.781 (53.409)	Top-5 acc 70.703 (75.964)	lr 0.01998
Train [36][1610/3239]	Time 0.176 (0.566)	Data Time 0.001 (0.021)	Loss 2.8791 (2.9450)	Entropy 1.35091 (1.35536)	Top-1 acc 55.469 (53.391)	Top-5 acc 76.562 (75.961)	lr 0.01998
Train [36][1620/3239]	Time 0.252 (0.565)	Data Time 0.001 (0.020)	Loss 2.8634 (2.9452)	Entropy 1.35084 (1.35533)	Top-1 acc 55.078 (53.384)	Top-5 acc 78.516 (75.959)	lr 0.01998
Train [36][1630/3239]	Time 0.210 (0.565)	Data Time 0.001 (0.020)	Loss 2.8272 (2.9454)	Entropy 1.35080 (1.35530)	Top-1 acc 55.469 (53.376)	Top-5 acc 77.344 (75.952)	lr 0.01998
Train [36][1640/3239]	Time 0.240 (0.564)	Data Time 0.001 (0.020)	Loss 3.0481 (2.9455)	Entropy 1.35079 (1.35527)	Top-1 acc 49.609 (53.371)	Top-5 acc 73.047 (75.952)	lr 0.01998
Train [36][1650/3239]	Time 0.252 (0.563)	Data Time 0.001 (0.020)	Loss 3.0219 (2.9457)	Entropy 1.35076 (1.35525)	Top-1 acc 52.344 (53.358)	Top-5 acc 71.875 (75.953)	lr 0.01998
Train [36][1660/3239]	Time 2.473 (0.563)	Data Time 0.001 (0.020)	Loss 2.9716 (2.9457)	Entropy 1.35076 (1.35522)	Top-1 acc 53.125 (53.363)	Top-5 acc 74.219 (75.951)	lr 0.01998
Train [36][1670/3239]	Time 0.245 (0.561)	Data Time 0.002 (0.020)	Loss 2.9647 (2.9460)	Entropy 1.35064 (1.35519)	Top-1 acc 51.172 (53.352)	Top-5 acc 73.828 (75.948)	lr 0.01998
Train [36][1680/3239]	Time 0.257 (0.560)	Data Time 0.001 (0.020)	Loss 2.9882 (2.9459)	Entropy 1.35058 (1.35517)	Top-1 acc 51.953 (53.355)	Top-5 acc 75.391 (75.951)	lr 0.01997
Train [36][1690/3239]	Time 0.241 (0.560)	Data Time 0.001 (0.020)	Loss 3.0996 (2.9456)	Entropy 1.35048 (1.35514)	Top-1 acc 49.219 (53.360)	Top-5 acc 75.781 (75.954)	lr 0.01997
Train [36][1700/3239]	Time 0.207 (0.559)	Data Time 0.001 (0.020)	Loss 2.9733 (2.9462)	Entropy 1.35047 (1.35511)	Top-1 acc 55.859 (53.351)	Top-5 acc 74.609 (75.941)	lr 0.01997
Train [36][1710/3239]	Time 0.211 (0.558)	Data Time 0.001 (0.019)	Loss 2.7329 (2.9457)	Entropy 1.35035 (1.35508)	Top-1 acc 60.547 (53.362)	Top-5 acc 78.125 (75.949)	lr 0.01997
Train [36][1720/3239]	Time 0.219 (0.558)	Data Time 0.001 (0.019)	Loss 3.0790 (2.9457)	Entropy 1.35021 (1.35506)	Top-1 acc 53.906 (53.364)	Top-5 acc 74.609 (75.951)	lr 0.01997
Train [36][1730/3239]	Time 0.248 (0.557)	Data Time 0.001 (0.019)	Loss 2.8643 (2.9456)	Entropy 1.35014 (1.35503)	Top-1 acc 55.469 (53.367)	Top-5 acc 77.734 (75.954)	lr 0.01997
Train [36][1740/3239]	Time 0.232 (0.557)	Data Time 0.001 (0.019)	Loss 2.7423 (2.9455)	Entropy 1.35007 (1.35500)	Top-1 acc 59.766 (53.375)	Top-5 acc 82.031 (75.959)	lr 0.01997
Train [36][1750/3239]	Time 0.204 (0.556)	Data Time 0.001 (0.019)	Loss 2.9457 (2.9455)	Entropy 1.34984 (1.35497)	Top-1 acc 56.250 (53.378)	Top-5 acc 76.172 (75.957)	lr 0.01997
Train [36][1760/3239]	Time 0.248 (0.555)	Data Time 0.001 (0.019)	Loss 2.8930 (2.9453)	Entropy 1.34978 (1.35494)	Top-1 acc 54.297 (53.376)	Top-5 acc 74.219 (75.959)	lr 0.01997
Train [36][1770/3239]	Time 2.368 (0.555)	Data Time 0.001 (0.019)	Loss 3.1580 (2.9456)	Entropy 1.34978 (1.35491)	Top-1 acc 50.000 (53.380)	Top-5 acc 71.875 (75.952)	lr 0.01997
Train [36][1780/3239]	Time 0.299 (0.553)	Data Time 0.002 (0.019)	Loss 3.1486 (2.9456)	Entropy 1.34975 (1.35488)	Top-1 acc 47.266 (53.380)	Top-5 acc 69.922 (75.949)	lr 0.01997
Train [36][1790/3239]	Time 0.259 (0.552)	Data Time 0.001 (0.019)	Loss 2.8810 (2.9457)	Entropy 1.34973 (1.35485)	Top-1 acc 56.641 (53.377)	Top-5 acc 78.516 (75.953)	lr 0.01997
Train [36][1800/3239]	Time 0.377 (0.576)	Data Time 0.006 (0.019)	Loss 2.9173 (2.9455)	Entropy 1.34962 (1.35483)	Top-1 acc 51.172 (53.380)	Top-5 acc 77.734 (75.958)	lr 0.01996
Train [36][1810/3239]	Time 0.216 (0.576)	Data Time 0.002 (0.018)	Loss 2.9111 (2.9454)	Entropy 1.34951 (1.35480)	Top-1 acc 54.688 (53.385)	Top-5 acc 76.562 (75.961)	lr 0.01996
Train [36][1820/3239]	Time 0.209 (0.575)	Data Time 0.001 (0.018)	Loss 3.1689 (2.9456)	Entropy 1.34947 (1.35477)	Top-1 acc 50.391 (53.380)	Top-5 acc 70.703 (75.955)	lr 0.01996
Train [36][1830/3239]	Time 0.279 (0.575)	Data Time 0.002 (0.018)	Loss 3.1833 (2.9456)	Entropy 1.34940 (1.35474)	Top-1 acc 50.391 (53.382)	Top-5 acc 70.312 (75.954)	lr 0.01996
Train [36][1840/3239]	Time 0.243 (0.574)	Data Time 0.002 (0.018)	Loss 2.7712 (2.9456)	Entropy 1.34932 (1.35471)	Top-1 acc 63.281 (53.382)	Top-5 acc 79.297 (75.955)	lr 0.01996
Train [36][1850/3239]	Time 0.210 (0.574)	Data Time 0.002 (0.018)	Loss 2.9433 (2.9456)	Entropy 1.34924 (1.35468)	Top-1 acc 56.250 (53.384)	Top-5 acc 75.781 (75.950)	lr 0.01996
Train [36][1860/3239]	Time 0.238 (0.573)	Data Time 0.001 (0.018)	Loss 2.8973 (2.9456)	Entropy 1.34920 (1.35465)	Top-1 acc 55.078 (53.387)	Top-5 acc 76.562 (75.954)	lr 0.01996
Train [36][1870/3239]	Time 0.232 (0.572)	Data Time 0.001 (0.018)	Loss 3.1262 (2.9460)	Entropy 1.34916 (1.35462)	Top-1 acc 47.656 (53.375)	Top-5 acc 73.047 (75.949)	lr 0.01996
Train [36][1880/3239]	Time 2.440 (0.572)	Data Time 0.001 (0.018)	Loss 3.0220 (2.9460)	Entropy 1.34916 (1.35459)	Top-1 acc 53.516 (53.379)	Top-5 acc 73.438 (75.945)	lr 0.01996
Train [36][1890/3239]	Time 0.236 (0.570)	Data Time 0.001 (0.018)	Loss 3.0278 (2.9462)	Entropy 1.34910 (1.35456)	Top-1 acc 52.344 (53.377)	Top-5 acc 76.562 (75.943)	lr 0.01996
Train [36][1900/3239]	Time 0.225 (0.569)	Data Time 0.001 (0.018)	Loss 2.8972 (2.9461)	Entropy 1.34904 (1.35453)	Top-1 acc 53.516 (53.380)	Top-5 acc 76.172 (75.942)	lr 0.01996
Train [36][1910/3239]	Time 0.221 (0.569)	Data Time 0.001 (0.018)	Loss 2.9656 (2.9462)	Entropy 1.34899 (1.35451)	Top-1 acc 54.297 (53.381)	Top-5 acc 74.609 (75.938)	lr 0.01996
Train [36][1920/3239]	Time 0.212 (0.568)	Data Time 0.001 (0.017)	Loss 2.9313 (2.9463)	Entropy 1.34898 (1.35448)	Top-1 acc 53.906 (53.381)	Top-5 acc 78.125 (75.938)	lr 0.01995
Train [36][1930/3239]	Time 0.218 (0.567)	Data Time 0.001 (0.017)	Loss 2.7460 (2.9463)	Entropy 1.34897 (1.35445)	Top-1 acc 55.469 (53.381)	Top-5 acc 79.297 (75.936)	lr 0.01995
Train [36][1940/3239]	Time 0.201 (0.567)	Data Time 0.001 (0.017)	Loss 2.9244 (2.9463)	Entropy 1.34891 (1.35442)	Top-1 acc 55.469 (53.377)	Top-5 acc 75.781 (75.935)	lr 0.01995
Train [36][1950/3239]	Time 0.195 (0.566)	Data Time 0.001 (0.017)	Loss 2.9235 (2.9462)	Entropy 1.34896 (1.35439)	Top-1 acc 55.469 (53.380)	Top-5 acc 76.172 (75.941)	lr 0.01995
Train [36][1960/3239]	Time 0.177 (0.565)	Data Time 0.001 (0.017)	Loss 2.6731 (2.9459)	Entropy 1.34895 (1.35436)	Top-1 acc 56.250 (53.385)	Top-5 acc 82.812 (75.945)	lr 0.01995
Train [36][1970/3239]	Time 0.282 (0.565)	Data Time 0.002 (0.017)	Loss 2.9425 (2.9459)	Entropy 1.34889 (1.35434)	Top-1 acc 51.953 (53.380)	Top-5 acc 73.438 (75.940)	lr 0.01995
Train [36][1980/3239]	Time 0.261 (0.564)	Data Time 0.001 (0.017)	Loss 2.9084 (2.9457)	Entropy 1.34884 (1.35431)	Top-1 acc 49.609 (53.386)	Top-5 acc 75.000 (75.940)	lr 0.01995
Train [36][1990/3239]	Time 2.500 (0.564)	Data Time 0.001 (0.017)	Loss 3.1201 (2.9456)	Entropy 1.34884 (1.35428)	Top-1 acc 53.516 (53.388)	Top-5 acc 71.875 (75.938)	lr 0.01995
Train [36][2000/3239]	Time 0.207 (0.562)	Data Time 0.001 (0.017)	Loss 3.1526 (2.9461)	Entropy 1.34878 (1.35425)	Top-1 acc 45.703 (53.376)	Top-5 acc 70.312 (75.929)	lr 0.01995
Train [36][2010/3239]	Time 0.223 (0.562)	Data Time 0.001 (0.017)	Loss 2.8501 (2.9458)	Entropy 1.34873 (1.35423)	Top-1 acc 55.078 (53.380)	Top-5 acc 78.125 (75.938)	lr 0.01995
Train [36][2020/3239]	Time 0.236 (0.561)	Data Time 0.001 (0.017)	Loss 2.8673 (2.9458)	Entropy 1.34857 (1.35420)	Top-1 acc 55.859 (53.374)	Top-5 acc 75.391 (75.937)	lr 0.01995
Train [36][2030/3239]	Time 0.234 (0.560)	Data Time 0.001 (0.017)	Loss 3.0164 (2.9460)	Entropy 1.34851 (1.35417)	Top-1 acc 50.781 (53.369)	Top-5 acc 73.828 (75.931)	lr 0.01995
Train [36][2040/3239]	Time 0.230 (0.560)	Data Time 0.001 (0.017)	Loss 3.0443 (2.9460)	Entropy 1.34852 (1.35414)	Top-1 acc 53.125 (53.371)	Top-5 acc 74.219 (75.932)	lr 0.01995
Train [36][2050/3239]	Time 0.335 (0.559)	Data Time 0.001 (0.016)	Loss 3.1826 (2.9460)	Entropy 1.34849 (1.35412)	Top-1 acc 49.219 (53.370)	Top-5 acc 69.141 (75.932)	lr 0.01994
Train [36][2060/3239]	Time 0.223 (0.559)	Data Time 0.001 (0.016)	Loss 2.7608 (2.9458)	Entropy 1.34845 (1.35409)	Top-1 acc 55.859 (53.373)	Top-5 acc 80.859 (75.939)	lr 0.01994
Train [36][2070/3239]	Time 0.220 (0.558)	Data Time 0.001 (0.016)	Loss 2.9936 (2.9456)	Entropy 1.34842 (1.35406)	Top-1 acc 52.344 (53.379)	Top-5 acc 74.219 (75.944)	lr 0.01994
Train [36][2080/3239]	Time 0.237 (0.558)	Data Time 0.001 (0.016)	Loss 2.9720 (2.9454)	Entropy 1.34832 (1.35403)	Top-1 acc 52.344 (53.387)	Top-5 acc 75.781 (75.949)	lr 0.01994
Train [36][2090/3239]	Time 0.165 (0.557)	Data Time 0.002 (0.016)	Loss 2.7017 (2.9454)	Entropy 1.34827 (1.35401)	Top-1 acc 58.984 (53.390)	Top-5 acc 83.203 (75.947)	lr 0.01994
Train [36][2100/3239]	Time 2.465 (0.557)	Data Time 0.001 (0.016)	Loss 2.8111 (2.9453)	Entropy 1.34827 (1.35398)	Top-1 acc 58.594 (53.392)	Top-5 acc 79.297 (75.951)	lr 0.01994
Train [36][2110/3239]	Time 0.229 (0.555)	Data Time 0.001 (0.016)	Loss 2.8667 (2.9453)	Entropy 1.34827 (1.35395)	Top-1 acc 55.078 (53.391)	Top-5 acc 77.734 (75.949)	lr 0.01994
Train [36][2120/3239]	Time 0.239 (0.555)	Data Time 0.001 (0.016)	Loss 2.9497 (2.9456)	Entropy 1.34825 (1.35393)	Top-1 acc 50.391 (53.383)	Top-5 acc 75.000 (75.940)	lr 0.01994
Train [36][2130/3239]	Time 0.205 (0.555)	Data Time 0.001 (0.016)	Loss 2.8289 (2.9456)	Entropy 1.34819 (1.35390)	Top-1 acc 57.031 (53.380)	Top-5 acc 76.172 (75.937)	lr 0.01994
Train [36][2140/3239]	Time 0.205 (0.554)	Data Time 0.001 (0.016)	Loss 2.9230 (2.9458)	Entropy 1.34813 (1.35387)	Top-1 acc 53.125 (53.374)	Top-5 acc 74.219 (75.931)	lr 0.01994
Train [36][2150/3239]	Time 0.224 (0.554)	Data Time 0.001 (0.016)	Loss 2.9630 (2.9455)	Entropy 1.34800 (1.35384)	Top-1 acc 53.906 (53.382)	Top-5 acc 73.438 (75.938)	lr 0.01994
Train [36][2160/3239]	Time 0.316 (0.574)	Data Time 0.002 (0.016)	Loss 2.8928 (2.9452)	Entropy 1.34797 (1.35382)	Top-1 acc 53.906 (53.386)	Top-5 acc 76.172 (75.945)	lr 0.01994
Train [36][2170/3239]	Time 0.206 (0.574)	Data Time 0.002 (0.016)	Loss 2.9275 (2.9452)	Entropy 1.34798 (1.35379)	Top-1 acc 53.516 (53.388)	Top-5 acc 74.609 (75.947)	lr 0.01993
Train [36][2180/3239]	Time 0.241 (0.573)	Data Time 0.001 (0.016)	Loss 2.7848 (2.9454)	Entropy 1.34794 (1.35376)	Top-1 acc 56.641 (53.385)	Top-5 acc 79.688 (75.942)	lr 0.01993
Train [36][2190/3239]	Time 0.231 (0.573)	Data Time 0.001 (0.016)	Loss 2.9784 (2.9453)	Entropy 1.34787 (1.35374)	Top-1 acc 49.609 (53.386)	Top-5 acc 74.219 (75.947)	lr 0.01993
Train [36][2200/3239]	Time 0.252 (0.572)	Data Time 0.001 (0.016)	Loss 2.8389 (2.9451)	Entropy 1.34774 (1.35371)	Top-1 acc 55.859 (53.394)	Top-5 acc 80.859 (75.957)	lr 0.01993
Train [36][2210/3239]	Time 2.576 (0.572)	Data Time 0.001 (0.015)	Loss 2.7967 (2.9448)	Entropy 1.34774 (1.35368)	Top-1 acc 57.422 (53.396)	Top-5 acc 78.125 (75.963)	lr 0.01993
Train [36][2220/3239]	Time 0.235 (0.570)	Data Time 0.001 (0.015)	Loss 3.0073 (2.9450)	Entropy 1.34768 (1.35366)	Top-1 acc 52.344 (53.394)	Top-5 acc 75.000 (75.961)	lr 0.01993
Train [36][2230/3239]	Time 0.231 (0.570)	Data Time 0.001 (0.015)	Loss 2.9486 (2.9450)	Entropy 1.34762 (1.35363)	Top-1 acc 55.469 (53.389)	Top-5 acc 76.953 (75.961)	lr 0.01993
Train [36][2240/3239]	Time 0.157 (0.569)	Data Time 0.001 (0.015)	Loss 3.1673 (2.9451)	Entropy 1.34752 (1.35360)	Top-1 acc 51.172 (53.390)	Top-5 acc 72.656 (75.964)	lr 0.01993
Train [36][2250/3239]	Time 0.266 (0.568)	Data Time 0.001 (0.015)	Loss 2.9819 (2.9450)	Entropy 1.34744 (1.35357)	Top-1 acc 50.781 (53.390)	Top-5 acc 73.047 (75.967)	lr 0.01993
Train [36][2260/3239]	Time 0.300 (0.568)	Data Time 0.001 (0.015)	Loss 2.9961 (2.9450)	Entropy 1.34736 (1.35355)	Top-1 acc 50.781 (53.389)	Top-5 acc 74.609 (75.963)	lr 0.01993
Train [36][2270/3239]	Time 0.224 (0.568)	Data Time 0.001 (0.015)	Loss 3.0558 (2.9452)	Entropy 1.34735 (1.35352)	Top-1 acc 49.609 (53.387)	Top-5 acc 72.266 (75.958)	lr 0.01993
Train [36][2280/3239]	Time 0.240 (0.567)	Data Time 0.002 (0.015)	Loss 2.8897 (2.9455)	Entropy 1.34721 (1.35349)	Top-1 acc 55.469 (53.380)	Top-5 acc 78.125 (75.952)	lr 0.01993
Train [36][2290/3239]	Time 0.208 (0.567)	Data Time 0.001 (0.015)	Loss 3.0172 (2.9457)	Entropy 1.34707 (1.35347)	Top-1 acc 52.344 (53.378)	Top-5 acc 73.438 (75.947)	lr 0.01992
Train [36][2300/3239]	Time 0.226 (0.566)	Data Time 0.001 (0.015)	Loss 3.0927 (2.9458)	Entropy 1.34702 (1.35344)	Top-1 acc 48.828 (53.376)	Top-5 acc 70.703 (75.947)	lr 0.01992
Train [36][2310/3239]	Time 0.200 (0.566)	Data Time 0.002 (0.015)	Loss 2.9469 (2.9456)	Entropy 1.34696 (1.35341)	Top-1 acc 55.078 (53.378)	Top-5 acc 75.781 (75.949)	lr 0.01992
Train [36][2320/3239]	Time 2.362 (0.565)	Data Time 0.001 (0.015)	Loss 2.7582 (2.9456)	Entropy 1.34696 (1.35338)	Top-1 acc 59.766 (53.379)	Top-5 acc 77.344 (75.949)	lr 0.01992
Train [36][2330/3239]	Time 0.236 (0.564)	Data Time 0.001 (0.015)	Loss 2.8585 (2.9457)	Entropy 1.34689 (1.35335)	Top-1 acc 53.906 (53.376)	Top-5 acc 77.344 (75.951)	lr 0.01992
Train [36][2340/3239]	Time 0.217 (0.563)	Data Time 0.001 (0.015)	Loss 3.1549 (2.9458)	Entropy 1.34678 (1.35333)	Top-1 acc 46.875 (53.374)	Top-5 acc 70.703 (75.949)	lr 0.01992
Train [36][2350/3239]	Time 0.214 (0.563)	Data Time 0.001 (0.015)	Loss 2.9389 (2.9455)	Entropy 1.34672 (1.35330)	Top-1 acc 54.688 (53.383)	Top-5 acc 74.219 (75.953)	lr 0.01992
Train [36][2360/3239]	Time 0.204 (0.562)	Data Time 0.001 (0.015)	Loss 3.0068 (2.9456)	Entropy 1.34669 (1.35327)	Top-1 acc 51.172 (53.381)	Top-5 acc 75.781 (75.950)	lr 0.01992
Train [36][2370/3239]	Time 0.230 (0.562)	Data Time 0.001 (0.015)	Loss 2.9114 (2.9457)	Entropy 1.34668 (1.35324)	Top-1 acc 52.734 (53.382)	Top-5 acc 74.609 (75.948)	lr 0.01992
Train [36][2380/3239]	Time 0.215 (0.561)	Data Time 0.001 (0.014)	Loss 2.7151 (2.9454)	Entropy 1.34660 (1.35321)	Top-1 acc 58.594 (53.389)	Top-5 acc 81.641 (75.956)	lr 0.01992
Train [36][2390/3239]	Time 0.224 (0.561)	Data Time 0.001 (0.014)	Loss 3.0554 (2.9454)	Entropy 1.34665 (1.35319)	Top-1 acc 50.000 (53.392)	Top-5 acc 74.219 (75.953)	lr 0.01992
Train [36][2400/3239]	Time 0.214 (0.560)	Data Time 0.001 (0.014)	Loss 3.0385 (2.9455)	Entropy 1.34679 (1.35316)	Top-1 acc 54.297 (53.389)	Top-5 acc 73.828 (75.947)	lr 0.01992
Train [36][2410/3239]	Time 0.219 (0.560)	Data Time 0.001 (0.014)	Loss 2.9003 (2.9454)	Entropy 1.34672 (1.35313)	Top-1 acc 51.172 (53.390)	Top-5 acc 76.953 (75.945)	lr 0.01991
Train [36][2420/3239]	Time 0.223 (0.560)	Data Time 0.001 (0.014)	Loss 2.8525 (2.9455)	Entropy 1.34672 (1.35311)	Top-1 acc 51.562 (53.387)	Top-5 acc 76.172 (75.942)	lr 0.01991
Train [36][2430/3239]	Time 2.482 (0.559)	Data Time 0.001 (0.014)	Loss 2.8220 (2.9453)	Entropy 1.34672 (1.35308)	Top-1 acc 52.734 (53.386)	Top-5 acc 76.953 (75.945)	lr 0.01991
Train [36][2440/3239]	Time 0.211 (0.558)	Data Time 0.001 (0.014)	Loss 3.1803 (2.9454)	Entropy 1.34665 (1.35305)	Top-1 acc 48.828 (53.386)	Top-5 acc 69.141 (75.943)	lr 0.01991
Train [36][2450/3239]	Time 0.217 (0.557)	Data Time 0.001 (0.014)	Loss 2.8638 (2.9452)	Entropy 1.34655 (1.35303)	Top-1 acc 53.125 (53.388)	Top-5 acc 80.078 (75.944)	lr 0.01991
Train [36][2460/3239]	Time 0.214 (0.557)	Data Time 0.001 (0.014)	Loss 2.9366 (2.9450)	Entropy 1.34636 (1.35300)	Top-1 acc 55.078 (53.396)	Top-5 acc 77.344 (75.948)	lr 0.01991
Train [36][2470/3239]	Time 0.235 (0.557)	Data Time 0.001 (0.014)	Loss 2.8497 (2.9454)	Entropy 1.34628 (1.35297)	Top-1 acc 52.734 (53.388)	Top-5 acc 79.297 (75.943)	lr 0.01991
Train [36][2480/3239]	Time 0.220 (0.556)	Data Time 0.001 (0.014)	Loss 3.2378 (2.9455)	Entropy 1.34621 (1.35295)	Top-1 acc 48.438 (53.387)	Top-5 acc 67.578 (75.940)	lr 0.01991
Train [36][2490/3239]	Time 0.205 (0.556)	Data Time 0.001 (0.014)	Loss 2.7979 (2.9454)	Entropy 1.34616 (1.35292)	Top-1 acc 54.297 (53.389)	Top-5 acc 81.250 (75.943)	lr 0.01991
Train [36][2500/3239]	Time 0.228 (0.555)	Data Time 0.001 (0.014)	Loss 3.0205 (2.9453)	Entropy 1.34612 (1.35289)	Top-1 acc 48.047 (53.393)	Top-5 acc 74.219 (75.947)	lr 0.01991
Train [36][2510/3239]	Time 0.212 (0.555)	Data Time 0.001 (0.014)	Loss 3.1691 (2.9454)	Entropy 1.34609 (1.35287)	Top-1 acc 48.047 (53.390)	Top-5 acc 70.703 (75.944)	lr 0.01991
Train [36][2520/3239]	Time 0.318 (0.573)	Data Time 0.002 (0.014)	Loss 3.0041 (2.9453)	Entropy 1.34603 (1.35284)	Top-1 acc 51.953 (53.389)	Top-5 acc 74.219 (75.949)	lr 0.01991
Train [36][2530/3239]	Time 0.234 (0.573)	Data Time 0.002 (0.014)	Loss 3.0383 (2.9451)	Entropy 1.34603 (1.35281)	Top-1 acc 48.438 (53.388)	Top-5 acc 74.219 (75.952)	lr 0.01991
Train [36][2540/3239]	Time 2.357 (0.572)	Data Time 0.002 (0.014)	Loss 2.8231 (2.9451)	Entropy 1.34603 (1.35278)	Top-1 acc 57.422 (53.385)	Top-5 acc 78.125 (75.955)	lr 0.01990
Train [36][2550/3239]	Time 0.281 (0.571)	Data Time 0.003 (0.014)	Loss 2.8143 (2.9450)	Entropy 1.34599 (1.35276)	Top-1 acc 55.078 (53.387)	Top-5 acc 79.297 (75.959)	lr 0.01990
Train [36][2560/3239]	Time 0.247 (0.570)	Data Time 0.001 (0.014)	Loss 2.9875 (2.9453)	Entropy 1.34596 (1.35273)	Top-1 acc 52.344 (53.378)	Top-5 acc 74.219 (75.954)	lr 0.01990
Train [36][2570/3239]	Time 0.234 (0.570)	Data Time 0.002 (0.014)	Loss 2.9703 (2.9451)	Entropy 1.34585 (1.35270)	Top-1 acc 51.953 (53.385)	Top-5 acc 77.734 (75.959)	lr 0.01990
Train [36][2580/3239]	Time 0.227 (0.570)	Data Time 0.001 (0.013)	Loss 2.9477 (2.9451)	Entropy 1.34585 (1.35268)	Top-1 acc 52.734 (53.386)	Top-5 acc 75.391 (75.956)	lr 0.01990
Train [36][2590/3239]	Time 0.214 (0.569)	Data Time 0.001 (0.013)	Loss 3.0101 (2.9451)	Entropy 1.34581 (1.35265)	Top-1 acc 57.812 (53.387)	Top-5 acc 73.438 (75.958)	lr 0.01990
Train [36][2600/3239]	Time 0.260 (0.569)	Data Time 0.001 (0.013)	Loss 2.7478 (2.9449)	Entropy 1.34578 (1.35263)	Top-1 acc 58.594 (53.392)	Top-5 acc 78.516 (75.963)	lr 0.01990
Train [36][2610/3239]	Time 0.233 (0.568)	Data Time 0.002 (0.013)	Loss 2.9769 (2.9451)	Entropy 1.34577 (1.35260)	Top-1 acc 52.734 (53.387)	Top-5 acc 75.781 (75.959)	lr 0.01990
Train [36][2620/3239]	Time 0.214 (0.568)	Data Time 0.001 (0.013)	Loss 2.9813 (2.9453)	Entropy 1.34568 (1.35257)	Top-1 acc 53.125 (53.383)	Top-5 acc 75.000 (75.953)	lr 0.01990
Train [36][2630/3239]	Time 0.213 (0.568)	Data Time 0.001 (0.013)	Loss 2.7413 (2.9452)	Entropy 1.34560 (1.35255)	Top-1 acc 55.078 (53.386)	Top-5 acc 79.688 (75.951)	lr 0.01990
Train [36][2640/3239]	Time 0.243 (0.567)	Data Time 0.001 (0.013)	Loss 3.0283 (2.9452)	Entropy 1.34564 (1.35252)	Top-1 acc 51.953 (53.383)	Top-5 acc 73.047 (75.951)	lr 0.01990
Train [36][2650/3239]	Time 0.209 (0.567)	Data Time 0.001 (0.013)	Loss 3.0219 (2.9454)	Entropy 1.34555 (1.35249)	Top-1 acc 53.516 (53.384)	Top-5 acc 74.609 (75.948)	lr 0.01990
Train [36][2660/3239]	Time 0.283 (0.566)	Data Time 0.001 (0.013)	Loss 3.0506 (2.9454)	Entropy 1.34545 (1.35247)	Top-1 acc 53.125 (53.381)	Top-5 acc 74.609 (75.947)	lr 0.01989
Train [36][2670/3239]	Time 0.253 (0.566)	Data Time 0.001 (0.013)	Loss 3.1808 (2.9457)	Entropy 1.34538 (1.35244)	Top-1 acc 46.875 (53.377)	Top-5 acc 71.484 (75.943)	lr 0.01989
Train [36][2680/3239]	Time 0.206 (0.566)	Data Time 0.001 (0.013)	Loss 2.8577 (2.9457)	Entropy 1.34536 (1.35242)	Top-1 acc 53.516 (53.377)	Top-5 acc 78.125 (75.941)	lr 0.01989
Train [36][2690/3239]	Time 0.294 (0.565)	Data Time 0.001 (0.013)	Loss 2.7402 (2.9456)	Entropy 1.34531 (1.35239)	Top-1 acc 56.641 (53.377)	Top-5 acc 78.906 (75.940)	lr 0.01989
Train [36][2700/3239]	Time 0.219 (0.565)	Data Time 0.001 (0.013)	Loss 3.0566 (2.9455)	Entropy 1.34529 (1.35236)	Top-1 acc 49.219 (53.376)	Top-5 acc 68.750 (75.938)	lr 0.01989
Train [36][2710/3239]	Time 0.251 (0.564)	Data Time 0.001 (0.013)	Loss 2.9331 (2.9456)	Entropy 1.34525 (1.35234)	Top-1 acc 55.859 (53.374)	Top-5 acc 74.609 (75.935)	lr 0.01989
Train [36][2720/3239]	Time 0.288 (0.564)	Data Time 0.001 (0.013)	Loss 2.8552 (2.9457)	Entropy 1.34510 (1.35231)	Top-1 acc 56.250 (53.371)	Top-5 acc 79.688 (75.934)	lr 0.01989
Train [36][2730/3239]	Time 0.324 (0.564)	Data Time 0.001 (0.013)	Loss 2.8030 (2.9457)	Entropy 1.34509 (1.35228)	Top-1 acc 55.859 (53.370)	Top-5 acc 81.250 (75.929)	lr 0.01989
Train [36][2740/3239]	Time 0.228 (0.563)	Data Time 0.001 (0.013)	Loss 2.8295 (2.9459)	Entropy 1.34509 (1.35226)	Top-1 acc 53.906 (53.364)	Top-5 acc 76.953 (75.927)	lr 0.01989
Train [36][2750/3239]	Time 0.263 (0.563)	Data Time 0.001 (0.013)	Loss 3.1045 (2.9459)	Entropy 1.34528 (1.35223)	Top-1 acc 48.828 (53.361)	Top-5 acc 72.656 (75.927)	lr 0.01989
Train [36][2760/3239]	Time 0.239 (0.562)	Data Time 0.001 (0.013)	Loss 2.9110 (2.9459)	Entropy 1.34516 (1.35221)	Top-1 acc 53.516 (53.358)	Top-5 acc 77.344 (75.930)	lr 0.01989
Train [36][2770/3239]	Time 0.242 (0.562)	Data Time 0.001 (0.013)	Loss 3.0327 (2.9461)	Entropy 1.34517 (1.35218)	Top-1 acc 50.000 (53.353)	Top-5 acc 75.391 (75.926)	lr 0.01989
Train [36][2780/3239]	Time 0.221 (0.562)	Data Time 0.002 (0.013)	Loss 2.9417 (2.9460)	Entropy 1.34513 (1.35216)	Top-1 acc 53.125 (53.357)	Top-5 acc 75.391 (75.926)	lr 0.01988
Train [36][2790/3239]	Time 0.216 (0.561)	Data Time 0.001 (0.013)	Loss 3.1138 (2.9462)	Entropy 1.34505 (1.35213)	Top-1 acc 47.266 (53.352)	Top-5 acc 72.656 (75.922)	lr 0.01988
Train [36][2800/3239]	Time 0.203 (0.561)	Data Time 0.001 (0.013)	Loss 3.0284 (2.9462)	Entropy 1.34499 (1.35211)	Top-1 acc 51.172 (53.355)	Top-5 acc 72.656 (75.921)	lr 0.01988
Train [36][2810/3239]	Time 0.256 (0.561)	Data Time 0.001 (0.012)	Loss 2.8844 (2.9462)	Entropy 1.34488 (1.35208)	Top-1 acc 53.125 (53.355)	Top-5 acc 78.125 (75.922)	lr 0.01988
Train [36][2820/3239]	Time 0.247 (0.560)	Data Time 0.002 (0.012)	Loss 2.9016 (2.9463)	Entropy 1.34481 (1.35206)	Top-1 acc 58.203 (53.354)	Top-5 acc 75.781 (75.919)	lr 0.01988
Train [36][2830/3239]	Time 0.252 (0.560)	Data Time 0.001 (0.012)	Loss 3.0685 (2.9463)	Entropy 1.34471 (1.35203)	Top-1 acc 50.391 (53.357)	Top-5 acc 74.219 (75.919)	lr 0.01988
Train [36][2840/3239]	Time 0.261 (0.559)	Data Time 0.001 (0.012)	Loss 2.8712 (2.9464)	Entropy 1.34464 (1.35200)	Top-1 acc 57.812 (53.353)	Top-5 acc 78.906 (75.917)	lr 0.01988
Train [36][2850/3239]	Time 0.209 (0.559)	Data Time 0.001 (0.012)	Loss 2.7329 (2.9464)	Entropy 1.34462 (1.35198)	Top-1 acc 56.641 (53.356)	Top-5 acc 81.250 (75.922)	lr 0.01988
Train [36][2860/3239]	Time 0.264 (0.572)	Data Time 0.003 (0.012)	Loss 2.9736 (2.9464)	Entropy 1.34454 (1.35195)	Top-1 acc 51.562 (53.356)	Top-5 acc 76.172 (75.921)	lr 0.01988
Train [36][2870/3239]	Time 0.228 (0.572)	Data Time 0.002 (0.012)	Loss 2.8133 (2.9465)	Entropy 1.34440 (1.35193)	Top-1 acc 60.156 (53.356)	Top-5 acc 77.734 (75.919)	lr 0.01988
Train [36][2880/3239]	Time 0.275 (0.572)	Data Time 0.002 (0.012)	Loss 2.9092 (2.9464)	Entropy 1.34437 (1.35190)	Top-1 acc 49.219 (53.354)	Top-5 acc 79.688 (75.921)	lr 0.01988
Train [36][2890/3239]	Time 0.221 (0.572)	Data Time 0.001 (0.012)	Loss 2.8610 (2.9464)	Entropy 1.34403 (1.35187)	Top-1 acc 54.688 (53.353)	Top-5 acc 79.297 (75.921)	lr 0.01988
Train [36][2900/3239]	Time 0.223 (0.571)	Data Time 0.001 (0.012)	Loss 2.8774 (2.9465)	Entropy 1.34401 (1.35185)	Top-1 acc 52.344 (53.357)	Top-5 acc 78.516 (75.918)	lr 0.01988
Train [36][2910/3239]	Time 0.219 (0.571)	Data Time 0.001 (0.012)	Loss 2.9524 (2.9466)	Entropy 1.34401 (1.35182)	Top-1 acc 55.078 (53.356)	Top-5 acc 75.000 (75.914)	lr 0.01987
Train [36][2920/3239]	Time 0.218 (0.570)	Data Time 0.001 (0.012)	Loss 2.7692 (2.9466)	Entropy 1.34397 (1.35179)	Top-1 acc 55.859 (53.355)	Top-5 acc 78.906 (75.917)	lr 0.01987
Train [36][2930/3239]	Time 0.249 (0.570)	Data Time 0.001 (0.012)	Loss 2.9412 (2.9468)	Entropy 1.34393 (1.35177)	Top-1 acc 54.688 (53.347)	Top-5 acc 76.562 (75.914)	lr 0.01987
Train [36][2940/3239]	Time 0.388 (0.570)	Data Time 0.001 (0.012)	Loss 3.0020 (2.9468)	Entropy 1.34398 (1.35174)	Top-1 acc 50.000 (53.347)	Top-5 acc 72.656 (75.912)	lr 0.01987
Train [36][2950/3239]	Time 0.263 (0.569)	Data Time 0.001 (0.012)	Loss 3.0906 (2.9469)	Entropy 1.34390 (1.35171)	Top-1 acc 49.609 (53.347)	Top-5 acc 71.875 (75.909)	lr 0.01987
Train [36][2960/3239]	Time 0.250 (0.569)	Data Time 0.001 (0.012)	Loss 2.9707 (2.9469)	Entropy 1.34381 (1.35169)	Top-1 acc 51.172 (53.344)	Top-5 acc 75.391 (75.908)	lr 0.01987
Train [36][2970/3239]	Time 0.214 (0.568)	Data Time 0.001 (0.012)	Loss 3.0051 (2.9466)	Entropy 1.34374 (1.35166)	Top-1 acc 52.734 (53.349)	Top-5 acc 75.391 (75.914)	lr 0.01987
Train [36][2980/3239]	Time 0.300 (0.568)	Data Time 0.001 (0.012)	Loss 2.7786 (2.9464)	Entropy 1.34370 (1.35163)	Top-1 acc 54.297 (53.352)	Top-5 acc 80.078 (75.919)	lr 0.01987
Train [36][2990/3239]	Time 0.305 (0.568)	Data Time 0.001 (0.012)	Loss 2.9435 (2.9463)	Entropy 1.34368 (1.35161)	Top-1 acc 51.562 (53.356)	Top-5 acc 74.219 (75.921)	lr 0.01987
Train [36][3000/3239]	Time 0.210 (0.567)	Data Time 0.001 (0.012)	Loss 2.9569 (2.9465)	Entropy 1.34344 (1.35158)	Top-1 acc 53.906 (53.350)	Top-5 acc 76.953 (75.917)	lr 0.01987
Train [36][3010/3239]	Time 0.287 (0.567)	Data Time 0.001 (0.012)	Loss 3.0427 (2.9464)	Entropy 1.34342 (1.35155)	Top-1 acc 51.953 (53.353)	Top-5 acc 73.047 (75.917)	lr 0.01987
Train [36][3020/3239]	Time 0.245 (0.567)	Data Time 0.001 (0.012)	Loss 2.7865 (2.9463)	Entropy 1.34344 (1.35153)	Top-1 acc 60.938 (53.351)	Top-5 acc 77.734 (75.920)	lr 0.01987
Train [36][3030/3239]	Time 0.311 (0.566)	Data Time 0.002 (0.012)	Loss 3.1823 (2.9465)	Entropy 1.34340 (1.35150)	Top-1 acc 50.000 (53.348)	Top-5 acc 72.266 (75.916)	lr 0.01986
Train [36][3040/3239]	Time 0.217 (0.566)	Data Time 0.001 (0.012)	Loss 2.9351 (2.9467)	Entropy 1.34328 (1.35147)	Top-1 acc 50.781 (53.343)	Top-5 acc 78.516 (75.911)	lr 0.01986
Train [36][3050/3239]	Time 0.238 (0.565)	Data Time 0.001 (0.012)	Loss 2.8637 (2.9468)	Entropy 1.34328 (1.35145)	Top-1 acc 55.078 (53.338)	Top-5 acc 76.172 (75.905)	lr 0.01986
Train [36][3060/3239]	Time 0.202 (0.565)	Data Time 0.001 (0.012)	Loss 3.0461 (2.9468)	Entropy 1.34328 (1.35142)	Top-1 acc 51.953 (53.340)	Top-5 acc 72.656 (75.904)	lr 0.01986
Train [36][3070/3239]	Time 0.240 (0.565)	Data Time 0.001 (0.012)	Loss 2.9613 (2.9470)	Entropy 1.34317 (1.35139)	Top-1 acc 51.172 (53.334)	Top-5 acc 76.172 (75.899)	lr 0.01986
Train [36][3080/3239]	Time 0.264 (0.564)	Data Time 0.002 (0.012)	Loss 2.9994 (2.9468)	Entropy 1.34315 (1.35137)	Top-1 acc 50.391 (53.337)	Top-5 acc 73.828 (75.902)	lr 0.01986
Train [36][3090/3239]	Time 0.221 (0.564)	Data Time 0.001 (0.012)	Loss 3.1537 (2.9467)	Entropy 1.34304 (1.35134)	Top-1 acc 49.609 (53.343)	Top-5 acc 73.438 (75.903)	lr 0.01986
Train [36][3100/3239]	Time 0.180 (0.564)	Data Time 0.001 (0.011)	Loss 2.9582 (2.9467)	Entropy 1.34301 (1.35131)	Top-1 acc 51.562 (53.340)	Top-5 acc 74.609 (75.900)	lr 0.01986
Train [36][3110/3239]	Time 0.230 (0.563)	Data Time 0.001 (0.011)	Loss 2.7626 (2.9467)	Entropy 1.34293 (1.35129)	Top-1 acc 59.375 (53.340)	Top-5 acc 78.125 (75.900)	lr 0.01986
Train [36][3120/3239]	Time 0.231 (0.563)	Data Time 0.001 (0.011)	Loss 3.1793 (2.9467)	Entropy 1.34294 (1.35126)	Top-1 acc 47.656 (53.339)	Top-5 acc 73.438 (75.902)	lr 0.01986
Train [36][3130/3239]	Time 0.179 (0.562)	Data Time 0.002 (0.011)	Loss 2.9468 (2.9469)	Entropy 1.34295 (1.35123)	Top-1 acc 52.344 (53.334)	Top-5 acc 76.953 (75.897)	lr 0.01986
Train [36][3140/3239]	Time 0.245 (0.562)	Data Time 0.001 (0.011)	Loss 2.5904 (2.9466)	Entropy 1.34280 (1.35121)	Top-1 acc 61.328 (53.341)	Top-5 acc 84.375 (75.903)	lr 0.01986
Train [36][3150/3239]	Time 0.217 (0.562)	Data Time 0.001 (0.011)	Loss 2.8694 (2.9467)	Entropy 1.34278 (1.35118)	Top-1 acc 55.078 (53.339)	Top-5 acc 76.562 (75.900)	lr 0.01985
Train [36][3160/3239]	Time 0.247 (0.561)	Data Time 0.001 (0.011)	Loss 2.7482 (2.9467)	Entropy 1.34277 (1.35115)	Top-1 acc 57.422 (53.339)	Top-5 acc 79.297 (75.898)	lr 0.01985
Train [36][3170/3239]	Time 0.220 (0.561)	Data Time 0.001 (0.011)	Loss 3.2314 (2.9467)	Entropy 1.34274 (1.35113)	Top-1 acc 44.141 (53.340)	Top-5 acc 67.578 (75.897)	lr 0.01985
Train [36][3180/3239]	Time 0.219 (0.561)	Data Time 0.000 (0.011)	Loss 3.0581 (2.9468)	Entropy 1.34282 (1.35110)	Top-1 acc 52.734 (53.336)	Top-5 acc 72.266 (75.891)	lr 0.01985
Train [36][3190/3239]	Time 0.304 (0.573)	Data Time 0.000 (0.011)	Loss 3.0860 (2.9469)	Entropy 1.34278 (1.35108)	Top-1 acc 55.859 (53.333)	Top-5 acc 76.172 (75.890)	lr 0.01985
Train [36][3200/3239]	Time 0.229 (0.573)	Data Time 0.000 (0.011)	Loss 3.0370 (2.9469)	Entropy 1.34271 (1.35105)	Top-1 acc 50.000 (53.333)	Top-5 acc 75.000 (75.890)	lr 0.01985
Train [36][3210/3239]	Time 0.217 (0.573)	Data Time 0.000 (0.011)	Loss 2.8408 (2.9470)	Entropy 1.34253 (1.35102)	Top-1 acc 54.297 (53.334)	Top-5 acc 78.906 (75.890)	lr 0.01985
Train [36][3220/3239]	Time 0.244 (0.572)	Data Time 0.000 (0.011)	Loss 2.9423 (2.9469)	Entropy 1.34242 (1.35100)	Top-1 acc 51.562 (53.334)	Top-5 acc 73.828 (75.889)	lr 0.01985
Train [36][3230/3239]	Time 0.238 (0.572)	Data Time 0.000 (0.011)	Loss 2.9201 (2.9469)	Entropy 1.34234 (1.35097)	Top-1 acc 55.078 (53.334)	Top-5 acc 79.297 (75.889)	lr 0.01985
Train [36][3239/3239]	Time 2.192 (0.571)	Data Time 0.000 (0.011)	Loss 3.3499 (2.9468)	Entropy 1.34234 (1.35095)	Top-1 acc 45.679 (53.334)	Top-5 acc 64.198 (75.890)	lr 0.01985
==========Valid [36/120]	loss 1.752	top-1 acc 60.813 (60.813)	top-5 acc 82.226	Train top-1 53.334	top-5 75.890	Entropy 1.34234	Latency-None: 0.000ms	Flops: 559.17M
Train [37][0/3239]	Time 33.040 (33.040)	Data Time 32.135 (32.135)	Loss 3.0255 (3.0255)	Entropy 1.34230 (1.34230)	Top-1 acc 50.781 (50.781)	Top-5 acc 72.656 (72.656)	lr 0.01985
Train [37][10/3239]	Time 2.808 (3.575)	Data Time 0.031 (2.928)	Loss 2.9056 (2.9311)	Entropy 1.34230 (1.34230)	Top-1 acc 56.250 (53.267)	Top-5 acc 77.344 (75.320)	lr 0.01985
Train [37][20/3239]	Time 0.231 (1.985)	Data Time 0.001 (1.535)	Loss 2.8260 (2.9170)	Entropy 1.34225 (1.34227)	Top-1 acc 51.562 (53.144)	Top-5 acc 81.250 (76.042)	lr 0.01985
Train [37][30/3239]	Time 0.216 (1.493)	Data Time 0.002 (1.040)	Loss 3.0164 (2.9382)	Entropy 1.34220 (1.34225)	Top-1 acc 51.953 (52.823)	Top-5 acc 74.609 (75.819)	lr 0.01984
Train [37][40/3239]	Time 0.229 (1.240)	Data Time 0.001 (0.787)	Loss 2.8105 (2.9387)	Entropy 1.34215 (1.34223)	Top-1 acc 54.297 (53.106)	Top-5 acc 78.516 (75.972)	lr 0.01984
Train [37][50/3239]	Time 0.213 (1.089)	Data Time 0.001 (0.633)	Loss 2.8421 (2.9298)	Entropy 1.34209 (1.34220)	Top-1 acc 53.516 (53.133)	Top-5 acc 78.906 (76.164)	lr 0.01984
Train [37][60/3239]	Time 0.318 (0.985)	Data Time 0.001 (0.529)	Loss 2.7947 (2.9173)	Entropy 1.34207 (1.34218)	Top-1 acc 56.250 (53.356)	Top-5 acc 79.688 (76.402)	lr 0.01984
Train [37][70/3239]	Time 0.217 (0.909)	Data Time 0.001 (0.455)	Loss 3.1149 (2.9123)	Entropy 1.34205 (1.34217)	Top-1 acc 51.172 (53.631)	Top-5 acc 71.875 (76.496)	lr 0.01984
Train [37][80/3239]	Time 0.226 (0.852)	Data Time 0.001 (0.399)	Loss 2.8061 (2.9119)	Entropy 1.34192 (1.34215)	Top-1 acc 58.594 (53.762)	Top-5 acc 81.250 (76.524)	lr 0.01984
Train [37][90/3239]	Time 0.211 (0.808)	Data Time 0.001 (0.355)	Loss 3.1101 (2.9024)	Entropy 1.34174 (1.34212)	Top-1 acc 51.562 (54.009)	Top-5 acc 73.438 (76.678)	lr 0.01984
Train [37][100/3239]	Time 0.217 (0.772)	Data Time 0.001 (0.320)	Loss 2.9174 (2.9115)	Entropy 1.34168 (1.34208)	Top-1 acc 57.812 (53.953)	Top-5 acc 75.000 (76.555)	lr 0.01984
Train [37][110/3239]	Time 0.348 (0.744)	Data Time 0.001 (0.292)	Loss 2.8010 (2.9105)	Entropy 1.34168 (1.34204)	Top-1 acc 53.906 (53.991)	Top-5 acc 80.859 (76.524)	lr 0.01984
Train [37][120/3239]	Time 2.426 (0.720)	Data Time 0.001 (0.268)	Loss 2.8054 (2.9153)	Entropy 1.34168 (1.34201)	Top-1 acc 55.859 (53.922)	Top-5 acc 79.297 (76.391)	lr 0.01984
Train [37][130/3239]	Time 0.237 (0.682)	Data Time 0.001 (0.247)	Loss 2.9988 (2.9171)	Entropy 1.34159 (1.34198)	Top-1 acc 51.562 (53.942)	Top-5 acc 72.656 (76.354)	lr 0.01984
Train [37][140/3239]	Time 0.221 (0.665)	Data Time 0.002 (0.230)	Loss 2.9751 (2.9165)	Entropy 1.34146 (1.34194)	Top-1 acc 54.688 (53.973)	Top-5 acc 74.219 (76.363)	lr 0.01984
Train [37][150/3239]	Time 0.210 (0.651)	Data Time 0.001 (0.215)	Loss 2.8082 (2.9162)	Entropy 1.34142 (1.34191)	Top-1 acc 55.078 (54.041)	Top-5 acc 80.859 (76.384)	lr 0.01984
Train [37][160/3239]	Time 0.260 (0.639)	Data Time 0.001 (0.202)	Loss 2.9495 (2.9162)	Entropy 1.34135 (1.34188)	Top-1 acc 51.953 (53.996)	Top-5 acc 78.125 (76.448)	lr 0.01983
Train [37][170/3239]	Time 0.219 (0.629)	Data Time 0.001 (0.190)	Loss 2.7659 (2.9143)	Entropy 1.34135 (1.34184)	Top-1 acc 58.984 (54.068)	Top-5 acc 78.906 (76.480)	lr 0.01983
Train [37][180/3239]	Time 0.221 (0.619)	Data Time 0.002 (0.180)	Loss 2.8906 (2.9177)	Entropy 1.34135 (1.34182)	Top-1 acc 54.297 (54.010)	Top-5 acc 76.953 (76.427)	lr 0.01983
Train [37][190/3239]	Time 0.266 (0.611)	Data Time 0.001 (0.170)	Loss 2.9911 (2.9193)	Entropy 1.34133 (1.34179)	Top-1 acc 51.562 (53.974)	Top-5 acc 75.391 (76.434)	lr 0.01983
Train [37][200/3239]	Time 0.221 (0.603)	Data Time 0.001 (0.162)	Loss 2.9430 (2.9228)	Entropy 1.34124 (1.34177)	Top-1 acc 56.250 (53.877)	Top-5 acc 78.516 (76.374)	lr 0.01983
Train [37][210/3239]	Time 0.217 (0.596)	Data Time 0.002 (0.154)	Loss 2.9122 (2.9213)	Entropy 1.34123 (1.34174)	Top-1 acc 50.781 (53.919)	Top-5 acc 78.125 (76.409)	lr 0.01983
Train [37][220/3239]	Time 0.211 (0.590)	Data Time 0.001 (0.147)	Loss 2.8951 (2.9227)	Entropy 1.34119 (1.34172)	Top-1 acc 51.172 (53.834)	Top-5 acc 77.344 (76.375)	lr 0.01983
Train [37][230/3239]	Time 2.407 (0.583)	Data Time 0.001 (0.141)	Loss 2.6459 (2.9207)	Entropy 1.34119 (1.34170)	Top-1 acc 57.812 (53.883)	Top-5 acc 84.375 (76.414)	lr 0.01983
Train [37][240/3239]	Time 0.209 (0.569)	Data Time 0.002 (0.135)	Loss 2.9215 (2.9208)	Entropy 1.34116 (1.34167)	Top-1 acc 55.859 (53.866)	Top-5 acc 76.953 (76.420)	lr 0.01983
Train [37][250/3239]	Time 0.209 (0.564)	Data Time 0.001 (0.130)	Loss 3.0564 (2.9207)	Entropy 1.34114 (1.34165)	Top-1 acc 52.344 (53.930)	Top-5 acc 74.219 (76.426)	lr 0.01983
Train [37][260/3239]	Time 0.209 (0.560)	Data Time 0.001 (0.125)	Loss 2.6904 (2.9190)	Entropy 1.34113 (1.34163)	Top-1 acc 62.891 (54.050)	Top-5 acc 81.641 (76.434)	lr 0.01983
Train [37][270/3239]	Time 0.307 (0.557)	Data Time 0.001 (0.120)	Loss 2.8279 (2.9184)	Entropy 1.34108 (1.34161)	Top-1 acc 53.125 (54.092)	Top-5 acc 75.000 (76.420)	lr 0.01983
Train [37][280/3239]	Time 0.222 (0.552)	Data Time 0.001 (0.116)	Loss 3.0580 (2.9167)	Entropy 1.34104 (1.34159)	Top-1 acc 51.953 (54.136)	Top-5 acc 73.438 (76.465)	lr 0.01982
Train [37][290/3239]	Time 0.206 (0.549)	Data Time 0.001 (0.112)	Loss 2.8787 (2.9173)	Entropy 1.34098 (1.34157)	Top-1 acc 54.297 (54.125)	Top-5 acc 77.344 (76.462)	lr 0.01982
Train [37][300/3239]	Time 0.226 (0.546)	Data Time 0.001 (0.109)	Loss 2.8957 (2.9175)	Entropy 1.34094 (1.34155)	Top-1 acc 54.688 (54.106)	Top-5 acc 77.344 (76.457)	lr 0.01982
Train [37][310/3239]	Time 0.223 (0.695)	Data Time 0.002 (0.105)	Loss 2.8990 (2.9186)	Entropy 1.34095 (1.34153)	Top-1 acc 49.609 (54.053)	Top-5 acc 76.953 (76.423)	lr 0.01982
Train [37][320/3239]	Time 0.228 (0.688)	Data Time 0.008 (0.102)	Loss 3.1049 (2.9175)	Entropy 1.34091 (1.34152)	Top-1 acc 48.828 (54.079)	Top-5 acc 74.609 (76.441)	lr 0.01982
Train [37][330/3239]	Time 0.262 (0.681)	Data Time 0.002 (0.099)	Loss 2.8429 (2.9168)	Entropy 1.34082 (1.34150)	Top-1 acc 56.250 (54.074)	Top-5 acc 76.172 (76.456)	lr 0.01982
Train [37][340/3239]	Time 2.620 (0.675)	Data Time 0.001 (0.096)	Loss 2.7649 (2.9170)	Entropy 1.34082 (1.34148)	Top-1 acc 54.688 (54.071)	Top-5 acc 80.078 (76.450)	lr 0.01982
Train [37][350/3239]	Time 0.231 (0.662)	Data Time 0.001 (0.093)	Loss 2.8680 (2.9179)	Entropy 1.34079 (1.34146)	Top-1 acc 58.594 (54.081)	Top-5 acc 77.344 (76.442)	lr 0.01982
Train [37][360/3239]	Time 0.225 (0.656)	Data Time 0.001 (0.091)	Loss 2.7234 (2.9172)	Entropy 1.34073 (1.34144)	Top-1 acc 61.328 (54.124)	Top-5 acc 81.641 (76.449)	lr 0.01982
Train [37][370/3239]	Time 0.346 (0.651)	Data Time 0.002 (0.088)	Loss 2.9115 (2.9173)	Entropy 1.34067 (1.34142)	Top-1 acc 54.688 (54.129)	Top-5 acc 75.781 (76.441)	lr 0.01982
Train [37][380/3239]	Time 0.230 (0.646)	Data Time 0.001 (0.086)	Loss 2.7732 (2.9164)	Entropy 1.34058 (1.34140)	Top-1 acc 55.078 (54.132)	Top-5 acc 81.250 (76.437)	lr 0.01982
Train [37][390/3239]	Time 0.235 (0.641)	Data Time 0.002 (0.084)	Loss 2.9810 (2.9156)	Entropy 1.34052 (1.34137)	Top-1 acc 52.344 (54.156)	Top-5 acc 72.656 (76.429)	lr 0.01982
Train [37][400/3239]	Time 0.244 (0.636)	Data Time 0.002 (0.082)	Loss 2.9353 (2.9138)	Entropy 1.34048 (1.34135)	Top-1 acc 52.344 (54.216)	Top-5 acc 78.906 (76.467)	lr 0.01981
Train [37][410/3239]	Time 0.201 (0.632)	Data Time 0.001 (0.080)	Loss 2.8833 (2.9149)	Entropy 1.34043 (1.34133)	Top-1 acc 58.984 (54.202)	Top-5 acc 75.000 (76.453)	lr 0.01981
Train [37][420/3239]	Time 0.226 (0.628)	Data Time 0.001 (0.078)	Loss 2.7888 (2.9151)	Entropy 1.34033 (1.34131)	Top-1 acc 55.859 (54.187)	Top-5 acc 76.953 (76.442)	lr 0.01981
Train [37][430/3239]	Time 0.219 (0.624)	Data Time 0.001 (0.076)	Loss 2.9391 (2.9156)	Entropy 1.34018 (1.34128)	Top-1 acc 53.906 (54.184)	Top-5 acc 76.562 (76.437)	lr 0.01981
Train [37][440/3239]	Time 0.244 (0.620)	Data Time 0.002 (0.075)	Loss 2.9098 (2.9153)	Entropy 1.34017 (1.34126)	Top-1 acc 58.203 (54.198)	Top-5 acc 75.781 (76.444)	lr 0.01981
Train [37][450/3239]	Time 2.446 (0.616)	Data Time 0.001 (0.073)	Loss 2.8630 (2.9159)	Entropy 1.34017 (1.34124)	Top-1 acc 54.297 (54.163)	Top-5 acc 78.516 (76.418)	lr 0.01981
Train [37][460/3239]	Time 0.215 (0.608)	Data Time 0.001 (0.072)	Loss 2.8336 (2.9158)	Entropy 1.34012 (1.34121)	Top-1 acc 57.422 (54.170)	Top-5 acc 77.344 (76.426)	lr 0.01981
Train [37][470/3239]	Time 0.214 (0.604)	Data Time 0.001 (0.070)	Loss 2.9487 (2.9156)	Entropy 1.34009 (1.34119)	Top-1 acc 52.344 (54.179)	Top-5 acc 75.781 (76.422)	lr 0.01981
Train [37][480/3239]	Time 0.288 (0.601)	Data Time 0.001 (0.069)	Loss 2.9379 (2.9164)	Entropy 1.34008 (1.34116)	Top-1 acc 55.859 (54.168)	Top-5 acc 76.172 (76.412)	lr 0.01981
Train [37][490/3239]	Time 0.213 (0.598)	Data Time 0.001 (0.067)	Loss 2.9577 (2.9154)	Entropy 1.33997 (1.34114)	Top-1 acc 49.219 (54.182)	Top-5 acc 76.562 (76.426)	lr 0.01981
Train [37][500/3239]	Time 0.164 (0.595)	Data Time 0.001 (0.066)	Loss 2.8593 (2.9155)	Entropy 1.33982 (1.34112)	Top-1 acc 54.688 (54.206)	Top-5 acc 76.953 (76.408)	lr 0.01981
Train [37][510/3239]	Time 0.200 (0.592)	Data Time 0.001 (0.065)	Loss 3.0732 (2.9148)	Entropy 1.33970 (1.34109)	Top-1 acc 50.781 (54.210)	Top-5 acc 72.656 (76.428)	lr 0.01981
Train [37][520/3239]	Time 0.257 (0.589)	Data Time 0.001 (0.064)	Loss 3.1518 (2.9145)	Entropy 1.33963 (1.34106)	Top-1 acc 50.391 (54.236)	Top-5 acc 74.609 (76.422)	lr 0.01980
Train [37][530/3239]	Time 0.206 (0.587)	Data Time 0.001 (0.062)	Loss 2.9396 (2.9132)	Entropy 1.33957 (1.34103)	Top-1 acc 51.172 (54.242)	Top-5 acc 77.344 (76.450)	lr 0.01980
Train [37][540/3239]	Time 0.206 (0.584)	Data Time 0.001 (0.061)	Loss 2.9630 (2.9151)	Entropy 1.33956 (1.34101)	Top-1 acc 50.000 (54.186)	Top-5 acc 73.828 (76.422)	lr 0.01980
Train [37][550/3239]	Time 0.249 (0.582)	Data Time 0.001 (0.060)	Loss 2.9775 (2.9143)	Entropy 1.33955 (1.34098)	Top-1 acc 48.047 (54.181)	Top-5 acc 76.562 (76.448)	lr 0.01980
Train [37][560/3239]	Time 2.400 (0.579)	Data Time 0.001 (0.059)	Loss 2.9900 (2.9154)	Entropy 1.33955 (1.34096)	Top-1 acc 51.172 (54.170)	Top-5 acc 73.047 (76.422)	lr 0.01980
Train [37][570/3239]	Time 0.229 (0.573)	Data Time 0.001 (0.058)	Loss 2.8973 (2.9160)	Entropy 1.33951 (1.34093)	Top-1 acc 53.516 (54.168)	Top-5 acc 76.562 (76.412)	lr 0.01980
Train [37][580/3239]	Time 0.268 (0.571)	Data Time 0.001 (0.057)	Loss 2.8250 (2.9153)	Entropy 1.33941 (1.34090)	Top-1 acc 56.250 (54.172)	Top-5 acc 78.906 (76.431)	lr 0.01980
Train [37][590/3239]	Time 0.318 (0.570)	Data Time 0.001 (0.056)	Loss 2.9093 (2.9161)	Entropy 1.33937 (1.34088)	Top-1 acc 54.688 (54.155)	Top-5 acc 77.344 (76.411)	lr 0.01980
Train [37][600/3239]	Time 0.160 (0.568)	Data Time 0.001 (0.055)	Loss 2.9533 (2.9151)	Entropy 1.33947 (1.34085)	Top-1 acc 52.734 (54.183)	Top-5 acc 76.953 (76.437)	lr 0.01980
Train [37][610/3239]	Time 0.205 (0.566)	Data Time 0.001 (0.055)	Loss 2.9622 (2.9145)	Entropy 1.33942 (1.34083)	Top-1 acc 55.078 (54.196)	Top-5 acc 73.047 (76.450)	lr 0.01980
Train [37][620/3239]	Time 0.237 (0.564)	Data Time 0.001 (0.054)	Loss 2.7555 (2.9138)	Entropy 1.33938 (1.34081)	Top-1 acc 57.031 (54.224)	Top-5 acc 79.688 (76.456)	lr 0.01980
Train [37][630/3239]	Time 0.216 (0.562)	Data Time 0.001 (0.053)	Loss 3.0126 (2.9146)	Entropy 1.33935 (1.34079)	Top-1 acc 50.000 (54.198)	Top-5 acc 73.047 (76.437)	lr 0.01980
Train [37][640/3239]	Time 0.297 (0.560)	Data Time 0.001 (0.052)	Loss 2.8748 (2.9147)	Entropy 1.33929 (1.34076)	Top-1 acc 52.344 (54.199)	Top-5 acc 76.562 (76.442)	lr 0.01979
Train [37][650/3239]	Time 0.218 (0.559)	Data Time 0.001 (0.051)	Loss 3.0317 (2.9149)	Entropy 1.33923 (1.34074)	Top-1 acc 55.078 (54.201)	Top-5 acc 75.000 (76.445)	lr 0.01979
Train [37][660/3239]	Time 0.247 (0.557)	Data Time 0.001 (0.051)	Loss 2.8596 (2.9140)	Entropy 1.33906 (1.34072)	Top-1 acc 53.516 (54.211)	Top-5 acc 79.297 (76.468)	lr 0.01979
Train [37][670/3239]	Time 45.664 (0.620)	Data Time 0.001 (0.050)	Loss 3.0525 (2.9140)	Entropy 1.33906 (1.34069)	Top-1 acc 52.344 (54.202)	Top-5 acc 72.266 (76.471)	lr 0.01979
Train [37][680/3239]	Time 0.359 (0.616)	Data Time 0.002 (0.049)	Loss 2.8763 (2.9138)	Entropy 1.33903 (1.34067)	Top-1 acc 52.734 (54.206)	Top-5 acc 78.125 (76.460)	lr 0.01979
Train [37][690/3239]	Time 0.314 (0.614)	Data Time 0.001 (0.048)	Loss 2.9336 (2.9144)	Entropy 1.33890 (1.34064)	Top-1 acc 57.031 (54.192)	Top-5 acc 74.219 (76.437)	lr 0.01979
Train [37][700/3239]	Time 0.229 (0.611)	Data Time 0.002 (0.048)	Loss 2.7363 (2.9142)	Entropy 1.33883 (1.34062)	Top-1 acc 60.156 (54.194)	Top-5 acc 79.297 (76.447)	lr 0.01979
Train [37][710/3239]	Time 0.226 (0.609)	Data Time 0.001 (0.047)	Loss 2.8812 (2.9149)	Entropy 1.33865 (1.34059)	Top-1 acc 55.859 (54.178)	Top-5 acc 76.953 (76.439)	lr 0.01979
Train [37][720/3239]	Time 0.168 (0.606)	Data Time 0.001 (0.046)	Loss 2.8894 (2.9143)	Entropy 1.33853 (1.34056)	Top-1 acc 55.469 (54.187)	Top-5 acc 78.125 (76.457)	lr 0.01979
Train [37][730/3239]	Time 0.211 (0.604)	Data Time 0.001 (0.046)	Loss 2.7856 (2.9143)	Entropy 1.33845 (1.34053)	Top-1 acc 55.469 (54.181)	Top-5 acc 78.516 (76.462)	lr 0.01979
Train [37][740/3239]	Time 0.279 (0.602)	Data Time 0.001 (0.045)	Loss 2.9334 (2.9146)	Entropy 1.33842 (1.34051)	Top-1 acc 58.203 (54.175)	Top-5 acc 75.000 (76.451)	lr 0.01979
Train [37][750/3239]	Time 0.214 (0.600)	Data Time 0.002 (0.045)	Loss 2.9945 (2.9143)	Entropy 1.33836 (1.34048)	Top-1 acc 54.297 (54.201)	Top-5 acc 74.219 (76.453)	lr 0.01979
Train [37][760/3239]	Time 0.211 (0.598)	Data Time 0.001 (0.044)	Loss 2.7165 (2.9143)	Entropy 1.33831 (1.34045)	Top-1 acc 58.594 (54.203)	Top-5 acc 80.078 (76.444)	lr 0.01979
Train [37][770/3239]	Time 0.253 (0.596)	Data Time 0.001 (0.044)	Loss 3.0282 (2.9145)	Entropy 1.33830 (1.34042)	Top-1 acc 48.047 (54.191)	Top-5 acc 75.781 (76.443)	lr 0.01978
Train [37][780/3239]	Time 2.429 (0.595)	Data Time 0.001 (0.043)	Loss 2.9807 (2.9154)	Entropy 1.33830 (1.34039)	Top-1 acc 54.297 (54.175)	Top-5 acc 76.562 (76.427)	lr 0.01978
Train [37][790/3239]	Time 0.222 (0.590)	Data Time 0.001 (0.043)	Loss 3.0363 (2.9159)	Entropy 1.33827 (1.34037)	Top-1 acc 54.297 (54.176)	Top-5 acc 75.000 (76.416)	lr 0.01978
Train [37][800/3239]	Time 0.212 (0.589)	Data Time 0.001 (0.042)	Loss 2.8690 (2.9162)	Entropy 1.33810 (1.34034)	Top-1 acc 55.078 (54.144)	Top-5 acc 77.344 (76.416)	lr 0.01978
Train [37][810/3239]	Time 0.203 (0.587)	Data Time 0.001 (0.042)	Loss 2.8657 (2.9171)	Entropy 1.33807 (1.34031)	Top-1 acc 58.594 (54.121)	Top-5 acc 75.781 (76.397)	lr 0.01978
Train [37][820/3239]	Time 0.224 (0.585)	Data Time 0.001 (0.041)	Loss 2.8411 (2.9169)	Entropy 1.33754 (1.34028)	Top-1 acc 53.516 (54.129)	Top-5 acc 74.609 (76.391)	lr 0.01978
Train [37][830/3239]	Time 0.219 (0.584)	Data Time 0.001 (0.041)	Loss 2.9146 (2.9177)	Entropy 1.33752 (1.34025)	Top-1 acc 53.906 (54.113)	Top-5 acc 77.344 (76.372)	lr 0.01978
Train [37][840/3239]	Time 0.268 (0.582)	Data Time 0.001 (0.040)	Loss 2.9760 (2.9173)	Entropy 1.33748 (1.34021)	Top-1 acc 50.391 (54.119)	Top-5 acc 75.781 (76.379)	lr 0.01978
Train [37][850/3239]	Time 0.316 (0.580)	Data Time 0.001 (0.040)	Loss 3.1123 (2.9177)	Entropy 1.33741 (1.34018)	Top-1 acc 50.391 (54.100)	Top-5 acc 71.484 (76.362)	lr 0.01978
Train [37][860/3239]	Time 0.167 (0.579)	Data Time 0.001 (0.039)	Loss 2.9047 (2.9172)	Entropy 1.33730 (1.34015)	Top-1 acc 54.688 (54.114)	Top-5 acc 75.000 (76.377)	lr 0.01978
Train [37][870/3239]	Time 0.229 (0.577)	Data Time 0.001 (0.039)	Loss 2.9370 (2.9174)	Entropy 1.33721 (1.34012)	Top-1 acc 55.469 (54.102)	Top-5 acc 76.562 (76.372)	lr 0.01978
Train [37][880/3239]	Time 0.235 (0.576)	Data Time 0.001 (0.038)	Loss 3.0622 (2.9170)	Entropy 1.33728 (1.34008)	Top-1 acc 50.000 (54.100)	Top-5 acc 74.219 (76.373)	lr 0.01978
Train [37][890/3239]	Time 2.377 (0.574)	Data Time 0.002 (0.038)	Loss 2.8559 (2.9173)	Entropy 1.33728 (1.34005)	Top-1 acc 56.250 (54.099)	Top-5 acc 75.000 (76.356)	lr 0.01977
Train [37][900/3239]	Time 0.328 (0.571)	Data Time 0.001 (0.038)	Loss 2.8979 (2.9171)	Entropy 1.33721 (1.34002)	Top-1 acc 52.734 (54.107)	Top-5 acc 75.781 (76.352)	lr 0.01977
Train [37][910/3239]	Time 0.219 (0.569)	Data Time 0.001 (0.037)	Loss 3.1594 (2.9178)	Entropy 1.33709 (1.33999)	Top-1 acc 50.000 (54.090)	Top-5 acc 73.438 (76.337)	lr 0.01977
Train [37][920/3239]	Time 0.257 (0.568)	Data Time 0.001 (0.037)	Loss 3.1104 (2.9182)	Entropy 1.33703 (1.33996)	Top-1 acc 49.219 (54.085)	Top-5 acc 73.047 (76.325)	lr 0.01977
Train [37][930/3239]	Time 0.230 (0.567)	Data Time 0.001 (0.036)	Loss 2.9854 (2.9181)	Entropy 1.33706 (1.33992)	Top-1 acc 50.000 (54.082)	Top-5 acc 77.734 (76.331)	lr 0.01977
Train [37][940/3239]	Time 0.227 (0.565)	Data Time 0.001 (0.036)	Loss 3.2810 (2.9185)	Entropy 1.33697 (1.33989)	Top-1 acc 44.922 (54.063)	Top-5 acc 69.141 (76.328)	lr 0.01977
Train [37][950/3239]	Time 0.314 (0.564)	Data Time 0.002 (0.036)	Loss 2.8873 (2.9182)	Entropy 1.33687 (1.33986)	Top-1 acc 56.641 (54.078)	Top-5 acc 75.391 (76.333)	lr 0.01977
Train [37][960/3239]	Time 0.227 (0.563)	Data Time 0.001 (0.035)	Loss 3.1512 (2.9190)	Entropy 1.33685 (1.33983)	Top-1 acc 47.266 (54.043)	Top-5 acc 70.312 (76.323)	lr 0.01977
Train [37][970/3239]	Time 0.222 (0.562)	Data Time 0.001 (0.035)	Loss 2.8863 (2.9194)	Entropy 1.33678 (1.33980)	Top-1 acc 55.859 (54.037)	Top-5 acc 78.516 (76.318)	lr 0.01977
Train [37][980/3239]	Time 0.234 (0.561)	Data Time 0.001 (0.035)	Loss 2.9015 (2.9193)	Entropy 1.33673 (1.33977)	Top-1 acc 52.344 (54.035)	Top-5 acc 76.172 (76.313)	lr 0.01977
Train [37][990/3239]	Time 0.222 (0.560)	Data Time 0.002 (0.034)	Loss 2.7709 (2.9189)	Entropy 1.33671 (1.33974)	Top-1 acc 58.594 (54.033)	Top-5 acc 80.859 (76.323)	lr 0.01977
Train [37][1000/3239]	Time 2.526 (0.559)	Data Time 0.001 (0.034)	Loss 3.2877 (2.9196)	Entropy 1.33671 (1.33971)	Top-1 acc 47.266 (54.020)	Top-5 acc 71.094 (76.320)	lr 0.01977
Train [37][1010/3239]	Time 0.226 (0.556)	Data Time 0.001 (0.034)	Loss 2.9854 (2.9198)	Entropy 1.33659 (1.33968)	Top-1 acc 51.562 (54.016)	Top-5 acc 74.609 (76.316)	lr 0.01976
Train [37][1020/3239]	Time 0.232 (0.555)	Data Time 0.001 (0.033)	Loss 3.0461 (2.9200)	Entropy 1.33649 (1.33965)	Top-1 acc 51.953 (54.008)	Top-5 acc 74.609 (76.311)	lr 0.01976
Train [37][1030/3239]	Time 0.221 (0.554)	Data Time 0.001 (0.033)	Loss 3.0727 (2.9197)	Entropy 1.33647 (1.33962)	Top-1 acc 47.266 (54.013)	Top-5 acc 71.094 (76.316)	lr 0.01976
Train [37][1040/3239]	Time 0.252 (0.596)	Data Time 0.003 (0.033)	Loss 2.7784 (2.9197)	Entropy 1.33640 (1.33959)	Top-1 acc 57.812 (54.009)	Top-5 acc 76.953 (76.315)	lr 0.01976
Train [37][1050/3239]	Time 0.321 (0.595)	Data Time 0.002 (0.032)	Loss 3.0504 (2.9202)	Entropy 1.33636 (1.33955)	Top-1 acc 52.734 (53.989)	Top-5 acc 71.094 (76.305)	lr 0.01976
Train [37][1060/3239]	Time 0.260 (0.593)	Data Time 0.001 (0.032)	Loss 2.8954 (2.9205)	Entropy 1.33630 (1.33952)	Top-1 acc 55.078 (53.988)	Top-5 acc 77.344 (76.304)	lr 0.01976
Train [37][1070/3239]	Time 0.228 (0.592)	Data Time 0.001 (0.032)	Loss 2.7326 (2.9205)	Entropy 1.33621 (1.33949)	Top-1 acc 58.203 (53.984)	Top-5 acc 81.641 (76.310)	lr 0.01976
Train [37][1080/3239]	Time 0.219 (0.591)	Data Time 0.001 (0.032)	Loss 2.6616 (2.9201)	Entropy 1.33618 (1.33946)	Top-1 acc 60.938 (54.002)	Top-5 acc 81.250 (76.324)	lr 0.01976
Train [37][1090/3239]	Time 0.169 (0.589)	Data Time 0.001 (0.031)	Loss 3.0906 (2.9199)	Entropy 1.33613 (1.33943)	Top-1 acc 55.078 (54.020)	Top-5 acc 73.828 (76.328)	lr 0.01976
Train [37][1100/3239]	Time 0.218 (0.588)	Data Time 0.001 (0.031)	Loss 2.8983 (2.9200)	Entropy 1.33606 (1.33940)	Top-1 acc 55.469 (54.019)	Top-5 acc 76.172 (76.322)	lr 0.01976
Train [37][1110/3239]	Time 2.411 (0.587)	Data Time 0.001 (0.031)	Loss 2.9956 (2.9199)	Entropy 1.33606 (1.33937)	Top-1 acc 55.469 (54.017)	Top-5 acc 77.344 (76.328)	lr 0.01976
Train [37][1120/3239]	Time 0.221 (0.584)	Data Time 0.002 (0.031)	Loss 2.8206 (2.9197)	Entropy 1.33597 (1.33934)	Top-1 acc 53.516 (54.017)	Top-5 acc 77.344 (76.332)	lr 0.01976
Train [37][1130/3239]	Time 0.234 (0.582)	Data Time 0.001 (0.030)	Loss 2.8874 (2.9195)	Entropy 1.33592 (1.33931)	Top-1 acc 51.172 (54.030)	Top-5 acc 76.172 (76.332)	lr 0.01975
Train [37][1140/3239]	Time 0.224 (0.582)	Data Time 0.001 (0.030)	Loss 2.8937 (2.9196)	Entropy 1.33591 (1.33928)	Top-1 acc 52.344 (54.019)	Top-5 acc 75.000 (76.334)	lr 0.01975
Train [37][1150/3239]	Time 0.222 (0.581)	Data Time 0.001 (0.030)	Loss 2.9738 (2.9191)	Entropy 1.33581 (1.33925)	Top-1 acc 51.562 (54.030)	Top-5 acc 75.391 (76.347)	lr 0.01975
Train [37][1160/3239]	Time 0.227 (0.580)	Data Time 0.001 (0.030)	Loss 3.0418 (2.9191)	Entropy 1.33576 (1.33922)	Top-1 acc 51.953 (54.037)	Top-5 acc 75.781 (76.351)	lr 0.01975
Train [37][1170/3239]	Time 0.237 (0.578)	Data Time 0.001 (0.029)	Loss 2.9206 (2.9185)	Entropy 1.33569 (1.33919)	Top-1 acc 55.469 (54.063)	Top-5 acc 75.391 (76.363)	lr 0.01975
Train [37][1180/3239]	Time 0.277 (0.577)	Data Time 0.002 (0.029)	Loss 2.9951 (2.9182)	Entropy 1.33565 (1.33916)	Top-1 acc 54.297 (54.069)	Top-5 acc 74.609 (76.373)	lr 0.01975
Train [37][1190/3239]	Time 0.212 (0.576)	Data Time 0.001 (0.029)	Loss 2.9192 (2.9179)	Entropy 1.33562 (1.33913)	Top-1 acc 51.562 (54.069)	Top-5 acc 78.125 (76.384)	lr 0.01975
Train [37][1200/3239]	Time 0.207 (0.575)	Data Time 0.001 (0.029)	Loss 2.5931 (2.9172)	Entropy 1.33545 (1.33910)	Top-1 acc 60.156 (54.086)	Top-5 acc 81.641 (76.404)	lr 0.01975
Train [37][1210/3239]	Time 0.330 (0.574)	Data Time 0.001 (0.028)	Loss 2.8839 (2.9166)	Entropy 1.33537 (1.33907)	Top-1 acc 54.688 (54.094)	Top-5 acc 77.344 (76.412)	lr 0.01975
Train [37][1220/3239]	Time 2.436 (0.573)	Data Time 0.001 (0.028)	Loss 3.0417 (2.9169)	Entropy 1.33537 (1.33904)	Top-1 acc 51.562 (54.091)	Top-5 acc 75.781 (76.407)	lr 0.01975
Train [37][1230/3239]	Time 0.221 (0.570)	Data Time 0.001 (0.028)	Loss 3.0931 (2.9170)	Entropy 1.33534 (1.33901)	Top-1 acc 49.609 (54.083)	Top-5 acc 73.828 (76.411)	lr 0.01975
Train [37][1240/3239]	Time 0.229 (0.570)	Data Time 0.002 (0.028)	Loss 2.8069 (2.9171)	Entropy 1.33533 (1.33898)	Top-1 acc 56.250 (54.084)	Top-5 acc 79.297 (76.408)	lr 0.01975
Train [37][1250/3239]	Time 0.228 (0.569)	Data Time 0.002 (0.028)	Loss 2.7260 (2.9169)	Entropy 1.33530 (1.33895)	Top-1 acc 60.156 (54.082)	Top-5 acc 78.906 (76.416)	lr 0.01974
Train [37][1260/3239]	Time 0.316 (0.568)	Data Time 0.001 (0.027)	Loss 2.7087 (2.9165)	Entropy 1.33525 (1.33892)	Top-1 acc 61.328 (54.091)	Top-5 acc 79.688 (76.427)	lr 0.01974
Train [37][1270/3239]	Time 0.249 (0.567)	Data Time 0.001 (0.027)	Loss 3.0068 (2.9167)	Entropy 1.33509 (1.33890)	Top-1 acc 51.172 (54.087)	Top-5 acc 74.219 (76.422)	lr 0.01974
Train [37][1280/3239]	Time 0.234 (0.566)	Data Time 0.001 (0.027)	Loss 3.0117 (2.9176)	Entropy 1.33506 (1.33887)	Top-1 acc 51.953 (54.058)	Top-5 acc 76.172 (76.411)	lr 0.01974
Train [37][1290/3239]	Time 0.167 (0.565)	Data Time 0.001 (0.027)	Loss 2.9365 (2.9173)	Entropy 1.33500 (1.33884)	Top-1 acc 54.688 (54.067)	Top-5 acc 75.781 (76.417)	lr 0.01974
Train [37][1300/3239]	Time 0.214 (0.564)	Data Time 0.001 (0.027)	Loss 2.9277 (2.9174)	Entropy 1.33477 (1.33881)	Top-1 acc 56.250 (54.057)	Top-5 acc 77.734 (76.425)	lr 0.01974
Train [37][1310/3239]	Time 0.364 (0.563)	Data Time 0.001 (0.026)	Loss 3.0122 (2.9174)	Entropy 1.33472 (1.33877)	Top-1 acc 52.734 (54.055)	Top-5 acc 76.172 (76.428)	lr 0.01974
Train [37][1320/3239]	Time 0.204 (0.562)	Data Time 0.001 (0.026)	Loss 3.1097 (2.9178)	Entropy 1.33459 (1.33874)	Top-1 acc 48.438 (54.038)	Top-5 acc 71.484 (76.419)	lr 0.01974
Train [37][1330/3239]	Time 2.415 (0.562)	Data Time 0.002 (0.026)	Loss 2.9899 (2.9177)	Entropy 1.33459 (1.33871)	Top-1 acc 53.125 (54.044)	Top-5 acc 73.047 (76.422)	lr 0.01974
Train [37][1340/3239]	Time 0.220 (0.559)	Data Time 0.002 (0.026)	Loss 2.8533 (2.9176)	Entropy 1.33456 (1.33868)	Top-1 acc 55.469 (54.045)	Top-5 acc 78.906 (76.421)	lr 0.01974
Train [37][1350/3239]	Time 0.209 (0.558)	Data Time 0.001 (0.026)	Loss 2.7978 (2.9175)	Entropy 1.33448 (1.33865)	Top-1 acc 53.906 (54.046)	Top-5 acc 78.906 (76.425)	lr 0.01974
Train [37][1360/3239]	Time 0.217 (0.557)	Data Time 0.001 (0.025)	Loss 2.6495 (2.9171)	Entropy 1.33445 (1.33862)	Top-1 acc 61.328 (54.059)	Top-5 acc 80.859 (76.434)	lr 0.01974
Train [37][1370/3239]	Time 0.218 (0.557)	Data Time 0.001 (0.025)	Loss 2.9910 (2.9169)	Entropy 1.33432 (1.33859)	Top-1 acc 51.562 (54.060)	Top-5 acc 74.219 (76.439)	lr 0.01973
Train [37][1380/3239]	Time 0.224 (0.556)	Data Time 0.001 (0.025)	Loss 2.7361 (2.9168)	Entropy 1.33420 (1.33856)	Top-1 acc 58.203 (54.059)	Top-5 acc 81.641 (76.444)	lr 0.01973
Train [37][1390/3239]	Time 0.221 (0.555)	Data Time 0.001 (0.025)	Loss 2.9313 (2.9168)	Entropy 1.33412 (1.33853)	Top-1 acc 55.469 (54.058)	Top-5 acc 75.000 (76.441)	lr 0.01973
Train [37][1400/3239]	Time 0.178 (0.588)	Data Time 0.002 (0.025)	Loss 3.0140 (2.9172)	Entropy 1.33395 (1.33849)	Top-1 acc 54.297 (54.048)	Top-5 acc 73.047 (76.435)	lr 0.01973
Train [37][1410/3239]	Time 0.229 (0.587)	Data Time 0.001 (0.025)	Loss 3.1730 (2.9176)	Entropy 1.33393 (1.33846)	Top-1 acc 47.656 (54.046)	Top-5 acc 70.312 (76.425)	lr 0.01973
Train [37][1420/3239]	Time 0.304 (0.586)	Data Time 0.001 (0.025)	Loss 3.0513 (2.9179)	Entropy 1.33389 (1.33843)	Top-1 acc 46.875 (54.041)	Top-5 acc 76.172 (76.417)	lr 0.01973
Train [37][1430/3239]	Time 0.214 (0.585)	Data Time 0.001 (0.024)	Loss 2.9252 (2.9178)	Entropy 1.33380 (1.33840)	Top-1 acc 54.688 (54.045)	Top-5 acc 78.125 (76.423)	lr 0.01973
Train [37][1440/3239]	Time 2.372 (0.584)	Data Time 0.001 (0.024)	Loss 2.8202 (2.9182)	Entropy 1.33380 (1.33837)	Top-1 acc 54.297 (54.032)	Top-5 acc 76.953 (76.409)	lr 0.01973
Train [37][1450/3239]	Time 0.235 (0.582)	Data Time 0.001 (0.024)	Loss 2.9476 (2.9184)	Entropy 1.33373 (1.33833)	Top-1 acc 55.078 (54.032)	Top-5 acc 75.000 (76.409)	lr 0.01973
Train [37][1460/3239]	Time 0.262 (0.581)	Data Time 0.002 (0.024)	Loss 2.9120 (2.9186)	Entropy 1.33365 (1.33830)	Top-1 acc 53.516 (54.027)	Top-5 acc 74.609 (76.403)	lr 0.01973
Train [37][1470/3239]	Time 0.265 (0.580)	Data Time 0.002 (0.024)	Loss 2.7705 (2.9185)	Entropy 1.33362 (1.33827)	Top-1 acc 58.594 (54.030)	Top-5 acc 80.469 (76.402)	lr 0.01973
Train [37][1480/3239]	Time 0.164 (0.579)	Data Time 0.001 (0.024)	Loss 3.0671 (2.9187)	Entropy 1.33355 (1.33824)	Top-1 acc 54.297 (54.029)	Top-5 acc 74.219 (76.396)	lr 0.01973
Train [37][1490/3239]	Time 0.223 (0.579)	Data Time 0.001 (0.023)	Loss 2.9051 (2.9187)	Entropy 1.33352 (1.33821)	Top-1 acc 55.078 (54.028)	Top-5 acc 75.781 (76.396)	lr 0.01973
Train [37][1500/3239]	Time 0.218 (0.578)	Data Time 0.002 (0.023)	Loss 2.9924 (2.9189)	Entropy 1.33336 (1.33818)	Top-1 acc 51.562 (54.017)	Top-5 acc 73.438 (76.390)	lr 0.01972
Train [37][1510/3239]	Time 0.279 (0.577)	Data Time 0.001 (0.023)	Loss 2.8145 (2.9187)	Entropy 1.33330 (1.33814)	Top-1 acc 58.984 (54.030)	Top-5 acc 77.734 (76.398)	lr 0.01972
Train [37][1520/3239]	Time 0.232 (0.576)	Data Time 0.001 (0.023)	Loss 2.9997 (2.9185)	Entropy 1.33330 (1.33811)	Top-1 acc 51.562 (54.027)	Top-5 acc 73.828 (76.399)	lr 0.01972
Train [37][1530/3239]	Time 0.282 (0.575)	Data Time 0.001 (0.023)	Loss 2.9230 (2.9184)	Entropy 1.33323 (1.33808)	Top-1 acc 51.562 (54.029)	Top-5 acc 75.000 (76.399)	lr 0.01972
Train [37][1540/3239]	Time 0.243 (0.574)	Data Time 0.001 (0.023)	Loss 2.9967 (2.9182)	Entropy 1.33309 (1.33805)	Top-1 acc 50.781 (54.032)	Top-5 acc 75.000 (76.401)	lr 0.01972
Train [37][1550/3239]	Time 2.414 (0.573)	Data Time 0.001 (0.023)	Loss 2.7364 (2.9183)	Entropy 1.33309 (1.33802)	Top-1 acc 59.766 (54.037)	Top-5 acc 79.688 (76.399)	lr 0.01972
Train [37][1560/3239]	Time 0.226 (0.571)	Data Time 0.001 (0.022)	Loss 3.0932 (2.9191)	Entropy 1.33299 (1.33798)	Top-1 acc 51.562 (54.022)	Top-5 acc 73.828 (76.385)	lr 0.01972
Train [37][1570/3239]	Time 0.241 (0.570)	Data Time 0.001 (0.022)	Loss 2.7586 (2.9190)	Entropy 1.33286 (1.33795)	Top-1 acc 53.516 (54.014)	Top-5 acc 78.516 (76.386)	lr 0.01972
Train [37][1580/3239]	Time 0.342 (0.570)	Data Time 0.002 (0.022)	Loss 2.9181 (2.9192)	Entropy 1.33278 (1.33792)	Top-1 acc 51.172 (54.007)	Top-5 acc 77.344 (76.384)	lr 0.01972
Train [37][1590/3239]	Time 0.213 (0.569)	Data Time 0.001 (0.022)	Loss 2.6800 (2.9191)	Entropy 1.33271 (1.33789)	Top-1 acc 59.766 (54.009)	Top-5 acc 81.641 (76.389)	lr 0.01972
Train [37][1600/3239]	Time 0.263 (0.568)	Data Time 0.001 (0.022)	Loss 2.7680 (2.9190)	Entropy 1.33264 (1.33785)	Top-1 acc 57.422 (54.008)	Top-5 acc 78.516 (76.388)	lr 0.01972
Train [37][1610/3239]	Time 0.230 (0.568)	Data Time 0.001 (0.022)	Loss 2.8378 (2.9193)	Entropy 1.33259 (1.33782)	Top-1 acc 57.031 (53.998)	Top-5 acc 74.609 (76.385)	lr 0.01972
Train [37][1620/3239]	Time 0.196 (0.567)	Data Time 0.001 (0.022)	Loss 3.1279 (2.9195)	Entropy 1.33247 (1.33779)	Top-1 acc 49.609 (53.989)	Top-5 acc 71.875 (76.381)	lr 0.01971
Train [37][1630/3239]	Time 0.225 (0.566)	Data Time 0.001 (0.022)	Loss 2.9530 (2.9198)	Entropy 1.33231 (1.33776)	Top-1 acc 52.734 (53.975)	Top-5 acc 76.562 (76.377)	lr 0.01971
Train [37][1640/3239]	Time 0.200 (0.565)	Data Time 0.001 (0.021)	Loss 2.7584 (2.9197)	Entropy 1.33221 (1.33772)	Top-1 acc 60.547 (53.978)	Top-5 acc 78.516 (76.382)	lr 0.01971
Train [37][1650/3239]	Time 0.256 (0.565)	Data Time 0.001 (0.021)	Loss 3.0536 (2.9199)	Entropy 1.33208 (1.33769)	Top-1 acc 51.562 (53.977)	Top-5 acc 73.438 (76.376)	lr 0.01971
Train [37][1660/3239]	Time 2.450 (0.564)	Data Time 0.001 (0.021)	Loss 2.8882 (2.9194)	Entropy 1.33208 (1.33766)	Top-1 acc 54.297 (53.985)	Top-5 acc 75.781 (76.389)	lr 0.01971
Train [37][1670/3239]	Time 0.208 (0.562)	Data Time 0.001 (0.021)	Loss 3.0001 (2.9195)	Entropy 1.33195 (1.33762)	Top-1 acc 48.438 (53.981)	Top-5 acc 73.828 (76.388)	lr 0.01971
Train [37][1680/3239]	Time 0.233 (0.561)	Data Time 0.001 (0.021)	Loss 3.1540 (2.9204)	Entropy 1.33189 (1.33759)	Top-1 acc 44.531 (53.960)	Top-5 acc 68.750 (76.367)	lr 0.01971
Train [37][1690/3239]	Time 0.180 (0.561)	Data Time 0.001 (0.021)	Loss 3.0155 (2.9207)	Entropy 1.33187 (1.33755)	Top-1 acc 55.078 (53.951)	Top-5 acc 74.609 (76.360)	lr 0.01971
Train [37][1700/3239]	Time 0.226 (0.560)	Data Time 0.001 (0.021)	Loss 2.8592 (2.9209)	Entropy 1.33180 (1.33752)	Top-1 acc 54.297 (53.947)	Top-5 acc 76.172 (76.355)	lr 0.01971
Train [37][1710/3239]	Time 0.211 (0.559)	Data Time 0.001 (0.021)	Loss 2.9354 (2.9209)	Entropy 1.33180 (1.33749)	Top-1 acc 52.344 (53.939)	Top-5 acc 75.781 (76.356)	lr 0.01971
Train [37][1720/3239]	Time 0.200 (0.559)	Data Time 0.001 (0.021)	Loss 2.8680 (2.9208)	Entropy 1.33176 (1.33745)	Top-1 acc 55.078 (53.943)	Top-5 acc 76.953 (76.358)	lr 0.01971
Train [37][1730/3239]	Time 0.236 (0.558)	Data Time 0.001 (0.020)	Loss 2.9110 (2.9206)	Entropy 1.33158 (1.33742)	Top-1 acc 52.344 (53.941)	Top-5 acc 77.734 (76.363)	lr 0.01971
Train [37][1740/3239]	Time 0.317 (0.558)	Data Time 0.001 (0.020)	Loss 3.1095 (2.9205)	Entropy 1.33160 (1.33739)	Top-1 acc 49.609 (53.941)	Top-5 acc 72.656 (76.364)	lr 0.01970
Train [37][1750/3239]	Time 0.243 (0.557)	Data Time 0.001 (0.020)	Loss 2.9276 (2.9206)	Entropy 1.33155 (1.33735)	Top-1 acc 53.906 (53.941)	Top-5 acc 73.828 (76.361)	lr 0.01970
Train [37][1760/3239]	Time 0.219 (0.582)	Data Time 0.003 (0.020)	Loss 2.9163 (2.9204)	Entropy 1.33154 (1.33732)	Top-1 acc 52.344 (53.945)	Top-5 acc 78.906 (76.363)	lr 0.01970
Train [37][1770/3239]	Time 2.402 (0.582)	Data Time 0.002 (0.020)	Loss 3.0031 (2.9206)	Entropy 1.33154 (1.33729)	Top-1 acc 54.688 (53.942)	Top-5 acc 72.656 (76.360)	lr 0.01970
Train [37][1780/3239]	Time 0.239 (0.580)	Data Time 0.002 (0.020)	Loss 3.1061 (2.9207)	Entropy 1.33144 (1.33725)	Top-1 acc 51.562 (53.944)	Top-5 acc 72.266 (76.352)	lr 0.01970
Train [37][1790/3239]	Time 0.246 (0.579)	Data Time 0.001 (0.020)	Loss 2.8948 (2.9209)	Entropy 1.33141 (1.33722)	Top-1 acc 54.688 (53.937)	Top-5 acc 78.125 (76.346)	lr 0.01970
Train [37][1800/3239]	Time 0.214 (0.578)	Data Time 0.002 (0.020)	Loss 2.8929 (2.9208)	Entropy 1.33148 (1.33719)	Top-1 acc 55.078 (53.940)	Top-5 acc 76.953 (76.347)	lr 0.01970
Train [37][1810/3239]	Time 0.225 (0.578)	Data Time 0.001 (0.020)	Loss 2.9267 (2.9206)	Entropy 1.33143 (1.33716)	Top-1 acc 57.422 (53.941)	Top-5 acc 76.562 (76.351)	lr 0.01970
Train [37][1820/3239]	Time 0.241 (0.577)	Data Time 0.002 (0.020)	Loss 2.8731 (2.9207)	Entropy 1.33136 (1.33713)	Top-1 acc 54.688 (53.941)	Top-5 acc 78.906 (76.349)	lr 0.01970
Train [37][1830/3239]	Time 0.241 (0.576)	Data Time 0.001 (0.019)	Loss 3.0314 (2.9205)	Entropy 1.33134 (1.33710)	Top-1 acc 48.047 (53.946)	Top-5 acc 76.562 (76.354)	lr 0.01970
Train [37][1840/3239]	Time 0.203 (0.576)	Data Time 0.001 (0.019)	Loss 3.1038 (2.9204)	Entropy 1.33127 (1.33706)	Top-1 acc 49.219 (53.954)	Top-5 acc 74.609 (76.356)	lr 0.01970
Train [37][1850/3239]	Time 0.309 (0.575)	Data Time 0.001 (0.019)	Loss 2.7571 (2.9202)	Entropy 1.33114 (1.33703)	Top-1 acc 57.422 (53.963)	Top-5 acc 80.859 (76.360)	lr 0.01970
Train [37][1860/3239]	Time 0.222 (0.574)	Data Time 0.001 (0.019)	Loss 2.8418 (2.9199)	Entropy 1.33105 (1.33700)	Top-1 acc 52.734 (53.972)	Top-5 acc 80.469 (76.373)	lr 0.01969
Train [37][1870/3239]	Time 0.225 (0.574)	Data Time 0.001 (0.019)	Loss 2.8642 (2.9198)	Entropy 1.33099 (1.33697)	Top-1 acc 55.078 (53.970)	Top-5 acc 76.953 (76.380)	lr 0.01969
Train [37][1880/3239]	Time 2.414 (0.573)	Data Time 0.001 (0.019)	Loss 2.9541 (2.9199)	Entropy 1.33099 (1.33694)	Top-1 acc 51.562 (53.965)	Top-5 acc 76.562 (76.380)	lr 0.01969
Train [37][1890/3239]	Time 0.226 (0.571)	Data Time 0.001 (0.019)	Loss 3.0371 (2.9203)	Entropy 1.33089 (1.33690)	Top-1 acc 47.656 (53.957)	Top-5 acc 73.828 (76.370)	lr 0.01969
Train [37][1900/3239]	Time 0.318 (0.571)	Data Time 0.001 (0.019)	Loss 3.1041 (2.9201)	Entropy 1.33064 (1.33687)	Top-1 acc 53.125 (53.965)	Top-5 acc 71.875 (76.374)	lr 0.01969
Train [37][1910/3239]	Time 0.210 (0.570)	Data Time 0.001 (0.019)	Loss 2.7580 (2.9202)	Entropy 1.33055 (1.33684)	Top-1 acc 58.203 (53.965)	Top-5 acc 82.812 (76.374)	lr 0.01969
Train [37][1920/3239]	Time 0.219 (0.569)	Data Time 0.001 (0.019)	Loss 3.0189 (2.9205)	Entropy 1.33049 (1.33681)	Top-1 acc 52.734 (53.953)	Top-5 acc 73.828 (76.367)	lr 0.01969
Train [37][1930/3239]	Time 0.195 (0.569)	Data Time 0.001 (0.019)	Loss 3.0711 (2.9207)	Entropy 1.33047 (1.33677)	Top-1 acc 50.391 (53.947)	Top-5 acc 74.609 (76.366)	lr 0.01969
Train [37][1940/3239]	Time 0.221 (0.568)	Data Time 0.001 (0.018)	Loss 2.9368 (2.9209)	Entropy 1.33041 (1.33674)	Top-1 acc 50.000 (53.944)	Top-5 acc 78.125 (76.371)	lr 0.01969
Train [37][1950/3239]	Time 0.233 (0.567)	Data Time 0.001 (0.018)	Loss 3.0893 (2.9207)	Entropy 1.33040 (1.33671)	Top-1 acc 51.172 (53.948)	Top-5 acc 73.047 (76.375)	lr 0.01969
Train [37][1960/3239]	Time 0.332 (0.567)	Data Time 0.001 (0.018)	Loss 2.8111 (2.9205)	Entropy 1.33037 (1.33668)	Top-1 acc 55.859 (53.955)	Top-5 acc 78.516 (76.379)	lr 0.01969
Train [37][1970/3239]	Time 0.234 (0.566)	Data Time 0.001 (0.018)	Loss 2.9363 (2.9206)	Entropy 1.33028 (1.33664)	Top-1 acc 53.516 (53.954)	Top-5 acc 73.438 (76.376)	lr 0.01969
Train [37][1980/3239]	Time 0.239 (0.566)	Data Time 0.001 (0.018)	Loss 2.9856 (2.9207)	Entropy 1.33014 (1.33661)	Top-1 acc 52.344 (53.948)	Top-5 acc 77.734 (76.370)	lr 0.01968
Train [37][1990/3239]	Time 2.468 (0.565)	Data Time 0.001 (0.018)	Loss 2.6298 (2.9205)	Entropy 1.33014 (1.33658)	Top-1 acc 64.844 (53.953)	Top-5 acc 80.469 (76.372)	lr 0.01968
Train [37][2000/3239]	Time 0.242 (0.564)	Data Time 0.001 (0.018)	Loss 2.8236 (2.9205)	Entropy 1.33009 (1.33655)	Top-1 acc 56.250 (53.948)	Top-5 acc 77.734 (76.367)	lr 0.01968
Train [37][2010/3239]	Time 0.328 (0.563)	Data Time 0.001 (0.018)	Loss 3.0316 (2.9205)	Entropy 1.32995 (1.33651)	Top-1 acc 50.391 (53.947)	Top-5 acc 74.609 (76.367)	lr 0.01968
Train [37][2020/3239]	Time 0.236 (0.563)	Data Time 0.001 (0.018)	Loss 2.9532 (2.9204)	Entropy 1.32982 (1.33648)	Top-1 acc 51.953 (53.944)	Top-5 acc 76.172 (76.373)	lr 0.01968
Train [37][2030/3239]	Time 0.211 (0.562)	Data Time 0.001 (0.018)	Loss 3.0146 (2.9206)	Entropy 1.32973 (1.33645)	Top-1 acc 51.562 (53.939)	Top-5 acc 75.391 (76.367)	lr 0.01968
Train [37][2040/3239]	Time 0.222 (0.561)	Data Time 0.002 (0.018)	Loss 2.9825 (2.9209)	Entropy 1.32974 (1.33642)	Top-1 acc 49.609 (53.933)	Top-5 acc 72.266 (76.364)	lr 0.01968
Train [37][2050/3239]	Time 0.227 (0.561)	Data Time 0.001 (0.018)	Loss 2.8471 (2.9210)	Entropy 1.32972 (1.33638)	Top-1 acc 57.031 (53.930)	Top-5 acc 76.953 (76.364)	lr 0.01968
Train [37][2060/3239]	Time 0.330 (0.560)	Data Time 0.001 (0.017)	Loss 2.8210 (2.9214)	Entropy 1.32969 (1.33635)	Top-1 acc 58.984 (53.923)	Top-5 acc 81.641 (76.357)	lr 0.01968
Train [37][2070/3239]	Time 0.163 (0.560)	Data Time 0.001 (0.017)	Loss 2.8682 (2.9214)	Entropy 1.32968 (1.33632)	Top-1 acc 55.469 (53.922)	Top-5 acc 79.688 (76.357)	lr 0.01968
Train [37][2080/3239]	Time 0.222 (0.559)	Data Time 0.001 (0.017)	Loss 2.9622 (2.9213)	Entropy 1.32963 (1.33629)	Top-1 acc 52.344 (53.925)	Top-5 acc 72.656 (76.361)	lr 0.01968
Train [37][2090/3239]	Time 0.209 (0.559)	Data Time 0.001 (0.017)	Loss 3.0297 (2.9212)	Entropy 1.32963 (1.33625)	Top-1 acc 52.734 (53.930)	Top-5 acc 74.609 (76.366)	lr 0.01968
Train [37][2100/3239]	Time 2.489 (0.558)	Data Time 0.001 (0.017)	Loss 2.8329 (2.9216)	Entropy 1.32963 (1.33622)	Top-1 acc 56.250 (53.921)	Top-5 acc 78.125 (76.360)	lr 0.01967
Train [37][2110/3239]	Time 0.231 (0.557)	Data Time 0.001 (0.017)	Loss 2.9177 (2.9216)	Entropy 1.32961 (1.33619)	Top-1 acc 51.953 (53.921)	Top-5 acc 77.344 (76.360)	lr 0.01967
Train [37][2120/3239]	Time 0.296 (0.556)	Data Time 0.001 (0.017)	Loss 2.9432 (2.9210)	Entropy 1.32950 (1.33616)	Top-1 acc 56.641 (53.937)	Top-5 acc 75.391 (76.369)	lr 0.01967
Train [37][2130/3239]	Time 0.231 (0.577)	Data Time 0.002 (0.017)	Loss 3.0146 (2.9212)	Entropy 1.32948 (1.33613)	Top-1 acc 53.516 (53.932)	Top-5 acc 74.609 (76.367)	lr 0.01967
Train [37][2140/3239]	Time 0.218 (0.577)	Data Time 0.002 (0.017)	Loss 3.1518 (2.9211)	Entropy 1.32942 (1.33610)	Top-1 acc 46.484 (53.931)	Top-5 acc 74.219 (76.372)	lr 0.01967
Train [37][2150/3239]	Time 0.200 (0.576)	Data Time 0.001 (0.017)	Loss 2.8569 (2.9212)	Entropy 1.32937 (1.33607)	Top-1 acc 54.688 (53.927)	Top-5 acc 77.734 (76.368)	lr 0.01967
Train [37][2160/3239]	Time 0.231 (0.576)	Data Time 0.001 (0.017)	Loss 3.0776 (2.9213)	Entropy 1.32935 (1.33604)	Top-1 acc 47.266 (53.923)	Top-5 acc 73.047 (76.367)	lr 0.01967
Train [37][2170/3239]	Time 0.320 (0.575)	Data Time 0.001 (0.017)	Loss 2.8993 (2.9213)	Entropy 1.32925 (1.33600)	Top-1 acc 51.953 (53.922)	Top-5 acc 78.125 (76.370)	lr 0.01967
Train [37][2180/3239]	Time 0.244 (0.575)	Data Time 0.001 (0.017)	Loss 3.0534 (2.9213)	Entropy 1.32921 (1.33597)	Top-1 acc 46.875 (53.917)	Top-5 acc 74.219 (76.367)	lr 0.01967
Train [37][2190/3239]	Time 0.268 (0.574)	Data Time 0.001 (0.017)	Loss 2.8454 (2.9215)	Entropy 1.32919 (1.33594)	Top-1 acc 55.469 (53.913)	Top-5 acc 78.125 (76.362)	lr 0.01967
Train [37][2200/3239]	Time 0.234 (0.574)	Data Time 0.001 (0.016)	Loss 3.0935 (2.9217)	Entropy 1.32915 (1.33591)	Top-1 acc 48.828 (53.905)	Top-5 acc 74.219 (76.358)	lr 0.01967
Train [37][2210/3239]	Time 2.433 (0.573)	Data Time 0.001 (0.016)	Loss 2.7944 (2.9217)	Entropy 1.32915 (1.33588)	Top-1 acc 58.203 (53.905)	Top-5 acc 76.562 (76.357)	lr 0.01967
Train [37][2220/3239]	Time 0.226 (0.572)	Data Time 0.001 (0.016)	Loss 2.8380 (2.9217)	Entropy 1.32899 (1.33585)	Top-1 acc 55.859 (53.906)	Top-5 acc 79.297 (76.356)	lr 0.01966
Train [37][2230/3239]	Time 0.213 (0.571)	Data Time 0.001 (0.016)	Loss 3.0510 (2.9216)	Entropy 1.32898 (1.33582)	Top-1 acc 49.609 (53.907)	Top-5 acc 74.219 (76.361)	lr 0.01966
Train [37][2240/3239]	Time 0.226 (0.571)	Data Time 0.001 (0.016)	Loss 2.9323 (2.9216)	Entropy 1.32885 (1.33579)	Top-1 acc 53.516 (53.904)	Top-5 acc 75.391 (76.357)	lr 0.01966
Train [37][2250/3239]	Time 0.219 (0.570)	Data Time 0.005 (0.016)	Loss 2.8494 (2.9218)	Entropy 1.32832 (1.33576)	Top-1 acc 58.203 (53.902)	Top-5 acc 76.172 (76.351)	lr 0.01966
Train [37][2260/3239]	Time 0.216 (0.569)	Data Time 0.001 (0.016)	Loss 3.0088 (2.9221)	Entropy 1.32831 (1.33572)	Top-1 acc 51.562 (53.896)	Top-5 acc 75.391 (76.346)	lr 0.01966
Train [37][2270/3239]	Time 0.139 (0.569)	Data Time 0.001 (0.016)	Loss 2.7786 (2.9221)	Entropy 1.32819 (1.33569)	Top-1 acc 57.031 (53.897)	Top-5 acc 80.078 (76.347)	lr 0.01966
Train [37][2280/3239]	Time 0.314 (0.568)	Data Time 0.001 (0.016)	Loss 2.8854 (2.9221)	Entropy 1.32818 (1.33566)	Top-1 acc 51.172 (53.898)	Top-5 acc 76.172 (76.345)	lr 0.01966
Train [37][2290/3239]	Time 0.228 (0.568)	Data Time 0.001 (0.016)	Loss 3.0339 (2.9221)	Entropy 1.32811 (1.33562)	Top-1 acc 55.859 (53.907)	Top-5 acc 74.609 (76.344)	lr 0.01966
Train [37][2300/3239]	Time 0.208 (0.567)	Data Time 0.001 (0.016)	Loss 2.9716 (2.9223)	Entropy 1.32806 (1.33559)	Top-1 acc 57.031 (53.904)	Top-5 acc 75.000 (76.342)	lr 0.01966
Train [37][2310/3239]	Time 0.219 (0.567)	Data Time 0.001 (0.016)	Loss 3.0313 (2.9226)	Entropy 1.32792 (1.33556)	Top-1 acc 51.562 (53.896)	Top-5 acc 76.172 (76.334)	lr 0.01966
Train [37][2320/3239]	Time 2.461 (0.566)	Data Time 0.001 (0.016)	Loss 2.9829 (2.9227)	Entropy 1.32792 (1.33553)	Top-1 acc 54.297 (53.897)	Top-5 acc 76.953 (76.334)	lr 0.01966
Train [37][2330/3239]	Time 0.243 (0.565)	Data Time 0.001 (0.016)	Loss 2.9960 (2.9226)	Entropy 1.32784 (1.33549)	Top-1 acc 51.172 (53.896)	Top-5 acc 72.656 (76.334)	lr 0.01966
Train [37][2340/3239]	Time 0.218 (0.565)	Data Time 0.001 (0.016)	Loss 2.8397 (2.9223)	Entropy 1.32783 (1.33546)	Top-1 acc 58.594 (53.907)	Top-5 acc 79.688 (76.339)	lr 0.01965
Train [37][2350/3239]	Time 0.223 (0.564)	Data Time 0.001 (0.016)	Loss 2.8055 (2.9219)	Entropy 1.32770 (1.33543)	Top-1 acc 57.422 (53.914)	Top-5 acc 80.078 (76.350)	lr 0.01965
Train [37][2360/3239]	Time 0.261 (0.564)	Data Time 0.001 (0.015)	Loss 2.9764 (2.9219)	Entropy 1.32762 (1.33539)	Top-1 acc 55.078 (53.914)	Top-5 acc 75.781 (76.355)	lr 0.01965
Train [37][2370/3239]	Time 0.224 (0.563)	Data Time 0.001 (0.015)	Loss 2.9742 (2.9221)	Entropy 1.32754 (1.33536)	Top-1 acc 53.125 (53.910)	Top-5 acc 74.219 (76.350)	lr 0.01965
Train [37][2380/3239]	Time 0.240 (0.563)	Data Time 0.001 (0.015)	Loss 3.1330 (2.9220)	Entropy 1.32747 (1.33533)	Top-1 acc 49.609 (53.913)	Top-5 acc 71.484 (76.351)	lr 0.01965
Train [37][2390/3239]	Time 0.318 (0.563)	Data Time 0.001 (0.015)	Loss 2.7664 (2.9220)	Entropy 1.32739 (1.33530)	Top-1 acc 55.469 (53.914)	Top-5 acc 78.906 (76.349)	lr 0.01965
Train [37][2400/3239]	Time 0.220 (0.562)	Data Time 0.001 (0.015)	Loss 2.8607 (2.9219)	Entropy 1.32728 (1.33526)	Top-1 acc 56.641 (53.917)	Top-5 acc 79.688 (76.353)	lr 0.01965
Train [37][2410/3239]	Time 0.228 (0.562)	Data Time 0.001 (0.015)	Loss 2.9703 (2.9219)	Entropy 1.32726 (1.33523)	Top-1 acc 51.562 (53.911)	Top-5 acc 76.172 (76.354)	lr 0.01965
Train [37][2420/3239]	Time 0.222 (0.561)	Data Time 0.001 (0.015)	Loss 2.9507 (2.9218)	Entropy 1.32714 (1.33520)	Top-1 acc 53.906 (53.916)	Top-5 acc 77.734 (76.358)	lr 0.01965
Train [37][2430/3239]	Time 2.446 (0.561)	Data Time 0.001 (0.015)	Loss 2.8054 (2.9219)	Entropy 1.32714 (1.33516)	Top-1 acc 53.125 (53.915)	Top-5 acc 81.641 (76.357)	lr 0.01965
Train [37][2440/3239]	Time 0.215 (0.559)	Data Time 0.001 (0.015)	Loss 2.8519 (2.9223)	Entropy 1.32698 (1.33513)	Top-1 acc 56.250 (53.906)	Top-5 acc 76.953 (76.350)	lr 0.01965
Train [37][2450/3239]	Time 0.230 (0.559)	Data Time 0.001 (0.015)	Loss 2.9825 (2.9223)	Entropy 1.32689 (1.33510)	Top-1 acc 52.734 (53.905)	Top-5 acc 75.391 (76.353)	lr 0.01965
Train [37][2460/3239]	Time 0.242 (0.558)	Data Time 0.001 (0.015)	Loss 2.9072 (2.9221)	Entropy 1.32683 (1.33506)	Top-1 acc 54.688 (53.910)	Top-5 acc 76.562 (76.353)	lr 0.01964
Train [37][2470/3239]	Time 0.226 (0.558)	Data Time 0.001 (0.015)	Loss 2.8541 (2.9219)	Entropy 1.32676 (1.33503)	Top-1 acc 56.250 (53.915)	Top-5 acc 78.125 (76.359)	lr 0.01964
Train [37][2480/3239]	Time 0.246 (0.558)	Data Time 0.001 (0.015)	Loss 3.0145 (2.9220)	Entropy 1.32673 (1.33500)	Top-1 acc 51.953 (53.916)	Top-5 acc 72.656 (76.358)	lr 0.01964
Train [37][2490/3239]	Time 0.278 (0.574)	Data Time 0.002 (0.015)	Loss 2.9017 (2.9218)	Entropy 1.32672 (1.33496)	Top-1 acc 50.000 (53.918)	Top-5 acc 76.172 (76.362)	lr 0.01964
Train [37][2500/3239]	Time 0.361 (0.574)	Data Time 0.002 (0.015)	Loss 2.7551 (2.9215)	Entropy 1.32669 (1.33493)	Top-1 acc 57.031 (53.919)	Top-5 acc 78.125 (76.368)	lr 0.01964
Train [37][2510/3239]	Time 0.228 (0.574)	Data Time 0.001 (0.015)	Loss 2.9910 (2.9215)	Entropy 1.32666 (1.33490)	Top-1 acc 56.250 (53.922)	Top-5 acc 73.047 (76.367)	lr 0.01964
Train [37][2520/3239]	Time 0.234 (0.573)	Data Time 0.001 (0.015)	Loss 2.8901 (2.9216)	Entropy 1.32662 (1.33486)	Top-1 acc 55.859 (53.918)	Top-5 acc 75.391 (76.363)	lr 0.01964
Train [37][2530/3239]	Time 0.223 (0.573)	Data Time 0.001 (0.015)	Loss 2.8664 (2.9216)	Entropy 1.32660 (1.33483)	Top-1 acc 55.859 (53.918)	Top-5 acc 79.297 (76.364)	lr 0.01964
Train [37][2540/3239]	Time 2.607 (0.572)	Data Time 0.001 (0.014)	Loss 3.0144 (2.9217)	Entropy 1.32660 (1.33480)	Top-1 acc 48.828 (53.913)	Top-5 acc 75.000 (76.360)	lr 0.01964
Train [37][2550/3239]	Time 0.242 (0.571)	Data Time 0.002 (0.014)	Loss 2.9660 (2.9217)	Entropy 1.32645 (1.33477)	Top-1 acc 54.297 (53.910)	Top-5 acc 75.391 (76.359)	lr 0.01964
Train [37][2560/3239]	Time 0.224 (0.571)	Data Time 0.001 (0.014)	Loss 2.8664 (2.9217)	Entropy 1.32644 (1.33473)	Top-1 acc 58.203 (53.911)	Top-5 acc 76.953 (76.362)	lr 0.01964
Train [37][2570/3239]	Time 0.212 (0.570)	Data Time 0.001 (0.014)	Loss 2.8398 (2.9217)	Entropy 1.32640 (1.33470)	Top-1 acc 56.250 (53.910)	Top-5 acc 79.297 (76.364)	lr 0.01964
Train [37][2580/3239]	Time 0.227 (0.570)	Data Time 0.001 (0.014)	Loss 2.8261 (2.9219)	Entropy 1.32629 (1.33467)	Top-1 acc 58.203 (53.908)	Top-5 acc 80.469 (76.360)	lr 0.01963
Train [37][2590/3239]	Time 0.213 (0.569)	Data Time 0.002 (0.014)	Loss 2.9383 (2.9219)	Entropy 1.32627 (1.33464)	Top-1 acc 53.906 (53.909)	Top-5 acc 72.656 (76.359)	lr 0.01963
Train [37][2600/3239]	Time 0.233 (0.569)	Data Time 0.001 (0.014)	Loss 3.0127 (2.9219)	Entropy 1.32619 (1.33460)	Top-1 acc 53.906 (53.911)	Top-5 acc 72.266 (76.356)	lr 0.01963
Train [37][2610/3239]	Time 0.310 (0.568)	Data Time 0.001 (0.014)	Loss 2.9599 (2.9218)	Entropy 1.32610 (1.33457)	Top-1 acc 51.953 (53.913)	Top-5 acc 73.047 (76.355)	lr 0.01963
Train [37][2620/3239]	Time 0.233 (0.568)	Data Time 0.001 (0.014)	Loss 3.0135 (2.9216)	Entropy 1.32610 (1.33454)	Top-1 acc 50.000 (53.915)	Top-5 acc 75.391 (76.363)	lr 0.01963
Train [37][2630/3239]	Time 0.228 (0.568)	Data Time 0.001 (0.014)	Loss 2.6779 (2.9215)	Entropy 1.32604 (1.33451)	Top-1 acc 54.688 (53.912)	Top-5 acc 81.250 (76.366)	lr 0.01963
Train [37][2640/3239]	Time 0.233 (0.567)	Data Time 0.001 (0.014)	Loss 2.9057 (2.9218)	Entropy 1.32596 (1.33448)	Top-1 acc 53.125 (53.907)	Top-5 acc 80.469 (76.361)	lr 0.01963
Train [37][2650/3239]	Time 0.261 (0.567)	Data Time 0.001 (0.014)	Loss 2.6936 (2.9218)	Entropy 1.32611 (1.33444)	Top-1 acc 62.109 (53.911)	Top-5 acc 80.469 (76.361)	lr 0.01963
Train [37][2660/3239]	Time 0.319 (0.566)	Data Time 0.001 (0.014)	Loss 2.8499 (2.9219)	Entropy 1.32609 (1.33441)	Top-1 acc 57.031 (53.908)	Top-5 acc 78.125 (76.361)	lr 0.01963
Train [37][2670/3239]	Time 0.245 (0.566)	Data Time 0.001 (0.014)	Loss 2.9566 (2.9220)	Entropy 1.32605 (1.33438)	Top-1 acc 53.125 (53.904)	Top-5 acc 76.562 (76.360)	lr 0.01963
Train [37][2680/3239]	Time 0.225 (0.565)	Data Time 0.001 (0.014)	Loss 2.9014 (2.9219)	Entropy 1.32597 (1.33435)	Top-1 acc 56.250 (53.910)	Top-5 acc 77.734 (76.359)	lr 0.01963
Train [37][2690/3239]	Time 0.208 (0.565)	Data Time 0.001 (0.014)	Loss 2.8005 (2.9220)	Entropy 1.32597 (1.33432)	Top-1 acc 52.734 (53.909)	Top-5 acc 77.734 (76.356)	lr 0.01963
Train [37][2700/3239]	Time 0.219 (0.565)	Data Time 0.001 (0.014)	Loss 3.0669 (2.9223)	Entropy 1.32583 (1.33429)	Top-1 acc 53.125 (53.902)	Top-5 acc 72.656 (76.349)	lr 0.01962
Train [37][2710/3239]	Time 0.383 (0.564)	Data Time 0.002 (0.014)	Loss 2.8375 (2.9222)	Entropy 1.32579 (1.33426)	Top-1 acc 54.297 (53.906)	Top-5 acc 75.391 (76.350)	lr 0.01962
Train [37][2720/3239]	Time 0.226 (0.564)	Data Time 0.005 (0.014)	Loss 3.0099 (2.9220)	Entropy 1.32571 (1.33422)	Top-1 acc 53.125 (53.909)	Top-5 acc 75.391 (76.355)	lr 0.01962
Train [37][2730/3239]	Time 0.228 (0.563)	Data Time 0.001 (0.014)	Loss 2.9278 (2.9219)	Entropy 1.32555 (1.33419)	Top-1 acc 54.297 (53.914)	Top-5 acc 76.172 (76.354)	lr 0.01962
Train [37][2740/3239]	Time 0.202 (0.563)	Data Time 0.001 (0.014)	Loss 2.8566 (2.9219)	Entropy 1.32538 (1.33416)	Top-1 acc 51.172 (53.916)	Top-5 acc 75.000 (76.353)	lr 0.01962
Train [37][2750/3239]	Time 0.243 (0.563)	Data Time 0.001 (0.014)	Loss 3.0062 (2.9220)	Entropy 1.32532 (1.33413)	Top-1 acc 53.125 (53.912)	Top-5 acc 75.000 (76.353)	lr 0.01962
Train [37][2760/3239]	Time 0.344 (0.562)	Data Time 0.001 (0.013)	Loss 2.9079 (2.9219)	Entropy 1.32528 (1.33410)	Top-1 acc 55.469 (53.917)	Top-5 acc 76.562 (76.353)	lr 0.01962
Train [37][2770/3239]	Time 0.214 (0.562)	Data Time 0.001 (0.013)	Loss 2.9642 (2.9218)	Entropy 1.32518 (1.33407)	Top-1 acc 50.391 (53.921)	Top-5 acc 73.828 (76.355)	lr 0.01962
Train [37][2780/3239]	Time 0.253 (0.561)	Data Time 0.001 (0.013)	Loss 2.7892 (2.9218)	Entropy 1.32511 (1.33403)	Top-1 acc 57.422 (53.920)	Top-5 acc 80.469 (76.355)	lr 0.01962
Train [37][2790/3239]	Time 0.234 (0.561)	Data Time 0.001 (0.013)	Loss 2.9863 (2.9222)	Entropy 1.32507 (1.33400)	Top-1 acc 51.562 (53.908)	Top-5 acc 77.344 (76.348)	lr 0.01962
Train [37][2800/3239]	Time 0.266 (0.561)	Data Time 0.001 (0.013)	Loss 2.9765 (2.9224)	Entropy 1.32508 (1.33397)	Top-1 acc 49.609 (53.906)	Top-5 acc 76.562 (76.346)	lr 0.01962
Train [37][2810/3239]	Time 0.219 (0.560)	Data Time 0.001 (0.013)	Loss 2.6817 (2.9222)	Entropy 1.32492 (1.33394)	Top-1 acc 61.328 (53.912)	Top-5 acc 81.641 (76.351)	lr 0.01962
Train [37][2820/3239]	Time 0.236 (0.560)	Data Time 0.001 (0.013)	Loss 3.0269 (2.9223)	Entropy 1.32493 (1.33391)	Top-1 acc 51.562 (53.906)	Top-5 acc 71.875 (76.347)	lr 0.01961
Train [37][2830/3239]	Time 0.238 (0.575)	Data Time 0.003 (0.013)	Loss 2.9787 (2.9223)	Entropy 1.32483 (1.33388)	Top-1 acc 49.609 (53.903)	Top-5 acc 75.000 (76.349)	lr 0.01961
Train [37][2840/3239]	Time 0.236 (0.575)	Data Time 0.002 (0.013)	Loss 2.9606 (2.9225)	Entropy 1.32484 (1.33384)	Top-1 acc 53.516 (53.898)	Top-5 acc 76.172 (76.345)	lr 0.01961
Train [37][2850/3239]	Time 0.215 (0.575)	Data Time 0.001 (0.013)	Loss 2.7252 (2.9224)	Entropy 1.32479 (1.33381)	Top-1 acc 60.547 (53.897)	Top-5 acc 79.297 (76.345)	lr 0.01961
Train [37][2860/3239]	Time 0.383 (0.574)	Data Time 0.002 (0.013)	Loss 2.9533 (2.9223)	Entropy 1.32463 (1.33378)	Top-1 acc 51.953 (53.899)	Top-5 acc 76.953 (76.349)	lr 0.01961
Train [37][2870/3239]	Time 0.247 (0.574)	Data Time 0.002 (0.013)	Loss 2.6885 (2.9224)	Entropy 1.32460 (1.33375)	Top-1 acc 59.375 (53.899)	Top-5 acc 79.297 (76.348)	lr 0.01961
Train [37][2880/3239]	Time 0.200 (0.574)	Data Time 0.001 (0.013)	Loss 2.7912 (2.9225)	Entropy 1.32447 (1.33372)	Top-1 acc 53.125 (53.896)	Top-5 acc 80.859 (76.347)	lr 0.01961
Train [37][2890/3239]	Time 0.220 (0.573)	Data Time 0.001 (0.013)	Loss 2.9412 (2.9225)	Entropy 1.32441 (1.33368)	Top-1 acc 51.953 (53.892)	Top-5 acc 73.047 (76.345)	lr 0.01961
Train [37][2900/3239]	Time 0.248 (0.573)	Data Time 0.001 (0.013)	Loss 2.9317 (2.9224)	Entropy 1.32441 (1.33365)	Top-1 acc 57.031 (53.896)	Top-5 acc 77.344 (76.350)	lr 0.01961
Train [37][2910/3239]	Time 0.250 (0.572)	Data Time 0.001 (0.013)	Loss 2.8171 (2.9222)	Entropy 1.32440 (1.33362)	Top-1 acc 54.297 (53.902)	Top-5 acc 78.906 (76.356)	lr 0.01961
Train [37][2920/3239]	Time 0.232 (0.572)	Data Time 0.001 (0.013)	Loss 2.8399 (2.9223)	Entropy 1.32436 (1.33359)	Top-1 acc 55.469 (53.898)	Top-5 acc 77.734 (76.358)	lr 0.01961
Train [37][2930/3239]	Time 0.227 (0.572)	Data Time 0.001 (0.013)	Loss 3.1189 (2.9224)	Entropy 1.32428 (1.33356)	Top-1 acc 49.609 (53.895)	Top-5 acc 73.828 (76.355)	lr 0.01961
Train [37][2940/3239]	Time 0.227 (0.571)	Data Time 0.002 (0.013)	Loss 3.0179 (2.9224)	Entropy 1.32417 (1.33353)	Top-1 acc 50.000 (53.892)	Top-5 acc 75.000 (76.356)	lr 0.01961
Train [37][2950/3239]	Time 0.257 (0.571)	Data Time 0.001 (0.013)	Loss 2.9146 (2.9224)	Entropy 1.32407 (1.33349)	Top-1 acc 54.688 (53.890)	Top-5 acc 76.953 (76.354)	lr 0.01960
Train [37][2960/3239]	Time 0.246 (0.570)	Data Time 0.001 (0.013)	Loss 3.0897 (2.9224)	Entropy 1.32401 (1.33346)	Top-1 acc 52.344 (53.888)	Top-5 acc 73.438 (76.354)	lr 0.01960
Train [37][2970/3239]	Time 0.405 (0.570)	Data Time 0.001 (0.013)	Loss 2.8669 (2.9224)	Entropy 1.32390 (1.33343)	Top-1 acc 54.297 (53.886)	Top-5 acc 76.953 (76.353)	lr 0.01960
Train [37][2980/3239]	Time 0.205 (0.570)	Data Time 0.001 (0.013)	Loss 2.9017 (2.9226)	Entropy 1.32395 (1.33340)	Top-1 acc 53.906 (53.883)	Top-5 acc 74.219 (76.348)	lr 0.01960
Train [37][2990/3239]	Time 0.274 (0.569)	Data Time 0.001 (0.013)	Loss 3.0169 (2.9228)	Entropy 1.32392 (1.33337)	Top-1 acc 50.000 (53.881)	Top-5 acc 77.734 (76.346)	lr 0.01960
Train [37][3000/3239]	Time 0.230 (0.569)	Data Time 0.001 (0.013)	Loss 2.8940 (2.9230)	Entropy 1.32390 (1.33334)	Top-1 acc 55.469 (53.879)	Top-5 acc 76.172 (76.342)	lr 0.01960
Train [37][3010/3239]	Time 0.224 (0.568)	Data Time 0.001 (0.012)	Loss 2.8552 (2.9230)	Entropy 1.32391 (1.33330)	Top-1 acc 55.078 (53.880)	Top-5 acc 75.781 (76.341)	lr 0.01960
Train [37][3020/3239]	Time 0.362 (0.568)	Data Time 0.002 (0.012)	Loss 2.8144 (2.9231)	Entropy 1.32387 (1.33327)	Top-1 acc 57.812 (53.874)	Top-5 acc 78.125 (76.339)	lr 0.01960
Train [37][3030/3239]	Time 0.201 (0.568)	Data Time 0.001 (0.012)	Loss 2.7999 (2.9230)	Entropy 1.32386 (1.33324)	Top-1 acc 58.203 (53.875)	Top-5 acc 78.516 (76.341)	lr 0.01960
Train [37][3040/3239]	Time 0.227 (0.567)	Data Time 0.001 (0.012)	Loss 2.7919 (2.9231)	Entropy 1.32376 (1.33321)	Top-1 acc 55.469 (53.874)	Top-5 acc 76.172 (76.338)	lr 0.01960
Train [37][3050/3239]	Time 0.243 (0.567)	Data Time 0.001 (0.012)	Loss 3.0894 (2.9231)	Entropy 1.32375 (1.33318)	Top-1 acc 51.172 (53.874)	Top-5 acc 75.781 (76.339)	lr 0.01960
Train [37][3060/3239]	Time 0.245 (0.566)	Data Time 0.001 (0.012)	Loss 2.9418 (2.9232)	Entropy 1.32368 (1.33315)	Top-1 acc 55.469 (53.873)	Top-5 acc 73.828 (76.337)	lr 0.01960
Train [37][3070/3239]	Time 0.312 (0.566)	Data Time 0.001 (0.012)	Loss 3.0077 (2.9232)	Entropy 1.32365 (1.33312)	Top-1 acc 50.781 (53.873)	Top-5 acc 74.219 (76.336)	lr 0.01959
Train [37][3080/3239]	Time 0.269 (0.566)	Data Time 0.001 (0.012)	Loss 2.7471 (2.9231)	Entropy 1.32362 (1.33309)	Top-1 acc 57.422 (53.875)	Top-5 acc 79.688 (76.340)	lr 0.01959
Train [37][3090/3239]	Time 0.237 (0.565)	Data Time 0.001 (0.012)	Loss 2.9253 (2.9230)	Entropy 1.32359 (1.33306)	Top-1 acc 55.078 (53.880)	Top-5 acc 75.781 (76.340)	lr 0.01959
Train [37][3100/3239]	Time 0.221 (0.565)	Data Time 0.001 (0.012)	Loss 3.1821 (2.9230)	Entropy 1.32353 (1.33303)	Top-1 acc 50.000 (53.876)	Top-5 acc 72.656 (76.341)	lr 0.01959
Train [37][3110/3239]	Time 0.200 (0.565)	Data Time 0.002 (0.012)	Loss 3.1397 (2.9232)	Entropy 1.32342 (1.33300)	Top-1 acc 50.000 (53.876)	Top-5 acc 73.438 (76.341)	lr 0.01959
Train [37][3120/3239]	Time 0.243 (0.564)	Data Time 0.002 (0.012)	Loss 2.8714 (2.9229)	Entropy 1.32337 (1.33297)	Top-1 acc 52.734 (53.881)	Top-5 acc 76.953 (76.345)	lr 0.01959
Train [37][3130/3239]	Time 0.336 (0.564)	Data Time 0.001 (0.012)	Loss 2.8582 (2.9228)	Entropy 1.32327 (1.33293)	Top-1 acc 56.250 (53.882)	Top-5 acc 76.953 (76.347)	lr 0.01959
Train [37][3140/3239]	Time 0.243 (0.564)	Data Time 0.001 (0.012)	Loss 3.0158 (2.9228)	Entropy 1.32318 (1.33290)	Top-1 acc 53.516 (53.883)	Top-5 acc 71.875 (76.347)	lr 0.01959
Train [37][3150/3239]	Time 0.212 (0.563)	Data Time 0.001 (0.012)	Loss 2.8519 (2.9227)	Entropy 1.32315 (1.33287)	Top-1 acc 53.906 (53.882)	Top-5 acc 75.391 (76.349)	lr 0.01959
Train [37][3160/3239]	Time 0.263 (0.576)	Data Time 0.004 (0.012)	Loss 3.0253 (2.9228)	Entropy 1.32309 (1.33284)	Top-1 acc 53.125 (53.877)	Top-5 acc 75.781 (76.346)	lr 0.01959
Train [37][3170/3239]	Time 0.238 (0.576)	Data Time 0.002 (0.012)	Loss 2.8666 (2.9231)	Entropy 1.32294 (1.33281)	Top-1 acc 57.812 (53.873)	Top-5 acc 77.734 (76.339)	lr 0.01959
Train [37][3180/3239]	Time 0.212 (0.576)	Data Time 0.000 (0.012)	Loss 2.8777 (2.9231)	Entropy 1.32297 (1.33278)	Top-1 acc 51.953 (53.865)	Top-5 acc 79.297 (76.339)	lr 0.01959
Train [37][3190/3239]	Time 0.218 (0.575)	Data Time 0.000 (0.012)	Loss 2.9018 (2.9233)	Entropy 1.32297 (1.33275)	Top-1 acc 56.250 (53.860)	Top-5 acc 74.609 (76.334)	lr 0.01958
Train [37][3200/3239]	Time 0.215 (0.575)	Data Time 0.000 (0.012)	Loss 2.9924 (2.9233)	Entropy 1.32293 (1.33272)	Top-1 acc 51.562 (53.857)	Top-5 acc 77.344 (76.334)	lr 0.01958
Train [37][3210/3239]	Time 0.221 (0.574)	Data Time 0.000 (0.012)	Loss 2.9652 (2.9232)	Entropy 1.32291 (1.33269)	Top-1 acc 55.078 (53.862)	Top-5 acc 74.609 (76.337)	lr 0.01958
Train [37][3220/3239]	Time 0.218 (0.574)	Data Time 0.000 (0.012)	Loss 3.0555 (2.9232)	Entropy 1.32288 (1.33266)	Top-1 acc 48.828 (53.859)	Top-5 acc 75.781 (76.339)	lr 0.01958
Train [37][3230/3239]	Time 0.221 (0.573)	Data Time 0.000 (0.012)	Loss 2.8701 (2.9231)	Entropy 1.32282 (1.33263)	Top-1 acc 57.031 (53.860)	Top-5 acc 77.734 (76.339)	lr 0.01958
Train [37][3239/3239]	Time 2.205 (0.573)	Data Time 0.000 (0.012)	Loss 2.9609 (2.9234)	Entropy 1.32282 (1.33260)	Top-1 acc 56.790 (53.857)	Top-5 acc 79.012 (76.333)	lr 0.01958
==========Valid [37/120]	loss 1.742	top-1 acc 61.178 (61.178)	top-5 acc 82.632	Train top-1 53.857	top-5 76.333	Entropy 1.32282	Latency-None: 0.000ms	Flops: 559.17M
Train [38][0/3239]	Time 29.710 (29.710)	Data Time 28.747 (28.747)	Loss 2.8884 (2.8884)	Entropy 1.32276 (1.32276)	Top-1 acc 54.297 (54.297)	Top-5 acc 75.391 (75.391)	lr 0.01958
Train [38][10/3239]	Time 2.579 (3.317)	Data Time 0.002 (2.710)	Loss 2.8988 (2.8604)	Entropy 1.32276 (1.32276)	Top-1 acc 57.812 (55.540)	Top-5 acc 76.953 (76.989)	lr 0.01958
Train [38][20/3239]	Time 0.245 (1.849)	Data Time 0.001 (1.420)	Loss 2.8452 (2.8402)	Entropy 1.32271 (1.32273)	Top-1 acc 55.469 (55.580)	Top-5 acc 78.906 (77.530)	lr 0.01958
Train [38][30/3239]	Time 0.245 (1.398)	Data Time 0.002 (0.963)	Loss 2.5976 (2.8631)	Entropy 1.32256 (1.32268)	Top-1 acc 59.375 (54.952)	Top-5 acc 85.938 (77.331)	lr 0.01958
Train [38][40/3239]	Time 0.316 (1.168)	Data Time 0.002 (0.728)	Loss 2.7943 (2.8643)	Entropy 1.32256 (1.32265)	Top-1 acc 56.641 (55.088)	Top-5 acc 79.688 (77.134)	lr 0.01958
Train [38][50/3239]	Time 0.200 (1.025)	Data Time 0.001 (0.586)	Loss 2.7222 (2.8713)	Entropy 1.32248 (1.32262)	Top-1 acc 58.203 (54.979)	Top-5 acc 82.812 (77.168)	lr 0.01958
Train [38][60/3239]	Time 0.229 (0.930)	Data Time 0.001 (0.490)	Loss 2.9129 (2.8695)	Entropy 1.32243 (1.32260)	Top-1 acc 53.125 (55.232)	Top-5 acc 76.953 (77.222)	lr 0.01958
Train [38][70/3239]	Time 0.232 (0.862)	Data Time 0.001 (0.421)	Loss 2.7813 (2.8709)	Entropy 1.32238 (1.32257)	Top-1 acc 56.250 (55.183)	Top-5 acc 77.344 (77.140)	lr 0.01957
Train [38][80/3239]	Time 0.222 (0.813)	Data Time 0.001 (0.369)	Loss 2.8613 (2.8745)	Entropy 1.32228 (1.32254)	Top-1 acc 54.688 (55.204)	Top-5 acc 79.297 (77.083)	lr 0.01957
Train [38][90/3239]	Time 0.308 (0.776)	Data Time 0.001 (0.329)	Loss 2.7868 (2.8781)	Entropy 1.32221 (1.32251)	Top-1 acc 59.766 (55.070)	Top-5 acc 77.734 (77.018)	lr 0.01957
Train [38][100/3239]	Time 0.213 (0.744)	Data Time 0.001 (0.297)	Loss 2.9085 (2.8810)	Entropy 1.32221 (1.32248)	Top-1 acc 55.469 (55.051)	Top-5 acc 76.562 (76.965)	lr 0.01957
Train [38][110/3239]	Time 0.201 (0.718)	Data Time 0.001 (0.270)	Loss 2.8764 (2.8829)	Entropy 1.32210 (1.32245)	Top-1 acc 53.906 (55.039)	Top-5 acc 76.172 (76.914)	lr 0.01957
Train [38][120/3239]	Time 2.591 (0.698)	Data Time 0.001 (0.248)	Loss 2.9763 (2.8833)	Entropy 1.32210 (1.32242)	Top-1 acc 52.734 (55.030)	Top-5 acc 75.781 (76.895)	lr 0.01957
Train [38][130/3239]	Time 0.220 (0.663)	Data Time 0.001 (0.229)	Loss 2.9570 (2.8772)	Entropy 1.32203 (1.32239)	Top-1 acc 53.125 (55.090)	Top-5 acc 74.219 (77.019)	lr 0.01957
Train [38][140/3239]	Time 0.320 (0.648)	Data Time 0.001 (0.213)	Loss 2.7834 (2.8777)	Entropy 1.32192 (1.32236)	Top-1 acc 54.688 (54.992)	Top-5 acc 78.516 (77.067)	lr 0.01957
Train [38][150/3239]	Time 0.221 (0.634)	Data Time 0.001 (0.199)	Loss 2.7024 (2.8762)	Entropy 1.32190 (1.32233)	Top-1 acc 55.078 (54.959)	Top-5 acc 80.469 (77.051)	lr 0.01957
Train [38][160/3239]	Time 0.216 (0.623)	Data Time 0.001 (0.187)	Loss 2.6845 (2.8716)	Entropy 1.32190 (1.32230)	Top-1 acc 61.719 (55.102)	Top-5 acc 80.469 (77.174)	lr 0.01957
Train [38][170/3239]	Time 0.244 (0.613)	Data Time 0.001 (0.176)	Loss 2.8604 (2.8726)	Entropy 1.32182 (1.32228)	Top-1 acc 55.469 (55.101)	Top-5 acc 76.562 (77.104)	lr 0.01957
Train [38][180/3239]	Time 0.227 (0.606)	Data Time 0.001 (0.166)	Loss 2.8791 (2.8709)	Entropy 1.32173 (1.32225)	Top-1 acc 53.906 (55.087)	Top-5 acc 78.125 (77.137)	lr 0.01957
Train [38][190/3239]	Time 0.230 (0.598)	Data Time 0.001 (0.158)	Loss 3.0012 (2.8828)	Entropy 1.32174 (1.32222)	Top-1 acc 52.344 (54.896)	Top-5 acc 75.391 (76.959)	lr 0.01956
Train [38][200/3239]	Time 0.170 (0.592)	Data Time 0.001 (0.150)	Loss 2.7587 (2.8806)	Entropy 1.32166 (1.32220)	Top-1 acc 60.156 (54.985)	Top-5 acc 78.516 (76.988)	lr 0.01956
Train [38][210/3239]	Time 0.276 (0.587)	Data Time 0.001 (0.143)	Loss 2.8030 (2.8786)	Entropy 1.32158 (1.32217)	Top-1 acc 55.859 (55.010)	Top-5 acc 78.516 (77.085)	lr 0.01956
Train [38][220/3239]	Time 0.223 (0.582)	Data Time 0.001 (0.137)	Loss 2.8286 (2.8797)	Entropy 1.32153 (1.32214)	Top-1 acc 56.250 (54.984)	Top-5 acc 76.953 (77.082)	lr 0.01956
Train [38][230/3239]	Time 2.513 (0.576)	Data Time 0.001 (0.131)	Loss 3.0250 (2.8788)	Entropy 1.32153 (1.32212)	Top-1 acc 48.047 (55.000)	Top-5 acc 76.953 (77.115)	lr 0.01956
Train [38][240/3239]	Time 0.238 (0.562)	Data Time 0.002 (0.125)	Loss 2.7590 (2.8792)	Entropy 1.32142 (1.32209)	Top-1 acc 62.500 (55.008)	Top-5 acc 80.469 (77.112)	lr 0.01956
Train [38][250/3239]	Time 0.391 (0.558)	Data Time 0.002 (0.120)	Loss 3.1514 (2.8791)	Entropy 1.32128 (1.32206)	Top-1 acc 52.734 (55.036)	Top-5 acc 70.703 (77.093)	lr 0.01956
Train [38][260/3239]	Time 0.196 (0.554)	Data Time 0.001 (0.116)	Loss 2.9712 (2.8787)	Entropy 1.32118 (1.32202)	Top-1 acc 53.516 (55.024)	Top-5 acc 74.609 (77.106)	lr 0.01956
Train [38][270/3239]	Time 0.190 (0.550)	Data Time 0.001 (0.112)	Loss 2.9254 (2.8807)	Entropy 1.32115 (1.32199)	Top-1 acc 56.641 (54.993)	Top-5 acc 75.391 (77.066)	lr 0.01956
Train [38][280/3239]	Time 0.378 (0.702)	Data Time 0.003 (0.108)	Loss 2.8164 (2.8797)	Entropy 1.32113 (1.32196)	Top-1 acc 53.125 (55.002)	Top-5 acc 77.344 (77.099)	lr 0.01956
Train [38][290/3239]	Time 0.219 (0.696)	Data Time 0.002 (0.105)	Loss 2.7593 (2.8783)	Entropy 1.32104 (1.32193)	Top-1 acc 57.812 (55.018)	Top-5 acc 78.516 (77.105)	lr 0.01956
Train [38][300/3239]	Time 0.312 (0.688)	Data Time 0.002 (0.101)	Loss 3.0091 (2.8791)	Entropy 1.32096 (1.32190)	Top-1 acc 53.516 (55.007)	Top-5 acc 77.344 (77.111)	lr 0.01956
Train [38][310/3239]	Time 0.226 (0.681)	Data Time 0.001 (0.098)	Loss 2.9077 (2.8796)	Entropy 1.32089 (1.32187)	Top-1 acc 53.906 (54.989)	Top-5 acc 78.516 (77.123)	lr 0.01955
Train [38][320/3239]	Time 0.204 (0.674)	Data Time 0.001 (0.095)	Loss 3.0309 (2.8803)	Entropy 1.32082 (1.32184)	Top-1 acc 48.828 (54.966)	Top-5 acc 74.219 (77.117)	lr 0.01955
Train [38][330/3239]	Time 0.264 (0.668)	Data Time 0.001 (0.092)	Loss 2.8248 (2.8807)	Entropy 1.32081 (1.32181)	Top-1 acc 58.203 (54.965)	Top-5 acc 78.516 (77.131)	lr 0.01955
Train [38][340/3239]	Time 2.459 (0.662)	Data Time 0.003 (0.090)	Loss 2.9695 (2.8820)	Entropy 1.32081 (1.32178)	Top-1 acc 50.781 (54.903)	Top-5 acc 76.562 (77.115)	lr 0.01955
Train [38][350/3239]	Time 0.316 (0.649)	Data Time 0.001 (0.087)	Loss 2.8791 (2.8806)	Entropy 1.32080 (1.32175)	Top-1 acc 57.031 (54.956)	Top-5 acc 78.516 (77.157)	lr 0.01955
Train [38][360/3239]	Time 0.205 (0.644)	Data Time 0.001 (0.085)	Loss 3.1524 (2.8817)	Entropy 1.32070 (1.32172)	Top-1 acc 51.172 (54.917)	Top-5 acc 70.703 (77.118)	lr 0.01955
Train [38][370/3239]	Time 0.219 (0.638)	Data Time 0.001 (0.083)	Loss 2.6796 (2.8812)	Entropy 1.32065 (1.32169)	Top-1 acc 57.422 (54.952)	Top-5 acc 81.250 (77.115)	lr 0.01955
Train [38][380/3239]	Time 0.220 (0.634)	Data Time 0.001 (0.080)	Loss 2.7717 (2.8816)	Entropy 1.32060 (1.32167)	Top-1 acc 59.766 (54.928)	Top-5 acc 80.469 (77.124)	lr 0.01955
Train [38][390/3239]	Time 0.250 (0.629)	Data Time 0.001 (0.078)	Loss 2.9246 (2.8823)	Entropy 1.32051 (1.32164)	Top-1 acc 54.297 (54.921)	Top-5 acc 75.781 (77.106)	lr 0.01955
Train [38][400/3239]	Time 0.230 (0.625)	Data Time 0.002 (0.076)	Loss 2.8282 (2.8815)	Entropy 1.32046 (1.32161)	Top-1 acc 53.906 (54.926)	Top-5 acc 78.125 (77.110)	lr 0.01955
Train [38][410/3239]	Time 0.216 (0.621)	Data Time 0.001 (0.075)	Loss 3.0187 (2.8819)	Entropy 1.32042 (1.32158)	Top-1 acc 50.781 (54.916)	Top-5 acc 78.125 (77.102)	lr 0.01955
Train [38][420/3239]	Time 0.222 (0.617)	Data Time 0.001 (0.073)	Loss 2.7426 (2.8817)	Entropy 1.32041 (1.32155)	Top-1 acc 60.938 (54.905)	Top-5 acc 78.125 (77.107)	lr 0.01955
Train [38][430/3239]	Time 0.203 (0.614)	Data Time 0.001 (0.071)	Loss 2.7077 (2.8827)	Entropy 1.32037 (1.32153)	Top-1 acc 55.469 (54.882)	Top-5 acc 81.250 (77.104)	lr 0.01954
Train [38][440/3239]	Time 0.217 (0.610)	Data Time 0.001 (0.070)	Loss 2.8115 (2.8827)	Entropy 1.32035 (1.32150)	Top-1 acc 57.422 (54.888)	Top-5 acc 78.125 (77.099)	lr 0.01954
Train [38][450/3239]	Time 2.327 (0.606)	Data Time 0.001 (0.068)	Loss 2.8637 (2.8827)	Entropy 1.32035 (1.32147)	Top-1 acc 51.562 (54.876)	Top-5 acc 76.172 (77.098)	lr 0.01954
Train [38][460/3239]	Time 0.274 (0.598)	Data Time 0.001 (0.067)	Loss 2.8480 (2.8833)	Entropy 1.32032 (1.32145)	Top-1 acc 54.688 (54.847)	Top-5 acc 77.344 (77.081)	lr 0.01954
Train [38][470/3239]	Time 0.229 (0.595)	Data Time 0.001 (0.065)	Loss 2.9263 (2.8835)	Entropy 1.32018 (1.32142)	Top-1 acc 54.688 (54.859)	Top-5 acc 75.000 (77.063)	lr 0.01954
Train [38][480/3239]	Time 0.227 (0.592)	Data Time 0.001 (0.064)	Loss 2.9189 (2.8844)	Entropy 1.32014 (1.32140)	Top-1 acc 55.078 (54.830)	Top-5 acc 76.953 (77.034)	lr 0.01954
Train [38][490/3239]	Time 0.211 (0.589)	Data Time 0.001 (0.063)	Loss 2.9984 (2.8858)	Entropy 1.32011 (1.32137)	Top-1 acc 53.906 (54.799)	Top-5 acc 73.047 (77.000)	lr 0.01954
Train [38][500/3239]	Time 0.226 (0.586)	Data Time 0.001 (0.062)	Loss 3.0188 (2.8867)	Entropy 1.32002 (1.32134)	Top-1 acc 56.250 (54.793)	Top-5 acc 75.391 (76.984)	lr 0.01954
Train [38][510/3239]	Time 0.291 (0.584)	Data Time 0.001 (0.060)	Loss 2.8891 (2.8868)	Entropy 1.31998 (1.32132)	Top-1 acc 54.297 (54.801)	Top-5 acc 78.906 (76.997)	lr 0.01954
Train [38][520/3239]	Time 0.225 (0.581)	Data Time 0.001 (0.059)	Loss 2.8580 (2.8876)	Entropy 1.31997 (1.32129)	Top-1 acc 53.906 (54.769)	Top-5 acc 77.734 (76.980)	lr 0.01954
Train [38][530/3239]	Time 0.232 (0.579)	Data Time 0.001 (0.058)	Loss 2.8634 (2.8864)	Entropy 1.31991 (1.32127)	Top-1 acc 56.250 (54.797)	Top-5 acc 76.172 (77.016)	lr 0.01954
Train [38][540/3239]	Time 0.222 (0.577)	Data Time 0.001 (0.057)	Loss 2.8513 (2.8858)	Entropy 1.31982 (1.32124)	Top-1 acc 57.422 (54.813)	Top-5 acc 76.562 (77.025)	lr 0.01954
Train [38][550/3239]	Time 0.239 (0.575)	Data Time 0.001 (0.056)	Loss 2.9184 (2.8872)	Entropy 1.31982 (1.32121)	Top-1 acc 48.828 (54.761)	Top-5 acc 75.000 (76.996)	lr 0.01953
Train [38][560/3239]	Time 2.499 (0.573)	Data Time 0.002 (0.055)	Loss 2.9240 (2.8872)	Entropy 1.31982 (1.32119)	Top-1 acc 51.172 (54.763)	Top-5 acc 74.609 (76.992)	lr 0.01953
Train [38][570/3239]	Time 0.241 (0.567)	Data Time 0.001 (0.054)	Loss 2.7791 (2.8871)	Entropy 1.31976 (1.32116)	Top-1 acc 56.250 (54.748)	Top-5 acc 76.562 (76.989)	lr 0.01953
Train [38][580/3239]	Time 0.223 (0.565)	Data Time 0.001 (0.053)	Loss 2.8622 (2.8885)	Entropy 1.31968 (1.32114)	Top-1 acc 54.688 (54.720)	Top-5 acc 76.953 (76.949)	lr 0.01953
Train [38][590/3239]	Time 0.227 (0.563)	Data Time 0.001 (0.052)	Loss 2.9453 (2.8889)	Entropy 1.31956 (1.32111)	Top-1 acc 50.781 (54.697)	Top-5 acc 78.125 (76.938)	lr 0.01953
Train [38][600/3239]	Time 0.240 (0.561)	Data Time 0.001 (0.052)	Loss 2.7542 (2.8892)	Entropy 1.31949 (1.32109)	Top-1 acc 57.422 (54.713)	Top-5 acc 80.078 (76.930)	lr 0.01953
Train [38][610/3239]	Time 0.273 (0.559)	Data Time 0.001 (0.051)	Loss 2.8006 (2.8896)	Entropy 1.31951 (1.32106)	Top-1 acc 57.031 (54.725)	Top-5 acc 78.516 (76.922)	lr 0.01953
Train [38][620/3239]	Time 0.205 (0.558)	Data Time 0.001 (0.050)	Loss 2.9172 (2.8896)	Entropy 1.31941 (1.32103)	Top-1 acc 54.688 (54.728)	Top-5 acc 75.781 (76.928)	lr 0.01953
Train [38][630/3239]	Time 0.216 (0.556)	Data Time 0.001 (0.049)	Loss 3.0259 (2.8905)	Entropy 1.31942 (1.32101)	Top-1 acc 56.250 (54.724)	Top-5 acc 73.828 (76.905)	lr 0.01953
Train [38][640/3239]	Time 0.227 (0.628)	Data Time 0.002 (0.049)	Loss 3.0118 (2.8906)	Entropy 1.31934 (1.32098)	Top-1 acc 55.078 (54.722)	Top-5 acc 74.219 (76.906)	lr 0.01953
Train [38][650/3239]	Time 0.259 (0.625)	Data Time 0.002 (0.048)	Loss 3.0453 (2.8909)	Entropy 1.31934 (1.32096)	Top-1 acc 52.344 (54.700)	Top-5 acc 75.781 (76.902)	lr 0.01953
Train [38][660/3239]	Time 0.332 (0.623)	Data Time 0.002 (0.047)	Loss 2.9614 (2.8913)	Entropy 1.31934 (1.32093)	Top-1 acc 55.469 (54.697)	Top-5 acc 79.297 (76.899)	lr 0.01953
Train [38][670/3239]	Time 2.591 (0.621)	Data Time 0.001 (0.046)	Loss 2.8846 (2.8922)	Entropy 1.31934 (1.32091)	Top-1 acc 50.781 (54.671)	Top-5 acc 78.125 (76.886)	lr 0.01952
Train [38][680/3239]	Time 0.209 (0.615)	Data Time 0.001 (0.046)	Loss 2.9422 (2.8918)	Entropy 1.31930 (1.32089)	Top-1 acc 53.516 (54.673)	Top-5 acc 75.781 (76.897)	lr 0.01952
Train [38][690/3239]	Time 0.224 (0.612)	Data Time 0.001 (0.045)	Loss 2.9106 (2.8914)	Entropy 1.31928 (1.32086)	Top-1 acc 53.125 (54.675)	Top-5 acc 76.562 (76.898)	lr 0.01952
Train [38][700/3239]	Time 0.209 (0.610)	Data Time 0.001 (0.045)	Loss 2.9713 (2.8917)	Entropy 1.31930 (1.32084)	Top-1 acc 51.562 (54.669)	Top-5 acc 76.953 (76.888)	lr 0.01952
Train [38][710/3239]	Time 0.313 (0.608)	Data Time 0.001 (0.044)	Loss 2.8023 (2.8917)	Entropy 1.31927 (1.32082)	Top-1 acc 56.641 (54.669)	Top-5 acc 78.125 (76.891)	lr 0.01952
Train [38][720/3239]	Time 0.160 (0.606)	Data Time 0.001 (0.043)	Loss 3.0594 (2.8921)	Entropy 1.31921 (1.32080)	Top-1 acc 50.000 (54.661)	Top-5 acc 71.875 (76.877)	lr 0.01952
Train [38][730/3239]	Time 0.236 (0.604)	Data Time 0.001 (0.043)	Loss 2.9800 (2.8927)	Entropy 1.31920 (1.32078)	Top-1 acc 51.953 (54.648)	Top-5 acc 76.562 (76.864)	lr 0.01952
Train [38][740/3239]	Time 0.166 (0.602)	Data Time 0.001 (0.042)	Loss 2.9587 (2.8935)	Entropy 1.31916 (1.32075)	Top-1 acc 51.172 (54.606)	Top-5 acc 75.000 (76.849)	lr 0.01952
Train [38][750/3239]	Time 0.227 (0.600)	Data Time 0.001 (0.042)	Loss 2.6649 (2.8933)	Entropy 1.31895 (1.32073)	Top-1 acc 59.375 (54.620)	Top-5 acc 79.297 (76.854)	lr 0.01952
Train [38][760/3239]	Time 0.312 (0.598)	Data Time 0.001 (0.041)	Loss 2.9469 (2.8935)	Entropy 1.31891 (1.32071)	Top-1 acc 51.172 (54.624)	Top-5 acc 76.953 (76.853)	lr 0.01952
Train [38][770/3239]	Time 0.214 (0.596)	Data Time 0.001 (0.041)	Loss 2.7282 (2.8936)	Entropy 1.31886 (1.32068)	Top-1 acc 61.328 (54.611)	Top-5 acc 79.688 (76.852)	lr 0.01952
Train [38][780/3239]	Time 2.246 (0.594)	Data Time 0.001 (0.040)	Loss 2.7938 (2.8933)	Entropy 1.31886 (1.32066)	Top-1 acc 60.156 (54.627)	Top-5 acc 77.344 (76.855)	lr 0.01952
Train [38][790/3239]	Time 0.205 (0.589)	Data Time 0.001 (0.040)	Loss 2.7102 (2.8938)	Entropy 1.31873 (1.32064)	Top-1 acc 54.688 (54.605)	Top-5 acc 79.297 (76.847)	lr 0.01951
Train [38][800/3239]	Time 0.222 (0.587)	Data Time 0.001 (0.039)	Loss 3.0215 (2.8939)	Entropy 1.31873 (1.32061)	Top-1 acc 52.344 (54.598)	Top-5 acc 74.609 (76.838)	lr 0.01951
Train [38][810/3239]	Time 0.220 (0.586)	Data Time 0.001 (0.039)	Loss 2.9243 (2.8939)	Entropy 1.31871 (1.32059)	Top-1 acc 53.125 (54.602)	Top-5 acc 75.781 (76.848)	lr 0.01951
Train [38][820/3239]	Time 0.294 (0.584)	Data Time 0.001 (0.038)	Loss 2.9857 (2.8937)	Entropy 1.31866 (1.32057)	Top-1 acc 50.391 (54.611)	Top-5 acc 75.000 (76.857)	lr 0.01951
Train [38][830/3239]	Time 0.217 (0.582)	Data Time 0.001 (0.038)	Loss 2.9630 (2.8930)	Entropy 1.31863 (1.32054)	Top-1 acc 52.734 (54.633)	Top-5 acc 76.953 (76.869)	lr 0.01951
Train [38][840/3239]	Time 0.226 (0.581)	Data Time 0.001 (0.037)	Loss 2.6273 (2.8920)	Entropy 1.31867 (1.32052)	Top-1 acc 61.328 (54.664)	Top-5 acc 80.859 (76.895)	lr 0.01951
Train [38][850/3239]	Time 0.202 (0.579)	Data Time 0.001 (0.037)	Loss 2.9648 (2.8923)	Entropy 1.31866 (1.32050)	Top-1 acc 55.078 (54.656)	Top-5 acc 76.953 (76.889)	lr 0.01951
Train [38][860/3239]	Time 0.203 (0.578)	Data Time 0.001 (0.037)	Loss 3.1992 (2.8928)	Entropy 1.31864 (1.32048)	Top-1 acc 47.656 (54.637)	Top-5 acc 70.703 (76.886)	lr 0.01951
Train [38][870/3239]	Time 0.243 (0.576)	Data Time 0.001 (0.036)	Loss 2.8530 (2.8937)	Entropy 1.31857 (1.32046)	Top-1 acc 56.641 (54.622)	Top-5 acc 78.125 (76.871)	lr 0.01951
Train [38][880/3239]	Time 0.199 (0.575)	Data Time 0.001 (0.036)	Loss 2.7504 (2.8938)	Entropy 1.31843 (1.32043)	Top-1 acc 56.250 (54.612)	Top-5 acc 80.469 (76.870)	lr 0.01951
Train [38][890/3239]	Time 2.397 (0.574)	Data Time 0.002 (0.035)	Loss 2.7192 (2.8936)	Entropy 1.31843 (1.32041)	Top-1 acc 59.766 (54.619)	Top-5 acc 80.859 (76.865)	lr 0.01951
Train [38][900/3239]	Time 0.205 (0.570)	Data Time 0.001 (0.035)	Loss 2.8685 (2.8933)	Entropy 1.31843 (1.32039)	Top-1 acc 53.125 (54.614)	Top-5 acc 75.000 (76.866)	lr 0.01950
Train [38][910/3239]	Time 0.214 (0.569)	Data Time 0.001 (0.035)	Loss 2.7043 (2.8940)	Entropy 1.31847 (1.32037)	Top-1 acc 60.938 (54.601)	Top-5 acc 79.688 (76.855)	lr 0.01950
Train [38][920/3239]	Time 0.217 (0.567)	Data Time 0.001 (0.034)	Loss 2.8237 (2.8932)	Entropy 1.31850 (1.32035)	Top-1 acc 57.812 (54.626)	Top-5 acc 78.125 (76.871)	lr 0.01950
Train [38][930/3239]	Time 0.307 (0.566)	Data Time 0.001 (0.034)	Loss 2.9756 (2.8934)	Entropy 1.31838 (1.32033)	Top-1 acc 53.125 (54.621)	Top-5 acc 77.344 (76.868)	lr 0.01950
Train [38][940/3239]	Time 0.231 (0.565)	Data Time 0.001 (0.034)	Loss 3.0320 (2.8931)	Entropy 1.31831 (1.32031)	Top-1 acc 51.172 (54.632)	Top-5 acc 77.344 (76.883)	lr 0.01950
Train [38][950/3239]	Time 0.225 (0.564)	Data Time 0.002 (0.033)	Loss 2.6467 (2.8922)	Entropy 1.31825 (1.32029)	Top-1 acc 60.156 (54.651)	Top-5 acc 82.812 (76.910)	lr 0.01950
Train [38][960/3239]	Time 0.245 (0.563)	Data Time 0.001 (0.033)	Loss 2.9606 (2.8924)	Entropy 1.31850 (1.32027)	Top-1 acc 53.125 (54.649)	Top-5 acc 75.000 (76.901)	lr 0.01950
Train [38][970/3239]	Time 0.220 (0.562)	Data Time 0.001 (0.033)	Loss 2.8383 (2.8924)	Entropy 1.31851 (1.32025)	Top-1 acc 52.734 (54.640)	Top-5 acc 76.172 (76.896)	lr 0.01950
Train [38][980/3239]	Time 0.222 (0.561)	Data Time 0.001 (0.032)	Loss 2.7911 (2.8920)	Entropy 1.31811 (1.32023)	Top-1 acc 58.594 (54.651)	Top-5 acc 78.125 (76.906)	lr 0.01950
Train [38][990/3239]	Time 0.268 (0.560)	Data Time 0.001 (0.032)	Loss 2.7489 (2.8912)	Entropy 1.31804 (1.32021)	Top-1 acc 57.031 (54.669)	Top-5 acc 77.734 (76.916)	lr 0.01950
Train [38][1000/3239]	Time 47.875 (0.604)	Data Time 0.001 (0.032)	Loss 2.8872 (2.8909)	Entropy 1.31804 (1.32019)	Top-1 acc 57.422 (54.674)	Top-5 acc 75.391 (76.915)	lr 0.01950
Train [38][1010/3239]	Time 0.338 (0.600)	Data Time 0.002 (0.031)	Loss 2.9454 (2.8907)	Entropy 1.31802 (1.32016)	Top-1 acc 53.516 (54.674)	Top-5 acc 76.953 (76.918)	lr 0.01950
Train [38][1020/3239]	Time 0.228 (0.599)	Data Time 0.001 (0.031)	Loss 3.0067 (2.8911)	Entropy 1.31801 (1.32014)	Top-1 acc 53.516 (54.670)	Top-5 acc 72.266 (76.912)	lr 0.01949
Train [38][1030/3239]	Time 0.240 (0.598)	Data Time 0.001 (0.031)	Loss 2.9981 (2.8910)	Entropy 1.31791 (1.32012)	Top-1 acc 50.391 (54.669)	Top-5 acc 75.000 (76.914)	lr 0.01949
Train [38][1040/3239]	Time 0.328 (0.596)	Data Time 0.002 (0.031)	Loss 2.8727 (2.8910)	Entropy 1.31780 (1.32010)	Top-1 acc 53.516 (54.656)	Top-5 acc 77.344 (76.915)	lr 0.01949
Train [38][1050/3239]	Time 0.230 (0.595)	Data Time 0.001 (0.030)	Loss 2.7528 (2.8908)	Entropy 1.31772 (1.32008)	Top-1 acc 57.422 (54.654)	Top-5 acc 78.906 (76.917)	lr 0.01949
Train [38][1060/3239]	Time 0.281 (0.594)	Data Time 0.001 (0.030)	Loss 2.7394 (2.8913)	Entropy 1.31771 (1.32005)	Top-1 acc 57.422 (54.639)	Top-5 acc 81.250 (76.906)	lr 0.01949
Train [38][1070/3239]	Time 0.219 (0.592)	Data Time 0.001 (0.030)	Loss 3.0696 (2.8913)	Entropy 1.31761 (1.32003)	Top-1 acc 49.609 (54.630)	Top-5 acc 74.219 (76.903)	lr 0.01949
Train [38][1080/3239]	Time 0.211 (0.591)	Data Time 0.001 (0.030)	Loss 2.9604 (2.8918)	Entropy 1.31752 (1.32001)	Top-1 acc 54.688 (54.624)	Top-5 acc 75.781 (76.888)	lr 0.01949
Train [38][1090/3239]	Time 0.212 (0.590)	Data Time 0.001 (0.029)	Loss 2.9320 (2.8927)	Entropy 1.31750 (1.31999)	Top-1 acc 51.953 (54.606)	Top-5 acc 74.219 (76.871)	lr 0.01949
Train [38][1100/3239]	Time 0.198 (0.589)	Data Time 0.001 (0.029)	Loss 3.3302 (2.8929)	Entropy 1.31749 (1.31996)	Top-1 acc 46.484 (54.599)	Top-5 acc 67.969 (76.875)	lr 0.01949
Train [38][1110/3239]	Time 2.437 (0.587)	Data Time 0.001 (0.029)	Loss 2.9426 (2.8941)	Entropy 1.31749 (1.31994)	Top-1 acc 53.906 (54.578)	Top-5 acc 77.734 (76.861)	lr 0.01949
Train [38][1120/3239]	Time 0.228 (0.584)	Data Time 0.001 (0.029)	Loss 2.9364 (2.8939)	Entropy 1.31739 (1.31992)	Top-1 acc 53.125 (54.582)	Top-5 acc 75.391 (76.864)	lr 0.01949
Train [38][1130/3239]	Time 0.195 (0.583)	Data Time 0.001 (0.028)	Loss 3.0887 (2.8934)	Entropy 1.31737 (1.31990)	Top-1 acc 52.734 (54.603)	Top-5 acc 73.047 (76.878)	lr 0.01949
Train [38][1140/3239]	Time 0.207 (0.582)	Data Time 0.001 (0.028)	Loss 2.9288 (2.8935)	Entropy 1.31715 (1.31987)	Top-1 acc 51.953 (54.596)	Top-5 acc 74.219 (76.881)	lr 0.01948
Train [38][1150/3239]	Time 0.358 (0.581)	Data Time 0.001 (0.028)	Loss 2.6797 (2.8932)	Entropy 1.31704 (1.31985)	Top-1 acc 58.203 (54.594)	Top-5 acc 82.422 (76.888)	lr 0.01948
Train [38][1160/3239]	Time 0.223 (0.580)	Data Time 0.001 (0.028)	Loss 2.8273 (2.8941)	Entropy 1.31700 (1.31982)	Top-1 acc 54.297 (54.564)	Top-5 acc 77.734 (76.863)	lr 0.01948
Train [38][1170/3239]	Time 0.221 (0.579)	Data Time 0.001 (0.027)	Loss 2.8331 (2.8940)	Entropy 1.31698 (1.31980)	Top-1 acc 55.078 (54.560)	Top-5 acc 79.688 (76.865)	lr 0.01948
Train [38][1180/3239]	Time 0.226 (0.578)	Data Time 0.001 (0.027)	Loss 2.9452 (2.8942)	Entropy 1.31687 (1.31978)	Top-1 acc 57.031 (54.553)	Top-5 acc 74.609 (76.861)	lr 0.01948
Train [38][1190/3239]	Time 0.261 (0.577)	Data Time 0.001 (0.027)	Loss 2.9037 (2.8941)	Entropy 1.31681 (1.31975)	Top-1 acc 50.000 (54.544)	Top-5 acc 78.516 (76.865)	lr 0.01948
Train [38][1200/3239]	Time 0.307 (0.576)	Data Time 0.001 (0.027)	Loss 3.0041 (2.8939)	Entropy 1.31676 (1.31973)	Top-1 acc 54.297 (54.543)	Top-5 acc 74.609 (76.865)	lr 0.01948
Train [38][1210/3239]	Time 0.264 (0.575)	Data Time 0.001 (0.027)	Loss 2.9056 (2.8938)	Entropy 1.31670 (1.31970)	Top-1 acc 52.734 (54.548)	Top-5 acc 78.906 (76.875)	lr 0.01948
Train [38][1220/3239]	Time 2.410 (0.574)	Data Time 0.001 (0.026)	Loss 2.9439 (2.8941)	Entropy 1.31670 (1.31968)	Top-1 acc 51.953 (54.546)	Top-5 acc 76.172 (76.869)	lr 0.01948
Train [38][1230/3239]	Time 0.200 (0.571)	Data Time 0.001 (0.026)	Loss 2.8820 (2.8941)	Entropy 1.31668 (1.31965)	Top-1 acc 53.906 (54.552)	Top-5 acc 75.000 (76.869)	lr 0.01948
Train [38][1240/3239]	Time 0.272 (0.570)	Data Time 0.001 (0.026)	Loss 2.8212 (2.8939)	Entropy 1.31664 (1.31963)	Top-1 acc 57.422 (54.563)	Top-5 acc 77.734 (76.876)	lr 0.01948
Train [38][1250/3239]	Time 0.330 (0.569)	Data Time 0.001 (0.026)	Loss 2.9098 (2.8941)	Entropy 1.31663 (1.31961)	Top-1 acc 53.125 (54.554)	Top-5 acc 75.391 (76.873)	lr 0.01948
Train [38][1260/3239]	Time 0.233 (0.568)	Data Time 0.001 (0.026)	Loss 2.8584 (2.8944)	Entropy 1.31657 (1.31958)	Top-1 acc 52.734 (54.542)	Top-5 acc 78.125 (76.869)	lr 0.01947
Train [38][1270/3239]	Time 0.184 (0.568)	Data Time 0.002 (0.025)	Loss 3.1169 (2.8945)	Entropy 1.31644 (1.31956)	Top-1 acc 50.781 (54.538)	Top-5 acc 73.438 (76.872)	lr 0.01947
Train [38][1280/3239]	Time 0.240 (0.567)	Data Time 0.001 (0.025)	Loss 2.9357 (2.8942)	Entropy 1.31639 (1.31953)	Top-1 acc 53.125 (54.544)	Top-5 acc 77.344 (76.880)	lr 0.01947
Train [38][1290/3239]	Time 0.204 (0.566)	Data Time 0.001 (0.025)	Loss 2.9994 (2.8938)	Entropy 1.31636 (1.31951)	Top-1 acc 48.438 (54.548)	Top-5 acc 76.172 (76.892)	lr 0.01947
Train [38][1300/3239]	Time 0.217 (0.565)	Data Time 0.001 (0.025)	Loss 2.8958 (2.8938)	Entropy 1.31628 (1.31948)	Top-1 acc 53.906 (54.545)	Top-5 acc 74.609 (76.887)	lr 0.01947
Train [38][1310/3239]	Time 0.213 (0.564)	Data Time 0.001 (0.025)	Loss 2.9508 (2.8945)	Entropy 1.31615 (1.31946)	Top-1 acc 52.344 (54.525)	Top-5 acc 78.906 (76.881)	lr 0.01947
Train [38][1320/3239]	Time 0.245 (0.563)	Data Time 0.001 (0.024)	Loss 2.9075 (2.8947)	Entropy 1.31609 (1.31943)	Top-1 acc 50.781 (54.521)	Top-5 acc 77.344 (76.868)	lr 0.01947
Train [38][1330/3239]	Time 2.381 (0.563)	Data Time 0.001 (0.024)	Loss 2.8830 (2.8950)	Entropy 1.31609 (1.31941)	Top-1 acc 53.516 (54.509)	Top-5 acc 79.297 (76.862)	lr 0.01947
Train [38][1340/3239]	Time 0.228 (0.560)	Data Time 0.008 (0.024)	Loss 2.9201 (2.8955)	Entropy 1.31610 (1.31938)	Top-1 acc 53.516 (54.497)	Top-5 acc 76.172 (76.852)	lr 0.01947
Train [38][1350/3239]	Time 0.220 (0.559)	Data Time 0.001 (0.024)	Loss 2.9024 (2.8958)	Entropy 1.31601 (1.31936)	Top-1 acc 53.516 (54.484)	Top-5 acc 76.953 (76.841)	lr 0.01947
Train [38][1360/3239]	Time 0.364 (0.559)	Data Time 0.002 (0.024)	Loss 3.1056 (2.8963)	Entropy 1.31547 (1.31933)	Top-1 acc 46.875 (54.473)	Top-5 acc 70.703 (76.825)	lr 0.01947
Train [38][1370/3239]	Time 0.233 (0.591)	Data Time 0.002 (0.024)	Loss 3.0182 (2.8963)	Entropy 1.31543 (1.31930)	Top-1 acc 51.562 (54.464)	Top-5 acc 74.219 (76.820)	lr 0.01947
Train [38][1380/3239]	Time 0.204 (0.590)	Data Time 0.002 (0.023)	Loss 2.9521 (2.8963)	Entropy 1.31537 (1.31927)	Top-1 acc 51.562 (54.465)	Top-5 acc 74.219 (76.820)	lr 0.01946
Train [38][1390/3239]	Time 0.228 (0.589)	Data Time 0.001 (0.023)	Loss 3.1071 (2.8967)	Entropy 1.31523 (1.31925)	Top-1 acc 48.438 (54.448)	Top-5 acc 74.219 (76.816)	lr 0.01946
Train [38][1400/3239]	Time 0.166 (0.588)	Data Time 0.001 (0.023)	Loss 2.7630 (2.8963)	Entropy 1.31519 (1.31922)	Top-1 acc 56.641 (54.456)	Top-5 acc 78.516 (76.819)	lr 0.01946
Train [38][1410/3239]	Time 0.302 (0.587)	Data Time 0.001 (0.023)	Loss 3.1218 (2.8968)	Entropy 1.31517 (1.31919)	Top-1 acc 49.609 (54.445)	Top-5 acc 73.438 (76.805)	lr 0.01946
Train [38][1420/3239]	Time 0.191 (0.587)	Data Time 0.001 (0.023)	Loss 2.9765 (2.8967)	Entropy 1.31508 (1.31916)	Top-1 acc 53.125 (54.447)	Top-5 acc 71.875 (76.802)	lr 0.01946
Train [38][1430/3239]	Time 0.262 (0.586)	Data Time 0.001 (0.023)	Loss 2.8588 (2.8965)	Entropy 1.31504 (1.31913)	Top-1 acc 55.469 (54.452)	Top-5 acc 79.297 (76.811)	lr 0.01946
Train [38][1440/3239]	Time 2.423 (0.585)	Data Time 0.001 (0.023)	Loss 2.9002 (2.8964)	Entropy 1.31504 (1.31910)	Top-1 acc 53.906 (54.449)	Top-5 acc 77.734 (76.815)	lr 0.01946
Train [38][1450/3239]	Time 0.218 (0.582)	Data Time 0.001 (0.022)	Loss 3.2161 (2.8968)	Entropy 1.31492 (1.31907)	Top-1 acc 49.609 (54.441)	Top-5 acc 70.312 (76.812)	lr 0.01946
Train [38][1460/3239]	Time 0.214 (0.581)	Data Time 0.001 (0.022)	Loss 2.9544 (2.8971)	Entropy 1.31482 (1.31905)	Top-1 acc 50.781 (54.436)	Top-5 acc 76.562 (76.806)	lr 0.01946
Train [38][1470/3239]	Time 0.217 (0.581)	Data Time 0.001 (0.022)	Loss 2.9478 (2.8973)	Entropy 1.31474 (1.31902)	Top-1 acc 55.078 (54.430)	Top-5 acc 76.172 (76.804)	lr 0.01946
Train [38][1480/3239]	Time 0.210 (0.580)	Data Time 0.001 (0.022)	Loss 2.9059 (2.8975)	Entropy 1.31464 (1.31899)	Top-1 acc 55.859 (54.436)	Top-5 acc 79.688 (76.799)	lr 0.01946
Train [38][1490/3239]	Time 0.234 (0.579)	Data Time 0.001 (0.022)	Loss 2.7477 (2.8974)	Entropy 1.31452 (1.31896)	Top-1 acc 53.516 (54.433)	Top-5 acc 79.297 (76.803)	lr 0.01946
Train [38][1500/3239]	Time 0.219 (0.578)	Data Time 0.001 (0.022)	Loss 2.8965 (2.8976)	Entropy 1.31448 (1.31893)	Top-1 acc 53.125 (54.425)	Top-5 acc 74.609 (76.803)	lr 0.01945
Train [38][1510/3239]	Time 0.222 (0.577)	Data Time 0.001 (0.022)	Loss 3.0171 (2.8975)	Entropy 1.31448 (1.31890)	Top-1 acc 51.562 (54.424)	Top-5 acc 77.344 (76.808)	lr 0.01945
Train [38][1520/3239]	Time 0.314 (0.576)	Data Time 0.002 (0.021)	Loss 2.8376 (2.8974)	Entropy 1.31447 (1.31887)	Top-1 acc 58.594 (54.429)	Top-5 acc 77.734 (76.812)	lr 0.01945
Train [38][1530/3239]	Time 0.229 (0.576)	Data Time 0.001 (0.021)	Loss 2.7900 (2.8974)	Entropy 1.31443 (1.31884)	Top-1 acc 54.297 (54.432)	Top-5 acc 79.688 (76.815)	lr 0.01945
Train [38][1540/3239]	Time 0.222 (0.575)	Data Time 0.001 (0.021)	Loss 2.8816 (2.8973)	Entropy 1.31441 (1.31881)	Top-1 acc 54.688 (54.432)	Top-5 acc 77.734 (76.823)	lr 0.01945
Train [38][1550/3239]	Time 2.423 (0.574)	Data Time 0.001 (0.021)	Loss 2.9202 (2.8971)	Entropy 1.31441 (1.31878)	Top-1 acc 54.688 (54.435)	Top-5 acc 76.562 (76.825)	lr 0.01945
Train [38][1560/3239]	Time 0.233 (0.572)	Data Time 0.001 (0.021)	Loss 2.8949 (2.8972)	Entropy 1.31434 (1.31876)	Top-1 acc 51.953 (54.431)	Top-5 acc 78.125 (76.826)	lr 0.01945
Train [38][1570/3239]	Time 0.291 (0.571)	Data Time 0.001 (0.021)	Loss 3.0459 (2.8971)	Entropy 1.31432 (1.31873)	Top-1 acc 50.000 (54.435)	Top-5 acc 75.000 (76.830)	lr 0.01945
Train [38][1580/3239]	Time 0.226 (0.570)	Data Time 0.001 (0.021)	Loss 2.9585 (2.8974)	Entropy 1.31428 (1.31870)	Top-1 acc 58.203 (54.432)	Top-5 acc 73.438 (76.819)	lr 0.01945
Train [38][1590/3239]	Time 0.217 (0.570)	Data Time 0.001 (0.021)	Loss 2.9424 (2.8975)	Entropy 1.31425 (1.31867)	Top-1 acc 51.172 (54.420)	Top-5 acc 76.172 (76.822)	lr 0.01945
Train [38][1600/3239]	Time 0.240 (0.569)	Data Time 0.001 (0.020)	Loss 2.9187 (2.8973)	Entropy 1.31424 (1.31864)	Top-1 acc 50.781 (54.424)	Top-5 acc 78.125 (76.828)	lr 0.01945
Train [38][1610/3239]	Time 0.223 (0.568)	Data Time 0.001 (0.020)	Loss 2.8678 (2.8980)	Entropy 1.31398 (1.31862)	Top-1 acc 56.250 (54.413)	Top-5 acc 76.172 (76.812)	lr 0.01945
Train [38][1620/3239]	Time 0.219 (0.568)	Data Time 0.001 (0.020)	Loss 2.7959 (2.8979)	Entropy 1.31390 (1.31859)	Top-1 acc 56.641 (54.409)	Top-5 acc 77.344 (76.810)	lr 0.01944
Train [38][1630/3239]	Time 0.233 (0.567)	Data Time 0.001 (0.020)	Loss 3.1659 (2.8985)	Entropy 1.31390 (1.31856)	Top-1 acc 46.484 (54.394)	Top-5 acc 73.047 (76.800)	lr 0.01944
Train [38][1640/3239]	Time 0.226 (0.566)	Data Time 0.001 (0.020)	Loss 2.9288 (2.8988)	Entropy 1.31381 (1.31853)	Top-1 acc 52.734 (54.386)	Top-5 acc 76.562 (76.797)	lr 0.01944
Train [38][1650/3239]	Time 0.224 (0.565)	Data Time 0.001 (0.020)	Loss 2.8625 (2.8987)	Entropy 1.31376 (1.31850)	Top-1 acc 53.906 (54.386)	Top-5 acc 77.734 (76.796)	lr 0.01944
Train [38][1660/3239]	Time 2.381 (0.565)	Data Time 0.001 (0.020)	Loss 2.9468 (2.8991)	Entropy 1.31376 (1.31847)	Top-1 acc 53.125 (54.381)	Top-5 acc 76.953 (76.789)	lr 0.01944
Train [38][1670/3239]	Time 0.224 (0.563)	Data Time 0.001 (0.020)	Loss 3.1050 (2.8992)	Entropy 1.31360 (1.31844)	Top-1 acc 50.781 (54.373)	Top-5 acc 71.875 (76.788)	lr 0.01944
Train [38][1680/3239]	Time 0.236 (0.562)	Data Time 0.001 (0.020)	Loss 2.9901 (2.8992)	Entropy 1.31359 (1.31841)	Top-1 acc 50.000 (54.373)	Top-5 acc 73.438 (76.785)	lr 0.01944
Train [38][1690/3239]	Time 0.243 (0.561)	Data Time 0.002 (0.019)	Loss 2.8195 (2.8992)	Entropy 1.31356 (1.31839)	Top-1 acc 54.297 (54.376)	Top-5 acc 77.344 (76.785)	lr 0.01944
Train [38][1700/3239]	Time 0.189 (0.561)	Data Time 0.002 (0.019)	Loss 2.8569 (2.8988)	Entropy 1.31360 (1.31836)	Top-1 acc 55.859 (54.389)	Top-5 acc 76.562 (76.794)	lr 0.01944
Train [38][1710/3239]	Time 0.231 (0.560)	Data Time 0.001 (0.019)	Loss 2.9613 (2.8987)	Entropy 1.31358 (1.31833)	Top-1 acc 54.297 (54.386)	Top-5 acc 76.562 (76.794)	lr 0.01944
Train [38][1720/3239]	Time 0.222 (0.560)	Data Time 0.001 (0.019)	Loss 2.9432 (2.8984)	Entropy 1.31355 (1.31830)	Top-1 acc 53.906 (54.387)	Top-5 acc 75.391 (76.802)	lr 0.01944
Train [38][1730/3239]	Time 0.323 (0.585)	Data Time 0.002 (0.019)	Loss 2.9222 (2.8986)	Entropy 1.31351 (1.31827)	Top-1 acc 51.953 (54.385)	Top-5 acc 76.172 (76.801)	lr 0.01944
Train [38][1740/3239]	Time 0.204 (0.584)	Data Time 0.002 (0.019)	Loss 3.1379 (2.8986)	Entropy 1.31345 (1.31825)	Top-1 acc 50.391 (54.388)	Top-5 acc 74.219 (76.801)	lr 0.01943
Train [38][1750/3239]	Time 0.220 (0.583)	Data Time 0.002 (0.019)	Loss 2.9614 (2.8988)	Entropy 1.31341 (1.31822)	Top-1 acc 54.297 (54.383)	Top-5 acc 76.172 (76.794)	lr 0.01943
Train [38][1760/3239]	Time 0.246 (0.583)	Data Time 0.001 (0.019)	Loss 2.8690 (2.8989)	Entropy 1.31332 (1.31819)	Top-1 acc 55.469 (54.381)	Top-5 acc 76.562 (76.788)	lr 0.01943
Train [38][1770/3239]	Time 2.396 (0.582)	Data Time 0.001 (0.019)	Loss 2.8727 (2.8992)	Entropy 1.31332 (1.31816)	Top-1 acc 60.938 (54.375)	Top-5 acc 78.125 (76.789)	lr 0.01943
Train [38][1780/3239]	Time 0.222 (0.580)	Data Time 0.001 (0.019)	Loss 2.9419 (2.8993)	Entropy 1.31332 (1.31814)	Top-1 acc 53.516 (54.380)	Top-5 acc 75.391 (76.787)	lr 0.01943
Train [38][1790/3239]	Time 0.299 (0.579)	Data Time 0.001 (0.018)	Loss 3.0125 (2.8992)	Entropy 1.31322 (1.31811)	Top-1 acc 48.438 (54.377)	Top-5 acc 76.172 (76.788)	lr 0.01943
Train [38][1800/3239]	Time 0.216 (0.579)	Data Time 0.001 (0.018)	Loss 2.8443 (2.8991)	Entropy 1.31313 (1.31808)	Top-1 acc 54.297 (54.378)	Top-5 acc 80.469 (76.793)	lr 0.01943
Train [38][1810/3239]	Time 0.217 (0.578)	Data Time 0.001 (0.018)	Loss 2.7154 (2.8995)	Entropy 1.31301 (1.31805)	Top-1 acc 58.203 (54.369)	Top-5 acc 78.125 (76.788)	lr 0.01943
Train [38][1820/3239]	Time 0.252 (0.577)	Data Time 0.001 (0.018)	Loss 2.8442 (2.8996)	Entropy 1.31299 (1.31803)	Top-1 acc 57.812 (54.364)	Top-5 acc 78.516 (76.782)	lr 0.01943
Train [38][1830/3239]	Time 0.229 (0.576)	Data Time 0.001 (0.018)	Loss 3.1303 (2.8997)	Entropy 1.31299 (1.31800)	Top-1 acc 51.172 (54.366)	Top-5 acc 71.875 (76.781)	lr 0.01943
Train [38][1840/3239]	Time 0.297 (0.576)	Data Time 0.001 (0.018)	Loss 2.7663 (2.8997)	Entropy 1.31292 (1.31797)	Top-1 acc 58.203 (54.367)	Top-5 acc 76.172 (76.781)	lr 0.01943
Train [38][1850/3239]	Time 0.219 (0.575)	Data Time 0.001 (0.018)	Loss 2.8921 (2.8997)	Entropy 1.31274 (1.31794)	Top-1 acc 56.641 (54.369)	Top-5 acc 76.953 (76.779)	lr 0.01943
Train [38][1860/3239]	Time 0.217 (0.574)	Data Time 0.001 (0.018)	Loss 2.7332 (2.9004)	Entropy 1.31267 (1.31792)	Top-1 acc 59.766 (54.362)	Top-5 acc 79.688 (76.770)	lr 0.01942
Train [38][1870/3239]	Time 0.210 (0.574)	Data Time 0.001 (0.018)	Loss 3.0881 (2.9003)	Entropy 1.31266 (1.31789)	Top-1 acc 47.266 (54.369)	Top-5 acc 73.047 (76.771)	lr 0.01942
Train [38][1880/3239]	Time 2.460 (0.573)	Data Time 0.001 (0.018)	Loss 2.6851 (2.8998)	Entropy 1.31266 (1.31786)	Top-1 acc 62.109 (54.382)	Top-5 acc 81.641 (76.781)	lr 0.01942
Train [38][1890/3239]	Time 0.246 (0.571)	Data Time 0.001 (0.018)	Loss 2.8519 (2.8997)	Entropy 1.31266 (1.31783)	Top-1 acc 53.516 (54.388)	Top-5 acc 79.297 (76.782)	lr 0.01942
Train [38][1900/3239]	Time 0.228 (0.571)	Data Time 0.001 (0.018)	Loss 2.9151 (2.8995)	Entropy 1.31260 (1.31781)	Top-1 acc 52.344 (54.392)	Top-5 acc 76.172 (76.786)	lr 0.01942
Train [38][1910/3239]	Time 0.221 (0.570)	Data Time 0.001 (0.017)	Loss 2.8884 (2.8993)	Entropy 1.31259 (1.31778)	Top-1 acc 54.297 (54.397)	Top-5 acc 79.688 (76.788)	lr 0.01942
Train [38][1920/3239]	Time 0.228 (0.569)	Data Time 0.001 (0.017)	Loss 2.7940 (2.8994)	Entropy 1.31257 (1.31775)	Top-1 acc 55.469 (54.395)	Top-5 acc 80.078 (76.788)	lr 0.01942
Train [38][1930/3239]	Time 0.225 (0.569)	Data Time 0.001 (0.017)	Loss 2.7630 (2.8993)	Entropy 1.31250 (1.31772)	Top-1 acc 60.547 (54.395)	Top-5 acc 81.250 (76.796)	lr 0.01942
Train [38][1940/3239]	Time 0.223 (0.568)	Data Time 0.001 (0.017)	Loss 2.7876 (2.8991)	Entropy 1.31245 (1.31770)	Top-1 acc 54.297 (54.401)	Top-5 acc 79.297 (76.799)	lr 0.01942
Train [38][1950/3239]	Time 0.320 (0.568)	Data Time 0.001 (0.017)	Loss 3.0339 (2.8995)	Entropy 1.31243 (1.31767)	Top-1 acc 48.047 (54.394)	Top-5 acc 74.219 (76.795)	lr 0.01942
Train [38][1960/3239]	Time 0.208 (0.567)	Data Time 0.001 (0.017)	Loss 2.9810 (2.8996)	Entropy 1.31241 (1.31764)	Top-1 acc 53.516 (54.388)	Top-5 acc 73.828 (76.793)	lr 0.01942
Train [38][1970/3239]	Time 0.228 (0.566)	Data Time 0.001 (0.017)	Loss 3.0718 (2.8995)	Entropy 1.31234 (1.31762)	Top-1 acc 51.562 (54.391)	Top-5 acc 74.219 (76.793)	lr 0.01942
Train [38][1980/3239]	Time 0.214 (0.566)	Data Time 0.001 (0.017)	Loss 2.8610 (2.8994)	Entropy 1.31230 (1.31759)	Top-1 acc 57.422 (54.393)	Top-5 acc 73.047 (76.791)	lr 0.01941
Train [38][1990/3239]	Time 2.424 (0.565)	Data Time 0.001 (0.017)	Loss 2.9384 (2.8995)	Entropy 1.31230 (1.31756)	Top-1 acc 55.078 (54.390)	Top-5 acc 76.172 (76.793)	lr 0.01941
Train [38][2000/3239]	Time 0.250 (0.564)	Data Time 0.002 (0.017)	Loss 2.9663 (2.8994)	Entropy 1.31230 (1.31754)	Top-1 acc 51.953 (54.389)	Top-5 acc 76.562 (76.796)	lr 0.01941
Train [38][2010/3239]	Time 0.229 (0.563)	Data Time 0.001 (0.017)	Loss 2.9329 (2.8995)	Entropy 1.31222 (1.31751)	Top-1 acc 54.297 (54.390)	Top-5 acc 75.391 (76.795)	lr 0.01941
Train [38][2020/3239]	Time 0.221 (0.563)	Data Time 0.001 (0.017)	Loss 2.6644 (2.8996)	Entropy 1.31216 (1.31748)	Top-1 acc 58.594 (54.393)	Top-5 acc 82.422 (76.792)	lr 0.01941
Train [38][2030/3239]	Time 0.222 (0.562)	Data Time 0.001 (0.016)	Loss 2.8665 (2.8994)	Entropy 1.31212 (1.31746)	Top-1 acc 53.516 (54.398)	Top-5 acc 77.734 (76.792)	lr 0.01941
Train [38][2040/3239]	Time 0.270 (0.562)	Data Time 0.001 (0.016)	Loss 2.7356 (2.8995)	Entropy 1.31208 (1.31743)	Top-1 acc 61.328 (54.397)	Top-5 acc 80.078 (76.793)	lr 0.01941
Train [38][2050/3239]	Time 0.233 (0.561)	Data Time 0.001 (0.016)	Loss 2.9856 (2.8998)	Entropy 1.31198 (1.31741)	Top-1 acc 51.953 (54.394)	Top-5 acc 73.828 (76.786)	lr 0.01941
Train [38][2060/3239]	Time 0.319 (0.561)	Data Time 0.001 (0.016)	Loss 2.9052 (2.8999)	Entropy 1.31196 (1.31738)	Top-1 acc 53.906 (54.392)	Top-5 acc 73.828 (76.783)	lr 0.01941
Train [38][2070/3239]	Time 0.206 (0.560)	Data Time 0.001 (0.016)	Loss 2.8719 (2.8996)	Entropy 1.31196 (1.31735)	Top-1 acc 53.906 (54.398)	Top-5 acc 79.297 (76.790)	lr 0.01941
Train [38][2080/3239]	Time 0.245 (0.560)	Data Time 0.001 (0.016)	Loss 3.1409 (2.8995)	Entropy 1.31196 (1.31733)	Top-1 acc 47.656 (54.401)	Top-5 acc 71.484 (76.795)	lr 0.01941
Train [38][2090/3239]	Time 0.274 (0.578)	Data Time 0.004 (0.016)	Loss 2.7364 (2.8993)	Entropy 1.31195 (1.31730)	Top-1 acc 58.203 (54.400)	Top-5 acc 82.812 (76.805)	lr 0.01941
Train [38][2100/3239]	Time 3.386 (0.578)	Data Time 0.115 (0.016)	Loss 2.9127 (2.8997)	Entropy 1.31195 (1.31728)	Top-1 acc 56.250 (54.396)	Top-5 acc 79.297 (76.797)	lr 0.01940
Train [38][2110/3239]	Time 0.211 (0.576)	Data Time 0.002 (0.016)	Loss 2.7336 (2.8993)	Entropy 1.31183 (1.31725)	Top-1 acc 61.328 (54.404)	Top-5 acc 78.125 (76.803)	lr 0.01940
Train [38][2120/3239]	Time 0.233 (0.576)	Data Time 0.001 (0.016)	Loss 2.6640 (2.8995)	Entropy 1.31168 (1.31722)	Top-1 acc 59.375 (54.395)	Top-5 acc 79.688 (76.800)	lr 0.01940
Train [38][2130/3239]	Time 0.207 (0.575)	Data Time 0.001 (0.016)	Loss 2.9675 (2.8994)	Entropy 1.31159 (1.31720)	Top-1 acc 49.219 (54.394)	Top-5 acc 74.219 (76.803)	lr 0.01940
Train [38][2140/3239]	Time 0.230 (0.575)	Data Time 0.001 (0.016)	Loss 2.9890 (2.8995)	Entropy 1.31152 (1.31717)	Top-1 acc 51.953 (54.388)	Top-5 acc 75.391 (76.801)	lr 0.01940
Train [38][2150/3239]	Time 0.222 (0.574)	Data Time 0.001 (0.016)	Loss 2.9708 (2.8998)	Entropy 1.31145 (1.31714)	Top-1 acc 52.734 (54.386)	Top-5 acc 75.000 (76.797)	lr 0.01940
Train [38][2160/3239]	Time 0.224 (0.573)	Data Time 0.001 (0.016)	Loss 2.8049 (2.8997)	Entropy 1.31114 (1.31712)	Top-1 acc 52.734 (54.391)	Top-5 acc 76.562 (76.795)	lr 0.01940
Train [38][2170/3239]	Time 0.222 (0.573)	Data Time 0.001 (0.016)	Loss 2.7556 (2.8995)	Entropy 1.31100 (1.31709)	Top-1 acc 55.469 (54.401)	Top-5 acc 79.688 (76.800)	lr 0.01940
Train [38][2180/3239]	Time 0.218 (0.572)	Data Time 0.001 (0.016)	Loss 2.9474 (2.8994)	Entropy 1.31099 (1.31706)	Top-1 acc 55.469 (54.405)	Top-5 acc 77.734 (76.798)	lr 0.01940
Train [38][2190/3239]	Time 0.230 (0.572)	Data Time 0.001 (0.015)	Loss 2.9989 (2.8998)	Entropy 1.31087 (1.31703)	Top-1 acc 49.609 (54.396)	Top-5 acc 77.344 (76.792)	lr 0.01940
Train [38][2200/3239]	Time 0.222 (0.571)	Data Time 0.001 (0.015)	Loss 2.9810 (2.8999)	Entropy 1.31072 (1.31701)	Top-1 acc 55.859 (54.396)	Top-5 acc 75.781 (76.788)	lr 0.01940
Train [38][2210/3239]	Time 2.670 (0.571)	Data Time 0.001 (0.015)	Loss 2.6475 (2.8998)	Entropy 1.31072 (1.31698)	Top-1 acc 57.031 (54.396)	Top-5 acc 80.469 (76.792)	lr 0.01940
Train [38][2220/3239]	Time 0.256 (0.569)	Data Time 0.001 (0.015)	Loss 2.9288 (2.9000)	Entropy 1.31071 (1.31695)	Top-1 acc 53.125 (54.395)	Top-5 acc 78.906 (76.791)	lr 0.01939
Train [38][2230/3239]	Time 0.250 (0.569)	Data Time 0.002 (0.015)	Loss 3.0374 (2.9000)	Entropy 1.31062 (1.31692)	Top-1 acc 50.391 (54.390)	Top-5 acc 73.828 (76.790)	lr 0.01939
Train [38][2240/3239]	Time 0.218 (0.569)	Data Time 0.001 (0.015)	Loss 3.1117 (2.9002)	Entropy 1.31063 (1.31689)	Top-1 acc 48.828 (54.382)	Top-5 acc 72.266 (76.784)	lr 0.01939
Train [38][2250/3239]	Time 0.212 (0.568)	Data Time 0.001 (0.015)	Loss 2.9304 (2.9001)	Entropy 1.31060 (1.31686)	Top-1 acc 52.734 (54.385)	Top-5 acc 75.781 (76.786)	lr 0.01939
Train [38][2260/3239]	Time 0.316 (0.568)	Data Time 0.001 (0.015)	Loss 2.9970 (2.9002)	Entropy 1.31059 (1.31684)	Top-1 acc 50.391 (54.384)	Top-5 acc 74.609 (76.784)	lr 0.01939
Train [38][2270/3239]	Time 0.231 (0.567)	Data Time 0.001 (0.015)	Loss 2.9859 (2.9003)	Entropy 1.31043 (1.31681)	Top-1 acc 55.469 (54.387)	Top-5 acc 72.656 (76.781)	lr 0.01939
Train [38][2280/3239]	Time 0.235 (0.567)	Data Time 0.001 (0.015)	Loss 2.8750 (2.9002)	Entropy 1.31042 (1.31678)	Top-1 acc 52.734 (54.388)	Top-5 acc 76.953 (76.781)	lr 0.01939
Train [38][2290/3239]	Time 0.198 (0.566)	Data Time 0.001 (0.015)	Loss 2.9757 (2.9003)	Entropy 1.31036 (1.31675)	Top-1 acc 53.516 (54.384)	Top-5 acc 73.438 (76.779)	lr 0.01939
Train [38][2300/3239]	Time 0.221 (0.566)	Data Time 0.001 (0.015)	Loss 2.9641 (2.9005)	Entropy 1.31034 (1.31673)	Top-1 acc 54.297 (54.384)	Top-5 acc 76.562 (76.777)	lr 0.01939
Train [38][2310/3239]	Time 0.349 (0.565)	Data Time 0.001 (0.015)	Loss 2.8512 (2.9004)	Entropy 1.31018 (1.31670)	Top-1 acc 54.297 (54.383)	Top-5 acc 79.688 (76.779)	lr 0.01939
Train [38][2320/3239]	Time 2.533 (0.565)	Data Time 0.001 (0.015)	Loss 3.4946 (2.9007)	Entropy 1.31018 (1.31667)	Top-1 acc 39.062 (54.380)	Top-5 acc 67.969 (76.775)	lr 0.01939
Train [38][2330/3239]	Time 0.269 (0.564)	Data Time 0.001 (0.015)	Loss 2.9105 (2.9008)	Entropy 1.31009 (1.31664)	Top-1 acc 50.781 (54.378)	Top-5 acc 79.297 (76.768)	lr 0.01938
Train [38][2340/3239]	Time 0.222 (0.563)	Data Time 0.001 (0.015)	Loss 2.7498 (2.9008)	Entropy 1.31008 (1.31661)	Top-1 acc 59.375 (54.382)	Top-5 acc 76.953 (76.768)	lr 0.01938
Train [38][2350/3239]	Time 0.216 (0.563)	Data Time 0.001 (0.015)	Loss 2.9737 (2.9009)	Entropy 1.31001 (1.31659)	Top-1 acc 52.734 (54.374)	Top-5 acc 76.172 (76.764)	lr 0.01938
Train [38][2360/3239]	Time 0.295 (0.562)	Data Time 0.001 (0.014)	Loss 2.9248 (2.9011)	Entropy 1.31001 (1.31656)	Top-1 acc 53.516 (54.370)	Top-5 acc 78.125 (76.764)	lr 0.01938
Train [38][2370/3239]	Time 0.212 (0.562)	Data Time 0.001 (0.014)	Loss 2.8310 (2.9012)	Entropy 1.30988 (1.31653)	Top-1 acc 56.641 (54.362)	Top-5 acc 79.688 (76.763)	lr 0.01938
Train [38][2380/3239]	Time 0.231 (0.561)	Data Time 0.001 (0.014)	Loss 2.8980 (2.9012)	Entropy 1.30989 (1.31650)	Top-1 acc 51.172 (54.356)	Top-5 acc 75.391 (76.765)	lr 0.01938
Train [38][2390/3239]	Time 0.215 (0.561)	Data Time 0.009 (0.014)	Loss 2.9811 (2.9011)	Entropy 1.30984 (1.31647)	Top-1 acc 53.125 (54.363)	Top-5 acc 76.172 (76.766)	lr 0.01938
Train [38][2400/3239]	Time 0.226 (0.560)	Data Time 0.001 (0.014)	Loss 2.9653 (2.9012)	Entropy 1.30985 (1.31645)	Top-1 acc 53.906 (54.361)	Top-5 acc 76.953 (76.765)	lr 0.01938
Train [38][2410/3239]	Time 0.233 (0.560)	Data Time 0.001 (0.014)	Loss 2.9186 (2.9013)	Entropy 1.30980 (1.31642)	Top-1 acc 53.125 (54.356)	Top-5 acc 78.906 (76.761)	lr 0.01938
Train [38][2420/3239]	Time 0.242 (0.560)	Data Time 0.001 (0.014)	Loss 2.9782 (2.9017)	Entropy 1.30965 (1.31639)	Top-1 acc 53.516 (54.351)	Top-5 acc 75.781 (76.757)	lr 0.01938
Train [38][2430/3239]	Time 2.505 (0.559)	Data Time 0.001 (0.014)	Loss 3.0516 (2.9017)	Entropy 1.30965 (1.31636)	Top-1 acc 50.781 (54.348)	Top-5 acc 75.391 (76.755)	lr 0.01938
Train [38][2440/3239]	Time 0.221 (0.558)	Data Time 0.001 (0.014)	Loss 2.9299 (2.9015)	Entropy 1.30970 (1.31634)	Top-1 acc 55.469 (54.356)	Top-5 acc 77.344 (76.759)	lr 0.01938
Train [38][2450/3239]	Time 0.238 (0.557)	Data Time 0.001 (0.014)	Loss 2.8555 (2.9016)	Entropy 1.30963 (1.31631)	Top-1 acc 56.250 (54.355)	Top-5 acc 76.172 (76.756)	lr 0.01937
Train [38][2460/3239]	Time 0.231 (0.576)	Data Time 0.004 (0.014)	Loss 2.9431 (2.9019)	Entropy 1.30952 (1.31628)	Top-1 acc 53.125 (54.353)	Top-5 acc 76.172 (76.748)	lr 0.01937
Train [38][2470/3239]	Time 0.327 (0.576)	Data Time 0.002 (0.014)	Loss 2.6634 (2.9019)	Entropy 1.30952 (1.31625)	Top-1 acc 57.422 (54.353)	Top-5 acc 81.250 (76.750)	lr 0.01937
Train [38][2480/3239]	Time 0.230 (0.575)	Data Time 0.001 (0.014)	Loss 2.7311 (2.9019)	Entropy 1.30948 (1.31623)	Top-1 acc 53.516 (54.350)	Top-5 acc 81.250 (76.750)	lr 0.01937
Train [38][2490/3239]	Time 0.219 (0.575)	Data Time 0.001 (0.014)	Loss 2.9407 (2.9019)	Entropy 1.30938 (1.31620)	Top-1 acc 52.344 (54.348)	Top-5 acc 76.172 (76.753)	lr 0.01937
Train [38][2500/3239]	Time 0.225 (0.574)	Data Time 0.001 (0.014)	Loss 2.9986 (2.9019)	Entropy 1.30891 (1.31617)	Top-1 acc 52.734 (54.352)	Top-5 acc 76.562 (76.754)	lr 0.01937
Train [38][2510/3239]	Time 0.251 (0.574)	Data Time 0.001 (0.014)	Loss 2.8880 (2.9017)	Entropy 1.30892 (1.31614)	Top-1 acc 54.688 (54.357)	Top-5 acc 76.562 (76.759)	lr 0.01937
Train [38][2520/3239]	Time 0.316 (0.574)	Data Time 0.001 (0.014)	Loss 2.9096 (2.9015)	Entropy 1.30890 (1.31611)	Top-1 acc 53.906 (54.361)	Top-5 acc 77.734 (76.766)	lr 0.01937
Train [38][2530/3239]	Time 0.232 (0.573)	Data Time 0.001 (0.014)	Loss 2.8840 (2.9015)	Entropy 1.30888 (1.31609)	Top-1 acc 58.594 (54.365)	Top-5 acc 78.125 (76.765)	lr 0.01937
Train [38][2540/3239]	Time 2.420 (0.573)	Data Time 0.001 (0.014)	Loss 2.8660 (2.9019)	Entropy 1.30888 (1.31606)	Top-1 acc 53.516 (54.353)	Top-5 acc 81.250 (76.759)	lr 0.01937
Train [38][2550/3239]	Time 0.213 (0.571)	Data Time 0.001 (0.014)	Loss 2.9276 (2.9020)	Entropy 1.30866 (1.31603)	Top-1 acc 49.219 (54.350)	Top-5 acc 75.781 (76.754)	lr 0.01937
Train [38][2560/3239]	Time 0.268 (0.571)	Data Time 0.002 (0.014)	Loss 2.9086 (2.9021)	Entropy 1.30862 (1.31600)	Top-1 acc 57.422 (54.351)	Top-5 acc 77.344 (76.752)	lr 0.01937
Train [38][2570/3239]	Time 0.229 (0.571)	Data Time 0.001 (0.013)	Loss 2.8110 (2.9019)	Entropy 1.30860 (1.31597)	Top-1 acc 56.641 (54.356)	Top-5 acc 78.125 (76.757)	lr 0.01936
Train [38][2580/3239]	Time 0.230 (0.570)	Data Time 0.001 (0.013)	Loss 3.0752 (2.9018)	Entropy 1.30859 (1.31594)	Top-1 acc 52.734 (54.361)	Top-5 acc 74.609 (76.760)	lr 0.01936
Train [38][2590/3239]	Time 0.236 (0.570)	Data Time 0.002 (0.013)	Loss 2.8131 (2.9018)	Entropy 1.30833 (1.31591)	Top-1 acc 57.422 (54.358)	Top-5 acc 76.172 (76.761)	lr 0.01936
Train [38][2600/3239]	Time 0.227 (0.569)	Data Time 0.001 (0.013)	Loss 2.8276 (2.9015)	Entropy 1.30825 (1.31588)	Top-1 acc 58.594 (54.367)	Top-5 acc 78.516 (76.769)	lr 0.01936
Train [38][2610/3239]	Time 0.215 (0.569)	Data Time 0.001 (0.013)	Loss 3.0573 (2.9015)	Entropy 1.30824 (1.31585)	Top-1 acc 52.734 (54.366)	Top-5 acc 73.828 (76.766)	lr 0.01936
Train [38][2620/3239]	Time 0.223 (0.569)	Data Time 0.001 (0.013)	Loss 2.9905 (2.9015)	Entropy 1.30810 (1.31583)	Top-1 acc 50.781 (54.372)	Top-5 acc 74.219 (76.767)	lr 0.01936
Train [38][2630/3239]	Time 0.226 (0.568)	Data Time 0.001 (0.013)	Loss 2.8549 (2.9016)	Entropy 1.30809 (1.31580)	Top-1 acc 56.250 (54.371)	Top-5 acc 78.125 (76.762)	lr 0.01936
Train [38][2640/3239]	Time 0.227 (0.568)	Data Time 0.001 (0.013)	Loss 2.8034 (2.9015)	Entropy 1.30804 (1.31577)	Top-1 acc 57.812 (54.370)	Top-5 acc 79.297 (76.761)	lr 0.01936
Train [38][2650/3239]	Time 0.229 (0.568)	Data Time 0.001 (0.013)	Loss 2.9307 (2.9016)	Entropy 1.30798 (1.31574)	Top-1 acc 53.906 (54.369)	Top-5 acc 76.172 (76.761)	lr 0.01936
Train [38][2660/3239]	Time 0.243 (0.567)	Data Time 0.001 (0.013)	Loss 3.0622 (2.9017)	Entropy 1.30783 (1.31571)	Top-1 acc 50.391 (54.366)	Top-5 acc 72.266 (76.756)	lr 0.01936
Train [38][2670/3239]	Time 0.222 (0.567)	Data Time 0.001 (0.013)	Loss 2.7466 (2.9017)	Entropy 1.30782 (1.31568)	Top-1 acc 55.859 (54.367)	Top-5 acc 80.078 (76.756)	lr 0.01936
Train [38][2680/3239]	Time 0.335 (0.566)	Data Time 0.001 (0.013)	Loss 2.9688 (2.9018)	Entropy 1.30768 (1.31565)	Top-1 acc 55.859 (54.365)	Top-5 acc 78.516 (76.756)	lr 0.01936
Train [38][2690/3239]	Time 0.217 (0.566)	Data Time 0.001 (0.013)	Loss 3.0012 (2.9021)	Entropy 1.30766 (1.31562)	Top-1 acc 51.953 (54.358)	Top-5 acc 72.266 (76.745)	lr 0.01935
Train [38][2700/3239]	Time 0.260 (0.565)	Data Time 0.001 (0.013)	Loss 2.9024 (2.9024)	Entropy 1.30762 (1.31559)	Top-1 acc 53.906 (54.353)	Top-5 acc 76.562 (76.741)	lr 0.01935
Train [38][2710/3239]	Time 0.268 (0.565)	Data Time 0.001 (0.013)	Loss 2.8695 (2.9027)	Entropy 1.30759 (1.31556)	Top-1 acc 54.688 (54.341)	Top-5 acc 75.391 (76.737)	lr 0.01935
Train [38][2720/3239]	Time 0.227 (0.565)	Data Time 0.001 (0.013)	Loss 2.9445 (2.9027)	Entropy 1.30746 (1.31553)	Top-1 acc 51.172 (54.343)	Top-5 acc 74.219 (76.738)	lr 0.01935
Train [38][2730/3239]	Time 0.229 (0.564)	Data Time 0.001 (0.013)	Loss 2.6825 (2.9027)	Entropy 1.30738 (1.31550)	Top-1 acc 55.469 (54.347)	Top-5 acc 80.859 (76.737)	lr 0.01935
Train [38][2740/3239]	Time 0.214 (0.564)	Data Time 0.001 (0.013)	Loss 2.8568 (2.9028)	Entropy 1.30738 (1.31547)	Top-1 acc 56.250 (54.346)	Top-5 acc 75.781 (76.735)	lr 0.01935
Train [38][2750/3239]	Time 0.255 (0.563)	Data Time 0.002 (0.013)	Loss 2.9827 (2.9028)	Entropy 1.30736 (1.31544)	Top-1 acc 57.031 (54.351)	Top-5 acc 74.219 (76.737)	lr 0.01935
Train [38][2760/3239]	Time 0.250 (0.563)	Data Time 0.001 (0.013)	Loss 2.9075 (2.9027)	Entropy 1.30736 (1.31541)	Top-1 acc 57.812 (54.352)	Top-5 acc 75.391 (76.739)	lr 0.01935
Train [38][2770/3239]	Time 0.262 (0.563)	Data Time 0.001 (0.013)	Loss 3.1109 (2.9027)	Entropy 1.30732 (1.31538)	Top-1 acc 51.172 (54.353)	Top-5 acc 71.484 (76.738)	lr 0.01935
Train [38][2780/3239]	Time 0.224 (0.562)	Data Time 0.001 (0.013)	Loss 2.9899 (2.9026)	Entropy 1.30725 (1.31536)	Top-1 acc 50.000 (54.351)	Top-5 acc 74.609 (76.740)	lr 0.01935
Train [38][2790/3239]	Time 0.210 (0.562)	Data Time 0.001 (0.013)	Loss 2.9524 (2.9026)	Entropy 1.30722 (1.31533)	Top-1 acc 51.562 (54.351)	Top-5 acc 74.609 (76.739)	lr 0.01935
Train [38][2800/3239]	Time 0.283 (0.576)	Data Time 0.004 (0.012)	Loss 2.6365 (2.9025)	Entropy 1.30715 (1.31530)	Top-1 acc 61.328 (54.360)	Top-5 acc 80.469 (76.743)	lr 0.01935
Train [38][2810/3239]	Time 0.243 (0.576)	Data Time 0.002 (0.012)	Loss 2.8381 (2.9026)	Entropy 1.30712 (1.31527)	Top-1 acc 50.781 (54.356)	Top-5 acc 80.859 (76.742)	lr 0.01934
Train [38][2820/3239]	Time 0.253 (0.576)	Data Time 0.001 (0.012)	Loss 2.9834 (2.9027)	Entropy 1.30714 (1.31524)	Top-1 acc 54.297 (54.356)	Top-5 acc 76.953 (76.740)	lr 0.01934
Train [38][2830/3239]	Time 0.287 (0.576)	Data Time 0.002 (0.012)	Loss 2.7976 (2.9027)	Entropy 1.30698 (1.31521)	Top-1 acc 53.125 (54.355)	Top-5 acc 76.562 (76.739)	lr 0.01934
Train [38][2840/3239]	Time 0.210 (0.575)	Data Time 0.002 (0.012)	Loss 2.8634 (2.9029)	Entropy 1.30694 (1.31518)	Top-1 acc 58.203 (54.353)	Top-5 acc 78.516 (76.735)	lr 0.01934
Train [38][2850/3239]	Time 0.201 (0.575)	Data Time 0.001 (0.012)	Loss 2.9414 (2.9029)	Entropy 1.30691 (1.31515)	Top-1 acc 53.906 (54.355)	Top-5 acc 75.391 (76.736)	lr 0.01934
Train [38][2860/3239]	Time 0.251 (0.574)	Data Time 0.001 (0.012)	Loss 2.8596 (2.9030)	Entropy 1.30690 (1.31512)	Top-1 acc 55.469 (54.356)	Top-5 acc 78.125 (76.736)	lr 0.01934
Train [38][2870/3239]	Time 0.222 (0.574)	Data Time 0.001 (0.012)	Loss 2.7802 (2.9030)	Entropy 1.30689 (1.31510)	Top-1 acc 59.766 (54.354)	Top-5 acc 76.953 (76.736)	lr 0.01934
Train [38][2880/3239]	Time 0.228 (0.574)	Data Time 0.001 (0.012)	Loss 2.9344 (2.9030)	Entropy 1.30683 (1.31507)	Top-1 acc 52.734 (54.354)	Top-5 acc 76.172 (76.737)	lr 0.01934
Train [38][2890/3239]	Time 0.243 (0.573)	Data Time 0.001 (0.012)	Loss 2.8371 (2.9030)	Entropy 1.30682 (1.31504)	Top-1 acc 57.812 (54.353)	Top-5 acc 75.000 (76.735)	lr 0.01934
Train [38][2900/3239]	Time 0.235 (0.573)	Data Time 0.001 (0.012)	Loss 2.9815 (2.9032)	Entropy 1.30684 (1.31501)	Top-1 acc 55.078 (54.349)	Top-5 acc 76.172 (76.731)	lr 0.01934
Train [38][2910/3239]	Time 0.215 (0.572)	Data Time 0.001 (0.012)	Loss 2.7556 (2.9032)	Entropy 1.30683 (1.31498)	Top-1 acc 54.688 (54.348)	Top-5 acc 81.641 (76.735)	lr 0.01934
Train [38][2920/3239]	Time 0.203 (0.572)	Data Time 0.001 (0.012)	Loss 2.9841 (2.9033)	Entropy 1.30679 (1.31495)	Top-1 acc 49.609 (54.344)	Top-5 acc 76.562 (76.734)	lr 0.01934
Train [38][2930/3239]	Time 0.230 (0.572)	Data Time 0.001 (0.012)	Loss 2.9992 (2.9034)	Entropy 1.30679 (1.31493)	Top-1 acc 51.953 (54.339)	Top-5 acc 75.781 (76.731)	lr 0.01933
Train [38][2940/3239]	Time 0.317 (0.571)	Data Time 0.001 (0.012)	Loss 2.9617 (2.9035)	Entropy 1.30675 (1.31490)	Top-1 acc 52.344 (54.339)	Top-5 acc 73.047 (76.726)	lr 0.01933
Train [38][2950/3239]	Time 0.246 (0.571)	Data Time 0.001 (0.012)	Loss 2.9221 (2.9036)	Entropy 1.30673 (1.31487)	Top-1 acc 55.469 (54.336)	Top-5 acc 76.953 (76.723)	lr 0.01933
Train [38][2960/3239]	Time 0.221 (0.571)	Data Time 0.001 (0.012)	Loss 2.7286 (2.9033)	Entropy 1.30667 (1.31484)	Top-1 acc 60.156 (54.344)	Top-5 acc 82.422 (76.730)	lr 0.01933
Train [38][2970/3239]	Time 0.237 (0.570)	Data Time 0.001 (0.012)	Loss 3.0266 (2.9032)	Entropy 1.30662 (1.31482)	Top-1 acc 52.734 (54.343)	Top-5 acc 73.047 (76.730)	lr 0.01933
Train [38][2980/3239]	Time 0.202 (0.570)	Data Time 0.002 (0.012)	Loss 3.0177 (2.9032)	Entropy 1.30660 (1.31479)	Top-1 acc 52.734 (54.349)	Top-5 acc 71.875 (76.729)	lr 0.01933
Train [38][2990/3239]	Time 0.393 (0.569)	Data Time 0.005 (0.012)	Loss 2.7758 (2.9030)	Entropy 1.30657 (1.31476)	Top-1 acc 55.469 (54.356)	Top-5 acc 80.078 (76.733)	lr 0.01933
Train [38][3000/3239]	Time 0.202 (0.569)	Data Time 0.002 (0.012)	Loss 2.9867 (2.9031)	Entropy 1.30652 (1.31473)	Top-1 acc 50.000 (54.351)	Top-5 acc 75.391 (76.732)	lr 0.01933
Train [38][3010/3239]	Time 0.253 (0.569)	Data Time 0.001 (0.012)	Loss 2.8526 (2.9032)	Entropy 1.30652 (1.31471)	Top-1 acc 54.297 (54.346)	Top-5 acc 75.391 (76.727)	lr 0.01933
Train [38][3020/3239]	Time 0.228 (0.568)	Data Time 0.002 (0.012)	Loss 2.8482 (2.9031)	Entropy 1.30650 (1.31468)	Top-1 acc 56.250 (54.348)	Top-5 acc 78.125 (76.729)	lr 0.01933
Train [38][3030/3239]	Time 0.271 (0.568)	Data Time 0.001 (0.012)	Loss 2.9674 (2.9031)	Entropy 1.30642 (1.31465)	Top-1 acc 54.688 (54.348)	Top-5 acc 75.391 (76.727)	lr 0.01933
Train [38][3040/3239]	Time 0.387 (0.568)	Data Time 0.001 (0.012)	Loss 2.9868 (2.9031)	Entropy 1.30639 (1.31463)	Top-1 acc 50.391 (54.345)	Top-5 acc 73.828 (76.724)	lr 0.01932
Train [38][3050/3239]	Time 0.221 (0.567)	Data Time 0.001 (0.012)	Loss 2.8996 (2.9032)	Entropy 1.30637 (1.31460)	Top-1 acc 52.734 (54.343)	Top-5 acc 75.781 (76.722)	lr 0.01932
Train [38][3060/3239]	Time 0.258 (0.567)	Data Time 0.001 (0.012)	Loss 2.7894 (2.9030)	Entropy 1.30627 (1.31457)	Top-1 acc 54.688 (54.345)	Top-5 acc 80.078 (76.725)	lr 0.01932
Train [38][3070/3239]	Time 0.227 (0.567)	Data Time 0.001 (0.012)	Loss 2.8248 (2.9031)	Entropy 1.30621 (1.31454)	Top-1 acc 56.250 (54.345)	Top-5 acc 79.688 (76.725)	lr 0.01932
Train [38][3080/3239]	Time 0.256 (0.566)	Data Time 0.001 (0.012)	Loss 2.9049 (2.9030)	Entropy 1.30616 (1.31452)	Top-1 acc 53.516 (54.344)	Top-5 acc 78.516 (76.725)	lr 0.01932
Train [38][3090/3239]	Time 0.359 (0.566)	Data Time 0.001 (0.011)	Loss 2.8772 (2.9031)	Entropy 1.30614 (1.31449)	Top-1 acc 57.812 (54.344)	Top-5 acc 76.562 (76.724)	lr 0.01932
Train [38][3100/3239]	Time 0.236 (0.566)	Data Time 0.001 (0.011)	Loss 2.9792 (2.9031)	Entropy 1.30607 (1.31446)	Top-1 acc 52.344 (54.345)	Top-5 acc 75.000 (76.726)	lr 0.01932
Train [38][3110/3239]	Time 0.240 (0.565)	Data Time 0.001 (0.011)	Loss 2.6376 (2.9032)	Entropy 1.30600 (1.31444)	Top-1 acc 56.641 (54.339)	Top-5 acc 82.031 (76.723)	lr 0.01932
Train [38][3120/3239]	Time 0.245 (0.565)	Data Time 0.001 (0.011)	Loss 2.9040 (2.9032)	Entropy 1.30594 (1.31441)	Top-1 acc 53.125 (54.338)	Top-5 acc 76.172 (76.725)	lr 0.01932
Train [38][3130/3239]	Time 0.270 (0.577)	Data Time 0.004 (0.011)	Loss 2.7845 (2.9031)	Entropy 1.30590 (1.31438)	Top-1 acc 57.422 (54.342)	Top-5 acc 77.344 (76.728)	lr 0.01932
Train [38][3140/3239]	Time 0.229 (0.578)	Data Time 0.002 (0.011)	Loss 2.9036 (2.9030)	Entropy 1.30586 (1.31436)	Top-1 acc 53.516 (54.346)	Top-5 acc 75.781 (76.728)	lr 0.01932
Train [38][3150/3239]	Time 0.249 (0.577)	Data Time 0.001 (0.011)	Loss 2.9080 (2.9030)	Entropy 1.30582 (1.31433)	Top-1 acc 58.203 (54.349)	Top-5 acc 75.000 (76.729)	lr 0.01932
Train [38][3160/3239]	Time 0.249 (0.577)	Data Time 0.002 (0.011)	Loss 2.9217 (2.9032)	Entropy 1.30574 (1.31430)	Top-1 acc 50.781 (54.343)	Top-5 acc 78.516 (76.727)	lr 0.01931
Train [38][3170/3239]	Time 0.238 (0.577)	Data Time 0.002 (0.011)	Loss 2.9228 (2.9032)	Entropy 1.30568 (1.31427)	Top-1 acc 56.641 (54.345)	Top-5 acc 75.781 (76.730)	lr 0.01931
Train [38][3180/3239]	Time 0.221 (0.576)	Data Time 0.000 (0.011)	Loss 2.8123 (2.9030)	Entropy 1.30566 (1.31425)	Top-1 acc 55.078 (54.345)	Top-5 acc 77.344 (76.732)	lr 0.01931
Train [38][3190/3239]	Time 0.228 (0.576)	Data Time 0.000 (0.011)	Loss 2.8447 (2.9030)	Entropy 1.30564 (1.31422)	Top-1 acc 58.203 (54.347)	Top-5 acc 79.297 (76.731)	lr 0.01931
Train [38][3200/3239]	Time 0.197 (0.575)	Data Time 0.000 (0.011)	Loss 2.9564 (2.9031)	Entropy 1.30562 (1.31419)	Top-1 acc 50.781 (54.344)	Top-5 acc 77.344 (76.726)	lr 0.01931
Train [38][3210/3239]	Time 0.230 (0.575)	Data Time 0.000 (0.011)	Loss 3.0879 (2.9032)	Entropy 1.30562 (1.31417)	Top-1 acc 49.219 (54.341)	Top-5 acc 69.531 (76.722)	lr 0.01931
Train [38][3220/3239]	Time 0.197 (0.574)	Data Time 0.000 (0.011)	Loss 2.7779 (2.9033)	Entropy 1.30560 (1.31414)	Top-1 acc 55.859 (54.344)	Top-5 acc 82.031 (76.725)	lr 0.01931
Train [38][3230/3239]	Time 0.158 (0.574)	Data Time 0.000 (0.011)	Loss 2.9496 (2.9032)	Entropy 1.30579 (1.31411)	Top-1 acc 53.125 (54.343)	Top-5 acc 74.219 (76.724)	lr 0.01931
Train [38][3239/3239]	Time 2.233 (0.574)	Data Time 0.000 (0.011)	Loss 3.0962 (2.9033)	Entropy 1.30579 (1.31409)	Top-1 acc 43.210 (54.338)	Top-5 acc 75.309 (76.720)	lr 0.01931
==========Valid [38/120]	loss 1.718	top-1 acc 61.665 (61.665)	top-5 acc 82.887	Train top-1 54.338	top-5 76.720	Entropy 1.30579	Latency-None: 0.000ms	Flops: 557.37M
Train [39][0/3239]	Time 32.742 (32.742)	Data Time 31.637 (31.637)	Loss 2.9967 (2.9967)	Entropy 1.30578 (1.30578)	Top-1 acc 53.125 (53.125)	Top-5 acc 75.391 (75.391)	lr 0.01931
Train [39][10/3239]	Time 2.917 (3.574)	Data Time 0.001 (2.881)	Loss 2.9146 (2.8518)	Entropy 1.30578 (1.30578)	Top-1 acc 52.734 (55.433)	Top-5 acc 78.125 (78.232)	lr 0.01931
Train [39][20/3239]	Time 0.270 (1.990)	Data Time 0.002 (1.510)	Loss 2.9587 (2.8665)	Entropy 1.30572 (1.30575)	Top-1 acc 47.266 (54.985)	Top-5 acc 74.609 (77.344)	lr 0.01931
Train [39][30/3239]	Time 0.234 (1.498)	Data Time 0.001 (1.023)	Loss 2.9029 (2.8695)	Entropy 1.30556 (1.30569)	Top-1 acc 53.516 (54.927)	Top-5 acc 75.781 (77.117)	lr 0.01931
Train [39][40/3239]	Time 0.234 (1.246)	Data Time 0.002 (0.774)	Loss 2.9079 (2.8675)	Entropy 1.30549 (1.30565)	Top-1 acc 55.469 (55.011)	Top-5 acc 78.906 (77.182)	lr 0.01930
Train [39][50/3239]	Time 0.223 (1.097)	Data Time 0.001 (0.623)	Loss 2.8140 (2.8622)	Entropy 1.30540 (1.30560)	Top-1 acc 53.516 (55.032)	Top-5 acc 81.641 (77.328)	lr 0.01930
Train [39][60/3239]	Time 0.303 (0.993)	Data Time 0.001 (0.521)	Loss 2.7567 (2.8715)	Entropy 1.30526 (1.30556)	Top-1 acc 59.766 (54.995)	Top-5 acc 77.344 (77.152)	lr 0.01930
Train [39][70/3239]	Time 0.244 (0.919)	Data Time 0.001 (0.448)	Loss 3.0426 (2.8811)	Entropy 1.30519 (1.30551)	Top-1 acc 48.047 (54.759)	Top-5 acc 72.266 (76.920)	lr 0.01930
Train [39][80/3239]	Time 0.234 (0.864)	Data Time 0.001 (0.393)	Loss 2.7644 (2.8794)	Entropy 1.30519 (1.30547)	Top-1 acc 56.641 (54.818)	Top-5 acc 81.641 (77.025)	lr 0.01930
Train [39][90/3239]	Time 0.218 (0.820)	Data Time 0.001 (0.350)	Loss 2.9057 (2.8827)	Entropy 1.30513 (1.30544)	Top-1 acc 57.422 (55.027)	Top-5 acc 76.172 (76.927)	lr 0.01930
Train [39][100/3239]	Time 0.217 (0.784)	Data Time 0.001 (0.315)	Loss 2.8580 (2.8808)	Entropy 1.30507 (1.30541)	Top-1 acc 58.594 (55.036)	Top-5 acc 78.516 (77.046)	lr 0.01930
Train [39][110/3239]	Time 0.310 (0.757)	Data Time 0.001 (0.287)	Loss 2.8952 (2.8780)	Entropy 1.30504 (1.30538)	Top-1 acc 53.125 (55.085)	Top-5 acc 77.734 (77.140)	lr 0.01930
Train [39][120/3239]	Time 2.544 (0.733)	Data Time 0.001 (0.263)	Loss 2.6853 (2.8737)	Entropy 1.30504 (1.30535)	Top-1 acc 58.984 (55.172)	Top-5 acc 82.422 (77.292)	lr 0.01930
Train [39][130/3239]	Time 0.292 (0.695)	Data Time 0.002 (0.243)	Loss 2.6810 (2.8701)	Entropy 1.30499 (1.30532)	Top-1 acc 59.375 (55.257)	Top-5 acc 79.297 (77.308)	lr 0.01930
Train [39][140/3239]	Time 0.206 (0.678)	Data Time 0.001 (0.226)	Loss 2.8485 (2.8712)	Entropy 1.30498 (1.30530)	Top-1 acc 57.031 (55.211)	Top-5 acc 77.344 (77.263)	lr 0.01930
Train [39][150/3239]	Time 0.238 (0.664)	Data Time 0.001 (0.211)	Loss 2.9398 (2.8705)	Entropy 1.30496 (1.30528)	Top-1 acc 56.641 (55.275)	Top-5 acc 76.953 (77.269)	lr 0.01930
Train [39][160/3239]	Time 0.229 (0.652)	Data Time 0.002 (0.198)	Loss 2.9770 (2.8718)	Entropy 1.30494 (1.30525)	Top-1 acc 54.297 (55.282)	Top-5 acc 73.828 (77.242)	lr 0.01929
Train [39][170/3239]	Time 0.209 (0.640)	Data Time 0.001 (0.187)	Loss 2.9033 (2.8674)	Entropy 1.30487 (1.30523)	Top-1 acc 53.906 (55.373)	Top-5 acc 75.391 (77.346)	lr 0.01929
Train [39][180/3239]	Time 0.239 (0.631)	Data Time 0.001 (0.177)	Loss 2.8633 (2.8668)	Entropy 1.30487 (1.30521)	Top-1 acc 55.469 (55.380)	Top-5 acc 78.906 (77.331)	lr 0.01929
Train [39][190/3239]	Time 0.200 (0.622)	Data Time 0.001 (0.167)	Loss 2.8891 (2.8676)	Entropy 1.30485 (1.30520)	Top-1 acc 56.641 (55.395)	Top-5 acc 76.953 (77.291)	lr 0.01929
Train [39][200/3239]	Time 0.227 (0.614)	Data Time 0.002 (0.159)	Loss 2.9256 (2.8684)	Entropy 1.30495 (1.30518)	Top-1 acc 52.734 (55.381)	Top-5 acc 75.781 (77.305)	lr 0.01929
Train [39][210/3239]	Time 0.248 (0.608)	Data Time 0.003 (0.152)	Loss 3.0403 (2.8684)	Entropy 1.30487 (1.30517)	Top-1 acc 51.172 (55.387)	Top-5 acc 73.438 (77.303)	lr 0.01929
Train [39][220/3239]	Time 0.246 (0.602)	Data Time 0.001 (0.145)	Loss 2.8912 (2.8660)	Entropy 1.30485 (1.30515)	Top-1 acc 55.469 (55.396)	Top-5 acc 76.562 (77.384)	lr 0.01929
Train [39][230/3239]	Time 2.348 (0.594)	Data Time 0.001 (0.139)	Loss 2.5727 (2.8638)	Entropy 1.30485 (1.30514)	Top-1 acc 65.234 (55.492)	Top-5 acc 82.812 (77.388)	lr 0.01929
Train [39][240/3239]	Time 0.175 (0.579)	Data Time 0.001 (0.133)	Loss 2.7177 (2.8626)	Entropy 1.30483 (1.30513)	Top-1 acc 60.156 (55.495)	Top-5 acc 80.469 (77.431)	lr 0.01929
Train [39][250/3239]	Time 0.342 (0.752)	Data Time 0.003 (0.128)	Loss 2.7627 (2.8616)	Entropy 1.30478 (1.30511)	Top-1 acc 57.812 (55.515)	Top-5 acc 80.078 (77.446)	lr 0.01929
Train [39][260/3239]	Time 0.223 (0.741)	Data Time 0.001 (0.123)	Loss 2.8704 (2.8614)	Entropy 1.30476 (1.30510)	Top-1 acc 52.344 (55.440)	Top-5 acc 76.562 (77.457)	lr 0.01929
Train [39][270/3239]	Time 0.235 (0.731)	Data Time 0.001 (0.119)	Loss 2.7557 (2.8614)	Entropy 1.30473 (1.30509)	Top-1 acc 57.031 (55.444)	Top-5 acc 80.859 (77.466)	lr 0.01929
Train [39][280/3239]	Time 0.237 (0.720)	Data Time 0.002 (0.115)	Loss 2.6509 (2.8607)	Entropy 1.30473 (1.30508)	Top-1 acc 61.719 (55.458)	Top-5 acc 81.250 (77.481)	lr 0.01928
Train [39][290/3239]	Time 0.233 (0.712)	Data Time 0.002 (0.111)	Loss 2.9051 (2.8612)	Entropy 1.30464 (1.30506)	Top-1 acc 53.906 (55.502)	Top-5 acc 78.516 (77.465)	lr 0.01928
Train [39][300/3239]	Time 0.226 (0.704)	Data Time 0.001 (0.107)	Loss 2.7028 (2.8603)	Entropy 1.30456 (1.30505)	Top-1 acc 57.812 (55.534)	Top-5 acc 82.422 (77.485)	lr 0.01928
Train [39][310/3239]	Time 0.214 (0.696)	Data Time 0.001 (0.104)	Loss 2.8475 (2.8594)	Entropy 1.30460 (1.30503)	Top-1 acc 56.641 (55.572)	Top-5 acc 76.172 (77.499)	lr 0.01928
Train [39][320/3239]	Time 0.227 (0.690)	Data Time 0.001 (0.101)	Loss 3.1071 (2.8608)	Entropy 1.30456 (1.30502)	Top-1 acc 50.000 (55.541)	Top-5 acc 72.266 (77.472)	lr 0.01928
Train [39][330/3239]	Time 0.294 (0.683)	Data Time 0.002 (0.098)	Loss 2.9875 (2.8610)	Entropy 1.30452 (1.30500)	Top-1 acc 52.734 (55.512)	Top-5 acc 75.000 (77.470)	lr 0.01928
Train [39][340/3239]	Time 2.466 (0.676)	Data Time 0.001 (0.095)	Loss 2.9633 (2.8602)	Entropy 1.30452 (1.30499)	Top-1 acc 53.906 (55.519)	Top-5 acc 74.219 (77.493)	lr 0.01928
Train [39][350/3239]	Time 0.245 (0.664)	Data Time 0.001 (0.092)	Loss 2.9273 (2.8592)	Entropy 1.30443 (1.30497)	Top-1 acc 60.156 (55.549)	Top-5 acc 78.125 (77.532)	lr 0.01928
Train [39][360/3239]	Time 0.213 (0.658)	Data Time 0.001 (0.090)	Loss 2.9048 (2.8594)	Entropy 1.30439 (1.30496)	Top-1 acc 50.781 (55.563)	Top-5 acc 76.953 (77.534)	lr 0.01928
Train [39][370/3239]	Time 0.404 (0.653)	Data Time 0.001 (0.087)	Loss 2.9494 (2.8606)	Entropy 1.30438 (1.30494)	Top-1 acc 57.031 (55.575)	Top-5 acc 76.562 (77.521)	lr 0.01928
Train [39][380/3239]	Time 0.221 (0.648)	Data Time 0.001 (0.085)	Loss 2.9885 (2.8622)	Entropy 1.30430 (1.30493)	Top-1 acc 55.469 (55.542)	Top-5 acc 75.391 (77.492)	lr 0.01928
Train [39][390/3239]	Time 0.234 (0.644)	Data Time 0.001 (0.083)	Loss 3.0484 (2.8624)	Entropy 1.30425 (1.30491)	Top-1 acc 53.125 (55.553)	Top-5 acc 75.781 (77.500)	lr 0.01927
Train [39][400/3239]	Time 0.208 (0.639)	Data Time 0.001 (0.081)	Loss 2.9116 (2.8624)	Entropy 1.30424 (1.30489)	Top-1 acc 55.469 (55.561)	Top-5 acc 76.953 (77.498)	lr 0.01927
Train [39][410/3239]	Time 0.226 (0.635)	Data Time 0.002 (0.079)	Loss 2.8116 (2.8615)	Entropy 1.30422 (1.30488)	Top-1 acc 57.031 (55.569)	Top-5 acc 80.859 (77.520)	lr 0.01927
Train [39][420/3239]	Time 0.299 (0.631)	Data Time 0.001 (0.077)	Loss 2.9565 (2.8639)	Entropy 1.30418 (1.30486)	Top-1 acc 55.078 (55.507)	Top-5 acc 76.953 (77.483)	lr 0.01927
Train [39][430/3239]	Time 0.227 (0.627)	Data Time 0.001 (0.075)	Loss 2.9938 (2.8640)	Entropy 1.30411 (1.30484)	Top-1 acc 54.297 (55.532)	Top-5 acc 75.391 (77.469)	lr 0.01927
Train [39][440/3239]	Time 0.219 (0.624)	Data Time 0.001 (0.074)	Loss 2.8523 (2.8632)	Entropy 1.30412 (1.30483)	Top-1 acc 53.906 (55.534)	Top-5 acc 78.125 (77.478)	lr 0.01927
Train [39][450/3239]	Time 2.433 (0.620)	Data Time 0.001 (0.072)	Loss 2.7855 (2.8643)	Entropy 1.30412 (1.30481)	Top-1 acc 58.594 (55.514)	Top-5 acc 79.688 (77.449)	lr 0.01927
Train [39][460/3239]	Time 0.278 (0.612)	Data Time 0.001 (0.071)	Loss 2.7812 (2.8645)	Entropy 1.30413 (1.30480)	Top-1 acc 57.031 (55.519)	Top-5 acc 78.516 (77.440)	lr 0.01927
Train [39][470/3239]	Time 0.338 (0.608)	Data Time 0.001 (0.069)	Loss 2.9102 (2.8640)	Entropy 1.30409 (1.30478)	Top-1 acc 53.125 (55.527)	Top-5 acc 74.609 (77.432)	lr 0.01927
Train [39][480/3239]	Time 0.208 (0.605)	Data Time 0.002 (0.068)	Loss 2.9093 (2.8634)	Entropy 1.30404 (1.30477)	Top-1 acc 53.125 (55.542)	Top-5 acc 76.172 (77.449)	lr 0.01927
Train [39][490/3239]	Time 0.226 (0.602)	Data Time 0.002 (0.066)	Loss 2.6976 (2.8648)	Entropy 1.30392 (1.30475)	Top-1 acc 57.812 (55.501)	Top-5 acc 78.906 (77.410)	lr 0.01927
Train [39][500/3239]	Time 0.214 (0.599)	Data Time 0.001 (0.065)	Loss 2.7535 (2.8642)	Entropy 1.30393 (1.30473)	Top-1 acc 59.375 (55.505)	Top-5 acc 79.297 (77.420)	lr 0.01927
Train [39][510/3239]	Time 0.220 (0.597)	Data Time 0.001 (0.064)	Loss 3.0695 (2.8650)	Entropy 1.30391 (1.30472)	Top-1 acc 53.516 (55.478)	Top-5 acc 73.828 (77.414)	lr 0.01926
Train [39][520/3239]	Time 0.316 (0.594)	Data Time 0.002 (0.063)	Loss 2.8936 (2.8659)	Entropy 1.30387 (1.30470)	Top-1 acc 55.859 (55.441)	Top-5 acc 75.000 (77.392)	lr 0.01926
Train [39][530/3239]	Time 0.226 (0.592)	Data Time 0.001 (0.061)	Loss 3.7334 (2.8666)	Entropy 1.30382 (1.30469)	Top-1 acc 35.156 (55.422)	Top-5 acc 62.500 (77.372)	lr 0.01926
Train [39][540/3239]	Time 0.228 (0.589)	Data Time 0.002 (0.060)	Loss 2.9469 (2.8657)	Entropy 1.30384 (1.30467)	Top-1 acc 52.344 (55.443)	Top-5 acc 76.562 (77.386)	lr 0.01926
Train [39][550/3239]	Time 0.211 (0.587)	Data Time 0.001 (0.059)	Loss 2.8383 (2.8655)	Entropy 1.30408 (1.30466)	Top-1 acc 58.203 (55.452)	Top-5 acc 77.734 (77.390)	lr 0.01926
Train [39][560/3239]	Time 2.493 (0.584)	Data Time 0.001 (0.058)	Loss 2.5607 (2.8647)	Entropy 1.30408 (1.30465)	Top-1 acc 62.500 (55.467)	Top-5 acc 84.375 (77.413)	lr 0.01926
Train [39][570/3239]	Time 0.289 (0.578)	Data Time 0.001 (0.057)	Loss 2.7087 (2.8655)	Entropy 1.30406 (1.30464)	Top-1 acc 58.203 (55.451)	Top-5 acc 78.906 (77.385)	lr 0.01926
Train [39][580/3239]	Time 0.250 (0.576)	Data Time 0.001 (0.056)	Loss 3.0712 (2.8674)	Entropy 1.30400 (1.30462)	Top-1 acc 50.391 (55.387)	Top-5 acc 72.656 (77.340)	lr 0.01926
Train [39][590/3239]	Time 0.205 (0.574)	Data Time 0.001 (0.055)	Loss 2.9671 (2.8677)	Entropy 1.30394 (1.30461)	Top-1 acc 54.688 (55.384)	Top-5 acc 75.781 (77.334)	lr 0.01926
Train [39][600/3239]	Time 0.218 (0.572)	Data Time 0.002 (0.054)	Loss 2.9638 (2.8685)	Entropy 1.30394 (1.30460)	Top-1 acc 54.297 (55.380)	Top-5 acc 74.609 (77.323)	lr 0.01926
Train [39][610/3239]	Time 0.228 (0.645)	Data Time 0.002 (0.054)	Loss 2.9063 (2.8697)	Entropy 1.30381 (1.30459)	Top-1 acc 54.297 (55.342)	Top-5 acc 78.125 (77.307)	lr 0.01926
Train [39][620/3239]	Time 0.208 (0.642)	Data Time 0.002 (0.053)	Loss 2.8555 (2.8698)	Entropy 1.30373 (1.30458)	Top-1 acc 55.859 (55.342)	Top-5 acc 76.562 (77.300)	lr 0.01926
Train [39][630/3239]	Time 0.308 (0.639)	Data Time 0.001 (0.052)	Loss 2.8122 (2.8693)	Entropy 1.30365 (1.30456)	Top-1 acc 55.469 (55.350)	Top-5 acc 77.734 (77.312)	lr 0.01925
Train [39][640/3239]	Time 0.238 (0.635)	Data Time 0.001 (0.051)	Loss 2.7736 (2.8694)	Entropy 1.30364 (1.30455)	Top-1 acc 59.375 (55.351)	Top-5 acc 78.125 (77.310)	lr 0.01925
Train [39][650/3239]	Time 0.189 (0.632)	Data Time 0.001 (0.050)	Loss 2.6090 (2.8692)	Entropy 1.30357 (1.30453)	Top-1 acc 61.719 (55.343)	Top-5 acc 80.859 (77.314)	lr 0.01925
Train [39][660/3239]	Time 0.231 (0.630)	Data Time 0.001 (0.050)	Loss 2.7538 (2.8692)	Entropy 1.30353 (1.30452)	Top-1 acc 57.031 (55.332)	Top-5 acc 78.906 (77.313)	lr 0.01925
Train [39][670/3239]	Time 2.473 (0.627)	Data Time 0.002 (0.049)	Loss 3.1845 (2.8699)	Entropy 1.30353 (1.30450)	Top-1 acc 45.312 (55.317)	Top-5 acc 69.922 (77.294)	lr 0.01925
Train [39][680/3239]	Time 0.308 (0.622)	Data Time 0.001 (0.048)	Loss 2.5378 (2.8703)	Entropy 1.30346 (1.30449)	Top-1 acc 61.328 (55.299)	Top-5 acc 83.203 (77.274)	lr 0.01925
Train [39][690/3239]	Time 0.234 (0.619)	Data Time 0.001 (0.048)	Loss 2.7788 (2.8698)	Entropy 1.30346 (1.30447)	Top-1 acc 58.594 (55.303)	Top-5 acc 80.078 (77.291)	lr 0.01925
Train [39][700/3239]	Time 0.225 (0.617)	Data Time 0.001 (0.047)	Loss 2.8838 (2.8706)	Entropy 1.30344 (1.30446)	Top-1 acc 55.859 (55.292)	Top-5 acc 78.125 (77.273)	lr 0.01925
Train [39][710/3239]	Time 0.214 (0.615)	Data Time 0.001 (0.046)	Loss 2.6544 (2.8697)	Entropy 1.30339 (1.30445)	Top-1 acc 60.938 (55.305)	Top-5 acc 84.766 (77.297)	lr 0.01925
Train [39][720/3239]	Time 0.215 (0.612)	Data Time 0.001 (0.046)	Loss 2.8434 (2.8697)	Entropy 1.30334 (1.30443)	Top-1 acc 53.125 (55.298)	Top-5 acc 80.859 (77.300)	lr 0.01925
Train [39][730/3239]	Time 0.352 (0.610)	Data Time 0.001 (0.045)	Loss 2.7442 (2.8689)	Entropy 1.30329 (1.30441)	Top-1 acc 61.328 (55.316)	Top-5 acc 80.469 (77.324)	lr 0.01925
Train [39][740/3239]	Time 0.209 (0.608)	Data Time 0.001 (0.045)	Loss 2.9039 (2.8690)	Entropy 1.30307 (1.30440)	Top-1 acc 51.953 (55.292)	Top-5 acc 77.344 (77.326)	lr 0.01925
Train [39][750/3239]	Time 0.258 (0.606)	Data Time 0.001 (0.044)	Loss 2.8531 (2.8686)	Entropy 1.30298 (1.30438)	Top-1 acc 51.562 (55.307)	Top-5 acc 81.250 (77.342)	lr 0.01924
Train [39][760/3239]	Time 0.206 (0.603)	Data Time 0.001 (0.043)	Loss 2.9156 (2.8687)	Entropy 1.30287 (1.30436)	Top-1 acc 53.906 (55.301)	Top-5 acc 76.172 (77.336)	lr 0.01924
Train [39][770/3239]	Time 0.205 (0.602)	Data Time 0.001 (0.043)	Loss 2.7577 (2.8681)	Entropy 1.30278 (1.30434)	Top-1 acc 55.859 (55.298)	Top-5 acc 82.422 (77.342)	lr 0.01924
Train [39][780/3239]	Time 2.470 (0.600)	Data Time 0.001 (0.042)	Loss 2.9506 (2.8686)	Entropy 1.30278 (1.30432)	Top-1 acc 54.688 (55.271)	Top-5 acc 76.172 (77.329)	lr 0.01924
Train [39][790/3239]	Time 0.265 (0.595)	Data Time 0.001 (0.042)	Loss 2.7346 (2.8686)	Entropy 1.30276 (1.30430)	Top-1 acc 59.375 (55.270)	Top-5 acc 76.562 (77.324)	lr 0.01924
Train [39][800/3239]	Time 0.245 (0.594)	Data Time 0.002 (0.041)	Loss 2.9418 (2.8701)	Entropy 1.30274 (1.30428)	Top-1 acc 51.953 (55.241)	Top-5 acc 76.953 (77.296)	lr 0.01924
Train [39][810/3239]	Time 0.208 (0.592)	Data Time 0.001 (0.041)	Loss 2.8285 (2.8703)	Entropy 1.30271 (1.30426)	Top-1 acc 53.906 (55.244)	Top-5 acc 78.516 (77.297)	lr 0.01924
Train [39][820/3239]	Time 0.210 (0.591)	Data Time 0.001 (0.040)	Loss 2.8140 (2.8693)	Entropy 1.30267 (1.30424)	Top-1 acc 58.984 (55.264)	Top-5 acc 77.734 (77.319)	lr 0.01924
Train [39][830/3239]	Time 0.232 (0.589)	Data Time 0.001 (0.040)	Loss 2.8847 (2.8694)	Entropy 1.30262 (1.30422)	Top-1 acc 54.688 (55.264)	Top-5 acc 75.391 (77.315)	lr 0.01924
Train [39][840/3239]	Time 0.215 (0.587)	Data Time 0.001 (0.039)	Loss 3.0928 (2.8698)	Entropy 1.30259 (1.30421)	Top-1 acc 47.656 (55.254)	Top-5 acc 73.047 (77.314)	lr 0.01924
Train [39][850/3239]	Time 0.234 (0.586)	Data Time 0.001 (0.039)	Loss 2.7917 (2.8696)	Entropy 1.30252 (1.30419)	Top-1 acc 55.469 (55.256)	Top-5 acc 79.297 (77.319)	lr 0.01924
Train [39][860/3239]	Time 0.225 (0.584)	Data Time 0.001 (0.039)	Loss 2.7744 (2.8698)	Entropy 1.30246 (1.30417)	Top-1 acc 59.766 (55.257)	Top-5 acc 78.516 (77.313)	lr 0.01923
Train [39][870/3239]	Time 0.215 (0.583)	Data Time 0.001 (0.038)	Loss 2.9754 (2.8705)	Entropy 1.30236 (1.30415)	Top-1 acc 52.734 (55.238)	Top-5 acc 76.953 (77.307)	lr 0.01923
Train [39][880/3239]	Time 0.254 (0.582)	Data Time 0.001 (0.038)	Loss 2.7788 (2.8708)	Entropy 1.30227 (1.30413)	Top-1 acc 57.031 (55.227)	Top-5 acc 82.422 (77.311)	lr 0.01923
Train [39][890/3239]	Time 2.385 (0.580)	Data Time 0.001 (0.037)	Loss 3.0095 (2.8712)	Entropy 1.30227 (1.30411)	Top-1 acc 51.172 (55.216)	Top-5 acc 75.000 (77.303)	lr 0.01923
Train [39][900/3239]	Time 0.220 (0.576)	Data Time 0.001 (0.037)	Loss 2.8460 (2.8708)	Entropy 1.30204 (1.30408)	Top-1 acc 57.422 (55.220)	Top-5 acc 76.953 (77.311)	lr 0.01923
Train [39][910/3239]	Time 0.234 (0.575)	Data Time 0.002 (0.037)	Loss 2.6981 (2.8705)	Entropy 1.30200 (1.30406)	Top-1 acc 60.547 (55.232)	Top-5 acc 81.641 (77.323)	lr 0.01923
Train [39][920/3239]	Time 0.223 (0.574)	Data Time 0.001 (0.036)	Loss 2.8039 (2.8713)	Entropy 1.30201 (1.30404)	Top-1 acc 55.469 (55.215)	Top-5 acc 80.078 (77.311)	lr 0.01923
Train [39][930/3239]	Time 0.313 (0.572)	Data Time 0.001 (0.036)	Loss 2.8209 (2.8725)	Entropy 1.30194 (1.30402)	Top-1 acc 58.203 (55.183)	Top-5 acc 76.562 (77.283)	lr 0.01923
Train [39][940/3239]	Time 0.216 (0.571)	Data Time 0.001 (0.035)	Loss 2.9646 (2.8726)	Entropy 1.30192 (1.30399)	Top-1 acc 53.516 (55.173)	Top-5 acc 77.734 (77.281)	lr 0.01923
Train [39][950/3239]	Time 0.214 (0.570)	Data Time 0.001 (0.035)	Loss 2.9891 (2.8729)	Entropy 1.30194 (1.30397)	Top-1 acc 50.000 (55.165)	Top-5 acc 75.000 (77.281)	lr 0.01923
Train [39][960/3239]	Time 0.261 (0.569)	Data Time 0.001 (0.035)	Loss 2.9319 (2.8734)	Entropy 1.30191 (1.30395)	Top-1 acc 55.469 (55.154)	Top-5 acc 75.781 (77.269)	lr 0.01923
Train [39][970/3239]	Time 0.348 (0.614)	Data Time 0.003 (0.034)	Loss 2.9129 (2.8732)	Entropy 1.30178 (1.30393)	Top-1 acc 54.297 (55.146)	Top-5 acc 75.391 (77.272)	lr 0.01923
Train [39][980/3239]	Time 0.330 (0.613)	Data Time 0.002 (0.034)	Loss 2.8596 (2.8730)	Entropy 1.30169 (1.30391)	Top-1 acc 57.422 (55.150)	Top-5 acc 76.172 (77.272)	lr 0.01922
Train [39][990/3239]	Time 0.246 (0.611)	Data Time 0.002 (0.034)	Loss 2.9508 (2.8741)	Entropy 1.30166 (1.30388)	Top-1 acc 54.688 (55.128)	Top-5 acc 76.562 (77.253)	lr 0.01922
Train [39][1000/3239]	Time 2.422 (0.610)	Data Time 0.002 (0.033)	Loss 2.7894 (2.8742)	Entropy 1.30166 (1.30386)	Top-1 acc 56.250 (55.126)	Top-5 acc 80.078 (77.248)	lr 0.01922
Train [39][1010/3239]	Time 0.197 (0.606)	Data Time 0.001 (0.033)	Loss 2.8627 (2.8744)	Entropy 1.30162 (1.30384)	Top-1 acc 55.469 (55.116)	Top-5 acc 74.609 (77.249)	lr 0.01922
Train [39][1020/3239]	Time 0.239 (0.604)	Data Time 0.001 (0.033)	Loss 2.9012 (2.8746)	Entropy 1.30163 (1.30382)	Top-1 acc 53.516 (55.112)	Top-5 acc 73.828 (77.245)	lr 0.01922
Train [39][1030/3239]	Time 0.223 (0.603)	Data Time 0.001 (0.033)	Loss 2.7030 (2.8755)	Entropy 1.30160 (1.30380)	Top-1 acc 57.812 (55.085)	Top-5 acc 78.906 (77.232)	lr 0.01922
Train [39][1040/3239]	Time 0.198 (0.602)	Data Time 0.001 (0.032)	Loss 2.9438 (2.8757)	Entropy 1.30147 (1.30378)	Top-1 acc 52.344 (55.075)	Top-5 acc 77.344 (77.232)	lr 0.01922
Train [39][1050/3239]	Time 0.232 (0.600)	Data Time 0.001 (0.032)	Loss 2.8475 (2.8760)	Entropy 1.30139 (1.30375)	Top-1 acc 53.906 (55.068)	Top-5 acc 77.734 (77.217)	lr 0.01922
Train [39][1060/3239]	Time 0.232 (0.599)	Data Time 0.001 (0.032)	Loss 3.0139 (2.8766)	Entropy 1.30135 (1.30373)	Top-1 acc 55.078 (55.053)	Top-5 acc 71.875 (77.213)	lr 0.01922
Train [39][1070/3239]	Time 0.236 (0.598)	Data Time 0.002 (0.031)	Loss 2.8308 (2.8769)	Entropy 1.30125 (1.30371)	Top-1 acc 53.906 (55.053)	Top-5 acc 76.953 (77.207)	lr 0.01922
Train [39][1080/3239]	Time 0.208 (0.596)	Data Time 0.002 (0.031)	Loss 2.9764 (2.8777)	Entropy 1.30120 (1.30368)	Top-1 acc 53.516 (55.030)	Top-5 acc 73.828 (77.191)	lr 0.01922
Train [39][1090/3239]	Time 0.238 (0.595)	Data Time 0.001 (0.031)	Loss 2.8454 (2.8776)	Entropy 1.30117 (1.30366)	Top-1 acc 56.641 (55.029)	Top-5 acc 79.688 (77.193)	lr 0.01922
Train [39][1100/3239]	Time 0.264 (0.594)	Data Time 0.001 (0.031)	Loss 2.6215 (2.8771)	Entropy 1.30115 (1.30364)	Top-1 acc 60.547 (55.031)	Top-5 acc 81.250 (77.205)	lr 0.01921
Train [39][1110/3239]	Time 2.506 (0.593)	Data Time 0.001 (0.030)	Loss 2.8798 (2.8771)	Entropy 1.30115 (1.30362)	Top-1 acc 54.688 (55.031)	Top-5 acc 78.516 (77.206)	lr 0.01921
Train [39][1120/3239]	Time 0.245 (0.590)	Data Time 0.001 (0.030)	Loss 2.8558 (2.8768)	Entropy 1.30112 (1.30359)	Top-1 acc 56.641 (55.039)	Top-5 acc 78.516 (77.213)	lr 0.01921
Train [39][1130/3239]	Time 0.220 (0.588)	Data Time 0.001 (0.030)	Loss 2.7647 (2.8769)	Entropy 1.30108 (1.30357)	Top-1 acc 57.422 (55.033)	Top-5 acc 78.906 (77.204)	lr 0.01921
Train [39][1140/3239]	Time 0.232 (0.587)	Data Time 0.001 (0.030)	Loss 2.7805 (2.8768)	Entropy 1.30100 (1.30355)	Top-1 acc 56.250 (55.043)	Top-5 acc 79.297 (77.206)	lr 0.01921
Train [39][1150/3239]	Time 0.228 (0.586)	Data Time 0.001 (0.029)	Loss 2.6152 (2.8764)	Entropy 1.30093 (1.30353)	Top-1 acc 58.203 (55.060)	Top-5 acc 81.250 (77.218)	lr 0.01921
Train [39][1160/3239]	Time 0.232 (0.585)	Data Time 0.001 (0.029)	Loss 2.7468 (2.8763)	Entropy 1.30089 (1.30350)	Top-1 acc 54.688 (55.062)	Top-5 acc 77.734 (77.214)	lr 0.01921
Train [39][1170/3239]	Time 0.261 (0.584)	Data Time 0.001 (0.029)	Loss 2.9023 (2.8760)	Entropy 1.30087 (1.30348)	Top-1 acc 52.344 (55.071)	Top-5 acc 77.734 (77.218)	lr 0.01921
Train [39][1180/3239]	Time 0.210 (0.583)	Data Time 0.001 (0.029)	Loss 2.9760 (2.8766)	Entropy 1.30085 (1.30346)	Top-1 acc 53.906 (55.059)	Top-5 acc 73.828 (77.203)	lr 0.01921
Train [39][1190/3239]	Time 0.307 (0.582)	Data Time 0.001 (0.028)	Loss 2.8888 (2.8766)	Entropy 1.30084 (1.30344)	Top-1 acc 55.078 (55.049)	Top-5 acc 79.297 (77.206)	lr 0.01921
Train [39][1200/3239]	Time 0.210 (0.581)	Data Time 0.001 (0.028)	Loss 2.8118 (2.8765)	Entropy 1.30082 (1.30342)	Top-1 acc 54.297 (55.050)	Top-5 acc 78.906 (77.211)	lr 0.01921
Train [39][1210/3239]	Time 0.236 (0.580)	Data Time 0.001 (0.028)	Loss 3.0464 (2.8766)	Entropy 1.30081 (1.30340)	Top-1 acc 52.734 (55.041)	Top-5 acc 75.391 (77.209)	lr 0.01921
Train [39][1220/3239]	Time 2.473 (0.578)	Data Time 0.001 (0.028)	Loss 2.7096 (2.8767)	Entropy 1.30081 (1.30337)	Top-1 acc 57.031 (55.037)	Top-5 acc 82.031 (77.207)	lr 0.01920
Train [39][1230/3239]	Time 0.214 (0.576)	Data Time 0.001 (0.027)	Loss 2.9343 (2.8771)	Entropy 1.30077 (1.30335)	Top-1 acc 53.125 (55.032)	Top-5 acc 76.953 (77.206)	lr 0.01920
Train [39][1240/3239]	Time 0.314 (0.575)	Data Time 0.001 (0.027)	Loss 2.8213 (2.8773)	Entropy 1.30078 (1.30333)	Top-1 acc 55.859 (55.025)	Top-5 acc 78.906 (77.202)	lr 0.01920
Train [39][1250/3239]	Time 0.217 (0.574)	Data Time 0.001 (0.027)	Loss 2.7734 (2.8775)	Entropy 1.30072 (1.30331)	Top-1 acc 55.859 (55.019)	Top-5 acc 79.297 (77.199)	lr 0.01920
Train [39][1260/3239]	Time 0.243 (0.573)	Data Time 0.001 (0.027)	Loss 2.9291 (2.8774)	Entropy 1.30065 (1.30329)	Top-1 acc 53.516 (55.019)	Top-5 acc 73.828 (77.195)	lr 0.01920
Train [39][1270/3239]	Time 0.238 (0.572)	Data Time 0.001 (0.027)	Loss 2.6666 (2.8772)	Entropy 1.30062 (1.30327)	Top-1 acc 55.859 (55.013)	Top-5 acc 79.688 (77.199)	lr 0.01920
Train [39][1280/3239]	Time 0.216 (0.571)	Data Time 0.001 (0.026)	Loss 3.0455 (2.8777)	Entropy 1.30056 (1.30325)	Top-1 acc 52.734 (55.005)	Top-5 acc 74.609 (77.185)	lr 0.01920
Train [39][1290/3239]	Time 0.276 (0.570)	Data Time 0.002 (0.026)	Loss 2.8452 (2.8781)	Entropy 1.30037 (1.30323)	Top-1 acc 56.250 (55.000)	Top-5 acc 79.297 (77.178)	lr 0.01920
Train [39][1300/3239]	Time 0.221 (0.569)	Data Time 0.001 (0.026)	Loss 2.8735 (2.8781)	Entropy 1.30032 (1.30321)	Top-1 acc 55.078 (55.002)	Top-5 acc 76.562 (77.174)	lr 0.01920
Train [39][1310/3239]	Time 0.227 (0.568)	Data Time 0.001 (0.026)	Loss 2.8897 (2.8782)	Entropy 1.30029 (1.30318)	Top-1 acc 53.125 (54.999)	Top-5 acc 77.344 (77.172)	lr 0.01920
Train [39][1320/3239]	Time 0.226 (0.567)	Data Time 0.001 (0.026)	Loss 3.1176 (2.8782)	Entropy 1.30029 (1.30316)	Top-1 acc 53.906 (54.995)	Top-5 acc 74.609 (77.175)	lr 0.01920
Train [39][1330/3239]	Time 47.896 (0.601)	Data Time 0.001 (0.026)	Loss 3.0122 (2.8784)	Entropy 1.30029 (1.30314)	Top-1 acc 53.125 (54.995)	Top-5 acc 73.828 (77.168)	lr 0.01919
Train [39][1340/3239]	Time 0.253 (0.598)	Data Time 0.003 (0.025)	Loss 2.7726 (2.8785)	Entropy 1.30017 (1.30312)	Top-1 acc 57.422 (54.986)	Top-5 acc 81.641 (77.172)	lr 0.01919
Train [39][1350/3239]	Time 0.242 (0.597)	Data Time 0.002 (0.025)	Loss 2.8059 (2.8784)	Entropy 1.30016 (1.30310)	Top-1 acc 54.297 (54.984)	Top-5 acc 76.562 (77.164)	lr 0.01919
Train [39][1360/3239]	Time 0.243 (0.596)	Data Time 0.001 (0.025)	Loss 2.8431 (2.8784)	Entropy 1.30020 (1.30307)	Top-1 acc 57.031 (54.985)	Top-5 acc 79.297 (77.165)	lr 0.01919
Train [39][1370/3239]	Time 0.217 (0.595)	Data Time 0.002 (0.025)	Loss 2.8135 (2.8786)	Entropy 1.30023 (1.30305)	Top-1 acc 54.297 (54.979)	Top-5 acc 80.859 (77.161)	lr 0.01919
Train [39][1380/3239]	Time 0.227 (0.594)	Data Time 0.001 (0.025)	Loss 2.8908 (2.8790)	Entropy 1.30023 (1.30303)	Top-1 acc 55.469 (54.971)	Top-5 acc 78.125 (77.157)	lr 0.01919
Train [39][1390/3239]	Time 0.205 (0.593)	Data Time 0.001 (0.025)	Loss 2.9700 (2.8793)	Entropy 1.30019 (1.30301)	Top-1 acc 50.391 (54.961)	Top-5 acc 74.609 (77.151)	lr 0.01919
Train [39][1400/3239]	Time 0.324 (0.592)	Data Time 0.001 (0.024)	Loss 2.5426 (2.8789)	Entropy 1.30015 (1.30299)	Top-1 acc 61.719 (54.964)	Top-5 acc 85.547 (77.161)	lr 0.01919
Train [39][1410/3239]	Time 0.212 (0.591)	Data Time 0.002 (0.024)	Loss 2.9231 (2.8791)	Entropy 1.30014 (1.30297)	Top-1 acc 57.031 (54.957)	Top-5 acc 76.172 (77.154)	lr 0.01919
Train [39][1420/3239]	Time 0.284 (0.591)	Data Time 0.038 (0.024)	Loss 3.0701 (2.8793)	Entropy 1.30010 (1.30295)	Top-1 acc 49.219 (54.946)	Top-5 acc 75.781 (77.150)	lr 0.01919
Train [39][1430/3239]	Time 0.256 (0.590)	Data Time 0.001 (0.024)	Loss 2.9496 (2.8793)	Entropy 1.30001 (1.30293)	Top-1 acc 51.172 (54.939)	Top-5 acc 74.609 (77.145)	lr 0.01919
Train [39][1440/3239]	Time 2.407 (0.589)	Data Time 0.001 (0.024)	Loss 2.8153 (2.8790)	Entropy 1.30001 (1.30291)	Top-1 acc 55.078 (54.951)	Top-5 acc 79.688 (77.149)	lr 0.01919
Train [39][1450/3239]	Time 0.344 (0.586)	Data Time 0.001 (0.024)	Loss 2.7633 (2.8787)	Entropy 1.29997 (1.30289)	Top-1 acc 54.297 (54.956)	Top-5 acc 78.516 (77.154)	lr 0.01918
Train [39][1460/3239]	Time 0.209 (0.585)	Data Time 0.001 (0.023)	Loss 2.7727 (2.8790)	Entropy 1.29988 (1.30287)	Top-1 acc 58.203 (54.946)	Top-5 acc 80.469 (77.152)	lr 0.01918
Train [39][1470/3239]	Time 0.236 (0.585)	Data Time 0.001 (0.023)	Loss 2.7692 (2.8791)	Entropy 1.29988 (1.30285)	Top-1 acc 56.250 (54.936)	Top-5 acc 78.906 (77.149)	lr 0.01918
Train [39][1480/3239]	Time 0.231 (0.584)	Data Time 0.001 (0.023)	Loss 2.8408 (2.8790)	Entropy 1.29984 (1.30283)	Top-1 acc 54.688 (54.934)	Top-5 acc 76.953 (77.153)	lr 0.01918
Train [39][1490/3239]	Time 0.230 (0.583)	Data Time 0.001 (0.023)	Loss 2.9038 (2.8798)	Entropy 1.29972 (1.30281)	Top-1 acc 51.953 (54.919)	Top-5 acc 76.562 (77.132)	lr 0.01918
Train [39][1500/3239]	Time 0.302 (0.582)	Data Time 0.001 (0.023)	Loss 2.7805 (2.8800)	Entropy 1.29963 (1.30279)	Top-1 acc 57.422 (54.914)	Top-5 acc 78.516 (77.128)	lr 0.01918
Train [39][1510/3239]	Time 0.232 (0.581)	Data Time 0.001 (0.023)	Loss 2.8940 (2.8801)	Entropy 1.29961 (1.30277)	Top-1 acc 54.297 (54.906)	Top-5 acc 78.516 (77.125)	lr 0.01918
Train [39][1520/3239]	Time 0.229 (0.580)	Data Time 0.001 (0.023)	Loss 2.7872 (2.8801)	Entropy 1.29957 (1.30275)	Top-1 acc 53.906 (54.908)	Top-5 acc 80.469 (77.133)	lr 0.01918
Train [39][1530/3239]	Time 0.184 (0.579)	Data Time 0.001 (0.022)	Loss 2.7069 (2.8801)	Entropy 1.29949 (1.30273)	Top-1 acc 57.422 (54.912)	Top-5 acc 78.906 (77.128)	lr 0.01918
Train [39][1540/3239]	Time 0.232 (0.579)	Data Time 0.002 (0.022)	Loss 2.7521 (2.8801)	Entropy 1.29942 (1.30271)	Top-1 acc 58.203 (54.916)	Top-5 acc 79.688 (77.128)	lr 0.01918
Train [39][1550/3239]	Time 2.492 (0.578)	Data Time 0.001 (0.022)	Loss 2.8840 (2.8799)	Entropy 1.29942 (1.30268)	Top-1 acc 56.250 (54.921)	Top-5 acc 75.391 (77.136)	lr 0.01918
Train [39][1560/3239]	Time 0.218 (0.576)	Data Time 0.001 (0.022)	Loss 2.8571 (2.8800)	Entropy 1.29934 (1.30266)	Top-1 acc 54.688 (54.918)	Top-5 acc 78.906 (77.134)	lr 0.01918
Train [39][1570/3239]	Time 0.268 (0.575)	Data Time 0.001 (0.022)	Loss 3.0455 (2.8800)	Entropy 1.29929 (1.30264)	Top-1 acc 52.734 (54.918)	Top-5 acc 75.000 (77.130)	lr 0.01917
Train [39][1580/3239]	Time 0.212 (0.574)	Data Time 0.001 (0.022)	Loss 2.6935 (2.8797)	Entropy 1.29923 (1.30262)	Top-1 acc 60.938 (54.930)	Top-5 acc 80.469 (77.134)	lr 0.01917
Train [39][1590/3239]	Time 0.224 (0.574)	Data Time 0.001 (0.022)	Loss 2.8179 (2.8799)	Entropy 1.29917 (1.30260)	Top-1 acc 58.203 (54.926)	Top-5 acc 76.953 (77.126)	lr 0.01917
Train [39][1600/3239]	Time 0.209 (0.573)	Data Time 0.001 (0.022)	Loss 2.9058 (2.8803)	Entropy 1.29906 (1.30258)	Top-1 acc 54.688 (54.918)	Top-5 acc 76.172 (77.117)	lr 0.01917
Train [39][1610/3239]	Time 0.222 (0.572)	Data Time 0.001 (0.021)	Loss 2.8247 (2.8804)	Entropy 1.29910 (1.30255)	Top-1 acc 59.766 (54.921)	Top-5 acc 78.125 (77.113)	lr 0.01917
Train [39][1620/3239]	Time 0.232 (0.572)	Data Time 0.002 (0.021)	Loss 3.0264 (2.8805)	Entropy 1.29932 (1.30253)	Top-1 acc 55.859 (54.917)	Top-5 acc 71.484 (77.105)	lr 0.01917
Train [39][1630/3239]	Time 0.268 (0.571)	Data Time 0.001 (0.021)	Loss 3.0261 (2.8806)	Entropy 1.29921 (1.30251)	Top-1 acc 53.516 (54.910)	Top-5 acc 75.391 (77.103)	lr 0.01917
Train [39][1640/3239]	Time 0.211 (0.570)	Data Time 0.001 (0.021)	Loss 2.7346 (2.8805)	Entropy 1.29913 (1.30249)	Top-1 acc 58.203 (54.912)	Top-5 acc 77.344 (77.104)	lr 0.01917
Train [39][1650/3239]	Time 0.279 (0.570)	Data Time 0.001 (0.021)	Loss 2.7460 (2.8805)	Entropy 1.29905 (1.30247)	Top-1 acc 57.422 (54.917)	Top-5 acc 78.516 (77.099)	lr 0.01917
Train [39][1660/3239]	Time 2.776 (0.569)	Data Time 0.001 (0.021)	Loss 2.8265 (2.8803)	Entropy 1.29905 (1.30245)	Top-1 acc 55.859 (54.922)	Top-5 acc 77.344 (77.098)	lr 0.01917
Train [39][1670/3239]	Time 0.236 (0.567)	Data Time 0.001 (0.021)	Loss 2.8626 (2.8802)	Entropy 1.29901 (1.30243)	Top-1 acc 53.125 (54.923)	Top-5 acc 79.688 (77.104)	lr 0.01917
Train [39][1680/3239]	Time 0.225 (0.566)	Data Time 0.004 (0.021)	Loss 2.8934 (2.8803)	Entropy 1.29882 (1.30241)	Top-1 acc 54.688 (54.922)	Top-5 acc 79.297 (77.105)	lr 0.01917
Train [39][1690/3239]	Time 0.229 (0.566)	Data Time 0.001 (0.020)	Loss 2.9237 (2.8805)	Entropy 1.29873 (1.30239)	Top-1 acc 53.125 (54.917)	Top-5 acc 75.781 (77.103)	lr 0.01916
Train [39][1700/3239]	Time 0.312 (0.592)	Data Time 0.002 (0.020)	Loss 2.9338 (2.8802)	Entropy 1.29863 (1.30237)	Top-1 acc 52.344 (54.925)	Top-5 acc 78.516 (77.107)	lr 0.01916
Train [39][1710/3239]	Time 0.223 (0.591)	Data Time 0.002 (0.020)	Loss 2.9494 (2.8803)	Entropy 1.29858 (1.30235)	Top-1 acc 52.734 (54.924)	Top-5 acc 74.219 (77.102)	lr 0.01916
Train [39][1720/3239]	Time 0.227 (0.590)	Data Time 0.002 (0.020)	Loss 2.9290 (2.8806)	Entropy 1.29852 (1.30232)	Top-1 acc 55.078 (54.914)	Top-5 acc 73.828 (77.098)	lr 0.01916
Train [39][1730/3239]	Time 0.213 (0.589)	Data Time 0.001 (0.020)	Loss 2.7591 (2.8807)	Entropy 1.29847 (1.30230)	Top-1 acc 56.641 (54.914)	Top-5 acc 83.203 (77.099)	lr 0.01916
Train [39][1740/3239]	Time 0.211 (0.589)	Data Time 0.001 (0.020)	Loss 2.8642 (2.8805)	Entropy 1.29845 (1.30228)	Top-1 acc 57.031 (54.915)	Top-5 acc 76.953 (77.103)	lr 0.01916
Train [39][1750/3239]	Time 0.260 (0.588)	Data Time 0.001 (0.020)	Loss 3.0110 (2.8809)	Entropy 1.29841 (1.30226)	Top-1 acc 49.219 (54.902)	Top-5 acc 75.000 (77.097)	lr 0.01916
Train [39][1760/3239]	Time 0.261 (0.587)	Data Time 0.001 (0.020)	Loss 2.9969 (2.8810)	Entropy 1.29840 (1.30224)	Top-1 acc 57.031 (54.904)	Top-5 acc 76.172 (77.094)	lr 0.01916
Train [39][1770/3239]	Time 2.553 (0.587)	Data Time 0.002 (0.020)	Loss 2.9651 (2.8811)	Entropy 1.29840 (1.30221)	Top-1 acc 49.609 (54.899)	Top-5 acc 77.734 (77.094)	lr 0.01916
Train [39][1780/3239]	Time 0.233 (0.585)	Data Time 0.001 (0.020)	Loss 2.9228 (2.8814)	Entropy 1.29833 (1.30219)	Top-1 acc 52.344 (54.896)	Top-5 acc 75.000 (77.086)	lr 0.01916
Train [39][1790/3239]	Time 0.225 (0.584)	Data Time 0.001 (0.019)	Loss 2.7711 (2.8812)	Entropy 1.29831 (1.30217)	Top-1 acc 57.812 (54.904)	Top-5 acc 77.734 (77.089)	lr 0.01916
Train [39][1800/3239]	Time 0.222 (0.583)	Data Time 0.001 (0.019)	Loss 2.9149 (2.8810)	Entropy 1.29823 (1.30215)	Top-1 acc 55.078 (54.914)	Top-5 acc 75.000 (77.090)	lr 0.01915
Train [39][1810/3239]	Time 0.258 (0.583)	Data Time 0.001 (0.019)	Loss 2.6227 (2.8807)	Entropy 1.29818 (1.30213)	Top-1 acc 64.453 (54.921)	Top-5 acc 79.688 (77.094)	lr 0.01915
Train [39][1820/3239]	Time 0.219 (0.582)	Data Time 0.001 (0.019)	Loss 2.8915 (2.8805)	Entropy 1.29813 (1.30210)	Top-1 acc 55.469 (54.927)	Top-5 acc 78.906 (77.103)	lr 0.01915
Train [39][1830/3239]	Time 0.229 (0.581)	Data Time 0.002 (0.019)	Loss 2.9435 (2.8805)	Entropy 1.29807 (1.30208)	Top-1 acc 51.562 (54.921)	Top-5 acc 76.562 (77.103)	lr 0.01915
Train [39][1840/3239]	Time 0.211 (0.581)	Data Time 0.002 (0.019)	Loss 2.9567 (2.8806)	Entropy 1.29802 (1.30206)	Top-1 acc 48.828 (54.921)	Top-5 acc 74.609 (77.101)	lr 0.01915
Train [39][1850/3239]	Time 0.213 (0.580)	Data Time 0.001 (0.019)	Loss 3.0039 (2.8809)	Entropy 1.29796 (1.30204)	Top-1 acc 52.344 (54.912)	Top-5 acc 74.219 (77.096)	lr 0.01915
Train [39][1860/3239]	Time 0.240 (0.579)	Data Time 0.001 (0.019)	Loss 3.0253 (2.8809)	Entropy 1.29798 (1.30202)	Top-1 acc 53.516 (54.913)	Top-5 acc 76.172 (77.091)	lr 0.01915
Train [39][1870/3239]	Time 0.229 (0.579)	Data Time 0.002 (0.019)	Loss 3.0465 (2.8811)	Entropy 1.29792 (1.30200)	Top-1 acc 55.078 (54.910)	Top-5 acc 75.391 (77.085)	lr 0.01915
Train [39][1880/3239]	Time 2.562 (0.578)	Data Time 0.001 (0.019)	Loss 2.7905 (2.8811)	Entropy 1.29792 (1.30197)	Top-1 acc 50.781 (54.906)	Top-5 acc 79.688 (77.086)	lr 0.01915
Train [39][1890/3239]	Time 0.227 (0.576)	Data Time 0.001 (0.019)	Loss 2.7994 (2.8809)	Entropy 1.29791 (1.30195)	Top-1 acc 57.422 (54.911)	Top-5 acc 76.562 (77.089)	lr 0.01915
Train [39][1900/3239]	Time 0.212 (0.576)	Data Time 0.001 (0.018)	Loss 3.1405 (2.8809)	Entropy 1.29787 (1.30193)	Top-1 acc 49.609 (54.905)	Top-5 acc 73.047 (77.091)	lr 0.01915
Train [39][1910/3239]	Time 0.229 (0.575)	Data Time 0.001 (0.018)	Loss 3.0271 (2.8812)	Entropy 1.29780 (1.30191)	Top-1 acc 51.172 (54.898)	Top-5 acc 72.656 (77.086)	lr 0.01915
Train [39][1920/3239]	Time 0.319 (0.575)	Data Time 0.001 (0.018)	Loss 2.9289 (2.8814)	Entropy 1.29774 (1.30189)	Top-1 acc 54.297 (54.897)	Top-5 acc 75.391 (77.085)	lr 0.01914
Train [39][1930/3239]	Time 0.240 (0.574)	Data Time 0.001 (0.018)	Loss 2.9236 (2.8814)	Entropy 1.29771 (1.30187)	Top-1 acc 57.812 (54.902)	Top-5 acc 74.219 (77.083)	lr 0.01914
Train [39][1940/3239]	Time 0.281 (0.574)	Data Time 0.004 (0.018)	Loss 3.0004 (2.8815)	Entropy 1.29763 (1.30184)	Top-1 acc 52.344 (54.899)	Top-5 acc 73.828 (77.078)	lr 0.01914
Train [39][1950/3239]	Time 0.239 (0.573)	Data Time 0.001 (0.018)	Loss 2.8063 (2.8815)	Entropy 1.29761 (1.30182)	Top-1 acc 58.203 (54.904)	Top-5 acc 80.859 (77.077)	lr 0.01914
Train [39][1960/3239]	Time 0.188 (0.572)	Data Time 0.001 (0.018)	Loss 2.8531 (2.8818)	Entropy 1.29747 (1.30180)	Top-1 acc 56.250 (54.902)	Top-5 acc 77.734 (77.072)	lr 0.01914
Train [39][1970/3239]	Time 0.270 (0.572)	Data Time 0.001 (0.018)	Loss 2.8687 (2.8818)	Entropy 1.29739 (1.30178)	Top-1 acc 54.688 (54.902)	Top-5 acc 76.562 (77.076)	lr 0.01914
Train [39][1980/3239]	Time 0.208 (0.571)	Data Time 0.001 (0.018)	Loss 2.8279 (2.8819)	Entropy 1.29730 (1.30176)	Top-1 acc 55.078 (54.898)	Top-5 acc 80.859 (77.076)	lr 0.01914
Train [39][1990/3239]	Time 2.425 (0.571)	Data Time 0.001 (0.018)	Loss 3.0998 (2.8820)	Entropy 1.29730 (1.30173)	Top-1 acc 49.609 (54.894)	Top-5 acc 73.047 (77.077)	lr 0.01914
Train [39][2000/3239]	Time 0.209 (0.569)	Data Time 0.001 (0.018)	Loss 2.8737 (2.8822)	Entropy 1.29727 (1.30171)	Top-1 acc 54.297 (54.889)	Top-5 acc 75.781 (77.072)	lr 0.01914
Train [39][2010/3239]	Time 0.230 (0.569)	Data Time 0.001 (0.017)	Loss 2.7719 (2.8819)	Entropy 1.29717 (1.30169)	Top-1 acc 58.203 (54.901)	Top-5 acc 81.641 (77.079)	lr 0.01914
Train [39][2020/3239]	Time 0.216 (0.568)	Data Time 0.001 (0.017)	Loss 2.8961 (2.8818)	Entropy 1.29716 (1.30167)	Top-1 acc 54.688 (54.899)	Top-5 acc 78.516 (77.086)	lr 0.01914
Train [39][2030/3239]	Time 0.223 (0.568)	Data Time 0.001 (0.017)	Loss 2.7489 (2.8817)	Entropy 1.29700 (1.30164)	Top-1 acc 59.375 (54.902)	Top-5 acc 79.297 (77.084)	lr 0.01914
Train [39][2040/3239]	Time 0.215 (0.567)	Data Time 0.001 (0.017)	Loss 3.0686 (2.8819)	Entropy 1.29689 (1.30162)	Top-1 acc 52.734 (54.899)	Top-5 acc 69.531 (77.083)	lr 0.01913
Train [39][2050/3239]	Time 0.212 (0.567)	Data Time 0.001 (0.017)	Loss 2.9699 (2.8818)	Entropy 1.29687 (1.30160)	Top-1 acc 49.609 (54.896)	Top-5 acc 76.953 (77.084)	lr 0.01913
Train [39][2060/3239]	Time 0.225 (0.589)	Data Time 0.002 (0.017)	Loss 2.9270 (2.8817)	Entropy 1.29677 (1.30158)	Top-1 acc 52.734 (54.899)	Top-5 acc 76.562 (77.084)	lr 0.01913
Train [39][2070/3239]	Time 0.320 (0.589)	Data Time 0.002 (0.017)	Loss 2.9771 (2.8816)	Entropy 1.29674 (1.30155)	Top-1 acc 50.391 (54.900)	Top-5 acc 75.391 (77.086)	lr 0.01913
Train [39][2080/3239]	Time 0.171 (0.588)	Data Time 0.001 (0.017)	Loss 2.8423 (2.8815)	Entropy 1.29671 (1.30153)	Top-1 acc 54.297 (54.897)	Top-5 acc 76.953 (77.087)	lr 0.01913
Train [39][2090/3239]	Time 0.263 (0.587)	Data Time 0.001 (0.017)	Loss 2.7743 (2.8815)	Entropy 1.29665 (1.30151)	Top-1 acc 57.422 (54.897)	Top-5 acc 78.906 (77.082)	lr 0.01913
Train [39][2100/3239]	Time 2.533 (0.587)	Data Time 0.001 (0.017)	Loss 2.9387 (2.8815)	Entropy 1.29665 (1.30148)	Top-1 acc 52.734 (54.891)	Top-5 acc 76.172 (77.082)	lr 0.01913
Train [39][2110/3239]	Time 0.213 (0.585)	Data Time 0.001 (0.017)	Loss 2.9111 (2.8816)	Entropy 1.29659 (1.30146)	Top-1 acc 54.297 (54.887)	Top-5 acc 78.125 (77.081)	lr 0.01913
Train [39][2120/3239]	Time 0.253 (0.585)	Data Time 0.001 (0.017)	Loss 3.0526 (2.8814)	Entropy 1.29646 (1.30144)	Top-1 acc 50.000 (54.893)	Top-5 acc 72.656 (77.084)	lr 0.01913
Train [39][2130/3239]	Time 0.248 (0.584)	Data Time 0.001 (0.017)	Loss 2.7512 (2.8813)	Entropy 1.29641 (1.30141)	Top-1 acc 60.156 (54.900)	Top-5 acc 78.906 (77.085)	lr 0.01913
Train [39][2140/3239]	Time 0.221 (0.583)	Data Time 0.001 (0.017)	Loss 2.9077 (2.8813)	Entropy 1.29623 (1.30139)	Top-1 acc 53.516 (54.893)	Top-5 acc 76.172 (77.084)	lr 0.01913
Train [39][2150/3239]	Time 0.210 (0.583)	Data Time 0.001 (0.016)	Loss 2.8921 (2.8810)	Entropy 1.29619 (1.30136)	Top-1 acc 49.219 (54.900)	Top-5 acc 77.344 (77.088)	lr 0.01912
Train [39][2160/3239]	Time 0.227 (0.582)	Data Time 0.001 (0.016)	Loss 2.8864 (2.8812)	Entropy 1.29616 (1.30134)	Top-1 acc 53.516 (54.893)	Top-5 acc 78.516 (77.088)	lr 0.01912
Train [39][2170/3239]	Time 0.214 (0.581)	Data Time 0.001 (0.016)	Loss 2.8063 (2.8812)	Entropy 1.29610 (1.30132)	Top-1 acc 53.125 (54.893)	Top-5 acc 78.906 (77.091)	lr 0.01912
Train [39][2180/3239]	Time 0.238 (0.581)	Data Time 0.001 (0.016)	Loss 2.7173 (2.8813)	Entropy 1.29606 (1.30129)	Top-1 acc 58.594 (54.891)	Top-5 acc 80.469 (77.086)	lr 0.01912
Train [39][2190/3239]	Time 0.238 (0.580)	Data Time 0.001 (0.016)	Loss 2.9751 (2.8815)	Entropy 1.29604 (1.30127)	Top-1 acc 50.781 (54.888)	Top-5 acc 75.000 (77.081)	lr 0.01912
Train [39][2200/3239]	Time 0.223 (0.580)	Data Time 0.001 (0.016)	Loss 2.9398 (2.8814)	Entropy 1.29596 (1.30124)	Top-1 acc 52.344 (54.893)	Top-5 acc 73.438 (77.081)	lr 0.01912
Train [39][2210/3239]	Time 2.482 (0.580)	Data Time 0.001 (0.016)	Loss 2.8539 (2.8813)	Entropy 1.29596 (1.30122)	Top-1 acc 54.297 (54.895)	Top-5 acc 78.125 (77.084)	lr 0.01912
Train [39][2220/3239]	Time 0.343 (0.578)	Data Time 0.001 (0.016)	Loss 2.8888 (2.8814)	Entropy 1.29588 (1.30120)	Top-1 acc 58.594 (54.887)	Top-5 acc 78.516 (77.080)	lr 0.01912
Train [39][2230/3239]	Time 0.196 (0.578)	Data Time 0.001 (0.016)	Loss 2.8344 (2.8811)	Entropy 1.29577 (1.30117)	Top-1 acc 59.766 (54.897)	Top-5 acc 75.781 (77.085)	lr 0.01912
Train [39][2240/3239]	Time 0.213 (0.577)	Data Time 0.001 (0.016)	Loss 2.8302 (2.8811)	Entropy 1.29566 (1.30115)	Top-1 acc 58.984 (54.899)	Top-5 acc 79.688 (77.084)	lr 0.01912
Train [39][2250/3239]	Time 0.225 (0.576)	Data Time 0.001 (0.016)	Loss 2.9028 (2.8812)	Entropy 1.29558 (1.30112)	Top-1 acc 51.953 (54.892)	Top-5 acc 77.344 (77.077)	lr 0.01912
Train [39][2260/3239]	Time 0.221 (0.576)	Data Time 0.001 (0.016)	Loss 2.8197 (2.8810)	Entropy 1.29579 (1.30110)	Top-1 acc 57.812 (54.899)	Top-5 acc 78.906 (77.082)	lr 0.01912
Train [39][2270/3239]	Time 0.218 (0.575)	Data Time 0.001 (0.016)	Loss 2.8876 (2.8811)	Entropy 1.29578 (1.30108)	Top-1 acc 57.812 (54.894)	Top-5 acc 75.391 (77.080)	lr 0.01911
Train [39][2280/3239]	Time 0.225 (0.575)	Data Time 0.001 (0.016)	Loss 2.8775 (2.8810)	Entropy 1.29575 (1.30105)	Top-1 acc 54.297 (54.901)	Top-5 acc 76.953 (77.078)	lr 0.01911
Train [39][2290/3239]	Time 0.208 (0.574)	Data Time 0.001 (0.016)	Loss 3.0348 (2.8810)	Entropy 1.29574 (1.30103)	Top-1 acc 52.734 (54.904)	Top-5 acc 76.562 (77.078)	lr 0.01911
Train [39][2300/3239]	Time 0.239 (0.574)	Data Time 0.001 (0.016)	Loss 2.8626 (2.8810)	Entropy 1.29570 (1.30101)	Top-1 acc 52.344 (54.899)	Top-5 acc 78.906 (77.080)	lr 0.01911
Train [39][2310/3239]	Time 0.219 (0.573)	Data Time 0.001 (0.015)	Loss 2.7675 (2.8807)	Entropy 1.29562 (1.30098)	Top-1 acc 57.422 (54.901)	Top-5 acc 78.516 (77.084)	lr 0.01911
Train [39][2320/3239]	Time 2.362 (0.573)	Data Time 0.001 (0.015)	Loss 2.9369 (2.8810)	Entropy 1.29562 (1.30096)	Top-1 acc 51.172 (54.897)	Top-5 acc 78.516 (77.080)	lr 0.01911
Train [39][2330/3239]	Time 0.324 (0.571)	Data Time 0.001 (0.015)	Loss 2.6870 (2.8812)	Entropy 1.29553 (1.30094)	Top-1 acc 56.641 (54.896)	Top-5 acc 82.422 (77.076)	lr 0.01911
Train [39][2340/3239]	Time 0.218 (0.571)	Data Time 0.001 (0.015)	Loss 2.8537 (2.8811)	Entropy 1.29543 (1.30091)	Top-1 acc 54.688 (54.896)	Top-5 acc 76.172 (77.080)	lr 0.01911
Train [39][2350/3239]	Time 0.221 (0.570)	Data Time 0.002 (0.015)	Loss 2.7905 (2.8811)	Entropy 1.29538 (1.30089)	Top-1 acc 58.984 (54.900)	Top-5 acc 78.125 (77.080)	lr 0.01911
Train [39][2360/3239]	Time 0.232 (0.570)	Data Time 0.001 (0.015)	Loss 2.9950 (2.8813)	Entropy 1.29536 (1.30087)	Top-1 acc 51.562 (54.893)	Top-5 acc 77.344 (77.079)	lr 0.01911
Train [39][2370/3239]	Time 0.209 (0.569)	Data Time 0.001 (0.015)	Loss 2.7667 (2.8812)	Entropy 1.29532 (1.30084)	Top-1 acc 58.594 (54.890)	Top-5 acc 79.688 (77.082)	lr 0.01911
Train [39][2380/3239]	Time 0.226 (0.569)	Data Time 0.001 (0.015)	Loss 2.7623 (2.8811)	Entropy 1.29516 (1.30082)	Top-1 acc 56.250 (54.889)	Top-5 acc 81.641 (77.084)	lr 0.01911
Train [39][2390/3239]	Time 0.238 (0.569)	Data Time 0.001 (0.015)	Loss 2.8147 (2.8811)	Entropy 1.29520 (1.30080)	Top-1 acc 55.859 (54.888)	Top-5 acc 78.125 (77.086)	lr 0.01910
Train [39][2400/3239]	Time 0.215 (0.568)	Data Time 0.002 (0.015)	Loss 3.0124 (2.8812)	Entropy 1.29515 (1.30077)	Top-1 acc 51.562 (54.891)	Top-5 acc 75.000 (77.085)	lr 0.01910
Train [39][2410/3239]	Time 0.211 (0.568)	Data Time 0.001 (0.015)	Loss 2.9678 (2.8812)	Entropy 1.29511 (1.30075)	Top-1 acc 54.297 (54.893)	Top-5 acc 74.609 (77.079)	lr 0.01910
Train [39][2420/3239]	Time 0.258 (0.586)	Data Time 0.003 (0.015)	Loss 2.9213 (2.8811)	Entropy 1.29508 (1.30073)	Top-1 acc 54.297 (54.893)	Top-5 acc 79.688 (77.086)	lr 0.01910
Train [39][2430/3239]	Time 2.594 (0.586)	Data Time 0.003 (0.015)	Loss 3.0173 (2.8813)	Entropy 1.29508 (1.30070)	Top-1 acc 52.734 (54.891)	Top-5 acc 75.000 (77.082)	lr 0.01910
Train [39][2440/3239]	Time 0.226 (0.584)	Data Time 0.001 (0.015)	Loss 2.9151 (2.8812)	Entropy 1.29502 (1.30068)	Top-1 acc 53.516 (54.897)	Top-5 acc 73.438 (77.082)	lr 0.01910
Train [39][2450/3239]	Time 0.213 (0.584)	Data Time 0.001 (0.015)	Loss 2.9324 (2.8813)	Entropy 1.29501 (1.30066)	Top-1 acc 52.344 (54.891)	Top-5 acc 75.781 (77.082)	lr 0.01910
Train [39][2460/3239]	Time 0.201 (0.583)	Data Time 0.001 (0.015)	Loss 3.1398 (2.8816)	Entropy 1.29493 (1.30063)	Top-1 acc 50.000 (54.887)	Top-5 acc 73.828 (77.079)	lr 0.01910
Train [39][2470/3239]	Time 0.228 (0.583)	Data Time 0.001 (0.015)	Loss 2.5454 (2.8814)	Entropy 1.29487 (1.30061)	Top-1 acc 57.422 (54.888)	Top-5 acc 84.375 (77.085)	lr 0.01910
Train [39][2480/3239]	Time 0.233 (0.582)	Data Time 0.001 (0.015)	Loss 2.7451 (2.8813)	Entropy 1.29480 (1.30059)	Top-1 acc 60.938 (54.891)	Top-5 acc 81.250 (77.091)	lr 0.01910
Train [39][2490/3239]	Time 0.239 (0.582)	Data Time 0.001 (0.014)	Loss 2.8987 (2.8811)	Entropy 1.29479 (1.30056)	Top-1 acc 56.250 (54.898)	Top-5 acc 78.125 (77.096)	lr 0.01910
Train [39][2500/3239]	Time 0.226 (0.581)	Data Time 0.001 (0.014)	Loss 2.9060 (2.8813)	Entropy 1.29502 (1.30054)	Top-1 acc 49.609 (54.889)	Top-5 acc 76.562 (77.092)	lr 0.01909
Train [39][2510/3239]	Time 0.243 (0.581)	Data Time 0.001 (0.014)	Loss 2.8441 (2.8812)	Entropy 1.29491 (1.30052)	Top-1 acc 54.688 (54.890)	Top-5 acc 78.516 (77.094)	lr 0.01909
Train [39][2520/3239]	Time 0.196 (0.580)	Data Time 0.001 (0.014)	Loss 2.8718 (2.8811)	Entropy 1.29487 (1.30050)	Top-1 acc 58.984 (54.894)	Top-5 acc 78.125 (77.094)	lr 0.01909
Train [39][2530/3239]	Time 0.213 (0.580)	Data Time 0.001 (0.014)	Loss 2.8319 (2.8810)	Entropy 1.29490 (1.30047)	Top-1 acc 52.734 (54.896)	Top-5 acc 77.344 (77.096)	lr 0.01909
Train [39][2540/3239]	Time 2.459 (0.579)	Data Time 0.001 (0.014)	Loss 2.8850 (2.8812)	Entropy 1.29490 (1.30045)	Top-1 acc 52.734 (54.893)	Top-5 acc 78.906 (77.095)	lr 0.01909
Train [39][2550/3239]	Time 0.241 (0.578)	Data Time 0.001 (0.014)	Loss 3.1002 (2.8816)	Entropy 1.29477 (1.30043)	Top-1 acc 47.656 (54.884)	Top-5 acc 71.094 (77.088)	lr 0.01909
Train [39][2560/3239]	Time 0.233 (0.578)	Data Time 0.001 (0.014)	Loss 2.8704 (2.8816)	Entropy 1.29476 (1.30041)	Top-1 acc 53.125 (54.884)	Top-5 acc 79.297 (77.089)	lr 0.01909
Train [39][2570/3239]	Time 0.218 (0.577)	Data Time 0.001 (0.014)	Loss 3.1453 (2.8819)	Entropy 1.29463 (1.30039)	Top-1 acc 50.391 (54.878)	Top-5 acc 71.875 (77.085)	lr 0.01909
Train [39][2580/3239]	Time 0.246 (0.577)	Data Time 0.001 (0.014)	Loss 3.2097 (2.8819)	Entropy 1.29457 (1.30036)	Top-1 acc 47.656 (54.883)	Top-5 acc 69.531 (77.083)	lr 0.01909
Train [39][2590/3239]	Time 0.328 (0.576)	Data Time 0.001 (0.014)	Loss 2.9087 (2.8819)	Entropy 1.29453 (1.30034)	Top-1 acc 58.203 (54.887)	Top-5 acc 72.266 (77.082)	lr 0.01909
Train [39][2600/3239]	Time 0.238 (0.576)	Data Time 0.001 (0.014)	Loss 2.9036 (2.8821)	Entropy 1.29444 (1.30032)	Top-1 acc 54.688 (54.881)	Top-5 acc 77.344 (77.078)	lr 0.01909
Train [39][2610/3239]	Time 0.223 (0.575)	Data Time 0.001 (0.014)	Loss 3.0192 (2.8822)	Entropy 1.29437 (1.30030)	Top-1 acc 52.344 (54.880)	Top-5 acc 73.828 (77.078)	lr 0.01909
Train [39][2620/3239]	Time 0.216 (0.575)	Data Time 0.001 (0.014)	Loss 2.8642 (2.8824)	Entropy 1.29431 (1.30027)	Top-1 acc 55.078 (54.875)	Top-5 acc 78.125 (77.076)	lr 0.01908
Train [39][2630/3239]	Time 0.251 (0.575)	Data Time 0.001 (0.014)	Loss 2.9627 (2.8825)	Entropy 1.29421 (1.30025)	Top-1 acc 51.953 (54.869)	Top-5 acc 76.172 (77.071)	lr 0.01908
Train [39][2640/3239]	Time 0.218 (0.574)	Data Time 0.001 (0.014)	Loss 2.8358 (2.8825)	Entropy 1.29409 (1.30023)	Top-1 acc 56.250 (54.874)	Top-5 acc 78.125 (77.069)	lr 0.01908
Train [39][2650/3239]	Time 0.230 (0.574)	Data Time 0.001 (0.014)	Loss 2.9836 (2.8824)	Entropy 1.29406 (1.30020)	Top-1 acc 51.953 (54.875)	Top-5 acc 75.000 (77.072)	lr 0.01908
Train [39][2660/3239]	Time 0.233 (0.573)	Data Time 0.001 (0.014)	Loss 2.9075 (2.8823)	Entropy 1.29402 (1.30018)	Top-1 acc 56.250 (54.877)	Top-5 acc 74.219 (77.073)	lr 0.01908
Train [39][2670/3239]	Time 0.229 (0.573)	Data Time 0.001 (0.014)	Loss 2.8528 (2.8821)	Entropy 1.29395 (1.30016)	Top-1 acc 52.344 (54.881)	Top-5 acc 76.562 (77.078)	lr 0.01908
Train [39][2680/3239]	Time 0.222 (0.572)	Data Time 0.001 (0.014)	Loss 2.9676 (2.8821)	Entropy 1.29393 (1.30013)	Top-1 acc 57.031 (54.883)	Top-5 acc 78.516 (77.077)	lr 0.01908
Train [39][2690/3239]	Time 0.309 (0.572)	Data Time 0.001 (0.013)	Loss 2.7365 (2.8822)	Entropy 1.29393 (1.30011)	Top-1 acc 59.375 (54.880)	Top-5 acc 78.906 (77.077)	lr 0.01908
Train [39][2700/3239]	Time 0.214 (0.571)	Data Time 0.001 (0.013)	Loss 2.9650 (2.8823)	Entropy 1.29390 (1.30009)	Top-1 acc 55.469 (54.881)	Top-5 acc 75.781 (77.073)	lr 0.01908
Train [39][2710/3239]	Time 0.226 (0.571)	Data Time 0.001 (0.013)	Loss 2.7277 (2.8822)	Entropy 1.29383 (1.30007)	Top-1 acc 58.594 (54.882)	Top-5 acc 80.469 (77.075)	lr 0.01908
Train [39][2720/3239]	Time 0.214 (0.571)	Data Time 0.001 (0.013)	Loss 3.0468 (2.8822)	Entropy 1.29378 (1.30004)	Top-1 acc 48.828 (54.881)	Top-5 acc 75.391 (77.075)	lr 0.01908
Train [39][2730/3239]	Time 0.218 (0.570)	Data Time 0.001 (0.013)	Loss 2.8872 (2.8823)	Entropy 1.29369 (1.30002)	Top-1 acc 53.906 (54.876)	Top-5 acc 75.391 (77.070)	lr 0.01908
Train [39][2740/3239]	Time 0.347 (0.570)	Data Time 0.001 (0.013)	Loss 2.8516 (2.8822)	Entropy 1.29371 (1.30000)	Top-1 acc 58.203 (54.879)	Top-5 acc 76.562 (77.073)	lr 0.01907
Train [39][2750/3239]	Time 0.214 (0.569)	Data Time 0.001 (0.013)	Loss 2.7557 (2.8824)	Entropy 1.29371 (1.29997)	Top-1 acc 58.594 (54.878)	Top-5 acc 80.859 (77.072)	lr 0.01907
Train [39][2760/3239]	Time 0.209 (0.569)	Data Time 0.001 (0.013)	Loss 2.9547 (2.8824)	Entropy 1.29369 (1.29995)	Top-1 acc 52.344 (54.877)	Top-5 acc 76.562 (77.069)	lr 0.01907
Train [39][2770/3239]	Time 0.286 (0.585)	Data Time 0.005 (0.013)	Loss 3.0548 (2.8824)	Entropy 1.29359 (1.29993)	Top-1 acc 49.609 (54.878)	Top-5 acc 72.266 (77.065)	lr 0.01907
Train [39][2780/3239]	Time 0.253 (0.585)	Data Time 0.002 (0.013)	Loss 2.8920 (2.8825)	Entropy 1.29357 (1.29991)	Top-1 acc 53.906 (54.878)	Top-5 acc 77.734 (77.062)	lr 0.01907
Train [39][2790/3239]	Time 0.410 (0.585)	Data Time 0.001 (0.013)	Loss 2.9329 (2.8825)	Entropy 1.29346 (1.29988)	Top-1 acc 53.516 (54.879)	Top-5 acc 76.953 (77.063)	lr 0.01907
Train [39][2800/3239]	Time 0.269 (0.584)	Data Time 0.001 (0.013)	Loss 2.9608 (2.8828)	Entropy 1.29339 (1.29986)	Top-1 acc 49.219 (54.875)	Top-5 acc 76.562 (77.061)	lr 0.01907
Train [39][2810/3239]	Time 0.272 (0.584)	Data Time 0.001 (0.013)	Loss 2.7924 (2.8828)	Entropy 1.29319 (1.29984)	Top-1 acc 56.250 (54.874)	Top-5 acc 80.859 (77.062)	lr 0.01907
Train [39][2820/3239]	Time 0.233 (0.583)	Data Time 0.001 (0.013)	Loss 2.8259 (2.8827)	Entropy 1.29314 (1.29981)	Top-1 acc 54.688 (54.875)	Top-5 acc 76.953 (77.062)	lr 0.01907
Train [39][2830/3239]	Time 0.262 (0.583)	Data Time 0.001 (0.013)	Loss 2.8501 (2.8829)	Entropy 1.29313 (1.29979)	Top-1 acc 55.469 (54.870)	Top-5 acc 78.906 (77.059)	lr 0.01907
Train [39][2840/3239]	Time 0.335 (0.583)	Data Time 0.001 (0.013)	Loss 2.7514 (2.8827)	Entropy 1.29302 (1.29977)	Top-1 acc 56.641 (54.873)	Top-5 acc 80.859 (77.065)	lr 0.01907
Train [39][2850/3239]	Time 0.226 (0.582)	Data Time 0.001 (0.013)	Loss 2.8559 (2.8826)	Entropy 1.29293 (1.29974)	Top-1 acc 55.078 (54.873)	Top-5 acc 77.344 (77.069)	lr 0.01906
Train [39][2860/3239]	Time 0.204 (0.582)	Data Time 0.001 (0.013)	Loss 2.8325 (2.8825)	Entropy 1.29288 (1.29972)	Top-1 acc 58.594 (54.874)	Top-5 acc 75.781 (77.069)	lr 0.01906
Train [39][2870/3239]	Time 0.245 (0.581)	Data Time 0.001 (0.013)	Loss 2.9757 (2.8825)	Entropy 1.29287 (1.29970)	Top-1 acc 55.859 (54.874)	Top-5 acc 74.219 (77.068)	lr 0.01906
Train [39][2880/3239]	Time 0.246 (0.581)	Data Time 0.001 (0.013)	Loss 2.6820 (2.8824)	Entropy 1.29285 (1.29967)	Top-1 acc 58.984 (54.874)	Top-5 acc 81.641 (77.072)	lr 0.01906
Train [39][2890/3239]	Time 0.349 (0.580)	Data Time 0.001 (0.013)	Loss 2.8954 (2.8825)	Entropy 1.29279 (1.29965)	Top-1 acc 55.469 (54.875)	Top-5 acc 76.172 (77.071)	lr 0.01906
Train [39][2900/3239]	Time 0.251 (0.580)	Data Time 0.001 (0.013)	Loss 2.8519 (2.8826)	Entropy 1.29273 (1.29962)	Top-1 acc 55.859 (54.871)	Top-5 acc 77.734 (77.069)	lr 0.01906
Train [39][2910/3239]	Time 0.229 (0.580)	Data Time 0.001 (0.013)	Loss 2.8658 (2.8826)	Entropy 1.29276 (1.29960)	Top-1 acc 57.031 (54.871)	Top-5 acc 78.516 (77.072)	lr 0.01906
Train [39][2920/3239]	Time 0.231 (0.579)	Data Time 0.001 (0.013)	Loss 2.9177 (2.8827)	Entropy 1.29281 (1.29958)	Top-1 acc 54.297 (54.865)	Top-5 acc 75.391 (77.070)	lr 0.01906
Train [39][2930/3239]	Time 0.235 (0.579)	Data Time 0.001 (0.013)	Loss 2.8381 (2.8828)	Entropy 1.29272 (1.29955)	Top-1 acc 52.734 (54.859)	Top-5 acc 79.297 (77.070)	lr 0.01906
Train [39][2940/3239]	Time 0.307 (0.578)	Data Time 0.001 (0.012)	Loss 2.9452 (2.8825)	Entropy 1.29258 (1.29953)	Top-1 acc 53.125 (54.864)	Top-5 acc 75.000 (77.074)	lr 0.01906
Train [39][2950/3239]	Time 0.288 (0.578)	Data Time 0.001 (0.012)	Loss 2.9306 (2.8825)	Entropy 1.29255 (1.29951)	Top-1 acc 51.172 (54.861)	Top-5 acc 76.562 (77.071)	lr 0.01906
Train [39][2960/3239]	Time 0.264 (0.578)	Data Time 0.001 (0.012)	Loss 2.7064 (2.8824)	Entropy 1.29250 (1.29948)	Top-1 acc 61.328 (54.860)	Top-5 acc 80.469 (77.075)	lr 0.01906
Train [39][2970/3239]	Time 0.243 (0.577)	Data Time 0.001 (0.012)	Loss 3.0175 (2.8827)	Entropy 1.29254 (1.29946)	Top-1 acc 52.734 (54.852)	Top-5 acc 75.781 (77.071)	lr 0.01905
Train [39][2980/3239]	Time 0.223 (0.577)	Data Time 0.003 (0.012)	Loss 3.0003 (2.8826)	Entropy 1.29250 (1.29944)	Top-1 acc 57.031 (54.854)	Top-5 acc 73.047 (77.069)	lr 0.01905
Train [39][2990/3239]	Time 0.287 (0.576)	Data Time 0.001 (0.012)	Loss 2.7801 (2.8829)	Entropy 1.29250 (1.29941)	Top-1 acc 57.812 (54.848)	Top-5 acc 77.734 (77.062)	lr 0.01905
Train [39][3000/3239]	Time 0.223 (0.576)	Data Time 0.001 (0.012)	Loss 2.9543 (2.8830)	Entropy 1.29249 (1.29939)	Top-1 acc 55.469 (54.848)	Top-5 acc 75.000 (77.062)	lr 0.01905
Train [39][3010/3239]	Time 0.260 (0.576)	Data Time 0.001 (0.012)	Loss 3.0602 (2.8829)	Entropy 1.29238 (1.29937)	Top-1 acc 49.609 (54.851)	Top-5 acc 73.438 (77.067)	lr 0.01905
Train [39][3020/3239]	Time 0.256 (0.575)	Data Time 0.001 (0.012)	Loss 3.0336 (2.8827)	Entropy 1.29229 (1.29934)	Top-1 acc 50.391 (54.849)	Top-5 acc 73.438 (77.070)	lr 0.01905
Train [39][3030/3239]	Time 0.253 (0.575)	Data Time 0.002 (0.012)	Loss 2.7547 (2.8826)	Entropy 1.29222 (1.29932)	Top-1 acc 57.812 (54.852)	Top-5 acc 77.344 (77.072)	lr 0.01905
Train [39][3040/3239]	Time 0.265 (0.574)	Data Time 0.001 (0.012)	Loss 2.6970 (2.8827)	Entropy 1.29212 (1.29930)	Top-1 acc 58.594 (54.850)	Top-5 acc 79.688 (77.070)	lr 0.01905
Train [39][3050/3239]	Time 0.222 (0.574)	Data Time 0.001 (0.012)	Loss 2.8866 (2.8829)	Entropy 1.29211 (1.29927)	Top-1 acc 54.297 (54.841)	Top-5 acc 75.781 (77.063)	lr 0.01905
Train [39][3060/3239]	Time 0.241 (0.574)	Data Time 0.001 (0.012)	Loss 2.7965 (2.8828)	Entropy 1.29206 (1.29925)	Top-1 acc 56.250 (54.845)	Top-5 acc 78.516 (77.065)	lr 0.01905
Train [39][3070/3239]	Time 0.222 (0.573)	Data Time 0.001 (0.012)	Loss 2.7567 (2.8827)	Entropy 1.29200 (1.29923)	Top-1 acc 58.594 (54.845)	Top-5 acc 79.688 (77.066)	lr 0.01905
Train [39][3080/3239]	Time 0.231 (0.573)	Data Time 0.001 (0.012)	Loss 2.7857 (2.8825)	Entropy 1.29200 (1.29920)	Top-1 acc 55.859 (54.849)	Top-5 acc 80.078 (77.071)	lr 0.01905
Train [39][3090/3239]	Time 0.243 (0.573)	Data Time 0.001 (0.012)	Loss 2.8158 (2.8826)	Entropy 1.29193 (1.29918)	Top-1 acc 54.688 (54.846)	Top-5 acc 78.906 (77.067)	lr 0.01904
Train [39][3100/3239]	Time 0.228 (0.588)	Data Time 0.003 (0.012)	Loss 3.0138 (2.8826)	Entropy 1.29190 (1.29916)	Top-1 acc 53.125 (54.847)	Top-5 acc 76.172 (77.070)	lr 0.01904
Train [39][3110/3239]	Time 0.239 (0.587)	Data Time 0.002 (0.012)	Loss 3.0112 (2.8827)	Entropy 1.29175 (1.29913)	Top-1 acc 50.391 (54.845)	Top-5 acc 77.734 (77.070)	lr 0.01904
Train [39][3120/3239]	Time 0.229 (0.587)	Data Time 0.001 (0.012)	Loss 2.9411 (2.8829)	Entropy 1.29166 (1.29911)	Top-1 acc 54.688 (54.845)	Top-5 acc 74.609 (77.066)	lr 0.01904
Train [39][3130/3239]	Time 0.257 (0.587)	Data Time 0.002 (0.012)	Loss 2.8896 (2.8830)	Entropy 1.29161 (1.29909)	Top-1 acc 55.078 (54.842)	Top-5 acc 75.000 (77.062)	lr 0.01904
Train [39][3140/3239]	Time 0.262 (0.586)	Data Time 0.001 (0.012)	Loss 2.8854 (2.8830)	Entropy 1.29156 (1.29906)	Top-1 acc 52.734 (54.839)	Top-5 acc 78.906 (77.060)	lr 0.01904
Train [39][3150/3239]	Time 0.223 (0.586)	Data Time 0.001 (0.012)	Loss 2.7351 (2.8829)	Entropy 1.29154 (1.29904)	Top-1 acc 57.812 (54.837)	Top-5 acc 81.250 (77.061)	lr 0.01904
Train [39][3160/3239]	Time 0.245 (0.585)	Data Time 0.001 (0.012)	Loss 2.8445 (2.8828)	Entropy 1.29152 (1.29902)	Top-1 acc 59.766 (54.842)	Top-5 acc 79.297 (77.062)	lr 0.01904
Train [39][3170/3239]	Time 0.217 (0.585)	Data Time 0.001 (0.012)	Loss 2.8090 (2.8829)	Entropy 1.29140 (1.29899)	Top-1 acc 59.375 (54.841)	Top-5 acc 76.562 (77.061)	lr 0.01904
Train [39][3180/3239]	Time 0.232 (0.584)	Data Time 0.000 (0.012)	Loss 2.8514 (2.8831)	Entropy 1.29131 (1.29897)	Top-1 acc 55.078 (54.836)	Top-5 acc 76.953 (77.057)	lr 0.01904
Train [39][3190/3239]	Time 0.184 (0.584)	Data Time 0.000 (0.012)	Loss 2.8104 (2.8830)	Entropy 1.29120 (1.29894)	Top-1 acc 51.953 (54.837)	Top-5 acc 76.172 (77.057)	lr 0.01904
Train [39][3200/3239]	Time 0.213 (0.584)	Data Time 0.000 (0.012)	Loss 3.0260 (2.8830)	Entropy 1.29121 (1.29892)	Top-1 acc 54.297 (54.840)	Top-5 acc 73.828 (77.057)	lr 0.01903
Train [39][3210/3239]	Time 0.238 (0.583)	Data Time 0.000 (0.012)	Loss 2.7898 (2.8829)	Entropy 1.29120 (1.29890)	Top-1 acc 56.641 (54.846)	Top-5 acc 76.953 (77.058)	lr 0.01903
Train [39][3220/3239]	Time 0.230 (0.583)	Data Time 0.000 (0.012)	Loss 2.8206 (2.8829)	Entropy 1.29105 (1.29887)	Top-1 acc 56.250 (54.844)	Top-5 acc 76.562 (77.059)	lr 0.01903
Train [39][3230/3239]	Time 0.234 (0.582)	Data Time 0.000 (0.012)	Loss 2.7320 (2.8828)	Entropy 1.29098 (1.29885)	Top-1 acc 55.469 (54.845)	Top-5 acc 78.906 (77.060)	lr 0.01903
Train [39][3239/3239]	Time 2.334 (0.582)	Data Time 0.000 (0.011)	Loss 3.0528 (2.8827)	Entropy 1.29098 (1.29883)	Top-1 acc 55.556 (54.849)	Top-5 acc 71.605 (77.064)	lr 0.01903
==========Valid [39/120]	loss 1.713	top-1 acc 61.576 (61.665)	top-5 acc 82.792	Train top-1 54.849	top-5 77.064	Entropy 1.29098	Latency-None: 0.000ms	Flops: 559.17M
Train [40][0/3239]	Time 33.660 (33.660)	Data Time 32.194 (32.194)	Loss 2.8313 (2.8313)	Entropy 1.29092 (1.29092)	Top-1 acc 55.469 (55.469)	Top-5 acc 79.297 (79.297)	lr 0.01903
Train [40][10/3239]	Time 2.714 (3.564)	Data Time 0.004 (2.929)	Loss 2.7543 (2.8677)	Entropy 1.29092 (1.29092)	Top-1 acc 59.766 (55.504)	Top-5 acc 78.906 (77.557)	lr 0.01903
Train [40][20/3239]	Time 0.214 (1.979)	Data Time 0.001 (1.535)	Loss 3.0397 (2.8448)	Entropy 1.29084 (1.29088)	Top-1 acc 53.906 (56.231)	Top-5 acc 73.047 (77.697)	lr 0.01903
Train [40][30/3239]	Time 0.246 (1.486)	Data Time 0.002 (1.040)	Loss 2.8491 (2.8482)	Entropy 1.29081 (1.29086)	Top-1 acc 57.031 (56.011)	Top-5 acc 78.125 (77.747)	lr 0.01903
Train [40][40/3239]	Time 0.229 (1.238)	Data Time 0.001 (0.787)	Loss 2.8061 (2.8329)	Entropy 1.29079 (1.29085)	Top-1 acc 55.859 (56.240)	Top-5 acc 78.906 (78.039)	lr 0.01903
Train [40][50/3239]	Time 0.231 (1.086)	Data Time 0.001 (0.633)	Loss 2.7083 (2.8363)	Entropy 1.29070 (1.29082)	Top-1 acc 58.594 (56.028)	Top-5 acc 77.734 (77.880)	lr 0.01903
Train [40][60/3239]	Time 0.323 (0.987)	Data Time 0.001 (0.529)	Loss 2.7851 (2.8378)	Entropy 1.29067 (1.29080)	Top-1 acc 57.812 (56.083)	Top-5 acc 78.906 (77.728)	lr 0.01903
Train [40][70/3239]	Time 0.223 (0.913)	Data Time 0.001 (0.455)	Loss 2.7590 (2.8351)	Entropy 1.29060 (1.29078)	Top-1 acc 55.078 (55.920)	Top-5 acc 80.469 (77.850)	lr 0.01903
Train [40][80/3239]	Time 0.244 (0.855)	Data Time 0.002 (0.399)	Loss 2.8402 (2.8417)	Entropy 1.29057 (1.29075)	Top-1 acc 54.688 (55.797)	Top-5 acc 77.734 (77.677)	lr 0.01902
Train [40][90/3239]	Time 0.217 (0.813)	Data Time 0.002 (0.355)	Loss 3.0601 (2.8422)	Entropy 1.29057 (1.29073)	Top-1 acc 51.172 (55.838)	Top-5 acc 72.266 (77.610)	lr 0.01902
Train [40][100/3239]	Time 0.204 (0.778)	Data Time 0.001 (0.320)	Loss 2.7175 (2.8451)	Entropy 1.29057 (1.29072)	Top-1 acc 52.734 (55.647)	Top-5 acc 82.422 (77.533)	lr 0.01902
Train [40][110/3239]	Time 0.279 (0.748)	Data Time 0.001 (0.292)	Loss 2.7279 (2.8441)	Entropy 1.29051 (1.29070)	Top-1 acc 58.984 (55.648)	Top-5 acc 78.125 (77.576)	lr 0.01902
Train [40][120/3239]	Time 2.637 (0.726)	Data Time 0.001 (0.268)	Loss 2.6868 (2.8466)	Entropy 1.29051 (1.29069)	Top-1 acc 57.812 (55.520)	Top-5 acc 83.594 (77.515)	lr 0.01902
Train [40][130/3239]	Time 0.191 (0.689)	Data Time 0.001 (0.247)	Loss 2.7963 (2.8480)	Entropy 1.29041 (1.29067)	Top-1 acc 58.594 (55.576)	Top-5 acc 80.859 (77.523)	lr 0.01902
Train [40][140/3239]	Time 0.238 (0.672)	Data Time 0.001 (0.230)	Loss 2.9539 (2.8521)	Entropy 1.29035 (1.29064)	Top-1 acc 50.781 (55.411)	Top-5 acc 73.828 (77.474)	lr 0.01902
Train [40][150/3239]	Time 0.227 (0.658)	Data Time 0.001 (0.215)	Loss 2.6996 (2.8520)	Entropy 1.29026 (1.29062)	Top-1 acc 56.641 (55.433)	Top-5 acc 82.812 (77.502)	lr 0.01902
Train [40][160/3239]	Time 0.205 (0.645)	Data Time 0.001 (0.202)	Loss 2.7598 (2.8526)	Entropy 1.29019 (1.29059)	Top-1 acc 55.078 (55.350)	Top-5 acc 79.297 (77.472)	lr 0.01902
Train [40][170/3239]	Time 0.294 (0.634)	Data Time 0.001 (0.190)	Loss 2.9025 (2.8524)	Entropy 1.29019 (1.29057)	Top-1 acc 52.734 (55.318)	Top-5 acc 77.734 (77.524)	lr 0.01902
Train [40][180/3239]	Time 0.202 (0.624)	Data Time 0.001 (0.180)	Loss 2.9961 (2.8527)	Entropy 1.29012 (1.29055)	Top-1 acc 50.781 (55.311)	Top-5 acc 74.219 (77.534)	lr 0.01902
Train [40][190/3239]	Time 0.211 (0.614)	Data Time 0.001 (0.170)	Loss 2.8681 (2.8514)	Entropy 1.29007 (1.29053)	Top-1 acc 57.422 (55.315)	Top-5 acc 76.562 (77.546)	lr 0.01901
Train [40][200/3239]	Time 0.195 (0.606)	Data Time 0.001 (0.162)	Loss 2.9347 (2.8522)	Entropy 1.29003 (1.29050)	Top-1 acc 54.688 (55.286)	Top-5 acc 73.828 (77.554)	lr 0.01901
Train [40][210/3239]	Time 0.253 (0.819)	Data Time 0.003 (0.154)	Loss 2.7456 (2.8519)	Entropy 1.28989 (1.29048)	Top-1 acc 59.766 (55.293)	Top-5 acc 78.906 (77.531)	lr 0.01901
Train [40][220/3239]	Time 0.356 (0.806)	Data Time 0.002 (0.147)	Loss 2.8488 (2.8535)	Entropy 1.28985 (1.29045)	Top-1 acc 53.516 (55.251)	Top-5 acc 75.391 (77.517)	lr 0.01901
Train [40][230/3239]	Time 2.536 (0.791)	Data Time 0.002 (0.141)	Loss 2.8134 (2.8536)	Entropy 1.28985 (1.29043)	Top-1 acc 49.219 (55.246)	Top-5 acc 79.688 (77.525)	lr 0.01901
Train [40][240/3239]	Time 0.219 (0.767)	Data Time 0.001 (0.135)	Loss 3.0294 (2.8548)	Entropy 1.28981 (1.29040)	Top-1 acc 54.297 (55.201)	Top-5 acc 75.781 (77.507)	lr 0.01901
Train [40][250/3239]	Time 0.246 (0.756)	Data Time 0.001 (0.130)	Loss 2.9081 (2.8548)	Entropy 1.28973 (1.29037)	Top-1 acc 53.906 (55.181)	Top-5 acc 75.000 (77.490)	lr 0.01901
Train [40][260/3239]	Time 0.232 (0.744)	Data Time 0.001 (0.125)	Loss 2.8008 (2.8533)	Entropy 1.28969 (1.29035)	Top-1 acc 53.516 (55.190)	Top-5 acc 77.734 (77.513)	lr 0.01901
Train [40][270/3239]	Time 0.328 (0.734)	Data Time 0.001 (0.121)	Loss 2.7879 (2.8527)	Entropy 1.28956 (1.29032)	Top-1 acc 53.906 (55.179)	Top-5 acc 80.469 (77.543)	lr 0.01901
Train [40][280/3239]	Time 0.208 (0.724)	Data Time 0.001 (0.116)	Loss 2.8906 (2.8542)	Entropy 1.28954 (1.29029)	Top-1 acc 57.812 (55.202)	Top-5 acc 80.469 (77.512)	lr 0.01901
Train [40][290/3239]	Time 0.228 (0.714)	Data Time 0.001 (0.112)	Loss 2.7989 (2.8540)	Entropy 1.28954 (1.29027)	Top-1 acc 58.594 (55.208)	Top-5 acc 78.516 (77.518)	lr 0.01901
Train [40][300/3239]	Time 0.238 (0.706)	Data Time 0.001 (0.109)	Loss 3.0812 (2.8579)	Entropy 1.28949 (1.29024)	Top-1 acc 52.344 (55.147)	Top-5 acc 71.875 (77.446)	lr 0.01901
Train [40][310/3239]	Time 0.207 (0.698)	Data Time 0.001 (0.105)	Loss 2.8420 (2.8580)	Entropy 1.28946 (1.29022)	Top-1 acc 55.859 (55.187)	Top-5 acc 77.734 (77.445)	lr 0.01900
Train [40][320/3239]	Time 0.210 (0.690)	Data Time 0.001 (0.102)	Loss 2.9929 (2.8584)	Entropy 1.28945 (1.29019)	Top-1 acc 53.906 (55.227)	Top-5 acc 75.781 (77.434)	lr 0.01900
Train [40][330/3239]	Time 0.339 (0.683)	Data Time 0.001 (0.099)	Loss 2.9689 (2.8578)	Entropy 1.28934 (1.29017)	Top-1 acc 51.172 (55.267)	Top-5 acc 76.172 (77.450)	lr 0.01900
Train [40][340/3239]	Time 2.403 (0.677)	Data Time 0.001 (0.096)	Loss 2.6549 (2.8565)	Entropy 1.28934 (1.29015)	Top-1 acc 58.984 (55.304)	Top-5 acc 80.078 (77.498)	lr 0.01900
Train [40][350/3239]	Time 0.203 (0.664)	Data Time 0.001 (0.093)	Loss 2.8156 (2.8548)	Entropy 1.28934 (1.29012)	Top-1 acc 53.906 (55.327)	Top-5 acc 78.906 (77.539)	lr 0.01900
Train [40][360/3239]	Time 0.236 (0.658)	Data Time 0.002 (0.091)	Loss 2.8593 (2.8543)	Entropy 1.28931 (1.29010)	Top-1 acc 57.422 (55.343)	Top-5 acc 78.125 (77.562)	lr 0.01900
Train [40][370/3239]	Time 0.226 (0.653)	Data Time 0.001 (0.088)	Loss 2.7583 (2.8536)	Entropy 1.28934 (1.29008)	Top-1 acc 57.031 (55.380)	Top-5 acc 77.734 (77.560)	lr 0.01900
Train [40][380/3239]	Time 0.239 (0.648)	Data Time 0.001 (0.086)	Loss 2.8481 (2.8560)	Entropy 1.28929 (1.29006)	Top-1 acc 54.688 (55.313)	Top-5 acc 77.734 (77.503)	lr 0.01900
Train [40][390/3239]	Time 0.231 (0.643)	Data Time 0.001 (0.084)	Loss 2.7207 (2.8555)	Entropy 1.28927 (1.29004)	Top-1 acc 60.156 (55.355)	Top-5 acc 77.734 (77.479)	lr 0.01900
Train [40][400/3239]	Time 0.241 (0.639)	Data Time 0.001 (0.082)	Loss 2.8948 (2.8552)	Entropy 1.28924 (1.29002)	Top-1 acc 53.516 (55.373)	Top-5 acc 78.516 (77.496)	lr 0.01900
Train [40][410/3239]	Time 0.224 (0.634)	Data Time 0.001 (0.080)	Loss 3.0020 (2.8551)	Entropy 1.28924 (1.29000)	Top-1 acc 49.609 (55.359)	Top-5 acc 72.656 (77.501)	lr 0.01900
Train [40][420/3239]	Time 0.210 (0.630)	Data Time 0.001 (0.078)	Loss 2.8080 (2.8556)	Entropy 1.28922 (1.28998)	Top-1 acc 57.812 (55.330)	Top-5 acc 78.906 (77.496)	lr 0.01900
Train [40][430/3239]	Time 0.219 (0.626)	Data Time 0.003 (0.076)	Loss 2.7804 (2.8544)	Entropy 1.28908 (1.28997)	Top-1 acc 58.203 (55.340)	Top-5 acc 79.297 (77.538)	lr 0.01899
Train [40][440/3239]	Time 0.233 (0.623)	Data Time 0.006 (0.075)	Loss 2.8331 (2.8552)	Entropy 1.28905 (1.28995)	Top-1 acc 59.375 (55.312)	Top-5 acc 75.781 (77.521)	lr 0.01899
Train [40][450/3239]	Time 2.432 (0.619)	Data Time 0.001 (0.073)	Loss 2.7108 (2.8556)	Entropy 1.28905 (1.28993)	Top-1 acc 57.031 (55.291)	Top-5 acc 81.250 (77.514)	lr 0.01899
Train [40][460/3239]	Time 0.232 (0.610)	Data Time 0.001 (0.072)	Loss 2.9078 (2.8566)	Entropy 1.28891 (1.28990)	Top-1 acc 55.078 (55.287)	Top-5 acc 78.125 (77.488)	lr 0.01899
Train [40][470/3239]	Time 0.243 (0.607)	Data Time 0.002 (0.070)	Loss 2.7392 (2.8562)	Entropy 1.28880 (1.28988)	Top-1 acc 57.812 (55.306)	Top-5 acc 78.906 (77.498)	lr 0.01899
Train [40][480/3239]	Time 0.324 (0.604)	Data Time 0.001 (0.069)	Loss 3.0254 (2.8562)	Entropy 1.28868 (1.28986)	Top-1 acc 46.875 (55.286)	Top-5 acc 73.828 (77.500)	lr 0.01899
Train [40][490/3239]	Time 0.213 (0.600)	Data Time 0.001 (0.067)	Loss 2.9068 (2.8564)	Entropy 1.28868 (1.28983)	Top-1 acc 53.125 (55.285)	Top-5 acc 75.000 (77.491)	lr 0.01899
Train [40][500/3239]	Time 0.251 (0.597)	Data Time 0.001 (0.066)	Loss 2.7002 (2.8563)	Entropy 1.28866 (1.28981)	Top-1 acc 59.766 (55.271)	Top-5 acc 81.641 (77.502)	lr 0.01899
Train [40][510/3239]	Time 0.199 (0.594)	Data Time 0.001 (0.065)	Loss 2.8877 (2.8560)	Entropy 1.28862 (1.28979)	Top-1 acc 53.125 (55.270)	Top-5 acc 77.734 (77.526)	lr 0.01899
Train [40][520/3239]	Time 0.216 (0.592)	Data Time 0.001 (0.063)	Loss 2.6733 (2.8554)	Entropy 1.28856 (1.28976)	Top-1 acc 61.719 (55.287)	Top-5 acc 81.250 (77.524)	lr 0.01899
Train [40][530/3239]	Time 0.322 (0.589)	Data Time 0.001 (0.062)	Loss 2.8050 (2.8546)	Entropy 1.28851 (1.28974)	Top-1 acc 56.250 (55.315)	Top-5 acc 79.688 (77.536)	lr 0.01899
Train [40][540/3239]	Time 0.243 (0.587)	Data Time 0.001 (0.061)	Loss 2.8133 (2.8546)	Entropy 1.28844 (1.28972)	Top-1 acc 55.469 (55.321)	Top-5 acc 79.688 (77.537)	lr 0.01898
Train [40][550/3239]	Time 0.243 (0.584)	Data Time 0.001 (0.060)	Loss 2.6276 (2.8536)	Entropy 1.28845 (1.28969)	Top-1 acc 61.719 (55.339)	Top-5 acc 83.203 (77.559)	lr 0.01898
Train [40][560/3239]	Time 2.456 (0.582)	Data Time 0.002 (0.059)	Loss 2.7915 (2.8527)	Entropy 1.28845 (1.28967)	Top-1 acc 57.031 (55.351)	Top-5 acc 79.297 (77.577)	lr 0.01898
Train [40][570/3239]	Time 0.252 (0.576)	Data Time 0.001 (0.058)	Loss 2.7917 (2.8527)	Entropy 1.28845 (1.28965)	Top-1 acc 58.594 (55.346)	Top-5 acc 77.734 (77.577)	lr 0.01898
Train [40][580/3239]	Time 0.540 (0.650)	Data Time 0.004 (0.057)	Loss 3.0707 (2.8536)	Entropy 1.28839 (1.28963)	Top-1 acc 51.562 (55.332)	Top-5 acc 73.438 (77.555)	lr 0.01898
Train [40][590/3239]	Time 0.221 (0.647)	Data Time 0.002 (0.056)	Loss 3.0041 (2.8541)	Entropy 1.28834 (1.28961)	Top-1 acc 54.688 (55.327)	Top-5 acc 71.094 (77.539)	lr 0.01898
Train [40][600/3239]	Time 0.245 (0.644)	Data Time 0.002 (0.055)	Loss 2.7344 (2.8536)	Entropy 1.28827 (1.28959)	Top-1 acc 60.547 (55.336)	Top-5 acc 81.641 (77.548)	lr 0.01898
Train [40][610/3239]	Time 0.219 (0.641)	Data Time 0.001 (0.054)	Loss 2.8381 (2.8543)	Entropy 1.28825 (1.28956)	Top-1 acc 55.078 (55.319)	Top-5 acc 77.734 (77.525)	lr 0.01898
Train [40][620/3239]	Time 0.213 (0.638)	Data Time 0.001 (0.054)	Loss 2.9090 (2.8546)	Entropy 1.28821 (1.28954)	Top-1 acc 53.516 (55.316)	Top-5 acc 76.172 (77.531)	lr 0.01898
Train [40][630/3239]	Time 0.217 (0.635)	Data Time 0.001 (0.053)	Loss 2.8423 (2.8535)	Entropy 1.28810 (1.28952)	Top-1 acc 55.859 (55.339)	Top-5 acc 75.000 (77.552)	lr 0.01898
Train [40][640/3239]	Time 0.212 (0.632)	Data Time 0.001 (0.052)	Loss 2.9901 (2.8543)	Entropy 1.28790 (1.28950)	Top-1 acc 54.297 (55.325)	Top-5 acc 75.000 (77.549)	lr 0.01898
Train [40][650/3239]	Time 0.233 (0.629)	Data Time 0.001 (0.051)	Loss 2.8268 (2.8543)	Entropy 1.28776 (1.28947)	Top-1 acc 58.203 (55.327)	Top-5 acc 78.906 (77.539)	lr 0.01898
Train [40][660/3239]	Time 0.216 (0.627)	Data Time 0.001 (0.050)	Loss 2.8103 (2.8537)	Entropy 1.28771 (1.28945)	Top-1 acc 57.422 (55.341)	Top-5 acc 77.344 (77.548)	lr 0.01897
Train [40][670/3239]	Time 2.425 (0.624)	Data Time 0.001 (0.050)	Loss 2.9498 (2.8530)	Entropy 1.28771 (1.28942)	Top-1 acc 56.250 (55.353)	Top-5 acc 74.219 (77.577)	lr 0.01897
Train [40][680/3239]	Time 0.258 (0.618)	Data Time 0.002 (0.049)	Loss 2.8050 (2.8544)	Entropy 1.28763 (1.28939)	Top-1 acc 57.422 (55.332)	Top-5 acc 78.516 (77.550)	lr 0.01897
Train [40][690/3239]	Time 0.318 (0.616)	Data Time 0.001 (0.048)	Loss 2.6928 (2.8543)	Entropy 1.28759 (1.28937)	Top-1 acc 61.719 (55.334)	Top-5 acc 81.641 (77.552)	lr 0.01897
Train [40][700/3239]	Time 0.254 (0.614)	Data Time 0.002 (0.048)	Loss 2.6787 (2.8542)	Entropy 1.28752 (1.28934)	Top-1 acc 62.109 (55.353)	Top-5 acc 83.203 (77.555)	lr 0.01897
Train [40][710/3239]	Time 0.250 (0.612)	Data Time 0.001 (0.047)	Loss 2.9521 (2.8542)	Entropy 1.28744 (1.28932)	Top-1 acc 53.906 (55.358)	Top-5 acc 75.781 (77.564)	lr 0.01897
Train [40][720/3239]	Time 0.219 (0.609)	Data Time 0.001 (0.046)	Loss 2.7784 (2.8536)	Entropy 1.28741 (1.28929)	Top-1 acc 55.078 (55.352)	Top-5 acc 80.469 (77.586)	lr 0.01897
Train [40][730/3239]	Time 0.213 (0.607)	Data Time 0.001 (0.046)	Loss 2.8858 (2.8531)	Entropy 1.28726 (1.28926)	Top-1 acc 53.906 (55.363)	Top-5 acc 78.516 (77.602)	lr 0.01897
Train [40][740/3239]	Time 0.329 (0.605)	Data Time 0.001 (0.045)	Loss 2.7374 (2.8528)	Entropy 1.28710 (1.28923)	Top-1 acc 57.812 (55.356)	Top-5 acc 79.297 (77.616)	lr 0.01897
Train [40][750/3239]	Time 0.170 (0.603)	Data Time 0.001 (0.045)	Loss 2.7730 (2.8522)	Entropy 1.28710 (1.28921)	Top-1 acc 57.812 (55.386)	Top-5 acc 78.125 (77.624)	lr 0.01897
Train [40][760/3239]	Time 0.228 (0.601)	Data Time 0.001 (0.044)	Loss 2.9306 (2.8529)	Entropy 1.28705 (1.28918)	Top-1 acc 56.250 (55.377)	Top-5 acc 75.781 (77.613)	lr 0.01897
Train [40][770/3239]	Time 0.250 (0.599)	Data Time 0.001 (0.044)	Loss 2.8140 (2.8530)	Entropy 1.28701 (1.28915)	Top-1 acc 58.984 (55.387)	Top-5 acc 78.125 (77.612)	lr 0.01896
Train [40][780/3239]	Time 2.393 (0.598)	Data Time 0.001 (0.043)	Loss 2.7578 (2.8529)	Entropy 1.28701 (1.28912)	Top-1 acc 58.594 (55.393)	Top-5 acc 82.031 (77.609)	lr 0.01896
Train [40][790/3239]	Time 0.206 (0.593)	Data Time 0.001 (0.042)	Loss 3.0430 (2.8534)	Entropy 1.28694 (1.28910)	Top-1 acc 53.516 (55.392)	Top-5 acc 72.266 (77.592)	lr 0.01896
Train [40][800/3239]	Time 0.222 (0.591)	Data Time 0.001 (0.042)	Loss 2.7026 (2.8534)	Entropy 1.28691 (1.28907)	Top-1 acc 58.984 (55.383)	Top-5 acc 83.594 (77.600)	lr 0.01896
Train [40][810/3239]	Time 0.219 (0.589)	Data Time 0.001 (0.041)	Loss 2.7681 (2.8538)	Entropy 1.28696 (1.28904)	Top-1 acc 56.641 (55.366)	Top-5 acc 79.688 (77.600)	lr 0.01896
Train [40][820/3239]	Time 0.228 (0.588)	Data Time 0.010 (0.041)	Loss 2.8543 (2.8540)	Entropy 1.28692 (1.28902)	Top-1 acc 56.641 (55.361)	Top-5 acc 77.344 (77.595)	lr 0.01896
Train [40][830/3239]	Time 0.199 (0.586)	Data Time 0.001 (0.041)	Loss 2.9229 (2.8539)	Entropy 1.28688 (1.28899)	Top-1 acc 53.516 (55.360)	Top-5 acc 73.047 (77.595)	lr 0.01896
Train [40][840/3239]	Time 0.213 (0.584)	Data Time 0.001 (0.040)	Loss 2.5820 (2.8538)	Entropy 1.28683 (1.28897)	Top-1 acc 64.453 (55.370)	Top-5 acc 82.031 (77.594)	lr 0.01896
Train [40][850/3239]	Time 0.208 (0.583)	Data Time 0.001 (0.040)	Loss 2.8434 (2.8535)	Entropy 1.28676 (1.28894)	Top-1 acc 54.688 (55.384)	Top-5 acc 75.781 (77.597)	lr 0.01896
Train [40][860/3239]	Time 0.213 (0.581)	Data Time 0.001 (0.039)	Loss 2.7440 (2.8526)	Entropy 1.28659 (1.28891)	Top-1 acc 57.422 (55.412)	Top-5 acc 79.297 (77.607)	lr 0.01896
Train [40][870/3239]	Time 0.218 (0.580)	Data Time 0.001 (0.039)	Loss 2.9290 (2.8524)	Entropy 1.28659 (1.28889)	Top-1 acc 50.781 (55.411)	Top-5 acc 78.125 (77.616)	lr 0.01896
Train [40][880/3239]	Time 0.250 (0.578)	Data Time 0.001 (0.038)	Loss 2.8950 (2.8525)	Entropy 1.28653 (1.28886)	Top-1 acc 53.906 (55.420)	Top-5 acc 76.562 (77.617)	lr 0.01896
Train [40][890/3239]	Time 2.541 (0.577)	Data Time 0.001 (0.038)	Loss 2.7470 (2.8525)	Entropy 1.28653 (1.28884)	Top-1 acc 61.328 (55.427)	Top-5 acc 78.906 (77.611)	lr 0.01895
Train [40][900/3239]	Time 0.246 (0.573)	Data Time 0.001 (0.037)	Loss 2.8093 (2.8528)	Entropy 1.28650 (1.28881)	Top-1 acc 58.984 (55.418)	Top-5 acc 80.078 (77.608)	lr 0.01895
Train [40][910/3239]	Time 0.236 (0.572)	Data Time 0.001 (0.037)	Loss 3.6458 (2.8539)	Entropy 1.28641 (1.28878)	Top-1 acc 37.109 (55.395)	Top-5 acc 62.500 (77.591)	lr 0.01895
Train [40][920/3239]	Time 0.242 (0.571)	Data Time 0.001 (0.037)	Loss 3.0548 (2.8544)	Entropy 1.28637 (1.28876)	Top-1 acc 50.000 (55.385)	Top-5 acc 73.438 (77.583)	lr 0.01895
Train [40][930/3239]	Time 0.218 (0.569)	Data Time 0.002 (0.036)	Loss 3.0033 (2.8551)	Entropy 1.28636 (1.28873)	Top-1 acc 54.297 (55.375)	Top-5 acc 74.609 (77.564)	lr 0.01895
Train [40][940/3239]	Time 0.293 (0.615)	Data Time 0.003 (0.036)	Loss 2.8659 (2.8550)	Entropy 1.28627 (1.28871)	Top-1 acc 54.297 (55.374)	Top-5 acc 76.953 (77.566)	lr 0.01895
Train [40][950/3239]	Time 0.231 (0.613)	Data Time 0.002 (0.036)	Loss 2.7687 (2.8546)	Entropy 1.28622 (1.28868)	Top-1 acc 57.812 (55.373)	Top-5 acc 78.125 (77.571)	lr 0.01895
Train [40][960/3239]	Time 0.226 (0.612)	Data Time 0.001 (0.035)	Loss 2.7563 (2.8547)	Entropy 1.28614 (1.28865)	Top-1 acc 58.594 (55.370)	Top-5 acc 79.297 (77.569)	lr 0.01895
Train [40][970/3239]	Time 0.210 (0.610)	Data Time 0.001 (0.035)	Loss 2.7767 (2.8539)	Entropy 1.28609 (1.28863)	Top-1 acc 55.469 (55.390)	Top-5 acc 79.688 (77.581)	lr 0.01895
Train [40][980/3239]	Time 0.228 (0.608)	Data Time 0.001 (0.035)	Loss 2.7039 (2.8538)	Entropy 1.28599 (1.28860)	Top-1 acc 55.859 (55.387)	Top-5 acc 80.078 (77.585)	lr 0.01895
Train [40][990/3239]	Time 0.215 (0.607)	Data Time 0.002 (0.034)	Loss 3.0313 (2.8539)	Entropy 1.28591 (1.28857)	Top-1 acc 50.391 (55.391)	Top-5 acc 73.828 (77.589)	lr 0.01895
Train [40][1000/3239]	Time 2.658 (0.606)	Data Time 0.001 (0.034)	Loss 2.8850 (2.8544)	Entropy 1.28591 (1.28855)	Top-1 acc 54.297 (55.371)	Top-5 acc 78.125 (77.574)	lr 0.01894
Train [40][1010/3239]	Time 0.230 (0.602)	Data Time 0.002 (0.034)	Loss 2.9008 (2.8547)	Entropy 1.28588 (1.28852)	Top-1 acc 54.297 (55.361)	Top-5 acc 76.172 (77.569)	lr 0.01894
Train [40][1020/3239]	Time 0.214 (0.600)	Data Time 0.001 (0.033)	Loss 2.8399 (2.8544)	Entropy 1.28588 (1.28850)	Top-1 acc 55.469 (55.373)	Top-5 acc 80.078 (77.574)	lr 0.01894
Train [40][1030/3239]	Time 0.267 (0.599)	Data Time 0.001 (0.033)	Loss 2.7425 (2.8545)	Entropy 1.28588 (1.28847)	Top-1 acc 58.594 (55.380)	Top-5 acc 78.516 (77.570)	lr 0.01894
Train [40][1040/3239]	Time 0.229 (0.598)	Data Time 0.001 (0.033)	Loss 2.8355 (2.8546)	Entropy 1.28592 (1.28845)	Top-1 acc 55.859 (55.378)	Top-5 acc 76.953 (77.564)	lr 0.01894
Train [40][1050/3239]	Time 0.230 (0.596)	Data Time 0.001 (0.032)	Loss 2.7238 (2.8547)	Entropy 1.28590 (1.28842)	Top-1 acc 60.938 (55.376)	Top-5 acc 76.562 (77.566)	lr 0.01894
Train [40][1060/3239]	Time 0.217 (0.595)	Data Time 0.001 (0.032)	Loss 2.8773 (2.8550)	Entropy 1.28588 (1.28840)	Top-1 acc 53.516 (55.365)	Top-5 acc 76.562 (77.559)	lr 0.01894
Train [40][1070/3239]	Time 0.229 (0.594)	Data Time 0.001 (0.032)	Loss 3.0688 (2.8558)	Entropy 1.28583 (1.28837)	Top-1 acc 50.781 (55.336)	Top-5 acc 73.047 (77.548)	lr 0.01894
Train [40][1080/3239]	Time 0.223 (0.593)	Data Time 0.001 (0.032)	Loss 2.9988 (2.8560)	Entropy 1.28583 (1.28835)	Top-1 acc 50.391 (55.329)	Top-5 acc 76.562 (77.553)	lr 0.01894
Train [40][1090/3239]	Time 0.246 (0.591)	Data Time 0.002 (0.031)	Loss 2.9231 (2.8563)	Entropy 1.28579 (1.28833)	Top-1 acc 52.344 (55.320)	Top-5 acc 75.781 (77.541)	lr 0.01894
Train [40][1100/3239]	Time 0.240 (0.590)	Data Time 0.001 (0.031)	Loss 2.8023 (2.8567)	Entropy 1.28579 (1.28830)	Top-1 acc 56.250 (55.316)	Top-5 acc 78.125 (77.533)	lr 0.01894
Train [40][1110/3239]	Time 2.446 (0.589)	Data Time 0.001 (0.031)	Loss 2.9571 (2.8569)	Entropy 1.28579 (1.28828)	Top-1 acc 53.906 (55.303)	Top-5 acc 76.172 (77.527)	lr 0.01894
Train [40][1120/3239]	Time 0.228 (0.586)	Data Time 0.001 (0.030)	Loss 2.8971 (2.8577)	Entropy 1.28578 (1.28826)	Top-1 acc 53.125 (55.283)	Top-5 acc 79.297 (77.513)	lr 0.01893
Train [40][1130/3239]	Time 0.242 (0.585)	Data Time 0.001 (0.030)	Loss 2.9274 (2.8580)	Entropy 1.28574 (1.28824)	Top-1 acc 52.734 (55.278)	Top-5 acc 75.391 (77.509)	lr 0.01893
Train [40][1140/3239]	Time 0.235 (0.584)	Data Time 0.001 (0.030)	Loss 2.9160 (2.8582)	Entropy 1.28565 (1.28822)	Top-1 acc 54.688 (55.267)	Top-5 acc 77.344 (77.508)	lr 0.01893
Train [40][1150/3239]	Time 0.232 (0.583)	Data Time 0.001 (0.030)	Loss 3.0907 (2.8579)	Entropy 1.28562 (1.28819)	Top-1 acc 49.609 (55.272)	Top-5 acc 70.703 (77.507)	lr 0.01893
Train [40][1160/3239]	Time 0.238 (0.582)	Data Time 0.001 (0.029)	Loss 2.6239 (2.8575)	Entropy 1.28563 (1.28817)	Top-1 acc 60.156 (55.284)	Top-5 acc 85.156 (77.511)	lr 0.01893
Train [40][1170/3239]	Time 0.224 (0.581)	Data Time 0.001 (0.029)	Loss 2.9896 (2.8573)	Entropy 1.28559 (1.28815)	Top-1 acc 54.688 (55.295)	Top-5 acc 74.219 (77.517)	lr 0.01893
Train [40][1180/3239]	Time 0.203 (0.580)	Data Time 0.001 (0.029)	Loss 2.7236 (2.8570)	Entropy 1.28557 (1.28813)	Top-1 acc 58.203 (55.297)	Top-5 acc 81.250 (77.531)	lr 0.01893
Train [40][1190/3239]	Time 0.203 (0.579)	Data Time 0.001 (0.029)	Loss 3.1673 (2.8573)	Entropy 1.28554 (1.28811)	Top-1 acc 43.750 (55.288)	Top-5 acc 72.266 (77.530)	lr 0.01893
Train [40][1200/3239]	Time 0.228 (0.578)	Data Time 0.002 (0.029)	Loss 2.8993 (2.8576)	Entropy 1.28548 (1.28808)	Top-1 acc 55.469 (55.277)	Top-5 acc 74.609 (77.528)	lr 0.01893
Train [40][1210/3239]	Time 0.213 (0.577)	Data Time 0.001 (0.028)	Loss 2.8514 (2.8577)	Entropy 1.28544 (1.28806)	Top-1 acc 57.031 (55.271)	Top-5 acc 77.344 (77.522)	lr 0.01893
Train [40][1220/3239]	Time 2.399 (0.576)	Data Time 0.001 (0.028)	Loss 2.8442 (2.8581)	Entropy 1.28544 (1.28804)	Top-1 acc 54.297 (55.257)	Top-5 acc 79.688 (77.517)	lr 0.01893
Train [40][1230/3239]	Time 0.172 (0.573)	Data Time 0.001 (0.028)	Loss 2.6986 (2.8582)	Entropy 1.28543 (1.28802)	Top-1 acc 57.422 (55.247)	Top-5 acc 81.641 (77.517)	lr 0.01892
Train [40][1240/3239]	Time 0.216 (0.572)	Data Time 0.001 (0.028)	Loss 2.8409 (2.8585)	Entropy 1.28538 (1.28800)	Top-1 acc 55.469 (55.247)	Top-5 acc 76.562 (77.505)	lr 0.01892
Train [40][1250/3239]	Time 0.219 (0.571)	Data Time 0.001 (0.028)	Loss 2.9450 (2.8583)	Entropy 1.28532 (1.28798)	Top-1 acc 55.859 (55.251)	Top-5 acc 78.125 (77.510)	lr 0.01892
Train [40][1260/3239]	Time 0.222 (0.570)	Data Time 0.001 (0.027)	Loss 2.8466 (2.8584)	Entropy 1.28529 (1.28796)	Top-1 acc 57.422 (55.248)	Top-5 acc 77.734 (77.509)	lr 0.01892
Train [40][1270/3239]	Time 0.290 (0.569)	Data Time 0.001 (0.027)	Loss 2.8850 (2.8588)	Entropy 1.28524 (1.28793)	Top-1 acc 56.641 (55.237)	Top-5 acc 77.344 (77.503)	lr 0.01892
Train [40][1280/3239]	Time 0.223 (0.568)	Data Time 0.001 (0.027)	Loss 2.9189 (2.8590)	Entropy 1.28520 (1.28791)	Top-1 acc 50.391 (55.234)	Top-5 acc 74.609 (77.497)	lr 0.01892
Train [40][1290/3239]	Time 0.258 (0.567)	Data Time 0.002 (0.027)	Loss 2.8196 (2.8587)	Entropy 1.28508 (1.28789)	Top-1 acc 55.078 (55.239)	Top-5 acc 76.562 (77.502)	lr 0.01892
Train [40][1300/3239]	Time 0.230 (0.603)	Data Time 0.002 (0.027)	Loss 3.0389 (2.8587)	Entropy 1.28509 (1.28787)	Top-1 acc 49.219 (55.243)	Top-5 acc 73.047 (77.508)	lr 0.01892
Train [40][1310/3239]	Time 0.211 (0.602)	Data Time 0.002 (0.026)	Loss 2.9949 (2.8589)	Entropy 1.28505 (1.28785)	Top-1 acc 51.953 (55.245)	Top-5 acc 73.438 (77.503)	lr 0.01892
Train [40][1320/3239]	Time 0.298 (0.601)	Data Time 0.001 (0.026)	Loss 2.8530 (2.8590)	Entropy 1.28498 (1.28783)	Top-1 acc 51.562 (55.240)	Top-5 acc 77.734 (77.503)	lr 0.01892
Train [40][1330/3239]	Time 2.524 (0.600)	Data Time 0.001 (0.026)	Loss 3.0114 (2.8591)	Entropy 1.28498 (1.28781)	Top-1 acc 51.953 (55.240)	Top-5 acc 73.828 (77.505)	lr 0.01892
Train [40][1340/3239]	Time 0.215 (0.597)	Data Time 0.002 (0.026)	Loss 2.8675 (2.8590)	Entropy 1.28497 (1.28779)	Top-1 acc 51.953 (55.238)	Top-5 acc 79.297 (77.508)	lr 0.01892
Train [40][1350/3239]	Time 0.238 (0.597)	Data Time 0.001 (0.026)	Loss 3.0020 (2.8588)	Entropy 1.28497 (1.28776)	Top-1 acc 54.297 (55.252)	Top-5 acc 74.219 (77.508)	lr 0.01891
Train [40][1360/3239]	Time 0.232 (0.596)	Data Time 0.002 (0.025)	Loss 2.8850 (2.8594)	Entropy 1.28486 (1.28774)	Top-1 acc 54.688 (55.245)	Top-5 acc 78.125 (77.499)	lr 0.01891
Train [40][1370/3239]	Time 0.239 (0.595)	Data Time 0.001 (0.025)	Loss 2.8000 (2.8594)	Entropy 1.28481 (1.28772)	Top-1 acc 57.812 (55.243)	Top-5 acc 81.641 (77.504)	lr 0.01891
Train [40][1380/3239]	Time 0.206 (0.594)	Data Time 0.001 (0.025)	Loss 2.8819 (2.8593)	Entropy 1.28480 (1.28770)	Top-1 acc 52.734 (55.243)	Top-5 acc 79.297 (77.513)	lr 0.01891
Train [40][1390/3239]	Time 0.230 (0.593)	Data Time 0.001 (0.025)	Loss 2.8715 (2.8589)	Entropy 1.28479 (1.28768)	Top-1 acc 55.859 (55.252)	Top-5 acc 78.516 (77.518)	lr 0.01891
Train [40][1400/3239]	Time 0.222 (0.592)	Data Time 0.001 (0.025)	Loss 2.8455 (2.8594)	Entropy 1.28466 (1.28766)	Top-1 acc 57.031 (55.242)	Top-5 acc 76.562 (77.508)	lr 0.01891
Train [40][1410/3239]	Time 0.225 (0.591)	Data Time 0.001 (0.025)	Loss 2.7372 (2.8593)	Entropy 1.28465 (1.28764)	Top-1 acc 59.766 (55.245)	Top-5 acc 79.297 (77.508)	lr 0.01891
Train [40][1420/3239]	Time 0.303 (0.590)	Data Time 0.001 (0.024)	Loss 2.7861 (2.8591)	Entropy 1.28466 (1.28762)	Top-1 acc 54.297 (55.244)	Top-5 acc 78.516 (77.511)	lr 0.01891
Train [40][1430/3239]	Time 0.224 (0.589)	Data Time 0.001 (0.024)	Loss 2.7786 (2.8590)	Entropy 1.28469 (1.28760)	Top-1 acc 55.859 (55.248)	Top-5 acc 79.688 (77.513)	lr 0.01891
Train [40][1440/3239]	Time 2.524 (0.588)	Data Time 0.001 (0.024)	Loss 2.7870 (2.8589)	Entropy 1.28469 (1.28758)	Top-1 acc 54.297 (55.253)	Top-5 acc 78.125 (77.514)	lr 0.01891
Train [40][1450/3239]	Time 0.277 (0.586)	Data Time 0.002 (0.024)	Loss 2.8476 (2.8585)	Entropy 1.28467 (1.28756)	Top-1 acc 56.641 (55.267)	Top-5 acc 78.906 (77.522)	lr 0.01891
Train [40][1460/3239]	Time 0.232 (0.585)	Data Time 0.001 (0.024)	Loss 2.6734 (2.8583)	Entropy 1.28463 (1.28754)	Top-1 acc 57.031 (55.275)	Top-5 acc 82.422 (77.528)	lr 0.01891
Train [40][1470/3239]	Time 0.315 (0.584)	Data Time 0.001 (0.024)	Loss 2.9948 (2.8591)	Entropy 1.28461 (1.28752)	Top-1 acc 55.469 (55.257)	Top-5 acc 75.391 (77.514)	lr 0.01890
Train [40][1480/3239]	Time 0.228 (0.584)	Data Time 0.002 (0.024)	Loss 2.8360 (2.8592)	Entropy 1.28458 (1.28750)	Top-1 acc 55.859 (55.248)	Top-5 acc 77.734 (77.510)	lr 0.01890
Train [40][1490/3239]	Time 0.214 (0.583)	Data Time 0.001 (0.023)	Loss 2.8600 (2.8593)	Entropy 1.28457 (1.28748)	Top-1 acc 58.594 (55.254)	Top-5 acc 75.391 (77.508)	lr 0.01890
Train [40][1500/3239]	Time 0.226 (0.582)	Data Time 0.001 (0.023)	Loss 2.7433 (2.8596)	Entropy 1.28449 (1.28746)	Top-1 acc 57.031 (55.243)	Top-5 acc 80.469 (77.504)	lr 0.01890
Train [40][1510/3239]	Time 0.248 (0.581)	Data Time 0.001 (0.023)	Loss 2.6715 (2.8597)	Entropy 1.28449 (1.28744)	Top-1 acc 58.203 (55.240)	Top-5 acc 82.812 (77.508)	lr 0.01890
Train [40][1520/3239]	Time 0.200 (0.580)	Data Time 0.001 (0.023)	Loss 2.8518 (2.8596)	Entropy 1.28436 (1.28742)	Top-1 acc 56.641 (55.245)	Top-5 acc 78.516 (77.510)	lr 0.01890
Train [40][1530/3239]	Time 0.223 (0.579)	Data Time 0.001 (0.023)	Loss 2.7640 (2.8598)	Entropy 1.28438 (1.28740)	Top-1 acc 58.203 (55.235)	Top-5 acc 77.344 (77.512)	lr 0.01890
Train [40][1540/3239]	Time 0.291 (0.578)	Data Time 0.001 (0.023)	Loss 3.0501 (2.8603)	Entropy 1.28429 (1.28738)	Top-1 acc 54.297 (55.223)	Top-5 acc 74.609 (77.500)	lr 0.01890
Train [40][1550/3239]	Time 2.435 (0.578)	Data Time 0.001 (0.023)	Loss 2.7542 (2.8604)	Entropy 1.28429 (1.28736)	Top-1 acc 57.812 (55.212)	Top-5 acc 81.250 (77.498)	lr 0.01890
Train [40][1560/3239]	Time 0.226 (0.575)	Data Time 0.001 (0.022)	Loss 2.9046 (2.8609)	Entropy 1.28429 (1.28734)	Top-1 acc 53.516 (55.201)	Top-5 acc 74.219 (77.487)	lr 0.01890
Train [40][1570/3239]	Time 0.334 (0.575)	Data Time 0.001 (0.022)	Loss 2.8658 (2.8609)	Entropy 1.28412 (1.28732)	Top-1 acc 54.688 (55.201)	Top-5 acc 75.781 (77.486)	lr 0.01890
Train [40][1580/3239]	Time 0.194 (0.574)	Data Time 0.001 (0.022)	Loss 2.8599 (2.8608)	Entropy 1.28414 (1.28730)	Top-1 acc 56.641 (55.204)	Top-5 acc 78.516 (77.490)	lr 0.01889
Train [40][1590/3239]	Time 0.215 (0.573)	Data Time 0.001 (0.022)	Loss 2.8410 (2.8607)	Entropy 1.28394 (1.28728)	Top-1 acc 56.641 (55.211)	Top-5 acc 79.297 (77.493)	lr 0.01889
Train [40][1600/3239]	Time 0.258 (0.572)	Data Time 0.001 (0.022)	Loss 3.1211 (2.8610)	Entropy 1.28393 (1.28726)	Top-1 acc 49.609 (55.203)	Top-5 acc 72.266 (77.489)	lr 0.01889
Train [40][1610/3239]	Time 0.208 (0.572)	Data Time 0.001 (0.022)	Loss 2.8769 (2.8612)	Entropy 1.28386 (1.28724)	Top-1 acc 52.344 (55.198)	Top-5 acc 78.516 (77.486)	lr 0.01889
Train [40][1620/3239]	Time 0.309 (0.571)	Data Time 0.001 (0.022)	Loss 2.9011 (2.8616)	Entropy 1.28375 (1.28721)	Top-1 acc 54.297 (55.192)	Top-5 acc 75.000 (77.481)	lr 0.01889
Train [40][1630/3239]	Time 0.231 (0.570)	Data Time 0.001 (0.022)	Loss 2.8263 (2.8619)	Entropy 1.28371 (1.28719)	Top-1 acc 57.812 (55.187)	Top-5 acc 77.734 (77.477)	lr 0.01889
Train [40][1640/3239]	Time 0.208 (0.570)	Data Time 0.001 (0.021)	Loss 2.8143 (2.8620)	Entropy 1.28368 (1.28717)	Top-1 acc 57.031 (55.187)	Top-5 acc 78.906 (77.476)	lr 0.01889
Train [40][1650/3239]	Time 0.219 (0.569)	Data Time 0.001 (0.021)	Loss 2.8247 (2.8619)	Entropy 1.28348 (1.28715)	Top-1 acc 55.469 (55.189)	Top-5 acc 78.906 (77.477)	lr 0.01889
Train [40][1660/3239]	Time 44.131 (0.593)	Data Time 0.001 (0.021)	Loss 2.9863 (2.8618)	Entropy 1.28348 (1.28713)	Top-1 acc 50.781 (55.190)	Top-5 acc 74.609 (77.478)	lr 0.01889
Train [40][1670/3239]	Time 0.596 (0.592)	Data Time 0.002 (0.021)	Loss 2.9245 (2.8618)	Entropy 1.28348 (1.28711)	Top-1 acc 53.125 (55.190)	Top-5 acc 75.000 (77.482)	lr 0.01889
Train [40][1680/3239]	Time 0.211 (0.591)	Data Time 0.002 (0.021)	Loss 2.9196 (2.8614)	Entropy 1.28343 (1.28709)	Top-1 acc 54.688 (55.201)	Top-5 acc 77.734 (77.489)	lr 0.01889
Train [40][1690/3239]	Time 0.230 (0.590)	Data Time 0.001 (0.021)	Loss 3.0636 (2.8616)	Entropy 1.28338 (1.28706)	Top-1 acc 50.000 (55.197)	Top-5 acc 72.656 (77.481)	lr 0.01889
Train [40][1700/3239]	Time 0.231 (0.590)	Data Time 0.001 (0.021)	Loss 2.8228 (2.8615)	Entropy 1.28326 (1.28704)	Top-1 acc 55.859 (55.198)	Top-5 acc 76.953 (77.483)	lr 0.01888
Train [40][1710/3239]	Time 0.239 (0.589)	Data Time 0.001 (0.021)	Loss 2.9972 (2.8617)	Entropy 1.28315 (1.28702)	Top-1 acc 54.688 (55.197)	Top-5 acc 75.781 (77.482)	lr 0.01888
Train [40][1720/3239]	Time 0.226 (0.588)	Data Time 0.001 (0.020)	Loss 2.8221 (2.8620)	Entropy 1.28311 (1.28700)	Top-1 acc 58.203 (55.186)	Top-5 acc 79.297 (77.477)	lr 0.01888
Train [40][1730/3239]	Time 0.311 (0.587)	Data Time 0.001 (0.020)	Loss 2.6987 (2.8618)	Entropy 1.28305 (1.28697)	Top-1 acc 60.938 (55.190)	Top-5 acc 80.078 (77.483)	lr 0.01888
Train [40][1740/3239]	Time 0.225 (0.587)	Data Time 0.002 (0.020)	Loss 2.8720 (2.8623)	Entropy 1.28294 (1.28695)	Top-1 acc 57.422 (55.176)	Top-5 acc 74.219 (77.473)	lr 0.01888
Train [40][1750/3239]	Time 0.234 (0.586)	Data Time 0.003 (0.020)	Loss 2.8612 (2.8625)	Entropy 1.28289 (1.28693)	Top-1 acc 58.203 (55.170)	Top-5 acc 80.469 (77.474)	lr 0.01888
Train [40][1760/3239]	Time 0.243 (0.585)	Data Time 0.002 (0.020)	Loss 2.9175 (2.8628)	Entropy 1.28289 (1.28690)	Top-1 acc 53.516 (55.166)	Top-5 acc 76.172 (77.468)	lr 0.01888
Train [40][1770/3239]	Time 2.355 (0.585)	Data Time 0.001 (0.020)	Loss 2.8144 (2.8628)	Entropy 1.28289 (1.28688)	Top-1 acc 57.812 (55.168)	Top-5 acc 75.781 (77.469)	lr 0.01888
Train [40][1780/3239]	Time 0.381 (0.583)	Data Time 0.001 (0.020)	Loss 2.8992 (2.8630)	Entropy 1.28287 (1.28686)	Top-1 acc 53.516 (55.168)	Top-5 acc 76.953 (77.467)	lr 0.01888
Train [40][1790/3239]	Time 0.220 (0.582)	Data Time 0.002 (0.020)	Loss 2.7822 (2.8628)	Entropy 1.28285 (1.28684)	Top-1 acc 59.766 (55.175)	Top-5 acc 78.125 (77.475)	lr 0.01888
Train [40][1800/3239]	Time 0.209 (0.581)	Data Time 0.001 (0.020)	Loss 3.0767 (2.8632)	Entropy 1.28276 (1.28681)	Top-1 acc 50.000 (55.168)	Top-5 acc 73.047 (77.466)	lr 0.01888
Train [40][1810/3239]	Time 0.218 (0.581)	Data Time 0.001 (0.020)	Loss 2.9396 (2.8633)	Entropy 1.28264 (1.28679)	Top-1 acc 53.516 (55.169)	Top-5 acc 76.562 (77.464)	lr 0.01887
Train [40][1820/3239]	Time 0.230 (0.580)	Data Time 0.002 (0.019)	Loss 2.8178 (2.8631)	Entropy 1.28253 (1.28677)	Top-1 acc 57.031 (55.176)	Top-5 acc 76.172 (77.464)	lr 0.01887
Train [40][1830/3239]	Time 0.247 (0.579)	Data Time 0.002 (0.019)	Loss 3.0922 (2.8629)	Entropy 1.28249 (1.28675)	Top-1 acc 51.562 (55.179)	Top-5 acc 73.438 (77.473)	lr 0.01887
Train [40][1840/3239]	Time 0.218 (0.578)	Data Time 0.001 (0.019)	Loss 2.8094 (2.8629)	Entropy 1.28240 (1.28672)	Top-1 acc 54.297 (55.170)	Top-5 acc 76.562 (77.477)	lr 0.01887
Train [40][1850/3239]	Time 0.202 (0.578)	Data Time 0.001 (0.019)	Loss 2.8510 (2.8632)	Entropy 1.28238 (1.28670)	Top-1 acc 58.203 (55.165)	Top-5 acc 76.562 (77.469)	lr 0.01887
Train [40][1860/3239]	Time 0.228 (0.577)	Data Time 0.001 (0.019)	Loss 2.8255 (2.8632)	Entropy 1.28231 (1.28668)	Top-1 acc 54.297 (55.169)	Top-5 acc 81.250 (77.470)	lr 0.01887
Train [40][1870/3239]	Time 0.240 (0.576)	Data Time 0.001 (0.019)	Loss 3.0014 (2.8631)	Entropy 1.28229 (1.28665)	Top-1 acc 53.516 (55.169)	Top-5 acc 76.953 (77.472)	lr 0.01887
Train [40][1880/3239]	Time 2.467 (0.576)	Data Time 0.002 (0.019)	Loss 2.8288 (2.8634)	Entropy 1.28229 (1.28663)	Top-1 acc 59.766 (55.169)	Top-5 acc 79.297 (77.465)	lr 0.01887
Train [40][1890/3239]	Time 0.223 (0.574)	Data Time 0.001 (0.019)	Loss 3.0137 (2.8632)	Entropy 1.28215 (1.28661)	Top-1 acc 53.906 (55.174)	Top-5 acc 73.828 (77.466)	lr 0.01887
Train [40][1900/3239]	Time 0.267 (0.573)	Data Time 0.001 (0.019)	Loss 3.0444 (2.8630)	Entropy 1.28213 (1.28658)	Top-1 acc 49.609 (55.177)	Top-5 acc 74.609 (77.470)	lr 0.01887
Train [40][1910/3239]	Time 0.229 (0.573)	Data Time 0.001 (0.019)	Loss 2.9335 (2.8629)	Entropy 1.28212 (1.28656)	Top-1 acc 51.953 (55.177)	Top-5 acc 73.828 (77.468)	lr 0.01887
Train [40][1920/3239]	Time 0.225 (0.572)	Data Time 0.001 (0.019)	Loss 2.9267 (2.8629)	Entropy 1.28196 (1.28653)	Top-1 acc 54.297 (55.173)	Top-5 acc 75.391 (77.470)	lr 0.01887
Train [40][1930/3239]	Time 0.209 (0.571)	Data Time 0.001 (0.018)	Loss 3.0038 (2.8631)	Entropy 1.28193 (1.28651)	Top-1 acc 50.781 (55.169)	Top-5 acc 74.219 (77.466)	lr 0.01886
Train [40][1940/3239]	Time 0.225 (0.571)	Data Time 0.001 (0.018)	Loss 2.8201 (2.8627)	Entropy 1.28187 (1.28649)	Top-1 acc 57.031 (55.178)	Top-5 acc 78.906 (77.470)	lr 0.01886
Train [40][1950/3239]	Time 0.172 (0.570)	Data Time 0.001 (0.018)	Loss 2.7856 (2.8627)	Entropy 1.28186 (1.28646)	Top-1 acc 57.812 (55.179)	Top-5 acc 78.125 (77.467)	lr 0.01886
Train [40][1960/3239]	Time 0.239 (0.570)	Data Time 0.001 (0.018)	Loss 2.7896 (2.8628)	Entropy 1.28168 (1.28644)	Top-1 acc 60.156 (55.180)	Top-5 acc 77.734 (77.464)	lr 0.01886
Train [40][1970/3239]	Time 0.221 (0.569)	Data Time 0.001 (0.018)	Loss 3.0691 (2.8628)	Entropy 1.28159 (1.28642)	Top-1 acc 49.219 (55.182)	Top-5 acc 74.219 (77.462)	lr 0.01886
Train [40][1980/3239]	Time 0.258 (0.569)	Data Time 0.002 (0.018)	Loss 2.8657 (2.8626)	Entropy 1.28158 (1.28639)	Top-1 acc 57.422 (55.179)	Top-5 acc 76.172 (77.467)	lr 0.01886
Train [40][1990/3239]	Time 2.542 (0.568)	Data Time 0.002 (0.018)	Loss 2.7505 (2.8627)	Entropy 1.28158 (1.28637)	Top-1 acc 58.203 (55.177)	Top-5 acc 80.078 (77.466)	lr 0.01886
Train [40][2000/3239]	Time 0.247 (0.566)	Data Time 0.001 (0.018)	Loss 2.6954 (2.8628)	Entropy 1.28151 (1.28634)	Top-1 acc 56.641 (55.171)	Top-5 acc 80.859 (77.467)	lr 0.01886
Train [40][2010/3239]	Time 0.229 (0.566)	Data Time 0.002 (0.018)	Loss 2.9222 (2.8628)	Entropy 1.28146 (1.28632)	Top-1 acc 52.344 (55.165)	Top-5 acc 76.953 (77.462)	lr 0.01886
Train [40][2020/3239]	Time 0.204 (0.565)	Data Time 0.002 (0.018)	Loss 2.7761 (2.8627)	Entropy 1.28140 (1.28629)	Top-1 acc 59.766 (55.166)	Top-5 acc 79.688 (77.464)	lr 0.01886
Train [40][2030/3239]	Time 0.406 (0.587)	Data Time 0.002 (0.018)	Loss 2.9215 (2.8630)	Entropy 1.28133 (1.28627)	Top-1 acc 50.000 (55.155)	Top-5 acc 75.000 (77.455)	lr 0.01886
Train [40][2040/3239]	Time 0.218 (0.586)	Data Time 0.002 (0.018)	Loss 2.6906 (2.8631)	Entropy 1.28131 (1.28625)	Top-1 acc 57.812 (55.157)	Top-5 acc 80.859 (77.452)	lr 0.01885
Train [40][2050/3239]	Time 0.213 (0.585)	Data Time 0.002 (0.017)	Loss 2.9481 (2.8634)	Entropy 1.28127 (1.28622)	Top-1 acc 50.781 (55.150)	Top-5 acc 75.000 (77.445)	lr 0.01885
Train [40][2060/3239]	Time 0.213 (0.585)	Data Time 0.001 (0.017)	Loss 3.6130 (2.8637)	Entropy 1.28123 (1.28620)	Top-1 acc 38.672 (55.141)	Top-5 acc 65.625 (77.438)	lr 0.01885
Train [40][2070/3239]	Time 0.211 (0.584)	Data Time 0.001 (0.017)	Loss 3.0196 (2.8637)	Entropy 1.28118 (1.28617)	Top-1 acc 51.953 (55.143)	Top-5 acc 71.484 (77.436)	lr 0.01885
Train [40][2080/3239]	Time 0.217 (0.583)	Data Time 0.001 (0.017)	Loss 2.7686 (2.8637)	Entropy 1.28109 (1.28615)	Top-1 acc 56.641 (55.146)	Top-5 acc 80.469 (77.429)	lr 0.01885
Train [40][2090/3239]	Time 0.361 (0.583)	Data Time 0.001 (0.017)	Loss 2.8770 (2.8639)	Entropy 1.28104 (1.28612)	Top-1 acc 56.641 (55.141)	Top-5 acc 79.688 (77.427)	lr 0.01885
Train [40][2100/3239]	Time 2.326 (0.582)	Data Time 0.001 (0.017)	Loss 2.8374 (2.8638)	Entropy 1.28104 (1.28610)	Top-1 acc 55.469 (55.144)	Top-5 acc 78.125 (77.430)	lr 0.01885
Train [40][2110/3239]	Time 0.212 (0.581)	Data Time 0.001 (0.017)	Loss 2.9843 (2.8638)	Entropy 1.28097 (1.28608)	Top-1 acc 51.953 (55.147)	Top-5 acc 74.609 (77.428)	lr 0.01885
Train [40][2120/3239]	Time 0.235 (0.580)	Data Time 0.001 (0.017)	Loss 2.9860 (2.8639)	Entropy 1.28090 (1.28605)	Top-1 acc 52.344 (55.145)	Top-5 acc 71.484 (77.428)	lr 0.01885
Train [40][2130/3239]	Time 0.229 (0.579)	Data Time 0.001 (0.017)	Loss 2.8022 (2.8637)	Entropy 1.28085 (1.28603)	Top-1 acc 55.859 (55.149)	Top-5 acc 78.906 (77.436)	lr 0.01885
Train [40][2140/3239]	Time 0.306 (0.579)	Data Time 0.001 (0.017)	Loss 2.9530 (2.8638)	Entropy 1.28084 (1.28600)	Top-1 acc 54.297 (55.147)	Top-5 acc 76.172 (77.429)	lr 0.01885
Train [40][2150/3239]	Time 0.219 (0.578)	Data Time 0.001 (0.017)	Loss 2.9322 (2.8639)	Entropy 1.28082 (1.28598)	Top-1 acc 53.125 (55.144)	Top-5 acc 76.562 (77.428)	lr 0.01885
Train [40][2160/3239]	Time 0.243 (0.578)	Data Time 0.001 (0.017)	Loss 2.6343 (2.8641)	Entropy 1.28082 (1.28596)	Top-1 acc 57.812 (55.135)	Top-5 acc 80.078 (77.423)	lr 0.01884
Train [40][2170/3239]	Time 0.226 (0.577)	Data Time 0.001 (0.017)	Loss 2.9071 (2.8642)	Entropy 1.28078 (1.28593)	Top-1 acc 55.859 (55.127)	Top-5 acc 77.734 (77.425)	lr 0.01884
Train [40][2180/3239]	Time 0.224 (0.577)	Data Time 0.001 (0.017)	Loss 2.9718 (2.8643)	Entropy 1.28072 (1.28591)	Top-1 acc 50.781 (55.119)	Top-5 acc 73.828 (77.423)	lr 0.01884
Train [40][2190/3239]	Time 0.315 (0.576)	Data Time 0.001 (0.016)	Loss 3.0273 (2.8646)	Entropy 1.28069 (1.28588)	Top-1 acc 50.391 (55.112)	Top-5 acc 75.781 (77.416)	lr 0.01884
Train [40][2200/3239]	Time 0.232 (0.576)	Data Time 0.002 (0.016)	Loss 2.9660 (2.8648)	Entropy 1.28066 (1.28586)	Top-1 acc 51.172 (55.108)	Top-5 acc 76.172 (77.413)	lr 0.01884
Train [40][2210/3239]	Time 2.451 (0.575)	Data Time 0.002 (0.016)	Loss 2.9483 (2.8648)	Entropy 1.28066 (1.28584)	Top-1 acc 53.516 (55.108)	Top-5 acc 76.172 (77.410)	lr 0.01884
Train [40][2220/3239]	Time 0.243 (0.573)	Data Time 0.001 (0.016)	Loss 2.9393 (2.8647)	Entropy 1.28066 (1.28581)	Top-1 acc 54.688 (55.113)	Top-5 acc 74.219 (77.411)	lr 0.01884
Train [40][2230/3239]	Time 0.209 (0.573)	Data Time 0.001 (0.016)	Loss 2.8816 (2.8648)	Entropy 1.28049 (1.28579)	Top-1 acc 54.688 (55.115)	Top-5 acc 79.688 (77.413)	lr 0.01884
Train [40][2240/3239]	Time 0.204 (0.572)	Data Time 0.001 (0.016)	Loss 2.8880 (2.8645)	Entropy 1.28028 (1.28577)	Top-1 acc 56.250 (55.120)	Top-5 acc 76.172 (77.419)	lr 0.01884
Train [40][2250/3239]	Time 0.251 (0.572)	Data Time 0.001 (0.016)	Loss 2.7624 (2.8645)	Entropy 1.28022 (1.28574)	Top-1 acc 58.203 (55.118)	Top-5 acc 79.688 (77.419)	lr 0.01884
Train [40][2260/3239]	Time 0.215 (0.571)	Data Time 0.001 (0.016)	Loss 2.9332 (2.8645)	Entropy 1.28018 (1.28572)	Top-1 acc 54.297 (55.123)	Top-5 acc 75.391 (77.419)	lr 0.01884
Train [40][2270/3239]	Time 0.227 (0.571)	Data Time 0.001 (0.016)	Loss 2.8571 (2.8643)	Entropy 1.28016 (1.28569)	Top-1 acc 55.859 (55.130)	Top-5 acc 76.562 (77.422)	lr 0.01883
Train [40][2280/3239]	Time 0.228 (0.570)	Data Time 0.002 (0.016)	Loss 2.9018 (2.8642)	Entropy 1.28012 (1.28567)	Top-1 acc 52.344 (55.125)	Top-5 acc 77.734 (77.424)	lr 0.01883
Train [40][2290/3239]	Time 0.307 (0.570)	Data Time 0.002 (0.016)	Loss 2.8389 (2.8644)	Entropy 1.28007 (1.28564)	Top-1 acc 56.641 (55.123)	Top-5 acc 78.125 (77.420)	lr 0.01883
Train [40][2300/3239]	Time 0.221 (0.569)	Data Time 0.001 (0.016)	Loss 2.9994 (2.8645)	Entropy 1.27997 (1.28562)	Top-1 acc 51.172 (55.118)	Top-5 acc 75.000 (77.422)	lr 0.01883
Train [40][2310/3239]	Time 0.280 (0.569)	Data Time 0.001 (0.016)	Loss 2.9562 (2.8644)	Entropy 1.27993 (1.28559)	Top-1 acc 52.734 (55.116)	Top-5 acc 76.172 (77.425)	lr 0.01883
Train [40][2320/3239]	Time 2.453 (0.568)	Data Time 0.001 (0.016)	Loss 3.1657 (2.8644)	Entropy 1.27993 (1.28557)	Top-1 acc 46.484 (55.112)	Top-5 acc 72.266 (77.427)	lr 0.01883
Train [40][2330/3239]	Time 0.227 (0.567)	Data Time 0.001 (0.016)	Loss 2.7054 (2.8643)	Entropy 1.27986 (1.28555)	Top-1 acc 58.984 (55.116)	Top-5 acc 80.469 (77.430)	lr 0.01883
Train [40][2340/3239]	Time 0.264 (0.567)	Data Time 0.001 (0.016)	Loss 3.0394 (2.8642)	Entropy 1.27983 (1.28552)	Top-1 acc 52.734 (55.117)	Top-5 acc 74.219 (77.435)	lr 0.01883
Train [40][2350/3239]	Time 0.216 (0.566)	Data Time 0.001 (0.015)	Loss 2.8763 (2.8641)	Entropy 1.27977 (1.28550)	Top-1 acc 56.250 (55.120)	Top-5 acc 78.125 (77.436)	lr 0.01883
Train [40][2360/3239]	Time 0.215 (0.566)	Data Time 0.001 (0.015)	Loss 3.0882 (2.8643)	Entropy 1.27972 (1.28547)	Top-1 acc 50.781 (55.115)	Top-5 acc 73.438 (77.431)	lr 0.01883
Train [40][2370/3239]	Time 0.214 (0.565)	Data Time 0.001 (0.015)	Loss 2.8821 (2.8645)	Entropy 1.27969 (1.28545)	Top-1 acc 52.344 (55.112)	Top-5 acc 76.953 (77.424)	lr 0.01883
Train [40][2380/3239]	Time 0.219 (0.565)	Data Time 0.001 (0.015)	Loss 3.0298 (2.8647)	Entropy 1.27970 (1.28542)	Top-1 acc 51.953 (55.105)	Top-5 acc 75.391 (77.422)	lr 0.01883
Train [40][2390/3239]	Time 0.437 (0.581)	Data Time 0.004 (0.015)	Loss 2.9907 (2.8646)	Entropy 1.27965 (1.28540)	Top-1 acc 50.000 (55.102)	Top-5 acc 75.781 (77.423)	lr 0.01882
Train [40][2400/3239]	Time 0.208 (0.581)	Data Time 0.002 (0.015)	Loss 2.9798 (2.8648)	Entropy 1.27956 (1.28538)	Top-1 acc 51.562 (55.098)	Top-5 acc 75.000 (77.419)	lr 0.01882
Train [40][2410/3239]	Time 0.201 (0.581)	Data Time 0.001 (0.015)	Loss 2.8583 (2.8650)	Entropy 1.27946 (1.28535)	Top-1 acc 58.594 (55.095)	Top-5 acc 77.734 (77.417)	lr 0.01882
Train [40][2420/3239]	Time 0.257 (0.580)	Data Time 0.001 (0.015)	Loss 2.9449 (2.8649)	Entropy 1.27944 (1.28533)	Top-1 acc 53.906 (55.098)	Top-5 acc 76.172 (77.421)	lr 0.01882
Train [40][2430/3239]	Time 2.411 (0.580)	Data Time 0.002 (0.015)	Loss 2.8751 (2.8649)	Entropy 1.27944 (1.28530)	Top-1 acc 56.641 (55.100)	Top-5 acc 77.734 (77.423)	lr 0.01882
Train [40][2440/3239]	Time 0.275 (0.578)	Data Time 0.002 (0.015)	Loss 2.7257 (2.8649)	Entropy 1.27938 (1.28528)	Top-1 acc 57.812 (55.097)	Top-5 acc 79.297 (77.423)	lr 0.01882
Train [40][2450/3239]	Time 0.230 (0.578)	Data Time 0.001 (0.015)	Loss 2.7956 (2.8648)	Entropy 1.27932 (1.28525)	Top-1 acc 56.641 (55.102)	Top-5 acc 80.078 (77.425)	lr 0.01882
Train [40][2460/3239]	Time 0.229 (0.577)	Data Time 0.001 (0.015)	Loss 2.8161 (2.8647)	Entropy 1.27927 (1.28523)	Top-1 acc 50.391 (55.101)	Top-5 acc 79.297 (77.428)	lr 0.01882
Train [40][2470/3239]	Time 0.222 (0.577)	Data Time 0.001 (0.015)	Loss 2.9947 (2.8646)	Entropy 1.27918 (1.28521)	Top-1 acc 52.344 (55.107)	Top-5 acc 76.562 (77.431)	lr 0.01882
Train [40][2480/3239]	Time 0.197 (0.576)	Data Time 0.001 (0.015)	Loss 2.9379 (2.8647)	Entropy 1.27918 (1.28518)	Top-1 acc 50.000 (55.107)	Top-5 acc 77.734 (77.432)	lr 0.01882
Train [40][2490/3239]	Time 0.308 (0.576)	Data Time 0.001 (0.015)	Loss 2.9993 (2.8650)	Entropy 1.27915 (1.28516)	Top-1 acc 51.172 (55.104)	Top-5 acc 73.828 (77.425)	lr 0.01882
Train [40][2500/3239]	Time 0.238 (0.575)	Data Time 0.001 (0.015)	Loss 2.6817 (2.8648)	Entropy 1.27907 (1.28513)	Top-1 acc 60.547 (55.111)	Top-5 acc 80.469 (77.430)	lr 0.01881
Train [40][2510/3239]	Time 0.221 (0.575)	Data Time 0.001 (0.015)	Loss 2.7383 (2.8646)	Entropy 1.27905 (1.28511)	Top-1 acc 51.953 (55.118)	Top-5 acc 83.594 (77.434)	lr 0.01881
Train [40][2520/3239]	Time 0.209 (0.574)	Data Time 0.001 (0.015)	Loss 2.9805 (2.8648)	Entropy 1.27903 (1.28509)	Top-1 acc 55.859 (55.114)	Top-5 acc 72.656 (77.428)	lr 0.01881
Train [40][2530/3239]	Time 0.195 (0.574)	Data Time 0.001 (0.014)	Loss 2.7802 (2.8647)	Entropy 1.27896 (1.28506)	Top-1 acc 57.422 (55.117)	Top-5 acc 76.953 (77.429)	lr 0.01881
Train [40][2540/3239]	Time 2.428 (0.574)	Data Time 0.001 (0.014)	Loss 2.7279 (2.8646)	Entropy 1.27896 (1.28504)	Top-1 acc 58.984 (55.121)	Top-5 acc 80.078 (77.430)	lr 0.01881
Train [40][2550/3239]	Time 0.219 (0.572)	Data Time 0.001 (0.014)	Loss 2.9102 (2.8646)	Entropy 1.27872 (1.28501)	Top-1 acc 56.641 (55.123)	Top-5 acc 76.172 (77.428)	lr 0.01881
Train [40][2560/3239]	Time 0.234 (0.572)	Data Time 0.001 (0.014)	Loss 2.9921 (2.8647)	Entropy 1.27866 (1.28499)	Top-1 acc 50.391 (55.115)	Top-5 acc 73.828 (77.427)	lr 0.01881
Train [40][2570/3239]	Time 0.235 (0.571)	Data Time 0.001 (0.014)	Loss 2.8764 (2.8648)	Entropy 1.27866 (1.28496)	Top-1 acc 56.641 (55.112)	Top-5 acc 77.734 (77.422)	lr 0.01881
Train [40][2580/3239]	Time 0.216 (0.571)	Data Time 0.001 (0.014)	Loss 2.5610 (2.8647)	Entropy 1.27863 (1.28494)	Top-1 acc 57.812 (55.113)	Top-5 acc 82.812 (77.425)	lr 0.01881
Train [40][2590/3239]	Time 0.223 (0.570)	Data Time 0.001 (0.014)	Loss 2.9043 (2.8647)	Entropy 1.27859 (1.28491)	Top-1 acc 54.688 (55.110)	Top-5 acc 75.781 (77.426)	lr 0.01881
Train [40][2600/3239]	Time 0.231 (0.570)	Data Time 0.001 (0.014)	Loss 2.8718 (2.8647)	Entropy 1.27856 (1.28489)	Top-1 acc 57.031 (55.111)	Top-5 acc 78.906 (77.423)	lr 0.01881
Train [40][2610/3239]	Time 0.213 (0.570)	Data Time 0.001 (0.014)	Loss 2.8501 (2.8648)	Entropy 1.27850 (1.28487)	Top-1 acc 53.516 (55.110)	Top-5 acc 78.125 (77.421)	lr 0.01881
Train [40][2620/3239]	Time 0.237 (0.569)	Data Time 0.001 (0.014)	Loss 2.9570 (2.8650)	Entropy 1.27845 (1.28484)	Top-1 acc 54.297 (55.108)	Top-5 acc 75.000 (77.416)	lr 0.01880
Train [40][2630/3239]	Time 0.213 (0.569)	Data Time 0.001 (0.014)	Loss 2.9817 (2.8649)	Entropy 1.27846 (1.28482)	Top-1 acc 51.562 (55.106)	Top-5 acc 75.000 (77.417)	lr 0.01880
Train [40][2640/3239]	Time 0.249 (0.568)	Data Time 0.002 (0.014)	Loss 2.8783 (2.8650)	Entropy 1.27842 (1.28479)	Top-1 acc 52.734 (55.099)	Top-5 acc 76.172 (77.415)	lr 0.01880
Train [40][2650/3239]	Time 0.253 (0.568)	Data Time 0.001 (0.014)	Loss 2.7399 (2.8649)	Entropy 1.27838 (1.28477)	Top-1 acc 60.938 (55.101)	Top-5 acc 79.297 (77.415)	lr 0.01880
Train [40][2660/3239]	Time 0.254 (0.568)	Data Time 0.001 (0.014)	Loss 2.9475 (2.8649)	Entropy 1.27833 (1.28474)	Top-1 acc 53.906 (55.105)	Top-5 acc 76.562 (77.412)	lr 0.01880
Train [40][2670/3239]	Time 0.232 (0.567)	Data Time 0.001 (0.014)	Loss 3.4644 (2.8655)	Entropy 1.27828 (1.28472)	Top-1 acc 40.234 (55.093)	Top-5 acc 68.750 (77.404)	lr 0.01880
Train [40][2680/3239]	Time 0.230 (0.567)	Data Time 0.001 (0.014)	Loss 2.8982 (2.8655)	Entropy 1.27822 (1.28470)	Top-1 acc 55.859 (55.094)	Top-5 acc 78.516 (77.402)	lr 0.01880
Train [40][2690/3239]	Time 0.219 (0.566)	Data Time 0.001 (0.014)	Loss 3.0096 (2.8656)	Entropy 1.27817 (1.28467)	Top-1 acc 50.391 (55.093)	Top-5 acc 73.828 (77.399)	lr 0.01880
Train [40][2700/3239]	Time 0.307 (0.566)	Data Time 0.001 (0.014)	Loss 2.8752 (2.8656)	Entropy 1.27814 (1.28465)	Top-1 acc 53.125 (55.089)	Top-5 acc 80.469 (77.404)	lr 0.01880
Train [40][2710/3239]	Time 0.279 (0.565)	Data Time 0.001 (0.014)	Loss 2.9246 (2.8656)	Entropy 1.27812 (1.28462)	Top-1 acc 53.906 (55.090)	Top-5 acc 76.953 (77.405)	lr 0.01880
Train [40][2720/3239]	Time 0.215 (0.565)	Data Time 0.001 (0.014)	Loss 2.8257 (2.8655)	Entropy 1.27809 (1.28460)	Top-1 acc 53.125 (55.089)	Top-5 acc 76.953 (77.404)	lr 0.01880
Train [40][2730/3239]	Time 0.271 (0.565)	Data Time 0.003 (0.014)	Loss 2.9216 (2.8656)	Entropy 1.27801 (1.28458)	Top-1 acc 55.078 (55.088)	Top-5 acc 77.734 (77.406)	lr 0.01879
Train [40][2740/3239]	Time 0.276 (0.581)	Data Time 0.005 (0.014)	Loss 2.8396 (2.8653)	Entropy 1.27799 (1.28455)	Top-1 acc 55.078 (55.098)	Top-5 acc 76.953 (77.413)	lr 0.01879
Train [40][2750/3239]	Time 0.342 (0.581)	Data Time 0.002 (0.013)	Loss 3.0418 (2.8653)	Entropy 1.27791 (1.28453)	Top-1 acc 50.391 (55.101)	Top-5 acc 71.875 (77.412)	lr 0.01879
Train [40][2760/3239]	Time 0.206 (0.580)	Data Time 0.002 (0.013)	Loss 2.8533 (2.8656)	Entropy 1.27780 (1.28450)	Top-1 acc 55.859 (55.095)	Top-5 acc 76.562 (77.405)	lr 0.01879
Train [40][2770/3239]	Time 0.242 (0.580)	Data Time 0.002 (0.013)	Loss 2.8183 (2.8658)	Entropy 1.27776 (1.28448)	Top-1 acc 53.516 (55.096)	Top-5 acc 73.828 (77.400)	lr 0.01879
Train [40][2780/3239]	Time 0.235 (0.579)	Data Time 0.001 (0.013)	Loss 2.8618 (2.8658)	Entropy 1.27764 (1.28446)	Top-1 acc 57.031 (55.095)	Top-5 acc 80.078 (77.401)	lr 0.01879
Train [40][2790/3239]	Time 0.206 (0.579)	Data Time 0.003 (0.013)	Loss 2.9430 (2.8659)	Entropy 1.27762 (1.28443)	Top-1 acc 54.297 (55.091)	Top-5 acc 77.734 (77.400)	lr 0.01879
Train [40][2800/3239]	Time 0.241 (0.578)	Data Time 0.001 (0.013)	Loss 2.8515 (2.8659)	Entropy 1.27760 (1.28441)	Top-1 acc 58.594 (55.090)	Top-5 acc 75.391 (77.402)	lr 0.01879
Train [40][2810/3239]	Time 0.205 (0.578)	Data Time 0.001 (0.013)	Loss 2.8679 (2.8659)	Entropy 1.27759 (1.28438)	Top-1 acc 51.953 (55.088)	Top-5 acc 78.516 (77.404)	lr 0.01879
Train [40][2820/3239]	Time 0.250 (0.578)	Data Time 0.001 (0.013)	Loss 2.8446 (2.8660)	Entropy 1.27758 (1.28436)	Top-1 acc 53.516 (55.084)	Top-5 acc 77.344 (77.402)	lr 0.01879
Train [40][2830/3239]	Time 0.207 (0.577)	Data Time 0.001 (0.013)	Loss 2.8339 (2.8659)	Entropy 1.27732 (1.28434)	Top-1 acc 50.781 (55.081)	Top-5 acc 78.516 (77.403)	lr 0.01879
Train [40][2840/3239]	Time 0.251 (0.577)	Data Time 0.001 (0.013)	Loss 2.8803 (2.8660)	Entropy 1.27728 (1.28431)	Top-1 acc 55.078 (55.080)	Top-5 acc 79.297 (77.404)	lr 0.01878
Train [40][2850/3239]	Time 0.264 (0.576)	Data Time 0.002 (0.013)	Loss 2.7198 (2.8658)	Entropy 1.27718 (1.28429)	Top-1 acc 60.156 (55.085)	Top-5 acc 81.250 (77.406)	lr 0.01878
Train [40][2860/3239]	Time 0.340 (0.576)	Data Time 0.001 (0.013)	Loss 2.9564 (2.8658)	Entropy 1.27709 (1.28426)	Top-1 acc 55.078 (55.084)	Top-5 acc 77.344 (77.406)	lr 0.01878
Train [40][2870/3239]	Time 0.251 (0.576)	Data Time 0.001 (0.013)	Loss 2.8529 (2.8658)	Entropy 1.27704 (1.28424)	Top-1 acc 53.906 (55.083)	Top-5 acc 76.562 (77.405)	lr 0.01878
Train [40][2880/3239]	Time 0.236 (0.575)	Data Time 0.001 (0.013)	Loss 2.6375 (2.8660)	Entropy 1.27702 (1.28421)	Top-1 acc 56.641 (55.079)	Top-5 acc 82.031 (77.400)	lr 0.01878
Train [40][2890/3239]	Time 0.193 (0.575)	Data Time 0.001 (0.013)	Loss 2.8103 (2.8658)	Entropy 1.27696 (1.28419)	Top-1 acc 57.031 (55.081)	Top-5 acc 77.734 (77.403)	lr 0.01878
Train [40][2900/3239]	Time 0.225 (0.574)	Data Time 0.001 (0.013)	Loss 2.9621 (2.8660)	Entropy 1.27694 (1.28416)	Top-1 acc 51.562 (55.076)	Top-5 acc 75.781 (77.396)	lr 0.01878
Train [40][2910/3239]	Time 0.294 (0.574)	Data Time 0.002 (0.013)	Loss 2.7057 (2.8661)	Entropy 1.27690 (1.28414)	Top-1 acc 60.547 (55.078)	Top-5 acc 80.469 (77.398)	lr 0.01878
Train [40][2920/3239]	Time 0.229 (0.574)	Data Time 0.002 (0.013)	Loss 2.9758 (2.8660)	Entropy 1.27687 (1.28411)	Top-1 acc 50.781 (55.083)	Top-5 acc 75.781 (77.399)	lr 0.01878
Train [40][2930/3239]	Time 0.252 (0.573)	Data Time 0.001 (0.013)	Loss 2.8286 (2.8659)	Entropy 1.27684 (1.28409)	Top-1 acc 54.688 (55.084)	Top-5 acc 76.172 (77.397)	lr 0.01878
Train [40][2940/3239]	Time 0.246 (0.573)	Data Time 0.001 (0.013)	Loss 2.7655 (2.8657)	Entropy 1.27671 (1.28406)	Top-1 acc 57.812 (55.093)	Top-5 acc 78.125 (77.398)	lr 0.01878
Train [40][2950/3239]	Time 0.214 (0.572)	Data Time 0.001 (0.013)	Loss 3.5321 (2.8660)	Entropy 1.27667 (1.28404)	Top-1 acc 42.188 (55.088)	Top-5 acc 64.062 (77.392)	lr 0.01878
Train [40][2960/3239]	Time 0.314 (0.572)	Data Time 0.001 (0.013)	Loss 2.8178 (2.8662)	Entropy 1.27663 (1.28401)	Top-1 acc 57.422 (55.086)	Top-5 acc 77.734 (77.387)	lr 0.01877
Train [40][2970/3239]	Time 0.223 (0.571)	Data Time 0.001 (0.013)	Loss 2.7891 (2.8662)	Entropy 1.27659 (1.28399)	Top-1 acc 60.156 (55.084)	Top-5 acc 79.297 (77.388)	lr 0.01877
Train [40][2980/3239]	Time 0.244 (0.571)	Data Time 0.001 (0.013)	Loss 2.8401 (2.8660)	Entropy 1.27658 (1.28396)	Top-1 acc 54.297 (55.090)	Top-5 acc 78.516 (77.391)	lr 0.01877
Train [40][2990/3239]	Time 0.215 (0.571)	Data Time 0.001 (0.013)	Loss 2.6035 (2.8659)	Entropy 1.27650 (1.28394)	Top-1 acc 60.547 (55.092)	Top-5 acc 82.422 (77.393)	lr 0.01877
Train [40][3000/3239]	Time 0.185 (0.570)	Data Time 0.001 (0.012)	Loss 3.0835 (2.8658)	Entropy 1.27647 (1.28391)	Top-1 acc 50.391 (55.100)	Top-5 acc 74.609 (77.395)	lr 0.01877
Train [40][3010/3239]	Time 0.258 (0.570)	Data Time 0.001 (0.012)	Loss 2.8264 (2.8658)	Entropy 1.27643 (1.28389)	Top-1 acc 55.469 (55.102)	Top-5 acc 78.125 (77.395)	lr 0.01877
Train [40][3020/3239]	Time 0.241 (0.570)	Data Time 0.001 (0.012)	Loss 2.8688 (2.8657)	Entropy 1.27653 (1.28386)	Top-1 acc 58.594 (55.105)	Top-5 acc 74.219 (77.394)	lr 0.01877
Train [40][3030/3239]	Time 0.205 (0.569)	Data Time 0.002 (0.012)	Loss 2.8305 (2.8658)	Entropy 1.27643 (1.28384)	Top-1 acc 54.688 (55.104)	Top-5 acc 79.297 (77.396)	lr 0.01877
Train [40][3040/3239]	Time 0.290 (0.569)	Data Time 0.001 (0.012)	Loss 2.7296 (2.8657)	Entropy 1.27637 (1.28382)	Top-1 acc 60.938 (55.102)	Top-5 acc 80.859 (77.399)	lr 0.01877
Train [40][3050/3239]	Time 0.217 (0.569)	Data Time 0.001 (0.012)	Loss 2.9961 (2.8658)	Entropy 1.27630 (1.28379)	Top-1 acc 57.031 (55.101)	Top-5 acc 70.703 (77.396)	lr 0.01877
Train [40][3060/3239]	Time 0.216 (0.568)	Data Time 0.001 (0.012)	Loss 3.0426 (2.8657)	Entropy 1.27619 (1.28377)	Top-1 acc 51.953 (55.101)	Top-5 acc 71.484 (77.398)	lr 0.01877
Train [40][3070/3239]	Time 0.421 (0.583)	Data Time 0.004 (0.012)	Loss 3.0249 (2.8657)	Entropy 1.27595 (1.28374)	Top-1 acc 49.219 (55.099)	Top-5 acc 77.734 (77.397)	lr 0.01876
Train [40][3080/3239]	Time 0.223 (0.582)	Data Time 0.002 (0.012)	Loss 2.8168 (2.8656)	Entropy 1.27590 (1.28372)	Top-1 acc 55.078 (55.104)	Top-5 acc 78.125 (77.402)	lr 0.01876
Train [40][3090/3239]	Time 0.219 (0.582)	Data Time 0.001 (0.012)	Loss 2.9649 (2.8656)	Entropy 1.27585 (1.28369)	Top-1 acc 53.125 (55.110)	Top-5 acc 76.562 (77.403)	lr 0.01876
Train [40][3100/3239]	Time 0.218 (0.581)	Data Time 0.001 (0.012)	Loss 2.6969 (2.8656)	Entropy 1.27571 (1.28367)	Top-1 acc 57.812 (55.110)	Top-5 acc 81.250 (77.402)	lr 0.01876
Train [40][3110/3239]	Time 0.252 (0.581)	Data Time 0.001 (0.012)	Loss 2.8236 (2.8656)	Entropy 1.27564 (1.28364)	Top-1 acc 54.688 (55.107)	Top-5 acc 78.516 (77.401)	lr 0.01876
Train [40][3120/3239]	Time 0.294 (0.581)	Data Time 0.002 (0.012)	Loss 2.8066 (2.8656)	Entropy 1.27551 (1.28361)	Top-1 acc 55.078 (55.107)	Top-5 acc 79.297 (77.404)	lr 0.01876
Train [40][3130/3239]	Time 0.234 (0.580)	Data Time 0.001 (0.012)	Loss 2.7219 (2.8656)	Entropy 1.27540 (1.28359)	Top-1 acc 59.766 (55.109)	Top-5 acc 79.688 (77.402)	lr 0.01876
Train [40][3140/3239]	Time 0.224 (0.580)	Data Time 0.002 (0.012)	Loss 2.9469 (2.8655)	Entropy 1.27541 (1.28356)	Top-1 acc 54.688 (55.111)	Top-5 acc 75.391 (77.403)	lr 0.01876
Train [40][3150/3239]	Time 0.208 (0.579)	Data Time 0.001 (0.012)	Loss 2.7270 (2.8654)	Entropy 1.27534 (1.28354)	Top-1 acc 57.812 (55.116)	Top-5 acc 81.641 (77.407)	lr 0.01876
Train [40][3160/3239]	Time 0.338 (0.579)	Data Time 0.001 (0.012)	Loss 3.0158 (2.8653)	Entropy 1.27529 (1.28351)	Top-1 acc 50.781 (55.118)	Top-5 acc 72.266 (77.407)	lr 0.01876
Train [40][3170/3239]	Time 0.218 (0.579)	Data Time 0.001 (0.012)	Loss 2.8341 (2.8653)	Entropy 1.27520 (1.28348)	Top-1 acc 55.469 (55.120)	Top-5 acc 77.344 (77.407)	lr 0.01876
Train [40][3180/3239]	Time 0.155 (0.578)	Data Time 0.000 (0.012)	Loss 2.8523 (2.8653)	Entropy 1.27507 (1.28346)	Top-1 acc 57.422 (55.117)	Top-5 acc 77.734 (77.406)	lr 0.01876
Train [40][3190/3239]	Time 0.218 (0.578)	Data Time 0.000 (0.012)	Loss 2.7820 (2.8654)	Entropy 1.27507 (1.28343)	Top-1 acc 59.766 (55.117)	Top-5 acc 79.297 (77.406)	lr 0.01875
Train [40][3200/3239]	Time 0.235 (0.577)	Data Time 0.000 (0.012)	Loss 3.0419 (2.8654)	Entropy 1.27504 (1.28341)	Top-1 acc 53.125 (55.119)	Top-5 acc 74.609 (77.406)	lr 0.01875
Train [40][3210/3239]	Time 0.303 (0.577)	Data Time 0.000 (0.012)	Loss 2.7646 (2.8651)	Entropy 1.27500 (1.28338)	Top-1 acc 56.250 (55.124)	Top-5 acc 82.031 (77.412)	lr 0.01875
Train [40][3220/3239]	Time 0.202 (0.576)	Data Time 0.000 (0.012)	Loss 2.8862 (2.8651)	Entropy 1.27494 (1.28335)	Top-1 acc 55.078 (55.125)	Top-5 acc 79.297 (77.414)	lr 0.01875
Train [40][3230/3239]	Time 0.217 (0.576)	Data Time 0.000 (0.012)	Loss 2.8345 (2.8651)	Entropy 1.27495 (1.28333)	Top-1 acc 55.078 (55.124)	Top-5 acc 78.906 (77.415)	lr 0.01875
Train [40][3239/3239]	Time 2.158 (0.576)	Data Time 0.000 (0.012)	Loss 3.0604 (2.8651)	Entropy 1.27495 (1.28330)	Top-1 acc 53.086 (55.126)	Top-5 acc 75.309 (77.417)	lr 0.01875
==========Valid [40/120]	loss 1.682	top-1 acc 62.122 (62.122)	top-5 acc 83.430	Train top-1 55.126	top-5 77.417	Entropy 1.27495	Latency-None: 0.000ms	Flops: 557.37M
Train [41][0/3239]	Time 36.655 (36.655)	Data Time 34.704 (34.704)	Loss 2.9091 (2.9091)	Entropy 1.27495 (1.27495)	Top-1 acc 56.250 (56.250)	Top-5 acc 78.516 (78.516)	lr 0.01875
Train [41][10/3239]	Time 2.577 (3.811)	Data Time 0.002 (3.159)	Loss 2.8384 (2.8444)	Entropy 1.27495 (1.27495)	Top-1 acc 55.859 (55.114)	Top-5 acc 76.172 (77.912)	lr 0.01875
Train [41][20/3239]	Time 0.325 (2.110)	Data Time 0.001 (1.655)	Loss 2.7456 (2.8445)	Entropy 1.27491 (1.27493)	Top-1 acc 56.641 (54.967)	Top-5 acc 80.078 (78.199)	lr 0.01875
Train [41][30/3239]	Time 0.258 (1.586)	Data Time 0.002 (1.122)	Loss 2.7985 (2.8191)	Entropy 1.27485 (1.27491)	Top-1 acc 51.562 (56.011)	Top-5 acc 78.125 (78.402)	lr 0.01875
Train [41][40/3239]	Time 0.218 (1.312)	Data Time 0.001 (0.848)	Loss 2.9086 (2.8267)	Entropy 1.27483 (1.27489)	Top-1 acc 56.641 (56.021)	Top-5 acc 75.391 (78.239)	lr 0.01875
Train [41][50/3239]	Time 0.177 (1.140)	Data Time 0.001 (0.683)	Loss 2.6074 (2.8210)	Entropy 1.27472 (1.27486)	Top-1 acc 62.500 (56.196)	Top-5 acc 81.641 (78.278)	lr 0.01875
Train [41][60/3239]	Time 0.213 (1.030)	Data Time 0.001 (0.572)	Loss 2.7997 (2.8240)	Entropy 1.27469 (1.27484)	Top-1 acc 57.812 (56.096)	Top-5 acc 79.297 (78.234)	lr 0.01874
Train [41][70/3239]	Time 0.303 (0.950)	Data Time 0.001 (0.491)	Loss 2.9447 (2.8265)	Entropy 1.27466 (1.27481)	Top-1 acc 54.688 (55.848)	Top-5 acc 74.609 (78.252)	lr 0.01874
Train [41][80/3239]	Time 0.212 (0.889)	Data Time 0.001 (0.431)	Loss 2.8541 (2.8252)	Entropy 1.27459 (1.27479)	Top-1 acc 56.250 (55.917)	Top-5 acc 80.078 (78.313)	lr 0.01874
Train [41][90/3239]	Time 0.223 (0.842)	Data Time 0.001 (0.384)	Loss 2.8633 (2.8264)	Entropy 1.27456 (1.27477)	Top-1 acc 55.469 (55.971)	Top-5 acc 77.734 (78.249)	lr 0.01874
Train [41][100/3239]	Time 0.222 (0.805)	Data Time 0.001 (0.346)	Loss 2.7990 (2.8257)	Entropy 1.27455 (1.27475)	Top-1 acc 57.031 (55.999)	Top-5 acc 78.906 (78.303)	lr 0.01874
Train [41][110/3239]	Time 0.226 (0.772)	Data Time 0.001 (0.315)	Loss 3.1491 (2.8267)	Entropy 1.27449 (1.27473)	Top-1 acc 49.609 (56.046)	Top-5 acc 69.531 (78.220)	lr 0.01874
Train [41][120/3239]	Time 2.514 (0.747)	Data Time 0.001 (0.289)	Loss 2.9436 (2.8280)	Entropy 1.27449 (1.27471)	Top-1 acc 55.078 (55.969)	Top-5 acc 75.781 (78.131)	lr 0.01874
Train [41][130/3239]	Time 0.216 (0.708)	Data Time 0.001 (0.267)	Loss 3.0581 (2.8298)	Entropy 1.27454 (1.27470)	Top-1 acc 50.781 (55.970)	Top-5 acc 72.266 (78.119)	lr 0.01874
Train [41][140/3239]	Time 0.223 (0.690)	Data Time 0.002 (0.248)	Loss 2.7101 (2.8222)	Entropy 1.27452 (1.27468)	Top-1 acc 57.422 (56.078)	Top-5 acc 80.469 (78.264)	lr 0.01874
Train [41][150/3239]	Time 0.218 (0.674)	Data Time 0.001 (0.232)	Loss 2.8112 (2.8249)	Entropy 1.27451 (1.27467)	Top-1 acc 53.906 (55.976)	Top-5 acc 78.516 (78.239)	lr 0.01874
Train [41][160/3239]	Time 0.221 (0.660)	Data Time 0.001 (0.218)	Loss 2.9689 (2.8228)	Entropy 1.27448 (1.27466)	Top-1 acc 53.516 (56.058)	Top-5 acc 76.172 (78.268)	lr 0.01874
Train [41][170/3239]	Time 0.199 (0.649)	Data Time 0.001 (0.205)	Loss 2.8626 (2.8247)	Entropy 1.27440 (1.27465)	Top-1 acc 53.125 (55.994)	Top-5 acc 75.000 (78.228)	lr 0.01874
Train [41][180/3239]	Time 0.389 (0.875)	Data Time 0.002 (0.194)	Loss 2.6751 (2.8254)	Entropy 1.27437 (1.27463)	Top-1 acc 56.641 (56.002)	Top-5 acc 82.422 (78.201)	lr 0.01873
Train [41][190/3239]	Time 0.213 (0.858)	Data Time 0.002 (0.184)	Loss 2.9369 (2.8249)	Entropy 1.27424 (1.27462)	Top-1 acc 55.469 (56.041)	Top-5 acc 75.000 (78.237)	lr 0.01873
Train [41][200/3239]	Time 0.206 (0.837)	Data Time 0.001 (0.175)	Loss 2.7848 (2.8243)	Entropy 1.27419 (1.27460)	Top-1 acc 58.594 (56.077)	Top-5 acc 78.906 (78.226)	lr 0.01873
Train [41][210/3239]	Time 0.221 (0.819)	Data Time 0.001 (0.167)	Loss 2.6981 (2.8250)	Entropy 1.27416 (1.27458)	Top-1 acc 56.641 (56.069)	Top-5 acc 79.688 (78.206)	lr 0.01873
Train [41][220/3239]	Time 0.243 (0.803)	Data Time 0.001 (0.159)	Loss 2.6653 (2.8279)	Entropy 1.27405 (1.27456)	Top-1 acc 58.984 (56.015)	Top-5 acc 83.203 (78.174)	lr 0.01873
Train [41][230/3239]	Time 2.481 (0.789)	Data Time 0.002 (0.152)	Loss 2.9929 (2.8287)	Entropy 1.27405 (1.27454)	Top-1 acc 50.391 (56.030)	Top-5 acc 73.828 (78.132)	lr 0.01873
Train [41][240/3239]	Time 0.253 (0.766)	Data Time 0.001 (0.146)	Loss 3.0913 (2.8273)	Entropy 1.27397 (1.27451)	Top-1 acc 48.438 (56.046)	Top-5 acc 71.484 (78.153)	lr 0.01873
Train [41][250/3239]	Time 0.217 (0.753)	Data Time 0.001 (0.140)	Loss 2.8315 (2.8266)	Entropy 1.27394 (1.27449)	Top-1 acc 51.562 (56.060)	Top-5 acc 76.953 (78.130)	lr 0.01873
Train [41][260/3239]	Time 0.214 (0.742)	Data Time 0.001 (0.135)	Loss 2.9106 (2.8271)	Entropy 1.27395 (1.27447)	Top-1 acc 51.172 (56.006)	Top-5 acc 76.953 (78.152)	lr 0.01873
Train [41][270/3239]	Time 0.220 (0.731)	Data Time 0.001 (0.130)	Loss 2.8209 (2.8274)	Entropy 1.27386 (1.27445)	Top-1 acc 53.125 (55.991)	Top-5 acc 76.562 (78.170)	lr 0.01873
Train [41][280/3239]	Time 0.324 (0.723)	Data Time 0.002 (0.125)	Loss 2.8526 (2.8291)	Entropy 1.27383 (1.27443)	Top-1 acc 57.812 (55.958)	Top-5 acc 78.516 (78.167)	lr 0.01873
Train [41][290/3239]	Time 0.224 (0.713)	Data Time 0.001 (0.121)	Loss 2.8383 (2.8346)	Entropy 1.27383 (1.27441)	Top-1 acc 55.859 (55.847)	Top-5 acc 78.906 (78.082)	lr 0.01872
Train [41][300/3239]	Time 0.217 (0.705)	Data Time 0.001 (0.117)	Loss 2.9000 (2.8365)	Entropy 1.27382 (1.27439)	Top-1 acc 51.953 (55.789)	Top-5 acc 75.000 (78.037)	lr 0.01872
Train [41][310/3239]	Time 0.231 (0.697)	Data Time 0.001 (0.114)	Loss 3.0377 (2.8374)	Entropy 1.27365 (1.27437)	Top-1 acc 48.828 (55.768)	Top-5 acc 75.391 (78.002)	lr 0.01872
Train [41][320/3239]	Time 0.206 (0.689)	Data Time 0.002 (0.110)	Loss 2.8199 (2.8356)	Entropy 1.27360 (1.27434)	Top-1 acc 57.422 (55.850)	Top-5 acc 76.562 (78.028)	lr 0.01872
Train [41][330/3239]	Time 0.333 (0.682)	Data Time 0.001 (0.107)	Loss 2.6904 (2.8354)	Entropy 1.27344 (1.27432)	Top-1 acc 61.719 (55.862)	Top-5 acc 80.469 (78.029)	lr 0.01872
Train [41][340/3239]	Time 2.382 (0.676)	Data Time 0.002 (0.104)	Loss 2.8241 (2.8354)	Entropy 1.27344 (1.27429)	Top-1 acc 57.031 (55.857)	Top-5 acc 78.906 (78.039)	lr 0.01872
Train [41][350/3239]	Time 0.266 (0.663)	Data Time 0.001 (0.101)	Loss 2.9289 (2.8343)	Entropy 1.27342 (1.27427)	Top-1 acc 54.297 (55.891)	Top-5 acc 77.344 (78.055)	lr 0.01872
Train [41][360/3239]	Time 0.215 (0.657)	Data Time 0.001 (0.098)	Loss 2.8180 (2.8344)	Entropy 1.27336 (1.27424)	Top-1 acc 52.734 (55.881)	Top-5 acc 78.516 (78.049)	lr 0.01872
Train [41][370/3239]	Time 0.216 (0.652)	Data Time 0.001 (0.095)	Loss 2.8440 (2.8356)	Entropy 1.27324 (1.27422)	Top-1 acc 54.688 (55.858)	Top-5 acc 75.391 (78.013)	lr 0.01872
Train [41][380/3239]	Time 0.307 (0.647)	Data Time 0.001 (0.093)	Loss 2.7432 (2.8357)	Entropy 1.27322 (1.27419)	Top-1 acc 57.031 (55.840)	Top-5 acc 80.469 (78.027)	lr 0.01872
Train [41][390/3239]	Time 0.221 (0.642)	Data Time 0.001 (0.091)	Loss 2.8755 (2.8354)	Entropy 1.27313 (1.27417)	Top-1 acc 56.250 (55.864)	Top-5 acc 77.344 (78.007)	lr 0.01872
Train [41][400/3239]	Time 0.240 (0.638)	Data Time 0.002 (0.088)	Loss 2.9185 (2.8360)	Entropy 1.27306 (1.27414)	Top-1 acc 53.125 (55.824)	Top-5 acc 78.125 (77.988)	lr 0.01871
Train [41][410/3239]	Time 0.219 (0.634)	Data Time 0.001 (0.086)	Loss 2.8203 (2.8371)	Entropy 1.27303 (1.27411)	Top-1 acc 55.078 (55.810)	Top-5 acc 80.469 (77.969)	lr 0.01871
Train [41][420/3239]	Time 0.146 (0.629)	Data Time 0.001 (0.084)	Loss 2.8626 (2.8362)	Entropy 1.27303 (1.27409)	Top-1 acc 54.688 (55.863)	Top-5 acc 75.000 (77.983)	lr 0.01871
Train [41][430/3239]	Time 0.252 (0.625)	Data Time 0.001 (0.083)	Loss 2.7925 (2.8342)	Entropy 1.27291 (1.27406)	Top-1 acc 56.250 (55.933)	Top-5 acc 78.125 (77.999)	lr 0.01871
Train [41][440/3239]	Time 0.261 (0.621)	Data Time 0.001 (0.081)	Loss 3.0567 (2.8341)	Entropy 1.27282 (1.27404)	Top-1 acc 49.609 (55.950)	Top-5 acc 71.094 (77.990)	lr 0.01871
Train [41][450/3239]	Time 2.469 (0.617)	Data Time 0.001 (0.079)	Loss 2.7962 (2.8334)	Entropy 1.27282 (1.27401)	Top-1 acc 57.422 (55.964)	Top-5 acc 78.516 (78.011)	lr 0.01871
Train [41][460/3239]	Time 0.220 (0.609)	Data Time 0.001 (0.077)	Loss 2.8258 (2.8325)	Entropy 1.27281 (1.27398)	Top-1 acc 53.516 (55.986)	Top-5 acc 78.516 (78.042)	lr 0.01871
Train [41][470/3239]	Time 0.220 (0.605)	Data Time 0.001 (0.076)	Loss 3.0126 (2.8334)	Entropy 1.27276 (1.27396)	Top-1 acc 51.562 (55.958)	Top-5 acc 73.438 (78.029)	lr 0.01871
Train [41][480/3239]	Time 0.318 (0.602)	Data Time 0.001 (0.074)	Loss 2.6512 (2.8326)	Entropy 1.27271 (1.27393)	Top-1 acc 60.547 (55.967)	Top-5 acc 81.641 (78.041)	lr 0.01871
Train [41][490/3239]	Time 0.242 (0.599)	Data Time 0.001 (0.073)	Loss 2.8603 (2.8330)	Entropy 1.27261 (1.27390)	Top-1 acc 57.422 (55.968)	Top-5 acc 73.438 (78.036)	lr 0.01871
Train [41][500/3239]	Time 0.232 (0.596)	Data Time 0.001 (0.071)	Loss 2.9640 (2.8332)	Entropy 1.27260 (1.27388)	Top-1 acc 51.562 (55.965)	Top-5 acc 73.047 (78.022)	lr 0.01871
Train [41][510/3239]	Time 0.207 (0.593)	Data Time 0.001 (0.070)	Loss 2.7205 (2.8324)	Entropy 1.27257 (1.27385)	Top-1 acc 56.641 (55.983)	Top-5 acc 80.078 (78.027)	lr 0.01871
Train [41][520/3239]	Time 0.170 (0.590)	Data Time 0.001 (0.069)	Loss 2.8865 (2.8331)	Entropy 1.27247 (1.27383)	Top-1 acc 56.641 (55.978)	Top-5 acc 77.344 (78.016)	lr 0.01870
Train [41][530/3239]	Time 0.318 (0.588)	Data Time 0.001 (0.067)	Loss 2.7665 (2.8344)	Entropy 1.27244 (1.27380)	Top-1 acc 53.906 (55.950)	Top-5 acc 78.516 (77.998)	lr 0.01870
Train [41][540/3239]	Time 0.266 (0.663)	Data Time 0.005 (0.066)	Loss 2.9301 (2.8341)	Entropy 1.27242 (1.27378)	Top-1 acc 56.250 (55.973)	Top-5 acc 77.344 (78.007)	lr 0.01870
Train [41][550/3239]	Time 0.204 (0.662)	Data Time 0.002 (0.065)	Loss 2.8648 (2.8335)	Entropy 1.27237 (1.27375)	Top-1 acc 53.906 (55.988)	Top-5 acc 75.391 (78.014)	lr 0.01870
Train [41][560/3239]	Time 2.413 (0.659)	Data Time 0.002 (0.064)	Loss 2.9283 (2.8341)	Entropy 1.27237 (1.27373)	Top-1 acc 55.078 (55.957)	Top-5 acc 76.562 (78.000)	lr 0.01870
Train [41][570/3239]	Time 0.262 (0.651)	Data Time 0.001 (0.063)	Loss 2.8337 (2.8341)	Entropy 1.27227 (1.27370)	Top-1 acc 60.156 (55.959)	Top-5 acc 76.562 (78.005)	lr 0.01870
Train [41][580/3239]	Time 0.307 (0.648)	Data Time 0.001 (0.062)	Loss 3.1364 (2.8338)	Entropy 1.27222 (1.27368)	Top-1 acc 50.391 (55.966)	Top-5 acc 73.828 (78.019)	lr 0.01870
Train [41][590/3239]	Time 0.211 (0.644)	Data Time 0.002 (0.061)	Loss 2.8540 (2.8337)	Entropy 1.27210 (1.27365)	Top-1 acc 53.125 (55.965)	Top-5 acc 78.125 (78.021)	lr 0.01870
Train [41][600/3239]	Time 0.223 (0.641)	Data Time 0.001 (0.060)	Loss 2.8294 (2.8339)	Entropy 1.27202 (1.27362)	Top-1 acc 52.734 (55.958)	Top-5 acc 77.344 (78.024)	lr 0.01870
Train [41][610/3239]	Time 0.233 (0.638)	Data Time 0.001 (0.059)	Loss 2.9354 (2.8343)	Entropy 1.27200 (1.27360)	Top-1 acc 53.516 (55.939)	Top-5 acc 75.000 (78.014)	lr 0.01870
Train [41][620/3239]	Time 0.218 (0.635)	Data Time 0.001 (0.058)	Loss 2.8609 (2.8332)	Entropy 1.27194 (1.27357)	Top-1 acc 53.125 (55.974)	Top-5 acc 77.344 (78.026)	lr 0.01870
Train [41][630/3239]	Time 0.318 (0.633)	Data Time 0.001 (0.057)	Loss 2.6845 (2.8324)	Entropy 1.27193 (1.27354)	Top-1 acc 57.812 (55.988)	Top-5 acc 80.469 (78.039)	lr 0.01869
Train [41][640/3239]	Time 0.226 (0.630)	Data Time 0.001 (0.056)	Loss 3.0106 (2.8313)	Entropy 1.27183 (1.27352)	Top-1 acc 50.781 (55.991)	Top-5 acc 75.781 (78.070)	lr 0.01869
Train [41][650/3239]	Time 0.217 (0.627)	Data Time 0.001 (0.055)	Loss 2.7652 (2.8313)	Entropy 1.27177 (1.27349)	Top-1 acc 57.812 (56.017)	Top-5 acc 78.516 (78.069)	lr 0.01869
Train [41][660/3239]	Time 0.240 (0.625)	Data Time 0.002 (0.054)	Loss 2.8804 (2.8313)	Entropy 1.27178 (1.27347)	Top-1 acc 56.641 (56.015)	Top-5 acc 77.734 (78.071)	lr 0.01869
Train [41][670/3239]	Time 2.412 (0.622)	Data Time 0.002 (0.054)	Loss 3.0568 (2.8313)	Entropy 1.27178 (1.27344)	Top-1 acc 49.609 (56.005)	Top-5 acc 75.391 (78.071)	lr 0.01869
Train [41][680/3239]	Time 0.230 (0.616)	Data Time 0.002 (0.053)	Loss 2.8759 (2.8316)	Entropy 1.27159 (1.27341)	Top-1 acc 55.859 (56.005)	Top-5 acc 77.344 (78.060)	lr 0.01869
Train [41][690/3239]	Time 0.218 (0.614)	Data Time 0.001 (0.052)	Loss 3.1022 (2.8320)	Entropy 1.27158 (1.27339)	Top-1 acc 48.438 (56.007)	Top-5 acc 75.000 (78.057)	lr 0.01869
Train [41][700/3239]	Time 0.226 (0.612)	Data Time 0.001 (0.051)	Loss 2.7289 (2.8330)	Entropy 1.27153 (1.27336)	Top-1 acc 58.594 (55.983)	Top-5 acc 77.734 (78.028)	lr 0.01869
Train [41][710/3239]	Time 0.170 (0.609)	Data Time 0.001 (0.051)	Loss 2.8962 (2.8332)	Entropy 1.27146 (1.27334)	Top-1 acc 56.641 (55.970)	Top-5 acc 76.562 (78.028)	lr 0.01869
Train [41][720/3239]	Time 0.230 (0.607)	Data Time 0.001 (0.050)	Loss 2.9387 (2.8332)	Entropy 1.27143 (1.27331)	Top-1 acc 52.344 (55.971)	Top-5 acc 74.609 (78.024)	lr 0.01869
Train [41][730/3239]	Time 0.216 (0.605)	Data Time 0.001 (0.050)	Loss 3.0509 (2.8344)	Entropy 1.27141 (1.27328)	Top-1 acc 50.391 (55.951)	Top-5 acc 73.047 (77.990)	lr 0.01869
Train [41][740/3239]	Time 0.197 (0.603)	Data Time 0.001 (0.049)	Loss 3.0136 (2.8345)	Entropy 1.27136 (1.27326)	Top-1 acc 53.906 (55.944)	Top-5 acc 71.484 (77.996)	lr 0.01869
Train [41][750/3239]	Time 0.218 (0.601)	Data Time 0.001 (0.048)	Loss 2.9358 (2.8349)	Entropy 1.27129 (1.27323)	Top-1 acc 56.641 (55.951)	Top-5 acc 76.562 (77.988)	lr 0.01868
Train [41][760/3239]	Time 0.209 (0.599)	Data Time 0.001 (0.048)	Loss 2.8315 (2.8358)	Entropy 1.27121 (1.27321)	Top-1 acc 57.422 (55.945)	Top-5 acc 77.344 (77.961)	lr 0.01868
Train [41][770/3239]	Time 0.259 (0.598)	Data Time 0.001 (0.047)	Loss 2.9724 (2.8351)	Entropy 1.27111 (1.27318)	Top-1 acc 56.250 (55.968)	Top-5 acc 78.125 (77.981)	lr 0.01868
Train [41][780/3239]	Time 2.414 (0.596)	Data Time 0.001 (0.046)	Loss 3.0618 (2.8359)	Entropy 1.27111 (1.27315)	Top-1 acc 50.781 (55.940)	Top-5 acc 74.219 (77.959)	lr 0.01868
Train [41][790/3239]	Time 0.230 (0.591)	Data Time 0.001 (0.046)	Loss 2.7183 (2.8356)	Entropy 1.27110 (1.27313)	Top-1 acc 60.547 (55.955)	Top-5 acc 79.688 (77.960)	lr 0.01868
Train [41][800/3239]	Time 0.235 (0.589)	Data Time 0.001 (0.045)	Loss 2.6576 (2.8353)	Entropy 1.27104 (1.27310)	Top-1 acc 59.766 (55.960)	Top-5 acc 82.812 (77.969)	lr 0.01868
Train [41][810/3239]	Time 0.219 (0.588)	Data Time 0.001 (0.045)	Loss 2.7168 (2.8348)	Entropy 1.27104 (1.27308)	Top-1 acc 59.375 (55.963)	Top-5 acc 78.906 (77.979)	lr 0.01868
Train [41][820/3239]	Time 0.227 (0.586)	Data Time 0.001 (0.044)	Loss 2.7283 (2.8357)	Entropy 1.27104 (1.27305)	Top-1 acc 58.203 (55.942)	Top-5 acc 83.594 (77.972)	lr 0.01868
Train [41][830/3239]	Time 0.222 (0.584)	Data Time 0.001 (0.044)	Loss 2.7779 (2.8366)	Entropy 1.27103 (1.27303)	Top-1 acc 55.078 (55.926)	Top-5 acc 79.297 (77.947)	lr 0.01868
Train [41][840/3239]	Time 0.208 (0.583)	Data Time 0.001 (0.043)	Loss 3.0646 (2.8368)	Entropy 1.27101 (1.27300)	Top-1 acc 46.875 (55.917)	Top-5 acc 73.438 (77.936)	lr 0.01868
Train [41][850/3239]	Time 0.194 (0.581)	Data Time 0.001 (0.043)	Loss 2.9040 (2.8362)	Entropy 1.27096 (1.27298)	Top-1 acc 52.734 (55.920)	Top-5 acc 76.562 (77.953)	lr 0.01868
Train [41][860/3239]	Time 0.229 (0.580)	Data Time 0.001 (0.042)	Loss 2.7677 (2.8361)	Entropy 1.27087 (1.27296)	Top-1 acc 58.203 (55.926)	Top-5 acc 81.250 (77.954)	lr 0.01867
Train [41][870/3239]	Time 0.215 (0.579)	Data Time 0.001 (0.042)	Loss 2.7715 (2.8362)	Entropy 1.27093 (1.27293)	Top-1 acc 57.031 (55.919)	Top-5 acc 77.734 (77.954)	lr 0.01867
Train [41][880/3239]	Time 0.264 (0.577)	Data Time 0.002 (0.041)	Loss 2.7911 (2.8359)	Entropy 1.27091 (1.27291)	Top-1 acc 57.422 (55.928)	Top-5 acc 81.641 (77.960)	lr 0.01867
Train [41][890/3239]	Time 2.308 (0.576)	Data Time 0.001 (0.041)	Loss 2.8098 (2.8361)	Entropy 1.27091 (1.27289)	Top-1 acc 58.594 (55.924)	Top-5 acc 76.953 (77.949)	lr 0.01867
Train [41][900/3239]	Time 0.234 (0.572)	Data Time 0.002 (0.040)	Loss 2.8510 (2.8360)	Entropy 1.27089 (1.27287)	Top-1 acc 57.422 (55.924)	Top-5 acc 77.344 (77.957)	lr 0.01867
Train [41][910/3239]	Time 0.369 (0.617)	Data Time 0.004 (0.040)	Loss 2.7807 (2.8362)	Entropy 1.27109 (1.27285)	Top-1 acc 59.766 (55.919)	Top-5 acc 81.250 (77.955)	lr 0.01867
Train [41][920/3239]	Time 0.243 (0.616)	Data Time 0.002 (0.040)	Loss 2.9766 (2.8367)	Entropy 1.27101 (1.27283)	Top-1 acc 55.859 (55.906)	Top-5 acc 73.438 (77.948)	lr 0.01867
Train [41][930/3239]	Time 0.278 (0.614)	Data Time 0.003 (0.039)	Loss 2.7257 (2.8374)	Entropy 1.27099 (1.27281)	Top-1 acc 58.984 (55.893)	Top-5 acc 80.859 (77.932)	lr 0.01867
Train [41][940/3239]	Time 0.242 (0.613)	Data Time 0.001 (0.039)	Loss 2.8985 (2.8376)	Entropy 1.27086 (1.27279)	Top-1 acc 54.297 (55.891)	Top-5 acc 76.562 (77.935)	lr 0.01867
Train [41][950/3239]	Time 0.218 (0.611)	Data Time 0.001 (0.038)	Loss 2.9341 (2.8378)	Entropy 1.27074 (1.27276)	Top-1 acc 54.688 (55.885)	Top-5 acc 77.734 (77.937)	lr 0.01867
Train [41][960/3239]	Time 0.226 (0.609)	Data Time 0.001 (0.038)	Loss 2.6473 (2.8372)	Entropy 1.27071 (1.27274)	Top-1 acc 62.500 (55.897)	Top-5 acc 80.078 (77.942)	lr 0.01867
Train [41][970/3239]	Time 0.210 (0.608)	Data Time 0.001 (0.038)	Loss 2.8224 (2.8374)	Entropy 1.27063 (1.27272)	Top-1 acc 53.516 (55.902)	Top-5 acc 78.516 (77.942)	lr 0.01866
Train [41][980/3239]	Time 0.215 (0.606)	Data Time 0.002 (0.037)	Loss 2.8656 (2.8376)	Entropy 1.27060 (1.27270)	Top-1 acc 56.250 (55.905)	Top-5 acc 79.297 (77.946)	lr 0.01866
Train [41][990/3239]	Time 0.199 (0.605)	Data Time 0.001 (0.037)	Loss 3.0171 (2.8375)	Entropy 1.27050 (1.27268)	Top-1 acc 52.344 (55.898)	Top-5 acc 75.781 (77.950)	lr 0.01866
Train [41][1000/3239]	Time 2.540 (0.603)	Data Time 0.004 (0.037)	Loss 2.9850 (2.8377)	Entropy 1.27050 (1.27266)	Top-1 acc 53.906 (55.898)	Top-5 acc 73.828 (77.947)	lr 0.01866
Train [41][1010/3239]	Time 0.214 (0.600)	Data Time 0.001 (0.036)	Loss 2.9088 (2.8376)	Entropy 1.27051 (1.27264)	Top-1 acc 54.297 (55.901)	Top-5 acc 77.734 (77.945)	lr 0.01866
Train [41][1020/3239]	Time 0.223 (0.598)	Data Time 0.001 (0.036)	Loss 2.8557 (2.8377)	Entropy 1.27074 (1.27262)	Top-1 acc 52.734 (55.905)	Top-5 acc 72.266 (77.943)	lr 0.01866
Train [41][1030/3239]	Time 0.254 (0.597)	Data Time 0.001 (0.036)	Loss 2.9171 (2.8379)	Entropy 1.27071 (1.27260)	Top-1 acc 56.641 (55.891)	Top-5 acc 78.125 (77.937)	lr 0.01866
Train [41][1040/3239]	Time 0.225 (0.596)	Data Time 0.001 (0.035)	Loss 2.7214 (2.8376)	Entropy 1.27068 (1.27258)	Top-1 acc 62.500 (55.901)	Top-5 acc 79.688 (77.945)	lr 0.01866
Train [41][1050/3239]	Time 0.216 (0.594)	Data Time 0.001 (0.035)	Loss 2.7747 (2.8382)	Entropy 1.27066 (1.27256)	Top-1 acc 58.203 (55.886)	Top-5 acc 79.688 (77.925)	lr 0.01866
Train [41][1060/3239]	Time 0.230 (0.593)	Data Time 0.001 (0.035)	Loss 2.6805 (2.8381)	Entropy 1.27058 (1.27254)	Top-1 acc 60.547 (55.891)	Top-5 acc 82.031 (77.926)	lr 0.01866
Train [41][1070/3239]	Time 0.218 (0.591)	Data Time 0.001 (0.034)	Loss 2.7593 (2.8381)	Entropy 1.27060 (1.27253)	Top-1 acc 59.766 (55.888)	Top-5 acc 78.906 (77.919)	lr 0.01866
Train [41][1080/3239]	Time 0.317 (0.590)	Data Time 0.001 (0.034)	Loss 2.9271 (2.8378)	Entropy 1.27059 (1.27251)	Top-1 acc 55.469 (55.896)	Top-5 acc 75.781 (77.930)	lr 0.01866
Train [41][1090/3239]	Time 0.226 (0.589)	Data Time 0.001 (0.034)	Loss 3.0015 (2.8376)	Entropy 1.27055 (1.27249)	Top-1 acc 50.391 (55.895)	Top-5 acc 74.609 (77.928)	lr 0.01865
Train [41][1100/3239]	Time 0.221 (0.588)	Data Time 0.001 (0.033)	Loss 2.9372 (2.8376)	Entropy 1.27054 (1.27247)	Top-1 acc 51.172 (55.893)	Top-5 acc 77.734 (77.928)	lr 0.01865
Train [41][1110/3239]	Time 2.405 (0.587)	Data Time 0.001 (0.033)	Loss 2.8072 (2.8375)	Entropy 1.27054 (1.27246)	Top-1 acc 59.375 (55.889)	Top-5 acc 77.734 (77.926)	lr 0.01865
Train [41][1120/3239]	Time 0.258 (0.584)	Data Time 0.002 (0.033)	Loss 2.9554 (2.8371)	Entropy 1.27051 (1.27244)	Top-1 acc 53.125 (55.898)	Top-5 acc 77.734 (77.939)	lr 0.01865
Train [41][1130/3239]	Time 0.274 (0.582)	Data Time 0.001 (0.033)	Loss 3.0831 (2.8380)	Entropy 1.27036 (1.27242)	Top-1 acc 47.656 (55.883)	Top-5 acc 69.141 (77.917)	lr 0.01865
Train [41][1140/3239]	Time 0.221 (0.581)	Data Time 0.001 (0.032)	Loss 2.7166 (2.8380)	Entropy 1.27026 (1.27240)	Top-1 acc 60.547 (55.888)	Top-5 acc 79.688 (77.909)	lr 0.01865
Train [41][1150/3239]	Time 0.236 (0.580)	Data Time 0.001 (0.032)	Loss 2.8079 (2.8380)	Entropy 1.27021 (1.27238)	Top-1 acc 54.688 (55.892)	Top-5 acc 79.688 (77.912)	lr 0.01865
Train [41][1160/3239]	Time 0.228 (0.579)	Data Time 0.001 (0.032)	Loss 2.8574 (2.8377)	Entropy 1.27008 (1.27236)	Top-1 acc 51.172 (55.897)	Top-5 acc 78.516 (77.919)	lr 0.01865
Train [41][1170/3239]	Time 0.200 (0.578)	Data Time 0.001 (0.032)	Loss 2.9380 (2.8378)	Entropy 1.27004 (1.27234)	Top-1 acc 50.391 (55.892)	Top-5 acc 75.391 (77.920)	lr 0.01865
Train [41][1180/3239]	Time 0.329 (0.577)	Data Time 0.001 (0.031)	Loss 2.9002 (2.8378)	Entropy 1.27002 (1.27232)	Top-1 acc 55.078 (55.894)	Top-5 acc 77.734 (77.921)	lr 0.01865
Train [41][1190/3239]	Time 0.216 (0.576)	Data Time 0.001 (0.031)	Loss 2.8721 (2.8377)	Entropy 1.27002 (1.27230)	Top-1 acc 52.734 (55.888)	Top-5 acc 79.297 (77.923)	lr 0.01865
Train [41][1200/3239]	Time 0.213 (0.575)	Data Time 0.001 (0.031)	Loss 2.7419 (2.8373)	Entropy 1.27001 (1.27229)	Top-1 acc 59.766 (55.896)	Top-5 acc 79.297 (77.932)	lr 0.01864
Train [41][1210/3239]	Time 0.227 (0.574)	Data Time 0.001 (0.031)	Loss 3.0159 (2.8381)	Entropy 1.26995 (1.27227)	Top-1 acc 54.688 (55.883)	Top-5 acc 76.172 (77.919)	lr 0.01864
Train [41][1220/3239]	Time 2.552 (0.573)	Data Time 0.001 (0.030)	Loss 2.9976 (2.8383)	Entropy 1.26995 (1.27225)	Top-1 acc 55.078 (55.887)	Top-5 acc 76.172 (77.916)	lr 0.01864
Train [41][1230/3239]	Time 0.238 (0.570)	Data Time 0.001 (0.030)	Loss 2.7761 (2.8381)	Entropy 1.26996 (1.27223)	Top-1 acc 55.859 (55.889)	Top-5 acc 80.469 (77.918)	lr 0.01864
Train [41][1240/3239]	Time 0.222 (0.569)	Data Time 0.001 (0.030)	Loss 2.7848 (2.8381)	Entropy 1.26995 (1.27221)	Top-1 acc 57.031 (55.888)	Top-5 acc 82.031 (77.924)	lr 0.01864
Train [41][1250/3239]	Time 0.225 (0.569)	Data Time 0.001 (0.030)	Loss 2.9345 (2.8384)	Entropy 1.26994 (1.27219)	Top-1 acc 57.031 (55.884)	Top-5 acc 76.562 (77.913)	lr 0.01864
Train [41][1260/3239]	Time 0.215 (0.568)	Data Time 0.001 (0.029)	Loss 2.6697 (2.8381)	Entropy 1.26979 (1.27217)	Top-1 acc 59.766 (55.895)	Top-5 acc 81.641 (77.921)	lr 0.01864
Train [41][1270/3239]	Time 0.253 (0.603)	Data Time 0.002 (0.029)	Loss 2.9211 (2.8384)	Entropy 1.26978 (1.27216)	Top-1 acc 56.250 (55.896)	Top-5 acc 77.734 (77.918)	lr 0.01864
Train [41][1280/3239]	Time 0.204 (0.602)	Data Time 0.002 (0.029)	Loss 2.9133 (2.8387)	Entropy 1.26977 (1.27214)	Top-1 acc 50.391 (55.888)	Top-5 acc 76.562 (77.902)	lr 0.01864
Train [41][1290/3239]	Time 0.225 (0.601)	Data Time 0.001 (0.029)	Loss 2.7787 (2.8388)	Entropy 1.26973 (1.27212)	Top-1 acc 56.641 (55.891)	Top-5 acc 79.688 (77.903)	lr 0.01864
Train [41][1300/3239]	Time 0.233 (0.600)	Data Time 0.001 (0.029)	Loss 2.8421 (2.8389)	Entropy 1.26965 (1.27210)	Top-1 acc 56.250 (55.886)	Top-5 acc 77.734 (77.896)	lr 0.01864
Train [41][1310/3239]	Time 0.206 (0.599)	Data Time 0.001 (0.028)	Loss 2.7133 (2.8384)	Entropy 1.26954 (1.27208)	Top-1 acc 57.812 (55.892)	Top-5 acc 78.906 (77.905)	lr 0.01864
Train [41][1320/3239]	Time 0.242 (0.597)	Data Time 0.001 (0.028)	Loss 2.9940 (2.8385)	Entropy 1.26955 (1.27206)	Top-1 acc 54.297 (55.890)	Top-5 acc 76.172 (77.901)	lr 0.01863
Train [41][1330/3239]	Time 2.519 (0.597)	Data Time 0.001 (0.028)	Loss 2.7354 (2.8386)	Entropy 1.26955 (1.27204)	Top-1 acc 57.031 (55.890)	Top-5 acc 80.469 (77.905)	lr 0.01863
Train [41][1340/3239]	Time 0.227 (0.594)	Data Time 0.001 (0.028)	Loss 2.8987 (2.8385)	Entropy 1.26948 (1.27202)	Top-1 acc 55.469 (55.894)	Top-5 acc 75.781 (77.906)	lr 0.01863
Train [41][1350/3239]	Time 0.241 (0.593)	Data Time 0.001 (0.028)	Loss 2.7281 (2.8385)	Entropy 1.26941 (1.27200)	Top-1 acc 54.297 (55.895)	Top-5 acc 79.688 (77.910)	lr 0.01863
Train [41][1360/3239]	Time 0.239 (0.592)	Data Time 0.001 (0.027)	Loss 2.8508 (2.8389)	Entropy 1.26935 (1.27198)	Top-1 acc 53.516 (55.890)	Top-5 acc 78.906 (77.908)	lr 0.01863
Train [41][1370/3239]	Time 0.213 (0.591)	Data Time 0.001 (0.027)	Loss 3.0373 (2.8389)	Entropy 1.26930 (1.27197)	Top-1 acc 51.562 (55.895)	Top-5 acc 75.391 (77.910)	lr 0.01863
Train [41][1380/3239]	Time 0.256 (0.590)	Data Time 0.002 (0.027)	Loss 2.7132 (2.8388)	Entropy 1.26925 (1.27195)	Top-1 acc 58.203 (55.896)	Top-5 acc 77.734 (77.912)	lr 0.01863
Train [41][1390/3239]	Time 0.224 (0.589)	Data Time 0.001 (0.027)	Loss 2.6565 (2.8388)	Entropy 1.26914 (1.27193)	Top-1 acc 58.203 (55.892)	Top-5 acc 82.031 (77.910)	lr 0.01863
Train [41][1400/3239]	Time 0.214 (0.588)	Data Time 0.001 (0.027)	Loss 2.7585 (2.8386)	Entropy 1.26906 (1.27191)	Top-1 acc 55.469 (55.898)	Top-5 acc 78.516 (77.916)	lr 0.01863
Train [41][1410/3239]	Time 0.211 (0.587)	Data Time 0.001 (0.026)	Loss 2.8871 (2.8385)	Entropy 1.26901 (1.27189)	Top-1 acc 51.953 (55.903)	Top-5 acc 75.781 (77.917)	lr 0.01863
Train [41][1420/3239]	Time 0.224 (0.586)	Data Time 0.001 (0.026)	Loss 3.0859 (2.8384)	Entropy 1.26894 (1.27187)	Top-1 acc 52.344 (55.903)	Top-5 acc 70.703 (77.923)	lr 0.01863
Train [41][1430/3239]	Time 0.254 (0.585)	Data Time 0.001 (0.026)	Loss 2.6533 (2.8385)	Entropy 1.26894 (1.27184)	Top-1 acc 58.984 (55.902)	Top-5 acc 84.766 (77.920)	lr 0.01862
Train [41][1440/3239]	Time 2.462 (0.584)	Data Time 0.001 (0.026)	Loss 2.7224 (2.8384)	Entropy 1.26894 (1.27182)	Top-1 acc 56.250 (55.911)	Top-5 acc 78.906 (77.918)	lr 0.01862
Train [41][1450/3239]	Time 0.223 (0.582)	Data Time 0.001 (0.026)	Loss 2.6497 (2.8380)	Entropy 1.26882 (1.27180)	Top-1 acc 63.281 (55.926)	Top-5 acc 82.812 (77.928)	lr 0.01862
Train [41][1460/3239]	Time 0.275 (0.581)	Data Time 0.002 (0.026)	Loss 2.7912 (2.8377)	Entropy 1.26882 (1.27178)	Top-1 acc 60.547 (55.942)	Top-5 acc 78.516 (77.934)	lr 0.01862
Train [41][1470/3239]	Time 0.223 (0.580)	Data Time 0.001 (0.025)	Loss 2.8147 (2.8382)	Entropy 1.26875 (1.27176)	Top-1 acc 54.688 (55.925)	Top-5 acc 76.172 (77.928)	lr 0.01862
Train [41][1480/3239]	Time 0.213 (0.579)	Data Time 0.001 (0.025)	Loss 2.7965 (2.8384)	Entropy 1.26876 (1.27174)	Top-1 acc 56.250 (55.921)	Top-5 acc 78.125 (77.916)	lr 0.01862
Train [41][1490/3239]	Time 0.214 (0.578)	Data Time 0.001 (0.025)	Loss 3.1242 (2.8387)	Entropy 1.26872 (1.27172)	Top-1 acc 51.562 (55.915)	Top-5 acc 73.047 (77.907)	lr 0.01862
Train [41][1500/3239]	Time 0.203 (0.577)	Data Time 0.001 (0.025)	Loss 2.9216 (2.8387)	Entropy 1.26873 (1.27170)	Top-1 acc 53.906 (55.913)	Top-5 acc 76.953 (77.900)	lr 0.01862
Train [41][1510/3239]	Time 0.225 (0.577)	Data Time 0.001 (0.025)	Loss 2.7210 (2.8388)	Entropy 1.26873 (1.27168)	Top-1 acc 57.031 (55.915)	Top-5 acc 81.250 (77.900)	lr 0.01862
Train [41][1520/3239]	Time 0.218 (0.576)	Data Time 0.001 (0.025)	Loss 2.8905 (2.8389)	Entropy 1.26869 (1.27166)	Top-1 acc 55.859 (55.908)	Top-5 acc 76.172 (77.896)	lr 0.01862
Train [41][1530/3239]	Time 0.230 (0.575)	Data Time 0.001 (0.024)	Loss 2.7538 (2.8390)	Entropy 1.26862 (1.27164)	Top-1 acc 56.641 (55.906)	Top-5 acc 80.859 (77.896)	lr 0.01862
Train [41][1540/3239]	Time 0.238 (0.574)	Data Time 0.001 (0.024)	Loss 2.9042 (2.8390)	Entropy 1.26858 (1.27162)	Top-1 acc 53.516 (55.900)	Top-5 acc 77.734 (77.896)	lr 0.01861
Train [41][1550/3239]	Time 2.397 (0.574)	Data Time 0.001 (0.024)	Loss 3.0450 (2.8395)	Entropy 1.26858 (1.27160)	Top-1 acc 50.391 (55.888)	Top-5 acc 75.000 (77.890)	lr 0.01861
Train [41][1560/3239]	Time 0.229 (0.571)	Data Time 0.001 (0.024)	Loss 2.6286 (2.8391)	Entropy 1.26858 (1.27159)	Top-1 acc 60.547 (55.895)	Top-5 acc 81.641 (77.896)	lr 0.01861
Train [41][1570/3239]	Time 0.255 (0.571)	Data Time 0.001 (0.024)	Loss 2.9082 (2.8393)	Entropy 1.26853 (1.27157)	Top-1 acc 54.297 (55.895)	Top-5 acc 75.000 (77.895)	lr 0.01861
Train [41][1580/3239]	Time 0.217 (0.570)	Data Time 0.001 (0.024)	Loss 2.8768 (2.8396)	Entropy 1.26856 (1.27155)	Top-1 acc 57.422 (55.893)	Top-5 acc 75.000 (77.887)	lr 0.01861
Train [41][1590/3239]	Time 0.210 (0.569)	Data Time 0.001 (0.024)	Loss 2.8344 (2.8395)	Entropy 1.26852 (1.27153)	Top-1 acc 56.250 (55.894)	Top-5 acc 75.781 (77.888)	lr 0.01861
Train [41][1600/3239]	Time 0.242 (0.569)	Data Time 0.001 (0.023)	Loss 2.7777 (2.8399)	Entropy 1.26843 (1.27151)	Top-1 acc 55.078 (55.878)	Top-5 acc 76.953 (77.879)	lr 0.01861
Train [41][1610/3239]	Time 0.239 (0.568)	Data Time 0.001 (0.023)	Loss 3.1920 (2.8399)	Entropy 1.26840 (1.27149)	Top-1 acc 45.312 (55.873)	Top-5 acc 70.312 (77.879)	lr 0.01861
Train [41][1620/3239]	Time 0.213 (0.567)	Data Time 0.001 (0.023)	Loss 2.7647 (2.8398)	Entropy 1.26834 (1.27147)	Top-1 acc 54.688 (55.876)	Top-5 acc 82.031 (77.881)	lr 0.01861
Train [41][1630/3239]	Time 0.450 (0.594)	Data Time 0.002 (0.023)	Loss 2.7399 (2.8396)	Entropy 1.26831 (1.27145)	Top-1 acc 58.594 (55.882)	Top-5 acc 80.469 (77.883)	lr 0.01861
Train [41][1640/3239]	Time 0.235 (0.593)	Data Time 0.002 (0.023)	Loss 2.6849 (2.8396)	Entropy 1.26828 (1.27143)	Top-1 acc 64.062 (55.880)	Top-5 acc 82.031 (77.884)	lr 0.01861
Train [41][1650/3239]	Time 0.262 (0.592)	Data Time 0.002 (0.023)	Loss 2.8324 (2.8399)	Entropy 1.26816 (1.27141)	Top-1 acc 56.641 (55.879)	Top-5 acc 75.781 (77.872)	lr 0.01861
Train [41][1660/3239]	Time 2.450 (0.591)	Data Time 0.002 (0.023)	Loss 2.9119 (2.8401)	Entropy 1.26816 (1.27139)	Top-1 acc 55.469 (55.879)	Top-5 acc 78.906 (77.867)	lr 0.01860
Train [41][1670/3239]	Time 0.233 (0.589)	Data Time 0.001 (0.023)	Loss 2.8149 (2.8399)	Entropy 1.26810 (1.27137)	Top-1 acc 57.422 (55.882)	Top-5 acc 79.297 (77.869)	lr 0.01860
Train [41][1680/3239]	Time 0.278 (0.589)	Data Time 0.001 (0.022)	Loss 2.8480 (2.8400)	Entropy 1.26808 (1.27135)	Top-1 acc 55.469 (55.887)	Top-5 acc 75.000 (77.864)	lr 0.01860
Train [41][1690/3239]	Time 0.233 (0.588)	Data Time 0.001 (0.022)	Loss 2.9092 (2.8400)	Entropy 1.26794 (1.27133)	Top-1 acc 56.641 (55.889)	Top-5 acc 78.906 (77.864)	lr 0.01860
Train [41][1700/3239]	Time 0.214 (0.587)	Data Time 0.001 (0.022)	Loss 2.9275 (2.8400)	Entropy 1.26784 (1.27131)	Top-1 acc 55.859 (55.896)	Top-5 acc 71.875 (77.864)	lr 0.01860
Train [41][1710/3239]	Time 0.210 (0.586)	Data Time 0.001 (0.022)	Loss 2.7641 (2.8397)	Entropy 1.26782 (1.27129)	Top-1 acc 55.469 (55.891)	Top-5 acc 78.906 (77.870)	lr 0.01860
Train [41][1720/3239]	Time 0.212 (0.585)	Data Time 0.001 (0.022)	Loss 2.7879 (2.8397)	Entropy 1.26774 (1.27127)	Top-1 acc 58.984 (55.891)	Top-5 acc 78.125 (77.874)	lr 0.01860
Train [41][1730/3239]	Time 0.199 (0.585)	Data Time 0.001 (0.022)	Loss 2.8495 (2.8398)	Entropy 1.26765 (1.27125)	Top-1 acc 55.859 (55.886)	Top-5 acc 76.953 (77.870)	lr 0.01860
Train [41][1740/3239]	Time 0.231 (0.584)	Data Time 0.001 (0.022)	Loss 2.7106 (2.8396)	Entropy 1.26763 (1.27123)	Top-1 acc 56.641 (55.885)	Top-5 acc 77.344 (77.873)	lr 0.01860
Train [41][1750/3239]	Time 0.210 (0.583)	Data Time 0.001 (0.022)	Loss 2.6323 (2.8393)	Entropy 1.26760 (1.27121)	Top-1 acc 58.203 (55.892)	Top-5 acc 81.641 (77.878)	lr 0.01860
Train [41][1760/3239]	Time 0.268 (0.582)	Data Time 0.001 (0.022)	Loss 2.7449 (2.8390)	Entropy 1.26757 (1.27119)	Top-1 acc 58.203 (55.894)	Top-5 acc 80.469 (77.884)	lr 0.01860
Train [41][1770/3239]	Time 2.529 (0.582)	Data Time 0.001 (0.021)	Loss 2.7455 (2.8390)	Entropy 1.26757 (1.27117)	Top-1 acc 58.594 (55.900)	Top-5 acc 78.906 (77.881)	lr 0.01859
Train [41][1780/3239]	Time 0.232 (0.580)	Data Time 0.001 (0.021)	Loss 3.0298 (2.8389)	Entropy 1.26751 (1.27115)	Top-1 acc 48.438 (55.901)	Top-5 acc 75.000 (77.880)	lr 0.01859
Train [41][1790/3239]	Time 0.327 (0.579)	Data Time 0.001 (0.021)	Loss 2.9408 (2.8388)	Entropy 1.26749 (1.27113)	Top-1 acc 56.641 (55.902)	Top-5 acc 74.609 (77.883)	lr 0.01859
Train [41][1800/3239]	Time 0.209 (0.579)	Data Time 0.001 (0.021)	Loss 2.9656 (2.8387)	Entropy 1.26738 (1.27111)	Top-1 acc 51.172 (55.902)	Top-5 acc 75.391 (77.884)	lr 0.01859
Train [41][1810/3239]	Time 0.231 (0.578)	Data Time 0.001 (0.021)	Loss 2.8435 (2.8388)	Entropy 1.26736 (1.27109)	Top-1 acc 55.078 (55.903)	Top-5 acc 77.734 (77.883)	lr 0.01859
Train [41][1820/3239]	Time 0.232 (0.577)	Data Time 0.001 (0.021)	Loss 2.9316 (2.8391)	Entropy 1.26734 (1.27107)	Top-1 acc 54.297 (55.894)	Top-5 acc 75.000 (77.878)	lr 0.01859
Train [41][1830/3239]	Time 0.231 (0.577)	Data Time 0.001 (0.021)	Loss 2.8339 (2.8392)	Entropy 1.26729 (1.27105)	Top-1 acc 56.250 (55.889)	Top-5 acc 80.859 (77.877)	lr 0.01859
Train [41][1840/3239]	Time 0.322 (0.576)	Data Time 0.001 (0.021)	Loss 2.7897 (2.8395)	Entropy 1.26733 (1.27103)	Top-1 acc 55.469 (55.876)	Top-5 acc 77.734 (77.870)	lr 0.01859
Train [41][1850/3239]	Time 0.182 (0.576)	Data Time 0.001 (0.021)	Loss 2.6912 (2.8396)	Entropy 1.26727 (1.27101)	Top-1 acc 59.375 (55.872)	Top-5 acc 78.906 (77.867)	lr 0.01859
Train [41][1860/3239]	Time 0.227 (0.575)	Data Time 0.001 (0.020)	Loss 2.7458 (2.8396)	Entropy 1.26714 (1.27099)	Top-1 acc 55.078 (55.870)	Top-5 acc 81.250 (77.871)	lr 0.01859
Train [41][1870/3239]	Time 0.271 (0.574)	Data Time 0.001 (0.020)	Loss 2.9726 (2.8396)	Entropy 1.26714 (1.27097)	Top-1 acc 55.859 (55.866)	Top-5 acc 73.828 (77.867)	lr 0.01859
Train [41][1880/3239]	Time 2.472 (0.574)	Data Time 0.001 (0.020)	Loss 2.7257 (2.8396)	Entropy 1.26714 (1.27094)	Top-1 acc 57.031 (55.861)	Top-5 acc 80.078 (77.868)	lr 0.01858
Train [41][1890/3239]	Time 0.322 (0.572)	Data Time 0.001 (0.020)	Loss 2.8125 (2.8400)	Entropy 1.26712 (1.27092)	Top-1 acc 55.469 (55.859)	Top-5 acc 78.125 (77.860)	lr 0.01858
Train [41][1900/3239]	Time 0.222 (0.571)	Data Time 0.001 (0.020)	Loss 2.9972 (2.8399)	Entropy 1.26700 (1.27090)	Top-1 acc 51.172 (55.855)	Top-5 acc 77.344 (77.863)	lr 0.01858
Train [41][1910/3239]	Time 0.210 (0.571)	Data Time 0.001 (0.020)	Loss 2.8075 (2.8398)	Entropy 1.26696 (1.27088)	Top-1 acc 54.297 (55.854)	Top-5 acc 79.297 (77.866)	lr 0.01858
Train [41][1920/3239]	Time 0.209 (0.570)	Data Time 0.001 (0.020)	Loss 2.9912 (2.8399)	Entropy 1.26685 (1.27086)	Top-1 acc 48.047 (55.844)	Top-5 acc 76.953 (77.866)	lr 0.01858
Train [41][1930/3239]	Time 0.218 (0.569)	Data Time 0.001 (0.020)	Loss 2.7934 (2.8403)	Entropy 1.26684 (1.27084)	Top-1 acc 56.250 (55.831)	Top-5 acc 79.297 (77.856)	lr 0.01858
Train [41][1940/3239]	Time 0.277 (0.569)	Data Time 0.001 (0.020)	Loss 2.8883 (2.8402)	Entropy 1.26681 (1.27082)	Top-1 acc 55.078 (55.830)	Top-5 acc 76.172 (77.857)	lr 0.01858
Train [41][1950/3239]	Time 0.212 (0.568)	Data Time 0.001 (0.020)	Loss 2.8827 (2.8404)	Entropy 1.26680 (1.27080)	Top-1 acc 54.297 (55.824)	Top-5 acc 77.344 (77.854)	lr 0.01858
Train [41][1960/3239]	Time 0.217 (0.568)	Data Time 0.001 (0.020)	Loss 2.9929 (2.8406)	Entropy 1.26678 (1.27078)	Top-1 acc 53.516 (55.823)	Top-5 acc 74.219 (77.854)	lr 0.01858
Train [41][1970/3239]	Time 0.268 (0.567)	Data Time 0.002 (0.019)	Loss 2.8399 (2.8405)	Entropy 1.26675 (1.27076)	Top-1 acc 57.031 (55.825)	Top-5 acc 78.125 (77.857)	lr 0.01858
Train [41][1980/3239]	Time 0.262 (0.567)	Data Time 0.001 (0.019)	Loss 2.9856 (2.8407)	Entropy 1.26669 (1.27074)	Top-1 acc 54.297 (55.823)	Top-5 acc 72.656 (77.853)	lr 0.01858
Train [41][1990/3239]	Time 49.950 (0.590)	Data Time 0.002 (0.019)	Loss 2.8418 (2.8404)	Entropy 1.26669 (1.27072)	Top-1 acc 56.641 (55.828)	Top-5 acc 78.516 (77.859)	lr 0.01858
Train [41][2000/3239]	Time 0.233 (0.589)	Data Time 0.002 (0.019)	Loss 2.9013 (2.8406)	Entropy 1.26645 (1.27070)	Top-1 acc 54.688 (55.821)	Top-5 acc 76.953 (77.856)	lr 0.01857
Train [41][2010/3239]	Time 0.210 (0.588)	Data Time 0.002 (0.019)	Loss 2.8912 (2.8406)	Entropy 1.26645 (1.27068)	Top-1 acc 56.250 (55.820)	Top-5 acc 75.391 (77.857)	lr 0.01857
Train [41][2020/3239]	Time 0.240 (0.587)	Data Time 0.002 (0.019)	Loss 2.6278 (2.8412)	Entropy 1.26641 (1.27066)	Top-1 acc 59.766 (55.809)	Top-5 acc 83.984 (77.845)	lr 0.01857
Train [41][2030/3239]	Time 0.251 (0.587)	Data Time 0.001 (0.019)	Loss 2.8261 (2.8412)	Entropy 1.26643 (1.27063)	Top-1 acc 53.906 (55.808)	Top-5 acc 78.125 (77.843)	lr 0.01857
Train [41][2040/3239]	Time 0.262 (0.586)	Data Time 0.001 (0.019)	Loss 2.5911 (2.8412)	Entropy 1.26641 (1.27061)	Top-1 acc 57.422 (55.809)	Top-5 acc 84.375 (77.843)	lr 0.01857
Train [41][2050/3239]	Time 0.210 (0.586)	Data Time 0.001 (0.019)	Loss 2.9085 (2.8413)	Entropy 1.26630 (1.27059)	Top-1 acc 56.250 (55.805)	Top-5 acc 77.734 (77.840)	lr 0.01857
Train [41][2060/3239]	Time 0.234 (0.585)	Data Time 0.001 (0.019)	Loss 2.8122 (2.8413)	Entropy 1.26627 (1.27057)	Top-1 acc 55.859 (55.804)	Top-5 acc 79.688 (77.841)	lr 0.01857
Train [41][2070/3239]	Time 0.219 (0.584)	Data Time 0.002 (0.019)	Loss 3.0085 (2.8416)	Entropy 1.26619 (1.27055)	Top-1 acc 50.781 (55.800)	Top-5 acc 75.391 (77.838)	lr 0.01857
Train [41][2080/3239]	Time 0.191 (0.584)	Data Time 0.001 (0.019)	Loss 2.7749 (2.8416)	Entropy 1.26613 (1.27053)	Top-1 acc 55.859 (55.795)	Top-5 acc 78.906 (77.840)	lr 0.01857
Train [41][2090/3239]	Time 0.218 (0.583)	Data Time 0.001 (0.018)	Loss 2.8157 (2.8412)	Entropy 1.26611 (1.27051)	Top-1 acc 56.250 (55.802)	Top-5 acc 77.344 (77.849)	lr 0.01857
Train [41][2100/3239]	Time 2.544 (0.583)	Data Time 0.001 (0.018)	Loss 2.9209 (2.8413)	Entropy 1.26611 (1.27049)	Top-1 acc 50.781 (55.799)	Top-5 acc 75.391 (77.847)	lr 0.01857
Train [41][2110/3239]	Time 0.173 (0.581)	Data Time 0.001 (0.018)	Loss 3.1635 (2.8415)	Entropy 1.26609 (1.27047)	Top-1 acc 48.828 (55.793)	Top-5 acc 71.484 (77.846)	lr 0.01856
Train [41][2120/3239]	Time 0.234 (0.581)	Data Time 0.001 (0.018)	Loss 3.0387 (2.8417)	Entropy 1.26609 (1.27045)	Top-1 acc 49.219 (55.784)	Top-5 acc 74.219 (77.839)	lr 0.01856
Train [41][2130/3239]	Time 0.214 (0.580)	Data Time 0.001 (0.018)	Loss 2.8277 (2.8420)	Entropy 1.26610 (1.27043)	Top-1 acc 56.641 (55.779)	Top-5 acc 76.172 (77.832)	lr 0.01856
Train [41][2140/3239]	Time 0.225 (0.579)	Data Time 0.001 (0.018)	Loss 2.9648 (2.8421)	Entropy 1.26591 (1.27041)	Top-1 acc 52.344 (55.777)	Top-5 acc 75.781 (77.831)	lr 0.01856
Train [41][2150/3239]	Time 0.297 (0.579)	Data Time 0.001 (0.018)	Loss 2.9895 (2.8424)	Entropy 1.26585 (1.27038)	Top-1 acc 50.000 (55.767)	Top-5 acc 74.609 (77.828)	lr 0.01856
Train [41][2160/3239]	Time 0.220 (0.578)	Data Time 0.001 (0.018)	Loss 2.7688 (2.8424)	Entropy 1.26581 (1.27036)	Top-1 acc 56.641 (55.764)	Top-5 acc 81.641 (77.827)	lr 0.01856
Train [41][2170/3239]	Time 0.224 (0.578)	Data Time 0.001 (0.018)	Loss 2.9288 (2.8423)	Entropy 1.26580 (1.27034)	Top-1 acc 52.734 (55.765)	Top-5 acc 78.516 (77.826)	lr 0.01856
Train [41][2180/3239]	Time 0.219 (0.577)	Data Time 0.001 (0.018)	Loss 2.6872 (2.8421)	Entropy 1.26575 (1.27032)	Top-1 acc 57.812 (55.773)	Top-5 acc 82.812 (77.834)	lr 0.01856
Train [41][2190/3239]	Time 0.205 (0.577)	Data Time 0.001 (0.018)	Loss 2.9004 (2.8419)	Entropy 1.26566 (1.27030)	Top-1 acc 53.125 (55.779)	Top-5 acc 76.953 (77.839)	lr 0.01856
Train [41][2200/3239]	Time 0.248 (0.576)	Data Time 0.001 (0.018)	Loss 2.6402 (2.8418)	Entropy 1.26561 (1.27028)	Top-1 acc 59.766 (55.779)	Top-5 acc 83.984 (77.837)	lr 0.01856
Train [41][2210/3239]	Time 2.512 (0.576)	Data Time 0.001 (0.018)	Loss 2.8185 (2.8419)	Entropy 1.26561 (1.27026)	Top-1 acc 56.250 (55.779)	Top-5 acc 78.516 (77.835)	lr 0.01856
Train [41][2220/3239]	Time 0.216 (0.574)	Data Time 0.001 (0.017)	Loss 2.8781 (2.8419)	Entropy 1.26561 (1.27024)	Top-1 acc 55.078 (55.780)	Top-5 acc 77.734 (77.838)	lr 0.01855
Train [41][2230/3239]	Time 0.215 (0.573)	Data Time 0.001 (0.017)	Loss 3.0032 (2.8421)	Entropy 1.26550 (1.27022)	Top-1 acc 50.781 (55.773)	Top-5 acc 74.609 (77.835)	lr 0.01855
Train [41][2240/3239]	Time 0.209 (0.573)	Data Time 0.001 (0.017)	Loss 3.0511 (2.8424)	Entropy 1.26547 (1.27020)	Top-1 acc 49.609 (55.764)	Top-5 acc 74.219 (77.826)	lr 0.01855
Train [41][2250/3239]	Time 0.228 (0.572)	Data Time 0.001 (0.017)	Loss 2.8495 (2.8424)	Entropy 1.26546 (1.27017)	Top-1 acc 55.859 (55.765)	Top-5 acc 74.609 (77.828)	lr 0.01855
Train [41][2260/3239]	Time 0.205 (0.572)	Data Time 0.001 (0.017)	Loss 3.0103 (2.8422)	Entropy 1.26541 (1.27015)	Top-1 acc 51.562 (55.771)	Top-5 acc 76.172 (77.836)	lr 0.01855
Train [41][2270/3239]	Time 0.223 (0.571)	Data Time 0.001 (0.017)	Loss 2.9982 (2.8424)	Entropy 1.26537 (1.27013)	Top-1 acc 52.344 (55.769)	Top-5 acc 74.609 (77.832)	lr 0.01855
Train [41][2280/3239]	Time 0.216 (0.571)	Data Time 0.001 (0.017)	Loss 2.9136 (2.8425)	Entropy 1.26532 (1.27011)	Top-1 acc 54.297 (55.766)	Top-5 acc 78.125 (77.828)	lr 0.01855
Train [41][2290/3239]	Time 0.234 (0.570)	Data Time 0.001 (0.017)	Loss 2.8661 (2.8425)	Entropy 1.26535 (1.27009)	Top-1 acc 57.422 (55.766)	Top-5 acc 80.078 (77.825)	lr 0.01855
Train [41][2300/3239]	Time 0.225 (0.570)	Data Time 0.001 (0.017)	Loss 2.7540 (2.8424)	Entropy 1.26537 (1.27007)	Top-1 acc 59.766 (55.767)	Top-5 acc 77.734 (77.825)	lr 0.01855
Train [41][2310/3239]	Time 0.216 (0.569)	Data Time 0.001 (0.017)	Loss 2.8440 (2.8427)	Entropy 1.26526 (1.27005)	Top-1 acc 52.344 (55.761)	Top-5 acc 78.125 (77.821)	lr 0.01855
Train [41][2320/3239]	Time 2.357 (0.569)	Data Time 0.001 (0.017)	Loss 2.8335 (2.8432)	Entropy 1.26526 (1.27003)	Top-1 acc 55.078 (55.749)	Top-5 acc 78.516 (77.815)	lr 0.01855
Train [41][2330/3239]	Time 0.235 (0.567)	Data Time 0.001 (0.017)	Loss 2.9807 (2.8432)	Entropy 1.26520 (1.27001)	Top-1 acc 51.953 (55.745)	Top-5 acc 74.609 (77.812)	lr 0.01855
Train [41][2340/3239]	Time 0.245 (0.567)	Data Time 0.001 (0.017)	Loss 2.9796 (2.8436)	Entropy 1.26514 (1.26999)	Top-1 acc 52.344 (55.737)	Top-5 acc 73.047 (77.806)	lr 0.01854
Train [41][2350/3239]	Time 0.225 (0.566)	Data Time 0.001 (0.017)	Loss 2.8057 (2.8436)	Entropy 1.26503 (1.26997)	Top-1 acc 57.812 (55.734)	Top-5 acc 80.859 (77.806)	lr 0.01854
Train [41][2360/3239]	Time 0.465 (0.584)	Data Time 0.003 (0.017)	Loss 2.8826 (2.8436)	Entropy 1.26492 (1.26995)	Top-1 acc 57.031 (55.733)	Top-5 acc 74.219 (77.806)	lr 0.01854
Train [41][2370/3239]	Time 0.205 (0.584)	Data Time 0.002 (0.016)	Loss 2.8891 (2.8435)	Entropy 1.26489 (1.26992)	Top-1 acc 55.859 (55.736)	Top-5 acc 75.000 (77.808)	lr 0.01854
Train [41][2380/3239]	Time 0.229 (0.583)	Data Time 0.002 (0.016)	Loss 2.7828 (2.8435)	Entropy 1.26482 (1.26990)	Top-1 acc 56.641 (55.736)	Top-5 acc 77.734 (77.807)	lr 0.01854
Train [41][2390/3239]	Time 0.221 (0.583)	Data Time 0.001 (0.016)	Loss 2.8006 (2.8434)	Entropy 1.26467 (1.26988)	Top-1 acc 55.078 (55.740)	Top-5 acc 77.734 (77.808)	lr 0.01854
Train [41][2400/3239]	Time 0.231 (0.583)	Data Time 0.001 (0.016)	Loss 2.9556 (2.8435)	Entropy 1.26455 (1.26986)	Top-1 acc 54.297 (55.740)	Top-5 acc 75.391 (77.804)	lr 0.01854
Train [41][2410/3239]	Time 0.231 (0.582)	Data Time 0.001 (0.016)	Loss 2.7754 (2.8434)	Entropy 1.26451 (1.26984)	Top-1 acc 56.250 (55.740)	Top-5 acc 76.562 (77.806)	lr 0.01854
Train [41][2420/3239]	Time 0.224 (0.582)	Data Time 0.001 (0.016)	Loss 2.7315 (2.8436)	Entropy 1.26448 (1.26982)	Top-1 acc 60.156 (55.740)	Top-5 acc 76.172 (77.803)	lr 0.01854
Train [41][2430/3239]	Time 2.343 (0.581)	Data Time 0.001 (0.016)	Loss 2.8083 (2.8435)	Entropy 1.26448 (1.26979)	Top-1 acc 53.906 (55.739)	Top-5 acc 78.906 (77.801)	lr 0.01854
Train [41][2440/3239]	Time 0.230 (0.580)	Data Time 0.001 (0.016)	Loss 2.9968 (2.8435)	Entropy 1.26435 (1.26977)	Top-1 acc 51.562 (55.738)	Top-5 acc 74.219 (77.798)	lr 0.01854
Train [41][2450/3239]	Time 0.256 (0.579)	Data Time 0.002 (0.016)	Loss 2.9512 (2.8434)	Entropy 1.26434 (1.26975)	Top-1 acc 54.297 (55.742)	Top-5 acc 77.734 (77.802)	lr 0.01853
Train [41][2460/3239]	Time 0.245 (0.579)	Data Time 0.001 (0.016)	Loss 2.8987 (2.8434)	Entropy 1.26423 (1.26973)	Top-1 acc 53.125 (55.740)	Top-5 acc 78.125 (77.807)	lr 0.01853
Train [41][2470/3239]	Time 0.227 (0.578)	Data Time 0.001 (0.016)	Loss 2.8404 (2.8435)	Entropy 1.26420 (1.26970)	Top-1 acc 55.859 (55.739)	Top-5 acc 75.391 (77.808)	lr 0.01853
Train [41][2480/3239]	Time 0.213 (0.578)	Data Time 0.001 (0.016)	Loss 2.7508 (2.8436)	Entropy 1.26417 (1.26968)	Top-1 acc 55.078 (55.737)	Top-5 acc 82.031 (77.805)	lr 0.01853
Train [41][2490/3239]	Time 0.216 (0.578)	Data Time 0.001 (0.016)	Loss 2.8175 (2.8433)	Entropy 1.26416 (1.26966)	Top-1 acc 56.250 (55.746)	Top-5 acc 79.688 (77.810)	lr 0.01853
Train [41][2500/3239]	Time 0.248 (0.577)	Data Time 0.001 (0.016)	Loss 2.9103 (2.8432)	Entropy 1.26411 (1.26964)	Top-1 acc 55.078 (55.749)	Top-5 acc 75.781 (77.810)	lr 0.01853
Train [41][2510/3239]	Time 0.223 (0.577)	Data Time 0.001 (0.016)	Loss 2.8281 (2.8431)	Entropy 1.26411 (1.26962)	Top-1 acc 54.688 (55.750)	Top-5 acc 78.125 (77.811)	lr 0.01853
Train [41][2520/3239]	Time 0.216 (0.576)	Data Time 0.001 (0.016)	Loss 2.9478 (2.8435)	Entropy 1.26402 (1.26959)	Top-1 acc 55.078 (55.742)	Top-5 acc 74.609 (77.804)	lr 0.01853
Train [41][2530/3239]	Time 0.225 (0.576)	Data Time 0.001 (0.016)	Loss 2.6617 (2.8435)	Entropy 1.26399 (1.26957)	Top-1 acc 58.984 (55.743)	Top-5 acc 83.203 (77.803)	lr 0.01853
Train [41][2540/3239]	Time 2.393 (0.575)	Data Time 0.001 (0.015)	Loss 2.8116 (2.8435)	Entropy 1.26399 (1.26955)	Top-1 acc 58.203 (55.741)	Top-5 acc 77.734 (77.802)	lr 0.01853
Train [41][2550/3239]	Time 0.232 (0.574)	Data Time 0.001 (0.015)	Loss 2.7371 (2.8433)	Entropy 1.26394 (1.26953)	Top-1 acc 57.812 (55.748)	Top-5 acc 80.859 (77.808)	lr 0.01853
Train [41][2560/3239]	Time 0.388 (0.573)	Data Time 0.002 (0.015)	Loss 2.5920 (2.8431)	Entropy 1.26386 (1.26951)	Top-1 acc 63.281 (55.754)	Top-5 acc 82.812 (77.814)	lr 0.01852
Train [41][2570/3239]	Time 0.237 (0.573)	Data Time 0.001 (0.015)	Loss 3.0814 (2.8431)	Entropy 1.26377 (1.26948)	Top-1 acc 50.391 (55.750)	Top-5 acc 70.703 (77.813)	lr 0.01852
Train [41][2580/3239]	Time 0.164 (0.572)	Data Time 0.001 (0.015)	Loss 2.7686 (2.8430)	Entropy 1.26373 (1.26946)	Top-1 acc 55.078 (55.755)	Top-5 acc 78.906 (77.815)	lr 0.01852
Train [41][2590/3239]	Time 0.216 (0.572)	Data Time 0.001 (0.015)	Loss 2.7568 (2.8430)	Entropy 1.26369 (1.26944)	Top-1 acc 54.297 (55.748)	Top-5 acc 78.906 (77.816)	lr 0.01852
Train [41][2600/3239]	Time 0.260 (0.572)	Data Time 0.001 (0.015)	Loss 2.7439 (2.8432)	Entropy 1.26361 (1.26942)	Top-1 acc 55.859 (55.744)	Top-5 acc 78.516 (77.810)	lr 0.01852
Train [41][2610/3239]	Time 0.324 (0.571)	Data Time 0.001 (0.015)	Loss 2.9058 (2.8432)	Entropy 1.26341 (1.26939)	Top-1 acc 53.125 (55.743)	Top-5 acc 76.562 (77.808)	lr 0.01852
Train [41][2620/3239]	Time 0.214 (0.571)	Data Time 0.001 (0.015)	Loss 2.8798 (2.8431)	Entropy 1.26338 (1.26937)	Top-1 acc 56.641 (55.746)	Top-5 acc 73.828 (77.810)	lr 0.01852
Train [41][2630/3239]	Time 0.243 (0.570)	Data Time 0.001 (0.015)	Loss 2.7034 (2.8430)	Entropy 1.26348 (1.26935)	Top-1 acc 58.984 (55.746)	Top-5 acc 78.516 (77.810)	lr 0.01852
Train [41][2640/3239]	Time 0.236 (0.570)	Data Time 0.001 (0.015)	Loss 2.9784 (2.8431)	Entropy 1.26336 (1.26933)	Top-1 acc 53.125 (55.746)	Top-5 acc 76.562 (77.807)	lr 0.01852
Train [41][2650/3239]	Time 0.257 (0.570)	Data Time 0.001 (0.015)	Loss 2.9273 (2.8431)	Entropy 1.26329 (1.26930)	Top-1 acc 55.078 (55.746)	Top-5 acc 76.172 (77.803)	lr 0.01852
Train [41][2660/3239]	Time 0.309 (0.569)	Data Time 0.002 (0.015)	Loss 2.8696 (2.8431)	Entropy 1.26326 (1.26928)	Top-1 acc 52.734 (55.745)	Top-5 acc 78.906 (77.804)	lr 0.01852
Train [41][2670/3239]	Time 0.288 (0.569)	Data Time 0.001 (0.015)	Loss 2.8222 (2.8432)	Entropy 1.26318 (1.26926)	Top-1 acc 54.688 (55.744)	Top-5 acc 78.125 (77.803)	lr 0.01852
Train [41][2680/3239]	Time 0.270 (0.568)	Data Time 0.001 (0.015)	Loss 2.8660 (2.8433)	Entropy 1.26313 (1.26924)	Top-1 acc 52.344 (55.738)	Top-5 acc 77.734 (77.803)	lr 0.01851
Train [41][2690/3239]	Time 0.211 (0.568)	Data Time 0.001 (0.015)	Loss 2.8766 (2.8432)	Entropy 1.26304 (1.26921)	Top-1 acc 59.766 (55.743)	Top-5 acc 75.391 (77.801)	lr 0.01851
Train [41][2700/3239]	Time 0.259 (0.567)	Data Time 0.001 (0.015)	Loss 2.9896 (2.8432)	Entropy 1.26298 (1.26919)	Top-1 acc 49.609 (55.748)	Top-5 acc 75.000 (77.801)	lr 0.01851
Train [41][2710/3239]	Time 0.252 (0.584)	Data Time 0.003 (0.015)	Loss 2.6636 (2.8431)	Entropy 1.26294 (1.26917)	Top-1 acc 59.375 (55.749)	Top-5 acc 80.859 (77.801)	lr 0.01851
Train [41][2720/3239]	Time 0.213 (0.584)	Data Time 0.002 (0.015)	Loss 2.9630 (2.8431)	Entropy 1.26286 (1.26914)	Top-1 acc 54.297 (55.750)	Top-5 acc 75.000 (77.800)	lr 0.01851
Train [41][2730/3239]	Time 0.253 (0.584)	Data Time 0.002 (0.015)	Loss 2.8116 (2.8430)	Entropy 1.26284 (1.26912)	Top-1 acc 54.688 (55.747)	Top-5 acc 79.688 (77.801)	lr 0.01851
Train [41][2740/3239]	Time 0.258 (0.583)	Data Time 0.002 (0.014)	Loss 2.8342 (2.8432)	Entropy 1.26278 (1.26910)	Top-1 acc 57.031 (55.747)	Top-5 acc 76.562 (77.800)	lr 0.01851
Train [41][2750/3239]	Time 0.245 (0.583)	Data Time 0.001 (0.014)	Loss 2.7876 (2.8433)	Entropy 1.26275 (1.26908)	Top-1 acc 55.859 (55.739)	Top-5 acc 76.953 (77.797)	lr 0.01851
Train [41][2760/3239]	Time 0.233 (0.582)	Data Time 0.001 (0.014)	Loss 3.0825 (2.8432)	Entropy 1.26275 (1.26905)	Top-1 acc 48.047 (55.742)	Top-5 acc 73.828 (77.798)	lr 0.01851
Train [41][2770/3239]	Time 0.224 (0.582)	Data Time 0.002 (0.014)	Loss 2.7882 (2.8431)	Entropy 1.26267 (1.26903)	Top-1 acc 56.641 (55.749)	Top-5 acc 77.734 (77.798)	lr 0.01851
Train [41][2780/3239]	Time 0.266 (0.582)	Data Time 0.001 (0.014)	Loss 2.9069 (2.8433)	Entropy 1.26267 (1.26901)	Top-1 acc 53.125 (55.742)	Top-5 acc 73.828 (77.795)	lr 0.01851
Train [41][2790/3239]	Time 0.215 (0.581)	Data Time 0.001 (0.014)	Loss 2.9267 (2.8435)	Entropy 1.26264 (1.26898)	Top-1 acc 52.734 (55.739)	Top-5 acc 73.828 (77.792)	lr 0.01850
Train [41][2800/3239]	Time 0.235 (0.581)	Data Time 0.001 (0.014)	Loss 2.7887 (2.8436)	Entropy 1.26256 (1.26896)	Top-1 acc 58.984 (55.734)	Top-5 acc 78.516 (77.787)	lr 0.01850
Train [41][2810/3239]	Time 0.215 (0.580)	Data Time 0.001 (0.014)	Loss 3.0318 (2.8437)	Entropy 1.26253 (1.26894)	Top-1 acc 52.344 (55.728)	Top-5 acc 75.391 (77.785)	lr 0.01850
Train [41][2820/3239]	Time 0.229 (0.580)	Data Time 0.001 (0.014)	Loss 2.8047 (2.8436)	Entropy 1.26250 (1.26892)	Top-1 acc 55.859 (55.730)	Top-5 acc 79.297 (77.785)	lr 0.01850
Train [41][2830/3239]	Time 0.253 (0.579)	Data Time 0.001 (0.014)	Loss 2.7091 (2.8436)	Entropy 1.26251 (1.26889)	Top-1 acc 60.547 (55.733)	Top-5 acc 79.688 (77.786)	lr 0.01850
Train [41][2840/3239]	Time 0.268 (0.579)	Data Time 0.001 (0.014)	Loss 2.9337 (2.8438)	Entropy 1.26248 (1.26887)	Top-1 acc 51.172 (55.730)	Top-5 acc 78.906 (77.788)	lr 0.01850
Train [41][2850/3239]	Time 0.220 (0.578)	Data Time 0.001 (0.014)	Loss 2.7885 (2.8436)	Entropy 1.26248 (1.26885)	Top-1 acc 55.859 (55.735)	Top-5 acc 79.688 (77.793)	lr 0.01850
Train [41][2860/3239]	Time 0.262 (0.578)	Data Time 0.001 (0.014)	Loss 2.9998 (2.8438)	Entropy 1.26212 (1.26883)	Top-1 acc 50.781 (55.732)	Top-5 acc 74.219 (77.792)	lr 0.01850
Train [41][2870/3239]	Time 0.343 (0.577)	Data Time 0.001 (0.014)	Loss 2.9521 (2.8439)	Entropy 1.26207 (1.26880)	Top-1 acc 52.734 (55.727)	Top-5 acc 78.125 (77.787)	lr 0.01850
Train [41][2880/3239]	Time 0.207 (0.577)	Data Time 0.001 (0.014)	Loss 2.7815 (2.8438)	Entropy 1.26204 (1.26878)	Top-1 acc 54.297 (55.724)	Top-5 acc 79.688 (77.790)	lr 0.01850
Train [41][2890/3239]	Time 0.247 (0.577)	Data Time 0.001 (0.014)	Loss 2.9093 (2.8440)	Entropy 1.26200 (1.26876)	Top-1 acc 51.172 (55.715)	Top-5 acc 75.391 (77.790)	lr 0.01850
Train [41][2900/3239]	Time 0.203 (0.576)	Data Time 0.001 (0.014)	Loss 2.9079 (2.8439)	Entropy 1.26198 (1.26873)	Top-1 acc 50.781 (55.717)	Top-5 acc 78.125 (77.793)	lr 0.01849
Train [41][2910/3239]	Time 0.230 (0.576)	Data Time 0.001 (0.014)	Loss 2.9470 (2.8439)	Entropy 1.26195 (1.26871)	Top-1 acc 55.078 (55.718)	Top-5 acc 74.219 (77.793)	lr 0.01849
Train [41][2920/3239]	Time 0.366 (0.575)	Data Time 0.001 (0.014)	Loss 2.9234 (2.8441)	Entropy 1.26192 (1.26869)	Top-1 acc 55.859 (55.712)	Top-5 acc 74.609 (77.785)	lr 0.01849
Train [41][2930/3239]	Time 0.236 (0.575)	Data Time 0.001 (0.014)	Loss 2.7912 (2.8443)	Entropy 1.26186 (1.26866)	Top-1 acc 57.422 (55.708)	Top-5 acc 80.859 (77.784)	lr 0.01849
Train [41][2940/3239]	Time 0.257 (0.575)	Data Time 0.002 (0.014)	Loss 2.7689 (2.8443)	Entropy 1.26205 (1.26864)	Top-1 acc 58.594 (55.710)	Top-5 acc 80.859 (77.784)	lr 0.01849
Train [41][2950/3239]	Time 0.237 (0.574)	Data Time 0.001 (0.014)	Loss 2.9411 (2.8444)	Entropy 1.26203 (1.26862)	Top-1 acc 48.438 (55.702)	Top-5 acc 75.781 (77.780)	lr 0.01849
Train [41][2960/3239]	Time 0.258 (0.574)	Data Time 0.001 (0.014)	Loss 2.7417 (2.8444)	Entropy 1.26200 (1.26860)	Top-1 acc 60.547 (55.706)	Top-5 acc 79.688 (77.779)	lr 0.01849
Train [41][2970/3239]	Time 0.317 (0.573)	Data Time 0.001 (0.013)	Loss 2.8144 (2.8442)	Entropy 1.26199 (1.26857)	Top-1 acc 57.031 (55.707)	Top-5 acc 77.734 (77.786)	lr 0.01849
Train [41][2980/3239]	Time 0.210 (0.573)	Data Time 0.001 (0.013)	Loss 2.7794 (2.8442)	Entropy 1.26196 (1.26855)	Top-1 acc 57.812 (55.708)	Top-5 acc 78.125 (77.786)	lr 0.01849
Train [41][2990/3239]	Time 0.224 (0.573)	Data Time 0.001 (0.013)	Loss 2.9870 (2.8440)	Entropy 1.26190 (1.26853)	Top-1 acc 51.172 (55.711)	Top-5 acc 73.047 (77.788)	lr 0.01849
Train [41][3000/3239]	Time 0.316 (0.572)	Data Time 0.001 (0.013)	Loss 2.7655 (2.8438)	Entropy 1.26178 (1.26851)	Top-1 acc 56.250 (55.714)	Top-5 acc 79.297 (77.793)	lr 0.01849
Train [41][3010/3239]	Time 0.231 (0.572)	Data Time 0.001 (0.013)	Loss 2.7948 (2.8439)	Entropy 1.26176 (1.26848)	Top-1 acc 57.812 (55.715)	Top-5 acc 78.516 (77.792)	lr 0.01848
Train [41][3020/3239]	Time 0.255 (0.571)	Data Time 0.001 (0.013)	Loss 2.8463 (2.8439)	Entropy 1.26173 (1.26846)	Top-1 acc 55.859 (55.718)	Top-5 acc 75.391 (77.791)	lr 0.01848
Train [41][3030/3239]	Time 0.250 (0.571)	Data Time 0.001 (0.013)	Loss 2.7358 (2.8440)	Entropy 1.26169 (1.26844)	Top-1 acc 58.984 (55.713)	Top-5 acc 80.469 (77.788)	lr 0.01848
Train [41][3040/3239]	Time 0.250 (0.585)	Data Time 0.004 (0.013)	Loss 3.0249 (2.8440)	Entropy 1.26167 (1.26842)	Top-1 acc 47.266 (55.710)	Top-5 acc 75.781 (77.789)	lr 0.01848
Train [41][3050/3239]	Time 0.248 (0.585)	Data Time 0.002 (0.013)	Loss 2.6343 (2.8439)	Entropy 1.26164 (1.26840)	Top-1 acc 57.422 (55.712)	Top-5 acc 81.250 (77.792)	lr 0.01848
Train [41][3060/3239]	Time 0.204 (0.585)	Data Time 0.002 (0.013)	Loss 2.9543 (2.8439)	Entropy 1.26159 (1.26837)	Top-1 acc 51.172 (55.707)	Top-5 acc 77.344 (77.793)	lr 0.01848
Train [41][3070/3239]	Time 0.222 (0.584)	Data Time 0.001 (0.013)	Loss 2.6418 (2.8438)	Entropy 1.26160 (1.26835)	Top-1 acc 60.547 (55.708)	Top-5 acc 81.641 (77.796)	lr 0.01848
Train [41][3080/3239]	Time 0.220 (0.584)	Data Time 0.002 (0.013)	Loss 2.8912 (2.8437)	Entropy 1.26154 (1.26833)	Top-1 acc 54.688 (55.710)	Top-5 acc 76.562 (77.794)	lr 0.01848
Train [41][3090/3239]	Time 0.208 (0.584)	Data Time 0.002 (0.013)	Loss 2.7152 (2.8436)	Entropy 1.26155 (1.26831)	Top-1 acc 60.938 (55.714)	Top-5 acc 78.125 (77.793)	lr 0.01848
Train [41][3100/3239]	Time 0.232 (0.583)	Data Time 0.001 (0.013)	Loss 2.8031 (2.8437)	Entropy 1.26155 (1.26829)	Top-1 acc 61.719 (55.711)	Top-5 acc 80.078 (77.792)	lr 0.01848
Train [41][3110/3239]	Time 0.247 (0.583)	Data Time 0.001 (0.013)	Loss 2.8212 (2.8439)	Entropy 1.26152 (1.26826)	Top-1 acc 57.031 (55.707)	Top-5 acc 78.125 (77.788)	lr 0.01848
Train [41][3120/3239]	Time 0.246 (0.582)	Data Time 0.001 (0.013)	Loss 2.9242 (2.8439)	Entropy 1.26149 (1.26824)	Top-1 acc 54.297 (55.710)	Top-5 acc 75.391 (77.788)	lr 0.01848
Train [41][3130/3239]	Time 0.223 (0.582)	Data Time 0.001 (0.013)	Loss 2.6599 (2.8438)	Entropy 1.26146 (1.26822)	Top-1 acc 62.109 (55.710)	Top-5 acc 82.812 (77.787)	lr 0.01847
Train [41][3140/3239]	Time 0.207 (0.582)	Data Time 0.001 (0.013)	Loss 2.9962 (2.8439)	Entropy 1.26134 (1.26820)	Top-1 acc 53.906 (55.711)	Top-5 acc 74.219 (77.785)	lr 0.01847
Train [41][3150/3239]	Time 0.256 (0.581)	Data Time 0.001 (0.013)	Loss 2.7742 (2.8437)	Entropy 1.26129 (1.26818)	Top-1 acc 55.859 (55.710)	Top-5 acc 78.906 (77.788)	lr 0.01847
Train [41][3160/3239]	Time 0.225 (0.581)	Data Time 0.001 (0.013)	Loss 2.8453 (2.8435)	Entropy 1.26127 (1.26816)	Top-1 acc 59.766 (55.715)	Top-5 acc 80.078 (77.789)	lr 0.01847
Train [41][3170/3239]	Time 0.256 (0.580)	Data Time 0.001 (0.013)	Loss 2.7264 (2.8435)	Entropy 1.26128 (1.26813)	Top-1 acc 58.984 (55.714)	Top-5 acc 80.078 (77.789)	lr 0.01847
Train [41][3180/3239]	Time 0.213 (0.580)	Data Time 0.000 (0.013)	Loss 2.9718 (2.8435)	Entropy 1.26130 (1.26811)	Top-1 acc 54.688 (55.719)	Top-5 acc 73.047 (77.789)	lr 0.01847
Train [41][3190/3239]	Time 0.230 (0.579)	Data Time 0.000 (0.013)	Loss 2.9968 (2.8434)	Entropy 1.26126 (1.26809)	Top-1 acc 49.219 (55.719)	Top-5 acc 74.609 (77.790)	lr 0.01847
Train [41][3200/3239]	Time 0.155 (0.579)	Data Time 0.000 (0.013)	Loss 2.7805 (2.8433)	Entropy 1.26125 (1.26807)	Top-1 acc 53.906 (55.722)	Top-5 acc 77.344 (77.790)	lr 0.01847
Train [41][3210/3239]	Time 0.221 (0.579)	Data Time 0.000 (0.013)	Loss 2.8916 (2.8433)	Entropy 1.26107 (1.26805)	Top-1 acc 53.906 (55.724)	Top-5 acc 77.734 (77.789)	lr 0.01847
Train [41][3220/3239]	Time 0.191 (0.578)	Data Time 0.000 (0.013)	Loss 2.7987 (2.8433)	Entropy 1.26105 (1.26803)	Top-1 acc 55.469 (55.728)	Top-5 acc 78.906 (77.789)	lr 0.01847
Train [41][3230/3239]	Time 0.233 (0.578)	Data Time 0.000 (0.013)	Loss 2.8673 (2.8435)	Entropy 1.26099 (1.26801)	Top-1 acc 54.297 (55.723)	Top-5 acc 74.609 (77.783)	lr 0.01847
Train [41][3239/3239]	Time 2.243 (0.577)	Data Time 0.000 (0.013)	Loss 3.4974 (2.8436)	Entropy 1.26099 (1.26799)	Top-1 acc 39.506 (55.723)	Top-5 acc 67.901 (77.781)	lr 0.01846
==========Valid [41/120]	loss 1.659	top-1 acc 62.786 (62.786)	top-5 acc 83.757	Train top-1 55.723	top-5 77.781	Entropy 1.26099	Latency-None: 0.000ms	Flops: 557.37M
Train [42][0/3239]	Time 33.644 (33.644)	Data Time 32.583 (32.583)	Loss 2.8003 (2.8003)	Entropy 1.26098 (1.26098)	Top-1 acc 57.812 (57.812)	Top-5 acc 78.906 (78.906)	lr 0.01846
Train [42][10/3239]	Time 2.476 (3.545)	Data Time 0.002 (2.965)	Loss 2.6587 (2.8000)	Entropy 1.26098 (1.26098)	Top-1 acc 59.375 (56.996)	Top-5 acc 81.250 (78.232)	lr 0.01846
Train [42][20/3239]	Time 0.252 (1.973)	Data Time 0.002 (1.554)	Loss 2.8191 (2.8084)	Entropy 1.26096 (1.26097)	Top-1 acc 54.688 (56.808)	Top-5 acc 76.953 (78.497)	lr 0.01846
Train [42][30/3239]	Time 0.229 (1.490)	Data Time 0.001 (1.053)	Loss 2.8421 (2.8141)	Entropy 1.26090 (1.26095)	Top-1 acc 55.078 (56.578)	Top-5 acc 77.734 (78.339)	lr 0.01846
Train [42][40/3239]	Time 0.319 (1.243)	Data Time 0.001 (0.797)	Loss 3.0767 (2.8196)	Entropy 1.26083 (1.26092)	Top-1 acc 50.391 (56.221)	Top-5 acc 76.562 (78.306)	lr 0.01846
Train [42][50/3239]	Time 0.231 (1.091)	Data Time 0.002 (0.641)	Loss 2.7887 (2.8192)	Entropy 1.26080 (1.26090)	Top-1 acc 57.812 (56.357)	Top-5 acc 79.688 (78.217)	lr 0.01846
Train [42][60/3239]	Time 0.222 (0.987)	Data Time 0.001 (0.536)	Loss 2.7711 (2.8235)	Entropy 1.26074 (1.26088)	Top-1 acc 57.422 (56.116)	Top-5 acc 80.469 (78.138)	lr 0.01846
Train [42][70/3239]	Time 0.217 (0.913)	Data Time 0.006 (0.461)	Loss 2.7182 (2.8154)	Entropy 1.26068 (1.26085)	Top-1 acc 54.297 (56.305)	Top-5 acc 79.688 (78.246)	lr 0.01846
Train [42][80/3239]	Time 0.253 (0.857)	Data Time 0.001 (0.404)	Loss 2.8686 (2.8111)	Entropy 1.26068 (1.26083)	Top-1 acc 56.250 (56.385)	Top-5 acc 78.516 (78.342)	lr 0.01846
Train [42][90/3239]	Time 0.329 (0.816)	Data Time 0.001 (0.360)	Loss 2.9759 (2.8118)	Entropy 1.26060 (1.26081)	Top-1 acc 52.344 (56.374)	Top-5 acc 74.609 (78.284)	lr 0.01846
Train [42][100/3239]	Time 0.214 (0.780)	Data Time 0.001 (0.325)	Loss 2.8744 (2.8115)	Entropy 1.26057 (1.26079)	Top-1 acc 58.203 (56.428)	Top-5 acc 75.781 (78.342)	lr 0.01846
Train [42][110/3239]	Time 0.269 (0.751)	Data Time 0.001 (0.295)	Loss 2.8748 (2.8121)	Entropy 1.26056 (1.26077)	Top-1 acc 55.859 (56.415)	Top-5 acc 76.953 (78.385)	lr 0.01845
Train [42][120/3239]	Time 2.616 (0.727)	Data Time 0.002 (0.271)	Loss 2.6053 (2.8062)	Entropy 1.26056 (1.26075)	Top-1 acc 59.766 (56.460)	Top-5 acc 82.812 (78.493)	lr 0.01845
Train [42][130/3239]	Time 0.223 (0.690)	Data Time 0.001 (0.251)	Loss 2.6895 (2.8076)	Entropy 1.26048 (1.26073)	Top-1 acc 56.250 (56.342)	Top-5 acc 82.422 (78.480)	lr 0.01845
Train [42][140/3239]	Time 0.341 (0.674)	Data Time 0.001 (0.233)	Loss 2.9769 (2.8080)	Entropy 1.26048 (1.26071)	Top-1 acc 51.953 (56.316)	Top-5 acc 77.734 (78.471)	lr 0.01845
Train [42][150/3239]	Time 0.220 (0.974)	Data Time 0.002 (0.218)	Loss 2.6750 (2.8091)	Entropy 1.26046 (1.26070)	Top-1 acc 58.203 (56.201)	Top-5 acc 82.422 (78.547)	lr 0.01845
Train [42][160/3239]	Time 0.232 (0.943)	Data Time 0.002 (0.204)	Loss 2.8865 (2.8081)	Entropy 1.26041 (1.26068)	Top-1 acc 57.422 (56.206)	Top-5 acc 75.000 (78.567)	lr 0.01845
Train [42][170/3239]	Time 0.228 (0.915)	Data Time 0.001 (0.192)	Loss 2.8434 (2.8063)	Entropy 1.26039 (1.26067)	Top-1 acc 55.078 (56.225)	Top-5 acc 78.516 (78.625)	lr 0.01845
Train [42][180/3239]	Time 0.226 (0.890)	Data Time 0.001 (0.182)	Loss 2.6765 (2.8033)	Entropy 1.26035 (1.26065)	Top-1 acc 61.719 (56.338)	Top-5 acc 81.250 (78.656)	lr 0.01845
Train [42][190/3239]	Time 0.343 (0.868)	Data Time 0.001 (0.173)	Loss 3.0143 (2.8065)	Entropy 1.26029 (1.26063)	Top-1 acc 54.688 (56.287)	Top-5 acc 74.609 (78.606)	lr 0.01845
Train [42][200/3239]	Time 0.225 (0.849)	Data Time 0.001 (0.164)	Loss 2.7827 (2.8048)	Entropy 1.26025 (1.26061)	Top-1 acc 53.906 (56.318)	Top-5 acc 78.516 (78.636)	lr 0.01845
Train [42][210/3239]	Time 0.209 (0.829)	Data Time 0.001 (0.156)	Loss 2.7690 (2.8059)	Entropy 1.26017 (1.26060)	Top-1 acc 57.422 (56.293)	Top-5 acc 77.734 (78.586)	lr 0.01845
Train [42][220/3239]	Time 0.251 (0.813)	Data Time 0.001 (0.149)	Loss 2.7148 (2.8042)	Entropy 1.26016 (1.26058)	Top-1 acc 57.031 (56.349)	Top-5 acc 81.250 (78.641)	lr 0.01844
Train [42][230/3239]	Time 2.540 (0.798)	Data Time 0.001 (0.143)	Loss 2.6274 (2.8063)	Entropy 1.26016 (1.26056)	Top-1 acc 61.719 (56.314)	Top-5 acc 82.422 (78.576)	lr 0.01844
Train [42][240/3239]	Time 0.225 (0.775)	Data Time 0.001 (0.137)	Loss 2.8614 (2.8068)	Entropy 1.26010 (1.26054)	Top-1 acc 55.078 (56.315)	Top-5 acc 77.344 (78.553)	lr 0.01844
Train [42][250/3239]	Time 0.209 (0.762)	Data Time 0.001 (0.132)	Loss 2.8490 (2.8061)	Entropy 1.26004 (1.26052)	Top-1 acc 55.469 (56.354)	Top-5 acc 74.609 (78.569)	lr 0.01844
Train [42][260/3239]	Time 0.213 (0.750)	Data Time 0.001 (0.127)	Loss 2.6989 (2.8064)	Entropy 1.26002 (1.26050)	Top-1 acc 58.594 (56.368)	Top-5 acc 80.469 (78.575)	lr 0.01844
Train [42][270/3239]	Time 0.247 (0.739)	Data Time 0.002 (0.122)	Loss 2.8101 (2.8058)	Entropy 1.25999 (1.26048)	Top-1 acc 58.203 (56.403)	Top-5 acc 80.859 (78.614)	lr 0.01844
Train [42][280/3239]	Time 0.222 (0.728)	Data Time 0.001 (0.118)	Loss 2.8170 (2.8053)	Entropy 1.25996 (1.26046)	Top-1 acc 59.375 (56.436)	Top-5 acc 77.344 (78.591)	lr 0.01844
Train [42][290/3239]	Time 0.214 (0.720)	Data Time 0.001 (0.114)	Loss 2.6348 (2.8042)	Entropy 1.25994 (1.26045)	Top-1 acc 61.328 (56.516)	Top-5 acc 81.250 (78.603)	lr 0.01844
Train [42][300/3239]	Time 0.213 (0.711)	Data Time 0.001 (0.110)	Loss 2.6670 (2.8022)	Entropy 1.25991 (1.26043)	Top-1 acc 57.812 (56.574)	Top-5 acc 81.641 (78.627)	lr 0.01844
Train [42][310/3239]	Time 0.194 (0.703)	Data Time 0.001 (0.107)	Loss 2.9316 (2.8024)	Entropy 1.25988 (1.26041)	Top-1 acc 52.344 (56.597)	Top-5 acc 75.391 (78.619)	lr 0.01844
Train [42][320/3239]	Time 0.202 (0.695)	Data Time 0.001 (0.103)	Loss 2.5931 (2.8022)	Entropy 1.25982 (1.26040)	Top-1 acc 62.109 (56.607)	Top-5 acc 82.031 (78.608)	lr 0.01844
Train [42][330/3239]	Time 0.233 (0.688)	Data Time 0.001 (0.100)	Loss 2.7361 (2.8018)	Entropy 1.25979 (1.26038)	Top-1 acc 58.594 (56.618)	Top-5 acc 77.344 (78.604)	lr 0.01844
Train [42][340/3239]	Time 2.568 (0.681)	Data Time 0.001 (0.097)	Loss 2.8034 (2.8025)	Entropy 1.25979 (1.26036)	Top-1 acc 56.641 (56.590)	Top-5 acc 78.516 (78.605)	lr 0.01843
Train [42][350/3239]	Time 0.215 (0.668)	Data Time 0.002 (0.095)	Loss 2.6270 (2.8025)	Entropy 1.25972 (1.26034)	Top-1 acc 62.109 (56.591)	Top-5 acc 82.031 (78.598)	lr 0.01843
Train [42][360/3239]	Time 0.218 (0.662)	Data Time 0.001 (0.092)	Loss 2.5530 (2.8029)	Entropy 1.25966 (1.26032)	Top-1 acc 64.062 (56.570)	Top-5 acc 82.812 (78.596)	lr 0.01843
Train [42][370/3239]	Time 0.215 (0.657)	Data Time 0.001 (0.090)	Loss 2.7333 (2.8034)	Entropy 1.25963 (1.26031)	Top-1 acc 58.203 (56.554)	Top-5 acc 78.125 (78.589)	lr 0.01843
Train [42][380/3239]	Time 0.214 (0.651)	Data Time 0.001 (0.087)	Loss 2.8062 (2.8026)	Entropy 1.25947 (1.26028)	Top-1 acc 58.594 (56.566)	Top-5 acc 76.562 (78.600)	lr 0.01843
Train [42][390/3239]	Time 0.311 (0.647)	Data Time 0.001 (0.085)	Loss 2.8807 (2.8030)	Entropy 1.25939 (1.26026)	Top-1 acc 55.078 (56.571)	Top-5 acc 78.906 (78.616)	lr 0.01843
Train [42][400/3239]	Time 0.232 (0.642)	Data Time 0.001 (0.083)	Loss 2.5747 (2.8033)	Entropy 1.25936 (1.26024)	Top-1 acc 62.891 (56.557)	Top-5 acc 81.250 (78.594)	lr 0.01843
Train [42][410/3239]	Time 0.210 (0.637)	Data Time 0.001 (0.081)	Loss 2.9255 (2.8031)	Entropy 1.25929 (1.26022)	Top-1 acc 53.516 (56.593)	Top-5 acc 77.734 (78.591)	lr 0.01843
Train [42][420/3239]	Time 0.220 (0.633)	Data Time 0.001 (0.079)	Loss 3.0302 (2.8030)	Entropy 1.25925 (1.26020)	Top-1 acc 50.781 (56.613)	Top-5 acc 73.828 (78.595)	lr 0.01843
Train [42][430/3239]	Time 0.231 (0.628)	Data Time 0.001 (0.077)	Loss 2.8669 (2.8038)	Entropy 1.25923 (1.26017)	Top-1 acc 56.250 (56.583)	Top-5 acc 77.344 (78.572)	lr 0.01843
Train [42][440/3239]	Time 0.214 (0.624)	Data Time 0.002 (0.076)	Loss 2.7458 (2.8037)	Entropy 1.25906 (1.26015)	Top-1 acc 56.250 (56.567)	Top-5 acc 80.469 (78.571)	lr 0.01843
Train [42][450/3239]	Time 2.465 (0.621)	Data Time 0.001 (0.074)	Loss 2.7608 (2.8033)	Entropy 1.25906 (1.26013)	Top-1 acc 58.984 (56.589)	Top-5 acc 78.906 (78.582)	lr 0.01842
Train [42][460/3239]	Time 0.219 (0.612)	Data Time 0.001 (0.072)	Loss 2.6284 (2.8019)	Entropy 1.25902 (1.26010)	Top-1 acc 59.766 (56.630)	Top-5 acc 82.812 (78.612)	lr 0.01842
Train [42][470/3239]	Time 0.211 (0.609)	Data Time 0.001 (0.071)	Loss 2.6586 (2.8023)	Entropy 1.25897 (1.26008)	Top-1 acc 61.328 (56.607)	Top-5 acc 80.469 (78.597)	lr 0.01842
Train [42][480/3239]	Time 0.227 (0.605)	Data Time 0.001 (0.070)	Loss 2.9910 (2.8031)	Entropy 1.25885 (1.26005)	Top-1 acc 47.656 (56.573)	Top-5 acc 75.000 (78.584)	lr 0.01842
Train [42][490/3239]	Time 0.238 (0.602)	Data Time 0.001 (0.068)	Loss 2.5907 (2.8022)	Entropy 1.25883 (1.26003)	Top-1 acc 60.938 (56.583)	Top-5 acc 83.594 (78.590)	lr 0.01842
Train [42][500/3239]	Time 0.241 (0.600)	Data Time 0.001 (0.067)	Loss 2.7572 (2.8016)	Entropy 1.25872 (1.26000)	Top-1 acc 57.422 (56.584)	Top-5 acc 76.953 (78.593)	lr 0.01842
Train [42][510/3239]	Time 0.259 (0.683)	Data Time 0.003 (0.066)	Loss 2.9442 (2.8018)	Entropy 1.25868 (1.25998)	Top-1 acc 51.562 (56.574)	Top-5 acc 77.734 (78.589)	lr 0.01842
Train [42][520/3239]	Time 0.218 (0.679)	Data Time 0.002 (0.065)	Loss 2.6681 (2.8015)	Entropy 1.25861 (1.25995)	Top-1 acc 60.156 (56.574)	Top-5 acc 82.812 (78.599)	lr 0.01842
Train [42][530/3239]	Time 0.221 (0.676)	Data Time 0.002 (0.063)	Loss 2.7431 (2.8006)	Entropy 1.25858 (1.25993)	Top-1 acc 56.641 (56.605)	Top-5 acc 78.906 (78.617)	lr 0.01842
Train [42][540/3239]	Time 0.222 (0.672)	Data Time 0.002 (0.062)	Loss 2.9801 (2.8011)	Entropy 1.25850 (1.25990)	Top-1 acc 54.688 (56.593)	Top-5 acc 76.172 (78.612)	lr 0.01842
Train [42][550/3239]	Time 0.255 (0.669)	Data Time 0.001 (0.061)	Loss 2.9234 (2.8022)	Entropy 1.25847 (1.25988)	Top-1 acc 53.906 (56.565)	Top-5 acc 75.781 (78.586)	lr 0.01842
Train [42][560/3239]	Time 2.520 (0.665)	Data Time 0.001 (0.060)	Loss 2.6522 (2.8011)	Entropy 1.25847 (1.25985)	Top-1 acc 58.984 (56.603)	Top-5 acc 81.641 (78.594)	lr 0.01841
Train [42][570/3239]	Time 0.260 (0.658)	Data Time 0.002 (0.059)	Loss 2.9411 (2.8010)	Entropy 1.25845 (1.25983)	Top-1 acc 57.031 (56.619)	Top-5 acc 75.391 (78.597)	lr 0.01841
Train [42][580/3239]	Time 0.272 (0.655)	Data Time 0.001 (0.058)	Loss 2.7432 (2.8013)	Entropy 1.25841 (1.25980)	Top-1 acc 55.859 (56.614)	Top-5 acc 79.297 (78.584)	lr 0.01841
Train [42][590/3239]	Time 0.267 (0.652)	Data Time 0.001 (0.057)	Loss 2.9152 (2.8030)	Entropy 1.25839 (1.25978)	Top-1 acc 54.297 (56.568)	Top-5 acc 77.344 (78.557)	lr 0.01841
Train [42][600/3239]	Time 0.175 (0.648)	Data Time 0.001 (0.056)	Loss 2.8880 (2.8029)	Entropy 1.25835 (1.25976)	Top-1 acc 50.781 (56.565)	Top-5 acc 75.000 (78.555)	lr 0.01841
Train [42][610/3239]	Time 0.232 (0.646)	Data Time 0.001 (0.055)	Loss 2.7179 (2.8035)	Entropy 1.25810 (1.25973)	Top-1 acc 57.031 (56.556)	Top-5 acc 80.469 (78.557)	lr 0.01841
Train [42][620/3239]	Time 0.232 (0.642)	Data Time 0.001 (0.055)	Loss 2.7300 (2.8031)	Entropy 1.25798 (1.25970)	Top-1 acc 56.641 (56.557)	Top-5 acc 80.078 (78.573)	lr 0.01841
Train [42][630/3239]	Time 0.253 (0.640)	Data Time 0.001 (0.054)	Loss 2.7182 (2.8037)	Entropy 1.25794 (1.25968)	Top-1 acc 63.281 (56.552)	Top-5 acc 78.906 (78.552)	lr 0.01841
Train [42][640/3239]	Time 0.224 (0.637)	Data Time 0.001 (0.053)	Loss 2.8722 (2.8041)	Entropy 1.25790 (1.25965)	Top-1 acc 58.203 (56.550)	Top-5 acc 77.734 (78.545)	lr 0.01841
Train [42][650/3239]	Time 0.216 (0.634)	Data Time 0.001 (0.052)	Loss 2.8309 (2.8044)	Entropy 1.25789 (1.25962)	Top-1 acc 55.078 (56.549)	Top-5 acc 73.438 (78.534)	lr 0.01841
Train [42][660/3239]	Time 0.291 (0.631)	Data Time 0.001 (0.051)	Loss 2.8070 (2.8048)	Entropy 1.25779 (1.25960)	Top-1 acc 57.812 (56.540)	Top-5 acc 78.516 (78.520)	lr 0.01841
Train [42][670/3239]	Time 2.402 (0.629)	Data Time 0.001 (0.051)	Loss 2.7651 (2.8055)	Entropy 1.25779 (1.25957)	Top-1 acc 58.984 (56.518)	Top-5 acc 76.172 (78.521)	lr 0.01840
Train [42][680/3239]	Time 0.236 (0.623)	Data Time 0.001 (0.050)	Loss 2.8357 (2.8053)	Entropy 1.25774 (1.25954)	Top-1 acc 57.422 (56.534)	Top-5 acc 74.609 (78.523)	lr 0.01840
Train [42][690/3239]	Time 0.237 (0.620)	Data Time 0.002 (0.049)	Loss 2.7849 (2.8058)	Entropy 1.25773 (1.25952)	Top-1 acc 54.297 (56.509)	Top-5 acc 77.344 (78.511)	lr 0.01840
Train [42][700/3239]	Time 0.208 (0.618)	Data Time 0.001 (0.048)	Loss 2.7022 (2.8055)	Entropy 1.25765 (1.25949)	Top-1 acc 55.859 (56.502)	Top-5 acc 80.859 (78.511)	lr 0.01840
Train [42][710/3239]	Time 0.238 (0.616)	Data Time 0.001 (0.048)	Loss 2.6852 (2.8057)	Entropy 1.25750 (1.25946)	Top-1 acc 60.547 (56.503)	Top-5 acc 81.250 (78.511)	lr 0.01840
Train [42][720/3239]	Time 0.237 (0.614)	Data Time 0.001 (0.047)	Loss 2.7794 (2.8060)	Entropy 1.25748 (1.25943)	Top-1 acc 60.938 (56.514)	Top-5 acc 79.297 (78.499)	lr 0.01840
Train [42][730/3239]	Time 0.236 (0.612)	Data Time 0.001 (0.047)	Loss 2.7770 (2.8059)	Entropy 1.25743 (1.25941)	Top-1 acc 58.203 (56.521)	Top-5 acc 78.516 (78.493)	lr 0.01840
Train [42][740/3239]	Time 0.213 (0.610)	Data Time 0.001 (0.046)	Loss 2.7293 (2.8062)	Entropy 1.25738 (1.25938)	Top-1 acc 56.250 (56.497)	Top-5 acc 80.859 (78.490)	lr 0.01840
Train [42][750/3239]	Time 0.241 (0.608)	Data Time 0.001 (0.045)	Loss 2.7788 (2.8064)	Entropy 1.25737 (1.25935)	Top-1 acc 57.031 (56.485)	Top-5 acc 79.297 (78.477)	lr 0.01840
Train [42][760/3239]	Time 0.227 (0.606)	Data Time 0.001 (0.045)	Loss 3.0839 (2.8066)	Entropy 1.25740 (1.25933)	Top-1 acc 50.000 (56.471)	Top-5 acc 73.438 (78.481)	lr 0.01840
Train [42][770/3239]	Time 0.253 (0.604)	Data Time 0.001 (0.044)	Loss 2.5945 (2.8057)	Entropy 1.25736 (1.25930)	Top-1 acc 60.547 (56.485)	Top-5 acc 82.422 (78.500)	lr 0.01840
Train [42][780/3239]	Time 2.388 (0.602)	Data Time 0.001 (0.044)	Loss 2.6899 (2.8058)	Entropy 1.25736 (1.25928)	Top-1 acc 60.156 (56.478)	Top-5 acc 76.562 (78.492)	lr 0.01840
Train [42][790/3239]	Time 0.277 (0.597)	Data Time 0.001 (0.043)	Loss 2.6860 (2.8055)	Entropy 1.25730 (1.25925)	Top-1 acc 58.594 (56.494)	Top-5 acc 78.516 (78.489)	lr 0.01839
Train [42][800/3239]	Time 0.230 (0.596)	Data Time 0.001 (0.043)	Loss 2.7558 (2.8059)	Entropy 1.25720 (1.25923)	Top-1 acc 56.641 (56.480)	Top-5 acc 78.516 (78.483)	lr 0.01839
Train [42][810/3239]	Time 0.218 (0.594)	Data Time 0.001 (0.042)	Loss 2.9220 (2.8063)	Entropy 1.25714 (1.25920)	Top-1 acc 55.078 (56.454)	Top-5 acc 74.219 (78.476)	lr 0.01839
Train [42][820/3239]	Time 0.233 (0.592)	Data Time 0.001 (0.042)	Loss 2.6989 (2.8066)	Entropy 1.25706 (1.25918)	Top-1 acc 58.984 (56.442)	Top-5 acc 82.031 (78.473)	lr 0.01839
Train [42][830/3239]	Time 0.234 (0.591)	Data Time 0.001 (0.041)	Loss 2.8502 (2.8072)	Entropy 1.25702 (1.25915)	Top-1 acc 57.812 (56.441)	Top-5 acc 76.562 (78.466)	lr 0.01839
Train [42][840/3239]	Time 0.226 (0.589)	Data Time 0.001 (0.041)	Loss 2.7369 (2.8077)	Entropy 1.25702 (1.25913)	Top-1 acc 59.375 (56.438)	Top-5 acc 80.078 (78.453)	lr 0.01839
Train [42][850/3239]	Time 0.324 (0.588)	Data Time 0.001 (0.040)	Loss 2.6239 (2.8081)	Entropy 1.25699 (1.25910)	Top-1 acc 60.156 (56.430)	Top-5 acc 81.250 (78.449)	lr 0.01839
Train [42][860/3239]	Time 0.214 (0.586)	Data Time 0.001 (0.040)	Loss 2.7763 (2.8078)	Entropy 1.25695 (1.25908)	Top-1 acc 57.422 (56.430)	Top-5 acc 77.734 (78.450)	lr 0.01839
Train [42][870/3239]	Time 0.207 (0.639)	Data Time 0.002 (0.039)	Loss 2.8054 (2.8080)	Entropy 1.25688 (1.25905)	Top-1 acc 57.031 (56.429)	Top-5 acc 78.125 (78.454)	lr 0.01839
Train [42][880/3239]	Time 0.268 (0.637)	Data Time 0.002 (0.039)	Loss 2.7226 (2.8088)	Entropy 1.25687 (1.25903)	Top-1 acc 58.594 (56.418)	Top-5 acc 80.469 (78.443)	lr 0.01839
Train [42][890/3239]	Time 2.501 (0.635)	Data Time 0.002 (0.038)	Loss 2.7934 (2.8094)	Entropy 1.25687 (1.25900)	Top-1 acc 58.203 (56.413)	Top-5 acc 79.688 (78.429)	lr 0.01839
Train [42][900/3239]	Time 0.213 (0.631)	Data Time 0.002 (0.038)	Loss 2.8752 (2.8101)	Entropy 1.25685 (1.25898)	Top-1 acc 57.812 (56.406)	Top-5 acc 75.781 (78.409)	lr 0.01838
Train [42][910/3239]	Time 0.211 (0.629)	Data Time 0.001 (0.038)	Loss 2.7827 (2.8103)	Entropy 1.25684 (1.25895)	Top-1 acc 55.469 (56.395)	Top-5 acc 77.734 (78.400)	lr 0.01838
Train [42][920/3239]	Time 0.246 (0.627)	Data Time 0.002 (0.037)	Loss 2.8203 (2.8100)	Entropy 1.25680 (1.25893)	Top-1 acc 55.469 (56.401)	Top-5 acc 78.906 (78.416)	lr 0.01838
Train [42][930/3239]	Time 0.220 (0.625)	Data Time 0.001 (0.037)	Loss 2.8624 (2.8100)	Entropy 1.25672 (1.25891)	Top-1 acc 55.469 (56.393)	Top-5 acc 78.516 (78.418)	lr 0.01838
Train [42][940/3239]	Time 0.229 (0.623)	Data Time 0.001 (0.037)	Loss 2.9055 (2.8099)	Entropy 1.25669 (1.25888)	Top-1 acc 54.688 (56.393)	Top-5 acc 76.172 (78.417)	lr 0.01838
Train [42][950/3239]	Time 0.231 (0.622)	Data Time 0.001 (0.036)	Loss 2.6952 (2.8107)	Entropy 1.25667 (1.25886)	Top-1 acc 62.500 (56.372)	Top-5 acc 78.906 (78.396)	lr 0.01838
Train [42][960/3239]	Time 0.216 (0.620)	Data Time 0.001 (0.036)	Loss 2.8206 (2.8107)	Entropy 1.25656 (1.25884)	Top-1 acc 57.812 (56.376)	Top-5 acc 77.734 (78.397)	lr 0.01838
Train [42][970/3239]	Time 0.208 (0.618)	Data Time 0.001 (0.035)	Loss 2.8395 (2.8107)	Entropy 1.25652 (1.25881)	Top-1 acc 55.859 (56.371)	Top-5 acc 76.562 (78.387)	lr 0.01838
Train [42][980/3239]	Time 0.211 (0.616)	Data Time 0.001 (0.035)	Loss 2.8241 (2.8107)	Entropy 1.25646 (1.25879)	Top-1 acc 57.812 (56.375)	Top-5 acc 76.953 (78.393)	lr 0.01838
Train [42][990/3239]	Time 0.285 (0.615)	Data Time 0.001 (0.035)	Loss 2.8119 (2.8108)	Entropy 1.25645 (1.25877)	Top-1 acc 56.641 (56.377)	Top-5 acc 79.688 (78.391)	lr 0.01838
Train [42][1000/3239]	Time 2.436 (0.613)	Data Time 0.002 (0.034)	Loss 2.8805 (2.8113)	Entropy 1.25645 (1.25874)	Top-1 acc 52.734 (56.360)	Top-5 acc 76.172 (78.375)	lr 0.01838
Train [42][1010/3239]	Time 0.205 (0.609)	Data Time 0.001 (0.034)	Loss 2.7973 (2.8109)	Entropy 1.25638 (1.25872)	Top-1 acc 58.203 (56.372)	Top-5 acc 78.516 (78.388)	lr 0.01837
Train [42][1020/3239]	Time 0.223 (0.608)	Data Time 0.001 (0.034)	Loss 2.7402 (2.8109)	Entropy 1.25637 (1.25870)	Top-1 acc 55.469 (56.368)	Top-5 acc 81.250 (78.396)	lr 0.01837
Train [42][1030/3239]	Time 0.230 (0.607)	Data Time 0.002 (0.033)	Loss 2.8721 (2.8109)	Entropy 1.25627 (1.25867)	Top-1 acc 58.203 (56.372)	Top-5 acc 80.078 (78.409)	lr 0.01837
Train [42][1040/3239]	Time 0.240 (0.605)	Data Time 0.001 (0.033)	Loss 2.7580 (2.8112)	Entropy 1.25619 (1.25865)	Top-1 acc 59.766 (56.375)	Top-5 acc 78.906 (78.404)	lr 0.01837
Train [42][1050/3239]	Time 0.330 (0.604)	Data Time 0.001 (0.033)	Loss 2.9946 (2.8117)	Entropy 1.25617 (1.25863)	Top-1 acc 50.781 (56.365)	Top-5 acc 76.953 (78.395)	lr 0.01837
Train [42][1060/3239]	Time 0.222 (0.603)	Data Time 0.001 (0.033)	Loss 2.9260 (2.8120)	Entropy 1.25612 (1.25860)	Top-1 acc 52.344 (56.349)	Top-5 acc 75.391 (78.388)	lr 0.01837
Train [42][1070/3239]	Time 0.208 (0.601)	Data Time 0.001 (0.032)	Loss 3.0165 (2.8124)	Entropy 1.25613 (1.25858)	Top-1 acc 51.953 (56.335)	Top-5 acc 74.609 (78.369)	lr 0.01837
Train [42][1080/3239]	Time 0.201 (0.600)	Data Time 0.001 (0.032)	Loss 2.7918 (2.8125)	Entropy 1.25607 (1.25856)	Top-1 acc 55.078 (56.329)	Top-5 acc 78.906 (78.362)	lr 0.01837
Train [42][1090/3239]	Time 0.225 (0.599)	Data Time 0.002 (0.032)	Loss 2.7754 (2.8126)	Entropy 1.25604 (1.25854)	Top-1 acc 52.344 (56.320)	Top-5 acc 79.688 (78.353)	lr 0.01837
Train [42][1100/3239]	Time 0.342 (0.597)	Data Time 0.001 (0.031)	Loss 2.8693 (2.8127)	Entropy 1.25600 (1.25851)	Top-1 acc 55.859 (56.324)	Top-5 acc 76.953 (78.353)	lr 0.01837
Train [42][1110/3239]	Time 2.511 (0.596)	Data Time 0.002 (0.031)	Loss 2.7939 (2.8128)	Entropy 1.25600 (1.25849)	Top-1 acc 56.641 (56.319)	Top-5 acc 80.859 (78.350)	lr 0.01837
Train [42][1120/3239]	Time 0.231 (0.593)	Data Time 0.001 (0.031)	Loss 2.8647 (2.8131)	Entropy 1.25598 (1.25847)	Top-1 acc 54.688 (56.315)	Top-5 acc 79.297 (78.350)	lr 0.01836
Train [42][1130/3239]	Time 0.239 (0.592)	Data Time 0.001 (0.031)	Loss 2.9108 (2.8132)	Entropy 1.25597 (1.25845)	Top-1 acc 55.859 (56.316)	Top-5 acc 75.391 (78.339)	lr 0.01836
Train [42][1140/3239]	Time 0.209 (0.590)	Data Time 0.001 (0.030)	Loss 2.7895 (2.8131)	Entropy 1.25578 (1.25842)	Top-1 acc 60.547 (56.315)	Top-5 acc 76.172 (78.337)	lr 0.01836
Train [42][1150/3239]	Time 0.325 (0.589)	Data Time 0.001 (0.030)	Loss 2.7773 (2.8128)	Entropy 1.25563 (1.25840)	Top-1 acc 54.688 (56.323)	Top-5 acc 80.859 (78.348)	lr 0.01836
Train [42][1160/3239]	Time 0.231 (0.588)	Data Time 0.002 (0.030)	Loss 2.6946 (2.8124)	Entropy 1.25557 (1.25837)	Top-1 acc 61.328 (56.336)	Top-5 acc 80.859 (78.360)	lr 0.01836
Train [42][1170/3239]	Time 0.252 (0.587)	Data Time 0.002 (0.030)	Loss 2.8331 (2.8123)	Entropy 1.25551 (1.25835)	Top-1 acc 58.203 (56.339)	Top-5 acc 77.734 (78.357)	lr 0.01836
Train [42][1180/3239]	Time 0.229 (0.586)	Data Time 0.002 (0.029)	Loss 2.8353 (2.8119)	Entropy 1.25549 (1.25833)	Top-1 acc 54.688 (56.357)	Top-5 acc 80.078 (78.375)	lr 0.01836
Train [42][1190/3239]	Time 0.215 (0.585)	Data Time 0.001 (0.029)	Loss 2.8563 (2.8115)	Entropy 1.25542 (1.25830)	Top-1 acc 58.984 (56.363)	Top-5 acc 74.219 (78.382)	lr 0.01836
Train [42][1200/3239]	Time 0.212 (0.584)	Data Time 0.001 (0.029)	Loss 2.9525 (2.8117)	Entropy 1.25537 (1.25828)	Top-1 acc 52.734 (56.360)	Top-5 acc 75.391 (78.380)	lr 0.01836
Train [42][1210/3239]	Time 0.277 (0.583)	Data Time 0.001 (0.029)	Loss 2.8504 (2.8117)	Entropy 1.25538 (1.25825)	Top-1 acc 57.031 (56.360)	Top-5 acc 77.344 (78.389)	lr 0.01836
Train [42][1220/3239]	Time 2.422 (0.582)	Data Time 0.001 (0.028)	Loss 2.9434 (2.8120)	Entropy 1.25538 (1.25823)	Top-1 acc 53.516 (56.354)	Top-5 acc 75.781 (78.381)	lr 0.01836
Train [42][1230/3239]	Time 0.230 (0.579)	Data Time 0.001 (0.028)	Loss 2.8811 (2.8123)	Entropy 1.25529 (1.25821)	Top-1 acc 56.641 (56.356)	Top-5 acc 75.391 (78.372)	lr 0.01836
Train [42][1240/3239]	Time 0.387 (0.612)	Data Time 0.002 (0.028)	Loss 2.6826 (2.8118)	Entropy 1.25524 (1.25818)	Top-1 acc 58.594 (56.367)	Top-5 acc 81.641 (78.380)	lr 0.01835
Train [42][1250/3239]	Time 0.228 (0.612)	Data Time 0.002 (0.028)	Loss 2.7674 (2.8118)	Entropy 1.25515 (1.25816)	Top-1 acc 55.859 (56.361)	Top-5 acc 77.344 (78.376)	lr 0.01835
Train [42][1260/3239]	Time 0.226 (0.611)	Data Time 0.002 (0.028)	Loss 3.1187 (2.8125)	Entropy 1.25483 (1.25813)	Top-1 acc 52.344 (56.353)	Top-5 acc 70.703 (78.359)	lr 0.01835
Train [42][1270/3239]	Time 0.247 (0.610)	Data Time 0.001 (0.027)	Loss 2.7996 (2.8128)	Entropy 1.25468 (1.25811)	Top-1 acc 53.125 (56.347)	Top-5 acc 78.906 (78.355)	lr 0.01835
Train [42][1280/3239]	Time 0.214 (0.608)	Data Time 0.001 (0.027)	Loss 3.0283 (2.8131)	Entropy 1.25466 (1.25808)	Top-1 acc 49.219 (56.339)	Top-5 acc 73.047 (78.352)	lr 0.01835
Train [42][1290/3239]	Time 0.226 (0.607)	Data Time 0.001 (0.027)	Loss 2.6879 (2.8137)	Entropy 1.25461 (1.25805)	Top-1 acc 59.766 (56.326)	Top-5 acc 79.297 (78.341)	lr 0.01835
Train [42][1300/3239]	Time 0.215 (0.606)	Data Time 0.001 (0.027)	Loss 2.8607 (2.8142)	Entropy 1.25459 (1.25803)	Top-1 acc 55.078 (56.318)	Top-5 acc 76.562 (78.333)	lr 0.01835
Train [42][1310/3239]	Time 0.215 (0.605)	Data Time 0.001 (0.027)	Loss 2.8253 (2.8140)	Entropy 1.25448 (1.25800)	Top-1 acc 55.078 (56.318)	Top-5 acc 76.172 (78.332)	lr 0.01835
Train [42][1320/3239]	Time 0.246 (0.604)	Data Time 0.002 (0.026)	Loss 2.8111 (2.8141)	Entropy 1.25428 (1.25797)	Top-1 acc 61.328 (56.316)	Top-5 acc 75.781 (78.330)	lr 0.01835
Train [42][1330/3239]	Time 2.379 (0.603)	Data Time 0.001 (0.026)	Loss 2.7646 (2.8146)	Entropy 1.25428 (1.25795)	Top-1 acc 60.938 (56.307)	Top-5 acc 79.297 (78.324)	lr 0.01835
Train [42][1340/3239]	Time 0.231 (0.600)	Data Time 0.001 (0.026)	Loss 2.8787 (2.8146)	Entropy 1.25446 (1.25792)	Top-1 acc 52.734 (56.307)	Top-5 acc 78.906 (78.324)	lr 0.01835
Train [42][1350/3239]	Time 0.239 (0.599)	Data Time 0.001 (0.026)	Loss 2.9200 (2.8147)	Entropy 1.25439 (1.25789)	Top-1 acc 54.688 (56.302)	Top-5 acc 76.953 (78.322)	lr 0.01834
Train [42][1360/3239]	Time 0.248 (0.598)	Data Time 0.001 (0.026)	Loss 2.9448 (2.8142)	Entropy 1.25431 (1.25787)	Top-1 acc 51.953 (56.317)	Top-5 acc 76.953 (78.335)	lr 0.01834
Train [42][1370/3239]	Time 0.226 (0.597)	Data Time 0.001 (0.026)	Loss 2.7986 (2.8144)	Entropy 1.25432 (1.25784)	Top-1 acc 55.469 (56.306)	Top-5 acc 79.688 (78.332)	lr 0.01834
Train [42][1380/3239]	Time 0.247 (0.596)	Data Time 0.002 (0.025)	Loss 2.8172 (2.8147)	Entropy 1.25427 (1.25782)	Top-1 acc 54.688 (56.301)	Top-5 acc 77.344 (78.325)	lr 0.01834
Train [42][1390/3239]	Time 0.224 (0.595)	Data Time 0.001 (0.025)	Loss 2.8129 (2.8150)	Entropy 1.25422 (1.25779)	Top-1 acc 54.688 (56.301)	Top-5 acc 77.344 (78.314)	lr 0.01834
Train [42][1400/3239]	Time 0.218 (0.594)	Data Time 0.001 (0.025)	Loss 2.7276 (2.8151)	Entropy 1.25414 (1.25776)	Top-1 acc 59.375 (56.300)	Top-5 acc 80.078 (78.312)	lr 0.01834
Train [42][1410/3239]	Time 0.301 (0.593)	Data Time 0.001 (0.025)	Loss 2.8249 (2.8149)	Entropy 1.25412 (1.25774)	Top-1 acc 56.641 (56.303)	Top-5 acc 78.906 (78.317)	lr 0.01834
Train [42][1420/3239]	Time 0.210 (0.592)	Data Time 0.001 (0.025)	Loss 2.9217 (2.8147)	Entropy 1.25409 (1.25771)	Top-1 acc 55.859 (56.316)	Top-5 acc 75.781 (78.318)	lr 0.01834
Train [42][1430/3239]	Time 0.208 (0.591)	Data Time 0.002 (0.025)	Loss 2.9470 (2.8148)	Entropy 1.25410 (1.25769)	Top-1 acc 51.172 (56.314)	Top-5 acc 78.125 (78.319)	lr 0.01834
Train [42][1440/3239]	Time 2.362 (0.590)	Data Time 0.002 (0.024)	Loss 3.0333 (2.8146)	Entropy 1.25410 (1.25766)	Top-1 acc 51.562 (56.314)	Top-5 acc 72.656 (78.319)	lr 0.01834
Train [42][1450/3239]	Time 0.204 (0.587)	Data Time 0.001 (0.024)	Loss 2.7142 (2.8142)	Entropy 1.25405 (1.25764)	Top-1 acc 57.812 (56.321)	Top-5 acc 78.516 (78.324)	lr 0.01834
Train [42][1460/3239]	Time 0.399 (0.586)	Data Time 0.002 (0.024)	Loss 2.8644 (2.8140)	Entropy 1.25405 (1.25761)	Top-1 acc 57.031 (56.329)	Top-5 acc 76.953 (78.333)	lr 0.01833
Train [42][1470/3239]	Time 0.244 (0.586)	Data Time 0.001 (0.024)	Loss 3.0450 (2.8144)	Entropy 1.25406 (1.25759)	Top-1 acc 53.125 (56.320)	Top-5 acc 73.438 (78.326)	lr 0.01833
Train [42][1480/3239]	Time 0.208 (0.585)	Data Time 0.001 (0.024)	Loss 2.9339 (2.8144)	Entropy 1.25404 (1.25757)	Top-1 acc 53.125 (56.318)	Top-5 acc 76.172 (78.329)	lr 0.01833
Train [42][1490/3239]	Time 0.217 (0.584)	Data Time 0.001 (0.024)	Loss 2.6821 (2.8145)	Entropy 1.25401 (1.25754)	Top-1 acc 60.547 (56.315)	Top-5 acc 80.469 (78.332)	lr 0.01833
Train [42][1500/3239]	Time 0.230 (0.583)	Data Time 0.001 (0.023)	Loss 2.9051 (2.8151)	Entropy 1.25404 (1.25752)	Top-1 acc 53.125 (56.307)	Top-5 acc 76.562 (78.323)	lr 0.01833
Train [42][1510/3239]	Time 0.316 (0.582)	Data Time 0.001 (0.023)	Loss 2.7502 (2.8150)	Entropy 1.25402 (1.25749)	Top-1 acc 57.422 (56.312)	Top-5 acc 75.781 (78.321)	lr 0.01833
Train [42][1520/3239]	Time 0.160 (0.581)	Data Time 0.001 (0.023)	Loss 2.7349 (2.8154)	Entropy 1.25397 (1.25747)	Top-1 acc 59.375 (56.311)	Top-5 acc 77.734 (78.318)	lr 0.01833
Train [42][1530/3239]	Time 0.217 (0.581)	Data Time 0.001 (0.023)	Loss 2.7782 (2.8154)	Entropy 1.25396 (1.25745)	Top-1 acc 58.203 (56.313)	Top-5 acc 78.125 (78.318)	lr 0.01833
Train [42][1540/3239]	Time 0.223 (0.580)	Data Time 0.001 (0.023)	Loss 2.8059 (2.8153)	Entropy 1.25389 (1.25743)	Top-1 acc 55.859 (56.314)	Top-5 acc 81.250 (78.324)	lr 0.01833
Train [42][1550/3239]	Time 2.431 (0.579)	Data Time 0.001 (0.023)	Loss 2.8024 (2.8154)	Entropy 1.25389 (1.25740)	Top-1 acc 61.328 (56.314)	Top-5 acc 78.906 (78.315)	lr 0.01833
Train [42][1560/3239]	Time 0.203 (0.577)	Data Time 0.001 (0.023)	Loss 2.9243 (2.8156)	Entropy 1.25392 (1.25738)	Top-1 acc 54.688 (56.311)	Top-5 acc 78.516 (78.316)	lr 0.01833
Train [42][1570/3239]	Time 0.221 (0.576)	Data Time 0.001 (0.023)	Loss 2.8059 (2.8157)	Entropy 1.25388 (1.25736)	Top-1 acc 58.984 (56.306)	Top-5 acc 77.734 (78.316)	lr 0.01832
Train [42][1580/3239]	Time 0.208 (0.575)	Data Time 0.001 (0.022)	Loss 3.0914 (2.8155)	Entropy 1.25383 (1.25734)	Top-1 acc 52.344 (56.311)	Top-5 acc 71.484 (78.318)	lr 0.01832
Train [42][1590/3239]	Time 0.210 (0.575)	Data Time 0.001 (0.022)	Loss 2.8602 (2.8156)	Entropy 1.25365 (1.25731)	Top-1 acc 55.859 (56.312)	Top-5 acc 75.781 (78.313)	lr 0.01832
Train [42][1600/3239]	Time 0.239 (0.602)	Data Time 0.002 (0.022)	Loss 2.9598 (2.8159)	Entropy 1.25363 (1.25729)	Top-1 acc 53.516 (56.305)	Top-5 acc 77.344 (78.309)	lr 0.01832
Train [42][1610/3239]	Time 0.216 (0.602)	Data Time 0.002 (0.022)	Loss 2.8070 (2.8162)	Entropy 1.25362 (1.25727)	Top-1 acc 56.250 (56.299)	Top-5 acc 78.125 (78.309)	lr 0.01832
Train [42][1620/3239]	Time 0.208 (0.601)	Data Time 0.002 (0.022)	Loss 2.8670 (2.8163)	Entropy 1.25360 (1.25725)	Top-1 acc 55.078 (56.294)	Top-5 acc 78.125 (78.306)	lr 0.01832
Train [42][1630/3239]	Time 0.210 (0.600)	Data Time 0.001 (0.022)	Loss 2.6762 (2.8166)	Entropy 1.25358 (1.25722)	Top-1 acc 58.984 (56.285)	Top-5 acc 80.469 (78.299)	lr 0.01832
Train [42][1640/3239]	Time 0.226 (0.599)	Data Time 0.002 (0.022)	Loss 2.6703 (2.8166)	Entropy 1.25354 (1.25720)	Top-1 acc 60.547 (56.292)	Top-5 acc 81.250 (78.303)	lr 0.01832
Train [42][1650/3239]	Time 0.231 (0.598)	Data Time 0.002 (0.022)	Loss 2.8677 (2.8169)	Entropy 1.25342 (1.25718)	Top-1 acc 55.859 (56.288)	Top-5 acc 75.000 (78.297)	lr 0.01832
Train [42][1660/3239]	Time 2.435 (0.597)	Data Time 0.001 (0.021)	Loss 2.7080 (2.8168)	Entropy 1.25342 (1.25716)	Top-1 acc 59.375 (56.294)	Top-5 acc 82.812 (78.297)	lr 0.01832
Train [42][1670/3239]	Time 0.224 (0.595)	Data Time 0.001 (0.021)	Loss 2.7066 (2.8166)	Entropy 1.25335 (1.25713)	Top-1 acc 57.812 (56.299)	Top-5 acc 79.297 (78.301)	lr 0.01832
Train [42][1680/3239]	Time 0.209 (0.594)	Data Time 0.001 (0.021)	Loss 2.7274 (2.8162)	Entropy 1.25332 (1.25711)	Top-1 acc 59.375 (56.308)	Top-5 acc 80.469 (78.311)	lr 0.01831
Train [42][1690/3239]	Time 0.213 (0.593)	Data Time 0.001 (0.021)	Loss 3.0222 (2.8165)	Entropy 1.25331 (1.25709)	Top-1 acc 53.906 (56.302)	Top-5 acc 75.781 (78.305)	lr 0.01831
Train [42][1700/3239]	Time 0.228 (0.593)	Data Time 0.001 (0.021)	Loss 2.7998 (2.8169)	Entropy 1.25329 (1.25707)	Top-1 acc 57.812 (56.299)	Top-5 acc 75.781 (78.296)	lr 0.01831
Train [42][1710/3239]	Time 0.240 (0.592)	Data Time 0.001 (0.021)	Loss 2.7641 (2.8166)	Entropy 1.25327 (1.25704)	Top-1 acc 57.422 (56.309)	Top-5 acc 78.125 (78.296)	lr 0.01831
Train [42][1720/3239]	Time 0.309 (0.591)	Data Time 0.001 (0.021)	Loss 2.9867 (2.8166)	Entropy 1.25321 (1.25702)	Top-1 acc 54.297 (56.312)	Top-5 acc 75.000 (78.296)	lr 0.01831
Train [42][1730/3239]	Time 0.216 (0.590)	Data Time 0.001 (0.021)	Loss 2.7458 (2.8168)	Entropy 1.25316 (1.25700)	Top-1 acc 56.641 (56.309)	Top-5 acc 79.688 (78.296)	lr 0.01831
Train [42][1740/3239]	Time 0.204 (0.589)	Data Time 0.001 (0.021)	Loss 3.0003 (2.8167)	Entropy 1.25317 (1.25698)	Top-1 acc 55.078 (56.308)	Top-5 acc 73.047 (78.296)	lr 0.01831
Train [42][1750/3239]	Time 0.236 (0.589)	Data Time 0.001 (0.020)	Loss 2.8291 (2.8168)	Entropy 1.25308 (1.25696)	Top-1 acc 55.078 (56.306)	Top-5 acc 79.688 (78.291)	lr 0.01831
Train [42][1760/3239]	Time 0.225 (0.588)	Data Time 0.001 (0.020)	Loss 2.7446 (2.8172)	Entropy 1.25300 (1.25693)	Top-1 acc 60.156 (56.299)	Top-5 acc 80.078 (78.287)	lr 0.01831
Train [42][1770/3239]	Time 2.411 (0.587)	Data Time 0.001 (0.020)	Loss 2.8174 (2.8171)	Entropy 1.25300 (1.25691)	Top-1 acc 56.250 (56.303)	Top-5 acc 79.297 (78.290)	lr 0.01831
Train [42][1780/3239]	Time 0.259 (0.585)	Data Time 0.002 (0.020)	Loss 2.8321 (2.8172)	Entropy 1.25288 (1.25689)	Top-1 acc 54.297 (56.296)	Top-5 acc 77.344 (78.292)	lr 0.01831
Train [42][1790/3239]	Time 0.243 (0.585)	Data Time 0.001 (0.020)	Loss 2.8284 (2.8171)	Entropy 1.25286 (1.25687)	Top-1 acc 56.641 (56.302)	Top-5 acc 74.609 (78.290)	lr 0.01830
Train [42][1800/3239]	Time 0.226 (0.584)	Data Time 0.001 (0.020)	Loss 2.8068 (2.8172)	Entropy 1.25285 (1.25684)	Top-1 acc 61.328 (56.304)	Top-5 acc 77.344 (78.287)	lr 0.01830
Train [42][1810/3239]	Time 0.213 (0.583)	Data Time 0.001 (0.020)	Loss 2.7946 (2.8173)	Entropy 1.25276 (1.25682)	Top-1 acc 53.125 (56.307)	Top-5 acc 80.859 (78.287)	lr 0.01830
Train [42][1820/3239]	Time 0.227 (0.583)	Data Time 0.001 (0.020)	Loss 2.8039 (2.8172)	Entropy 1.25267 (1.25680)	Top-1 acc 53.906 (56.303)	Top-5 acc 78.125 (78.287)	lr 0.01830
Train [42][1830/3239]	Time 0.217 (0.582)	Data Time 0.001 (0.020)	Loss 2.7607 (2.8174)	Entropy 1.25262 (1.25678)	Top-1 acc 55.469 (56.294)	Top-5 acc 78.906 (78.282)	lr 0.01830
Train [42][1840/3239]	Time 0.208 (0.581)	Data Time 0.001 (0.019)	Loss 2.8415 (2.8175)	Entropy 1.25258 (1.25675)	Top-1 acc 55.859 (56.287)	Top-5 acc 79.688 (78.280)	lr 0.01830
Train [42][1850/3239]	Time 0.228 (0.581)	Data Time 0.007 (0.019)	Loss 2.9121 (2.8172)	Entropy 1.25250 (1.25673)	Top-1 acc 55.078 (56.291)	Top-5 acc 76.562 (78.285)	lr 0.01830
Train [42][1860/3239]	Time 0.274 (0.580)	Data Time 0.001 (0.019)	Loss 2.7359 (2.8172)	Entropy 1.25244 (1.25671)	Top-1 acc 57.031 (56.291)	Top-5 acc 80.469 (78.288)	lr 0.01830
Train [42][1870/3239]	Time 0.347 (0.579)	Data Time 0.001 (0.019)	Loss 2.7039 (2.8172)	Entropy 1.25227 (1.25669)	Top-1 acc 55.469 (56.284)	Top-5 acc 82.422 (78.287)	lr 0.01830
Train [42][1880/3239]	Time 2.350 (0.579)	Data Time 0.001 (0.019)	Loss 2.9200 (2.8167)	Entropy 1.25227 (1.25666)	Top-1 acc 52.344 (56.298)	Top-5 acc 75.781 (78.296)	lr 0.01830
Train [42][1890/3239]	Time 0.258 (0.577)	Data Time 0.001 (0.019)	Loss 2.7734 (2.8168)	Entropy 1.25223 (1.25664)	Top-1 acc 55.078 (56.298)	Top-5 acc 80.859 (78.294)	lr 0.01830
Train [42][1900/3239]	Time 0.223 (0.576)	Data Time 0.002 (0.019)	Loss 2.9200 (2.8172)	Entropy 1.25213 (1.25661)	Top-1 acc 53.516 (56.283)	Top-5 acc 77.344 (78.285)	lr 0.01830
Train [42][1910/3239]	Time 0.233 (0.576)	Data Time 0.001 (0.019)	Loss 2.8668 (2.8176)	Entropy 1.25211 (1.25659)	Top-1 acc 56.641 (56.277)	Top-5 acc 74.609 (78.276)	lr 0.01829
Train [42][1920/3239]	Time 0.366 (0.575)	Data Time 0.001 (0.019)	Loss 3.0977 (2.8176)	Entropy 1.25209 (1.25657)	Top-1 acc 50.781 (56.274)	Top-5 acc 71.875 (78.272)	lr 0.01829
Train [42][1930/3239]	Time 0.225 (0.574)	Data Time 0.001 (0.019)	Loss 2.8858 (2.8178)	Entropy 1.25200 (1.25654)	Top-1 acc 55.078 (56.270)	Top-5 acc 76.562 (78.266)	lr 0.01829
Train [42][1940/3239]	Time 0.213 (0.574)	Data Time 0.001 (0.019)	Loss 2.8412 (2.8180)	Entropy 1.25200 (1.25652)	Top-1 acc 55.859 (56.264)	Top-5 acc 76.562 (78.261)	lr 0.01829
Train [42][1950/3239]	Time 0.229 (0.573)	Data Time 0.001 (0.018)	Loss 2.8527 (2.8178)	Entropy 1.25175 (1.25650)	Top-1 acc 56.641 (56.273)	Top-5 acc 77.344 (78.269)	lr 0.01829
Train [42][1960/3239]	Time 0.374 (0.595)	Data Time 0.003 (0.018)	Loss 2.8947 (2.8177)	Entropy 1.25168 (1.25647)	Top-1 acc 54.688 (56.279)	Top-5 acc 77.344 (78.270)	lr 0.01829
Train [42][1970/3239]	Time 0.320 (0.595)	Data Time 0.002 (0.018)	Loss 2.8142 (2.8177)	Entropy 1.25165 (1.25645)	Top-1 acc 57.031 (56.280)	Top-5 acc 80.469 (78.269)	lr 0.01829
Train [42][1980/3239]	Time 0.223 (0.594)	Data Time 0.001 (0.018)	Loss 2.8418 (2.8177)	Entropy 1.25161 (1.25642)	Top-1 acc 55.469 (56.276)	Top-5 acc 77.734 (78.270)	lr 0.01829
Train [42][1990/3239]	Time 2.358 (0.593)	Data Time 0.002 (0.018)	Loss 2.8192 (2.8178)	Entropy 1.25161 (1.25640)	Top-1 acc 58.984 (56.278)	Top-5 acc 76.172 (78.267)	lr 0.01829
Train [42][2000/3239]	Time 0.265 (0.592)	Data Time 0.002 (0.018)	Loss 2.8142 (2.8179)	Entropy 1.25161 (1.25638)	Top-1 acc 57.031 (56.275)	Top-5 acc 78.516 (78.263)	lr 0.01829
Train [42][2010/3239]	Time 0.230 (0.591)	Data Time 0.002 (0.018)	Loss 2.8697 (2.8183)	Entropy 1.25157 (1.25635)	Top-1 acc 54.297 (56.265)	Top-5 acc 77.734 (78.255)	lr 0.01829
Train [42][2020/3239]	Time 0.325 (0.591)	Data Time 0.001 (0.018)	Loss 2.9446 (2.8183)	Entropy 1.25159 (1.25633)	Top-1 acc 54.688 (56.266)	Top-5 acc 76.562 (78.256)	lr 0.01828
Train [42][2030/3239]	Time 0.233 (0.590)	Data Time 0.001 (0.018)	Loss 2.9405 (2.8184)	Entropy 1.25154 (1.25631)	Top-1 acc 55.469 (56.264)	Top-5 acc 77.344 (78.256)	lr 0.01828
Train [42][2040/3239]	Time 0.239 (0.589)	Data Time 0.001 (0.018)	Loss 3.0760 (2.8185)	Entropy 1.25153 (1.25628)	Top-1 acc 50.781 (56.264)	Top-5 acc 74.609 (78.253)	lr 0.01828
Train [42][2050/3239]	Time 0.213 (0.588)	Data Time 0.001 (0.018)	Loss 2.9387 (2.8186)	Entropy 1.25145 (1.25626)	Top-1 acc 51.562 (56.252)	Top-5 acc 77.344 (78.253)	lr 0.01828
Train [42][2060/3239]	Time 0.207 (0.588)	Data Time 0.001 (0.018)	Loss 2.7339 (2.8185)	Entropy 1.25141 (1.25623)	Top-1 acc 56.250 (56.251)	Top-5 acc 81.641 (78.255)	lr 0.01828
Train [42][2070/3239]	Time 0.173 (0.587)	Data Time 0.001 (0.018)	Loss 2.8242 (2.8187)	Entropy 1.25135 (1.25621)	Top-1 acc 57.422 (56.245)	Top-5 acc 76.953 (78.255)	lr 0.01828
Train [42][2080/3239]	Time 0.217 (0.587)	Data Time 0.001 (0.017)	Loss 2.8514 (2.8185)	Entropy 1.25127 (1.25619)	Top-1 acc 57.031 (56.247)	Top-5 acc 76.562 (78.260)	lr 0.01828
Train [42][2090/3239]	Time 0.229 (0.586)	Data Time 0.001 (0.017)	Loss 2.9743 (2.8186)	Entropy 1.25127 (1.25616)	Top-1 acc 54.297 (56.241)	Top-5 acc 77.344 (78.255)	lr 0.01828
Train [42][2100/3239]	Time 2.388 (0.585)	Data Time 0.001 (0.017)	Loss 2.8172 (2.8186)	Entropy 1.25127 (1.25614)	Top-1 acc 55.859 (56.239)	Top-5 acc 76.172 (78.256)	lr 0.01828
Train [42][2110/3239]	Time 0.222 (0.584)	Data Time 0.001 (0.017)	Loss 2.8142 (2.8185)	Entropy 1.25128 (1.25612)	Top-1 acc 59.375 (56.239)	Top-5 acc 78.516 (78.260)	lr 0.01828
Train [42][2120/3239]	Time 0.323 (0.583)	Data Time 0.001 (0.017)	Loss 2.9107 (2.8191)	Entropy 1.25132 (1.25610)	Top-1 acc 56.641 (56.232)	Top-5 acc 73.828 (78.249)	lr 0.01828
Train [42][2130/3239]	Time 0.224 (0.583)	Data Time 0.002 (0.017)	Loss 2.7109 (2.8188)	Entropy 1.25123 (1.25607)	Top-1 acc 59.766 (56.235)	Top-5 acc 82.031 (78.255)	lr 0.01827
Train [42][2140/3239]	Time 0.218 (0.582)	Data Time 0.002 (0.017)	Loss 2.7586 (2.8187)	Entropy 1.25120 (1.25605)	Top-1 acc 57.422 (56.243)	Top-5 acc 77.734 (78.257)	lr 0.01827
Train [42][2150/3239]	Time 0.248 (0.582)	Data Time 0.002 (0.017)	Loss 2.6579 (2.8184)	Entropy 1.25136 (1.25603)	Top-1 acc 61.719 (56.250)	Top-5 acc 80.859 (78.259)	lr 0.01827
Train [42][2160/3239]	Time 0.226 (0.581)	Data Time 0.001 (0.017)	Loss 2.9326 (2.8185)	Entropy 1.25133 (1.25601)	Top-1 acc 54.297 (56.243)	Top-5 acc 75.781 (78.257)	lr 0.01827
Train [42][2170/3239]	Time 0.214 (0.580)	Data Time 0.001 (0.017)	Loss 2.8822 (2.8185)	Entropy 1.25128 (1.25598)	Top-1 acc 57.812 (56.241)	Top-5 acc 76.172 (78.255)	lr 0.01827
Train [42][2180/3239]	Time 0.223 (0.580)	Data Time 0.001 (0.017)	Loss 2.9345 (2.8186)	Entropy 1.25124 (1.25596)	Top-1 acc 50.781 (56.239)	Top-5 acc 75.000 (78.253)	lr 0.01827
Train [42][2190/3239]	Time 0.223 (0.579)	Data Time 0.001 (0.017)	Loss 2.9486 (2.8187)	Entropy 1.25121 (1.25594)	Top-1 acc 52.344 (56.236)	Top-5 acc 73.828 (78.247)	lr 0.01827
Train [42][2200/3239]	Time 0.266 (0.579)	Data Time 0.001 (0.017)	Loss 3.0463 (2.8190)	Entropy 1.25117 (1.25592)	Top-1 acc 49.219 (56.225)	Top-5 acc 71.875 (78.244)	lr 0.01827
Train [42][2210/3239]	Time 2.525 (0.578)	Data Time 0.001 (0.017)	Loss 2.6258 (2.8188)	Entropy 1.25117 (1.25590)	Top-1 acc 62.109 (56.231)	Top-5 acc 82.812 (78.246)	lr 0.01827
Train [42][2220/3239]	Time 0.225 (0.577)	Data Time 0.001 (0.016)	Loss 2.7794 (2.8189)	Entropy 1.25114 (1.25588)	Top-1 acc 57.422 (56.229)	Top-5 acc 78.516 (78.243)	lr 0.01827
Train [42][2230/3239]	Time 0.206 (0.576)	Data Time 0.001 (0.016)	Loss 2.7155 (2.8187)	Entropy 1.25110 (1.25586)	Top-1 acc 58.594 (56.234)	Top-5 acc 79.688 (78.248)	lr 0.01827
Train [42][2240/3239]	Time 0.223 (0.576)	Data Time 0.001 (0.016)	Loss 3.0134 (2.8187)	Entropy 1.25108 (1.25583)	Top-1 acc 55.078 (56.237)	Top-5 acc 72.266 (78.248)	lr 0.01826
Train [42][2250/3239]	Time 0.240 (0.575)	Data Time 0.001 (0.016)	Loss 2.8927 (2.8187)	Entropy 1.25099 (1.25581)	Top-1 acc 58.203 (56.239)	Top-5 acc 78.516 (78.249)	lr 0.01826
Train [42][2260/3239]	Time 0.159 (0.575)	Data Time 0.001 (0.016)	Loss 2.7736 (2.8190)	Entropy 1.25097 (1.25579)	Top-1 acc 57.031 (56.234)	Top-5 acc 79.297 (78.248)	lr 0.01826
Train [42][2270/3239]	Time 0.214 (0.574)	Data Time 0.001 (0.016)	Loss 2.6887 (2.8190)	Entropy 1.25096 (1.25577)	Top-1 acc 57.031 (56.229)	Top-5 acc 82.812 (78.246)	lr 0.01826
Train [42][2280/3239]	Time 0.306 (0.574)	Data Time 0.001 (0.016)	Loss 2.7226 (2.8190)	Entropy 1.25091 (1.25575)	Top-1 acc 56.641 (56.230)	Top-5 acc 81.641 (78.248)	lr 0.01826
Train [42][2290/3239]	Time 0.222 (0.573)	Data Time 0.001 (0.016)	Loss 2.7443 (2.8192)	Entropy 1.25091 (1.25573)	Top-1 acc 59.766 (56.230)	Top-5 acc 78.125 (78.244)	lr 0.01826
Train [42][2300/3239]	Time 0.203 (0.573)	Data Time 0.001 (0.016)	Loss 3.0375 (2.8196)	Entropy 1.25063 (1.25571)	Top-1 acc 51.172 (56.219)	Top-5 acc 72.656 (78.233)	lr 0.01826
Train [42][2310/3239]	Time 0.166 (0.572)	Data Time 0.002 (0.016)	Loss 2.8057 (2.8197)	Entropy 1.25064 (1.25568)	Top-1 acc 55.469 (56.216)	Top-5 acc 77.344 (78.236)	lr 0.01826
Train [42][2320/3239]	Time 46.308 (0.590)	Data Time 0.001 (0.016)	Loss 2.8671 (2.8199)	Entropy 1.25064 (1.25566)	Top-1 acc 55.859 (56.214)	Top-5 acc 75.781 (78.231)	lr 0.01826
Train [42][2330/3239]	Time 0.393 (0.590)	Data Time 0.006 (0.016)	Loss 2.8230 (2.8199)	Entropy 1.25058 (1.25564)	Top-1 acc 54.688 (56.213)	Top-5 acc 81.250 (78.231)	lr 0.01826
Train [42][2340/3239]	Time 0.218 (0.589)	Data Time 0.001 (0.016)	Loss 2.7611 (2.8203)	Entropy 1.25050 (1.25562)	Top-1 acc 57.812 (56.207)	Top-5 acc 77.734 (78.222)	lr 0.01826
Train [42][2350/3239]	Time 0.214 (0.588)	Data Time 0.002 (0.016)	Loss 2.7594 (2.8202)	Entropy 1.25049 (1.25560)	Top-1 acc 57.812 (56.203)	Top-5 acc 79.688 (78.224)	lr 0.01825
Train [42][2360/3239]	Time 0.216 (0.588)	Data Time 0.001 (0.016)	Loss 2.8913 (2.8203)	Entropy 1.25043 (1.25558)	Top-1 acc 51.953 (56.200)	Top-5 acc 78.125 (78.224)	lr 0.01825
Train [42][2370/3239]	Time 0.243 (0.587)	Data Time 0.002 (0.016)	Loss 2.6192 (2.8203)	Entropy 1.25037 (1.25555)	Top-1 acc 61.328 (56.200)	Top-5 acc 80.859 (78.224)	lr 0.01825
Train [42][2380/3239]	Time 0.232 (0.587)	Data Time 0.001 (0.016)	Loss 2.8539 (2.8204)	Entropy 1.25037 (1.25553)	Top-1 acc 57.812 (56.201)	Top-5 acc 79.688 (78.222)	lr 0.01825
Train [42][2390/3239]	Time 0.225 (0.586)	Data Time 0.001 (0.015)	Loss 2.8821 (2.8206)	Entropy 1.25034 (1.25551)	Top-1 acc 53.516 (56.196)	Top-5 acc 76.953 (78.218)	lr 0.01825
Train [42][2400/3239]	Time 0.228 (0.586)	Data Time 0.001 (0.015)	Loss 2.7395 (2.8207)	Entropy 1.25031 (1.25549)	Top-1 acc 58.203 (56.197)	Top-5 acc 82.031 (78.213)	lr 0.01825
Train [42][2410/3239]	Time 0.213 (0.585)	Data Time 0.003 (0.015)	Loss 2.8928 (2.8212)	Entropy 1.25033 (1.25547)	Top-1 acc 55.469 (56.187)	Top-5 acc 78.125 (78.206)	lr 0.01825
Train [42][2420/3239]	Time 0.227 (0.585)	Data Time 0.001 (0.015)	Loss 2.9071 (2.8214)	Entropy 1.25026 (1.25545)	Top-1 acc 54.688 (56.187)	Top-5 acc 76.953 (78.202)	lr 0.01825
Train [42][2430/3239]	Time 2.582 (0.584)	Data Time 0.001 (0.015)	Loss 2.9887 (2.8217)	Entropy 1.25026 (1.25542)	Top-1 acc 50.391 (56.182)	Top-5 acc 76.562 (78.197)	lr 0.01825
Train [42][2440/3239]	Time 0.309 (0.583)	Data Time 0.001 (0.015)	Loss 2.7277 (2.8215)	Entropy 1.25014 (1.25540)	Top-1 acc 56.641 (56.191)	Top-5 acc 80.859 (78.202)	lr 0.01825
Train [42][2450/3239]	Time 0.206 (0.582)	Data Time 0.001 (0.015)	Loss 2.8974 (2.8216)	Entropy 1.25014 (1.25538)	Top-1 acc 50.000 (56.188)	Top-5 acc 75.391 (78.200)	lr 0.01825
Train [42][2460/3239]	Time 0.203 (0.582)	Data Time 0.001 (0.015)	Loss 2.9944 (2.8218)	Entropy 1.25010 (1.25536)	Top-1 acc 51.562 (56.188)	Top-5 acc 74.609 (78.195)	lr 0.01824
Train [42][2470/3239]	Time 0.236 (0.581)	Data Time 0.002 (0.015)	Loss 2.9396 (2.8221)	Entropy 1.25005 (1.25534)	Top-1 acc 51.562 (56.179)	Top-5 acc 75.000 (78.187)	lr 0.01824
Train [42][2480/3239]	Time 0.235 (0.581)	Data Time 0.002 (0.015)	Loss 3.2081 (2.8222)	Entropy 1.25010 (1.25532)	Top-1 acc 48.047 (56.177)	Top-5 acc 69.922 (78.183)	lr 0.01824
Train [42][2490/3239]	Time 0.212 (0.580)	Data Time 0.001 (0.015)	Loss 2.9019 (2.8221)	Entropy 1.25007 (1.25530)	Top-1 acc 54.688 (56.176)	Top-5 acc 76.172 (78.185)	lr 0.01824
Train [42][2500/3239]	Time 0.222 (0.580)	Data Time 0.001 (0.015)	Loss 2.8649 (2.8220)	Entropy 1.25003 (1.25528)	Top-1 acc 56.641 (56.182)	Top-5 acc 79.688 (78.186)	lr 0.01824
Train [42][2510/3239]	Time 0.234 (0.579)	Data Time 0.001 (0.015)	Loss 3.0387 (2.8221)	Entropy 1.24994 (1.25525)	Top-1 acc 51.562 (56.183)	Top-5 acc 73.828 (78.182)	lr 0.01824
Train [42][2520/3239]	Time 0.206 (0.579)	Data Time 0.001 (0.015)	Loss 2.9469 (2.8221)	Entropy 1.24990 (1.25523)	Top-1 acc 51.953 (56.186)	Top-5 acc 75.391 (78.181)	lr 0.01824
Train [42][2530/3239]	Time 0.253 (0.578)	Data Time 0.002 (0.015)	Loss 3.0294 (2.8223)	Entropy 1.24978 (1.25521)	Top-1 acc 51.953 (56.183)	Top-5 acc 73.438 (78.177)	lr 0.01824
Train [42][2540/3239]	Time 2.493 (0.578)	Data Time 0.001 (0.015)	Loss 2.8418 (2.8222)	Entropy 1.24978 (1.25519)	Top-1 acc 56.641 (56.184)	Top-5 acc 76.562 (78.178)	lr 0.01824
Train [42][2550/3239]	Time 0.343 (0.577)	Data Time 0.001 (0.015)	Loss 2.7357 (2.8224)	Entropy 1.24973 (1.25517)	Top-1 acc 58.984 (56.181)	Top-5 acc 79.688 (78.174)	lr 0.01824
Train [42][2560/3239]	Time 0.232 (0.576)	Data Time 0.001 (0.015)	Loss 2.8707 (2.8223)	Entropy 1.24959 (1.25515)	Top-1 acc 53.906 (56.180)	Top-5 acc 76.562 (78.175)	lr 0.01824
Train [42][2570/3239]	Time 0.243 (0.576)	Data Time 0.001 (0.015)	Loss 2.8350 (2.8221)	Entropy 1.24955 (1.25513)	Top-1 acc 57.812 (56.186)	Top-5 acc 78.125 (78.178)	lr 0.01824
Train [42][2580/3239]	Time 0.241 (0.575)	Data Time 0.001 (0.014)	Loss 2.6058 (2.8220)	Entropy 1.24954 (1.25510)	Top-1 acc 59.766 (56.191)	Top-5 acc 82.031 (78.181)	lr 0.01823
Train [42][2590/3239]	Time 0.239 (0.575)	Data Time 0.001 (0.014)	Loss 2.8259 (2.8222)	Entropy 1.24951 (1.25508)	Top-1 acc 57.422 (56.192)	Top-5 acc 77.344 (78.174)	lr 0.01823
Train [42][2600/3239]	Time 0.327 (0.574)	Data Time 0.001 (0.014)	Loss 3.7419 (2.8226)	Entropy 1.24942 (1.25506)	Top-1 acc 37.500 (56.182)	Top-5 acc 62.500 (78.166)	lr 0.01823
Train [42][2610/3239]	Time 0.212 (0.574)	Data Time 0.002 (0.014)	Loss 2.8591 (2.8226)	Entropy 1.24944 (1.25504)	Top-1 acc 54.688 (56.183)	Top-5 acc 77.344 (78.167)	lr 0.01823
Train [42][2620/3239]	Time 0.221 (0.573)	Data Time 0.001 (0.014)	Loss 2.7160 (2.8226)	Entropy 1.24942 (1.25502)	Top-1 acc 59.375 (56.182)	Top-5 acc 80.078 (78.165)	lr 0.01823
Train [42][2630/3239]	Time 0.232 (0.573)	Data Time 0.001 (0.014)	Loss 2.6234 (2.8227)	Entropy 1.24937 (1.25500)	Top-1 acc 60.156 (56.178)	Top-5 acc 83.203 (78.166)	lr 0.01823
Train [42][2640/3239]	Time 0.218 (0.573)	Data Time 0.001 (0.014)	Loss 2.8781 (2.8227)	Entropy 1.24935 (1.25498)	Top-1 acc 51.953 (56.175)	Top-5 acc 78.906 (78.170)	lr 0.01823
Train [42][2650/3239]	Time 0.227 (0.572)	Data Time 0.002 (0.014)	Loss 2.8666 (2.8226)	Entropy 1.24927 (1.25495)	Top-1 acc 55.078 (56.179)	Top-5 acc 76.953 (78.170)	lr 0.01823
Train [42][2660/3239]	Time 0.207 (0.572)	Data Time 0.002 (0.014)	Loss 2.7810 (2.8228)	Entropy 1.24923 (1.25493)	Top-1 acc 61.328 (56.175)	Top-5 acc 80.469 (78.169)	lr 0.01823
Train [42][2670/3239]	Time 0.218 (0.572)	Data Time 0.001 (0.014)	Loss 2.6810 (2.8227)	Entropy 1.24918 (1.25491)	Top-1 acc 59.375 (56.176)	Top-5 acc 79.688 (78.169)	lr 0.01823
Train [42][2680/3239]	Time 0.239 (0.589)	Data Time 0.003 (0.014)	Loss 2.7610 (2.8228)	Entropy 1.24916 (1.25489)	Top-1 acc 58.203 (56.170)	Top-5 acc 78.125 (78.168)	lr 0.01823
Train [42][2690/3239]	Time 0.224 (0.588)	Data Time 0.002 (0.014)	Loss 2.8378 (2.8229)	Entropy 1.24909 (1.25487)	Top-1 acc 58.984 (56.166)	Top-5 acc 78.125 (78.167)	lr 0.01822
Train [42][2700/3239]	Time 0.339 (0.588)	Data Time 0.002 (0.014)	Loss 2.9186 (2.8232)	Entropy 1.24907 (1.25485)	Top-1 acc 54.688 (56.161)	Top-5 acc 75.781 (78.160)	lr 0.01822
Train [42][2710/3239]	Time 0.207 (0.587)	Data Time 0.001 (0.014)	Loss 2.9821 (2.8232)	Entropy 1.24905 (1.25483)	Top-1 acc 56.641 (56.159)	Top-5 acc 76.172 (78.159)	lr 0.01822
Train [42][2720/3239]	Time 0.221 (0.587)	Data Time 0.001 (0.014)	Loss 3.1909 (2.8232)	Entropy 1.24890 (1.25481)	Top-1 acc 45.312 (56.158)	Top-5 acc 70.312 (78.156)	lr 0.01822
Train [42][2730/3239]	Time 0.252 (0.586)	Data Time 0.001 (0.014)	Loss 2.7897 (2.8232)	Entropy 1.24889 (1.25478)	Top-1 acc 56.641 (56.159)	Top-5 acc 78.516 (78.159)	lr 0.01822
Train [42][2740/3239]	Time 0.222 (0.586)	Data Time 0.001 (0.014)	Loss 2.8565 (2.8232)	Entropy 1.24886 (1.25476)	Top-1 acc 54.688 (56.158)	Top-5 acc 77.344 (78.160)	lr 0.01822
Train [42][2750/3239]	Time 0.333 (0.586)	Data Time 0.001 (0.014)	Loss 2.7448 (2.8229)	Entropy 1.24882 (1.25474)	Top-1 acc 59.766 (56.165)	Top-5 acc 80.859 (78.164)	lr 0.01822
Train [42][2760/3239]	Time 0.220 (0.585)	Data Time 0.001 (0.014)	Loss 2.8192 (2.8230)	Entropy 1.24885 (1.25472)	Top-1 acc 53.125 (56.160)	Top-5 acc 80.859 (78.162)	lr 0.01822
Train [42][2770/3239]	Time 0.249 (0.585)	Data Time 0.001 (0.014)	Loss 2.9793 (2.8231)	Entropy 1.24877 (1.25470)	Top-1 acc 50.391 (56.154)	Top-5 acc 75.391 (78.158)	lr 0.01822
Train [42][2780/3239]	Time 0.206 (0.584)	Data Time 0.002 (0.014)	Loss 2.7117 (2.8230)	Entropy 1.24876 (1.25468)	Top-1 acc 60.938 (56.161)	Top-5 acc 79.688 (78.161)	lr 0.01822
Train [42][2790/3239]	Time 0.211 (0.584)	Data Time 0.001 (0.014)	Loss 2.8196 (2.8229)	Entropy 1.24874 (1.25466)	Top-1 acc 56.641 (56.165)	Top-5 acc 79.297 (78.162)	lr 0.01822
Train [42][2800/3239]	Time 0.319 (0.583)	Data Time 0.001 (0.013)	Loss 2.7544 (2.8229)	Entropy 1.24873 (1.25463)	Top-1 acc 60.156 (56.165)	Top-5 acc 78.906 (78.162)	lr 0.01821
Train [42][2810/3239]	Time 0.219 (0.583)	Data Time 0.001 (0.013)	Loss 2.8286 (2.8230)	Entropy 1.24866 (1.25461)	Top-1 acc 56.250 (56.165)	Top-5 acc 76.953 (78.159)	lr 0.01821
Train [42][2820/3239]	Time 0.268 (0.583)	Data Time 0.002 (0.013)	Loss 2.7921 (2.8228)	Entropy 1.24862 (1.25459)	Top-1 acc 58.594 (56.169)	Top-5 acc 79.297 (78.160)	lr 0.01821
Train [42][2830/3239]	Time 0.272 (0.582)	Data Time 0.003 (0.013)	Loss 2.7615 (2.8228)	Entropy 1.24862 (1.25457)	Top-1 acc 59.766 (56.169)	Top-5 acc 78.125 (78.159)	lr 0.01821
Train [42][2840/3239]	Time 0.250 (0.582)	Data Time 0.001 (0.013)	Loss 2.8094 (2.8228)	Entropy 1.24859 (1.25455)	Top-1 acc 57.031 (56.170)	Top-5 acc 80.469 (78.159)	lr 0.01821
Train [42][2850/3239]	Time 0.347 (0.581)	Data Time 0.002 (0.013)	Loss 2.9620 (2.8226)	Entropy 1.24853 (1.25453)	Top-1 acc 50.391 (56.176)	Top-5 acc 76.953 (78.160)	lr 0.01821
Train [42][2860/3239]	Time 0.209 (0.581)	Data Time 0.001 (0.013)	Loss 2.8808 (2.8227)	Entropy 1.24852 (1.25451)	Top-1 acc 50.781 (56.171)	Top-5 acc 73.438 (78.155)	lr 0.01821
Train [42][2870/3239]	Time 0.226 (0.580)	Data Time 0.001 (0.013)	Loss 2.9323 (2.8231)	Entropy 1.24848 (1.25449)	Top-1 acc 53.125 (56.162)	Top-5 acc 76.172 (78.151)	lr 0.01821
Train [42][2880/3239]	Time 0.218 (0.580)	Data Time 0.001 (0.013)	Loss 2.6773 (2.8228)	Entropy 1.24847 (1.25447)	Top-1 acc 60.547 (56.165)	Top-5 acc 81.250 (78.157)	lr 0.01821
Train [42][2890/3239]	Time 0.272 (0.580)	Data Time 0.001 (0.013)	Loss 2.7944 (2.8228)	Entropy 1.24845 (1.25445)	Top-1 acc 58.203 (56.164)	Top-5 acc 76.953 (78.158)	lr 0.01821
Train [42][2900/3239]	Time 0.349 (0.579)	Data Time 0.001 (0.013)	Loss 2.8278 (2.8229)	Entropy 1.24840 (1.25443)	Top-1 acc 54.297 (56.160)	Top-5 acc 78.516 (78.160)	lr 0.01821
Train [42][2910/3239]	Time 0.225 (0.579)	Data Time 0.002 (0.013)	Loss 2.9158 (2.8226)	Entropy 1.24842 (1.25440)	Top-1 acc 52.734 (56.168)	Top-5 acc 76.172 (78.166)	lr 0.01820
Train [42][2920/3239]	Time 0.252 (0.578)	Data Time 0.001 (0.013)	Loss 2.7603 (2.8226)	Entropy 1.24834 (1.25438)	Top-1 acc 55.078 (56.168)	Top-5 acc 78.516 (78.166)	lr 0.01820
Train [42][2930/3239]	Time 0.285 (0.578)	Data Time 0.001 (0.013)	Loss 2.8437 (2.8227)	Entropy 1.24830 (1.25436)	Top-1 acc 55.078 (56.165)	Top-5 acc 77.734 (78.162)	lr 0.01820
Train [42][2940/3239]	Time 0.287 (0.578)	Data Time 0.001 (0.013)	Loss 2.8055 (2.8230)	Entropy 1.24828 (1.25434)	Top-1 acc 59.375 (56.158)	Top-5 acc 79.688 (78.157)	lr 0.01820
Train [42][2950/3239]	Time 0.324 (0.577)	Data Time 0.001 (0.013)	Loss 3.0840 (2.8230)	Entropy 1.24823 (1.25432)	Top-1 acc 51.172 (56.158)	Top-5 acc 75.781 (78.159)	lr 0.01820
Train [42][2960/3239]	Time 0.214 (0.577)	Data Time 0.001 (0.013)	Loss 2.8826 (2.8230)	Entropy 1.24807 (1.25430)	Top-1 acc 56.250 (56.155)	Top-5 acc 75.781 (78.158)	lr 0.01820
Train [42][2970/3239]	Time 0.248 (0.576)	Data Time 0.001 (0.013)	Loss 2.8781 (2.8230)	Entropy 1.24806 (1.25428)	Top-1 acc 60.547 (56.156)	Top-5 acc 75.781 (78.156)	lr 0.01820
Train [42][2980/3239]	Time 0.223 (0.576)	Data Time 0.001 (0.013)	Loss 2.8972 (2.8230)	Entropy 1.24796 (1.25426)	Top-1 acc 52.344 (56.156)	Top-5 acc 75.781 (78.156)	lr 0.01820
Train [42][2990/3239]	Time 0.245 (0.576)	Data Time 0.002 (0.013)	Loss 2.7810 (2.8229)	Entropy 1.24792 (1.25424)	Top-1 acc 56.641 (56.154)	Top-5 acc 78.125 (78.154)	lr 0.01820
Train [42][3000/3239]	Time 0.282 (0.575)	Data Time 0.001 (0.013)	Loss 2.7540 (2.8230)	Entropy 1.24786 (1.25422)	Top-1 acc 56.641 (56.155)	Top-5 acc 80.078 (78.154)	lr 0.01820
Train [42][3010/3239]	Time 0.440 (0.589)	Data Time 0.004 (0.013)	Loss 2.7547 (2.8232)	Entropy 1.24783 (1.25420)	Top-1 acc 55.078 (56.150)	Top-5 acc 78.516 (78.150)	lr 0.01820
Train [42][3020/3239]	Time 0.242 (0.589)	Data Time 0.002 (0.013)	Loss 2.5613 (2.8231)	Entropy 1.24784 (1.25418)	Top-1 acc 64.844 (56.154)	Top-5 acc 82.812 (78.151)	lr 0.01819
Train [42][3030/3239]	Time 0.275 (0.589)	Data Time 0.002 (0.013)	Loss 2.9041 (2.8230)	Entropy 1.24782 (1.25415)	Top-1 acc 51.953 (56.157)	Top-5 acc 75.391 (78.151)	lr 0.01819
Train [42][3040/3239]	Time 0.250 (0.588)	Data Time 0.001 (0.013)	Loss 2.8120 (2.8231)	Entropy 1.24782 (1.25413)	Top-1 acc 55.859 (56.156)	Top-5 acc 80.469 (78.151)	lr 0.01819
Train [42][3050/3239]	Time 0.217 (0.588)	Data Time 0.001 (0.012)	Loss 3.0137 (2.8231)	Entropy 1.24778 (1.25411)	Top-1 acc 50.781 (56.158)	Top-5 acc 75.781 (78.151)	lr 0.01819
Train [42][3060/3239]	Time 0.189 (0.587)	Data Time 0.001 (0.012)	Loss 2.8265 (2.8232)	Entropy 1.24776 (1.25409)	Top-1 acc 54.297 (56.155)	Top-5 acc 76.562 (78.149)	lr 0.01819
Train [42][3070/3239]	Time 0.248 (0.587)	Data Time 0.001 (0.012)	Loss 2.8906 (2.8231)	Entropy 1.24775 (1.25407)	Top-1 acc 55.469 (56.157)	Top-5 acc 75.391 (78.149)	lr 0.01819
Train [42][3080/3239]	Time 0.231 (0.587)	Data Time 0.001 (0.012)	Loss 2.8312 (2.8231)	Entropy 1.24768 (1.25405)	Top-1 acc 55.859 (56.156)	Top-5 acc 77.734 (78.149)	lr 0.01819
Train [42][3090/3239]	Time 0.273 (0.586)	Data Time 0.001 (0.012)	Loss 2.8254 (2.8232)	Entropy 1.24751 (1.25403)	Top-1 acc 53.906 (56.156)	Top-5 acc 75.391 (78.146)	lr 0.01819
Train [42][3100/3239]	Time 0.240 (0.586)	Data Time 0.001 (0.012)	Loss 2.7890 (2.8236)	Entropy 1.24750 (1.25401)	Top-1 acc 57.422 (56.150)	Top-5 acc 79.688 (78.139)	lr 0.01819
Train [42][3110/3239]	Time 0.209 (0.585)	Data Time 0.001 (0.012)	Loss 2.7573 (2.8235)	Entropy 1.24749 (1.25399)	Top-1 acc 58.984 (56.156)	Top-5 acc 76.562 (78.141)	lr 0.01819
Train [42][3120/3239]	Time 0.229 (0.585)	Data Time 0.001 (0.012)	Loss 2.7670 (2.8236)	Entropy 1.24746 (1.25397)	Top-1 acc 57.031 (56.153)	Top-5 acc 80.859 (78.139)	lr 0.01819
Train [42][3130/3239]	Time 0.285 (0.585)	Data Time 0.001 (0.012)	Loss 2.7756 (2.8236)	Entropy 1.24744 (1.25395)	Top-1 acc 58.203 (56.149)	Top-5 acc 80.078 (78.137)	lr 0.01818
Train [42][3140/3239]	Time 0.228 (0.584)	Data Time 0.001 (0.012)	Loss 2.8085 (2.8235)	Entropy 1.24738 (1.25393)	Top-1 acc 58.594 (56.154)	Top-5 acc 76.562 (78.138)	lr 0.01818
Train [42][3150/3239]	Time 0.337 (0.584)	Data Time 0.001 (0.012)	Loss 2.9067 (2.8234)	Entropy 1.24736 (1.25391)	Top-1 acc 53.906 (56.155)	Top-5 acc 78.125 (78.141)	lr 0.01818
Train [42][3160/3239]	Time 0.257 (0.583)	Data Time 0.001 (0.012)	Loss 2.7029 (2.8236)	Entropy 1.24728 (1.25388)	Top-1 acc 57.812 (56.152)	Top-5 acc 79.297 (78.136)	lr 0.01818
Train [42][3170/3239]	Time 0.239 (0.583)	Data Time 0.001 (0.012)	Loss 2.7816 (2.8238)	Entropy 1.24723 (1.25386)	Top-1 acc 57.031 (56.150)	Top-5 acc 77.344 (78.132)	lr 0.01818
Train [42][3180/3239]	Time 0.222 (0.582)	Data Time 0.000 (0.012)	Loss 2.8112 (2.8239)	Entropy 1.24719 (1.25384)	Top-1 acc 58.984 (56.149)	Top-5 acc 78.906 (78.128)	lr 0.01818
Train [42][3190/3239]	Time 0.200 (0.582)	Data Time 0.000 (0.012)	Loss 2.8208 (2.8238)	Entropy 1.24716 (1.25382)	Top-1 acc 58.984 (56.154)	Top-5 acc 78.125 (78.130)	lr 0.01818
Train [42][3200/3239]	Time 0.334 (0.582)	Data Time 0.000 (0.012)	Loss 2.7820 (2.8240)	Entropy 1.24711 (1.25380)	Top-1 acc 59.375 (56.152)	Top-5 acc 76.172 (78.126)	lr 0.01818
Train [42][3210/3239]	Time 0.225 (0.581)	Data Time 0.000 (0.012)	Loss 2.8960 (2.8240)	Entropy 1.24710 (1.25378)	Top-1 acc 53.516 (56.151)	Top-5 acc 78.125 (78.125)	lr 0.01818
Train [42][3220/3239]	Time 0.223 (0.581)	Data Time 0.000 (0.012)	Loss 2.8095 (2.8240)	Entropy 1.24699 (1.25376)	Top-1 acc 59.375 (56.154)	Top-5 acc 76.562 (78.124)	lr 0.01818
Train [42][3230/3239]	Time 0.211 (0.580)	Data Time 0.000 (0.012)	Loss 2.8542 (2.8240)	Entropy 1.24698 (1.25374)	Top-1 acc 52.734 (56.154)	Top-5 acc 80.078 (78.125)	lr 0.01818
Train [42][3239/3239]	Time 2.259 (0.580)	Data Time 0.000 (0.012)	Loss 3.2054 (2.8240)	Entropy 1.24698 (1.25372)	Top-1 acc 48.148 (56.155)	Top-5 acc 72.840 (78.124)	lr 0.01817
==========Valid [42/120]	loss 1.653	top-1 acc 62.920 (62.920)	top-5 acc 83.840	Train top-1 56.155	top-5 78.124	Entropy 1.24698	Latency-None: 0.000ms	Flops: 546.53M
Train [43][0/3239]	Time 34.629 (34.629)	Data Time 32.245 (32.245)	Loss 2.6147 (2.6147)	Entropy 1.24695 (1.24695)	Top-1 acc 64.844 (64.844)	Top-5 acc 82.031 (82.031)	lr 0.01817
Train [43][10/3239]	Time 2.449 (3.616)	Data Time 0.001 (2.933)	Loss 2.6575 (2.7754)	Entropy 1.24695 (1.24695)	Top-1 acc 60.938 (57.102)	Top-5 acc 81.250 (79.190)	lr 0.01817
Train [43][20/3239]	Time 0.229 (2.007)	Data Time 0.001 (1.537)	Loss 2.7352 (2.7833)	Entropy 1.24688 (1.24692)	Top-1 acc 56.641 (57.013)	Top-5 acc 80.469 (79.315)	lr 0.01817
Train [43][30/3239]	Time 0.223 (1.505)	Data Time 0.002 (1.042)	Loss 2.5010 (2.7564)	Entropy 1.24685 (1.24690)	Top-1 acc 59.766 (57.623)	Top-5 acc 87.109 (79.713)	lr 0.01817
Train [43][40/3239]	Time 0.231 (1.250)	Data Time 0.001 (0.788)	Loss 2.9299 (2.7673)	Entropy 1.24684 (1.24688)	Top-1 acc 55.859 (57.517)	Top-5 acc 75.781 (79.668)	lr 0.01817
Train [43][50/3239]	Time 0.221 (1.096)	Data Time 0.001 (0.634)	Loss 2.9022 (2.7644)	Entropy 1.24681 (1.24687)	Top-1 acc 52.344 (57.621)	Top-5 acc 75.781 (79.634)	lr 0.01817
Train [43][60/3239]	Time 0.230 (0.991)	Data Time 0.001 (0.530)	Loss 2.7989 (2.7631)	Entropy 1.24673 (1.24685)	Top-1 acc 58.203 (57.608)	Top-5 acc 80.859 (79.591)	lr 0.01817
Train [43][70/3239]	Time 0.324 (0.917)	Data Time 0.001 (0.456)	Loss 2.8985 (2.7685)	Entropy 1.24673 (1.24684)	Top-1 acc 54.688 (57.532)	Top-5 acc 78.125 (79.550)	lr 0.01817
Train [43][80/3239]	Time 0.214 (0.860)	Data Time 0.001 (0.400)	Loss 2.7674 (2.7758)	Entropy 1.24669 (1.24682)	Top-1 acc 57.031 (57.374)	Top-5 acc 79.688 (79.316)	lr 0.01817
Train [43][90/3239]	Time 0.217 (0.815)	Data Time 0.001 (0.356)	Loss 2.7346 (2.7747)	Entropy 1.24672 (1.24681)	Top-1 acc 60.156 (57.375)	Top-5 acc 78.125 (79.275)	lr 0.01817
Train [43][100/3239]	Time 0.218 (0.779)	Data Time 0.001 (0.321)	Loss 2.6649 (2.7697)	Entropy 1.24669 (1.24680)	Top-1 acc 58.594 (57.507)	Top-5 acc 80.078 (79.278)	lr 0.01817
Train [43][110/3239]	Time 0.262 (1.154)	Data Time 0.005 (0.292)	Loss 2.8129 (2.7730)	Entropy 1.24660 (1.24679)	Top-1 acc 58.984 (57.425)	Top-5 acc 78.125 (79.216)	lr 0.01816
Train [43][120/3239]	Time 4.453 (1.123)	Data Time 0.003 (0.268)	Loss 2.9387 (2.7747)	Entropy 1.24660 (1.24677)	Top-1 acc 52.344 (57.367)	Top-5 acc 76.953 (79.148)	lr 0.01816
Train [43][130/3239]	Time 0.217 (1.055)	Data Time 0.001 (0.248)	Loss 2.8325 (2.7733)	Entropy 1.24652 (1.24675)	Top-1 acc 56.641 (57.443)	Top-5 acc 75.391 (79.112)	lr 0.01816
Train [43][140/3239]	Time 0.251 (1.012)	Data Time 0.002 (0.230)	Loss 2.7961 (2.7739)	Entropy 1.24649 (1.24673)	Top-1 acc 54.297 (57.444)	Top-5 acc 80.469 (79.133)	lr 0.01816
Train [43][150/3239]	Time 0.233 (0.975)	Data Time 0.001 (0.215)	Loss 2.8385 (2.7746)	Entropy 1.24607 (1.24669)	Top-1 acc 55.859 (57.404)	Top-5 acc 80.469 (79.085)	lr 0.01816
Train [43][160/3239]	Time 0.221 (0.942)	Data Time 0.002 (0.202)	Loss 2.5666 (2.7723)	Entropy 1.24603 (1.24665)	Top-1 acc 62.500 (57.419)	Top-5 acc 85.156 (79.146)	lr 0.01816
Train [43][170/3239]	Time 0.318 (0.916)	Data Time 0.002 (0.190)	Loss 2.7930 (2.7771)	Entropy 1.24588 (1.24661)	Top-1 acc 55.859 (57.283)	Top-5 acc 80.078 (79.030)	lr 0.01816
Train [43][180/3239]	Time 0.169 (0.890)	Data Time 0.001 (0.180)	Loss 2.6193 (2.7749)	Entropy 1.24585 (1.24657)	Top-1 acc 62.891 (57.342)	Top-5 acc 84.766 (79.085)	lr 0.01816
Train [43][190/3239]	Time 0.233 (0.868)	Data Time 0.001 (0.171)	Loss 2.7788 (2.7750)	Entropy 1.24580 (1.24653)	Top-1 acc 57.812 (57.371)	Top-5 acc 79.297 (79.107)	lr 0.01816
Train [43][200/3239]	Time 0.223 (0.848)	Data Time 0.001 (0.162)	Loss 2.7151 (2.7765)	Entropy 1.24574 (1.24649)	Top-1 acc 55.859 (57.294)	Top-5 acc 77.734 (79.056)	lr 0.01816
Train [43][210/3239]	Time 0.278 (0.830)	Data Time 0.002 (0.155)	Loss 2.9586 (2.7779)	Entropy 1.24571 (1.24646)	Top-1 acc 55.469 (57.285)	Top-5 acc 76.562 (79.027)	lr 0.01816
Train [43][220/3239]	Time 0.332 (0.814)	Data Time 0.001 (0.148)	Loss 2.7286 (2.7779)	Entropy 1.24572 (1.24642)	Top-1 acc 55.469 (57.349)	Top-5 acc 78.906 (79.014)	lr 0.01816
Train [43][230/3239]	Time 2.522 (0.798)	Data Time 0.001 (0.142)	Loss 2.9313 (2.7801)	Entropy 1.24572 (1.24639)	Top-1 acc 56.250 (57.322)	Top-5 acc 75.391 (78.982)	lr 0.01815
Train [43][240/3239]	Time 0.245 (0.775)	Data Time 0.001 (0.136)	Loss 2.9724 (2.7791)	Entropy 1.24562 (1.24636)	Top-1 acc 50.781 (57.360)	Top-5 acc 75.391 (79.012)	lr 0.01815
Train [43][250/3239]	Time 0.232 (0.762)	Data Time 0.001 (0.130)	Loss 2.7692 (2.7818)	Entropy 1.24558 (1.24633)	Top-1 acc 60.547 (57.328)	Top-5 acc 79.688 (78.958)	lr 0.01815
Train [43][260/3239]	Time 0.238 (0.750)	Data Time 0.001 (0.125)	Loss 2.7400 (2.7818)	Entropy 1.24560 (1.24630)	Top-1 acc 56.250 (57.299)	Top-5 acc 81.641 (78.972)	lr 0.01815
Train [43][270/3239]	Time 0.277 (0.740)	Data Time 0.003 (0.121)	Loss 2.8066 (2.7843)	Entropy 1.24558 (1.24628)	Top-1 acc 57.422 (57.250)	Top-5 acc 81.250 (78.955)	lr 0.01815
Train [43][280/3239]	Time 0.218 (0.730)	Data Time 0.001 (0.117)	Loss 2.9343 (2.7855)	Entropy 1.24555 (1.24625)	Top-1 acc 52.344 (57.255)	Top-5 acc 76.172 (78.908)	lr 0.01815
Train [43][290/3239]	Time 0.215 (0.720)	Data Time 0.002 (0.113)	Loss 2.9224 (2.7858)	Entropy 1.24550 (1.24623)	Top-1 acc 52.734 (57.253)	Top-5 acc 75.391 (78.879)	lr 0.01815
Train [43][300/3239]	Time 0.203 (0.711)	Data Time 0.001 (0.109)	Loss 2.7881 (2.7861)	Entropy 1.24548 (1.24620)	Top-1 acc 55.078 (57.230)	Top-5 acc 78.906 (78.909)	lr 0.01815
Train [43][310/3239]	Time 0.204 (0.703)	Data Time 0.001 (0.106)	Loss 2.7170 (2.7866)	Entropy 1.24544 (1.24618)	Top-1 acc 56.641 (57.182)	Top-5 acc 80.469 (78.914)	lr 0.01815
Train [43][320/3239]	Time 0.228 (0.695)	Data Time 0.002 (0.102)	Loss 2.8584 (2.7869)	Entropy 1.24543 (1.24616)	Top-1 acc 57.812 (57.158)	Top-5 acc 77.734 (78.898)	lr 0.01815
Train [43][330/3239]	Time 0.262 (0.689)	Data Time 0.001 (0.099)	Loss 2.8482 (2.7871)	Entropy 1.24540 (1.24613)	Top-1 acc 55.859 (57.155)	Top-5 acc 74.609 (78.874)	lr 0.01815
Train [43][340/3239]	Time 2.280 (0.682)	Data Time 0.001 (0.097)	Loss 2.8776 (2.7878)	Entropy 1.24540 (1.24611)	Top-1 acc 56.250 (57.153)	Top-5 acc 79.297 (78.860)	lr 0.01814
Train [43][350/3239]	Time 0.225 (0.669)	Data Time 0.001 (0.094)	Loss 2.8886 (2.7896)	Entropy 1.24540 (1.24609)	Top-1 acc 52.734 (57.088)	Top-5 acc 76.953 (78.836)	lr 0.01814
Train [43][360/3239]	Time 0.202 (0.662)	Data Time 0.001 (0.091)	Loss 2.9139 (2.7913)	Entropy 1.24531 (1.24607)	Top-1 acc 53.125 (57.078)	Top-5 acc 75.391 (78.776)	lr 0.01814
Train [43][370/3239]	Time 0.218 (0.657)	Data Time 0.001 (0.089)	Loss 2.8972 (2.7928)	Entropy 1.24530 (1.24605)	Top-1 acc 55.078 (57.034)	Top-5 acc 77.344 (78.760)	lr 0.01814
Train [43][380/3239]	Time 0.329 (0.652)	Data Time 0.001 (0.087)	Loss 2.9449 (2.7938)	Entropy 1.24530 (1.24603)	Top-1 acc 54.297 (57.008)	Top-5 acc 79.297 (78.729)	lr 0.01814
Train [43][390/3239]	Time 0.229 (0.647)	Data Time 0.001 (0.084)	Loss 3.1077 (2.7972)	Entropy 1.24524 (1.24601)	Top-1 acc 50.000 (56.943)	Top-5 acc 74.219 (78.678)	lr 0.01814
Train [43][400/3239]	Time 0.237 (0.642)	Data Time 0.002 (0.082)	Loss 2.7307 (2.7957)	Entropy 1.24518 (1.24599)	Top-1 acc 59.766 (56.961)	Top-5 acc 78.516 (78.686)	lr 0.01814
Train [43][410/3239]	Time 0.214 (0.638)	Data Time 0.001 (0.080)	Loss 2.9815 (2.7962)	Entropy 1.24516 (1.24597)	Top-1 acc 52.734 (56.940)	Top-5 acc 76.562 (78.673)	lr 0.01814
Train [43][420/3239]	Time 0.212 (0.633)	Data Time 0.001 (0.078)	Loss 2.7072 (2.7975)	Entropy 1.24513 (1.24595)	Top-1 acc 58.984 (56.905)	Top-5 acc 80.859 (78.642)	lr 0.01814
Train [43][430/3239]	Time 0.206 (0.629)	Data Time 0.001 (0.077)	Loss 2.7249 (2.7959)	Entropy 1.24504 (1.24593)	Top-1 acc 59.375 (56.944)	Top-5 acc 79.688 (78.666)	lr 0.01814
Train [43][440/3239]	Time 0.217 (0.625)	Data Time 0.001 (0.075)	Loss 2.8237 (2.7949)	Entropy 1.24503 (1.24591)	Top-1 acc 57.422 (56.984)	Top-5 acc 80.859 (78.691)	lr 0.01814
Train [43][450/3239]	Time 2.430 (0.621)	Data Time 0.001 (0.073)	Loss 2.7247 (2.7958)	Entropy 1.24503 (1.24589)	Top-1 acc 60.547 (56.965)	Top-5 acc 78.125 (78.665)	lr 0.01813
Train [43][460/3239]	Time 0.221 (0.613)	Data Time 0.001 (0.072)	Loss 2.8178 (2.7952)	Entropy 1.24497 (1.24587)	Top-1 acc 53.516 (56.956)	Top-5 acc 78.906 (78.675)	lr 0.01813
Train [43][470/3239]	Time 0.205 (0.609)	Data Time 0.001 (0.070)	Loss 2.5006 (2.7947)	Entropy 1.24493 (1.24585)	Top-1 acc 62.500 (56.961)	Top-5 acc 83.984 (78.684)	lr 0.01813
Train [43][480/3239]	Time 0.282 (0.702)	Data Time 0.004 (0.069)	Loss 2.6270 (2.7948)	Entropy 1.24491 (1.24583)	Top-1 acc 58.203 (56.961)	Top-5 acc 83.984 (78.668)	lr 0.01813
Train [43][490/3239]	Time 0.242 (0.698)	Data Time 0.002 (0.068)	Loss 2.7358 (2.7956)	Entropy 1.24490 (1.24581)	Top-1 acc 55.078 (56.937)	Top-5 acc 78.906 (78.652)	lr 0.01813
Train [43][500/3239]	Time 0.256 (0.694)	Data Time 0.001 (0.066)	Loss 2.7576 (2.7964)	Entropy 1.24482 (1.24579)	Top-1 acc 57.422 (56.900)	Top-5 acc 77.344 (78.640)	lr 0.01813
Train [43][510/3239]	Time 0.234 (0.689)	Data Time 0.002 (0.065)	Loss 2.6479 (2.7952)	Entropy 1.24481 (1.24577)	Top-1 acc 59.375 (56.921)	Top-5 acc 81.641 (78.659)	lr 0.01813
Train [43][520/3239]	Time 0.223 (0.685)	Data Time 0.002 (0.064)	Loss 2.5932 (2.7937)	Entropy 1.24478 (1.24576)	Top-1 acc 62.891 (56.990)	Top-5 acc 81.641 (78.674)	lr 0.01813
Train [43][530/3239]	Time 0.287 (0.681)	Data Time 0.002 (0.063)	Loss 2.7086 (2.7929)	Entropy 1.24463 (1.24574)	Top-1 acc 60.547 (57.004)	Top-5 acc 81.250 (78.702)	lr 0.01813
Train [43][540/3239]	Time 0.233 (0.677)	Data Time 0.002 (0.061)	Loss 2.7446 (2.7941)	Entropy 1.24459 (1.24572)	Top-1 acc 58.203 (56.976)	Top-5 acc 78.906 (78.687)	lr 0.01813
Train [43][550/3239]	Time 0.263 (0.674)	Data Time 0.001 (0.060)	Loss 2.9749 (2.7948)	Entropy 1.24455 (1.24570)	Top-1 acc 55.469 (56.962)	Top-5 acc 73.828 (78.674)	lr 0.01813
Train [43][560/3239]	Time 2.466 (0.670)	Data Time 0.001 (0.059)	Loss 2.6912 (2.7957)	Entropy 1.24455 (1.24568)	Top-1 acc 58.203 (56.941)	Top-5 acc 81.250 (78.660)	lr 0.01812
Train [43][570/3239]	Time 0.233 (0.662)	Data Time 0.001 (0.058)	Loss 2.8463 (2.7966)	Entropy 1.24447 (1.24565)	Top-1 acc 56.641 (56.903)	Top-5 acc 78.516 (78.636)	lr 0.01812
Train [43][580/3239]	Time 0.215 (0.659)	Data Time 0.002 (0.057)	Loss 2.9371 (2.7971)	Entropy 1.24448 (1.24563)	Top-1 acc 55.078 (56.895)	Top-5 acc 79.297 (78.633)	lr 0.01812
Train [43][590/3239]	Time 0.221 (0.655)	Data Time 0.002 (0.056)	Loss 2.7972 (2.7970)	Entropy 1.24441 (1.24561)	Top-1 acc 56.250 (56.894)	Top-5 acc 76.953 (78.627)	lr 0.01812
Train [43][600/3239]	Time 0.252 (0.652)	Data Time 0.002 (0.056)	Loss 2.6440 (2.7962)	Entropy 1.24433 (1.24559)	Top-1 acc 61.328 (56.905)	Top-5 acc 83.203 (78.636)	lr 0.01812
Train [43][610/3239]	Time 0.220 (0.649)	Data Time 0.001 (0.055)	Loss 2.8914 (2.7951)	Entropy 1.24423 (1.24557)	Top-1 acc 53.125 (56.913)	Top-5 acc 73.828 (78.656)	lr 0.01812
Train [43][620/3239]	Time 0.218 (0.646)	Data Time 0.001 (0.054)	Loss 2.8776 (2.7951)	Entropy 1.24417 (1.24555)	Top-1 acc 56.641 (56.902)	Top-5 acc 78.516 (78.662)	lr 0.01812
Train [43][630/3239]	Time 0.235 (0.643)	Data Time 0.001 (0.053)	Loss 2.8697 (2.7946)	Entropy 1.24415 (1.24553)	Top-1 acc 55.859 (56.905)	Top-5 acc 80.469 (78.674)	lr 0.01812
Train [43][640/3239]	Time 0.210 (0.641)	Data Time 0.001 (0.052)	Loss 2.6265 (2.7944)	Entropy 1.24417 (1.24551)	Top-1 acc 61.328 (56.916)	Top-5 acc 80.469 (78.680)	lr 0.01812
Train [43][650/3239]	Time 0.211 (0.638)	Data Time 0.001 (0.051)	Loss 2.9486 (2.7941)	Entropy 1.24411 (1.24549)	Top-1 acc 52.734 (56.926)	Top-5 acc 77.344 (78.694)	lr 0.01812
Train [43][660/3239]	Time 0.219 (0.635)	Data Time 0.001 (0.051)	Loss 2.7046 (2.7939)	Entropy 1.24406 (1.24546)	Top-1 acc 60.938 (56.921)	Top-5 acc 82.031 (78.705)	lr 0.01812
Train [43][670/3239]	Time 2.387 (0.632)	Data Time 0.002 (0.050)	Loss 2.7762 (2.7942)	Entropy 1.24406 (1.24544)	Top-1 acc 52.344 (56.902)	Top-5 acc 78.516 (78.690)	lr 0.01811
Train [43][680/3239]	Time 0.224 (0.626)	Data Time 0.001 (0.049)	Loss 2.8033 (2.7935)	Entropy 1.24403 (1.24542)	Top-1 acc 56.250 (56.919)	Top-5 acc 77.344 (78.691)	lr 0.01811
Train [43][690/3239]	Time 0.356 (0.624)	Data Time 0.001 (0.048)	Loss 2.7918 (2.7938)	Entropy 1.24402 (1.24540)	Top-1 acc 58.203 (56.915)	Top-5 acc 75.391 (78.678)	lr 0.01811
Train [43][700/3239]	Time 0.231 (0.622)	Data Time 0.001 (0.048)	Loss 2.8476 (2.7938)	Entropy 1.24386 (1.24538)	Top-1 acc 54.688 (56.916)	Top-5 acc 77.734 (78.678)	lr 0.01811
Train [43][710/3239]	Time 0.227 (0.620)	Data Time 0.001 (0.047)	Loss 2.8081 (2.7940)	Entropy 1.24375 (1.24536)	Top-1 acc 53.516 (56.908)	Top-5 acc 79.688 (78.673)	lr 0.01811
Train [43][720/3239]	Time 0.238 (0.618)	Data Time 0.001 (0.047)	Loss 2.7697 (2.7942)	Entropy 1.24373 (1.24534)	Top-1 acc 51.953 (56.911)	Top-5 acc 79.688 (78.667)	lr 0.01811
Train [43][730/3239]	Time 0.165 (0.616)	Data Time 0.001 (0.046)	Loss 2.8329 (2.7942)	Entropy 1.24367 (1.24531)	Top-1 acc 55.469 (56.917)	Top-5 acc 75.391 (78.661)	lr 0.01811
Train [43][740/3239]	Time 0.323 (0.614)	Data Time 0.001 (0.045)	Loss 2.6698 (2.7936)	Entropy 1.24354 (1.24529)	Top-1 acc 58.984 (56.930)	Top-5 acc 82.031 (78.675)	lr 0.01811
Train [43][750/3239]	Time 0.207 (0.611)	Data Time 0.001 (0.045)	Loss 2.7150 (2.7935)	Entropy 1.24352 (1.24527)	Top-1 acc 62.891 (56.932)	Top-5 acc 80.469 (78.674)	lr 0.01811
Train [43][760/3239]	Time 0.217 (0.610)	Data Time 0.001 (0.044)	Loss 3.0233 (2.7943)	Entropy 1.24341 (1.24524)	Top-1 acc 52.734 (56.922)	Top-5 acc 69.922 (78.647)	lr 0.01811
Train [43][770/3239]	Time 0.213 (0.607)	Data Time 0.001 (0.044)	Loss 2.8361 (2.7945)	Entropy 1.24337 (1.24522)	Top-1 acc 53.906 (56.919)	Top-5 acc 79.688 (78.649)	lr 0.01811
Train [43][780/3239]	Time 2.434 (0.605)	Data Time 0.001 (0.043)	Loss 2.7679 (2.7945)	Entropy 1.24337 (1.24520)	Top-1 acc 57.812 (56.929)	Top-5 acc 77.344 (78.646)	lr 0.01810
Train [43][790/3239]	Time 0.220 (0.601)	Data Time 0.002 (0.043)	Loss 2.7802 (2.7946)	Entropy 1.24332 (1.24517)	Top-1 acc 57.031 (56.907)	Top-5 acc 77.344 (78.644)	lr 0.01810
Train [43][800/3239]	Time 0.217 (0.599)	Data Time 0.001 (0.042)	Loss 2.8245 (2.7952)	Entropy 1.24330 (1.24515)	Top-1 acc 54.297 (56.891)	Top-5 acc 76.953 (78.633)	lr 0.01810
Train [43][810/3239]	Time 0.227 (0.597)	Data Time 0.001 (0.042)	Loss 2.8973 (2.7953)	Entropy 1.24327 (1.24513)	Top-1 acc 53.516 (56.896)	Top-5 acc 77.344 (78.637)	lr 0.01810
Train [43][820/3239]	Time 0.221 (0.596)	Data Time 0.001 (0.041)	Loss 2.5301 (2.7949)	Entropy 1.24327 (1.24510)	Top-1 acc 64.453 (56.904)	Top-5 acc 85.156 (78.639)	lr 0.01810
Train [43][830/3239]	Time 0.220 (0.594)	Data Time 0.001 (0.041)	Loss 2.7549 (2.7954)	Entropy 1.24325 (1.24508)	Top-1 acc 59.766 (56.889)	Top-5 acc 82.812 (78.647)	lr 0.01810
Train [43][840/3239]	Time 0.426 (0.648)	Data Time 0.004 (0.040)	Loss 2.8020 (2.7957)	Entropy 1.24322 (1.24506)	Top-1 acc 56.250 (56.890)	Top-5 acc 78.516 (78.641)	lr 0.01810
Train [43][850/3239]	Time 0.221 (0.646)	Data Time 0.002 (0.040)	Loss 2.6874 (2.7959)	Entropy 1.24322 (1.24504)	Top-1 acc 60.547 (56.886)	Top-5 acc 80.078 (78.641)	lr 0.01810
Train [43][860/3239]	Time 0.234 (0.644)	Data Time 0.002 (0.039)	Loss 3.0139 (2.7961)	Entropy 1.24315 (1.24502)	Top-1 acc 55.469 (56.889)	Top-5 acc 75.391 (78.635)	lr 0.01810
Train [43][870/3239]	Time 0.238 (0.641)	Data Time 0.002 (0.039)	Loss 2.8917 (2.7959)	Entropy 1.24310 (1.24500)	Top-1 acc 56.641 (56.907)	Top-5 acc 80.078 (78.643)	lr 0.01810
Train [43][880/3239]	Time 0.227 (0.639)	Data Time 0.001 (0.038)	Loss 2.6786 (2.7959)	Entropy 1.24302 (1.24497)	Top-1 acc 61.719 (56.906)	Top-5 acc 79.297 (78.644)	lr 0.01810
Train [43][890/3239]	Time 2.558 (0.638)	Data Time 0.001 (0.038)	Loss 2.7070 (2.7967)	Entropy 1.24302 (1.24495)	Top-1 acc 59.375 (56.889)	Top-5 acc 80.078 (78.632)	lr 0.01809
Train [43][900/3239]	Time 0.240 (0.633)	Data Time 0.001 (0.038)	Loss 2.6037 (2.7964)	Entropy 1.24301 (1.24493)	Top-1 acc 62.109 (56.877)	Top-5 acc 81.250 (78.637)	lr 0.01809
Train [43][910/3239]	Time 0.231 (0.631)	Data Time 0.001 (0.037)	Loss 2.5587 (2.7959)	Entropy 1.24284 (1.24491)	Top-1 acc 61.328 (56.890)	Top-5 acc 82.812 (78.650)	lr 0.01809
Train [43][920/3239]	Time 0.251 (0.629)	Data Time 0.001 (0.037)	Loss 2.8517 (2.7955)	Entropy 1.24277 (1.24488)	Top-1 acc 55.469 (56.904)	Top-5 acc 77.344 (78.656)	lr 0.01809
Train [43][930/3239]	Time 0.211 (0.628)	Data Time 0.002 (0.037)	Loss 2.8334 (2.7952)	Entropy 1.24273 (1.24486)	Top-1 acc 57.422 (56.910)	Top-5 acc 78.516 (78.662)	lr 0.01809
Train [43][940/3239]	Time 0.227 (0.626)	Data Time 0.001 (0.036)	Loss 2.7647 (2.7951)	Entropy 1.24269 (1.24484)	Top-1 acc 62.109 (56.919)	Top-5 acc 76.562 (78.654)	lr 0.01809
Train [43][950/3239]	Time 0.218 (0.624)	Data Time 0.001 (0.036)	Loss 2.6195 (2.7951)	Entropy 1.24266 (1.24482)	Top-1 acc 60.938 (56.917)	Top-5 acc 81.250 (78.657)	lr 0.01809
Train [43][960/3239]	Time 0.219 (0.623)	Data Time 0.001 (0.035)	Loss 2.8663 (2.7957)	Entropy 1.24265 (1.24479)	Top-1 acc 54.688 (56.900)	Top-5 acc 77.344 (78.648)	lr 0.01809
Train [43][970/3239]	Time 0.261 (0.621)	Data Time 0.001 (0.035)	Loss 2.6812 (2.7960)	Entropy 1.24266 (1.24477)	Top-1 acc 58.203 (56.894)	Top-5 acc 83.203 (78.652)	lr 0.01809
Train [43][980/3239]	Time 0.215 (0.619)	Data Time 0.001 (0.035)	Loss 2.8639 (2.7966)	Entropy 1.24263 (1.24475)	Top-1 acc 54.688 (56.883)	Top-5 acc 75.391 (78.643)	lr 0.01809
Train [43][990/3239]	Time 0.345 (0.618)	Data Time 0.001 (0.034)	Loss 2.8211 (2.7968)	Entropy 1.24261 (1.24473)	Top-1 acc 57.422 (56.881)	Top-5 acc 75.391 (78.641)	lr 0.01809
Train [43][1000/3239]	Time 2.422 (0.616)	Data Time 0.001 (0.034)	Loss 2.7390 (2.7968)	Entropy 1.24261 (1.24471)	Top-1 acc 59.375 (56.880)	Top-5 acc 80.078 (78.636)	lr 0.01808
Train [43][1010/3239]	Time 0.248 (0.612)	Data Time 0.001 (0.034)	Loss 2.7436 (2.7970)	Entropy 1.24258 (1.24469)	Top-1 acc 55.859 (56.881)	Top-5 acc 78.906 (78.628)	lr 0.01808
Train [43][1020/3239]	Time 0.231 (0.611)	Data Time 0.002 (0.033)	Loss 2.9485 (2.7969)	Entropy 1.24263 (1.24467)	Top-1 acc 52.344 (56.871)	Top-5 acc 79.297 (78.634)	lr 0.01808
Train [43][1030/3239]	Time 0.248 (0.609)	Data Time 0.002 (0.033)	Loss 2.7753 (2.7972)	Entropy 1.24254 (1.24465)	Top-1 acc 58.984 (56.863)	Top-5 acc 78.516 (78.633)	lr 0.01808
Train [43][1040/3239]	Time 0.350 (0.608)	Data Time 0.001 (0.033)	Loss 2.9030 (2.7974)	Entropy 1.24253 (1.24463)	Top-1 acc 55.859 (56.849)	Top-5 acc 76.172 (78.639)	lr 0.01808
Train [43][1050/3239]	Time 0.236 (0.606)	Data Time 0.001 (0.033)	Loss 2.8820 (2.7976)	Entropy 1.24252 (1.24461)	Top-1 acc 58.203 (56.842)	Top-5 acc 76.953 (78.628)	lr 0.01808
Train [43][1060/3239]	Time 0.207 (0.605)	Data Time 0.001 (0.032)	Loss 2.4831 (2.7975)	Entropy 1.24248 (1.24459)	Top-1 acc 64.453 (56.847)	Top-5 acc 84.766 (78.630)	lr 0.01808
Train [43][1070/3239]	Time 0.258 (0.604)	Data Time 0.001 (0.032)	Loss 2.8173 (2.7979)	Entropy 1.24246 (1.24457)	Top-1 acc 57.031 (56.829)	Top-5 acc 79.297 (78.625)	lr 0.01808
Train [43][1080/3239]	Time 0.217 (0.602)	Data Time 0.001 (0.032)	Loss 2.7395 (2.7979)	Entropy 1.24240 (1.24455)	Top-1 acc 60.938 (56.830)	Top-5 acc 78.125 (78.624)	lr 0.01808
Train [43][1090/3239]	Time 0.312 (0.601)	Data Time 0.001 (0.031)	Loss 2.7305 (2.7980)	Entropy 1.24235 (1.24453)	Top-1 acc 59.766 (56.822)	Top-5 acc 80.469 (78.622)	lr 0.01808
Train [43][1100/3239]	Time 0.201 (0.600)	Data Time 0.001 (0.031)	Loss 2.4853 (2.7978)	Entropy 1.24226 (1.24451)	Top-1 acc 62.109 (56.824)	Top-5 acc 83.984 (78.628)	lr 0.01808
Train [43][1110/3239]	Time 2.461 (0.599)	Data Time 0.001 (0.031)	Loss 2.8565 (2.7978)	Entropy 1.24226 (1.24449)	Top-1 acc 53.906 (56.821)	Top-5 acc 76.953 (78.625)	lr 0.01807
Train [43][1120/3239]	Time 0.261 (0.595)	Data Time 0.002 (0.031)	Loss 2.7431 (2.7975)	Entropy 1.24226 (1.24447)	Top-1 acc 60.547 (56.825)	Top-5 acc 80.859 (78.633)	lr 0.01807
Train [43][1130/3239]	Time 0.282 (0.595)	Data Time 0.002 (0.030)	Loss 2.7644 (2.7975)	Entropy 1.24225 (1.24445)	Top-1 acc 57.031 (56.828)	Top-5 acc 80.078 (78.635)	lr 0.01807
Train [43][1140/3239]	Time 0.280 (0.594)	Data Time 0.002 (0.030)	Loss 3.0495 (2.7975)	Entropy 1.24221 (1.24443)	Top-1 acc 50.000 (56.825)	Top-5 acc 75.000 (78.641)	lr 0.01807
Train [43][1150/3239]	Time 0.222 (0.593)	Data Time 0.001 (0.030)	Loss 3.0194 (2.7978)	Entropy 1.24215 (1.24441)	Top-1 acc 49.609 (56.817)	Top-5 acc 75.000 (78.640)	lr 0.01807
Train [43][1160/3239]	Time 0.224 (0.592)	Data Time 0.001 (0.030)	Loss 2.8008 (2.7972)	Entropy 1.24209 (1.24439)	Top-1 acc 53.516 (56.829)	Top-5 acc 77.734 (78.644)	lr 0.01807
Train [43][1170/3239]	Time 0.214 (0.591)	Data Time 0.001 (0.029)	Loss 2.8258 (2.7975)	Entropy 1.24207 (1.24437)	Top-1 acc 57.031 (56.823)	Top-5 acc 77.344 (78.638)	lr 0.01807
Train [43][1180/3239]	Time 0.231 (0.590)	Data Time 0.002 (0.029)	Loss 2.7927 (2.7976)	Entropy 1.24199 (1.24435)	Top-1 acc 53.906 (56.818)	Top-5 acc 76.562 (78.635)	lr 0.01807
Train [43][1190/3239]	Time 0.305 (0.589)	Data Time 0.001 (0.029)	Loss 2.8385 (2.7970)	Entropy 1.24201 (1.24433)	Top-1 acc 56.250 (56.830)	Top-5 acc 78.125 (78.642)	lr 0.01807
Train [43][1200/3239]	Time 0.496 (0.625)	Data Time 0.003 (0.029)	Loss 2.7900 (2.7969)	Entropy 1.24199 (1.24431)	Top-1 acc 57.422 (56.827)	Top-5 acc 76.953 (78.647)	lr 0.01807
Train [43][1210/3239]	Time 0.248 (0.624)	Data Time 0.002 (0.028)	Loss 2.9074 (2.7975)	Entropy 1.24202 (1.24429)	Top-1 acc 54.688 (56.822)	Top-5 acc 77.734 (78.633)	lr 0.01807
Train [43][1220/3239]	Time 2.462 (0.623)	Data Time 0.002 (0.028)	Loss 2.6745 (2.7974)	Entropy 1.24202 (1.24427)	Top-1 acc 60.547 (56.835)	Top-5 acc 80.469 (78.636)	lr 0.01806
Train [43][1230/3239]	Time 0.253 (0.620)	Data Time 0.001 (0.028)	Loss 2.7543 (2.7979)	Entropy 1.24201 (1.24425)	Top-1 acc 60.938 (56.834)	Top-5 acc 81.641 (78.626)	lr 0.01806
Train [43][1240/3239]	Time 0.223 (0.619)	Data Time 0.001 (0.028)	Loss 3.0278 (2.7984)	Entropy 1.24196 (1.24423)	Top-1 acc 52.344 (56.824)	Top-5 acc 74.219 (78.617)	lr 0.01806
Train [43][1250/3239]	Time 0.223 (0.618)	Data Time 0.001 (0.028)	Loss 2.7106 (2.7982)	Entropy 1.24193 (1.24422)	Top-1 acc 57.422 (56.826)	Top-5 acc 78.125 (78.620)	lr 0.01806
Train [43][1260/3239]	Time 0.210 (0.616)	Data Time 0.001 (0.027)	Loss 2.5837 (2.7977)	Entropy 1.24187 (1.24420)	Top-1 acc 63.672 (56.832)	Top-5 acc 83.984 (78.629)	lr 0.01806
Train [43][1270/3239]	Time 0.235 (0.615)	Data Time 0.002 (0.027)	Loss 2.7591 (2.7975)	Entropy 1.24185 (1.24418)	Top-1 acc 59.375 (56.836)	Top-5 acc 79.297 (78.634)	lr 0.01806
Train [43][1280/3239]	Time 0.210 (0.614)	Data Time 0.001 (0.027)	Loss 2.6523 (2.7976)	Entropy 1.24179 (1.24416)	Top-1 acc 55.859 (56.829)	Top-5 acc 80.859 (78.632)	lr 0.01806
Train [43][1290/3239]	Time 0.226 (0.613)	Data Time 0.005 (0.027)	Loss 2.8036 (2.7982)	Entropy 1.24177 (1.24414)	Top-1 acc 58.984 (56.816)	Top-5 acc 80.078 (78.615)	lr 0.01806
Train [43][1300/3239]	Time 0.366 (0.612)	Data Time 0.001 (0.027)	Loss 2.7467 (2.7983)	Entropy 1.24174 (1.24412)	Top-1 acc 58.203 (56.804)	Top-5 acc 76.953 (78.613)	lr 0.01806
Train [43][1310/3239]	Time 0.244 (0.610)	Data Time 0.001 (0.026)	Loss 2.6095 (2.7983)	Entropy 1.24173 (1.24411)	Top-1 acc 62.109 (56.804)	Top-5 acc 81.641 (78.614)	lr 0.01806
Train [43][1320/3239]	Time 0.258 (0.609)	Data Time 0.001 (0.026)	Loss 2.5258 (2.7980)	Entropy 1.24169 (1.24409)	Top-1 acc 62.891 (56.811)	Top-5 acc 82.812 (78.622)	lr 0.01806
Train [43][1330/3239]	Time 2.362 (0.608)	Data Time 0.001 (0.026)	Loss 2.8339 (2.7978)	Entropy 1.24169 (1.24407)	Top-1 acc 55.859 (56.816)	Top-5 acc 76.953 (78.626)	lr 0.01805
Train [43][1340/3239]	Time 0.218 (0.605)	Data Time 0.001 (0.026)	Loss 2.9268 (2.7978)	Entropy 1.24168 (1.24405)	Top-1 acc 54.688 (56.820)	Top-5 acc 75.391 (78.629)	lr 0.01805
Train [43][1350/3239]	Time 0.296 (0.604)	Data Time 0.002 (0.026)	Loss 2.8445 (2.7976)	Entropy 1.24165 (1.24403)	Top-1 acc 54.688 (56.826)	Top-5 acc 80.078 (78.635)	lr 0.01805
Train [43][1360/3239]	Time 0.224 (0.603)	Data Time 0.001 (0.026)	Loss 2.8786 (2.7975)	Entropy 1.24159 (1.24402)	Top-1 acc 53.125 (56.827)	Top-5 acc 74.219 (78.636)	lr 0.01805
Train [43][1370/3239]	Time 0.222 (0.602)	Data Time 0.001 (0.025)	Loss 2.7982 (2.7973)	Entropy 1.24148 (1.24400)	Top-1 acc 56.641 (56.834)	Top-5 acc 78.516 (78.640)	lr 0.01805
Train [43][1380/3239]	Time 0.246 (0.601)	Data Time 0.001 (0.025)	Loss 2.8077 (2.7976)	Entropy 1.24140 (1.24398)	Top-1 acc 57.422 (56.829)	Top-5 acc 81.250 (78.641)	lr 0.01805
Train [43][1390/3239]	Time 0.202 (0.600)	Data Time 0.001 (0.025)	Loss 2.7586 (2.7974)	Entropy 1.24128 (1.24396)	Top-1 acc 58.984 (56.826)	Top-5 acc 78.906 (78.646)	lr 0.01805
Train [43][1400/3239]	Time 0.361 (0.599)	Data Time 0.001 (0.025)	Loss 2.8667 (2.7975)	Entropy 1.24126 (1.24394)	Top-1 acc 54.688 (56.826)	Top-5 acc 76.172 (78.645)	lr 0.01805
Train [43][1410/3239]	Time 0.251 (0.598)	Data Time 0.001 (0.025)	Loss 2.8983 (2.7974)	Entropy 1.24125 (1.24392)	Top-1 acc 53.125 (56.832)	Top-5 acc 79.297 (78.649)	lr 0.01805
Train [43][1420/3239]	Time 0.242 (0.598)	Data Time 0.001 (0.025)	Loss 2.6864 (2.7974)	Entropy 1.24122 (1.24390)	Top-1 acc 57.812 (56.833)	Top-5 acc 80.469 (78.656)	lr 0.01805
Train [43][1430/3239]	Time 0.229 (0.597)	Data Time 0.001 (0.024)	Loss 2.6047 (2.7977)	Entropy 1.24116 (1.24388)	Top-1 acc 60.938 (56.823)	Top-5 acc 81.250 (78.647)	lr 0.01805
Train [43][1440/3239]	Time 2.504 (0.596)	Data Time 0.002 (0.024)	Loss 2.6939 (2.7975)	Entropy 1.24116 (1.24387)	Top-1 acc 58.984 (56.830)	Top-5 acc 81.641 (78.656)	lr 0.01804
Train [43][1450/3239]	Time 0.236 (0.593)	Data Time 0.001 (0.024)	Loss 2.8324 (2.7976)	Entropy 1.24109 (1.24385)	Top-1 acc 55.078 (56.821)	Top-5 acc 75.391 (78.651)	lr 0.01804
Train [43][1460/3239]	Time 0.216 (0.592)	Data Time 0.001 (0.024)	Loss 2.9672 (2.7980)	Entropy 1.24107 (1.24383)	Top-1 acc 49.609 (56.811)	Top-5 acc 75.781 (78.642)	lr 0.01804
Train [43][1470/3239]	Time 0.221 (0.591)	Data Time 0.001 (0.024)	Loss 2.8748 (2.7983)	Entropy 1.24108 (1.24381)	Top-1 acc 56.641 (56.811)	Top-5 acc 77.344 (78.635)	lr 0.01804
Train [43][1480/3239]	Time 0.232 (0.590)	Data Time 0.001 (0.024)	Loss 2.8136 (2.7984)	Entropy 1.24108 (1.24379)	Top-1 acc 57.812 (56.807)	Top-5 acc 80.469 (78.629)	lr 0.01804
Train [43][1490/3239]	Time 0.247 (0.590)	Data Time 0.001 (0.023)	Loss 2.6088 (2.7985)	Entropy 1.24099 (1.24377)	Top-1 acc 60.547 (56.805)	Top-5 acc 82.422 (78.629)	lr 0.01804
Train [43][1500/3239]	Time 0.229 (0.589)	Data Time 0.001 (0.023)	Loss 2.7517 (2.7988)	Entropy 1.24099 (1.24375)	Top-1 acc 57.031 (56.797)	Top-5 acc 80.469 (78.624)	lr 0.01804
Train [43][1510/3239]	Time 0.337 (0.588)	Data Time 0.001 (0.023)	Loss 2.8035 (2.7988)	Entropy 1.24089 (1.24374)	Top-1 acc 55.469 (56.799)	Top-5 acc 76.562 (78.625)	lr 0.01804
Train [43][1520/3239]	Time 0.206 (0.587)	Data Time 0.001 (0.023)	Loss 2.9657 (2.7987)	Entropy 1.24079 (1.24372)	Top-1 acc 58.594 (56.801)	Top-5 acc 75.000 (78.626)	lr 0.01804
Train [43][1530/3239]	Time 0.214 (0.586)	Data Time 0.001 (0.023)	Loss 3.0204 (2.7991)	Entropy 1.24024 (1.24370)	Top-1 acc 51.172 (56.791)	Top-5 acc 75.391 (78.620)	lr 0.01804
Train [43][1540/3239]	Time 0.162 (0.585)	Data Time 0.001 (0.023)	Loss 2.6986 (2.7991)	Entropy 1.24013 (1.24367)	Top-1 acc 60.156 (56.798)	Top-5 acc 80.469 (78.620)	lr 0.01804
Train [43][1550/3239]	Time 2.359 (0.585)	Data Time 0.001 (0.023)	Loss 2.7969 (2.7994)	Entropy 1.24013 (1.24365)	Top-1 acc 59.375 (56.792)	Top-5 acc 80.859 (78.619)	lr 0.01803
Train [43][1560/3239]	Time 0.259 (0.582)	Data Time 0.001 (0.022)	Loss 2.8005 (2.7996)	Entropy 1.24012 (1.24363)	Top-1 acc 54.688 (56.783)	Top-5 acc 78.125 (78.613)	lr 0.01803
Train [43][1570/3239]	Time 0.266 (0.611)	Data Time 0.002 (0.022)	Loss 3.0040 (2.7996)	Entropy 1.24003 (1.24361)	Top-1 acc 53.125 (56.787)	Top-5 acc 73.828 (78.610)	lr 0.01803
Train [43][1580/3239]	Time 0.213 (0.610)	Data Time 0.002 (0.022)	Loss 2.8046 (2.7997)	Entropy 1.24001 (1.24358)	Top-1 acc 53.906 (56.783)	Top-5 acc 80.859 (78.612)	lr 0.01803
Train [43][1590/3239]	Time 0.226 (0.609)	Data Time 0.002 (0.022)	Loss 2.7715 (2.7995)	Entropy 1.24008 (1.24356)	Top-1 acc 56.250 (56.790)	Top-5 acc 78.906 (78.613)	lr 0.01803
Train [43][1600/3239]	Time 0.230 (0.608)	Data Time 0.001 (0.022)	Loss 2.7505 (2.7993)	Entropy 1.24004 (1.24354)	Top-1 acc 59.375 (56.798)	Top-5 acc 77.734 (78.617)	lr 0.01803
Train [43][1610/3239]	Time 0.308 (0.607)	Data Time 0.001 (0.022)	Loss 2.6537 (2.7989)	Entropy 1.23997 (1.24352)	Top-1 acc 58.984 (56.811)	Top-5 acc 80.859 (78.627)	lr 0.01803
Train [43][1620/3239]	Time 0.217 (0.606)	Data Time 0.001 (0.022)	Loss 3.0035 (2.7992)	Entropy 1.23993 (1.24349)	Top-1 acc 53.125 (56.805)	Top-5 acc 71.484 (78.620)	lr 0.01803
Train [43][1630/3239]	Time 0.205 (0.605)	Data Time 0.001 (0.022)	Loss 2.9858 (2.7991)	Entropy 1.23977 (1.24347)	Top-1 acc 53.906 (56.807)	Top-5 acc 74.219 (78.624)	lr 0.01803
Train [43][1640/3239]	Time 0.198 (0.604)	Data Time 0.001 (0.021)	Loss 2.8368 (2.7989)	Entropy 1.23973 (1.24345)	Top-1 acc 51.562 (56.817)	Top-5 acc 79.688 (78.631)	lr 0.01803
Train [43][1650/3239]	Time 0.210 (0.603)	Data Time 0.001 (0.021)	Loss 2.7178 (2.7987)	Entropy 1.23965 (1.24343)	Top-1 acc 56.641 (56.824)	Top-5 acc 80.078 (78.635)	lr 0.01803
Train [43][1660/3239]	Time 2.544 (0.603)	Data Time 0.001 (0.021)	Loss 2.9321 (2.7984)	Entropy 1.23965 (1.24340)	Top-1 acc 54.688 (56.826)	Top-5 acc 76.562 (78.645)	lr 0.01802
Train [43][1670/3239]	Time 0.229 (0.600)	Data Time 0.001 (0.021)	Loss 2.9210 (2.7984)	Entropy 1.23956 (1.24338)	Top-1 acc 57.422 (56.829)	Top-5 acc 74.609 (78.637)	lr 0.01802
Train [43][1680/3239]	Time 0.236 (0.599)	Data Time 0.001 (0.021)	Loss 2.8042 (2.7989)	Entropy 1.23945 (1.24336)	Top-1 acc 55.859 (56.819)	Top-5 acc 81.250 (78.621)	lr 0.01802
Train [43][1690/3239]	Time 0.214 (0.599)	Data Time 0.001 (0.021)	Loss 2.6350 (2.7987)	Entropy 1.23938 (1.24334)	Top-1 acc 61.328 (56.828)	Top-5 acc 81.250 (78.626)	lr 0.01802
Train [43][1700/3239]	Time 0.227 (0.598)	Data Time 0.001 (0.021)	Loss 2.8240 (2.7986)	Entropy 1.23929 (1.24331)	Top-1 acc 58.594 (56.837)	Top-5 acc 77.734 (78.629)	lr 0.01802
Train [43][1710/3239]	Time 0.193 (0.597)	Data Time 0.001 (0.021)	Loss 2.9013 (2.7992)	Entropy 1.23929 (1.24329)	Top-1 acc 55.469 (56.823)	Top-5 acc 75.781 (78.612)	lr 0.01802
Train [43][1720/3239]	Time 0.249 (0.596)	Data Time 0.001 (0.021)	Loss 3.0240 (2.7995)	Entropy 1.23925 (1.24326)	Top-1 acc 55.078 (56.820)	Top-5 acc 73.047 (78.604)	lr 0.01802
Train [43][1730/3239]	Time 0.231 (0.596)	Data Time 0.001 (0.020)	Loss 2.8777 (2.7997)	Entropy 1.23925 (1.24324)	Top-1 acc 54.297 (56.815)	Top-5 acc 77.344 (78.599)	lr 0.01802
Train [43][1740/3239]	Time 0.218 (0.595)	Data Time 0.001 (0.020)	Loss 2.7031 (2.7995)	Entropy 1.23926 (1.24322)	Top-1 acc 59.766 (56.818)	Top-5 acc 80.078 (78.604)	lr 0.01802
Train [43][1750/3239]	Time 0.217 (0.594)	Data Time 0.001 (0.020)	Loss 2.7617 (2.7994)	Entropy 1.23923 (1.24320)	Top-1 acc 60.547 (56.823)	Top-5 acc 78.125 (78.608)	lr 0.01802
Train [43][1760/3239]	Time 0.239 (0.593)	Data Time 0.001 (0.020)	Loss 2.8528 (2.7999)	Entropy 1.23912 (1.24317)	Top-1 acc 54.297 (56.809)	Top-5 acc 78.906 (78.599)	lr 0.01802
Train [43][1770/3239]	Time 2.532 (0.593)	Data Time 0.001 (0.020)	Loss 2.7684 (2.7999)	Entropy 1.23912 (1.24315)	Top-1 acc 54.297 (56.810)	Top-5 acc 80.859 (78.599)	lr 0.01802
Train [43][1780/3239]	Time 0.214 (0.591)	Data Time 0.001 (0.020)	Loss 2.6451 (2.8001)	Entropy 1.23906 (1.24313)	Top-1 acc 62.500 (56.802)	Top-5 acc 80.078 (78.596)	lr 0.01801
Train [43][1790/3239]	Time 0.283 (0.590)	Data Time 0.001 (0.020)	Loss 2.7897 (2.8000)	Entropy 1.23902 (1.24310)	Top-1 acc 57.031 (56.801)	Top-5 acc 79.297 (78.599)	lr 0.01801
Train [43][1800/3239]	Time 0.221 (0.589)	Data Time 0.001 (0.020)	Loss 2.8775 (2.7997)	Entropy 1.23898 (1.24308)	Top-1 acc 53.906 (56.807)	Top-5 acc 75.781 (78.605)	lr 0.01801
Train [43][1810/3239]	Time 0.203 (0.588)	Data Time 0.001 (0.020)	Loss 2.7851 (2.7998)	Entropy 1.23892 (1.24306)	Top-1 acc 53.906 (56.804)	Top-5 acc 79.688 (78.602)	lr 0.01801
Train [43][1820/3239]	Time 0.303 (0.588)	Data Time 0.002 (0.020)	Loss 2.7145 (2.7999)	Entropy 1.23879 (1.24304)	Top-1 acc 60.156 (56.808)	Top-5 acc 78.516 (78.597)	lr 0.01801
Train [43][1830/3239]	Time 0.173 (0.587)	Data Time 0.002 (0.019)	Loss 2.9141 (2.8000)	Entropy 1.23879 (1.24301)	Top-1 acc 55.078 (56.804)	Top-5 acc 73.828 (78.598)	lr 0.01801
Train [43][1840/3239]	Time 0.215 (0.586)	Data Time 0.001 (0.019)	Loss 2.7439 (2.7999)	Entropy 1.23877 (1.24299)	Top-1 acc 57.422 (56.808)	Top-5 acc 79.688 (78.602)	lr 0.01801
Train [43][1850/3239]	Time 0.228 (0.586)	Data Time 0.001 (0.019)	Loss 2.7498 (2.7997)	Entropy 1.23869 (1.24297)	Top-1 acc 53.906 (56.812)	Top-5 acc 80.078 (78.602)	lr 0.01801
Train [43][1860/3239]	Time 0.218 (0.585)	Data Time 0.001 (0.019)	Loss 2.7430 (2.7997)	Entropy 1.23867 (1.24294)	Top-1 acc 54.297 (56.813)	Top-5 acc 79.688 (78.603)	lr 0.01801
Train [43][1870/3239]	Time 0.206 (0.584)	Data Time 0.001 (0.019)	Loss 2.6643 (2.7997)	Entropy 1.23869 (1.24292)	Top-1 acc 60.156 (56.815)	Top-5 acc 80.078 (78.601)	lr 0.01801
Train [43][1880/3239]	Time 2.353 (0.583)	Data Time 0.001 (0.019)	Loss 2.8039 (2.7995)	Entropy 1.23869 (1.24290)	Top-1 acc 55.469 (56.823)	Top-5 acc 81.250 (78.608)	lr 0.01801
Train [43][1890/3239]	Time 0.233 (0.582)	Data Time 0.001 (0.019)	Loss 2.6959 (2.7994)	Entropy 1.23861 (1.24288)	Top-1 acc 53.906 (56.821)	Top-5 acc 80.859 (78.610)	lr 0.01800
Train [43][1900/3239]	Time 0.228 (0.581)	Data Time 0.001 (0.019)	Loss 2.8480 (2.7993)	Entropy 1.23849 (1.24285)	Top-1 acc 49.609 (56.820)	Top-5 acc 80.078 (78.611)	lr 0.01800
Train [43][1910/3239]	Time 0.211 (0.580)	Data Time 0.001 (0.019)	Loss 2.9067 (2.7993)	Entropy 1.23845 (1.24283)	Top-1 acc 54.297 (56.817)	Top-5 acc 77.344 (78.612)	lr 0.01800
Train [43][1920/3239]	Time 0.355 (0.580)	Data Time 0.001 (0.019)	Loss 2.8437 (2.7998)	Entropy 1.23834 (1.24281)	Top-1 acc 57.422 (56.803)	Top-5 acc 78.125 (78.601)	lr 0.01800
Train [43][1930/3239]	Time 0.237 (0.604)	Data Time 0.002 (0.019)	Loss 2.8958 (2.7998)	Entropy 1.23827 (1.24278)	Top-1 acc 54.297 (56.802)	Top-5 acc 80.078 (78.602)	lr 0.01800
Train [43][1940/3239]	Time 0.235 (0.603)	Data Time 0.002 (0.018)	Loss 2.9388 (2.7998)	Entropy 1.23824 (1.24276)	Top-1 acc 53.516 (56.795)	Top-5 acc 77.344 (78.605)	lr 0.01800
Train [43][1950/3239]	Time 0.218 (0.603)	Data Time 0.001 (0.018)	Loss 2.8830 (2.7999)	Entropy 1.23802 (1.24274)	Top-1 acc 59.766 (56.793)	Top-5 acc 80.469 (78.609)	lr 0.01800
Train [43][1960/3239]	Time 0.219 (0.602)	Data Time 0.001 (0.018)	Loss 2.6687 (2.7995)	Entropy 1.23803 (1.24271)	Top-1 acc 59.766 (56.809)	Top-5 acc 80.859 (78.612)	lr 0.01800
Train [43][1970/3239]	Time 0.323 (0.601)	Data Time 0.001 (0.018)	Loss 2.6980 (2.7993)	Entropy 1.23803 (1.24269)	Top-1 acc 59.766 (56.814)	Top-5 acc 79.688 (78.617)	lr 0.01800
Train [43][1980/3239]	Time 0.214 (0.600)	Data Time 0.001 (0.018)	Loss 2.9311 (2.7992)	Entropy 1.23802 (1.24266)	Top-1 acc 51.953 (56.814)	Top-5 acc 73.828 (78.616)	lr 0.01800
Train [43][1990/3239]	Time 2.528 (0.600)	Data Time 0.001 (0.018)	Loss 2.6686 (2.7992)	Entropy 1.23802 (1.24264)	Top-1 acc 56.641 (56.809)	Top-5 acc 83.594 (78.622)	lr 0.01800
Train [43][2000/3239]	Time 0.227 (0.598)	Data Time 0.001 (0.018)	Loss 2.8105 (2.7993)	Entropy 1.23801 (1.24262)	Top-1 acc 58.984 (56.811)	Top-5 acc 80.859 (78.622)	lr 0.01799
Train [43][2010/3239]	Time 0.236 (0.597)	Data Time 0.001 (0.018)	Loss 2.8534 (2.7993)	Entropy 1.23796 (1.24260)	Top-1 acc 56.641 (56.815)	Top-5 acc 78.516 (78.625)	lr 0.01799
Train [43][2020/3239]	Time 0.308 (0.597)	Data Time 0.001 (0.018)	Loss 2.9766 (2.7994)	Entropy 1.23791 (1.24257)	Top-1 acc 55.469 (56.813)	Top-5 acc 75.391 (78.620)	lr 0.01799
Train [43][2030/3239]	Time 0.219 (0.596)	Data Time 0.001 (0.018)	Loss 2.8312 (2.7996)	Entropy 1.23790 (1.24255)	Top-1 acc 54.297 (56.809)	Top-5 acc 76.953 (78.614)	lr 0.01799
Train [43][2040/3239]	Time 0.279 (0.595)	Data Time 0.002 (0.018)	Loss 2.7673 (2.7996)	Entropy 1.23784 (1.24253)	Top-1 acc 57.812 (56.812)	Top-5 acc 81.641 (78.615)	lr 0.01799
Train [43][2050/3239]	Time 0.225 (0.595)	Data Time 0.001 (0.018)	Loss 2.7706 (2.7995)	Entropy 1.23774 (1.24250)	Top-1 acc 55.859 (56.814)	Top-5 acc 77.344 (78.618)	lr 0.01799
Train [43][2060/3239]	Time 0.216 (0.594)	Data Time 0.001 (0.017)	Loss 2.7835 (2.7995)	Entropy 1.23771 (1.24248)	Top-1 acc 58.594 (56.819)	Top-5 acc 76.562 (78.616)	lr 0.01799
Train [43][2070/3239]	Time 0.313 (0.593)	Data Time 0.001 (0.017)	Loss 3.1617 (2.7995)	Entropy 1.23770 (1.24246)	Top-1 acc 50.781 (56.823)	Top-5 acc 70.703 (78.617)	lr 0.01799
Train [43][2080/3239]	Time 0.268 (0.593)	Data Time 0.002 (0.017)	Loss 2.7102 (2.7994)	Entropy 1.23766 (1.24243)	Top-1 acc 60.938 (56.818)	Top-5 acc 80.859 (78.616)	lr 0.01799
Train [43][2090/3239]	Time 0.261 (0.592)	Data Time 0.001 (0.017)	Loss 2.9219 (2.7997)	Entropy 1.23765 (1.24241)	Top-1 acc 56.641 (56.818)	Top-5 acc 76.953 (78.607)	lr 0.01799
Train [43][2100/3239]	Time 2.517 (0.592)	Data Time 0.001 (0.017)	Loss 2.8767 (2.8004)	Entropy 1.23765 (1.24239)	Top-1 acc 53.516 (56.802)	Top-5 acc 76.953 (78.598)	lr 0.01799
Train [43][2110/3239]	Time 0.223 (0.590)	Data Time 0.001 (0.017)	Loss 2.7875 (2.8003)	Entropy 1.23758 (1.24237)	Top-1 acc 59.766 (56.802)	Top-5 acc 78.906 (78.595)	lr 0.01798
Train [43][2120/3239]	Time 0.301 (0.590)	Data Time 0.001 (0.017)	Loss 2.8193 (2.8002)	Entropy 1.23755 (1.24234)	Top-1 acc 55.078 (56.805)	Top-5 acc 76.953 (78.600)	lr 0.01798
Train [43][2130/3239]	Time 0.229 (0.589)	Data Time 0.001 (0.017)	Loss 2.7464 (2.8001)	Entropy 1.23750 (1.24232)	Top-1 acc 55.469 (56.805)	Top-5 acc 81.250 (78.604)	lr 0.01798
Train [43][2140/3239]	Time 0.225 (0.588)	Data Time 0.001 (0.017)	Loss 2.8813 (2.8001)	Entropy 1.23747 (1.24230)	Top-1 acc 58.594 (56.806)	Top-5 acc 77.734 (78.602)	lr 0.01798
Train [43][2150/3239]	Time 0.199 (0.588)	Data Time 0.001 (0.017)	Loss 2.8692 (2.8001)	Entropy 1.23747 (1.24228)	Top-1 acc 54.688 (56.807)	Top-5 acc 77.344 (78.603)	lr 0.01798
Train [43][2160/3239]	Time 0.233 (0.587)	Data Time 0.001 (0.017)	Loss 2.7298 (2.8000)	Entropy 1.23746 (1.24225)	Top-1 acc 55.078 (56.804)	Top-5 acc 81.250 (78.605)	lr 0.01798
Train [43][2170/3239]	Time 0.349 (0.587)	Data Time 0.001 (0.017)	Loss 2.6336 (2.8000)	Entropy 1.23744 (1.24223)	Top-1 acc 61.719 (56.798)	Top-5 acc 80.859 (78.607)	lr 0.01798
Train [43][2180/3239]	Time 0.221 (0.586)	Data Time 0.001 (0.017)	Loss 2.9350 (2.8000)	Entropy 1.23742 (1.24221)	Top-1 acc 52.734 (56.796)	Top-5 acc 76.562 (78.610)	lr 0.01798
Train [43][2190/3239]	Time 0.215 (0.585)	Data Time 0.001 (0.017)	Loss 2.9522 (2.8002)	Entropy 1.23738 (1.24219)	Top-1 acc 55.469 (56.794)	Top-5 acc 76.562 (78.603)	lr 0.01798
Train [43][2200/3239]	Time 0.224 (0.585)	Data Time 0.001 (0.016)	Loss 2.9934 (2.8007)	Entropy 1.23735 (1.24217)	Top-1 acc 53.516 (56.785)	Top-5 acc 76.172 (78.596)	lr 0.01798
Train [43][2210/3239]	Time 2.357 (0.584)	Data Time 0.001 (0.016)	Loss 2.9143 (2.8009)	Entropy 1.23735 (1.24214)	Top-1 acc 53.516 (56.784)	Top-5 acc 75.781 (78.589)	lr 0.01798
Train [43][2220/3239]	Time 0.349 (0.583)	Data Time 0.001 (0.016)	Loss 2.8089 (2.8009)	Entropy 1.23734 (1.24212)	Top-1 acc 60.547 (56.782)	Top-5 acc 76.172 (78.584)	lr 0.01797
Train [43][2230/3239]	Time 0.223 (0.582)	Data Time 0.001 (0.016)	Loss 2.8099 (2.8009)	Entropy 1.23732 (1.24210)	Top-1 acc 56.641 (56.781)	Top-5 acc 80.078 (78.587)	lr 0.01797
Train [43][2240/3239]	Time 0.241 (0.581)	Data Time 0.001 (0.016)	Loss 2.8998 (2.8012)	Entropy 1.23725 (1.24208)	Top-1 acc 51.172 (56.770)	Top-5 acc 78.906 (78.584)	lr 0.01797
Train [43][2250/3239]	Time 0.238 (0.581)	Data Time 0.001 (0.016)	Loss 2.7661 (2.8015)	Entropy 1.23718 (1.24206)	Top-1 acc 60.938 (56.759)	Top-5 acc 78.516 (78.579)	lr 0.01797
Train [43][2260/3239]	Time 0.234 (0.580)	Data Time 0.001 (0.016)	Loss 2.9379 (2.8016)	Entropy 1.23719 (1.24204)	Top-1 acc 53.516 (56.758)	Top-5 acc 77.344 (78.575)	lr 0.01797
Train [43][2270/3239]	Time 0.301 (0.580)	Data Time 0.001 (0.016)	Loss 2.8050 (2.8020)	Entropy 1.23718 (1.24201)	Top-1 acc 55.469 (56.746)	Top-5 acc 77.344 (78.566)	lr 0.01797
Train [43][2280/3239]	Time 0.249 (0.579)	Data Time 0.001 (0.016)	Loss 2.9393 (2.8022)	Entropy 1.23710 (1.24199)	Top-1 acc 54.688 (56.742)	Top-5 acc 75.391 (78.564)	lr 0.01797
Train [43][2290/3239]	Time 0.289 (0.597)	Data Time 0.002 (0.016)	Loss 2.6981 (2.8022)	Entropy 1.23705 (1.24197)	Top-1 acc 57.812 (56.736)	Top-5 acc 80.859 (78.563)	lr 0.01797
Train [43][2300/3239]	Time 0.234 (0.597)	Data Time 0.002 (0.016)	Loss 2.8230 (2.8022)	Entropy 1.23704 (1.24195)	Top-1 acc 57.812 (56.734)	Top-5 acc 78.906 (78.564)	lr 0.01797
Train [43][2310/3239]	Time 0.225 (0.596)	Data Time 0.001 (0.016)	Loss 2.7502 (2.8023)	Entropy 1.23701 (1.24193)	Top-1 acc 57.031 (56.733)	Top-5 acc 76.172 (78.560)	lr 0.01797
Train [43][2320/3239]	Time 2.515 (0.596)	Data Time 0.002 (0.016)	Loss 2.7698 (2.8022)	Entropy 1.23701 (1.24191)	Top-1 acc 57.422 (56.730)	Top-5 acc 76.953 (78.561)	lr 0.01797
Train [43][2330/3239]	Time 0.251 (0.594)	Data Time 0.001 (0.016)	Loss 2.7177 (2.8021)	Entropy 1.23698 (1.24189)	Top-1 acc 58.984 (56.731)	Top-5 acc 78.516 (78.566)	lr 0.01796
Train [43][2340/3239]	Time 0.233 (0.594)	Data Time 0.001 (0.016)	Loss 2.8653 (2.8022)	Entropy 1.23696 (1.24187)	Top-1 acc 53.906 (56.725)	Top-5 acc 75.781 (78.565)	lr 0.01796
Train [43][2350/3239]	Time 0.228 (0.593)	Data Time 0.001 (0.016)	Loss 3.0543 (2.8022)	Entropy 1.23695 (1.24184)	Top-1 acc 52.344 (56.722)	Top-5 acc 74.219 (78.567)	lr 0.01796
Train [43][2360/3239]	Time 0.256 (0.593)	Data Time 0.002 (0.015)	Loss 2.9102 (2.8025)	Entropy 1.23692 (1.24182)	Top-1 acc 54.688 (56.714)	Top-5 acc 78.125 (78.561)	lr 0.01796
Train [43][2370/3239]	Time 0.385 (0.592)	Data Time 0.001 (0.015)	Loss 2.7904 (2.8024)	Entropy 1.23691 (1.24180)	Top-1 acc 57.812 (56.710)	Top-5 acc 77.344 (78.560)	lr 0.01796
Train [43][2380/3239]	Time 0.232 (0.592)	Data Time 0.001 (0.015)	Loss 2.6083 (2.8025)	Entropy 1.23683 (1.24178)	Top-1 acc 58.203 (56.708)	Top-5 acc 83.984 (78.558)	lr 0.01796
Train [43][2390/3239]	Time 0.220 (0.591)	Data Time 0.001 (0.015)	Loss 2.7690 (2.8024)	Entropy 1.23677 (1.24176)	Top-1 acc 58.984 (56.708)	Top-5 acc 77.344 (78.560)	lr 0.01796
Train [43][2400/3239]	Time 0.243 (0.591)	Data Time 0.001 (0.015)	Loss 2.7351 (2.8024)	Entropy 1.23676 (1.24174)	Top-1 acc 58.594 (56.705)	Top-5 acc 77.344 (78.559)	lr 0.01796
Train [43][2410/3239]	Time 0.227 (0.590)	Data Time 0.001 (0.015)	Loss 2.8178 (2.8025)	Entropy 1.23672 (1.24172)	Top-1 acc 60.547 (56.707)	Top-5 acc 78.906 (78.556)	lr 0.01796
Train [43][2420/3239]	Time 0.204 (0.590)	Data Time 0.001 (0.015)	Loss 2.8333 (2.8025)	Entropy 1.23668 (1.24170)	Top-1 acc 53.906 (56.705)	Top-5 acc 81.250 (78.560)	lr 0.01796
Train [43][2430/3239]	Time 2.577 (0.589)	Data Time 0.002 (0.015)	Loss 2.7239 (2.8023)	Entropy 1.23668 (1.24168)	Top-1 acc 59.375 (56.712)	Top-5 acc 80.469 (78.565)	lr 0.01796
Train [43][2440/3239]	Time 0.241 (0.588)	Data Time 0.001 (0.015)	Loss 2.7421 (2.8022)	Entropy 1.23664 (1.24166)	Top-1 acc 57.812 (56.714)	Top-5 acc 80.469 (78.562)	lr 0.01795
Train [43][2450/3239]	Time 0.211 (0.587)	Data Time 0.001 (0.015)	Loss 2.7189 (2.8024)	Entropy 1.23658 (1.24164)	Top-1 acc 59.375 (56.712)	Top-5 acc 78.906 (78.559)	lr 0.01795
Train [43][2460/3239]	Time 0.208 (0.587)	Data Time 0.001 (0.015)	Loss 2.7093 (2.8024)	Entropy 1.23644 (1.24162)	Top-1 acc 58.203 (56.711)	Top-5 acc 79.297 (78.560)	lr 0.01795
Train [43][2470/3239]	Time 0.228 (0.586)	Data Time 0.001 (0.015)	Loss 2.6908 (2.8025)	Entropy 1.23641 (1.24160)	Top-1 acc 58.594 (56.706)	Top-5 acc 81.641 (78.558)	lr 0.01795
Train [43][2480/3239]	Time 0.218 (0.586)	Data Time 0.001 (0.015)	Loss 2.7598 (2.8024)	Entropy 1.23640 (1.24157)	Top-1 acc 57.812 (56.700)	Top-5 acc 77.344 (78.556)	lr 0.01795
Train [43][2490/3239]	Time 0.240 (0.585)	Data Time 0.001 (0.015)	Loss 2.8455 (2.8022)	Entropy 1.23638 (1.24155)	Top-1 acc 56.641 (56.701)	Top-5 acc 78.516 (78.562)	lr 0.01795
Train [43][2500/3239]	Time 0.237 (0.585)	Data Time 0.001 (0.015)	Loss 2.9779 (2.8024)	Entropy 1.23631 (1.24153)	Top-1 acc 53.125 (56.698)	Top-5 acc 76.562 (78.559)	lr 0.01795
Train [43][2510/3239]	Time 0.218 (0.584)	Data Time 0.001 (0.015)	Loss 2.8148 (2.8025)	Entropy 1.23625 (1.24151)	Top-1 acc 57.422 (56.701)	Top-5 acc 78.125 (78.556)	lr 0.01795
Train [43][2520/3239]	Time 0.209 (0.584)	Data Time 0.001 (0.015)	Loss 2.8963 (2.8024)	Entropy 1.23623 (1.24149)	Top-1 acc 53.906 (56.704)	Top-5 acc 75.781 (78.553)	lr 0.01795
Train [43][2530/3239]	Time 0.224 (0.583)	Data Time 0.001 (0.015)	Loss 2.8589 (2.8025)	Entropy 1.23619 (1.24147)	Top-1 acc 54.688 (56.705)	Top-5 acc 77.734 (78.552)	lr 0.01795
Train [43][2540/3239]	Time 2.445 (0.583)	Data Time 0.001 (0.015)	Loss 2.7192 (2.8025)	Entropy 1.23619 (1.24145)	Top-1 acc 58.203 (56.706)	Top-5 acc 79.297 (78.549)	lr 0.01795
Train [43][2550/3239]	Time 0.228 (0.582)	Data Time 0.001 (0.014)	Loss 2.9439 (2.8027)	Entropy 1.23611 (1.24143)	Top-1 acc 51.562 (56.701)	Top-5 acc 76.172 (78.543)	lr 0.01794
Train [43][2560/3239]	Time 0.242 (0.581)	Data Time 0.001 (0.014)	Loss 2.7245 (2.8028)	Entropy 1.23607 (1.24141)	Top-1 acc 57.812 (56.701)	Top-5 acc 81.250 (78.541)	lr 0.01794
Train [43][2570/3239]	Time 0.334 (0.581)	Data Time 0.001 (0.014)	Loss 2.8039 (2.8029)	Entropy 1.23604 (1.24139)	Top-1 acc 56.250 (56.697)	Top-5 acc 76.953 (78.538)	lr 0.01794
Train [43][2580/3239]	Time 0.229 (0.580)	Data Time 0.001 (0.014)	Loss 2.5750 (2.8028)	Entropy 1.23604 (1.24137)	Top-1 acc 64.453 (56.700)	Top-5 acc 83.203 (78.540)	lr 0.01794
Train [43][2590/3239]	Time 0.265 (0.580)	Data Time 0.001 (0.014)	Loss 2.8055 (2.8028)	Entropy 1.23602 (1.24135)	Top-1 acc 56.250 (56.699)	Top-5 acc 75.781 (78.538)	lr 0.01794
Train [43][2600/3239]	Time 0.210 (0.579)	Data Time 0.001 (0.014)	Loss 2.8521 (2.8032)	Entropy 1.23600 (1.24132)	Top-1 acc 60.938 (56.695)	Top-5 acc 78.516 (78.532)	lr 0.01794
Train [43][2610/3239]	Time 0.225 (0.579)	Data Time 0.001 (0.014)	Loss 2.7338 (2.8033)	Entropy 1.23598 (1.24130)	Top-1 acc 57.031 (56.688)	Top-5 acc 83.203 (78.529)	lr 0.01794
Train [43][2620/3239]	Time 0.315 (0.579)	Data Time 0.001 (0.014)	Loss 2.9309 (2.8035)	Entropy 1.23595 (1.24128)	Top-1 acc 50.391 (56.680)	Top-5 acc 73.828 (78.527)	lr 0.01794
Train [43][2630/3239]	Time 0.211 (0.578)	Data Time 0.001 (0.014)	Loss 2.7074 (2.8034)	Entropy 1.23591 (1.24126)	Top-1 acc 58.594 (56.682)	Top-5 acc 78.125 (78.528)	lr 0.01794
Train [43][2640/3239]	Time 0.243 (0.578)	Data Time 0.001 (0.014)	Loss 2.7633 (2.8032)	Entropy 1.23587 (1.24124)	Top-1 acc 58.594 (56.687)	Top-5 acc 76.172 (78.527)	lr 0.01794
Train [43][2650/3239]	Time 0.240 (0.595)	Data Time 0.003 (0.014)	Loss 2.7709 (2.8033)	Entropy 1.23584 (1.24122)	Top-1 acc 58.203 (56.686)	Top-5 acc 77.734 (78.525)	lr 0.01794
Train [43][2660/3239]	Time 0.223 (0.594)	Data Time 0.002 (0.014)	Loss 2.7654 (2.8034)	Entropy 1.23581 (1.24120)	Top-1 acc 59.375 (56.683)	Top-5 acc 76.953 (78.524)	lr 0.01793
Train [43][2670/3239]	Time 0.365 (0.594)	Data Time 0.002 (0.014)	Loss 3.0303 (2.8038)	Entropy 1.23578 (1.24118)	Top-1 acc 49.219 (56.670)	Top-5 acc 75.391 (78.518)	lr 0.01793
Train [43][2680/3239]	Time 0.256 (0.593)	Data Time 0.002 (0.014)	Loss 2.7507 (2.8039)	Entropy 1.23567 (1.24116)	Top-1 acc 55.859 (56.670)	Top-5 acc 80.859 (78.515)	lr 0.01793
Train [43][2690/3239]	Time 0.192 (0.593)	Data Time 0.001 (0.014)	Loss 2.8833 (2.8041)	Entropy 1.23560 (1.24114)	Top-1 acc 55.859 (56.662)	Top-5 acc 78.125 (78.513)	lr 0.01793
Train [43][2700/3239]	Time 0.264 (0.592)	Data Time 0.001 (0.014)	Loss 2.9163 (2.8043)	Entropy 1.23555 (1.24112)	Top-1 acc 53.516 (56.656)	Top-5 acc 75.781 (78.505)	lr 0.01793
Train [43][2710/3239]	Time 0.274 (0.592)	Data Time 0.001 (0.014)	Loss 2.8378 (2.8044)	Entropy 1.23550 (1.24110)	Top-1 acc 57.031 (56.655)	Top-5 acc 76.953 (78.500)	lr 0.01793
Train [43][2720/3239]	Time 0.364 (0.591)	Data Time 0.001 (0.014)	Loss 2.9035 (2.8043)	Entropy 1.23547 (1.24108)	Top-1 acc 55.078 (56.655)	Top-5 acc 78.125 (78.502)	lr 0.01793
Train [43][2730/3239]	Time 0.243 (0.591)	Data Time 0.001 (0.014)	Loss 2.8305 (2.8043)	Entropy 1.23546 (1.24106)	Top-1 acc 55.469 (56.656)	Top-5 acc 78.516 (78.500)	lr 0.01793
Train [43][2740/3239]	Time 0.226 (0.590)	Data Time 0.001 (0.014)	Loss 2.7239 (2.8043)	Entropy 1.23541 (1.24104)	Top-1 acc 56.250 (56.655)	Top-5 acc 80.469 (78.499)	lr 0.01793
Train [43][2750/3239]	Time 0.260 (0.590)	Data Time 0.001 (0.014)	Loss 2.8153 (2.8046)	Entropy 1.23536 (1.24102)	Top-1 acc 55.859 (56.649)	Top-5 acc 77.734 (78.493)	lr 0.01793
Train [43][2760/3239]	Time 0.237 (0.590)	Data Time 0.001 (0.013)	Loss 2.6670 (2.8045)	Entropy 1.23536 (1.24100)	Top-1 acc 62.109 (56.652)	Top-5 acc 82.422 (78.497)	lr 0.01793
Train [43][2770/3239]	Time 0.263 (0.589)	Data Time 0.001 (0.013)	Loss 2.7412 (2.8048)	Entropy 1.23535 (1.24098)	Top-1 acc 57.031 (56.645)	Top-5 acc 79.688 (78.491)	lr 0.01792
Train [43][2780/3239]	Time 0.286 (0.589)	Data Time 0.002 (0.013)	Loss 2.8086 (2.8050)	Entropy 1.23530 (1.24096)	Top-1 acc 57.031 (56.638)	Top-5 acc 76.562 (78.487)	lr 0.01792
Train [43][2790/3239]	Time 0.266 (0.588)	Data Time 0.001 (0.013)	Loss 2.7337 (2.8049)	Entropy 1.23526 (1.24094)	Top-1 acc 58.203 (56.634)	Top-5 acc 80.078 (78.490)	lr 0.01792
Train [43][2800/3239]	Time 0.238 (0.588)	Data Time 0.001 (0.013)	Loss 2.8955 (2.8048)	Entropy 1.23526 (1.24092)	Top-1 acc 52.734 (56.636)	Top-5 acc 80.078 (78.489)	lr 0.01792
Train [43][2810/3239]	Time 0.212 (0.587)	Data Time 0.001 (0.013)	Loss 2.7761 (2.8047)	Entropy 1.23523 (1.24090)	Top-1 acc 58.984 (56.641)	Top-5 acc 82.031 (78.490)	lr 0.01792
Train [43][2820/3239]	Time 0.224 (0.587)	Data Time 0.001 (0.013)	Loss 2.6166 (2.8046)	Entropy 1.23522 (1.24088)	Top-1 acc 59.766 (56.642)	Top-5 acc 81.250 (78.494)	lr 0.01792
Train [43][2830/3239]	Time 0.283 (0.586)	Data Time 0.002 (0.013)	Loss 2.7886 (2.8044)	Entropy 1.23523 (1.24086)	Top-1 acc 60.547 (56.644)	Top-5 acc 79.297 (78.496)	lr 0.01792
Train [43][2840/3239]	Time 0.228 (0.586)	Data Time 0.002 (0.013)	Loss 2.7401 (2.8043)	Entropy 1.23515 (1.24084)	Top-1 acc 56.641 (56.646)	Top-5 acc 81.641 (78.495)	lr 0.01792
Train [43][2850/3239]	Time 0.259 (0.585)	Data Time 0.001 (0.013)	Loss 2.7723 (2.8042)	Entropy 1.23512 (1.24082)	Top-1 acc 54.297 (56.645)	Top-5 acc 78.906 (78.495)	lr 0.01792
Train [43][2860/3239]	Time 0.261 (0.585)	Data Time 0.001 (0.013)	Loss 2.6939 (2.8042)	Entropy 1.23511 (1.24080)	Top-1 acc 57.422 (56.646)	Top-5 acc 80.859 (78.497)	lr 0.01792
Train [43][2870/3239]	Time 0.211 (0.584)	Data Time 0.001 (0.013)	Loss 3.0733 (2.8042)	Entropy 1.23503 (1.24078)	Top-1 acc 52.344 (56.646)	Top-5 acc 74.609 (78.502)	lr 0.01792
Train [43][2880/3239]	Time 0.213 (0.584)	Data Time 0.001 (0.013)	Loss 2.7588 (2.8041)	Entropy 1.23500 (1.24076)	Top-1 acc 58.984 (56.649)	Top-5 acc 82.812 (78.505)	lr 0.01791
Train [43][2890/3239]	Time 0.202 (0.584)	Data Time 0.001 (0.013)	Loss 2.9285 (2.8042)	Entropy 1.23497 (1.24074)	Top-1 acc 51.562 (56.645)	Top-5 acc 74.609 (78.503)	lr 0.01791
Train [43][2900/3239]	Time 0.204 (0.583)	Data Time 0.001 (0.013)	Loss 2.7896 (2.8042)	Entropy 1.23494 (1.24072)	Top-1 acc 57.812 (56.644)	Top-5 acc 81.250 (78.502)	lr 0.01791
Train [43][2910/3239]	Time 0.226 (0.583)	Data Time 0.001 (0.013)	Loss 2.7836 (2.8043)	Entropy 1.23496 (1.24070)	Top-1 acc 57.031 (56.640)	Top-5 acc 78.906 (78.500)	lr 0.01791
Train [43][2920/3239]	Time 0.172 (0.582)	Data Time 0.001 (0.013)	Loss 2.6264 (2.8043)	Entropy 1.23494 (1.24068)	Top-1 acc 61.328 (56.639)	Top-5 acc 81.250 (78.500)	lr 0.01791
Train [43][2930/3239]	Time 0.261 (0.582)	Data Time 0.001 (0.013)	Loss 2.8937 (2.8044)	Entropy 1.23495 (1.24066)	Top-1 acc 53.906 (56.638)	Top-5 acc 76.172 (78.498)	lr 0.01791
Train [43][2940/3239]	Time 0.236 (0.581)	Data Time 0.001 (0.013)	Loss 3.0529 (2.8046)	Entropy 1.23495 (1.24064)	Top-1 acc 49.609 (56.632)	Top-5 acc 76.172 (78.497)	lr 0.01791
Train [43][2950/3239]	Time 0.230 (0.581)	Data Time 0.001 (0.013)	Loss 2.6423 (2.8045)	Entropy 1.23489 (1.24062)	Top-1 acc 58.594 (56.636)	Top-5 acc 82.031 (78.499)	lr 0.01791
Train [43][2960/3239]	Time 0.253 (0.581)	Data Time 0.002 (0.013)	Loss 2.7174 (2.8046)	Entropy 1.23489 (1.24060)	Top-1 acc 61.328 (56.637)	Top-5 acc 79.297 (78.497)	lr 0.01791
Train [43][2970/3239]	Time 0.245 (0.580)	Data Time 0.002 (0.013)	Loss 2.8633 (2.8046)	Entropy 1.23488 (1.24058)	Top-1 acc 55.078 (56.636)	Top-5 acc 77.734 (78.499)	lr 0.01791
Train [43][2980/3239]	Time 0.285 (0.594)	Data Time 0.004 (0.013)	Loss 2.9085 (2.8048)	Entropy 1.23484 (1.24056)	Top-1 acc 54.688 (56.630)	Top-5 acc 72.656 (78.490)	lr 0.01791
Train [43][2990/3239]	Time 0.230 (0.594)	Data Time 0.002 (0.013)	Loss 2.9084 (2.8052)	Entropy 1.23481 (1.24054)	Top-1 acc 50.391 (56.617)	Top-5 acc 76.953 (78.481)	lr 0.01790
Train [43][3000/3239]	Time 0.225 (0.594)	Data Time 0.001 (0.013)	Loss 2.9741 (2.8050)	Entropy 1.23475 (1.24052)	Top-1 acc 51.562 (56.621)	Top-5 acc 74.219 (78.482)	lr 0.01790
Train [43][3010/3239]	Time 0.240 (0.593)	Data Time 0.001 (0.013)	Loss 2.8561 (2.8049)	Entropy 1.23468 (1.24050)	Top-1 acc 57.422 (56.626)	Top-5 acc 76.172 (78.488)	lr 0.01790
Train [43][3020/3239]	Time 0.206 (0.593)	Data Time 0.002 (0.012)	Loss 2.8156 (2.8049)	Entropy 1.23468 (1.24049)	Top-1 acc 59.375 (56.623)	Top-5 acc 80.078 (78.487)	lr 0.01790
Train [43][3030/3239]	Time 0.222 (0.592)	Data Time 0.001 (0.012)	Loss 2.7849 (2.8049)	Entropy 1.23465 (1.24047)	Top-1 acc 55.469 (56.622)	Top-5 acc 80.469 (78.489)	lr 0.01790
Train [43][3040/3239]	Time 0.201 (0.592)	Data Time 0.002 (0.012)	Loss 2.6917 (2.8051)	Entropy 1.23452 (1.24045)	Top-1 acc 57.422 (56.615)	Top-5 acc 78.125 (78.485)	lr 0.01790
Train [43][3050/3239]	Time 0.224 (0.591)	Data Time 0.001 (0.012)	Loss 2.7661 (2.8053)	Entropy 1.23450 (1.24043)	Top-1 acc 59.375 (56.613)	Top-5 acc 81.250 (78.481)	lr 0.01790
Train [43][3060/3239]	Time 0.322 (0.591)	Data Time 0.001 (0.012)	Loss 2.7149 (2.8055)	Entropy 1.23444 (1.24041)	Top-1 acc 54.297 (56.610)	Top-5 acc 82.031 (78.477)	lr 0.01790
Train [43][3070/3239]	Time 0.394 (0.591)	Data Time 0.001 (0.012)	Loss 2.7624 (2.8054)	Entropy 1.23438 (1.24039)	Top-1 acc 58.594 (56.611)	Top-5 acc 82.031 (78.479)	lr 0.01790
Train [43][3080/3239]	Time 0.230 (0.590)	Data Time 0.001 (0.012)	Loss 2.7032 (2.8057)	Entropy 1.23435 (1.24037)	Top-1 acc 55.859 (56.606)	Top-5 acc 80.078 (78.477)	lr 0.01790
Train [43][3090/3239]	Time 0.283 (0.590)	Data Time 0.001 (0.012)	Loss 2.7435 (2.8054)	Entropy 1.23433 (1.24035)	Top-1 acc 51.953 (56.608)	Top-5 acc 80.078 (78.483)	lr 0.01790
Train [43][3100/3239]	Time 0.221 (0.590)	Data Time 0.001 (0.012)	Loss 2.8814 (2.8058)	Entropy 1.23427 (1.24033)	Top-1 acc 57.422 (56.602)	Top-5 acc 75.781 (78.476)	lr 0.01789
Train [43][3110/3239]	Time 0.224 (0.589)	Data Time 0.001 (0.012)	Loss 2.5196 (2.8057)	Entropy 1.23423 (1.24031)	Top-1 acc 63.281 (56.604)	Top-5 acc 82.422 (78.477)	lr 0.01789
Train [43][3120/3239]	Time 0.213 (0.589)	Data Time 0.001 (0.012)	Loss 2.6808 (2.8059)	Entropy 1.23419 (1.24029)	Top-1 acc 59.375 (56.598)	Top-5 acc 82.422 (78.473)	lr 0.01789
Train [43][3130/3239]	Time 0.242 (0.588)	Data Time 0.002 (0.012)	Loss 2.7288 (2.8060)	Entropy 1.23411 (1.24027)	Top-1 acc 58.984 (56.596)	Top-5 acc 80.078 (78.472)	lr 0.01789
Train [43][3140/3239]	Time 0.226 (0.588)	Data Time 0.001 (0.012)	Loss 2.8630 (2.8060)	Entropy 1.23415 (1.24025)	Top-1 acc 56.641 (56.597)	Top-5 acc 75.781 (78.473)	lr 0.01789
Train [43][3150/3239]	Time 0.208 (0.587)	Data Time 0.001 (0.012)	Loss 2.8271 (2.8060)	Entropy 1.23414 (1.24023)	Top-1 acc 54.688 (56.598)	Top-5 acc 81.250 (78.471)	lr 0.01789
Train [43][3160/3239]	Time 0.261 (0.587)	Data Time 0.003 (0.012)	Loss 2.6947 (2.8061)	Entropy 1.23415 (1.24021)	Top-1 acc 60.156 (56.595)	Top-5 acc 80.078 (78.469)	lr 0.01789
Train [43][3170/3239]	Time 0.267 (0.587)	Data Time 0.001 (0.012)	Loss 2.9793 (2.8062)	Entropy 1.23411 (1.24019)	Top-1 acc 51.562 (56.593)	Top-5 acc 76.172 (78.468)	lr 0.01789
Train [43][3180/3239]	Time 0.225 (0.586)	Data Time 0.000 (0.012)	Loss 2.9459 (2.8061)	Entropy 1.23410 (1.24018)	Top-1 acc 56.250 (56.598)	Top-5 acc 76.172 (78.467)	lr 0.01789
Train [43][3190/3239]	Time 0.213 (0.586)	Data Time 0.000 (0.012)	Loss 2.8542 (2.8062)	Entropy 1.23411 (1.24016)	Top-1 acc 55.078 (56.594)	Top-5 acc 78.516 (78.466)	lr 0.01789
Train [43][3200/3239]	Time 0.221 (0.585)	Data Time 0.000 (0.012)	Loss 2.6725 (2.8063)	Entropy 1.23411 (1.24014)	Top-1 acc 58.984 (56.594)	Top-5 acc 79.688 (78.465)	lr 0.01789
Train [43][3210/3239]	Time 0.227 (0.585)	Data Time 0.000 (0.012)	Loss 2.8507 (2.8064)	Entropy 1.23412 (1.24012)	Top-1 acc 56.641 (56.593)	Top-5 acc 77.734 (78.463)	lr 0.01788
Train [43][3220/3239]	Time 0.211 (0.584)	Data Time 0.000 (0.012)	Loss 2.6897 (2.8064)	Entropy 1.23405 (1.24010)	Top-1 acc 60.156 (56.593)	Top-5 acc 79.688 (78.465)	lr 0.01788
Train [43][3230/3239]	Time 0.229 (0.584)	Data Time 0.000 (0.012)	Loss 2.8054 (2.8066)	Entropy 1.23400 (1.24008)	Top-1 acc 56.641 (56.586)	Top-5 acc 78.125 (78.463)	lr 0.01788
Train [43][3239/3239]	Time 2.211 (0.584)	Data Time 0.000 (0.012)	Loss 2.7753 (2.8067)	Entropy 1.23400 (1.24006)	Top-1 acc 54.321 (56.582)	Top-5 acc 75.309 (78.461)	lr 0.01788
==========Valid [43/120]	loss 1.637	top-1 acc 63.021 (63.021)	top-5 acc 83.926	Train top-1 56.582	top-5 78.461	Entropy 1.23400	Latency-None: 0.000ms	Flops: 548.34M
Train [44][0/3239]	Time 37.343 (37.343)	Data Time 36.396 (36.396)	Loss 2.6591 (2.6591)	Entropy 1.23399 (1.23399)	Top-1 acc 57.422 (57.422)	Top-5 acc 81.250 (81.250)	lr 0.01788
Train [44][10/3239]	Time 3.492 (4.052)	Data Time 0.002 (3.311)	Loss 2.8698 (2.8480)	Entropy 1.23399 (1.23399)	Top-1 acc 53.516 (55.291)	Top-5 acc 77.344 (77.131)	lr 0.01788
Train [44][20/3239]	Time 0.223 (2.240)	Data Time 0.001 (1.735)	Loss 2.7590 (2.8184)	Entropy 1.23397 (1.23398)	Top-1 acc 54.297 (56.176)	Top-5 acc 78.125 (77.772)	lr 0.01788
Train [44][30/3239]	Time 0.322 (1.671)	Data Time 0.002 (1.176)	Loss 2.8004 (2.7973)	Entropy 1.23399 (1.23398)	Top-1 acc 54.297 (56.540)	Top-5 acc 79.688 (78.390)	lr 0.01788
Train [44][40/3239]	Time 0.202 (1.375)	Data Time 0.001 (0.889)	Loss 3.0168 (2.7962)	Entropy 1.23393 (1.23397)	Top-1 acc 49.219 (56.469)	Top-5 acc 73.828 (78.392)	lr 0.01788
Train [44][50/3239]	Time 0.228 (1.197)	Data Time 0.001 (0.715)	Loss 2.7375 (2.7902)	Entropy 1.23386 (1.23395)	Top-1 acc 59.766 (56.939)	Top-5 acc 78.906 (78.562)	lr 0.01788
Train [44][60/3239]	Time 0.234 (1.075)	Data Time 0.001 (0.598)	Loss 2.9615 (2.7899)	Entropy 1.23378 (1.23393)	Top-1 acc 55.859 (57.076)	Top-5 acc 77.734 (78.599)	lr 0.01788
Train [44][70/3239]	Time 0.265 (0.986)	Data Time 0.001 (0.514)	Loss 2.8315 (2.7859)	Entropy 1.23375 (1.23391)	Top-1 acc 57.422 (57.334)	Top-5 acc 77.734 (78.752)	lr 0.01788
Train [44][80/3239]	Time 0.450 (1.511)	Data Time 0.003 (0.451)	Loss 2.7138 (2.7832)	Entropy 1.23375 (1.23389)	Top-1 acc 60.938 (57.480)	Top-5 acc 78.516 (78.786)	lr 0.01787
Train [44][90/3239]	Time 0.223 (1.400)	Data Time 0.002 (0.402)	Loss 2.7100 (2.7834)	Entropy 1.23341 (1.23386)	Top-1 acc 57.812 (57.357)	Top-5 acc 79.688 (78.820)	lr 0.01787
Train [44][100/3239]	Time 0.222 (1.305)	Data Time 0.001 (0.362)	Loss 2.7716 (2.7855)	Entropy 1.23337 (1.23382)	Top-1 acc 57.422 (57.228)	Top-5 acc 78.125 (78.705)	lr 0.01787
Train [44][110/3239]	Time 0.193 (1.229)	Data Time 0.002 (0.330)	Loss 2.7526 (2.7836)	Entropy 1.23328 (1.23378)	Top-1 acc 53.906 (57.249)	Top-5 acc 79.297 (78.720)	lr 0.01787
Train [44][120/3239]	Time 2.468 (1.165)	Data Time 0.001 (0.303)	Loss 2.7016 (2.7811)	Entropy 1.23328 (1.23374)	Top-1 acc 59.375 (57.367)	Top-5 acc 82.031 (78.758)	lr 0.01787
Train [44][130/3239]	Time 0.363 (1.095)	Data Time 0.002 (0.280)	Loss 2.7746 (2.7771)	Entropy 1.23322 (1.23370)	Top-1 acc 60.547 (57.404)	Top-5 acc 78.516 (78.820)	lr 0.01787
Train [44][140/3239]	Time 0.220 (1.051)	Data Time 0.001 (0.260)	Loss 2.9462 (2.7796)	Entropy 1.23309 (1.23365)	Top-1 acc 53.125 (57.330)	Top-5 acc 75.391 (78.776)	lr 0.01787
Train [44][150/3239]	Time 0.226 (1.012)	Data Time 0.001 (0.243)	Loss 2.6196 (2.7792)	Entropy 1.23308 (1.23362)	Top-1 acc 62.109 (57.347)	Top-5 acc 80.859 (78.798)	lr 0.01787
Train [44][160/3239]	Time 0.212 (0.977)	Data Time 0.001 (0.228)	Loss 2.8255 (2.7788)	Entropy 1.23303 (1.23358)	Top-1 acc 55.078 (57.373)	Top-5 acc 77.344 (78.860)	lr 0.01787
Train [44][170/3239]	Time 0.177 (0.947)	Data Time 0.001 (0.215)	Loss 2.5514 (2.7764)	Entropy 1.23302 (1.23355)	Top-1 acc 64.453 (57.417)	Top-5 acc 83.984 (78.925)	lr 0.01787
Train [44][180/3239]	Time 0.325 (0.920)	Data Time 0.001 (0.203)	Loss 2.8138 (2.7730)	Entropy 1.23296 (1.23352)	Top-1 acc 55.078 (57.443)	Top-5 acc 79.688 (78.973)	lr 0.01786
Train [44][190/3239]	Time 0.225 (0.896)	Data Time 0.001 (0.193)	Loss 3.5643 (2.7732)	Entropy 1.23296 (1.23349)	Top-1 acc 44.141 (57.448)	Top-5 acc 67.969 (79.009)	lr 0.01786
Train [44][200/3239]	Time 0.224 (0.875)	Data Time 0.001 (0.183)	Loss 2.7860 (2.7727)	Entropy 1.23292 (1.23346)	Top-1 acc 56.250 (57.443)	Top-5 acc 79.688 (79.036)	lr 0.01786
Train [44][210/3239]	Time 0.220 (0.856)	Data Time 0.001 (0.174)	Loss 2.7159 (2.7750)	Entropy 1.23289 (1.23344)	Top-1 acc 61.719 (57.450)	Top-5 acc 79.688 (78.951)	lr 0.01786
Train [44][220/3239]	Time 0.227 (0.839)	Data Time 0.001 (0.167)	Loss 2.7108 (2.7726)	Entropy 1.23289 (1.23341)	Top-1 acc 61.719 (57.549)	Top-5 acc 80.078 (79.014)	lr 0.01786
Train [44][230/3239]	Time 2.509 (0.822)	Data Time 0.001 (0.160)	Loss 2.7271 (2.7765)	Entropy 1.23289 (1.23339)	Top-1 acc 59.375 (57.474)	Top-5 acc 80.469 (78.955)	lr 0.01786
Train [44][240/3239]	Time 0.216 (0.798)	Data Time 0.001 (0.153)	Loss 2.7213 (2.7769)	Entropy 1.23286 (1.23337)	Top-1 acc 57.422 (57.420)	Top-5 acc 80.469 (78.989)	lr 0.01786
Train [44][250/3239]	Time 0.233 (0.784)	Data Time 0.001 (0.147)	Loss 2.6597 (2.7773)	Entropy 1.23284 (1.23335)	Top-1 acc 58.203 (57.361)	Top-5 acc 84.375 (79.004)	lr 0.01786
Train [44][260/3239]	Time 0.242 (0.771)	Data Time 0.001 (0.141)	Loss 2.7065 (2.7763)	Entropy 1.23279 (1.23332)	Top-1 acc 58.203 (57.384)	Top-5 acc 81.250 (79.030)	lr 0.01786
Train [44][270/3239]	Time 0.226 (0.760)	Data Time 0.001 (0.136)	Loss 2.7471 (2.7739)	Entropy 1.23277 (1.23330)	Top-1 acc 55.078 (57.469)	Top-5 acc 78.516 (79.084)	lr 0.01786
Train [44][280/3239]	Time 0.324 (0.750)	Data Time 0.001 (0.131)	Loss 2.6578 (2.7718)	Entropy 1.23276 (1.23328)	Top-1 acc 63.672 (57.522)	Top-5 acc 80.469 (79.115)	lr 0.01786
Train [44][290/3239]	Time 0.228 (0.739)	Data Time 0.001 (0.127)	Loss 2.8544 (2.7733)	Entropy 1.23275 (1.23327)	Top-1 acc 55.859 (57.488)	Top-5 acc 76.953 (79.094)	lr 0.01785
Train [44][300/3239]	Time 0.213 (0.730)	Data Time 0.001 (0.123)	Loss 2.7617 (2.7751)	Entropy 1.23273 (1.23325)	Top-1 acc 56.641 (57.391)	Top-5 acc 80.078 (79.071)	lr 0.01785
Train [44][310/3239]	Time 0.226 (0.722)	Data Time 0.002 (0.119)	Loss 2.8426 (2.7753)	Entropy 1.23260 (1.23323)	Top-1 acc 56.250 (57.431)	Top-5 acc 76.562 (79.087)	lr 0.01785
Train [44][320/3239]	Time 0.207 (0.713)	Data Time 0.001 (0.115)	Loss 2.8418 (2.7765)	Entropy 1.23255 (1.23321)	Top-1 acc 53.516 (57.399)	Top-5 acc 78.906 (79.088)	lr 0.01785
Train [44][330/3239]	Time 0.222 (0.706)	Data Time 0.002 (0.112)	Loss 2.7697 (2.7749)	Entropy 1.23249 (1.23319)	Top-1 acc 54.688 (57.396)	Top-5 acc 81.250 (79.129)	lr 0.01785
Train [44][340/3239]	Time 2.378 (0.699)	Data Time 0.002 (0.109)	Loss 2.9011 (2.7753)	Entropy 1.23249 (1.23317)	Top-1 acc 52.734 (57.365)	Top-5 acc 76.172 (79.120)	lr 0.01785
Train [44][350/3239]	Time 0.231 (0.685)	Data Time 0.001 (0.105)	Loss 2.8395 (2.7759)	Entropy 1.23243 (1.23315)	Top-1 acc 58.984 (57.376)	Top-5 acc 77.734 (79.111)	lr 0.01785
Train [44][360/3239]	Time 0.220 (0.679)	Data Time 0.001 (0.103)	Loss 2.7663 (2.7750)	Entropy 1.23241 (1.23313)	Top-1 acc 58.984 (57.411)	Top-5 acc 78.906 (79.127)	lr 0.01785
Train [44][370/3239]	Time 0.230 (0.673)	Data Time 0.001 (0.100)	Loss 2.5943 (2.7747)	Entropy 1.23237 (1.23311)	Top-1 acc 63.281 (57.427)	Top-5 acc 84.766 (79.152)	lr 0.01785
Train [44][380/3239]	Time 0.217 (0.667)	Data Time 0.001 (0.097)	Loss 2.8028 (2.7744)	Entropy 1.23237 (1.23309)	Top-1 acc 58.984 (57.438)	Top-5 acc 77.734 (79.161)	lr 0.01785
Train [44][390/3239]	Time 0.226 (0.662)	Data Time 0.001 (0.095)	Loss 2.6567 (2.7738)	Entropy 1.23233 (1.23307)	Top-1 acc 59.375 (57.467)	Top-5 acc 82.031 (79.177)	lr 0.01785
Train [44][400/3239]	Time 0.221 (0.657)	Data Time 0.001 (0.093)	Loss 2.7138 (2.7732)	Entropy 1.23226 (1.23305)	Top-1 acc 55.859 (57.470)	Top-5 acc 81.250 (79.179)	lr 0.01784
Train [44][410/3239]	Time 0.223 (0.652)	Data Time 0.001 (0.090)	Loss 2.8294 (2.7726)	Entropy 1.23219 (1.23303)	Top-1 acc 58.203 (57.488)	Top-5 acc 78.906 (79.170)	lr 0.01784
Train [44][420/3239]	Time 0.230 (0.648)	Data Time 0.001 (0.088)	Loss 2.6797 (2.7729)	Entropy 1.23216 (1.23301)	Top-1 acc 61.328 (57.476)	Top-5 acc 82.422 (79.169)	lr 0.01784
Train [44][430/3239]	Time 0.209 (0.643)	Data Time 0.001 (0.086)	Loss 2.8284 (2.7723)	Entropy 1.23214 (1.23299)	Top-1 acc 54.688 (57.529)	Top-5 acc 80.859 (79.175)	lr 0.01784
Train [44][440/3239]	Time 0.223 (0.750)	Data Time 0.003 (0.084)	Loss 2.6870 (2.7699)	Entropy 1.23215 (1.23297)	Top-1 acc 58.594 (57.555)	Top-5 acc 79.688 (79.220)	lr 0.01784
Train [44][450/3239]	Time 2.446 (0.744)	Data Time 0.003 (0.082)	Loss 2.9013 (2.7713)	Entropy 1.23215 (1.23295)	Top-1 acc 55.078 (57.519)	Top-5 acc 74.219 (79.196)	lr 0.01784
Train [44][460/3239]	Time 0.202 (0.733)	Data Time 0.002 (0.081)	Loss 2.8147 (2.7717)	Entropy 1.23209 (1.23293)	Top-1 acc 57.422 (57.511)	Top-5 acc 81.641 (79.196)	lr 0.01784
Train [44][470/3239]	Time 0.231 (0.727)	Data Time 0.001 (0.079)	Loss 2.5294 (2.7725)	Entropy 1.23207 (1.23292)	Top-1 acc 63.281 (57.497)	Top-5 acc 83.594 (79.192)	lr 0.01784
Train [44][480/3239]	Time 0.226 (0.722)	Data Time 0.001 (0.077)	Loss 2.7112 (2.7730)	Entropy 1.23203 (1.23290)	Top-1 acc 56.250 (57.477)	Top-5 acc 80.859 (79.184)	lr 0.01784
Train [44][490/3239]	Time 0.218 (0.716)	Data Time 0.001 (0.076)	Loss 2.7345 (2.7724)	Entropy 1.23201 (1.23288)	Top-1 acc 57.422 (57.492)	Top-5 acc 80.859 (79.197)	lr 0.01784
Train [44][500/3239]	Time 0.214 (0.711)	Data Time 0.001 (0.074)	Loss 2.8448 (2.7730)	Entropy 1.23194 (1.23286)	Top-1 acc 55.078 (57.468)	Top-5 acc 78.516 (79.190)	lr 0.01784
Train [44][510/3239]	Time 0.212 (0.706)	Data Time 0.001 (0.073)	Loss 2.8187 (2.7739)	Entropy 1.23189 (1.23284)	Top-1 acc 58.203 (57.466)	Top-5 acc 76.953 (79.166)	lr 0.01783
Train [44][520/3239]	Time 0.229 (0.701)	Data Time 0.001 (0.072)	Loss 2.8755 (2.7746)	Entropy 1.23185 (1.23282)	Top-1 acc 53.125 (57.432)	Top-5 acc 77.344 (79.166)	lr 0.01783
Train [44][530/3239]	Time 0.220 (0.696)	Data Time 0.001 (0.070)	Loss 2.7772 (2.7756)	Entropy 1.23174 (1.23281)	Top-1 acc 57.812 (57.415)	Top-5 acc 79.297 (79.134)	lr 0.01783
Train [44][540/3239]	Time 0.216 (0.692)	Data Time 0.001 (0.069)	Loss 2.7560 (2.7756)	Entropy 1.23161 (1.23279)	Top-1 acc 57.422 (57.408)	Top-5 acc 78.125 (79.115)	lr 0.01783
Train [44][550/3239]	Time 0.242 (0.687)	Data Time 0.001 (0.068)	Loss 2.7448 (2.7759)	Entropy 1.23151 (1.23276)	Top-1 acc 60.156 (57.414)	Top-5 acc 78.906 (79.115)	lr 0.01783
Train [44][560/3239]	Time 2.387 (0.683)	Data Time 0.002 (0.067)	Loss 2.6907 (2.7755)	Entropy 1.23151 (1.23274)	Top-1 acc 63.281 (57.436)	Top-5 acc 80.469 (79.119)	lr 0.01783
Train [44][570/3239]	Time 0.217 (0.675)	Data Time 0.001 (0.065)	Loss 2.7233 (2.7749)	Entropy 1.23149 (1.23272)	Top-1 acc 56.641 (57.425)	Top-5 acc 80.859 (79.136)	lr 0.01783
Train [44][580/3239]	Time 0.237 (0.671)	Data Time 0.001 (0.064)	Loss 2.7179 (2.7762)	Entropy 1.23146 (1.23270)	Top-1 acc 54.297 (57.387)	Top-5 acc 81.250 (79.101)	lr 0.01783
Train [44][590/3239]	Time 0.305 (0.668)	Data Time 0.001 (0.063)	Loss 2.7667 (2.7754)	Entropy 1.23146 (1.23268)	Top-1 acc 56.641 (57.401)	Top-5 acc 78.906 (79.123)	lr 0.01783
Train [44][600/3239]	Time 0.253 (0.664)	Data Time 0.001 (0.062)	Loss 2.8348 (2.7768)	Entropy 1.23142 (1.23266)	Top-1 acc 54.688 (57.367)	Top-5 acc 76.172 (79.104)	lr 0.01783
Train [44][610/3239]	Time 0.229 (0.661)	Data Time 0.001 (0.061)	Loss 2.7521 (2.7763)	Entropy 1.23138 (1.23264)	Top-1 acc 57.422 (57.383)	Top-5 acc 78.516 (79.106)	lr 0.01783
Train [44][620/3239]	Time 0.207 (0.658)	Data Time 0.001 (0.060)	Loss 2.8153 (2.7760)	Entropy 1.23131 (1.23261)	Top-1 acc 53.125 (57.380)	Top-5 acc 80.859 (79.123)	lr 0.01782
Train [44][630/3239]	Time 0.208 (0.655)	Data Time 0.001 (0.059)	Loss 2.7982 (2.7760)	Entropy 1.23124 (1.23259)	Top-1 acc 58.203 (57.374)	Top-5 acc 75.391 (79.108)	lr 0.01782
Train [44][640/3239]	Time 0.324 (0.652)	Data Time 0.001 (0.059)	Loss 2.7491 (2.7760)	Entropy 1.23117 (1.23257)	Top-1 acc 57.812 (57.394)	Top-5 acc 77.734 (79.104)	lr 0.01782
Train [44][650/3239]	Time 0.219 (0.648)	Data Time 0.001 (0.058)	Loss 2.8474 (2.7764)	Entropy 1.23116 (1.23255)	Top-1 acc 54.688 (57.384)	Top-5 acc 76.953 (79.099)	lr 0.01782
Train [44][660/3239]	Time 0.273 (0.646)	Data Time 0.001 (0.057)	Loss 2.7731 (2.7766)	Entropy 1.23111 (1.23253)	Top-1 acc 55.859 (57.379)	Top-5 acc 79.297 (79.102)	lr 0.01782
Train [44][670/3239]	Time 2.468 (0.643)	Data Time 0.002 (0.056)	Loss 2.7556 (2.7769)	Entropy 1.23111 (1.23251)	Top-1 acc 58.203 (57.372)	Top-5 acc 80.078 (79.090)	lr 0.01782
Train [44][680/3239]	Time 0.237 (0.637)	Data Time 0.001 (0.055)	Loss 2.6897 (2.7771)	Entropy 1.23105 (1.23249)	Top-1 acc 58.594 (57.361)	Top-5 acc 80.859 (79.087)	lr 0.01782
Train [44][690/3239]	Time 0.312 (0.634)	Data Time 0.001 (0.054)	Loss 2.6217 (2.7766)	Entropy 1.23104 (1.23247)	Top-1 acc 58.984 (57.363)	Top-5 acc 82.031 (79.096)	lr 0.01782
Train [44][700/3239]	Time 0.226 (0.632)	Data Time 0.001 (0.054)	Loss 2.7126 (2.7759)	Entropy 1.23099 (1.23245)	Top-1 acc 57.812 (57.371)	Top-5 acc 78.906 (79.109)	lr 0.01782
Train [44][710/3239]	Time 0.267 (0.629)	Data Time 0.001 (0.053)	Loss 2.8746 (2.7757)	Entropy 1.23097 (1.23242)	Top-1 acc 50.391 (57.361)	Top-5 acc 78.125 (79.114)	lr 0.01782
Train [44][720/3239]	Time 0.234 (0.627)	Data Time 0.001 (0.052)	Loss 2.8295 (2.7757)	Entropy 1.23091 (1.23240)	Top-1 acc 55.078 (57.369)	Top-5 acc 78.125 (79.101)	lr 0.01782
Train [44][730/3239]	Time 0.227 (0.624)	Data Time 0.001 (0.051)	Loss 2.8555 (2.7756)	Entropy 1.23083 (1.23238)	Top-1 acc 52.344 (57.373)	Top-5 acc 76.172 (79.098)	lr 0.01781
Train [44][740/3239]	Time 0.219 (0.622)	Data Time 0.001 (0.051)	Loss 2.8065 (2.7747)	Entropy 1.23081 (1.23236)	Top-1 acc 55.859 (57.389)	Top-5 acc 78.906 (79.118)	lr 0.01781
Train [44][750/3239]	Time 0.229 (0.620)	Data Time 0.001 (0.050)	Loss 2.9120 (2.7751)	Entropy 1.23079 (1.23234)	Top-1 acc 54.297 (57.388)	Top-5 acc 78.906 (79.114)	lr 0.01781
Train [44][760/3239]	Time 0.201 (0.618)	Data Time 0.001 (0.050)	Loss 2.8127 (2.7751)	Entropy 1.23074 (1.23232)	Top-1 acc 58.594 (57.407)	Top-5 acc 78.906 (79.108)	lr 0.01781
Train [44][770/3239]	Time 0.267 (0.616)	Data Time 0.001 (0.049)	Loss 2.7866 (2.7754)	Entropy 1.23071 (1.23230)	Top-1 acc 56.641 (57.391)	Top-5 acc 79.688 (79.102)	lr 0.01781
Train [44][780/3239]	Time 2.518 (0.614)	Data Time 0.001 (0.048)	Loss 2.9265 (2.7761)	Entropy 1.23071 (1.23228)	Top-1 acc 54.297 (57.374)	Top-5 acc 77.344 (79.094)	lr 0.01781
Train [44][790/3239]	Time 0.210 (0.609)	Data Time 0.001 (0.048)	Loss 2.7953 (2.7761)	Entropy 1.23073 (1.23226)	Top-1 acc 57.422 (57.367)	Top-5 acc 79.297 (79.099)	lr 0.01781
Train [44][800/3239]	Time 0.212 (0.607)	Data Time 0.001 (0.047)	Loss 2.6653 (2.7759)	Entropy 1.23072 (1.23224)	Top-1 acc 59.766 (57.362)	Top-5 acc 79.297 (79.102)	lr 0.01781
Train [44][810/3239]	Time 0.328 (0.661)	Data Time 0.003 (0.047)	Loss 2.6605 (2.7759)	Entropy 1.23070 (1.23222)	Top-1 acc 59.375 (57.365)	Top-5 acc 80.859 (79.094)	lr 0.01781
Train [44][820/3239]	Time 0.283 (0.659)	Data Time 0.002 (0.046)	Loss 2.8161 (2.7748)	Entropy 1.23070 (1.23220)	Top-1 acc 50.391 (57.383)	Top-5 acc 78.125 (79.116)	lr 0.01781
Train [44][830/3239]	Time 0.231 (0.656)	Data Time 0.001 (0.045)	Loss 2.6437 (2.7744)	Entropy 1.23067 (1.23219)	Top-1 acc 60.938 (57.392)	Top-5 acc 80.859 (79.120)	lr 0.01781
Train [44][840/3239]	Time 0.207 (0.654)	Data Time 0.001 (0.045)	Loss 2.8207 (2.7754)	Entropy 1.23062 (1.23217)	Top-1 acc 58.594 (57.374)	Top-5 acc 78.516 (79.100)	lr 0.01780
Train [44][850/3239]	Time 0.227 (0.651)	Data Time 0.001 (0.044)	Loss 2.7790 (2.7756)	Entropy 1.23054 (1.23215)	Top-1 acc 57.422 (57.373)	Top-5 acc 78.516 (79.104)	lr 0.01780
Train [44][860/3239]	Time 0.221 (0.649)	Data Time 0.001 (0.044)	Loss 2.7928 (2.7755)	Entropy 1.23051 (1.23213)	Top-1 acc 58.594 (57.375)	Top-5 acc 78.906 (79.105)	lr 0.01780
Train [44][870/3239]	Time 0.244 (0.647)	Data Time 0.001 (0.043)	Loss 2.7512 (2.7753)	Entropy 1.23048 (1.23211)	Top-1 acc 59.375 (57.381)	Top-5 acc 78.516 (79.102)	lr 0.01780
Train [44][880/3239]	Time 0.286 (0.645)	Data Time 0.001 (0.043)	Loss 2.7950 (2.7752)	Entropy 1.23044 (1.23209)	Top-1 acc 58.984 (57.395)	Top-5 acc 79.688 (79.100)	lr 0.01780
Train [44][890/3239]	Time 2.489 (0.643)	Data Time 0.002 (0.043)	Loss 2.9153 (2.7754)	Entropy 1.23044 (1.23207)	Top-1 acc 53.516 (57.386)	Top-5 acc 76.562 (79.101)	lr 0.01780
Train [44][900/3239]	Time 0.318 (0.638)	Data Time 0.002 (0.042)	Loss 2.7447 (2.7756)	Entropy 1.23037 (1.23205)	Top-1 acc 57.812 (57.390)	Top-5 acc 81.250 (79.100)	lr 0.01780
Train [44][910/3239]	Time 0.217 (0.636)	Data Time 0.001 (0.042)	Loss 2.7109 (2.7750)	Entropy 1.23036 (1.23204)	Top-1 acc 55.859 (57.405)	Top-5 acc 81.250 (79.116)	lr 0.01780
Train [44][920/3239]	Time 0.221 (0.634)	Data Time 0.001 (0.041)	Loss 2.6944 (2.7758)	Entropy 1.23030 (1.23202)	Top-1 acc 57.812 (57.377)	Top-5 acc 79.688 (79.102)	lr 0.01780
Train [44][930/3239]	Time 0.228 (0.632)	Data Time 0.001 (0.041)	Loss 2.8396 (2.7761)	Entropy 1.23028 (1.23200)	Top-1 acc 57.031 (57.368)	Top-5 acc 78.516 (79.093)	lr 0.01780
Train [44][940/3239]	Time 0.222 (0.630)	Data Time 0.001 (0.040)	Loss 2.9067 (2.7765)	Entropy 1.23021 (1.23198)	Top-1 acc 53.906 (57.353)	Top-5 acc 72.656 (79.080)	lr 0.01780
Train [44][950/3239]	Time 0.330 (0.629)	Data Time 0.001 (0.040)	Loss 2.7330 (2.7769)	Entropy 1.23017 (1.23196)	Top-1 acc 60.156 (57.353)	Top-5 acc 76.953 (79.076)	lr 0.01779
Train [44][960/3239]	Time 0.230 (0.627)	Data Time 0.002 (0.040)	Loss 2.6888 (2.7773)	Entropy 1.23016 (1.23194)	Top-1 acc 60.938 (57.348)	Top-5 acc 83.594 (79.063)	lr 0.01779
Train [44][970/3239]	Time 0.219 (0.625)	Data Time 0.001 (0.039)	Loss 2.6671 (2.7775)	Entropy 1.23006 (1.23192)	Top-1 acc 60.156 (57.345)	Top-5 acc 82.031 (79.057)	lr 0.01779
Train [44][980/3239]	Time 0.227 (0.623)	Data Time 0.001 (0.039)	Loss 2.7731 (2.7774)	Entropy 1.23006 (1.23191)	Top-1 acc 58.203 (57.341)	Top-5 acc 80.078 (79.054)	lr 0.01779
Train [44][990/3239]	Time 0.221 (0.621)	Data Time 0.001 (0.038)	Loss 2.9215 (2.7775)	Entropy 1.23003 (1.23189)	Top-1 acc 53.906 (57.344)	Top-5 acc 78.516 (79.053)	lr 0.01779
Train [44][1000/3239]	Time 2.513 (0.620)	Data Time 0.001 (0.038)	Loss 2.7574 (2.7775)	Entropy 1.23003 (1.23187)	Top-1 acc 55.469 (57.340)	Top-5 acc 78.125 (79.057)	lr 0.01779
Train [44][1010/3239]	Time 0.219 (0.616)	Data Time 0.001 (0.038)	Loss 2.7153 (2.7780)	Entropy 1.23003 (1.23185)	Top-1 acc 59.375 (57.336)	Top-5 acc 79.297 (79.053)	lr 0.01779
Train [44][1020/3239]	Time 0.218 (0.615)	Data Time 0.001 (0.037)	Loss 2.5808 (2.7784)	Entropy 1.23000 (1.23183)	Top-1 acc 59.766 (57.326)	Top-5 acc 85.547 (79.047)	lr 0.01779
Train [44][1030/3239]	Time 0.234 (0.613)	Data Time 0.001 (0.037)	Loss 2.7195 (2.7785)	Entropy 1.22997 (1.23181)	Top-1 acc 57.812 (57.323)	Top-5 acc 81.250 (79.040)	lr 0.01779
Train [44][1040/3239]	Time 0.219 (0.611)	Data Time 0.001 (0.037)	Loss 2.7890 (2.7787)	Entropy 1.22994 (1.23180)	Top-1 acc 58.594 (57.316)	Top-5 acc 78.125 (79.032)	lr 0.01779
Train [44][1050/3239]	Time 0.306 (0.610)	Data Time 0.001 (0.036)	Loss 2.7764 (2.7788)	Entropy 1.22985 (1.23178)	Top-1 acc 54.297 (57.309)	Top-5 acc 80.859 (79.030)	lr 0.01779
Train [44][1060/3239]	Time 0.199 (0.609)	Data Time 0.001 (0.036)	Loss 2.7740 (2.7787)	Entropy 1.22975 (1.23176)	Top-1 acc 55.469 (57.305)	Top-5 acc 80.078 (79.037)	lr 0.01778
Train [44][1070/3239]	Time 0.206 (0.607)	Data Time 0.001 (0.036)	Loss 2.8839 (2.7786)	Entropy 1.22971 (1.23174)	Top-1 acc 53.516 (57.301)	Top-5 acc 79.297 (79.046)	lr 0.01778
Train [44][1080/3239]	Time 0.262 (0.606)	Data Time 0.002 (0.035)	Loss 2.7938 (2.7786)	Entropy 1.22967 (1.23172)	Top-1 acc 54.688 (57.296)	Top-5 acc 78.516 (79.049)	lr 0.01778
Train [44][1090/3239]	Time 0.214 (0.605)	Data Time 0.001 (0.035)	Loss 2.7273 (2.7786)	Entropy 1.22960 (1.23170)	Top-1 acc 58.203 (57.300)	Top-5 acc 80.859 (79.049)	lr 0.01778
Train [44][1100/3239]	Time 0.318 (0.603)	Data Time 0.001 (0.035)	Loss 3.0021 (2.7792)	Entropy 1.22956 (1.23168)	Top-1 acc 47.266 (57.279)	Top-5 acc 75.000 (79.040)	lr 0.01778
Train [44][1110/3239]	Time 2.456 (0.602)	Data Time 0.001 (0.034)	Loss 2.8855 (2.7802)	Entropy 1.22956 (1.23166)	Top-1 acc 55.469 (57.257)	Top-5 acc 78.516 (79.019)	lr 0.01778
Train [44][1120/3239]	Time 0.226 (0.599)	Data Time 0.001 (0.034)	Loss 2.8352 (2.7800)	Entropy 1.22940 (1.23164)	Top-1 acc 56.641 (57.265)	Top-5 acc 80.078 (79.025)	lr 0.01778
Train [44][1130/3239]	Time 0.215 (0.597)	Data Time 0.001 (0.034)	Loss 2.8184 (2.7802)	Entropy 1.22927 (1.23162)	Top-1 acc 58.203 (57.267)	Top-5 acc 77.734 (79.018)	lr 0.01778
Train [44][1140/3239]	Time 0.242 (0.596)	Data Time 0.001 (0.034)	Loss 2.6641 (2.7802)	Entropy 1.22929 (1.23160)	Top-1 acc 61.328 (57.270)	Top-5 acc 82.422 (79.021)	lr 0.01778
Train [44][1150/3239]	Time 0.241 (0.595)	Data Time 0.001 (0.033)	Loss 2.9481 (2.7803)	Entropy 1.22929 (1.23158)	Top-1 acc 50.000 (57.265)	Top-5 acc 76.953 (79.019)	lr 0.01778
Train [44][1160/3239]	Time 0.200 (0.594)	Data Time 0.001 (0.033)	Loss 2.9275 (2.7804)	Entropy 1.22930 (1.23156)	Top-1 acc 51.172 (57.253)	Top-5 acc 76.172 (79.012)	lr 0.01778
Train [44][1170/3239]	Time 0.388 (0.631)	Data Time 0.003 (0.033)	Loss 2.6092 (2.7803)	Entropy 1.22924 (1.23154)	Top-1 acc 62.891 (57.257)	Top-5 acc 82.422 (79.013)	lr 0.01777
Train [44][1180/3239]	Time 0.228 (0.630)	Data Time 0.002 (0.033)	Loss 2.6183 (2.7803)	Entropy 1.22922 (1.23152)	Top-1 acc 59.766 (57.261)	Top-5 acc 81.250 (79.012)	lr 0.01777
Train [44][1190/3239]	Time 0.226 (0.628)	Data Time 0.001 (0.032)	Loss 2.8460 (2.7801)	Entropy 1.22922 (1.23150)	Top-1 acc 57.812 (57.268)	Top-5 acc 77.344 (79.012)	lr 0.01777
Train [44][1200/3239]	Time 0.318 (0.627)	Data Time 0.001 (0.032)	Loss 2.8480 (2.7802)	Entropy 1.22917 (1.23148)	Top-1 acc 55.469 (57.264)	Top-5 acc 79.297 (79.011)	lr 0.01777
Train [44][1210/3239]	Time 0.254 (0.626)	Data Time 0.001 (0.032)	Loss 2.7759 (2.7807)	Entropy 1.22918 (1.23147)	Top-1 acc 57.422 (57.255)	Top-5 acc 79.688 (78.999)	lr 0.01777
Train [44][1220/3239]	Time 2.323 (0.624)	Data Time 0.001 (0.031)	Loss 2.8277 (2.7811)	Entropy 1.22918 (1.23145)	Top-1 acc 59.766 (57.248)	Top-5 acc 79.688 (78.991)	lr 0.01777
Train [44][1230/3239]	Time 0.219 (0.621)	Data Time 0.001 (0.031)	Loss 2.6513 (2.7811)	Entropy 1.22916 (1.23143)	Top-1 acc 59.375 (57.248)	Top-5 acc 80.469 (78.993)	lr 0.01777
Train [44][1240/3239]	Time 0.249 (0.620)	Data Time 0.001 (0.031)	Loss 2.9826 (2.7815)	Entropy 1.22914 (1.23141)	Top-1 acc 56.250 (57.240)	Top-5 acc 77.734 (78.992)	lr 0.01777
Train [44][1250/3239]	Time 0.232 (0.618)	Data Time 0.002 (0.031)	Loss 2.9750 (2.7816)	Entropy 1.22914 (1.23139)	Top-1 acc 51.953 (57.236)	Top-5 acc 77.734 (78.993)	lr 0.01777
Train [44][1260/3239]	Time 0.245 (0.617)	Data Time 0.001 (0.031)	Loss 2.7063 (2.7813)	Entropy 1.22910 (1.23137)	Top-1 acc 61.328 (57.248)	Top-5 acc 77.734 (78.999)	lr 0.01777
Train [44][1270/3239]	Time 0.253 (0.616)	Data Time 0.001 (0.030)	Loss 2.8154 (2.7815)	Entropy 1.22903 (1.23136)	Top-1 acc 54.688 (57.242)	Top-5 acc 78.906 (78.996)	lr 0.01777
Train [44][1280/3239]	Time 0.247 (0.615)	Data Time 0.001 (0.030)	Loss 2.7224 (2.7824)	Entropy 1.22903 (1.23134)	Top-1 acc 58.203 (57.221)	Top-5 acc 81.641 (78.978)	lr 0.01776
Train [44][1290/3239]	Time 0.247 (0.614)	Data Time 0.001 (0.030)	Loss 2.8140 (2.7826)	Entropy 1.22900 (1.23132)	Top-1 acc 58.203 (57.222)	Top-5 acc 76.953 (78.973)	lr 0.01776
Train [44][1300/3239]	Time 0.223 (0.613)	Data Time 0.001 (0.030)	Loss 2.5806 (2.7824)	Entropy 1.22895 (1.23130)	Top-1 acc 60.156 (57.225)	Top-5 acc 83.594 (78.977)	lr 0.01776
Train [44][1310/3239]	Time 0.213 (0.611)	Data Time 0.001 (0.029)	Loss 2.6112 (2.7824)	Entropy 1.22887 (1.23128)	Top-1 acc 59.375 (57.224)	Top-5 acc 81.641 (78.977)	lr 0.01776
Train [44][1320/3239]	Time 0.252 (0.610)	Data Time 0.001 (0.029)	Loss 2.6374 (2.7825)	Entropy 1.22893 (1.23127)	Top-1 acc 60.156 (57.216)	Top-5 acc 85.156 (78.973)	lr 0.01776
Train [44][1330/3239]	Time 2.403 (0.609)	Data Time 0.001 (0.029)	Loss 2.9493 (2.7825)	Entropy 1.22893 (1.23125)	Top-1 acc 52.344 (57.220)	Top-5 acc 74.219 (78.970)	lr 0.01776
Train [44][1340/3239]	Time 0.255 (0.606)	Data Time 0.001 (0.029)	Loss 2.7649 (2.7824)	Entropy 1.22892 (1.23123)	Top-1 acc 58.594 (57.220)	Top-5 acc 78.516 (78.972)	lr 0.01776
Train [44][1350/3239]	Time 0.228 (0.605)	Data Time 0.001 (0.029)	Loss 2.9040 (2.7823)	Entropy 1.22893 (1.23121)	Top-1 acc 60.547 (57.232)	Top-5 acc 75.000 (78.976)	lr 0.01776
Train [44][1360/3239]	Time 0.214 (0.604)	Data Time 0.001 (0.028)	Loss 2.8084 (2.7824)	Entropy 1.22893 (1.23120)	Top-1 acc 55.859 (57.227)	Top-5 acc 77.734 (78.969)	lr 0.01776
Train [44][1370/3239]	Time 0.212 (0.603)	Data Time 0.001 (0.028)	Loss 2.9154 (2.7823)	Entropy 1.22889 (1.23118)	Top-1 acc 54.688 (57.227)	Top-5 acc 75.781 (78.969)	lr 0.01776
Train [44][1380/3239]	Time 0.218 (0.602)	Data Time 0.001 (0.028)	Loss 2.6944 (2.7823)	Entropy 1.22886 (1.23116)	Top-1 acc 60.938 (57.230)	Top-5 acc 80.859 (78.964)	lr 0.01776
Train [44][1390/3239]	Time 0.226 (0.601)	Data Time 0.001 (0.028)	Loss 2.8948 (2.7826)	Entropy 1.22884 (1.23115)	Top-1 acc 55.859 (57.219)	Top-5 acc 75.781 (78.951)	lr 0.01775
Train [44][1400/3239]	Time 0.263 (0.600)	Data Time 0.001 (0.028)	Loss 2.6648 (2.7833)	Entropy 1.22885 (1.23113)	Top-1 acc 62.109 (57.199)	Top-5 acc 80.078 (78.939)	lr 0.01775
Train [44][1410/3239]	Time 0.206 (0.599)	Data Time 0.001 (0.027)	Loss 2.8783 (2.7832)	Entropy 1.22873 (1.23111)	Top-1 acc 56.641 (57.198)	Top-5 acc 76.562 (78.940)	lr 0.01775
Train [44][1420/3239]	Time 0.253 (0.598)	Data Time 0.001 (0.027)	Loss 2.8782 (2.7837)	Entropy 1.22870 (1.23110)	Top-1 acc 53.906 (57.189)	Top-5 acc 76.953 (78.934)	lr 0.01775
Train [44][1430/3239]	Time 0.221 (0.597)	Data Time 0.001 (0.027)	Loss 2.7755 (2.7837)	Entropy 1.22867 (1.23108)	Top-1 acc 55.859 (57.187)	Top-5 acc 79.688 (78.936)	lr 0.01775
Train [44][1440/3239]	Time 2.509 (0.596)	Data Time 0.001 (0.027)	Loss 2.7326 (2.7837)	Entropy 1.22867 (1.23106)	Top-1 acc 58.203 (57.182)	Top-5 acc 79.688 (78.930)	lr 0.01775
Train [44][1450/3239]	Time 0.294 (0.594)	Data Time 0.001 (0.027)	Loss 2.8888 (2.7841)	Entropy 1.22862 (1.23105)	Top-1 acc 53.125 (57.176)	Top-5 acc 76.172 (78.917)	lr 0.01775
Train [44][1460/3239]	Time 0.222 (0.593)	Data Time 0.001 (0.027)	Loss 2.7543 (2.7842)	Entropy 1.22858 (1.23103)	Top-1 acc 60.547 (57.176)	Top-5 acc 80.469 (78.916)	lr 0.01775
Train [44][1470/3239]	Time 0.223 (0.592)	Data Time 0.001 (0.026)	Loss 2.7379 (2.7844)	Entropy 1.22844 (1.23101)	Top-1 acc 59.766 (57.183)	Top-5 acc 79.297 (78.911)	lr 0.01775
Train [44][1480/3239]	Time 0.229 (0.591)	Data Time 0.001 (0.026)	Loss 2.9990 (2.7845)	Entropy 1.22837 (1.23099)	Top-1 acc 53.125 (57.182)	Top-5 acc 73.047 (78.904)	lr 0.01775
Train [44][1490/3239]	Time 0.209 (0.590)	Data Time 0.001 (0.026)	Loss 2.7242 (2.7848)	Entropy 1.22833 (1.23098)	Top-1 acc 60.547 (57.176)	Top-5 acc 81.641 (78.903)	lr 0.01775
Train [44][1500/3239]	Time 0.314 (0.589)	Data Time 0.001 (0.026)	Loss 2.7508 (2.7849)	Entropy 1.22825 (1.23096)	Top-1 acc 60.938 (57.174)	Top-5 acc 75.781 (78.892)	lr 0.01774
Train [44][1510/3239]	Time 0.234 (0.588)	Data Time 0.001 (0.026)	Loss 2.7428 (2.7849)	Entropy 1.22821 (1.23094)	Top-1 acc 57.812 (57.176)	Top-5 acc 80.469 (78.893)	lr 0.01774
Train [44][1520/3239]	Time 0.237 (0.587)	Data Time 0.002 (0.026)	Loss 2.6916 (2.7853)	Entropy 1.22818 (1.23092)	Top-1 acc 56.641 (57.166)	Top-5 acc 78.906 (78.880)	lr 0.01774
Train [44][1530/3239]	Time 0.331 (0.615)	Data Time 0.002 (0.025)	Loss 2.7158 (2.7855)	Entropy 1.22811 (1.23090)	Top-1 acc 57.422 (57.159)	Top-5 acc 80.469 (78.873)	lr 0.01774
Train [44][1540/3239]	Time 0.237 (0.615)	Data Time 0.002 (0.025)	Loss 2.8176 (2.7857)	Entropy 1.22812 (1.23089)	Top-1 acc 55.859 (57.154)	Top-5 acc 77.734 (78.863)	lr 0.01774
Train [44][1550/3239]	Time 2.585 (0.614)	Data Time 0.003 (0.025)	Loss 2.8449 (2.7858)	Entropy 1.22812 (1.23087)	Top-1 acc 54.688 (57.149)	Top-5 acc 81.641 (78.862)	lr 0.01774
Train [44][1560/3239]	Time 0.235 (0.611)	Data Time 0.001 (0.025)	Loss 2.7974 (2.7857)	Entropy 1.22808 (1.23085)	Top-1 acc 57.031 (57.148)	Top-5 acc 79.688 (78.866)	lr 0.01774
Train [44][1570/3239]	Time 0.221 (0.610)	Data Time 0.001 (0.025)	Loss 2.9822 (2.7860)	Entropy 1.22806 (1.23083)	Top-1 acc 54.688 (57.144)	Top-5 acc 71.875 (78.859)	lr 0.01774
Train [44][1580/3239]	Time 0.233 (0.609)	Data Time 0.002 (0.025)	Loss 2.6668 (2.7857)	Entropy 1.22800 (1.23082)	Top-1 acc 61.719 (57.152)	Top-5 acc 81.641 (78.865)	lr 0.01774
Train [44][1590/3239]	Time 0.218 (0.608)	Data Time 0.001 (0.025)	Loss 2.7540 (2.7857)	Entropy 1.22801 (1.23080)	Top-1 acc 53.906 (57.152)	Top-5 acc 80.078 (78.864)	lr 0.01774
Train [44][1600/3239]	Time 0.316 (0.608)	Data Time 0.001 (0.024)	Loss 2.7401 (2.7857)	Entropy 1.22798 (1.23078)	Top-1 acc 56.250 (57.152)	Top-5 acc 78.906 (78.864)	lr 0.01774
Train [44][1610/3239]	Time 0.194 (0.606)	Data Time 0.001 (0.024)	Loss 2.9648 (2.7859)	Entropy 1.22795 (1.23076)	Top-1 acc 53.125 (57.152)	Top-5 acc 75.000 (78.864)	lr 0.01773
Train [44][1620/3239]	Time 0.216 (0.606)	Data Time 0.001 (0.024)	Loss 2.9922 (2.7859)	Entropy 1.22795 (1.23075)	Top-1 acc 47.656 (57.141)	Top-5 acc 73.047 (78.861)	lr 0.01773
Train [44][1630/3239]	Time 0.218 (0.605)	Data Time 0.001 (0.024)	Loss 2.9472 (2.7860)	Entropy 1.22790 (1.23073)	Top-1 acc 54.297 (57.132)	Top-5 acc 73.828 (78.851)	lr 0.01773
Train [44][1640/3239]	Time 0.218 (0.604)	Data Time 0.001 (0.024)	Loss 2.9025 (2.7857)	Entropy 1.22779 (1.23071)	Top-1 acc 52.344 (57.138)	Top-5 acc 76.562 (78.859)	lr 0.01773
Train [44][1650/3239]	Time 0.272 (0.603)	Data Time 0.001 (0.024)	Loss 2.7501 (2.7857)	Entropy 1.22776 (1.23069)	Top-1 acc 57.812 (57.142)	Top-5 acc 81.641 (78.856)	lr 0.01773
Train [44][1660/3239]	Time 2.368 (0.602)	Data Time 0.001 (0.024)	Loss 2.9073 (2.7858)	Entropy 1.22776 (1.23068)	Top-1 acc 54.688 (57.138)	Top-5 acc 74.219 (78.857)	lr 0.01773
Train [44][1670/3239]	Time 0.226 (0.600)	Data Time 0.001 (0.023)	Loss 2.7488 (2.7857)	Entropy 1.22767 (1.23066)	Top-1 acc 58.203 (57.133)	Top-5 acc 79.688 (78.859)	lr 0.01773
Train [44][1680/3239]	Time 0.217 (0.599)	Data Time 0.001 (0.023)	Loss 2.9355 (2.7859)	Entropy 1.22764 (1.23064)	Top-1 acc 51.172 (57.133)	Top-5 acc 75.391 (78.854)	lr 0.01773
Train [44][1690/3239]	Time 0.230 (0.598)	Data Time 0.001 (0.023)	Loss 2.6819 (2.7860)	Entropy 1.22765 (1.23062)	Top-1 acc 60.156 (57.133)	Top-5 acc 82.812 (78.848)	lr 0.01773
Train [44][1700/3239]	Time 0.209 (0.597)	Data Time 0.001 (0.023)	Loss 2.8231 (2.7860)	Entropy 1.22765 (1.23060)	Top-1 acc 53.516 (57.133)	Top-5 acc 78.125 (78.847)	lr 0.01773
Train [44][1710/3239]	Time 0.207 (0.597)	Data Time 0.001 (0.023)	Loss 2.7475 (2.7861)	Entropy 1.22762 (1.23059)	Top-1 acc 54.688 (57.132)	Top-5 acc 80.859 (78.848)	lr 0.01772
Train [44][1720/3239]	Time 0.219 (0.596)	Data Time 0.001 (0.023)	Loss 2.9982 (2.7862)	Entropy 1.22760 (1.23057)	Top-1 acc 52.344 (57.130)	Top-5 acc 76.562 (78.847)	lr 0.01772
Train [44][1730/3239]	Time 0.214 (0.595)	Data Time 0.001 (0.023)	Loss 2.7242 (2.7864)	Entropy 1.22755 (1.23055)	Top-1 acc 58.594 (57.121)	Top-5 acc 80.469 (78.841)	lr 0.01772
Train [44][1740/3239]	Time 0.203 (0.594)	Data Time 0.001 (0.023)	Loss 2.6581 (2.7864)	Entropy 1.22755 (1.23054)	Top-1 acc 60.938 (57.125)	Top-5 acc 80.078 (78.843)	lr 0.01772
Train [44][1750/3239]	Time 0.359 (0.593)	Data Time 0.001 (0.022)	Loss 2.8983 (2.7864)	Entropy 1.22756 (1.23052)	Top-1 acc 55.469 (57.124)	Top-5 acc 74.219 (78.838)	lr 0.01772
Train [44][1760/3239]	Time 0.224 (0.593)	Data Time 0.001 (0.022)	Loss 2.8273 (2.7863)	Entropy 1.22755 (1.23050)	Top-1 acc 55.859 (57.124)	Top-5 acc 80.078 (78.843)	lr 0.01772
Train [44][1770/3239]	Time 2.488 (0.592)	Data Time 0.001 (0.022)	Loss 2.8154 (2.7863)	Entropy 1.22755 (1.23048)	Top-1 acc 56.641 (57.117)	Top-5 acc 77.734 (78.847)	lr 0.01772
Train [44][1780/3239]	Time 0.229 (0.590)	Data Time 0.001 (0.022)	Loss 2.6255 (2.7862)	Entropy 1.22753 (1.23047)	Top-1 acc 58.984 (57.117)	Top-5 acc 83.594 (78.849)	lr 0.01772
Train [44][1790/3239]	Time 0.235 (0.589)	Data Time 0.001 (0.022)	Loss 2.7048 (2.7860)	Entropy 1.22750 (1.23045)	Top-1 acc 58.594 (57.114)	Top-5 acc 80.859 (78.854)	lr 0.01772
Train [44][1800/3239]	Time 0.307 (0.589)	Data Time 0.001 (0.022)	Loss 2.9104 (2.7864)	Entropy 1.22750 (1.23044)	Top-1 acc 53.906 (57.104)	Top-5 acc 78.906 (78.852)	lr 0.01772
Train [44][1810/3239]	Time 0.225 (0.588)	Data Time 0.001 (0.022)	Loss 2.7566 (2.7865)	Entropy 1.22748 (1.23042)	Top-1 acc 57.422 (57.102)	Top-5 acc 78.906 (78.851)	lr 0.01772
Train [44][1820/3239]	Time 0.248 (0.587)	Data Time 0.001 (0.022)	Loss 2.8607 (2.7866)	Entropy 1.22748 (1.23040)	Top-1 acc 53.516 (57.098)	Top-5 acc 76.172 (78.846)	lr 0.01771
Train [44][1830/3239]	Time 0.237 (0.586)	Data Time 0.001 (0.022)	Loss 2.8704 (2.7867)	Entropy 1.22737 (1.23039)	Top-1 acc 50.781 (57.093)	Top-5 acc 78.906 (78.848)	lr 0.01771
Train [44][1840/3239]	Time 0.216 (0.586)	Data Time 0.001 (0.021)	Loss 2.7383 (2.7868)	Entropy 1.22736 (1.23037)	Top-1 acc 57.031 (57.090)	Top-5 acc 77.344 (78.845)	lr 0.01771
Train [44][1850/3239]	Time 0.235 (0.585)	Data Time 0.001 (0.021)	Loss 2.8416 (2.7867)	Entropy 1.22733 (1.23035)	Top-1 acc 53.516 (57.087)	Top-5 acc 77.344 (78.850)	lr 0.01771
Train [44][1860/3239]	Time 0.210 (0.585)	Data Time 0.001 (0.021)	Loss 2.8473 (2.7868)	Entropy 1.22733 (1.23034)	Top-1 acc 54.297 (57.079)	Top-5 acc 76.953 (78.844)	lr 0.01771
Train [44][1870/3239]	Time 0.210 (0.584)	Data Time 0.001 (0.021)	Loss 2.9382 (2.7869)	Entropy 1.22731 (1.23032)	Top-1 acc 54.688 (57.079)	Top-5 acc 78.906 (78.844)	lr 0.01771
Train [44][1880/3239]	Time 2.421 (0.583)	Data Time 0.001 (0.021)	Loss 2.9951 (2.7872)	Entropy 1.22731 (1.23031)	Top-1 acc 57.031 (57.079)	Top-5 acc 71.484 (78.837)	lr 0.01771
Train [44][1890/3239]	Time 0.224 (0.581)	Data Time 0.001 (0.021)	Loss 2.7845 (2.7872)	Entropy 1.22728 (1.23029)	Top-1 acc 58.984 (57.082)	Top-5 acc 78.516 (78.840)	lr 0.01771
Train [44][1900/3239]	Time 0.267 (0.603)	Data Time 0.003 (0.021)	Loss 2.8956 (2.7874)	Entropy 1.22722 (1.23027)	Top-1 acc 53.906 (57.076)	Top-5 acc 73.828 (78.834)	lr 0.01771
Train [44][1910/3239]	Time 0.229 (0.603)	Data Time 0.002 (0.021)	Loss 2.6193 (2.7876)	Entropy 1.22720 (1.23026)	Top-1 acc 65.625 (57.076)	Top-5 acc 82.031 (78.833)	lr 0.01771
Train [44][1920/3239]	Time 0.226 (0.602)	Data Time 0.001 (0.021)	Loss 2.8523 (2.7876)	Entropy 1.22716 (1.23024)	Top-1 acc 57.031 (57.072)	Top-5 acc 77.734 (78.830)	lr 0.01771
Train [44][1930/3239]	Time 0.205 (0.602)	Data Time 0.001 (0.021)	Loss 2.9471 (2.7877)	Entropy 1.22715 (1.23023)	Top-1 acc 51.172 (57.066)	Top-5 acc 78.125 (78.827)	lr 0.01770
Train [44][1940/3239]	Time 0.267 (0.601)	Data Time 0.001 (0.020)	Loss 2.7203 (2.7880)	Entropy 1.22703 (1.23021)	Top-1 acc 60.156 (57.062)	Top-5 acc 81.250 (78.824)	lr 0.01770
Train [44][1950/3239]	Time 0.219 (0.600)	Data Time 0.001 (0.020)	Loss 2.6264 (2.7879)	Entropy 1.22700 (1.23019)	Top-1 acc 64.062 (57.067)	Top-5 acc 81.641 (78.824)	lr 0.01770
Train [44][1960/3239]	Time 0.207 (0.599)	Data Time 0.001 (0.020)	Loss 2.6917 (2.7876)	Entropy 1.22681 (1.23018)	Top-1 acc 58.984 (57.072)	Top-5 acc 80.469 (78.831)	lr 0.01770
Train [44][1970/3239]	Time 0.234 (0.599)	Data Time 0.001 (0.020)	Loss 2.9467 (2.7879)	Entropy 1.22679 (1.23016)	Top-1 acc 53.906 (57.072)	Top-5 acc 75.781 (78.826)	lr 0.01770
Train [44][1980/3239]	Time 0.223 (0.598)	Data Time 0.001 (0.020)	Loss 2.7104 (2.7877)	Entropy 1.22679 (1.23014)	Top-1 acc 62.109 (57.073)	Top-5 acc 82.422 (78.828)	lr 0.01770
Train [44][1990/3239]	Time 2.442 (0.597)	Data Time 0.001 (0.020)	Loss 2.9134 (2.7881)	Entropy 1.22679 (1.23013)	Top-1 acc 53.906 (57.063)	Top-5 acc 78.125 (78.822)	lr 0.01770
Train [44][2000/3239]	Time 0.266 (0.595)	Data Time 0.001 (0.020)	Loss 2.7426 (2.7883)	Entropy 1.22673 (1.23011)	Top-1 acc 57.422 (57.054)	Top-5 acc 81.641 (78.819)	lr 0.01770
Train [44][2010/3239]	Time 0.226 (0.595)	Data Time 0.001 (0.020)	Loss 2.7660 (2.7884)	Entropy 1.22671 (1.23009)	Top-1 acc 58.203 (57.054)	Top-5 acc 80.469 (78.816)	lr 0.01770
Train [44][2020/3239]	Time 0.220 (0.594)	Data Time 0.001 (0.020)	Loss 2.8479 (2.7885)	Entropy 1.22657 (1.23007)	Top-1 acc 55.469 (57.050)	Top-5 acc 77.344 (78.814)	lr 0.01770
Train [44][2030/3239]	Time 0.215 (0.593)	Data Time 0.001 (0.020)	Loss 2.8952 (2.7885)	Entropy 1.22645 (1.23006)	Top-1 acc 54.688 (57.051)	Top-5 acc 73.438 (78.810)	lr 0.01770
Train [44][2040/3239]	Time 0.209 (0.592)	Data Time 0.001 (0.019)	Loss 2.8300 (2.7884)	Entropy 1.22645 (1.23004)	Top-1 acc 57.031 (57.053)	Top-5 acc 79.297 (78.811)	lr 0.01769
Train [44][2050/3239]	Time 0.219 (0.592)	Data Time 0.001 (0.019)	Loss 2.7784 (2.7884)	Entropy 1.22637 (1.23002)	Top-1 acc 58.984 (57.054)	Top-5 acc 78.516 (78.809)	lr 0.01769
Train [44][2060/3239]	Time 0.225 (0.591)	Data Time 0.001 (0.019)	Loss 2.6877 (2.7882)	Entropy 1.22637 (1.23000)	Top-1 acc 65.625 (57.057)	Top-5 acc 80.078 (78.810)	lr 0.01769
Train [44][2070/3239]	Time 0.224 (0.591)	Data Time 0.001 (0.019)	Loss 2.8054 (2.7884)	Entropy 1.22624 (1.22999)	Top-1 acc 51.562 (57.055)	Top-5 acc 76.562 (78.807)	lr 0.01769
Train [44][2080/3239]	Time 0.225 (0.590)	Data Time 0.001 (0.019)	Loss 2.8338 (2.7888)	Entropy 1.22621 (1.22997)	Top-1 acc 53.906 (57.045)	Top-5 acc 75.391 (78.798)	lr 0.01769
Train [44][2090/3239]	Time 0.194 (0.589)	Data Time 0.001 (0.019)	Loss 2.6540 (2.7887)	Entropy 1.22619 (1.22995)	Top-1 acc 57.812 (57.043)	Top-5 acc 81.250 (78.801)	lr 0.01769
Train [44][2100/3239]	Time 2.312 (0.589)	Data Time 0.001 (0.019)	Loss 2.8050 (2.7884)	Entropy 1.22619 (1.22993)	Top-1 acc 57.031 (57.054)	Top-5 acc 78.125 (78.806)	lr 0.01769
Train [44][2110/3239]	Time 0.220 (0.587)	Data Time 0.001 (0.019)	Loss 2.8173 (2.7884)	Entropy 1.22618 (1.22991)	Top-1 acc 60.938 (57.053)	Top-5 acc 76.562 (78.804)	lr 0.01769
Train [44][2120/3239]	Time 0.257 (0.586)	Data Time 0.001 (0.019)	Loss 2.8425 (2.7884)	Entropy 1.22616 (1.22990)	Top-1 acc 53.516 (57.049)	Top-5 acc 80.078 (78.810)	lr 0.01769
Train [44][2130/3239]	Time 0.242 (0.586)	Data Time 0.002 (0.019)	Loss 2.6000 (2.7884)	Entropy 1.22616 (1.22988)	Top-1 acc 61.328 (57.048)	Top-5 acc 81.641 (78.812)	lr 0.01769
Train [44][2140/3239]	Time 0.197 (0.585)	Data Time 0.001 (0.019)	Loss 2.7295 (2.7883)	Entropy 1.22614 (1.22986)	Top-1 acc 61.719 (57.051)	Top-5 acc 77.344 (78.811)	lr 0.01769
Train [44][2150/3239]	Time 0.229 (0.585)	Data Time 0.001 (0.019)	Loss 2.7831 (2.7885)	Entropy 1.22607 (1.22984)	Top-1 acc 57.812 (57.045)	Top-5 acc 76.953 (78.805)	lr 0.01768
Train [44][2160/3239]	Time 0.217 (0.584)	Data Time 0.001 (0.019)	Loss 2.8818 (2.7887)	Entropy 1.22603 (1.22983)	Top-1 acc 56.641 (57.041)	Top-5 acc 77.734 (78.802)	lr 0.01768
Train [44][2170/3239]	Time 0.211 (0.584)	Data Time 0.001 (0.018)	Loss 2.7013 (2.7887)	Entropy 1.22604 (1.22981)	Top-1 acc 56.250 (57.044)	Top-5 acc 78.516 (78.802)	lr 0.01768
Train [44][2180/3239]	Time 0.217 (0.583)	Data Time 0.002 (0.018)	Loss 2.9244 (2.7890)	Entropy 1.22599 (1.22979)	Top-1 acc 54.297 (57.033)	Top-5 acc 75.781 (78.795)	lr 0.01768
Train [44][2190/3239]	Time 0.274 (0.582)	Data Time 0.001 (0.018)	Loss 2.6588 (2.7888)	Entropy 1.22597 (1.22977)	Top-1 acc 58.984 (57.040)	Top-5 acc 82.031 (78.802)	lr 0.01768
Train [44][2200/3239]	Time 0.217 (0.582)	Data Time 0.001 (0.018)	Loss 2.6433 (2.7889)	Entropy 1.22591 (1.22976)	Top-1 acc 58.594 (57.037)	Top-5 acc 82.031 (78.798)	lr 0.01768
Train [44][2210/3239]	Time 2.386 (0.581)	Data Time 0.001 (0.018)	Loss 2.6994 (2.7891)	Entropy 1.22591 (1.22974)	Top-1 acc 58.984 (57.029)	Top-5 acc 81.250 (78.794)	lr 0.01768
Train [44][2220/3239]	Time 0.227 (0.580)	Data Time 0.001 (0.018)	Loss 3.0124 (2.7894)	Entropy 1.22588 (1.22972)	Top-1 acc 50.391 (57.028)	Top-5 acc 75.391 (78.789)	lr 0.01768
Train [44][2230/3239]	Time 0.256 (0.579)	Data Time 0.002 (0.018)	Loss 2.6728 (2.7895)	Entropy 1.22588 (1.22970)	Top-1 acc 60.938 (57.029)	Top-5 acc 81.641 (78.789)	lr 0.01768
Train [44][2240/3239]	Time 0.218 (0.579)	Data Time 0.001 (0.018)	Loss 2.9127 (2.7894)	Entropy 1.22586 (1.22969)	Top-1 acc 50.391 (57.029)	Top-5 acc 76.562 (78.790)	lr 0.01768
Train [44][2250/3239]	Time 0.239 (0.578)	Data Time 0.001 (0.018)	Loss 2.7068 (2.7894)	Entropy 1.22578 (1.22967)	Top-1 acc 62.109 (57.027)	Top-5 acc 80.859 (78.789)	lr 0.01768
Train [44][2260/3239]	Time 0.242 (0.598)	Data Time 0.002 (0.018)	Loss 2.8608 (2.7894)	Entropy 1.22573 (1.22965)	Top-1 acc 56.641 (57.022)	Top-5 acc 77.344 (78.786)	lr 0.01767
Train [44][2270/3239]	Time 0.229 (0.598)	Data Time 0.002 (0.018)	Loss 2.8807 (2.7894)	Entropy 1.22571 (1.22964)	Top-1 acc 51.953 (57.021)	Top-5 acc 81.250 (78.792)	lr 0.01767
Train [44][2280/3239]	Time 0.210 (0.597)	Data Time 0.002 (0.018)	Loss 2.5994 (2.7894)	Entropy 1.22569 (1.22962)	Top-1 acc 61.328 (57.021)	Top-5 acc 84.766 (78.792)	lr 0.01767
Train [44][2290/3239]	Time 0.314 (0.597)	Data Time 0.001 (0.018)	Loss 2.7762 (2.7894)	Entropy 1.22571 (1.22960)	Top-1 acc 56.641 (57.018)	Top-5 acc 79.297 (78.789)	lr 0.01767
Train [44][2300/3239]	Time 0.208 (0.596)	Data Time 0.001 (0.017)	Loss 2.8836 (2.7895)	Entropy 1.22565 (1.22958)	Top-1 acc 53.125 (57.015)	Top-5 acc 75.391 (78.787)	lr 0.01767
Train [44][2310/3239]	Time 0.192 (0.595)	Data Time 0.001 (0.017)	Loss 2.6442 (2.7897)	Entropy 1.22564 (1.22957)	Top-1 acc 58.203 (57.012)	Top-5 acc 82.812 (78.782)	lr 0.01767
Train [44][2320/3239]	Time 2.365 (0.595)	Data Time 0.002 (0.017)	Loss 2.7745 (2.7896)	Entropy 1.22564 (1.22955)	Top-1 acc 59.766 (57.015)	Top-5 acc 78.125 (78.782)	lr 0.01767
Train [44][2330/3239]	Time 0.154 (0.593)	Data Time 0.001 (0.017)	Loss 2.8624 (2.7898)	Entropy 1.22559 (1.22953)	Top-1 acc 52.734 (57.007)	Top-5 acc 78.516 (78.778)	lr 0.01767
Train [44][2340/3239]	Time 0.265 (0.593)	Data Time 0.001 (0.017)	Loss 2.7295 (2.7900)	Entropy 1.22553 (1.22952)	Top-1 acc 58.203 (57.004)	Top-5 acc 79.688 (78.776)	lr 0.01767
Train [44][2350/3239]	Time 0.211 (0.592)	Data Time 0.001 (0.017)	Loss 2.6809 (2.7899)	Entropy 1.22553 (1.22950)	Top-1 acc 59.766 (57.003)	Top-5 acc 81.641 (78.780)	lr 0.01767
Train [44][2360/3239]	Time 0.229 (0.592)	Data Time 0.001 (0.017)	Loss 2.7740 (2.7897)	Entropy 1.22549 (1.22948)	Top-1 acc 56.250 (57.006)	Top-5 acc 76.953 (78.781)	lr 0.01767
Train [44][2370/3239]	Time 0.215 (0.591)	Data Time 0.001 (0.017)	Loss 2.8085 (2.7902)	Entropy 1.22549 (1.22947)	Top-1 acc 57.812 (56.996)	Top-5 acc 78.516 (78.773)	lr 0.01766
Train [44][2380/3239]	Time 0.225 (0.590)	Data Time 0.001 (0.017)	Loss 3.0164 (2.7902)	Entropy 1.22546 (1.22945)	Top-1 acc 51.562 (56.992)	Top-5 acc 75.781 (78.771)	lr 0.01766
Train [44][2390/3239]	Time 0.211 (0.590)	Data Time 0.001 (0.017)	Loss 3.1315 (2.7905)	Entropy 1.22543 (1.22943)	Top-1 acc 51.172 (56.994)	Top-5 acc 70.312 (78.766)	lr 0.01766
Train [44][2400/3239]	Time 0.158 (0.589)	Data Time 0.001 (0.017)	Loss 2.6721 (2.7906)	Entropy 1.22544 (1.22942)	Top-1 acc 58.984 (56.988)	Top-5 acc 81.250 (78.764)	lr 0.01766
Train [44][2410/3239]	Time 0.210 (0.589)	Data Time 0.001 (0.017)	Loss 3.1159 (2.7906)	Entropy 1.22537 (1.22940)	Top-1 acc 51.172 (56.986)	Top-5 acc 75.000 (78.763)	lr 0.01766
Train [44][2420/3239]	Time 0.249 (0.588)	Data Time 0.001 (0.017)	Loss 3.0757 (2.7906)	Entropy 1.22538 (1.22938)	Top-1 acc 50.391 (56.985)	Top-5 acc 75.391 (78.762)	lr 0.01766
Train [44][2430/3239]	Time 2.359 (0.588)	Data Time 0.001 (0.017)	Loss 3.0559 (2.7909)	Entropy 1.22538 (1.22937)	Top-1 acc 50.391 (56.983)	Top-5 acc 74.219 (78.756)	lr 0.01766
Train [44][2440/3239]	Time 0.225 (0.586)	Data Time 0.001 (0.017)	Loss 2.7668 (2.7909)	Entropy 1.22536 (1.22935)	Top-1 acc 60.547 (56.980)	Top-5 acc 76.953 (78.751)	lr 0.01766
Train [44][2450/3239]	Time 0.222 (0.586)	Data Time 0.001 (0.017)	Loss 2.7274 (2.7908)	Entropy 1.22524 (1.22933)	Top-1 acc 56.641 (56.985)	Top-5 acc 80.469 (78.752)	lr 0.01766
Train [44][2460/3239]	Time 0.211 (0.585)	Data Time 0.001 (0.016)	Loss 2.9105 (2.7908)	Entropy 1.22513 (1.22932)	Top-1 acc 52.344 (56.981)	Top-5 acc 74.609 (78.748)	lr 0.01766
Train [44][2470/3239]	Time 0.218 (0.585)	Data Time 0.001 (0.016)	Loss 2.9709 (2.7909)	Entropy 1.22513 (1.22930)	Top-1 acc 51.953 (56.976)	Top-5 acc 75.391 (78.747)	lr 0.01766
Train [44][2480/3239]	Time 0.211 (0.584)	Data Time 0.001 (0.016)	Loss 2.7125 (2.7908)	Entropy 1.22513 (1.22928)	Top-1 acc 58.594 (56.979)	Top-5 acc 79.688 (78.749)	lr 0.01765
Train [44][2490/3239]	Time 0.173 (0.584)	Data Time 0.001 (0.016)	Loss 2.7943 (2.7904)	Entropy 1.22512 (1.22927)	Top-1 acc 57.812 (56.985)	Top-5 acc 78.906 (78.757)	lr 0.01765
Train [44][2500/3239]	Time 0.229 (0.583)	Data Time 0.001 (0.016)	Loss 3.0116 (2.7906)	Entropy 1.22506 (1.22925)	Top-1 acc 52.344 (56.981)	Top-5 acc 73.438 (78.751)	lr 0.01765
Train [44][2510/3239]	Time 0.216 (0.583)	Data Time 0.001 (0.016)	Loss 2.8182 (2.7908)	Entropy 1.22498 (1.22923)	Top-1 acc 54.688 (56.981)	Top-5 acc 76.172 (78.749)	lr 0.01765
Train [44][2520/3239]	Time 0.226 (0.582)	Data Time 0.002 (0.016)	Loss 2.7853 (2.7907)	Entropy 1.22490 (1.22922)	Top-1 acc 59.375 (56.980)	Top-5 acc 80.859 (78.751)	lr 0.01765
Train [44][2530/3239]	Time 0.285 (0.582)	Data Time 0.001 (0.016)	Loss 2.9842 (2.7907)	Entropy 1.22487 (1.22920)	Top-1 acc 53.125 (56.981)	Top-5 acc 76.172 (78.751)	lr 0.01765
Train [44][2540/3239]	Time 2.557 (0.581)	Data Time 0.001 (0.016)	Loss 2.8608 (2.7906)	Entropy 1.22487 (1.22918)	Top-1 acc 57.812 (56.982)	Top-5 acc 76.562 (78.752)	lr 0.01765
Train [44][2550/3239]	Time 0.264 (0.580)	Data Time 0.002 (0.016)	Loss 2.5712 (2.7904)	Entropy 1.22481 (1.22916)	Top-1 acc 57.812 (56.982)	Top-5 acc 82.812 (78.756)	lr 0.01765
Train [44][2560/3239]	Time 0.215 (0.579)	Data Time 0.001 (0.016)	Loss 2.6742 (2.7908)	Entropy 1.22477 (1.22915)	Top-1 acc 58.984 (56.974)	Top-5 acc 81.641 (78.753)	lr 0.01765
Train [44][2570/3239]	Time 0.209 (0.579)	Data Time 0.001 (0.016)	Loss 2.9291 (2.7908)	Entropy 1.22472 (1.22913)	Top-1 acc 55.469 (56.972)	Top-5 acc 78.125 (78.754)	lr 0.01765
Train [44][2580/3239]	Time 0.227 (0.578)	Data Time 0.002 (0.016)	Loss 2.7749 (2.7906)	Entropy 1.22467 (1.22911)	Top-1 acc 60.547 (56.977)	Top-5 acc 81.250 (78.761)	lr 0.01765
Train [44][2590/3239]	Time 0.221 (0.578)	Data Time 0.001 (0.016)	Loss 2.8367 (2.7906)	Entropy 1.22462 (1.22909)	Top-1 acc 58.594 (56.979)	Top-5 acc 77.734 (78.759)	lr 0.01764
Train [44][2600/3239]	Time 0.322 (0.577)	Data Time 0.002 (0.016)	Loss 2.7883 (2.7906)	Entropy 1.22461 (1.22908)	Top-1 acc 62.109 (56.983)	Top-5 acc 80.078 (78.760)	lr 0.01764
Train [44][2610/3239]	Time 0.226 (0.577)	Data Time 0.001 (0.016)	Loss 2.7749 (2.7905)	Entropy 1.22455 (1.22906)	Top-1 acc 57.812 (56.984)	Top-5 acc 79.688 (78.761)	lr 0.01764
Train [44][2620/3239]	Time 0.293 (0.593)	Data Time 0.004 (0.016)	Loss 2.7401 (2.7904)	Entropy 1.22452 (1.22904)	Top-1 acc 57.031 (56.986)	Top-5 acc 79.688 (78.765)	lr 0.01764
Train [44][2630/3239]	Time 0.228 (0.593)	Data Time 0.009 (0.016)	Loss 2.7910 (2.7904)	Entropy 1.22449 (1.22903)	Top-1 acc 55.469 (56.984)	Top-5 acc 77.734 (78.765)	lr 0.01764
Train [44][2640/3239]	Time 0.192 (0.593)	Data Time 0.001 (0.015)	Loss 2.8436 (2.7906)	Entropy 1.22447 (1.22901)	Top-1 acc 56.250 (56.980)	Top-5 acc 78.906 (78.760)	lr 0.01764
Train [44][2650/3239]	Time 0.341 (0.592)	Data Time 0.002 (0.015)	Loss 2.9387 (2.7907)	Entropy 1.22440 (1.22899)	Top-1 acc 53.906 (56.974)	Top-5 acc 77.734 (78.757)	lr 0.01764
Train [44][2660/3239]	Time 0.223 (0.592)	Data Time 0.001 (0.015)	Loss 2.8762 (2.7906)	Entropy 1.22433 (1.22897)	Top-1 acc 55.859 (56.976)	Top-5 acc 78.906 (78.757)	lr 0.01764
Train [44][2670/3239]	Time 0.258 (0.591)	Data Time 0.001 (0.015)	Loss 2.9616 (2.7907)	Entropy 1.22430 (1.22896)	Top-1 acc 53.906 (56.971)	Top-5 acc 75.781 (78.756)	lr 0.01764
Train [44][2680/3239]	Time 0.265 (0.591)	Data Time 0.001 (0.015)	Loss 2.7471 (2.7905)	Entropy 1.22429 (1.22894)	Top-1 acc 60.156 (56.974)	Top-5 acc 80.469 (78.759)	lr 0.01764
Train [44][2690/3239]	Time 0.228 (0.590)	Data Time 0.001 (0.015)	Loss 2.7871 (2.7904)	Entropy 1.22425 (1.22892)	Top-1 acc 55.859 (56.977)	Top-5 acc 79.688 (78.761)	lr 0.01763
Train [44][2700/3239]	Time 0.323 (0.590)	Data Time 0.001 (0.015)	Loss 2.8278 (2.7906)	Entropy 1.22410 (1.22891)	Top-1 acc 56.250 (56.974)	Top-5 acc 78.516 (78.755)	lr 0.01763
Train [44][2710/3239]	Time 0.269 (0.589)	Data Time 0.001 (0.015)	Loss 2.7402 (2.7905)	Entropy 1.22405 (1.22889)	Top-1 acc 58.984 (56.974)	Top-5 acc 78.125 (78.756)	lr 0.01763
Train [44][2720/3239]	Time 0.168 (0.589)	Data Time 0.001 (0.015)	Loss 2.7924 (2.7902)	Entropy 1.22402 (1.22887)	Top-1 acc 56.250 (56.984)	Top-5 acc 78.906 (78.762)	lr 0.01763
Train [44][2730/3239]	Time 0.231 (0.588)	Data Time 0.002 (0.015)	Loss 2.9288 (2.7901)	Entropy 1.22397 (1.22885)	Top-1 acc 53.125 (56.984)	Top-5 acc 77.344 (78.763)	lr 0.01763
Train [44][2740/3239]	Time 0.258 (0.588)	Data Time 0.001 (0.015)	Loss 2.9041 (2.7902)	Entropy 1.22394 (1.22883)	Top-1 acc 55.078 (56.982)	Top-5 acc 76.953 (78.760)	lr 0.01763
Train [44][2750/3239]	Time 0.321 (0.587)	Data Time 0.001 (0.015)	Loss 2.6608 (2.7902)	Entropy 1.22395 (1.22882)	Top-1 acc 60.938 (56.985)	Top-5 acc 80.859 (78.760)	lr 0.01763
Train [44][2760/3239]	Time 0.243 (0.587)	Data Time 0.001 (0.015)	Loss 2.7972 (2.7902)	Entropy 1.22387 (1.22880)	Top-1 acc 58.203 (56.986)	Top-5 acc 78.125 (78.761)	lr 0.01763
Train [44][2770/3239]	Time 0.235 (0.586)	Data Time 0.002 (0.015)	Loss 2.9016 (2.7903)	Entropy 1.22384 (1.22878)	Top-1 acc 52.734 (56.986)	Top-5 acc 77.344 (78.759)	lr 0.01763
Train [44][2780/3239]	Time 0.226 (0.586)	Data Time 0.001 (0.015)	Loss 2.8967 (2.7903)	Entropy 1.22376 (1.22876)	Top-1 acc 51.953 (56.988)	Top-5 acc 76.562 (78.754)	lr 0.01763
Train [44][2790/3239]	Time 0.215 (0.585)	Data Time 0.001 (0.015)	Loss 2.9169 (2.7906)	Entropy 1.22368 (1.22874)	Top-1 acc 53.125 (56.984)	Top-5 acc 76.953 (78.748)	lr 0.01763
Train [44][2800/3239]	Time 0.333 (0.585)	Data Time 0.002 (0.015)	Loss 2.8762 (2.7905)	Entropy 1.22367 (1.22873)	Top-1 acc 56.641 (56.986)	Top-5 acc 77.344 (78.750)	lr 0.01762
Train [44][2810/3239]	Time 0.241 (0.585)	Data Time 0.001 (0.015)	Loss 2.6696 (2.7906)	Entropy 1.22367 (1.22871)	Top-1 acc 58.203 (56.985)	Top-5 acc 81.250 (78.752)	lr 0.01762
Train [44][2820/3239]	Time 0.237 (0.584)	Data Time 0.001 (0.015)	Loss 2.8828 (2.7905)	Entropy 1.22365 (1.22869)	Top-1 acc 57.812 (56.985)	Top-5 acc 75.781 (78.752)	lr 0.01762
Train [44][2830/3239]	Time 0.221 (0.584)	Data Time 0.001 (0.015)	Loss 2.5305 (2.7905)	Entropy 1.22364 (1.22867)	Top-1 acc 61.719 (56.988)	Top-5 acc 85.547 (78.751)	lr 0.01762
Train [44][2840/3239]	Time 0.246 (0.583)	Data Time 0.001 (0.015)	Loss 2.8948 (2.7907)	Entropy 1.22360 (1.22866)	Top-1 acc 55.859 (56.981)	Top-5 acc 79.688 (78.746)	lr 0.01762
Train [44][2850/3239]	Time 0.307 (0.583)	Data Time 0.001 (0.014)	Loss 2.8626 (2.7907)	Entropy 1.22346 (1.22864)	Top-1 acc 55.078 (56.981)	Top-5 acc 78.906 (78.750)	lr 0.01762
Train [44][2860/3239]	Time 0.257 (0.582)	Data Time 0.001 (0.014)	Loss 2.8662 (2.7908)	Entropy 1.22340 (1.22862)	Top-1 acc 54.688 (56.975)	Top-5 acc 76.562 (78.747)	lr 0.01762
Train [44][2870/3239]	Time 0.252 (0.582)	Data Time 0.001 (0.014)	Loss 2.8256 (2.7910)	Entropy 1.22342 (1.22860)	Top-1 acc 55.469 (56.973)	Top-5 acc 75.000 (78.745)	lr 0.01762
Train [44][2880/3239]	Time 0.249 (0.582)	Data Time 0.001 (0.014)	Loss 2.6728 (2.7909)	Entropy 1.22346 (1.22858)	Top-1 acc 59.375 (56.973)	Top-5 acc 81.641 (78.747)	lr 0.01762
Train [44][2890/3239]	Time 0.248 (0.581)	Data Time 0.001 (0.014)	Loss 2.7748 (2.7911)	Entropy 1.22344 (1.22857)	Top-1 acc 57.422 (56.969)	Top-5 acc 81.641 (78.744)	lr 0.01762
Train [44][2900/3239]	Time 0.216 (0.581)	Data Time 0.002 (0.014)	Loss 2.7081 (2.7909)	Entropy 1.22342 (1.22855)	Top-1 acc 57.812 (56.971)	Top-5 acc 82.031 (78.750)	lr 0.01762
Train [44][2910/3239]	Time 0.248 (0.580)	Data Time 0.001 (0.014)	Loss 2.8308 (2.7908)	Entropy 1.22340 (1.22853)	Top-1 acc 58.984 (56.973)	Top-5 acc 77.734 (78.752)	lr 0.01761
Train [44][2920/3239]	Time 0.172 (0.580)	Data Time 0.001 (0.014)	Loss 2.7832 (2.7909)	Entropy 1.22340 (1.22851)	Top-1 acc 57.031 (56.971)	Top-5 acc 79.688 (78.748)	lr 0.01761
Train [44][2930/3239]	Time 0.225 (0.580)	Data Time 0.001 (0.014)	Loss 2.7761 (2.7910)	Entropy 1.22334 (1.22850)	Top-1 acc 58.594 (56.969)	Top-5 acc 80.078 (78.751)	lr 0.01761
Train [44][2940/3239]	Time 0.214 (0.579)	Data Time 0.001 (0.014)	Loss 2.6200 (2.7910)	Entropy 1.22321 (1.22848)	Top-1 acc 60.938 (56.971)	Top-5 acc 82.812 (78.751)	lr 0.01761
Train [44][2950/3239]	Time 0.228 (0.595)	Data Time 0.003 (0.014)	Loss 2.6917 (2.7912)	Entropy 1.22312 (1.22846)	Top-1 acc 58.594 (56.963)	Top-5 acc 83.594 (78.746)	lr 0.01761
Train [44][2960/3239]	Time 0.257 (0.594)	Data Time 0.002 (0.014)	Loss 2.7550 (2.7911)	Entropy 1.22309 (1.22844)	Top-1 acc 57.812 (56.963)	Top-5 acc 82.422 (78.748)	lr 0.01761
Train [44][2970/3239]	Time 0.230 (0.594)	Data Time 0.001 (0.014)	Loss 2.6892 (2.7912)	Entropy 1.22304 (1.22842)	Top-1 acc 58.594 (56.958)	Top-5 acc 82.031 (78.746)	lr 0.01761
Train [44][2980/3239]	Time 0.231 (0.593)	Data Time 0.002 (0.014)	Loss 2.5514 (2.7911)	Entropy 1.22293 (1.22841)	Top-1 acc 60.938 (56.959)	Top-5 acc 83.203 (78.749)	lr 0.01761
Train [44][2990/3239]	Time 0.232 (0.593)	Data Time 0.001 (0.014)	Loss 2.6240 (2.7910)	Entropy 1.22296 (1.22839)	Top-1 acc 58.984 (56.959)	Top-5 acc 83.984 (78.754)	lr 0.01761
Train [44][3000/3239]	Time 0.216 (0.593)	Data Time 0.001 (0.014)	Loss 2.9261 (2.7911)	Entropy 1.22295 (1.22837)	Top-1 acc 55.469 (56.957)	Top-5 acc 76.172 (78.754)	lr 0.01761
Train [44][3010/3239]	Time 0.230 (0.592)	Data Time 0.001 (0.014)	Loss 2.8923 (2.7912)	Entropy 1.22245 (1.22835)	Top-1 acc 57.812 (56.960)	Top-5 acc 75.391 (78.751)	lr 0.01761
Train [44][3020/3239]	Time 0.261 (0.592)	Data Time 0.002 (0.014)	Loss 2.9152 (2.7912)	Entropy 1.22241 (1.22833)	Top-1 acc 53.125 (56.960)	Top-5 acc 76.172 (78.751)	lr 0.01760
Train [44][3030/3239]	Time 0.202 (0.591)	Data Time 0.001 (0.014)	Loss 2.7042 (2.7913)	Entropy 1.22237 (1.22831)	Top-1 acc 58.594 (56.957)	Top-5 acc 80.469 (78.748)	lr 0.01760
Train [44][3040/3239]	Time 0.245 (0.591)	Data Time 0.001 (0.014)	Loss 2.7608 (2.7913)	Entropy 1.22235 (1.22829)	Top-1 acc 59.375 (56.953)	Top-5 acc 78.906 (78.749)	lr 0.01760
Train [44][3050/3239]	Time 0.218 (0.590)	Data Time 0.001 (0.014)	Loss 2.7648 (2.7912)	Entropy 1.22232 (1.22827)	Top-1 acc 58.203 (56.956)	Top-5 acc 80.859 (78.751)	lr 0.01760
Train [44][3060/3239]	Time 0.268 (0.590)	Data Time 0.001 (0.014)	Loss 2.6690 (2.7912)	Entropy 1.22232 (1.22825)	Top-1 acc 58.594 (56.953)	Top-5 acc 84.766 (78.753)	lr 0.01760
Train [44][3070/3239]	Time 0.229 (0.590)	Data Time 0.001 (0.014)	Loss 2.7955 (2.7913)	Entropy 1.22228 (1.22823)	Top-1 acc 58.203 (56.954)	Top-5 acc 78.906 (78.752)	lr 0.01760
Train [44][3080/3239]	Time 0.274 (0.589)	Data Time 0.001 (0.014)	Loss 2.8336 (2.7915)	Entropy 1.22224 (1.22821)	Top-1 acc 55.859 (56.948)	Top-5 acc 76.562 (78.748)	lr 0.01760
Train [44][3090/3239]	Time 0.384 (0.589)	Data Time 0.001 (0.014)	Loss 2.7361 (2.7914)	Entropy 1.22224 (1.22820)	Top-1 acc 57.031 (56.950)	Top-5 acc 81.250 (78.750)	lr 0.01760
Train [44][3100/3239]	Time 0.236 (0.588)	Data Time 0.001 (0.013)	Loss 2.8966 (2.7917)	Entropy 1.22224 (1.22818)	Top-1 acc 54.297 (56.950)	Top-5 acc 78.516 (78.745)	lr 0.01760
Train [44][3110/3239]	Time 0.259 (0.588)	Data Time 0.001 (0.013)	Loss 2.8943 (2.7918)	Entropy 1.22220 (1.22816)	Top-1 acc 52.734 (56.947)	Top-5 acc 75.781 (78.743)	lr 0.01760
Train [44][3120/3239]	Time 0.250 (0.588)	Data Time 0.001 (0.013)	Loss 2.8771 (2.7918)	Entropy 1.22214 (1.22814)	Top-1 acc 52.734 (56.946)	Top-5 acc 79.297 (78.742)	lr 0.01760
Train [44][3130/3239]	Time 0.222 (0.587)	Data Time 0.001 (0.013)	Loss 2.6256 (2.7917)	Entropy 1.22214 (1.22812)	Top-1 acc 62.500 (56.955)	Top-5 acc 82.812 (78.745)	lr 0.01759
Train [44][3140/3239]	Time 0.214 (0.587)	Data Time 0.001 (0.013)	Loss 2.8088 (2.7918)	Entropy 1.22215 (1.22810)	Top-1 acc 53.906 (56.949)	Top-5 acc 79.688 (78.745)	lr 0.01759
Train [44][3150/3239]	Time 0.231 (0.586)	Data Time 0.001 (0.013)	Loss 2.8779 (2.7918)	Entropy 1.22211 (1.22808)	Top-1 acc 55.078 (56.952)	Top-5 acc 78.906 (78.749)	lr 0.01759
Train [44][3160/3239]	Time 0.231 (0.586)	Data Time 0.001 (0.013)	Loss 2.9436 (2.7918)	Entropy 1.22204 (1.22806)	Top-1 acc 55.078 (56.952)	Top-5 acc 73.047 (78.748)	lr 0.01759
Train [44][3170/3239]	Time 0.181 (0.586)	Data Time 0.001 (0.013)	Loss 2.8038 (2.7919)	Entropy 1.22199 (1.22804)	Top-1 acc 54.688 (56.954)	Top-5 acc 77.734 (78.747)	lr 0.01759
Train [44][3180/3239]	Time 0.205 (0.585)	Data Time 0.000 (0.013)	Loss 2.8040 (2.7921)	Entropy 1.22193 (1.22802)	Top-1 acc 54.297 (56.948)	Top-5 acc 77.734 (78.744)	lr 0.01759
Train [44][3190/3239]	Time 0.211 (0.585)	Data Time 0.000 (0.013)	Loss 2.6366 (2.7920)	Entropy 1.22192 (1.22800)	Top-1 acc 59.375 (56.948)	Top-5 acc 81.250 (78.744)	lr 0.01759
Train [44][3200/3239]	Time 0.213 (0.584)	Data Time 0.000 (0.013)	Loss 2.8298 (2.7920)	Entropy 1.22185 (1.22799)	Top-1 acc 55.469 (56.951)	Top-5 acc 76.953 (78.744)	lr 0.01759
Train [44][3210/3239]	Time 0.227 (0.584)	Data Time 0.000 (0.013)	Loss 2.7489 (2.7921)	Entropy 1.22182 (1.22797)	Top-1 acc 58.984 (56.950)	Top-5 acc 80.469 (78.743)	lr 0.01759
Train [44][3220/3239]	Time 0.207 (0.583)	Data Time 0.000 (0.013)	Loss 2.8608 (2.7922)	Entropy 1.22180 (1.22795)	Top-1 acc 52.734 (56.947)	Top-5 acc 78.516 (78.740)	lr 0.01759
Train [44][3230/3239]	Time 0.205 (0.583)	Data Time 0.000 (0.013)	Loss 2.7292 (2.7923)	Entropy 1.22175 (1.22793)	Top-1 acc 59.766 (56.946)	Top-5 acc 81.641 (78.739)	lr 0.01759
Train [44][3239/3239]	Time 2.263 (0.582)	Data Time 0.000 (0.013)	Loss 3.1540 (2.7923)	Entropy 1.22175 (1.22791)	Top-1 acc 50.617 (56.948)	Top-5 acc 72.840 (78.741)	lr 0.01758
==========Valid [44/120]	loss 1.631	top-1 acc 63.496 (63.496)	top-5 acc 84.341	Train top-1 56.948	top-5 78.741	Entropy 1.22175	Latency-None: 0.000ms	Flops: 548.34M
Train [45][0/3239]	Time 33.632 (33.632)	Data Time 31.991 (31.991)	Loss 2.6147 (2.6147)	Entropy 1.22193 (1.22193)	Top-1 acc 58.594 (58.594)	Top-5 acc 82.422 (82.422)	lr 0.01758
Train [45][10/3239]	Time 2.697 (3.614)	Data Time 0.002 (2.912)	Loss 2.6220 (2.7356)	Entropy 1.22193 (1.22193)	Top-1 acc 63.281 (58.878)	Top-5 acc 82.031 (79.759)	lr 0.01758
Train [45][20/3239]	Time 0.256 (2.006)	Data Time 0.001 (1.526)	Loss 2.9866 (2.7234)	Entropy 1.22187 (1.22190)	Top-1 acc 50.000 (58.501)	Top-5 acc 74.609 (79.985)	lr 0.01758
Train [45][30/3239]	Time 0.249 (1.509)	Data Time 0.001 (1.034)	Loss 2.5782 (2.7554)	Entropy 1.22182 (1.22188)	Top-1 acc 60.156 (57.674)	Top-5 acc 82.422 (79.309)	lr 0.01758
Train [45][40/3239]	Time 0.225 (1.256)	Data Time 0.001 (0.782)	Loss 2.6386 (2.7508)	Entropy 1.22177 (1.22185)	Top-1 acc 57.031 (57.736)	Top-5 acc 82.031 (79.306)	lr 0.01758
Train [45][50/3239]	Time 0.318 (2.123)	Data Time 0.002 (0.629)	Loss 2.7498 (2.7478)	Entropy 1.22174 (1.22183)	Top-1 acc 57.812 (57.767)	Top-5 acc 78.125 (79.488)	lr 0.01758
Train [45][60/3239]	Time 0.238 (1.852)	Data Time 0.002 (0.527)	Loss 2.7375 (2.7494)	Entropy 1.22164 (1.22181)	Top-1 acc 58.203 (57.736)	Top-5 acc 79.297 (79.495)	lr 0.01758
Train [45][70/3239]	Time 0.206 (1.655)	Data Time 0.001 (0.453)	Loss 2.8116 (2.7497)	Entropy 1.22165 (1.22179)	Top-1 acc 60.156 (57.735)	Top-5 acc 79.297 (79.561)	lr 0.01758
Train [45][80/3239]	Time 0.216 (1.506)	Data Time 0.001 (0.397)	Loss 2.8026 (2.7484)	Entropy 1.22164 (1.22177)	Top-1 acc 52.734 (57.784)	Top-5 acc 77.344 (79.543)	lr 0.01758
Train [45][90/3239]	Time 0.224 (1.391)	Data Time 0.001 (0.354)	Loss 2.7057 (2.7442)	Entropy 1.22158 (1.22175)	Top-1 acc 55.859 (57.885)	Top-5 acc 82.031 (79.563)	lr 0.01758
Train [45][100/3239]	Time 0.312 (1.300)	Data Time 0.001 (0.319)	Loss 2.7856 (2.7489)	Entropy 1.22157 (1.22173)	Top-1 acc 57.422 (57.816)	Top-5 acc 77.734 (79.467)	lr 0.01757
Train [45][110/3239]	Time 0.247 (1.225)	Data Time 0.001 (0.290)	Loss 2.6474 (2.7521)	Entropy 1.22151 (1.22172)	Top-1 acc 58.984 (57.749)	Top-5 acc 80.469 (79.417)	lr 0.01757
Train [45][120/3239]	Time 2.396 (1.160)	Data Time 0.001 (0.266)	Loss 2.7752 (2.7512)	Entropy 1.22151 (1.22170)	Top-1 acc 58.984 (57.848)	Top-5 acc 76.953 (79.397)	lr 0.01757
Train [45][130/3239]	Time 0.234 (1.089)	Data Time 0.001 (0.246)	Loss 2.8887 (2.7537)	Entropy 1.22151 (1.22169)	Top-1 acc 53.125 (57.789)	Top-5 acc 78.125 (79.357)	lr 0.01757
Train [45][140/3239]	Time 0.231 (1.044)	Data Time 0.001 (0.229)	Loss 2.6764 (2.7517)	Entropy 1.22150 (1.22168)	Top-1 acc 58.984 (57.846)	Top-5 acc 81.250 (79.471)	lr 0.01757
Train [45][150/3239]	Time 0.301 (1.005)	Data Time 0.001 (0.214)	Loss 2.6747 (2.7523)	Entropy 1.22145 (1.22166)	Top-1 acc 56.641 (57.849)	Top-5 acc 80.078 (79.491)	lr 0.01757
Train [45][160/3239]	Time 0.208 (0.971)	Data Time 0.001 (0.201)	Loss 2.7542 (2.7558)	Entropy 1.22143 (1.22165)	Top-1 acc 59.375 (57.788)	Top-5 acc 78.125 (79.389)	lr 0.01757
Train [45][170/3239]	Time 0.221 (0.941)	Data Time 0.001 (0.189)	Loss 2.6927 (2.7571)	Entropy 1.22135 (1.22163)	Top-1 acc 57.422 (57.767)	Top-5 acc 81.250 (79.372)	lr 0.01757
Train [45][180/3239]	Time 0.211 (0.913)	Data Time 0.001 (0.179)	Loss 2.7531 (2.7564)	Entropy 1.22133 (1.22162)	Top-1 acc 57.031 (57.784)	Top-5 acc 80.859 (79.407)	lr 0.01757
Train [45][190/3239]	Time 0.225 (0.889)	Data Time 0.001 (0.169)	Loss 3.0147 (2.7586)	Entropy 1.22130 (1.22160)	Top-1 acc 51.172 (57.673)	Top-5 acc 74.219 (79.352)	lr 0.01757
Train [45][200/3239]	Time 0.336 (0.867)	Data Time 0.001 (0.161)	Loss 2.7259 (2.7593)	Entropy 1.22124 (1.22158)	Top-1 acc 58.594 (57.657)	Top-5 acc 80.078 (79.351)	lr 0.01757
Train [45][210/3239]	Time 0.248 (0.848)	Data Time 0.002 (0.153)	Loss 2.6451 (2.7593)	Entropy 1.22122 (1.22157)	Top-1 acc 58.203 (57.638)	Top-5 acc 82.812 (79.367)	lr 0.01756
Train [45][220/3239]	Time 0.257 (0.832)	Data Time 0.002 (0.147)	Loss 2.7775 (2.7608)	Entropy 1.22121 (1.22155)	Top-1 acc 56.250 (57.597)	Top-5 acc 79.688 (79.348)	lr 0.01756
Train [45][230/3239]	Time 2.575 (0.817)	Data Time 0.002 (0.140)	Loss 2.8196 (2.7590)	Entropy 1.22121 (1.22154)	Top-1 acc 58.203 (57.618)	Top-5 acc 78.516 (79.395)	lr 0.01756
Train [45][240/3239]	Time 0.240 (0.793)	Data Time 0.001 (0.134)	Loss 2.7062 (2.7609)	Entropy 1.22114 (1.22152)	Top-1 acc 62.109 (57.556)	Top-5 acc 79.688 (79.341)	lr 0.01756
Train [45][250/3239]	Time 0.337 (0.781)	Data Time 0.001 (0.129)	Loss 2.6787 (2.7585)	Entropy 1.22110 (1.22150)	Top-1 acc 58.984 (57.629)	Top-5 acc 80.469 (79.373)	lr 0.01756
Train [45][260/3239]	Time 0.218 (0.768)	Data Time 0.001 (0.124)	Loss 2.8123 (2.7590)	Entropy 1.22110 (1.22149)	Top-1 acc 55.078 (57.613)	Top-5 acc 78.516 (79.357)	lr 0.01756
Train [45][270/3239]	Time 0.213 (0.757)	Data Time 0.001 (0.120)	Loss 2.8279 (2.7594)	Entropy 1.22109 (1.22147)	Top-1 acc 55.078 (57.612)	Top-5 acc 78.516 (79.333)	lr 0.01756
Train [45][280/3239]	Time 0.225 (0.746)	Data Time 0.001 (0.116)	Loss 2.7235 (2.7585)	Entropy 1.22110 (1.22146)	Top-1 acc 55.859 (57.608)	Top-5 acc 80.469 (79.337)	lr 0.01756
Train [45][290/3239]	Time 0.258 (0.735)	Data Time 0.001 (0.112)	Loss 2.8188 (2.7596)	Entropy 1.22112 (1.22145)	Top-1 acc 55.469 (57.579)	Top-5 acc 76.953 (79.296)	lr 0.01756
Train [45][300/3239]	Time 0.313 (0.726)	Data Time 0.001 (0.108)	Loss 2.8281 (2.7608)	Entropy 1.22108 (1.22144)	Top-1 acc 56.641 (57.559)	Top-5 acc 78.906 (79.283)	lr 0.01756
Train [45][310/3239]	Time 0.230 (0.718)	Data Time 0.001 (0.105)	Loss 2.6948 (2.7616)	Entropy 1.22104 (1.22142)	Top-1 acc 57.812 (57.540)	Top-5 acc 80.859 (79.272)	lr 0.01756
Train [45][320/3239]	Time 0.217 (0.709)	Data Time 0.001 (0.101)	Loss 2.6961 (2.7631)	Entropy 1.22103 (1.22141)	Top-1 acc 60.547 (57.511)	Top-5 acc 80.078 (79.252)	lr 0.01755
Train [45][330/3239]	Time 0.247 (0.702)	Data Time 0.001 (0.098)	Loss 2.9199 (2.7635)	Entropy 1.22096 (1.22140)	Top-1 acc 53.516 (57.523)	Top-5 acc 74.609 (79.239)	lr 0.01755
Train [45][340/3239]	Time 2.568 (0.695)	Data Time 0.001 (0.096)	Loss 2.7069 (2.7630)	Entropy 1.22096 (1.22139)	Top-1 acc 59.766 (57.554)	Top-5 acc 81.641 (79.248)	lr 0.01755
Train [45][350/3239]	Time 0.221 (0.682)	Data Time 0.001 (0.093)	Loss 2.7642 (2.7612)	Entropy 1.22092 (1.22137)	Top-1 acc 59.766 (57.630)	Top-5 acc 80.078 (79.270)	lr 0.01755
Train [45][360/3239]	Time 0.218 (0.675)	Data Time 0.001 (0.090)	Loss 2.7909 (2.7631)	Entropy 1.22088 (1.22136)	Top-1 acc 54.297 (57.577)	Top-5 acc 78.516 (79.228)	lr 0.01755
Train [45][370/3239]	Time 0.221 (0.669)	Data Time 0.001 (0.088)	Loss 2.7163 (2.7632)	Entropy 1.22081 (1.22135)	Top-1 acc 59.766 (57.615)	Top-5 acc 79.688 (79.233)	lr 0.01755
Train [45][380/3239]	Time 0.258 (0.664)	Data Time 0.002 (0.086)	Loss 2.5825 (2.7629)	Entropy 1.22078 (1.22133)	Top-1 acc 63.672 (57.636)	Top-5 acc 83.984 (79.240)	lr 0.01755
Train [45][390/3239]	Time 0.222 (0.658)	Data Time 0.001 (0.084)	Loss 2.7397 (2.7608)	Entropy 1.22074 (1.22132)	Top-1 acc 54.297 (57.655)	Top-5 acc 78.906 (79.281)	lr 0.01755
Train [45][400/3239]	Time 0.226 (0.653)	Data Time 0.002 (0.081)	Loss 2.7383 (2.7597)	Entropy 1.22072 (1.22130)	Top-1 acc 56.250 (57.704)	Top-5 acc 78.906 (79.299)	lr 0.01755
Train [45][410/3239]	Time 0.220 (0.767)	Data Time 0.002 (0.080)	Loss 2.8314 (2.7594)	Entropy 1.22070 (1.22129)	Top-1 acc 57.422 (57.727)	Top-5 acc 78.516 (79.299)	lr 0.01755
Train [45][420/3239]	Time 0.210 (0.760)	Data Time 0.002 (0.078)	Loss 2.8341 (2.7610)	Entropy 1.22060 (1.22127)	Top-1 acc 55.859 (57.695)	Top-5 acc 79.297 (79.267)	lr 0.01755
Train [45][430/3239]	Time 0.224 (0.753)	Data Time 0.002 (0.076)	Loss 2.6547 (2.7598)	Entropy 1.22051 (1.22126)	Top-1 acc 57.812 (57.719)	Top-5 acc 80.859 (79.300)	lr 0.01754
Train [45][440/3239]	Time 0.233 (0.747)	Data Time 0.002 (0.074)	Loss 2.8362 (2.7614)	Entropy 1.22045 (1.22124)	Top-1 acc 56.250 (57.673)	Top-5 acc 79.688 (79.279)	lr 0.01754
Train [45][450/3239]	Time 2.531 (0.740)	Data Time 0.001 (0.073)	Loss 2.7354 (2.7614)	Entropy 1.22045 (1.22122)	Top-1 acc 60.156 (57.654)	Top-5 acc 78.516 (79.264)	lr 0.01754
Train [45][460/3239]	Time 0.197 (0.729)	Data Time 0.001 (0.071)	Loss 2.6347 (2.7595)	Entropy 1.22038 (1.22120)	Top-1 acc 60.547 (57.688)	Top-5 acc 81.641 (79.291)	lr 0.01754
Train [45][470/3239]	Time 0.204 (0.723)	Data Time 0.001 (0.070)	Loss 2.9975 (2.7614)	Entropy 1.22040 (1.22119)	Top-1 acc 54.297 (57.643)	Top-5 acc 76.953 (79.264)	lr 0.01754
Train [45][480/3239]	Time 0.212 (0.718)	Data Time 0.001 (0.068)	Loss 2.5368 (2.7604)	Entropy 1.22037 (1.22117)	Top-1 acc 64.062 (57.665)	Top-5 acc 81.641 (79.276)	lr 0.01754
Train [45][490/3239]	Time 0.222 (0.712)	Data Time 0.002 (0.067)	Loss 2.8255 (2.7609)	Entropy 1.22035 (1.22115)	Top-1 acc 57.031 (57.664)	Top-5 acc 75.000 (79.270)	lr 0.01754
Train [45][500/3239]	Time 0.314 (0.707)	Data Time 0.001 (0.066)	Loss 2.7755 (2.7611)	Entropy 1.22033 (1.22114)	Top-1 acc 55.469 (57.642)	Top-5 acc 76.172 (79.268)	lr 0.01754
Train [45][510/3239]	Time 0.231 (0.702)	Data Time 0.002 (0.064)	Loss 2.7537 (2.7608)	Entropy 1.22030 (1.22112)	Top-1 acc 55.469 (57.628)	Top-5 acc 80.859 (79.274)	lr 0.01754
Train [45][520/3239]	Time 0.218 (0.698)	Data Time 0.001 (0.063)	Loss 2.5460 (2.7608)	Entropy 1.22023 (1.22111)	Top-1 acc 64.062 (57.622)	Top-5 acc 85.547 (79.288)	lr 0.01754
Train [45][530/3239]	Time 0.233 (0.693)	Data Time 0.001 (0.062)	Loss 2.7525 (2.7609)	Entropy 1.22020 (1.22109)	Top-1 acc 54.688 (57.619)	Top-5 acc 78.516 (79.289)	lr 0.01754
Train [45][540/3239]	Time 0.215 (0.689)	Data Time 0.001 (0.061)	Loss 2.8125 (2.7618)	Entropy 1.22016 (1.22107)	Top-1 acc 58.203 (57.603)	Top-5 acc 80.078 (79.267)	lr 0.01753
Train [45][550/3239]	Time 0.225 (0.685)	Data Time 0.001 (0.060)	Loss 2.7454 (2.7622)	Entropy 1.22015 (1.22106)	Top-1 acc 58.203 (57.605)	Top-5 acc 76.562 (79.247)	lr 0.01753
Train [45][560/3239]	Time 2.540 (0.681)	Data Time 0.001 (0.059)	Loss 2.8293 (2.7623)	Entropy 1.22015 (1.22104)	Top-1 acc 55.469 (57.597)	Top-5 acc 77.344 (79.250)	lr 0.01753
Train [45][570/3239]	Time 0.243 (0.673)	Data Time 0.002 (0.058)	Loss 2.4974 (2.7608)	Entropy 1.22005 (1.22102)	Top-1 acc 64.844 (57.639)	Top-5 acc 82.422 (79.280)	lr 0.01753
Train [45][580/3239]	Time 0.187 (0.669)	Data Time 0.001 (0.057)	Loss 2.8521 (2.7614)	Entropy 1.22004 (1.22101)	Top-1 acc 54.688 (57.632)	Top-5 acc 79.297 (79.264)	lr 0.01753
Train [45][590/3239]	Time 0.245 (0.665)	Data Time 0.002 (0.056)	Loss 2.6974 (2.7620)	Entropy 1.22002 (1.22099)	Top-1 acc 62.500 (57.611)	Top-5 acc 79.688 (79.247)	lr 0.01753
Train [45][600/3239]	Time 0.211 (0.662)	Data Time 0.001 (0.055)	Loss 2.7285 (2.7619)	Entropy 1.21998 (1.22097)	Top-1 acc 60.156 (57.608)	Top-5 acc 77.734 (79.246)	lr 0.01753
Train [45][610/3239]	Time 0.231 (0.659)	Data Time 0.001 (0.054)	Loss 2.9122 (2.7613)	Entropy 1.21998 (1.22096)	Top-1 acc 52.344 (57.626)	Top-5 acc 78.906 (79.247)	lr 0.01753
Train [45][620/3239]	Time 0.207 (0.656)	Data Time 0.001 (0.053)	Loss 2.7874 (2.7611)	Entropy 1.21994 (1.22094)	Top-1 acc 53.906 (57.632)	Top-5 acc 79.297 (79.253)	lr 0.01753
Train [45][630/3239]	Time 0.195 (0.653)	Data Time 0.001 (0.052)	Loss 2.9167 (2.7615)	Entropy 1.21988 (1.22092)	Top-1 acc 57.031 (57.622)	Top-5 acc 75.000 (79.245)	lr 0.01753
Train [45][640/3239]	Time 0.231 (0.650)	Data Time 0.001 (0.052)	Loss 2.7388 (2.7617)	Entropy 1.21985 (1.22091)	Top-1 acc 56.641 (57.617)	Top-5 acc 80.469 (79.255)	lr 0.01753
Train [45][650/3239]	Time 0.226 (0.647)	Data Time 0.001 (0.051)	Loss 2.9130 (2.7618)	Entropy 1.21984 (1.22089)	Top-1 acc 57.812 (57.637)	Top-5 acc 74.609 (79.260)	lr 0.01752
Train [45][660/3239]	Time 0.269 (0.644)	Data Time 0.001 (0.050)	Loss 2.8220 (2.7631)	Entropy 1.21980 (1.22087)	Top-1 acc 60.938 (57.616)	Top-5 acc 78.125 (79.237)	lr 0.01752
Train [45][670/3239]	Time 2.332 (0.641)	Data Time 0.001 (0.049)	Loss 2.6909 (2.7635)	Entropy 1.21980 (1.22086)	Top-1 acc 60.938 (57.618)	Top-5 acc 80.859 (79.235)	lr 0.01752
Train [45][680/3239]	Time 0.239 (0.635)	Data Time 0.001 (0.049)	Loss 2.7105 (2.7639)	Entropy 1.21974 (1.22084)	Top-1 acc 59.375 (57.634)	Top-5 acc 79.297 (79.228)	lr 0.01752
Train [45][690/3239]	Time 0.332 (0.633)	Data Time 0.001 (0.048)	Loss 2.6329 (2.7640)	Entropy 1.21968 (1.22083)	Top-1 acc 60.938 (57.636)	Top-5 acc 80.078 (79.232)	lr 0.01752
Train [45][700/3239]	Time 0.262 (0.630)	Data Time 0.001 (0.047)	Loss 2.8143 (2.7640)	Entropy 1.21966 (1.22081)	Top-1 acc 57.422 (57.639)	Top-5 acc 78.906 (79.236)	lr 0.01752
Train [45][710/3239]	Time 0.216 (0.628)	Data Time 0.001 (0.047)	Loss 2.8174 (2.7640)	Entropy 1.21961 (1.22079)	Top-1 acc 57.812 (57.641)	Top-5 acc 79.297 (79.234)	lr 0.01752
Train [45][720/3239]	Time 0.229 (0.626)	Data Time 0.001 (0.046)	Loss 2.7250 (2.7631)	Entropy 1.21959 (1.22078)	Top-1 acc 60.156 (57.668)	Top-5 acc 80.469 (79.257)	lr 0.01752
Train [45][730/3239]	Time 0.212 (0.623)	Data Time 0.001 (0.045)	Loss 2.6166 (2.7627)	Entropy 1.21958 (1.22076)	Top-1 acc 63.281 (57.687)	Top-5 acc 81.250 (79.262)	lr 0.01752
Train [45][740/3239]	Time 0.331 (0.621)	Data Time 0.001 (0.045)	Loss 2.7613 (2.7623)	Entropy 1.21951 (1.22074)	Top-1 acc 55.859 (57.699)	Top-5 acc 78.125 (79.269)	lr 0.01752
Train [45][750/3239]	Time 0.230 (0.619)	Data Time 0.001 (0.044)	Loss 2.7931 (2.7625)	Entropy 1.21945 (1.22073)	Top-1 acc 59.375 (57.695)	Top-5 acc 80.078 (79.280)	lr 0.01751
Train [45][760/3239]	Time 0.207 (0.617)	Data Time 0.001 (0.044)	Loss 2.9636 (2.7632)	Entropy 1.21942 (1.22071)	Top-1 acc 53.906 (57.666)	Top-5 acc 75.000 (79.264)	lr 0.01751
Train [45][770/3239]	Time 0.232 (0.679)	Data Time 0.003 (0.043)	Loss 2.6216 (2.7634)	Entropy 1.21936 (1.22069)	Top-1 acc 61.719 (57.667)	Top-5 acc 80.078 (79.251)	lr 0.01751
Train [45][780/3239]	Time 2.432 (0.676)	Data Time 0.002 (0.043)	Loss 2.6914 (2.7635)	Entropy 1.21936 (1.22068)	Top-1 acc 59.766 (57.669)	Top-5 acc 82.031 (79.248)	lr 0.01751
Train [45][790/3239]	Time 0.268 (0.671)	Data Time 0.002 (0.042)	Loss 2.9258 (2.7650)	Entropy 1.21936 (1.22066)	Top-1 acc 51.172 (57.620)	Top-5 acc 77.344 (79.221)	lr 0.01751
Train [45][800/3239]	Time 0.221 (0.668)	Data Time 0.001 (0.042)	Loss 2.7254 (2.7653)	Entropy 1.21930 (1.22064)	Top-1 acc 60.156 (57.618)	Top-5 acc 78.516 (79.224)	lr 0.01751
Train [45][810/3239]	Time 0.235 (0.665)	Data Time 0.001 (0.041)	Loss 2.8825 (2.7658)	Entropy 1.21929 (1.22063)	Top-1 acc 57.422 (57.611)	Top-5 acc 76.172 (79.219)	lr 0.01751
Train [45][820/3239]	Time 0.237 (0.663)	Data Time 0.002 (0.041)	Loss 2.6765 (2.7660)	Entropy 1.21928 (1.22061)	Top-1 acc 59.766 (57.623)	Top-5 acc 79.297 (79.220)	lr 0.01751
Train [45][830/3239]	Time 0.235 (0.660)	Data Time 0.001 (0.040)	Loss 2.7493 (2.7658)	Entropy 1.21926 (1.22059)	Top-1 acc 58.594 (57.630)	Top-5 acc 76.953 (79.221)	lr 0.01751
Train [45][840/3239]	Time 0.289 (0.658)	Data Time 0.001 (0.040)	Loss 2.8106 (2.7660)	Entropy 1.21921 (1.22058)	Top-1 acc 57.812 (57.615)	Top-5 acc 75.781 (79.216)	lr 0.01751
Train [45][850/3239]	Time 0.230 (0.655)	Data Time 0.001 (0.039)	Loss 2.5598 (2.7664)	Entropy 1.21912 (1.22056)	Top-1 acc 64.844 (57.613)	Top-5 acc 83.203 (79.207)	lr 0.01751
Train [45][860/3239]	Time 0.219 (0.653)	Data Time 0.001 (0.039)	Loss 2.7669 (2.7663)	Entropy 1.21914 (1.22054)	Top-1 acc 56.250 (57.614)	Top-5 acc 79.688 (79.213)	lr 0.01750
Train [45][870/3239]	Time 0.218 (0.651)	Data Time 0.002 (0.038)	Loss 2.8571 (2.7667)	Entropy 1.21903 (1.22053)	Top-1 acc 57.812 (57.617)	Top-5 acc 75.781 (79.210)	lr 0.01750
Train [45][880/3239]	Time 0.229 (0.649)	Data Time 0.002 (0.038)	Loss 2.8861 (2.7661)	Entropy 1.21899 (1.22051)	Top-1 acc 57.031 (57.631)	Top-5 acc 76.172 (79.227)	lr 0.01750
Train [45][890/3239]	Time 2.564 (0.646)	Data Time 0.001 (0.038)	Loss 2.7912 (2.7659)	Entropy 1.21899 (1.22049)	Top-1 acc 54.297 (57.619)	Top-5 acc 80.078 (79.231)	lr 0.01750
Train [45][900/3239]	Time 0.226 (0.642)	Data Time 0.001 (0.037)	Loss 2.7777 (2.7664)	Entropy 1.21897 (1.22048)	Top-1 acc 59.766 (57.610)	Top-5 acc 78.125 (79.214)	lr 0.01750
Train [45][910/3239]	Time 0.222 (0.640)	Data Time 0.002 (0.037)	Loss 2.6723 (2.7665)	Entropy 1.21896 (1.22046)	Top-1 acc 60.156 (57.605)	Top-5 acc 79.688 (79.204)	lr 0.01750
Train [45][920/3239]	Time 0.222 (0.638)	Data Time 0.001 (0.036)	Loss 2.5831 (2.7660)	Entropy 1.21895 (1.22044)	Top-1 acc 62.500 (57.619)	Top-5 acc 81.641 (79.217)	lr 0.01750
Train [45][930/3239]	Time 0.213 (0.636)	Data Time 0.001 (0.036)	Loss 2.7123 (2.7658)	Entropy 1.21896 (1.22043)	Top-1 acc 60.938 (57.619)	Top-5 acc 80.078 (79.225)	lr 0.01750
Train [45][940/3239]	Time 0.334 (0.634)	Data Time 0.001 (0.036)	Loss 2.5318 (2.7665)	Entropy 1.21892 (1.22041)	Top-1 acc 60.547 (57.600)	Top-5 acc 83.594 (79.206)	lr 0.01750
Train [45][950/3239]	Time 0.229 (0.632)	Data Time 0.001 (0.035)	Loss 2.9547 (2.7662)	Entropy 1.21893 (1.22040)	Top-1 acc 52.734 (57.605)	Top-5 acc 74.219 (79.216)	lr 0.01750
Train [45][960/3239]	Time 0.209 (0.630)	Data Time 0.001 (0.035)	Loss 2.7431 (2.7665)	Entropy 1.21907 (1.22038)	Top-1 acc 55.469 (57.581)	Top-5 acc 82.422 (79.213)	lr 0.01750
Train [45][970/3239]	Time 0.228 (0.628)	Data Time 0.001 (0.035)	Loss 2.9870 (2.7667)	Entropy 1.21904 (1.22037)	Top-1 acc 55.859 (57.577)	Top-5 acc 74.219 (79.209)	lr 0.01749
Train [45][980/3239]	Time 0.225 (0.627)	Data Time 0.001 (0.034)	Loss 2.8735 (2.7665)	Entropy 1.21899 (1.22035)	Top-1 acc 57.812 (57.596)	Top-5 acc 75.391 (79.223)	lr 0.01749
Train [45][990/3239]	Time 0.319 (0.625)	Data Time 0.001 (0.034)	Loss 2.6690 (2.7664)	Entropy 1.21897 (1.22034)	Top-1 acc 61.328 (57.599)	Top-5 acc 78.906 (79.222)	lr 0.01749
Train [45][1000/3239]	Time 2.376 (0.623)	Data Time 0.001 (0.034)	Loss 2.8075 (2.7665)	Entropy 1.21897 (1.22033)	Top-1 acc 56.641 (57.597)	Top-5 acc 78.906 (79.220)	lr 0.01749
Train [45][1010/3239]	Time 0.258 (0.620)	Data Time 0.002 (0.033)	Loss 2.7807 (2.7662)	Entropy 1.21892 (1.22031)	Top-1 acc 56.641 (57.606)	Top-5 acc 79.297 (79.223)	lr 0.01749
Train [45][1020/3239]	Time 0.229 (0.618)	Data Time 0.002 (0.033)	Loss 2.6138 (2.7662)	Entropy 1.21887 (1.22030)	Top-1 acc 60.547 (57.609)	Top-5 acc 83.203 (79.231)	lr 0.01749
Train [45][1030/3239]	Time 0.219 (0.616)	Data Time 0.002 (0.033)	Loss 2.6648 (2.7655)	Entropy 1.21885 (1.22028)	Top-1 acc 60.938 (57.617)	Top-5 acc 79.297 (79.242)	lr 0.01749
Train [45][1040/3239]	Time 0.310 (0.615)	Data Time 0.001 (0.032)	Loss 2.8042 (2.7661)	Entropy 1.21882 (1.22027)	Top-1 acc 58.203 (57.603)	Top-5 acc 77.734 (79.226)	lr 0.01749
Train [45][1050/3239]	Time 0.221 (0.613)	Data Time 0.001 (0.032)	Loss 2.8747 (2.7661)	Entropy 1.21882 (1.22026)	Top-1 acc 54.297 (57.601)	Top-5 acc 78.125 (79.226)	lr 0.01749
Train [45][1060/3239]	Time 0.205 (0.612)	Data Time 0.001 (0.032)	Loss 2.5780 (2.7660)	Entropy 1.21875 (1.22024)	Top-1 acc 60.156 (57.605)	Top-5 acc 83.203 (79.232)	lr 0.01749
Train [45][1070/3239]	Time 0.232 (0.611)	Data Time 0.001 (0.032)	Loss 2.5747 (2.7655)	Entropy 1.21872 (1.22023)	Top-1 acc 60.156 (57.607)	Top-5 acc 85.938 (79.249)	lr 0.01749
Train [45][1080/3239]	Time 0.207 (0.609)	Data Time 0.001 (0.031)	Loss 2.9762 (2.7658)	Entropy 1.21867 (1.22021)	Top-1 acc 53.516 (57.608)	Top-5 acc 76.562 (79.241)	lr 0.01748
Train [45][1090/3239]	Time 0.222 (0.608)	Data Time 0.001 (0.031)	Loss 2.7020 (2.7662)	Entropy 1.21865 (1.22020)	Top-1 acc 60.156 (57.601)	Top-5 acc 81.250 (79.235)	lr 0.01748
Train [45][1100/3239]	Time 0.259 (0.607)	Data Time 0.001 (0.031)	Loss 2.7354 (2.7656)	Entropy 1.21862 (1.22019)	Top-1 acc 60.156 (57.615)	Top-5 acc 79.297 (79.246)	lr 0.01748
Train [45][1110/3239]	Time 2.485 (0.605)	Data Time 0.002 (0.031)	Loss 2.6963 (2.7657)	Entropy 1.21862 (1.22017)	Top-1 acc 60.547 (57.617)	Top-5 acc 79.297 (79.240)	lr 0.01748
Train [45][1120/3239]	Time 0.238 (0.602)	Data Time 0.001 (0.030)	Loss 2.8425 (2.7660)	Entropy 1.21862 (1.22016)	Top-1 acc 57.031 (57.608)	Top-5 acc 77.734 (79.229)	lr 0.01748
Train [45][1130/3239]	Time 0.193 (0.601)	Data Time 0.001 (0.030)	Loss 2.9350 (2.7664)	Entropy 1.21860 (1.22014)	Top-1 acc 55.469 (57.606)	Top-5 acc 76.953 (79.228)	lr 0.01748
Train [45][1140/3239]	Time 0.459 (0.643)	Data Time 0.002 (0.030)	Loss 2.9078 (2.7669)	Entropy 1.21854 (1.22013)	Top-1 acc 54.297 (57.588)	Top-5 acc 76.953 (79.221)	lr 0.01748
Train [45][1150/3239]	Time 0.220 (0.642)	Data Time 0.002 (0.030)	Loss 2.8285 (2.7663)	Entropy 1.21854 (1.22012)	Top-1 acc 53.906 (57.600)	Top-5 acc 80.078 (79.233)	lr 0.01748
Train [45][1160/3239]	Time 0.226 (0.640)	Data Time 0.001 (0.029)	Loss 2.8574 (2.7666)	Entropy 1.21853 (1.22010)	Top-1 acc 53.125 (57.595)	Top-5 acc 76.953 (79.222)	lr 0.01748
Train [45][1170/3239]	Time 0.219 (0.638)	Data Time 0.001 (0.029)	Loss 2.7082 (2.7665)	Entropy 1.21851 (1.22009)	Top-1 acc 60.547 (57.604)	Top-5 acc 80.078 (79.217)	lr 0.01748
Train [45][1180/3239]	Time 0.222 (0.637)	Data Time 0.001 (0.029)	Loss 2.7398 (2.7661)	Entropy 1.21850 (1.22008)	Top-1 acc 60.547 (57.623)	Top-5 acc 78.125 (79.226)	lr 0.01748
Train [45][1190/3239]	Time 0.225 (0.635)	Data Time 0.001 (0.029)	Loss 2.7569 (2.7659)	Entropy 1.21839 (1.22006)	Top-1 acc 58.594 (57.623)	Top-5 acc 77.344 (79.230)	lr 0.01747
Train [45][1200/3239]	Time 0.210 (0.634)	Data Time 0.001 (0.028)	Loss 2.7529 (2.7656)	Entropy 1.21838 (1.22005)	Top-1 acc 58.203 (57.633)	Top-5 acc 80.469 (79.232)	lr 0.01747
Train [45][1210/3239]	Time 0.236 (0.632)	Data Time 0.001 (0.028)	Loss 2.6811 (2.7654)	Entropy 1.21835 (1.22004)	Top-1 acc 60.156 (57.642)	Top-5 acc 81.641 (79.235)	lr 0.01747
Train [45][1220/3239]	Time 2.357 (0.631)	Data Time 0.002 (0.028)	Loss 2.6877 (2.7655)	Entropy 1.21835 (1.22002)	Top-1 acc 55.078 (57.625)	Top-5 acc 81.250 (79.241)	lr 0.01747
Train [45][1230/3239]	Time 0.231 (0.628)	Data Time 0.001 (0.028)	Loss 2.8403 (2.7660)	Entropy 1.21831 (1.22001)	Top-1 acc 54.688 (57.610)	Top-5 acc 76.562 (79.231)	lr 0.01747
Train [45][1240/3239]	Time 0.243 (0.626)	Data Time 0.001 (0.027)	Loss 2.8839 (2.7661)	Entropy 1.21822 (1.21999)	Top-1 acc 54.297 (57.603)	Top-5 acc 75.000 (79.228)	lr 0.01747
Train [45][1250/3239]	Time 0.210 (0.625)	Data Time 0.001 (0.027)	Loss 2.8221 (2.7661)	Entropy 1.21814 (1.21998)	Top-1 acc 54.688 (57.592)	Top-5 acc 80.469 (79.232)	lr 0.01747
Train [45][1260/3239]	Time 0.227 (0.624)	Data Time 0.001 (0.027)	Loss 2.9203 (2.7662)	Entropy 1.21811 (1.21996)	Top-1 acc 55.469 (57.587)	Top-5 acc 73.828 (79.229)	lr 0.01747
Train [45][1270/3239]	Time 0.213 (0.622)	Data Time 0.001 (0.027)	Loss 2.8188 (2.7661)	Entropy 1.21807 (1.21995)	Top-1 acc 57.031 (57.585)	Top-5 acc 76.562 (79.233)	lr 0.01747
Train [45][1280/3239]	Time 0.253 (0.621)	Data Time 0.002 (0.027)	Loss 2.7788 (2.7664)	Entropy 1.21806 (1.21993)	Top-1 acc 56.250 (57.575)	Top-5 acc 76.562 (79.224)	lr 0.01747
Train [45][1290/3239]	Time 0.210 (0.620)	Data Time 0.001 (0.026)	Loss 2.8519 (2.7667)	Entropy 1.21802 (1.21992)	Top-1 acc 56.250 (57.570)	Top-5 acc 76.562 (79.217)	lr 0.01746
Train [45][1300/3239]	Time 0.223 (0.619)	Data Time 0.001 (0.026)	Loss 2.7636 (2.7664)	Entropy 1.21799 (1.21990)	Top-1 acc 56.250 (57.578)	Top-5 acc 79.688 (79.222)	lr 0.01746
Train [45][1310/3239]	Time 0.221 (0.618)	Data Time 0.001 (0.026)	Loss 2.6860 (2.7668)	Entropy 1.21797 (1.21989)	Top-1 acc 61.719 (57.567)	Top-5 acc 81.250 (79.220)	lr 0.01746
Train [45][1320/3239]	Time 0.223 (0.616)	Data Time 0.001 (0.026)	Loss 2.6778 (2.7665)	Entropy 1.21797 (1.21988)	Top-1 acc 58.594 (57.571)	Top-5 acc 78.125 (79.221)	lr 0.01746
Train [45][1330/3239]	Time 2.500 (0.615)	Data Time 0.001 (0.026)	Loss 2.7299 (2.7669)	Entropy 1.21797 (1.21986)	Top-1 acc 58.984 (57.557)	Top-5 acc 82.812 (79.215)	lr 0.01746
Train [45][1340/3239]	Time 0.272 (0.612)	Data Time 0.002 (0.026)	Loss 2.7976 (2.7675)	Entropy 1.21785 (1.21985)	Top-1 acc 57.422 (57.544)	Top-5 acc 78.516 (79.200)	lr 0.01746
Train [45][1350/3239]	Time 0.232 (0.611)	Data Time 0.001 (0.025)	Loss 2.7637 (2.7679)	Entropy 1.21786 (1.21983)	Top-1 acc 57.812 (57.540)	Top-5 acc 78.906 (79.193)	lr 0.01746
Train [45][1360/3239]	Time 0.215 (0.610)	Data Time 0.001 (0.025)	Loss 2.7211 (2.7680)	Entropy 1.21782 (1.21982)	Top-1 acc 56.641 (57.533)	Top-5 acc 82.422 (79.190)	lr 0.01746
Train [45][1370/3239]	Time 0.225 (0.609)	Data Time 0.001 (0.025)	Loss 3.0500 (2.7681)	Entropy 1.21783 (1.21980)	Top-1 acc 49.219 (57.526)	Top-5 acc 73.438 (79.189)	lr 0.01746
Train [45][1380/3239]	Time 0.257 (0.608)	Data Time 0.001 (0.025)	Loss 2.6121 (2.7685)	Entropy 1.21780 (1.21979)	Top-1 acc 60.547 (57.520)	Top-5 acc 83.203 (79.187)	lr 0.01746
Train [45][1390/3239]	Time 0.231 (0.607)	Data Time 0.001 (0.025)	Loss 2.7635 (2.7685)	Entropy 1.21777 (1.21977)	Top-1 acc 61.328 (57.518)	Top-5 acc 81.641 (79.192)	lr 0.01746
Train [45][1400/3239]	Time 0.196 (0.606)	Data Time 0.001 (0.025)	Loss 2.9188 (2.7687)	Entropy 1.21794 (1.21976)	Top-1 acc 55.469 (57.516)	Top-5 acc 76.172 (79.189)	lr 0.01745
Train [45][1410/3239]	Time 0.221 (0.605)	Data Time 0.001 (0.024)	Loss 2.8696 (2.7690)	Entropy 1.21794 (1.21975)	Top-1 acc 53.906 (57.512)	Top-5 acc 78.125 (79.187)	lr 0.01745
Train [45][1420/3239]	Time 0.200 (0.604)	Data Time 0.001 (0.024)	Loss 2.8368 (2.7688)	Entropy 1.21785 (1.21973)	Top-1 acc 50.391 (57.515)	Top-5 acc 78.516 (79.186)	lr 0.01745
Train [45][1430/3239]	Time 0.260 (0.603)	Data Time 0.002 (0.024)	Loss 2.6091 (2.7686)	Entropy 1.21786 (1.21972)	Top-1 acc 61.328 (57.524)	Top-5 acc 83.984 (79.195)	lr 0.01745
Train [45][1440/3239]	Time 2.517 (0.602)	Data Time 0.001 (0.024)	Loss 2.6987 (2.7685)	Entropy 1.21786 (1.21971)	Top-1 acc 61.328 (57.524)	Top-5 acc 79.688 (79.204)	lr 0.01745
Train [45][1450/3239]	Time 0.230 (0.599)	Data Time 0.001 (0.024)	Loss 2.8208 (2.7691)	Entropy 1.21777 (1.21969)	Top-1 acc 55.859 (57.510)	Top-5 acc 78.906 (79.193)	lr 0.01745
Train [45][1460/3239]	Time 0.213 (0.598)	Data Time 0.001 (0.024)	Loss 2.5681 (2.7689)	Entropy 1.21773 (1.21968)	Top-1 acc 59.375 (57.511)	Top-5 acc 83.203 (79.195)	lr 0.01745
Train [45][1470/3239]	Time 0.211 (0.597)	Data Time 0.001 (0.023)	Loss 2.7636 (2.7689)	Entropy 1.21770 (1.21967)	Top-1 acc 60.938 (57.510)	Top-5 acc 77.734 (79.197)	lr 0.01745
Train [45][1480/3239]	Time 0.234 (0.596)	Data Time 0.001 (0.023)	Loss 2.6333 (2.7688)	Entropy 1.21769 (1.21965)	Top-1 acc 62.500 (57.509)	Top-5 acc 81.250 (79.198)	lr 0.01745
Train [45][1490/3239]	Time 0.249 (0.595)	Data Time 0.001 (0.023)	Loss 2.6827 (2.7692)	Entropy 1.21762 (1.21964)	Top-1 acc 60.938 (57.500)	Top-5 acc 77.734 (79.188)	lr 0.01745
Train [45][1500/3239]	Time 0.274 (0.624)	Data Time 0.002 (0.023)	Loss 2.9879 (2.7694)	Entropy 1.21759 (1.21963)	Top-1 acc 53.125 (57.497)	Top-5 acc 75.000 (79.185)	lr 0.01745
Train [45][1510/3239]	Time 0.212 (0.624)	Data Time 0.002 (0.023)	Loss 2.8464 (2.7693)	Entropy 1.21755 (1.21961)	Top-1 acc 55.078 (57.500)	Top-5 acc 76.953 (79.187)	lr 0.01744
Train [45][1520/3239]	Time 0.218 (0.623)	Data Time 0.002 (0.023)	Loss 2.8238 (2.7693)	Entropy 1.21749 (1.21960)	Top-1 acc 55.078 (57.502)	Top-5 acc 78.516 (79.185)	lr 0.01744
Train [45][1530/3239]	Time 0.308 (0.622)	Data Time 0.001 (0.023)	Loss 2.6507 (2.7691)	Entropy 1.21744 (1.21959)	Top-1 acc 60.938 (57.517)	Top-5 acc 78.516 (79.188)	lr 0.01744
Train [45][1540/3239]	Time 0.266 (0.621)	Data Time 0.001 (0.022)	Loss 2.8429 (2.7691)	Entropy 1.21744 (1.21957)	Top-1 acc 52.734 (57.516)	Top-5 acc 78.516 (79.189)	lr 0.01744
Train [45][1550/3239]	Time 2.475 (0.620)	Data Time 0.001 (0.022)	Loss 2.7473 (2.7690)	Entropy 1.21744 (1.21956)	Top-1 acc 59.766 (57.519)	Top-5 acc 78.906 (79.193)	lr 0.01744
Train [45][1560/3239]	Time 0.244 (0.617)	Data Time 0.001 (0.022)	Loss 2.9765 (2.7688)	Entropy 1.21739 (1.21954)	Top-1 acc 53.516 (57.517)	Top-5 acc 75.781 (79.195)	lr 0.01744
Train [45][1570/3239]	Time 0.237 (0.616)	Data Time 0.001 (0.022)	Loss 2.9272 (2.7690)	Entropy 1.21740 (1.21953)	Top-1 acc 51.172 (57.513)	Top-5 acc 77.734 (79.194)	lr 0.01744
Train [45][1580/3239]	Time 0.309 (0.615)	Data Time 0.001 (0.022)	Loss 2.8257 (2.7692)	Entropy 1.21735 (1.21952)	Top-1 acc 52.734 (57.505)	Top-5 acc 77.734 (79.186)	lr 0.01744
Train [45][1590/3239]	Time 0.248 (0.614)	Data Time 0.001 (0.022)	Loss 2.7447 (2.7695)	Entropy 1.21732 (1.21950)	Top-1 acc 59.375 (57.501)	Top-5 acc 80.469 (79.181)	lr 0.01744
Train [45][1600/3239]	Time 0.221 (0.613)	Data Time 0.001 (0.022)	Loss 2.8984 (2.7693)	Entropy 1.21731 (1.21949)	Top-1 acc 51.562 (57.501)	Top-5 acc 78.906 (79.189)	lr 0.01744
Train [45][1610/3239]	Time 0.219 (0.612)	Data Time 0.001 (0.022)	Loss 2.8693 (2.7692)	Entropy 1.21735 (1.21948)	Top-1 acc 57.031 (57.505)	Top-5 acc 77.344 (79.193)	lr 0.01744
Train [45][1620/3239]	Time 0.213 (0.611)	Data Time 0.001 (0.021)	Loss 2.6539 (2.7693)	Entropy 1.21731 (1.21946)	Top-1 acc 58.984 (57.496)	Top-5 acc 79.688 (79.190)	lr 0.01743
Train [45][1630/3239]	Time 0.316 (0.610)	Data Time 0.001 (0.021)	Loss 2.6833 (2.7695)	Entropy 1.21729 (1.21945)	Top-1 acc 60.156 (57.494)	Top-5 acc 80.469 (79.187)	lr 0.01743
Train [45][1640/3239]	Time 0.222 (0.609)	Data Time 0.001 (0.021)	Loss 2.7489 (2.7696)	Entropy 1.21723 (1.21944)	Top-1 acc 56.250 (57.490)	Top-5 acc 80.078 (79.185)	lr 0.01743
Train [45][1650/3239]	Time 0.225 (0.609)	Data Time 0.001 (0.021)	Loss 2.7578 (2.7695)	Entropy 1.21720 (1.21942)	Top-1 acc 59.375 (57.490)	Top-5 acc 78.125 (79.184)	lr 0.01743
Train [45][1660/3239]	Time 2.533 (0.608)	Data Time 0.001 (0.021)	Loss 2.6904 (2.7694)	Entropy 1.21720 (1.21941)	Top-1 acc 57.422 (57.488)	Top-5 acc 80.078 (79.188)	lr 0.01743
Train [45][1670/3239]	Time 0.243 (0.605)	Data Time 0.001 (0.021)	Loss 2.8115 (2.7693)	Entropy 1.21714 (1.21940)	Top-1 acc 57.031 (57.490)	Top-5 acc 80.469 (79.192)	lr 0.01743
Train [45][1680/3239]	Time 0.255 (0.605)	Data Time 0.001 (0.021)	Loss 2.7730 (2.7693)	Entropy 1.21712 (1.21938)	Top-1 acc 57.422 (57.491)	Top-5 acc 81.250 (79.191)	lr 0.01743
Train [45][1690/3239]	Time 0.234 (0.604)	Data Time 0.001 (0.021)	Loss 2.7204 (2.7692)	Entropy 1.21713 (1.21937)	Top-1 acc 54.297 (57.484)	Top-5 acc 80.469 (79.194)	lr 0.01743
Train [45][1700/3239]	Time 0.215 (0.603)	Data Time 0.001 (0.021)	Loss 2.8971 (2.7691)	Entropy 1.21714 (1.21936)	Top-1 acc 55.859 (57.490)	Top-5 acc 76.172 (79.195)	lr 0.01743
Train [45][1710/3239]	Time 0.215 (0.602)	Data Time 0.001 (0.020)	Loss 2.7036 (2.7691)	Entropy 1.21709 (1.21934)	Top-1 acc 58.984 (57.488)	Top-5 acc 80.078 (79.194)	lr 0.01743
Train [45][1720/3239]	Time 0.210 (0.601)	Data Time 0.001 (0.020)	Loss 2.8166 (2.7692)	Entropy 1.21707 (1.21933)	Top-1 acc 53.125 (57.484)	Top-5 acc 77.344 (79.192)	lr 0.01743
Train [45][1730/3239]	Time 0.314 (0.600)	Data Time 0.002 (0.020)	Loss 2.6874 (2.7692)	Entropy 1.21700 (1.21932)	Top-1 acc 52.344 (57.481)	Top-5 acc 83.984 (79.194)	lr 0.01742
Train [45][1740/3239]	Time 0.238 (0.600)	Data Time 0.002 (0.020)	Loss 2.8980 (2.7692)	Entropy 1.21695 (1.21930)	Top-1 acc 52.344 (57.473)	Top-5 acc 75.781 (79.194)	lr 0.01742
Train [45][1750/3239]	Time 0.194 (0.599)	Data Time 0.001 (0.020)	Loss 2.7890 (2.7693)	Entropy 1.21692 (1.21929)	Top-1 acc 57.422 (57.466)	Top-5 acc 80.859 (79.194)	lr 0.01742
Train [45][1760/3239]	Time 0.255 (0.598)	Data Time 0.001 (0.020)	Loss 2.5960 (2.7693)	Entropy 1.21690 (1.21928)	Top-1 acc 63.672 (57.470)	Top-5 acc 82.812 (79.195)	lr 0.01742
Train [45][1770/3239]	Time 2.531 (0.597)	Data Time 0.001 (0.020)	Loss 2.8452 (2.7692)	Entropy 1.21690 (1.21926)	Top-1 acc 55.469 (57.472)	Top-5 acc 78.125 (79.197)	lr 0.01742
Train [45][1780/3239]	Time 0.347 (0.595)	Data Time 0.001 (0.020)	Loss 2.9333 (2.7691)	Entropy 1.21681 (1.21925)	Top-1 acc 56.641 (57.469)	Top-5 acc 75.781 (79.204)	lr 0.01742
Train [45][1790/3239]	Time 0.248 (0.595)	Data Time 0.001 (0.020)	Loss 2.7758 (2.7689)	Entropy 1.21681 (1.21924)	Top-1 acc 55.859 (57.474)	Top-5 acc 78.516 (79.205)	lr 0.01742
Train [45][1800/3239]	Time 0.245 (0.594)	Data Time 0.002 (0.019)	Loss 2.6934 (2.7693)	Entropy 1.21674 (1.21922)	Top-1 acc 59.375 (57.465)	Top-5 acc 80.469 (79.197)	lr 0.01742
Train [45][1810/3239]	Time 0.228 (0.593)	Data Time 0.001 (0.019)	Loss 3.0285 (2.7692)	Entropy 1.21665 (1.21921)	Top-1 acc 51.953 (57.470)	Top-5 acc 75.000 (79.199)	lr 0.01742
Train [45][1820/3239]	Time 0.226 (0.592)	Data Time 0.001 (0.019)	Loss 2.7661 (2.7690)	Entropy 1.21665 (1.21919)	Top-1 acc 56.250 (57.473)	Top-5 acc 78.125 (79.204)	lr 0.01742
Train [45][1830/3239]	Time 0.327 (0.592)	Data Time 0.001 (0.019)	Loss 2.6071 (2.7692)	Entropy 1.21663 (1.21918)	Top-1 acc 60.156 (57.468)	Top-5 acc 82.031 (79.201)	lr 0.01741
Train [45][1840/3239]	Time 0.205 (0.591)	Data Time 0.001 (0.019)	Loss 2.8488 (2.7693)	Entropy 1.21656 (1.21917)	Top-1 acc 53.906 (57.463)	Top-5 acc 79.297 (79.204)	lr 0.01741
Train [45][1850/3239]	Time 0.224 (0.590)	Data Time 0.001 (0.019)	Loss 2.7206 (2.7691)	Entropy 1.21650 (1.21915)	Top-1 acc 55.859 (57.462)	Top-5 acc 80.469 (79.208)	lr 0.01741
Train [45][1860/3239]	Time 0.247 (0.615)	Data Time 0.002 (0.019)	Loss 2.7397 (2.7692)	Entropy 1.21641 (1.21914)	Top-1 acc 60.156 (57.457)	Top-5 acc 79.688 (79.203)	lr 0.01741
Train [45][1870/3239]	Time 0.210 (0.614)	Data Time 0.002 (0.019)	Loss 2.8163 (2.7692)	Entropy 1.21634 (1.21912)	Top-1 acc 56.641 (57.453)	Top-5 acc 76.172 (79.202)	lr 0.01741
Train [45][1880/3239]	Time 2.529 (0.613)	Data Time 0.002 (0.019)	Loss 2.7898 (2.7692)	Entropy 1.21634 (1.21911)	Top-1 acc 53.906 (57.452)	Top-5 acc 80.078 (79.202)	lr 0.01741
Train [45][1890/3239]	Time 0.224 (0.611)	Data Time 0.001 (0.019)	Loss 2.6123 (2.7691)	Entropy 1.21630 (1.21909)	Top-1 acc 59.375 (57.455)	Top-5 acc 82.031 (79.204)	lr 0.01741
Train [45][1900/3239]	Time 0.218 (0.610)	Data Time 0.001 (0.019)	Loss 2.8419 (2.7690)	Entropy 1.21626 (1.21908)	Top-1 acc 52.344 (57.452)	Top-5 acc 79.688 (79.205)	lr 0.01741
Train [45][1910/3239]	Time 0.227 (0.610)	Data Time 0.001 (0.018)	Loss 3.1112 (2.7692)	Entropy 1.21624 (1.21906)	Top-1 acc 50.391 (57.445)	Top-5 acc 68.750 (79.199)	lr 0.01741
Train [45][1920/3239]	Time 0.235 (0.609)	Data Time 0.001 (0.018)	Loss 2.5691 (2.7695)	Entropy 1.21615 (1.21905)	Top-1 acc 62.109 (57.439)	Top-5 acc 85.938 (79.200)	lr 0.01741
Train [45][1930/3239]	Time 0.306 (0.608)	Data Time 0.001 (0.018)	Loss 2.9591 (2.7695)	Entropy 1.21614 (1.21903)	Top-1 acc 49.219 (57.435)	Top-5 acc 74.609 (79.198)	lr 0.01741
Train [45][1940/3239]	Time 0.220 (0.607)	Data Time 0.001 (0.018)	Loss 2.7844 (2.7700)	Entropy 1.21613 (1.21902)	Top-1 acc 57.422 (57.427)	Top-5 acc 80.859 (79.185)	lr 0.01740
Train [45][1950/3239]	Time 0.217 (0.606)	Data Time 0.001 (0.018)	Loss 2.8237 (2.7703)	Entropy 1.21609 (1.21900)	Top-1 acc 55.469 (57.418)	Top-5 acc 75.000 (79.176)	lr 0.01740
Train [45][1960/3239]	Time 0.219 (0.606)	Data Time 0.001 (0.018)	Loss 2.8691 (2.7704)	Entropy 1.21603 (1.21899)	Top-1 acc 55.078 (57.415)	Top-5 acc 74.609 (79.176)	lr 0.01740
Train [45][1970/3239]	Time 0.231 (0.605)	Data Time 0.001 (0.018)	Loss 2.6213 (2.7705)	Entropy 1.21600 (1.21897)	Top-1 acc 60.156 (57.414)	Top-5 acc 83.203 (79.174)	lr 0.01740
Train [45][1980/3239]	Time 0.347 (0.604)	Data Time 0.001 (0.018)	Loss 2.8587 (2.7706)	Entropy 1.21599 (1.21896)	Top-1 acc 56.250 (57.411)	Top-5 acc 78.125 (79.169)	lr 0.01740
Train [45][1990/3239]	Time 2.469 (0.603)	Data Time 0.001 (0.018)	Loss 2.6209 (2.7706)	Entropy 1.21599 (1.21894)	Top-1 acc 61.719 (57.411)	Top-5 acc 83.594 (79.173)	lr 0.01740
Train [45][2000/3239]	Time 0.227 (0.602)	Data Time 0.001 (0.018)	Loss 2.6974 (2.7705)	Entropy 1.21600 (1.21893)	Top-1 acc 61.719 (57.413)	Top-5 acc 77.734 (79.173)	lr 0.01740
Train [45][2010/3239]	Time 0.224 (0.601)	Data Time 0.001 (0.018)	Loss 2.6943 (2.7703)	Entropy 1.21598 (1.21891)	Top-1 acc 61.719 (57.420)	Top-5 acc 78.125 (79.175)	lr 0.01740
Train [45][2020/3239]	Time 0.225 (0.600)	Data Time 0.001 (0.018)	Loss 2.8136 (2.7703)	Entropy 1.21595 (1.21890)	Top-1 acc 57.422 (57.414)	Top-5 acc 77.734 (79.175)	lr 0.01740
Train [45][2030/3239]	Time 0.218 (0.600)	Data Time 0.001 (0.017)	Loss 2.9661 (2.7706)	Entropy 1.21592 (1.21889)	Top-1 acc 55.469 (57.408)	Top-5 acc 76.172 (79.170)	lr 0.01740
Train [45][2040/3239]	Time 0.220 (0.599)	Data Time 0.001 (0.017)	Loss 2.9703 (2.7707)	Entropy 1.21589 (1.21887)	Top-1 acc 54.688 (57.410)	Top-5 acc 75.391 (79.166)	lr 0.01740
Train [45][2050/3239]	Time 0.143 (0.598)	Data Time 0.001 (0.017)	Loss 2.5956 (2.7706)	Entropy 1.21589 (1.21886)	Top-1 acc 63.672 (57.416)	Top-5 acc 82.031 (79.170)	lr 0.01739
Train [45][2060/3239]	Time 0.237 (0.597)	Data Time 0.001 (0.017)	Loss 2.8487 (2.7707)	Entropy 1.21591 (1.21884)	Top-1 acc 52.734 (57.412)	Top-5 acc 76.562 (79.165)	lr 0.01739
Train [45][2070/3239]	Time 0.215 (0.597)	Data Time 0.001 (0.017)	Loss 2.7477 (2.7709)	Entropy 1.21589 (1.21883)	Top-1 acc 62.109 (57.409)	Top-5 acc 80.078 (79.161)	lr 0.01739
Train [45][2080/3239]	Time 0.324 (0.596)	Data Time 0.001 (0.017)	Loss 2.6455 (2.7709)	Entropy 1.21588 (1.21881)	Top-1 acc 60.156 (57.407)	Top-5 acc 82.812 (79.161)	lr 0.01739
Train [45][2090/3239]	Time 0.224 (0.595)	Data Time 0.001 (0.017)	Loss 2.5912 (2.7710)	Entropy 1.21584 (1.21880)	Top-1 acc 63.281 (57.403)	Top-5 acc 83.594 (79.160)	lr 0.01739
Train [45][2100/3239]	Time 2.492 (0.595)	Data Time 0.001 (0.017)	Loss 2.8417 (2.7713)	Entropy 1.21584 (1.21879)	Top-1 acc 54.297 (57.396)	Top-5 acc 79.297 (79.155)	lr 0.01739
Train [45][2110/3239]	Time 0.229 (0.593)	Data Time 0.001 (0.017)	Loss 2.6155 (2.7716)	Entropy 1.21582 (1.21877)	Top-1 acc 59.766 (57.389)	Top-5 acc 83.984 (79.153)	lr 0.01739
Train [45][2120/3239]	Time 0.215 (0.592)	Data Time 0.001 (0.017)	Loss 2.8012 (2.7719)	Entropy 1.21577 (1.21876)	Top-1 acc 58.984 (57.381)	Top-5 acc 78.125 (79.144)	lr 0.01739
Train [45][2130/3239]	Time 0.339 (0.592)	Data Time 0.001 (0.017)	Loss 2.6882 (2.7717)	Entropy 1.21573 (1.21874)	Top-1 acc 58.594 (57.386)	Top-5 acc 80.859 (79.148)	lr 0.01739
Train [45][2140/3239]	Time 0.216 (0.591)	Data Time 0.002 (0.017)	Loss 2.7246 (2.7718)	Entropy 1.21561 (1.21873)	Top-1 acc 59.766 (57.377)	Top-5 acc 82.031 (79.145)	lr 0.01739
Train [45][2150/3239]	Time 0.242 (0.591)	Data Time 0.001 (0.017)	Loss 2.8908 (2.7717)	Entropy 1.21561 (1.21871)	Top-1 acc 53.516 (57.382)	Top-5 acc 75.391 (79.146)	lr 0.01739
Train [45][2160/3239]	Time 0.228 (0.590)	Data Time 0.001 (0.016)	Loss 2.8541 (2.7720)	Entropy 1.21560 (1.21870)	Top-1 acc 54.688 (57.376)	Top-5 acc 78.125 (79.144)	lr 0.01738
Train [45][2170/3239]	Time 0.231 (0.590)	Data Time 0.001 (0.016)	Loss 2.7234 (2.7720)	Entropy 1.21558 (1.21869)	Top-1 acc 58.203 (57.376)	Top-5 acc 81.250 (79.144)	lr 0.01738
Train [45][2180/3239]	Time 0.300 (0.589)	Data Time 0.001 (0.016)	Loss 2.7789 (2.7721)	Entropy 1.21557 (1.21867)	Top-1 acc 60.938 (57.372)	Top-5 acc 78.516 (79.138)	lr 0.01738
Train [45][2190/3239]	Time 0.212 (0.588)	Data Time 0.001 (0.016)	Loss 2.6930 (2.7720)	Entropy 1.21554 (1.21866)	Top-1 acc 60.547 (57.376)	Top-5 acc 81.641 (79.143)	lr 0.01738
Train [45][2200/3239]	Time 0.233 (0.588)	Data Time 0.001 (0.016)	Loss 2.8120 (2.7719)	Entropy 1.21554 (1.21864)	Top-1 acc 55.078 (57.380)	Top-5 acc 77.344 (79.142)	lr 0.01738
Train [45][2210/3239]	Time 2.496 (0.587)	Data Time 0.002 (0.016)	Loss 2.6915 (2.7718)	Entropy 1.21554 (1.21863)	Top-1 acc 58.594 (57.381)	Top-5 acc 81.641 (79.144)	lr 0.01738
Train [45][2220/3239]	Time 0.227 (0.586)	Data Time 0.001 (0.016)	Loss 2.9776 (2.7718)	Entropy 1.21550 (1.21861)	Top-1 acc 55.078 (57.384)	Top-5 acc 71.875 (79.142)	lr 0.01738
Train [45][2230/3239]	Time 0.231 (0.606)	Data Time 0.002 (0.016)	Loss 2.6235 (2.7718)	Entropy 1.21544 (1.21860)	Top-1 acc 58.984 (57.382)	Top-5 acc 83.594 (79.140)	lr 0.01738
Train [45][2240/3239]	Time 0.219 (0.605)	Data Time 0.002 (0.016)	Loss 2.8350 (2.7720)	Entropy 1.21543 (1.21859)	Top-1 acc 55.078 (57.376)	Top-5 acc 79.297 (79.143)	lr 0.01738
Train [45][2250/3239]	Time 0.223 (0.604)	Data Time 0.001 (0.016)	Loss 2.8233 (2.7719)	Entropy 1.21544 (1.21857)	Top-1 acc 56.641 (57.376)	Top-5 acc 80.078 (79.148)	lr 0.01738
Train [45][2260/3239]	Time 0.222 (0.604)	Data Time 0.001 (0.016)	Loss 2.7709 (2.7720)	Entropy 1.21545 (1.21856)	Top-1 acc 57.812 (57.379)	Top-5 acc 79.688 (79.145)	lr 0.01737
Train [45][2270/3239]	Time 0.241 (0.603)	Data Time 0.001 (0.016)	Loss 2.7448 (2.7718)	Entropy 1.21532 (1.21854)	Top-1 acc 53.906 (57.383)	Top-5 acc 81.250 (79.148)	lr 0.01737
Train [45][2280/3239]	Time 0.242 (0.603)	Data Time 0.001 (0.016)	Loss 2.6282 (2.7720)	Entropy 1.21530 (1.21853)	Top-1 acc 60.156 (57.381)	Top-5 acc 79.297 (79.144)	lr 0.01737
Train [45][2290/3239]	Time 0.213 (0.602)	Data Time 0.002 (0.016)	Loss 2.8729 (2.7721)	Entropy 1.21529 (1.21852)	Top-1 acc 53.516 (57.379)	Top-5 acc 76.953 (79.143)	lr 0.01737
Train [45][2300/3239]	Time 0.212 (0.601)	Data Time 0.001 (0.016)	Loss 2.7241 (2.7720)	Entropy 1.21526 (1.21850)	Top-1 acc 55.078 (57.378)	Top-5 acc 78.906 (79.143)	lr 0.01737
Train [45][2310/3239]	Time 0.254 (0.601)	Data Time 0.003 (0.016)	Loss 2.7993 (2.7719)	Entropy 1.21520 (1.21849)	Top-1 acc 57.422 (57.379)	Top-5 acc 78.125 (79.146)	lr 0.01737
Train [45][2320/3239]	Time 2.467 (0.600)	Data Time 0.001 (0.015)	Loss 2.6653 (2.7718)	Entropy 1.21520 (1.21847)	Top-1 acc 61.719 (57.384)	Top-5 acc 79.297 (79.148)	lr 0.01737
Train [45][2330/3239]	Time 0.223 (0.599)	Data Time 0.001 (0.015)	Loss 2.6873 (2.7717)	Entropy 1.21519 (1.21846)	Top-1 acc 57.812 (57.385)	Top-5 acc 80.078 (79.150)	lr 0.01737
Train [45][2340/3239]	Time 0.222 (0.598)	Data Time 0.002 (0.015)	Loss 2.5685 (2.7716)	Entropy 1.21534 (1.21845)	Top-1 acc 66.797 (57.389)	Top-5 acc 80.859 (79.151)	lr 0.01737
Train [45][2350/3239]	Time 0.232 (0.598)	Data Time 0.001 (0.015)	Loss 2.7884 (2.7716)	Entropy 1.21531 (1.21843)	Top-1 acc 54.297 (57.388)	Top-5 acc 78.125 (79.155)	lr 0.01737
Train [45][2360/3239]	Time 0.236 (0.597)	Data Time 0.001 (0.015)	Loss 2.7624 (2.7716)	Entropy 1.21530 (1.21842)	Top-1 acc 57.422 (57.386)	Top-5 acc 79.688 (79.156)	lr 0.01737
Train [45][2370/3239]	Time 0.352 (0.596)	Data Time 0.001 (0.015)	Loss 2.8337 (2.7715)	Entropy 1.21519 (1.21841)	Top-1 acc 51.172 (57.389)	Top-5 acc 82.031 (79.155)	lr 0.01736
Train [45][2380/3239]	Time 0.211 (0.596)	Data Time 0.001 (0.015)	Loss 2.8069 (2.7716)	Entropy 1.21517 (1.21839)	Top-1 acc 54.688 (57.385)	Top-5 acc 78.906 (79.153)	lr 0.01736
Train [45][2390/3239]	Time 0.221 (0.595)	Data Time 0.001 (0.015)	Loss 2.7122 (2.7716)	Entropy 1.21513 (1.21838)	Top-1 acc 60.156 (57.387)	Top-5 acc 81.250 (79.154)	lr 0.01736
Train [45][2400/3239]	Time 0.218 (0.595)	Data Time 0.001 (0.015)	Loss 2.7865 (2.7715)	Entropy 1.21511 (1.21837)	Top-1 acc 57.422 (57.387)	Top-5 acc 80.078 (79.154)	lr 0.01736
Train [45][2410/3239]	Time 0.213 (0.594)	Data Time 0.001 (0.015)	Loss 2.8420 (2.7713)	Entropy 1.21507 (1.21835)	Top-1 acc 55.078 (57.387)	Top-5 acc 78.125 (79.159)	lr 0.01736
Train [45][2420/3239]	Time 0.256 (0.594)	Data Time 0.001 (0.015)	Loss 2.7540 (2.7713)	Entropy 1.21505 (1.21834)	Top-1 acc 57.031 (57.384)	Top-5 acc 80.078 (79.161)	lr 0.01736
Train [45][2430/3239]	Time 2.413 (0.593)	Data Time 0.001 (0.015)	Loss 2.9250 (2.7714)	Entropy 1.21505 (1.21833)	Top-1 acc 53.125 (57.382)	Top-5 acc 75.000 (79.159)	lr 0.01736
Train [45][2440/3239]	Time 0.225 (0.591)	Data Time 0.001 (0.015)	Loss 2.7471 (2.7714)	Entropy 1.21502 (1.21831)	Top-1 acc 56.641 (57.381)	Top-5 acc 79.297 (79.157)	lr 0.01736
Train [45][2450/3239]	Time 0.233 (0.591)	Data Time 0.001 (0.015)	Loss 2.7741 (2.7714)	Entropy 1.21495 (1.21830)	Top-1 acc 57.422 (57.383)	Top-5 acc 77.734 (79.155)	lr 0.01736
Train [45][2460/3239]	Time 0.229 (0.590)	Data Time 0.001 (0.015)	Loss 2.8223 (2.7716)	Entropy 1.21487 (1.21828)	Top-1 acc 55.469 (57.376)	Top-5 acc 76.953 (79.149)	lr 0.01736
Train [45][2470/3239]	Time 0.342 (0.590)	Data Time 0.001 (0.015)	Loss 2.9063 (2.7717)	Entropy 1.21485 (1.21827)	Top-1 acc 57.031 (57.378)	Top-5 acc 76.172 (79.149)	lr 0.01736
Train [45][2480/3239]	Time 0.211 (0.589)	Data Time 0.001 (0.015)	Loss 2.9422 (2.7718)	Entropy 1.21482 (1.21826)	Top-1 acc 53.516 (57.374)	Top-5 acc 78.125 (79.146)	lr 0.01735
Train [45][2490/3239]	Time 0.219 (0.589)	Data Time 0.001 (0.015)	Loss 2.8812 (2.7720)	Entropy 1.21474 (1.21824)	Top-1 acc 55.859 (57.374)	Top-5 acc 76.953 (79.145)	lr 0.01735
Train [45][2500/3239]	Time 0.238 (0.588)	Data Time 0.001 (0.014)	Loss 2.6384 (2.7721)	Entropy 1.21474 (1.21823)	Top-1 acc 60.156 (57.374)	Top-5 acc 80.859 (79.139)	lr 0.01735
Train [45][2510/3239]	Time 0.216 (0.588)	Data Time 0.001 (0.014)	Loss 2.8274 (2.7721)	Entropy 1.21466 (1.21821)	Top-1 acc 56.641 (57.370)	Top-5 acc 76.172 (79.137)	lr 0.01735
Train [45][2520/3239]	Time 0.318 (0.587)	Data Time 0.001 (0.014)	Loss 2.9204 (2.7722)	Entropy 1.21489 (1.21820)	Top-1 acc 55.078 (57.367)	Top-5 acc 77.344 (79.135)	lr 0.01735
Train [45][2530/3239]	Time 0.223 (0.587)	Data Time 0.001 (0.014)	Loss 2.6324 (2.7723)	Entropy 1.21489 (1.21819)	Top-1 acc 60.547 (57.364)	Top-5 acc 81.250 (79.134)	lr 0.01735
Train [45][2540/3239]	Time 2.493 (0.586)	Data Time 0.001 (0.014)	Loss 2.9200 (2.7723)	Entropy 1.21489 (1.21817)	Top-1 acc 54.297 (57.361)	Top-5 acc 77.734 (79.137)	lr 0.01735
Train [45][2550/3239]	Time 0.203 (0.585)	Data Time 0.001 (0.014)	Loss 3.0425 (2.7724)	Entropy 1.21484 (1.21816)	Top-1 acc 54.688 (57.357)	Top-5 acc 73.438 (79.137)	lr 0.01735
Train [45][2560/3239]	Time 0.236 (0.584)	Data Time 0.001 (0.014)	Loss 2.7077 (2.7725)	Entropy 1.21464 (1.21815)	Top-1 acc 57.422 (57.354)	Top-5 acc 82.812 (79.132)	lr 0.01735
Train [45][2570/3239]	Time 0.288 (0.584)	Data Time 0.001 (0.014)	Loss 3.0177 (2.7727)	Entropy 1.21460 (1.21813)	Top-1 acc 51.172 (57.348)	Top-5 acc 73.047 (79.127)	lr 0.01735
Train [45][2580/3239]	Time 0.234 (0.583)	Data Time 0.001 (0.014)	Loss 2.7258 (2.7727)	Entropy 1.21455 (1.21812)	Top-1 acc 55.078 (57.347)	Top-5 acc 79.688 (79.128)	lr 0.01735
Train [45][2590/3239]	Time 0.288 (0.601)	Data Time 0.004 (0.014)	Loss 2.8831 (2.7728)	Entropy 1.21454 (1.21811)	Top-1 acc 53.906 (57.342)	Top-5 acc 76.172 (79.125)	lr 0.01734
Train [45][2600/3239]	Time 0.232 (0.600)	Data Time 0.002 (0.014)	Loss 2.7650 (2.7730)	Entropy 1.21450 (1.21809)	Top-1 acc 57.812 (57.338)	Top-5 acc 80.469 (79.120)	lr 0.01734
Train [45][2610/3239]	Time 0.214 (0.600)	Data Time 0.002 (0.014)	Loss 2.8289 (2.7731)	Entropy 1.21449 (1.21808)	Top-1 acc 56.250 (57.335)	Top-5 acc 76.562 (79.116)	lr 0.01734
Train [45][2620/3239]	Time 0.294 (0.599)	Data Time 0.001 (0.014)	Loss 2.5263 (2.7729)	Entropy 1.21449 (1.21806)	Top-1 acc 64.062 (57.341)	Top-5 acc 82.812 (79.117)	lr 0.01734
Train [45][2630/3239]	Time 0.224 (0.598)	Data Time 0.001 (0.014)	Loss 2.6598 (2.7728)	Entropy 1.21449 (1.21805)	Top-1 acc 61.719 (57.345)	Top-5 acc 81.250 (79.119)	lr 0.01734
Train [45][2640/3239]	Time 0.239 (0.598)	Data Time 0.001 (0.014)	Loss 2.9246 (2.7731)	Entropy 1.21446 (1.21804)	Top-1 acc 51.953 (57.336)	Top-5 acc 75.391 (79.112)	lr 0.01734
Train [45][2650/3239]	Time 0.252 (0.597)	Data Time 0.001 (0.014)	Loss 2.7094 (2.7732)	Entropy 1.21442 (1.21802)	Top-1 acc 59.375 (57.340)	Top-5 acc 79.688 (79.110)	lr 0.01734
Train [45][2660/3239]	Time 0.251 (0.597)	Data Time 0.029 (0.014)	Loss 2.7994 (2.7733)	Entropy 1.21431 (1.21801)	Top-1 acc 55.469 (57.337)	Top-5 acc 82.812 (79.109)	lr 0.01734
Train [45][2670/3239]	Time 0.349 (0.596)	Data Time 0.001 (0.014)	Loss 2.8363 (2.7732)	Entropy 1.21426 (1.21800)	Top-1 acc 55.078 (57.337)	Top-5 acc 77.344 (79.111)	lr 0.01734
Train [45][2680/3239]	Time 0.253 (0.596)	Data Time 0.001 (0.014)	Loss 2.8703 (2.7733)	Entropy 1.21421 (1.21798)	Top-1 acc 55.469 (57.336)	Top-5 acc 78.125 (79.105)	lr 0.01734
Train [45][2690/3239]	Time 0.262 (0.595)	Data Time 0.001 (0.014)	Loss 2.8733 (2.7734)	Entropy 1.21419 (1.21797)	Top-1 acc 57.422 (57.332)	Top-5 acc 75.000 (79.102)	lr 0.01733
Train [45][2700/3239]	Time 0.234 (0.595)	Data Time 0.001 (0.014)	Loss 2.6940 (2.7734)	Entropy 1.21417 (1.21795)	Top-1 acc 58.594 (57.334)	Top-5 acc 80.469 (79.103)	lr 0.01733
Train [45][2710/3239]	Time 0.236 (0.594)	Data Time 0.001 (0.014)	Loss 2.7871 (2.7734)	Entropy 1.21408 (1.21794)	Top-1 acc 57.422 (57.336)	Top-5 acc 76.953 (79.102)	lr 0.01733
Train [45][2720/3239]	Time 0.285 (0.594)	Data Time 0.001 (0.013)	Loss 2.7131 (2.7734)	Entropy 1.21402 (1.21793)	Top-1 acc 56.641 (57.334)	Top-5 acc 79.688 (79.098)	lr 0.01733
Train [45][2730/3239]	Time 0.262 (0.593)	Data Time 0.001 (0.013)	Loss 2.7848 (2.7734)	Entropy 1.21403 (1.21791)	Top-1 acc 58.203 (57.334)	Top-5 acc 78.906 (79.098)	lr 0.01733
Train [45][2740/3239]	Time 0.238 (0.593)	Data Time 0.001 (0.013)	Loss 2.5955 (2.7733)	Entropy 1.21402 (1.21790)	Top-1 acc 66.016 (57.337)	Top-5 acc 80.859 (79.100)	lr 0.01733
Train [45][2750/3239]	Time 0.219 (0.592)	Data Time 0.001 (0.013)	Loss 2.8695 (2.7733)	Entropy 1.21402 (1.21788)	Top-1 acc 52.344 (57.332)	Top-5 acc 76.953 (79.101)	lr 0.01733
Train [45][2760/3239]	Time 0.227 (0.592)	Data Time 0.001 (0.013)	Loss 2.8889 (2.7736)	Entropy 1.21395 (1.21787)	Top-1 acc 53.125 (57.321)	Top-5 acc 76.172 (79.097)	lr 0.01733
Train [45][2770/3239]	Time 0.204 (0.591)	Data Time 0.001 (0.013)	Loss 2.9671 (2.7737)	Entropy 1.21396 (1.21786)	Top-1 acc 50.781 (57.318)	Top-5 acc 74.609 (79.094)	lr 0.01733
Train [45][2780/3239]	Time 0.223 (0.591)	Data Time 0.001 (0.013)	Loss 2.7205 (2.7738)	Entropy 1.21388 (1.21784)	Top-1 acc 55.859 (57.314)	Top-5 acc 80.859 (79.089)	lr 0.01733
Train [45][2790/3239]	Time 0.219 (0.590)	Data Time 0.001 (0.013)	Loss 2.8723 (2.7736)	Entropy 1.21387 (1.21783)	Top-1 acc 55.469 (57.318)	Top-5 acc 78.125 (79.091)	lr 0.01733
Train [45][2800/3239]	Time 0.286 (0.590)	Data Time 0.001 (0.013)	Loss 2.9140 (2.7739)	Entropy 1.21382 (1.21781)	Top-1 acc 51.953 (57.308)	Top-5 acc 77.734 (79.088)	lr 0.01732
Train [45][2810/3239]	Time 0.298 (0.589)	Data Time 0.001 (0.013)	Loss 2.7847 (2.7738)	Entropy 1.21375 (1.21780)	Top-1 acc 55.078 (57.309)	Top-5 acc 78.906 (79.089)	lr 0.01732
Train [45][2820/3239]	Time 0.211 (0.589)	Data Time 0.001 (0.013)	Loss 2.9113 (2.7738)	Entropy 1.21370 (1.21778)	Top-1 acc 53.516 (57.311)	Top-5 acc 75.391 (79.087)	lr 0.01732
Train [45][2830/3239]	Time 0.235 (0.588)	Data Time 0.001 (0.013)	Loss 2.8826 (2.7740)	Entropy 1.21370 (1.21777)	Top-1 acc 57.812 (57.307)	Top-5 acc 75.391 (79.084)	lr 0.01732
Train [45][2840/3239]	Time 0.245 (0.588)	Data Time 0.001 (0.013)	Loss 2.6519 (2.7742)	Entropy 1.21362 (1.21776)	Top-1 acc 58.594 (57.301)	Top-5 acc 83.203 (79.082)	lr 0.01732
Train [45][2850/3239]	Time 0.270 (0.588)	Data Time 0.001 (0.013)	Loss 2.9843 (2.7742)	Entropy 1.21357 (1.21774)	Top-1 acc 51.562 (57.302)	Top-5 acc 73.828 (79.081)	lr 0.01732
Train [45][2860/3239]	Time 0.349 (0.587)	Data Time 0.002 (0.013)	Loss 2.6991 (2.7743)	Entropy 1.21361 (1.21773)	Top-1 acc 58.594 (57.303)	Top-5 acc 79.297 (79.077)	lr 0.01732
Train [45][2870/3239]	Time 0.251 (0.587)	Data Time 0.001 (0.013)	Loss 2.7122 (2.7744)	Entropy 1.21351 (1.21771)	Top-1 acc 58.594 (57.301)	Top-5 acc 78.125 (79.074)	lr 0.01732
Train [45][2880/3239]	Time 0.271 (0.586)	Data Time 0.001 (0.013)	Loss 2.7391 (2.7746)	Entropy 1.21351 (1.21770)	Top-1 acc 59.375 (57.295)	Top-5 acc 80.078 (79.073)	lr 0.01732
Train [45][2890/3239]	Time 0.247 (0.586)	Data Time 0.001 (0.013)	Loss 2.7931 (2.7749)	Entropy 1.21349 (1.21768)	Top-1 acc 55.859 (57.285)	Top-5 acc 76.953 (79.068)	lr 0.01732
Train [45][2900/3239]	Time 0.221 (0.586)	Data Time 0.001 (0.013)	Loss 2.8116 (2.7749)	Entropy 1.21341 (1.21767)	Top-1 acc 58.594 (57.287)	Top-5 acc 78.906 (79.068)	lr 0.01732
Train [45][2910/3239]	Time 0.359 (0.585)	Data Time 0.001 (0.013)	Loss 2.9317 (2.7749)	Entropy 1.21342 (1.21765)	Top-1 acc 53.516 (57.287)	Top-5 acc 74.219 (79.069)	lr 0.01731
Train [45][2920/3239]	Time 0.288 (0.600)	Data Time 0.004 (0.013)	Loss 2.8854 (2.7749)	Entropy 1.21336 (1.21764)	Top-1 acc 55.859 (57.289)	Top-5 acc 78.516 (79.067)	lr 0.01731
Train [45][2930/3239]	Time 0.235 (0.600)	Data Time 0.002 (0.013)	Loss 2.9609 (2.7749)	Entropy 1.21336 (1.21763)	Top-1 acc 54.297 (57.288)	Top-5 acc 74.219 (79.066)	lr 0.01731
Train [45][2940/3239]	Time 0.246 (0.599)	Data Time 0.002 (0.013)	Loss 2.9667 (2.7750)	Entropy 1.21329 (1.21761)	Top-1 acc 56.250 (57.288)	Top-5 acc 72.266 (79.061)	lr 0.01731
Train [45][2950/3239]	Time 0.257 (0.599)	Data Time 0.002 (0.013)	Loss 2.7460 (2.7750)	Entropy 1.21321 (1.21760)	Top-1 acc 58.594 (57.289)	Top-5 acc 79.297 (79.062)	lr 0.01731
Train [45][2960/3239]	Time 0.334 (0.598)	Data Time 0.001 (0.013)	Loss 2.6415 (2.7749)	Entropy 1.21317 (1.21758)	Top-1 acc 58.594 (57.291)	Top-5 acc 81.641 (79.061)	lr 0.01731
Train [45][2970/3239]	Time 0.226 (0.598)	Data Time 0.001 (0.012)	Loss 2.8016 (2.7749)	Entropy 1.21318 (1.21757)	Top-1 acc 58.203 (57.294)	Top-5 acc 77.734 (79.061)	lr 0.01731
Train [45][2980/3239]	Time 0.226 (0.597)	Data Time 0.001 (0.012)	Loss 2.7052 (2.7749)	Entropy 1.21316 (1.21755)	Top-1 acc 59.766 (57.294)	Top-5 acc 80.859 (79.062)	lr 0.01731
Train [45][2990/3239]	Time 0.236 (0.597)	Data Time 0.001 (0.012)	Loss 2.6824 (2.7748)	Entropy 1.21314 (1.21754)	Top-1 acc 59.375 (57.294)	Top-5 acc 80.469 (79.062)	lr 0.01731
Train [45][3000/3239]	Time 0.239 (0.597)	Data Time 0.002 (0.012)	Loss 2.9369 (2.7749)	Entropy 1.21312 (1.21752)	Top-1 acc 53.906 (57.294)	Top-5 acc 76.953 (79.061)	lr 0.01731
Train [45][3010/3239]	Time 0.265 (0.596)	Data Time 0.002 (0.012)	Loss 2.8446 (2.7748)	Entropy 1.21310 (1.21751)	Top-1 acc 57.031 (57.296)	Top-5 acc 79.688 (79.061)	lr 0.01730
Train [45][3020/3239]	Time 0.225 (0.596)	Data Time 0.001 (0.012)	Loss 2.7759 (2.7748)	Entropy 1.21304 (1.21749)	Top-1 acc 56.250 (57.292)	Top-5 acc 79.688 (79.060)	lr 0.01730
Train [45][3030/3239]	Time 0.240 (0.595)	Data Time 0.002 (0.012)	Loss 2.8528 (2.7749)	Entropy 1.21299 (1.21748)	Top-1 acc 53.125 (57.287)	Top-5 acc 77.734 (79.060)	lr 0.01730
Train [45][3040/3239]	Time 0.241 (0.595)	Data Time 0.002 (0.012)	Loss 2.7455 (2.7748)	Entropy 1.21296 (1.21746)	Top-1 acc 58.984 (57.288)	Top-5 acc 76.953 (79.060)	lr 0.01730
Train [45][3050/3239]	Time 0.377 (0.594)	Data Time 0.001 (0.012)	Loss 2.6685 (2.7750)	Entropy 1.21296 (1.21745)	Top-1 acc 58.203 (57.286)	Top-5 acc 80.078 (79.055)	lr 0.01730
Train [45][3060/3239]	Time 0.234 (0.594)	Data Time 0.001 (0.012)	Loss 2.8685 (2.7751)	Entropy 1.21287 (1.21743)	Top-1 acc 53.516 (57.278)	Top-5 acc 74.609 (79.050)	lr 0.01730
Train [45][3070/3239]	Time 0.246 (0.593)	Data Time 0.001 (0.012)	Loss 2.9725 (2.7751)	Entropy 1.21289 (1.21742)	Top-1 acc 50.391 (57.280)	Top-5 acc 75.391 (79.050)	lr 0.01730
Train [45][3080/3239]	Time 0.208 (0.593)	Data Time 0.001 (0.012)	Loss 2.9173 (2.7752)	Entropy 1.21287 (1.21740)	Top-1 acc 52.734 (57.277)	Top-5 acc 76.953 (79.047)	lr 0.01730
Train [45][3090/3239]	Time 0.305 (0.593)	Data Time 0.001 (0.012)	Loss 2.8100 (2.7754)	Entropy 1.21283 (1.21739)	Top-1 acc 53.125 (57.270)	Top-5 acc 76.562 (79.040)	lr 0.01730
Train [45][3100/3239]	Time 0.337 (0.592)	Data Time 0.001 (0.012)	Loss 2.7224 (2.7754)	Entropy 1.21280 (1.21738)	Top-1 acc 57.812 (57.270)	Top-5 acc 79.688 (79.040)	lr 0.01730
Train [45][3110/3239]	Time 0.264 (0.592)	Data Time 0.002 (0.012)	Loss 2.5770 (2.7753)	Entropy 1.21282 (1.21736)	Top-1 acc 62.109 (57.273)	Top-5 acc 85.156 (79.044)	lr 0.01730
Train [45][3120/3239]	Time 0.257 (0.591)	Data Time 0.001 (0.012)	Loss 2.8245 (2.7754)	Entropy 1.21275 (1.21735)	Top-1 acc 57.812 (57.271)	Top-5 acc 76.172 (79.041)	lr 0.01729
Train [45][3130/3239]	Time 0.213 (0.591)	Data Time 0.001 (0.012)	Loss 2.7234 (2.7753)	Entropy 1.21272 (1.21733)	Top-1 acc 58.203 (57.271)	Top-5 acc 78.125 (79.039)	lr 0.01729
Train [45][3140/3239]	Time 0.220 (0.590)	Data Time 0.001 (0.012)	Loss 2.8112 (2.7756)	Entropy 1.21264 (1.21732)	Top-1 acc 55.469 (57.266)	Top-5 acc 78.125 (79.034)	lr 0.01729
Train [45][3150/3239]	Time 0.364 (0.590)	Data Time 0.001 (0.012)	Loss 2.9416 (2.7756)	Entropy 1.21261 (1.21730)	Top-1 acc 52.344 (57.266)	Top-5 acc 78.516 (79.032)	lr 0.01729
Train [45][3160/3239]	Time 0.232 (0.590)	Data Time 0.001 (0.012)	Loss 2.8362 (2.7756)	Entropy 1.21261 (1.21729)	Top-1 acc 56.250 (57.265)	Top-5 acc 77.344 (79.034)	lr 0.01729
Train [45][3170/3239]	Time 0.241 (0.589)	Data Time 0.001 (0.012)	Loss 2.7545 (2.7755)	Entropy 1.21256 (1.21727)	Top-1 acc 54.297 (57.270)	Top-5 acc 79.688 (79.035)	lr 0.01729
Train [45][3180/3239]	Time 0.224 (0.589)	Data Time 0.000 (0.012)	Loss 2.6091 (2.7755)	Entropy 1.21252 (1.21726)	Top-1 acc 63.672 (57.269)	Top-5 acc 82.031 (79.034)	lr 0.01729
Train [45][3190/3239]	Time 0.215 (0.588)	Data Time 0.000 (0.012)	Loss 2.7585 (2.7755)	Entropy 1.21251 (1.21724)	Top-1 acc 58.203 (57.271)	Top-5 acc 80.469 (79.035)	lr 0.01729
Train [45][3200/3239]	Time 0.309 (0.588)	Data Time 0.000 (0.012)	Loss 2.7318 (2.7754)	Entropy 1.21249 (1.21723)	Top-1 acc 60.938 (57.274)	Top-5 acc 78.516 (79.037)	lr 0.01729
Train [45][3210/3239]	Time 0.223 (0.587)	Data Time 0.000 (0.012)	Loss 2.8354 (2.7755)	Entropy 1.21244 (1.21721)	Top-1 acc 58.984 (57.273)	Top-5 acc 76.172 (79.035)	lr 0.01729
Train [45][3220/3239]	Time 0.206 (0.587)	Data Time 0.000 (0.012)	Loss 2.8147 (2.7754)	Entropy 1.21242 (1.21720)	Top-1 acc 54.297 (57.272)	Top-5 acc 80.469 (79.035)	lr 0.01729
Train [45][3230/3239]	Time 0.207 (0.586)	Data Time 0.000 (0.012)	Loss 2.8685 (2.7754)	Entropy 1.21237 (1.21718)	Top-1 acc 53.906 (57.271)	Top-5 acc 75.391 (79.034)	lr 0.01728
Train [45][3239/3239]	Time 2.326 (0.586)	Data Time 0.000 (0.012)	Loss 2.9336 (2.7754)	Entropy 1.21237 (1.21717)	Top-1 acc 50.617 (57.274)	Top-5 acc 79.012 (79.035)	lr 0.01728
==========Valid [45/120]	loss 1.615	top-1 acc 63.558 (63.558)	top-5 acc 84.579	Train top-1 57.274	top-5 79.035	Entropy 1.21237	Latency-None: 0.000ms	Flops: 548.34M
Train [46][0/3239]	Time 35.100 (35.100)	Data Time 33.539 (33.539)	Loss 2.6329 (2.6329)	Entropy 1.21232 (1.21232)	Top-1 acc 64.453 (64.453)	Top-5 acc 82.812 (82.812)	lr 0.01728
Train [46][10/3239]	Time 53.148 (8.272)	Data Time 0.002 (3.051)	Loss 2.7761 (2.6515)	Entropy 1.21232 (1.21232)	Top-1 acc 60.156 (61.080)	Top-5 acc 78.516 (81.108)	lr 0.01728
Train [46][20/3239]	Time 0.254 (4.468)	Data Time 0.002 (1.599)	Loss 2.8092 (2.7050)	Entropy 1.21220 (1.21226)	Top-1 acc 57.422 (59.505)	Top-5 acc 77.734 (80.115)	lr 0.01728
Train [46][30/3239]	Time 0.219 (3.173)	Data Time 0.001 (1.084)	Loss 2.6279 (2.7116)	Entropy 1.21215 (1.21223)	Top-1 acc 59.766 (58.934)	Top-5 acc 83.594 (79.940)	lr 0.01728
Train [46][40/3239]	Time 0.220 (2.511)	Data Time 0.001 (0.820)	Loss 2.6984 (2.7111)	Entropy 1.21211 (1.21220)	Top-1 acc 59.375 (59.013)	Top-5 acc 78.906 (79.926)	lr 0.01728
Train [46][50/3239]	Time 0.248 (2.109)	Data Time 0.002 (0.660)	Loss 2.9766 (2.7204)	Entropy 1.21210 (1.21218)	Top-1 acc 49.609 (58.770)	Top-5 acc 75.781 (79.733)	lr 0.01728
Train [46][60/3239]	Time 0.212 (1.838)	Data Time 0.001 (0.552)	Loss 2.7241 (2.7176)	Entropy 1.21202 (1.21216)	Top-1 acc 56.641 (58.728)	Top-5 acc 80.469 (79.796)	lr 0.01728
Train [46][70/3239]	Time 0.211 (1.647)	Data Time 0.001 (0.474)	Loss 2.5423 (2.7139)	Entropy 1.21195 (1.21214)	Top-1 acc 62.891 (58.836)	Top-5 acc 85.156 (79.875)	lr 0.01728
Train [46][80/3239]	Time 0.219 (1.498)	Data Time 0.001 (0.416)	Loss 2.7274 (2.7157)	Entropy 1.21187 (1.21211)	Top-1 acc 60.547 (58.806)	Top-5 acc 79.688 (79.847)	lr 0.01728
Train [46][90/3239]	Time 0.218 (1.385)	Data Time 0.001 (0.370)	Loss 2.8338 (2.7171)	Entropy 1.21186 (1.21208)	Top-1 acc 58.203 (58.654)	Top-5 acc 76.953 (79.872)	lr 0.01728
Train [46][100/3239]	Time 0.239 (1.295)	Data Time 0.001 (0.334)	Loss 2.7357 (2.7180)	Entropy 1.21181 (1.21206)	Top-1 acc 60.938 (58.783)	Top-5 acc 78.516 (79.869)	lr 0.01727
Train [46][110/3239]	Time 0.281 (1.223)	Data Time 0.001 (0.304)	Loss 2.7410 (2.7202)	Entropy 1.21177 (1.21204)	Top-1 acc 59.766 (58.682)	Top-5 acc 78.516 (79.818)	lr 0.01727
Train [46][120/3239]	Time 2.538 (1.161)	Data Time 0.002 (0.279)	Loss 2.7534 (2.7213)	Entropy 1.21177 (1.21202)	Top-1 acc 54.297 (58.526)	Top-5 acc 80.078 (79.878)	lr 0.01727
Train [46][130/3239]	Time 0.267 (1.090)	Data Time 0.001 (0.258)	Loss 2.6295 (2.7218)	Entropy 1.21178 (1.21200)	Top-1 acc 61.719 (58.591)	Top-5 acc 81.641 (79.935)	lr 0.01727
Train [46][140/3239]	Time 0.236 (1.045)	Data Time 0.001 (0.240)	Loss 2.8131 (2.7222)	Entropy 1.21177 (1.21198)	Top-1 acc 55.859 (58.513)	Top-5 acc 78.125 (79.953)	lr 0.01727
Train [46][150/3239]	Time 0.243 (1.007)	Data Time 0.001 (0.224)	Loss 2.6116 (2.7216)	Entropy 1.21172 (1.21196)	Top-1 acc 59.375 (58.552)	Top-5 acc 84.766 (79.985)	lr 0.01727
Train [46][160/3239]	Time 0.298 (0.974)	Data Time 0.001 (0.210)	Loss 2.7647 (2.7230)	Entropy 1.21158 (1.21194)	Top-1 acc 55.859 (58.482)	Top-5 acc 80.078 (79.964)	lr 0.01727
Train [46][170/3239]	Time 0.222 (0.943)	Data Time 0.001 (0.198)	Loss 2.7143 (2.7283)	Entropy 1.21172 (1.21193)	Top-1 acc 55.078 (58.297)	Top-5 acc 78.516 (79.886)	lr 0.01727
Train [46][180/3239]	Time 0.220 (0.916)	Data Time 0.001 (0.187)	Loss 2.7362 (2.7303)	Entropy 1.21171 (1.21191)	Top-1 acc 58.984 (58.257)	Top-5 acc 79.688 (79.882)	lr 0.01727
Train [46][190/3239]	Time 0.227 (0.891)	Data Time 0.001 (0.177)	Loss 2.5362 (2.7276)	Entropy 1.21168 (1.21190)	Top-1 acc 60.156 (58.338)	Top-5 acc 85.156 (79.939)	lr 0.01727
Train [46][200/3239]	Time 0.191 (0.870)	Data Time 0.001 (0.169)	Loss 2.8941 (2.7268)	Entropy 1.21163 (1.21189)	Top-1 acc 52.734 (58.353)	Top-5 acc 75.000 (79.956)	lr 0.01726
Train [46][210/3239]	Time 0.312 (0.850)	Data Time 0.001 (0.161)	Loss 2.5179 (2.7242)	Entropy 1.21163 (1.21188)	Top-1 acc 65.234 (58.407)	Top-5 acc 83.594 (80.006)	lr 0.01726
Train [46][220/3239]	Time 0.228 (0.832)	Data Time 0.001 (0.153)	Loss 2.7776 (2.7240)	Entropy 1.21159 (1.21187)	Top-1 acc 56.250 (58.428)	Top-5 acc 79.297 (80.032)	lr 0.01726
Train [46][230/3239]	Time 2.448 (0.817)	Data Time 0.001 (0.147)	Loss 2.7955 (2.7266)	Entropy 1.21159 (1.21186)	Top-1 acc 53.906 (58.352)	Top-5 acc 80.859 (79.967)	lr 0.01726
Train [46][240/3239]	Time 0.222 (0.792)	Data Time 0.001 (0.141)	Loss 2.9631 (2.7272)	Entropy 1.21160 (1.21185)	Top-1 acc 53.906 (58.349)	Top-5 acc 77.734 (79.958)	lr 0.01726
Train [46][250/3239]	Time 0.217 (0.779)	Data Time 0.001 (0.135)	Loss 2.7870 (2.7283)	Entropy 1.21153 (1.21183)	Top-1 acc 57.422 (58.289)	Top-5 acc 79.297 (79.938)	lr 0.01726
Train [46][260/3239]	Time 0.223 (0.767)	Data Time 0.001 (0.130)	Loss 2.6951 (2.7290)	Entropy 1.21152 (1.21182)	Top-1 acc 58.594 (58.256)	Top-5 acc 80.078 (79.960)	lr 0.01726
Train [46][270/3239]	Time 0.231 (0.755)	Data Time 0.001 (0.125)	Loss 2.9376 (2.7289)	Entropy 1.21147 (1.21181)	Top-1 acc 53.125 (58.238)	Top-5 acc 74.219 (79.956)	lr 0.01726
Train [46][280/3239]	Time 0.234 (0.744)	Data Time 0.001 (0.121)	Loss 2.6965 (2.7277)	Entropy 1.21147 (1.21180)	Top-1 acc 58.984 (58.280)	Top-5 acc 80.078 (79.943)	lr 0.01726
Train [46][290/3239]	Time 0.184 (0.734)	Data Time 0.001 (0.117)	Loss 2.7682 (2.7279)	Entropy 1.21147 (1.21179)	Top-1 acc 58.203 (58.262)	Top-5 acc 78.125 (79.972)	lr 0.01726
Train [46][300/3239]	Time 0.225 (0.725)	Data Time 0.001 (0.113)	Loss 2.6994 (2.7282)	Entropy 1.21142 (1.21177)	Top-1 acc 62.109 (58.265)	Top-5 acc 79.297 (79.947)	lr 0.01726
Train [46][310/3239]	Time 0.220 (0.717)	Data Time 0.001 (0.110)	Loss 2.7334 (2.7307)	Entropy 1.21136 (1.21176)	Top-1 acc 59.766 (58.233)	Top-5 acc 78.906 (79.916)	lr 0.01725
Train [46][320/3239]	Time 0.208 (0.708)	Data Time 0.001 (0.106)	Loss 2.6153 (2.7296)	Entropy 1.21135 (1.21175)	Top-1 acc 60.938 (58.270)	Top-5 acc 82.031 (79.947)	lr 0.01725
Train [46][330/3239]	Time 0.229 (0.701)	Data Time 0.002 (0.103)	Loss 2.8644 (2.7290)	Entropy 1.21133 (1.21174)	Top-1 acc 57.422 (58.303)	Top-5 acc 77.344 (79.965)	lr 0.01725
Train [46][340/3239]	Time 2.357 (0.694)	Data Time 0.001 (0.100)	Loss 2.8145 (2.7298)	Entropy 1.21133 (1.21173)	Top-1 acc 53.906 (58.279)	Top-5 acc 78.125 (79.927)	lr 0.01725
Train [46][350/3239]	Time 0.243 (0.681)	Data Time 0.001 (0.097)	Loss 2.7531 (2.7304)	Entropy 1.21129 (1.21171)	Top-1 acc 57.812 (58.275)	Top-5 acc 78.516 (79.915)	lr 0.01725
Train [46][360/3239]	Time 0.224 (0.675)	Data Time 0.001 (0.095)	Loss 2.7265 (2.7312)	Entropy 1.21107 (1.21170)	Top-1 acc 57.422 (58.241)	Top-5 acc 81.250 (79.904)	lr 0.01725
Train [46][370/3239]	Time 0.246 (0.668)	Data Time 0.001 (0.092)	Loss 2.6681 (2.7325)	Entropy 1.21098 (1.21168)	Top-1 acc 60.156 (58.186)	Top-5 acc 82.422 (79.888)	lr 0.01725
Train [46][380/3239]	Time 0.366 (0.781)	Data Time 0.003 (0.090)	Loss 2.8041 (2.7322)	Entropy 1.21092 (1.21166)	Top-1 acc 59.766 (58.231)	Top-5 acc 77.344 (79.896)	lr 0.01725
Train [46][390/3239]	Time 0.230 (0.776)	Data Time 0.002 (0.088)	Loss 2.9406 (2.7324)	Entropy 1.21088 (1.21164)	Top-1 acc 53.125 (58.251)	Top-5 acc 77.344 (79.877)	lr 0.01725
Train [46][400/3239]	Time 0.200 (0.768)	Data Time 0.001 (0.085)	Loss 2.8403 (2.7333)	Entropy 1.21088 (1.21162)	Top-1 acc 57.422 (58.230)	Top-5 acc 76.562 (79.861)	lr 0.01725
Train [46][410/3239]	Time 0.235 (0.761)	Data Time 0.002 (0.084)	Loss 2.4370 (2.7331)	Entropy 1.21084 (1.21160)	Top-1 acc 67.188 (58.253)	Top-5 acc 87.109 (79.881)	lr 0.01725
Train [46][420/3239]	Time 0.264 (0.754)	Data Time 0.001 (0.082)	Loss 2.9867 (2.7325)	Entropy 1.21083 (1.21158)	Top-1 acc 50.000 (58.276)	Top-5 acc 74.609 (79.893)	lr 0.01724
Train [46][430/3239]	Time 0.269 (0.747)	Data Time 0.001 (0.080)	Loss 2.7875 (2.7330)	Entropy 1.21083 (1.21156)	Top-1 acc 51.172 (58.259)	Top-5 acc 80.859 (79.891)	lr 0.01724
Train [46][440/3239]	Time 0.218 (0.741)	Data Time 0.002 (0.078)	Loss 2.6372 (2.7334)	Entropy 1.21084 (1.21155)	Top-1 acc 57.031 (58.256)	Top-5 acc 81.641 (79.897)	lr 0.01724
Train [46][450/3239]	Time 2.457 (0.734)	Data Time 0.001 (0.076)	Loss 2.7627 (2.7340)	Entropy 1.21084 (1.21153)	Top-1 acc 61.328 (58.255)	Top-5 acc 78.125 (79.897)	lr 0.01724
Train [46][460/3239]	Time 0.207 (0.724)	Data Time 0.001 (0.075)	Loss 2.7853 (2.7344)	Entropy 1.21079 (1.21152)	Top-1 acc 54.297 (58.224)	Top-5 acc 80.859 (79.885)	lr 0.01724
Train [46][470/3239]	Time 0.236 (0.718)	Data Time 0.001 (0.073)	Loss 2.7024 (2.7343)	Entropy 1.21074 (1.21150)	Top-1 acc 60.547 (58.215)	Top-5 acc 81.641 (79.876)	lr 0.01724
Train [46][480/3239]	Time 0.231 (0.712)	Data Time 0.002 (0.072)	Loss 2.7810 (2.7354)	Entropy 1.21069 (1.21148)	Top-1 acc 59.766 (58.188)	Top-5 acc 78.906 (79.863)	lr 0.01724
Train [46][490/3239]	Time 0.229 (0.707)	Data Time 0.001 (0.070)	Loss 2.7693 (2.7367)	Entropy 1.21068 (1.21147)	Top-1 acc 57.031 (58.165)	Top-5 acc 81.250 (79.830)	lr 0.01724
Train [46][500/3239]	Time 0.312 (0.702)	Data Time 0.001 (0.069)	Loss 2.7484 (2.7356)	Entropy 1.21067 (1.21145)	Top-1 acc 60.938 (58.207)	Top-5 acc 81.641 (79.836)	lr 0.01724
Train [46][510/3239]	Time 0.238 (0.697)	Data Time 0.001 (0.068)	Loss 2.7788 (2.7359)	Entropy 1.21065 (1.21144)	Top-1 acc 55.078 (58.179)	Top-5 acc 78.516 (79.837)	lr 0.01724
Train [46][520/3239]	Time 0.218 (0.693)	Data Time 0.001 (0.066)	Loss 2.7533 (2.7366)	Entropy 1.21065 (1.21142)	Top-1 acc 55.859 (58.147)	Top-5 acc 78.906 (79.825)	lr 0.01723
Train [46][530/3239]	Time 0.229 (0.688)	Data Time 0.003 (0.065)	Loss 2.8106 (2.7386)	Entropy 1.21064 (1.21141)	Top-1 acc 58.594 (58.100)	Top-5 acc 76.562 (79.789)	lr 0.01723
Train [46][540/3239]	Time 0.206 (0.684)	Data Time 0.001 (0.064)	Loss 2.7307 (2.7387)	Entropy 1.21061 (1.21139)	Top-1 acc 57.031 (58.094)	Top-5 acc 79.688 (79.795)	lr 0.01723
Train [46][550/3239]	Time 0.305 (0.680)	Data Time 0.001 (0.063)	Loss 2.8618 (2.7389)	Entropy 1.21059 (1.21138)	Top-1 acc 55.859 (58.078)	Top-5 acc 76.562 (79.778)	lr 0.01723
Train [46][560/3239]	Time 2.487 (0.676)	Data Time 0.001 (0.062)	Loss 2.6910 (2.7393)	Entropy 1.21059 (1.21136)	Top-1 acc 60.938 (58.079)	Top-5 acc 82.812 (79.779)	lr 0.01723
Train [46][570/3239]	Time 0.213 (0.668)	Data Time 0.001 (0.061)	Loss 2.6167 (2.7399)	Entropy 1.21057 (1.21135)	Top-1 acc 58.984 (58.084)	Top-5 acc 81.250 (79.756)	lr 0.01723
Train [46][580/3239]	Time 0.222 (0.665)	Data Time 0.001 (0.060)	Loss 2.7975 (2.7400)	Entropy 1.21050 (1.21134)	Top-1 acc 54.297 (58.090)	Top-5 acc 80.078 (79.747)	lr 0.01723
Train [46][590/3239]	Time 0.206 (0.661)	Data Time 0.001 (0.059)	Loss 2.8578 (2.7403)	Entropy 1.21046 (1.21132)	Top-1 acc 59.766 (58.091)	Top-5 acc 77.344 (79.748)	lr 0.01723
Train [46][600/3239]	Time 0.217 (0.657)	Data Time 0.001 (0.058)	Loss 2.7361 (2.7399)	Entropy 1.21039 (1.21131)	Top-1 acc 60.938 (58.108)	Top-5 acc 76.562 (79.756)	lr 0.01723
Train [46][610/3239]	Time 0.220 (0.654)	Data Time 0.001 (0.057)	Loss 2.6056 (2.7393)	Entropy 1.21037 (1.21129)	Top-1 acc 60.156 (58.109)	Top-5 acc 80.078 (79.760)	lr 0.01723
Train [46][620/3239]	Time 0.238 (0.651)	Data Time 0.001 (0.056)	Loss 2.8556 (2.7392)	Entropy 1.21026 (1.21127)	Top-1 acc 53.516 (58.108)	Top-5 acc 75.000 (79.759)	lr 0.01723
Train [46][630/3239]	Time 0.219 (0.648)	Data Time 0.001 (0.055)	Loss 2.7313 (2.7390)	Entropy 1.21021 (1.21126)	Top-1 acc 61.719 (58.107)	Top-5 acc 80.078 (79.772)	lr 0.01722
Train [46][640/3239]	Time 0.225 (0.645)	Data Time 0.001 (0.054)	Loss 2.8871 (2.7398)	Entropy 1.21019 (1.21124)	Top-1 acc 53.906 (58.076)	Top-5 acc 79.688 (79.750)	lr 0.01722
Train [46][650/3239]	Time 0.251 (0.643)	Data Time 0.002 (0.053)	Loss 2.5415 (2.7399)	Entropy 1.21014 (1.21123)	Top-1 acc 59.766 (58.054)	Top-5 acc 82.031 (79.758)	lr 0.01722
Train [46][660/3239]	Time 0.235 (0.640)	Data Time 0.001 (0.053)	Loss 2.8555 (2.7405)	Entropy 1.21009 (1.21121)	Top-1 acc 53.516 (58.052)	Top-5 acc 78.516 (79.744)	lr 0.01722
Train [46][670/3239]	Time 2.431 (0.637)	Data Time 0.001 (0.052)	Loss 2.9397 (2.7409)	Entropy 1.21009 (1.21119)	Top-1 acc 54.688 (58.055)	Top-5 acc 76.562 (79.732)	lr 0.01722
Train [46][680/3239]	Time 0.204 (0.631)	Data Time 0.001 (0.051)	Loss 2.6790 (2.7410)	Entropy 1.21009 (1.21118)	Top-1 acc 60.938 (58.061)	Top-5 acc 81.250 (79.727)	lr 0.01722
Train [46][690/3239]	Time 0.249 (0.629)	Data Time 0.001 (0.050)	Loss 2.7610 (2.7411)	Entropy 1.21001 (1.21116)	Top-1 acc 58.984 (58.073)	Top-5 acc 76.562 (79.726)	lr 0.01722
Train [46][700/3239]	Time 0.205 (0.626)	Data Time 0.001 (0.050)	Loss 2.8920 (2.7423)	Entropy 1.20997 (1.21114)	Top-1 acc 52.344 (58.041)	Top-5 acc 76.172 (79.715)	lr 0.01722
Train [46][710/3239]	Time 0.226 (0.624)	Data Time 0.001 (0.049)	Loss 2.8599 (2.7422)	Entropy 1.20994 (1.21113)	Top-1 acc 54.297 (58.029)	Top-5 acc 76.172 (79.722)	lr 0.01722
Train [46][720/3239]	Time 0.240 (0.622)	Data Time 0.001 (0.048)	Loss 2.8070 (2.7412)	Entropy 1.20992 (1.21111)	Top-1 acc 57.812 (58.053)	Top-5 acc 79.688 (79.743)	lr 0.01722
Train [46][730/3239]	Time 0.212 (0.619)	Data Time 0.001 (0.048)	Loss 2.6636 (2.7412)	Entropy 1.20991 (1.21109)	Top-1 acc 59.375 (58.045)	Top-5 acc 80.469 (79.739)	lr 0.01722
Train [46][740/3239]	Time 0.404 (0.680)	Data Time 0.003 (0.047)	Loss 2.7147 (2.7408)	Entropy 1.20982 (1.21108)	Top-1 acc 59.375 (58.063)	Top-5 acc 79.688 (79.739)	lr 0.01721
Train [46][750/3239]	Time 0.226 (0.677)	Data Time 0.002 (0.046)	Loss 2.8781 (2.7403)	Entropy 1.20974 (1.21106)	Top-1 acc 58.984 (58.088)	Top-5 acc 76.562 (79.746)	lr 0.01721
Train [46][760/3239]	Time 0.219 (0.674)	Data Time 0.001 (0.046)	Loss 2.7939 (2.7399)	Entropy 1.20968 (1.21104)	Top-1 acc 56.641 (58.095)	Top-5 acc 78.906 (79.753)	lr 0.01721
Train [46][770/3239]	Time 0.224 (0.672)	Data Time 0.001 (0.045)	Loss 2.8714 (2.7394)	Entropy 1.20965 (1.21102)	Top-1 acc 56.641 (58.129)	Top-5 acc 77.734 (79.752)	lr 0.01721
Train [46][780/3239]	Time 2.440 (0.669)	Data Time 0.001 (0.045)	Loss 2.8620 (2.7392)	Entropy 1.20965 (1.21101)	Top-1 acc 56.641 (58.137)	Top-5 acc 75.781 (79.760)	lr 0.01721
Train [46][790/3239]	Time 0.215 (0.663)	Data Time 0.001 (0.044)	Loss 2.6520 (2.7391)	Entropy 1.20965 (1.21099)	Top-1 acc 64.453 (58.150)	Top-5 acc 81.641 (79.760)	lr 0.01721
Train [46][800/3239]	Time 0.236 (0.661)	Data Time 0.001 (0.044)	Loss 2.7196 (2.7399)	Entropy 1.20960 (1.21097)	Top-1 acc 60.547 (58.137)	Top-5 acc 80.078 (79.749)	lr 0.01721
Train [46][810/3239]	Time 0.221 (0.658)	Data Time 0.001 (0.043)	Loss 2.6912 (2.7398)	Entropy 1.20960 (1.21095)	Top-1 acc 60.156 (58.149)	Top-5 acc 79.688 (79.745)	lr 0.01721
Train [46][820/3239]	Time 0.236 (0.656)	Data Time 0.001 (0.043)	Loss 2.8011 (2.7398)	Entropy 1.20964 (1.21094)	Top-1 acc 56.641 (58.162)	Top-5 acc 76.953 (79.743)	lr 0.01721
Train [46][830/3239]	Time 0.239 (0.654)	Data Time 0.001 (0.042)	Loss 2.7073 (2.7401)	Entropy 1.20958 (1.21092)	Top-1 acc 60.156 (58.168)	Top-5 acc 81.250 (79.733)	lr 0.01721
Train [46][840/3239]	Time 0.244 (0.652)	Data Time 0.001 (0.042)	Loss 2.7387 (2.7408)	Entropy 1.20956 (1.21091)	Top-1 acc 58.203 (58.153)	Top-5 acc 79.688 (79.718)	lr 0.01721
Train [46][850/3239]	Time 0.327 (0.650)	Data Time 0.001 (0.041)	Loss 2.8446 (2.7409)	Entropy 1.20955 (1.21089)	Top-1 acc 55.859 (58.156)	Top-5 acc 76.172 (79.717)	lr 0.01720
Train [46][860/3239]	Time 0.171 (0.647)	Data Time 0.001 (0.041)	Loss 2.6952 (2.7412)	Entropy 1.20953 (1.21088)	Top-1 acc 60.547 (58.146)	Top-5 acc 81.641 (79.704)	lr 0.01720
Train [46][870/3239]	Time 0.223 (0.646)	Data Time 0.001 (0.040)	Loss 2.7411 (2.7415)	Entropy 1.20945 (1.21086)	Top-1 acc 58.203 (58.134)	Top-5 acc 79.688 (79.699)	lr 0.01720
Train [46][880/3239]	Time 0.242 (0.643)	Data Time 0.001 (0.040)	Loss 2.7417 (2.7417)	Entropy 1.20957 (1.21084)	Top-1 acc 55.078 (58.130)	Top-5 acc 77.344 (79.687)	lr 0.01720
Train [46][890/3239]	Time 2.480 (0.641)	Data Time 0.002 (0.039)	Loss 2.7653 (2.7421)	Entropy 1.20957 (1.21083)	Top-1 acc 63.281 (58.138)	Top-5 acc 78.125 (79.673)	lr 0.01720
Train [46][900/3239]	Time 0.311 (0.637)	Data Time 0.001 (0.039)	Loss 2.9229 (2.7421)	Entropy 1.20952 (1.21081)	Top-1 acc 53.516 (58.131)	Top-5 acc 74.219 (79.678)	lr 0.01720
Train [46][910/3239]	Time 0.221 (0.634)	Data Time 0.001 (0.039)	Loss 2.7304 (2.7420)	Entropy 1.20934 (1.21080)	Top-1 acc 57.812 (58.135)	Top-5 acc 79.297 (79.686)	lr 0.01720
Train [46][920/3239]	Time 0.256 (0.632)	Data Time 0.001 (0.038)	Loss 2.7163 (2.7420)	Entropy 1.20931 (1.21078)	Top-1 acc 56.250 (58.133)	Top-5 acc 78.516 (79.690)	lr 0.01720
Train [46][930/3239]	Time 0.230 (0.631)	Data Time 0.001 (0.038)	Loss 2.7433 (2.7418)	Entropy 1.20926 (1.21077)	Top-1 acc 55.859 (58.144)	Top-5 acc 78.516 (79.695)	lr 0.01720
Train [46][940/3239]	Time 0.219 (0.629)	Data Time 0.002 (0.037)	Loss 2.6187 (2.7417)	Entropy 1.20924 (1.21075)	Top-1 acc 60.156 (58.145)	Top-5 acc 83.984 (79.705)	lr 0.01720
Train [46][950/3239]	Time 0.205 (0.627)	Data Time 0.001 (0.037)	Loss 2.8777 (2.7419)	Entropy 1.20924 (1.21073)	Top-1 acc 53.125 (58.126)	Top-5 acc 75.391 (79.703)	lr 0.01719
Train [46][960/3239]	Time 0.205 (0.625)	Data Time 0.001 (0.037)	Loss 2.7514 (2.7422)	Entropy 1.20922 (1.21072)	Top-1 acc 59.766 (58.117)	Top-5 acc 76.562 (79.687)	lr 0.01719
Train [46][970/3239]	Time 0.235 (0.623)	Data Time 0.001 (0.036)	Loss 2.8167 (2.7425)	Entropy 1.20918 (1.21070)	Top-1 acc 58.594 (58.102)	Top-5 acc 76.953 (79.676)	lr 0.01719
Train [46][980/3239]	Time 0.217 (0.621)	Data Time 0.001 (0.036)	Loss 2.6107 (2.7426)	Entropy 1.20916 (1.21069)	Top-1 acc 61.328 (58.105)	Top-5 acc 82.812 (79.664)	lr 0.01719
Train [46][990/3239]	Time 0.219 (0.620)	Data Time 0.001 (0.036)	Loss 2.8072 (2.7425)	Entropy 1.20917 (1.21067)	Top-1 acc 60.547 (58.110)	Top-5 acc 76.172 (79.665)	lr 0.01719
Train [46][1000/3239]	Time 2.411 (0.618)	Data Time 0.001 (0.035)	Loss 2.5662 (2.7420)	Entropy 1.20917 (1.21066)	Top-1 acc 61.719 (58.119)	Top-5 acc 84.375 (79.676)	lr 0.01719
Train [46][1010/3239]	Time 0.227 (0.614)	Data Time 0.002 (0.035)	Loss 2.8163 (2.7423)	Entropy 1.20946 (1.21065)	Top-1 acc 54.297 (58.114)	Top-5 acc 78.516 (79.669)	lr 0.01719
Train [46][1020/3239]	Time 0.211 (0.613)	Data Time 0.001 (0.035)	Loss 2.8491 (2.7427)	Entropy 1.20946 (1.21063)	Top-1 acc 54.688 (58.105)	Top-5 acc 77.734 (79.667)	lr 0.01719
Train [46][1030/3239]	Time 0.242 (0.611)	Data Time 0.001 (0.034)	Loss 2.8169 (2.7429)	Entropy 1.20939 (1.21062)	Top-1 acc 55.469 (58.101)	Top-5 acc 78.516 (79.664)	lr 0.01719
Train [46][1040/3239]	Time 0.273 (0.610)	Data Time 0.001 (0.034)	Loss 2.7459 (2.7430)	Entropy 1.20936 (1.21061)	Top-1 acc 59.375 (58.094)	Top-5 acc 80.469 (79.665)	lr 0.01719
Train [46][1050/3239]	Time 0.214 (0.609)	Data Time 0.001 (0.034)	Loss 2.8764 (2.7432)	Entropy 1.20930 (1.21060)	Top-1 acc 53.125 (58.093)	Top-5 acc 76.172 (79.664)	lr 0.01719
Train [46][1060/3239]	Time 0.222 (0.607)	Data Time 0.001 (0.033)	Loss 2.7351 (2.7434)	Entropy 1.20922 (1.21059)	Top-1 acc 62.500 (58.094)	Top-5 acc 82.422 (79.658)	lr 0.01718
Train [46][1070/3239]	Time 0.225 (0.606)	Data Time 0.001 (0.033)	Loss 2.7008 (2.7443)	Entropy 1.20923 (1.21057)	Top-1 acc 57.422 (58.090)	Top-5 acc 82.031 (79.638)	lr 0.01718
Train [46][1080/3239]	Time 0.216 (0.605)	Data Time 0.001 (0.033)	Loss 2.7313 (2.7449)	Entropy 1.20922 (1.21056)	Top-1 acc 57.812 (58.075)	Top-5 acc 80.859 (79.630)	lr 0.01718
Train [46][1090/3239]	Time 0.217 (0.603)	Data Time 0.001 (0.032)	Loss 2.8028 (2.7449)	Entropy 1.20921 (1.21055)	Top-1 acc 57.031 (58.084)	Top-5 acc 78.516 (79.634)	lr 0.01718
Train [46][1100/3239]	Time 0.403 (0.643)	Data Time 0.003 (0.032)	Loss 2.7642 (2.7457)	Entropy 1.20917 (1.21054)	Top-1 acc 57.812 (58.056)	Top-5 acc 81.641 (79.624)	lr 0.01718
Train [46][1110/3239]	Time 2.777 (0.643)	Data Time 0.003 (0.032)	Loss 2.7811 (2.7459)	Entropy 1.20917 (1.21052)	Top-1 acc 58.594 (58.057)	Top-5 acc 78.125 (79.615)	lr 0.01718
Train [46][1120/3239]	Time 0.227 (0.639)	Data Time 0.002 (0.032)	Loss 2.5969 (2.7453)	Entropy 1.20914 (1.21051)	Top-1 acc 61.719 (58.074)	Top-5 acc 84.766 (79.633)	lr 0.01718
Train [46][1130/3239]	Time 0.242 (0.638)	Data Time 0.001 (0.031)	Loss 2.7874 (2.7457)	Entropy 1.20912 (1.21050)	Top-1 acc 62.109 (58.067)	Top-5 acc 78.906 (79.628)	lr 0.01718
Train [46][1140/3239]	Time 0.245 (0.636)	Data Time 0.002 (0.031)	Loss 2.8222 (2.7455)	Entropy 1.20908 (1.21049)	Top-1 acc 53.906 (58.064)	Top-5 acc 78.516 (79.632)	lr 0.01718
Train [46][1150/3239]	Time 0.223 (0.635)	Data Time 0.001 (0.031)	Loss 2.6500 (2.7452)	Entropy 1.20905 (1.21047)	Top-1 acc 62.109 (58.063)	Top-5 acc 80.469 (79.640)	lr 0.01718
Train [46][1160/3239]	Time 0.220 (0.633)	Data Time 0.001 (0.031)	Loss 2.5671 (2.7446)	Entropy 1.20900 (1.21046)	Top-1 acc 62.109 (58.084)	Top-5 acc 82.812 (79.656)	lr 0.01718
Train [46][1170/3239]	Time 0.216 (0.632)	Data Time 0.001 (0.030)	Loss 2.7378 (2.7448)	Entropy 1.20895 (1.21045)	Top-1 acc 58.594 (58.081)	Top-5 acc 80.078 (79.658)	lr 0.01717
Train [46][1180/3239]	Time 0.221 (0.630)	Data Time 0.002 (0.030)	Loss 2.6278 (2.7452)	Entropy 1.20894 (1.21044)	Top-1 acc 62.500 (58.073)	Top-5 acc 82.031 (79.648)	lr 0.01717
Train [46][1190/3239]	Time 0.214 (0.629)	Data Time 0.001 (0.030)	Loss 2.7689 (2.7453)	Entropy 1.20895 (1.21042)	Top-1 acc 56.250 (58.061)	Top-5 acc 79.297 (79.647)	lr 0.01717
Train [46][1200/3239]	Time 0.174 (0.627)	Data Time 0.001 (0.030)	Loss 2.8416 (2.7457)	Entropy 1.20895 (1.21041)	Top-1 acc 57.422 (58.056)	Top-5 acc 79.297 (79.640)	lr 0.01717
Train [46][1210/3239]	Time 0.214 (0.626)	Data Time 0.001 (0.030)	Loss 2.7616 (2.7461)	Entropy 1.20893 (1.21040)	Top-1 acc 58.984 (58.044)	Top-5 acc 80.469 (79.628)	lr 0.01717
Train [46][1220/3239]	Time 2.394 (0.624)	Data Time 0.001 (0.029)	Loss 2.8112 (2.7459)	Entropy 1.20893 (1.21039)	Top-1 acc 57.422 (58.055)	Top-5 acc 80.078 (79.633)	lr 0.01717
Train [46][1230/3239]	Time 0.214 (0.621)	Data Time 0.001 (0.029)	Loss 2.6366 (2.7457)	Entropy 1.20885 (1.21037)	Top-1 acc 61.719 (58.065)	Top-5 acc 82.031 (79.630)	lr 0.01717
Train [46][1240/3239]	Time 0.170 (0.620)	Data Time 0.001 (0.029)	Loss 2.9291 (2.7461)	Entropy 1.20880 (1.21036)	Top-1 acc 51.562 (58.057)	Top-5 acc 75.391 (79.622)	lr 0.01717
Train [46][1250/3239]	Time 0.217 (0.619)	Data Time 0.001 (0.029)	Loss 2.6393 (2.7462)	Entropy 1.20879 (1.21035)	Top-1 acc 60.547 (58.054)	Top-5 acc 83.203 (79.625)	lr 0.01717
Train [46][1260/3239]	Time 0.319 (0.617)	Data Time 0.002 (0.028)	Loss 2.7939 (2.7463)	Entropy 1.20874 (1.21034)	Top-1 acc 56.250 (58.049)	Top-5 acc 79.688 (79.626)	lr 0.01717
Train [46][1270/3239]	Time 0.251 (0.616)	Data Time 0.001 (0.028)	Loss 2.8272 (2.7464)	Entropy 1.20873 (1.21032)	Top-1 acc 56.641 (58.050)	Top-5 acc 75.781 (79.623)	lr 0.01716
Train [46][1280/3239]	Time 0.248 (0.615)	Data Time 0.002 (0.028)	Loss 2.8477 (2.7463)	Entropy 1.20872 (1.21031)	Top-1 acc 56.641 (58.056)	Top-5 acc 79.297 (79.625)	lr 0.01716
Train [46][1290/3239]	Time 0.199 (0.614)	Data Time 0.001 (0.028)	Loss 2.7492 (2.7465)	Entropy 1.20867 (1.21030)	Top-1 acc 57.422 (58.054)	Top-5 acc 78.516 (79.614)	lr 0.01716
Train [46][1300/3239]	Time 0.211 (0.612)	Data Time 0.001 (0.028)	Loss 2.6804 (2.7464)	Entropy 1.20861 (1.21029)	Top-1 acc 58.594 (58.057)	Top-5 acc 80.078 (79.616)	lr 0.01716
Train [46][1310/3239]	Time 0.317 (0.611)	Data Time 0.001 (0.027)	Loss 2.7091 (2.7463)	Entropy 1.20852 (1.21027)	Top-1 acc 61.719 (58.062)	Top-5 acc 78.906 (79.614)	lr 0.01716
Train [46][1320/3239]	Time 0.252 (0.610)	Data Time 0.001 (0.027)	Loss 2.6239 (2.7466)	Entropy 1.20849 (1.21026)	Top-1 acc 58.594 (58.052)	Top-5 acc 82.031 (79.608)	lr 0.01716
Train [46][1330/3239]	Time 2.483 (0.609)	Data Time 0.002 (0.027)	Loss 2.7431 (2.7468)	Entropy 1.20849 (1.21025)	Top-1 acc 62.109 (58.051)	Top-5 acc 80.078 (79.601)	lr 0.01716
Train [46][1340/3239]	Time 0.301 (0.606)	Data Time 0.001 (0.027)	Loss 2.9654 (2.7464)	Entropy 1.20848 (1.21023)	Top-1 acc 52.344 (58.064)	Top-5 acc 75.391 (79.609)	lr 0.01716
Train [46][1350/3239]	Time 0.217 (0.605)	Data Time 0.001 (0.027)	Loss 2.6789 (2.7461)	Entropy 1.20846 (1.21022)	Top-1 acc 59.375 (58.072)	Top-5 acc 85.156 (79.619)	lr 0.01716
Train [46][1360/3239]	Time 0.314 (0.604)	Data Time 0.001 (0.026)	Loss 2.8776 (2.7463)	Entropy 1.20844 (1.21021)	Top-1 acc 55.078 (58.070)	Top-5 acc 78.125 (79.621)	lr 0.01716
Train [46][1370/3239]	Time 0.194 (0.603)	Data Time 0.001 (0.026)	Loss 2.6555 (2.7462)	Entropy 1.20843 (1.21019)	Top-1 acc 61.719 (58.081)	Top-5 acc 81.641 (79.617)	lr 0.01716
Train [46][1380/3239]	Time 0.205 (0.602)	Data Time 0.004 (0.026)	Loss 2.7765 (2.7463)	Entropy 1.20840 (1.21018)	Top-1 acc 58.984 (58.074)	Top-5 acc 79.688 (79.618)	lr 0.01715
Train [46][1390/3239]	Time 0.231 (0.601)	Data Time 0.001 (0.026)	Loss 2.6854 (2.7463)	Entropy 1.20839 (1.21017)	Top-1 acc 58.594 (58.067)	Top-5 acc 80.859 (79.618)	lr 0.01715
Train [46][1400/3239]	Time 0.229 (0.600)	Data Time 0.001 (0.026)	Loss 2.9030 (2.7465)	Entropy 1.20837 (1.21016)	Top-1 acc 54.688 (58.061)	Top-5 acc 76.562 (79.615)	lr 0.01715
Train [46][1410/3239]	Time 0.223 (0.599)	Data Time 0.001 (0.026)	Loss 2.7469 (2.7468)	Entropy 1.20816 (1.21014)	Top-1 acc 56.641 (58.047)	Top-5 acc 80.078 (79.607)	lr 0.01715
Train [46][1420/3239]	Time 0.214 (0.598)	Data Time 0.001 (0.025)	Loss 2.9209 (2.7470)	Entropy 1.20813 (1.21013)	Top-1 acc 51.562 (58.037)	Top-5 acc 74.609 (79.601)	lr 0.01715
Train [46][1430/3239]	Time 0.279 (0.597)	Data Time 0.001 (0.025)	Loss 2.6012 (2.7466)	Entropy 1.20807 (1.21012)	Top-1 acc 63.281 (58.053)	Top-5 acc 84.766 (79.612)	lr 0.01715
Train [46][1440/3239]	Time 2.477 (0.596)	Data Time 0.001 (0.025)	Loss 2.8032 (2.7467)	Entropy 1.20807 (1.21010)	Top-1 acc 61.328 (58.058)	Top-5 acc 78.125 (79.606)	lr 0.01715
Train [46][1450/3239]	Time 0.250 (0.594)	Data Time 0.001 (0.025)	Loss 2.4405 (2.7466)	Entropy 1.20806 (1.21009)	Top-1 acc 66.016 (58.058)	Top-5 acc 85.938 (79.606)	lr 0.01715
Train [46][1460/3239]	Time 0.226 (0.593)	Data Time 0.001 (0.025)	Loss 2.8724 (2.7467)	Entropy 1.20805 (1.21007)	Top-1 acc 55.469 (58.051)	Top-5 acc 74.609 (79.604)	lr 0.01715
Train [46][1470/3239]	Time 0.278 (0.623)	Data Time 0.002 (0.025)	Loss 2.9498 (2.7469)	Entropy 1.20803 (1.21006)	Top-1 acc 53.516 (58.048)	Top-5 acc 76.172 (79.603)	lr 0.01715
Train [46][1480/3239]	Time 0.226 (0.622)	Data Time 0.002 (0.024)	Loss 2.8260 (2.7472)	Entropy 1.20786 (1.21004)	Top-1 acc 56.250 (58.044)	Top-5 acc 77.344 (79.599)	lr 0.01715
Train [46][1490/3239]	Time 0.298 (0.621)	Data Time 0.002 (0.024)	Loss 2.7337 (2.7471)	Entropy 1.20781 (1.21003)	Top-1 acc 60.156 (58.054)	Top-5 acc 78.516 (79.600)	lr 0.01714
Train [46][1500/3239]	Time 0.239 (0.620)	Data Time 0.001 (0.024)	Loss 2.7085 (2.7472)	Entropy 1.20780 (1.21001)	Top-1 acc 60.938 (58.061)	Top-5 acc 78.906 (79.594)	lr 0.01714
Train [46][1510/3239]	Time 0.202 (0.619)	Data Time 0.001 (0.024)	Loss 2.8340 (2.7476)	Entropy 1.20768 (1.21000)	Top-1 acc 55.078 (58.055)	Top-5 acc 78.125 (79.584)	lr 0.01714
Train [46][1520/3239]	Time 0.203 (0.618)	Data Time 0.001 (0.024)	Loss 2.7516 (2.7475)	Entropy 1.20766 (1.20998)	Top-1 acc 57.031 (58.055)	Top-5 acc 79.297 (79.588)	lr 0.01714
Train [46][1530/3239]	Time 0.243 (0.617)	Data Time 0.001 (0.024)	Loss 2.6020 (2.7475)	Entropy 1.20761 (1.20997)	Top-1 acc 60.156 (58.055)	Top-5 acc 82.031 (79.588)	lr 0.01714
Train [46][1540/3239]	Time 0.257 (0.616)	Data Time 0.001 (0.024)	Loss 2.9657 (2.7478)	Entropy 1.20765 (1.20995)	Top-1 acc 48.047 (58.045)	Top-5 acc 73.828 (79.581)	lr 0.01714
Train [46][1550/3239]	Time 2.365 (0.615)	Data Time 0.001 (0.023)	Loss 2.6328 (2.7480)	Entropy 1.20765 (1.20994)	Top-1 acc 62.109 (58.038)	Top-5 acc 81.250 (79.577)	lr 0.01714
Train [46][1560/3239]	Time 0.306 (0.613)	Data Time 0.001 (0.023)	Loss 2.7244 (2.7478)	Entropy 1.20756 (1.20992)	Top-1 acc 61.328 (58.042)	Top-5 acc 78.125 (79.577)	lr 0.01714
Train [46][1570/3239]	Time 0.218 (0.612)	Data Time 0.001 (0.023)	Loss 2.8695 (2.7484)	Entropy 1.20755 (1.20991)	Top-1 acc 53.125 (58.019)	Top-5 acc 78.906 (79.570)	lr 0.01714
Train [46][1580/3239]	Time 0.261 (0.611)	Data Time 0.002 (0.023)	Loss 2.7641 (2.7484)	Entropy 1.20749 (1.20989)	Top-1 acc 60.156 (58.025)	Top-5 acc 78.516 (79.577)	lr 0.01714
Train [46][1590/3239]	Time 0.229 (0.610)	Data Time 0.001 (0.023)	Loss 2.6433 (2.7482)	Entropy 1.20753 (1.20988)	Top-1 acc 62.109 (58.029)	Top-5 acc 80.859 (79.576)	lr 0.01713
Train [46][1600/3239]	Time 0.223 (0.609)	Data Time 0.001 (0.023)	Loss 2.6587 (2.7486)	Entropy 1.20748 (1.20986)	Top-1 acc 58.984 (58.018)	Top-5 acc 80.469 (79.565)	lr 0.01713
Train [46][1610/3239]	Time 0.309 (0.608)	Data Time 0.001 (0.023)	Loss 2.8829 (2.7490)	Entropy 1.20749 (1.20985)	Top-1 acc 54.688 (58.007)	Top-5 acc 74.609 (79.550)	lr 0.01713
Train [46][1620/3239]	Time 0.221 (0.607)	Data Time 0.001 (0.022)	Loss 2.8212 (2.7487)	Entropy 1.20748 (1.20983)	Top-1 acc 53.125 (58.008)	Top-5 acc 81.250 (79.555)	lr 0.01713
Train [46][1630/3239]	Time 0.225 (0.606)	Data Time 0.001 (0.022)	Loss 2.8132 (2.7492)	Entropy 1.20745 (1.20982)	Top-1 acc 56.641 (57.999)	Top-5 acc 78.125 (79.543)	lr 0.01713
Train [46][1640/3239]	Time 0.192 (0.605)	Data Time 0.001 (0.022)	Loss 2.8500 (2.7494)	Entropy 1.20744 (1.20981)	Top-1 acc 51.953 (57.993)	Top-5 acc 77.734 (79.538)	lr 0.01713
Train [46][1650/3239]	Time 0.225 (0.604)	Data Time 0.001 (0.022)	Loss 2.6429 (2.7497)	Entropy 1.20737 (1.20979)	Top-1 acc 57.031 (57.982)	Top-5 acc 82.812 (79.532)	lr 0.01713
Train [46][1660/3239]	Time 2.474 (0.603)	Data Time 0.002 (0.022)	Loss 2.7994 (2.7497)	Entropy 1.20737 (1.20978)	Top-1 acc 55.469 (57.978)	Top-5 acc 76.953 (79.524)	lr 0.01713
Train [46][1670/3239]	Time 0.230 (0.601)	Data Time 0.001 (0.022)	Loss 2.6951 (2.7496)	Entropy 1.20735 (1.20976)	Top-1 acc 59.375 (57.979)	Top-5 acc 82.422 (79.527)	lr 0.01713
Train [46][1680/3239]	Time 0.226 (0.600)	Data Time 0.001 (0.022)	Loss 2.7488 (2.7496)	Entropy 1.20730 (1.20975)	Top-1 acc 59.375 (57.975)	Top-5 acc 78.516 (79.526)	lr 0.01713
Train [46][1690/3239]	Time 0.213 (0.600)	Data Time 0.001 (0.022)	Loss 2.6150 (2.7498)	Entropy 1.20725 (1.20973)	Top-1 acc 62.109 (57.970)	Top-5 acc 82.422 (79.526)	lr 0.01713
Train [46][1700/3239]	Time 0.224 (0.599)	Data Time 0.001 (0.022)	Loss 2.9180 (2.7504)	Entropy 1.20724 (1.20972)	Top-1 acc 51.562 (57.959)	Top-5 acc 77.734 (79.514)	lr 0.01712
Train [46][1710/3239]	Time 0.325 (0.598)	Data Time 0.002 (0.021)	Loss 2.5746 (2.7500)	Entropy 1.20715 (1.20970)	Top-1 acc 60.938 (57.962)	Top-5 acc 82.812 (79.522)	lr 0.01712
Train [46][1720/3239]	Time 0.212 (0.597)	Data Time 0.002 (0.021)	Loss 2.7886 (2.7501)	Entropy 1.20706 (1.20969)	Top-1 acc 58.594 (57.964)	Top-5 acc 79.297 (79.522)	lr 0.01712
Train [46][1730/3239]	Time 0.251 (0.596)	Data Time 0.003 (0.021)	Loss 2.6843 (2.7501)	Entropy 1.20698 (1.20967)	Top-1 acc 56.250 (57.963)	Top-5 acc 80.469 (79.524)	lr 0.01712
Train [46][1740/3239]	Time 0.225 (0.596)	Data Time 0.001 (0.021)	Loss 2.6408 (2.7498)	Entropy 1.20699 (1.20966)	Top-1 acc 63.672 (57.971)	Top-5 acc 81.641 (79.528)	lr 0.01712
Train [46][1750/3239]	Time 0.244 (0.595)	Data Time 0.001 (0.021)	Loss 2.6220 (2.7500)	Entropy 1.20701 (1.20964)	Top-1 acc 61.719 (57.965)	Top-5 acc 80.859 (79.521)	lr 0.01712
Train [46][1760/3239]	Time 0.314 (0.594)	Data Time 0.001 (0.021)	Loss 2.8159 (2.7499)	Entropy 1.20697 (1.20963)	Top-1 acc 55.859 (57.967)	Top-5 acc 77.734 (79.522)	lr 0.01712
Train [46][1770/3239]	Time 2.379 (0.593)	Data Time 0.001 (0.021)	Loss 2.8111 (2.7498)	Entropy 1.20697 (1.20961)	Top-1 acc 53.125 (57.966)	Top-5 acc 78.516 (79.523)	lr 0.01712
Train [46][1780/3239]	Time 0.216 (0.592)	Data Time 0.001 (0.021)	Loss 2.5053 (2.7500)	Entropy 1.20693 (1.20960)	Top-1 acc 64.453 (57.967)	Top-5 acc 80.859 (79.516)	lr 0.01712
Train [46][1790/3239]	Time 0.222 (0.591)	Data Time 0.001 (0.021)	Loss 2.8322 (2.7502)	Entropy 1.20692 (1.20958)	Top-1 acc 58.203 (57.965)	Top-5 acc 78.125 (79.511)	lr 0.01712
Train [46][1800/3239]	Time 0.227 (0.590)	Data Time 0.001 (0.020)	Loss 2.9516 (2.7502)	Entropy 1.20686 (1.20957)	Top-1 acc 53.906 (57.966)	Top-5 acc 78.516 (79.512)	lr 0.01712
Train [46][1810/3239]	Time 0.274 (0.589)	Data Time 0.001 (0.020)	Loss 2.7868 (2.7501)	Entropy 1.20681 (1.20955)	Top-1 acc 57.812 (57.972)	Top-5 acc 80.469 (79.517)	lr 0.01711
Train [46][1820/3239]	Time 0.202 (0.589)	Data Time 0.001 (0.020)	Loss 2.8734 (2.7503)	Entropy 1.20678 (1.20954)	Top-1 acc 57.812 (57.968)	Top-5 acc 75.391 (79.516)	lr 0.01711
Train [46][1830/3239]	Time 0.220 (0.615)	Data Time 0.002 (0.020)	Loss 2.7621 (2.7502)	Entropy 1.20678 (1.20952)	Top-1 acc 61.719 (57.970)	Top-5 acc 80.859 (79.519)	lr 0.01711
Train [46][1840/3239]	Time 0.218 (0.614)	Data Time 0.002 (0.020)	Loss 2.7377 (2.7501)	Entropy 1.20669 (1.20951)	Top-1 acc 58.984 (57.974)	Top-5 acc 79.688 (79.519)	lr 0.01711
Train [46][1850/3239]	Time 0.217 (0.614)	Data Time 0.002 (0.020)	Loss 2.8082 (2.7502)	Entropy 1.20667 (1.20949)	Top-1 acc 57.031 (57.969)	Top-5 acc 77.344 (79.513)	lr 0.01711
Train [46][1860/3239]	Time 0.302 (0.613)	Data Time 0.001 (0.020)	Loss 2.9696 (2.7505)	Entropy 1.20667 (1.20948)	Top-1 acc 53.516 (57.962)	Top-5 acc 75.781 (79.508)	lr 0.01711
Train [46][1870/3239]	Time 0.236 (0.612)	Data Time 0.001 (0.020)	Loss 2.6066 (2.7502)	Entropy 1.20665 (1.20946)	Top-1 acc 60.938 (57.967)	Top-5 acc 81.641 (79.513)	lr 0.01711
Train [46][1880/3239]	Time 2.426 (0.611)	Data Time 0.002 (0.020)	Loss 2.7107 (2.7502)	Entropy 1.20665 (1.20945)	Top-1 acc 57.812 (57.966)	Top-5 acc 80.078 (79.515)	lr 0.01711
Train [46][1890/3239]	Time 0.215 (0.609)	Data Time 0.001 (0.020)	Loss 2.7682 (2.7503)	Entropy 1.20674 (1.20943)	Top-1 acc 60.938 (57.966)	Top-5 acc 80.469 (79.512)	lr 0.01711
Train [46][1900/3239]	Time 0.216 (0.608)	Data Time 0.001 (0.019)	Loss 2.7794 (2.7506)	Entropy 1.20668 (1.20942)	Top-1 acc 57.031 (57.957)	Top-5 acc 78.906 (79.509)	lr 0.01711
Train [46][1910/3239]	Time 0.316 (0.608)	Data Time 0.001 (0.019)	Loss 2.8133 (2.7505)	Entropy 1.20664 (1.20940)	Top-1 acc 54.297 (57.958)	Top-5 acc 78.125 (79.509)	lr 0.01710
Train [46][1920/3239]	Time 0.223 (0.607)	Data Time 0.001 (0.019)	Loss 2.5647 (2.7505)	Entropy 1.20654 (1.20939)	Top-1 acc 61.719 (57.955)	Top-5 acc 80.859 (79.512)	lr 0.01710
Train [46][1930/3239]	Time 0.230 (0.606)	Data Time 0.001 (0.019)	Loss 2.7730 (2.7506)	Entropy 1.20657 (1.20937)	Top-1 acc 56.250 (57.951)	Top-5 acc 77.734 (79.509)	lr 0.01710
Train [46][1940/3239]	Time 0.253 (0.605)	Data Time 0.001 (0.019)	Loss 2.8750 (2.7507)	Entropy 1.20651 (1.20936)	Top-1 acc 54.688 (57.953)	Top-5 acc 77.344 (79.506)	lr 0.01710
Train [46][1950/3239]	Time 0.218 (0.605)	Data Time 0.001 (0.019)	Loss 2.6351 (2.7507)	Entropy 1.20647 (1.20934)	Top-1 acc 60.547 (57.955)	Top-5 acc 80.859 (79.504)	lr 0.01710
Train [46][1960/3239]	Time 0.307 (0.604)	Data Time 0.001 (0.019)	Loss 2.7308 (2.7507)	Entropy 1.20643 (1.20933)	Top-1 acc 57.812 (57.952)	Top-5 acc 79.688 (79.505)	lr 0.01710
Train [46][1970/3239]	Time 0.221 (0.603)	Data Time 0.001 (0.019)	Loss 2.6314 (2.7506)	Entropy 1.20643 (1.20932)	Top-1 acc 57.031 (57.950)	Top-5 acc 83.594 (79.508)	lr 0.01710
Train [46][1980/3239]	Time 0.257 (0.603)	Data Time 0.001 (0.019)	Loss 2.9112 (2.7507)	Entropy 1.20629 (1.20930)	Top-1 acc 56.641 (57.949)	Top-5 acc 74.609 (79.505)	lr 0.01710
Train [46][1990/3239]	Time 2.397 (0.602)	Data Time 0.001 (0.019)	Loss 2.7694 (2.7506)	Entropy 1.20629 (1.20929)	Top-1 acc 60.547 (57.951)	Top-5 acc 78.516 (79.502)	lr 0.01710
Train [46][2000/3239]	Time 0.208 (0.600)	Data Time 0.001 (0.019)	Loss 2.5499 (2.7506)	Entropy 1.20628 (1.20927)	Top-1 acc 61.719 (57.952)	Top-5 acc 84.375 (79.506)	lr 0.01710
Train [46][2010/3239]	Time 0.201 (0.599)	Data Time 0.001 (0.018)	Loss 2.6397 (2.7504)	Entropy 1.20624 (1.20926)	Top-1 acc 58.984 (57.954)	Top-5 acc 81.641 (79.512)	lr 0.01710
Train [46][2020/3239]	Time 0.215 (0.599)	Data Time 0.001 (0.018)	Loss 2.6108 (2.7503)	Entropy 1.20624 (1.20924)	Top-1 acc 59.766 (57.957)	Top-5 acc 83.594 (79.513)	lr 0.01709
Train [46][2030/3239]	Time 0.207 (0.598)	Data Time 0.001 (0.018)	Loss 2.7683 (2.7502)	Entropy 1.20623 (1.20923)	Top-1 acc 58.203 (57.962)	Top-5 acc 76.953 (79.517)	lr 0.01709
Train [46][2040/3239]	Time 0.222 (0.597)	Data Time 0.001 (0.018)	Loss 2.8521 (2.7502)	Entropy 1.20617 (1.20921)	Top-1 acc 53.125 (57.955)	Top-5 acc 78.125 (79.518)	lr 0.01709
Train [46][2050/3239]	Time 0.223 (0.596)	Data Time 0.001 (0.018)	Loss 2.7606 (2.7504)	Entropy 1.20614 (1.20920)	Top-1 acc 57.422 (57.948)	Top-5 acc 80.859 (79.514)	lr 0.01709
Train [46][2060/3239]	Time 0.226 (0.596)	Data Time 0.001 (0.018)	Loss 2.8204 (2.7506)	Entropy 1.20605 (1.20918)	Top-1 acc 56.641 (57.945)	Top-5 acc 77.344 (79.510)	lr 0.01709
Train [46][2070/3239]	Time 0.226 (0.595)	Data Time 0.001 (0.018)	Loss 2.7497 (2.7507)	Entropy 1.20598 (1.20917)	Top-1 acc 57.422 (57.939)	Top-5 acc 77.734 (79.510)	lr 0.01709
Train [46][2080/3239]	Time 0.211 (0.594)	Data Time 0.001 (0.018)	Loss 2.6958 (2.7505)	Entropy 1.20597 (1.20915)	Top-1 acc 61.328 (57.945)	Top-5 acc 82.812 (79.516)	lr 0.01709
Train [46][2090/3239]	Time 0.307 (0.594)	Data Time 0.001 (0.018)	Loss 2.6626 (2.7504)	Entropy 1.20596 (1.20914)	Top-1 acc 58.203 (57.952)	Top-5 acc 80.469 (79.518)	lr 0.01709
Train [46][2100/3239]	Time 2.405 (0.593)	Data Time 0.001 (0.018)	Loss 2.6191 (2.7506)	Entropy 1.20596 (1.20912)	Top-1 acc 60.547 (57.951)	Top-5 acc 81.250 (79.513)	lr 0.01709
Train [46][2110/3239]	Time 0.364 (0.591)	Data Time 0.001 (0.018)	Loss 2.8474 (2.7506)	Entropy 1.20587 (1.20910)	Top-1 acc 54.688 (57.952)	Top-5 acc 76.172 (79.505)	lr 0.01709
Train [46][2120/3239]	Time 0.264 (0.591)	Data Time 0.001 (0.018)	Loss 2.6210 (2.7507)	Entropy 1.20585 (1.20909)	Top-1 acc 60.156 (57.951)	Top-5 acc 84.766 (79.505)	lr 0.01709
Train [46][2130/3239]	Time 0.254 (0.590)	Data Time 0.001 (0.018)	Loss 2.8934 (2.7509)	Entropy 1.20581 (1.20907)	Top-1 acc 55.469 (57.945)	Top-5 acc 75.000 (79.499)	lr 0.01708
Train [46][2140/3239]	Time 0.256 (0.590)	Data Time 0.001 (0.017)	Loss 2.7845 (2.7511)	Entropy 1.20583 (1.20906)	Top-1 acc 52.344 (57.939)	Top-5 acc 79.297 (79.494)	lr 0.01708
Train [46][2150/3239]	Time 0.247 (0.589)	Data Time 0.001 (0.017)	Loss 2.7435 (2.7511)	Entropy 1.20584 (1.20904)	Top-1 acc 55.078 (57.941)	Top-5 acc 79.688 (79.490)	lr 0.01708
Train [46][2160/3239]	Time 0.215 (0.588)	Data Time 0.001 (0.017)	Loss 2.6919 (2.7510)	Entropy 1.20582 (1.20903)	Top-1 acc 60.156 (57.940)	Top-5 acc 80.078 (79.491)	lr 0.01708
Train [46][2170/3239]	Time 0.241 (0.588)	Data Time 0.001 (0.017)	Loss 2.6739 (2.7508)	Entropy 1.20582 (1.20901)	Top-1 acc 57.422 (57.943)	Top-5 acc 78.906 (79.494)	lr 0.01708
Train [46][2180/3239]	Time 0.217 (0.587)	Data Time 0.002 (0.017)	Loss 2.8332 (2.7511)	Entropy 1.20580 (1.20900)	Top-1 acc 59.375 (57.942)	Top-5 acc 78.516 (79.490)	lr 0.01708
Train [46][2190/3239]	Time 0.285 (0.609)	Data Time 0.002 (0.017)	Loss 2.6267 (2.7512)	Entropy 1.20579 (1.20898)	Top-1 acc 61.719 (57.942)	Top-5 acc 83.203 (79.489)	lr 0.01708
Train [46][2200/3239]	Time 0.254 (0.608)	Data Time 0.002 (0.017)	Loss 2.6018 (2.7511)	Entropy 1.20577 (1.20897)	Top-1 acc 59.766 (57.940)	Top-5 acc 83.984 (79.489)	lr 0.01708
Train [46][2210/3239]	Time 2.573 (0.608)	Data Time 0.002 (0.017)	Loss 2.7558 (2.7509)	Entropy 1.20577 (1.20896)	Top-1 acc 60.938 (57.949)	Top-5 acc 78.516 (79.494)	lr 0.01708
Train [46][2220/3239]	Time 0.261 (0.606)	Data Time 0.002 (0.017)	Loss 2.7179 (2.7509)	Entropy 1.20567 (1.20894)	Top-1 acc 55.859 (57.945)	Top-5 acc 83.984 (79.497)	lr 0.01708
Train [46][2230/3239]	Time 0.234 (0.606)	Data Time 0.002 (0.017)	Loss 2.8017 (2.7509)	Entropy 1.20565 (1.20893)	Top-1 acc 57.422 (57.945)	Top-5 acc 77.734 (79.496)	lr 0.01707
Train [46][2240/3239]	Time 0.222 (0.605)	Data Time 0.001 (0.017)	Loss 2.9001 (2.7512)	Entropy 1.20564 (1.20891)	Top-1 acc 53.125 (57.938)	Top-5 acc 76.172 (79.489)	lr 0.01707
Train [46][2250/3239]	Time 0.223 (0.604)	Data Time 0.001 (0.017)	Loss 2.6154 (2.7513)	Entropy 1.20560 (1.20890)	Top-1 acc 62.109 (57.938)	Top-5 acc 82.422 (79.490)	lr 0.01707
Train [46][2260/3239]	Time 0.217 (0.604)	Data Time 0.001 (0.017)	Loss 3.7284 (2.7519)	Entropy 1.20555 (1.20888)	Top-1 acc 38.281 (57.920)	Top-5 acc 64.453 (79.479)	lr 0.01707
Train [46][2270/3239]	Time 0.227 (0.603)	Data Time 0.002 (0.017)	Loss 2.7780 (2.7520)	Entropy 1.20546 (1.20887)	Top-1 acc 57.031 (57.921)	Top-5 acc 77.734 (79.476)	lr 0.01707
Train [46][2280/3239]	Time 0.250 (0.602)	Data Time 0.002 (0.016)	Loss 2.7277 (2.7521)	Entropy 1.20539 (1.20885)	Top-1 acc 57.031 (57.914)	Top-5 acc 80.859 (79.475)	lr 0.01707
Train [46][2290/3239]	Time 0.219 (0.602)	Data Time 0.001 (0.016)	Loss 2.8073 (2.7521)	Entropy 1.20538 (1.20884)	Top-1 acc 58.203 (57.919)	Top-5 acc 78.125 (79.475)	lr 0.01707
Train [46][2300/3239]	Time 0.254 (0.601)	Data Time 0.001 (0.016)	Loss 2.7053 (2.7522)	Entropy 1.20537 (1.20882)	Top-1 acc 60.156 (57.920)	Top-5 acc 82.422 (79.475)	lr 0.01707
Train [46][2310/3239]	Time 0.225 (0.601)	Data Time 0.001 (0.016)	Loss 2.6426 (2.7522)	Entropy 1.20535 (1.20881)	Top-1 acc 56.641 (57.918)	Top-5 acc 82.812 (79.477)	lr 0.01707
Train [46][2320/3239]	Time 2.510 (0.600)	Data Time 0.001 (0.016)	Loss 2.6744 (2.7522)	Entropy 1.20535 (1.20879)	Top-1 acc 58.203 (57.916)	Top-5 acc 82.422 (79.477)	lr 0.01707
Train [46][2330/3239]	Time 0.227 (0.599)	Data Time 0.001 (0.016)	Loss 2.7787 (2.7522)	Entropy 1.20534 (1.20878)	Top-1 acc 55.078 (57.915)	Top-5 acc 80.469 (79.476)	lr 0.01707
Train [46][2340/3239]	Time 0.250 (0.598)	Data Time 0.002 (0.016)	Loss 2.7698 (2.7521)	Entropy 1.20530 (1.20876)	Top-1 acc 57.031 (57.916)	Top-5 acc 75.391 (79.475)	lr 0.01706
Train [46][2350/3239]	Time 0.313 (0.597)	Data Time 0.001 (0.016)	Loss 2.8361 (2.7522)	Entropy 1.20522 (1.20875)	Top-1 acc 58.984 (57.918)	Top-5 acc 77.344 (79.476)	lr 0.01706
Train [46][2360/3239]	Time 0.214 (0.597)	Data Time 0.001 (0.016)	Loss 2.8563 (2.7522)	Entropy 1.20516 (1.20873)	Top-1 acc 55.078 (57.917)	Top-5 acc 77.344 (79.478)	lr 0.01706
Train [46][2370/3239]	Time 0.211 (0.596)	Data Time 0.001 (0.016)	Loss 2.7235 (2.7522)	Entropy 1.20510 (1.20872)	Top-1 acc 58.984 (57.911)	Top-5 acc 80.078 (79.476)	lr 0.01706
Train [46][2380/3239]	Time 0.218 (0.596)	Data Time 0.001 (0.016)	Loss 2.6839 (2.7524)	Entropy 1.20497 (1.20870)	Top-1 acc 56.641 (57.902)	Top-5 acc 81.641 (79.476)	lr 0.01706
Train [46][2390/3239]	Time 0.223 (0.595)	Data Time 0.001 (0.016)	Loss 2.9315 (2.7528)	Entropy 1.20492 (1.20869)	Top-1 acc 52.344 (57.891)	Top-5 acc 76.172 (79.467)	lr 0.01706
Train [46][2400/3239]	Time 0.325 (0.595)	Data Time 0.001 (0.016)	Loss 2.6015 (2.7527)	Entropy 1.20492 (1.20867)	Top-1 acc 61.328 (57.895)	Top-5 acc 82.031 (79.468)	lr 0.01706
Train [46][2410/3239]	Time 0.229 (0.594)	Data Time 0.001 (0.016)	Loss 2.8303 (2.7528)	Entropy 1.20485 (1.20865)	Top-1 acc 57.031 (57.894)	Top-5 acc 77.734 (79.463)	lr 0.01706
Train [46][2420/3239]	Time 0.187 (0.594)	Data Time 0.001 (0.016)	Loss 2.8203 (2.7527)	Entropy 1.20482 (1.20864)	Top-1 acc 55.469 (57.895)	Top-5 acc 76.562 (79.463)	lr 0.01706
Train [46][2430/3239]	Time 2.496 (0.593)	Data Time 0.001 (0.016)	Loss 2.6652 (2.7529)	Entropy 1.20482 (1.20862)	Top-1 acc 57.812 (57.888)	Top-5 acc 78.125 (79.456)	lr 0.01706
Train [46][2440/3239]	Time 0.262 (0.592)	Data Time 0.001 (0.016)	Loss 2.6251 (2.7527)	Entropy 1.20481 (1.20861)	Top-1 acc 63.672 (57.890)	Top-5 acc 80.469 (79.460)	lr 0.01705
Train [46][2450/3239]	Time 0.329 (0.591)	Data Time 0.001 (0.015)	Loss 2.7620 (2.7526)	Entropy 1.20478 (1.20859)	Top-1 acc 57.422 (57.894)	Top-5 acc 77.734 (79.460)	lr 0.01705
Train [46][2460/3239]	Time 0.229 (0.591)	Data Time 0.001 (0.015)	Loss 2.8686 (2.7529)	Entropy 1.20478 (1.20858)	Top-1 acc 53.516 (57.888)	Top-5 acc 78.125 (79.455)	lr 0.01705
Train [46][2470/3239]	Time 0.237 (0.590)	Data Time 0.001 (0.015)	Loss 2.7062 (2.7528)	Entropy 1.20474 (1.20856)	Top-1 acc 59.375 (57.890)	Top-5 acc 80.078 (79.455)	lr 0.01705
Train [46][2480/3239]	Time 0.211 (0.590)	Data Time 0.001 (0.015)	Loss 2.7621 (2.7527)	Entropy 1.20467 (1.20855)	Top-1 acc 55.469 (57.895)	Top-5 acc 80.469 (79.456)	lr 0.01705
Train [46][2490/3239]	Time 0.226 (0.589)	Data Time 0.001 (0.015)	Loss 2.7111 (2.7529)	Entropy 1.20454 (1.20853)	Top-1 acc 58.984 (57.892)	Top-5 acc 79.688 (79.455)	lr 0.01705
Train [46][2500/3239]	Time 0.238 (0.589)	Data Time 0.002 (0.015)	Loss 2.6909 (2.7529)	Entropy 1.20443 (1.20851)	Top-1 acc 57.422 (57.888)	Top-5 acc 79.297 (79.455)	lr 0.01705
Train [46][2510/3239]	Time 0.218 (0.588)	Data Time 0.001 (0.015)	Loss 2.9057 (2.7528)	Entropy 1.20443 (1.20850)	Top-1 acc 53.125 (57.892)	Top-5 acc 75.000 (79.456)	lr 0.01705
Train [46][2520/3239]	Time 0.243 (0.588)	Data Time 0.001 (0.015)	Loss 2.7585 (2.7527)	Entropy 1.20442 (1.20848)	Top-1 acc 57.422 (57.894)	Top-5 acc 78.516 (79.458)	lr 0.01705
Train [46][2530/3239]	Time 0.248 (0.587)	Data Time 0.001 (0.015)	Loss 2.7298 (2.7528)	Entropy 1.20461 (1.20847)	Top-1 acc 56.250 (57.894)	Top-5 acc 81.250 (79.457)	lr 0.01705
Train [46][2540/3239]	Time 2.442 (0.587)	Data Time 0.001 (0.015)	Loss 2.7851 (2.7529)	Entropy 1.20461 (1.20845)	Top-1 acc 54.688 (57.887)	Top-5 acc 81.250 (79.457)	lr 0.01705
Train [46][2550/3239]	Time 0.231 (0.585)	Data Time 0.002 (0.015)	Loss 2.7363 (2.7528)	Entropy 1.20460 (1.20844)	Top-1 acc 58.984 (57.892)	Top-5 acc 80.859 (79.459)	lr 0.01704
Train [46][2560/3239]	Time 0.344 (0.604)	Data Time 0.002 (0.015)	Loss 2.8261 (2.7528)	Entropy 1.20461 (1.20842)	Top-1 acc 56.641 (57.894)	Top-5 acc 78.125 (79.459)	lr 0.01704
Train [46][2570/3239]	Time 0.239 (0.603)	Data Time 0.002 (0.015)	Loss 2.8340 (2.7529)	Entropy 1.20459 (1.20841)	Top-1 acc 56.250 (57.890)	Top-5 acc 77.734 (79.454)	lr 0.01704
Train [46][2580/3239]	Time 0.234 (0.603)	Data Time 0.002 (0.015)	Loss 2.8420 (2.7533)	Entropy 1.20456 (1.20839)	Top-1 acc 55.859 (57.878)	Top-5 acc 75.781 (79.444)	lr 0.01704
Train [46][2590/3239]	Time 0.215 (0.602)	Data Time 0.001 (0.015)	Loss 2.6556 (2.7533)	Entropy 1.20448 (1.20838)	Top-1 acc 60.156 (57.878)	Top-5 acc 78.906 (79.443)	lr 0.01704
Train [46][2600/3239]	Time 0.335 (0.602)	Data Time 0.001 (0.015)	Loss 2.8309 (2.7535)	Entropy 1.20437 (1.20836)	Top-1 acc 59.375 (57.871)	Top-5 acc 79.297 (79.438)	lr 0.01704
Train [46][2610/3239]	Time 0.220 (0.601)	Data Time 0.001 (0.015)	Loss 2.7815 (2.7533)	Entropy 1.20436 (1.20835)	Top-1 acc 59.375 (57.878)	Top-5 acc 78.906 (79.440)	lr 0.01704
Train [46][2620/3239]	Time 0.223 (0.601)	Data Time 0.001 (0.015)	Loss 2.5869 (2.7534)	Entropy 1.20435 (1.20833)	Top-1 acc 62.500 (57.878)	Top-5 acc 82.031 (79.438)	lr 0.01704
Train [46][2630/3239]	Time 0.211 (0.600)	Data Time 0.001 (0.015)	Loss 2.8696 (2.7536)	Entropy 1.20431 (1.20831)	Top-1 acc 56.250 (57.873)	Top-5 acc 73.828 (79.434)	lr 0.01704
Train [46][2640/3239]	Time 0.243 (0.600)	Data Time 0.001 (0.014)	Loss 2.8246 (2.7537)	Entropy 1.20413 (1.20830)	Top-1 acc 56.641 (57.866)	Top-5 acc 76.953 (79.430)	lr 0.01704
Train [46][2650/3239]	Time 0.294 (0.599)	Data Time 0.001 (0.014)	Loss 2.7868 (2.7540)	Entropy 1.20409 (1.20828)	Top-1 acc 58.594 (57.863)	Top-5 acc 80.078 (79.426)	lr 0.01704
Train [46][2660/3239]	Time 0.259 (0.599)	Data Time 0.001 (0.014)	Loss 2.8592 (2.7540)	Entropy 1.20410 (1.20827)	Top-1 acc 55.859 (57.861)	Top-5 acc 76.953 (79.426)	lr 0.01703
Train [46][2670/3239]	Time 0.225 (0.598)	Data Time 0.001 (0.014)	Loss 2.7370 (2.7541)	Entropy 1.20407 (1.20825)	Top-1 acc 55.859 (57.857)	Top-5 acc 79.297 (79.424)	lr 0.01703
Train [46][2680/3239]	Time 0.216 (0.597)	Data Time 0.001 (0.014)	Loss 3.0309 (2.7543)	Entropy 1.20403 (1.20824)	Top-1 acc 52.734 (57.849)	Top-5 acc 72.266 (79.418)	lr 0.01703
Train [46][2690/3239]	Time 0.217 (0.597)	Data Time 0.002 (0.014)	Loss 2.6617 (2.7543)	Entropy 1.20400 (1.20822)	Top-1 acc 61.719 (57.850)	Top-5 acc 82.031 (79.422)	lr 0.01703
Train [46][2700/3239]	Time 0.218 (0.596)	Data Time 0.001 (0.014)	Loss 2.5368 (2.7544)	Entropy 1.20398 (1.20821)	Top-1 acc 64.453 (57.849)	Top-5 acc 84.375 (79.418)	lr 0.01703
Train [46][2710/3239]	Time 0.230 (0.596)	Data Time 0.001 (0.014)	Loss 2.7847 (2.7546)	Entropy 1.20394 (1.20819)	Top-1 acc 55.078 (57.850)	Top-5 acc 78.125 (79.412)	lr 0.01703
Train [46][2720/3239]	Time 0.209 (0.595)	Data Time 0.001 (0.014)	Loss 2.8505 (2.7544)	Entropy 1.20393 (1.20817)	Top-1 acc 57.031 (57.855)	Top-5 acc 76.953 (79.418)	lr 0.01703
Train [46][2730/3239]	Time 0.205 (0.595)	Data Time 0.001 (0.014)	Loss 2.8570 (2.7544)	Entropy 1.20390 (1.20816)	Top-1 acc 51.953 (57.848)	Top-5 acc 78.516 (79.418)	lr 0.01703
Train [46][2740/3239]	Time 0.244 (0.594)	Data Time 0.001 (0.014)	Loss 2.5435 (2.7545)	Entropy 1.20387 (1.20814)	Top-1 acc 65.234 (57.850)	Top-5 acc 80.859 (79.417)	lr 0.01703
Train [46][2750/3239]	Time 0.219 (0.594)	Data Time 0.001 (0.014)	Loss 2.6734 (2.7545)	Entropy 1.20379 (1.20813)	Top-1 acc 59.375 (57.849)	Top-5 acc 80.078 (79.416)	lr 0.01703
Train [46][2760/3239]	Time 0.215 (0.593)	Data Time 0.001 (0.014)	Loss 2.7493 (2.7545)	Entropy 1.20372 (1.20811)	Top-1 acc 57.031 (57.848)	Top-5 acc 79.688 (79.414)	lr 0.01702
Train [46][2770/3239]	Time 0.216 (0.593)	Data Time 0.001 (0.014)	Loss 2.7318 (2.7544)	Entropy 1.20367 (1.20810)	Top-1 acc 62.109 (57.852)	Top-5 acc 80.078 (79.417)	lr 0.01702
Train [46][2780/3239]	Time 0.274 (0.592)	Data Time 0.001 (0.014)	Loss 2.7566 (2.7543)	Entropy 1.20362 (1.20808)	Top-1 acc 63.281 (57.857)	Top-5 acc 78.906 (79.420)	lr 0.01702
Train [46][2790/3239]	Time 0.248 (0.592)	Data Time 0.001 (0.014)	Loss 2.7167 (2.7542)	Entropy 1.20352 (1.20806)	Top-1 acc 57.422 (57.857)	Top-5 acc 81.641 (79.423)	lr 0.01702
Train [46][2800/3239]	Time 0.195 (0.591)	Data Time 0.001 (0.014)	Loss 2.8277 (2.7544)	Entropy 1.20349 (1.20805)	Top-1 acc 54.297 (57.852)	Top-5 acc 78.125 (79.421)	lr 0.01702
Train [46][2810/3239]	Time 0.231 (0.591)	Data Time 0.001 (0.014)	Loss 2.6891 (2.7545)	Entropy 1.20340 (1.20803)	Top-1 acc 58.984 (57.850)	Top-5 acc 81.250 (79.416)	lr 0.01702
Train [46][2820/3239]	Time 0.243 (0.590)	Data Time 0.001 (0.014)	Loss 2.9096 (2.7546)	Entropy 1.20343 (1.20802)	Top-1 acc 54.297 (57.846)	Top-5 acc 75.391 (79.417)	lr 0.01702
Train [46][2830/3239]	Time 0.252 (0.590)	Data Time 0.001 (0.014)	Loss 2.6306 (2.7546)	Entropy 1.20342 (1.20800)	Top-1 acc 63.281 (57.846)	Top-5 acc 82.422 (79.418)	lr 0.01702
Train [46][2840/3239]	Time 0.228 (0.589)	Data Time 0.001 (0.014)	Loss 2.8487 (2.7546)	Entropy 1.20339 (1.20798)	Top-1 acc 52.344 (57.841)	Top-5 acc 77.734 (79.417)	lr 0.01702
Train [46][2850/3239]	Time 0.248 (0.589)	Data Time 0.001 (0.014)	Loss 2.7100 (2.7546)	Entropy 1.20338 (1.20797)	Top-1 acc 59.766 (57.840)	Top-5 acc 79.297 (79.420)	lr 0.01702
Train [46][2860/3239]	Time 0.257 (0.589)	Data Time 0.001 (0.013)	Loss 2.6952 (2.7547)	Entropy 1.20335 (1.20795)	Top-1 acc 59.375 (57.839)	Top-5 acc 81.641 (79.418)	lr 0.01702
Train [46][2870/3239]	Time 0.220 (0.588)	Data Time 0.002 (0.013)	Loss 2.9939 (2.7550)	Entropy 1.20334 (1.20793)	Top-1 acc 50.781 (57.832)	Top-5 acc 77.734 (79.411)	lr 0.01701
Train [46][2880/3239]	Time 0.221 (0.588)	Data Time 0.001 (0.013)	Loss 2.8688 (2.7550)	Entropy 1.20333 (1.20792)	Top-1 acc 53.125 (57.835)	Top-5 acc 77.734 (79.412)	lr 0.01701
Train [46][2890/3239]	Time 0.460 (0.604)	Data Time 0.004 (0.013)	Loss 2.9672 (2.7550)	Entropy 1.20334 (1.20790)	Top-1 acc 52.344 (57.835)	Top-5 acc 74.219 (79.413)	lr 0.01701
Train [46][2900/3239]	Time 0.231 (0.604)	Data Time 0.002 (0.013)	Loss 2.7820 (2.7551)	Entropy 1.20334 (1.20789)	Top-1 acc 58.594 (57.829)	Top-5 acc 79.688 (79.414)	lr 0.01701
Train [46][2910/3239]	Time 0.270 (0.603)	Data Time 0.002 (0.013)	Loss 2.9020 (2.7550)	Entropy 1.20333 (1.20787)	Top-1 acc 56.250 (57.833)	Top-5 acc 76.172 (79.413)	lr 0.01701
Train [46][2920/3239]	Time 0.212 (0.603)	Data Time 0.001 (0.013)	Loss 2.6214 (2.7550)	Entropy 1.20326 (1.20786)	Top-1 acc 60.547 (57.832)	Top-5 acc 82.031 (79.414)	lr 0.01701
Train [46][2930/3239]	Time 0.254 (0.602)	Data Time 0.001 (0.013)	Loss 2.8703 (2.7550)	Entropy 1.20320 (1.20784)	Top-1 acc 55.859 (57.826)	Top-5 acc 77.734 (79.416)	lr 0.01701
Train [46][2940/3239]	Time 0.350 (0.602)	Data Time 0.001 (0.013)	Loss 2.7265 (2.7551)	Entropy 1.20319 (1.20782)	Top-1 acc 55.859 (57.822)	Top-5 acc 81.250 (79.414)	lr 0.01701
Train [46][2950/3239]	Time 0.227 (0.601)	Data Time 0.001 (0.013)	Loss 2.6447 (2.7551)	Entropy 1.20320 (1.20781)	Top-1 acc 60.547 (57.822)	Top-5 acc 80.078 (79.415)	lr 0.01701
Train [46][2960/3239]	Time 0.242 (0.601)	Data Time 0.001 (0.013)	Loss 2.8101 (2.7551)	Entropy 1.20319 (1.20779)	Top-1 acc 58.594 (57.823)	Top-5 acc 76.562 (79.413)	lr 0.01701
Train [46][2970/3239]	Time 0.226 (0.600)	Data Time 0.001 (0.013)	Loss 3.0535 (2.7555)	Entropy 1.20313 (1.20778)	Top-1 acc 50.000 (57.816)	Top-5 acc 77.344 (79.407)	lr 0.01701
Train [46][2980/3239]	Time 0.218 (0.600)	Data Time 0.001 (0.013)	Loss 2.7148 (2.7556)	Entropy 1.20306 (1.20776)	Top-1 acc 62.500 (57.814)	Top-5 acc 81.641 (79.406)	lr 0.01700
Train [46][2990/3239]	Time 0.214 (0.599)	Data Time 0.002 (0.013)	Loss 2.7711 (2.7555)	Entropy 1.20307 (1.20775)	Top-1 acc 59.375 (57.816)	Top-5 acc 80.469 (79.409)	lr 0.01700
Train [46][3000/3239]	Time 0.224 (0.599)	Data Time 0.001 (0.013)	Loss 2.6588 (2.7553)	Entropy 1.20304 (1.20773)	Top-1 acc 58.984 (57.818)	Top-5 acc 80.469 (79.412)	lr 0.01700
Train [46][3010/3239]	Time 0.227 (0.598)	Data Time 0.001 (0.013)	Loss 2.9033 (2.7555)	Entropy 1.20302 (1.20772)	Top-1 acc 54.297 (57.814)	Top-5 acc 78.906 (79.407)	lr 0.01700
Train [46][3020/3239]	Time 0.231 (0.598)	Data Time 0.001 (0.013)	Loss 2.8490 (2.7556)	Entropy 1.20293 (1.20770)	Top-1 acc 54.688 (57.812)	Top-5 acc 78.906 (79.404)	lr 0.01700
Train [46][3030/3239]	Time 0.226 (0.597)	Data Time 0.001 (0.013)	Loss 2.5339 (2.7555)	Entropy 1.20282 (1.20768)	Top-1 acc 62.109 (57.812)	Top-5 acc 85.547 (79.406)	lr 0.01700
Train [46][3040/3239]	Time 0.318 (0.597)	Data Time 0.001 (0.013)	Loss 2.8200 (2.7555)	Entropy 1.20274 (1.20767)	Top-1 acc 57.422 (57.813)	Top-5 acc 79.297 (79.407)	lr 0.01700
Train [46][3050/3239]	Time 0.231 (0.596)	Data Time 0.001 (0.013)	Loss 2.6204 (2.7555)	Entropy 1.20265 (1.20765)	Top-1 acc 60.547 (57.813)	Top-5 acc 78.906 (79.406)	lr 0.01700
Train [46][3060/3239]	Time 0.237 (0.596)	Data Time 0.001 (0.013)	Loss 2.6923 (2.7553)	Entropy 1.20261 (1.20764)	Top-1 acc 63.281 (57.818)	Top-5 acc 82.812 (79.409)	lr 0.01700
Train [46][3070/3239]	Time 0.199 (0.596)	Data Time 0.002 (0.013)	Loss 2.7519 (2.7555)	Entropy 1.20258 (1.20762)	Top-1 acc 61.328 (57.814)	Top-5 acc 77.734 (79.406)	lr 0.01700
Train [46][3080/3239]	Time 0.220 (0.595)	Data Time 0.001 (0.013)	Loss 2.7956 (2.7554)	Entropy 1.20252 (1.20760)	Top-1 acc 57.031 (57.814)	Top-5 acc 78.516 (79.405)	lr 0.01699
Train [46][3090/3239]	Time 0.231 (0.595)	Data Time 0.001 (0.013)	Loss 2.7530 (2.7556)	Entropy 1.20251 (1.20759)	Top-1 acc 55.078 (57.808)	Top-5 acc 79.688 (79.403)	lr 0.01699
Train [46][3100/3239]	Time 0.214 (0.594)	Data Time 0.001 (0.013)	Loss 2.5705 (2.7556)	Entropy 1.20249 (1.20757)	Top-1 acc 60.938 (57.807)	Top-5 acc 84.766 (79.406)	lr 0.01699
Train [46][3110/3239]	Time 0.253 (0.594)	Data Time 0.001 (0.013)	Loss 2.8617 (2.7556)	Entropy 1.20244 (1.20755)	Top-1 acc 58.594 (57.809)	Top-5 acc 75.781 (79.407)	lr 0.01699
Train [46][3120/3239]	Time 0.224 (0.593)	Data Time 0.001 (0.012)	Loss 2.7821 (2.7557)	Entropy 1.20219 (1.20754)	Top-1 acc 59.375 (57.805)	Top-5 acc 79.297 (79.402)	lr 0.01699
Train [46][3130/3239]	Time 0.267 (0.593)	Data Time 0.001 (0.012)	Loss 2.8548 (2.7560)	Entropy 1.20216 (1.20752)	Top-1 acc 55.469 (57.797)	Top-5 acc 76.172 (79.398)	lr 0.01699
Train [46][3140/3239]	Time 0.261 (0.593)	Data Time 0.001 (0.012)	Loss 2.7779 (2.7557)	Entropy 1.20216 (1.20750)	Top-1 acc 53.516 (57.801)	Top-5 acc 77.344 (79.403)	lr 0.01699
Train [46][3150/3239]	Time 0.277 (0.592)	Data Time 0.001 (0.012)	Loss 2.8270 (2.7558)	Entropy 1.20212 (1.20749)	Top-1 acc 53.906 (57.797)	Top-5 acc 80.469 (79.401)	lr 0.01699
Train [46][3160/3239]	Time 0.223 (0.592)	Data Time 0.001 (0.012)	Loss 2.7812 (2.7559)	Entropy 1.20208 (1.20747)	Top-1 acc 56.641 (57.793)	Top-5 acc 76.172 (79.399)	lr 0.01699
Train [46][3170/3239]	Time 0.226 (0.591)	Data Time 0.001 (0.012)	Loss 2.9122 (2.7560)	Entropy 1.20207 (1.20745)	Top-1 acc 55.078 (57.792)	Top-5 acc 76.562 (79.397)	lr 0.01699
Train [46][3180/3239]	Time 0.259 (0.591)	Data Time 0.000 (0.012)	Loss 2.8066 (2.7565)	Entropy 1.20200 (1.20743)	Top-1 acc 54.688 (57.782)	Top-5 acc 80.078 (79.391)	lr 0.01699
Train [46][3190/3239]	Time 0.255 (0.590)	Data Time 0.000 (0.012)	Loss 2.8641 (2.7564)	Entropy 1.20199 (1.20742)	Top-1 acc 58.203 (57.783)	Top-5 acc 78.125 (79.393)	lr 0.01698
Train [46][3200/3239]	Time 0.217 (0.590)	Data Time 0.000 (0.012)	Loss 2.8247 (2.7562)	Entropy 1.20198 (1.20740)	Top-1 acc 53.906 (57.781)	Top-5 acc 77.344 (79.394)	lr 0.01698
Train [46][3210/3239]	Time 0.228 (0.590)	Data Time 0.000 (0.012)	Loss 2.6680 (2.7563)	Entropy 1.20200 (1.20738)	Top-1 acc 60.938 (57.778)	Top-5 acc 80.469 (79.392)	lr 0.01698
Train [46][3220/3239]	Time 0.241 (0.604)	Data Time 0.000 (0.012)	Loss 2.7375 (2.7561)	Entropy 1.20191 (1.20737)	Top-1 acc 56.641 (57.782)	Top-5 acc 81.250 (79.398)	lr 0.01698
Train [46][3230/3239]	Time 0.321 (0.604)	Data Time 0.000 (0.012)	Loss 2.7777 (2.7560)	Entropy 1.20188 (1.20735)	Top-1 acc 57.031 (57.784)	Top-5 acc 80.078 (79.399)	lr 0.01698
Train [46][3239/3239]	Time 2.318 (0.603)	Data Time 0.000 (0.012)	Loss 2.9785 (2.7561)	Entropy 1.20188 (1.20734)	Top-1 acc 46.914 (57.782)	Top-5 acc 71.605 (79.399)	lr 0.01698
==========Valid [46/120]	loss 1.598	top-1 acc 63.929 (63.929)	top-5 acc 84.507	Train top-1 57.782	top-5 79.399	Entropy 1.20188	Latency-None: 0.000ms	Flops: 548.34M
Train [47][0/3239]	Time 35.369 (35.369)	Data Time 34.095 (34.095)	Loss 2.7971 (2.7971)	Entropy 1.20187 (1.20187)	Top-1 acc 57.422 (57.422)	Top-5 acc 80.859 (80.859)	lr 0.01698
Train [47][10/3239]	Time 2.755 (3.760)	Data Time 0.002 (3.101)	Loss 2.7616 (2.6726)	Entropy 1.20187 (1.20187)	Top-1 acc 56.250 (60.227)	Top-5 acc 79.297 (81.392)	lr 0.01698
Train [47][20/3239]	Time 0.247 (2.085)	Data Time 0.002 (1.625)	Loss 2.6231 (2.6686)	Entropy 1.20203 (1.20195)	Top-1 acc 61.328 (60.435)	Top-5 acc 80.469 (81.064)	lr 0.01698
Train [47][30/3239]	Time 0.224 (1.563)	Data Time 0.002 (1.102)	Loss 2.8263 (2.7067)	Entropy 1.20198 (1.20196)	Top-1 acc 55.469 (59.060)	Top-5 acc 80.469 (80.733)	lr 0.01698
Train [47][40/3239]	Time 0.328 (1.298)	Data Time 0.001 (0.833)	Loss 2.8462 (2.7170)	Entropy 1.20197 (1.20196)	Top-1 acc 57.031 (58.660)	Top-5 acc 76.172 (80.364)	lr 0.01698
Train [47][50/3239]	Time 0.221 (1.135)	Data Time 0.001 (0.670)	Loss 2.5929 (2.7205)	Entropy 1.20195 (1.20196)	Top-1 acc 62.891 (58.670)	Top-5 acc 83.203 (80.155)	lr 0.01697
Train [47][60/3239]	Time 0.228 (1.026)	Data Time 0.002 (0.561)	Loss 2.8716 (2.7194)	Entropy 1.20194 (1.20196)	Top-1 acc 55.078 (58.613)	Top-5 acc 76.953 (80.155)	lr 0.01697
Train [47][70/3239]	Time 0.217 (0.948)	Data Time 0.002 (0.482)	Loss 2.6408 (2.7333)	Entropy 1.20195 (1.20196)	Top-1 acc 61.719 (58.308)	Top-5 acc 78.906 (79.831)	lr 0.01697
Train [47][80/3239]	Time 0.279 (0.889)	Data Time 0.001 (0.423)	Loss 2.8043 (2.7272)	Entropy 1.20191 (1.20195)	Top-1 acc 56.641 (58.439)	Top-5 acc 78.125 (79.909)	lr 0.01697
Train [47][90/3239]	Time 0.328 (0.844)	Data Time 0.001 (0.377)	Loss 2.7887 (2.7257)	Entropy 1.20188 (1.20195)	Top-1 acc 58.984 (58.585)	Top-5 acc 76.562 (79.932)	lr 0.01697
Train [47][100/3239]	Time 0.223 (0.805)	Data Time 0.001 (0.339)	Loss 2.7600 (2.7233)	Entropy 1.20188 (1.20194)	Top-1 acc 62.500 (58.632)	Top-5 acc 77.344 (79.931)	lr 0.01697
Train [47][110/3239]	Time 0.224 (0.773)	Data Time 0.001 (0.309)	Loss 2.8464 (2.7220)	Entropy 1.20184 (1.20194)	Top-1 acc 53.125 (58.622)	Top-5 acc 77.344 (79.885)	lr 0.01697
Train [47][120/3239]	Time 2.424 (0.747)	Data Time 0.002 (0.284)	Loss 2.9422 (2.7196)	Entropy 1.20184 (1.20193)	Top-1 acc 54.297 (58.658)	Top-5 acc 75.000 (80.036)	lr 0.01697
Train [47][130/3239]	Time 0.229 (0.708)	Data Time 0.001 (0.262)	Loss 2.6499 (2.7221)	Entropy 1.20179 (1.20192)	Top-1 acc 58.203 (58.609)	Top-5 acc 83.594 (79.995)	lr 0.01697
Train [47][140/3239]	Time 0.353 (0.692)	Data Time 0.001 (0.244)	Loss 2.7646 (2.7253)	Entropy 1.20176 (1.20191)	Top-1 acc 60.156 (58.486)	Top-5 acc 79.297 (79.940)	lr 0.01697
Train [47][150/3239]	Time 0.219 (0.677)	Data Time 0.001 (0.228)	Loss 2.6703 (2.7242)	Entropy 1.20172 (1.20189)	Top-1 acc 60.547 (58.457)	Top-5 acc 80.469 (79.957)	lr 0.01697
Train [47][160/3239]	Time 0.214 (0.663)	Data Time 0.001 (0.214)	Loss 2.7977 (2.7267)	Entropy 1.20170 (1.20188)	Top-1 acc 56.250 (58.400)	Top-5 acc 78.906 (79.867)	lr 0.01696
Train [47][170/3239]	Time 0.246 (0.652)	Data Time 0.002 (0.201)	Loss 2.6053 (2.7260)	Entropy 1.20169 (1.20187)	Top-1 acc 61.719 (58.358)	Top-5 acc 82.031 (79.863)	lr 0.01696
Train [47][180/3239]	Time 0.216 (0.641)	Data Time 0.001 (0.190)	Loss 2.6501 (2.7250)	Entropy 1.20165 (1.20186)	Top-1 acc 58.984 (58.382)	Top-5 acc 82.031 (79.845)	lr 0.01696
Train [47][190/3239]	Time 0.245 (0.632)	Data Time 0.001 (0.180)	Loss 2.6918 (2.7241)	Entropy 1.20157 (1.20185)	Top-1 acc 59.375 (58.442)	Top-5 acc 82.422 (79.865)	lr 0.01696
Train [47][200/3239]	Time 0.225 (0.624)	Data Time 0.001 (0.171)	Loss 2.8510 (2.7259)	Entropy 1.20158 (1.20183)	Top-1 acc 54.297 (58.407)	Top-5 acc 78.516 (79.843)	lr 0.01696
Train [47][210/3239]	Time 0.243 (0.616)	Data Time 0.001 (0.163)	Loss 2.6752 (2.7253)	Entropy 1.20156 (1.20182)	Top-1 acc 58.594 (58.377)	Top-5 acc 80.078 (79.876)	lr 0.01696
Train [47][220/3239]	Time 0.301 (0.610)	Data Time 0.001 (0.156)	Loss 2.9437 (2.7273)	Entropy 1.20154 (1.20181)	Top-1 acc 54.297 (58.300)	Top-5 acc 75.391 (79.875)	lr 0.01696
Train [47][230/3239]	Time 2.443 (0.604)	Data Time 0.001 (0.149)	Loss 2.6856 (2.7289)	Entropy 1.20154 (1.20180)	Top-1 acc 63.281 (58.264)	Top-5 acc 80.078 (79.819)	lr 0.01696
Train [47][240/3239]	Time 0.381 (0.589)	Data Time 0.001 (0.143)	Loss 2.8258 (2.7310)	Entropy 1.20148 (1.20179)	Top-1 acc 56.641 (58.192)	Top-5 acc 76.953 (79.772)	lr 0.01696
Train [47][250/3239]	Time 0.229 (0.584)	Data Time 0.001 (0.138)	Loss 2.7998 (2.7323)	Entropy 1.20146 (1.20177)	Top-1 acc 53.906 (58.161)	Top-5 acc 76.562 (79.753)	lr 0.01696
Train [47][260/3239]	Time 0.222 (0.580)	Data Time 0.001 (0.132)	Loss 2.8635 (2.7317)	Entropy 1.20143 (1.20176)	Top-1 acc 57.031 (58.202)	Top-5 acc 76.172 (79.770)	lr 0.01696
Train [47][270/3239]	Time 0.227 (0.575)	Data Time 0.001 (0.128)	Loss 2.6645 (2.7319)	Entropy 1.20142 (1.20175)	Top-1 acc 59.375 (58.216)	Top-5 acc 80.078 (79.745)	lr 0.01695
Train [47][280/3239]	Time 0.248 (0.572)	Data Time 0.001 (0.123)	Loss 2.9004 (2.7338)	Entropy 1.20134 (1.20173)	Top-1 acc 55.078 (58.191)	Top-5 acc 78.125 (79.694)	lr 0.01695
Train [47][290/3239]	Time 0.423 (0.569)	Data Time 0.005 (0.119)	Loss 2.5945 (2.7324)	Entropy 1.20131 (1.20172)	Top-1 acc 59.766 (58.243)	Top-5 acc 83.203 (79.724)	lr 0.01695
Train [47][300/3239]	Time 0.227 (0.565)	Data Time 0.001 (0.115)	Loss 2.5398 (2.7311)	Entropy 1.20130 (1.20171)	Top-1 acc 60.547 (58.243)	Top-5 acc 82.812 (79.761)	lr 0.01695
Train [47][310/3239]	Time 0.222 (0.562)	Data Time 0.001 (0.111)	Loss 2.6803 (2.7302)	Entropy 1.20122 (1.20169)	Top-1 acc 60.156 (58.271)	Top-5 acc 80.859 (79.788)	lr 0.01695
Train [47][320/3239]	Time 0.219 (0.559)	Data Time 0.001 (0.108)	Loss 2.6333 (2.7297)	Entropy 1.20119 (1.20168)	Top-1 acc 61.328 (58.303)	Top-5 acc 83.984 (79.815)	lr 0.01695
Train [47][330/3239]	Time 0.243 (0.556)	Data Time 0.001 (0.105)	Loss 2.6817 (2.7293)	Entropy 1.20113 (1.20166)	Top-1 acc 58.594 (58.346)	Top-5 acc 80.859 (79.817)	lr 0.01695
Train [47][340/3239]	Time 53.885 (0.704)	Data Time 0.001 (0.102)	Loss 2.8347 (2.7294)	Entropy 1.20113 (1.20165)	Top-1 acc 59.375 (58.343)	Top-5 acc 77.344 (79.792)	lr 0.01695
Train [47][350/3239]	Time 0.220 (0.690)	Data Time 0.002 (0.099)	Loss 2.7439 (2.7293)	Entropy 1.20105 (1.20163)	Top-1 acc 58.594 (58.349)	Top-5 acc 77.344 (79.784)	lr 0.01695
Train [47][360/3239]	Time 0.223 (0.685)	Data Time 0.002 (0.096)	Loss 2.7104 (2.7296)	Entropy 1.20102 (1.20161)	Top-1 acc 57.812 (58.344)	Top-5 acc 78.906 (79.761)	lr 0.01695
Train [47][370/3239]	Time 0.247 (0.679)	Data Time 0.001 (0.094)	Loss 2.8073 (2.7291)	Entropy 1.20099 (1.20160)	Top-1 acc 57.031 (58.361)	Top-5 acc 78.906 (79.773)	lr 0.01694
Train [47][380/3239]	Time 0.337 (0.674)	Data Time 0.003 (0.091)	Loss 2.5200 (2.7289)	Entropy 1.20094 (1.20158)	Top-1 acc 62.891 (58.362)	Top-5 acc 83.594 (79.773)	lr 0.01694
Train [47][390/3239]	Time 0.234 (0.669)	Data Time 0.002 (0.089)	Loss 2.6901 (2.7289)	Entropy 1.20091 (1.20156)	Top-1 acc 60.547 (58.375)	Top-5 acc 79.688 (79.766)	lr 0.01694
Train [47][400/3239]	Time 0.229 (0.664)	Data Time 0.001 (0.087)	Loss 2.7125 (2.7305)	Entropy 1.20088 (1.20155)	Top-1 acc 55.078 (58.346)	Top-5 acc 81.250 (79.714)	lr 0.01694
Train [47][410/3239]	Time 0.211 (0.659)	Data Time 0.001 (0.085)	Loss 2.6299 (2.7303)	Entropy 1.20080 (1.20153)	Top-1 acc 60.938 (58.354)	Top-5 acc 82.812 (79.740)	lr 0.01694
Train [47][420/3239]	Time 0.218 (0.654)	Data Time 0.002 (0.083)	Loss 2.7425 (2.7292)	Entropy 1.20076 (1.20151)	Top-1 acc 56.250 (58.363)	Top-5 acc 81.641 (79.758)	lr 0.01694
Train [47][430/3239]	Time 0.355 (0.650)	Data Time 0.001 (0.081)	Loss 2.7444 (2.7290)	Entropy 1.20072 (1.20149)	Top-1 acc 60.938 (58.376)	Top-5 acc 80.078 (79.754)	lr 0.01694
Train [47][440/3239]	Time 0.241 (0.646)	Data Time 0.001 (0.079)	Loss 2.6746 (2.7291)	Entropy 1.20072 (1.20148)	Top-1 acc 59.766 (58.388)	Top-5 acc 82.812 (79.759)	lr 0.01694
Train [47][450/3239]	Time 2.444 (0.642)	Data Time 0.001 (0.077)	Loss 2.8207 (2.7296)	Entropy 1.20072 (1.20146)	Top-1 acc 56.641 (58.399)	Top-5 acc 76.172 (79.744)	lr 0.01694
Train [47][460/3239]	Time 0.224 (0.633)	Data Time 0.001 (0.076)	Loss 3.0636 (2.7299)	Entropy 1.20065 (1.20144)	Top-1 acc 53.516 (58.389)	Top-5 acc 75.000 (79.743)	lr 0.01694
Train [47][470/3239]	Time 0.236 (0.629)	Data Time 0.001 (0.074)	Loss 2.7196 (2.7289)	Entropy 1.20059 (1.20142)	Top-1 acc 58.203 (58.415)	Top-5 acc 80.859 (79.763)	lr 0.01694
Train [47][480/3239]	Time 0.323 (0.626)	Data Time 0.001 (0.073)	Loss 2.8267 (2.7296)	Entropy 1.20057 (1.20141)	Top-1 acc 54.297 (58.408)	Top-5 acc 76.953 (79.750)	lr 0.01693
Train [47][490/3239]	Time 0.216 (0.623)	Data Time 0.001 (0.071)	Loss 2.6651 (2.7294)	Entropy 1.20051 (1.20139)	Top-1 acc 60.938 (58.422)	Top-5 acc 81.250 (79.747)	lr 0.01693
Train [47][500/3239]	Time 0.233 (0.620)	Data Time 0.001 (0.070)	Loss 2.5544 (2.7286)	Entropy 1.20049 (1.20137)	Top-1 acc 62.109 (58.436)	Top-5 acc 83.984 (79.764)	lr 0.01693
Train [47][510/3239]	Time 0.224 (0.617)	Data Time 0.001 (0.069)	Loss 2.6800 (2.7292)	Entropy 1.20044 (1.20135)	Top-1 acc 61.719 (58.438)	Top-5 acc 81.250 (79.752)	lr 0.01693
Train [47][520/3239]	Time 0.224 (0.613)	Data Time 0.001 (0.067)	Loss 2.7034 (2.7291)	Entropy 1.20042 (1.20134)	Top-1 acc 58.594 (58.431)	Top-5 acc 81.250 (79.756)	lr 0.01693
Train [47][530/3239]	Time 0.323 (0.611)	Data Time 0.001 (0.066)	Loss 2.6535 (2.7288)	Entropy 1.20042 (1.20132)	Top-1 acc 64.062 (58.422)	Top-5 acc 80.859 (79.774)	lr 0.01693
Train [47][540/3239]	Time 0.213 (0.608)	Data Time 0.001 (0.065)	Loss 2.8399 (2.7292)	Entropy 1.20042 (1.20130)	Top-1 acc 53.516 (58.402)	Top-5 acc 78.906 (79.768)	lr 0.01693
Train [47][550/3239]	Time 0.263 (0.606)	Data Time 0.001 (0.064)	Loss 2.8547 (2.7292)	Entropy 1.20037 (1.20129)	Top-1 acc 56.250 (58.395)	Top-5 acc 78.906 (79.780)	lr 0.01693
Train [47][560/3239]	Time 2.520 (0.603)	Data Time 0.003 (0.063)	Loss 2.9032 (2.7291)	Entropy 1.20037 (1.20127)	Top-1 acc 55.469 (58.381)	Top-5 acc 75.391 (79.788)	lr 0.01693
Train [47][570/3239]	Time 0.253 (0.597)	Data Time 0.001 (0.062)	Loss 2.6848 (2.7289)	Entropy 1.20037 (1.20125)	Top-1 acc 60.547 (58.396)	Top-5 acc 81.250 (79.790)	lr 0.01693
Train [47][580/3239]	Time 0.322 (0.595)	Data Time 0.001 (0.061)	Loss 2.9117 (2.7306)	Entropy 1.20030 (1.20124)	Top-1 acc 52.734 (58.360)	Top-5 acc 73.047 (79.754)	lr 0.01692
Train [47][590/3239]	Time 0.235 (0.593)	Data Time 0.001 (0.060)	Loss 2.8415 (2.7314)	Entropy 1.20036 (1.20122)	Top-1 acc 57.422 (58.329)	Top-5 acc 76.172 (79.733)	lr 0.01692
Train [47][600/3239]	Time 0.240 (0.591)	Data Time 0.001 (0.059)	Loss 2.7498 (2.7310)	Entropy 1.20037 (1.20121)	Top-1 acc 54.297 (58.331)	Top-5 acc 78.125 (79.739)	lr 0.01692
Train [47][610/3239]	Time 0.232 (0.588)	Data Time 0.001 (0.058)	Loss 2.7055 (2.7313)	Entropy 1.20031 (1.20119)	Top-1 acc 59.375 (58.336)	Top-5 acc 81.250 (79.735)	lr 0.01692
Train [47][620/3239]	Time 0.231 (0.586)	Data Time 0.001 (0.057)	Loss 2.6200 (2.7314)	Entropy 1.20029 (1.20118)	Top-1 acc 60.547 (58.338)	Top-5 acc 84.375 (79.733)	lr 0.01692
Train [47][630/3239]	Time 0.224 (0.584)	Data Time 0.001 (0.056)	Loss 2.7519 (2.7316)	Entropy 1.20019 (1.20116)	Top-1 acc 57.812 (58.315)	Top-5 acc 79.688 (79.738)	lr 0.01692
Train [47][640/3239]	Time 0.228 (0.582)	Data Time 0.001 (0.055)	Loss 2.7978 (2.7330)	Entropy 1.20016 (1.20115)	Top-1 acc 57.812 (58.285)	Top-5 acc 79.297 (79.717)	lr 0.01692
Train [47][650/3239]	Time 0.213 (0.581)	Data Time 0.002 (0.054)	Loss 2.7062 (2.7329)	Entropy 1.20016 (1.20113)	Top-1 acc 56.641 (58.284)	Top-5 acc 80.859 (79.716)	lr 0.01692
Train [47][660/3239]	Time 0.270 (0.579)	Data Time 0.002 (0.053)	Loss 2.5804 (2.7323)	Entropy 1.20007 (1.20112)	Top-1 acc 64.453 (58.312)	Top-5 acc 81.641 (79.740)	lr 0.01692
Train [47][670/3239]	Time 2.438 (0.577)	Data Time 0.001 (0.053)	Loss 2.8862 (2.7323)	Entropy 1.20007 (1.20110)	Top-1 acc 57.422 (58.321)	Top-5 acc 75.781 (79.737)	lr 0.01692
Train [47][680/3239]	Time 0.332 (0.572)	Data Time 0.001 (0.052)	Loss 2.7750 (2.7316)	Entropy 1.20008 (1.20109)	Top-1 acc 57.812 (58.340)	Top-5 acc 78.516 (79.745)	lr 0.01692
Train [47][690/3239]	Time 0.226 (0.571)	Data Time 0.001 (0.051)	Loss 2.8881 (2.7320)	Entropy 1.20008 (1.20107)	Top-1 acc 54.688 (58.332)	Top-5 acc 74.609 (79.733)	lr 0.01691
Train [47][700/3239]	Time 0.240 (0.569)	Data Time 0.001 (0.050)	Loss 2.7166 (2.7318)	Entropy 1.20007 (1.20106)	Top-1 acc 60.938 (58.346)	Top-5 acc 80.469 (79.742)	lr 0.01691
Train [47][710/3239]	Time 0.266 (0.632)	Data Time 0.003 (0.050)	Loss 2.6846 (2.7320)	Entropy 1.20005 (1.20105)	Top-1 acc 60.156 (58.343)	Top-5 acc 78.906 (79.740)	lr 0.01691
Train [47][720/3239]	Time 0.215 (0.631)	Data Time 0.002 (0.049)	Loss 2.8168 (2.7315)	Entropy 1.19993 (1.20103)	Top-1 acc 58.203 (58.378)	Top-5 acc 76.562 (79.740)	lr 0.01691
Train [47][730/3239]	Time 0.342 (0.629)	Data Time 0.001 (0.048)	Loss 2.8609 (2.7319)	Entropy 1.19990 (1.20102)	Top-1 acc 50.781 (58.379)	Top-5 acc 77.734 (79.732)	lr 0.01691
Train [47][740/3239]	Time 0.202 (0.626)	Data Time 0.001 (0.048)	Loss 2.9927 (2.7328)	Entropy 1.19988 (1.20100)	Top-1 acc 51.953 (58.370)	Top-5 acc 73.047 (79.705)	lr 0.01691
Train [47][750/3239]	Time 0.224 (0.624)	Data Time 0.001 (0.047)	Loss 2.7164 (2.7327)	Entropy 1.19980 (1.20099)	Top-1 acc 58.984 (58.359)	Top-5 acc 79.297 (79.712)	lr 0.01691
Train [47][760/3239]	Time 0.219 (0.621)	Data Time 0.001 (0.047)	Loss 2.7076 (2.7328)	Entropy 1.19980 (1.20097)	Top-1 acc 60.156 (58.364)	Top-5 acc 80.859 (79.710)	lr 0.01691
Train [47][770/3239]	Time 0.269 (0.619)	Data Time 0.001 (0.046)	Loss 2.5963 (2.7334)	Entropy 1.19973 (1.20095)	Top-1 acc 60.938 (58.349)	Top-5 acc 81.250 (79.702)	lr 0.01691
Train [47][780/3239]	Time 2.441 (0.617)	Data Time 0.002 (0.045)	Loss 2.8251 (2.7337)	Entropy 1.19973 (1.20094)	Top-1 acc 56.641 (58.348)	Top-5 acc 78.516 (79.698)	lr 0.01691
Train [47][790/3239]	Time 0.232 (0.612)	Data Time 0.001 (0.045)	Loss 2.7231 (2.7340)	Entropy 1.19972 (1.20092)	Top-1 acc 59.766 (58.339)	Top-5 acc 81.250 (79.707)	lr 0.01691
Train [47][800/3239]	Time 0.143 (0.610)	Data Time 0.001 (0.044)	Loss 2.6999 (2.7338)	Entropy 1.19974 (1.20091)	Top-1 acc 61.719 (58.339)	Top-5 acc 81.250 (79.716)	lr 0.01690
Train [47][810/3239]	Time 0.251 (0.608)	Data Time 0.001 (0.044)	Loss 2.8076 (2.7337)	Entropy 1.19973 (1.20089)	Top-1 acc 58.203 (58.351)	Top-5 acc 78.125 (79.725)	lr 0.01690
Train [47][820/3239]	Time 0.230 (0.607)	Data Time 0.001 (0.043)	Loss 2.6952 (2.7337)	Entropy 1.19973 (1.20088)	Top-1 acc 62.109 (58.345)	Top-5 acc 78.906 (79.721)	lr 0.01690
Train [47][830/3239]	Time 0.335 (0.605)	Data Time 0.001 (0.043)	Loss 2.6028 (2.7333)	Entropy 1.19970 (1.20087)	Top-1 acc 59.375 (58.332)	Top-5 acc 83.203 (79.722)	lr 0.01690
Train [47][840/3239]	Time 0.209 (0.603)	Data Time 0.001 (0.042)	Loss 2.9078 (2.7335)	Entropy 1.19964 (1.20085)	Top-1 acc 52.344 (58.328)	Top-5 acc 76.172 (79.718)	lr 0.01690
Train [47][850/3239]	Time 0.216 (0.602)	Data Time 0.001 (0.042)	Loss 2.5117 (2.7330)	Entropy 1.19962 (1.20084)	Top-1 acc 65.625 (58.349)	Top-5 acc 85.547 (79.723)	lr 0.01690
Train [47][860/3239]	Time 0.225 (0.600)	Data Time 0.001 (0.041)	Loss 2.7428 (2.7337)	Entropy 1.19957 (1.20082)	Top-1 acc 57.031 (58.330)	Top-5 acc 80.469 (79.710)	lr 0.01690
Train [47][870/3239]	Time 0.214 (0.598)	Data Time 0.001 (0.041)	Loss 2.8759 (2.7337)	Entropy 1.19957 (1.20081)	Top-1 acc 54.688 (58.336)	Top-5 acc 77.344 (79.706)	lr 0.01690
Train [47][880/3239]	Time 0.336 (0.597)	Data Time 0.001 (0.041)	Loss 2.7914 (2.7340)	Entropy 1.19952 (1.20079)	Top-1 acc 55.078 (58.322)	Top-5 acc 77.344 (79.703)	lr 0.01690
Train [47][890/3239]	Time 2.293 (0.595)	Data Time 0.001 (0.040)	Loss 2.6839 (2.7344)	Entropy 1.19952 (1.20078)	Top-1 acc 55.078 (58.301)	Top-5 acc 82.812 (79.694)	lr 0.01690
Train [47][900/3239]	Time 0.229 (0.591)	Data Time 0.002 (0.040)	Loss 2.6791 (2.7345)	Entropy 1.19948 (1.20077)	Top-1 acc 58.984 (58.312)	Top-5 acc 82.422 (79.692)	lr 0.01689
Train [47][910/3239]	Time 0.213 (0.590)	Data Time 0.001 (0.039)	Loss 2.7291 (2.7344)	Entropy 1.19948 (1.20075)	Top-1 acc 58.203 (58.312)	Top-5 acc 78.906 (79.697)	lr 0.01689
Train [47][920/3239]	Time 0.225 (0.588)	Data Time 0.001 (0.039)	Loss 2.6887 (2.7344)	Entropy 1.19946 (1.20074)	Top-1 acc 57.812 (58.320)	Top-5 acc 82.031 (79.700)	lr 0.01689
Train [47][930/3239]	Time 0.311 (0.587)	Data Time 0.001 (0.038)	Loss 2.7641 (2.7347)	Entropy 1.19942 (1.20072)	Top-1 acc 60.938 (58.319)	Top-5 acc 76.953 (79.689)	lr 0.01689
Train [47][940/3239]	Time 0.243 (0.586)	Data Time 0.001 (0.038)	Loss 2.6146 (2.7347)	Entropy 1.19931 (1.20071)	Top-1 acc 63.281 (58.321)	Top-5 acc 84.375 (79.694)	lr 0.01689
Train [47][950/3239]	Time 0.236 (0.584)	Data Time 0.001 (0.038)	Loss 2.9419 (2.7350)	Entropy 1.19923 (1.20069)	Top-1 acc 54.297 (58.309)	Top-5 acc 76.562 (79.696)	lr 0.01689
Train [47][960/3239]	Time 0.206 (0.583)	Data Time 0.001 (0.037)	Loss 2.6909 (2.7348)	Entropy 1.19921 (1.20068)	Top-1 acc 61.719 (58.321)	Top-5 acc 81.250 (79.700)	lr 0.01689
Train [47][970/3239]	Time 0.205 (0.582)	Data Time 0.001 (0.037)	Loss 2.8034 (2.7351)	Entropy 1.19916 (1.20066)	Top-1 acc 58.984 (58.322)	Top-5 acc 80.078 (79.700)	lr 0.01689
Train [47][980/3239]	Time 0.242 (0.580)	Data Time 0.001 (0.037)	Loss 2.5315 (2.7350)	Entropy 1.19915 (1.20065)	Top-1 acc 61.328 (58.319)	Top-5 acc 83.984 (79.705)	lr 0.01689
Train [47][990/3239]	Time 0.228 (0.579)	Data Time 0.001 (0.036)	Loss 2.5092 (2.7349)	Entropy 1.19910 (1.20063)	Top-1 acc 62.109 (58.317)	Top-5 acc 84.766 (79.712)	lr 0.01689
Train [47][1000/3239]	Time 2.434 (0.578)	Data Time 0.001 (0.036)	Loss 2.6826 (2.7347)	Entropy 1.19910 (1.20062)	Top-1 acc 58.203 (58.312)	Top-5 acc 81.641 (79.712)	lr 0.01689
Train [47][1010/3239]	Time 0.214 (0.575)	Data Time 0.001 (0.035)	Loss 2.8108 (2.7351)	Entropy 1.19907 (1.20060)	Top-1 acc 56.250 (58.316)	Top-5 acc 80.078 (79.701)	lr 0.01688
Train [47][1020/3239]	Time 0.233 (0.573)	Data Time 0.001 (0.035)	Loss 2.5622 (2.7345)	Entropy 1.19903 (1.20059)	Top-1 acc 64.453 (58.329)	Top-5 acc 83.984 (79.707)	lr 0.01688
Train [47][1030/3239]	Time 0.314 (0.572)	Data Time 0.001 (0.035)	Loss 2.7805 (2.7347)	Entropy 1.19889 (1.20057)	Top-1 acc 55.469 (58.324)	Top-5 acc 78.516 (79.702)	lr 0.01688
Train [47][1040/3239]	Time 0.232 (0.571)	Data Time 0.001 (0.035)	Loss 2.7593 (2.7348)	Entropy 1.19883 (1.20055)	Top-1 acc 57.031 (58.327)	Top-5 acc 80.078 (79.694)	lr 0.01688
Train [47][1050/3239]	Time 0.232 (0.570)	Data Time 0.001 (0.034)	Loss 2.7235 (2.7344)	Entropy 1.19882 (1.20054)	Top-1 acc 56.250 (58.318)	Top-5 acc 78.125 (79.699)	lr 0.01688
Train [47][1060/3239]	Time 0.237 (0.569)	Data Time 0.001 (0.034)	Loss 2.7705 (2.7338)	Entropy 1.19880 (1.20052)	Top-1 acc 58.594 (58.324)	Top-5 acc 80.078 (79.714)	lr 0.01688
Train [47][1070/3239]	Time 0.269 (0.611)	Data Time 0.002 (0.034)	Loss 2.6540 (2.7337)	Entropy 1.19873 (1.20051)	Top-1 acc 57.812 (58.329)	Top-5 acc 83.984 (79.714)	lr 0.01688
Train [47][1080/3239]	Time 0.303 (0.611)	Data Time 0.002 (0.033)	Loss 2.7673 (2.7340)	Entropy 1.19871 (1.20049)	Top-1 acc 57.031 (58.325)	Top-5 acc 80.469 (79.718)	lr 0.01688
Train [47][1090/3239]	Time 0.221 (0.609)	Data Time 0.001 (0.033)	Loss 2.8178 (2.7343)	Entropy 1.19855 (1.20047)	Top-1 acc 53.516 (58.322)	Top-5 acc 79.297 (79.715)	lr 0.01688
Train [47][1100/3239]	Time 0.282 (0.608)	Data Time 0.001 (0.033)	Loss 2.6485 (2.7344)	Entropy 1.19855 (1.20045)	Top-1 acc 63.281 (58.324)	Top-5 acc 79.297 (79.712)	lr 0.01688
Train [47][1110/3239]	Time 2.503 (0.607)	Data Time 0.002 (0.033)	Loss 2.6076 (2.7343)	Entropy 1.19855 (1.20044)	Top-1 acc 61.719 (58.322)	Top-5 acc 82.812 (79.716)	lr 0.01687
Train [47][1120/3239]	Time 0.247 (0.604)	Data Time 0.001 (0.032)	Loss 2.7980 (2.7341)	Entropy 1.19854 (1.20042)	Top-1 acc 55.469 (58.329)	Top-5 acc 77.344 (79.719)	lr 0.01687
Train [47][1130/3239]	Time 0.312 (0.603)	Data Time 0.001 (0.032)	Loss 2.5296 (2.7336)	Entropy 1.19850 (1.20040)	Top-1 acc 67.969 (58.351)	Top-5 acc 83.594 (79.733)	lr 0.01687
Train [47][1140/3239]	Time 0.209 (0.601)	Data Time 0.001 (0.032)	Loss 2.6018 (2.7338)	Entropy 1.19849 (1.20039)	Top-1 acc 64.062 (58.353)	Top-5 acc 81.250 (79.734)	lr 0.01687
Train [47][1150/3239]	Time 0.215 (0.600)	Data Time 0.001 (0.031)	Loss 2.8576 (2.7340)	Entropy 1.19840 (1.20037)	Top-1 acc 52.734 (58.345)	Top-5 acc 76.953 (79.723)	lr 0.01687
Train [47][1160/3239]	Time 0.233 (0.599)	Data Time 0.001 (0.031)	Loss 2.7570 (2.7343)	Entropy 1.19836 (1.20035)	Top-1 acc 54.688 (58.332)	Top-5 acc 82.422 (79.716)	lr 0.01687
Train [47][1170/3239]	Time 0.171 (0.597)	Data Time 0.001 (0.031)	Loss 2.9623 (2.7343)	Entropy 1.19829 (1.20034)	Top-1 acc 51.953 (58.339)	Top-5 acc 74.219 (79.716)	lr 0.01687
Train [47][1180/3239]	Time 0.316 (0.596)	Data Time 0.001 (0.031)	Loss 2.6874 (2.7343)	Entropy 1.19828 (1.20032)	Top-1 acc 56.641 (58.334)	Top-5 acc 78.906 (79.713)	lr 0.01687
Train [47][1190/3239]	Time 0.220 (0.595)	Data Time 0.001 (0.031)	Loss 2.7521 (2.7343)	Entropy 1.19825 (1.20030)	Top-1 acc 56.250 (58.334)	Top-5 acc 82.031 (79.710)	lr 0.01687
Train [47][1200/3239]	Time 0.215 (0.594)	Data Time 0.001 (0.030)	Loss 2.7573 (2.7345)	Entropy 1.19822 (1.20028)	Top-1 acc 57.812 (58.328)	Top-5 acc 78.125 (79.712)	lr 0.01687
Train [47][1210/3239]	Time 0.218 (0.593)	Data Time 0.001 (0.030)	Loss 2.7850 (2.7342)	Entropy 1.19820 (1.20027)	Top-1 acc 57.422 (58.341)	Top-5 acc 82.422 (79.714)	lr 0.01687
Train [47][1220/3239]	Time 2.390 (0.592)	Data Time 0.005 (0.030)	Loss 2.6534 (2.7342)	Entropy 1.19820 (1.20025)	Top-1 acc 62.500 (58.338)	Top-5 acc 80.469 (79.717)	lr 0.01686
Train [47][1230/3239]	Time 0.272 (0.589)	Data Time 0.001 (0.030)	Loss 2.8055 (2.7339)	Entropy 1.19815 (1.20023)	Top-1 acc 57.422 (58.351)	Top-5 acc 76.953 (79.719)	lr 0.01686
Train [47][1240/3239]	Time 0.227 (0.588)	Data Time 0.001 (0.029)	Loss 2.7463 (2.7350)	Entropy 1.19810 (1.20022)	Top-1 acc 58.203 (58.329)	Top-5 acc 77.344 (79.700)	lr 0.01686
Train [47][1250/3239]	Time 0.207 (0.586)	Data Time 0.001 (0.029)	Loss 2.9453 (2.7349)	Entropy 1.19810 (1.20020)	Top-1 acc 50.781 (58.328)	Top-5 acc 75.000 (79.699)	lr 0.01686
Train [47][1260/3239]	Time 0.210 (0.585)	Data Time 0.001 (0.029)	Loss 2.6073 (2.7347)	Entropy 1.19807 (1.20018)	Top-1 acc 61.328 (58.333)	Top-5 acc 82.422 (79.699)	lr 0.01686
Train [47][1270/3239]	Time 0.220 (0.584)	Data Time 0.001 (0.029)	Loss 2.6387 (2.7346)	Entropy 1.19807 (1.20017)	Top-1 acc 62.891 (58.345)	Top-5 acc 79.688 (79.701)	lr 0.01686
Train [47][1280/3239]	Time 0.190 (0.583)	Data Time 0.001 (0.028)	Loss 2.6398 (2.7350)	Entropy 1.19805 (1.20015)	Top-1 acc 61.719 (58.341)	Top-5 acc 81.641 (79.698)	lr 0.01686
Train [47][1290/3239]	Time 0.287 (0.582)	Data Time 0.001 (0.028)	Loss 2.8027 (2.7349)	Entropy 1.19802 (1.20013)	Top-1 acc 54.297 (58.345)	Top-5 acc 77.734 (79.702)	lr 0.01686
Train [47][1300/3239]	Time 0.222 (0.581)	Data Time 0.001 (0.028)	Loss 2.7248 (2.7347)	Entropy 1.19798 (1.20012)	Top-1 acc 59.766 (58.346)	Top-5 acc 80.078 (79.706)	lr 0.01686
Train [47][1310/3239]	Time 0.213 (0.580)	Data Time 0.001 (0.028)	Loss 2.7889 (2.7347)	Entropy 1.19794 (1.20010)	Top-1 acc 55.078 (58.344)	Top-5 acc 79.688 (79.709)	lr 0.01686
Train [47][1320/3239]	Time 0.254 (0.580)	Data Time 0.001 (0.028)	Loss 3.2626 (2.7356)	Entropy 1.19785 (1.20008)	Top-1 acc 47.656 (58.330)	Top-5 acc 70.312 (79.692)	lr 0.01685
Train [47][1330/3239]	Time 2.565 (0.579)	Data Time 0.001 (0.027)	Loss 2.8877 (2.7352)	Entropy 1.19785 (1.20007)	Top-1 acc 50.000 (58.335)	Top-5 acc 78.516 (79.700)	lr 0.01685
Train [47][1340/3239]	Time 0.212 (0.576)	Data Time 0.001 (0.027)	Loss 2.7319 (2.7355)	Entropy 1.19784 (1.20005)	Top-1 acc 60.938 (58.331)	Top-5 acc 81.250 (79.699)	lr 0.01685
Train [47][1350/3239]	Time 0.215 (0.575)	Data Time 0.001 (0.027)	Loss 2.7090 (2.7355)	Entropy 1.19778 (1.20003)	Top-1 acc 60.547 (58.326)	Top-5 acc 79.688 (79.702)	lr 0.01685
Train [47][1360/3239]	Time 0.248 (0.574)	Data Time 0.001 (0.027)	Loss 2.8202 (2.7355)	Entropy 1.19776 (1.20002)	Top-1 acc 56.250 (58.334)	Top-5 acc 78.125 (79.702)	lr 0.01685
Train [47][1370/3239]	Time 0.218 (0.574)	Data Time 0.001 (0.027)	Loss 2.6365 (2.7351)	Entropy 1.19774 (1.20000)	Top-1 acc 61.328 (58.342)	Top-5 acc 81.641 (79.708)	lr 0.01685
Train [47][1380/3239]	Time 0.316 (0.573)	Data Time 0.001 (0.027)	Loss 2.7562 (2.7352)	Entropy 1.19772 (1.19998)	Top-1 acc 56.641 (58.332)	Top-5 acc 80.859 (79.711)	lr 0.01685
Train [47][1390/3239]	Time 0.205 (0.572)	Data Time 0.001 (0.026)	Loss 2.5747 (2.7355)	Entropy 1.19769 (1.19997)	Top-1 acc 57.422 (58.324)	Top-5 acc 85.156 (79.707)	lr 0.01685
Train [47][1400/3239]	Time 0.222 (0.571)	Data Time 0.001 (0.026)	Loss 2.6996 (2.7358)	Entropy 1.19766 (1.19995)	Top-1 acc 57.812 (58.324)	Top-5 acc 80.469 (79.704)	lr 0.01685
Train [47][1410/3239]	Time 0.245 (0.570)	Data Time 0.030 (0.026)	Loss 2.9764 (2.7364)	Entropy 1.19763 (1.19993)	Top-1 acc 55.078 (58.308)	Top-5 acc 76.953 (79.691)	lr 0.01685
Train [47][1420/3239]	Time 0.200 (0.570)	Data Time 0.001 (0.026)	Loss 2.6184 (2.7366)	Entropy 1.19764 (1.19992)	Top-1 acc 62.891 (58.306)	Top-5 acc 80.078 (79.687)	lr 0.01685
Train [47][1430/3239]	Time 0.492 (0.603)	Data Time 0.004 (0.026)	Loss 2.5866 (2.7363)	Entropy 1.19758 (1.19990)	Top-1 acc 57.812 (58.313)	Top-5 acc 82.812 (79.689)	lr 0.01684
Train [47][1440/3239]	Time 2.448 (0.603)	Data Time 0.002 (0.026)	Loss 2.8529 (2.7364)	Entropy 1.19758 (1.19989)	Top-1 acc 55.078 (58.312)	Top-5 acc 76.953 (79.690)	lr 0.01684
Train [47][1450/3239]	Time 0.232 (0.600)	Data Time 0.002 (0.025)	Loss 2.6044 (2.7365)	Entropy 1.19755 (1.19987)	Top-1 acc 62.109 (58.307)	Top-5 acc 83.594 (79.689)	lr 0.01684
Train [47][1460/3239]	Time 0.226 (0.599)	Data Time 0.001 (0.025)	Loss 2.5596 (2.7364)	Entropy 1.19755 (1.19985)	Top-1 acc 61.328 (58.315)	Top-5 acc 80.859 (79.688)	lr 0.01684
Train [47][1470/3239]	Time 0.232 (0.598)	Data Time 0.001 (0.025)	Loss 2.7654 (2.7364)	Entropy 1.19759 (1.19984)	Top-1 acc 60.156 (58.315)	Top-5 acc 78.516 (79.689)	lr 0.01684
Train [47][1480/3239]	Time 0.319 (0.597)	Data Time 0.001 (0.025)	Loss 2.8417 (2.7367)	Entropy 1.19757 (1.19982)	Top-1 acc 57.812 (58.308)	Top-5 acc 77.734 (79.681)	lr 0.01684
Train [47][1490/3239]	Time 0.228 (0.596)	Data Time 0.001 (0.025)	Loss 2.7687 (2.7365)	Entropy 1.19754 (1.19981)	Top-1 acc 58.594 (58.314)	Top-5 acc 78.906 (79.686)	lr 0.01684
Train [47][1500/3239]	Time 0.220 (0.595)	Data Time 0.001 (0.025)	Loss 2.6657 (2.7363)	Entropy 1.19752 (1.19979)	Top-1 acc 63.672 (58.319)	Top-5 acc 81.250 (79.698)	lr 0.01684
Train [47][1510/3239]	Time 0.242 (0.594)	Data Time 0.001 (0.024)	Loss 2.7651 (2.7359)	Entropy 1.19752 (1.19978)	Top-1 acc 58.203 (58.335)	Top-5 acc 81.250 (79.707)	lr 0.01684
Train [47][1520/3239]	Time 0.227 (0.593)	Data Time 0.002 (0.024)	Loss 2.7187 (2.7360)	Entropy 1.19754 (1.19976)	Top-1 acc 58.594 (58.337)	Top-5 acc 78.906 (79.707)	lr 0.01684
Train [47][1530/3239]	Time 0.304 (0.593)	Data Time 0.001 (0.024)	Loss 2.7468 (2.7363)	Entropy 1.19750 (1.19975)	Top-1 acc 57.422 (58.329)	Top-5 acc 79.688 (79.696)	lr 0.01683
Train [47][1540/3239]	Time 0.256 (0.592)	Data Time 0.001 (0.024)	Loss 2.6739 (2.7364)	Entropy 1.19745 (1.19973)	Top-1 acc 62.891 (58.330)	Top-5 acc 82.031 (79.696)	lr 0.01683
Train [47][1550/3239]	Time 2.419 (0.591)	Data Time 0.001 (0.024)	Loss 2.9104 (2.7364)	Entropy 1.19745 (1.19972)	Top-1 acc 51.562 (58.327)	Top-5 acc 75.000 (79.691)	lr 0.01683
Train [47][1560/3239]	Time 0.227 (0.589)	Data Time 0.001 (0.024)	Loss 2.6852 (2.7365)	Entropy 1.19739 (1.19970)	Top-1 acc 58.203 (58.317)	Top-5 acc 81.250 (79.688)	lr 0.01683
Train [47][1570/3239]	Time 0.238 (0.588)	Data Time 0.001 (0.024)	Loss 2.8336 (2.7366)	Entropy 1.19738 (1.19969)	Top-1 acc 56.250 (58.315)	Top-5 acc 76.953 (79.694)	lr 0.01683
Train [47][1580/3239]	Time 0.387 (0.587)	Data Time 0.001 (0.023)	Loss 2.7761 (2.7363)	Entropy 1.19732 (1.19967)	Top-1 acc 55.859 (58.316)	Top-5 acc 76.953 (79.701)	lr 0.01683
Train [47][1590/3239]	Time 0.235 (0.586)	Data Time 0.001 (0.023)	Loss 2.8041 (2.7367)	Entropy 1.19730 (1.19966)	Top-1 acc 55.078 (58.304)	Top-5 acc 76.562 (79.689)	lr 0.01683
Train [47][1600/3239]	Time 0.262 (0.586)	Data Time 0.001 (0.023)	Loss 2.8475 (2.7369)	Entropy 1.19729 (1.19965)	Top-1 acc 53.906 (58.296)	Top-5 acc 77.344 (79.687)	lr 0.01683
Train [47][1610/3239]	Time 0.222 (0.585)	Data Time 0.002 (0.023)	Loss 2.7486 (2.7369)	Entropy 1.19728 (1.19963)	Top-1 acc 57.812 (58.302)	Top-5 acc 78.906 (79.690)	lr 0.01683
Train [47][1620/3239]	Time 0.230 (0.584)	Data Time 0.001 (0.023)	Loss 2.6973 (2.7370)	Entropy 1.19726 (1.19962)	Top-1 acc 58.984 (58.296)	Top-5 acc 80.859 (79.687)	lr 0.01683
Train [47][1630/3239]	Time 0.316 (0.583)	Data Time 0.001 (0.023)	Loss 2.5942 (2.7367)	Entropy 1.19725 (1.19960)	Top-1 acc 61.328 (58.302)	Top-5 acc 82.812 (79.695)	lr 0.01683
Train [47][1640/3239]	Time 0.246 (0.583)	Data Time 0.001 (0.023)	Loss 2.6589 (2.7368)	Entropy 1.19724 (1.19959)	Top-1 acc 60.938 (58.297)	Top-5 acc 83.203 (79.694)	lr 0.01682
Train [47][1650/3239]	Time 0.167 (0.582)	Data Time 0.001 (0.023)	Loss 2.6939 (2.7368)	Entropy 1.19725 (1.19957)	Top-1 acc 57.422 (58.295)	Top-5 acc 79.688 (79.693)	lr 0.01682
Train [47][1660/3239]	Time 2.454 (0.581)	Data Time 0.001 (0.022)	Loss 2.8144 (2.7366)	Entropy 1.19725 (1.19956)	Top-1 acc 58.203 (58.298)	Top-5 acc 76.562 (79.702)	lr 0.01682
Train [47][1670/3239]	Time 0.258 (0.579)	Data Time 0.001 (0.022)	Loss 2.5932 (2.7368)	Entropy 1.19726 (1.19955)	Top-1 acc 66.016 (58.294)	Top-5 acc 82.812 (79.697)	lr 0.01682
Train [47][1680/3239]	Time 0.289 (0.578)	Data Time 0.001 (0.022)	Loss 2.8386 (2.7364)	Entropy 1.19724 (1.19953)	Top-1 acc 57.812 (58.307)	Top-5 acc 75.781 (79.706)	lr 0.01682
Train [47][1690/3239]	Time 0.238 (0.578)	Data Time 0.001 (0.022)	Loss 2.7562 (2.7366)	Entropy 1.19725 (1.19952)	Top-1 acc 56.641 (58.299)	Top-5 acc 78.516 (79.705)	lr 0.01682
Train [47][1700/3239]	Time 0.240 (0.577)	Data Time 0.002 (0.022)	Loss 2.6843 (2.7366)	Entropy 1.19723 (1.19950)	Top-1 acc 58.594 (58.293)	Top-5 acc 82.031 (79.707)	lr 0.01682
Train [47][1710/3239]	Time 0.218 (0.576)	Data Time 0.001 (0.022)	Loss 2.8345 (2.7366)	Entropy 1.19719 (1.19949)	Top-1 acc 59.766 (58.288)	Top-5 acc 75.391 (79.703)	lr 0.01682
Train [47][1720/3239]	Time 0.233 (0.576)	Data Time 0.001 (0.022)	Loss 2.6890 (2.7366)	Entropy 1.19717 (1.19948)	Top-1 acc 59.766 (58.282)	Top-5 acc 80.469 (79.706)	lr 0.01682
Train [47][1730/3239]	Time 0.245 (0.575)	Data Time 0.001 (0.022)	Loss 2.7967 (2.7369)	Entropy 1.19713 (1.19946)	Top-1 acc 56.250 (58.270)	Top-5 acc 80.469 (79.701)	lr 0.01682
Train [47][1740/3239]	Time 0.220 (0.574)	Data Time 0.001 (0.021)	Loss 2.8704 (2.7369)	Entropy 1.19712 (1.19945)	Top-1 acc 54.297 (58.265)	Top-5 acc 77.734 (79.702)	lr 0.01682
Train [47][1750/3239]	Time 0.236 (0.574)	Data Time 0.002 (0.021)	Loss 2.7119 (2.7369)	Entropy 1.19707 (1.19944)	Top-1 acc 58.203 (58.262)	Top-5 acc 79.688 (79.698)	lr 0.01681
Train [47][1760/3239]	Time 0.282 (0.573)	Data Time 0.001 (0.021)	Loss 2.6831 (2.7371)	Entropy 1.19704 (1.19942)	Top-1 acc 58.984 (58.254)	Top-5 acc 80.469 (79.696)	lr 0.01681
Train [47][1770/3239]	Time 2.453 (0.573)	Data Time 0.001 (0.021)	Loss 2.7358 (2.7374)	Entropy 1.19704 (1.19941)	Top-1 acc 56.641 (58.247)	Top-5 acc 80.469 (79.689)	lr 0.01681
Train [47][1780/3239]	Time 0.224 (0.571)	Data Time 0.002 (0.021)	Loss 2.6984 (2.7376)	Entropy 1.19702 (1.19940)	Top-1 acc 59.766 (58.239)	Top-5 acc 82.031 (79.685)	lr 0.01681
Train [47][1790/3239]	Time 0.215 (0.570)	Data Time 0.002 (0.021)	Loss 2.6258 (2.7378)	Entropy 1.19704 (1.19938)	Top-1 acc 63.672 (58.240)	Top-5 acc 81.250 (79.681)	lr 0.01681
Train [47][1800/3239]	Time 0.237 (0.596)	Data Time 0.004 (0.021)	Loss 2.5663 (2.7378)	Entropy 1.19703 (1.19937)	Top-1 acc 62.109 (58.240)	Top-5 acc 82.812 (79.680)	lr 0.01681
Train [47][1810/3239]	Time 0.235 (0.595)	Data Time 0.002 (0.021)	Loss 2.6130 (2.7380)	Entropy 1.19699 (1.19936)	Top-1 acc 60.938 (58.233)	Top-5 acc 82.812 (79.681)	lr 0.01681
Train [47][1820/3239]	Time 0.233 (0.594)	Data Time 0.002 (0.021)	Loss 2.8190 (2.7385)	Entropy 1.19694 (1.19934)	Top-1 acc 55.078 (58.224)	Top-5 acc 77.344 (79.673)	lr 0.01681
Train [47][1830/3239]	Time 0.214 (0.594)	Data Time 0.001 (0.021)	Loss 2.6384 (2.7382)	Entropy 1.19691 (1.19933)	Top-1 acc 63.672 (58.229)	Top-5 acc 82.422 (79.676)	lr 0.01681
Train [47][1840/3239]	Time 0.206 (0.593)	Data Time 0.001 (0.020)	Loss 2.7268 (2.7382)	Entropy 1.19689 (1.19932)	Top-1 acc 59.766 (58.228)	Top-5 acc 78.125 (79.675)	lr 0.01681
Train [47][1850/3239]	Time 0.202 (0.592)	Data Time 0.001 (0.020)	Loss 2.5244 (2.7381)	Entropy 1.19687 (1.19931)	Top-1 acc 65.625 (58.234)	Top-5 acc 82.031 (79.673)	lr 0.01680
Train [47][1860/3239]	Time 0.214 (0.592)	Data Time 0.002 (0.020)	Loss 2.5999 (2.7382)	Entropy 1.19685 (1.19929)	Top-1 acc 63.281 (58.229)	Top-5 acc 83.203 (79.669)	lr 0.01680
Train [47][1870/3239]	Time 0.250 (0.591)	Data Time 0.001 (0.020)	Loss 2.5009 (2.7378)	Entropy 1.19685 (1.19928)	Top-1 acc 63.672 (58.241)	Top-5 acc 83.203 (79.677)	lr 0.01680
Train [47][1880/3239]	Time 2.444 (0.590)	Data Time 0.001 (0.020)	Loss 2.6066 (2.7378)	Entropy 1.19685 (1.19927)	Top-1 acc 59.766 (58.237)	Top-5 acc 82.422 (79.677)	lr 0.01680
Train [47][1890/3239]	Time 0.208 (0.588)	Data Time 0.001 (0.020)	Loss 2.8117 (2.7379)	Entropy 1.19682 (1.19925)	Top-1 acc 55.469 (58.235)	Top-5 acc 79.688 (79.673)	lr 0.01680
Train [47][1900/3239]	Time 0.202 (0.588)	Data Time 0.001 (0.020)	Loss 2.6736 (2.7378)	Entropy 1.19679 (1.19924)	Top-1 acc 55.469 (58.234)	Top-5 acc 82.031 (79.672)	lr 0.01680
Train [47][1910/3239]	Time 0.248 (0.587)	Data Time 0.001 (0.020)	Loss 2.7039 (2.7380)	Entropy 1.19679 (1.19923)	Top-1 acc 58.594 (58.232)	Top-5 acc 81.641 (79.669)	lr 0.01680
Train [47][1920/3239]	Time 0.240 (0.586)	Data Time 0.001 (0.020)	Loss 2.7459 (2.7380)	Entropy 1.19676 (1.19921)	Top-1 acc 59.375 (58.231)	Top-5 acc 77.344 (79.667)	lr 0.01680
Train [47][1930/3239]	Time 0.196 (0.586)	Data Time 0.001 (0.020)	Loss 2.9507 (2.7380)	Entropy 1.19676 (1.19920)	Top-1 acc 51.172 (58.226)	Top-5 acc 72.656 (79.666)	lr 0.01680
Train [47][1940/3239]	Time 0.234 (0.585)	Data Time 0.001 (0.019)	Loss 2.5939 (2.7375)	Entropy 1.19670 (1.19919)	Top-1 acc 60.156 (58.231)	Top-5 acc 83.594 (79.674)	lr 0.01680
Train [47][1950/3239]	Time 0.210 (0.584)	Data Time 0.001 (0.019)	Loss 2.7672 (2.7378)	Entropy 1.19669 (1.19918)	Top-1 acc 57.422 (58.229)	Top-5 acc 76.562 (79.668)	lr 0.01680
Train [47][1960/3239]	Time 0.210 (0.584)	Data Time 0.001 (0.019)	Loss 2.5103 (2.7378)	Entropy 1.19668 (1.19916)	Top-1 acc 62.500 (58.227)	Top-5 acc 85.156 (79.667)	lr 0.01679
Train [47][1970/3239]	Time 0.233 (0.583)	Data Time 0.001 (0.019)	Loss 2.8392 (2.7378)	Entropy 1.19665 (1.19915)	Top-1 acc 55.859 (58.223)	Top-5 acc 77.344 (79.667)	lr 0.01679
Train [47][1980/3239]	Time 0.313 (0.583)	Data Time 0.001 (0.019)	Loss 2.7581 (2.7378)	Entropy 1.19664 (1.19914)	Top-1 acc 54.688 (58.226)	Top-5 acc 80.078 (79.668)	lr 0.01679
Train [47][1990/3239]	Time 2.546 (0.582)	Data Time 0.001 (0.019)	Loss 2.7230 (2.7376)	Entropy 1.19664 (1.19913)	Top-1 acc 58.984 (58.228)	Top-5 acc 78.516 (79.667)	lr 0.01679
Train [47][2000/3239]	Time 0.208 (0.580)	Data Time 0.001 (0.019)	Loss 2.8026 (2.7375)	Entropy 1.19664 (1.19911)	Top-1 acc 60.547 (58.231)	Top-5 acc 80.078 (79.671)	lr 0.01679
Train [47][2010/3239]	Time 0.196 (0.580)	Data Time 0.001 (0.019)	Loss 2.8042 (2.7374)	Entropy 1.19663 (1.19910)	Top-1 acc 58.203 (58.237)	Top-5 acc 77.734 (79.671)	lr 0.01679
Train [47][2020/3239]	Time 0.230 (0.579)	Data Time 0.001 (0.019)	Loss 2.7658 (2.7372)	Entropy 1.19662 (1.19909)	Top-1 acc 57.031 (58.243)	Top-5 acc 79.297 (79.676)	lr 0.01679
Train [47][2030/3239]	Time 0.329 (0.579)	Data Time 0.001 (0.019)	Loss 2.5337 (2.7374)	Entropy 1.19660 (1.19908)	Top-1 acc 65.625 (58.239)	Top-5 acc 81.250 (79.672)	lr 0.01679
Train [47][2040/3239]	Time 0.220 (0.578)	Data Time 0.001 (0.019)	Loss 2.6407 (2.7373)	Entropy 1.19660 (1.19906)	Top-1 acc 57.812 (58.242)	Top-5 acc 80.859 (79.673)	lr 0.01679
Train [47][2050/3239]	Time 0.217 (0.577)	Data Time 0.001 (0.018)	Loss 2.8909 (2.7372)	Entropy 1.19656 (1.19905)	Top-1 acc 57.422 (58.246)	Top-5 acc 78.516 (79.675)	lr 0.01679
Train [47][2060/3239]	Time 0.227 (0.577)	Data Time 0.001 (0.018)	Loss 2.7307 (2.7372)	Entropy 1.19653 (1.19904)	Top-1 acc 57.812 (58.244)	Top-5 acc 80.469 (79.675)	lr 0.01678
Train [47][2070/3239]	Time 0.220 (0.576)	Data Time 0.001 (0.018)	Loss 2.8741 (2.7373)	Entropy 1.19653 (1.19903)	Top-1 acc 55.859 (58.246)	Top-5 acc 75.391 (79.670)	lr 0.01678
Train [47][2080/3239]	Time 0.350 (0.576)	Data Time 0.001 (0.018)	Loss 2.7583 (2.7374)	Entropy 1.19647 (1.19902)	Top-1 acc 50.000 (58.238)	Top-5 acc 80.859 (79.670)	lr 0.01678
Train [47][2090/3239]	Time 0.281 (0.575)	Data Time 0.002 (0.018)	Loss 2.6179 (2.7375)	Entropy 1.19646 (1.19900)	Top-1 acc 60.547 (58.235)	Top-5 acc 81.250 (79.666)	lr 0.01678
Train [47][2100/3239]	Time 2.513 (0.575)	Data Time 0.002 (0.018)	Loss 2.8920 (2.7377)	Entropy 1.19646 (1.19899)	Top-1 acc 53.125 (58.228)	Top-5 acc 75.781 (79.663)	lr 0.01678
Train [47][2110/3239]	Time 0.250 (0.573)	Data Time 0.001 (0.018)	Loss 2.5452 (2.7378)	Entropy 1.19631 (1.19898)	Top-1 acc 62.500 (58.225)	Top-5 acc 83.594 (79.658)	lr 0.01678
Train [47][2120/3239]	Time 0.233 (0.573)	Data Time 0.001 (0.018)	Loss 2.7755 (2.7379)	Entropy 1.19621 (1.19897)	Top-1 acc 57.812 (58.219)	Top-5 acc 78.516 (79.654)	lr 0.01678
Train [47][2130/3239]	Time 0.237 (0.572)	Data Time 0.001 (0.018)	Loss 2.7147 (2.7382)	Entropy 1.19620 (1.19895)	Top-1 acc 62.500 (58.217)	Top-5 acc 81.250 (79.649)	lr 0.01678
Train [47][2140/3239]	Time 0.241 (0.572)	Data Time 0.001 (0.018)	Loss 2.8013 (2.7384)	Entropy 1.19617 (1.19894)	Top-1 acc 55.469 (58.210)	Top-5 acc 78.125 (79.644)	lr 0.01678
Train [47][2150/3239]	Time 0.208 (0.571)	Data Time 0.001 (0.018)	Loss 2.6917 (2.7383)	Entropy 1.19615 (1.19893)	Top-1 acc 61.719 (58.208)	Top-5 acc 79.297 (79.646)	lr 0.01678
Train [47][2160/3239]	Time 0.255 (0.594)	Data Time 0.002 (0.018)	Loss 2.8846 (2.7382)	Entropy 1.19615 (1.19891)	Top-1 acc 58.203 (58.208)	Top-5 acc 78.906 (79.648)	lr 0.01678
Train [47][2170/3239]	Time 0.239 (0.594)	Data Time 0.002 (0.018)	Loss 2.9086 (2.7383)	Entropy 1.19617 (1.19890)	Top-1 acc 53.906 (58.205)	Top-5 acc 75.781 (79.645)	lr 0.01677
Train [47][2180/3239]	Time 0.221 (0.593)	Data Time 0.002 (0.017)	Loss 2.5984 (2.7382)	Entropy 1.19613 (1.19889)	Top-1 acc 61.328 (58.208)	Top-5 acc 83.984 (79.646)	lr 0.01677
Train [47][2190/3239]	Time 0.214 (0.593)	Data Time 0.001 (0.017)	Loss 2.8221 (2.7384)	Entropy 1.19601 (1.19888)	Top-1 acc 56.641 (58.203)	Top-5 acc 78.906 (79.644)	lr 0.01677
Train [47][2200/3239]	Time 0.249 (0.592)	Data Time 0.001 (0.017)	Loss 2.7397 (2.7381)	Entropy 1.19596 (1.19886)	Top-1 acc 57.031 (58.212)	Top-5 acc 81.641 (79.652)	lr 0.01677
Train [47][2210/3239]	Time 2.487 (0.592)	Data Time 0.001 (0.017)	Loss 2.8521 (2.7382)	Entropy 1.19596 (1.19885)	Top-1 acc 54.688 (58.205)	Top-5 acc 75.391 (79.649)	lr 0.01677
Train [47][2220/3239]	Time 0.200 (0.590)	Data Time 0.001 (0.017)	Loss 2.6374 (2.7380)	Entropy 1.19593 (1.19884)	Top-1 acc 58.594 (58.208)	Top-5 acc 83.203 (79.658)	lr 0.01677
Train [47][2230/3239]	Time 0.216 (0.589)	Data Time 0.001 (0.017)	Loss 3.0925 (2.7384)	Entropy 1.19588 (1.19882)	Top-1 acc 46.875 (58.200)	Top-5 acc 73.828 (79.653)	lr 0.01677
Train [47][2240/3239]	Time 0.206 (0.589)	Data Time 0.001 (0.017)	Loss 2.7113 (2.7382)	Entropy 1.19589 (1.19881)	Top-1 acc 58.984 (58.204)	Top-5 acc 78.125 (79.659)	lr 0.01677
Train [47][2250/3239]	Time 0.221 (0.588)	Data Time 0.001 (0.017)	Loss 2.6828 (2.7382)	Entropy 1.19587 (1.19880)	Top-1 acc 59.375 (58.205)	Top-5 acc 80.078 (79.657)	lr 0.01677
Train [47][2260/3239]	Time 0.234 (0.588)	Data Time 0.001 (0.017)	Loss 2.9617 (2.7381)	Entropy 1.19587 (1.19878)	Top-1 acc 51.953 (58.205)	Top-5 acc 75.781 (79.659)	lr 0.01677
Train [47][2270/3239]	Time 0.250 (0.587)	Data Time 0.001 (0.017)	Loss 2.7856 (2.7380)	Entropy 1.19584 (1.19877)	Top-1 acc 59.766 (58.211)	Top-5 acc 79.297 (79.658)	lr 0.01676
Train [47][2280/3239]	Time 0.222 (0.587)	Data Time 0.001 (0.017)	Loss 2.7741 (2.7378)	Entropy 1.19579 (1.19876)	Top-1 acc 55.859 (58.216)	Top-5 acc 79.688 (79.663)	lr 0.01676
Train [47][2290/3239]	Time 0.223 (0.586)	Data Time 0.001 (0.017)	Loss 2.6948 (2.7378)	Entropy 1.19578 (1.19875)	Top-1 acc 60.547 (58.221)	Top-5 acc 81.641 (79.662)	lr 0.01676
Train [47][2300/3239]	Time 0.245 (0.586)	Data Time 0.002 (0.017)	Loss 2.6283 (2.7380)	Entropy 1.19574 (1.19873)	Top-1 acc 58.984 (58.214)	Top-5 acc 81.250 (79.662)	lr 0.01676
Train [47][2310/3239]	Time 0.247 (0.585)	Data Time 0.002 (0.017)	Loss 2.7240 (2.7381)	Entropy 1.19570 (1.19872)	Top-1 acc 58.203 (58.214)	Top-5 acc 77.734 (79.660)	lr 0.01676
Train [47][2320/3239]	Time 2.450 (0.584)	Data Time 0.001 (0.017)	Loss 2.7953 (2.7387)	Entropy 1.19570 (1.19871)	Top-1 acc 56.641 (58.200)	Top-5 acc 79.297 (79.648)	lr 0.01676
Train [47][2330/3239]	Time 0.235 (0.583)	Data Time 0.002 (0.016)	Loss 2.8758 (2.7386)	Entropy 1.19570 (1.19869)	Top-1 acc 57.422 (58.203)	Top-5 acc 76.953 (79.650)	lr 0.01676
Train [47][2340/3239]	Time 0.239 (0.583)	Data Time 0.001 (0.016)	Loss 3.0272 (2.7391)	Entropy 1.19564 (1.19868)	Top-1 acc 53.516 (58.189)	Top-5 acc 73.828 (79.640)	lr 0.01676
Train [47][2350/3239]	Time 0.289 (0.582)	Data Time 0.001 (0.016)	Loss 2.6459 (2.7392)	Entropy 1.19563 (1.19867)	Top-1 acc 59.766 (58.186)	Top-5 acc 81.250 (79.638)	lr 0.01676
Train [47][2360/3239]	Time 0.252 (0.582)	Data Time 0.001 (0.016)	Loss 2.7421 (2.7390)	Entropy 1.19560 (1.19866)	Top-1 acc 56.641 (58.192)	Top-5 acc 79.688 (79.641)	lr 0.01676
Train [47][2370/3239]	Time 0.268 (0.581)	Data Time 0.001 (0.016)	Loss 2.9383 (2.7389)	Entropy 1.19557 (1.19864)	Top-1 acc 54.688 (58.194)	Top-5 acc 73.438 (79.644)	lr 0.01676
Train [47][2380/3239]	Time 0.216 (0.581)	Data Time 0.001 (0.016)	Loss 2.8934 (2.7389)	Entropy 1.19553 (1.19863)	Top-1 acc 51.562 (58.190)	Top-5 acc 78.516 (79.643)	lr 0.01675
Train [47][2390/3239]	Time 0.214 (0.580)	Data Time 0.001 (0.016)	Loss 2.7806 (2.7390)	Entropy 1.19550 (1.19862)	Top-1 acc 54.297 (58.184)	Top-5 acc 79.297 (79.640)	lr 0.01675
Train [47][2400/3239]	Time 0.237 (0.580)	Data Time 0.001 (0.016)	Loss 2.8673 (2.7389)	Entropy 1.19543 (1.19860)	Top-1 acc 55.859 (58.184)	Top-5 acc 79.297 (79.641)	lr 0.01675
Train [47][2410/3239]	Time 0.227 (0.579)	Data Time 0.001 (0.016)	Loss 2.7666 (2.7389)	Entropy 1.19535 (1.19859)	Top-1 acc 60.547 (58.186)	Top-5 acc 78.125 (79.644)	lr 0.01675
Train [47][2420/3239]	Time 0.239 (0.579)	Data Time 0.001 (0.016)	Loss 2.8321 (2.7390)	Entropy 1.19530 (1.19858)	Top-1 acc 55.859 (58.182)	Top-5 acc 78.125 (79.648)	lr 0.01675
Train [47][2430/3239]	Time 2.456 (0.579)	Data Time 0.001 (0.016)	Loss 2.5442 (2.7389)	Entropy 1.19530 (1.19856)	Top-1 acc 65.625 (58.186)	Top-5 acc 82.031 (79.645)	lr 0.01675
Train [47][2440/3239]	Time 0.225 (0.577)	Data Time 0.001 (0.016)	Loss 2.7630 (2.7389)	Entropy 1.19527 (1.19855)	Top-1 acc 57.422 (58.188)	Top-5 acc 78.516 (79.644)	lr 0.01675
Train [47][2450/3239]	Time 0.226 (0.577)	Data Time 0.001 (0.016)	Loss 2.6696 (2.7389)	Entropy 1.19521 (1.19854)	Top-1 acc 54.688 (58.186)	Top-5 acc 81.250 (79.646)	lr 0.01675
Train [47][2460/3239]	Time 0.236 (0.576)	Data Time 0.001 (0.016)	Loss 2.7669 (2.7390)	Entropy 1.19519 (1.19852)	Top-1 acc 56.641 (58.182)	Top-5 acc 78.906 (79.644)	lr 0.01675
Train [47][2470/3239]	Time 0.344 (0.576)	Data Time 0.001 (0.016)	Loss 2.7676 (2.7391)	Entropy 1.19519 (1.19851)	Top-1 acc 56.641 (58.180)	Top-5 acc 80.078 (79.642)	lr 0.01675
Train [47][2480/3239]	Time 0.208 (0.575)	Data Time 0.001 (0.016)	Loss 2.5380 (2.7391)	Entropy 1.19513 (1.19850)	Top-1 acc 62.891 (58.172)	Top-5 acc 81.250 (79.644)	lr 0.01674
Train [47][2490/3239]	Time 0.193 (0.575)	Data Time 0.001 (0.016)	Loss 2.7962 (2.7398)	Entropy 1.19512 (1.19848)	Top-1 acc 58.984 (58.159)	Top-5 acc 79.688 (79.637)	lr 0.01674
Train [47][2500/3239]	Time 0.219 (0.574)	Data Time 0.001 (0.015)	Loss 2.6740 (2.7398)	Entropy 1.19510 (1.19847)	Top-1 acc 63.672 (58.164)	Top-5 acc 79.688 (79.637)	lr 0.01674
Train [47][2510/3239]	Time 0.205 (0.574)	Data Time 0.001 (0.015)	Loss 2.7401 (2.7397)	Entropy 1.19505 (1.19845)	Top-1 acc 60.547 (58.171)	Top-5 acc 79.297 (79.638)	lr 0.01674
Train [47][2520/3239]	Time 0.323 (0.593)	Data Time 0.002 (0.015)	Loss 2.5836 (2.7396)	Entropy 1.19503 (1.19844)	Top-1 acc 62.109 (58.176)	Top-5 acc 80.469 (79.641)	lr 0.01674
Train [47][2530/3239]	Time 0.254 (0.593)	Data Time 0.002 (0.015)	Loss 2.6799 (2.7397)	Entropy 1.19502 (1.19843)	Top-1 acc 60.547 (58.173)	Top-5 acc 81.641 (79.638)	lr 0.01674
Train [47][2540/3239]	Time 2.423 (0.592)	Data Time 0.002 (0.015)	Loss 2.7096 (2.7399)	Entropy 1.19502 (1.19841)	Top-1 acc 59.766 (58.173)	Top-5 acc 77.734 (79.636)	lr 0.01674
Train [47][2550/3239]	Time 0.230 (0.591)	Data Time 0.002 (0.015)	Loss 2.8137 (2.7402)	Entropy 1.19497 (1.19840)	Top-1 acc 53.516 (58.163)	Top-5 acc 78.906 (79.632)	lr 0.01674
Train [47][2560/3239]	Time 0.216 (0.590)	Data Time 0.001 (0.015)	Loss 2.7449 (2.7405)	Entropy 1.19497 (1.19839)	Top-1 acc 60.547 (58.155)	Top-5 acc 78.125 (79.628)	lr 0.01674
Train [47][2570/3239]	Time 0.245 (0.590)	Data Time 0.001 (0.015)	Loss 2.6494 (2.7405)	Entropy 1.19492 (1.19837)	Top-1 acc 60.156 (58.155)	Top-5 acc 78.125 (79.624)	lr 0.01674
Train [47][2580/3239]	Time 0.267 (0.589)	Data Time 0.001 (0.015)	Loss 2.7950 (2.7407)	Entropy 1.19486 (1.19836)	Top-1 acc 54.688 (58.147)	Top-5 acc 79.688 (79.618)	lr 0.01674
Train [47][2590/3239]	Time 0.240 (0.589)	Data Time 0.001 (0.015)	Loss 2.6099 (2.7412)	Entropy 1.19482 (1.19835)	Top-1 acc 59.375 (58.135)	Top-5 acc 80.859 (79.613)	lr 0.01673
Train [47][2600/3239]	Time 0.236 (0.588)	Data Time 0.002 (0.015)	Loss 2.6919 (2.7411)	Entropy 1.19477 (1.19833)	Top-1 acc 61.328 (58.142)	Top-5 acc 80.078 (79.613)	lr 0.01673
Train [47][2610/3239]	Time 0.240 (0.588)	Data Time 0.001 (0.015)	Loss 2.7371 (2.7412)	Entropy 1.19470 (1.19832)	Top-1 acc 58.203 (58.142)	Top-5 acc 82.031 (79.609)	lr 0.01673
Train [47][2620/3239]	Time 0.239 (0.588)	Data Time 0.001 (0.015)	Loss 2.8069 (2.7412)	Entropy 1.19459 (1.19831)	Top-1 acc 54.688 (58.137)	Top-5 acc 78.516 (79.612)	lr 0.01673
Train [47][2630/3239]	Time 0.215 (0.587)	Data Time 0.001 (0.015)	Loss 2.8710 (2.7412)	Entropy 1.19460 (1.19829)	Top-1 acc 54.688 (58.139)	Top-5 acc 76.953 (79.610)	lr 0.01673
Train [47][2640/3239]	Time 0.216 (0.586)	Data Time 0.001 (0.015)	Loss 2.7340 (2.7414)	Entropy 1.19460 (1.19828)	Top-1 acc 57.812 (58.135)	Top-5 acc 81.250 (79.610)	lr 0.01673
Train [47][2650/3239]	Time 0.245 (0.586)	Data Time 0.001 (0.015)	Loss 2.8229 (2.7413)	Entropy 1.19460 (1.19826)	Top-1 acc 55.859 (58.138)	Top-5 acc 78.125 (79.611)	lr 0.01673
Train [47][2660/3239]	Time 0.283 (0.585)	Data Time 0.001 (0.015)	Loss 2.8003 (2.7412)	Entropy 1.19458 (1.19825)	Top-1 acc 56.250 (58.138)	Top-5 acc 77.344 (79.612)	lr 0.01673
Train [47][2670/3239]	Time 0.214 (0.585)	Data Time 0.001 (0.015)	Loss 2.8621 (2.7414)	Entropy 1.19451 (1.19824)	Top-1 acc 51.172 (58.131)	Top-5 acc 78.125 (79.609)	lr 0.01673
Train [47][2680/3239]	Time 0.239 (0.585)	Data Time 0.002 (0.015)	Loss 2.9438 (2.7414)	Entropy 1.19445 (1.19822)	Top-1 acc 52.734 (58.130)	Top-5 acc 75.000 (79.607)	lr 0.01673
Train [47][2690/3239]	Time 0.249 (0.584)	Data Time 0.001 (0.015)	Loss 2.9601 (2.7415)	Entropy 1.19444 (1.19821)	Top-1 acc 53.125 (58.126)	Top-5 acc 74.219 (79.607)	lr 0.01672
Train [47][2700/3239]	Time 0.251 (0.584)	Data Time 0.001 (0.014)	Loss 2.8567 (2.7414)	Entropy 1.19437 (1.19819)	Top-1 acc 54.297 (58.125)	Top-5 acc 76.562 (79.610)	lr 0.01672
Train [47][2710/3239]	Time 0.212 (0.583)	Data Time 0.001 (0.014)	Loss 2.5627 (2.7414)	Entropy 1.19436 (1.19818)	Top-1 acc 65.625 (58.133)	Top-5 acc 82.422 (79.611)	lr 0.01672
Train [47][2720/3239]	Time 0.224 (0.583)	Data Time 0.001 (0.014)	Loss 2.7559 (2.7414)	Entropy 1.19430 (1.19817)	Top-1 acc 59.375 (58.131)	Top-5 acc 80.469 (79.611)	lr 0.01672
Train [47][2730/3239]	Time 0.207 (0.582)	Data Time 0.001 (0.014)	Loss 2.7069 (2.7413)	Entropy 1.19415 (1.19815)	Top-1 acc 57.812 (58.135)	Top-5 acc 82.031 (79.614)	lr 0.01672
Train [47][2740/3239]	Time 0.212 (0.582)	Data Time 0.001 (0.014)	Loss 2.8252 (2.7412)	Entropy 1.19416 (1.19814)	Top-1 acc 58.203 (58.138)	Top-5 acc 80.078 (79.616)	lr 0.01672
Train [47][2750/3239]	Time 0.224 (0.581)	Data Time 0.003 (0.014)	Loss 3.1078 (2.7414)	Entropy 1.19417 (1.19812)	Top-1 acc 46.484 (58.130)	Top-5 acc 72.266 (79.614)	lr 0.01672
Train [47][2760/3239]	Time 0.253 (0.581)	Data Time 0.001 (0.014)	Loss 2.6036 (2.7415)	Entropy 1.19415 (1.19811)	Top-1 acc 59.375 (58.126)	Top-5 acc 82.422 (79.612)	lr 0.01672
Train [47][2770/3239]	Time 0.248 (0.581)	Data Time 0.001 (0.014)	Loss 2.6587 (2.7417)	Entropy 1.19413 (1.19809)	Top-1 acc 61.328 (58.125)	Top-5 acc 80.469 (79.608)	lr 0.01672
Train [47][2780/3239]	Time 0.215 (0.580)	Data Time 0.001 (0.014)	Loss 2.6193 (2.7415)	Entropy 1.19408 (1.19808)	Top-1 acc 61.719 (58.126)	Top-5 acc 84.375 (79.610)	lr 0.01672
Train [47][2790/3239]	Time 0.236 (0.580)	Data Time 0.001 (0.014)	Loss 2.7819 (2.7416)	Entropy 1.19408 (1.19807)	Top-1 acc 56.641 (58.127)	Top-5 acc 78.906 (79.608)	lr 0.01672
Train [47][2800/3239]	Time 0.250 (0.579)	Data Time 0.001 (0.014)	Loss 2.7720 (2.7415)	Entropy 1.19407 (1.19805)	Top-1 acc 57.812 (58.126)	Top-5 acc 76.953 (79.610)	lr 0.01671
Train [47][2810/3239]	Time 0.261 (0.579)	Data Time 0.001 (0.014)	Loss 2.6131 (2.7415)	Entropy 1.19413 (1.19804)	Top-1 acc 61.719 (58.129)	Top-5 acc 80.078 (79.609)	lr 0.01671
Train [47][2820/3239]	Time 0.175 (0.578)	Data Time 0.001 (0.014)	Loss 2.8632 (2.7417)	Entropy 1.19428 (1.19802)	Top-1 acc 56.641 (58.123)	Top-5 acc 75.781 (79.603)	lr 0.01671
Train [47][2830/3239]	Time 0.230 (0.578)	Data Time 0.002 (0.014)	Loss 2.9447 (2.7420)	Entropy 1.19427 (1.19801)	Top-1 acc 53.516 (58.115)	Top-5 acc 73.438 (79.595)	lr 0.01671
Train [47][2840/3239]	Time 0.244 (0.577)	Data Time 0.002 (0.014)	Loss 2.7125 (2.7419)	Entropy 1.19425 (1.19800)	Top-1 acc 57.031 (58.114)	Top-5 acc 80.078 (79.597)	lr 0.01671
Train [47][2850/3239]	Time 0.263 (0.577)	Data Time 0.001 (0.014)	Loss 2.8505 (2.7420)	Entropy 1.19420 (1.19798)	Top-1 acc 57.812 (58.113)	Top-5 acc 76.953 (79.595)	lr 0.01671
Train [47][2860/3239]	Time 0.310 (0.594)	Data Time 0.003 (0.014)	Loss 2.8083 (2.7419)	Entropy 1.19421 (1.19797)	Top-1 acc 55.859 (58.113)	Top-5 acc 78.906 (79.595)	lr 0.01671
Train [47][2870/3239]	Time 0.246 (0.594)	Data Time 0.002 (0.014)	Loss 2.8168 (2.7419)	Entropy 1.19417 (1.19796)	Top-1 acc 58.203 (58.117)	Top-5 acc 77.734 (79.595)	lr 0.01671
Train [47][2880/3239]	Time 0.199 (0.593)	Data Time 0.001 (0.014)	Loss 2.7799 (2.7420)	Entropy 1.19413 (1.19794)	Top-1 acc 55.469 (58.113)	Top-5 acc 77.734 (79.592)	lr 0.01671
Train [47][2890/3239]	Time 0.252 (0.593)	Data Time 0.001 (0.014)	Loss 2.6702 (2.7420)	Entropy 1.19415 (1.19793)	Top-1 acc 58.203 (58.114)	Top-5 acc 80.078 (79.590)	lr 0.01671
Train [47][2900/3239]	Time 0.228 (0.592)	Data Time 0.002 (0.014)	Loss 3.0632 (2.7422)	Entropy 1.19410 (1.19792)	Top-1 acc 51.172 (58.109)	Top-5 acc 76.172 (79.589)	lr 0.01670
Train [47][2910/3239]	Time 0.226 (0.592)	Data Time 0.001 (0.014)	Loss 2.7714 (2.7421)	Entropy 1.19410 (1.19790)	Top-1 acc 58.203 (58.109)	Top-5 acc 79.297 (79.589)	lr 0.01670
Train [47][2920/3239]	Time 0.230 (0.592)	Data Time 0.001 (0.013)	Loss 2.7378 (2.7421)	Entropy 1.19413 (1.19789)	Top-1 acc 58.594 (58.113)	Top-5 acc 79.297 (79.588)	lr 0.01670
Train [47][2930/3239]	Time 0.247 (0.591)	Data Time 0.001 (0.013)	Loss 2.7712 (2.7421)	Entropy 1.19411 (1.19788)	Top-1 acc 59.766 (58.112)	Top-5 acc 78.516 (79.585)	lr 0.01670
Train [47][2940/3239]	Time 0.234 (0.591)	Data Time 0.001 (0.013)	Loss 2.6255 (2.7421)	Entropy 1.19408 (1.19787)	Top-1 acc 64.453 (58.113)	Top-5 acc 83.984 (79.587)	lr 0.01670
Train [47][2950/3239]	Time 0.230 (0.590)	Data Time 0.001 (0.013)	Loss 2.7212 (2.7421)	Entropy 1.19404 (1.19785)	Top-1 acc 55.469 (58.113)	Top-5 acc 81.641 (79.587)	lr 0.01670
Train [47][2960/3239]	Time 0.255 (0.590)	Data Time 0.001 (0.013)	Loss 2.8095 (2.7421)	Entropy 1.19399 (1.19784)	Top-1 acc 55.078 (58.114)	Top-5 acc 80.469 (79.588)	lr 0.01670
Train [47][2970/3239]	Time 0.237 (0.589)	Data Time 0.001 (0.013)	Loss 2.7672 (2.7420)	Entropy 1.19394 (1.19783)	Top-1 acc 56.641 (58.117)	Top-5 acc 78.906 (79.589)	lr 0.01670
Train [47][2980/3239]	Time 0.259 (0.589)	Data Time 0.001 (0.013)	Loss 2.6534 (2.7419)	Entropy 1.19388 (1.19781)	Top-1 acc 62.109 (58.119)	Top-5 acc 82.422 (79.589)	lr 0.01670
Train [47][2990/3239]	Time 0.236 (0.589)	Data Time 0.001 (0.013)	Loss 2.6063 (2.7421)	Entropy 1.19387 (1.19780)	Top-1 acc 62.891 (58.114)	Top-5 acc 80.078 (79.587)	lr 0.01670
Train [47][3000/3239]	Time 0.233 (0.588)	Data Time 0.001 (0.013)	Loss 2.7256 (2.7420)	Entropy 1.19379 (1.19779)	Top-1 acc 56.641 (58.116)	Top-5 acc 81.641 (79.588)	lr 0.01670
Train [47][3010/3239]	Time 0.345 (0.588)	Data Time 0.001 (0.013)	Loss 2.7995 (2.7418)	Entropy 1.19376 (1.19777)	Top-1 acc 57.812 (58.122)	Top-5 acc 78.516 (79.593)	lr 0.01669
Train [47][3020/3239]	Time 0.206 (0.587)	Data Time 0.002 (0.013)	Loss 2.8499 (2.7420)	Entropy 1.19374 (1.19776)	Top-1 acc 54.297 (58.120)	Top-5 acc 80.078 (79.591)	lr 0.01669
Train [47][3030/3239]	Time 0.261 (0.587)	Data Time 0.001 (0.013)	Loss 2.6169 (2.7418)	Entropy 1.19371 (1.19775)	Top-1 acc 61.328 (58.124)	Top-5 acc 82.031 (79.595)	lr 0.01669
Train [47][3040/3239]	Time 0.214 (0.587)	Data Time 0.001 (0.013)	Loss 2.6078 (2.7417)	Entropy 1.19367 (1.19773)	Top-1 acc 58.594 (58.124)	Top-5 acc 82.812 (79.595)	lr 0.01669
Train [47][3050/3239]	Time 0.235 (0.586)	Data Time 0.001 (0.013)	Loss 2.7422 (2.7418)	Entropy 1.19357 (1.19772)	Top-1 acc 57.031 (58.124)	Top-5 acc 79.688 (79.594)	lr 0.01669
Train [47][3060/3239]	Time 0.312 (0.586)	Data Time 0.001 (0.013)	Loss 2.6603 (2.7417)	Entropy 1.19351 (1.19771)	Top-1 acc 60.547 (58.129)	Top-5 acc 81.250 (79.595)	lr 0.01669
Train [47][3070/3239]	Time 0.263 (0.585)	Data Time 0.001 (0.013)	Loss 2.6702 (2.7415)	Entropy 1.19348 (1.19769)	Top-1 acc 56.250 (58.132)	Top-5 acc 83.594 (79.601)	lr 0.01669
Train [47][3080/3239]	Time 0.254 (0.585)	Data Time 0.002 (0.013)	Loss 2.5124 (2.7413)	Entropy 1.19339 (1.19768)	Top-1 acc 63.672 (58.139)	Top-5 acc 82.422 (79.605)	lr 0.01669
Train [47][3090/3239]	Time 0.239 (0.585)	Data Time 0.001 (0.013)	Loss 2.7727 (2.7413)	Entropy 1.19337 (1.19767)	Top-1 acc 56.250 (58.136)	Top-5 acc 78.906 (79.605)	lr 0.01669
Train [47][3100/3239]	Time 0.234 (0.584)	Data Time 0.001 (0.013)	Loss 2.6669 (2.7415)	Entropy 1.19330 (1.19765)	Top-1 acc 58.594 (58.130)	Top-5 acc 82.812 (79.604)	lr 0.01669
Train [47][3110/3239]	Time 0.337 (0.584)	Data Time 0.002 (0.013)	Loss 2.9777 (2.7414)	Entropy 1.19328 (1.19764)	Top-1 acc 52.344 (58.128)	Top-5 acc 75.781 (79.607)	lr 0.01668
Train [47][3120/3239]	Time 0.249 (0.584)	Data Time 0.001 (0.013)	Loss 2.5733 (2.7414)	Entropy 1.19321 (1.19762)	Top-1 acc 62.500 (58.128)	Top-5 acc 82.031 (79.606)	lr 0.01668
Train [47][3130/3239]	Time 0.230 (0.583)	Data Time 0.001 (0.013)	Loss 2.8472 (2.7415)	Entropy 1.19315 (1.19761)	Top-1 acc 53.516 (58.128)	Top-5 acc 75.781 (79.605)	lr 0.01668
Train [47][3140/3239]	Time 0.222 (0.583)	Data Time 0.001 (0.013)	Loss 2.6479 (2.7415)	Entropy 1.19313 (1.19760)	Top-1 acc 60.547 (58.129)	Top-5 acc 82.812 (79.604)	lr 0.01668
Train [47][3150/3239]	Time 0.158 (0.582)	Data Time 0.001 (0.013)	Loss 2.4186 (2.7416)	Entropy 1.19303 (1.19758)	Top-1 acc 64.062 (58.124)	Top-5 acc 83.203 (79.602)	lr 0.01668
Train [47][3160/3239]	Time 0.317 (0.582)	Data Time 0.001 (0.013)	Loss 2.7918 (2.7416)	Entropy 1.19300 (1.19757)	Top-1 acc 56.641 (58.124)	Top-5 acc 80.469 (79.602)	lr 0.01668
Train [47][3170/3239]	Time 0.262 (0.582)	Data Time 0.001 (0.013)	Loss 2.6272 (2.7415)	Entropy 1.19296 (1.19755)	Top-1 acc 62.500 (58.123)	Top-5 acc 83.984 (79.604)	lr 0.01668
Train [47][3180/3239]	Time 0.232 (0.581)	Data Time 0.000 (0.013)	Loss 2.7654 (2.7415)	Entropy 1.19296 (1.19754)	Top-1 acc 56.641 (58.124)	Top-5 acc 79.297 (79.601)	lr 0.01668
Train [47][3190/3239]	Time 0.266 (0.596)	Data Time 0.000 (0.012)	Loss 2.7197 (2.7415)	Entropy 1.19298 (1.19752)	Top-1 acc 60.547 (58.126)	Top-5 acc 79.688 (79.602)	lr 0.01668
Train [47][3200/3239]	Time 0.208 (0.596)	Data Time 0.000 (0.012)	Loss 2.8484 (2.7416)	Entropy 1.19297 (1.19751)	Top-1 acc 57.422 (58.123)	Top-5 acc 75.781 (79.599)	lr 0.01668
Train [47][3210/3239]	Time 0.206 (0.595)	Data Time 0.000 (0.012)	Loss 2.8655 (2.7416)	Entropy 1.19299 (1.19750)	Top-1 acc 56.641 (58.120)	Top-5 acc 78.516 (79.600)	lr 0.01668
Train [47][3220/3239]	Time 0.190 (0.595)	Data Time 0.000 (0.012)	Loss 2.5829 (2.7418)	Entropy 1.19291 (1.19748)	Top-1 acc 63.281 (58.112)	Top-5 acc 79.688 (79.597)	lr 0.01667
Train [47][3230/3239]	Time 0.227 (0.594)	Data Time 0.000 (0.012)	Loss 2.6570 (2.7417)	Entropy 1.19286 (1.19747)	Top-1 acc 61.328 (58.117)	Top-5 acc 81.641 (79.598)	lr 0.01667
Train [47][3239/3239]	Time 2.215 (0.594)	Data Time 0.000 (0.012)	Loss 2.9386 (2.7418)	Entropy 1.19286 (1.19746)	Top-1 acc 54.321 (58.118)	Top-5 acc 77.778 (79.597)	lr 0.01667
==========Valid [47/120]	loss 1.580	top-1 acc 64.499 (64.499)	top-5 acc 85.012	Train top-1 58.118	top-5 79.597	Entropy 1.19286	Latency-None: 0.000ms	Flops: 548.34M
Train [48][0/3239]	Time 35.993 (35.993)	Data Time 34.163 (34.163)	Loss 2.6422 (2.6422)	Entropy 1.19263 (1.19263)	Top-1 acc 60.547 (60.547)	Top-5 acc 82.422 (82.422)	lr 0.01667
Train [48][10/3239]	Time 2.493 (3.839)	Data Time 0.002 (3.174)	Loss 2.5220 (2.6492)	Entropy 1.19263 (1.19263)	Top-1 acc 64.453 (60.618)	Top-5 acc 82.422 (81.747)	lr 0.01667
Train [48][20/3239]	Time 0.302 (2.121)	Data Time 0.001 (1.663)	Loss 2.6783 (2.6668)	Entropy 1.19258 (1.19260)	Top-1 acc 59.375 (60.045)	Top-5 acc 79.688 (81.008)	lr 0.01667
Train [48][30/3239]	Time 0.223 (1.583)	Data Time 0.002 (1.128)	Loss 3.0708 (2.6867)	Entropy 1.19259 (1.19260)	Top-1 acc 49.609 (59.703)	Top-5 acc 74.219 (80.607)	lr 0.01667
Train [48][40/3239]	Time 0.240 (1.307)	Data Time 0.001 (0.853)	Loss 2.6360 (2.7026)	Entropy 1.19258 (1.19259)	Top-1 acc 62.500 (59.213)	Top-5 acc 82.422 (80.183)	lr 0.01667
Train [48][50/3239]	Time 0.238 (1.137)	Data Time 0.001 (0.686)	Loss 2.7706 (2.6986)	Entropy 1.19239 (1.19257)	Top-1 acc 54.688 (59.161)	Top-5 acc 80.469 (80.170)	lr 0.01667
Train [48][60/3239]	Time 0.224 (1.023)	Data Time 0.001 (0.574)	Loss 2.8136 (2.7021)	Entropy 1.19236 (1.19253)	Top-1 acc 59.375 (59.023)	Top-5 acc 78.906 (80.174)	lr 0.01667
Train [48][70/3239]	Time 0.306 (0.944)	Data Time 0.001 (0.493)	Loss 2.6428 (2.6988)	Entropy 1.19230 (1.19250)	Top-1 acc 58.984 (59.133)	Top-5 acc 81.641 (80.265)	lr 0.01667
Train [48][80/3239]	Time 0.237 (0.883)	Data Time 0.001 (0.433)	Loss 2.7021 (2.6946)	Entropy 1.19230 (1.19248)	Top-1 acc 56.641 (59.221)	Top-5 acc 82.031 (80.343)	lr 0.01666
Train [48][90/3239]	Time 0.219 (0.836)	Data Time 0.001 (0.385)	Loss 2.6369 (2.6949)	Entropy 1.19227 (1.19246)	Top-1 acc 58.984 (59.268)	Top-5 acc 81.641 (80.387)	lr 0.01666
Train [48][100/3239]	Time 0.259 (0.798)	Data Time 0.002 (0.347)	Loss 2.7426 (2.6916)	Entropy 1.19221 (1.19244)	Top-1 acc 54.297 (59.251)	Top-5 acc 80.078 (80.488)	lr 0.01666
Train [48][110/3239]	Time 0.212 (0.767)	Data Time 0.001 (0.316)	Loss 2.6328 (2.6907)	Entropy 1.19220 (1.19242)	Top-1 acc 59.375 (59.248)	Top-5 acc 80.078 (80.507)	lr 0.01666
Train [48][120/3239]	Time 2.540 (0.741)	Data Time 0.001 (0.290)	Loss 2.7320 (2.6903)	Entropy 1.19220 (1.19240)	Top-1 acc 57.812 (59.288)	Top-5 acc 81.641 (80.540)	lr 0.01666
Train [48][130/3239]	Time 0.187 (0.702)	Data Time 0.001 (0.268)	Loss 2.8788 (2.6952)	Entropy 1.19220 (1.19238)	Top-1 acc 52.734 (59.166)	Top-5 acc 74.219 (80.403)	lr 0.01666
Train [48][140/3239]	Time 0.240 (0.685)	Data Time 0.001 (0.249)	Loss 2.6809 (2.6964)	Entropy 1.19219 (1.19237)	Top-1 acc 57.812 (59.101)	Top-5 acc 79.688 (80.377)	lr 0.01666
Train [48][150/3239]	Time 0.212 (0.669)	Data Time 0.001 (0.233)	Loss 2.6073 (2.6965)	Entropy 1.19220 (1.19236)	Top-1 acc 62.891 (59.114)	Top-5 acc 83.203 (80.363)	lr 0.01666
Train [48][160/3239]	Time 0.233 (0.656)	Data Time 0.001 (0.219)	Loss 2.6004 (2.6953)	Entropy 1.19219 (1.19235)	Top-1 acc 62.109 (59.089)	Top-5 acc 82.031 (80.401)	lr 0.01666
Train [48][170/3239]	Time 0.197 (0.644)	Data Time 0.001 (0.206)	Loss 2.6734 (2.6952)	Entropy 1.19212 (1.19234)	Top-1 acc 58.984 (59.035)	Top-5 acc 82.031 (80.389)	lr 0.01666
Train [48][180/3239]	Time 0.247 (0.634)	Data Time 0.001 (0.195)	Loss 2.8076 (2.6955)	Entropy 1.19211 (1.19233)	Top-1 acc 60.156 (59.071)	Top-5 acc 77.344 (80.400)	lr 0.01666
Train [48][190/3239]	Time 0.259 (0.625)	Data Time 0.001 (0.185)	Loss 2.7298 (2.6945)	Entropy 1.19215 (1.19231)	Top-1 acc 54.688 (59.085)	Top-5 acc 80.078 (80.403)	lr 0.01665
Train [48][200/3239]	Time 0.226 (0.617)	Data Time 0.001 (0.176)	Loss 2.9168 (2.6939)	Entropy 1.19212 (1.19231)	Top-1 acc 53.125 (59.064)	Top-5 acc 79.297 (80.426)	lr 0.01665
Train [48][210/3239]	Time 0.225 (0.609)	Data Time 0.001 (0.167)	Loss 2.7162 (2.6929)	Entropy 1.19211 (1.19230)	Top-1 acc 57.031 (59.132)	Top-5 acc 82.031 (80.495)	lr 0.01665
Train [48][220/3239]	Time 0.272 (0.602)	Data Time 0.001 (0.160)	Loss 2.4841 (2.6936)	Entropy 1.19211 (1.19229)	Top-1 acc 65.625 (59.172)	Top-5 acc 83.984 (80.463)	lr 0.01665
Train [48][230/3239]	Time 2.403 (0.595)	Data Time 0.001 (0.153)	Loss 2.9026 (2.6926)	Entropy 1.19211 (1.19228)	Top-1 acc 53.906 (59.184)	Top-5 acc 78.906 (80.516)	lr 0.01665
Train [48][240/3239]	Time 0.226 (0.580)	Data Time 0.001 (0.147)	Loss 2.8933 (2.6947)	Entropy 1.19208 (1.19227)	Top-1 acc 57.031 (59.106)	Top-5 acc 75.000 (80.483)	lr 0.01665
Train [48][250/3239]	Time 0.239 (0.575)	Data Time 0.001 (0.141)	Loss 2.7702 (2.6956)	Entropy 1.19204 (1.19226)	Top-1 acc 56.641 (59.087)	Top-5 acc 80.859 (80.484)	lr 0.01665
Train [48][260/3239]	Time 0.232 (0.571)	Data Time 0.002 (0.135)	Loss 2.8653 (2.6966)	Entropy 1.19201 (1.19225)	Top-1 acc 56.641 (59.083)	Top-5 acc 77.344 (80.461)	lr 0.01665
Train [48][270/3239]	Time 0.173 (0.567)	Data Time 0.001 (0.131)	Loss 2.7325 (2.6946)	Entropy 1.19200 (1.19224)	Top-1 acc 58.984 (59.154)	Top-5 acc 80.078 (80.499)	lr 0.01665
Train [48][280/3239]	Time 0.236 (0.564)	Data Time 0.001 (0.126)	Loss 2.6342 (2.6942)	Entropy 1.19198 (1.19224)	Top-1 acc 58.203 (59.162)	Top-5 acc 79.688 (80.516)	lr 0.01665
Train [48][290/3239]	Time 0.218 (0.560)	Data Time 0.001 (0.122)	Loss 2.7170 (2.6944)	Entropy 1.19192 (1.19223)	Top-1 acc 61.328 (59.198)	Top-5 acc 78.125 (80.501)	lr 0.01664
Train [48][300/3239]	Time 0.221 (0.556)	Data Time 0.001 (0.118)	Loss 2.6854 (2.6929)	Entropy 1.19191 (1.19222)	Top-1 acc 60.156 (59.249)	Top-5 acc 79.297 (80.514)	lr 0.01664
Train [48][310/3239]	Time 0.294 (0.705)	Data Time 0.003 (0.114)	Loss 2.7508 (2.6924)	Entropy 1.19189 (1.19221)	Top-1 acc 58.984 (59.258)	Top-5 acc 79.688 (80.519)	lr 0.01664
Train [48][320/3239]	Time 0.313 (0.702)	Data Time 0.002 (0.111)	Loss 2.7610 (2.6912)	Entropy 1.19167 (1.19219)	Top-1 acc 55.469 (59.265)	Top-5 acc 79.297 (80.556)	lr 0.01664
Train [48][330/3239]	Time 0.216 (0.695)	Data Time 0.001 (0.107)	Loss 2.8360 (2.6924)	Entropy 1.19166 (1.19218)	Top-1 acc 58.203 (59.232)	Top-5 acc 77.344 (80.522)	lr 0.01664
Train [48][340/3239]	Time 2.456 (0.687)	Data Time 0.001 (0.104)	Loss 2.6558 (2.6922)	Entropy 1.19166 (1.19216)	Top-1 acc 60.938 (59.262)	Top-5 acc 81.641 (80.508)	lr 0.01664
Train [48][350/3239]	Time 0.245 (0.674)	Data Time 0.001 (0.101)	Loss 2.7267 (2.6937)	Entropy 1.19164 (1.19215)	Top-1 acc 59.766 (59.215)	Top-5 acc 81.250 (80.505)	lr 0.01664
Train [48][360/3239]	Time 0.244 (0.668)	Data Time 0.002 (0.099)	Loss 2.5112 (2.6931)	Entropy 1.19159 (1.19213)	Top-1 acc 61.719 (59.206)	Top-5 acc 83.594 (80.513)	lr 0.01664
Train [48][370/3239]	Time 0.231 (0.663)	Data Time 0.002 (0.096)	Loss 2.6280 (2.6935)	Entropy 1.19141 (1.19211)	Top-1 acc 63.281 (59.172)	Top-5 acc 82.031 (80.505)	lr 0.01664
Train [48][380/3239]	Time 0.222 (0.658)	Data Time 0.002 (0.094)	Loss 2.6298 (2.6954)	Entropy 1.19139 (1.19210)	Top-1 acc 59.766 (59.118)	Top-5 acc 82.812 (80.481)	lr 0.01664
Train [48][390/3239]	Time 0.250 (0.653)	Data Time 0.002 (0.091)	Loss 2.6001 (2.6969)	Entropy 1.19136 (1.19208)	Top-1 acc 58.984 (59.102)	Top-5 acc 85.156 (80.467)	lr 0.01664
Train [48][400/3239]	Time 0.202 (0.648)	Data Time 0.001 (0.089)	Loss 2.6268 (2.6962)	Entropy 1.19130 (1.19206)	Top-1 acc 59.766 (59.122)	Top-5 acc 82.031 (80.484)	lr 0.01663
Train [48][410/3239]	Time 0.232 (0.644)	Data Time 0.001 (0.087)	Loss 2.7402 (2.6969)	Entropy 1.19122 (1.19204)	Top-1 acc 59.766 (59.106)	Top-5 acc 78.125 (80.472)	lr 0.01663
Train [48][420/3239]	Time 0.334 (0.640)	Data Time 0.001 (0.085)	Loss 2.6561 (2.6972)	Entropy 1.19116 (1.19202)	Top-1 acc 60.547 (59.085)	Top-5 acc 80.859 (80.476)	lr 0.01663
Train [48][430/3239]	Time 0.234 (0.636)	Data Time 0.002 (0.083)	Loss 2.7184 (2.6979)	Entropy 1.19112 (1.19200)	Top-1 acc 58.594 (59.070)	Top-5 acc 79.297 (80.465)	lr 0.01663
Train [48][440/3239]	Time 0.219 (0.632)	Data Time 0.001 (0.081)	Loss 2.7314 (2.6972)	Entropy 1.19110 (1.19198)	Top-1 acc 56.250 (59.077)	Top-5 acc 82.031 (80.484)	lr 0.01663
Train [48][450/3239]	Time 2.376 (0.628)	Data Time 0.001 (0.079)	Loss 2.6332 (2.6964)	Entropy 1.19110 (1.19196)	Top-1 acc 58.203 (59.094)	Top-5 acc 80.859 (80.503)	lr 0.01663
Train [48][460/3239]	Time 0.268 (0.619)	Data Time 0.001 (0.078)	Loss 2.7631 (2.6979)	Entropy 1.19107 (1.19194)	Top-1 acc 55.859 (59.049)	Top-5 acc 79.688 (80.480)	lr 0.01663
Train [48][470/3239]	Time 0.333 (0.617)	Data Time 0.001 (0.076)	Loss 2.7072 (2.6980)	Entropy 1.19107 (1.19192)	Top-1 acc 60.547 (59.057)	Top-5 acc 80.078 (80.486)	lr 0.01663
Train [48][480/3239]	Time 0.212 (0.614)	Data Time 0.001 (0.074)	Loss 2.6747 (2.6985)	Entropy 1.19102 (1.19190)	Top-1 acc 61.328 (59.058)	Top-5 acc 81.250 (80.451)	lr 0.01663
Train [48][490/3239]	Time 0.220 (0.610)	Data Time 0.001 (0.073)	Loss 2.4921 (2.6980)	Entropy 1.19100 (1.19189)	Top-1 acc 62.891 (59.060)	Top-5 acc 83.203 (80.462)	lr 0.01663
Train [48][500/3239]	Time 0.163 (0.607)	Data Time 0.001 (0.072)	Loss 2.6593 (2.6986)	Entropy 1.19093 (1.19187)	Top-1 acc 60.156 (59.047)	Top-5 acc 83.594 (80.460)	lr 0.01662
Train [48][510/3239]	Time 0.216 (0.604)	Data Time 0.001 (0.070)	Loss 2.5401 (2.6986)	Entropy 1.19078 (1.19185)	Top-1 acc 62.500 (59.053)	Top-5 acc 82.812 (80.473)	lr 0.01662
Train [48][520/3239]	Time 0.318 (0.601)	Data Time 0.001 (0.069)	Loss 2.6456 (2.6988)	Entropy 1.19077 (1.19183)	Top-1 acc 58.594 (59.027)	Top-5 acc 83.203 (80.487)	lr 0.01662
Train [48][530/3239]	Time 0.214 (0.599)	Data Time 0.001 (0.068)	Loss 2.7039 (2.6993)	Entropy 1.19071 (1.19181)	Top-1 acc 57.422 (59.011)	Top-5 acc 76.953 (80.464)	lr 0.01662
Train [48][540/3239]	Time 0.221 (0.596)	Data Time 0.001 (0.066)	Loss 2.8079 (2.6993)	Entropy 1.19066 (1.19179)	Top-1 acc 58.594 (59.009)	Top-5 acc 76.953 (80.466)	lr 0.01662
Train [48][550/3239]	Time 0.240 (0.594)	Data Time 0.001 (0.065)	Loss 2.8461 (2.7009)	Entropy 1.19062 (1.19177)	Top-1 acc 53.906 (58.979)	Top-5 acc 77.344 (80.431)	lr 0.01662
Train [48][560/3239]	Time 2.367 (0.591)	Data Time 0.001 (0.064)	Loss 2.5756 (2.7011)	Entropy 1.19062 (1.19174)	Top-1 acc 60.156 (58.957)	Top-5 acc 83.203 (80.435)	lr 0.01662
Train [48][570/3239]	Time 0.228 (0.585)	Data Time 0.001 (0.063)	Loss 2.8543 (2.7035)	Entropy 1.19058 (1.19172)	Top-1 acc 57.031 (58.914)	Top-5 acc 76.953 (80.388)	lr 0.01662
Train [48][580/3239]	Time 0.238 (0.583)	Data Time 0.001 (0.062)	Loss 2.8944 (2.7037)	Entropy 1.19060 (1.19171)	Top-1 acc 55.859 (58.908)	Top-5 acc 75.781 (80.371)	lr 0.01662
Train [48][590/3239]	Time 0.228 (0.580)	Data Time 0.001 (0.061)	Loss 2.6159 (2.7044)	Entropy 1.19054 (1.19169)	Top-1 acc 60.547 (58.900)	Top-5 acc 80.859 (80.351)	lr 0.01662
Train [48][600/3239]	Time 0.215 (0.578)	Data Time 0.001 (0.060)	Loss 2.7399 (2.7044)	Entropy 1.19052 (1.19167)	Top-1 acc 59.375 (58.919)	Top-5 acc 78.906 (80.338)	lr 0.01662
Train [48][610/3239]	Time 0.197 (0.576)	Data Time 0.001 (0.059)	Loss 2.8373 (2.7046)	Entropy 1.19053 (1.19165)	Top-1 acc 53.516 (58.896)	Top-5 acc 78.906 (80.327)	lr 0.01661
Train [48][620/3239]	Time 0.249 (0.574)	Data Time 0.001 (0.058)	Loss 2.6327 (2.7051)	Entropy 1.19046 (1.19163)	Top-1 acc 58.984 (58.894)	Top-5 acc 81.641 (80.313)	lr 0.01661
Train [48][630/3239]	Time 0.212 (0.572)	Data Time 0.001 (0.057)	Loss 2.5530 (2.7044)	Entropy 1.19041 (1.19161)	Top-1 acc 63.672 (58.906)	Top-5 acc 84.766 (80.321)	lr 0.01661
Train [48][640/3239]	Time 0.225 (0.570)	Data Time 0.001 (0.056)	Loss 2.5574 (2.7043)	Entropy 1.19034 (1.19159)	Top-1 acc 62.891 (58.894)	Top-5 acc 82.812 (80.335)	lr 0.01661
Train [48][650/3239]	Time 0.232 (0.569)	Data Time 0.001 (0.055)	Loss 2.5502 (2.7044)	Entropy 1.19032 (1.19157)	Top-1 acc 61.328 (58.896)	Top-5 acc 83.203 (80.331)	lr 0.01661
Train [48][660/3239]	Time 0.218 (0.567)	Data Time 0.001 (0.055)	Loss 2.6761 (2.7045)	Entropy 1.19026 (1.19155)	Top-1 acc 61.328 (58.892)	Top-5 acc 82.812 (80.317)	lr 0.01661
Train [48][670/3239]	Time 52.863 (0.641)	Data Time 0.001 (0.054)	Loss 2.7309 (2.7044)	Entropy 1.19026 (1.19153)	Top-1 acc 57.031 (58.891)	Top-5 acc 79.688 (80.317)	lr 0.01661
Train [48][680/3239]	Time 0.231 (0.635)	Data Time 0.002 (0.053)	Loss 2.6132 (2.7044)	Entropy 1.19018 (1.19151)	Top-1 acc 57.812 (58.888)	Top-5 acc 79.688 (80.325)	lr 0.01661
Train [48][690/3239]	Time 0.213 (0.633)	Data Time 0.001 (0.052)	Loss 2.6765 (2.7043)	Entropy 1.19012 (1.19149)	Top-1 acc 58.594 (58.875)	Top-5 acc 80.469 (80.334)	lr 0.01661
Train [48][700/3239]	Time 0.177 (0.630)	Data Time 0.001 (0.052)	Loss 2.7946 (2.7054)	Entropy 1.19010 (1.19147)	Top-1 acc 56.250 (58.841)	Top-5 acc 79.688 (80.315)	lr 0.01661
Train [48][710/3239]	Time 0.219 (0.628)	Data Time 0.001 (0.051)	Loss 2.6785 (2.7052)	Entropy 1.19009 (1.19145)	Top-1 acc 59.766 (58.857)	Top-5 acc 81.641 (80.319)	lr 0.01660
Train [48][720/3239]	Time 0.341 (0.626)	Data Time 0.001 (0.050)	Loss 2.7391 (2.7057)	Entropy 1.19008 (1.19144)	Top-1 acc 60.938 (58.852)	Top-5 acc 78.516 (80.313)	lr 0.01660
Train [48][730/3239]	Time 0.218 (0.624)	Data Time 0.001 (0.050)	Loss 2.5715 (2.7059)	Entropy 1.19006 (1.19142)	Top-1 acc 63.281 (58.848)	Top-5 acc 83.984 (80.307)	lr 0.01660
Train [48][740/3239]	Time 0.213 (0.621)	Data Time 0.001 (0.049)	Loss 2.6202 (2.7059)	Entropy 1.19002 (1.19140)	Top-1 acc 63.672 (58.843)	Top-5 acc 80.859 (80.310)	lr 0.01660
Train [48][750/3239]	Time 0.220 (0.619)	Data Time 0.002 (0.048)	Loss 2.7732 (2.7060)	Entropy 1.18995 (1.19138)	Top-1 acc 57.422 (58.842)	Top-5 acc 79.688 (80.311)	lr 0.01660
Train [48][760/3239]	Time 0.251 (0.617)	Data Time 0.001 (0.048)	Loss 3.0014 (2.7065)	Entropy 1.18992 (1.19136)	Top-1 acc 53.125 (58.832)	Top-5 acc 73.828 (80.301)	lr 0.01660
Train [48][770/3239]	Time 0.234 (0.615)	Data Time 0.002 (0.047)	Loss 2.7396 (2.7069)	Entropy 1.18991 (1.19134)	Top-1 acc 60.547 (58.834)	Top-5 acc 80.859 (80.295)	lr 0.01660
Train [48][780/3239]	Time 2.442 (0.613)	Data Time 0.001 (0.047)	Loss 2.6270 (2.7067)	Entropy 1.18991 (1.19132)	Top-1 acc 59.766 (58.830)	Top-5 acc 78.906 (80.286)	lr 0.01660
Train [48][790/3239]	Time 0.237 (0.608)	Data Time 0.001 (0.046)	Loss 2.7980 (2.7069)	Entropy 1.18991 (1.19131)	Top-1 acc 57.031 (58.819)	Top-5 acc 79.688 (80.291)	lr 0.01660
Train [48][800/3239]	Time 0.228 (0.607)	Data Time 0.001 (0.045)	Loss 2.7852 (2.7068)	Entropy 1.18988 (1.19129)	Top-1 acc 56.641 (58.821)	Top-5 acc 80.078 (80.294)	lr 0.01660
Train [48][810/3239]	Time 0.236 (0.605)	Data Time 0.002 (0.045)	Loss 2.6836 (2.7075)	Entropy 1.18985 (1.19127)	Top-1 acc 58.594 (58.796)	Top-5 acc 79.688 (80.281)	lr 0.01660
Train [48][820/3239]	Time 0.311 (0.603)	Data Time 0.001 (0.044)	Loss 2.6711 (2.7079)	Entropy 1.18979 (1.19125)	Top-1 acc 57.422 (58.785)	Top-5 acc 83.203 (80.279)	lr 0.01659
Train [48][830/3239]	Time 0.212 (0.601)	Data Time 0.001 (0.044)	Loss 2.6294 (2.7072)	Entropy 1.18977 (1.19123)	Top-1 acc 65.234 (58.801)	Top-5 acc 81.641 (80.294)	lr 0.01659
Train [48][840/3239]	Time 0.233 (0.600)	Data Time 0.002 (0.043)	Loss 2.6337 (2.7079)	Entropy 1.18971 (1.19122)	Top-1 acc 61.328 (58.791)	Top-5 acc 82.812 (80.276)	lr 0.01659
Train [48][850/3239]	Time 0.223 (0.598)	Data Time 0.001 (0.043)	Loss 2.6555 (2.7084)	Entropy 1.18967 (1.19120)	Top-1 acc 57.031 (58.778)	Top-5 acc 80.859 (80.259)	lr 0.01659
Train [48][860/3239]	Time 0.220 (0.597)	Data Time 0.002 (0.042)	Loss 2.8514 (2.7086)	Entropy 1.18963 (1.19118)	Top-1 acc 54.297 (58.775)	Top-5 acc 75.781 (80.263)	lr 0.01659
Train [48][870/3239]	Time 0.249 (0.596)	Data Time 0.003 (0.042)	Loss 2.7834 (2.7082)	Entropy 1.18962 (1.19116)	Top-1 acc 60.547 (58.799)	Top-5 acc 76.953 (80.274)	lr 0.01659
Train [48][880/3239]	Time 0.311 (0.594)	Data Time 0.001 (0.041)	Loss 2.6326 (2.7083)	Entropy 1.18956 (1.19115)	Top-1 acc 61.328 (58.803)	Top-5 acc 81.641 (80.266)	lr 0.01659
Train [48][890/3239]	Time 2.446 (0.593)	Data Time 0.001 (0.041)	Loss 2.7716 (2.7087)	Entropy 1.18956 (1.19113)	Top-1 acc 55.859 (58.798)	Top-5 acc 79.688 (80.257)	lr 0.01659
Train [48][900/3239]	Time 0.184 (0.589)	Data Time 0.001 (0.041)	Loss 3.8674 (2.7115)	Entropy 1.18952 (1.19111)	Top-1 acc 33.984 (58.737)	Top-5 acc 62.891 (80.213)	lr 0.01659
Train [48][910/3239]	Time 0.226 (0.587)	Data Time 0.001 (0.040)	Loss 2.6302 (2.7118)	Entropy 1.18941 (1.19109)	Top-1 acc 58.203 (58.730)	Top-5 acc 80.078 (80.197)	lr 0.01659
Train [48][920/3239]	Time 0.234 (0.586)	Data Time 0.001 (0.040)	Loss 2.9619 (2.7126)	Entropy 1.18938 (1.19107)	Top-1 acc 50.391 (58.717)	Top-5 acc 76.172 (80.173)	lr 0.01658
Train [48][930/3239]	Time 0.239 (0.584)	Data Time 0.001 (0.039)	Loss 2.6576 (2.7130)	Entropy 1.18928 (1.19105)	Top-1 acc 57.812 (58.697)	Top-5 acc 81.641 (80.159)	lr 0.01658
Train [48][940/3239]	Time 0.218 (0.583)	Data Time 0.001 (0.039)	Loss 2.8361 (2.7132)	Entropy 1.18924 (1.19103)	Top-1 acc 54.688 (58.691)	Top-5 acc 77.734 (80.150)	lr 0.01658
Train [48][950/3239]	Time 0.228 (0.581)	Data Time 0.001 (0.039)	Loss 2.5530 (2.7134)	Entropy 1.18921 (1.19102)	Top-1 acc 63.281 (58.687)	Top-5 acc 82.812 (80.148)	lr 0.01658
Train [48][960/3239]	Time 0.236 (0.580)	Data Time 0.001 (0.038)	Loss 2.7162 (2.7132)	Entropy 1.18919 (1.19100)	Top-1 acc 62.109 (58.695)	Top-5 acc 79.688 (80.150)	lr 0.01658
Train [48][970/3239]	Time 0.214 (0.579)	Data Time 0.001 (0.038)	Loss 2.7402 (2.7131)	Entropy 1.18924 (1.19098)	Top-1 acc 57.812 (58.704)	Top-5 acc 79.688 (80.148)	lr 0.01658
Train [48][980/3239]	Time 0.216 (0.577)	Data Time 0.001 (0.037)	Loss 2.5312 (2.7129)	Entropy 1.18917 (1.19096)	Top-1 acc 61.719 (58.712)	Top-5 acc 85.156 (80.148)	lr 0.01658
Train [48][990/3239]	Time 0.277 (0.576)	Data Time 0.001 (0.037)	Loss 2.5486 (2.7128)	Entropy 1.18914 (1.19094)	Top-1 acc 65.625 (58.711)	Top-5 acc 81.641 (80.147)	lr 0.01658
Train [48][1000/3239]	Time 2.452 (0.575)	Data Time 0.001 (0.037)	Loss 2.7391 (2.7128)	Entropy 1.18914 (1.19092)	Top-1 acc 59.766 (58.706)	Top-5 acc 77.344 (80.147)	lr 0.01658
Train [48][1010/3239]	Time 0.233 (0.572)	Data Time 0.002 (0.036)	Loss 2.5090 (2.7126)	Entropy 1.18910 (1.19091)	Top-1 acc 66.016 (58.712)	Top-5 acc 83.203 (80.147)	lr 0.01658
Train [48][1020/3239]	Time 0.234 (0.571)	Data Time 0.001 (0.036)	Loss 2.7238 (2.7126)	Entropy 1.18909 (1.19089)	Top-1 acc 59.766 (58.711)	Top-5 acc 80.078 (80.144)	lr 0.01658
Train [48][1030/3239]	Time 0.203 (0.569)	Data Time 0.001 (0.036)	Loss 2.9278 (2.7132)	Entropy 1.18903 (1.19087)	Top-1 acc 54.297 (58.696)	Top-5 acc 76.172 (80.132)	lr 0.01657
Train [48][1040/3239]	Time 0.347 (0.614)	Data Time 0.002 (0.035)	Loss 2.4334 (2.7128)	Entropy 1.18903 (1.19085)	Top-1 acc 62.109 (58.703)	Top-5 acc 86.328 (80.137)	lr 0.01657
Train [48][1050/3239]	Time 0.226 (0.613)	Data Time 0.002 (0.035)	Loss 2.7756 (2.7132)	Entropy 1.18901 (1.19084)	Top-1 acc 57.422 (58.687)	Top-5 acc 80.078 (80.129)	lr 0.01657
Train [48][1060/3239]	Time 0.223 (0.611)	Data Time 0.001 (0.035)	Loss 2.7627 (2.7130)	Entropy 1.18898 (1.19082)	Top-1 acc 60.156 (58.696)	Top-5 acc 80.469 (80.138)	lr 0.01657
Train [48][1070/3239]	Time 0.254 (0.610)	Data Time 0.002 (0.034)	Loss 2.6573 (2.7130)	Entropy 1.18893 (1.19080)	Top-1 acc 61.328 (58.695)	Top-5 acc 80.859 (80.138)	lr 0.01657
Train [48][1080/3239]	Time 0.232 (0.609)	Data Time 0.002 (0.034)	Loss 2.9066 (2.7135)	Entropy 1.18890 (1.19078)	Top-1 acc 54.688 (58.692)	Top-5 acc 77.734 (80.125)	lr 0.01657
Train [48][1090/3239]	Time 0.212 (0.607)	Data Time 0.001 (0.034)	Loss 2.8910 (2.7141)	Entropy 1.18886 (1.19077)	Top-1 acc 54.297 (58.679)	Top-5 acc 74.219 (80.111)	lr 0.01657
Train [48][1100/3239]	Time 0.263 (0.606)	Data Time 0.001 (0.034)	Loss 2.4649 (2.7132)	Entropy 1.18882 (1.19075)	Top-1 acc 64.453 (58.697)	Top-5 acc 85.938 (80.131)	lr 0.01657
Train [48][1110/3239]	Time 2.658 (0.605)	Data Time 0.001 (0.033)	Loss 2.9156 (2.7137)	Entropy 1.18882 (1.19073)	Top-1 acc 54.688 (58.690)	Top-5 acc 76.172 (80.128)	lr 0.01657
Train [48][1120/3239]	Time 0.216 (0.602)	Data Time 0.001 (0.033)	Loss 2.6797 (2.7136)	Entropy 1.18879 (1.19071)	Top-1 acc 60.547 (58.700)	Top-5 acc 80.469 (80.129)	lr 0.01657
Train [48][1130/3239]	Time 0.229 (0.600)	Data Time 0.001 (0.033)	Loss 2.7145 (2.7134)	Entropy 1.18877 (1.19070)	Top-1 acc 52.734 (58.694)	Top-5 acc 79.297 (80.130)	lr 0.01656
Train [48][1140/3239]	Time 0.225 (0.599)	Data Time 0.001 (0.032)	Loss 2.7458 (2.7136)	Entropy 1.18877 (1.19068)	Top-1 acc 57.031 (58.685)	Top-5 acc 78.906 (80.125)	lr 0.01656
Train [48][1150/3239]	Time 0.266 (0.598)	Data Time 0.001 (0.032)	Loss 2.6478 (2.7138)	Entropy 1.18876 (1.19066)	Top-1 acc 62.109 (58.678)	Top-5 acc 81.641 (80.127)	lr 0.01656
Train [48][1160/3239]	Time 0.322 (0.597)	Data Time 0.001 (0.032)	Loss 2.7707 (2.7134)	Entropy 1.18868 (1.19065)	Top-1 acc 60.156 (58.695)	Top-5 acc 80.078 (80.130)	lr 0.01656
Train [48][1170/3239]	Time 0.256 (0.595)	Data Time 0.001 (0.032)	Loss 2.7105 (2.7140)	Entropy 1.18869 (1.19063)	Top-1 acc 59.375 (58.691)	Top-5 acc 78.516 (80.111)	lr 0.01656
Train [48][1180/3239]	Time 0.211 (0.594)	Data Time 0.001 (0.031)	Loss 2.8286 (2.7139)	Entropy 1.18860 (1.19061)	Top-1 acc 55.078 (58.694)	Top-5 acc 77.344 (80.112)	lr 0.01656
Train [48][1190/3239]	Time 0.214 (0.593)	Data Time 0.001 (0.031)	Loss 2.5403 (2.7139)	Entropy 1.18854 (1.19060)	Top-1 acc 65.234 (58.688)	Top-5 acc 83.594 (80.107)	lr 0.01656
Train [48][1200/3239]	Time 0.213 (0.592)	Data Time 0.001 (0.031)	Loss 2.6802 (2.7143)	Entropy 1.18850 (1.19058)	Top-1 acc 60.156 (58.682)	Top-5 acc 81.250 (80.100)	lr 0.01656
Train [48][1210/3239]	Time 0.348 (0.591)	Data Time 0.001 (0.031)	Loss 2.5929 (2.7139)	Entropy 1.18846 (1.19056)	Top-1 acc 63.281 (58.694)	Top-5 acc 82.422 (80.110)	lr 0.01656
Train [48][1220/3239]	Time 2.486 (0.590)	Data Time 0.001 (0.030)	Loss 2.8663 (2.7135)	Entropy 1.18846 (1.19054)	Top-1 acc 56.250 (58.699)	Top-5 acc 75.781 (80.118)	lr 0.01656
Train [48][1230/3239]	Time 0.242 (0.587)	Data Time 0.001 (0.030)	Loss 2.6812 (2.7139)	Entropy 1.18836 (1.19053)	Top-1 acc 59.375 (58.685)	Top-5 acc 80.078 (80.112)	lr 0.01656
Train [48][1240/3239]	Time 0.232 (0.586)	Data Time 0.001 (0.030)	Loss 2.8636 (2.7145)	Entropy 1.18837 (1.19051)	Top-1 acc 59.375 (58.682)	Top-5 acc 77.734 (80.103)	lr 0.01655
Train [48][1250/3239]	Time 0.247 (0.585)	Data Time 0.002 (0.030)	Loss 2.8002 (2.7148)	Entropy 1.18837 (1.19049)	Top-1 acc 55.859 (58.675)	Top-5 acc 78.906 (80.096)	lr 0.01655
Train [48][1260/3239]	Time 0.312 (0.584)	Data Time 0.002 (0.029)	Loss 2.7900 (2.7150)	Entropy 1.18826 (1.19047)	Top-1 acc 56.641 (58.670)	Top-5 acc 78.125 (80.091)	lr 0.01655
Train [48][1270/3239]	Time 0.243 (0.583)	Data Time 0.001 (0.029)	Loss 2.6745 (2.7153)	Entropy 1.18817 (1.19046)	Top-1 acc 62.500 (58.664)	Top-5 acc 80.469 (80.079)	lr 0.01655
Train [48][1280/3239]	Time 0.240 (0.582)	Data Time 0.001 (0.029)	Loss 2.5562 (2.7152)	Entropy 1.18819 (1.19044)	Top-1 acc 62.500 (58.671)	Top-5 acc 84.766 (80.084)	lr 0.01655
Train [48][1290/3239]	Time 0.227 (0.581)	Data Time 0.001 (0.029)	Loss 2.7020 (2.7154)	Entropy 1.18810 (1.19042)	Top-1 acc 59.766 (58.670)	Top-5 acc 78.125 (80.080)	lr 0.01655
Train [48][1300/3239]	Time 0.227 (0.580)	Data Time 0.001 (0.029)	Loss 2.6482 (2.7158)	Entropy 1.18806 (1.19040)	Top-1 acc 60.156 (58.666)	Top-5 acc 82.031 (80.072)	lr 0.01655
Train [48][1310/3239]	Time 0.226 (0.579)	Data Time 0.001 (0.028)	Loss 2.9841 (2.7160)	Entropy 1.18803 (1.19039)	Top-1 acc 53.125 (58.664)	Top-5 acc 73.828 (80.070)	lr 0.01655
Train [48][1320/3239]	Time 0.233 (0.578)	Data Time 0.001 (0.028)	Loss 2.5779 (2.7161)	Entropy 1.18795 (1.19037)	Top-1 acc 61.719 (58.665)	Top-5 acc 81.641 (80.074)	lr 0.01655
Train [48][1330/3239]	Time 2.397 (0.577)	Data Time 0.001 (0.028)	Loss 2.8436 (2.7164)	Entropy 1.18795 (1.19035)	Top-1 acc 56.250 (58.664)	Top-5 acc 75.781 (80.070)	lr 0.01655
Train [48][1340/3239]	Time 0.231 (0.575)	Data Time 0.001 (0.028)	Loss 2.7461 (2.7166)	Entropy 1.18795 (1.19033)	Top-1 acc 58.984 (58.664)	Top-5 acc 80.859 (80.065)	lr 0.01654
Train [48][1350/3239]	Time 0.221 (0.574)	Data Time 0.001 (0.028)	Loss 2.5740 (2.7165)	Entropy 1.18790 (1.19031)	Top-1 acc 60.156 (58.665)	Top-5 acc 80.469 (80.062)	lr 0.01654
Train [48][1360/3239]	Time 0.226 (0.573)	Data Time 0.001 (0.027)	Loss 2.8977 (2.7167)	Entropy 1.18789 (1.19030)	Top-1 acc 56.641 (58.661)	Top-5 acc 75.000 (80.053)	lr 0.01654
Train [48][1370/3239]	Time 0.226 (0.572)	Data Time 0.001 (0.027)	Loss 2.6259 (2.7166)	Entropy 1.18786 (1.19028)	Top-1 acc 60.547 (58.674)	Top-5 acc 82.812 (80.059)	lr 0.01654
Train [48][1380/3239]	Time 0.239 (0.571)	Data Time 0.002 (0.027)	Loss 2.6209 (2.7170)	Entropy 1.18787 (1.19026)	Top-1 acc 63.672 (58.665)	Top-5 acc 81.641 (80.047)	lr 0.01654
Train [48][1390/3239]	Time 0.233 (0.571)	Data Time 0.002 (0.027)	Loss 2.8106 (2.7168)	Entropy 1.18783 (1.19024)	Top-1 acc 57.422 (58.666)	Top-5 acc 78.516 (80.053)	lr 0.01654
Train [48][1400/3239]	Time 0.272 (0.603)	Data Time 0.003 (0.027)	Loss 2.7104 (2.7176)	Entropy 1.18783 (1.19023)	Top-1 acc 57.422 (58.643)	Top-5 acc 82.031 (80.041)	lr 0.01654
Train [48][1410/3239]	Time 0.326 (0.603)	Data Time 0.002 (0.027)	Loss 2.6339 (2.7177)	Entropy 1.18781 (1.19021)	Top-1 acc 62.500 (58.650)	Top-5 acc 81.641 (80.037)	lr 0.01654
Train [48][1420/3239]	Time 0.217 (0.602)	Data Time 0.001 (0.026)	Loss 2.7963 (2.7173)	Entropy 1.18778 (1.19019)	Top-1 acc 57.031 (58.657)	Top-5 acc 76.953 (80.041)	lr 0.01654
Train [48][1430/3239]	Time 0.237 (0.601)	Data Time 0.002 (0.026)	Loss 2.7946 (2.7173)	Entropy 1.18773 (1.19018)	Top-1 acc 60.156 (58.665)	Top-5 acc 79.297 (80.041)	lr 0.01654
Train [48][1440/3239]	Time 2.385 (0.600)	Data Time 0.001 (0.026)	Loss 2.6651 (2.7172)	Entropy 1.18773 (1.19016)	Top-1 acc 59.766 (58.666)	Top-5 acc 83.203 (80.044)	lr 0.01654
Train [48][1450/3239]	Time 0.224 (0.597)	Data Time 0.001 (0.026)	Loss 2.7644 (2.7172)	Entropy 1.18771 (1.19014)	Top-1 acc 57.031 (58.668)	Top-5 acc 77.344 (80.047)	lr 0.01653
Train [48][1460/3239]	Time 0.332 (0.596)	Data Time 0.001 (0.026)	Loss 2.5372 (2.7171)	Entropy 1.18766 (1.19012)	Top-1 acc 65.234 (58.668)	Top-5 acc 82.812 (80.044)	lr 0.01653
Train [48][1470/3239]	Time 0.225 (0.595)	Data Time 0.001 (0.026)	Loss 2.6232 (2.7172)	Entropy 1.18767 (1.19011)	Top-1 acc 62.500 (58.666)	Top-5 acc 82.422 (80.041)	lr 0.01653
Train [48][1480/3239]	Time 0.229 (0.595)	Data Time 0.001 (0.025)	Loss 2.8390 (2.7173)	Entropy 1.18766 (1.19009)	Top-1 acc 55.469 (58.660)	Top-5 acc 77.734 (80.045)	lr 0.01653
Train [48][1490/3239]	Time 0.149 (0.594)	Data Time 0.002 (0.025)	Loss 2.6483 (2.7174)	Entropy 1.18764 (1.19008)	Top-1 acc 57.812 (58.654)	Top-5 acc 81.641 (80.049)	lr 0.01653
Train [48][1500/3239]	Time 0.228 (0.593)	Data Time 0.001 (0.025)	Loss 2.9012 (2.7174)	Entropy 1.18753 (1.19006)	Top-1 acc 55.469 (58.650)	Top-5 acc 75.391 (80.049)	lr 0.01653
Train [48][1510/3239]	Time 0.229 (0.592)	Data Time 0.001 (0.025)	Loss 2.6975 (2.7171)	Entropy 1.18761 (1.19004)	Top-1 acc 57.422 (58.656)	Top-5 acc 80.469 (80.061)	lr 0.01653
Train [48][1520/3239]	Time 0.207 (0.591)	Data Time 0.001 (0.025)	Loss 2.7097 (2.7169)	Entropy 1.18757 (1.19003)	Top-1 acc 59.375 (58.661)	Top-5 acc 80.859 (80.067)	lr 0.01653
Train [48][1530/3239]	Time 0.213 (0.590)	Data Time 0.001 (0.025)	Loss 2.6322 (2.7174)	Entropy 1.18754 (1.19001)	Top-1 acc 62.109 (58.653)	Top-5 acc 82.812 (80.059)	lr 0.01653
Train [48][1540/3239]	Time 0.244 (0.589)	Data Time 0.001 (0.025)	Loss 2.6973 (2.7176)	Entropy 1.18749 (1.18999)	Top-1 acc 59.766 (58.649)	Top-5 acc 81.250 (80.057)	lr 0.01653
Train [48][1550/3239]	Time 2.546 (0.589)	Data Time 0.001 (0.024)	Loss 2.7475 (2.7176)	Entropy 1.18749 (1.18998)	Top-1 acc 56.250 (58.645)	Top-5 acc 78.906 (80.058)	lr 0.01652
Train [48][1560/3239]	Time 0.303 (0.586)	Data Time 0.001 (0.024)	Loss 2.8608 (2.7174)	Entropy 1.18749 (1.18996)	Top-1 acc 60.156 (58.653)	Top-5 acc 79.297 (80.063)	lr 0.01652
Train [48][1570/3239]	Time 0.222 (0.586)	Data Time 0.001 (0.024)	Loss 2.6675 (2.7172)	Entropy 1.18744 (1.18995)	Top-1 acc 63.281 (58.661)	Top-5 acc 79.297 (80.066)	lr 0.01652
Train [48][1580/3239]	Time 0.221 (0.585)	Data Time 0.001 (0.024)	Loss 2.8112 (2.7174)	Entropy 1.18738 (1.18993)	Top-1 acc 58.594 (58.654)	Top-5 acc 78.906 (80.063)	lr 0.01652
Train [48][1590/3239]	Time 0.220 (0.584)	Data Time 0.001 (0.024)	Loss 2.7547 (2.7175)	Entropy 1.18734 (1.18991)	Top-1 acc 57.031 (58.649)	Top-5 acc 78.125 (80.060)	lr 0.01652
Train [48][1600/3239]	Time 0.228 (0.583)	Data Time 0.001 (0.024)	Loss 2.7072 (2.7177)	Entropy 1.18732 (1.18990)	Top-1 acc 59.766 (58.643)	Top-5 acc 79.297 (80.057)	lr 0.01652
Train [48][1610/3239]	Time 0.325 (0.582)	Data Time 0.001 (0.024)	Loss 2.7080 (2.7176)	Entropy 1.18732 (1.18988)	Top-1 acc 59.375 (58.651)	Top-5 acc 79.297 (80.056)	lr 0.01652
Train [48][1620/3239]	Time 0.232 (0.582)	Data Time 0.001 (0.023)	Loss 2.8187 (2.7179)	Entropy 1.18722 (1.18987)	Top-1 acc 56.641 (58.641)	Top-5 acc 81.250 (80.049)	lr 0.01652
Train [48][1630/3239]	Time 0.233 (0.581)	Data Time 0.002 (0.023)	Loss 2.6929 (2.7180)	Entropy 1.18720 (1.18985)	Top-1 acc 60.156 (58.639)	Top-5 acc 79.688 (80.048)	lr 0.01652
Train [48][1640/3239]	Time 0.221 (0.580)	Data Time 0.001 (0.023)	Loss 2.8165 (2.7188)	Entropy 1.18718 (1.18983)	Top-1 acc 55.078 (58.625)	Top-5 acc 80.469 (80.038)	lr 0.01652
Train [48][1650/3239]	Time 0.263 (0.580)	Data Time 0.001 (0.023)	Loss 3.0447 (2.7188)	Entropy 1.18716 (1.18982)	Top-1 acc 51.953 (58.627)	Top-5 acc 72.656 (80.032)	lr 0.01652
Train [48][1660/3239]	Time 2.540 (0.579)	Data Time 0.001 (0.023)	Loss 2.8675 (2.7190)	Entropy 1.18716 (1.18980)	Top-1 acc 54.297 (58.621)	Top-5 acc 80.469 (80.030)	lr 0.01651
Train [48][1670/3239]	Time 0.209 (0.577)	Data Time 0.001 (0.023)	Loss 2.6347 (2.7190)	Entropy 1.18712 (1.18978)	Top-1 acc 61.719 (58.622)	Top-5 acc 80.859 (80.033)	lr 0.01651
Train [48][1680/3239]	Time 0.229 (0.576)	Data Time 0.001 (0.023)	Loss 2.7222 (2.7194)	Entropy 1.18706 (1.18977)	Top-1 acc 57.812 (58.608)	Top-5 acc 79.297 (80.027)	lr 0.01651
Train [48][1690/3239]	Time 0.219 (0.575)	Data Time 0.001 (0.022)	Loss 2.8322 (2.7196)	Entropy 1.18701 (1.18975)	Top-1 acc 58.594 (58.609)	Top-5 acc 76.953 (80.019)	lr 0.01651
Train [48][1700/3239]	Time 0.255 (0.575)	Data Time 0.001 (0.022)	Loss 2.8979 (2.7196)	Entropy 1.18697 (1.18974)	Top-1 acc 51.953 (58.609)	Top-5 acc 76.562 (80.017)	lr 0.01651
Train [48][1710/3239]	Time 0.216 (0.574)	Data Time 0.002 (0.022)	Loss 2.9365 (2.7194)	Entropy 1.18696 (1.18972)	Top-1 acc 57.031 (58.613)	Top-5 acc 75.000 (80.023)	lr 0.01651
Train [48][1720/3239]	Time 0.196 (0.573)	Data Time 0.001 (0.022)	Loss 2.5348 (2.7196)	Entropy 1.18692 (1.18970)	Top-1 acc 60.938 (58.608)	Top-5 acc 82.812 (80.018)	lr 0.01651
Train [48][1730/3239]	Time 0.202 (0.573)	Data Time 0.001 (0.022)	Loss 2.7963 (2.7199)	Entropy 1.18689 (1.18969)	Top-1 acc 57.031 (58.609)	Top-5 acc 78.516 (80.015)	lr 0.01651
Train [48][1740/3239]	Time 0.226 (0.572)	Data Time 0.001 (0.022)	Loss 2.9253 (2.7201)	Entropy 1.18689 (1.18967)	Top-1 acc 55.469 (58.603)	Top-5 acc 77.344 (80.013)	lr 0.01651
Train [48][1750/3239]	Time 0.213 (0.571)	Data Time 0.001 (0.022)	Loss 2.9331 (2.7204)	Entropy 1.18684 (1.18966)	Top-1 acc 53.125 (58.591)	Top-5 acc 76.562 (80.006)	lr 0.01651
Train [48][1760/3239]	Time 0.253 (0.599)	Data Time 0.003 (0.022)	Loss 2.7473 (2.7207)	Entropy 1.18683 (1.18964)	Top-1 acc 56.250 (58.584)	Top-5 acc 78.906 (80.001)	lr 0.01650
Train [48][1770/3239]	Time 2.593 (0.598)	Data Time 0.002 (0.022)	Loss 2.7061 (2.7206)	Entropy 1.18683 (1.18962)	Top-1 acc 58.594 (58.586)	Top-5 acc 80.469 (80.004)	lr 0.01650
Train [48][1780/3239]	Time 0.244 (0.596)	Data Time 0.002 (0.021)	Loss 2.8021 (2.7208)	Entropy 1.18681 (1.18961)	Top-1 acc 56.250 (58.585)	Top-5 acc 78.125 (80.000)	lr 0.01650
Train [48][1790/3239]	Time 0.228 (0.595)	Data Time 0.001 (0.021)	Loss 2.7135 (2.7214)	Entropy 1.18678 (1.18959)	Top-1 acc 57.031 (58.570)	Top-5 acc 81.250 (79.989)	lr 0.01650
Train [48][1800/3239]	Time 0.242 (0.595)	Data Time 0.001 (0.021)	Loss 2.8551 (2.7211)	Entropy 1.18679 (1.18958)	Top-1 acc 59.375 (58.579)	Top-5 acc 77.344 (79.991)	lr 0.01650
Train [48][1810/3239]	Time 0.234 (0.594)	Data Time 0.001 (0.021)	Loss 2.8428 (2.7211)	Entropy 1.18676 (1.18956)	Top-1 acc 56.250 (58.582)	Top-5 acc 77.344 (79.989)	lr 0.01650
Train [48][1820/3239]	Time 0.249 (0.593)	Data Time 0.001 (0.021)	Loss 2.7847 (2.7212)	Entropy 1.18672 (1.18955)	Top-1 acc 54.688 (58.576)	Top-5 acc 78.125 (79.987)	lr 0.01650
Train [48][1830/3239]	Time 0.261 (0.593)	Data Time 0.001 (0.021)	Loss 2.6242 (2.7209)	Entropy 1.18668 (1.18953)	Top-1 acc 58.594 (58.576)	Top-5 acc 83.203 (79.997)	lr 0.01650
Train [48][1840/3239]	Time 0.178 (0.592)	Data Time 0.001 (0.021)	Loss 2.5372 (2.7207)	Entropy 1.18665 (1.18951)	Top-1 acc 62.109 (58.579)	Top-5 acc 81.641 (79.999)	lr 0.01650
Train [48][1850/3239]	Time 0.327 (0.591)	Data Time 0.002 (0.021)	Loss 2.6365 (2.7208)	Entropy 1.18659 (1.18950)	Top-1 acc 58.594 (58.580)	Top-5 acc 82.812 (80.002)	lr 0.01650
Train [48][1860/3239]	Time 0.209 (0.591)	Data Time 0.001 (0.021)	Loss 2.5113 (2.7207)	Entropy 1.18655 (1.18948)	Top-1 acc 68.750 (58.587)	Top-5 acc 82.422 (80.003)	lr 0.01650
Train [48][1870/3239]	Time 0.270 (0.590)	Data Time 0.001 (0.021)	Loss 2.7856 (2.7208)	Entropy 1.18650 (1.18947)	Top-1 acc 54.688 (58.581)	Top-5 acc 77.734 (80.005)	lr 0.01649
Train [48][1880/3239]	Time 2.341 (0.589)	Data Time 0.002 (0.020)	Loss 2.6865 (2.7208)	Entropy 1.18650 (1.18945)	Top-1 acc 62.500 (58.577)	Top-5 acc 80.078 (80.004)	lr 0.01649
Train [48][1890/3239]	Time 0.242 (0.587)	Data Time 0.001 (0.020)	Loss 2.6394 (2.7212)	Entropy 1.18648 (1.18944)	Top-1 acc 57.422 (58.572)	Top-5 acc 82.812 (79.996)	lr 0.01649
Train [48][1900/3239]	Time 0.244 (0.587)	Data Time 0.001 (0.020)	Loss 2.8107 (2.7215)	Entropy 1.18646 (1.18942)	Top-1 acc 60.156 (58.568)	Top-5 acc 77.734 (79.991)	lr 0.01649
Train [48][1910/3239]	Time 0.203 (0.586)	Data Time 0.001 (0.020)	Loss 2.7737 (2.7214)	Entropy 1.18647 (1.18940)	Top-1 acc 57.031 (58.566)	Top-5 acc 76.953 (79.989)	lr 0.01649
Train [48][1920/3239]	Time 0.211 (0.585)	Data Time 0.001 (0.020)	Loss 2.5205 (2.7216)	Entropy 1.18645 (1.18939)	Top-1 acc 63.672 (58.558)	Top-5 acc 83.984 (79.985)	lr 0.01649
Train [48][1930/3239]	Time 0.227 (0.585)	Data Time 0.001 (0.020)	Loss 2.8189 (2.7216)	Entropy 1.18644 (1.18937)	Top-1 acc 54.297 (58.552)	Top-5 acc 77.344 (79.981)	lr 0.01649
Train [48][1940/3239]	Time 0.222 (0.584)	Data Time 0.001 (0.020)	Loss 2.7983 (2.7220)	Entropy 1.18636 (1.18936)	Top-1 acc 53.906 (58.540)	Top-5 acc 80.859 (79.978)	lr 0.01649
Train [48][1950/3239]	Time 0.334 (0.583)	Data Time 0.001 (0.020)	Loss 2.6530 (2.7224)	Entropy 1.18632 (1.18934)	Top-1 acc 60.156 (58.531)	Top-5 acc 83.203 (79.971)	lr 0.01649
Train [48][1960/3239]	Time 0.212 (0.583)	Data Time 0.001 (0.020)	Loss 2.6583 (2.7227)	Entropy 1.18628 (1.18933)	Top-1 acc 58.203 (58.522)	Top-5 acc 78.125 (79.965)	lr 0.01649
Train [48][1970/3239]	Time 0.236 (0.582)	Data Time 0.001 (0.020)	Loss 2.6420 (2.7227)	Entropy 1.18624 (1.18931)	Top-1 acc 62.109 (58.524)	Top-5 acc 83.594 (79.962)	lr 0.01648
Train [48][1980/3239]	Time 0.240 (0.582)	Data Time 0.001 (0.019)	Loss 2.6985 (2.7229)	Entropy 1.18619 (1.18930)	Top-1 acc 57.812 (58.520)	Top-5 acc 79.688 (79.961)	lr 0.01648
Train [48][1990/3239]	Time 2.411 (0.581)	Data Time 0.001 (0.019)	Loss 2.7977 (2.7233)	Entropy 1.18619 (1.18928)	Top-1 acc 57.422 (58.515)	Top-5 acc 79.297 (79.955)	lr 0.01648
Train [48][2000/3239]	Time 0.318 (0.579)	Data Time 0.001 (0.019)	Loss 2.7487 (2.7232)	Entropy 1.18616 (1.18927)	Top-1 acc 57.812 (58.514)	Top-5 acc 78.906 (79.957)	lr 0.01648
Train [48][2010/3239]	Time 0.216 (0.579)	Data Time 0.001 (0.019)	Loss 2.8485 (2.7233)	Entropy 1.18614 (1.18925)	Top-1 acc 58.203 (58.513)	Top-5 acc 75.000 (79.954)	lr 0.01648
Train [48][2020/3239]	Time 0.248 (0.578)	Data Time 0.001 (0.019)	Loss 2.7023 (2.7231)	Entropy 1.18614 (1.18923)	Top-1 acc 58.984 (58.521)	Top-5 acc 80.859 (79.957)	lr 0.01648
Train [48][2030/3239]	Time 0.228 (0.577)	Data Time 0.001 (0.019)	Loss 2.9307 (2.7232)	Entropy 1.18615 (1.18922)	Top-1 acc 52.344 (58.522)	Top-5 acc 78.125 (79.956)	lr 0.01648
Train [48][2040/3239]	Time 0.239 (0.577)	Data Time 0.001 (0.019)	Loss 2.7164 (2.7231)	Entropy 1.18611 (1.18920)	Top-1 acc 58.594 (58.524)	Top-5 acc 80.859 (79.959)	lr 0.01648
Train [48][2050/3239]	Time 0.180 (0.576)	Data Time 0.001 (0.019)	Loss 2.7713 (2.7231)	Entropy 1.18600 (1.18919)	Top-1 acc 58.984 (58.521)	Top-5 acc 76.953 (79.957)	lr 0.01648
Train [48][2060/3239]	Time 0.225 (0.576)	Data Time 0.001 (0.019)	Loss 2.7573 (2.7234)	Entropy 1.18598 (1.18917)	Top-1 acc 58.203 (58.513)	Top-5 acc 78.906 (79.953)	lr 0.01648
Train [48][2070/3239]	Time 0.216 (0.575)	Data Time 0.002 (0.019)	Loss 2.4554 (2.7234)	Entropy 1.18595 (1.18916)	Top-1 acc 64.062 (58.512)	Top-5 acc 85.938 (79.955)	lr 0.01647
Train [48][2080/3239]	Time 0.207 (0.575)	Data Time 0.001 (0.019)	Loss 2.5252 (2.7232)	Entropy 1.18593 (1.18914)	Top-1 acc 62.891 (58.515)	Top-5 acc 83.203 (79.959)	lr 0.01647
Train [48][2090/3239]	Time 0.217 (0.574)	Data Time 0.001 (0.019)	Loss 2.5915 (2.7229)	Entropy 1.18581 (1.18913)	Top-1 acc 63.281 (58.519)	Top-5 acc 83.984 (79.966)	lr 0.01647
Train [48][2100/3239]	Time 2.506 (0.574)	Data Time 0.001 (0.018)	Loss 2.6479 (2.7231)	Entropy 1.18581 (1.18911)	Top-1 acc 58.984 (58.520)	Top-5 acc 82.031 (79.962)	lr 0.01647
Train [48][2110/3239]	Time 0.221 (0.572)	Data Time 0.001 (0.018)	Loss 2.6846 (2.7229)	Entropy 1.18589 (1.18910)	Top-1 acc 58.203 (58.522)	Top-5 acc 79.297 (79.964)	lr 0.01647
Train [48][2120/3239]	Time 0.229 (0.571)	Data Time 0.001 (0.018)	Loss 2.6301 (2.7229)	Entropy 1.18585 (1.18908)	Top-1 acc 57.031 (58.518)	Top-5 acc 82.031 (79.964)	lr 0.01647
Train [48][2130/3239]	Time 0.331 (0.594)	Data Time 0.002 (0.018)	Loss 2.6970 (2.7232)	Entropy 1.18583 (1.18907)	Top-1 acc 56.641 (58.511)	Top-5 acc 81.641 (79.954)	lr 0.01647
Train [48][2140/3239]	Time 0.316 (0.594)	Data Time 0.002 (0.018)	Loss 2.5883 (2.7231)	Entropy 1.18582 (1.18905)	Top-1 acc 59.375 (58.509)	Top-5 acc 83.203 (79.955)	lr 0.01647
Train [48][2150/3239]	Time 0.240 (0.593)	Data Time 0.001 (0.018)	Loss 2.9541 (2.7233)	Entropy 1.18577 (1.18904)	Top-1 acc 52.734 (58.503)	Top-5 acc 76.172 (79.952)	lr 0.01647
Train [48][2160/3239]	Time 0.238 (0.592)	Data Time 0.002 (0.018)	Loss 2.5850 (2.7231)	Entropy 1.18572 (1.18902)	Top-1 acc 61.719 (58.508)	Top-5 acc 84.375 (79.956)	lr 0.01647
Train [48][2170/3239]	Time 0.218 (0.592)	Data Time 0.001 (0.018)	Loss 2.8365 (2.7232)	Entropy 1.18571 (1.18900)	Top-1 acc 57.812 (58.509)	Top-5 acc 75.000 (79.954)	lr 0.01647
Train [48][2180/3239]	Time 0.213 (0.591)	Data Time 0.001 (0.018)	Loss 2.6827 (2.7233)	Entropy 1.18566 (1.18899)	Top-1 acc 63.281 (58.509)	Top-5 acc 76.953 (79.948)	lr 0.01646
Train [48][2190/3239]	Time 0.320 (0.591)	Data Time 0.001 (0.018)	Loss 2.8814 (2.7236)	Entropy 1.18565 (1.18897)	Top-1 acc 53.516 (58.500)	Top-5 acc 77.344 (79.944)	lr 0.01646
Train [48][2200/3239]	Time 0.234 (0.590)	Data Time 0.001 (0.018)	Loss 2.6442 (2.7235)	Entropy 1.18563 (1.18896)	Top-1 acc 60.938 (58.504)	Top-5 acc 81.641 (79.944)	lr 0.01646
Train [48][2210/3239]	Time 2.532 (0.589)	Data Time 0.001 (0.018)	Loss 2.8094 (2.7236)	Entropy 1.18563 (1.18894)	Top-1 acc 58.984 (58.505)	Top-5 acc 78.125 (79.944)	lr 0.01646
Train [48][2220/3239]	Time 0.237 (0.588)	Data Time 0.001 (0.018)	Loss 2.7093 (2.7237)	Entropy 1.18560 (1.18893)	Top-1 acc 61.328 (58.506)	Top-5 acc 81.250 (79.943)	lr 0.01646
Train [48][2230/3239]	Time 0.218 (0.587)	Data Time 0.001 (0.017)	Loss 2.6013 (2.7234)	Entropy 1.18558 (1.18891)	Top-1 acc 61.328 (58.510)	Top-5 acc 80.859 (79.948)	lr 0.01646
Train [48][2240/3239]	Time 0.346 (0.587)	Data Time 0.001 (0.017)	Loss 2.8704 (2.7239)	Entropy 1.18552 (1.18890)	Top-1 acc 55.469 (58.499)	Top-5 acc 76.172 (79.937)	lr 0.01646
Train [48][2250/3239]	Time 0.228 (0.586)	Data Time 0.001 (0.017)	Loss 2.7694 (2.7236)	Entropy 1.18548 (1.18888)	Top-1 acc 57.422 (58.507)	Top-5 acc 76.953 (79.941)	lr 0.01646
Train [48][2260/3239]	Time 0.209 (0.586)	Data Time 0.001 (0.017)	Loss 2.6492 (2.7238)	Entropy 1.18542 (1.18887)	Top-1 acc 63.672 (58.499)	Top-5 acc 80.859 (79.933)	lr 0.01646
Train [48][2270/3239]	Time 0.244 (0.585)	Data Time 0.001 (0.017)	Loss 2.7837 (2.7240)	Entropy 1.18540 (1.18885)	Top-1 acc 58.594 (58.492)	Top-5 acc 78.516 (79.929)	lr 0.01646
Train [48][2280/3239]	Time 0.216 (0.584)	Data Time 0.001 (0.017)	Loss 2.4609 (2.7239)	Entropy 1.18536 (1.18884)	Top-1 acc 64.844 (58.497)	Top-5 acc 84.766 (79.927)	lr 0.01645
Train [48][2290/3239]	Time 0.355 (0.584)	Data Time 0.001 (0.017)	Loss 2.7394 (2.7239)	Entropy 1.18532 (1.18882)	Top-1 acc 57.422 (58.496)	Top-5 acc 80.078 (79.927)	lr 0.01645
Train [48][2300/3239]	Time 0.242 (0.583)	Data Time 0.001 (0.017)	Loss 2.6584 (2.7242)	Entropy 1.18532 (1.18881)	Top-1 acc 59.766 (58.491)	Top-5 acc 80.078 (79.925)	lr 0.01645
Train [48][2310/3239]	Time 0.221 (0.583)	Data Time 0.001 (0.017)	Loss 2.9307 (2.7243)	Entropy 1.18531 (1.18879)	Top-1 acc 55.859 (58.488)	Top-5 acc 78.516 (79.924)	lr 0.01645
Train [48][2320/3239]	Time 2.385 (0.582)	Data Time 0.001 (0.017)	Loss 2.7541 (2.7244)	Entropy 1.18531 (1.18878)	Top-1 acc 57.031 (58.484)	Top-5 acc 79.297 (79.919)	lr 0.01645
Train [48][2330/3239]	Time 0.254 (0.581)	Data Time 0.001 (0.017)	Loss 2.8674 (2.7248)	Entropy 1.18525 (1.18876)	Top-1 acc 54.297 (58.472)	Top-5 acc 77.734 (79.909)	lr 0.01645
Train [48][2340/3239]	Time 0.230 (0.580)	Data Time 0.001 (0.017)	Loss 2.6659 (2.7247)	Entropy 1.18527 (1.18875)	Top-1 acc 61.719 (58.475)	Top-5 acc 81.641 (79.910)	lr 0.01645
Train [48][2350/3239]	Time 0.258 (0.580)	Data Time 0.002 (0.017)	Loss 2.6276 (2.7247)	Entropy 1.18522 (1.18873)	Top-1 acc 58.984 (58.476)	Top-5 acc 84.375 (79.912)	lr 0.01645
Train [48][2360/3239]	Time 0.213 (0.579)	Data Time 0.001 (0.017)	Loss 2.7691 (2.7251)	Entropy 1.18519 (1.18872)	Top-1 acc 57.812 (58.468)	Top-5 acc 78.906 (79.902)	lr 0.01645
Train [48][2370/3239]	Time 0.220 (0.579)	Data Time 0.001 (0.017)	Loss 2.4656 (2.7249)	Entropy 1.18516 (1.18870)	Top-1 acc 64.453 (58.470)	Top-5 acc 85.547 (79.905)	lr 0.01645
Train [48][2380/3239]	Time 0.217 (0.578)	Data Time 0.001 (0.016)	Loss 2.5937 (2.7252)	Entropy 1.18513 (1.18869)	Top-1 acc 60.547 (58.467)	Top-5 acc 82.422 (79.898)	lr 0.01645
Train [48][2390/3239]	Time 0.220 (0.578)	Data Time 0.001 (0.016)	Loss 2.8148 (2.7254)	Entropy 1.18512 (1.18867)	Top-1 acc 58.203 (58.464)	Top-5 acc 77.344 (79.894)	lr 0.01644
Train [48][2400/3239]	Time 0.226 (0.577)	Data Time 0.001 (0.016)	Loss 2.7848 (2.7255)	Entropy 1.18509 (1.18866)	Top-1 acc 57.422 (58.461)	Top-5 acc 78.516 (79.891)	lr 0.01644
Train [48][2410/3239]	Time 0.236 (0.577)	Data Time 0.001 (0.016)	Loss 2.7582 (2.7256)	Entropy 1.18506 (1.18864)	Top-1 acc 58.203 (58.460)	Top-5 acc 80.859 (79.892)	lr 0.01644
Train [48][2420/3239]	Time 0.283 (0.577)	Data Time 0.001 (0.016)	Loss 2.7031 (2.7257)	Entropy 1.18505 (1.18863)	Top-1 acc 57.422 (58.456)	Top-5 acc 80.469 (79.893)	lr 0.01644
Train [48][2430/3239]	Time 2.445 (0.576)	Data Time 0.001 (0.016)	Loss 2.9075 (2.7259)	Entropy 1.18505 (1.18861)	Top-1 acc 54.688 (58.449)	Top-5 acc 77.344 (79.889)	lr 0.01644
Train [48][2440/3239]	Time 0.225 (0.575)	Data Time 0.001 (0.016)	Loss 2.6361 (2.7259)	Entropy 1.18493 (1.18860)	Top-1 acc 60.156 (58.447)	Top-5 acc 80.859 (79.889)	lr 0.01644
Train [48][2450/3239]	Time 0.237 (0.574)	Data Time 0.002 (0.016)	Loss 2.5790 (2.7260)	Entropy 1.18495 (1.18858)	Top-1 acc 59.375 (58.440)	Top-5 acc 82.422 (79.887)	lr 0.01644
Train [48][2460/3239]	Time 0.236 (0.574)	Data Time 0.001 (0.016)	Loss 2.7175 (2.7261)	Entropy 1.18479 (1.18857)	Top-1 acc 58.203 (58.438)	Top-5 acc 80.859 (79.888)	lr 0.01644
Train [48][2470/3239]	Time 0.208 (0.573)	Data Time 0.001 (0.016)	Loss 2.5278 (2.7260)	Entropy 1.18478 (1.18855)	Top-1 acc 63.281 (58.440)	Top-5 acc 83.984 (79.890)	lr 0.01644
Train [48][2480/3239]	Time 0.289 (0.573)	Data Time 0.001 (0.016)	Loss 2.7721 (2.7261)	Entropy 1.18480 (1.18854)	Top-1 acc 55.078 (58.433)	Top-5 acc 79.688 (79.891)	lr 0.01644
Train [48][2490/3239]	Time 0.324 (0.591)	Data Time 0.004 (0.016)	Loss 2.7404 (2.7258)	Entropy 1.18476 (1.18852)	Top-1 acc 58.203 (58.433)	Top-5 acc 81.641 (79.901)	lr 0.01643
Train [48][2500/3239]	Time 0.229 (0.591)	Data Time 0.002 (0.016)	Loss 2.7340 (2.7259)	Entropy 1.18477 (1.18851)	Top-1 acc 57.812 (58.429)	Top-5 acc 77.734 (79.896)	lr 0.01643
Train [48][2510/3239]	Time 0.257 (0.590)	Data Time 0.002 (0.016)	Loss 2.9449 (2.7261)	Entropy 1.18471 (1.18849)	Top-1 acc 55.469 (58.427)	Top-5 acc 76.172 (79.892)	lr 0.01643
Train [48][2520/3239]	Time 0.250 (0.590)	Data Time 0.001 (0.016)	Loss 2.7186 (2.7262)	Entropy 1.18466 (1.18848)	Top-1 acc 56.250 (58.420)	Top-5 acc 79.297 (79.889)	lr 0.01643
Train [48][2530/3239]	Time 0.312 (0.589)	Data Time 0.001 (0.016)	Loss 2.7796 (2.7260)	Entropy 1.18463 (1.18846)	Top-1 acc 57.812 (58.426)	Top-5 acc 81.641 (79.896)	lr 0.01643
Train [48][2540/3239]	Time 2.468 (0.589)	Data Time 0.001 (0.016)	Loss 2.7896 (2.7259)	Entropy 1.18463 (1.18845)	Top-1 acc 55.078 (58.424)	Top-5 acc 80.859 (79.898)	lr 0.01643
Train [48][2550/3239]	Time 0.233 (0.587)	Data Time 0.001 (0.015)	Loss 2.8695 (2.7258)	Entropy 1.18461 (1.18843)	Top-1 acc 53.906 (58.427)	Top-5 acc 78.516 (79.899)	lr 0.01643
Train [48][2560/3239]	Time 0.225 (0.587)	Data Time 0.001 (0.015)	Loss 2.6772 (2.7260)	Entropy 1.18460 (1.18842)	Top-1 acc 63.281 (58.426)	Top-5 acc 80.078 (79.897)	lr 0.01643
Train [48][2570/3239]	Time 0.223 (0.586)	Data Time 0.001 (0.015)	Loss 2.8050 (2.7260)	Entropy 1.18460 (1.18840)	Top-1 acc 55.859 (58.425)	Top-5 acc 77.734 (79.895)	lr 0.01643
Train [48][2580/3239]	Time 0.302 (0.586)	Data Time 0.001 (0.015)	Loss 2.7205 (2.7261)	Entropy 1.18455 (1.18839)	Top-1 acc 61.719 (58.423)	Top-5 acc 80.469 (79.890)	lr 0.01643
Train [48][2590/3239]	Time 0.227 (0.586)	Data Time 0.001 (0.015)	Loss 2.8966 (2.7261)	Entropy 1.18449 (1.18837)	Top-1 acc 55.859 (58.425)	Top-5 acc 77.734 (79.888)	lr 0.01643
Train [48][2600/3239]	Time 0.213 (0.585)	Data Time 0.001 (0.015)	Loss 2.8169 (2.7263)	Entropy 1.18448 (1.18836)	Top-1 acc 57.422 (58.421)	Top-5 acc 77.344 (79.887)	lr 0.01642
Train [48][2610/3239]	Time 0.225 (0.584)	Data Time 0.001 (0.015)	Loss 2.6894 (2.7263)	Entropy 1.18447 (1.18834)	Top-1 acc 60.156 (58.423)	Top-5 acc 82.812 (79.888)	lr 0.01642
Train [48][2620/3239]	Time 0.195 (0.584)	Data Time 0.001 (0.015)	Loss 2.7206 (2.7259)	Entropy 1.18436 (1.18833)	Top-1 acc 57.031 (58.432)	Top-5 acc 80.078 (79.897)	lr 0.01642
Train [48][2630/3239]	Time 0.221 (0.584)	Data Time 0.001 (0.015)	Loss 2.7038 (2.7260)	Entropy 1.18426 (1.18831)	Top-1 acc 60.938 (58.433)	Top-5 acc 82.031 (79.895)	lr 0.01642
Train [48][2640/3239]	Time 0.212 (0.583)	Data Time 0.001 (0.015)	Loss 2.7180 (2.7261)	Entropy 1.18422 (1.18830)	Top-1 acc 57.031 (58.430)	Top-5 acc 80.469 (79.892)	lr 0.01642
Train [48][2650/3239]	Time 0.222 (0.583)	Data Time 0.001 (0.015)	Loss 2.8244 (2.7259)	Entropy 1.18422 (1.18828)	Top-1 acc 53.516 (58.437)	Top-5 acc 78.125 (79.895)	lr 0.01642
Train [48][2660/3239]	Time 0.211 (0.582)	Data Time 0.001 (0.015)	Loss 2.6184 (2.7260)	Entropy 1.18414 (1.18827)	Top-1 acc 59.375 (58.435)	Top-5 acc 82.031 (79.894)	lr 0.01642
Train [48][2670/3239]	Time 0.234 (0.582)	Data Time 0.002 (0.015)	Loss 2.8691 (2.7260)	Entropy 1.18410 (1.18825)	Top-1 acc 56.250 (58.436)	Top-5 acc 75.781 (79.892)	lr 0.01642
Train [48][2680/3239]	Time 0.213 (0.581)	Data Time 0.001 (0.015)	Loss 2.7351 (2.7265)	Entropy 1.18409 (1.18824)	Top-1 acc 57.812 (58.424)	Top-5 acc 80.859 (79.886)	lr 0.01642
Train [48][2690/3239]	Time 0.242 (0.581)	Data Time 0.001 (0.015)	Loss 2.6944 (2.7264)	Entropy 1.18402 (1.18822)	Top-1 acc 57.422 (58.426)	Top-5 acc 83.594 (79.891)	lr 0.01642
Train [48][2700/3239]	Time 0.217 (0.580)	Data Time 0.001 (0.015)	Loss 2.6876 (2.7263)	Entropy 1.18400 (1.18821)	Top-1 acc 60.547 (58.432)	Top-5 acc 81.250 (79.893)	lr 0.01641
Train [48][2710/3239]	Time 0.235 (0.580)	Data Time 0.001 (0.015)	Loss 2.7446 (2.7261)	Entropy 1.18396 (1.18819)	Top-1 acc 55.469 (58.435)	Top-5 acc 80.859 (79.897)	lr 0.01641
Train [48][2720/3239]	Time 0.281 (0.579)	Data Time 0.001 (0.015)	Loss 2.7901 (2.7260)	Entropy 1.18396 (1.18817)	Top-1 acc 57.031 (58.439)	Top-5 acc 75.000 (79.901)	lr 0.01641
Train [48][2730/3239]	Time 0.232 (0.579)	Data Time 0.001 (0.015)	Loss 2.6749 (2.7261)	Entropy 1.18392 (1.18816)	Top-1 acc 60.547 (58.436)	Top-5 acc 81.641 (79.897)	lr 0.01641
Train [48][2740/3239]	Time 0.278 (0.578)	Data Time 0.001 (0.015)	Loss 2.6888 (2.7261)	Entropy 1.18377 (1.18814)	Top-1 acc 60.156 (58.434)	Top-5 acc 79.297 (79.897)	lr 0.01641
Train [48][2750/3239]	Time 0.258 (0.578)	Data Time 0.001 (0.014)	Loss 2.9095 (2.7263)	Entropy 1.18376 (1.18813)	Top-1 acc 53.906 (58.428)	Top-5 acc 76.953 (79.893)	lr 0.01641
Train [48][2760/3239]	Time 0.239 (0.578)	Data Time 0.001 (0.014)	Loss 2.6364 (2.7264)	Entropy 1.18368 (1.18811)	Top-1 acc 60.156 (58.426)	Top-5 acc 80.859 (79.888)	lr 0.01641
Train [48][2770/3239]	Time 0.264 (0.577)	Data Time 0.002 (0.014)	Loss 2.7051 (2.7265)	Entropy 1.18363 (1.18810)	Top-1 acc 59.766 (58.423)	Top-5 acc 80.859 (79.889)	lr 0.01641
Train [48][2780/3239]	Time 0.222 (0.577)	Data Time 0.001 (0.014)	Loss 2.8415 (2.7265)	Entropy 1.18362 (1.18808)	Top-1 acc 53.906 (58.423)	Top-5 acc 77.344 (79.891)	lr 0.01641
Train [48][2790/3239]	Time 0.224 (0.577)	Data Time 0.001 (0.014)	Loss 2.8426 (2.7267)	Entropy 1.18339 (1.18806)	Top-1 acc 54.688 (58.421)	Top-5 acc 74.609 (79.887)	lr 0.01641
Train [48][2800/3239]	Time 0.277 (0.576)	Data Time 0.001 (0.014)	Loss 2.8287 (2.7268)	Entropy 1.18336 (1.18805)	Top-1 acc 57.422 (58.422)	Top-5 acc 79.297 (79.886)	lr 0.01640
Train [48][2810/3239]	Time 0.260 (0.576)	Data Time 0.001 (0.014)	Loss 2.8938 (2.7269)	Entropy 1.18331 (1.18803)	Top-1 acc 56.641 (58.420)	Top-5 acc 79.297 (79.885)	lr 0.01640
Train [48][2820/3239]	Time 0.239 (0.575)	Data Time 0.001 (0.014)	Loss 2.8563 (2.7272)	Entropy 1.18326 (1.18801)	Top-1 acc 57.812 (58.416)	Top-5 acc 78.125 (79.878)	lr 0.01640
Train [48][2830/3239]	Time 0.432 (0.592)	Data Time 0.005 (0.014)	Loss 2.7214 (2.7271)	Entropy 1.18318 (1.18800)	Top-1 acc 57.031 (58.417)	Top-5 acc 82.031 (79.877)	lr 0.01640
Train [48][2840/3239]	Time 0.220 (0.591)	Data Time 0.002 (0.014)	Loss 2.8002 (2.7272)	Entropy 1.18292 (1.18798)	Top-1 acc 55.859 (58.416)	Top-5 acc 78.125 (79.877)	lr 0.01640
Train [48][2850/3239]	Time 0.227 (0.591)	Data Time 0.002 (0.014)	Loss 2.6932 (2.7272)	Entropy 1.18292 (1.18796)	Top-1 acc 58.984 (58.416)	Top-5 acc 80.469 (79.876)	lr 0.01640
Train [48][2860/3239]	Time 0.286 (0.590)	Data Time 0.001 (0.014)	Loss 2.8279 (2.7274)	Entropy 1.18293 (1.18794)	Top-1 acc 53.125 (58.408)	Top-5 acc 78.906 (79.874)	lr 0.01640
Train [48][2870/3239]	Time 0.229 (0.590)	Data Time 0.002 (0.014)	Loss 2.6133 (2.7272)	Entropy 1.18292 (1.18793)	Top-1 acc 62.500 (58.411)	Top-5 acc 81.641 (79.875)	lr 0.01640
Train [48][2880/3239]	Time 0.217 (0.590)	Data Time 0.001 (0.014)	Loss 2.6780 (2.7273)	Entropy 1.18279 (1.18791)	Top-1 acc 58.984 (58.408)	Top-5 acc 80.859 (79.873)	lr 0.01640
Train [48][2890/3239]	Time 0.334 (0.589)	Data Time 0.003 (0.014)	Loss 2.8815 (2.7275)	Entropy 1.18278 (1.18789)	Top-1 acc 55.078 (58.404)	Top-5 acc 76.172 (79.869)	lr 0.01640
Train [48][2900/3239]	Time 0.307 (0.589)	Data Time 0.001 (0.014)	Loss 2.8693 (2.7277)	Entropy 1.18277 (1.18787)	Top-1 acc 57.422 (58.400)	Top-5 acc 76.172 (79.865)	lr 0.01640
Train [48][2910/3239]	Time 0.247 (0.588)	Data Time 0.001 (0.014)	Loss 2.9245 (2.7277)	Entropy 1.18276 (1.18786)	Top-1 acc 54.297 (58.399)	Top-5 acc 75.781 (79.864)	lr 0.01639
Train [48][2920/3239]	Time 0.325 (0.588)	Data Time 0.001 (0.014)	Loss 2.6939 (2.7275)	Entropy 1.18275 (1.18784)	Top-1 acc 60.156 (58.404)	Top-5 acc 78.516 (79.867)	lr 0.01639
Train [48][2930/3239]	Time 0.230 (0.587)	Data Time 0.001 (0.014)	Loss 2.6931 (2.7275)	Entropy 1.18269 (1.18782)	Top-1 acc 64.844 (58.405)	Top-5 acc 76.953 (79.862)	lr 0.01639
Train [48][2940/3239]	Time 0.227 (0.587)	Data Time 0.001 (0.014)	Loss 3.0100 (2.7276)	Entropy 1.18267 (1.18780)	Top-1 acc 53.125 (58.406)	Top-5 acc 75.000 (79.865)	lr 0.01639
Train [48][2950/3239]	Time 0.226 (0.587)	Data Time 0.002 (0.014)	Loss 2.7323 (2.7277)	Entropy 1.18266 (1.18779)	Top-1 acc 54.297 (58.403)	Top-5 acc 80.078 (79.863)	lr 0.01639
Train [48][2960/3239]	Time 0.218 (0.586)	Data Time 0.001 (0.014)	Loss 2.6339 (2.7275)	Entropy 1.18265 (1.18777)	Top-1 acc 61.719 (58.406)	Top-5 acc 80.078 (79.867)	lr 0.01639
Train [48][2970/3239]	Time 0.298 (0.586)	Data Time 0.001 (0.014)	Loss 2.7205 (2.7275)	Entropy 1.18257 (1.18775)	Top-1 acc 55.078 (58.403)	Top-5 acc 82.422 (79.866)	lr 0.01639
Train [48][2980/3239]	Time 0.243 (0.585)	Data Time 0.001 (0.013)	Loss 2.8008 (2.7275)	Entropy 1.18249 (1.18773)	Top-1 acc 55.859 (58.407)	Top-5 acc 80.469 (79.865)	lr 0.01639
Train [48][2990/3239]	Time 0.275 (0.585)	Data Time 0.001 (0.013)	Loss 2.7742 (2.7274)	Entropy 1.18260 (1.18772)	Top-1 acc 53.125 (58.408)	Top-5 acc 79.297 (79.868)	lr 0.01639
Train [48][3000/3239]	Time 0.291 (0.585)	Data Time 0.001 (0.013)	Loss 2.8471 (2.7274)	Entropy 1.18261 (1.18770)	Top-1 acc 55.078 (58.408)	Top-5 acc 78.906 (79.869)	lr 0.01639
Train [48][3010/3239]	Time 0.272 (0.584)	Data Time 0.001 (0.013)	Loss 2.7399 (2.7272)	Entropy 1.18258 (1.18768)	Top-1 acc 61.328 (58.412)	Top-5 acc 79.297 (79.872)	lr 0.01638
Train [48][3020/3239]	Time 0.325 (0.584)	Data Time 0.001 (0.013)	Loss 2.8735 (2.7274)	Entropy 1.18257 (1.18767)	Top-1 acc 54.688 (58.414)	Top-5 acc 75.391 (79.868)	lr 0.01638
Train [48][3030/3239]	Time 0.161 (0.583)	Data Time 0.001 (0.013)	Loss 2.6773 (2.7274)	Entropy 1.18257 (1.18765)	Top-1 acc 58.594 (58.410)	Top-5 acc 79.688 (79.868)	lr 0.01638
Train [48][3040/3239]	Time 0.240 (0.583)	Data Time 0.001 (0.013)	Loss 2.9216 (2.7276)	Entropy 1.18256 (1.18763)	Top-1 acc 53.906 (58.406)	Top-5 acc 75.000 (79.864)	lr 0.01638
Train [48][3050/3239]	Time 0.228 (0.583)	Data Time 0.001 (0.013)	Loss 2.3778 (2.7275)	Entropy 1.18255 (1.18762)	Top-1 acc 67.969 (58.407)	Top-5 acc 83.984 (79.863)	lr 0.01638
Train [48][3060/3239]	Time 0.228 (0.582)	Data Time 0.001 (0.013)	Loss 2.7395 (2.7274)	Entropy 1.18253 (1.18760)	Top-1 acc 58.984 (58.408)	Top-5 acc 80.469 (79.866)	lr 0.01638
Train [48][3070/3239]	Time 0.259 (0.582)	Data Time 0.001 (0.013)	Loss 2.5988 (2.7275)	Entropy 1.18288 (1.18758)	Top-1 acc 57.812 (58.408)	Top-5 acc 82.031 (79.861)	lr 0.01638
Train [48][3080/3239]	Time 0.228 (0.581)	Data Time 0.001 (0.013)	Loss 2.7267 (2.7274)	Entropy 1.18280 (1.18757)	Top-1 acc 56.641 (58.409)	Top-5 acc 77.734 (79.866)	lr 0.01638
Train [48][3090/3239]	Time 0.230 (0.581)	Data Time 0.002 (0.013)	Loss 2.6587 (2.7272)	Entropy 1.18279 (1.18755)	Top-1 acc 63.672 (58.409)	Top-5 acc 82.422 (79.869)	lr 0.01638
Train [48][3100/3239]	Time 0.220 (0.581)	Data Time 0.001 (0.013)	Loss 2.6780 (2.7273)	Entropy 1.18275 (1.18754)	Top-1 acc 59.375 (58.408)	Top-5 acc 79.688 (79.869)	lr 0.01638
Train [48][3110/3239]	Time 0.221 (0.580)	Data Time 0.001 (0.013)	Loss 2.8699 (2.7273)	Entropy 1.18269 (1.18752)	Top-1 acc 55.859 (58.407)	Top-5 acc 76.953 (79.870)	lr 0.01638
Train [48][3120/3239]	Time 0.215 (0.580)	Data Time 0.001 (0.013)	Loss 2.8723 (2.7273)	Entropy 1.18268 (1.18751)	Top-1 acc 55.078 (58.407)	Top-5 acc 76.562 (79.868)	lr 0.01637
Train [48][3130/3239]	Time 0.213 (0.579)	Data Time 0.001 (0.013)	Loss 2.6163 (2.7275)	Entropy 1.18265 (1.18749)	Top-1 acc 59.766 (58.400)	Top-5 acc 82.031 (79.865)	lr 0.01637
Train [48][3140/3239]	Time 0.196 (0.579)	Data Time 0.001 (0.013)	Loss 2.7164 (2.7274)	Entropy 1.18263 (1.18748)	Top-1 acc 61.328 (58.404)	Top-5 acc 78.906 (79.866)	lr 0.01637
Train [48][3150/3239]	Time 0.220 (0.579)	Data Time 0.001 (0.013)	Loss 2.9556 (2.7275)	Entropy 1.18262 (1.18746)	Top-1 acc 55.078 (58.399)	Top-5 acc 75.781 (79.865)	lr 0.01637
Train [48][3160/3239]	Time 0.279 (0.593)	Data Time 0.004 (0.013)	Loss 2.8037 (2.7275)	Entropy 1.18260 (1.18744)	Top-1 acc 55.078 (58.400)	Top-5 acc 78.516 (79.867)	lr 0.01637
Train [48][3170/3239]	Time 0.323 (0.593)	Data Time 0.002 (0.013)	Loss 2.8771 (2.7278)	Entropy 1.18256 (1.18743)	Top-1 acc 57.422 (58.393)	Top-5 acc 78.125 (79.864)	lr 0.01637
Train [48][3180/3239]	Time 0.211 (0.592)	Data Time 0.000 (0.013)	Loss 2.7964 (2.7278)	Entropy 1.18250 (1.18741)	Top-1 acc 57.422 (58.396)	Top-5 acc 77.344 (79.862)	lr 0.01637
Train [48][3190/3239]	Time 0.217 (0.592)	Data Time 0.000 (0.013)	Loss 2.7602 (2.7278)	Entropy 1.18249 (1.18740)	Top-1 acc 54.688 (58.394)	Top-5 acc 80.469 (79.861)	lr 0.01637
Train [48][3200/3239]	Time 0.221 (0.591)	Data Time 0.000 (0.013)	Loss 2.9658 (2.7278)	Entropy 1.18247 (1.18738)	Top-1 acc 58.203 (58.394)	Top-5 acc 74.219 (79.863)	lr 0.01637
Train [48][3210/3239]	Time 0.215 (0.591)	Data Time 0.000 (0.013)	Loss 2.5212 (2.7281)	Entropy 1.18231 (1.18737)	Top-1 acc 63.281 (58.389)	Top-5 acc 84.766 (79.858)	lr 0.01637
Train [48][3220/3239]	Time 0.217 (0.590)	Data Time 0.000 (0.013)	Loss 2.6745 (2.7282)	Entropy 1.18229 (1.18735)	Top-1 acc 58.594 (58.386)	Top-5 acc 82.812 (79.856)	lr 0.01636
Train [48][3230/3239]	Time 0.225 (0.590)	Data Time 0.000 (0.013)	Loss 2.7503 (2.7285)	Entropy 1.18219 (1.18734)	Top-1 acc 62.109 (58.380)	Top-5 acc 76.172 (79.851)	lr 0.01636
Train [48][3239/3239]	Time 2.260 (0.589)	Data Time 0.000 (0.013)	Loss 3.0334 (2.7286)	Entropy 1.18219 (1.18732)	Top-1 acc 58.025 (58.380)	Top-5 acc 71.605 (79.849)	lr 0.01636
==========Valid [48/120]	loss 1.575	top-1 acc 64.525 (64.525)	top-5 acc 84.861	Train top-1 58.380	top-5 79.849	Entropy 1.18219	Latency-None: 0.000ms	Flops: 548.34M
Train [49][0/3239]	Time 33.903 (33.903)	Data Time 33.233 (33.233)	Loss 2.7109 (2.7109)	Entropy 1.18211 (1.18211)	Top-1 acc 59.766 (59.766)	Top-5 acc 80.078 (80.078)	lr 0.01636
Train [49][10/3239]	Time 2.871 (3.717)	Data Time 0.002 (3.064)	Loss 2.6994 (2.7183)	Entropy 1.18211 (1.18211)	Top-1 acc 61.719 (59.091)	Top-5 acc 79.297 (80.043)	lr 0.01636
Train [49][20/3239]	Time 0.237 (2.067)	Data Time 0.001 (1.606)	Loss 3.0888 (2.7283)	Entropy 1.18210 (1.18211)	Top-1 acc 50.391 (58.519)	Top-5 acc 72.656 (79.929)	lr 0.01636
Train [49][30/3239]	Time 0.312 (1.556)	Data Time 0.001 (1.088)	Loss 2.7841 (2.7153)	Entropy 1.18208 (1.18210)	Top-1 acc 59.766 (59.035)	Top-5 acc 80.078 (80.053)	lr 0.01636
Train [49][40/3239]	Time 0.217 (1.287)	Data Time 0.001 (0.823)	Loss 2.6663 (2.7073)	Entropy 1.18205 (1.18209)	Top-1 acc 60.156 (59.308)	Top-5 acc 79.297 (80.116)	lr 0.01636
Train [49][50/3239]	Time 0.225 (1.127)	Data Time 0.002 (0.662)	Loss 2.7192 (2.7148)	Entropy 1.18201 (1.18208)	Top-1 acc 57.812 (59.176)	Top-5 acc 78.516 (79.917)	lr 0.01636
Train [49][60/3239]	Time 0.228 (1.015)	Data Time 0.001 (0.554)	Loss 2.7513 (2.7117)	Entropy 1.18199 (1.18206)	Top-1 acc 58.203 (59.266)	Top-5 acc 82.031 (80.020)	lr 0.01636
Train [49][70/3239]	Time 0.239 (0.935)	Data Time 0.001 (0.476)	Loss 2.6477 (2.7034)	Entropy 1.18197 (1.18205)	Top-1 acc 61.719 (59.496)	Top-5 acc 80.859 (80.144)	lr 0.01636
Train [49][80/3239]	Time 0.208 (0.879)	Data Time 0.001 (0.417)	Loss 2.6632 (2.6994)	Entropy 1.18193 (1.18204)	Top-1 acc 59.375 (59.423)	Top-5 acc 80.469 (80.252)	lr 0.01636
Train [49][90/3239]	Time 0.214 (0.832)	Data Time 0.001 (0.372)	Loss 2.5979 (2.6915)	Entropy 1.18190 (1.18203)	Top-1 acc 60.156 (59.495)	Top-5 acc 83.203 (80.417)	lr 0.01635
Train [49][100/3239]	Time 0.221 (0.795)	Data Time 0.001 (0.335)	Loss 2.7080 (2.6927)	Entropy 1.18182 (1.18201)	Top-1 acc 57.812 (59.394)	Top-5 acc 80.469 (80.449)	lr 0.01635
Train [49][110/3239]	Time 0.216 (0.765)	Data Time 0.001 (0.305)	Loss 2.5736 (2.6880)	Entropy 1.18188 (1.18199)	Top-1 acc 64.062 (59.431)	Top-5 acc 82.422 (80.539)	lr 0.01635
Train [49][120/3239]	Time 2.579 (0.741)	Data Time 0.001 (0.280)	Loss 2.7739 (2.6892)	Entropy 1.18188 (1.18199)	Top-1 acc 56.641 (59.327)	Top-5 acc 80.078 (80.498)	lr 0.01635
Train [49][130/3239]	Time 0.224 (0.701)	Data Time 0.001 (0.259)	Loss 2.4899 (2.6867)	Entropy 1.18188 (1.18198)	Top-1 acc 64.844 (59.387)	Top-5 acc 85.547 (80.525)	lr 0.01635
Train [49][140/3239]	Time 0.229 (0.683)	Data Time 0.001 (0.240)	Loss 2.4663 (2.6866)	Entropy 1.18187 (1.18197)	Top-1 acc 65.625 (59.400)	Top-5 acc 85.547 (80.541)	lr 0.01635
Train [49][150/3239]	Time 0.215 (0.669)	Data Time 0.001 (0.225)	Loss 2.7163 (2.6883)	Entropy 1.18188 (1.18196)	Top-1 acc 57.422 (59.409)	Top-5 acc 80.078 (80.536)	lr 0.01635
Train [49][160/3239]	Time 0.242 (0.656)	Data Time 0.001 (0.211)	Loss 2.7319 (2.6858)	Entropy 1.18186 (1.18196)	Top-1 acc 56.641 (59.448)	Top-5 acc 80.859 (80.542)	lr 0.01635
Train [49][170/3239]	Time 0.322 (0.645)	Data Time 0.001 (0.198)	Loss 2.6766 (2.6857)	Entropy 1.18181 (1.18195)	Top-1 acc 62.500 (59.480)	Top-5 acc 82.812 (80.562)	lr 0.01635
Train [49][180/3239]	Time 0.208 (0.634)	Data Time 0.001 (0.188)	Loss 2.6372 (2.6887)	Entropy 1.18176 (1.18194)	Top-1 acc 57.422 (59.362)	Top-5 acc 80.469 (80.551)	lr 0.01635
Train [49][190/3239]	Time 0.231 (0.625)	Data Time 0.001 (0.178)	Loss 2.8954 (2.6903)	Entropy 1.18175 (1.18193)	Top-1 acc 55.078 (59.256)	Top-5 acc 79.297 (80.544)	lr 0.01634
Train [49][200/3239]	Time 0.219 (0.616)	Data Time 0.001 (0.169)	Loss 2.5685 (2.6900)	Entropy 1.18160 (1.18192)	Top-1 acc 63.281 (59.239)	Top-5 acc 83.984 (80.548)	lr 0.01634
Train [49][210/3239]	Time 0.216 (0.608)	Data Time 0.001 (0.161)	Loss 2.8100 (2.6875)	Entropy 1.18158 (1.18190)	Top-1 acc 53.125 (59.258)	Top-5 acc 77.734 (80.585)	lr 0.01634
Train [49][220/3239]	Time 0.331 (0.601)	Data Time 0.001 (0.154)	Loss 2.7107 (2.6854)	Entropy 1.18153 (1.18189)	Top-1 acc 57.812 (59.258)	Top-5 acc 77.344 (80.630)	lr 0.01634
Train [49][230/3239]	Time 2.466 (0.595)	Data Time 0.001 (0.147)	Loss 2.7055 (2.6882)	Entropy 1.18153 (1.18187)	Top-1 acc 60.547 (59.235)	Top-5 acc 80.859 (80.572)	lr 0.01634
Train [49][240/3239]	Time 0.267 (0.580)	Data Time 0.001 (0.141)	Loss 2.6077 (2.6893)	Entropy 1.18151 (1.18186)	Top-1 acc 60.938 (59.213)	Top-5 acc 83.203 (80.585)	lr 0.01634
Train [49][250/3239]	Time 0.166 (0.576)	Data Time 0.001 (0.136)	Loss 2.7993 (2.6899)	Entropy 1.18148 (1.18184)	Top-1 acc 59.375 (59.170)	Top-5 acc 78.125 (80.554)	lr 0.01634
Train [49][260/3239]	Time 0.249 (0.571)	Data Time 0.001 (0.131)	Loss 2.6811 (2.6912)	Entropy 1.18146 (1.18183)	Top-1 acc 59.375 (59.189)	Top-5 acc 80.078 (80.535)	lr 0.01634
Train [49][270/3239]	Time 0.225 (0.568)	Data Time 0.001 (0.126)	Loss 2.7172 (2.6896)	Entropy 1.18146 (1.18182)	Top-1 acc 58.594 (59.240)	Top-5 acc 80.859 (80.561)	lr 0.01634
Train [49][280/3239]	Time 0.260 (0.741)	Data Time 0.002 (0.122)	Loss 2.6025 (2.6885)	Entropy 1.18141 (1.18180)	Top-1 acc 62.891 (59.260)	Top-5 acc 80.078 (80.549)	lr 0.01634
Train [49][290/3239]	Time 0.319 (0.733)	Data Time 0.003 (0.117)	Loss 2.5241 (2.6859)	Entropy 1.18139 (1.18179)	Top-1 acc 65.625 (59.319)	Top-5 acc 82.422 (80.603)	lr 0.01633
Train [49][300/3239]	Time 0.268 (0.726)	Data Time 0.002 (0.114)	Loss 2.6487 (2.6867)	Entropy 1.18138 (1.18178)	Top-1 acc 61.328 (59.319)	Top-5 acc 82.422 (80.571)	lr 0.01633
Train [49][310/3239]	Time 0.210 (0.719)	Data Time 0.001 (0.110)	Loss 2.7971 (2.6876)	Entropy 1.18134 (1.18176)	Top-1 acc 54.297 (59.272)	Top-5 acc 78.906 (80.574)	lr 0.01633
Train [49][320/3239]	Time 0.225 (0.711)	Data Time 0.002 (0.107)	Loss 2.6487 (2.6882)	Entropy 1.18133 (1.18175)	Top-1 acc 58.984 (59.284)	Top-5 acc 80.859 (80.577)	lr 0.01633
Train [49][330/3239]	Time 0.234 (0.703)	Data Time 0.001 (0.104)	Loss 2.8172 (2.6904)	Entropy 1.18130 (1.18174)	Top-1 acc 56.250 (59.216)	Top-5 acc 78.516 (80.538)	lr 0.01633
Train [49][340/3239]	Time 2.443 (0.696)	Data Time 0.001 (0.101)	Loss 2.7986 (2.6917)	Entropy 1.18130 (1.18172)	Top-1 acc 55.078 (59.141)	Top-5 acc 78.516 (80.513)	lr 0.01633
Train [49][350/3239]	Time 0.227 (0.683)	Data Time 0.001 (0.098)	Loss 3.0532 (2.6928)	Entropy 1.18132 (1.18171)	Top-1 acc 49.219 (59.121)	Top-5 acc 74.609 (80.499)	lr 0.01633
Train [49][360/3239]	Time 0.393 (0.677)	Data Time 0.002 (0.095)	Loss 2.6784 (2.6923)	Entropy 1.18131 (1.18170)	Top-1 acc 57.422 (59.112)	Top-5 acc 81.641 (80.499)	lr 0.01633
Train [49][370/3239]	Time 0.240 (0.671)	Data Time 0.001 (0.093)	Loss 2.7024 (2.6925)	Entropy 1.18127 (1.18169)	Top-1 acc 57.422 (59.105)	Top-5 acc 81.641 (80.507)	lr 0.01633
Train [49][380/3239]	Time 0.171 (0.665)	Data Time 0.001 (0.090)	Loss 2.7794 (2.6921)	Entropy 1.18116 (1.18168)	Top-1 acc 55.859 (59.122)	Top-5 acc 78.125 (80.513)	lr 0.01633
Train [49][390/3239]	Time 0.215 (0.661)	Data Time 0.002 (0.088)	Loss 2.5799 (2.6912)	Entropy 1.18116 (1.18166)	Top-1 acc 64.844 (59.134)	Top-5 acc 80.469 (80.534)	lr 0.01633
Train [49][400/3239]	Time 0.233 (0.656)	Data Time 0.001 (0.086)	Loss 2.6496 (2.6908)	Entropy 1.18111 (1.18165)	Top-1 acc 57.031 (59.170)	Top-5 acc 80.469 (80.543)	lr 0.01632
Train [49][410/3239]	Time 0.241 (0.652)	Data Time 0.001 (0.084)	Loss 2.7734 (2.6908)	Entropy 1.18093 (1.18164)	Top-1 acc 58.984 (59.182)	Top-5 acc 79.688 (80.559)	lr 0.01632
Train [49][420/3239]	Time 0.222 (0.647)	Data Time 0.001 (0.082)	Loss 2.8243 (2.6916)	Entropy 1.18091 (1.18162)	Top-1 acc 57.031 (59.163)	Top-5 acc 77.344 (80.560)	lr 0.01632
Train [49][430/3239]	Time 0.201 (0.643)	Data Time 0.001 (0.080)	Loss 2.5682 (2.6913)	Entropy 1.18092 (1.18160)	Top-1 acc 60.156 (59.178)	Top-5 acc 83.594 (80.552)	lr 0.01632
Train [49][440/3239]	Time 0.255 (0.640)	Data Time 0.001 (0.078)	Loss 2.7827 (2.6909)	Entropy 1.18088 (1.18159)	Top-1 acc 57.031 (59.180)	Top-5 acc 78.125 (80.556)	lr 0.01632
Train [49][450/3239]	Time 2.350 (0.635)	Data Time 0.001 (0.076)	Loss 2.8226 (2.6899)	Entropy 1.18088 (1.18157)	Top-1 acc 58.594 (59.206)	Top-5 acc 78.906 (80.578)	lr 0.01632
Train [49][460/3239]	Time 0.268 (0.627)	Data Time 0.001 (0.075)	Loss 2.6362 (2.6893)	Entropy 1.18088 (1.18156)	Top-1 acc 58.984 (59.210)	Top-5 acc 81.641 (80.594)	lr 0.01632
Train [49][470/3239]	Time 0.236 (0.623)	Data Time 0.001 (0.073)	Loss 2.8356 (2.6897)	Entropy 1.18080 (1.18154)	Top-1 acc 58.203 (59.220)	Top-5 acc 78.906 (80.585)	lr 0.01632
Train [49][480/3239]	Time 0.242 (0.620)	Data Time 0.002 (0.072)	Loss 2.7183 (2.6900)	Entropy 1.18074 (1.18152)	Top-1 acc 59.766 (59.209)	Top-5 acc 80.078 (80.584)	lr 0.01632
Train [49][490/3239]	Time 0.224 (0.617)	Data Time 0.001 (0.070)	Loss 2.8035 (2.6919)	Entropy 1.18069 (1.18151)	Top-1 acc 60.547 (59.185)	Top-5 acc 78.125 (80.546)	lr 0.01632
Train [49][500/3239]	Time 0.231 (0.614)	Data Time 0.001 (0.069)	Loss 2.7768 (2.6913)	Entropy 1.18062 (1.18149)	Top-1 acc 57.422 (59.204)	Top-5 acc 80.469 (80.569)	lr 0.01631
Train [49][510/3239]	Time 0.306 (0.611)	Data Time 0.001 (0.068)	Loss 2.6887 (2.6912)	Entropy 1.18058 (1.18147)	Top-1 acc 59.375 (59.203)	Top-5 acc 79.297 (80.559)	lr 0.01631
Train [49][520/3239]	Time 0.227 (0.608)	Data Time 0.002 (0.066)	Loss 2.7956 (2.6923)	Entropy 1.18055 (1.18145)	Top-1 acc 57.031 (59.181)	Top-5 acc 77.344 (80.523)	lr 0.01631
Train [49][530/3239]	Time 0.230 (0.605)	Data Time 0.001 (0.065)	Loss 2.7933 (2.6936)	Entropy 1.18053 (1.18144)	Top-1 acc 57.422 (59.144)	Top-5 acc 78.516 (80.500)	lr 0.01631
Train [49][540/3239]	Time 0.221 (0.602)	Data Time 0.001 (0.064)	Loss 2.7102 (2.6932)	Entropy 1.18043 (1.18142)	Top-1 acc 60.156 (59.161)	Top-5 acc 82.422 (80.512)	lr 0.01631
Train [49][550/3239]	Time 0.244 (0.600)	Data Time 0.002 (0.063)	Loss 2.5699 (2.6923)	Entropy 1.18039 (1.18140)	Top-1 acc 62.500 (59.200)	Top-5 acc 82.422 (80.521)	lr 0.01631
Train [49][560/3239]	Time 2.560 (0.597)	Data Time 0.001 (0.062)	Loss 2.7401 (2.6918)	Entropy 1.18039 (1.18138)	Top-1 acc 59.766 (59.195)	Top-5 acc 80.078 (80.547)	lr 0.01631
Train [49][570/3239]	Time 0.228 (0.591)	Data Time 0.001 (0.061)	Loss 2.7308 (2.6920)	Entropy 1.18039 (1.18137)	Top-1 acc 57.031 (59.177)	Top-5 acc 82.812 (80.558)	lr 0.01631
Train [49][580/3239]	Time 0.243 (0.589)	Data Time 0.001 (0.060)	Loss 2.6070 (2.6922)	Entropy 1.18033 (1.18135)	Top-1 acc 60.547 (59.170)	Top-5 acc 82.422 (80.543)	lr 0.01631
Train [49][590/3239]	Time 0.247 (0.587)	Data Time 0.001 (0.059)	Loss 2.6841 (2.6923)	Entropy 1.18028 (1.18133)	Top-1 acc 62.109 (59.161)	Top-5 acc 82.031 (80.550)	lr 0.01631
Train [49][600/3239]	Time 0.250 (0.585)	Data Time 0.001 (0.058)	Loss 2.7152 (2.6926)	Entropy 1.18020 (1.18131)	Top-1 acc 59.375 (59.140)	Top-5 acc 78.516 (80.547)	lr 0.01631
Train [49][610/3239]	Time 0.321 (0.583)	Data Time 0.001 (0.057)	Loss 2.5529 (2.6933)	Entropy 1.18016 (1.18129)	Top-1 acc 63.281 (59.130)	Top-5 acc 81.250 (80.526)	lr 0.01630
Train [49][620/3239]	Time 0.262 (0.581)	Data Time 0.001 (0.056)	Loss 2.4891 (2.6926)	Entropy 1.18013 (1.18128)	Top-1 acc 63.672 (59.150)	Top-5 acc 85.156 (80.557)	lr 0.01630
Train [49][630/3239]	Time 0.217 (0.579)	Data Time 0.001 (0.055)	Loss 2.8373 (2.6919)	Entropy 1.18011 (1.18126)	Top-1 acc 56.641 (59.173)	Top-5 acc 77.734 (80.577)	lr 0.01630
Train [49][640/3239]	Time 0.251 (0.649)	Data Time 0.002 (0.054)	Loss 2.6604 (2.6924)	Entropy 1.18008 (1.18124)	Top-1 acc 59.766 (59.150)	Top-5 acc 78.516 (80.566)	lr 0.01630
Train [49][650/3239]	Time 0.264 (0.647)	Data Time 0.003 (0.053)	Loss 2.6376 (2.6923)	Entropy 1.18006 (1.18122)	Top-1 acc 59.766 (59.151)	Top-5 acc 83.203 (80.559)	lr 0.01630
Train [49][660/3239]	Time 0.357 (0.645)	Data Time 0.002 (0.053)	Loss 2.7105 (2.6931)	Entropy 1.18002 (1.18120)	Top-1 acc 58.984 (59.130)	Top-5 acc 80.859 (80.549)	lr 0.01630
Train [49][670/3239]	Time 2.561 (0.642)	Data Time 0.002 (0.052)	Loss 2.8396 (2.6933)	Entropy 1.18002 (1.18119)	Top-1 acc 57.422 (59.126)	Top-5 acc 78.516 (80.557)	lr 0.01630
Train [49][680/3239]	Time 0.230 (0.636)	Data Time 0.002 (0.051)	Loss 2.4961 (2.6939)	Entropy 1.17999 (1.18117)	Top-1 acc 62.109 (59.107)	Top-5 acc 85.547 (80.537)	lr 0.01630
Train [49][690/3239]	Time 0.263 (0.634)	Data Time 0.002 (0.050)	Loss 2.7870 (2.6939)	Entropy 1.17992 (1.18115)	Top-1 acc 60.938 (59.125)	Top-5 acc 82.031 (80.539)	lr 0.01630
Train [49][700/3239]	Time 0.224 (0.632)	Data Time 0.001 (0.050)	Loss 2.7875 (2.6941)	Entropy 1.17992 (1.18113)	Top-1 acc 56.641 (59.139)	Top-5 acc 78.516 (80.535)	lr 0.01630
Train [49][710/3239]	Time 0.192 (0.630)	Data Time 0.001 (0.049)	Loss 2.7504 (2.6939)	Entropy 1.17989 (1.18112)	Top-1 acc 57.812 (59.146)	Top-5 acc 79.688 (80.541)	lr 0.01629
Train [49][720/3239]	Time 0.237 (0.627)	Data Time 0.001 (0.048)	Loss 2.6610 (2.6941)	Entropy 1.17988 (1.18110)	Top-1 acc 57.812 (59.127)	Top-5 acc 78.516 (80.541)	lr 0.01629
Train [49][730/3239]	Time 0.226 (0.625)	Data Time 0.001 (0.048)	Loss 2.6547 (2.6942)	Entropy 1.17989 (1.18108)	Top-1 acc 60.938 (59.122)	Top-5 acc 78.125 (80.537)	lr 0.01629
Train [49][740/3239]	Time 0.236 (0.623)	Data Time 0.001 (0.047)	Loss 2.6850 (2.6940)	Entropy 1.17979 (1.18107)	Top-1 acc 58.984 (59.129)	Top-5 acc 82.812 (80.542)	lr 0.01629
Train [49][750/3239]	Time 0.320 (0.621)	Data Time 0.001 (0.047)	Loss 2.7275 (2.6953)	Entropy 1.17978 (1.18105)	Top-1 acc 55.078 (59.096)	Top-5 acc 80.469 (80.519)	lr 0.01629
Train [49][760/3239]	Time 0.220 (0.619)	Data Time 0.001 (0.046)	Loss 2.6789 (2.6944)	Entropy 1.17973 (1.18103)	Top-1 acc 64.062 (59.124)	Top-5 acc 80.078 (80.534)	lr 0.01629
Train [49][770/3239]	Time 0.254 (0.617)	Data Time 0.001 (0.045)	Loss 2.5965 (2.6941)	Entropy 1.17972 (1.18101)	Top-1 acc 64.062 (59.142)	Top-5 acc 83.203 (80.534)	lr 0.01629
Train [49][780/3239]	Time 2.394 (0.614)	Data Time 0.001 (0.045)	Loss 2.6068 (2.6938)	Entropy 1.17972 (1.18100)	Top-1 acc 60.156 (59.157)	Top-5 acc 78.906 (80.537)	lr 0.01629
Train [49][790/3239]	Time 0.220 (0.610)	Data Time 0.001 (0.044)	Loss 2.7844 (2.6943)	Entropy 1.17971 (1.18098)	Top-1 acc 58.984 (59.156)	Top-5 acc 78.516 (80.515)	lr 0.01629
Train [49][800/3239]	Time 0.243 (0.608)	Data Time 0.001 (0.044)	Loss 2.4814 (2.6940)	Entropy 1.17970 (1.18097)	Top-1 acc 66.016 (59.150)	Top-5 acc 83.203 (80.533)	lr 0.01629
Train [49][810/3239]	Time 0.222 (0.606)	Data Time 0.001 (0.043)	Loss 2.5413 (2.6940)	Entropy 1.17969 (1.18095)	Top-1 acc 61.328 (59.142)	Top-5 acc 85.156 (80.539)	lr 0.01628
Train [49][820/3239]	Time 0.201 (0.604)	Data Time 0.001 (0.043)	Loss 2.5636 (2.6938)	Entropy 1.17961 (1.18093)	Top-1 acc 58.203 (59.142)	Top-5 acc 85.156 (80.538)	lr 0.01628
Train [49][830/3239]	Time 0.206 (0.602)	Data Time 0.001 (0.042)	Loss 2.7606 (2.6941)	Entropy 1.17955 (1.18092)	Top-1 acc 57.422 (59.141)	Top-5 acc 79.297 (80.532)	lr 0.01628
Train [49][840/3239]	Time 0.226 (0.601)	Data Time 0.001 (0.042)	Loss 2.8685 (2.6938)	Entropy 1.17956 (1.18090)	Top-1 acc 55.078 (59.149)	Top-5 acc 79.688 (80.533)	lr 0.01628
Train [49][850/3239]	Time 0.311 (0.599)	Data Time 0.001 (0.041)	Loss 2.6307 (2.6940)	Entropy 1.17951 (1.18089)	Top-1 acc 62.891 (59.157)	Top-5 acc 84.766 (80.535)	lr 0.01628
Train [49][860/3239]	Time 0.226 (0.598)	Data Time 0.001 (0.041)	Loss 2.7383 (2.6940)	Entropy 1.17947 (1.18087)	Top-1 acc 57.422 (59.158)	Top-5 acc 78.516 (80.533)	lr 0.01628
Train [49][870/3239]	Time 0.225 (0.596)	Data Time 0.001 (0.040)	Loss 2.7966 (2.6942)	Entropy 1.17944 (1.18085)	Top-1 acc 58.594 (59.144)	Top-5 acc 78.125 (80.534)	lr 0.01628
Train [49][880/3239]	Time 0.223 (0.594)	Data Time 0.001 (0.040)	Loss 2.9036 (2.6948)	Entropy 1.17941 (1.18084)	Top-1 acc 53.516 (59.138)	Top-5 acc 77.734 (80.522)	lr 0.01628
Train [49][890/3239]	Time 2.489 (0.593)	Data Time 0.001 (0.040)	Loss 2.7532 (2.6948)	Entropy 1.17941 (1.18082)	Top-1 acc 57.812 (59.141)	Top-5 acc 80.859 (80.517)	lr 0.01628
Train [49][900/3239]	Time 0.254 (0.589)	Data Time 0.001 (0.039)	Loss 2.7075 (2.6952)	Entropy 1.17937 (1.18081)	Top-1 acc 59.766 (59.127)	Top-5 acc 78.906 (80.514)	lr 0.01628
Train [49][910/3239]	Time 0.242 (0.588)	Data Time 0.001 (0.039)	Loss 2.8058 (2.6951)	Entropy 1.17932 (1.18079)	Top-1 acc 54.688 (59.129)	Top-5 acc 81.250 (80.517)	lr 0.01628
Train [49][920/3239]	Time 0.248 (0.586)	Data Time 0.002 (0.038)	Loss 2.8611 (2.6958)	Entropy 1.17927 (1.18077)	Top-1 acc 58.203 (59.134)	Top-5 acc 77.734 (80.509)	lr 0.01627
Train [49][930/3239]	Time 0.216 (0.585)	Data Time 0.001 (0.038)	Loss 2.5839 (2.6955)	Entropy 1.17922 (1.18076)	Top-1 acc 58.203 (59.134)	Top-5 acc 83.594 (80.515)	lr 0.01627
Train [49][940/3239]	Time 0.225 (0.584)	Data Time 0.001 (0.038)	Loss 2.6458 (2.6954)	Entropy 1.17917 (1.18074)	Top-1 acc 62.109 (59.135)	Top-5 acc 83.203 (80.526)	lr 0.01627
Train [49][950/3239]	Time 0.212 (0.582)	Data Time 0.001 (0.037)	Loss 2.6238 (2.6954)	Entropy 1.17918 (1.18072)	Top-1 acc 61.719 (59.137)	Top-5 acc 79.688 (80.524)	lr 0.01627
Train [49][960/3239]	Time 0.219 (0.581)	Data Time 0.001 (0.037)	Loss 2.7532 (2.6954)	Entropy 1.17917 (1.18071)	Top-1 acc 56.250 (59.133)	Top-5 acc 81.250 (80.521)	lr 0.01627
Train [49][970/3239]	Time 0.217 (0.580)	Data Time 0.001 (0.036)	Loss 2.6735 (2.6962)	Entropy 1.17906 (1.18069)	Top-1 acc 58.203 (59.114)	Top-5 acc 81.641 (80.510)	lr 0.01627
Train [49][980/3239]	Time 0.229 (0.579)	Data Time 0.002 (0.036)	Loss 2.7836 (2.6963)	Entropy 1.17908 (1.18067)	Top-1 acc 57.812 (59.103)	Top-5 acc 78.516 (80.513)	lr 0.01627
Train [49][990/3239]	Time 0.327 (0.577)	Data Time 0.001 (0.036)	Loss 2.6027 (2.6962)	Entropy 1.17905 (1.18066)	Top-1 acc 58.203 (59.109)	Top-5 acc 81.641 (80.509)	lr 0.01627
Train [49][1000/3239]	Time 45.624 (0.619)	Data Time 0.002 (0.035)	Loss 2.6917 (2.6965)	Entropy 1.17905 (1.18064)	Top-1 acc 57.031 (59.108)	Top-5 acc 79.688 (80.504)	lr 0.01627
Train [49][1010/3239]	Time 0.461 (0.616)	Data Time 0.003 (0.035)	Loss 2.6207 (2.6963)	Entropy 1.17904 (1.18063)	Top-1 acc 59.766 (59.111)	Top-5 acc 78.906 (80.502)	lr 0.01627
Train [49][1020/3239]	Time 0.225 (0.616)	Data Time 0.002 (0.035)	Loss 2.7483 (2.6966)	Entropy 1.17901 (1.18061)	Top-1 acc 57.422 (59.114)	Top-5 acc 77.344 (80.489)	lr 0.01626
Train [49][1030/3239]	Time 0.232 (0.614)	Data Time 0.001 (0.034)	Loss 2.6144 (2.6971)	Entropy 1.17895 (1.18059)	Top-1 acc 60.547 (59.106)	Top-5 acc 83.594 (80.478)	lr 0.01626
Train [49][1040/3239]	Time 0.237 (0.613)	Data Time 0.001 (0.034)	Loss 2.7591 (2.6973)	Entropy 1.17893 (1.18058)	Top-1 acc 59.375 (59.098)	Top-5 acc 77.344 (80.470)	lr 0.01626
Train [49][1050/3239]	Time 0.239 (0.611)	Data Time 0.001 (0.034)	Loss 2.8495 (2.6977)	Entropy 1.17894 (1.18056)	Top-1 acc 53.516 (59.095)	Top-5 acc 77.344 (80.462)	lr 0.01626
Train [49][1060/3239]	Time 0.210 (0.610)	Data Time 0.001 (0.033)	Loss 2.5883 (2.6977)	Entropy 1.17892 (1.18055)	Top-1 acc 60.938 (59.089)	Top-5 acc 84.766 (80.465)	lr 0.01626
Train [49][1070/3239]	Time 0.210 (0.608)	Data Time 0.001 (0.033)	Loss 2.4807 (2.6979)	Entropy 1.17887 (1.18053)	Top-1 acc 62.109 (59.090)	Top-5 acc 83.984 (80.464)	lr 0.01626
Train [49][1080/3239]	Time 0.225 (0.607)	Data Time 0.001 (0.033)	Loss 2.7255 (2.6983)	Entropy 1.17884 (1.18052)	Top-1 acc 58.203 (59.073)	Top-5 acc 76.562 (80.455)	lr 0.01626
Train [49][1090/3239]	Time 0.226 (0.605)	Data Time 0.001 (0.033)	Loss 2.7422 (2.6986)	Entropy 1.17865 (1.18050)	Top-1 acc 58.984 (59.068)	Top-5 acc 76.562 (80.442)	lr 0.01626
Train [49][1100/3239]	Time 0.221 (0.604)	Data Time 0.001 (0.032)	Loss 2.6784 (2.6987)	Entropy 1.17858 (1.18048)	Top-1 acc 59.375 (59.062)	Top-5 acc 80.469 (80.436)	lr 0.01626
Train [49][1110/3239]	Time 2.392 (0.603)	Data Time 0.002 (0.032)	Loss 2.8204 (2.6984)	Entropy 1.17858 (1.18047)	Top-1 acc 58.984 (59.073)	Top-5 acc 77.734 (80.437)	lr 0.01626
Train [49][1120/3239]	Time 0.218 (0.599)	Data Time 0.001 (0.032)	Loss 2.4350 (2.6985)	Entropy 1.17859 (1.18045)	Top-1 acc 65.625 (59.069)	Top-5 acc 83.594 (80.436)	lr 0.01625
Train [49][1130/3239]	Time 0.227 (0.598)	Data Time 0.001 (0.032)	Loss 2.6699 (2.6983)	Entropy 1.17858 (1.18043)	Top-1 acc 59.375 (59.072)	Top-5 acc 80.469 (80.441)	lr 0.01625
Train [49][1140/3239]	Time 0.213 (0.597)	Data Time 0.001 (0.031)	Loss 2.7778 (2.6986)	Entropy 1.17859 (1.18042)	Top-1 acc 57.422 (59.063)	Top-5 acc 79.297 (80.436)	lr 0.01625
Train [49][1150/3239]	Time 0.168 (0.596)	Data Time 0.001 (0.031)	Loss 2.6359 (2.6984)	Entropy 1.17850 (1.18040)	Top-1 acc 58.594 (59.067)	Top-5 acc 83.594 (80.443)	lr 0.01625
Train [49][1160/3239]	Time 0.231 (0.595)	Data Time 0.002 (0.031)	Loss 2.6418 (2.6985)	Entropy 1.17847 (1.18038)	Top-1 acc 62.109 (59.063)	Top-5 acc 80.469 (80.442)	lr 0.01625
Train [49][1170/3239]	Time 0.215 (0.593)	Data Time 0.001 (0.031)	Loss 2.6851 (2.6988)	Entropy 1.17846 (1.18037)	Top-1 acc 57.031 (59.056)	Top-5 acc 82.031 (80.439)	lr 0.01625
Train [49][1180/3239]	Time 0.230 (0.592)	Data Time 0.001 (0.030)	Loss 2.6594 (2.6985)	Entropy 1.17843 (1.18035)	Top-1 acc 58.984 (59.060)	Top-5 acc 80.469 (80.444)	lr 0.01625
Train [49][1190/3239]	Time 0.230 (0.591)	Data Time 0.001 (0.030)	Loss 2.7792 (2.6990)	Entropy 1.17843 (1.18034)	Top-1 acc 60.938 (59.057)	Top-5 acc 79.688 (80.434)	lr 0.01625
Train [49][1200/3239]	Time 0.215 (0.590)	Data Time 0.001 (0.030)	Loss 2.8987 (2.6993)	Entropy 1.17842 (1.18032)	Top-1 acc 58.594 (59.060)	Top-5 acc 78.516 (80.427)	lr 0.01625
Train [49][1210/3239]	Time 0.245 (0.589)	Data Time 0.001 (0.030)	Loss 2.6201 (2.6990)	Entropy 1.17840 (1.18030)	Top-1 acc 60.547 (59.059)	Top-5 acc 82.031 (80.436)	lr 0.01625
Train [49][1220/3239]	Time 2.363 (0.588)	Data Time 0.001 (0.029)	Loss 2.6851 (2.6989)	Entropy 1.17840 (1.18029)	Top-1 acc 58.594 (59.058)	Top-5 acc 79.688 (80.438)	lr 0.01625
Train [49][1230/3239]	Time 0.307 (0.585)	Data Time 0.001 (0.029)	Loss 2.6089 (2.6987)	Entropy 1.17832 (1.18027)	Top-1 acc 57.812 (59.055)	Top-5 acc 80.859 (80.439)	lr 0.01624
Train [49][1240/3239]	Time 0.225 (0.584)	Data Time 0.001 (0.029)	Loss 2.8842 (2.6985)	Entropy 1.17826 (1.18026)	Top-1 acc 55.859 (59.065)	Top-5 acc 76.953 (80.441)	lr 0.01624
Train [49][1250/3239]	Time 0.225 (0.583)	Data Time 0.001 (0.029)	Loss 2.7257 (2.6986)	Entropy 1.17823 (1.18024)	Top-1 acc 57.812 (59.060)	Top-5 acc 79.688 (80.442)	lr 0.01624
Train [49][1260/3239]	Time 0.237 (0.582)	Data Time 0.001 (0.028)	Loss 2.8217 (2.6986)	Entropy 1.17822 (1.18022)	Top-1 acc 57.812 (59.061)	Top-5 acc 76.172 (80.444)	lr 0.01624
Train [49][1270/3239]	Time 0.220 (0.581)	Data Time 0.001 (0.028)	Loss 2.7151 (2.6993)	Entropy 1.17785 (1.18021)	Top-1 acc 60.547 (59.054)	Top-5 acc 80.078 (80.429)	lr 0.01624
Train [49][1280/3239]	Time 0.234 (0.580)	Data Time 0.001 (0.028)	Loss 2.6033 (2.6994)	Entropy 1.17783 (1.18019)	Top-1 acc 58.984 (59.054)	Top-5 acc 84.375 (80.428)	lr 0.01624
Train [49][1290/3239]	Time 0.235 (0.579)	Data Time 0.005 (0.028)	Loss 2.5561 (2.6996)	Entropy 1.17782 (1.18017)	Top-1 acc 64.844 (59.052)	Top-5 acc 82.812 (80.423)	lr 0.01624
Train [49][1300/3239]	Time 0.214 (0.578)	Data Time 0.001 (0.028)	Loss 2.7760 (2.6998)	Entropy 1.17770 (1.18015)	Top-1 acc 55.469 (59.052)	Top-5 acc 78.516 (80.420)	lr 0.01624
Train [49][1310/3239]	Time 0.212 (0.578)	Data Time 0.001 (0.027)	Loss 2.6785 (2.6999)	Entropy 1.17767 (1.18013)	Top-1 acc 59.766 (59.056)	Top-5 acc 81.250 (80.412)	lr 0.01624
Train [49][1320/3239]	Time 0.226 (0.577)	Data Time 0.002 (0.027)	Loss 2.7561 (2.6997)	Entropy 1.17766 (1.18011)	Top-1 acc 59.766 (59.062)	Top-5 acc 78.125 (80.419)	lr 0.01624
Train [49][1330/3239]	Time 2.526 (0.576)	Data Time 0.001 (0.027)	Loss 2.6247 (2.6999)	Entropy 1.17766 (1.18010)	Top-1 acc 58.984 (59.059)	Top-5 acc 81.250 (80.413)	lr 0.01623
Train [49][1340/3239]	Time 0.234 (0.573)	Data Time 0.001 (0.027)	Loss 2.5821 (2.6996)	Entropy 1.17761 (1.18008)	Top-1 acc 62.891 (59.070)	Top-5 acc 82.812 (80.422)	lr 0.01623
Train [49][1350/3239]	Time 0.203 (0.572)	Data Time 0.001 (0.027)	Loss 2.6873 (2.6999)	Entropy 1.17760 (1.18006)	Top-1 acc 58.984 (59.062)	Top-5 acc 82.031 (80.410)	lr 0.01623
Train [49][1360/3239]	Time 0.237 (0.571)	Data Time 0.001 (0.026)	Loss 2.6148 (2.7002)	Entropy 1.17748 (1.18004)	Top-1 acc 60.547 (59.048)	Top-5 acc 82.031 (80.404)	lr 0.01623
Train [49][1370/3239]	Time 0.259 (0.607)	Data Time 0.002 (0.026)	Loss 2.7331 (2.7005)	Entropy 1.17749 (1.18002)	Top-1 acc 55.469 (59.042)	Top-5 acc 78.516 (80.400)	lr 0.01623
Train [49][1380/3239]	Time 0.323 (0.606)	Data Time 0.002 (0.026)	Loss 2.6383 (2.7001)	Entropy 1.17746 (1.18000)	Top-1 acc 64.062 (59.052)	Top-5 acc 83.203 (80.411)	lr 0.01623
Train [49][1390/3239]	Time 0.218 (0.605)	Data Time 0.001 (0.026)	Loss 2.8951 (2.7001)	Entropy 1.17744 (1.17998)	Top-1 acc 55.469 (59.050)	Top-5 acc 78.125 (80.411)	lr 0.01623
Train [49][1400/3239]	Time 0.257 (0.604)	Data Time 0.001 (0.026)	Loss 2.6249 (2.7008)	Entropy 1.17743 (1.17997)	Top-1 acc 62.109 (59.026)	Top-5 acc 80.859 (80.398)	lr 0.01623
Train [49][1410/3239]	Time 0.215 (0.603)	Data Time 0.001 (0.026)	Loss 2.5757 (2.7009)	Entropy 1.17742 (1.17995)	Top-1 acc 62.500 (59.028)	Top-5 acc 84.375 (80.404)	lr 0.01623
Train [49][1420/3239]	Time 0.216 (0.602)	Data Time 0.002 (0.025)	Loss 2.7323 (2.7007)	Entropy 1.17738 (1.17993)	Top-1 acc 60.938 (59.035)	Top-5 acc 80.469 (80.406)	lr 0.01623
Train [49][1430/3239]	Time 0.342 (0.601)	Data Time 0.001 (0.025)	Loss 2.7762 (2.7007)	Entropy 1.17740 (1.17991)	Top-1 acc 60.547 (59.034)	Top-5 acc 78.516 (80.403)	lr 0.01623
Train [49][1440/3239]	Time 2.464 (0.600)	Data Time 0.001 (0.025)	Loss 2.5748 (2.7011)	Entropy 1.17740 (1.17990)	Top-1 acc 62.891 (59.023)	Top-5 acc 82.812 (80.397)	lr 0.01622
Train [49][1450/3239]	Time 0.217 (0.598)	Data Time 0.001 (0.025)	Loss 2.8085 (2.7008)	Entropy 1.17740 (1.17988)	Top-1 acc 54.297 (59.017)	Top-5 acc 78.125 (80.405)	lr 0.01622
Train [49][1460/3239]	Time 0.233 (0.597)	Data Time 0.001 (0.025)	Loss 2.6445 (2.7007)	Entropy 1.17731 (1.17986)	Top-1 acc 58.594 (59.016)	Top-5 acc 82.031 (80.411)	lr 0.01622
Train [49][1470/3239]	Time 0.235 (0.596)	Data Time 0.001 (0.025)	Loss 2.6508 (2.7009)	Entropy 1.17730 (1.17984)	Top-1 acc 59.375 (59.013)	Top-5 acc 81.250 (80.403)	lr 0.01622
Train [49][1480/3239]	Time 0.324 (0.595)	Data Time 0.001 (0.024)	Loss 2.7635 (2.7007)	Entropy 1.17729 (1.17983)	Top-1 acc 57.422 (59.019)	Top-5 acc 78.906 (80.408)	lr 0.01622
Train [49][1490/3239]	Time 0.219 (0.594)	Data Time 0.001 (0.024)	Loss 2.8069 (2.7007)	Entropy 1.17729 (1.17981)	Top-1 acc 55.469 (59.012)	Top-5 acc 79.688 (80.404)	lr 0.01622
Train [49][1500/3239]	Time 0.223 (0.593)	Data Time 0.002 (0.024)	Loss 2.7633 (2.7010)	Entropy 1.17730 (1.17979)	Top-1 acc 57.812 (59.013)	Top-5 acc 79.688 (80.395)	lr 0.01622
Train [49][1510/3239]	Time 0.231 (0.592)	Data Time 0.001 (0.024)	Loss 2.5963 (2.7013)	Entropy 1.17733 (1.17978)	Top-1 acc 62.500 (59.009)	Top-5 acc 80.859 (80.386)	lr 0.01622
Train [49][1520/3239]	Time 0.219 (0.591)	Data Time 0.002 (0.024)	Loss 2.6372 (2.7016)	Entropy 1.17757 (1.17976)	Top-1 acc 63.672 (59.006)	Top-5 acc 80.469 (80.378)	lr 0.01622
Train [49][1530/3239]	Time 0.297 (0.591)	Data Time 0.002 (0.024)	Loss 2.5391 (2.7018)	Entropy 1.17756 (1.17975)	Top-1 acc 63.281 (59.008)	Top-5 acc 82.812 (80.372)	lr 0.01622
Train [49][1540/3239]	Time 0.227 (0.590)	Data Time 0.001 (0.024)	Loss 2.7043 (2.7018)	Entropy 1.17750 (1.17973)	Top-1 acc 60.547 (59.010)	Top-5 acc 83.594 (80.373)	lr 0.01621
Train [49][1550/3239]	Time 2.469 (0.589)	Data Time 0.001 (0.023)	Loss 2.6679 (2.7021)	Entropy 1.17750 (1.17972)	Top-1 acc 60.938 (58.998)	Top-5 acc 82.031 (80.370)	lr 0.01621
Train [49][1560/3239]	Time 0.265 (0.587)	Data Time 0.001 (0.023)	Loss 2.5503 (2.7019)	Entropy 1.17745 (1.17970)	Top-1 acc 62.500 (59.003)	Top-5 acc 86.328 (80.376)	lr 0.01621
Train [49][1570/3239]	Time 0.234 (0.586)	Data Time 0.002 (0.023)	Loss 2.7185 (2.7019)	Entropy 1.17744 (1.17969)	Top-1 acc 56.250 (59.006)	Top-5 acc 82.031 (80.377)	lr 0.01621
Train [49][1580/3239]	Time 0.320 (0.585)	Data Time 0.001 (0.023)	Loss 2.7138 (2.7021)	Entropy 1.17743 (1.17967)	Top-1 acc 61.328 (58.995)	Top-5 acc 79.297 (80.374)	lr 0.01621
Train [49][1590/3239]	Time 0.210 (0.584)	Data Time 0.001 (0.023)	Loss 2.8931 (2.7020)	Entropy 1.17742 (1.17966)	Top-1 acc 56.250 (58.991)	Top-5 acc 76.172 (80.376)	lr 0.01621
Train [49][1600/3239]	Time 0.238 (0.584)	Data Time 0.001 (0.023)	Loss 2.8732 (2.7022)	Entropy 1.17740 (1.17965)	Top-1 acc 53.516 (58.987)	Top-5 acc 75.391 (80.374)	lr 0.01621
Train [49][1610/3239]	Time 0.224 (0.583)	Data Time 0.001 (0.023)	Loss 2.8967 (2.7021)	Entropy 1.17739 (1.17963)	Top-1 acc 52.734 (58.989)	Top-5 acc 80.078 (80.379)	lr 0.01621
Train [49][1620/3239]	Time 0.220 (0.582)	Data Time 0.001 (0.022)	Loss 2.6421 (2.7023)	Entropy 1.17737 (1.17962)	Top-1 acc 60.547 (58.982)	Top-5 acc 84.375 (80.378)	lr 0.01621
Train [49][1630/3239]	Time 0.312 (0.581)	Data Time 0.001 (0.022)	Loss 2.6956 (2.7022)	Entropy 1.17737 (1.17960)	Top-1 acc 57.812 (58.981)	Top-5 acc 81.641 (80.383)	lr 0.01621
Train [49][1640/3239]	Time 0.216 (0.581)	Data Time 0.001 (0.022)	Loss 2.7244 (2.7023)	Entropy 1.17740 (1.17959)	Top-1 acc 57.812 (58.976)	Top-5 acc 76.953 (80.379)	lr 0.01620
Train [49][1650/3239]	Time 0.237 (0.580)	Data Time 0.001 (0.022)	Loss 2.6452 (2.7025)	Entropy 1.17733 (1.17958)	Top-1 acc 59.766 (58.975)	Top-5 acc 80.859 (80.376)	lr 0.01620
Train [49][1660/3239]	Time 2.435 (0.579)	Data Time 0.001 (0.022)	Loss 2.6317 (2.7026)	Entropy 1.17733 (1.17956)	Top-1 acc 58.594 (58.970)	Top-5 acc 82.422 (80.374)	lr 0.01620
Train [49][1670/3239]	Time 0.238 (0.577)	Data Time 0.001 (0.022)	Loss 2.7788 (2.7028)	Entropy 1.17730 (1.17955)	Top-1 acc 62.109 (58.962)	Top-5 acc 78.906 (80.370)	lr 0.01620
Train [49][1680/3239]	Time 0.354 (0.576)	Data Time 0.002 (0.022)	Loss 2.9454 (2.7034)	Entropy 1.17727 (1.17954)	Top-1 acc 53.906 (58.951)	Top-5 acc 72.266 (80.358)	lr 0.01620
Train [49][1690/3239]	Time 0.266 (0.576)	Data Time 0.001 (0.022)	Loss 2.8533 (2.7034)	Entropy 1.17728 (1.17952)	Top-1 acc 51.562 (58.945)	Top-5 acc 76.953 (80.356)	lr 0.01620
Train [49][1700/3239]	Time 0.231 (0.575)	Data Time 0.001 (0.022)	Loss 2.6942 (2.7032)	Entropy 1.17725 (1.17951)	Top-1 acc 60.938 (58.948)	Top-5 acc 81.641 (80.362)	lr 0.01620
Train [49][1710/3239]	Time 0.217 (0.574)	Data Time 0.001 (0.021)	Loss 2.6704 (2.7034)	Entropy 1.17721 (1.17950)	Top-1 acc 58.594 (58.941)	Top-5 acc 85.547 (80.360)	lr 0.01620
Train [49][1720/3239]	Time 0.223 (0.574)	Data Time 0.002 (0.021)	Loss 2.6443 (2.7034)	Entropy 1.17721 (1.17948)	Top-1 acc 58.594 (58.944)	Top-5 acc 82.422 (80.359)	lr 0.01620
Train [49][1730/3239]	Time 0.266 (0.599)	Data Time 0.003 (0.021)	Loss 2.7421 (2.7036)	Entropy 1.17712 (1.17947)	Top-1 acc 57.812 (58.937)	Top-5 acc 77.344 (80.351)	lr 0.01620
Train [49][1740/3239]	Time 0.217 (0.599)	Data Time 0.002 (0.021)	Loss 2.6793 (2.7034)	Entropy 1.17707 (1.17946)	Top-1 acc 60.547 (58.942)	Top-5 acc 81.250 (80.353)	lr 0.01620
Train [49][1750/3239]	Time 0.249 (0.598)	Data Time 0.001 (0.021)	Loss 3.0010 (2.7038)	Entropy 1.17703 (1.17944)	Top-1 acc 51.172 (58.938)	Top-5 acc 72.656 (80.342)	lr 0.01619
Train [49][1760/3239]	Time 0.244 (0.597)	Data Time 0.002 (0.021)	Loss 2.8980 (2.7040)	Entropy 1.17699 (1.17943)	Top-1 acc 56.641 (58.934)	Top-5 acc 78.125 (80.338)	lr 0.01619
Train [49][1770/3239]	Time 2.599 (0.596)	Data Time 0.001 (0.021)	Loss 2.7705 (2.7039)	Entropy 1.17699 (1.17942)	Top-1 acc 55.859 (58.938)	Top-5 acc 80.078 (80.341)	lr 0.01619
Train [49][1780/3239]	Time 0.251 (0.595)	Data Time 0.001 (0.021)	Loss 2.7900 (2.7037)	Entropy 1.17696 (1.17940)	Top-1 acc 55.469 (58.941)	Top-5 acc 79.688 (80.346)	lr 0.01619
Train [49][1790/3239]	Time 0.231 (0.594)	Data Time 0.001 (0.021)	Loss 2.8233 (2.7040)	Entropy 1.17698 (1.17939)	Top-1 acc 55.859 (58.938)	Top-5 acc 78.125 (80.344)	lr 0.01619
Train [49][1800/3239]	Time 0.224 (0.593)	Data Time 0.001 (0.020)	Loss 2.9900 (2.7044)	Entropy 1.17696 (1.17937)	Top-1 acc 56.250 (58.930)	Top-5 acc 72.266 (80.331)	lr 0.01619
Train [49][1810/3239]	Time 0.235 (0.592)	Data Time 0.001 (0.020)	Loss 2.6389 (2.7040)	Entropy 1.17695 (1.17936)	Top-1 acc 59.375 (58.939)	Top-5 acc 83.203 (80.342)	lr 0.01619
Train [49][1820/3239]	Time 0.330 (0.592)	Data Time 0.001 (0.020)	Loss 2.7472 (2.7039)	Entropy 1.17694 (1.17935)	Top-1 acc 54.688 (58.940)	Top-5 acc 79.297 (80.343)	lr 0.01619
Train [49][1830/3239]	Time 0.216 (0.591)	Data Time 0.001 (0.020)	Loss 2.7940 (2.7039)	Entropy 1.17691 (1.17933)	Top-1 acc 57.422 (58.943)	Top-5 acc 77.344 (80.344)	lr 0.01619
Train [49][1840/3239]	Time 0.226 (0.590)	Data Time 0.001 (0.020)	Loss 2.6526 (2.7039)	Entropy 1.17685 (1.17932)	Top-1 acc 60.547 (58.943)	Top-5 acc 79.297 (80.345)	lr 0.01619
Train [49][1850/3239]	Time 0.227 (0.590)	Data Time 0.001 (0.020)	Loss 2.8721 (2.7041)	Entropy 1.17681 (1.17931)	Top-1 acc 53.125 (58.929)	Top-5 acc 76.953 (80.339)	lr 0.01618
Train [49][1860/3239]	Time 0.226 (0.589)	Data Time 0.001 (0.020)	Loss 3.0221 (2.7042)	Entropy 1.17675 (1.17929)	Top-1 acc 52.734 (58.923)	Top-5 acc 75.391 (80.338)	lr 0.01618
Train [49][1870/3239]	Time 0.214 (0.588)	Data Time 0.001 (0.020)	Loss 2.7844 (2.7044)	Entropy 1.17676 (1.17928)	Top-1 acc 55.859 (58.922)	Top-5 acc 80.469 (80.335)	lr 0.01618
Train [49][1880/3239]	Time 2.513 (0.588)	Data Time 0.002 (0.020)	Loss 2.6826 (2.7045)	Entropy 1.17676 (1.17927)	Top-1 acc 55.859 (58.918)	Top-5 acc 80.859 (80.335)	lr 0.01618
Train [49][1890/3239]	Time 0.212 (0.586)	Data Time 0.001 (0.020)	Loss 2.5566 (2.7047)	Entropy 1.17667 (1.17925)	Top-1 acc 60.938 (58.907)	Top-5 acc 83.984 (80.329)	lr 0.01618
Train [49][1900/3239]	Time 0.164 (0.585)	Data Time 0.001 (0.019)	Loss 2.5818 (2.7049)	Entropy 1.17660 (1.17924)	Top-1 acc 62.109 (58.905)	Top-5 acc 82.812 (80.328)	lr 0.01618
Train [49][1910/3239]	Time 0.233 (0.585)	Data Time 0.001 (0.019)	Loss 2.7565 (2.7051)	Entropy 1.17662 (1.17923)	Top-1 acc 58.984 (58.896)	Top-5 acc 80.469 (80.323)	lr 0.01618
Train [49][1920/3239]	Time 0.225 (0.584)	Data Time 0.001 (0.019)	Loss 2.9004 (2.7054)	Entropy 1.17657 (1.17921)	Top-1 acc 51.953 (58.891)	Top-5 acc 75.781 (80.317)	lr 0.01618
Train [49][1930/3239]	Time 0.220 (0.583)	Data Time 0.001 (0.019)	Loss 2.5935 (2.7054)	Entropy 1.17652 (1.17920)	Top-1 acc 63.281 (58.889)	Top-5 acc 81.641 (80.316)	lr 0.01618
Train [49][1940/3239]	Time 0.213 (0.583)	Data Time 0.001 (0.019)	Loss 2.6600 (2.7056)	Entropy 1.17653 (1.17918)	Top-1 acc 59.766 (58.887)	Top-5 acc 80.469 (80.308)	lr 0.01618
Train [49][1950/3239]	Time 0.212 (0.582)	Data Time 0.001 (0.019)	Loss 2.7388 (2.7055)	Entropy 1.17643 (1.17917)	Top-1 acc 60.547 (58.886)	Top-5 acc 79.297 (80.309)	lr 0.01617
Train [49][1960/3239]	Time 0.211 (0.581)	Data Time 0.001 (0.019)	Loss 2.8458 (2.7057)	Entropy 1.17640 (1.17916)	Top-1 acc 54.297 (58.885)	Top-5 acc 77.344 (80.303)	lr 0.01617
Train [49][1970/3239]	Time 0.220 (0.581)	Data Time 0.001 (0.019)	Loss 2.7152 (2.7058)	Entropy 1.17624 (1.17914)	Top-1 acc 59.375 (58.880)	Top-5 acc 80.078 (80.301)	lr 0.01617
Train [49][1980/3239]	Time 0.218 (0.580)	Data Time 0.001 (0.019)	Loss 2.7906 (2.7061)	Entropy 1.17624 (1.17913)	Top-1 acc 55.859 (58.874)	Top-5 acc 78.906 (80.295)	lr 0.01617
Train [49][1990/3239]	Time 2.488 (0.580)	Data Time 0.001 (0.019)	Loss 2.8777 (2.7065)	Entropy 1.17624 (1.17911)	Top-1 acc 55.078 (58.865)	Top-5 acc 75.000 (80.282)	lr 0.01617
Train [49][2000/3239]	Time 0.259 (0.578)	Data Time 0.002 (0.019)	Loss 2.7346 (2.7066)	Entropy 1.17621 (1.17910)	Top-1 acc 56.641 (58.860)	Top-5 acc 80.078 (80.278)	lr 0.01617
Train [49][2010/3239]	Time 0.259 (0.577)	Data Time 0.001 (0.018)	Loss 2.6801 (2.7066)	Entropy 1.17620 (1.17908)	Top-1 acc 60.547 (58.866)	Top-5 acc 82.031 (80.280)	lr 0.01617
Train [49][2020/3239]	Time 0.219 (0.577)	Data Time 0.001 (0.018)	Loss 2.5116 (2.7064)	Entropy 1.17619 (1.17907)	Top-1 acc 61.328 (58.872)	Top-5 acc 82.422 (80.282)	lr 0.01617
Train [49][2030/3239]	Time 0.211 (0.576)	Data Time 0.001 (0.018)	Loss 2.6508 (2.7065)	Entropy 1.17617 (1.17906)	Top-1 acc 58.203 (58.865)	Top-5 acc 80.859 (80.282)	lr 0.01617
Train [49][2040/3239]	Time 0.210 (0.575)	Data Time 0.001 (0.018)	Loss 2.8462 (2.7067)	Entropy 1.17614 (1.17904)	Top-1 acc 56.250 (58.862)	Top-5 acc 80.078 (80.282)	lr 0.01617
Train [49][2050/3239]	Time 0.218 (0.575)	Data Time 0.001 (0.018)	Loss 2.6878 (2.7065)	Entropy 1.17614 (1.17903)	Top-1 acc 56.641 (58.866)	Top-5 acc 82.422 (80.287)	lr 0.01617
Train [49][2060/3239]	Time 0.223 (0.574)	Data Time 0.001 (0.018)	Loss 2.6313 (2.7064)	Entropy 1.17610 (1.17901)	Top-1 acc 60.156 (58.868)	Top-5 acc 79.688 (80.292)	lr 0.01616
Train [49][2070/3239]	Time 0.233 (0.574)	Data Time 0.001 (0.018)	Loss 2.5702 (2.7064)	Entropy 1.17606 (1.17900)	Top-1 acc 59.766 (58.867)	Top-5 acc 83.203 (80.289)	lr 0.01616
Train [49][2080/3239]	Time 0.226 (0.573)	Data Time 0.001 (0.018)	Loss 2.6127 (2.7065)	Entropy 1.17597 (1.17899)	Top-1 acc 63.281 (58.865)	Top-5 acc 82.031 (80.287)	lr 0.01616
Train [49][2090/3239]	Time 0.296 (0.595)	Data Time 0.004 (0.018)	Loss 2.8418 (2.7066)	Entropy 1.17596 (1.17897)	Top-1 acc 57.031 (58.862)	Top-5 acc 76.953 (80.284)	lr 0.01616
Train [49][2100/3239]	Time 2.591 (0.595)	Data Time 0.002 (0.018)	Loss 2.5736 (2.7067)	Entropy 1.17596 (1.17896)	Top-1 acc 59.766 (58.862)	Top-5 acc 82.422 (80.282)	lr 0.01616
Train [49][2110/3239]	Time 0.233 (0.593)	Data Time 0.002 (0.018)	Loss 2.6491 (2.7066)	Entropy 1.17595 (1.17894)	Top-1 acc 59.375 (58.866)	Top-5 acc 81.641 (80.286)	lr 0.01616
Train [49][2120/3239]	Time 0.198 (0.593)	Data Time 0.001 (0.018)	Loss 2.7479 (2.7065)	Entropy 1.17596 (1.17893)	Top-1 acc 57.031 (58.869)	Top-5 acc 78.516 (80.288)	lr 0.01616
Train [49][2130/3239]	Time 0.237 (0.592)	Data Time 0.001 (0.018)	Loss 2.7817 (2.7068)	Entropy 1.17593 (1.17891)	Top-1 acc 57.031 (58.864)	Top-5 acc 80.078 (80.282)	lr 0.01616
Train [49][2140/3239]	Time 0.229 (0.591)	Data Time 0.001 (0.017)	Loss 2.6991 (2.7067)	Entropy 1.17587 (1.17890)	Top-1 acc 57.422 (58.867)	Top-5 acc 80.078 (80.284)	lr 0.01616
Train [49][2150/3239]	Time 0.312 (0.591)	Data Time 0.002 (0.017)	Loss 2.6655 (2.7067)	Entropy 1.17586 (1.17889)	Top-1 acc 60.938 (58.867)	Top-5 acc 78.516 (80.284)	lr 0.01616
Train [49][2160/3239]	Time 0.230 (0.590)	Data Time 0.001 (0.017)	Loss 2.7452 (2.7067)	Entropy 1.17584 (1.17887)	Top-1 acc 60.547 (58.865)	Top-5 acc 78.125 (80.285)	lr 0.01615
Train [49][2170/3239]	Time 0.229 (0.589)	Data Time 0.001 (0.017)	Loss 2.8475 (2.7069)	Entropy 1.17583 (1.17886)	Top-1 acc 53.906 (58.858)	Top-5 acc 75.781 (80.279)	lr 0.01615
Train [49][2180/3239]	Time 0.240 (0.589)	Data Time 0.001 (0.017)	Loss 2.7167 (2.7072)	Entropy 1.17584 (1.17884)	Top-1 acc 59.375 (58.854)	Top-5 acc 82.422 (80.273)	lr 0.01615
Train [49][2190/3239]	Time 0.209 (0.588)	Data Time 0.001 (0.017)	Loss 2.7752 (2.7074)	Entropy 1.17581 (1.17883)	Top-1 acc 58.594 (58.849)	Top-5 acc 81.641 (80.267)	lr 0.01615
Train [49][2200/3239]	Time 0.225 (0.588)	Data Time 0.001 (0.017)	Loss 2.6979 (2.7073)	Entropy 1.17582 (1.17882)	Top-1 acc 58.203 (58.854)	Top-5 acc 79.688 (80.268)	lr 0.01615
Train [49][2210/3239]	Time 2.583 (0.587)	Data Time 0.002 (0.017)	Loss 2.7860 (2.7073)	Entropy 1.17582 (1.17880)	Top-1 acc 55.859 (58.858)	Top-5 acc 79.688 (80.267)	lr 0.01615
Train [49][2220/3239]	Time 0.244 (0.586)	Data Time 0.001 (0.017)	Loss 2.6953 (2.7074)	Entropy 1.17578 (1.17879)	Top-1 acc 60.938 (58.857)	Top-5 acc 78.906 (80.263)	lr 0.01615
Train [49][2230/3239]	Time 0.221 (0.585)	Data Time 0.001 (0.017)	Loss 2.6761 (2.7078)	Entropy 1.17574 (1.17878)	Top-1 acc 60.547 (58.847)	Top-5 acc 81.641 (80.257)	lr 0.01615
Train [49][2240/3239]	Time 0.229 (0.585)	Data Time 0.001 (0.017)	Loss 2.6770 (2.7077)	Entropy 1.17568 (1.17876)	Top-1 acc 61.328 (58.849)	Top-5 acc 81.641 (80.261)	lr 0.01615
Train [49][2250/3239]	Time 0.302 (0.584)	Data Time 0.001 (0.017)	Loss 2.6227 (2.7081)	Entropy 1.17565 (1.17875)	Top-1 acc 58.594 (58.836)	Top-5 acc 83.984 (80.258)	lr 0.01615
Train [49][2260/3239]	Time 0.225 (0.583)	Data Time 0.001 (0.017)	Loss 2.7988 (2.7083)	Entropy 1.17562 (1.17873)	Top-1 acc 52.344 (58.830)	Top-5 acc 79.688 (80.254)	lr 0.01614
Train [49][2270/3239]	Time 0.206 (0.583)	Data Time 0.001 (0.017)	Loss 2.6507 (2.7082)	Entropy 1.17561 (1.17872)	Top-1 acc 58.984 (58.833)	Top-5 acc 79.297 (80.256)	lr 0.01614
Train [49][2280/3239]	Time 0.230 (0.582)	Data Time 0.005 (0.016)	Loss 2.9530 (2.7081)	Entropy 1.17560 (1.17871)	Top-1 acc 55.078 (58.834)	Top-5 acc 75.000 (80.257)	lr 0.01614
Train [49][2290/3239]	Time 0.228 (0.582)	Data Time 0.001 (0.016)	Loss 2.7263 (2.7081)	Entropy 1.17553 (1.17869)	Top-1 acc 57.812 (58.837)	Top-5 acc 79.297 (80.258)	lr 0.01614
Train [49][2300/3239]	Time 0.211 (0.581)	Data Time 0.001 (0.016)	Loss 2.7203 (2.7083)	Entropy 1.17550 (1.17868)	Top-1 acc 58.984 (58.834)	Top-5 acc 80.078 (80.254)	lr 0.01614
Train [49][2310/3239]	Time 0.215 (0.581)	Data Time 0.001 (0.016)	Loss 2.7083 (2.7083)	Entropy 1.17549 (1.17867)	Top-1 acc 61.328 (58.835)	Top-5 acc 81.641 (80.254)	lr 0.01614
Train [49][2320/3239]	Time 2.487 (0.580)	Data Time 0.001 (0.016)	Loss 2.6386 (2.7083)	Entropy 1.17549 (1.17865)	Top-1 acc 62.109 (58.834)	Top-5 acc 80.078 (80.256)	lr 0.01614
Train [49][2330/3239]	Time 0.220 (0.579)	Data Time 0.001 (0.016)	Loss 2.5630 (2.7082)	Entropy 1.17547 (1.17864)	Top-1 acc 63.672 (58.834)	Top-5 acc 83.594 (80.256)	lr 0.01614
Train [49][2340/3239]	Time 0.233 (0.578)	Data Time 0.001 (0.016)	Loss 2.6662 (2.7082)	Entropy 1.17545 (1.17862)	Top-1 acc 58.594 (58.835)	Top-5 acc 81.250 (80.260)	lr 0.01614
Train [49][2350/3239]	Time 0.321 (0.578)	Data Time 0.001 (0.016)	Loss 2.6384 (2.7082)	Entropy 1.17543 (1.17861)	Top-1 acc 59.375 (58.833)	Top-5 acc 83.203 (80.260)	lr 0.01614
Train [49][2360/3239]	Time 0.228 (0.577)	Data Time 0.001 (0.016)	Loss 2.7834 (2.7081)	Entropy 1.17541 (1.17860)	Top-1 acc 58.984 (58.832)	Top-5 acc 79.297 (80.261)	lr 0.01614
Train [49][2370/3239]	Time 0.137 (0.576)	Data Time 0.001 (0.016)	Loss 2.6591 (2.7085)	Entropy 1.17539 (1.17858)	Top-1 acc 56.641 (58.824)	Top-5 acc 79.688 (80.254)	lr 0.01613
Train [49][2380/3239]	Time 0.229 (0.576)	Data Time 0.001 (0.016)	Loss 3.0359 (2.7086)	Entropy 1.17532 (1.17857)	Top-1 acc 51.562 (58.822)	Top-5 acc 71.484 (80.246)	lr 0.01613
Train [49][2390/3239]	Time 0.247 (0.576)	Data Time 0.001 (0.016)	Loss 2.7845 (2.7087)	Entropy 1.17530 (1.17856)	Top-1 acc 59.766 (58.823)	Top-5 acc 78.125 (80.243)	lr 0.01613
Train [49][2400/3239]	Time 0.316 (0.575)	Data Time 0.001 (0.016)	Loss 2.7515 (2.7089)	Entropy 1.17528 (1.17854)	Top-1 acc 57.422 (58.822)	Top-5 acc 80.859 (80.240)	lr 0.01613
Train [49][2410/3239]	Time 0.226 (0.575)	Data Time 0.001 (0.016)	Loss 2.6848 (2.7089)	Entropy 1.17524 (1.17853)	Top-1 acc 59.766 (58.822)	Top-5 acc 83.203 (80.239)	lr 0.01613
Train [49][2420/3239]	Time 0.245 (0.574)	Data Time 0.001 (0.016)	Loss 2.5885 (2.7089)	Entropy 1.17519 (1.17852)	Top-1 acc 60.938 (58.820)	Top-5 acc 81.641 (80.237)	lr 0.01613
Train [49][2430/3239]	Time 2.446 (0.574)	Data Time 0.001 (0.016)	Loss 2.8575 (2.7089)	Entropy 1.17519 (1.17850)	Top-1 acc 55.078 (58.821)	Top-5 acc 79.688 (80.237)	lr 0.01613
Train [49][2440/3239]	Time 0.227 (0.572)	Data Time 0.001 (0.016)	Loss 2.7192 (2.7089)	Entropy 1.17514 (1.17849)	Top-1 acc 56.250 (58.824)	Top-5 acc 81.250 (80.242)	lr 0.01613
Train [49][2450/3239]	Time 0.345 (0.572)	Data Time 0.001 (0.015)	Loss 2.6198 (2.7088)	Entropy 1.17513 (1.17848)	Top-1 acc 60.156 (58.821)	Top-5 acc 80.469 (80.242)	lr 0.01613
Train [49][2460/3239]	Time 0.241 (0.592)	Data Time 0.002 (0.015)	Loss 2.7961 (2.7087)	Entropy 1.17511 (1.17846)	Top-1 acc 53.906 (58.823)	Top-5 acc 78.125 (80.245)	lr 0.01613
Train [49][2470/3239]	Time 0.220 (0.591)	Data Time 0.002 (0.015)	Loss 2.6011 (2.7085)	Entropy 1.17508 (1.17845)	Top-1 acc 64.844 (58.829)	Top-5 acc 80.859 (80.247)	lr 0.01612
Train [49][2480/3239]	Time 0.231 (0.591)	Data Time 0.001 (0.015)	Loss 2.5086 (2.7086)	Entropy 1.17507 (1.17843)	Top-1 acc 63.281 (58.829)	Top-5 acc 83.594 (80.247)	lr 0.01612
Train [49][2490/3239]	Time 0.209 (0.590)	Data Time 0.001 (0.015)	Loss 2.7927 (2.7088)	Entropy 1.17504 (1.17842)	Top-1 acc 57.422 (58.821)	Top-5 acc 78.125 (80.243)	lr 0.01612
Train [49][2500/3239]	Time 0.200 (0.590)	Data Time 0.001 (0.015)	Loss 2.8305 (2.7088)	Entropy 1.17501 (1.17841)	Top-1 acc 59.375 (58.821)	Top-5 acc 80.469 (80.242)	lr 0.01612
Train [49][2510/3239]	Time 0.226 (0.589)	Data Time 0.001 (0.015)	Loss 2.7650 (2.7088)	Entropy 1.17501 (1.17839)	Top-1 acc 57.422 (58.817)	Top-5 acc 77.734 (80.241)	lr 0.01612
Train [49][2520/3239]	Time 0.242 (0.589)	Data Time 0.001 (0.015)	Loss 2.7068 (2.7091)	Entropy 1.17492 (1.17838)	Top-1 acc 59.766 (58.813)	Top-5 acc 80.469 (80.236)	lr 0.01612
Train [49][2530/3239]	Time 0.265 (0.588)	Data Time 0.001 (0.015)	Loss 2.4447 (2.7090)	Entropy 1.17492 (1.17837)	Top-1 acc 65.625 (58.814)	Top-5 acc 85.156 (80.236)	lr 0.01612
Train [49][2540/3239]	Time 2.617 (0.588)	Data Time 0.002 (0.015)	Loss 2.7921 (2.7092)	Entropy 1.17492 (1.17835)	Top-1 acc 56.250 (58.809)	Top-5 acc 76.953 (80.232)	lr 0.01612
Train [49][2550/3239]	Time 0.228 (0.587)	Data Time 0.001 (0.015)	Loss 2.8325 (2.7095)	Entropy 1.17485 (1.17834)	Top-1 acc 56.250 (58.800)	Top-5 acc 77.344 (80.224)	lr 0.01612
Train [49][2560/3239]	Time 0.230 (0.586)	Data Time 0.001 (0.015)	Loss 2.6994 (2.7095)	Entropy 1.17483 (1.17833)	Top-1 acc 58.594 (58.800)	Top-5 acc 79.297 (80.227)	lr 0.01612
Train [49][2570/3239]	Time 0.223 (0.585)	Data Time 0.001 (0.015)	Loss 2.4969 (2.7095)	Entropy 1.17480 (1.17831)	Top-1 acc 62.109 (58.799)	Top-5 acc 85.156 (80.228)	lr 0.01612
Train [49][2580/3239]	Time 0.219 (0.585)	Data Time 0.002 (0.015)	Loss 2.6754 (2.7096)	Entropy 1.17478 (1.17830)	Top-1 acc 57.031 (58.795)	Top-5 acc 80.469 (80.229)	lr 0.01611
Train [49][2590/3239]	Time 0.340 (0.585)	Data Time 0.001 (0.015)	Loss 2.5730 (2.7097)	Entropy 1.17476 (1.17828)	Top-1 acc 62.109 (58.796)	Top-5 acc 83.984 (80.223)	lr 0.01611
Train [49][2600/3239]	Time 0.264 (0.584)	Data Time 0.002 (0.015)	Loss 2.7583 (2.7096)	Entropy 1.17466 (1.17827)	Top-1 acc 56.641 (58.797)	Top-5 acc 78.125 (80.222)	lr 0.01611
Train [49][2610/3239]	Time 0.228 (0.584)	Data Time 0.001 (0.015)	Loss 2.7092 (2.7099)	Entropy 1.17461 (1.17826)	Top-1 acc 56.641 (58.789)	Top-5 acc 78.516 (80.215)	lr 0.01611
Train [49][2620/3239]	Time 0.243 (0.583)	Data Time 0.001 (0.015)	Loss 2.9427 (2.7101)	Entropy 1.17451 (1.17824)	Top-1 acc 54.297 (58.785)	Top-5 acc 75.781 (80.212)	lr 0.01611
Train [49][2630/3239]	Time 0.226 (0.583)	Data Time 0.001 (0.015)	Loss 2.6392 (2.7101)	Entropy 1.17417 (1.17823)	Top-1 acc 60.156 (58.784)	Top-5 acc 80.859 (80.212)	lr 0.01611
Train [49][2640/3239]	Time 0.367 (0.582)	Data Time 0.001 (0.014)	Loss 2.6126 (2.7101)	Entropy 1.17416 (1.17821)	Top-1 acc 58.984 (58.784)	Top-5 acc 80.469 (80.215)	lr 0.01611
Train [49][2650/3239]	Time 0.258 (0.582)	Data Time 0.001 (0.014)	Loss 2.7356 (2.7101)	Entropy 1.17412 (1.17820)	Top-1 acc 58.203 (58.786)	Top-5 acc 78.125 (80.211)	lr 0.01611
Train [49][2660/3239]	Time 0.231 (0.581)	Data Time 0.001 (0.014)	Loss 2.8663 (2.7104)	Entropy 1.17412 (1.17818)	Top-1 acc 58.203 (58.783)	Top-5 acc 76.953 (80.204)	lr 0.01611
Train [49][2670/3239]	Time 0.212 (0.581)	Data Time 0.001 (0.014)	Loss 2.6879 (2.7105)	Entropy 1.17410 (1.17817)	Top-1 acc 61.328 (58.783)	Top-5 acc 77.734 (80.199)	lr 0.01611
Train [49][2680/3239]	Time 0.224 (0.580)	Data Time 0.002 (0.014)	Loss 2.5326 (2.7104)	Entropy 1.17409 (1.17815)	Top-1 acc 63.672 (58.783)	Top-5 acc 82.031 (80.202)	lr 0.01610
Train [49][2690/3239]	Time 0.399 (0.580)	Data Time 0.002 (0.014)	Loss 2.7279 (2.7104)	Entropy 1.17411 (1.17814)	Top-1 acc 55.859 (58.782)	Top-5 acc 79.297 (80.199)	lr 0.01610
Train [49][2700/3239]	Time 0.251 (0.580)	Data Time 0.001 (0.014)	Loss 2.8406 (2.7105)	Entropy 1.17409 (1.17812)	Top-1 acc 55.469 (58.778)	Top-5 acc 78.125 (80.196)	lr 0.01610
Train [49][2710/3239]	Time 0.237 (0.579)	Data Time 0.001 (0.014)	Loss 2.9474 (2.7107)	Entropy 1.17406 (1.17811)	Top-1 acc 56.250 (58.775)	Top-5 acc 74.609 (80.193)	lr 0.01610
Train [49][2720/3239]	Time 0.264 (0.579)	Data Time 0.001 (0.014)	Loss 2.6954 (2.7107)	Entropy 1.17403 (1.17809)	Top-1 acc 56.250 (58.776)	Top-5 acc 80.078 (80.194)	lr 0.01610
Train [49][2730/3239]	Time 0.277 (0.578)	Data Time 0.001 (0.014)	Loss 2.5915 (2.7108)	Entropy 1.17402 (1.17808)	Top-1 acc 58.203 (58.768)	Top-5 acc 81.641 (80.191)	lr 0.01610
Train [49][2740/3239]	Time 0.328 (0.578)	Data Time 0.002 (0.014)	Loss 2.6280 (2.7109)	Entropy 1.17399 (1.17806)	Top-1 acc 62.891 (58.768)	Top-5 acc 80.859 (80.192)	lr 0.01610
Train [49][2750/3239]	Time 0.222 (0.577)	Data Time 0.001 (0.014)	Loss 2.7857 (2.7108)	Entropy 1.17398 (1.17805)	Top-1 acc 55.859 (58.767)	Top-5 acc 78.125 (80.191)	lr 0.01610
Train [49][2760/3239]	Time 0.212 (0.577)	Data Time 0.001 (0.014)	Loss 2.5452 (2.7107)	Entropy 1.17395 (1.17803)	Top-1 acc 63.672 (58.772)	Top-5 acc 83.594 (80.194)	lr 0.01610
Train [49][2770/3239]	Time 0.265 (0.577)	Data Time 0.002 (0.014)	Loss 2.5924 (2.7106)	Entropy 1.17393 (1.17802)	Top-1 acc 61.328 (58.773)	Top-5 acc 79.297 (80.193)	lr 0.01610
Train [49][2780/3239]	Time 0.209 (0.576)	Data Time 0.001 (0.014)	Loss 2.7690 (2.7106)	Entropy 1.17382 (1.17800)	Top-1 acc 58.203 (58.775)	Top-5 acc 77.344 (80.192)	lr 0.01609
Train [49][2790/3239]	Time 0.358 (0.576)	Data Time 0.001 (0.014)	Loss 2.6460 (2.7108)	Entropy 1.17377 (1.17799)	Top-1 acc 58.984 (58.768)	Top-5 acc 82.031 (80.188)	lr 0.01609
Train [49][2800/3239]	Time 0.294 (0.591)	Data Time 0.004 (0.014)	Loss 2.7682 (2.7109)	Entropy 1.17373 (1.17797)	Top-1 acc 57.031 (58.762)	Top-5 acc 78.125 (80.184)	lr 0.01609
Train [49][2810/3239]	Time 0.231 (0.591)	Data Time 0.002 (0.014)	Loss 2.6855 (2.7108)	Entropy 1.17373 (1.17796)	Top-1 acc 57.812 (58.763)	Top-5 acc 82.812 (80.185)	lr 0.01609
Train [49][2820/3239]	Time 0.263 (0.591)	Data Time 0.002 (0.014)	Loss 2.6762 (2.7108)	Entropy 1.17370 (1.17794)	Top-1 acc 61.719 (58.766)	Top-5 acc 79.688 (80.187)	lr 0.01609
Train [49][2830/3239]	Time 0.229 (0.591)	Data Time 0.001 (0.014)	Loss 2.6450 (2.7108)	Entropy 1.17366 (1.17793)	Top-1 acc 63.672 (58.766)	Top-5 acc 81.250 (80.186)	lr 0.01609
Train [49][2840/3239]	Time 0.236 (0.590)	Data Time 0.001 (0.014)	Loss 2.6471 (2.7108)	Entropy 1.17364 (1.17791)	Top-1 acc 58.984 (58.764)	Top-5 acc 83.203 (80.187)	lr 0.01609
Train [49][2850/3239]	Time 0.266 (0.590)	Data Time 0.002 (0.014)	Loss 2.9656 (2.7109)	Entropy 1.17364 (1.17790)	Top-1 acc 52.734 (58.761)	Top-5 acc 76.172 (80.185)	lr 0.01609
Train [49][2860/3239]	Time 0.223 (0.589)	Data Time 0.001 (0.013)	Loss 2.9895 (2.7112)	Entropy 1.17356 (1.17788)	Top-1 acc 54.688 (58.758)	Top-5 acc 71.875 (80.178)	lr 0.01609
Train [49][2870/3239]	Time 0.249 (0.589)	Data Time 0.001 (0.013)	Loss 2.7311 (2.7113)	Entropy 1.17355 (1.17787)	Top-1 acc 60.938 (58.755)	Top-5 acc 78.906 (80.174)	lr 0.01609
Train [49][2880/3239]	Time 0.220 (0.588)	Data Time 0.001 (0.013)	Loss 2.6069 (2.7112)	Entropy 1.17352 (1.17785)	Top-1 acc 58.203 (58.755)	Top-5 acc 81.641 (80.173)	lr 0.01609
Train [49][2890/3239]	Time 0.224 (0.588)	Data Time 0.001 (0.013)	Loss 2.7425 (2.7111)	Entropy 1.17349 (1.17784)	Top-1 acc 54.297 (58.760)	Top-5 acc 80.078 (80.175)	lr 0.01608
Train [49][2900/3239]	Time 0.266 (0.587)	Data Time 0.001 (0.013)	Loss 2.9391 (2.7112)	Entropy 1.17349 (1.17782)	Top-1 acc 54.688 (58.762)	Top-5 acc 74.609 (80.169)	lr 0.01608
Train [49][2910/3239]	Time 0.210 (0.587)	Data Time 0.001 (0.013)	Loss 2.6370 (2.7112)	Entropy 1.17345 (1.17781)	Top-1 acc 59.375 (58.759)	Top-5 acc 82.812 (80.170)	lr 0.01608
Train [49][2920/3239]	Time 0.163 (0.586)	Data Time 0.001 (0.013)	Loss 3.5278 (2.7113)	Entropy 1.17347 (1.17779)	Top-1 acc 45.312 (58.757)	Top-5 acc 66.797 (80.166)	lr 0.01608
Train [49][2930/3239]	Time 0.330 (0.586)	Data Time 0.001 (0.013)	Loss 2.7301 (2.7113)	Entropy 1.17338 (1.17778)	Top-1 acc 56.250 (58.757)	Top-5 acc 77.344 (80.168)	lr 0.01608
Train [49][2940/3239]	Time 0.208 (0.586)	Data Time 0.001 (0.013)	Loss 2.8152 (2.7113)	Entropy 1.17333 (1.17776)	Top-1 acc 55.859 (58.755)	Top-5 acc 76.172 (80.165)	lr 0.01608
Train [49][2950/3239]	Time 0.252 (0.585)	Data Time 0.001 (0.013)	Loss 2.5399 (2.7113)	Entropy 1.17328 (1.17775)	Top-1 acc 62.500 (58.759)	Top-5 acc 84.375 (80.165)	lr 0.01608
Train [49][2960/3239]	Time 0.255 (0.585)	Data Time 0.001 (0.013)	Loss 3.0971 (2.7115)	Entropy 1.17310 (1.17773)	Top-1 acc 53.516 (58.756)	Top-5 acc 72.266 (80.161)	lr 0.01608
Train [49][2970/3239]	Time 0.244 (0.584)	Data Time 0.001 (0.013)	Loss 2.4559 (2.7114)	Entropy 1.17308 (1.17772)	Top-1 acc 66.016 (58.761)	Top-5 acc 84.766 (80.164)	lr 0.01608
Train [49][2980/3239]	Time 0.341 (0.584)	Data Time 0.001 (0.013)	Loss 2.7047 (2.7115)	Entropy 1.17306 (1.17770)	Top-1 acc 56.641 (58.759)	Top-5 acc 78.516 (80.161)	lr 0.01608
Train [49][2990/3239]	Time 0.197 (0.584)	Data Time 0.001 (0.013)	Loss 2.6597 (2.7117)	Entropy 1.17305 (1.17769)	Top-1 acc 60.938 (58.754)	Top-5 acc 80.078 (80.156)	lr 0.01607
Train [49][3000/3239]	Time 0.256 (0.583)	Data Time 0.001 (0.013)	Loss 2.9282 (2.7119)	Entropy 1.17305 (1.17767)	Top-1 acc 55.469 (58.750)	Top-5 acc 76.172 (80.151)	lr 0.01607
Train [49][3010/3239]	Time 0.224 (0.583)	Data Time 0.001 (0.013)	Loss 2.7153 (2.7120)	Entropy 1.17302 (1.17766)	Top-1 acc 56.250 (58.748)	Top-5 acc 80.859 (80.149)	lr 0.01607
Train [49][3020/3239]	Time 0.279 (0.582)	Data Time 0.001 (0.013)	Loss 2.7208 (2.7119)	Entropy 1.17297 (1.17764)	Top-1 acc 62.109 (58.754)	Top-5 acc 80.078 (80.152)	lr 0.01607
Train [49][3030/3239]	Time 0.330 (0.582)	Data Time 0.001 (0.013)	Loss 2.5861 (2.7117)	Entropy 1.17297 (1.17763)	Top-1 acc 61.328 (58.759)	Top-5 acc 85.156 (80.155)	lr 0.01607
Train [49][3040/3239]	Time 0.258 (0.582)	Data Time 0.001 (0.013)	Loss 2.8600 (2.7117)	Entropy 1.17292 (1.17761)	Top-1 acc 54.688 (58.762)	Top-5 acc 76.562 (80.153)	lr 0.01607
Train [49][3050/3239]	Time 0.236 (0.581)	Data Time 0.001 (0.013)	Loss 2.8526 (2.7119)	Entropy 1.17289 (1.17759)	Top-1 acc 55.078 (58.757)	Top-5 acc 75.391 (80.143)	lr 0.01607
Train [49][3060/3239]	Time 0.216 (0.581)	Data Time 0.001 (0.013)	Loss 2.8031 (2.7119)	Entropy 1.17285 (1.17758)	Top-1 acc 56.641 (58.756)	Top-5 acc 77.734 (80.145)	lr 0.01607
Train [49][3070/3239]	Time 0.215 (0.580)	Data Time 0.001 (0.013)	Loss 2.6612 (2.7118)	Entropy 1.17289 (1.17756)	Top-1 acc 60.547 (58.756)	Top-5 acc 78.125 (80.143)	lr 0.01607
Train [49][3080/3239]	Time 0.332 (0.580)	Data Time 0.001 (0.013)	Loss 2.7844 (2.7118)	Entropy 1.17285 (1.17755)	Top-1 acc 57.812 (58.757)	Top-5 acc 80.469 (80.143)	lr 0.01607
Train [49][3090/3239]	Time 0.265 (0.579)	Data Time 0.002 (0.013)	Loss 2.8602 (2.7119)	Entropy 1.17277 (1.17753)	Top-1 acc 59.766 (58.757)	Top-5 acc 77.734 (80.142)	lr 0.01606
Train [49][3100/3239]	Time 0.247 (0.579)	Data Time 0.002 (0.013)	Loss 2.6079 (2.7120)	Entropy 1.17274 (1.17752)	Top-1 acc 66.797 (58.756)	Top-5 acc 82.031 (80.142)	lr 0.01606
Train [49][3110/3239]	Time 0.223 (0.579)	Data Time 0.002 (0.013)	Loss 2.8303 (2.7120)	Entropy 1.17271 (1.17750)	Top-1 acc 54.297 (58.752)	Top-5 acc 76.562 (80.139)	lr 0.01606
Train [49][3120/3239]	Time 0.203 (0.578)	Data Time 0.001 (0.013)	Loss 2.8576 (2.7119)	Entropy 1.17274 (1.17749)	Top-1 acc 57.812 (58.752)	Top-5 acc 73.828 (80.140)	lr 0.01606
Train [49][3130/3239]	Time 0.237 (0.593)	Data Time 0.003 (0.012)	Loss 2.8272 (2.7120)	Entropy 1.17273 (1.17747)	Top-1 acc 53.906 (58.748)	Top-5 acc 75.000 (80.136)	lr 0.01606
Train [49][3140/3239]	Time 0.245 (0.593)	Data Time 0.002 (0.012)	Loss 2.8480 (2.7121)	Entropy 1.17271 (1.17746)	Top-1 acc 55.859 (58.745)	Top-5 acc 78.125 (80.136)	lr 0.01606
Train [49][3150/3239]	Time 0.247 (0.593)	Data Time 0.002 (0.012)	Loss 2.6712 (2.7121)	Entropy 1.17273 (1.17744)	Top-1 acc 61.719 (58.746)	Top-5 acc 80.078 (80.135)	lr 0.01606
Train [49][3160/3239]	Time 0.272 (0.592)	Data Time 0.002 (0.012)	Loss 2.8795 (2.7123)	Entropy 1.17269 (1.17743)	Top-1 acc 55.078 (58.740)	Top-5 acc 78.125 (80.133)	lr 0.01606
Train [49][3170/3239]	Time 0.306 (0.592)	Data Time 0.001 (0.012)	Loss 2.7335 (2.7122)	Entropy 1.17263 (1.17741)	Top-1 acc 57.812 (58.741)	Top-5 acc 80.078 (80.137)	lr 0.01606
Train [49][3180/3239]	Time 0.224 (0.591)	Data Time 0.000 (0.012)	Loss 2.6875 (2.7123)	Entropy 1.17253 (1.17740)	Top-1 acc 56.250 (58.743)	Top-5 acc 79.297 (80.137)	lr 0.01606
Train [49][3190/3239]	Time 0.227 (0.591)	Data Time 0.000 (0.012)	Loss 2.7788 (2.7123)	Entropy 1.17248 (1.17738)	Top-1 acc 56.641 (58.740)	Top-5 acc 77.734 (80.138)	lr 0.01606
Train [49][3200/3239]	Time 0.213 (0.591)	Data Time 0.000 (0.012)	Loss 2.6762 (2.7123)	Entropy 1.17242 (1.17737)	Top-1 acc 57.422 (58.742)	Top-5 acc 79.688 (80.138)	lr 0.01605
Train [49][3210/3239]	Time 0.223 (0.590)	Data Time 0.000 (0.012)	Loss 2.8744 (2.7122)	Entropy 1.17242 (1.17735)	Top-1 acc 55.078 (58.746)	Top-5 acc 77.734 (80.140)	lr 0.01605
Train [49][3220/3239]	Time 0.330 (0.590)	Data Time 0.000 (0.012)	Loss 2.7292 (2.7122)	Entropy 1.17238 (1.17734)	Top-1 acc 59.766 (58.746)	Top-5 acc 77.734 (80.142)	lr 0.01605
Train [49][3230/3239]	Time 0.223 (0.589)	Data Time 0.000 (0.012)	Loss 2.6882 (2.7120)	Entropy 1.17234 (1.17732)	Top-1 acc 60.156 (58.747)	Top-5 acc 82.031 (80.145)	lr 0.01605
Train [49][3239/3239]	Time 2.245 (0.589)	Data Time 0.000 (0.012)	Loss 2.9304 (2.7120)	Entropy 1.17234 (1.17731)	Top-1 acc 53.086 (58.746)	Top-5 acc 80.247 (80.147)	lr 0.01605
==========Valid [49/120]	loss 1.556	top-1 acc 64.914 (64.914)	top-5 acc 85.291	Train top-1 58.746	top-5 80.147	Entropy 1.17234	Latency-None: 0.000ms	Flops: 548.34M
Train [50][0/3239]	Time 34.082 (34.082)	Data Time 32.386 (32.386)	Loss 2.6573 (2.6573)	Entropy 1.17227 (1.17227)	Top-1 acc 62.891 (62.891)	Top-5 acc 80.859 (80.859)	lr 0.01605
Train [50][10/3239]	Time 2.649 (3.778)	Data Time 0.002 (3.107)	Loss 2.7730 (2.6435)	Entropy 1.17227 (1.17227)	Top-1 acc 59.766 (61.790)	Top-5 acc 79.297 (81.357)	lr 0.01605
Train [50][20/3239]	Time 0.216 (2.089)	Data Time 0.002 (1.628)	Loss 2.5914 (2.6204)	Entropy 1.17219 (1.17223)	Top-1 acc 61.719 (61.347)	Top-5 acc 82.422 (81.715)	lr 0.01605
Train [50][30/3239]	Time 0.247 (1.562)	Data Time 0.002 (1.104)	Loss 2.5324 (2.6344)	Entropy 1.17218 (1.17222)	Top-1 acc 65.234 (60.597)	Top-5 acc 82.422 (81.628)	lr 0.01605
Train [50][40/3239]	Time 0.242 (1.295)	Data Time 0.001 (0.835)	Loss 2.6618 (2.6384)	Entropy 1.17213 (1.17220)	Top-1 acc 62.891 (60.709)	Top-5 acc 80.469 (81.421)	lr 0.01605
Train [50][50/3239]	Time 0.238 (1.133)	Data Time 0.001 (0.671)	Loss 2.5885 (2.6434)	Entropy 1.17211 (1.17218)	Top-1 acc 62.891 (60.731)	Top-5 acc 80.859 (81.319)	lr 0.01605
Train [50][60/3239]	Time 0.244 (1.024)	Data Time 0.001 (0.562)	Loss 2.8094 (2.6437)	Entropy 1.17206 (1.17216)	Top-1 acc 54.688 (60.585)	Top-5 acc 79.688 (81.442)	lr 0.01604
Train [50][70/3239]	Time 0.218 (0.946)	Data Time 0.001 (0.483)	Loss 2.6474 (2.6578)	Entropy 1.17206 (1.17215)	Top-1 acc 59.375 (60.228)	Top-5 acc 80.078 (81.151)	lr 0.01604
Train [50][80/3239]	Time 0.328 (0.887)	Data Time 0.001 (0.423)	Loss 2.7494 (2.6527)	Entropy 1.17206 (1.17214)	Top-1 acc 60.156 (60.417)	Top-5 acc 78.906 (81.274)	lr 0.01604
Train [50][90/3239]	Time 0.220 (0.842)	Data Time 0.002 (0.377)	Loss 2.6042 (2.6582)	Entropy 1.17202 (1.17213)	Top-1 acc 62.500 (60.444)	Top-5 acc 81.250 (81.095)	lr 0.01604
Train [50][100/3239]	Time 0.228 (0.806)	Data Time 0.001 (0.340)	Loss 2.7456 (2.6620)	Entropy 1.17196 (1.17212)	Top-1 acc 60.938 (60.353)	Top-5 acc 78.516 (80.983)	lr 0.01604
Train [50][110/3239]	Time 0.285 (0.775)	Data Time 0.001 (0.309)	Loss 2.7752 (2.6596)	Entropy 1.17197 (1.17210)	Top-1 acc 55.859 (60.445)	Top-5 acc 81.641 (81.056)	lr 0.01604
Train [50][120/3239]	Time 2.522 (0.750)	Data Time 0.002 (0.284)	Loss 2.5997 (2.6583)	Entropy 1.17197 (1.17209)	Top-1 acc 63.672 (60.382)	Top-5 acc 79.297 (81.069)	lr 0.01604
Train [50][130/3239]	Time 0.229 (0.713)	Data Time 0.002 (0.262)	Loss 2.7902 (2.6609)	Entropy 1.17192 (1.17208)	Top-1 acc 57.812 (60.287)	Top-5 acc 78.125 (80.961)	lr 0.01604
Train [50][140/3239]	Time 0.232 (0.695)	Data Time 0.001 (0.244)	Loss 2.6940 (2.6624)	Entropy 1.17190 (1.17207)	Top-1 acc 60.156 (60.275)	Top-5 acc 79.688 (80.918)	lr 0.01604
Train [50][150/3239]	Time 0.233 (0.679)	Data Time 0.001 (0.228)	Loss 2.6741 (2.6664)	Entropy 1.17183 (1.17205)	Top-1 acc 58.984 (60.167)	Top-5 acc 78.906 (80.870)	lr 0.01604
Train [50][160/3239]	Time 0.234 (0.665)	Data Time 0.001 (0.214)	Loss 2.6004 (2.6672)	Entropy 1.17183 (1.17204)	Top-1 acc 58.203 (60.120)	Top-5 acc 82.422 (80.869)	lr 0.01603
Train [50][170/3239]	Time 0.203 (0.654)	Data Time 0.001 (0.201)	Loss 2.7631 (2.6695)	Entropy 1.17179 (1.17202)	Top-1 acc 57.422 (60.049)	Top-5 acc 80.469 (80.816)	lr 0.01603
Train [50][180/3239]	Time 0.226 (0.644)	Data Time 0.001 (0.190)	Loss 2.6343 (2.6698)	Entropy 1.17177 (1.17201)	Top-1 acc 60.938 (60.048)	Top-5 acc 80.469 (80.814)	lr 0.01603
Train [50][190/3239]	Time 0.217 (0.634)	Data Time 0.001 (0.180)	Loss 2.5986 (2.6696)	Entropy 1.17168 (1.17200)	Top-1 acc 58.594 (60.001)	Top-5 acc 85.547 (80.823)	lr 0.01603
Train [50][200/3239]	Time 0.208 (0.626)	Data Time 0.001 (0.172)	Loss 2.7761 (2.6733)	Entropy 1.17166 (1.17198)	Top-1 acc 58.203 (59.911)	Top-5 acc 82.422 (80.764)	lr 0.01603
Train [50][210/3239]	Time 0.204 (0.617)	Data Time 0.001 (0.163)	Loss 2.6927 (2.6743)	Entropy 1.17163 (1.17196)	Top-1 acc 61.328 (59.882)	Top-5 acc 81.641 (80.750)	lr 0.01603
Train [50][220/3239]	Time 0.224 (0.610)	Data Time 0.002 (0.156)	Loss 2.6523 (2.6756)	Entropy 1.17158 (1.17195)	Top-1 acc 61.328 (59.850)	Top-5 acc 82.812 (80.743)	lr 0.01603
Train [50][230/3239]	Time 2.447 (0.604)	Data Time 0.002 (0.149)	Loss 2.7750 (2.6779)	Entropy 1.17158 (1.17193)	Top-1 acc 57.812 (59.767)	Top-5 acc 79.297 (80.699)	lr 0.01603
Train [50][240/3239]	Time 0.209 (0.589)	Data Time 0.001 (0.143)	Loss 2.7675 (2.6760)	Entropy 1.17160 (1.17192)	Top-1 acc 59.766 (59.795)	Top-5 acc 78.516 (80.762)	lr 0.01603
Train [50][250/3239]	Time 0.303 (0.774)	Data Time 0.002 (0.138)	Loss 2.7289 (2.6752)	Entropy 1.17158 (1.17191)	Top-1 acc 57.812 (59.789)	Top-5 acc 79.297 (80.780)	lr 0.01603
Train [50][260/3239]	Time 0.245 (0.764)	Data Time 0.002 (0.133)	Loss 2.7528 (2.6753)	Entropy 1.17156 (1.17189)	Top-1 acc 56.250 (59.787)	Top-5 acc 80.078 (80.795)	lr 0.01603
Train [50][270/3239]	Time 0.213 (0.753)	Data Time 0.001 (0.128)	Loss 2.7532 (2.6759)	Entropy 1.17148 (1.17188)	Top-1 acc 58.203 (59.774)	Top-5 acc 81.641 (80.813)	lr 0.01602
Train [50][280/3239]	Time 0.247 (0.743)	Data Time 0.001 (0.123)	Loss 2.8287 (2.6779)	Entropy 1.17147 (1.17186)	Top-1 acc 55.859 (59.768)	Top-5 acc 78.906 (80.761)	lr 0.01602
Train [50][290/3239]	Time 0.229 (0.734)	Data Time 0.002 (0.119)	Loss 2.5848 (2.6790)	Entropy 1.17149 (1.17185)	Top-1 acc 62.109 (59.737)	Top-5 acc 82.812 (80.753)	lr 0.01602
Train [50][300/3239]	Time 0.232 (0.725)	Data Time 0.002 (0.115)	Loss 2.5565 (2.6780)	Entropy 1.17146 (1.17184)	Top-1 acc 60.547 (59.741)	Top-5 acc 84.766 (80.787)	lr 0.01602
Train [50][310/3239]	Time 0.229 (0.717)	Data Time 0.001 (0.112)	Loss 2.5978 (2.6785)	Entropy 1.17144 (1.17183)	Top-1 acc 62.109 (59.746)	Top-5 acc 82.422 (80.766)	lr 0.01602
Train [50][320/3239]	Time 0.231 (0.710)	Data Time 0.001 (0.108)	Loss 2.6819 (2.6779)	Entropy 1.17141 (1.17181)	Top-1 acc 61.719 (59.752)	Top-5 acc 80.078 (80.783)	lr 0.01602
Train [50][330/3239]	Time 0.224 (0.702)	Data Time 0.001 (0.105)	Loss 2.8195 (2.6779)	Entropy 1.17140 (1.17180)	Top-1 acc 56.250 (59.744)	Top-5 acc 77.344 (80.786)	lr 0.01602
Train [50][340/3239]	Time 2.524 (0.695)	Data Time 0.002 (0.102)	Loss 2.6297 (2.6784)	Entropy 1.17140 (1.17179)	Top-1 acc 60.156 (59.731)	Top-5 acc 82.812 (80.757)	lr 0.01602
Train [50][350/3239]	Time 0.232 (0.682)	Data Time 0.002 (0.099)	Loss 2.6989 (2.6765)	Entropy 1.17137 (1.17178)	Top-1 acc 58.203 (59.760)	Top-5 acc 81.641 (80.806)	lr 0.01602
Train [50][360/3239]	Time 0.240 (0.676)	Data Time 0.001 (0.096)	Loss 2.4713 (2.6765)	Entropy 1.17138 (1.17177)	Top-1 acc 68.359 (59.756)	Top-5 acc 86.328 (80.806)	lr 0.01602
Train [50][370/3239]	Time 0.231 (0.670)	Data Time 0.001 (0.094)	Loss 2.6525 (2.6762)	Entropy 1.17139 (1.17176)	Top-1 acc 61.328 (59.738)	Top-5 acc 82.031 (80.797)	lr 0.01601
Train [50][380/3239]	Time 0.232 (0.665)	Data Time 0.001 (0.091)	Loss 2.6950 (2.6755)	Entropy 1.17136 (1.17175)	Top-1 acc 57.422 (59.723)	Top-5 acc 80.078 (80.815)	lr 0.01601
Train [50][390/3239]	Time 0.216 (0.660)	Data Time 0.001 (0.089)	Loss 2.8602 (2.6769)	Entropy 1.17133 (1.17174)	Top-1 acc 53.906 (59.673)	Top-5 acc 79.297 (80.801)	lr 0.01601
Train [50][400/3239]	Time 0.229 (0.655)	Data Time 0.001 (0.087)	Loss 2.6566 (2.6772)	Entropy 1.17132 (1.17173)	Top-1 acc 57.812 (59.666)	Top-5 acc 81.250 (80.787)	lr 0.01601
Train [50][410/3239]	Time 0.324 (0.651)	Data Time 0.001 (0.085)	Loss 2.9021 (2.6791)	Entropy 1.17128 (1.17172)	Top-1 acc 53.906 (59.603)	Top-5 acc 74.609 (80.734)	lr 0.01601
Train [50][420/3239]	Time 0.220 (0.646)	Data Time 0.001 (0.083)	Loss 2.8372 (2.6795)	Entropy 1.17128 (1.17171)	Top-1 acc 56.250 (59.589)	Top-5 acc 79.297 (80.727)	lr 0.01601
Train [50][430/3239]	Time 0.238 (0.642)	Data Time 0.002 (0.081)	Loss 2.5763 (2.6799)	Entropy 1.17125 (1.17170)	Top-1 acc 61.719 (59.570)	Top-5 acc 80.859 (80.700)	lr 0.01601
Train [50][440/3239]	Time 0.234 (0.638)	Data Time 0.003 (0.079)	Loss 2.6102 (2.6796)	Entropy 1.17117 (1.17169)	Top-1 acc 60.547 (59.582)	Top-5 acc 82.422 (80.710)	lr 0.01601
Train [50][450/3239]	Time 2.468 (0.634)	Data Time 0.001 (0.077)	Loss 2.5977 (2.6792)	Entropy 1.17117 (1.17167)	Top-1 acc 60.938 (59.595)	Top-5 acc 82.422 (80.695)	lr 0.01601
Train [50][460/3239]	Time 0.317 (0.625)	Data Time 0.001 (0.076)	Loss 2.6170 (2.6788)	Entropy 1.17118 (1.17166)	Top-1 acc 61.719 (59.617)	Top-5 acc 80.859 (80.709)	lr 0.01601
Train [50][470/3239]	Time 0.219 (0.621)	Data Time 0.001 (0.074)	Loss 2.6362 (2.6785)	Entropy 1.17078 (1.17165)	Top-1 acc 55.859 (59.610)	Top-5 acc 80.859 (80.707)	lr 0.01600
Train [50][480/3239]	Time 0.246 (0.618)	Data Time 0.001 (0.073)	Loss 2.6688 (2.6782)	Entropy 1.17076 (1.17163)	Top-1 acc 60.547 (59.637)	Top-5 acc 81.641 (80.704)	lr 0.01600
Train [50][490/3239]	Time 0.249 (0.615)	Data Time 0.001 (0.071)	Loss 2.5477 (2.6782)	Entropy 1.17077 (1.17161)	Top-1 acc 66.797 (59.650)	Top-5 acc 83.594 (80.713)	lr 0.01600
Train [50][500/3239]	Time 0.224 (0.612)	Data Time 0.001 (0.070)	Loss 2.6354 (2.6785)	Entropy 1.17065 (1.17159)	Top-1 acc 59.766 (59.662)	Top-5 acc 82.031 (80.714)	lr 0.01600
Train [50][510/3239]	Time 0.236 (0.609)	Data Time 0.002 (0.068)	Loss 2.7529 (2.6789)	Entropy 1.17062 (1.17157)	Top-1 acc 58.984 (59.649)	Top-5 acc 78.125 (80.717)	lr 0.01600
Train [50][520/3239]	Time 0.228 (0.607)	Data Time 0.001 (0.067)	Loss 2.7319 (2.6781)	Entropy 1.17061 (1.17155)	Top-1 acc 59.375 (59.658)	Top-5 acc 78.516 (80.740)	lr 0.01600
Train [50][530/3239]	Time 0.231 (0.604)	Data Time 0.001 (0.066)	Loss 2.6787 (2.6787)	Entropy 1.17060 (1.17154)	Top-1 acc 58.203 (59.631)	Top-5 acc 81.641 (80.739)	lr 0.01600
Train [50][540/3239]	Time 0.206 (0.602)	Data Time 0.001 (0.065)	Loss 2.6956 (2.6781)	Entropy 1.17061 (1.17152)	Top-1 acc 58.203 (59.655)	Top-5 acc 81.641 (80.737)	lr 0.01600
Train [50][550/3239]	Time 0.246 (0.599)	Data Time 0.001 (0.064)	Loss 2.8180 (2.6784)	Entropy 1.17054 (1.17150)	Top-1 acc 57.031 (59.645)	Top-5 acc 78.906 (80.745)	lr 0.01600
Train [50][560/3239]	Time 2.481 (0.597)	Data Time 0.002 (0.063)	Loss 2.7381 (2.6793)	Entropy 1.17054 (1.17149)	Top-1 acc 53.516 (59.624)	Top-5 acc 80.469 (80.744)	lr 0.01600
Train [50][570/3239]	Time 0.230 (0.590)	Data Time 0.001 (0.061)	Loss 3.5739 (2.6810)	Entropy 1.17049 (1.17147)	Top-1 acc 43.750 (59.585)	Top-5 acc 68.750 (80.706)	lr 0.01599
Train [50][580/3239]	Time 0.222 (0.588)	Data Time 0.002 (0.060)	Loss 2.6474 (2.6814)	Entropy 1.17043 (1.17145)	Top-1 acc 62.891 (59.600)	Top-5 acc 82.422 (80.691)	lr 0.01599
Train [50][590/3239]	Time 0.224 (0.586)	Data Time 0.002 (0.059)	Loss 2.6780 (2.6811)	Entropy 1.17042 (1.17143)	Top-1 acc 61.719 (59.611)	Top-5 acc 78.906 (80.701)	lr 0.01599
Train [50][600/3239]	Time 0.214 (0.584)	Data Time 0.001 (0.059)	Loss 2.7595 (2.6819)	Entropy 1.17041 (1.17142)	Top-1 acc 57.031 (59.597)	Top-5 acc 79.297 (80.694)	lr 0.01599
Train [50][610/3239]	Time 0.394 (0.661)	Data Time 0.003 (0.058)	Loss 2.6082 (2.6813)	Entropy 1.17041 (1.17140)	Top-1 acc 58.984 (59.610)	Top-5 acc 82.812 (80.703)	lr 0.01599
Train [50][620/3239]	Time 0.235 (0.658)	Data Time 0.002 (0.057)	Loss 2.5146 (2.6814)	Entropy 1.17042 (1.17138)	Top-1 acc 62.109 (59.589)	Top-5 acc 87.891 (80.723)	lr 0.01599
Train [50][630/3239]	Time 0.241 (0.655)	Data Time 0.001 (0.056)	Loss 2.7449 (2.6809)	Entropy 1.17039 (1.17137)	Top-1 acc 58.984 (59.600)	Top-5 acc 78.516 (80.739)	lr 0.01599
Train [50][640/3239]	Time 0.230 (0.652)	Data Time 0.001 (0.055)	Loss 2.5885 (2.6811)	Entropy 1.17038 (1.17135)	Top-1 acc 62.109 (59.586)	Top-5 acc 82.422 (80.731)	lr 0.01599
Train [50][650/3239]	Time 0.199 (0.649)	Data Time 0.001 (0.054)	Loss 2.6365 (2.6813)	Entropy 1.17036 (1.17134)	Top-1 acc 60.156 (59.592)	Top-5 acc 82.031 (80.715)	lr 0.01599
Train [50][660/3239]	Time 0.270 (0.646)	Data Time 0.001 (0.053)	Loss 2.8126 (2.6815)	Entropy 1.17037 (1.17132)	Top-1 acc 56.641 (59.588)	Top-5 acc 80.078 (80.715)	lr 0.01599
Train [50][670/3239]	Time 2.402 (0.643)	Data Time 0.001 (0.053)	Loss 2.5567 (2.6809)	Entropy 1.17037 (1.17131)	Top-1 acc 58.203 (59.588)	Top-5 acc 82.422 (80.724)	lr 0.01599
Train [50][680/3239]	Time 0.233 (0.637)	Data Time 0.001 (0.052)	Loss 2.7344 (2.6810)	Entropy 1.17035 (1.17129)	Top-1 acc 60.156 (59.599)	Top-5 acc 78.125 (80.710)	lr 0.01598
Train [50][690/3239]	Time 0.234 (0.635)	Data Time 0.001 (0.051)	Loss 2.6346 (2.6812)	Entropy 1.17028 (1.17128)	Top-1 acc 60.547 (59.591)	Top-5 acc 82.031 (80.702)	lr 0.01598
Train [50][700/3239]	Time 0.236 (0.632)	Data Time 0.001 (0.050)	Loss 2.7782 (2.6815)	Entropy 1.17024 (1.17127)	Top-1 acc 55.469 (59.582)	Top-5 acc 78.516 (80.706)	lr 0.01598
Train [50][710/3239]	Time 0.222 (0.630)	Data Time 0.001 (0.050)	Loss 2.7777 (2.6828)	Entropy 1.17024 (1.17125)	Top-1 acc 54.297 (59.563)	Top-5 acc 79.297 (80.700)	lr 0.01598
Train [50][720/3239]	Time 0.223 (0.628)	Data Time 0.001 (0.049)	Loss 2.6128 (2.6830)	Entropy 1.17019 (1.17124)	Top-1 acc 64.844 (59.566)	Top-5 acc 80.859 (80.687)	lr 0.01598
Train [50][730/3239]	Time 0.260 (0.626)	Data Time 0.001 (0.048)	Loss 2.7502 (2.6836)	Entropy 1.17019 (1.17122)	Top-1 acc 56.641 (59.552)	Top-5 acc 76.562 (80.673)	lr 0.01598
Train [50][740/3239]	Time 0.210 (0.623)	Data Time 0.001 (0.048)	Loss 2.7431 (2.6830)	Entropy 1.17018 (1.17121)	Top-1 acc 58.984 (59.570)	Top-5 acc 78.125 (80.686)	lr 0.01598
Train [50][750/3239]	Time 0.223 (0.621)	Data Time 0.001 (0.047)	Loss 2.6113 (2.6825)	Entropy 1.17016 (1.17119)	Top-1 acc 60.156 (59.579)	Top-5 acc 83.203 (80.700)	lr 0.01598
Train [50][760/3239]	Time 0.264 (0.619)	Data Time 0.001 (0.047)	Loss 2.6986 (2.6825)	Entropy 1.17016 (1.17118)	Top-1 acc 59.766 (59.576)	Top-5 acc 81.641 (80.704)	lr 0.01598
Train [50][770/3239]	Time 0.226 (0.617)	Data Time 0.001 (0.046)	Loss 2.6187 (2.6829)	Entropy 1.17016 (1.17117)	Top-1 acc 61.719 (59.573)	Top-5 acc 82.031 (80.693)	lr 0.01598
Train [50][780/3239]	Time 2.550 (0.615)	Data Time 0.001 (0.046)	Loss 2.8065 (2.6830)	Entropy 1.17016 (1.17115)	Top-1 acc 57.031 (59.577)	Top-5 acc 75.391 (80.690)	lr 0.01597
Train [50][790/3239]	Time 0.236 (0.610)	Data Time 0.001 (0.045)	Loss 2.6417 (2.6835)	Entropy 1.17007 (1.17114)	Top-1 acc 61.719 (59.555)	Top-5 acc 83.984 (80.682)	lr 0.01597
Train [50][800/3239]	Time 0.252 (0.608)	Data Time 0.002 (0.044)	Loss 2.5723 (2.6841)	Entropy 1.17005 (1.17113)	Top-1 acc 61.719 (59.546)	Top-5 acc 82.812 (80.670)	lr 0.01597
Train [50][810/3239]	Time 0.343 (0.606)	Data Time 0.002 (0.044)	Loss 2.5961 (2.6841)	Entropy 1.17003 (1.17111)	Top-1 acc 61.328 (59.543)	Top-5 acc 82.812 (80.675)	lr 0.01597
Train [50][820/3239]	Time 0.221 (0.605)	Data Time 0.001 (0.043)	Loss 2.6552 (2.6836)	Entropy 1.16999 (1.17110)	Top-1 acc 63.281 (59.559)	Top-5 acc 80.469 (80.684)	lr 0.01597
Train [50][830/3239]	Time 0.224 (0.603)	Data Time 0.001 (0.043)	Loss 2.6940 (2.6834)	Entropy 1.16997 (1.17109)	Top-1 acc 57.422 (59.559)	Top-5 acc 80.469 (80.691)	lr 0.01597
Train [50][840/3239]	Time 0.221 (0.601)	Data Time 0.001 (0.042)	Loss 2.6282 (2.6839)	Entropy 1.16993 (1.17107)	Top-1 acc 55.469 (59.530)	Top-5 acc 83.203 (80.694)	lr 0.01597
Train [50][850/3239]	Time 0.224 (0.600)	Data Time 0.001 (0.042)	Loss 2.6436 (2.6840)	Entropy 1.16983 (1.17106)	Top-1 acc 64.062 (59.520)	Top-5 acc 81.250 (80.701)	lr 0.01597
Train [50][860/3239]	Time 0.197 (0.598)	Data Time 0.001 (0.041)	Loss 2.6883 (2.6839)	Entropy 1.16981 (1.17104)	Top-1 acc 62.109 (59.522)	Top-5 acc 81.250 (80.703)	lr 0.01597
Train [50][870/3239]	Time 0.210 (0.596)	Data Time 0.001 (0.041)	Loss 2.6315 (2.6840)	Entropy 1.16981 (1.17103)	Top-1 acc 60.938 (59.519)	Top-5 acc 82.422 (80.696)	lr 0.01597
Train [50][880/3239]	Time 0.251 (0.595)	Data Time 0.001 (0.041)	Loss 2.6678 (2.6840)	Entropy 1.16929 (1.17102)	Top-1 acc 59.766 (59.516)	Top-5 acc 80.469 (80.700)	lr 0.01596
Train [50][890/3239]	Time 2.520 (0.594)	Data Time 0.002 (0.040)	Loss 2.6099 (2.6843)	Entropy 1.16929 (1.17100)	Top-1 acc 61.719 (59.510)	Top-5 acc 81.250 (80.698)	lr 0.01596
Train [50][900/3239]	Time 0.269 (0.590)	Data Time 0.001 (0.040)	Loss 2.6833 (2.6844)	Entropy 1.16927 (1.17098)	Top-1 acc 59.766 (59.508)	Top-5 acc 81.250 (80.702)	lr 0.01596
Train [50][910/3239]	Time 0.327 (0.588)	Data Time 0.001 (0.039)	Loss 2.8253 (2.6842)	Entropy 1.16921 (1.17096)	Top-1 acc 57.031 (59.516)	Top-5 acc 78.125 (80.702)	lr 0.01596
Train [50][920/3239]	Time 0.226 (0.587)	Data Time 0.001 (0.039)	Loss 2.6351 (2.6841)	Entropy 1.16920 (1.17094)	Top-1 acc 58.984 (59.513)	Top-5 acc 80.859 (80.703)	lr 0.01596
Train [50][930/3239]	Time 0.234 (0.585)	Data Time 0.001 (0.038)	Loss 2.6914 (2.6847)	Entropy 1.16921 (1.17092)	Top-1 acc 59.766 (59.494)	Top-5 acc 80.078 (80.692)	lr 0.01596
Train [50][940/3239]	Time 0.276 (0.584)	Data Time 0.002 (0.038)	Loss 2.6768 (2.6844)	Entropy 1.16919 (1.17090)	Top-1 acc 56.641 (59.490)	Top-5 acc 81.641 (80.700)	lr 0.01596
Train [50][950/3239]	Time 0.223 (0.583)	Data Time 0.001 (0.038)	Loss 2.5493 (2.6841)	Entropy 1.16916 (1.17088)	Top-1 acc 60.547 (59.505)	Top-5 acc 80.859 (80.702)	lr 0.01596
Train [50][960/3239]	Time 0.217 (0.581)	Data Time 0.001 (0.037)	Loss 2.7995 (2.6843)	Entropy 1.16910 (1.17087)	Top-1 acc 54.688 (59.492)	Top-5 acc 75.000 (80.692)	lr 0.01596
Train [50][970/3239]	Time 0.233 (0.634)	Data Time 0.002 (0.037)	Loss 2.5455 (2.6839)	Entropy 1.16906 (1.17085)	Top-1 acc 61.719 (59.503)	Top-5 acc 82.422 (80.698)	lr 0.01596
Train [50][980/3239]	Time 0.237 (0.632)	Data Time 0.002 (0.037)	Loss 2.6922 (2.6841)	Entropy 1.16904 (1.17083)	Top-1 acc 61.328 (59.497)	Top-5 acc 80.859 (80.696)	lr 0.01596
Train [50][990/3239]	Time 0.246 (0.630)	Data Time 0.002 (0.036)	Loss 2.8757 (2.6841)	Entropy 1.16904 (1.17081)	Top-1 acc 53.125 (59.499)	Top-5 acc 76.953 (80.691)	lr 0.01595
Train [50][1000/3239]	Time 2.504 (0.629)	Data Time 0.002 (0.036)	Loss 2.7064 (2.6842)	Entropy 1.16904 (1.17079)	Top-1 acc 61.328 (59.506)	Top-5 acc 82.422 (80.685)	lr 0.01595
Train [50][1010/3239]	Time 0.329 (0.625)	Data Time 0.002 (0.036)	Loss 2.7153 (2.6844)	Entropy 1.16902 (1.17078)	Top-1 acc 58.984 (59.507)	Top-5 acc 81.250 (80.683)	lr 0.01595
Train [50][1020/3239]	Time 0.254 (0.623)	Data Time 0.002 (0.035)	Loss 2.7394 (2.6847)	Entropy 1.16900 (1.17076)	Top-1 acc 55.078 (59.503)	Top-5 acc 78.516 (80.678)	lr 0.01595
Train [50][1030/3239]	Time 0.218 (0.622)	Data Time 0.001 (0.035)	Loss 2.6952 (2.6843)	Entropy 1.16899 (1.17074)	Top-1 acc 59.375 (59.502)	Top-5 acc 81.641 (80.680)	lr 0.01595
Train [50][1040/3239]	Time 0.212 (0.620)	Data Time 0.001 (0.035)	Loss 2.7758 (2.6841)	Entropy 1.16899 (1.17073)	Top-1 acc 55.078 (59.505)	Top-5 acc 78.125 (80.683)	lr 0.01595
Train [50][1050/3239]	Time 0.223 (0.618)	Data Time 0.001 (0.034)	Loss 2.6691 (2.6837)	Entropy 1.16892 (1.17071)	Top-1 acc 59.375 (59.520)	Top-5 acc 79.688 (80.684)	lr 0.01595
Train [50][1060/3239]	Time 0.308 (0.617)	Data Time 0.001 (0.034)	Loss 2.6238 (2.6838)	Entropy 1.16889 (1.17069)	Top-1 acc 58.984 (59.513)	Top-5 acc 81.641 (80.684)	lr 0.01595
Train [50][1070/3239]	Time 0.230 (0.615)	Data Time 0.002 (0.034)	Loss 2.6673 (2.6840)	Entropy 1.16886 (1.17067)	Top-1 acc 62.891 (59.513)	Top-5 acc 82.422 (80.683)	lr 0.01595
Train [50][1080/3239]	Time 0.235 (0.614)	Data Time 0.001 (0.033)	Loss 2.6829 (2.6842)	Entropy 1.16881 (1.17066)	Top-1 acc 60.547 (59.513)	Top-5 acc 81.641 (80.679)	lr 0.01595
Train [50][1090/3239]	Time 0.221 (0.613)	Data Time 0.001 (0.033)	Loss 2.5694 (2.6843)	Entropy 1.16878 (1.17064)	Top-1 acc 62.109 (59.508)	Top-5 acc 82.422 (80.675)	lr 0.01594
Train [50][1100/3239]	Time 0.227 (0.611)	Data Time 0.001 (0.033)	Loss 2.4708 (2.6840)	Entropy 1.16884 (1.17062)	Top-1 acc 63.672 (59.520)	Top-5 acc 85.938 (80.681)	lr 0.01594
Train [50][1110/3239]	Time 2.449 (0.610)	Data Time 0.001 (0.032)	Loss 2.6599 (2.6844)	Entropy 1.16884 (1.17061)	Top-1 acc 60.938 (59.513)	Top-5 acc 80.859 (80.675)	lr 0.01594
Train [50][1120/3239]	Time 0.241 (0.607)	Data Time 0.001 (0.032)	Loss 2.8381 (2.6841)	Entropy 1.16881 (1.17059)	Top-1 acc 58.203 (59.527)	Top-5 acc 76.172 (80.679)	lr 0.01594
Train [50][1130/3239]	Time 0.219 (0.606)	Data Time 0.001 (0.032)	Loss 2.5316 (2.6842)	Entropy 1.16871 (1.17057)	Top-1 acc 60.938 (59.528)	Top-5 acc 84.766 (80.683)	lr 0.01594
Train [50][1140/3239]	Time 0.225 (0.605)	Data Time 0.002 (0.032)	Loss 2.7664 (2.6841)	Entropy 1.16870 (1.17056)	Top-1 acc 59.766 (59.534)	Top-5 acc 79.688 (80.685)	lr 0.01594
Train [50][1150/3239]	Time 0.207 (0.603)	Data Time 0.001 (0.031)	Loss 2.6365 (2.6842)	Entropy 1.16868 (1.17054)	Top-1 acc 62.500 (59.535)	Top-5 acc 80.859 (80.684)	lr 0.01594
Train [50][1160/3239]	Time 0.262 (0.602)	Data Time 0.001 (0.031)	Loss 2.6668 (2.6847)	Entropy 1.16864 (1.17053)	Top-1 acc 60.547 (59.521)	Top-5 acc 79.688 (80.676)	lr 0.01594
Train [50][1170/3239]	Time 0.243 (0.601)	Data Time 0.002 (0.031)	Loss 2.6990 (2.6846)	Entropy 1.16860 (1.17051)	Top-1 acc 60.547 (59.509)	Top-5 acc 79.688 (80.673)	lr 0.01594
Train [50][1180/3239]	Time 0.169 (0.600)	Data Time 0.001 (0.031)	Loss 2.8541 (2.6851)	Entropy 1.16859 (1.17049)	Top-1 acc 53.906 (59.498)	Top-5 acc 76.953 (80.661)	lr 0.01594
Train [50][1190/3239]	Time 0.224 (0.599)	Data Time 0.002 (0.030)	Loss 2.7468 (2.6847)	Entropy 1.16839 (1.17048)	Top-1 acc 59.766 (59.513)	Top-5 acc 79.688 (80.667)	lr 0.01593
Train [50][1200/3239]	Time 0.233 (0.598)	Data Time 0.001 (0.030)	Loss 2.7185 (2.6843)	Entropy 1.16838 (1.17046)	Top-1 acc 56.250 (59.516)	Top-5 acc 80.859 (80.673)	lr 0.01593
Train [50][1210/3239]	Time 0.216 (0.597)	Data Time 0.001 (0.030)	Loss 2.7391 (2.6849)	Entropy 1.16833 (1.17044)	Top-1 acc 58.203 (59.490)	Top-5 acc 77.734 (80.666)	lr 0.01593
Train [50][1220/3239]	Time 2.414 (0.595)	Data Time 0.002 (0.030)	Loss 2.5667 (2.6849)	Entropy 1.16833 (1.17043)	Top-1 acc 63.281 (59.489)	Top-5 acc 83.203 (80.661)	lr 0.01593
Train [50][1230/3239]	Time 0.243 (0.593)	Data Time 0.002 (0.030)	Loss 2.5340 (2.6844)	Entropy 1.16832 (1.17041)	Top-1 acc 65.234 (59.506)	Top-5 acc 82.422 (80.666)	lr 0.01593
Train [50][1240/3239]	Time 0.216 (0.592)	Data Time 0.001 (0.029)	Loss 2.7603 (2.6847)	Entropy 1.16830 (1.17039)	Top-1 acc 60.547 (59.504)	Top-5 acc 80.078 (80.670)	lr 0.01593
Train [50][1250/3239]	Time 0.216 (0.591)	Data Time 0.002 (0.029)	Loss 2.7861 (2.6846)	Entropy 1.16828 (1.17037)	Top-1 acc 57.422 (59.509)	Top-5 acc 76.172 (80.668)	lr 0.01593
Train [50][1260/3239]	Time 0.218 (0.590)	Data Time 0.001 (0.029)	Loss 2.7484 (2.6849)	Entropy 1.16828 (1.17036)	Top-1 acc 60.547 (59.506)	Top-5 acc 80.859 (80.665)	lr 0.01593
Train [50][1270/3239]	Time 0.232 (0.589)	Data Time 0.002 (0.029)	Loss 2.6699 (2.6851)	Entropy 1.16828 (1.17034)	Top-1 acc 60.156 (59.511)	Top-5 acc 79.688 (80.661)	lr 0.01593
Train [50][1280/3239]	Time 0.213 (0.588)	Data Time 0.001 (0.028)	Loss 2.7231 (2.6853)	Entropy 1.16824 (1.17032)	Top-1 acc 58.984 (59.512)	Top-5 acc 81.641 (80.655)	lr 0.01593
Train [50][1290/3239]	Time 0.253 (0.587)	Data Time 0.001 (0.028)	Loss 2.6197 (2.6850)	Entropy 1.16825 (1.17031)	Top-1 acc 60.547 (59.521)	Top-5 acc 82.031 (80.664)	lr 0.01593
Train [50][1300/3239]	Time 0.308 (0.586)	Data Time 0.001 (0.028)	Loss 2.6765 (2.6847)	Entropy 1.16822 (1.17029)	Top-1 acc 59.375 (59.522)	Top-5 acc 82.812 (80.670)	lr 0.01592
Train [50][1310/3239]	Time 0.235 (0.585)	Data Time 0.001 (0.028)	Loss 2.6954 (2.6848)	Entropy 1.16823 (1.17028)	Top-1 acc 59.375 (59.521)	Top-5 acc 79.297 (80.665)	lr 0.01592
Train [50][1320/3239]	Time 0.206 (0.585)	Data Time 0.001 (0.028)	Loss 2.7550 (2.6850)	Entropy 1.16823 (1.17026)	Top-1 acc 58.203 (59.526)	Top-5 acc 78.125 (80.662)	lr 0.01592
Train [50][1330/3239]	Time 50.767 (0.620)	Data Time 0.001 (0.027)	Loss 2.5312 (2.6849)	Entropy 1.16823 (1.17025)	Top-1 acc 66.016 (59.529)	Top-5 acc 82.812 (80.661)	lr 0.01592
Train [50][1340/3239]	Time 0.237 (0.617)	Data Time 0.003 (0.027)	Loss 2.7244 (2.6853)	Entropy 1.16816 (1.17023)	Top-1 acc 58.203 (59.504)	Top-5 acc 78.516 (80.652)	lr 0.01592
Train [50][1350/3239]	Time 0.217 (0.616)	Data Time 0.002 (0.027)	Loss 2.7490 (2.6854)	Entropy 1.16813 (1.17022)	Top-1 acc 56.641 (59.495)	Top-5 acc 77.344 (80.653)	lr 0.01592
Train [50][1360/3239]	Time 0.227 (0.615)	Data Time 0.001 (0.027)	Loss 2.7316 (2.6855)	Entropy 1.16813 (1.17020)	Top-1 acc 58.984 (59.492)	Top-5 acc 79.297 (80.651)	lr 0.01592
Train [50][1370/3239]	Time 0.250 (0.614)	Data Time 0.001 (0.027)	Loss 2.5578 (2.6860)	Entropy 1.16811 (1.17018)	Top-1 acc 62.109 (59.484)	Top-5 acc 82.812 (80.640)	lr 0.01592
Train [50][1380/3239]	Time 0.226 (0.613)	Data Time 0.001 (0.027)	Loss 2.6470 (2.6864)	Entropy 1.16812 (1.17017)	Top-1 acc 62.109 (59.471)	Top-5 acc 80.469 (80.632)	lr 0.01592
Train [50][1390/3239]	Time 0.246 (0.612)	Data Time 0.001 (0.026)	Loss 2.7546 (2.6867)	Entropy 1.16803 (1.17015)	Top-1 acc 55.078 (59.462)	Top-5 acc 81.250 (80.624)	lr 0.01592
Train [50][1400/3239]	Time 0.226 (0.611)	Data Time 0.001 (0.026)	Loss 2.6350 (2.6865)	Entropy 1.16801 (1.17014)	Top-1 acc 57.031 (59.469)	Top-5 acc 82.031 (80.629)	lr 0.01591
Train [50][1410/3239]	Time 0.223 (0.610)	Data Time 0.001 (0.026)	Loss 2.5759 (2.6864)	Entropy 1.16793 (1.17012)	Top-1 acc 67.969 (59.477)	Top-5 acc 83.594 (80.628)	lr 0.01591
Train [50][1420/3239]	Time 0.229 (0.609)	Data Time 0.001 (0.026)	Loss 2.9226 (2.6864)	Entropy 1.16791 (1.17011)	Top-1 acc 52.734 (59.480)	Top-5 acc 76.172 (80.628)	lr 0.01591
Train [50][1430/3239]	Time 0.239 (0.608)	Data Time 0.001 (0.026)	Loss 2.6367 (2.6862)	Entropy 1.16790 (1.17009)	Top-1 acc 60.156 (59.481)	Top-5 acc 80.859 (80.631)	lr 0.01591
Train [50][1440/3239]	Time 2.538 (0.607)	Data Time 0.001 (0.025)	Loss 2.8190 (2.6861)	Entropy 1.16790 (1.17008)	Top-1 acc 57.812 (59.484)	Top-5 acc 80.469 (80.630)	lr 0.01591
Train [50][1450/3239]	Time 0.224 (0.604)	Data Time 0.001 (0.025)	Loss 2.6660 (2.6861)	Entropy 1.16789 (1.17006)	Top-1 acc 60.938 (59.481)	Top-5 acc 82.031 (80.631)	lr 0.01591
Train [50][1460/3239]	Time 0.218 (0.603)	Data Time 0.001 (0.025)	Loss 2.7192 (2.6859)	Entropy 1.16785 (1.17005)	Top-1 acc 56.641 (59.486)	Top-5 acc 79.297 (80.634)	lr 0.01591
Train [50][1470/3239]	Time 0.235 (0.603)	Data Time 0.001 (0.025)	Loss 2.5609 (2.6859)	Entropy 1.16777 (1.17003)	Top-1 acc 67.578 (59.492)	Top-5 acc 83.203 (80.638)	lr 0.01591
Train [50][1480/3239]	Time 0.232 (0.602)	Data Time 0.001 (0.025)	Loss 2.5487 (2.6857)	Entropy 1.16771 (1.17002)	Top-1 acc 62.500 (59.500)	Top-5 acc 83.594 (80.644)	lr 0.01591
Train [50][1490/3239]	Time 0.309 (0.601)	Data Time 0.001 (0.025)	Loss 2.7599 (2.6857)	Entropy 1.16762 (1.17000)	Top-1 acc 57.031 (59.497)	Top-5 acc 77.344 (80.647)	lr 0.01591
Train [50][1500/3239]	Time 0.219 (0.600)	Data Time 0.001 (0.025)	Loss 2.6149 (2.6858)	Entropy 1.16757 (1.16999)	Top-1 acc 62.500 (59.497)	Top-5 acc 81.641 (80.650)	lr 0.01590
Train [50][1510/3239]	Time 0.230 (0.599)	Data Time 0.001 (0.024)	Loss 2.6264 (2.6855)	Entropy 1.16758 (1.16997)	Top-1 acc 61.719 (59.500)	Top-5 acc 82.812 (80.655)	lr 0.01590
Train [50][1520/3239]	Time 0.240 (0.598)	Data Time 0.002 (0.024)	Loss 2.7167 (2.6856)	Entropy 1.16757 (1.16995)	Top-1 acc 60.547 (59.498)	Top-5 acc 79.688 (80.652)	lr 0.01590
Train [50][1530/3239]	Time 0.230 (0.597)	Data Time 0.002 (0.024)	Loss 2.6430 (2.6857)	Entropy 1.16752 (1.16994)	Top-1 acc 61.719 (59.490)	Top-5 acc 79.688 (80.654)	lr 0.01590
Train [50][1540/3239]	Time 0.319 (0.596)	Data Time 0.001 (0.024)	Loss 2.8175 (2.6860)	Entropy 1.16747 (1.16992)	Top-1 acc 58.594 (59.481)	Top-5 acc 79.297 (80.650)	lr 0.01590
Train [50][1550/3239]	Time 2.498 (0.596)	Data Time 0.002 (0.024)	Loss 2.8285 (2.6859)	Entropy 1.16747 (1.16991)	Top-1 acc 55.859 (59.484)	Top-5 acc 79.688 (80.651)	lr 0.01590
Train [50][1560/3239]	Time 0.216 (0.593)	Data Time 0.001 (0.024)	Loss 2.6383 (2.6857)	Entropy 1.16746 (1.16989)	Top-1 acc 61.719 (59.485)	Top-5 acc 84.375 (80.661)	lr 0.01590
Train [50][1570/3239]	Time 0.240 (0.593)	Data Time 0.001 (0.024)	Loss 2.5513 (2.6853)	Entropy 1.16742 (1.16988)	Top-1 acc 61.719 (59.491)	Top-5 acc 81.641 (80.660)	lr 0.01590
Train [50][1580/3239]	Time 0.236 (0.592)	Data Time 0.001 (0.023)	Loss 2.7027 (2.6861)	Entropy 1.16738 (1.16986)	Top-1 acc 58.594 (59.462)	Top-5 acc 80.469 (80.642)	lr 0.01590
Train [50][1590/3239]	Time 0.279 (0.591)	Data Time 0.001 (0.023)	Loss 2.8039 (2.6861)	Entropy 1.16736 (1.16984)	Top-1 acc 57.422 (59.463)	Top-5 acc 79.297 (80.641)	lr 0.01590
Train [50][1600/3239]	Time 0.207 (0.590)	Data Time 0.001 (0.023)	Loss 2.6067 (2.6864)	Entropy 1.16735 (1.16983)	Top-1 acc 61.328 (59.455)	Top-5 acc 82.422 (80.636)	lr 0.01589
Train [50][1610/3239]	Time 0.219 (0.590)	Data Time 0.001 (0.023)	Loss 2.7478 (2.6863)	Entropy 1.16735 (1.16981)	Top-1 acc 57.812 (59.446)	Top-5 acc 79.688 (80.639)	lr 0.01589
Train [50][1620/3239]	Time 0.231 (0.589)	Data Time 0.001 (0.023)	Loss 2.8933 (2.6864)	Entropy 1.16731 (1.16980)	Top-1 acc 55.078 (59.441)	Top-5 acc 76.172 (80.638)	lr 0.01589
Train [50][1630/3239]	Time 0.229 (0.588)	Data Time 0.001 (0.023)	Loss 2.6974 (2.6865)	Entropy 1.16727 (1.16978)	Top-1 acc 61.328 (59.443)	Top-5 acc 80.469 (80.636)	lr 0.01589
Train [50][1640/3239]	Time 0.221 (0.587)	Data Time 0.001 (0.023)	Loss 2.6512 (2.6866)	Entropy 1.16720 (1.16977)	Top-1 acc 60.156 (59.441)	Top-5 acc 80.859 (80.633)	lr 0.01589
Train [50][1650/3239]	Time 0.217 (0.587)	Data Time 0.001 (0.022)	Loss 2.7443 (2.6865)	Entropy 1.16713 (1.16975)	Top-1 acc 57.422 (59.435)	Top-5 acc 78.906 (80.636)	lr 0.01589
Train [50][1660/3239]	Time 2.456 (0.586)	Data Time 0.002 (0.022)	Loss 2.6450 (2.6868)	Entropy 1.16713 (1.16974)	Top-1 acc 58.984 (59.422)	Top-5 acc 82.031 (80.639)	lr 0.01589
Train [50][1670/3239]	Time 0.216 (0.584)	Data Time 0.001 (0.022)	Loss 2.7300 (2.6870)	Entropy 1.16709 (1.16972)	Top-1 acc 54.297 (59.415)	Top-5 acc 82.422 (80.636)	lr 0.01589
Train [50][1680/3239]	Time 0.203 (0.583)	Data Time 0.001 (0.022)	Loss 2.5640 (2.6872)	Entropy 1.16709 (1.16970)	Top-1 acc 58.984 (59.404)	Top-5 acc 82.422 (80.634)	lr 0.01589
Train [50][1690/3239]	Time 0.233 (0.582)	Data Time 0.001 (0.022)	Loss 2.5429 (2.6875)	Entropy 1.16710 (1.16969)	Top-1 acc 66.406 (59.401)	Top-5 acc 80.469 (80.626)	lr 0.01589
Train [50][1700/3239]	Time 0.342 (0.609)	Data Time 0.003 (0.022)	Loss 2.5875 (2.6876)	Entropy 1.16711 (1.16967)	Top-1 acc 61.719 (59.394)	Top-5 acc 81.641 (80.624)	lr 0.01589
Train [50][1710/3239]	Time 0.220 (0.608)	Data Time 0.002 (0.022)	Loss 2.7113 (2.6873)	Entropy 1.16711 (1.16966)	Top-1 acc 57.031 (59.397)	Top-5 acc 79.688 (80.629)	lr 0.01588
Train [50][1720/3239]	Time 0.226 (0.608)	Data Time 0.002 (0.022)	Loss 2.8357 (2.6881)	Entropy 1.16710 (1.16964)	Top-1 acc 56.250 (59.380)	Top-5 acc 77.734 (80.616)	lr 0.01588
Train [50][1730/3239]	Time 0.225 (0.607)	Data Time 0.001 (0.022)	Loss 2.7440 (2.6879)	Entropy 1.16707 (1.16963)	Top-1 acc 56.250 (59.383)	Top-5 acc 80.859 (80.621)	lr 0.01588
Train [50][1740/3239]	Time 0.240 (0.606)	Data Time 0.001 (0.021)	Loss 2.8158 (2.6880)	Entropy 1.16705 (1.16961)	Top-1 acc 56.250 (59.383)	Top-5 acc 77.344 (80.618)	lr 0.01588
Train [50][1750/3239]	Time 0.238 (0.605)	Data Time 0.001 (0.021)	Loss 2.6567 (2.6882)	Entropy 1.16702 (1.16960)	Top-1 acc 59.375 (59.375)	Top-5 acc 82.031 (80.610)	lr 0.01588
Train [50][1760/3239]	Time 0.231 (0.604)	Data Time 0.001 (0.021)	Loss 2.8059 (2.6885)	Entropy 1.16701 (1.16958)	Top-1 acc 51.562 (59.368)	Top-5 acc 78.906 (80.608)	lr 0.01588
Train [50][1770/3239]	Time 2.663 (0.604)	Data Time 0.002 (0.021)	Loss 2.6693 (2.6883)	Entropy 1.16701 (1.16957)	Top-1 acc 59.766 (59.377)	Top-5 acc 81.250 (80.608)	lr 0.01588
Train [50][1780/3239]	Time 0.228 (0.602)	Data Time 0.001 (0.021)	Loss 2.6322 (2.6883)	Entropy 1.16712 (1.16956)	Top-1 acc 58.203 (59.373)	Top-5 acc 80.078 (80.608)	lr 0.01588
Train [50][1790/3239]	Time 0.230 (0.601)	Data Time 0.001 (0.021)	Loss 2.7425 (2.6885)	Entropy 1.16705 (1.16954)	Top-1 acc 55.859 (59.374)	Top-5 acc 79.688 (80.603)	lr 0.01588
Train [50][1800/3239]	Time 0.229 (0.600)	Data Time 0.001 (0.021)	Loss 2.9215 (2.6884)	Entropy 1.16705 (1.16953)	Top-1 acc 53.516 (59.370)	Top-5 acc 77.734 (80.603)	lr 0.01588
Train [50][1810/3239]	Time 0.214 (0.600)	Data Time 0.001 (0.021)	Loss 2.8968 (2.6888)	Entropy 1.16702 (1.16951)	Top-1 acc 55.859 (59.363)	Top-5 acc 78.125 (80.594)	lr 0.01587
Train [50][1820/3239]	Time 0.233 (0.599)	Data Time 0.001 (0.021)	Loss 2.5902 (2.6888)	Entropy 1.16768 (1.16950)	Top-1 acc 59.375 (59.365)	Top-5 acc 85.156 (80.597)	lr 0.01587
Train [50][1830/3239]	Time 0.227 (0.598)	Data Time 0.001 (0.020)	Loss 2.7405 (2.6890)	Entropy 1.16766 (1.16949)	Top-1 acc 55.859 (59.358)	Top-5 acc 78.906 (80.593)	lr 0.01587
Train [50][1840/3239]	Time 0.331 (0.597)	Data Time 0.001 (0.020)	Loss 2.7570 (2.6893)	Entropy 1.16764 (1.16948)	Top-1 acc 57.812 (59.355)	Top-5 acc 78.906 (80.588)	lr 0.01587
Train [50][1850/3239]	Time 0.221 (0.597)	Data Time 0.001 (0.020)	Loss 2.5638 (2.6896)	Entropy 1.16762 (1.16947)	Top-1 acc 62.500 (59.347)	Top-5 acc 80.859 (80.575)	lr 0.01587
Train [50][1860/3239]	Time 0.241 (0.596)	Data Time 0.001 (0.020)	Loss 2.5811 (2.6896)	Entropy 1.16765 (1.16946)	Top-1 acc 62.891 (59.350)	Top-5 acc 81.250 (80.578)	lr 0.01587
Train [50][1870/3239]	Time 0.224 (0.595)	Data Time 0.001 (0.020)	Loss 2.8866 (2.6899)	Entropy 1.16762 (1.16945)	Top-1 acc 52.734 (59.340)	Top-5 acc 78.125 (80.570)	lr 0.01587
Train [50][1880/3239]	Time 2.451 (0.594)	Data Time 0.001 (0.020)	Loss 2.6815 (2.6901)	Entropy 1.16762 (1.16944)	Top-1 acc 58.203 (59.336)	Top-5 acc 80.469 (80.569)	lr 0.01587
Train [50][1890/3239]	Time 0.342 (0.593)	Data Time 0.001 (0.020)	Loss 2.8008 (2.6901)	Entropy 1.16757 (1.16943)	Top-1 acc 56.250 (59.336)	Top-5 acc 77.734 (80.565)	lr 0.01587
Train [50][1900/3239]	Time 0.224 (0.592)	Data Time 0.001 (0.020)	Loss 2.7603 (2.6902)	Entropy 1.16751 (1.16942)	Top-1 acc 58.594 (59.331)	Top-5 acc 77.734 (80.567)	lr 0.01587
Train [50][1910/3239]	Time 0.224 (0.591)	Data Time 0.001 (0.020)	Loss 2.7079 (2.6906)	Entropy 1.16747 (1.16941)	Top-1 acc 54.297 (59.319)	Top-5 acc 80.859 (80.560)	lr 0.01586
Train [50][1920/3239]	Time 0.221 (0.591)	Data Time 0.001 (0.020)	Loss 2.6521 (2.6907)	Entropy 1.16742 (1.16940)	Top-1 acc 58.984 (59.320)	Top-5 acc 82.031 (80.559)	lr 0.01586
Train [50][1930/3239]	Time 0.209 (0.590)	Data Time 0.001 (0.019)	Loss 2.8442 (2.6905)	Entropy 1.16742 (1.16939)	Top-1 acc 50.000 (59.322)	Top-5 acc 78.906 (80.565)	lr 0.01586
Train [50][1940/3239]	Time 0.329 (0.590)	Data Time 0.002 (0.019)	Loss 2.4861 (2.6907)	Entropy 1.16730 (1.16938)	Top-1 acc 64.062 (59.318)	Top-5 acc 85.938 (80.561)	lr 0.01586
Train [50][1950/3239]	Time 0.223 (0.589)	Data Time 0.001 (0.019)	Loss 2.7789 (2.6906)	Entropy 1.16730 (1.16937)	Top-1 acc 61.328 (59.323)	Top-5 acc 80.078 (80.562)	lr 0.01586
Train [50][1960/3239]	Time 0.215 (0.588)	Data Time 0.001 (0.019)	Loss 2.6287 (2.6904)	Entropy 1.16733 (1.16936)	Top-1 acc 56.641 (59.325)	Top-5 acc 83.203 (80.567)	lr 0.01586
Train [50][1970/3239]	Time 0.207 (0.587)	Data Time 0.001 (0.019)	Loss 2.6399 (2.6902)	Entropy 1.16728 (1.16935)	Top-1 acc 57.422 (59.329)	Top-5 acc 82.031 (80.573)	lr 0.01586
Train [50][1980/3239]	Time 0.247 (0.587)	Data Time 0.001 (0.019)	Loss 2.6247 (2.6905)	Entropy 1.16729 (1.16934)	Top-1 acc 62.109 (59.325)	Top-5 acc 82.422 (80.562)	lr 0.01586
Train [50][1990/3239]	Time 2.522 (0.586)	Data Time 0.001 (0.019)	Loss 2.7110 (2.6911)	Entropy 1.16729 (1.16933)	Top-1 acc 57.812 (59.307)	Top-5 acc 80.469 (80.554)	lr 0.01586
Train [50][2000/3239]	Time 0.207 (0.584)	Data Time 0.001 (0.019)	Loss 2.7546 (2.6913)	Entropy 1.16728 (1.16932)	Top-1 acc 58.203 (59.301)	Top-5 acc 77.734 (80.551)	lr 0.01586
Train [50][2010/3239]	Time 0.225 (0.584)	Data Time 0.002 (0.019)	Loss 2.9583 (2.6912)	Entropy 1.16724 (1.16931)	Top-1 acc 53.125 (59.304)	Top-5 acc 75.000 (80.551)	lr 0.01586
Train [50][2020/3239]	Time 0.271 (0.583)	Data Time 0.001 (0.019)	Loss 2.6679 (2.6915)	Entropy 1.16716 (1.16930)	Top-1 acc 61.328 (59.301)	Top-5 acc 80.859 (80.546)	lr 0.01585
Train [50][2030/3239]	Time 0.205 (0.583)	Data Time 0.001 (0.019)	Loss 2.7063 (2.6913)	Entropy 1.16714 (1.16929)	Top-1 acc 56.641 (59.302)	Top-5 acc 81.641 (80.547)	lr 0.01585
Train [50][2040/3239]	Time 0.229 (0.582)	Data Time 0.001 (0.018)	Loss 2.7591 (2.6914)	Entropy 1.16711 (1.16928)	Top-1 acc 58.594 (59.304)	Top-5 acc 82.031 (80.544)	lr 0.01585
Train [50][2050/3239]	Time 0.224 (0.582)	Data Time 0.002 (0.018)	Loss 2.6214 (2.6916)	Entropy 1.16711 (1.16927)	Top-1 acc 59.766 (59.303)	Top-5 acc 80.859 (80.536)	lr 0.01585
Train [50][2060/3239]	Time 0.248 (0.605)	Data Time 0.002 (0.018)	Loss 2.6169 (2.6919)	Entropy 1.16699 (1.16926)	Top-1 acc 63.281 (59.299)	Top-5 acc 80.469 (80.527)	lr 0.01585
Train [50][2070/3239]	Time 0.230 (0.604)	Data Time 0.002 (0.018)	Loss 2.8962 (2.6919)	Entropy 1.16697 (1.16925)	Top-1 acc 55.859 (59.300)	Top-5 acc 78.906 (80.529)	lr 0.01585
Train [50][2080/3239]	Time 0.332 (0.604)	Data Time 0.001 (0.018)	Loss 2.6241 (2.6919)	Entropy 1.16693 (1.16923)	Top-1 acc 63.281 (59.301)	Top-5 acc 80.859 (80.530)	lr 0.01585
Train [50][2090/3239]	Time 0.171 (0.603)	Data Time 0.001 (0.018)	Loss 2.8476 (2.6917)	Entropy 1.16691 (1.16922)	Top-1 acc 57.031 (59.303)	Top-5 acc 76.953 (80.534)	lr 0.01585
Train [50][2100/3239]	Time 2.494 (0.603)	Data Time 0.002 (0.018)	Loss 2.6775 (2.6917)	Entropy 1.16691 (1.16921)	Top-1 acc 58.203 (59.300)	Top-5 acc 81.250 (80.534)	lr 0.01585
Train [50][2110/3239]	Time 0.213 (0.601)	Data Time 0.002 (0.018)	Loss 2.8855 (2.6924)	Entropy 1.16689 (1.16920)	Top-1 acc 53.125 (59.287)	Top-5 acc 75.781 (80.522)	lr 0.01585
Train [50][2120/3239]	Time 0.234 (0.600)	Data Time 0.002 (0.018)	Loss 2.8788 (2.6925)	Entropy 1.16684 (1.16919)	Top-1 acc 52.734 (59.282)	Top-5 acc 75.391 (80.516)	lr 0.01584
Train [50][2130/3239]	Time 0.224 (0.599)	Data Time 0.001 (0.018)	Loss 2.7171 (2.6925)	Entropy 1.16681 (1.16918)	Top-1 acc 61.328 (59.285)	Top-5 acc 80.859 (80.513)	lr 0.01584
Train [50][2140/3239]	Time 0.210 (0.599)	Data Time 0.001 (0.018)	Loss 2.8075 (2.6927)	Entropy 1.16683 (1.16917)	Top-1 acc 53.906 (59.281)	Top-5 acc 78.125 (80.507)	lr 0.01584
Train [50][2150/3239]	Time 0.214 (0.598)	Data Time 0.001 (0.018)	Loss 2.5104 (2.6926)	Entropy 1.16676 (1.16916)	Top-1 acc 62.891 (59.278)	Top-5 acc 82.812 (80.510)	lr 0.01584
Train [50][2160/3239]	Time 0.225 (0.598)	Data Time 0.002 (0.018)	Loss 2.8824 (2.6926)	Entropy 1.16665 (1.16915)	Top-1 acc 55.078 (59.281)	Top-5 acc 76.172 (80.510)	lr 0.01584
Train [50][2170/3239]	Time 0.234 (0.597)	Data Time 0.001 (0.018)	Loss 2.7432 (2.6927)	Entropy 1.16663 (1.16913)	Top-1 acc 57.812 (59.279)	Top-5 acc 80.469 (80.510)	lr 0.01584
Train [50][2180/3239]	Time 0.242 (0.596)	Data Time 0.001 (0.017)	Loss 2.7327 (2.6927)	Entropy 1.16664 (1.16912)	Top-1 acc 59.766 (59.279)	Top-5 acc 80.469 (80.511)	lr 0.01584
Train [50][2190/3239]	Time 0.215 (0.596)	Data Time 0.001 (0.017)	Loss 2.5880 (2.6929)	Entropy 1.16664 (1.16911)	Top-1 acc 59.375 (59.271)	Top-5 acc 83.594 (80.506)	lr 0.01584
Train [50][2200/3239]	Time 0.230 (0.595)	Data Time 0.001 (0.017)	Loss 2.6636 (2.6930)	Entropy 1.16659 (1.16910)	Top-1 acc 56.250 (59.266)	Top-5 acc 81.641 (80.505)	lr 0.01584
Train [50][2210/3239]	Time 2.496 (0.595)	Data Time 0.001 (0.017)	Loss 2.6088 (2.6932)	Entropy 1.16659 (1.16909)	Top-1 acc 65.234 (59.260)	Top-5 acc 85.547 (80.499)	lr 0.01584
Train [50][2220/3239]	Time 0.254 (0.593)	Data Time 0.001 (0.017)	Loss 2.7487 (2.6936)	Entropy 1.16662 (1.16908)	Top-1 acc 60.938 (59.252)	Top-5 acc 79.688 (80.493)	lr 0.01583
Train [50][2230/3239]	Time 0.215 (0.592)	Data Time 0.001 (0.017)	Loss 2.6725 (2.6933)	Entropy 1.16662 (1.16907)	Top-1 acc 60.156 (59.257)	Top-5 acc 81.641 (80.500)	lr 0.01583
Train [50][2240/3239]	Time 0.230 (0.592)	Data Time 0.001 (0.017)	Loss 2.6818 (2.6933)	Entropy 1.16651 (1.16906)	Top-1 acc 58.984 (59.257)	Top-5 acc 80.078 (80.498)	lr 0.01583
Train [50][2250/3239]	Time 0.240 (0.591)	Data Time 0.001 (0.017)	Loss 2.6741 (2.6936)	Entropy 1.16648 (1.16904)	Top-1 acc 60.547 (59.249)	Top-5 acc 78.516 (80.492)	lr 0.01583
Train [50][2260/3239]	Time 0.267 (0.591)	Data Time 0.001 (0.017)	Loss 2.5806 (2.6935)	Entropy 1.16647 (1.16903)	Top-1 acc 59.375 (59.245)	Top-5 acc 81.250 (80.492)	lr 0.01583
Train [50][2270/3239]	Time 0.226 (0.590)	Data Time 0.001 (0.017)	Loss 2.7172 (2.6936)	Entropy 1.16643 (1.16902)	Top-1 acc 59.375 (59.240)	Top-5 acc 80.469 (80.493)	lr 0.01583
Train [50][2280/3239]	Time 0.226 (0.590)	Data Time 0.001 (0.017)	Loss 2.8917 (2.6934)	Entropy 1.16632 (1.16901)	Top-1 acc 55.859 (59.244)	Top-5 acc 77.734 (80.500)	lr 0.01583
Train [50][2290/3239]	Time 0.225 (0.589)	Data Time 0.002 (0.017)	Loss 2.7178 (2.6933)	Entropy 1.16632 (1.16900)	Top-1 acc 58.984 (59.245)	Top-5 acc 78.125 (80.498)	lr 0.01583
Train [50][2300/3239]	Time 0.228 (0.589)	Data Time 0.001 (0.017)	Loss 2.5960 (2.6930)	Entropy 1.16632 (1.16899)	Top-1 acc 63.281 (59.246)	Top-5 acc 84.375 (80.506)	lr 0.01583
Train [50][2310/3239]	Time 0.257 (0.588)	Data Time 0.001 (0.017)	Loss 2.5004 (2.6931)	Entropy 1.16633 (1.16898)	Top-1 acc 66.797 (59.239)	Top-5 acc 84.375 (80.502)	lr 0.01583
Train [50][2320/3239]	Time 2.529 (0.588)	Data Time 0.001 (0.016)	Loss 2.6815 (2.6931)	Entropy 1.16633 (1.16896)	Top-1 acc 62.109 (59.242)	Top-5 acc 81.641 (80.505)	lr 0.01582
Train [50][2330/3239]	Time 0.330 (0.586)	Data Time 0.001 (0.016)	Loss 2.6344 (2.6935)	Entropy 1.16632 (1.16895)	Top-1 acc 59.375 (59.229)	Top-5 acc 82.812 (80.497)	lr 0.01582
Train [50][2340/3239]	Time 0.248 (0.586)	Data Time 0.001 (0.016)	Loss 2.6942 (2.6934)	Entropy 1.16627 (1.16894)	Top-1 acc 58.984 (59.227)	Top-5 acc 79.688 (80.499)	lr 0.01582
Train [50][2350/3239]	Time 0.225 (0.585)	Data Time 0.001 (0.016)	Loss 2.7186 (2.6935)	Entropy 1.16623 (1.16893)	Top-1 acc 56.250 (59.224)	Top-5 acc 80.859 (80.497)	lr 0.01582
Train [50][2360/3239]	Time 0.228 (0.585)	Data Time 0.001 (0.016)	Loss 2.8482 (2.6935)	Entropy 1.16621 (1.16892)	Top-1 acc 53.516 (59.225)	Top-5 acc 74.609 (80.498)	lr 0.01582
Train [50][2370/3239]	Time 0.223 (0.584)	Data Time 0.001 (0.016)	Loss 2.8972 (2.6936)	Entropy 1.16613 (1.16891)	Top-1 acc 49.609 (59.221)	Top-5 acc 76.172 (80.495)	lr 0.01582
Train [50][2380/3239]	Time 0.338 (0.584)	Data Time 0.001 (0.016)	Loss 2.5546 (2.6933)	Entropy 1.16613 (1.16889)	Top-1 acc 61.328 (59.224)	Top-5 acc 83.594 (80.503)	lr 0.01582
Train [50][2390/3239]	Time 0.219 (0.583)	Data Time 0.001 (0.016)	Loss 2.6010 (2.6934)	Entropy 1.16612 (1.16888)	Top-1 acc 63.281 (59.224)	Top-5 acc 82.422 (80.498)	lr 0.01582
Train [50][2400/3239]	Time 0.217 (0.583)	Data Time 0.001 (0.016)	Loss 2.7243 (2.6935)	Entropy 1.16609 (1.16887)	Top-1 acc 58.594 (59.221)	Top-5 acc 77.734 (80.495)	lr 0.01582
Train [50][2410/3239]	Time 0.248 (0.582)	Data Time 0.001 (0.016)	Loss 2.7382 (2.6938)	Entropy 1.16608 (1.16886)	Top-1 acc 62.109 (59.217)	Top-5 acc 79.688 (80.486)	lr 0.01582
Train [50][2420/3239]	Time 0.304 (0.600)	Data Time 0.004 (0.016)	Loss 2.6834 (2.6939)	Entropy 1.16606 (1.16885)	Top-1 acc 62.109 (59.215)	Top-5 acc 78.516 (80.484)	lr 0.01582
Train [50][2430/3239]	Time 2.879 (0.600)	Data Time 0.097 (0.016)	Loss 2.6933 (2.6941)	Entropy 1.16606 (1.16884)	Top-1 acc 61.719 (59.211)	Top-5 acc 80.859 (80.479)	lr 0.01581
Train [50][2440/3239]	Time 0.203 (0.599)	Data Time 0.002 (0.016)	Loss 2.5900 (2.6940)	Entropy 1.16602 (1.16883)	Top-1 acc 59.766 (59.215)	Top-5 acc 82.422 (80.480)	lr 0.01581
Train [50][2450/3239]	Time 0.258 (0.598)	Data Time 0.002 (0.016)	Loss 2.7496 (2.6943)	Entropy 1.16601 (1.16881)	Top-1 acc 56.641 (59.210)	Top-5 acc 79.688 (80.477)	lr 0.01581
Train [50][2460/3239]	Time 0.223 (0.598)	Data Time 0.002 (0.016)	Loss 2.8197 (2.6944)	Entropy 1.16598 (1.16880)	Top-1 acc 55.859 (59.207)	Top-5 acc 77.734 (80.475)	lr 0.01581
Train [50][2470/3239]	Time 0.241 (0.597)	Data Time 0.001 (0.016)	Loss 2.6751 (2.6944)	Entropy 1.16601 (1.16879)	Top-1 acc 60.156 (59.206)	Top-5 acc 80.469 (80.472)	lr 0.01581
Train [50][2480/3239]	Time 0.350 (0.597)	Data Time 0.002 (0.016)	Loss 2.8093 (2.6944)	Entropy 1.16593 (1.16878)	Top-1 acc 54.297 (59.207)	Top-5 acc 78.516 (80.474)	lr 0.01581
Train [50][2490/3239]	Time 0.234 (0.596)	Data Time 0.001 (0.016)	Loss 2.6844 (2.6944)	Entropy 1.16592 (1.16877)	Top-1 acc 57.812 (59.204)	Top-5 acc 79.688 (80.475)	lr 0.01581
Train [50][2500/3239]	Time 0.232 (0.596)	Data Time 0.001 (0.015)	Loss 2.6844 (2.6945)	Entropy 1.16592 (1.16876)	Top-1 acc 58.594 (59.200)	Top-5 acc 78.906 (80.471)	lr 0.01581
Train [50][2510/3239]	Time 0.246 (0.595)	Data Time 0.001 (0.015)	Loss 2.7934 (2.6945)	Entropy 1.16591 (1.16875)	Top-1 acc 57.812 (59.200)	Top-5 acc 78.906 (80.468)	lr 0.01581
Train [50][2520/3239]	Time 0.226 (0.595)	Data Time 0.001 (0.015)	Loss 2.8557 (2.6947)	Entropy 1.16590 (1.16873)	Top-1 acc 53.906 (59.196)	Top-5 acc 76.172 (80.465)	lr 0.01581
Train [50][2530/3239]	Time 0.254 (0.594)	Data Time 0.001 (0.015)	Loss 2.8202 (2.6947)	Entropy 1.16581 (1.16872)	Top-1 acc 57.031 (59.196)	Top-5 acc 78.516 (80.469)	lr 0.01580
Train [50][2540/3239]	Time 2.345 (0.594)	Data Time 0.001 (0.015)	Loss 2.7009 (2.6947)	Entropy 1.16581 (1.16871)	Top-1 acc 60.156 (59.197)	Top-5 acc 78.906 (80.471)	lr 0.01580
Train [50][2550/3239]	Time 0.232 (0.592)	Data Time 0.001 (0.015)	Loss 2.5290 (2.6945)	Entropy 1.16579 (1.16870)	Top-1 acc 64.062 (59.196)	Top-5 acc 80.859 (80.473)	lr 0.01580
Train [50][2560/3239]	Time 0.221 (0.592)	Data Time 0.001 (0.015)	Loss 2.7781 (2.6944)	Entropy 1.16578 (1.16869)	Top-1 acc 54.688 (59.199)	Top-5 acc 79.688 (80.475)	lr 0.01580
Train [50][2570/3239]	Time 0.232 (0.591)	Data Time 0.001 (0.015)	Loss 2.5738 (2.6944)	Entropy 1.16578 (1.16868)	Top-1 acc 62.891 (59.196)	Top-5 acc 80.469 (80.475)	lr 0.01580
Train [50][2580/3239]	Time 0.192 (0.591)	Data Time 0.001 (0.015)	Loss 2.7821 (2.6945)	Entropy 1.16571 (1.16867)	Top-1 acc 55.859 (59.194)	Top-5 acc 79.688 (80.475)	lr 0.01580
Train [50][2590/3239]	Time 0.215 (0.590)	Data Time 0.001 (0.015)	Loss 2.5981 (2.6944)	Entropy 1.16577 (1.16865)	Top-1 acc 61.719 (59.193)	Top-5 acc 82.422 (80.477)	lr 0.01580
Train [50][2600/3239]	Time 0.215 (0.590)	Data Time 0.001 (0.015)	Loss 2.7471 (2.6945)	Entropy 1.16570 (1.16864)	Top-1 acc 57.812 (59.190)	Top-5 acc 80.078 (80.475)	lr 0.01580
Train [50][2610/3239]	Time 0.236 (0.589)	Data Time 0.001 (0.015)	Loss 2.5760 (2.6946)	Entropy 1.16564 (1.16863)	Top-1 acc 63.281 (59.189)	Top-5 acc 83.984 (80.475)	lr 0.01580
Train [50][2620/3239]	Time 0.217 (0.589)	Data Time 0.001 (0.015)	Loss 2.7392 (2.6947)	Entropy 1.16559 (1.16862)	Top-1 acc 61.328 (59.186)	Top-5 acc 79.688 (80.472)	lr 0.01580
Train [50][2630/3239]	Time 0.217 (0.588)	Data Time 0.001 (0.015)	Loss 2.6895 (2.6947)	Entropy 1.16559 (1.16861)	Top-1 acc 58.984 (59.185)	Top-5 acc 82.031 (80.470)	lr 0.01579
Train [50][2640/3239]	Time 0.252 (0.588)	Data Time 0.002 (0.015)	Loss 2.6457 (2.6947)	Entropy 1.16554 (1.16860)	Top-1 acc 62.109 (59.191)	Top-5 acc 82.422 (80.472)	lr 0.01579
Train [50][2650/3239]	Time 0.267 (0.587)	Data Time 0.002 (0.015)	Loss 2.6996 (2.6947)	Entropy 1.16548 (1.16859)	Top-1 acc 59.766 (59.186)	Top-5 acc 77.734 (80.472)	lr 0.01579
Train [50][2660/3239]	Time 0.221 (0.587)	Data Time 0.001 (0.015)	Loss 2.6477 (2.6947)	Entropy 1.16546 (1.16857)	Top-1 acc 59.766 (59.187)	Top-5 acc 82.812 (80.472)	lr 0.01579
Train [50][2670/3239]	Time 0.345 (0.586)	Data Time 0.001 (0.015)	Loss 2.6336 (2.6945)	Entropy 1.16545 (1.16856)	Top-1 acc 58.203 (59.189)	Top-5 acc 80.859 (80.477)	lr 0.01579
Train [50][2680/3239]	Time 0.237 (0.586)	Data Time 0.001 (0.015)	Loss 2.4051 (2.6944)	Entropy 1.16540 (1.16855)	Top-1 acc 67.188 (59.190)	Top-5 acc 87.891 (80.479)	lr 0.01579
Train [50][2690/3239]	Time 0.241 (0.585)	Data Time 0.001 (0.014)	Loss 2.7255 (2.6944)	Entropy 1.16541 (1.16854)	Top-1 acc 57.812 (59.187)	Top-5 acc 75.781 (80.480)	lr 0.01579
Train [50][2700/3239]	Time 0.233 (0.585)	Data Time 0.001 (0.014)	Loss 2.6266 (2.6945)	Entropy 1.16533 (1.16853)	Top-1 acc 60.156 (59.180)	Top-5 acc 81.641 (80.480)	lr 0.01579
Train [50][2710/3239]	Time 0.249 (0.585)	Data Time 0.001 (0.014)	Loss 2.6671 (2.6948)	Entropy 1.16533 (1.16852)	Top-1 acc 60.938 (59.175)	Top-5 acc 81.250 (80.474)	lr 0.01579
Train [50][2720/3239]	Time 0.246 (0.584)	Data Time 0.001 (0.014)	Loss 2.6116 (2.6949)	Entropy 1.16530 (1.16850)	Top-1 acc 62.891 (59.172)	Top-5 acc 82.422 (80.471)	lr 0.01579
Train [50][2730/3239]	Time 0.221 (0.584)	Data Time 0.001 (0.014)	Loss 2.7402 (2.6952)	Entropy 1.16527 (1.16849)	Top-1 acc 59.766 (59.170)	Top-5 acc 79.297 (80.467)	lr 0.01578
Train [50][2740/3239]	Time 0.255 (0.583)	Data Time 0.001 (0.014)	Loss 2.4994 (2.6951)	Entropy 1.16521 (1.16848)	Top-1 acc 64.844 (59.169)	Top-5 acc 84.766 (80.470)	lr 0.01578
Train [50][2750/3239]	Time 0.171 (0.583)	Data Time 0.001 (0.014)	Loss 2.6083 (2.6954)	Entropy 1.16526 (1.16847)	Top-1 acc 62.500 (59.166)	Top-5 acc 82.422 (80.466)	lr 0.01578
Train [50][2760/3239]	Time 0.275 (0.582)	Data Time 0.002 (0.014)	Loss 2.6428 (2.6954)	Entropy 1.16523 (1.16846)	Top-1 acc 57.812 (59.163)	Top-5 acc 82.812 (80.467)	lr 0.01578
Train [50][2770/3239]	Time 0.283 (0.599)	Data Time 0.003 (0.014)	Loss 2.8198 (2.6956)	Entropy 1.16520 (1.16845)	Top-1 acc 55.859 (59.156)	Top-5 acc 78.516 (80.466)	lr 0.01578
Train [50][2780/3239]	Time 0.222 (0.599)	Data Time 0.002 (0.014)	Loss 2.7926 (2.6955)	Entropy 1.16520 (1.16843)	Top-1 acc 53.125 (59.159)	Top-5 acc 79.688 (80.469)	lr 0.01578
Train [50][2790/3239]	Time 0.253 (0.598)	Data Time 0.002 (0.014)	Loss 2.7026 (2.6955)	Entropy 1.16518 (1.16842)	Top-1 acc 58.984 (59.157)	Top-5 acc 80.469 (80.470)	lr 0.01578
Train [50][2800/3239]	Time 0.218 (0.598)	Data Time 0.001 (0.014)	Loss 2.7663 (2.6954)	Entropy 1.16515 (1.16841)	Top-1 acc 54.688 (59.158)	Top-5 acc 78.906 (80.472)	lr 0.01578
Train [50][2810/3239]	Time 0.195 (0.597)	Data Time 0.001 (0.014)	Loss 2.8460 (2.6957)	Entropy 1.16513 (1.16840)	Top-1 acc 53.906 (59.154)	Top-5 acc 76.172 (80.467)	lr 0.01578
Train [50][2820/3239]	Time 0.303 (0.597)	Data Time 0.001 (0.014)	Loss 2.7887 (2.6956)	Entropy 1.16509 (1.16839)	Top-1 acc 55.859 (59.153)	Top-5 acc 78.125 (80.466)	lr 0.01578
Train [50][2830/3239]	Time 0.238 (0.596)	Data Time 0.001 (0.014)	Loss 2.6125 (2.6956)	Entropy 1.16507 (1.16838)	Top-1 acc 58.594 (59.153)	Top-5 acc 83.203 (80.467)	lr 0.01578
Train [50][2840/3239]	Time 0.227 (0.596)	Data Time 0.001 (0.014)	Loss 2.8082 (2.6957)	Entropy 1.16510 (1.16836)	Top-1 acc 56.250 (59.154)	Top-5 acc 78.906 (80.465)	lr 0.01577
Train [50][2850/3239]	Time 0.266 (0.595)	Data Time 0.002 (0.014)	Loss 2.6350 (2.6957)	Entropy 1.16509 (1.16835)	Top-1 acc 60.547 (59.155)	Top-5 acc 81.250 (80.465)	lr 0.01577
Train [50][2860/3239]	Time 0.239 (0.595)	Data Time 0.001 (0.014)	Loss 2.8975 (2.6959)	Entropy 1.16509 (1.16834)	Top-1 acc 53.906 (59.152)	Top-5 acc 75.781 (80.463)	lr 0.01577
Train [50][2870/3239]	Time 0.343 (0.595)	Data Time 0.001 (0.014)	Loss 2.6929 (2.6958)	Entropy 1.16505 (1.16833)	Top-1 acc 54.688 (59.154)	Top-5 acc 82.031 (80.463)	lr 0.01577
Train [50][2880/3239]	Time 0.239 (0.594)	Data Time 0.001 (0.014)	Loss 2.7835 (2.6958)	Entropy 1.16504 (1.16832)	Top-1 acc 55.078 (59.154)	Top-5 acc 76.172 (80.458)	lr 0.01577
Train [50][2890/3239]	Time 0.223 (0.594)	Data Time 0.001 (0.014)	Loss 2.9981 (2.6960)	Entropy 1.16498 (1.16831)	Top-1 acc 57.422 (59.152)	Top-5 acc 73.828 (80.455)	lr 0.01577
Train [50][2900/3239]	Time 0.214 (0.593)	Data Time 0.001 (0.014)	Loss 2.6803 (2.6960)	Entropy 1.16498 (1.16830)	Top-1 acc 62.500 (59.152)	Top-5 acc 81.641 (80.456)	lr 0.01577
Train [50][2910/3239]	Time 0.200 (0.593)	Data Time 0.001 (0.014)	Loss 2.7182 (2.6960)	Entropy 1.16496 (1.16828)	Top-1 acc 57.422 (59.153)	Top-5 acc 79.297 (80.456)	lr 0.01577
Train [50][2920/3239]	Time 0.255 (0.592)	Data Time 0.001 (0.013)	Loss 2.7467 (2.6961)	Entropy 1.16494 (1.16827)	Top-1 acc 61.719 (59.153)	Top-5 acc 78.516 (80.456)	lr 0.01577
Train [50][2930/3239]	Time 0.210 (0.592)	Data Time 0.001 (0.013)	Loss 2.6816 (2.6960)	Entropy 1.16490 (1.16826)	Top-1 acc 55.078 (59.152)	Top-5 acc 81.641 (80.459)	lr 0.01577
Train [50][2940/3239]	Time 0.275 (0.591)	Data Time 0.001 (0.013)	Loss 2.6048 (2.6961)	Entropy 1.16487 (1.16825)	Top-1 acc 62.109 (59.150)	Top-5 acc 82.422 (80.456)	lr 0.01576
Train [50][2950/3239]	Time 0.286 (0.591)	Data Time 0.001 (0.013)	Loss 2.9142 (2.6962)	Entropy 1.16485 (1.16824)	Top-1 acc 55.469 (59.147)	Top-5 acc 75.781 (80.456)	lr 0.01576
Train [50][2960/3239]	Time 0.259 (0.591)	Data Time 0.001 (0.013)	Loss 2.7735 (2.6962)	Entropy 1.16480 (1.16823)	Top-1 acc 56.250 (59.144)	Top-5 acc 79.297 (80.455)	lr 0.01576
Train [50][2970/3239]	Time 0.224 (0.590)	Data Time 0.001 (0.013)	Loss 2.7105 (2.6964)	Entropy 1.16470 (1.16822)	Top-1 acc 58.594 (59.141)	Top-5 acc 78.906 (80.447)	lr 0.01576
Train [50][2980/3239]	Time 0.247 (0.590)	Data Time 0.002 (0.013)	Loss 2.6310 (2.6965)	Entropy 1.16464 (1.16820)	Top-1 acc 60.547 (59.139)	Top-5 acc 81.641 (80.445)	lr 0.01576
Train [50][2990/3239]	Time 0.248 (0.589)	Data Time 0.002 (0.013)	Loss 2.7390 (2.6965)	Entropy 1.16462 (1.16819)	Top-1 acc 56.641 (59.135)	Top-5 acc 78.125 (80.444)	lr 0.01576
Train [50][3000/3239]	Time 0.214 (0.589)	Data Time 0.001 (0.013)	Loss 2.8683 (2.6966)	Entropy 1.16452 (1.16818)	Top-1 acc 52.344 (59.135)	Top-5 acc 78.516 (80.442)	lr 0.01576
Train [50][3010/3239]	Time 0.287 (0.588)	Data Time 0.001 (0.013)	Loss 2.6948 (2.6967)	Entropy 1.16450 (1.16817)	Top-1 acc 59.375 (59.133)	Top-5 acc 80.469 (80.440)	lr 0.01576
Train [50][3020/3239]	Time 0.206 (0.588)	Data Time 0.001 (0.013)	Loss 2.7997 (2.6966)	Entropy 1.16444 (1.16816)	Top-1 acc 55.859 (59.133)	Top-5 acc 79.297 (80.439)	lr 0.01576
Train [50][3030/3239]	Time 0.226 (0.588)	Data Time 0.001 (0.013)	Loss 2.8150 (2.6967)	Entropy 1.16441 (1.16814)	Top-1 acc 55.078 (59.133)	Top-5 acc 80.078 (80.438)	lr 0.01576
Train [50][3040/3239]	Time 0.217 (0.587)	Data Time 0.001 (0.013)	Loss 2.6656 (2.6967)	Entropy 1.16441 (1.16813)	Top-1 acc 61.328 (59.133)	Top-5 acc 80.469 (80.434)	lr 0.01575
Train [50][3050/3239]	Time 0.215 (0.587)	Data Time 0.001 (0.013)	Loss 2.5948 (2.6965)	Entropy 1.16439 (1.16812)	Top-1 acc 64.453 (59.134)	Top-5 acc 82.031 (80.439)	lr 0.01575
Train [50][3060/3239]	Time 0.346 (0.586)	Data Time 0.001 (0.013)	Loss 2.8349 (2.6964)	Entropy 1.16440 (1.16811)	Top-1 acc 56.250 (59.136)	Top-5 acc 80.469 (80.442)	lr 0.01575
Train [50][3070/3239]	Time 0.213 (0.586)	Data Time 0.001 (0.013)	Loss 3.0205 (2.6965)	Entropy 1.16438 (1.16809)	Top-1 acc 49.219 (59.132)	Top-5 acc 74.609 (80.438)	lr 0.01575
Train [50][3080/3239]	Time 0.228 (0.585)	Data Time 0.001 (0.013)	Loss 2.8072 (2.6967)	Entropy 1.16433 (1.16808)	Top-1 acc 58.594 (59.124)	Top-5 acc 76.172 (80.434)	lr 0.01575
Train [50][3090/3239]	Time 0.219 (0.585)	Data Time 0.001 (0.013)	Loss 2.7778 (2.6970)	Entropy 1.16429 (1.16807)	Top-1 acc 53.125 (59.118)	Top-5 acc 79.688 (80.427)	lr 0.01575
Train [50][3100/3239]	Time 0.316 (0.600)	Data Time 0.004 (0.013)	Loss 2.6322 (2.6969)	Entropy 1.16430 (1.16806)	Top-1 acc 62.500 (59.123)	Top-5 acc 82.031 (80.429)	lr 0.01575
Train [50][3110/3239]	Time 0.311 (0.600)	Data Time 0.002 (0.013)	Loss 2.8828 (2.6969)	Entropy 1.16430 (1.16805)	Top-1 acc 55.078 (59.122)	Top-5 acc 75.391 (80.427)	lr 0.01575
Train [50][3120/3239]	Time 0.200 (0.599)	Data Time 0.002 (0.013)	Loss 2.7552 (2.6972)	Entropy 1.16429 (1.16803)	Top-1 acc 58.203 (59.116)	Top-5 acc 82.812 (80.424)	lr 0.01575
Train [50][3130/3239]	Time 0.225 (0.599)	Data Time 0.001 (0.013)	Loss 2.7532 (2.6973)	Entropy 1.16430 (1.16802)	Top-1 acc 56.250 (59.117)	Top-5 acc 78.906 (80.421)	lr 0.01575
Train [50][3140/3239]	Time 0.281 (0.598)	Data Time 0.001 (0.013)	Loss 2.6596 (2.6974)	Entropy 1.16431 (1.16801)	Top-1 acc 60.938 (59.114)	Top-5 acc 80.078 (80.419)	lr 0.01574
Train [50][3150/3239]	Time 0.227 (0.598)	Data Time 0.002 (0.013)	Loss 2.5709 (2.6972)	Entropy 1.16432 (1.16800)	Top-1 acc 61.719 (59.116)	Top-5 acc 83.203 (80.421)	lr 0.01574
Train [50][3160/3239]	Time 0.255 (0.598)	Data Time 0.001 (0.013)	Loss 2.7696 (2.6974)	Entropy 1.16429 (1.16799)	Top-1 acc 58.203 (59.109)	Top-5 acc 77.734 (80.415)	lr 0.01574
Train [50][3170/3239]	Time 0.217 (0.597)	Data Time 0.001 (0.013)	Loss 2.7748 (2.6974)	Entropy 1.16424 (1.16798)	Top-1 acc 56.250 (59.110)	Top-5 acc 80.078 (80.415)	lr 0.01574
Train [50][3180/3239]	Time 0.222 (0.597)	Data Time 0.000 (0.013)	Loss 2.6732 (2.6974)	Entropy 1.16421 (1.16796)	Top-1 acc 60.547 (59.109)	Top-5 acc 81.641 (80.417)	lr 0.01574
Train [50][3190/3239]	Time 0.231 (0.596)	Data Time 0.000 (0.012)	Loss 2.5826 (2.6974)	Entropy 1.16418 (1.16795)	Top-1 acc 60.547 (59.105)	Top-5 acc 82.422 (80.416)	lr 0.01574
Train [50][3200/3239]	Time 0.236 (0.596)	Data Time 0.000 (0.012)	Loss 2.4939 (2.6976)	Entropy 1.16416 (1.16794)	Top-1 acc 63.672 (59.103)	Top-5 acc 82.812 (80.413)	lr 0.01574
Train [50][3210/3239]	Time 0.233 (0.595)	Data Time 0.000 (0.012)	Loss 2.8007 (2.6977)	Entropy 1.16410 (1.16793)	Top-1 acc 56.250 (59.101)	Top-5 acc 77.344 (80.411)	lr 0.01574
Train [50][3220/3239]	Time 0.217 (0.595)	Data Time 0.000 (0.012)	Loss 2.8218 (2.6977)	Entropy 1.16406 (1.16792)	Top-1 acc 57.031 (59.101)	Top-5 acc 78.906 (80.412)	lr 0.01574
Train [50][3230/3239]	Time 0.207 (0.594)	Data Time 0.000 (0.012)	Loss 2.6886 (2.6978)	Entropy 1.16401 (1.16790)	Top-1 acc 58.594 (59.100)	Top-5 acc 81.641 (80.413)	lr 0.01574
Train [50][3239/3239]	Time 2.204 (0.594)	Data Time 0.000 (0.012)	Loss 2.7798 (2.6978)	Entropy 1.16401 (1.16789)	Top-1 acc 56.790 (59.097)	Top-5 acc 77.778 (80.410)	lr 0.01574
==========Valid [50/120]	loss 1.542	top-1 acc 65.125 (65.125)	top-5 acc 85.279	Train top-1 59.097	top-5 80.410	Entropy 1.16401	Latency-None: 0.000ms	Flops: 548.34M
Train [51][0/3239]	Time 34.927 (34.927)	Data Time 32.548 (32.548)	Loss 2.8497 (2.8497)	Entropy 1.16402 (1.16402)	Top-1 acc 55.859 (55.859)	Top-5 acc 76.562 (76.562)	lr 0.01574
Train [51][10/3239]	Time 2.627 (3.784)	Data Time 0.002 (3.062)	Loss 2.4462 (2.6571)	Entropy 1.16402 (1.16402)	Top-1 acc 65.625 (60.298)	Top-5 acc 84.375 (81.641)	lr 0.01573
Train [51][20/3239]	Time 0.403 (2.107)	Data Time 0.002 (1.604)	Loss 2.6697 (2.6739)	Entropy 1.16403 (1.16402)	Top-1 acc 58.984 (59.710)	Top-5 acc 82.812 (81.287)	lr 0.01573
Train [51][30/3239]	Time 0.243 (1.580)	Data Time 0.002 (1.087)	Loss 2.6612 (2.6748)	Entropy 1.16396 (1.16401)	Top-1 acc 57.812 (59.829)	Top-5 acc 80.469 (81.250)	lr 0.01573
Train [51][40/3239]	Time 0.231 (1.307)	Data Time 0.001 (0.823)	Loss 2.6666 (2.6678)	Entropy 1.16395 (1.16399)	Top-1 acc 60.156 (60.118)	Top-5 acc 82.812 (81.212)	lr 0.01573
Train [51][50/3239]	Time 0.206 (1.140)	Data Time 0.001 (0.662)	Loss 2.6222 (2.6693)	Entropy 1.16396 (1.16399)	Top-1 acc 61.719 (59.903)	Top-5 acc 81.250 (81.296)	lr 0.01573
Train [51][60/3239]	Time 0.215 (1.027)	Data Time 0.001 (0.553)	Loss 2.7182 (2.6713)	Entropy 1.16395 (1.16398)	Top-1 acc 58.594 (60.028)	Top-5 acc 81.641 (81.218)	lr 0.01573
Train [51][70/3239]	Time 0.308 (0.949)	Data Time 0.001 (0.476)	Loss 2.5978 (2.6718)	Entropy 1.16389 (1.16397)	Top-1 acc 62.500 (59.964)	Top-5 acc 82.031 (81.256)	lr 0.01573
Train [51][80/3239]	Time 0.241 (0.888)	Data Time 0.002 (0.417)	Loss 2.9141 (2.6782)	Entropy 1.16388 (1.16396)	Top-1 acc 54.688 (59.770)	Top-5 acc 76.953 (81.110)	lr 0.01573
Train [51][90/3239]	Time 0.220 (0.843)	Data Time 0.002 (0.372)	Loss 2.6047 (2.6805)	Entropy 1.16386 (1.16395)	Top-1 acc 59.375 (59.615)	Top-5 acc 83.594 (81.147)	lr 0.01573
Train [51][100/3239]	Time 0.262 (0.806)	Data Time 0.003 (0.335)	Loss 2.6166 (2.6779)	Entropy 1.16385 (1.16394)	Top-1 acc 62.891 (59.638)	Top-5 acc 80.078 (81.107)	lr 0.01573
Train [51][110/3239]	Time 0.221 (0.776)	Data Time 0.002 (0.305)	Loss 2.7632 (2.6735)	Entropy 1.16375 (1.16393)	Top-1 acc 58.203 (59.734)	Top-5 acc 78.906 (81.218)	lr 0.01572
Train [51][120/3239]	Time 2.470 (0.750)	Data Time 0.002 (0.280)	Loss 2.7243 (2.6707)	Entropy 1.16375 (1.16392)	Top-1 acc 56.250 (59.779)	Top-5 acc 81.641 (81.240)	lr 0.01572
Train [51][130/3239]	Time 0.224 (0.711)	Data Time 0.002 (0.259)	Loss 2.7299 (2.6686)	Entropy 1.16374 (1.16391)	Top-1 acc 59.766 (59.852)	Top-5 acc 81.641 (81.220)	lr 0.01572
Train [51][140/3239]	Time 0.224 (0.693)	Data Time 0.001 (0.240)	Loss 2.8821 (2.6713)	Entropy 1.16365 (1.16389)	Top-1 acc 52.344 (59.721)	Top-5 acc 77.344 (81.175)	lr 0.01572
Train [51][150/3239]	Time 0.213 (0.678)	Data Time 0.001 (0.225)	Loss 2.6103 (2.6729)	Entropy 1.16363 (1.16387)	Top-1 acc 59.375 (59.613)	Top-5 acc 83.203 (81.159)	lr 0.01572
Train [51][160/3239]	Time 0.228 (0.664)	Data Time 0.001 (0.211)	Loss 2.5667 (2.6723)	Entropy 1.16361 (1.16386)	Top-1 acc 62.500 (59.681)	Top-5 acc 84.375 (81.129)	lr 0.01572
Train [51][170/3239]	Time 0.211 (0.652)	Data Time 0.001 (0.199)	Loss 2.6951 (2.6708)	Entropy 1.16359 (1.16384)	Top-1 acc 60.547 (59.736)	Top-5 acc 80.078 (81.122)	lr 0.01572
Train [51][180/3239]	Time 0.236 (0.642)	Data Time 0.001 (0.188)	Loss 2.9017 (2.6713)	Entropy 1.16358 (1.16383)	Top-1 acc 54.297 (59.703)	Top-5 acc 78.125 (81.086)	lr 0.01572
Train [51][190/3239]	Time 0.231 (0.632)	Data Time 0.001 (0.178)	Loss 2.6304 (2.6708)	Entropy 1.16354 (1.16381)	Top-1 acc 62.500 (59.674)	Top-5 acc 81.250 (81.084)	lr 0.01572
Train [51][200/3239]	Time 0.272 (0.624)	Data Time 0.001 (0.169)	Loss 2.5773 (2.6749)	Entropy 1.16350 (1.16380)	Top-1 acc 58.984 (59.556)	Top-5 acc 83.984 (80.980)	lr 0.01572
Train [51][210/3239]	Time 0.246 (0.860)	Data Time 0.002 (0.161)	Loss 2.6047 (2.6800)	Entropy 1.16347 (1.16378)	Top-1 acc 61.719 (59.449)	Top-5 acc 83.594 (80.861)	lr 0.01571
Train [51][220/3239]	Time 0.248 (0.843)	Data Time 0.002 (0.154)	Loss 2.6429 (2.6791)	Entropy 1.16344 (1.16377)	Top-1 acc 58.203 (59.499)	Top-5 acc 81.641 (80.886)	lr 0.01571
Train [51][230/3239]	Time 2.464 (0.826)	Data Time 0.002 (0.147)	Loss 2.7633 (2.6794)	Entropy 1.16344 (1.16375)	Top-1 acc 58.984 (59.512)	Top-5 acc 78.125 (80.859)	lr 0.01571
Train [51][240/3239]	Time 0.228 (0.802)	Data Time 0.002 (0.141)	Loss 2.6828 (2.6778)	Entropy 1.16348 (1.16374)	Top-1 acc 58.984 (59.552)	Top-5 acc 80.469 (80.898)	lr 0.01571
Train [51][250/3239]	Time 0.228 (0.789)	Data Time 0.001 (0.136)	Loss 2.6035 (2.6767)	Entropy 1.16349 (1.16373)	Top-1 acc 63.672 (59.562)	Top-5 acc 83.984 (80.933)	lr 0.01571
Train [51][260/3239]	Time 0.310 (0.777)	Data Time 0.001 (0.131)	Loss 2.7501 (2.6757)	Entropy 1.16352 (1.16372)	Top-1 acc 57.422 (59.591)	Top-5 acc 76.172 (80.943)	lr 0.01571
Train [51][270/3239]	Time 0.233 (0.765)	Data Time 0.001 (0.126)	Loss 2.6889 (2.6752)	Entropy 1.16347 (1.16372)	Top-1 acc 59.766 (59.627)	Top-5 acc 82.031 (80.959)	lr 0.01571
Train [51][280/3239]	Time 0.205 (0.754)	Data Time 0.001 (0.121)	Loss 2.6237 (2.6757)	Entropy 1.16346 (1.16371)	Top-1 acc 59.766 (59.607)	Top-5 acc 83.594 (80.927)	lr 0.01571
Train [51][290/3239]	Time 0.235 (0.745)	Data Time 0.001 (0.117)	Loss 2.4426 (2.6742)	Entropy 1.16344 (1.16370)	Top-1 acc 64.453 (59.631)	Top-5 acc 85.938 (80.939)	lr 0.01571
Train [51][300/3239]	Time 0.223 (0.735)	Data Time 0.001 (0.114)	Loss 2.8600 (2.6765)	Entropy 1.16344 (1.16369)	Top-1 acc 55.859 (59.597)	Top-5 acc 74.609 (80.883)	lr 0.01571
Train [51][310/3239]	Time 0.321 (0.727)	Data Time 0.001 (0.110)	Loss 2.5860 (2.6759)	Entropy 1.16330 (1.16368)	Top-1 acc 63.281 (59.658)	Top-5 acc 82.812 (80.893)	lr 0.01570
Train [51][320/3239]	Time 0.223 (0.719)	Data Time 0.001 (0.107)	Loss 2.8451 (2.6765)	Entropy 1.16337 (1.16367)	Top-1 acc 56.641 (59.656)	Top-5 acc 78.125 (80.879)	lr 0.01570
Train [51][330/3239]	Time 0.269 (0.711)	Data Time 0.001 (0.103)	Loss 2.7030 (2.6773)	Entropy 1.16336 (1.16366)	Top-1 acc 57.812 (59.597)	Top-5 acc 80.078 (80.887)	lr 0.01570
Train [51][340/3239]	Time 2.498 (0.703)	Data Time 0.001 (0.100)	Loss 2.7657 (2.6779)	Entropy 1.16336 (1.16365)	Top-1 acc 56.641 (59.620)	Top-5 acc 80.859 (80.870)	lr 0.01570
Train [51][350/3239]	Time 0.267 (0.690)	Data Time 0.001 (0.098)	Loss 2.7503 (2.6790)	Entropy 1.16335 (1.16364)	Top-1 acc 58.984 (59.594)	Top-5 acc 79.297 (80.844)	lr 0.01570
Train [51][360/3239]	Time 0.305 (0.684)	Data Time 0.001 (0.095)	Loss 2.5321 (2.6772)	Entropy 1.16332 (1.16363)	Top-1 acc 62.891 (59.630)	Top-5 acc 83.984 (80.870)	lr 0.01570
Train [51][370/3239]	Time 0.228 (0.678)	Data Time 0.001 (0.092)	Loss 2.7192 (2.6775)	Entropy 1.16332 (1.16362)	Top-1 acc 55.078 (59.602)	Top-5 acc 80.859 (80.861)	lr 0.01570
Train [51][380/3239]	Time 0.217 (0.672)	Data Time 0.001 (0.090)	Loss 2.7040 (2.6776)	Entropy 1.16330 (1.16362)	Top-1 acc 59.375 (59.636)	Top-5 acc 80.469 (80.866)	lr 0.01570
Train [51][390/3239]	Time 0.224 (0.667)	Data Time 0.001 (0.088)	Loss 2.7828 (2.6781)	Entropy 1.16326 (1.16361)	Top-1 acc 56.250 (59.616)	Top-5 acc 80.859 (80.872)	lr 0.01570
Train [51][400/3239]	Time 0.225 (0.662)	Data Time 0.002 (0.086)	Loss 2.6508 (2.6766)	Entropy 1.16326 (1.16360)	Top-1 acc 60.938 (59.689)	Top-5 acc 80.859 (80.891)	lr 0.01570
Train [51][410/3239]	Time 0.219 (0.657)	Data Time 0.001 (0.084)	Loss 2.6635 (2.6762)	Entropy 1.16326 (1.16359)	Top-1 acc 58.203 (59.723)	Top-5 acc 80.859 (80.888)	lr 0.01570
Train [51][420/3239]	Time 0.229 (0.652)	Data Time 0.001 (0.082)	Loss 2.7528 (2.6765)	Entropy 1.16325 (1.16358)	Top-1 acc 55.859 (59.718)	Top-5 acc 80.859 (80.892)	lr 0.01569
Train [51][430/3239]	Time 0.213 (0.648)	Data Time 0.001 (0.080)	Loss 2.6722 (2.6760)	Entropy 1.16324 (1.16358)	Top-1 acc 64.453 (59.699)	Top-5 acc 82.031 (80.914)	lr 0.01569
Train [51][440/3239]	Time 0.252 (0.643)	Data Time 0.001 (0.078)	Loss 2.8295 (2.6776)	Entropy 1.16318 (1.16357)	Top-1 acc 60.156 (59.673)	Top-5 acc 77.344 (80.870)	lr 0.01569
Train [51][450/3239]	Time 2.476 (0.639)	Data Time 0.001 (0.076)	Loss 2.6035 (2.6767)	Entropy 1.16318 (1.16356)	Top-1 acc 63.672 (59.696)	Top-5 acc 80.469 (80.887)	lr 0.01569
Train [51][460/3239]	Time 0.235 (0.631)	Data Time 0.001 (0.075)	Loss 2.6510 (2.6760)	Entropy 1.16315 (1.16355)	Top-1 acc 61.719 (59.711)	Top-5 acc 79.688 (80.876)	lr 0.01569
Train [51][470/3239]	Time 0.230 (0.627)	Data Time 0.001 (0.073)	Loss 2.4796 (2.6755)	Entropy 1.16316 (1.16354)	Top-1 acc 66.406 (59.743)	Top-5 acc 83.203 (80.863)	lr 0.01569
Train [51][480/3239]	Time 0.226 (0.624)	Data Time 0.001 (0.072)	Loss 2.6673 (2.6746)	Entropy 1.16314 (1.16353)	Top-1 acc 60.547 (59.747)	Top-5 acc 83.984 (80.881)	lr 0.01569
Train [51][490/3239]	Time 0.212 (0.620)	Data Time 0.001 (0.070)	Loss 2.5244 (2.6749)	Entropy 1.16303 (1.16352)	Top-1 acc 65.234 (59.741)	Top-5 acc 84.766 (80.878)	lr 0.01569
Train [51][500/3239]	Time 0.328 (0.617)	Data Time 0.001 (0.069)	Loss 2.8732 (2.6743)	Entropy 1.16300 (1.16351)	Top-1 acc 55.078 (59.745)	Top-5 acc 77.344 (80.887)	lr 0.01569
Train [51][510/3239]	Time 0.221 (0.615)	Data Time 0.001 (0.067)	Loss 2.6372 (2.6728)	Entropy 1.16298 (1.16350)	Top-1 acc 59.375 (59.779)	Top-5 acc 81.641 (80.921)	lr 0.01569
Train [51][520/3239]	Time 0.192 (0.612)	Data Time 0.001 (0.066)	Loss 2.6848 (2.6729)	Entropy 1.16295 (1.16349)	Top-1 acc 58.984 (59.775)	Top-5 acc 80.469 (80.925)	lr 0.01568
Train [51][530/3239]	Time 0.221 (0.609)	Data Time 0.001 (0.065)	Loss 2.6644 (2.6743)	Entropy 1.16296 (1.16348)	Top-1 acc 65.234 (59.742)	Top-5 acc 77.734 (80.894)	lr 0.01568
Train [51][540/3239]	Time 0.230 (0.606)	Data Time 0.001 (0.064)	Loss 2.5303 (2.6732)	Entropy 1.16295 (1.16347)	Top-1 acc 61.719 (59.770)	Top-5 acc 84.766 (80.918)	lr 0.01568
Train [51][550/3239]	Time 0.307 (0.604)	Data Time 0.001 (0.063)	Loss 2.5524 (2.6727)	Entropy 1.16297 (1.16346)	Top-1 acc 58.203 (59.758)	Top-5 acc 83.203 (80.922)	lr 0.01568
Train [51][560/3239]	Time 2.439 (0.601)	Data Time 0.001 (0.062)	Loss 2.7090 (2.6725)	Entropy 1.16297 (1.16346)	Top-1 acc 55.469 (59.754)	Top-5 acc 76.953 (80.922)	lr 0.01568
Train [51][570/3239]	Time 0.225 (0.595)	Data Time 0.001 (0.061)	Loss 2.5150 (2.6728)	Entropy 1.16295 (1.16345)	Top-1 acc 66.406 (59.755)	Top-5 acc 84.375 (80.899)	lr 0.01568
Train [51][580/3239]	Time 0.148 (0.680)	Data Time 0.002 (0.060)	Loss 2.7444 (2.6725)	Entropy 1.16291 (1.16344)	Top-1 acc 56.641 (59.772)	Top-5 acc 80.859 (80.892)	lr 0.01568
Train [51][590/3239]	Time 0.186 (0.677)	Data Time 0.003 (0.059)	Loss 2.6577 (2.6718)	Entropy 1.16289 (1.16343)	Top-1 acc 57.031 (59.789)	Top-5 acc 81.250 (80.904)	lr 0.01568
Train [51][600/3239]	Time 0.252 (0.673)	Data Time 0.002 (0.058)	Loss 2.9049 (2.6726)	Entropy 1.16288 (1.16342)	Top-1 acc 53.125 (59.763)	Top-5 acc 76.562 (80.872)	lr 0.01568
Train [51][610/3239]	Time 0.208 (0.670)	Data Time 0.002 (0.057)	Loss 2.5770 (2.6730)	Entropy 1.16280 (1.16341)	Top-1 acc 59.375 (59.753)	Top-5 acc 84.375 (80.869)	lr 0.01568
Train [51][620/3239]	Time 0.206 (0.667)	Data Time 0.001 (0.056)	Loss 2.7868 (2.6725)	Entropy 1.16279 (1.16340)	Top-1 acc 58.594 (59.768)	Top-5 acc 80.078 (80.873)	lr 0.01567
Train [51][630/3239]	Time 0.228 (0.664)	Data Time 0.001 (0.055)	Loss 2.7757 (2.6731)	Entropy 1.16280 (1.16339)	Top-1 acc 61.719 (59.751)	Top-5 acc 76.172 (80.864)	lr 0.01567
Train [51][640/3239]	Time 0.233 (0.660)	Data Time 0.002 (0.054)	Loss 2.7796 (2.6729)	Entropy 1.16278 (1.16338)	Top-1 acc 60.547 (59.775)	Top-5 acc 78.906 (80.864)	lr 0.01567
Train [51][650/3239]	Time 0.216 (0.657)	Data Time 0.001 (0.054)	Loss 2.6237 (2.6720)	Entropy 1.16277 (1.16337)	Top-1 acc 58.984 (59.785)	Top-5 acc 80.859 (80.884)	lr 0.01567
Train [51][660/3239]	Time 0.211 (0.655)	Data Time 0.001 (0.053)	Loss 2.5794 (2.6718)	Entropy 1.16276 (1.16336)	Top-1 acc 62.500 (59.796)	Top-5 acc 81.641 (80.881)	lr 0.01567
Train [51][670/3239]	Time 2.332 (0.651)	Data Time 0.001 (0.052)	Loss 2.5420 (2.6721)	Entropy 1.16276 (1.16335)	Top-1 acc 64.453 (59.776)	Top-5 acc 83.984 (80.871)	lr 0.01567
Train [51][680/3239]	Time 0.221 (0.645)	Data Time 0.001 (0.051)	Loss 2.6040 (2.6724)	Entropy 1.16273 (1.16334)	Top-1 acc 61.719 (59.761)	Top-5 acc 82.422 (80.866)	lr 0.01567
Train [51][690/3239]	Time 0.275 (0.643)	Data Time 0.001 (0.051)	Loss 2.6589 (2.6723)	Entropy 1.16257 (1.16333)	Top-1 acc 58.984 (59.760)	Top-5 acc 82.422 (80.877)	lr 0.01567
Train [51][700/3239]	Time 0.341 (0.640)	Data Time 0.002 (0.050)	Loss 2.6510 (2.6721)	Entropy 1.16253 (1.16332)	Top-1 acc 62.109 (59.772)	Top-5 acc 82.812 (80.877)	lr 0.01567
Train [51][710/3239]	Time 0.221 (0.638)	Data Time 0.001 (0.049)	Loss 2.6114 (2.6724)	Entropy 1.16251 (1.16331)	Top-1 acc 62.109 (59.771)	Top-5 acc 82.422 (80.874)	lr 0.01567
Train [51][720/3239]	Time 0.221 (0.635)	Data Time 0.001 (0.048)	Loss 2.7582 (2.6735)	Entropy 1.16250 (1.16330)	Top-1 acc 55.859 (59.726)	Top-5 acc 79.297 (80.868)	lr 0.01566
Train [51][730/3239]	Time 0.225 (0.633)	Data Time 0.001 (0.048)	Loss 2.6482 (2.6736)	Entropy 1.16249 (1.16329)	Top-1 acc 56.250 (59.713)	Top-5 acc 83.984 (80.864)	lr 0.01566
Train [51][740/3239]	Time 0.226 (0.631)	Data Time 0.001 (0.047)	Loss 2.8143 (2.6742)	Entropy 1.16246 (1.16328)	Top-1 acc 59.375 (59.719)	Top-5 acc 77.734 (80.851)	lr 0.01566
Train [51][750/3239]	Time 0.323 (0.629)	Data Time 0.001 (0.047)	Loss 2.6724 (2.6742)	Entropy 1.16240 (1.16327)	Top-1 acc 57.031 (59.702)	Top-5 acc 80.859 (80.846)	lr 0.01566
Train [51][760/3239]	Time 0.225 (0.627)	Data Time 0.001 (0.046)	Loss 2.4429 (2.6737)	Entropy 1.16237 (1.16325)	Top-1 acc 62.500 (59.724)	Top-5 acc 88.281 (80.852)	lr 0.01566
Train [51][770/3239]	Time 0.217 (0.624)	Data Time 0.001 (0.045)	Loss 2.8107 (2.6733)	Entropy 1.16234 (1.16324)	Top-1 acc 56.641 (59.739)	Top-5 acc 78.516 (80.871)	lr 0.01566
Train [51][780/3239]	Time 2.438 (0.622)	Data Time 0.001 (0.045)	Loss 2.6873 (2.6725)	Entropy 1.16234 (1.16323)	Top-1 acc 64.062 (59.765)	Top-5 acc 83.594 (80.884)	lr 0.01566
Train [51][790/3239]	Time 0.216 (0.617)	Data Time 0.001 (0.044)	Loss 2.6796 (2.6731)	Entropy 1.16234 (1.16322)	Top-1 acc 62.891 (59.753)	Top-5 acc 80.859 (80.890)	lr 0.01566
Train [51][800/3239]	Time 0.224 (0.615)	Data Time 0.001 (0.044)	Loss 2.8124 (2.6732)	Entropy 1.16233 (1.16321)	Top-1 acc 57.031 (59.735)	Top-5 acc 76.562 (80.891)	lr 0.01566
Train [51][810/3239]	Time 0.222 (0.613)	Data Time 0.001 (0.043)	Loss 2.6509 (2.6733)	Entropy 1.16234 (1.16320)	Top-1 acc 61.328 (59.744)	Top-5 acc 80.078 (80.882)	lr 0.01566
Train [51][820/3239]	Time 0.212 (0.611)	Data Time 0.001 (0.043)	Loss 2.2287 (2.6731)	Entropy 1.16231 (1.16319)	Top-1 acc 74.609 (59.756)	Top-5 acc 90.625 (80.890)	lr 0.01566
Train [51][830/3239]	Time 0.242 (0.610)	Data Time 0.001 (0.042)	Loss 2.7383 (2.6730)	Entropy 1.16231 (1.16318)	Top-1 acc 59.766 (59.751)	Top-5 acc 80.078 (80.891)	lr 0.01565
Train [51][840/3239]	Time 0.382 (0.608)	Data Time 0.002 (0.042)	Loss 2.5905 (2.6725)	Entropy 1.16224 (1.16317)	Top-1 acc 64.062 (59.772)	Top-5 acc 81.641 (80.897)	lr 0.01565
Train [51][850/3239]	Time 0.265 (0.607)	Data Time 0.001 (0.041)	Loss 2.6282 (2.6727)	Entropy 1.16219 (1.16316)	Top-1 acc 59.375 (59.765)	Top-5 acc 82.031 (80.886)	lr 0.01565
Train [51][860/3239]	Time 0.245 (0.605)	Data Time 0.001 (0.041)	Loss 2.8597 (2.6726)	Entropy 1.16218 (1.16314)	Top-1 acc 57.031 (59.761)	Top-5 acc 77.734 (80.893)	lr 0.01565
Train [51][870/3239]	Time 0.167 (0.603)	Data Time 0.001 (0.040)	Loss 2.7242 (2.6732)	Entropy 1.16211 (1.16313)	Top-1 acc 58.594 (59.749)	Top-5 acc 80.469 (80.876)	lr 0.01565
Train [51][880/3239]	Time 0.219 (0.602)	Data Time 0.002 (0.040)	Loss 2.4954 (2.6725)	Entropy 1.16211 (1.16312)	Top-1 acc 62.500 (59.759)	Top-5 acc 85.938 (80.888)	lr 0.01565
Train [51][890/3239]	Time 2.366 (0.600)	Data Time 0.001 (0.040)	Loss 2.7517 (2.6723)	Entropy 1.16211 (1.16311)	Top-1 acc 56.250 (59.756)	Top-5 acc 79.688 (80.892)	lr 0.01565
Train [51][900/3239]	Time 0.258 (0.596)	Data Time 0.001 (0.039)	Loss 2.7192 (2.6726)	Entropy 1.16207 (1.16310)	Top-1 acc 57.031 (59.754)	Top-5 acc 79.297 (80.883)	lr 0.01565
Train [51][910/3239]	Time 0.227 (0.595)	Data Time 0.001 (0.039)	Loss 2.8100 (2.6729)	Entropy 1.16207 (1.16309)	Top-1 acc 56.641 (59.756)	Top-5 acc 76.953 (80.877)	lr 0.01565
Train [51][920/3239]	Time 0.171 (0.593)	Data Time 0.001 (0.038)	Loss 2.7876 (2.6731)	Entropy 1.16202 (1.16308)	Top-1 acc 55.469 (59.735)	Top-5 acc 78.516 (80.872)	lr 0.01565
Train [51][930/3239]	Time 0.242 (0.592)	Data Time 0.001 (0.038)	Loss 2.7856 (2.6735)	Entropy 1.16199 (1.16306)	Top-1 acc 54.688 (59.726)	Top-5 acc 78.906 (80.870)	lr 0.01564
Train [51][940/3239]	Time 0.443 (0.638)	Data Time 0.004 (0.038)	Loss 2.9085 (2.6739)	Entropy 1.16196 (1.16305)	Top-1 acc 53.516 (59.720)	Top-5 acc 75.781 (80.859)	lr 0.01564
Train [51][950/3239]	Time 0.235 (0.639)	Data Time 0.002 (0.037)	Loss 2.7089 (2.6739)	Entropy 1.16190 (1.16304)	Top-1 acc 61.719 (59.742)	Top-5 acc 80.859 (80.858)	lr 0.01564
Train [51][960/3239]	Time 0.216 (0.637)	Data Time 0.001 (0.037)	Loss 2.6583 (2.6740)	Entropy 1.16190 (1.16303)	Top-1 acc 58.594 (59.731)	Top-5 acc 82.812 (80.860)	lr 0.01564
Train [51][970/3239]	Time 0.240 (0.635)	Data Time 0.002 (0.037)	Loss 2.6150 (2.6736)	Entropy 1.16189 (1.16302)	Top-1 acc 58.203 (59.741)	Top-5 acc 81.641 (80.861)	lr 0.01564
Train [51][980/3239]	Time 0.227 (0.633)	Data Time 0.001 (0.036)	Loss 2.4051 (2.6739)	Entropy 1.16187 (1.16301)	Top-1 acc 66.797 (59.734)	Top-5 acc 86.719 (80.860)	lr 0.01564
Train [51][990/3239]	Time 0.306 (0.632)	Data Time 0.001 (0.036)	Loss 2.5310 (2.6729)	Entropy 1.16181 (1.16299)	Top-1 acc 65.625 (59.757)	Top-5 acc 83.203 (80.877)	lr 0.01564
Train [51][1000/3239]	Time 2.478 (0.630)	Data Time 0.002 (0.036)	Loss 2.6020 (2.6727)	Entropy 1.16181 (1.16298)	Top-1 acc 62.500 (59.756)	Top-5 acc 82.812 (80.878)	lr 0.01564
Train [51][1010/3239]	Time 0.226 (0.626)	Data Time 0.001 (0.035)	Loss 2.6459 (2.6728)	Entropy 1.16176 (1.16297)	Top-1 acc 59.375 (59.748)	Top-5 acc 82.031 (80.876)	lr 0.01564
Train [51][1020/3239]	Time 0.211 (0.624)	Data Time 0.001 (0.035)	Loss 2.8024 (2.6736)	Entropy 1.16175 (1.16296)	Top-1 acc 55.859 (59.725)	Top-5 acc 78.125 (80.860)	lr 0.01564
Train [51][1030/3239]	Time 0.211 (0.623)	Data Time 0.001 (0.035)	Loss 2.6006 (2.6735)	Entropy 1.16172 (1.16295)	Top-1 acc 63.281 (59.730)	Top-5 acc 79.297 (80.850)	lr 0.01563
Train [51][1040/3239]	Time 0.313 (0.621)	Data Time 0.001 (0.034)	Loss 2.6602 (2.6738)	Entropy 1.16169 (1.16293)	Top-1 acc 61.328 (59.728)	Top-5 acc 80.859 (80.848)	lr 0.01563
Train [51][1050/3239]	Time 0.218 (0.620)	Data Time 0.001 (0.034)	Loss 2.8126 (2.6743)	Entropy 1.16168 (1.16292)	Top-1 acc 54.688 (59.712)	Top-5 acc 78.516 (80.841)	lr 0.01563
Train [51][1060/3239]	Time 0.231 (0.618)	Data Time 0.001 (0.034)	Loss 2.6764 (2.6755)	Entropy 1.16165 (1.16291)	Top-1 acc 60.938 (59.688)	Top-5 acc 81.250 (80.827)	lr 0.01563
Train [51][1070/3239]	Time 0.226 (0.617)	Data Time 0.001 (0.033)	Loss 2.7207 (2.6759)	Entropy 1.16161 (1.16290)	Top-1 acc 59.375 (59.677)	Top-5 acc 78.516 (80.812)	lr 0.01563
Train [51][1080/3239]	Time 0.209 (0.615)	Data Time 0.001 (0.033)	Loss 2.6206 (2.6758)	Entropy 1.16161 (1.16289)	Top-1 acc 64.062 (59.688)	Top-5 acc 80.859 (80.810)	lr 0.01563
Train [51][1090/3239]	Time 0.199 (0.614)	Data Time 0.001 (0.033)	Loss 2.6670 (2.6753)	Entropy 1.16157 (1.16288)	Top-1 acc 63.281 (59.700)	Top-5 acc 82.422 (80.821)	lr 0.01563
Train [51][1100/3239]	Time 0.216 (0.613)	Data Time 0.001 (0.032)	Loss 2.5783 (2.6753)	Entropy 1.16156 (1.16286)	Top-1 acc 60.156 (59.707)	Top-5 acc 84.766 (80.823)	lr 0.01563
Train [51][1110/3239]	Time 2.424 (0.611)	Data Time 0.002 (0.032)	Loss 2.5512 (2.6752)	Entropy 1.16156 (1.16285)	Top-1 acc 65.625 (59.719)	Top-5 acc 80.859 (80.830)	lr 0.01563
Train [51][1120/3239]	Time 0.267 (0.608)	Data Time 0.001 (0.032)	Loss 2.6370 (2.6746)	Entropy 1.16154 (1.16284)	Top-1 acc 56.250 (59.734)	Top-5 acc 83.203 (80.844)	lr 0.01563
Train [51][1130/3239]	Time 0.393 (0.607)	Data Time 0.001 (0.032)	Loss 2.6404 (2.6747)	Entropy 1.16151 (1.16283)	Top-1 acc 58.984 (59.724)	Top-5 acc 82.031 (80.839)	lr 0.01562
Train [51][1140/3239]	Time 0.212 (0.606)	Data Time 0.001 (0.031)	Loss 2.8084 (2.6746)	Entropy 1.16150 (1.16282)	Top-1 acc 53.516 (59.718)	Top-5 acc 78.516 (80.842)	lr 0.01562
Train [51][1150/3239]	Time 0.211 (0.604)	Data Time 0.001 (0.031)	Loss 2.7569 (2.6743)	Entropy 1.16148 (1.16281)	Top-1 acc 54.297 (59.725)	Top-5 acc 78.906 (80.849)	lr 0.01562
Train [51][1160/3239]	Time 0.221 (0.603)	Data Time 0.001 (0.031)	Loss 2.7713 (2.6749)	Entropy 1.16145 (1.16279)	Top-1 acc 58.594 (59.714)	Top-5 acc 79.688 (80.838)	lr 0.01562
Train [51][1170/3239]	Time 0.231 (0.602)	Data Time 0.001 (0.031)	Loss 2.6843 (2.6750)	Entropy 1.16138 (1.16278)	Top-1 acc 58.984 (59.709)	Top-5 acc 80.859 (80.835)	lr 0.01562
Train [51][1180/3239]	Time 0.326 (0.601)	Data Time 0.001 (0.030)	Loss 2.7126 (2.6745)	Entropy 1.16136 (1.16277)	Top-1 acc 59.375 (59.722)	Top-5 acc 82.422 (80.844)	lr 0.01562
Train [51][1190/3239]	Time 0.218 (0.599)	Data Time 0.001 (0.030)	Loss 2.6910 (2.6745)	Entropy 1.16134 (1.16276)	Top-1 acc 62.109 (59.722)	Top-5 acc 78.516 (80.842)	lr 0.01562
Train [51][1200/3239]	Time 0.237 (0.598)	Data Time 0.001 (0.030)	Loss 2.3785 (2.6745)	Entropy 1.16136 (1.16275)	Top-1 acc 67.969 (59.715)	Top-5 acc 85.938 (80.844)	lr 0.01562
Train [51][1210/3239]	Time 0.242 (0.597)	Data Time 0.001 (0.030)	Loss 3.0345 (2.6747)	Entropy 1.16136 (1.16273)	Top-1 acc 48.438 (59.716)	Top-5 acc 71.875 (80.836)	lr 0.01562
Train [51][1220/3239]	Time 2.436 (0.596)	Data Time 0.001 (0.029)	Loss 2.8435 (2.6747)	Entropy 1.16136 (1.16272)	Top-1 acc 56.641 (59.714)	Top-5 acc 77.734 (80.838)	lr 0.01562
Train [51][1230/3239]	Time 0.321 (0.593)	Data Time 0.001 (0.029)	Loss 2.7684 (2.6745)	Entropy 1.16135 (1.16271)	Top-1 acc 53.906 (59.710)	Top-5 acc 79.688 (80.845)	lr 0.01562
Train [51][1240/3239]	Time 0.224 (0.592)	Data Time 0.001 (0.029)	Loss 2.7032 (2.6746)	Entropy 1.16135 (1.16270)	Top-1 acc 60.938 (59.711)	Top-5 acc 78.906 (80.842)	lr 0.01561
Train [51][1250/3239]	Time 0.228 (0.591)	Data Time 0.001 (0.029)	Loss 2.4939 (2.6748)	Entropy 1.16127 (1.16269)	Top-1 acc 63.672 (59.709)	Top-5 acc 84.766 (80.843)	lr 0.01561
Train [51][1260/3239]	Time 0.215 (0.590)	Data Time 0.001 (0.029)	Loss 2.5341 (2.6751)	Entropy 1.16117 (1.16268)	Top-1 acc 63.672 (59.694)	Top-5 acc 83.984 (80.836)	lr 0.01561
Train [51][1270/3239]	Time 0.208 (0.589)	Data Time 0.001 (0.028)	Loss 2.4882 (2.6749)	Entropy 1.16117 (1.16267)	Top-1 acc 64.453 (59.693)	Top-5 acc 85.156 (80.837)	lr 0.01561
Train [51][1280/3239]	Time 0.227 (0.588)	Data Time 0.001 (0.028)	Loss 2.6867 (2.6750)	Entropy 1.16114 (1.16265)	Top-1 acc 58.594 (59.689)	Top-5 acc 82.422 (80.837)	lr 0.01561
Train [51][1290/3239]	Time 0.278 (0.587)	Data Time 0.001 (0.028)	Loss 2.7599 (2.6752)	Entropy 1.16116 (1.16264)	Top-1 acc 58.203 (59.686)	Top-5 acc 77.734 (80.831)	lr 0.01561
Train [51][1300/3239]	Time 0.228 (0.624)	Data Time 0.002 (0.028)	Loss 2.7905 (2.6756)	Entropy 1.16113 (1.16263)	Top-1 acc 57.812 (59.675)	Top-5 acc 75.391 (80.821)	lr 0.01561
Train [51][1310/3239]	Time 0.224 (0.623)	Data Time 0.002 (0.028)	Loss 2.7460 (2.6752)	Entropy 1.16109 (1.16262)	Top-1 acc 57.422 (59.685)	Top-5 acc 81.641 (80.833)	lr 0.01561
Train [51][1320/3239]	Time 0.258 (0.622)	Data Time 0.002 (0.027)	Loss 2.5874 (2.6751)	Entropy 1.16108 (1.16261)	Top-1 acc 61.328 (59.685)	Top-5 acc 80.469 (80.833)	lr 0.01561
Train [51][1330/3239]	Time 2.493 (0.620)	Data Time 0.003 (0.027)	Loss 2.6654 (2.6748)	Entropy 1.16108 (1.16260)	Top-1 acc 62.500 (59.689)	Top-5 acc 78.906 (80.836)	lr 0.01561
Train [51][1340/3239]	Time 0.253 (0.618)	Data Time 0.001 (0.027)	Loss 2.7147 (2.6746)	Entropy 1.16105 (1.16259)	Top-1 acc 54.688 (59.695)	Top-5 acc 80.078 (80.840)	lr 0.01560
Train [51][1350/3239]	Time 0.199 (0.616)	Data Time 0.001 (0.027)	Loss 2.8183 (2.6752)	Entropy 1.16103 (1.16257)	Top-1 acc 57.422 (59.680)	Top-5 acc 77.344 (80.831)	lr 0.01560
Train [51][1360/3239]	Time 0.239 (0.615)	Data Time 0.001 (0.027)	Loss 2.6305 (2.6753)	Entropy 1.16092 (1.16256)	Top-1 acc 61.328 (59.678)	Top-5 acc 82.031 (80.828)	lr 0.01560
Train [51][1370/3239]	Time 0.244 (0.614)	Data Time 0.001 (0.026)	Loss 2.7532 (2.6751)	Entropy 1.16082 (1.16255)	Top-1 acc 59.766 (59.676)	Top-5 acc 79.297 (80.827)	lr 0.01560
Train [51][1380/3239]	Time 0.325 (0.613)	Data Time 0.001 (0.026)	Loss 2.6810 (2.6759)	Entropy 1.16081 (1.16254)	Top-1 acc 57.422 (59.649)	Top-5 acc 80.859 (80.818)	lr 0.01560
Train [51][1390/3239]	Time 0.229 (0.612)	Data Time 0.001 (0.026)	Loss 2.7595 (2.6759)	Entropy 1.16080 (1.16252)	Top-1 acc 57.422 (59.644)	Top-5 acc 76.953 (80.820)	lr 0.01560
Train [51][1400/3239]	Time 0.236 (0.611)	Data Time 0.001 (0.026)	Loss 2.6087 (2.6762)	Entropy 1.16083 (1.16251)	Top-1 acc 60.156 (59.638)	Top-5 acc 82.031 (80.815)	lr 0.01560
Train [51][1410/3239]	Time 0.239 (0.610)	Data Time 0.001 (0.026)	Loss 2.5595 (2.6764)	Entropy 1.16079 (1.16250)	Top-1 acc 61.719 (59.632)	Top-5 acc 83.984 (80.815)	lr 0.01560
Train [51][1420/3239]	Time 0.224 (0.609)	Data Time 0.001 (0.026)	Loss 2.7709 (2.6764)	Entropy 1.16077 (1.16249)	Top-1 acc 57.422 (59.635)	Top-5 acc 78.516 (80.812)	lr 0.01560
Train [51][1430/3239]	Time 0.329 (0.608)	Data Time 0.001 (0.025)	Loss 2.6597 (2.6764)	Entropy 1.16074 (1.16248)	Top-1 acc 59.375 (59.635)	Top-5 acc 81.250 (80.814)	lr 0.01560
Train [51][1440/3239]	Time 2.559 (0.607)	Data Time 0.001 (0.025)	Loss 2.7582 (2.6764)	Entropy 1.16074 (1.16246)	Top-1 acc 56.250 (59.635)	Top-5 acc 76.562 (80.818)	lr 0.01559
Train [51][1450/3239]	Time 0.256 (0.604)	Data Time 0.001 (0.025)	Loss 2.6299 (2.6763)	Entropy 1.16074 (1.16245)	Top-1 acc 59.375 (59.636)	Top-5 acc 85.547 (80.816)	lr 0.01559
Train [51][1460/3239]	Time 0.200 (0.603)	Data Time 0.001 (0.025)	Loss 2.6405 (2.6761)	Entropy 1.16072 (1.16244)	Top-1 acc 56.641 (59.637)	Top-5 acc 82.812 (80.823)	lr 0.01559
Train [51][1470/3239]	Time 0.220 (0.602)	Data Time 0.001 (0.025)	Loss 2.5976 (2.6762)	Entropy 1.16068 (1.16243)	Top-1 acc 62.500 (59.632)	Top-5 acc 82.031 (80.823)	lr 0.01559
Train [51][1480/3239]	Time 0.232 (0.601)	Data Time 0.001 (0.025)	Loss 2.5259 (2.6755)	Entropy 1.16066 (1.16242)	Top-1 acc 60.547 (59.638)	Top-5 acc 84.375 (80.838)	lr 0.01559
Train [51][1490/3239]	Time 0.198 (0.600)	Data Time 0.001 (0.024)	Loss 2.6498 (2.6757)	Entropy 1.16067 (1.16240)	Top-1 acc 65.234 (59.635)	Top-5 acc 81.250 (80.836)	lr 0.01559
Train [51][1500/3239]	Time 0.235 (0.599)	Data Time 0.001 (0.024)	Loss 2.8806 (2.6759)	Entropy 1.16066 (1.16239)	Top-1 acc 57.031 (59.632)	Top-5 acc 76.172 (80.825)	lr 0.01559
Train [51][1510/3239]	Time 0.218 (0.599)	Data Time 0.002 (0.024)	Loss 2.7564 (2.6759)	Entropy 1.16061 (1.16238)	Top-1 acc 58.203 (59.636)	Top-5 acc 80.078 (80.825)	lr 0.01559
Train [51][1520/3239]	Time 0.225 (0.598)	Data Time 0.001 (0.024)	Loss 2.6973 (2.6764)	Entropy 1.16057 (1.16237)	Top-1 acc 60.156 (59.621)	Top-5 acc 80.859 (80.823)	lr 0.01559
Train [51][1530/3239]	Time 0.217 (0.597)	Data Time 0.001 (0.024)	Loss 2.5568 (2.6764)	Entropy 1.16052 (1.16236)	Top-1 acc 61.719 (59.619)	Top-5 acc 82.812 (80.823)	lr 0.01559
Train [51][1540/3239]	Time 0.237 (0.596)	Data Time 0.001 (0.024)	Loss 2.5686 (2.6770)	Entropy 1.16048 (1.16235)	Top-1 acc 62.891 (59.605)	Top-5 acc 82.812 (80.808)	lr 0.01558
Train [51][1550/3239]	Time 2.415 (0.595)	Data Time 0.002 (0.024)	Loss 2.6909 (2.6774)	Entropy 1.16048 (1.16233)	Top-1 acc 58.594 (59.597)	Top-5 acc 82.422 (80.803)	lr 0.01558
Train [51][1560/3239]	Time 0.243 (0.593)	Data Time 0.001 (0.023)	Loss 2.9087 (2.6775)	Entropy 1.16045 (1.16232)	Top-1 acc 54.688 (59.589)	Top-5 acc 78.516 (80.804)	lr 0.01558
Train [51][1570/3239]	Time 0.321 (0.592)	Data Time 0.001 (0.023)	Loss 2.8246 (2.6772)	Entropy 1.16041 (1.16231)	Top-1 acc 58.203 (59.597)	Top-5 acc 79.688 (80.812)	lr 0.01558
Train [51][1580/3239]	Time 0.243 (0.591)	Data Time 0.001 (0.023)	Loss 2.7090 (2.6770)	Entropy 1.16042 (1.16230)	Top-1 acc 58.984 (59.603)	Top-5 acc 79.688 (80.811)	lr 0.01558
Train [51][1590/3239]	Time 0.220 (0.590)	Data Time 0.001 (0.023)	Loss 2.5137 (2.6765)	Entropy 1.16037 (1.16229)	Top-1 acc 65.234 (59.615)	Top-5 acc 83.984 (80.817)	lr 0.01558
Train [51][1600/3239]	Time 0.230 (0.590)	Data Time 0.001 (0.023)	Loss 2.8056 (2.6764)	Entropy 1.16038 (1.16227)	Top-1 acc 59.766 (59.618)	Top-5 acc 79.297 (80.818)	lr 0.01558
Train [51][1610/3239]	Time 0.238 (0.589)	Data Time 0.002 (0.023)	Loss 2.5879 (2.6764)	Entropy 1.16037 (1.16226)	Top-1 acc 62.109 (59.616)	Top-5 acc 80.469 (80.817)	lr 0.01558
Train [51][1620/3239]	Time 0.311 (0.588)	Data Time 0.001 (0.023)	Loss 2.7525 (2.6767)	Entropy 1.16028 (1.16225)	Top-1 acc 59.375 (59.614)	Top-5 acc 78.125 (80.813)	lr 0.01558
Train [51][1630/3239]	Time 0.223 (0.587)	Data Time 0.001 (0.022)	Loss 2.5395 (2.6768)	Entropy 1.16028 (1.16224)	Top-1 acc 60.547 (59.610)	Top-5 acc 85.938 (80.809)	lr 0.01558
Train [51][1640/3239]	Time 0.229 (0.587)	Data Time 0.001 (0.022)	Loss 2.4361 (2.6766)	Entropy 1.16021 (1.16223)	Top-1 acc 66.406 (59.616)	Top-5 acc 85.938 (80.806)	lr 0.01557
Train [51][1650/3239]	Time 0.242 (0.586)	Data Time 0.001 (0.022)	Loss 2.6485 (2.6765)	Entropy 1.16020 (1.16221)	Top-1 acc 61.328 (59.613)	Top-5 acc 82.422 (80.809)	lr 0.01557
Train [51][1660/3239]	Time 47.684 (0.612)	Data Time 0.001 (0.022)	Loss 2.5534 (2.6764)	Entropy 1.16020 (1.16220)	Top-1 acc 63.672 (59.618)	Top-5 acc 83.594 (80.810)	lr 0.01557
Train [51][1670/3239]	Time 0.271 (0.611)	Data Time 0.002 (0.022)	Loss 2.5367 (2.6769)	Entropy 1.16018 (1.16219)	Top-1 acc 62.891 (59.606)	Top-5 acc 82.812 (80.802)	lr 0.01557
Train [51][1680/3239]	Time 0.246 (0.610)	Data Time 0.002 (0.022)	Loss 2.6110 (2.6771)	Entropy 1.16014 (1.16218)	Top-1 acc 59.766 (59.603)	Top-5 acc 81.641 (80.801)	lr 0.01557
Train [51][1690/3239]	Time 0.213 (0.609)	Data Time 0.002 (0.022)	Loss 2.7603 (2.6776)	Entropy 1.16013 (1.16217)	Top-1 acc 55.859 (59.593)	Top-5 acc 79.688 (80.791)	lr 0.01557
Train [51][1700/3239]	Time 0.247 (0.609)	Data Time 0.002 (0.022)	Loss 2.7161 (2.6774)	Entropy 1.16004 (1.16215)	Top-1 acc 56.641 (59.595)	Top-5 acc 78.516 (80.795)	lr 0.01557
Train [51][1710/3239]	Time 0.232 (0.608)	Data Time 0.001 (0.021)	Loss 2.6018 (2.6771)	Entropy 1.16003 (1.16214)	Top-1 acc 62.891 (59.604)	Top-5 acc 82.812 (80.806)	lr 0.01557
Train [51][1720/3239]	Time 0.228 (0.607)	Data Time 0.001 (0.021)	Loss 2.7938 (2.6771)	Entropy 1.15998 (1.16213)	Top-1 acc 57.422 (59.606)	Top-5 acc 76.172 (80.801)	lr 0.01557
Train [51][1730/3239]	Time 0.224 (0.606)	Data Time 0.001 (0.021)	Loss 2.6453 (2.6773)	Entropy 1.15989 (1.16212)	Top-1 acc 62.500 (59.607)	Top-5 acc 81.641 (80.799)	lr 0.01557
Train [51][1740/3239]	Time 0.216 (0.605)	Data Time 0.001 (0.021)	Loss 2.7605 (2.6775)	Entropy 1.15983 (1.16210)	Top-1 acc 59.375 (59.609)	Top-5 acc 78.516 (80.793)	lr 0.01557
Train [51][1750/3239]	Time 0.232 (0.604)	Data Time 0.002 (0.021)	Loss 2.7769 (2.6772)	Entropy 1.15974 (1.16209)	Top-1 acc 58.984 (59.618)	Top-5 acc 77.344 (80.798)	lr 0.01556
Train [51][1760/3239]	Time 0.281 (0.604)	Data Time 0.001 (0.021)	Loss 2.6382 (2.6772)	Entropy 1.15974 (1.16208)	Top-1 acc 62.109 (59.616)	Top-5 acc 81.250 (80.795)	lr 0.01556
Train [51][1770/3239]	Time 2.553 (0.603)	Data Time 0.002 (0.021)	Loss 2.6572 (2.6772)	Entropy 1.15974 (1.16206)	Top-1 acc 57.031 (59.616)	Top-5 acc 80.469 (80.792)	lr 0.01556
Train [51][1780/3239]	Time 0.228 (0.601)	Data Time 0.002 (0.021)	Loss 2.6487 (2.6773)	Entropy 1.15970 (1.16205)	Top-1 acc 58.594 (59.610)	Top-5 acc 80.859 (80.787)	lr 0.01556
Train [51][1790/3239]	Time 0.215 (0.600)	Data Time 0.001 (0.021)	Loss 2.6656 (2.6771)	Entropy 1.15970 (1.16204)	Top-1 acc 59.766 (59.616)	Top-5 acc 83.203 (80.794)	lr 0.01556
Train [51][1800/3239]	Time 0.224 (0.599)	Data Time 0.001 (0.020)	Loss 2.5846 (2.6773)	Entropy 1.15968 (1.16202)	Top-1 acc 63.281 (59.604)	Top-5 acc 82.031 (80.795)	lr 0.01556
Train [51][1810/3239]	Time 0.219 (0.598)	Data Time 0.001 (0.020)	Loss 2.7272 (2.6776)	Entropy 1.15968 (1.16201)	Top-1 acc 58.594 (59.593)	Top-5 acc 78.516 (80.787)	lr 0.01556
Train [51][1820/3239]	Time 0.220 (0.598)	Data Time 0.001 (0.020)	Loss 2.7633 (2.6777)	Entropy 1.15971 (1.16200)	Top-1 acc 56.641 (59.593)	Top-5 acc 79.297 (80.782)	lr 0.01556
Train [51][1830/3239]	Time 0.226 (0.597)	Data Time 0.001 (0.020)	Loss 2.5751 (2.6777)	Entropy 1.15968 (1.16199)	Top-1 acc 61.328 (59.590)	Top-5 acc 82.812 (80.781)	lr 0.01556
Train [51][1840/3239]	Time 0.236 (0.596)	Data Time 0.001 (0.020)	Loss 2.5492 (2.6777)	Entropy 1.15968 (1.16197)	Top-1 acc 64.844 (59.587)	Top-5 acc 84.766 (80.782)	lr 0.01556
Train [51][1850/3239]	Time 0.236 (0.596)	Data Time 0.001 (0.020)	Loss 2.7443 (2.6780)	Entropy 1.15967 (1.16196)	Top-1 acc 57.812 (59.578)	Top-5 acc 77.734 (80.774)	lr 0.01555
Train [51][1860/3239]	Time 0.340 (0.595)	Data Time 0.001 (0.020)	Loss 2.7103 (2.6781)	Entropy 1.15966 (1.16195)	Top-1 acc 58.203 (59.571)	Top-5 acc 81.250 (80.769)	lr 0.01555
Train [51][1870/3239]	Time 0.213 (0.594)	Data Time 0.002 (0.020)	Loss 2.8633 (2.6781)	Entropy 1.15967 (1.16194)	Top-1 acc 53.906 (59.572)	Top-5 acc 75.781 (80.769)	lr 0.01555
Train [51][1880/3239]	Time 2.526 (0.594)	Data Time 0.001 (0.020)	Loss 2.7214 (2.6782)	Entropy 1.15967 (1.16192)	Top-1 acc 59.375 (59.570)	Top-5 acc 78.516 (80.766)	lr 0.01555
Train [51][1890/3239]	Time 0.235 (0.592)	Data Time 0.001 (0.020)	Loss 2.7744 (2.6784)	Entropy 1.15964 (1.16191)	Top-1 acc 57.422 (59.557)	Top-5 acc 78.906 (80.764)	lr 0.01555
Train [51][1900/3239]	Time 0.227 (0.591)	Data Time 0.001 (0.019)	Loss 2.7398 (2.6787)	Entropy 1.15960 (1.16190)	Top-1 acc 56.250 (59.549)	Top-5 acc 80.469 (80.760)	lr 0.01555
Train [51][1910/3239]	Time 0.328 (0.590)	Data Time 0.001 (0.019)	Loss 2.6285 (2.6788)	Entropy 1.15947 (1.16189)	Top-1 acc 62.500 (59.550)	Top-5 acc 83.203 (80.757)	lr 0.01555
Train [51][1920/3239]	Time 0.235 (0.590)	Data Time 0.001 (0.019)	Loss 2.7140 (2.6788)	Entropy 1.15945 (1.16187)	Top-1 acc 58.984 (59.549)	Top-5 acc 81.641 (80.758)	lr 0.01555
Train [51][1930/3239]	Time 0.223 (0.589)	Data Time 0.001 (0.019)	Loss 2.4956 (2.6787)	Entropy 1.15943 (1.16186)	Top-1 acc 61.328 (59.550)	Top-5 acc 85.156 (80.758)	lr 0.01555
Train [51][1940/3239]	Time 0.203 (0.588)	Data Time 0.001 (0.019)	Loss 2.9768 (2.6788)	Entropy 1.15942 (1.16185)	Top-1 acc 51.172 (59.545)	Top-5 acc 75.391 (80.758)	lr 0.01555
Train [51][1950/3239]	Time 0.265 (0.588)	Data Time 0.001 (0.019)	Loss 2.9076 (2.6791)	Entropy 1.15940 (1.16184)	Top-1 acc 52.734 (59.537)	Top-5 acc 76.953 (80.752)	lr 0.01554
Train [51][1960/3239]	Time 0.330 (0.587)	Data Time 0.001 (0.019)	Loss 2.5635 (2.6791)	Entropy 1.15937 (1.16182)	Top-1 acc 62.109 (59.530)	Top-5 acc 83.203 (80.751)	lr 0.01554
Train [51][1970/3239]	Time 0.210 (0.586)	Data Time 0.001 (0.019)	Loss 2.5281 (2.6791)	Entropy 1.15924 (1.16181)	Top-1 acc 61.328 (59.526)	Top-5 acc 82.812 (80.752)	lr 0.01554
Train [51][1980/3239]	Time 0.276 (0.586)	Data Time 0.001 (0.019)	Loss 2.7479 (2.6791)	Entropy 1.15917 (1.16180)	Top-1 acc 58.203 (59.521)	Top-5 acc 78.516 (80.747)	lr 0.01554
Train [51][1990/3239]	Time 2.511 (0.585)	Data Time 0.001 (0.019)	Loss 2.6417 (2.6792)	Entropy 1.15917 (1.16179)	Top-1 acc 62.109 (59.519)	Top-5 acc 81.250 (80.747)	lr 0.01554
Train [51][2000/3239]	Time 0.204 (0.583)	Data Time 0.001 (0.019)	Loss 2.6039 (2.6793)	Entropy 1.15914 (1.16177)	Top-1 acc 61.328 (59.520)	Top-5 acc 82.422 (80.743)	lr 0.01554
Train [51][2010/3239]	Time 0.310 (0.583)	Data Time 0.001 (0.018)	Loss 2.6413 (2.6795)	Entropy 1.15914 (1.16176)	Top-1 acc 60.547 (59.513)	Top-5 acc 80.469 (80.743)	lr 0.01554
Train [51][2020/3239]	Time 0.267 (0.582)	Data Time 0.001 (0.018)	Loss 2.7668 (2.6792)	Entropy 1.15912 (1.16175)	Top-1 acc 57.812 (59.515)	Top-5 acc 79.297 (80.750)	lr 0.01554
Train [51][2030/3239]	Time 0.233 (0.606)	Data Time 0.002 (0.018)	Loss 2.6682 (2.6791)	Entropy 1.15911 (1.16173)	Top-1 acc 57.422 (59.521)	Top-5 acc 81.250 (80.752)	lr 0.01554
Train [51][2040/3239]	Time 0.220 (0.605)	Data Time 0.002 (0.018)	Loss 2.7843 (2.6792)	Entropy 1.15907 (1.16172)	Top-1 acc 56.250 (59.520)	Top-5 acc 77.344 (80.752)	lr 0.01554
Train [51][2050/3239]	Time 0.268 (0.605)	Data Time 0.001 (0.018)	Loss 2.6738 (2.6799)	Entropy 1.15907 (1.16171)	Top-1 acc 58.984 (59.507)	Top-5 acc 81.641 (80.744)	lr 0.01553
Train [51][2060/3239]	Time 0.273 (0.604)	Data Time 0.004 (0.018)	Loss 2.6879 (2.6800)	Entropy 1.15907 (1.16169)	Top-1 acc 58.984 (59.506)	Top-5 acc 78.516 (80.737)	lr 0.01553
Train [51][2070/3239]	Time 0.218 (0.603)	Data Time 0.001 (0.018)	Loss 2.6679 (2.6800)	Entropy 1.15904 (1.16168)	Top-1 acc 62.500 (59.503)	Top-5 acc 82.422 (80.737)	lr 0.01553
Train [51][2080/3239]	Time 0.218 (0.603)	Data Time 0.001 (0.018)	Loss 2.6366 (2.6800)	Entropy 1.15904 (1.16167)	Top-1 acc 60.156 (59.503)	Top-5 acc 83.203 (80.738)	lr 0.01553
Train [51][2090/3239]	Time 0.253 (0.602)	Data Time 0.002 (0.018)	Loss 2.7218 (2.6803)	Entropy 1.15903 (1.16166)	Top-1 acc 59.766 (59.500)	Top-5 acc 78.125 (80.732)	lr 0.01553
Train [51][2100/3239]	Time 2.414 (0.601)	Data Time 0.001 (0.018)	Loss 2.7688 (2.6800)	Entropy 1.15903 (1.16164)	Top-1 acc 57.031 (59.505)	Top-5 acc 78.125 (80.737)	lr 0.01553
Train [51][2110/3239]	Time 0.225 (0.599)	Data Time 0.001 (0.018)	Loss 2.8386 (2.6799)	Entropy 1.15904 (1.16163)	Top-1 acc 54.688 (59.509)	Top-5 acc 78.125 (80.739)	lr 0.01553
Train [51][2120/3239]	Time 0.208 (0.599)	Data Time 0.001 (0.018)	Loss 2.7794 (2.6800)	Entropy 1.15905 (1.16162)	Top-1 acc 58.594 (59.505)	Top-5 acc 77.734 (80.737)	lr 0.01553
Train [51][2130/3239]	Time 0.228 (0.598)	Data Time 0.001 (0.018)	Loss 2.7386 (2.6802)	Entropy 1.15903 (1.16161)	Top-1 acc 60.156 (59.501)	Top-5 acc 80.469 (80.734)	lr 0.01553
Train [51][2140/3239]	Time 0.224 (0.597)	Data Time 0.001 (0.017)	Loss 2.5636 (2.6802)	Entropy 1.15903 (1.16160)	Top-1 acc 60.156 (59.504)	Top-5 acc 81.641 (80.736)	lr 0.01553
Train [51][2150/3239]	Time 0.253 (0.597)	Data Time 0.001 (0.017)	Loss 2.8402 (2.6802)	Entropy 1.15902 (1.16158)	Top-1 acc 56.641 (59.501)	Top-5 acc 75.391 (80.732)	lr 0.01553
Train [51][2160/3239]	Time 0.215 (0.596)	Data Time 0.001 (0.017)	Loss 2.7669 (2.6802)	Entropy 1.15900 (1.16157)	Top-1 acc 55.078 (59.497)	Top-5 acc 80.078 (80.733)	lr 0.01552
Train [51][2170/3239]	Time 0.203 (0.596)	Data Time 0.001 (0.017)	Loss 2.6413 (2.6802)	Entropy 1.15898 (1.16156)	Top-1 acc 58.203 (59.491)	Top-5 acc 83.203 (80.731)	lr 0.01552
Train [51][2180/3239]	Time 0.239 (0.595)	Data Time 0.001 (0.017)	Loss 2.8475 (2.6804)	Entropy 1.15892 (1.16155)	Top-1 acc 53.125 (59.484)	Top-5 acc 76.953 (80.729)	lr 0.01552
Train [51][2190/3239]	Time 0.238 (0.594)	Data Time 0.001 (0.017)	Loss 2.7235 (2.6805)	Entropy 1.15883 (1.16154)	Top-1 acc 58.594 (59.483)	Top-5 acc 82.031 (80.729)	lr 0.01552
Train [51][2200/3239]	Time 0.229 (0.594)	Data Time 0.001 (0.017)	Loss 2.7216 (2.6804)	Entropy 1.15879 (1.16152)	Top-1 acc 54.688 (59.485)	Top-5 acc 79.297 (80.729)	lr 0.01552
Train [51][2210/3239]	Time 2.505 (0.593)	Data Time 0.001 (0.017)	Loss 2.5348 (2.6804)	Entropy 1.15879 (1.16151)	Top-1 acc 64.844 (59.492)	Top-5 acc 84.375 (80.729)	lr 0.01552
Train [51][2220/3239]	Time 0.248 (0.592)	Data Time 0.001 (0.017)	Loss 2.6899 (2.6805)	Entropy 1.15877 (1.16150)	Top-1 acc 58.984 (59.489)	Top-5 acc 80.859 (80.725)	lr 0.01552
Train [51][2230/3239]	Time 0.253 (0.591)	Data Time 0.001 (0.017)	Loss 2.6692 (2.6803)	Entropy 1.15874 (1.16149)	Top-1 acc 57.422 (59.493)	Top-5 acc 80.859 (80.726)	lr 0.01552
Train [51][2240/3239]	Time 0.236 (0.591)	Data Time 0.002 (0.017)	Loss 2.7093 (2.6803)	Entropy 1.15867 (1.16147)	Top-1 acc 59.766 (59.497)	Top-5 acc 80.859 (80.725)	lr 0.01552
Train [51][2250/3239]	Time 0.349 (0.590)	Data Time 0.001 (0.017)	Loss 2.7457 (2.6805)	Entropy 1.15863 (1.16146)	Top-1 acc 56.250 (59.494)	Top-5 acc 77.734 (80.720)	lr 0.01552
Train [51][2260/3239]	Time 0.234 (0.590)	Data Time 0.001 (0.017)	Loss 2.8221 (2.6807)	Entropy 1.15859 (1.16145)	Top-1 acc 51.953 (59.488)	Top-5 acc 78.516 (80.717)	lr 0.01551
Train [51][2270/3239]	Time 0.234 (0.589)	Data Time 0.001 (0.017)	Loss 2.6122 (2.6808)	Entropy 1.15855 (1.16144)	Top-1 acc 58.984 (59.479)	Top-5 acc 85.156 (80.718)	lr 0.01551
Train [51][2280/3239]	Time 0.239 (0.589)	Data Time 0.001 (0.016)	Loss 2.6412 (2.6809)	Entropy 1.15855 (1.16142)	Top-1 acc 57.812 (59.474)	Top-5 acc 82.812 (80.716)	lr 0.01551
Train [51][2290/3239]	Time 0.214 (0.588)	Data Time 0.001 (0.016)	Loss 2.7471 (2.6809)	Entropy 1.15858 (1.16141)	Top-1 acc 58.984 (59.476)	Top-5 acc 78.906 (80.716)	lr 0.01551
Train [51][2300/3239]	Time 0.228 (0.588)	Data Time 0.001 (0.016)	Loss 2.7548 (2.6810)	Entropy 1.15857 (1.16140)	Top-1 acc 55.859 (59.475)	Top-5 acc 78.516 (80.713)	lr 0.01551
Train [51][2310/3239]	Time 0.202 (0.587)	Data Time 0.001 (0.016)	Loss 2.6290 (2.6809)	Entropy 1.15853 (1.16139)	Top-1 acc 60.156 (59.476)	Top-5 acc 83.203 (80.715)	lr 0.01551
Train [51][2320/3239]	Time 2.447 (0.586)	Data Time 0.001 (0.016)	Loss 2.8404 (2.6809)	Entropy 1.15853 (1.16137)	Top-1 acc 55.078 (59.477)	Top-5 acc 78.125 (80.708)	lr 0.01551
Train [51][2330/3239]	Time 0.214 (0.585)	Data Time 0.002 (0.016)	Loss 2.8065 (2.6808)	Entropy 1.15849 (1.16136)	Top-1 acc 55.859 (59.481)	Top-5 acc 75.781 (80.712)	lr 0.01551
Train [51][2340/3239]	Time 0.223 (0.584)	Data Time 0.001 (0.016)	Loss 2.7170 (2.6810)	Entropy 1.15849 (1.16135)	Top-1 acc 57.422 (59.474)	Top-5 acc 81.250 (80.712)	lr 0.01551
Train [51][2350/3239]	Time 0.222 (0.584)	Data Time 0.001 (0.016)	Loss 2.6937 (2.6812)	Entropy 1.15844 (1.16134)	Top-1 acc 59.766 (59.470)	Top-5 acc 81.250 (80.710)	lr 0.01551
Train [51][2360/3239]	Time 0.220 (0.583)	Data Time 0.001 (0.016)	Loss 2.7716 (2.6811)	Entropy 1.15842 (1.16132)	Top-1 acc 58.203 (59.471)	Top-5 acc 76.562 (80.716)	lr 0.01550
Train [51][2370/3239]	Time 0.233 (0.583)	Data Time 0.001 (0.016)	Loss 2.7924 (2.6812)	Entropy 1.15837 (1.16131)	Top-1 acc 54.688 (59.469)	Top-5 acc 79.297 (80.714)	lr 0.01550
Train [51][2380/3239]	Time 0.228 (0.582)	Data Time 0.005 (0.016)	Loss 2.8340 (2.6814)	Entropy 1.15831 (1.16130)	Top-1 acc 56.250 (59.466)	Top-5 acc 77.344 (80.711)	lr 0.01550
Train [51][2390/3239]	Time 0.463 (0.601)	Data Time 0.003 (0.016)	Loss 2.6264 (2.6816)	Entropy 1.15829 (1.16129)	Top-1 acc 60.156 (59.459)	Top-5 acc 80.469 (80.709)	lr 0.01550
Train [51][2400/3239]	Time 0.231 (0.601)	Data Time 0.002 (0.016)	Loss 2.7257 (2.6817)	Entropy 1.15829 (1.16127)	Top-1 acc 60.938 (59.457)	Top-5 acc 80.469 (80.707)	lr 0.01550
Train [51][2410/3239]	Time 0.234 (0.601)	Data Time 0.002 (0.016)	Loss 2.5724 (2.6818)	Entropy 1.15827 (1.16126)	Top-1 acc 63.281 (59.458)	Top-5 acc 81.250 (80.702)	lr 0.01550
Train [51][2420/3239]	Time 0.239 (0.600)	Data Time 0.001 (0.016)	Loss 2.7193 (2.6821)	Entropy 1.15824 (1.16125)	Top-1 acc 59.766 (59.453)	Top-5 acc 78.906 (80.697)	lr 0.01550
Train [51][2430/3239]	Time 2.466 (0.599)	Data Time 0.001 (0.016)	Loss 2.6740 (2.6821)	Entropy 1.15824 (1.16124)	Top-1 acc 56.250 (59.447)	Top-5 acc 79.297 (80.695)	lr 0.01550
Train [51][2440/3239]	Time 0.366 (0.598)	Data Time 0.001 (0.016)	Loss 2.6683 (2.6825)	Entropy 1.15823 (1.16123)	Top-1 acc 59.375 (59.438)	Top-5 acc 79.688 (80.687)	lr 0.01550
Train [51][2450/3239]	Time 0.266 (0.597)	Data Time 0.001 (0.015)	Loss 2.7640 (2.6827)	Entropy 1.15818 (1.16121)	Top-1 acc 58.984 (59.438)	Top-5 acc 80.469 (80.682)	lr 0.01550
Train [51][2460/3239]	Time 0.213 (0.597)	Data Time 0.001 (0.015)	Loss 2.7344 (2.6829)	Entropy 1.15812 (1.16120)	Top-1 acc 57.422 (59.433)	Top-5 acc 82.812 (80.678)	lr 0.01549
Train [51][2470/3239]	Time 0.225 (0.596)	Data Time 0.001 (0.015)	Loss 2.9351 (2.6828)	Entropy 1.15803 (1.16119)	Top-1 acc 56.641 (59.435)	Top-5 acc 75.391 (80.681)	lr 0.01549
Train [51][2480/3239]	Time 0.227 (0.596)	Data Time 0.001 (0.015)	Loss 2.6620 (2.6829)	Entropy 1.15787 (1.16117)	Top-1 acc 61.328 (59.434)	Top-5 acc 82.812 (80.677)	lr 0.01549
Train [51][2490/3239]	Time 0.308 (0.595)	Data Time 0.001 (0.015)	Loss 2.7277 (2.6834)	Entropy 1.15786 (1.16116)	Top-1 acc 56.250 (59.422)	Top-5 acc 80.078 (80.669)	lr 0.01549
Train [51][2500/3239]	Time 0.227 (0.595)	Data Time 0.002 (0.015)	Loss 2.6682 (2.6834)	Entropy 1.15752 (1.16115)	Top-1 acc 64.062 (59.427)	Top-5 acc 80.859 (80.673)	lr 0.01549
Train [51][2510/3239]	Time 0.240 (0.594)	Data Time 0.003 (0.015)	Loss 2.7314 (2.6833)	Entropy 1.15803 (1.16113)	Top-1 acc 55.859 (59.427)	Top-5 acc 79.688 (80.673)	lr 0.01549
Train [51][2520/3239]	Time 0.236 (0.594)	Data Time 0.001 (0.015)	Loss 2.7093 (2.6833)	Entropy 1.15802 (1.16112)	Top-1 acc 55.469 (59.428)	Top-5 acc 79.297 (80.668)	lr 0.01549
Train [51][2530/3239]	Time 0.252 (0.593)	Data Time 0.001 (0.015)	Loss 2.7958 (2.6834)	Entropy 1.15801 (1.16111)	Top-1 acc 60.938 (59.427)	Top-5 acc 78.906 (80.666)	lr 0.01549
Train [51][2540/3239]	Time 2.662 (0.593)	Data Time 0.001 (0.015)	Loss 2.6443 (2.6834)	Entropy 1.15801 (1.16110)	Top-1 acc 60.547 (59.425)	Top-5 acc 80.469 (80.666)	lr 0.01549
Train [51][2550/3239]	Time 0.223 (0.591)	Data Time 0.002 (0.015)	Loss 2.7392 (2.6833)	Entropy 1.15800 (1.16108)	Top-1 acc 60.938 (59.430)	Top-5 acc 80.078 (80.668)	lr 0.01549
Train [51][2560/3239]	Time 0.229 (0.591)	Data Time 0.001 (0.015)	Loss 2.7262 (2.6833)	Entropy 1.15799 (1.16107)	Top-1 acc 58.594 (59.429)	Top-5 acc 82.031 (80.668)	lr 0.01548
Train [51][2570/3239]	Time 0.212 (0.590)	Data Time 0.001 (0.015)	Loss 2.5221 (2.6833)	Entropy 1.15807 (1.16106)	Top-1 acc 64.062 (59.426)	Top-5 acc 83.984 (80.667)	lr 0.01548
Train [51][2580/3239]	Time 0.224 (0.590)	Data Time 0.001 (0.015)	Loss 2.6623 (2.6835)	Entropy 1.15804 (1.16105)	Top-1 acc 60.547 (59.420)	Top-5 acc 82.422 (80.665)	lr 0.01548
Train [51][2590/3239]	Time 0.242 (0.589)	Data Time 0.005 (0.015)	Loss 2.5823 (2.6836)	Entropy 1.15802 (1.16104)	Top-1 acc 60.156 (59.420)	Top-5 acc 82.422 (80.667)	lr 0.01548
Train [51][2600/3239]	Time 0.212 (0.589)	Data Time 0.001 (0.015)	Loss 2.6972 (2.6835)	Entropy 1.15801 (1.16103)	Top-1 acc 57.422 (59.424)	Top-5 acc 80.859 (80.670)	lr 0.01548
Train [51][2610/3239]	Time 0.230 (0.588)	Data Time 0.001 (0.015)	Loss 2.8776 (2.6837)	Entropy 1.15794 (1.16101)	Top-1 acc 50.391 (59.422)	Top-5 acc 77.344 (80.664)	lr 0.01548
Train [51][2620/3239]	Time 0.230 (0.588)	Data Time 0.001 (0.015)	Loss 2.6038 (2.6837)	Entropy 1.15790 (1.16100)	Top-1 acc 60.547 (59.420)	Top-5 acc 80.859 (80.663)	lr 0.01548
Train [51][2630/3239]	Time 0.231 (0.587)	Data Time 0.001 (0.015)	Loss 2.6135 (2.6837)	Entropy 1.15783 (1.16099)	Top-1 acc 62.109 (59.419)	Top-5 acc 84.375 (80.665)	lr 0.01548
Train [51][2640/3239]	Time 0.268 (0.587)	Data Time 0.002 (0.014)	Loss 2.5666 (2.6835)	Entropy 1.15780 (1.16098)	Top-1 acc 60.156 (59.424)	Top-5 acc 80.469 (80.667)	lr 0.01548
Train [51][2650/3239]	Time 0.213 (0.587)	Data Time 0.001 (0.014)	Loss 2.6797 (2.6836)	Entropy 1.15776 (1.16097)	Top-1 acc 57.422 (59.416)	Top-5 acc 81.641 (80.663)	lr 0.01548
Train [51][2660/3239]	Time 0.226 (0.586)	Data Time 0.001 (0.014)	Loss 2.6504 (2.6838)	Entropy 1.15776 (1.16095)	Top-1 acc 63.672 (59.412)	Top-5 acc 82.031 (80.660)	lr 0.01547
Train [51][2670/3239]	Time 0.215 (0.586)	Data Time 0.001 (0.014)	Loss 2.8539 (2.6840)	Entropy 1.15774 (1.16094)	Top-1 acc 51.562 (59.407)	Top-5 acc 77.344 (80.655)	lr 0.01547
Train [51][2680/3239]	Time 0.387 (0.585)	Data Time 0.001 (0.014)	Loss 2.5390 (2.6841)	Entropy 1.15769 (1.16093)	Top-1 acc 66.016 (59.408)	Top-5 acc 83.594 (80.652)	lr 0.01547
Train [51][2690/3239]	Time 0.249 (0.585)	Data Time 0.001 (0.014)	Loss 2.7362 (2.6842)	Entropy 1.15768 (1.16092)	Top-1 acc 60.547 (59.408)	Top-5 acc 76.953 (80.648)	lr 0.01547
Train [51][2700/3239]	Time 0.228 (0.584)	Data Time 0.002 (0.014)	Loss 2.5673 (2.6842)	Entropy 1.15768 (1.16091)	Top-1 acc 57.812 (59.409)	Top-5 acc 83.203 (80.648)	lr 0.01547
Train [51][2710/3239]	Time 0.248 (0.584)	Data Time 0.001 (0.014)	Loss 2.5890 (2.6841)	Entropy 1.15769 (1.16089)	Top-1 acc 60.156 (59.411)	Top-5 acc 81.641 (80.651)	lr 0.01547
Train [51][2720/3239]	Time 0.213 (0.583)	Data Time 0.001 (0.014)	Loss 2.6032 (2.6841)	Entropy 1.15773 (1.16088)	Top-1 acc 61.328 (59.412)	Top-5 acc 81.250 (80.651)	lr 0.01547
Train [51][2730/3239]	Time 0.380 (0.583)	Data Time 0.001 (0.014)	Loss 2.5137 (2.6840)	Entropy 1.15771 (1.16087)	Top-1 acc 64.062 (59.414)	Top-5 acc 83.984 (80.652)	lr 0.01547
Train [51][2740/3239]	Time 0.246 (0.601)	Data Time 0.003 (0.014)	Loss 2.9116 (2.6842)	Entropy 1.15769 (1.16086)	Top-1 acc 49.609 (59.408)	Top-5 acc 77.734 (80.648)	lr 0.01547
Train [51][2750/3239]	Time 0.243 (0.600)	Data Time 0.002 (0.014)	Loss 2.6589 (2.6842)	Entropy 1.15770 (1.16085)	Top-1 acc 61.328 (59.408)	Top-5 acc 81.250 (80.651)	lr 0.01547
Train [51][2760/3239]	Time 0.179 (0.600)	Data Time 0.002 (0.014)	Loss 2.5403 (2.6842)	Entropy 1.15767 (1.16084)	Top-1 acc 64.062 (59.409)	Top-5 acc 83.594 (80.651)	lr 0.01547
Train [51][2770/3239]	Time 0.234 (0.599)	Data Time 0.001 (0.014)	Loss 2.6914 (2.6842)	Entropy 1.15767 (1.16083)	Top-1 acc 60.156 (59.407)	Top-5 acc 80.859 (80.651)	lr 0.01546
Train [51][2780/3239]	Time 0.347 (0.599)	Data Time 0.001 (0.014)	Loss 2.5112 (2.6843)	Entropy 1.15769 (1.16081)	Top-1 acc 62.500 (59.404)	Top-5 acc 82.031 (80.646)	lr 0.01546
Train [51][2790/3239]	Time 0.218 (0.599)	Data Time 0.002 (0.014)	Loss 2.7591 (2.6844)	Entropy 1.15758 (1.16080)	Top-1 acc 56.250 (59.402)	Top-5 acc 78.906 (80.644)	lr 0.01546
Train [51][2800/3239]	Time 0.283 (0.598)	Data Time 0.004 (0.014)	Loss 2.6284 (2.6844)	Entropy 1.15756 (1.16079)	Top-1 acc 60.547 (59.399)	Top-5 acc 81.641 (80.645)	lr 0.01546
Train [51][2810/3239]	Time 0.224 (0.598)	Data Time 0.001 (0.014)	Loss 2.7537 (2.6844)	Entropy 1.15751 (1.16078)	Top-1 acc 58.984 (59.399)	Top-5 acc 79.688 (80.646)	lr 0.01546
Train [51][2820/3239]	Time 0.233 (0.597)	Data Time 0.001 (0.014)	Loss 2.6283 (2.6844)	Entropy 1.15751 (1.16077)	Top-1 acc 58.984 (59.399)	Top-5 acc 81.250 (80.645)	lr 0.01546
Train [51][2830/3239]	Time 0.214 (0.597)	Data Time 0.001 (0.014)	Loss 2.7093 (2.6842)	Entropy 1.15750 (1.16076)	Top-1 acc 57.812 (59.408)	Top-5 acc 77.344 (80.646)	lr 0.01546
Train [51][2840/3239]	Time 0.227 (0.596)	Data Time 0.001 (0.014)	Loss 2.5085 (2.6843)	Entropy 1.15750 (1.16075)	Top-1 acc 60.547 (59.404)	Top-5 acc 85.156 (80.643)	lr 0.01546
Train [51][2850/3239]	Time 0.269 (0.596)	Data Time 0.001 (0.014)	Loss 2.7188 (2.6842)	Entropy 1.15749 (1.16073)	Top-1 acc 60.547 (59.404)	Top-5 acc 80.859 (80.642)	lr 0.01546
Train [51][2860/3239]	Time 0.230 (0.595)	Data Time 0.001 (0.014)	Loss 2.6548 (2.6844)	Entropy 1.15747 (1.16072)	Top-1 acc 58.594 (59.398)	Top-5 acc 80.078 (80.638)	lr 0.01546
Train [51][2870/3239]	Time 0.236 (0.595)	Data Time 0.001 (0.013)	Loss 2.4973 (2.6845)	Entropy 1.15747 (1.16071)	Top-1 acc 66.406 (59.396)	Top-5 acc 85.156 (80.637)	lr 0.01545
Train [51][2880/3239]	Time 0.352 (0.594)	Data Time 0.001 (0.013)	Loss 2.5621 (2.6844)	Entropy 1.15746 (1.16070)	Top-1 acc 66.016 (59.400)	Top-5 acc 82.422 (80.641)	lr 0.01545
Train [51][2890/3239]	Time 0.215 (0.594)	Data Time 0.002 (0.013)	Loss 2.6087 (2.6843)	Entropy 1.15743 (1.16069)	Top-1 acc 62.109 (59.403)	Top-5 acc 80.469 (80.642)	lr 0.01545
Train [51][2900/3239]	Time 0.269 (0.593)	Data Time 0.002 (0.013)	Loss 2.5480 (2.6843)	Entropy 1.15740 (1.16068)	Top-1 acc 63.281 (59.402)	Top-5 acc 82.031 (80.642)	lr 0.01545
Train [51][2910/3239]	Time 0.215 (0.593)	Data Time 0.001 (0.013)	Loss 2.7081 (2.6844)	Entropy 1.15739 (1.16067)	Top-1 acc 57.422 (59.400)	Top-5 acc 81.250 (80.642)	lr 0.01545
Train [51][2920/3239]	Time 0.238 (0.593)	Data Time 0.001 (0.013)	Loss 2.5122 (2.6843)	Entropy 1.15739 (1.16066)	Top-1 acc 64.844 (59.402)	Top-5 acc 82.812 (80.645)	lr 0.01545
Train [51][2930/3239]	Time 0.375 (0.592)	Data Time 0.001 (0.013)	Loss 2.7895 (2.6845)	Entropy 1.15738 (1.16064)	Top-1 acc 55.859 (59.394)	Top-5 acc 76.953 (80.639)	lr 0.01545
Train [51][2940/3239]	Time 0.262 (0.592)	Data Time 0.001 (0.013)	Loss 2.6426 (2.6846)	Entropy 1.15739 (1.16063)	Top-1 acc 63.281 (59.392)	Top-5 acc 80.859 (80.638)	lr 0.01545
Train [51][2950/3239]	Time 0.215 (0.591)	Data Time 0.001 (0.013)	Loss 2.6883 (2.6848)	Entropy 1.15738 (1.16062)	Top-1 acc 60.938 (59.388)	Top-5 acc 80.859 (80.633)	lr 0.01545
Train [51][2960/3239]	Time 0.244 (0.591)	Data Time 0.001 (0.013)	Loss 2.6930 (2.6848)	Entropy 1.15715 (1.16061)	Top-1 acc 57.422 (59.386)	Top-5 acc 79.297 (80.630)	lr 0.01545
Train [51][2970/3239]	Time 0.245 (0.590)	Data Time 0.001 (0.013)	Loss 2.5715 (2.6847)	Entropy 1.15711 (1.16060)	Top-1 acc 59.375 (59.388)	Top-5 acc 82.812 (80.632)	lr 0.01544
Train [51][2980/3239]	Time 0.214 (0.590)	Data Time 0.001 (0.013)	Loss 2.6266 (2.6846)	Entropy 1.15707 (1.16059)	Top-1 acc 60.938 (59.393)	Top-5 acc 83.594 (80.636)	lr 0.01544
Train [51][2990/3239]	Time 0.246 (0.590)	Data Time 0.001 (0.013)	Loss 2.7497 (2.6845)	Entropy 1.15703 (1.16058)	Top-1 acc 58.594 (59.396)	Top-5 acc 77.734 (80.638)	lr 0.01544
Train [51][3000/3239]	Time 0.219 (0.589)	Data Time 0.001 (0.013)	Loss 2.8033 (2.6845)	Entropy 1.15699 (1.16056)	Top-1 acc 57.422 (59.394)	Top-5 acc 78.125 (80.639)	lr 0.01544
Train [51][3010/3239]	Time 0.252 (0.589)	Data Time 0.002 (0.013)	Loss 2.6657 (2.6846)	Entropy 1.15695 (1.16055)	Top-1 acc 61.328 (59.393)	Top-5 acc 80.859 (80.637)	lr 0.01544
Train [51][3020/3239]	Time 0.223 (0.588)	Data Time 0.001 (0.013)	Loss 2.6282 (2.6846)	Entropy 1.15688 (1.16054)	Top-1 acc 62.109 (59.395)	Top-5 acc 81.641 (80.639)	lr 0.01544
Train [51][3030/3239]	Time 0.261 (0.588)	Data Time 0.001 (0.013)	Loss 2.8538 (2.6845)	Entropy 1.15690 (1.16053)	Top-1 acc 57.031 (59.397)	Top-5 acc 78.125 (80.640)	lr 0.01544
Train [51][3040/3239]	Time 0.253 (0.587)	Data Time 0.001 (0.013)	Loss 2.6445 (2.6846)	Entropy 1.15691 (1.16052)	Top-1 acc 60.547 (59.395)	Top-5 acc 82.031 (80.640)	lr 0.01544
Train [51][3050/3239]	Time 0.212 (0.587)	Data Time 0.001 (0.013)	Loss 2.7739 (2.6848)	Entropy 1.15684 (1.16050)	Top-1 acc 59.375 (59.390)	Top-5 acc 81.250 (80.638)	lr 0.01544
Train [51][3060/3239]	Time 0.236 (0.587)	Data Time 0.001 (0.013)	Loss 2.6853 (2.6846)	Entropy 1.15682 (1.16049)	Top-1 acc 55.859 (59.392)	Top-5 acc 82.031 (80.639)	lr 0.01544
Train [51][3070/3239]	Time 0.418 (0.603)	Data Time 0.003 (0.013)	Loss 2.8234 (2.6849)	Entropy 1.15681 (1.16048)	Top-1 acc 57.812 (59.387)	Top-5 acc 79.688 (80.636)	lr 0.01543
Train [51][3080/3239]	Time 0.268 (0.602)	Data Time 0.002 (0.013)	Loss 2.6611 (2.6849)	Entropy 1.15679 (1.16047)	Top-1 acc 59.375 (59.387)	Top-5 acc 82.031 (80.636)	lr 0.01543
Train [51][3090/3239]	Time 0.213 (0.602)	Data Time 0.001 (0.013)	Loss 2.6599 (2.6850)	Entropy 1.15675 (1.16046)	Top-1 acc 61.328 (59.384)	Top-5 acc 82.031 (80.634)	lr 0.01543
Train [51][3100/3239]	Time 0.304 (0.601)	Data Time 0.001 (0.013)	Loss 2.7304 (2.6852)	Entropy 1.15675 (1.16044)	Top-1 acc 58.984 (59.380)	Top-5 acc 79.297 (80.631)	lr 0.01543
Train [51][3110/3239]	Time 0.235 (0.601)	Data Time 0.001 (0.013)	Loss 2.5333 (2.6850)	Entropy 1.15674 (1.16043)	Top-1 acc 64.453 (59.383)	Top-5 acc 81.641 (80.633)	lr 0.01543
Train [51][3120/3239]	Time 0.344 (0.601)	Data Time 0.001 (0.013)	Loss 2.6234 (2.6852)	Entropy 1.15666 (1.16042)	Top-1 acc 60.938 (59.378)	Top-5 acc 82.422 (80.631)	lr 0.01543
Train [51][3130/3239]	Time 0.231 (0.600)	Data Time 0.001 (0.012)	Loss 2.6498 (2.6851)	Entropy 1.15664 (1.16041)	Top-1 acc 62.500 (59.380)	Top-5 acc 83.594 (80.630)	lr 0.01543
Train [51][3140/3239]	Time 0.234 (0.600)	Data Time 0.001 (0.012)	Loss 2.8373 (2.6851)	Entropy 1.15664 (1.16040)	Top-1 acc 56.641 (59.384)	Top-5 acc 78.516 (80.630)	lr 0.01543
Train [51][3150/3239]	Time 0.267 (0.599)	Data Time 0.001 (0.012)	Loss 2.7625 (2.6851)	Entropy 1.15665 (1.16038)	Top-1 acc 59.766 (59.386)	Top-5 acc 80.078 (80.628)	lr 0.01543
Train [51][3160/3239]	Time 0.238 (0.599)	Data Time 0.002 (0.012)	Loss 2.7320 (2.6854)	Entropy 1.15658 (1.16037)	Top-1 acc 58.594 (59.379)	Top-5 acc 79.688 (80.623)	lr 0.01543
Train [51][3170/3239]	Time 0.290 (0.598)	Data Time 0.001 (0.012)	Loss 2.6601 (2.6857)	Entropy 1.15659 (1.16036)	Top-1 acc 60.547 (59.372)	Top-5 acc 83.203 (80.621)	lr 0.01542
Train [51][3180/3239]	Time 0.206 (0.598)	Data Time 0.000 (0.012)	Loss 2.6353 (2.6856)	Entropy 1.15656 (1.16035)	Top-1 acc 60.938 (59.374)	Top-5 acc 82.812 (80.623)	lr 0.01542
Train [51][3190/3239]	Time 0.196 (0.598)	Data Time 0.000 (0.012)	Loss 2.4352 (2.6857)	Entropy 1.15653 (1.16034)	Top-1 acc 67.969 (59.375)	Top-5 acc 85.547 (80.623)	lr 0.01542
Train [51][3200/3239]	Time 0.217 (0.597)	Data Time 0.000 (0.012)	Loss 2.4731 (2.6857)	Entropy 1.15649 (1.16033)	Top-1 acc 66.406 (59.377)	Top-5 acc 83.594 (80.622)	lr 0.01542
Train [51][3210/3239]	Time 0.217 (0.596)	Data Time 0.000 (0.012)	Loss 2.7601 (2.6858)	Entropy 1.15647 (1.16031)	Top-1 acc 60.938 (59.374)	Top-5 acc 78.906 (80.619)	lr 0.01542
Train [51][3220/3239]	Time 0.289 (0.596)	Data Time 0.000 (0.012)	Loss 2.6793 (2.6857)	Entropy 1.15644 (1.16030)	Top-1 acc 62.891 (59.375)	Top-5 acc 79.297 (80.620)	lr 0.01542
Train [51][3230/3239]	Time 0.151 (0.595)	Data Time 0.000 (0.012)	Loss 2.8440 (2.6857)	Entropy 1.15639 (1.16029)	Top-1 acc 56.250 (59.377)	Top-5 acc 76.172 (80.620)	lr 0.01542
Train [51][3239/3239]	Time 2.169 (0.595)	Data Time 0.000 (0.012)	Loss 2.7861 (2.6856)	Entropy 1.15639 (1.16028)	Top-1 acc 58.025 (59.380)	Top-5 acc 82.716 (80.622)	lr 0.01542
==========Valid [51/120]	loss 1.530	top-1 acc 65.404 (65.404)	top-5 acc 85.576	Train top-1 59.380	top-5 80.622	Entropy 1.15639	Latency-None: 0.000ms	Flops: 548.34M
Train [52][0/3239]	Time 37.827 (37.827)	Data Time 34.564 (34.564)	Loss 2.6497 (2.6497)	Entropy 1.15633 (1.15633)	Top-1 acc 62.109 (62.109)	Top-5 acc 82.031 (82.031)	lr 0.01542
Train [52][10/3239]	Time 2.711 (3.975)	Data Time 0.002 (3.180)	Loss 2.5938 (2.6105)	Entropy 1.15633 (1.15633)	Top-1 acc 58.594 (60.156)	Top-5 acc 83.203 (82.528)	lr 0.01542
Train [52][20/3239]	Time 0.234 (2.197)	Data Time 0.001 (1.666)	Loss 2.6446 (2.6110)	Entropy 1.15623 (1.15629)	Top-1 acc 57.422 (60.733)	Top-5 acc 79.688 (82.124)	lr 0.01542
Train [52][30/3239]	Time 0.232 (1.639)	Data Time 0.001 (1.129)	Loss 2.6971 (2.6157)	Entropy 1.15625 (1.15627)	Top-1 acc 57.422 (60.572)	Top-5 acc 81.250 (82.082)	lr 0.01542
Train [52][40/3239]	Time 0.231 (1.355)	Data Time 0.001 (0.854)	Loss 2.6397 (2.6228)	Entropy 1.15624 (1.15626)	Top-1 acc 59.766 (60.423)	Top-5 acc 83.594 (81.812)	lr 0.01541
Train [52][50/3239]	Time 0.219 (1.180)	Data Time 0.001 (0.687)	Loss 2.9645 (2.6253)	Entropy 1.15619 (1.15625)	Top-1 acc 51.562 (60.302)	Top-5 acc 75.000 (81.733)	lr 0.01541
Train [52][60/3239]	Time 0.213 (1.061)	Data Time 0.001 (0.575)	Loss 2.6092 (2.6278)	Entropy 1.15617 (1.15624)	Top-1 acc 62.500 (60.291)	Top-5 acc 81.641 (81.717)	lr 0.01541
Train [52][70/3239]	Time 0.224 (0.973)	Data Time 0.001 (0.494)	Loss 2.8174 (2.6295)	Entropy 1.15617 (1.15623)	Top-1 acc 54.297 (60.255)	Top-5 acc 77.734 (81.679)	lr 0.01541
Train [52][80/3239]	Time 0.219 (0.911)	Data Time 0.001 (0.433)	Loss 2.7205 (2.6302)	Entropy 1.15614 (1.15622)	Top-1 acc 58.594 (60.204)	Top-5 acc 78.125 (81.694)	lr 0.01541
Train [52][90/3239]	Time 0.237 (0.862)	Data Time 0.001 (0.386)	Loss 2.6294 (2.6283)	Entropy 1.15608 (1.15621)	Top-1 acc 61.719 (60.448)	Top-5 acc 82.031 (81.791)	lr 0.01541
Train [52][100/3239]	Time 0.202 (0.821)	Data Time 0.001 (0.348)	Loss 2.8007 (2.6308)	Entropy 1.15609 (1.15620)	Top-1 acc 54.688 (60.388)	Top-5 acc 78.906 (81.730)	lr 0.01541
Train [52][110/3239]	Time 0.253 (0.789)	Data Time 0.002 (0.317)	Loss 2.5760 (2.6293)	Entropy 1.15609 (1.15619)	Top-1 acc 62.109 (60.459)	Top-5 acc 83.594 (81.750)	lr 0.01541
Train [52][120/3239]	Time 2.388 (0.761)	Data Time 0.001 (0.291)	Loss 2.8400 (2.6355)	Entropy 1.15609 (1.15618)	Top-1 acc 57.031 (60.415)	Top-5 acc 79.297 (81.644)	lr 0.01541
Train [52][130/3239]	Time 0.240 (0.720)	Data Time 0.001 (0.268)	Loss 2.6240 (2.6306)	Entropy 1.15610 (1.15617)	Top-1 acc 60.156 (60.559)	Top-5 acc 80.859 (81.659)	lr 0.01541
Train [52][140/3239]	Time 0.222 (0.703)	Data Time 0.001 (0.249)	Loss 2.5847 (2.6279)	Entropy 1.15611 (1.15617)	Top-1 acc 63.672 (60.694)	Top-5 acc 82.812 (81.668)	lr 0.01540
Train [52][150/3239]	Time 0.221 (0.686)	Data Time 0.001 (0.233)	Loss 2.7171 (2.6265)	Entropy 1.15607 (1.15616)	Top-1 acc 56.641 (60.751)	Top-5 acc 79.688 (81.708)	lr 0.01540
Train [52][160/3239]	Time 0.210 (0.673)	Data Time 0.001 (0.219)	Loss 2.6728 (2.6262)	Entropy 1.15604 (1.15616)	Top-1 acc 59.766 (60.736)	Top-5 acc 78.906 (81.687)	lr 0.01540
Train [52][170/3239]	Time 0.204 (0.660)	Data Time 0.001 (0.206)	Loss 2.6295 (2.6278)	Entropy 1.15601 (1.15615)	Top-1 acc 60.547 (60.682)	Top-5 acc 80.859 (81.622)	lr 0.01540
Train [52][180/3239]	Time 0.257 (0.911)	Data Time 0.002 (0.195)	Loss 2.6500 (2.6281)	Entropy 1.15594 (1.15614)	Top-1 acc 62.500 (60.722)	Top-5 acc 81.641 (81.669)	lr 0.01540
Train [52][190/3239]	Time 0.206 (0.890)	Data Time 0.002 (0.185)	Loss 2.8486 (2.6321)	Entropy 1.15591 (1.15613)	Top-1 acc 53.125 (60.610)	Top-5 acc 76.953 (81.596)	lr 0.01540
Train [52][200/3239]	Time 0.217 (0.870)	Data Time 0.001 (0.176)	Loss 2.6815 (2.6329)	Entropy 1.15591 (1.15612)	Top-1 acc 58.594 (60.570)	Top-5 acc 78.125 (81.592)	lr 0.01540
Train [52][210/3239]	Time 0.224 (0.851)	Data Time 0.001 (0.167)	Loss 2.4657 (2.6337)	Entropy 1.15591 (1.15611)	Top-1 acc 65.234 (60.604)	Top-5 acc 85.938 (81.592)	lr 0.01540
Train [52][220/3239]	Time 0.241 (0.833)	Data Time 0.001 (0.160)	Loss 2.8164 (2.6371)	Entropy 1.15587 (1.15610)	Top-1 acc 55.469 (60.492)	Top-5 acc 78.906 (81.522)	lr 0.01540
Train [52][230/3239]	Time 2.464 (0.817)	Data Time 0.002 (0.153)	Loss 2.7480 (2.6365)	Entropy 1.15587 (1.15609)	Top-1 acc 56.641 (60.489)	Top-5 acc 80.078 (81.556)	lr 0.01540
Train [52][240/3239]	Time 0.334 (0.793)	Data Time 0.001 (0.147)	Loss 2.6486 (2.6393)	Entropy 1.15578 (1.15608)	Top-1 acc 59.375 (60.417)	Top-5 acc 81.641 (81.524)	lr 0.01539
Train [52][250/3239]	Time 0.240 (0.780)	Data Time 0.002 (0.141)	Loss 2.6426 (2.6387)	Entropy 1.15573 (1.15606)	Top-1 acc 60.938 (60.429)	Top-5 acc 80.859 (81.527)	lr 0.01539
Train [52][260/3239]	Time 0.164 (0.768)	Data Time 0.001 (0.136)	Loss 2.7948 (2.6389)	Entropy 1.15572 (1.15605)	Top-1 acc 53.125 (60.399)	Top-5 acc 80.859 (81.518)	lr 0.01539
Train [52][270/3239]	Time 0.222 (0.756)	Data Time 0.001 (0.131)	Loss 2.9319 (2.6400)	Entropy 1.15566 (1.15603)	Top-1 acc 57.422 (60.378)	Top-5 acc 76.562 (81.481)	lr 0.01539
Train [52][280/3239]	Time 0.225 (0.746)	Data Time 0.001 (0.126)	Loss 2.8244 (2.6409)	Entropy 1.15562 (1.15602)	Top-1 acc 55.078 (60.351)	Top-5 acc 78.125 (81.485)	lr 0.01539
Train [52][290/3239]	Time 0.375 (0.737)	Data Time 0.001 (0.122)	Loss 2.6041 (2.6410)	Entropy 1.15564 (1.15601)	Top-1 acc 60.938 (60.355)	Top-5 acc 79.688 (81.469)	lr 0.01539
Train [52][300/3239]	Time 0.232 (0.727)	Data Time 0.001 (0.118)	Loss 2.5615 (2.6416)	Entropy 1.15561 (1.15599)	Top-1 acc 63.672 (60.324)	Top-5 acc 82.422 (81.469)	lr 0.01539
Train [52][310/3239]	Time 0.239 (0.719)	Data Time 0.001 (0.114)	Loss 2.6290 (2.6435)	Entropy 1.15554 (1.15598)	Top-1 acc 62.500 (60.279)	Top-5 acc 82.031 (81.430)	lr 0.01539
Train [52][320/3239]	Time 0.223 (0.711)	Data Time 0.001 (0.111)	Loss 2.7160 (2.6425)	Entropy 1.15549 (1.15597)	Top-1 acc 60.938 (60.295)	Top-5 acc 77.734 (81.445)	lr 0.01539
Train [52][330/3239]	Time 0.231 (0.703)	Data Time 0.002 (0.107)	Loss 2.4800 (2.6440)	Entropy 1.15548 (1.15595)	Top-1 acc 60.547 (60.278)	Top-5 acc 87.891 (81.408)	lr 0.01539
Train [52][340/3239]	Time 2.563 (0.697)	Data Time 0.001 (0.104)	Loss 2.6134 (2.6444)	Entropy 1.15548 (1.15594)	Top-1 acc 63.281 (60.286)	Top-5 acc 81.641 (81.401)	lr 0.01538
Train [52][350/3239]	Time 0.209 (0.683)	Data Time 0.001 (0.101)	Loss 2.7625 (2.6454)	Entropy 1.15548 (1.15593)	Top-1 acc 55.469 (60.263)	Top-5 acc 78.516 (81.362)	lr 0.01538
Train [52][360/3239]	Time 0.227 (0.677)	Data Time 0.001 (0.099)	Loss 2.8397 (2.6465)	Entropy 1.15545 (1.15591)	Top-1 acc 58.984 (60.255)	Top-5 acc 76.562 (81.328)	lr 0.01538
Train [52][370/3239]	Time 0.201 (0.671)	Data Time 0.001 (0.096)	Loss 2.6034 (2.6481)	Entropy 1.15541 (1.15590)	Top-1 acc 60.547 (60.213)	Top-5 acc 82.031 (81.294)	lr 0.01538
Train [52][380/3239]	Time 0.226 (0.666)	Data Time 0.001 (0.093)	Loss 2.5683 (2.6480)	Entropy 1.15529 (1.15588)	Top-1 acc 64.062 (60.221)	Top-5 acc 81.641 (81.279)	lr 0.01538
Train [52][390/3239]	Time 0.313 (0.661)	Data Time 0.001 (0.091)	Loss 2.4545 (2.6457)	Entropy 1.15521 (1.15587)	Top-1 acc 64.062 (60.288)	Top-5 acc 86.328 (81.325)	lr 0.01538
Train [52][400/3239]	Time 0.223 (0.656)	Data Time 0.001 (0.089)	Loss 2.6131 (2.6458)	Entropy 1.15521 (1.15585)	Top-1 acc 62.891 (60.298)	Top-5 acc 80.469 (81.322)	lr 0.01538
Train [52][410/3239]	Time 0.235 (0.651)	Data Time 0.001 (0.087)	Loss 2.7371 (2.6455)	Entropy 1.15515 (1.15584)	Top-1 acc 58.594 (60.284)	Top-5 acc 78.125 (81.316)	lr 0.01538
Train [52][420/3239]	Time 0.236 (0.647)	Data Time 0.001 (0.085)	Loss 2.7886 (2.6446)	Entropy 1.15516 (1.15582)	Top-1 acc 58.203 (60.296)	Top-5 acc 79.688 (81.332)	lr 0.01538
Train [52][430/3239]	Time 0.253 (0.643)	Data Time 0.001 (0.083)	Loss 2.5028 (2.6453)	Entropy 1.15516 (1.15580)	Top-1 acc 64.844 (60.279)	Top-5 acc 81.641 (81.314)	lr 0.01538
Train [52][440/3239]	Time 0.225 (0.639)	Data Time 0.001 (0.081)	Loss 2.7361 (2.6453)	Entropy 1.15516 (1.15579)	Top-1 acc 59.766 (60.302)	Top-5 acc 82.031 (81.328)	lr 0.01537
Train [52][450/3239]	Time 2.456 (0.635)	Data Time 0.001 (0.079)	Loss 2.7733 (2.6464)	Entropy 1.15516 (1.15578)	Top-1 acc 57.422 (60.265)	Top-5 acc 78.125 (81.321)	lr 0.01537
Train [52][460/3239]	Time 0.235 (0.626)	Data Time 0.001 (0.078)	Loss 2.6999 (2.6460)	Entropy 1.15514 (1.15576)	Top-1 acc 60.938 (60.286)	Top-5 acc 81.641 (81.324)	lr 0.01537
Train [52][470/3239]	Time 0.241 (0.623)	Data Time 0.001 (0.076)	Loss 2.8488 (2.6478)	Entropy 1.15512 (1.15575)	Top-1 acc 57.422 (60.228)	Top-5 acc 77.734 (81.296)	lr 0.01537
Train [52][480/3239]	Time 0.241 (0.620)	Data Time 0.001 (0.074)	Loss 2.8249 (2.6488)	Entropy 1.15512 (1.15574)	Top-1 acc 54.297 (60.192)	Top-5 acc 78.906 (81.274)	lr 0.01537
Train [52][490/3239]	Time 0.264 (0.616)	Data Time 0.001 (0.073)	Loss 2.5311 (2.6488)	Entropy 1.15510 (1.15572)	Top-1 acc 62.500 (60.194)	Top-5 acc 82.422 (81.274)	lr 0.01537
Train [52][500/3239]	Time 0.217 (0.613)	Data Time 0.001 (0.071)	Loss 2.7902 (2.6498)	Entropy 1.15506 (1.15571)	Top-1 acc 56.641 (60.179)	Top-5 acc 78.125 (81.252)	lr 0.01537
Train [52][510/3239]	Time 0.239 (0.610)	Data Time 0.002 (0.070)	Loss 3.6436 (2.6522)	Entropy 1.15505 (1.15570)	Top-1 acc 47.266 (60.131)	Top-5 acc 70.312 (81.215)	lr 0.01537
Train [52][520/3239]	Time 0.242 (0.607)	Data Time 0.001 (0.069)	Loss 2.4258 (2.6516)	Entropy 1.15500 (1.15568)	Top-1 acc 65.234 (60.155)	Top-5 acc 86.328 (81.222)	lr 0.01537
Train [52][530/3239]	Time 0.306 (0.605)	Data Time 0.001 (0.068)	Loss 2.6942 (2.6514)	Entropy 1.15495 (1.15567)	Top-1 acc 62.109 (60.160)	Top-5 acc 79.297 (81.234)	lr 0.01537
Train [52][540/3239]	Time 0.276 (0.691)	Data Time 0.002 (0.066)	Loss 2.7786 (2.6522)	Entropy 1.15493 (1.15566)	Top-1 acc 53.125 (60.119)	Top-5 acc 78.906 (81.217)	lr 0.01537
Train [52][550/3239]	Time 0.239 (0.688)	Data Time 0.002 (0.065)	Loss 2.7736 (2.6528)	Entropy 1.15488 (1.15564)	Top-1 acc 61.328 (60.109)	Top-5 acc 77.734 (81.205)	lr 0.01536
Train [52][560/3239]	Time 2.555 (0.684)	Data Time 0.001 (0.064)	Loss 2.5661 (2.6525)	Entropy 1.15488 (1.15563)	Top-1 acc 63.672 (60.100)	Top-5 acc 81.250 (81.205)	lr 0.01536
Train [52][570/3239]	Time 0.220 (0.676)	Data Time 0.001 (0.063)	Loss 2.6453 (2.6546)	Entropy 1.15492 (1.15562)	Top-1 acc 58.203 (60.030)	Top-5 acc 82.031 (81.168)	lr 0.01536
Train [52][580/3239]	Time 0.253 (0.673)	Data Time 0.001 (0.062)	Loss 2.8034 (2.6547)	Entropy 1.15489 (1.15561)	Top-1 acc 58.203 (60.036)	Top-5 acc 77.734 (81.160)	lr 0.01536
Train [52][590/3239]	Time 0.207 (0.669)	Data Time 0.001 (0.061)	Loss 2.7266 (2.6553)	Entropy 1.15487 (1.15559)	Top-1 acc 55.859 (60.011)	Top-5 acc 76.953 (81.144)	lr 0.01536
Train [52][600/3239]	Time 0.210 (0.665)	Data Time 0.001 (0.060)	Loss 2.6877 (2.6559)	Entropy 1.15487 (1.15558)	Top-1 acc 58.203 (60.004)	Top-5 acc 80.469 (81.137)	lr 0.01536
Train [52][610/3239]	Time 0.242 (0.662)	Data Time 0.001 (0.059)	Loss 2.6550 (2.6562)	Entropy 1.15478 (1.15557)	Top-1 acc 57.812 (59.993)	Top-5 acc 81.250 (81.135)	lr 0.01536
Train [52][620/3239]	Time 0.319 (0.659)	Data Time 0.001 (0.058)	Loss 2.7183 (2.6557)	Entropy 1.15481 (1.15556)	Top-1 acc 56.641 (60.004)	Top-5 acc 78.516 (81.139)	lr 0.01536
Train [52][630/3239]	Time 0.225 (0.656)	Data Time 0.001 (0.057)	Loss 2.7752 (2.6548)	Entropy 1.15478 (1.15554)	Top-1 acc 58.594 (60.039)	Top-5 acc 78.906 (81.163)	lr 0.01536
Train [52][640/3239]	Time 0.231 (0.653)	Data Time 0.001 (0.056)	Loss 2.6904 (2.6551)	Entropy 1.15475 (1.15553)	Top-1 acc 56.250 (60.029)	Top-5 acc 80.859 (81.154)	lr 0.01536
Train [52][650/3239]	Time 0.215 (0.650)	Data Time 0.001 (0.055)	Loss 2.5983 (2.6555)	Entropy 1.15472 (1.15552)	Top-1 acc 58.984 (60.016)	Top-5 acc 84.375 (81.155)	lr 0.01535
Train [52][660/3239]	Time 0.235 (0.648)	Data Time 0.001 (0.055)	Loss 2.7449 (2.6557)	Entropy 1.15467 (1.15551)	Top-1 acc 58.203 (59.998)	Top-5 acc 78.125 (81.160)	lr 0.01535
Train [52][670/3239]	Time 2.635 (0.645)	Data Time 0.001 (0.054)	Loss 2.5717 (2.6549)	Entropy 1.15467 (1.15550)	Top-1 acc 60.156 (60.015)	Top-5 acc 84.375 (81.182)	lr 0.01535
Train [52][680/3239]	Time 0.216 (0.639)	Data Time 0.001 (0.053)	Loss 2.6215 (2.6549)	Entropy 1.15468 (1.15548)	Top-1 acc 57.031 (60.015)	Top-5 acc 81.250 (81.168)	lr 0.01535
Train [52][690/3239]	Time 0.233 (0.636)	Data Time 0.001 (0.052)	Loss 2.3970 (2.6546)	Entropy 1.15462 (1.15547)	Top-1 acc 66.797 (60.019)	Top-5 acc 83.594 (81.168)	lr 0.01535
Train [52][700/3239]	Time 0.219 (0.634)	Data Time 0.001 (0.052)	Loss 2.6967 (2.6550)	Entropy 1.15461 (1.15546)	Top-1 acc 59.375 (60.019)	Top-5 acc 79.297 (81.166)	lr 0.01535
Train [52][710/3239]	Time 0.211 (0.631)	Data Time 0.001 (0.051)	Loss 2.6993 (2.6548)	Entropy 1.15453 (1.15545)	Top-1 acc 60.156 (60.039)	Top-5 acc 78.906 (81.165)	lr 0.01535
Train [52][720/3239]	Time 0.306 (0.629)	Data Time 0.001 (0.050)	Loss 2.5892 (2.6553)	Entropy 1.15446 (1.15543)	Top-1 acc 64.062 (60.031)	Top-5 acc 83.594 (81.169)	lr 0.01535
Train [52][730/3239]	Time 0.234 (0.627)	Data Time 0.001 (0.050)	Loss 2.6498 (2.6561)	Entropy 1.15441 (1.15542)	Top-1 acc 60.938 (60.026)	Top-5 acc 79.297 (81.145)	lr 0.01535
Train [52][740/3239]	Time 0.233 (0.624)	Data Time 0.001 (0.049)	Loss 2.7152 (2.6561)	Entropy 1.15438 (1.15541)	Top-1 acc 57.422 (60.028)	Top-5 acc 79.688 (81.147)	lr 0.01535
Train [52][750/3239]	Time 0.212 (0.622)	Data Time 0.001 (0.048)	Loss 2.8429 (2.6559)	Entropy 1.15438 (1.15539)	Top-1 acc 60.156 (60.043)	Top-5 acc 76.562 (81.157)	lr 0.01534
Train [52][760/3239]	Time 0.211 (0.620)	Data Time 0.001 (0.048)	Loss 2.7271 (2.6557)	Entropy 1.15436 (1.15538)	Top-1 acc 60.938 (60.056)	Top-5 acc 79.688 (81.158)	lr 0.01534
Train [52][770/3239]	Time 0.236 (0.618)	Data Time 0.001 (0.047)	Loss 2.9041 (2.6569)	Entropy 1.15436 (1.15537)	Top-1 acc 57.422 (60.020)	Top-5 acc 76.562 (81.128)	lr 0.01534
Train [52][780/3239]	Time 2.417 (0.616)	Data Time 0.002 (0.046)	Loss 2.5501 (2.6570)	Entropy 1.15436 (1.15535)	Top-1 acc 63.672 (60.020)	Top-5 acc 81.641 (81.126)	lr 0.01534
Train [52][790/3239]	Time 0.225 (0.611)	Data Time 0.002 (0.046)	Loss 2.6425 (2.6575)	Entropy 1.15433 (1.15534)	Top-1 acc 60.547 (60.004)	Top-5 acc 81.250 (81.116)	lr 0.01534
Train [52][800/3239]	Time 0.232 (0.609)	Data Time 0.001 (0.045)	Loss 2.4506 (2.6572)	Entropy 1.15433 (1.15533)	Top-1 acc 64.453 (60.014)	Top-5 acc 86.719 (81.119)	lr 0.01534
Train [52][810/3239]	Time 0.210 (0.607)	Data Time 0.001 (0.045)	Loss 2.6666 (2.6573)	Entropy 1.15433 (1.15531)	Top-1 acc 62.109 (60.012)	Top-5 acc 78.906 (81.117)	lr 0.01534
Train [52][820/3239]	Time 0.226 (0.606)	Data Time 0.001 (0.044)	Loss 2.7507 (2.6576)	Entropy 1.15435 (1.15530)	Top-1 acc 55.859 (59.987)	Top-5 acc 79.688 (81.111)	lr 0.01534
Train [52][830/3239]	Time 0.201 (0.604)	Data Time 0.001 (0.044)	Loss 2.6295 (2.6579)	Entropy 1.15434 (1.15529)	Top-1 acc 57.422 (59.987)	Top-5 acc 82.422 (81.100)	lr 0.01534
Train [52][840/3239]	Time 0.240 (0.602)	Data Time 0.001 (0.043)	Loss 2.5825 (2.6577)	Entropy 1.15431 (1.15528)	Top-1 acc 64.062 (59.990)	Top-5 acc 80.469 (81.108)	lr 0.01534
Train [52][850/3239]	Time 0.218 (0.601)	Data Time 0.001 (0.043)	Loss 2.6992 (2.6583)	Entropy 1.15429 (1.15527)	Top-1 acc 61.719 (59.989)	Top-5 acc 79.688 (81.094)	lr 0.01533
Train [52][860/3239]	Time 0.250 (0.599)	Data Time 0.001 (0.042)	Loss 2.6646 (2.6589)	Entropy 1.15429 (1.15526)	Top-1 acc 57.812 (59.966)	Top-5 acc 78.125 (81.078)	lr 0.01533
Train [52][870/3239]	Time 0.224 (0.597)	Data Time 0.002 (0.042)	Loss 2.6832 (2.6588)	Entropy 1.15425 (1.15525)	Top-1 acc 62.500 (59.972)	Top-5 acc 83.594 (81.089)	lr 0.01533
Train [52][880/3239]	Time 0.209 (0.596)	Data Time 0.001 (0.041)	Loss 2.7907 (2.6588)	Entropy 1.15419 (1.15523)	Top-1 acc 58.594 (59.972)	Top-5 acc 77.734 (81.089)	lr 0.01533
Train [52][890/3239]	Time 2.490 (0.594)	Data Time 0.001 (0.041)	Loss 2.8369 (2.6590)	Entropy 1.15419 (1.15522)	Top-1 acc 57.812 (59.975)	Top-5 acc 76.172 (81.082)	lr 0.01533
Train [52][900/3239]	Time 0.232 (0.590)	Data Time 0.001 (0.040)	Loss 2.6488 (2.6583)	Entropy 1.15416 (1.15521)	Top-1 acc 60.547 (60.006)	Top-5 acc 80.859 (81.087)	lr 0.01533
Train [52][910/3239]	Time 0.255 (0.642)	Data Time 0.002 (0.040)	Loss 2.6801 (2.6581)	Entropy 1.15412 (1.15520)	Top-1 acc 57.422 (59.999)	Top-5 acc 82.031 (81.090)	lr 0.01533
Train [52][920/3239]	Time 0.241 (0.640)	Data Time 0.002 (0.040)	Loss 2.7331 (2.6580)	Entropy 1.15412 (1.15519)	Top-1 acc 60.938 (59.996)	Top-5 acc 77.344 (81.087)	lr 0.01533
Train [52][930/3239]	Time 0.245 (0.638)	Data Time 0.002 (0.039)	Loss 2.6841 (2.6586)	Entropy 1.15407 (1.15518)	Top-1 acc 60.547 (59.985)	Top-5 acc 79.688 (81.068)	lr 0.01533
Train [52][940/3239]	Time 0.254 (0.636)	Data Time 0.001 (0.039)	Loss 2.8132 (2.6589)	Entropy 1.15407 (1.15516)	Top-1 acc 54.688 (59.985)	Top-5 acc 78.125 (81.057)	lr 0.01533
Train [52][950/3239]	Time 0.239 (0.634)	Data Time 0.001 (0.038)	Loss 2.6805 (2.6590)	Entropy 1.15406 (1.15515)	Top-1 acc 53.906 (59.988)	Top-5 acc 81.641 (81.054)	lr 0.01532
Train [52][960/3239]	Time 0.372 (0.633)	Data Time 0.002 (0.038)	Loss 2.5277 (2.6593)	Entropy 1.15405 (1.15514)	Top-1 acc 61.328 (59.983)	Top-5 acc 84.766 (81.052)	lr 0.01532
Train [52][970/3239]	Time 0.234 (0.631)	Data Time 0.002 (0.038)	Loss 2.7341 (2.6593)	Entropy 1.15400 (1.15513)	Top-1 acc 60.547 (59.994)	Top-5 acc 82.031 (81.055)	lr 0.01532
Train [52][980/3239]	Time 0.250 (0.629)	Data Time 0.002 (0.037)	Loss 2.8826 (2.6599)	Entropy 1.15391 (1.15512)	Top-1 acc 53.906 (59.987)	Top-5 acc 75.000 (81.040)	lr 0.01532
Train [52][990/3239]	Time 0.253 (0.628)	Data Time 0.001 (0.037)	Loss 2.5329 (2.6599)	Entropy 1.15387 (1.15511)	Top-1 acc 63.281 (59.980)	Top-5 acc 83.984 (81.035)	lr 0.01532
Train [52][1000/3239]	Time 2.492 (0.626)	Data Time 0.002 (0.037)	Loss 2.6365 (2.6596)	Entropy 1.15387 (1.15509)	Top-1 acc 58.984 (59.981)	Top-5 acc 82.812 (81.044)	lr 0.01532
Train [52][1010/3239]	Time 0.382 (0.623)	Data Time 0.001 (0.036)	Loss 2.6468 (2.6597)	Entropy 1.15384 (1.15508)	Top-1 acc 58.594 (59.976)	Top-5 acc 82.031 (81.041)	lr 0.01532
Train [52][1020/3239]	Time 0.232 (0.621)	Data Time 0.001 (0.036)	Loss 2.6952 (2.6594)	Entropy 1.15381 (1.15507)	Top-1 acc 60.938 (59.985)	Top-5 acc 79.688 (81.044)	lr 0.01532
Train [52][1030/3239]	Time 0.235 (0.620)	Data Time 0.002 (0.036)	Loss 2.7948 (2.6596)	Entropy 1.15379 (1.15506)	Top-1 acc 50.781 (59.977)	Top-5 acc 79.688 (81.051)	lr 0.01532
Train [52][1040/3239]	Time 0.209 (0.618)	Data Time 0.001 (0.035)	Loss 2.7596 (2.6600)	Entropy 1.15377 (1.15504)	Top-1 acc 59.375 (59.971)	Top-5 acc 77.734 (81.043)	lr 0.01532
Train [52][1050/3239]	Time 0.220 (0.617)	Data Time 0.001 (0.035)	Loss 2.7448 (2.6600)	Entropy 1.15368 (1.15503)	Top-1 acc 55.859 (59.965)	Top-5 acc 76.953 (81.034)	lr 0.01531
Train [52][1060/3239]	Time 0.308 (0.616)	Data Time 0.001 (0.035)	Loss 2.5184 (2.6600)	Entropy 1.15368 (1.15502)	Top-1 acc 63.281 (59.964)	Top-5 acc 82.812 (81.036)	lr 0.01531
Train [52][1070/3239]	Time 0.232 (0.614)	Data Time 0.001 (0.034)	Loss 2.5549 (2.6599)	Entropy 1.15369 (1.15501)	Top-1 acc 66.016 (59.968)	Top-5 acc 83.203 (81.039)	lr 0.01531
Train [52][1080/3239]	Time 0.232 (0.613)	Data Time 0.001 (0.034)	Loss 2.6174 (2.6601)	Entropy 1.15365 (1.15499)	Top-1 acc 57.812 (59.952)	Top-5 acc 79.688 (81.041)	lr 0.01531
Train [52][1090/3239]	Time 0.212 (0.611)	Data Time 0.001 (0.034)	Loss 2.7483 (2.6606)	Entropy 1.15364 (1.15498)	Top-1 acc 57.422 (59.940)	Top-5 acc 79.297 (81.029)	lr 0.01531
Train [52][1100/3239]	Time 0.287 (0.610)	Data Time 0.002 (0.033)	Loss 2.6679 (2.6605)	Entropy 1.15360 (1.15497)	Top-1 acc 60.547 (59.939)	Top-5 acc 81.641 (81.033)	lr 0.01531
Train [52][1110/3239]	Time 2.537 (0.609)	Data Time 0.001 (0.033)	Loss 2.5438 (2.6605)	Entropy 1.15360 (1.15496)	Top-1 acc 65.234 (59.947)	Top-5 acc 83.594 (81.030)	lr 0.01531
Train [52][1120/3239]	Time 0.243 (0.606)	Data Time 0.002 (0.033)	Loss 2.6249 (2.6605)	Entropy 1.15357 (1.15494)	Top-1 acc 56.250 (59.944)	Top-5 acc 78.516 (81.022)	lr 0.01531
Train [52][1130/3239]	Time 0.233 (0.604)	Data Time 0.001 (0.033)	Loss 2.5303 (2.6613)	Entropy 1.15357 (1.15493)	Top-1 acc 61.719 (59.924)	Top-5 acc 82.812 (81.009)	lr 0.01531
Train [52][1140/3239]	Time 0.193 (0.603)	Data Time 0.001 (0.032)	Loss 2.7138 (2.6617)	Entropy 1.15378 (1.15492)	Top-1 acc 59.375 (59.930)	Top-5 acc 81.641 (80.997)	lr 0.01531
Train [52][1150/3239]	Time 0.223 (0.602)	Data Time 0.001 (0.032)	Loss 2.6047 (2.6623)	Entropy 1.15377 (1.15491)	Top-1 acc 61.719 (59.922)	Top-5 acc 82.031 (80.988)	lr 0.01530
Train [52][1160/3239]	Time 0.317 (0.601)	Data Time 0.001 (0.032)	Loss 2.5317 (2.6616)	Entropy 1.15378 (1.15490)	Top-1 acc 62.109 (59.935)	Top-5 acc 85.156 (81.004)	lr 0.01530
Train [52][1170/3239]	Time 0.238 (0.600)	Data Time 0.001 (0.032)	Loss 2.6657 (2.6618)	Entropy 1.15380 (1.15489)	Top-1 acc 59.375 (59.920)	Top-5 acc 80.859 (81.006)	lr 0.01530
Train [52][1180/3239]	Time 0.222 (0.599)	Data Time 0.001 (0.031)	Loss 2.6229 (2.6615)	Entropy 1.15376 (1.15488)	Top-1 acc 60.156 (59.918)	Top-5 acc 82.031 (81.010)	lr 0.01530
Train [52][1190/3239]	Time 0.220 (0.597)	Data Time 0.001 (0.031)	Loss 2.5464 (2.6615)	Entropy 1.15379 (1.15487)	Top-1 acc 62.891 (59.935)	Top-5 acc 82.812 (81.007)	lr 0.01530
Train [52][1200/3239]	Time 0.256 (0.596)	Data Time 0.001 (0.031)	Loss 2.6681 (2.6618)	Entropy 1.15373 (1.15486)	Top-1 acc 58.203 (59.927)	Top-5 acc 81.250 (80.998)	lr 0.01530
Train [52][1210/3239]	Time 0.211 (0.595)	Data Time 0.001 (0.031)	Loss 2.5487 (2.6621)	Entropy 1.15371 (1.15485)	Top-1 acc 61.328 (59.921)	Top-5 acc 85.156 (80.993)	lr 0.01530
Train [52][1220/3239]	Time 2.426 (0.594)	Data Time 0.001 (0.030)	Loss 2.6171 (2.6622)	Entropy 1.15371 (1.15485)	Top-1 acc 61.719 (59.923)	Top-5 acc 81.250 (80.987)	lr 0.01530
Train [52][1230/3239]	Time 0.231 (0.591)	Data Time 0.001 (0.030)	Loss 2.5988 (2.6623)	Entropy 1.15364 (1.15484)	Top-1 acc 57.422 (59.927)	Top-5 acc 84.766 (80.985)	lr 0.01530
Train [52][1240/3239]	Time 0.236 (0.590)	Data Time 0.001 (0.030)	Loss 2.7078 (2.6622)	Entropy 1.15364 (1.15483)	Top-1 acc 58.203 (59.932)	Top-5 acc 81.641 (80.988)	lr 0.01530
Train [52][1250/3239]	Time 0.314 (0.589)	Data Time 0.001 (0.030)	Loss 2.5848 (2.6624)	Entropy 1.15362 (1.15482)	Top-1 acc 57.031 (59.923)	Top-5 acc 82.812 (80.985)	lr 0.01530
Train [52][1260/3239]	Time 0.229 (0.588)	Data Time 0.001 (0.029)	Loss 2.5751 (2.6626)	Entropy 1.15352 (1.15481)	Top-1 acc 62.500 (59.919)	Top-5 acc 80.859 (80.980)	lr 0.01529
Train [52][1270/3239]	Time 0.228 (0.627)	Data Time 0.002 (0.029)	Loss 2.5365 (2.6625)	Entropy 1.15347 (1.15480)	Top-1 acc 62.500 (59.915)	Top-5 acc 84.766 (80.988)	lr 0.01529
Train [52][1280/3239]	Time 0.236 (0.626)	Data Time 0.002 (0.029)	Loss 2.7413 (2.6623)	Entropy 1.15346 (1.15479)	Top-1 acc 56.250 (59.920)	Top-5 acc 81.250 (80.992)	lr 0.01529
Train [52][1290/3239]	Time 0.228 (0.624)	Data Time 0.002 (0.029)	Loss 2.8132 (2.6625)	Entropy 1.15340 (1.15478)	Top-1 acc 55.859 (59.910)	Top-5 acc 78.516 (80.980)	lr 0.01529
Train [52][1300/3239]	Time 0.310 (0.623)	Data Time 0.001 (0.029)	Loss 2.6104 (2.6627)	Entropy 1.15339 (1.15476)	Top-1 acc 63.281 (59.903)	Top-5 acc 81.641 (80.975)	lr 0.01529
Train [52][1310/3239]	Time 0.226 (0.622)	Data Time 0.001 (0.028)	Loss 2.5156 (2.6626)	Entropy 1.15338 (1.15475)	Top-1 acc 64.844 (59.902)	Top-5 acc 83.594 (80.977)	lr 0.01529
Train [52][1320/3239]	Time 0.229 (0.621)	Data Time 0.002 (0.028)	Loss 2.7869 (2.6631)	Entropy 1.15336 (1.15474)	Top-1 acc 56.641 (59.887)	Top-5 acc 77.344 (80.965)	lr 0.01529
Train [52][1330/3239]	Time 2.446 (0.620)	Data Time 0.002 (0.028)	Loss 2.5257 (2.6627)	Entropy 1.15336 (1.15473)	Top-1 acc 64.062 (59.897)	Top-5 acc 84.375 (80.977)	lr 0.01529
Train [52][1340/3239]	Time 0.215 (0.617)	Data Time 0.001 (0.028)	Loss 2.9885 (2.6635)	Entropy 1.15332 (1.15472)	Top-1 acc 53.906 (59.874)	Top-5 acc 72.266 (80.958)	lr 0.01529
Train [52][1350/3239]	Time 0.239 (0.615)	Data Time 0.002 (0.028)	Loss 2.5529 (2.6633)	Entropy 1.15331 (1.15471)	Top-1 acc 58.203 (59.878)	Top-5 acc 85.156 (80.963)	lr 0.01529
Train [52][1360/3239]	Time 0.240 (0.614)	Data Time 0.001 (0.027)	Loss 2.7754 (2.6641)	Entropy 1.15330 (1.15470)	Top-1 acc 55.469 (59.854)	Top-5 acc 82.031 (80.957)	lr 0.01528
Train [52][1370/3239]	Time 0.210 (0.613)	Data Time 0.001 (0.027)	Loss 2.9972 (2.6646)	Entropy 1.15323 (1.15469)	Top-1 acc 48.047 (59.843)	Top-5 acc 75.781 (80.945)	lr 0.01528
Train [52][1380/3239]	Time 0.231 (0.612)	Data Time 0.002 (0.027)	Loss 2.4798 (2.6643)	Entropy 1.15322 (1.15468)	Top-1 acc 62.891 (59.843)	Top-5 acc 84.375 (80.950)	lr 0.01528
Train [52][1390/3239]	Time 0.208 (0.611)	Data Time 0.002 (0.027)	Loss 2.6862 (2.6643)	Entropy 1.15318 (1.15467)	Top-1 acc 57.422 (59.845)	Top-5 acc 80.469 (80.950)	lr 0.01528
Train [52][1400/3239]	Time 0.241 (0.610)	Data Time 0.001 (0.027)	Loss 2.5136 (2.6640)	Entropy 1.15314 (1.15466)	Top-1 acc 64.062 (59.845)	Top-5 acc 83.594 (80.959)	lr 0.01528
Train [52][1410/3239]	Time 0.248 (0.609)	Data Time 0.002 (0.026)	Loss 2.5979 (2.6636)	Entropy 1.15311 (1.15465)	Top-1 acc 60.547 (59.861)	Top-5 acc 80.859 (80.971)	lr 0.01528
Train [52][1420/3239]	Time 0.233 (0.608)	Data Time 0.001 (0.026)	Loss 2.6175 (2.6632)	Entropy 1.15308 (1.15464)	Top-1 acc 58.984 (59.873)	Top-5 acc 80.859 (80.982)	lr 0.01528
Train [52][1430/3239]	Time 0.287 (0.607)	Data Time 0.001 (0.026)	Loss 2.6361 (2.6635)	Entropy 1.15309 (1.15463)	Top-1 acc 62.891 (59.864)	Top-5 acc 80.469 (80.978)	lr 0.01528
Train [52][1440/3239]	Time 2.525 (0.606)	Data Time 0.002 (0.026)	Loss 2.7608 (2.6639)	Entropy 1.15309 (1.15462)	Top-1 acc 55.859 (59.852)	Top-5 acc 79.297 (80.971)	lr 0.01528
Train [52][1450/3239]	Time 0.245 (0.604)	Data Time 0.001 (0.026)	Loss 2.4588 (2.6636)	Entropy 1.15309 (1.15461)	Top-1 acc 64.062 (59.855)	Top-5 acc 85.938 (80.976)	lr 0.01528
Train [52][1460/3239]	Time 0.234 (0.603)	Data Time 0.001 (0.026)	Loss 2.6644 (2.6635)	Entropy 1.15306 (1.15459)	Top-1 acc 58.203 (59.854)	Top-5 acc 81.250 (80.982)	lr 0.01527
Train [52][1470/3239]	Time 0.219 (0.602)	Data Time 0.001 (0.025)	Loss 2.5572 (2.6635)	Entropy 1.15297 (1.15458)	Top-1 acc 64.453 (59.858)	Top-5 acc 82.422 (80.980)	lr 0.01527
Train [52][1480/3239]	Time 0.222 (0.601)	Data Time 0.001 (0.025)	Loss 2.7188 (2.6632)	Entropy 1.15297 (1.15457)	Top-1 acc 55.078 (59.861)	Top-5 acc 81.250 (80.991)	lr 0.01527
Train [52][1490/3239]	Time 0.307 (0.600)	Data Time 0.001 (0.025)	Loss 2.7883 (2.6635)	Entropy 1.15299 (1.15456)	Top-1 acc 56.250 (59.854)	Top-5 acc 75.391 (80.983)	lr 0.01527
Train [52][1500/3239]	Time 0.221 (0.599)	Data Time 0.001 (0.025)	Loss 2.5249 (2.6632)	Entropy 1.15300 (1.15455)	Top-1 acc 65.625 (59.861)	Top-5 acc 83.203 (80.986)	lr 0.01527
Train [52][1510/3239]	Time 0.233 (0.598)	Data Time 0.001 (0.025)	Loss 2.6400 (2.6635)	Entropy 1.15296 (1.15454)	Top-1 acc 61.328 (59.855)	Top-5 acc 80.078 (80.981)	lr 0.01527
Train [52][1520/3239]	Time 0.213 (0.597)	Data Time 0.001 (0.025)	Loss 2.7037 (2.6637)	Entropy 1.15289 (1.15453)	Top-1 acc 60.156 (59.856)	Top-5 acc 80.078 (80.977)	lr 0.01527
Train [52][1530/3239]	Time 0.200 (0.596)	Data Time 0.001 (0.025)	Loss 2.4749 (2.6633)	Entropy 1.15286 (1.15452)	Top-1 acc 63.672 (59.867)	Top-5 acc 82.031 (80.983)	lr 0.01527
Train [52][1540/3239]	Time 0.327 (0.595)	Data Time 0.001 (0.024)	Loss 2.7604 (2.6632)	Entropy 1.15284 (1.15451)	Top-1 acc 56.641 (59.871)	Top-5 acc 81.641 (80.986)	lr 0.01527
Train [52][1550/3239]	Time 2.423 (0.594)	Data Time 0.001 (0.024)	Loss 2.6302 (2.6632)	Entropy 1.15284 (1.15450)	Top-1 acc 57.422 (59.880)	Top-5 acc 83.203 (80.984)	lr 0.01527
Train [52][1560/3239]	Time 0.262 (0.592)	Data Time 0.001 (0.024)	Loss 2.5847 (2.6629)	Entropy 1.15285 (1.15449)	Top-1 acc 62.109 (59.885)	Top-5 acc 81.250 (80.990)	lr 0.01526
Train [52][1570/3239]	Time 0.245 (0.591)	Data Time 0.002 (0.024)	Loss 2.6104 (2.6631)	Entropy 1.15286 (1.15448)	Top-1 acc 65.625 (59.883)	Top-5 acc 80.859 (80.982)	lr 0.01526
Train [52][1580/3239]	Time 0.252 (0.590)	Data Time 0.001 (0.024)	Loss 2.7407 (2.6633)	Entropy 1.15283 (1.15447)	Top-1 acc 59.375 (59.877)	Top-5 acc 79.297 (80.977)	lr 0.01526
Train [52][1590/3239]	Time 0.325 (0.590)	Data Time 0.001 (0.024)	Loss 2.7525 (2.6634)	Entropy 1.15281 (1.15446)	Top-1 acc 58.984 (59.875)	Top-5 acc 78.516 (80.976)	lr 0.01526
Train [52][1600/3239]	Time 0.251 (0.589)	Data Time 0.001 (0.024)	Loss 2.6641 (2.6636)	Entropy 1.15279 (1.15445)	Top-1 acc 59.375 (59.869)	Top-5 acc 83.203 (80.979)	lr 0.01526
Train [52][1610/3239]	Time 0.236 (0.588)	Data Time 0.001 (0.023)	Loss 2.6528 (2.6637)	Entropy 1.15277 (1.15444)	Top-1 acc 60.547 (59.861)	Top-5 acc 80.859 (80.979)	lr 0.01526
Train [52][1620/3239]	Time 0.204 (0.587)	Data Time 0.001 (0.023)	Loss 2.6428 (2.6638)	Entropy 1.15277 (1.15443)	Top-1 acc 57.031 (59.858)	Top-5 acc 83.203 (80.982)	lr 0.01526
Train [52][1630/3239]	Time 0.537 (0.616)	Data Time 0.003 (0.023)	Loss 2.7132 (2.6637)	Entropy 1.15275 (1.15442)	Top-1 acc 57.422 (59.862)	Top-5 acc 79.297 (80.985)	lr 0.01526
Train [52][1640/3239]	Time 0.228 (0.615)	Data Time 0.002 (0.023)	Loss 2.5751 (2.6634)	Entropy 1.15275 (1.15441)	Top-1 acc 62.891 (59.863)	Top-5 acc 81.641 (80.983)	lr 0.01526
Train [52][1650/3239]	Time 0.258 (0.614)	Data Time 0.002 (0.023)	Loss 2.7547 (2.6635)	Entropy 1.15276 (1.15440)	Top-1 acc 57.422 (59.856)	Top-5 acc 79.688 (80.986)	lr 0.01526
Train [52][1660/3239]	Time 2.488 (0.613)	Data Time 0.002 (0.023)	Loss 2.5869 (2.6633)	Entropy 1.15276 (1.15439)	Top-1 acc 60.156 (59.862)	Top-5 acc 82.422 (80.989)	lr 0.01525
Train [52][1670/3239]	Time 0.233 (0.611)	Data Time 0.001 (0.023)	Loss 2.6932 (2.6633)	Entropy 1.15274 (1.15438)	Top-1 acc 58.984 (59.865)	Top-5 acc 80.469 (80.989)	lr 0.01525
Train [52][1680/3239]	Time 0.232 (0.610)	Data Time 0.001 (0.022)	Loss 2.7001 (2.6631)	Entropy 1.15273 (1.15437)	Top-1 acc 57.812 (59.872)	Top-5 acc 78.125 (80.992)	lr 0.01525
Train [52][1690/3239]	Time 0.226 (0.609)	Data Time 0.001 (0.022)	Loss 2.7705 (2.6635)	Entropy 1.15266 (1.15436)	Top-1 acc 57.422 (59.864)	Top-5 acc 78.516 (80.982)	lr 0.01525
Train [52][1700/3239]	Time 0.225 (0.608)	Data Time 0.001 (0.022)	Loss 2.6514 (2.6639)	Entropy 1.15265 (1.15435)	Top-1 acc 62.891 (59.856)	Top-5 acc 76.953 (80.970)	lr 0.01525
Train [52][1710/3239]	Time 0.235 (0.608)	Data Time 0.001 (0.022)	Loss 2.8166 (2.6640)	Entropy 1.15262 (1.15434)	Top-1 acc 57.422 (59.855)	Top-5 acc 76.562 (80.967)	lr 0.01525
Train [52][1720/3239]	Time 0.232 (0.607)	Data Time 0.001 (0.022)	Loss 2.6170 (2.6642)	Entropy 1.15258 (1.15433)	Top-1 acc 59.375 (59.854)	Top-5 acc 82.031 (80.966)	lr 0.01525
Train [52][1730/3239]	Time 0.338 (0.606)	Data Time 0.002 (0.022)	Loss 2.6782 (2.6640)	Entropy 1.15255 (1.15432)	Top-1 acc 65.234 (59.859)	Top-5 acc 82.422 (80.965)	lr 0.01525
Train [52][1740/3239]	Time 0.216 (0.605)	Data Time 0.001 (0.022)	Loss 2.7460 (2.6641)	Entropy 1.15253 (1.15431)	Top-1 acc 59.375 (59.856)	Top-5 acc 80.859 (80.962)	lr 0.01525
Train [52][1750/3239]	Time 0.217 (0.604)	Data Time 0.001 (0.022)	Loss 2.7615 (2.6644)	Entropy 1.15251 (1.15430)	Top-1 acc 55.859 (59.848)	Top-5 acc 78.125 (80.954)	lr 0.01525
Train [52][1760/3239]	Time 0.258 (0.603)	Data Time 0.001 (0.022)	Loss 2.6638 (2.6645)	Entropy 1.15250 (1.15429)	Top-1 acc 60.156 (59.856)	Top-5 acc 81.641 (80.952)	lr 0.01524
Train [52][1770/3239]	Time 2.391 (0.603)	Data Time 0.001 (0.021)	Loss 2.5305 (2.6641)	Entropy 1.15250 (1.15428)	Top-1 acc 63.281 (59.862)	Top-5 acc 83.984 (80.960)	lr 0.01524
Train [52][1780/3239]	Time 0.316 (0.600)	Data Time 0.001 (0.021)	Loss 2.5787 (2.6639)	Entropy 1.15245 (1.15427)	Top-1 acc 58.984 (59.867)	Top-5 acc 84.766 (80.962)	lr 0.01524
Train [52][1790/3239]	Time 0.220 (0.600)	Data Time 0.001 (0.021)	Loss 2.6642 (2.6643)	Entropy 1.15242 (1.15425)	Top-1 acc 60.938 (59.857)	Top-5 acc 81.250 (80.953)	lr 0.01524
Train [52][1800/3239]	Time 0.224 (0.599)	Data Time 0.001 (0.021)	Loss 2.8001 (2.6642)	Entropy 1.15240 (1.15424)	Top-1 acc 57.031 (59.858)	Top-5 acc 77.734 (80.955)	lr 0.01524
Train [52][1810/3239]	Time 0.254 (0.598)	Data Time 0.001 (0.021)	Loss 2.8145 (2.6640)	Entropy 1.15238 (1.15423)	Top-1 acc 56.250 (59.861)	Top-5 acc 77.734 (80.953)	lr 0.01524
Train [52][1820/3239]	Time 0.230 (0.597)	Data Time 0.002 (0.021)	Loss 2.7001 (2.6640)	Entropy 1.15238 (1.15422)	Top-1 acc 58.203 (59.862)	Top-5 acc 80.469 (80.958)	lr 0.01524
Train [52][1830/3239]	Time 0.318 (0.597)	Data Time 0.001 (0.021)	Loss 2.5868 (2.6639)	Entropy 1.15233 (1.15421)	Top-1 acc 60.156 (59.865)	Top-5 acc 83.594 (80.963)	lr 0.01524
Train [52][1840/3239]	Time 0.238 (0.596)	Data Time 0.001 (0.021)	Loss 2.5753 (2.6637)	Entropy 1.15221 (1.15420)	Top-1 acc 58.594 (59.865)	Top-5 acc 83.203 (80.968)	lr 0.01524
Train [52][1850/3239]	Time 0.212 (0.596)	Data Time 0.002 (0.021)	Loss 2.6427 (2.6638)	Entropy 1.15219 (1.15419)	Top-1 acc 63.281 (59.864)	Top-5 acc 80.859 (80.965)	lr 0.01524
Train [52][1860/3239]	Time 0.204 (0.595)	Data Time 0.001 (0.020)	Loss 2.5388 (2.6641)	Entropy 1.15214 (1.15418)	Top-1 acc 59.375 (59.861)	Top-5 acc 84.766 (80.962)	lr 0.01524
Train [52][1870/3239]	Time 0.196 (0.594)	Data Time 0.001 (0.020)	Loss 2.8120 (2.6643)	Entropy 1.15214 (1.15417)	Top-1 acc 58.984 (59.857)	Top-5 acc 77.344 (80.956)	lr 0.01523
Train [52][1880/3239]	Time 2.441 (0.594)	Data Time 0.001 (0.020)	Loss 2.3866 (2.6643)	Entropy 1.15214 (1.15416)	Top-1 acc 63.281 (59.857)	Top-5 acc 86.328 (80.956)	lr 0.01523
Train [52][1890/3239]	Time 0.214 (0.592)	Data Time 0.001 (0.020)	Loss 2.6184 (2.6646)	Entropy 1.15202 (1.15415)	Top-1 acc 59.375 (59.852)	Top-5 acc 80.859 (80.949)	lr 0.01523
Train [52][1900/3239]	Time 0.220 (0.591)	Data Time 0.001 (0.020)	Loss 2.5350 (2.6646)	Entropy 1.15205 (1.15414)	Top-1 acc 61.719 (59.848)	Top-5 acc 84.766 (80.951)	lr 0.01523
Train [52][1910/3239]	Time 0.227 (0.590)	Data Time 0.001 (0.020)	Loss 2.6721 (2.6643)	Entropy 1.15200 (1.15413)	Top-1 acc 60.938 (59.855)	Top-5 acc 80.078 (80.960)	lr 0.01523
Train [52][1920/3239]	Time 0.227 (0.590)	Data Time 0.001 (0.020)	Loss 2.7732 (2.6644)	Entropy 1.15199 (1.15412)	Top-1 acc 57.422 (59.854)	Top-5 acc 79.297 (80.961)	lr 0.01523
Train [52][1930/3239]	Time 0.291 (0.589)	Data Time 0.001 (0.020)	Loss 2.7851 (2.6645)	Entropy 1.15196 (1.15410)	Top-1 acc 53.906 (59.848)	Top-5 acc 78.125 (80.957)	lr 0.01523
Train [52][1940/3239]	Time 0.230 (0.589)	Data Time 0.001 (0.020)	Loss 2.7112 (2.6647)	Entropy 1.15195 (1.15409)	Top-1 acc 57.812 (59.842)	Top-5 acc 81.250 (80.953)	lr 0.01523
Train [52][1950/3239]	Time 0.219 (0.588)	Data Time 0.001 (0.020)	Loss 2.6329 (2.6649)	Entropy 1.15196 (1.15408)	Top-1 acc 59.766 (59.838)	Top-5 acc 82.031 (80.950)	lr 0.01523
Train [52][1960/3239]	Time 0.228 (0.587)	Data Time 0.001 (0.019)	Loss 2.8401 (2.6650)	Entropy 1.15197 (1.15407)	Top-1 acc 52.344 (59.836)	Top-5 acc 77.344 (80.948)	lr 0.01523
Train [52][1970/3239]	Time 0.335 (0.587)	Data Time 0.001 (0.019)	Loss 2.5744 (2.6647)	Entropy 1.15190 (1.15406)	Top-1 acc 62.109 (59.839)	Top-5 acc 85.938 (80.956)	lr 0.01522
Train [52][1980/3239]	Time 0.229 (0.586)	Data Time 0.001 (0.019)	Loss 2.6227 (2.6648)	Entropy 1.15187 (1.15405)	Top-1 acc 61.328 (59.836)	Top-5 acc 80.078 (80.958)	lr 0.01522
Train [52][1990/3239]	Time 52.194 (0.611)	Data Time 0.001 (0.019)	Loss 2.6985 (2.6651)	Entropy 1.15187 (1.15404)	Top-1 acc 62.109 (59.835)	Top-5 acc 81.250 (80.950)	lr 0.01522
Train [52][2000/3239]	Time 0.238 (0.609)	Data Time 0.002 (0.019)	Loss 2.7122 (2.6652)	Entropy 1.15184 (1.15403)	Top-1 acc 54.297 (59.830)	Top-5 acc 81.250 (80.948)	lr 0.01522
Train [52][2010/3239]	Time 0.225 (0.608)	Data Time 0.002 (0.019)	Loss 2.6448 (2.6650)	Entropy 1.15181 (1.15402)	Top-1 acc 58.984 (59.835)	Top-5 acc 82.031 (80.952)	lr 0.01522
Train [52][2020/3239]	Time 0.224 (0.607)	Data Time 0.002 (0.019)	Loss 2.5884 (2.6649)	Entropy 1.15180 (1.15401)	Top-1 acc 59.766 (59.839)	Top-5 acc 85.547 (80.956)	lr 0.01522
Train [52][2030/3239]	Time 0.240 (0.607)	Data Time 0.001 (0.019)	Loss 2.7955 (2.6651)	Entropy 1.15179 (1.15399)	Top-1 acc 56.641 (59.836)	Top-5 acc 76.562 (80.955)	lr 0.01522
Train [52][2040/3239]	Time 0.225 (0.606)	Data Time 0.001 (0.019)	Loss 2.7758 (2.6649)	Entropy 1.15179 (1.15398)	Top-1 acc 58.594 (59.841)	Top-5 acc 79.688 (80.957)	lr 0.01522
Train [52][2050/3239]	Time 0.216 (0.605)	Data Time 0.002 (0.019)	Loss 2.8524 (2.6647)	Entropy 1.15175 (1.15397)	Top-1 acc 53.906 (59.848)	Top-5 acc 75.391 (80.958)	lr 0.01522
Train [52][2060/3239]	Time 0.225 (0.605)	Data Time 0.002 (0.019)	Loss 2.4198 (2.6646)	Entropy 1.15174 (1.15396)	Top-1 acc 66.797 (59.851)	Top-5 acc 85.156 (80.959)	lr 0.01522
Train [52][2070/3239]	Time 0.238 (0.604)	Data Time 0.001 (0.019)	Loss 2.6592 (2.6648)	Entropy 1.15174 (1.15395)	Top-1 acc 58.984 (59.850)	Top-5 acc 80.859 (80.956)	lr 0.01521
Train [52][2080/3239]	Time 0.243 (0.604)	Data Time 0.001 (0.018)	Loss 2.5619 (2.6649)	Entropy 1.15174 (1.15394)	Top-1 acc 65.625 (59.851)	Top-5 acc 84.766 (80.954)	lr 0.01521
Train [52][2090/3239]	Time 0.227 (0.603)	Data Time 0.001 (0.018)	Loss 2.5265 (2.6649)	Entropy 1.15168 (1.15393)	Top-1 acc 63.672 (59.850)	Top-5 acc 83.984 (80.954)	lr 0.01521
Train [52][2100/3239]	Time 2.461 (0.602)	Data Time 0.001 (0.018)	Loss 2.7170 (2.6650)	Entropy 1.15168 (1.15392)	Top-1 acc 61.719 (59.849)	Top-5 acc 79.688 (80.949)	lr 0.01521
Train [52][2110/3239]	Time 0.228 (0.601)	Data Time 0.001 (0.018)	Loss 2.5556 (2.6649)	Entropy 1.15164 (1.15391)	Top-1 acc 64.062 (59.855)	Top-5 acc 80.859 (80.948)	lr 0.01521
Train [52][2120/3239]	Time 0.323 (0.600)	Data Time 0.002 (0.018)	Loss 2.6072 (2.6650)	Entropy 1.15161 (1.15390)	Top-1 acc 61.719 (59.850)	Top-5 acc 80.859 (80.945)	lr 0.01521
Train [52][2130/3239]	Time 0.242 (0.599)	Data Time 0.002 (0.018)	Loss 2.6132 (2.6652)	Entropy 1.15162 (1.15389)	Top-1 acc 59.766 (59.845)	Top-5 acc 83.203 (80.940)	lr 0.01521
Train [52][2140/3239]	Time 0.211 (0.599)	Data Time 0.002 (0.018)	Loss 2.9191 (2.6653)	Entropy 1.15161 (1.15388)	Top-1 acc 51.953 (59.844)	Top-5 acc 76.172 (80.939)	lr 0.01521
Train [52][2150/3239]	Time 0.217 (0.598)	Data Time 0.001 (0.018)	Loss 2.5256 (2.6653)	Entropy 1.15162 (1.15387)	Top-1 acc 64.453 (59.846)	Top-5 acc 83.984 (80.938)	lr 0.01521
Train [52][2160/3239]	Time 0.213 (0.597)	Data Time 0.001 (0.018)	Loss 2.6428 (2.6653)	Entropy 1.15159 (1.15386)	Top-1 acc 62.891 (59.849)	Top-5 acc 81.641 (80.935)	lr 0.01521
Train [52][2170/3239]	Time 0.210 (0.597)	Data Time 0.001 (0.018)	Loss 2.6238 (2.6654)	Entropy 1.15157 (1.15385)	Top-1 acc 61.328 (59.847)	Top-5 acc 83.594 (80.934)	lr 0.01520
Train [52][2180/3239]	Time 0.237 (0.596)	Data Time 0.001 (0.018)	Loss 2.6773 (2.6655)	Entropy 1.15152 (1.15384)	Top-1 acc 61.328 (59.838)	Top-5 acc 76.562 (80.932)	lr 0.01520
Train [52][2190/3239]	Time 0.215 (0.596)	Data Time 0.001 (0.018)	Loss 2.7158 (2.6659)	Entropy 1.15152 (1.15382)	Top-1 acc 56.641 (59.833)	Top-5 acc 79.688 (80.924)	lr 0.01520
Train [52][2200/3239]	Time 0.217 (0.595)	Data Time 0.002 (0.018)	Loss 3.0021 (2.6661)	Entropy 1.15151 (1.15381)	Top-1 acc 49.609 (59.823)	Top-5 acc 75.781 (80.918)	lr 0.01520
Train [52][2210/3239]	Time 2.484 (0.594)	Data Time 0.003 (0.017)	Loss 2.9255 (2.6661)	Entropy 1.15151 (1.15380)	Top-1 acc 53.125 (59.826)	Top-5 acc 74.609 (80.917)	lr 0.01520
Train [52][2220/3239]	Time 0.228 (0.593)	Data Time 0.001 (0.017)	Loss 2.5931 (2.6661)	Entropy 1.15147 (1.15379)	Top-1 acc 65.234 (59.829)	Top-5 acc 82.812 (80.918)	lr 0.01520
Train [52][2230/3239]	Time 0.256 (0.592)	Data Time 0.001 (0.017)	Loss 2.3993 (2.6660)	Entropy 1.15140 (1.15378)	Top-1 acc 66.797 (59.835)	Top-5 acc 88.281 (80.915)	lr 0.01520
Train [52][2240/3239]	Time 0.229 (0.592)	Data Time 0.001 (0.017)	Loss 2.7114 (2.6663)	Entropy 1.15133 (1.15377)	Top-1 acc 58.594 (59.828)	Top-5 acc 79.297 (80.910)	lr 0.01520
Train [52][2250/3239]	Time 0.227 (0.591)	Data Time 0.001 (0.017)	Loss 2.7668 (2.6666)	Entropy 1.15132 (1.15376)	Top-1 acc 57.422 (59.821)	Top-5 acc 79.297 (80.906)	lr 0.01520
Train [52][2260/3239]	Time 0.234 (0.591)	Data Time 0.001 (0.017)	Loss 2.6184 (2.6665)	Entropy 1.15129 (1.15375)	Top-1 acc 64.453 (59.825)	Top-5 acc 80.859 (80.906)	lr 0.01520
Train [52][2270/3239]	Time 0.213 (0.590)	Data Time 0.001 (0.017)	Loss 2.7246 (2.6665)	Entropy 1.15131 (1.15374)	Top-1 acc 59.766 (59.824)	Top-5 acc 78.516 (80.905)	lr 0.01519
Train [52][2280/3239]	Time 0.245 (0.589)	Data Time 0.002 (0.017)	Loss 2.5701 (2.6669)	Entropy 1.15132 (1.15373)	Top-1 acc 61.328 (59.816)	Top-5 acc 78.906 (80.899)	lr 0.01519
Train [52][2290/3239]	Time 0.231 (0.589)	Data Time 0.001 (0.017)	Loss 2.6584 (2.6672)	Entropy 1.15131 (1.15372)	Top-1 acc 61.719 (59.811)	Top-5 acc 82.031 (80.893)	lr 0.01519
Train [52][2300/3239]	Time 0.243 (0.588)	Data Time 0.001 (0.017)	Loss 2.7928 (2.6675)	Entropy 1.15124 (1.15371)	Top-1 acc 59.375 (59.805)	Top-5 acc 78.906 (80.886)	lr 0.01519
Train [52][2310/3239]	Time 0.364 (0.588)	Data Time 0.001 (0.017)	Loss 2.6572 (2.6678)	Entropy 1.15120 (1.15370)	Top-1 acc 60.938 (59.798)	Top-5 acc 80.469 (80.878)	lr 0.01519
Train [52][2320/3239]	Time 2.564 (0.588)	Data Time 0.001 (0.017)	Loss 2.6366 (2.6678)	Entropy 1.15120 (1.15369)	Top-1 acc 62.891 (59.798)	Top-5 acc 80.078 (80.877)	lr 0.01519
Train [52][2330/3239]	Time 0.223 (0.586)	Data Time 0.001 (0.017)	Loss 2.5860 (2.6676)	Entropy 1.15121 (1.15368)	Top-1 acc 62.891 (59.803)	Top-5 acc 82.031 (80.880)	lr 0.01519
Train [52][2340/3239]	Time 0.237 (0.586)	Data Time 0.002 (0.017)	Loss 2.6103 (2.6677)	Entropy 1.15118 (1.15366)	Top-1 acc 60.156 (59.803)	Top-5 acc 82.812 (80.877)	lr 0.01519
Train [52][2350/3239]	Time 0.231 (0.585)	Data Time 0.001 (0.017)	Loss 2.5953 (2.6676)	Entropy 1.15115 (1.15365)	Top-1 acc 60.547 (59.807)	Top-5 acc 83.594 (80.878)	lr 0.01519
Train [52][2360/3239]	Time 0.528 (0.604)	Data Time 0.002 (0.016)	Loss 2.5356 (2.6677)	Entropy 1.15105 (1.15364)	Top-1 acc 58.984 (59.802)	Top-5 acc 81.250 (80.874)	lr 0.01519
Train [52][2370/3239]	Time 0.233 (0.604)	Data Time 0.002 (0.016)	Loss 2.6284 (2.6679)	Entropy 1.15102 (1.15363)	Top-1 acc 62.891 (59.804)	Top-5 acc 81.250 (80.870)	lr 0.01518
Train [52][2380/3239]	Time 0.186 (0.603)	Data Time 0.002 (0.016)	Loss 2.6700 (2.6677)	Entropy 1.15102 (1.15362)	Top-1 acc 60.156 (59.809)	Top-5 acc 83.203 (80.872)	lr 0.01518
Train [52][2390/3239]	Time 0.227 (0.603)	Data Time 0.001 (0.016)	Loss 2.6437 (2.6676)	Entropy 1.15097 (1.15361)	Top-1 acc 62.891 (59.809)	Top-5 acc 80.859 (80.875)	lr 0.01518
Train [52][2400/3239]	Time 0.205 (0.602)	Data Time 0.001 (0.016)	Loss 2.5455 (2.6676)	Entropy 1.15092 (1.15360)	Top-1 acc 62.891 (59.807)	Top-5 acc 82.031 (80.875)	lr 0.01518
Train [52][2410/3239]	Time 0.251 (0.601)	Data Time 0.001 (0.016)	Loss 2.6947 (2.6675)	Entropy 1.15085 (1.15359)	Top-1 acc 60.156 (59.810)	Top-5 acc 79.297 (80.879)	lr 0.01518
Train [52][2420/3239]	Time 0.214 (0.601)	Data Time 0.001 (0.016)	Loss 2.6866 (2.6673)	Entropy 1.15086 (1.15358)	Top-1 acc 55.078 (59.813)	Top-5 acc 80.078 (80.883)	lr 0.01518
Train [52][2430/3239]	Time 2.420 (0.600)	Data Time 0.001 (0.016)	Loss 2.7593 (2.6675)	Entropy 1.15086 (1.15357)	Top-1 acc 59.375 (59.810)	Top-5 acc 78.906 (80.879)	lr 0.01518
Train [52][2440/3239]	Time 0.214 (0.599)	Data Time 0.002 (0.016)	Loss 2.7023 (2.6675)	Entropy 1.15084 (1.15355)	Top-1 acc 58.984 (59.809)	Top-5 acc 80.078 (80.877)	lr 0.01518
Train [52][2450/3239]	Time 0.249 (0.598)	Data Time 0.001 (0.016)	Loss 2.4521 (2.6674)	Entropy 1.15081 (1.15354)	Top-1 acc 67.188 (59.820)	Top-5 acc 85.938 (80.881)	lr 0.01518
Train [52][2460/3239]	Time 0.268 (0.598)	Data Time 0.001 (0.016)	Loss 2.7071 (2.6674)	Entropy 1.15078 (1.15353)	Top-1 acc 62.891 (59.822)	Top-5 acc 79.688 (80.879)	lr 0.01518
Train [52][2470/3239]	Time 0.234 (0.597)	Data Time 0.001 (0.016)	Loss 2.5434 (2.6672)	Entropy 1.15074 (1.15352)	Top-1 acc 63.281 (59.826)	Top-5 acc 82.422 (80.882)	lr 0.01517
Train [52][2480/3239]	Time 0.254 (0.597)	Data Time 0.001 (0.016)	Loss 2.5720 (2.6671)	Entropy 1.15074 (1.15351)	Top-1 acc 60.156 (59.826)	Top-5 acc 83.203 (80.885)	lr 0.01517
Train [52][2490/3239]	Time 0.224 (0.596)	Data Time 0.003 (0.016)	Loss 2.9082 (2.6675)	Entropy 1.15077 (1.15350)	Top-1 acc 55.469 (59.819)	Top-5 acc 77.734 (80.881)	lr 0.01517
Train [52][2500/3239]	Time 0.251 (0.596)	Data Time 0.001 (0.016)	Loss 2.5986 (2.6675)	Entropy 1.15077 (1.15349)	Top-1 acc 62.500 (59.823)	Top-5 acc 80.859 (80.880)	lr 0.01517
Train [52][2510/3239]	Time 0.323 (0.595)	Data Time 0.001 (0.016)	Loss 2.5941 (2.6675)	Entropy 1.15078 (1.15348)	Top-1 acc 60.547 (59.822)	Top-5 acc 82.031 (80.880)	lr 0.01517
Train [52][2520/3239]	Time 0.220 (0.595)	Data Time 0.002 (0.016)	Loss 2.6318 (2.6675)	Entropy 1.15075 (1.15347)	Top-1 acc 59.766 (59.824)	Top-5 acc 82.031 (80.881)	lr 0.01517
Train [52][2530/3239]	Time 0.219 (0.594)	Data Time 0.001 (0.016)	Loss 2.6666 (2.6676)	Entropy 1.15065 (1.15345)	Top-1 acc 58.594 (59.821)	Top-5 acc 82.031 (80.880)	lr 0.01517
Train [52][2540/3239]	Time 2.354 (0.594)	Data Time 0.001 (0.015)	Loss 2.7945 (2.6676)	Entropy 1.15065 (1.15344)	Top-1 acc 57.422 (59.821)	Top-5 acc 77.734 (80.879)	lr 0.01517
Train [52][2550/3239]	Time 0.222 (0.592)	Data Time 0.001 (0.015)	Loss 2.7976 (2.6679)	Entropy 1.15061 (1.15343)	Top-1 acc 60.156 (59.815)	Top-5 acc 77.344 (80.876)	lr 0.01517
Train [52][2560/3239]	Time 0.320 (0.592)	Data Time 0.001 (0.015)	Loss 2.5473 (2.6679)	Entropy 1.15058 (1.15342)	Top-1 acc 61.719 (59.815)	Top-5 acc 83.984 (80.874)	lr 0.01517
Train [52][2570/3239]	Time 0.239 (0.591)	Data Time 0.001 (0.015)	Loss 2.6270 (2.6679)	Entropy 1.15053 (1.15341)	Top-1 acc 60.938 (59.817)	Top-5 acc 80.078 (80.874)	lr 0.01517
Train [52][2580/3239]	Time 0.231 (0.591)	Data Time 0.001 (0.015)	Loss 2.8144 (2.6681)	Entropy 1.15048 (1.15340)	Top-1 acc 57.812 (59.818)	Top-5 acc 80.469 (80.872)	lr 0.01516
Train [52][2590/3239]	Time 0.211 (0.590)	Data Time 0.001 (0.015)	Loss 2.5635 (2.6683)	Entropy 1.15069 (1.15339)	Top-1 acc 65.234 (59.813)	Top-5 acc 81.641 (80.869)	lr 0.01516
Train [52][2600/3239]	Time 0.243 (0.590)	Data Time 0.001 (0.015)	Loss 2.5090 (2.6681)	Entropy 1.15071 (1.15338)	Top-1 acc 63.672 (59.819)	Top-5 acc 82.812 (80.875)	lr 0.01516
Train [52][2610/3239]	Time 0.347 (0.589)	Data Time 0.001 (0.015)	Loss 2.6981 (2.6684)	Entropy 1.15067 (1.15337)	Top-1 acc 58.984 (59.812)	Top-5 acc 80.078 (80.870)	lr 0.01516
Train [52][2620/3239]	Time 0.223 (0.589)	Data Time 0.002 (0.015)	Loss 2.8424 (2.6684)	Entropy 1.15066 (1.15336)	Top-1 acc 57.812 (59.813)	Top-5 acc 77.734 (80.872)	lr 0.01516
Train [52][2630/3239]	Time 0.219 (0.588)	Data Time 0.001 (0.015)	Loss 2.7708 (2.6685)	Entropy 1.15067 (1.15335)	Top-1 acc 57.031 (59.810)	Top-5 acc 79.297 (80.872)	lr 0.01516
Train [52][2640/3239]	Time 0.226 (0.588)	Data Time 0.001 (0.015)	Loss 2.7794 (2.6686)	Entropy 1.15066 (1.15334)	Top-1 acc 58.984 (59.811)	Top-5 acc 78.125 (80.869)	lr 0.01516
Train [52][2650/3239]	Time 0.260 (0.587)	Data Time 0.001 (0.015)	Loss 2.5576 (2.6690)	Entropy 1.15066 (1.15333)	Top-1 acc 64.453 (59.805)	Top-5 acc 82.422 (80.862)	lr 0.01516
Train [52][2660/3239]	Time 0.354 (0.587)	Data Time 0.001 (0.015)	Loss 2.6225 (2.6692)	Entropy 1.15063 (1.15332)	Top-1 acc 59.766 (59.797)	Top-5 acc 83.594 (80.860)	lr 0.01516
Train [52][2670/3239]	Time 0.237 (0.586)	Data Time 0.001 (0.015)	Loss 2.7100 (2.6692)	Entropy 1.15061 (1.15331)	Top-1 acc 56.641 (59.797)	Top-5 acc 80.078 (80.862)	lr 0.01516
Train [52][2680/3239]	Time 0.208 (0.586)	Data Time 0.001 (0.015)	Loss 2.7498 (2.6693)	Entropy 1.15057 (1.15330)	Top-1 acc 59.766 (59.796)	Top-5 acc 76.953 (80.861)	lr 0.01515
Train [52][2690/3239]	Time 0.229 (0.586)	Data Time 0.001 (0.015)	Loss 2.6434 (2.6693)	Entropy 1.15057 (1.15329)	Top-1 acc 61.719 (59.797)	Top-5 acc 82.812 (80.861)	lr 0.01515
Train [52][2700/3239]	Time 0.225 (0.585)	Data Time 0.001 (0.015)	Loss 2.7579 (2.6694)	Entropy 1.15055 (1.15328)	Top-1 acc 60.547 (59.792)	Top-5 acc 80.469 (80.864)	lr 0.01515
Train [52][2710/3239]	Time 0.230 (0.603)	Data Time 0.003 (0.015)	Loss 2.6395 (2.6693)	Entropy 1.15064 (1.15327)	Top-1 acc 61.328 (59.793)	Top-5 acc 81.250 (80.867)	lr 0.01515
Train [52][2720/3239]	Time 0.268 (0.603)	Data Time 0.002 (0.015)	Loss 2.5848 (2.6693)	Entropy 1.15062 (1.15326)	Top-1 acc 61.719 (59.792)	Top-5 acc 82.422 (80.869)	lr 0.01515
Train [52][2730/3239]	Time 0.263 (0.602)	Data Time 0.002 (0.014)	Loss 2.7740 (2.6693)	Entropy 1.15062 (1.15325)	Top-1 acc 54.688 (59.789)	Top-5 acc 78.125 (80.871)	lr 0.01515
Train [52][2740/3239]	Time 0.244 (0.602)	Data Time 0.002 (0.014)	Loss 2.4966 (2.6692)	Entropy 1.15057 (1.15324)	Top-1 acc 64.844 (59.793)	Top-5 acc 83.984 (80.870)	lr 0.01515
Train [52][2750/3239]	Time 0.212 (0.601)	Data Time 0.002 (0.014)	Loss 2.8534 (2.6695)	Entropy 1.15054 (1.15323)	Top-1 acc 57.031 (59.787)	Top-5 acc 77.344 (80.868)	lr 0.01515
Train [52][2760/3239]	Time 0.223 (0.601)	Data Time 0.002 (0.014)	Loss 2.6303 (2.6696)	Entropy 1.15102 (1.15322)	Top-1 acc 61.719 (59.784)	Top-5 acc 82.422 (80.864)	lr 0.01515
Train [52][2770/3239]	Time 0.233 (0.600)	Data Time 0.001 (0.014)	Loss 2.5898 (2.6697)	Entropy 1.15103 (1.15321)	Top-1 acc 60.156 (59.780)	Top-5 acc 81.250 (80.861)	lr 0.01515
Train [52][2780/3239]	Time 0.228 (0.600)	Data Time 0.001 (0.014)	Loss 2.7010 (2.6697)	Entropy 1.15101 (1.15320)	Top-1 acc 59.375 (59.783)	Top-5 acc 80.469 (80.862)	lr 0.01514
Train [52][2790/3239]	Time 0.257 (0.599)	Data Time 0.001 (0.014)	Loss 2.7767 (2.6697)	Entropy 1.15098 (1.15319)	Top-1 acc 57.812 (59.785)	Top-5 acc 77.344 (80.862)	lr 0.01514
Train [52][2800/3239]	Time 0.275 (0.599)	Data Time 0.001 (0.014)	Loss 2.6716 (2.6696)	Entropy 1.15098 (1.15319)	Top-1 acc 60.938 (59.787)	Top-5 acc 81.641 (80.861)	lr 0.01514
Train [52][2810/3239]	Time 0.218 (0.598)	Data Time 0.001 (0.014)	Loss 2.7639 (2.6698)	Entropy 1.15096 (1.15318)	Top-1 acc 60.156 (59.781)	Top-5 acc 79.297 (80.859)	lr 0.01514
Train [52][2820/3239]	Time 0.255 (0.598)	Data Time 0.001 (0.014)	Loss 2.5466 (2.6697)	Entropy 1.15095 (1.15317)	Top-1 acc 64.453 (59.784)	Top-5 acc 83.594 (80.862)	lr 0.01514
Train [52][2830/3239]	Time 0.205 (0.597)	Data Time 0.001 (0.014)	Loss 2.8791 (2.6699)	Entropy 1.15093 (1.15316)	Top-1 acc 55.078 (59.781)	Top-5 acc 76.172 (80.859)	lr 0.01514
Train [52][2840/3239]	Time 0.238 (0.597)	Data Time 0.001 (0.014)	Loss 2.9692 (2.6700)	Entropy 1.15096 (1.15316)	Top-1 acc 52.734 (59.777)	Top-5 acc 74.609 (80.856)	lr 0.01514
Train [52][2850/3239]	Time 0.330 (0.596)	Data Time 0.001 (0.014)	Loss 2.6996 (2.6700)	Entropy 1.15096 (1.15315)	Top-1 acc 55.078 (59.778)	Top-5 acc 80.859 (80.857)	lr 0.01514
Train [52][2860/3239]	Time 0.251 (0.596)	Data Time 0.001 (0.014)	Loss 2.7380 (2.6700)	Entropy 1.15097 (1.15314)	Top-1 acc 55.859 (59.777)	Top-5 acc 80.078 (80.857)	lr 0.01514
Train [52][2870/3239]	Time 0.262 (0.596)	Data Time 0.001 (0.014)	Loss 2.5334 (2.6695)	Entropy 1.15091 (1.15313)	Top-1 acc 63.281 (59.784)	Top-5 acc 82.422 (80.865)	lr 0.01514
Train [52][2880/3239]	Time 0.253 (0.595)	Data Time 0.001 (0.014)	Loss 2.6970 (2.6697)	Entropy 1.15088 (1.15312)	Top-1 acc 62.891 (59.780)	Top-5 acc 79.297 (80.861)	lr 0.01513
Train [52][2890/3239]	Time 0.252 (0.595)	Data Time 0.001 (0.014)	Loss 2.8072 (2.6701)	Entropy 1.15084 (1.15312)	Top-1 acc 58.203 (59.773)	Top-5 acc 78.125 (80.855)	lr 0.01513
Train [52][2900/3239]	Time 0.276 (0.594)	Data Time 0.001 (0.014)	Loss 2.6227 (2.6699)	Entropy 1.15075 (1.15311)	Top-1 acc 62.109 (59.776)	Top-5 acc 80.469 (80.859)	lr 0.01513
Train [52][2910/3239]	Time 0.229 (0.594)	Data Time 0.001 (0.014)	Loss 2.8013 (2.6701)	Entropy 1.15069 (1.15310)	Top-1 acc 55.078 (59.771)	Top-5 acc 75.781 (80.855)	lr 0.01513
Train [52][2920/3239]	Time 0.218 (0.593)	Data Time 0.001 (0.014)	Loss 2.6037 (2.6698)	Entropy 1.15069 (1.15309)	Top-1 acc 60.547 (59.773)	Top-5 acc 83.203 (80.861)	lr 0.01513
Train [52][2930/3239]	Time 0.251 (0.593)	Data Time 0.001 (0.014)	Loss 2.4613 (2.6697)	Entropy 1.15070 (1.15308)	Top-1 acc 65.625 (59.774)	Top-5 acc 86.719 (80.863)	lr 0.01513
Train [52][2940/3239]	Time 0.267 (0.592)	Data Time 0.001 (0.014)	Loss 2.8071 (2.6699)	Entropy 1.15070 (1.15308)	Top-1 acc 58.594 (59.769)	Top-5 acc 77.344 (80.857)	lr 0.01513
Train [52][2950/3239]	Time 0.269 (0.592)	Data Time 0.001 (0.014)	Loss 2.7281 (2.6700)	Entropy 1.15070 (1.15307)	Top-1 acc 58.594 (59.769)	Top-5 acc 80.469 (80.857)	lr 0.01513
Train [52][2960/3239]	Time 0.275 (0.592)	Data Time 0.001 (0.013)	Loss 2.6607 (2.6701)	Entropy 1.15070 (1.15306)	Top-1 acc 60.547 (59.767)	Top-5 acc 80.859 (80.856)	lr 0.01513
Train [52][2970/3239]	Time 0.230 (0.591)	Data Time 0.001 (0.013)	Loss 2.7742 (2.6705)	Entropy 1.15068 (1.15305)	Top-1 acc 55.078 (59.761)	Top-5 acc 79.297 (80.848)	lr 0.01513
Train [52][2980/3239]	Time 0.200 (0.591)	Data Time 0.001 (0.013)	Loss 2.7087 (2.6704)	Entropy 1.15066 (1.15304)	Top-1 acc 57.812 (59.767)	Top-5 acc 82.031 (80.849)	lr 0.01512
Train [52][2990/3239]	Time 0.302 (0.590)	Data Time 0.001 (0.013)	Loss 2.5906 (2.6706)	Entropy 1.15064 (1.15304)	Top-1 acc 62.500 (59.765)	Top-5 acc 82.812 (80.847)	lr 0.01512
Train [52][3000/3239]	Time 0.255 (0.590)	Data Time 0.001 (0.013)	Loss 2.5350 (2.6704)	Entropy 1.15061 (1.15303)	Top-1 acc 60.938 (59.771)	Top-5 acc 84.375 (80.850)	lr 0.01512
Train [52][3010/3239]	Time 0.259 (0.589)	Data Time 0.002 (0.013)	Loss 2.5332 (2.6705)	Entropy 1.15058 (1.15302)	Top-1 acc 60.547 (59.765)	Top-5 acc 85.547 (80.853)	lr 0.01512
Train [52][3020/3239]	Time 0.231 (0.589)	Data Time 0.001 (0.013)	Loss 2.7279 (2.6707)	Entropy 1.15060 (1.15301)	Top-1 acc 60.156 (59.761)	Top-5 acc 78.516 (80.847)	lr 0.01512
Train [52][3030/3239]	Time 0.219 (0.589)	Data Time 0.001 (0.013)	Loss 2.7589 (2.6708)	Entropy 1.15053 (1.15300)	Top-1 acc 57.422 (59.760)	Top-5 acc 79.297 (80.845)	lr 0.01512
Train [52][3040/3239]	Time 0.521 (0.604)	Data Time 0.004 (0.013)	Loss 2.6527 (2.6707)	Entropy 1.15051 (1.15300)	Top-1 acc 62.109 (59.765)	Top-5 acc 82.031 (80.849)	lr 0.01512
Train [52][3050/3239]	Time 0.231 (0.604)	Data Time 0.002 (0.013)	Loss 2.5860 (2.6706)	Entropy 1.15052 (1.15299)	Top-1 acc 59.766 (59.766)	Top-5 acc 83.594 (80.852)	lr 0.01512
Train [52][3060/3239]	Time 0.222 (0.603)	Data Time 0.002 (0.013)	Loss 2.5802 (2.6706)	Entropy 1.15051 (1.15298)	Top-1 acc 59.375 (59.762)	Top-5 acc 80.859 (80.850)	lr 0.01512
Train [52][3070/3239]	Time 0.220 (0.603)	Data Time 0.001 (0.013)	Loss 2.6691 (2.6706)	Entropy 1.15045 (1.15297)	Top-1 acc 59.375 (59.762)	Top-5 acc 83.594 (80.850)	lr 0.01512
Train [52][3080/3239]	Time 0.241 (0.602)	Data Time 0.001 (0.013)	Loss 2.7431 (2.6706)	Entropy 1.15044 (1.15296)	Top-1 acc 60.547 (59.762)	Top-5 acc 80.078 (80.850)	lr 0.01511
Train [52][3090/3239]	Time 0.221 (0.602)	Data Time 0.001 (0.013)	Loss 2.6942 (2.6707)	Entropy 1.15044 (1.15296)	Top-1 acc 58.984 (59.762)	Top-5 acc 82.812 (80.849)	lr 0.01511
Train [52][3100/3239]	Time 0.238 (0.601)	Data Time 0.001 (0.013)	Loss 2.9458 (2.6710)	Entropy 1.15041 (1.15295)	Top-1 acc 53.516 (59.755)	Top-5 acc 79.688 (80.843)	lr 0.01511
Train [52][3110/3239]	Time 0.223 (0.601)	Data Time 0.002 (0.013)	Loss 2.5241 (2.6710)	Entropy 1.15040 (1.15294)	Top-1 acc 62.891 (59.757)	Top-5 acc 82.422 (80.841)	lr 0.01511
Train [52][3120/3239]	Time 0.229 (0.600)	Data Time 0.001 (0.013)	Loss 2.5150 (2.6710)	Entropy 1.15035 (1.15293)	Top-1 acc 60.547 (59.756)	Top-5 acc 82.812 (80.840)	lr 0.01511
Train [52][3130/3239]	Time 0.249 (0.600)	Data Time 0.001 (0.013)	Loss 2.7083 (2.6711)	Entropy 1.15034 (1.15292)	Top-1 acc 55.859 (59.751)	Top-5 acc 81.641 (80.837)	lr 0.01511
Train [52][3140/3239]	Time 0.207 (0.600)	Data Time 0.001 (0.013)	Loss 2.6480 (2.6708)	Entropy 1.15034 (1.15291)	Top-1 acc 62.109 (59.760)	Top-5 acc 80.469 (80.839)	lr 0.01511
Train [52][3150/3239]	Time 0.204 (0.599)	Data Time 0.001 (0.013)	Loss 2.6845 (2.6710)	Entropy 1.15033 (1.15291)	Top-1 acc 57.812 (59.756)	Top-5 acc 79.297 (80.834)	lr 0.01511
Train [52][3160/3239]	Time 0.274 (0.599)	Data Time 0.001 (0.013)	Loss 2.6195 (2.6711)	Entropy 1.15029 (1.15290)	Top-1 acc 59.766 (59.756)	Top-5 acc 80.859 (80.836)	lr 0.01511
Train [52][3170/3239]	Time 0.254 (0.598)	Data Time 0.001 (0.013)	Loss 2.4184 (2.6709)	Entropy 1.15022 (1.15289)	Top-1 acc 69.141 (59.760)	Top-5 acc 85.547 (80.838)	lr 0.01511
Train [52][3180/3239]	Time 0.221 (0.598)	Data Time 0.000 (0.013)	Loss 2.6362 (2.6709)	Entropy 1.15021 (1.15288)	Top-1 acc 60.156 (59.758)	Top-5 acc 80.859 (80.838)	lr 0.01510
Train [52][3190/3239]	Time 0.239 (0.597)	Data Time 0.000 (0.013)	Loss 2.6241 (2.6708)	Entropy 1.15020 (1.15287)	Top-1 acc 61.719 (59.757)	Top-5 acc 82.031 (80.841)	lr 0.01510
Train [52][3200/3239]	Time 0.223 (0.597)	Data Time 0.000 (0.013)	Loss 2.5646 (2.6707)	Entropy 1.15020 (1.15286)	Top-1 acc 61.719 (59.758)	Top-5 acc 83.203 (80.844)	lr 0.01510
Train [52][3210/3239]	Time 0.223 (0.596)	Data Time 0.000 (0.013)	Loss 2.6971 (2.6707)	Entropy 1.15015 (1.15286)	Top-1 acc 61.719 (59.763)	Top-5 acc 80.078 (80.844)	lr 0.01510
Train [52][3220/3239]	Time 0.211 (0.596)	Data Time 0.000 (0.013)	Loss 2.8571 (2.6707)	Entropy 1.15012 (1.15285)	Top-1 acc 56.641 (59.764)	Top-5 acc 76.953 (80.843)	lr 0.01510
Train [52][3230/3239]	Time 0.234 (0.596)	Data Time 0.000 (0.012)	Loss 2.5880 (2.6707)	Entropy 1.15011 (1.15284)	Top-1 acc 61.719 (59.764)	Top-5 acc 81.250 (80.841)	lr 0.01510
Train [52][3239/3239]	Time 2.376 (0.595)	Data Time 0.000 (0.012)	Loss 3.0135 (2.6708)	Entropy 1.15011 (1.15283)	Top-1 acc 51.852 (59.758)	Top-5 acc 70.370 (80.842)	lr 0.01510
==========Valid [52/120]	loss 1.527	top-1 acc 65.347 (65.404)	top-5 acc 85.596	Train top-1 59.758	top-5 80.842	Entropy 1.15011	Latency-None: 0.000ms	Flops: 548.34M
Train [53][0/3239]	Time 36.743 (36.743)	Data Time 34.957 (34.957)	Loss 2.4806 (2.4806)	Entropy 1.15012 (1.15012)	Top-1 acc 65.234 (65.234)	Top-5 acc 84.766 (84.766)	lr 0.01510
Train [53][10/3239]	Time 2.536 (3.876)	Data Time 0.003 (3.180)	Loss 2.5254 (2.6017)	Entropy 1.15012 (1.15012)	Top-1 acc 64.453 (61.435)	Top-5 acc 82.422 (82.173)	lr 0.01510
Train [53][20/3239]	Time 0.242 (2.149)	Data Time 0.001 (1.666)	Loss 2.7243 (2.6143)	Entropy 1.15008 (1.15010)	Top-1 acc 55.859 (61.310)	Top-5 acc 80.859 (82.031)	lr 0.01510
Train [53][30/3239]	Time 0.277 (1.604)	Data Time 0.002 (1.129)	Loss 2.6766 (2.6162)	Entropy 1.15007 (1.15009)	Top-1 acc 58.984 (61.366)	Top-5 acc 83.203 (82.031)	lr 0.01510
Train [53][40/3239]	Time 0.248 (1.329)	Data Time 0.001 (0.854)	Loss 2.6878 (2.6144)	Entropy 1.15006 (1.15008)	Top-1 acc 60.938 (61.442)	Top-5 acc 80.859 (82.050)	lr 0.01509
Train [53][50/3239]	Time 0.267 (1.162)	Data Time 0.001 (0.687)	Loss 2.4938 (2.6216)	Entropy 1.15005 (1.15008)	Top-1 acc 66.016 (61.374)	Top-5 acc 82.812 (81.870)	lr 0.01509
Train [53][60/3239]	Time 0.220 (1.047)	Data Time 0.001 (0.575)	Loss 2.8554 (2.6250)	Entropy 1.15002 (1.15007)	Top-1 acc 54.688 (61.168)	Top-5 acc 76.562 (81.814)	lr 0.01509
Train [53][70/3239]	Time 0.228 (0.965)	Data Time 0.001 (0.494)	Loss 2.9538 (2.6344)	Entropy 1.15001 (1.15006)	Top-1 acc 53.906 (60.888)	Top-5 acc 76.172 (81.624)	lr 0.01509
Train [53][80/3239]	Time 0.247 (0.904)	Data Time 0.002 (0.433)	Loss 2.5881 (2.6351)	Entropy 1.14995 (1.15005)	Top-1 acc 61.719 (60.778)	Top-5 acc 83.203 (81.650)	lr 0.01509
Train [53][90/3239]	Time 0.228 (0.856)	Data Time 0.001 (0.386)	Loss 2.7321 (2.6467)	Entropy 1.14992 (1.15004)	Top-1 acc 60.547 (60.628)	Top-5 acc 79.688 (81.374)	lr 0.01509
Train [53][100/3239]	Time 0.206 (0.819)	Data Time 0.001 (0.348)	Loss 2.7725 (2.6438)	Entropy 1.14989 (1.15003)	Top-1 acc 55.859 (60.675)	Top-5 acc 80.078 (81.467)	lr 0.01509
Train [53][110/3239]	Time 0.218 (0.786)	Data Time 0.001 (0.316)	Loss 2.5925 (2.6386)	Entropy 1.14989 (1.15001)	Top-1 acc 62.500 (60.786)	Top-5 acc 81.250 (81.546)	lr 0.01509
Train [53][120/3239]	Time 2.494 (0.758)	Data Time 0.001 (0.290)	Loss 2.7270 (2.6399)	Entropy 1.14989 (1.15000)	Top-1 acc 56.641 (60.715)	Top-5 acc 78.125 (81.518)	lr 0.01509
Train [53][130/3239]	Time 0.230 (0.718)	Data Time 0.001 (0.268)	Loss 2.7835 (2.6437)	Entropy 1.14988 (1.15000)	Top-1 acc 58.984 (60.651)	Top-5 acc 80.078 (81.465)	lr 0.01509
Train [53][140/3239]	Time 0.219 (0.701)	Data Time 0.001 (0.249)	Loss 2.7491 (2.6499)	Entropy 1.14986 (1.14999)	Top-1 acc 58.984 (60.444)	Top-5 acc 79.297 (81.377)	lr 0.01509
Train [53][150/3239]	Time 0.443 (1.002)	Data Time 0.002 (0.233)	Loss 2.7709 (2.6538)	Entropy 1.14983 (1.14998)	Top-1 acc 59.766 (60.376)	Top-5 acc 77.734 (81.237)	lr 0.01508
Train [53][160/3239]	Time 0.236 (0.975)	Data Time 0.002 (0.219)	Loss 2.5161 (2.6531)	Entropy 1.14973 (1.14996)	Top-1 acc 62.500 (60.428)	Top-5 acc 85.156 (81.262)	lr 0.01508
Train [53][170/3239]	Time 0.265 (0.945)	Data Time 0.001 (0.206)	Loss 2.6241 (2.6496)	Entropy 1.14973 (1.14995)	Top-1 acc 62.109 (60.508)	Top-5 acc 81.641 (81.316)	lr 0.01508
Train [53][180/3239]	Time 0.228 (0.919)	Data Time 0.001 (0.195)	Loss 2.7084 (2.6525)	Entropy 1.14971 (1.14994)	Top-1 acc 59.766 (60.407)	Top-5 acc 78.906 (81.239)	lr 0.01508
Train [53][190/3239]	Time 0.237 (0.895)	Data Time 0.001 (0.185)	Loss 2.5597 (2.6508)	Entropy 1.14960 (1.14992)	Top-1 acc 64.844 (60.434)	Top-5 acc 81.250 (81.315)	lr 0.01508
Train [53][200/3239]	Time 0.216 (0.874)	Data Time 0.001 (0.176)	Loss 2.8718 (2.6515)	Entropy 1.14959 (1.14991)	Top-1 acc 54.688 (60.368)	Top-5 acc 77.734 (81.318)	lr 0.01508
Train [53][210/3239]	Time 0.220 (0.855)	Data Time 0.001 (0.167)	Loss 2.7854 (2.6511)	Entropy 1.14955 (1.14989)	Top-1 acc 57.031 (60.352)	Top-5 acc 79.688 (81.354)	lr 0.01508
Train [53][220/3239]	Time 0.231 (0.837)	Data Time 0.002 (0.160)	Loss 2.7059 (2.6507)	Entropy 1.14954 (1.14988)	Top-1 acc 59.375 (60.344)	Top-5 acc 80.859 (81.328)	lr 0.01508
Train [53][230/3239]	Time 2.443 (0.820)	Data Time 0.002 (0.153)	Loss 2.7051 (2.6521)	Entropy 1.14954 (1.14986)	Top-1 acc 59.766 (60.342)	Top-5 acc 81.250 (81.314)	lr 0.01508
Train [53][240/3239]	Time 0.216 (0.796)	Data Time 0.001 (0.147)	Loss 2.7391 (2.6513)	Entropy 1.14955 (1.14985)	Top-1 acc 60.156 (60.399)	Top-5 acc 78.125 (81.320)	lr 0.01508
Train [53][250/3239]	Time 0.232 (0.782)	Data Time 0.001 (0.141)	Loss 2.7130 (2.6489)	Entropy 1.14947 (1.14983)	Top-1 acc 57.422 (60.393)	Top-5 acc 79.688 (81.373)	lr 0.01507
Train [53][260/3239]	Time 0.217 (0.770)	Data Time 0.001 (0.136)	Loss 2.6878 (2.6492)	Entropy 1.14945 (1.14982)	Top-1 acc 60.156 (60.391)	Top-5 acc 83.203 (81.398)	lr 0.01507
Train [53][270/3239]	Time 0.219 (0.758)	Data Time 0.002 (0.131)	Loss 2.4719 (2.6483)	Entropy 1.14942 (1.14980)	Top-1 acc 64.453 (60.433)	Top-5 acc 83.984 (81.383)	lr 0.01507
Train [53][280/3239]	Time 0.215 (0.748)	Data Time 0.001 (0.126)	Loss 2.4921 (2.6474)	Entropy 1.14941 (1.14979)	Top-1 acc 62.891 (60.458)	Top-5 acc 85.156 (81.429)	lr 0.01507
Train [53][290/3239]	Time 0.315 (0.738)	Data Time 0.001 (0.122)	Loss 2.4945 (2.6480)	Entropy 1.14944 (1.14978)	Top-1 acc 59.766 (60.452)	Top-5 acc 84.375 (81.406)	lr 0.01507
Train [53][300/3239]	Time 0.218 (0.729)	Data Time 0.001 (0.118)	Loss 2.9561 (2.6484)	Entropy 1.14940 (1.14977)	Top-1 acc 53.906 (60.440)	Top-5 acc 75.000 (81.408)	lr 0.01507
Train [53][310/3239]	Time 0.204 (0.720)	Data Time 0.001 (0.114)	Loss 2.5685 (2.6466)	Entropy 1.14937 (1.14975)	Top-1 acc 63.672 (60.474)	Top-5 acc 83.203 (81.426)	lr 0.01507
Train [53][320/3239]	Time 0.224 (0.712)	Data Time 0.001 (0.111)	Loss 2.6918 (2.6459)	Entropy 1.14936 (1.14974)	Top-1 acc 58.203 (60.470)	Top-5 acc 80.078 (81.419)	lr 0.01507
Train [53][330/3239]	Time 0.235 (0.706)	Data Time 0.001 (0.107)	Loss 2.5870 (2.6463)	Entropy 1.14930 (1.14973)	Top-1 acc 60.547 (60.463)	Top-5 acc 82.031 (81.405)	lr 0.01507
Train [53][340/3239]	Time 2.714 (0.700)	Data Time 0.002 (0.104)	Loss 2.8328 (2.6456)	Entropy 1.14930 (1.14972)	Top-1 acc 55.859 (60.493)	Top-5 acc 78.125 (81.426)	lr 0.01507
Train [53][350/3239]	Time 0.224 (0.687)	Data Time 0.001 (0.101)	Loss 2.4461 (2.6463)	Entropy 1.14932 (1.14971)	Top-1 acc 65.234 (60.472)	Top-5 acc 85.938 (81.414)	lr 0.01506
Train [53][360/3239]	Time 0.222 (0.681)	Data Time 0.001 (0.098)	Loss 2.7358 (2.6464)	Entropy 1.14931 (1.14970)	Top-1 acc 56.250 (60.456)	Top-5 acc 80.859 (81.407)	lr 0.01506
Train [53][370/3239]	Time 0.216 (0.675)	Data Time 0.001 (0.096)	Loss 2.6609 (2.6462)	Entropy 1.14929 (1.14968)	Top-1 acc 58.984 (60.469)	Top-5 acc 80.469 (81.397)	lr 0.01506
Train [53][380/3239]	Time 0.227 (0.669)	Data Time 0.001 (0.093)	Loss 2.6812 (2.6458)	Entropy 1.14928 (1.14967)	Top-1 acc 55.469 (60.439)	Top-5 acc 82.031 (81.429)	lr 0.01506
Train [53][390/3239]	Time 0.342 (0.664)	Data Time 0.001 (0.091)	Loss 2.6073 (2.6466)	Entropy 1.14922 (1.14966)	Top-1 acc 62.891 (60.423)	Top-5 acc 83.984 (81.391)	lr 0.01506
Train [53][400/3239]	Time 0.259 (0.659)	Data Time 0.001 (0.089)	Loss 2.5696 (2.6465)	Entropy 1.14921 (1.14965)	Top-1 acc 62.500 (60.435)	Top-5 acc 84.375 (81.389)	lr 0.01506
Train [53][410/3239]	Time 0.225 (0.654)	Data Time 0.001 (0.087)	Loss 2.7252 (2.6467)	Entropy 1.14917 (1.14964)	Top-1 acc 57.422 (60.425)	Top-5 acc 80.469 (81.396)	lr 0.01506
Train [53][420/3239]	Time 0.233 (0.650)	Data Time 0.001 (0.085)	Loss 2.6369 (2.6471)	Entropy 1.14917 (1.14963)	Top-1 acc 63.281 (60.414)	Top-5 acc 81.641 (81.404)	lr 0.01506
Train [53][430/3239]	Time 0.230 (0.646)	Data Time 0.001 (0.083)	Loss 2.6517 (2.6485)	Entropy 1.14916 (1.14962)	Top-1 acc 64.062 (60.370)	Top-5 acc 81.641 (81.381)	lr 0.01506
Train [53][440/3239]	Time 0.369 (0.643)	Data Time 0.001 (0.081)	Loss 2.7208 (2.6472)	Entropy 1.14917 (1.14961)	Top-1 acc 60.547 (60.430)	Top-5 acc 78.516 (81.386)	lr 0.01506
Train [53][450/3239]	Time 2.616 (0.639)	Data Time 0.001 (0.079)	Loss 2.5593 (2.6465)	Entropy 1.14917 (1.14960)	Top-1 acc 63.281 (60.448)	Top-5 acc 83.594 (81.396)	lr 0.01505
Train [53][460/3239]	Time 0.234 (0.631)	Data Time 0.002 (0.077)	Loss 2.5699 (2.6457)	Entropy 1.14911 (1.14959)	Top-1 acc 62.500 (60.483)	Top-5 acc 81.641 (81.400)	lr 0.01505
Train [53][470/3239]	Time 0.225 (0.627)	Data Time 0.001 (0.076)	Loss 2.8408 (2.6457)	Entropy 1.14909 (1.14958)	Top-1 acc 55.469 (60.483)	Top-5 acc 78.516 (81.388)	lr 0.01505
Train [53][480/3239]	Time 0.227 (0.623)	Data Time 0.001 (0.074)	Loss 2.5807 (2.6458)	Entropy 1.14906 (1.14957)	Top-1 acc 64.453 (60.472)	Top-5 acc 83.203 (81.390)	lr 0.01505
Train [53][490/3239]	Time 0.341 (0.620)	Data Time 0.002 (0.073)	Loss 2.5197 (2.6448)	Entropy 1.14903 (1.14956)	Top-1 acc 62.109 (60.503)	Top-5 acc 83.594 (81.417)	lr 0.01505
Train [53][500/3239]	Time 0.216 (0.617)	Data Time 0.001 (0.071)	Loss 2.5535 (2.6442)	Entropy 1.14895 (1.14954)	Top-1 acc 62.109 (60.520)	Top-5 acc 82.812 (81.437)	lr 0.01505
Train [53][510/3239]	Time 0.314 (0.703)	Data Time 0.003 (0.070)	Loss 2.4868 (2.6440)	Entropy 1.14894 (1.14953)	Top-1 acc 67.969 (60.511)	Top-5 acc 84.375 (81.448)	lr 0.01505
Train [53][520/3239]	Time 0.235 (0.701)	Data Time 0.002 (0.069)	Loss 2.6771 (2.6435)	Entropy 1.14892 (1.14952)	Top-1 acc 59.375 (60.524)	Top-5 acc 81.250 (81.450)	lr 0.01505
Train [53][530/3239]	Time 0.206 (0.697)	Data Time 0.001 (0.068)	Loss 2.6506 (2.6448)	Entropy 1.14880 (1.14951)	Top-1 acc 59.766 (60.498)	Top-5 acc 81.250 (81.423)	lr 0.01505
Train [53][540/3239]	Time 0.233 (0.693)	Data Time 0.002 (0.066)	Loss 2.5626 (2.6437)	Entropy 1.14880 (1.14950)	Top-1 acc 61.328 (60.507)	Top-5 acc 82.812 (81.438)	lr 0.01505
Train [53][550/3239]	Time 0.268 (0.689)	Data Time 0.001 (0.065)	Loss 2.7802 (2.6439)	Entropy 1.14874 (1.14948)	Top-1 acc 57.422 (60.482)	Top-5 acc 78.516 (81.424)	lr 0.01504
Train [53][560/3239]	Time 2.481 (0.685)	Data Time 0.001 (0.064)	Loss 2.6725 (2.6430)	Entropy 1.14874 (1.14947)	Top-1 acc 60.547 (60.526)	Top-5 acc 81.250 (81.441)	lr 0.01504
Train [53][570/3239]	Time 0.273 (0.677)	Data Time 0.002 (0.063)	Loss 2.6398 (2.6433)	Entropy 1.14874 (1.14946)	Top-1 acc 57.422 (60.520)	Top-5 acc 80.469 (81.418)	lr 0.01504
Train [53][580/3239]	Time 0.243 (0.673)	Data Time 0.001 (0.062)	Loss 2.5596 (2.6434)	Entropy 1.14871 (1.14944)	Top-1 acc 66.406 (60.509)	Top-5 acc 82.422 (81.417)	lr 0.01504
Train [53][590/3239]	Time 0.225 (0.670)	Data Time 0.002 (0.061)	Loss 2.5525 (2.6428)	Entropy 1.14864 (1.14943)	Top-1 acc 62.500 (60.524)	Top-5 acc 83.594 (81.426)	lr 0.01504
Train [53][600/3239]	Time 0.227 (0.667)	Data Time 0.002 (0.060)	Loss 2.6276 (2.6420)	Entropy 1.14863 (1.14942)	Top-1 acc 62.109 (60.534)	Top-5 acc 80.859 (81.446)	lr 0.01504
Train [53][610/3239]	Time 0.232 (0.664)	Data Time 0.001 (0.059)	Loss 2.4801 (2.6422)	Entropy 1.14859 (1.14940)	Top-1 acc 67.578 (60.526)	Top-5 acc 86.328 (81.443)	lr 0.01504
Train [53][620/3239]	Time 0.270 (0.660)	Data Time 0.001 (0.058)	Loss 2.7717 (2.6419)	Entropy 1.14860 (1.14939)	Top-1 acc 59.375 (60.544)	Top-5 acc 79.297 (81.441)	lr 0.01504
Train [53][630/3239]	Time 0.328 (0.657)	Data Time 0.001 (0.057)	Loss 2.7210 (2.6421)	Entropy 1.14860 (1.14938)	Top-1 acc 54.688 (60.521)	Top-5 acc 81.641 (81.444)	lr 0.01504
Train [53][640/3239]	Time 0.220 (0.654)	Data Time 0.001 (0.056)	Loss 2.6269 (2.6426)	Entropy 1.14857 (1.14937)	Top-1 acc 64.453 (60.517)	Top-5 acc 80.078 (81.424)	lr 0.01504
Train [53][650/3239]	Time 0.211 (0.651)	Data Time 0.001 (0.055)	Loss 2.8317 (2.6429)	Entropy 1.14858 (1.14935)	Top-1 acc 56.641 (60.517)	Top-5 acc 77.344 (81.409)	lr 0.01503
Train [53][660/3239]	Time 0.209 (0.649)	Data Time 0.001 (0.055)	Loss 2.4416 (2.6430)	Entropy 1.14858 (1.14934)	Top-1 acc 65.234 (60.516)	Top-5 acc 85.938 (81.407)	lr 0.01503
Train [53][670/3239]	Time 2.504 (0.646)	Data Time 0.001 (0.054)	Loss 2.4259 (2.6428)	Entropy 1.14858 (1.14933)	Top-1 acc 66.406 (60.538)	Top-5 acc 85.547 (81.402)	lr 0.01503
Train [53][680/3239]	Time 0.216 (0.640)	Data Time 0.001 (0.053)	Loss 2.5658 (2.6417)	Entropy 1.14859 (1.14932)	Top-1 acc 60.547 (60.561)	Top-5 acc 84.766 (81.420)	lr 0.01503
Train [53][690/3239]	Time 0.208 (0.637)	Data Time 0.001 (0.052)	Loss 2.6877 (2.6417)	Entropy 1.14853 (1.14931)	Top-1 acc 59.766 (60.549)	Top-5 acc 82.812 (81.434)	lr 0.01503
Train [53][700/3239]	Time 0.229 (0.635)	Data Time 0.001 (0.052)	Loss 2.6289 (2.6415)	Entropy 1.14854 (1.14930)	Top-1 acc 59.766 (60.534)	Top-5 acc 85.156 (81.455)	lr 0.01503
Train [53][710/3239]	Time 0.232 (0.632)	Data Time 0.001 (0.051)	Loss 2.8005 (2.6411)	Entropy 1.14853 (1.14929)	Top-1 acc 55.469 (60.551)	Top-5 acc 76.172 (81.447)	lr 0.01503
Train [53][720/3239]	Time 0.219 (0.630)	Data Time 0.001 (0.050)	Loss 2.3859 (2.6415)	Entropy 1.14852 (1.14928)	Top-1 acc 64.453 (60.537)	Top-5 acc 86.328 (81.446)	lr 0.01503
Train [53][730/3239]	Time 0.226 (0.628)	Data Time 0.001 (0.049)	Loss 2.7269 (2.6419)	Entropy 1.14852 (1.14927)	Top-1 acc 57.031 (60.534)	Top-5 acc 76.953 (81.434)	lr 0.01503
Train [53][740/3239]	Time 0.225 (0.625)	Data Time 0.001 (0.049)	Loss 3.1292 (2.6426)	Entropy 1.14850 (1.14926)	Top-1 acc 48.438 (60.522)	Top-5 acc 72.266 (81.432)	lr 0.01503
Train [53][750/3239]	Time 0.219 (0.623)	Data Time 0.001 (0.048)	Loss 2.8810 (2.6430)	Entropy 1.14849 (1.14925)	Top-1 acc 54.297 (60.520)	Top-5 acc 76.172 (81.432)	lr 0.01502
Train [53][760/3239]	Time 0.215 (0.621)	Data Time 0.001 (0.048)	Loss 2.6050 (2.6430)	Entropy 1.14844 (1.14924)	Top-1 acc 59.375 (60.512)	Top-5 acc 80.078 (81.424)	lr 0.01502
Train [53][770/3239]	Time 0.169 (0.619)	Data Time 0.002 (0.047)	Loss 2.5359 (2.6422)	Entropy 1.14843 (1.14923)	Top-1 acc 61.719 (60.523)	Top-5 acc 82.422 (81.438)	lr 0.01502
Train [53][780/3239]	Time 2.459 (0.617)	Data Time 0.001 (0.046)	Loss 2.5808 (2.6419)	Entropy 1.14843 (1.14922)	Top-1 acc 62.500 (60.529)	Top-5 acc 81.250 (81.443)	lr 0.01502
Train [53][790/3239]	Time 0.222 (0.612)	Data Time 0.001 (0.046)	Loss 2.5391 (2.6412)	Entropy 1.14838 (1.14920)	Top-1 acc 63.281 (60.533)	Top-5 acc 84.766 (81.454)	lr 0.01502
Train [53][800/3239]	Time 0.230 (0.610)	Data Time 0.001 (0.045)	Loss 2.7685 (2.6413)	Entropy 1.14831 (1.14919)	Top-1 acc 56.641 (60.529)	Top-5 acc 77.734 (81.451)	lr 0.01502
Train [53][810/3239]	Time 0.169 (0.608)	Data Time 0.002 (0.045)	Loss 2.7452 (2.6418)	Entropy 1.14826 (1.14918)	Top-1 acc 59.375 (60.511)	Top-5 acc 78.516 (81.433)	lr 0.01502
Train [53][820/3239]	Time 0.377 (0.607)	Data Time 0.001 (0.044)	Loss 2.4941 (2.6417)	Entropy 1.14824 (1.14917)	Top-1 acc 62.109 (60.510)	Top-5 acc 86.328 (81.438)	lr 0.01502
Train [53][830/3239]	Time 0.244 (0.605)	Data Time 0.002 (0.044)	Loss 2.6133 (2.6424)	Entropy 1.14802 (1.14916)	Top-1 acc 62.500 (60.482)	Top-5 acc 80.859 (81.424)	lr 0.01502
Train [53][840/3239]	Time 0.227 (0.603)	Data Time 0.001 (0.043)	Loss 2.5474 (2.6422)	Entropy 1.14801 (1.14914)	Top-1 acc 59.375 (60.480)	Top-5 acc 85.156 (81.433)	lr 0.01502
Train [53][850/3239]	Time 0.234 (0.602)	Data Time 0.001 (0.043)	Loss 2.6251 (2.6426)	Entropy 1.14796 (1.14913)	Top-1 acc 58.984 (60.468)	Top-5 acc 82.422 (81.424)	lr 0.01501
Train [53][860/3239]	Time 0.221 (0.600)	Data Time 0.002 (0.042)	Loss 2.4932 (2.6423)	Entropy 1.14794 (1.14912)	Top-1 acc 65.625 (60.475)	Top-5 acc 83.203 (81.425)	lr 0.01501
Train [53][870/3239]	Time 0.222 (0.656)	Data Time 0.002 (0.042)	Loss 2.6404 (2.6423)	Entropy 1.14793 (1.14910)	Top-1 acc 57.812 (60.472)	Top-5 acc 83.203 (81.421)	lr 0.01501
Train [53][880/3239]	Time 0.237 (0.654)	Data Time 0.002 (0.041)	Loss 2.5422 (2.6421)	Entropy 1.14793 (1.14909)	Top-1 acc 64.062 (60.478)	Top-5 acc 82.031 (81.428)	lr 0.01501
Train [53][890/3239]	Time 2.403 (0.652)	Data Time 0.002 (0.041)	Loss 2.7673 (2.6422)	Entropy 1.14793 (1.14908)	Top-1 acc 53.906 (60.471)	Top-5 acc 80.859 (81.421)	lr 0.01501
Train [53][900/3239]	Time 0.227 (0.647)	Data Time 0.001 (0.041)	Loss 2.5095 (2.6419)	Entropy 1.14791 (1.14906)	Top-1 acc 63.281 (60.479)	Top-5 acc 84.766 (81.427)	lr 0.01501
Train [53][910/3239]	Time 0.244 (0.645)	Data Time 0.002 (0.040)	Loss 2.6637 (2.6423)	Entropy 1.14790 (1.14905)	Top-1 acc 59.766 (60.484)	Top-5 acc 82.031 (81.421)	lr 0.01501
Train [53][920/3239]	Time 0.223 (0.643)	Data Time 0.001 (0.040)	Loss 2.8199 (2.6419)	Entropy 1.14784 (1.14904)	Top-1 acc 54.688 (60.493)	Top-5 acc 78.125 (81.426)	lr 0.01501
Train [53][930/3239]	Time 0.240 (0.641)	Data Time 0.001 (0.039)	Loss 2.4816 (2.6414)	Entropy 1.14782 (1.14903)	Top-1 acc 63.672 (60.508)	Top-5 acc 83.203 (81.441)	lr 0.01501
Train [53][940/3239]	Time 0.218 (0.639)	Data Time 0.001 (0.039)	Loss 2.7702 (2.6412)	Entropy 1.14784 (1.14901)	Top-1 acc 59.766 (60.509)	Top-5 acc 78.516 (81.441)	lr 0.01501
Train [53][950/3239]	Time 0.218 (0.637)	Data Time 0.001 (0.038)	Loss 2.5711 (2.6413)	Entropy 1.14784 (1.14900)	Top-1 acc 60.938 (60.500)	Top-5 acc 83.594 (81.445)	lr 0.01500
Train [53][960/3239]	Time 0.225 (0.635)	Data Time 0.001 (0.038)	Loss 2.6957 (2.6417)	Entropy 1.14782 (1.14899)	Top-1 acc 59.375 (60.484)	Top-5 acc 82.422 (81.436)	lr 0.01500
Train [53][970/3239]	Time 0.231 (0.634)	Data Time 0.001 (0.038)	Loss 2.7727 (2.6417)	Entropy 1.14786 (1.14898)	Top-1 acc 57.422 (60.478)	Top-5 acc 78.125 (81.437)	lr 0.01500
Train [53][980/3239]	Time 0.223 (0.632)	Data Time 0.001 (0.037)	Loss 2.7177 (2.6423)	Entropy 1.14786 (1.14897)	Top-1 acc 59.375 (60.464)	Top-5 acc 82.031 (81.434)	lr 0.01500
Train [53][990/3239]	Time 0.219 (0.630)	Data Time 0.002 (0.037)	Loss 2.6587 (2.6421)	Entropy 1.14789 (1.14895)	Top-1 acc 60.938 (60.457)	Top-5 acc 81.641 (81.437)	lr 0.01500
Train [53][1000/3239]	Time 2.508 (0.629)	Data Time 0.002 (0.037)	Loss 2.5803 (2.6419)	Entropy 1.14789 (1.14894)	Top-1 acc 64.844 (60.465)	Top-5 acc 82.031 (81.440)	lr 0.01500
Train [53][1010/3239]	Time 0.321 (0.625)	Data Time 0.001 (0.036)	Loss 2.6492 (2.6420)	Entropy 1.14790 (1.14893)	Top-1 acc 60.547 (60.461)	Top-5 acc 83.984 (81.439)	lr 0.01500
Train [53][1020/3239]	Time 0.242 (0.623)	Data Time 0.001 (0.036)	Loss 2.6859 (2.6421)	Entropy 1.14785 (1.14892)	Top-1 acc 58.594 (60.464)	Top-5 acc 84.375 (81.441)	lr 0.01500
Train [53][1030/3239]	Time 0.198 (0.622)	Data Time 0.001 (0.036)	Loss 2.8198 (2.6426)	Entropy 1.14786 (1.14891)	Top-1 acc 57.031 (60.459)	Top-5 acc 76.953 (81.427)	lr 0.01500
Train [53][1040/3239]	Time 0.221 (0.620)	Data Time 0.001 (0.035)	Loss 2.5346 (2.6424)	Entropy 1.14784 (1.14890)	Top-1 acc 61.719 (60.461)	Top-5 acc 84.766 (81.428)	lr 0.01500
Train [53][1050/3239]	Time 0.247 (0.619)	Data Time 0.002 (0.035)	Loss 2.5921 (2.6422)	Entropy 1.14785 (1.14889)	Top-1 acc 60.547 (60.456)	Top-5 acc 83.984 (81.435)	lr 0.01500
Train [53][1060/3239]	Time 0.250 (0.617)	Data Time 0.001 (0.035)	Loss 2.6697 (2.6422)	Entropy 1.14782 (1.14888)	Top-1 acc 61.328 (60.466)	Top-5 acc 82.031 (81.433)	lr 0.01499
Train [53][1070/3239]	Time 0.235 (0.616)	Data Time 0.002 (0.034)	Loss 2.7245 (2.6419)	Entropy 1.14778 (1.14887)	Top-1 acc 59.766 (60.474)	Top-5 acc 81.250 (81.437)	lr 0.01499
Train [53][1080/3239]	Time 0.170 (0.614)	Data Time 0.001 (0.034)	Loss 2.7183 (2.6426)	Entropy 1.14776 (1.14886)	Top-1 acc 57.031 (60.462)	Top-5 acc 78.906 (81.419)	lr 0.01499
Train [53][1090/3239]	Time 0.253 (0.613)	Data Time 0.001 (0.034)	Loss 2.6211 (2.6428)	Entropy 1.14770 (1.14885)	Top-1 acc 59.766 (60.452)	Top-5 acc 80.469 (81.409)	lr 0.01499
Train [53][1100/3239]	Time 0.235 (0.612)	Data Time 0.001 (0.034)	Loss 2.6157 (2.6430)	Entropy 1.14765 (1.14884)	Top-1 acc 59.375 (60.445)	Top-5 acc 82.422 (81.404)	lr 0.01499
Train [53][1110/3239]	Time 2.415 (0.610)	Data Time 0.001 (0.033)	Loss 2.6031 (2.6431)	Entropy 1.14765 (1.14883)	Top-1 acc 59.375 (60.433)	Top-5 acc 84.375 (81.410)	lr 0.01499
Train [53][1120/3239]	Time 0.248 (0.607)	Data Time 0.001 (0.033)	Loss 2.5685 (2.6433)	Entropy 1.14756 (1.14882)	Top-1 acc 64.453 (60.440)	Top-5 acc 84.375 (81.411)	lr 0.01499
Train [53][1130/3239]	Time 0.223 (0.606)	Data Time 0.001 (0.033)	Loss 2.7136 (2.6437)	Entropy 1.14754 (1.14881)	Top-1 acc 58.984 (60.428)	Top-5 acc 77.734 (81.396)	lr 0.01499
Train [53][1140/3239]	Time 0.221 (0.605)	Data Time 0.001 (0.032)	Loss 2.9040 (2.6439)	Entropy 1.14754 (1.14880)	Top-1 acc 57.031 (60.429)	Top-5 acc 76.172 (81.390)	lr 0.01499
Train [53][1150/3239]	Time 0.333 (0.603)	Data Time 0.001 (0.032)	Loss 2.7548 (2.6438)	Entropy 1.14753 (1.14879)	Top-1 acc 57.422 (60.427)	Top-5 acc 81.250 (81.394)	lr 0.01499
Train [53][1160/3239]	Time 0.216 (0.602)	Data Time 0.001 (0.032)	Loss 2.7722 (2.6437)	Entropy 1.14754 (1.14877)	Top-1 acc 53.906 (60.428)	Top-5 acc 79.688 (81.396)	lr 0.01498
Train [53][1170/3239]	Time 0.244 (0.601)	Data Time 0.002 (0.032)	Loss 2.5078 (2.6429)	Entropy 1.14750 (1.14876)	Top-1 acc 64.453 (60.449)	Top-5 acc 85.156 (81.414)	lr 0.01498
Train [53][1180/3239]	Time 0.213 (0.600)	Data Time 0.001 (0.031)	Loss 2.7311 (2.6430)	Entropy 1.14743 (1.14875)	Top-1 acc 59.766 (60.446)	Top-5 acc 82.031 (81.418)	lr 0.01498
Train [53][1190/3239]	Time 0.223 (0.599)	Data Time 0.001 (0.031)	Loss 2.6158 (2.6435)	Entropy 1.14741 (1.14874)	Top-1 acc 60.547 (60.435)	Top-5 acc 81.250 (81.408)	lr 0.01498
Train [53][1200/3239]	Time 0.262 (0.598)	Data Time 0.001 (0.031)	Loss 2.6161 (2.6438)	Entropy 1.14738 (1.14873)	Top-1 acc 65.234 (60.432)	Top-5 acc 81.250 (81.402)	lr 0.01498
Train [53][1210/3239]	Time 0.230 (0.597)	Data Time 0.001 (0.031)	Loss 2.4897 (2.6436)	Entropy 1.14737 (1.14872)	Top-1 acc 63.281 (60.437)	Top-5 acc 84.375 (81.405)	lr 0.01498
Train [53][1220/3239]	Time 2.509 (0.595)	Data Time 0.001 (0.030)	Loss 2.6248 (2.6438)	Entropy 1.14737 (1.14871)	Top-1 acc 59.375 (60.439)	Top-5 acc 83.594 (81.402)	lr 0.01498
Train [53][1230/3239]	Time 0.217 (0.592)	Data Time 0.001 (0.030)	Loss 2.6164 (2.6438)	Entropy 1.14736 (1.14870)	Top-1 acc 58.594 (60.435)	Top-5 acc 81.641 (81.406)	lr 0.01498
Train [53][1240/3239]	Time 0.216 (0.633)	Data Time 0.002 (0.030)	Loss 2.5230 (2.6436)	Entropy 1.14736 (1.14869)	Top-1 acc 61.719 (60.447)	Top-5 acc 85.547 (81.413)	lr 0.01498
Train [53][1250/3239]	Time 0.224 (0.631)	Data Time 0.002 (0.030)	Loss 2.7704 (2.6435)	Entropy 1.14734 (1.14868)	Top-1 acc 57.031 (60.448)	Top-5 acc 78.906 (81.417)	lr 0.01498
Train [53][1260/3239]	Time 0.234 (0.630)	Data Time 0.001 (0.029)	Loss 2.6978 (2.6435)	Entropy 1.14730 (1.14866)	Top-1 acc 57.031 (60.435)	Top-5 acc 79.688 (81.419)	lr 0.01497
Train [53][1270/3239]	Time 0.221 (0.629)	Data Time 0.001 (0.029)	Loss 2.5210 (2.6435)	Entropy 1.14720 (1.14865)	Top-1 acc 63.281 (60.434)	Top-5 acc 87.109 (81.418)	lr 0.01497
Train [53][1280/3239]	Time 0.221 (0.627)	Data Time 0.001 (0.029)	Loss 2.7612 (2.6441)	Entropy 1.14717 (1.14864)	Top-1 acc 59.375 (60.417)	Top-5 acc 76.562 (81.402)	lr 0.01497
Train [53][1290/3239]	Time 0.226 (0.626)	Data Time 0.002 (0.029)	Loss 2.6466 (2.6442)	Entropy 1.14715 (1.14863)	Top-1 acc 59.766 (60.420)	Top-5 acc 83.594 (81.402)	lr 0.01497
Train [53][1300/3239]	Time 0.220 (0.625)	Data Time 0.001 (0.029)	Loss 2.5036 (2.6435)	Entropy 1.14713 (1.14862)	Top-1 acc 63.281 (60.438)	Top-5 acc 84.766 (81.414)	lr 0.01497
Train [53][1310/3239]	Time 0.224 (0.624)	Data Time 0.001 (0.028)	Loss 2.7003 (2.6436)	Entropy 1.14706 (1.14861)	Top-1 acc 55.859 (60.428)	Top-5 acc 82.031 (81.407)	lr 0.01497
Train [53][1320/3239]	Time 0.226 (0.623)	Data Time 0.001 (0.028)	Loss 2.6465 (2.6439)	Entropy 1.14704 (1.14860)	Top-1 acc 58.203 (60.418)	Top-5 acc 80.859 (81.402)	lr 0.01497
Train [53][1330/3239]	Time 2.369 (0.621)	Data Time 0.001 (0.028)	Loss 2.8127 (2.6439)	Entropy 1.14704 (1.14858)	Top-1 acc 55.469 (60.413)	Top-5 acc 78.516 (81.408)	lr 0.01497
Train [53][1340/3239]	Time 0.293 (0.619)	Data Time 0.001 (0.028)	Loss 2.6633 (2.6441)	Entropy 1.14696 (1.14857)	Top-1 acc 58.984 (60.404)	Top-5 acc 82.812 (81.405)	lr 0.01497
Train [53][1350/3239]	Time 0.225 (0.617)	Data Time 0.001 (0.028)	Loss 2.5337 (2.6441)	Entropy 1.14692 (1.14856)	Top-1 acc 62.891 (60.401)	Top-5 acc 82.812 (81.408)	lr 0.01497
Train [53][1360/3239]	Time 0.228 (0.616)	Data Time 0.001 (0.027)	Loss 2.8894 (2.6447)	Entropy 1.14700 (1.14855)	Top-1 acc 56.250 (60.397)	Top-5 acc 79.297 (81.398)	lr 0.01496
Train [53][1370/3239]	Time 0.227 (0.615)	Data Time 0.001 (0.027)	Loss 2.5779 (2.6450)	Entropy 1.14698 (1.14854)	Top-1 acc 63.672 (60.391)	Top-5 acc 82.031 (81.390)	lr 0.01496
Train [53][1380/3239]	Time 0.220 (0.614)	Data Time 0.001 (0.027)	Loss 2.7722 (2.6449)	Entropy 1.14697 (1.14853)	Top-1 acc 54.688 (60.384)	Top-5 acc 82.031 (81.395)	lr 0.01496
Train [53][1390/3239]	Time 0.308 (0.613)	Data Time 0.001 (0.027)	Loss 2.7253 (2.6452)	Entropy 1.14691 (1.14851)	Top-1 acc 57.031 (60.377)	Top-5 acc 80.469 (81.392)	lr 0.01496
Train [53][1400/3239]	Time 0.210 (0.612)	Data Time 0.001 (0.027)	Loss 2.7043 (2.6456)	Entropy 1.14684 (1.14850)	Top-1 acc 58.203 (60.371)	Top-5 acc 81.250 (81.380)	lr 0.01496
Train [53][1410/3239]	Time 0.214 (0.611)	Data Time 0.001 (0.026)	Loss 2.5333 (2.6458)	Entropy 1.14680 (1.14849)	Top-1 acc 62.891 (60.363)	Top-5 acc 84.766 (81.373)	lr 0.01496
Train [53][1420/3239]	Time 0.221 (0.610)	Data Time 0.001 (0.026)	Loss 2.7194 (2.6458)	Entropy 1.14676 (1.14848)	Top-1 acc 61.328 (60.365)	Top-5 acc 78.125 (81.371)	lr 0.01496
Train [53][1430/3239]	Time 0.225 (0.609)	Data Time 0.001 (0.026)	Loss 2.6790 (2.6462)	Entropy 1.14673 (1.14847)	Top-1 acc 60.156 (60.361)	Top-5 acc 79.297 (81.364)	lr 0.01496
Train [53][1440/3239]	Time 2.508 (0.608)	Data Time 0.002 (0.026)	Loss 2.8025 (2.6467)	Entropy 1.14673 (1.14845)	Top-1 acc 55.469 (60.347)	Top-5 acc 79.297 (81.353)	lr 0.01496
Train [53][1450/3239]	Time 0.226 (0.605)	Data Time 0.001 (0.026)	Loss 2.5337 (2.6470)	Entropy 1.14672 (1.14844)	Top-1 acc 64.453 (60.342)	Top-5 acc 81.250 (81.345)	lr 0.01496
Train [53][1460/3239]	Time 0.225 (0.604)	Data Time 0.001 (0.026)	Loss 2.5438 (2.6473)	Entropy 1.14671 (1.14843)	Top-1 acc 64.453 (60.333)	Top-5 acc 82.031 (81.336)	lr 0.01495
Train [53][1470/3239]	Time 0.233 (0.603)	Data Time 0.001 (0.025)	Loss 2.6048 (2.6470)	Entropy 1.14662 (1.14842)	Top-1 acc 57.422 (60.336)	Top-5 acc 84.375 (81.343)	lr 0.01495
Train [53][1480/3239]	Time 0.233 (0.602)	Data Time 0.001 (0.025)	Loss 2.6860 (2.6476)	Entropy 1.14662 (1.14841)	Top-1 acc 59.766 (60.325)	Top-5 acc 82.422 (81.333)	lr 0.01495
Train [53][1490/3239]	Time 0.216 (0.601)	Data Time 0.001 (0.025)	Loss 2.8638 (2.6479)	Entropy 1.14659 (1.14839)	Top-1 acc 53.906 (60.313)	Top-5 acc 76.953 (81.326)	lr 0.01495
Train [53][1500/3239]	Time 0.224 (0.600)	Data Time 0.001 (0.025)	Loss 2.4852 (2.6477)	Entropy 1.14658 (1.14838)	Top-1 acc 62.500 (60.313)	Top-5 acc 86.328 (81.333)	lr 0.01495
Train [53][1510/3239]	Time 0.220 (0.600)	Data Time 0.001 (0.025)	Loss 2.4576 (2.6476)	Entropy 1.14656 (1.14837)	Top-1 acc 62.891 (60.310)	Top-5 acc 85.547 (81.334)	lr 0.01495
Train [53][1520/3239]	Time 0.225 (0.599)	Data Time 0.001 (0.025)	Loss 2.7209 (2.6476)	Entropy 1.14657 (1.14836)	Top-1 acc 56.641 (60.308)	Top-5 acc 79.688 (81.332)	lr 0.01495
Train [53][1530/3239]	Time 0.257 (0.598)	Data Time 0.001 (0.025)	Loss 2.9041 (2.6478)	Entropy 1.14647 (1.14835)	Top-1 acc 53.516 (60.299)	Top-5 acc 77.734 (81.332)	lr 0.01495
Train [53][1540/3239]	Time 0.204 (0.597)	Data Time 0.001 (0.024)	Loss 2.6038 (2.6480)	Entropy 1.14641 (1.14833)	Top-1 acc 62.891 (60.291)	Top-5 acc 80.859 (81.325)	lr 0.01495
Train [53][1550/3239]	Time 2.463 (0.596)	Data Time 0.001 (0.024)	Loss 2.6676 (2.6482)	Entropy 1.14641 (1.14832)	Top-1 acc 60.547 (60.291)	Top-5 acc 82.422 (81.323)	lr 0.01495
Train [53][1560/3239]	Time 0.227 (0.593)	Data Time 0.001 (0.024)	Loss 2.5719 (2.6483)	Entropy 1.14639 (1.14831)	Top-1 acc 61.328 (60.285)	Top-5 acc 83.594 (81.324)	lr 0.01494
Train [53][1570/3239]	Time 0.275 (0.593)	Data Time 0.001 (0.024)	Loss 2.5371 (2.6481)	Entropy 1.14637 (1.14830)	Top-1 acc 63.281 (60.288)	Top-5 acc 81.641 (81.326)	lr 0.01494
Train [53][1580/3239]	Time 0.347 (0.592)	Data Time 0.001 (0.024)	Loss 2.6884 (2.6489)	Entropy 1.14635 (1.14829)	Top-1 acc 60.547 (60.279)	Top-5 acc 81.250 (81.316)	lr 0.01494
Train [53][1590/3239]	Time 0.160 (0.591)	Data Time 0.001 (0.024)	Loss 2.8033 (2.6490)	Entropy 1.14634 (1.14827)	Top-1 acc 55.859 (60.274)	Top-5 acc 77.734 (81.311)	lr 0.01494
Train [53][1600/3239]	Time 0.235 (0.621)	Data Time 0.002 (0.024)	Loss 2.7557 (2.6495)	Entropy 1.14631 (1.14826)	Top-1 acc 55.469 (60.258)	Top-5 acc 80.469 (81.307)	lr 0.01494
Train [53][1610/3239]	Time 0.220 (0.620)	Data Time 0.002 (0.023)	Loss 2.6188 (2.6496)	Entropy 1.14633 (1.14825)	Top-1 acc 57.812 (60.247)	Top-5 acc 81.641 (81.296)	lr 0.01494
Train [53][1620/3239]	Time 0.222 (0.619)	Data Time 0.002 (0.023)	Loss 2.6310 (2.6497)	Entropy 1.14627 (1.14824)	Top-1 acc 65.625 (60.251)	Top-5 acc 82.031 (81.294)	lr 0.01494
Train [53][1630/3239]	Time 0.316 (0.618)	Data Time 0.001 (0.023)	Loss 2.5538 (2.6493)	Entropy 1.14627 (1.14822)	Top-1 acc 64.453 (60.256)	Top-5 acc 84.766 (81.303)	lr 0.01494
Train [53][1640/3239]	Time 0.234 (0.617)	Data Time 0.001 (0.023)	Loss 2.6005 (2.6495)	Entropy 1.14625 (1.14821)	Top-1 acc 61.328 (60.257)	Top-5 acc 82.812 (81.299)	lr 0.01494
Train [53][1650/3239]	Time 0.280 (0.617)	Data Time 0.001 (0.023)	Loss 2.7487 (2.6496)	Entropy 1.14623 (1.14820)	Top-1 acc 57.812 (60.255)	Top-5 acc 80.859 (81.297)	lr 0.01494
Train [53][1660/3239]	Time 2.569 (0.616)	Data Time 0.001 (0.023)	Loss 2.6084 (2.6496)	Entropy 1.14623 (1.14819)	Top-1 acc 61.328 (60.263)	Top-5 acc 83.594 (81.296)	lr 0.01493
Train [53][1670/3239]	Time 0.235 (0.613)	Data Time 0.001 (0.023)	Loss 2.6235 (2.6493)	Entropy 1.14624 (1.14818)	Top-1 acc 60.156 (60.269)	Top-5 acc 81.250 (81.299)	lr 0.01493
Train [53][1680/3239]	Time 0.315 (0.613)	Data Time 0.001 (0.023)	Loss 2.4835 (2.6493)	Entropy 1.14620 (1.14817)	Top-1 acc 67.578 (60.268)	Top-5 acc 83.594 (81.298)	lr 0.01493
Train [53][1690/3239]	Time 0.227 (0.612)	Data Time 0.001 (0.022)	Loss 2.6032 (2.6495)	Entropy 1.14614 (1.14815)	Top-1 acc 59.375 (60.262)	Top-5 acc 82.422 (81.293)	lr 0.01493
Train [53][1700/3239]	Time 0.214 (0.611)	Data Time 0.001 (0.022)	Loss 2.6806 (2.6496)	Entropy 1.14615 (1.14814)	Top-1 acc 58.203 (60.252)	Top-5 acc 80.469 (81.288)	lr 0.01493
Train [53][1710/3239]	Time 0.221 (0.610)	Data Time 0.001 (0.022)	Loss 2.6303 (2.6493)	Entropy 1.14608 (1.14813)	Top-1 acc 61.328 (60.260)	Top-5 acc 83.594 (81.298)	lr 0.01493
Train [53][1720/3239]	Time 0.200 (0.609)	Data Time 0.001 (0.022)	Loss 2.5875 (2.6490)	Entropy 1.14610 (1.14812)	Top-1 acc 59.766 (60.266)	Top-5 acc 80.469 (81.302)	lr 0.01493
Train [53][1730/3239]	Time 0.224 (0.608)	Data Time 0.001 (0.022)	Loss 2.7549 (2.6492)	Entropy 1.14609 (1.14811)	Top-1 acc 59.375 (60.261)	Top-5 acc 78.125 (81.298)	lr 0.01493
Train [53][1740/3239]	Time 0.268 (0.607)	Data Time 0.002 (0.022)	Loss 2.7431 (2.6491)	Entropy 1.14601 (1.14809)	Top-1 acc 61.719 (60.266)	Top-5 acc 78.516 (81.299)	lr 0.01493
Train [53][1750/3239]	Time 0.209 (0.606)	Data Time 0.001 (0.022)	Loss 2.8016 (2.6490)	Entropy 1.14597 (1.14808)	Top-1 acc 57.031 (60.262)	Top-5 acc 76.172 (81.300)	lr 0.01493
Train [53][1760/3239]	Time 0.229 (0.606)	Data Time 0.001 (0.022)	Loss 2.6817 (2.6489)	Entropy 1.14592 (1.14807)	Top-1 acc 62.500 (60.263)	Top-5 acc 81.250 (81.301)	lr 0.01492
Train [53][1770/3239]	Time 2.396 (0.605)	Data Time 0.001 (0.021)	Loss 2.5961 (2.6492)	Entropy 1.14592 (1.14806)	Top-1 acc 62.109 (60.262)	Top-5 acc 80.859 (81.294)	lr 0.01492
Train [53][1780/3239]	Time 0.212 (0.603)	Data Time 0.001 (0.021)	Loss 2.7942 (2.6493)	Entropy 1.14588 (1.14805)	Top-1 acc 59.766 (60.260)	Top-5 acc 75.391 (81.288)	lr 0.01492
Train [53][1790/3239]	Time 0.219 (0.602)	Data Time 0.001 (0.021)	Loss 2.6866 (2.6493)	Entropy 1.14584 (1.14803)	Top-1 acc 59.375 (60.262)	Top-5 acc 80.078 (81.287)	lr 0.01492
Train [53][1800/3239]	Time 0.253 (0.601)	Data Time 0.002 (0.021)	Loss 2.6995 (2.6493)	Entropy 1.14579 (1.14802)	Top-1 acc 57.812 (60.258)	Top-5 acc 80.469 (81.286)	lr 0.01492
Train [53][1810/3239]	Time 0.232 (0.601)	Data Time 0.001 (0.021)	Loss 2.6282 (2.6493)	Entropy 1.14578 (1.14801)	Top-1 acc 61.328 (60.257)	Top-5 acc 79.688 (81.285)	lr 0.01492
Train [53][1820/3239]	Time 0.262 (0.600)	Data Time 0.001 (0.021)	Loss 2.5095 (2.6492)	Entropy 1.14577 (1.14800)	Top-1 acc 61.719 (60.256)	Top-5 acc 83.594 (81.286)	lr 0.01492
Train [53][1830/3239]	Time 0.209 (0.599)	Data Time 0.001 (0.021)	Loss 2.6529 (2.6494)	Entropy 1.14571 (1.14798)	Top-1 acc 61.719 (60.256)	Top-5 acc 82.422 (81.281)	lr 0.01492
Train [53][1840/3239]	Time 0.211 (0.598)	Data Time 0.001 (0.021)	Loss 2.7816 (2.6496)	Entropy 1.14569 (1.14797)	Top-1 acc 56.641 (60.250)	Top-5 acc 76.953 (81.278)	lr 0.01492
Train [53][1850/3239]	Time 0.235 (0.597)	Data Time 0.001 (0.021)	Loss 2.8029 (2.6499)	Entropy 1.14572 (1.14796)	Top-1 acc 56.641 (60.244)	Top-5 acc 78.906 (81.270)	lr 0.01492
Train [53][1860/3239]	Time 0.243 (0.597)	Data Time 0.001 (0.020)	Loss 2.7141 (2.6500)	Entropy 1.14569 (1.14795)	Top-1 acc 58.984 (60.245)	Top-5 acc 79.297 (81.267)	lr 0.01491
Train [53][1870/3239]	Time 0.340 (0.596)	Data Time 0.001 (0.020)	Loss 2.6241 (2.6500)	Entropy 1.14571 (1.14794)	Top-1 acc 57.812 (60.242)	Top-5 acc 80.469 (81.269)	lr 0.01491
Train [53][1880/3239]	Time 2.455 (0.595)	Data Time 0.001 (0.020)	Loss 2.7143 (2.6500)	Entropy 1.14571 (1.14792)	Top-1 acc 59.766 (60.247)	Top-5 acc 78.906 (81.272)	lr 0.01491
Train [53][1890/3239]	Time 0.254 (0.593)	Data Time 0.001 (0.020)	Loss 2.9276 (2.6500)	Entropy 1.14573 (1.14791)	Top-1 acc 51.953 (60.238)	Top-5 acc 74.609 (81.266)	lr 0.01491
Train [53][1900/3239]	Time 0.237 (0.593)	Data Time 0.001 (0.020)	Loss 2.6643 (2.6501)	Entropy 1.14568 (1.14790)	Top-1 acc 59.766 (60.237)	Top-5 acc 82.031 (81.265)	lr 0.01491
Train [53][1910/3239]	Time 0.254 (0.592)	Data Time 0.001 (0.020)	Loss 2.5586 (2.6501)	Entropy 1.14566 (1.14789)	Top-1 acc 63.672 (60.238)	Top-5 acc 83.984 (81.265)	lr 0.01491
Train [53][1920/3239]	Time 0.222 (0.592)	Data Time 0.001 (0.020)	Loss 2.6494 (2.6500)	Entropy 1.14560 (1.14788)	Top-1 acc 60.156 (60.240)	Top-5 acc 80.078 (81.266)	lr 0.01491
Train [53][1930/3239]	Time 0.229 (0.591)	Data Time 0.001 (0.020)	Loss 2.6966 (2.6503)	Entropy 1.14546 (1.14787)	Top-1 acc 58.594 (60.231)	Top-5 acc 79.297 (81.261)	lr 0.01491
Train [53][1940/3239]	Time 0.217 (0.590)	Data Time 0.001 (0.020)	Loss 2.5806 (2.6502)	Entropy 1.14541 (1.14785)	Top-1 acc 62.500 (60.235)	Top-5 acc 80.859 (81.261)	lr 0.01491
Train [53][1950/3239]	Time 0.222 (0.589)	Data Time 0.001 (0.020)	Loss 2.4292 (2.6502)	Entropy 1.14536 (1.14784)	Top-1 acc 67.969 (60.239)	Top-5 acc 85.938 (81.261)	lr 0.01491
Train [53][1960/3239]	Time 0.442 (0.613)	Data Time 0.002 (0.020)	Loss 2.6044 (2.6502)	Entropy 1.14538 (1.14783)	Top-1 acc 58.203 (60.235)	Top-5 acc 84.766 (81.262)	lr 0.01490
Train [53][1970/3239]	Time 0.240 (0.612)	Data Time 0.002 (0.019)	Loss 2.4513 (2.6500)	Entropy 1.14536 (1.14781)	Top-1 acc 64.453 (60.236)	Top-5 acc 82.031 (81.265)	lr 0.01490
Train [53][1980/3239]	Time 0.234 (0.612)	Data Time 0.001 (0.019)	Loss 2.7200 (2.6501)	Entropy 1.14536 (1.14780)	Top-1 acc 59.375 (60.229)	Top-5 acc 79.297 (81.267)	lr 0.01490
Train [53][1990/3239]	Time 2.352 (0.611)	Data Time 0.001 (0.019)	Loss 2.5136 (2.6499)	Entropy 1.14536 (1.14779)	Top-1 acc 62.500 (60.234)	Top-5 acc 82.812 (81.272)	lr 0.01490
Train [53][2000/3239]	Time 0.212 (0.609)	Data Time 0.001 (0.019)	Loss 2.9061 (2.6501)	Entropy 1.14533 (1.14778)	Top-1 acc 53.125 (60.227)	Top-5 acc 76.953 (81.269)	lr 0.01490
Train [53][2010/3239]	Time 0.219 (0.608)	Data Time 0.001 (0.019)	Loss 2.4065 (2.6501)	Entropy 1.14528 (1.14777)	Top-1 acc 66.016 (60.223)	Top-5 acc 86.719 (81.272)	lr 0.01490
Train [53][2020/3239]	Time 0.225 (0.608)	Data Time 0.001 (0.019)	Loss 2.6472 (2.6501)	Entropy 1.14525 (1.14775)	Top-1 acc 58.984 (60.222)	Top-5 acc 80.469 (81.272)	lr 0.01490
Train [53][2030/3239]	Time 0.227 (0.607)	Data Time 0.002 (0.019)	Loss 2.8219 (2.6504)	Entropy 1.14527 (1.14774)	Top-1 acc 55.078 (60.217)	Top-5 acc 76.953 (81.269)	lr 0.01490
Train [53][2040/3239]	Time 0.239 (0.606)	Data Time 0.001 (0.019)	Loss 2.4630 (2.6504)	Entropy 1.14521 (1.14773)	Top-1 acc 62.500 (60.215)	Top-5 acc 86.328 (81.266)	lr 0.01490
Train [53][2050/3239]	Time 0.209 (0.606)	Data Time 0.001 (0.019)	Loss 2.8055 (2.6507)	Entropy 1.14519 (1.14772)	Top-1 acc 56.250 (60.211)	Top-5 acc 78.125 (81.261)	lr 0.01490
Train [53][2060/3239]	Time 0.318 (0.605)	Data Time 0.001 (0.019)	Loss 2.7399 (2.6508)	Entropy 1.14520 (1.14770)	Top-1 acc 61.719 (60.210)	Top-5 acc 78.516 (81.259)	lr 0.01490
Train [53][2070/3239]	Time 0.227 (0.604)	Data Time 0.001 (0.019)	Loss 2.5819 (2.6510)	Entropy 1.14518 (1.14769)	Top-1 acc 62.109 (60.208)	Top-5 acc 83.984 (81.253)	lr 0.01489
Train [53][2080/3239]	Time 0.208 (0.604)	Data Time 0.001 (0.019)	Loss 2.6796 (2.6511)	Entropy 1.14515 (1.14768)	Top-1 acc 56.250 (60.206)	Top-5 acc 83.594 (81.252)	lr 0.01489
Train [53][2090/3239]	Time 0.251 (0.603)	Data Time 0.001 (0.018)	Loss 2.6636 (2.6509)	Entropy 1.14510 (1.14767)	Top-1 acc 58.594 (60.207)	Top-5 acc 82.812 (81.256)	lr 0.01489
Train [53][2100/3239]	Time 2.584 (0.602)	Data Time 0.001 (0.018)	Loss 2.7138 (2.6511)	Entropy 1.14510 (1.14766)	Top-1 acc 60.156 (60.199)	Top-5 acc 79.688 (81.252)	lr 0.01489
Train [53][2110/3239]	Time 0.215 (0.601)	Data Time 0.001 (0.018)	Loss 2.7858 (2.6514)	Entropy 1.14510 (1.14764)	Top-1 acc 55.469 (60.190)	Top-5 acc 75.781 (81.243)	lr 0.01489
Train [53][2120/3239]	Time 0.236 (0.600)	Data Time 0.002 (0.018)	Loss 2.6916 (2.6514)	Entropy 1.14507 (1.14763)	Top-1 acc 60.547 (60.187)	Top-5 acc 80.469 (81.244)	lr 0.01489
Train [53][2130/3239]	Time 0.215 (0.599)	Data Time 0.001 (0.018)	Loss 2.5811 (2.6516)	Entropy 1.14508 (1.14762)	Top-1 acc 62.500 (60.183)	Top-5 acc 83.203 (81.241)	lr 0.01489
Train [53][2140/3239]	Time 0.256 (0.599)	Data Time 0.001 (0.018)	Loss 2.7013 (2.6515)	Entropy 1.14507 (1.14761)	Top-1 acc 57.422 (60.185)	Top-5 acc 78.906 (81.241)	lr 0.01489
Train [53][2150/3239]	Time 0.210 (0.598)	Data Time 0.001 (0.018)	Loss 2.7595 (2.6516)	Entropy 1.14505 (1.14760)	Top-1 acc 55.078 (60.184)	Top-5 acc 79.688 (81.235)	lr 0.01489
Train [53][2160/3239]	Time 0.227 (0.598)	Data Time 0.001 (0.018)	Loss 2.6269 (2.6518)	Entropy 1.14506 (1.14758)	Top-1 acc 60.938 (60.179)	Top-5 acc 83.594 (81.233)	lr 0.01489
Train [53][2170/3239]	Time 0.228 (0.597)	Data Time 0.001 (0.018)	Loss 2.5497 (2.6518)	Entropy 1.14505 (1.14757)	Top-1 acc 64.453 (60.179)	Top-5 acc 84.375 (81.232)	lr 0.01488
Train [53][2180/3239]	Time 0.221 (0.596)	Data Time 0.001 (0.018)	Loss 2.6951 (2.6519)	Entropy 1.14505 (1.14756)	Top-1 acc 60.156 (60.176)	Top-5 acc 78.906 (81.228)	lr 0.01488
Train [53][2190/3239]	Time 0.214 (0.596)	Data Time 0.001 (0.018)	Loss 2.7280 (2.6518)	Entropy 1.14499 (1.14755)	Top-1 acc 51.172 (60.174)	Top-5 acc 80.469 (81.232)	lr 0.01488
Train [53][2200/3239]	Time 0.289 (0.595)	Data Time 0.001 (0.018)	Loss 2.6536 (2.6518)	Entropy 1.14497 (1.14754)	Top-1 acc 62.500 (60.176)	Top-5 acc 83.984 (81.232)	lr 0.01488
Train [53][2210/3239]	Time 2.572 (0.595)	Data Time 0.001 (0.018)	Loss 2.5088 (2.6518)	Entropy 1.14497 (1.14753)	Top-1 acc 60.938 (60.175)	Top-5 acc 82.031 (81.233)	lr 0.01488
Train [53][2220/3239]	Time 0.243 (0.593)	Data Time 0.002 (0.017)	Loss 2.4340 (2.6519)	Entropy 1.14492 (1.14751)	Top-1 acc 66.406 (60.178)	Top-5 acc 84.766 (81.230)	lr 0.01488
Train [53][2230/3239]	Time 0.231 (0.593)	Data Time 0.001 (0.017)	Loss 2.6231 (2.6521)	Entropy 1.14486 (1.14750)	Top-1 acc 59.375 (60.170)	Top-5 acc 82.812 (81.227)	lr 0.01488
Train [53][2240/3239]	Time 0.214 (0.592)	Data Time 0.001 (0.017)	Loss 2.6686 (2.6525)	Entropy 1.14487 (1.14749)	Top-1 acc 59.766 (60.158)	Top-5 acc 82.422 (81.219)	lr 0.01488
Train [53][2250/3239]	Time 0.308 (0.591)	Data Time 0.001 (0.017)	Loss 2.6142 (2.6524)	Entropy 1.14485 (1.14748)	Top-1 acc 62.891 (60.163)	Top-5 acc 81.250 (81.217)	lr 0.01488
Train [53][2260/3239]	Time 0.214 (0.591)	Data Time 0.001 (0.017)	Loss 2.6720 (2.6527)	Entropy 1.14483 (1.14747)	Top-1 acc 58.984 (60.157)	Top-5 acc 81.250 (81.213)	lr 0.01488
Train [53][2270/3239]	Time 0.225 (0.590)	Data Time 0.002 (0.017)	Loss 2.6220 (2.6526)	Entropy 1.14481 (1.14746)	Top-1 acc 60.938 (60.161)	Top-5 acc 82.031 (81.215)	lr 0.01487
Train [53][2280/3239]	Time 0.227 (0.590)	Data Time 0.001 (0.017)	Loss 2.7152 (2.6526)	Entropy 1.14480 (1.14744)	Top-1 acc 59.375 (60.161)	Top-5 acc 83.203 (81.213)	lr 0.01487
Train [53][2290/3239]	Time 0.314 (0.589)	Data Time 0.002 (0.017)	Loss 2.6243 (2.6525)	Entropy 1.14474 (1.14743)	Top-1 acc 60.938 (60.164)	Top-5 acc 81.641 (81.215)	lr 0.01487
Train [53][2300/3239]	Time 0.227 (0.589)	Data Time 0.001 (0.017)	Loss 2.6916 (2.6527)	Entropy 1.14471 (1.14742)	Top-1 acc 58.984 (60.157)	Top-5 acc 82.422 (81.215)	lr 0.01487
Train [53][2310/3239]	Time 0.209 (0.588)	Data Time 0.001 (0.017)	Loss 2.6317 (2.6526)	Entropy 1.14470 (1.14741)	Top-1 acc 60.938 (60.159)	Top-5 acc 82.031 (81.216)	lr 0.01487
Train [53][2320/3239]	Time 53.956 (0.610)	Data Time 0.001 (0.017)	Loss 2.6778 (2.6526)	Entropy 1.14470 (1.14740)	Top-1 acc 61.719 (60.161)	Top-5 acc 80.469 (81.220)	lr 0.01487
Train [53][2330/3239]	Time 0.297 (0.608)	Data Time 0.003 (0.017)	Loss 2.5274 (2.6524)	Entropy 1.14470 (1.14739)	Top-1 acc 59.766 (60.168)	Top-5 acc 83.594 (81.224)	lr 0.01487
Train [53][2340/3239]	Time 0.274 (0.608)	Data Time 0.002 (0.017)	Loss 2.8070 (2.6522)	Entropy 1.14461 (1.14737)	Top-1 acc 52.344 (60.175)	Top-5 acc 80.078 (81.226)	lr 0.01487
Train [53][2350/3239]	Time 0.234 (0.607)	Data Time 0.001 (0.017)	Loss 2.5974 (2.6522)	Entropy 1.14451 (1.14736)	Top-1 acc 60.938 (60.174)	Top-5 acc 85.547 (81.228)	lr 0.01487
Train [53][2360/3239]	Time 0.220 (0.607)	Data Time 0.001 (0.017)	Loss 2.5330 (2.6522)	Entropy 1.14448 (1.14735)	Top-1 acc 60.156 (60.174)	Top-5 acc 83.594 (81.228)	lr 0.01487
Train [53][2370/3239]	Time 0.229 (0.606)	Data Time 0.001 (0.016)	Loss 2.6759 (2.6524)	Entropy 1.14447 (1.14734)	Top-1 acc 58.594 (60.169)	Top-5 acc 77.734 (81.224)	lr 0.01486
Train [53][2380/3239]	Time 0.227 (0.606)	Data Time 0.002 (0.016)	Loss 2.6723 (2.6523)	Entropy 1.14444 (1.14733)	Top-1 acc 59.766 (60.166)	Top-5 acc 81.250 (81.224)	lr 0.01486
Train [53][2390/3239]	Time 0.232 (0.605)	Data Time 0.001 (0.016)	Loss 2.7474 (2.6524)	Entropy 1.14438 (1.14731)	Top-1 acc 57.422 (60.163)	Top-5 acc 79.688 (81.225)	lr 0.01486
Train [53][2400/3239]	Time 0.209 (0.605)	Data Time 0.001 (0.016)	Loss 2.6018 (2.6524)	Entropy 1.14434 (1.14730)	Top-1 acc 62.891 (60.163)	Top-5 acc 82.422 (81.225)	lr 0.01486
Train [53][2410/3239]	Time 0.236 (0.604)	Data Time 0.001 (0.016)	Loss 2.6856 (2.6523)	Entropy 1.14428 (1.14729)	Top-1 acc 61.719 (60.159)	Top-5 acc 80.859 (81.227)	lr 0.01486
Train [53][2420/3239]	Time 0.234 (0.603)	Data Time 0.002 (0.016)	Loss 2.6722 (2.6523)	Entropy 1.14431 (1.14728)	Top-1 acc 58.984 (60.164)	Top-5 acc 81.250 (81.228)	lr 0.01486
Train [53][2430/3239]	Time 2.554 (0.603)	Data Time 0.002 (0.016)	Loss 2.7667 (2.6525)	Entropy 1.14431 (1.14726)	Top-1 acc 58.203 (60.159)	Top-5 acc 78.516 (81.223)	lr 0.01486
Train [53][2440/3239]	Time 0.267 (0.601)	Data Time 0.002 (0.016)	Loss 2.5274 (2.6524)	Entropy 1.14431 (1.14725)	Top-1 acc 66.406 (60.168)	Top-5 acc 83.203 (81.225)	lr 0.01486
Train [53][2450/3239]	Time 0.232 (0.601)	Data Time 0.001 (0.016)	Loss 2.4865 (2.6521)	Entropy 1.14424 (1.14724)	Top-1 acc 64.062 (60.175)	Top-5 acc 83.594 (81.228)	lr 0.01486
Train [53][2460/3239]	Time 0.263 (0.600)	Data Time 0.002 (0.016)	Loss 2.8200 (2.6523)	Entropy 1.14425 (1.14723)	Top-1 acc 55.469 (60.171)	Top-5 acc 77.344 (81.230)	lr 0.01486
Train [53][2470/3239]	Time 0.232 (0.600)	Data Time 0.001 (0.016)	Loss 2.5409 (2.6523)	Entropy 1.14417 (1.14722)	Top-1 acc 61.328 (60.171)	Top-5 acc 82.422 (81.231)	lr 0.01485
Train [53][2480/3239]	Time 0.343 (0.599)	Data Time 0.001 (0.016)	Loss 2.8249 (2.6525)	Entropy 1.14417 (1.14720)	Top-1 acc 52.344 (60.164)	Top-5 acc 80.078 (81.228)	lr 0.01485
Train [53][2490/3239]	Time 0.233 (0.599)	Data Time 0.001 (0.016)	Loss 2.6593 (2.6525)	Entropy 1.14415 (1.14719)	Top-1 acc 59.766 (60.162)	Top-5 acc 81.250 (81.228)	lr 0.01485
Train [53][2500/3239]	Time 0.203 (0.598)	Data Time 0.001 (0.016)	Loss 2.7067 (2.6526)	Entropy 1.14413 (1.14718)	Top-1 acc 58.203 (60.159)	Top-5 acc 80.078 (81.228)	lr 0.01485
Train [53][2510/3239]	Time 0.219 (0.598)	Data Time 0.001 (0.016)	Loss 2.6234 (2.6527)	Entropy 1.14405 (1.14717)	Top-1 acc 63.281 (60.158)	Top-5 acc 83.594 (81.223)	lr 0.01485
Train [53][2520/3239]	Time 0.215 (0.597)	Data Time 0.001 (0.016)	Loss 2.7076 (2.6531)	Entropy 1.14405 (1.14715)	Top-1 acc 59.766 (60.149)	Top-5 acc 77.734 (81.215)	lr 0.01485
Train [53][2530/3239]	Time 0.233 (0.597)	Data Time 0.001 (0.016)	Loss 2.4768 (2.6529)	Entropy 1.14403 (1.14714)	Top-1 acc 62.109 (60.151)	Top-5 acc 85.547 (81.221)	lr 0.01485
Train [53][2540/3239]	Time 2.541 (0.596)	Data Time 0.001 (0.015)	Loss 2.7770 (2.6530)	Entropy 1.14403 (1.14713)	Top-1 acc 58.984 (60.149)	Top-5 acc 80.859 (81.220)	lr 0.01485
Train [53][2550/3239]	Time 0.279 (0.595)	Data Time 0.003 (0.015)	Loss 2.6577 (2.6530)	Entropy 1.14403 (1.14712)	Top-1 acc 55.469 (60.146)	Top-5 acc 83.594 (81.223)	lr 0.01485
Train [53][2560/3239]	Time 0.224 (0.594)	Data Time 0.001 (0.015)	Loss 2.5387 (2.6531)	Entropy 1.14402 (1.14711)	Top-1 acc 63.672 (60.145)	Top-5 acc 83.984 (81.223)	lr 0.01485
Train [53][2570/3239]	Time 0.235 (0.594)	Data Time 0.001 (0.015)	Loss 2.7446 (2.6537)	Entropy 1.14396 (1.14709)	Top-1 acc 59.766 (60.132)	Top-5 acc 81.250 (81.211)	lr 0.01484
Train [53][2580/3239]	Time 0.212 (0.593)	Data Time 0.001 (0.015)	Loss 2.8518 (2.6538)	Entropy 1.14396 (1.14708)	Top-1 acc 55.859 (60.127)	Top-5 acc 77.344 (81.208)	lr 0.01484
Train [53][2590/3239]	Time 0.215 (0.593)	Data Time 0.001 (0.015)	Loss 2.5798 (2.6538)	Entropy 1.14392 (1.14707)	Top-1 acc 65.625 (60.126)	Top-5 acc 82.031 (81.208)	lr 0.01484
Train [53][2600/3239]	Time 0.226 (0.592)	Data Time 0.001 (0.015)	Loss 2.6316 (2.6539)	Entropy 1.14391 (1.14706)	Top-1 acc 62.109 (60.125)	Top-5 acc 80.078 (81.204)	lr 0.01484
Train [53][2610/3239]	Time 0.222 (0.592)	Data Time 0.002 (0.015)	Loss 2.4665 (2.6538)	Entropy 1.14386 (1.14704)	Top-1 acc 64.453 (60.126)	Top-5 acc 86.328 (81.206)	lr 0.01484
Train [53][2620/3239]	Time 0.254 (0.591)	Data Time 0.001 (0.015)	Loss 2.6702 (2.6538)	Entropy 1.14383 (1.14703)	Top-1 acc 58.203 (60.121)	Top-5 acc 82.422 (81.206)	lr 0.01484
Train [53][2630/3239]	Time 0.210 (0.591)	Data Time 0.001 (0.015)	Loss 2.8487 (2.6538)	Entropy 1.14382 (1.14702)	Top-1 acc 57.031 (60.124)	Top-5 acc 76.172 (81.205)	lr 0.01484
Train [53][2640/3239]	Time 0.247 (0.590)	Data Time 0.001 (0.015)	Loss 2.6134 (2.6541)	Entropy 1.14377 (1.14701)	Top-1 acc 58.984 (60.121)	Top-5 acc 81.641 (81.201)	lr 0.01484
Train [53][2650/3239]	Time 0.221 (0.590)	Data Time 0.001 (0.015)	Loss 2.4643 (2.6539)	Entropy 1.14377 (1.14700)	Top-1 acc 67.969 (60.122)	Top-5 acc 84.766 (81.204)	lr 0.01484
Train [53][2660/3239]	Time 0.256 (0.589)	Data Time 0.001 (0.015)	Loss 2.6563 (2.6541)	Entropy 1.14375 (1.14698)	Top-1 acc 59.375 (60.117)	Top-5 acc 83.984 (81.201)	lr 0.01484
Train [53][2670/3239]	Time 0.323 (0.589)	Data Time 0.001 (0.015)	Loss 2.6662 (2.6546)	Entropy 1.14369 (1.14697)	Top-1 acc 61.328 (60.109)	Top-5 acc 82.031 (81.195)	lr 0.01483
Train [53][2680/3239]	Time 0.291 (0.605)	Data Time 0.004 (0.015)	Loss 2.7328 (2.6546)	Entropy 1.14364 (1.14696)	Top-1 acc 57.031 (60.108)	Top-5 acc 80.469 (81.194)	lr 0.01483
Train [53][2690/3239]	Time 0.240 (0.605)	Data Time 0.002 (0.015)	Loss 2.7516 (2.6546)	Entropy 1.14364 (1.14695)	Top-1 acc 57.031 (60.106)	Top-5 acc 80.078 (81.192)	lr 0.01483
Train [53][2700/3239]	Time 0.223 (0.605)	Data Time 0.001 (0.015)	Loss 2.7007 (2.6545)	Entropy 1.14362 (1.14693)	Top-1 acc 59.375 (60.105)	Top-5 acc 78.516 (81.193)	lr 0.01483
Train [53][2710/3239]	Time 0.218 (0.604)	Data Time 0.001 (0.015)	Loss 2.7179 (2.6543)	Entropy 1.14364 (1.14692)	Top-1 acc 60.156 (60.111)	Top-5 acc 82.031 (81.196)	lr 0.01483
Train [53][2720/3239]	Time 0.263 (0.604)	Data Time 0.001 (0.015)	Loss 2.7493 (2.6544)	Entropy 1.14363 (1.14691)	Top-1 acc 58.984 (60.108)	Top-5 acc 78.516 (81.192)	lr 0.01483
Train [53][2730/3239]	Time 0.214 (0.603)	Data Time 0.001 (0.015)	Loss 2.6362 (2.6546)	Entropy 1.14360 (1.14690)	Top-1 acc 58.594 (60.103)	Top-5 acc 81.641 (81.186)	lr 0.01483
Train [53][2740/3239]	Time 0.231 (0.603)	Data Time 0.002 (0.014)	Loss 2.5952 (2.6545)	Entropy 1.14360 (1.14689)	Top-1 acc 63.672 (60.106)	Top-5 acc 82.422 (81.190)	lr 0.01483
Train [53][2750/3239]	Time 0.230 (0.602)	Data Time 0.001 (0.014)	Loss 2.6513 (2.6542)	Entropy 1.14359 (1.14687)	Top-1 acc 61.328 (60.109)	Top-5 acc 82.031 (81.192)	lr 0.01483
Train [53][2760/3239]	Time 0.230 (0.602)	Data Time 0.001 (0.014)	Loss 2.7078 (2.6542)	Entropy 1.14354 (1.14686)	Top-1 acc 54.297 (60.110)	Top-5 acc 81.641 (81.193)	lr 0.01483
Train [53][2770/3239]	Time 0.235 (0.601)	Data Time 0.001 (0.014)	Loss 2.5929 (2.6541)	Entropy 1.14349 (1.14685)	Top-1 acc 63.281 (60.114)	Top-5 acc 82.812 (81.194)	lr 0.01482
Train [53][2780/3239]	Time 0.267 (0.601)	Data Time 0.001 (0.014)	Loss 2.7150 (2.6542)	Entropy 1.14352 (1.14684)	Top-1 acc 60.938 (60.114)	Top-5 acc 78.516 (81.191)	lr 0.01482
Train [53][2790/3239]	Time 0.247 (0.600)	Data Time 0.001 (0.014)	Loss 2.5429 (2.6541)	Entropy 1.14352 (1.14683)	Top-1 acc 61.719 (60.116)	Top-5 acc 83.203 (81.191)	lr 0.01482
Train [53][2800/3239]	Time 0.238 (0.600)	Data Time 0.001 (0.014)	Loss 2.7392 (2.6542)	Entropy 1.14352 (1.14681)	Top-1 acc 58.984 (60.113)	Top-5 acc 80.469 (81.191)	lr 0.01482
Train [53][2810/3239]	Time 0.247 (0.599)	Data Time 0.001 (0.014)	Loss 2.7189 (2.6542)	Entropy 1.14350 (1.14680)	Top-1 acc 60.547 (60.114)	Top-5 acc 78.906 (81.189)	lr 0.01482
Train [53][2820/3239]	Time 0.226 (0.599)	Data Time 0.001 (0.014)	Loss 2.8089 (2.6542)	Entropy 1.14348 (1.14679)	Top-1 acc 57.422 (60.116)	Top-5 acc 79.688 (81.189)	lr 0.01482
Train [53][2830/3239]	Time 0.247 (0.598)	Data Time 0.001 (0.014)	Loss 2.6714 (2.6541)	Entropy 1.14340 (1.14678)	Top-1 acc 61.328 (60.117)	Top-5 acc 81.250 (81.189)	lr 0.01482
Train [53][2840/3239]	Time 0.237 (0.598)	Data Time 0.001 (0.014)	Loss 2.6903 (2.6542)	Entropy 1.14335 (1.14677)	Top-1 acc 58.984 (60.116)	Top-5 acc 81.250 (81.188)	lr 0.01482
Train [53][2850/3239]	Time 0.239 (0.597)	Data Time 0.001 (0.014)	Loss 2.7131 (2.6543)	Entropy 1.14335 (1.14676)	Top-1 acc 57.422 (60.115)	Top-5 acc 80.469 (81.185)	lr 0.01482
Train [53][2860/3239]	Time 0.322 (0.597)	Data Time 0.001 (0.014)	Loss 2.8788 (2.6544)	Entropy 1.14334 (1.14674)	Top-1 acc 57.031 (60.115)	Top-5 acc 76.953 (81.182)	lr 0.01482
Train [53][2870/3239]	Time 0.208 (0.596)	Data Time 0.001 (0.014)	Loss 2.6343 (2.6544)	Entropy 1.14333 (1.14673)	Top-1 acc 63.672 (60.119)	Top-5 acc 80.859 (81.185)	lr 0.01481
Train [53][2880/3239]	Time 0.277 (0.596)	Data Time 0.001 (0.014)	Loss 2.5809 (2.6546)	Entropy 1.14327 (1.14672)	Top-1 acc 61.719 (60.116)	Top-5 acc 82.031 (81.181)	lr 0.01481
Train [53][2890/3239]	Time 0.250 (0.595)	Data Time 0.001 (0.014)	Loss 2.5653 (2.6546)	Entropy 1.14324 (1.14671)	Top-1 acc 62.109 (60.119)	Top-5 acc 84.375 (81.181)	lr 0.01481
Train [53][2900/3239]	Time 0.257 (0.595)	Data Time 0.001 (0.014)	Loss 2.6411 (2.6547)	Entropy 1.14320 (1.14670)	Top-1 acc 62.109 (60.116)	Top-5 acc 81.641 (81.179)	lr 0.01481
Train [53][2910/3239]	Time 0.331 (0.594)	Data Time 0.001 (0.014)	Loss 2.4672 (2.6550)	Entropy 1.14310 (1.14668)	Top-1 acc 67.578 (60.112)	Top-5 acc 83.594 (81.171)	lr 0.01481
Train [53][2920/3239]	Time 0.262 (0.594)	Data Time 0.001 (0.014)	Loss 2.5569 (2.6550)	Entropy 1.14310 (1.14667)	Top-1 acc 64.062 (60.111)	Top-5 acc 84.766 (81.170)	lr 0.01481
Train [53][2930/3239]	Time 0.246 (0.594)	Data Time 0.001 (0.014)	Loss 2.7318 (2.6550)	Entropy 1.14305 (1.14666)	Top-1 acc 56.641 (60.110)	Top-5 acc 80.078 (81.170)	lr 0.01481
Train [53][2940/3239]	Time 0.240 (0.593)	Data Time 0.001 (0.014)	Loss 2.6911 (2.6550)	Entropy 1.14298 (1.14665)	Top-1 acc 56.250 (60.108)	Top-5 acc 80.078 (81.167)	lr 0.01481
Train [53][2950/3239]	Time 0.225 (0.593)	Data Time 0.001 (0.014)	Loss 2.5518 (2.6549)	Entropy 1.14290 (1.14663)	Top-1 acc 65.234 (60.113)	Top-5 acc 83.203 (81.170)	lr 0.01481
Train [53][2960/3239]	Time 0.206 (0.592)	Data Time 0.001 (0.014)	Loss 2.6765 (2.6549)	Entropy 1.14290 (1.14662)	Top-1 acc 61.328 (60.115)	Top-5 acc 80.859 (81.171)	lr 0.01481
Train [53][2970/3239]	Time 0.221 (0.592)	Data Time 0.001 (0.013)	Loss 2.4811 (2.6551)	Entropy 1.14282 (1.14661)	Top-1 acc 64.844 (60.108)	Top-5 acc 84.766 (81.165)	lr 0.01480
Train [53][2980/3239]	Time 0.237 (0.591)	Data Time 0.001 (0.013)	Loss 2.6403 (2.6551)	Entropy 1.14282 (1.14660)	Top-1 acc 60.938 (60.109)	Top-5 acc 82.422 (81.165)	lr 0.01480
Train [53][2990/3239]	Time 0.276 (0.591)	Data Time 0.001 (0.013)	Loss 2.7006 (2.6549)	Entropy 1.14278 (1.14658)	Top-1 acc 58.594 (60.112)	Top-5 acc 81.641 (81.169)	lr 0.01480
Train [53][3000/3239]	Time 0.254 (0.590)	Data Time 0.001 (0.013)	Loss 2.5683 (2.6549)	Entropy 1.14281 (1.14657)	Top-1 acc 60.938 (60.113)	Top-5 acc 83.984 (81.167)	lr 0.01480
Train [53][3010/3239]	Time 0.259 (0.606)	Data Time 0.004 (0.013)	Loss 2.6421 (2.6549)	Entropy 1.14278 (1.14656)	Top-1 acc 63.281 (60.114)	Top-5 acc 80.078 (81.167)	lr 0.01480
Train [53][3020/3239]	Time 0.295 (0.606)	Data Time 0.002 (0.013)	Loss 2.5923 (2.6549)	Entropy 1.14273 (1.14655)	Top-1 acc 60.938 (60.116)	Top-5 acc 83.984 (81.166)	lr 0.01480
Train [53][3030/3239]	Time 0.218 (0.605)	Data Time 0.001 (0.013)	Loss 2.7241 (2.6550)	Entropy 1.14271 (1.14653)	Top-1 acc 57.031 (60.114)	Top-5 acc 78.906 (81.162)	lr 0.01480
Train [53][3040/3239]	Time 0.220 (0.605)	Data Time 0.001 (0.013)	Loss 2.8102 (2.6553)	Entropy 1.14271 (1.14652)	Top-1 acc 56.641 (60.105)	Top-5 acc 79.297 (81.157)	lr 0.01480
Train [53][3050/3239]	Time 0.337 (0.604)	Data Time 0.001 (0.013)	Loss 2.6464 (2.6553)	Entropy 1.14266 (1.14651)	Top-1 acc 59.375 (60.102)	Top-5 acc 82.812 (81.155)	lr 0.01480
Train [53][3060/3239]	Time 0.242 (0.604)	Data Time 0.001 (0.013)	Loss 2.6946 (2.6552)	Entropy 1.14263 (1.14650)	Top-1 acc 58.594 (60.107)	Top-5 acc 82.422 (81.160)	lr 0.01480
Train [53][3070/3239]	Time 0.224 (0.603)	Data Time 0.001 (0.013)	Loss 2.6326 (2.6551)	Entropy 1.14257 (1.14648)	Top-1 acc 57.031 (60.105)	Top-5 acc 82.031 (81.161)	lr 0.01479
Train [53][3080/3239]	Time 0.221 (0.603)	Data Time 0.001 (0.013)	Loss 2.6274 (2.6552)	Entropy 1.14255 (1.14647)	Top-1 acc 60.156 (60.103)	Top-5 acc 81.641 (81.160)	lr 0.01479
Train [53][3090/3239]	Time 0.263 (0.602)	Data Time 0.001 (0.013)	Loss 2.6995 (2.6552)	Entropy 1.14249 (1.14646)	Top-1 acc 59.766 (60.104)	Top-5 acc 78.516 (81.159)	lr 0.01479
Train [53][3100/3239]	Time 0.366 (0.602)	Data Time 0.001 (0.013)	Loss 2.5972 (2.6552)	Entropy 1.14244 (1.14645)	Top-1 acc 62.500 (60.102)	Top-5 acc 83.203 (81.155)	lr 0.01479
Train [53][3110/3239]	Time 0.235 (0.602)	Data Time 0.001 (0.013)	Loss 2.8131 (2.6553)	Entropy 1.14242 (1.14643)	Top-1 acc 56.641 (60.099)	Top-5 acc 80.078 (81.154)	lr 0.01479
Train [53][3120/3239]	Time 0.219 (0.601)	Data Time 0.001 (0.013)	Loss 2.6575 (2.6552)	Entropy 1.14240 (1.14642)	Top-1 acc 57.422 (60.099)	Top-5 acc 82.422 (81.156)	lr 0.01479
Train [53][3130/3239]	Time 0.256 (0.601)	Data Time 0.001 (0.013)	Loss 2.9461 (2.6554)	Entropy 1.14237 (1.14641)	Top-1 acc 55.859 (60.097)	Top-5 acc 74.219 (81.153)	lr 0.01479
Train [53][3140/3239]	Time 0.245 (0.600)	Data Time 0.001 (0.013)	Loss 2.6371 (2.6554)	Entropy 1.14247 (1.14639)	Top-1 acc 63.281 (60.098)	Top-5 acc 80.078 (81.152)	lr 0.01479
Train [53][3150/3239]	Time 0.204 (0.600)	Data Time 0.001 (0.013)	Loss 2.7385 (2.6553)	Entropy 1.14243 (1.14638)	Top-1 acc 57.031 (60.101)	Top-5 acc 78.516 (81.153)	lr 0.01479
Train [53][3160/3239]	Time 0.262 (0.599)	Data Time 0.001 (0.013)	Loss 2.7891 (2.6553)	Entropy 1.14236 (1.14637)	Top-1 acc 58.594 (60.101)	Top-5 acc 78.516 (81.152)	lr 0.01479
Train [53][3170/3239]	Time 0.214 (0.599)	Data Time 0.001 (0.013)	Loss 2.8124 (2.6553)	Entropy 1.14236 (1.14636)	Top-1 acc 56.250 (60.102)	Top-5 acc 80.859 (81.152)	lr 0.01478
Train [53][3180/3239]	Time 0.231 (0.598)	Data Time 0.000 (0.013)	Loss 2.6884 (2.6554)	Entropy 1.14236 (1.14634)	Top-1 acc 58.984 (60.103)	Top-5 acc 82.031 (81.150)	lr 0.01478
Train [53][3190/3239]	Time 0.226 (0.598)	Data Time 0.000 (0.013)	Loss 2.6831 (2.6555)	Entropy 1.14233 (1.14633)	Top-1 acc 61.328 (60.101)	Top-5 acc 80.469 (81.147)	lr 0.01478
Train [53][3200/3239]	Time 0.210 (0.598)	Data Time 0.000 (0.013)	Loss 2.7240 (2.6558)	Entropy 1.14232 (1.14632)	Top-1 acc 58.203 (60.090)	Top-5 acc 78.125 (81.141)	lr 0.01478
Train [53][3210/3239]	Time 0.208 (0.597)	Data Time 0.000 (0.013)	Loss 2.6793 (2.6558)	Entropy 1.14229 (1.14631)	Top-1 acc 61.328 (60.089)	Top-5 acc 78.516 (81.141)	lr 0.01478
Train [53][3220/3239]	Time 0.222 (0.597)	Data Time 0.000 (0.013)	Loss 2.7570 (2.6558)	Entropy 1.14223 (1.14629)	Top-1 acc 57.812 (60.090)	Top-5 acc 78.906 (81.141)	lr 0.01478
Train [53][3230/3239]	Time 0.223 (0.596)	Data Time 0.000 (0.013)	Loss 2.6851 (2.6557)	Entropy 1.14218 (1.14628)	Top-1 acc 55.859 (60.091)	Top-5 acc 78.125 (81.140)	lr 0.01478
Train [53][3239/3239]	Time 2.235 (0.596)	Data Time 0.000 (0.012)	Loss 2.6636 (2.6558)	Entropy 1.14218 (1.14627)	Top-1 acc 65.432 (60.089)	Top-5 acc 83.951 (81.139)	lr 0.01478
==========Valid [53/120]	loss 1.510	top-1 acc 65.861 (65.861)	top-5 acc 85.819	Train top-1 60.089	top-5 81.139	Entropy 1.14218	Latency-None: 0.000ms	Flops: 548.34M
Train [54][0/3239]	Time 36.211 (36.211)	Data Time 34.366 (34.366)	Loss 2.6045 (2.6045)	Entropy 1.14221 (1.14221)	Top-1 acc 58.594 (58.594)	Top-5 acc 81.250 (81.250)	lr 0.01478
Train [54][10/3239]	Time 2.820 (3.909)	Data Time 0.001 (3.207)	Loss 2.6261 (2.6296)	Entropy 1.14221 (1.14221)	Top-1 acc 61.328 (61.577)	Top-5 acc 79.688 (81.534)	lr 0.01478
Train [54][20/3239]	Time 0.231 (2.163)	Data Time 0.002 (1.681)	Loss 2.5190 (2.6388)	Entropy 1.14221 (1.14221)	Top-1 acc 60.547 (61.105)	Top-5 acc 85.938 (81.678)	lr 0.01478
Train [54][30/3239]	Time 0.213 (1.613)	Data Time 0.001 (1.139)	Loss 2.7251 (2.6251)	Entropy 1.14221 (1.14221)	Top-1 acc 58.594 (61.152)	Top-5 acc 80.078 (81.779)	lr 0.01477
Train [54][40/3239]	Time 0.231 (1.332)	Data Time 0.001 (0.862)	Loss 2.5904 (2.6149)	Entropy 1.14216 (1.14220)	Top-1 acc 64.453 (61.033)	Top-5 acc 82.031 (82.003)	lr 0.01477
Train [54][50/3239]	Time 0.243 (1.165)	Data Time 0.001 (0.693)	Loss 2.8380 (2.6201)	Entropy 1.14213 (1.14219)	Top-1 acc 51.953 (60.930)	Top-5 acc 79.688 (82.024)	lr 0.01477
Train [54][60/3239]	Time 0.323 (1.051)	Data Time 0.002 (0.580)	Loss 2.6011 (2.6187)	Entropy 1.14214 (1.14218)	Top-1 acc 60.156 (60.995)	Top-5 acc 83.203 (82.044)	lr 0.01477
Train [54][70/3239]	Time 0.218 (0.968)	Data Time 0.001 (0.498)	Loss 2.5906 (2.6217)	Entropy 1.14211 (1.14217)	Top-1 acc 59.766 (61.009)	Top-5 acc 82.031 (81.910)	lr 0.01477
Train [54][80/3239]	Time 0.258 (0.906)	Data Time 0.001 (0.437)	Loss 2.5111 (2.6252)	Entropy 1.14215 (1.14217)	Top-1 acc 61.719 (60.947)	Top-5 acc 84.375 (81.776)	lr 0.01477
Train [54][90/3239]	Time 0.211 (0.857)	Data Time 0.001 (0.389)	Loss 2.4407 (2.6255)	Entropy 1.14217 (1.14216)	Top-1 acc 64.844 (60.886)	Top-5 acc 84.766 (81.791)	lr 0.01477
Train [54][100/3239]	Time 0.226 (0.817)	Data Time 0.001 (0.351)	Loss 2.7042 (2.6217)	Entropy 1.14209 (1.14216)	Top-1 acc 56.641 (60.837)	Top-5 acc 80.469 (81.915)	lr 0.01477
Train [54][110/3239]	Time 0.240 (1.256)	Data Time 0.003 (0.319)	Loss 2.4912 (2.6262)	Entropy 1.14209 (1.14216)	Top-1 acc 62.500 (60.797)	Top-5 acc 82.812 (81.838)	lr 0.01477
Train [54][120/3239]	Time 2.458 (1.191)	Data Time 0.003 (0.293)	Loss 2.4151 (2.6247)	Entropy 1.14209 (1.14215)	Top-1 acc 66.016 (60.847)	Top-5 acc 85.938 (81.825)	lr 0.01477
Train [54][130/3239]	Time 0.219 (1.118)	Data Time 0.002 (0.271)	Loss 2.7361 (2.6300)	Entropy 1.14209 (1.14215)	Top-1 acc 59.375 (60.717)	Top-5 acc 78.516 (81.682)	lr 0.01477
Train [54][140/3239]	Time 0.235 (1.071)	Data Time 0.002 (0.252)	Loss 2.7425 (2.6365)	Entropy 1.14204 (1.14214)	Top-1 acc 57.812 (60.563)	Top-5 acc 80.469 (81.563)	lr 0.01476
Train [54][150/3239]	Time 0.220 (1.031)	Data Time 0.002 (0.235)	Loss 2.6735 (2.6334)	Entropy 1.14203 (1.14213)	Top-1 acc 62.891 (60.661)	Top-5 acc 80.078 (81.607)	lr 0.01476
Train [54][160/3239]	Time 0.230 (0.997)	Data Time 0.001 (0.221)	Loss 2.7173 (2.6318)	Entropy 1.14204 (1.14213)	Top-1 acc 58.594 (60.697)	Top-5 acc 80.469 (81.633)	lr 0.01476
Train [54][170/3239]	Time 0.236 (0.966)	Data Time 0.001 (0.208)	Loss 2.5985 (2.6346)	Entropy 1.14204 (1.14212)	Top-1 acc 59.766 (60.631)	Top-5 acc 82.422 (81.613)	lr 0.01476
Train [54][180/3239]	Time 0.229 (0.938)	Data Time 0.001 (0.197)	Loss 2.4966 (2.6356)	Entropy 1.14205 (1.14212)	Top-1 acc 66.406 (60.584)	Top-5 acc 86.328 (81.608)	lr 0.01476
Train [54][190/3239]	Time 0.212 (0.913)	Data Time 0.001 (0.186)	Loss 2.5374 (2.6335)	Entropy 1.14204 (1.14211)	Top-1 acc 62.500 (60.629)	Top-5 acc 83.984 (81.616)	lr 0.01476
Train [54][200/3239]	Time 0.257 (0.891)	Data Time 0.001 (0.177)	Loss 2.6765 (2.6330)	Entropy 1.14202 (1.14211)	Top-1 acc 62.109 (60.669)	Top-5 acc 82.031 (81.631)	lr 0.01476
Train [54][210/3239]	Time 0.219 (0.870)	Data Time 0.001 (0.169)	Loss 2.6198 (2.6329)	Entropy 1.14200 (1.14211)	Top-1 acc 58.203 (60.641)	Top-5 acc 82.031 (81.620)	lr 0.01476
Train [54][220/3239]	Time 0.234 (0.853)	Data Time 0.001 (0.161)	Loss 2.3996 (2.6303)	Entropy 1.14201 (1.14210)	Top-1 acc 67.578 (60.702)	Top-5 acc 84.375 (81.688)	lr 0.01476
Train [54][230/3239]	Time 2.391 (0.835)	Data Time 0.002 (0.154)	Loss 2.5978 (2.6298)	Entropy 1.14201 (1.14210)	Top-1 acc 61.719 (60.735)	Top-5 acc 81.641 (81.698)	lr 0.01476
Train [54][240/3239]	Time 0.252 (0.810)	Data Time 0.001 (0.148)	Loss 2.5830 (2.6299)	Entropy 1.14199 (1.14209)	Top-1 acc 63.281 (60.771)	Top-5 acc 82.812 (81.684)	lr 0.01475
Train [54][250/3239]	Time 0.341 (0.797)	Data Time 0.001 (0.142)	Loss 2.6673 (2.6299)	Entropy 1.14195 (1.14209)	Top-1 acc 63.281 (60.790)	Top-5 acc 83.594 (81.684)	lr 0.01475
Train [54][260/3239]	Time 0.216 (0.784)	Data Time 0.001 (0.137)	Loss 2.5059 (2.6280)	Entropy 1.14192 (1.14208)	Top-1 acc 64.062 (60.806)	Top-5 acc 83.594 (81.696)	lr 0.01475
Train [54][270/3239]	Time 0.220 (0.772)	Data Time 0.001 (0.132)	Loss 2.7983 (2.6263)	Entropy 1.14189 (1.14207)	Top-1 acc 58.203 (60.852)	Top-5 acc 78.906 (81.713)	lr 0.01475
Train [54][280/3239]	Time 0.214 (0.760)	Data Time 0.001 (0.127)	Loss 2.6765 (2.6293)	Entropy 1.14187 (1.14207)	Top-1 acc 57.031 (60.760)	Top-5 acc 80.859 (81.659)	lr 0.01475
Train [54][290/3239]	Time 0.232 (0.751)	Data Time 0.001 (0.123)	Loss 2.5632 (2.6282)	Entropy 1.14186 (1.14206)	Top-1 acc 65.234 (60.811)	Top-5 acc 78.906 (81.658)	lr 0.01475
Train [54][300/3239]	Time 0.222 (0.741)	Data Time 0.001 (0.119)	Loss 2.5280 (2.6266)	Entropy 1.14185 (1.14205)	Top-1 acc 62.500 (60.830)	Top-5 acc 82.812 (81.683)	lr 0.01475
Train [54][310/3239]	Time 0.231 (0.732)	Data Time 0.001 (0.115)	Loss 2.7530 (2.6266)	Entropy 1.14182 (1.14205)	Top-1 acc 59.375 (60.836)	Top-5 acc 76.953 (81.692)	lr 0.01475
Train [54][320/3239]	Time 0.254 (0.724)	Data Time 0.001 (0.112)	Loss 2.6853 (2.6270)	Entropy 1.14175 (1.14204)	Top-1 acc 60.156 (60.819)	Top-5 acc 78.906 (81.698)	lr 0.01475
Train [54][330/3239]	Time 0.220 (0.715)	Data Time 0.001 (0.108)	Loss 2.4162 (2.6274)	Entropy 1.14168 (1.14203)	Top-1 acc 64.844 (60.782)	Top-5 acc 85.547 (81.688)	lr 0.01475
Train [54][340/3239]	Time 2.447 (0.708)	Data Time 0.001 (0.105)	Loss 2.5329 (2.6253)	Entropy 1.14168 (1.14202)	Top-1 acc 61.328 (60.824)	Top-5 acc 84.375 (81.733)	lr 0.01474
Train [54][350/3239]	Time 0.222 (0.695)	Data Time 0.001 (0.102)	Loss 2.5785 (2.6246)	Entropy 1.14164 (1.14201)	Top-1 acc 64.844 (60.822)	Top-5 acc 83.984 (81.771)	lr 0.01474
Train [54][360/3239]	Time 0.219 (0.688)	Data Time 0.001 (0.099)	Loss 2.6524 (2.6265)	Entropy 1.14165 (1.14200)	Top-1 acc 58.984 (60.769)	Top-5 acc 79.297 (81.722)	lr 0.01474
Train [54][370/3239]	Time 0.216 (0.682)	Data Time 0.001 (0.097)	Loss 2.6080 (2.6272)	Entropy 1.14164 (1.14199)	Top-1 acc 57.812 (60.762)	Top-5 acc 83.203 (81.735)	lr 0.01474
Train [54][380/3239]	Time 0.229 (0.676)	Data Time 0.002 (0.094)	Loss 2.6215 (2.6290)	Entropy 1.14159 (1.14198)	Top-1 acc 61.328 (60.704)	Top-5 acc 80.859 (81.696)	lr 0.01474
Train [54][390/3239]	Time 0.216 (0.671)	Data Time 0.001 (0.092)	Loss 2.5136 (2.6283)	Entropy 1.14156 (1.14197)	Top-1 acc 63.672 (60.744)	Top-5 acc 86.328 (81.725)	lr 0.01474
Train [54][400/3239]	Time 0.224 (0.666)	Data Time 0.001 (0.090)	Loss 2.4835 (2.6290)	Entropy 1.14148 (1.14196)	Top-1 acc 60.547 (60.739)	Top-5 acc 85.547 (81.694)	lr 0.01474
Train [54][410/3239]	Time 0.214 (0.661)	Data Time 0.001 (0.087)	Loss 2.5500 (2.6284)	Entropy 1.14149 (1.14195)	Top-1 acc 63.281 (60.736)	Top-5 acc 84.375 (81.696)	lr 0.01474
Train [54][420/3239]	Time 0.221 (0.656)	Data Time 0.001 (0.085)	Loss 2.6461 (2.6278)	Entropy 1.14149 (1.14194)	Top-1 acc 63.281 (60.749)	Top-5 acc 81.641 (81.706)	lr 0.01474
Train [54][430/3239]	Time 0.275 (0.651)	Data Time 0.001 (0.083)	Loss 2.6539 (2.6284)	Entropy 1.14148 (1.14193)	Top-1 acc 61.719 (60.734)	Top-5 acc 82.422 (81.706)	lr 0.01474
Train [54][440/3239]	Time 0.300 (0.647)	Data Time 0.001 (0.082)	Loss 2.6619 (2.6282)	Entropy 1.14149 (1.14192)	Top-1 acc 60.156 (60.759)	Top-5 acc 80.859 (81.720)	lr 0.01473
Train [54][450/3239]	Time 2.553 (0.643)	Data Time 0.002 (0.080)	Loss 2.6150 (2.6277)	Entropy 1.14149 (1.14191)	Top-1 acc 60.938 (60.783)	Top-5 acc 82.812 (81.740)	lr 0.01473
Train [54][460/3239]	Time 0.221 (0.635)	Data Time 0.001 (0.078)	Loss 2.5744 (2.6271)	Entropy 1.14148 (1.14190)	Top-1 acc 62.500 (60.814)	Top-5 acc 82.031 (81.754)	lr 0.01473
Train [54][470/3239]	Time 0.216 (0.631)	Data Time 0.001 (0.076)	Loss 2.6653 (2.6293)	Entropy 1.14142 (1.14189)	Top-1 acc 62.500 (60.784)	Top-5 acc 77.734 (81.707)	lr 0.01473
Train [54][480/3239]	Time 0.226 (0.733)	Data Time 0.002 (0.075)	Loss 2.5328 (2.6290)	Entropy 1.14143 (1.14188)	Top-1 acc 62.500 (60.786)	Top-5 acc 83.594 (81.720)	lr 0.01473
Train [54][490/3239]	Time 0.212 (0.727)	Data Time 0.002 (0.073)	Loss 2.6311 (2.6299)	Entropy 1.14142 (1.14187)	Top-1 acc 60.156 (60.741)	Top-5 acc 82.422 (81.704)	lr 0.01473
Train [54][500/3239]	Time 0.259 (0.722)	Data Time 0.003 (0.072)	Loss 2.8774 (2.6294)	Entropy 1.14139 (1.14186)	Top-1 acc 56.641 (60.775)	Top-5 acc 75.781 (81.714)	lr 0.01473
Train [54][510/3239]	Time 0.210 (0.717)	Data Time 0.001 (0.071)	Loss 2.6219 (2.6287)	Entropy 1.14138 (1.14185)	Top-1 acc 64.062 (60.776)	Top-5 acc 82.812 (81.735)	lr 0.01473
Train [54][520/3239]	Time 0.194 (0.712)	Data Time 0.001 (0.069)	Loss 2.6726 (2.6288)	Entropy 1.14131 (1.14184)	Top-1 acc 59.375 (60.764)	Top-5 acc 82.422 (81.749)	lr 0.01473
Train [54][530/3239]	Time 0.225 (0.707)	Data Time 0.001 (0.068)	Loss 2.7322 (2.6300)	Entropy 1.14129 (1.14183)	Top-1 acc 60.156 (60.739)	Top-5 acc 78.906 (81.734)	lr 0.01473
Train [54][540/3239]	Time 0.235 (0.702)	Data Time 0.001 (0.067)	Loss 2.7092 (2.6299)	Entropy 1.14126 (1.14182)	Top-1 acc 59.375 (60.741)	Top-5 acc 81.250 (81.733)	lr 0.01472
Train [54][550/3239]	Time 0.248 (0.698)	Data Time 0.002 (0.066)	Loss 2.4773 (2.6297)	Entropy 1.14123 (1.14181)	Top-1 acc 63.281 (60.750)	Top-5 acc 83.203 (81.730)	lr 0.01472
Train [54][560/3239]	Time 2.528 (0.694)	Data Time 0.002 (0.065)	Loss 2.4934 (2.6290)	Entropy 1.14123 (1.14180)	Top-1 acc 64.062 (60.783)	Top-5 acc 87.500 (81.737)	lr 0.01472
Train [54][570/3239]	Time 0.239 (0.686)	Data Time 0.001 (0.063)	Loss 2.5558 (2.6305)	Entropy 1.14116 (1.14179)	Top-1 acc 60.156 (60.739)	Top-5 acc 78.906 (81.702)	lr 0.01472
Train [54][580/3239]	Time 0.320 (0.682)	Data Time 0.001 (0.062)	Loss 2.5387 (2.6309)	Entropy 1.14129 (1.14178)	Top-1 acc 64.453 (60.735)	Top-5 acc 85.547 (81.689)	lr 0.01472
Train [54][590/3239]	Time 0.213 (0.679)	Data Time 0.001 (0.061)	Loss 2.7244 (2.6308)	Entropy 1.14126 (1.14177)	Top-1 acc 56.641 (60.744)	Top-5 acc 78.516 (81.698)	lr 0.01472
Train [54][600/3239]	Time 0.219 (0.675)	Data Time 0.001 (0.060)	Loss 2.5661 (2.6308)	Entropy 1.14125 (1.14176)	Top-1 acc 60.938 (60.752)	Top-5 acc 83.203 (81.705)	lr 0.01472
Train [54][610/3239]	Time 0.173 (0.671)	Data Time 0.001 (0.059)	Loss 2.5948 (2.6318)	Entropy 1.14124 (1.14175)	Top-1 acc 61.719 (60.734)	Top-5 acc 80.859 (81.676)	lr 0.01472
Train [54][620/3239]	Time 0.229 (0.668)	Data Time 0.001 (0.059)	Loss 2.7203 (2.6316)	Entropy 1.14120 (1.14174)	Top-1 acc 59.375 (60.732)	Top-5 acc 77.344 (81.678)	lr 0.01472
Train [54][630/3239]	Time 0.319 (0.665)	Data Time 0.002 (0.058)	Loss 2.6490 (2.6313)	Entropy 1.14122 (1.14174)	Top-1 acc 59.766 (60.734)	Top-5 acc 79.297 (81.668)	lr 0.01472
Train [54][640/3239]	Time 0.230 (0.662)	Data Time 0.001 (0.057)	Loss 2.5497 (2.6311)	Entropy 1.14126 (1.14173)	Top-1 acc 61.719 (60.750)	Top-5 acc 83.203 (81.664)	lr 0.01471
Train [54][650/3239]	Time 0.258 (0.659)	Data Time 0.001 (0.056)	Loss 2.5329 (2.6320)	Entropy 1.14121 (1.14172)	Top-1 acc 64.453 (60.732)	Top-5 acc 84.766 (81.650)	lr 0.01471
Train [54][660/3239]	Time 0.231 (0.656)	Data Time 0.001 (0.055)	Loss 2.6628 (2.6322)	Entropy 1.14110 (1.14171)	Top-1 acc 58.984 (60.726)	Top-5 acc 82.422 (81.640)	lr 0.01471
Train [54][670/3239]	Time 2.399 (0.653)	Data Time 0.001 (0.054)	Loss 2.6651 (2.6322)	Entropy 1.14110 (1.14170)	Top-1 acc 55.859 (60.712)	Top-5 acc 78.906 (81.636)	lr 0.01471
Train [54][680/3239]	Time 0.241 (0.647)	Data Time 0.001 (0.053)	Loss 2.6930 (2.6322)	Entropy 1.14111 (1.14170)	Top-1 acc 60.156 (60.717)	Top-5 acc 80.469 (81.636)	lr 0.01471
Train [54][690/3239]	Time 0.221 (0.644)	Data Time 0.001 (0.053)	Loss 2.4560 (2.6319)	Entropy 1.14099 (1.14168)	Top-1 acc 68.359 (60.724)	Top-5 acc 84.766 (81.651)	lr 0.01471
Train [54][700/3239]	Time 0.219 (0.642)	Data Time 0.001 (0.052)	Loss 2.6416 (2.6315)	Entropy 1.14091 (1.14167)	Top-1 acc 60.547 (60.727)	Top-5 acc 83.203 (81.657)	lr 0.01471
Train [54][710/3239]	Time 0.212 (0.639)	Data Time 0.001 (0.051)	Loss 2.5076 (2.6312)	Entropy 1.14087 (1.14166)	Top-1 acc 60.938 (60.723)	Top-5 acc 85.547 (81.658)	lr 0.01471
Train [54][720/3239]	Time 0.257 (0.637)	Data Time 0.001 (0.051)	Loss 2.5482 (2.6308)	Entropy 1.14079 (1.14165)	Top-1 acc 64.844 (60.734)	Top-5 acc 83.594 (81.670)	lr 0.01471
Train [54][730/3239]	Time 0.225 (0.635)	Data Time 0.001 (0.050)	Loss 2.5036 (2.6297)	Entropy 1.14079 (1.14164)	Top-1 acc 66.016 (60.765)	Top-5 acc 82.812 (81.690)	lr 0.01471
Train [54][740/3239]	Time 0.222 (0.632)	Data Time 0.001 (0.049)	Loss 2.5265 (2.6300)	Entropy 1.14079 (1.14163)	Top-1 acc 64.453 (60.753)	Top-5 acc 82.422 (81.680)	lr 0.01470
Train [54][750/3239]	Time 0.240 (0.630)	Data Time 0.002 (0.049)	Loss 2.6899 (2.6299)	Entropy 1.14073 (1.14162)	Top-1 acc 58.203 (60.745)	Top-5 acc 80.469 (81.682)	lr 0.01470
Train [54][760/3239]	Time 0.219 (0.628)	Data Time 0.001 (0.048)	Loss 2.7195 (2.6300)	Entropy 1.14070 (1.14160)	Top-1 acc 57.422 (60.738)	Top-5 acc 77.344 (81.667)	lr 0.01470
Train [54][770/3239]	Time 0.318 (0.626)	Data Time 0.001 (0.047)	Loss 2.6185 (2.6296)	Entropy 1.14067 (1.14159)	Top-1 acc 60.156 (60.745)	Top-5 acc 82.812 (81.679)	lr 0.01470
Train [54][780/3239]	Time 2.400 (0.624)	Data Time 0.001 (0.047)	Loss 2.7981 (2.6305)	Entropy 1.14067 (1.14158)	Top-1 acc 51.562 (60.713)	Top-5 acc 81.250 (81.670)	lr 0.01470
Train [54][790/3239]	Time 0.211 (0.619)	Data Time 0.001 (0.046)	Loss 2.7654 (2.6303)	Entropy 1.14061 (1.14157)	Top-1 acc 57.812 (60.713)	Top-5 acc 78.516 (81.674)	lr 0.01470
Train [54][800/3239]	Time 0.224 (0.617)	Data Time 0.001 (0.046)	Loss 2.4984 (2.6295)	Entropy 1.14057 (1.14156)	Top-1 acc 63.281 (60.720)	Top-5 acc 85.547 (81.701)	lr 0.01470
Train [54][810/3239]	Time 0.224 (0.614)	Data Time 0.001 (0.045)	Loss 2.7948 (2.6302)	Entropy 1.14057 (1.14154)	Top-1 acc 58.984 (60.703)	Top-5 acc 77.734 (81.684)	lr 0.01470
Train [54][820/3239]	Time 0.246 (0.612)	Data Time 0.001 (0.045)	Loss 2.6399 (2.6308)	Entropy 1.14053 (1.14153)	Top-1 acc 58.984 (60.682)	Top-5 acc 83.984 (81.679)	lr 0.01470
Train [54][830/3239]	Time 0.233 (0.610)	Data Time 0.001 (0.044)	Loss 2.5338 (2.6314)	Entropy 1.14053 (1.14152)	Top-1 acc 64.062 (60.667)	Top-5 acc 85.156 (81.669)	lr 0.01470
Train [54][840/3239]	Time 0.266 (0.664)	Data Time 0.003 (0.044)	Loss 2.5909 (2.6315)	Entropy 1.14045 (1.14151)	Top-1 acc 62.500 (60.653)	Top-5 acc 84.766 (81.669)	lr 0.01469
Train [54][850/3239]	Time 0.212 (0.663)	Data Time 0.002 (0.043)	Loss 2.5320 (2.6312)	Entropy 1.14046 (1.14150)	Top-1 acc 63.281 (60.656)	Top-5 acc 85.547 (81.673)	lr 0.01469
Train [54][860/3239]	Time 0.216 (0.661)	Data Time 0.002 (0.043)	Loss 2.6140 (2.6312)	Entropy 1.14036 (1.14148)	Top-1 acc 62.891 (60.664)	Top-5 acc 81.250 (81.676)	lr 0.01469
Train [54][870/3239]	Time 0.336 (0.659)	Data Time 0.002 (0.042)	Loss 2.4763 (2.6312)	Entropy 1.14033 (1.14147)	Top-1 acc 62.500 (60.666)	Top-5 acc 84.766 (81.673)	lr 0.01469
Train [54][880/3239]	Time 0.255 (0.657)	Data Time 0.001 (0.042)	Loss 2.5882 (2.6316)	Entropy 1.14028 (1.14146)	Top-1 acc 62.109 (60.656)	Top-5 acc 82.031 (81.661)	lr 0.01469
Train [54][890/3239]	Time 2.513 (0.655)	Data Time 0.003 (0.041)	Loss 2.3221 (2.6305)	Entropy 1.14028 (1.14144)	Top-1 acc 68.750 (60.687)	Top-5 acc 87.500 (81.682)	lr 0.01469
Train [54][900/3239]	Time 0.248 (0.650)	Data Time 0.001 (0.041)	Loss 2.5478 (2.6302)	Entropy 1.14029 (1.14143)	Top-1 acc 59.375 (60.696)	Top-5 acc 85.938 (81.688)	lr 0.01469
Train [54][910/3239]	Time 0.233 (0.648)	Data Time 0.001 (0.040)	Loss 2.5957 (2.6305)	Entropy 1.14023 (1.14142)	Top-1 acc 60.547 (60.685)	Top-5 acc 84.766 (81.686)	lr 0.01469
Train [54][920/3239]	Time 0.316 (0.646)	Data Time 0.001 (0.040)	Loss 2.5637 (2.6301)	Entropy 1.14025 (1.14141)	Top-1 acc 65.234 (60.692)	Top-5 acc 81.641 (81.691)	lr 0.01469
Train [54][930/3239]	Time 0.209 (0.644)	Data Time 0.001 (0.040)	Loss 2.7260 (2.6304)	Entropy 1.14021 (1.14139)	Top-1 acc 58.594 (60.679)	Top-5 acc 80.078 (81.682)	lr 0.01469
Train [54][940/3239]	Time 0.243 (0.642)	Data Time 0.001 (0.039)	Loss 2.4597 (2.6305)	Entropy 1.14018 (1.14138)	Top-1 acc 66.797 (60.691)	Top-5 acc 83.984 (81.678)	lr 0.01468
Train [54][950/3239]	Time 0.221 (0.640)	Data Time 0.001 (0.039)	Loss 2.5188 (2.6309)	Entropy 1.14015 (1.14137)	Top-1 acc 58.594 (60.676)	Top-5 acc 83.594 (81.671)	lr 0.01468
Train [54][960/3239]	Time 0.237 (0.638)	Data Time 0.001 (0.038)	Loss 2.6339 (2.6309)	Entropy 1.14010 (1.14135)	Top-1 acc 60.156 (60.665)	Top-5 acc 79.297 (81.667)	lr 0.01468
Train [54][970/3239]	Time 0.310 (0.637)	Data Time 0.001 (0.038)	Loss 2.6702 (2.6318)	Entropy 1.14009 (1.14134)	Top-1 acc 56.641 (60.641)	Top-5 acc 82.031 (81.647)	lr 0.01468
Train [54][980/3239]	Time 0.225 (0.635)	Data Time 0.001 (0.038)	Loss 2.5150 (2.6318)	Entropy 1.14009 (1.14133)	Top-1 acc 63.281 (60.646)	Top-5 acc 84.375 (81.645)	lr 0.01468
Train [54][990/3239]	Time 0.269 (0.633)	Data Time 0.001 (0.037)	Loss 2.8501 (2.6317)	Entropy 1.13994 (1.14132)	Top-1 acc 55.078 (60.647)	Top-5 acc 79.688 (81.648)	lr 0.01468
Train [54][1000/3239]	Time 2.452 (0.631)	Data Time 0.001 (0.037)	Loss 2.5996 (2.6316)	Entropy 1.13994 (1.14130)	Top-1 acc 62.500 (60.653)	Top-5 acc 80.859 (81.645)	lr 0.01468
Train [54][1010/3239]	Time 0.226 (0.628)	Data Time 0.001 (0.037)	Loss 2.4350 (2.6309)	Entropy 1.13990 (1.14129)	Top-1 acc 66.016 (60.664)	Top-5 acc 84.375 (81.656)	lr 0.01468
Train [54][1020/3239]	Time 0.338 (0.626)	Data Time 0.001 (0.036)	Loss 2.6952 (2.6311)	Entropy 1.13989 (1.14127)	Top-1 acc 60.938 (60.659)	Top-5 acc 78.125 (81.642)	lr 0.01468
Train [54][1030/3239]	Time 0.227 (0.624)	Data Time 0.001 (0.036)	Loss 2.8521 (2.6313)	Entropy 1.13987 (1.14126)	Top-1 acc 55.078 (60.650)	Top-5 acc 77.344 (81.638)	lr 0.01468
Train [54][1040/3239]	Time 0.230 (0.623)	Data Time 0.001 (0.036)	Loss 2.7439 (2.6315)	Entropy 1.13986 (1.14125)	Top-1 acc 60.156 (60.646)	Top-5 acc 76.562 (81.629)	lr 0.01467
Train [54][1050/3239]	Time 0.216 (0.621)	Data Time 0.001 (0.035)	Loss 2.6630 (2.6318)	Entropy 1.13983 (1.14123)	Top-1 acc 58.984 (60.637)	Top-5 acc 81.250 (81.620)	lr 0.01467
Train [54][1060/3239]	Time 0.228 (0.620)	Data Time 0.001 (0.035)	Loss 2.5731 (2.6320)	Entropy 1.13982 (1.14122)	Top-1 acc 62.109 (60.637)	Top-5 acc 83.203 (81.612)	lr 0.01467
Train [54][1070/3239]	Time 0.317 (0.618)	Data Time 0.001 (0.035)	Loss 2.5827 (2.6318)	Entropy 1.13980 (1.14121)	Top-1 acc 63.281 (60.646)	Top-5 acc 83.594 (81.617)	lr 0.01467
Train [54][1080/3239]	Time 0.225 (0.617)	Data Time 0.001 (0.034)	Loss 2.6773 (2.6318)	Entropy 1.13975 (1.14119)	Top-1 acc 58.203 (60.644)	Top-5 acc 82.031 (81.620)	lr 0.01467
Train [54][1090/3239]	Time 0.239 (0.615)	Data Time 0.001 (0.034)	Loss 2.7172 (2.6320)	Entropy 1.13974 (1.14118)	Top-1 acc 58.984 (60.634)	Top-5 acc 80.078 (81.612)	lr 0.01467
Train [54][1100/3239]	Time 0.230 (0.614)	Data Time 0.001 (0.034)	Loss 2.6162 (2.6321)	Entropy 1.13974 (1.14117)	Top-1 acc 59.766 (60.636)	Top-5 acc 85.156 (81.609)	lr 0.01467
Train [54][1110/3239]	Time 2.433 (0.613)	Data Time 0.001 (0.033)	Loss 2.5892 (2.6325)	Entropy 1.13974 (1.14116)	Top-1 acc 62.109 (60.625)	Top-5 acc 82.031 (81.599)	lr 0.01467
Train [54][1120/3239]	Time 0.222 (0.609)	Data Time 0.002 (0.033)	Loss 2.5431 (2.6326)	Entropy 1.13980 (1.14114)	Top-1 acc 63.672 (60.626)	Top-5 acc 82.422 (81.594)	lr 0.01467
Train [54][1130/3239]	Time 0.224 (0.608)	Data Time 0.001 (0.033)	Loss 2.5951 (2.6322)	Entropy 1.13979 (1.14113)	Top-1 acc 60.156 (60.643)	Top-5 acc 83.984 (81.603)	lr 0.01467
Train [54][1140/3239]	Time 0.217 (0.607)	Data Time 0.001 (0.033)	Loss 2.6459 (2.6322)	Entropy 1.13975 (1.14112)	Top-1 acc 55.859 (60.641)	Top-5 acc 80.859 (81.605)	lr 0.01466
Train [54][1150/3239]	Time 0.218 (0.606)	Data Time 0.001 (0.032)	Loss 2.7323 (2.6319)	Entropy 1.13972 (1.14111)	Top-1 acc 58.984 (60.648)	Top-5 acc 78.516 (81.610)	lr 0.01466
Train [54][1160/3239]	Time 0.239 (0.605)	Data Time 0.001 (0.032)	Loss 2.8165 (2.6320)	Entropy 1.13966 (1.14109)	Top-1 acc 58.203 (60.646)	Top-5 acc 78.125 (81.613)	lr 0.01466
Train [54][1170/3239]	Time 0.199 (0.603)	Data Time 0.001 (0.032)	Loss 2.5795 (2.6322)	Entropy 1.13967 (1.14108)	Top-1 acc 61.719 (60.645)	Top-5 acc 81.250 (81.609)	lr 0.01466
Train [54][1180/3239]	Time 0.217 (0.602)	Data Time 0.001 (0.032)	Loss 2.5581 (2.6321)	Entropy 1.13969 (1.14107)	Top-1 acc 64.062 (60.650)	Top-5 acc 80.859 (81.606)	lr 0.01466
Train [54][1190/3239]	Time 0.219 (0.601)	Data Time 0.001 (0.031)	Loss 2.5186 (2.6321)	Entropy 1.13963 (1.14106)	Top-1 acc 66.016 (60.649)	Top-5 acc 82.812 (81.608)	lr 0.01466
Train [54][1200/3239]	Time 0.265 (0.637)	Data Time 0.002 (0.031)	Loss 2.6868 (2.6322)	Entropy 1.13957 (1.14105)	Top-1 acc 60.547 (60.643)	Top-5 acc 80.469 (81.604)	lr 0.01466
Train [54][1210/3239]	Time 0.312 (0.637)	Data Time 0.002 (0.031)	Loss 2.6587 (2.6325)	Entropy 1.13952 (1.14103)	Top-1 acc 62.500 (60.638)	Top-5 acc 82.812 (81.602)	lr 0.01466
Train [54][1220/3239]	Time 2.484 (0.636)	Data Time 0.001 (0.031)	Loss 2.7416 (2.6326)	Entropy 1.13952 (1.14102)	Top-1 acc 61.719 (60.630)	Top-5 acc 80.078 (81.594)	lr 0.01466
Train [54][1230/3239]	Time 0.242 (0.632)	Data Time 0.002 (0.030)	Loss 2.7887 (2.6328)	Entropy 1.13953 (1.14101)	Top-1 acc 53.125 (60.623)	Top-5 acc 82.422 (81.597)	lr 0.01466
Train [54][1240/3239]	Time 0.225 (0.631)	Data Time 0.001 (0.030)	Loss 2.7428 (2.6328)	Entropy 1.13953 (1.14100)	Top-1 acc 61.328 (60.625)	Top-5 acc 76.953 (81.597)	lr 0.01465
Train [54][1250/3239]	Time 0.247 (0.630)	Data Time 0.001 (0.030)	Loss 2.5224 (2.6326)	Entropy 1.13950 (1.14099)	Top-1 acc 63.672 (60.633)	Top-5 acc 82.031 (81.598)	lr 0.01465
Train [54][1260/3239]	Time 0.338 (0.628)	Data Time 0.002 (0.030)	Loss 2.7635 (2.6331)	Entropy 1.13947 (1.14097)	Top-1 acc 52.344 (60.623)	Top-5 acc 78.516 (81.587)	lr 0.01465
Train [54][1270/3239]	Time 0.178 (0.627)	Data Time 0.001 (0.029)	Loss 2.6186 (2.6331)	Entropy 1.13949 (1.14096)	Top-1 acc 60.156 (60.622)	Top-5 acc 81.250 (81.589)	lr 0.01465
Train [54][1280/3239]	Time 0.226 (0.626)	Data Time 0.002 (0.029)	Loss 2.5349 (2.6335)	Entropy 1.13947 (1.14095)	Top-1 acc 64.453 (60.618)	Top-5 acc 82.031 (81.582)	lr 0.01465
Train [54][1290/3239]	Time 0.223 (0.624)	Data Time 0.001 (0.029)	Loss 2.5322 (2.6336)	Entropy 1.13941 (1.14094)	Top-1 acc 66.016 (60.611)	Top-5 acc 80.859 (81.580)	lr 0.01465
Train [54][1300/3239]	Time 0.213 (0.623)	Data Time 0.001 (0.029)	Loss 2.6869 (2.6339)	Entropy 1.13942 (1.14093)	Top-1 acc 59.766 (60.600)	Top-5 acc 78.906 (81.573)	lr 0.01465
Train [54][1310/3239]	Time 0.218 (0.622)	Data Time 0.001 (0.029)	Loss 2.5951 (2.6336)	Entropy 1.13935 (1.14092)	Top-1 acc 62.500 (60.611)	Top-5 acc 82.031 (81.579)	lr 0.01465
Train [54][1320/3239]	Time 0.247 (0.621)	Data Time 0.001 (0.028)	Loss 2.7656 (2.6339)	Entropy 1.13931 (1.14090)	Top-1 acc 60.938 (60.608)	Top-5 acc 76.953 (81.574)	lr 0.01465
Train [54][1330/3239]	Time 2.493 (0.620)	Data Time 0.001 (0.028)	Loss 2.3909 (2.6336)	Entropy 1.13931 (1.14089)	Top-1 acc 69.531 (60.624)	Top-5 acc 86.328 (81.578)	lr 0.01465
Train [54][1340/3239]	Time 0.179 (0.617)	Data Time 0.001 (0.028)	Loss 2.8571 (2.6341)	Entropy 1.13932 (1.14088)	Top-1 acc 55.469 (60.608)	Top-5 acc 76.562 (81.566)	lr 0.01464
Train [54][1350/3239]	Time 0.264 (0.616)	Data Time 0.001 (0.028)	Loss 2.5115 (2.6341)	Entropy 1.13929 (1.14087)	Top-1 acc 61.328 (60.608)	Top-5 acc 82.812 (81.560)	lr 0.01464
Train [54][1360/3239]	Time 0.211 (0.614)	Data Time 0.001 (0.028)	Loss 2.7744 (2.6341)	Entropy 1.13923 (1.14086)	Top-1 acc 59.766 (60.615)	Top-5 acc 77.734 (81.557)	lr 0.01464
Train [54][1370/3239]	Time 0.243 (0.613)	Data Time 0.001 (0.027)	Loss 2.5973 (2.6341)	Entropy 1.13919 (1.14084)	Top-1 acc 61.328 (60.601)	Top-5 acc 84.375 (81.555)	lr 0.01464
Train [54][1380/3239]	Time 0.226 (0.612)	Data Time 0.001 (0.027)	Loss 2.6362 (2.6338)	Entropy 1.13917 (1.14083)	Top-1 acc 64.062 (60.611)	Top-5 acc 80.469 (81.561)	lr 0.01464
Train [54][1390/3239]	Time 0.248 (0.611)	Data Time 0.001 (0.027)	Loss 2.6339 (2.6339)	Entropy 1.13916 (1.14082)	Top-1 acc 58.984 (60.604)	Top-5 acc 80.469 (81.555)	lr 0.01464
Train [54][1400/3239]	Time 0.306 (0.610)	Data Time 0.001 (0.027)	Loss 2.6341 (2.6336)	Entropy 1.13912 (1.14081)	Top-1 acc 60.938 (60.608)	Top-5 acc 82.812 (81.560)	lr 0.01464
Train [54][1410/3239]	Time 0.242 (0.609)	Data Time 0.001 (0.027)	Loss 2.6973 (2.6336)	Entropy 1.13911 (1.14080)	Top-1 acc 55.078 (60.605)	Top-5 acc 82.812 (81.560)	lr 0.01464
Train [54][1420/3239]	Time 0.251 (0.608)	Data Time 0.001 (0.027)	Loss 2.7481 (2.6338)	Entropy 1.13908 (1.14078)	Top-1 acc 61.328 (60.600)	Top-5 acc 78.906 (81.556)	lr 0.01464
Train [54][1430/3239]	Time 0.220 (0.607)	Data Time 0.001 (0.026)	Loss 3.0809 (2.6342)	Entropy 1.13905 (1.14077)	Top-1 acc 51.172 (60.589)	Top-5 acc 75.000 (81.549)	lr 0.01464
Train [54][1440/3239]	Time 2.429 (0.606)	Data Time 0.001 (0.026)	Loss 2.6139 (2.6340)	Entropy 1.13905 (1.14076)	Top-1 acc 58.594 (60.586)	Top-5 acc 83.203 (81.550)	lr 0.01463
Train [54][1450/3239]	Time 0.219 (0.604)	Data Time 0.001 (0.026)	Loss 2.6615 (2.6341)	Entropy 1.13893 (1.14075)	Top-1 acc 60.156 (60.585)	Top-5 acc 81.250 (81.554)	lr 0.01463
Train [54][1460/3239]	Time 0.246 (0.603)	Data Time 0.001 (0.026)	Loss 2.6365 (2.6341)	Entropy 1.13892 (1.14074)	Top-1 acc 60.156 (60.586)	Top-5 acc 83.203 (81.553)	lr 0.01463
Train [54][1470/3239]	Time 0.222 (0.602)	Data Time 0.001 (0.026)	Loss 2.5490 (2.6343)	Entropy 1.13883 (1.14072)	Top-1 acc 63.281 (60.576)	Top-5 acc 82.031 (81.551)	lr 0.01463
Train [54][1480/3239]	Time 0.198 (0.601)	Data Time 0.001 (0.026)	Loss 2.8777 (2.6347)	Entropy 1.13877 (1.14071)	Top-1 acc 53.125 (60.574)	Top-5 acc 76.953 (81.545)	lr 0.01463
Train [54][1490/3239]	Time 0.282 (0.600)	Data Time 0.002 (0.025)	Loss 2.6153 (2.6348)	Entropy 1.13872 (1.14070)	Top-1 acc 63.281 (60.572)	Top-5 acc 82.031 (81.545)	lr 0.01463
Train [54][1500/3239]	Time 0.268 (0.599)	Data Time 0.001 (0.025)	Loss 2.4813 (2.6348)	Entropy 1.13869 (1.14068)	Top-1 acc 63.672 (60.573)	Top-5 acc 83.984 (81.543)	lr 0.01463
Train [54][1510/3239]	Time 0.209 (0.598)	Data Time 0.001 (0.025)	Loss 2.7213 (2.6349)	Entropy 1.13866 (1.14067)	Top-1 acc 59.766 (60.564)	Top-5 acc 81.250 (81.542)	lr 0.01463
Train [54][1520/3239]	Time 0.208 (0.597)	Data Time 0.001 (0.025)	Loss 2.7776 (2.6347)	Entropy 1.13864 (1.14066)	Top-1 acc 57.422 (60.567)	Top-5 acc 79.688 (81.549)	lr 0.01463
Train [54][1530/3239]	Time 0.222 (0.596)	Data Time 0.001 (0.025)	Loss 2.6001 (2.6346)	Entropy 1.13862 (1.14064)	Top-1 acc 60.938 (60.564)	Top-5 acc 83.984 (81.550)	lr 0.01463
Train [54][1540/3239]	Time 0.319 (0.596)	Data Time 0.001 (0.025)	Loss 2.5942 (2.6344)	Entropy 1.13858 (1.14063)	Top-1 acc 59.766 (60.571)	Top-5 acc 82.812 (81.554)	lr 0.01462
Train [54][1550/3239]	Time 2.543 (0.595)	Data Time 0.001 (0.024)	Loss 2.8503 (2.6346)	Entropy 1.13858 (1.14062)	Top-1 acc 58.594 (60.573)	Top-5 acc 76.953 (81.549)	lr 0.01462
Train [54][1560/3239]	Time 0.243 (0.593)	Data Time 0.001 (0.024)	Loss 2.8991 (2.6348)	Entropy 1.13856 (1.14060)	Top-1 acc 54.688 (60.566)	Top-5 acc 74.219 (81.552)	lr 0.01462
Train [54][1570/3239]	Time 0.402 (0.622)	Data Time 0.003 (0.024)	Loss 2.6762 (2.6350)	Entropy 1.13856 (1.14059)	Top-1 acc 57.812 (60.551)	Top-5 acc 78.125 (81.547)	lr 0.01462
Train [54][1580/3239]	Time 0.218 (0.621)	Data Time 0.002 (0.024)	Loss 2.5438 (2.6350)	Entropy 1.13855 (1.14058)	Top-1 acc 65.625 (60.551)	Top-5 acc 81.641 (81.550)	lr 0.01462
Train [54][1590/3239]	Time 0.251 (0.620)	Data Time 0.001 (0.024)	Loss 2.6748 (2.6351)	Entropy 1.13849 (1.14057)	Top-1 acc 58.594 (60.547)	Top-5 acc 81.641 (81.548)	lr 0.01462
Train [54][1600/3239]	Time 0.237 (0.619)	Data Time 0.001 (0.024)	Loss 2.7442 (2.6351)	Entropy 1.13840 (1.14055)	Top-1 acc 57.422 (60.547)	Top-5 acc 79.688 (81.544)	lr 0.01462
Train [54][1610/3239]	Time 0.239 (0.618)	Data Time 0.001 (0.024)	Loss 2.6259 (2.6350)	Entropy 1.13842 (1.14054)	Top-1 acc 62.109 (60.554)	Top-5 acc 78.906 (81.548)	lr 0.01462
Train [54][1620/3239]	Time 0.230 (0.617)	Data Time 0.002 (0.024)	Loss 2.6282 (2.6351)	Entropy 1.13840 (1.14053)	Top-1 acc 58.594 (60.552)	Top-5 acc 82.031 (81.551)	lr 0.01462
Train [54][1630/3239]	Time 0.238 (0.616)	Data Time 0.001 (0.023)	Loss 2.4454 (2.6350)	Entropy 1.13839 (1.14051)	Top-1 acc 64.453 (60.554)	Top-5 acc 84.766 (81.552)	lr 0.01462
Train [54][1640/3239]	Time 0.315 (0.615)	Data Time 0.001 (0.023)	Loss 2.7173 (2.6353)	Entropy 1.13837 (1.14050)	Top-1 acc 57.812 (60.547)	Top-5 acc 78.906 (81.547)	lr 0.01461
Train [54][1650/3239]	Time 0.206 (0.615)	Data Time 0.001 (0.023)	Loss 2.5970 (2.6357)	Entropy 1.13831 (1.14049)	Top-1 acc 61.719 (60.538)	Top-5 acc 83.984 (81.541)	lr 0.01461
Train [54][1660/3239]	Time 2.424 (0.614)	Data Time 0.001 (0.023)	Loss 2.8748 (2.6361)	Entropy 1.13831 (1.14047)	Top-1 acc 54.688 (60.528)	Top-5 acc 77.344 (81.534)	lr 0.01461
Train [54][1670/3239]	Time 0.228 (0.611)	Data Time 0.001 (0.023)	Loss 2.8378 (2.6362)	Entropy 1.13831 (1.14046)	Top-1 acc 55.078 (60.530)	Top-5 acc 76.953 (81.526)	lr 0.01461
Train [54][1680/3239]	Time 0.233 (0.610)	Data Time 0.001 (0.023)	Loss 2.6585 (2.6364)	Entropy 1.13830 (1.14045)	Top-1 acc 62.500 (60.530)	Top-5 acc 83.203 (81.522)	lr 0.01461
Train [54][1690/3239]	Time 0.341 (0.610)	Data Time 0.001 (0.023)	Loss 2.6359 (2.6362)	Entropy 1.13831 (1.14043)	Top-1 acc 59.766 (60.534)	Top-5 acc 81.641 (81.527)	lr 0.01461
Train [54][1700/3239]	Time 0.233 (0.609)	Data Time 0.001 (0.023)	Loss 2.6766 (2.6362)	Entropy 1.13826 (1.14042)	Top-1 acc 58.594 (60.527)	Top-5 acc 81.641 (81.524)	lr 0.01461
Train [54][1710/3239]	Time 0.185 (0.608)	Data Time 0.001 (0.022)	Loss 2.5871 (2.6365)	Entropy 1.13824 (1.14041)	Top-1 acc 63.672 (60.525)	Top-5 acc 81.641 (81.518)	lr 0.01461
Train [54][1720/3239]	Time 0.230 (0.607)	Data Time 0.001 (0.022)	Loss 2.6212 (2.6366)	Entropy 1.13824 (1.14040)	Top-1 acc 62.109 (60.524)	Top-5 acc 82.812 (81.516)	lr 0.01461
Train [54][1730/3239]	Time 0.235 (0.606)	Data Time 0.001 (0.022)	Loss 2.6697 (2.6364)	Entropy 1.13822 (1.14038)	Top-1 acc 61.719 (60.530)	Top-5 acc 81.641 (81.517)	lr 0.01461
Train [54][1740/3239]	Time 0.328 (0.605)	Data Time 0.001 (0.022)	Loss 2.6216 (2.6363)	Entropy 1.13823 (1.14037)	Top-1 acc 61.328 (60.532)	Top-5 acc 81.641 (81.521)	lr 0.01460
Train [54][1750/3239]	Time 0.245 (0.605)	Data Time 0.001 (0.022)	Loss 2.6605 (2.6361)	Entropy 1.13819 (1.14036)	Top-1 acc 57.812 (60.528)	Top-5 acc 83.203 (81.528)	lr 0.01460
Train [54][1760/3239]	Time 0.248 (0.604)	Data Time 0.002 (0.022)	Loss 2.8715 (2.6362)	Entropy 1.13819 (1.14035)	Top-1 acc 51.562 (60.526)	Top-5 acc 76.953 (81.524)	lr 0.01460
Train [54][1770/3239]	Time 2.482 (0.603)	Data Time 0.001 (0.022)	Loss 2.7198 (2.6361)	Entropy 1.13819 (1.14034)	Top-1 acc 58.594 (60.533)	Top-5 acc 79.297 (81.522)	lr 0.01460
Train [54][1780/3239]	Time 0.238 (0.601)	Data Time 0.001 (0.022)	Loss 2.6317 (2.6363)	Entropy 1.13819 (1.14032)	Top-1 acc 58.594 (60.522)	Top-5 acc 80.469 (81.515)	lr 0.01460
Train [54][1790/3239]	Time 0.226 (0.600)	Data Time 0.001 (0.021)	Loss 2.6703 (2.6366)	Entropy 1.13813 (1.14031)	Top-1 acc 63.281 (60.519)	Top-5 acc 82.812 (81.512)	lr 0.01460
Train [54][1800/3239]	Time 0.237 (0.599)	Data Time 0.001 (0.021)	Loss 2.7007 (2.6366)	Entropy 1.13811 (1.14030)	Top-1 acc 57.422 (60.517)	Top-5 acc 82.422 (81.512)	lr 0.01460
Train [54][1810/3239]	Time 0.225 (0.599)	Data Time 0.001 (0.021)	Loss 2.5848 (2.6366)	Entropy 1.13810 (1.14029)	Top-1 acc 63.281 (60.517)	Top-5 acc 83.984 (81.515)	lr 0.01460
Train [54][1820/3239]	Time 0.250 (0.598)	Data Time 0.001 (0.021)	Loss 2.6370 (2.6369)	Entropy 1.13810 (1.14027)	Top-1 acc 60.156 (60.511)	Top-5 acc 81.250 (81.508)	lr 0.01460
Train [54][1830/3239]	Time 0.232 (0.597)	Data Time 0.001 (0.021)	Loss 2.7423 (2.6372)	Entropy 1.13807 (1.14026)	Top-1 acc 55.078 (60.507)	Top-5 acc 80.078 (81.504)	lr 0.01460
Train [54][1840/3239]	Time 0.229 (0.596)	Data Time 0.001 (0.021)	Loss 2.5696 (2.6371)	Entropy 1.13804 (1.14025)	Top-1 acc 63.672 (60.508)	Top-5 acc 83.203 (81.509)	lr 0.01459
Train [54][1850/3239]	Time 0.233 (0.596)	Data Time 0.001 (0.021)	Loss 2.6583 (2.6373)	Entropy 1.13803 (1.14024)	Top-1 acc 60.938 (60.504)	Top-5 acc 78.516 (81.498)	lr 0.01459
Train [54][1860/3239]	Time 0.238 (0.595)	Data Time 0.001 (0.021)	Loss 2.8186 (2.6377)	Entropy 1.13806 (1.14023)	Top-1 acc 57.422 (60.501)	Top-5 acc 77.344 (81.488)	lr 0.01459
Train [54][1870/3239]	Time 0.212 (0.594)	Data Time 0.001 (0.021)	Loss 2.7940 (2.6379)	Entropy 1.13804 (1.14022)	Top-1 acc 55.859 (60.497)	Top-5 acc 79.297 (81.484)	lr 0.01459
Train [54][1880/3239]	Time 2.523 (0.594)	Data Time 0.002 (0.021)	Loss 2.5917 (2.6379)	Entropy 1.13804 (1.14020)	Top-1 acc 61.328 (60.495)	Top-5 acc 81.641 (81.481)	lr 0.01459
Train [54][1890/3239]	Time 0.232 (0.592)	Data Time 0.001 (0.020)	Loss 2.5637 (2.6380)	Entropy 1.13800 (1.14019)	Top-1 acc 62.891 (60.491)	Top-5 acc 81.250 (81.476)	lr 0.01459
Train [54][1900/3239]	Time 0.224 (0.591)	Data Time 0.001 (0.020)	Loss 2.5150 (2.6381)	Entropy 1.13798 (1.14018)	Top-1 acc 63.672 (60.493)	Top-5 acc 82.812 (81.474)	lr 0.01459
Train [54][1910/3239]	Time 0.224 (0.590)	Data Time 0.001 (0.020)	Loss 2.5544 (2.6379)	Entropy 1.13794 (1.14017)	Top-1 acc 58.984 (60.488)	Top-5 acc 83.203 (81.476)	lr 0.01459
Train [54][1920/3239]	Time 0.235 (0.590)	Data Time 0.001 (0.020)	Loss 2.6758 (2.6380)	Entropy 1.13792 (1.14016)	Top-1 acc 59.375 (60.479)	Top-5 acc 80.859 (81.473)	lr 0.01459
Train [54][1930/3239]	Time 0.609 (0.614)	Data Time 0.002 (0.020)	Loss 2.5500 (2.6381)	Entropy 1.13795 (1.14015)	Top-1 acc 59.766 (60.482)	Top-5 acc 83.594 (81.469)	lr 0.01459
Train [54][1940/3239]	Time 0.241 (0.613)	Data Time 0.002 (0.020)	Loss 2.5948 (2.6382)	Entropy 1.13795 (1.14013)	Top-1 acc 62.891 (60.476)	Top-5 acc 82.031 (81.467)	lr 0.01459
Train [54][1950/3239]	Time 0.228 (0.612)	Data Time 0.001 (0.020)	Loss 2.6368 (2.6383)	Entropy 1.13791 (1.14012)	Top-1 acc 60.938 (60.478)	Top-5 acc 80.859 (81.464)	lr 0.01458
Train [54][1960/3239]	Time 0.214 (0.612)	Data Time 0.001 (0.020)	Loss 2.7635 (2.6384)	Entropy 1.13782 (1.14011)	Top-1 acc 55.859 (60.471)	Top-5 acc 80.469 (81.463)	lr 0.01458
Train [54][1970/3239]	Time 0.229 (0.611)	Data Time 0.001 (0.020)	Loss 2.7276 (2.6384)	Entropy 1.13775 (1.14010)	Top-1 acc 55.078 (60.468)	Top-5 acc 81.250 (81.465)	lr 0.01458
Train [54][1980/3239]	Time 0.347 (0.610)	Data Time 0.001 (0.020)	Loss 2.4326 (2.6383)	Entropy 1.13771 (1.14009)	Top-1 acc 67.188 (60.475)	Top-5 acc 87.109 (81.467)	lr 0.01458
Train [54][1990/3239]	Time 2.501 (0.609)	Data Time 0.001 (0.019)	Loss 2.6257 (2.6385)	Entropy 1.13771 (1.14008)	Top-1 acc 60.547 (60.471)	Top-5 acc 83.203 (81.462)	lr 0.01458
Train [54][2000/3239]	Time 0.226 (0.608)	Data Time 0.001 (0.019)	Loss 2.5321 (2.6385)	Entropy 1.13764 (1.14006)	Top-1 acc 65.234 (60.472)	Top-5 acc 83.594 (81.462)	lr 0.01458
Train [54][2010/3239]	Time 0.226 (0.607)	Data Time 0.001 (0.019)	Loss 2.5331 (2.6386)	Entropy 1.13760 (1.14005)	Top-1 acc 60.938 (60.467)	Top-5 acc 82.422 (81.459)	lr 0.01458
Train [54][2020/3239]	Time 0.256 (0.606)	Data Time 0.001 (0.019)	Loss 2.6411 (2.6387)	Entropy 1.13755 (1.14004)	Top-1 acc 58.984 (60.466)	Top-5 acc 80.469 (81.455)	lr 0.01458
Train [54][2030/3239]	Time 0.321 (0.605)	Data Time 0.001 (0.019)	Loss 2.5565 (2.6383)	Entropy 1.13755 (1.14003)	Top-1 acc 61.719 (60.483)	Top-5 acc 81.641 (81.461)	lr 0.01458
Train [54][2040/3239]	Time 0.213 (0.605)	Data Time 0.001 (0.019)	Loss 2.6869 (2.6383)	Entropy 1.13758 (1.14001)	Top-1 acc 59.766 (60.480)	Top-5 acc 79.297 (81.463)	lr 0.01458
Train [54][2050/3239]	Time 0.220 (0.604)	Data Time 0.001 (0.019)	Loss 2.6519 (2.6382)	Entropy 1.13758 (1.14000)	Top-1 acc 60.938 (60.485)	Top-5 acc 82.422 (81.463)	lr 0.01457
Train [54][2060/3239]	Time 0.194 (0.603)	Data Time 0.001 (0.019)	Loss 2.6675 (2.6384)	Entropy 1.13753 (1.13999)	Top-1 acc 59.375 (60.477)	Top-5 acc 80.859 (81.458)	lr 0.01457
Train [54][2070/3239]	Time 0.226 (0.603)	Data Time 0.001 (0.019)	Loss 2.6052 (2.6383)	Entropy 1.13750 (1.13998)	Top-1 acc 62.500 (60.485)	Top-5 acc 83.203 (81.460)	lr 0.01457
Train [54][2080/3239]	Time 0.288 (0.602)	Data Time 0.001 (0.019)	Loss 2.9779 (2.6383)	Entropy 1.13752 (1.13997)	Top-1 acc 56.641 (60.488)	Top-5 acc 73.438 (81.458)	lr 0.01457
Train [54][2090/3239]	Time 0.257 (0.601)	Data Time 0.001 (0.019)	Loss 2.6258 (2.6384)	Entropy 1.13750 (1.13996)	Top-1 acc 63.281 (60.487)	Top-5 acc 80.078 (81.454)	lr 0.01457
Train [54][2100/3239]	Time 2.390 (0.601)	Data Time 0.001 (0.019)	Loss 2.7830 (2.6384)	Entropy 1.13750 (1.13994)	Top-1 acc 57.422 (60.493)	Top-5 acc 78.125 (81.456)	lr 0.01457
Train [54][2110/3239]	Time 0.228 (0.599)	Data Time 0.001 (0.018)	Loss 2.4878 (2.6386)	Entropy 1.13752 (1.13993)	Top-1 acc 64.844 (60.487)	Top-5 acc 85.938 (81.452)	lr 0.01457
Train [54][2120/3239]	Time 0.236 (0.598)	Data Time 0.001 (0.018)	Loss 2.4006 (2.6383)	Entropy 1.13752 (1.13992)	Top-1 acc 66.016 (60.493)	Top-5 acc 84.375 (81.454)	lr 0.01457
Train [54][2130/3239]	Time 0.341 (0.598)	Data Time 0.001 (0.018)	Loss 2.3726 (2.6384)	Entropy 1.13750 (1.13991)	Top-1 acc 66.797 (60.491)	Top-5 acc 87.109 (81.453)	lr 0.01457
Train [54][2140/3239]	Time 0.232 (0.597)	Data Time 0.001 (0.018)	Loss 2.8875 (2.6386)	Entropy 1.13751 (1.13990)	Top-1 acc 54.688 (60.483)	Top-5 acc 74.609 (81.447)	lr 0.01457
Train [54][2150/3239]	Time 0.222 (0.597)	Data Time 0.001 (0.018)	Loss 2.4864 (2.6385)	Entropy 1.13743 (1.13989)	Top-1 acc 63.672 (60.491)	Top-5 acc 84.375 (81.450)	lr 0.01456
Train [54][2160/3239]	Time 0.213 (0.596)	Data Time 0.001 (0.018)	Loss 2.8312 (2.6383)	Entropy 1.13738 (1.13988)	Top-1 acc 55.078 (60.494)	Top-5 acc 80.078 (81.457)	lr 0.01456
Train [54][2170/3239]	Time 0.243 (0.595)	Data Time 0.002 (0.018)	Loss 2.5282 (2.6384)	Entropy 1.13737 (1.13986)	Top-1 acc 63.672 (60.490)	Top-5 acc 84.766 (81.455)	lr 0.01456
Train [54][2180/3239]	Time 0.306 (0.595)	Data Time 0.001 (0.018)	Loss 2.5827 (2.6385)	Entropy 1.13728 (1.13985)	Top-1 acc 57.422 (60.487)	Top-5 acc 83.594 (81.452)	lr 0.01456
Train [54][2190/3239]	Time 0.217 (0.594)	Data Time 0.001 (0.018)	Loss 2.5553 (2.6387)	Entropy 1.13726 (1.13984)	Top-1 acc 60.938 (60.483)	Top-5 acc 82.031 (81.449)	lr 0.01456
Train [54][2200/3239]	Time 0.218 (0.594)	Data Time 0.001 (0.018)	Loss 2.6025 (2.6388)	Entropy 1.13723 (1.13983)	Top-1 acc 62.109 (60.476)	Top-5 acc 80.469 (81.444)	lr 0.01456
Train [54][2210/3239]	Time 2.424 (0.593)	Data Time 0.001 (0.018)	Loss 2.6572 (2.6387)	Entropy 1.13723 (1.13982)	Top-1 acc 60.938 (60.477)	Top-5 acc 79.297 (81.450)	lr 0.01456
Train [54][2220/3239]	Time 0.245 (0.591)	Data Time 0.002 (0.018)	Loss 2.5692 (2.6387)	Entropy 1.13721 (1.13981)	Top-1 acc 59.766 (60.477)	Top-5 acc 82.422 (81.450)	lr 0.01456
Train [54][2230/3239]	Time 0.333 (0.591)	Data Time 0.001 (0.018)	Loss 2.4842 (2.6387)	Entropy 1.13716 (1.13979)	Top-1 acc 60.156 (60.475)	Top-5 acc 85.547 (81.448)	lr 0.01456
Train [54][2240/3239]	Time 0.218 (0.590)	Data Time 0.001 (0.017)	Loss 2.6007 (2.6385)	Entropy 1.13717 (1.13978)	Top-1 acc 61.719 (60.477)	Top-5 acc 82.812 (81.450)	lr 0.01456
Train [54][2250/3239]	Time 0.239 (0.590)	Data Time 0.001 (0.017)	Loss 2.5061 (2.6386)	Entropy 1.13714 (1.13977)	Top-1 acc 62.500 (60.470)	Top-5 acc 83.984 (81.447)	lr 0.01455
Train [54][2260/3239]	Time 0.225 (0.589)	Data Time 0.001 (0.017)	Loss 2.7129 (2.6389)	Entropy 1.13712 (1.13976)	Top-1 acc 60.156 (60.465)	Top-5 acc 80.469 (81.444)	lr 0.01455
Train [54][2270/3239]	Time 0.219 (0.589)	Data Time 0.001 (0.017)	Loss 2.6813 (2.6388)	Entropy 1.13710 (1.13975)	Top-1 acc 60.156 (60.468)	Top-5 acc 81.250 (81.445)	lr 0.01455
Train [54][2280/3239]	Time 0.229 (0.588)	Data Time 0.001 (0.017)	Loss 2.7913 (2.6387)	Entropy 1.13712 (1.13974)	Top-1 acc 56.641 (60.469)	Top-5 acc 79.688 (81.447)	lr 0.01455
Train [54][2290/3239]	Time 0.329 (0.609)	Data Time 0.003 (0.017)	Loss 2.5976 (2.6387)	Entropy 1.13706 (1.13972)	Top-1 acc 64.844 (60.468)	Top-5 acc 82.031 (81.449)	lr 0.01455
Train [54][2300/3239]	Time 0.221 (0.609)	Data Time 0.002 (0.017)	Loss 2.6524 (2.6387)	Entropy 1.13707 (1.13971)	Top-1 acc 60.156 (60.467)	Top-5 acc 83.203 (81.451)	lr 0.01455
Train [54][2310/3239]	Time 0.252 (0.608)	Data Time 0.002 (0.017)	Loss 2.4303 (2.6387)	Entropy 1.13703 (1.13970)	Top-1 acc 65.234 (60.466)	Top-5 acc 87.109 (81.454)	lr 0.01455
Train [54][2320/3239]	Time 2.558 (0.607)	Data Time 0.002 (0.017)	Loss 2.7069 (2.6388)	Entropy 1.13703 (1.13969)	Top-1 acc 60.156 (60.463)	Top-5 acc 81.250 (81.451)	lr 0.01455
Train [54][2330/3239]	Time 0.218 (0.606)	Data Time 0.002 (0.017)	Loss 2.4477 (2.6387)	Entropy 1.13690 (1.13968)	Top-1 acc 66.406 (60.468)	Top-5 acc 83.984 (81.454)	lr 0.01455
Train [54][2340/3239]	Time 0.252 (0.605)	Data Time 0.002 (0.017)	Loss 2.6895 (2.6388)	Entropy 1.13688 (1.13967)	Top-1 acc 60.938 (60.468)	Top-5 acc 79.688 (81.453)	lr 0.01455
Train [54][2350/3239]	Time 0.251 (0.605)	Data Time 0.001 (0.017)	Loss 2.6803 (2.6386)	Entropy 1.13681 (1.13965)	Top-1 acc 59.375 (60.474)	Top-5 acc 83.203 (81.456)	lr 0.01454
Train [54][2360/3239]	Time 0.224 (0.604)	Data Time 0.001 (0.017)	Loss 2.6867 (2.6388)	Entropy 1.13676 (1.13964)	Top-1 acc 61.719 (60.467)	Top-5 acc 79.688 (81.453)	lr 0.01454
Train [54][2370/3239]	Time 0.360 (0.604)	Data Time 0.001 (0.017)	Loss 2.4979 (2.6386)	Entropy 1.13678 (1.13963)	Top-1 acc 62.891 (60.466)	Top-5 acc 84.375 (81.454)	lr 0.01454
Train [54][2380/3239]	Time 0.213 (0.603)	Data Time 0.001 (0.017)	Loss 2.4423 (2.6383)	Entropy 1.13676 (1.13962)	Top-1 acc 66.016 (60.472)	Top-5 acc 84.766 (81.459)	lr 0.01454
Train [54][2390/3239]	Time 0.248 (0.602)	Data Time 0.001 (0.017)	Loss 2.7774 (2.6386)	Entropy 1.13672 (1.13961)	Top-1 acc 57.031 (60.468)	Top-5 acc 76.953 (81.457)	lr 0.01454
Train [54][2400/3239]	Time 0.220 (0.602)	Data Time 0.002 (0.017)	Loss 2.8110 (2.6387)	Entropy 1.13669 (1.13959)	Top-1 acc 58.203 (60.466)	Top-5 acc 76.172 (81.452)	lr 0.01454
Train [54][2410/3239]	Time 0.245 (0.601)	Data Time 0.001 (0.016)	Loss 2.5906 (2.6388)	Entropy 1.13665 (1.13958)	Top-1 acc 62.500 (60.463)	Top-5 acc 81.641 (81.450)	lr 0.01454
Train [54][2420/3239]	Time 0.366 (0.601)	Data Time 0.001 (0.016)	Loss 2.7732 (2.6388)	Entropy 1.13659 (1.13957)	Top-1 acc 59.375 (60.463)	Top-5 acc 80.078 (81.447)	lr 0.01454
Train [54][2430/3239]	Time 2.537 (0.600)	Data Time 0.002 (0.016)	Loss 2.7489 (2.6387)	Entropy 1.13659 (1.13956)	Top-1 acc 57.422 (60.463)	Top-5 acc 79.688 (81.450)	lr 0.01454
Train [54][2440/3239]	Time 0.236 (0.599)	Data Time 0.001 (0.016)	Loss 2.7462 (2.6387)	Entropy 1.13658 (1.13954)	Top-1 acc 60.547 (60.463)	Top-5 acc 80.469 (81.447)	lr 0.01454
Train [54][2450/3239]	Time 0.185 (0.598)	Data Time 0.001 (0.016)	Loss 2.6701 (2.6386)	Entropy 1.13656 (1.13953)	Top-1 acc 60.938 (60.466)	Top-5 acc 79.688 (81.448)	lr 0.01453
Train [54][2460/3239]	Time 0.220 (0.598)	Data Time 0.001 (0.016)	Loss 2.8066 (2.6389)	Entropy 1.13653 (1.13952)	Top-1 acc 56.250 (60.458)	Top-5 acc 80.859 (81.443)	lr 0.01453
Train [54][2470/3239]	Time 0.340 (0.597)	Data Time 0.002 (0.016)	Loss 2.6681 (2.6388)	Entropy 1.13655 (1.13951)	Top-1 acc 58.594 (60.456)	Top-5 acc 83.203 (81.446)	lr 0.01453
Train [54][2480/3239]	Time 0.239 (0.597)	Data Time 0.001 (0.016)	Loss 3.0715 (2.6390)	Entropy 1.13654 (1.13950)	Top-1 acc 48.047 (60.451)	Top-5 acc 70.703 (81.440)	lr 0.01453
Train [54][2490/3239]	Time 0.214 (0.596)	Data Time 0.001 (0.016)	Loss 2.5661 (2.6388)	Entropy 1.13652 (1.13948)	Top-1 acc 62.891 (60.457)	Top-5 acc 81.641 (81.442)	lr 0.01453
Train [54][2500/3239]	Time 0.241 (0.596)	Data Time 0.001 (0.016)	Loss 2.6194 (2.6388)	Entropy 1.13653 (1.13947)	Top-1 acc 60.547 (60.457)	Top-5 acc 82.812 (81.443)	lr 0.01453
Train [54][2510/3239]	Time 0.244 (0.595)	Data Time 0.001 (0.016)	Loss 2.5888 (2.6387)	Entropy 1.13653 (1.13946)	Top-1 acc 60.938 (60.457)	Top-5 acc 81.641 (81.443)	lr 0.01453
Train [54][2520/3239]	Time 0.236 (0.595)	Data Time 0.001 (0.016)	Loss 2.5494 (2.6390)	Entropy 1.13655 (1.13945)	Top-1 acc 61.328 (60.449)	Top-5 acc 83.203 (81.437)	lr 0.01453
Train [54][2530/3239]	Time 0.239 (0.594)	Data Time 0.001 (0.016)	Loss 2.5300 (2.6390)	Entropy 1.13651 (1.13944)	Top-1 acc 61.719 (60.452)	Top-5 acc 82.422 (81.440)	lr 0.01453
Train [54][2540/3239]	Time 2.587 (0.594)	Data Time 0.001 (0.016)	Loss 2.7170 (2.6390)	Entropy 1.13651 (1.13943)	Top-1 acc 56.250 (60.452)	Top-5 acc 81.641 (81.442)	lr 0.01453
Train [54][2550/3239]	Time 0.280 (0.593)	Data Time 0.001 (0.016)	Loss 2.6610 (2.6390)	Entropy 1.13650 (1.13941)	Top-1 acc 55.859 (60.446)	Top-5 acc 79.688 (81.442)	lr 0.01452
Train [54][2560/3239]	Time 0.405 (0.592)	Data Time 0.002 (0.016)	Loss 2.8476 (2.6391)	Entropy 1.13648 (1.13940)	Top-1 acc 55.469 (60.445)	Top-5 acc 79.688 (81.438)	lr 0.01452
Train [54][2570/3239]	Time 0.236 (0.592)	Data Time 0.001 (0.016)	Loss 2.6132 (2.6393)	Entropy 1.13643 (1.13939)	Top-1 acc 60.938 (60.439)	Top-5 acc 81.250 (81.437)	lr 0.01452
Train [54][2580/3239]	Time 0.237 (0.591)	Data Time 0.001 (0.015)	Loss 2.6753 (2.6393)	Entropy 1.13641 (1.13938)	Top-1 acc 61.328 (60.437)	Top-5 acc 78.906 (81.437)	lr 0.01452
Train [54][2590/3239]	Time 0.236 (0.591)	Data Time 0.001 (0.015)	Loss 2.7989 (2.6395)	Entropy 1.13636 (1.13937)	Top-1 acc 58.203 (60.433)	Top-5 acc 78.516 (81.431)	lr 0.01452
Train [54][2600/3239]	Time 0.229 (0.590)	Data Time 0.001 (0.015)	Loss 2.5499 (2.6395)	Entropy 1.13629 (1.13936)	Top-1 acc 64.844 (60.432)	Top-5 acc 80.469 (81.428)	lr 0.01452
Train [54][2610/3239]	Time 0.367 (0.590)	Data Time 0.003 (0.015)	Loss 2.6923 (2.6397)	Entropy 1.13629 (1.13935)	Top-1 acc 57.422 (60.430)	Top-5 acc 82.031 (81.426)	lr 0.01452
Train [54][2620/3239]	Time 0.225 (0.589)	Data Time 0.002 (0.015)	Loss 2.7970 (2.6400)	Entropy 1.13630 (1.13933)	Top-1 acc 55.469 (60.417)	Top-5 acc 76.953 (81.420)	lr 0.01452
Train [54][2630/3239]	Time 0.239 (0.589)	Data Time 0.001 (0.015)	Loss 2.5829 (2.6400)	Entropy 1.13627 (1.13932)	Top-1 acc 59.375 (60.416)	Top-5 acc 82.812 (81.418)	lr 0.01452
Train [54][2640/3239]	Time 0.226 (0.589)	Data Time 0.001 (0.015)	Loss 2.6491 (2.6401)	Entropy 1.13629 (1.13931)	Top-1 acc 60.156 (60.417)	Top-5 acc 81.641 (81.417)	lr 0.01452
Train [54][2650/3239]	Time 0.265 (0.606)	Data Time 0.004 (0.015)	Loss 2.4601 (2.6400)	Entropy 1.13624 (1.13930)	Top-1 acc 63.281 (60.414)	Top-5 acc 86.719 (81.421)	lr 0.01451
Train [54][2660/3239]	Time 0.320 (0.606)	Data Time 0.002 (0.015)	Loss 2.7835 (2.6402)	Entropy 1.13619 (1.13929)	Top-1 acc 58.984 (60.411)	Top-5 acc 76.953 (81.415)	lr 0.01451
Train [54][2670/3239]	Time 0.215 (0.605)	Data Time 0.001 (0.015)	Loss 2.6310 (2.6406)	Entropy 1.13619 (1.13928)	Top-1 acc 62.109 (60.405)	Top-5 acc 80.078 (81.407)	lr 0.01451
Train [54][2680/3239]	Time 0.254 (0.605)	Data Time 0.002 (0.015)	Loss 2.8425 (2.6406)	Entropy 1.13617 (1.13926)	Top-1 acc 56.250 (60.405)	Top-5 acc 78.125 (81.407)	lr 0.01451
Train [54][2690/3239]	Time 0.242 (0.604)	Data Time 0.001 (0.015)	Loss 2.6222 (2.6405)	Entropy 1.13617 (1.13925)	Top-1 acc 60.156 (60.411)	Top-5 acc 81.641 (81.407)	lr 0.01451
Train [54][2700/3239]	Time 0.241 (0.604)	Data Time 0.001 (0.015)	Loss 2.4926 (2.6403)	Entropy 1.13613 (1.13924)	Top-1 acc 64.844 (60.415)	Top-5 acc 84.375 (81.414)	lr 0.01451
Train [54][2710/3239]	Time 0.233 (0.603)	Data Time 0.001 (0.015)	Loss 2.6654 (2.6403)	Entropy 1.13611 (1.13923)	Top-1 acc 60.156 (60.415)	Top-5 acc 78.906 (81.416)	lr 0.01451
Train [54][2720/3239]	Time 0.231 (0.603)	Data Time 0.001 (0.015)	Loss 2.7296 (2.6403)	Entropy 1.13607 (1.13922)	Top-1 acc 60.547 (60.413)	Top-5 acc 78.125 (81.417)	lr 0.01451
Train [54][2730/3239]	Time 0.224 (0.602)	Data Time 0.001 (0.015)	Loss 2.5170 (2.6404)	Entropy 1.13601 (1.13921)	Top-1 acc 64.062 (60.412)	Top-5 acc 84.375 (81.416)	lr 0.01451
Train [54][2740/3239]	Time 0.216 (0.602)	Data Time 0.002 (0.015)	Loss 2.6318 (2.6404)	Entropy 1.13596 (1.13920)	Top-1 acc 60.156 (60.417)	Top-5 acc 82.812 (81.417)	lr 0.01451
Train [54][2750/3239]	Time 0.308 (0.601)	Data Time 0.001 (0.015)	Loss 2.7946 (2.6405)	Entropy 1.13593 (1.13918)	Top-1 acc 56.250 (60.414)	Top-5 acc 76.562 (81.415)	lr 0.01450
Train [54][2760/3239]	Time 0.222 (0.601)	Data Time 0.001 (0.015)	Loss 2.6439 (2.6403)	Entropy 1.13593 (1.13917)	Top-1 acc 59.766 (60.416)	Top-5 acc 83.984 (81.416)	lr 0.01450
Train [54][2770/3239]	Time 0.264 (0.600)	Data Time 0.003 (0.015)	Loss 2.6765 (2.6402)	Entropy 1.13588 (1.13916)	Top-1 acc 61.719 (60.418)	Top-5 acc 79.297 (81.420)	lr 0.01450
Train [54][2780/3239]	Time 0.253 (0.600)	Data Time 0.001 (0.014)	Loss 2.5939 (2.6402)	Entropy 1.13588 (1.13915)	Top-1 acc 58.984 (60.415)	Top-5 acc 82.422 (81.419)	lr 0.01450
Train [54][2790/3239]	Time 0.219 (0.599)	Data Time 0.001 (0.014)	Loss 2.5936 (2.6401)	Entropy 1.13586 (1.13914)	Top-1 acc 63.672 (60.415)	Top-5 acc 82.031 (81.418)	lr 0.01450
Train [54][2800/3239]	Time 0.303 (0.599)	Data Time 0.002 (0.014)	Loss 2.5427 (2.6401)	Entropy 1.13577 (1.13912)	Top-1 acc 64.062 (60.418)	Top-5 acc 82.812 (81.419)	lr 0.01450
Train [54][2810/3239]	Time 0.208 (0.598)	Data Time 0.001 (0.014)	Loss 2.8640 (2.6401)	Entropy 1.13576 (1.13911)	Top-1 acc 56.641 (60.420)	Top-5 acc 76.562 (81.416)	lr 0.01450
Train [54][2820/3239]	Time 0.264 (0.598)	Data Time 0.001 (0.014)	Loss 2.5082 (2.6402)	Entropy 1.13575 (1.13910)	Top-1 acc 65.625 (60.419)	Top-5 acc 83.594 (81.416)	lr 0.01450
Train [54][2830/3239]	Time 0.296 (0.597)	Data Time 0.001 (0.014)	Loss 2.7995 (2.6404)	Entropy 1.13565 (1.13909)	Top-1 acc 55.859 (60.413)	Top-5 acc 79.297 (81.411)	lr 0.01450
Train [54][2840/3239]	Time 0.226 (0.597)	Data Time 0.001 (0.014)	Loss 2.8065 (2.6404)	Entropy 1.13568 (1.13908)	Top-1 acc 57.422 (60.414)	Top-5 acc 78.906 (81.413)	lr 0.01450
Train [54][2850/3239]	Time 0.249 (0.597)	Data Time 0.001 (0.014)	Loss 2.5406 (2.6404)	Entropy 1.13559 (1.13906)	Top-1 acc 60.938 (60.413)	Top-5 acc 82.422 (81.414)	lr 0.01449
Train [54][2860/3239]	Time 0.235 (0.596)	Data Time 0.001 (0.014)	Loss 2.5073 (2.6402)	Entropy 1.13552 (1.13905)	Top-1 acc 64.453 (60.419)	Top-5 acc 84.766 (81.414)	lr 0.01449
Train [54][2870/3239]	Time 0.228 (0.596)	Data Time 0.001 (0.014)	Loss 2.5810 (2.6403)	Entropy 1.13555 (1.13904)	Top-1 acc 62.109 (60.419)	Top-5 acc 83.203 (81.413)	lr 0.01449
Train [54][2880/3239]	Time 0.240 (0.595)	Data Time 0.001 (0.014)	Loss 2.5286 (2.6401)	Entropy 1.13557 (1.13903)	Top-1 acc 61.328 (60.423)	Top-5 acc 83.984 (81.415)	lr 0.01449
Train [54][2890/3239]	Time 0.319 (0.595)	Data Time 0.001 (0.014)	Loss 2.8826 (2.6402)	Entropy 1.13559 (1.13902)	Top-1 acc 60.938 (60.421)	Top-5 acc 77.734 (81.412)	lr 0.01449
Train [54][2900/3239]	Time 0.193 (0.594)	Data Time 0.001 (0.014)	Loss 2.5366 (2.6403)	Entropy 1.13564 (1.13900)	Top-1 acc 62.891 (60.421)	Top-5 acc 82.422 (81.410)	lr 0.01449
Train [54][2910/3239]	Time 0.289 (0.594)	Data Time 0.001 (0.014)	Loss 2.6246 (2.6401)	Entropy 1.13565 (1.13899)	Top-1 acc 59.375 (60.424)	Top-5 acc 82.422 (81.416)	lr 0.01449
Train [54][2920/3239]	Time 0.257 (0.594)	Data Time 0.001 (0.014)	Loss 2.4478 (2.6400)	Entropy 1.13566 (1.13898)	Top-1 acc 67.188 (60.424)	Top-5 acc 83.984 (81.419)	lr 0.01449
Train [54][2930/3239]	Time 0.226 (0.593)	Data Time 0.001 (0.014)	Loss 2.6341 (2.6400)	Entropy 1.13564 (1.13897)	Top-1 acc 58.203 (60.420)	Top-5 acc 80.859 (81.419)	lr 0.01449
Train [54][2940/3239]	Time 0.239 (0.593)	Data Time 0.001 (0.014)	Loss 2.6380 (2.6400)	Entropy 1.13561 (1.13896)	Top-1 acc 63.672 (60.419)	Top-5 acc 79.297 (81.421)	lr 0.01449
Train [54][2950/3239]	Time 0.217 (0.592)	Data Time 0.001 (0.014)	Loss 2.8088 (2.6402)	Entropy 1.13559 (1.13895)	Top-1 acc 58.203 (60.414)	Top-5 acc 76.953 (81.417)	lr 0.01448
Train [54][2960/3239]	Time 0.277 (0.592)	Data Time 0.001 (0.014)	Loss 2.5213 (2.6401)	Entropy 1.13559 (1.13894)	Top-1 acc 60.938 (60.413)	Top-5 acc 83.203 (81.416)	lr 0.01448
Train [54][2970/3239]	Time 0.228 (0.591)	Data Time 0.002 (0.014)	Loss 2.7412 (2.6401)	Entropy 1.13556 (1.13893)	Top-1 acc 60.547 (60.414)	Top-5 acc 79.688 (81.416)	lr 0.01448
Train [54][2980/3239]	Time 0.602 (0.606)	Data Time 0.004 (0.014)	Loss 2.7012 (2.6402)	Entropy 1.13552 (1.13891)	Top-1 acc 57.422 (60.410)	Top-5 acc 79.688 (81.413)	lr 0.01448
Train [54][2990/3239]	Time 0.227 (0.606)	Data Time 0.002 (0.014)	Loss 2.7610 (2.6402)	Entropy 1.13547 (1.13890)	Top-1 acc 60.156 (60.411)	Top-5 acc 79.297 (81.413)	lr 0.01448
Train [54][3000/3239]	Time 0.239 (0.606)	Data Time 0.002 (0.014)	Loss 2.7423 (2.6403)	Entropy 1.13538 (1.13889)	Top-1 acc 59.375 (60.413)	Top-5 acc 78.125 (81.411)	lr 0.01448
Train [54][3010/3239]	Time 0.247 (0.605)	Data Time 0.001 (0.014)	Loss 2.6426 (2.6404)	Entropy 1.13536 (1.13888)	Top-1 acc 62.891 (60.414)	Top-5 acc 80.078 (81.407)	lr 0.01448
Train [54][3020/3239]	Time 0.226 (0.605)	Data Time 0.002 (0.013)	Loss 2.4134 (2.6403)	Entropy 1.13533 (1.13887)	Top-1 acc 65.234 (60.416)	Top-5 acc 87.500 (81.413)	lr 0.01448
Train [54][3030/3239]	Time 0.254 (0.605)	Data Time 0.001 (0.013)	Loss 2.5143 (2.6403)	Entropy 1.13530 (1.13886)	Top-1 acc 58.594 (60.415)	Top-5 acc 83.984 (81.412)	lr 0.01448
Train [54][3040/3239]	Time 0.232 (0.604)	Data Time 0.002 (0.013)	Loss 2.5818 (2.6402)	Entropy 1.13530 (1.13884)	Top-1 acc 61.328 (60.416)	Top-5 acc 80.078 (81.413)	lr 0.01448
Train [54][3050/3239]	Time 0.237 (0.604)	Data Time 0.001 (0.013)	Loss 2.6355 (2.6403)	Entropy 1.13528 (1.13883)	Top-1 acc 61.328 (60.414)	Top-5 acc 79.688 (81.411)	lr 0.01447
Train [54][3060/3239]	Time 0.242 (0.603)	Data Time 0.002 (0.013)	Loss 2.6587 (2.6402)	Entropy 1.13528 (1.13882)	Top-1 acc 57.812 (60.416)	Top-5 acc 80.078 (81.412)	lr 0.01447
Train [54][3070/3239]	Time 0.291 (0.603)	Data Time 0.001 (0.013)	Loss 2.6052 (2.6402)	Entropy 1.13527 (1.13881)	Top-1 acc 59.375 (60.415)	Top-5 acc 82.031 (81.412)	lr 0.01447
Train [54][3080/3239]	Time 0.227 (0.602)	Data Time 0.001 (0.013)	Loss 2.7852 (2.6403)	Entropy 1.13527 (1.13880)	Top-1 acc 55.078 (60.414)	Top-5 acc 80.078 (81.411)	lr 0.01447
Train [54][3090/3239]	Time 0.214 (0.602)	Data Time 0.001 (0.013)	Loss 2.6830 (2.6405)	Entropy 1.13524 (1.13879)	Top-1 acc 56.641 (60.413)	Top-5 acc 80.469 (81.407)	lr 0.01447
Train [54][3100/3239]	Time 0.226 (0.602)	Data Time 0.001 (0.013)	Loss 2.6156 (2.6405)	Entropy 1.13522 (1.13878)	Top-1 acc 56.250 (60.406)	Top-5 acc 82.812 (81.408)	lr 0.01447
Train [54][3110/3239]	Time 0.214 (0.601)	Data Time 0.001 (0.013)	Loss 2.5961 (2.6405)	Entropy 1.13518 (1.13876)	Top-1 acc 58.984 (60.406)	Top-5 acc 80.859 (81.407)	lr 0.01447
Train [54][3120/3239]	Time 0.326 (0.601)	Data Time 0.001 (0.013)	Loss 2.8056 (2.6404)	Entropy 1.13516 (1.13875)	Top-1 acc 56.250 (60.408)	Top-5 acc 77.734 (81.409)	lr 0.01447
Train [54][3130/3239]	Time 0.227 (0.600)	Data Time 0.001 (0.013)	Loss 2.5488 (2.6403)	Entropy 1.13518 (1.13874)	Top-1 acc 62.109 (60.414)	Top-5 acc 83.594 (81.412)	lr 0.01447
Train [54][3140/3239]	Time 0.235 (0.600)	Data Time 0.001 (0.013)	Loss 2.6851 (2.6405)	Entropy 1.13514 (1.13873)	Top-1 acc 57.812 (60.409)	Top-5 acc 81.641 (81.408)	lr 0.01447
Train [54][3150/3239]	Time 0.244 (0.599)	Data Time 0.001 (0.013)	Loss 2.4279 (2.6405)	Entropy 1.13515 (1.13872)	Top-1 acc 67.969 (60.405)	Top-5 acc 83.203 (81.407)	lr 0.01446
Train [54][3160/3239]	Time 0.259 (0.599)	Data Time 0.001 (0.013)	Loss 2.5536 (2.6405)	Entropy 1.13509 (1.13871)	Top-1 acc 62.891 (60.405)	Top-5 acc 81.641 (81.407)	lr 0.01446
Train [54][3170/3239]	Time 0.360 (0.599)	Data Time 0.002 (0.013)	Loss 2.4824 (2.6404)	Entropy 1.13509 (1.13870)	Top-1 acc 64.062 (60.404)	Top-5 acc 86.328 (81.410)	lr 0.01446
Train [54][3180/3239]	Time 0.229 (0.598)	Data Time 0.000 (0.013)	Loss 2.5799 (2.6405)	Entropy 1.13506 (1.13868)	Top-1 acc 62.500 (60.403)	Top-5 acc 84.375 (81.407)	lr 0.01446
Train [54][3190/3239]	Time 0.217 (0.598)	Data Time 0.000 (0.013)	Loss 2.6373 (2.6404)	Entropy 1.13505 (1.13867)	Top-1 acc 62.500 (60.405)	Top-5 acc 83.203 (81.408)	lr 0.01446
Train [54][3200/3239]	Time 0.229 (0.597)	Data Time 0.000 (0.013)	Loss 2.5199 (2.6403)	Entropy 1.13498 (1.13866)	Top-1 acc 60.938 (60.406)	Top-5 acc 81.250 (81.407)	lr 0.01446
Train [54][3210/3239]	Time 0.214 (0.597)	Data Time 0.000 (0.013)	Loss 3.5465 (2.6407)	Entropy 1.13491 (1.13865)	Top-1 acc 43.750 (60.397)	Top-5 acc 71.484 (81.400)	lr 0.01446
Train [54][3220/3239]	Time 0.289 (0.596)	Data Time 0.000 (0.013)	Loss 2.5781 (2.6408)	Entropy 1.13486 (1.13864)	Top-1 acc 59.375 (60.395)	Top-5 acc 83.203 (81.400)	lr 0.01446
Train [54][3230/3239]	Time 0.227 (0.596)	Data Time 0.000 (0.013)	Loss 2.7269 (2.6409)	Entropy 1.13484 (1.13863)	Top-1 acc 58.594 (60.391)	Top-5 acc 79.297 (81.399)	lr 0.01446
Train [54][3239/3239]	Time 2.236 (0.595)	Data Time 0.000 (0.013)	Loss 2.7262 (2.6411)	Entropy 1.13484 (1.13862)	Top-1 acc 51.852 (60.388)	Top-5 acc 80.247 (81.396)	lr 0.01446
==========Valid [54/120]	loss 1.503	top-1 acc 65.869 (65.869)	top-5 acc 85.911	Train top-1 60.388	top-5 81.396	Entropy 1.13484	Latency-None: 0.000ms	Flops: 548.34M
Train [55][0/3239]	Time 37.399 (37.399)	Data Time 35.644 (35.644)	Loss 2.8235 (2.8235)	Entropy 1.13485 (1.13485)	Top-1 acc 56.641 (56.641)	Top-5 acc 78.906 (78.906)	lr 0.01446
Train [55][10/3239]	Time 2.415 (4.015)	Data Time 0.003 (3.353)	Loss 2.6963 (2.6680)	Entropy 1.13485 (1.13485)	Top-1 acc 62.891 (60.156)	Top-5 acc 79.297 (80.930)	lr 0.01445
Train [55][20/3239]	Time 0.209 (2.210)	Data Time 0.002 (1.757)	Loss 2.6935 (2.6414)	Entropy 1.13468 (1.13477)	Top-1 acc 60.156 (60.882)	Top-5 acc 81.250 (81.622)	lr 0.01445
Train [55][30/3239]	Time 0.222 (1.642)	Data Time 0.001 (1.191)	Loss 2.3206 (2.6108)	Entropy 1.13463 (1.13473)	Top-1 acc 65.234 (61.416)	Top-5 acc 86.719 (82.182)	lr 0.01445
Train [55][40/3239]	Time 0.245 (1.359)	Data Time 0.001 (0.901)	Loss 2.5702 (2.6097)	Entropy 1.13463 (1.13470)	Top-1 acc 61.328 (61.576)	Top-5 acc 82.812 (82.241)	lr 0.01445
Train [55][50/3239]	Time 0.246 (1.183)	Data Time 0.001 (0.724)	Loss 2.6688 (2.6018)	Entropy 1.13462 (1.13469)	Top-1 acc 61.719 (61.719)	Top-5 acc 80.859 (82.353)	lr 0.01445
Train [55][60/3239]	Time 0.234 (1.063)	Data Time 0.001 (0.606)	Loss 2.4876 (2.6022)	Entropy 1.13459 (1.13467)	Top-1 acc 62.891 (61.693)	Top-5 acc 83.203 (82.287)	lr 0.01445
Train [55][70/3239]	Time 0.225 (0.978)	Data Time 0.001 (0.521)	Loss 2.3753 (2.6005)	Entropy 1.13456 (1.13466)	Top-1 acc 66.797 (61.647)	Top-5 acc 86.328 (82.273)	lr 0.01445
Train [55][80/3239]	Time 0.442 (1.548)	Data Time 0.003 (0.457)	Loss 2.7176 (2.6061)	Entropy 1.13457 (1.13465)	Top-1 acc 60.547 (61.439)	Top-5 acc 81.250 (82.142)	lr 0.01445
Train [55][90/3239]	Time 0.256 (1.433)	Data Time 0.002 (0.407)	Loss 2.5320 (2.6025)	Entropy 1.13450 (1.13464)	Top-1 acc 62.891 (61.564)	Top-5 acc 83.203 (82.207)	lr 0.01445
Train [55][100/3239]	Time 0.206 (1.338)	Data Time 0.002 (0.367)	Loss 2.9246 (2.6011)	Entropy 1.13451 (1.13462)	Top-1 acc 54.688 (61.610)	Top-5 acc 75.391 (82.252)	lr 0.01445
Train [55][110/3239]	Time 0.226 (1.259)	Data Time 0.002 (0.334)	Loss 2.8898 (2.6028)	Entropy 1.13449 (1.13461)	Top-1 acc 53.906 (61.560)	Top-5 acc 76.562 (82.207)	lr 0.01444
Train [55][120/3239]	Time 2.447 (1.194)	Data Time 0.002 (0.306)	Loss 2.5426 (2.6043)	Entropy 1.13449 (1.13460)	Top-1 acc 62.500 (61.419)	Top-5 acc 82.031 (82.222)	lr 0.01444
Train [55][130/3239]	Time 0.394 (1.122)	Data Time 0.001 (0.283)	Loss 2.7150 (2.6082)	Entropy 1.13455 (1.13460)	Top-1 acc 58.203 (61.251)	Top-5 acc 81.641 (82.198)	lr 0.01444
Train [55][140/3239]	Time 0.230 (1.074)	Data Time 0.002 (0.263)	Loss 2.4778 (2.6069)	Entropy 1.13451 (1.13459)	Top-1 acc 65.234 (61.309)	Top-5 acc 84.375 (82.186)	lr 0.01444
Train [55][150/3239]	Time 0.242 (1.033)	Data Time 0.002 (0.246)	Loss 2.7289 (2.6102)	Entropy 1.13443 (1.13458)	Top-1 acc 59.766 (61.263)	Top-5 acc 78.906 (82.104)	lr 0.01444
Train [55][160/3239]	Time 0.251 (1.000)	Data Time 0.001 (0.231)	Loss 2.5266 (2.6067)	Entropy 1.13442 (1.13457)	Top-1 acc 62.891 (61.272)	Top-5 acc 84.766 (82.196)	lr 0.01444
Train [55][170/3239]	Time 0.285 (0.970)	Data Time 0.002 (0.217)	Loss 2.6854 (2.6029)	Entropy 1.13442 (1.13456)	Top-1 acc 59.375 (61.349)	Top-5 acc 81.250 (82.283)	lr 0.01444
Train [55][180/3239]	Time 0.229 (0.942)	Data Time 0.001 (0.205)	Loss 2.7404 (2.6039)	Entropy 1.13440 (1.13456)	Top-1 acc 55.859 (61.328)	Top-5 acc 77.344 (82.251)	lr 0.01444
Train [55][190/3239]	Time 0.225 (0.918)	Data Time 0.001 (0.195)	Loss 2.6606 (2.6016)	Entropy 1.13440 (1.13455)	Top-1 acc 59.375 (61.365)	Top-5 acc 81.641 (82.297)	lr 0.01444
Train [55][200/3239]	Time 0.265 (0.896)	Data Time 0.001 (0.185)	Loss 2.5828 (2.5994)	Entropy 1.13436 (1.13454)	Top-1 acc 58.203 (61.433)	Top-5 acc 82.031 (82.367)	lr 0.01444
Train [55][210/3239]	Time 0.231 (0.876)	Data Time 0.001 (0.176)	Loss 2.5570 (2.5990)	Entropy 1.13437 (1.13453)	Top-1 acc 62.891 (61.465)	Top-5 acc 81.641 (82.348)	lr 0.01443
Train [55][220/3239]	Time 0.299 (0.858)	Data Time 0.001 (0.169)	Loss 2.3793 (2.5970)	Entropy 1.13436 (1.13452)	Top-1 acc 66.016 (61.528)	Top-5 acc 88.672 (82.356)	lr 0.01443
Train [55][230/3239]	Time 2.385 (0.841)	Data Time 0.002 (0.161)	Loss 2.4222 (2.6000)	Entropy 1.13436 (1.13452)	Top-1 acc 63.672 (61.453)	Top-5 acc 85.547 (82.287)	lr 0.01443
Train [55][240/3239]	Time 0.242 (0.817)	Data Time 0.001 (0.155)	Loss 2.5228 (2.6000)	Entropy 1.13435 (1.13451)	Top-1 acc 64.062 (61.437)	Top-5 acc 83.984 (82.300)	lr 0.01443
Train [55][250/3239]	Time 0.238 (0.803)	Data Time 0.001 (0.149)	Loss 2.6490 (2.6016)	Entropy 1.13431 (1.13450)	Top-1 acc 60.547 (61.412)	Top-5 acc 80.469 (82.276)	lr 0.01443
Train [55][260/3239]	Time 0.215 (0.790)	Data Time 0.001 (0.143)	Loss 2.7018 (2.6037)	Entropy 1.13432 (1.13450)	Top-1 acc 59.375 (61.327)	Top-5 acc 81.641 (82.251)	lr 0.01443
Train [55][270/3239]	Time 0.384 (0.779)	Data Time 0.002 (0.138)	Loss 2.5432 (2.6025)	Entropy 1.13429 (1.13449)	Top-1 acc 62.891 (61.357)	Top-5 acc 83.594 (82.278)	lr 0.01443
Train [55][280/3239]	Time 0.225 (0.768)	Data Time 0.002 (0.133)	Loss 2.4965 (2.6036)	Entropy 1.13427 (1.13448)	Top-1 acc 59.766 (61.282)	Top-5 acc 86.328 (82.238)	lr 0.01443
Train [55][290/3239]	Time 0.226 (0.758)	Data Time 0.001 (0.128)	Loss 2.9532 (2.6069)	Entropy 1.13426 (1.13447)	Top-1 acc 52.734 (61.180)	Top-5 acc 77.344 (82.157)	lr 0.01443
Train [55][300/3239]	Time 0.230 (0.749)	Data Time 0.001 (0.124)	Loss 2.4818 (2.6057)	Entropy 1.13424 (1.13447)	Top-1 acc 63.281 (61.233)	Top-5 acc 82.422 (82.177)	lr 0.01443
Train [55][310/3239]	Time 0.248 (0.740)	Data Time 0.001 (0.120)	Loss 2.8300 (2.6080)	Entropy 1.13409 (1.13446)	Top-1 acc 56.641 (61.174)	Top-5 acc 76.562 (82.103)	lr 0.01442
Train [55][320/3239]	Time 0.218 (0.732)	Data Time 0.001 (0.117)	Loss 2.6487 (2.6096)	Entropy 1.13407 (1.13445)	Top-1 acc 60.156 (61.146)	Top-5 acc 83.203 (82.060)	lr 0.01442
Train [55][330/3239]	Time 0.254 (0.724)	Data Time 0.001 (0.113)	Loss 2.8326 (2.6101)	Entropy 1.13405 (1.13443)	Top-1 acc 55.078 (61.137)	Top-5 acc 76.172 (82.038)	lr 0.01442
Train [55][340/3239]	Time 2.618 (0.717)	Data Time 0.001 (0.110)	Loss 2.4283 (2.6093)	Entropy 1.13405 (1.13442)	Top-1 acc 65.625 (61.132)	Top-5 acc 86.328 (82.060)	lr 0.01442
Train [55][350/3239]	Time 0.241 (0.703)	Data Time 0.002 (0.107)	Loss 2.5212 (2.6103)	Entropy 1.13402 (1.13441)	Top-1 acc 64.453 (61.109)	Top-5 acc 83.203 (82.042)	lr 0.01442
Train [55][360/3239]	Time 0.325 (0.697)	Data Time 0.001 (0.104)	Loss 2.5673 (2.6114)	Entropy 1.13402 (1.13440)	Top-1 acc 61.328 (61.086)	Top-5 acc 82.031 (82.028)	lr 0.01442
Train [55][370/3239]	Time 0.201 (0.691)	Data Time 0.001 (0.101)	Loss 2.6831 (2.6129)	Entropy 1.13400 (1.13439)	Top-1 acc 60.938 (61.041)	Top-5 acc 79.688 (82.004)	lr 0.01442
Train [55][380/3239]	Time 0.218 (0.685)	Data Time 0.001 (0.098)	Loss 2.8066 (2.6126)	Entropy 1.13398 (1.13438)	Top-1 acc 60.547 (61.051)	Top-5 acc 80.078 (82.036)	lr 0.01442
Train [55][390/3239]	Time 0.263 (0.680)	Data Time 0.001 (0.096)	Loss 2.4825 (2.6122)	Entropy 1.13390 (1.13437)	Top-1 acc 64.453 (61.060)	Top-5 acc 86.328 (82.053)	lr 0.01442
Train [55][400/3239]	Time 0.220 (0.674)	Data Time 0.001 (0.094)	Loss 2.6173 (2.6132)	Entropy 1.13384 (1.13436)	Top-1 acc 59.766 (61.050)	Top-5 acc 84.375 (82.047)	lr 0.01442
Train [55][410/3239]	Time 0.323 (0.669)	Data Time 0.001 (0.091)	Loss 2.6304 (2.6134)	Entropy 1.13387 (1.13434)	Top-1 acc 65.234 (61.047)	Top-5 acc 82.422 (82.047)	lr 0.01441
Train [55][420/3239]	Time 0.201 (0.664)	Data Time 0.001 (0.089)	Loss 2.6014 (2.6140)	Entropy 1.13384 (1.13433)	Top-1 acc 62.891 (61.055)	Top-5 acc 80.859 (82.034)	lr 0.01441
Train [55][430/3239]	Time 0.259 (0.659)	Data Time 0.001 (0.087)	Loss 2.7216 (2.6147)	Entropy 1.13385 (1.13432)	Top-1 acc 59.766 (61.034)	Top-5 acc 81.250 (82.026)	lr 0.01441
Train [55][440/3239]	Time 0.231 (0.769)	Data Time 0.004 (0.085)	Loss 2.5024 (2.6152)	Entropy 1.13386 (1.13431)	Top-1 acc 64.453 (61.010)	Top-5 acc 82.812 (82.017)	lr 0.01441
Train [55][450/3239]	Time 2.464 (0.763)	Data Time 0.002 (0.083)	Loss 2.5202 (2.6146)	Entropy 1.13386 (1.13430)	Top-1 acc 61.719 (61.014)	Top-5 acc 86.328 (82.060)	lr 0.01441
Train [55][460/3239]	Time 0.275 (0.752)	Data Time 0.002 (0.082)	Loss 2.4562 (2.6143)	Entropy 1.13378 (1.13429)	Top-1 acc 63.281 (61.003)	Top-5 acc 85.156 (82.064)	lr 0.01441
Train [55][470/3239]	Time 0.284 (0.746)	Data Time 0.002 (0.080)	Loss 2.5358 (2.6145)	Entropy 1.13379 (1.13428)	Top-1 acc 64.062 (60.988)	Top-5 acc 80.078 (82.033)	lr 0.01441
Train [55][480/3239]	Time 0.234 (0.740)	Data Time 0.001 (0.078)	Loss 2.4805 (2.6136)	Entropy 1.13378 (1.13427)	Top-1 acc 62.500 (61.001)	Top-5 acc 84.375 (82.047)	lr 0.01441
Train [55][490/3239]	Time 0.208 (0.734)	Data Time 0.001 (0.077)	Loss 2.5602 (2.6139)	Entropy 1.13376 (1.13426)	Top-1 acc 57.422 (60.973)	Top-5 acc 83.594 (82.020)	lr 0.01441
Train [55][500/3239]	Time 0.258 (0.728)	Data Time 0.005 (0.075)	Loss 2.6404 (2.6151)	Entropy 1.13370 (1.13425)	Top-1 acc 55.859 (60.938)	Top-5 acc 80.469 (81.998)	lr 0.01441
Train [55][510/3239]	Time 0.216 (0.723)	Data Time 0.001 (0.074)	Loss 2.6378 (2.6151)	Entropy 1.13363 (1.13424)	Top-1 acc 60.938 (60.948)	Top-5 acc 82.422 (82.016)	lr 0.01440
Train [55][520/3239]	Time 0.232 (0.718)	Data Time 0.001 (0.072)	Loss 2.4763 (2.6148)	Entropy 1.13363 (1.13422)	Top-1 acc 63.281 (60.949)	Top-5 acc 82.812 (82.018)	lr 0.01440
Train [55][530/3239]	Time 0.217 (0.713)	Data Time 0.001 (0.071)	Loss 2.5291 (2.6148)	Entropy 1.13360 (1.13421)	Top-1 acc 61.328 (60.932)	Top-5 acc 85.156 (82.025)	lr 0.01440
Train [55][540/3239]	Time 0.216 (0.708)	Data Time 0.001 (0.070)	Loss 2.5991 (2.6146)	Entropy 1.13357 (1.13420)	Top-1 acc 61.719 (60.950)	Top-5 acc 81.641 (82.019)	lr 0.01440
Train [55][550/3239]	Time 0.374 (0.704)	Data Time 0.001 (0.069)	Loss 2.6130 (2.6140)	Entropy 1.13356 (1.13419)	Top-1 acc 61.719 (60.980)	Top-5 acc 82.812 (82.033)	lr 0.01440
Train [55][560/3239]	Time 2.561 (0.700)	Data Time 0.002 (0.067)	Loss 2.4894 (2.6145)	Entropy 1.13356 (1.13418)	Top-1 acc 64.844 (60.979)	Top-5 acc 84.375 (82.015)	lr 0.01440
Train [55][570/3239]	Time 0.219 (0.692)	Data Time 0.001 (0.066)	Loss 2.6780 (2.6143)	Entropy 1.13346 (1.13417)	Top-1 acc 56.250 (60.985)	Top-5 acc 80.078 (81.998)	lr 0.01440
Train [55][580/3239]	Time 0.234 (0.688)	Data Time 0.001 (0.065)	Loss 2.6260 (2.6139)	Entropy 1.13346 (1.13415)	Top-1 acc 60.938 (61.003)	Top-5 acc 81.250 (82.004)	lr 0.01440
Train [55][590/3239]	Time 0.239 (0.684)	Data Time 0.001 (0.064)	Loss 2.7150 (2.6133)	Entropy 1.13332 (1.13414)	Top-1 acc 60.938 (61.016)	Top-5 acc 77.734 (82.003)	lr 0.01440
Train [55][600/3239]	Time 0.309 (0.680)	Data Time 0.002 (0.063)	Loss 2.6672 (2.6132)	Entropy 1.13323 (1.13413)	Top-1 acc 60.156 (61.033)	Top-5 acc 81.641 (82.010)	lr 0.01440
Train [55][610/3239]	Time 0.219 (0.677)	Data Time 0.001 (0.062)	Loss 2.4918 (2.6127)	Entropy 1.13322 (1.13411)	Top-1 acc 64.453 (61.026)	Top-5 acc 84.766 (82.028)	lr 0.01439
Train [55][620/3239]	Time 0.240 (0.673)	Data Time 0.001 (0.061)	Loss 2.6740 (2.6134)	Entropy 1.13317 (1.13410)	Top-1 acc 57.031 (61.013)	Top-5 acc 81.250 (82.014)	lr 0.01439
Train [55][630/3239]	Time 0.220 (0.670)	Data Time 0.001 (0.060)	Loss 2.6182 (2.6143)	Entropy 1.13319 (1.13408)	Top-1 acc 59.766 (60.996)	Top-5 acc 78.906 (81.993)	lr 0.01439
Train [55][640/3239]	Time 0.220 (0.667)	Data Time 0.001 (0.059)	Loss 2.7206 (2.6141)	Entropy 1.13321 (1.13407)	Top-1 acc 57.031 (61.005)	Top-5 acc 78.906 (81.988)	lr 0.01439
Train [55][650/3239]	Time 0.317 (0.664)	Data Time 0.001 (0.058)	Loss 2.5858 (2.6143)	Entropy 1.13319 (1.13405)	Top-1 acc 61.719 (60.987)	Top-5 acc 82.422 (81.977)	lr 0.01439
Train [55][660/3239]	Time 0.232 (0.661)	Data Time 0.001 (0.057)	Loss 2.5371 (2.6141)	Entropy 1.13319 (1.13404)	Top-1 acc 63.281 (61.002)	Top-5 acc 84.766 (81.976)	lr 0.01439
Train [55][670/3239]	Time 2.445 (0.657)	Data Time 0.001 (0.057)	Loss 2.7314 (2.6140)	Entropy 1.13319 (1.13403)	Top-1 acc 59.375 (61.017)	Top-5 acc 77.734 (81.975)	lr 0.01439
Train [55][680/3239]	Time 0.146 (0.651)	Data Time 0.001 (0.056)	Loss 2.6451 (2.6142)	Entropy 1.13315 (1.13402)	Top-1 acc 60.547 (61.008)	Top-5 acc 82.812 (81.970)	lr 0.01439
Train [55][690/3239]	Time 0.225 (0.648)	Data Time 0.001 (0.055)	Loss 2.5556 (2.6137)	Entropy 1.13314 (1.13400)	Top-1 acc 64.844 (61.027)	Top-5 acc 82.422 (81.976)	lr 0.01439
Train [55][700/3239]	Time 0.212 (0.646)	Data Time 0.001 (0.054)	Loss 2.6918 (2.6140)	Entropy 1.13306 (1.13399)	Top-1 acc 61.719 (61.026)	Top-5 acc 82.031 (81.972)	lr 0.01439
Train [55][710/3239]	Time 0.231 (0.643)	Data Time 0.002 (0.054)	Loss 2.4344 (2.6138)	Entropy 1.13305 (1.13398)	Top-1 acc 63.672 (61.021)	Top-5 acc 86.719 (81.974)	lr 0.01438
Train [55][720/3239]	Time 0.215 (0.641)	Data Time 0.002 (0.053)	Loss 2.5862 (2.6144)	Entropy 1.13301 (1.13396)	Top-1 acc 60.156 (61.008)	Top-5 acc 83.984 (81.966)	lr 0.01438
Train [55][730/3239]	Time 0.246 (0.638)	Data Time 0.001 (0.052)	Loss 2.5151 (2.6144)	Entropy 1.13292 (1.13395)	Top-1 acc 64.062 (60.991)	Top-5 acc 84.766 (81.971)	lr 0.01438
Train [55][740/3239]	Time 0.212 (0.636)	Data Time 0.001 (0.051)	Loss 2.4754 (2.6150)	Entropy 1.13290 (1.13394)	Top-1 acc 61.328 (60.979)	Top-5 acc 87.500 (81.966)	lr 0.01438
Train [55][750/3239]	Time 0.258 (0.633)	Data Time 0.001 (0.051)	Loss 2.5792 (2.6146)	Entropy 1.13288 (1.13392)	Top-1 acc 61.328 (60.998)	Top-5 acc 84.375 (81.963)	lr 0.01438
Train [55][760/3239]	Time 0.211 (0.631)	Data Time 0.001 (0.050)	Loss 2.5972 (2.6150)	Entropy 1.13277 (1.13391)	Top-1 acc 58.984 (60.991)	Top-5 acc 83.594 (81.962)	lr 0.01438
Train [55][770/3239]	Time 0.230 (0.629)	Data Time 0.001 (0.050)	Loss 2.7322 (2.6154)	Entropy 1.13262 (1.13389)	Top-1 acc 58.594 (60.978)	Top-5 acc 78.906 (81.946)	lr 0.01438
Train [55][780/3239]	Time 2.529 (0.627)	Data Time 0.001 (0.049)	Loss 2.8312 (2.6162)	Entropy 1.13262 (1.13388)	Top-1 acc 57.031 (60.956)	Top-5 acc 78.125 (81.933)	lr 0.01438
Train [55][790/3239]	Time 0.226 (0.622)	Data Time 0.001 (0.048)	Loss 2.5594 (2.6159)	Entropy 1.13258 (1.13386)	Top-1 acc 62.500 (60.966)	Top-5 acc 80.469 (81.935)	lr 0.01438
Train [55][800/3239]	Time 0.239 (0.620)	Data Time 0.001 (0.048)	Loss 2.5457 (2.6157)	Entropy 1.13251 (1.13384)	Top-1 acc 62.500 (60.977)	Top-5 acc 83.984 (81.932)	lr 0.01438
Train [55][810/3239]	Time 0.271 (0.673)	Data Time 0.002 (0.047)	Loss 2.5771 (2.6153)	Entropy 1.13246 (1.13383)	Top-1 acc 63.672 (61.002)	Top-5 acc 82.812 (81.926)	lr 0.01437
Train [55][820/3239]	Time 0.254 (0.673)	Data Time 0.002 (0.047)	Loss 2.6078 (2.6165)	Entropy 1.13242 (1.13381)	Top-1 acc 60.938 (60.980)	Top-5 acc 83.984 (81.902)	lr 0.01437
Train [55][830/3239]	Time 0.231 (0.670)	Data Time 0.002 (0.046)	Loss 2.3168 (2.6163)	Entropy 1.13241 (1.13379)	Top-1 acc 70.312 (60.978)	Top-5 acc 87.109 (81.910)	lr 0.01437
Train [55][840/3239]	Time 0.324 (0.668)	Data Time 0.001 (0.046)	Loss 2.8528 (2.6172)	Entropy 1.13236 (1.13378)	Top-1 acc 55.469 (60.958)	Top-5 acc 76.172 (81.891)	lr 0.01437
Train [55][850/3239]	Time 0.217 (0.665)	Data Time 0.001 (0.045)	Loss 2.6370 (2.6176)	Entropy 1.13230 (1.13376)	Top-1 acc 62.891 (60.960)	Top-5 acc 82.031 (81.882)	lr 0.01437
Train [55][860/3239]	Time 0.216 (0.663)	Data Time 0.001 (0.045)	Loss 2.6042 (2.6168)	Entropy 1.13230 (1.13374)	Top-1 acc 59.766 (60.989)	Top-5 acc 84.375 (81.898)	lr 0.01437
Train [55][870/3239]	Time 0.212 (0.660)	Data Time 0.001 (0.044)	Loss 2.4592 (2.6181)	Entropy 1.13228 (1.13373)	Top-1 acc 61.719 (60.960)	Top-5 acc 85.156 (81.864)	lr 0.01437
Train [55][880/3239]	Time 0.218 (0.658)	Data Time 0.001 (0.044)	Loss 2.7278 (2.6183)	Entropy 1.13227 (1.13371)	Top-1 acc 57.422 (60.950)	Top-5 acc 80.469 (81.858)	lr 0.01437
Train [55][890/3239]	Time 2.604 (0.656)	Data Time 0.001 (0.043)	Loss 2.5916 (2.6182)	Entropy 1.13227 (1.13369)	Top-1 acc 58.984 (60.946)	Top-5 acc 82.812 (81.860)	lr 0.01437
Train [55][900/3239]	Time 0.214 (0.651)	Data Time 0.001 (0.043)	Loss 2.8783 (2.6178)	Entropy 1.13225 (1.13368)	Top-1 acc 53.906 (60.971)	Top-5 acc 79.688 (81.876)	lr 0.01437
Train [55][910/3239]	Time 0.239 (0.649)	Data Time 0.001 (0.042)	Loss 2.7198 (2.6177)	Entropy 1.13225 (1.13366)	Top-1 acc 58.594 (60.957)	Top-5 acc 79.688 (81.881)	lr 0.01436
Train [55][920/3239]	Time 0.235 (0.647)	Data Time 0.001 (0.042)	Loss 2.8478 (2.6177)	Entropy 1.13221 (1.13365)	Top-1 acc 55.078 (60.964)	Top-5 acc 79.297 (81.876)	lr 0.01436
Train [55][930/3239]	Time 0.207 (0.645)	Data Time 0.001 (0.041)	Loss 2.6770 (2.6186)	Entropy 1.13219 (1.13363)	Top-1 acc 58.984 (60.940)	Top-5 acc 78.906 (81.857)	lr 0.01436
Train [55][940/3239]	Time 0.305 (0.643)	Data Time 0.001 (0.041)	Loss 2.6000 (2.6182)	Entropy 1.13217 (1.13362)	Top-1 acc 57.031 (60.949)	Top-5 acc 82.422 (81.865)	lr 0.01436
Train [55][950/3239]	Time 0.231 (0.641)	Data Time 0.001 (0.040)	Loss 2.5438 (2.6178)	Entropy 1.13216 (1.13360)	Top-1 acc 63.281 (60.958)	Top-5 acc 83.984 (81.877)	lr 0.01436
Train [55][960/3239]	Time 0.228 (0.639)	Data Time 0.001 (0.040)	Loss 2.6428 (2.6177)	Entropy 1.13214 (1.13358)	Top-1 acc 58.594 (60.973)	Top-5 acc 80.469 (81.872)	lr 0.01436
Train [55][970/3239]	Time 0.232 (0.638)	Data Time 0.001 (0.040)	Loss 2.5639 (2.6177)	Entropy 1.13208 (1.13357)	Top-1 acc 58.203 (60.987)	Top-5 acc 83.203 (81.876)	lr 0.01436
Train [55][980/3239]	Time 0.225 (0.636)	Data Time 0.001 (0.039)	Loss 2.7116 (2.6177)	Entropy 1.13206 (1.13355)	Top-1 acc 57.422 (60.990)	Top-5 acc 82.422 (81.877)	lr 0.01436
Train [55][990/3239]	Time 0.233 (0.634)	Data Time 0.001 (0.039)	Loss 2.5080 (2.6173)	Entropy 1.13200 (1.13354)	Top-1 acc 62.891 (60.992)	Top-5 acc 81.641 (81.881)	lr 0.01436
Train [55][1000/3239]	Time 2.454 (0.633)	Data Time 0.001 (0.039)	Loss 2.5882 (2.6174)	Entropy 1.13200 (1.13352)	Top-1 acc 58.203 (60.987)	Top-5 acc 81.250 (81.885)	lr 0.01436
Train [55][1010/3239]	Time 0.222 (0.629)	Data Time 0.001 (0.038)	Loss 2.6204 (2.6174)	Entropy 1.13199 (1.13351)	Top-1 acc 58.984 (60.985)	Top-5 acc 82.422 (81.882)	lr 0.01435
Train [55][1020/3239]	Time 0.217 (0.627)	Data Time 0.001 (0.038)	Loss 2.6999 (2.6176)	Entropy 1.13194 (1.13349)	Top-1 acc 62.891 (60.972)	Top-5 acc 80.859 (81.878)	lr 0.01435
Train [55][1030/3239]	Time 0.161 (0.626)	Data Time 0.001 (0.037)	Loss 2.6602 (2.6174)	Entropy 1.13192 (1.13348)	Top-1 acc 62.500 (60.972)	Top-5 acc 81.250 (81.882)	lr 0.01435
Train [55][1040/3239]	Time 0.218 (0.624)	Data Time 0.001 (0.037)	Loss 2.5652 (2.6175)	Entropy 1.13191 (1.13346)	Top-1 acc 62.500 (60.968)	Top-5 acc 83.594 (81.878)	lr 0.01435
Train [55][1050/3239]	Time 0.219 (0.623)	Data Time 0.001 (0.037)	Loss 2.3553 (2.6173)	Entropy 1.13185 (1.13345)	Top-1 acc 67.578 (60.969)	Top-5 acc 88.672 (81.879)	lr 0.01435
Train [55][1060/3239]	Time 0.287 (0.621)	Data Time 0.001 (0.036)	Loss 2.5795 (2.6171)	Entropy 1.13183 (1.13343)	Top-1 acc 64.062 (60.979)	Top-5 acc 80.859 (81.884)	lr 0.01435
Train [55][1070/3239]	Time 0.232 (0.620)	Data Time 0.001 (0.036)	Loss 2.5218 (2.6171)	Entropy 1.13182 (1.13342)	Top-1 acc 66.016 (60.981)	Top-5 acc 82.812 (81.876)	lr 0.01435
Train [55][1080/3239]	Time 0.259 (0.618)	Data Time 0.002 (0.036)	Loss 2.4650 (2.6175)	Entropy 1.13170 (1.13340)	Top-1 acc 63.672 (60.971)	Top-5 acc 86.328 (81.868)	lr 0.01435
Train [55][1090/3239]	Time 0.226 (0.617)	Data Time 0.001 (0.036)	Loss 2.6431 (2.6171)	Entropy 1.13167 (1.13339)	Top-1 acc 58.594 (60.978)	Top-5 acc 83.203 (81.874)	lr 0.01435
Train [55][1100/3239]	Time 0.231 (0.615)	Data Time 0.001 (0.035)	Loss 2.6392 (2.6175)	Entropy 1.13168 (1.13337)	Top-1 acc 61.328 (60.969)	Top-5 acc 80.469 (81.865)	lr 0.01435
Train [55][1110/3239]	Time 2.416 (0.614)	Data Time 0.001 (0.035)	Loss 2.6519 (2.6178)	Entropy 1.13168 (1.13336)	Top-1 acc 62.891 (60.964)	Top-5 acc 79.297 (81.862)	lr 0.01434
Train [55][1120/3239]	Time 0.218 (0.611)	Data Time 0.001 (0.035)	Loss 2.5512 (2.6176)	Entropy 1.13166 (1.13334)	Top-1 acc 64.062 (60.974)	Top-5 acc 82.812 (81.868)	lr 0.01434
Train [55][1130/3239]	Time 0.238 (0.609)	Data Time 0.001 (0.034)	Loss 2.4783 (2.6174)	Entropy 1.13162 (1.13333)	Top-1 acc 64.453 (60.975)	Top-5 acc 85.156 (81.867)	lr 0.01434
Train [55][1140/3239]	Time 0.234 (0.608)	Data Time 0.001 (0.034)	Loss 2.4698 (2.6175)	Entropy 1.13163 (1.13331)	Top-1 acc 65.625 (60.973)	Top-5 acc 83.594 (81.863)	lr 0.01434
Train [55][1150/3239]	Time 0.228 (0.607)	Data Time 0.001 (0.034)	Loss 2.5864 (2.6170)	Entropy 1.13162 (1.13330)	Top-1 acc 64.062 (60.980)	Top-5 acc 80.859 (81.869)	lr 0.01434
Train [55][1160/3239]	Time 0.224 (0.606)	Data Time 0.002 (0.033)	Loss 2.5627 (2.6170)	Entropy 1.13163 (1.13328)	Top-1 acc 64.844 (60.981)	Top-5 acc 82.031 (81.864)	lr 0.01434
Train [55][1170/3239]	Time 0.220 (0.647)	Data Time 0.002 (0.033)	Loss 2.6040 (2.6167)	Entropy 1.13166 (1.13327)	Top-1 acc 60.156 (60.988)	Top-5 acc 79.297 (81.868)	lr 0.01434
Train [55][1180/3239]	Time 0.312 (0.645)	Data Time 0.002 (0.033)	Loss 2.7739 (2.6175)	Entropy 1.13160 (1.13325)	Top-1 acc 56.250 (60.968)	Top-5 acc 82.031 (81.858)	lr 0.01434
Train [55][1190/3239]	Time 0.219 (0.644)	Data Time 0.001 (0.033)	Loss 2.7140 (2.6173)	Entropy 1.13158 (1.13324)	Top-1 acc 57.422 (60.962)	Top-5 acc 81.641 (81.869)	lr 0.01434
Train [55][1200/3239]	Time 0.205 (0.642)	Data Time 0.001 (0.032)	Loss 2.6026 (2.6174)	Entropy 1.13152 (1.13323)	Top-1 acc 61.719 (60.957)	Top-5 acc 81.250 (81.867)	lr 0.01434
Train [55][1210/3239]	Time 0.220 (0.641)	Data Time 0.001 (0.032)	Loss 2.6583 (2.6177)	Entropy 1.13147 (1.13321)	Top-1 acc 57.031 (60.942)	Top-5 acc 83.203 (81.869)	lr 0.01433
Train [55][1220/3239]	Time 2.452 (0.639)	Data Time 0.001 (0.032)	Loss 2.6144 (2.6180)	Entropy 1.13147 (1.13320)	Top-1 acc 60.156 (60.930)	Top-5 acc 82.422 (81.871)	lr 0.01433
Train [55][1230/3239]	Time 0.314 (0.636)	Data Time 0.001 (0.032)	Loss 2.5663 (2.6181)	Entropy 1.13137 (1.13318)	Top-1 acc 61.328 (60.929)	Top-5 acc 84.375 (81.872)	lr 0.01433
Train [55][1240/3239]	Time 0.216 (0.634)	Data Time 0.001 (0.031)	Loss 2.5975 (2.6179)	Entropy 1.13136 (1.13317)	Top-1 acc 62.891 (60.937)	Top-5 acc 81.641 (81.876)	lr 0.01433
Train [55][1250/3239]	Time 0.233 (0.633)	Data Time 0.001 (0.031)	Loss 2.5191 (2.6176)	Entropy 1.13135 (1.13315)	Top-1 acc 60.156 (60.942)	Top-5 acc 85.938 (81.876)	lr 0.01433
Train [55][1260/3239]	Time 0.215 (0.632)	Data Time 0.001 (0.031)	Loss 2.4833 (2.6173)	Entropy 1.13137 (1.13314)	Top-1 acc 64.453 (60.954)	Top-5 acc 82.812 (81.879)	lr 0.01433
Train [55][1270/3239]	Time 0.229 (0.631)	Data Time 0.001 (0.031)	Loss 2.6049 (2.6178)	Entropy 1.13127 (1.13313)	Top-1 acc 61.719 (60.939)	Top-5 acc 80.469 (81.866)	lr 0.01433
Train [55][1280/3239]	Time 0.231 (0.629)	Data Time 0.001 (0.030)	Loss 2.5654 (2.6180)	Entropy 1.13121 (1.13311)	Top-1 acc 62.500 (60.937)	Top-5 acc 84.375 (81.861)	lr 0.01433
Train [55][1290/3239]	Time 0.228 (0.628)	Data Time 0.001 (0.030)	Loss 2.7147 (2.6182)	Entropy 1.13119 (1.13310)	Top-1 acc 59.375 (60.932)	Top-5 acc 80.859 (81.859)	lr 0.01433
Train [55][1300/3239]	Time 0.231 (0.627)	Data Time 0.002 (0.030)	Loss 2.6727 (2.6183)	Entropy 1.13121 (1.13308)	Top-1 acc 58.594 (60.923)	Top-5 acc 79.688 (81.863)	lr 0.01433
Train [55][1310/3239]	Time 0.213 (0.626)	Data Time 0.002 (0.030)	Loss 2.6187 (2.6183)	Entropy 1.13119 (1.13307)	Top-1 acc 61.719 (60.931)	Top-5 acc 80.078 (81.860)	lr 0.01432
Train [55][1320/3239]	Time 0.296 (0.625)	Data Time 0.001 (0.030)	Loss 2.6141 (2.6185)	Entropy 1.13118 (1.13305)	Top-1 acc 62.109 (60.923)	Top-5 acc 82.422 (81.856)	lr 0.01432
Train [55][1330/3239]	Time 2.416 (0.623)	Data Time 0.001 (0.029)	Loss 2.5893 (2.6185)	Entropy 1.13118 (1.13304)	Top-1 acc 64.062 (60.918)	Top-5 acc 82.812 (81.856)	lr 0.01432
Train [55][1340/3239]	Time 0.271 (0.620)	Data Time 0.001 (0.029)	Loss 2.7389 (2.6186)	Entropy 1.13117 (1.13302)	Top-1 acc 57.812 (60.914)	Top-5 acc 78.906 (81.851)	lr 0.01432
Train [55][1350/3239]	Time 0.243 (0.619)	Data Time 0.002 (0.029)	Loss 2.4525 (2.6185)	Entropy 1.13116 (1.13301)	Top-1 acc 66.016 (60.926)	Top-5 acc 83.594 (81.850)	lr 0.01432
Train [55][1360/3239]	Time 0.213 (0.618)	Data Time 0.001 (0.029)	Loss 2.6819 (2.6185)	Entropy 1.13115 (1.13300)	Top-1 acc 59.766 (60.928)	Top-5 acc 78.516 (81.842)	lr 0.01432
Train [55][1370/3239]	Time 0.336 (0.617)	Data Time 0.001 (0.029)	Loss 2.6077 (2.6185)	Entropy 1.13114 (1.13298)	Top-1 acc 61.719 (60.939)	Top-5 acc 82.812 (81.842)	lr 0.01432
Train [55][1380/3239]	Time 0.232 (0.616)	Data Time 0.001 (0.028)	Loss 2.6750 (2.6186)	Entropy 1.13109 (1.13297)	Top-1 acc 59.766 (60.935)	Top-5 acc 82.031 (81.842)	lr 0.01432
Train [55][1390/3239]	Time 0.252 (0.615)	Data Time 0.001 (0.028)	Loss 2.6121 (2.6186)	Entropy 1.13106 (1.13296)	Top-1 acc 60.156 (60.933)	Top-5 acc 80.859 (81.844)	lr 0.01432
Train [55][1400/3239]	Time 0.216 (0.614)	Data Time 0.001 (0.028)	Loss 2.5945 (2.6184)	Entropy 1.13104 (1.13294)	Top-1 acc 61.719 (60.937)	Top-5 acc 82.031 (81.844)	lr 0.01432
Train [55][1410/3239]	Time 0.221 (0.613)	Data Time 0.001 (0.028)	Loss 2.8222 (2.6186)	Entropy 1.13104 (1.13293)	Top-1 acc 57.812 (60.940)	Top-5 acc 78.516 (81.839)	lr 0.01431
Train [55][1420/3239]	Time 0.226 (0.612)	Data Time 0.001 (0.028)	Loss 2.6088 (2.6186)	Entropy 1.13105 (1.13292)	Top-1 acc 61.328 (60.935)	Top-5 acc 82.812 (81.835)	lr 0.01431
Train [55][1430/3239]	Time 0.212 (0.611)	Data Time 0.001 (0.027)	Loss 2.6715 (2.6184)	Entropy 1.13103 (1.13290)	Top-1 acc 60.156 (60.944)	Top-5 acc 80.859 (81.837)	lr 0.01431
Train [55][1440/3239]	Time 2.522 (0.610)	Data Time 0.001 (0.027)	Loss 2.4089 (2.6187)	Entropy 1.13103 (1.13289)	Top-1 acc 66.016 (60.932)	Top-5 acc 84.766 (81.833)	lr 0.01431
Train [55][1450/3239]	Time 0.229 (0.607)	Data Time 0.001 (0.027)	Loss 2.6491 (2.6188)	Entropy 1.13099 (1.13288)	Top-1 acc 56.641 (60.926)	Top-5 acc 82.422 (81.830)	lr 0.01431
Train [55][1460/3239]	Time 0.245 (0.606)	Data Time 0.001 (0.027)	Loss 2.5659 (2.6186)	Entropy 1.13099 (1.13286)	Top-1 acc 60.547 (60.935)	Top-5 acc 82.422 (81.837)	lr 0.01431
Train [55][1470/3239]	Time 0.226 (0.606)	Data Time 0.001 (0.027)	Loss 2.6388 (2.6186)	Entropy 1.13096 (1.13285)	Top-1 acc 62.109 (60.933)	Top-5 acc 82.422 (81.844)	lr 0.01431
Train [55][1480/3239]	Time 0.233 (0.605)	Data Time 0.001 (0.027)	Loss 2.7065 (2.6187)	Entropy 1.13094 (1.13284)	Top-1 acc 61.328 (60.936)	Top-5 acc 80.859 (81.841)	lr 0.01431
Train [55][1490/3239]	Time 0.231 (0.604)	Data Time 0.001 (0.026)	Loss 2.5225 (2.6186)	Entropy 1.13083 (1.13283)	Top-1 acc 66.016 (60.943)	Top-5 acc 83.203 (81.840)	lr 0.01431
Train [55][1500/3239]	Time 0.221 (0.603)	Data Time 0.001 (0.026)	Loss 2.4815 (2.6186)	Entropy 1.13081 (1.13281)	Top-1 acc 66.406 (60.947)	Top-5 acc 85.156 (81.845)	lr 0.01431
Train [55][1510/3239]	Time 0.330 (0.602)	Data Time 0.001 (0.026)	Loss 2.5388 (2.6185)	Entropy 1.13082 (1.13280)	Top-1 acc 61.719 (60.950)	Top-5 acc 84.375 (81.844)	lr 0.01430
Train [55][1520/3239]	Time 0.235 (0.601)	Data Time 0.001 (0.026)	Loss 2.6660 (2.6189)	Entropy 1.13071 (1.13279)	Top-1 acc 61.328 (60.941)	Top-5 acc 81.250 (81.835)	lr 0.01430
Train [55][1530/3239]	Time 0.294 (0.632)	Data Time 0.003 (0.026)	Loss 2.6052 (2.6190)	Entropy 1.13064 (1.13277)	Top-1 acc 58.984 (60.935)	Top-5 acc 85.156 (81.832)	lr 0.01430
Train [55][1540/3239]	Time 0.254 (0.631)	Data Time 0.002 (0.026)	Loss 2.6688 (2.6191)	Entropy 1.13063 (1.13276)	Top-1 acc 58.594 (60.934)	Top-5 acc 80.859 (81.827)	lr 0.01430
Train [55][1550/3239]	Time 2.566 (0.630)	Data Time 0.002 (0.025)	Loss 2.4130 (2.6194)	Entropy 1.13063 (1.13274)	Top-1 acc 67.578 (60.925)	Top-5 acc 84.375 (81.820)	lr 0.01430
Train [55][1560/3239]	Time 0.320 (0.628)	Data Time 0.001 (0.025)	Loss 2.4806 (2.6192)	Entropy 1.13057 (1.13273)	Top-1 acc 63.672 (60.929)	Top-5 acc 83.984 (81.820)	lr 0.01430
Train [55][1570/3239]	Time 0.240 (0.627)	Data Time 0.002 (0.025)	Loss 2.8496 (2.6194)	Entropy 1.13050 (1.13272)	Top-1 acc 58.594 (60.933)	Top-5 acc 78.516 (81.816)	lr 0.01430
Train [55][1580/3239]	Time 0.222 (0.626)	Data Time 0.001 (0.025)	Loss 2.5629 (2.6194)	Entropy 1.13046 (1.13270)	Top-1 acc 65.625 (60.936)	Top-5 acc 82.422 (81.818)	lr 0.01430
Train [55][1590/3239]	Time 0.233 (0.625)	Data Time 0.001 (0.025)	Loss 2.4763 (2.6195)	Entropy 1.13039 (1.13269)	Top-1 acc 63.281 (60.935)	Top-5 acc 86.328 (81.816)	lr 0.01430
Train [55][1600/3239]	Time 0.247 (0.624)	Data Time 0.001 (0.025)	Loss 2.5687 (2.6192)	Entropy 1.13021 (1.13267)	Top-1 acc 61.328 (60.937)	Top-5 acc 81.641 (81.823)	lr 0.01430
Train [55][1610/3239]	Time 0.221 (0.623)	Data Time 0.001 (0.025)	Loss 2.6960 (2.6192)	Entropy 1.13019 (1.13266)	Top-1 acc 60.547 (60.937)	Top-5 acc 80.469 (81.825)	lr 0.01429
Train [55][1620/3239]	Time 0.229 (0.622)	Data Time 0.001 (0.024)	Loss 2.6446 (2.6192)	Entropy 1.13013 (1.13264)	Top-1 acc 57.812 (60.941)	Top-5 acc 81.641 (81.828)	lr 0.01429
Train [55][1630/3239]	Time 0.245 (0.621)	Data Time 0.001 (0.024)	Loss 2.5499 (2.6187)	Entropy 1.13006 (1.13263)	Top-1 acc 63.281 (60.949)	Top-5 acc 84.766 (81.836)	lr 0.01429
Train [55][1640/3239]	Time 0.230 (0.620)	Data Time 0.002 (0.024)	Loss 2.6743 (2.6191)	Entropy 1.13001 (1.13261)	Top-1 acc 62.109 (60.943)	Top-5 acc 82.812 (81.834)	lr 0.01429
Train [55][1650/3239]	Time 0.377 (0.619)	Data Time 0.001 (0.024)	Loss 2.5159 (2.6188)	Entropy 1.12998 (1.13259)	Top-1 acc 64.453 (60.953)	Top-5 acc 85.547 (81.840)	lr 0.01429
Train [55][1660/3239]	Time 2.579 (0.618)	Data Time 0.001 (0.024)	Loss 2.5836 (2.6188)	Entropy 1.12998 (1.13258)	Top-1 acc 61.328 (60.959)	Top-5 acc 82.812 (81.837)	lr 0.01429
Train [55][1670/3239]	Time 0.244 (0.616)	Data Time 0.001 (0.024)	Loss 2.6502 (2.6188)	Entropy 1.12994 (1.13256)	Top-1 acc 60.938 (60.963)	Top-5 acc 80.078 (81.836)	lr 0.01429
Train [55][1680/3239]	Time 0.217 (0.615)	Data Time 0.001 (0.024)	Loss 2.7013 (2.6187)	Entropy 1.12987 (1.13255)	Top-1 acc 60.547 (60.972)	Top-5 acc 78.125 (81.835)	lr 0.01429
Train [55][1690/3239]	Time 0.206 (0.614)	Data Time 0.001 (0.024)	Loss 2.5310 (2.6185)	Entropy 1.12980 (1.13253)	Top-1 acc 64.453 (60.975)	Top-5 acc 84.766 (81.838)	lr 0.01429
Train [55][1700/3239]	Time 0.333 (0.613)	Data Time 0.002 (0.023)	Loss 2.7402 (2.6185)	Entropy 1.12976 (1.13251)	Top-1 acc 61.328 (60.974)	Top-5 acc 78.906 (81.839)	lr 0.01429
Train [55][1710/3239]	Time 0.228 (0.612)	Data Time 0.001 (0.023)	Loss 2.7397 (2.6188)	Entropy 1.12973 (1.13250)	Top-1 acc 62.891 (60.970)	Top-5 acc 78.516 (81.825)	lr 0.01428
Train [55][1720/3239]	Time 0.230 (0.611)	Data Time 0.001 (0.023)	Loss 2.8714 (2.6191)	Entropy 1.12972 (1.13248)	Top-1 acc 55.078 (60.968)	Top-5 acc 73.828 (81.818)	lr 0.01428
Train [55][1730/3239]	Time 0.234 (0.611)	Data Time 0.001 (0.023)	Loss 2.6417 (2.6193)	Entropy 1.12971 (1.13247)	Top-1 acc 57.031 (60.961)	Top-5 acc 83.594 (81.816)	lr 0.01428
Train [55][1740/3239]	Time 0.227 (0.610)	Data Time 0.001 (0.023)	Loss 2.6175 (2.6195)	Entropy 1.12971 (1.13245)	Top-1 acc 60.547 (60.955)	Top-5 acc 80.469 (81.812)	lr 0.01428
Train [55][1750/3239]	Time 0.332 (0.609)	Data Time 0.001 (0.023)	Loss 2.7216 (2.6198)	Entropy 1.12969 (1.13244)	Top-1 acc 59.375 (60.948)	Top-5 acc 80.469 (81.804)	lr 0.01428
Train [55][1760/3239]	Time 0.219 (0.608)	Data Time 0.001 (0.023)	Loss 2.6863 (2.6197)	Entropy 1.12969 (1.13242)	Top-1 acc 59.766 (60.951)	Top-5 acc 82.031 (81.811)	lr 0.01428
Train [55][1770/3239]	Time 2.532 (0.607)	Data Time 0.002 (0.023)	Loss 2.5117 (2.6199)	Entropy 1.12969 (1.13240)	Top-1 acc 62.500 (60.953)	Top-5 acc 85.156 (81.809)	lr 0.01428
Train [55][1780/3239]	Time 0.245 (0.605)	Data Time 0.001 (0.022)	Loss 2.6512 (2.6201)	Entropy 1.12972 (1.13239)	Top-1 acc 59.766 (60.945)	Top-5 acc 79.688 (81.806)	lr 0.01428
Train [55][1790/3239]	Time 0.255 (0.604)	Data Time 0.001 (0.022)	Loss 2.6443 (2.6202)	Entropy 1.12973 (1.13237)	Top-1 acc 57.422 (60.943)	Top-5 acc 79.297 (81.800)	lr 0.01428
Train [55][1800/3239]	Time 0.207 (0.604)	Data Time 0.001 (0.022)	Loss 2.5709 (2.6204)	Entropy 1.12972 (1.13236)	Top-1 acc 62.500 (60.949)	Top-5 acc 84.375 (81.799)	lr 0.01428
Train [55][1810/3239]	Time 0.225 (0.603)	Data Time 0.001 (0.022)	Loss 2.7620 (2.6204)	Entropy 1.12972 (1.13234)	Top-1 acc 60.547 (60.952)	Top-5 acc 78.125 (81.804)	lr 0.01427
Train [55][1820/3239]	Time 0.202 (0.602)	Data Time 0.001 (0.022)	Loss 2.6674 (2.6207)	Entropy 1.12971 (1.13233)	Top-1 acc 60.156 (60.947)	Top-5 acc 81.641 (81.798)	lr 0.01427
Train [55][1830/3239]	Time 0.219 (0.601)	Data Time 0.001 (0.022)	Loss 2.6844 (2.6208)	Entropy 1.12969 (1.13232)	Top-1 acc 60.156 (60.950)	Top-5 acc 77.344 (81.790)	lr 0.01427
Train [55][1840/3239]	Time 0.314 (0.601)	Data Time 0.001 (0.022)	Loss 2.4687 (2.6209)	Entropy 1.12968 (1.13230)	Top-1 acc 66.797 (60.945)	Top-5 acc 84.375 (81.786)	lr 0.01427
Train [55][1850/3239]	Time 0.224 (0.600)	Data Time 0.001 (0.022)	Loss 2.7500 (2.6210)	Entropy 1.12968 (1.13229)	Top-1 acc 57.422 (60.940)	Top-5 acc 78.125 (81.784)	lr 0.01427
Train [55][1860/3239]	Time 0.216 (0.599)	Data Time 0.001 (0.021)	Loss 2.6077 (2.6210)	Entropy 1.12959 (1.13227)	Top-1 acc 60.938 (60.945)	Top-5 acc 83.594 (81.783)	lr 0.01427
Train [55][1870/3239]	Time 0.238 (0.598)	Data Time 0.001 (0.021)	Loss 2.5717 (2.6213)	Entropy 1.12956 (1.13226)	Top-1 acc 60.938 (60.937)	Top-5 acc 82.422 (81.777)	lr 0.01427
Train [55][1880/3239]	Time 2.479 (0.598)	Data Time 0.002 (0.021)	Loss 2.6367 (2.6213)	Entropy 1.12956 (1.13224)	Top-1 acc 59.375 (60.933)	Top-5 acc 81.250 (81.780)	lr 0.01427
Train [55][1890/3239]	Time 0.321 (0.596)	Data Time 0.001 (0.021)	Loss 2.8940 (2.6217)	Entropy 1.12946 (1.13223)	Top-1 acc 50.000 (60.924)	Top-5 acc 76.172 (81.776)	lr 0.01427
Train [55][1900/3239]	Time 0.236 (0.620)	Data Time 0.002 (0.021)	Loss 2.6205 (2.6218)	Entropy 1.12941 (1.13222)	Top-1 acc 61.719 (60.922)	Top-5 acc 81.250 (81.775)	lr 0.01427
Train [55][1910/3239]	Time 0.217 (0.620)	Data Time 0.002 (0.021)	Loss 2.8793 (2.6221)	Entropy 1.12939 (1.13220)	Top-1 acc 56.250 (60.918)	Top-5 acc 77.344 (81.769)	lr 0.01426
Train [55][1920/3239]	Time 0.252 (0.619)	Data Time 0.002 (0.021)	Loss 2.6718 (2.6219)	Entropy 1.12933 (1.13219)	Top-1 acc 60.156 (60.921)	Top-5 acc 83.203 (81.772)	lr 0.01426
Train [55][1930/3239]	Time 0.220 (0.618)	Data Time 0.001 (0.021)	Loss 2.7290 (2.6223)	Entropy 1.12930 (1.13217)	Top-1 acc 60.156 (60.917)	Top-5 acc 76.953 (81.766)	lr 0.01426
Train [55][1940/3239]	Time 0.229 (0.617)	Data Time 0.001 (0.021)	Loss 2.8101 (2.6221)	Entropy 1.12929 (1.13216)	Top-1 acc 57.812 (60.926)	Top-5 acc 77.734 (81.768)	lr 0.01426
Train [55][1950/3239]	Time 0.236 (0.617)	Data Time 0.001 (0.021)	Loss 2.5491 (2.6223)	Entropy 1.12920 (1.13214)	Top-1 acc 60.156 (60.921)	Top-5 acc 82.422 (81.760)	lr 0.01426
Train [55][1960/3239]	Time 0.221 (0.616)	Data Time 0.001 (0.020)	Loss 2.4849 (2.6224)	Entropy 1.12916 (1.13213)	Top-1 acc 61.719 (60.911)	Top-5 acc 86.328 (81.759)	lr 0.01426
Train [55][1970/3239]	Time 0.235 (0.615)	Data Time 0.001 (0.020)	Loss 2.4941 (2.6224)	Entropy 1.12913 (1.13211)	Top-1 acc 65.625 (60.914)	Top-5 acc 84.766 (81.760)	lr 0.01426
Train [55][1980/3239]	Time 0.261 (0.614)	Data Time 0.001 (0.020)	Loss 2.5931 (2.6223)	Entropy 1.12911 (1.13210)	Top-1 acc 59.375 (60.917)	Top-5 acc 80.469 (81.761)	lr 0.01426
Train [55][1990/3239]	Time 2.407 (0.614)	Data Time 0.001 (0.020)	Loss 2.6031 (2.6220)	Entropy 1.12911 (1.13208)	Top-1 acc 64.453 (60.926)	Top-5 acc 82.031 (81.767)	lr 0.01426
Train [55][2000/3239]	Time 0.227 (0.612)	Data Time 0.001 (0.020)	Loss 2.4681 (2.6221)	Entropy 1.12908 (1.13207)	Top-1 acc 68.359 (60.928)	Top-5 acc 86.328 (81.767)	lr 0.01426
Train [55][2010/3239]	Time 0.241 (0.611)	Data Time 0.002 (0.020)	Loss 2.6846 (2.6223)	Entropy 1.12910 (1.13205)	Top-1 acc 60.547 (60.925)	Top-5 acc 80.859 (81.762)	lr 0.01425
Train [55][2020/3239]	Time 0.229 (0.610)	Data Time 0.001 (0.020)	Loss 2.7700 (2.6224)	Entropy 1.12900 (1.13204)	Top-1 acc 57.031 (60.920)	Top-5 acc 78.906 (81.759)	lr 0.01425
Train [55][2030/3239]	Time 0.261 (0.610)	Data Time 0.001 (0.020)	Loss 2.5647 (2.6222)	Entropy 1.12897 (1.13202)	Top-1 acc 60.938 (60.929)	Top-5 acc 82.812 (81.761)	lr 0.01425
Train [55][2040/3239]	Time 0.263 (0.609)	Data Time 0.001 (0.020)	Loss 2.6147 (2.6222)	Entropy 1.12897 (1.13201)	Top-1 acc 61.719 (60.929)	Top-5 acc 80.859 (81.759)	lr 0.01425
Train [55][2050/3239]	Time 0.233 (0.608)	Data Time 0.001 (0.020)	Loss 2.6523 (2.6222)	Entropy 1.12898 (1.13199)	Top-1 acc 63.281 (60.927)	Top-5 acc 80.469 (81.762)	lr 0.01425
Train [55][2060/3239]	Time 0.222 (0.608)	Data Time 0.001 (0.020)	Loss 2.6993 (2.6223)	Entropy 1.12898 (1.13198)	Top-1 acc 61.719 (60.928)	Top-5 acc 79.688 (81.761)	lr 0.01425
Train [55][2070/3239]	Time 0.232 (0.607)	Data Time 0.001 (0.019)	Loss 2.5909 (2.6223)	Entropy 1.12899 (1.13196)	Top-1 acc 65.625 (60.933)	Top-5 acc 82.812 (81.763)	lr 0.01425
Train [55][2080/3239]	Time 0.318 (0.606)	Data Time 0.001 (0.019)	Loss 2.5683 (2.6223)	Entropy 1.12894 (1.13195)	Top-1 acc 63.281 (60.936)	Top-5 acc 82.422 (81.762)	lr 0.01425
Train [55][2090/3239]	Time 0.258 (0.606)	Data Time 0.001 (0.019)	Loss 2.7956 (2.6226)	Entropy 1.12894 (1.13193)	Top-1 acc 54.297 (60.929)	Top-5 acc 79.297 (81.756)	lr 0.01425
Train [55][2100/3239]	Time 2.447 (0.605)	Data Time 0.001 (0.019)	Loss 2.5665 (2.6228)	Entropy 1.12894 (1.13192)	Top-1 acc 62.500 (60.922)	Top-5 acc 83.594 (81.751)	lr 0.01425
Train [55][2110/3239]	Time 0.234 (0.603)	Data Time 0.001 (0.019)	Loss 2.5834 (2.6230)	Entropy 1.12894 (1.13191)	Top-1 acc 64.844 (60.919)	Top-5 acc 82.031 (81.746)	lr 0.01424
Train [55][2120/3239]	Time 0.237 (0.602)	Data Time 0.001 (0.019)	Loss 2.3795 (2.6230)	Entropy 1.12894 (1.13189)	Top-1 acc 68.359 (60.919)	Top-5 acc 86.719 (81.744)	lr 0.01424
Train [55][2130/3239]	Time 0.242 (0.602)	Data Time 0.001 (0.019)	Loss 2.4507 (2.6233)	Entropy 1.12893 (1.13188)	Top-1 acc 61.328 (60.915)	Top-5 acc 85.938 (81.739)	lr 0.01424
Train [55][2140/3239]	Time 0.217 (0.601)	Data Time 0.001 (0.019)	Loss 2.5528 (2.6234)	Entropy 1.12889 (1.13186)	Top-1 acc 61.719 (60.914)	Top-5 acc 80.469 (81.736)	lr 0.01424
Train [55][2150/3239]	Time 0.217 (0.601)	Data Time 0.001 (0.019)	Loss 2.6853 (2.6237)	Entropy 1.12884 (1.13185)	Top-1 acc 60.547 (60.908)	Top-5 acc 83.203 (81.734)	lr 0.01424
Train [55][2160/3239]	Time 0.216 (0.600)	Data Time 0.001 (0.019)	Loss 2.6955 (2.6237)	Entropy 1.12882 (1.13184)	Top-1 acc 57.031 (60.902)	Top-5 acc 80.469 (81.733)	lr 0.01424
Train [55][2170/3239]	Time 0.233 (0.599)	Data Time 0.002 (0.019)	Loss 2.7481 (2.6236)	Entropy 1.12887 (1.13182)	Top-1 acc 58.984 (60.907)	Top-5 acc 80.859 (81.736)	lr 0.01424
Train [55][2180/3239]	Time 0.240 (0.599)	Data Time 0.001 (0.019)	Loss 2.5922 (2.6236)	Entropy 1.12883 (1.13181)	Top-1 acc 62.109 (60.909)	Top-5 acc 82.812 (81.734)	lr 0.01424
Train [55][2190/3239]	Time 0.219 (0.598)	Data Time 0.001 (0.019)	Loss 2.7113 (2.6237)	Entropy 1.12879 (1.13179)	Top-1 acc 57.812 (60.905)	Top-5 acc 78.906 (81.731)	lr 0.01424
Train [55][2200/3239]	Time 0.247 (0.597)	Data Time 0.001 (0.018)	Loss 2.6982 (2.6239)	Entropy 1.12881 (1.13178)	Top-1 acc 57.422 (60.901)	Top-5 acc 77.734 (81.726)	lr 0.01424
Train [55][2210/3239]	Time 2.580 (0.597)	Data Time 0.001 (0.018)	Loss 2.4950 (2.6238)	Entropy 1.12881 (1.13177)	Top-1 acc 62.891 (60.908)	Top-5 acc 83.984 (81.726)	lr 0.01423
Train [55][2220/3239]	Time 0.231 (0.595)	Data Time 0.001 (0.018)	Loss 2.6910 (2.6238)	Entropy 1.12883 (1.13175)	Top-1 acc 60.547 (60.911)	Top-5 acc 79.688 (81.727)	lr 0.01423
Train [55][2230/3239]	Time 0.250 (0.595)	Data Time 0.001 (0.018)	Loss 2.6994 (2.6240)	Entropy 1.12885 (1.13174)	Top-1 acc 60.938 (60.902)	Top-5 acc 79.297 (81.724)	lr 0.01423
Train [55][2240/3239]	Time 0.214 (0.594)	Data Time 0.001 (0.018)	Loss 2.7070 (2.6242)	Entropy 1.12884 (1.13173)	Top-1 acc 58.203 (60.897)	Top-5 acc 79.297 (81.719)	lr 0.01423
Train [55][2250/3239]	Time 0.226 (0.594)	Data Time 0.001 (0.018)	Loss 2.6889 (2.6244)	Entropy 1.12880 (1.13172)	Top-1 acc 57.812 (60.889)	Top-5 acc 82.422 (81.715)	lr 0.01423
Train [55][2260/3239]	Time 0.291 (0.613)	Data Time 0.003 (0.018)	Loss 2.5942 (2.6241)	Entropy 1.12878 (1.13170)	Top-1 acc 62.109 (60.894)	Top-5 acc 83.984 (81.723)	lr 0.01423
Train [55][2270/3239]	Time 0.323 (0.613)	Data Time 0.002 (0.018)	Loss 2.6518 (2.6240)	Entropy 1.12880 (1.13169)	Top-1 acc 60.547 (60.900)	Top-5 acc 78.906 (81.725)	lr 0.01423
Train [55][2280/3239]	Time 0.231 (0.612)	Data Time 0.001 (0.018)	Loss 2.4801 (2.6241)	Entropy 1.12875 (1.13168)	Top-1 acc 64.453 (60.895)	Top-5 acc 84.766 (81.722)	lr 0.01423
Train [55][2290/3239]	Time 0.212 (0.612)	Data Time 0.001 (0.018)	Loss 2.7756 (2.6242)	Entropy 1.12875 (1.13166)	Top-1 acc 53.906 (60.889)	Top-5 acc 78.125 (81.718)	lr 0.01423
Train [55][2300/3239]	Time 0.228 (0.611)	Data Time 0.001 (0.018)	Loss 2.8816 (2.6244)	Entropy 1.12872 (1.13165)	Top-1 acc 55.859 (60.889)	Top-5 acc 76.562 (81.714)	lr 0.01423
Train [55][2310/3239]	Time 0.255 (0.610)	Data Time 0.001 (0.018)	Loss 2.6587 (2.6245)	Entropy 1.12868 (1.13164)	Top-1 acc 58.984 (60.886)	Top-5 acc 82.031 (81.714)	lr 0.01422
Train [55][2320/3239]	Time 2.430 (0.610)	Data Time 0.001 (0.018)	Loss 2.7271 (2.6246)	Entropy 1.12868 (1.13163)	Top-1 acc 58.203 (60.878)	Top-5 acc 80.859 (81.712)	lr 0.01422
Train [55][2330/3239]	Time 0.226 (0.608)	Data Time 0.001 (0.018)	Loss 2.6905 (2.6244)	Entropy 1.12865 (1.13161)	Top-1 acc 57.812 (60.881)	Top-5 acc 78.906 (81.716)	lr 0.01422
Train [55][2340/3239]	Time 0.236 (0.608)	Data Time 0.001 (0.017)	Loss 2.6093 (2.6242)	Entropy 1.12870 (1.13160)	Top-1 acc 59.766 (60.888)	Top-5 acc 83.594 (81.720)	lr 0.01422
Train [55][2350/3239]	Time 0.238 (0.607)	Data Time 0.001 (0.017)	Loss 2.7376 (2.6242)	Entropy 1.12872 (1.13159)	Top-1 acc 60.156 (60.889)	Top-5 acc 79.297 (81.723)	lr 0.01422
Train [55][2360/3239]	Time 0.227 (0.606)	Data Time 0.001 (0.017)	Loss 2.7984 (2.6244)	Entropy 1.12874 (1.13158)	Top-1 acc 56.250 (60.881)	Top-5 acc 78.125 (81.714)	lr 0.01422
Train [55][2370/3239]	Time 0.226 (0.606)	Data Time 0.001 (0.017)	Loss 2.4655 (2.6243)	Entropy 1.12867 (1.13156)	Top-1 acc 61.719 (60.876)	Top-5 acc 82.812 (81.717)	lr 0.01422
Train [55][2380/3239]	Time 0.234 (0.605)	Data Time 0.001 (0.017)	Loss 2.5110 (2.6243)	Entropy 1.12868 (1.13155)	Top-1 acc 67.578 (60.878)	Top-5 acc 85.938 (81.720)	lr 0.01422
Train [55][2390/3239]	Time 0.235 (0.605)	Data Time 0.001 (0.017)	Loss 2.7813 (2.6243)	Entropy 1.12865 (1.13154)	Top-1 acc 58.594 (60.878)	Top-5 acc 78.516 (81.718)	lr 0.01422
Train [55][2400/3239]	Time 0.234 (0.604)	Data Time 0.001 (0.017)	Loss 2.5021 (2.6243)	Entropy 1.12855 (1.13153)	Top-1 acc 62.891 (60.882)	Top-5 acc 83.594 (81.716)	lr 0.01422
Train [55][2410/3239]	Time 0.311 (0.603)	Data Time 0.001 (0.017)	Loss 2.5358 (2.6244)	Entropy 1.12852 (1.13152)	Top-1 acc 63.281 (60.880)	Top-5 acc 82.812 (81.710)	lr 0.01421
Train [55][2420/3239]	Time 0.208 (0.603)	Data Time 0.001 (0.017)	Loss 2.6363 (2.6244)	Entropy 1.12854 (1.13150)	Top-1 acc 60.938 (60.879)	Top-5 acc 82.812 (81.709)	lr 0.01421
Train [55][2430/3239]	Time 2.475 (0.602)	Data Time 0.001 (0.017)	Loss 2.8053 (2.6247)	Entropy 1.12854 (1.13149)	Top-1 acc 53.125 (60.872)	Top-5 acc 79.688 (81.704)	lr 0.01421
Train [55][2440/3239]	Time 0.221 (0.601)	Data Time 0.001 (0.017)	Loss 2.5605 (2.6245)	Entropy 1.12852 (1.13148)	Top-1 acc 61.328 (60.875)	Top-5 acc 82.422 (81.708)	lr 0.01421
Train [55][2450/3239]	Time 0.244 (0.600)	Data Time 0.001 (0.017)	Loss 2.7396 (2.6245)	Entropy 1.12850 (1.13147)	Top-1 acc 57.812 (60.872)	Top-5 acc 79.297 (81.710)	lr 0.01421
Train [55][2460/3239]	Time 0.224 (0.600)	Data Time 0.001 (0.017)	Loss 2.5799 (2.6243)	Entropy 1.12852 (1.13145)	Top-1 acc 57.812 (60.874)	Top-5 acc 82.812 (81.712)	lr 0.01421
Train [55][2470/3239]	Time 0.238 (0.599)	Data Time 0.001 (0.017)	Loss 2.6811 (2.6242)	Entropy 1.12847 (1.13144)	Top-1 acc 59.766 (60.877)	Top-5 acc 81.641 (81.711)	lr 0.01421
Train [55][2480/3239]	Time 0.222 (0.599)	Data Time 0.001 (0.017)	Loss 2.6094 (2.6242)	Entropy 1.12848 (1.13143)	Top-1 acc 60.156 (60.879)	Top-5 acc 83.203 (81.710)	lr 0.01421
Train [55][2490/3239]	Time 0.222 (0.598)	Data Time 0.001 (0.016)	Loss 2.5197 (2.6242)	Entropy 1.12844 (1.13142)	Top-1 acc 66.016 (60.877)	Top-5 acc 82.812 (81.708)	lr 0.01421
Train [55][2500/3239]	Time 0.354 (0.598)	Data Time 0.002 (0.016)	Loss 2.5569 (2.6242)	Entropy 1.12832 (1.13141)	Top-1 acc 60.938 (60.875)	Top-5 acc 81.641 (81.710)	lr 0.01421
Train [55][2510/3239]	Time 0.220 (0.597)	Data Time 0.001 (0.016)	Loss 2.5847 (2.6242)	Entropy 1.12831 (1.13139)	Top-1 acc 62.109 (60.874)	Top-5 acc 82.812 (81.710)	lr 0.01420
Train [55][2520/3239]	Time 0.208 (0.597)	Data Time 0.001 (0.016)	Loss 2.6346 (2.6245)	Entropy 1.12828 (1.13138)	Top-1 acc 62.891 (60.870)	Top-5 acc 82.031 (81.703)	lr 0.01420
Train [55][2530/3239]	Time 0.250 (0.596)	Data Time 0.001 (0.016)	Loss 2.6360 (2.6244)	Entropy 1.12826 (1.13137)	Top-1 acc 61.719 (60.870)	Top-5 acc 81.641 (81.705)	lr 0.01420
Train [55][2540/3239]	Time 2.367 (0.596)	Data Time 0.001 (0.016)	Loss 2.4056 (2.6244)	Entropy 1.12826 (1.13136)	Top-1 acc 64.844 (60.874)	Top-5 acc 86.328 (81.705)	lr 0.01420
Train [55][2550/3239]	Time 0.331 (0.594)	Data Time 0.001 (0.016)	Loss 2.8168 (2.6245)	Entropy 1.12823 (1.13135)	Top-1 acc 55.078 (60.871)	Top-5 acc 78.516 (81.702)	lr 0.01420
Train [55][2560/3239]	Time 0.218 (0.594)	Data Time 0.001 (0.016)	Loss 2.6800 (2.6246)	Entropy 1.12823 (1.13133)	Top-1 acc 60.156 (60.872)	Top-5 acc 80.469 (81.700)	lr 0.01420
Train [55][2570/3239]	Time 0.225 (0.593)	Data Time 0.001 (0.016)	Loss 2.5647 (2.6246)	Entropy 1.12825 (1.13132)	Top-1 acc 60.547 (60.871)	Top-5 acc 82.422 (81.700)	lr 0.01420
Train [55][2580/3239]	Time 0.244 (0.593)	Data Time 0.001 (0.016)	Loss 2.6553 (2.6246)	Entropy 1.12829 (1.13131)	Top-1 acc 56.641 (60.869)	Top-5 acc 82.812 (81.696)	lr 0.01420
Train [55][2590/3239]	Time 0.234 (0.592)	Data Time 0.001 (0.016)	Loss 2.5045 (2.6250)	Entropy 1.12821 (1.13130)	Top-1 acc 65.234 (60.864)	Top-5 acc 84.766 (81.689)	lr 0.01420
Train [55][2600/3239]	Time 0.236 (0.592)	Data Time 0.001 (0.016)	Loss 2.7384 (2.6250)	Entropy 1.12821 (1.13129)	Top-1 acc 58.984 (60.863)	Top-5 acc 79.297 (81.688)	lr 0.01420
Train [55][2610/3239]	Time 0.212 (0.591)	Data Time 0.001 (0.016)	Loss 2.6487 (2.6251)	Entropy 1.12818 (1.13127)	Top-1 acc 60.156 (60.859)	Top-5 acc 81.250 (81.686)	lr 0.01419
Train [55][2620/3239]	Time 0.283 (0.608)	Data Time 0.007 (0.016)	Loss 2.5692 (2.6251)	Entropy 1.12815 (1.13126)	Top-1 acc 62.500 (60.860)	Top-5 acc 82.031 (81.688)	lr 0.01419
Train [55][2630/3239]	Time 0.227 (0.608)	Data Time 0.002 (0.016)	Loss 2.3049 (2.6251)	Entropy 1.12816 (1.13125)	Top-1 acc 70.312 (60.864)	Top-5 acc 87.891 (81.690)	lr 0.01419
Train [55][2640/3239]	Time 0.317 (0.607)	Data Time 0.001 (0.016)	Loss 2.4991 (2.6252)	Entropy 1.12810 (1.13124)	Top-1 acc 59.766 (60.857)	Top-5 acc 85.547 (81.688)	lr 0.01419
Train [55][2650/3239]	Time 0.216 (0.607)	Data Time 0.001 (0.016)	Loss 2.9827 (2.6253)	Entropy 1.12810 (1.13123)	Top-1 acc 53.906 (60.858)	Top-5 acc 76.562 (81.686)	lr 0.01419
Train [55][2660/3239]	Time 0.247 (0.606)	Data Time 0.001 (0.016)	Loss 2.9543 (2.6254)	Entropy 1.12808 (1.13121)	Top-1 acc 54.688 (60.856)	Top-5 acc 76.562 (81.688)	lr 0.01419
Train [55][2670/3239]	Time 0.232 (0.605)	Data Time 0.002 (0.015)	Loss 3.1180 (2.6258)	Entropy 1.12800 (1.13120)	Top-1 acc 48.828 (60.846)	Top-5 acc 71.484 (81.678)	lr 0.01419
Train [55][2680/3239]	Time 0.233 (0.605)	Data Time 0.001 (0.015)	Loss 2.6106 (2.6256)	Entropy 1.12796 (1.13119)	Top-1 acc 59.766 (60.850)	Top-5 acc 82.031 (81.679)	lr 0.01419
Train [55][2690/3239]	Time 0.344 (0.604)	Data Time 0.001 (0.015)	Loss 2.8721 (2.6258)	Entropy 1.12794 (1.13118)	Top-1 acc 57.812 (60.848)	Top-5 acc 76.172 (81.673)	lr 0.01419
Train [55][2700/3239]	Time 0.234 (0.604)	Data Time 0.001 (0.015)	Loss 2.6665 (2.6258)	Entropy 1.12789 (1.13117)	Top-1 acc 61.719 (60.846)	Top-5 acc 79.688 (81.674)	lr 0.01419
Train [55][2710/3239]	Time 0.236 (0.603)	Data Time 0.001 (0.015)	Loss 2.6671 (2.6261)	Entropy 1.12787 (1.13115)	Top-1 acc 57.812 (60.839)	Top-5 acc 81.250 (81.666)	lr 0.01418
Train [55][2720/3239]	Time 0.229 (0.603)	Data Time 0.001 (0.015)	Loss 2.8042 (2.6260)	Entropy 1.12790 (1.13114)	Top-1 acc 58.984 (60.843)	Top-5 acc 76.562 (81.668)	lr 0.01418
Train [55][2730/3239]	Time 0.226 (0.602)	Data Time 0.001 (0.015)	Loss 2.5219 (2.6258)	Entropy 1.12780 (1.13113)	Top-1 acc 64.453 (60.849)	Top-5 acc 82.422 (81.670)	lr 0.01418
Train [55][2740/3239]	Time 0.164 (0.602)	Data Time 0.001 (0.015)	Loss 2.7366 (2.6258)	Entropy 1.12774 (1.13112)	Top-1 acc 55.469 (60.849)	Top-5 acc 81.250 (81.673)	lr 0.01418
Train [55][2750/3239]	Time 0.215 (0.601)	Data Time 0.001 (0.015)	Loss 2.5165 (2.6258)	Entropy 1.12771 (1.13111)	Top-1 acc 63.672 (60.850)	Top-5 acc 84.766 (81.674)	lr 0.01418
Train [55][2760/3239]	Time 0.226 (0.601)	Data Time 0.001 (0.015)	Loss 2.5863 (2.6256)	Entropy 1.12766 (1.13109)	Top-1 acc 59.375 (60.851)	Top-5 acc 83.203 (81.678)	lr 0.01418
Train [55][2770/3239]	Time 0.239 (0.600)	Data Time 0.001 (0.015)	Loss 2.9321 (2.6257)	Entropy 1.12760 (1.13108)	Top-1 acc 54.297 (60.849)	Top-5 acc 77.344 (81.678)	lr 0.01418
Train [55][2780/3239]	Time 0.321 (0.600)	Data Time 0.001 (0.015)	Loss 2.6824 (2.6257)	Entropy 1.12760 (1.13107)	Top-1 acc 57.422 (60.849)	Top-5 acc 81.250 (81.681)	lr 0.01418
Train [55][2790/3239]	Time 0.259 (0.599)	Data Time 0.001 (0.015)	Loss 2.5729 (2.6260)	Entropy 1.12754 (1.13106)	Top-1 acc 64.453 (60.844)	Top-5 acc 81.641 (81.674)	lr 0.01418
Train [55][2800/3239]	Time 0.278 (0.599)	Data Time 0.001 (0.015)	Loss 2.6754 (2.6261)	Entropy 1.12753 (1.13104)	Top-1 acc 59.375 (60.844)	Top-5 acc 80.469 (81.673)	lr 0.01418
Train [55][2810/3239]	Time 0.236 (0.598)	Data Time 0.001 (0.015)	Loss 2.7221 (2.6262)	Entropy 1.12749 (1.13103)	Top-1 acc 55.859 (60.841)	Top-5 acc 80.469 (81.673)	lr 0.01417
Train [55][2820/3239]	Time 0.219 (0.598)	Data Time 0.001 (0.015)	Loss 2.6523 (2.6263)	Entropy 1.12745 (1.13102)	Top-1 acc 62.109 (60.842)	Top-5 acc 81.250 (81.670)	lr 0.01417
Train [55][2830/3239]	Time 0.314 (0.597)	Data Time 0.001 (0.015)	Loss 2.6350 (2.6262)	Entropy 1.12742 (1.13101)	Top-1 acc 61.719 (60.846)	Top-5 acc 81.250 (81.673)	lr 0.01417
Train [55][2840/3239]	Time 0.217 (0.597)	Data Time 0.002 (0.015)	Loss 2.6103 (2.6264)	Entropy 1.12741 (1.13099)	Top-1 acc 59.766 (60.841)	Top-5 acc 79.297 (81.668)	lr 0.01417
Train [55][2850/3239]	Time 0.267 (0.596)	Data Time 0.001 (0.015)	Loss 2.7235 (2.6264)	Entropy 1.12740 (1.13098)	Top-1 acc 56.641 (60.840)	Top-5 acc 80.469 (81.667)	lr 0.01417
Train [55][2860/3239]	Time 0.285 (0.596)	Data Time 0.001 (0.015)	Loss 2.6615 (2.6264)	Entropy 1.12732 (1.13097)	Top-1 acc 60.938 (60.842)	Top-5 acc 80.859 (81.667)	lr 0.01417
Train [55][2870/3239]	Time 0.247 (0.595)	Data Time 0.001 (0.015)	Loss 2.7165 (2.6264)	Entropy 1.12723 (1.13096)	Top-1 acc 56.250 (60.839)	Top-5 acc 80.469 (81.666)	lr 0.01417
Train [55][2880/3239]	Time 0.341 (0.595)	Data Time 0.001 (0.014)	Loss 2.6916 (2.6263)	Entropy 1.12717 (1.13094)	Top-1 acc 60.156 (60.843)	Top-5 acc 80.469 (81.669)	lr 0.01417
Train [55][2890/3239]	Time 0.233 (0.595)	Data Time 0.002 (0.014)	Loss 2.6039 (2.6263)	Entropy 1.12715 (1.13093)	Top-1 acc 61.719 (60.841)	Top-5 acc 83.203 (81.668)	lr 0.01417
Train [55][2900/3239]	Time 0.280 (0.594)	Data Time 0.001 (0.014)	Loss 2.8131 (2.6265)	Entropy 1.12711 (1.13092)	Top-1 acc 53.906 (60.837)	Top-5 acc 77.344 (81.665)	lr 0.01417
Train [55][2910/3239]	Time 0.235 (0.594)	Data Time 0.001 (0.014)	Loss 2.7899 (2.6266)	Entropy 1.12714 (1.13090)	Top-1 acc 53.516 (60.831)	Top-5 acc 78.516 (81.663)	lr 0.01416
Train [55][2920/3239]	Time 0.260 (0.593)	Data Time 0.001 (0.014)	Loss 2.7408 (2.6268)	Entropy 1.12713 (1.13089)	Top-1 acc 58.984 (60.824)	Top-5 acc 79.297 (81.661)	lr 0.01416
Train [55][2930/3239]	Time 0.167 (0.593)	Data Time 0.001 (0.014)	Loss 2.5807 (2.6268)	Entropy 1.12713 (1.13088)	Top-1 acc 59.375 (60.822)	Top-5 acc 83.203 (81.660)	lr 0.01416
Train [55][2940/3239]	Time 0.259 (0.592)	Data Time 0.001 (0.014)	Loss 2.5635 (2.6268)	Entropy 1.12715 (1.13087)	Top-1 acc 61.328 (60.821)	Top-5 acc 83.594 (81.661)	lr 0.01416
Train [55][2950/3239]	Time 0.238 (0.609)	Data Time 0.003 (0.014)	Loss 2.6380 (2.6268)	Entropy 1.12715 (1.13085)	Top-1 acc 59.375 (60.824)	Top-5 acc 80.078 (81.659)	lr 0.01416
Train [55][2960/3239]	Time 0.251 (0.609)	Data Time 0.002 (0.014)	Loss 2.6821 (2.6271)	Entropy 1.12716 (1.13084)	Top-1 acc 58.984 (60.819)	Top-5 acc 81.641 (81.653)	lr 0.01416
Train [55][2970/3239]	Time 0.256 (0.608)	Data Time 0.002 (0.014)	Loss 2.5904 (2.6275)	Entropy 1.12717 (1.13083)	Top-1 acc 61.719 (60.812)	Top-5 acc 85.156 (81.647)	lr 0.01416
Train [55][2980/3239]	Time 0.252 (0.608)	Data Time 0.001 (0.014)	Loss 3.0692 (2.6278)	Entropy 1.12713 (1.13082)	Top-1 acc 51.562 (60.808)	Top-5 acc 71.484 (81.641)	lr 0.01416
Train [55][2990/3239]	Time 0.252 (0.607)	Data Time 0.001 (0.014)	Loss 2.6336 (2.6278)	Entropy 1.12705 (1.13080)	Top-1 acc 59.766 (60.807)	Top-5 acc 81.250 (81.639)	lr 0.01416
Train [55][3000/3239]	Time 0.263 (0.607)	Data Time 0.001 (0.014)	Loss 2.6467 (2.6278)	Entropy 1.12715 (1.13079)	Top-1 acc 58.594 (60.807)	Top-5 acc 82.422 (81.641)	lr 0.01416
Train [55][3010/3239]	Time 0.239 (0.606)	Data Time 0.001 (0.014)	Loss 2.8248 (2.6278)	Entropy 1.12709 (1.13078)	Top-1 acc 59.766 (60.806)	Top-5 acc 78.125 (81.640)	lr 0.01415
Train [55][3020/3239]	Time 0.265 (0.606)	Data Time 0.001 (0.014)	Loss 2.6484 (2.6278)	Entropy 1.12707 (1.13077)	Top-1 acc 63.281 (60.804)	Top-5 acc 80.469 (81.639)	lr 0.01415
Train [55][3030/3239]	Time 0.226 (0.605)	Data Time 0.001 (0.014)	Loss 2.7351 (2.6279)	Entropy 1.12702 (1.13075)	Top-1 acc 60.156 (60.802)	Top-5 acc 78.125 (81.637)	lr 0.01415
Train [55][3040/3239]	Time 0.258 (0.605)	Data Time 0.001 (0.014)	Loss 2.7599 (2.6280)	Entropy 1.12702 (1.13074)	Top-1 acc 58.203 (60.802)	Top-5 acc 77.344 (81.635)	lr 0.01415
Train [55][3050/3239]	Time 0.223 (0.604)	Data Time 0.001 (0.014)	Loss 2.5974 (2.6281)	Entropy 1.12691 (1.13073)	Top-1 acc 59.375 (60.799)	Top-5 acc 82.812 (81.629)	lr 0.01415
Train [55][3060/3239]	Time 0.227 (0.604)	Data Time 0.001 (0.014)	Loss 2.5750 (2.6282)	Entropy 1.12683 (1.13072)	Top-1 acc 61.719 (60.799)	Top-5 acc 81.641 (81.629)	lr 0.01415
Train [55][3070/3239]	Time 0.241 (0.603)	Data Time 0.001 (0.014)	Loss 2.6011 (2.6282)	Entropy 1.12680 (1.13070)	Top-1 acc 61.719 (60.798)	Top-5 acc 81.641 (81.628)	lr 0.01415
Train [55][3080/3239]	Time 0.218 (0.603)	Data Time 0.001 (0.014)	Loss 2.6195 (2.6286)	Entropy 1.12681 (1.13069)	Top-1 acc 62.500 (60.791)	Top-5 acc 82.031 (81.621)	lr 0.01415
Train [55][3090/3239]	Time 0.245 (0.602)	Data Time 0.001 (0.014)	Loss 2.6342 (2.6287)	Entropy 1.12684 (1.13068)	Top-1 acc 60.938 (60.792)	Top-5 acc 79.688 (81.620)	lr 0.01415
Train [55][3100/3239]	Time 0.248 (0.602)	Data Time 0.001 (0.014)	Loss 2.7272 (2.6288)	Entropy 1.12684 (1.13067)	Top-1 acc 55.859 (60.788)	Top-5 acc 77.734 (81.614)	lr 0.01415
Train [55][3110/3239]	Time 0.230 (0.601)	Data Time 0.001 (0.014)	Loss 2.6749 (2.6291)	Entropy 1.12682 (1.13065)	Top-1 acc 58.203 (60.782)	Top-5 acc 83.203 (81.612)	lr 0.01414
Train [55][3120/3239]	Time 0.298 (0.601)	Data Time 0.001 (0.014)	Loss 2.7629 (2.6291)	Entropy 1.12680 (1.13064)	Top-1 acc 61.328 (60.784)	Top-5 acc 77.344 (81.612)	lr 0.01414
Train [55][3130/3239]	Time 0.233 (0.600)	Data Time 0.001 (0.013)	Loss 2.6673 (2.6293)	Entropy 1.12683 (1.13063)	Top-1 acc 62.109 (60.781)	Top-5 acc 81.641 (81.607)	lr 0.01414
Train [55][3140/3239]	Time 0.272 (0.600)	Data Time 0.001 (0.013)	Loss 2.5425 (2.6292)	Entropy 1.12675 (1.13062)	Top-1 acc 64.062 (60.780)	Top-5 acc 85.156 (81.610)	lr 0.01414
Train [55][3150/3239]	Time 0.216 (0.600)	Data Time 0.003 (0.013)	Loss 2.5315 (2.6293)	Entropy 1.12676 (1.13061)	Top-1 acc 63.672 (60.777)	Top-5 acc 83.984 (81.607)	lr 0.01414
Train [55][3160/3239]	Time 0.216 (0.599)	Data Time 0.001 (0.013)	Loss 2.6398 (2.6294)	Entropy 1.12672 (1.13059)	Top-1 acc 61.719 (60.771)	Top-5 acc 82.031 (81.605)	lr 0.01414
Train [55][3170/3239]	Time 0.379 (0.599)	Data Time 0.001 (0.013)	Loss 2.6430 (2.6296)	Entropy 1.12661 (1.13058)	Top-1 acc 62.891 (60.773)	Top-5 acc 77.734 (81.602)	lr 0.01414
Train [55][3180/3239]	Time 0.215 (0.598)	Data Time 0.000 (0.013)	Loss 2.6626 (2.6294)	Entropy 1.12662 (1.13057)	Top-1 acc 57.031 (60.775)	Top-5 acc 80.078 (81.604)	lr 0.01414
Train [55][3190/3239]	Time 0.201 (0.598)	Data Time 0.000 (0.013)	Loss 2.4262 (2.6294)	Entropy 1.12657 (1.13056)	Top-1 acc 65.234 (60.775)	Top-5 acc 84.375 (81.603)	lr 0.01414
Train [55][3200/3239]	Time 0.206 (0.597)	Data Time 0.000 (0.013)	Loss 2.5198 (2.6294)	Entropy 1.12655 (1.13054)	Top-1 acc 62.891 (60.777)	Top-5 acc 83.203 (81.605)	lr 0.01414
Train [55][3210/3239]	Time 0.213 (0.597)	Data Time 0.000 (0.013)	Loss 2.5559 (2.6294)	Entropy 1.12651 (1.13053)	Top-1 acc 62.891 (60.776)	Top-5 acc 85.547 (81.604)	lr 0.01413
Train [55][3220/3239]	Time 0.294 (0.596)	Data Time 0.000 (0.013)	Loss 2.7097 (2.6295)	Entropy 1.12648 (1.13052)	Top-1 acc 56.250 (60.770)	Top-5 acc 80.859 (81.603)	lr 0.01413
Train [55][3230/3239]	Time 0.217 (0.596)	Data Time 0.000 (0.013)	Loss 2.7526 (2.6297)	Entropy 1.12647 (1.13051)	Top-1 acc 60.938 (60.766)	Top-5 acc 77.344 (81.598)	lr 0.01413
Train [55][3239/3239]	Time 2.221 (0.595)	Data Time 0.000 (0.013)	Loss 2.9456 (2.6297)	Entropy 1.12647 (1.13050)	Top-1 acc 56.790 (60.769)	Top-5 acc 75.309 (81.597)	lr 0.01413
==========Valid [55/120]	loss 1.491	top-1 acc 66.291 (66.291)	top-5 acc 86.113	Train top-1 60.769	top-5 81.597	Entropy 1.12647	Latency-None: 0.000ms	Flops: 548.34M
Train [56][0/3239]	Time 36.886 (36.886)	Data Time 35.534 (35.534)	Loss 2.4058 (2.4058)	Entropy 1.12644 (1.12644)	Top-1 acc 64.453 (64.453)	Top-5 acc 83.984 (83.984)	lr 0.01413
Train [56][10/3239]	Time 2.500 (3.869)	Data Time 0.001 (3.236)	Loss 2.8419 (2.6254)	Entropy 1.12644 (1.12644)	Top-1 acc 55.078 (60.618)	Top-5 acc 81.250 (81.854)	lr 0.01413
Train [56][20/3239]	Time 0.229 (2.144)	Data Time 0.001 (1.696)	Loss 2.7163 (2.6314)	Entropy 1.12640 (1.12642)	Top-1 acc 53.906 (60.175)	Top-5 acc 79.688 (82.161)	lr 0.01413
Train [56][30/3239]	Time 0.303 (1.603)	Data Time 0.001 (1.149)	Loss 2.6871 (2.6346)	Entropy 1.12638 (1.12641)	Top-1 acc 61.328 (60.282)	Top-5 acc 80.469 (82.056)	lr 0.01413
Train [56][40/3239]	Time 0.221 (1.322)	Data Time 0.001 (0.869)	Loss 2.5721 (2.6286)	Entropy 1.12633 (1.12639)	Top-1 acc 61.719 (60.585)	Top-5 acc 84.375 (82.050)	lr 0.01413
Train [56][50/3239]	Time 0.305 (2.114)	Data Time 0.003 (0.699)	Loss 2.4876 (2.6220)	Entropy 1.12625 (1.12637)	Top-1 acc 63.672 (60.677)	Top-5 acc 83.984 (82.077)	lr 0.01413
Train [56][60/3239]	Time 0.214 (1.854)	Data Time 0.002 (0.585)	Loss 2.6077 (2.6266)	Entropy 1.12629 (1.12635)	Top-1 acc 62.500 (60.771)	Top-5 acc 83.203 (81.961)	lr 0.01413
Train [56][70/3239]	Time 0.226 (1.657)	Data Time 0.001 (0.503)	Loss 2.5015 (2.6163)	Entropy 1.12627 (1.12634)	Top-1 acc 64.062 (60.938)	Top-5 acc 83.594 (82.059)	lr 0.01412
Train [56][80/3239]	Time 0.379 (1.511)	Data Time 0.002 (0.441)	Loss 3.8445 (2.6270)	Entropy 1.12622 (1.12633)	Top-1 acc 33.594 (60.749)	Top-5 acc 61.328 (81.901)	lr 0.01412
Train [56][90/3239]	Time 0.222 (1.397)	Data Time 0.001 (0.393)	Loss 2.7687 (2.6261)	Entropy 1.12624 (1.12632)	Top-1 acc 57.422 (60.839)	Top-5 acc 80.469 (81.971)	lr 0.01412
Train [56][100/3239]	Time 0.217 (1.305)	Data Time 0.002 (0.354)	Loss 2.6518 (2.6242)	Entropy 1.12623 (1.12631)	Top-1 acc 61.328 (60.934)	Top-5 acc 80.078 (81.946)	lr 0.01412
Train [56][110/3239]	Time 0.266 (1.229)	Data Time 0.001 (0.322)	Loss 2.7764 (2.6249)	Entropy 1.12621 (1.12630)	Top-1 acc 59.766 (60.909)	Top-5 acc 80.469 (81.947)	lr 0.01412
Train [56][120/3239]	Time 2.759 (1.169)	Data Time 0.007 (0.296)	Loss 2.6516 (2.6249)	Entropy 1.12621 (1.12630)	Top-1 acc 58.203 (60.841)	Top-5 acc 82.422 (81.925)	lr 0.01412
Train [56][130/3239]	Time 0.219 (1.099)	Data Time 0.002 (0.274)	Loss 2.5258 (2.6226)	Entropy 1.12617 (1.12629)	Top-1 acc 62.500 (60.896)	Top-5 acc 82.422 (81.957)	lr 0.01412
Train [56][140/3239]	Time 0.225 (1.053)	Data Time 0.001 (0.254)	Loss 2.5943 (2.6215)	Entropy 1.12616 (1.12628)	Top-1 acc 60.938 (60.946)	Top-5 acc 82.812 (81.937)	lr 0.01412
Train [56][150/3239]	Time 0.227 (1.015)	Data Time 0.001 (0.238)	Loss 2.5632 (2.6200)	Entropy 1.12615 (1.12627)	Top-1 acc 57.812 (60.917)	Top-5 acc 83.984 (81.969)	lr 0.01412
Train [56][160/3239]	Time 0.274 (0.981)	Data Time 0.001 (0.223)	Loss 2.3254 (2.6169)	Entropy 1.12618 (1.12626)	Top-1 acc 68.359 (61.015)	Top-5 acc 86.328 (82.026)	lr 0.01412
Train [56][170/3239]	Time 0.201 (0.952)	Data Time 0.001 (0.210)	Loss 2.5140 (2.6154)	Entropy 1.12613 (1.12626)	Top-1 acc 67.578 (61.109)	Top-5 acc 82.422 (82.059)	lr 0.01411
Train [56][180/3239]	Time 0.232 (0.926)	Data Time 0.001 (0.198)	Loss 2.4593 (2.6115)	Entropy 1.12615 (1.12625)	Top-1 acc 65.625 (61.214)	Top-5 acc 83.594 (82.154)	lr 0.01411
Train [56][190/3239]	Time 0.204 (0.900)	Data Time 0.001 (0.188)	Loss 2.6224 (2.6098)	Entropy 1.12613 (1.12624)	Top-1 acc 59.766 (61.175)	Top-5 acc 82.812 (82.197)	lr 0.01411
Train [56][200/3239]	Time 0.217 (0.879)	Data Time 0.001 (0.179)	Loss 2.7072 (2.6113)	Entropy 1.12609 (1.12624)	Top-1 acc 57.812 (61.151)	Top-5 acc 80.469 (82.140)	lr 0.01411
Train [56][210/3239]	Time 0.223 (0.859)	Data Time 0.001 (0.170)	Loss 2.6526 (2.6113)	Entropy 1.12606 (1.12623)	Top-1 acc 57.812 (61.174)	Top-5 acc 79.688 (82.111)	lr 0.01411
Train [56][220/3239]	Time 0.244 (0.841)	Data Time 0.001 (0.163)	Loss 2.6905 (2.6123)	Entropy 1.12600 (1.12622)	Top-1 acc 60.547 (61.166)	Top-5 acc 80.469 (82.056)	lr 0.01411
Train [56][230/3239]	Time 2.423 (0.825)	Data Time 0.001 (0.156)	Loss 2.5992 (2.6120)	Entropy 1.12600 (1.12621)	Top-1 acc 62.500 (61.186)	Top-5 acc 82.812 (82.033)	lr 0.01411
Train [56][240/3239]	Time 0.246 (0.800)	Data Time 0.001 (0.149)	Loss 2.7821 (2.6124)	Entropy 1.12602 (1.12621)	Top-1 acc 56.250 (61.173)	Top-5 acc 78.125 (82.018)	lr 0.01411
Train [56][250/3239]	Time 0.212 (0.787)	Data Time 0.001 (0.144)	Loss 2.6386 (2.6112)	Entropy 1.12599 (1.12620)	Top-1 acc 61.328 (61.168)	Top-5 acc 80.859 (82.039)	lr 0.01411
Train [56][260/3239]	Time 0.221 (0.774)	Data Time 0.001 (0.138)	Loss 2.7292 (2.6109)	Entropy 1.12593 (1.12619)	Top-1 acc 59.375 (61.193)	Top-5 acc 78.906 (82.039)	lr 0.01411
Train [56][270/3239]	Time 0.297 (0.763)	Data Time 0.001 (0.133)	Loss 2.6664 (2.6109)	Entropy 1.12587 (1.12618)	Top-1 acc 58.203 (61.158)	Top-5 acc 82.031 (82.038)	lr 0.01410
Train [56][280/3239]	Time 0.213 (0.753)	Data Time 0.001 (0.128)	Loss 2.5657 (2.6113)	Entropy 1.12580 (1.12616)	Top-1 acc 61.719 (61.154)	Top-5 acc 84.375 (82.041)	lr 0.01410
Train [56][290/3239]	Time 0.225 (0.743)	Data Time 0.002 (0.124)	Loss 2.5634 (2.6119)	Entropy 1.12573 (1.12615)	Top-1 acc 63.281 (61.135)	Top-5 acc 82.422 (82.011)	lr 0.01410
Train [56][300/3239]	Time 0.215 (0.733)	Data Time 0.001 (0.120)	Loss 2.5761 (2.6110)	Entropy 1.12573 (1.12614)	Top-1 acc 64.062 (61.157)	Top-5 acc 82.422 (82.026)	lr 0.01410
Train [56][310/3239]	Time 0.232 (0.725)	Data Time 0.001 (0.116)	Loss 2.4730 (2.6105)	Entropy 1.12568 (1.12612)	Top-1 acc 62.500 (61.165)	Top-5 acc 84.375 (82.025)	lr 0.01410
Train [56][320/3239]	Time 0.330 (0.717)	Data Time 0.002 (0.113)	Loss 2.6017 (2.6109)	Entropy 1.12566 (1.12611)	Top-1 acc 60.547 (61.167)	Top-5 acc 83.203 (82.025)	lr 0.01410
Train [56][330/3239]	Time 0.303 (0.709)	Data Time 0.001 (0.109)	Loss 2.7110 (2.6127)	Entropy 1.12558 (1.12609)	Top-1 acc 54.688 (61.109)	Top-5 acc 79.688 (81.990)	lr 0.01410
Train [56][340/3239]	Time 2.504 (0.702)	Data Time 0.001 (0.106)	Loss 2.5933 (2.6126)	Entropy 1.12558 (1.12608)	Top-1 acc 60.156 (61.146)	Top-5 acc 83.594 (81.989)	lr 0.01410
Train [56][350/3239]	Time 0.240 (0.689)	Data Time 0.002 (0.103)	Loss 2.4814 (2.6113)	Entropy 1.12556 (1.12606)	Top-1 acc 63.281 (61.206)	Top-5 acc 85.156 (82.026)	lr 0.01410
Train [56][360/3239]	Time 0.236 (0.683)	Data Time 0.002 (0.100)	Loss 2.7561 (2.6112)	Entropy 1.12552 (1.12605)	Top-1 acc 55.859 (61.227)	Top-5 acc 80.469 (82.027)	lr 0.01410
Train [56][370/3239]	Time 0.229 (0.677)	Data Time 0.001 (0.098)	Loss 2.6129 (2.6110)	Entropy 1.12548 (1.12603)	Top-1 acc 58.984 (61.240)	Top-5 acc 82.812 (82.029)	lr 0.01409
Train [56][380/3239]	Time 0.217 (0.671)	Data Time 0.001 (0.095)	Loss 2.7292 (2.6099)	Entropy 1.12545 (1.12602)	Top-1 acc 58.984 (61.249)	Top-5 acc 78.516 (82.031)	lr 0.01409
Train [56][390/3239]	Time 0.236 (0.666)	Data Time 0.001 (0.093)	Loss 2.4590 (2.6103)	Entropy 1.12535 (1.12600)	Top-1 acc 63.281 (61.248)	Top-5 acc 85.938 (82.015)	lr 0.01409
Train [56][400/3239]	Time 0.228 (0.661)	Data Time 0.001 (0.090)	Loss 2.5784 (2.6093)	Entropy 1.12523 (1.12599)	Top-1 acc 61.719 (61.282)	Top-5 acc 82.031 (82.014)	lr 0.01409
Train [56][410/3239]	Time 0.279 (0.772)	Data Time 0.003 (0.088)	Loss 2.8731 (2.6112)	Entropy 1.12519 (1.12597)	Top-1 acc 53.906 (61.216)	Top-5 acc 74.609 (81.975)	lr 0.01409
Train [56][420/3239]	Time 0.239 (0.766)	Data Time 0.002 (0.086)	Loss 2.4316 (2.6111)	Entropy 1.12512 (1.12595)	Top-1 acc 66.016 (61.208)	Top-5 acc 83.984 (81.970)	lr 0.01409
Train [56][430/3239]	Time 0.215 (0.760)	Data Time 0.002 (0.084)	Loss 2.7194 (2.6110)	Entropy 1.12502 (1.12593)	Top-1 acc 59.766 (61.224)	Top-5 acc 78.125 (81.947)	lr 0.01409
Train [56][440/3239]	Time 0.266 (0.753)	Data Time 0.002 (0.082)	Loss 2.5867 (2.6115)	Entropy 1.12497 (1.12591)	Top-1 acc 64.062 (61.224)	Top-5 acc 80.078 (81.940)	lr 0.01409
Train [56][450/3239]	Time 2.450 (0.746)	Data Time 0.003 (0.081)	Loss 2.7190 (2.6105)	Entropy 1.12497 (1.12589)	Top-1 acc 56.641 (61.242)	Top-5 acc 80.078 (81.951)	lr 0.01409
Train [56][460/3239]	Time 0.380 (0.736)	Data Time 0.002 (0.079)	Loss 2.6694 (2.6103)	Entropy 1.12482 (1.12586)	Top-1 acc 60.938 (61.253)	Top-5 acc 82.812 (81.952)	lr 0.01409
Train [56][470/3239]	Time 0.227 (0.730)	Data Time 0.001 (0.077)	Loss 2.4898 (2.6110)	Entropy 1.12480 (1.12584)	Top-1 acc 62.891 (61.229)	Top-5 acc 82.812 (81.921)	lr 0.01408
Train [56][480/3239]	Time 0.268 (0.725)	Data Time 0.001 (0.076)	Loss 2.6336 (2.6104)	Entropy 1.12476 (1.12582)	Top-1 acc 62.500 (61.229)	Top-5 acc 81.250 (81.936)	lr 0.01408
Train [56][490/3239]	Time 0.240 (0.720)	Data Time 0.001 (0.074)	Loss 2.5662 (2.6102)	Entropy 1.12460 (1.12579)	Top-1 acc 64.453 (61.221)	Top-5 acc 83.594 (81.933)	lr 0.01408
Train [56][500/3239]	Time 0.227 (0.715)	Data Time 0.001 (0.073)	Loss 2.7365 (2.6105)	Entropy 1.12458 (1.12577)	Top-1 acc 53.516 (61.195)	Top-5 acc 80.469 (81.927)	lr 0.01408
Train [56][510/3239]	Time 0.376 (0.710)	Data Time 0.001 (0.071)	Loss 2.9298 (2.6115)	Entropy 1.12456 (1.12575)	Top-1 acc 53.125 (61.165)	Top-5 acc 75.000 (81.909)	lr 0.01408
Train [56][520/3239]	Time 0.221 (0.705)	Data Time 0.001 (0.070)	Loss 2.6624 (2.6122)	Entropy 1.12463 (1.12572)	Top-1 acc 58.203 (61.154)	Top-5 acc 78.516 (81.881)	lr 0.01408
Train [56][530/3239]	Time 0.216 (0.701)	Data Time 0.001 (0.069)	Loss 2.5716 (2.6121)	Entropy 1.12461 (1.12570)	Top-1 acc 65.234 (61.177)	Top-5 acc 82.812 (81.884)	lr 0.01408
Train [56][540/3239]	Time 0.264 (0.696)	Data Time 0.002 (0.067)	Loss 2.5685 (2.6126)	Entropy 1.12461 (1.12568)	Top-1 acc 60.156 (61.176)	Top-5 acc 83.203 (81.875)	lr 0.01408
Train [56][550/3239]	Time 0.215 (0.692)	Data Time 0.001 (0.066)	Loss 2.6565 (2.6122)	Entropy 1.12458 (1.12566)	Top-1 acc 59.375 (61.164)	Top-5 acc 83.984 (81.890)	lr 0.01408
Train [56][560/3239]	Time 2.514 (0.688)	Data Time 0.001 (0.065)	Loss 2.6379 (2.6131)	Entropy 1.12458 (1.12565)	Top-1 acc 58.203 (61.120)	Top-5 acc 79.688 (81.869)	lr 0.01408
Train [56][570/3239]	Time 0.272 (0.681)	Data Time 0.001 (0.064)	Loss 2.6391 (2.6133)	Entropy 1.12457 (1.12563)	Top-1 acc 60.547 (61.113)	Top-5 acc 80.078 (81.853)	lr 0.01407
Train [56][580/3239]	Time 0.215 (0.677)	Data Time 0.001 (0.063)	Loss 2.7021 (2.6132)	Entropy 1.12457 (1.12561)	Top-1 acc 57.422 (61.114)	Top-5 acc 78.906 (81.853)	lr 0.01407
Train [56][590/3239]	Time 0.239 (0.673)	Data Time 0.001 (0.062)	Loss 2.4951 (2.6132)	Entropy 1.12456 (1.12559)	Top-1 acc 67.969 (61.119)	Top-5 acc 82.812 (81.859)	lr 0.01407
Train [56][600/3239]	Time 0.212 (0.669)	Data Time 0.001 (0.061)	Loss 2.6116 (2.6136)	Entropy 1.12460 (1.12557)	Top-1 acc 62.891 (61.108)	Top-5 acc 82.812 (81.851)	lr 0.01407
Train [56][610/3239]	Time 0.232 (0.666)	Data Time 0.001 (0.060)	Loss 2.6343 (2.6134)	Entropy 1.12460 (1.12556)	Top-1 acc 59.375 (61.119)	Top-5 acc 82.031 (81.859)	lr 0.01407
Train [56][620/3239]	Time 0.226 (0.663)	Data Time 0.001 (0.059)	Loss 2.5520 (2.6128)	Entropy 1.12452 (1.12554)	Top-1 acc 63.672 (61.153)	Top-5 acc 85.156 (81.877)	lr 0.01407
Train [56][630/3239]	Time 0.213 (0.660)	Data Time 0.001 (0.058)	Loss 2.6510 (2.6127)	Entropy 1.12447 (1.12553)	Top-1 acc 62.891 (61.165)	Top-5 acc 80.469 (81.878)	lr 0.01407
Train [56][640/3239]	Time 0.212 (0.657)	Data Time 0.001 (0.057)	Loss 2.6349 (2.6122)	Entropy 1.12448 (1.12551)	Top-1 acc 61.719 (61.178)	Top-5 acc 82.422 (81.880)	lr 0.01407
Train [56][650/3239]	Time 0.227 (0.654)	Data Time 0.001 (0.056)	Loss 2.5663 (2.6123)	Entropy 1.12450 (1.12549)	Top-1 acc 62.109 (61.188)	Top-5 acc 81.641 (81.878)	lr 0.01407
Train [56][660/3239]	Time 0.211 (0.651)	Data Time 0.001 (0.056)	Loss 2.4566 (2.6124)	Entropy 1.12447 (1.12548)	Top-1 acc 64.844 (61.176)	Top-5 acc 86.328 (81.879)	lr 0.01407
Train [56][670/3239]	Time 2.422 (0.648)	Data Time 0.001 (0.055)	Loss 2.7871 (2.6124)	Entropy 1.12447 (1.12546)	Top-1 acc 57.812 (61.169)	Top-5 acc 76.562 (81.874)	lr 0.01406
Train [56][680/3239]	Time 0.238 (0.642)	Data Time 0.001 (0.054)	Loss 2.6826 (2.6126)	Entropy 1.12444 (1.12545)	Top-1 acc 61.719 (61.167)	Top-5 acc 80.859 (81.867)	lr 0.01406
Train [56][690/3239]	Time 0.224 (0.640)	Data Time 0.001 (0.053)	Loss 2.4535 (2.6119)	Entropy 1.12444 (1.12543)	Top-1 acc 65.625 (61.202)	Top-5 acc 85.938 (81.888)	lr 0.01406
Train [56][700/3239]	Time 0.330 (0.637)	Data Time 0.001 (0.052)	Loss 2.5758 (2.6117)	Entropy 1.12442 (1.12542)	Top-1 acc 62.891 (61.202)	Top-5 acc 80.469 (81.882)	lr 0.01406
Train [56][710/3239]	Time 0.228 (0.635)	Data Time 0.002 (0.052)	Loss 2.4818 (2.6110)	Entropy 1.12444 (1.12541)	Top-1 acc 64.453 (61.206)	Top-5 acc 83.984 (81.899)	lr 0.01406
Train [56][720/3239]	Time 0.209 (0.633)	Data Time 0.001 (0.051)	Loss 2.6680 (2.6120)	Entropy 1.12439 (1.12539)	Top-1 acc 59.375 (61.172)	Top-5 acc 79.297 (81.867)	lr 0.01406
Train [56][730/3239]	Time 0.250 (0.630)	Data Time 0.001 (0.050)	Loss 2.6022 (2.6115)	Entropy 1.12429 (1.12538)	Top-1 acc 60.156 (61.181)	Top-5 acc 80.859 (81.871)	lr 0.01406
Train [56][740/3239]	Time 0.220 (0.628)	Data Time 0.001 (0.050)	Loss 2.6893 (2.6115)	Entropy 1.12426 (1.12536)	Top-1 acc 58.203 (61.177)	Top-5 acc 81.250 (81.877)	lr 0.01406
Train [56][750/3239]	Time 0.329 (0.626)	Data Time 0.001 (0.049)	Loss 2.7508 (2.6120)	Entropy 1.12420 (1.12535)	Top-1 acc 59.766 (61.163)	Top-5 acc 77.734 (81.859)	lr 0.01406
Train [56][760/3239]	Time 0.228 (0.623)	Data Time 0.001 (0.048)	Loss 2.5828 (2.6121)	Entropy 1.12412 (1.12533)	Top-1 acc 62.500 (61.162)	Top-5 acc 83.203 (81.858)	lr 0.01406
Train [56][770/3239]	Time 0.287 (0.680)	Data Time 0.004 (0.048)	Loss 2.5437 (2.6123)	Entropy 1.12408 (1.12532)	Top-1 acc 60.156 (61.150)	Top-5 acc 85.156 (81.858)	lr 0.01405
Train [56][780/3239]	Time 3.348 (0.679)	Data Time 0.003 (0.047)	Loss 2.5068 (2.6121)	Entropy 1.12408 (1.12530)	Top-1 acc 62.109 (61.156)	Top-5 acc 85.547 (81.868)	lr 0.01405
Train [56][790/3239]	Time 0.252 (0.673)	Data Time 0.002 (0.047)	Loss 2.6040 (2.6121)	Entropy 1.12407 (1.12528)	Top-1 acc 62.109 (61.164)	Top-5 acc 81.250 (81.880)	lr 0.01405
Train [56][800/3239]	Time 0.326 (0.671)	Data Time 0.001 (0.046)	Loss 2.7090 (2.6117)	Entropy 1.12406 (1.12527)	Top-1 acc 57.812 (61.173)	Top-5 acc 81.250 (81.894)	lr 0.01405
Train [56][810/3239]	Time 0.165 (0.668)	Data Time 0.001 (0.046)	Loss 2.4539 (2.6108)	Entropy 1.12406 (1.12525)	Top-1 acc 63.672 (61.186)	Top-5 acc 84.766 (81.910)	lr 0.01405
Train [56][820/3239]	Time 0.263 (0.666)	Data Time 0.002 (0.045)	Loss 2.4150 (2.6113)	Entropy 1.12404 (1.12524)	Top-1 acc 64.062 (61.163)	Top-5 acc 87.109 (81.904)	lr 0.01405
Train [56][830/3239]	Time 0.229 (0.663)	Data Time 0.001 (0.045)	Loss 2.4414 (2.6112)	Entropy 1.12398 (1.12522)	Top-1 acc 71.094 (61.162)	Top-5 acc 82.812 (81.899)	lr 0.01405
Train [56][840/3239]	Time 0.225 (0.661)	Data Time 0.001 (0.044)	Loss 2.6551 (2.6117)	Entropy 1.12396 (1.12521)	Top-1 acc 59.766 (61.152)	Top-5 acc 79.688 (81.888)	lr 0.01405
Train [56][850/3239]	Time 0.323 (0.659)	Data Time 0.001 (0.044)	Loss 2.4780 (2.6114)	Entropy 1.12388 (1.12520)	Top-1 acc 62.891 (61.150)	Top-5 acc 86.328 (81.893)	lr 0.01405
Train [56][860/3239]	Time 0.207 (0.656)	Data Time 0.001 (0.043)	Loss 2.5641 (2.6114)	Entropy 1.12388 (1.12518)	Top-1 acc 58.984 (61.140)	Top-5 acc 83.594 (81.901)	lr 0.01405
Train [56][870/3239]	Time 0.217 (0.654)	Data Time 0.001 (0.043)	Loss 3.1740 (2.6124)	Entropy 1.12381 (1.12516)	Top-1 acc 49.219 (61.104)	Top-5 acc 73.438 (81.892)	lr 0.01404
Train [56][880/3239]	Time 0.242 (0.652)	Data Time 0.001 (0.042)	Loss 2.4874 (2.6115)	Entropy 1.12378 (1.12515)	Top-1 acc 63.281 (61.127)	Top-5 acc 85.547 (81.908)	lr 0.01404
Train [56][890/3239]	Time 2.565 (0.650)	Data Time 0.002 (0.042)	Loss 2.6333 (2.6117)	Entropy 1.12378 (1.12513)	Top-1 acc 59.375 (61.115)	Top-5 acc 80.469 (81.909)	lr 0.01404
Train [56][900/3239]	Time 0.225 (0.645)	Data Time 0.001 (0.041)	Loss 2.7536 (2.6122)	Entropy 1.12370 (1.12512)	Top-1 acc 57.422 (61.107)	Top-5 acc 78.906 (81.893)	lr 0.01404
Train [56][910/3239]	Time 0.236 (0.643)	Data Time 0.001 (0.041)	Loss 2.5911 (2.6121)	Entropy 1.12365 (1.12510)	Top-1 acc 63.672 (61.115)	Top-5 acc 82.812 (81.892)	lr 0.01404
Train [56][920/3239]	Time 0.204 (0.641)	Data Time 0.001 (0.040)	Loss 2.6369 (2.6128)	Entropy 1.12360 (1.12509)	Top-1 acc 62.109 (61.104)	Top-5 acc 82.422 (81.874)	lr 0.01404
Train [56][930/3239]	Time 0.224 (0.639)	Data Time 0.001 (0.040)	Loss 2.8298 (2.6125)	Entropy 1.12358 (1.12507)	Top-1 acc 54.688 (61.104)	Top-5 acc 76.172 (81.876)	lr 0.01404
Train [56][940/3239]	Time 0.254 (0.638)	Data Time 0.001 (0.040)	Loss 2.7225 (2.6122)	Entropy 1.12359 (1.12505)	Top-1 acc 59.766 (61.110)	Top-5 acc 78.906 (81.887)	lr 0.01404
Train [56][950/3239]	Time 0.226 (0.636)	Data Time 0.001 (0.039)	Loss 2.6514 (2.6119)	Entropy 1.12359 (1.12504)	Top-1 acc 57.422 (61.116)	Top-5 acc 82.812 (81.893)	lr 0.01404
Train [56][960/3239]	Time 0.213 (0.634)	Data Time 0.001 (0.039)	Loss 2.6631 (2.6115)	Entropy 1.12357 (1.12502)	Top-1 acc 56.641 (61.115)	Top-5 acc 82.422 (81.901)	lr 0.01404
Train [56][970/3239]	Time 0.217 (0.632)	Data Time 0.001 (0.038)	Loss 2.6682 (2.6114)	Entropy 1.12356 (1.12501)	Top-1 acc 56.250 (61.113)	Top-5 acc 82.031 (81.910)	lr 0.01403
Train [56][980/3239]	Time 0.223 (0.630)	Data Time 0.001 (0.038)	Loss 2.6641 (2.6119)	Entropy 1.12347 (1.12499)	Top-1 acc 58.203 (61.101)	Top-5 acc 83.984 (81.899)	lr 0.01403
Train [56][990/3239]	Time 0.309 (0.629)	Data Time 0.001 (0.038)	Loss 2.6279 (2.6117)	Entropy 1.12343 (1.12498)	Top-1 acc 63.281 (61.108)	Top-5 acc 82.031 (81.903)	lr 0.01403
Train [56][1000/3239]	Time 2.534 (0.627)	Data Time 0.001 (0.037)	Loss 2.6420 (2.6116)	Entropy 1.12343 (1.12496)	Top-1 acc 60.938 (61.112)	Top-5 acc 80.859 (81.905)	lr 0.01403
Train [56][1010/3239]	Time 0.256 (0.623)	Data Time 0.001 (0.037)	Loss 2.4135 (2.6112)	Entropy 1.12344 (1.12495)	Top-1 acc 67.188 (61.119)	Top-5 acc 86.719 (81.912)	lr 0.01403
Train [56][1020/3239]	Time 0.223 (0.622)	Data Time 0.002 (0.037)	Loss 2.5875 (2.6116)	Entropy 1.12340 (1.12493)	Top-1 acc 61.328 (61.110)	Top-5 acc 83.203 (81.907)	lr 0.01403
Train [56][1030/3239]	Time 0.237 (0.620)	Data Time 0.002 (0.036)	Loss 2.5127 (2.6115)	Entropy 1.12338 (1.12492)	Top-1 acc 65.625 (61.106)	Top-5 acc 81.641 (81.908)	lr 0.01403
Train [56][1040/3239]	Time 0.233 (0.619)	Data Time 0.001 (0.036)	Loss 2.6295 (2.6122)	Entropy 1.12339 (1.12490)	Top-1 acc 58.594 (61.090)	Top-5 acc 81.250 (81.896)	lr 0.01403
Train [56][1050/3239]	Time 0.209 (0.618)	Data Time 0.001 (0.036)	Loss 2.7237 (2.6128)	Entropy 1.12336 (1.12489)	Top-1 acc 58.594 (61.074)	Top-5 acc 82.422 (81.886)	lr 0.01403
Train [56][1060/3239]	Time 0.220 (0.616)	Data Time 0.001 (0.035)	Loss 2.6816 (2.6124)	Entropy 1.12334 (1.12487)	Top-1 acc 59.375 (61.071)	Top-5 acc 79.688 (81.893)	lr 0.01403
Train [56][1070/3239]	Time 0.219 (0.615)	Data Time 0.001 (0.035)	Loss 2.6686 (2.6124)	Entropy 1.12348 (1.12486)	Top-1 acc 62.109 (61.070)	Top-5 acc 80.859 (81.897)	lr 0.01402
Train [56][1080/3239]	Time 0.217 (0.613)	Data Time 0.001 (0.035)	Loss 2.5063 (2.6125)	Entropy 1.12346 (1.12485)	Top-1 acc 60.938 (61.068)	Top-5 acc 85.156 (81.890)	lr 0.01402
Train [56][1090/3239]	Time 0.221 (0.612)	Data Time 0.001 (0.034)	Loss 2.7092 (2.6127)	Entropy 1.12345 (1.12483)	Top-1 acc 58.203 (61.066)	Top-5 acc 80.078 (81.892)	lr 0.01402
Train [56][1100/3239]	Time 0.218 (0.611)	Data Time 0.001 (0.034)	Loss 2.6062 (2.6134)	Entropy 1.12345 (1.12482)	Top-1 acc 61.719 (61.058)	Top-5 acc 82.422 (81.877)	lr 0.01402
Train [56][1110/3239]	Time 2.483 (0.609)	Data Time 0.001 (0.034)	Loss 2.5229 (2.6136)	Entropy 1.12345 (1.12481)	Top-1 acc 62.109 (61.058)	Top-5 acc 83.594 (81.866)	lr 0.01402
Train [56][1120/3239]	Time 0.254 (0.606)	Data Time 0.001 (0.033)	Loss 2.7104 (2.6136)	Entropy 1.12344 (1.12480)	Top-1 acc 56.641 (61.057)	Top-5 acc 82.422 (81.863)	lr 0.01402
Train [56][1130/3239]	Time 0.254 (0.605)	Data Time 0.001 (0.033)	Loss 2.5438 (2.6140)	Entropy 1.12339 (1.12478)	Top-1 acc 60.547 (61.055)	Top-5 acc 82.812 (81.856)	lr 0.01402
Train [56][1140/3239]	Time 0.263 (0.647)	Data Time 0.002 (0.033)	Loss 2.4867 (2.6139)	Entropy 1.12336 (1.12477)	Top-1 acc 62.891 (61.055)	Top-5 acc 85.156 (81.859)	lr 0.01402
Train [56][1150/3239]	Time 0.236 (0.645)	Data Time 0.002 (0.033)	Loss 2.5600 (2.6135)	Entropy 1.12333 (1.12476)	Top-1 acc 60.938 (61.055)	Top-5 acc 81.641 (81.872)	lr 0.01402
Train [56][1160/3239]	Time 0.237 (0.644)	Data Time 0.001 (0.032)	Loss 2.4381 (2.6132)	Entropy 1.12334 (1.12475)	Top-1 acc 66.406 (61.063)	Top-5 acc 85.547 (81.880)	lr 0.01402
Train [56][1170/3239]	Time 0.266 (0.642)	Data Time 0.001 (0.032)	Loss 2.5536 (2.6130)	Entropy 1.12339 (1.12474)	Top-1 acc 62.109 (61.072)	Top-5 acc 81.641 (81.878)	lr 0.01401
Train [56][1180/3239]	Time 0.300 (0.641)	Data Time 0.001 (0.032)	Loss 2.7647 (2.6131)	Entropy 1.12329 (1.12472)	Top-1 acc 56.641 (61.071)	Top-5 acc 77.734 (81.878)	lr 0.01401
Train [56][1190/3239]	Time 0.240 (0.639)	Data Time 0.001 (0.032)	Loss 2.5712 (2.6133)	Entropy 1.12330 (1.12471)	Top-1 acc 62.500 (61.076)	Top-5 acc 82.422 (81.880)	lr 0.01401
Train [56][1200/3239]	Time 0.232 (0.638)	Data Time 0.001 (0.031)	Loss 2.5853 (2.6133)	Entropy 1.12329 (1.12470)	Top-1 acc 59.375 (61.087)	Top-5 acc 81.641 (81.879)	lr 0.01401
Train [56][1210/3239]	Time 0.277 (0.636)	Data Time 0.001 (0.031)	Loss 2.4548 (2.6130)	Entropy 1.12333 (1.12469)	Top-1 acc 64.844 (61.093)	Top-5 acc 83.984 (81.882)	lr 0.01401
Train [56][1220/3239]	Time 2.469 (0.635)	Data Time 0.002 (0.031)	Loss 2.7785 (2.6135)	Entropy 1.12333 (1.12468)	Top-1 acc 57.422 (61.091)	Top-5 acc 79.297 (81.865)	lr 0.01401
Train [56][1230/3239]	Time 0.292 (0.632)	Data Time 0.001 (0.031)	Loss 2.7490 (2.6135)	Entropy 1.12333 (1.12467)	Top-1 acc 61.328 (61.101)	Top-5 acc 79.688 (81.865)	lr 0.01401
Train [56][1240/3239]	Time 0.221 (0.630)	Data Time 0.001 (0.030)	Loss 2.6081 (2.6134)	Entropy 1.12332 (1.12466)	Top-1 acc 59.766 (61.100)	Top-5 acc 81.250 (81.868)	lr 0.01401
Train [56][1250/3239]	Time 0.225 (0.629)	Data Time 0.001 (0.030)	Loss 2.5582 (2.6133)	Entropy 1.12332 (1.12465)	Top-1 acc 64.062 (61.103)	Top-5 acc 81.641 (81.867)	lr 0.01401
Train [56][1260/3239]	Time 0.232 (0.628)	Data Time 0.001 (0.030)	Loss 2.7088 (2.6134)	Entropy 1.12328 (1.12463)	Top-1 acc 60.938 (61.099)	Top-5 acc 80.078 (81.866)	lr 0.01401
Train [56][1270/3239]	Time 0.236 (0.626)	Data Time 0.001 (0.030)	Loss 2.5281 (2.6135)	Entropy 1.12328 (1.12462)	Top-1 acc 61.719 (61.089)	Top-5 acc 85.547 (81.863)	lr 0.01400
Train [56][1280/3239]	Time 0.239 (0.625)	Data Time 0.001 (0.029)	Loss 2.7340 (2.6135)	Entropy 1.12327 (1.12461)	Top-1 acc 57.812 (61.098)	Top-5 acc 78.125 (81.863)	lr 0.01400
Train [56][1290/3239]	Time 0.240 (0.624)	Data Time 0.002 (0.029)	Loss 2.6931 (2.6136)	Entropy 1.12321 (1.12460)	Top-1 acc 58.594 (61.098)	Top-5 acc 78.516 (81.853)	lr 0.01400
Train [56][1300/3239]	Time 0.161 (0.623)	Data Time 0.001 (0.029)	Loss 2.4166 (2.6133)	Entropy 1.12321 (1.12459)	Top-1 acc 64.844 (61.104)	Top-5 acc 87.109 (81.862)	lr 0.01400
Train [56][1310/3239]	Time 0.228 (0.622)	Data Time 0.001 (0.029)	Loss 2.5814 (2.6131)	Entropy 1.12319 (1.12458)	Top-1 acc 60.938 (61.118)	Top-5 acc 82.812 (81.868)	lr 0.01400
Train [56][1320/3239]	Time 0.220 (0.620)	Data Time 0.001 (0.029)	Loss 2.7011 (2.6136)	Entropy 1.12321 (1.12457)	Top-1 acc 57.422 (61.097)	Top-5 acc 79.688 (81.858)	lr 0.01400
Train [56][1330/3239]	Time 2.445 (0.619)	Data Time 0.001 (0.028)	Loss 2.6481 (2.6136)	Entropy 1.12321 (1.12456)	Top-1 acc 60.156 (61.099)	Top-5 acc 79.688 (81.858)	lr 0.01400
Train [56][1340/3239]	Time 0.283 (0.616)	Data Time 0.003 (0.028)	Loss 2.4959 (2.6136)	Entropy 1.12319 (1.12455)	Top-1 acc 62.500 (61.095)	Top-5 acc 83.594 (81.860)	lr 0.01400
Train [56][1350/3239]	Time 0.235 (0.615)	Data Time 0.001 (0.028)	Loss 2.5693 (2.6138)	Entropy 1.12309 (1.12454)	Top-1 acc 64.062 (61.090)	Top-5 acc 80.469 (81.856)	lr 0.01400
Train [56][1360/3239]	Time 0.232 (0.614)	Data Time 0.001 (0.028)	Loss 2.5275 (2.6137)	Entropy 1.12310 (1.12453)	Top-1 acc 62.109 (61.084)	Top-5 acc 85.938 (81.858)	lr 0.01400
Train [56][1370/3239]	Time 0.214 (0.613)	Data Time 0.001 (0.028)	Loss 2.6865 (2.6139)	Entropy 1.12301 (1.12452)	Top-1 acc 60.547 (61.077)	Top-5 acc 82.031 (81.857)	lr 0.01399
Train [56][1380/3239]	Time 0.230 (0.612)	Data Time 0.001 (0.027)	Loss 2.5410 (2.6138)	Entropy 1.12299 (1.12451)	Top-1 acc 62.109 (61.076)	Top-5 acc 82.031 (81.858)	lr 0.01399
Train [56][1390/3239]	Time 0.214 (0.611)	Data Time 0.001 (0.027)	Loss 2.8928 (2.6136)	Entropy 1.12297 (1.12450)	Top-1 acc 51.953 (61.085)	Top-5 acc 75.391 (81.859)	lr 0.01399
Train [56][1400/3239]	Time 0.212 (0.609)	Data Time 0.001 (0.027)	Loss 2.4915 (2.6139)	Entropy 1.12300 (1.12449)	Top-1 acc 65.234 (61.088)	Top-5 acc 85.156 (81.853)	lr 0.01399
Train [56][1410/3239]	Time 0.216 (0.608)	Data Time 0.001 (0.027)	Loss 2.5297 (2.6135)	Entropy 1.12297 (1.12447)	Top-1 acc 60.547 (61.094)	Top-5 acc 85.156 (81.864)	lr 0.01399
Train [56][1420/3239]	Time 0.230 (0.607)	Data Time 0.001 (0.027)	Loss 2.8879 (2.6134)	Entropy 1.12295 (1.12446)	Top-1 acc 54.688 (61.094)	Top-5 acc 76.562 (81.864)	lr 0.01399
Train [56][1430/3239]	Time 0.252 (0.606)	Data Time 0.001 (0.027)	Loss 2.5586 (2.6136)	Entropy 1.12288 (1.12445)	Top-1 acc 61.328 (61.087)	Top-5 acc 82.422 (81.860)	lr 0.01399
Train [56][1440/3239]	Time 2.420 (0.605)	Data Time 0.001 (0.026)	Loss 2.7950 (2.6136)	Entropy 1.12288 (1.12444)	Top-1 acc 55.859 (61.085)	Top-5 acc 77.734 (81.862)	lr 0.01399
Train [56][1450/3239]	Time 0.219 (0.603)	Data Time 0.001 (0.026)	Loss 2.6118 (2.6138)	Entropy 1.12278 (1.12443)	Top-1 acc 58.984 (61.081)	Top-5 acc 82.812 (81.857)	lr 0.01399
Train [56][1460/3239]	Time 0.234 (0.602)	Data Time 0.001 (0.026)	Loss 2.6939 (2.6139)	Entropy 1.12276 (1.12442)	Top-1 acc 62.891 (61.075)	Top-5 acc 79.688 (81.855)	lr 0.01399
Train [56][1470/3239]	Time 0.238 (0.601)	Data Time 0.001 (0.026)	Loss 2.7527 (2.6143)	Entropy 1.12274 (1.12441)	Top-1 acc 57.812 (61.064)	Top-5 acc 77.344 (81.846)	lr 0.01398
Train [56][1480/3239]	Time 0.360 (0.600)	Data Time 0.001 (0.026)	Loss 2.7316 (2.6148)	Entropy 1.12272 (1.12440)	Top-1 acc 59.375 (61.055)	Top-5 acc 79.297 (81.838)	lr 0.01398
Train [56][1490/3239]	Time 0.203 (0.599)	Data Time 0.001 (0.026)	Loss 2.4450 (2.6145)	Entropy 1.12270 (1.12439)	Top-1 acc 69.141 (61.070)	Top-5 acc 83.984 (81.846)	lr 0.01398
Train [56][1500/3239]	Time 0.239 (0.632)	Data Time 0.002 (0.025)	Loss 2.8065 (2.6149)	Entropy 1.12272 (1.12437)	Top-1 acc 54.297 (61.057)	Top-5 acc 77.344 (81.830)	lr 0.01398
Train [56][1510/3239]	Time 0.237 (0.631)	Data Time 0.002 (0.025)	Loss 2.7749 (2.6152)	Entropy 1.12270 (1.12436)	Top-1 acc 57.031 (61.050)	Top-5 acc 78.906 (81.828)	lr 0.01398
Train [56][1520/3239]	Time 0.229 (0.630)	Data Time 0.001 (0.025)	Loss 2.6283 (2.6158)	Entropy 1.12268 (1.12435)	Top-1 acc 60.156 (61.031)	Top-5 acc 82.422 (81.816)	lr 0.01398
Train [56][1530/3239]	Time 0.259 (0.629)	Data Time 0.001 (0.025)	Loss 2.6009 (2.6155)	Entropy 1.12268 (1.12434)	Top-1 acc 62.109 (61.035)	Top-5 acc 82.031 (81.823)	lr 0.01398
Train [56][1540/3239]	Time 0.236 (0.628)	Data Time 0.001 (0.025)	Loss 2.6069 (2.6156)	Entropy 1.12266 (1.12433)	Top-1 acc 61.328 (61.036)	Top-5 acc 80.859 (81.819)	lr 0.01398
Train [56][1550/3239]	Time 2.429 (0.627)	Data Time 0.001 (0.025)	Loss 2.7658 (2.6154)	Entropy 1.12266 (1.12432)	Top-1 acc 59.375 (61.036)	Top-5 acc 78.125 (81.819)	lr 0.01398
Train [56][1560/3239]	Time 0.258 (0.625)	Data Time 0.001 (0.025)	Loss 2.4738 (2.6153)	Entropy 1.12264 (1.12431)	Top-1 acc 66.406 (61.041)	Top-5 acc 84.766 (81.819)	lr 0.01398
Train [56][1570/3239]	Time 0.248 (0.624)	Data Time 0.001 (0.024)	Loss 2.6887 (2.6153)	Entropy 1.12263 (1.12430)	Top-1 acc 54.688 (61.042)	Top-5 acc 82.031 (81.819)	lr 0.01397
Train [56][1580/3239]	Time 0.216 (0.623)	Data Time 0.001 (0.024)	Loss 2.6743 (2.6152)	Entropy 1.12260 (1.12429)	Top-1 acc 60.938 (61.047)	Top-5 acc 83.594 (81.820)	lr 0.01397
Train [56][1590/3239]	Time 0.231 (0.622)	Data Time 0.001 (0.024)	Loss 2.6773 (2.6152)	Entropy 1.12260 (1.12428)	Top-1 acc 56.250 (61.052)	Top-5 acc 79.688 (81.821)	lr 0.01397
Train [56][1600/3239]	Time 0.229 (0.621)	Data Time 0.001 (0.024)	Loss 2.6281 (2.6152)	Entropy 1.12258 (1.12427)	Top-1 acc 59.375 (61.049)	Top-5 acc 80.859 (81.823)	lr 0.01397
Train [56][1610/3239]	Time 0.251 (0.620)	Data Time 0.002 (0.024)	Loss 2.5164 (2.6155)	Entropy 1.12254 (1.12426)	Top-1 acc 58.594 (61.039)	Top-5 acc 84.375 (81.820)	lr 0.01397
Train [56][1620/3239]	Time 0.339 (0.619)	Data Time 0.001 (0.024)	Loss 2.7212 (2.6159)	Entropy 1.12252 (1.12425)	Top-1 acc 58.594 (61.033)	Top-5 acc 81.641 (81.812)	lr 0.01397
Train [56][1630/3239]	Time 0.226 (0.618)	Data Time 0.001 (0.024)	Loss 2.5754 (2.6158)	Entropy 1.12251 (1.12424)	Top-1 acc 62.500 (61.035)	Top-5 acc 84.375 (81.817)	lr 0.01397
Train [56][1640/3239]	Time 0.224 (0.617)	Data Time 0.001 (0.023)	Loss 2.5885 (2.6160)	Entropy 1.12248 (1.12422)	Top-1 acc 57.422 (61.026)	Top-5 acc 85.156 (81.817)	lr 0.01397
Train [56][1650/3239]	Time 0.217 (0.616)	Data Time 0.001 (0.023)	Loss 2.6868 (2.6162)	Entropy 1.12244 (1.12421)	Top-1 acc 57.812 (61.018)	Top-5 acc 81.641 (81.814)	lr 0.01397
Train [56][1660/3239]	Time 2.534 (0.615)	Data Time 0.001 (0.023)	Loss 2.5918 (2.6162)	Entropy 1.12244 (1.12420)	Top-1 acc 61.719 (61.018)	Top-5 acc 82.031 (81.816)	lr 0.01397
Train [56][1670/3239]	Time 0.358 (0.613)	Data Time 0.002 (0.023)	Loss 2.3993 (2.6161)	Entropy 1.12250 (1.12419)	Top-1 acc 66.016 (61.022)	Top-5 acc 86.328 (81.818)	lr 0.01396
Train [56][1680/3239]	Time 0.244 (0.612)	Data Time 0.002 (0.023)	Loss 2.6794 (2.6159)	Entropy 1.12249 (1.12418)	Top-1 acc 59.766 (61.026)	Top-5 acc 80.078 (81.824)	lr 0.01396
Train [56][1690/3239]	Time 0.216 (0.611)	Data Time 0.001 (0.023)	Loss 2.3511 (2.6163)	Entropy 1.12247 (1.12417)	Top-1 acc 66.406 (61.016)	Top-5 acc 87.891 (81.816)	lr 0.01396
Train [56][1700/3239]	Time 0.242 (0.610)	Data Time 0.001 (0.023)	Loss 2.4675 (2.6160)	Entropy 1.12245 (1.12416)	Top-1 acc 63.672 (61.028)	Top-5 acc 84.375 (81.818)	lr 0.01396
Train [56][1710/3239]	Time 0.229 (0.610)	Data Time 0.001 (0.023)	Loss 2.5727 (2.6162)	Entropy 1.12246 (1.12415)	Top-1 acc 62.891 (61.023)	Top-5 acc 81.641 (81.814)	lr 0.01396
Train [56][1720/3239]	Time 0.216 (0.609)	Data Time 0.001 (0.022)	Loss 2.6949 (2.6162)	Entropy 1.12245 (1.12414)	Top-1 acc 60.156 (61.024)	Top-5 acc 78.906 (81.814)	lr 0.01396
Train [56][1730/3239]	Time 0.200 (0.608)	Data Time 0.001 (0.022)	Loss 2.6960 (2.6163)	Entropy 1.12239 (1.12413)	Top-1 acc 58.594 (61.021)	Top-5 acc 81.250 (81.814)	lr 0.01396
Train [56][1740/3239]	Time 0.220 (0.607)	Data Time 0.001 (0.022)	Loss 2.4681 (2.6158)	Entropy 1.12239 (1.12412)	Top-1 acc 64.844 (61.033)	Top-5 acc 86.719 (81.826)	lr 0.01396
Train [56][1750/3239]	Time 0.203 (0.606)	Data Time 0.001 (0.022)	Loss 2.5500 (2.6161)	Entropy 1.12235 (1.12411)	Top-1 acc 61.328 (61.024)	Top-5 acc 83.203 (81.822)	lr 0.01396
Train [56][1760/3239]	Time 0.352 (0.605)	Data Time 0.001 (0.022)	Loss 2.5021 (2.6156)	Entropy 1.12233 (1.12410)	Top-1 acc 61.328 (61.027)	Top-5 acc 82.422 (81.831)	lr 0.01396
Train [56][1770/3239]	Time 2.405 (0.604)	Data Time 0.001 (0.022)	Loss 2.8816 (2.6166)	Entropy 1.12233 (1.12409)	Top-1 acc 53.906 (61.004)	Top-5 acc 76.562 (81.810)	lr 0.01395
Train [56][1780/3239]	Time 0.234 (0.602)	Data Time 0.001 (0.022)	Loss 2.6923 (2.6168)	Entropy 1.12231 (1.12408)	Top-1 acc 58.984 (61.002)	Top-5 acc 78.906 (81.807)	lr 0.01395
Train [56][1790/3239]	Time 0.220 (0.602)	Data Time 0.001 (0.022)	Loss 2.6377 (2.6169)	Entropy 1.12229 (1.12407)	Top-1 acc 60.938 (60.995)	Top-5 acc 81.641 (81.811)	lr 0.01395
Train [56][1800/3239]	Time 0.267 (0.601)	Data Time 0.001 (0.021)	Loss 2.6958 (2.6173)	Entropy 1.12232 (1.12406)	Top-1 acc 57.422 (60.981)	Top-5 acc 79.297 (81.802)	lr 0.01395
Train [56][1810/3239]	Time 0.346 (0.600)	Data Time 0.001 (0.021)	Loss 2.7084 (2.6176)	Entropy 1.12231 (1.12405)	Top-1 acc 57.812 (60.975)	Top-5 acc 78.906 (81.796)	lr 0.01395
Train [56][1820/3239]	Time 0.238 (0.600)	Data Time 0.002 (0.021)	Loss 2.8220 (2.6179)	Entropy 1.12235 (1.12404)	Top-1 acc 57.031 (60.974)	Top-5 acc 79.297 (81.786)	lr 0.01395
Train [56][1830/3239]	Time 0.238 (0.599)	Data Time 0.001 (0.021)	Loss 2.5866 (2.6184)	Entropy 1.12228 (1.12403)	Top-1 acc 63.672 (60.965)	Top-5 acc 82.422 (81.782)	lr 0.01395
Train [56][1840/3239]	Time 0.214 (0.598)	Data Time 0.001 (0.021)	Loss 2.6837 (2.6189)	Entropy 1.12227 (1.12403)	Top-1 acc 61.328 (60.952)	Top-5 acc 80.078 (81.773)	lr 0.01395
Train [56][1850/3239]	Time 0.222 (0.597)	Data Time 0.001 (0.021)	Loss 2.5497 (2.6189)	Entropy 1.12219 (1.12402)	Top-1 acc 60.938 (60.946)	Top-5 acc 84.766 (81.768)	lr 0.01395
Train [56][1860/3239]	Time 0.263 (0.623)	Data Time 0.004 (0.021)	Loss 2.6346 (2.6190)	Entropy 1.12220 (1.12401)	Top-1 acc 60.938 (60.943)	Top-5 acc 80.078 (81.763)	lr 0.01395
Train [56][1870/3239]	Time 0.293 (0.622)	Data Time 0.002 (0.021)	Loss 2.4858 (2.6187)	Entropy 1.12212 (1.12400)	Top-1 acc 64.062 (60.948)	Top-5 acc 85.547 (81.766)	lr 0.01394
Train [56][1880/3239]	Time 2.618 (0.621)	Data Time 0.002 (0.021)	Loss 2.7102 (2.6186)	Entropy 1.12212 (1.12399)	Top-1 acc 57.031 (60.956)	Top-5 acc 81.250 (81.768)	lr 0.01394
Train [56][1890/3239]	Time 0.236 (0.619)	Data Time 0.001 (0.021)	Loss 2.5316 (2.6187)	Entropy 1.12208 (1.12398)	Top-1 acc 63.281 (60.950)	Top-5 acc 83.594 (81.766)	lr 0.01394
Train [56][1900/3239]	Time 0.247 (0.618)	Data Time 0.002 (0.020)	Loss 2.6527 (2.6186)	Entropy 1.12201 (1.12397)	Top-1 acc 58.203 (60.954)	Top-5 acc 81.641 (81.767)	lr 0.01394
Train [56][1910/3239]	Time 0.237 (0.618)	Data Time 0.002 (0.020)	Loss 2.4550 (2.6186)	Entropy 1.12200 (1.12396)	Top-1 acc 67.188 (60.952)	Top-5 acc 85.547 (81.771)	lr 0.01394
Train [56][1920/3239]	Time 0.223 (0.617)	Data Time 0.001 (0.020)	Loss 2.5737 (2.6185)	Entropy 1.12200 (1.12395)	Top-1 acc 59.375 (60.948)	Top-5 acc 82.422 (81.774)	lr 0.01394
Train [56][1930/3239]	Time 0.235 (0.616)	Data Time 0.002 (0.020)	Loss 2.6214 (2.6185)	Entropy 1.12199 (1.12394)	Top-1 acc 62.109 (60.944)	Top-5 acc 79.688 (81.774)	lr 0.01394
Train [56][1940/3239]	Time 0.239 (0.615)	Data Time 0.001 (0.020)	Loss 2.5556 (2.6182)	Entropy 1.12196 (1.12393)	Top-1 acc 62.500 (60.948)	Top-5 acc 82.812 (81.780)	lr 0.01394
Train [56][1950/3239]	Time 0.369 (0.615)	Data Time 0.001 (0.020)	Loss 2.7168 (2.6182)	Entropy 1.12191 (1.12391)	Top-1 acc 58.594 (60.946)	Top-5 acc 78.125 (81.777)	lr 0.01394
Train [56][1960/3239]	Time 0.231 (0.614)	Data Time 0.002 (0.020)	Loss 2.6494 (2.6182)	Entropy 1.12192 (1.12390)	Top-1 acc 58.203 (60.942)	Top-5 acc 82.031 (81.781)	lr 0.01394
Train [56][1970/3239]	Time 0.218 (0.613)	Data Time 0.001 (0.020)	Loss 2.7072 (2.6183)	Entropy 1.12189 (1.12389)	Top-1 acc 60.938 (60.944)	Top-5 acc 79.688 (81.783)	lr 0.01393
Train [56][1980/3239]	Time 0.233 (0.613)	Data Time 0.001 (0.020)	Loss 2.5044 (2.6182)	Entropy 1.12183 (1.12388)	Top-1 acc 66.016 (60.946)	Top-5 acc 83.203 (81.784)	lr 0.01393
Train [56][1990/3239]	Time 2.435 (0.612)	Data Time 0.001 (0.020)	Loss 2.5370 (2.6180)	Entropy 1.12183 (1.12387)	Top-1 acc 58.594 (60.945)	Top-5 acc 85.547 (81.790)	lr 0.01393
Train [56][2000/3239]	Time 0.235 (0.610)	Data Time 0.001 (0.020)	Loss 2.6789 (2.6182)	Entropy 1.12177 (1.12386)	Top-1 acc 60.547 (60.938)	Top-5 acc 76.953 (81.783)	lr 0.01393
Train [56][2010/3239]	Time 0.213 (0.609)	Data Time 0.001 (0.019)	Loss 2.7649 (2.6185)	Entropy 1.12177 (1.12385)	Top-1 acc 58.594 (60.935)	Top-5 acc 78.906 (81.777)	lr 0.01393
Train [56][2020/3239]	Time 0.240 (0.609)	Data Time 0.002 (0.019)	Loss 2.5797 (2.6183)	Entropy 1.12173 (1.12384)	Top-1 acc 61.719 (60.936)	Top-5 acc 78.125 (81.783)	lr 0.01393
Train [56][2030/3239]	Time 0.244 (0.608)	Data Time 0.001 (0.019)	Loss 2.7794 (2.6183)	Entropy 1.12168 (1.12383)	Top-1 acc 56.250 (60.934)	Top-5 acc 78.906 (81.785)	lr 0.01393
Train [56][2040/3239]	Time 0.222 (0.607)	Data Time 0.001 (0.019)	Loss 2.6604 (2.6183)	Entropy 1.12163 (1.12382)	Top-1 acc 58.984 (60.935)	Top-5 acc 80.078 (81.785)	lr 0.01393
Train [56][2050/3239]	Time 0.207 (0.607)	Data Time 0.001 (0.019)	Loss 2.5814 (2.6183)	Entropy 1.12162 (1.12381)	Top-1 acc 64.844 (60.937)	Top-5 acc 82.422 (81.784)	lr 0.01393
Train [56][2060/3239]	Time 0.234 (0.606)	Data Time 0.001 (0.019)	Loss 2.6426 (2.6185)	Entropy 1.12157 (1.12380)	Top-1 acc 60.547 (60.935)	Top-5 acc 81.250 (81.778)	lr 0.01393
Train [56][2070/3239]	Time 0.237 (0.605)	Data Time 0.001 (0.019)	Loss 2.6618 (2.6186)	Entropy 1.12155 (1.12379)	Top-1 acc 60.547 (60.934)	Top-5 acc 80.078 (81.778)	lr 0.01392
Train [56][2080/3239]	Time 0.220 (0.605)	Data Time 0.001 (0.019)	Loss 2.7616 (2.6188)	Entropy 1.12152 (1.12378)	Top-1 acc 58.984 (60.931)	Top-5 acc 79.688 (81.775)	lr 0.01392
Train [56][2090/3239]	Time 0.246 (0.604)	Data Time 0.001 (0.019)	Loss 2.5398 (2.6187)	Entropy 1.12148 (1.12377)	Top-1 acc 61.719 (60.931)	Top-5 acc 80.859 (81.775)	lr 0.01392
Train [56][2100/3239]	Time 2.513 (0.603)	Data Time 0.002 (0.019)	Loss 2.4566 (2.6187)	Entropy 1.12148 (1.12376)	Top-1 acc 64.844 (60.930)	Top-5 acc 85.547 (81.769)	lr 0.01392
Train [56][2110/3239]	Time 0.229 (0.602)	Data Time 0.001 (0.019)	Loss 2.4521 (2.6186)	Entropy 1.12149 (1.12375)	Top-1 acc 67.188 (60.934)	Top-5 acc 87.109 (81.770)	lr 0.01392
Train [56][2120/3239]	Time 0.217 (0.601)	Data Time 0.001 (0.018)	Loss 2.6501 (2.6188)	Entropy 1.12152 (1.12374)	Top-1 acc 58.203 (60.927)	Top-5 acc 79.297 (81.759)	lr 0.01392
Train [56][2130/3239]	Time 0.237 (0.600)	Data Time 0.001 (0.018)	Loss 2.4486 (2.6186)	Entropy 1.12152 (1.12373)	Top-1 acc 66.016 (60.934)	Top-5 acc 87.500 (81.768)	lr 0.01392
Train [56][2140/3239]	Time 0.310 (0.600)	Data Time 0.001 (0.018)	Loss 2.6134 (2.6186)	Entropy 1.12148 (1.12371)	Top-1 acc 61.719 (60.933)	Top-5 acc 80.859 (81.765)	lr 0.01392
Train [56][2150/3239]	Time 0.218 (0.599)	Data Time 0.001 (0.018)	Loss 2.5847 (2.6185)	Entropy 1.12143 (1.12370)	Top-1 acc 61.719 (60.937)	Top-5 acc 82.031 (81.764)	lr 0.01392
Train [56][2160/3239]	Time 0.243 (0.598)	Data Time 0.001 (0.018)	Loss 2.8308 (2.6187)	Entropy 1.12143 (1.12369)	Top-1 acc 58.203 (60.927)	Top-5 acc 79.297 (81.761)	lr 0.01392
Train [56][2170/3239]	Time 0.224 (0.598)	Data Time 0.001 (0.018)	Loss 2.4996 (2.6188)	Entropy 1.12139 (1.12368)	Top-1 acc 62.109 (60.925)	Top-5 acc 85.938 (81.761)	lr 0.01391
Train [56][2180/3239]	Time 0.210 (0.597)	Data Time 0.001 (0.018)	Loss 2.6587 (2.6189)	Entropy 1.12140 (1.12367)	Top-1 acc 60.156 (60.923)	Top-5 acc 80.469 (81.756)	lr 0.01391
Train [56][2190/3239]	Time 0.343 (0.597)	Data Time 0.001 (0.018)	Loss 2.6749 (2.6190)	Entropy 1.12137 (1.12366)	Top-1 acc 60.547 (60.919)	Top-5 acc 80.078 (81.754)	lr 0.01391
Train [56][2200/3239]	Time 0.254 (0.596)	Data Time 0.001 (0.018)	Loss 2.6185 (2.6192)	Entropy 1.12136 (1.12365)	Top-1 acc 60.547 (60.917)	Top-5 acc 83.203 (81.751)	lr 0.01391
Train [56][2210/3239]	Time 2.639 (0.595)	Data Time 0.001 (0.018)	Loss 2.5429 (2.6189)	Entropy 1.12136 (1.12364)	Top-1 acc 61.719 (60.926)	Top-5 acc 81.250 (81.756)	lr 0.01391
Train [56][2220/3239]	Time 0.271 (0.594)	Data Time 0.001 (0.018)	Loss 2.7297 (2.6189)	Entropy 1.12138 (1.12363)	Top-1 acc 58.594 (60.930)	Top-5 acc 79.688 (81.758)	lr 0.01391
Train [56][2230/3239]	Time 0.286 (0.614)	Data Time 0.002 (0.018)	Loss 2.6696 (2.6192)	Entropy 1.12134 (1.12362)	Top-1 acc 61.328 (60.922)	Top-5 acc 80.469 (81.749)	lr 0.01391
Train [56][2240/3239]	Time 0.229 (0.614)	Data Time 0.002 (0.018)	Loss 2.5827 (2.6194)	Entropy 1.12137 (1.12361)	Top-1 acc 61.719 (60.918)	Top-5 acc 82.031 (81.745)	lr 0.01391
Train [56][2250/3239]	Time 0.228 (0.613)	Data Time 0.001 (0.018)	Loss 2.7048 (2.6193)	Entropy 1.12136 (1.12360)	Top-1 acc 55.469 (60.919)	Top-5 acc 79.688 (81.750)	lr 0.01391
Train [56][2260/3239]	Time 0.226 (0.612)	Data Time 0.001 (0.017)	Loss 2.4801 (2.6192)	Entropy 1.12137 (1.12359)	Top-1 acc 64.453 (60.918)	Top-5 acc 81.641 (81.748)	lr 0.01391
Train [56][2270/3239]	Time 0.243 (0.612)	Data Time 0.001 (0.017)	Loss 2.6497 (2.6191)	Entropy 1.12134 (1.12358)	Top-1 acc 60.547 (60.921)	Top-5 acc 79.688 (81.748)	lr 0.01390
Train [56][2280/3239]	Time 0.234 (0.611)	Data Time 0.001 (0.017)	Loss 2.6338 (2.6192)	Entropy 1.12128 (1.12357)	Top-1 acc 58.594 (60.918)	Top-5 acc 80.859 (81.746)	lr 0.01390
Train [56][2290/3239]	Time 0.223 (0.610)	Data Time 0.001 (0.017)	Loss 2.6850 (2.6191)	Entropy 1.12128 (1.12356)	Top-1 acc 60.156 (60.918)	Top-5 acc 76.562 (81.747)	lr 0.01390
Train [56][2300/3239]	Time 0.218 (0.610)	Data Time 0.001 (0.017)	Loss 2.5843 (2.6190)	Entropy 1.12123 (1.12355)	Top-1 acc 60.938 (60.921)	Top-5 acc 84.766 (81.748)	lr 0.01390
Train [56][2310/3239]	Time 0.264 (0.609)	Data Time 0.001 (0.017)	Loss 2.7047 (2.6192)	Entropy 1.12121 (1.12354)	Top-1 acc 60.547 (60.916)	Top-5 acc 80.469 (81.745)	lr 0.01390
Train [56][2320/3239]	Time 2.494 (0.609)	Data Time 0.001 (0.017)	Loss 2.5529 (2.6191)	Entropy 1.12121 (1.12353)	Top-1 acc 61.719 (60.917)	Top-5 acc 83.984 (81.742)	lr 0.01390
Train [56][2330/3239]	Time 0.284 (0.607)	Data Time 0.001 (0.017)	Loss 2.6883 (2.6191)	Entropy 1.12120 (1.12352)	Top-1 acc 57.422 (60.914)	Top-5 acc 78.906 (81.745)	lr 0.01390
Train [56][2340/3239]	Time 0.241 (0.606)	Data Time 0.001 (0.017)	Loss 2.7445 (2.6194)	Entropy 1.12118 (1.12351)	Top-1 acc 56.250 (60.912)	Top-5 acc 80.078 (81.740)	lr 0.01390
Train [56][2350/3239]	Time 0.232 (0.606)	Data Time 0.001 (0.017)	Loss 2.7234 (2.6194)	Entropy 1.12117 (1.12350)	Top-1 acc 55.859 (60.907)	Top-5 acc 78.906 (81.739)	lr 0.01390
Train [56][2360/3239]	Time 0.226 (0.605)	Data Time 0.001 (0.017)	Loss 2.6106 (2.6193)	Entropy 1.12116 (1.12349)	Top-1 acc 58.984 (60.909)	Top-5 acc 84.375 (81.744)	lr 0.01389
Train [56][2370/3239]	Time 0.254 (0.605)	Data Time 0.001 (0.017)	Loss 2.7643 (2.6193)	Entropy 1.12109 (1.12348)	Top-1 acc 54.688 (60.908)	Top-5 acc 79.688 (81.742)	lr 0.01389
Train [56][2380/3239]	Time 0.305 (0.604)	Data Time 0.001 (0.017)	Loss 2.7388 (2.6194)	Entropy 1.12100 (1.12347)	Top-1 acc 54.688 (60.906)	Top-5 acc 81.641 (81.742)	lr 0.01389
Train [56][2390/3239]	Time 0.229 (0.603)	Data Time 0.001 (0.017)	Loss 2.6047 (2.6193)	Entropy 1.12096 (1.12346)	Top-1 acc 62.109 (60.913)	Top-5 acc 81.250 (81.745)	lr 0.01389
Train [56][2400/3239]	Time 0.218 (0.603)	Data Time 0.001 (0.017)	Loss 2.4213 (2.6193)	Entropy 1.12092 (1.12345)	Top-1 acc 66.016 (60.909)	Top-5 acc 83.984 (81.739)	lr 0.01389
Train [56][2410/3239]	Time 0.204 (0.602)	Data Time 0.001 (0.016)	Loss 2.5431 (2.6195)	Entropy 1.12090 (1.12344)	Top-1 acc 60.938 (60.904)	Top-5 acc 85.547 (81.739)	lr 0.01389
Train [56][2420/3239]	Time 0.236 (0.602)	Data Time 0.002 (0.016)	Loss 2.5120 (2.6191)	Entropy 1.12071 (1.12343)	Top-1 acc 61.719 (60.913)	Top-5 acc 85.156 (81.746)	lr 0.01389
Train [56][2430/3239]	Time 2.547 (0.601)	Data Time 0.001 (0.016)	Loss 2.8798 (2.6192)	Entropy 1.12071 (1.12342)	Top-1 acc 50.391 (60.912)	Top-5 acc 78.516 (81.744)	lr 0.01389
Train [56][2440/3239]	Time 0.211 (0.600)	Data Time 0.001 (0.016)	Loss 2.6166 (2.6191)	Entropy 1.12069 (1.12341)	Top-1 acc 60.938 (60.916)	Top-5 acc 80.078 (81.744)	lr 0.01389
Train [56][2450/3239]	Time 0.230 (0.599)	Data Time 0.001 (0.016)	Loss 2.5744 (2.6191)	Entropy 1.12066 (1.12340)	Top-1 acc 59.375 (60.919)	Top-5 acc 85.156 (81.747)	lr 0.01389
Train [56][2460/3239]	Time 0.241 (0.599)	Data Time 0.001 (0.016)	Loss 2.5462 (2.6190)	Entropy 1.12068 (1.12338)	Top-1 acc 58.594 (60.920)	Top-5 acc 84.766 (81.746)	lr 0.01388
Train [56][2470/3239]	Time 0.231 (0.598)	Data Time 0.001 (0.016)	Loss 2.6060 (2.6188)	Entropy 1.12065 (1.12337)	Top-1 acc 62.891 (60.926)	Top-5 acc 80.078 (81.744)	lr 0.01388
Train [56][2480/3239]	Time 0.229 (0.597)	Data Time 0.001 (0.016)	Loss 2.7197 (2.6189)	Entropy 1.12056 (1.12336)	Top-1 acc 58.203 (60.927)	Top-5 acc 81.250 (81.743)	lr 0.01388
Train [56][2490/3239]	Time 0.228 (0.597)	Data Time 0.001 (0.016)	Loss 2.7216 (2.6189)	Entropy 1.12056 (1.12335)	Top-1 acc 58.203 (60.930)	Top-5 acc 78.906 (81.743)	lr 0.01388
Train [56][2500/3239]	Time 0.233 (0.596)	Data Time 0.001 (0.016)	Loss 2.5755 (2.6187)	Entropy 1.12058 (1.12334)	Top-1 acc 58.203 (60.934)	Top-5 acc 82.812 (81.746)	lr 0.01388
Train [56][2510/3239]	Time 0.224 (0.596)	Data Time 0.001 (0.016)	Loss 2.7355 (2.6186)	Entropy 1.12058 (1.12333)	Top-1 acc 57.812 (60.932)	Top-5 acc 78.516 (81.747)	lr 0.01388
Train [56][2520/3239]	Time 0.253 (0.595)	Data Time 0.001 (0.016)	Loss 2.5810 (2.6186)	Entropy 1.12057 (1.12332)	Top-1 acc 59.375 (60.930)	Top-5 acc 82.812 (81.748)	lr 0.01388
Train [56][2530/3239]	Time 0.230 (0.595)	Data Time 0.001 (0.016)	Loss 2.5323 (2.6187)	Entropy 1.12056 (1.12331)	Top-1 acc 64.844 (60.930)	Top-5 acc 82.422 (81.746)	lr 0.01388
Train [56][2540/3239]	Time 2.453 (0.594)	Data Time 0.001 (0.016)	Loss 2.6137 (2.6189)	Entropy 1.12056 (1.12330)	Top-1 acc 63.281 (60.924)	Top-5 acc 81.641 (81.744)	lr 0.01388
Train [56][2550/3239]	Time 0.218 (0.593)	Data Time 0.001 (0.016)	Loss 2.4949 (2.6189)	Entropy 1.12053 (1.12329)	Top-1 acc 60.938 (60.926)	Top-5 acc 83.984 (81.740)	lr 0.01388
Train [56][2560/3239]	Time 0.225 (0.592)	Data Time 0.002 (0.016)	Loss 2.6148 (2.6188)	Entropy 1.12051 (1.12327)	Top-1 acc 64.062 (60.932)	Top-5 acc 80.859 (81.740)	lr 0.01387
Train [56][2570/3239]	Time 0.337 (0.592)	Data Time 0.001 (0.016)	Loss 2.6523 (2.6189)	Entropy 1.12054 (1.12326)	Top-1 acc 60.156 (60.929)	Top-5 acc 83.984 (81.741)	lr 0.01387
Train [56][2580/3239]	Time 0.221 (0.591)	Data Time 0.001 (0.015)	Loss 2.7477 (2.6188)	Entropy 1.12051 (1.12325)	Top-1 acc 56.250 (60.929)	Top-5 acc 78.906 (81.744)	lr 0.01387
Train [56][2590/3239]	Time 0.245 (0.610)	Data Time 0.003 (0.015)	Loss 2.6803 (2.6187)	Entropy 1.12049 (1.12324)	Top-1 acc 57.812 (60.932)	Top-5 acc 81.250 (81.746)	lr 0.01387
Train [56][2600/3239]	Time 0.216 (0.609)	Data Time 0.002 (0.015)	Loss 2.6803 (2.6186)	Entropy 1.12045 (1.12323)	Top-1 acc 60.938 (60.935)	Top-5 acc 77.734 (81.747)	lr 0.01387
Train [56][2610/3239]	Time 0.213 (0.609)	Data Time 0.001 (0.015)	Loss 2.7665 (2.6187)	Entropy 1.12042 (1.12322)	Top-1 acc 60.938 (60.933)	Top-5 acc 78.906 (81.747)	lr 0.01387
Train [56][2620/3239]	Time 0.318 (0.608)	Data Time 0.001 (0.015)	Loss 2.5201 (2.6187)	Entropy 1.12043 (1.12321)	Top-1 acc 63.672 (60.931)	Top-5 acc 83.984 (81.745)	lr 0.01387
Train [56][2630/3239]	Time 0.216 (0.608)	Data Time 0.001 (0.015)	Loss 2.5324 (2.6188)	Entropy 1.12041 (1.12320)	Top-1 acc 63.672 (60.928)	Top-5 acc 82.812 (81.743)	lr 0.01387
Train [56][2640/3239]	Time 0.224 (0.607)	Data Time 0.001 (0.015)	Loss 2.6605 (2.6190)	Entropy 1.12042 (1.12319)	Top-1 acc 57.031 (60.923)	Top-5 acc 80.469 (81.737)	lr 0.01387
Train [56][2650/3239]	Time 0.265 (0.606)	Data Time 0.001 (0.015)	Loss 2.6314 (2.6190)	Entropy 1.12033 (1.12318)	Top-1 acc 62.109 (60.925)	Top-5 acc 81.641 (81.739)	lr 0.01387
Train [56][2660/3239]	Time 0.256 (0.606)	Data Time 0.002 (0.015)	Loss 2.6999 (2.6190)	Entropy 1.12034 (1.12317)	Top-1 acc 58.203 (60.924)	Top-5 acc 79.297 (81.736)	lr 0.01386
Train [56][2670/3239]	Time 0.205 (0.605)	Data Time 0.001 (0.015)	Loss 2.5702 (2.6190)	Entropy 1.12034 (1.12316)	Top-1 acc 61.328 (60.925)	Top-5 acc 83.984 (81.736)	lr 0.01386
Train [56][2680/3239]	Time 0.227 (0.605)	Data Time 0.001 (0.015)	Loss 2.5915 (2.6191)	Entropy 1.12028 (1.12315)	Top-1 acc 62.891 (60.922)	Top-5 acc 82.812 (81.734)	lr 0.01386
Train [56][2690/3239]	Time 0.253 (0.604)	Data Time 0.003 (0.015)	Loss 2.7426 (2.6192)	Entropy 1.12027 (1.12314)	Top-1 acc 59.766 (60.922)	Top-5 acc 80.078 (81.733)	lr 0.01386
Train [56][2700/3239]	Time 0.247 (0.604)	Data Time 0.001 (0.015)	Loss 2.5838 (2.6191)	Entropy 1.12027 (1.12313)	Top-1 acc 60.156 (60.923)	Top-5 acc 80.469 (81.737)	lr 0.01386
Train [56][2710/3239]	Time 0.221 (0.603)	Data Time 0.002 (0.015)	Loss 2.7178 (2.6192)	Entropy 1.12028 (1.12312)	Top-1 acc 62.109 (60.920)	Top-5 acc 79.297 (81.735)	lr 0.01386
Train [56][2720/3239]	Time 0.338 (0.603)	Data Time 0.001 (0.015)	Loss 2.5214 (2.6191)	Entropy 1.12041 (1.12311)	Top-1 acc 66.406 (60.920)	Top-5 acc 81.641 (81.736)	lr 0.01386
Train [56][2730/3239]	Time 0.235 (0.602)	Data Time 0.001 (0.015)	Loss 2.5628 (2.6192)	Entropy 1.12039 (1.12310)	Top-1 acc 61.719 (60.919)	Top-5 acc 85.156 (81.735)	lr 0.01386
Train [56][2740/3239]	Time 0.224 (0.602)	Data Time 0.001 (0.015)	Loss 2.6690 (2.6190)	Entropy 1.12039 (1.12309)	Top-1 acc 62.109 (60.921)	Top-5 acc 81.641 (81.740)	lr 0.01386
Train [56][2750/3239]	Time 0.280 (0.601)	Data Time 0.001 (0.015)	Loss 2.5484 (2.6190)	Entropy 1.12038 (1.12308)	Top-1 acc 60.547 (60.919)	Top-5 acc 85.156 (81.740)	lr 0.01386
Train [56][2760/3239]	Time 0.267 (0.601)	Data Time 0.001 (0.015)	Loss 2.4996 (2.6188)	Entropy 1.12032 (1.12307)	Top-1 acc 61.719 (60.922)	Top-5 acc 85.938 (81.745)	lr 0.01385
Train [56][2770/3239]	Time 0.247 (0.601)	Data Time 0.001 (0.015)	Loss 2.5392 (2.6188)	Entropy 1.12030 (1.12306)	Top-1 acc 63.281 (60.923)	Top-5 acc 83.984 (81.744)	lr 0.01385
Train [56][2780/3239]	Time 0.225 (0.600)	Data Time 0.001 (0.014)	Loss 2.5876 (2.6187)	Entropy 1.12029 (1.12305)	Top-1 acc 59.375 (60.923)	Top-5 acc 81.250 (81.746)	lr 0.01385
Train [56][2790/3239]	Time 0.248 (0.600)	Data Time 0.001 (0.014)	Loss 2.6341 (2.6188)	Entropy 1.12026 (1.12304)	Top-1 acc 61.328 (60.918)	Top-5 acc 81.250 (81.747)	lr 0.01385
Train [56][2800/3239]	Time 0.231 (0.599)	Data Time 0.001 (0.014)	Loss 2.4714 (2.6188)	Entropy 1.12025 (1.12303)	Top-1 acc 64.844 (60.916)	Top-5 acc 85.547 (81.749)	lr 0.01385
Train [56][2810/3239]	Time 0.371 (0.599)	Data Time 0.001 (0.014)	Loss 2.5588 (2.6189)	Entropy 1.12022 (1.12302)	Top-1 acc 61.328 (60.915)	Top-5 acc 81.641 (81.746)	lr 0.01385
Train [56][2820/3239]	Time 0.214 (0.598)	Data Time 0.001 (0.014)	Loss 2.5570 (2.6190)	Entropy 1.12024 (1.12301)	Top-1 acc 64.062 (60.915)	Top-5 acc 82.031 (81.744)	lr 0.01385
Train [56][2830/3239]	Time 0.210 (0.598)	Data Time 0.001 (0.014)	Loss 2.5028 (2.6191)	Entropy 1.12022 (1.12300)	Top-1 acc 61.719 (60.913)	Top-5 acc 83.984 (81.740)	lr 0.01385
Train [56][2840/3239]	Time 0.248 (0.597)	Data Time 0.001 (0.014)	Loss 2.4771 (2.6190)	Entropy 1.12018 (1.12299)	Top-1 acc 65.234 (60.917)	Top-5 acc 87.109 (81.742)	lr 0.01385
Train [56][2850/3239]	Time 0.211 (0.597)	Data Time 0.001 (0.014)	Loss 2.6660 (2.6191)	Entropy 1.12016 (1.12298)	Top-1 acc 59.766 (60.916)	Top-5 acc 83.203 (81.743)	lr 0.01385
Train [56][2860/3239]	Time 0.243 (0.596)	Data Time 0.001 (0.014)	Loss 2.5838 (2.6190)	Entropy 1.12013 (1.12297)	Top-1 acc 60.547 (60.916)	Top-5 acc 83.203 (81.748)	lr 0.01384
Train [56][2870/3239]	Time 0.249 (0.596)	Data Time 0.001 (0.014)	Loss 2.4863 (2.6190)	Entropy 1.12011 (1.12296)	Top-1 acc 61.328 (60.913)	Top-5 acc 85.156 (81.750)	lr 0.01384
Train [56][2880/3239]	Time 0.253 (0.595)	Data Time 0.001 (0.014)	Loss 2.7526 (2.6190)	Entropy 1.12008 (1.12295)	Top-1 acc 58.984 (60.914)	Top-5 acc 77.734 (81.750)	lr 0.01384
Train [56][2890/3239]	Time 0.289 (0.595)	Data Time 0.001 (0.014)	Loss 2.5841 (2.6190)	Entropy 1.11971 (1.12294)	Top-1 acc 59.375 (60.912)	Top-5 acc 81.641 (81.752)	lr 0.01384
Train [56][2900/3239]	Time 0.231 (0.594)	Data Time 0.001 (0.014)	Loss 2.6161 (2.6190)	Entropy 1.11969 (1.12293)	Top-1 acc 59.766 (60.914)	Top-5 acc 81.641 (81.751)	lr 0.01384
Train [56][2910/3239]	Time 0.256 (0.594)	Data Time 0.001 (0.014)	Loss 2.4840 (2.6189)	Entropy 1.11966 (1.12291)	Top-1 acc 66.797 (60.917)	Top-5 acc 84.375 (81.751)	lr 0.01384
Train [56][2920/3239]	Time 0.266 (0.610)	Data Time 0.004 (0.014)	Loss 2.7010 (2.6189)	Entropy 1.11952 (1.12290)	Top-1 acc 59.375 (60.916)	Top-5 acc 79.297 (81.753)	lr 0.01384
Train [56][2930/3239]	Time 0.235 (0.609)	Data Time 0.002 (0.014)	Loss 2.5885 (2.6190)	Entropy 1.11953 (1.12289)	Top-1 acc 63.672 (60.914)	Top-5 acc 80.859 (81.750)	lr 0.01384
Train [56][2940/3239]	Time 0.220 (0.609)	Data Time 0.001 (0.014)	Loss 2.6509 (2.6189)	Entropy 1.11949 (1.12288)	Top-1 acc 61.719 (60.917)	Top-5 acc 81.250 (81.751)	lr 0.01384
Train [56][2950/3239]	Time 0.345 (0.609)	Data Time 0.001 (0.014)	Loss 2.5449 (2.6188)	Entropy 1.11948 (1.12287)	Top-1 acc 64.453 (60.922)	Top-5 acc 82.031 (81.752)	lr 0.01384
Train [56][2960/3239]	Time 0.280 (0.608)	Data Time 0.001 (0.014)	Loss 2.6191 (2.6186)	Entropy 1.11940 (1.12286)	Top-1 acc 58.984 (60.925)	Top-5 acc 82.031 (81.759)	lr 0.01383
Train [56][2970/3239]	Time 0.252 (0.607)	Data Time 0.001 (0.014)	Loss 2.6509 (2.6186)	Entropy 1.11939 (1.12285)	Top-1 acc 59.375 (60.927)	Top-5 acc 80.469 (81.754)	lr 0.01383
Train [56][2980/3239]	Time 0.253 (0.607)	Data Time 0.001 (0.014)	Loss 2.5981 (2.6187)	Entropy 1.11935 (1.12283)	Top-1 acc 61.328 (60.924)	Top-5 acc 82.422 (81.751)	lr 0.01383
Train [56][2990/3239]	Time 0.249 (0.607)	Data Time 0.001 (0.014)	Loss 2.8570 (2.6190)	Entropy 1.11933 (1.12282)	Top-1 acc 54.688 (60.916)	Top-5 acc 76.562 (81.746)	lr 0.01383
Train [56][3000/3239]	Time 0.242 (0.606)	Data Time 0.001 (0.014)	Loss 2.5521 (2.6190)	Entropy 1.11932 (1.12281)	Top-1 acc 62.109 (60.917)	Top-5 acc 83.984 (81.743)	lr 0.01383
Train [56][3010/3239]	Time 0.174 (0.606)	Data Time 0.001 (0.014)	Loss 2.4656 (2.6189)	Entropy 1.11931 (1.12280)	Top-1 acc 62.109 (60.918)	Top-5 acc 85.547 (81.742)	lr 0.01383
Train [56][3020/3239]	Time 0.235 (0.605)	Data Time 0.001 (0.013)	Loss 2.7925 (2.6190)	Entropy 1.11936 (1.12279)	Top-1 acc 61.328 (60.918)	Top-5 acc 75.391 (81.742)	lr 0.01383
Train [56][3030/3239]	Time 0.207 (0.605)	Data Time 0.001 (0.013)	Loss 2.4746 (2.6189)	Entropy 1.11931 (1.12278)	Top-1 acc 63.672 (60.920)	Top-5 acc 86.328 (81.743)	lr 0.01383
Train [56][3040/3239]	Time 0.338 (0.604)	Data Time 0.001 (0.013)	Loss 2.6611 (2.6189)	Entropy 1.11924 (1.12277)	Top-1 acc 58.984 (60.924)	Top-5 acc 81.250 (81.743)	lr 0.01383
Train [56][3050/3239]	Time 0.223 (0.604)	Data Time 0.001 (0.013)	Loss 2.6502 (2.6188)	Entropy 1.11925 (1.12275)	Top-1 acc 54.297 (60.926)	Top-5 acc 81.250 (81.743)	lr 0.01383
Train [56][3060/3239]	Time 0.225 (0.603)	Data Time 0.001 (0.013)	Loss 2.6604 (2.6188)	Entropy 1.11920 (1.12274)	Top-1 acc 57.422 (60.924)	Top-5 acc 80.859 (81.742)	lr 0.01382
Train [56][3070/3239]	Time 0.204 (0.603)	Data Time 0.002 (0.013)	Loss 2.6368 (2.6190)	Entropy 1.11917 (1.12273)	Top-1 acc 60.156 (60.919)	Top-5 acc 80.859 (81.740)	lr 0.01382
Train [56][3080/3239]	Time 0.219 (0.602)	Data Time 0.001 (0.013)	Loss 2.5719 (2.6193)	Entropy 1.11919 (1.12272)	Top-1 acc 61.328 (60.912)	Top-5 acc 80.859 (81.735)	lr 0.01382
Train [56][3090/3239]	Time 0.354 (0.602)	Data Time 0.001 (0.013)	Loss 2.6015 (2.6193)	Entropy 1.11914 (1.12271)	Top-1 acc 63.281 (60.915)	Top-5 acc 80.859 (81.734)	lr 0.01382
Train [56][3100/3239]	Time 0.223 (0.602)	Data Time 0.001 (0.013)	Loss 2.5063 (2.6191)	Entropy 1.11912 (1.12270)	Top-1 acc 63.672 (60.917)	Top-5 acc 82.812 (81.736)	lr 0.01382
Train [56][3110/3239]	Time 0.228 (0.601)	Data Time 0.001 (0.013)	Loss 2.6234 (2.6191)	Entropy 1.11848 (1.12268)	Top-1 acc 58.594 (60.917)	Top-5 acc 80.859 (81.737)	lr 0.01382
Train [56][3120/3239]	Time 0.233 (0.601)	Data Time 0.001 (0.013)	Loss 2.6228 (2.6191)	Entropy 1.11859 (1.12267)	Top-1 acc 60.547 (60.916)	Top-5 acc 82.031 (81.739)	lr 0.01382
Train [56][3130/3239]	Time 0.266 (0.600)	Data Time 0.001 (0.013)	Loss 3.0417 (2.6193)	Entropy 1.11860 (1.12266)	Top-1 acc 49.219 (60.909)	Top-5 acc 76.562 (81.736)	lr 0.01382
Train [56][3140/3239]	Time 0.298 (0.600)	Data Time 0.001 (0.013)	Loss 2.8595 (2.6196)	Entropy 1.11847 (1.12265)	Top-1 acc 54.297 (60.900)	Top-5 acc 76.953 (81.728)	lr 0.01382
Train [56][3150/3239]	Time 0.226 (0.599)	Data Time 0.001 (0.013)	Loss 2.8608 (2.6197)	Entropy 1.11848 (1.12263)	Top-1 acc 56.641 (60.897)	Top-5 acc 78.516 (81.727)	lr 0.01382
Train [56][3160/3239]	Time 0.250 (0.599)	Data Time 0.001 (0.013)	Loss 2.3984 (2.6197)	Entropy 1.11847 (1.12262)	Top-1 acc 66.016 (60.896)	Top-5 acc 83.203 (81.724)	lr 0.01381
Train [56][3170/3239]	Time 0.281 (0.598)	Data Time 0.001 (0.013)	Loss 2.5684 (2.6195)	Entropy 1.11849 (1.12261)	Top-1 acc 63.281 (60.901)	Top-5 acc 83.203 (81.729)	lr 0.01381
Train [56][3180/3239]	Time 0.239 (0.598)	Data Time 0.000 (0.013)	Loss 3.7637 (2.6200)	Entropy 1.11848 (1.12259)	Top-1 acc 41.406 (60.891)	Top-5 acc 62.891 (81.720)	lr 0.01381
Train [56][3190/3239]	Time 0.211 (0.598)	Data Time 0.000 (0.013)	Loss 2.6969 (2.6201)	Entropy 1.11842 (1.12258)	Top-1 acc 59.766 (60.888)	Top-5 acc 81.250 (81.720)	lr 0.01381
Train [56][3200/3239]	Time 0.203 (0.597)	Data Time 0.000 (0.013)	Loss 2.5247 (2.6199)	Entropy 1.11838 (1.12257)	Top-1 acc 68.359 (60.891)	Top-5 acc 83.594 (81.723)	lr 0.01381
Train [56][3210/3239]	Time 0.237 (0.597)	Data Time 0.000 (0.013)	Loss 2.6545 (2.6198)	Entropy 1.11838 (1.12255)	Top-1 acc 64.453 (60.896)	Top-5 acc 80.469 (81.728)	lr 0.01381
Train [56][3220/3239]	Time 0.207 (0.596)	Data Time 0.000 (0.013)	Loss 2.5704 (2.6198)	Entropy 1.11839 (1.12254)	Top-1 acc 61.328 (60.895)	Top-5 acc 82.422 (81.730)	lr 0.01381
Train [56][3230/3239]	Time 0.220 (0.596)	Data Time 0.000 (0.013)	Loss 2.7678 (2.6198)	Entropy 1.11839 (1.12253)	Top-1 acc 57.422 (60.895)	Top-5 acc 77.734 (81.729)	lr 0.01381
Train [56][3239/3239]	Time 2.340 (0.595)	Data Time 0.000 (0.013)	Loss 2.6357 (2.6198)	Entropy 1.11839 (1.12252)	Top-1 acc 58.025 (60.896)	Top-5 acc 75.309 (81.728)	lr 0.01381
==========Valid [56/120]	loss 1.484	top-1 acc 66.421 (66.421)	top-5 acc 86.255	Train top-1 60.896	top-5 81.728	Entropy 1.11839	Latency-None: 0.000ms	Flops: 548.34M
Train [57][0/3239]	Time 38.880 (38.880)	Data Time 37.345 (37.345)	Loss 2.5328 (2.5328)	Entropy 1.11838 (1.11838)	Top-1 acc 59.375 (59.375)	Top-5 acc 82.422 (82.422)	lr 0.01381
Train [57][10/3239]	Time 53.585 (8.677)	Data Time 0.002 (3.399)	Loss 2.5127 (2.6018)	Entropy 1.11838 (1.11838)	Top-1 acc 63.672 (60.511)	Top-5 acc 82.422 (81.925)	lr 0.01381
Train [57][20/3239]	Time 0.270 (4.661)	Data Time 0.003 (1.782)	Loss 2.5662 (2.5853)	Entropy 1.11839 (1.11839)	Top-1 acc 60.938 (61.012)	Top-5 acc 80.859 (82.106)	lr 0.01380
Train [57][30/3239]	Time 0.230 (3.317)	Data Time 0.002 (1.208)	Loss 2.5094 (2.5838)	Entropy 1.11837 (1.11838)	Top-1 acc 67.188 (61.202)	Top-5 acc 83.203 (82.371)	lr 0.01380
Train [57][40/3239]	Time 0.219 (2.623)	Data Time 0.001 (0.914)	Loss 2.5473 (2.5834)	Entropy 1.11811 (1.11833)	Top-1 acc 62.891 (61.566)	Top-5 acc 82.422 (82.269)	lr 0.01380
Train [57][50/3239]	Time 0.320 (2.199)	Data Time 0.001 (0.735)	Loss 2.4847 (2.5770)	Entropy 1.11813 (1.11829)	Top-1 acc 66.016 (61.765)	Top-5 acc 83.594 (82.292)	lr 0.01380
Train [57][60/3239]	Time 0.220 (1.914)	Data Time 0.001 (0.615)	Loss 2.3989 (2.5756)	Entropy 1.11811 (1.11826)	Top-1 acc 65.625 (62.001)	Top-5 acc 86.719 (82.364)	lr 0.01380
Train [57][70/3239]	Time 0.234 (1.708)	Data Time 0.001 (0.528)	Loss 2.5705 (2.5687)	Entropy 1.11810 (1.11824)	Top-1 acc 65.234 (62.197)	Top-5 acc 82.812 (82.488)	lr 0.01380
Train [57][80/3239]	Time 0.236 (1.555)	Data Time 0.001 (0.463)	Loss 2.4679 (2.5836)	Entropy 1.11804 (1.11822)	Top-1 acc 63.672 (61.801)	Top-5 acc 83.594 (82.325)	lr 0.01380
Train [57][90/3239]	Time 0.208 (1.435)	Data Time 0.001 (0.413)	Loss 2.4010 (2.5803)	Entropy 1.11791 (1.11819)	Top-1 acc 63.672 (61.697)	Top-5 acc 89.062 (82.452)	lr 0.01380
Train [57][100/3239]	Time 0.235 (1.341)	Data Time 0.001 (0.372)	Loss 2.4204 (2.5756)	Entropy 1.11788 (1.11817)	Top-1 acc 65.234 (61.854)	Top-5 acc 84.766 (82.627)	lr 0.01380
Train [57][110/3239]	Time 0.295 (1.261)	Data Time 0.001 (0.339)	Loss 2.6602 (2.5791)	Entropy 1.11785 (1.11814)	Top-1 acc 59.766 (61.842)	Top-5 acc 81.250 (82.542)	lr 0.01380
Train [57][120/3239]	Time 2.468 (1.195)	Data Time 0.001 (0.311)	Loss 2.5982 (2.5802)	Entropy 1.11785 (1.11812)	Top-1 acc 60.156 (61.774)	Top-5 acc 80.859 (82.522)	lr 0.01379
Train [57][130/3239]	Time 0.221 (1.122)	Data Time 0.002 (0.287)	Loss 2.5623 (2.5821)	Entropy 1.11784 (1.11809)	Top-1 acc 61.719 (61.731)	Top-5 acc 81.641 (82.568)	lr 0.01379
Train [57][140/3239]	Time 0.370 (1.075)	Data Time 0.002 (0.267)	Loss 2.6428 (2.5821)	Entropy 1.11791 (1.11808)	Top-1 acc 59.766 (61.744)	Top-5 acc 80.859 (82.607)	lr 0.01379
Train [57][150/3239]	Time 0.224 (1.034)	Data Time 0.001 (0.249)	Loss 2.4692 (2.5812)	Entropy 1.11792 (1.11807)	Top-1 acc 62.500 (61.830)	Top-5 acc 86.328 (82.688)	lr 0.01379
Train [57][160/3239]	Time 0.260 (0.999)	Data Time 0.001 (0.234)	Loss 2.5348 (2.5784)	Entropy 1.11790 (1.11806)	Top-1 acc 65.234 (61.959)	Top-5 acc 83.203 (82.706)	lr 0.01379
Train [57][170/3239]	Time 0.219 (0.967)	Data Time 0.001 (0.220)	Loss 2.5461 (2.5782)	Entropy 1.11790 (1.11805)	Top-1 acc 64.062 (62.023)	Top-5 acc 84.375 (82.735)	lr 0.01379
Train [57][180/3239]	Time 0.219 (0.939)	Data Time 0.001 (0.208)	Loss 2.4678 (2.5773)	Entropy 1.11793 (1.11804)	Top-1 acc 64.062 (61.991)	Top-5 acc 83.984 (82.746)	lr 0.01379
Train [57][190/3239]	Time 0.318 (0.915)	Data Time 0.001 (0.197)	Loss 2.5187 (2.5792)	Entropy 1.11794 (1.11804)	Top-1 acc 67.969 (61.972)	Top-5 acc 83.984 (82.700)	lr 0.01379
Train [57][200/3239]	Time 0.205 (0.893)	Data Time 0.001 (0.188)	Loss 2.6321 (2.5812)	Entropy 1.11784 (1.11803)	Top-1 acc 63.672 (61.929)	Top-5 acc 80.469 (82.640)	lr 0.01379
Train [57][210/3239]	Time 0.211 (0.872)	Data Time 0.001 (0.179)	Loss 2.6169 (2.5798)	Entropy 1.11781 (1.11802)	Top-1 acc 62.891 (62.006)	Top-5 acc 78.906 (82.631)	lr 0.01379
Train [57][220/3239]	Time 0.249 (0.853)	Data Time 0.001 (0.171)	Loss 2.6269 (2.5812)	Entropy 1.11779 (1.11801)	Top-1 acc 58.203 (61.908)	Top-5 acc 82.812 (82.606)	lr 0.01378
Train [57][230/3239]	Time 2.429 (0.836)	Data Time 0.001 (0.163)	Loss 2.5384 (2.5826)	Entropy 1.11779 (1.11800)	Top-1 acc 62.500 (61.917)	Top-5 acc 83.984 (82.564)	lr 0.01378
Train [57][240/3239]	Time 0.304 (0.811)	Data Time 0.001 (0.157)	Loss 2.6373 (2.5854)	Entropy 1.11779 (1.11799)	Top-1 acc 58.594 (61.822)	Top-5 acc 80.078 (82.501)	lr 0.01378
Train [57][250/3239]	Time 0.247 (0.797)	Data Time 0.001 (0.151)	Loss 2.6880 (2.5848)	Entropy 1.11776 (1.11798)	Top-1 acc 56.641 (61.804)	Top-5 acc 81.250 (82.514)	lr 0.01378
Train [57][260/3239]	Time 0.237 (0.783)	Data Time 0.001 (0.145)	Loss 2.7576 (2.5865)	Entropy 1.11772 (1.11798)	Top-1 acc 55.859 (61.764)	Top-5 acc 79.297 (82.464)	lr 0.01378
Train [57][270/3239]	Time 0.228 (0.770)	Data Time 0.001 (0.140)	Loss 2.6229 (2.5867)	Entropy 1.11773 (1.11797)	Top-1 acc 57.031 (61.761)	Top-5 acc 81.641 (82.433)	lr 0.01378
Train [57][280/3239]	Time 0.263 (0.759)	Data Time 0.001 (0.135)	Loss 2.6738 (2.5874)	Entropy 1.11773 (1.11796)	Top-1 acc 61.328 (61.749)	Top-5 acc 81.250 (82.422)	lr 0.01378
Train [57][290/3239]	Time 0.221 (0.749)	Data Time 0.001 (0.130)	Loss 2.6163 (2.5879)	Entropy 1.11772 (1.11795)	Top-1 acc 62.500 (61.767)	Top-5 acc 84.375 (82.412)	lr 0.01378
Train [57][300/3239]	Time 0.288 (0.740)	Data Time 0.001 (0.126)	Loss 2.5026 (2.5889)	Entropy 1.11767 (1.11794)	Top-1 acc 62.891 (61.749)	Top-5 acc 82.422 (82.371)	lr 0.01378
Train [57][310/3239]	Time 0.264 (0.731)	Data Time 0.001 (0.122)	Loss 2.4231 (2.5885)	Entropy 1.11767 (1.11793)	Top-1 acc 67.578 (61.782)	Top-5 acc 88.672 (82.363)	lr 0.01378
Train [57][320/3239]	Time 0.213 (0.723)	Data Time 0.001 (0.118)	Loss 2.7693 (2.5895)	Entropy 1.11764 (1.11792)	Top-1 acc 55.469 (61.743)	Top-5 acc 80.859 (82.337)	lr 0.01377
Train [57][330/3239]	Time 0.282 (0.715)	Data Time 0.001 (0.115)	Loss 2.5352 (2.5897)	Entropy 1.11763 (1.11792)	Top-1 acc 64.062 (61.713)	Top-5 acc 85.156 (82.346)	lr 0.01377
Train [57][340/3239]	Time 2.613 (0.708)	Data Time 0.001 (0.111)	Loss 2.5335 (2.5903)	Entropy 1.11763 (1.11791)	Top-1 acc 63.281 (61.711)	Top-5 acc 84.375 (82.341)	lr 0.01377
Train [57][350/3239]	Time 0.232 (0.695)	Data Time 0.001 (0.108)	Loss 2.5153 (2.5917)	Entropy 1.11764 (1.11790)	Top-1 acc 60.938 (61.681)	Top-5 acc 82.422 (82.304)	lr 0.01377
Train [57][360/3239]	Time 0.204 (0.688)	Data Time 0.001 (0.105)	Loss 2.7433 (2.5923)	Entropy 1.11762 (1.11789)	Top-1 acc 57.422 (61.662)	Top-5 acc 79.297 (82.281)	lr 0.01377
Train [57][370/3239]	Time 0.224 (0.681)	Data Time 0.001 (0.102)	Loss 2.5679 (2.5935)	Entropy 1.11757 (1.11788)	Top-1 acc 62.500 (61.671)	Top-5 acc 81.250 (82.256)	lr 0.01377
Train [57][380/3239]	Time 0.218 (0.810)	Data Time 0.002 (0.100)	Loss 2.6589 (2.5928)	Entropy 1.11754 (1.11787)	Top-1 acc 60.938 (61.697)	Top-5 acc 81.250 (82.277)	lr 0.01377
Train [57][390/3239]	Time 0.227 (0.802)	Data Time 0.002 (0.097)	Loss 2.3672 (2.5924)	Entropy 1.11744 (1.11786)	Top-1 acc 66.406 (61.719)	Top-5 acc 88.672 (82.299)	lr 0.01377
Train [57][400/3239]	Time 0.224 (0.793)	Data Time 0.001 (0.095)	Loss 2.7088 (2.5928)	Entropy 1.11742 (1.11785)	Top-1 acc 57.031 (61.666)	Top-5 acc 80.469 (82.294)	lr 0.01377
Train [57][410/3239]	Time 0.220 (0.785)	Data Time 0.001 (0.093)	Loss 2.6586 (2.5923)	Entropy 1.11738 (1.11784)	Top-1 acc 57.031 (61.662)	Top-5 acc 80.859 (82.306)	lr 0.01377
Train [57][420/3239]	Time 0.331 (0.778)	Data Time 0.001 (0.091)	Loss 2.5627 (2.5921)	Entropy 1.11734 (1.11783)	Top-1 acc 65.234 (61.666)	Top-5 acc 83.984 (82.315)	lr 0.01376
Train [57][430/3239]	Time 0.212 (0.770)	Data Time 0.001 (0.088)	Loss 2.4671 (2.5916)	Entropy 1.11734 (1.11782)	Top-1 acc 64.844 (61.698)	Top-5 acc 83.984 (82.325)	lr 0.01376
Train [57][440/3239]	Time 0.250 (0.764)	Data Time 0.001 (0.087)	Loss 2.5214 (2.5926)	Entropy 1.11734 (1.11781)	Top-1 acc 66.016 (61.688)	Top-5 acc 84.375 (82.284)	lr 0.01376
Train [57][450/3239]	Time 2.371 (0.757)	Data Time 0.001 (0.085)	Loss 2.6742 (2.5930)	Entropy 1.11734 (1.11780)	Top-1 acc 58.594 (61.677)	Top-5 acc 81.641 (82.262)	lr 0.01376
Train [57][460/3239]	Time 0.233 (0.745)	Data Time 0.001 (0.083)	Loss 2.5802 (2.5925)	Entropy 1.11731 (1.11779)	Top-1 acc 63.672 (61.665)	Top-5 acc 83.984 (82.280)	lr 0.01376
Train [57][470/3239]	Time 0.336 (0.740)	Data Time 0.001 (0.081)	Loss 2.6066 (2.5915)	Entropy 1.11726 (1.11778)	Top-1 acc 63.281 (61.695)	Top-5 acc 82.422 (82.312)	lr 0.01376
Train [57][480/3239]	Time 0.244 (0.734)	Data Time 0.001 (0.079)	Loss 2.5644 (2.5921)	Entropy 1.11728 (1.11777)	Top-1 acc 60.156 (61.664)	Top-5 acc 82.031 (82.297)	lr 0.01376
Train [57][490/3239]	Time 0.227 (0.728)	Data Time 0.002 (0.078)	Loss 2.6472 (2.5921)	Entropy 1.11727 (1.11776)	Top-1 acc 62.109 (61.673)	Top-5 acc 81.641 (82.297)	lr 0.01376
Train [57][500/3239]	Time 0.221 (0.723)	Data Time 0.001 (0.076)	Loss 2.8057 (2.5915)	Entropy 1.11719 (1.11775)	Top-1 acc 56.641 (61.688)	Top-5 acc 79.688 (82.319)	lr 0.01376
Train [57][510/3239]	Time 0.266 (0.718)	Data Time 0.002 (0.075)	Loss 2.5590 (2.5913)	Entropy 1.11713 (1.11773)	Top-1 acc 62.891 (61.697)	Top-5 acc 80.859 (82.309)	lr 0.01376
Train [57][520/3239]	Time 0.236 (0.713)	Data Time 0.001 (0.073)	Loss 2.5556 (2.5922)	Entropy 1.11799 (1.11773)	Top-1 acc 64.453 (61.687)	Top-5 acc 84.375 (82.290)	lr 0.01375
Train [57][530/3239]	Time 0.241 (0.708)	Data Time 0.001 (0.072)	Loss 2.4195 (2.5920)	Entropy 1.11795 (1.11773)	Top-1 acc 67.969 (61.701)	Top-5 acc 85.547 (82.303)	lr 0.01375
Train [57][540/3239]	Time 0.204 (0.704)	Data Time 0.001 (0.071)	Loss 2.5657 (2.5918)	Entropy 1.11793 (1.11774)	Top-1 acc 62.891 (61.704)	Top-5 acc 84.375 (82.308)	lr 0.01375
Train [57][550/3239]	Time 0.263 (0.699)	Data Time 0.001 (0.070)	Loss 2.6407 (2.5919)	Entropy 1.11791 (1.11774)	Top-1 acc 57.422 (61.682)	Top-5 acc 83.203 (82.315)	lr 0.01375
Train [57][560/3239]	Time 2.507 (0.695)	Data Time 0.001 (0.068)	Loss 2.6829 (2.5921)	Entropy 1.11791 (1.11774)	Top-1 acc 57.812 (61.671)	Top-5 acc 80.078 (82.314)	lr 0.01375
Train [57][570/3239]	Time 0.217 (0.687)	Data Time 0.001 (0.067)	Loss 2.5221 (2.5910)	Entropy 1.11787 (1.11775)	Top-1 acc 60.938 (61.688)	Top-5 acc 83.203 (82.343)	lr 0.01375
Train [57][580/3239]	Time 0.236 (0.683)	Data Time 0.001 (0.066)	Loss 2.4234 (2.5920)	Entropy 1.11785 (1.11775)	Top-1 acc 67.188 (61.687)	Top-5 acc 85.938 (82.310)	lr 0.01375
Train [57][590/3239]	Time 0.221 (0.680)	Data Time 0.001 (0.065)	Loss 2.6833 (2.5919)	Entropy 1.11784 (1.11775)	Top-1 acc 61.328 (61.691)	Top-5 acc 81.641 (82.306)	lr 0.01375
Train [57][600/3239]	Time 0.217 (0.676)	Data Time 0.002 (0.064)	Loss 2.6588 (2.5919)	Entropy 1.11781 (1.11775)	Top-1 acc 59.766 (61.689)	Top-5 acc 78.906 (82.293)	lr 0.01375
Train [57][610/3239]	Time 0.309 (0.672)	Data Time 0.001 (0.063)	Loss 2.6962 (2.5918)	Entropy 1.11780 (1.11775)	Top-1 acc 60.938 (61.699)	Top-5 acc 78.906 (82.286)	lr 0.01375
Train [57][620/3239]	Time 0.208 (0.669)	Data Time 0.001 (0.062)	Loss 2.6127 (2.5921)	Entropy 1.11782 (1.11775)	Top-1 acc 59.766 (61.673)	Top-5 acc 82.031 (82.290)	lr 0.01374
Train [57][630/3239]	Time 0.226 (0.665)	Data Time 0.001 (0.061)	Loss 2.5597 (2.5921)	Entropy 1.11776 (1.11775)	Top-1 acc 62.109 (61.674)	Top-5 acc 84.766 (82.291)	lr 0.01374
Train [57][640/3239]	Time 0.246 (0.662)	Data Time 0.001 (0.060)	Loss 2.4631 (2.5917)	Entropy 1.11770 (1.11775)	Top-1 acc 66.797 (61.678)	Top-5 acc 84.766 (82.305)	lr 0.01374
Train [57][650/3239]	Time 0.205 (0.659)	Data Time 0.001 (0.059)	Loss 2.6279 (2.5918)	Entropy 1.11768 (1.11775)	Top-1 acc 61.719 (61.665)	Top-5 acc 82.422 (82.306)	lr 0.01374
Train [57][660/3239]	Time 0.259 (0.656)	Data Time 0.001 (0.058)	Loss 2.7538 (2.5931)	Entropy 1.11766 (1.11775)	Top-1 acc 57.812 (61.635)	Top-5 acc 80.469 (82.285)	lr 0.01374
Train [57][670/3239]	Time 2.558 (0.653)	Data Time 0.001 (0.057)	Loss 2.6552 (2.5940)	Entropy 1.11766 (1.11775)	Top-1 acc 62.109 (61.626)	Top-5 acc 80.469 (82.257)	lr 0.01374
Train [57][680/3239]	Time 0.227 (0.647)	Data Time 0.001 (0.057)	Loss 2.5459 (2.5940)	Entropy 1.11761 (1.11775)	Top-1 acc 61.719 (61.619)	Top-5 acc 83.203 (82.254)	lr 0.01374
Train [57][690/3239]	Time 0.228 (0.644)	Data Time 0.001 (0.056)	Loss 2.4158 (2.5936)	Entropy 1.11743 (1.11774)	Top-1 acc 65.234 (61.633)	Top-5 acc 86.328 (82.258)	lr 0.01374
Train [57][700/3239]	Time 0.336 (0.642)	Data Time 0.001 (0.055)	Loss 2.6199 (2.5934)	Entropy 1.11742 (1.11774)	Top-1 acc 60.547 (61.645)	Top-5 acc 80.469 (82.261)	lr 0.01374
Train [57][710/3239]	Time 0.210 (0.639)	Data Time 0.001 (0.054)	Loss 2.6463 (2.5937)	Entropy 1.11734 (1.11773)	Top-1 acc 60.547 (61.627)	Top-5 acc 80.859 (82.257)	lr 0.01374
Train [57][720/3239]	Time 0.207 (0.637)	Data Time 0.001 (0.054)	Loss 2.6318 (2.5938)	Entropy 1.11730 (1.11773)	Top-1 acc 61.328 (61.623)	Top-5 acc 80.469 (82.255)	lr 0.01373
Train [57][730/3239]	Time 0.228 (0.634)	Data Time 0.001 (0.053)	Loss 2.7473 (2.5946)	Entropy 1.11727 (1.11772)	Top-1 acc 57.422 (61.612)	Top-5 acc 80.078 (82.241)	lr 0.01373
Train [57][740/3239]	Time 0.338 (0.697)	Data Time 0.003 (0.052)	Loss 2.5581 (2.5944)	Entropy 1.11725 (1.11772)	Top-1 acc 60.156 (61.618)	Top-5 acc 83.203 (82.241)	lr 0.01373
Train [57][750/3239]	Time 0.225 (0.694)	Data Time 0.002 (0.051)	Loss 2.5678 (2.5949)	Entropy 1.11724 (1.11771)	Top-1 acc 66.016 (61.628)	Top-5 acc 82.031 (82.219)	lr 0.01373
Train [57][760/3239]	Time 0.222 (0.691)	Data Time 0.002 (0.051)	Loss 2.6919 (2.5955)	Entropy 1.11726 (1.11770)	Top-1 acc 54.297 (61.605)	Top-5 acc 82.422 (82.215)	lr 0.01373
Train [57][770/3239]	Time 0.226 (0.688)	Data Time 0.001 (0.050)	Loss 2.7397 (2.5949)	Entropy 1.11719 (1.11770)	Top-1 acc 59.766 (61.635)	Top-5 acc 77.734 (82.227)	lr 0.01373
Train [57][780/3239]	Time 2.402 (0.685)	Data Time 0.001 (0.050)	Loss 2.6231 (2.5951)	Entropy 1.11719 (1.11769)	Top-1 acc 59.766 (61.641)	Top-5 acc 83.594 (82.223)	lr 0.01373
Train [57][790/3239]	Time 0.229 (0.679)	Data Time 0.001 (0.049)	Loss 2.6504 (2.5955)	Entropy 1.11716 (1.11768)	Top-1 acc 62.500 (61.634)	Top-5 acc 82.422 (82.221)	lr 0.01373
Train [57][800/3239]	Time 0.225 (0.677)	Data Time 0.001 (0.048)	Loss 2.6608 (2.5953)	Entropy 1.11711 (1.11768)	Top-1 acc 61.719 (61.644)	Top-5 acc 80.859 (82.222)	lr 0.01373
Train [57][810/3239]	Time 0.216 (0.674)	Data Time 0.001 (0.048)	Loss 2.7459 (2.5961)	Entropy 1.11710 (1.11767)	Top-1 acc 63.281 (61.628)	Top-5 acc 80.859 (82.205)	lr 0.01373
Train [57][820/3239]	Time 0.217 (0.671)	Data Time 0.001 (0.047)	Loss 2.4789 (2.5957)	Entropy 1.11709 (1.11766)	Top-1 acc 65.234 (61.638)	Top-5 acc 84.766 (82.210)	lr 0.01372
Train [57][830/3239]	Time 0.223 (0.669)	Data Time 0.001 (0.047)	Loss 2.5782 (2.5959)	Entropy 1.11709 (1.11766)	Top-1 acc 61.719 (61.638)	Top-5 acc 80.078 (82.200)	lr 0.01372
Train [57][840/3239]	Time 0.236 (0.666)	Data Time 0.001 (0.046)	Loss 2.5330 (2.5960)	Entropy 1.11706 (1.11765)	Top-1 acc 60.938 (61.636)	Top-5 acc 84.375 (82.196)	lr 0.01372
Train [57][850/3239]	Time 0.219 (0.664)	Data Time 0.001 (0.046)	Loss 2.5329 (2.5949)	Entropy 1.11702 (1.11764)	Top-1 acc 62.500 (61.656)	Top-5 acc 83.203 (82.212)	lr 0.01372
Train [57][860/3239]	Time 0.221 (0.662)	Data Time 0.001 (0.045)	Loss 2.8381 (2.5948)	Entropy 1.11690 (1.11764)	Top-1 acc 55.078 (61.653)	Top-5 acc 79.297 (82.215)	lr 0.01372
Train [57][870/3239]	Time 0.218 (0.659)	Data Time 0.001 (0.045)	Loss 2.5472 (2.5945)	Entropy 1.11690 (1.11763)	Top-1 acc 61.719 (61.662)	Top-5 acc 78.906 (82.223)	lr 0.01372
Train [57][880/3239]	Time 0.228 (0.657)	Data Time 0.005 (0.044)	Loss 2.6820 (2.5943)	Entropy 1.11686 (1.11762)	Top-1 acc 59.766 (61.662)	Top-5 acc 80.859 (82.223)	lr 0.01372
Train [57][890/3239]	Time 2.531 (0.655)	Data Time 0.001 (0.044)	Loss 2.5479 (2.5934)	Entropy 1.11686 (1.11761)	Top-1 acc 62.891 (61.680)	Top-5 acc 84.766 (82.240)	lr 0.01372
Train [57][900/3239]	Time 0.226 (0.650)	Data Time 0.001 (0.043)	Loss 2.4594 (2.5936)	Entropy 1.11687 (1.11760)	Top-1 acc 63.281 (61.671)	Top-5 acc 83.984 (82.236)	lr 0.01372
Train [57][910/3239]	Time 0.246 (0.648)	Data Time 0.001 (0.043)	Loss 2.5572 (2.5938)	Entropy 1.11686 (1.11759)	Top-1 acc 61.719 (61.667)	Top-5 acc 81.250 (82.238)	lr 0.01372
Train [57][920/3239]	Time 0.243 (0.646)	Data Time 0.002 (0.042)	Loss 2.7350 (2.5938)	Entropy 1.11684 (1.11759)	Top-1 acc 57.812 (61.666)	Top-5 acc 77.734 (82.237)	lr 0.01371
Train [57][930/3239]	Time 0.228 (0.644)	Data Time 0.001 (0.042)	Loss 2.4670 (2.5939)	Entropy 1.11681 (1.11758)	Top-1 acc 68.359 (61.671)	Top-5 acc 85.156 (82.235)	lr 0.01371
Train [57][940/3239]	Time 0.327 (0.643)	Data Time 0.002 (0.041)	Loss 2.5433 (2.5937)	Entropy 1.11679 (1.11757)	Top-1 acc 66.016 (61.683)	Top-5 acc 83.594 (82.237)	lr 0.01371
Train [57][950/3239]	Time 0.212 (0.641)	Data Time 0.001 (0.041)	Loss 2.5527 (2.5942)	Entropy 1.11678 (1.11756)	Top-1 acc 61.328 (61.663)	Top-5 acc 81.250 (82.221)	lr 0.01371
Train [57][960/3239]	Time 0.232 (0.639)	Data Time 0.001 (0.041)	Loss 2.6296 (2.5944)	Entropy 1.11663 (1.11755)	Top-1 acc 64.844 (61.657)	Top-5 acc 79.297 (82.212)	lr 0.01371
Train [57][970/3239]	Time 0.191 (0.637)	Data Time 0.001 (0.040)	Loss 2.5423 (2.5941)	Entropy 1.11657 (1.11754)	Top-1 acc 67.578 (61.646)	Top-5 acc 82.422 (82.216)	lr 0.01371
Train [57][980/3239]	Time 0.210 (0.635)	Data Time 0.001 (0.040)	Loss 2.6843 (2.5941)	Entropy 1.11659 (1.11753)	Top-1 acc 62.500 (61.650)	Top-5 acc 78.906 (82.214)	lr 0.01371
Train [57][990/3239]	Time 0.264 (0.633)	Data Time 0.001 (0.039)	Loss 2.4812 (2.5945)	Entropy 1.11657 (1.11752)	Top-1 acc 63.281 (61.640)	Top-5 acc 85.547 (82.207)	lr 0.01371
Train [57][1000/3239]	Time 2.510 (0.632)	Data Time 0.001 (0.039)	Loss 2.4504 (2.5947)	Entropy 1.11657 (1.11751)	Top-1 acc 63.281 (61.633)	Top-5 acc 83.984 (82.201)	lr 0.01371
Train [57][1010/3239]	Time 0.220 (0.628)	Data Time 0.001 (0.039)	Loss 2.5598 (2.5949)	Entropy 1.11654 (1.11750)	Top-1 acc 59.766 (61.635)	Top-5 acc 82.812 (82.193)	lr 0.01371
Train [57][1020/3239]	Time 0.230 (0.626)	Data Time 0.001 (0.038)	Loss 2.5092 (2.5948)	Entropy 1.11657 (1.11749)	Top-1 acc 66.406 (61.648)	Top-5 acc 82.812 (82.192)	lr 0.01370
Train [57][1030/3239]	Time 0.219 (0.625)	Data Time 0.001 (0.038)	Loss 2.6271 (2.5951)	Entropy 1.11654 (1.11749)	Top-1 acc 61.328 (61.634)	Top-5 acc 79.688 (82.179)	lr 0.01370
Train [57][1040/3239]	Time 0.331 (0.623)	Data Time 0.002 (0.038)	Loss 2.3558 (2.5945)	Entropy 1.11659 (1.11748)	Top-1 acc 69.922 (61.656)	Top-5 acc 86.328 (82.190)	lr 0.01370
Train [57][1050/3239]	Time 0.218 (0.621)	Data Time 0.001 (0.037)	Loss 2.4750 (2.5944)	Entropy 1.11649 (1.11747)	Top-1 acc 62.500 (61.653)	Top-5 acc 82.031 (82.189)	lr 0.01370
Train [57][1060/3239]	Time 0.241 (0.620)	Data Time 0.001 (0.037)	Loss 2.8341 (2.5951)	Entropy 1.11649 (1.11746)	Top-1 acc 53.125 (61.627)	Top-5 acc 77.344 (82.171)	lr 0.01370
Train [57][1070/3239]	Time 0.224 (0.618)	Data Time 0.001 (0.037)	Loss 2.6062 (2.5947)	Entropy 1.11637 (1.11745)	Top-1 acc 62.109 (61.648)	Top-5 acc 80.078 (82.180)	lr 0.01370
Train [57][1080/3239]	Time 0.229 (0.617)	Data Time 0.001 (0.036)	Loss 2.5839 (2.5950)	Entropy 1.11633 (1.11744)	Top-1 acc 60.938 (61.631)	Top-5 acc 82.422 (82.174)	lr 0.01370
Train [57][1090/3239]	Time 0.226 (0.616)	Data Time 0.001 (0.036)	Loss 2.6812 (2.5951)	Entropy 1.11633 (1.11743)	Top-1 acc 59.766 (61.632)	Top-5 acc 80.469 (82.170)	lr 0.01370
Train [57][1100/3239]	Time 0.277 (0.657)	Data Time 0.004 (0.036)	Loss 2.4933 (2.5953)	Entropy 1.11637 (1.11742)	Top-1 acc 64.062 (61.628)	Top-5 acc 83.594 (82.166)	lr 0.01370
Train [57][1110/3239]	Time 2.603 (0.656)	Data Time 0.002 (0.035)	Loss 2.4823 (2.5954)	Entropy 1.11637 (1.11741)	Top-1 acc 61.719 (61.629)	Top-5 acc 84.766 (82.163)	lr 0.01370
Train [57][1120/3239]	Time 0.221 (0.652)	Data Time 0.003 (0.035)	Loss 2.6356 (2.5954)	Entropy 1.11636 (1.11740)	Top-1 acc 63.281 (61.626)	Top-5 acc 80.469 (82.163)	lr 0.01369
Train [57][1130/3239]	Time 0.174 (0.651)	Data Time 0.001 (0.035)	Loss 2.7419 (2.5958)	Entropy 1.11630 (1.11739)	Top-1 acc 55.469 (61.612)	Top-5 acc 79.688 (82.151)	lr 0.01369
Train [57][1140/3239]	Time 0.242 (0.649)	Data Time 0.001 (0.035)	Loss 2.5639 (2.5958)	Entropy 1.11628 (1.11738)	Top-1 acc 60.156 (61.606)	Top-5 acc 86.719 (82.151)	lr 0.01369
Train [57][1150/3239]	Time 0.242 (0.648)	Data Time 0.002 (0.034)	Loss 2.5441 (2.5964)	Entropy 1.11628 (1.11737)	Top-1 acc 61.719 (61.599)	Top-5 acc 83.984 (82.146)	lr 0.01369
Train [57][1160/3239]	Time 0.228 (0.646)	Data Time 0.001 (0.034)	Loss 2.6474 (2.5966)	Entropy 1.11628 (1.11736)	Top-1 acc 56.641 (61.593)	Top-5 acc 83.203 (82.142)	lr 0.01369
Train [57][1170/3239]	Time 0.225 (0.644)	Data Time 0.001 (0.034)	Loss 2.6912 (2.5971)	Entropy 1.11625 (1.11735)	Top-1 acc 59.766 (61.581)	Top-5 acc 82.422 (82.133)	lr 0.01369
Train [57][1180/3239]	Time 0.305 (0.643)	Data Time 0.001 (0.033)	Loss 2.6506 (2.5972)	Entropy 1.11616 (1.11734)	Top-1 acc 61.719 (61.575)	Top-5 acc 80.859 (82.131)	lr 0.01369
Train [57][1190/3239]	Time 0.225 (0.641)	Data Time 0.001 (0.033)	Loss 2.7247 (2.5972)	Entropy 1.11614 (1.11733)	Top-1 acc 60.156 (61.578)	Top-5 acc 79.297 (82.137)	lr 0.01369
Train [57][1200/3239]	Time 0.229 (0.640)	Data Time 0.001 (0.033)	Loss 2.5786 (2.5972)	Entropy 1.11613 (1.11732)	Top-1 acc 62.109 (61.571)	Top-5 acc 81.250 (82.137)	lr 0.01369
Train [57][1210/3239]	Time 0.227 (0.639)	Data Time 0.001 (0.033)	Loss 2.4477 (2.5969)	Entropy 1.11611 (1.11731)	Top-1 acc 63.672 (61.583)	Top-5 acc 84.375 (82.139)	lr 0.01369
Train [57][1220/3239]	Time 2.361 (0.637)	Data Time 0.001 (0.032)	Loss 2.4919 (2.5972)	Entropy 1.11611 (1.11730)	Top-1 acc 64.844 (61.582)	Top-5 acc 83.984 (82.126)	lr 0.01368
Train [57][1230/3239]	Time 0.222 (0.634)	Data Time 0.001 (0.032)	Loss 2.5388 (2.5971)	Entropy 1.11605 (1.11729)	Top-1 acc 62.109 (61.584)	Top-5 acc 82.422 (82.134)	lr 0.01368
Train [57][1240/3239]	Time 0.220 (0.632)	Data Time 0.001 (0.032)	Loss 2.7135 (2.5971)	Entropy 1.11607 (1.11728)	Top-1 acc 58.203 (61.579)	Top-5 acc 80.078 (82.133)	lr 0.01368
Train [57][1250/3239]	Time 0.227 (0.631)	Data Time 0.001 (0.032)	Loss 2.5098 (2.5965)	Entropy 1.11606 (1.11727)	Top-1 acc 62.500 (61.587)	Top-5 acc 82.422 (82.141)	lr 0.01368
Train [57][1260/3239]	Time 0.227 (0.630)	Data Time 0.001 (0.031)	Loss 2.6049 (2.5961)	Entropy 1.11602 (1.11726)	Top-1 acc 60.156 (61.596)	Top-5 acc 79.297 (82.147)	lr 0.01368
Train [57][1270/3239]	Time 0.320 (0.628)	Data Time 0.001 (0.031)	Loss 2.6694 (2.5962)	Entropy 1.11600 (1.11725)	Top-1 acc 58.203 (61.585)	Top-5 acc 82.031 (82.150)	lr 0.01368
Train [57][1280/3239]	Time 0.218 (0.627)	Data Time 0.001 (0.031)	Loss 2.5146 (2.5966)	Entropy 1.11599 (1.11724)	Top-1 acc 64.062 (61.582)	Top-5 acc 83.984 (82.139)	lr 0.01368
Train [57][1290/3239]	Time 0.215 (0.626)	Data Time 0.002 (0.031)	Loss 2.6670 (2.5967)	Entropy 1.11598 (1.11723)	Top-1 acc 62.109 (61.579)	Top-5 acc 82.812 (82.136)	lr 0.01368
Train [57][1300/3239]	Time 0.224 (0.624)	Data Time 0.001 (0.030)	Loss 2.5213 (2.5964)	Entropy 1.11598 (1.11722)	Top-1 acc 64.844 (61.586)	Top-5 acc 83.984 (82.144)	lr 0.01368
Train [57][1310/3239]	Time 0.228 (0.623)	Data Time 0.001 (0.030)	Loss 2.4260 (2.5959)	Entropy 1.11592 (1.11721)	Top-1 acc 64.062 (61.592)	Top-5 acc 84.375 (82.154)	lr 0.01367
Train [57][1320/3239]	Time 0.315 (0.622)	Data Time 0.001 (0.030)	Loss 2.5314 (2.5963)	Entropy 1.11590 (1.11720)	Top-1 acc 63.281 (61.591)	Top-5 acc 81.641 (82.143)	lr 0.01367
Train [57][1330/3239]	Time 2.468 (0.621)	Data Time 0.001 (0.030)	Loss 2.5194 (2.5967)	Entropy 1.11590 (1.11719)	Top-1 acc 62.891 (61.576)	Top-5 acc 84.766 (82.137)	lr 0.01367
Train [57][1340/3239]	Time 0.226 (0.618)	Data Time 0.001 (0.030)	Loss 2.6657 (2.5963)	Entropy 1.11587 (1.11719)	Top-1 acc 59.375 (61.586)	Top-5 acc 81.250 (82.144)	lr 0.01367
Train [57][1350/3239]	Time 0.231 (0.617)	Data Time 0.001 (0.029)	Loss 2.7919 (2.5965)	Entropy 1.11588 (1.11718)	Top-1 acc 58.984 (61.584)	Top-5 acc 78.906 (82.140)	lr 0.01367
Train [57][1360/3239]	Time 0.238 (0.616)	Data Time 0.001 (0.029)	Loss 2.7365 (2.5962)	Entropy 1.11570 (1.11716)	Top-1 acc 58.203 (61.581)	Top-5 acc 79.688 (82.145)	lr 0.01367
Train [57][1370/3239]	Time 0.350 (0.615)	Data Time 0.001 (0.029)	Loss 2.6045 (2.5964)	Entropy 1.11572 (1.11715)	Top-1 acc 61.328 (61.577)	Top-5 acc 81.250 (82.146)	lr 0.01367
Train [57][1380/3239]	Time 0.209 (0.614)	Data Time 0.001 (0.029)	Loss 2.4704 (2.5964)	Entropy 1.11571 (1.11714)	Top-1 acc 62.109 (61.572)	Top-5 acc 85.547 (82.149)	lr 0.01367
Train [57][1390/3239]	Time 0.166 (0.612)	Data Time 0.001 (0.029)	Loss 2.6662 (2.5968)	Entropy 1.11571 (1.11713)	Top-1 acc 58.984 (61.557)	Top-5 acc 79.688 (82.141)	lr 0.01367
Train [57][1400/3239]	Time 0.210 (0.611)	Data Time 0.001 (0.028)	Loss 2.7654 (2.5970)	Entropy 1.11573 (1.11712)	Top-1 acc 56.250 (61.555)	Top-5 acc 78.906 (82.140)	lr 0.01367
Train [57][1410/3239]	Time 0.214 (0.610)	Data Time 0.001 (0.028)	Loss 2.4639 (2.5965)	Entropy 1.11571 (1.11711)	Top-1 acc 67.578 (61.574)	Top-5 acc 83.984 (82.152)	lr 0.01366
Train [57][1420/3239]	Time 0.341 (0.609)	Data Time 0.001 (0.028)	Loss 2.4389 (2.5963)	Entropy 1.11569 (1.11710)	Top-1 acc 62.500 (61.579)	Top-5 acc 85.547 (82.155)	lr 0.01366
Train [57][1430/3239]	Time 0.229 (0.608)	Data Time 0.001 (0.028)	Loss 2.7426 (2.5964)	Entropy 1.11566 (1.11709)	Top-1 acc 58.594 (61.572)	Top-5 acc 80.078 (82.149)	lr 0.01366
Train [57][1440/3239]	Time 2.601 (0.607)	Data Time 0.002 (0.028)	Loss 2.7138 (2.5965)	Entropy 1.11566 (1.11708)	Top-1 acc 57.422 (61.577)	Top-5 acc 78.906 (82.147)	lr 0.01366
Train [57][1450/3239]	Time 0.243 (0.605)	Data Time 0.001 (0.028)	Loss 2.7925 (2.5967)	Entropy 1.11561 (1.11707)	Top-1 acc 55.859 (61.569)	Top-5 acc 78.906 (82.145)	lr 0.01366
Train [57][1460/3239]	Time 0.238 (0.604)	Data Time 0.001 (0.027)	Loss 2.4497 (2.5967)	Entropy 1.11558 (1.11706)	Top-1 acc 60.938 (61.569)	Top-5 acc 88.281 (82.146)	lr 0.01366
Train [57][1470/3239]	Time 0.428 (0.635)	Data Time 0.002 (0.027)	Loss 2.4741 (2.5966)	Entropy 1.11556 (1.11705)	Top-1 acc 66.406 (61.584)	Top-5 acc 82.812 (82.145)	lr 0.01366
Train [57][1480/3239]	Time 0.267 (0.634)	Data Time 0.002 (0.027)	Loss 2.7343 (2.5972)	Entropy 1.11555 (1.11704)	Top-1 acc 57.422 (61.577)	Top-5 acc 80.078 (82.133)	lr 0.01366
Train [57][1490/3239]	Time 0.206 (0.633)	Data Time 0.001 (0.027)	Loss 2.6234 (2.5970)	Entropy 1.11554 (1.11703)	Top-1 acc 63.281 (61.584)	Top-5 acc 83.594 (82.138)	lr 0.01366
Train [57][1500/3239]	Time 0.226 (0.632)	Data Time 0.002 (0.027)	Loss 2.6035 (2.5970)	Entropy 1.11546 (1.11702)	Top-1 acc 62.891 (61.588)	Top-5 acc 81.641 (82.137)	lr 0.01366
Train [57][1510/3239]	Time 0.287 (0.631)	Data Time 0.001 (0.027)	Loss 2.6576 (2.5971)	Entropy 1.11545 (1.11701)	Top-1 acc 60.938 (61.587)	Top-5 acc 79.297 (82.135)	lr 0.01365
Train [57][1520/3239]	Time 0.237 (0.630)	Data Time 0.001 (0.026)	Loss 2.5585 (2.5971)	Entropy 1.11544 (1.11700)	Top-1 acc 60.547 (61.589)	Top-5 acc 82.422 (82.136)	lr 0.01365
Train [57][1530/3239]	Time 0.222 (0.629)	Data Time 0.001 (0.026)	Loss 2.6933 (2.5972)	Entropy 1.11544 (1.11699)	Top-1 acc 57.812 (61.588)	Top-5 acc 80.469 (82.133)	lr 0.01365
Train [57][1540/3239]	Time 0.282 (0.628)	Data Time 0.001 (0.026)	Loss 2.4608 (2.5974)	Entropy 1.11534 (1.11698)	Top-1 acc 64.062 (61.578)	Top-5 acc 85.156 (82.127)	lr 0.01365
Train [57][1550/3239]	Time 2.564 (0.627)	Data Time 0.001 (0.026)	Loss 2.6281 (2.5976)	Entropy 1.11534 (1.11697)	Top-1 acc 61.328 (61.571)	Top-5 acc 82.422 (82.121)	lr 0.01365
Train [57][1560/3239]	Time 0.185 (0.624)	Data Time 0.002 (0.026)	Loss 2.4204 (2.5978)	Entropy 1.11533 (1.11696)	Top-1 acc 63.672 (61.557)	Top-5 acc 85.156 (82.118)	lr 0.01365
Train [57][1570/3239]	Time 0.235 (0.623)	Data Time 0.001 (0.026)	Loss 2.7227 (2.5983)	Entropy 1.11529 (1.11695)	Top-1 acc 58.203 (61.548)	Top-5 acc 80.078 (82.107)	lr 0.01365
Train [57][1580/3239]	Time 0.214 (0.622)	Data Time 0.001 (0.025)	Loss 2.5916 (2.5981)	Entropy 1.11524 (1.11694)	Top-1 acc 63.281 (61.555)	Top-5 acc 79.297 (82.110)	lr 0.01365
Train [57][1590/3239]	Time 0.232 (0.621)	Data Time 0.002 (0.025)	Loss 2.6648 (2.5983)	Entropy 1.11524 (1.11693)	Top-1 acc 56.641 (61.540)	Top-5 acc 81.641 (82.109)	lr 0.01365
Train [57][1600/3239]	Time 0.326 (0.620)	Data Time 0.001 (0.025)	Loss 2.4070 (2.5985)	Entropy 1.11522 (1.11692)	Top-1 acc 66.797 (61.544)	Top-5 acc 87.891 (82.109)	lr 0.01365
Train [57][1610/3239]	Time 0.225 (0.619)	Data Time 0.001 (0.025)	Loss 2.6755 (2.5984)	Entropy 1.11527 (1.11691)	Top-1 acc 61.328 (61.549)	Top-5 acc 79.688 (82.105)	lr 0.01364
Train [57][1620/3239]	Time 0.230 (0.618)	Data Time 0.001 (0.025)	Loss 2.5067 (2.5982)	Entropy 1.11525 (1.11690)	Top-1 acc 59.375 (61.556)	Top-5 acc 84.375 (82.106)	lr 0.01364
Train [57][1630/3239]	Time 0.226 (0.617)	Data Time 0.001 (0.025)	Loss 2.6671 (2.5980)	Entropy 1.11525 (1.11689)	Top-1 acc 60.938 (61.561)	Top-5 acc 81.250 (82.111)	lr 0.01364
Train [57][1640/3239]	Time 0.209 (0.616)	Data Time 0.001 (0.025)	Loss 2.6785 (2.5981)	Entropy 1.11526 (1.11688)	Top-1 acc 60.938 (61.557)	Top-5 acc 82.422 (82.106)	lr 0.01364
Train [57][1650/3239]	Time 0.330 (0.615)	Data Time 0.001 (0.024)	Loss 2.6458 (2.5980)	Entropy 1.11527 (1.11687)	Top-1 acc 59.766 (61.558)	Top-5 acc 81.641 (82.111)	lr 0.01364
Train [57][1660/3239]	Time 2.485 (0.615)	Data Time 0.001 (0.024)	Loss 2.6186 (2.5982)	Entropy 1.11527 (1.11686)	Top-1 acc 63.281 (61.558)	Top-5 acc 79.297 (82.106)	lr 0.01364
Train [57][1670/3239]	Time 0.249 (0.612)	Data Time 0.002 (0.024)	Loss 2.5317 (2.5983)	Entropy 1.11527 (1.11685)	Top-1 acc 61.328 (61.551)	Top-5 acc 83.594 (82.107)	lr 0.01364
Train [57][1680/3239]	Time 0.261 (0.611)	Data Time 0.002 (0.024)	Loss 2.5122 (2.5986)	Entropy 1.11525 (1.11684)	Top-1 acc 66.406 (61.543)	Top-5 acc 85.547 (82.100)	lr 0.01364
Train [57][1690/3239]	Time 0.221 (0.611)	Data Time 0.001 (0.024)	Loss 2.6586 (2.5988)	Entropy 1.11522 (1.11683)	Top-1 acc 60.938 (61.537)	Top-5 acc 79.297 (82.098)	lr 0.01364
Train [57][1700/3239]	Time 0.320 (0.610)	Data Time 0.001 (0.024)	Loss 2.4958 (2.5990)	Entropy 1.11524 (1.11682)	Top-1 acc 63.672 (61.527)	Top-5 acc 83.594 (82.093)	lr 0.01364
Train [57][1710/3239]	Time 0.225 (0.609)	Data Time 0.001 (0.024)	Loss 2.6183 (2.5989)	Entropy 1.11524 (1.11681)	Top-1 acc 57.422 (61.529)	Top-5 acc 82.031 (82.093)	lr 0.01363
Train [57][1720/3239]	Time 0.211 (0.608)	Data Time 0.001 (0.024)	Loss 2.4606 (2.5991)	Entropy 1.11520 (1.11680)	Top-1 acc 64.062 (61.520)	Top-5 acc 84.766 (82.092)	lr 0.01363
Train [57][1730/3239]	Time 0.224 (0.607)	Data Time 0.001 (0.023)	Loss 2.7933 (2.5990)	Entropy 1.11520 (1.11679)	Top-1 acc 57.812 (61.522)	Top-5 acc 78.906 (82.098)	lr 0.01363
Train [57][1740/3239]	Time 0.225 (0.606)	Data Time 0.001 (0.023)	Loss 2.6791 (2.5994)	Entropy 1.11521 (1.11678)	Top-1 acc 59.375 (61.513)	Top-5 acc 78.906 (82.091)	lr 0.01363
Train [57][1750/3239]	Time 0.227 (0.606)	Data Time 0.001 (0.023)	Loss 2.7264 (2.5995)	Entropy 1.11522 (1.11677)	Top-1 acc 60.938 (61.513)	Top-5 acc 78.906 (82.087)	lr 0.01363
Train [57][1760/3239]	Time 0.239 (0.605)	Data Time 0.001 (0.023)	Loss 2.5391 (2.5994)	Entropy 1.11523 (1.11677)	Top-1 acc 61.719 (61.516)	Top-5 acc 81.641 (82.088)	lr 0.01363
Train [57][1770/3239]	Time 2.504 (0.604)	Data Time 0.001 (0.023)	Loss 2.6128 (2.5994)	Entropy 1.11523 (1.11676)	Top-1 acc 61.719 (61.516)	Top-5 acc 81.641 (82.085)	lr 0.01363
Train [57][1780/3239]	Time 0.223 (0.602)	Data Time 0.001 (0.023)	Loss 2.7396 (2.5997)	Entropy 1.11518 (1.11675)	Top-1 acc 60.547 (61.511)	Top-5 acc 82.422 (82.079)	lr 0.01363
Train [57][1790/3239]	Time 0.317 (0.601)	Data Time 0.001 (0.023)	Loss 2.4912 (2.5996)	Entropy 1.11519 (1.11674)	Top-1 acc 66.797 (61.515)	Top-5 acc 84.766 (82.077)	lr 0.01363
Train [57][1800/3239]	Time 0.200 (0.601)	Data Time 0.001 (0.023)	Loss 2.7140 (2.5998)	Entropy 1.11517 (1.11673)	Top-1 acc 60.156 (61.510)	Top-5 acc 79.297 (82.071)	lr 0.01363
Train [57][1810/3239]	Time 0.217 (0.600)	Data Time 0.001 (0.022)	Loss 2.8082 (2.5999)	Entropy 1.11512 (1.11672)	Top-1 acc 59.375 (61.511)	Top-5 acc 75.000 (82.069)	lr 0.01362
Train [57][1820/3239]	Time 0.234 (0.599)	Data Time 0.001 (0.022)	Loss 2.4955 (2.6000)	Entropy 1.11505 (1.11671)	Top-1 acc 62.891 (61.507)	Top-5 acc 85.156 (82.070)	lr 0.01362
Train [57][1830/3239]	Time 0.330 (0.623)	Data Time 0.002 (0.022)	Loss 2.5730 (2.6000)	Entropy 1.11507 (1.11670)	Top-1 acc 64.062 (61.508)	Top-5 acc 82.031 (82.069)	lr 0.01362
Train [57][1840/3239]	Time 0.308 (0.623)	Data Time 0.002 (0.022)	Loss 2.6248 (2.6000)	Entropy 1.11505 (1.11669)	Top-1 acc 59.766 (61.507)	Top-5 acc 78.516 (82.069)	lr 0.01362
Train [57][1850/3239]	Time 0.243 (0.622)	Data Time 0.002 (0.022)	Loss 2.5762 (2.5996)	Entropy 1.11503 (1.11669)	Top-1 acc 57.812 (61.509)	Top-5 acc 82.422 (82.075)	lr 0.01362
Train [57][1860/3239]	Time 0.239 (0.621)	Data Time 0.001 (0.022)	Loss 2.6808 (2.5997)	Entropy 1.11495 (1.11668)	Top-1 acc 62.500 (61.510)	Top-5 acc 81.641 (82.072)	lr 0.01362
Train [57][1870/3239]	Time 0.214 (0.621)	Data Time 0.001 (0.022)	Loss 2.7579 (2.5997)	Entropy 1.11494 (1.11667)	Top-1 acc 60.156 (61.502)	Top-5 acc 80.469 (82.074)	lr 0.01362
Train [57][1880/3239]	Time 2.374 (0.620)	Data Time 0.001 (0.022)	Loss 2.6440 (2.5997)	Entropy 1.11494 (1.11666)	Top-1 acc 60.547 (61.496)	Top-5 acc 81.641 (82.072)	lr 0.01362
Train [57][1890/3239]	Time 0.324 (0.618)	Data Time 0.001 (0.022)	Loss 2.5521 (2.5997)	Entropy 1.11489 (1.11665)	Top-1 acc 64.844 (61.499)	Top-5 acc 81.641 (82.071)	lr 0.01362
Train [57][1900/3239]	Time 0.248 (0.617)	Data Time 0.002 (0.021)	Loss 2.6459 (2.5996)	Entropy 1.11489 (1.11664)	Top-1 acc 60.547 (61.498)	Top-5 acc 80.469 (82.073)	lr 0.01362
Train [57][1910/3239]	Time 0.207 (0.616)	Data Time 0.001 (0.021)	Loss 2.6824 (2.5995)	Entropy 1.11485 (1.11663)	Top-1 acc 60.156 (61.502)	Top-5 acc 81.641 (82.074)	lr 0.01361
Train [57][1920/3239]	Time 0.232 (0.615)	Data Time 0.001 (0.021)	Loss 2.4407 (2.5995)	Entropy 1.11484 (1.11662)	Top-1 acc 67.578 (61.502)	Top-5 acc 86.328 (82.073)	lr 0.01361
Train [57][1930/3239]	Time 0.226 (0.615)	Data Time 0.001 (0.021)	Loss 2.7298 (2.5994)	Entropy 1.11487 (1.11661)	Top-1 acc 55.469 (61.506)	Top-5 acc 80.859 (82.080)	lr 0.01361
Train [57][1940/3239]	Time 0.306 (0.614)	Data Time 0.001 (0.021)	Loss 2.5014 (2.5994)	Entropy 1.11486 (1.11660)	Top-1 acc 62.109 (61.505)	Top-5 acc 82.422 (82.076)	lr 0.01361
Train [57][1950/3239]	Time 0.228 (0.613)	Data Time 0.001 (0.021)	Loss 2.5125 (2.5993)	Entropy 1.11483 (1.11659)	Top-1 acc 63.281 (61.503)	Top-5 acc 80.469 (82.077)	lr 0.01361
Train [57][1960/3239]	Time 0.258 (0.612)	Data Time 0.001 (0.021)	Loss 2.5470 (2.5991)	Entropy 1.11484 (1.11659)	Top-1 acc 64.844 (61.505)	Top-5 acc 82.812 (82.084)	lr 0.01361
Train [57][1970/3239]	Time 0.214 (0.611)	Data Time 0.001 (0.021)	Loss 2.6569 (2.5991)	Entropy 1.11485 (1.11658)	Top-1 acc 59.766 (61.501)	Top-5 acc 80.859 (82.083)	lr 0.01361
Train [57][1980/3239]	Time 0.256 (0.611)	Data Time 0.001 (0.021)	Loss 2.5595 (2.5990)	Entropy 1.11483 (1.11657)	Top-1 acc 60.156 (61.502)	Top-5 acc 80.078 (82.086)	lr 0.01361
Train [57][1990/3239]	Time 2.682 (0.610)	Data Time 0.001 (0.021)	Loss 2.6922 (2.5992)	Entropy 1.11483 (1.11656)	Top-1 acc 62.109 (61.500)	Top-5 acc 77.344 (82.078)	lr 0.01361
Train [57][2000/3239]	Time 0.241 (0.608)	Data Time 0.001 (0.020)	Loss 2.6424 (2.5991)	Entropy 1.11484 (1.11655)	Top-1 acc 62.891 (61.502)	Top-5 acc 82.422 (82.082)	lr 0.01361
Train [57][2010/3239]	Time 0.239 (0.607)	Data Time 0.001 (0.020)	Loss 2.6025 (2.5992)	Entropy 1.11478 (1.11654)	Top-1 acc 60.547 (61.503)	Top-5 acc 84.375 (82.080)	lr 0.01360
Train [57][2020/3239]	Time 0.235 (0.607)	Data Time 0.001 (0.020)	Loss 2.5359 (2.5991)	Entropy 1.11474 (1.11653)	Top-1 acc 63.672 (61.501)	Top-5 acc 81.641 (82.076)	lr 0.01360
Train [57][2030/3239]	Time 0.220 (0.606)	Data Time 0.001 (0.020)	Loss 2.5293 (2.5991)	Entropy 1.11475 (1.11652)	Top-1 acc 64.062 (61.498)	Top-5 acc 83.984 (82.077)	lr 0.01360
Train [57][2040/3239]	Time 0.334 (0.605)	Data Time 0.001 (0.020)	Loss 2.7872 (2.5994)	Entropy 1.11476 (1.11652)	Top-1 acc 61.328 (61.490)	Top-5 acc 79.297 (82.077)	lr 0.01360
Train [57][2050/3239]	Time 0.246 (0.605)	Data Time 0.001 (0.020)	Loss 2.6533 (2.5994)	Entropy 1.11469 (1.11651)	Top-1 acc 59.375 (61.489)	Top-5 acc 81.250 (82.073)	lr 0.01360
Train [57][2060/3239]	Time 0.227 (0.604)	Data Time 0.001 (0.020)	Loss 2.6304 (2.5993)	Entropy 1.11466 (1.11650)	Top-1 acc 61.719 (61.491)	Top-5 acc 82.422 (82.078)	lr 0.01360
Train [57][2070/3239]	Time 0.215 (0.603)	Data Time 0.001 (0.020)	Loss 2.6268 (2.5995)	Entropy 1.11461 (1.11649)	Top-1 acc 61.719 (61.493)	Top-5 acc 82.812 (82.075)	lr 0.01360
Train [57][2080/3239]	Time 0.208 (0.602)	Data Time 0.001 (0.020)	Loss 2.7096 (2.5994)	Entropy 1.11457 (1.11648)	Top-1 acc 61.719 (61.493)	Top-5 acc 79.688 (82.076)	lr 0.01360
Train [57][2090/3239]	Time 0.212 (0.602)	Data Time 0.001 (0.020)	Loss 2.6013 (2.5995)	Entropy 1.11456 (1.11647)	Top-1 acc 61.719 (61.490)	Top-5 acc 80.078 (82.073)	lr 0.01360
Train [57][2100/3239]	Time 2.470 (0.601)	Data Time 0.001 (0.020)	Loss 2.5132 (2.5998)	Entropy 1.11456 (1.11646)	Top-1 acc 62.109 (61.479)	Top-5 acc 86.328 (82.070)	lr 0.01360
Train [57][2110/3239]	Time 0.219 (0.599)	Data Time 0.001 (0.020)	Loss 2.6083 (2.5998)	Entropy 1.11453 (1.11645)	Top-1 acc 59.766 (61.477)	Top-5 acc 84.766 (82.072)	lr 0.01359
Train [57][2120/3239]	Time 0.220 (0.599)	Data Time 0.001 (0.019)	Loss 2.6355 (2.5999)	Entropy 1.11449 (1.11644)	Top-1 acc 62.109 (61.470)	Top-5 acc 82.422 (82.072)	lr 0.01359
Train [57][2130/3239]	Time 0.269 (0.598)	Data Time 0.001 (0.019)	Loss 2.5577 (2.5998)	Entropy 1.11440 (1.11643)	Top-1 acc 62.109 (61.474)	Top-5 acc 83.203 (82.073)	lr 0.01359
Train [57][2140/3239]	Time 0.219 (0.598)	Data Time 0.001 (0.019)	Loss 2.5990 (2.5998)	Entropy 1.11441 (1.11642)	Top-1 acc 64.062 (61.478)	Top-5 acc 82.031 (82.074)	lr 0.01359
Train [57][2150/3239]	Time 0.237 (0.597)	Data Time 0.001 (0.019)	Loss 2.5222 (2.5998)	Entropy 1.11426 (1.11641)	Top-1 acc 64.062 (61.473)	Top-5 acc 82.031 (82.072)	lr 0.01359
Train [57][2160/3239]	Time 0.247 (0.596)	Data Time 0.001 (0.019)	Loss 2.4369 (2.5997)	Entropy 1.11422 (1.11640)	Top-1 acc 60.938 (61.475)	Top-5 acc 87.500 (82.078)	lr 0.01359
Train [57][2170/3239]	Time 0.240 (0.596)	Data Time 0.001 (0.019)	Loss 2.5859 (2.5996)	Entropy 1.11419 (1.11639)	Top-1 acc 59.375 (61.472)	Top-5 acc 82.812 (82.081)	lr 0.01359
Train [57][2180/3239]	Time 0.318 (0.595)	Data Time 0.001 (0.019)	Loss 2.4438 (2.5998)	Entropy 1.11416 (1.11638)	Top-1 acc 62.891 (61.467)	Top-5 acc 88.281 (82.082)	lr 0.01359
Train [57][2190/3239]	Time 0.232 (0.616)	Data Time 0.002 (0.019)	Loss 2.6340 (2.5998)	Entropy 1.11412 (1.11637)	Top-1 acc 59.766 (61.468)	Top-5 acc 79.688 (82.079)	lr 0.01359
Train [57][2200/3239]	Time 0.210 (0.615)	Data Time 0.001 (0.019)	Loss 2.4634 (2.6002)	Entropy 1.11412 (1.11636)	Top-1 acc 62.109 (61.458)	Top-5 acc 84.766 (82.072)	lr 0.01359
Train [57][2210/3239]	Time 2.408 (0.614)	Data Time 0.002 (0.019)	Loss 2.7458 (2.6002)	Entropy 1.11412 (1.11635)	Top-1 acc 58.984 (61.455)	Top-5 acc 78.516 (82.071)	lr 0.01358
Train [57][2220/3239]	Time 0.219 (0.612)	Data Time 0.002 (0.019)	Loss 2.7174 (2.6003)	Entropy 1.11412 (1.11634)	Top-1 acc 61.719 (61.460)	Top-5 acc 79.297 (82.068)	lr 0.01358
Train [57][2230/3239]	Time 0.216 (0.612)	Data Time 0.001 (0.019)	Loss 2.7322 (2.6001)	Entropy 1.11410 (1.11633)	Top-1 acc 59.766 (61.460)	Top-5 acc 82.422 (82.073)	lr 0.01358
Train [57][2240/3239]	Time 0.239 (0.611)	Data Time 0.001 (0.018)	Loss 2.7006 (2.6000)	Entropy 1.11409 (1.11632)	Top-1 acc 57.422 (61.459)	Top-5 acc 80.078 (82.074)	lr 0.01358
Train [57][2250/3239]	Time 0.230 (0.610)	Data Time 0.002 (0.018)	Loss 2.4583 (2.5999)	Entropy 1.11405 (1.11631)	Top-1 acc 63.672 (61.457)	Top-5 acc 85.156 (82.078)	lr 0.01358
Train [57][2260/3239]	Time 0.226 (0.610)	Data Time 0.001 (0.018)	Loss 2.5389 (2.5999)	Entropy 1.11403 (1.11630)	Top-1 acc 61.328 (61.454)	Top-5 acc 82.031 (82.080)	lr 0.01358
Train [57][2270/3239]	Time 0.216 (0.609)	Data Time 0.001 (0.018)	Loss 2.8441 (2.6002)	Entropy 1.11402 (1.11629)	Top-1 acc 58.203 (61.448)	Top-5 acc 75.391 (82.073)	lr 0.01358
Train [57][2280/3239]	Time 0.224 (0.609)	Data Time 0.001 (0.018)	Loss 2.6197 (2.6003)	Entropy 1.11395 (1.11628)	Top-1 acc 62.500 (61.441)	Top-5 acc 83.984 (82.069)	lr 0.01358
Train [57][2290/3239]	Time 0.242 (0.608)	Data Time 0.001 (0.018)	Loss 2.4835 (2.6004)	Entropy 1.11394 (1.11627)	Top-1 acc 65.234 (61.442)	Top-5 acc 83.594 (82.064)	lr 0.01358
Train [57][2300/3239]	Time 0.217 (0.607)	Data Time 0.001 (0.018)	Loss 2.5946 (2.6006)	Entropy 1.11396 (1.11626)	Top-1 acc 60.156 (61.438)	Top-5 acc 80.469 (82.061)	lr 0.01358
Train [57][2310/3239]	Time 0.260 (0.607)	Data Time 0.001 (0.018)	Loss 2.8438 (2.6005)	Entropy 1.11395 (1.11625)	Top-1 acc 58.203 (61.438)	Top-5 acc 77.344 (82.066)	lr 0.01357
Train [57][2320/3239]	Time 2.816 (0.606)	Data Time 0.001 (0.018)	Loss 2.6533 (2.6005)	Entropy 1.11395 (1.11624)	Top-1 acc 60.938 (61.438)	Top-5 acc 79.688 (82.067)	lr 0.01357
Train [57][2330/3239]	Time 0.233 (0.605)	Data Time 0.001 (0.018)	Loss 2.7164 (2.6007)	Entropy 1.11394 (1.11623)	Top-1 acc 62.891 (61.436)	Top-5 acc 78.906 (82.065)	lr 0.01357
Train [57][2340/3239]	Time 0.223 (0.604)	Data Time 0.001 (0.018)	Loss 2.7296 (2.6009)	Entropy 1.11395 (1.11622)	Top-1 acc 56.250 (61.432)	Top-5 acc 78.125 (82.060)	lr 0.01357
Train [57][2350/3239]	Time 0.227 (0.603)	Data Time 0.001 (0.018)	Loss 2.6322 (2.6009)	Entropy 1.11385 (1.11621)	Top-1 acc 60.547 (61.431)	Top-5 acc 82.812 (82.061)	lr 0.01357
Train [57][2360/3239]	Time 0.224 (0.603)	Data Time 0.001 (0.018)	Loss 2.6786 (2.6011)	Entropy 1.11384 (1.11620)	Top-1 acc 60.547 (61.425)	Top-5 acc 80.078 (82.058)	lr 0.01357
Train [57][2370/3239]	Time 0.235 (0.602)	Data Time 0.002 (0.018)	Loss 2.7539 (2.6012)	Entropy 1.11380 (1.11619)	Top-1 acc 57.422 (61.423)	Top-5 acc 80.859 (82.057)	lr 0.01357
Train [57][2380/3239]	Time 0.222 (0.602)	Data Time 0.001 (0.017)	Loss 2.5900 (2.6012)	Entropy 1.11378 (1.11618)	Top-1 acc 63.281 (61.423)	Top-5 acc 82.031 (82.057)	lr 0.01357
Train [57][2390/3239]	Time 0.196 (0.601)	Data Time 0.001 (0.017)	Loss 2.5541 (2.6014)	Entropy 1.11373 (1.11617)	Top-1 acc 61.328 (61.416)	Top-5 acc 82.031 (82.052)	lr 0.01357
Train [57][2400/3239]	Time 0.221 (0.601)	Data Time 0.001 (0.017)	Loss 2.5798 (2.6014)	Entropy 1.11369 (1.11616)	Top-1 acc 60.938 (61.413)	Top-5 acc 85.156 (82.052)	lr 0.01357
Train [57][2410/3239]	Time 0.218 (0.600)	Data Time 0.001 (0.017)	Loss 2.6910 (2.6014)	Entropy 1.11367 (1.11615)	Top-1 acc 60.156 (61.414)	Top-5 acc 79.297 (82.046)	lr 0.01356
Train [57][2420/3239]	Time 0.213 (0.600)	Data Time 0.001 (0.017)	Loss 2.6357 (2.6015)	Entropy 1.11364 (1.11614)	Top-1 acc 60.156 (61.412)	Top-5 acc 81.641 (82.044)	lr 0.01356
Train [57][2430/3239]	Time 2.462 (0.599)	Data Time 0.001 (0.017)	Loss 2.5107 (2.6017)	Entropy 1.11364 (1.11613)	Top-1 acc 63.281 (61.403)	Top-5 acc 83.203 (82.039)	lr 0.01356
Train [57][2440/3239]	Time 0.263 (0.598)	Data Time 0.002 (0.017)	Loss 2.5562 (2.6017)	Entropy 1.11364 (1.11612)	Top-1 acc 62.500 (61.402)	Top-5 acc 83.203 (82.041)	lr 0.01356
Train [57][2450/3239]	Time 0.226 (0.597)	Data Time 0.002 (0.017)	Loss 2.5961 (2.6018)	Entropy 1.11361 (1.11611)	Top-1 acc 62.500 (61.404)	Top-5 acc 83.203 (82.043)	lr 0.01356
Train [57][2460/3239]	Time 0.366 (0.597)	Data Time 0.001 (0.017)	Loss 2.6622 (2.6018)	Entropy 1.11359 (1.11610)	Top-1 acc 60.938 (61.401)	Top-5 acc 81.250 (82.045)	lr 0.01356
Train [57][2470/3239]	Time 0.161 (0.596)	Data Time 0.001 (0.017)	Loss 2.5809 (2.6020)	Entropy 1.11357 (1.11609)	Top-1 acc 63.281 (61.397)	Top-5 acc 81.250 (82.041)	lr 0.01356
Train [57][2480/3239]	Time 0.208 (0.596)	Data Time 0.001 (0.017)	Loss 2.5453 (2.6022)	Entropy 1.11354 (1.11608)	Top-1 acc 63.672 (61.389)	Top-5 acc 83.984 (82.036)	lr 0.01356
Train [57][2490/3239]	Time 0.220 (0.595)	Data Time 0.001 (0.017)	Loss 2.6597 (2.6022)	Entropy 1.11346 (1.11607)	Top-1 acc 62.500 (61.388)	Top-5 acc 81.641 (82.034)	lr 0.01356
Train [57][2500/3239]	Time 0.221 (0.595)	Data Time 0.001 (0.017)	Loss 2.6988 (2.6023)	Entropy 1.11344 (1.11606)	Top-1 acc 59.375 (61.382)	Top-5 acc 80.078 (82.034)	lr 0.01356
Train [57][2510/3239]	Time 0.367 (0.594)	Data Time 0.001 (0.017)	Loss 2.6338 (2.6024)	Entropy 1.11344 (1.11605)	Top-1 acc 60.156 (61.379)	Top-5 acc 81.250 (82.033)	lr 0.01355
Train [57][2520/3239]	Time 0.229 (0.594)	Data Time 0.001 (0.017)	Loss 2.7299 (2.6024)	Entropy 1.11341 (1.11604)	Top-1 acc 60.938 (61.381)	Top-5 acc 78.125 (82.030)	lr 0.01355
Train [57][2530/3239]	Time 0.231 (0.593)	Data Time 0.001 (0.017)	Loss 2.7170 (2.6025)	Entropy 1.11322 (1.11603)	Top-1 acc 59.375 (61.379)	Top-5 acc 81.250 (82.027)	lr 0.01355
Train [57][2540/3239]	Time 2.507 (0.593)	Data Time 0.001 (0.016)	Loss 2.5366 (2.6025)	Entropy 1.11322 (1.11602)	Top-1 acc 65.234 (61.380)	Top-5 acc 83.203 (82.025)	lr 0.01355
Train [57][2550/3239]	Time 0.220 (0.591)	Data Time 0.001 (0.016)	Loss 2.6983 (2.6023)	Entropy 1.11322 (1.11601)	Top-1 acc 60.547 (61.384)	Top-5 acc 82.422 (82.028)	lr 0.01355
Train [57][2560/3239]	Time 0.293 (0.611)	Data Time 0.002 (0.016)	Loss 2.6644 (2.6024)	Entropy 1.11310 (1.11599)	Top-1 acc 61.328 (61.382)	Top-5 acc 82.422 (82.028)	lr 0.01355
Train [57][2570/3239]	Time 0.231 (0.610)	Data Time 0.002 (0.016)	Loss 2.6064 (2.6026)	Entropy 1.11309 (1.11598)	Top-1 acc 63.281 (61.379)	Top-5 acc 81.250 (82.026)	lr 0.01355
Train [57][2580/3239]	Time 0.230 (0.609)	Data Time 0.002 (0.016)	Loss 2.5856 (2.6025)	Entropy 1.11307 (1.11597)	Top-1 acc 65.234 (61.380)	Top-5 acc 80.078 (82.029)	lr 0.01355
Train [57][2590/3239]	Time 0.240 (0.609)	Data Time 0.002 (0.016)	Loss 2.5944 (2.6024)	Entropy 1.11306 (1.11596)	Top-1 acc 60.547 (61.383)	Top-5 acc 82.031 (82.031)	lr 0.01355
Train [57][2600/3239]	Time 0.242 (0.608)	Data Time 0.001 (0.016)	Loss 2.7030 (2.6023)	Entropy 1.11304 (1.11595)	Top-1 acc 55.078 (61.380)	Top-5 acc 82.031 (82.032)	lr 0.01355
Train [57][2610/3239]	Time 0.221 (0.608)	Data Time 0.001 (0.016)	Loss 2.6343 (2.6024)	Entropy 1.11304 (1.11594)	Top-1 acc 60.547 (61.373)	Top-5 acc 80.078 (82.030)	lr 0.01354
Train [57][2620/3239]	Time 0.231 (0.607)	Data Time 0.001 (0.016)	Loss 2.7121 (2.6027)	Entropy 1.11305 (1.11593)	Top-1 acc 57.031 (61.369)	Top-5 acc 80.469 (82.025)	lr 0.01354
Train [57][2630/3239]	Time 0.226 (0.607)	Data Time 0.001 (0.016)	Loss 2.6846 (2.6027)	Entropy 1.11308 (1.11592)	Top-1 acc 58.594 (61.367)	Top-5 acc 82.812 (82.025)	lr 0.01354
Train [57][2640/3239]	Time 0.219 (0.606)	Data Time 0.001 (0.016)	Loss 2.5981 (2.6027)	Entropy 1.11313 (1.11591)	Top-1 acc 57.031 (61.367)	Top-5 acc 81.250 (82.026)	lr 0.01354
Train [57][2650/3239]	Time 0.298 (0.606)	Data Time 0.001 (0.016)	Loss 2.5316 (2.6026)	Entropy 1.11311 (1.11590)	Top-1 acc 62.891 (61.366)	Top-5 acc 84.766 (82.030)	lr 0.01354
Train [57][2660/3239]	Time 0.224 (0.605)	Data Time 0.001 (0.016)	Loss 2.7625 (2.6026)	Entropy 1.11312 (1.11589)	Top-1 acc 53.906 (61.366)	Top-5 acc 78.906 (82.036)	lr 0.01354
Train [57][2670/3239]	Time 0.243 (0.605)	Data Time 0.001 (0.016)	Loss 2.5650 (2.6024)	Entropy 1.11313 (1.11587)	Top-1 acc 62.500 (61.366)	Top-5 acc 82.031 (82.039)	lr 0.01354
Train [57][2680/3239]	Time 0.269 (0.604)	Data Time 0.001 (0.016)	Loss 2.6345 (2.6025)	Entropy 1.11317 (1.11586)	Top-1 acc 61.719 (61.365)	Top-5 acc 82.031 (82.038)	lr 0.01354
Train [57][2690/3239]	Time 0.331 (0.604)	Data Time 0.001 (0.016)	Loss 2.7013 (2.6025)	Entropy 1.11312 (1.11585)	Top-1 acc 58.203 (61.367)	Top-5 acc 84.375 (82.040)	lr 0.01354
Train [57][2700/3239]	Time 0.222 (0.603)	Data Time 0.001 (0.016)	Loss 2.5870 (2.6026)	Entropy 1.11313 (1.11584)	Top-1 acc 62.109 (61.364)	Top-5 acc 80.859 (82.040)	lr 0.01354
Train [57][2710/3239]	Time 0.240 (0.603)	Data Time 0.001 (0.016)	Loss 2.5354 (2.6026)	Entropy 1.11304 (1.11583)	Top-1 acc 62.891 (61.367)	Top-5 acc 84.375 (82.038)	lr 0.01353
Train [57][2720/3239]	Time 0.224 (0.602)	Data Time 0.002 (0.016)	Loss 2.6727 (2.6026)	Entropy 1.11295 (1.11582)	Top-1 acc 60.938 (61.368)	Top-5 acc 79.688 (82.038)	lr 0.01353
Train [57][2730/3239]	Time 0.240 (0.602)	Data Time 0.001 (0.015)	Loss 2.6790 (2.6024)	Entropy 1.11290 (1.11581)	Top-1 acc 59.766 (61.375)	Top-5 acc 81.250 (82.041)	lr 0.01353
Train [57][2740/3239]	Time 0.358 (0.601)	Data Time 0.001 (0.015)	Loss 2.5773 (2.6023)	Entropy 1.11288 (1.11580)	Top-1 acc 61.328 (61.377)	Top-5 acc 80.469 (82.044)	lr 0.01353
Train [57][2750/3239]	Time 0.247 (0.600)	Data Time 0.001 (0.015)	Loss 2.6333 (2.6023)	Entropy 1.11286 (1.11579)	Top-1 acc 60.156 (61.377)	Top-5 acc 79.688 (82.043)	lr 0.01353
Train [57][2760/3239]	Time 0.219 (0.600)	Data Time 0.001 (0.015)	Loss 2.6516 (2.6023)	Entropy 1.11288 (1.11578)	Top-1 acc 63.281 (61.382)	Top-5 acc 80.469 (82.044)	lr 0.01353
Train [57][2770/3239]	Time 0.264 (0.600)	Data Time 0.002 (0.015)	Loss 2.6764 (2.6023)	Entropy 1.11286 (1.11577)	Top-1 acc 54.688 (61.379)	Top-5 acc 80.469 (82.044)	lr 0.01353
Train [57][2780/3239]	Time 0.224 (0.599)	Data Time 0.002 (0.015)	Loss 2.6062 (2.6024)	Entropy 1.11281 (1.11576)	Top-1 acc 62.109 (61.377)	Top-5 acc 82.031 (82.040)	lr 0.01353
Train [57][2790/3239]	Time 0.241 (0.599)	Data Time 0.001 (0.015)	Loss 2.6900 (2.6025)	Entropy 1.11280 (1.11575)	Top-1 acc 61.328 (61.376)	Top-5 acc 79.688 (82.040)	lr 0.01353
Train [57][2800/3239]	Time 0.223 (0.598)	Data Time 0.001 (0.015)	Loss 2.5410 (2.6025)	Entropy 1.11281 (1.11574)	Top-1 acc 59.766 (61.378)	Top-5 acc 84.375 (82.040)	lr 0.01353
Train [57][2810/3239]	Time 0.234 (0.598)	Data Time 0.001 (0.015)	Loss 2.6682 (2.6029)	Entropy 1.11275 (1.11573)	Top-1 acc 62.500 (61.370)	Top-5 acc 80.859 (82.032)	lr 0.01352
Train [57][2820/3239]	Time 0.277 (0.597)	Data Time 0.001 (0.015)	Loss 2.6900 (2.6030)	Entropy 1.11270 (1.11572)	Top-1 acc 58.984 (61.366)	Top-5 acc 79.297 (82.029)	lr 0.01352
Train [57][2830/3239]	Time 0.257 (0.597)	Data Time 0.001 (0.015)	Loss 2.6093 (2.6031)	Entropy 1.11269 (1.11571)	Top-1 acc 64.453 (61.366)	Top-5 acc 81.250 (82.027)	lr 0.01352
Train [57][2840/3239]	Time 0.272 (0.596)	Data Time 0.001 (0.015)	Loss 2.4301 (2.6031)	Entropy 1.11257 (1.11570)	Top-1 acc 64.062 (61.364)	Top-5 acc 87.500 (82.028)	lr 0.01352
Train [57][2850/3239]	Time 0.221 (0.596)	Data Time 0.001 (0.015)	Loss 2.9154 (2.6032)	Entropy 1.11247 (1.11569)	Top-1 acc 52.344 (61.365)	Top-5 acc 77.344 (82.025)	lr 0.01352
Train [57][2860/3239]	Time 0.234 (0.595)	Data Time 0.001 (0.015)	Loss 2.6149 (2.6032)	Entropy 1.11238 (1.11568)	Top-1 acc 62.109 (61.365)	Top-5 acc 80.859 (82.028)	lr 0.01352
Train [57][2870/3239]	Time 0.226 (0.595)	Data Time 0.001 (0.015)	Loss 2.6031 (2.6032)	Entropy 1.11235 (1.11566)	Top-1 acc 63.281 (61.363)	Top-5 acc 82.031 (82.028)	lr 0.01352
Train [57][2880/3239]	Time 0.359 (0.594)	Data Time 0.002 (0.015)	Loss 2.7957 (2.6033)	Entropy 1.11229 (1.11565)	Top-1 acc 53.516 (61.361)	Top-5 acc 79.688 (82.027)	lr 0.01352
Train [57][2890/3239]	Time 0.281 (0.610)	Data Time 0.004 (0.015)	Loss 2.4471 (2.6033)	Entropy 1.11222 (1.11564)	Top-1 acc 64.844 (61.363)	Top-5 acc 84.375 (82.027)	lr 0.01352
Train [57][2900/3239]	Time 0.224 (0.610)	Data Time 0.002 (0.015)	Loss 2.4783 (2.6033)	Entropy 1.11223 (1.11563)	Top-1 acc 64.453 (61.367)	Top-5 acc 83.594 (82.024)	lr 0.01351
Train [57][2910/3239]	Time 0.236 (0.609)	Data Time 0.001 (0.015)	Loss 2.6678 (2.6032)	Entropy 1.11225 (1.11562)	Top-1 acc 57.812 (61.370)	Top-5 acc 80.469 (82.025)	lr 0.01351
Train [57][2920/3239]	Time 0.253 (0.609)	Data Time 0.001 (0.015)	Loss 2.5502 (2.6033)	Entropy 1.11222 (1.11561)	Top-1 acc 62.500 (61.367)	Top-5 acc 83.594 (82.023)	lr 0.01351
Train [57][2930/3239]	Time 0.239 (0.608)	Data Time 0.001 (0.015)	Loss 2.5872 (2.6033)	Entropy 1.11203 (1.11559)	Top-1 acc 58.594 (61.365)	Top-5 acc 80.469 (82.023)	lr 0.01351
Train [57][2940/3239]	Time 0.220 (0.608)	Data Time 0.002 (0.014)	Loss 2.5380 (2.6034)	Entropy 1.11204 (1.11558)	Top-1 acc 63.281 (61.362)	Top-5 acc 80.078 (82.019)	lr 0.01351
Train [57][2950/3239]	Time 0.213 (0.607)	Data Time 0.001 (0.014)	Loss 2.4887 (2.6033)	Entropy 1.11205 (1.11557)	Top-1 acc 64.062 (61.363)	Top-5 acc 86.328 (82.024)	lr 0.01351
Train [57][2960/3239]	Time 0.206 (0.607)	Data Time 0.001 (0.014)	Loss 2.5862 (2.6033)	Entropy 1.11203 (1.11556)	Top-1 acc 63.281 (61.363)	Top-5 acc 80.469 (82.019)	lr 0.01351
Train [57][2970/3239]	Time 0.259 (0.606)	Data Time 0.001 (0.014)	Loss 2.7033 (2.6036)	Entropy 1.11198 (1.11555)	Top-1 acc 55.859 (61.355)	Top-5 acc 78.906 (82.014)	lr 0.01351
Train [57][2980/3239]	Time 0.219 (0.606)	Data Time 0.001 (0.014)	Loss 2.6126 (2.6034)	Entropy 1.11203 (1.11553)	Top-1 acc 63.281 (61.360)	Top-5 acc 80.859 (82.017)	lr 0.01351
Train [57][2990/3239]	Time 0.215 (0.605)	Data Time 0.001 (0.014)	Loss 2.6899 (2.6035)	Entropy 1.11199 (1.11552)	Top-1 acc 57.422 (61.356)	Top-5 acc 80.859 (82.016)	lr 0.01351
Train [57][3000/3239]	Time 0.259 (0.605)	Data Time 0.001 (0.014)	Loss 2.6040 (2.6036)	Entropy 1.11199 (1.11551)	Top-1 acc 60.547 (61.351)	Top-5 acc 80.469 (82.014)	lr 0.01350
Train [57][3010/3239]	Time 0.249 (0.604)	Data Time 0.001 (0.014)	Loss 2.7055 (2.6037)	Entropy 1.11198 (1.11550)	Top-1 acc 61.719 (61.350)	Top-5 acc 82.031 (82.012)	lr 0.01350
Train [57][3020/3239]	Time 0.213 (0.604)	Data Time 0.001 (0.014)	Loss 2.5442 (2.6038)	Entropy 1.11198 (1.11549)	Top-1 acc 64.062 (61.349)	Top-5 acc 82.031 (82.011)	lr 0.01350
Train [57][3030/3239]	Time 0.231 (0.603)	Data Time 0.001 (0.014)	Loss 2.5934 (2.6038)	Entropy 1.11197 (1.11548)	Top-1 acc 60.547 (61.349)	Top-5 acc 82.031 (82.012)	lr 0.01350
Train [57][3040/3239]	Time 0.224 (0.603)	Data Time 0.002 (0.014)	Loss 2.6374 (2.6039)	Entropy 1.11195 (1.11546)	Top-1 acc 62.109 (61.345)	Top-5 acc 81.641 (82.012)	lr 0.01350
Train [57][3050/3239]	Time 0.224 (0.602)	Data Time 0.001 (0.014)	Loss 2.4875 (2.6039)	Entropy 1.11195 (1.11545)	Top-1 acc 65.234 (61.342)	Top-5 acc 84.375 (82.012)	lr 0.01350
Train [57][3060/3239]	Time 0.323 (0.602)	Data Time 0.001 (0.014)	Loss 2.6635 (2.6040)	Entropy 1.11195 (1.11544)	Top-1 acc 59.375 (61.340)	Top-5 acc 82.031 (82.009)	lr 0.01350
Train [57][3070/3239]	Time 0.250 (0.602)	Data Time 0.001 (0.014)	Loss 2.5981 (2.6040)	Entropy 1.11193 (1.11543)	Top-1 acc 64.062 (61.342)	Top-5 acc 80.078 (82.010)	lr 0.01350
Train [57][3080/3239]	Time 0.253 (0.601)	Data Time 0.001 (0.014)	Loss 2.6798 (2.6040)	Entropy 1.11195 (1.11542)	Top-1 acc 58.984 (61.342)	Top-5 acc 78.906 (82.009)	lr 0.01350
Train [57][3090/3239]	Time 0.221 (0.601)	Data Time 0.002 (0.014)	Loss 2.6767 (2.6043)	Entropy 1.11188 (1.11541)	Top-1 acc 61.719 (61.336)	Top-5 acc 79.688 (82.004)	lr 0.01350
Train [57][3100/3239]	Time 0.230 (0.600)	Data Time 0.001 (0.014)	Loss 2.4803 (2.6043)	Entropy 1.11186 (1.11540)	Top-1 acc 65.625 (61.336)	Top-5 acc 84.375 (82.005)	lr 0.01349
Train [57][3110/3239]	Time 0.380 (0.600)	Data Time 0.001 (0.014)	Loss 2.6872 (2.6046)	Entropy 1.11186 (1.11538)	Top-1 acc 55.859 (61.328)	Top-5 acc 81.250 (82.001)	lr 0.01349
Train [57][3120/3239]	Time 0.222 (0.599)	Data Time 0.001 (0.014)	Loss 2.8363 (2.6046)	Entropy 1.11183 (1.11537)	Top-1 acc 57.422 (61.327)	Top-5 acc 76.953 (82.001)	lr 0.01349
Train [57][3130/3239]	Time 0.249 (0.599)	Data Time 0.001 (0.014)	Loss 2.5537 (2.6046)	Entropy 1.11181 (1.11536)	Top-1 acc 61.719 (61.327)	Top-5 acc 82.422 (82.000)	lr 0.01349
Train [57][3140/3239]	Time 0.255 (0.599)	Data Time 0.001 (0.014)	Loss 2.7283 (2.6049)	Entropy 1.11182 (1.11535)	Top-1 acc 60.547 (61.323)	Top-5 acc 77.734 (81.996)	lr 0.01349
Train [57][3150/3239]	Time 0.272 (0.598)	Data Time 0.001 (0.014)	Loss 2.6285 (2.6049)	Entropy 1.11170 (1.11534)	Top-1 acc 57.812 (61.318)	Top-5 acc 82.422 (81.996)	lr 0.01349
Train [57][3160/3239]	Time 0.318 (0.598)	Data Time 0.001 (0.014)	Loss 2.4370 (2.6049)	Entropy 1.11160 (1.11533)	Top-1 acc 66.797 (61.321)	Top-5 acc 83.984 (81.999)	lr 0.01349
Train [57][3170/3239]	Time 0.256 (0.597)	Data Time 0.001 (0.014)	Loss 2.6415 (2.6049)	Entropy 1.11152 (1.11532)	Top-1 acc 58.984 (61.321)	Top-5 acc 80.469 (82.000)	lr 0.01349
Train [57][3180/3239]	Time 0.230 (0.597)	Data Time 0.000 (0.014)	Loss 2.6026 (2.6049)	Entropy 1.11152 (1.11530)	Top-1 acc 61.719 (61.320)	Top-5 acc 82.812 (82.000)	lr 0.01349
Train [57][3190/3239]	Time 0.226 (0.596)	Data Time 0.000 (0.013)	Loss 2.5670 (2.6049)	Entropy 1.11151 (1.11529)	Top-1 acc 62.500 (61.320)	Top-5 acc 80.859 (82.000)	lr 0.01349
Train [57][3200/3239]	Time 0.217 (0.596)	Data Time 0.000 (0.013)	Loss 2.5795 (2.6048)	Entropy 1.11157 (1.11528)	Top-1 acc 58.203 (61.319)	Top-5 acc 82.422 (82.003)	lr 0.01348
Train [57][3210/3239]	Time 0.318 (0.595)	Data Time 0.000 (0.013)	Loss 2.4943 (2.6050)	Entropy 1.11159 (1.11527)	Top-1 acc 63.281 (61.315)	Top-5 acc 85.547 (82.001)	lr 0.01348
Train [57][3220/3239]	Time 0.289 (0.609)	Data Time 0.000 (0.013)	Loss 2.5940 (2.6052)	Entropy 1.11157 (1.11526)	Top-1 acc 62.109 (61.309)	Top-5 acc 81.250 (81.998)	lr 0.01348
Train [57][3230/3239]	Time 0.241 (0.609)	Data Time 0.000 (0.013)	Loss 2.8564 (2.6054)	Entropy 1.11155 (1.11525)	Top-1 acc 54.688 (61.305)	Top-5 acc 78.516 (81.993)	lr 0.01348
Train [57][3239/3239]	Time 2.319 (0.608)	Data Time 0.000 (0.013)	Loss 2.6405 (2.6053)	Entropy 1.11155 (1.11524)	Top-1 acc 58.025 (61.307)	Top-5 acc 81.481 (81.994)	lr 0.01348
==========Valid [57/120]	loss 1.487	top-1 acc 66.377 (66.421)	top-5 acc 86.258	Train top-1 61.307	top-5 81.994	Entropy 1.11155	Latency-None: 0.000ms	Flops: 548.34M
Train [58][0/3239]	Time 34.508 (34.508)	Data Time 33.110 (33.110)	Loss 2.5177 (2.5177)	Entropy 1.11152 (1.11152)	Top-1 acc 57.031 (57.031)	Top-5 acc 84.375 (84.375)	lr 0.01348
Train [58][10/3239]	Time 2.641 (3.769)	Data Time 0.002 (3.100)	Loss 2.6104 (2.5557)	Entropy 1.11152 (1.11152)	Top-1 acc 66.016 (63.246)	Top-5 acc 80.469 (82.741)	lr 0.01348
Train [58][20/3239]	Time 0.259 (2.096)	Data Time 0.002 (1.624)	Loss 2.3488 (2.5369)	Entropy 1.11150 (1.11151)	Top-1 acc 67.969 (63.170)	Top-5 acc 86.328 (82.980)	lr 0.01348
Train [58][30/3239]	Time 0.217 (1.570)	Data Time 0.001 (1.101)	Loss 2.6773 (2.5348)	Entropy 1.11147 (1.11150)	Top-1 acc 60.547 (62.966)	Top-5 acc 82.422 (83.140)	lr 0.01348
Train [58][40/3239]	Time 0.242 (1.304)	Data Time 0.002 (0.833)	Loss 2.7983 (2.5523)	Entropy 1.11147 (1.11149)	Top-1 acc 52.734 (62.519)	Top-5 acc 81.641 (83.032)	lr 0.01348
Train [58][50/3239]	Time 0.197 (1.139)	Data Time 0.002 (0.670)	Loss 2.4904 (2.5588)	Entropy 1.11149 (1.11149)	Top-1 acc 64.062 (62.163)	Top-5 acc 82.812 (82.851)	lr 0.01348
Train [58][60/3239]	Time 0.324 (1.032)	Data Time 0.001 (0.560)	Loss 2.7903 (2.5593)	Entropy 1.11148 (1.11149)	Top-1 acc 57.422 (62.282)	Top-5 acc 80.078 (82.896)	lr 0.01347
Train [58][70/3239]	Time 0.230 (0.954)	Data Time 0.001 (0.482)	Loss 2.4974 (2.5596)	Entropy 1.11145 (1.11149)	Top-1 acc 63.281 (62.412)	Top-5 acc 85.938 (83.005)	lr 0.01347
Train [58][80/3239]	Time 0.246 (0.895)	Data Time 0.001 (0.422)	Loss 2.7340 (2.5658)	Entropy 1.11143 (1.11148)	Top-1 acc 60.938 (62.346)	Top-5 acc 77.344 (82.890)	lr 0.01347
Train [58][90/3239]	Time 0.278 (0.848)	Data Time 0.001 (0.376)	Loss 2.4595 (2.5719)	Entropy 1.11133 (1.11147)	Top-1 acc 66.016 (62.200)	Top-5 acc 85.547 (82.735)	lr 0.01347
Train [58][100/3239]	Time 0.247 (0.811)	Data Time 0.029 (0.339)	Loss 2.4653 (2.5702)	Entropy 1.11137 (1.11146)	Top-1 acc 62.500 (62.164)	Top-5 acc 86.328 (82.754)	lr 0.01347
Train [58][110/3239]	Time 0.229 (0.779)	Data Time 0.001 (0.309)	Loss 2.6711 (2.5731)	Entropy 1.11127 (1.11145)	Top-1 acc 58.203 (62.141)	Top-5 acc 79.297 (82.651)	lr 0.01347
Train [58][120/3239]	Time 2.415 (0.753)	Data Time 0.002 (0.283)	Loss 2.6143 (2.5698)	Entropy 1.11127 (1.11144)	Top-1 acc 57.422 (62.177)	Top-5 acc 80.859 (82.670)	lr 0.01347
Train [58][130/3239]	Time 0.220 (0.714)	Data Time 0.001 (0.262)	Loss 2.7202 (2.5761)	Entropy 1.11125 (1.11142)	Top-1 acc 57.031 (61.984)	Top-5 acc 82.422 (82.526)	lr 0.01347
Train [58][140/3239]	Time 0.228 (0.696)	Data Time 0.001 (0.243)	Loss 2.6386 (2.5786)	Entropy 1.11124 (1.11141)	Top-1 acc 57.812 (61.841)	Top-5 acc 83.594 (82.544)	lr 0.01347
Train [58][150/3239]	Time 0.210 (0.681)	Data Time 0.001 (0.227)	Loss 2.7079 (2.5779)	Entropy 1.11124 (1.11140)	Top-1 acc 59.375 (61.933)	Top-5 acc 79.688 (82.536)	lr 0.01347
Train [58][160/3239]	Time 0.245 (0.668)	Data Time 0.002 (0.213)	Loss 2.4535 (2.5788)	Entropy 1.11121 (1.11139)	Top-1 acc 66.797 (61.932)	Top-5 acc 84.375 (82.504)	lr 0.01346
Train [58][170/3239]	Time 0.231 (0.657)	Data Time 0.001 (0.201)	Loss 2.6906 (2.5788)	Entropy 1.11120 (1.11138)	Top-1 acc 60.938 (61.936)	Top-5 acc 79.297 (82.511)	lr 0.01346
Train [58][180/3239]	Time 0.224 (0.647)	Data Time 0.001 (0.190)	Loss 2.6373 (2.5821)	Entropy 1.11119 (1.11137)	Top-1 acc 64.453 (61.883)	Top-5 acc 80.859 (82.482)	lr 0.01346
Train [58][190/3239]	Time 0.219 (0.637)	Data Time 0.001 (0.180)	Loss 2.6080 (2.5806)	Entropy 1.11119 (1.11136)	Top-1 acc 64.844 (61.933)	Top-5 acc 82.812 (82.498)	lr 0.01346
Train [58][200/3239]	Time 0.314 (0.629)	Data Time 0.001 (0.171)	Loss 2.5199 (2.5803)	Entropy 1.11116 (1.11135)	Top-1 acc 58.203 (61.899)	Top-5 acc 85.156 (82.509)	lr 0.01346
Train [58][210/3239]	Time 0.216 (0.621)	Data Time 0.001 (0.163)	Loss 2.5655 (2.5782)	Entropy 1.11113 (1.11134)	Top-1 acc 60.156 (62.017)	Top-5 acc 83.984 (82.544)	lr 0.01346
Train [58][220/3239]	Time 0.266 (0.615)	Data Time 0.001 (0.156)	Loss 2.5493 (2.5786)	Entropy 1.11109 (1.11133)	Top-1 acc 59.375 (62.000)	Top-5 acc 83.594 (82.519)	lr 0.01346
Train [58][230/3239]	Time 2.594 (0.609)	Data Time 0.002 (0.149)	Loss 2.5437 (2.5770)	Entropy 1.11109 (1.11132)	Top-1 acc 62.109 (62.040)	Top-5 acc 82.812 (82.598)	lr 0.01346
Train [58][240/3239]	Time 0.307 (0.594)	Data Time 0.002 (0.143)	Loss 2.6628 (2.5776)	Entropy 1.11107 (1.11131)	Top-1 acc 58.203 (61.996)	Top-5 acc 81.250 (82.561)	lr 0.01346
Train [58][250/3239]	Time 0.336 (0.589)	Data Time 0.002 (0.137)	Loss 2.6032 (2.5783)	Entropy 1.11108 (1.11130)	Top-1 acc 64.453 (61.974)	Top-5 acc 80.469 (82.543)	lr 0.01346
Train [58][260/3239]	Time 0.220 (0.585)	Data Time 0.001 (0.132)	Loss 2.7061 (2.5773)	Entropy 1.11109 (1.11129)	Top-1 acc 57.031 (62.020)	Top-5 acc 78.516 (82.525)	lr 0.01345
Train [58][270/3239]	Time 0.216 (0.580)	Data Time 0.001 (0.127)	Loss 2.5186 (2.5798)	Entropy 1.11111 (1.11129)	Top-1 acc 64.844 (61.993)	Top-5 acc 83.984 (82.484)	lr 0.01345
Train [58][280/3239]	Time 0.230 (0.576)	Data Time 0.001 (0.123)	Loss 3.0110 (2.5818)	Entropy 1.11105 (1.11128)	Top-1 acc 52.344 (61.947)	Top-5 acc 72.656 (82.455)	lr 0.01345
Train [58][290/3239]	Time 0.165 (0.572)	Data Time 0.001 (0.119)	Loss 2.6846 (2.5826)	Entropy 1.11105 (1.11127)	Top-1 acc 58.984 (61.963)	Top-5 acc 80.078 (82.418)	lr 0.01345
Train [58][300/3239]	Time 0.214 (0.569)	Data Time 0.001 (0.115)	Loss 2.4380 (2.5812)	Entropy 1.11105 (1.11126)	Top-1 acc 64.453 (61.991)	Top-5 acc 87.109 (82.466)	lr 0.01345
Train [58][310/3239]	Time 0.215 (0.565)	Data Time 0.001 (0.111)	Loss 2.7917 (2.5835)	Entropy 1.11100 (1.11126)	Top-1 acc 56.250 (61.920)	Top-5 acc 78.516 (82.443)	lr 0.01345
Train [58][320/3239]	Time 0.221 (0.562)	Data Time 0.001 (0.108)	Loss 2.5338 (2.5838)	Entropy 1.11100 (1.11125)	Top-1 acc 65.625 (61.952)	Top-5 acc 83.984 (82.452)	lr 0.01345
Train [58][330/3239]	Time 0.224 (0.559)	Data Time 0.001 (0.105)	Loss 2.6089 (2.5844)	Entropy 1.11099 (1.11124)	Top-1 acc 63.672 (61.942)	Top-5 acc 81.641 (82.436)	lr 0.01345
Train [58][340/3239]	Time 54.674 (0.709)	Data Time 0.001 (0.102)	Loss 2.5104 (2.5843)	Entropy 1.11099 (1.11123)	Top-1 acc 64.453 (61.940)	Top-5 acc 85.156 (82.446)	lr 0.01345
Train [58][350/3239]	Time 0.278 (0.696)	Data Time 0.002 (0.099)	Loss 2.5096 (2.5833)	Entropy 1.11096 (1.11122)	Top-1 acc 64.453 (61.997)	Top-5 acc 84.766 (82.447)	lr 0.01345
Train [58][360/3239]	Time 0.238 (0.690)	Data Time 0.002 (0.096)	Loss 2.5928 (2.5823)	Entropy 1.11092 (1.11122)	Top-1 acc 58.984 (62.011)	Top-5 acc 82.812 (82.452)	lr 0.01344
Train [58][370/3239]	Time 0.230 (0.684)	Data Time 0.002 (0.094)	Loss 2.6476 (2.5817)	Entropy 1.11092 (1.11121)	Top-1 acc 61.719 (62.006)	Top-5 acc 81.641 (82.471)	lr 0.01344
Train [58][380/3239]	Time 0.225 (0.678)	Data Time 0.001 (0.091)	Loss 2.4847 (2.5820)	Entropy 1.11085 (1.11120)	Top-1 acc 64.844 (62.000)	Top-5 acc 85.156 (82.473)	lr 0.01344
Train [58][390/3239]	Time 0.327 (0.673)	Data Time 0.002 (0.089)	Loss 2.6100 (2.5824)	Entropy 1.11086 (1.11119)	Top-1 acc 62.500 (61.969)	Top-5 acc 81.641 (82.466)	lr 0.01344
Train [58][400/3239]	Time 0.217 (0.668)	Data Time 0.001 (0.087)	Loss 2.6467 (2.5812)	Entropy 1.11089 (1.11118)	Top-1 acc 59.375 (61.983)	Top-5 acc 80.469 (82.485)	lr 0.01344
Train [58][410/3239]	Time 0.232 (0.663)	Data Time 0.001 (0.085)	Loss 2.5411 (2.5810)	Entropy 1.11080 (1.11117)	Top-1 acc 64.453 (62.010)	Top-5 acc 82.812 (82.482)	lr 0.01344
Train [58][420/3239]	Time 0.221 (0.658)	Data Time 0.001 (0.083)	Loss 2.6713 (2.5811)	Entropy 1.11076 (1.11117)	Top-1 acc 58.203 (62.010)	Top-5 acc 79.688 (82.464)	lr 0.01344
Train [58][430/3239]	Time 0.215 (0.653)	Data Time 0.001 (0.081)	Loss 2.6585 (2.5802)	Entropy 1.11068 (1.11116)	Top-1 acc 60.156 (62.034)	Top-5 acc 80.078 (82.474)	lr 0.01344
Train [58][440/3239]	Time 0.319 (0.649)	Data Time 0.002 (0.079)	Loss 2.7033 (2.5808)	Entropy 1.11068 (1.11114)	Top-1 acc 54.688 (62.012)	Top-5 acc 79.688 (82.460)	lr 0.01344
Train [58][450/3239]	Time 2.572 (0.645)	Data Time 0.001 (0.077)	Loss 2.6084 (2.5821)	Entropy 1.11068 (1.11113)	Top-1 acc 60.547 (61.968)	Top-5 acc 82.812 (82.440)	lr 0.01344
Train [58][460/3239]	Time 0.241 (0.637)	Data Time 0.001 (0.076)	Loss 2.5355 (2.5829)	Entropy 1.11064 (1.11112)	Top-1 acc 64.844 (61.941)	Top-5 acc 81.641 (82.413)	lr 0.01343
Train [58][470/3239]	Time 0.225 (0.633)	Data Time 0.001 (0.074)	Loss 2.9725 (2.5836)	Entropy 1.11062 (1.11111)	Top-1 acc 49.609 (61.936)	Top-5 acc 74.219 (82.404)	lr 0.01343
Train [58][480/3239]	Time 0.230 (0.630)	Data Time 0.002 (0.073)	Loss 2.5395 (2.5843)	Entropy 1.11058 (1.11110)	Top-1 acc 62.500 (61.927)	Top-5 acc 82.812 (82.388)	lr 0.01343
Train [58][490/3239]	Time 0.329 (0.627)	Data Time 0.002 (0.071)	Loss 2.7516 (2.5850)	Entropy 1.11059 (1.11109)	Top-1 acc 51.953 (61.879)	Top-5 acc 82.031 (82.390)	lr 0.01343
Train [58][500/3239]	Time 0.237 (0.623)	Data Time 0.001 (0.070)	Loss 2.3395 (2.5831)	Entropy 1.11056 (1.11108)	Top-1 acc 67.578 (61.911)	Top-5 acc 88.281 (82.423)	lr 0.01343
Train [58][510/3239]	Time 0.234 (0.620)	Data Time 0.002 (0.068)	Loss 2.5728 (2.5834)	Entropy 1.11055 (1.11107)	Top-1 acc 61.719 (61.905)	Top-5 acc 83.594 (82.410)	lr 0.01343
Train [58][520/3239]	Time 0.228 (0.617)	Data Time 0.001 (0.067)	Loss 2.5595 (2.5836)	Entropy 1.11051 (1.11106)	Top-1 acc 64.062 (61.878)	Top-5 acc 83.984 (82.402)	lr 0.01343
Train [58][530/3239]	Time 0.217 (0.615)	Data Time 0.001 (0.066)	Loss 2.6359 (2.5835)	Entropy 1.11048 (1.11105)	Top-1 acc 58.594 (61.864)	Top-5 acc 80.469 (82.415)	lr 0.01343
Train [58][540/3239]	Time 0.253 (0.612)	Data Time 0.001 (0.065)	Loss 2.8420 (2.5836)	Entropy 1.11049 (1.11104)	Top-1 acc 55.078 (61.866)	Top-5 acc 78.906 (82.411)	lr 0.01343
Train [58][550/3239]	Time 0.254 (0.609)	Data Time 0.001 (0.064)	Loss 2.6162 (2.5841)	Entropy 1.11050 (1.11103)	Top-1 acc 59.766 (61.850)	Top-5 acc 82.422 (82.404)	lr 0.01343
Train [58][560/3239]	Time 2.508 (0.607)	Data Time 0.002 (0.062)	Loss 2.5898 (2.5837)	Entropy 1.11050 (1.11102)	Top-1 acc 63.281 (61.853)	Top-5 acc 79.688 (82.402)	lr 0.01342
Train [58][570/3239]	Time 0.235 (0.600)	Data Time 0.001 (0.061)	Loss 2.6155 (2.5833)	Entropy 1.11049 (1.11101)	Top-1 acc 62.109 (61.847)	Top-5 acc 81.250 (82.407)	lr 0.01342
Train [58][580/3239]	Time 0.238 (0.598)	Data Time 0.002 (0.060)	Loss 2.6329 (2.5835)	Entropy 1.11049 (1.11100)	Top-1 acc 58.203 (61.850)	Top-5 acc 80.859 (82.414)	lr 0.01342
Train [58][590/3239]	Time 0.221 (0.596)	Data Time 0.001 (0.059)	Loss 2.5267 (2.5838)	Entropy 1.11042 (1.11099)	Top-1 acc 65.625 (61.850)	Top-5 acc 84.375 (82.402)	lr 0.01342
Train [58][600/3239]	Time 0.218 (0.594)	Data Time 0.001 (0.058)	Loss 2.5979 (2.5843)	Entropy 1.11040 (1.11098)	Top-1 acc 60.547 (61.829)	Top-5 acc 81.250 (82.394)	lr 0.01342
Train [58][610/3239]	Time 0.215 (0.591)	Data Time 0.001 (0.057)	Loss 2.5263 (2.5843)	Entropy 1.11037 (1.11097)	Top-1 acc 63.281 (61.828)	Top-5 acc 83.203 (82.396)	lr 0.01342
Train [58][620/3239]	Time 0.242 (0.590)	Data Time 0.001 (0.057)	Loss 2.4743 (2.5838)	Entropy 1.11035 (1.11096)	Top-1 acc 62.891 (61.838)	Top-5 acc 83.984 (82.419)	lr 0.01342
Train [58][630/3239]	Time 0.316 (0.587)	Data Time 0.001 (0.056)	Loss 2.5460 (2.5844)	Entropy 1.11037 (1.11095)	Top-1 acc 62.109 (61.831)	Top-5 acc 82.422 (82.401)	lr 0.01342
Train [58][640/3239]	Time 0.192 (0.585)	Data Time 0.001 (0.055)	Loss 2.5498 (2.5848)	Entropy 1.11035 (1.11094)	Top-1 acc 64.844 (61.819)	Top-5 acc 82.812 (82.390)	lr 0.01342
Train [58][650/3239]	Time 0.283 (0.584)	Data Time 0.001 (0.054)	Loss 2.4818 (2.5850)	Entropy 1.11037 (1.11093)	Top-1 acc 64.453 (61.809)	Top-5 acc 84.766 (82.374)	lr 0.01342
Train [58][660/3239]	Time 0.254 (0.582)	Data Time 0.001 (0.053)	Loss 2.5836 (2.5852)	Entropy 1.11038 (1.11093)	Top-1 acc 60.547 (61.800)	Top-5 acc 82.812 (82.372)	lr 0.01341
Train [58][670/3239]	Time 2.485 (0.580)	Data Time 0.001 (0.052)	Loss 2.6957 (2.5852)	Entropy 1.11038 (1.11092)	Top-1 acc 58.203 (61.781)	Top-5 acc 78.516 (82.376)	lr 0.01341
Train [58][680/3239]	Time 0.238 (0.575)	Data Time 0.002 (0.052)	Loss 2.5014 (2.5847)	Entropy 1.11038 (1.11091)	Top-1 acc 64.453 (61.775)	Top-5 acc 85.938 (82.385)	lr 0.01341
Train [58][690/3239]	Time 0.258 (0.574)	Data Time 0.001 (0.051)	Loss 2.4656 (2.5857)	Entropy 1.11040 (1.11090)	Top-1 acc 64.453 (61.757)	Top-5 acc 87.891 (82.366)	lr 0.01341
Train [58][700/3239]	Time 0.222 (0.572)	Data Time 0.001 (0.050)	Loss 2.6058 (2.5851)	Entropy 1.11040 (1.11090)	Top-1 acc 61.719 (61.774)	Top-5 acc 83.203 (82.373)	lr 0.01341
Train [58][710/3239]	Time 0.259 (0.637)	Data Time 0.002 (0.050)	Loss 2.6523 (2.5849)	Entropy 1.11039 (1.11089)	Top-1 acc 57.031 (61.783)	Top-5 acc 82.422 (82.370)	lr 0.01341
Train [58][720/3239]	Time 0.302 (0.635)	Data Time 0.002 (0.049)	Loss 2.8015 (2.5855)	Entropy 1.11039 (1.11088)	Top-1 acc 57.422 (61.775)	Top-5 acc 80.078 (82.363)	lr 0.01341
Train [58][730/3239]	Time 0.233 (0.633)	Data Time 0.001 (0.048)	Loss 2.5987 (2.5861)	Entropy 1.11040 (1.11087)	Top-1 acc 61.328 (61.758)	Top-5 acc 81.641 (82.338)	lr 0.01341
Train [58][740/3239]	Time 0.234 (0.630)	Data Time 0.001 (0.048)	Loss 2.7003 (2.5870)	Entropy 1.11035 (1.11087)	Top-1 acc 57.422 (61.731)	Top-5 acc 81.641 (82.321)	lr 0.01341
Train [58][750/3239]	Time 0.243 (0.628)	Data Time 0.001 (0.047)	Loss 2.7173 (2.5871)	Entropy 1.11036 (1.11086)	Top-1 acc 58.594 (61.731)	Top-5 acc 79.688 (82.321)	lr 0.01341
Train [58][760/3239]	Time 0.231 (0.626)	Data Time 0.002 (0.047)	Loss 2.6081 (2.5870)	Entropy 1.11040 (1.11085)	Top-1 acc 62.891 (61.745)	Top-5 acc 83.594 (82.332)	lr 0.01340
Train [58][770/3239]	Time 0.207 (0.624)	Data Time 0.001 (0.046)	Loss 2.6391 (2.5869)	Entropy 1.11041 (1.11085)	Top-1 acc 62.109 (61.755)	Top-5 acc 82.031 (82.337)	lr 0.01340
Train [58][780/3239]	Time 2.556 (0.622)	Data Time 0.001 (0.045)	Loss 2.5097 (2.5869)	Entropy 1.11041 (1.11084)	Top-1 acc 60.938 (61.748)	Top-5 acc 84.766 (82.338)	lr 0.01340
Train [58][790/3239]	Time 0.215 (0.617)	Data Time 0.001 (0.045)	Loss 2.6472 (2.5869)	Entropy 1.11040 (1.11084)	Top-1 acc 61.719 (61.752)	Top-5 acc 83.984 (82.337)	lr 0.01340
Train [58][800/3239]	Time 0.220 (0.615)	Data Time 0.001 (0.044)	Loss 2.6835 (2.5870)	Entropy 1.11042 (1.11083)	Top-1 acc 60.547 (61.762)	Top-5 acc 79.297 (82.323)	lr 0.01340
Train [58][810/3239]	Time 0.252 (0.613)	Data Time 0.001 (0.044)	Loss 2.4223 (2.5873)	Entropy 1.11042 (1.11083)	Top-1 acc 65.234 (61.763)	Top-5 acc 86.328 (82.311)	lr 0.01340
Train [58][820/3239]	Time 0.218 (0.611)	Data Time 0.001 (0.043)	Loss 2.4595 (2.5869)	Entropy 1.11041 (1.11082)	Top-1 acc 62.500 (61.773)	Top-5 acc 83.984 (82.325)	lr 0.01340
Train [58][830/3239]	Time 0.205 (0.610)	Data Time 0.001 (0.043)	Loss 2.5656 (2.5869)	Entropy 1.11038 (1.11082)	Top-1 acc 67.188 (61.769)	Top-5 acc 81.641 (82.318)	lr 0.01340
Train [58][840/3239]	Time 0.218 (0.608)	Data Time 0.001 (0.042)	Loss 2.5754 (2.5870)	Entropy 1.11041 (1.11081)	Top-1 acc 63.281 (61.757)	Top-5 acc 81.641 (82.312)	lr 0.01340
Train [58][850/3239]	Time 0.226 (0.606)	Data Time 0.001 (0.042)	Loss 2.4390 (2.5874)	Entropy 1.11042 (1.11081)	Top-1 acc 63.672 (61.771)	Top-5 acc 86.328 (82.305)	lr 0.01340
Train [58][860/3239]	Time 0.211 (0.605)	Data Time 0.001 (0.041)	Loss 2.5373 (2.5876)	Entropy 1.11041 (1.11080)	Top-1 acc 63.672 (61.764)	Top-5 acc 83.984 (82.308)	lr 0.01339
Train [58][870/3239]	Time 0.215 (0.603)	Data Time 0.001 (0.041)	Loss 2.6418 (2.5880)	Entropy 1.11041 (1.11080)	Top-1 acc 58.594 (61.741)	Top-5 acc 83.203 (82.307)	lr 0.01339
Train [58][880/3239]	Time 0.195 (0.602)	Data Time 0.001 (0.040)	Loss 2.6661 (2.5885)	Entropy 1.11030 (1.11079)	Top-1 acc 56.641 (61.730)	Top-5 acc 79.297 (82.290)	lr 0.01339
Train [58][890/3239]	Time 2.436 (0.600)	Data Time 0.001 (0.040)	Loss 2.4991 (2.5883)	Entropy 1.11030 (1.11079)	Top-1 acc 63.281 (61.735)	Top-5 acc 85.547 (82.295)	lr 0.01339
Train [58][900/3239]	Time 0.222 (0.596)	Data Time 0.001 (0.040)	Loss 2.4886 (2.5879)	Entropy 1.11026 (1.11078)	Top-1 acc 66.406 (61.748)	Top-5 acc 81.641 (82.303)	lr 0.01339
Train [58][910/3239]	Time 0.217 (0.594)	Data Time 0.001 (0.039)	Loss 2.5537 (2.5885)	Entropy 1.11023 (1.11078)	Top-1 acc 62.891 (61.735)	Top-5 acc 83.203 (82.298)	lr 0.01339
Train [58][920/3239]	Time 0.244 (0.593)	Data Time 0.002 (0.039)	Loss 2.7233 (2.5886)	Entropy 1.11024 (1.11077)	Top-1 acc 59.375 (61.724)	Top-5 acc 80.859 (82.290)	lr 0.01339
Train [58][930/3239]	Time 0.230 (0.592)	Data Time 0.001 (0.038)	Loss 2.7418 (2.5885)	Entropy 1.11022 (1.11076)	Top-1 acc 58.984 (61.720)	Top-5 acc 80.078 (82.293)	lr 0.01339
Train [58][940/3239]	Time 0.219 (0.590)	Data Time 0.001 (0.038)	Loss 2.6417 (2.5888)	Entropy 1.11022 (1.11076)	Top-1 acc 62.500 (61.718)	Top-5 acc 81.250 (82.289)	lr 0.01339
Train [58][950/3239]	Time 0.233 (0.589)	Data Time 0.001 (0.038)	Loss 2.5050 (2.5893)	Entropy 1.11024 (1.11075)	Top-1 acc 62.891 (61.708)	Top-5 acc 83.203 (82.283)	lr 0.01339
Train [58][960/3239]	Time 0.215 (0.588)	Data Time 0.001 (0.037)	Loss 2.7054 (2.5893)	Entropy 1.11021 (1.11075)	Top-1 acc 58.984 (61.702)	Top-5 acc 78.906 (82.275)	lr 0.01338
Train [58][970/3239]	Time 0.201 (0.586)	Data Time 0.001 (0.037)	Loss 2.5052 (2.5895)	Entropy 1.11016 (1.11074)	Top-1 acc 64.844 (61.705)	Top-5 acc 84.766 (82.271)	lr 0.01338
Train [58][980/3239]	Time 0.215 (0.585)	Data Time 0.001 (0.036)	Loss 2.4987 (2.5898)	Entropy 1.11018 (1.11074)	Top-1 acc 63.672 (61.708)	Top-5 acc 83.984 (82.269)	lr 0.01338
Train [58][990/3239]	Time 0.250 (0.584)	Data Time 0.002 (0.036)	Loss 2.5860 (2.5898)	Entropy 1.11019 (1.11073)	Top-1 acc 62.109 (61.708)	Top-5 acc 84.375 (82.269)	lr 0.01338
Train [58][1000/3239]	Time 2.447 (0.582)	Data Time 0.001 (0.036)	Loss 2.5073 (2.5893)	Entropy 1.11019 (1.11073)	Top-1 acc 62.891 (61.713)	Top-5 acc 85.156 (82.279)	lr 0.01338
Train [58][1010/3239]	Time 0.201 (0.579)	Data Time 0.001 (0.035)	Loss 2.5550 (2.5894)	Entropy 1.11013 (1.11072)	Top-1 acc 62.109 (61.713)	Top-5 acc 80.469 (82.268)	lr 0.01338
Train [58][1020/3239]	Time 0.239 (0.578)	Data Time 0.002 (0.035)	Loss 2.8133 (2.5893)	Entropy 1.11009 (1.11071)	Top-1 acc 54.297 (61.703)	Top-5 acc 79.688 (82.262)	lr 0.01338
Train [58][1030/3239]	Time 0.279 (0.577)	Data Time 0.002 (0.035)	Loss 2.5523 (2.5892)	Entropy 1.11006 (1.11071)	Top-1 acc 65.625 (61.698)	Top-5 acc 83.984 (82.261)	lr 0.01338
Train [58][1040/3239]	Time 0.215 (0.576)	Data Time 0.001 (0.034)	Loss 2.5119 (2.5895)	Entropy 1.11006 (1.11070)	Top-1 acc 67.188 (61.697)	Top-5 acc 84.375 (82.257)	lr 0.01338
Train [58][1050/3239]	Time 0.361 (0.575)	Data Time 0.001 (0.034)	Loss 2.6643 (2.5895)	Entropy 1.11009 (1.11069)	Top-1 acc 58.984 (61.691)	Top-5 acc 80.078 (82.255)	lr 0.01337
Train [58][1060/3239]	Time 0.218 (0.574)	Data Time 0.001 (0.034)	Loss 2.7980 (2.5896)	Entropy 1.11002 (1.11069)	Top-1 acc 58.984 (61.693)	Top-5 acc 78.516 (82.259)	lr 0.01337
Train [58][1070/3239]	Time 0.216 (0.621)	Data Time 0.003 (0.034)	Loss 2.5445 (2.5895)	Entropy 1.10998 (1.11068)	Top-1 acc 60.156 (61.680)	Top-5 acc 83.203 (82.256)	lr 0.01337
Train [58][1080/3239]	Time 0.219 (0.619)	Data Time 0.002 (0.033)	Loss 2.6867 (2.5895)	Entropy 1.10997 (1.11068)	Top-1 acc 59.375 (61.685)	Top-5 acc 80.078 (82.252)	lr 0.01337
Train [58][1090/3239]	Time 0.230 (0.618)	Data Time 0.002 (0.033)	Loss 2.6869 (2.5897)	Entropy 1.10994 (1.11067)	Top-1 acc 57.422 (61.682)	Top-5 acc 82.031 (82.249)	lr 0.01337
Train [58][1100/3239]	Time 0.219 (0.617)	Data Time 0.001 (0.033)	Loss 2.6921 (2.5898)	Entropy 1.10990 (1.11066)	Top-1 acc 61.719 (61.677)	Top-5 acc 81.250 (82.247)	lr 0.01337
Train [58][1110/3239]	Time 2.547 (0.616)	Data Time 0.001 (0.032)	Loss 2.7790 (2.5897)	Entropy 1.10990 (1.11066)	Top-1 acc 57.812 (61.677)	Top-5 acc 77.734 (82.251)	lr 0.01337
Train [58][1120/3239]	Time 0.259 (0.612)	Data Time 0.002 (0.032)	Loss 2.5853 (2.5895)	Entropy 1.10992 (1.11065)	Top-1 acc 60.938 (61.676)	Top-5 acc 82.422 (82.255)	lr 0.01337
Train [58][1130/3239]	Time 0.244 (0.611)	Data Time 0.002 (0.032)	Loss 2.4195 (2.5894)	Entropy 1.10988 (1.11064)	Top-1 acc 65.625 (61.681)	Top-5 acc 83.203 (82.252)	lr 0.01337
Train [58][1140/3239]	Time 0.311 (0.610)	Data Time 0.001 (0.032)	Loss 2.6254 (2.5897)	Entropy 1.10983 (1.11064)	Top-1 acc 60.547 (61.675)	Top-5 acc 82.422 (82.249)	lr 0.01337
Train [58][1150/3239]	Time 0.235 (0.609)	Data Time 0.001 (0.031)	Loss 2.4065 (2.5894)	Entropy 1.10976 (1.11063)	Top-1 acc 67.188 (61.680)	Top-5 acc 85.938 (82.253)	lr 0.01336
Train [58][1160/3239]	Time 0.227 (0.607)	Data Time 0.001 (0.031)	Loss 2.5902 (2.5897)	Entropy 1.10977 (1.11062)	Top-1 acc 61.719 (61.675)	Top-5 acc 81.250 (82.249)	lr 0.01336
Train [58][1170/3239]	Time 0.229 (0.606)	Data Time 0.001 (0.031)	Loss 2.6083 (2.5901)	Entropy 1.10970 (1.11061)	Top-1 acc 59.766 (61.669)	Top-5 acc 82.422 (82.242)	lr 0.01336
Train [58][1180/3239]	Time 0.226 (0.605)	Data Time 0.001 (0.031)	Loss 2.6279 (2.5909)	Entropy 1.10968 (1.11061)	Top-1 acc 60.156 (61.651)	Top-5 acc 81.641 (82.231)	lr 0.01336
Train [58][1190/3239]	Time 0.338 (0.604)	Data Time 0.001 (0.030)	Loss 2.4794 (2.5908)	Entropy 1.10966 (1.11060)	Top-1 acc 62.891 (61.652)	Top-5 acc 83.203 (82.230)	lr 0.01336
Train [58][1200/3239]	Time 0.230 (0.602)	Data Time 0.002 (0.030)	Loss 2.5500 (2.5905)	Entropy 1.10965 (1.11059)	Top-1 acc 58.984 (61.652)	Top-5 acc 82.422 (82.236)	lr 0.01336
Train [58][1210/3239]	Time 0.215 (0.601)	Data Time 0.002 (0.030)	Loss 2.8331 (2.5908)	Entropy 1.10961 (1.11058)	Top-1 acc 57.031 (61.634)	Top-5 acc 75.391 (82.228)	lr 0.01336
Train [58][1220/3239]	Time 2.487 (0.600)	Data Time 0.001 (0.030)	Loss 2.5451 (2.5908)	Entropy 1.10961 (1.11057)	Top-1 acc 65.234 (61.637)	Top-5 acc 83.984 (82.225)	lr 0.01336
Train [58][1230/3239]	Time 0.218 (0.597)	Data Time 0.001 (0.029)	Loss 2.6571 (2.5905)	Entropy 1.10959 (1.11057)	Top-1 acc 64.453 (61.654)	Top-5 acc 80.859 (82.231)	lr 0.01336
Train [58][1240/3239]	Time 0.320 (0.596)	Data Time 0.001 (0.029)	Loss 2.7493 (2.5903)	Entropy 1.10959 (1.11056)	Top-1 acc 60.156 (61.659)	Top-5 acc 78.516 (82.236)	lr 0.01336
Train [58][1250/3239]	Time 0.216 (0.595)	Data Time 0.001 (0.029)	Loss 3.9712 (2.5913)	Entropy 1.10956 (1.11055)	Top-1 acc 38.672 (61.641)	Top-5 acc 60.547 (82.217)	lr 0.01335
Train [58][1260/3239]	Time 0.221 (0.594)	Data Time 0.002 (0.029)	Loss 2.6861 (2.5916)	Entropy 1.10955 (1.11054)	Top-1 acc 62.891 (61.638)	Top-5 acc 83.984 (82.215)	lr 0.01335
Train [58][1270/3239]	Time 0.226 (0.593)	Data Time 0.001 (0.029)	Loss 2.5694 (2.5913)	Entropy 1.10952 (1.11053)	Top-1 acc 62.500 (61.642)	Top-5 acc 83.203 (82.223)	lr 0.01335
Train [58][1280/3239]	Time 0.231 (0.592)	Data Time 0.001 (0.028)	Loss 2.4967 (2.5915)	Entropy 1.10947 (1.11053)	Top-1 acc 63.281 (61.636)	Top-5 acc 82.812 (82.219)	lr 0.01335
Train [58][1290/3239]	Time 0.221 (0.591)	Data Time 0.002 (0.028)	Loss 2.5989 (2.5914)	Entropy 1.10940 (1.11052)	Top-1 acc 62.500 (61.636)	Top-5 acc 79.688 (82.216)	lr 0.01335
Train [58][1300/3239]	Time 0.223 (0.590)	Data Time 0.001 (0.028)	Loss 2.5211 (2.5913)	Entropy 1.10938 (1.11051)	Top-1 acc 64.062 (61.639)	Top-5 acc 83.594 (82.223)	lr 0.01335
Train [58][1310/3239]	Time 0.231 (0.589)	Data Time 0.001 (0.028)	Loss 2.4566 (2.5908)	Entropy 1.10936 (1.11050)	Top-1 acc 66.016 (61.656)	Top-5 acc 86.719 (82.236)	lr 0.01335
Train [58][1320/3239]	Time 0.265 (0.588)	Data Time 0.001 (0.028)	Loss 2.5378 (2.5906)	Entropy 1.10935 (1.11049)	Top-1 acc 61.719 (61.664)	Top-5 acc 82.422 (82.241)	lr 0.01335
Train [58][1330/3239]	Time 2.546 (0.587)	Data Time 0.001 (0.027)	Loss 2.5382 (2.5903)	Entropy 1.10935 (1.11048)	Top-1 acc 62.891 (61.670)	Top-5 acc 83.203 (82.239)	lr 0.01335
Train [58][1340/3239]	Time 0.267 (0.585)	Data Time 0.001 (0.027)	Loss 2.6482 (2.5909)	Entropy 1.10936 (1.11047)	Top-1 acc 59.766 (61.656)	Top-5 acc 80.078 (82.230)	lr 0.01335
Train [58][1350/3239]	Time 0.219 (0.584)	Data Time 0.001 (0.027)	Loss 2.8295 (2.5910)	Entropy 1.10928 (1.11047)	Top-1 acc 56.641 (61.655)	Top-5 acc 78.125 (82.228)	lr 0.01334
Train [58][1360/3239]	Time 0.223 (0.583)	Data Time 0.001 (0.027)	Loss 2.5859 (2.5909)	Entropy 1.10928 (1.11046)	Top-1 acc 62.109 (61.655)	Top-5 acc 81.250 (82.229)	lr 0.01334
Train [58][1370/3239]	Time 0.227 (0.582)	Data Time 0.001 (0.027)	Loss 2.7881 (2.5907)	Entropy 1.10927 (1.11045)	Top-1 acc 58.203 (61.668)	Top-5 acc 75.781 (82.230)	lr 0.01334
Train [58][1380/3239]	Time 0.322 (0.581)	Data Time 0.001 (0.026)	Loss 2.4290 (2.5906)	Entropy 1.10926 (1.11044)	Top-1 acc 64.062 (61.676)	Top-5 acc 84.375 (82.233)	lr 0.01334
Train [58][1390/3239]	Time 0.215 (0.580)	Data Time 0.001 (0.026)	Loss 2.5860 (2.5905)	Entropy 1.10924 (1.11043)	Top-1 acc 61.328 (61.680)	Top-5 acc 80.859 (82.228)	lr 0.01334
Train [58][1400/3239]	Time 0.211 (0.579)	Data Time 0.001 (0.026)	Loss 2.5753 (2.5903)	Entropy 1.10923 (1.11042)	Top-1 acc 60.547 (61.687)	Top-5 acc 82.812 (82.234)	lr 0.01334
Train [58][1410/3239]	Time 0.228 (0.579)	Data Time 0.001 (0.026)	Loss 2.4434 (2.5905)	Entropy 1.10920 (1.11041)	Top-1 acc 64.062 (61.688)	Top-5 acc 87.109 (82.231)	lr 0.01334
Train [58][1420/3239]	Time 0.237 (0.578)	Data Time 0.002 (0.026)	Loss 2.6096 (2.5906)	Entropy 1.10919 (1.11041)	Top-1 acc 65.234 (61.680)	Top-5 acc 82.422 (82.233)	lr 0.01334
Train [58][1430/3239]	Time 0.361 (0.610)	Data Time 0.004 (0.026)	Loss 2.5059 (2.5903)	Entropy 1.10921 (1.11040)	Top-1 acc 65.625 (61.687)	Top-5 acc 82.422 (82.240)	lr 0.01334
Train [58][1440/3239]	Time 2.401 (0.609)	Data Time 0.002 (0.025)	Loss 2.4308 (2.5899)	Entropy 1.10921 (1.11039)	Top-1 acc 62.891 (61.691)	Top-5 acc 84.375 (82.251)	lr 0.01334
Train [58][1450/3239]	Time 0.258 (0.607)	Data Time 0.002 (0.025)	Loss 2.6275 (2.5897)	Entropy 1.10918 (1.11038)	Top-1 acc 63.281 (61.693)	Top-5 acc 80.078 (82.252)	lr 0.01333
Train [58][1460/3239]	Time 0.219 (0.606)	Data Time 0.001 (0.025)	Loss 2.6870 (2.5895)	Entropy 1.10918 (1.11037)	Top-1 acc 59.375 (61.697)	Top-5 acc 80.859 (82.258)	lr 0.01333
Train [58][1470/3239]	Time 0.319 (0.605)	Data Time 0.001 (0.025)	Loss 2.6369 (2.5895)	Entropy 1.10919 (1.11036)	Top-1 acc 60.938 (61.690)	Top-5 acc 79.688 (82.258)	lr 0.01333
Train [58][1480/3239]	Time 0.225 (0.604)	Data Time 0.001 (0.025)	Loss 2.7418 (2.5893)	Entropy 1.10918 (1.11036)	Top-1 acc 58.984 (61.691)	Top-5 acc 80.078 (82.260)	lr 0.01333
Train [58][1490/3239]	Time 0.235 (0.603)	Data Time 0.001 (0.025)	Loss 2.6265 (2.5889)	Entropy 1.10915 (1.11035)	Top-1 acc 61.328 (61.701)	Top-5 acc 80.859 (82.270)	lr 0.01333
Train [58][1500/3239]	Time 0.202 (0.602)	Data Time 0.001 (0.024)	Loss 2.8091 (2.5889)	Entropy 1.10919 (1.11034)	Top-1 acc 57.031 (61.705)	Top-5 acc 77.734 (82.270)	lr 0.01333
Train [58][1510/3239]	Time 0.228 (0.601)	Data Time 0.001 (0.024)	Loss 2.5226 (2.5888)	Entropy 1.10916 (1.11033)	Top-1 acc 61.328 (61.703)	Top-5 acc 85.547 (82.275)	lr 0.01333
Train [58][1520/3239]	Time 0.246 (0.600)	Data Time 0.001 (0.024)	Loss 2.4941 (2.5884)	Entropy 1.10918 (1.11033)	Top-1 acc 61.719 (61.712)	Top-5 acc 85.938 (82.279)	lr 0.01333
Train [58][1530/3239]	Time 0.223 (0.599)	Data Time 0.001 (0.024)	Loss 2.5315 (2.5880)	Entropy 1.10914 (1.11032)	Top-1 acc 61.719 (61.720)	Top-5 acc 83.203 (82.286)	lr 0.01333
Train [58][1540/3239]	Time 0.302 (0.598)	Data Time 0.001 (0.024)	Loss 2.4892 (2.5881)	Entropy 1.10904 (1.11031)	Top-1 acc 64.062 (61.720)	Top-5 acc 84.766 (82.285)	lr 0.01333
Train [58][1550/3239]	Time 2.493 (0.597)	Data Time 0.001 (0.024)	Loss 2.6553 (2.5881)	Entropy 1.10904 (1.11030)	Top-1 acc 61.719 (61.715)	Top-5 acc 79.297 (82.286)	lr 0.01332
Train [58][1560/3239]	Time 0.226 (0.595)	Data Time 0.002 (0.024)	Loss 2.4895 (2.5882)	Entropy 1.10902 (1.11029)	Top-1 acc 66.016 (61.712)	Top-5 acc 85.938 (82.284)	lr 0.01332
Train [58][1570/3239]	Time 0.265 (0.594)	Data Time 0.002 (0.023)	Loss 2.6952 (2.5885)	Entropy 1.10904 (1.11029)	Top-1 acc 58.203 (61.706)	Top-5 acc 80.469 (82.277)	lr 0.01332
Train [58][1580/3239]	Time 0.236 (0.594)	Data Time 0.001 (0.023)	Loss 2.5359 (2.5884)	Entropy 1.10898 (1.11028)	Top-1 acc 62.891 (61.708)	Top-5 acc 86.328 (82.285)	lr 0.01332
Train [58][1590/3239]	Time 0.212 (0.593)	Data Time 0.001 (0.023)	Loss 2.6554 (2.5887)	Entropy 1.10887 (1.11027)	Top-1 acc 58.594 (61.694)	Top-5 acc 83.203 (82.279)	lr 0.01332
Train [58][1600/3239]	Time 0.245 (0.592)	Data Time 0.002 (0.023)	Loss 2.5643 (2.5886)	Entropy 1.10885 (1.11026)	Top-1 acc 63.281 (61.705)	Top-5 acc 83.203 (82.276)	lr 0.01332
Train [58][1610/3239]	Time 0.319 (0.591)	Data Time 0.001 (0.023)	Loss 2.6557 (2.5886)	Entropy 1.10886 (1.11025)	Top-1 acc 58.984 (61.703)	Top-5 acc 80.859 (82.281)	lr 0.01332
Train [58][1620/3239]	Time 0.235 (0.590)	Data Time 0.001 (0.023)	Loss 2.6889 (2.5883)	Entropy 1.10888 (1.11024)	Top-1 acc 57.812 (61.709)	Top-5 acc 79.688 (82.290)	lr 0.01332
Train [58][1630/3239]	Time 0.230 (0.590)	Data Time 0.001 (0.023)	Loss 2.6075 (2.5887)	Entropy 1.10890 (1.11023)	Top-1 acc 61.328 (61.702)	Top-5 acc 80.859 (82.283)	lr 0.01332
Train [58][1640/3239]	Time 0.201 (0.589)	Data Time 0.001 (0.022)	Loss 2.5592 (2.5889)	Entropy 1.10886 (1.11023)	Top-1 acc 58.594 (61.697)	Top-5 acc 86.328 (82.279)	lr 0.01332
Train [58][1650/3239]	Time 0.226 (0.588)	Data Time 0.001 (0.022)	Loss 2.5196 (2.5891)	Entropy 1.10885 (1.11022)	Top-1 acc 65.234 (61.696)	Top-5 acc 82.812 (82.272)	lr 0.01331
Train [58][1660/3239]	Time 2.668 (0.587)	Data Time 0.002 (0.022)	Loss 2.4855 (2.5890)	Entropy 1.10885 (1.11021)	Top-1 acc 65.625 (61.700)	Top-5 acc 82.812 (82.271)	lr 0.01331
Train [58][1670/3239]	Time 0.220 (0.585)	Data Time 0.001 (0.022)	Loss 2.6053 (2.5889)	Entropy 1.10884 (1.11020)	Top-1 acc 63.281 (61.712)	Top-5 acc 83.203 (82.270)	lr 0.01331
Train [58][1680/3239]	Time 0.230 (0.585)	Data Time 0.001 (0.022)	Loss 2.5334 (2.5888)	Entropy 1.10881 (1.11019)	Top-1 acc 64.453 (61.714)	Top-5 acc 82.422 (82.267)	lr 0.01331
Train [58][1690/3239]	Time 0.216 (0.584)	Data Time 0.001 (0.022)	Loss 2.6079 (2.5889)	Entropy 1.10878 (1.11018)	Top-1 acc 59.375 (61.718)	Top-5 acc 82.422 (82.262)	lr 0.01331
Train [58][1700/3239]	Time 0.271 (0.583)	Data Time 0.002 (0.022)	Loss 2.4617 (2.5887)	Entropy 1.10878 (1.11018)	Top-1 acc 63.281 (61.725)	Top-5 acc 85.547 (82.263)	lr 0.01331
Train [58][1710/3239]	Time 0.221 (0.583)	Data Time 0.001 (0.022)	Loss 2.5667 (2.5887)	Entropy 1.10874 (1.11017)	Top-1 acc 60.547 (61.731)	Top-5 acc 81.641 (82.264)	lr 0.01331
Train [58][1720/3239]	Time 0.272 (0.582)	Data Time 0.001 (0.021)	Loss 2.5375 (2.5885)	Entropy 1.10867 (1.11016)	Top-1 acc 63.281 (61.735)	Top-5 acc 83.594 (82.265)	lr 0.01331
Train [58][1730/3239]	Time 0.232 (0.581)	Data Time 0.001 (0.021)	Loss 2.5671 (2.5884)	Entropy 1.10860 (1.11015)	Top-1 acc 59.375 (61.742)	Top-5 acc 82.812 (82.265)	lr 0.01331
Train [58][1740/3239]	Time 0.235 (0.580)	Data Time 0.001 (0.021)	Loss 2.9106 (2.5885)	Entropy 1.10861 (1.11014)	Top-1 acc 53.906 (61.736)	Top-5 acc 75.781 (82.265)	lr 0.01331
Train [58][1750/3239]	Time 0.318 (0.580)	Data Time 0.001 (0.021)	Loss 2.6461 (2.5884)	Entropy 1.10853 (1.11013)	Top-1 acc 60.156 (61.741)	Top-5 acc 82.422 (82.266)	lr 0.01330
Train [58][1760/3239]	Time 0.220 (0.579)	Data Time 0.001 (0.021)	Loss 2.7529 (2.5886)	Entropy 1.10855 (1.11012)	Top-1 acc 55.078 (61.740)	Top-5 acc 77.344 (82.261)	lr 0.01330
Train [58][1770/3239]	Time 2.333 (0.578)	Data Time 0.002 (0.021)	Loss 2.5794 (2.5884)	Entropy 1.10855 (1.11012)	Top-1 acc 63.281 (61.747)	Top-5 acc 83.203 (82.263)	lr 0.01330
Train [58][1780/3239]	Time 0.232 (0.577)	Data Time 0.001 (0.021)	Loss 2.5625 (2.5886)	Entropy 1.10849 (1.11011)	Top-1 acc 61.328 (61.742)	Top-5 acc 82.812 (82.260)	lr 0.01330
Train [58][1790/3239]	Time 0.211 (0.576)	Data Time 0.001 (0.021)	Loss 2.3514 (2.5886)	Entropy 1.10850 (1.11010)	Top-1 acc 68.359 (61.739)	Top-5 acc 87.891 (82.260)	lr 0.01330
Train [58][1800/3239]	Time 0.229 (0.603)	Data Time 0.002 (0.021)	Loss 2.6354 (2.5886)	Entropy 1.10850 (1.11009)	Top-1 acc 60.547 (61.739)	Top-5 acc 81.641 (82.258)	lr 0.01330
Train [58][1810/3239]	Time 0.234 (0.603)	Data Time 0.002 (0.021)	Loss 2.5703 (2.5883)	Entropy 1.10854 (1.11008)	Top-1 acc 63.672 (61.745)	Top-5 acc 81.250 (82.261)	lr 0.01330
Train [58][1820/3239]	Time 0.231 (0.602)	Data Time 0.001 (0.020)	Loss 2.7023 (2.5883)	Entropy 1.10854 (1.11007)	Top-1 acc 58.594 (61.751)	Top-5 acc 79.688 (82.263)	lr 0.01330
Train [58][1830/3239]	Time 0.237 (0.601)	Data Time 0.001 (0.020)	Loss 2.5954 (2.5884)	Entropy 1.10855 (1.11006)	Top-1 acc 61.328 (61.746)	Top-5 acc 82.422 (82.263)	lr 0.01330
Train [58][1840/3239]	Time 0.265 (0.601)	Data Time 0.001 (0.020)	Loss 2.6725 (2.5885)	Entropy 1.10854 (1.11005)	Top-1 acc 58.203 (61.739)	Top-5 acc 80.078 (82.261)	lr 0.01330
Train [58][1850/3239]	Time 0.210 (0.600)	Data Time 0.001 (0.020)	Loss 2.6566 (2.5884)	Entropy 1.10852 (1.11005)	Top-1 acc 60.547 (61.745)	Top-5 acc 81.250 (82.264)	lr 0.01329
Train [58][1860/3239]	Time 0.235 (0.599)	Data Time 0.001 (0.020)	Loss 2.4349 (2.5883)	Entropy 1.10843 (1.11004)	Top-1 acc 66.016 (61.744)	Top-5 acc 86.328 (82.265)	lr 0.01329
Train [58][1870/3239]	Time 0.254 (0.598)	Data Time 0.001 (0.020)	Loss 2.4441 (2.5885)	Entropy 1.10843 (1.11003)	Top-1 acc 67.188 (61.742)	Top-5 acc 85.938 (82.263)	lr 0.01329
Train [58][1880/3239]	Time 2.536 (0.598)	Data Time 0.001 (0.020)	Loss 2.5689 (2.5887)	Entropy 1.10843 (1.11002)	Top-1 acc 60.547 (61.735)	Top-5 acc 81.250 (82.262)	lr 0.01329
Train [58][1890/3239]	Time 0.326 (0.596)	Data Time 0.001 (0.020)	Loss 2.7097 (2.5887)	Entropy 1.10840 (1.11001)	Top-1 acc 60.938 (61.730)	Top-5 acc 79.688 (82.262)	lr 0.01329
Train [58][1900/3239]	Time 0.215 (0.595)	Data Time 0.001 (0.020)	Loss 2.6630 (2.5889)	Entropy 1.10839 (1.11000)	Top-1 acc 62.500 (61.730)	Top-5 acc 80.859 (82.261)	lr 0.01329
Train [58][1910/3239]	Time 0.233 (0.594)	Data Time 0.001 (0.020)	Loss 2.7037 (2.5890)	Entropy 1.10835 (1.11000)	Top-1 acc 57.812 (61.728)	Top-5 acc 80.859 (82.259)	lr 0.01329
Train [58][1920/3239]	Time 0.236 (0.594)	Data Time 0.001 (0.019)	Loss 2.4850 (2.5890)	Entropy 1.10835 (1.10999)	Top-1 acc 62.891 (61.720)	Top-5 acc 84.766 (82.262)	lr 0.01329
Train [58][1930/3239]	Time 0.234 (0.593)	Data Time 0.001 (0.019)	Loss 2.5723 (2.5890)	Entropy 1.10833 (1.10998)	Top-1 acc 62.109 (61.723)	Top-5 acc 81.250 (82.264)	lr 0.01329
Train [58][1940/3239]	Time 0.217 (0.593)	Data Time 0.001 (0.019)	Loss 2.4509 (2.5886)	Entropy 1.10826 (1.10997)	Top-1 acc 63.672 (61.731)	Top-5 acc 86.328 (82.273)	lr 0.01329
Train [58][1950/3239]	Time 0.208 (0.592)	Data Time 0.001 (0.019)	Loss 2.5884 (2.5887)	Entropy 1.10825 (1.10996)	Top-1 acc 58.594 (61.724)	Top-5 acc 82.812 (82.275)	lr 0.01328
Train [58][1960/3239]	Time 0.223 (0.591)	Data Time 0.001 (0.019)	Loss 2.5812 (2.5885)	Entropy 1.10826 (1.10995)	Top-1 acc 60.156 (61.721)	Top-5 acc 82.812 (82.278)	lr 0.01328
Train [58][1970/3239]	Time 0.233 (0.591)	Data Time 0.001 (0.019)	Loss 2.4952 (2.5884)	Entropy 1.10825 (1.10994)	Top-1 acc 62.109 (61.721)	Top-5 acc 84.766 (82.279)	lr 0.01328
Train [58][1980/3239]	Time 0.223 (0.590)	Data Time 0.001 (0.019)	Loss 2.7725 (2.5886)	Entropy 1.10824 (1.10994)	Top-1 acc 59.375 (61.718)	Top-5 acc 79.688 (82.276)	lr 0.01328
Train [58][1990/3239]	Time 2.545 (0.589)	Data Time 0.001 (0.019)	Loss 2.6369 (2.5885)	Entropy 1.10824 (1.10993)	Top-1 acc 59.375 (61.713)	Top-5 acc 81.250 (82.277)	lr 0.01328
Train [58][2000/3239]	Time 0.237 (0.588)	Data Time 0.001 (0.019)	Loss 2.7149 (2.5889)	Entropy 1.10819 (1.10992)	Top-1 acc 59.766 (61.706)	Top-5 acc 79.688 (82.268)	lr 0.01328
Train [58][2010/3239]	Time 0.211 (0.587)	Data Time 0.001 (0.019)	Loss 2.5291 (2.5886)	Entropy 1.10816 (1.10991)	Top-1 acc 61.719 (61.712)	Top-5 acc 85.547 (82.274)	lr 0.01328
Train [58][2020/3239]	Time 0.283 (0.586)	Data Time 0.001 (0.019)	Loss 2.4634 (2.5888)	Entropy 1.10816 (1.10990)	Top-1 acc 66.406 (61.707)	Top-5 acc 84.766 (82.265)	lr 0.01328
Train [58][2030/3239]	Time 0.320 (0.586)	Data Time 0.001 (0.018)	Loss 2.7486 (2.5886)	Entropy 1.10812 (1.10989)	Top-1 acc 58.594 (61.707)	Top-5 acc 78.906 (82.268)	lr 0.01328
Train [58][2040/3239]	Time 0.226 (0.585)	Data Time 0.001 (0.018)	Loss 2.6770 (2.5889)	Entropy 1.10809 (1.10988)	Top-1 acc 60.156 (61.696)	Top-5 acc 79.688 (82.264)	lr 0.01328
Train [58][2050/3239]	Time 0.226 (0.585)	Data Time 0.001 (0.018)	Loss 2.6477 (2.5889)	Entropy 1.10811 (1.10987)	Top-1 acc 59.375 (61.695)	Top-5 acc 81.250 (82.265)	lr 0.01327
Train [58][2060/3239]	Time 0.230 (0.584)	Data Time 0.001 (0.018)	Loss 2.6259 (2.5890)	Entropy 1.10811 (1.10987)	Top-1 acc 58.203 (61.693)	Top-5 acc 82.031 (82.264)	lr 0.01327
Train [58][2070/3239]	Time 0.224 (0.583)	Data Time 0.001 (0.018)	Loss 2.6342 (2.5890)	Entropy 1.10808 (1.10986)	Top-1 acc 61.328 (61.689)	Top-5 acc 80.078 (82.267)	lr 0.01327
Train [58][2080/3239]	Time 0.225 (0.583)	Data Time 0.001 (0.018)	Loss 2.6347 (2.5891)	Entropy 1.10806 (1.10985)	Top-1 acc 60.938 (61.686)	Top-5 acc 79.688 (82.265)	lr 0.01327
Train [58][2090/3239]	Time 0.260 (0.582)	Data Time 0.001 (0.018)	Loss 2.7950 (2.5890)	Entropy 1.10802 (1.10984)	Top-1 acc 55.078 (61.688)	Top-5 acc 80.469 (82.266)	lr 0.01327
Train [58][2100/3239]	Time 2.593 (0.582)	Data Time 0.002 (0.018)	Loss 2.6736 (2.5896)	Entropy 1.10802 (1.10983)	Top-1 acc 61.719 (61.678)	Top-5 acc 78.516 (82.257)	lr 0.01327
Train [58][2110/3239]	Time 0.230 (0.580)	Data Time 0.001 (0.018)	Loss 2.5506 (2.5896)	Entropy 1.10799 (1.10982)	Top-1 acc 60.547 (61.680)	Top-5 acc 83.594 (82.257)	lr 0.01327
Train [58][2120/3239]	Time 0.239 (0.580)	Data Time 0.001 (0.018)	Loss 2.8139 (2.5897)	Entropy 1.10797 (1.10981)	Top-1 acc 56.641 (61.681)	Top-5 acc 78.906 (82.251)	lr 0.01327
Train [58][2130/3239]	Time 0.245 (0.579)	Data Time 0.001 (0.018)	Loss 2.5317 (2.5895)	Entropy 1.10792 (1.10981)	Top-1 acc 62.109 (61.688)	Top-5 acc 83.984 (82.253)	lr 0.01327
Train [58][2140/3239]	Time 0.229 (0.579)	Data Time 0.001 (0.018)	Loss 2.6877 (2.5896)	Entropy 1.10791 (1.10980)	Top-1 acc 59.375 (61.682)	Top-5 acc 80.469 (82.253)	lr 0.01327
Train [58][2150/3239]	Time 0.235 (0.578)	Data Time 0.001 (0.018)	Loss 2.4212 (2.5895)	Entropy 1.10789 (1.10979)	Top-1 acc 64.844 (61.680)	Top-5 acc 85.156 (82.255)	lr 0.01326
Train [58][2160/3239]	Time 0.225 (0.601)	Data Time 0.002 (0.017)	Loss 2.5120 (2.5898)	Entropy 1.10778 (1.10978)	Top-1 acc 62.891 (61.671)	Top-5 acc 82.812 (82.249)	lr 0.01326
Train [58][2170/3239]	Time 0.329 (0.600)	Data Time 0.002 (0.017)	Loss 2.4462 (2.5898)	Entropy 1.10776 (1.10977)	Top-1 acc 64.844 (61.673)	Top-5 acc 84.766 (82.251)	lr 0.01326
Train [58][2180/3239]	Time 0.235 (0.600)	Data Time 0.001 (0.017)	Loss 2.6432 (2.5901)	Entropy 1.10774 (1.10976)	Top-1 acc 60.547 (61.663)	Top-5 acc 81.250 (82.247)	lr 0.01326
Train [58][2190/3239]	Time 0.130 (0.599)	Data Time 0.001 (0.017)	Loss 2.8706 (2.5904)	Entropy 1.10774 (1.10975)	Top-1 acc 55.078 (61.659)	Top-5 acc 78.125 (82.246)	lr 0.01326
Train [58][2200/3239]	Time 0.244 (0.598)	Data Time 0.001 (0.017)	Loss 2.7699 (2.5907)	Entropy 1.10773 (1.10974)	Top-1 acc 54.297 (61.649)	Top-5 acc 78.516 (82.240)	lr 0.01326
Train [58][2210/3239]	Time 2.632 (0.598)	Data Time 0.002 (0.017)	Loss 2.4856 (2.5906)	Entropy 1.10773 (1.10973)	Top-1 acc 64.844 (61.653)	Top-5 acc 84.375 (82.243)	lr 0.01326
Train [58][2220/3239]	Time 0.331 (0.596)	Data Time 0.001 (0.017)	Loss 2.5519 (2.5906)	Entropy 1.10766 (1.10972)	Top-1 acc 58.984 (61.651)	Top-5 acc 83.203 (82.242)	lr 0.01326
Train [58][2230/3239]	Time 0.239 (0.596)	Data Time 0.001 (0.017)	Loss 2.4058 (2.5903)	Entropy 1.10771 (1.10971)	Top-1 acc 63.672 (61.656)	Top-5 acc 86.719 (82.247)	lr 0.01326
Train [58][2240/3239]	Time 0.232 (0.595)	Data Time 0.001 (0.017)	Loss 2.3590 (2.5902)	Entropy 1.10771 (1.10971)	Top-1 acc 66.797 (61.658)	Top-5 acc 87.109 (82.249)	lr 0.01326
Train [58][2250/3239]	Time 0.232 (0.595)	Data Time 0.001 (0.017)	Loss 2.8255 (2.5906)	Entropy 1.10807 (1.10970)	Top-1 acc 58.594 (61.649)	Top-5 acc 76.953 (82.240)	lr 0.01325
Train [58][2260/3239]	Time 0.233 (0.594)	Data Time 0.001 (0.017)	Loss 2.6927 (2.5908)	Entropy 1.10805 (1.10969)	Top-1 acc 60.938 (61.642)	Top-5 acc 80.469 (82.235)	lr 0.01325
Train [58][2270/3239]	Time 0.264 (0.594)	Data Time 0.001 (0.017)	Loss 2.5378 (2.5906)	Entropy 1.10806 (1.10968)	Top-1 acc 61.328 (61.647)	Top-5 acc 85.156 (82.240)	lr 0.01325
Train [58][2280/3239]	Time 0.211 (0.593)	Data Time 0.001 (0.017)	Loss 2.7840 (2.5908)	Entropy 1.10806 (1.10968)	Top-1 acc 57.812 (61.642)	Top-5 acc 80.078 (82.237)	lr 0.01325
Train [58][2290/3239]	Time 0.214 (0.592)	Data Time 0.001 (0.017)	Loss 2.7830 (2.5910)	Entropy 1.10797 (1.10967)	Top-1 acc 55.469 (61.638)	Top-5 acc 79.688 (82.234)	lr 0.01325
Train [58][2300/3239]	Time 0.223 (0.592)	Data Time 0.001 (0.017)	Loss 2.6836 (2.5910)	Entropy 1.10794 (1.10966)	Top-1 acc 59.375 (61.639)	Top-5 acc 79.688 (82.233)	lr 0.01325
Train [58][2310/3239]	Time 0.316 (0.591)	Data Time 0.001 (0.016)	Loss 2.5989 (2.5910)	Entropy 1.10793 (1.10965)	Top-1 acc 63.281 (61.642)	Top-5 acc 81.641 (82.232)	lr 0.01325
Train [58][2320/3239]	Time 2.435 (0.591)	Data Time 0.001 (0.016)	Loss 2.6597 (2.5910)	Entropy 1.10793 (1.10965)	Top-1 acc 54.297 (61.641)	Top-5 acc 83.203 (82.235)	lr 0.01325
Train [58][2330/3239]	Time 0.212 (0.589)	Data Time 0.001 (0.016)	Loss 2.5041 (2.5912)	Entropy 1.10790 (1.10964)	Top-1 acc 62.500 (61.635)	Top-5 acc 85.156 (82.232)	lr 0.01325
Train [58][2340/3239]	Time 0.213 (0.589)	Data Time 0.001 (0.016)	Loss 2.5952 (2.5911)	Entropy 1.10791 (1.10963)	Top-1 acc 63.281 (61.637)	Top-5 acc 82.031 (82.232)	lr 0.01324
Train [58][2350/3239]	Time 0.238 (0.588)	Data Time 0.001 (0.016)	Loss 2.6471 (2.5911)	Entropy 1.10789 (1.10962)	Top-1 acc 64.062 (61.641)	Top-5 acc 79.297 (82.232)	lr 0.01324
Train [58][2360/3239]	Time 0.330 (0.588)	Data Time 0.001 (0.016)	Loss 2.5646 (2.5911)	Entropy 1.10789 (1.10962)	Top-1 acc 64.062 (61.641)	Top-5 acc 83.203 (82.231)	lr 0.01324
Train [58][2370/3239]	Time 0.226 (0.587)	Data Time 0.001 (0.016)	Loss 2.5071 (2.5912)	Entropy 1.10779 (1.10961)	Top-1 acc 62.109 (61.639)	Top-5 acc 85.156 (82.228)	lr 0.01324
Train [58][2380/3239]	Time 0.210 (0.587)	Data Time 0.001 (0.016)	Loss 2.7024 (2.5914)	Entropy 1.10778 (1.10960)	Top-1 acc 60.547 (61.636)	Top-5 acc 80.078 (82.224)	lr 0.01324
Train [58][2390/3239]	Time 0.223 (0.586)	Data Time 0.001 (0.016)	Loss 2.7425 (2.5914)	Entropy 1.10774 (1.10959)	Top-1 acc 59.766 (61.637)	Top-5 acc 78.516 (82.223)	lr 0.01324
Train [58][2400/3239]	Time 0.222 (0.586)	Data Time 0.001 (0.016)	Loss 2.5451 (2.5915)	Entropy 1.10774 (1.10959)	Top-1 acc 62.891 (61.633)	Top-5 acc 82.422 (82.220)	lr 0.01324
Train [58][2410/3239]	Time 0.218 (0.585)	Data Time 0.001 (0.016)	Loss 2.5455 (2.5914)	Entropy 1.10767 (1.10958)	Top-1 acc 63.281 (61.634)	Top-5 acc 83.984 (82.222)	lr 0.01324
Train [58][2420/3239]	Time 0.216 (0.585)	Data Time 0.001 (0.016)	Loss 2.6515 (2.5915)	Entropy 1.10767 (1.10957)	Top-1 acc 61.719 (61.633)	Top-5 acc 81.250 (82.219)	lr 0.01324
Train [58][2430/3239]	Time 2.666 (0.584)	Data Time 0.001 (0.016)	Loss 2.5603 (2.5917)	Entropy 1.10767 (1.10956)	Top-1 acc 62.891 (61.630)	Top-5 acc 83.203 (82.213)	lr 0.01324
Train [58][2440/3239]	Time 0.226 (0.583)	Data Time 0.001 (0.016)	Loss 2.5865 (2.5917)	Entropy 1.10768 (1.10956)	Top-1 acc 59.375 (61.632)	Top-5 acc 83.984 (82.214)	lr 0.01323
Train [58][2450/3239]	Time 0.341 (0.582)	Data Time 0.001 (0.016)	Loss 2.5035 (2.5917)	Entropy 1.10767 (1.10955)	Top-1 acc 62.891 (61.632)	Top-5 acc 82.422 (82.212)	lr 0.01323
Train [58][2460/3239]	Time 0.230 (0.582)	Data Time 0.001 (0.016)	Loss 2.7022 (2.5919)	Entropy 1.10760 (1.10954)	Top-1 acc 58.594 (61.623)	Top-5 acc 78.516 (82.211)	lr 0.01323
Train [58][2470/3239]	Time 0.228 (0.581)	Data Time 0.001 (0.015)	Loss 2.6485 (2.5923)	Entropy 1.10752 (1.10953)	Top-1 acc 57.422 (61.615)	Top-5 acc 80.859 (82.206)	lr 0.01323
Train [58][2480/3239]	Time 0.210 (0.581)	Data Time 0.001 (0.015)	Loss 2.6795 (2.5922)	Entropy 1.10747 (1.10952)	Top-1 acc 59.766 (61.613)	Top-5 acc 82.422 (82.206)	lr 0.01323
Train [58][2490/3239]	Time 0.237 (0.581)	Data Time 0.001 (0.015)	Loss 2.6495 (2.5925)	Entropy 1.10748 (1.10951)	Top-1 acc 61.328 (61.604)	Top-5 acc 83.203 (82.202)	lr 0.01323
Train [58][2500/3239]	Time 0.335 (0.580)	Data Time 0.001 (0.015)	Loss 2.6792 (2.5926)	Entropy 1.10747 (1.10951)	Top-1 acc 58.594 (61.599)	Top-5 acc 80.859 (82.200)	lr 0.01323
Train [58][2510/3239]	Time 0.252 (0.580)	Data Time 0.001 (0.015)	Loss 2.9722 (2.5929)	Entropy 1.10748 (1.10950)	Top-1 acc 54.688 (61.597)	Top-5 acc 74.219 (82.192)	lr 0.01323
Train [58][2520/3239]	Time 0.262 (0.597)	Data Time 0.003 (0.015)	Loss 2.6356 (2.5928)	Entropy 1.10737 (1.10949)	Top-1 acc 60.547 (61.597)	Top-5 acc 81.250 (82.195)	lr 0.01323
Train [58][2530/3239]	Time 0.228 (0.597)	Data Time 0.002 (0.015)	Loss 2.7389 (2.5927)	Entropy 1.10733 (1.10948)	Top-1 acc 58.984 (61.603)	Top-5 acc 80.078 (82.197)	lr 0.01323
Train [58][2540/3239]	Time 2.508 (0.596)	Data Time 0.003 (0.015)	Loss 2.7350 (2.5929)	Entropy 1.10733 (1.10947)	Top-1 acc 55.469 (61.597)	Top-5 acc 80.469 (82.192)	lr 0.01322
Train [58][2550/3239]	Time 0.220 (0.595)	Data Time 0.001 (0.015)	Loss 2.6592 (2.5929)	Entropy 1.10726 (1.10947)	Top-1 acc 58.984 (61.594)	Top-5 acc 81.641 (82.193)	lr 0.01322
Train [58][2560/3239]	Time 0.177 (0.595)	Data Time 0.002 (0.015)	Loss 2.6100 (2.5928)	Entropy 1.10721 (1.10946)	Top-1 acc 59.375 (61.595)	Top-5 acc 82.031 (82.195)	lr 0.01322
Train [58][2570/3239]	Time 0.233 (0.594)	Data Time 0.001 (0.015)	Loss 2.4185 (2.5929)	Entropy 1.10722 (1.10945)	Top-1 acc 67.578 (61.592)	Top-5 acc 87.500 (82.195)	lr 0.01322
Train [58][2580/3239]	Time 0.204 (0.594)	Data Time 0.001 (0.015)	Loss 2.7103 (2.5930)	Entropy 1.10714 (1.10944)	Top-1 acc 55.078 (61.588)	Top-5 acc 81.641 (82.195)	lr 0.01322
Train [58][2590/3239]	Time 0.353 (0.593)	Data Time 0.001 (0.015)	Loss 2.4368 (2.5931)	Entropy 1.10705 (1.10943)	Top-1 acc 67.188 (61.588)	Top-5 acc 83.984 (82.192)	lr 0.01322
Train [58][2600/3239]	Time 0.237 (0.593)	Data Time 0.001 (0.015)	Loss 2.7672 (2.5931)	Entropy 1.10705 (1.10942)	Top-1 acc 57.422 (61.585)	Top-5 acc 80.078 (82.188)	lr 0.01322
Train [58][2610/3239]	Time 0.224 (0.592)	Data Time 0.001 (0.015)	Loss 2.5305 (2.5933)	Entropy 1.10698 (1.10941)	Top-1 acc 63.672 (61.582)	Top-5 acc 84.766 (82.186)	lr 0.01322
Train [58][2620/3239]	Time 0.254 (0.592)	Data Time 0.001 (0.015)	Loss 2.6031 (2.5932)	Entropy 1.10700 (1.10940)	Top-1 acc 60.156 (61.585)	Top-5 acc 81.250 (82.187)	lr 0.01322
Train [58][2630/3239]	Time 0.237 (0.591)	Data Time 0.001 (0.015)	Loss 2.6842 (2.5932)	Entropy 1.10698 (1.10939)	Top-1 acc 59.375 (61.586)	Top-5 acc 80.078 (82.187)	lr 0.01322
Train [58][2640/3239]	Time 0.232 (0.591)	Data Time 0.002 (0.015)	Loss 2.5125 (2.5934)	Entropy 1.10700 (1.10938)	Top-1 acc 64.453 (61.580)	Top-5 acc 85.156 (82.185)	lr 0.01321
Train [58][2650/3239]	Time 0.223 (0.591)	Data Time 0.001 (0.015)	Loss 2.5939 (2.5933)	Entropy 1.10697 (1.10937)	Top-1 acc 58.984 (61.584)	Top-5 acc 84.766 (82.189)	lr 0.01321
Train [58][2660/3239]	Time 0.208 (0.590)	Data Time 0.001 (0.014)	Loss 2.5979 (2.5931)	Entropy 1.10696 (1.10937)	Top-1 acc 61.328 (61.587)	Top-5 acc 82.422 (82.192)	lr 0.01321
Train [58][2670/3239]	Time 0.246 (0.589)	Data Time 0.001 (0.014)	Loss 2.7944 (2.5932)	Entropy 1.10702 (1.10936)	Top-1 acc 53.125 (61.582)	Top-5 acc 79.688 (82.191)	lr 0.01321
Train [58][2680/3239]	Time 0.217 (0.589)	Data Time 0.001 (0.014)	Loss 2.6254 (2.5934)	Entropy 1.10701 (1.10935)	Top-1 acc 57.812 (61.577)	Top-5 acc 80.859 (82.189)	lr 0.01321
Train [58][2690/3239]	Time 0.220 (0.588)	Data Time 0.001 (0.014)	Loss 2.6261 (2.5934)	Entropy 1.10701 (1.10934)	Top-1 acc 58.203 (61.576)	Top-5 acc 81.250 (82.189)	lr 0.01321
Train [58][2700/3239]	Time 0.212 (0.588)	Data Time 0.001 (0.014)	Loss 2.6334 (2.5936)	Entropy 1.10699 (1.10933)	Top-1 acc 59.375 (61.573)	Top-5 acc 83.203 (82.187)	lr 0.01321
Train [58][2710/3239]	Time 0.246 (0.588)	Data Time 0.001 (0.014)	Loss 2.4615 (2.5934)	Entropy 1.10700 (1.10932)	Top-1 acc 60.938 (61.572)	Top-5 acc 85.156 (82.192)	lr 0.01321
Train [58][2720/3239]	Time 0.228 (0.587)	Data Time 0.001 (0.014)	Loss 2.7038 (2.5935)	Entropy 1.10702 (1.10931)	Top-1 acc 58.984 (61.577)	Top-5 acc 80.078 (82.192)	lr 0.01321
Train [58][2730/3239]	Time 0.224 (0.587)	Data Time 0.001 (0.014)	Loss 2.6489 (2.5937)	Entropy 1.10694 (1.10931)	Top-1 acc 57.031 (61.564)	Top-5 acc 81.641 (82.188)	lr 0.01321
Train [58][2740/3239]	Time 0.272 (0.586)	Data Time 0.001 (0.014)	Loss 2.6903 (2.5938)	Entropy 1.10693 (1.10930)	Top-1 acc 60.938 (61.564)	Top-5 acc 79.297 (82.186)	lr 0.01320
Train [58][2750/3239]	Time 0.227 (0.586)	Data Time 0.002 (0.014)	Loss 2.6142 (2.5938)	Entropy 1.10705 (1.10929)	Top-1 acc 60.547 (61.559)	Top-5 acc 78.906 (82.185)	lr 0.01320
Train [58][2760/3239]	Time 0.223 (0.585)	Data Time 0.001 (0.014)	Loss 2.5952 (2.5940)	Entropy 1.10698 (1.10928)	Top-1 acc 60.547 (61.553)	Top-5 acc 78.906 (82.179)	lr 0.01320
Train [58][2770/3239]	Time 0.217 (0.585)	Data Time 0.001 (0.014)	Loss 2.6659 (2.5941)	Entropy 1.10703 (1.10927)	Top-1 acc 56.641 (61.549)	Top-5 acc 81.250 (82.177)	lr 0.01320
Train [58][2780/3239]	Time 0.329 (0.585)	Data Time 0.001 (0.014)	Loss 2.5604 (2.5942)	Entropy 1.10699 (1.10926)	Top-1 acc 60.938 (61.545)	Top-5 acc 81.250 (82.173)	lr 0.01320
Train [58][2790/3239]	Time 0.195 (0.584)	Data Time 0.001 (0.014)	Loss 2.5465 (2.5944)	Entropy 1.10697 (1.10926)	Top-1 acc 60.547 (61.543)	Top-5 acc 84.375 (82.171)	lr 0.01320
Train [58][2800/3239]	Time 0.214 (0.584)	Data Time 0.001 (0.014)	Loss 3.8666 (2.5949)	Entropy 1.10698 (1.10925)	Top-1 acc 36.719 (61.530)	Top-5 acc 65.625 (82.164)	lr 0.01320
Train [58][2810/3239]	Time 0.254 (0.583)	Data Time 0.001 (0.014)	Loss 2.7432 (2.5947)	Entropy 1.10696 (1.10924)	Top-1 acc 57.422 (61.535)	Top-5 acc 79.688 (82.169)	lr 0.01320
Train [58][2820/3239]	Time 0.225 (0.583)	Data Time 0.001 (0.014)	Loss 2.6891 (2.5947)	Entropy 1.10696 (1.10923)	Top-1 acc 55.078 (61.530)	Top-5 acc 82.031 (82.168)	lr 0.01320
Train [58][2830/3239]	Time 0.240 (0.583)	Data Time 0.001 (0.014)	Loss 2.6981 (2.5946)	Entropy 1.10692 (1.10922)	Top-1 acc 59.766 (61.534)	Top-5 acc 81.250 (82.171)	lr 0.01320
Train [58][2840/3239]	Time 0.259 (0.582)	Data Time 0.001 (0.014)	Loss 2.6276 (2.5943)	Entropy 1.10692 (1.10921)	Top-1 acc 58.984 (61.540)	Top-5 acc 85.156 (82.176)	lr 0.01319
Train [58][2850/3239]	Time 0.283 (0.582)	Data Time 0.001 (0.014)	Loss 2.4988 (2.5943)	Entropy 1.10687 (1.10921)	Top-1 acc 60.938 (61.540)	Top-5 acc 84.375 (82.178)	lr 0.01319
Train [58][2860/3239]	Time 0.281 (0.599)	Data Time 0.003 (0.014)	Loss 2.4120 (2.5943)	Entropy 1.10688 (1.10920)	Top-1 acc 66.016 (61.538)	Top-5 acc 85.156 (82.178)	lr 0.01319
Train [58][2870/3239]	Time 0.330 (0.599)	Data Time 0.002 (0.014)	Loss 2.6081 (2.5943)	Entropy 1.10687 (1.10919)	Top-1 acc 60.547 (61.541)	Top-5 acc 82.812 (82.181)	lr 0.01319
Train [58][2880/3239]	Time 0.235 (0.598)	Data Time 0.001 (0.014)	Loss 2.7486 (2.5944)	Entropy 1.10687 (1.10918)	Top-1 acc 58.984 (61.538)	Top-5 acc 80.078 (82.178)	lr 0.01319
Train [58][2890/3239]	Time 0.263 (0.598)	Data Time 0.001 (0.013)	Loss 2.4847 (2.5942)	Entropy 1.10686 (1.10917)	Top-1 acc 64.062 (61.540)	Top-5 acc 84.766 (82.180)	lr 0.01319
Train [58][2900/3239]	Time 0.235 (0.597)	Data Time 0.002 (0.013)	Loss 2.5168 (2.5942)	Entropy 1.10685 (1.10917)	Top-1 acc 64.844 (61.545)	Top-5 acc 83.203 (82.182)	lr 0.01319
Train [58][2910/3239]	Time 0.232 (0.597)	Data Time 0.001 (0.013)	Loss 2.5573 (2.5940)	Entropy 1.10676 (1.10916)	Top-1 acc 62.109 (61.548)	Top-5 acc 83.984 (82.185)	lr 0.01319
Train [58][2920/3239]	Time 0.222 (0.596)	Data Time 0.002 (0.013)	Loss 2.6332 (2.5940)	Entropy 1.10670 (1.10915)	Top-1 acc 63.672 (61.549)	Top-5 acc 83.984 (82.185)	lr 0.01319
Train [58][2930/3239]	Time 0.270 (0.596)	Data Time 0.001 (0.013)	Loss 2.6076 (2.5939)	Entropy 1.10670 (1.10914)	Top-1 acc 62.500 (61.551)	Top-5 acc 80.859 (82.188)	lr 0.01319
Train [58][2940/3239]	Time 0.225 (0.596)	Data Time 0.001 (0.013)	Loss 2.6119 (2.5938)	Entropy 1.10673 (1.10913)	Top-1 acc 61.328 (61.549)	Top-5 acc 80.859 (82.187)	lr 0.01318
Train [58][2950/3239]	Time 0.277 (0.595)	Data Time 0.001 (0.013)	Loss 2.5800 (2.5938)	Entropy 1.10665 (1.10913)	Top-1 acc 62.500 (61.553)	Top-5 acc 82.031 (82.188)	lr 0.01318
Train [58][2960/3239]	Time 0.265 (0.595)	Data Time 0.001 (0.013)	Loss 2.5402 (2.5936)	Entropy 1.10663 (1.10912)	Top-1 acc 64.844 (61.560)	Top-5 acc 83.203 (82.191)	lr 0.01318
Train [58][2970/3239]	Time 0.356 (0.594)	Data Time 0.001 (0.013)	Loss 2.5044 (2.5934)	Entropy 1.10658 (1.10911)	Top-1 acc 66.016 (61.566)	Top-5 acc 81.641 (82.193)	lr 0.01318
Train [58][2980/3239]	Time 0.256 (0.594)	Data Time 0.002 (0.013)	Loss 2.3520 (2.5934)	Entropy 1.10655 (1.10910)	Top-1 acc 67.578 (61.568)	Top-5 acc 87.109 (82.195)	lr 0.01318
Train [58][2990/3239]	Time 0.223 (0.594)	Data Time 0.001 (0.013)	Loss 2.5435 (2.5936)	Entropy 1.10650 (1.10909)	Top-1 acc 63.672 (61.566)	Top-5 acc 84.375 (82.193)	lr 0.01318
Train [58][3000/3239]	Time 0.219 (0.593)	Data Time 0.001 (0.013)	Loss 2.7528 (2.5937)	Entropy 1.10644 (1.10908)	Top-1 acc 56.641 (61.563)	Top-5 acc 79.688 (82.190)	lr 0.01318
Train [58][3010/3239]	Time 0.224 (0.593)	Data Time 0.002 (0.013)	Loss 2.6794 (2.5937)	Entropy 1.10643 (1.10907)	Top-1 acc 55.078 (61.559)	Top-5 acc 81.250 (82.192)	lr 0.01318
Train [58][3020/3239]	Time 0.250 (0.592)	Data Time 0.001 (0.013)	Loss 2.5930 (2.5938)	Entropy 1.10647 (1.10907)	Top-1 acc 61.328 (61.556)	Top-5 acc 81.641 (82.191)	lr 0.01318
Train [58][3030/3239]	Time 0.262 (0.592)	Data Time 0.001 (0.013)	Loss 2.5661 (2.5938)	Entropy 1.10642 (1.10906)	Top-1 acc 62.109 (61.552)	Top-5 acc 82.031 (82.192)	lr 0.01318
Train [58][3040/3239]	Time 0.255 (0.592)	Data Time 0.001 (0.013)	Loss 2.8101 (2.5939)	Entropy 1.10626 (1.10905)	Top-1 acc 60.547 (61.550)	Top-5 acc 77.344 (82.188)	lr 0.01317
Train [58][3050/3239]	Time 0.248 (0.591)	Data Time 0.002 (0.013)	Loss 2.5782 (2.5943)	Entropy 1.10626 (1.10904)	Top-1 acc 63.281 (61.539)	Top-5 acc 82.812 (82.180)	lr 0.01317
Train [58][3060/3239]	Time 0.322 (0.591)	Data Time 0.001 (0.013)	Loss 2.5952 (2.5944)	Entropy 1.10622 (1.10903)	Top-1 acc 57.422 (61.539)	Top-5 acc 82.812 (82.181)	lr 0.01317
Train [58][3070/3239]	Time 0.218 (0.590)	Data Time 0.001 (0.013)	Loss 2.4209 (2.5943)	Entropy 1.10618 (1.10902)	Top-1 acc 60.938 (61.536)	Top-5 acc 84.766 (82.182)	lr 0.01317
Train [58][3080/3239]	Time 0.234 (0.590)	Data Time 0.001 (0.013)	Loss 2.5689 (2.5944)	Entropy 1.10612 (1.10901)	Top-1 acc 63.672 (61.538)	Top-5 acc 83.984 (82.179)	lr 0.01317
Train [58][3090/3239]	Time 0.211 (0.590)	Data Time 0.001 (0.013)	Loss 2.7291 (2.5945)	Entropy 1.10611 (1.10900)	Top-1 acc 57.422 (61.539)	Top-5 acc 81.250 (82.178)	lr 0.01317
Train [58][3100/3239]	Time 0.241 (0.589)	Data Time 0.001 (0.013)	Loss 2.6108 (2.5945)	Entropy 1.10607 (1.10899)	Top-1 acc 61.328 (61.540)	Top-5 acc 81.250 (82.176)	lr 0.01317
Train [58][3110/3239]	Time 0.236 (0.589)	Data Time 0.002 (0.013)	Loss 2.5108 (2.5947)	Entropy 1.10605 (1.10898)	Top-1 acc 64.453 (61.537)	Top-5 acc 83.203 (82.174)	lr 0.01317
Train [58][3120/3239]	Time 0.221 (0.588)	Data Time 0.001 (0.013)	Loss 2.7339 (2.5947)	Entropy 1.10608 (1.10897)	Top-1 acc 58.594 (61.535)	Top-5 acc 78.906 (82.175)	lr 0.01317
Train [58][3130/3239]	Time 0.219 (0.588)	Data Time 0.001 (0.013)	Loss 2.7145 (2.5947)	Entropy 1.10603 (1.10896)	Top-1 acc 62.500 (61.533)	Top-5 acc 77.734 (82.170)	lr 0.01317
Train [58][3140/3239]	Time 0.231 (0.588)	Data Time 0.001 (0.013)	Loss 2.5483 (2.5949)	Entropy 1.10604 (1.10896)	Top-1 acc 66.797 (61.532)	Top-5 acc 82.812 (82.167)	lr 0.01316
Train [58][3150/3239]	Time 0.356 (0.587)	Data Time 0.001 (0.012)	Loss 2.7317 (2.5952)	Entropy 1.10604 (1.10895)	Top-1 acc 60.547 (61.524)	Top-5 acc 78.125 (82.158)	lr 0.01316
Train [58][3160/3239]	Time 0.207 (0.587)	Data Time 0.001 (0.012)	Loss 2.7140 (2.5954)	Entropy 1.10605 (1.10894)	Top-1 acc 56.250 (61.520)	Top-5 acc 80.078 (82.155)	lr 0.01316
Train [58][3170/3239]	Time 0.223 (0.587)	Data Time 0.001 (0.012)	Loss 2.6036 (2.5953)	Entropy 1.10606 (1.10893)	Top-1 acc 60.547 (61.523)	Top-5 acc 81.641 (82.158)	lr 0.01316
Train [58][3180/3239]	Time 0.217 (0.586)	Data Time 0.000 (0.012)	Loss 2.4780 (2.5953)	Entropy 1.10598 (1.10892)	Top-1 acc 63.281 (61.525)	Top-5 acc 87.109 (82.158)	lr 0.01316
Train [58][3190/3239]	Time 0.306 (0.601)	Data Time 0.000 (0.012)	Loss 2.4757 (2.5953)	Entropy 1.10598 (1.10891)	Top-1 acc 63.672 (61.528)	Top-5 acc 86.328 (82.159)	lr 0.01316
Train [58][3200/3239]	Time 0.238 (0.601)	Data Time 0.000 (0.012)	Loss 2.5776 (2.5953)	Entropy 1.10595 (1.10890)	Top-1 acc 60.156 (61.527)	Top-5 acc 83.984 (82.158)	lr 0.01316
Train [58][3210/3239]	Time 0.227 (0.600)	Data Time 0.000 (0.012)	Loss 2.4894 (2.5953)	Entropy 1.10595 (1.10889)	Top-1 acc 63.672 (61.528)	Top-5 acc 82.812 (82.156)	lr 0.01316
Train [58][3220/3239]	Time 0.237 (0.600)	Data Time 0.000 (0.012)	Loss 2.7029 (2.5954)	Entropy 1.10591 (1.10888)	Top-1 acc 57.422 (61.526)	Top-5 acc 79.688 (82.155)	lr 0.01316
Train [58][3230/3239]	Time 0.254 (0.599)	Data Time 0.000 (0.012)	Loss 2.7287 (2.5958)	Entropy 1.10593 (1.10887)	Top-1 acc 58.594 (61.518)	Top-5 acc 84.375 (82.149)	lr 0.01316
Train [58][3239/3239]	Time 2.211 (0.599)	Data Time 0.000 (0.012)	Loss 2.7832 (2.5957)	Entropy 1.10593 (1.10886)	Top-1 acc 55.556 (61.519)	Top-5 acc 76.543 (82.151)	lr 0.01315
==========Valid [58/120]	loss 1.468	top-1 acc 66.772 (66.772)	top-5 acc 86.409	Train top-1 61.519	top-5 82.151	Entropy 1.10593	Latency-None: 0.000ms	Flops: 548.34M
Train [59][0/3239]	Time 37.911 (37.911)	Data Time 36.441 (36.441)	Loss 2.5211 (2.5211)	Entropy 1.10589 (1.10589)	Top-1 acc 63.672 (63.672)	Top-5 acc 86.328 (86.328)	lr 0.01315
Train [59][10/3239]	Time 2.665 (4.015)	Data Time 0.002 (3.315)	Loss 2.6577 (2.5582)	Entropy 1.10589 (1.10589)	Top-1 acc 59.766 (62.180)	Top-5 acc 78.516 (83.487)	lr 0.01315
Train [59][20/3239]	Time 0.266 (2.223)	Data Time 0.001 (1.737)	Loss 2.6251 (2.5695)	Entropy 1.10581 (1.10585)	Top-1 acc 60.547 (62.314)	Top-5 acc 81.641 (83.147)	lr 0.01315
Train [59][30/3239]	Time 0.229 (1.663)	Data Time 0.001 (1.177)	Loss 2.4353 (2.5713)	Entropy 1.10580 (1.10583)	Top-1 acc 62.891 (62.198)	Top-5 acc 87.109 (82.989)	lr 0.01315
Train [59][40/3239]	Time 0.244 (1.388)	Data Time 0.002 (0.890)	Loss 2.6288 (2.5733)	Entropy 1.10579 (1.10582)	Top-1 acc 61.328 (62.414)	Top-5 acc 80.078 (82.793)	lr 0.01315
Train [59][50/3239]	Time 0.346 (1.208)	Data Time 0.001 (0.716)	Loss 2.4714 (2.5629)	Entropy 1.10579 (1.10582)	Top-1 acc 64.844 (62.661)	Top-5 acc 85.938 (82.927)	lr 0.01315
Train [59][60/3239]	Time 0.222 (1.086)	Data Time 0.001 (0.599)	Loss 2.5205 (2.5621)	Entropy 1.10575 (1.10581)	Top-1 acc 64.062 (62.468)	Top-5 acc 85.156 (82.832)	lr 0.01315
Train [59][70/3239]	Time 0.215 (1.003)	Data Time 0.001 (0.515)	Loss 2.4257 (2.5570)	Entropy 1.10572 (1.10580)	Top-1 acc 67.188 (62.693)	Top-5 acc 83.984 (82.851)	lr 0.01315
Train [59][80/3239]	Time 0.215 (0.937)	Data Time 0.001 (0.452)	Loss 2.6871 (2.5531)	Entropy 1.10570 (1.10579)	Top-1 acc 59.375 (62.727)	Top-5 acc 79.688 (82.981)	lr 0.01315
Train [59][90/3239]	Time 0.223 (0.885)	Data Time 0.001 (0.402)	Loss 2.6466 (2.5494)	Entropy 1.10570 (1.10578)	Top-1 acc 58.984 (62.745)	Top-5 acc 84.766 (83.104)	lr 0.01315
Train [59][100/3239]	Time 0.326 (0.847)	Data Time 0.001 (0.363)	Loss 2.4406 (2.5471)	Entropy 1.10568 (1.10577)	Top-1 acc 64.844 (62.829)	Top-5 acc 83.203 (83.103)	lr 0.01314
Train [59][110/3239]	Time 0.234 (0.813)	Data Time 0.001 (0.330)	Loss 2.5800 (2.5440)	Entropy 1.10569 (1.10576)	Top-1 acc 58.984 (62.898)	Top-5 acc 83.984 (83.143)	lr 0.01314
Train [59][120/3239]	Time 2.593 (0.785)	Data Time 0.001 (0.303)	Loss 2.4078 (2.5483)	Entropy 1.10569 (1.10576)	Top-1 acc 66.406 (62.791)	Top-5 acc 85.156 (83.064)	lr 0.01314
Train [59][130/3239]	Time 0.211 (0.743)	Data Time 0.001 (0.280)	Loss 2.6538 (2.5471)	Entropy 1.10567 (1.10575)	Top-1 acc 60.156 (62.852)	Top-5 acc 79.688 (83.033)	lr 0.01314
Train [59][140/3239]	Time 0.228 (0.723)	Data Time 0.001 (0.260)	Loss 2.6004 (2.5492)	Entropy 1.10565 (1.10574)	Top-1 acc 59.766 (62.785)	Top-5 acc 79.688 (82.959)	lr 0.01314
Train [59][150/3239]	Time 0.306 (0.706)	Data Time 0.001 (0.243)	Loss 2.4767 (2.5490)	Entropy 1.10565 (1.10574)	Top-1 acc 64.844 (62.735)	Top-5 acc 83.203 (82.988)	lr 0.01314
Train [59][160/3239]	Time 0.241 (0.692)	Data Time 0.001 (0.228)	Loss 2.5269 (2.5500)	Entropy 1.10559 (1.10573)	Top-1 acc 63.672 (62.721)	Top-5 acc 83.594 (82.985)	lr 0.01314
Train [59][170/3239]	Time 0.237 (0.679)	Data Time 0.001 (0.215)	Loss 2.5640 (2.5529)	Entropy 1.10549 (1.10572)	Top-1 acc 64.844 (62.692)	Top-5 acc 82.031 (82.927)	lr 0.01314
Train [59][180/3239]	Time 0.231 (0.668)	Data Time 0.001 (0.203)	Loss 2.5583 (2.5543)	Entropy 1.10545 (1.10570)	Top-1 acc 62.891 (62.666)	Top-5 acc 85.156 (82.933)	lr 0.01314
Train [59][190/3239]	Time 0.213 (0.658)	Data Time 0.001 (0.192)	Loss 2.4985 (2.5542)	Entropy 1.10545 (1.10569)	Top-1 acc 64.453 (62.666)	Top-5 acc 82.031 (82.919)	lr 0.01314
Train [59][200/3239]	Time 0.240 (0.649)	Data Time 0.001 (0.183)	Loss 2.5840 (2.5557)	Entropy 1.10540 (1.10568)	Top-1 acc 63.281 (62.624)	Top-5 acc 83.984 (82.906)	lr 0.01313
Train [59][210/3239]	Time 0.221 (0.641)	Data Time 0.001 (0.174)	Loss 2.5170 (2.5582)	Entropy 1.10539 (1.10566)	Top-1 acc 64.844 (62.635)	Top-5 acc 84.766 (82.855)	lr 0.01313
Train [59][220/3239]	Time 0.237 (0.633)	Data Time 0.001 (0.166)	Loss 2.4641 (2.5570)	Entropy 1.10533 (1.10565)	Top-1 acc 66.797 (62.668)	Top-5 acc 83.984 (82.899)	lr 0.01313
Train [59][230/3239]	Time 2.450 (0.625)	Data Time 0.002 (0.159)	Loss 2.3354 (2.5563)	Entropy 1.10533 (1.10564)	Top-1 acc 63.281 (62.691)	Top-5 acc 86.328 (82.887)	lr 0.01313
Train [59][240/3239]	Time 0.354 (0.610)	Data Time 0.001 (0.153)	Loss 2.6255 (2.5559)	Entropy 1.10527 (1.10562)	Top-1 acc 66.797 (62.714)	Top-5 acc 80.469 (82.855)	lr 0.01313
Train [59][250/3239]	Time 0.242 (0.604)	Data Time 0.001 (0.147)	Loss 2.7302 (2.5571)	Entropy 1.10526 (1.10561)	Top-1 acc 61.719 (62.682)	Top-5 acc 78.125 (82.837)	lr 0.01313
Train [59][260/3239]	Time 0.231 (0.598)	Data Time 0.001 (0.141)	Loss 2.7441 (2.5570)	Entropy 1.10519 (1.10559)	Top-1 acc 57.812 (62.672)	Top-5 acc 78.516 (82.832)	lr 0.01313
Train [59][270/3239]	Time 0.223 (0.593)	Data Time 0.001 (0.136)	Loss 2.5439 (2.5574)	Entropy 1.10515 (1.10558)	Top-1 acc 61.719 (62.659)	Top-5 acc 82.031 (82.810)	lr 0.01313
Train [59][280/3239]	Time 0.279 (0.589)	Data Time 0.001 (0.131)	Loss 2.3788 (2.5564)	Entropy 1.10513 (1.10556)	Top-1 acc 68.359 (62.688)	Top-5 acc 87.500 (82.840)	lr 0.01313
Train [59][290/3239]	Time 0.216 (0.584)	Data Time 0.001 (0.127)	Loss 2.6118 (2.5574)	Entropy 1.10513 (1.10555)	Top-1 acc 59.766 (62.656)	Top-5 acc 82.812 (82.838)	lr 0.01312
Train [59][300/3239]	Time 0.220 (0.580)	Data Time 0.001 (0.123)	Loss 2.5365 (2.5613)	Entropy 1.10511 (1.10553)	Top-1 acc 61.719 (62.596)	Top-5 acc 82.031 (82.793)	lr 0.01312
Train [59][310/3239]	Time 0.218 (0.744)	Data Time 0.002 (0.119)	Loss 2.7948 (2.5611)	Entropy 1.10510 (1.10552)	Top-1 acc 57.812 (62.603)	Top-5 acc 78.125 (82.777)	lr 0.01312
Train [59][320/3239]	Time 0.237 (0.736)	Data Time 0.002 (0.115)	Loss 2.5685 (2.5615)	Entropy 1.10511 (1.10551)	Top-1 acc 64.062 (62.601)	Top-5 acc 83.984 (82.767)	lr 0.01312
Train [59][330/3239]	Time 0.228 (0.728)	Data Time 0.001 (0.112)	Loss 2.7766 (2.5618)	Entropy 1.10508 (1.10549)	Top-1 acc 57.812 (62.563)	Top-5 acc 78.516 (82.755)	lr 0.01312
Train [59][340/3239]	Time 2.422 (0.720)	Data Time 0.001 (0.109)	Loss 2.5077 (2.5616)	Entropy 1.10508 (1.10548)	Top-1 acc 66.406 (62.568)	Top-5 acc 82.812 (82.771)	lr 0.01312
Train [59][350/3239]	Time 0.217 (0.707)	Data Time 0.001 (0.105)	Loss 2.5008 (2.5603)	Entropy 1.10507 (1.10547)	Top-1 acc 65.234 (62.586)	Top-5 acc 83.984 (82.790)	lr 0.01312
Train [59][360/3239]	Time 0.274 (0.700)	Data Time 0.001 (0.103)	Loss 2.6059 (2.5614)	Entropy 1.10518 (1.10546)	Top-1 acc 57.422 (62.536)	Top-5 acc 83.203 (82.766)	lr 0.01312
Train [59][370/3239]	Time 0.225 (0.694)	Data Time 0.001 (0.100)	Loss 2.6365 (2.5611)	Entropy 1.10516 (1.10545)	Top-1 acc 58.594 (62.558)	Top-5 acc 82.812 (82.782)	lr 0.01312
Train [59][380/3239]	Time 0.234 (0.687)	Data Time 0.001 (0.097)	Loss 2.4959 (2.5610)	Entropy 1.10508 (1.10544)	Top-1 acc 62.500 (62.530)	Top-5 acc 83.594 (82.800)	lr 0.01312
Train [59][390/3239]	Time 0.218 (0.682)	Data Time 0.001 (0.095)	Loss 2.6936 (2.5607)	Entropy 1.10506 (1.10543)	Top-1 acc 58.594 (62.534)	Top-5 acc 78.516 (82.795)	lr 0.01311
Train [59][400/3239]	Time 0.220 (0.676)	Data Time 0.001 (0.093)	Loss 2.6317 (2.5614)	Entropy 1.10500 (1.10542)	Top-1 acc 60.156 (62.514)	Top-5 acc 81.250 (82.759)	lr 0.01311
Train [59][410/3239]	Time 0.225 (0.671)	Data Time 0.001 (0.090)	Loss 2.4047 (2.5622)	Entropy 1.10501 (1.10541)	Top-1 acc 65.234 (62.482)	Top-5 acc 86.719 (82.742)	lr 0.01311
Train [59][420/3239]	Time 0.239 (0.666)	Data Time 0.001 (0.088)	Loss 2.5464 (2.5630)	Entropy 1.10499 (1.10540)	Top-1 acc 62.109 (62.468)	Top-5 acc 83.984 (82.714)	lr 0.01311
Train [59][430/3239]	Time 0.325 (0.661)	Data Time 0.001 (0.086)	Loss 2.4684 (2.5632)	Entropy 1.10495 (1.10539)	Top-1 acc 63.281 (62.440)	Top-5 acc 83.984 (82.687)	lr 0.01311
Train [59][440/3239]	Time 0.222 (0.656)	Data Time 0.001 (0.084)	Loss 2.8318 (2.5648)	Entropy 1.10490 (1.10538)	Top-1 acc 51.953 (62.377)	Top-5 acc 80.859 (82.668)	lr 0.01311
Train [59][450/3239]	Time 2.612 (0.652)	Data Time 0.002 (0.082)	Loss 2.4316 (2.5663)	Entropy 1.10490 (1.10537)	Top-1 acc 66.797 (62.344)	Top-5 acc 85.547 (82.651)	lr 0.01311
Train [59][460/3239]	Time 0.231 (0.643)	Data Time 0.001 (0.081)	Loss 2.4649 (2.5660)	Entropy 1.10487 (1.10536)	Top-1 acc 65.234 (62.350)	Top-5 acc 82.812 (82.661)	lr 0.01311
Train [59][470/3239]	Time 0.266 (0.639)	Data Time 0.001 (0.079)	Loss 2.4920 (2.5686)	Entropy 1.10484 (1.10535)	Top-1 acc 67.578 (62.293)	Top-5 acc 82.422 (82.618)	lr 0.01311
Train [59][480/3239]	Time 0.234 (0.636)	Data Time 0.001 (0.077)	Loss 2.6183 (2.5703)	Entropy 1.10484 (1.10534)	Top-1 acc 62.109 (62.248)	Top-5 acc 78.906 (82.586)	lr 0.01311
Train [59][490/3239]	Time 0.218 (0.632)	Data Time 0.001 (0.076)	Loss 2.6113 (2.5699)	Entropy 1.10482 (1.10533)	Top-1 acc 62.109 (62.256)	Top-5 acc 82.422 (82.600)	lr 0.01310
Train [59][500/3239]	Time 0.240 (0.629)	Data Time 0.001 (0.074)	Loss 2.5635 (2.5709)	Entropy 1.10480 (1.10532)	Top-1 acc 65.625 (62.234)	Top-5 acc 82.812 (82.575)	lr 0.01310
Train [59][510/3239]	Time 0.258 (0.625)	Data Time 0.001 (0.073)	Loss 2.6427 (2.5718)	Entropy 1.10481 (1.10531)	Top-1 acc 60.156 (62.206)	Top-5 acc 81.641 (82.567)	lr 0.01310
Train [59][520/3239]	Time 0.331 (0.623)	Data Time 0.002 (0.072)	Loss 2.5687 (2.5718)	Entropy 1.10479 (1.10530)	Top-1 acc 61.328 (62.204)	Top-5 acc 83.203 (82.581)	lr 0.01310
Train [59][530/3239]	Time 0.218 (0.620)	Data Time 0.001 (0.070)	Loss 2.4721 (2.5711)	Entropy 1.10477 (1.10529)	Top-1 acc 65.625 (62.235)	Top-5 acc 85.938 (82.606)	lr 0.01310
Train [59][540/3239]	Time 0.215 (0.617)	Data Time 0.001 (0.069)	Loss 2.5841 (2.5714)	Entropy 1.10476 (1.10528)	Top-1 acc 60.547 (62.231)	Top-5 acc 83.984 (82.597)	lr 0.01310
Train [59][550/3239]	Time 0.267 (0.614)	Data Time 0.001 (0.068)	Loss 2.5112 (2.5706)	Entropy 1.10467 (1.10527)	Top-1 acc 59.766 (62.240)	Top-5 acc 83.203 (82.614)	lr 0.01310
Train [59][560/3239]	Time 2.391 (0.611)	Data Time 0.001 (0.067)	Loss 2.5537 (2.5707)	Entropy 1.10467 (1.10526)	Top-1 acc 62.891 (62.256)	Top-5 acc 84.766 (82.620)	lr 0.01310
Train [59][570/3239]	Time 0.331 (0.605)	Data Time 0.001 (0.065)	Loss 2.5975 (2.5707)	Entropy 1.10452 (1.10525)	Top-1 acc 58.594 (62.269)	Top-5 acc 82.812 (82.620)	lr 0.01310
Train [59][580/3239]	Time 0.253 (0.602)	Data Time 0.001 (0.064)	Loss 2.6932 (2.5707)	Entropy 1.10450 (1.10524)	Top-1 acc 62.500 (62.277)	Top-5 acc 78.906 (82.621)	lr 0.01310
Train [59][590/3239]	Time 0.227 (0.600)	Data Time 0.001 (0.063)	Loss 2.6383 (2.5708)	Entropy 1.10448 (1.10522)	Top-1 acc 59.766 (62.266)	Top-5 acc 83.203 (82.610)	lr 0.01309
Train [59][600/3239]	Time 0.211 (0.598)	Data Time 0.001 (0.062)	Loss 2.5855 (2.5707)	Entropy 1.10448 (1.10521)	Top-1 acc 57.812 (62.247)	Top-5 acc 82.812 (82.612)	lr 0.01309
Train [59][610/3239]	Time 0.160 (0.596)	Data Time 0.001 (0.061)	Loss 2.6132 (2.5720)	Entropy 1.10451 (1.10520)	Top-1 acc 58.203 (62.206)	Top-5 acc 81.641 (82.587)	lr 0.01309
Train [59][620/3239]	Time 0.213 (0.594)	Data Time 0.001 (0.060)	Loss 2.7436 (2.5714)	Entropy 1.10442 (1.10519)	Top-1 acc 57.422 (62.227)	Top-5 acc 79.688 (82.602)	lr 0.01309
Train [59][630/3239]	Time 0.225 (0.591)	Data Time 0.001 (0.059)	Loss 2.6750 (2.5719)	Entropy 1.10437 (1.10517)	Top-1 acc 56.641 (62.208)	Top-5 acc 79.297 (82.589)	lr 0.01309
Train [59][640/3239]	Time 0.227 (0.589)	Data Time 0.001 (0.058)	Loss 2.5948 (2.5713)	Entropy 1.10433 (1.10516)	Top-1 acc 60.938 (62.223)	Top-5 acc 81.250 (82.586)	lr 0.01309
Train [59][650/3239]	Time 0.222 (0.587)	Data Time 0.001 (0.058)	Loss 2.5996 (2.5714)	Entropy 1.10427 (1.10515)	Top-1 acc 58.984 (62.231)	Top-5 acc 81.641 (82.584)	lr 0.01309
Train [59][660/3239]	Time 0.359 (0.585)	Data Time 0.001 (0.057)	Loss 2.5336 (2.5707)	Entropy 1.10424 (1.10513)	Top-1 acc 58.984 (62.238)	Top-5 acc 82.422 (82.599)	lr 0.01309
Train [59][670/3239]	Time 54.237 (0.661)	Data Time 0.001 (0.056)	Loss 2.5792 (2.5711)	Entropy 1.10424 (1.10512)	Top-1 acc 60.938 (62.219)	Top-5 acc 83.203 (82.594)	lr 0.01309
Train [59][680/3239]	Time 0.228 (0.655)	Data Time 0.003 (0.055)	Loss 2.6542 (2.5705)	Entropy 1.10427 (1.10511)	Top-1 acc 62.891 (62.234)	Top-5 acc 79.688 (82.597)	lr 0.01309
Train [59][690/3239]	Time 0.222 (0.652)	Data Time 0.002 (0.054)	Loss 2.6587 (2.5706)	Entropy 1.10420 (1.10510)	Top-1 acc 59.766 (62.226)	Top-5 acc 79.297 (82.589)	lr 0.01308
Train [59][700/3239]	Time 0.249 (0.649)	Data Time 0.002 (0.054)	Loss 2.5950 (2.5703)	Entropy 1.10419 (1.10508)	Top-1 acc 58.594 (62.236)	Top-5 acc 80.859 (82.589)	lr 0.01308
Train [59][710/3239]	Time 0.338 (0.647)	Data Time 0.002 (0.053)	Loss 2.4637 (2.5702)	Entropy 1.10416 (1.10507)	Top-1 acc 64.453 (62.240)	Top-5 acc 82.812 (82.598)	lr 0.01308
Train [59][720/3239]	Time 0.242 (0.644)	Data Time 0.001 (0.052)	Loss 2.8291 (2.5702)	Entropy 1.10412 (1.10506)	Top-1 acc 54.688 (62.252)	Top-5 acc 79.297 (82.598)	lr 0.01308
Train [59][730/3239]	Time 0.223 (0.642)	Data Time 0.001 (0.051)	Loss 2.4447 (2.5712)	Entropy 1.10411 (1.10504)	Top-1 acc 65.625 (62.241)	Top-5 acc 85.938 (82.593)	lr 0.01308
Train [59][740/3239]	Time 0.236 (0.639)	Data Time 0.001 (0.051)	Loss 2.5982 (2.5708)	Entropy 1.10407 (1.10503)	Top-1 acc 58.984 (62.243)	Top-5 acc 81.250 (82.600)	lr 0.01308
Train [59][750/3239]	Time 0.222 (0.637)	Data Time 0.001 (0.050)	Loss 2.6526 (2.5705)	Entropy 1.10408 (1.10502)	Top-1 acc 57.422 (62.244)	Top-5 acc 82.422 (82.594)	lr 0.01308
Train [59][760/3239]	Time 0.205 (0.635)	Data Time 0.001 (0.050)	Loss 2.3996 (2.5701)	Entropy 1.10404 (1.10501)	Top-1 acc 67.969 (62.237)	Top-5 acc 84.375 (82.601)	lr 0.01308
Train [59][770/3239]	Time 0.230 (0.633)	Data Time 0.002 (0.049)	Loss 2.6153 (2.5705)	Entropy 1.10403 (1.10499)	Top-1 acc 60.938 (62.217)	Top-5 acc 80.078 (82.591)	lr 0.01308
Train [59][780/3239]	Time 2.450 (0.630)	Data Time 0.002 (0.048)	Loss 2.4910 (2.5698)	Entropy 1.10403 (1.10498)	Top-1 acc 63.281 (62.243)	Top-5 acc 83.594 (82.607)	lr 0.01308
Train [59][790/3239]	Time 0.254 (0.625)	Data Time 0.001 (0.048)	Loss 2.5100 (2.5702)	Entropy 1.10403 (1.10497)	Top-1 acc 64.062 (62.221)	Top-5 acc 83.594 (82.605)	lr 0.01307
Train [59][800/3239]	Time 0.245 (0.623)	Data Time 0.003 (0.047)	Loss 2.4297 (2.5703)	Entropy 1.10401 (1.10496)	Top-1 acc 64.844 (62.218)	Top-5 acc 86.719 (82.605)	lr 0.01307
Train [59][810/3239]	Time 0.222 (0.622)	Data Time 0.002 (0.047)	Loss 2.3603 (2.5703)	Entropy 1.10403 (1.10495)	Top-1 acc 67.969 (62.208)	Top-5 acc 85.938 (82.603)	lr 0.01307
Train [59][820/3239]	Time 0.226 (0.620)	Data Time 0.001 (0.046)	Loss 2.4407 (2.5700)	Entropy 1.10405 (1.10494)	Top-1 acc 67.188 (62.215)	Top-5 acc 85.547 (82.600)	lr 0.01307
Train [59][830/3239]	Time 0.232 (0.618)	Data Time 0.001 (0.046)	Loss 2.4718 (2.5704)	Entropy 1.10405 (1.10492)	Top-1 acc 64.453 (62.208)	Top-5 acc 82.422 (82.587)	lr 0.01307
Train [59][840/3239]	Time 0.225 (0.616)	Data Time 0.001 (0.045)	Loss 2.3634 (2.5700)	Entropy 1.10405 (1.10491)	Top-1 acc 69.141 (62.227)	Top-5 acc 87.109 (82.594)	lr 0.01307
Train [59][850/3239]	Time 0.300 (0.614)	Data Time 0.002 (0.044)	Loss 2.5637 (2.5708)	Entropy 1.10404 (1.10490)	Top-1 acc 61.328 (62.205)	Top-5 acc 83.594 (82.574)	lr 0.01307
Train [59][860/3239]	Time 0.241 (0.613)	Data Time 0.001 (0.044)	Loss 2.7314 (2.5709)	Entropy 1.10401 (1.10489)	Top-1 acc 58.984 (62.190)	Top-5 acc 81.250 (82.572)	lr 0.01307
Train [59][870/3239]	Time 0.136 (0.611)	Data Time 0.002 (0.043)	Loss 2.6919 (2.5715)	Entropy 1.10402 (1.10488)	Top-1 acc 57.031 (62.170)	Top-5 acc 82.422 (82.565)	lr 0.01307
Train [59][880/3239]	Time 0.232 (0.609)	Data Time 0.001 (0.043)	Loss 2.5900 (2.5717)	Entropy 1.10400 (1.10487)	Top-1 acc 62.500 (62.175)	Top-5 acc 82.422 (82.561)	lr 0.01307
Train [59][890/3239]	Time 2.518 (0.608)	Data Time 0.001 (0.043)	Loss 2.4903 (2.5710)	Entropy 1.10400 (1.10486)	Top-1 acc 64.844 (62.178)	Top-5 acc 83.594 (82.575)	lr 0.01306
Train [59][900/3239]	Time 0.256 (0.604)	Data Time 0.001 (0.042)	Loss 2.5245 (2.5708)	Entropy 1.10395 (1.10485)	Top-1 acc 66.016 (62.179)	Top-5 acc 83.594 (82.579)	lr 0.01306
Train [59][910/3239]	Time 0.233 (0.602)	Data Time 0.001 (0.042)	Loss 2.7384 (2.5712)	Entropy 1.10392 (1.10484)	Top-1 acc 58.594 (62.170)	Top-5 acc 79.688 (82.569)	lr 0.01306
Train [59][920/3239]	Time 0.228 (0.601)	Data Time 0.001 (0.041)	Loss 2.4346 (2.5718)	Entropy 1.10384 (1.10483)	Top-1 acc 63.672 (62.156)	Top-5 acc 83.203 (82.564)	lr 0.01306
Train [59][930/3239]	Time 0.231 (0.600)	Data Time 0.001 (0.041)	Loss 2.5817 (2.5721)	Entropy 1.10382 (1.10482)	Top-1 acc 59.375 (62.147)	Top-5 acc 83.594 (82.561)	lr 0.01306
Train [59][940/3239]	Time 0.312 (0.598)	Data Time 0.001 (0.040)	Loss 2.6430 (2.5719)	Entropy 1.10381 (1.10481)	Top-1 acc 58.203 (62.151)	Top-5 acc 77.734 (82.558)	lr 0.01306
Train [59][950/3239]	Time 0.237 (0.597)	Data Time 0.001 (0.040)	Loss 2.5540 (2.5725)	Entropy 1.10381 (1.10480)	Top-1 acc 62.891 (62.132)	Top-5 acc 82.422 (82.547)	lr 0.01306
Train [59][960/3239]	Time 0.222 (0.596)	Data Time 0.001 (0.040)	Loss 2.6089 (2.5724)	Entropy 1.10383 (1.10479)	Top-1 acc 63.672 (62.129)	Top-5 acc 82.812 (82.552)	lr 0.01306
Train [59][970/3239]	Time 0.231 (0.595)	Data Time 0.002 (0.039)	Loss 2.6938 (2.5725)	Entropy 1.10379 (1.10478)	Top-1 acc 60.547 (62.134)	Top-5 acc 80.078 (82.543)	lr 0.01306
Train [59][980/3239]	Time 0.310 (0.593)	Data Time 0.002 (0.039)	Loss 2.7054 (2.5735)	Entropy 1.10379 (1.10477)	Top-1 acc 58.594 (62.109)	Top-5 acc 80.859 (82.523)	lr 0.01306
Train [59][990/3239]	Time 0.254 (0.592)	Data Time 0.001 (0.039)	Loss 2.5047 (2.5742)	Entropy 1.10380 (1.10476)	Top-1 acc 61.328 (62.087)	Top-5 acc 83.594 (82.508)	lr 0.01305
Train [59][1000/3239]	Time 2.417 (0.591)	Data Time 0.002 (0.038)	Loss 2.4815 (2.5745)	Entropy 1.10380 (1.10475)	Top-1 acc 62.891 (62.081)	Top-5 acc 82.812 (82.510)	lr 0.01305
Train [59][1010/3239]	Time 0.248 (0.587)	Data Time 0.001 (0.038)	Loss 2.3698 (2.5740)	Entropy 1.10379 (1.10474)	Top-1 acc 65.625 (62.088)	Top-5 acc 87.500 (82.515)	lr 0.01305
Train [59][1020/3239]	Time 0.231 (0.586)	Data Time 0.001 (0.037)	Loss 2.3977 (2.5736)	Entropy 1.10379 (1.10473)	Top-1 acc 65.234 (62.089)	Top-5 acc 84.766 (82.525)	lr 0.01305
Train [59][1030/3239]	Time 0.304 (0.585)	Data Time 0.001 (0.037)	Loss 2.7013 (2.5741)	Entropy 1.10372 (1.10472)	Top-1 acc 61.328 (62.072)	Top-5 acc 80.078 (82.516)	lr 0.01305
Train [59][1040/3239]	Time 0.228 (0.634)	Data Time 0.002 (0.037)	Loss 2.5757 (2.5740)	Entropy 1.10371 (1.10471)	Top-1 acc 60.938 (62.078)	Top-5 acc 80.859 (82.521)	lr 0.01305
Train [59][1050/3239]	Time 0.221 (0.632)	Data Time 0.002 (0.036)	Loss 2.4895 (2.5738)	Entropy 1.10367 (1.10470)	Top-1 acc 64.062 (62.084)	Top-5 acc 82.812 (82.520)	lr 0.01305
Train [59][1060/3239]	Time 0.248 (0.631)	Data Time 0.001 (0.036)	Loss 2.5744 (2.5738)	Entropy 1.10366 (1.10469)	Top-1 acc 61.328 (62.090)	Top-5 acc 82.031 (82.517)	lr 0.01305
Train [59][1070/3239]	Time 0.226 (0.629)	Data Time 0.001 (0.036)	Loss 2.6400 (2.5731)	Entropy 1.10365 (1.10468)	Top-1 acc 60.938 (62.113)	Top-5 acc 80.859 (82.532)	lr 0.01305
Train [59][1080/3239]	Time 0.226 (0.628)	Data Time 0.001 (0.035)	Loss 2.5413 (2.5733)	Entropy 1.10367 (1.10467)	Top-1 acc 63.281 (62.112)	Top-5 acc 83.984 (82.529)	lr 0.01305
Train [59][1090/3239]	Time 0.201 (0.626)	Data Time 0.001 (0.035)	Loss 2.6611 (2.5736)	Entropy 1.10362 (1.10466)	Top-1 acc 56.250 (62.106)	Top-5 acc 82.031 (82.531)	lr 0.01304
Train [59][1100/3239]	Time 0.216 (0.625)	Data Time 0.001 (0.035)	Loss 2.4580 (2.5736)	Entropy 1.10358 (1.10466)	Top-1 acc 62.891 (62.111)	Top-5 acc 85.156 (82.525)	lr 0.01304
Train [59][1110/3239]	Time 2.425 (0.623)	Data Time 0.001 (0.035)	Loss 2.6454 (2.5737)	Entropy 1.10358 (1.10465)	Top-1 acc 59.375 (62.095)	Top-5 acc 83.594 (82.531)	lr 0.01304
Train [59][1120/3239]	Time 0.311 (0.620)	Data Time 0.001 (0.034)	Loss 2.4000 (2.5731)	Entropy 1.10360 (1.10464)	Top-1 acc 65.234 (62.113)	Top-5 acc 87.109 (82.543)	lr 0.01304
Train [59][1130/3239]	Time 0.258 (0.618)	Data Time 0.001 (0.034)	Loss 2.5569 (2.5726)	Entropy 1.10347 (1.10463)	Top-1 acc 66.406 (62.122)	Top-5 acc 83.203 (82.553)	lr 0.01304
Train [59][1140/3239]	Time 0.219 (0.617)	Data Time 0.001 (0.034)	Loss 2.5488 (2.5727)	Entropy 1.10347 (1.10462)	Top-1 acc 64.062 (62.123)	Top-5 acc 83.984 (82.550)	lr 0.01304
Train [59][1150/3239]	Time 0.193 (0.615)	Data Time 0.001 (0.033)	Loss 2.6559 (2.5727)	Entropy 1.10344 (1.10461)	Top-1 acc 62.891 (62.130)	Top-5 acc 80.859 (82.548)	lr 0.01304
Train [59][1160/3239]	Time 0.220 (0.614)	Data Time 0.001 (0.033)	Loss 2.7125 (2.5727)	Entropy 1.10345 (1.10460)	Top-1 acc 60.938 (62.138)	Top-5 acc 81.641 (82.550)	lr 0.01304
Train [59][1170/3239]	Time 0.209 (0.613)	Data Time 0.001 (0.033)	Loss 2.4955 (2.5724)	Entropy 1.10346 (1.10459)	Top-1 acc 64.453 (62.133)	Top-5 acc 83.594 (82.563)	lr 0.01304
Train [59][1180/3239]	Time 0.221 (0.611)	Data Time 0.001 (0.033)	Loss 2.5609 (2.5721)	Entropy 1.10342 (1.10458)	Top-1 acc 62.500 (62.146)	Top-5 acc 83.984 (82.566)	lr 0.01304
Train [59][1190/3239]	Time 0.211 (0.610)	Data Time 0.001 (0.032)	Loss 2.6037 (2.5723)	Entropy 1.10340 (1.10457)	Top-1 acc 64.844 (62.141)	Top-5 acc 80.469 (82.564)	lr 0.01303
Train [59][1200/3239]	Time 0.247 (0.609)	Data Time 0.001 (0.032)	Loss 2.6009 (2.5730)	Entropy 1.10332 (1.10456)	Top-1 acc 64.453 (62.129)	Top-5 acc 81.250 (82.550)	lr 0.01303
Train [59][1210/3239]	Time 0.251 (0.607)	Data Time 0.001 (0.032)	Loss 2.5096 (2.5727)	Entropy 1.10335 (1.10455)	Top-1 acc 63.672 (62.127)	Top-5 acc 82.812 (82.554)	lr 0.01303
Train [59][1220/3239]	Time 2.608 (0.606)	Data Time 0.001 (0.032)	Loss 2.4107 (2.5724)	Entropy 1.10335 (1.10454)	Top-1 acc 65.625 (62.136)	Top-5 acc 85.547 (82.566)	lr 0.01303
Train [59][1230/3239]	Time 0.224 (0.603)	Data Time 0.001 (0.031)	Loss 2.6727 (2.5721)	Entropy 1.10333 (1.10453)	Top-1 acc 62.500 (62.138)	Top-5 acc 82.422 (82.574)	lr 0.01303
Train [59][1240/3239]	Time 0.216 (0.602)	Data Time 0.001 (0.031)	Loss 2.5621 (2.5726)	Entropy 1.10332 (1.10452)	Top-1 acc 64.062 (62.122)	Top-5 acc 83.984 (82.566)	lr 0.01303
Train [59][1250/3239]	Time 0.225 (0.601)	Data Time 0.001 (0.031)	Loss 2.6482 (2.5727)	Entropy 1.10332 (1.10451)	Top-1 acc 57.422 (62.117)	Top-5 acc 79.688 (82.563)	lr 0.01303
Train [59][1260/3239]	Time 0.266 (0.600)	Data Time 0.001 (0.031)	Loss 2.6464 (2.5731)	Entropy 1.10335 (1.10450)	Top-1 acc 61.328 (62.106)	Top-5 acc 82.812 (82.560)	lr 0.01303
Train [59][1270/3239]	Time 0.226 (0.599)	Data Time 0.001 (0.030)	Loss 2.6149 (2.5735)	Entropy 1.10334 (1.10449)	Top-1 acc 63.281 (62.106)	Top-5 acc 81.641 (82.550)	lr 0.01303
Train [59][1280/3239]	Time 0.239 (0.597)	Data Time 0.001 (0.030)	Loss 2.4345 (2.5731)	Entropy 1.10334 (1.10448)	Top-1 acc 63.672 (62.116)	Top-5 acc 86.719 (82.557)	lr 0.01303
Train [59][1290/3239]	Time 0.218 (0.596)	Data Time 0.001 (0.030)	Loss 2.5382 (2.5728)	Entropy 1.10331 (1.10447)	Top-1 acc 63.281 (62.127)	Top-5 acc 84.375 (82.562)	lr 0.01302
Train [59][1300/3239]	Time 0.230 (0.595)	Data Time 0.001 (0.030)	Loss 2.5186 (2.5731)	Entropy 1.10332 (1.10446)	Top-1 acc 61.719 (62.119)	Top-5 acc 84.375 (82.561)	lr 0.01302
Train [59][1310/3239]	Time 0.324 (0.595)	Data Time 0.001 (0.029)	Loss 2.8419 (2.5734)	Entropy 1.10329 (1.10445)	Top-1 acc 54.297 (62.115)	Top-5 acc 77.344 (82.556)	lr 0.01302
Train [59][1320/3239]	Time 0.230 (0.594)	Data Time 0.001 (0.029)	Loss 2.5529 (2.5734)	Entropy 1.10325 (1.10445)	Top-1 acc 63.672 (62.121)	Top-5 acc 81.250 (82.558)	lr 0.01302
Train [59][1330/3239]	Time 2.527 (0.593)	Data Time 0.001 (0.029)	Loss 2.6160 (2.5737)	Entropy 1.10325 (1.10444)	Top-1 acc 60.547 (62.109)	Top-5 acc 83.594 (82.552)	lr 0.01302
Train [59][1340/3239]	Time 0.245 (0.590)	Data Time 0.001 (0.029)	Loss 2.6979 (2.5734)	Entropy 1.10321 (1.10443)	Top-1 acc 57.031 (62.111)	Top-5 acc 82.422 (82.562)	lr 0.01302
Train [59][1350/3239]	Time 0.219 (0.589)	Data Time 0.001 (0.029)	Loss 2.5589 (2.5735)	Entropy 1.10316 (1.10442)	Top-1 acc 58.984 (62.105)	Top-5 acc 83.594 (82.562)	lr 0.01302
Train [59][1360/3239]	Time 0.327 (0.588)	Data Time 0.001 (0.028)	Loss 2.5236 (2.5739)	Entropy 1.10311 (1.10441)	Top-1 acc 60.547 (62.092)	Top-5 acc 82.812 (82.552)	lr 0.01302
Train [59][1370/3239]	Time 0.215 (0.587)	Data Time 0.001 (0.028)	Loss 2.4645 (2.5744)	Entropy 1.10308 (1.10440)	Top-1 acc 64.844 (62.081)	Top-5 acc 86.328 (82.545)	lr 0.01302
Train [59][1380/3239]	Time 0.229 (0.586)	Data Time 0.001 (0.028)	Loss 2.6349 (2.5746)	Entropy 1.10305 (1.10439)	Top-1 acc 63.281 (62.079)	Top-5 acc 82.812 (82.541)	lr 0.01301
Train [59][1390/3239]	Time 0.213 (0.585)	Data Time 0.001 (0.028)	Loss 2.6216 (2.5745)	Entropy 1.10303 (1.10438)	Top-1 acc 62.500 (62.083)	Top-5 acc 80.469 (82.540)	lr 0.01301
Train [59][1400/3239]	Time 0.251 (0.622)	Data Time 0.002 (0.028)	Loss 2.6529 (2.5745)	Entropy 1.10304 (1.10437)	Top-1 acc 58.984 (62.078)	Top-5 acc 80.859 (82.538)	lr 0.01301
Train [59][1410/3239]	Time 0.226 (0.621)	Data Time 0.002 (0.028)	Loss 2.6170 (2.5744)	Entropy 1.10299 (1.10436)	Top-1 acc 61.328 (62.086)	Top-5 acc 80.859 (82.540)	lr 0.01301
Train [59][1420/3239]	Time 0.217 (0.620)	Data Time 0.002 (0.027)	Loss 2.4536 (2.5741)	Entropy 1.10291 (1.10435)	Top-1 acc 65.625 (62.090)	Top-5 acc 85.156 (82.548)	lr 0.01301
Train [59][1430/3239]	Time 0.202 (0.619)	Data Time 0.001 (0.027)	Loss 2.5674 (2.5743)	Entropy 1.10287 (1.10434)	Top-1 acc 62.109 (62.088)	Top-5 acc 83.203 (82.537)	lr 0.01301
Train [59][1440/3239]	Time 2.575 (0.617)	Data Time 0.001 (0.027)	Loss 2.5396 (2.5744)	Entropy 1.10287 (1.10433)	Top-1 acc 64.844 (62.083)	Top-5 acc 82.812 (82.537)	lr 0.01301
Train [59][1450/3239]	Time 0.380 (0.615)	Data Time 0.002 (0.027)	Loss 2.5884 (2.5744)	Entropy 1.10281 (1.10432)	Top-1 acc 64.062 (62.079)	Top-5 acc 81.250 (82.539)	lr 0.01301
Train [59][1460/3239]	Time 0.232 (0.614)	Data Time 0.001 (0.027)	Loss 3.1073 (2.5753)	Entropy 1.10284 (1.10431)	Top-1 acc 47.656 (62.064)	Top-5 acc 71.875 (82.523)	lr 0.01301
Train [59][1470/3239]	Time 0.234 (0.613)	Data Time 0.002 (0.026)	Loss 2.5624 (2.5754)	Entropy 1.10284 (1.10430)	Top-1 acc 62.500 (62.062)	Top-5 acc 83.594 (82.518)	lr 0.01301
Train [59][1480/3239]	Time 0.231 (0.612)	Data Time 0.001 (0.026)	Loss 2.3078 (2.5748)	Entropy 1.10280 (1.10429)	Top-1 acc 70.312 (62.078)	Top-5 acc 87.109 (82.528)	lr 0.01300
Train [59][1490/3239]	Time 0.231 (0.611)	Data Time 0.001 (0.026)	Loss 2.6776 (2.5746)	Entropy 1.10277 (1.10428)	Top-1 acc 58.594 (62.084)	Top-5 acc 80.859 (82.531)	lr 0.01300
Train [59][1500/3239]	Time 0.346 (0.610)	Data Time 0.001 (0.026)	Loss 2.6452 (2.5746)	Entropy 1.10275 (1.10427)	Top-1 acc 58.594 (62.089)	Top-5 acc 79.688 (82.529)	lr 0.01300
Train [59][1510/3239]	Time 0.224 (0.609)	Data Time 0.001 (0.026)	Loss 2.6046 (2.5743)	Entropy 1.10275 (1.10426)	Top-1 acc 58.594 (62.094)	Top-5 acc 82.422 (82.531)	lr 0.01300
Train [59][1520/3239]	Time 0.210 (0.608)	Data Time 0.001 (0.026)	Loss 2.4779 (2.5740)	Entropy 1.10273 (1.10425)	Top-1 acc 65.625 (62.102)	Top-5 acc 84.766 (82.540)	lr 0.01300
Train [59][1530/3239]	Time 0.223 (0.607)	Data Time 0.001 (0.025)	Loss 2.5290 (2.5741)	Entropy 1.10272 (1.10424)	Top-1 acc 58.203 (62.094)	Top-5 acc 85.547 (82.539)	lr 0.01300
Train [59][1540/3239]	Time 0.225 (0.606)	Data Time 0.002 (0.025)	Loss 2.6865 (2.5740)	Entropy 1.10270 (1.10423)	Top-1 acc 60.547 (62.101)	Top-5 acc 80.078 (82.543)	lr 0.01300
Train [59][1550/3239]	Time 2.433 (0.605)	Data Time 0.002 (0.025)	Loss 2.5984 (2.5740)	Entropy 1.10270 (1.10422)	Top-1 acc 61.719 (62.099)	Top-5 acc 82.031 (82.540)	lr 0.01300
Train [59][1560/3239]	Time 0.229 (0.603)	Data Time 0.001 (0.025)	Loss 2.4376 (2.5745)	Entropy 1.10267 (1.10421)	Top-1 acc 63.672 (62.080)	Top-5 acc 83.984 (82.530)	lr 0.01300
Train [59][1570/3239]	Time 0.221 (0.602)	Data Time 0.001 (0.025)	Loss 2.6139 (2.5745)	Entropy 1.10267 (1.10420)	Top-1 acc 58.594 (62.080)	Top-5 acc 80.078 (82.527)	lr 0.01300
Train [59][1580/3239]	Time 0.235 (0.601)	Data Time 0.001 (0.025)	Loss 2.5789 (2.5745)	Entropy 1.10262 (1.10419)	Top-1 acc 62.891 (62.077)	Top-5 acc 82.031 (82.524)	lr 0.01299
Train [59][1590/3239]	Time 0.303 (0.600)	Data Time 0.001 (0.025)	Loss 2.5669 (2.5745)	Entropy 1.10261 (1.10418)	Top-1 acc 64.062 (62.081)	Top-5 acc 83.984 (82.524)	lr 0.01299
Train [59][1600/3239]	Time 0.220 (0.599)	Data Time 0.001 (0.024)	Loss 2.4141 (2.5743)	Entropy 1.10259 (1.10417)	Top-1 acc 66.797 (62.081)	Top-5 acc 84.375 (82.529)	lr 0.01299
Train [59][1610/3239]	Time 0.204 (0.598)	Data Time 0.001 (0.024)	Loss 2.6565 (2.5742)	Entropy 1.10256 (1.10416)	Top-1 acc 61.328 (62.081)	Top-5 acc 82.031 (82.533)	lr 0.01299
Train [59][1620/3239]	Time 0.182 (0.597)	Data Time 0.001 (0.024)	Loss 2.4644 (2.5742)	Entropy 1.10256 (1.10415)	Top-1 acc 61.719 (62.083)	Top-5 acc 85.547 (82.533)	lr 0.01299
Train [59][1630/3239]	Time 0.222 (0.596)	Data Time 0.001 (0.024)	Loss 2.7545 (2.5739)	Entropy 1.10254 (1.10414)	Top-1 acc 56.641 (62.085)	Top-5 acc 78.516 (82.537)	lr 0.01299
Train [59][1640/3239]	Time 0.292 (0.596)	Data Time 0.001 (0.024)	Loss 2.6248 (2.5735)	Entropy 1.10250 (1.10413)	Top-1 acc 60.938 (62.094)	Top-5 acc 79.688 (82.539)	lr 0.01299
Train [59][1650/3239]	Time 0.274 (0.595)	Data Time 0.001 (0.024)	Loss 2.5133 (2.5733)	Entropy 1.10251 (1.10412)	Top-1 acc 62.500 (62.100)	Top-5 acc 81.250 (82.544)	lr 0.01299
Train [59][1660/3239]	Time 2.321 (0.594)	Data Time 0.001 (0.024)	Loss 2.5233 (2.5732)	Entropy 1.10251 (1.10411)	Top-1 acc 60.547 (62.100)	Top-5 acc 83.984 (82.547)	lr 0.01299
Train [59][1670/3239]	Time 0.216 (0.592)	Data Time 0.001 (0.023)	Loss 2.5266 (2.5733)	Entropy 1.10245 (1.10410)	Top-1 acc 61.328 (62.093)	Top-5 acc 83.594 (82.546)	lr 0.01299
Train [59][1680/3239]	Time 0.212 (0.591)	Data Time 0.002 (0.023)	Loss 2.7294 (2.5737)	Entropy 1.10239 (1.10409)	Top-1 acc 60.938 (62.088)	Top-5 acc 76.953 (82.537)	lr 0.01298
Train [59][1690/3239]	Time 0.346 (0.590)	Data Time 0.001 (0.023)	Loss 2.6509 (2.5738)	Entropy 1.10237 (1.10408)	Top-1 acc 60.547 (62.089)	Top-5 acc 78.125 (82.530)	lr 0.01298
Train [59][1700/3239]	Time 0.221 (0.590)	Data Time 0.001 (0.023)	Loss 2.6637 (2.5738)	Entropy 1.10243 (1.10407)	Top-1 acc 59.375 (62.086)	Top-5 acc 82.031 (82.528)	lr 0.01298
Train [59][1710/3239]	Time 0.227 (0.589)	Data Time 0.001 (0.023)	Loss 2.8424 (2.5742)	Entropy 1.10233 (1.10406)	Top-1 acc 55.469 (62.075)	Top-5 acc 78.125 (82.521)	lr 0.01298
Train [59][1720/3239]	Time 0.218 (0.588)	Data Time 0.001 (0.023)	Loss 2.5443 (2.5742)	Entropy 1.10231 (1.10405)	Top-1 acc 63.281 (62.073)	Top-5 acc 83.984 (82.518)	lr 0.01298
Train [59][1730/3239]	Time 0.253 (0.587)	Data Time 0.001 (0.023)	Loss 2.6999 (2.5744)	Entropy 1.10225 (1.10404)	Top-1 acc 60.547 (62.069)	Top-5 acc 82.031 (82.511)	lr 0.01298
Train [59][1740/3239]	Time 0.218 (0.587)	Data Time 0.001 (0.023)	Loss 2.6221 (2.5747)	Entropy 1.10219 (1.10403)	Top-1 acc 62.500 (62.061)	Top-5 acc 80.078 (82.503)	lr 0.01298
Train [59][1750/3239]	Time 0.219 (0.586)	Data Time 0.001 (0.022)	Loss 2.6909 (2.5748)	Entropy 1.10221 (1.10402)	Top-1 acc 60.547 (62.063)	Top-5 acc 80.078 (82.501)	lr 0.01298
Train [59][1760/3239]	Time 0.304 (0.612)	Data Time 0.004 (0.022)	Loss 2.4749 (2.5748)	Entropy 1.10217 (1.10401)	Top-1 acc 64.062 (62.064)	Top-5 acc 82.812 (82.497)	lr 0.01298
Train [59][1770/3239]	Time 2.475 (0.612)	Data Time 0.002 (0.022)	Loss 2.4633 (2.5745)	Entropy 1.10217 (1.10400)	Top-1 acc 62.500 (62.071)	Top-5 acc 85.547 (82.507)	lr 0.01298
Train [59][1780/3239]	Time 0.379 (0.610)	Data Time 0.002 (0.022)	Loss 2.7646 (2.5744)	Entropy 1.10220 (1.10399)	Top-1 acc 59.375 (62.070)	Top-5 acc 78.516 (82.508)	lr 0.01297
Train [59][1790/3239]	Time 0.219 (0.609)	Data Time 0.001 (0.022)	Loss 2.6462 (2.5746)	Entropy 1.10209 (1.10398)	Top-1 acc 58.594 (62.064)	Top-5 acc 82.812 (82.508)	lr 0.01297
Train [59][1800/3239]	Time 0.232 (0.608)	Data Time 0.001 (0.022)	Loss 2.5910 (2.5746)	Entropy 1.10207 (1.10397)	Top-1 acc 61.719 (62.062)	Top-5 acc 81.641 (82.509)	lr 0.01297
Train [59][1810/3239]	Time 0.223 (0.608)	Data Time 0.001 (0.022)	Loss 2.6653 (2.5745)	Entropy 1.10206 (1.10396)	Top-1 acc 60.938 (62.066)	Top-5 acc 81.250 (82.510)	lr 0.01297
Train [59][1820/3239]	Time 0.228 (0.607)	Data Time 0.001 (0.022)	Loss 2.7052 (2.5746)	Entropy 1.10203 (1.10395)	Top-1 acc 60.156 (62.067)	Top-5 acc 80.078 (82.509)	lr 0.01297
Train [59][1830/3239]	Time 0.340 (0.606)	Data Time 0.001 (0.022)	Loss 2.6582 (2.5746)	Entropy 1.10203 (1.10394)	Top-1 acc 58.984 (62.066)	Top-5 acc 78.516 (82.513)	lr 0.01297
Train [59][1840/3239]	Time 0.225 (0.605)	Data Time 0.001 (0.021)	Loss 2.6922 (2.5748)	Entropy 1.10201 (1.10393)	Top-1 acc 58.984 (62.062)	Top-5 acc 80.469 (82.505)	lr 0.01297
Train [59][1850/3239]	Time 0.239 (0.605)	Data Time 0.002 (0.021)	Loss 2.5575 (2.5750)	Entropy 1.10203 (1.10392)	Top-1 acc 63.281 (62.062)	Top-5 acc 83.203 (82.499)	lr 0.01297
Train [59][1860/3239]	Time 0.232 (0.604)	Data Time 0.001 (0.021)	Loss 2.4409 (2.5749)	Entropy 1.10202 (1.10391)	Top-1 acc 65.234 (62.063)	Top-5 acc 83.203 (82.502)	lr 0.01297
Train [59][1870/3239]	Time 0.243 (0.603)	Data Time 0.003 (0.021)	Loss 2.6859 (2.5748)	Entropy 1.10185 (1.10390)	Top-1 acc 58.594 (62.062)	Top-5 acc 80.078 (82.502)	lr 0.01297
Train [59][1880/3239]	Time 2.488 (0.602)	Data Time 0.001 (0.021)	Loss 2.5215 (2.5748)	Entropy 1.10185 (1.10388)	Top-1 acc 66.797 (62.064)	Top-5 acc 81.250 (82.503)	lr 0.01296
Train [59][1890/3239]	Time 0.230 (0.601)	Data Time 0.001 (0.021)	Loss 2.7232 (2.5748)	Entropy 1.10184 (1.10387)	Top-1 acc 58.984 (62.064)	Top-5 acc 79.297 (82.505)	lr 0.01296
Train [59][1900/3239]	Time 0.276 (0.600)	Data Time 0.001 (0.021)	Loss 2.4327 (2.5749)	Entropy 1.10184 (1.10386)	Top-1 acc 64.062 (62.061)	Top-5 acc 86.328 (82.507)	lr 0.01296
Train [59][1910/3239]	Time 0.228 (0.599)	Data Time 0.001 (0.021)	Loss 2.6511 (2.5749)	Entropy 1.10162 (1.10385)	Top-1 acc 62.500 (62.057)	Top-5 acc 80.859 (82.507)	lr 0.01296
Train [59][1920/3239]	Time 0.250 (0.598)	Data Time 0.001 (0.021)	Loss 2.5894 (2.5751)	Entropy 1.10161 (1.10384)	Top-1 acc 60.156 (62.051)	Top-5 acc 78.516 (82.505)	lr 0.01296
Train [59][1930/3239]	Time 0.222 (0.598)	Data Time 0.001 (0.021)	Loss 2.6206 (2.5753)	Entropy 1.10162 (1.10383)	Top-1 acc 56.250 (62.040)	Top-5 acc 82.031 (82.504)	lr 0.01296
Train [59][1940/3239]	Time 0.225 (0.597)	Data Time 0.001 (0.020)	Loss 2.6286 (2.5753)	Entropy 1.10161 (1.10382)	Top-1 acc 60.938 (62.042)	Top-5 acc 80.469 (82.503)	lr 0.01296
Train [59][1950/3239]	Time 0.227 (0.596)	Data Time 0.001 (0.020)	Loss 2.7346 (2.5756)	Entropy 1.10159 (1.10381)	Top-1 acc 57.031 (62.035)	Top-5 acc 79.688 (82.503)	lr 0.01296
Train [59][1960/3239]	Time 0.229 (0.596)	Data Time 0.001 (0.020)	Loss 2.7290 (2.5758)	Entropy 1.10153 (1.10379)	Top-1 acc 57.812 (62.027)	Top-5 acc 77.344 (82.499)	lr 0.01296
Train [59][1970/3239]	Time 0.305 (0.595)	Data Time 0.001 (0.020)	Loss 2.6463 (2.5760)	Entropy 1.10144 (1.10378)	Top-1 acc 62.500 (62.025)	Top-5 acc 82.031 (82.498)	lr 0.01296
Train [59][1980/3239]	Time 0.252 (0.594)	Data Time 0.001 (0.020)	Loss 2.4747 (2.5762)	Entropy 1.10143 (1.10377)	Top-1 acc 61.719 (62.011)	Top-5 acc 84.766 (82.493)	lr 0.01295
Train [59][1990/3239]	Time 2.526 (0.594)	Data Time 0.001 (0.020)	Loss 2.6625 (2.5764)	Entropy 1.10143 (1.10376)	Top-1 acc 58.594 (62.005)	Top-5 acc 80.078 (82.489)	lr 0.01295
Train [59][2000/3239]	Time 0.222 (0.592)	Data Time 0.001 (0.020)	Loss 2.6694 (2.5765)	Entropy 1.10143 (1.10375)	Top-1 acc 60.156 (62.004)	Top-5 acc 78.906 (82.491)	lr 0.01295
Train [59][2010/3239]	Time 0.224 (0.591)	Data Time 0.002 (0.020)	Loss 2.5117 (2.5766)	Entropy 1.10142 (1.10374)	Top-1 acc 64.453 (62.002)	Top-5 acc 85.156 (82.493)	lr 0.01295
Train [59][2020/3239]	Time 0.224 (0.591)	Data Time 0.001 (0.020)	Loss 2.7738 (2.5765)	Entropy 1.10137 (1.10372)	Top-1 acc 54.297 (62.001)	Top-5 acc 77.734 (82.490)	lr 0.01295
Train [59][2030/3239]	Time 0.222 (0.590)	Data Time 0.001 (0.020)	Loss 2.4798 (2.5765)	Entropy 1.10139 (1.10371)	Top-1 acc 64.062 (62.005)	Top-5 acc 85.938 (82.488)	lr 0.01295
Train [59][2040/3239]	Time 0.209 (0.589)	Data Time 0.001 (0.020)	Loss 3.2154 (2.5768)	Entropy 1.10132 (1.10370)	Top-1 acc 47.266 (61.996)	Top-5 acc 70.312 (82.483)	lr 0.01295
Train [59][2050/3239]	Time 0.256 (0.589)	Data Time 0.001 (0.019)	Loss 2.5297 (2.5767)	Entropy 1.10134 (1.10369)	Top-1 acc 64.062 (62.000)	Top-5 acc 82.422 (82.482)	lr 0.01295
Train [59][2060/3239]	Time 0.315 (0.588)	Data Time 0.001 (0.019)	Loss 2.6113 (2.5767)	Entropy 1.10127 (1.10368)	Top-1 acc 61.719 (61.996)	Top-5 acc 80.859 (82.485)	lr 0.01295
Train [59][2070/3239]	Time 0.208 (0.588)	Data Time 0.001 (0.019)	Loss 2.6881 (2.5768)	Entropy 1.10123 (1.10367)	Top-1 acc 60.938 (61.990)	Top-5 acc 80.859 (82.483)	lr 0.01295
Train [59][2080/3239]	Time 0.238 (0.587)	Data Time 0.001 (0.019)	Loss 2.6119 (2.5769)	Entropy 1.10121 (1.10365)	Top-1 acc 62.500 (61.987)	Top-5 acc 79.297 (82.481)	lr 0.01294
Train [59][2090/3239]	Time 0.280 (0.586)	Data Time 0.001 (0.019)	Loss 2.6450 (2.5770)	Entropy 1.10119 (1.10364)	Top-1 acc 57.812 (61.982)	Top-5 acc 81.250 (82.481)	lr 0.01294
Train [59][2100/3239]	Time 2.589 (0.586)	Data Time 0.001 (0.019)	Loss 2.6430 (2.5770)	Entropy 1.10119 (1.10363)	Top-1 acc 61.719 (61.984)	Top-5 acc 81.641 (82.474)	lr 0.01294
Train [59][2110/3239]	Time 0.343 (0.584)	Data Time 0.001 (0.019)	Loss 2.3975 (2.5769)	Entropy 1.10120 (1.10362)	Top-1 acc 69.141 (61.984)	Top-5 acc 83.984 (82.473)	lr 0.01294
Train [59][2120/3239]	Time 0.212 (0.584)	Data Time 0.001 (0.019)	Loss 2.6959 (2.5770)	Entropy 1.10118 (1.10361)	Top-1 acc 58.203 (61.982)	Top-5 acc 78.906 (82.473)	lr 0.01294
Train [59][2130/3239]	Time 0.232 (0.607)	Data Time 0.002 (0.019)	Loss 2.5976 (2.5769)	Entropy 1.10114 (1.10360)	Top-1 acc 59.766 (61.984)	Top-5 acc 82.031 (82.478)	lr 0.01294
Train [59][2140/3239]	Time 0.236 (0.606)	Data Time 0.003 (0.019)	Loss 2.5470 (2.5770)	Entropy 1.10114 (1.10359)	Top-1 acc 60.547 (61.979)	Top-5 acc 83.203 (82.474)	lr 0.01294
Train [59][2150/3239]	Time 0.211 (0.606)	Data Time 0.001 (0.019)	Loss 2.6533 (2.5773)	Entropy 1.10111 (1.10357)	Top-1 acc 58.203 (61.974)	Top-5 acc 80.859 (82.473)	lr 0.01294
Train [59][2160/3239]	Time 0.253 (0.605)	Data Time 0.001 (0.019)	Loss 2.5258 (2.5772)	Entropy 1.10110 (1.10356)	Top-1 acc 65.625 (61.977)	Top-5 acc 82.812 (82.476)	lr 0.01294
Train [59][2170/3239]	Time 0.245 (0.604)	Data Time 0.002 (0.018)	Loss 2.5009 (2.5773)	Entropy 1.10112 (1.10355)	Top-1 acc 66.797 (61.975)	Top-5 acc 83.203 (82.474)	lr 0.01294
Train [59][2180/3239]	Time 0.210 (0.604)	Data Time 0.001 (0.018)	Loss 2.5909 (2.5774)	Entropy 1.10116 (1.10354)	Top-1 acc 61.719 (61.969)	Top-5 acc 82.812 (82.471)	lr 0.01293
Train [59][2190/3239]	Time 0.210 (0.603)	Data Time 0.001 (0.018)	Loss 2.5901 (2.5775)	Entropy 1.10112 (1.10353)	Top-1 acc 59.766 (61.964)	Top-5 acc 83.984 (82.471)	lr 0.01293
Train [59][2200/3239]	Time 0.258 (0.603)	Data Time 0.001 (0.018)	Loss 2.6656 (2.5775)	Entropy 1.10113 (1.10352)	Top-1 acc 60.156 (61.963)	Top-5 acc 79.688 (82.472)	lr 0.01293
Train [59][2210/3239]	Time 2.457 (0.602)	Data Time 0.002 (0.018)	Loss 2.5652 (2.5774)	Entropy 1.10113 (1.10351)	Top-1 acc 66.016 (61.969)	Top-5 acc 82.031 (82.476)	lr 0.01293
Train [59][2220/3239]	Time 0.244 (0.600)	Data Time 0.001 (0.018)	Loss 2.6321 (2.5775)	Entropy 1.10110 (1.10350)	Top-1 acc 57.812 (61.964)	Top-5 acc 82.812 (82.473)	lr 0.01293
Train [59][2230/3239]	Time 0.223 (0.600)	Data Time 0.001 (0.018)	Loss 2.5504 (2.5776)	Entropy 1.10108 (1.10349)	Top-1 acc 63.672 (61.961)	Top-5 acc 82.812 (82.472)	lr 0.01293
Train [59][2240/3239]	Time 0.228 (0.599)	Data Time 0.001 (0.018)	Loss 2.6419 (2.5775)	Entropy 1.10115 (1.10348)	Top-1 acc 60.156 (61.964)	Top-5 acc 81.641 (82.473)	lr 0.01293
Train [59][2250/3239]	Time 0.332 (0.599)	Data Time 0.001 (0.018)	Loss 2.4633 (2.5774)	Entropy 1.10117 (1.10346)	Top-1 acc 66.406 (61.970)	Top-5 acc 84.375 (82.476)	lr 0.01293
Train [59][2260/3239]	Time 0.242 (0.598)	Data Time 0.002 (0.018)	Loss 2.5731 (2.5774)	Entropy 1.10112 (1.10345)	Top-1 acc 64.062 (61.971)	Top-5 acc 83.984 (82.479)	lr 0.01293
Train [59][2270/3239]	Time 0.235 (0.598)	Data Time 0.001 (0.018)	Loss 2.5317 (2.5772)	Entropy 1.10107 (1.10344)	Top-1 acc 66.016 (61.976)	Top-5 acc 85.156 (82.484)	lr 0.01293
Train [59][2280/3239]	Time 0.164 (0.597)	Data Time 0.001 (0.018)	Loss 2.6605 (2.5771)	Entropy 1.10107 (1.10343)	Top-1 acc 57.422 (61.980)	Top-5 acc 81.641 (82.483)	lr 0.01292
Train [59][2290/3239]	Time 0.237 (0.596)	Data Time 0.001 (0.018)	Loss 2.5237 (2.5770)	Entropy 1.10105 (1.10342)	Top-1 acc 60.547 (61.979)	Top-5 acc 83.594 (82.485)	lr 0.01292
Train [59][2300/3239]	Time 0.234 (0.596)	Data Time 0.001 (0.018)	Loss 2.6249 (2.5770)	Entropy 1.10100 (1.10341)	Top-1 acc 63.672 (61.979)	Top-5 acc 81.250 (82.486)	lr 0.01292
Train [59][2310/3239]	Time 0.242 (0.595)	Data Time 0.001 (0.017)	Loss 2.7616 (2.5773)	Entropy 1.10092 (1.10340)	Top-1 acc 55.078 (61.967)	Top-5 acc 78.906 (82.482)	lr 0.01292
Train [59][2320/3239]	Time 2.393 (0.595)	Data Time 0.001 (0.017)	Loss 2.4502 (2.5773)	Entropy 1.10092 (1.10339)	Top-1 acc 67.188 (61.970)	Top-5 acc 82.812 (82.479)	lr 0.01292
Train [59][2330/3239]	Time 0.219 (0.593)	Data Time 0.001 (0.017)	Loss 2.5802 (2.5774)	Entropy 1.10092 (1.10338)	Top-1 acc 60.938 (61.971)	Top-5 acc 82.422 (82.478)	lr 0.01292
Train [59][2340/3239]	Time 0.243 (0.593)	Data Time 0.001 (0.017)	Loss 2.7362 (2.5774)	Entropy 1.10090 (1.10337)	Top-1 acc 60.938 (61.974)	Top-5 acc 80.078 (82.477)	lr 0.01292
Train [59][2350/3239]	Time 0.242 (0.592)	Data Time 0.002 (0.017)	Loss 2.7345 (2.5774)	Entropy 1.10089 (1.10336)	Top-1 acc 58.203 (61.974)	Top-5 acc 76.562 (82.473)	lr 0.01292
Train [59][2360/3239]	Time 0.240 (0.592)	Data Time 0.001 (0.017)	Loss 2.6585 (2.5775)	Entropy 1.10086 (1.10335)	Top-1 acc 59.766 (61.973)	Top-5 acc 81.250 (82.475)	lr 0.01292
Train [59][2370/3239]	Time 0.247 (0.591)	Data Time 0.001 (0.017)	Loss 2.6695 (2.5776)	Entropy 1.10081 (1.10334)	Top-1 acc 59.766 (61.971)	Top-5 acc 81.641 (82.475)	lr 0.01292
Train [59][2380/3239]	Time 0.232 (0.591)	Data Time 0.001 (0.017)	Loss 2.4887 (2.5775)	Entropy 1.10080 (1.10333)	Top-1 acc 66.016 (61.975)	Top-5 acc 82.422 (82.477)	lr 0.01291
Train [59][2390/3239]	Time 0.332 (0.590)	Data Time 0.001 (0.017)	Loss 2.5815 (2.5776)	Entropy 1.10078 (1.10332)	Top-1 acc 61.328 (61.974)	Top-5 acc 82.031 (82.475)	lr 0.01291
Train [59][2400/3239]	Time 0.217 (0.590)	Data Time 0.001 (0.017)	Loss 2.8092 (2.5778)	Entropy 1.10077 (1.10331)	Top-1 acc 55.469 (61.964)	Top-5 acc 77.734 (82.470)	lr 0.01291
Train [59][2410/3239]	Time 0.227 (0.589)	Data Time 0.001 (0.017)	Loss 2.5433 (2.5781)	Entropy 1.10070 (1.10330)	Top-1 acc 61.719 (61.955)	Top-5 acc 83.203 (82.466)	lr 0.01291
Train [59][2420/3239]	Time 0.262 (0.589)	Data Time 0.001 (0.017)	Loss 2.7420 (2.5784)	Entropy 1.10072 (1.10329)	Top-1 acc 56.250 (61.949)	Top-5 acc 81.250 (82.461)	lr 0.01291
Train [59][2430/3239]	Time 2.548 (0.588)	Data Time 0.001 (0.017)	Loss 2.5344 (2.5783)	Entropy 1.10072 (1.10328)	Top-1 acc 60.547 (61.955)	Top-5 acc 83.594 (82.464)	lr 0.01291
Train [59][2440/3239]	Time 0.321 (0.587)	Data Time 0.001 (0.017)	Loss 2.4683 (2.5782)	Entropy 1.10067 (1.10326)	Top-1 acc 64.453 (61.957)	Top-5 acc 84.766 (82.467)	lr 0.01291
Train [59][2450/3239]	Time 0.225 (0.586)	Data Time 0.001 (0.017)	Loss 2.5991 (2.5781)	Entropy 1.10054 (1.10325)	Top-1 acc 61.719 (61.955)	Top-5 acc 83.203 (82.468)	lr 0.01291
Train [59][2460/3239]	Time 0.248 (0.586)	Data Time 0.002 (0.016)	Loss 2.6950 (2.5781)	Entropy 1.10052 (1.10324)	Top-1 acc 61.328 (61.956)	Top-5 acc 80.078 (82.469)	lr 0.01291
Train [59][2470/3239]	Time 0.226 (0.585)	Data Time 0.001 (0.016)	Loss 2.3476 (2.5782)	Entropy 1.10059 (1.10323)	Top-1 acc 70.312 (61.955)	Top-5 acc 85.156 (82.469)	lr 0.01290
Train [59][2480/3239]	Time 0.218 (0.585)	Data Time 0.001 (0.016)	Loss 2.8197 (2.5785)	Entropy 1.10057 (1.10322)	Top-1 acc 58.594 (61.952)	Top-5 acc 78.516 (82.467)	lr 0.01290
Train [59][2490/3239]	Time 0.287 (0.602)	Data Time 0.003 (0.016)	Loss 2.3403 (2.5790)	Entropy 1.10051 (1.10321)	Top-1 acc 66.406 (61.940)	Top-5 acc 85.547 (82.455)	lr 0.01290
Train [59][2500/3239]	Time 0.231 (0.602)	Data Time 0.002 (0.016)	Loss 2.5413 (2.5791)	Entropy 1.10053 (1.10320)	Top-1 acc 62.109 (61.941)	Top-5 acc 80.078 (82.455)	lr 0.01290
Train [59][2510/3239]	Time 0.230 (0.602)	Data Time 0.002 (0.016)	Loss 2.7099 (2.5791)	Entropy 1.10048 (1.10319)	Top-1 acc 57.812 (61.942)	Top-5 acc 79.688 (82.456)	lr 0.01290
Train [59][2520/3239]	Time 0.253 (0.601)	Data Time 0.001 (0.016)	Loss 2.6161 (2.5796)	Entropy 1.10047 (1.10318)	Top-1 acc 62.109 (61.934)	Top-5 acc 82.031 (82.449)	lr 0.01290
Train [59][2530/3239]	Time 0.308 (0.601)	Data Time 0.001 (0.016)	Loss 2.4976 (2.5795)	Entropy 1.10048 (1.10317)	Top-1 acc 64.453 (61.936)	Top-5 acc 83.594 (82.448)	lr 0.01290
Train [59][2540/3239]	Time 2.446 (0.600)	Data Time 0.003 (0.016)	Loss 2.6943 (2.5796)	Entropy 1.10048 (1.10316)	Top-1 acc 60.156 (61.936)	Top-5 acc 79.688 (82.446)	lr 0.01290
Train [59][2550/3239]	Time 0.265 (0.599)	Data Time 0.001 (0.016)	Loss 2.4584 (2.5797)	Entropy 1.10049 (1.10315)	Top-1 acc 64.453 (61.931)	Top-5 acc 84.375 (82.445)	lr 0.01290
Train [59][2560/3239]	Time 0.212 (0.598)	Data Time 0.001 (0.016)	Loss 2.6680 (2.5799)	Entropy 1.10047 (1.10314)	Top-1 acc 59.375 (61.929)	Top-5 acc 80.859 (82.441)	lr 0.01290
Train [59][2570/3239]	Time 0.229 (0.598)	Data Time 0.001 (0.016)	Loss 2.5152 (2.5799)	Entropy 1.10046 (1.10313)	Top-1 acc 63.281 (61.926)	Top-5 acc 84.375 (82.443)	lr 0.01289
Train [59][2580/3239]	Time 0.328 (0.597)	Data Time 0.001 (0.016)	Loss 2.4943 (2.5799)	Entropy 1.10047 (1.10312)	Top-1 acc 64.062 (61.927)	Top-5 acc 83.594 (82.441)	lr 0.01289
Train [59][2590/3239]	Time 0.237 (0.597)	Data Time 0.001 (0.016)	Loss 2.3862 (2.5798)	Entropy 1.10042 (1.10310)	Top-1 acc 70.312 (61.930)	Top-5 acc 86.328 (82.444)	lr 0.01289
Train [59][2600/3239]	Time 0.233 (0.596)	Data Time 0.001 (0.016)	Loss 2.4986 (2.5797)	Entropy 1.10039 (1.10309)	Top-1 acc 62.500 (61.933)	Top-5 acc 84.766 (82.448)	lr 0.01289
Train [59][2610/3239]	Time 0.219 (0.596)	Data Time 0.001 (0.016)	Loss 2.5131 (2.5797)	Entropy 1.10040 (1.10308)	Top-1 acc 65.234 (61.935)	Top-5 acc 82.031 (82.450)	lr 0.01289
Train [59][2620/3239]	Time 0.238 (0.595)	Data Time 0.001 (0.016)	Loss 2.6497 (2.5798)	Entropy 1.10039 (1.10307)	Top-1 acc 59.375 (61.930)	Top-5 acc 80.469 (82.445)	lr 0.01289
Train [59][2630/3239]	Time 0.385 (0.595)	Data Time 0.002 (0.016)	Loss 2.5939 (2.5800)	Entropy 1.10036 (1.10306)	Top-1 acc 59.766 (61.926)	Top-5 acc 84.375 (82.443)	lr 0.01289
Train [59][2640/3239]	Time 0.202 (0.594)	Data Time 0.001 (0.015)	Loss 2.6143 (2.5801)	Entropy 1.10040 (1.10305)	Top-1 acc 60.156 (61.922)	Top-5 acc 83.594 (82.442)	lr 0.01289
Train [59][2650/3239]	Time 0.222 (0.594)	Data Time 0.001 (0.015)	Loss 2.7589 (2.5801)	Entropy 1.10035 (1.10304)	Top-1 acc 58.984 (61.922)	Top-5 acc 78.125 (82.444)	lr 0.01289
Train [59][2660/3239]	Time 0.203 (0.593)	Data Time 0.001 (0.015)	Loss 2.6900 (2.5802)	Entropy 1.10037 (1.10303)	Top-1 acc 58.594 (61.917)	Top-5 acc 78.516 (82.441)	lr 0.01289
Train [59][2670/3239]	Time 0.264 (0.593)	Data Time 0.001 (0.015)	Loss 2.4712 (2.5801)	Entropy 1.10033 (1.10302)	Top-1 acc 64.844 (61.920)	Top-5 acc 83.984 (82.443)	lr 0.01288
Train [59][2680/3239]	Time 0.224 (0.592)	Data Time 0.001 (0.015)	Loss 2.4557 (2.5801)	Entropy 1.10033 (1.10301)	Top-1 acc 67.188 (61.919)	Top-5 acc 82.812 (82.441)	lr 0.01288
Train [59][2690/3239]	Time 0.265 (0.592)	Data Time 0.001 (0.015)	Loss 2.4281 (2.5799)	Entropy 1.10031 (1.10300)	Top-1 acc 64.453 (61.921)	Top-5 acc 84.375 (82.444)	lr 0.01288
Train [59][2700/3239]	Time 0.238 (0.591)	Data Time 0.001 (0.015)	Loss 2.4926 (2.5798)	Entropy 1.10025 (1.10299)	Top-1 acc 63.281 (61.923)	Top-5 acc 83.984 (82.448)	lr 0.01288
Train [59][2710/3239]	Time 0.228 (0.591)	Data Time 0.001 (0.015)	Loss 2.4854 (2.5797)	Entropy 1.10023 (1.10298)	Top-1 acc 65.625 (61.924)	Top-5 acc 83.203 (82.448)	lr 0.01288
Train [59][2720/3239]	Time 0.328 (0.591)	Data Time 0.001 (0.015)	Loss 2.6009 (2.5796)	Entropy 1.10013 (1.10297)	Top-1 acc 60.156 (61.928)	Top-5 acc 85.156 (82.452)	lr 0.01288
Train [59][2730/3239]	Time 0.231 (0.590)	Data Time 0.001 (0.015)	Loss 2.5486 (2.5796)	Entropy 1.10015 (1.10296)	Top-1 acc 66.016 (61.924)	Top-5 acc 82.812 (82.450)	lr 0.01288
Train [59][2740/3239]	Time 0.255 (0.590)	Data Time 0.001 (0.015)	Loss 2.5961 (2.5796)	Entropy 1.10014 (1.10295)	Top-1 acc 60.938 (61.925)	Top-5 acc 82.812 (82.450)	lr 0.01288
Train [59][2750/3239]	Time 0.242 (0.589)	Data Time 0.001 (0.015)	Loss 2.5261 (2.5797)	Entropy 1.10009 (1.10294)	Top-1 acc 62.891 (61.919)	Top-5 acc 82.031 (82.444)	lr 0.01288
Train [59][2760/3239]	Time 0.246 (0.589)	Data Time 0.001 (0.015)	Loss 2.7118 (2.5794)	Entropy 1.10004 (1.10293)	Top-1 acc 57.812 (61.930)	Top-5 acc 79.688 (82.448)	lr 0.01288
Train [59][2770/3239]	Time 0.211 (0.588)	Data Time 0.001 (0.015)	Loss 2.5122 (2.5794)	Entropy 1.10003 (1.10292)	Top-1 acc 63.672 (61.934)	Top-5 acc 84.766 (82.450)	lr 0.01287
Train [59][2780/3239]	Time 0.253 (0.588)	Data Time 0.001 (0.015)	Loss 2.6685 (2.5793)	Entropy 1.10005 (1.10291)	Top-1 acc 58.984 (61.935)	Top-5 acc 82.422 (82.453)	lr 0.01287
Train [59][2790/3239]	Time 0.256 (0.587)	Data Time 0.003 (0.015)	Loss 2.5491 (2.5794)	Entropy 1.10002 (1.10290)	Top-1 acc 61.719 (61.935)	Top-5 acc 83.203 (82.451)	lr 0.01287
Train [59][2800/3239]	Time 0.254 (0.587)	Data Time 0.001 (0.015)	Loss 2.4649 (2.5795)	Entropy 1.09998 (1.10289)	Top-1 acc 66.016 (61.936)	Top-5 acc 83.984 (82.447)	lr 0.01287
Train [59][2810/3239]	Time 0.374 (0.587)	Data Time 0.001 (0.015)	Loss 2.5554 (2.5795)	Entropy 1.09994 (1.10288)	Top-1 acc 63.281 (61.934)	Top-5 acc 83.594 (82.449)	lr 0.01287
Train [59][2820/3239]	Time 0.258 (0.586)	Data Time 0.001 (0.015)	Loss 2.5420 (2.5796)	Entropy 1.09990 (1.10287)	Top-1 acc 63.281 (61.932)	Top-5 acc 83.203 (82.448)	lr 0.01287
Train [59][2830/3239]	Time 0.256 (0.603)	Data Time 0.004 (0.015)	Loss 2.4433 (2.5796)	Entropy 1.09983 (1.10286)	Top-1 acc 64.062 (61.933)	Top-5 acc 85.547 (82.449)	lr 0.01287
Train [59][2840/3239]	Time 0.232 (0.602)	Data Time 0.002 (0.015)	Loss 2.6188 (2.5797)	Entropy 1.09981 (1.10285)	Top-1 acc 64.062 (61.931)	Top-5 acc 82.812 (82.446)	lr 0.01287
Train [59][2850/3239]	Time 0.323 (0.602)	Data Time 0.002 (0.014)	Loss 2.6286 (2.5799)	Entropy 1.09982 (1.10284)	Top-1 acc 61.328 (61.928)	Top-5 acc 81.641 (82.446)	lr 0.01287
Train [59][2860/3239]	Time 0.211 (0.601)	Data Time 0.001 (0.014)	Loss 2.4672 (2.5796)	Entropy 1.09974 (1.10283)	Top-1 acc 60.156 (61.931)	Top-5 acc 85.156 (82.450)	lr 0.01287
Train [59][2870/3239]	Time 0.243 (0.601)	Data Time 0.002 (0.014)	Loss 2.6045 (2.5796)	Entropy 1.09971 (1.10282)	Top-1 acc 60.938 (61.930)	Top-5 acc 82.031 (82.449)	lr 0.01286
Train [59][2880/3239]	Time 0.208 (0.600)	Data Time 0.001 (0.014)	Loss 2.5789 (2.5797)	Entropy 1.09968 (1.10281)	Top-1 acc 63.672 (61.929)	Top-5 acc 81.641 (82.444)	lr 0.01286
Train [59][2890/3239]	Time 0.235 (0.600)	Data Time 0.002 (0.014)	Loss 2.6124 (2.5798)	Entropy 1.09956 (1.10279)	Top-1 acc 60.156 (61.927)	Top-5 acc 83.203 (82.441)	lr 0.01286
Train [59][2900/3239]	Time 0.353 (0.600)	Data Time 0.001 (0.014)	Loss 2.5207 (2.5797)	Entropy 1.09962 (1.10278)	Top-1 acc 61.328 (61.933)	Top-5 acc 83.203 (82.446)	lr 0.01286
Train [59][2910/3239]	Time 0.223 (0.599)	Data Time 0.001 (0.014)	Loss 2.5507 (2.5797)	Entropy 1.09959 (1.10277)	Top-1 acc 59.375 (61.930)	Top-5 acc 86.719 (82.445)	lr 0.01286
Train [59][2920/3239]	Time 0.250 (0.599)	Data Time 0.001 (0.014)	Loss 2.5708 (2.5797)	Entropy 1.09958 (1.10276)	Top-1 acc 59.766 (61.928)	Top-5 acc 82.031 (82.446)	lr 0.01286
Train [59][2930/3239]	Time 0.217 (0.598)	Data Time 0.001 (0.014)	Loss 2.4468 (2.5795)	Entropy 1.09961 (1.10275)	Top-1 acc 65.234 (61.933)	Top-5 acc 83.203 (82.448)	lr 0.01286
Train [59][2940/3239]	Time 0.311 (0.598)	Data Time 0.001 (0.014)	Loss 2.6627 (2.5795)	Entropy 1.09962 (1.10274)	Top-1 acc 56.641 (61.934)	Top-5 acc 82.812 (82.452)	lr 0.01286
Train [59][2950/3239]	Time 0.238 (0.597)	Data Time 0.001 (0.014)	Loss 2.8180 (2.5795)	Entropy 1.09966 (1.10273)	Top-1 acc 56.250 (61.937)	Top-5 acc 80.078 (82.450)	lr 0.01286
Train [59][2960/3239]	Time 0.250 (0.597)	Data Time 0.001 (0.014)	Loss 2.4765 (2.5795)	Entropy 1.09967 (1.10272)	Top-1 acc 62.891 (61.937)	Top-5 acc 85.938 (82.450)	lr 0.01286
Train [59][2970/3239]	Time 0.251 (0.597)	Data Time 0.001 (0.014)	Loss 2.3015 (2.5796)	Entropy 1.09956 (1.10271)	Top-1 acc 67.188 (61.935)	Top-5 acc 89.062 (82.449)	lr 0.01285
Train [59][2980/3239]	Time 0.219 (0.596)	Data Time 0.001 (0.014)	Loss 2.7422 (2.5796)	Entropy 1.09950 (1.10270)	Top-1 acc 58.984 (61.932)	Top-5 acc 79.688 (82.449)	lr 0.01285
Train [59][2990/3239]	Time 0.235 (0.596)	Data Time 0.001 (0.014)	Loss 2.8034 (2.5795)	Entropy 1.09950 (1.10269)	Top-1 acc 56.250 (61.934)	Top-5 acc 76.953 (82.448)	lr 0.01285
Train [59][3000/3239]	Time 0.257 (0.595)	Data Time 0.001 (0.014)	Loss 2.4868 (2.5795)	Entropy 1.09949 (1.10268)	Top-1 acc 64.062 (61.934)	Top-5 acc 82.422 (82.446)	lr 0.01285
Train [59][3010/3239]	Time 0.253 (0.595)	Data Time 0.002 (0.014)	Loss 2.6685 (2.5797)	Entropy 1.09948 (1.10267)	Top-1 acc 63.281 (61.932)	Top-5 acc 81.250 (82.442)	lr 0.01285
Train [59][3020/3239]	Time 0.254 (0.594)	Data Time 0.001 (0.014)	Loss 2.5220 (2.5794)	Entropy 1.09947 (1.10266)	Top-1 acc 63.672 (61.938)	Top-5 acc 82.422 (82.445)	lr 0.01285
Train [59][3030/3239]	Time 0.225 (0.594)	Data Time 0.001 (0.014)	Loss 2.5819 (2.5795)	Entropy 1.09946 (1.10265)	Top-1 acc 61.719 (61.936)	Top-5 acc 83.203 (82.444)	lr 0.01285
Train [59][3040/3239]	Time 0.270 (0.594)	Data Time 0.001 (0.014)	Loss 2.7926 (2.5796)	Entropy 1.09946 (1.10264)	Top-1 acc 57.812 (61.932)	Top-5 acc 80.469 (82.443)	lr 0.01285
Train [59][3050/3239]	Time 0.237 (0.593)	Data Time 0.002 (0.014)	Loss 2.4723 (2.5797)	Entropy 1.09947 (1.10262)	Top-1 acc 65.234 (61.929)	Top-5 acc 84.766 (82.441)	lr 0.01285
Train [59][3060/3239]	Time 0.230 (0.593)	Data Time 0.001 (0.014)	Loss 2.4856 (2.5797)	Entropy 1.09937 (1.10261)	Top-1 acc 61.328 (61.928)	Top-5 acc 83.203 (82.440)	lr 0.01285
Train [59][3070/3239]	Time 0.218 (0.592)	Data Time 0.003 (0.014)	Loss 2.5299 (2.5797)	Entropy 1.09930 (1.10260)	Top-1 acc 60.156 (61.925)	Top-5 acc 84.766 (82.442)	lr 0.01284
Train [59][3080/3239]	Time 0.321 (0.592)	Data Time 0.001 (0.014)	Loss 2.7539 (2.5799)	Entropy 1.09930 (1.10259)	Top-1 acc 57.422 (61.923)	Top-5 acc 78.906 (82.435)	lr 0.01284
Train [59][3090/3239]	Time 0.291 (0.591)	Data Time 0.001 (0.013)	Loss 2.6370 (2.5799)	Entropy 1.09925 (1.10258)	Top-1 acc 59.766 (61.924)	Top-5 acc 80.469 (82.435)	lr 0.01284
Train [59][3100/3239]	Time 0.227 (0.591)	Data Time 0.001 (0.013)	Loss 2.5428 (2.5798)	Entropy 1.09902 (1.10257)	Top-1 acc 60.156 (61.925)	Top-5 acc 84.375 (82.436)	lr 0.01284
Train [59][3110/3239]	Time 0.222 (0.591)	Data Time 0.001 (0.013)	Loss 2.6220 (2.5799)	Entropy 1.09898 (1.10256)	Top-1 acc 62.891 (61.924)	Top-5 acc 81.641 (82.435)	lr 0.01284
Train [59][3120/3239]	Time 0.220 (0.590)	Data Time 0.001 (0.013)	Loss 2.6070 (2.5799)	Entropy 1.09894 (1.10255)	Top-1 acc 66.016 (61.922)	Top-5 acc 81.641 (82.434)	lr 0.01284
Train [59][3130/3239]	Time 0.295 (0.590)	Data Time 0.001 (0.013)	Loss 2.5541 (2.5800)	Entropy 1.09886 (1.10254)	Top-1 acc 61.328 (61.921)	Top-5 acc 83.594 (82.430)	lr 0.01284
Train [59][3140/3239]	Time 0.219 (0.589)	Data Time 0.001 (0.013)	Loss 2.7355 (2.5801)	Entropy 1.09885 (1.10253)	Top-1 acc 57.031 (61.917)	Top-5 acc 80.469 (82.430)	lr 0.01284
Train [59][3150/3239]	Time 0.222 (0.589)	Data Time 0.001 (0.013)	Loss 2.4921 (2.5802)	Entropy 1.09882 (1.10251)	Top-1 acc 62.891 (61.911)	Top-5 acc 85.156 (82.428)	lr 0.01284
Train [59][3160/3239]	Time 0.297 (0.604)	Data Time 0.005 (0.013)	Loss 2.5145 (2.5802)	Entropy 1.09883 (1.10250)	Top-1 acc 65.234 (61.911)	Top-5 acc 82.422 (82.427)	lr 0.01284
Train [59][3170/3239]	Time 0.228 (0.604)	Data Time 0.002 (0.013)	Loss 2.5775 (2.5801)	Entropy 1.09884 (1.10249)	Top-1 acc 61.719 (61.912)	Top-5 acc 83.984 (82.432)	lr 0.01283
Train [59][3180/3239]	Time 0.241 (0.604)	Data Time 0.000 (0.013)	Loss 2.7457 (2.5799)	Entropy 1.09878 (1.10248)	Top-1 acc 62.109 (61.915)	Top-5 acc 80.078 (82.435)	lr 0.01283
Train [59][3190/3239]	Time 0.214 (0.603)	Data Time 0.000 (0.013)	Loss 2.5122 (2.5800)	Entropy 1.09877 (1.10247)	Top-1 acc 67.188 (61.917)	Top-5 acc 83.203 (82.435)	lr 0.01283
Train [59][3200/3239]	Time 0.213 (0.603)	Data Time 0.000 (0.013)	Loss 2.6425 (2.5799)	Entropy 1.09875 (1.10246)	Top-1 acc 60.156 (61.918)	Top-5 acc 81.250 (82.435)	lr 0.01283
Train [59][3210/3239]	Time 0.235 (0.602)	Data Time 0.000 (0.013)	Loss 2.5192 (2.5799)	Entropy 1.09876 (1.10244)	Top-1 acc 64.453 (61.917)	Top-5 acc 83.203 (82.432)	lr 0.01283
Train [59][3220/3239]	Time 0.332 (0.602)	Data Time 0.000 (0.013)	Loss 2.6060 (2.5800)	Entropy 1.09876 (1.10243)	Top-1 acc 63.281 (61.916)	Top-5 acc 81.250 (82.432)	lr 0.01283
Train [59][3230/3239]	Time 0.229 (0.601)	Data Time 0.000 (0.013)	Loss 2.4237 (2.5799)	Entropy 1.09868 (1.10242)	Top-1 acc 65.234 (61.917)	Top-5 acc 87.500 (82.432)	lr 0.01283
Train [59][3239/3239]	Time 2.277 (0.601)	Data Time 0.000 (0.013)	Loss 2.7897 (2.5801)	Entropy 1.09868 (1.10241)	Top-1 acc 55.556 (61.915)	Top-5 acc 75.309 (82.428)	lr 0.01283
==========Valid [59/120]	loss 1.466	top-1 acc 66.792 (66.792)	top-5 acc 86.389	Train top-1 61.915	top-5 82.428	Entropy 1.09868	Latency-None: 0.000ms	Flops: 548.34M
Train [60][0/3239]	Time 36.606 (36.606)	Data Time 35.880 (35.880)	Loss 2.4272 (2.4272)	Entropy 1.09867 (1.09867)	Top-1 acc 65.234 (65.234)	Top-5 acc 86.328 (86.328)	lr 0.01283
Train [60][10/3239]	Time 2.541 (3.955)	Data Time 0.002 (3.360)	Loss 2.6919 (2.5126)	Entropy 1.09867 (1.09867)	Top-1 acc 59.766 (63.601)	Top-5 acc 81.641 (83.807)	lr 0.01283
Train [60][20/3239]	Time 0.215 (2.188)	Data Time 0.001 (1.761)	Loss 2.6691 (2.5832)	Entropy 1.09858 (1.09863)	Top-1 acc 57.812 (61.682)	Top-5 acc 80.859 (82.738)	lr 0.01283
Train [60][30/3239]	Time 0.343 (1.634)	Data Time 0.002 (1.193)	Loss 2.6023 (2.5774)	Entropy 1.09853 (1.09860)	Top-1 acc 60.156 (61.883)	Top-5 acc 78.516 (82.623)	lr 0.01282
Train [60][40/3239]	Time 0.219 (1.353)	Data Time 0.001 (0.903)	Loss 2.4410 (2.5586)	Entropy 1.09852 (1.09858)	Top-1 acc 66.797 (62.500)	Top-5 acc 83.594 (82.946)	lr 0.01282
Train [60][50/3239]	Time 0.220 (1.180)	Data Time 0.001 (0.726)	Loss 2.4477 (2.5563)	Entropy 1.09848 (1.09856)	Top-1 acc 62.109 (62.492)	Top-5 acc 85.938 (82.973)	lr 0.01282
Train [60][60/3239]	Time 0.224 (1.063)	Data Time 0.001 (0.607)	Loss 2.7211 (2.5643)	Entropy 1.09847 (1.09855)	Top-1 acc 57.812 (62.455)	Top-5 acc 78.906 (82.787)	lr 0.01282
Train [60][70/3239]	Time 0.241 (0.980)	Data Time 0.001 (0.522)	Loss 2.4258 (2.5638)	Entropy 1.09848 (1.09854)	Top-1 acc 67.578 (62.500)	Top-5 acc 83.594 (82.796)	lr 0.01282
Train [60][80/3239]	Time 0.321 (0.917)	Data Time 0.001 (0.458)	Loss 2.5305 (2.5664)	Entropy 1.09848 (1.09853)	Top-1 acc 64.453 (62.418)	Top-5 acc 84.375 (82.750)	lr 0.01282
Train [60][90/3239]	Time 0.240 (0.868)	Data Time 0.001 (0.408)	Loss 2.3894 (2.5626)	Entropy 1.09844 (1.09852)	Top-1 acc 67.969 (62.509)	Top-5 acc 86.328 (82.774)	lr 0.01282
Train [60][100/3239]	Time 0.239 (0.829)	Data Time 0.001 (0.368)	Loss 2.7653 (2.5670)	Entropy 1.09840 (1.09851)	Top-1 acc 58.594 (62.481)	Top-5 acc 80.078 (82.635)	lr 0.01282
Train [60][110/3239]	Time 0.255 (0.795)	Data Time 0.001 (0.335)	Loss 2.7307 (2.5708)	Entropy 1.09840 (1.09850)	Top-1 acc 59.375 (62.384)	Top-5 acc 80.469 (82.573)	lr 0.01282
Train [60][120/3239]	Time 2.349 (0.766)	Data Time 0.001 (0.307)	Loss 2.7002 (2.5723)	Entropy 1.09840 (1.09850)	Top-1 acc 61.719 (62.464)	Top-5 acc 79.688 (82.532)	lr 0.01282
Train [60][130/3239]	Time 0.228 (0.726)	Data Time 0.001 (0.284)	Loss 2.6095 (2.5700)	Entropy 1.09835 (1.09848)	Top-1 acc 58.203 (62.467)	Top-5 acc 81.250 (82.592)	lr 0.01281
Train [60][140/3239]	Time 0.224 (0.708)	Data Time 0.001 (0.264)	Loss 2.6240 (2.5712)	Entropy 1.09829 (1.09847)	Top-1 acc 62.500 (62.395)	Top-5 acc 77.734 (82.607)	lr 0.01281
Train [60][150/3239]	Time 0.229 (0.691)	Data Time 0.001 (0.246)	Loss 2.3498 (2.5635)	Entropy 1.09828 (1.09846)	Top-1 acc 66.016 (62.570)	Top-5 acc 86.719 (82.781)	lr 0.01281
Train [60][160/3239]	Time 0.228 (0.677)	Data Time 0.001 (0.231)	Loss 2.4751 (2.5638)	Entropy 1.09821 (1.09844)	Top-1 acc 64.453 (62.517)	Top-5 acc 82.812 (82.766)	lr 0.01281
Train [60][170/3239]	Time 0.331 (0.664)	Data Time 0.001 (0.218)	Loss 2.5314 (2.5637)	Entropy 1.09817 (1.09843)	Top-1 acc 62.500 (62.539)	Top-5 acc 81.641 (82.755)	lr 0.01281
Train [60][180/3239]	Time 0.220 (0.652)	Data Time 0.001 (0.206)	Loss 2.5775 (2.5649)	Entropy 1.09807 (1.09841)	Top-1 acc 60.938 (62.453)	Top-5 acc 82.422 (82.713)	lr 0.01281
Train [60][190/3239]	Time 0.216 (0.643)	Data Time 0.002 (0.195)	Loss 2.6976 (2.5606)	Entropy 1.09806 (1.09839)	Top-1 acc 57.422 (62.531)	Top-5 acc 82.422 (82.808)	lr 0.01281
Train [60][200/3239]	Time 0.222 (0.634)	Data Time 0.001 (0.185)	Loss 2.5899 (2.5605)	Entropy 1.09802 (1.09838)	Top-1 acc 59.766 (62.496)	Top-5 acc 85.156 (82.836)	lr 0.01281
Train [60][210/3239]	Time 0.236 (0.626)	Data Time 0.001 (0.177)	Loss 2.3866 (2.5606)	Entropy 1.09800 (1.09836)	Top-1 acc 67.578 (62.476)	Top-5 acc 87.500 (82.822)	lr 0.01281
Train [60][220/3239]	Time 0.308 (0.619)	Data Time 0.001 (0.169)	Loss 2.5363 (2.5607)	Entropy 1.09799 (1.09834)	Top-1 acc 61.719 (62.452)	Top-5 acc 84.375 (82.837)	lr 0.01280
Train [60][230/3239]	Time 2.420 (0.612)	Data Time 0.002 (0.162)	Loss 2.6218 (2.5597)	Entropy 1.09799 (1.09833)	Top-1 acc 59.766 (62.463)	Top-5 acc 82.812 (82.860)	lr 0.01280
Train [60][240/3239]	Time 0.275 (0.597)	Data Time 0.002 (0.155)	Loss 2.6295 (2.5577)	Entropy 1.09796 (1.09831)	Top-1 acc 61.719 (62.497)	Top-5 acc 81.250 (82.945)	lr 0.01280
Train [60][250/3239]	Time 0.233 (0.592)	Data Time 0.001 (0.149)	Loss 2.6080 (2.5588)	Entropy 1.09789 (1.09830)	Top-1 acc 64.062 (62.484)	Top-5 acc 81.641 (82.915)	lr 0.01280
Train [60][260/3239]	Time 0.201 (0.586)	Data Time 0.002 (0.143)	Loss 2.6366 (2.5590)	Entropy 1.09786 (1.09828)	Top-1 acc 60.547 (62.482)	Top-5 acc 82.031 (82.913)	lr 0.01280
Train [60][270/3239]	Time 0.233 (0.582)	Data Time 0.002 (0.138)	Loss 2.5845 (2.5608)	Entropy 1.09779 (1.09826)	Top-1 acc 60.938 (62.437)	Top-5 acc 83.203 (82.846)	lr 0.01280
Train [60][280/3239]	Time 0.288 (0.767)	Data Time 0.004 (0.133)	Loss 2.3463 (2.5600)	Entropy 1.09778 (1.09825)	Top-1 acc 67.969 (62.465)	Top-5 acc 88.281 (82.861)	lr 0.01280
Train [60][290/3239]	Time 0.242 (0.760)	Data Time 0.002 (0.129)	Loss 2.6546 (2.5587)	Entropy 1.09773 (1.09823)	Top-1 acc 56.641 (62.509)	Top-5 acc 82.422 (82.901)	lr 0.01280
Train [60][300/3239]	Time 0.222 (0.751)	Data Time 0.001 (0.125)	Loss 2.4767 (2.5587)	Entropy 1.09775 (1.09821)	Top-1 acc 62.891 (62.539)	Top-5 acc 82.422 (82.892)	lr 0.01280
Train [60][310/3239]	Time 0.314 (0.742)	Data Time 0.001 (0.121)	Loss 2.5054 (2.5588)	Entropy 1.09769 (1.09820)	Top-1 acc 61.719 (62.513)	Top-5 acc 85.156 (82.907)	lr 0.01280
Train [60][320/3239]	Time 0.227 (0.733)	Data Time 0.001 (0.117)	Loss 2.6732 (2.5596)	Entropy 1.09762 (1.09818)	Top-1 acc 61.719 (62.485)	Top-5 acc 80.859 (82.883)	lr 0.01279
Train [60][330/3239]	Time 0.258 (0.724)	Data Time 0.001 (0.113)	Loss 2.5908 (2.5596)	Entropy 1.09757 (1.09816)	Top-1 acc 60.547 (62.483)	Top-5 acc 82.031 (82.880)	lr 0.01279
Train [60][340/3239]	Time 2.467 (0.717)	Data Time 0.037 (0.110)	Loss 2.5198 (2.5586)	Entropy 1.09757 (1.09815)	Top-1 acc 62.891 (62.511)	Top-5 acc 82.031 (82.893)	lr 0.01279
Train [60][350/3239]	Time 0.266 (0.703)	Data Time 0.001 (0.107)	Loss 2.5181 (2.5572)	Entropy 1.09756 (1.09813)	Top-1 acc 63.672 (62.550)	Top-5 acc 83.594 (82.909)	lr 0.01279
Train [60][360/3239]	Time 0.229 (0.697)	Data Time 0.001 (0.104)	Loss 2.5879 (2.5576)	Entropy 1.09754 (1.09811)	Top-1 acc 62.109 (62.561)	Top-5 acc 79.297 (82.907)	lr 0.01279
Train [60][370/3239]	Time 0.230 (0.691)	Data Time 0.001 (0.101)	Loss 2.7132 (2.5591)	Entropy 1.09756 (1.09810)	Top-1 acc 57.422 (62.511)	Top-5 acc 81.250 (82.857)	lr 0.01279
Train [60][380/3239]	Time 0.224 (0.685)	Data Time 0.001 (0.099)	Loss 2.5123 (2.5599)	Entropy 1.09757 (1.09808)	Top-1 acc 61.719 (62.508)	Top-5 acc 84.766 (82.843)	lr 0.01279
Train [60][390/3239]	Time 0.226 (0.679)	Data Time 0.001 (0.096)	Loss 2.4740 (2.5605)	Entropy 1.09754 (1.09807)	Top-1 acc 64.062 (62.517)	Top-5 acc 84.766 (82.816)	lr 0.01279
Train [60][400/3239]	Time 0.214 (0.674)	Data Time 0.001 (0.094)	Loss 2.5221 (2.5599)	Entropy 1.09753 (1.09806)	Top-1 acc 59.766 (62.541)	Top-5 acc 83.594 (82.832)	lr 0.01279
Train [60][410/3239]	Time 0.219 (0.669)	Data Time 0.001 (0.092)	Loss 2.6583 (2.5598)	Entropy 1.09750 (1.09804)	Top-1 acc 62.500 (62.560)	Top-5 acc 81.641 (82.836)	lr 0.01279
Train [60][420/3239]	Time 0.234 (0.664)	Data Time 0.001 (0.090)	Loss 2.5340 (2.5588)	Entropy 1.09737 (1.09803)	Top-1 acc 61.719 (62.579)	Top-5 acc 82.422 (82.864)	lr 0.01278
Train [60][430/3239]	Time 0.232 (0.659)	Data Time 0.003 (0.088)	Loss 2.6234 (2.5584)	Entropy 1.09736 (1.09801)	Top-1 acc 62.891 (62.598)	Top-5 acc 80.859 (82.867)	lr 0.01278
Train [60][440/3239]	Time 0.232 (0.655)	Data Time 0.001 (0.086)	Loss 2.5161 (2.5579)	Entropy 1.09735 (1.09800)	Top-1 acc 65.234 (62.644)	Top-5 acc 83.594 (82.870)	lr 0.01278
Train [60][450/3239]	Time 2.502 (0.651)	Data Time 0.001 (0.084)	Loss 2.5533 (2.5580)	Entropy 1.09735 (1.09799)	Top-1 acc 62.500 (62.646)	Top-5 acc 83.203 (82.876)	lr 0.01278
Train [60][460/3239]	Time 0.231 (0.642)	Data Time 0.001 (0.082)	Loss 2.4736 (2.5576)	Entropy 1.09733 (1.09797)	Top-1 acc 64.453 (62.659)	Top-5 acc 84.375 (82.885)	lr 0.01278
Train [60][470/3239]	Time 0.220 (0.638)	Data Time 0.001 (0.080)	Loss 2.7326 (2.5581)	Entropy 1.09738 (1.09796)	Top-1 acc 57.812 (62.644)	Top-5 acc 78.125 (82.871)	lr 0.01278
Train [60][480/3239]	Time 0.229 (0.634)	Data Time 0.001 (0.079)	Loss 2.6865 (2.5585)	Entropy 1.09735 (1.09795)	Top-1 acc 60.938 (62.623)	Top-5 acc 78.516 (82.870)	lr 0.01278
Train [60][490/3239]	Time 0.334 (0.631)	Data Time 0.001 (0.077)	Loss 2.4247 (2.5569)	Entropy 1.09730 (1.09793)	Top-1 acc 64.453 (62.627)	Top-5 acc 85.938 (82.899)	lr 0.01278
Train [60][500/3239]	Time 0.225 (0.627)	Data Time 0.001 (0.076)	Loss 2.6076 (2.5571)	Entropy 1.09729 (1.09792)	Top-1 acc 62.891 (62.624)	Top-5 acc 80.078 (82.893)	lr 0.01278
Train [60][510/3239]	Time 0.238 (0.624)	Data Time 0.001 (0.074)	Loss 2.6616 (2.5581)	Entropy 1.09730 (1.09791)	Top-1 acc 57.812 (62.605)	Top-5 acc 81.250 (82.863)	lr 0.01278
Train [60][520/3239]	Time 0.231 (0.621)	Data Time 0.001 (0.073)	Loss 2.4998 (2.5576)	Entropy 1.09731 (1.09790)	Top-1 acc 60.547 (62.624)	Top-5 acc 83.594 (82.864)	lr 0.01277
Train [60][530/3239]	Time 0.231 (0.618)	Data Time 0.001 (0.071)	Loss 2.6188 (2.5579)	Entropy 1.09730 (1.09789)	Top-1 acc 62.500 (62.615)	Top-5 acc 81.250 (82.860)	lr 0.01277
Train [60][540/3239]	Time 0.303 (0.616)	Data Time 0.001 (0.070)	Loss 2.6334 (2.5573)	Entropy 1.09729 (1.09787)	Top-1 acc 61.719 (62.627)	Top-5 acc 82.031 (82.882)	lr 0.01277
Train [60][550/3239]	Time 0.256 (0.613)	Data Time 0.001 (0.069)	Loss 2.6687 (2.5575)	Entropy 1.09725 (1.09786)	Top-1 acc 59.375 (62.646)	Top-5 acc 78.906 (82.866)	lr 0.01277
Train [60][560/3239]	Time 2.432 (0.610)	Data Time 0.001 (0.068)	Loss 2.4873 (2.5578)	Entropy 1.09725 (1.09785)	Top-1 acc 62.891 (62.625)	Top-5 acc 81.250 (82.844)	lr 0.01277
Train [60][570/3239]	Time 0.221 (0.603)	Data Time 0.001 (0.066)	Loss 2.8896 (2.5574)	Entropy 1.09720 (1.09784)	Top-1 acc 50.781 (62.625)	Top-5 acc 77.344 (82.849)	lr 0.01277
Train [60][580/3239]	Time 0.211 (0.601)	Data Time 0.001 (0.065)	Loss 2.3692 (2.5571)	Entropy 1.09719 (1.09783)	Top-1 acc 70.312 (62.636)	Top-5 acc 86.328 (82.858)	lr 0.01277
Train [60][590/3239]	Time 0.245 (0.599)	Data Time 0.001 (0.064)	Loss 2.7237 (2.5573)	Entropy 1.09720 (1.09782)	Top-1 acc 57.812 (62.628)	Top-5 acc 79.297 (82.848)	lr 0.01277
Train [60][600/3239]	Time 0.219 (0.597)	Data Time 0.001 (0.063)	Loss 2.5292 (2.5573)	Entropy 1.09728 (1.09781)	Top-1 acc 62.891 (62.623)	Top-5 acc 84.375 (82.858)	lr 0.01277
Train [60][610/3239]	Time 0.221 (0.595)	Data Time 0.001 (0.062)	Loss 2.5942 (2.5562)	Entropy 1.09724 (1.09780)	Top-1 acc 62.109 (62.650)	Top-5 acc 82.422 (82.885)	lr 0.01277
Train [60][620/3239]	Time 0.243 (0.593)	Data Time 0.001 (0.061)	Loss 2.5071 (2.5557)	Entropy 1.09722 (1.09779)	Top-1 acc 64.453 (62.659)	Top-5 acc 85.156 (82.883)	lr 0.01276
Train [60][630/3239]	Time 0.218 (0.591)	Data Time 0.001 (0.060)	Loss 2.3839 (2.5545)	Entropy 1.09715 (1.09778)	Top-1 acc 71.094 (62.682)	Top-5 acc 85.156 (82.902)	lr 0.01276
Train [60][640/3239]	Time 0.378 (0.668)	Data Time 0.002 (0.059)	Loss 2.5751 (2.5540)	Entropy 1.09718 (1.09777)	Top-1 acc 63.672 (62.692)	Top-5 acc 81.641 (82.914)	lr 0.01276
Train [60][650/3239]	Time 0.223 (0.666)	Data Time 0.002 (0.059)	Loss 2.6242 (2.5545)	Entropy 1.09715 (1.09776)	Top-1 acc 60.547 (62.657)	Top-5 acc 82.812 (82.897)	lr 0.01276
Train [60][660/3239]	Time 0.242 (0.663)	Data Time 0.001 (0.058)	Loss 2.5682 (2.5544)	Entropy 1.09716 (1.09775)	Top-1 acc 64.062 (62.676)	Top-5 acc 83.594 (82.898)	lr 0.01276
Train [60][670/3239]	Time 2.496 (0.660)	Data Time 0.001 (0.057)	Loss 2.5671 (2.5541)	Entropy 1.09716 (1.09775)	Top-1 acc 62.500 (62.675)	Top-5 acc 82.422 (82.901)	lr 0.01276
Train [60][680/3239]	Time 0.347 (0.655)	Data Time 0.001 (0.056)	Loss 2.7212 (2.5550)	Entropy 1.09705 (1.09774)	Top-1 acc 58.203 (62.654)	Top-5 acc 80.078 (82.883)	lr 0.01276
Train [60][690/3239]	Time 0.212 (0.652)	Data Time 0.001 (0.055)	Loss 2.4048 (2.5553)	Entropy 1.09702 (1.09773)	Top-1 acc 63.672 (62.633)	Top-5 acc 85.156 (82.879)	lr 0.01276
Train [60][700/3239]	Time 0.233 (0.649)	Data Time 0.001 (0.054)	Loss 2.5568 (2.5547)	Entropy 1.09700 (1.09771)	Top-1 acc 64.844 (62.655)	Top-5 acc 84.766 (82.893)	lr 0.01276
Train [60][710/3239]	Time 0.248 (0.646)	Data Time 0.001 (0.054)	Loss 2.5743 (2.5542)	Entropy 1.09705 (1.09771)	Top-1 acc 63.672 (62.670)	Top-5 acc 84.766 (82.908)	lr 0.01276
Train [60][720/3239]	Time 0.230 (0.644)	Data Time 0.001 (0.053)	Loss 2.5037 (2.5546)	Entropy 1.09693 (1.09770)	Top-1 acc 65.234 (62.670)	Top-5 acc 83.203 (82.900)	lr 0.01275
Train [60][730/3239]	Time 0.173 (0.642)	Data Time 0.001 (0.052)	Loss 2.5938 (2.5545)	Entropy 1.09693 (1.09768)	Top-1 acc 59.766 (62.659)	Top-5 acc 83.203 (82.905)	lr 0.01275
Train [60][740/3239]	Time 0.229 (0.640)	Data Time 0.001 (0.052)	Loss 2.5413 (2.5551)	Entropy 1.09692 (1.09767)	Top-1 acc 64.844 (62.640)	Top-5 acc 80.078 (82.881)	lr 0.01275
Train [60][750/3239]	Time 0.230 (0.637)	Data Time 0.001 (0.051)	Loss 2.7468 (2.5554)	Entropy 1.09686 (1.09766)	Top-1 acc 56.641 (62.632)	Top-5 acc 78.906 (82.875)	lr 0.01275
Train [60][760/3239]	Time 0.279 (0.635)	Data Time 0.002 (0.050)	Loss 2.4915 (2.5547)	Entropy 1.09687 (1.09765)	Top-1 acc 62.500 (62.647)	Top-5 acc 82.812 (82.886)	lr 0.01275
Train [60][770/3239]	Time 0.239 (0.633)	Data Time 0.001 (0.050)	Loss 2.5163 (2.5545)	Entropy 1.09686 (1.09764)	Top-1 acc 63.281 (62.653)	Top-5 acc 85.547 (82.891)	lr 0.01275
Train [60][780/3239]	Time 2.431 (0.631)	Data Time 0.001 (0.049)	Loss 2.5247 (2.5546)	Entropy 1.09686 (1.09763)	Top-1 acc 66.797 (62.659)	Top-5 acc 83.594 (82.884)	lr 0.01275
Train [60][790/3239]	Time 0.227 (0.626)	Data Time 0.001 (0.049)	Loss 2.5166 (2.5557)	Entropy 1.09688 (1.09762)	Top-1 acc 64.062 (62.643)	Top-5 acc 83.594 (82.859)	lr 0.01275
Train [60][800/3239]	Time 0.216 (0.624)	Data Time 0.002 (0.048)	Loss 2.5232 (2.5557)	Entropy 1.09678 (1.09761)	Top-1 acc 62.891 (62.640)	Top-5 acc 83.203 (82.859)	lr 0.01275
Train [60][810/3239]	Time 0.243 (0.622)	Data Time 0.001 (0.047)	Loss 2.2711 (2.5552)	Entropy 1.09673 (1.09760)	Top-1 acc 69.531 (62.658)	Top-5 acc 86.328 (82.860)	lr 0.01275
Train [60][820/3239]	Time 0.316 (0.621)	Data Time 0.001 (0.047)	Loss 2.4482 (2.5546)	Entropy 1.09673 (1.09759)	Top-1 acc 62.500 (62.659)	Top-5 acc 85.938 (82.874)	lr 0.01274
Train [60][830/3239]	Time 0.231 (0.619)	Data Time 0.001 (0.046)	Loss 2.5683 (2.5551)	Entropy 1.09675 (1.09758)	Top-1 acc 62.500 (62.652)	Top-5 acc 82.422 (82.868)	lr 0.01274
Train [60][840/3239]	Time 0.211 (0.617)	Data Time 0.001 (0.046)	Loss 2.6641 (2.5553)	Entropy 1.09667 (1.09757)	Top-1 acc 61.328 (62.647)	Top-5 acc 78.906 (82.860)	lr 0.01274
Train [60][850/3239]	Time 0.244 (0.615)	Data Time 0.001 (0.045)	Loss 2.6263 (2.5560)	Entropy 1.09672 (1.09756)	Top-1 acc 60.547 (62.636)	Top-5 acc 81.250 (82.856)	lr 0.01274
Train [60][860/3239]	Time 0.222 (0.614)	Data Time 0.001 (0.045)	Loss 2.5948 (2.5567)	Entropy 1.09672 (1.09755)	Top-1 acc 59.375 (62.616)	Top-5 acc 82.812 (82.843)	lr 0.01274
Train [60][870/3239]	Time 0.223 (0.612)	Data Time 0.001 (0.044)	Loss 2.5795 (2.5565)	Entropy 1.09671 (1.09754)	Top-1 acc 64.844 (62.612)	Top-5 acc 82.812 (82.836)	lr 0.01274
Train [60][880/3239]	Time 0.290 (0.610)	Data Time 0.002 (0.044)	Loss 2.6342 (2.5564)	Entropy 1.09669 (1.09753)	Top-1 acc 62.891 (62.615)	Top-5 acc 82.031 (82.842)	lr 0.01274
Train [60][890/3239]	Time 2.520 (0.609)	Data Time 0.001 (0.043)	Loss 2.4757 (2.5569)	Entropy 1.09669 (1.09752)	Top-1 acc 64.062 (62.596)	Top-5 acc 82.812 (82.825)	lr 0.01274
Train [60][900/3239]	Time 0.238 (0.605)	Data Time 0.001 (0.043)	Loss 2.6465 (2.5570)	Entropy 1.09664 (1.09751)	Top-1 acc 58.203 (62.575)	Top-5 acc 79.688 (82.819)	lr 0.01274
Train [60][910/3239]	Time 0.220 (0.603)	Data Time 0.002 (0.042)	Loss 2.7159 (2.5575)	Entropy 1.09664 (1.09750)	Top-1 acc 60.938 (62.575)	Top-5 acc 78.125 (82.814)	lr 0.01274
Train [60][920/3239]	Time 0.217 (0.602)	Data Time 0.001 (0.042)	Loss 2.6684 (2.5577)	Entropy 1.09664 (1.09749)	Top-1 acc 58.594 (62.577)	Top-5 acc 82.031 (82.817)	lr 0.01273
Train [60][930/3239]	Time 0.229 (0.600)	Data Time 0.001 (0.041)	Loss 2.3672 (2.5578)	Entropy 1.09662 (1.09748)	Top-1 acc 66.797 (62.576)	Top-5 acc 87.891 (82.815)	lr 0.01273
Train [60][940/3239]	Time 0.224 (0.599)	Data Time 0.001 (0.041)	Loss 2.3445 (2.5574)	Entropy 1.09660 (1.09748)	Top-1 acc 69.922 (62.588)	Top-5 acc 87.109 (82.827)	lr 0.01273
Train [60][950/3239]	Time 0.213 (0.598)	Data Time 0.001 (0.041)	Loss 2.6533 (2.5577)	Entropy 1.09659 (1.09747)	Top-1 acc 60.156 (62.581)	Top-5 acc 80.469 (82.820)	lr 0.01273
Train [60][960/3239]	Time 0.365 (0.597)	Data Time 0.001 (0.040)	Loss 2.5309 (2.5575)	Entropy 1.09664 (1.09746)	Top-1 acc 64.844 (62.576)	Top-5 acc 84.375 (82.832)	lr 0.01273
Train [60][970/3239]	Time 0.227 (0.595)	Data Time 0.001 (0.040)	Loss 2.5765 (2.5573)	Entropy 1.09662 (1.09745)	Top-1 acc 61.328 (62.572)	Top-5 acc 82.812 (82.841)	lr 0.01273
Train [60][980/3239]	Time 0.232 (0.594)	Data Time 0.002 (0.039)	Loss 2.6965 (2.5570)	Entropy 1.09661 (1.09744)	Top-1 acc 60.938 (62.583)	Top-5 acc 79.297 (82.850)	lr 0.01273
Train [60][990/3239]	Time 0.256 (0.593)	Data Time 0.001 (0.039)	Loss 2.2988 (2.5568)	Entropy 1.09667 (1.09743)	Top-1 acc 66.797 (62.588)	Top-5 acc 87.891 (82.854)	lr 0.01273
Train [60][1000/3239]	Time 53.326 (0.643)	Data Time 0.001 (0.039)	Loss 2.6093 (2.5570)	Entropy 1.09667 (1.09742)	Top-1 acc 60.938 (62.576)	Top-5 acc 81.250 (82.855)	lr 0.01273
Train [60][1010/3239]	Time 0.501 (0.640)	Data Time 0.002 (0.038)	Loss 2.6107 (2.5566)	Entropy 1.09667 (1.09742)	Top-1 acc 58.984 (62.581)	Top-5 acc 78.125 (82.860)	lr 0.01273
Train [60][1020/3239]	Time 0.226 (0.638)	Data Time 0.002 (0.038)	Loss 2.5353 (2.5570)	Entropy 1.09670 (1.09741)	Top-1 acc 61.719 (62.577)	Top-5 acc 82.031 (82.858)	lr 0.01272
Train [60][1030/3239]	Time 0.209 (0.637)	Data Time 0.001 (0.038)	Loss 2.5462 (2.5570)	Entropy 1.09664 (1.09740)	Top-1 acc 60.938 (62.579)	Top-5 acc 84.766 (82.854)	lr 0.01272
Train [60][1040/3239]	Time 0.212 (0.635)	Data Time 0.001 (0.037)	Loss 2.4167 (2.5567)	Entropy 1.09661 (1.09739)	Top-1 acc 64.844 (62.580)	Top-5 acc 84.375 (82.860)	lr 0.01272
Train [60][1050/3239]	Time 0.332 (0.633)	Data Time 0.002 (0.037)	Loss 2.4725 (2.5572)	Entropy 1.09661 (1.09739)	Top-1 acc 68.359 (62.576)	Top-5 acc 83.594 (82.850)	lr 0.01272
Train [60][1060/3239]	Time 0.241 (0.632)	Data Time 0.002 (0.037)	Loss 2.5020 (2.5569)	Entropy 1.09658 (1.09738)	Top-1 acc 61.719 (62.574)	Top-5 acc 84.375 (82.852)	lr 0.01272
Train [60][1070/3239]	Time 0.222 (0.630)	Data Time 0.001 (0.036)	Loss 2.4210 (2.5573)	Entropy 1.09659 (1.09737)	Top-1 acc 67.578 (62.565)	Top-5 acc 84.766 (82.848)	lr 0.01272
Train [60][1080/3239]	Time 0.295 (0.629)	Data Time 0.001 (0.036)	Loss 2.4856 (2.5587)	Entropy 1.09653 (1.09737)	Top-1 acc 59.375 (62.546)	Top-5 acc 84.766 (82.828)	lr 0.01272
Train [60][1090/3239]	Time 0.225 (0.628)	Data Time 0.001 (0.036)	Loss 2.6394 (2.5590)	Entropy 1.09647 (1.09736)	Top-1 acc 58.984 (62.541)	Top-5 acc 82.031 (82.824)	lr 0.01272
Train [60][1100/3239]	Time 0.198 (0.626)	Data Time 0.001 (0.035)	Loss 2.4535 (2.5590)	Entropy 1.09644 (1.09735)	Top-1 acc 63.672 (62.534)	Top-5 acc 83.594 (82.829)	lr 0.01272
Train [60][1110/3239]	Time 2.511 (0.625)	Data Time 0.001 (0.035)	Loss 2.5869 (2.5595)	Entropy 1.09644 (1.09734)	Top-1 acc 57.031 (62.512)	Top-5 acc 81.641 (82.816)	lr 0.01272
Train [60][1120/3239]	Time 0.233 (0.621)	Data Time 0.002 (0.035)	Loss 2.5472 (2.5596)	Entropy 1.09646 (1.09733)	Top-1 acc 62.500 (62.512)	Top-5 acc 84.375 (82.819)	lr 0.01271
Train [60][1130/3239]	Time 0.216 (0.620)	Data Time 0.001 (0.034)	Loss 2.6135 (2.5596)	Entropy 1.09640 (1.09732)	Top-1 acc 60.938 (62.514)	Top-5 acc 82.422 (82.814)	lr 0.01271
Train [60][1140/3239]	Time 0.203 (0.618)	Data Time 0.001 (0.034)	Loss 2.6140 (2.5602)	Entropy 1.09640 (1.09732)	Top-1 acc 60.938 (62.497)	Top-5 acc 83.203 (82.803)	lr 0.01271
Train [60][1150/3239]	Time 0.221 (0.617)	Data Time 0.001 (0.034)	Loss 2.6491 (2.5604)	Entropy 1.09637 (1.09731)	Top-1 acc 61.719 (62.496)	Top-5 acc 78.906 (82.796)	lr 0.01271
Train [60][1160/3239]	Time 0.248 (0.616)	Data Time 0.001 (0.034)	Loss 2.5911 (2.5604)	Entropy 1.09638 (1.09730)	Top-1 acc 65.234 (62.501)	Top-5 acc 80.078 (82.793)	lr 0.01271
Train [60][1170/3239]	Time 0.232 (0.614)	Data Time 0.001 (0.033)	Loss 2.5303 (2.5602)	Entropy 1.09631 (1.09729)	Top-1 acc 64.453 (62.507)	Top-5 acc 80.859 (82.793)	lr 0.01271
Train [60][1180/3239]	Time 0.227 (0.613)	Data Time 0.001 (0.033)	Loss 2.4180 (2.5603)	Entropy 1.09628 (1.09728)	Top-1 acc 67.969 (62.502)	Top-5 acc 85.938 (82.790)	lr 0.01271
Train [60][1190/3239]	Time 0.368 (0.612)	Data Time 0.001 (0.033)	Loss 2.4534 (2.5608)	Entropy 1.09628 (1.09728)	Top-1 acc 64.844 (62.491)	Top-5 acc 81.641 (82.770)	lr 0.01271
Train [60][1200/3239]	Time 0.254 (0.611)	Data Time 0.001 (0.033)	Loss 2.5897 (2.5609)	Entropy 1.09626 (1.09727)	Top-1 acc 58.594 (62.484)	Top-5 acc 81.641 (82.766)	lr 0.01271
Train [60][1210/3239]	Time 0.233 (0.610)	Data Time 0.001 (0.032)	Loss 2.8256 (2.5611)	Entropy 1.09628 (1.09726)	Top-1 acc 55.859 (62.468)	Top-5 acc 76.172 (82.761)	lr 0.01271
Train [60][1220/3239]	Time 2.443 (0.609)	Data Time 0.001 (0.032)	Loss 2.7057 (2.5617)	Entropy 1.09628 (1.09725)	Top-1 acc 54.688 (62.454)	Top-5 acc 78.906 (82.748)	lr 0.01270
Train [60][1230/3239]	Time 0.225 (0.606)	Data Time 0.001 (0.032)	Loss 2.6172 (2.5619)	Entropy 1.09624 (1.09724)	Top-1 acc 62.891 (62.444)	Top-5 acc 80.859 (82.745)	lr 0.01270
Train [60][1240/3239]	Time 0.269 (0.605)	Data Time 0.001 (0.032)	Loss 2.7843 (2.5621)	Entropy 1.09627 (1.09723)	Top-1 acc 60.938 (62.446)	Top-5 acc 78.125 (82.744)	lr 0.01270
Train [60][1250/3239]	Time 0.229 (0.604)	Data Time 0.001 (0.031)	Loss 2.6483 (2.5622)	Entropy 1.09624 (1.09723)	Top-1 acc 61.719 (62.442)	Top-5 acc 78.125 (82.733)	lr 0.01270
Train [60][1260/3239]	Time 0.242 (0.603)	Data Time 0.001 (0.031)	Loss 2.5986 (2.5623)	Entropy 1.09619 (1.09722)	Top-1 acc 61.719 (62.436)	Top-5 acc 82.812 (82.733)	lr 0.01270
Train [60][1270/3239]	Time 0.250 (0.602)	Data Time 0.001 (0.031)	Loss 2.6889 (2.5623)	Entropy 1.09615 (1.09721)	Top-1 acc 61.328 (62.434)	Top-5 acc 79.297 (82.738)	lr 0.01270
Train [60][1280/3239]	Time 0.216 (0.601)	Data Time 0.001 (0.031)	Loss 2.5731 (2.5625)	Entropy 1.09614 (1.09720)	Top-1 acc 62.891 (62.438)	Top-5 acc 82.422 (82.736)	lr 0.01270
Train [60][1290/3239]	Time 0.210 (0.600)	Data Time 0.001 (0.030)	Loss 2.5852 (2.5627)	Entropy 1.09611 (1.09719)	Top-1 acc 60.547 (62.426)	Top-5 acc 82.812 (82.734)	lr 0.01270
Train [60][1300/3239]	Time 0.228 (0.599)	Data Time 0.001 (0.030)	Loss 2.5972 (2.5625)	Entropy 1.09612 (1.09719)	Top-1 acc 62.500 (62.428)	Top-5 acc 82.812 (82.738)	lr 0.01270
Train [60][1310/3239]	Time 0.207 (0.598)	Data Time 0.001 (0.030)	Loss 2.3928 (2.5623)	Entropy 1.09610 (1.09718)	Top-1 acc 64.844 (62.429)	Top-5 acc 85.938 (82.737)	lr 0.01269
Train [60][1320/3239]	Time 0.241 (0.597)	Data Time 0.001 (0.030)	Loss 2.4611 (2.5625)	Entropy 1.09608 (1.09717)	Top-1 acc 62.891 (62.418)	Top-5 acc 85.156 (82.736)	lr 0.01269
Train [60][1330/3239]	Time 2.597 (0.596)	Data Time 0.002 (0.030)	Loss 2.5064 (2.5630)	Entropy 1.09608 (1.09716)	Top-1 acc 62.109 (62.401)	Top-5 acc 82.422 (82.725)	lr 0.01269
Train [60][1340/3239]	Time 0.224 (0.593)	Data Time 0.001 (0.029)	Loss 2.5132 (2.5632)	Entropy 1.09605 (1.09715)	Top-1 acc 62.891 (62.387)	Top-5 acc 85.156 (82.730)	lr 0.01269
Train [60][1350/3239]	Time 0.234 (0.592)	Data Time 0.001 (0.029)	Loss 2.5299 (2.5634)	Entropy 1.09607 (1.09714)	Top-1 acc 63.281 (62.382)	Top-5 acc 82.422 (82.726)	lr 0.01269
Train [60][1360/3239]	Time 0.235 (0.591)	Data Time 0.002 (0.029)	Loss 2.6315 (2.5638)	Entropy 1.09603 (1.09714)	Top-1 acc 60.547 (62.381)	Top-5 acc 82.812 (82.714)	lr 0.01269
Train [60][1370/3239]	Time 0.232 (0.632)	Data Time 0.002 (0.029)	Loss 2.6325 (2.5640)	Entropy 1.09603 (1.09713)	Top-1 acc 58.594 (62.373)	Top-5 acc 81.641 (82.708)	lr 0.01269
Train [60][1380/3239]	Time 0.234 (0.631)	Data Time 0.002 (0.029)	Loss 2.5969 (2.5638)	Entropy 1.09601 (1.09712)	Top-1 acc 60.156 (62.371)	Top-5 acc 81.250 (82.719)	lr 0.01269
Train [60][1390/3239]	Time 0.234 (0.630)	Data Time 0.002 (0.028)	Loss 2.4384 (2.5637)	Entropy 1.09598 (1.09711)	Top-1 acc 66.797 (62.377)	Top-5 acc 83.984 (82.722)	lr 0.01269
Train [60][1400/3239]	Time 0.230 (0.628)	Data Time 0.001 (0.028)	Loss 2.6058 (2.5634)	Entropy 1.09593 (1.09710)	Top-1 acc 58.203 (62.386)	Top-5 acc 83.203 (82.727)	lr 0.01269
Train [60][1410/3239]	Time 0.209 (0.627)	Data Time 0.001 (0.028)	Loss 2.4718 (2.5633)	Entropy 1.09589 (1.09710)	Top-1 acc 65.234 (62.388)	Top-5 acc 84.766 (82.728)	lr 0.01268
Train [60][1420/3239]	Time 0.322 (0.626)	Data Time 0.001 (0.028)	Loss 2.6080 (2.5632)	Entropy 1.09587 (1.09709)	Top-1 acc 59.766 (62.390)	Top-5 acc 83.203 (82.728)	lr 0.01268
Train [60][1430/3239]	Time 0.230 (0.625)	Data Time 0.001 (0.028)	Loss 2.6236 (2.5634)	Entropy 1.09583 (1.09708)	Top-1 acc 59.375 (62.385)	Top-5 acc 80.859 (82.729)	lr 0.01268
Train [60][1440/3239]	Time 2.397 (0.624)	Data Time 0.001 (0.027)	Loss 2.5189 (2.5634)	Entropy 1.09583 (1.09707)	Top-1 acc 60.156 (62.386)	Top-5 acc 84.375 (82.729)	lr 0.01268
Train [60][1450/3239]	Time 0.223 (0.621)	Data Time 0.001 (0.027)	Loss 2.6654 (2.5635)	Entropy 1.09580 (1.09706)	Top-1 acc 58.594 (62.382)	Top-5 acc 81.250 (82.730)	lr 0.01268
Train [60][1460/3239]	Time 0.223 (0.620)	Data Time 0.001 (0.027)	Loss 2.5831 (2.5635)	Entropy 1.09571 (1.09705)	Top-1 acc 63.672 (62.380)	Top-5 acc 83.203 (82.733)	lr 0.01268
Train [60][1470/3239]	Time 0.335 (0.619)	Data Time 0.001 (0.027)	Loss 2.5866 (2.5636)	Entropy 1.09567 (1.09704)	Top-1 acc 60.938 (62.373)	Top-5 acc 80.469 (82.736)	lr 0.01268
Train [60][1480/3239]	Time 0.228 (0.618)	Data Time 0.001 (0.027)	Loss 2.3367 (2.5638)	Entropy 1.09556 (1.09703)	Top-1 acc 66.016 (62.366)	Top-5 acc 86.719 (82.735)	lr 0.01268
Train [60][1490/3239]	Time 0.226 (0.617)	Data Time 0.001 (0.027)	Loss 2.5751 (2.5638)	Entropy 1.09558 (1.09702)	Top-1 acc 60.547 (62.364)	Top-5 acc 82.031 (82.738)	lr 0.01268
Train [60][1500/3239]	Time 0.221 (0.616)	Data Time 0.001 (0.026)	Loss 2.5746 (2.5640)	Entropy 1.09556 (1.09701)	Top-1 acc 63.281 (62.367)	Top-5 acc 82.812 (82.729)	lr 0.01268
Train [60][1510/3239]	Time 0.242 (0.615)	Data Time 0.001 (0.026)	Loss 2.6221 (2.5640)	Entropy 1.09554 (1.09700)	Top-1 acc 57.812 (62.361)	Top-5 acc 82.422 (82.727)	lr 0.01267
Train [60][1520/3239]	Time 0.228 (0.615)	Data Time 0.001 (0.026)	Loss 2.6215 (2.5639)	Entropy 1.09556 (1.09699)	Top-1 acc 58.594 (62.357)	Top-5 acc 82.812 (82.723)	lr 0.01267
Train [60][1530/3239]	Time 0.213 (0.613)	Data Time 0.001 (0.026)	Loss 2.6350 (2.5638)	Entropy 1.09554 (1.09698)	Top-1 acc 62.109 (62.359)	Top-5 acc 82.812 (82.729)	lr 0.01267
Train [60][1540/3239]	Time 0.171 (0.612)	Data Time 0.002 (0.026)	Loss 2.6790 (2.5642)	Entropy 1.09550 (1.09698)	Top-1 acc 58.984 (62.348)	Top-5 acc 81.250 (82.723)	lr 0.01267
Train [60][1550/3239]	Time 2.450 (0.612)	Data Time 0.001 (0.026)	Loss 2.4501 (2.5640)	Entropy 1.09550 (1.09697)	Top-1 acc 65.234 (62.352)	Top-5 acc 84.375 (82.725)	lr 0.01267
Train [60][1560/3239]	Time 0.212 (0.609)	Data Time 0.001 (0.025)	Loss 2.6341 (2.5645)	Entropy 1.09546 (1.09696)	Top-1 acc 62.109 (62.346)	Top-5 acc 80.859 (82.713)	lr 0.01267
Train [60][1570/3239]	Time 0.211 (0.608)	Data Time 0.001 (0.025)	Loss 2.4769 (2.5645)	Entropy 1.09545 (1.09695)	Top-1 acc 64.062 (62.345)	Top-5 acc 84.375 (82.713)	lr 0.01267
Train [60][1580/3239]	Time 0.245 (0.607)	Data Time 0.001 (0.025)	Loss 2.4579 (2.5644)	Entropy 1.09544 (1.09694)	Top-1 acc 66.406 (62.349)	Top-5 acc 84.766 (82.712)	lr 0.01267
Train [60][1590/3239]	Time 0.269 (0.606)	Data Time 0.001 (0.025)	Loss 2.4791 (2.5645)	Entropy 1.09545 (1.09693)	Top-1 acc 66.016 (62.345)	Top-5 acc 82.422 (82.710)	lr 0.01267
Train [60][1600/3239]	Time 0.221 (0.605)	Data Time 0.001 (0.025)	Loss 2.5275 (2.5645)	Entropy 1.09543 (1.09692)	Top-1 acc 64.062 (62.343)	Top-5 acc 83.984 (82.714)	lr 0.01267
Train [60][1610/3239]	Time 0.229 (0.605)	Data Time 0.001 (0.025)	Loss 2.8032 (2.5650)	Entropy 1.09544 (1.09691)	Top-1 acc 54.688 (62.329)	Top-5 acc 80.078 (82.705)	lr 0.01266
Train [60][1620/3239]	Time 0.225 (0.604)	Data Time 0.001 (0.025)	Loss 2.5601 (2.5650)	Entropy 1.09542 (1.09690)	Top-1 acc 59.375 (62.325)	Top-5 acc 82.812 (82.706)	lr 0.01266
Train [60][1630/3239]	Time 0.221 (0.603)	Data Time 0.001 (0.024)	Loss 2.6062 (2.5651)	Entropy 1.09542 (1.09689)	Top-1 acc 62.500 (62.315)	Top-5 acc 82.422 (82.707)	lr 0.01266
Train [60][1640/3239]	Time 0.212 (0.602)	Data Time 0.001 (0.024)	Loss 2.5717 (2.5655)	Entropy 1.09539 (1.09688)	Top-1 acc 62.500 (62.312)	Top-5 acc 80.859 (82.697)	lr 0.01266
Train [60][1650/3239]	Time 0.250 (0.601)	Data Time 0.001 (0.024)	Loss 2.5076 (2.5657)	Entropy 1.09537 (1.09687)	Top-1 acc 63.672 (62.301)	Top-5 acc 82.812 (82.690)	lr 0.01266
Train [60][1660/3239]	Time 2.469 (0.600)	Data Time 0.001 (0.024)	Loss 2.3646 (2.5655)	Entropy 1.09537 (1.09686)	Top-1 acc 65.234 (62.302)	Top-5 acc 86.719 (82.697)	lr 0.01266
Train [60][1670/3239]	Time 0.231 (0.598)	Data Time 0.001 (0.024)	Loss 2.5879 (2.5657)	Entropy 1.09530 (1.09685)	Top-1 acc 60.547 (62.296)	Top-5 acc 82.031 (82.697)	lr 0.01266
Train [60][1680/3239]	Time 0.295 (0.597)	Data Time 0.004 (0.024)	Loss 2.5467 (2.5659)	Entropy 1.09529 (1.09685)	Top-1 acc 61.719 (62.287)	Top-5 acc 82.812 (82.690)	lr 0.01266
Train [60][1690/3239]	Time 0.242 (0.597)	Data Time 0.001 (0.024)	Loss 2.5184 (2.5662)	Entropy 1.09522 (1.09684)	Top-1 acc 64.844 (62.284)	Top-5 acc 83.984 (82.685)	lr 0.01266
Train [60][1700/3239]	Time 0.326 (0.596)	Data Time 0.001 (0.023)	Loss 2.6187 (2.5662)	Entropy 1.09523 (1.09683)	Top-1 acc 65.234 (62.291)	Top-5 acc 80.859 (82.687)	lr 0.01266
Train [60][1710/3239]	Time 0.223 (0.595)	Data Time 0.001 (0.023)	Loss 2.4447 (2.5662)	Entropy 1.09518 (1.09682)	Top-1 acc 66.797 (62.292)	Top-5 acc 85.547 (82.683)	lr 0.01265
Train [60][1720/3239]	Time 0.226 (0.594)	Data Time 0.001 (0.023)	Loss 2.6862 (2.5660)	Entropy 1.09511 (1.09681)	Top-1 acc 60.547 (62.299)	Top-5 acc 80.859 (82.684)	lr 0.01265
Train [60][1730/3239]	Time 0.231 (0.625)	Data Time 0.002 (0.023)	Loss 2.6233 (2.5662)	Entropy 1.09511 (1.09680)	Top-1 acc 64.453 (62.297)	Top-5 acc 81.250 (82.679)	lr 0.01265
Train [60][1740/3239]	Time 0.237 (0.624)	Data Time 0.002 (0.023)	Loss 2.6606 (2.5666)	Entropy 1.09504 (1.09679)	Top-1 acc 59.375 (62.288)	Top-5 acc 80.859 (82.672)	lr 0.01265
Train [60][1750/3239]	Time 0.232 (0.623)	Data Time 0.001 (0.023)	Loss 2.3584 (2.5666)	Entropy 1.09505 (1.09678)	Top-1 acc 64.844 (62.282)	Top-5 acc 86.328 (82.676)	lr 0.01265
Train [60][1760/3239]	Time 0.226 (0.622)	Data Time 0.001 (0.023)	Loss 2.6753 (2.5669)	Entropy 1.09496 (1.09677)	Top-1 acc 58.594 (62.275)	Top-5 acc 80.469 (82.671)	lr 0.01265
Train [60][1770/3239]	Time 2.560 (0.621)	Data Time 0.001 (0.023)	Loss 2.5189 (2.5667)	Entropy 1.09496 (1.09676)	Top-1 acc 59.375 (62.279)	Top-5 acc 83.984 (82.675)	lr 0.01265
Train [60][1780/3239]	Time 0.212 (0.619)	Data Time 0.001 (0.023)	Loss 2.5622 (2.5667)	Entropy 1.09488 (1.09675)	Top-1 acc 65.234 (62.285)	Top-5 acc 84.375 (82.678)	lr 0.01265
Train [60][1790/3239]	Time 0.226 (0.619)	Data Time 0.001 (0.022)	Loss 2.6500 (2.5668)	Entropy 1.09485 (1.09674)	Top-1 acc 58.594 (62.281)	Top-5 acc 80.859 (82.672)	lr 0.01265
Train [60][1800/3239]	Time 0.206 (0.618)	Data Time 0.001 (0.022)	Loss 2.5114 (2.5668)	Entropy 1.09485 (1.09673)	Top-1 acc 69.141 (62.282)	Top-5 acc 83.594 (82.669)	lr 0.01265
Train [60][1810/3239]	Time 0.235 (0.617)	Data Time 0.002 (0.022)	Loss 2.5145 (2.5667)	Entropy 1.09484 (1.09672)	Top-1 acc 64.844 (62.283)	Top-5 acc 82.031 (82.671)	lr 0.01264
Train [60][1820/3239]	Time 0.205 (0.616)	Data Time 0.001 (0.022)	Loss 2.4075 (2.5663)	Entropy 1.09483 (1.09671)	Top-1 acc 68.750 (62.289)	Top-5 acc 86.719 (82.679)	lr 0.01264
Train [60][1830/3239]	Time 0.228 (0.615)	Data Time 0.001 (0.022)	Loss 2.7744 (2.5666)	Entropy 1.09485 (1.09670)	Top-1 acc 57.031 (62.280)	Top-5 acc 76.562 (82.673)	lr 0.01264
Train [60][1840/3239]	Time 0.327 (0.614)	Data Time 0.001 (0.022)	Loss 2.5970 (2.5668)	Entropy 1.09482 (1.09669)	Top-1 acc 60.156 (62.275)	Top-5 acc 82.031 (82.666)	lr 0.01264
Train [60][1850/3239]	Time 0.227 (0.614)	Data Time 0.001 (0.022)	Loss 2.5426 (2.5667)	Entropy 1.09479 (1.09667)	Top-1 acc 64.844 (62.276)	Top-5 acc 81.250 (82.666)	lr 0.01264
Train [60][1860/3239]	Time 0.252 (0.613)	Data Time 0.001 (0.022)	Loss 2.4298 (2.5665)	Entropy 1.09472 (1.09666)	Top-1 acc 68.750 (62.285)	Top-5 acc 83.984 (82.668)	lr 0.01264
Train [60][1870/3239]	Time 0.282 (0.612)	Data Time 0.001 (0.022)	Loss 2.5530 (2.5665)	Entropy 1.09474 (1.09665)	Top-1 acc 60.938 (62.287)	Top-5 acc 80.859 (82.665)	lr 0.01264
Train [60][1880/3239]	Time 2.514 (0.611)	Data Time 0.001 (0.021)	Loss 2.5204 (2.5665)	Entropy 1.09474 (1.09664)	Top-1 acc 61.328 (62.288)	Top-5 acc 83.594 (82.664)	lr 0.01264
Train [60][1890/3239]	Time 0.239 (0.609)	Data Time 0.001 (0.021)	Loss 2.4669 (2.5665)	Entropy 1.09474 (1.09663)	Top-1 acc 67.578 (62.288)	Top-5 acc 85.938 (82.665)	lr 0.01264
Train [60][1900/3239]	Time 0.235 (0.609)	Data Time 0.001 (0.021)	Loss 2.6282 (2.5666)	Entropy 1.09466 (1.09662)	Top-1 acc 59.766 (62.285)	Top-5 acc 80.859 (82.663)	lr 0.01264
Train [60][1910/3239]	Time 0.252 (0.608)	Data Time 0.002 (0.021)	Loss 2.7428 (2.5664)	Entropy 1.09464 (1.09661)	Top-1 acc 60.156 (62.283)	Top-5 acc 77.344 (82.667)	lr 0.01263
Train [60][1920/3239]	Time 0.226 (0.607)	Data Time 0.001 (0.021)	Loss 2.5838 (2.5666)	Entropy 1.09462 (1.09660)	Top-1 acc 62.500 (62.275)	Top-5 acc 80.078 (82.662)	lr 0.01263
Train [60][1930/3239]	Time 0.285 (0.606)	Data Time 0.002 (0.021)	Loss 2.6735 (2.5667)	Entropy 1.09462 (1.09659)	Top-1 acc 60.156 (62.271)	Top-5 acc 79.297 (82.661)	lr 0.01263
Train [60][1940/3239]	Time 0.209 (0.606)	Data Time 0.001 (0.021)	Loss 2.5240 (2.5668)	Entropy 1.09459 (1.09658)	Top-1 acc 63.281 (62.266)	Top-5 acc 83.203 (82.658)	lr 0.01263
Train [60][1950/3239]	Time 0.202 (0.605)	Data Time 0.001 (0.021)	Loss 2.4407 (2.5668)	Entropy 1.09457 (1.09657)	Top-1 acc 60.547 (62.264)	Top-5 acc 83.984 (82.654)	lr 0.01263
Train [60][1960/3239]	Time 0.223 (0.604)	Data Time 0.001 (0.021)	Loss 2.5927 (2.5667)	Entropy 1.09457 (1.09656)	Top-1 acc 58.594 (62.264)	Top-5 acc 82.422 (82.654)	lr 0.01263
Train [60][1970/3239]	Time 0.222 (0.603)	Data Time 0.001 (0.021)	Loss 2.6683 (2.5668)	Entropy 1.09452 (1.09655)	Top-1 acc 59.766 (62.266)	Top-5 acc 81.250 (82.650)	lr 0.01263
Train [60][1980/3239]	Time 0.271 (0.603)	Data Time 0.002 (0.020)	Loss 2.4511 (2.5668)	Entropy 1.09451 (1.09654)	Top-1 acc 65.625 (62.267)	Top-5 acc 84.375 (82.649)	lr 0.01263
Train [60][1990/3239]	Time 2.431 (0.602)	Data Time 0.001 (0.020)	Loss 2.4616 (2.5667)	Entropy 1.09451 (1.09653)	Top-1 acc 64.844 (62.268)	Top-5 acc 86.719 (82.648)	lr 0.01263
Train [60][2000/3239]	Time 0.254 (0.600)	Data Time 0.001 (0.020)	Loss 2.5401 (2.5668)	Entropy 1.09444 (1.09652)	Top-1 acc 60.938 (62.264)	Top-5 acc 85.156 (82.649)	lr 0.01263
Train [60][2010/3239]	Time 0.224 (0.600)	Data Time 0.001 (0.020)	Loss 2.5935 (2.5669)	Entropy 1.09437 (1.09651)	Top-1 acc 60.156 (62.257)	Top-5 acc 80.469 (82.648)	lr 0.01262
Train [60][2020/3239]	Time 0.235 (0.599)	Data Time 0.001 (0.020)	Loss 2.6068 (2.5671)	Entropy 1.09436 (1.09650)	Top-1 acc 61.719 (62.249)	Top-5 acc 83.984 (82.645)	lr 0.01262
Train [60][2030/3239]	Time 0.227 (0.598)	Data Time 0.001 (0.020)	Loss 2.7243 (2.5674)	Entropy 1.09438 (1.09649)	Top-1 acc 58.984 (62.245)	Top-5 acc 79.297 (82.639)	lr 0.01262
Train [60][2040/3239]	Time 0.241 (0.598)	Data Time 0.001 (0.020)	Loss 2.5173 (2.5673)	Entropy 1.09432 (1.09648)	Top-1 acc 64.453 (62.250)	Top-5 acc 84.766 (82.642)	lr 0.01262
Train [60][2050/3239]	Time 0.226 (0.597)	Data Time 0.001 (0.020)	Loss 2.7258 (2.5675)	Entropy 1.09429 (1.09647)	Top-1 acc 57.422 (62.249)	Top-5 acc 78.906 (82.636)	lr 0.01262
Train [60][2060/3239]	Time 0.228 (0.596)	Data Time 0.001 (0.020)	Loss 2.6027 (2.5679)	Entropy 1.09429 (1.09646)	Top-1 acc 60.938 (62.237)	Top-5 acc 82.812 (82.632)	lr 0.01262
Train [60][2070/3239]	Time 0.321 (0.596)	Data Time 0.001 (0.020)	Loss 2.7423 (2.5680)	Entropy 1.09429 (1.09645)	Top-1 acc 54.688 (62.237)	Top-5 acc 80.469 (82.633)	lr 0.01262
Train [60][2080/3239]	Time 0.226 (0.595)	Data Time 0.001 (0.020)	Loss 2.4944 (2.5680)	Entropy 1.09425 (1.09644)	Top-1 acc 64.844 (62.239)	Top-5 acc 80.859 (82.629)	lr 0.01262
Train [60][2090/3239]	Time 0.310 (0.618)	Data Time 0.006 (0.019)	Loss 2.5597 (2.5678)	Entropy 1.09420 (1.09643)	Top-1 acc 64.844 (62.247)	Top-5 acc 84.375 (82.632)	lr 0.01262
Train [60][2100/3239]	Time 3.052 (0.618)	Data Time 0.004 (0.019)	Loss 2.4945 (2.5677)	Entropy 1.09420 (1.09642)	Top-1 acc 59.766 (62.248)	Top-5 acc 85.547 (82.638)	lr 0.01262
Train [60][2110/3239]	Time 0.243 (0.616)	Data Time 0.002 (0.019)	Loss 2.4360 (2.5677)	Entropy 1.09411 (1.09640)	Top-1 acc 65.625 (62.242)	Top-5 acc 85.938 (82.635)	lr 0.01261
Train [60][2120/3239]	Time 0.249 (0.616)	Data Time 0.002 (0.019)	Loss 2.4602 (2.5678)	Entropy 1.09401 (1.09639)	Top-1 acc 66.406 (62.241)	Top-5 acc 82.031 (82.631)	lr 0.01261
Train [60][2130/3239]	Time 0.238 (0.615)	Data Time 0.001 (0.019)	Loss 2.6469 (2.5680)	Entropy 1.09392 (1.09638)	Top-1 acc 60.547 (62.236)	Top-5 acc 80.469 (82.628)	lr 0.01261
Train [60][2140/3239]	Time 0.240 (0.614)	Data Time 0.002 (0.019)	Loss 2.6130 (2.5679)	Entropy 1.09391 (1.09637)	Top-1 acc 63.672 (62.241)	Top-5 acc 83.203 (82.632)	lr 0.01261
Train [60][2150/3239]	Time 0.234 (0.614)	Data Time 0.002 (0.019)	Loss 2.5328 (2.5678)	Entropy 1.09386 (1.09636)	Top-1 acc 62.891 (62.246)	Top-5 acc 82.031 (82.634)	lr 0.01261
Train [60][2160/3239]	Time 0.270 (0.613)	Data Time 0.001 (0.019)	Loss 2.4237 (2.5680)	Entropy 1.09378 (1.09635)	Top-1 acc 66.016 (62.240)	Top-5 acc 85.938 (82.630)	lr 0.01261
Train [60][2170/3239]	Time 0.207 (0.612)	Data Time 0.001 (0.019)	Loss 2.5963 (2.5678)	Entropy 1.09377 (1.09634)	Top-1 acc 63.281 (62.244)	Top-5 acc 84.375 (82.634)	lr 0.01261
Train [60][2180/3239]	Time 0.211 (0.612)	Data Time 0.001 (0.019)	Loss 2.6758 (2.5679)	Entropy 1.09371 (1.09632)	Top-1 acc 58.984 (62.242)	Top-5 acc 80.469 (82.629)	lr 0.01261
Train [60][2190/3239]	Time 0.261 (0.611)	Data Time 0.001 (0.019)	Loss 2.7562 (2.5681)	Entropy 1.09371 (1.09631)	Top-1 acc 57.812 (62.238)	Top-5 acc 78.125 (82.625)	lr 0.01261
Train [60][2200/3239]	Time 0.242 (0.610)	Data Time 0.001 (0.019)	Loss 2.5443 (2.5682)	Entropy 1.09368 (1.09630)	Top-1 acc 63.281 (62.235)	Top-5 acc 81.641 (82.621)	lr 0.01261
Train [60][2210/3239]	Time 2.543 (0.610)	Data Time 0.001 (0.018)	Loss 2.5537 (2.5680)	Entropy 1.09368 (1.09629)	Top-1 acc 68.750 (62.247)	Top-5 acc 83.203 (82.624)	lr 0.01260
Train [60][2220/3239]	Time 0.234 (0.608)	Data Time 0.001 (0.018)	Loss 2.4478 (2.5681)	Entropy 1.09371 (1.09628)	Top-1 acc 66.406 (62.249)	Top-5 acc 85.547 (82.624)	lr 0.01260
Train [60][2230/3239]	Time 0.254 (0.608)	Data Time 0.002 (0.018)	Loss 2.6701 (2.5680)	Entropy 1.09370 (1.09626)	Top-1 acc 64.844 (62.257)	Top-5 acc 80.078 (82.625)	lr 0.01260
Train [60][2240/3239]	Time 0.219 (0.607)	Data Time 0.001 (0.018)	Loss 2.6475 (2.5680)	Entropy 1.09367 (1.09625)	Top-1 acc 58.203 (62.256)	Top-5 acc 79.688 (82.622)	lr 0.01260
Train [60][2250/3239]	Time 0.230 (0.606)	Data Time 0.001 (0.018)	Loss 2.6627 (2.5680)	Entropy 1.09361 (1.09624)	Top-1 acc 59.375 (62.255)	Top-5 acc 80.469 (82.618)	lr 0.01260
Train [60][2260/3239]	Time 0.218 (0.606)	Data Time 0.001 (0.018)	Loss 2.6729 (2.5680)	Entropy 1.09354 (1.09623)	Top-1 acc 59.375 (62.255)	Top-5 acc 79.688 (82.621)	lr 0.01260
Train [60][2270/3239]	Time 0.228 (0.605)	Data Time 0.001 (0.018)	Loss 2.4598 (2.5682)	Entropy 1.09349 (1.09622)	Top-1 acc 62.891 (62.250)	Top-5 acc 85.547 (82.619)	lr 0.01260
Train [60][2280/3239]	Time 0.213 (0.604)	Data Time 0.001 (0.018)	Loss 2.4824 (2.5682)	Entropy 1.09345 (1.09621)	Top-1 acc 62.109 (62.248)	Top-5 acc 83.984 (82.617)	lr 0.01260
Train [60][2290/3239]	Time 0.224 (0.604)	Data Time 0.001 (0.018)	Loss 2.5645 (2.5683)	Entropy 1.09348 (1.09619)	Top-1 acc 63.672 (62.247)	Top-5 acc 80.469 (82.617)	lr 0.01260
Train [60][2300/3239]	Time 0.228 (0.603)	Data Time 0.001 (0.018)	Loss 2.3049 (2.5680)	Entropy 1.09346 (1.09618)	Top-1 acc 66.797 (62.257)	Top-5 acc 88.281 (82.620)	lr 0.01259
Train [60][2310/3239]	Time 0.268 (0.603)	Data Time 0.001 (0.018)	Loss 2.5453 (2.5682)	Entropy 1.09343 (1.09617)	Top-1 acc 63.672 (62.253)	Top-5 acc 82.422 (82.617)	lr 0.01259
Train [60][2320/3239]	Time 2.548 (0.602)	Data Time 0.001 (0.018)	Loss 2.4952 (2.5680)	Entropy 1.09343 (1.09616)	Top-1 acc 65.625 (62.262)	Top-5 acc 82.422 (82.625)	lr 0.01259
Train [60][2330/3239]	Time 0.216 (0.601)	Data Time 0.002 (0.018)	Loss 2.6336 (2.5680)	Entropy 1.09338 (1.09615)	Top-1 acc 60.547 (62.260)	Top-5 acc 83.984 (82.625)	lr 0.01259
Train [60][2340/3239]	Time 0.237 (0.600)	Data Time 0.001 (0.018)	Loss 2.6148 (2.5682)	Entropy 1.09339 (1.09613)	Top-1 acc 58.594 (62.253)	Top-5 acc 85.156 (82.624)	lr 0.01259
Train [60][2350/3239]	Time 0.217 (0.600)	Data Time 0.001 (0.017)	Loss 2.5616 (2.5684)	Entropy 1.09332 (1.09612)	Top-1 acc 63.672 (62.251)	Top-5 acc 83.594 (82.618)	lr 0.01259
Train [60][2360/3239]	Time 0.222 (0.599)	Data Time 0.001 (0.017)	Loss 2.6412 (2.5686)	Entropy 1.09329 (1.09611)	Top-1 acc 62.109 (62.243)	Top-5 acc 82.031 (82.617)	lr 0.01259
Train [60][2370/3239]	Time 0.221 (0.598)	Data Time 0.001 (0.017)	Loss 2.4297 (2.5685)	Entropy 1.09329 (1.09610)	Top-1 acc 65.625 (62.246)	Top-5 acc 86.328 (82.623)	lr 0.01259
Train [60][2380/3239]	Time 0.219 (0.598)	Data Time 0.001 (0.017)	Loss 2.4820 (2.5685)	Entropy 1.09325 (1.09609)	Top-1 acc 64.844 (62.251)	Top-5 acc 80.078 (82.621)	lr 0.01259
Train [60][2390/3239]	Time 0.246 (0.597)	Data Time 0.001 (0.017)	Loss 2.6133 (2.5684)	Entropy 1.09318 (1.09607)	Top-1 acc 59.766 (62.254)	Top-5 acc 82.422 (82.625)	lr 0.01259
Train [60][2400/3239]	Time 0.317 (0.597)	Data Time 0.001 (0.017)	Loss 2.5598 (2.5683)	Entropy 1.09315 (1.09606)	Top-1 acc 63.672 (62.254)	Top-5 acc 84.375 (82.629)	lr 0.01258
Train [60][2410/3239]	Time 0.230 (0.596)	Data Time 0.002 (0.017)	Loss 2.5779 (2.5684)	Entropy 1.09313 (1.09605)	Top-1 acc 59.375 (62.247)	Top-5 acc 80.469 (82.628)	lr 0.01258
Train [60][2420/3239]	Time 0.278 (0.596)	Data Time 0.001 (0.017)	Loss 2.3956 (2.5685)	Entropy 1.09307 (1.09604)	Top-1 acc 63.281 (62.243)	Top-5 acc 86.328 (82.625)	lr 0.01258
Train [60][2430/3239]	Time 2.479 (0.595)	Data Time 0.001 (0.017)	Loss 2.3348 (2.5685)	Entropy 1.09307 (1.09603)	Top-1 acc 70.703 (62.245)	Top-5 acc 83.984 (82.622)	lr 0.01258
Train [60][2440/3239]	Time 0.223 (0.594)	Data Time 0.001 (0.017)	Loss 2.4126 (2.5684)	Entropy 1.09306 (1.09601)	Top-1 acc 66.016 (62.252)	Top-5 acc 84.766 (82.622)	lr 0.01258
Train [60][2450/3239]	Time 0.321 (0.593)	Data Time 0.001 (0.017)	Loss 2.7007 (2.5686)	Entropy 1.09312 (1.09600)	Top-1 acc 58.594 (62.248)	Top-5 acc 77.344 (82.619)	lr 0.01258
Train [60][2460/3239]	Time 0.229 (0.615)	Data Time 0.002 (0.017)	Loss 2.6228 (2.5687)	Entropy 1.09307 (1.09599)	Top-1 acc 63.281 (62.249)	Top-5 acc 82.812 (82.619)	lr 0.01258
Train [60][2470/3239]	Time 0.221 (0.614)	Data Time 0.002 (0.017)	Loss 2.5137 (2.5688)	Entropy 1.09293 (1.09598)	Top-1 acc 62.891 (62.245)	Top-5 acc 82.422 (82.614)	lr 0.01258
Train [60][2480/3239]	Time 0.221 (0.614)	Data Time 0.001 (0.017)	Loss 2.5389 (2.5688)	Entropy 1.09287 (1.09597)	Top-1 acc 63.281 (62.243)	Top-5 acc 80.078 (82.612)	lr 0.01258
Train [60][2490/3239]	Time 0.370 (0.613)	Data Time 0.002 (0.017)	Loss 2.3747 (2.5689)	Entropy 1.09286 (1.09595)	Top-1 acc 67.188 (62.237)	Top-5 acc 86.328 (82.610)	lr 0.01258
Train [60][2500/3239]	Time 0.221 (0.613)	Data Time 0.001 (0.017)	Loss 2.5587 (2.5690)	Entropy 1.09289 (1.09594)	Top-1 acc 59.375 (62.234)	Top-5 acc 82.812 (82.607)	lr 0.01257
Train [60][2510/3239]	Time 0.245 (0.612)	Data Time 0.002 (0.016)	Loss 2.3462 (2.5690)	Entropy 1.09292 (1.09593)	Top-1 acc 69.141 (62.229)	Top-5 acc 86.719 (82.609)	lr 0.01257
Train [60][2520/3239]	Time 0.234 (0.612)	Data Time 0.001 (0.016)	Loss 2.3979 (2.5690)	Entropy 1.09289 (1.09592)	Top-1 acc 67.969 (62.227)	Top-5 acc 85.156 (82.611)	lr 0.01257
Train [60][2530/3239]	Time 0.298 (0.611)	Data Time 0.001 (0.016)	Loss 2.6711 (2.5692)	Entropy 1.09287 (1.09590)	Top-1 acc 59.375 (62.219)	Top-5 acc 79.297 (82.607)	lr 0.01257
Train [60][2540/3239]	Time 2.403 (0.611)	Data Time 0.001 (0.016)	Loss 2.4886 (2.5692)	Entropy 1.09287 (1.09589)	Top-1 acc 61.719 (62.222)	Top-5 acc 83.203 (82.604)	lr 0.01257
Train [60][2550/3239]	Time 0.231 (0.609)	Data Time 0.001 (0.016)	Loss 2.5786 (2.5691)	Entropy 1.09283 (1.09588)	Top-1 acc 61.328 (62.224)	Top-5 acc 82.812 (82.607)	lr 0.01257
Train [60][2560/3239]	Time 0.266 (0.609)	Data Time 0.001 (0.016)	Loss 2.5548 (2.5691)	Entropy 1.09281 (1.09587)	Top-1 acc 64.062 (62.226)	Top-5 acc 83.594 (82.607)	lr 0.01257
Train [60][2570/3239]	Time 0.231 (0.608)	Data Time 0.001 (0.016)	Loss 2.6020 (2.5691)	Entropy 1.09286 (1.09586)	Top-1 acc 58.203 (62.226)	Top-5 acc 80.469 (82.606)	lr 0.01257
Train [60][2580/3239]	Time 0.340 (0.608)	Data Time 0.002 (0.016)	Loss 2.2335 (2.5690)	Entropy 1.09285 (1.09585)	Top-1 acc 73.047 (62.232)	Top-5 acc 87.891 (82.606)	lr 0.01257
Train [60][2590/3239]	Time 0.212 (0.607)	Data Time 0.001 (0.016)	Loss 2.6786 (2.5689)	Entropy 1.09284 (1.09583)	Top-1 acc 60.156 (62.235)	Top-5 acc 78.125 (82.604)	lr 0.01257
Train [60][2600/3239]	Time 0.238 (0.607)	Data Time 0.001 (0.016)	Loss 2.5141 (2.5689)	Entropy 1.09269 (1.09582)	Top-1 acc 65.625 (62.237)	Top-5 acc 84.766 (82.605)	lr 0.01256
Train [60][2610/3239]	Time 0.222 (0.606)	Data Time 0.001 (0.016)	Loss 2.7572 (2.5691)	Entropy 1.09267 (1.09581)	Top-1 acc 57.422 (62.236)	Top-5 acc 76.953 (82.598)	lr 0.01256
Train [60][2620/3239]	Time 0.234 (0.606)	Data Time 0.001 (0.016)	Loss 2.5249 (2.5690)	Entropy 1.09266 (1.09580)	Top-1 acc 66.016 (62.241)	Top-5 acc 83.203 (82.599)	lr 0.01256
Train [60][2630/3239]	Time 0.365 (0.605)	Data Time 0.002 (0.016)	Loss 2.6926 (2.5691)	Entropy 1.09264 (1.09579)	Top-1 acc 61.328 (62.243)	Top-5 acc 83.203 (82.599)	lr 0.01256
Train [60][2640/3239]	Time 0.269 (0.605)	Data Time 0.001 (0.016)	Loss 2.6809 (2.5692)	Entropy 1.09262 (1.09577)	Top-1 acc 58.594 (62.241)	Top-5 acc 80.078 (82.598)	lr 0.01256
Train [60][2650/3239]	Time 0.256 (0.604)	Data Time 0.001 (0.016)	Loss 2.3812 (2.5692)	Entropy 1.09259 (1.09576)	Top-1 acc 68.359 (62.241)	Top-5 acc 83.594 (82.598)	lr 0.01256
Train [60][2660/3239]	Time 0.246 (0.604)	Data Time 0.003 (0.016)	Loss 2.7153 (2.5693)	Entropy 1.09262 (1.09575)	Top-1 acc 62.500 (62.241)	Top-5 acc 80.469 (82.598)	lr 0.01256
Train [60][2670/3239]	Time 0.269 (0.603)	Data Time 0.002 (0.016)	Loss 2.7080 (2.5693)	Entropy 1.09259 (1.09574)	Top-1 acc 58.594 (62.243)	Top-5 acc 76.953 (82.596)	lr 0.01256
Train [60][2680/3239]	Time 0.422 (0.603)	Data Time 0.002 (0.016)	Loss 2.3978 (2.5692)	Entropy 1.09247 (1.09573)	Top-1 acc 66.016 (62.245)	Top-5 acc 85.547 (82.600)	lr 0.01256
Train [60][2690/3239]	Time 0.227 (0.602)	Data Time 0.001 (0.015)	Loss 2.7554 (2.5692)	Entropy 1.09244 (1.09571)	Top-1 acc 61.328 (62.243)	Top-5 acc 77.344 (82.601)	lr 0.01256
Train [60][2700/3239]	Time 0.288 (0.602)	Data Time 0.001 (0.015)	Loss 2.5039 (2.5691)	Entropy 1.09238 (1.09570)	Top-1 acc 64.062 (62.248)	Top-5 acc 85.156 (82.601)	lr 0.01255
Train [60][2710/3239]	Time 0.218 (0.601)	Data Time 0.001 (0.015)	Loss 2.6312 (2.5692)	Entropy 1.09237 (1.09569)	Top-1 acc 62.500 (62.244)	Top-5 acc 82.031 (82.599)	lr 0.01255
Train [60][2720/3239]	Time 0.256 (0.601)	Data Time 0.002 (0.015)	Loss 2.6382 (2.5691)	Entropy 1.09236 (1.09568)	Top-1 acc 56.641 (62.239)	Top-5 acc 85.156 (82.604)	lr 0.01255
Train [60][2730/3239]	Time 0.273 (0.600)	Data Time 0.001 (0.015)	Loss 2.5894 (2.5691)	Entropy 1.09234 (1.09567)	Top-1 acc 59.375 (62.238)	Top-5 acc 82.422 (82.606)	lr 0.01255
Train [60][2740/3239]	Time 0.239 (0.600)	Data Time 0.001 (0.015)	Loss 2.7502 (2.5691)	Entropy 1.09234 (1.09565)	Top-1 acc 57.812 (62.235)	Top-5 acc 80.859 (82.607)	lr 0.01255
Train [60][2750/3239]	Time 0.205 (0.599)	Data Time 0.001 (0.015)	Loss 2.7028 (2.5691)	Entropy 1.09233 (1.09564)	Top-1 acc 58.594 (62.234)	Top-5 acc 82.422 (82.611)	lr 0.01255
Train [60][2760/3239]	Time 0.210 (0.599)	Data Time 0.001 (0.015)	Loss 2.5664 (2.5690)	Entropy 1.09226 (1.09563)	Top-1 acc 61.328 (62.232)	Top-5 acc 82.422 (82.612)	lr 0.01255
Train [60][2770/3239]	Time 0.265 (0.598)	Data Time 0.001 (0.015)	Loss 2.7852 (2.5691)	Entropy 1.09221 (1.09562)	Top-1 acc 56.641 (62.226)	Top-5 acc 77.734 (82.611)	lr 0.01255
Train [60][2780/3239]	Time 0.229 (0.598)	Data Time 0.001 (0.015)	Loss 2.7309 (2.5691)	Entropy 1.09219 (1.09561)	Top-1 acc 60.547 (62.226)	Top-5 acc 80.469 (82.613)	lr 0.01255
Train [60][2790/3239]	Time 0.253 (0.598)	Data Time 0.002 (0.015)	Loss 2.5754 (2.5691)	Entropy 1.09216 (1.09559)	Top-1 acc 63.281 (62.223)	Top-5 acc 82.422 (82.611)	lr 0.01255
Train [60][2800/3239]	Time 0.280 (0.615)	Data Time 0.004 (0.015)	Loss 2.6634 (2.5694)	Entropy 1.09213 (1.09558)	Top-1 acc 59.375 (62.217)	Top-5 acc 81.250 (82.605)	lr 0.01254
Train [60][2810/3239]	Time 0.247 (0.615)	Data Time 0.002 (0.015)	Loss 2.7355 (2.5693)	Entropy 1.09213 (1.09557)	Top-1 acc 58.594 (62.218)	Top-5 acc 80.469 (82.606)	lr 0.01254
Train [60][2820/3239]	Time 0.293 (0.614)	Data Time 0.002 (0.015)	Loss 2.5854 (2.5696)	Entropy 1.09211 (1.09556)	Top-1 acc 60.938 (62.213)	Top-5 acc 82.031 (82.602)	lr 0.01254
Train [60][2830/3239]	Time 0.236 (0.614)	Data Time 0.002 (0.015)	Loss 2.7461 (2.5695)	Entropy 1.09207 (1.09554)	Top-1 acc 58.203 (62.214)	Top-5 acc 80.078 (82.602)	lr 0.01254
Train [60][2840/3239]	Time 0.279 (0.613)	Data Time 0.001 (0.015)	Loss 2.6015 (2.5695)	Entropy 1.09205 (1.09553)	Top-1 acc 62.109 (62.215)	Top-5 acc 82.422 (82.603)	lr 0.01254
Train [60][2850/3239]	Time 0.256 (0.613)	Data Time 0.001 (0.015)	Loss 2.6261 (2.5697)	Entropy 1.09204 (1.09552)	Top-1 acc 62.891 (62.215)	Top-5 acc 80.469 (82.599)	lr 0.01254
Train [60][2860/3239]	Time 0.377 (0.612)	Data Time 0.002 (0.015)	Loss 2.8560 (2.5698)	Entropy 1.09202 (1.09551)	Top-1 acc 55.078 (62.214)	Top-5 acc 75.781 (82.598)	lr 0.01254
Train [60][2870/3239]	Time 0.249 (0.612)	Data Time 0.001 (0.015)	Loss 2.4295 (2.5696)	Entropy 1.09197 (1.09550)	Top-1 acc 62.500 (62.218)	Top-5 acc 84.375 (82.600)	lr 0.01254
Train [60][2880/3239]	Time 0.237 (0.611)	Data Time 0.001 (0.015)	Loss 2.5847 (2.5698)	Entropy 1.09191 (1.09548)	Top-1 acc 62.891 (62.216)	Top-5 acc 82.031 (82.598)	lr 0.01254
Train [60][2890/3239]	Time 0.237 (0.611)	Data Time 0.001 (0.015)	Loss 2.3782 (2.5698)	Entropy 1.09190 (1.09547)	Top-1 acc 67.578 (62.214)	Top-5 acc 87.109 (82.598)	lr 0.01254
Train [60][2900/3239]	Time 0.219 (0.610)	Data Time 0.001 (0.014)	Loss 2.5640 (2.5699)	Entropy 1.09177 (1.09546)	Top-1 acc 60.547 (62.212)	Top-5 acc 82.812 (82.595)	lr 0.01253
Train [60][2910/3239]	Time 0.339 (0.610)	Data Time 0.001 (0.014)	Loss 2.4563 (2.5697)	Entropy 1.09167 (1.09545)	Top-1 acc 63.281 (62.217)	Top-5 acc 82.812 (82.600)	lr 0.01253
Train [60][2920/3239]	Time 0.313 (0.609)	Data Time 0.001 (0.014)	Loss 2.6041 (2.5698)	Entropy 1.09168 (1.09543)	Top-1 acc 57.031 (62.214)	Top-5 acc 81.641 (82.596)	lr 0.01253
Train [60][2930/3239]	Time 0.264 (0.609)	Data Time 0.001 (0.014)	Loss 2.4058 (2.5699)	Entropy 1.09162 (1.09542)	Top-1 acc 65.625 (62.213)	Top-5 acc 83.984 (82.594)	lr 0.01253
Train [60][2940/3239]	Time 0.300 (0.608)	Data Time 0.001 (0.014)	Loss 2.7121 (2.5702)	Entropy 1.09158 (1.09541)	Top-1 acc 57.031 (62.207)	Top-5 acc 78.906 (82.587)	lr 0.01253
Train [60][2950/3239]	Time 0.269 (0.608)	Data Time 0.001 (0.014)	Loss 2.6464 (2.5703)	Entropy 1.09155 (1.09539)	Top-1 acc 59.375 (62.206)	Top-5 acc 82.031 (82.589)	lr 0.01253
Train [60][2960/3239]	Time 0.254 (0.607)	Data Time 0.001 (0.014)	Loss 2.5588 (2.5703)	Entropy 1.09148 (1.09538)	Top-1 acc 60.547 (62.203)	Top-5 acc 79.297 (82.585)	lr 0.01253
Train [60][2970/3239]	Time 0.256 (0.607)	Data Time 0.001 (0.014)	Loss 2.5315 (2.5703)	Entropy 1.09146 (1.09537)	Top-1 acc 64.453 (62.201)	Top-5 acc 85.547 (82.586)	lr 0.01253
Train [60][2980/3239]	Time 0.252 (0.607)	Data Time 0.001 (0.014)	Loss 2.7358 (2.5704)	Entropy 1.09145 (1.09535)	Top-1 acc 56.250 (62.198)	Top-5 acc 80.859 (82.586)	lr 0.01253
Train [60][2990/3239]	Time 0.235 (0.606)	Data Time 0.001 (0.014)	Loss 2.6325 (2.5705)	Entropy 1.09137 (1.09534)	Top-1 acc 59.375 (62.196)	Top-5 acc 81.250 (82.582)	lr 0.01253
Train [60][3000/3239]	Time 0.276 (0.606)	Data Time 0.001 (0.014)	Loss 2.7010 (2.5706)	Entropy 1.09146 (1.09533)	Top-1 acc 59.375 (62.190)	Top-5 acc 79.688 (82.579)	lr 0.01252
Train [60][3010/3239]	Time 0.260 (0.605)	Data Time 0.001 (0.014)	Loss 2.5776 (2.5707)	Entropy 1.09146 (1.09532)	Top-1 acc 63.672 (62.187)	Top-5 acc 82.031 (82.577)	lr 0.01252
Train [60][3020/3239]	Time 0.221 (0.605)	Data Time 0.001 (0.014)	Loss 2.5125 (2.5708)	Entropy 1.09146 (1.09530)	Top-1 acc 64.844 (62.188)	Top-5 acc 82.812 (82.574)	lr 0.01252
Train [60][3030/3239]	Time 0.282 (0.604)	Data Time 0.001 (0.014)	Loss 2.5647 (2.5709)	Entropy 1.09136 (1.09529)	Top-1 acc 64.453 (62.185)	Top-5 acc 82.031 (82.574)	lr 0.01252
Train [60][3040/3239]	Time 0.277 (0.604)	Data Time 0.002 (0.014)	Loss 2.6418 (2.5710)	Entropy 1.09134 (1.09528)	Top-1 acc 58.984 (62.179)	Top-5 acc 83.203 (82.577)	lr 0.01252
Train [60][3050/3239]	Time 0.336 (0.603)	Data Time 0.001 (0.014)	Loss 2.6877 (2.5711)	Entropy 1.09129 (1.09526)	Top-1 acc 58.984 (62.180)	Top-5 acc 80.859 (82.576)	lr 0.01252
Train [60][3060/3239]	Time 0.323 (0.603)	Data Time 0.002 (0.014)	Loss 2.3731 (2.5711)	Entropy 1.09129 (1.09525)	Top-1 acc 68.359 (62.184)	Top-5 acc 88.672 (82.575)	lr 0.01252
Train [60][3070/3239]	Time 0.230 (0.602)	Data Time 0.001 (0.014)	Loss 2.5713 (2.5711)	Entropy 1.09128 (1.09524)	Top-1 acc 61.719 (62.182)	Top-5 acc 83.203 (82.575)	lr 0.01252
Train [60][3080/3239]	Time 0.261 (0.602)	Data Time 0.001 (0.014)	Loss 2.6554 (2.5711)	Entropy 1.09126 (1.09523)	Top-1 acc 61.719 (62.182)	Top-5 acc 81.250 (82.574)	lr 0.01252
Train [60][3090/3239]	Time 0.222 (0.602)	Data Time 0.001 (0.014)	Loss 2.3509 (2.5713)	Entropy 1.09118 (1.09521)	Top-1 acc 66.016 (62.182)	Top-5 acc 84.766 (82.570)	lr 0.01252
Train [60][3100/3239]	Time 0.227 (0.601)	Data Time 0.001 (0.014)	Loss 2.4247 (2.5711)	Entropy 1.09114 (1.09520)	Top-1 acc 67.188 (62.186)	Top-5 acc 83.203 (82.572)	lr 0.01251
Train [60][3110/3239]	Time 0.227 (0.601)	Data Time 0.001 (0.014)	Loss 2.6336 (2.5713)	Entropy 1.09114 (1.09519)	Top-1 acc 59.375 (62.184)	Top-5 acc 80.859 (82.568)	lr 0.01251
Train [60][3120/3239]	Time 0.259 (0.600)	Data Time 0.001 (0.014)	Loss 2.6432 (2.5714)	Entropy 1.09115 (1.09517)	Top-1 acc 62.891 (62.181)	Top-5 acc 80.859 (82.563)	lr 0.01251
Train [60][3130/3239]	Time 0.306 (0.617)	Data Time 0.005 (0.014)	Loss 2.3945 (2.5713)	Entropy 1.09111 (1.09516)	Top-1 acc 63.672 (62.183)	Top-5 acc 85.547 (82.564)	lr 0.01251
Train [60][3140/3239]	Time 0.438 (0.617)	Data Time 0.002 (0.014)	Loss 2.6328 (2.5714)	Entropy 1.09111 (1.09515)	Top-1 acc 64.062 (62.183)	Top-5 acc 83.203 (82.562)	lr 0.01251
Train [60][3150/3239]	Time 0.266 (0.616)	Data Time 0.002 (0.013)	Loss 2.6095 (2.5714)	Entropy 1.09106 (1.09514)	Top-1 acc 61.719 (62.182)	Top-5 acc 82.422 (82.562)	lr 0.01251
Train [60][3160/3239]	Time 0.215 (0.616)	Data Time 0.002 (0.013)	Loss 2.5060 (2.5713)	Entropy 1.09102 (1.09512)	Top-1 acc 61.719 (62.183)	Top-5 acc 80.859 (82.563)	lr 0.01251
Train [60][3170/3239]	Time 0.232 (0.615)	Data Time 0.001 (0.013)	Loss 2.4879 (2.5714)	Entropy 1.09099 (1.09511)	Top-1 acc 61.719 (62.180)	Top-5 acc 85.156 (82.561)	lr 0.01251
Train [60][3180/3239]	Time 0.262 (0.615)	Data Time 0.000 (0.013)	Loss 2.5559 (2.5715)	Entropy 1.09093 (1.09510)	Top-1 acc 60.938 (62.176)	Top-5 acc 83.203 (82.560)	lr 0.01251
Train [60][3190/3239]	Time 0.232 (0.614)	Data Time 0.000 (0.013)	Loss 2.6211 (2.5714)	Entropy 1.09090 (1.09508)	Top-1 acc 63.281 (62.179)	Top-5 acc 80.469 (82.559)	lr 0.01251
Train [60][3200/3239]	Time 0.220 (0.614)	Data Time 0.000 (0.013)	Loss 2.5208 (2.5715)	Entropy 1.09085 (1.09507)	Top-1 acc 66.016 (62.177)	Top-5 acc 83.203 (82.558)	lr 0.01250
Train [60][3210/3239]	Time 0.216 (0.613)	Data Time 0.000 (0.013)	Loss 2.8870 (2.5718)	Entropy 1.09082 (1.09506)	Top-1 acc 55.469 (62.172)	Top-5 acc 77.344 (82.555)	lr 0.01250
Train [60][3220/3239]	Time 0.244 (0.613)	Data Time 0.000 (0.013)	Loss 2.5053 (2.5720)	Entropy 1.09081 (1.09504)	Top-1 acc 60.156 (62.167)	Top-5 acc 86.719 (82.553)	lr 0.01250
Train [60][3230/3239]	Time 0.206 (0.613)	Data Time 0.000 (0.013)	Loss 2.9280 (2.5721)	Entropy 1.09081 (1.09503)	Top-1 acc 54.688 (62.166)	Top-5 acc 76.562 (82.551)	lr 0.01250
Train [60][3239/3239]	Time 2.448 (0.612)	Data Time 0.000 (0.013)	Loss 2.6947 (2.5722)	Entropy 1.09081 (1.09502)	Top-1 acc 51.852 (62.160)	Top-5 acc 79.012 (82.547)	lr 0.01250
==========Valid [60/120]	loss 1.448	top-1 acc 67.095 (67.095)	top-5 acc 86.822	Train top-1 62.160	top-5 82.547	Entropy 1.09081	Latency-None: 0.000ms	Flops: 548.34M
Train [61][0/3239]	Time 40.637 (40.637)	Data Time 37.487 (37.487)	Loss 2.5778 (2.5778)	Entropy 1.09082 (1.09082)	Top-1 acc 62.500 (62.500)	Top-5 acc 83.984 (83.984)	lr 0.01250
Train [61][10/3239]	Time 2.699 (4.291)	Data Time 0.029 (3.499)	Loss 2.3208 (2.4958)	Entropy 1.09082 (1.09082)	Top-1 acc 69.922 (63.920)	Top-5 acc 88.672 (84.162)	lr 0.01250
Train [61][20/3239]	Time 0.227 (2.357)	Data Time 0.001 (1.833)	Loss 2.5627 (2.4892)	Entropy 1.09075 (1.09079)	Top-1 acc 60.938 (63.988)	Top-5 acc 82.812 (84.375)	lr 0.01250
Train [61][30/3239]	Time 0.222 (1.744)	Data Time 0.001 (1.242)	Loss 2.3555 (2.4880)	Entropy 1.09074 (1.09077)	Top-1 acc 66.406 (64.201)	Top-5 acc 85.156 (84.060)	lr 0.01250
Train [61][40/3239]	Time 0.252 (1.430)	Data Time 0.001 (0.940)	Loss 2.5038 (2.5094)	Entropy 1.09059 (1.09074)	Top-1 acc 63.281 (64.015)	Top-5 acc 86.719 (83.622)	lr 0.01250
Train [61][50/3239]	Time 0.227 (1.243)	Data Time 0.001 (0.756)	Loss 2.6638 (2.5203)	Entropy 1.09060 (1.09071)	Top-1 acc 61.719 (63.580)	Top-5 acc 82.812 (83.571)	lr 0.01249
Train [61][60/3239]	Time 0.225 (1.114)	Data Time 0.001 (0.632)	Loss 2.5885 (2.5237)	Entropy 1.09060 (1.09069)	Top-1 acc 59.766 (63.454)	Top-5 acc 80.469 (83.504)	lr 0.01249
Train [61][70/3239]	Time 0.205 (1.021)	Data Time 0.001 (0.543)	Loss 2.4574 (2.5260)	Entropy 1.09059 (1.09068)	Top-1 acc 63.672 (63.446)	Top-5 acc 84.375 (83.390)	lr 0.01249
Train [61][80/3239]	Time 0.285 (0.953)	Data Time 0.001 (0.476)	Loss 2.5687 (2.5240)	Entropy 1.09052 (1.09066)	Top-1 acc 61.328 (63.411)	Top-5 acc 84.375 (83.362)	lr 0.01249
Train [61][90/3239]	Time 0.253 (0.901)	Data Time 0.001 (0.424)	Loss 2.7331 (2.5310)	Entropy 1.09053 (1.09065)	Top-1 acc 58.984 (63.118)	Top-5 acc 78.906 (83.280)	lr 0.01249
Train [61][100/3239]	Time 0.278 (0.860)	Data Time 0.001 (0.382)	Loss 2.6098 (2.5361)	Entropy 1.09049 (1.09063)	Top-1 acc 60.547 (62.929)	Top-5 acc 79.297 (83.149)	lr 0.01249
Train [61][110/3239]	Time 0.228 (0.827)	Data Time 0.001 (0.348)	Loss 2.8143 (2.5362)	Entropy 1.09040 (1.09062)	Top-1 acc 56.250 (62.961)	Top-5 acc 77.734 (83.105)	lr 0.01249
Train [61][120/3239]	Time 2.500 (0.798)	Data Time 0.001 (0.319)	Loss 2.6802 (2.5453)	Entropy 1.09040 (1.09060)	Top-1 acc 61.328 (62.836)	Top-5 acc 79.297 (82.925)	lr 0.01249
Train [61][130/3239]	Time 0.256 (0.756)	Data Time 0.001 (0.295)	Loss 2.6655 (2.5437)	Entropy 1.09034 (1.09058)	Top-1 acc 58.203 (62.837)	Top-5 acc 82.422 (83.009)	lr 0.01249
Train [61][140/3239]	Time 0.353 (0.736)	Data Time 0.001 (0.274)	Loss 2.5557 (2.5408)	Entropy 1.09031 (1.09056)	Top-1 acc 59.375 (62.874)	Top-5 acc 83.203 (83.059)	lr 0.01249
Train [61][150/3239]	Time 0.217 (0.717)	Data Time 0.001 (0.256)	Loss 2.5448 (2.5414)	Entropy 1.09027 (1.09054)	Top-1 acc 66.406 (62.948)	Top-5 acc 81.250 (83.076)	lr 0.01248
Train [61][160/3239]	Time 0.219 (0.700)	Data Time 0.001 (0.240)	Loss 2.6442 (2.5432)	Entropy 1.09027 (1.09053)	Top-1 acc 57.812 (62.874)	Top-5 acc 83.594 (83.058)	lr 0.01248
Train [61][170/3239]	Time 0.205 (0.685)	Data Time 0.001 (0.226)	Loss 2.6688 (2.5424)	Entropy 1.09028 (1.09051)	Top-1 acc 64.062 (62.900)	Top-5 acc 82.812 (83.071)	lr 0.01248
Train [61][180/3239]	Time 0.220 (0.672)	Data Time 0.001 (0.214)	Loss 2.7550 (2.5449)	Entropy 1.09028 (1.09050)	Top-1 acc 58.203 (62.871)	Top-5 acc 78.125 (82.964)	lr 0.01248
Train [61][190/3239]	Time 0.209 (0.662)	Data Time 0.001 (0.203)	Loss 2.4860 (2.5477)	Entropy 1.09026 (1.09049)	Top-1 acc 63.672 (62.801)	Top-5 acc 82.812 (82.907)	lr 0.01248
Train [61][200/3239]	Time 0.237 (0.653)	Data Time 0.002 (0.193)	Loss 2.3976 (2.5470)	Entropy 1.09020 (1.09047)	Top-1 acc 63.672 (62.768)	Top-5 acc 87.891 (82.950)	lr 0.01248
Train [61][210/3239]	Time 0.287 (0.645)	Data Time 0.002 (0.184)	Loss 2.4974 (2.5467)	Entropy 1.09019 (1.09046)	Top-1 acc 63.281 (62.767)	Top-5 acc 82.422 (82.946)	lr 0.01248
Train [61][220/3239]	Time 0.265 (0.638)	Data Time 0.001 (0.176)	Loss 2.5420 (2.5487)	Entropy 1.09018 (1.09045)	Top-1 acc 61.719 (62.746)	Top-5 acc 83.594 (82.903)	lr 0.01248
Train [61][230/3239]	Time 2.655 (0.631)	Data Time 0.001 (0.168)	Loss 2.5273 (2.5490)	Entropy 1.09018 (1.09044)	Top-1 acc 64.062 (62.752)	Top-5 acc 83.984 (82.860)	lr 0.01248
Train [61][240/3239]	Time 0.223 (0.616)	Data Time 0.003 (0.161)	Loss 2.3914 (2.5489)	Entropy 1.09019 (1.09043)	Top-1 acc 62.891 (62.764)	Top-5 acc 87.500 (82.892)	lr 0.01248
Train [61][250/3239]	Time 0.229 (0.832)	Data Time 0.002 (0.155)	Loss 2.5897 (2.5493)	Entropy 1.09020 (1.09042)	Top-1 acc 63.281 (62.752)	Top-5 acc 79.297 (82.884)	lr 0.01247
Train [61][260/3239]	Time 0.224 (0.819)	Data Time 0.002 (0.149)	Loss 2.5692 (2.5486)	Entropy 1.09018 (1.09041)	Top-1 acc 60.938 (62.745)	Top-5 acc 80.469 (82.886)	lr 0.01247
Train [61][270/3239]	Time 0.222 (0.805)	Data Time 0.001 (0.144)	Loss 2.4790 (2.5470)	Entropy 1.09016 (1.09040)	Top-1 acc 60.938 (62.754)	Top-5 acc 83.984 (82.938)	lr 0.01247
Train [61][280/3239]	Time 0.333 (0.793)	Data Time 0.001 (0.139)	Loss 2.5538 (2.5464)	Entropy 1.09013 (1.09039)	Top-1 acc 63.672 (62.789)	Top-5 acc 83.203 (82.968)	lr 0.01247
Train [61][290/3239]	Time 0.245 (0.783)	Data Time 0.002 (0.134)	Loss 2.7571 (2.5468)	Entropy 1.09008 (1.09038)	Top-1 acc 56.250 (62.787)	Top-5 acc 80.078 (82.975)	lr 0.01247
Train [61][300/3239]	Time 0.206 (0.773)	Data Time 0.001 (0.129)	Loss 2.4095 (2.5460)	Entropy 1.09008 (1.09037)	Top-1 acc 64.453 (62.795)	Top-5 acc 85.547 (82.999)	lr 0.01247
Train [61][310/3239]	Time 0.210 (0.763)	Data Time 0.002 (0.125)	Loss 2.5129 (2.5462)	Entropy 1.09007 (1.09036)	Top-1 acc 61.719 (62.808)	Top-5 acc 81.641 (82.992)	lr 0.01247
Train [61][320/3239]	Time 0.260 (0.753)	Data Time 0.001 (0.122)	Loss 2.5674 (2.5466)	Entropy 1.09003 (1.09035)	Top-1 acc 62.500 (62.826)	Top-5 acc 81.250 (82.959)	lr 0.01247
Train [61][330/3239]	Time 0.299 (0.745)	Data Time 0.001 (0.118)	Loss 2.5555 (2.5479)	Entropy 1.09000 (1.09034)	Top-1 acc 61.719 (62.796)	Top-5 acc 83.594 (82.918)	lr 0.01247
Train [61][340/3239]	Time 2.507 (0.736)	Data Time 0.001 (0.114)	Loss 2.4102 (2.5481)	Entropy 1.09000 (1.09033)	Top-1 acc 64.062 (62.798)	Top-5 acc 86.719 (82.918)	lr 0.01247
Train [61][350/3239]	Time 0.218 (0.722)	Data Time 0.001 (0.111)	Loss 2.4643 (2.5486)	Entropy 1.08997 (1.09032)	Top-1 acc 63.672 (62.810)	Top-5 acc 86.328 (82.925)	lr 0.01246
Train [61][360/3239]	Time 0.226 (0.716)	Data Time 0.001 (0.108)	Loss 2.4578 (2.5477)	Entropy 1.09001 (1.09031)	Top-1 acc 64.844 (62.814)	Top-5 acc 85.547 (82.943)	lr 0.01246
Train [61][370/3239]	Time 0.245 (0.710)	Data Time 0.001 (0.105)	Loss 2.6527 (2.5470)	Entropy 1.08999 (1.09030)	Top-1 acc 57.812 (62.824)	Top-5 acc 80.859 (82.938)	lr 0.01246
Train [61][380/3239]	Time 0.356 (0.704)	Data Time 0.001 (0.103)	Loss 2.5044 (2.5495)	Entropy 1.08997 (1.09030)	Top-1 acc 64.453 (62.781)	Top-5 acc 84.766 (82.895)	lr 0.01246
Train [61][390/3239]	Time 0.221 (0.697)	Data Time 0.001 (0.100)	Loss 2.5094 (2.5490)	Entropy 1.08996 (1.09029)	Top-1 acc 63.281 (62.787)	Top-5 acc 82.812 (82.918)	lr 0.01246
Train [61][400/3239]	Time 0.229 (0.692)	Data Time 0.001 (0.098)	Loss 2.4440 (2.5497)	Entropy 1.08998 (1.09028)	Top-1 acc 66.406 (62.752)	Top-5 acc 85.547 (82.911)	lr 0.01246
Train [61][410/3239]	Time 0.217 (0.686)	Data Time 0.001 (0.095)	Loss 2.3453 (2.5497)	Entropy 1.08997 (1.09027)	Top-1 acc 67.578 (62.747)	Top-5 acc 88.672 (82.913)	lr 0.01246
Train [61][420/3239]	Time 0.223 (0.680)	Data Time 0.001 (0.093)	Loss 2.5514 (2.5488)	Entropy 1.08994 (1.09026)	Top-1 acc 60.156 (62.765)	Top-5 acc 80.859 (82.928)	lr 0.01246
Train [61][430/3239]	Time 0.235 (0.676)	Data Time 0.001 (0.091)	Loss 2.4712 (2.5474)	Entropy 1.08992 (1.09026)	Top-1 acc 62.891 (62.791)	Top-5 acc 82.812 (82.947)	lr 0.01246
Train [61][440/3239]	Time 0.260 (0.671)	Data Time 0.001 (0.089)	Loss 2.4797 (2.5469)	Entropy 1.08995 (1.09025)	Top-1 acc 65.234 (62.805)	Top-5 acc 84.375 (82.954)	lr 0.01246
Train [61][450/3239]	Time 2.411 (0.667)	Data Time 0.001 (0.087)	Loss 2.5228 (2.5472)	Entropy 1.08995 (1.09024)	Top-1 acc 65.625 (62.823)	Top-5 acc 82.812 (82.949)	lr 0.01245
Train [61][460/3239]	Time 0.231 (0.657)	Data Time 0.001 (0.085)	Loss 2.5887 (2.5486)	Entropy 1.08985 (1.09023)	Top-1 acc 59.375 (62.799)	Top-5 acc 83.203 (82.918)	lr 0.01245
Train [61][470/3239]	Time 0.318 (0.653)	Data Time 0.001 (0.083)	Loss 2.3711 (2.5487)	Entropy 1.08983 (1.09023)	Top-1 acc 67.188 (62.783)	Top-5 acc 87.109 (82.909)	lr 0.01245
Train [61][480/3239]	Time 0.218 (0.649)	Data Time 0.001 (0.082)	Loss 2.6046 (2.5491)	Entropy 1.08989 (1.09022)	Top-1 acc 62.500 (62.770)	Top-5 acc 77.734 (82.887)	lr 0.01245
Train [61][490/3239]	Time 0.254 (0.645)	Data Time 0.001 (0.080)	Loss 2.5604 (2.5503)	Entropy 1.08986 (1.09021)	Top-1 acc 63.672 (62.728)	Top-5 acc 82.812 (82.871)	lr 0.01245
Train [61][500/3239]	Time 0.223 (0.642)	Data Time 0.001 (0.078)	Loss 2.6059 (2.5500)	Entropy 1.08986 (1.09020)	Top-1 acc 64.453 (62.744)	Top-5 acc 83.594 (82.887)	lr 0.01245
Train [61][510/3239]	Time 0.226 (0.638)	Data Time 0.001 (0.077)	Loss 2.3473 (2.5503)	Entropy 1.08982 (1.09020)	Top-1 acc 66.797 (62.737)	Top-5 acc 87.500 (82.884)	lr 0.01245
Train [61][520/3239]	Time 0.227 (0.635)	Data Time 0.001 (0.075)	Loss 2.6681 (2.5501)	Entropy 1.08980 (1.09019)	Top-1 acc 58.984 (62.730)	Top-5 acc 80.859 (82.887)	lr 0.01245
Train [61][530/3239]	Time 0.225 (0.632)	Data Time 0.001 (0.074)	Loss 2.4090 (2.5497)	Entropy 1.08978 (1.09018)	Top-1 acc 67.969 (62.744)	Top-5 acc 84.375 (82.878)	lr 0.01245
Train [61][540/3239]	Time 0.224 (0.629)	Data Time 0.001 (0.073)	Loss 2.5554 (2.5509)	Entropy 1.08977 (1.09018)	Top-1 acc 64.453 (62.732)	Top-5 acc 84.766 (82.864)	lr 0.01245
Train [61][550/3239]	Time 0.260 (0.626)	Data Time 0.001 (0.071)	Loss 2.4058 (2.5510)	Entropy 1.08975 (1.09017)	Top-1 acc 66.797 (62.725)	Top-5 acc 85.938 (82.860)	lr 0.01244
Train [61][560/3239]	Time 2.589 (0.624)	Data Time 0.001 (0.070)	Loss 2.5255 (2.5510)	Entropy 1.08975 (1.09016)	Top-1 acc 64.062 (62.733)	Top-5 acc 84.375 (82.854)	lr 0.01244
Train [61][570/3239]	Time 0.234 (0.617)	Data Time 0.001 (0.069)	Loss 2.6525 (2.5518)	Entropy 1.08974 (1.09015)	Top-1 acc 60.938 (62.713)	Top-5 acc 79.688 (82.849)	lr 0.01244
Train [61][580/3239]	Time 0.248 (0.614)	Data Time 0.001 (0.068)	Loss 2.5951 (2.5513)	Entropy 1.08971 (1.09015)	Top-1 acc 60.938 (62.736)	Top-5 acc 80.859 (82.858)	lr 0.01244
Train [61][590/3239]	Time 0.225 (0.612)	Data Time 0.001 (0.067)	Loss 2.6431 (2.5512)	Entropy 1.08972 (1.09014)	Top-1 acc 60.156 (62.739)	Top-5 acc 83.203 (82.859)	lr 0.01244
Train [61][600/3239]	Time 0.235 (0.609)	Data Time 0.001 (0.066)	Loss 2.4156 (2.5507)	Entropy 1.08968 (1.09013)	Top-1 acc 66.406 (62.738)	Top-5 acc 83.203 (82.875)	lr 0.01244
Train [61][610/3239]	Time 0.275 (0.684)	Data Time 0.002 (0.065)	Loss 2.6946 (2.5512)	Entropy 1.08970 (1.09012)	Top-1 acc 59.766 (62.727)	Top-5 acc 78.906 (82.867)	lr 0.01244
Train [61][620/3239]	Time 0.228 (0.683)	Data Time 0.002 (0.064)	Loss 2.5788 (2.5504)	Entropy 1.08966 (1.09012)	Top-1 acc 63.672 (62.750)	Top-5 acc 82.031 (82.883)	lr 0.01244
Train [61][630/3239]	Time 0.213 (0.680)	Data Time 0.002 (0.063)	Loss 2.6786 (2.5507)	Entropy 1.08961 (1.09011)	Top-1 acc 61.328 (62.743)	Top-5 acc 78.906 (82.875)	lr 0.01244
Train [61][640/3239]	Time 0.215 (0.676)	Data Time 0.001 (0.062)	Loss 2.6079 (2.5505)	Entropy 1.08961 (1.09010)	Top-1 acc 62.109 (62.733)	Top-5 acc 80.859 (82.880)	lr 0.01244
Train [61][650/3239]	Time 0.311 (0.673)	Data Time 0.001 (0.061)	Loss 2.6791 (2.5514)	Entropy 1.08957 (1.09009)	Top-1 acc 60.547 (62.720)	Top-5 acc 82.031 (82.874)	lr 0.01243
Train [61][660/3239]	Time 0.256 (0.670)	Data Time 0.001 (0.060)	Loss 2.5468 (2.5519)	Entropy 1.08952 (1.09009)	Top-1 acc 65.234 (62.706)	Top-5 acc 82.812 (82.871)	lr 0.01243
Train [61][670/3239]	Time 2.467 (0.667)	Data Time 0.001 (0.059)	Loss 2.5875 (2.5518)	Entropy 1.08952 (1.09008)	Top-1 acc 61.719 (62.711)	Top-5 acc 82.031 (82.873)	lr 0.01243
Train [61][680/3239]	Time 0.235 (0.660)	Data Time 0.001 (0.058)	Loss 2.6625 (2.5517)	Entropy 1.08952 (1.09007)	Top-1 acc 64.062 (62.729)	Top-5 acc 78.516 (82.873)	lr 0.01243
Train [61][690/3239]	Time 0.246 (0.658)	Data Time 0.001 (0.057)	Loss 2.6560 (2.5517)	Entropy 1.08973 (1.09006)	Top-1 acc 57.812 (62.722)	Top-5 acc 82.422 (82.869)	lr 0.01243
Train [61][700/3239]	Time 0.238 (0.655)	Data Time 0.002 (0.057)	Loss 2.6166 (2.5513)	Entropy 1.08975 (1.09006)	Top-1 acc 63.281 (62.731)	Top-5 acc 80.859 (82.872)	lr 0.01243
Train [61][710/3239]	Time 0.226 (0.653)	Data Time 0.001 (0.056)	Loss 2.6091 (2.5509)	Entropy 1.08966 (1.09005)	Top-1 acc 59.766 (62.731)	Top-5 acc 82.422 (82.882)	lr 0.01243
Train [61][720/3239]	Time 0.238 (0.650)	Data Time 0.001 (0.055)	Loss 2.7150 (2.5509)	Entropy 1.08959 (1.09005)	Top-1 acc 60.938 (62.723)	Top-5 acc 80.469 (82.891)	lr 0.01243
Train [61][730/3239]	Time 0.237 (0.648)	Data Time 0.001 (0.054)	Loss 2.4842 (2.5514)	Entropy 1.08955 (1.09004)	Top-1 acc 62.891 (62.698)	Top-5 acc 84.766 (82.884)	lr 0.01243
Train [61][740/3239]	Time 0.331 (0.645)	Data Time 0.001 (0.054)	Loss 2.5801 (2.5508)	Entropy 1.08959 (1.09004)	Top-1 acc 64.453 (62.709)	Top-5 acc 82.031 (82.904)	lr 0.01243
Train [61][750/3239]	Time 0.219 (0.643)	Data Time 0.001 (0.053)	Loss 2.6960 (2.5514)	Entropy 1.08958 (1.09003)	Top-1 acc 59.766 (62.695)	Top-5 acc 82.422 (82.891)	lr 0.01242
Train [61][760/3239]	Time 0.208 (0.641)	Data Time 0.001 (0.052)	Loss 2.5623 (2.5517)	Entropy 1.08948 (1.09002)	Top-1 acc 60.547 (62.686)	Top-5 acc 83.984 (82.894)	lr 0.01242
Train [61][770/3239]	Time 0.296 (0.638)	Data Time 0.001 (0.052)	Loss 2.5383 (2.5514)	Entropy 1.08946 (1.09002)	Top-1 acc 64.062 (62.702)	Top-5 acc 83.594 (82.893)	lr 0.01242
Train [61][780/3239]	Time 2.519 (0.636)	Data Time 0.001 (0.051)	Loss 2.4572 (2.5511)	Entropy 1.08946 (1.09001)	Top-1 acc 65.234 (62.698)	Top-5 acc 85.156 (82.914)	lr 0.01242
Train [61][790/3239]	Time 0.226 (0.631)	Data Time 0.001 (0.050)	Loss 2.6022 (2.5506)	Entropy 1.08940 (1.09000)	Top-1 acc 61.328 (62.713)	Top-5 acc 82.422 (82.918)	lr 0.01242
Train [61][800/3239]	Time 0.225 (0.629)	Data Time 0.001 (0.050)	Loss 2.4680 (2.5503)	Entropy 1.08938 (1.08999)	Top-1 acc 62.891 (62.703)	Top-5 acc 84.375 (82.926)	lr 0.01242
Train [61][810/3239]	Time 0.232 (0.627)	Data Time 0.001 (0.049)	Loss 2.5340 (2.5503)	Entropy 1.08935 (1.08999)	Top-1 acc 66.406 (62.693)	Top-5 acc 81.250 (82.921)	lr 0.01242
Train [61][820/3239]	Time 0.210 (0.625)	Data Time 0.001 (0.048)	Loss 2.5784 (2.5502)	Entropy 1.08927 (1.08998)	Top-1 acc 64.844 (62.699)	Top-5 acc 82.422 (82.928)	lr 0.01242
Train [61][830/3239]	Time 0.236 (0.623)	Data Time 0.001 (0.048)	Loss 2.5895 (2.5503)	Entropy 1.08926 (1.08997)	Top-1 acc 61.328 (62.695)	Top-5 acc 82.812 (82.931)	lr 0.01242
Train [61][840/3239]	Time 0.229 (0.621)	Data Time 0.001 (0.047)	Loss 2.4449 (2.5498)	Entropy 1.08922 (1.08996)	Top-1 acc 64.844 (62.707)	Top-5 acc 85.547 (82.944)	lr 0.01242
Train [61][850/3239]	Time 0.222 (0.619)	Data Time 0.001 (0.047)	Loss 2.4702 (2.5497)	Entropy 1.08922 (1.08995)	Top-1 acc 61.719 (62.705)	Top-5 acc 85.938 (82.950)	lr 0.01241
Train [61][860/3239]	Time 0.234 (0.617)	Data Time 0.001 (0.046)	Loss 2.3748 (2.5498)	Entropy 1.08919 (1.08994)	Top-1 acc 66.406 (62.698)	Top-5 acc 86.328 (82.950)	lr 0.01241
Train [61][870/3239]	Time 0.227 (0.616)	Data Time 0.001 (0.046)	Loss 2.6046 (2.5504)	Entropy 1.08917 (1.08993)	Top-1 acc 63.672 (62.671)	Top-5 acc 81.641 (82.945)	lr 0.01241
Train [61][880/3239]	Time 0.390 (0.614)	Data Time 0.001 (0.045)	Loss 2.6619 (2.5512)	Entropy 1.08911 (1.08993)	Top-1 acc 58.984 (62.649)	Top-5 acc 80.078 (82.933)	lr 0.01241
Train [61][890/3239]	Time 2.497 (0.613)	Data Time 0.001 (0.045)	Loss 2.6221 (2.5520)	Entropy 1.08911 (1.08992)	Top-1 acc 62.109 (62.623)	Top-5 acc 80.078 (82.926)	lr 0.01241
Train [61][900/3239]	Time 0.229 (0.608)	Data Time 0.001 (0.044)	Loss 2.5091 (2.5522)	Entropy 1.08917 (1.08991)	Top-1 acc 66.016 (62.627)	Top-5 acc 85.156 (82.925)	lr 0.01241
Train [61][910/3239]	Time 0.217 (0.607)	Data Time 0.001 (0.044)	Loss 2.8260 (2.5518)	Entropy 1.08900 (1.08990)	Top-1 acc 53.516 (62.631)	Top-5 acc 78.906 (82.936)	lr 0.01241
Train [61][920/3239]	Time 0.238 (0.605)	Data Time 0.001 (0.043)	Loss 2.4561 (2.5516)	Entropy 1.08902 (1.08989)	Top-1 acc 63.672 (62.635)	Top-5 acc 85.938 (82.941)	lr 0.01241
Train [61][930/3239]	Time 0.221 (0.604)	Data Time 0.001 (0.043)	Loss 2.5627 (2.5525)	Entropy 1.08901 (1.08988)	Top-1 acc 64.453 (62.604)	Top-5 acc 82.422 (82.922)	lr 0.01241
Train [61][940/3239]	Time 0.217 (0.602)	Data Time 0.001 (0.042)	Loss 2.3832 (2.5522)	Entropy 1.08898 (1.08987)	Top-1 acc 62.891 (62.610)	Top-5 acc 87.500 (82.932)	lr 0.01241
Train [61][950/3239]	Time 0.209 (0.601)	Data Time 0.001 (0.042)	Loss 2.5379 (2.5518)	Entropy 1.08902 (1.08986)	Top-1 acc 63.672 (62.619)	Top-5 acc 85.156 (82.943)	lr 0.01240
Train [61][960/3239]	Time 0.224 (0.599)	Data Time 0.001 (0.042)	Loss 2.7088 (2.5522)	Entropy 1.08900 (1.08985)	Top-1 acc 59.766 (62.619)	Top-5 acc 79.688 (82.935)	lr 0.01240
Train [61][970/3239]	Time 0.436 (0.650)	Data Time 0.003 (0.041)	Loss 2.3427 (2.5522)	Entropy 1.08896 (1.08984)	Top-1 acc 66.016 (62.616)	Top-5 acc 87.500 (82.934)	lr 0.01240
Train [61][980/3239]	Time 0.228 (0.649)	Data Time 0.002 (0.041)	Loss 2.4675 (2.5521)	Entropy 1.08899 (1.08983)	Top-1 acc 66.797 (62.622)	Top-5 acc 83.594 (82.935)	lr 0.01240
Train [61][990/3239]	Time 0.206 (0.647)	Data Time 0.001 (0.040)	Loss 2.3958 (2.5514)	Entropy 1.08898 (1.08983)	Top-1 acc 68.359 (62.640)	Top-5 acc 85.938 (82.950)	lr 0.01240
Train [61][1000/3239]	Time 2.528 (0.645)	Data Time 0.002 (0.040)	Loss 2.5504 (2.5518)	Entropy 1.08898 (1.08982)	Top-1 acc 63.672 (62.632)	Top-5 acc 82.422 (82.941)	lr 0.01240
Train [61][1010/3239]	Time 0.227 (0.641)	Data Time 0.001 (0.040)	Loss 2.6548 (2.5517)	Entropy 1.08894 (1.08981)	Top-1 acc 61.328 (62.625)	Top-5 acc 79.297 (82.935)	lr 0.01240
Train [61][1020/3239]	Time 0.216 (0.639)	Data Time 0.001 (0.039)	Loss 2.3917 (2.5520)	Entropy 1.08893 (1.08980)	Top-1 acc 65.625 (62.621)	Top-5 acc 84.375 (82.923)	lr 0.01240
Train [61][1030/3239]	Time 0.247 (0.637)	Data Time 0.001 (0.039)	Loss 2.3301 (2.5523)	Entropy 1.08895 (1.08979)	Top-1 acc 72.266 (62.620)	Top-5 acc 85.156 (82.920)	lr 0.01240
Train [61][1040/3239]	Time 0.219 (0.636)	Data Time 0.001 (0.039)	Loss 2.5888 (2.5519)	Entropy 1.08893 (1.08978)	Top-1 acc 64.844 (62.633)	Top-5 acc 83.984 (82.928)	lr 0.01239
Train [61][1050/3239]	Time 0.231 (0.634)	Data Time 0.001 (0.038)	Loss 2.6163 (2.5519)	Entropy 1.08896 (1.08977)	Top-1 acc 60.547 (62.641)	Top-5 acc 80.469 (82.925)	lr 0.01239
Train [61][1060/3239]	Time 0.212 (0.632)	Data Time 0.001 (0.038)	Loss 2.5520 (2.5519)	Entropy 1.08896 (1.08977)	Top-1 acc 61.719 (62.635)	Top-5 acc 83.594 (82.928)	lr 0.01239
Train [61][1070/3239]	Time 0.228 (0.631)	Data Time 0.001 (0.038)	Loss 2.5996 (2.5515)	Entropy 1.08895 (1.08976)	Top-1 acc 58.984 (62.648)	Top-5 acc 80.859 (82.937)	lr 0.01239
Train [61][1080/3239]	Time 0.231 (0.629)	Data Time 0.001 (0.037)	Loss 2.5879 (2.5512)	Entropy 1.08890 (1.08975)	Top-1 acc 58.594 (62.659)	Top-5 acc 82.812 (82.942)	lr 0.01239
Train [61][1090/3239]	Time 0.257 (0.627)	Data Time 0.001 (0.037)	Loss 2.5936 (2.5507)	Entropy 1.08893 (1.08974)	Top-1 acc 63.672 (62.670)	Top-5 acc 83.203 (82.959)	lr 0.01239
Train [61][1100/3239]	Time 0.264 (0.626)	Data Time 0.001 (0.037)	Loss 2.5528 (2.5510)	Entropy 1.08888 (1.08974)	Top-1 acc 60.938 (62.660)	Top-5 acc 83.984 (82.952)	lr 0.01239
Train [61][1110/3239]	Time 2.582 (0.625)	Data Time 0.001 (0.036)	Loss 2.4236 (2.5508)	Entropy 1.08888 (1.08973)	Top-1 acc 64.844 (62.656)	Top-5 acc 85.156 (82.951)	lr 0.01239
Train [61][1120/3239]	Time 0.212 (0.621)	Data Time 0.001 (0.036)	Loss 2.4399 (2.5504)	Entropy 1.08884 (1.08972)	Top-1 acc 67.188 (62.671)	Top-5 acc 84.766 (82.957)	lr 0.01239
Train [61][1130/3239]	Time 0.220 (0.620)	Data Time 0.001 (0.036)	Loss 2.5551 (2.5506)	Entropy 1.08889 (1.08971)	Top-1 acc 62.109 (62.659)	Top-5 acc 82.812 (82.950)	lr 0.01239
Train [61][1140/3239]	Time 0.234 (0.618)	Data Time 0.002 (0.035)	Loss 2.6828 (2.5506)	Entropy 1.08891 (1.08971)	Top-1 acc 58.203 (62.662)	Top-5 acc 82.812 (82.953)	lr 0.01238
Train [61][1150/3239]	Time 0.245 (0.617)	Data Time 0.001 (0.035)	Loss 2.4844 (2.5507)	Entropy 1.08886 (1.08970)	Top-1 acc 66.797 (62.652)	Top-5 acc 83.594 (82.952)	lr 0.01238
Train [61][1160/3239]	Time 0.320 (0.616)	Data Time 0.001 (0.035)	Loss 2.6642 (2.5513)	Entropy 1.08883 (1.08969)	Top-1 acc 58.594 (62.629)	Top-5 acc 79.688 (82.942)	lr 0.01238
Train [61][1170/3239]	Time 0.224 (0.614)	Data Time 0.001 (0.034)	Loss 2.5162 (2.5510)	Entropy 1.08881 (1.08968)	Top-1 acc 62.500 (62.639)	Top-5 acc 84.766 (82.945)	lr 0.01238
Train [61][1180/3239]	Time 0.231 (0.613)	Data Time 0.001 (0.034)	Loss 2.5446 (2.5513)	Entropy 1.08875 (1.08968)	Top-1 acc 60.938 (62.630)	Top-5 acc 83.203 (82.937)	lr 0.01238
Train [61][1190/3239]	Time 0.255 (0.612)	Data Time 0.001 (0.034)	Loss 2.6135 (2.5513)	Entropy 1.08866 (1.08967)	Top-1 acc 60.938 (62.639)	Top-5 acc 82.422 (82.938)	lr 0.01238
Train [61][1200/3239]	Time 0.232 (0.610)	Data Time 0.001 (0.034)	Loss 2.5364 (2.5511)	Entropy 1.08858 (1.08966)	Top-1 acc 62.891 (62.642)	Top-5 acc 82.422 (82.946)	lr 0.01238
Train [61][1210/3239]	Time 0.211 (0.609)	Data Time 0.001 (0.033)	Loss 2.6144 (2.5514)	Entropy 1.08855 (1.08965)	Top-1 acc 59.375 (62.633)	Top-5 acc 83.594 (82.943)	lr 0.01238
Train [61][1220/3239]	Time 2.457 (0.608)	Data Time 0.001 (0.033)	Loss 2.5198 (2.5513)	Entropy 1.08855 (1.08964)	Top-1 acc 63.672 (62.637)	Top-5 acc 85.156 (82.947)	lr 0.01238
Train [61][1230/3239]	Time 0.223 (0.605)	Data Time 0.001 (0.033)	Loss 2.4698 (2.5513)	Entropy 1.08858 (1.08963)	Top-1 acc 65.625 (62.641)	Top-5 acc 83.203 (82.946)	lr 0.01238
Train [61][1240/3239]	Time 0.217 (0.604)	Data Time 0.001 (0.033)	Loss 2.4241 (2.5515)	Entropy 1.08857 (1.08963)	Top-1 acc 67.578 (62.642)	Top-5 acc 86.328 (82.941)	lr 0.01237
Train [61][1250/3239]	Time 0.249 (0.603)	Data Time 0.001 (0.032)	Loss 2.5473 (2.5514)	Entropy 1.08856 (1.08962)	Top-1 acc 60.938 (62.646)	Top-5 acc 83.203 (82.946)	lr 0.01237
Train [61][1260/3239]	Time 0.222 (0.602)	Data Time 0.001 (0.032)	Loss 2.3800 (2.5511)	Entropy 1.08855 (1.08961)	Top-1 acc 65.234 (62.660)	Top-5 acc 85.156 (82.952)	lr 0.01237
Train [61][1270/3239]	Time 0.202 (0.600)	Data Time 0.001 (0.032)	Loss 2.6742 (2.5512)	Entropy 1.08854 (1.08960)	Top-1 acc 62.500 (62.655)	Top-5 acc 82.031 (82.953)	lr 0.01237
Train [61][1280/3239]	Time 0.223 (0.599)	Data Time 0.001 (0.032)	Loss 2.5645 (2.5513)	Entropy 1.08858 (1.08959)	Top-1 acc 61.719 (62.654)	Top-5 acc 82.812 (82.947)	lr 0.01237
Train [61][1290/3239]	Time 0.250 (0.598)	Data Time 0.001 (0.031)	Loss 2.5479 (2.5516)	Entropy 1.08859 (1.08958)	Top-1 acc 64.062 (62.653)	Top-5 acc 82.422 (82.943)	lr 0.01237
Train [61][1300/3239]	Time 0.336 (0.597)	Data Time 0.001 (0.031)	Loss 2.3840 (2.5515)	Entropy 1.08865 (1.08958)	Top-1 acc 62.891 (62.656)	Top-5 acc 87.500 (82.943)	lr 0.01237
Train [61][1310/3239]	Time 0.269 (0.596)	Data Time 0.001 (0.031)	Loss 2.3510 (2.5515)	Entropy 1.08833 (1.08957)	Top-1 acc 67.188 (62.661)	Top-5 acc 84.766 (82.945)	lr 0.01237
Train [61][1320/3239]	Time 0.259 (0.595)	Data Time 0.001 (0.031)	Loss 2.5737 (2.5514)	Entropy 1.08833 (1.08956)	Top-1 acc 58.594 (62.657)	Top-5 acc 82.422 (82.947)	lr 0.01237
Train [61][1330/3239]	Time 51.246 (0.631)	Data Time 0.001 (0.030)	Loss 2.5159 (2.5514)	Entropy 1.08833 (1.08955)	Top-1 acc 63.672 (62.651)	Top-5 acc 83.984 (82.949)	lr 0.01237
Train [61][1340/3239]	Time 0.351 (0.628)	Data Time 0.003 (0.030)	Loss 2.4823 (2.5516)	Entropy 1.08836 (1.08954)	Top-1 acc 66.406 (62.648)	Top-5 acc 84.375 (82.948)	lr 0.01236
Train [61][1350/3239]	Time 0.225 (0.627)	Data Time 0.001 (0.030)	Loss 2.6450 (2.5512)	Entropy 1.08833 (1.08953)	Top-1 acc 59.375 (62.655)	Top-5 acc 80.859 (82.959)	lr 0.01236
Train [61][1360/3239]	Time 0.227 (0.626)	Data Time 0.002 (0.030)	Loss 2.6187 (2.5514)	Entropy 1.08838 (1.08952)	Top-1 acc 65.234 (62.656)	Top-5 acc 82.031 (82.958)	lr 0.01236
Train [61][1370/3239]	Time 0.239 (0.625)	Data Time 0.001 (0.030)	Loss 2.4482 (2.5516)	Entropy 1.08836 (1.08952)	Top-1 acc 64.453 (62.649)	Top-5 acc 85.938 (82.954)	lr 0.01236
Train [61][1380/3239]	Time 0.224 (0.624)	Data Time 0.001 (0.029)	Loss 2.6361 (2.5516)	Entropy 1.08832 (1.08951)	Top-1 acc 60.547 (62.654)	Top-5 acc 80.078 (82.953)	lr 0.01236
Train [61][1390/3239]	Time 0.329 (0.623)	Data Time 0.001 (0.029)	Loss 2.5543 (2.5518)	Entropy 1.08836 (1.08950)	Top-1 acc 63.281 (62.644)	Top-5 acc 84.375 (82.948)	lr 0.01236
Train [61][1400/3239]	Time 0.204 (0.622)	Data Time 0.001 (0.029)	Loss 2.5000 (2.5516)	Entropy 1.08826 (1.08949)	Top-1 acc 62.500 (62.648)	Top-5 acc 85.547 (82.957)	lr 0.01236
Train [61][1410/3239]	Time 0.233 (0.621)	Data Time 0.001 (0.029)	Loss 2.5052 (2.5519)	Entropy 1.08827 (1.08948)	Top-1 acc 64.844 (62.645)	Top-5 acc 82.812 (82.949)	lr 0.01236
Train [61][1420/3239]	Time 0.215 (0.620)	Data Time 0.001 (0.029)	Loss 2.7691 (2.5524)	Entropy 1.08826 (1.08947)	Top-1 acc 58.594 (62.638)	Top-5 acc 78.125 (82.942)	lr 0.01236
Train [61][1430/3239]	Time 0.216 (0.619)	Data Time 0.001 (0.028)	Loss 2.6273 (2.5524)	Entropy 1.08824 (1.08946)	Top-1 acc 59.766 (62.636)	Top-5 acc 81.641 (82.946)	lr 0.01236
Train [61][1440/3239]	Time 2.496 (0.617)	Data Time 0.001 (0.028)	Loss 2.5193 (2.5529)	Entropy 1.08824 (1.08946)	Top-1 acc 62.109 (62.629)	Top-5 acc 80.859 (82.932)	lr 0.01235
Train [61][1450/3239]	Time 0.217 (0.615)	Data Time 0.001 (0.028)	Loss 2.5580 (2.5529)	Entropy 1.08823 (1.08945)	Top-1 acc 62.891 (62.626)	Top-5 acc 79.297 (82.928)	lr 0.01235
Train [61][1460/3239]	Time 0.222 (0.614)	Data Time 0.001 (0.028)	Loss 2.5574 (2.5530)	Entropy 1.08823 (1.08944)	Top-1 acc 64.062 (62.620)	Top-5 acc 82.422 (82.933)	lr 0.01235
Train [61][1470/3239]	Time 0.240 (0.613)	Data Time 0.001 (0.028)	Loss 2.5457 (2.5530)	Entropy 1.08829 (1.08943)	Top-1 acc 61.719 (62.622)	Top-5 acc 82.422 (82.933)	lr 0.01235
Train [61][1480/3239]	Time 0.227 (0.612)	Data Time 0.001 (0.028)	Loss 2.2945 (2.5528)	Entropy 1.08827 (1.08942)	Top-1 acc 67.578 (62.621)	Top-5 acc 88.672 (82.937)	lr 0.01235
Train [61][1490/3239]	Time 0.254 (0.611)	Data Time 0.001 (0.027)	Loss 2.4611 (2.5528)	Entropy 1.08854 (1.08942)	Top-1 acc 64.844 (62.619)	Top-5 acc 83.203 (82.939)	lr 0.01235
Train [61][1500/3239]	Time 0.227 (0.610)	Data Time 0.001 (0.027)	Loss 2.7337 (2.5527)	Entropy 1.08851 (1.08941)	Top-1 acc 56.250 (62.616)	Top-5 acc 79.297 (82.937)	lr 0.01235
Train [61][1510/3239]	Time 0.230 (0.609)	Data Time 0.001 (0.027)	Loss 2.5341 (2.5527)	Entropy 1.08853 (1.08941)	Top-1 acc 62.891 (62.612)	Top-5 acc 85.156 (82.938)	lr 0.01235
Train [61][1520/3239]	Time 0.303 (0.608)	Data Time 0.002 (0.027)	Loss 2.6681 (2.5528)	Entropy 1.08855 (1.08940)	Top-1 acc 60.938 (62.613)	Top-5 acc 80.859 (82.939)	lr 0.01235
Train [61][1530/3239]	Time 0.199 (0.607)	Data Time 0.001 (0.027)	Loss 2.5559 (2.5530)	Entropy 1.08853 (1.08939)	Top-1 acc 58.984 (62.614)	Top-5 acc 81.641 (82.929)	lr 0.01235
Train [61][1540/3239]	Time 0.224 (0.606)	Data Time 0.002 (0.027)	Loss 2.5996 (2.5531)	Entropy 1.08850 (1.08939)	Top-1 acc 58.203 (62.612)	Top-5 acc 80.859 (82.930)	lr 0.01234
Train [61][1550/3239]	Time 2.512 (0.605)	Data Time 0.001 (0.026)	Loss 2.6290 (2.5529)	Entropy 1.08850 (1.08938)	Top-1 acc 60.156 (62.619)	Top-5 acc 82.812 (82.933)	lr 0.01234
Train [61][1560/3239]	Time 0.253 (0.603)	Data Time 0.002 (0.026)	Loss 2.4906 (2.5530)	Entropy 1.08853 (1.08938)	Top-1 acc 66.016 (62.617)	Top-5 acc 83.984 (82.930)	lr 0.01234
Train [61][1570/3239]	Time 0.233 (0.602)	Data Time 0.001 (0.026)	Loss 2.4526 (2.5532)	Entropy 1.08847 (1.08937)	Top-1 acc 61.719 (62.616)	Top-5 acc 83.984 (82.925)	lr 0.01234
Train [61][1580/3239]	Time 0.233 (0.601)	Data Time 0.001 (0.026)	Loss 2.5692 (2.5532)	Entropy 1.08842 (1.08937)	Top-1 acc 62.109 (62.615)	Top-5 acc 81.250 (82.924)	lr 0.01234
Train [61][1590/3239]	Time 0.217 (0.601)	Data Time 0.001 (0.026)	Loss 2.4191 (2.5529)	Entropy 1.08838 (1.08936)	Top-1 acc 64.844 (62.626)	Top-5 acc 83.984 (82.923)	lr 0.01234
Train [61][1600/3239]	Time 0.228 (0.600)	Data Time 0.001 (0.026)	Loss 2.5951 (2.5531)	Entropy 1.08838 (1.08935)	Top-1 acc 64.844 (62.627)	Top-5 acc 83.203 (82.920)	lr 0.01234
Train [61][1610/3239]	Time 0.241 (0.599)	Data Time 0.001 (0.025)	Loss 2.6558 (2.5532)	Entropy 1.08833 (1.08935)	Top-1 acc 60.156 (62.623)	Top-5 acc 80.078 (82.918)	lr 0.01234
Train [61][1620/3239]	Time 0.322 (0.598)	Data Time 0.001 (0.025)	Loss 2.6455 (2.5535)	Entropy 1.08835 (1.08934)	Top-1 acc 58.203 (62.618)	Top-5 acc 81.250 (82.913)	lr 0.01234
Train [61][1630/3239]	Time 0.221 (0.597)	Data Time 0.001 (0.025)	Loss 2.5446 (2.5535)	Entropy 1.08841 (1.08933)	Top-1 acc 63.672 (62.617)	Top-5 acc 83.203 (82.913)	lr 0.01234
Train [61][1640/3239]	Time 0.214 (0.597)	Data Time 0.001 (0.025)	Loss 2.3872 (2.5533)	Entropy 1.08841 (1.08933)	Top-1 acc 66.797 (62.621)	Top-5 acc 87.109 (82.919)	lr 0.01233
Train [61][1650/3239]	Time 0.206 (0.596)	Data Time 0.001 (0.025)	Loss 2.5693 (2.5535)	Entropy 1.08828 (1.08932)	Top-1 acc 62.109 (62.624)	Top-5 acc 83.594 (82.913)	lr 0.01233
Train [61][1660/3239]	Time 2.623 (0.595)	Data Time 0.001 (0.025)	Loss 2.6157 (2.5535)	Entropy 1.08828 (1.08932)	Top-1 acc 65.234 (62.622)	Top-5 acc 80.859 (82.914)	lr 0.01233
Train [61][1670/3239]	Time 0.236 (0.593)	Data Time 0.001 (0.025)	Loss 2.8042 (2.5538)	Entropy 1.08821 (1.08931)	Top-1 acc 56.641 (62.612)	Top-5 acc 73.828 (82.911)	lr 0.01233
Train [61][1680/3239]	Time 0.228 (0.592)	Data Time 0.001 (0.024)	Loss 2.7757 (2.5541)	Entropy 1.08822 (1.08930)	Top-1 acc 57.422 (62.602)	Top-5 acc 79.297 (82.905)	lr 0.01233
Train [61][1690/3239]	Time 0.249 (0.591)	Data Time 0.001 (0.024)	Loss 2.5421 (2.5543)	Entropy 1.08822 (1.08930)	Top-1 acc 63.672 (62.596)	Top-5 acc 83.594 (82.903)	lr 0.01233
Train [61][1700/3239]	Time 0.313 (0.621)	Data Time 0.003 (0.024)	Loss 2.5670 (2.5542)	Entropy 1.08819 (1.08929)	Top-1 acc 63.281 (62.598)	Top-5 acc 83.984 (82.903)	lr 0.01233
Train [61][1710/3239]	Time 0.322 (0.620)	Data Time 0.002 (0.024)	Loss 2.7115 (2.5543)	Entropy 1.08813 (1.08928)	Top-1 acc 59.766 (62.594)	Top-5 acc 80.859 (82.902)	lr 0.01233
Train [61][1720/3239]	Time 0.238 (0.619)	Data Time 0.001 (0.024)	Loss 2.4452 (2.5543)	Entropy 1.08810 (1.08928)	Top-1 acc 68.359 (62.597)	Top-5 acc 83.594 (82.898)	lr 0.01233
Train [61][1730/3239]	Time 0.223 (0.618)	Data Time 0.001 (0.024)	Loss 2.6207 (2.5545)	Entropy 1.08821 (1.08927)	Top-1 acc 61.719 (62.593)	Top-5 acc 81.250 (82.896)	lr 0.01233
Train [61][1740/3239]	Time 0.215 (0.618)	Data Time 0.001 (0.024)	Loss 2.5704 (2.5546)	Entropy 1.08822 (1.08927)	Top-1 acc 60.938 (62.593)	Top-5 acc 82.422 (82.895)	lr 0.01232
Train [61][1750/3239]	Time 0.271 (0.617)	Data Time 0.001 (0.024)	Loss 2.5941 (2.5547)	Entropy 1.08821 (1.08926)	Top-1 acc 60.156 (62.588)	Top-5 acc 83.984 (82.897)	lr 0.01232
Train [61][1760/3239]	Time 0.256 (0.616)	Data Time 0.001 (0.023)	Loss 2.6067 (2.5546)	Entropy 1.08822 (1.08925)	Top-1 acc 59.766 (62.592)	Top-5 acc 81.250 (82.900)	lr 0.01232
Train [61][1770/3239]	Time 2.442 (0.615)	Data Time 0.001 (0.023)	Loss 2.5310 (2.5547)	Entropy 1.08822 (1.08925)	Top-1 acc 62.500 (62.583)	Top-5 acc 83.984 (82.901)	lr 0.01232
Train [61][1780/3239]	Time 0.223 (0.613)	Data Time 0.001 (0.023)	Loss 2.5703 (2.5547)	Entropy 1.08816 (1.08924)	Top-1 acc 62.500 (62.581)	Top-5 acc 83.594 (82.901)	lr 0.01232
Train [61][1790/3239]	Time 0.178 (0.612)	Data Time 0.001 (0.023)	Loss 2.5833 (2.5548)	Entropy 1.08809 (1.08923)	Top-1 acc 64.062 (62.578)	Top-5 acc 82.812 (82.902)	lr 0.01232
Train [61][1800/3239]	Time 0.235 (0.611)	Data Time 0.001 (0.023)	Loss 2.7621 (2.5550)	Entropy 1.08809 (1.08923)	Top-1 acc 55.859 (62.573)	Top-5 acc 80.078 (82.894)	lr 0.01232
Train [61][1810/3239]	Time 0.229 (0.610)	Data Time 0.002 (0.023)	Loss 2.6683 (2.5548)	Entropy 1.08808 (1.08922)	Top-1 acc 60.156 (62.574)	Top-5 acc 82.031 (82.900)	lr 0.01232
Train [61][1820/3239]	Time 0.225 (0.610)	Data Time 0.001 (0.023)	Loss 2.7373 (2.5550)	Entropy 1.08807 (1.08922)	Top-1 acc 58.594 (62.568)	Top-5 acc 77.734 (82.893)	lr 0.01232
Train [61][1830/3239]	Time 0.230 (0.609)	Data Time 0.001 (0.023)	Loss 2.6141 (2.5551)	Entropy 1.08806 (1.08921)	Top-1 acc 59.766 (62.569)	Top-5 acc 82.422 (82.895)	lr 0.01232
Train [61][1840/3239]	Time 0.237 (0.608)	Data Time 0.002 (0.023)	Loss 2.4979 (2.5550)	Entropy 1.08798 (1.08920)	Top-1 acc 62.891 (62.566)	Top-5 acc 83.594 (82.896)	lr 0.01231
Train [61][1850/3239]	Time 0.313 (0.607)	Data Time 0.001 (0.022)	Loss 2.7581 (2.5551)	Entropy 1.08798 (1.08920)	Top-1 acc 58.984 (62.556)	Top-5 acc 76.953 (82.896)	lr 0.01231
Train [61][1860/3239]	Time 0.221 (0.607)	Data Time 0.001 (0.022)	Loss 2.6991 (2.5553)	Entropy 1.08800 (1.08919)	Top-1 acc 59.375 (62.549)	Top-5 acc 79.688 (82.889)	lr 0.01231
Train [61][1870/3239]	Time 0.249 (0.606)	Data Time 0.001 (0.022)	Loss 2.5548 (2.5554)	Entropy 1.08796 (1.08918)	Top-1 acc 67.578 (62.545)	Top-5 acc 82.422 (82.886)	lr 0.01231
Train [61][1880/3239]	Time 2.514 (0.605)	Data Time 0.003 (0.022)	Loss 2.5722 (2.5558)	Entropy 1.08796 (1.08918)	Top-1 acc 62.109 (62.534)	Top-5 acc 82.422 (82.880)	lr 0.01231
Train [61][1890/3239]	Time 0.234 (0.603)	Data Time 0.002 (0.022)	Loss 2.3485 (2.5555)	Entropy 1.08792 (1.08917)	Top-1 acc 66.406 (62.538)	Top-5 acc 86.719 (82.886)	lr 0.01231
Train [61][1900/3239]	Time 0.224 (0.603)	Data Time 0.001 (0.022)	Loss 2.5452 (2.5555)	Entropy 1.08788 (1.08916)	Top-1 acc 62.500 (62.541)	Top-5 acc 81.641 (82.885)	lr 0.01231
Train [61][1910/3239]	Time 0.236 (0.602)	Data Time 0.002 (0.022)	Loss 2.5733 (2.5556)	Entropy 1.08782 (1.08916)	Top-1 acc 62.500 (62.537)	Top-5 acc 83.203 (82.882)	lr 0.01231
Train [61][1920/3239]	Time 0.226 (0.601)	Data Time 0.001 (0.022)	Loss 2.5789 (2.5559)	Entropy 1.08782 (1.08915)	Top-1 acc 61.328 (62.533)	Top-5 acc 82.031 (82.879)	lr 0.01231
Train [61][1930/3239]	Time 0.240 (0.600)	Data Time 0.001 (0.022)	Loss 2.3758 (2.5557)	Entropy 1.08777 (1.08914)	Top-1 acc 65.234 (62.534)	Top-5 acc 87.109 (82.883)	lr 0.01231
Train [61][1940/3239]	Time 0.317 (0.600)	Data Time 0.001 (0.021)	Loss 2.5285 (2.5556)	Entropy 1.08777 (1.08914)	Top-1 acc 64.453 (62.541)	Top-5 acc 83.203 (82.888)	lr 0.01230
Train [61][1950/3239]	Time 0.211 (0.599)	Data Time 0.001 (0.021)	Loss 2.6401 (2.5555)	Entropy 1.08770 (1.08913)	Top-1 acc 62.500 (62.538)	Top-5 acc 83.203 (82.890)	lr 0.01230
Train [61][1960/3239]	Time 0.221 (0.598)	Data Time 0.001 (0.021)	Loss 2.4772 (2.5558)	Entropy 1.08764 (1.08912)	Top-1 acc 64.453 (62.530)	Top-5 acc 84.766 (82.878)	lr 0.01230
Train [61][1970/3239]	Time 0.217 (0.598)	Data Time 0.001 (0.021)	Loss 2.6250 (2.5558)	Entropy 1.08763 (1.08911)	Top-1 acc 63.672 (62.536)	Top-5 acc 79.688 (82.878)	lr 0.01230
Train [61][1980/3239]	Time 0.252 (0.597)	Data Time 0.001 (0.021)	Loss 2.3620 (2.5555)	Entropy 1.08762 (1.08911)	Top-1 acc 67.969 (62.539)	Top-5 acc 86.328 (82.884)	lr 0.01230
Train [61][1990/3239]	Time 2.586 (0.596)	Data Time 0.001 (0.021)	Loss 2.6248 (2.5555)	Entropy 1.08762 (1.08910)	Top-1 acc 60.938 (62.536)	Top-5 acc 81.250 (82.887)	lr 0.01230
Train [61][2000/3239]	Time 0.223 (0.594)	Data Time 0.001 (0.021)	Loss 2.5272 (2.5555)	Entropy 1.08757 (1.08909)	Top-1 acc 60.156 (62.535)	Top-5 acc 84.766 (82.886)	lr 0.01230
Train [61][2010/3239]	Time 0.225 (0.594)	Data Time 0.001 (0.021)	Loss 2.6198 (2.5557)	Entropy 1.08758 (1.08908)	Top-1 acc 62.891 (62.527)	Top-5 acc 82.422 (82.886)	lr 0.01230
Train [61][2020/3239]	Time 0.218 (0.593)	Data Time 0.001 (0.021)	Loss 2.6834 (2.5555)	Entropy 1.08753 (1.08908)	Top-1 acc 60.547 (62.531)	Top-5 acc 79.688 (82.890)	lr 0.01230
Train [61][2030/3239]	Time 0.214 (0.592)	Data Time 0.001 (0.021)	Loss 2.3756 (2.5555)	Entropy 1.08748 (1.08907)	Top-1 acc 66.016 (62.535)	Top-5 acc 86.328 (82.890)	lr 0.01229
Train [61][2040/3239]	Time 0.225 (0.592)	Data Time 0.001 (0.020)	Loss 2.4814 (2.5555)	Entropy 1.08745 (1.08906)	Top-1 acc 63.672 (62.534)	Top-5 acc 85.156 (82.891)	lr 0.01229
Train [61][2050/3239]	Time 0.245 (0.591)	Data Time 0.001 (0.020)	Loss 2.6227 (2.5558)	Entropy 1.08746 (1.08905)	Top-1 acc 62.109 (62.527)	Top-5 acc 81.250 (82.885)	lr 0.01229
Train [61][2060/3239]	Time 0.227 (0.615)	Data Time 0.002 (0.020)	Loss 2.7079 (2.5559)	Entropy 1.08743 (1.08904)	Top-1 acc 62.109 (62.523)	Top-5 acc 80.078 (82.880)	lr 0.01229
Train [61][2070/3239]	Time 0.232 (0.614)	Data Time 0.002 (0.020)	Loss 2.6193 (2.5558)	Entropy 1.08743 (1.08904)	Top-1 acc 62.500 (62.528)	Top-5 acc 80.469 (82.881)	lr 0.01229
Train [61][2080/3239]	Time 0.381 (0.613)	Data Time 0.002 (0.020)	Loss 2.4706 (2.5558)	Entropy 1.08738 (1.08903)	Top-1 acc 64.844 (62.526)	Top-5 acc 85.156 (82.884)	lr 0.01229
Train [61][2090/3239]	Time 0.237 (0.613)	Data Time 0.001 (0.020)	Loss 2.6197 (2.5558)	Entropy 1.08736 (1.08902)	Top-1 acc 60.547 (62.519)	Top-5 acc 83.594 (82.888)	lr 0.01229
Train [61][2100/3239]	Time 2.525 (0.612)	Data Time 0.001 (0.020)	Loss 2.4246 (2.5556)	Entropy 1.08736 (1.08901)	Top-1 acc 62.891 (62.522)	Top-5 acc 85.938 (82.895)	lr 0.01229
Train [61][2110/3239]	Time 0.230 (0.610)	Data Time 0.001 (0.020)	Loss 2.5739 (2.5554)	Entropy 1.08727 (1.08901)	Top-1 acc 60.547 (62.523)	Top-5 acc 81.250 (82.899)	lr 0.01229
Train [61][2120/3239]	Time 0.218 (0.610)	Data Time 0.001 (0.020)	Loss 2.4503 (2.5554)	Entropy 1.08722 (1.08900)	Top-1 acc 64.062 (62.528)	Top-5 acc 85.547 (82.898)	lr 0.01229
Train [61][2130/3239]	Time 0.253 (0.609)	Data Time 0.001 (0.020)	Loss 2.5642 (2.5555)	Entropy 1.08716 (1.08899)	Top-1 acc 61.719 (62.529)	Top-5 acc 82.812 (82.895)	lr 0.01228
Train [61][2140/3239]	Time 0.193 (0.608)	Data Time 0.001 (0.020)	Loss 2.5932 (2.5555)	Entropy 1.08711 (1.08898)	Top-1 acc 65.234 (62.530)	Top-5 acc 80.859 (82.897)	lr 0.01228
Train [61][2150/3239]	Time 0.233 (0.608)	Data Time 0.001 (0.020)	Loss 2.5731 (2.5555)	Entropy 1.08712 (1.08897)	Top-1 acc 60.938 (62.531)	Top-5 acc 80.469 (82.899)	lr 0.01228
Train [61][2160/3239]	Time 0.229 (0.607)	Data Time 0.001 (0.019)	Loss 2.5292 (2.5558)	Entropy 1.08709 (1.08896)	Top-1 acc 60.547 (62.526)	Top-5 acc 85.938 (82.894)	lr 0.01228
Train [61][2170/3239]	Time 0.219 (0.606)	Data Time 0.001 (0.019)	Loss 2.3724 (2.5558)	Entropy 1.08722 (1.08895)	Top-1 acc 68.359 (62.526)	Top-5 acc 85.547 (82.896)	lr 0.01228
Train [61][2180/3239]	Time 0.216 (0.606)	Data Time 0.001 (0.019)	Loss 2.8825 (2.5560)	Entropy 1.08723 (1.08895)	Top-1 acc 54.297 (62.523)	Top-5 acc 75.781 (82.892)	lr 0.01228
Train [61][2190/3239]	Time 0.204 (0.605)	Data Time 0.001 (0.019)	Loss 2.5747 (2.5563)	Entropy 1.08727 (1.08894)	Top-1 acc 59.375 (62.516)	Top-5 acc 80.469 (82.886)	lr 0.01228
Train [61][2200/3239]	Time 0.221 (0.604)	Data Time 0.001 (0.019)	Loss 2.7057 (2.5567)	Entropy 1.08727 (1.08893)	Top-1 acc 59.375 (62.509)	Top-5 acc 80.078 (82.877)	lr 0.01228
Train [61][2210/3239]	Time 2.470 (0.604)	Data Time 0.001 (0.019)	Loss 2.5027 (2.5567)	Entropy 1.08727 (1.08892)	Top-1 acc 65.625 (62.510)	Top-5 acc 82.812 (82.875)	lr 0.01228
Train [61][2220/3239]	Time 0.234 (0.602)	Data Time 0.001 (0.019)	Loss 2.4408 (2.5568)	Entropy 1.08721 (1.08892)	Top-1 acc 69.531 (62.507)	Top-5 acc 83.984 (82.872)	lr 0.01228
Train [61][2230/3239]	Time 0.210 (0.601)	Data Time 0.001 (0.019)	Loss 2.6924 (2.5568)	Entropy 1.08712 (1.08891)	Top-1 acc 61.328 (62.503)	Top-5 acc 82.422 (82.875)	lr 0.01227
Train [61][2240/3239]	Time 0.230 (0.601)	Data Time 0.001 (0.019)	Loss 2.5350 (2.5568)	Entropy 1.08707 (1.08890)	Top-1 acc 66.016 (62.503)	Top-5 acc 83.594 (82.873)	lr 0.01227
Train [61][2250/3239]	Time 0.203 (0.600)	Data Time 0.001 (0.019)	Loss 2.4591 (2.5568)	Entropy 1.08694 (1.08889)	Top-1 acc 62.500 (62.501)	Top-5 acc 84.375 (82.874)	lr 0.01227
Train [61][2260/3239]	Time 0.229 (0.599)	Data Time 0.001 (0.019)	Loss 2.5976 (2.5570)	Entropy 1.08685 (1.08888)	Top-1 acc 58.203 (62.492)	Top-5 acc 81.641 (82.870)	lr 0.01227
Train [61][2270/3239]	Time 0.324 (0.599)	Data Time 0.002 (0.019)	Loss 2.6132 (2.5573)	Entropy 1.08684 (1.08887)	Top-1 acc 58.984 (62.482)	Top-5 acc 82.812 (82.865)	lr 0.01227
Train [61][2280/3239]	Time 0.214 (0.598)	Data Time 0.001 (0.019)	Loss 2.5731 (2.5572)	Entropy 1.08680 (1.08886)	Top-1 acc 60.547 (62.482)	Top-5 acc 83.203 (82.867)	lr 0.01227
Train [61][2290/3239]	Time 0.210 (0.598)	Data Time 0.001 (0.018)	Loss 2.6677 (2.5571)	Entropy 1.08679 (1.08885)	Top-1 acc 58.203 (62.484)	Top-5 acc 79.688 (82.867)	lr 0.01227
Train [61][2300/3239]	Time 0.229 (0.597)	Data Time 0.005 (0.018)	Loss 2.4140 (2.5570)	Entropy 1.08678 (1.08885)	Top-1 acc 65.234 (62.487)	Top-5 acc 82.422 (82.863)	lr 0.01227
Train [61][2310/3239]	Time 0.242 (0.597)	Data Time 0.001 (0.018)	Loss 2.5292 (2.5573)	Entropy 1.08679 (1.08884)	Top-1 acc 64.453 (62.479)	Top-5 acc 82.422 (82.858)	lr 0.01227
Train [61][2320/3239]	Time 2.516 (0.596)	Data Time 0.001 (0.018)	Loss 2.6781 (2.5573)	Entropy 1.08679 (1.08883)	Top-1 acc 58.203 (62.477)	Top-5 acc 79.297 (82.859)	lr 0.01227
Train [61][2330/3239]	Time 0.248 (0.595)	Data Time 0.002 (0.018)	Loss 2.7352 (2.5574)	Entropy 1.08679 (1.08882)	Top-1 acc 61.719 (62.478)	Top-5 acc 78.516 (82.859)	lr 0.01226
Train [61][2340/3239]	Time 0.193 (0.594)	Data Time 0.002 (0.018)	Loss 2.6085 (2.5574)	Entropy 1.08677 (1.08881)	Top-1 acc 62.891 (62.480)	Top-5 acc 78.906 (82.858)	lr 0.01226
Train [61][2350/3239]	Time 0.213 (0.593)	Data Time 0.001 (0.018)	Loss 2.6185 (2.5573)	Entropy 1.08677 (1.08880)	Top-1 acc 62.109 (62.478)	Top-5 acc 79.688 (82.856)	lr 0.01226
Train [61][2360/3239]	Time 0.284 (0.593)	Data Time 0.001 (0.018)	Loss 2.5518 (2.5573)	Entropy 1.08679 (1.08879)	Top-1 acc 63.281 (62.471)	Top-5 acc 83.203 (82.858)	lr 0.01226
Train [61][2370/3239]	Time 0.226 (0.592)	Data Time 0.001 (0.018)	Loss 2.5807 (2.5576)	Entropy 1.08677 (1.08878)	Top-1 acc 60.156 (62.462)	Top-5 acc 83.984 (82.858)	lr 0.01226
Train [61][2380/3239]	Time 0.235 (0.592)	Data Time 0.001 (0.018)	Loss 2.3652 (2.5574)	Entropy 1.08677 (1.08878)	Top-1 acc 66.016 (62.463)	Top-5 acc 84.766 (82.862)	lr 0.01226
Train [61][2390/3239]	Time 0.221 (0.591)	Data Time 0.001 (0.018)	Loss 2.6141 (2.5572)	Entropy 1.08678 (1.08877)	Top-1 acc 61.328 (62.468)	Top-5 acc 80.469 (82.865)	lr 0.01226
Train [61][2400/3239]	Time 0.224 (0.591)	Data Time 0.001 (0.018)	Loss 2.5934 (2.5571)	Entropy 1.08672 (1.08876)	Top-1 acc 59.375 (62.471)	Top-5 acc 83.203 (82.866)	lr 0.01226
Train [61][2410/3239]	Time 0.361 (0.590)	Data Time 0.003 (0.018)	Loss 2.6451 (2.5573)	Entropy 1.08673 (1.08875)	Top-1 acc 62.109 (62.466)	Top-5 acc 78.906 (82.860)	lr 0.01226
Train [61][2420/3239]	Time 0.341 (0.610)	Data Time 0.006 (0.018)	Loss 2.7267 (2.5575)	Entropy 1.08671 (1.08874)	Top-1 acc 57.812 (62.460)	Top-5 acc 77.734 (82.854)	lr 0.01226
Train [61][2430/3239]	Time 2.496 (0.610)	Data Time 0.002 (0.017)	Loss 2.4453 (2.5576)	Entropy 1.08671 (1.08873)	Top-1 acc 64.453 (62.457)	Top-5 acc 82.422 (82.853)	lr 0.01225
Train [61][2440/3239]	Time 0.241 (0.608)	Data Time 0.002 (0.017)	Loss 2.4835 (2.5575)	Entropy 1.08670 (1.08873)	Top-1 acc 64.062 (62.466)	Top-5 acc 83.984 (82.858)	lr 0.01225
Train [61][2450/3239]	Time 0.221 (0.608)	Data Time 0.001 (0.017)	Loss 2.5362 (2.5574)	Entropy 1.08671 (1.08872)	Top-1 acc 62.109 (62.465)	Top-5 acc 84.766 (82.860)	lr 0.01225
Train [61][2460/3239]	Time 0.228 (0.607)	Data Time 0.001 (0.017)	Loss 2.6640 (2.5576)	Entropy 1.08663 (1.08871)	Top-1 acc 59.375 (62.460)	Top-5 acc 80.469 (82.856)	lr 0.01225
Train [61][2470/3239]	Time 0.202 (0.607)	Data Time 0.001 (0.017)	Loss 2.6301 (2.5578)	Entropy 1.08664 (1.08870)	Top-1 acc 63.281 (62.454)	Top-5 acc 82.031 (82.853)	lr 0.01225
Train [61][2480/3239]	Time 0.234 (0.606)	Data Time 0.001 (0.017)	Loss 2.4659 (2.5576)	Entropy 1.08664 (1.08869)	Top-1 acc 65.234 (62.460)	Top-5 acc 83.984 (82.855)	lr 0.01225
Train [61][2490/3239]	Time 0.212 (0.606)	Data Time 0.001 (0.017)	Loss 2.7980 (2.5577)	Entropy 1.08662 (1.08868)	Top-1 acc 58.594 (62.459)	Top-5 acc 76.562 (82.852)	lr 0.01225
Train [61][2500/3239]	Time 0.222 (0.605)	Data Time 0.001 (0.017)	Loss 2.6649 (2.5577)	Entropy 1.08661 (1.08868)	Top-1 acc 57.812 (62.458)	Top-5 acc 79.297 (82.852)	lr 0.01225
Train [61][2510/3239]	Time 0.205 (0.604)	Data Time 0.002 (0.017)	Loss 2.4921 (2.5578)	Entropy 1.08657 (1.08867)	Top-1 acc 64.453 (62.460)	Top-5 acc 83.203 (82.845)	lr 0.01225
Train [61][2520/3239]	Time 0.228 (0.604)	Data Time 0.001 (0.017)	Loss 2.4652 (2.5579)	Entropy 1.08654 (1.08866)	Top-1 acc 64.453 (62.458)	Top-5 acc 83.984 (82.844)	lr 0.01225
Train [61][2530/3239]	Time 0.249 (0.603)	Data Time 0.001 (0.017)	Loss 2.5051 (2.5579)	Entropy 1.08650 (1.08865)	Top-1 acc 62.891 (62.454)	Top-5 acc 84.375 (82.843)	lr 0.01224
Train [61][2540/3239]	Time 2.517 (0.603)	Data Time 0.001 (0.017)	Loss 2.5551 (2.5579)	Entropy 1.08650 (1.08864)	Top-1 acc 65.625 (62.451)	Top-5 acc 79.688 (82.844)	lr 0.01224
Train [61][2550/3239]	Time 0.396 (0.602)	Data Time 0.002 (0.017)	Loss 2.4339 (2.5579)	Entropy 1.08649 (1.08863)	Top-1 acc 63.672 (62.451)	Top-5 acc 85.938 (82.845)	lr 0.01224
Train [61][2560/3239]	Time 0.248 (0.601)	Data Time 0.001 (0.017)	Loss 2.4910 (2.5578)	Entropy 1.08649 (1.08863)	Top-1 acc 65.234 (62.456)	Top-5 acc 83.594 (82.845)	lr 0.01224
Train [61][2570/3239]	Time 0.234 (0.601)	Data Time 0.001 (0.017)	Loss 2.5364 (2.5580)	Entropy 1.08644 (1.08862)	Top-1 acc 62.109 (62.449)	Top-5 acc 84.375 (82.841)	lr 0.01224
Train [61][2580/3239]	Time 0.232 (0.600)	Data Time 0.001 (0.017)	Loss 2.5569 (2.5582)	Entropy 1.08643 (1.08861)	Top-1 acc 57.812 (62.442)	Top-5 acc 82.422 (82.835)	lr 0.01224
Train [61][2590/3239]	Time 0.235 (0.600)	Data Time 0.001 (0.017)	Loss 2.5960 (2.5580)	Entropy 1.08644 (1.08860)	Top-1 acc 61.328 (62.449)	Top-5 acc 82.031 (82.838)	lr 0.01224
Train [61][2600/3239]	Time 0.204 (0.599)	Data Time 0.001 (0.016)	Loss 2.6407 (2.5580)	Entropy 1.08641 (1.08859)	Top-1 acc 59.375 (62.448)	Top-5 acc 81.250 (82.834)	lr 0.01224
Train [61][2610/3239]	Time 0.227 (0.599)	Data Time 0.001 (0.016)	Loss 2.6330 (2.5582)	Entropy 1.08643 (1.08858)	Top-1 acc 59.766 (62.442)	Top-5 acc 81.250 (82.830)	lr 0.01224
Train [61][2620/3239]	Time 0.225 (0.598)	Data Time 0.001 (0.016)	Loss 2.6205 (2.5583)	Entropy 1.08641 (1.08858)	Top-1 acc 60.938 (62.444)	Top-5 acc 80.469 (82.828)	lr 0.01224
Train [61][2630/3239]	Time 0.223 (0.598)	Data Time 0.001 (0.016)	Loss 2.5321 (2.5583)	Entropy 1.08641 (1.08857)	Top-1 acc 60.547 (62.439)	Top-5 acc 82.812 (82.826)	lr 0.01223
Train [61][2640/3239]	Time 0.245 (0.597)	Data Time 0.001 (0.016)	Loss 2.5861 (2.5582)	Entropy 1.08640 (1.08856)	Top-1 acc 60.156 (62.439)	Top-5 acc 80.859 (82.828)	lr 0.01223
Train [61][2650/3239]	Time 0.240 (0.597)	Data Time 0.001 (0.016)	Loss 2.4810 (2.5579)	Entropy 1.08635 (1.08855)	Top-1 acc 62.500 (62.444)	Top-5 acc 84.375 (82.833)	lr 0.01223
Train [61][2660/3239]	Time 0.230 (0.596)	Data Time 0.001 (0.016)	Loss 2.5450 (2.5577)	Entropy 1.08630 (1.08854)	Top-1 acc 63.281 (62.446)	Top-5 acc 83.203 (82.833)	lr 0.01223
Train [61][2670/3239]	Time 0.257 (0.596)	Data Time 0.001 (0.016)	Loss 2.4659 (2.5575)	Entropy 1.08625 (1.08853)	Top-1 acc 62.500 (62.446)	Top-5 acc 85.938 (82.838)	lr 0.01223
Train [61][2680/3239]	Time 0.214 (0.595)	Data Time 0.001 (0.016)	Loss 2.3712 (2.5574)	Entropy 1.08622 (1.08853)	Top-1 acc 67.188 (62.453)	Top-5 acc 87.891 (82.844)	lr 0.01223
Train [61][2690/3239]	Time 0.331 (0.595)	Data Time 0.001 (0.016)	Loss 2.7909 (2.5577)	Entropy 1.08619 (1.08852)	Top-1 acc 55.859 (62.446)	Top-5 acc 78.516 (82.838)	lr 0.01223
Train [61][2700/3239]	Time 0.272 (0.594)	Data Time 0.001 (0.016)	Loss 2.6153 (2.5581)	Entropy 1.08617 (1.08851)	Top-1 acc 62.109 (62.438)	Top-5 acc 82.031 (82.831)	lr 0.01223
Train [61][2710/3239]	Time 0.253 (0.594)	Data Time 0.002 (0.016)	Loss 2.5886 (2.5580)	Entropy 1.08615 (1.08850)	Top-1 acc 62.109 (62.437)	Top-5 acc 82.812 (82.833)	lr 0.01223
Train [61][2720/3239]	Time 0.293 (0.593)	Data Time 0.001 (0.016)	Loss 2.4086 (2.5581)	Entropy 1.08612 (1.08849)	Top-1 acc 67.188 (62.430)	Top-5 acc 87.109 (82.834)	lr 0.01223
Train [61][2730/3239]	Time 0.291 (0.593)	Data Time 0.001 (0.016)	Loss 2.6257 (2.5583)	Entropy 1.08612 (1.08848)	Top-1 acc 60.547 (62.423)	Top-5 acc 83.203 (82.829)	lr 0.01222
Train [61][2740/3239]	Time 0.225 (0.592)	Data Time 0.001 (0.016)	Loss 2.6883 (2.5585)	Entropy 1.08616 (1.08847)	Top-1 acc 60.156 (62.418)	Top-5 acc 78.516 (82.826)	lr 0.01222
Train [61][2750/3239]	Time 0.194 (0.592)	Data Time 0.001 (0.016)	Loss 2.4137 (2.5585)	Entropy 1.08618 (1.08847)	Top-1 acc 65.625 (62.419)	Top-5 acc 86.719 (82.824)	lr 0.01222
Train [61][2760/3239]	Time 0.243 (0.592)	Data Time 0.001 (0.016)	Loss 2.6428 (2.5587)	Entropy 1.08620 (1.08846)	Top-1 acc 59.766 (62.414)	Top-5 acc 82.422 (82.824)	lr 0.01222
Train [61][2770/3239]	Time 0.211 (0.610)	Data Time 0.003 (0.016)	Loss 2.2925 (2.5586)	Entropy 1.08616 (1.08845)	Top-1 acc 69.531 (62.418)	Top-5 acc 87.891 (82.824)	lr 0.01222
Train [61][2780/3239]	Time 0.339 (0.610)	Data Time 0.002 (0.015)	Loss 2.6988 (2.5586)	Entropy 1.08613 (1.08844)	Top-1 acc 58.594 (62.422)	Top-5 acc 81.641 (82.824)	lr 0.01222
Train [61][2790/3239]	Time 0.216 (0.609)	Data Time 0.001 (0.015)	Loss 2.5391 (2.5587)	Entropy 1.08613 (1.08843)	Top-1 acc 63.281 (62.418)	Top-5 acc 82.812 (82.822)	lr 0.01222
Train [61][2800/3239]	Time 0.298 (0.608)	Data Time 0.002 (0.015)	Loss 2.4989 (2.5587)	Entropy 1.08618 (1.08842)	Top-1 acc 63.281 (62.416)	Top-5 acc 83.594 (82.824)	lr 0.01222
Train [61][2810/3239]	Time 0.249 (0.608)	Data Time 0.001 (0.015)	Loss 2.6236 (2.5585)	Entropy 1.08617 (1.08842)	Top-1 acc 60.547 (62.418)	Top-5 acc 81.250 (82.829)	lr 0.01222
Train [61][2820/3239]	Time 0.252 (0.608)	Data Time 0.001 (0.015)	Loss 2.5698 (2.5585)	Entropy 1.08619 (1.08841)	Top-1 acc 62.500 (62.420)	Top-5 acc 82.812 (82.828)	lr 0.01222
Train [61][2830/3239]	Time 0.240 (0.607)	Data Time 0.001 (0.015)	Loss 2.5935 (2.5585)	Entropy 1.08615 (1.08840)	Top-1 acc 63.281 (62.426)	Top-5 acc 81.641 (82.829)	lr 0.01221
Train [61][2840/3239]	Time 0.290 (0.607)	Data Time 0.008 (0.015)	Loss 2.6349 (2.5585)	Entropy 1.08611 (1.08839)	Top-1 acc 61.328 (62.426)	Top-5 acc 81.641 (82.825)	lr 0.01221
Train [61][2850/3239]	Time 0.267 (0.606)	Data Time 0.001 (0.015)	Loss 2.6762 (2.5586)	Entropy 1.08609 (1.08838)	Top-1 acc 59.766 (62.425)	Top-5 acc 78.516 (82.822)	lr 0.01221
Train [61][2860/3239]	Time 0.212 (0.606)	Data Time 0.001 (0.015)	Loss 2.3575 (2.5585)	Entropy 1.08604 (1.08838)	Top-1 acc 67.188 (62.427)	Top-5 acc 87.891 (82.823)	lr 0.01221
Train [61][2870/3239]	Time 0.358 (0.605)	Data Time 0.001 (0.015)	Loss 2.5484 (2.5587)	Entropy 1.08607 (1.08837)	Top-1 acc 62.500 (62.422)	Top-5 acc 83.203 (82.820)	lr 0.01221
Train [61][2880/3239]	Time 0.210 (0.605)	Data Time 0.001 (0.015)	Loss 3.1518 (2.5588)	Entropy 1.08606 (1.08836)	Top-1 acc 50.000 (62.416)	Top-5 acc 73.438 (82.819)	lr 0.01221
Train [61][2890/3239]	Time 0.265 (0.604)	Data Time 0.001 (0.015)	Loss 2.5757 (2.5588)	Entropy 1.08601 (1.08835)	Top-1 acc 63.281 (62.414)	Top-5 acc 79.297 (82.820)	lr 0.01221
Train [61][2900/3239]	Time 0.287 (0.604)	Data Time 0.001 (0.015)	Loss 2.6490 (2.5588)	Entropy 1.08596 (1.08834)	Top-1 acc 59.766 (62.415)	Top-5 acc 80.078 (82.820)	lr 0.01221
Train [61][2910/3239]	Time 0.290 (0.603)	Data Time 0.001 (0.015)	Loss 2.4524 (2.5589)	Entropy 1.08600 (1.08834)	Top-1 acc 65.625 (62.410)	Top-5 acc 84.766 (82.818)	lr 0.01221
Train [61][2920/3239]	Time 0.243 (0.603)	Data Time 0.001 (0.015)	Loss 2.3526 (2.5588)	Entropy 1.08595 (1.08833)	Top-1 acc 70.312 (62.415)	Top-5 acc 87.109 (82.823)	lr 0.01221
Train [61][2930/3239]	Time 0.279 (0.602)	Data Time 0.001 (0.015)	Loss 2.7162 (2.5589)	Entropy 1.08581 (1.08832)	Top-1 acc 57.422 (62.409)	Top-5 acc 79.297 (82.821)	lr 0.01220
Train [61][2940/3239]	Time 0.207 (0.602)	Data Time 0.001 (0.015)	Loss 2.5655 (2.5589)	Entropy 1.08580 (1.08831)	Top-1 acc 62.891 (62.412)	Top-5 acc 82.031 (82.822)	lr 0.01220
Train [61][2950/3239]	Time 0.265 (0.602)	Data Time 0.001 (0.015)	Loss 2.5427 (2.5589)	Entropy 1.08580 (1.08830)	Top-1 acc 60.938 (62.415)	Top-5 acc 81.250 (82.823)	lr 0.01220
Train [61][2960/3239]	Time 0.339 (0.601)	Data Time 0.001 (0.015)	Loss 2.6972 (2.5590)	Entropy 1.08581 (1.08829)	Top-1 acc 61.719 (62.413)	Top-5 acc 80.078 (82.821)	lr 0.01220
Train [61][2970/3239]	Time 0.262 (0.601)	Data Time 0.002 (0.015)	Loss 2.4436 (2.5589)	Entropy 1.08582 (1.08829)	Top-1 acc 64.844 (62.419)	Top-5 acc 83.984 (82.823)	lr 0.01220
Train [61][2980/3239]	Time 0.239 (0.600)	Data Time 0.001 (0.015)	Loss 2.6594 (2.5591)	Entropy 1.08572 (1.08828)	Top-1 acc 62.500 (62.418)	Top-5 acc 82.031 (82.819)	lr 0.01220
Train [61][2990/3239]	Time 0.251 (0.600)	Data Time 0.001 (0.015)	Loss 2.6993 (2.5592)	Entropy 1.08565 (1.08827)	Top-1 acc 60.156 (62.417)	Top-5 acc 80.859 (82.816)	lr 0.01220
Train [61][3000/3239]	Time 0.288 (0.599)	Data Time 0.002 (0.014)	Loss 2.5619 (2.5593)	Entropy 1.08557 (1.08826)	Top-1 acc 63.672 (62.412)	Top-5 acc 80.859 (82.816)	lr 0.01220
Train [61][3010/3239]	Time 0.382 (0.599)	Data Time 0.001 (0.014)	Loss 2.4904 (2.5593)	Entropy 1.08555 (1.08825)	Top-1 acc 61.719 (62.408)	Top-5 acc 83.203 (82.815)	lr 0.01220
Train [61][3020/3239]	Time 0.255 (0.598)	Data Time 0.001 (0.014)	Loss 2.6602 (2.5593)	Entropy 1.08553 (1.08824)	Top-1 acc 55.859 (62.405)	Top-5 acc 80.859 (82.815)	lr 0.01220
Train [61][3030/3239]	Time 0.247 (0.598)	Data Time 0.001 (0.014)	Loss 2.4345 (2.5591)	Entropy 1.08554 (1.08823)	Top-1 acc 67.188 (62.411)	Top-5 acc 84.766 (82.818)	lr 0.01219
Train [61][3040/3239]	Time 0.229 (0.597)	Data Time 0.001 (0.014)	Loss 2.6735 (2.5592)	Entropy 1.08550 (1.08823)	Top-1 acc 60.156 (62.411)	Top-5 acc 79.688 (82.816)	lr 0.01219
Train [61][3050/3239]	Time 0.291 (0.597)	Data Time 0.002 (0.014)	Loss 2.3960 (2.5592)	Entropy 1.08551 (1.08822)	Top-1 acc 67.188 (62.412)	Top-5 acc 86.719 (82.816)	lr 0.01219
Train [61][3060/3239]	Time 0.219 (0.597)	Data Time 0.001 (0.014)	Loss 2.5088 (2.5592)	Entropy 1.08553 (1.08821)	Top-1 acc 64.844 (62.414)	Top-5 acc 83.594 (82.812)	lr 0.01219
Train [61][3070/3239]	Time 0.220 (0.596)	Data Time 0.001 (0.014)	Loss 2.4505 (2.5593)	Entropy 1.08546 (1.08820)	Top-1 acc 62.891 (62.410)	Top-5 acc 87.891 (82.811)	lr 0.01219
Train [61][3080/3239]	Time 0.232 (0.596)	Data Time 0.001 (0.014)	Loss 2.4237 (2.5594)	Entropy 1.08543 (1.08819)	Top-1 acc 66.406 (62.410)	Top-5 acc 83.594 (82.808)	lr 0.01219
Train [61][3090/3239]	Time 0.258 (0.595)	Data Time 0.001 (0.014)	Loss 2.5780 (2.5592)	Entropy 1.08544 (1.08818)	Top-1 acc 58.594 (62.414)	Top-5 acc 82.031 (82.810)	lr 0.01219
Train [61][3100/3239]	Time 0.316 (0.612)	Data Time 0.003 (0.014)	Loss 2.4783 (2.5591)	Entropy 1.08542 (1.08817)	Top-1 acc 60.547 (62.415)	Top-5 acc 83.594 (82.810)	lr 0.01219
Train [61][3110/3239]	Time 0.270 (0.612)	Data Time 0.002 (0.014)	Loss 2.5867 (2.5593)	Entropy 1.08540 (1.08816)	Top-1 acc 66.016 (62.410)	Top-5 acc 82.812 (82.806)	lr 0.01219
Train [61][3120/3239]	Time 0.236 (0.611)	Data Time 0.002 (0.014)	Loss 2.5548 (2.5593)	Entropy 1.08530 (1.08815)	Top-1 acc 60.547 (62.409)	Top-5 acc 82.812 (82.805)	lr 0.01218
Train [61][3130/3239]	Time 0.257 (0.611)	Data Time 0.002 (0.014)	Loss 2.4799 (2.5592)	Entropy 1.08527 (1.08815)	Top-1 acc 61.719 (62.409)	Top-5 acc 83.984 (82.806)	lr 0.01218
Train [61][3140/3239]	Time 0.261 (0.610)	Data Time 0.001 (0.014)	Loss 2.5323 (2.5592)	Entropy 1.08529 (1.08814)	Top-1 acc 61.719 (62.410)	Top-5 acc 82.812 (82.808)	lr 0.01218
Train [61][3150/3239]	Time 0.221 (0.610)	Data Time 0.001 (0.014)	Loss 2.6543 (2.5591)	Entropy 1.08528 (1.08813)	Top-1 acc 58.203 (62.412)	Top-5 acc 81.641 (82.808)	lr 0.01218
Train [61][3160/3239]	Time 0.265 (0.610)	Data Time 0.001 (0.014)	Loss 2.5119 (2.5591)	Entropy 1.08512 (1.08812)	Top-1 acc 61.719 (62.412)	Top-5 acc 86.328 (82.809)	lr 0.01218
Train [61][3170/3239]	Time 0.214 (0.609)	Data Time 0.001 (0.014)	Loss 2.6663 (2.5590)	Entropy 1.08511 (1.08811)	Top-1 acc 59.375 (62.415)	Top-5 acc 82.031 (82.811)	lr 0.01218
Train [61][3180/3239]	Time 0.232 (0.609)	Data Time 0.000 (0.014)	Loss 2.6409 (2.5591)	Entropy 1.08505 (1.08810)	Top-1 acc 60.156 (62.412)	Top-5 acc 80.469 (82.808)	lr 0.01218
Train [61][3190/3239]	Time 0.329 (0.608)	Data Time 0.000 (0.014)	Loss 2.5296 (2.5591)	Entropy 1.08504 (1.08809)	Top-1 acc 63.672 (62.414)	Top-5 acc 83.984 (82.809)	lr 0.01218
Train [61][3200/3239]	Time 0.224 (0.608)	Data Time 0.000 (0.014)	Loss 2.5406 (2.5593)	Entropy 1.08497 (1.08808)	Top-1 acc 63.281 (62.412)	Top-5 acc 82.422 (82.805)	lr 0.01218
Train [61][3210/3239]	Time 0.242 (0.607)	Data Time 0.000 (0.014)	Loss 2.6621 (2.5593)	Entropy 1.08490 (1.08807)	Top-1 acc 59.766 (62.412)	Top-5 acc 79.297 (82.803)	lr 0.01218
Train [61][3220/3239]	Time 0.207 (0.607)	Data Time 0.000 (0.014)	Loss 2.6269 (2.5594)	Entropy 1.08486 (1.08806)	Top-1 acc 59.766 (62.409)	Top-5 acc 81.641 (82.802)	lr 0.01217
Train [61][3230/3239]	Time 0.227 (0.606)	Data Time 0.000 (0.014)	Loss 2.6010 (2.5595)	Entropy 1.08490 (1.08805)	Top-1 acc 62.109 (62.407)	Top-5 acc 83.984 (82.800)	lr 0.01217
Train [61][3239/3239]	Time 2.351 (0.606)	Data Time 0.000 (0.014)	Loss 3.0639 (2.5596)	Entropy 1.08490 (1.08804)	Top-1 acc 51.852 (62.406)	Top-5 acc 75.309 (82.798)	lr 0.01217
==========Valid [61/120]	loss 1.451	top-1 acc 67.220 (67.220)	top-5 acc 86.680	Train top-1 62.406	top-5 82.798	Entropy 1.08490	Latency-None: 0.000ms	Flops: 546.53M
Train [62][0/3239]	Time 37.088 (37.088)	Data Time 35.844 (35.844)	Loss 2.6306 (2.6306)	Entropy 1.08484 (1.08484)	Top-1 acc 64.844 (64.844)	Top-5 acc 82.031 (82.031)	lr 0.01217
Train [62][10/3239]	Time 2.619 (4.067)	Data Time 0.031 (3.451)	Loss 2.6070 (2.4897)	Entropy 1.08484 (1.08484)	Top-1 acc 60.547 (64.950)	Top-5 acc 80.469 (83.487)	lr 0.01217
Train [62][20/3239]	Time 0.227 (2.247)	Data Time 0.001 (1.809)	Loss 2.5882 (2.4976)	Entropy 1.08480 (1.08482)	Top-1 acc 62.109 (63.709)	Top-5 acc 81.641 (83.798)	lr 0.01217
Train [62][30/3239]	Time 0.221 (1.673)	Data Time 0.002 (1.227)	Loss 2.5417 (2.4884)	Entropy 1.08479 (1.08481)	Top-1 acc 63.672 (63.861)	Top-5 acc 83.203 (84.022)	lr 0.01217
Train [62][40/3239]	Time 0.234 (1.379)	Data Time 0.001 (0.928)	Loss 2.3985 (2.5033)	Entropy 1.08479 (1.08481)	Top-1 acc 67.188 (63.748)	Top-5 acc 83.984 (83.784)	lr 0.01217
Train [62][50/3239]	Time 0.237 (1.203)	Data Time 0.001 (0.746)	Loss 2.4653 (2.5098)	Entropy 1.08474 (1.08480)	Top-1 acc 66.406 (63.611)	Top-5 acc 85.938 (83.586)	lr 0.01217
Train [62][60/3239]	Time 0.243 (1.081)	Data Time 0.001 (0.624)	Loss 2.4769 (2.5176)	Entropy 1.08466 (1.08478)	Top-1 acc 61.328 (63.358)	Top-5 acc 85.547 (83.382)	lr 0.01217
Train [62][70/3239]	Time 0.211 (0.993)	Data Time 0.001 (0.537)	Loss 2.6737 (2.5272)	Entropy 1.08445 (1.08475)	Top-1 acc 60.547 (63.177)	Top-5 acc 84.766 (83.313)	lr 0.01217
Train [62][80/3239]	Time 0.203 (0.925)	Data Time 0.001 (0.471)	Loss 2.5628 (2.5287)	Entropy 1.08443 (1.08471)	Top-1 acc 64.844 (63.190)	Top-5 acc 83.594 (83.329)	lr 0.01216
Train [62][90/3239]	Time 0.252 (0.875)	Data Time 0.001 (0.419)	Loss 2.4971 (2.5245)	Entropy 1.08440 (1.08468)	Top-1 acc 62.891 (63.225)	Top-5 acc 83.203 (83.401)	lr 0.01216
Train [62][100/3239]	Time 0.217 (0.835)	Data Time 0.001 (0.378)	Loss 2.5792 (2.5251)	Entropy 1.08439 (1.08465)	Top-1 acc 61.719 (63.215)	Top-5 acc 82.812 (83.381)	lr 0.01216
Train [62][110/3239]	Time 0.233 (0.804)	Data Time 0.001 (0.344)	Loss 2.3745 (2.5175)	Entropy 1.08441 (1.08463)	Top-1 acc 66.406 (63.461)	Top-5 acc 85.547 (83.464)	lr 0.01216
Train [62][120/3239]	Time 2.544 (0.778)	Data Time 0.001 (0.316)	Loss 2.6426 (2.5185)	Entropy 1.08441 (1.08461)	Top-1 acc 61.719 (63.540)	Top-5 acc 79.688 (83.497)	lr 0.01216
Train [62][130/3239]	Time 0.197 (0.737)	Data Time 0.001 (0.292)	Loss 2.3234 (2.5174)	Entropy 1.08441 (1.08459)	Top-1 acc 66.406 (63.606)	Top-5 acc 86.719 (83.546)	lr 0.01216
Train [62][140/3239]	Time 0.271 (0.718)	Data Time 0.002 (0.271)	Loss 2.4373 (2.5159)	Entropy 1.08442 (1.08458)	Top-1 acc 68.750 (63.664)	Top-5 acc 83.984 (83.605)	lr 0.01216
Train [62][150/3239]	Time 0.226 (0.704)	Data Time 0.001 (0.254)	Loss 2.7486 (2.5158)	Entropy 1.08439 (1.08457)	Top-1 acc 58.984 (63.625)	Top-5 acc 77.734 (83.612)	lr 0.01216
Train [62][160/3239]	Time 0.237 (0.690)	Data Time 0.001 (0.238)	Loss 2.5694 (2.5172)	Entropy 1.08437 (1.08456)	Top-1 acc 58.203 (63.538)	Top-5 acc 82.031 (83.565)	lr 0.01216
Train [62][170/3239]	Time 0.235 (0.676)	Data Time 0.001 (0.224)	Loss 2.4042 (2.5222)	Entropy 1.08433 (1.08455)	Top-1 acc 64.062 (63.432)	Top-5 acc 87.500 (83.477)	lr 0.01216
Train [62][180/3239]	Time 0.221 (0.664)	Data Time 0.002 (0.212)	Loss 2.3622 (2.5225)	Entropy 1.08419 (1.08453)	Top-1 acc 66.406 (63.398)	Top-5 acc 87.500 (83.447)	lr 0.01215
Train [62][190/3239]	Time 0.224 (0.654)	Data Time 0.001 (0.201)	Loss 2.5416 (2.5216)	Entropy 1.08418 (1.08451)	Top-1 acc 62.891 (63.433)	Top-5 acc 82.812 (83.465)	lr 0.01215
Train [62][200/3239]	Time 0.232 (0.645)	Data Time 0.001 (0.191)	Loss 2.5229 (2.5254)	Entropy 1.08422 (1.08450)	Top-1 acc 64.062 (63.299)	Top-5 acc 83.984 (83.394)	lr 0.01215
Train [62][210/3239]	Time 0.292 (0.860)	Data Time 0.003 (0.182)	Loss 2.4758 (2.5264)	Entropy 1.08423 (1.08448)	Top-1 acc 64.844 (63.285)	Top-5 acc 86.719 (83.398)	lr 0.01215
Train [62][220/3239]	Time 0.230 (0.851)	Data Time 0.002 (0.174)	Loss 2.6607 (2.5258)	Entropy 1.08422 (1.08447)	Top-1 acc 60.156 (63.294)	Top-5 acc 79.297 (83.408)	lr 0.01215
Train [62][230/3239]	Time 2.435 (0.833)	Data Time 0.002 (0.167)	Loss 2.6240 (2.5276)	Entropy 1.08422 (1.08446)	Top-1 acc 60.938 (63.239)	Top-5 acc 82.031 (83.350)	lr 0.01215
Train [62][240/3239]	Time 0.242 (0.809)	Data Time 0.002 (0.160)	Loss 2.4931 (2.5263)	Entropy 1.08419 (1.08445)	Top-1 acc 62.109 (63.263)	Top-5 acc 83.984 (83.368)	lr 0.01215
Train [62][250/3239]	Time 0.237 (0.795)	Data Time 0.001 (0.153)	Loss 2.5118 (2.5255)	Entropy 1.08410 (1.08444)	Top-1 acc 63.672 (63.272)	Top-5 acc 82.812 (83.382)	lr 0.01215
Train [62][260/3239]	Time 0.223 (0.783)	Data Time 0.001 (0.148)	Loss 2.5225 (2.5243)	Entropy 1.08408 (1.08442)	Top-1 acc 62.891 (63.325)	Top-5 acc 83.594 (83.405)	lr 0.01215
Train [62][270/3239]	Time 0.211 (0.771)	Data Time 0.001 (0.142)	Loss 2.5427 (2.5241)	Entropy 1.08377 (1.08440)	Top-1 acc 59.766 (63.329)	Top-5 acc 84.375 (83.405)	lr 0.01215
Train [62][280/3239]	Time 0.358 (0.761)	Data Time 0.002 (0.137)	Loss 2.6907 (2.5266)	Entropy 1.08376 (1.08438)	Top-1 acc 60.938 (63.256)	Top-5 acc 82.031 (83.370)	lr 0.01214
Train [62][290/3239]	Time 0.237 (0.751)	Data Time 0.001 (0.133)	Loss 2.5328 (2.5274)	Entropy 1.08378 (1.08436)	Top-1 acc 62.500 (63.225)	Top-5 acc 83.984 (83.328)	lr 0.01214
Train [62][300/3239]	Time 0.221 (0.742)	Data Time 0.001 (0.128)	Loss 2.4974 (2.5273)	Entropy 1.08377 (1.08434)	Top-1 acc 62.109 (63.190)	Top-5 acc 84.375 (83.326)	lr 0.01214
Train [62][310/3239]	Time 0.227 (0.734)	Data Time 0.001 (0.124)	Loss 2.4805 (2.5274)	Entropy 1.08372 (1.08432)	Top-1 acc 63.281 (63.168)	Top-5 acc 85.938 (83.335)	lr 0.01214
Train [62][320/3239]	Time 0.217 (0.726)	Data Time 0.001 (0.120)	Loss 2.5444 (2.5274)	Entropy 1.08371 (1.08430)	Top-1 acc 60.938 (63.190)	Top-5 acc 82.812 (83.326)	lr 0.01214
Train [62][330/3239]	Time 0.213 (0.718)	Data Time 0.001 (0.117)	Loss 2.5331 (2.5266)	Entropy 1.08370 (1.08428)	Top-1 acc 63.281 (63.202)	Top-5 acc 85.156 (83.357)	lr 0.01214
Train [62][340/3239]	Time 2.533 (0.711)	Data Time 0.001 (0.113)	Loss 2.5711 (2.5268)	Entropy 1.08370 (1.08427)	Top-1 acc 60.547 (63.153)	Top-5 acc 80.078 (83.343)	lr 0.01214
Train [62][350/3239]	Time 0.222 (0.697)	Data Time 0.001 (0.110)	Loss 2.6025 (2.5285)	Entropy 1.08367 (1.08425)	Top-1 acc 61.719 (63.132)	Top-5 acc 82.031 (83.298)	lr 0.01214
Train [62][360/3239]	Time 0.221 (0.691)	Data Time 0.001 (0.107)	Loss 2.6227 (2.5297)	Entropy 1.08358 (1.08423)	Top-1 acc 60.547 (63.093)	Top-5 acc 83.203 (83.297)	lr 0.01214
Train [62][370/3239]	Time 0.332 (0.685)	Data Time 0.002 (0.104)	Loss 2.4733 (2.5296)	Entropy 1.08356 (1.08421)	Top-1 acc 64.844 (63.095)	Top-5 acc 83.203 (83.293)	lr 0.01214
Train [62][380/3239]	Time 0.234 (0.680)	Data Time 0.001 (0.102)	Loss 2.5558 (2.5308)	Entropy 1.08349 (1.08419)	Top-1 acc 61.719 (63.054)	Top-5 acc 82.812 (83.253)	lr 0.01213
Train [62][390/3239]	Time 0.271 (0.675)	Data Time 0.001 (0.099)	Loss 2.3189 (2.5315)	Entropy 1.08350 (1.08418)	Top-1 acc 69.922 (63.032)	Top-5 acc 85.547 (83.219)	lr 0.01213
Train [62][400/3239]	Time 0.222 (0.670)	Data Time 0.001 (0.097)	Loss 2.5773 (2.5315)	Entropy 1.08348 (1.08416)	Top-1 acc 62.891 (63.039)	Top-5 acc 81.250 (83.205)	lr 0.01213
Train [62][410/3239]	Time 0.216 (0.665)	Data Time 0.001 (0.094)	Loss 2.5143 (2.5315)	Entropy 1.08343 (1.08414)	Top-1 acc 62.109 (63.043)	Top-5 acc 87.109 (83.204)	lr 0.01213
Train [62][420/3239]	Time 0.227 (0.661)	Data Time 0.001 (0.092)	Loss 2.5771 (2.5327)	Entropy 1.08336 (1.08412)	Top-1 acc 63.281 (63.029)	Top-5 acc 80.859 (83.184)	lr 0.01213
Train [62][430/3239]	Time 0.237 (0.656)	Data Time 0.001 (0.090)	Loss 2.4241 (2.5319)	Entropy 1.08336 (1.08411)	Top-1 acc 67.188 (63.047)	Top-5 acc 86.328 (83.211)	lr 0.01213
Train [62][440/3239]	Time 0.280 (0.652)	Data Time 0.001 (0.088)	Loss 2.3938 (2.5320)	Entropy 1.08333 (1.08409)	Top-1 acc 68.750 (63.050)	Top-5 acc 87.500 (83.211)	lr 0.01213
Train [62][450/3239]	Time 2.502 (0.648)	Data Time 0.001 (0.086)	Loss 2.5712 (2.5318)	Entropy 1.08333 (1.08407)	Top-1 acc 64.062 (63.040)	Top-5 acc 82.031 (83.223)	lr 0.01213
Train [62][460/3239]	Time 0.235 (0.639)	Data Time 0.001 (0.084)	Loss 2.4111 (2.5321)	Entropy 1.08329 (1.08406)	Top-1 acc 66.016 (63.037)	Top-5 acc 85.547 (83.218)	lr 0.01213
Train [62][470/3239]	Time 0.228 (0.636)	Data Time 0.001 (0.083)	Loss 2.4492 (2.5331)	Entropy 1.08321 (1.08404)	Top-1 acc 67.188 (63.013)	Top-5 acc 86.328 (83.201)	lr 0.01213
Train [62][480/3239]	Time 0.248 (0.632)	Data Time 0.001 (0.081)	Loss 2.4634 (2.5325)	Entropy 1.08320 (1.08402)	Top-1 acc 62.891 (63.028)	Top-5 acc 83.984 (83.217)	lr 0.01212
Train [62][490/3239]	Time 0.226 (0.628)	Data Time 0.001 (0.079)	Loss 2.5412 (2.5326)	Entropy 1.08318 (1.08400)	Top-1 acc 62.891 (63.028)	Top-5 acc 83.594 (83.228)	lr 0.01212
Train [62][500/3239]	Time 0.227 (0.625)	Data Time 0.001 (0.078)	Loss 2.5684 (2.5320)	Entropy 1.08318 (1.08399)	Top-1 acc 64.062 (63.050)	Top-5 acc 81.641 (83.232)	lr 0.01212
Train [62][510/3239]	Time 0.218 (0.622)	Data Time 0.001 (0.076)	Loss 2.4596 (2.5330)	Entropy 1.08314 (1.08397)	Top-1 acc 64.062 (63.010)	Top-5 acc 84.375 (83.215)	lr 0.01212
Train [62][520/3239]	Time 0.214 (0.619)	Data Time 0.001 (0.075)	Loss 2.5863 (2.5330)	Entropy 1.08309 (1.08395)	Top-1 acc 62.109 (62.999)	Top-5 acc 81.250 (83.217)	lr 0.01212
Train [62][530/3239]	Time 0.232 (0.616)	Data Time 0.001 (0.073)	Loss 2.5484 (2.5331)	Entropy 1.08304 (1.08394)	Top-1 acc 64.453 (63.013)	Top-5 acc 84.375 (83.216)	lr 0.01212
Train [62][540/3239]	Time 0.199 (0.613)	Data Time 0.001 (0.072)	Loss 2.3474 (2.5331)	Entropy 1.08303 (1.08392)	Top-1 acc 71.094 (63.039)	Top-5 acc 87.109 (83.209)	lr 0.01212
Train [62][550/3239]	Time 0.418 (0.611)	Data Time 0.001 (0.071)	Loss 2.7413 (2.5328)	Entropy 1.08301 (1.08391)	Top-1 acc 60.156 (63.048)	Top-5 acc 79.297 (83.222)	lr 0.01212
Train [62][560/3239]	Time 2.629 (0.609)	Data Time 0.002 (0.070)	Loss 2.5298 (2.5334)	Entropy 1.08301 (1.08389)	Top-1 acc 63.672 (63.034)	Top-5 acc 81.641 (83.215)	lr 0.01212
Train [62][570/3239]	Time 0.281 (0.603)	Data Time 0.002 (0.068)	Loss 2.7678 (2.5341)	Entropy 1.08301 (1.08387)	Top-1 acc 61.719 (63.029)	Top-5 acc 76.953 (83.204)	lr 0.01212
Train [62][580/3239]	Time 0.363 (0.700)	Data Time 0.002 (0.067)	Loss 2.4766 (2.5343)	Entropy 1.08303 (1.08386)	Top-1 acc 66.016 (63.006)	Top-5 acc 83.203 (83.200)	lr 0.01211
Train [62][590/3239]	Time 0.224 (0.696)	Data Time 0.002 (0.066)	Loss 2.6184 (2.5336)	Entropy 1.08293 (1.08384)	Top-1 acc 64.844 (63.033)	Top-5 acc 83.594 (83.213)	lr 0.01211
Train [62][600/3239]	Time 0.223 (0.693)	Data Time 0.002 (0.065)	Loss 2.5928 (2.5341)	Entropy 1.08292 (1.08383)	Top-1 acc 63.672 (63.010)	Top-5 acc 81.250 (83.208)	lr 0.01211
Train [62][610/3239]	Time 0.234 (0.689)	Data Time 0.001 (0.064)	Loss 2.5027 (2.5349)	Entropy 1.08287 (1.08381)	Top-1 acc 62.500 (63.001)	Top-5 acc 85.156 (83.196)	lr 0.01211
Train [62][620/3239]	Time 0.217 (0.686)	Data Time 0.001 (0.063)	Loss 2.5521 (2.5355)	Entropy 1.08282 (1.08380)	Top-1 acc 62.109 (62.989)	Top-5 acc 86.328 (83.200)	lr 0.01211
Train [62][630/3239]	Time 0.226 (0.682)	Data Time 0.001 (0.062)	Loss 2.5137 (2.5357)	Entropy 1.08279 (1.08378)	Top-1 acc 62.500 (62.969)	Top-5 acc 82.422 (83.192)	lr 0.01211
Train [62][640/3239]	Time 0.216 (0.679)	Data Time 0.001 (0.061)	Loss 2.5957 (2.5367)	Entropy 1.08285 (1.08377)	Top-1 acc 60.547 (62.963)	Top-5 acc 82.031 (83.178)	lr 0.01211
Train [62][650/3239]	Time 0.221 (0.675)	Data Time 0.001 (0.060)	Loss 2.5391 (2.5372)	Entropy 1.08285 (1.08375)	Top-1 acc 62.891 (62.946)	Top-5 acc 84.375 (83.173)	lr 0.01211
Train [62][660/3239]	Time 0.235 (0.672)	Data Time 0.002 (0.059)	Loss 2.8152 (2.5379)	Entropy 1.08283 (1.08374)	Top-1 acc 58.594 (62.931)	Top-5 acc 76.953 (83.157)	lr 0.01211
Train [62][670/3239]	Time 2.598 (0.669)	Data Time 0.001 (0.058)	Loss 2.5512 (2.5382)	Entropy 1.08283 (1.08373)	Top-1 acc 66.016 (62.936)	Top-5 acc 82.812 (83.150)	lr 0.01211
Train [62][680/3239]	Time 0.219 (0.663)	Data Time 0.002 (0.058)	Loss 2.5247 (2.5383)	Entropy 1.08283 (1.08371)	Top-1 acc 60.547 (62.937)	Top-5 acc 83.203 (83.151)	lr 0.01210
Train [62][690/3239]	Time 0.341 (0.660)	Data Time 0.001 (0.057)	Loss 2.6417 (2.5382)	Entropy 1.08281 (1.08370)	Top-1 acc 62.500 (62.943)	Top-5 acc 81.250 (83.144)	lr 0.01210
Train [62][700/3239]	Time 0.224 (0.657)	Data Time 0.001 (0.056)	Loss 2.6660 (2.5389)	Entropy 1.08280 (1.08369)	Top-1 acc 59.766 (62.934)	Top-5 acc 82.422 (83.135)	lr 0.01210
Train [62][710/3239]	Time 0.233 (0.655)	Data Time 0.001 (0.055)	Loss 2.6692 (2.5390)	Entropy 1.08277 (1.08367)	Top-1 acc 57.031 (62.938)	Top-5 acc 80.859 (83.133)	lr 0.01210
Train [62][720/3239]	Time 0.246 (0.652)	Data Time 0.001 (0.055)	Loss 2.5996 (2.5389)	Entropy 1.08268 (1.08366)	Top-1 acc 64.453 (62.943)	Top-5 acc 81.641 (83.138)	lr 0.01210
Train [62][730/3239]	Time 0.243 (0.650)	Data Time 0.002 (0.054)	Loss 2.4358 (2.5388)	Entropy 1.08273 (1.08365)	Top-1 acc 66.406 (62.928)	Top-5 acc 83.594 (83.129)	lr 0.01210
Train [62][740/3239]	Time 0.218 (0.648)	Data Time 0.001 (0.053)	Loss 2.6306 (2.5394)	Entropy 1.08273 (1.08363)	Top-1 acc 62.109 (62.913)	Top-5 acc 78.125 (83.115)	lr 0.01210
Train [62][750/3239]	Time 0.241 (0.645)	Data Time 0.001 (0.052)	Loss 2.4320 (2.5397)	Entropy 1.08271 (1.08362)	Top-1 acc 64.453 (62.913)	Top-5 acc 85.938 (83.105)	lr 0.01210
Train [62][760/3239]	Time 0.228 (0.643)	Data Time 0.001 (0.052)	Loss 2.2771 (2.5393)	Entropy 1.08269 (1.08361)	Top-1 acc 67.578 (62.912)	Top-5 acc 86.328 (83.114)	lr 0.01210
Train [62][770/3239]	Time 0.221 (0.641)	Data Time 0.001 (0.051)	Loss 2.4420 (2.5392)	Entropy 1.08287 (1.08360)	Top-1 acc 67.578 (62.918)	Top-5 acc 83.594 (83.111)	lr 0.01210
Train [62][780/3239]	Time 2.592 (0.639)	Data Time 0.001 (0.050)	Loss 2.7304 (2.5394)	Entropy 1.08287 (1.08359)	Top-1 acc 61.719 (62.919)	Top-5 acc 76.562 (83.098)	lr 0.01209
Train [62][790/3239]	Time 0.269 (0.634)	Data Time 0.002 (0.050)	Loss 2.4986 (2.5399)	Entropy 1.08280 (1.08358)	Top-1 acc 65.234 (62.900)	Top-5 acc 82.422 (83.085)	lr 0.01209
Train [62][800/3239]	Time 0.218 (0.632)	Data Time 0.001 (0.049)	Loss 2.5601 (2.5402)	Entropy 1.08269 (1.08357)	Top-1 acc 63.672 (62.904)	Top-5 acc 82.812 (83.078)	lr 0.01209
Train [62][810/3239]	Time 0.171 (0.629)	Data Time 0.001 (0.049)	Loss 2.6139 (2.5413)	Entropy 1.08269 (1.08356)	Top-1 acc 56.250 (62.860)	Top-5 acc 81.641 (83.064)	lr 0.01209
Train [62][820/3239]	Time 0.244 (0.627)	Data Time 0.001 (0.048)	Loss 2.3836 (2.5412)	Entropy 1.08267 (1.08355)	Top-1 acc 65.625 (62.870)	Top-5 acc 86.328 (83.064)	lr 0.01209
Train [62][830/3239]	Time 0.320 (0.625)	Data Time 0.001 (0.048)	Loss 2.5428 (2.5416)	Entropy 1.08267 (1.08354)	Top-1 acc 64.453 (62.856)	Top-5 acc 82.812 (83.058)	lr 0.01209
Train [62][840/3239]	Time 0.212 (0.623)	Data Time 0.001 (0.047)	Loss 2.5228 (2.5411)	Entropy 1.08266 (1.08353)	Top-1 acc 62.109 (62.861)	Top-5 acc 84.766 (83.062)	lr 0.01209
Train [62][850/3239]	Time 0.223 (0.621)	Data Time 0.001 (0.046)	Loss 2.4546 (2.5405)	Entropy 1.08260 (1.08352)	Top-1 acc 63.281 (62.867)	Top-5 acc 82.812 (83.081)	lr 0.01209
Train [62][860/3239]	Time 0.214 (0.619)	Data Time 0.001 (0.046)	Loss 2.5330 (2.5407)	Entropy 1.08258 (1.08350)	Top-1 acc 65.625 (62.872)	Top-5 acc 81.250 (83.072)	lr 0.01209
Train [62][870/3239]	Time 0.257 (0.618)	Data Time 0.001 (0.045)	Loss 2.4811 (2.5412)	Entropy 1.08254 (1.08349)	Top-1 acc 64.453 (62.863)	Top-5 acc 84.375 (83.056)	lr 0.01208
Train [62][880/3239]	Time 0.253 (0.616)	Data Time 0.001 (0.045)	Loss 2.4808 (2.5415)	Entropy 1.08252 (1.08348)	Top-1 acc 61.719 (62.871)	Top-5 acc 85.547 (83.044)	lr 0.01208
Train [62][890/3239]	Time 2.543 (0.614)	Data Time 0.001 (0.044)	Loss 2.6142 (2.5411)	Entropy 1.08252 (1.08347)	Top-1 acc 59.766 (62.869)	Top-5 acc 81.250 (83.055)	lr 0.01208
Train [62][900/3239]	Time 0.235 (0.610)	Data Time 0.001 (0.044)	Loss 2.5463 (2.5407)	Entropy 1.08255 (1.08346)	Top-1 acc 62.891 (62.871)	Top-5 acc 84.766 (83.065)	lr 0.01208
Train [62][910/3239]	Time 0.227 (0.609)	Data Time 0.001 (0.043)	Loss 2.5814 (2.5403)	Entropy 1.08256 (1.08345)	Top-1 acc 59.766 (62.879)	Top-5 acc 83.594 (83.068)	lr 0.01208
Train [62][920/3239]	Time 0.251 (0.607)	Data Time 0.002 (0.043)	Loss 2.5928 (2.5405)	Entropy 1.08255 (1.08344)	Top-1 acc 60.938 (62.886)	Top-5 acc 81.641 (83.062)	lr 0.01208
Train [62][930/3239]	Time 0.227 (0.606)	Data Time 0.001 (0.043)	Loss 2.4880 (2.5402)	Entropy 1.08250 (1.08343)	Top-1 acc 65.234 (62.889)	Top-5 acc 82.812 (83.060)	lr 0.01208
Train [62][940/3239]	Time 0.401 (0.657)	Data Time 0.002 (0.042)	Loss 2.6901 (2.5404)	Entropy 1.08236 (1.08342)	Top-1 acc 62.891 (62.884)	Top-5 acc 80.078 (83.061)	lr 0.01208
Train [62][950/3239]	Time 0.232 (0.655)	Data Time 0.002 (0.042)	Loss 2.4387 (2.5399)	Entropy 1.08235 (1.08341)	Top-1 acc 69.922 (62.906)	Top-5 acc 83.984 (83.070)	lr 0.01208
Train [62][960/3239]	Time 0.231 (0.654)	Data Time 0.002 (0.041)	Loss 2.6367 (2.5412)	Entropy 1.08228 (1.08340)	Top-1 acc 58.984 (62.873)	Top-5 acc 80.469 (83.045)	lr 0.01208
Train [62][970/3239]	Time 0.312 (0.652)	Data Time 0.001 (0.041)	Loss 2.5857 (2.5415)	Entropy 1.08226 (1.08339)	Top-1 acc 60.547 (62.862)	Top-5 acc 82.812 (83.041)	lr 0.01207
Train [62][980/3239]	Time 0.242 (0.650)	Data Time 0.001 (0.041)	Loss 2.4503 (2.5414)	Entropy 1.08220 (1.08338)	Top-1 acc 67.188 (62.868)	Top-5 acc 84.766 (83.032)	lr 0.01207
Train [62][990/3239]	Time 0.276 (0.648)	Data Time 0.001 (0.040)	Loss 2.4404 (2.5421)	Entropy 1.08220 (1.08336)	Top-1 acc 65.625 (62.846)	Top-5 acc 84.375 (83.024)	lr 0.01207
Train [62][1000/3239]	Time 2.554 (0.646)	Data Time 0.002 (0.040)	Loss 2.5215 (2.5419)	Entropy 1.08220 (1.08335)	Top-1 acc 63.281 (62.856)	Top-5 acc 83.594 (83.033)	lr 0.01207
Train [62][1010/3239]	Time 0.216 (0.642)	Data Time 0.001 (0.039)	Loss 2.5036 (2.5414)	Entropy 1.08217 (1.08334)	Top-1 acc 60.547 (62.855)	Top-5 acc 85.156 (83.054)	lr 0.01207
Train [62][1020/3239]	Time 0.255 (0.641)	Data Time 0.001 (0.039)	Loss 2.5860 (2.5413)	Entropy 1.08213 (1.08333)	Top-1 acc 63.672 (62.868)	Top-5 acc 83.203 (83.058)	lr 0.01207
Train [62][1030/3239]	Time 0.239 (0.639)	Data Time 0.002 (0.039)	Loss 2.6870 (2.5420)	Entropy 1.08211 (1.08332)	Top-1 acc 58.984 (62.857)	Top-5 acc 80.469 (83.049)	lr 0.01207
Train [62][1040/3239]	Time 0.222 (0.637)	Data Time 0.001 (0.038)	Loss 2.4602 (2.5416)	Entropy 1.08211 (1.08331)	Top-1 acc 66.016 (62.859)	Top-5 acc 84.766 (83.060)	lr 0.01207
Train [62][1050/3239]	Time 0.239 (0.636)	Data Time 0.001 (0.038)	Loss 2.7117 (2.5416)	Entropy 1.08209 (1.08329)	Top-1 acc 58.984 (62.863)	Top-5 acc 80.859 (83.061)	lr 0.01207
Train [62][1060/3239]	Time 0.330 (0.634)	Data Time 0.001 (0.038)	Loss 2.4258 (2.5413)	Entropy 1.08208 (1.08328)	Top-1 acc 65.234 (62.871)	Top-5 acc 83.594 (83.067)	lr 0.01207
Train [62][1070/3239]	Time 0.224 (0.632)	Data Time 0.001 (0.037)	Loss 2.4710 (2.5411)	Entropy 1.08204 (1.08327)	Top-1 acc 62.891 (62.882)	Top-5 acc 85.547 (83.070)	lr 0.01206
Train [62][1080/3239]	Time 0.223 (0.631)	Data Time 0.001 (0.037)	Loss 2.4359 (2.5409)	Entropy 1.08209 (1.08326)	Top-1 acc 63.672 (62.884)	Top-5 acc 84.375 (83.072)	lr 0.01206
Train [62][1090/3239]	Time 0.235 (0.629)	Data Time 0.001 (0.037)	Loss 2.5724 (2.5408)	Entropy 1.08210 (1.08325)	Top-1 acc 59.375 (62.885)	Top-5 acc 78.906 (83.070)	lr 0.01206
Train [62][1100/3239]	Time 0.245 (0.628)	Data Time 0.001 (0.036)	Loss 2.5823 (2.5412)	Entropy 1.08205 (1.08324)	Top-1 acc 60.938 (62.869)	Top-5 acc 82.422 (83.063)	lr 0.01206
Train [62][1110/3239]	Time 2.688 (0.626)	Data Time 0.001 (0.036)	Loss 2.4831 (2.5410)	Entropy 1.08205 (1.08323)	Top-1 acc 64.062 (62.874)	Top-5 acc 85.547 (83.072)	lr 0.01206
Train [62][1120/3239]	Time 0.252 (0.623)	Data Time 0.002 (0.036)	Loss 2.3506 (2.5408)	Entropy 1.08203 (1.08322)	Top-1 acc 66.406 (62.877)	Top-5 acc 85.938 (83.080)	lr 0.01206
Train [62][1130/3239]	Time 0.219 (0.622)	Data Time 0.001 (0.035)	Loss 2.6410 (2.5411)	Entropy 1.08199 (1.08321)	Top-1 acc 59.375 (62.871)	Top-5 acc 82.031 (83.069)	lr 0.01206
Train [62][1140/3239]	Time 0.223 (0.620)	Data Time 0.001 (0.035)	Loss 2.6783 (2.5411)	Entropy 1.08193 (1.08320)	Top-1 acc 60.547 (62.868)	Top-5 acc 82.031 (83.070)	lr 0.01206
Train [62][1150/3239]	Time 0.330 (0.619)	Data Time 0.001 (0.035)	Loss 2.4501 (2.5407)	Entropy 1.08190 (1.08318)	Top-1 acc 65.625 (62.877)	Top-5 acc 85.547 (83.079)	lr 0.01206
Train [62][1160/3239]	Time 0.225 (0.617)	Data Time 0.001 (0.035)	Loss 2.5634 (2.5407)	Entropy 1.08190 (1.08317)	Top-1 acc 60.938 (62.876)	Top-5 acc 84.375 (83.078)	lr 0.01206
Train [62][1170/3239]	Time 0.221 (0.616)	Data Time 0.001 (0.034)	Loss 2.5530 (2.5404)	Entropy 1.08183 (1.08316)	Top-1 acc 64.062 (62.876)	Top-5 acc 84.766 (83.081)	lr 0.01205
Train [62][1180/3239]	Time 0.205 (0.615)	Data Time 0.001 (0.034)	Loss 2.3559 (2.5401)	Entropy 1.08185 (1.08315)	Top-1 acc 66.406 (62.884)	Top-5 acc 86.719 (83.087)	lr 0.01205
Train [62][1190/3239]	Time 0.225 (0.614)	Data Time 0.001 (0.034)	Loss 2.4119 (2.5407)	Entropy 1.08180 (1.08314)	Top-1 acc 64.453 (62.866)	Top-5 acc 87.500 (83.080)	lr 0.01205
Train [62][1200/3239]	Time 0.245 (0.612)	Data Time 0.001 (0.033)	Loss 2.3172 (2.5400)	Entropy 1.08174 (1.08313)	Top-1 acc 66.016 (62.879)	Top-5 acc 88.672 (83.089)	lr 0.01205
Train [62][1210/3239]	Time 0.264 (0.611)	Data Time 0.001 (0.033)	Loss 2.5222 (2.5398)	Entropy 1.08168 (1.08312)	Top-1 acc 63.281 (62.885)	Top-5 acc 83.594 (83.093)	lr 0.01205
Train [62][1220/3239]	Time 2.485 (0.610)	Data Time 0.001 (0.033)	Loss 2.5993 (2.5395)	Entropy 1.08168 (1.08311)	Top-1 acc 59.375 (62.886)	Top-5 acc 80.859 (83.097)	lr 0.01205
Train [62][1230/3239]	Time 0.268 (0.607)	Data Time 0.001 (0.033)	Loss 2.4604 (2.5399)	Entropy 1.08167 (1.08309)	Top-1 acc 67.578 (62.875)	Top-5 acc 85.547 (83.092)	lr 0.01205
Train [62][1240/3239]	Time 0.259 (0.606)	Data Time 0.001 (0.032)	Loss 2.4704 (2.5398)	Entropy 1.08156 (1.08308)	Top-1 acc 66.406 (62.880)	Top-5 acc 85.156 (83.091)	lr 0.01205
Train [62][1250/3239]	Time 0.227 (0.605)	Data Time 0.001 (0.032)	Loss 2.6088 (2.5401)	Entropy 1.08158 (1.08307)	Top-1 acc 61.328 (62.873)	Top-5 acc 82.031 (83.086)	lr 0.01205
Train [62][1260/3239]	Time 0.235 (0.604)	Data Time 0.001 (0.032)	Loss 2.4233 (2.5398)	Entropy 1.08165 (1.08306)	Top-1 acc 63.672 (62.874)	Top-5 acc 84.375 (83.089)	lr 0.01205
Train [62][1270/3239]	Time 0.225 (0.603)	Data Time 0.001 (0.032)	Loss 2.5339 (2.5397)	Entropy 1.08161 (1.08305)	Top-1 acc 63.672 (62.880)	Top-5 acc 81.641 (83.088)	lr 0.01204
Train [62][1280/3239]	Time 0.236 (0.602)	Data Time 0.001 (0.031)	Loss 2.5138 (2.5400)	Entropy 1.08149 (1.08303)	Top-1 acc 62.891 (62.873)	Top-5 acc 83.203 (83.077)	lr 0.01204
Train [62][1290/3239]	Time 0.314 (0.601)	Data Time 0.001 (0.031)	Loss 2.5523 (2.5405)	Entropy 1.08157 (1.08302)	Top-1 acc 61.719 (62.862)	Top-5 acc 82.812 (83.072)	lr 0.01204
Train [62][1300/3239]	Time 0.224 (0.640)	Data Time 0.002 (0.031)	Loss 2.5806 (2.5406)	Entropy 1.08159 (1.08301)	Top-1 acc 62.500 (62.861)	Top-5 acc 82.422 (83.067)	lr 0.01204
Train [62][1310/3239]	Time 0.224 (0.639)	Data Time 0.002 (0.031)	Loss 2.4310 (2.5404)	Entropy 1.08151 (1.08300)	Top-1 acc 64.844 (62.866)	Top-5 acc 83.594 (83.073)	lr 0.01204
Train [62][1320/3239]	Time 0.252 (0.638)	Data Time 0.002 (0.031)	Loss 2.5337 (2.5405)	Entropy 1.08139 (1.08299)	Top-1 acc 64.062 (62.872)	Top-5 acc 82.812 (83.069)	lr 0.01204
Train [62][1330/3239]	Time 2.488 (0.637)	Data Time 0.002 (0.030)	Loss 2.5442 (2.5407)	Entropy 1.08139 (1.08298)	Top-1 acc 63.281 (62.865)	Top-5 acc 82.422 (83.065)	lr 0.01204
Train [62][1340/3239]	Time 0.244 (0.634)	Data Time 0.002 (0.030)	Loss 2.5894 (2.5409)	Entropy 1.08131 (1.08297)	Top-1 acc 63.672 (62.863)	Top-5 acc 82.031 (83.060)	lr 0.01204
Train [62][1350/3239]	Time 0.269 (0.633)	Data Time 0.002 (0.030)	Loss 2.4294 (2.5409)	Entropy 1.08127 (1.08295)	Top-1 acc 62.109 (62.859)	Top-5 acc 86.719 (83.060)	lr 0.01204
Train [62][1360/3239]	Time 0.235 (0.632)	Data Time 0.001 (0.030)	Loss 2.5232 (2.5410)	Entropy 1.08129 (1.08294)	Top-1 acc 64.453 (62.856)	Top-5 acc 84.766 (83.056)	lr 0.01204
Train [62][1370/3239]	Time 0.232 (0.630)	Data Time 0.001 (0.029)	Loss 2.5353 (2.5408)	Entropy 1.08129 (1.08293)	Top-1 acc 63.672 (62.856)	Top-5 acc 84.766 (83.063)	lr 0.01203
Train [62][1380/3239]	Time 0.254 (0.629)	Data Time 0.002 (0.029)	Loss 2.6500 (2.5405)	Entropy 1.08122 (1.08292)	Top-1 acc 59.375 (62.860)	Top-5 acc 81.250 (83.067)	lr 0.01203
Train [62][1390/3239]	Time 0.219 (0.628)	Data Time 0.001 (0.029)	Loss 2.3525 (2.5404)	Entropy 1.08120 (1.08290)	Top-1 acc 66.016 (62.861)	Top-5 acc 87.891 (83.067)	lr 0.01203
Train [62][1400/3239]	Time 0.235 (0.627)	Data Time 0.001 (0.029)	Loss 2.4661 (2.5404)	Entropy 1.08119 (1.08289)	Top-1 acc 66.797 (62.864)	Top-5 acc 83.203 (83.064)	lr 0.01203
Train [62][1410/3239]	Time 0.223 (0.626)	Data Time 0.001 (0.029)	Loss 2.5096 (2.5399)	Entropy 1.08120 (1.08288)	Top-1 acc 62.109 (62.871)	Top-5 acc 83.203 (83.069)	lr 0.01203
Train [62][1420/3239]	Time 0.221 (0.625)	Data Time 0.001 (0.028)	Loss 2.3802 (2.5401)	Entropy 1.08116 (1.08287)	Top-1 acc 67.578 (62.872)	Top-5 acc 84.766 (83.065)	lr 0.01203
Train [62][1430/3239]	Time 0.306 (0.624)	Data Time 0.002 (0.028)	Loss 2.4493 (2.5400)	Entropy 1.08115 (1.08286)	Top-1 acc 64.453 (62.878)	Top-5 acc 85.547 (83.068)	lr 0.01203
Train [62][1440/3239]	Time 2.487 (0.622)	Data Time 0.001 (0.028)	Loss 2.6871 (2.5398)	Entropy 1.08115 (1.08284)	Top-1 acc 56.250 (62.880)	Top-5 acc 80.469 (83.076)	lr 0.01203
Train [62][1450/3239]	Time 0.280 (0.620)	Data Time 0.002 (0.028)	Loss 2.4998 (2.5397)	Entropy 1.08117 (1.08283)	Top-1 acc 63.281 (62.879)	Top-5 acc 85.156 (83.079)	lr 0.01203
Train [62][1460/3239]	Time 0.235 (0.619)	Data Time 0.001 (0.028)	Loss 2.5431 (2.5397)	Entropy 1.08112 (1.08282)	Top-1 acc 63.672 (62.877)	Top-5 acc 82.422 (83.070)	lr 0.01203
Train [62][1470/3239]	Time 0.229 (0.618)	Data Time 0.001 (0.028)	Loss 2.7220 (2.5398)	Entropy 1.08109 (1.08281)	Top-1 acc 59.766 (62.870)	Top-5 acc 78.125 (83.067)	lr 0.01202
Train [62][1480/3239]	Time 0.234 (0.617)	Data Time 0.001 (0.027)	Loss 2.6198 (2.5400)	Entropy 1.08110 (1.08280)	Top-1 acc 62.109 (62.864)	Top-5 acc 81.250 (83.064)	lr 0.01202
Train [62][1490/3239]	Time 0.224 (0.616)	Data Time 0.001 (0.027)	Loss 2.6145 (2.5400)	Entropy 1.08108 (1.08279)	Top-1 acc 63.281 (62.864)	Top-5 acc 80.078 (83.066)	lr 0.01202
Train [62][1500/3239]	Time 0.222 (0.615)	Data Time 0.001 (0.027)	Loss 2.4425 (2.5400)	Entropy 1.08108 (1.08277)	Top-1 acc 68.359 (62.865)	Top-5 acc 85.156 (83.068)	lr 0.01202
Train [62][1510/3239]	Time 0.216 (0.614)	Data Time 0.001 (0.027)	Loss 2.5050 (2.5399)	Entropy 1.08107 (1.08276)	Top-1 acc 66.406 (62.877)	Top-5 acc 83.984 (83.071)	lr 0.01202
Train [62][1520/3239]	Time 0.311 (0.613)	Data Time 0.001 (0.027)	Loss 2.4496 (2.5401)	Entropy 1.08103 (1.08275)	Top-1 acc 63.672 (62.875)	Top-5 acc 84.375 (83.067)	lr 0.01202
Train [62][1530/3239]	Time 0.217 (0.612)	Data Time 0.001 (0.027)	Loss 2.4896 (2.5402)	Entropy 1.08097 (1.08274)	Top-1 acc 66.406 (62.874)	Top-5 acc 83.984 (83.064)	lr 0.01202
Train [62][1540/3239]	Time 0.265 (0.611)	Data Time 0.001 (0.026)	Loss 2.7279 (2.5401)	Entropy 1.08094 (1.08273)	Top-1 acc 55.859 (62.872)	Top-5 acc 78.906 (83.065)	lr 0.01202
Train [62][1550/3239]	Time 2.567 (0.610)	Data Time 0.001 (0.026)	Loss 2.3314 (2.5402)	Entropy 1.08094 (1.08272)	Top-1 acc 66.797 (62.869)	Top-5 acc 86.328 (83.063)	lr 0.01202
Train [62][1560/3239]	Time 0.251 (0.607)	Data Time 0.001 (0.026)	Loss 2.4863 (2.5403)	Entropy 1.08091 (1.08271)	Top-1 acc 66.406 (62.869)	Top-5 acc 84.766 (83.063)	lr 0.01202
Train [62][1570/3239]	Time 0.312 (0.607)	Data Time 0.001 (0.026)	Loss 2.4091 (2.5406)	Entropy 1.08075 (1.08269)	Top-1 acc 67.188 (62.867)	Top-5 acc 84.766 (83.056)	lr 0.01201
Train [62][1580/3239]	Time 0.220 (0.606)	Data Time 0.001 (0.026)	Loss 2.6165 (2.5409)	Entropy 1.08069 (1.08268)	Top-1 acc 62.500 (62.864)	Top-5 acc 79.297 (83.053)	lr 0.01201
Train [62][1590/3239]	Time 0.229 (0.605)	Data Time 0.001 (0.026)	Loss 2.4054 (2.5412)	Entropy 1.08066 (1.08267)	Top-1 acc 67.188 (62.862)	Top-5 acc 86.719 (83.051)	lr 0.01201
Train [62][1600/3239]	Time 0.253 (0.604)	Data Time 0.001 (0.025)	Loss 2.4838 (2.5410)	Entropy 1.08068 (1.08266)	Top-1 acc 63.281 (62.868)	Top-5 acc 83.203 (83.054)	lr 0.01201
Train [62][1610/3239]	Time 0.377 (0.603)	Data Time 0.001 (0.025)	Loss 2.5669 (2.5413)	Entropy 1.08055 (1.08264)	Top-1 acc 62.500 (62.859)	Top-5 acc 83.594 (83.053)	lr 0.01201
Train [62][1620/3239]	Time 0.238 (0.603)	Data Time 0.001 (0.025)	Loss 2.6576 (2.5418)	Entropy 1.08057 (1.08263)	Top-1 acc 56.641 (62.839)	Top-5 acc 80.469 (83.044)	lr 0.01201
Train [62][1630/3239]	Time 0.225 (0.602)	Data Time 0.001 (0.025)	Loss 2.6728 (2.5422)	Entropy 1.08049 (1.08262)	Top-1 acc 59.375 (62.835)	Top-5 acc 81.641 (83.037)	lr 0.01201
Train [62][1640/3239]	Time 0.210 (0.601)	Data Time 0.001 (0.025)	Loss 2.6655 (2.5423)	Entropy 1.08048 (1.08260)	Top-1 acc 62.109 (62.833)	Top-5 acc 78.906 (83.034)	lr 0.01201
Train [62][1650/3239]	Time 0.231 (0.600)	Data Time 0.001 (0.025)	Loss 2.4576 (2.5421)	Entropy 1.08047 (1.08259)	Top-1 acc 62.891 (62.838)	Top-5 acc 81.641 (83.034)	lr 0.01201
Train [62][1660/3239]	Time 55.913 (0.631)	Data Time 0.001 (0.025)	Loss 2.6482 (2.5421)	Entropy 1.08047 (1.08258)	Top-1 acc 60.547 (62.841)	Top-5 acc 79.688 (83.034)	lr 0.01201
Train [62][1670/3239]	Time 0.226 (0.629)	Data Time 0.002 (0.024)	Loss 2.6031 (2.5420)	Entropy 1.08069 (1.08257)	Top-1 acc 64.062 (62.846)	Top-5 acc 82.422 (83.032)	lr 0.01200
Train [62][1680/3239]	Time 0.237 (0.628)	Data Time 0.002 (0.024)	Loss 2.5873 (2.5422)	Entropy 1.08072 (1.08256)	Top-1 acc 61.328 (62.832)	Top-5 acc 82.422 (83.026)	lr 0.01200
Train [62][1690/3239]	Time 0.231 (0.627)	Data Time 0.001 (0.024)	Loss 2.3304 (2.5420)	Entropy 1.08067 (1.08255)	Top-1 acc 68.750 (62.836)	Top-5 acc 89.062 (83.035)	lr 0.01200
Train [62][1700/3239]	Time 0.237 (0.626)	Data Time 0.002 (0.024)	Loss 2.5858 (2.5417)	Entropy 1.08065 (1.08253)	Top-1 acc 60.938 (62.841)	Top-5 acc 82.812 (83.042)	lr 0.01200
Train [62][1710/3239]	Time 0.310 (0.625)	Data Time 0.002 (0.024)	Loss 2.4075 (2.5422)	Entropy 1.08062 (1.08252)	Top-1 acc 68.359 (62.835)	Top-5 acc 85.156 (83.033)	lr 0.01200
Train [62][1720/3239]	Time 0.233 (0.625)	Data Time 0.002 (0.024)	Loss 2.6982 (2.5425)	Entropy 1.08058 (1.08251)	Top-1 acc 58.203 (62.832)	Top-5 acc 80.469 (83.029)	lr 0.01200
Train [62][1730/3239]	Time 0.234 (0.624)	Data Time 0.003 (0.024)	Loss 2.6087 (2.5427)	Entropy 1.08060 (1.08250)	Top-1 acc 64.453 (62.829)	Top-5 acc 82.422 (83.022)	lr 0.01200
Train [62][1740/3239]	Time 0.242 (0.623)	Data Time 0.002 (0.024)	Loss 2.6073 (2.5425)	Entropy 1.08056 (1.08249)	Top-1 acc 60.938 (62.831)	Top-5 acc 81.250 (83.027)	lr 0.01200
Train [62][1750/3239]	Time 0.216 (0.622)	Data Time 0.001 (0.023)	Loss 2.2701 (2.5423)	Entropy 1.08061 (1.08248)	Top-1 acc 71.094 (62.838)	Top-5 acc 86.719 (83.032)	lr 0.01200
Train [62][1760/3239]	Time 0.252 (0.621)	Data Time 0.001 (0.023)	Loss 2.6032 (2.5425)	Entropy 1.08059 (1.08247)	Top-1 acc 60.547 (62.829)	Top-5 acc 79.688 (83.024)	lr 0.01200
Train [62][1770/3239]	Time 2.474 (0.620)	Data Time 0.001 (0.023)	Loss 2.3954 (2.5425)	Entropy 1.08059 (1.08246)	Top-1 acc 66.016 (62.827)	Top-5 acc 87.109 (83.028)	lr 0.01199
Train [62][1780/3239]	Time 0.279 (0.618)	Data Time 0.002 (0.023)	Loss 2.3989 (2.5422)	Entropy 1.08052 (1.08245)	Top-1 acc 66.016 (62.832)	Top-5 acc 85.938 (83.033)	lr 0.01199
Train [62][1790/3239]	Time 0.224 (0.618)	Data Time 0.001 (0.023)	Loss 2.5812 (2.5421)	Entropy 1.08053 (1.08244)	Top-1 acc 62.109 (62.831)	Top-5 acc 82.031 (83.032)	lr 0.01199
Train [62][1800/3239]	Time 0.345 (0.617)	Data Time 0.001 (0.023)	Loss 2.4003 (2.5423)	Entropy 1.08054 (1.08243)	Top-1 acc 65.234 (62.823)	Top-5 acc 85.938 (83.029)	lr 0.01199
Train [62][1810/3239]	Time 0.250 (0.616)	Data Time 0.001 (0.023)	Loss 2.3681 (2.5423)	Entropy 1.08055 (1.08242)	Top-1 acc 64.844 (62.823)	Top-5 acc 85.547 (83.032)	lr 0.01199
Train [62][1820/3239]	Time 0.229 (0.615)	Data Time 0.001 (0.023)	Loss 2.4469 (2.5422)	Entropy 1.08051 (1.08241)	Top-1 acc 67.969 (62.828)	Top-5 acc 82.422 (83.032)	lr 0.01199
Train [62][1830/3239]	Time 0.239 (0.614)	Data Time 0.001 (0.022)	Loss 2.6253 (2.5426)	Entropy 1.08052 (1.08239)	Top-1 acc 60.938 (62.818)	Top-5 acc 80.469 (83.023)	lr 0.01199
Train [62][1840/3239]	Time 0.230 (0.614)	Data Time 0.001 (0.022)	Loss 2.5004 (2.5428)	Entropy 1.08051 (1.08238)	Top-1 acc 62.500 (62.813)	Top-5 acc 85.156 (83.019)	lr 0.01199
Train [62][1850/3239]	Time 0.324 (0.613)	Data Time 0.001 (0.022)	Loss 2.6728 (2.5432)	Entropy 1.08049 (1.08237)	Top-1 acc 58.203 (62.803)	Top-5 acc 79.688 (83.009)	lr 0.01199
Train [62][1860/3239]	Time 0.223 (0.612)	Data Time 0.001 (0.022)	Loss 2.5359 (2.5435)	Entropy 1.08049 (1.08236)	Top-1 acc 66.406 (62.799)	Top-5 acc 80.078 (83.003)	lr 0.01199
Train [62][1870/3239]	Time 0.312 (0.611)	Data Time 0.002 (0.022)	Loss 2.5459 (2.5437)	Entropy 1.08047 (1.08235)	Top-1 acc 62.500 (62.795)	Top-5 acc 82.812 (83.000)	lr 0.01198
Train [62][1880/3239]	Time 2.536 (0.611)	Data Time 0.002 (0.022)	Loss 2.6006 (2.5435)	Entropy 1.08047 (1.08234)	Top-1 acc 62.500 (62.804)	Top-5 acc 84.375 (83.005)	lr 0.01198
Train [62][1890/3239]	Time 0.220 (0.609)	Data Time 0.001 (0.022)	Loss 2.5637 (2.5435)	Entropy 1.08052 (1.08233)	Top-1 acc 63.672 (62.810)	Top-5 acc 80.078 (83.003)	lr 0.01198
Train [62][1900/3239]	Time 0.280 (0.608)	Data Time 0.001 (0.022)	Loss 2.3724 (2.5433)	Entropy 1.08049 (1.08233)	Top-1 acc 66.016 (62.811)	Top-5 acc 85.938 (83.009)	lr 0.01198
Train [62][1910/3239]	Time 0.234 (0.607)	Data Time 0.001 (0.022)	Loss 2.5573 (2.5432)	Entropy 1.08047 (1.08232)	Top-1 acc 60.938 (62.813)	Top-5 acc 82.812 (83.015)	lr 0.01198
Train [62][1920/3239]	Time 0.257 (0.607)	Data Time 0.002 (0.022)	Loss 2.5245 (2.5430)	Entropy 1.08050 (1.08231)	Top-1 acc 62.500 (62.818)	Top-5 acc 85.938 (83.019)	lr 0.01198
Train [62][1930/3239]	Time 0.266 (0.606)	Data Time 0.001 (0.021)	Loss 2.5972 (2.5431)	Entropy 1.08048 (1.08230)	Top-1 acc 63.281 (62.817)	Top-5 acc 80.469 (83.020)	lr 0.01198
Train [62][1940/3239]	Time 0.338 (0.605)	Data Time 0.001 (0.021)	Loss 2.5672 (2.5429)	Entropy 1.08034 (1.08229)	Top-1 acc 58.594 (62.818)	Top-5 acc 83.203 (83.025)	lr 0.01198
Train [62][1950/3239]	Time 0.215 (0.605)	Data Time 0.001 (0.021)	Loss 2.4948 (2.5429)	Entropy 1.08037 (1.08228)	Top-1 acc 64.453 (62.824)	Top-5 acc 85.547 (83.025)	lr 0.01198
Train [62][1960/3239]	Time 0.225 (0.604)	Data Time 0.001 (0.021)	Loss 2.3885 (2.5429)	Entropy 1.08035 (1.08227)	Top-1 acc 64.453 (62.821)	Top-5 acc 86.328 (83.028)	lr 0.01197
Train [62][1970/3239]	Time 0.217 (0.603)	Data Time 0.001 (0.021)	Loss 2.4383 (2.5430)	Entropy 1.08029 (1.08226)	Top-1 acc 64.844 (62.820)	Top-5 acc 85.156 (83.030)	lr 0.01197
Train [62][1980/3239]	Time 0.210 (0.603)	Data Time 0.005 (0.021)	Loss 2.4928 (2.5429)	Entropy 1.08032 (1.08225)	Top-1 acc 63.281 (62.817)	Top-5 acc 86.328 (83.035)	lr 0.01197
Train [62][1990/3239]	Time 2.413 (0.602)	Data Time 0.001 (0.021)	Loss 2.5775 (2.5430)	Entropy 1.08032 (1.08224)	Top-1 acc 59.766 (62.810)	Top-5 acc 82.031 (83.034)	lr 0.01197
Train [62][2000/3239]	Time 0.206 (0.600)	Data Time 0.001 (0.021)	Loss 2.4761 (2.5429)	Entropy 1.08030 (1.08223)	Top-1 acc 62.500 (62.811)	Top-5 acc 85.156 (83.036)	lr 0.01197
Train [62][2010/3239]	Time 0.239 (0.600)	Data Time 0.001 (0.021)	Loss 2.4912 (2.5428)	Entropy 1.08026 (1.08222)	Top-1 acc 65.234 (62.818)	Top-5 acc 83.594 (83.039)	lr 0.01197
Train [62][2020/3239]	Time 0.242 (0.599)	Data Time 0.001 (0.021)	Loss 2.5922 (2.5430)	Entropy 1.08024 (1.08221)	Top-1 acc 60.938 (62.815)	Top-5 acc 80.859 (83.034)	lr 0.01197
Train [62][2030/3239]	Time 0.462 (0.623)	Data Time 0.003 (0.020)	Loss 2.3962 (2.5432)	Entropy 1.08024 (1.08220)	Top-1 acc 69.141 (62.807)	Top-5 acc 86.719 (83.031)	lr 0.01197
Train [62][2040/3239]	Time 0.253 (0.622)	Data Time 0.002 (0.020)	Loss 2.6999 (2.5434)	Entropy 1.08021 (1.08219)	Top-1 acc 60.547 (62.806)	Top-5 acc 79.297 (83.027)	lr 0.01197
Train [62][2050/3239]	Time 0.234 (0.622)	Data Time 0.002 (0.020)	Loss 2.4800 (2.5434)	Entropy 1.08021 (1.08218)	Top-1 acc 64.062 (62.809)	Top-5 acc 85.547 (83.022)	lr 0.01197
Train [62][2060/3239]	Time 0.223 (0.621)	Data Time 0.001 (0.020)	Loss 2.4208 (2.5434)	Entropy 1.08017 (1.08217)	Top-1 acc 65.625 (62.807)	Top-5 acc 85.938 (83.020)	lr 0.01196
Train [62][2070/3239]	Time 0.218 (0.620)	Data Time 0.001 (0.020)	Loss 2.5362 (2.5435)	Entropy 1.08013 (1.08216)	Top-1 acc 63.672 (62.805)	Top-5 acc 83.984 (83.019)	lr 0.01196
Train [62][2080/3239]	Time 0.219 (0.619)	Data Time 0.001 (0.020)	Loss 2.6248 (2.5436)	Entropy 1.08010 (1.08215)	Top-1 acc 62.109 (62.808)	Top-5 acc 79.297 (83.015)	lr 0.01196
Train [62][2090/3239]	Time 0.260 (0.619)	Data Time 0.002 (0.020)	Loss 2.4605 (2.5435)	Entropy 1.08002 (1.08214)	Top-1 acc 63.281 (62.806)	Top-5 acc 83.203 (83.018)	lr 0.01196
Train [62][2100/3239]	Time 2.409 (0.618)	Data Time 0.001 (0.020)	Loss 2.7238 (2.5436)	Entropy 1.08002 (1.08213)	Top-1 acc 53.906 (62.802)	Top-5 acc 80.469 (83.013)	lr 0.01196
Train [62][2110/3239]	Time 0.255 (0.616)	Data Time 0.001 (0.020)	Loss 2.5856 (2.5437)	Entropy 1.07995 (1.08212)	Top-1 acc 63.672 (62.802)	Top-5 acc 81.250 (83.009)	lr 0.01196
Train [62][2120/3239]	Time 0.227 (0.615)	Data Time 0.001 (0.020)	Loss 2.7002 (2.5435)	Entropy 1.07993 (1.08211)	Top-1 acc 59.375 (62.805)	Top-5 acc 83.594 (83.016)	lr 0.01196
Train [62][2130/3239]	Time 0.215 (0.615)	Data Time 0.001 (0.020)	Loss 2.7371 (2.5438)	Entropy 1.07993 (1.08210)	Top-1 acc 56.641 (62.794)	Top-5 acc 78.125 (83.010)	lr 0.01196
Train [62][2140/3239]	Time 0.239 (0.614)	Data Time 0.001 (0.019)	Loss 2.4936 (2.5438)	Entropy 1.07995 (1.08209)	Top-1 acc 60.938 (62.797)	Top-5 acc 82.812 (83.012)	lr 0.01196
Train [62][2150/3239]	Time 0.219 (0.613)	Data Time 0.001 (0.019)	Loss 2.7794 (2.5438)	Entropy 1.07993 (1.08208)	Top-1 acc 54.688 (62.797)	Top-5 acc 77.344 (83.010)	lr 0.01196
Train [62][2160/3239]	Time 0.232 (0.613)	Data Time 0.001 (0.019)	Loss 2.5666 (2.5438)	Entropy 1.07990 (1.08207)	Top-1 acc 65.625 (62.794)	Top-5 acc 82.031 (83.013)	lr 0.01195
Train [62][2170/3239]	Time 0.458 (0.612)	Data Time 0.001 (0.019)	Loss 2.5014 (2.5441)	Entropy 1.07989 (1.08206)	Top-1 acc 65.625 (62.789)	Top-5 acc 84.375 (83.007)	lr 0.01195
Train [62][2180/3239]	Time 0.230 (0.612)	Data Time 0.001 (0.019)	Loss 2.5293 (2.5441)	Entropy 1.07989 (1.08205)	Top-1 acc 62.891 (62.789)	Top-5 acc 85.156 (83.008)	lr 0.01195
Train [62][2190/3239]	Time 0.207 (0.611)	Data Time 0.001 (0.019)	Loss 2.5914 (2.5444)	Entropy 1.07988 (1.08204)	Top-1 acc 62.109 (62.784)	Top-5 acc 82.031 (83.005)	lr 0.01195
Train [62][2200/3239]	Time 0.284 (0.610)	Data Time 0.002 (0.019)	Loss 2.5003 (2.5443)	Entropy 1.07985 (1.08203)	Top-1 acc 64.453 (62.787)	Top-5 acc 81.250 (83.006)	lr 0.01195
Train [62][2210/3239]	Time 2.579 (0.610)	Data Time 0.001 (0.019)	Loss 2.4836 (2.5441)	Entropy 1.07985 (1.08202)	Top-1 acc 61.719 (62.793)	Top-5 acc 83.594 (83.008)	lr 0.01195
Train [62][2220/3239]	Time 0.241 (0.608)	Data Time 0.001 (0.019)	Loss 2.5705 (2.5439)	Entropy 1.07982 (1.08201)	Top-1 acc 64.844 (62.799)	Top-5 acc 82.031 (83.009)	lr 0.01195
Train [62][2230/3239]	Time 0.220 (0.607)	Data Time 0.001 (0.019)	Loss 2.5461 (2.5440)	Entropy 1.07977 (1.08200)	Top-1 acc 65.234 (62.796)	Top-5 acc 81.641 (83.007)	lr 0.01195
Train [62][2240/3239]	Time 0.231 (0.607)	Data Time 0.001 (0.019)	Loss 2.6146 (2.5440)	Entropy 1.07979 (1.08199)	Top-1 acc 58.984 (62.792)	Top-5 acc 83.984 (83.009)	lr 0.01195
Train [62][2250/3239]	Time 0.234 (0.606)	Data Time 0.001 (0.019)	Loss 2.6123 (2.5441)	Entropy 1.07969 (1.08198)	Top-1 acc 59.375 (62.791)	Top-5 acc 83.203 (83.008)	lr 0.01195
Train [62][2260/3239]	Time 0.319 (0.606)	Data Time 0.001 (0.019)	Loss 2.7015 (2.5441)	Entropy 1.07967 (1.08197)	Top-1 acc 62.891 (62.792)	Top-5 acc 82.422 (83.008)	lr 0.01194
Train [62][2270/3239]	Time 0.208 (0.605)	Data Time 0.001 (0.018)	Loss 2.4609 (2.5442)	Entropy 1.07962 (1.08196)	Top-1 acc 64.844 (62.788)	Top-5 acc 85.156 (83.005)	lr 0.01194
Train [62][2280/3239]	Time 0.226 (0.604)	Data Time 0.001 (0.018)	Loss 2.5789 (2.5441)	Entropy 1.07959 (1.08195)	Top-1 acc 62.500 (62.794)	Top-5 acc 83.203 (83.010)	lr 0.01194
Train [62][2290/3239]	Time 0.229 (0.604)	Data Time 0.001 (0.018)	Loss 2.6157 (2.5442)	Entropy 1.07959 (1.08194)	Top-1 acc 63.281 (62.794)	Top-5 acc 80.469 (83.009)	lr 0.01194
Train [62][2300/3239]	Time 0.220 (0.603)	Data Time 0.001 (0.018)	Loss 2.7097 (2.5441)	Entropy 1.07962 (1.08193)	Top-1 acc 61.328 (62.798)	Top-5 acc 80.859 (83.013)	lr 0.01194
Train [62][2310/3239]	Time 0.252 (0.602)	Data Time 0.001 (0.018)	Loss 2.5854 (2.5441)	Entropy 1.07963 (1.08192)	Top-1 acc 61.719 (62.795)	Top-5 acc 81.641 (83.010)	lr 0.01194
Train [62][2320/3239]	Time 2.507 (0.602)	Data Time 0.002 (0.018)	Loss 2.5856 (2.5444)	Entropy 1.07963 (1.08191)	Top-1 acc 58.203 (62.786)	Top-5 acc 82.422 (83.004)	lr 0.01194
Train [62][2330/3239]	Time 0.246 (0.600)	Data Time 0.001 (0.018)	Loss 2.5094 (2.5443)	Entropy 1.07964 (1.08190)	Top-1 acc 64.453 (62.792)	Top-5 acc 83.984 (83.007)	lr 0.01194
Train [62][2340/3239]	Time 0.220 (0.600)	Data Time 0.001 (0.018)	Loss 2.5384 (2.5444)	Entropy 1.07961 (1.08189)	Top-1 acc 62.109 (62.788)	Top-5 acc 80.859 (83.001)	lr 0.01194
Train [62][2350/3239]	Time 0.330 (0.599)	Data Time 0.001 (0.018)	Loss 2.4633 (2.5446)	Entropy 1.07962 (1.08188)	Top-1 acc 66.797 (62.781)	Top-5 acc 82.812 (83.000)	lr 0.01194
Train [62][2360/3239]	Time 0.249 (0.599)	Data Time 0.001 (0.018)	Loss 2.5143 (2.5448)	Entropy 1.07957 (1.08187)	Top-1 acc 61.328 (62.776)	Top-5 acc 83.594 (82.996)	lr 0.01193
Train [62][2370/3239]	Time 0.232 (0.598)	Data Time 0.001 (0.018)	Loss 2.4802 (2.5450)	Entropy 1.07960 (1.08186)	Top-1 acc 65.234 (62.772)	Top-5 acc 82.422 (82.992)	lr 0.01193
Train [62][2380/3239]	Time 0.239 (0.598)	Data Time 0.001 (0.018)	Loss 2.7124 (2.5451)	Entropy 1.07961 (1.08185)	Top-1 acc 58.203 (62.766)	Top-5 acc 79.297 (82.994)	lr 0.01193
Train [62][2390/3239]	Time 0.243 (0.619)	Data Time 0.003 (0.018)	Loss 2.4196 (2.5451)	Entropy 1.07961 (1.08184)	Top-1 acc 64.062 (62.767)	Top-5 acc 84.766 (82.994)	lr 0.01193
Train [62][2400/3239]	Time 0.395 (0.618)	Data Time 0.002 (0.018)	Loss 2.6303 (2.5454)	Entropy 1.07961 (1.08183)	Top-1 acc 59.766 (62.761)	Top-5 acc 81.641 (82.990)	lr 0.01193
Train [62][2410/3239]	Time 0.232 (0.618)	Data Time 0.002 (0.017)	Loss 2.7607 (2.5456)	Entropy 1.07950 (1.08182)	Top-1 acc 60.938 (62.753)	Top-5 acc 76.953 (82.986)	lr 0.01193
Train [62][2420/3239]	Time 0.254 (0.617)	Data Time 0.001 (0.017)	Loss 2.3566 (2.5456)	Entropy 1.07946 (1.08181)	Top-1 acc 69.141 (62.754)	Top-5 acc 89.062 (82.990)	lr 0.01193
Train [62][2430/3239]	Time 2.581 (0.617)	Data Time 0.002 (0.017)	Loss 2.4768 (2.5460)	Entropy 1.07946 (1.08180)	Top-1 acc 62.891 (62.743)	Top-5 acc 83.594 (82.980)	lr 0.01193
Train [62][2440/3239]	Time 0.285 (0.615)	Data Time 0.001 (0.017)	Loss 2.4700 (2.5460)	Entropy 1.07942 (1.08179)	Top-1 acc 64.062 (62.744)	Top-5 acc 82.812 (82.980)	lr 0.01193
Train [62][2450/3239]	Time 0.245 (0.615)	Data Time 0.001 (0.017)	Loss 2.5226 (2.5459)	Entropy 1.07938 (1.08178)	Top-1 acc 64.453 (62.748)	Top-5 acc 83.203 (82.983)	lr 0.01193
Train [62][2460/3239]	Time 0.226 (0.614)	Data Time 0.001 (0.017)	Loss 2.6907 (2.5459)	Entropy 1.07931 (1.08177)	Top-1 acc 58.203 (62.747)	Top-5 acc 80.078 (82.986)	lr 0.01192
Train [62][2470/3239]	Time 0.243 (0.614)	Data Time 0.001 (0.017)	Loss 2.6564 (2.5462)	Entropy 1.07932 (1.08176)	Top-1 acc 59.766 (62.739)	Top-5 acc 80.078 (82.981)	lr 0.01192
Train [62][2480/3239]	Time 0.242 (0.613)	Data Time 0.001 (0.017)	Loss 2.4655 (2.5459)	Entropy 1.07929 (1.08175)	Top-1 acc 63.672 (62.742)	Top-5 acc 84.375 (82.988)	lr 0.01192
Train [62][2490/3239]	Time 0.366 (0.613)	Data Time 0.001 (0.017)	Loss 2.2309 (2.5461)	Entropy 1.07929 (1.08174)	Top-1 acc 69.531 (62.738)	Top-5 acc 88.672 (82.983)	lr 0.01192
Train [62][2500/3239]	Time 0.226 (0.612)	Data Time 0.001 (0.017)	Loss 2.4114 (2.5460)	Entropy 1.07921 (1.08173)	Top-1 acc 67.578 (62.742)	Top-5 acc 86.719 (82.986)	lr 0.01192
Train [62][2510/3239]	Time 0.222 (0.612)	Data Time 0.001 (0.017)	Loss 2.6701 (2.5462)	Entropy 1.07917 (1.08172)	Top-1 acc 59.766 (62.737)	Top-5 acc 79.297 (82.981)	lr 0.01192
Train [62][2520/3239]	Time 0.252 (0.611)	Data Time 0.001 (0.017)	Loss 2.4042 (2.5463)	Entropy 1.07915 (1.08171)	Top-1 acc 67.188 (62.734)	Top-5 acc 84.375 (82.978)	lr 0.01192
Train [62][2530/3239]	Time 0.249 (0.611)	Data Time 0.001 (0.017)	Loss 2.4868 (2.5464)	Entropy 1.07905 (1.08170)	Top-1 acc 62.891 (62.728)	Top-5 acc 82.812 (82.976)	lr 0.01192
Train [62][2540/3239]	Time 2.616 (0.610)	Data Time 0.001 (0.017)	Loss 2.5579 (2.5467)	Entropy 1.07905 (1.08169)	Top-1 acc 60.938 (62.720)	Top-5 acc 85.547 (82.970)	lr 0.01192
Train [62][2550/3239]	Time 0.235 (0.609)	Data Time 0.001 (0.017)	Loss 2.4684 (2.5468)	Entropy 1.07904 (1.08168)	Top-1 acc 65.234 (62.717)	Top-5 acc 83.594 (82.967)	lr 0.01192
Train [62][2560/3239]	Time 0.237 (0.608)	Data Time 0.001 (0.017)	Loss 2.4836 (2.5470)	Entropy 1.07901 (1.08167)	Top-1 acc 64.844 (62.710)	Top-5 acc 83.984 (82.964)	lr 0.01191
Train [62][2570/3239]	Time 0.241 (0.608)	Data Time 0.001 (0.016)	Loss 2.6362 (2.5471)	Entropy 1.07899 (1.08166)	Top-1 acc 60.547 (62.709)	Top-5 acc 80.859 (82.962)	lr 0.01191
Train [62][2580/3239]	Time 0.236 (0.607)	Data Time 0.004 (0.016)	Loss 2.5306 (2.5470)	Entropy 1.07891 (1.08165)	Top-1 acc 60.156 (62.709)	Top-5 acc 82.812 (82.961)	lr 0.01191
Train [62][2590/3239]	Time 0.237 (0.607)	Data Time 0.001 (0.016)	Loss 2.5887 (2.5472)	Entropy 1.07884 (1.08164)	Top-1 acc 60.938 (62.707)	Top-5 acc 82.422 (82.959)	lr 0.01191
Train [62][2600/3239]	Time 0.223 (0.606)	Data Time 0.001 (0.016)	Loss 2.7305 (2.5473)	Entropy 1.07879 (1.08163)	Top-1 acc 56.250 (62.697)	Top-5 acc 80.469 (82.958)	lr 0.01191
Train [62][2610/3239]	Time 0.296 (0.606)	Data Time 0.002 (0.016)	Loss 2.6619 (2.5473)	Entropy 1.07877 (1.08162)	Top-1 acc 61.328 (62.700)	Top-5 acc 80.078 (82.957)	lr 0.01191
Train [62][2620/3239]	Time 0.208 (0.605)	Data Time 0.002 (0.016)	Loss 2.5617 (2.5475)	Entropy 1.07867 (1.08161)	Top-1 acc 61.719 (62.694)	Top-5 acc 80.859 (82.953)	lr 0.01191
Train [62][2630/3239]	Time 0.320 (0.605)	Data Time 0.001 (0.016)	Loss 2.6237 (2.5473)	Entropy 1.07867 (1.08160)	Top-1 acc 60.547 (62.694)	Top-5 acc 81.641 (82.959)	lr 0.01191
Train [62][2640/3239]	Time 0.214 (0.604)	Data Time 0.001 (0.016)	Loss 2.3956 (2.5476)	Entropy 1.07865 (1.08159)	Top-1 acc 67.578 (62.691)	Top-5 acc 87.891 (82.956)	lr 0.01191
Train [62][2650/3239]	Time 0.230 (0.604)	Data Time 0.001 (0.016)	Loss 2.5493 (2.5478)	Entropy 1.07865 (1.08157)	Top-1 acc 66.016 (62.689)	Top-5 acc 80.859 (82.955)	lr 0.01191
Train [62][2660/3239]	Time 0.260 (0.603)	Data Time 0.001 (0.016)	Loss 2.5304 (2.5478)	Entropy 1.07862 (1.08156)	Top-1 acc 62.109 (62.688)	Top-5 acc 83.203 (82.952)	lr 0.01190
Train [62][2670/3239]	Time 0.216 (0.603)	Data Time 0.001 (0.016)	Loss 2.5188 (2.5478)	Entropy 1.07860 (1.08155)	Top-1 acc 61.328 (62.686)	Top-5 acc 83.984 (82.952)	lr 0.01190
Train [62][2680/3239]	Time 0.266 (0.602)	Data Time 0.001 (0.016)	Loss 2.3087 (2.5478)	Entropy 1.07857 (1.08154)	Top-1 acc 68.359 (62.684)	Top-5 acc 88.281 (82.956)	lr 0.01190
Train [62][2690/3239]	Time 0.275 (0.602)	Data Time 0.001 (0.016)	Loss 2.5574 (2.5476)	Entropy 1.07857 (1.08153)	Top-1 acc 58.984 (62.687)	Top-5 acc 83.203 (82.959)	lr 0.01190
Train [62][2700/3239]	Time 0.269 (0.601)	Data Time 0.001 (0.016)	Loss 2.4107 (2.5476)	Entropy 1.07854 (1.08152)	Top-1 acc 66.016 (62.686)	Top-5 acc 86.719 (82.958)	lr 0.01190
Train [62][2710/3239]	Time 0.224 (0.601)	Data Time 0.001 (0.016)	Loss 2.4230 (2.5475)	Entropy 1.07849 (1.08151)	Top-1 acc 67.188 (62.683)	Top-5 acc 83.203 (82.957)	lr 0.01190
Train [62][2720/3239]	Time 0.321 (0.600)	Data Time 0.002 (0.016)	Loss 2.6222 (2.5475)	Entropy 1.07843 (1.08150)	Top-1 acc 60.156 (62.682)	Top-5 acc 84.375 (82.959)	lr 0.01190
Train [62][2730/3239]	Time 0.210 (0.600)	Data Time 0.001 (0.016)	Loss 2.4200 (2.5476)	Entropy 1.07843 (1.08149)	Top-1 acc 64.453 (62.682)	Top-5 acc 84.375 (82.959)	lr 0.01190
Train [62][2740/3239]	Time 0.293 (0.618)	Data Time 0.004 (0.016)	Loss 2.6496 (2.5475)	Entropy 1.07846 (1.08148)	Top-1 acc 59.766 (62.686)	Top-5 acc 78.516 (82.957)	lr 0.01190
Train [62][2750/3239]	Time 0.217 (0.617)	Data Time 0.002 (0.016)	Loss 2.3920 (2.5478)	Entropy 1.07844 (1.08146)	Top-1 acc 69.531 (62.682)	Top-5 acc 84.766 (82.953)	lr 0.01190
Train [62][2760/3239]	Time 0.218 (0.617)	Data Time 0.001 (0.015)	Loss 2.3374 (2.5477)	Entropy 1.07837 (1.08145)	Top-1 acc 69.141 (62.685)	Top-5 acc 87.109 (82.954)	lr 0.01189
Train [62][2770/3239]	Time 0.233 (0.616)	Data Time 0.001 (0.015)	Loss 2.5139 (2.5478)	Entropy 1.07835 (1.08144)	Top-1 acc 62.500 (62.681)	Top-5 acc 83.594 (82.954)	lr 0.01189
Train [62][2780/3239]	Time 0.278 (0.616)	Data Time 0.001 (0.015)	Loss 2.4021 (2.5479)	Entropy 1.07832 (1.08143)	Top-1 acc 68.359 (62.681)	Top-5 acc 84.766 (82.953)	lr 0.01189
Train [62][2790/3239]	Time 0.208 (0.615)	Data Time 0.001 (0.015)	Loss 2.5038 (2.5479)	Entropy 1.07830 (1.08142)	Top-1 acc 64.453 (62.680)	Top-5 acc 85.156 (82.953)	lr 0.01189
Train [62][2800/3239]	Time 0.251 (0.615)	Data Time 0.002 (0.015)	Loss 2.4906 (2.5478)	Entropy 1.07828 (1.08141)	Top-1 acc 67.188 (62.681)	Top-5 acc 83.203 (82.951)	lr 0.01189
Train [62][2810/3239]	Time 0.319 (0.614)	Data Time 0.001 (0.015)	Loss 2.4636 (2.5478)	Entropy 1.07829 (1.08140)	Top-1 acc 65.234 (62.678)	Top-5 acc 82.812 (82.948)	lr 0.01189
Train [62][2820/3239]	Time 0.218 (0.613)	Data Time 0.001 (0.015)	Loss 2.6279 (2.5477)	Entropy 1.07819 (1.08139)	Top-1 acc 65.234 (62.681)	Top-5 acc 82.031 (82.950)	lr 0.01189
Train [62][2830/3239]	Time 0.224 (0.613)	Data Time 0.001 (0.015)	Loss 2.6339 (2.5478)	Entropy 1.07814 (1.08138)	Top-1 acc 60.547 (62.680)	Top-5 acc 80.859 (82.948)	lr 0.01189
Train [62][2840/3239]	Time 0.208 (0.612)	Data Time 0.001 (0.015)	Loss 2.4778 (2.5484)	Entropy 1.07805 (1.08136)	Top-1 acc 66.797 (62.669)	Top-5 acc 83.984 (82.939)	lr 0.01189
Train [62][2850/3239]	Time 0.255 (0.612)	Data Time 0.001 (0.015)	Loss 2.7424 (2.5485)	Entropy 1.07805 (1.08135)	Top-1 acc 57.422 (62.668)	Top-5 acc 80.859 (82.940)	lr 0.01189
Train [62][2860/3239]	Time 0.248 (0.611)	Data Time 0.001 (0.015)	Loss 2.5361 (2.5484)	Entropy 1.07801 (1.08134)	Top-1 acc 58.984 (62.667)	Top-5 acc 83.594 (82.939)	lr 0.01188
Train [62][2870/3239]	Time 0.225 (0.611)	Data Time 0.001 (0.015)	Loss 2.7422 (2.5485)	Entropy 1.07797 (1.08133)	Top-1 acc 58.594 (62.666)	Top-5 acc 82.031 (82.941)	lr 0.01188
Train [62][2880/3239]	Time 0.252 (0.610)	Data Time 0.001 (0.015)	Loss 2.4041 (2.5485)	Entropy 1.07795 (1.08132)	Top-1 acc 64.844 (62.665)	Top-5 acc 85.156 (82.945)	lr 0.01188
Train [62][2890/3239]	Time 0.225 (0.610)	Data Time 0.001 (0.015)	Loss 2.6437 (2.5486)	Entropy 1.07789 (1.08131)	Top-1 acc 57.031 (62.658)	Top-5 acc 82.031 (82.945)	lr 0.01188
Train [62][2900/3239]	Time 0.355 (0.609)	Data Time 0.001 (0.015)	Loss 2.5342 (2.5486)	Entropy 1.07786 (1.08129)	Top-1 acc 63.281 (62.656)	Top-5 acc 82.422 (82.944)	lr 0.01188
Train [62][2910/3239]	Time 0.229 (0.609)	Data Time 0.001 (0.015)	Loss 2.5379 (2.5486)	Entropy 1.07783 (1.08128)	Top-1 acc 67.188 (62.660)	Top-5 acc 83.984 (82.946)	lr 0.01188
Train [62][2920/3239]	Time 0.228 (0.609)	Data Time 0.001 (0.015)	Loss 2.6714 (2.5486)	Entropy 1.07779 (1.08127)	Top-1 acc 62.891 (62.663)	Top-5 acc 79.688 (82.946)	lr 0.01188
Train [62][2930/3239]	Time 0.210 (0.608)	Data Time 0.001 (0.015)	Loss 2.5071 (2.5484)	Entropy 1.07770 (1.08126)	Top-1 acc 64.062 (62.665)	Top-5 acc 82.422 (82.948)	lr 0.01188
Train [62][2940/3239]	Time 0.233 (0.608)	Data Time 0.001 (0.015)	Loss 2.5593 (2.5483)	Entropy 1.07772 (1.08125)	Top-1 acc 59.375 (62.667)	Top-5 acc 81.641 (82.948)	lr 0.01188
Train [62][2950/3239]	Time 0.230 (0.607)	Data Time 0.001 (0.015)	Loss 2.5915 (2.5485)	Entropy 1.07768 (1.08123)	Top-1 acc 62.891 (62.664)	Top-5 acc 79.688 (82.944)	lr 0.01188
Train [62][2960/3239]	Time 0.211 (0.607)	Data Time 0.001 (0.015)	Loss 2.5604 (2.5486)	Entropy 1.07765 (1.08122)	Top-1 acc 61.719 (62.665)	Top-5 acc 82.031 (82.941)	lr 0.01187
Train [62][2970/3239]	Time 0.216 (0.606)	Data Time 0.001 (0.014)	Loss 2.6917 (2.5487)	Entropy 1.07767 (1.08121)	Top-1 acc 58.984 (62.663)	Top-5 acc 81.641 (82.938)	lr 0.01187
Train [62][2980/3239]	Time 0.251 (0.606)	Data Time 0.001 (0.014)	Loss 2.7389 (2.5488)	Entropy 1.07763 (1.08120)	Top-1 acc 57.422 (62.663)	Top-5 acc 80.859 (82.939)	lr 0.01187
Train [62][2990/3239]	Time 0.353 (0.605)	Data Time 0.001 (0.014)	Loss 2.5737 (2.5487)	Entropy 1.07756 (1.08119)	Top-1 acc 61.719 (62.665)	Top-5 acc 82.031 (82.940)	lr 0.01187
Train [62][3000/3239]	Time 0.270 (0.605)	Data Time 0.001 (0.014)	Loss 2.5019 (2.5486)	Entropy 1.07742 (1.08117)	Top-1 acc 61.328 (62.664)	Top-5 acc 85.547 (82.946)	lr 0.01187
Train [62][3010/3239]	Time 0.233 (0.604)	Data Time 0.001 (0.014)	Loss 2.5492 (2.5488)	Entropy 1.07745 (1.08116)	Top-1 acc 60.938 (62.657)	Top-5 acc 82.422 (82.940)	lr 0.01187
Train [62][3020/3239]	Time 0.256 (0.604)	Data Time 0.001 (0.014)	Loss 2.4807 (2.5489)	Entropy 1.07743 (1.08115)	Top-1 acc 64.062 (62.655)	Top-5 acc 82.812 (82.940)	lr 0.01187
Train [62][3030/3239]	Time 0.290 (0.603)	Data Time 0.002 (0.014)	Loss 2.4496 (2.5487)	Entropy 1.07738 (1.08114)	Top-1 acc 64.453 (62.659)	Top-5 acc 85.938 (82.944)	lr 0.01187
Train [62][3040/3239]	Time 0.224 (0.603)	Data Time 0.001 (0.014)	Loss 2.7596 (2.5487)	Entropy 1.07736 (1.08112)	Top-1 acc 59.766 (62.657)	Top-5 acc 78.125 (82.942)	lr 0.01187
Train [62][3050/3239]	Time 0.223 (0.602)	Data Time 0.001 (0.014)	Loss 2.5156 (2.5488)	Entropy 1.07735 (1.08111)	Top-1 acc 64.062 (62.660)	Top-5 acc 81.641 (82.940)	lr 0.01186
Train [62][3060/3239]	Time 0.261 (0.602)	Data Time 0.001 (0.014)	Loss 2.5805 (2.5487)	Entropy 1.07733 (1.08110)	Top-1 acc 61.328 (62.660)	Top-5 acc 82.422 (82.939)	lr 0.01186
Train [62][3070/3239]	Time 0.307 (0.618)	Data Time 0.005 (0.014)	Loss 2.3805 (2.5487)	Entropy 1.07731 (1.08109)	Top-1 acc 65.234 (62.658)	Top-5 acc 88.281 (82.944)	lr 0.01186
Train [62][3080/3239]	Time 0.336 (0.617)	Data Time 0.002 (0.014)	Loss 2.7170 (2.5489)	Entropy 1.07729 (1.08108)	Top-1 acc 60.156 (62.651)	Top-5 acc 78.516 (82.936)	lr 0.01186
Train [62][3090/3239]	Time 0.198 (0.617)	Data Time 0.002 (0.014)	Loss 2.6486 (2.5489)	Entropy 1.07727 (1.08106)	Top-1 acc 62.500 (62.649)	Top-5 acc 80.859 (82.937)	lr 0.01186
Train [62][3100/3239]	Time 0.241 (0.616)	Data Time 0.001 (0.014)	Loss 2.7754 (2.5491)	Entropy 1.07725 (1.08105)	Top-1 acc 57.812 (62.646)	Top-5 acc 76.953 (82.934)	lr 0.01186
Train [62][3110/3239]	Time 0.242 (0.616)	Data Time 0.001 (0.014)	Loss 2.6609 (2.5490)	Entropy 1.07731 (1.08104)	Top-1 acc 57.031 (62.645)	Top-5 acc 80.078 (82.937)	lr 0.01186
Train [62][3120/3239]	Time 0.226 (0.615)	Data Time 0.001 (0.014)	Loss 2.6166 (2.5491)	Entropy 1.07728 (1.08103)	Top-1 acc 60.156 (62.642)	Top-5 acc 82.031 (82.935)	lr 0.01186
Train [62][3130/3239]	Time 0.391 (0.615)	Data Time 0.001 (0.014)	Loss 2.4907 (2.5491)	Entropy 1.07720 (1.08102)	Top-1 acc 66.406 (62.642)	Top-5 acc 82.031 (82.936)	lr 0.01186
Train [62][3140/3239]	Time 0.214 (0.614)	Data Time 0.001 (0.014)	Loss 2.6635 (2.5490)	Entropy 1.07720 (1.08100)	Top-1 acc 59.375 (62.646)	Top-5 acc 80.859 (82.938)	lr 0.01186
Train [62][3150/3239]	Time 0.224 (0.614)	Data Time 0.001 (0.014)	Loss 2.4880 (2.5490)	Entropy 1.07717 (1.08099)	Top-1 acc 64.844 (62.648)	Top-5 acc 83.594 (82.938)	lr 0.01185
Train [62][3160/3239]	Time 0.235 (0.614)	Data Time 0.001 (0.014)	Loss 2.5192 (2.5490)	Entropy 1.07712 (1.08098)	Top-1 acc 62.500 (62.647)	Top-5 acc 84.766 (82.937)	lr 0.01185
Train [62][3170/3239]	Time 0.232 (0.613)	Data Time 0.001 (0.014)	Loss 2.3794 (2.5490)	Entropy 1.07714 (1.08097)	Top-1 acc 64.844 (62.647)	Top-5 acc 84.375 (82.936)	lr 0.01185
Train [62][3180/3239]	Time 0.217 (0.613)	Data Time 0.000 (0.014)	Loss 2.6340 (2.5490)	Entropy 1.07712 (1.08095)	Top-1 acc 62.109 (62.648)	Top-5 acc 80.859 (82.936)	lr 0.01185
Train [62][3190/3239]	Time 0.218 (0.612)	Data Time 0.000 (0.014)	Loss 2.5016 (2.5491)	Entropy 1.07681 (1.08094)	Top-1 acc 61.328 (62.647)	Top-5 acc 84.375 (82.933)	lr 0.01185
Train [62][3200/3239]	Time 0.236 (0.612)	Data Time 0.000 (0.014)	Loss 2.7595 (2.5491)	Entropy 1.07680 (1.08093)	Top-1 acc 57.812 (62.648)	Top-5 acc 80.469 (82.932)	lr 0.01185
Train [62][3210/3239]	Time 0.231 (0.611)	Data Time 0.000 (0.014)	Loss 2.5228 (2.5490)	Entropy 1.07677 (1.08092)	Top-1 acc 64.453 (62.650)	Top-5 acc 83.594 (82.935)	lr 0.01185
Train [62][3220/3239]	Time 0.320 (0.611)	Data Time 0.000 (0.013)	Loss 2.4958 (2.5489)	Entropy 1.07668 (1.08090)	Top-1 acc 64.453 (62.655)	Top-5 acc 83.984 (82.937)	lr 0.01185
Train [62][3230/3239]	Time 0.209 (0.610)	Data Time 0.000 (0.013)	Loss 2.4987 (2.5490)	Entropy 1.07669 (1.08089)	Top-1 acc 66.797 (62.653)	Top-5 acc 82.031 (82.933)	lr 0.01185
Train [62][3239/3239]	Time 2.261 (0.610)	Data Time 0.000 (0.013)	Loss 2.7679 (2.5488)	Entropy 1.07669 (1.08088)	Top-1 acc 55.556 (62.654)	Top-5 acc 76.543 (82.938)	lr 0.01185
==========Valid [62/120]	loss 1.432	top-1 acc 67.237 (67.237)	top-5 acc 86.834	Train top-1 62.654	top-5 82.938	Entropy 1.07669	Latency-None: 0.000ms	Flops: 546.53M
Train [63][0/3239]	Time 41.157 (41.157)	Data Time 39.468 (39.468)	Loss 2.5639 (2.5639)	Entropy 1.07663 (1.07663)	Top-1 acc 61.328 (61.328)	Top-5 acc 82.812 (82.812)	lr 0.01185
Train [63][10/3239]	Time 2.752 (4.278)	Data Time 0.002 (3.602)	Loss 2.5612 (2.5762)	Entropy 1.07663 (1.07663)	Top-1 acc 62.891 (62.322)	Top-5 acc 81.250 (82.528)	lr 0.01184
Train [63][20/3239]	Time 0.224 (2.359)	Data Time 0.001 (1.887)	Loss 2.3713 (2.5302)	Entropy 1.07660 (1.07662)	Top-1 acc 66.016 (62.798)	Top-5 acc 84.766 (83.408)	lr 0.01184
Train [63][30/3239]	Time 0.328 (1.755)	Data Time 0.001 (1.280)	Loss 2.6924 (2.5379)	Entropy 1.07660 (1.07661)	Top-1 acc 60.156 (62.840)	Top-5 acc 80.469 (83.329)	lr 0.01184
Train [63][40/3239]	Time 0.250 (1.445)	Data Time 0.001 (0.968)	Loss 2.5973 (2.5400)	Entropy 1.07659 (1.07661)	Top-1 acc 62.500 (62.948)	Top-5 acc 80.859 (83.060)	lr 0.01184
Train [63][50/3239]	Time 0.242 (1.257)	Data Time 0.001 (0.779)	Loss 2.3532 (2.5347)	Entropy 1.07657 (1.07660)	Top-1 acc 67.578 (63.235)	Top-5 acc 86.719 (83.203)	lr 0.01184
Train [63][60/3239]	Time 0.235 (1.132)	Data Time 0.001 (0.651)	Loss 2.5785 (2.5320)	Entropy 1.07654 (1.07659)	Top-1 acc 59.766 (63.179)	Top-5 acc 79.688 (83.178)	lr 0.01184
Train [63][70/3239]	Time 0.248 (1.040)	Data Time 0.001 (0.560)	Loss 2.6079 (2.5330)	Entropy 1.07653 (1.07658)	Top-1 acc 60.156 (63.166)	Top-5 acc 83.203 (83.247)	lr 0.01184
Train [63][80/3239]	Time 0.221 (0.973)	Data Time 0.002 (0.491)	Loss 2.6775 (2.5278)	Entropy 1.07652 (1.07658)	Top-1 acc 61.328 (63.262)	Top-5 acc 82.422 (83.401)	lr 0.01184
Train [63][90/3239]	Time 0.205 (0.917)	Data Time 0.001 (0.437)	Loss 2.3995 (2.5238)	Entropy 1.07654 (1.07657)	Top-1 acc 67.578 (63.260)	Top-5 acc 84.766 (83.474)	lr 0.01184
Train [63][100/3239]	Time 0.224 (0.873)	Data Time 0.001 (0.394)	Loss 2.4397 (2.5231)	Entropy 1.07651 (1.07657)	Top-1 acc 64.844 (63.293)	Top-5 acc 85.156 (83.532)	lr 0.01184
Train [63][110/3239]	Time 0.258 (0.836)	Data Time 0.001 (0.359)	Loss 2.6094 (2.5265)	Entropy 1.07649 (1.07656)	Top-1 acc 62.500 (63.197)	Top-5 acc 83.203 (83.488)	lr 0.01183
Train [63][120/3239]	Time 2.662 (0.807)	Data Time 0.001 (0.329)	Loss 2.7425 (2.5301)	Entropy 1.07649 (1.07656)	Top-1 acc 58.984 (63.020)	Top-5 acc 78.516 (83.436)	lr 0.01183
Train [63][130/3239]	Time 0.241 (0.763)	Data Time 0.001 (0.304)	Loss 2.4113 (2.5252)	Entropy 1.07648 (1.07655)	Top-1 acc 67.188 (63.165)	Top-5 acc 83.203 (83.442)	lr 0.01183
Train [63][140/3239]	Time 0.228 (0.742)	Data Time 0.001 (0.283)	Loss 2.4877 (2.5259)	Entropy 1.07639 (1.07654)	Top-1 acc 64.453 (63.129)	Top-5 acc 83.984 (83.433)	lr 0.01183
Train [63][150/3239]	Time 0.226 (0.724)	Data Time 0.001 (0.264)	Loss 2.6319 (2.5289)	Entropy 1.07634 (1.07653)	Top-1 acc 58.984 (63.017)	Top-5 acc 79.297 (83.322)	lr 0.01183
Train [63][160/3239]	Time 0.233 (0.708)	Data Time 0.001 (0.248)	Loss 2.4490 (2.5323)	Entropy 1.07633 (1.07651)	Top-1 acc 66.406 (62.978)	Top-5 acc 85.547 (83.240)	lr 0.01183
Train [63][170/3239]	Time 0.243 (0.695)	Data Time 0.001 (0.233)	Loss 3.0700 (2.5361)	Entropy 1.07621 (1.07650)	Top-1 acc 53.516 (62.950)	Top-5 acc 72.266 (83.123)	lr 0.01183
Train [63][180/3239]	Time 0.269 (0.967)	Data Time 0.002 (0.221)	Loss 2.4310 (2.5331)	Entropy 1.07619 (1.07648)	Top-1 acc 64.062 (63.087)	Top-5 acc 84.375 (83.121)	lr 0.01183
Train [63][190/3239]	Time 0.220 (0.943)	Data Time 0.002 (0.209)	Loss 2.4811 (2.5335)	Entropy 1.07616 (1.07647)	Top-1 acc 64.453 (63.089)	Top-5 acc 85.938 (83.144)	lr 0.01183
Train [63][200/3239]	Time 0.228 (0.919)	Data Time 0.002 (0.199)	Loss 2.4858 (2.5335)	Entropy 1.07612 (1.07645)	Top-1 acc 63.672 (63.066)	Top-5 acc 83.594 (83.172)	lr 0.01183
Train [63][210/3239]	Time 0.316 (0.898)	Data Time 0.001 (0.190)	Loss 2.5030 (2.5342)	Entropy 1.07605 (1.07643)	Top-1 acc 61.328 (63.074)	Top-5 acc 83.594 (83.188)	lr 0.01182
Train [63][220/3239]	Time 0.229 (0.878)	Data Time 0.001 (0.181)	Loss 2.5348 (2.5331)	Entropy 1.07604 (1.07642)	Top-1 acc 62.500 (63.083)	Top-5 acc 82.422 (83.162)	lr 0.01182
Train [63][230/3239]	Time 2.444 (0.860)	Data Time 0.001 (0.173)	Loss 2.6343 (2.5341)	Entropy 1.07604 (1.07640)	Top-1 acc 58.984 (63.053)	Top-5 acc 82.422 (83.147)	lr 0.01182
Train [63][240/3239]	Time 0.282 (0.834)	Data Time 0.001 (0.166)	Loss 2.4653 (2.5347)	Entropy 1.07601 (1.07638)	Top-1 acc 61.719 (62.998)	Top-5 acc 87.500 (83.145)	lr 0.01182
Train [63][250/3239]	Time 0.247 (0.819)	Data Time 0.001 (0.160)	Loss 2.4920 (2.5366)	Entropy 1.07599 (1.07637)	Top-1 acc 61.328 (62.962)	Top-5 acc 83.594 (83.105)	lr 0.01182
Train [63][260/3239]	Time 0.323 (0.806)	Data Time 0.001 (0.154)	Loss 2.4580 (2.5345)	Entropy 1.07603 (1.07636)	Top-1 acc 65.625 (63.016)	Top-5 acc 83.594 (83.145)	lr 0.01182
Train [63][270/3239]	Time 0.229 (0.794)	Data Time 0.001 (0.148)	Loss 2.6775 (2.5352)	Entropy 1.07602 (1.07634)	Top-1 acc 60.938 (62.997)	Top-5 acc 80.078 (83.154)	lr 0.01182
Train [63][280/3239]	Time 0.240 (0.782)	Data Time 0.001 (0.143)	Loss 2.6006 (2.5348)	Entropy 1.07597 (1.07633)	Top-1 acc 63.281 (63.023)	Top-5 acc 83.203 (83.146)	lr 0.01182
Train [63][290/3239]	Time 0.222 (0.772)	Data Time 0.001 (0.138)	Loss 2.6022 (2.5352)	Entropy 1.07599 (1.07632)	Top-1 acc 62.500 (63.014)	Top-5 acc 81.641 (83.109)	lr 0.01182
Train [63][300/3239]	Time 0.214 (0.762)	Data Time 0.001 (0.133)	Loss 2.4509 (2.5340)	Entropy 1.07600 (1.07631)	Top-1 acc 61.328 (63.045)	Top-5 acc 87.500 (83.147)	lr 0.01182
Train [63][310/3239]	Time 0.234 (0.752)	Data Time 0.001 (0.129)	Loss 2.5890 (2.5336)	Entropy 1.07597 (1.07630)	Top-1 acc 62.109 (63.071)	Top-5 acc 82.031 (83.153)	lr 0.01181
Train [63][320/3239]	Time 0.206 (0.744)	Data Time 0.001 (0.125)	Loss 2.4233 (2.5326)	Entropy 1.07590 (1.07629)	Top-1 acc 59.766 (63.090)	Top-5 acc 87.109 (83.171)	lr 0.01181
Train [63][330/3239]	Time 0.254 (0.736)	Data Time 0.001 (0.121)	Loss 2.3688 (2.5309)	Entropy 1.07589 (1.07628)	Top-1 acc 67.188 (63.156)	Top-5 acc 86.328 (83.198)	lr 0.01181
Train [63][340/3239]	Time 2.463 (0.727)	Data Time 0.001 (0.118)	Loss 2.4873 (2.5305)	Entropy 1.07589 (1.07626)	Top-1 acc 66.016 (63.178)	Top-5 acc 83.203 (83.202)	lr 0.01181
Train [63][350/3239]	Time 0.387 (0.714)	Data Time 0.001 (0.115)	Loss 2.6143 (2.5308)	Entropy 1.07603 (1.07626)	Top-1 acc 58.984 (63.167)	Top-5 acc 83.203 (83.182)	lr 0.01181
Train [63][360/3239]	Time 0.221 (0.707)	Data Time 0.001 (0.111)	Loss 2.5565 (2.5312)	Entropy 1.07602 (1.07625)	Top-1 acc 62.109 (63.190)	Top-5 acc 82.031 (83.168)	lr 0.01181
Train [63][370/3239]	Time 0.232 (0.701)	Data Time 0.001 (0.108)	Loss 2.3729 (2.5297)	Entropy 1.07601 (1.07624)	Top-1 acc 67.578 (63.247)	Top-5 acc 85.156 (83.198)	lr 0.01181
Train [63][380/3239]	Time 0.246 (0.695)	Data Time 0.001 (0.106)	Loss 2.4660 (2.5305)	Entropy 1.07604 (1.07624)	Top-1 acc 67.188 (63.226)	Top-5 acc 83.203 (83.187)	lr 0.01181
Train [63][390/3239]	Time 0.199 (0.689)	Data Time 0.001 (0.103)	Loss 2.4373 (2.5318)	Entropy 1.07601 (1.07623)	Top-1 acc 64.844 (63.195)	Top-5 acc 85.156 (83.166)	lr 0.01181
Train [63][400/3239]	Time 0.250 (0.683)	Data Time 0.001 (0.100)	Loss 2.5566 (2.5319)	Entropy 1.07599 (1.07623)	Top-1 acc 63.672 (63.220)	Top-5 acc 80.078 (83.162)	lr 0.01181
Train [63][410/3239]	Time 0.210 (0.678)	Data Time 0.001 (0.098)	Loss 2.5813 (2.5322)	Entropy 1.07597 (1.07622)	Top-1 acc 60.547 (63.226)	Top-5 acc 83.203 (83.150)	lr 0.01180
Train [63][420/3239]	Time 0.218 (0.673)	Data Time 0.001 (0.096)	Loss 2.4223 (2.5319)	Entropy 1.07592 (1.07622)	Top-1 acc 62.109 (63.201)	Top-5 acc 85.547 (83.174)	lr 0.01180
Train [63][430/3239]	Time 0.230 (0.668)	Data Time 0.001 (0.094)	Loss 2.5673 (2.5315)	Entropy 1.07586 (1.07621)	Top-1 acc 63.672 (63.211)	Top-5 acc 81.250 (83.177)	lr 0.01180
Train [63][440/3239]	Time 0.251 (0.664)	Data Time 0.001 (0.092)	Loss 2.7098 (2.5330)	Entropy 1.07583 (1.07620)	Top-1 acc 57.812 (63.163)	Top-5 acc 81.641 (83.155)	lr 0.01180
Train [63][450/3239]	Time 2.477 (0.660)	Data Time 0.001 (0.090)	Loss 2.5956 (2.5325)	Entropy 1.07583 (1.07619)	Top-1 acc 57.812 (63.157)	Top-5 acc 82.812 (83.170)	lr 0.01180
Train [63][460/3239]	Time 0.239 (0.651)	Data Time 0.001 (0.088)	Loss 2.5857 (2.5332)	Entropy 1.07576 (1.07618)	Top-1 acc 63.672 (63.166)	Top-5 acc 81.250 (83.141)	lr 0.01180
Train [63][470/3239]	Time 0.192 (0.647)	Data Time 0.002 (0.086)	Loss 2.6761 (2.5334)	Entropy 1.07578 (1.07617)	Top-1 acc 60.156 (63.141)	Top-5 acc 80.078 (83.139)	lr 0.01180
Train [63][480/3239]	Time 0.257 (0.643)	Data Time 0.001 (0.084)	Loss 2.5223 (2.5327)	Entropy 1.07575 (1.07617)	Top-1 acc 62.891 (63.167)	Top-5 acc 84.766 (83.162)	lr 0.01180
Train [63][490/3239]	Time 0.318 (0.640)	Data Time 0.001 (0.082)	Loss 2.4486 (2.5328)	Entropy 1.07576 (1.07616)	Top-1 acc 62.891 (63.147)	Top-5 acc 85.547 (83.167)	lr 0.01180
Train [63][500/3239]	Time 0.217 (0.636)	Data Time 0.001 (0.081)	Loss 2.5123 (2.5322)	Entropy 1.07569 (1.07615)	Top-1 acc 63.672 (63.148)	Top-5 acc 84.375 (83.175)	lr 0.01180
Train [63][510/3239]	Time 0.265 (0.633)	Data Time 0.002 (0.079)	Loss 2.6474 (2.5314)	Entropy 1.07566 (1.07614)	Top-1 acc 54.688 (63.165)	Top-5 acc 82.422 (83.202)	lr 0.01179
Train [63][520/3239]	Time 0.229 (0.630)	Data Time 0.001 (0.078)	Loss 2.6648 (2.5320)	Entropy 1.07569 (1.07613)	Top-1 acc 60.156 (63.142)	Top-5 acc 82.031 (83.198)	lr 0.01179
Train [63][530/3239]	Time 0.222 (0.627)	Data Time 0.001 (0.076)	Loss 2.6747 (2.5326)	Entropy 1.07571 (1.07612)	Top-1 acc 59.766 (63.139)	Top-5 acc 82.812 (83.199)	lr 0.01179
Train [63][540/3239]	Time 0.417 (0.713)	Data Time 0.101 (0.075)	Loss 2.5630 (2.5326)	Entropy 1.07569 (1.07611)	Top-1 acc 62.500 (63.143)	Top-5 acc 85.547 (83.199)	lr 0.01179
Train [63][550/3239]	Time 0.253 (0.711)	Data Time 0.002 (0.074)	Loss 2.4607 (2.5338)	Entropy 1.07566 (1.07611)	Top-1 acc 62.109 (63.102)	Top-5 acc 87.891 (83.181)	lr 0.01179
Train [63][560/3239]	Time 2.522 (0.707)	Data Time 0.002 (0.072)	Loss 2.6319 (2.5339)	Entropy 1.07566 (1.07610)	Top-1 acc 57.812 (63.088)	Top-5 acc 82.812 (83.184)	lr 0.01179
Train [63][570/3239]	Time 0.238 (0.699)	Data Time 0.001 (0.071)	Loss 2.4523 (2.5331)	Entropy 1.07564 (1.07609)	Top-1 acc 64.062 (63.106)	Top-5 acc 85.156 (83.198)	lr 0.01179
Train [63][580/3239]	Time 0.315 (0.695)	Data Time 0.001 (0.070)	Loss 2.4851 (2.5333)	Entropy 1.07561 (1.07608)	Top-1 acc 62.891 (63.101)	Top-5 acc 83.594 (83.204)	lr 0.01179
Train [63][590/3239]	Time 0.236 (0.691)	Data Time 0.001 (0.069)	Loss 2.4949 (2.5335)	Entropy 1.07561 (1.07607)	Top-1 acc 64.062 (63.090)	Top-5 acc 82.812 (83.210)	lr 0.01179
Train [63][600/3239]	Time 0.206 (0.687)	Data Time 0.001 (0.068)	Loss 2.6122 (2.5327)	Entropy 1.07556 (1.07607)	Top-1 acc 62.891 (63.114)	Top-5 acc 82.812 (83.223)	lr 0.01179
Train [63][610/3239]	Time 0.236 (0.683)	Data Time 0.001 (0.067)	Loss 2.5112 (2.5329)	Entropy 1.07557 (1.07606)	Top-1 acc 64.453 (63.095)	Top-5 acc 83.594 (83.228)	lr 0.01178
Train [63][620/3239]	Time 0.238 (0.680)	Data Time 0.001 (0.066)	Loss 2.4131 (2.5321)	Entropy 1.07559 (1.07605)	Top-1 acc 63.281 (63.100)	Top-5 acc 84.766 (83.252)	lr 0.01178
Train [63][630/3239]	Time 0.234 (0.676)	Data Time 0.001 (0.065)	Loss 2.6069 (2.5329)	Entropy 1.07552 (1.07604)	Top-1 acc 62.500 (63.091)	Top-5 acc 82.422 (83.231)	lr 0.01178
Train [63][640/3239]	Time 0.234 (0.673)	Data Time 0.001 (0.064)	Loss 2.5781 (2.5319)	Entropy 1.07548 (1.07603)	Top-1 acc 63.281 (63.120)	Top-5 acc 85.156 (83.249)	lr 0.01178
Train [63][650/3239]	Time 0.254 (0.670)	Data Time 0.001 (0.063)	Loss 2.5817 (2.5319)	Entropy 1.07547 (1.07603)	Top-1 acc 62.109 (63.110)	Top-5 acc 83.984 (83.250)	lr 0.01178
Train [63][660/3239]	Time 0.203 (0.667)	Data Time 0.001 (0.062)	Loss 2.5317 (2.5319)	Entropy 1.07551 (1.07602)	Top-1 acc 62.891 (63.129)	Top-5 acc 81.641 (83.250)	lr 0.01178
Train [63][670/3239]	Time 2.722 (0.665)	Data Time 0.001 (0.061)	Loss 2.5106 (2.5321)	Entropy 1.07551 (1.07601)	Top-1 acc 61.719 (63.124)	Top-5 acc 81.641 (83.238)	lr 0.01178
Train [63][680/3239]	Time 0.223 (0.658)	Data Time 0.001 (0.060)	Loss 2.5290 (2.5316)	Entropy 1.07553 (1.07600)	Top-1 acc 63.672 (63.140)	Top-5 acc 83.594 (83.235)	lr 0.01178
Train [63][690/3239]	Time 0.248 (0.656)	Data Time 0.002 (0.059)	Loss 2.5102 (2.5319)	Entropy 1.07554 (1.07600)	Top-1 acc 62.891 (63.122)	Top-5 acc 82.422 (83.212)	lr 0.01178
Train [63][700/3239]	Time 0.229 (0.653)	Data Time 0.001 (0.058)	Loss 2.4953 (2.5321)	Entropy 1.07548 (1.07599)	Top-1 acc 64.062 (63.127)	Top-5 acc 80.469 (83.200)	lr 0.01178
Train [63][710/3239]	Time 0.315 (0.650)	Data Time 0.001 (0.058)	Loss 2.4876 (2.5321)	Entropy 1.07536 (1.07598)	Top-1 acc 67.188 (63.127)	Top-5 acc 82.812 (83.197)	lr 0.01177
Train [63][720/3239]	Time 0.237 (0.648)	Data Time 0.001 (0.057)	Loss 2.4043 (2.5318)	Entropy 1.07534 (1.07597)	Top-1 acc 65.625 (63.131)	Top-5 acc 85.547 (83.199)	lr 0.01177
Train [63][730/3239]	Time 0.233 (0.646)	Data Time 0.001 (0.056)	Loss 2.5050 (2.5318)	Entropy 1.07531 (1.07596)	Top-1 acc 64.453 (63.134)	Top-5 acc 83.203 (83.199)	lr 0.01177
Train [63][740/3239]	Time 0.226 (0.644)	Data Time 0.001 (0.055)	Loss 2.5129 (2.5317)	Entropy 1.07529 (1.07595)	Top-1 acc 62.891 (63.132)	Top-5 acc 83.203 (83.197)	lr 0.01177
Train [63][750/3239]	Time 0.228 (0.641)	Data Time 0.001 (0.055)	Loss 2.5469 (2.5316)	Entropy 1.07526 (1.07595)	Top-1 acc 62.891 (63.136)	Top-5 acc 81.641 (83.198)	lr 0.01177
Train [63][760/3239]	Time 0.302 (0.639)	Data Time 0.001 (0.054)	Loss 2.4598 (2.5316)	Entropy 1.07526 (1.07594)	Top-1 acc 65.625 (63.147)	Top-5 acc 83.984 (83.202)	lr 0.01177
Train [63][770/3239]	Time 0.208 (0.637)	Data Time 0.001 (0.053)	Loss 2.4392 (2.5308)	Entropy 1.07524 (1.07593)	Top-1 acc 67.188 (63.154)	Top-5 acc 84.766 (83.227)	lr 0.01177
Train [63][780/3239]	Time 2.512 (0.635)	Data Time 0.001 (0.053)	Loss 2.5658 (2.5307)	Entropy 1.07524 (1.07592)	Top-1 acc 63.281 (63.153)	Top-5 acc 83.984 (83.236)	lr 0.01177
Train [63][790/3239]	Time 0.228 (0.630)	Data Time 0.001 (0.052)	Loss 2.5774 (2.5305)	Entropy 1.07522 (1.07591)	Top-1 acc 63.281 (63.155)	Top-5 acc 83.594 (83.246)	lr 0.01177
Train [63][800/3239]	Time 0.228 (0.628)	Data Time 0.001 (0.051)	Loss 2.5835 (2.5313)	Entropy 1.07522 (1.07590)	Top-1 acc 60.156 (63.117)	Top-5 acc 81.250 (83.239)	lr 0.01177
Train [63][810/3239]	Time 0.317 (0.626)	Data Time 0.001 (0.051)	Loss 2.5266 (2.5314)	Entropy 1.07523 (1.07589)	Top-1 acc 62.109 (63.105)	Top-5 acc 82.812 (83.234)	lr 0.01176
Train [63][820/3239]	Time 0.208 (0.624)	Data Time 0.001 (0.050)	Loss 2.4661 (2.5315)	Entropy 1.07519 (1.07588)	Top-1 acc 64.844 (63.117)	Top-5 acc 85.547 (83.234)	lr 0.01176
Train [63][830/3239]	Time 0.237 (0.622)	Data Time 0.002 (0.049)	Loss 2.5508 (2.5313)	Entropy 1.07505 (1.07587)	Top-1 acc 63.281 (63.130)	Top-5 acc 83.984 (83.242)	lr 0.01176
Train [63][840/3239]	Time 0.235 (0.620)	Data Time 0.002 (0.049)	Loss 2.4598 (2.5307)	Entropy 1.07504 (1.07587)	Top-1 acc 66.016 (63.143)	Top-5 acc 85.938 (83.251)	lr 0.01176
Train [63][850/3239]	Time 0.232 (0.618)	Data Time 0.001 (0.048)	Loss 2.4459 (2.5309)	Entropy 1.07501 (1.07586)	Top-1 acc 67.969 (63.139)	Top-5 acc 85.547 (83.253)	lr 0.01176
Train [63][860/3239]	Time 0.235 (0.617)	Data Time 0.001 (0.048)	Loss 2.6097 (2.5306)	Entropy 1.07494 (1.07585)	Top-1 acc 60.156 (63.147)	Top-5 acc 80.859 (83.259)	lr 0.01176
Train [63][870/3239]	Time 0.227 (0.615)	Data Time 0.001 (0.047)	Loss 2.5405 (2.5307)	Entropy 1.07493 (1.07583)	Top-1 acc 62.891 (63.153)	Top-5 acc 82.422 (83.253)	lr 0.01176
Train [63][880/3239]	Time 0.223 (0.613)	Data Time 0.001 (0.047)	Loss 2.4118 (2.5305)	Entropy 1.07489 (1.07582)	Top-1 acc 64.062 (63.164)	Top-5 acc 85.156 (83.257)	lr 0.01176
Train [63][890/3239]	Time 2.518 (0.612)	Data Time 0.002 (0.046)	Loss 2.5798 (2.5306)	Entropy 1.07489 (1.07581)	Top-1 acc 63.672 (63.160)	Top-5 acc 83.203 (83.254)	lr 0.01176
Train [63][900/3239]	Time 0.242 (0.608)	Data Time 0.001 (0.046)	Loss 2.5662 (2.5306)	Entropy 1.07491 (1.07580)	Top-1 acc 61.719 (63.157)	Top-5 acc 83.984 (83.248)	lr 0.01176
Train [63][910/3239]	Time 0.408 (0.660)	Data Time 0.002 (0.045)	Loss 2.4451 (2.5304)	Entropy 1.07491 (1.07579)	Top-1 acc 65.234 (63.157)	Top-5 acc 85.156 (83.249)	lr 0.01175
Train [63][920/3239]	Time 0.217 (0.659)	Data Time 0.002 (0.045)	Loss 2.4857 (2.5307)	Entropy 1.07482 (1.07578)	Top-1 acc 64.844 (63.148)	Top-5 acc 85.156 (83.243)	lr 0.01175
Train [63][930/3239]	Time 0.229 (0.657)	Data Time 0.001 (0.044)	Loss 2.4739 (2.5315)	Entropy 1.07482 (1.07577)	Top-1 acc 64.844 (63.139)	Top-5 acc 83.594 (83.227)	lr 0.01175
Train [63][940/3239]	Time 0.246 (0.654)	Data Time 0.001 (0.044)	Loss 2.7150 (2.5313)	Entropy 1.07477 (1.07576)	Top-1 acc 62.500 (63.149)	Top-5 acc 79.297 (83.229)	lr 0.01175
Train [63][950/3239]	Time 0.218 (0.652)	Data Time 0.001 (0.043)	Loss 2.4215 (2.5319)	Entropy 1.07479 (1.07575)	Top-1 acc 66.016 (63.139)	Top-5 acc 85.156 (83.225)	lr 0.01175
Train [63][960/3239]	Time 0.225 (0.651)	Data Time 0.001 (0.043)	Loss 2.4032 (2.5314)	Entropy 1.07479 (1.07574)	Top-1 acc 64.062 (63.144)	Top-5 acc 86.328 (83.239)	lr 0.01175
Train [63][970/3239]	Time 0.255 (0.649)	Data Time 0.001 (0.043)	Loss 2.6709 (2.5315)	Entropy 1.07475 (1.07573)	Top-1 acc 62.500 (63.136)	Top-5 acc 82.812 (83.245)	lr 0.01175
Train [63][980/3239]	Time 0.253 (0.647)	Data Time 0.001 (0.042)	Loss 2.5640 (2.5318)	Entropy 1.07469 (1.07572)	Top-1 acc 62.500 (63.136)	Top-5 acc 83.594 (83.235)	lr 0.01175
Train [63][990/3239]	Time 0.238 (0.645)	Data Time 0.002 (0.042)	Loss 2.5381 (2.5320)	Entropy 1.07466 (1.07571)	Top-1 acc 66.797 (63.133)	Top-5 acc 83.203 (83.234)	lr 0.01175
Train [63][1000/3239]	Time 2.639 (0.643)	Data Time 0.001 (0.041)	Loss 2.6904 (2.5321)	Entropy 1.07466 (1.07570)	Top-1 acc 59.766 (63.135)	Top-5 acc 82.031 (83.229)	lr 0.01174
Train [63][1010/3239]	Time 0.280 (0.639)	Data Time 0.001 (0.041)	Loss 2.6422 (2.5318)	Entropy 1.07474 (1.07569)	Top-1 acc 57.422 (63.141)	Top-5 acc 81.641 (83.233)	lr 0.01174
Train [63][1020/3239]	Time 0.223 (0.638)	Data Time 0.001 (0.041)	Loss 2.5808 (2.5316)	Entropy 1.07472 (1.07568)	Top-1 acc 61.719 (63.140)	Top-5 acc 82.422 (83.241)	lr 0.01174
Train [63][1030/3239]	Time 0.218 (0.636)	Data Time 0.001 (0.040)	Loss 2.5852 (2.5314)	Entropy 1.07461 (1.07567)	Top-1 acc 62.500 (63.133)	Top-5 acc 81.641 (83.246)	lr 0.01174
Train [63][1040/3239]	Time 0.227 (0.634)	Data Time 0.001 (0.040)	Loss 2.3460 (2.5319)	Entropy 1.07462 (1.07566)	Top-1 acc 63.672 (63.123)	Top-5 acc 87.500 (83.237)	lr 0.01174
Train [63][1050/3239]	Time 0.223 (0.633)	Data Time 0.001 (0.039)	Loss 2.6637 (2.5316)	Entropy 1.07459 (1.07565)	Top-1 acc 58.594 (63.128)	Top-5 acc 81.641 (83.240)	lr 0.01174
Train [63][1060/3239]	Time 0.218 (0.631)	Data Time 0.001 (0.039)	Loss 2.5543 (2.5321)	Entropy 1.07456 (1.07564)	Top-1 acc 60.547 (63.118)	Top-5 acc 82.031 (83.230)	lr 0.01174
Train [63][1070/3239]	Time 0.239 (0.630)	Data Time 0.001 (0.039)	Loss 2.5045 (2.5318)	Entropy 1.07453 (1.07563)	Top-1 acc 62.109 (63.122)	Top-5 acc 84.375 (83.236)	lr 0.01174
Train [63][1080/3239]	Time 0.242 (0.629)	Data Time 0.001 (0.038)	Loss 2.4749 (2.5318)	Entropy 1.07447 (1.07562)	Top-1 acc 65.234 (63.119)	Top-5 acc 81.641 (83.236)	lr 0.01174
Train [63][1090/3239]	Time 0.312 (0.627)	Data Time 0.001 (0.038)	Loss 2.5658 (2.5318)	Entropy 1.07444 (1.07561)	Top-1 acc 60.156 (63.119)	Top-5 acc 83.594 (83.231)	lr 0.01174
Train [63][1100/3239]	Time 0.236 (0.626)	Data Time 0.001 (0.038)	Loss 2.6593 (2.5316)	Entropy 1.07437 (1.07560)	Top-1 acc 60.547 (63.125)	Top-5 acc 82.031 (83.241)	lr 0.01173
Train [63][1110/3239]	Time 2.458 (0.624)	Data Time 0.001 (0.037)	Loss 2.5137 (2.5318)	Entropy 1.07437 (1.07559)	Top-1 acc 64.453 (63.120)	Top-5 acc 83.594 (83.239)	lr 0.01173
Train [63][1120/3239]	Time 0.281 (0.621)	Data Time 0.001 (0.037)	Loss 2.5101 (2.5320)	Entropy 1.07438 (1.07558)	Top-1 acc 63.281 (63.116)	Top-5 acc 83.594 (83.236)	lr 0.01173
Train [63][1130/3239]	Time 0.230 (0.620)	Data Time 0.002 (0.037)	Loss 2.6048 (2.5324)	Entropy 1.07436 (1.07557)	Top-1 acc 61.719 (63.114)	Top-5 acc 80.078 (83.233)	lr 0.01173
Train [63][1140/3239]	Time 0.227 (0.618)	Data Time 0.002 (0.036)	Loss 2.5419 (2.5327)	Entropy 1.07432 (1.07556)	Top-1 acc 60.547 (63.106)	Top-5 acc 82.422 (83.225)	lr 0.01173
Train [63][1150/3239]	Time 0.226 (0.617)	Data Time 0.001 (0.036)	Loss 2.8250 (2.5326)	Entropy 1.07433 (1.07555)	Top-1 acc 58.203 (63.107)	Top-5 acc 75.391 (83.227)	lr 0.01173
Train [63][1160/3239]	Time 0.216 (0.616)	Data Time 0.001 (0.036)	Loss 2.6014 (2.5327)	Entropy 1.07430 (1.07554)	Top-1 acc 61.719 (63.102)	Top-5 acc 80.078 (83.227)	lr 0.01173
Train [63][1170/3239]	Time 0.225 (0.614)	Data Time 0.001 (0.036)	Loss 2.4201 (2.5325)	Entropy 1.07429 (1.07552)	Top-1 acc 69.922 (63.108)	Top-5 acc 85.547 (83.233)	lr 0.01173
Train [63][1180/3239]	Time 0.366 (0.613)	Data Time 0.001 (0.035)	Loss 2.5093 (2.5326)	Entropy 1.07426 (1.07551)	Top-1 acc 66.406 (63.106)	Top-5 acc 81.641 (83.228)	lr 0.01173
Train [63][1190/3239]	Time 0.235 (0.612)	Data Time 0.001 (0.035)	Loss 2.4507 (2.5325)	Entropy 1.07429 (1.07550)	Top-1 acc 64.453 (63.107)	Top-5 acc 81.641 (83.226)	lr 0.01173
Train [63][1200/3239]	Time 0.234 (0.611)	Data Time 0.001 (0.035)	Loss 2.3979 (2.5331)	Entropy 1.07431 (1.07549)	Top-1 acc 67.578 (63.097)	Top-5 acc 85.547 (83.212)	lr 0.01172
Train [63][1210/3239]	Time 0.226 (0.610)	Data Time 0.001 (0.034)	Loss 2.5708 (2.5335)	Entropy 1.07428 (1.07548)	Top-1 acc 62.891 (63.087)	Top-5 acc 81.250 (83.207)	lr 0.01172
Train [63][1220/3239]	Time 2.452 (0.608)	Data Time 0.001 (0.034)	Loss 2.5907 (2.5337)	Entropy 1.07428 (1.07547)	Top-1 acc 58.203 (63.080)	Top-5 acc 82.812 (83.200)	lr 0.01172
Train [63][1230/3239]	Time 0.223 (0.605)	Data Time 0.001 (0.034)	Loss 2.5770 (2.5337)	Entropy 1.07420 (1.07546)	Top-1 acc 62.891 (63.081)	Top-5 acc 82.031 (83.193)	lr 0.01172
Train [63][1240/3239]	Time 0.230 (0.604)	Data Time 0.001 (0.034)	Loss 2.4706 (2.5340)	Entropy 1.07418 (1.07545)	Top-1 acc 65.234 (63.076)	Top-5 acc 83.203 (83.192)	lr 0.01172
Train [63][1250/3239]	Time 0.216 (0.603)	Data Time 0.001 (0.033)	Loss 2.7142 (2.5343)	Entropy 1.07413 (1.07544)	Top-1 acc 59.766 (63.075)	Top-5 acc 79.688 (83.188)	lr 0.01172
Train [63][1260/3239]	Time 0.228 (0.602)	Data Time 0.001 (0.033)	Loss 2.3101 (2.5342)	Entropy 1.07410 (1.07543)	Top-1 acc 65.625 (63.070)	Top-5 acc 88.672 (83.192)	lr 0.01172
Train [63][1270/3239]	Time 0.330 (0.644)	Data Time 0.002 (0.033)	Loss 2.4008 (2.5338)	Entropy 1.07409 (1.07542)	Top-1 acc 66.016 (63.078)	Top-5 acc 84.766 (83.194)	lr 0.01172
Train [63][1280/3239]	Time 0.248 (0.643)	Data Time 0.002 (0.033)	Loss 2.6457 (2.5344)	Entropy 1.07403 (1.07541)	Top-1 acc 60.938 (63.060)	Top-5 acc 79.297 (83.185)	lr 0.01172
Train [63][1290/3239]	Time 0.236 (0.642)	Data Time 0.001 (0.032)	Loss 2.6662 (2.5343)	Entropy 1.07404 (1.07540)	Top-1 acc 57.031 (63.061)	Top-5 acc 79.688 (83.189)	lr 0.01172
Train [63][1300/3239]	Time 0.226 (0.640)	Data Time 0.001 (0.032)	Loss 2.4131 (2.5342)	Entropy 1.07404 (1.07539)	Top-1 acc 66.016 (63.070)	Top-5 acc 85.156 (83.190)	lr 0.01171
Train [63][1310/3239]	Time 0.231 (0.639)	Data Time 0.001 (0.032)	Loss 2.5282 (2.5343)	Entropy 1.07403 (1.07538)	Top-1 acc 64.844 (63.072)	Top-5 acc 82.812 (83.184)	lr 0.01171
Train [63][1320/3239]	Time 0.322 (0.637)	Data Time 0.001 (0.032)	Loss 2.5726 (2.5341)	Entropy 1.07399 (1.07537)	Top-1 acc 60.547 (63.072)	Top-5 acc 78.516 (83.187)	lr 0.01171
Train [63][1330/3239]	Time 2.475 (0.636)	Data Time 0.002 (0.032)	Loss 2.5478 (2.5342)	Entropy 1.07399 (1.07536)	Top-1 acc 65.234 (63.073)	Top-5 acc 82.812 (83.187)	lr 0.01171
Train [63][1340/3239]	Time 0.251 (0.633)	Data Time 0.001 (0.031)	Loss 2.4752 (2.5344)	Entropy 1.07392 (1.07535)	Top-1 acc 66.016 (63.065)	Top-5 acc 83.203 (83.185)	lr 0.01171
Train [63][1350/3239]	Time 0.229 (0.632)	Data Time 0.001 (0.031)	Loss 2.4576 (2.5341)	Entropy 1.07391 (1.07534)	Top-1 acc 62.500 (63.074)	Top-5 acc 85.547 (83.189)	lr 0.01171
Train [63][1360/3239]	Time 0.241 (0.631)	Data Time 0.001 (0.031)	Loss 2.5777 (2.5341)	Entropy 1.07389 (1.07533)	Top-1 acc 62.500 (63.076)	Top-5 acc 81.250 (83.188)	lr 0.01171
Train [63][1370/3239]	Time 0.224 (0.630)	Data Time 0.001 (0.031)	Loss 2.5255 (2.5340)	Entropy 1.07389 (1.07532)	Top-1 acc 64.844 (63.084)	Top-5 acc 83.984 (83.192)	lr 0.01171
Train [63][1380/3239]	Time 0.239 (0.629)	Data Time 0.001 (0.030)	Loss 2.5577 (2.5338)	Entropy 1.07389 (1.07531)	Top-1 acc 62.891 (63.087)	Top-5 acc 82.812 (83.197)	lr 0.01171
Train [63][1390/3239]	Time 0.234 (0.627)	Data Time 0.001 (0.030)	Loss 2.5543 (2.5340)	Entropy 1.07385 (1.07530)	Top-1 acc 62.109 (63.078)	Top-5 acc 83.984 (83.192)	lr 0.01171
Train [63][1400/3239]	Time 0.208 (0.626)	Data Time 0.001 (0.030)	Loss 2.7033 (2.5342)	Entropy 1.07377 (1.07529)	Top-1 acc 56.641 (63.075)	Top-5 acc 79.688 (83.188)	lr 0.01170
Train [63][1410/3239]	Time 0.356 (0.625)	Data Time 0.001 (0.030)	Loss 2.5447 (2.5342)	Entropy 1.07373 (1.07527)	Top-1 acc 58.984 (63.072)	Top-5 acc 83.984 (83.187)	lr 0.01170
Train [63][1420/3239]	Time 0.231 (0.624)	Data Time 0.001 (0.030)	Loss 2.4303 (2.5339)	Entropy 1.07366 (1.07526)	Top-1 acc 64.844 (63.084)	Top-5 acc 84.375 (83.187)	lr 0.01170
Train [63][1430/3239]	Time 0.288 (0.623)	Data Time 0.001 (0.029)	Loss 2.6735 (2.5340)	Entropy 1.07362 (1.07525)	Top-1 acc 59.375 (63.085)	Top-5 acc 81.641 (83.182)	lr 0.01170
Train [63][1440/3239]	Time 2.489 (0.622)	Data Time 0.001 (0.029)	Loss 2.4491 (2.5341)	Entropy 1.07362 (1.07524)	Top-1 acc 64.062 (63.082)	Top-5 acc 83.984 (83.180)	lr 0.01170
Train [63][1450/3239]	Time 0.267 (0.619)	Data Time 0.001 (0.029)	Loss 2.6450 (2.5345)	Entropy 1.07362 (1.07523)	Top-1 acc 59.766 (63.065)	Top-5 acc 81.641 (83.171)	lr 0.01170
Train [63][1460/3239]	Time 0.318 (0.618)	Data Time 0.001 (0.029)	Loss 2.4324 (2.5347)	Entropy 1.07360 (1.07522)	Top-1 acc 65.625 (63.061)	Top-5 acc 85.156 (83.168)	lr 0.01170
Train [63][1470/3239]	Time 0.234 (0.617)	Data Time 0.001 (0.029)	Loss 2.6022 (2.5349)	Entropy 1.07357 (1.07521)	Top-1 acc 63.281 (63.054)	Top-5 acc 81.641 (83.169)	lr 0.01170
Train [63][1480/3239]	Time 0.222 (0.616)	Data Time 0.001 (0.029)	Loss 2.6682 (2.5354)	Entropy 1.07355 (1.07520)	Top-1 acc 57.031 (63.041)	Top-5 acc 81.250 (83.162)	lr 0.01170
Train [63][1490/3239]	Time 0.230 (0.615)	Data Time 0.001 (0.028)	Loss 2.5053 (2.5351)	Entropy 1.07352 (1.07519)	Top-1 acc 64.453 (63.053)	Top-5 acc 85.156 (83.167)	lr 0.01170
Train [63][1500/3239]	Time 0.237 (0.614)	Data Time 0.002 (0.028)	Loss 2.4204 (2.5351)	Entropy 1.07348 (1.07517)	Top-1 acc 68.359 (63.058)	Top-5 acc 82.812 (83.167)	lr 0.01169
Train [63][1510/3239]	Time 0.237 (0.613)	Data Time 0.001 (0.028)	Loss 2.3964 (2.5351)	Entropy 1.07350 (1.07516)	Top-1 acc 67.188 (63.055)	Top-5 acc 85.156 (83.174)	lr 0.01169
Train [63][1520/3239]	Time 0.233 (0.612)	Data Time 0.001 (0.028)	Loss 2.7004 (2.5349)	Entropy 1.07352 (1.07515)	Top-1 acc 62.109 (63.063)	Top-5 acc 80.469 (83.175)	lr 0.01169
Train [63][1530/3239]	Time 0.223 (0.611)	Data Time 0.002 (0.028)	Loss 2.6859 (2.5350)	Entropy 1.07347 (1.07514)	Top-1 acc 58.594 (63.063)	Top-5 acc 78.906 (83.176)	lr 0.01169
Train [63][1540/3239]	Time 0.286 (0.610)	Data Time 0.001 (0.027)	Loss 2.6417 (2.5354)	Entropy 1.07347 (1.07513)	Top-1 acc 60.156 (63.051)	Top-5 acc 80.469 (83.170)	lr 0.01169
Train [63][1550/3239]	Time 2.634 (0.609)	Data Time 0.001 (0.027)	Loss 2.4857 (2.5355)	Entropy 1.07347 (1.07512)	Top-1 acc 62.891 (63.046)	Top-5 acc 83.984 (83.163)	lr 0.01169
Train [63][1560/3239]	Time 0.235 (0.607)	Data Time 0.001 (0.027)	Loss 2.5078 (2.5357)	Entropy 1.07344 (1.07511)	Top-1 acc 62.891 (63.045)	Top-5 acc 83.594 (83.158)	lr 0.01169
Train [63][1570/3239]	Time 0.230 (0.606)	Data Time 0.001 (0.027)	Loss 2.5137 (2.5356)	Entropy 1.07340 (1.07510)	Top-1 acc 65.234 (63.050)	Top-5 acc 84.375 (83.158)	lr 0.01169
Train [63][1580/3239]	Time 0.226 (0.605)	Data Time 0.001 (0.027)	Loss 2.4749 (2.5357)	Entropy 1.07342 (1.07509)	Top-1 acc 62.500 (63.047)	Top-5 acc 84.375 (83.156)	lr 0.01169
Train [63][1590/3239]	Time 0.239 (0.604)	Data Time 0.001 (0.027)	Loss 2.4132 (2.5350)	Entropy 1.07332 (1.07508)	Top-1 acc 65.625 (63.064)	Top-5 acc 83.984 (83.166)	lr 0.01169
Train [63][1600/3239]	Time 0.213 (0.604)	Data Time 0.001 (0.026)	Loss 2.4635 (2.5352)	Entropy 1.07329 (1.07507)	Top-1 acc 64.844 (63.064)	Top-5 acc 86.328 (83.163)	lr 0.01168
Train [63][1610/3239]	Time 0.257 (0.603)	Data Time 0.001 (0.026)	Loss 2.3083 (2.5353)	Entropy 1.07324 (1.07505)	Top-1 acc 71.094 (63.060)	Top-5 acc 86.328 (83.158)	lr 0.01168
Train [63][1620/3239]	Time 0.247 (0.602)	Data Time 0.001 (0.026)	Loss 2.5834 (2.5352)	Entropy 1.07325 (1.07504)	Top-1 acc 62.500 (63.062)	Top-5 acc 82.031 (83.160)	lr 0.01168
Train [63][1630/3239]	Time 0.371 (0.632)	Data Time 0.003 (0.026)	Loss 2.4504 (2.5351)	Entropy 1.07321 (1.07503)	Top-1 acc 64.844 (63.067)	Top-5 acc 83.984 (83.161)	lr 0.01168
Train [63][1640/3239]	Time 0.234 (0.632)	Data Time 0.002 (0.026)	Loss 3.7252 (2.5360)	Entropy 1.07312 (1.07502)	Top-1 acc 41.797 (63.052)	Top-5 acc 68.750 (83.148)	lr 0.01168
Train [63][1650/3239]	Time 0.241 (0.631)	Data Time 0.001 (0.026)	Loss 2.5778 (2.5362)	Entropy 1.07308 (1.07501)	Top-1 acc 59.766 (63.046)	Top-5 acc 82.422 (83.144)	lr 0.01168
Train [63][1660/3239]	Time 2.507 (0.630)	Data Time 0.002 (0.026)	Loss 2.4421 (2.5359)	Entropy 1.07308 (1.07500)	Top-1 acc 65.234 (63.057)	Top-5 acc 86.719 (83.147)	lr 0.01168
Train [63][1670/3239]	Time 0.234 (0.627)	Data Time 0.001 (0.025)	Loss 2.6020 (2.5363)	Entropy 1.07303 (1.07499)	Top-1 acc 62.500 (63.051)	Top-5 acc 80.469 (83.136)	lr 0.01168
Train [63][1680/3239]	Time 0.222 (0.626)	Data Time 0.001 (0.025)	Loss 2.5831 (2.5366)	Entropy 1.07296 (1.07497)	Top-1 acc 62.109 (63.042)	Top-5 acc 80.859 (83.122)	lr 0.01168
Train [63][1690/3239]	Time 0.252 (0.625)	Data Time 0.002 (0.025)	Loss 2.3476 (2.5364)	Entropy 1.07295 (1.07496)	Top-1 acc 67.578 (63.047)	Top-5 acc 88.281 (83.129)	lr 0.01168
Train [63][1700/3239]	Time 0.233 (0.625)	Data Time 0.001 (0.025)	Loss 2.4254 (2.5363)	Entropy 1.07292 (1.07495)	Top-1 acc 65.234 (63.050)	Top-5 acc 85.938 (83.128)	lr 0.01167
Train [63][1710/3239]	Time 0.224 (0.624)	Data Time 0.001 (0.025)	Loss 2.3000 (2.5362)	Entropy 1.07290 (1.07494)	Top-1 acc 70.312 (63.056)	Top-5 acc 87.500 (83.128)	lr 0.01167
Train [63][1720/3239]	Time 0.226 (0.623)	Data Time 0.001 (0.025)	Loss 2.3998 (2.5363)	Entropy 1.07292 (1.07493)	Top-1 acc 66.797 (63.048)	Top-5 acc 85.938 (83.132)	lr 0.01167
Train [63][1730/3239]	Time 0.250 (0.622)	Data Time 0.001 (0.025)	Loss 2.6501 (2.5363)	Entropy 1.07291 (1.07491)	Top-1 acc 57.812 (63.047)	Top-5 acc 80.469 (83.132)	lr 0.01167
Train [63][1740/3239]	Time 0.222 (0.621)	Data Time 0.001 (0.024)	Loss 2.3671 (2.5362)	Entropy 1.07292 (1.07490)	Top-1 acc 65.625 (63.055)	Top-5 acc 87.500 (83.134)	lr 0.01167
Train [63][1750/3239]	Time 0.236 (0.620)	Data Time 0.001 (0.024)	Loss 2.5690 (2.5360)	Entropy 1.07287 (1.07489)	Top-1 acc 61.719 (63.056)	Top-5 acc 80.859 (83.137)	lr 0.01167
Train [63][1760/3239]	Time 0.257 (0.619)	Data Time 0.001 (0.024)	Loss 2.5391 (2.5360)	Entropy 1.07282 (1.07488)	Top-1 acc 62.500 (63.053)	Top-5 acc 82.031 (83.136)	lr 0.01167
Train [63][1770/3239]	Time 2.480 (0.618)	Data Time 0.001 (0.024)	Loss 2.6361 (2.5362)	Entropy 1.07282 (1.07487)	Top-1 acc 60.156 (63.041)	Top-5 acc 80.859 (83.133)	lr 0.01167
Train [63][1780/3239]	Time 0.289 (0.616)	Data Time 0.001 (0.024)	Loss 2.4855 (2.5363)	Entropy 1.07277 (1.07486)	Top-1 acc 64.062 (63.041)	Top-5 acc 85.938 (83.136)	lr 0.01167
Train [63][1790/3239]	Time 0.220 (0.615)	Data Time 0.001 (0.024)	Loss 2.5198 (2.5362)	Entropy 1.07275 (1.07484)	Top-1 acc 63.281 (63.041)	Top-5 acc 84.766 (83.140)	lr 0.01167
Train [63][1800/3239]	Time 0.230 (0.614)	Data Time 0.001 (0.024)	Loss 2.5903 (2.5362)	Entropy 1.07271 (1.07483)	Top-1 acc 62.500 (63.038)	Top-5 acc 82.031 (83.143)	lr 0.01166
Train [63][1810/3239]	Time 0.260 (0.614)	Data Time 0.001 (0.024)	Loss 2.5303 (2.5364)	Entropy 1.07267 (1.07482)	Top-1 acc 64.844 (63.034)	Top-5 acc 84.766 (83.141)	lr 0.01166
Train [63][1820/3239]	Time 0.334 (0.613)	Data Time 0.001 (0.023)	Loss 2.4931 (2.5366)	Entropy 1.07265 (1.07481)	Top-1 acc 66.016 (63.028)	Top-5 acc 83.203 (83.136)	lr 0.01166
Train [63][1830/3239]	Time 0.224 (0.612)	Data Time 0.001 (0.023)	Loss 2.5202 (2.5364)	Entropy 1.07258 (1.07480)	Top-1 acc 60.547 (63.030)	Top-5 acc 82.422 (83.140)	lr 0.01166
Train [63][1840/3239]	Time 0.262 (0.611)	Data Time 0.001 (0.023)	Loss 2.5052 (2.5368)	Entropy 1.07252 (1.07479)	Top-1 acc 62.500 (63.027)	Top-5 acc 82.812 (83.136)	lr 0.01166
Train [63][1850/3239]	Time 0.221 (0.610)	Data Time 0.001 (0.023)	Loss 2.3911 (2.5369)	Entropy 1.07253 (1.07477)	Top-1 acc 68.359 (63.030)	Top-5 acc 84.766 (83.135)	lr 0.01166
Train [63][1860/3239]	Time 0.223 (0.610)	Data Time 0.001 (0.023)	Loss 2.6245 (2.5366)	Entropy 1.07251 (1.07476)	Top-1 acc 59.766 (63.033)	Top-5 acc 80.469 (83.142)	lr 0.01166
Train [63][1870/3239]	Time 0.260 (0.609)	Data Time 0.001 (0.023)	Loss 2.4977 (2.5368)	Entropy 1.07251 (1.07475)	Top-1 acc 65.625 (63.028)	Top-5 acc 83.203 (83.139)	lr 0.01166
Train [63][1880/3239]	Time 2.464 (0.608)	Data Time 0.001 (0.023)	Loss 2.5402 (2.5366)	Entropy 1.07251 (1.07474)	Top-1 acc 63.281 (63.028)	Top-5 acc 82.812 (83.147)	lr 0.01166
Train [63][1890/3239]	Time 0.240 (0.606)	Data Time 0.002 (0.023)	Loss 2.4939 (2.5366)	Entropy 1.07250 (1.07473)	Top-1 acc 63.672 (63.034)	Top-5 acc 82.812 (83.147)	lr 0.01166
Train [63][1900/3239]	Time 0.233 (0.605)	Data Time 0.001 (0.023)	Loss 2.4820 (2.5366)	Entropy 1.07242 (1.07471)	Top-1 acc 61.719 (63.030)	Top-5 acc 83.203 (83.147)	lr 0.01165
Train [63][1910/3239]	Time 0.402 (0.605)	Data Time 0.001 (0.022)	Loss 2.5406 (2.5368)	Entropy 1.07239 (1.07470)	Top-1 acc 59.766 (63.021)	Top-5 acc 82.422 (83.151)	lr 0.01165
Train [63][1920/3239]	Time 0.227 (0.604)	Data Time 0.001 (0.022)	Loss 2.6299 (2.5367)	Entropy 1.07239 (1.07469)	Top-1 acc 57.812 (63.022)	Top-5 acc 82.031 (83.154)	lr 0.01165
Train [63][1930/3239]	Time 0.224 (0.603)	Data Time 0.001 (0.022)	Loss 2.7798 (2.5368)	Entropy 1.07228 (1.07468)	Top-1 acc 59.375 (63.023)	Top-5 acc 78.125 (83.148)	lr 0.01165
Train [63][1940/3239]	Time 0.223 (0.603)	Data Time 0.001 (0.022)	Loss 2.4229 (2.5367)	Entropy 1.07224 (1.07466)	Top-1 acc 60.547 (63.026)	Top-5 acc 85.547 (83.149)	lr 0.01165
Train [63][1950/3239]	Time 0.245 (0.602)	Data Time 0.001 (0.022)	Loss 2.6763 (2.5367)	Entropy 1.07228 (1.07465)	Top-1 acc 60.156 (63.026)	Top-5 acc 82.031 (83.146)	lr 0.01165
Train [63][1960/3239]	Time 0.304 (0.601)	Data Time 0.001 (0.022)	Loss 2.5153 (2.5368)	Entropy 1.07228 (1.07464)	Top-1 acc 60.938 (63.022)	Top-5 acc 82.031 (83.141)	lr 0.01165
Train [63][1970/3239]	Time 0.228 (0.601)	Data Time 0.002 (0.022)	Loss 2.6670 (2.5368)	Entropy 1.07226 (1.07463)	Top-1 acc 58.203 (63.020)	Top-5 acc 78.906 (83.138)	lr 0.01165
Train [63][1980/3239]	Time 0.232 (0.600)	Data Time 0.001 (0.022)	Loss 2.6028 (2.5369)	Entropy 1.07226 (1.07462)	Top-1 acc 60.547 (63.014)	Top-5 acc 81.641 (83.138)	lr 0.01165
Train [63][1990/3239]	Time 56.730 (0.626)	Data Time 0.001 (0.022)	Loss 2.5932 (2.5370)	Entropy 1.07226 (1.07460)	Top-1 acc 64.453 (63.011)	Top-5 acc 84.766 (83.143)	lr 0.01165
Train [63][2000/3239]	Time 0.228 (0.625)	Data Time 0.002 (0.021)	Loss 2.5006 (2.5371)	Entropy 1.07223 (1.07459)	Top-1 acc 64.453 (63.010)	Top-5 acc 83.594 (83.139)	lr 0.01164
Train [63][2010/3239]	Time 0.251 (0.624)	Data Time 0.002 (0.021)	Loss 2.9473 (2.5373)	Entropy 1.07222 (1.07458)	Top-1 acc 54.297 (62.998)	Top-5 acc 75.000 (83.137)	lr 0.01164
Train [63][2020/3239]	Time 0.252 (0.623)	Data Time 0.001 (0.021)	Loss 2.5985 (2.5373)	Entropy 1.07223 (1.07457)	Top-1 acc 62.891 (63.005)	Top-5 acc 83.203 (83.136)	lr 0.01164
Train [63][2030/3239]	Time 0.233 (0.622)	Data Time 0.002 (0.021)	Loss 2.5726 (2.5374)	Entropy 1.07222 (1.07456)	Top-1 acc 63.672 (62.996)	Top-5 acc 83.594 (83.139)	lr 0.01164
Train [63][2040/3239]	Time 0.228 (0.622)	Data Time 0.001 (0.021)	Loss 2.5278 (2.5375)	Entropy 1.07214 (1.07455)	Top-1 acc 65.234 (62.994)	Top-5 acc 83.594 (83.135)	lr 0.01164
Train [63][2050/3239]	Time 0.245 (0.621)	Data Time 0.002 (0.021)	Loss 2.4414 (2.5373)	Entropy 1.07214 (1.07453)	Top-1 acc 66.797 (62.993)	Top-5 acc 84.766 (83.143)	lr 0.01164
Train [63][2060/3239]	Time 0.240 (0.620)	Data Time 0.001 (0.021)	Loss 2.5768 (2.5372)	Entropy 1.07215 (1.07452)	Top-1 acc 66.016 (62.999)	Top-5 acc 81.641 (83.142)	lr 0.01164
Train [63][2070/3239]	Time 0.235 (0.620)	Data Time 0.001 (0.021)	Loss 2.6171 (2.5372)	Entropy 1.07219 (1.07451)	Top-1 acc 60.938 (63.002)	Top-5 acc 82.422 (83.145)	lr 0.01164
Train [63][2080/3239]	Time 0.216 (0.619)	Data Time 0.002 (0.021)	Loss 2.5225 (2.5372)	Entropy 1.07217 (1.07450)	Top-1 acc 66.016 (63.001)	Top-5 acc 86.328 (83.148)	lr 0.01164
Train [63][2090/3239]	Time 0.217 (0.618)	Data Time 0.001 (0.021)	Loss 2.6077 (2.5371)	Entropy 1.07220 (1.07449)	Top-1 acc 61.328 (62.998)	Top-5 acc 83.203 (83.149)	lr 0.01164
Train [63][2100/3239]	Time 2.504 (0.617)	Data Time 0.001 (0.021)	Loss 2.4032 (2.5369)	Entropy 1.07220 (1.07448)	Top-1 acc 66.406 (63.003)	Top-5 acc 87.109 (83.153)	lr 0.01163
Train [63][2110/3239]	Time 0.280 (0.616)	Data Time 0.001 (0.020)	Loss 2.3619 (2.5368)	Entropy 1.07216 (1.07447)	Top-1 acc 68.359 (63.003)	Top-5 acc 86.328 (83.153)	lr 0.01163
Train [63][2120/3239]	Time 0.221 (0.615)	Data Time 0.001 (0.020)	Loss 2.4537 (2.5366)	Entropy 1.07206 (1.07446)	Top-1 acc 65.234 (63.006)	Top-5 acc 84.766 (83.155)	lr 0.01163
Train [63][2130/3239]	Time 0.219 (0.614)	Data Time 0.001 (0.020)	Loss 2.5717 (2.5369)	Entropy 1.07182 (1.07444)	Top-1 acc 61.719 (62.998)	Top-5 acc 83.594 (83.149)	lr 0.01163
Train [63][2140/3239]	Time 0.202 (0.614)	Data Time 0.002 (0.020)	Loss 2.5088 (2.5367)	Entropy 1.07177 (1.07443)	Top-1 acc 59.766 (62.999)	Top-5 acc 84.766 (83.154)	lr 0.01163
Train [63][2150/3239]	Time 0.340 (0.613)	Data Time 0.001 (0.020)	Loss 2.3405 (2.5367)	Entropy 1.07171 (1.07442)	Top-1 acc 65.625 (62.995)	Top-5 acc 88.281 (83.154)	lr 0.01163
Train [63][2160/3239]	Time 0.228 (0.612)	Data Time 0.001 (0.020)	Loss 2.6327 (2.5367)	Entropy 1.07165 (1.07441)	Top-1 acc 59.766 (62.995)	Top-5 acc 80.078 (83.150)	lr 0.01163
Train [63][2170/3239]	Time 0.230 (0.612)	Data Time 0.001 (0.020)	Loss 2.5506 (2.5368)	Entropy 1.07165 (1.07439)	Top-1 acc 60.547 (62.994)	Top-5 acc 83.594 (83.147)	lr 0.01163
Train [63][2180/3239]	Time 0.178 (0.611)	Data Time 0.001 (0.020)	Loss 2.4721 (2.5369)	Entropy 1.07166 (1.07438)	Top-1 acc 60.156 (62.990)	Top-5 acc 85.938 (83.149)	lr 0.01163
Train [63][2190/3239]	Time 0.204 (0.610)	Data Time 0.001 (0.020)	Loss 2.5243 (2.5368)	Entropy 1.07161 (1.07437)	Top-1 acc 66.797 (62.997)	Top-5 acc 82.422 (83.147)	lr 0.01163
Train [63][2200/3239]	Time 0.249 (0.610)	Data Time 0.001 (0.020)	Loss 2.5182 (2.5368)	Entropy 1.07153 (1.07436)	Top-1 acc 61.719 (62.991)	Top-5 acc 84.766 (83.148)	lr 0.01162
Train [63][2210/3239]	Time 2.475 (0.609)	Data Time 0.001 (0.020)	Loss 2.5210 (2.5368)	Entropy 1.07153 (1.07434)	Top-1 acc 62.891 (62.988)	Top-5 acc 83.984 (83.148)	lr 0.01162
Train [63][2220/3239]	Time 0.233 (0.607)	Data Time 0.001 (0.020)	Loss 2.7188 (2.5368)	Entropy 1.07156 (1.07433)	Top-1 acc 60.547 (62.990)	Top-5 acc 79.297 (83.149)	lr 0.01162
Train [63][2230/3239]	Time 0.240 (0.607)	Data Time 0.001 (0.019)	Loss 2.3331 (2.5367)	Entropy 1.07154 (1.07432)	Top-1 acc 70.312 (62.996)	Top-5 acc 87.109 (83.151)	lr 0.01162
Train [63][2240/3239]	Time 0.225 (0.606)	Data Time 0.001 (0.019)	Loss 2.3855 (2.5365)	Entropy 1.07155 (1.07431)	Top-1 acc 68.750 (62.997)	Top-5 acc 85.938 (83.158)	lr 0.01162
Train [63][2250/3239]	Time 0.268 (0.606)	Data Time 0.001 (0.019)	Loss 2.6098 (2.5367)	Entropy 1.07149 (1.07429)	Top-1 acc 62.891 (62.992)	Top-5 acc 78.906 (83.156)	lr 0.01162
Train [63][2260/3239]	Time 0.167 (0.605)	Data Time 0.001 (0.019)	Loss 2.4322 (2.5366)	Entropy 1.07149 (1.07428)	Top-1 acc 64.062 (62.993)	Top-5 acc 85.938 (83.156)	lr 0.01162
Train [63][2270/3239]	Time 0.256 (0.604)	Data Time 0.002 (0.019)	Loss 2.4547 (2.5367)	Entropy 1.07151 (1.07427)	Top-1 acc 64.844 (62.992)	Top-5 acc 81.641 (83.153)	lr 0.01162
Train [63][2280/3239]	Time 0.243 (0.604)	Data Time 0.001 (0.019)	Loss 2.5624 (2.5367)	Entropy 1.07149 (1.07426)	Top-1 acc 64.453 (62.991)	Top-5 acc 82.812 (83.150)	lr 0.01162
Train [63][2290/3239]	Time 0.349 (0.603)	Data Time 0.001 (0.019)	Loss 2.6627 (2.5365)	Entropy 1.07149 (1.07424)	Top-1 acc 58.203 (62.996)	Top-5 acc 80.469 (83.155)	lr 0.01161
Train [63][2300/3239]	Time 0.228 (0.603)	Data Time 0.001 (0.019)	Loss 2.7142 (2.5366)	Entropy 1.07138 (1.07423)	Top-1 acc 60.938 (62.991)	Top-5 acc 78.516 (83.151)	lr 0.01161
Train [63][2310/3239]	Time 0.234 (0.602)	Data Time 0.001 (0.019)	Loss 2.5560 (2.5364)	Entropy 1.07134 (1.07422)	Top-1 acc 64.062 (62.996)	Top-5 acc 81.250 (83.152)	lr 0.01161
Train [63][2320/3239]	Time 2.488 (0.602)	Data Time 0.001 (0.019)	Loss 2.6712 (2.5365)	Entropy 1.07134 (1.07421)	Top-1 acc 61.719 (62.996)	Top-5 acc 80.469 (83.153)	lr 0.01161
Train [63][2330/3239]	Time 0.232 (0.600)	Data Time 0.001 (0.019)	Loss 2.5311 (2.5363)	Entropy 1.07130 (1.07419)	Top-1 acc 62.891 (62.995)	Top-5 acc 82.031 (83.155)	lr 0.01161
Train [63][2340/3239]	Time 0.224 (0.600)	Data Time 0.001 (0.019)	Loss 2.4542 (2.5361)	Entropy 1.07131 (1.07418)	Top-1 acc 66.016 (63.004)	Top-5 acc 83.594 (83.159)	lr 0.01161
Train [63][2350/3239]	Time 0.224 (0.599)	Data Time 0.001 (0.019)	Loss 2.4955 (2.5361)	Entropy 1.07120 (1.07417)	Top-1 acc 63.281 (63.002)	Top-5 acc 84.766 (83.162)	lr 0.01161
Train [63][2360/3239]	Time 0.254 (0.621)	Data Time 0.002 (0.019)	Loss 2.4283 (2.5359)	Entropy 1.07110 (1.07416)	Top-1 acc 64.453 (63.007)	Top-5 acc 83.984 (83.168)	lr 0.01161
Train [63][2370/3239]	Time 0.227 (0.620)	Data Time 0.002 (0.018)	Loss 2.5992 (2.5358)	Entropy 1.07096 (1.07414)	Top-1 acc 61.328 (63.009)	Top-5 acc 80.859 (83.169)	lr 0.01161
Train [63][2380/3239]	Time 0.393 (0.620)	Data Time 0.002 (0.018)	Loss 2.5117 (2.5356)	Entropy 1.07096 (1.07413)	Top-1 acc 64.453 (63.012)	Top-5 acc 82.031 (83.170)	lr 0.01161
Train [63][2390/3239]	Time 0.242 (0.619)	Data Time 0.001 (0.018)	Loss 2.7226 (2.5357)	Entropy 1.07097 (1.07412)	Top-1 acc 58.203 (63.013)	Top-5 acc 82.812 (83.171)	lr 0.01160
Train [63][2400/3239]	Time 0.224 (0.619)	Data Time 0.001 (0.018)	Loss 2.5987 (2.5356)	Entropy 1.07093 (1.07410)	Top-1 acc 61.719 (63.016)	Top-5 acc 81.250 (83.169)	lr 0.01160
Train [63][2410/3239]	Time 0.246 (0.618)	Data Time 0.006 (0.018)	Loss 2.5018 (2.5355)	Entropy 1.07089 (1.07409)	Top-1 acc 61.328 (63.018)	Top-5 acc 84.766 (83.171)	lr 0.01160
Train [63][2420/3239]	Time 0.236 (0.618)	Data Time 0.001 (0.018)	Loss 2.4731 (2.5353)	Entropy 1.07063 (1.07408)	Top-1 acc 60.938 (63.018)	Top-5 acc 88.672 (83.177)	lr 0.01160
Train [63][2430/3239]	Time 2.572 (0.617)	Data Time 0.001 (0.018)	Loss 2.5066 (2.5351)	Entropy 1.07063 (1.07406)	Top-1 acc 63.672 (63.024)	Top-5 acc 84.766 (83.183)	lr 0.01160
Train [63][2440/3239]	Time 0.224 (0.615)	Data Time 0.001 (0.018)	Loss 2.5342 (2.5352)	Entropy 1.07055 (1.07405)	Top-1 acc 67.578 (63.025)	Top-5 acc 82.422 (83.182)	lr 0.01160
Train [63][2450/3239]	Time 0.244 (0.615)	Data Time 0.001 (0.018)	Loss 2.5951 (2.5352)	Entropy 1.07051 (1.07403)	Top-1 acc 61.719 (63.025)	Top-5 acc 82.812 (83.182)	lr 0.01160
Train [63][2460/3239]	Time 0.242 (0.614)	Data Time 0.002 (0.018)	Loss 2.4553 (2.5351)	Entropy 1.07042 (1.07402)	Top-1 acc 66.016 (63.025)	Top-5 acc 84.375 (83.183)	lr 0.01160
Train [63][2470/3239]	Time 0.336 (0.614)	Data Time 0.001 (0.018)	Loss 2.5897 (2.5352)	Entropy 1.07034 (1.07400)	Top-1 acc 61.328 (63.026)	Top-5 acc 80.859 (83.182)	lr 0.01160
Train [63][2480/3239]	Time 0.240 (0.613)	Data Time 0.002 (0.018)	Loss 2.4090 (2.5353)	Entropy 1.07032 (1.07399)	Top-1 acc 65.234 (63.024)	Top-5 acc 85.156 (83.179)	lr 0.01160
Train [63][2490/3239]	Time 0.225 (0.613)	Data Time 0.001 (0.018)	Loss 2.5361 (2.5353)	Entropy 1.07033 (1.07398)	Top-1 acc 62.109 (63.021)	Top-5 acc 84.766 (83.179)	lr 0.01159
Train [63][2500/3239]	Time 0.223 (0.612)	Data Time 0.001 (0.018)	Loss 2.5346 (2.5352)	Entropy 1.07030 (1.07396)	Top-1 acc 63.281 (63.020)	Top-5 acc 84.375 (83.183)	lr 0.01159
Train [63][2510/3239]	Time 0.250 (0.611)	Data Time 0.001 (0.018)	Loss 2.5609 (2.5352)	Entropy 1.07023 (1.07395)	Top-1 acc 62.891 (63.021)	Top-5 acc 85.156 (83.184)	lr 0.01159
Train [63][2520/3239]	Time 0.346 (0.611)	Data Time 0.001 (0.017)	Loss 2.5495 (2.5351)	Entropy 1.07027 (1.07393)	Top-1 acc 64.062 (63.027)	Top-5 acc 82.031 (83.185)	lr 0.01159
Train [63][2530/3239]	Time 0.268 (0.610)	Data Time 0.001 (0.017)	Loss 2.3614 (2.5350)	Entropy 1.07022 (1.07392)	Top-1 acc 68.750 (63.029)	Top-5 acc 87.891 (83.187)	lr 0.01159
Train [63][2540/3239]	Time 2.583 (0.610)	Data Time 0.001 (0.017)	Loss 2.5555 (2.5349)	Entropy 1.07022 (1.07390)	Top-1 acc 64.844 (63.031)	Top-5 acc 85.938 (83.191)	lr 0.01159
Train [63][2550/3239]	Time 0.268 (0.608)	Data Time 0.002 (0.017)	Loss 2.5047 (2.5349)	Entropy 1.07024 (1.07389)	Top-1 acc 61.719 (63.029)	Top-5 acc 85.938 (83.194)	lr 0.01159
Train [63][2560/3239]	Time 0.222 (0.608)	Data Time 0.001 (0.017)	Loss 2.7224 (2.5350)	Entropy 1.07025 (1.07387)	Top-1 acc 57.812 (63.028)	Top-5 acc 78.906 (83.192)	lr 0.01159
Train [63][2570/3239]	Time 0.215 (0.607)	Data Time 0.001 (0.017)	Loss 2.4875 (2.5349)	Entropy 1.07022 (1.07386)	Top-1 acc 66.406 (63.032)	Top-5 acc 84.375 (83.198)	lr 0.01159
Train [63][2580/3239]	Time 0.231 (0.607)	Data Time 0.001 (0.017)	Loss 2.5042 (2.5349)	Entropy 1.07024 (1.07385)	Top-1 acc 65.234 (63.037)	Top-5 acc 83.594 (83.194)	lr 0.01159
Train [63][2590/3239]	Time 0.222 (0.606)	Data Time 0.001 (0.017)	Loss 2.4115 (2.5347)	Entropy 1.07025 (1.07383)	Top-1 acc 66.797 (63.044)	Top-5 acc 83.203 (83.199)	lr 0.01158
Train [63][2600/3239]	Time 0.216 (0.606)	Data Time 0.001 (0.017)	Loss 2.5938 (2.5346)	Entropy 1.07019 (1.07382)	Top-1 acc 64.062 (63.044)	Top-5 acc 84.766 (83.203)	lr 0.01158
Train [63][2610/3239]	Time 0.221 (0.605)	Data Time 0.002 (0.017)	Loss 2.4217 (2.5346)	Entropy 1.07012 (1.07380)	Top-1 acc 65.234 (63.045)	Top-5 acc 84.766 (83.203)	lr 0.01158
Train [63][2620/3239]	Time 0.230 (0.605)	Data Time 0.001 (0.017)	Loss 2.4638 (2.5346)	Entropy 1.06985 (1.07379)	Top-1 acc 64.453 (63.045)	Top-5 acc 85.156 (83.204)	lr 0.01158
Train [63][2630/3239]	Time 0.235 (0.604)	Data Time 0.001 (0.017)	Loss 2.5118 (2.5346)	Entropy 1.06989 (1.07377)	Top-1 acc 64.062 (63.045)	Top-5 acc 84.766 (83.205)	lr 0.01158
Train [63][2640/3239]	Time 0.227 (0.604)	Data Time 0.001 (0.017)	Loss 2.5322 (2.5346)	Entropy 1.06988 (1.07376)	Top-1 acc 66.797 (63.044)	Top-5 acc 84.375 (83.205)	lr 0.01158
Train [63][2650/3239]	Time 0.232 (0.603)	Data Time 0.001 (0.017)	Loss 2.8239 (2.5346)	Entropy 1.06980 (1.07375)	Top-1 acc 57.031 (63.044)	Top-5 acc 76.953 (83.206)	lr 0.01158
Train [63][2660/3239]	Time 0.200 (0.603)	Data Time 0.001 (0.017)	Loss 2.5256 (2.5346)	Entropy 1.06977 (1.07373)	Top-1 acc 64.453 (63.043)	Top-5 acc 82.031 (83.205)	lr 0.01158
Train [63][2670/3239]	Time 0.256 (0.602)	Data Time 0.001 (0.017)	Loss 2.4578 (2.5347)	Entropy 1.06970 (1.07372)	Top-1 acc 64.453 (63.040)	Top-5 acc 83.984 (83.205)	lr 0.01158
Train [63][2680/3239]	Time 0.219 (0.602)	Data Time 0.001 (0.017)	Loss 2.4334 (2.5347)	Entropy 1.06972 (1.07370)	Top-1 acc 64.844 (63.042)	Top-5 acc 86.328 (83.207)	lr 0.01158
Train [63][2690/3239]	Time 0.223 (0.601)	Data Time 0.001 (0.016)	Loss 2.4955 (2.5346)	Entropy 1.06963 (1.07369)	Top-1 acc 63.672 (63.042)	Top-5 acc 83.594 (83.209)	lr 0.01157
Train [63][2700/3239]	Time 0.216 (0.601)	Data Time 0.002 (0.016)	Loss 2.4242 (2.5346)	Entropy 1.06959 (1.07367)	Top-1 acc 63.672 (63.043)	Top-5 acc 86.328 (83.209)	lr 0.01157
Train [63][2710/3239]	Time 0.335 (0.620)	Data Time 0.004 (0.016)	Loss 2.4765 (2.5346)	Entropy 1.06959 (1.07366)	Top-1 acc 64.062 (63.045)	Top-5 acc 83.203 (83.210)	lr 0.01157
Train [63][2720/3239]	Time 0.237 (0.620)	Data Time 0.002 (0.016)	Loss 2.4764 (2.5347)	Entropy 1.06961 (1.07364)	Top-1 acc 65.234 (63.042)	Top-5 acc 83.984 (83.207)	lr 0.01157
Train [63][2730/3239]	Time 0.221 (0.619)	Data Time 0.002 (0.016)	Loss 2.5476 (2.5347)	Entropy 1.06960 (1.07363)	Top-1 acc 60.156 (63.041)	Top-5 acc 82.031 (83.206)	lr 0.01157
Train [63][2740/3239]	Time 0.207 (0.619)	Data Time 0.001 (0.016)	Loss 2.6324 (2.5349)	Entropy 1.06953 (1.07361)	Top-1 acc 59.766 (63.035)	Top-5 acc 83.984 (83.204)	lr 0.01157
Train [63][2750/3239]	Time 0.349 (0.618)	Data Time 0.001 (0.016)	Loss 2.5188 (2.5348)	Entropy 1.06954 (1.07360)	Top-1 acc 64.844 (63.036)	Top-5 acc 81.641 (83.203)	lr 0.01157
Train [63][2760/3239]	Time 0.221 (0.618)	Data Time 0.001 (0.016)	Loss 2.5580 (2.5349)	Entropy 1.06956 (1.07358)	Top-1 acc 61.328 (63.030)	Top-5 acc 83.594 (83.203)	lr 0.01157
Train [63][2770/3239]	Time 0.215 (0.617)	Data Time 0.001 (0.016)	Loss 2.7088 (2.5352)	Entropy 1.06952 (1.07357)	Top-1 acc 56.250 (63.025)	Top-5 acc 80.859 (83.197)	lr 0.01157
Train [63][2780/3239]	Time 0.229 (0.617)	Data Time 0.001 (0.016)	Loss 2.5010 (2.5351)	Entropy 1.06953 (1.07355)	Top-1 acc 65.234 (63.027)	Top-5 acc 83.594 (83.198)	lr 0.01157
Train [63][2790/3239]	Time 0.262 (0.616)	Data Time 0.002 (0.016)	Loss 2.6081 (2.5350)	Entropy 1.06932 (1.07354)	Top-1 acc 60.547 (63.031)	Top-5 acc 81.641 (83.197)	lr 0.01156
Train [63][2800/3239]	Time 0.237 (0.616)	Data Time 0.001 (0.016)	Loss 2.4877 (2.5351)	Entropy 1.06931 (1.07352)	Top-1 acc 62.891 (63.030)	Top-5 acc 85.547 (83.195)	lr 0.01156
Train [63][2810/3239]	Time 0.241 (0.615)	Data Time 0.001 (0.016)	Loss 2.5086 (2.5352)	Entropy 1.06926 (1.07351)	Top-1 acc 64.844 (63.028)	Top-5 acc 85.156 (83.194)	lr 0.01156
Train [63][2820/3239]	Time 0.270 (0.615)	Data Time 0.001 (0.016)	Loss 2.5295 (2.5353)	Entropy 1.06928 (1.07349)	Top-1 acc 62.891 (63.026)	Top-5 acc 83.984 (83.195)	lr 0.01156
Train [63][2830/3239]	Time 0.219 (0.615)	Data Time 0.001 (0.016)	Loss 2.6023 (2.5352)	Entropy 1.06923 (1.07348)	Top-1 acc 60.938 (63.027)	Top-5 acc 84.375 (83.200)	lr 0.01156
Train [63][2840/3239]	Time 0.292 (0.614)	Data Time 0.001 (0.016)	Loss 2.4025 (2.5349)	Entropy 1.06914 (1.07346)	Top-1 acc 67.188 (63.036)	Top-5 acc 86.328 (83.204)	lr 0.01156
Train [63][2850/3239]	Time 0.222 (0.613)	Data Time 0.002 (0.016)	Loss 2.5185 (2.5349)	Entropy 1.06911 (1.07345)	Top-1 acc 66.406 (63.038)	Top-5 acc 85.156 (83.204)	lr 0.01156
Train [63][2860/3239]	Time 0.244 (0.613)	Data Time 0.001 (0.016)	Loss 2.5077 (2.5347)	Entropy 1.06907 (1.07343)	Top-1 acc 64.062 (63.044)	Top-5 acc 80.859 (83.206)	lr 0.01156
Train [63][2870/3239]	Time 0.223 (0.612)	Data Time 0.001 (0.016)	Loss 2.5273 (2.5348)	Entropy 1.06907 (1.07342)	Top-1 acc 63.281 (63.041)	Top-5 acc 81.250 (83.205)	lr 0.01156
Train [63][2880/3239]	Time 0.239 (0.612)	Data Time 0.001 (0.015)	Loss 2.6482 (2.5349)	Entropy 1.06907 (1.07340)	Top-1 acc 62.109 (63.042)	Top-5 acc 80.859 (83.201)	lr 0.01156
Train [63][2890/3239]	Time 0.211 (0.611)	Data Time 0.001 (0.015)	Loss 2.4533 (2.5350)	Entropy 1.06907 (1.07339)	Top-1 acc 66.406 (63.037)	Top-5 acc 84.766 (83.199)	lr 0.01155
Train [63][2900/3239]	Time 0.224 (0.611)	Data Time 0.001 (0.015)	Loss 2.7525 (2.5352)	Entropy 1.06911 (1.07337)	Top-1 acc 60.938 (63.035)	Top-5 acc 78.906 (83.197)	lr 0.01155
Train [63][2910/3239]	Time 0.275 (0.610)	Data Time 0.001 (0.015)	Loss 2.5940 (2.5350)	Entropy 1.06907 (1.07336)	Top-1 acc 61.719 (63.038)	Top-5 acc 82.031 (83.199)	lr 0.01155
Train [63][2920/3239]	Time 0.218 (0.610)	Data Time 0.001 (0.015)	Loss 2.4921 (2.5350)	Entropy 1.06904 (1.07334)	Top-1 acc 66.406 (63.041)	Top-5 acc 83.984 (83.198)	lr 0.01155
Train [63][2930/3239]	Time 0.325 (0.610)	Data Time 0.001 (0.015)	Loss 2.3575 (2.5351)	Entropy 1.06901 (1.07333)	Top-1 acc 67.969 (63.041)	Top-5 acc 86.719 (83.197)	lr 0.01155
Train [63][2940/3239]	Time 0.223 (0.609)	Data Time 0.001 (0.015)	Loss 2.5179 (2.5350)	Entropy 1.06903 (1.07331)	Top-1 acc 62.891 (63.045)	Top-5 acc 83.203 (83.195)	lr 0.01155
Train [63][2950/3239]	Time 0.246 (0.609)	Data Time 0.001 (0.015)	Loss 2.6017 (2.5349)	Entropy 1.06900 (1.07330)	Top-1 acc 62.109 (63.044)	Top-5 acc 82.422 (83.197)	lr 0.01155
Train [63][2960/3239]	Time 0.230 (0.608)	Data Time 0.001 (0.015)	Loss 2.4975 (2.5349)	Entropy 1.06898 (1.07328)	Top-1 acc 62.500 (63.044)	Top-5 acc 85.938 (83.199)	lr 0.01155
Train [63][2970/3239]	Time 0.230 (0.608)	Data Time 0.001 (0.015)	Loss 2.4937 (2.5349)	Entropy 1.06898 (1.07327)	Top-1 acc 64.062 (63.043)	Top-5 acc 83.203 (83.199)	lr 0.01155
Train [63][2980/3239]	Time 0.213 (0.607)	Data Time 0.001 (0.015)	Loss 2.5582 (2.5349)	Entropy 1.06897 (1.07326)	Top-1 acc 62.500 (63.041)	Top-5 acc 83.984 (83.198)	lr 0.01155
Train [63][2990/3239]	Time 0.203 (0.607)	Data Time 0.001 (0.015)	Loss 2.7771 (2.5351)	Entropy 1.06895 (1.07324)	Top-1 acc 56.641 (63.034)	Top-5 acc 78.516 (83.195)	lr 0.01154
Train [63][3000/3239]	Time 0.284 (0.606)	Data Time 0.001 (0.015)	Loss 2.4458 (2.5351)	Entropy 1.06895 (1.07323)	Top-1 acc 65.234 (63.031)	Top-5 acc 82.812 (83.194)	lr 0.01154
Train [63][3010/3239]	Time 0.301 (0.606)	Data Time 0.001 (0.015)	Loss 2.6102 (2.5353)	Entropy 1.06893 (1.07321)	Top-1 acc 60.156 (63.026)	Top-5 acc 82.031 (83.190)	lr 0.01154
Train [63][3020/3239]	Time 0.299 (0.605)	Data Time 0.001 (0.015)	Loss 2.5086 (2.5354)	Entropy 1.06893 (1.07320)	Top-1 acc 64.062 (63.027)	Top-5 acc 84.375 (83.189)	lr 0.01154
Train [63][3030/3239]	Time 0.264 (0.605)	Data Time 0.001 (0.015)	Loss 2.6279 (2.5353)	Entropy 1.06888 (1.07318)	Top-1 acc 59.766 (63.028)	Top-5 acc 82.422 (83.189)	lr 0.01154
Train [63][3040/3239]	Time 0.294 (0.621)	Data Time 0.006 (0.015)	Loss 2.6092 (2.5354)	Entropy 1.06889 (1.07317)	Top-1 acc 57.812 (63.024)	Top-5 acc 80.078 (83.188)	lr 0.01154
Train [63][3050/3239]	Time 0.217 (0.621)	Data Time 0.002 (0.015)	Loss 2.7995 (2.5354)	Entropy 1.06885 (1.07316)	Top-1 acc 57.812 (63.022)	Top-5 acc 78.906 (83.188)	lr 0.01154
Train [63][3060/3239]	Time 0.223 (0.620)	Data Time 0.001 (0.015)	Loss 2.4332 (2.5352)	Entropy 1.06882 (1.07314)	Top-1 acc 66.406 (63.025)	Top-5 acc 83.203 (83.190)	lr 0.01154
Train [63][3070/3239]	Time 0.272 (0.620)	Data Time 0.002 (0.015)	Loss 2.5880 (2.5354)	Entropy 1.06879 (1.07313)	Top-1 acc 62.891 (63.020)	Top-5 acc 84.375 (83.188)	lr 0.01154
Train [63][3080/3239]	Time 0.244 (0.619)	Data Time 0.001 (0.015)	Loss 2.6061 (2.5355)	Entropy 1.06868 (1.07311)	Top-1 acc 64.453 (63.020)	Top-5 acc 80.469 (83.184)	lr 0.01154
Train [63][3090/3239]	Time 0.294 (0.619)	Data Time 0.001 (0.015)	Loss 2.4529 (2.5359)	Entropy 1.06863 (1.07310)	Top-1 acc 64.453 (63.013)	Top-5 acc 83.594 (83.178)	lr 0.01153
Train [63][3100/3239]	Time 0.233 (0.618)	Data Time 0.001 (0.015)	Loss 2.5566 (2.5359)	Entropy 1.06861 (1.07309)	Top-1 acc 60.156 (63.014)	Top-5 acc 82.031 (83.174)	lr 0.01153
Train [63][3110/3239]	Time 0.226 (0.618)	Data Time 0.001 (0.014)	Loss 2.4256 (2.5356)	Entropy 1.06853 (1.07307)	Top-1 acc 60.547 (63.018)	Top-5 acc 85.938 (83.180)	lr 0.01153
Train [63][3120/3239]	Time 0.240 (0.617)	Data Time 0.001 (0.014)	Loss 2.5372 (2.5357)	Entropy 1.06854 (1.07306)	Top-1 acc 59.375 (63.013)	Top-5 acc 83.203 (83.178)	lr 0.01153
Train [63][3130/3239]	Time 0.262 (0.617)	Data Time 0.001 (0.014)	Loss 2.5365 (2.5358)	Entropy 1.06842 (1.07304)	Top-1 acc 63.672 (63.013)	Top-5 acc 82.422 (83.177)	lr 0.01153
Train [63][3140/3239]	Time 0.245 (0.616)	Data Time 0.001 (0.014)	Loss 2.5936 (2.5360)	Entropy 1.06839 (1.07303)	Top-1 acc 60.547 (63.007)	Top-5 acc 83.984 (83.174)	lr 0.01153
Train [63][3150/3239]	Time 0.220 (0.616)	Data Time 0.001 (0.014)	Loss 2.6252 (2.5362)	Entropy 1.06835 (1.07301)	Top-1 acc 60.156 (63.002)	Top-5 acc 81.641 (83.171)	lr 0.01153
Train [63][3160/3239]	Time 0.320 (0.615)	Data Time 0.001 (0.014)	Loss 2.6412 (2.5363)	Entropy 1.06829 (1.07300)	Top-1 acc 60.547 (62.997)	Top-5 acc 79.297 (83.166)	lr 0.01153
Train [63][3170/3239]	Time 0.228 (0.615)	Data Time 0.001 (0.014)	Loss 2.7253 (2.5366)	Entropy 1.06824 (1.07298)	Top-1 acc 60.938 (62.990)	Top-5 acc 80.078 (83.161)	lr 0.01153
Train [63][3180/3239]	Time 0.206 (0.614)	Data Time 0.000 (0.014)	Loss 2.6196 (2.5367)	Entropy 1.06832 (1.07297)	Top-1 acc 60.547 (62.983)	Top-5 acc 78.906 (83.158)	lr 0.01153
Train [63][3190/3239]	Time 0.213 (0.614)	Data Time 0.000 (0.014)	Loss 2.8003 (2.5367)	Entropy 1.06828 (1.07295)	Top-1 acc 58.203 (62.984)	Top-5 acc 78.125 (83.157)	lr 0.01152
Train [63][3200/3239]	Time 0.218 (0.613)	Data Time 0.000 (0.014)	Loss 2.6899 (2.5368)	Entropy 1.06824 (1.07294)	Top-1 acc 59.766 (62.982)	Top-5 acc 80.859 (83.157)	lr 0.01152
Train [63][3210/3239]	Time 0.317 (0.613)	Data Time 0.000 (0.014)	Loss 2.4432 (2.5368)	Entropy 1.06821 (1.07292)	Top-1 acc 65.234 (62.980)	Top-5 acc 85.938 (83.158)	lr 0.01152
Train [63][3220/3239]	Time 0.251 (0.612)	Data Time 0.000 (0.014)	Loss 2.8226 (2.5368)	Entropy 1.06815 (1.07291)	Top-1 acc 56.641 (62.980)	Top-5 acc 77.734 (83.161)	lr 0.01152
Train [63][3230/3239]	Time 0.238 (0.612)	Data Time 0.000 (0.014)	Loss 2.6500 (2.5368)	Entropy 1.06816 (1.07289)	Top-1 acc 60.547 (62.982)	Top-5 acc 81.250 (83.161)	lr 0.01152
Train [63][3239/3239]	Time 2.330 (0.612)	Data Time 0.000 (0.014)	Loss 2.8315 (2.5368)	Entropy 1.06816 (1.07288)	Top-1 acc 60.494 (62.985)	Top-5 acc 74.074 (83.160)	lr 0.01152
==========Valid [63/120]	loss 1.425	top-1 acc 67.786 (67.786)	top-5 acc 87.059	Train top-1 62.985	top-5 83.160	Entropy 1.06816	Latency-None: 0.000ms	Flops: 546.53M
Train [64][0/3239]	Time 43.687 (43.687)	Data Time 40.092 (40.092)	Loss 2.4119 (2.4119)	Entropy 1.06814 (1.06814)	Top-1 acc 67.969 (67.969)	Top-5 acc 83.984 (83.984)	lr 0.01152
Train [64][10/3239]	Time 2.733 (4.484)	Data Time 0.002 (3.649)	Loss 2.4491 (2.4938)	Entropy 1.06814 (1.06814)	Top-1 acc 66.016 (64.382)	Top-5 acc 86.719 (83.949)	lr 0.01152
Train [64][20/3239]	Time 0.235 (2.463)	Data Time 0.001 (1.912)	Loss 2.5592 (2.5218)	Entropy 1.06816 (1.06815)	Top-1 acc 63.672 (63.635)	Top-5 acc 82.812 (83.650)	lr 0.01152
Train [64][30/3239]	Time 0.238 (1.819)	Data Time 0.001 (1.296)	Loss 2.4500 (2.5176)	Entropy 1.06809 (1.06813)	Top-1 acc 66.016 (63.836)	Top-5 acc 85.547 (83.846)	lr 0.01152
Train [64][40/3239]	Time 0.238 (1.498)	Data Time 0.001 (0.980)	Loss 2.7445 (2.5280)	Entropy 1.06803 (1.06811)	Top-1 acc 58.594 (63.567)	Top-5 acc 78.125 (83.508)	lr 0.01152
Train [64][50/3239]	Time 0.212 (1.296)	Data Time 0.002 (0.788)	Loss 2.4191 (2.5238)	Entropy 1.06796 (1.06808)	Top-1 acc 61.719 (63.534)	Top-5 acc 85.938 (83.601)	lr 0.01151
Train [64][60/3239]	Time 0.228 (1.161)	Data Time 0.001 (0.659)	Loss 2.5033 (2.5228)	Entropy 1.06791 (1.06806)	Top-1 acc 61.328 (63.627)	Top-5 acc 85.156 (83.626)	lr 0.01151
Train [64][70/3239]	Time 0.336 (1.065)	Data Time 0.001 (0.567)	Loss 2.5729 (2.5218)	Entropy 1.06790 (1.06804)	Top-1 acc 64.453 (63.655)	Top-5 acc 80.859 (83.588)	lr 0.01151
Train [64][80/3239]	Time 0.220 (0.993)	Data Time 0.001 (0.497)	Loss 2.4822 (2.5162)	Entropy 1.06780 (1.06802)	Top-1 acc 61.328 (63.720)	Top-5 acc 84.375 (83.719)	lr 0.01151
Train [64][90/3239]	Time 0.233 (0.936)	Data Time 0.001 (0.442)	Loss 2.4672 (2.5163)	Entropy 1.06798 (1.06800)	Top-1 acc 63.672 (63.663)	Top-5 acc 86.328 (83.705)	lr 0.01151
Train [64][100/3239]	Time 0.212 (0.891)	Data Time 0.001 (0.399)	Loss 2.4848 (2.5184)	Entropy 1.06795 (1.06800)	Top-1 acc 65.625 (63.718)	Top-5 acc 84.766 (83.613)	lr 0.01151
Train [64][110/3239]	Time 0.208 (0.853)	Data Time 0.001 (0.363)	Loss 2.6173 (2.5226)	Entropy 1.06792 (1.06799)	Top-1 acc 60.938 (63.577)	Top-5 acc 81.250 (83.562)	lr 0.01151
Train [64][120/3239]	Time 2.755 (0.824)	Data Time 0.001 (0.333)	Loss 2.5827 (2.5205)	Entropy 1.06792 (1.06798)	Top-1 acc 61.328 (63.630)	Top-5 acc 83.594 (83.594)	lr 0.01151
Train [64][130/3239]	Time 0.214 (0.780)	Data Time 0.001 (0.308)	Loss 2.2948 (2.5122)	Entropy 1.06785 (1.06797)	Top-1 acc 67.969 (63.758)	Top-5 acc 87.109 (83.776)	lr 0.01151
Train [64][140/3239]	Time 0.228 (0.758)	Data Time 0.001 (0.286)	Loss 2.5911 (2.5152)	Entropy 1.06777 (1.06796)	Top-1 acc 60.938 (63.669)	Top-5 acc 82.812 (83.702)	lr 0.01151
Train [64][150/3239]	Time 0.214 (1.113)	Data Time 0.002 (0.267)	Loss 2.6394 (2.5151)	Entropy 1.06776 (1.06795)	Top-1 acc 62.109 (63.690)	Top-5 acc 80.859 (83.687)	lr 0.01150
Train [64][160/3239]	Time 0.353 (1.075)	Data Time 0.002 (0.251)	Loss 2.5194 (2.5108)	Entropy 1.06767 (1.06793)	Top-1 acc 65.625 (63.759)	Top-5 acc 83.203 (83.793)	lr 0.01150
Train [64][170/3239]	Time 0.219 (1.039)	Data Time 0.001 (0.236)	Loss 2.4486 (2.5103)	Entropy 1.06768 (1.06792)	Top-1 acc 64.062 (63.768)	Top-5 acc 82.812 (83.776)	lr 0.01150
Train [64][180/3239]	Time 0.231 (1.009)	Data Time 0.002 (0.223)	Loss 2.7463 (2.5108)	Entropy 1.06752 (1.06790)	Top-1 acc 57.422 (63.754)	Top-5 acc 80.469 (83.771)	lr 0.01150
Train [64][190/3239]	Time 0.219 (0.981)	Data Time 0.001 (0.212)	Loss 2.4215 (2.5111)	Entropy 1.06746 (1.06788)	Top-1 acc 65.625 (63.696)	Top-5 acc 82.422 (83.745)	lr 0.01150
Train [64][200/3239]	Time 0.223 (0.956)	Data Time 0.002 (0.201)	Loss 2.3695 (2.5100)	Entropy 1.06737 (1.06786)	Top-1 acc 67.188 (63.695)	Top-5 acc 84.375 (83.740)	lr 0.01150
Train [64][210/3239]	Time 0.242 (0.934)	Data Time 0.001 (0.192)	Loss 2.4902 (2.5096)	Entropy 1.06736 (1.06783)	Top-1 acc 64.844 (63.742)	Top-5 acc 87.109 (83.759)	lr 0.01150
Train [64][220/3239]	Time 0.230 (0.913)	Data Time 0.002 (0.183)	Loss 2.5327 (2.5112)	Entropy 1.06734 (1.06781)	Top-1 acc 62.891 (63.668)	Top-5 acc 83.984 (83.733)	lr 0.01150
Train [64][230/3239]	Time 2.540 (0.894)	Data Time 0.001 (0.175)	Loss 2.6231 (2.5104)	Entropy 1.06734 (1.06779)	Top-1 acc 61.719 (63.655)	Top-5 acc 81.250 (83.749)	lr 0.01150
Train [64][240/3239]	Time 0.235 (0.867)	Data Time 0.002 (0.168)	Loss 2.7137 (2.5107)	Entropy 1.06734 (1.06777)	Top-1 acc 57.422 (63.628)	Top-5 acc 77.734 (83.743)	lr 0.01150
Train [64][250/3239]	Time 0.327 (0.852)	Data Time 0.001 (0.162)	Loss 2.5177 (2.5110)	Entropy 1.06722 (1.06775)	Top-1 acc 61.719 (63.571)	Top-5 acc 84.766 (83.740)	lr 0.01149
Train [64][260/3239]	Time 0.200 (0.837)	Data Time 0.001 (0.156)	Loss 2.5672 (2.5130)	Entropy 1.06721 (1.06773)	Top-1 acc 63.281 (63.542)	Top-5 acc 81.250 (83.664)	lr 0.01149
Train [64][270/3239]	Time 0.230 (0.823)	Data Time 0.001 (0.150)	Loss 2.4344 (2.5115)	Entropy 1.06719 (1.06771)	Top-1 acc 65.234 (63.568)	Top-5 acc 84.375 (83.700)	lr 0.01149
Train [64][280/3239]	Time 0.223 (0.810)	Data Time 0.001 (0.145)	Loss 2.4820 (2.5104)	Entropy 1.06712 (1.06769)	Top-1 acc 63.281 (63.607)	Top-5 acc 85.547 (83.699)	lr 0.01149
Train [64][290/3239]	Time 0.215 (0.798)	Data Time 0.001 (0.140)	Loss 2.4247 (2.5100)	Entropy 1.06715 (1.06767)	Top-1 acc 63.672 (63.607)	Top-5 acc 82.422 (83.698)	lr 0.01149
Train [64][300/3239]	Time 0.236 (0.788)	Data Time 0.002 (0.135)	Loss 2.5755 (2.5112)	Entropy 1.06706 (1.06765)	Top-1 acc 60.938 (63.551)	Top-5 acc 81.250 (83.683)	lr 0.01149
Train [64][310/3239]	Time 0.233 (0.778)	Data Time 0.001 (0.131)	Loss 2.4826 (2.5109)	Entropy 1.06705 (1.06763)	Top-1 acc 65.234 (63.555)	Top-5 acc 85.156 (83.693)	lr 0.01149
Train [64][320/3239]	Time 0.203 (0.768)	Data Time 0.001 (0.127)	Loss 2.3896 (2.5107)	Entropy 1.06702 (1.06762)	Top-1 acc 67.578 (63.543)	Top-5 acc 85.156 (83.695)	lr 0.01149
Train [64][330/3239]	Time 0.220 (0.759)	Data Time 0.001 (0.123)	Loss 2.5510 (2.5103)	Entropy 1.06705 (1.06760)	Top-1 acc 60.547 (63.552)	Top-5 acc 83.984 (83.687)	lr 0.01149
Train [64][340/3239]	Time 2.496 (0.750)	Data Time 0.001 (0.119)	Loss 2.5547 (2.5110)	Entropy 1.06705 (1.06758)	Top-1 acc 64.062 (63.526)	Top-5 acc 81.250 (83.667)	lr 0.01149
Train [64][350/3239]	Time 0.214 (0.735)	Data Time 0.001 (0.116)	Loss 2.4684 (2.5120)	Entropy 1.06704 (1.06757)	Top-1 acc 62.891 (63.498)	Top-5 acc 85.156 (83.646)	lr 0.01148
Train [64][360/3239]	Time 0.230 (0.728)	Data Time 0.001 (0.113)	Loss 2.4251 (2.5106)	Entropy 1.06699 (1.06755)	Top-1 acc 64.844 (63.546)	Top-5 acc 86.719 (83.691)	lr 0.01148
Train [64][370/3239]	Time 0.228 (0.721)	Data Time 0.002 (0.110)	Loss 2.4984 (2.5096)	Entropy 1.06698 (1.06753)	Top-1 acc 61.328 (63.582)	Top-5 acc 82.812 (83.696)	lr 0.01148
Train [64][380/3239]	Time 0.220 (0.714)	Data Time 0.001 (0.107)	Loss 2.5031 (2.5100)	Entropy 1.06694 (1.06752)	Top-1 acc 63.672 (63.586)	Top-5 acc 82.812 (83.693)	lr 0.01148
Train [64][390/3239]	Time 0.397 (0.708)	Data Time 0.001 (0.104)	Loss 2.4214 (2.5093)	Entropy 1.06689 (1.06750)	Top-1 acc 64.844 (63.618)	Top-5 acc 87.500 (83.711)	lr 0.01148
Train [64][400/3239]	Time 0.234 (0.702)	Data Time 0.001 (0.102)	Loss 2.3869 (2.5101)	Entropy 1.06689 (1.06749)	Top-1 acc 65.234 (63.594)	Top-5 acc 82.422 (83.691)	lr 0.01148
Train [64][410/3239]	Time 0.222 (0.697)	Data Time 0.001 (0.099)	Loss 2.4310 (2.5077)	Entropy 1.06695 (1.06747)	Top-1 acc 65.625 (63.641)	Top-5 acc 83.203 (83.723)	lr 0.01148
Train [64][420/3239]	Time 0.210 (0.691)	Data Time 0.001 (0.097)	Loss 2.4226 (2.5088)	Entropy 1.06691 (1.06746)	Top-1 acc 63.281 (63.600)	Top-5 acc 83.984 (83.700)	lr 0.01148
Train [64][430/3239]	Time 0.374 (0.686)	Data Time 0.001 (0.095)	Loss 2.4208 (2.5072)	Entropy 1.06684 (1.06745)	Top-1 acc 64.453 (63.627)	Top-5 acc 87.500 (83.733)	lr 0.01148
Train [64][440/3239]	Time 0.253 (0.681)	Data Time 0.001 (0.093)	Loss 2.3224 (2.5058)	Entropy 1.06678 (1.06744)	Top-1 acc 65.625 (63.630)	Top-5 acc 86.719 (83.754)	lr 0.01147
Train [64][450/3239]	Time 2.489 (0.677)	Data Time 0.001 (0.091)	Loss 2.5558 (2.5066)	Entropy 1.06678 (1.06742)	Top-1 acc 63.672 (63.622)	Top-5 acc 81.250 (83.738)	lr 0.01147
Train [64][460/3239]	Time 0.228 (0.667)	Data Time 0.001 (0.089)	Loss 2.5377 (2.5070)	Entropy 1.06677 (1.06741)	Top-1 acc 63.672 (63.617)	Top-5 acc 83.203 (83.734)	lr 0.01147
Train [64][470/3239]	Time 0.241 (0.663)	Data Time 0.001 (0.087)	Loss 2.4506 (2.5074)	Entropy 1.06678 (1.06739)	Top-1 acc 64.844 (63.624)	Top-5 acc 85.938 (83.729)	lr 0.01147
Train [64][480/3239]	Time 0.226 (0.659)	Data Time 0.001 (0.085)	Loss 2.6422 (2.5080)	Entropy 1.06670 (1.06738)	Top-1 acc 59.766 (63.595)	Top-5 acc 80.859 (83.720)	lr 0.01147
Train [64][490/3239]	Time 0.227 (0.655)	Data Time 0.001 (0.083)	Loss 2.6694 (2.5094)	Entropy 1.06664 (1.06736)	Top-1 acc 57.422 (63.568)	Top-5 acc 83.203 (83.688)	lr 0.01147
Train [64][500/3239]	Time 0.232 (0.651)	Data Time 0.001 (0.082)	Loss 2.5815 (2.5085)	Entropy 1.06638 (1.06735)	Top-1 acc 60.938 (63.593)	Top-5 acc 84.375 (83.703)	lr 0.01147
Train [64][510/3239]	Time 0.221 (0.757)	Data Time 0.002 (0.080)	Loss 2.5475 (2.5092)	Entropy 1.06638 (1.06733)	Top-1 acc 64.453 (63.577)	Top-5 acc 81.641 (83.671)	lr 0.01147
Train [64][520/3239]	Time 0.269 (0.752)	Data Time 0.002 (0.079)	Loss 2.4776 (2.5099)	Entropy 1.06636 (1.06731)	Top-1 acc 64.453 (63.575)	Top-5 acc 83.984 (83.654)	lr 0.01147
Train [64][530/3239]	Time 0.282 (0.747)	Data Time 0.004 (0.077)	Loss 2.5428 (2.5115)	Entropy 1.06635 (1.06729)	Top-1 acc 58.594 (63.537)	Top-5 acc 84.766 (83.627)	lr 0.01147
Train [64][540/3239]	Time 0.223 (0.742)	Data Time 0.001 (0.076)	Loss 2.3408 (2.5117)	Entropy 1.06632 (1.06727)	Top-1 acc 69.531 (63.520)	Top-5 acc 85.938 (83.621)	lr 0.01146
Train [64][550/3239]	Time 0.274 (0.737)	Data Time 0.001 (0.075)	Loss 2.3940 (2.5115)	Entropy 1.06625 (1.06726)	Top-1 acc 67.188 (63.524)	Top-5 acc 88.281 (83.635)	lr 0.01146
Train [64][560/3239]	Time 2.531 (0.732)	Data Time 0.002 (0.073)	Loss 2.7157 (2.5134)	Entropy 1.06625 (1.06724)	Top-1 acc 58.203 (63.494)	Top-5 acc 78.125 (83.599)	lr 0.01146
Train [64][570/3239]	Time 0.345 (0.724)	Data Time 0.001 (0.072)	Loss 2.4790 (2.5136)	Entropy 1.06621 (1.06722)	Top-1 acc 62.891 (63.484)	Top-5 acc 83.203 (83.595)	lr 0.01146
Train [64][580/3239]	Time 0.227 (0.719)	Data Time 0.001 (0.071)	Loss 2.7576 (2.5145)	Entropy 1.06621 (1.06720)	Top-1 acc 59.375 (63.467)	Top-5 acc 78.516 (83.575)	lr 0.01146
Train [64][590/3239]	Time 0.207 (0.715)	Data Time 0.001 (0.070)	Loss 2.5545 (2.5153)	Entropy 1.06620 (1.06719)	Top-1 acc 62.891 (63.462)	Top-5 acc 82.422 (83.557)	lr 0.01146
Train [64][600/3239]	Time 0.230 (0.711)	Data Time 0.001 (0.068)	Loss 2.5061 (2.5154)	Entropy 1.06619 (1.06717)	Top-1 acc 62.891 (63.466)	Top-5 acc 84.766 (83.567)	lr 0.01146
Train [64][610/3239]	Time 0.333 (0.707)	Data Time 0.001 (0.067)	Loss 2.4383 (2.5154)	Entropy 1.06611 (1.06715)	Top-1 acc 64.453 (63.461)	Top-5 acc 84.766 (83.549)	lr 0.01146
Train [64][620/3239]	Time 0.223 (0.703)	Data Time 0.001 (0.066)	Loss 2.6788 (2.5161)	Entropy 1.06609 (1.06714)	Top-1 acc 57.812 (63.421)	Top-5 acc 78.906 (83.532)	lr 0.01146
Train [64][630/3239]	Time 0.228 (0.699)	Data Time 0.001 (0.065)	Loss 2.5047 (2.5173)	Entropy 1.06607 (1.06712)	Top-1 acc 62.891 (63.392)	Top-5 acc 82.422 (83.526)	lr 0.01146
Train [64][640/3239]	Time 0.231 (0.695)	Data Time 0.001 (0.064)	Loss 2.8020 (2.5180)	Entropy 1.06605 (1.06710)	Top-1 acc 54.297 (63.376)	Top-5 acc 78.516 (83.516)	lr 0.01145
Train [64][650/3239]	Time 0.220 (0.692)	Data Time 0.001 (0.063)	Loss 2.4473 (2.5174)	Entropy 1.06599 (1.06709)	Top-1 acc 62.500 (63.380)	Top-5 acc 85.938 (83.537)	lr 0.01145
Train [64][660/3239]	Time 0.378 (0.689)	Data Time 0.003 (0.062)	Loss 2.4889 (2.5171)	Entropy 1.06598 (1.06707)	Top-1 acc 64.844 (63.390)	Top-5 acc 83.594 (83.552)	lr 0.01145
Train [64][670/3239]	Time 2.475 (0.686)	Data Time 0.001 (0.061)	Loss 2.4884 (2.5178)	Entropy 1.06598 (1.06705)	Top-1 acc 66.016 (63.377)	Top-5 acc 83.984 (83.529)	lr 0.01145
Train [64][680/3239]	Time 0.226 (0.679)	Data Time 0.001 (0.061)	Loss 2.5158 (2.5175)	Entropy 1.06596 (1.06704)	Top-1 acc 61.328 (63.377)	Top-5 acc 83.984 (83.546)	lr 0.01145
Train [64][690/3239]	Time 0.229 (0.676)	Data Time 0.002 (0.060)	Loss 2.7076 (2.5181)	Entropy 1.06595 (1.06702)	Top-1 acc 59.766 (63.381)	Top-5 acc 81.641 (83.541)	lr 0.01145
Train [64][700/3239]	Time 0.236 (0.673)	Data Time 0.001 (0.059)	Loss 2.4713 (2.5175)	Entropy 1.06599 (1.06701)	Top-1 acc 66.797 (63.398)	Top-5 acc 83.203 (83.551)	lr 0.01145
Train [64][710/3239]	Time 0.228 (0.670)	Data Time 0.001 (0.058)	Loss 2.4380 (2.5177)	Entropy 1.06596 (1.06699)	Top-1 acc 64.453 (63.403)	Top-5 acc 85.547 (83.546)	lr 0.01145
Train [64][720/3239]	Time 0.213 (0.667)	Data Time 0.001 (0.057)	Loss 2.5481 (2.5174)	Entropy 1.06592 (1.06698)	Top-1 acc 62.891 (63.396)	Top-5 acc 82.812 (83.551)	lr 0.01145
Train [64][730/3239]	Time 0.231 (0.665)	Data Time 0.002 (0.057)	Loss 2.4879 (2.5174)	Entropy 1.06598 (1.06696)	Top-1 acc 64.453 (63.402)	Top-5 acc 85.938 (83.553)	lr 0.01145
Train [64][740/3239]	Time 0.225 (0.662)	Data Time 0.001 (0.056)	Loss 2.5441 (2.5172)	Entropy 1.06597 (1.06695)	Top-1 acc 56.641 (63.393)	Top-5 acc 83.984 (83.555)	lr 0.01144
Train [64][750/3239]	Time 0.337 (0.660)	Data Time 0.001 (0.055)	Loss 2.4243 (2.5173)	Entropy 1.06594 (1.06694)	Top-1 acc 66.406 (63.395)	Top-5 acc 85.156 (83.557)	lr 0.01144
Train [64][760/3239]	Time 0.223 (0.657)	Data Time 0.001 (0.054)	Loss 2.5203 (2.5178)	Entropy 1.06595 (1.06692)	Top-1 acc 62.891 (63.378)	Top-5 acc 83.203 (83.554)	lr 0.01144
Train [64][770/3239]	Time 0.257 (0.655)	Data Time 0.001 (0.054)	Loss 2.5843 (2.5185)	Entropy 1.06596 (1.06691)	Top-1 acc 60.938 (63.358)	Top-5 acc 84.375 (83.543)	lr 0.01144
Train [64][780/3239]	Time 2.489 (0.652)	Data Time 0.001 (0.053)	Loss 2.4770 (2.5190)	Entropy 1.06596 (1.06690)	Top-1 acc 68.750 (63.352)	Top-5 acc 81.250 (83.531)	lr 0.01144
Train [64][790/3239]	Time 0.250 (0.647)	Data Time 0.001 (0.052)	Loss 2.4806 (2.5189)	Entropy 1.06597 (1.06689)	Top-1 acc 65.625 (63.345)	Top-5 acc 86.328 (83.525)	lr 0.01144
Train [64][800/3239]	Time 0.219 (0.645)	Data Time 0.001 (0.052)	Loss 2.5578 (2.5187)	Entropy 1.06581 (1.06687)	Top-1 acc 61.328 (63.344)	Top-5 acc 81.250 (83.527)	lr 0.01144
Train [64][810/3239]	Time 0.216 (0.642)	Data Time 0.001 (0.051)	Loss 2.4837 (2.5194)	Entropy 1.06581 (1.06686)	Top-1 acc 65.625 (63.316)	Top-5 acc 83.203 (83.503)	lr 0.01144
Train [64][820/3239]	Time 0.224 (0.640)	Data Time 0.001 (0.051)	Loss 2.5406 (2.5195)	Entropy 1.06578 (1.06685)	Top-1 acc 62.500 (63.306)	Top-5 acc 84.375 (83.503)	lr 0.01144
Train [64][830/3239]	Time 0.222 (0.638)	Data Time 0.002 (0.050)	Loss 2.4362 (2.5198)	Entropy 1.06570 (1.06683)	Top-1 acc 66.797 (63.305)	Top-5 acc 83.594 (83.498)	lr 0.01144
Train [64][840/3239]	Time 0.321 (0.636)	Data Time 0.001 (0.049)	Loss 2.5341 (2.5203)	Entropy 1.06563 (1.06682)	Top-1 acc 62.500 (63.285)	Top-5 acc 83.203 (83.493)	lr 0.01143
Train [64][850/3239]	Time 0.208 (0.634)	Data Time 0.001 (0.049)	Loss 2.6542 (2.5210)	Entropy 1.06563 (1.06681)	Top-1 acc 58.203 (63.272)	Top-5 acc 83.203 (83.484)	lr 0.01143
Train [64][860/3239]	Time 0.228 (0.632)	Data Time 0.001 (0.048)	Loss 2.3929 (2.5206)	Entropy 1.06564 (1.06679)	Top-1 acc 67.188 (63.286)	Top-5 acc 87.891 (83.499)	lr 0.01143
Train [64][870/3239]	Time 0.390 (0.687)	Data Time 0.002 (0.048)	Loss 2.4678 (2.5199)	Entropy 1.06559 (1.06678)	Top-1 acc 64.062 (63.309)	Top-5 acc 84.766 (83.513)	lr 0.01143
Train [64][880/3239]	Time 0.227 (0.686)	Data Time 0.002 (0.047)	Loss 2.4487 (2.5204)	Entropy 1.06563 (1.06677)	Top-1 acc 64.062 (63.305)	Top-5 acc 85.938 (83.510)	lr 0.01143
Train [64][890/3239]	Time 2.402 (0.684)	Data Time 0.002 (0.047)	Loss 2.4227 (2.5206)	Entropy 1.06563 (1.06675)	Top-1 acc 66.406 (63.301)	Top-5 acc 83.984 (83.506)	lr 0.01143
Train [64][900/3239]	Time 0.233 (0.679)	Data Time 0.001 (0.046)	Loss 2.5518 (2.5214)	Entropy 1.06560 (1.06674)	Top-1 acc 61.328 (63.286)	Top-5 acc 80.859 (83.495)	lr 0.01143
Train [64][910/3239]	Time 0.226 (0.677)	Data Time 0.001 (0.046)	Loss 2.5740 (2.5208)	Entropy 1.06556 (1.06673)	Top-1 acc 58.594 (63.301)	Top-5 acc 83.594 (83.511)	lr 0.01143
Train [64][920/3239]	Time 0.231 (0.674)	Data Time 0.001 (0.045)	Loss 2.4139 (2.5204)	Entropy 1.06548 (1.06671)	Top-1 acc 67.969 (63.316)	Top-5 acc 85.156 (83.514)	lr 0.01143
Train [64][930/3239]	Time 0.327 (0.672)	Data Time 0.001 (0.045)	Loss 2.6908 (2.5211)	Entropy 1.06543 (1.06670)	Top-1 acc 59.766 (63.302)	Top-5 acc 82.031 (83.511)	lr 0.01143
Train [64][940/3239]	Time 0.229 (0.670)	Data Time 0.001 (0.044)	Loss 2.4722 (2.5212)	Entropy 1.06537 (1.06669)	Top-1 acc 66.406 (63.315)	Top-5 acc 83.203 (83.504)	lr 0.01142
Train [64][950/3239]	Time 0.223 (0.668)	Data Time 0.001 (0.044)	Loss 2.3926 (2.5213)	Entropy 1.06534 (1.06667)	Top-1 acc 68.750 (63.311)	Top-5 acc 85.938 (83.505)	lr 0.01142
Train [64][960/3239]	Time 0.206 (0.666)	Data Time 0.001 (0.043)	Loss 2.5854 (2.5209)	Entropy 1.06534 (1.06666)	Top-1 acc 59.766 (63.325)	Top-5 acc 82.422 (83.505)	lr 0.01142
Train [64][970/3239]	Time 0.233 (0.664)	Data Time 0.001 (0.043)	Loss 2.6736 (2.5210)	Entropy 1.06527 (1.06665)	Top-1 acc 63.281 (63.325)	Top-5 acc 80.469 (83.503)	lr 0.01142
Train [64][980/3239]	Time 0.227 (0.662)	Data Time 0.001 (0.043)	Loss 2.6159 (2.5209)	Entropy 1.06529 (1.06663)	Top-1 acc 62.109 (63.314)	Top-5 acc 83.203 (83.507)	lr 0.01142
Train [64][990/3239]	Time 0.269 (0.660)	Data Time 0.001 (0.042)	Loss 2.3662 (2.5210)	Entropy 1.06522 (1.06662)	Top-1 acc 67.578 (63.315)	Top-5 acc 84.766 (83.506)	lr 0.01142
Train [64][1000/3239]	Time 2.570 (0.658)	Data Time 0.001 (0.042)	Loss 2.4832 (2.5211)	Entropy 1.06522 (1.06660)	Top-1 acc 64.062 (63.325)	Top-5 acc 85.156 (83.500)	lr 0.01142
Train [64][1010/3239]	Time 0.266 (0.654)	Data Time 0.001 (0.041)	Loss 2.6639 (2.5212)	Entropy 1.06524 (1.06659)	Top-1 acc 57.812 (63.325)	Top-5 acc 79.688 (83.496)	lr 0.01142
Train [64][1020/3239]	Time 0.331 (0.652)	Data Time 0.001 (0.041)	Loss 2.5868 (2.5215)	Entropy 1.06524 (1.06658)	Top-1 acc 64.844 (63.321)	Top-5 acc 80.859 (83.491)	lr 0.01142
Train [64][1030/3239]	Time 0.228 (0.650)	Data Time 0.001 (0.041)	Loss 2.5011 (2.5218)	Entropy 1.06520 (1.06656)	Top-1 acc 65.234 (63.317)	Top-5 acc 83.594 (83.485)	lr 0.01142
Train [64][1040/3239]	Time 0.214 (0.649)	Data Time 0.001 (0.040)	Loss 2.5410 (2.5220)	Entropy 1.06514 (1.06655)	Top-1 acc 61.719 (63.314)	Top-5 acc 84.375 (83.479)	lr 0.01141
Train [64][1050/3239]	Time 0.239 (0.647)	Data Time 0.001 (0.040)	Loss 2.4623 (2.5223)	Entropy 1.06512 (1.06654)	Top-1 acc 63.281 (63.298)	Top-5 acc 84.375 (83.475)	lr 0.01141
Train [64][1060/3239]	Time 0.231 (0.645)	Data Time 0.001 (0.039)	Loss 2.5818 (2.5223)	Entropy 1.06507 (1.06652)	Top-1 acc 63.672 (63.308)	Top-5 acc 82.422 (83.472)	lr 0.01141
Train [64][1070/3239]	Time 0.207 (0.644)	Data Time 0.001 (0.039)	Loss 2.5435 (2.5222)	Entropy 1.06497 (1.06651)	Top-1 acc 63.672 (63.309)	Top-5 acc 83.594 (83.472)	lr 0.01141
Train [64][1080/3239]	Time 0.226 (0.642)	Data Time 0.001 (0.039)	Loss 2.4697 (2.5221)	Entropy 1.06494 (1.06649)	Top-1 acc 63.281 (63.314)	Top-5 acc 85.547 (83.471)	lr 0.01141
Train [64][1090/3239]	Time 0.228 (0.640)	Data Time 0.001 (0.038)	Loss 2.6309 (2.5225)	Entropy 1.06493 (1.06648)	Top-1 acc 60.938 (63.302)	Top-5 acc 81.641 (83.459)	lr 0.01141
Train [64][1100/3239]	Time 0.206 (0.639)	Data Time 0.001 (0.038)	Loss 2.7183 (2.5225)	Entropy 1.06490 (1.06647)	Top-1 acc 59.766 (63.297)	Top-5 acc 79.297 (83.462)	lr 0.01141
Train [64][1110/3239]	Time 2.556 (0.637)	Data Time 0.001 (0.038)	Loss 2.5599 (2.5221)	Entropy 1.06490 (1.06645)	Top-1 acc 65.234 (63.298)	Top-5 acc 82.422 (83.475)	lr 0.01141
Train [64][1120/3239]	Time 0.218 (0.633)	Data Time 0.001 (0.037)	Loss 2.5268 (2.5226)	Entropy 1.06489 (1.06644)	Top-1 acc 60.547 (63.281)	Top-5 acc 81.641 (83.464)	lr 0.01141
Train [64][1130/3239]	Time 0.211 (0.632)	Data Time 0.001 (0.037)	Loss 2.5045 (2.5229)	Entropy 1.06480 (1.06642)	Top-1 acc 62.109 (63.276)	Top-5 acc 83.203 (83.451)	lr 0.01141
Train [64][1140/3239]	Time 0.241 (0.630)	Data Time 0.001 (0.037)	Loss 2.5567 (2.5229)	Entropy 1.06483 (1.06641)	Top-1 acc 63.281 (63.282)	Top-5 acc 80.859 (83.448)	lr 0.01140
Train [64][1150/3239]	Time 0.228 (0.629)	Data Time 0.001 (0.037)	Loss 2.5226 (2.5229)	Entropy 1.06477 (1.06640)	Top-1 acc 63.281 (63.288)	Top-5 acc 84.375 (83.454)	lr 0.01140
Train [64][1160/3239]	Time 0.321 (0.628)	Data Time 0.001 (0.036)	Loss 2.6075 (2.5233)	Entropy 1.06473 (1.06638)	Top-1 acc 61.719 (63.277)	Top-5 acc 81.641 (83.445)	lr 0.01140
Train [64][1170/3239]	Time 0.233 (0.626)	Data Time 0.001 (0.036)	Loss 2.4092 (2.5232)	Entropy 1.06459 (1.06637)	Top-1 acc 67.188 (63.285)	Top-5 acc 87.109 (83.451)	lr 0.01140
Train [64][1180/3239]	Time 0.218 (0.625)	Data Time 0.001 (0.036)	Loss 2.4640 (2.5229)	Entropy 1.06456 (1.06635)	Top-1 acc 66.016 (63.293)	Top-5 acc 86.328 (83.463)	lr 0.01140
Train [64][1190/3239]	Time 0.226 (0.624)	Data Time 0.002 (0.035)	Loss 2.4428 (2.5229)	Entropy 1.06456 (1.06634)	Top-1 acc 67.188 (63.291)	Top-5 acc 82.031 (83.459)	lr 0.01140
Train [64][1200/3239]	Time 0.226 (0.622)	Data Time 0.002 (0.035)	Loss 2.6058 (2.5232)	Entropy 1.06458 (1.06632)	Top-1 acc 59.375 (63.280)	Top-5 acc 82.422 (83.453)	lr 0.01140
Train [64][1210/3239]	Time 0.256 (0.621)	Data Time 0.001 (0.035)	Loss 2.4358 (2.5227)	Entropy 1.06453 (1.06631)	Top-1 acc 67.188 (63.293)	Top-5 acc 83.594 (83.464)	lr 0.01140
Train [64][1220/3239]	Time 2.478 (0.620)	Data Time 0.001 (0.035)	Loss 2.3015 (2.5223)	Entropy 1.06453 (1.06629)	Top-1 acc 66.797 (63.308)	Top-5 acc 88.281 (83.465)	lr 0.01140
Train [64][1230/3239]	Time 0.235 (0.617)	Data Time 0.001 (0.034)	Loss 2.4520 (2.5220)	Entropy 1.06449 (1.06628)	Top-1 acc 62.891 (63.312)	Top-5 acc 85.938 (83.472)	lr 0.01140
Train [64][1240/3239]	Time 0.353 (0.655)	Data Time 0.003 (0.034)	Loss 2.4437 (2.5224)	Entropy 1.06444 (1.06626)	Top-1 acc 63.672 (63.299)	Top-5 acc 84.766 (83.466)	lr 0.01139
Train [64][1250/3239]	Time 0.342 (0.654)	Data Time 0.002 (0.034)	Loss 2.5263 (2.5223)	Entropy 1.06436 (1.06625)	Top-1 acc 61.719 (63.311)	Top-5 acc 80.859 (83.470)	lr 0.01139
Train [64][1260/3239]	Time 0.246 (0.653)	Data Time 0.001 (0.034)	Loss 2.5226 (2.5219)	Entropy 1.06436 (1.06623)	Top-1 acc 63.281 (63.322)	Top-5 acc 83.984 (83.480)	lr 0.01139
Train [64][1270/3239]	Time 0.204 (0.652)	Data Time 0.001 (0.033)	Loss 2.5627 (2.5223)	Entropy 1.06432 (1.06622)	Top-1 acc 59.766 (63.308)	Top-5 acc 82.422 (83.474)	lr 0.01139
Train [64][1280/3239]	Time 0.216 (0.650)	Data Time 0.001 (0.033)	Loss 2.5659 (2.5223)	Entropy 1.06428 (1.06620)	Top-1 acc 62.891 (63.310)	Top-5 acc 83.984 (83.472)	lr 0.01139
Train [64][1290/3239]	Time 0.211 (0.648)	Data Time 0.001 (0.033)	Loss 2.5574 (2.5226)	Entropy 1.06429 (1.06619)	Top-1 acc 62.500 (63.304)	Top-5 acc 82.031 (83.469)	lr 0.01139
Train [64][1300/3239]	Time 0.224 (0.647)	Data Time 0.001 (0.033)	Loss 2.4469 (2.5224)	Entropy 1.06428 (1.06617)	Top-1 acc 66.016 (63.312)	Top-5 acc 83.984 (83.478)	lr 0.01139
Train [64][1310/3239]	Time 0.236 (0.646)	Data Time 0.001 (0.032)	Loss 2.5748 (2.5224)	Entropy 1.06430 (1.06616)	Top-1 acc 61.328 (63.309)	Top-5 acc 81.250 (83.476)	lr 0.01139
Train [64][1320/3239]	Time 0.275 (0.644)	Data Time 0.001 (0.032)	Loss 2.5417 (2.5223)	Entropy 1.06430 (1.06615)	Top-1 acc 60.156 (63.308)	Top-5 acc 82.812 (83.474)	lr 0.01139
Train [64][1330/3239]	Time 2.545 (0.643)	Data Time 0.001 (0.032)	Loss 2.5505 (2.5225)	Entropy 1.06430 (1.06613)	Top-1 acc 63.281 (63.303)	Top-5 acc 81.250 (83.466)	lr 0.01139
Train [64][1340/3239]	Time 0.215 (0.640)	Data Time 0.001 (0.032)	Loss 2.4503 (2.5226)	Entropy 1.06428 (1.06612)	Top-1 acc 64.453 (63.314)	Top-5 acc 86.719 (83.464)	lr 0.01138
Train [64][1350/3239]	Time 0.225 (0.639)	Data Time 0.001 (0.031)	Loss 2.8616 (2.5227)	Entropy 1.06428 (1.06610)	Top-1 acc 53.516 (63.312)	Top-5 acc 76.953 (83.460)	lr 0.01138
Train [64][1360/3239]	Time 0.229 (0.638)	Data Time 0.001 (0.031)	Loss 2.5851 (2.5227)	Entropy 1.06434 (1.06609)	Top-1 acc 59.766 (63.311)	Top-5 acc 82.812 (83.466)	lr 0.01138
Train [64][1370/3239]	Time 0.236 (0.636)	Data Time 0.001 (0.031)	Loss 2.5448 (2.5226)	Entropy 1.06418 (1.06608)	Top-1 acc 62.500 (63.313)	Top-5 acc 81.250 (83.470)	lr 0.01138
Train [64][1380/3239]	Time 0.240 (0.635)	Data Time 0.001 (0.031)	Loss 2.4332 (2.5231)	Entropy 1.06418 (1.06606)	Top-1 acc 67.188 (63.301)	Top-5 acc 85.156 (83.455)	lr 0.01138
Train [64][1390/3239]	Time 0.323 (0.634)	Data Time 0.001 (0.031)	Loss 2.6385 (2.5233)	Entropy 1.06421 (1.06605)	Top-1 acc 60.547 (63.290)	Top-5 acc 81.250 (83.453)	lr 0.01138
Train [64][1400/3239]	Time 0.217 (0.633)	Data Time 0.001 (0.030)	Loss 2.4875 (2.5235)	Entropy 1.06425 (1.06604)	Top-1 acc 62.500 (63.288)	Top-5 acc 83.984 (83.446)	lr 0.01138
Train [64][1410/3239]	Time 0.215 (0.632)	Data Time 0.001 (0.030)	Loss 2.4494 (2.5238)	Entropy 1.06423 (1.06602)	Top-1 acc 64.062 (63.278)	Top-5 acc 83.984 (83.437)	lr 0.01138
Train [64][1420/3239]	Time 0.229 (0.630)	Data Time 0.001 (0.030)	Loss 2.4789 (2.5237)	Entropy 1.06422 (1.06601)	Top-1 acc 63.281 (63.283)	Top-5 acc 81.641 (83.438)	lr 0.01138
Train [64][1430/3239]	Time 0.235 (0.629)	Data Time 0.001 (0.030)	Loss 2.4745 (2.5238)	Entropy 1.06419 (1.06600)	Top-1 acc 67.578 (63.276)	Top-5 acc 83.203 (83.429)	lr 0.01138
Train [64][1440/3239]	Time 2.534 (0.628)	Data Time 0.002 (0.030)	Loss 2.5412 (2.5237)	Entropy 1.06419 (1.06599)	Top-1 acc 64.062 (63.277)	Top-5 acc 83.203 (83.432)	lr 0.01137
Train [64][1450/3239]	Time 0.269 (0.626)	Data Time 0.001 (0.029)	Loss 2.4766 (2.5234)	Entropy 1.06420 (1.06597)	Top-1 acc 63.672 (63.283)	Top-5 acc 83.594 (83.439)	lr 0.01137
Train [64][1460/3239]	Time 0.220 (0.625)	Data Time 0.001 (0.029)	Loss 2.4680 (2.5231)	Entropy 1.06419 (1.06596)	Top-1 acc 64.453 (63.291)	Top-5 acc 85.938 (83.444)	lr 0.01137
Train [64][1470/3239]	Time 0.239 (0.623)	Data Time 0.001 (0.029)	Loss 2.4571 (2.5229)	Entropy 1.06408 (1.06595)	Top-1 acc 60.938 (63.287)	Top-5 acc 83.984 (83.449)	lr 0.01137
Train [64][1480/3239]	Time 0.348 (0.622)	Data Time 0.001 (0.029)	Loss 2.2769 (2.5231)	Entropy 1.06406 (1.06594)	Top-1 acc 70.312 (63.282)	Top-5 acc 87.109 (83.445)	lr 0.01137
Train [64][1490/3239]	Time 0.233 (0.621)	Data Time 0.001 (0.029)	Loss 2.4783 (2.5232)	Entropy 1.06402 (1.06592)	Top-1 acc 62.891 (63.283)	Top-5 acc 84.766 (83.440)	lr 0.01137
Train [64][1500/3239]	Time 0.230 (0.620)	Data Time 0.001 (0.028)	Loss 2.5646 (2.5235)	Entropy 1.06402 (1.06591)	Top-1 acc 62.500 (63.274)	Top-5 acc 83.203 (83.435)	lr 0.01137
Train [64][1510/3239]	Time 0.230 (0.619)	Data Time 0.001 (0.028)	Loss 2.6832 (2.5234)	Entropy 1.06402 (1.06590)	Top-1 acc 54.297 (63.270)	Top-5 acc 78.125 (83.432)	lr 0.01137
Train [64][1520/3239]	Time 0.217 (0.618)	Data Time 0.001 (0.028)	Loss 2.4687 (2.5234)	Entropy 1.06402 (1.06589)	Top-1 acc 62.109 (63.266)	Top-5 acc 82.812 (83.438)	lr 0.01137
Train [64][1530/3239]	Time 0.226 (0.617)	Data Time 0.001 (0.028)	Loss 2.4873 (2.5233)	Entropy 1.06396 (1.06587)	Top-1 acc 64.844 (63.266)	Top-5 acc 84.375 (83.442)	lr 0.01137
Train [64][1540/3239]	Time 0.252 (0.617)	Data Time 0.001 (0.028)	Loss 2.3531 (2.5231)	Entropy 1.06393 (1.06586)	Top-1 acc 64.844 (63.267)	Top-5 acc 87.500 (83.453)	lr 0.01136
Train [64][1550/3239]	Time 2.440 (0.616)	Data Time 0.001 (0.028)	Loss 2.4881 (2.5235)	Entropy 1.06393 (1.06585)	Top-1 acc 63.672 (63.258)	Top-5 acc 84.766 (83.447)	lr 0.01136
Train [64][1560/3239]	Time 0.224 (0.613)	Data Time 0.001 (0.027)	Loss 2.6574 (2.5237)	Entropy 1.06390 (1.06584)	Top-1 acc 60.156 (63.253)	Top-5 acc 81.250 (83.445)	lr 0.01136
Train [64][1570/3239]	Time 0.333 (0.612)	Data Time 0.001 (0.027)	Loss 2.5314 (2.5239)	Entropy 1.06390 (1.06583)	Top-1 acc 61.719 (63.248)	Top-5 acc 83.594 (83.443)	lr 0.01136
Train [64][1580/3239]	Time 0.239 (0.611)	Data Time 0.001 (0.027)	Loss 2.5267 (2.5241)	Entropy 1.06393 (1.06581)	Top-1 acc 60.547 (63.239)	Top-5 acc 82.031 (83.435)	lr 0.01136
Train [64][1590/3239]	Time 0.213 (0.610)	Data Time 0.001 (0.027)	Loss 2.4723 (2.5240)	Entropy 1.06388 (1.06580)	Top-1 acc 63.281 (63.243)	Top-5 acc 85.156 (83.436)	lr 0.01136
Train [64][1600/3239]	Time 0.299 (0.643)	Data Time 0.003 (0.027)	Loss 2.6396 (2.5241)	Entropy 1.06387 (1.06579)	Top-1 acc 60.547 (63.239)	Top-5 acc 78.906 (83.439)	lr 0.01136
Train [64][1610/3239]	Time 0.235 (0.642)	Data Time 0.002 (0.027)	Loss 2.5130 (2.5243)	Entropy 1.06387 (1.06578)	Top-1 acc 61.328 (63.232)	Top-5 acc 81.641 (83.432)	lr 0.01136
Train [64][1620/3239]	Time 0.233 (0.641)	Data Time 0.002 (0.026)	Loss 2.5029 (2.5242)	Entropy 1.06384 (1.06577)	Top-1 acc 65.625 (63.233)	Top-5 acc 82.031 (83.434)	lr 0.01136
Train [64][1630/3239]	Time 0.239 (0.640)	Data Time 0.002 (0.026)	Loss 2.3969 (2.5242)	Entropy 1.06379 (1.06575)	Top-1 acc 64.453 (63.234)	Top-5 acc 85.547 (83.434)	lr 0.01136
Train [64][1640/3239]	Time 0.226 (0.639)	Data Time 0.001 (0.026)	Loss 2.6516 (2.5246)	Entropy 1.06382 (1.06574)	Top-1 acc 60.938 (63.231)	Top-5 acc 79.297 (83.426)	lr 0.01135
Train [64][1650/3239]	Time 0.274 (0.638)	Data Time 0.001 (0.026)	Loss 2.3238 (2.5244)	Entropy 1.06390 (1.06573)	Top-1 acc 67.188 (63.239)	Top-5 acc 88.672 (83.429)	lr 0.01135
Train [64][1660/3239]	Time 2.640 (0.637)	Data Time 0.001 (0.026)	Loss 2.5054 (2.5243)	Entropy 1.06390 (1.06572)	Top-1 acc 65.625 (63.241)	Top-5 acc 84.766 (83.428)	lr 0.01135
Train [64][1670/3239]	Time 0.276 (0.634)	Data Time 0.002 (0.026)	Loss 2.5363 (2.5244)	Entropy 1.06356 (1.06571)	Top-1 acc 64.844 (63.237)	Top-5 acc 82.422 (83.428)	lr 0.01135
Train [64][1680/3239]	Time 0.219 (0.633)	Data Time 0.001 (0.026)	Loss 2.5862 (2.5246)	Entropy 1.06354 (1.06569)	Top-1 acc 64.453 (63.233)	Top-5 acc 82.812 (83.423)	lr 0.01135
Train [64][1690/3239]	Time 0.203 (0.632)	Data Time 0.001 (0.025)	Loss 2.5217 (2.5246)	Entropy 1.06345 (1.06568)	Top-1 acc 62.500 (63.233)	Top-5 acc 83.984 (83.424)	lr 0.01135
Train [64][1700/3239]	Time 0.236 (0.631)	Data Time 0.001 (0.025)	Loss 2.3426 (2.5245)	Entropy 1.06337 (1.06567)	Top-1 acc 67.188 (63.233)	Top-5 acc 87.109 (83.422)	lr 0.01135
Train [64][1710/3239]	Time 0.320 (0.630)	Data Time 0.001 (0.025)	Loss 2.5714 (2.5246)	Entropy 1.06335 (1.06565)	Top-1 acc 64.453 (63.235)	Top-5 acc 82.812 (83.420)	lr 0.01135
Train [64][1720/3239]	Time 0.235 (0.629)	Data Time 0.001 (0.025)	Loss 2.6474 (2.5248)	Entropy 1.06333 (1.06564)	Top-1 acc 57.812 (63.228)	Top-5 acc 81.641 (83.415)	lr 0.01135
Train [64][1730/3239]	Time 0.236 (0.629)	Data Time 0.001 (0.025)	Loss 2.5745 (2.5250)	Entropy 1.06326 (1.06563)	Top-1 acc 62.500 (63.228)	Top-5 acc 80.469 (83.412)	lr 0.01135
Train [64][1740/3239]	Time 0.228 (0.628)	Data Time 0.001 (0.025)	Loss 2.5771 (2.5248)	Entropy 1.06318 (1.06561)	Top-1 acc 62.500 (63.232)	Top-5 acc 80.078 (83.414)	lr 0.01134
Train [64][1750/3239]	Time 0.223 (0.627)	Data Time 0.001 (0.025)	Loss 2.3114 (2.5243)	Entropy 1.06316 (1.06560)	Top-1 acc 68.359 (63.243)	Top-5 acc 86.719 (83.422)	lr 0.01134
Train [64][1760/3239]	Time 0.228 (0.626)	Data Time 0.001 (0.024)	Loss 2.4713 (2.5243)	Entropy 1.06313 (1.06558)	Top-1 acc 65.234 (63.243)	Top-5 acc 82.812 (83.418)	lr 0.01134
Train [64][1770/3239]	Time 2.486 (0.625)	Data Time 0.001 (0.024)	Loss 2.5079 (2.5243)	Entropy 1.06313 (1.06557)	Top-1 acc 66.406 (63.243)	Top-5 acc 83.203 (83.416)	lr 0.01134
Train [64][1780/3239]	Time 0.237 (0.623)	Data Time 0.001 (0.024)	Loss 2.4275 (2.5243)	Entropy 1.06311 (1.06556)	Top-1 acc 66.797 (63.241)	Top-5 acc 83.203 (83.417)	lr 0.01134
Train [64][1790/3239]	Time 0.227 (0.622)	Data Time 0.001 (0.024)	Loss 2.4298 (2.5243)	Entropy 1.06308 (1.06554)	Top-1 acc 66.016 (63.245)	Top-5 acc 84.766 (83.413)	lr 0.01134
Train [64][1800/3239]	Time 0.345 (0.621)	Data Time 0.001 (0.024)	Loss 2.5276 (2.5244)	Entropy 1.06307 (1.06553)	Top-1 acc 62.109 (63.239)	Top-5 acc 82.422 (83.415)	lr 0.01134
Train [64][1810/3239]	Time 0.226 (0.620)	Data Time 0.001 (0.024)	Loss 2.5307 (2.5241)	Entropy 1.06307 (1.06552)	Top-1 acc 62.891 (63.247)	Top-5 acc 82.031 (83.422)	lr 0.01134
Train [64][1820/3239]	Time 0.229 (0.619)	Data Time 0.001 (0.024)	Loss 2.5682 (2.5243)	Entropy 1.06303 (1.06550)	Top-1 acc 61.328 (63.241)	Top-5 acc 81.250 (83.420)	lr 0.01134
Train [64][1830/3239]	Time 0.225 (0.618)	Data Time 0.001 (0.024)	Loss 2.4487 (2.5243)	Entropy 1.06280 (1.06549)	Top-1 acc 62.891 (63.235)	Top-5 acc 84.766 (83.419)	lr 0.01134
Train [64][1840/3239]	Time 0.230 (0.618)	Data Time 0.001 (0.023)	Loss 2.4952 (2.5244)	Entropy 1.06276 (1.06547)	Top-1 acc 66.016 (63.238)	Top-5 acc 83.594 (83.416)	lr 0.01133
Train [64][1850/3239]	Time 0.218 (0.617)	Data Time 0.001 (0.023)	Loss 2.4396 (2.5244)	Entropy 1.06269 (1.06546)	Top-1 acc 65.234 (63.241)	Top-5 acc 87.109 (83.416)	lr 0.01133
Train [64][1860/3239]	Time 0.224 (0.616)	Data Time 0.001 (0.023)	Loss 2.6833 (2.5245)	Entropy 1.06269 (1.06544)	Top-1 acc 56.641 (63.237)	Top-5 acc 80.859 (83.414)	lr 0.01133
Train [64][1870/3239]	Time 0.273 (0.615)	Data Time 0.001 (0.023)	Loss 2.5959 (2.5247)	Entropy 1.06268 (1.06543)	Top-1 acc 59.766 (63.240)	Top-5 acc 82.422 (83.410)	lr 0.01133
Train [64][1880/3239]	Time 2.569 (0.615)	Data Time 0.001 (0.023)	Loss 2.5426 (2.5247)	Entropy 1.06268 (1.06541)	Top-1 acc 62.891 (63.246)	Top-5 acc 82.422 (83.408)	lr 0.01133
Train [64][1890/3239]	Time 0.355 (0.613)	Data Time 0.001 (0.023)	Loss 2.5032 (2.5245)	Entropy 1.06265 (1.06540)	Top-1 acc 62.109 (63.243)	Top-5 acc 83.203 (83.407)	lr 0.01133
Train [64][1900/3239]	Time 0.242 (0.612)	Data Time 0.001 (0.023)	Loss 2.6507 (2.5245)	Entropy 1.06264 (1.06539)	Top-1 acc 61.328 (63.243)	Top-5 acc 79.688 (83.407)	lr 0.01133
Train [64][1910/3239]	Time 0.236 (0.611)	Data Time 0.001 (0.023)	Loss 2.7415 (2.5246)	Entropy 1.06263 (1.06537)	Top-1 acc 57.422 (63.239)	Top-5 acc 80.859 (83.403)	lr 0.01133
Train [64][1920/3239]	Time 0.214 (0.610)	Data Time 0.001 (0.023)	Loss 2.5720 (2.5247)	Entropy 1.06258 (1.06536)	Top-1 acc 63.672 (63.242)	Top-5 acc 83.203 (83.400)	lr 0.01133
Train [64][1930/3239]	Time 0.238 (0.610)	Data Time 0.001 (0.022)	Loss 2.3908 (2.5248)	Entropy 1.06248 (1.06534)	Top-1 acc 67.188 (63.244)	Top-5 acc 88.281 (83.399)	lr 0.01133
Train [64][1940/3239]	Time 0.205 (0.609)	Data Time 0.001 (0.022)	Loss 2.6268 (2.5248)	Entropy 1.06242 (1.06533)	Top-1 acc 60.547 (63.245)	Top-5 acc 82.812 (83.398)	lr 0.01132
Train [64][1950/3239]	Time 0.237 (0.608)	Data Time 0.001 (0.022)	Loss 2.4025 (2.5249)	Entropy 1.06240 (1.06531)	Top-1 acc 64.844 (63.243)	Top-5 acc 85.938 (83.397)	lr 0.01132
Train [64][1960/3239]	Time 0.263 (0.633)	Data Time 0.003 (0.022)	Loss 2.7169 (2.5248)	Entropy 1.06238 (1.06530)	Top-1 acc 58.984 (63.242)	Top-5 acc 80.078 (83.399)	lr 0.01132
Train [64][1970/3239]	Time 0.235 (0.633)	Data Time 0.002 (0.022)	Loss 2.5535 (2.5247)	Entropy 1.06250 (1.06528)	Top-1 acc 63.672 (63.244)	Top-5 acc 83.203 (83.399)	lr 0.01132
Train [64][1980/3239]	Time 0.275 (0.632)	Data Time 0.001 (0.022)	Loss 2.4115 (2.5248)	Entropy 1.06250 (1.06527)	Top-1 acc 64.844 (63.238)	Top-5 acc 87.109 (83.399)	lr 0.01132
Train [64][1990/3239]	Time 2.468 (0.631)	Data Time 0.001 (0.022)	Loss 2.4731 (2.5248)	Entropy 1.06250 (1.06525)	Top-1 acc 66.016 (63.239)	Top-5 acc 83.203 (83.398)	lr 0.01132
Train [64][2000/3239]	Time 0.237 (0.629)	Data Time 0.001 (0.022)	Loss 2.7257 (2.5248)	Entropy 1.06252 (1.06524)	Top-1 acc 57.422 (63.239)	Top-5 acc 78.516 (83.394)	lr 0.01132
Train [64][2010/3239]	Time 0.242 (0.628)	Data Time 0.001 (0.022)	Loss 2.5010 (2.5248)	Entropy 1.06250 (1.06523)	Top-1 acc 64.844 (63.236)	Top-5 acc 83.203 (83.392)	lr 0.01132
Train [64][2020/3239]	Time 0.230 (0.628)	Data Time 0.001 (0.022)	Loss 2.5675 (2.5248)	Entropy 1.06236 (1.06521)	Top-1 acc 61.328 (63.237)	Top-5 acc 82.031 (83.392)	lr 0.01132
Train [64][2030/3239]	Time 0.233 (0.627)	Data Time 0.001 (0.021)	Loss 2.4290 (2.5247)	Entropy 1.06235 (1.06520)	Top-1 acc 66.016 (63.242)	Top-5 acc 85.156 (83.394)	lr 0.01131
Train [64][2040/3239]	Time 0.234 (0.626)	Data Time 0.001 (0.021)	Loss 2.5648 (2.5245)	Entropy 1.06221 (1.06518)	Top-1 acc 65.625 (63.248)	Top-5 acc 80.859 (83.398)	lr 0.01131
Train [64][2050/3239]	Time 0.219 (0.625)	Data Time 0.001 (0.021)	Loss 2.6170 (2.5246)	Entropy 1.06222 (1.06517)	Top-1 acc 61.328 (63.247)	Top-5 acc 80.859 (83.390)	lr 0.01131
Train [64][2060/3239]	Time 0.234 (0.625)	Data Time 0.001 (0.021)	Loss 2.5239 (2.5247)	Entropy 1.06219 (1.06516)	Top-1 acc 62.891 (63.251)	Top-5 acc 85.156 (83.389)	lr 0.01131
Train [64][2070/3239]	Time 0.217 (0.624)	Data Time 0.001 (0.021)	Loss 2.5253 (2.5248)	Entropy 1.06217 (1.06514)	Top-1 acc 63.672 (63.249)	Top-5 acc 80.469 (83.386)	lr 0.01131
Train [64][2080/3239]	Time 0.231 (0.623)	Data Time 0.001 (0.021)	Loss 2.5880 (2.5250)	Entropy 1.06209 (1.06513)	Top-1 acc 61.719 (63.243)	Top-5 acc 82.812 (83.382)	lr 0.01131
Train [64][2090/3239]	Time 0.228 (0.622)	Data Time 0.001 (0.021)	Loss 2.7052 (2.5248)	Entropy 1.06207 (1.06511)	Top-1 acc 58.984 (63.249)	Top-5 acc 80.469 (83.385)	lr 0.01131
Train [64][2100/3239]	Time 2.580 (0.622)	Data Time 0.001 (0.021)	Loss 2.4762 (2.5245)	Entropy 1.06207 (1.06510)	Top-1 acc 63.281 (63.250)	Top-5 acc 83.594 (83.387)	lr 0.01131
Train [64][2110/3239]	Time 0.234 (0.620)	Data Time 0.001 (0.021)	Loss 2.4978 (2.5245)	Entropy 1.06196 (1.06508)	Top-1 acc 64.062 (63.248)	Top-5 acc 84.375 (83.389)	lr 0.01131
Train [64][2120/3239]	Time 0.212 (0.619)	Data Time 0.001 (0.021)	Loss 2.3503 (2.5244)	Entropy 1.06195 (1.06507)	Top-1 acc 65.625 (63.246)	Top-5 acc 88.281 (83.390)	lr 0.01131
Train [64][2130/3239]	Time 0.219 (0.618)	Data Time 0.001 (0.021)	Loss 2.5203 (2.5244)	Entropy 1.06195 (1.06505)	Top-1 acc 63.281 (63.243)	Top-5 acc 83.984 (83.391)	lr 0.01130
Train [64][2140/3239]	Time 0.242 (0.618)	Data Time 0.001 (0.020)	Loss 2.4470 (2.5242)	Entropy 1.06195 (1.06504)	Top-1 acc 62.500 (63.243)	Top-5 acc 82.031 (83.393)	lr 0.01130
Train [64][2150/3239]	Time 0.235 (0.617)	Data Time 0.001 (0.020)	Loss 2.4783 (2.5240)	Entropy 1.06192 (1.06502)	Top-1 acc 67.578 (63.249)	Top-5 acc 84.766 (83.398)	lr 0.01130
Train [64][2160/3239]	Time 0.320 (0.616)	Data Time 0.001 (0.020)	Loss 2.4207 (2.5240)	Entropy 1.06186 (1.06501)	Top-1 acc 66.406 (63.250)	Top-5 acc 82.812 (83.393)	lr 0.01130
Train [64][2170/3239]	Time 0.218 (0.616)	Data Time 0.001 (0.020)	Loss 2.5399 (2.5240)	Entropy 1.06189 (1.06500)	Top-1 acc 62.109 (63.254)	Top-5 acc 83.984 (83.394)	lr 0.01130
Train [64][2180/3239]	Time 0.237 (0.615)	Data Time 0.001 (0.020)	Loss 2.6577 (2.5242)	Entropy 1.06187 (1.06498)	Top-1 acc 58.984 (63.253)	Top-5 acc 82.812 (83.390)	lr 0.01130
Train [64][2190/3239]	Time 0.234 (0.614)	Data Time 0.001 (0.020)	Loss 2.5655 (2.5241)	Entropy 1.06185 (1.06497)	Top-1 acc 64.062 (63.259)	Top-5 acc 82.031 (83.389)	lr 0.01130
Train [64][2200/3239]	Time 0.213 (0.614)	Data Time 0.003 (0.020)	Loss 2.4500 (2.5241)	Entropy 1.06183 (1.06495)	Top-1 acc 64.062 (63.254)	Top-5 acc 83.594 (83.389)	lr 0.01130
Train [64][2210/3239]	Time 2.544 (0.613)	Data Time 0.002 (0.020)	Loss 2.4085 (2.5243)	Entropy 1.06183 (1.06494)	Top-1 acc 62.891 (63.245)	Top-5 acc 85.156 (83.388)	lr 0.01130
Train [64][2220/3239]	Time 0.225 (0.611)	Data Time 0.001 (0.020)	Loss 2.5425 (2.5245)	Entropy 1.06186 (1.06493)	Top-1 acc 60.938 (63.235)	Top-5 acc 84.766 (83.385)	lr 0.01130
Train [64][2230/3239]	Time 0.282 (0.611)	Data Time 0.001 (0.020)	Loss 2.4486 (2.5245)	Entropy 1.06181 (1.06491)	Top-1 acc 66.406 (63.239)	Top-5 acc 83.203 (83.383)	lr 0.01129
Train [64][2240/3239]	Time 0.224 (0.610)	Data Time 0.001 (0.020)	Loss 2.4445 (2.5243)	Entropy 1.06172 (1.06490)	Top-1 acc 64.062 (63.243)	Top-5 acc 85.938 (83.388)	lr 0.01129
Train [64][2250/3239]	Time 0.209 (0.609)	Data Time 0.001 (0.019)	Loss 2.5256 (2.5241)	Entropy 1.06168 (1.06488)	Top-1 acc 62.891 (63.240)	Top-5 acc 82.812 (83.389)	lr 0.01129
Train [64][2260/3239]	Time 0.218 (0.609)	Data Time 0.001 (0.019)	Loss 2.5626 (2.5246)	Entropy 1.06157 (1.06487)	Top-1 acc 65.625 (63.233)	Top-5 acc 82.812 (83.382)	lr 0.01129
Train [64][2270/3239]	Time 0.212 (0.608)	Data Time 0.001 (0.019)	Loss 2.5563 (2.5247)	Entropy 1.06154 (1.06485)	Top-1 acc 66.016 (63.229)	Top-5 acc 80.859 (83.378)	lr 0.01129
Train [64][2280/3239]	Time 0.255 (0.608)	Data Time 0.002 (0.019)	Loss 2.7481 (2.5247)	Entropy 1.06151 (1.06484)	Top-1 acc 59.766 (63.230)	Top-5 acc 80.469 (83.380)	lr 0.01129
Train [64][2290/3239]	Time 0.220 (0.607)	Data Time 0.001 (0.019)	Loss 2.3636 (2.5246)	Entropy 1.06149 (1.06482)	Top-1 acc 66.797 (63.230)	Top-5 acc 87.500 (83.381)	lr 0.01129
Train [64][2300/3239]	Time 0.321 (0.607)	Data Time 0.001 (0.019)	Loss 2.3452 (2.5243)	Entropy 1.06150 (1.06481)	Top-1 acc 71.875 (63.238)	Top-5 acc 87.500 (83.387)	lr 0.01129
Train [64][2310/3239]	Time 0.245 (0.606)	Data Time 0.001 (0.019)	Loss 2.6254 (2.5246)	Entropy 1.06134 (1.06480)	Top-1 acc 60.547 (63.230)	Top-5 acc 84.766 (83.382)	lr 0.01129
Train [64][2320/3239]	Time 55.959 (0.628)	Data Time 0.001 (0.019)	Loss 2.5486 (2.5248)	Entropy 1.06134 (1.06478)	Top-1 acc 60.156 (63.223)	Top-5 acc 85.547 (83.377)	lr 0.01129
Train [64][2330/3239]	Time 0.243 (0.627)	Data Time 0.002 (0.019)	Loss 2.5659 (2.5248)	Entropy 1.06135 (1.06477)	Top-1 acc 64.453 (63.224)	Top-5 acc 82.031 (83.378)	lr 0.01128
Train [64][2340/3239]	Time 0.236 (0.626)	Data Time 0.002 (0.019)	Loss 2.3947 (2.5249)	Entropy 1.06134 (1.06475)	Top-1 acc 67.969 (63.221)	Top-5 acc 85.938 (83.375)	lr 0.01128
Train [64][2350/3239]	Time 0.232 (0.626)	Data Time 0.002 (0.019)	Loss 2.3205 (2.5247)	Entropy 1.06130 (1.06474)	Top-1 acc 68.750 (63.226)	Top-5 acc 83.203 (83.382)	lr 0.01128
Train [64][2360/3239]	Time 0.223 (0.625)	Data Time 0.001 (0.019)	Loss 2.5380 (2.5245)	Entropy 1.06129 (1.06472)	Top-1 acc 64.453 (63.229)	Top-5 acc 82.031 (83.381)	lr 0.01128
Train [64][2370/3239]	Time 0.248 (0.624)	Data Time 0.002 (0.019)	Loss 2.4021 (2.5244)	Entropy 1.06122 (1.06471)	Top-1 acc 69.531 (63.236)	Top-5 acc 84.375 (83.383)	lr 0.01128
Train [64][2380/3239]	Time 0.234 (0.624)	Data Time 0.002 (0.019)	Loss 2.6612 (2.5245)	Entropy 1.06121 (1.06469)	Top-1 acc 61.719 (63.238)	Top-5 acc 81.250 (83.379)	lr 0.01128
Train [64][2390/3239]	Time 0.329 (0.623)	Data Time 0.001 (0.018)	Loss 2.5419 (2.5247)	Entropy 1.06118 (1.06468)	Top-1 acc 64.453 (63.233)	Top-5 acc 82.031 (83.375)	lr 0.01128
Train [64][2400/3239]	Time 0.227 (0.623)	Data Time 0.001 (0.018)	Loss 2.3976 (2.5248)	Entropy 1.06123 (1.06466)	Top-1 acc 67.578 (63.235)	Top-5 acc 83.203 (83.371)	lr 0.01128
Train [64][2410/3239]	Time 0.224 (0.622)	Data Time 0.001 (0.018)	Loss 2.4591 (2.5247)	Entropy 1.06121 (1.06465)	Top-1 acc 66.406 (63.242)	Top-5 acc 82.422 (83.368)	lr 0.01128
Train [64][2420/3239]	Time 0.247 (0.621)	Data Time 0.003 (0.018)	Loss 2.6637 (2.5247)	Entropy 1.06120 (1.06464)	Top-1 acc 60.547 (63.238)	Top-5 acc 80.859 (83.370)	lr 0.01128
Train [64][2430/3239]	Time 2.458 (0.621)	Data Time 0.001 (0.018)	Loss 2.6337 (2.5247)	Entropy 1.06120 (1.06462)	Top-1 acc 59.375 (63.240)	Top-5 acc 80.859 (83.368)	lr 0.01127
Train [64][2440/3239]	Time 0.226 (0.619)	Data Time 0.001 (0.018)	Loss 2.5974 (2.5247)	Entropy 1.06118 (1.06461)	Top-1 acc 60.156 (63.239)	Top-5 acc 80.859 (83.365)	lr 0.01127
Train [64][2450/3239]	Time 0.235 (0.619)	Data Time 0.001 (0.018)	Loss 2.5910 (2.5247)	Entropy 1.06118 (1.06459)	Top-1 acc 59.766 (63.236)	Top-5 acc 83.984 (83.368)	lr 0.01127
Train [64][2460/3239]	Time 0.217 (0.618)	Data Time 0.001 (0.018)	Loss 2.6918 (2.5250)	Entropy 1.06117 (1.06458)	Top-1 acc 56.641 (63.230)	Top-5 acc 77.344 (83.360)	lr 0.01127
Train [64][2470/3239]	Time 0.231 (0.618)	Data Time 0.001 (0.018)	Loss 2.6314 (2.5256)	Entropy 1.06107 (1.06457)	Top-1 acc 61.719 (63.222)	Top-5 acc 81.250 (83.349)	lr 0.01127
Train [64][2480/3239]	Time 0.321 (0.617)	Data Time 0.001 (0.018)	Loss 2.5256 (2.5255)	Entropy 1.06105 (1.06455)	Top-1 acc 64.453 (63.227)	Top-5 acc 82.422 (83.349)	lr 0.01127
Train [64][2490/3239]	Time 0.226 (0.616)	Data Time 0.001 (0.018)	Loss 2.4555 (2.5257)	Entropy 1.06104 (1.06454)	Top-1 acc 65.625 (63.223)	Top-5 acc 86.719 (83.349)	lr 0.01127
Train [64][2500/3239]	Time 0.243 (0.616)	Data Time 0.001 (0.018)	Loss 2.4640 (2.5256)	Entropy 1.06099 (1.06452)	Top-1 acc 66.406 (63.228)	Top-5 acc 84.766 (83.350)	lr 0.01127
Train [64][2510/3239]	Time 0.246 (0.615)	Data Time 0.001 (0.018)	Loss 2.5188 (2.5256)	Entropy 1.06097 (1.06451)	Top-1 acc 62.109 (63.229)	Top-5 acc 82.812 (83.352)	lr 0.01127
Train [64][2520/3239]	Time 0.219 (0.615)	Data Time 0.001 (0.018)	Loss 2.6749 (2.5256)	Entropy 1.06092 (1.06449)	Top-1 acc 58.984 (63.225)	Top-5 acc 80.078 (83.356)	lr 0.01127
Train [64][2530/3239]	Time 0.238 (0.614)	Data Time 0.001 (0.018)	Loss 2.5315 (2.5255)	Entropy 1.06091 (1.06448)	Top-1 acc 60.156 (63.226)	Top-5 acc 82.812 (83.356)	lr 0.01126
Train [64][2540/3239]	Time 2.586 (0.614)	Data Time 0.001 (0.017)	Loss 2.6664 (2.5257)	Entropy 1.06091 (1.06447)	Top-1 acc 58.594 (63.223)	Top-5 acc 81.641 (83.351)	lr 0.01126
Train [64][2550/3239]	Time 0.211 (0.612)	Data Time 0.001 (0.017)	Loss 2.6596 (2.5256)	Entropy 1.06092 (1.06445)	Top-1 acc 58.594 (63.230)	Top-5 acc 78.516 (83.352)	lr 0.01126
Train [64][2560/3239]	Time 0.241 (0.612)	Data Time 0.001 (0.017)	Loss 2.5122 (2.5258)	Entropy 1.06094 (1.06444)	Top-1 acc 64.453 (63.228)	Top-5 acc 84.375 (83.349)	lr 0.01126
Train [64][2570/3239]	Time 0.255 (0.611)	Data Time 0.001 (0.017)	Loss 2.6399 (2.5258)	Entropy 1.06084 (1.06443)	Top-1 acc 61.719 (63.227)	Top-5 acc 82.812 (83.348)	lr 0.01126
Train [64][2580/3239]	Time 0.207 (0.611)	Data Time 0.001 (0.017)	Loss 2.6843 (2.5261)	Entropy 1.06082 (1.06441)	Top-1 acc 58.594 (63.222)	Top-5 acc 80.078 (83.345)	lr 0.01126
Train [64][2590/3239]	Time 0.230 (0.610)	Data Time 0.001 (0.017)	Loss 2.6372 (2.5262)	Entropy 1.06082 (1.06440)	Top-1 acc 60.938 (63.219)	Top-5 acc 81.250 (83.342)	lr 0.01126
Train [64][2600/3239]	Time 0.232 (0.609)	Data Time 0.001 (0.017)	Loss 2.5353 (2.5262)	Entropy 1.06079 (1.06438)	Top-1 acc 60.547 (63.218)	Top-5 acc 83.594 (83.340)	lr 0.01126
Train [64][2610/3239]	Time 0.219 (0.609)	Data Time 0.001 (0.017)	Loss 2.7279 (2.5263)	Entropy 1.06075 (1.06437)	Top-1 acc 54.688 (63.216)	Top-5 acc 79.688 (83.336)	lr 0.01126
Train [64][2620/3239]	Time 0.200 (0.608)	Data Time 0.001 (0.017)	Loss 2.4523 (2.5263)	Entropy 1.06073 (1.06436)	Top-1 acc 64.844 (63.217)	Top-5 acc 84.766 (83.338)	lr 0.01126
Train [64][2630/3239]	Time 0.215 (0.608)	Data Time 0.001 (0.017)	Loss 2.4712 (2.5263)	Entropy 1.06073 (1.06434)	Top-1 acc 64.844 (63.215)	Top-5 acc 84.375 (83.338)	lr 0.01125
Train [64][2640/3239]	Time 0.215 (0.607)	Data Time 0.001 (0.017)	Loss 2.4398 (2.5264)	Entropy 1.06076 (1.06433)	Top-1 acc 65.234 (63.210)	Top-5 acc 84.375 (83.334)	lr 0.01125
Train [64][2650/3239]	Time 0.219 (0.607)	Data Time 0.001 (0.017)	Loss 2.4987 (2.5264)	Entropy 1.06076 (1.06431)	Top-1 acc 66.016 (63.212)	Top-5 acc 85.938 (83.337)	lr 0.01125
Train [64][2660/3239]	Time 0.330 (0.606)	Data Time 0.001 (0.017)	Loss 2.5685 (2.5264)	Entropy 1.06076 (1.06430)	Top-1 acc 59.766 (63.210)	Top-5 acc 82.812 (83.337)	lr 0.01125
Train [64][2670/3239]	Time 0.267 (0.606)	Data Time 0.002 (0.017)	Loss 2.4110 (2.5262)	Entropy 1.06074 (1.06429)	Top-1 acc 63.281 (63.213)	Top-5 acc 83.594 (83.338)	lr 0.01125
Train [64][2680/3239]	Time 0.245 (0.626)	Data Time 0.003 (0.017)	Loss 2.4046 (2.5260)	Entropy 1.06068 (1.06428)	Top-1 acc 67.188 (63.221)	Top-5 acc 85.938 (83.341)	lr 0.01125
Train [64][2690/3239]	Time 0.257 (0.625)	Data Time 0.002 (0.017)	Loss 2.6121 (2.5259)	Entropy 1.06061 (1.06426)	Top-1 acc 62.109 (63.221)	Top-5 acc 81.641 (83.342)	lr 0.01125
Train [64][2700/3239]	Time 0.223 (0.625)	Data Time 0.002 (0.017)	Loss 2.6675 (2.5262)	Entropy 1.06063 (1.06425)	Top-1 acc 60.547 (63.212)	Top-5 acc 80.078 (83.336)	lr 0.01125
Train [64][2710/3239]	Time 0.230 (0.624)	Data Time 0.001 (0.016)	Loss 2.5486 (2.5263)	Entropy 1.06055 (1.06423)	Top-1 acc 64.453 (63.213)	Top-5 acc 82.031 (83.333)	lr 0.01125
Train [64][2720/3239]	Time 0.239 (0.624)	Data Time 0.001 (0.016)	Loss 2.4951 (2.5263)	Entropy 1.06057 (1.06422)	Top-1 acc 60.938 (63.212)	Top-5 acc 85.938 (83.332)	lr 0.01125
Train [64][2730/3239]	Time 0.251 (0.623)	Data Time 0.001 (0.016)	Loss 2.6593 (2.5265)	Entropy 1.06055 (1.06421)	Top-1 acc 62.109 (63.205)	Top-5 acc 81.641 (83.330)	lr 0.01124
Train [64][2740/3239]	Time 0.226 (0.622)	Data Time 0.002 (0.016)	Loss 2.4940 (2.5266)	Entropy 1.06050 (1.06419)	Top-1 acc 64.453 (63.205)	Top-5 acc 82.812 (83.328)	lr 0.01124
Train [64][2750/3239]	Time 0.368 (0.622)	Data Time 0.001 (0.016)	Loss 2.4887 (2.5265)	Entropy 1.06049 (1.06418)	Top-1 acc 63.281 (63.205)	Top-5 acc 82.031 (83.328)	lr 0.01124
Train [64][2760/3239]	Time 0.231 (0.621)	Data Time 0.001 (0.016)	Loss 2.5162 (2.5265)	Entropy 1.06046 (1.06417)	Top-1 acc 62.109 (63.203)	Top-5 acc 82.812 (83.328)	lr 0.01124
Train [64][2770/3239]	Time 0.266 (0.621)	Data Time 0.001 (0.016)	Loss 2.4626 (2.5267)	Entropy 1.06040 (1.06415)	Top-1 acc 66.406 (63.202)	Top-5 acc 84.375 (83.323)	lr 0.01124
Train [64][2780/3239]	Time 0.244 (0.620)	Data Time 0.002 (0.016)	Loss 2.4892 (2.5267)	Entropy 1.06038 (1.06414)	Top-1 acc 62.500 (63.198)	Top-5 acc 84.766 (83.325)	lr 0.01124
Train [64][2790/3239]	Time 0.324 (0.620)	Data Time 0.001 (0.016)	Loss 2.5631 (2.5266)	Entropy 1.06040 (1.06413)	Top-1 acc 64.062 (63.200)	Top-5 acc 83.984 (83.325)	lr 0.01124
Train [64][2800/3239]	Time 0.180 (0.619)	Data Time 0.003 (0.016)	Loss 2.5327 (2.5267)	Entropy 1.06033 (1.06411)	Top-1 acc 61.328 (63.196)	Top-5 acc 82.031 (83.320)	lr 0.01124
Train [64][2810/3239]	Time 0.212 (0.619)	Data Time 0.001 (0.016)	Loss 2.7258 (2.5269)	Entropy 1.06029 (1.06410)	Top-1 acc 58.203 (63.193)	Top-5 acc 79.688 (83.313)	lr 0.01124
Train [64][2820/3239]	Time 0.227 (0.618)	Data Time 0.001 (0.016)	Loss 2.6226 (2.5268)	Entropy 1.06028 (1.06409)	Top-1 acc 56.641 (63.197)	Top-5 acc 83.203 (83.316)	lr 0.01124
Train [64][2830/3239]	Time 0.227 (0.618)	Data Time 0.001 (0.016)	Loss 2.5115 (2.5267)	Entropy 1.06019 (1.06407)	Top-1 acc 63.281 (63.202)	Top-5 acc 85.156 (83.316)	lr 0.01123
Train [64][2840/3239]	Time 0.216 (0.617)	Data Time 0.001 (0.016)	Loss 2.2676 (2.5266)	Entropy 1.06020 (1.06406)	Top-1 acc 70.312 (63.207)	Top-5 acc 88.672 (83.318)	lr 0.01123
Train [64][2850/3239]	Time 0.253 (0.616)	Data Time 0.001 (0.016)	Loss 2.5655 (2.5266)	Entropy 1.06019 (1.06405)	Top-1 acc 59.766 (63.207)	Top-5 acc 83.203 (83.315)	lr 0.01123
Train [64][2860/3239]	Time 0.300 (0.616)	Data Time 0.001 (0.016)	Loss 2.6294 (2.5265)	Entropy 1.06020 (1.06403)	Top-1 acc 58.984 (63.208)	Top-5 acc 80.859 (83.317)	lr 0.01123
Train [64][2870/3239]	Time 0.227 (0.615)	Data Time 0.001 (0.016)	Loss 2.5051 (2.5265)	Entropy 1.06020 (1.06402)	Top-1 acc 62.891 (63.208)	Top-5 acc 84.766 (83.318)	lr 0.01123
Train [64][2880/3239]	Time 0.374 (0.615)	Data Time 0.001 (0.016)	Loss 2.5438 (2.5269)	Entropy 1.06017 (1.06401)	Top-1 acc 62.500 (63.201)	Top-5 acc 82.812 (83.310)	lr 0.01123
Train [64][2890/3239]	Time 0.229 (0.614)	Data Time 0.001 (0.016)	Loss 2.4759 (2.5269)	Entropy 1.06012 (1.06399)	Top-1 acc 65.234 (63.200)	Top-5 acc 83.594 (83.309)	lr 0.01123
Train [64][2900/3239]	Time 0.220 (0.614)	Data Time 0.001 (0.016)	Loss 2.6086 (2.5270)	Entropy 1.06001 (1.06398)	Top-1 acc 63.672 (63.199)	Top-5 acc 82.031 (83.306)	lr 0.01123
Train [64][2910/3239]	Time 0.297 (0.613)	Data Time 0.001 (0.015)	Loss 2.4803 (2.5271)	Entropy 1.05992 (1.06397)	Top-1 acc 64.844 (63.200)	Top-5 acc 82.031 (83.304)	lr 0.01123
Train [64][2920/3239]	Time 0.226 (0.613)	Data Time 0.001 (0.015)	Loss 2.2609 (2.5268)	Entropy 1.05985 (1.06395)	Top-1 acc 69.531 (63.205)	Top-5 acc 88.672 (83.306)	lr 0.01123
Train [64][2930/3239]	Time 0.381 (0.612)	Data Time 0.002 (0.015)	Loss 2.5720 (2.5269)	Entropy 1.05990 (1.06394)	Top-1 acc 61.719 (63.203)	Top-5 acc 82.031 (83.302)	lr 0.01122
Train [64][2940/3239]	Time 0.266 (0.612)	Data Time 0.001 (0.015)	Loss 2.5889 (2.5270)	Entropy 1.05988 (1.06392)	Top-1 acc 62.891 (63.204)	Top-5 acc 81.641 (83.301)	lr 0.01122
Train [64][2950/3239]	Time 0.223 (0.611)	Data Time 0.001 (0.015)	Loss 2.5228 (2.5271)	Entropy 1.05984 (1.06391)	Top-1 acc 64.062 (63.203)	Top-5 acc 83.984 (83.298)	lr 0.01122
Train [64][2960/3239]	Time 0.231 (0.611)	Data Time 0.001 (0.015)	Loss 2.6298 (2.5271)	Entropy 1.05983 (1.06390)	Top-1 acc 59.375 (63.200)	Top-5 acc 82.422 (83.296)	lr 0.01122
Train [64][2970/3239]	Time 0.245 (0.610)	Data Time 0.014 (0.015)	Loss 2.3888 (2.5273)	Entropy 1.05979 (1.06388)	Top-1 acc 68.359 (63.201)	Top-5 acc 87.109 (83.291)	lr 0.01122
Train [64][2980/3239]	Time 0.229 (0.610)	Data Time 0.001 (0.015)	Loss 2.5133 (2.5274)	Entropy 1.05979 (1.06387)	Top-1 acc 64.453 (63.202)	Top-5 acc 83.984 (83.289)	lr 0.01122
Train [64][2990/3239]	Time 0.328 (0.609)	Data Time 0.001 (0.015)	Loss 2.4216 (2.5275)	Entropy 1.05971 (1.06386)	Top-1 acc 68.359 (63.199)	Top-5 acc 86.328 (83.286)	lr 0.01122
Train [64][3000/3239]	Time 0.239 (0.609)	Data Time 0.001 (0.015)	Loss 2.4166 (2.5275)	Entropy 1.05969 (1.06384)	Top-1 acc 65.625 (63.197)	Top-5 acc 86.328 (83.287)	lr 0.01122
Train [64][3010/3239]	Time 0.271 (0.625)	Data Time 0.004 (0.015)	Loss 2.4257 (2.5275)	Entropy 1.05970 (1.06383)	Top-1 acc 61.328 (63.194)	Top-5 acc 85.938 (83.286)	lr 0.01122
Train [64][3020/3239]	Time 0.335 (0.625)	Data Time 0.002 (0.015)	Loss 2.3829 (2.5273)	Entropy 1.05975 (1.06381)	Top-1 acc 66.797 (63.197)	Top-5 acc 86.719 (83.291)	lr 0.01122
Train [64][3030/3239]	Time 0.264 (0.624)	Data Time 0.002 (0.015)	Loss 2.5582 (2.5274)	Entropy 1.05972 (1.06380)	Top-1 acc 61.719 (63.195)	Top-5 acc 85.156 (83.289)	lr 0.01121
Train [64][3040/3239]	Time 0.233 (0.624)	Data Time 0.001 (0.015)	Loss 2.5839 (2.5276)	Entropy 1.05965 (1.06379)	Top-1 acc 60.156 (63.187)	Top-5 acc 82.422 (83.284)	lr 0.01121
Train [64][3050/3239]	Time 0.234 (0.623)	Data Time 0.001 (0.015)	Loss 2.6629 (2.5276)	Entropy 1.05962 (1.06377)	Top-1 acc 60.938 (63.185)	Top-5 acc 82.031 (83.287)	lr 0.01121
Train [64][3060/3239]	Time 0.312 (0.623)	Data Time 0.001 (0.015)	Loss 2.6481 (2.5278)	Entropy 1.05959 (1.06376)	Top-1 acc 56.641 (63.179)	Top-5 acc 78.906 (83.285)	lr 0.01121
Train [64][3070/3239]	Time 0.291 (0.622)	Data Time 0.001 (0.015)	Loss 2.5851 (2.5277)	Entropy 1.05954 (1.06375)	Top-1 acc 62.891 (63.180)	Top-5 acc 80.469 (83.285)	lr 0.01121
Train [64][3080/3239]	Time 0.258 (0.622)	Data Time 0.001 (0.015)	Loss 2.6639 (2.5276)	Entropy 1.05947 (1.06373)	Top-1 acc 61.328 (63.182)	Top-5 acc 82.812 (83.287)	lr 0.01121
Train [64][3090/3239]	Time 0.214 (0.621)	Data Time 0.001 (0.015)	Loss 2.7281 (2.5277)	Entropy 1.05949 (1.06372)	Top-1 acc 59.766 (63.184)	Top-5 acc 78.516 (83.285)	lr 0.01121
Train [64][3100/3239]	Time 0.271 (0.621)	Data Time 0.001 (0.015)	Loss 2.5890 (2.5276)	Entropy 1.05947 (1.06371)	Top-1 acc 56.641 (63.181)	Top-5 acc 82.812 (83.285)	lr 0.01121
Train [64][3110/3239]	Time 0.313 (0.620)	Data Time 0.001 (0.015)	Loss 2.5493 (2.5278)	Entropy 1.05943 (1.06369)	Top-1 acc 62.500 (63.174)	Top-5 acc 83.203 (83.280)	lr 0.01121
Train [64][3120/3239]	Time 0.219 (0.620)	Data Time 0.001 (0.015)	Loss 2.6949 (2.5277)	Entropy 1.05938 (1.06368)	Top-1 acc 60.156 (63.177)	Top-5 acc 80.859 (83.284)	lr 0.01121
Train [64][3130/3239]	Time 0.205 (0.619)	Data Time 0.001 (0.015)	Loss 2.7332 (2.5278)	Entropy 1.05924 (1.06366)	Top-1 acc 55.859 (63.174)	Top-5 acc 78.125 (83.281)	lr 0.01120
Train [64][3140/3239]	Time 0.249 (0.618)	Data Time 0.001 (0.014)	Loss 2.6048 (2.5278)	Entropy 1.05922 (1.06365)	Top-1 acc 62.891 (63.174)	Top-5 acc 80.078 (83.280)	lr 0.01120
Train [64][3150/3239]	Time 0.341 (0.618)	Data Time 0.001 (0.014)	Loss 2.5319 (2.5278)	Entropy 1.05918 (1.06364)	Top-1 acc 62.500 (63.175)	Top-5 acc 82.812 (83.281)	lr 0.01120
Train [64][3160/3239]	Time 0.266 (0.618)	Data Time 0.001 (0.014)	Loss 2.6298 (2.5280)	Entropy 1.05917 (1.06362)	Top-1 acc 63.281 (63.172)	Top-5 acc 81.641 (83.278)	lr 0.01120
Train [64][3170/3239]	Time 0.246 (0.617)	Data Time 0.001 (0.014)	Loss 2.5735 (2.5282)	Entropy 1.05914 (1.06361)	Top-1 acc 60.547 (63.169)	Top-5 acc 81.641 (83.274)	lr 0.01120
Train [64][3180/3239]	Time 0.252 (0.617)	Data Time 0.000 (0.014)	Loss 2.6401 (2.5282)	Entropy 1.05921 (1.06359)	Top-1 acc 64.062 (63.170)	Top-5 acc 79.297 (83.274)	lr 0.01120
Train [64][3190/3239]	Time 0.215 (0.616)	Data Time 0.000 (0.014)	Loss 2.4344 (2.5282)	Entropy 1.05918 (1.06358)	Top-1 acc 67.578 (63.169)	Top-5 acc 85.547 (83.272)	lr 0.01120
Train [64][3200/3239]	Time 0.223 (0.616)	Data Time 0.000 (0.014)	Loss 2.3780 (2.5282)	Entropy 1.05915 (1.06357)	Top-1 acc 64.844 (63.168)	Top-5 acc 84.766 (83.272)	lr 0.01120
Train [64][3210/3239]	Time 0.249 (0.615)	Data Time 0.000 (0.014)	Loss 2.4667 (2.5282)	Entropy 1.05922 (1.06355)	Top-1 acc 64.453 (63.169)	Top-5 acc 85.156 (83.274)	lr 0.01120
Train [64][3220/3239]	Time 0.198 (0.614)	Data Time 0.000 (0.014)	Loss 2.5834 (2.5284)	Entropy 1.05915 (1.06354)	Top-1 acc 65.625 (63.166)	Top-5 acc 80.859 (83.273)	lr 0.01120
Train [64][3230/3239]	Time 0.215 (0.614)	Data Time 0.000 (0.014)	Loss 2.7282 (2.5283)	Entropy 1.05910 (1.06353)	Top-1 acc 60.156 (63.167)	Top-5 acc 78.125 (83.272)	lr 0.01119
Train [64][3239/3239]	Time 2.260 (0.614)	Data Time 0.000 (0.014)	Loss 2.6834 (2.5282)	Entropy 1.05910 (1.06351)	Top-1 acc 60.494 (63.167)	Top-5 acc 80.247 (83.272)	lr 0.01119
==========Valid [64/120]	loss 1.412	top-1 acc 67.593 (67.786)	top-5 acc 87.282	Train top-1 63.167	top-5 83.272	Entropy 1.05910	Latency-None: 0.000ms	Flops: 546.53M
Train [65][0/3239]	Time 40.342 (40.342)	Data Time 38.607 (38.607)	Loss 2.3411 (2.3411)	Entropy 1.05905 (1.05905)	Top-1 acc 67.578 (67.578)	Top-5 acc 83.984 (83.984)	lr 0.01119
Train [65][10/3239]	Time 2.876 (4.294)	Data Time 0.002 (3.567)	Loss 2.5519 (2.5439)	Entropy 1.05905 (1.05905)	Top-1 acc 64.062 (63.033)	Top-5 acc 84.375 (82.386)	lr 0.01119
Train [65][20/3239]	Time 0.226 (2.369)	Data Time 0.001 (1.869)	Loss 2.5505 (2.5181)	Entropy 1.05903 (1.05904)	Top-1 acc 65.234 (63.914)	Top-5 acc 82.422 (83.110)	lr 0.01119
Train [65][30/3239]	Time 0.260 (1.763)	Data Time 0.001 (1.267)	Loss 2.5230 (2.5182)	Entropy 1.05896 (1.05901)	Top-1 acc 63.672 (63.785)	Top-5 acc 81.641 (83.065)	lr 0.01119
Train [65][40/3239]	Time 0.224 (1.450)	Data Time 0.001 (0.958)	Loss 2.5501 (2.5127)	Entropy 1.05888 (1.05899)	Top-1 acc 62.500 (63.958)	Top-5 acc 82.812 (83.432)	lr 0.01119
Train [65][50/3239]	Time 0.338 (1.262)	Data Time 0.001 (0.771)	Loss 2.5421 (2.5172)	Entropy 1.05886 (1.05896)	Top-1 acc 63.672 (63.611)	Top-5 acc 81.641 (83.333)	lr 0.01119
Train [65][60/3239]	Time 0.271 (1.134)	Data Time 0.001 (0.645)	Loss 2.3228 (2.5066)	Entropy 1.05891 (1.05895)	Top-1 acc 68.750 (63.915)	Top-5 acc 86.328 (83.555)	lr 0.01119
Train [65][70/3239]	Time 0.229 (1.041)	Data Time 0.001 (0.554)	Loss 2.4727 (2.4986)	Entropy 1.05890 (1.05894)	Top-1 acc 64.453 (64.162)	Top-5 acc 83.594 (83.731)	lr 0.01119
Train [65][80/3239]	Time 0.246 (0.971)	Data Time 0.001 (0.486)	Loss 2.6525 (2.4943)	Entropy 1.05887 (1.05894)	Top-1 acc 58.984 (64.265)	Top-5 acc 81.250 (83.854)	lr 0.01119
Train [65][90/3239]	Time 0.228 (0.918)	Data Time 0.006 (0.433)	Loss 2.4585 (2.4929)	Entropy 1.05888 (1.05893)	Top-1 acc 63.672 (64.221)	Top-5 acc 84.766 (83.813)	lr 0.01118
Train [65][100/3239]	Time 0.233 (0.876)	Data Time 0.001 (0.390)	Loss 2.2665 (2.4920)	Entropy 1.05889 (1.05893)	Top-1 acc 68.750 (64.117)	Top-5 acc 87.891 (83.872)	lr 0.01118
Train [65][110/3239]	Time 0.374 (1.314)	Data Time 0.004 (0.355)	Loss 2.3981 (2.4905)	Entropy 1.05892 (1.05892)	Top-1 acc 63.672 (64.165)	Top-5 acc 84.766 (83.858)	lr 0.01118
Train [65][120/3239]	Time 3.207 (1.258)	Data Time 0.003 (0.327)	Loss 2.6008 (2.4908)	Entropy 1.05892 (1.05892)	Top-1 acc 60.938 (64.156)	Top-5 acc 80.078 (83.829)	lr 0.01118
Train [65][130/3239]	Time 0.234 (1.181)	Data Time 0.002 (0.302)	Loss 2.6087 (2.4894)	Entropy 1.05887 (1.05892)	Top-1 acc 61.328 (64.206)	Top-5 acc 82.031 (83.895)	lr 0.01118
Train [65][140/3239]	Time 0.361 (1.132)	Data Time 0.002 (0.281)	Loss 2.5807 (2.4914)	Entropy 1.05885 (1.05891)	Top-1 acc 62.891 (64.110)	Top-5 acc 79.297 (83.854)	lr 0.01118
Train [65][150/3239]	Time 0.247 (1.088)	Data Time 0.001 (0.262)	Loss 2.6468 (2.4921)	Entropy 1.05885 (1.05891)	Top-1 acc 57.812 (64.106)	Top-5 acc 81.641 (83.865)	lr 0.01118
Train [65][160/3239]	Time 0.233 (1.050)	Data Time 0.002 (0.246)	Loss 2.3919 (2.4883)	Entropy 1.05883 (1.05891)	Top-1 acc 62.500 (64.157)	Top-5 acc 87.500 (83.946)	lr 0.01118
Train [65][170/3239]	Time 0.212 (1.015)	Data Time 0.001 (0.232)	Loss 2.7201 (2.4896)	Entropy 1.05884 (1.05890)	Top-1 acc 60.156 (64.168)	Top-5 acc 79.297 (83.923)	lr 0.01118
Train [65][180/3239]	Time 0.223 (0.986)	Data Time 0.001 (0.219)	Loss 2.7238 (2.4931)	Entropy 1.05885 (1.05890)	Top-1 acc 59.766 (64.149)	Top-5 acc 78.516 (83.870)	lr 0.01118
Train [65][190/3239]	Time 0.220 (0.960)	Data Time 0.001 (0.208)	Loss 2.5241 (2.4924)	Entropy 1.05882 (1.05890)	Top-1 acc 60.938 (64.183)	Top-5 acc 85.938 (83.880)	lr 0.01117
Train [65][200/3239]	Time 0.225 (0.936)	Data Time 0.001 (0.197)	Loss 2.3003 (2.4914)	Entropy 1.05879 (1.05889)	Top-1 acc 69.922 (64.210)	Top-5 acc 88.672 (83.897)	lr 0.01117
Train [65][210/3239]	Time 0.224 (0.914)	Data Time 0.001 (0.188)	Loss 2.5801 (2.4939)	Entropy 1.05876 (1.05889)	Top-1 acc 62.500 (64.162)	Top-5 acc 79.297 (83.851)	lr 0.01117
Train [65][220/3239]	Time 0.231 (0.894)	Data Time 0.001 (0.180)	Loss 2.5487 (2.4943)	Entropy 1.05863 (1.05888)	Top-1 acc 63.281 (64.139)	Top-5 acc 82.422 (83.859)	lr 0.01117
Train [65][230/3239]	Time 2.732 (0.877)	Data Time 0.003 (0.172)	Loss 2.5747 (2.4971)	Entropy 1.05863 (1.05887)	Top-1 acc 65.625 (64.084)	Top-5 acc 79.688 (83.807)	lr 0.01117
Train [65][240/3239]	Time 0.266 (0.851)	Data Time 0.001 (0.165)	Loss 2.4510 (2.4948)	Entropy 1.05863 (1.05886)	Top-1 acc 65.234 (64.145)	Top-5 acc 85.156 (83.873)	lr 0.01117
Train [65][250/3239]	Time 0.230 (0.836)	Data Time 0.001 (0.158)	Loss 2.6786 (2.4975)	Entropy 1.05857 (1.05885)	Top-1 acc 61.719 (64.020)	Top-5 acc 80.859 (83.840)	lr 0.01117
Train [65][260/3239]	Time 0.233 (0.822)	Data Time 0.001 (0.152)	Loss 2.4950 (2.4968)	Entropy 1.05859 (1.05884)	Top-1 acc 63.281 (64.021)	Top-5 acc 84.766 (83.845)	lr 0.01117
Train [65][270/3239]	Time 0.294 (0.810)	Data Time 0.001 (0.147)	Loss 2.6657 (2.4967)	Entropy 1.05857 (1.05883)	Top-1 acc 62.109 (64.022)	Top-5 acc 83.594 (83.860)	lr 0.01117
Train [65][280/3239]	Time 0.228 (0.798)	Data Time 0.001 (0.142)	Loss 2.4560 (2.4965)	Entropy 1.05855 (1.05882)	Top-1 acc 63.672 (64.003)	Top-5 acc 86.328 (83.875)	lr 0.01117
Train [65][290/3239]	Time 0.252 (0.787)	Data Time 0.001 (0.137)	Loss 2.7027 (2.4965)	Entropy 1.05850 (1.05881)	Top-1 acc 58.594 (63.983)	Top-5 acc 82.422 (83.870)	lr 0.01116
Train [65][300/3239]	Time 0.238 (0.777)	Data Time 0.001 (0.132)	Loss 2.4974 (2.4981)	Entropy 1.05849 (1.05880)	Top-1 acc 65.234 (63.934)	Top-5 acc 85.156 (83.856)	lr 0.01116
Train [65][310/3239]	Time 0.209 (0.767)	Data Time 0.001 (0.128)	Loss 2.6364 (2.4990)	Entropy 1.05843 (1.05879)	Top-1 acc 58.984 (63.932)	Top-5 acc 83.594 (83.840)	lr 0.01116
Train [65][320/3239]	Time 0.335 (0.759)	Data Time 0.001 (0.124)	Loss 2.2645 (2.4994)	Entropy 1.05844 (1.05878)	Top-1 acc 68.750 (63.880)	Top-5 acc 88.672 (83.853)	lr 0.01116
Train [65][330/3239]	Time 0.249 (0.751)	Data Time 0.001 (0.121)	Loss 2.5255 (2.5010)	Entropy 1.05840 (1.05877)	Top-1 acc 61.719 (63.831)	Top-5 acc 83.984 (83.824)	lr 0.01116
Train [65][340/3239]	Time 2.537 (0.743)	Data Time 0.001 (0.117)	Loss 2.4265 (2.5008)	Entropy 1.05840 (1.05876)	Top-1 acc 70.312 (63.862)	Top-5 acc 84.375 (83.807)	lr 0.01116
Train [65][350/3239]	Time 0.251 (0.729)	Data Time 0.001 (0.114)	Loss 2.6961 (2.5037)	Entropy 1.05836 (1.05874)	Top-1 acc 58.984 (63.823)	Top-5 acc 79.688 (83.751)	lr 0.01116
Train [65][360/3239]	Time 0.230 (0.721)	Data Time 0.001 (0.111)	Loss 2.4894 (2.5052)	Entropy 1.05829 (1.05873)	Top-1 acc 65.234 (63.785)	Top-5 acc 83.594 (83.710)	lr 0.01116
Train [65][370/3239]	Time 0.327 (0.715)	Data Time 0.001 (0.108)	Loss 2.3585 (2.5038)	Entropy 1.05827 (1.05872)	Top-1 acc 66.797 (63.818)	Top-5 acc 87.891 (83.726)	lr 0.01116
Train [65][380/3239]	Time 0.225 (0.709)	Data Time 0.001 (0.105)	Loss 2.4827 (2.5043)	Entropy 1.05820 (1.05871)	Top-1 acc 64.844 (63.798)	Top-5 acc 82.812 (83.719)	lr 0.01116
Train [65][390/3239]	Time 0.239 (0.703)	Data Time 0.001 (0.102)	Loss 2.4621 (2.5044)	Entropy 1.05815 (1.05869)	Top-1 acc 64.844 (63.803)	Top-5 acc 85.547 (83.728)	lr 0.01115
Train [65][400/3239]	Time 0.218 (0.698)	Data Time 0.001 (0.100)	Loss 2.3151 (2.5043)	Entropy 1.05818 (1.05868)	Top-1 acc 67.578 (63.813)	Top-5 acc 88.672 (83.719)	lr 0.01115
Train [65][410/3239]	Time 0.242 (0.692)	Data Time 0.001 (0.097)	Loss 2.4083 (2.5045)	Entropy 1.05815 (1.05867)	Top-1 acc 66.797 (63.796)	Top-5 acc 84.375 (83.725)	lr 0.01115
Train [65][420/3239]	Time 0.221 (0.687)	Data Time 0.001 (0.095)	Loss 2.2831 (2.5043)	Entropy 1.05807 (1.05865)	Top-1 acc 68.750 (63.797)	Top-5 acc 89.453 (83.727)	lr 0.01115
Train [65][430/3239]	Time 0.228 (0.682)	Data Time 0.001 (0.093)	Loss 2.4648 (2.5042)	Entropy 1.05803 (1.05864)	Top-1 acc 63.281 (63.777)	Top-5 acc 84.375 (83.733)	lr 0.01115
Train [65][440/3239]	Time 0.235 (0.678)	Data Time 0.001 (0.091)	Loss 2.5694 (2.5056)	Entropy 1.05799 (1.05863)	Top-1 acc 62.109 (63.748)	Top-5 acc 81.641 (83.709)	lr 0.01115
Train [65][450/3239]	Time 2.521 (0.673)	Data Time 0.001 (0.089)	Loss 2.4390 (2.5068)	Entropy 1.05799 (1.05861)	Top-1 acc 65.625 (63.707)	Top-5 acc 86.328 (83.700)	lr 0.01115
Train [65][460/3239]	Time 0.332 (0.664)	Data Time 0.001 (0.087)	Loss 2.4050 (2.5058)	Entropy 1.05800 (1.05860)	Top-1 acc 68.750 (63.750)	Top-5 acc 83.594 (83.721)	lr 0.01115
Train [65][470/3239]	Time 0.232 (0.660)	Data Time 0.001 (0.085)	Loss 2.4161 (2.5059)	Entropy 1.05796 (1.05859)	Top-1 acc 66.406 (63.739)	Top-5 acc 85.547 (83.711)	lr 0.01115
Train [65][480/3239]	Time 0.297 (0.763)	Data Time 0.003 (0.083)	Loss 2.4453 (2.5062)	Entropy 1.05796 (1.05857)	Top-1 acc 65.234 (63.726)	Top-5 acc 85.938 (83.720)	lr 0.01115
Train [65][490/3239]	Time 0.243 (0.759)	Data Time 0.002 (0.082)	Loss 2.5545 (2.5057)	Entropy 1.05795 (1.05856)	Top-1 acc 67.969 (63.754)	Top-5 acc 84.766 (83.749)	lr 0.01114
Train [65][500/3239]	Time 0.235 (0.754)	Data Time 0.002 (0.080)	Loss 2.4234 (2.5048)	Entropy 1.05791 (1.05855)	Top-1 acc 65.625 (63.778)	Top-5 acc 83.203 (83.769)	lr 0.01114
Train [65][510/3239]	Time 0.238 (0.749)	Data Time 0.001 (0.079)	Loss 2.5423 (2.5050)	Entropy 1.05778 (1.05853)	Top-1 acc 61.719 (63.764)	Top-5 acc 82.031 (83.760)	lr 0.01114
Train [65][520/3239]	Time 0.229 (0.744)	Data Time 0.001 (0.077)	Loss 2.3571 (2.5043)	Entropy 1.05778 (1.05852)	Top-1 acc 65.234 (63.773)	Top-5 acc 86.328 (83.768)	lr 0.01114
Train [65][530/3239]	Time 0.210 (0.738)	Data Time 0.001 (0.076)	Loss 2.3612 (2.5047)	Entropy 1.05778 (1.05851)	Top-1 acc 69.141 (63.750)	Top-5 acc 87.109 (83.771)	lr 0.01114
Train [65][540/3239]	Time 0.227 (0.734)	Data Time 0.001 (0.074)	Loss 2.3327 (2.5038)	Entropy 1.05780 (1.05849)	Top-1 acc 69.141 (63.769)	Top-5 acc 86.328 (83.791)	lr 0.01114
Train [65][550/3239]	Time 0.378 (0.729)	Data Time 0.001 (0.073)	Loss 2.5695 (2.5046)	Entropy 1.05778 (1.05848)	Top-1 acc 60.938 (63.753)	Top-5 acc 84.375 (83.787)	lr 0.01114
Train [65][560/3239]	Time 2.595 (0.725)	Data Time 0.002 (0.072)	Loss 2.5600 (2.5056)	Entropy 1.05778 (1.05847)	Top-1 acc 62.500 (63.721)	Top-5 acc 82.422 (83.766)	lr 0.01114
Train [65][570/3239]	Time 0.232 (0.716)	Data Time 0.001 (0.071)	Loss 2.4543 (2.5051)	Entropy 1.05780 (1.05846)	Top-1 acc 63.281 (63.732)	Top-5 acc 84.375 (83.763)	lr 0.01114
Train [65][580/3239]	Time 0.243 (0.712)	Data Time 0.001 (0.069)	Loss 2.5360 (2.5054)	Entropy 1.05780 (1.05844)	Top-1 acc 61.719 (63.736)	Top-5 acc 83.203 (83.756)	lr 0.01114
Train [65][590/3239]	Time 0.239 (0.708)	Data Time 0.001 (0.068)	Loss 2.5016 (2.5055)	Entropy 1.05781 (1.05843)	Top-1 acc 63.672 (63.741)	Top-5 acc 83.203 (83.736)	lr 0.01113
Train [65][600/3239]	Time 0.279 (0.704)	Data Time 0.002 (0.067)	Loss 2.5981 (2.5048)	Entropy 1.05777 (1.05842)	Top-1 acc 60.156 (63.752)	Top-5 acc 84.375 (83.756)	lr 0.01113
Train [65][610/3239]	Time 0.227 (0.700)	Data Time 0.001 (0.066)	Loss 2.6249 (2.5044)	Entropy 1.05771 (1.05841)	Top-1 acc 61.719 (63.756)	Top-5 acc 82.812 (83.767)	lr 0.01113
Train [65][620/3239]	Time 0.227 (0.697)	Data Time 0.002 (0.065)	Loss 2.6250 (2.5041)	Entropy 1.05768 (1.05840)	Top-1 acc 60.938 (63.767)	Top-5 acc 82.812 (83.769)	lr 0.01113
Train [65][630/3239]	Time 0.238 (0.694)	Data Time 0.001 (0.064)	Loss 2.4918 (2.5033)	Entropy 1.05774 (1.05839)	Top-1 acc 64.062 (63.783)	Top-5 acc 85.938 (83.779)	lr 0.01113
Train [65][640/3239]	Time 0.334 (0.690)	Data Time 0.001 (0.063)	Loss 2.5508 (2.5045)	Entropy 1.05751 (1.05838)	Top-1 acc 65.625 (63.766)	Top-5 acc 82.031 (83.756)	lr 0.01113
Train [65][650/3239]	Time 0.233 (0.687)	Data Time 0.001 (0.062)	Loss 2.6676 (2.5047)	Entropy 1.05752 (1.05836)	Top-1 acc 58.203 (63.750)	Top-5 acc 82.422 (83.758)	lr 0.01113
Train [65][660/3239]	Time 0.224 (0.684)	Data Time 0.001 (0.061)	Loss 2.5616 (2.5046)	Entropy 1.05749 (1.05835)	Top-1 acc 61.719 (63.735)	Top-5 acc 84.375 (83.762)	lr 0.01113
Train [65][670/3239]	Time 2.428 (0.680)	Data Time 0.002 (0.060)	Loss 2.4300 (2.5044)	Entropy 1.05749 (1.05834)	Top-1 acc 62.109 (63.735)	Top-5 acc 84.766 (83.752)	lr 0.01113
Train [65][680/3239]	Time 0.244 (0.674)	Data Time 0.002 (0.059)	Loss 2.4905 (2.5054)	Entropy 1.05749 (1.05833)	Top-1 acc 61.328 (63.710)	Top-5 acc 83.203 (83.733)	lr 0.01113
Train [65][690/3239]	Time 0.341 (0.671)	Data Time 0.001 (0.059)	Loss 2.6409 (2.5049)	Entropy 1.05746 (1.05831)	Top-1 acc 59.766 (63.715)	Top-5 acc 82.422 (83.735)	lr 0.01112
Train [65][700/3239]	Time 0.223 (0.668)	Data Time 0.001 (0.058)	Loss 2.6351 (2.5066)	Entropy 1.05745 (1.05830)	Top-1 acc 61.719 (63.684)	Top-5 acc 80.859 (83.701)	lr 0.01112
Train [65][710/3239]	Time 0.273 (0.666)	Data Time 0.001 (0.057)	Loss 2.5978 (2.5064)	Entropy 1.05736 (1.05829)	Top-1 acc 62.891 (63.685)	Top-5 acc 82.031 (83.701)	lr 0.01112
Train [65][720/3239]	Time 0.263 (0.663)	Data Time 0.001 (0.056)	Loss 3.1076 (2.5072)	Entropy 1.05725 (1.05827)	Top-1 acc 50.781 (63.656)	Top-5 acc 74.219 (83.692)	lr 0.01112
Train [65][730/3239]	Time 0.333 (0.660)	Data Time 0.001 (0.056)	Loss 2.3822 (2.5069)	Entropy 1.05732 (1.05826)	Top-1 acc 66.797 (63.673)	Top-5 acc 85.156 (83.692)	lr 0.01112
Train [65][740/3239]	Time 0.217 (0.658)	Data Time 0.001 (0.055)	Loss 2.5593 (2.5072)	Entropy 1.05728 (1.05825)	Top-1 acc 62.500 (63.664)	Top-5 acc 82.031 (83.683)	lr 0.01112
Train [65][750/3239]	Time 0.226 (0.655)	Data Time 0.001 (0.054)	Loss 2.5045 (2.5073)	Entropy 1.05727 (1.05824)	Top-1 acc 62.891 (63.665)	Top-5 acc 82.812 (83.677)	lr 0.01112
Train [65][760/3239]	Time 0.218 (0.653)	Data Time 0.001 (0.053)	Loss 2.6616 (2.5076)	Entropy 1.05731 (1.05822)	Top-1 acc 62.109 (63.661)	Top-5 acc 79.297 (83.668)	lr 0.01112
Train [65][770/3239]	Time 0.322 (0.651)	Data Time 0.001 (0.053)	Loss 2.5782 (2.5084)	Entropy 1.05727 (1.05821)	Top-1 acc 62.891 (63.645)	Top-5 acc 83.203 (83.652)	lr 0.01112
Train [65][780/3239]	Time 2.532 (0.648)	Data Time 0.001 (0.052)	Loss 2.4190 (2.5086)	Entropy 1.05727 (1.05820)	Top-1 acc 64.844 (63.646)	Top-5 acc 86.328 (83.652)	lr 0.01112
Train [65][790/3239]	Time 0.235 (0.643)	Data Time 0.001 (0.051)	Loss 2.6086 (2.5087)	Entropy 1.05727 (1.05819)	Top-1 acc 62.109 (63.631)	Top-5 acc 82.422 (83.650)	lr 0.01111
Train [65][800/3239]	Time 0.241 (0.641)	Data Time 0.001 (0.051)	Loss 2.6729 (2.5089)	Entropy 1.05725 (1.05818)	Top-1 acc 59.766 (63.622)	Top-5 acc 79.688 (83.649)	lr 0.01111
Train [65][810/3239]	Time 0.228 (0.639)	Data Time 0.001 (0.050)	Loss 2.5227 (2.5085)	Entropy 1.05723 (1.05816)	Top-1 acc 64.062 (63.639)	Top-5 acc 85.156 (83.654)	lr 0.01111
Train [65][820/3239]	Time 0.359 (0.637)	Data Time 0.001 (0.050)	Loss 2.5687 (2.5087)	Entropy 1.05724 (1.05815)	Top-1 acc 58.984 (63.651)	Top-5 acc 83.594 (83.648)	lr 0.01111
Train [65][830/3239]	Time 0.227 (0.635)	Data Time 0.001 (0.049)	Loss 2.3353 (2.5080)	Entropy 1.05719 (1.05814)	Top-1 acc 66.406 (63.669)	Top-5 acc 86.328 (83.654)	lr 0.01111
Train [65][840/3239]	Time 0.255 (0.700)	Data Time 0.002 (0.048)	Loss 2.3959 (2.5080)	Entropy 1.05728 (1.05813)	Top-1 acc 60.547 (63.670)	Top-5 acc 88.281 (83.655)	lr 0.01111
Train [65][850/3239]	Time 0.216 (0.697)	Data Time 0.002 (0.048)	Loss 2.4262 (2.5081)	Entropy 1.05729 (1.05812)	Top-1 acc 64.844 (63.666)	Top-5 acc 83.984 (83.653)	lr 0.01111
Train [65][860/3239]	Time 0.225 (0.695)	Data Time 0.001 (0.047)	Loss 2.5806 (2.5077)	Entropy 1.05728 (1.05811)	Top-1 acc 61.328 (63.676)	Top-5 acc 83.594 (83.658)	lr 0.01111
Train [65][870/3239]	Time 0.229 (0.692)	Data Time 0.001 (0.047)	Loss 2.4691 (2.5079)	Entropy 1.05723 (1.05810)	Top-1 acc 65.625 (63.669)	Top-5 acc 83.594 (83.657)	lr 0.01111
Train [65][880/3239]	Time 0.230 (0.690)	Data Time 0.001 (0.046)	Loss 2.4275 (2.5077)	Entropy 1.05717 (1.05809)	Top-1 acc 64.844 (63.671)	Top-5 acc 85.156 (83.667)	lr 0.01111
Train [65][890/3239]	Time 2.488 (0.687)	Data Time 0.001 (0.046)	Loss 2.1997 (2.5073)	Entropy 1.05717 (1.05808)	Top-1 acc 70.703 (63.686)	Top-5 acc 89.453 (83.677)	lr 0.01110
Train [65][900/3239]	Time 0.229 (0.682)	Data Time 0.001 (0.045)	Loss 2.3091 (2.5074)	Entropy 1.05720 (1.05807)	Top-1 acc 70.312 (63.682)	Top-5 acc 85.938 (83.677)	lr 0.01110
Train [65][910/3239]	Time 0.365 (0.680)	Data Time 0.002 (0.045)	Loss 2.5078 (2.5067)	Entropy 1.05721 (1.05806)	Top-1 acc 67.188 (63.706)	Top-5 acc 83.984 (83.693)	lr 0.01110
Train [65][920/3239]	Time 0.208 (0.678)	Data Time 0.001 (0.044)	Loss 2.6245 (2.5070)	Entropy 1.05708 (1.05805)	Top-1 acc 60.547 (63.698)	Top-5 acc 82.422 (83.692)	lr 0.01110
Train [65][930/3239]	Time 0.231 (0.676)	Data Time 0.001 (0.044)	Loss 2.5525 (2.5067)	Entropy 1.05707 (1.05804)	Top-1 acc 63.672 (63.713)	Top-5 acc 81.250 (83.683)	lr 0.01110
Train [65][940/3239]	Time 0.205 (0.674)	Data Time 0.001 (0.044)	Loss 2.4031 (2.5071)	Entropy 1.05704 (1.05803)	Top-1 acc 67.578 (63.715)	Top-5 acc 86.328 (83.671)	lr 0.01110
Train [65][950/3239]	Time 0.241 (0.671)	Data Time 0.001 (0.043)	Loss 2.3734 (2.5070)	Entropy 1.05704 (1.05802)	Top-1 acc 67.188 (63.706)	Top-5 acc 86.719 (83.669)	lr 0.01110
Train [65][960/3239]	Time 0.331 (0.669)	Data Time 0.001 (0.043)	Loss 2.3967 (2.5072)	Entropy 1.05703 (1.05801)	Top-1 acc 64.844 (63.699)	Top-5 acc 88.281 (83.664)	lr 0.01110
Train [65][970/3239]	Time 0.266 (0.667)	Data Time 0.002 (0.042)	Loss 2.4109 (2.5076)	Entropy 1.05703 (1.05800)	Top-1 acc 65.234 (63.688)	Top-5 acc 86.719 (83.664)	lr 0.01110
Train [65][980/3239]	Time 0.204 (0.665)	Data Time 0.001 (0.042)	Loss 2.4584 (2.5082)	Entropy 1.05740 (1.05799)	Top-1 acc 64.062 (63.671)	Top-5 acc 84.375 (83.652)	lr 0.01109
Train [65][990/3239]	Time 0.253 (0.663)	Data Time 0.001 (0.041)	Loss 2.4866 (2.5081)	Entropy 1.05737 (1.05798)	Top-1 acc 65.234 (63.678)	Top-5 acc 84.766 (83.653)	lr 0.01109
Train [65][1000/3239]	Time 2.505 (0.661)	Data Time 0.001 (0.041)	Loss 2.4873 (2.5082)	Entropy 1.05737 (1.05798)	Top-1 acc 65.625 (63.681)	Top-5 acc 84.375 (83.652)	lr 0.01109
Train [65][1010/3239]	Time 0.231 (0.657)	Data Time 0.001 (0.041)	Loss 2.3523 (2.5076)	Entropy 1.05736 (1.05797)	Top-1 acc 70.703 (63.695)	Top-5 acc 84.766 (83.663)	lr 0.01109
Train [65][1020/3239]	Time 0.227 (0.655)	Data Time 0.001 (0.040)	Loss 2.3949 (2.5079)	Entropy 1.05732 (1.05797)	Top-1 acc 62.500 (63.682)	Top-5 acc 87.500 (83.665)	lr 0.01109
Train [65][1030/3239]	Time 0.234 (0.654)	Data Time 0.001 (0.040)	Loss 2.3892 (2.5078)	Entropy 1.05731 (1.05796)	Top-1 acc 69.531 (63.684)	Top-5 acc 88.672 (83.678)	lr 0.01109
Train [65][1040/3239]	Time 0.230 (0.652)	Data Time 0.001 (0.040)	Loss 2.6302 (2.5077)	Entropy 1.05726 (1.05795)	Top-1 acc 58.203 (63.676)	Top-5 acc 79.688 (83.684)	lr 0.01109
Train [65][1050/3239]	Time 0.334 (0.650)	Data Time 0.001 (0.039)	Loss 2.4672 (2.5078)	Entropy 1.05722 (1.05795)	Top-1 acc 62.891 (63.681)	Top-5 acc 84.766 (83.688)	lr 0.01109
Train [65][1060/3239]	Time 0.232 (0.648)	Data Time 0.001 (0.039)	Loss 2.6248 (2.5078)	Entropy 1.05717 (1.05794)	Top-1 acc 62.500 (63.678)	Top-5 acc 80.469 (83.683)	lr 0.01109
Train [65][1070/3239]	Time 0.222 (0.647)	Data Time 0.001 (0.038)	Loss 2.5284 (2.5096)	Entropy 1.05715 (1.05793)	Top-1 acc 65.234 (63.639)	Top-5 acc 83.594 (83.649)	lr 0.01109
Train [65][1080/3239]	Time 0.219 (0.645)	Data Time 0.001 (0.038)	Loss 2.5991 (2.5101)	Entropy 1.05714 (1.05792)	Top-1 acc 58.594 (63.624)	Top-5 acc 83.594 (83.642)	lr 0.01108
Train [65][1090/3239]	Time 0.228 (0.643)	Data Time 0.001 (0.038)	Loss 2.7268 (2.5100)	Entropy 1.05702 (1.05792)	Top-1 acc 62.500 (63.625)	Top-5 acc 80.469 (83.645)	lr 0.01108
Train [65][1100/3239]	Time 0.249 (0.642)	Data Time 0.001 (0.037)	Loss 2.4469 (2.5100)	Entropy 1.05701 (1.05791)	Top-1 acc 63.281 (63.620)	Top-5 acc 84.375 (83.643)	lr 0.01108
Train [65][1110/3239]	Time 2.434 (0.640)	Data Time 0.001 (0.037)	Loss 2.4307 (2.5101)	Entropy 1.05701 (1.05790)	Top-1 acc 66.016 (63.625)	Top-5 acc 85.547 (83.640)	lr 0.01108
Train [65][1120/3239]	Time 0.220 (0.637)	Data Time 0.001 (0.037)	Loss 2.6035 (2.5101)	Entropy 1.05698 (1.05789)	Top-1 acc 61.328 (63.623)	Top-5 acc 83.594 (83.639)	lr 0.01108
Train [65][1130/3239]	Time 0.226 (0.635)	Data Time 0.001 (0.036)	Loss 2.4955 (2.5097)	Entropy 1.05694 (1.05788)	Top-1 acc 66.016 (63.637)	Top-5 acc 84.766 (83.649)	lr 0.01108
Train [65][1140/3239]	Time 0.342 (0.634)	Data Time 0.001 (0.036)	Loss 2.6075 (2.5100)	Entropy 1.05688 (1.05788)	Top-1 acc 61.328 (63.631)	Top-5 acc 85.547 (83.644)	lr 0.01108
Train [65][1150/3239]	Time 0.231 (0.632)	Data Time 0.001 (0.036)	Loss 2.6944 (2.5099)	Entropy 1.05691 (1.05787)	Top-1 acc 58.984 (63.630)	Top-5 acc 78.906 (83.641)	lr 0.01108
Train [65][1160/3239]	Time 0.250 (0.631)	Data Time 0.001 (0.036)	Loss 2.4702 (2.5098)	Entropy 1.05687 (1.05786)	Top-1 acc 65.625 (63.632)	Top-5 acc 83.594 (83.640)	lr 0.01108
Train [65][1170/3239]	Time 0.229 (0.630)	Data Time 0.001 (0.035)	Loss 2.3877 (2.5100)	Entropy 1.05682 (1.05785)	Top-1 acc 64.844 (63.631)	Top-5 acc 87.891 (83.636)	lr 0.01108
Train [65][1180/3239]	Time 0.210 (0.628)	Data Time 0.002 (0.035)	Loss 2.6287 (2.5101)	Entropy 1.05683 (1.05784)	Top-1 acc 61.328 (63.634)	Top-5 acc 82.031 (83.634)	lr 0.01107
Train [65][1190/3239]	Time 0.232 (0.627)	Data Time 0.001 (0.035)	Loss 2.5104 (2.5100)	Entropy 1.05678 (1.05783)	Top-1 acc 63.281 (63.637)	Top-5 acc 84.375 (83.636)	lr 0.01107
Train [65][1200/3239]	Time 0.315 (0.670)	Data Time 0.003 (0.034)	Loss 2.4891 (2.5094)	Entropy 1.05669 (1.05782)	Top-1 acc 64.062 (63.650)	Top-5 acc 82.812 (83.655)	lr 0.01107
Train [65][1210/3239]	Time 0.304 (0.669)	Data Time 0.002 (0.034)	Loss 2.4132 (2.5096)	Entropy 1.05665 (1.05781)	Top-1 acc 64.453 (63.632)	Top-5 acc 85.156 (83.655)	lr 0.01107
Train [65][1220/3239]	Time 2.424 (0.668)	Data Time 0.002 (0.034)	Loss 2.4418 (2.5094)	Entropy 1.05665 (1.05780)	Top-1 acc 66.016 (63.639)	Top-5 acc 83.984 (83.663)	lr 0.01107
Train [65][1230/3239]	Time 0.219 (0.664)	Data Time 0.001 (0.034)	Loss 2.5352 (2.5092)	Entropy 1.05668 (1.05780)	Top-1 acc 62.109 (63.633)	Top-5 acc 81.250 (83.669)	lr 0.01107
Train [65][1240/3239]	Time 0.241 (0.663)	Data Time 0.001 (0.033)	Loss 2.5305 (2.5094)	Entropy 1.05657 (1.05779)	Top-1 acc 63.281 (63.628)	Top-5 acc 82.812 (83.667)	lr 0.01107
Train [65][1250/3239]	Time 0.226 (0.661)	Data Time 0.001 (0.033)	Loss 2.4761 (2.5092)	Entropy 1.05657 (1.05778)	Top-1 acc 58.203 (63.628)	Top-5 acc 85.547 (83.670)	lr 0.01107
Train [65][1260/3239]	Time 0.247 (0.660)	Data Time 0.001 (0.033)	Loss 2.4354 (2.5091)	Entropy 1.05659 (1.05777)	Top-1 acc 67.578 (63.633)	Top-5 acc 83.984 (83.671)	lr 0.01107
Train [65][1270/3239]	Time 0.230 (0.658)	Data Time 0.001 (0.033)	Loss 2.6339 (2.5095)	Entropy 1.05658 (1.05776)	Top-1 acc 61.719 (63.622)	Top-5 acc 82.031 (83.665)	lr 0.01107
Train [65][1280/3239]	Time 0.216 (0.657)	Data Time 0.001 (0.032)	Loss 2.5164 (2.5102)	Entropy 1.05657 (1.05775)	Top-1 acc 63.281 (63.600)	Top-5 acc 85.156 (83.651)	lr 0.01106
Train [65][1290/3239]	Time 0.211 (0.655)	Data Time 0.001 (0.032)	Loss 2.6204 (2.5102)	Entropy 1.05653 (1.05774)	Top-1 acc 59.375 (63.596)	Top-5 acc 80.469 (83.644)	lr 0.01106
Train [65][1300/3239]	Time 0.313 (0.654)	Data Time 0.001 (0.032)	Loss 2.5002 (2.5105)	Entropy 1.05645 (1.05773)	Top-1 acc 62.500 (63.578)	Top-5 acc 83.984 (83.639)	lr 0.01106
Train [65][1310/3239]	Time 0.241 (0.652)	Data Time 0.001 (0.032)	Loss 2.3676 (2.5106)	Entropy 1.05637 (1.05772)	Top-1 acc 61.719 (63.563)	Top-5 acc 89.062 (83.641)	lr 0.01106
Train [65][1320/3239]	Time 0.332 (0.651)	Data Time 0.001 (0.031)	Loss 2.3978 (2.5106)	Entropy 1.05636 (1.05771)	Top-1 acc 65.234 (63.558)	Top-5 acc 85.547 (83.638)	lr 0.01106
Train [65][1330/3239]	Time 2.408 (0.649)	Data Time 0.001 (0.031)	Loss 2.5143 (2.5105)	Entropy 1.05636 (1.05770)	Top-1 acc 66.016 (63.559)	Top-5 acc 84.766 (83.642)	lr 0.01106
Train [65][1340/3239]	Time 0.241 (0.646)	Data Time 0.001 (0.031)	Loss 2.5705 (2.5107)	Entropy 1.05637 (1.05769)	Top-1 acc 65.234 (63.559)	Top-5 acc 85.938 (83.638)	lr 0.01106
Train [65][1350/3239]	Time 0.237 (0.645)	Data Time 0.001 (0.031)	Loss 2.4431 (2.5103)	Entropy 1.05634 (1.05768)	Top-1 acc 65.625 (63.571)	Top-5 acc 85.938 (83.648)	lr 0.01106
Train [65][1360/3239]	Time 0.214 (0.644)	Data Time 0.001 (0.031)	Loss 2.4734 (2.5101)	Entropy 1.05629 (1.05767)	Top-1 acc 64.453 (63.575)	Top-5 acc 85.547 (83.653)	lr 0.01106
Train [65][1370/3239]	Time 0.317 (0.643)	Data Time 0.001 (0.030)	Loss 2.4340 (2.5098)	Entropy 1.05629 (1.05766)	Top-1 acc 65.625 (63.583)	Top-5 acc 86.719 (83.656)	lr 0.01106
Train [65][1380/3239]	Time 0.225 (0.641)	Data Time 0.001 (0.030)	Loss 2.3760 (2.5096)	Entropy 1.05628 (1.05765)	Top-1 acc 67.188 (63.582)	Top-5 acc 87.109 (83.659)	lr 0.01105
Train [65][1390/3239]	Time 0.212 (0.640)	Data Time 0.001 (0.030)	Loss 2.4061 (2.5096)	Entropy 1.05624 (1.05764)	Top-1 acc 67.188 (63.592)	Top-5 acc 86.328 (83.654)	lr 0.01105
Train [65][1400/3239]	Time 0.219 (0.639)	Data Time 0.001 (0.030)	Loss 2.7507 (2.5100)	Entropy 1.05623 (1.05763)	Top-1 acc 56.641 (63.579)	Top-5 acc 77.734 (83.649)	lr 0.01105
Train [65][1410/3239]	Time 0.213 (0.637)	Data Time 0.001 (0.030)	Loss 2.5120 (2.5105)	Entropy 1.05620 (1.05762)	Top-1 acc 61.719 (63.569)	Top-5 acc 84.375 (83.636)	lr 0.01105
Train [65][1420/3239]	Time 0.308 (0.636)	Data Time 0.001 (0.029)	Loss 2.3111 (2.5103)	Entropy 1.05622 (1.05761)	Top-1 acc 67.188 (63.570)	Top-5 acc 87.891 (83.639)	lr 0.01105
Train [65][1430/3239]	Time 0.219 (0.635)	Data Time 0.001 (0.029)	Loss 2.4955 (2.5105)	Entropy 1.05612 (1.05760)	Top-1 acc 64.062 (63.568)	Top-5 acc 82.031 (83.636)	lr 0.01105
Train [65][1440/3239]	Time 2.432 (0.634)	Data Time 0.001 (0.029)	Loss 2.6737 (2.5108)	Entropy 1.05612 (1.05759)	Top-1 acc 57.812 (63.558)	Top-5 acc 81.250 (83.633)	lr 0.01105
Train [65][1450/3239]	Time 0.243 (0.631)	Data Time 0.001 (0.029)	Loss 2.5222 (2.5109)	Entropy 1.05611 (1.05758)	Top-1 acc 66.016 (63.557)	Top-5 acc 83.984 (83.632)	lr 0.01105
Train [65][1460/3239]	Time 0.278 (0.630)	Data Time 0.002 (0.029)	Loss 2.5493 (2.5108)	Entropy 1.05603 (1.05757)	Top-1 acc 63.281 (63.563)	Top-5 acc 81.641 (83.628)	lr 0.01105
Train [65][1470/3239]	Time 0.236 (0.629)	Data Time 0.002 (0.028)	Loss 2.5097 (2.5108)	Entropy 1.05601 (1.05756)	Top-1 acc 60.156 (63.562)	Top-5 acc 81.250 (83.630)	lr 0.01105
Train [65][1480/3239]	Time 0.242 (0.628)	Data Time 0.001 (0.028)	Loss 2.3489 (2.5110)	Entropy 1.05598 (1.05755)	Top-1 acc 67.188 (63.557)	Top-5 acc 87.109 (83.624)	lr 0.01104
Train [65][1490/3239]	Time 0.226 (0.627)	Data Time 0.002 (0.028)	Loss 2.5260 (2.5112)	Entropy 1.05591 (1.05754)	Top-1 acc 64.844 (63.559)	Top-5 acc 83.594 (83.622)	lr 0.01104
Train [65][1500/3239]	Time 0.289 (0.626)	Data Time 0.001 (0.028)	Loss 2.7006 (2.5111)	Entropy 1.05591 (1.05753)	Top-1 acc 57.812 (63.559)	Top-5 acc 78.516 (83.621)	lr 0.01104
Train [65][1510/3239]	Time 0.244 (0.625)	Data Time 0.001 (0.028)	Loss 2.8467 (2.5118)	Entropy 1.05594 (1.05751)	Top-1 acc 54.297 (63.542)	Top-5 acc 77.734 (83.608)	lr 0.01104
Train [65][1520/3239]	Time 0.252 (0.624)	Data Time 0.001 (0.028)	Loss 2.3095 (2.5119)	Entropy 1.05588 (1.05750)	Top-1 acc 69.922 (63.538)	Top-5 acc 86.328 (83.605)	lr 0.01104
Train [65][1530/3239]	Time 0.270 (0.623)	Data Time 0.001 (0.027)	Loss 2.2655 (2.5120)	Entropy 1.05579 (1.05749)	Top-1 acc 71.875 (63.531)	Top-5 acc 87.500 (83.603)	lr 0.01104
Train [65][1540/3239]	Time 0.266 (0.622)	Data Time 0.001 (0.027)	Loss 2.4453 (2.5120)	Entropy 1.05573 (1.05748)	Top-1 acc 69.141 (63.532)	Top-5 acc 85.547 (83.606)	lr 0.01104
Train [65][1550/3239]	Time 2.626 (0.621)	Data Time 0.001 (0.027)	Loss 2.4306 (2.5115)	Entropy 1.05573 (1.05747)	Top-1 acc 62.109 (63.545)	Top-5 acc 87.891 (83.620)	lr 0.01104
Train [65][1560/3239]	Time 0.229 (0.619)	Data Time 0.001 (0.027)	Loss 2.4906 (2.5115)	Entropy 1.05572 (1.05746)	Top-1 acc 63.672 (63.541)	Top-5 acc 85.156 (83.622)	lr 0.01104
Train [65][1570/3239]	Time 0.297 (0.651)	Data Time 0.004 (0.027)	Loss 2.5514 (2.5115)	Entropy 1.05570 (1.05745)	Top-1 acc 62.891 (63.545)	Top-5 acc 80.469 (83.627)	lr 0.01104
Train [65][1580/3239]	Time 0.218 (0.650)	Data Time 0.002 (0.027)	Loss 2.4841 (2.5117)	Entropy 1.05572 (1.05744)	Top-1 acc 62.891 (63.538)	Top-5 acc 83.594 (83.621)	lr 0.01103
Train [65][1590/3239]	Time 0.226 (0.649)	Data Time 0.001 (0.026)	Loss 2.6423 (2.5118)	Entropy 1.05566 (1.05743)	Top-1 acc 61.328 (63.539)	Top-5 acc 82.422 (83.623)	lr 0.01103
Train [65][1600/3239]	Time 0.237 (0.647)	Data Time 0.001 (0.026)	Loss 2.4282 (2.5116)	Entropy 1.05564 (1.05742)	Top-1 acc 65.625 (63.538)	Top-5 acc 84.766 (83.628)	lr 0.01103
Train [65][1610/3239]	Time 0.223 (0.646)	Data Time 0.001 (0.026)	Loss 2.4025 (2.5115)	Entropy 1.05564 (1.05740)	Top-1 acc 66.797 (63.538)	Top-5 acc 86.328 (83.631)	lr 0.01103
Train [65][1620/3239]	Time 0.223 (0.645)	Data Time 0.001 (0.026)	Loss 2.4916 (2.5120)	Entropy 1.05565 (1.05739)	Top-1 acc 62.500 (63.527)	Top-5 acc 84.766 (83.626)	lr 0.01103
Train [65][1630/3239]	Time 0.248 (0.644)	Data Time 0.001 (0.026)	Loss 2.3730 (2.5120)	Entropy 1.05562 (1.05738)	Top-1 acc 66.797 (63.525)	Top-5 acc 85.938 (83.629)	lr 0.01103
Train [65][1640/3239]	Time 0.205 (0.643)	Data Time 0.003 (0.026)	Loss 2.5537 (2.5118)	Entropy 1.05559 (1.05737)	Top-1 acc 61.719 (63.526)	Top-5 acc 83.203 (83.632)	lr 0.01103
Train [65][1650/3239]	Time 0.270 (0.642)	Data Time 0.001 (0.026)	Loss 2.6808 (2.5122)	Entropy 1.05554 (1.05736)	Top-1 acc 58.203 (63.518)	Top-5 acc 82.812 (83.628)	lr 0.01103
Train [65][1660/3239]	Time 2.482 (0.641)	Data Time 0.002 (0.025)	Loss 2.5131 (2.5124)	Entropy 1.05554 (1.05735)	Top-1 acc 65.234 (63.516)	Top-5 acc 84.375 (83.626)	lr 0.01103
Train [65][1670/3239]	Time 0.219 (0.639)	Data Time 0.001 (0.025)	Loss 2.5984 (2.5125)	Entropy 1.05541 (1.05734)	Top-1 acc 57.031 (63.506)	Top-5 acc 82.422 (83.623)	lr 0.01103
Train [65][1680/3239]	Time 0.237 (0.638)	Data Time 0.001 (0.025)	Loss 2.8547 (2.5126)	Entropy 1.05540 (1.05733)	Top-1 acc 56.250 (63.505)	Top-5 acc 78.125 (83.624)	lr 0.01102
Train [65][1690/3239]	Time 0.331 (0.637)	Data Time 0.001 (0.025)	Loss 2.4140 (2.5126)	Entropy 1.05542 (1.05732)	Top-1 acc 66.406 (63.509)	Top-5 acc 85.156 (83.620)	lr 0.01102
Train [65][1700/3239]	Time 0.233 (0.636)	Data Time 0.001 (0.025)	Loss 2.3622 (2.5126)	Entropy 1.05541 (1.05730)	Top-1 acc 66.797 (63.504)	Top-5 acc 88.281 (83.624)	lr 0.01102
Train [65][1710/3239]	Time 0.247 (0.635)	Data Time 0.001 (0.025)	Loss 2.5049 (2.5125)	Entropy 1.05539 (1.05729)	Top-1 acc 61.719 (63.507)	Top-5 acc 84.766 (83.628)	lr 0.01102
Train [65][1720/3239]	Time 0.243 (0.634)	Data Time 0.001 (0.025)	Loss 2.4038 (2.5126)	Entropy 1.05535 (1.05728)	Top-1 acc 64.062 (63.498)	Top-5 acc 87.109 (83.625)	lr 0.01102
Train [65][1730/3239]	Time 0.231 (0.633)	Data Time 0.001 (0.024)	Loss 2.6424 (2.5129)	Entropy 1.05522 (1.05727)	Top-1 acc 55.469 (63.494)	Top-5 acc 80.859 (83.618)	lr 0.01102
Train [65][1740/3239]	Time 0.352 (0.632)	Data Time 0.001 (0.024)	Loss 2.5116 (2.5132)	Entropy 1.05510 (1.05726)	Top-1 acc 64.453 (63.488)	Top-5 acc 83.203 (83.611)	lr 0.01102
Train [65][1750/3239]	Time 0.229 (0.632)	Data Time 0.002 (0.024)	Loss 2.4991 (2.5135)	Entropy 1.05506 (1.05725)	Top-1 acc 59.375 (63.477)	Top-5 acc 85.156 (83.603)	lr 0.01102
Train [65][1760/3239]	Time 0.259 (0.631)	Data Time 0.001 (0.024)	Loss 2.5705 (2.5135)	Entropy 1.05499 (1.05723)	Top-1 acc 58.594 (63.475)	Top-5 acc 82.031 (83.602)	lr 0.01102
Train [65][1770/3239]	Time 2.450 (0.630)	Data Time 0.002 (0.024)	Loss 2.6474 (2.5137)	Entropy 1.05499 (1.05722)	Top-1 acc 63.672 (63.471)	Top-5 acc 81.250 (83.599)	lr 0.01102
Train [65][1780/3239]	Time 0.229 (0.628)	Data Time 0.002 (0.024)	Loss 2.4033 (2.5136)	Entropy 1.05496 (1.05721)	Top-1 acc 65.625 (63.472)	Top-5 acc 86.719 (83.602)	lr 0.01101
Train [65][1790/3239]	Time 0.229 (0.627)	Data Time 0.001 (0.024)	Loss 2.4317 (2.5135)	Entropy 1.05497 (1.05720)	Top-1 acc 64.844 (63.470)	Top-5 acc 88.281 (83.605)	lr 0.01101
Train [65][1800/3239]	Time 0.211 (0.626)	Data Time 0.001 (0.024)	Loss 2.5604 (2.5135)	Entropy 1.05488 (1.05718)	Top-1 acc 60.938 (63.463)	Top-5 acc 83.594 (83.603)	lr 0.01101
Train [65][1810/3239]	Time 0.224 (0.625)	Data Time 0.001 (0.023)	Loss 2.5773 (2.5134)	Entropy 1.05485 (1.05717)	Top-1 acc 62.891 (63.465)	Top-5 acc 84.375 (83.608)	lr 0.01101
Train [65][1820/3239]	Time 0.233 (0.624)	Data Time 0.001 (0.023)	Loss 2.6071 (2.5133)	Entropy 1.05483 (1.05716)	Top-1 acc 61.719 (63.471)	Top-5 acc 81.250 (83.609)	lr 0.01101
Train [65][1830/3239]	Time 0.346 (0.624)	Data Time 0.001 (0.023)	Loss 2.5036 (2.5134)	Entropy 1.05484 (1.05715)	Top-1 acc 65.234 (63.461)	Top-5 acc 84.375 (83.608)	lr 0.01101
Train [65][1840/3239]	Time 0.233 (0.623)	Data Time 0.001 (0.023)	Loss 2.5340 (2.5134)	Entropy 1.05483 (1.05713)	Top-1 acc 64.844 (63.466)	Top-5 acc 83.203 (83.607)	lr 0.01101
Train [65][1850/3239]	Time 0.260 (0.622)	Data Time 0.001 (0.023)	Loss 2.3453 (2.5131)	Entropy 1.05484 (1.05712)	Top-1 acc 66.797 (63.473)	Top-5 acc 87.500 (83.614)	lr 0.01101
Train [65][1860/3239]	Time 0.286 (0.621)	Data Time 0.001 (0.023)	Loss 2.4631 (2.5130)	Entropy 1.05480 (1.05711)	Top-1 acc 66.406 (63.474)	Top-5 acc 82.812 (83.614)	lr 0.01101
Train [65][1870/3239]	Time 0.251 (0.620)	Data Time 0.001 (0.023)	Loss 2.4557 (2.5128)	Entropy 1.05473 (1.05710)	Top-1 acc 64.453 (63.477)	Top-5 acc 83.984 (83.617)	lr 0.01101
Train [65][1880/3239]	Time 2.492 (0.619)	Data Time 0.001 (0.023)	Loss 2.6773 (2.5131)	Entropy 1.05473 (1.05708)	Top-1 acc 58.984 (63.471)	Top-5 acc 81.641 (83.613)	lr 0.01100
Train [65][1890/3239]	Time 0.229 (0.617)	Data Time 0.001 (0.022)	Loss 2.5100 (2.5130)	Entropy 1.05468 (1.05707)	Top-1 acc 63.281 (63.474)	Top-5 acc 83.594 (83.614)	lr 0.01100
Train [65][1900/3239]	Time 0.233 (0.617)	Data Time 0.001 (0.022)	Loss 2.4611 (2.5133)	Entropy 1.05464 (1.05706)	Top-1 acc 63.281 (63.466)	Top-5 acc 84.766 (83.608)	lr 0.01100
Train [65][1910/3239]	Time 0.208 (0.616)	Data Time 0.001 (0.022)	Loss 2.6127 (2.5139)	Entropy 1.05463 (1.05704)	Top-1 acc 60.938 (63.457)	Top-5 acc 80.078 (83.593)	lr 0.01100
Train [65][1920/3239]	Time 0.330 (0.615)	Data Time 0.001 (0.022)	Loss 2.4993 (2.5138)	Entropy 1.05462 (1.05703)	Top-1 acc 64.062 (63.466)	Top-5 acc 83.984 (83.594)	lr 0.01100
Train [65][1930/3239]	Time 0.243 (0.643)	Data Time 0.002 (0.022)	Loss 2.4903 (2.5137)	Entropy 1.05462 (1.05702)	Top-1 acc 62.891 (63.471)	Top-5 acc 82.422 (83.594)	lr 0.01100
Train [65][1940/3239]	Time 0.226 (0.642)	Data Time 0.002 (0.022)	Loss 2.3293 (2.5138)	Entropy 1.05461 (1.05701)	Top-1 acc 70.312 (63.470)	Top-5 acc 85.938 (83.597)	lr 0.01100
Train [65][1950/3239]	Time 0.220 (0.641)	Data Time 0.002 (0.022)	Loss 2.6702 (2.5138)	Entropy 1.05459 (1.05699)	Top-1 acc 62.891 (63.473)	Top-5 acc 78.906 (83.595)	lr 0.01100
Train [65][1960/3239]	Time 0.229 (0.640)	Data Time 0.002 (0.022)	Loss 2.4877 (2.5139)	Entropy 1.05451 (1.05698)	Top-1 acc 64.453 (63.477)	Top-5 acc 84.766 (83.593)	lr 0.01100
Train [65][1970/3239]	Time 0.257 (0.640)	Data Time 0.001 (0.022)	Loss 2.5816 (2.5140)	Entropy 1.05445 (1.05697)	Top-1 acc 63.672 (63.475)	Top-5 acc 80.469 (83.592)	lr 0.01100
Train [65][1980/3239]	Time 0.299 (0.639)	Data Time 0.001 (0.022)	Loss 2.4334 (2.5139)	Entropy 1.05442 (1.05696)	Top-1 acc 62.500 (63.471)	Top-5 acc 85.547 (83.590)	lr 0.01099
Train [65][1990/3239]	Time 2.561 (0.638)	Data Time 0.001 (0.021)	Loss 2.5602 (2.5141)	Entropy 1.05442 (1.05694)	Top-1 acc 62.500 (63.469)	Top-5 acc 81.641 (83.585)	lr 0.01099
Train [65][2000/3239]	Time 0.228 (0.636)	Data Time 0.001 (0.021)	Loss 2.5647 (2.5142)	Entropy 1.05427 (1.05693)	Top-1 acc 65.625 (63.469)	Top-5 acc 82.812 (83.580)	lr 0.01099
Train [65][2010/3239]	Time 0.223 (0.635)	Data Time 0.001 (0.021)	Loss 2.5136 (2.5145)	Entropy 1.05425 (1.05692)	Top-1 acc 62.891 (63.463)	Top-5 acc 82.031 (83.574)	lr 0.01099
Train [65][2020/3239]	Time 0.226 (0.634)	Data Time 0.001 (0.021)	Loss 2.5366 (2.5141)	Entropy 1.05426 (1.05690)	Top-1 acc 63.672 (63.469)	Top-5 acc 82.812 (83.580)	lr 0.01099
Train [65][2030/3239]	Time 0.220 (0.634)	Data Time 0.001 (0.021)	Loss 3.2015 (2.5147)	Entropy 1.05416 (1.05689)	Top-1 acc 50.781 (63.454)	Top-5 acc 73.047 (83.572)	lr 0.01099
Train [65][2040/3239]	Time 0.225 (0.633)	Data Time 0.001 (0.021)	Loss 2.4837 (2.5147)	Entropy 1.05413 (1.05688)	Top-1 acc 64.844 (63.457)	Top-5 acc 83.594 (83.568)	lr 0.01099
Train [65][2050/3239]	Time 0.219 (0.632)	Data Time 0.001 (0.021)	Loss 2.5681 (2.5146)	Entropy 1.05415 (1.05686)	Top-1 acc 64.453 (63.460)	Top-5 acc 82.031 (83.572)	lr 0.01099
Train [65][2060/3239]	Time 0.392 (0.631)	Data Time 0.001 (0.021)	Loss 2.4532 (2.5147)	Entropy 1.05416 (1.05685)	Top-1 acc 64.844 (63.452)	Top-5 acc 85.156 (83.569)	lr 0.01099
Train [65][2070/3239]	Time 0.257 (0.630)	Data Time 0.001 (0.021)	Loss 2.3804 (2.5148)	Entropy 1.05409 (1.05684)	Top-1 acc 66.016 (63.449)	Top-5 acc 87.109 (83.570)	lr 0.01099
Train [65][2080/3239]	Time 0.247 (0.630)	Data Time 0.001 (0.021)	Loss 2.5230 (2.5148)	Entropy 1.05404 (1.05682)	Top-1 acc 64.453 (63.452)	Top-5 acc 82.812 (83.571)	lr 0.01098
Train [65][2090/3239]	Time 0.252 (0.629)	Data Time 0.001 (0.021)	Loss 2.4958 (2.5148)	Entropy 1.05397 (1.05681)	Top-1 acc 63.281 (63.451)	Top-5 acc 83.203 (83.570)	lr 0.01098
Train [65][2100/3239]	Time 2.405 (0.628)	Data Time 0.001 (0.020)	Loss 2.4739 (2.5146)	Entropy 1.05397 (1.05680)	Top-1 acc 65.234 (63.453)	Top-5 acc 83.594 (83.575)	lr 0.01098
Train [65][2110/3239]	Time 0.227 (0.626)	Data Time 0.001 (0.020)	Loss 2.5666 (2.5147)	Entropy 1.05391 (1.05678)	Top-1 acc 60.938 (63.453)	Top-5 acc 84.766 (83.577)	lr 0.01098
Train [65][2120/3239]	Time 0.245 (0.626)	Data Time 0.001 (0.020)	Loss 2.5453 (2.5147)	Entropy 1.05375 (1.05677)	Top-1 acc 63.672 (63.451)	Top-5 acc 83.203 (83.576)	lr 0.01098
Train [65][2130/3239]	Time 0.212 (0.625)	Data Time 0.002 (0.020)	Loss 2.6167 (2.5147)	Entropy 1.05372 (1.05676)	Top-1 acc 57.422 (63.451)	Top-5 acc 82.812 (83.579)	lr 0.01098
Train [65][2140/3239]	Time 0.227 (0.624)	Data Time 0.001 (0.020)	Loss 2.6444 (2.5150)	Entropy 1.05369 (1.05674)	Top-1 acc 62.500 (63.444)	Top-5 acc 81.250 (83.572)	lr 0.01098
Train [65][2150/3239]	Time 0.352 (0.623)	Data Time 0.001 (0.020)	Loss 2.7596 (2.5152)	Entropy 1.05371 (1.05673)	Top-1 acc 58.203 (63.441)	Top-5 acc 76.172 (83.568)	lr 0.01098
Train [65][2160/3239]	Time 0.231 (0.623)	Data Time 0.001 (0.020)	Loss 2.7195 (2.5149)	Entropy 1.05369 (1.05671)	Top-1 acc 60.156 (63.445)	Top-5 acc 77.734 (83.572)	lr 0.01098
Train [65][2170/3239]	Time 0.230 (0.622)	Data Time 0.001 (0.020)	Loss 2.4341 (2.5150)	Entropy 1.05364 (1.05670)	Top-1 acc 67.578 (63.444)	Top-5 acc 86.328 (83.569)	lr 0.01098
Train [65][2180/3239]	Time 0.251 (0.621)	Data Time 0.001 (0.020)	Loss 2.3712 (2.5151)	Entropy 1.05356 (1.05669)	Top-1 acc 66.016 (63.444)	Top-5 acc 87.500 (83.569)	lr 0.01097
Train [65][2190/3239]	Time 0.227 (0.621)	Data Time 0.001 (0.020)	Loss 2.5249 (2.5152)	Entropy 1.05361 (1.05667)	Top-1 acc 64.453 (63.443)	Top-5 acc 82.031 (83.567)	lr 0.01097
Train [65][2200/3239]	Time 0.244 (0.620)	Data Time 0.001 (0.020)	Loss 2.4466 (2.5154)	Entropy 1.05363 (1.05666)	Top-1 acc 66.797 (63.438)	Top-5 acc 84.766 (83.562)	lr 0.01097
Train [65][2210/3239]	Time 2.380 (0.619)	Data Time 0.001 (0.020)	Loss 2.4907 (2.5157)	Entropy 1.05363 (1.05664)	Top-1 acc 62.891 (63.427)	Top-5 acc 83.984 (83.555)	lr 0.01097
Train [65][2220/3239]	Time 0.218 (0.618)	Data Time 0.001 (0.019)	Loss 2.3580 (2.5157)	Entropy 1.05361 (1.05663)	Top-1 acc 67.969 (63.427)	Top-5 acc 86.328 (83.554)	lr 0.01097
Train [65][2230/3239]	Time 0.261 (0.617)	Data Time 0.002 (0.019)	Loss 2.4910 (2.5159)	Entropy 1.05357 (1.05662)	Top-1 acc 64.062 (63.423)	Top-5 acc 81.250 (83.551)	lr 0.01097
Train [65][2240/3239]	Time 0.282 (0.616)	Data Time 0.001 (0.019)	Loss 2.5202 (2.5157)	Entropy 1.05343 (1.05660)	Top-1 acc 60.938 (63.423)	Top-5 acc 83.203 (83.555)	lr 0.01097
Train [65][2250/3239]	Time 0.212 (0.616)	Data Time 0.001 (0.019)	Loss 2.6626 (2.5157)	Entropy 1.05342 (1.05659)	Top-1 acc 59.375 (63.425)	Top-5 acc 80.859 (83.555)	lr 0.01097
Train [65][2260/3239]	Time 0.238 (0.615)	Data Time 0.001 (0.019)	Loss 2.4344 (2.5156)	Entropy 1.05339 (1.05657)	Top-1 acc 64.062 (63.427)	Top-5 acc 86.719 (83.555)	lr 0.01097
Train [65][2270/3239]	Time 0.247 (0.614)	Data Time 0.001 (0.019)	Loss 2.4345 (2.5157)	Entropy 1.05338 (1.05656)	Top-1 acc 65.625 (63.422)	Top-5 acc 84.766 (83.554)	lr 0.01097
Train [65][2280/3239]	Time 0.249 (0.614)	Data Time 0.001 (0.019)	Loss 2.3882 (2.5159)	Entropy 1.05338 (1.05655)	Top-1 acc 68.750 (63.417)	Top-5 acc 82.812 (83.548)	lr 0.01096
Train [65][2290/3239]	Time 0.407 (0.637)	Data Time 0.002 (0.019)	Loss 2.5429 (2.5159)	Entropy 1.05338 (1.05653)	Top-1 acc 64.453 (63.423)	Top-5 acc 85.156 (83.547)	lr 0.01096
Train [65][2300/3239]	Time 0.295 (0.636)	Data Time 0.003 (0.019)	Loss 2.4872 (2.5159)	Entropy 1.05337 (1.05652)	Top-1 acc 66.797 (63.427)	Top-5 acc 80.469 (83.546)	lr 0.01096
Train [65][2310/3239]	Time 0.259 (0.635)	Data Time 0.002 (0.019)	Loss 2.3878 (2.5156)	Entropy 1.05339 (1.05650)	Top-1 acc 64.844 (63.435)	Top-5 acc 85.547 (83.553)	lr 0.01096
Train [65][2320/3239]	Time 2.518 (0.635)	Data Time 0.002 (0.019)	Loss 2.3168 (2.5156)	Entropy 1.05339 (1.05649)	Top-1 acc 65.625 (63.433)	Top-5 acc 91.406 (83.557)	lr 0.01096
Train [65][2330/3239]	Time 0.327 (0.633)	Data Time 0.001 (0.019)	Loss 2.4108 (2.5155)	Entropy 1.05334 (1.05648)	Top-1 acc 68.750 (63.434)	Top-5 acc 85.547 (83.560)	lr 0.01096
Train [65][2340/3239]	Time 0.244 (0.632)	Data Time 0.001 (0.019)	Loss 2.4861 (2.5156)	Entropy 1.05326 (1.05646)	Top-1 acc 63.672 (63.431)	Top-5 acc 84.375 (83.559)	lr 0.01096
Train [65][2350/3239]	Time 0.239 (0.632)	Data Time 0.001 (0.018)	Loss 2.3722 (2.5154)	Entropy 1.05321 (1.05645)	Top-1 acc 69.141 (63.437)	Top-5 acc 83.984 (83.562)	lr 0.01096
Train [65][2360/3239]	Time 0.240 (0.631)	Data Time 0.001 (0.018)	Loss 2.3640 (2.5153)	Entropy 1.05322 (1.05644)	Top-1 acc 63.281 (63.438)	Top-5 acc 88.672 (83.567)	lr 0.01096
Train [65][2370/3239]	Time 0.225 (0.630)	Data Time 0.001 (0.018)	Loss 2.5677 (2.5153)	Entropy 1.05351 (1.05642)	Top-1 acc 61.719 (63.435)	Top-5 acc 83.203 (83.568)	lr 0.01096
Train [65][2380/3239]	Time 0.231 (0.630)	Data Time 0.001 (0.018)	Loss 2.3601 (2.5154)	Entropy 1.05343 (1.05641)	Top-1 acc 65.234 (63.432)	Top-5 acc 88.281 (83.570)	lr 0.01095
Train [65][2390/3239]	Time 0.229 (0.629)	Data Time 0.002 (0.018)	Loss 2.4329 (2.5154)	Entropy 1.05347 (1.05640)	Top-1 acc 67.188 (63.431)	Top-5 acc 83.984 (83.569)	lr 0.01095
Train [65][2400/3239]	Time 0.255 (0.628)	Data Time 0.001 (0.018)	Loss 2.3583 (2.5153)	Entropy 1.05346 (1.05639)	Top-1 acc 66.016 (63.429)	Top-5 acc 88.281 (83.573)	lr 0.01095
Train [65][2410/3239]	Time 0.228 (0.628)	Data Time 0.001 (0.018)	Loss 2.5986 (2.5153)	Entropy 1.05344 (1.05637)	Top-1 acc 60.938 (63.432)	Top-5 acc 83.984 (83.574)	lr 0.01095
Train [65][2420/3239]	Time 0.407 (0.627)	Data Time 0.001 (0.018)	Loss 2.6783 (2.5153)	Entropy 1.05341 (1.05636)	Top-1 acc 58.984 (63.433)	Top-5 acc 80.078 (83.571)	lr 0.01095
Train [65][2430/3239]	Time 2.525 (0.626)	Data Time 0.001 (0.018)	Loss 2.5100 (2.5153)	Entropy 1.05341 (1.05635)	Top-1 acc 63.672 (63.431)	Top-5 acc 83.594 (83.569)	lr 0.01095
Train [65][2440/3239]	Time 0.229 (0.625)	Data Time 0.001 (0.018)	Loss 2.5949 (2.5155)	Entropy 1.05340 (1.05634)	Top-1 acc 65.234 (63.428)	Top-5 acc 81.641 (83.565)	lr 0.01095
Train [65][2450/3239]	Time 0.225 (0.624)	Data Time 0.001 (0.018)	Loss 2.5903 (2.5155)	Entropy 1.05336 (1.05633)	Top-1 acc 60.156 (63.429)	Top-5 acc 82.422 (83.562)	lr 0.01095
Train [65][2460/3239]	Time 0.226 (0.623)	Data Time 0.001 (0.018)	Loss 2.6169 (2.5152)	Entropy 1.05335 (1.05631)	Top-1 acc 59.766 (63.436)	Top-5 acc 82.031 (83.566)	lr 0.01095
Train [65][2470/3239]	Time 0.245 (0.623)	Data Time 0.001 (0.018)	Loss 2.4541 (2.5154)	Entropy 1.05333 (1.05630)	Top-1 acc 62.109 (63.432)	Top-5 acc 85.547 (83.562)	lr 0.01095
Train [65][2480/3239]	Time 0.241 (0.622)	Data Time 0.001 (0.018)	Loss 2.4888 (2.5156)	Entropy 1.05332 (1.05629)	Top-1 acc 66.797 (63.430)	Top-5 acc 82.812 (83.559)	lr 0.01094
Train [65][2490/3239]	Time 0.230 (0.622)	Data Time 0.001 (0.018)	Loss 2.5487 (2.5157)	Entropy 1.05328 (1.05628)	Top-1 acc 63.672 (63.431)	Top-5 acc 83.984 (83.554)	lr 0.01094
Train [65][2500/3239]	Time 0.259 (0.621)	Data Time 0.001 (0.017)	Loss 2.5795 (2.5157)	Entropy 1.05324 (1.05627)	Top-1 acc 64.844 (63.436)	Top-5 acc 82.422 (83.552)	lr 0.01094
Train [65][2510/3239]	Time 0.371 (0.620)	Data Time 0.001 (0.017)	Loss 2.3438 (2.5155)	Entropy 1.05326 (1.05625)	Top-1 acc 68.359 (63.445)	Top-5 acc 87.109 (83.556)	lr 0.01094
Train [65][2520/3239]	Time 0.221 (0.620)	Data Time 0.001 (0.017)	Loss 2.7372 (2.5156)	Entropy 1.05331 (1.05624)	Top-1 acc 55.469 (63.440)	Top-5 acc 83.203 (83.554)	lr 0.01094
Train [65][2530/3239]	Time 0.305 (0.619)	Data Time 0.001 (0.017)	Loss 2.5214 (2.5156)	Entropy 1.05327 (1.05623)	Top-1 acc 61.328 (63.439)	Top-5 acc 83.203 (83.553)	lr 0.01094
Train [65][2540/3239]	Time 2.393 (0.619)	Data Time 0.001 (0.017)	Loss 2.4942 (2.5156)	Entropy 1.05327 (1.05622)	Top-1 acc 62.891 (63.435)	Top-5 acc 83.203 (83.553)	lr 0.01094
Train [65][2550/3239]	Time 0.227 (0.617)	Data Time 0.001 (0.017)	Loss 2.4583 (2.5153)	Entropy 1.05327 (1.05621)	Top-1 acc 67.578 (63.442)	Top-5 acc 83.203 (83.556)	lr 0.01094
Train [65][2560/3239]	Time 0.328 (0.617)	Data Time 0.001 (0.017)	Loss 2.4646 (2.5153)	Entropy 1.05321 (1.05620)	Top-1 acc 69.141 (63.443)	Top-5 acc 85.938 (83.558)	lr 0.01094
Train [65][2570/3239]	Time 0.237 (0.616)	Data Time 0.001 (0.017)	Loss 2.4676 (2.5153)	Entropy 1.05319 (1.05618)	Top-1 acc 67.188 (63.444)	Top-5 acc 82.422 (83.558)	lr 0.01094
Train [65][2580/3239]	Time 0.232 (0.615)	Data Time 0.001 (0.017)	Loss 2.6587 (2.5153)	Entropy 1.05306 (1.05617)	Top-1 acc 59.766 (63.445)	Top-5 acc 78.125 (83.555)	lr 0.01093
Train [65][2590/3239]	Time 0.237 (0.615)	Data Time 0.001 (0.017)	Loss 2.4373 (2.5156)	Entropy 1.05303 (1.05616)	Top-1 acc 63.672 (63.435)	Top-5 acc 85.156 (83.549)	lr 0.01093
Train [65][2600/3239]	Time 0.278 (0.614)	Data Time 0.001 (0.017)	Loss 2.4579 (2.5157)	Entropy 1.05303 (1.05615)	Top-1 acc 69.141 (63.435)	Top-5 acc 84.766 (83.545)	lr 0.01093
Train [65][2610/3239]	Time 0.250 (0.614)	Data Time 0.001 (0.017)	Loss 2.6093 (2.5156)	Entropy 1.05302 (1.05614)	Top-1 acc 61.719 (63.435)	Top-5 acc 83.594 (83.548)	lr 0.01093
Train [65][2620/3239]	Time 0.292 (0.613)	Data Time 0.001 (0.017)	Loss 2.4698 (2.5156)	Entropy 1.05303 (1.05612)	Top-1 acc 65.625 (63.439)	Top-5 acc 83.984 (83.547)	lr 0.01093
Train [65][2630/3239]	Time 0.253 (0.613)	Data Time 0.001 (0.017)	Loss 2.4752 (2.5153)	Entropy 1.05304 (1.05611)	Top-1 acc 64.453 (63.450)	Top-5 acc 83.203 (83.553)	lr 0.01093
Train [65][2640/3239]	Time 0.238 (0.612)	Data Time 0.001 (0.017)	Loss 2.4717 (2.5154)	Entropy 1.05303 (1.05610)	Top-1 acc 67.188 (63.449)	Top-5 acc 84.375 (83.550)	lr 0.01093
Train [65][2650/3239]	Time 0.375 (0.633)	Data Time 0.003 (0.017)	Loss 2.4143 (2.5154)	Entropy 1.05303 (1.05609)	Top-1 acc 69.922 (63.447)	Top-5 acc 83.984 (83.547)	lr 0.01093
Train [65][2660/3239]	Time 0.218 (0.633)	Data Time 0.002 (0.016)	Loss 2.5852 (2.5154)	Entropy 1.05302 (1.05608)	Top-1 acc 62.500 (63.447)	Top-5 acc 82.031 (83.547)	lr 0.01093
Train [65][2670/3239]	Time 0.276 (0.632)	Data Time 0.002 (0.016)	Loss 2.5584 (2.5154)	Entropy 1.05298 (1.05607)	Top-1 acc 61.328 (63.448)	Top-5 acc 85.547 (83.548)	lr 0.01093
Train [65][2680/3239]	Time 0.286 (0.632)	Data Time 0.002 (0.016)	Loss 2.4612 (2.5152)	Entropy 1.05290 (1.05605)	Top-1 acc 60.547 (63.451)	Top-5 acc 86.328 (83.554)	lr 0.01092
Train [65][2690/3239]	Time 0.275 (0.631)	Data Time 0.001 (0.016)	Loss 2.4338 (2.5150)	Entropy 1.05292 (1.05604)	Top-1 acc 66.406 (63.458)	Top-5 acc 83.594 (83.558)	lr 0.01092
Train [65][2700/3239]	Time 0.237 (0.630)	Data Time 0.001 (0.016)	Loss 2.5494 (2.5151)	Entropy 1.05292 (1.05603)	Top-1 acc 60.156 (63.456)	Top-5 acc 83.594 (83.556)	lr 0.01092
Train [65][2710/3239]	Time 0.265 (0.630)	Data Time 0.001 (0.016)	Loss 2.7278 (2.5150)	Entropy 1.05280 (1.05602)	Top-1 acc 58.984 (63.456)	Top-5 acc 80.078 (83.557)	lr 0.01092
Train [65][2720/3239]	Time 0.228 (0.629)	Data Time 0.001 (0.016)	Loss 2.4257 (2.5149)	Entropy 1.05283 (1.05601)	Top-1 acc 65.625 (63.456)	Top-5 acc 83.203 (83.557)	lr 0.01092
Train [65][2730/3239]	Time 0.226 (0.629)	Data Time 0.001 (0.016)	Loss 2.7192 (2.5150)	Entropy 1.05284 (1.05600)	Top-1 acc 59.375 (63.448)	Top-5 acc 80.469 (83.555)	lr 0.01092
Train [65][2740/3239]	Time 0.392 (0.628)	Data Time 0.001 (0.016)	Loss 2.4905 (2.5150)	Entropy 1.05280 (1.05598)	Top-1 acc 63.281 (63.448)	Top-5 acc 84.375 (83.555)	lr 0.01092
Train [65][2750/3239]	Time 0.267 (0.628)	Data Time 0.001 (0.016)	Loss 2.3394 (2.5149)	Entropy 1.05282 (1.05597)	Top-1 acc 65.625 (63.452)	Top-5 acc 88.672 (83.558)	lr 0.01092
Train [65][2760/3239]	Time 0.224 (0.627)	Data Time 0.001 (0.016)	Loss 2.4479 (2.5151)	Entropy 1.05285 (1.05596)	Top-1 acc 65.625 (63.447)	Top-5 acc 83.984 (83.553)	lr 0.01092
Train [65][2770/3239]	Time 0.274 (0.626)	Data Time 0.001 (0.016)	Loss 2.5172 (2.5153)	Entropy 1.05284 (1.05595)	Top-1 acc 62.891 (63.438)	Top-5 acc 83.594 (83.550)	lr 0.01092
Train [65][2780/3239]	Time 0.224 (0.626)	Data Time 0.001 (0.016)	Loss 2.6713 (2.5155)	Entropy 1.05282 (1.05594)	Top-1 acc 60.547 (63.434)	Top-5 acc 81.641 (83.548)	lr 0.01091
Train [65][2790/3239]	Time 0.259 (0.625)	Data Time 0.001 (0.016)	Loss 2.6416 (2.5157)	Entropy 1.05283 (1.05593)	Top-1 acc 59.766 (63.432)	Top-5 acc 84.375 (83.547)	lr 0.01091
Train [65][2800/3239]	Time 0.251 (0.625)	Data Time 0.001 (0.016)	Loss 2.5324 (2.5158)	Entropy 1.05277 (1.05592)	Top-1 acc 62.891 (63.430)	Top-5 acc 83.984 (83.543)	lr 0.01091
Train [65][2810/3239]	Time 0.224 (0.624)	Data Time 0.001 (0.016)	Loss 2.5412 (2.5160)	Entropy 1.05268 (1.05591)	Top-1 acc 66.797 (63.425)	Top-5 acc 83.984 (83.539)	lr 0.01091
Train [65][2820/3239]	Time 0.274 (0.624)	Data Time 0.001 (0.016)	Loss 2.4348 (2.5159)	Entropy 1.05272 (1.05589)	Top-1 acc 66.406 (63.423)	Top-5 acc 85.156 (83.539)	lr 0.01091
Train [65][2830/3239]	Time 0.340 (0.623)	Data Time 0.001 (0.016)	Loss 2.5027 (2.5160)	Entropy 1.05273 (1.05588)	Top-1 acc 60.547 (63.420)	Top-5 acc 83.594 (83.535)	lr 0.01091
Train [65][2840/3239]	Time 0.290 (0.623)	Data Time 0.001 (0.016)	Loss 2.5265 (2.5158)	Entropy 1.05274 (1.05587)	Top-1 acc 60.156 (63.424)	Top-5 acc 83.594 (83.542)	lr 0.01091
Train [65][2850/3239]	Time 0.239 (0.622)	Data Time 0.001 (0.016)	Loss 2.4461 (2.5158)	Entropy 1.05272 (1.05586)	Top-1 acc 64.062 (63.421)	Top-5 acc 84.375 (83.543)	lr 0.01091
Train [65][2860/3239]	Time 0.274 (0.621)	Data Time 0.001 (0.015)	Loss 2.5848 (2.5158)	Entropy 1.05272 (1.05585)	Top-1 acc 61.719 (63.423)	Top-5 acc 83.203 (83.541)	lr 0.01091
Train [65][2870/3239]	Time 0.292 (0.621)	Data Time 0.001 (0.015)	Loss 2.6049 (2.5159)	Entropy 1.05269 (1.05584)	Top-1 acc 63.281 (63.419)	Top-5 acc 80.078 (83.540)	lr 0.01091
Train [65][2880/3239]	Time 0.226 (0.620)	Data Time 0.001 (0.015)	Loss 2.4524 (2.5157)	Entropy 1.05276 (1.05583)	Top-1 acc 66.016 (63.426)	Top-5 acc 85.938 (83.542)	lr 0.01090
Train [65][2890/3239]	Time 0.258 (0.620)	Data Time 0.001 (0.015)	Loss 2.5490 (2.5158)	Entropy 1.05274 (1.05582)	Top-1 acc 62.500 (63.428)	Top-5 acc 82.031 (83.539)	lr 0.01090
Train [65][2900/3239]	Time 0.224 (0.619)	Data Time 0.001 (0.015)	Loss 2.5384 (2.5159)	Entropy 1.05265 (1.05581)	Top-1 acc 61.328 (63.424)	Top-5 acc 80.469 (83.537)	lr 0.01090
Train [65][2910/3239]	Time 0.217 (0.619)	Data Time 0.001 (0.015)	Loss 2.4398 (2.5161)	Entropy 1.05263 (1.05580)	Top-1 acc 65.234 (63.421)	Top-5 acc 83.594 (83.530)	lr 0.01090
Train [65][2920/3239]	Time 0.213 (0.618)	Data Time 0.001 (0.015)	Loss 2.5479 (2.5161)	Entropy 1.05254 (1.05579)	Top-1 acc 63.672 (63.419)	Top-5 acc 82.812 (83.529)	lr 0.01090
Train [65][2930/3239]	Time 0.254 (0.618)	Data Time 0.001 (0.015)	Loss 2.4737 (2.5161)	Entropy 1.05254 (1.05577)	Top-1 acc 60.938 (63.419)	Top-5 acc 84.375 (83.529)	lr 0.01090
Train [65][2940/3239]	Time 0.243 (0.617)	Data Time 0.001 (0.015)	Loss 2.4883 (2.5161)	Entropy 1.05251 (1.05576)	Top-1 acc 68.359 (63.424)	Top-5 acc 82.812 (83.527)	lr 0.01090
Train [65][2950/3239]	Time 0.293 (0.617)	Data Time 0.001 (0.015)	Loss 2.3092 (2.5160)	Entropy 1.05243 (1.05575)	Top-1 acc 71.094 (63.429)	Top-5 acc 89.062 (83.530)	lr 0.01090
Train [65][2960/3239]	Time 0.280 (0.616)	Data Time 0.001 (0.015)	Loss 2.4168 (2.5160)	Entropy 1.05234 (1.05574)	Top-1 acc 62.109 (63.427)	Top-5 acc 85.547 (83.532)	lr 0.01090
Train [65][2970/3239]	Time 0.272 (0.616)	Data Time 0.002 (0.015)	Loss 2.5281 (2.5160)	Entropy 1.05229 (1.05573)	Top-1 acc 63.281 (63.427)	Top-5 acc 82.031 (83.530)	lr 0.01090
Train [65][2980/3239]	Time 0.311 (0.633)	Data Time 0.004 (0.015)	Loss 2.4163 (2.5161)	Entropy 1.05225 (1.05572)	Top-1 acc 68.359 (63.429)	Top-5 acc 84.766 (83.528)	lr 0.01089
Train [65][2990/3239]	Time 0.252 (0.633)	Data Time 0.002 (0.015)	Loss 2.5598 (2.5162)	Entropy 1.05217 (1.05571)	Top-1 acc 65.625 (63.423)	Top-5 acc 82.422 (83.527)	lr 0.01089
Train [65][3000/3239]	Time 0.237 (0.632)	Data Time 0.001 (0.015)	Loss 2.4315 (2.5160)	Entropy 1.05216 (1.05569)	Top-1 acc 67.578 (63.430)	Top-5 acc 84.375 (83.532)	lr 0.01089
Train [65][3010/3239]	Time 0.242 (0.632)	Data Time 0.001 (0.015)	Loss 2.5456 (2.5160)	Entropy 1.05215 (1.05568)	Top-1 acc 60.547 (63.427)	Top-5 acc 85.156 (83.531)	lr 0.01089
Train [65][3020/3239]	Time 0.249 (0.631)	Data Time 0.001 (0.015)	Loss 2.3865 (2.5159)	Entropy 1.05216 (1.05567)	Top-1 acc 66.406 (63.431)	Top-5 acc 84.766 (83.532)	lr 0.01089
Train [65][3030/3239]	Time 0.240 (0.631)	Data Time 0.001 (0.015)	Loss 2.5791 (2.5160)	Entropy 1.05213 (1.05566)	Top-1 acc 60.938 (63.430)	Top-5 acc 82.031 (83.531)	lr 0.01089
Train [65][3040/3239]	Time 0.235 (0.630)	Data Time 0.001 (0.015)	Loss 2.4414 (2.5159)	Entropy 1.05212 (1.05565)	Top-1 acc 68.359 (63.432)	Top-5 acc 83.984 (83.535)	lr 0.01089
Train [65][3050/3239]	Time 0.351 (0.630)	Data Time 0.001 (0.015)	Loss 2.8824 (2.5162)	Entropy 1.05211 (1.05564)	Top-1 acc 55.078 (63.427)	Top-5 acc 74.609 (83.531)	lr 0.01089
Train [65][3060/3239]	Time 0.242 (0.629)	Data Time 0.001 (0.015)	Loss 2.7178 (2.5164)	Entropy 1.05209 (1.05562)	Top-1 acc 59.766 (63.422)	Top-5 acc 77.734 (83.524)	lr 0.01089
Train [65][3070/3239]	Time 0.228 (0.629)	Data Time 0.001 (0.015)	Loss 2.4059 (2.5162)	Entropy 1.05196 (1.05561)	Top-1 acc 64.062 (63.429)	Top-5 acc 87.891 (83.531)	lr 0.01089
Train [65][3080/3239]	Time 0.257 (0.629)	Data Time 0.002 (0.014)	Loss 2.7330 (2.5161)	Entropy 1.05192 (1.05560)	Top-1 acc 58.984 (63.433)	Top-5 acc 80.469 (83.534)	lr 0.01088
Train [65][3090/3239]	Time 0.249 (0.628)	Data Time 0.001 (0.014)	Loss 2.4267 (2.5161)	Entropy 1.05180 (1.05559)	Top-1 acc 65.625 (63.433)	Top-5 acc 85.156 (83.534)	lr 0.01088
Train [65][3100/3239]	Time 0.252 (0.628)	Data Time 0.001 (0.014)	Loss 2.5075 (2.5162)	Entropy 1.05179 (1.05558)	Top-1 acc 60.938 (63.429)	Top-5 acc 80.859 (83.531)	lr 0.01088
Train [65][3110/3239]	Time 0.215 (0.627)	Data Time 0.001 (0.014)	Loss 2.4601 (2.5162)	Entropy 1.05173 (1.05556)	Top-1 acc 61.719 (63.428)	Top-5 acc 87.891 (83.533)	lr 0.01088
Train [65][3120/3239]	Time 0.233 (0.627)	Data Time 0.001 (0.014)	Loss 2.5270 (2.5163)	Entropy 1.05172 (1.05555)	Top-1 acc 64.844 (63.427)	Top-5 acc 83.984 (83.530)	lr 0.01088
Train [65][3130/3239]	Time 0.241 (0.626)	Data Time 0.001 (0.014)	Loss 2.6310 (2.5165)	Entropy 1.05173 (1.05554)	Top-1 acc 59.375 (63.421)	Top-5 acc 81.641 (83.526)	lr 0.01088
Train [65][3140/3239]	Time 0.322 (0.626)	Data Time 0.001 (0.014)	Loss 2.6327 (2.5166)	Entropy 1.05164 (1.05553)	Top-1 acc 59.766 (63.416)	Top-5 acc 81.641 (83.525)	lr 0.01088
Train [65][3150/3239]	Time 0.238 (0.625)	Data Time 0.001 (0.014)	Loss 2.5806 (2.5165)	Entropy 1.05157 (1.05552)	Top-1 acc 62.500 (63.417)	Top-5 acc 82.031 (83.525)	lr 0.01088
Train [65][3160/3239]	Time 0.234 (0.625)	Data Time 0.001 (0.014)	Loss 2.5586 (2.5166)	Entropy 1.05158 (1.05550)	Top-1 acc 61.328 (63.417)	Top-5 acc 82.422 (83.524)	lr 0.01088
Train [65][3170/3239]	Time 0.245 (0.625)	Data Time 0.001 (0.014)	Loss 2.5588 (2.5165)	Entropy 1.05163 (1.05549)	Top-1 acc 60.156 (63.419)	Top-5 acc 83.984 (83.527)	lr 0.01088
Train [65][3180/3239]	Time 0.378 (0.624)	Data Time 0.000 (0.014)	Loss 2.4907 (2.5164)	Entropy 1.05159 (1.05548)	Top-1 acc 64.062 (63.426)	Top-5 acc 83.984 (83.530)	lr 0.01087
Train [65][3190/3239]	Time 0.218 (0.624)	Data Time 0.000 (0.014)	Loss 2.4868 (2.5165)	Entropy 1.05158 (1.05547)	Top-1 acc 66.016 (63.421)	Top-5 acc 82.812 (83.530)	lr 0.01087
Train [65][3200/3239]	Time 0.220 (0.623)	Data Time 0.000 (0.014)	Loss 2.6075 (2.5164)	Entropy 1.05157 (1.05545)	Top-1 acc 61.328 (63.423)	Top-5 acc 82.422 (83.529)	lr 0.01087
Train [65][3210/3239]	Time 0.219 (0.623)	Data Time 0.000 (0.014)	Loss 2.4679 (2.5166)	Entropy 1.05148 (1.05544)	Top-1 acc 68.750 (63.420)	Top-5 acc 84.375 (83.527)	lr 0.01087
Train [65][3220/3239]	Time 0.240 (0.622)	Data Time 0.000 (0.014)	Loss 2.5479 (2.5167)	Entropy 1.05137 (1.05543)	Top-1 acc 62.891 (63.416)	Top-5 acc 84.375 (83.529)	lr 0.01087
Train [65][3230/3239]	Time 0.210 (0.621)	Data Time 0.000 (0.014)	Loss 2.5063 (2.5167)	Entropy 1.05132 (1.05542)	Top-1 acc 62.109 (63.417)	Top-5 acc 83.594 (83.529)	lr 0.01087
Train [65][3239/3239]	Time 2.287 (0.621)	Data Time 0.000 (0.014)	Loss 2.5832 (2.5168)	Entropy 1.05132 (1.05541)	Top-1 acc 67.901 (63.413)	Top-5 acc 83.951 (83.527)	lr 0.01087
==========Valid [65/120]	loss 1.410	top-1 acc 67.979 (67.979)	top-5 acc 87.116	Train top-1 63.413	top-5 83.527	Entropy 1.05132	Latency-None: 0.000ms	Flops: 546.53M
Train [66][0/3239]	Time 42.435 (42.435)	Data Time 40.335 (40.335)	Loss 2.4791 (2.4791)	Entropy 1.05130 (1.05130)	Top-1 acc 64.062 (64.062)	Top-5 acc 83.594 (83.594)	lr 0.01087
Train [66][10/3239]	Time 2.590 (4.400)	Data Time 0.002 (3.702)	Loss 2.5361 (2.4757)	Entropy 1.05130 (1.05130)	Top-1 acc 61.719 (63.743)	Top-5 acc 82.422 (84.482)	lr 0.01087
Train [66][20/3239]	Time 0.252 (2.424)	Data Time 0.002 (1.940)	Loss 2.8105 (2.5037)	Entropy 1.05121 (1.05126)	Top-1 acc 57.422 (63.579)	Top-5 acc 75.781 (83.761)	lr 0.01087
Train [66][30/3239]	Time 0.230 (1.794)	Data Time 0.001 (1.315)	Loss 2.5072 (2.5236)	Entropy 1.05117 (1.05123)	Top-1 acc 63.281 (63.344)	Top-5 acc 84.375 (83.266)	lr 0.01087
Train [66][40/3239]	Time 0.236 (1.478)	Data Time 0.002 (0.995)	Loss 2.7006 (2.5158)	Entropy 1.05116 (1.05121)	Top-1 acc 58.984 (63.634)	Top-5 acc 79.297 (83.441)	lr 0.01086
Train [66][50/3239]	Time 0.242 (1.282)	Data Time 0.001 (0.800)	Loss 2.5287 (2.5110)	Entropy 1.05115 (1.05120)	Top-1 acc 62.500 (63.741)	Top-5 acc 82.812 (83.494)	lr 0.01086
Train [66][60/3239]	Time 0.220 (1.148)	Data Time 0.001 (0.669)	Loss 2.5961 (2.5160)	Entropy 1.05112 (1.05119)	Top-1 acc 62.891 (63.800)	Top-5 acc 83.203 (83.389)	lr 0.01086
Train [66][70/3239]	Time 0.225 (1.051)	Data Time 0.001 (0.575)	Loss 2.4254 (2.5104)	Entropy 1.05088 (1.05116)	Top-1 acc 65.234 (63.936)	Top-5 acc 85.938 (83.451)	lr 0.01086
Train [66][80/3239]	Time 0.552 (1.657)	Data Time 0.003 (0.504)	Loss 2.5148 (2.5089)	Entropy 1.05093 (1.05113)	Top-1 acc 65.234 (63.971)	Top-5 acc 83.203 (83.478)	lr 0.01086
Train [66][90/3239]	Time 0.230 (1.529)	Data Time 0.002 (0.449)	Loss 2.4335 (2.5071)	Entropy 1.05091 (1.05111)	Top-1 acc 66.406 (64.062)	Top-5 acc 85.938 (83.512)	lr 0.01086
Train [66][100/3239]	Time 0.231 (1.425)	Data Time 0.001 (0.405)	Loss 2.3493 (2.5041)	Entropy 1.05088 (1.05109)	Top-1 acc 63.281 (64.082)	Top-5 acc 87.109 (83.578)	lr 0.01086
Train [66][110/3239]	Time 0.308 (1.341)	Data Time 0.001 (0.369)	Loss 2.5474 (2.5032)	Entropy 1.05081 (1.05107)	Top-1 acc 63.281 (64.101)	Top-5 acc 82.812 (83.548)	lr 0.01086
Train [66][120/3239]	Time 2.498 (1.268)	Data Time 0.001 (0.338)	Loss 2.7335 (2.5006)	Entropy 1.05081 (1.05105)	Top-1 acc 57.422 (64.098)	Top-5 acc 81.250 (83.636)	lr 0.01086
Train [66][130/3239]	Time 0.229 (1.191)	Data Time 0.001 (0.313)	Loss 2.4281 (2.5000)	Entropy 1.05060 (1.05101)	Top-1 acc 64.453 (64.030)	Top-5 acc 84.766 (83.644)	lr 0.01086
Train [66][140/3239]	Time 0.231 (1.139)	Data Time 0.001 (0.290)	Loss 2.6180 (2.4971)	Entropy 1.05068 (1.05099)	Top-1 acc 60.547 (64.093)	Top-5 acc 81.641 (83.729)	lr 0.01085
Train [66][150/3239]	Time 0.239 (1.095)	Data Time 0.001 (0.271)	Loss 2.4299 (2.5008)	Entropy 1.05058 (1.05096)	Top-1 acc 68.750 (63.987)	Top-5 acc 84.766 (83.671)	lr 0.01085
Train [66][160/3239]	Time 0.231 (1.056)	Data Time 0.001 (0.255)	Loss 2.6213 (2.4991)	Entropy 1.05052 (1.05094)	Top-1 acc 62.891 (64.036)	Top-5 acc 81.641 (83.686)	lr 0.01085
Train [66][170/3239]	Time 0.222 (1.020)	Data Time 0.001 (0.240)	Loss 2.5394 (2.4990)	Entropy 1.05044 (1.05091)	Top-1 acc 64.062 (64.081)	Top-5 acc 83.594 (83.683)	lr 0.01085
Train [66][180/3239]	Time 0.219 (0.990)	Data Time 0.001 (0.227)	Loss 2.5800 (2.4969)	Entropy 1.05041 (1.05088)	Top-1 acc 63.281 (64.160)	Top-5 acc 78.906 (83.719)	lr 0.01085
Train [66][190/3239]	Time 0.220 (0.963)	Data Time 0.001 (0.215)	Loss 2.2780 (2.4951)	Entropy 1.05038 (1.05086)	Top-1 acc 69.531 (64.216)	Top-5 acc 89.844 (83.788)	lr 0.01085
Train [66][200/3239]	Time 0.220 (0.938)	Data Time 0.001 (0.204)	Loss 2.4076 (2.4958)	Entropy 1.05031 (1.05083)	Top-1 acc 67.578 (64.195)	Top-5 acc 84.375 (83.773)	lr 0.01085
Train [66][210/3239]	Time 0.247 (0.916)	Data Time 0.001 (0.195)	Loss 2.5207 (2.4972)	Entropy 1.05034 (1.05081)	Top-1 acc 60.547 (64.125)	Top-5 acc 82.812 (83.757)	lr 0.01085
Train [66][220/3239]	Time 0.256 (0.896)	Data Time 0.001 (0.186)	Loss 2.5476 (2.4954)	Entropy 1.05030 (1.05079)	Top-1 acc 64.453 (64.137)	Top-5 acc 83.984 (83.785)	lr 0.01085
Train [66][230/3239]	Time 2.580 (0.878)	Data Time 0.002 (0.178)	Loss 2.4375 (2.4930)	Entropy 1.05030 (1.05076)	Top-1 acc 66.797 (64.221)	Top-5 acc 83.594 (83.827)	lr 0.01085
Train [66][240/3239]	Time 0.192 (0.852)	Data Time 0.001 (0.171)	Loss 2.4740 (2.4918)	Entropy 1.05020 (1.05074)	Top-1 acc 67.969 (64.216)	Top-5 acc 85.156 (83.861)	lr 0.01084
Train [66][250/3239]	Time 0.226 (0.836)	Data Time 0.001 (0.164)	Loss 2.4825 (2.4913)	Entropy 1.05018 (1.05072)	Top-1 acc 64.062 (64.213)	Top-5 acc 83.203 (83.868)	lr 0.01084
Train [66][260/3239]	Time 0.233 (0.822)	Data Time 0.001 (0.158)	Loss 2.5253 (2.4937)	Entropy 1.05017 (1.05070)	Top-1 acc 59.375 (64.137)	Top-5 acc 82.812 (83.824)	lr 0.01084
Train [66][270/3239]	Time 0.224 (0.810)	Data Time 0.001 (0.152)	Loss 2.4091 (2.4945)	Entropy 1.05007 (1.05068)	Top-1 acc 67.188 (64.143)	Top-5 acc 83.594 (83.817)	lr 0.01084
Train [66][280/3239]	Time 0.220 (0.797)	Data Time 0.001 (0.147)	Loss 2.4238 (2.4922)	Entropy 1.05005 (1.05065)	Top-1 acc 63.281 (64.163)	Top-5 acc 86.719 (83.852)	lr 0.01084
Train [66][290/3239]	Time 0.242 (0.786)	Data Time 0.001 (0.142)	Loss 2.5024 (2.4945)	Entropy 1.05000 (1.05063)	Top-1 acc 60.156 (64.058)	Top-5 acc 85.156 (83.802)	lr 0.01084
Train [66][300/3239]	Time 0.225 (0.775)	Data Time 0.001 (0.137)	Loss 2.4174 (2.4944)	Entropy 1.04995 (1.05061)	Top-1 acc 67.969 (64.057)	Top-5 acc 84.766 (83.816)	lr 0.01084
Train [66][310/3239]	Time 0.322 (0.766)	Data Time 0.001 (0.133)	Loss 2.4889 (2.4934)	Entropy 1.04990 (1.05059)	Top-1 acc 60.547 (64.056)	Top-5 acc 81.250 (83.829)	lr 0.01084
Train [66][320/3239]	Time 0.226 (0.756)	Data Time 0.001 (0.128)	Loss 2.3477 (2.4936)	Entropy 1.04968 (1.05057)	Top-1 acc 71.875 (64.045)	Top-5 acc 87.109 (83.826)	lr 0.01084
Train [66][330/3239]	Time 0.268 (0.747)	Data Time 0.001 (0.125)	Loss 2.4238 (2.4924)	Entropy 1.04966 (1.05054)	Top-1 acc 63.672 (64.064)	Top-5 acc 83.984 (83.849)	lr 0.01084
Train [66][340/3239]	Time 2.459 (0.739)	Data Time 0.002 (0.121)	Loss 2.5149 (2.4935)	Entropy 1.04966 (1.05051)	Top-1 acc 64.062 (64.038)	Top-5 acc 81.641 (83.829)	lr 0.01083
Train [66][350/3239]	Time 0.217 (0.725)	Data Time 0.001 (0.118)	Loss 2.5844 (2.4946)	Entropy 1.04960 (1.05049)	Top-1 acc 60.156 (63.999)	Top-5 acc 80.859 (83.794)	lr 0.01083
Train [66][360/3239]	Time 0.261 (0.718)	Data Time 0.002 (0.114)	Loss 2.6203 (2.4944)	Entropy 1.04956 (1.05046)	Top-1 acc 58.594 (63.989)	Top-5 acc 83.984 (83.817)	lr 0.01083
Train [66][370/3239]	Time 0.232 (0.711)	Data Time 0.001 (0.111)	Loss 2.5248 (2.4948)	Entropy 1.04957 (1.05044)	Top-1 acc 64.844 (63.994)	Top-5 acc 85.547 (83.813)	lr 0.01083
Train [66][380/3239]	Time 0.233 (0.705)	Data Time 0.001 (0.108)	Loss 2.6732 (2.4938)	Entropy 1.04959 (1.05042)	Top-1 acc 59.375 (64.020)	Top-5 acc 81.641 (83.840)	lr 0.01083
Train [66][390/3239]	Time 0.218 (0.699)	Data Time 0.001 (0.106)	Loss 2.4328 (2.4954)	Entropy 1.04954 (1.05039)	Top-1 acc 69.141 (63.989)	Top-5 acc 86.328 (83.817)	lr 0.01083
Train [66][400/3239]	Time 0.319 (0.693)	Data Time 0.001 (0.103)	Loss 2.2928 (2.4951)	Entropy 1.04941 (1.05037)	Top-1 acc 71.484 (63.993)	Top-5 acc 87.500 (83.824)	lr 0.01083
Train [66][410/3239]	Time 0.210 (0.688)	Data Time 0.001 (0.101)	Loss 2.4641 (2.4943)	Entropy 1.04937 (1.05035)	Top-1 acc 65.234 (64.007)	Top-5 acc 85.156 (83.833)	lr 0.01083
Train [66][420/3239]	Time 0.222 (0.682)	Data Time 0.002 (0.098)	Loss 2.3411 (2.4936)	Entropy 1.04926 (1.05032)	Top-1 acc 67.969 (64.046)	Top-5 acc 89.062 (83.847)	lr 0.01083
Train [66][430/3239]	Time 0.224 (0.677)	Data Time 0.001 (0.096)	Loss 2.5322 (2.4939)	Entropy 1.04924 (1.05030)	Top-1 acc 62.500 (64.016)	Top-5 acc 83.984 (83.838)	lr 0.01083
Train [66][440/3239]	Time 0.445 (0.798)	Data Time 0.004 (0.094)	Loss 2.4554 (2.4942)	Entropy 1.04922 (1.05027)	Top-1 acc 65.234 (64.000)	Top-5 acc 83.594 (83.822)	lr 0.01082
Train [66][450/3239]	Time 2.799 (0.792)	Data Time 0.002 (0.092)	Loss 2.3949 (2.4938)	Entropy 1.04922 (1.05025)	Top-1 acc 64.453 (63.978)	Top-5 acc 87.891 (83.842)	lr 0.01082
Train [66][460/3239]	Time 0.228 (0.780)	Data Time 0.002 (0.090)	Loss 2.3951 (2.4933)	Entropy 1.04924 (1.05023)	Top-1 acc 65.625 (63.983)	Top-5 acc 86.328 (83.845)	lr 0.01082
Train [66][470/3239]	Time 0.241 (0.773)	Data Time 0.002 (0.088)	Loss 2.4686 (2.4936)	Entropy 1.04927 (1.05021)	Top-1 acc 67.188 (63.958)	Top-5 acc 82.422 (83.848)	lr 0.01082
Train [66][480/3239]	Time 0.246 (0.767)	Data Time 0.001 (0.086)	Loss 2.4366 (2.4942)	Entropy 1.04928 (1.05019)	Top-1 acc 66.797 (63.932)	Top-5 acc 84.375 (83.831)	lr 0.01082
Train [66][490/3239]	Time 0.229 (0.761)	Data Time 0.001 (0.085)	Loss 2.4405 (2.4940)	Entropy 1.04928 (1.05017)	Top-1 acc 66.797 (63.923)	Top-5 acc 85.547 (83.834)	lr 0.01082
Train [66][500/3239]	Time 0.216 (0.755)	Data Time 0.001 (0.083)	Loss 2.3697 (2.4940)	Entropy 1.04929 (1.05015)	Top-1 acc 66.016 (63.929)	Top-5 acc 86.328 (83.845)	lr 0.01082
Train [66][510/3239]	Time 0.233 (0.749)	Data Time 0.001 (0.081)	Loss 2.5482 (2.4937)	Entropy 1.04926 (1.05014)	Top-1 acc 64.844 (63.945)	Top-5 acc 82.031 (83.842)	lr 0.01082
Train [66][520/3239]	Time 0.229 (0.744)	Data Time 0.001 (0.080)	Loss 2.6338 (2.4932)	Entropy 1.04925 (1.05012)	Top-1 acc 60.547 (63.961)	Top-5 acc 82.031 (83.845)	lr 0.01082
Train [66][530/3239]	Time 0.228 (0.738)	Data Time 0.001 (0.078)	Loss 2.4276 (2.4940)	Entropy 1.04926 (1.05010)	Top-1 acc 64.062 (63.940)	Top-5 acc 85.547 (83.832)	lr 0.01082
Train [66][540/3239]	Time 0.224 (0.734)	Data Time 0.002 (0.077)	Loss 2.4057 (2.4937)	Entropy 1.04924 (1.05009)	Top-1 acc 64.453 (63.948)	Top-5 acc 85.156 (83.844)	lr 0.01081
Train [66][550/3239]	Time 0.214 (0.729)	Data Time 0.001 (0.076)	Loss 2.6509 (2.4944)	Entropy 1.04925 (1.05007)	Top-1 acc 60.547 (63.921)	Top-5 acc 82.422 (83.828)	lr 0.01081
Train [66][560/3239]	Time 2.588 (0.724)	Data Time 0.001 (0.074)	Loss 2.4129 (2.4948)	Entropy 1.04925 (1.05006)	Top-1 acc 64.453 (63.926)	Top-5 acc 85.547 (83.816)	lr 0.01081
Train [66][570/3239]	Time 0.229 (0.716)	Data Time 0.001 (0.073)	Loss 2.6277 (2.4946)	Entropy 1.04906 (1.05004)	Top-1 acc 63.672 (63.930)	Top-5 acc 80.078 (83.814)	lr 0.01081
Train [66][580/3239]	Time 0.346 (0.712)	Data Time 0.001 (0.072)	Loss 2.5257 (2.4956)	Entropy 1.04899 (1.05002)	Top-1 acc 62.891 (63.914)	Top-5 acc 86.328 (83.792)	lr 0.01081
Train [66][590/3239]	Time 0.234 (0.708)	Data Time 0.001 (0.071)	Loss 2.3467 (2.4949)	Entropy 1.04899 (1.05000)	Top-1 acc 68.359 (63.950)	Top-5 acc 88.281 (83.798)	lr 0.01081
Train [66][600/3239]	Time 0.202 (0.704)	Data Time 0.001 (0.069)	Loss 2.5986 (2.4953)	Entropy 1.04897 (1.04999)	Top-1 acc 62.500 (63.950)	Top-5 acc 80.469 (83.792)	lr 0.01081
Train [66][610/3239]	Time 0.235 (0.700)	Data Time 0.001 (0.068)	Loss 2.9040 (2.4974)	Entropy 1.04898 (1.04997)	Top-1 acc 51.953 (63.893)	Top-5 acc 76.953 (83.752)	lr 0.01081
Train [66][620/3239]	Time 0.245 (0.696)	Data Time 0.001 (0.067)	Loss 2.5372 (2.4971)	Entropy 1.04898 (1.04995)	Top-1 acc 63.672 (63.908)	Top-5 acc 84.375 (83.766)	lr 0.01081
Train [66][630/3239]	Time 0.213 (0.693)	Data Time 0.001 (0.066)	Loss 2.5107 (2.4967)	Entropy 1.04890 (1.04994)	Top-1 acc 59.375 (63.903)	Top-5 acc 84.375 (83.781)	lr 0.01081
Train [66][640/3239]	Time 0.229 (0.689)	Data Time 0.001 (0.065)	Loss 2.4821 (2.4968)	Entropy 1.04887 (1.04992)	Top-1 acc 63.672 (63.899)	Top-5 acc 85.547 (83.795)	lr 0.01080
Train [66][650/3239]	Time 0.219 (0.686)	Data Time 0.001 (0.064)	Loss 2.4074 (2.4965)	Entropy 1.04889 (1.04991)	Top-1 acc 63.672 (63.916)	Top-5 acc 85.938 (83.809)	lr 0.01080
Train [66][660/3239]	Time 0.233 (0.683)	Data Time 0.001 (0.063)	Loss 2.4843 (2.4966)	Entropy 1.04881 (1.04989)	Top-1 acc 66.406 (63.911)	Top-5 acc 85.156 (83.808)	lr 0.01080
Train [66][670/3239]	Time 2.572 (0.680)	Data Time 0.001 (0.062)	Loss 2.3465 (2.4962)	Entropy 1.04881 (1.04987)	Top-1 acc 70.703 (63.934)	Top-5 acc 86.328 (83.810)	lr 0.01080
Train [66][680/3239]	Time 0.240 (0.673)	Data Time 0.001 (0.061)	Loss 2.4153 (2.4954)	Entropy 1.04880 (1.04986)	Top-1 acc 64.062 (63.945)	Top-5 acc 85.938 (83.828)	lr 0.01080
Train [66][690/3239]	Time 0.216 (0.670)	Data Time 0.001 (0.061)	Loss 2.3805 (2.4948)	Entropy 1.04879 (1.04984)	Top-1 acc 66.016 (63.945)	Top-5 acc 85.547 (83.844)	lr 0.01080
Train [66][700/3239]	Time 0.222 (0.667)	Data Time 0.001 (0.060)	Loss 2.5076 (2.4952)	Entropy 1.04880 (1.04983)	Top-1 acc 62.891 (63.952)	Top-5 acc 83.984 (83.837)	lr 0.01080
Train [66][710/3239]	Time 0.232 (0.665)	Data Time 0.001 (0.059)	Loss 2.3720 (2.4956)	Entropy 1.04881 (1.04981)	Top-1 acc 66.797 (63.940)	Top-5 acc 87.500 (83.833)	lr 0.01080
Train [66][720/3239]	Time 0.239 (0.662)	Data Time 0.001 (0.058)	Loss 2.5224 (2.4957)	Entropy 1.04876 (1.04980)	Top-1 acc 64.844 (63.947)	Top-5 acc 83.203 (83.829)	lr 0.01080
Train [66][730/3239]	Time 0.226 (0.659)	Data Time 0.001 (0.057)	Loss 2.5420 (2.4958)	Entropy 1.04876 (1.04979)	Top-1 acc 62.891 (63.937)	Top-5 acc 81.250 (83.828)	lr 0.01080
Train [66][740/3239]	Time 0.212 (0.657)	Data Time 0.001 (0.057)	Loss 2.4671 (2.4961)	Entropy 1.04873 (1.04977)	Top-1 acc 62.500 (63.916)	Top-5 acc 83.203 (83.821)	lr 0.01079
Train [66][750/3239]	Time 0.245 (0.654)	Data Time 0.001 (0.056)	Loss 2.4537 (2.4964)	Entropy 1.04867 (1.04976)	Top-1 acc 63.672 (63.915)	Top-5 acc 85.938 (83.808)	lr 0.01079
Train [66][760/3239]	Time 0.322 (0.652)	Data Time 0.001 (0.055)	Loss 2.4893 (2.4955)	Entropy 1.04870 (1.04974)	Top-1 acc 66.797 (63.941)	Top-5 acc 81.250 (83.820)	lr 0.01079
Train [66][770/3239]	Time 0.221 (0.650)	Data Time 0.001 (0.054)	Loss 2.4574 (2.4956)	Entropy 1.04870 (1.04973)	Top-1 acc 66.406 (63.929)	Top-5 acc 82.031 (83.817)	lr 0.01079
Train [66][780/3239]	Time 2.421 (0.647)	Data Time 0.001 (0.054)	Loss 2.4578 (2.4965)	Entropy 1.04870 (1.04972)	Top-1 acc 65.625 (63.909)	Top-5 acc 82.812 (83.788)	lr 0.01079
Train [66][790/3239]	Time 0.224 (0.642)	Data Time 0.001 (0.053)	Loss 2.5349 (2.4974)	Entropy 1.04861 (1.04970)	Top-1 acc 62.891 (63.889)	Top-5 acc 83.594 (83.768)	lr 0.01079
Train [66][800/3239]	Time 0.234 (0.640)	Data Time 0.001 (0.052)	Loss 2.3842 (2.4971)	Entropy 1.04860 (1.04969)	Top-1 acc 65.234 (63.901)	Top-5 acc 88.672 (83.776)	lr 0.01079
Train [66][810/3239]	Time 0.237 (0.707)	Data Time 0.002 (0.052)	Loss 2.5347 (2.4973)	Entropy 1.04856 (1.04967)	Top-1 acc 63.281 (63.898)	Top-5 acc 82.422 (83.771)	lr 0.01079
Train [66][820/3239]	Time 0.239 (0.704)	Data Time 0.002 (0.051)	Loss 2.2749 (2.4975)	Entropy 1.04853 (1.04966)	Top-1 acc 72.266 (63.901)	Top-5 acc 88.281 (83.768)	lr 0.01079
Train [66][830/3239]	Time 0.219 (0.702)	Data Time 0.001 (0.051)	Loss 2.7404 (2.4978)	Entropy 1.04857 (1.04965)	Top-1 acc 59.375 (63.899)	Top-5 acc 78.125 (83.765)	lr 0.01079
Train [66][840/3239]	Time 0.232 (0.699)	Data Time 0.001 (0.050)	Loss 2.6724 (2.4980)	Entropy 1.04855 (1.04963)	Top-1 acc 58.594 (63.899)	Top-5 acc 78.906 (83.766)	lr 0.01078
Train [66][850/3239]	Time 0.327 (0.696)	Data Time 0.001 (0.049)	Loss 2.3563 (2.4979)	Entropy 1.04865 (1.04962)	Top-1 acc 67.578 (63.903)	Top-5 acc 88.281 (83.776)	lr 0.01078
Train [66][860/3239]	Time 0.236 (0.693)	Data Time 0.002 (0.049)	Loss 2.3878 (2.4980)	Entropy 1.04857 (1.04961)	Top-1 acc 67.969 (63.906)	Top-5 acc 83.984 (83.769)	lr 0.01078
Train [66][870/3239]	Time 0.236 (0.691)	Data Time 0.002 (0.048)	Loss 2.5097 (2.4977)	Entropy 1.04854 (1.04960)	Top-1 acc 66.016 (63.920)	Top-5 acc 83.984 (83.776)	lr 0.01078
Train [66][880/3239]	Time 0.248 (0.688)	Data Time 0.001 (0.048)	Loss 2.5338 (2.4979)	Entropy 1.04854 (1.04959)	Top-1 acc 64.453 (63.915)	Top-5 acc 85.156 (83.769)	lr 0.01078
Train [66][890/3239]	Time 2.504 (0.686)	Data Time 0.001 (0.047)	Loss 2.6059 (2.4980)	Entropy 1.04854 (1.04957)	Top-1 acc 62.891 (63.909)	Top-5 acc 82.422 (83.766)	lr 0.01078
Train [66][900/3239]	Time 0.215 (0.681)	Data Time 0.001 (0.047)	Loss 2.3522 (2.4981)	Entropy 1.04850 (1.04956)	Top-1 acc 66.406 (63.911)	Top-5 acc 85.938 (83.765)	lr 0.01078
Train [66][910/3239]	Time 0.230 (0.679)	Data Time 0.001 (0.046)	Loss 2.4624 (2.4985)	Entropy 1.04850 (1.04955)	Top-1 acc 66.406 (63.907)	Top-5 acc 83.984 (83.761)	lr 0.01078
Train [66][920/3239]	Time 0.210 (0.676)	Data Time 0.001 (0.046)	Loss 2.4623 (2.4985)	Entropy 1.04848 (1.04954)	Top-1 acc 64.844 (63.909)	Top-5 acc 88.281 (83.768)	lr 0.01078
Train [66][930/3239]	Time 0.230 (0.674)	Data Time 0.001 (0.045)	Loss 2.5419 (2.4987)	Entropy 1.04844 (1.04953)	Top-1 acc 61.719 (63.898)	Top-5 acc 83.594 (83.764)	lr 0.01078
Train [66][940/3239]	Time 0.379 (0.672)	Data Time 0.001 (0.045)	Loss 2.4436 (2.4985)	Entropy 1.04842 (1.04952)	Top-1 acc 62.500 (63.903)	Top-5 acc 84.766 (83.769)	lr 0.01077
Train [66][950/3239]	Time 0.215 (0.670)	Data Time 0.001 (0.044)	Loss 2.5996 (2.4987)	Entropy 1.04843 (1.04950)	Top-1 acc 62.891 (63.901)	Top-5 acc 81.250 (83.760)	lr 0.01077
Train [66][960/3239]	Time 0.208 (0.668)	Data Time 0.001 (0.044)	Loss 2.4153 (2.4989)	Entropy 1.04842 (1.04949)	Top-1 acc 64.844 (63.891)	Top-5 acc 83.984 (83.750)	lr 0.01077
Train [66][970/3239]	Time 0.230 (0.666)	Data Time 0.001 (0.044)	Loss 2.3895 (2.4985)	Entropy 1.04846 (1.04948)	Top-1 acc 68.750 (63.900)	Top-5 acc 87.891 (83.762)	lr 0.01077
Train [66][980/3239]	Time 0.236 (0.664)	Data Time 0.001 (0.043)	Loss 2.5141 (2.4983)	Entropy 1.04840 (1.04947)	Top-1 acc 65.234 (63.899)	Top-5 acc 85.547 (83.769)	lr 0.01077
Train [66][990/3239]	Time 0.227 (0.662)	Data Time 0.001 (0.043)	Loss 2.3541 (2.4981)	Entropy 1.04838 (1.04946)	Top-1 acc 66.797 (63.905)	Top-5 acc 85.938 (83.774)	lr 0.01077
Train [66][1000/3239]	Time 2.424 (0.660)	Data Time 0.001 (0.042)	Loss 2.6481 (2.4980)	Entropy 1.04838 (1.04945)	Top-1 acc 56.641 (63.903)	Top-5 acc 79.297 (83.775)	lr 0.01077
Train [66][1010/3239]	Time 0.208 (0.656)	Data Time 0.001 (0.042)	Loss 2.5140 (2.4979)	Entropy 1.04838 (1.04944)	Top-1 acc 59.766 (63.900)	Top-5 acc 85.547 (83.773)	lr 0.01077
Train [66][1020/3239]	Time 0.220 (0.654)	Data Time 0.001 (0.042)	Loss 2.5860 (2.4980)	Entropy 1.04836 (1.04943)	Top-1 acc 61.719 (63.900)	Top-5 acc 82.422 (83.771)	lr 0.01077
Train [66][1030/3239]	Time 0.254 (0.652)	Data Time 0.001 (0.041)	Loss 2.4140 (2.4982)	Entropy 1.04828 (1.04942)	Top-1 acc 67.188 (63.896)	Top-5 acc 85.547 (83.770)	lr 0.01077
Train [66][1040/3239]	Time 0.235 (0.650)	Data Time 0.001 (0.041)	Loss 2.6521 (2.4985)	Entropy 1.04826 (1.04941)	Top-1 acc 56.250 (63.893)	Top-5 acc 82.031 (83.760)	lr 0.01076
Train [66][1050/3239]	Time 0.230 (0.648)	Data Time 0.001 (0.040)	Loss 2.5660 (2.4986)	Entropy 1.04823 (1.04940)	Top-1 acc 61.328 (63.892)	Top-5 acc 83.203 (83.763)	lr 0.01076
Train [66][1060/3239]	Time 0.229 (0.647)	Data Time 0.001 (0.040)	Loss 2.6029 (2.4985)	Entropy 1.04820 (1.04939)	Top-1 acc 61.719 (63.894)	Top-5 acc 81.250 (83.765)	lr 0.01076
Train [66][1070/3239]	Time 0.224 (0.645)	Data Time 0.001 (0.040)	Loss 2.4808 (2.4984)	Entropy 1.04817 (1.04937)	Top-1 acc 66.797 (63.905)	Top-5 acc 86.328 (83.771)	lr 0.01076
Train [66][1080/3239]	Time 0.224 (0.644)	Data Time 0.001 (0.039)	Loss 2.6478 (2.4980)	Entropy 1.04818 (1.04936)	Top-1 acc 58.594 (63.909)	Top-5 acc 83.594 (83.778)	lr 0.01076
Train [66][1090/3239]	Time 0.201 (0.642)	Data Time 0.001 (0.039)	Loss 2.5034 (2.4983)	Entropy 1.04819 (1.04935)	Top-1 acc 67.188 (63.903)	Top-5 acc 79.688 (83.776)	lr 0.01076
Train [66][1100/3239]	Time 0.286 (0.640)	Data Time 0.001 (0.039)	Loss 2.4553 (2.4980)	Entropy 1.04782 (1.04934)	Top-1 acc 63.672 (63.909)	Top-5 acc 83.984 (83.775)	lr 0.01076
Train [66][1110/3239]	Time 2.471 (0.639)	Data Time 0.001 (0.038)	Loss 2.5513 (2.4982)	Entropy 1.04782 (1.04933)	Top-1 acc 60.547 (63.907)	Top-5 acc 83.594 (83.768)	lr 0.01076
Train [66][1120/3239]	Time 0.321 (0.635)	Data Time 0.001 (0.038)	Loss 2.5896 (2.4981)	Entropy 1.04780 (1.04931)	Top-1 acc 60.547 (63.913)	Top-5 acc 82.422 (83.771)	lr 0.01076
Train [66][1130/3239]	Time 0.227 (0.634)	Data Time 0.001 (0.038)	Loss 2.3598 (2.4977)	Entropy 1.04776 (1.04930)	Top-1 acc 66.016 (63.926)	Top-5 acc 88.672 (83.781)	lr 0.01076
Train [66][1140/3239]	Time 0.221 (0.632)	Data Time 0.001 (0.037)	Loss 2.5289 (2.4981)	Entropy 1.04774 (1.04929)	Top-1 acc 64.453 (63.924)	Top-5 acc 81.641 (83.773)	lr 0.01075
Train [66][1150/3239]	Time 0.208 (0.631)	Data Time 0.001 (0.037)	Loss 2.6651 (2.4981)	Entropy 1.04770 (1.04927)	Top-1 acc 58.594 (63.919)	Top-5 acc 82.422 (83.777)	lr 0.01075
Train [66][1160/3239]	Time 0.214 (0.629)	Data Time 0.001 (0.037)	Loss 2.3772 (2.4976)	Entropy 1.04768 (1.04926)	Top-1 acc 68.359 (63.941)	Top-5 acc 84.766 (83.779)	lr 0.01075
Train [66][1170/3239]	Time 0.309 (0.672)	Data Time 0.003 (0.036)	Loss 2.3670 (2.4974)	Entropy 1.04778 (1.04925)	Top-1 acc 68.359 (63.946)	Top-5 acc 89.453 (83.788)	lr 0.01075
Train [66][1180/3239]	Time 0.246 (0.671)	Data Time 0.002 (0.036)	Loss 2.5991 (2.4981)	Entropy 1.04781 (1.04923)	Top-1 acc 60.547 (63.928)	Top-5 acc 82.031 (83.781)	lr 0.01075
Train [66][1190/3239]	Time 0.231 (0.669)	Data Time 0.002 (0.036)	Loss 2.4061 (2.4981)	Entropy 1.04778 (1.04922)	Top-1 acc 67.188 (63.932)	Top-5 acc 83.203 (83.781)	lr 0.01075
Train [66][1200/3239]	Time 0.234 (0.667)	Data Time 0.002 (0.036)	Loss 2.5029 (2.4985)	Entropy 1.04777 (1.04921)	Top-1 acc 62.891 (63.920)	Top-5 acc 83.203 (83.769)	lr 0.01075
Train [66][1210/3239]	Time 0.323 (0.666)	Data Time 0.001 (0.035)	Loss 2.3822 (2.4979)	Entropy 1.04775 (1.04920)	Top-1 acc 67.578 (63.935)	Top-5 acc 88.672 (83.785)	lr 0.01075
Train [66][1220/3239]	Time 2.431 (0.664)	Data Time 0.001 (0.035)	Loss 2.5300 (2.4977)	Entropy 1.04775 (1.04919)	Top-1 acc 63.672 (63.938)	Top-5 acc 84.375 (83.790)	lr 0.01075
Train [66][1230/3239]	Time 0.258 (0.661)	Data Time 0.002 (0.035)	Loss 2.3630 (2.4978)	Entropy 1.04771 (1.04917)	Top-1 acc 66.016 (63.933)	Top-5 acc 84.375 (83.786)	lr 0.01075
Train [66][1240/3239]	Time 0.225 (0.659)	Data Time 0.001 (0.034)	Loss 2.4535 (2.4974)	Entropy 1.04763 (1.04916)	Top-1 acc 66.406 (63.938)	Top-5 acc 85.938 (83.789)	lr 0.01074
Train [66][1250/3239]	Time 0.239 (0.658)	Data Time 0.001 (0.034)	Loss 2.4851 (2.4973)	Entropy 1.04791 (1.04915)	Top-1 acc 63.672 (63.928)	Top-5 acc 85.156 (83.797)	lr 0.01074
Train [66][1260/3239]	Time 0.232 (0.656)	Data Time 0.001 (0.034)	Loss 2.4821 (2.4974)	Entropy 1.04789 (1.04914)	Top-1 acc 63.281 (63.919)	Top-5 acc 85.156 (83.801)	lr 0.01074
Train [66][1270/3239]	Time 0.225 (0.655)	Data Time 0.001 (0.034)	Loss 2.3043 (2.4971)	Entropy 1.04783 (1.04913)	Top-1 acc 71.094 (63.925)	Top-5 acc 89.062 (83.811)	lr 0.01074
Train [66][1280/3239]	Time 0.224 (0.653)	Data Time 0.001 (0.033)	Loss 2.3731 (2.4972)	Entropy 1.04781 (1.04912)	Top-1 acc 66.797 (63.921)	Top-5 acc 86.328 (83.815)	lr 0.01074
Train [66][1290/3239]	Time 0.240 (0.652)	Data Time 0.001 (0.033)	Loss 2.4264 (2.4969)	Entropy 1.04777 (1.04911)	Top-1 acc 66.016 (63.935)	Top-5 acc 85.547 (83.822)	lr 0.01074
Train [66][1300/3239]	Time 0.358 (0.650)	Data Time 0.001 (0.033)	Loss 2.3673 (2.4967)	Entropy 1.04769 (1.04910)	Top-1 acc 64.062 (63.941)	Top-5 acc 87.500 (83.824)	lr 0.01074
Train [66][1310/3239]	Time 0.208 (0.649)	Data Time 0.001 (0.033)	Loss 2.4864 (2.4968)	Entropy 1.04765 (1.04909)	Top-1 acc 63.281 (63.933)	Top-5 acc 85.547 (83.824)	lr 0.01074
Train [66][1320/3239]	Time 0.250 (0.647)	Data Time 0.001 (0.032)	Loss 2.3871 (2.4968)	Entropy 1.04758 (1.04908)	Top-1 acc 69.141 (63.928)	Top-5 acc 85.547 (83.820)	lr 0.01074
Train [66][1330/3239]	Time 2.386 (0.646)	Data Time 0.001 (0.032)	Loss 2.6254 (2.4966)	Entropy 1.04758 (1.04907)	Top-1 acc 62.109 (63.934)	Top-5 acc 81.641 (83.825)	lr 0.01074
Train [66][1340/3239]	Time 0.233 (0.643)	Data Time 0.001 (0.032)	Loss 2.5951 (2.4964)	Entropy 1.04760 (1.04906)	Top-1 acc 63.281 (63.941)	Top-5 acc 82.031 (83.832)	lr 0.01073
Train [66][1350/3239]	Time 0.226 (0.642)	Data Time 0.001 (0.032)	Loss 2.4330 (2.4964)	Entropy 1.04760 (1.04905)	Top-1 acc 66.016 (63.940)	Top-5 acc 83.203 (83.831)	lr 0.01073
Train [66][1360/3239]	Time 0.231 (0.640)	Data Time 0.001 (0.032)	Loss 2.5473 (2.4962)	Entropy 1.04757 (1.04903)	Top-1 acc 63.281 (63.947)	Top-5 acc 80.469 (83.831)	lr 0.01073
Train [66][1370/3239]	Time 0.204 (0.639)	Data Time 0.001 (0.031)	Loss 2.6801 (2.4964)	Entropy 1.04756 (1.04902)	Top-1 acc 57.422 (63.944)	Top-5 acc 79.297 (83.828)	lr 0.01073
Train [66][1380/3239]	Time 0.223 (0.638)	Data Time 0.001 (0.031)	Loss 2.6095 (2.4965)	Entropy 1.04746 (1.04901)	Top-1 acc 60.547 (63.938)	Top-5 acc 81.250 (83.820)	lr 0.01073
Train [66][1390/3239]	Time 0.253 (0.637)	Data Time 0.001 (0.031)	Loss 2.4974 (2.4969)	Entropy 1.04741 (1.04900)	Top-1 acc 60.938 (63.934)	Top-5 acc 84.766 (83.809)	lr 0.01073
Train [66][1400/3239]	Time 0.221 (0.635)	Data Time 0.001 (0.031)	Loss 2.5397 (2.4972)	Entropy 1.04746 (1.04899)	Top-1 acc 62.109 (63.928)	Top-5 acc 81.250 (83.804)	lr 0.01073
Train [66][1410/3239]	Time 0.225 (0.634)	Data Time 0.001 (0.031)	Loss 2.4145 (2.4973)	Entropy 1.04738 (1.04898)	Top-1 acc 66.406 (63.922)	Top-5 acc 83.984 (83.806)	lr 0.01073
Train [66][1420/3239]	Time 0.271 (0.633)	Data Time 0.001 (0.030)	Loss 2.4504 (2.4976)	Entropy 1.04730 (1.04897)	Top-1 acc 62.891 (63.912)	Top-5 acc 84.766 (83.801)	lr 0.01073
Train [66][1430/3239]	Time 0.229 (0.632)	Data Time 0.001 (0.030)	Loss 2.5274 (2.4976)	Entropy 1.04723 (1.04896)	Top-1 acc 61.719 (63.914)	Top-5 acc 84.766 (83.801)	lr 0.01073
Train [66][1440/3239]	Time 2.531 (0.631)	Data Time 0.001 (0.030)	Loss 2.8792 (2.4981)	Entropy 1.04723 (1.04894)	Top-1 acc 53.906 (63.906)	Top-5 acc 79.688 (83.794)	lr 0.01072
Train [66][1450/3239]	Time 0.240 (0.628)	Data Time 0.001 (0.030)	Loss 2.4789 (2.4979)	Entropy 1.04723 (1.04893)	Top-1 acc 62.500 (63.910)	Top-5 acc 81.641 (83.796)	lr 0.01072
Train [66][1460/3239]	Time 0.254 (0.627)	Data Time 0.001 (0.030)	Loss 2.5444 (2.4980)	Entropy 1.04720 (1.04892)	Top-1 acc 61.328 (63.908)	Top-5 acc 81.250 (83.790)	lr 0.01072
Train [66][1470/3239]	Time 0.227 (0.626)	Data Time 0.001 (0.029)	Loss 2.6227 (2.4981)	Entropy 1.04718 (1.04891)	Top-1 acc 57.812 (63.901)	Top-5 acc 81.250 (83.793)	lr 0.01072
Train [66][1480/3239]	Time 0.235 (0.625)	Data Time 0.001 (0.029)	Loss 2.2121 (2.4979)	Entropy 1.04713 (1.04890)	Top-1 acc 69.531 (63.910)	Top-5 acc 89.062 (83.796)	lr 0.01072
Train [66][1490/3239]	Time 0.219 (0.624)	Data Time 0.001 (0.029)	Loss 2.5621 (2.4980)	Entropy 1.04712 (1.04888)	Top-1 acc 60.547 (63.902)	Top-5 acc 83.203 (83.797)	lr 0.01072
Train [66][1500/3239]	Time 0.232 (0.623)	Data Time 0.001 (0.029)	Loss 2.4298 (2.4979)	Entropy 1.04714 (1.04887)	Top-1 acc 64.453 (63.905)	Top-5 acc 84.375 (83.797)	lr 0.01072
Train [66][1510/3239]	Time 0.223 (0.622)	Data Time 0.001 (0.029)	Loss 2.5801 (2.4978)	Entropy 1.04713 (1.04886)	Top-1 acc 60.547 (63.901)	Top-5 acc 83.984 (83.804)	lr 0.01072
Train [66][1520/3239]	Time 0.203 (0.621)	Data Time 0.001 (0.028)	Loss 2.5753 (2.4978)	Entropy 1.04714 (1.04885)	Top-1 acc 64.062 (63.900)	Top-5 acc 86.328 (83.803)	lr 0.01072
Train [66][1530/3239]	Time 0.470 (0.655)	Data Time 0.003 (0.028)	Loss 2.2987 (2.4978)	Entropy 1.04713 (1.04884)	Top-1 acc 69.141 (63.895)	Top-5 acc 89.062 (83.804)	lr 0.01072
Train [66][1540/3239]	Time 0.231 (0.654)	Data Time 0.002 (0.028)	Loss 2.5245 (2.4980)	Entropy 1.04712 (1.04883)	Top-1 acc 65.625 (63.888)	Top-5 acc 83.984 (83.802)	lr 0.01071
Train [66][1550/3239]	Time 2.715 (0.653)	Data Time 0.001 (0.028)	Loss 2.6165 (2.4981)	Entropy 1.04712 (1.04882)	Top-1 acc 59.766 (63.887)	Top-5 acc 80.469 (83.800)	lr 0.01071
Train [66][1560/3239]	Time 0.226 (0.651)	Data Time 0.001 (0.028)	Loss 2.6397 (2.4982)	Entropy 1.04706 (1.04881)	Top-1 acc 59.766 (63.883)	Top-5 acc 81.641 (83.803)	lr 0.01071
Train [66][1570/3239]	Time 0.264 (0.650)	Data Time 0.002 (0.028)	Loss 2.4483 (2.4982)	Entropy 1.04702 (1.04879)	Top-1 acc 67.969 (63.879)	Top-5 acc 84.766 (83.804)	lr 0.01071
Train [66][1580/3239]	Time 0.227 (0.648)	Data Time 0.001 (0.027)	Loss 2.5560 (2.4982)	Entropy 1.04699 (1.04878)	Top-1 acc 60.938 (63.879)	Top-5 acc 81.641 (83.801)	lr 0.01071
Train [66][1590/3239]	Time 0.244 (0.647)	Data Time 0.001 (0.027)	Loss 2.4260 (2.4981)	Entropy 1.04695 (1.04877)	Top-1 acc 67.578 (63.886)	Top-5 acc 83.594 (83.801)	lr 0.01071
Train [66][1600/3239]	Time 0.218 (0.646)	Data Time 0.001 (0.027)	Loss 2.5432 (2.4980)	Entropy 1.04694 (1.04876)	Top-1 acc 60.156 (63.893)	Top-5 acc 83.984 (83.805)	lr 0.01071
Train [66][1610/3239]	Time 0.216 (0.645)	Data Time 0.001 (0.027)	Loss 2.5660 (2.4977)	Entropy 1.04691 (1.04875)	Top-1 acc 66.406 (63.900)	Top-5 acc 82.422 (83.809)	lr 0.01071
Train [66][1620/3239]	Time 0.331 (0.644)	Data Time 0.001 (0.027)	Loss 2.3894 (2.4975)	Entropy 1.04684 (1.04874)	Top-1 acc 68.359 (63.904)	Top-5 acc 85.938 (83.813)	lr 0.01071
Train [66][1630/3239]	Time 0.253 (0.643)	Data Time 0.001 (0.027)	Loss 2.5118 (2.4972)	Entropy 1.04681 (1.04873)	Top-1 acc 62.109 (63.912)	Top-5 acc 84.766 (83.822)	lr 0.01071
Train [66][1640/3239]	Time 0.260 (0.642)	Data Time 0.001 (0.026)	Loss 2.4913 (2.4971)	Entropy 1.04675 (1.04871)	Top-1 acc 66.016 (63.914)	Top-5 acc 84.766 (83.824)	lr 0.01070
Train [66][1650/3239]	Time 0.275 (0.641)	Data Time 0.001 (0.026)	Loss 2.5647 (2.4974)	Entropy 1.04669 (1.04870)	Top-1 acc 60.547 (63.906)	Top-5 acc 82.031 (83.819)	lr 0.01070
Train [66][1660/3239]	Time 2.576 (0.640)	Data Time 0.001 (0.026)	Loss 2.6372 (2.4976)	Entropy 1.04669 (1.04869)	Top-1 acc 63.281 (63.902)	Top-5 acc 79.688 (83.813)	lr 0.01070
Train [66][1670/3239]	Time 0.227 (0.638)	Data Time 0.001 (0.026)	Loss 2.5740 (2.4979)	Entropy 1.04669 (1.04868)	Top-1 acc 63.672 (63.897)	Top-5 acc 83.594 (83.806)	lr 0.01070
Train [66][1680/3239]	Time 0.231 (0.637)	Data Time 0.001 (0.026)	Loss 2.3696 (2.4980)	Entropy 1.04675 (1.04867)	Top-1 acc 66.016 (63.896)	Top-5 acc 89.062 (83.808)	lr 0.01070
Train [66][1690/3239]	Time 0.249 (0.636)	Data Time 0.001 (0.026)	Loss 2.5491 (2.4981)	Entropy 1.04669 (1.04865)	Top-1 acc 65.234 (63.892)	Top-5 acc 82.812 (83.806)	lr 0.01070
Train [66][1700/3239]	Time 0.233 (0.635)	Data Time 0.001 (0.026)	Loss 2.4019 (2.4981)	Entropy 1.04665 (1.04864)	Top-1 acc 64.844 (63.896)	Top-5 acc 86.719 (83.808)	lr 0.01070
Train [66][1710/3239]	Time 0.225 (0.634)	Data Time 0.001 (0.025)	Loss 2.3052 (2.4980)	Entropy 1.04665 (1.04863)	Top-1 acc 67.188 (63.899)	Top-5 acc 87.500 (83.804)	lr 0.01070
Train [66][1720/3239]	Time 0.230 (0.633)	Data Time 0.001 (0.025)	Loss 2.5400 (2.4978)	Entropy 1.04664 (1.04862)	Top-1 acc 63.281 (63.904)	Top-5 acc 82.031 (83.807)	lr 0.01070
Train [66][1730/3239]	Time 0.229 (0.632)	Data Time 0.001 (0.025)	Loss 2.6345 (2.4980)	Entropy 1.04657 (1.04861)	Top-1 acc 60.547 (63.896)	Top-5 acc 82.031 (83.803)	lr 0.01070
Train [66][1740/3239]	Time 0.214 (0.631)	Data Time 0.001 (0.025)	Loss 2.5705 (2.4977)	Entropy 1.04659 (1.04860)	Top-1 acc 64.453 (63.906)	Top-5 acc 81.250 (83.805)	lr 0.01069
Train [66][1750/3239]	Time 0.230 (0.630)	Data Time 0.001 (0.025)	Loss 2.3946 (2.4977)	Entropy 1.04654 (1.04859)	Top-1 acc 67.578 (63.909)	Top-5 acc 83.594 (83.801)	lr 0.01069
Train [66][1760/3239]	Time 0.257 (0.629)	Data Time 0.001 (0.025)	Loss 2.3279 (2.4976)	Entropy 1.04648 (1.04857)	Top-1 acc 67.969 (63.908)	Top-5 acc 88.672 (83.805)	lr 0.01069
Train [66][1770/3239]	Time 2.478 (0.628)	Data Time 0.001 (0.025)	Loss 2.5984 (2.4976)	Entropy 1.04648 (1.04856)	Top-1 acc 57.812 (63.901)	Top-5 acc 85.156 (83.808)	lr 0.01069
Train [66][1780/3239]	Time 0.228 (0.626)	Data Time 0.001 (0.025)	Loss 2.3179 (2.4976)	Entropy 1.04642 (1.04855)	Top-1 acc 68.750 (63.904)	Top-5 acc 87.500 (83.810)	lr 0.01069
Train [66][1790/3239]	Time 0.244 (0.625)	Data Time 0.001 (0.024)	Loss 2.4714 (2.4973)	Entropy 1.04641 (1.04854)	Top-1 acc 63.281 (63.915)	Top-5 acc 84.766 (83.817)	lr 0.01069
Train [66][1800/3239]	Time 0.345 (0.624)	Data Time 0.001 (0.024)	Loss 2.6155 (2.4971)	Entropy 1.04636 (1.04853)	Top-1 acc 62.500 (63.922)	Top-5 acc 81.250 (83.823)	lr 0.01069
Train [66][1810/3239]	Time 0.214 (0.623)	Data Time 0.001 (0.024)	Loss 2.4903 (2.4971)	Entropy 1.04630 (1.04851)	Top-1 acc 66.797 (63.926)	Top-5 acc 83.203 (83.820)	lr 0.01069
Train [66][1820/3239]	Time 0.218 (0.622)	Data Time 0.001 (0.024)	Loss 2.3564 (2.4971)	Entropy 1.04631 (1.04850)	Top-1 acc 69.141 (63.929)	Top-5 acc 86.328 (83.818)	lr 0.01069
Train [66][1830/3239]	Time 0.222 (0.621)	Data Time 0.002 (0.024)	Loss 2.6806 (2.4974)	Entropy 1.04627 (1.04849)	Top-1 acc 60.547 (63.917)	Top-5 acc 81.250 (83.819)	lr 0.01069
Train [66][1840/3239]	Time 0.274 (0.621)	Data Time 0.001 (0.024)	Loss 2.5758 (2.4973)	Entropy 1.04625 (1.04848)	Top-1 acc 60.547 (63.915)	Top-5 acc 81.641 (83.818)	lr 0.01068
Train [66][1850/3239]	Time 0.223 (0.620)	Data Time 0.001 (0.024)	Loss 2.5369 (2.4973)	Entropy 1.04615 (1.04847)	Top-1 acc 62.891 (63.918)	Top-5 acc 82.031 (83.815)	lr 0.01068
Train [66][1860/3239]	Time 0.278 (0.619)	Data Time 0.001 (0.024)	Loss 2.4577 (2.4975)	Entropy 1.04608 (1.04845)	Top-1 acc 62.891 (63.908)	Top-5 acc 85.156 (83.814)	lr 0.01068
Train [66][1870/3239]	Time 0.253 (0.618)	Data Time 0.001 (0.023)	Loss 2.5125 (2.4976)	Entropy 1.04604 (1.04844)	Top-1 acc 63.672 (63.909)	Top-5 acc 83.984 (83.813)	lr 0.01068
Train [66][1880/3239]	Time 2.530 (0.617)	Data Time 0.001 (0.023)	Loss 2.5121 (2.4978)	Entropy 1.04604 (1.04843)	Top-1 acc 64.844 (63.904)	Top-5 acc 83.984 (83.806)	lr 0.01068
Train [66][1890/3239]	Time 0.346 (0.615)	Data Time 0.001 (0.023)	Loss 2.5397 (2.4980)	Entropy 1.04596 (1.04841)	Top-1 acc 61.719 (63.902)	Top-5 acc 84.766 (83.804)	lr 0.01068
Train [66][1900/3239]	Time 0.334 (0.641)	Data Time 0.003 (0.023)	Loss 2.6254 (2.4981)	Entropy 1.04595 (1.04840)	Top-1 acc 64.844 (63.905)	Top-5 acc 80.078 (83.802)	lr 0.01068
Train [66][1910/3239]	Time 0.227 (0.640)	Data Time 0.002 (0.023)	Loss 2.4452 (2.4981)	Entropy 1.04601 (1.04839)	Top-1 acc 62.500 (63.909)	Top-5 acc 83.984 (83.802)	lr 0.01068
Train [66][1920/3239]	Time 0.241 (0.640)	Data Time 0.002 (0.023)	Loss 2.3756 (2.4982)	Entropy 1.04595 (1.04838)	Top-1 acc 69.531 (63.908)	Top-5 acc 85.156 (83.801)	lr 0.01068
Train [66][1930/3239]	Time 0.342 (0.639)	Data Time 0.001 (0.023)	Loss 2.7040 (2.4983)	Entropy 1.04596 (1.04836)	Top-1 acc 57.031 (63.898)	Top-5 acc 79.688 (83.796)	lr 0.01068
Train [66][1940/3239]	Time 0.217 (0.638)	Data Time 0.001 (0.023)	Loss 2.3463 (2.4980)	Entropy 1.04591 (1.04835)	Top-1 acc 66.016 (63.904)	Top-5 acc 86.719 (83.803)	lr 0.01067
Train [66][1950/3239]	Time 0.258 (0.637)	Data Time 0.001 (0.023)	Loss 2.4340 (2.4980)	Entropy 1.04588 (1.04834)	Top-1 acc 65.234 (63.902)	Top-5 acc 84.766 (83.807)	lr 0.01067
Train [66][1960/3239]	Time 0.213 (0.636)	Data Time 0.001 (0.022)	Loss 2.4675 (2.4981)	Entropy 1.04582 (1.04833)	Top-1 acc 64.453 (63.900)	Top-5 acc 83.984 (83.803)	lr 0.01067
Train [66][1970/3239]	Time 0.247 (0.635)	Data Time 0.001 (0.022)	Loss 2.4328 (2.4982)	Entropy 1.04577 (1.04831)	Top-1 acc 63.281 (63.898)	Top-5 acc 84.766 (83.801)	lr 0.01067
Train [66][1980/3239]	Time 0.314 (0.634)	Data Time 0.001 (0.022)	Loss 2.4522 (2.4985)	Entropy 1.04573 (1.04830)	Top-1 acc 64.844 (63.897)	Top-5 acc 83.984 (83.793)	lr 0.01067
Train [66][1990/3239]	Time 2.457 (0.634)	Data Time 0.001 (0.022)	Loss 2.3446 (2.4985)	Entropy 1.04573 (1.04829)	Top-1 acc 67.188 (63.900)	Top-5 acc 89.062 (83.797)	lr 0.01067
Train [66][2000/3239]	Time 0.309 (0.632)	Data Time 0.002 (0.022)	Loss 2.7055 (2.4984)	Entropy 1.04572 (1.04827)	Top-1 acc 56.641 (63.901)	Top-5 acc 77.734 (83.798)	lr 0.01067
Train [66][2010/3239]	Time 0.223 (0.631)	Data Time 0.001 (0.022)	Loss 2.6015 (2.4987)	Entropy 1.04571 (1.04826)	Top-1 acc 65.234 (63.892)	Top-5 acc 83.984 (83.791)	lr 0.01067
Train [66][2020/3239]	Time 0.233 (0.630)	Data Time 0.001 (0.022)	Loss 2.4020 (2.4987)	Entropy 1.04571 (1.04825)	Top-1 acc 67.188 (63.896)	Top-5 acc 85.938 (83.793)	lr 0.01067
Train [66][2030/3239]	Time 0.240 (0.629)	Data Time 0.001 (0.022)	Loss 2.5940 (2.4993)	Entropy 1.04566 (1.04824)	Top-1 acc 62.109 (63.884)	Top-5 acc 79.688 (83.785)	lr 0.01067
Train [66][2040/3239]	Time 0.233 (0.629)	Data Time 0.001 (0.022)	Loss 2.6621 (2.4993)	Entropy 1.04568 (1.04822)	Top-1 acc 62.891 (63.884)	Top-5 acc 80.469 (83.786)	lr 0.01066
Train [66][2050/3239]	Time 0.225 (0.628)	Data Time 0.001 (0.021)	Loss 2.3321 (2.4993)	Entropy 1.04566 (1.04821)	Top-1 acc 66.406 (63.883)	Top-5 acc 87.500 (83.786)	lr 0.01066
Train [66][2060/3239]	Time 0.266 (0.627)	Data Time 0.001 (0.021)	Loss 2.5128 (2.4995)	Entropy 1.04562 (1.04820)	Top-1 acc 61.719 (63.875)	Top-5 acc 84.766 (83.784)	lr 0.01066
Train [66][2070/3239]	Time 0.340 (0.626)	Data Time 0.001 (0.021)	Loss 2.3847 (2.4993)	Entropy 1.04557 (1.04819)	Top-1 acc 66.016 (63.879)	Top-5 acc 86.328 (83.783)	lr 0.01066
Train [66][2080/3239]	Time 0.282 (0.626)	Data Time 0.001 (0.021)	Loss 2.5035 (2.4993)	Entropy 1.04554 (1.04817)	Top-1 acc 60.156 (63.879)	Top-5 acc 84.766 (83.784)	lr 0.01066
Train [66][2090/3239]	Time 0.261 (0.625)	Data Time 0.001 (0.021)	Loss 2.4370 (2.4994)	Entropy 1.04558 (1.04816)	Top-1 acc 63.281 (63.877)	Top-5 acc 84.766 (83.784)	lr 0.01066
Train [66][2100/3239]	Time 2.489 (0.624)	Data Time 0.001 (0.021)	Loss 2.4496 (2.4995)	Entropy 1.04558 (1.04815)	Top-1 acc 64.453 (63.871)	Top-5 acc 85.547 (83.781)	lr 0.01066
Train [66][2110/3239]	Time 0.242 (0.622)	Data Time 0.001 (0.021)	Loss 2.6366 (2.5000)	Entropy 1.04549 (1.04814)	Top-1 acc 59.375 (63.858)	Top-5 acc 82.422 (83.773)	lr 0.01066
Train [66][2120/3239]	Time 0.236 (0.622)	Data Time 0.001 (0.021)	Loss 2.3655 (2.5000)	Entropy 1.04549 (1.04812)	Top-1 acc 67.578 (63.859)	Top-5 acc 87.109 (83.775)	lr 0.01066
Train [66][2130/3239]	Time 0.285 (0.621)	Data Time 0.001 (0.021)	Loss 2.3816 (2.5001)	Entropy 1.04545 (1.04811)	Top-1 acc 64.453 (63.862)	Top-5 acc 87.109 (83.773)	lr 0.01066
Train [66][2140/3239]	Time 0.221 (0.620)	Data Time 0.001 (0.021)	Loss 2.5331 (2.5001)	Entropy 1.04537 (1.04810)	Top-1 acc 64.453 (63.865)	Top-5 acc 83.594 (83.769)	lr 0.01065
Train [66][2150/3239]	Time 0.240 (0.619)	Data Time 0.001 (0.021)	Loss 2.4688 (2.5004)	Entropy 1.04536 (1.04809)	Top-1 acc 61.328 (63.855)	Top-5 acc 85.547 (83.764)	lr 0.01065
Train [66][2160/3239]	Time 0.356 (0.619)	Data Time 0.001 (0.020)	Loss 2.6406 (2.5005)	Entropy 1.04534 (1.04807)	Top-1 acc 59.766 (63.848)	Top-5 acc 80.078 (83.764)	lr 0.01065
Train [66][2170/3239]	Time 0.251 (0.618)	Data Time 0.001 (0.020)	Loss 2.5650 (2.5007)	Entropy 1.04538 (1.04806)	Top-1 acc 63.672 (63.848)	Top-5 acc 82.422 (83.759)	lr 0.01065
Train [66][2180/3239]	Time 0.210 (0.617)	Data Time 0.002 (0.020)	Loss 2.5911 (2.5007)	Entropy 1.04535 (1.04805)	Top-1 acc 60.938 (63.847)	Top-5 acc 82.812 (83.759)	lr 0.01065
Train [66][2190/3239]	Time 0.244 (0.617)	Data Time 0.001 (0.020)	Loss 2.6475 (2.5009)	Entropy 1.04518 (1.04804)	Top-1 acc 61.328 (63.837)	Top-5 acc 80.469 (83.753)	lr 0.01065
Train [66][2200/3239]	Time 0.259 (0.616)	Data Time 0.001 (0.020)	Loss 2.6687 (2.5009)	Entropy 1.04514 (1.04802)	Top-1 acc 61.328 (63.838)	Top-5 acc 78.125 (83.751)	lr 0.01065
Train [66][2210/3239]	Time 2.522 (0.615)	Data Time 0.001 (0.020)	Loss 2.4775 (2.5009)	Entropy 1.04514 (1.04801)	Top-1 acc 63.281 (63.839)	Top-5 acc 85.938 (83.750)	lr 0.01065
Train [66][2220/3239]	Time 0.243 (0.614)	Data Time 0.001 (0.020)	Loss 2.3857 (2.5008)	Entropy 1.04520 (1.04800)	Top-1 acc 66.406 (63.841)	Top-5 acc 86.328 (83.751)	lr 0.01065
Train [66][2230/3239]	Time 0.226 (0.613)	Data Time 0.001 (0.020)	Loss 2.5692 (2.5009)	Entropy 1.04521 (1.04798)	Top-1 acc 60.547 (63.841)	Top-5 acc 82.422 (83.751)	lr 0.01065
Train [66][2240/3239]	Time 0.232 (0.612)	Data Time 0.001 (0.020)	Loss 2.5385 (2.5007)	Entropy 1.04524 (1.04797)	Top-1 acc 62.109 (63.844)	Top-5 acc 82.422 (83.754)	lr 0.01064
Train [66][2250/3239]	Time 0.250 (0.612)	Data Time 0.001 (0.020)	Loss 2.6437 (2.5009)	Entropy 1.04519 (1.04796)	Top-1 acc 57.812 (63.838)	Top-5 acc 80.859 (83.752)	lr 0.01064
Train [66][2260/3239]	Time 0.342 (0.634)	Data Time 0.003 (0.020)	Loss 2.4770 (2.5012)	Entropy 1.04519 (1.04795)	Top-1 acc 62.500 (63.834)	Top-5 acc 85.547 (83.750)	lr 0.01064
Train [66][2270/3239]	Time 0.240 (0.634)	Data Time 0.002 (0.020)	Loss 2.5193 (2.5010)	Entropy 1.04516 (1.04794)	Top-1 acc 64.844 (63.840)	Top-5 acc 83.984 (83.752)	lr 0.01064
Train [66][2280/3239]	Time 0.253 (0.633)	Data Time 0.002 (0.019)	Loss 2.4112 (2.5011)	Entropy 1.04512 (1.04792)	Top-1 acc 69.141 (63.840)	Top-5 acc 85.156 (83.750)	lr 0.01064
Train [66][2290/3239]	Time 0.269 (0.633)	Data Time 0.004 (0.019)	Loss 2.4648 (2.5011)	Entropy 1.04509 (1.04791)	Top-1 acc 63.281 (63.841)	Top-5 acc 82.812 (83.749)	lr 0.01064
Train [66][2300/3239]	Time 0.383 (0.632)	Data Time 0.001 (0.019)	Loss 2.5529 (2.5011)	Entropy 1.04507 (1.04790)	Top-1 acc 60.938 (63.844)	Top-5 acc 84.375 (83.750)	lr 0.01064
Train [66][2310/3239]	Time 0.293 (0.631)	Data Time 0.001 (0.019)	Loss 2.4868 (2.5011)	Entropy 1.04510 (1.04789)	Top-1 acc 60.938 (63.846)	Top-5 acc 84.375 (83.751)	lr 0.01064
Train [66][2320/3239]	Time 2.441 (0.630)	Data Time 0.001 (0.019)	Loss 2.5909 (2.5012)	Entropy 1.04510 (1.04787)	Top-1 acc 62.500 (63.841)	Top-5 acc 83.203 (83.747)	lr 0.01064
Train [66][2330/3239]	Time 0.239 (0.629)	Data Time 0.001 (0.019)	Loss 2.4046 (2.5012)	Entropy 1.04513 (1.04786)	Top-1 acc 67.578 (63.843)	Top-5 acc 83.984 (83.748)	lr 0.01064
Train [66][2340/3239]	Time 0.239 (0.628)	Data Time 0.001 (0.019)	Loss 2.5366 (2.5014)	Entropy 1.04516 (1.04785)	Top-1 acc 63.672 (63.838)	Top-5 acc 83.594 (83.746)	lr 0.01063
Train [66][2350/3239]	Time 0.226 (0.627)	Data Time 0.001 (0.019)	Loss 2.3982 (2.5014)	Entropy 1.04505 (1.04784)	Top-1 acc 64.062 (63.836)	Top-5 acc 85.938 (83.745)	lr 0.01063
Train [66][2360/3239]	Time 0.229 (0.627)	Data Time 0.001 (0.019)	Loss 2.4898 (2.5018)	Entropy 1.04503 (1.04783)	Top-1 acc 62.891 (63.830)	Top-5 acc 86.328 (83.739)	lr 0.01063
Train [66][2370/3239]	Time 0.225 (0.626)	Data Time 0.001 (0.019)	Loss 2.3566 (2.5014)	Entropy 1.04500 (1.04782)	Top-1 acc 64.844 (63.837)	Top-5 acc 86.328 (83.745)	lr 0.01063
Train [66][2380/3239]	Time 0.225 (0.625)	Data Time 0.001 (0.019)	Loss 2.3146 (2.5014)	Entropy 1.04502 (1.04780)	Top-1 acc 69.531 (63.836)	Top-5 acc 85.547 (83.747)	lr 0.01063
Train [66][2390/3239]	Time 0.228 (0.625)	Data Time 0.001 (0.019)	Loss 2.4229 (2.5014)	Entropy 1.04501 (1.04779)	Top-1 acc 66.797 (63.836)	Top-5 acc 85.156 (83.747)	lr 0.01063
Train [66][2400/3239]	Time 0.228 (0.624)	Data Time 0.002 (0.019)	Loss 2.3575 (2.5013)	Entropy 1.04494 (1.04778)	Top-1 acc 66.406 (63.837)	Top-5 acc 85.938 (83.747)	lr 0.01063
Train [66][2410/3239]	Time 0.228 (0.624)	Data Time 0.001 (0.019)	Loss 2.5591 (2.5012)	Entropy 1.04491 (1.04777)	Top-1 acc 60.547 (63.839)	Top-5 acc 83.203 (83.748)	lr 0.01063
Train [66][2420/3239]	Time 0.258 (0.623)	Data Time 0.001 (0.018)	Loss 2.5199 (2.5012)	Entropy 1.04496 (1.04776)	Top-1 acc 63.672 (63.838)	Top-5 acc 78.906 (83.750)	lr 0.01063
Train [66][2430/3239]	Time 2.474 (0.622)	Data Time 0.001 (0.018)	Loss 2.6039 (2.5013)	Entropy 1.04496 (1.04775)	Top-1 acc 62.109 (63.837)	Top-5 acc 82.812 (83.746)	lr 0.01063
Train [66][2440/3239]	Time 0.360 (0.621)	Data Time 0.001 (0.018)	Loss 2.4933 (2.5013)	Entropy 1.04492 (1.04773)	Top-1 acc 63.281 (63.837)	Top-5 acc 81.641 (83.747)	lr 0.01062
Train [66][2450/3239]	Time 0.221 (0.620)	Data Time 0.001 (0.018)	Loss 2.7497 (2.5014)	Entropy 1.04493 (1.04772)	Top-1 acc 58.203 (63.827)	Top-5 acc 79.297 (83.746)	lr 0.01062
Train [66][2460/3239]	Time 0.225 (0.619)	Data Time 0.001 (0.018)	Loss 2.4241 (2.5013)	Entropy 1.04493 (1.04771)	Top-1 acc 65.234 (63.833)	Top-5 acc 86.328 (83.748)	lr 0.01062
Train [66][2470/3239]	Time 0.224 (0.619)	Data Time 0.001 (0.018)	Loss 2.5087 (2.5013)	Entropy 1.04497 (1.04770)	Top-1 acc 65.234 (63.831)	Top-5 acc 84.766 (83.750)	lr 0.01062
Train [66][2480/3239]	Time 0.341 (0.618)	Data Time 0.001 (0.018)	Loss 2.5877 (2.5015)	Entropy 1.04497 (1.04769)	Top-1 acc 63.672 (63.830)	Top-5 acc 82.812 (83.745)	lr 0.01062
Train [66][2490/3239]	Time 0.241 (0.618)	Data Time 0.001 (0.018)	Loss 2.3970 (2.5015)	Entropy 1.04497 (1.04768)	Top-1 acc 67.188 (63.828)	Top-5 acc 85.156 (83.746)	lr 0.01062
Train [66][2500/3239]	Time 0.228 (0.617)	Data Time 0.001 (0.018)	Loss 2.3544 (2.5014)	Entropy 1.04493 (1.04767)	Top-1 acc 69.531 (63.832)	Top-5 acc 84.766 (83.745)	lr 0.01062
Train [66][2510/3239]	Time 0.244 (0.617)	Data Time 0.001 (0.018)	Loss 2.6056 (2.5015)	Entropy 1.04485 (1.04766)	Top-1 acc 60.547 (63.828)	Top-5 acc 82.812 (83.745)	lr 0.01062
Train [66][2520/3239]	Time 0.215 (0.616)	Data Time 0.001 (0.018)	Loss 2.4775 (2.5014)	Entropy 1.04481 (1.04765)	Top-1 acc 66.406 (63.831)	Top-5 acc 80.859 (83.746)	lr 0.01062
Train [66][2530/3239]	Time 0.229 (0.615)	Data Time 0.001 (0.018)	Loss 2.5328 (2.5016)	Entropy 1.04478 (1.04763)	Top-1 acc 61.719 (63.829)	Top-5 acc 84.375 (83.743)	lr 0.01062
Train [66][2540/3239]	Time 2.471 (0.615)	Data Time 0.002 (0.018)	Loss 2.3470 (2.5017)	Entropy 1.04478 (1.04762)	Top-1 acc 67.578 (63.828)	Top-5 acc 87.109 (83.739)	lr 0.01061
Train [66][2550/3239]	Time 0.230 (0.613)	Data Time 0.001 (0.018)	Loss 2.5770 (2.5017)	Entropy 1.04479 (1.04761)	Top-1 acc 62.109 (63.827)	Top-5 acc 85.156 (83.738)	lr 0.01061
Train [66][2560/3239]	Time 0.233 (0.613)	Data Time 0.001 (0.018)	Loss 2.4869 (2.5020)	Entropy 1.04476 (1.04760)	Top-1 acc 64.844 (63.823)	Top-5 acc 82.812 (83.733)	lr 0.01061
Train [66][2570/3239]	Time 0.304 (0.612)	Data Time 0.001 (0.017)	Loss 2.5153 (2.5021)	Entropy 1.04467 (1.04759)	Top-1 acc 62.109 (63.817)	Top-5 acc 84.375 (83.732)	lr 0.01061
Train [66][2580/3239]	Time 0.219 (0.612)	Data Time 0.001 (0.017)	Loss 2.4626 (2.5022)	Entropy 1.04466 (1.04758)	Top-1 acc 65.625 (63.816)	Top-5 acc 83.203 (83.730)	lr 0.01061
Train [66][2590/3239]	Time 0.283 (0.611)	Data Time 0.001 (0.017)	Loss 2.4827 (2.5022)	Entropy 1.04461 (1.04757)	Top-1 acc 65.234 (63.816)	Top-5 acc 82.812 (83.727)	lr 0.01061
Train [66][2600/3239]	Time 0.238 (0.611)	Data Time 0.001 (0.017)	Loss 2.5424 (2.5023)	Entropy 1.04439 (1.04755)	Top-1 acc 63.672 (63.816)	Top-5 acc 80.859 (83.724)	lr 0.01061
Train [66][2610/3239]	Time 0.267 (0.610)	Data Time 0.001 (0.017)	Loss 2.6102 (2.5024)	Entropy 1.04426 (1.04754)	Top-1 acc 65.234 (63.815)	Top-5 acc 82.031 (83.724)	lr 0.01061
Train [66][2620/3239]	Time 0.273 (0.628)	Data Time 0.002 (0.017)	Loss 2.5834 (2.5025)	Entropy 1.04416 (1.04753)	Top-1 acc 59.766 (63.813)	Top-5 acc 83.594 (83.723)	lr 0.01061
Train [66][2630/3239]	Time 0.218 (0.629)	Data Time 0.002 (0.017)	Loss 2.5286 (2.5025)	Entropy 1.04420 (1.04752)	Top-1 acc 63.281 (63.807)	Top-5 acc 82.422 (83.722)	lr 0.01061
Train [66][2640/3239]	Time 0.245 (0.628)	Data Time 0.001 (0.017)	Loss 2.6689 (2.5026)	Entropy 1.04422 (1.04750)	Top-1 acc 61.328 (63.811)	Top-5 acc 78.516 (83.720)	lr 0.01060
Train [66][2650/3239]	Time 0.214 (0.627)	Data Time 0.001 (0.017)	Loss 2.5925 (2.5027)	Entropy 1.04402 (1.04749)	Top-1 acc 61.328 (63.808)	Top-5 acc 80.859 (83.717)	lr 0.01060
Train [66][2660/3239]	Time 0.331 (0.627)	Data Time 0.001 (0.017)	Loss 2.3766 (2.5025)	Entropy 1.04409 (1.04748)	Top-1 acc 66.797 (63.815)	Top-5 acc 83.203 (83.719)	lr 0.01060
Train [66][2670/3239]	Time 0.255 (0.626)	Data Time 0.001 (0.017)	Loss 2.4760 (2.5027)	Entropy 1.04406 (1.04747)	Top-1 acc 65.625 (63.811)	Top-5 acc 84.375 (83.714)	lr 0.01060
Train [66][2680/3239]	Time 0.222 (0.626)	Data Time 0.001 (0.017)	Loss 2.4239 (2.5028)	Entropy 1.04407 (1.04745)	Top-1 acc 64.453 (63.809)	Top-5 acc 85.547 (83.715)	lr 0.01060
Train [66][2690/3239]	Time 0.215 (0.625)	Data Time 0.001 (0.017)	Loss 2.6067 (2.5030)	Entropy 1.04400 (1.04744)	Top-1 acc 59.766 (63.806)	Top-5 acc 82.812 (83.711)	lr 0.01060
Train [66][2700/3239]	Time 0.229 (0.625)	Data Time 0.001 (0.017)	Loss 2.4754 (2.5028)	Entropy 1.04399 (1.04743)	Top-1 acc 62.109 (63.809)	Top-5 acc 86.328 (83.715)	lr 0.01060
Train [66][2710/3239]	Time 0.261 (0.624)	Data Time 0.001 (0.017)	Loss 2.6147 (2.5029)	Entropy 1.04398 (1.04742)	Top-1 acc 57.812 (63.807)	Top-5 acc 83.203 (83.715)	lr 0.01060
Train [66][2720/3239]	Time 0.265 (0.623)	Data Time 0.001 (0.017)	Loss 2.5706 (2.5028)	Entropy 1.04395 (1.04740)	Top-1 acc 62.891 (63.809)	Top-5 acc 87.109 (83.717)	lr 0.01060
Train [66][2730/3239]	Time 0.229 (0.623)	Data Time 0.001 (0.017)	Loss 2.3659 (2.5026)	Entropy 1.04396 (1.04739)	Top-1 acc 62.109 (63.808)	Top-5 acc 86.719 (83.722)	lr 0.01060
Train [66][2740/3239]	Time 0.231 (0.622)	Data Time 0.001 (0.017)	Loss 2.4927 (2.5027)	Entropy 1.04390 (1.04738)	Top-1 acc 63.672 (63.803)	Top-5 acc 84.766 (83.724)	lr 0.01059
Train [66][2750/3239]	Time 0.380 (0.622)	Data Time 0.001 (0.016)	Loss 2.4761 (2.5026)	Entropy 1.04389 (1.04736)	Top-1 acc 63.672 (63.803)	Top-5 acc 82.422 (83.725)	lr 0.01059
Train [66][2760/3239]	Time 0.272 (0.621)	Data Time 0.001 (0.016)	Loss 2.6060 (2.5025)	Entropy 1.04390 (1.04735)	Top-1 acc 62.891 (63.805)	Top-5 acc 82.422 (83.729)	lr 0.01059
Train [66][2770/3239]	Time 0.237 (0.621)	Data Time 0.001 (0.016)	Loss 2.5483 (2.5024)	Entropy 1.04388 (1.04734)	Top-1 acc 62.891 (63.807)	Top-5 acc 83.594 (83.729)	lr 0.01059
Train [66][2780/3239]	Time 0.258 (0.620)	Data Time 0.001 (0.016)	Loss 2.5589 (2.5024)	Entropy 1.04390 (1.04733)	Top-1 acc 62.500 (63.808)	Top-5 acc 84.766 (83.733)	lr 0.01059
Train [66][2790/3239]	Time 0.256 (0.620)	Data Time 0.001 (0.016)	Loss 2.5160 (2.5024)	Entropy 1.04389 (1.04732)	Top-1 acc 67.578 (63.806)	Top-5 acc 84.375 (83.734)	lr 0.01059
Train [66][2800/3239]	Time 0.270 (0.619)	Data Time 0.001 (0.016)	Loss 2.4486 (2.5026)	Entropy 1.04385 (1.04730)	Top-1 acc 65.625 (63.800)	Top-5 acc 85.547 (83.730)	lr 0.01059
Train [66][2810/3239]	Time 0.224 (0.619)	Data Time 0.001 (0.016)	Loss 2.4673 (2.5027)	Entropy 1.04385 (1.04729)	Top-1 acc 67.969 (63.802)	Top-5 acc 84.375 (83.730)	lr 0.01059
Train [66][2820/3239]	Time 0.221 (0.618)	Data Time 0.001 (0.016)	Loss 2.4650 (2.5026)	Entropy 1.04385 (1.04728)	Top-1 acc 63.672 (63.803)	Top-5 acc 82.812 (83.729)	lr 0.01059
Train [66][2830/3239]	Time 0.295 (0.618)	Data Time 0.002 (0.016)	Loss 2.4335 (2.5027)	Entropy 1.04386 (1.04727)	Top-1 acc 68.750 (63.798)	Top-5 acc 85.547 (83.727)	lr 0.01059
Train [66][2840/3239]	Time 0.302 (0.617)	Data Time 0.001 (0.016)	Loss 2.4518 (2.5026)	Entropy 1.04380 (1.04725)	Top-1 acc 64.844 (63.804)	Top-5 acc 85.156 (83.728)	lr 0.01058
Train [66][2850/3239]	Time 0.210 (0.617)	Data Time 0.001 (0.016)	Loss 2.5696 (2.5027)	Entropy 1.04389 (1.04724)	Top-1 acc 60.938 (63.805)	Top-5 acc 82.031 (83.727)	lr 0.01058
Train [66][2860/3239]	Time 0.230 (0.616)	Data Time 0.001 (0.016)	Loss 2.3628 (2.5026)	Entropy 1.04383 (1.04723)	Top-1 acc 69.531 (63.805)	Top-5 acc 89.062 (83.730)	lr 0.01058
Train [66][2870/3239]	Time 0.205 (0.616)	Data Time 0.001 (0.016)	Loss 2.4872 (2.5025)	Entropy 1.04383 (1.04722)	Top-1 acc 62.500 (63.810)	Top-5 acc 83.594 (83.730)	lr 0.01058
Train [66][2880/3239]	Time 0.331 (0.615)	Data Time 0.001 (0.016)	Loss 2.6252 (2.5026)	Entropy 1.04379 (1.04721)	Top-1 acc 61.719 (63.809)	Top-5 acc 83.203 (83.730)	lr 0.01058
Train [66][2890/3239]	Time 0.225 (0.615)	Data Time 0.001 (0.016)	Loss 2.5176 (2.5025)	Entropy 1.04374 (1.04720)	Top-1 acc 60.547 (63.812)	Top-5 acc 83.594 (83.731)	lr 0.01058
Train [66][2900/3239]	Time 0.264 (0.614)	Data Time 0.001 (0.016)	Loss 2.5802 (2.5028)	Entropy 1.04373 (1.04718)	Top-1 acc 62.109 (63.807)	Top-5 acc 83.203 (83.728)	lr 0.01058
Train [66][2910/3239]	Time 0.257 (0.614)	Data Time 0.001 (0.016)	Loss 2.4804 (2.5028)	Entropy 1.04372 (1.04717)	Top-1 acc 64.844 (63.806)	Top-5 acc 83.594 (83.727)	lr 0.01058
Train [66][2920/3239]	Time 0.208 (0.613)	Data Time 0.001 (0.016)	Loss 2.5185 (2.5028)	Entropy 1.04374 (1.04716)	Top-1 acc 61.328 (63.800)	Top-5 acc 83.594 (83.729)	lr 0.01058
Train [66][2930/3239]	Time 0.217 (0.613)	Data Time 0.001 (0.016)	Loss 2.5083 (2.5029)	Entropy 1.04377 (1.04715)	Top-1 acc 60.547 (63.799)	Top-5 acc 83.594 (83.728)	lr 0.01058
Train [66][2940/3239]	Time 0.225 (0.612)	Data Time 0.001 (0.015)	Loss 2.3559 (2.5027)	Entropy 1.04377 (1.04714)	Top-1 acc 67.969 (63.804)	Top-5 acc 87.109 (83.731)	lr 0.01057
Train [66][2950/3239]	Time 0.275 (0.628)	Data Time 0.004 (0.015)	Loss 2.6323 (2.5026)	Entropy 1.04373 (1.04712)	Top-1 acc 59.375 (63.802)	Top-5 acc 80.078 (83.732)	lr 0.01057
Train [66][2960/3239]	Time 0.281 (0.628)	Data Time 0.002 (0.015)	Loss 2.4909 (2.5027)	Entropy 1.04364 (1.04711)	Top-1 acc 62.891 (63.802)	Top-5 acc 84.766 (83.734)	lr 0.01057
Train [66][2970/3239]	Time 0.402 (0.628)	Data Time 0.002 (0.015)	Loss 2.3896 (2.5026)	Entropy 1.04363 (1.04710)	Top-1 acc 66.406 (63.806)	Top-5 acc 85.547 (83.734)	lr 0.01057
Train [66][2980/3239]	Time 0.302 (0.627)	Data Time 0.003 (0.015)	Loss 2.6434 (2.5026)	Entropy 1.04363 (1.04709)	Top-1 acc 60.156 (63.806)	Top-5 acc 78.516 (83.735)	lr 0.01057
Train [66][2990/3239]	Time 0.285 (0.627)	Data Time 0.002 (0.015)	Loss 2.5171 (2.5027)	Entropy 1.04361 (1.04708)	Top-1 acc 64.453 (63.802)	Top-5 acc 80.078 (83.733)	lr 0.01057
Train [66][3000/3239]	Time 0.264 (0.627)	Data Time 0.003 (0.015)	Loss 2.5460 (2.5030)	Entropy 1.04356 (1.04707)	Top-1 acc 64.062 (63.794)	Top-5 acc 82.422 (83.727)	lr 0.01057
Train [66][3010/3239]	Time 0.222 (0.626)	Data Time 0.002 (0.015)	Loss 2.4532 (2.5030)	Entropy 1.04355 (1.04706)	Top-1 acc 66.406 (63.795)	Top-5 acc 82.812 (83.726)	lr 0.01057
Train [66][3020/3239]	Time 0.329 (0.626)	Data Time 0.001 (0.015)	Loss 2.3940 (2.5031)	Entropy 1.04353 (1.04704)	Top-1 acc 63.672 (63.793)	Top-5 acc 88.281 (83.728)	lr 0.01057
Train [66][3030/3239]	Time 0.229 (0.625)	Data Time 0.001 (0.015)	Loss 2.3932 (2.5030)	Entropy 1.04352 (1.04703)	Top-1 acc 71.875 (63.803)	Top-5 acc 83.984 (83.727)	lr 0.01057
Train [66][3040/3239]	Time 0.224 (0.625)	Data Time 0.001 (0.015)	Loss 2.5508 (2.5031)	Entropy 1.04364 (1.04702)	Top-1 acc 62.109 (63.804)	Top-5 acc 81.641 (83.724)	lr 0.01056
Train [66][3050/3239]	Time 0.227 (0.624)	Data Time 0.001 (0.015)	Loss 2.5168 (2.5032)	Entropy 1.04358 (1.04701)	Top-1 acc 60.938 (63.801)	Top-5 acc 83.203 (83.723)	lr 0.01056
Train [66][3060/3239]	Time 0.373 (0.624)	Data Time 0.001 (0.015)	Loss 2.4573 (2.5033)	Entropy 1.04353 (1.04700)	Top-1 acc 64.453 (63.800)	Top-5 acc 85.156 (83.717)	lr 0.01056
Train [66][3070/3239]	Time 0.227 (0.623)	Data Time 0.001 (0.015)	Loss 2.3848 (2.5034)	Entropy 1.04354 (1.04699)	Top-1 acc 68.750 (63.797)	Top-5 acc 85.156 (83.716)	lr 0.01056
Train [66][3080/3239]	Time 0.224 (0.623)	Data Time 0.001 (0.015)	Loss 2.6165 (2.5032)	Entropy 1.04353 (1.04698)	Top-1 acc 60.547 (63.799)	Top-5 acc 82.031 (83.719)	lr 0.01056
Train [66][3090/3239]	Time 0.306 (0.622)	Data Time 0.002 (0.015)	Loss 2.3090 (2.5031)	Entropy 1.04344 (1.04696)	Top-1 acc 66.797 (63.798)	Top-5 acc 86.719 (83.720)	lr 0.01056
Train [66][3100/3239]	Time 0.253 (0.622)	Data Time 0.001 (0.015)	Loss 2.6811 (2.5032)	Entropy 1.04340 (1.04695)	Top-1 acc 57.422 (63.796)	Top-5 acc 80.078 (83.718)	lr 0.01056
Train [66][3110/3239]	Time 0.280 (0.621)	Data Time 0.001 (0.015)	Loss 2.4971 (2.5032)	Entropy 1.04340 (1.04694)	Top-1 acc 62.891 (63.794)	Top-5 acc 83.594 (83.719)	lr 0.01056
Train [66][3120/3239]	Time 0.341 (0.621)	Data Time 0.002 (0.015)	Loss 2.4333 (2.5032)	Entropy 1.04338 (1.04693)	Top-1 acc 62.500 (63.794)	Top-5 acc 82.031 (83.717)	lr 0.01056
Train [66][3130/3239]	Time 0.266 (0.620)	Data Time 0.002 (0.015)	Loss 2.3732 (2.5031)	Entropy 1.04333 (1.04692)	Top-1 acc 70.312 (63.791)	Top-5 acc 87.109 (83.718)	lr 0.01056
Train [66][3140/3239]	Time 0.261 (0.620)	Data Time 0.002 (0.015)	Loss 2.5415 (2.5031)	Entropy 1.04333 (1.04691)	Top-1 acc 62.500 (63.789)	Top-5 acc 81.250 (83.720)	lr 0.01055
Train [66][3150/3239]	Time 0.366 (0.620)	Data Time 0.001 (0.015)	Loss 2.5876 (2.5032)	Entropy 1.04335 (1.04690)	Top-1 acc 62.109 (63.787)	Top-5 acc 82.812 (83.716)	lr 0.01055
Train [66][3160/3239]	Time 0.233 (0.619)	Data Time 0.001 (0.015)	Loss 2.5873 (2.5034)	Entropy 1.04336 (1.04689)	Top-1 acc 61.719 (63.782)	Top-5 acc 82.422 (83.716)	lr 0.01055
Train [66][3170/3239]	Time 0.282 (0.619)	Data Time 0.001 (0.015)	Loss 2.5203 (2.5034)	Entropy 1.04332 (1.04687)	Top-1 acc 64.844 (63.780)	Top-5 acc 81.250 (83.715)	lr 0.01055
Train [66][3180/3239]	Time 0.260 (0.618)	Data Time 0.000 (0.014)	Loss 2.6411 (2.5034)	Entropy 1.04317 (1.04686)	Top-1 acc 62.500 (63.784)	Top-5 acc 78.516 (83.716)	lr 0.01055
Train [66][3190/3239]	Time 0.213 (0.618)	Data Time 0.000 (0.014)	Loss 2.7374 (2.5034)	Entropy 1.04315 (1.04685)	Top-1 acc 57.812 (63.784)	Top-5 acc 79.688 (83.716)	lr 0.01055
Train [66][3200/3239]	Time 0.211 (0.617)	Data Time 0.000 (0.014)	Loss 2.3972 (2.5033)	Entropy 1.04312 (1.04684)	Top-1 acc 65.234 (63.784)	Top-5 acc 87.891 (83.719)	lr 0.01055
Train [66][3210/3239]	Time 0.237 (0.617)	Data Time 0.000 (0.014)	Loss 2.5246 (2.5033)	Entropy 1.04313 (1.04683)	Top-1 acc 64.453 (63.783)	Top-5 acc 84.375 (83.718)	lr 0.01055
Train [66][3220/3239]	Time 0.220 (0.616)	Data Time 0.000 (0.014)	Loss 2.5135 (2.5034)	Entropy 1.04305 (1.04682)	Top-1 acc 63.672 (63.786)	Top-5 acc 86.328 (83.717)	lr 0.01055
Train [66][3230/3239]	Time 0.165 (0.616)	Data Time 0.000 (0.014)	Loss 2.5029 (2.5032)	Entropy 1.04304 (1.04680)	Top-1 acc 64.062 (63.787)	Top-5 acc 83.594 (83.718)	lr 0.01055
Train [66][3239/3239]	Time 2.243 (0.615)	Data Time 0.000 (0.014)	Loss 2.9284 (2.5033)	Entropy 1.04304 (1.04679)	Top-1 acc 55.556 (63.784)	Top-5 acc 76.543 (83.719)	lr 0.01054
==========Valid [66/120]	loss 1.410	top-1 acc 68.036 (68.036)	top-5 acc 87.288	Train top-1 63.784	top-5 83.719	Entropy 1.04304	Latency-None: 0.000ms	Flops: 546.53M
Train [67][0/3239]	Time 44.075 (44.075)	Data Time 41.101 (41.101)	Loss 2.4461 (2.4461)	Entropy 1.04303 (1.04303)	Top-1 acc 65.625 (65.625)	Top-5 acc 85.547 (85.547)	lr 0.01054
Train [67][10/3239]	Time 2.409 (4.485)	Data Time 0.001 (3.741)	Loss 2.4676 (2.4642)	Entropy 1.04303 (1.04303)	Top-1 acc 64.062 (64.595)	Top-5 acc 85.156 (84.553)	lr 0.01054
Train [67][20/3239]	Time 0.266 (2.472)	Data Time 0.002 (1.960)	Loss 2.3213 (2.4292)	Entropy 1.04298 (1.04301)	Top-1 acc 68.750 (65.104)	Top-5 acc 86.328 (85.045)	lr 0.01054
Train [67][30/3239]	Time 0.232 (1.828)	Data Time 0.001 (1.328)	Loss 2.3817 (2.4596)	Entropy 1.04300 (1.04300)	Top-1 acc 65.625 (64.945)	Top-5 acc 88.281 (84.451)	lr 0.01054
Train [67][40/3239]	Time 0.228 (1.497)	Data Time 0.001 (1.005)	Loss 2.4823 (2.4644)	Entropy 1.04296 (1.04299)	Top-1 acc 68.359 (64.777)	Top-5 acc 80.859 (84.280)	lr 0.01054
Train [67][50/3239]	Time 0.318 (2.463)	Data Time 0.002 (0.808)	Loss 2.4725 (2.4683)	Entropy 1.04295 (1.04299)	Top-1 acc 66.797 (64.698)	Top-5 acc 83.594 (84.107)	lr 0.01054
Train [67][60/3239]	Time 0.235 (2.135)	Data Time 0.002 (0.676)	Loss 2.3921 (2.4594)	Entropy 1.04298 (1.04298)	Top-1 acc 65.625 (64.812)	Top-5 acc 85.547 (84.426)	lr 0.01054
Train [67][70/3239]	Time 0.229 (1.901)	Data Time 0.001 (0.581)	Loss 2.3366 (2.4635)	Entropy 1.04290 (1.04298)	Top-1 acc 69.531 (64.816)	Top-5 acc 85.938 (84.325)	lr 0.01054
Train [67][80/3239]	Time 0.232 (1.724)	Data Time 0.001 (0.510)	Loss 2.3361 (2.4712)	Entropy 1.04283 (1.04296)	Top-1 acc 66.797 (64.656)	Top-5 acc 87.891 (84.226)	lr 0.01054
Train [67][90/3239]	Time 0.314 (1.587)	Data Time 0.001 (0.454)	Loss 2.6078 (2.4727)	Entropy 1.04283 (1.04295)	Top-1 acc 62.500 (64.651)	Top-5 acc 81.250 (84.178)	lr 0.01054
Train [67][100/3239]	Time 0.216 (1.475)	Data Time 0.001 (0.409)	Loss 2.3932 (2.4729)	Entropy 1.04280 (1.04294)	Top-1 acc 64.062 (64.604)	Top-5 acc 88.281 (84.205)	lr 0.01053
Train [67][110/3239]	Time 0.221 (1.384)	Data Time 0.001 (0.372)	Loss 2.5409 (2.4766)	Entropy 1.04281 (1.04292)	Top-1 acc 64.062 (64.520)	Top-5 acc 85.547 (84.181)	lr 0.01053
Train [67][120/3239]	Time 2.529 (1.308)	Data Time 0.001 (0.342)	Loss 2.5140 (2.4803)	Entropy 1.04281 (1.04291)	Top-1 acc 60.156 (64.424)	Top-5 acc 82.812 (84.020)	lr 0.01053
Train [67][130/3239]	Time 0.232 (1.227)	Data Time 0.001 (0.316)	Loss 2.6235 (2.4816)	Entropy 1.04266 (1.04289)	Top-1 acc 64.844 (64.441)	Top-5 acc 81.641 (83.993)	lr 0.01053
Train [67][140/3239]	Time 0.329 (1.173)	Data Time 0.001 (0.293)	Loss 2.4946 (2.4828)	Entropy 1.04258 (1.04287)	Top-1 acc 64.844 (64.495)	Top-5 acc 83.594 (84.065)	lr 0.01053
Train [67][150/3239]	Time 0.227 (1.126)	Data Time 0.001 (0.274)	Loss 2.4683 (2.4837)	Entropy 1.04256 (1.04285)	Top-1 acc 66.797 (64.461)	Top-5 acc 85.938 (84.021)	lr 0.01053
Train [67][160/3239]	Time 0.218 (1.085)	Data Time 0.001 (0.257)	Loss 2.4926 (2.4828)	Entropy 1.04254 (1.04283)	Top-1 acc 64.062 (64.490)	Top-5 acc 84.375 (84.038)	lr 0.01053
Train [67][170/3239]	Time 0.219 (1.048)	Data Time 0.001 (0.242)	Loss 2.3283 (2.4820)	Entropy 1.04253 (1.04282)	Top-1 acc 64.453 (64.501)	Top-5 acc 86.719 (84.055)	lr 0.01053
Train [67][180/3239]	Time 0.241 (1.016)	Data Time 0.001 (0.229)	Loss 2.3660 (2.4826)	Entropy 1.04248 (1.04280)	Top-1 acc 66.797 (64.514)	Top-5 acc 86.328 (84.051)	lr 0.01053
Train [67][190/3239]	Time 0.224 (0.988)	Data Time 0.001 (0.217)	Loss 2.3909 (2.4838)	Entropy 1.04247 (1.04278)	Top-1 acc 67.578 (64.453)	Top-5 acc 85.156 (83.991)	lr 0.01053
Train [67][200/3239]	Time 0.227 (0.962)	Data Time 0.001 (0.206)	Loss 2.6753 (2.4847)	Entropy 1.04238 (1.04276)	Top-1 acc 60.156 (64.426)	Top-5 acc 81.250 (83.975)	lr 0.01052
Train [67][210/3239]	Time 0.224 (0.939)	Data Time 0.001 (0.197)	Loss 2.3817 (2.4833)	Entropy 1.04238 (1.04275)	Top-1 acc 65.234 (64.455)	Top-5 acc 85.156 (83.970)	lr 0.01052
Train [67][220/3239]	Time 0.217 (0.917)	Data Time 0.001 (0.188)	Loss 2.3257 (2.4819)	Entropy 1.04237 (1.04273)	Top-1 acc 66.016 (64.469)	Top-5 acc 87.109 (83.997)	lr 0.01052
Train [67][230/3239]	Time 2.605 (0.897)	Data Time 0.001 (0.180)	Loss 2.4758 (2.4804)	Entropy 1.04237 (1.04271)	Top-1 acc 69.141 (64.506)	Top-5 acc 83.984 (84.059)	lr 0.01052
Train [67][240/3239]	Time 0.229 (0.870)	Data Time 0.001 (0.172)	Loss 2.3952 (2.4805)	Entropy 1.04236 (1.04270)	Top-1 acc 66.797 (64.497)	Top-5 acc 83.984 (84.057)	lr 0.01052
Train [67][250/3239]	Time 0.232 (0.854)	Data Time 0.001 (0.165)	Loss 2.4242 (2.4814)	Entropy 1.04231 (1.04268)	Top-1 acc 64.453 (64.494)	Top-5 acc 83.594 (84.023)	lr 0.01052
Train [67][260/3239]	Time 0.229 (0.839)	Data Time 0.001 (0.159)	Loss 2.6638 (2.4833)	Entropy 1.04233 (1.04267)	Top-1 acc 60.156 (64.432)	Top-5 acc 81.250 (84.004)	lr 0.01052
Train [67][270/3239]	Time 0.218 (0.826)	Data Time 0.001 (0.153)	Loss 2.5587 (2.4819)	Entropy 1.04228 (1.04266)	Top-1 acc 63.672 (64.442)	Top-5 acc 82.422 (84.025)	lr 0.01052
Train [67][280/3239]	Time 0.230 (0.813)	Data Time 0.001 (0.148)	Loss 2.4996 (2.4809)	Entropy 1.04224 (1.04264)	Top-1 acc 65.625 (64.448)	Top-5 acc 83.984 (84.054)	lr 0.01052
Train [67][290/3239]	Time 0.229 (0.801)	Data Time 0.001 (0.143)	Loss 2.3560 (2.4801)	Entropy 1.04230 (1.04263)	Top-1 acc 68.750 (64.481)	Top-5 acc 86.328 (84.074)	lr 0.01052
Train [67][300/3239]	Time 0.244 (0.790)	Data Time 0.001 (0.138)	Loss 2.4889 (2.4804)	Entropy 1.04220 (1.04262)	Top-1 acc 64.453 (64.478)	Top-5 acc 82.031 (84.062)	lr 0.01051
Train [67][310/3239]	Time 0.240 (0.780)	Data Time 0.002 (0.134)	Loss 2.4772 (2.4787)	Entropy 1.04222 (1.04260)	Top-1 acc 68.750 (64.535)	Top-5 acc 84.766 (84.097)	lr 0.01051
Train [67][320/3239]	Time 0.315 (0.771)	Data Time 0.001 (0.130)	Loss 2.2602 (2.4774)	Entropy 1.04214 (1.04259)	Top-1 acc 73.047 (64.570)	Top-5 acc 86.328 (84.112)	lr 0.01051
Train [67][330/3239]	Time 0.282 (0.762)	Data Time 0.001 (0.126)	Loss 2.5787 (2.4775)	Entropy 1.04207 (1.04258)	Top-1 acc 63.672 (64.581)	Top-5 acc 83.203 (84.109)	lr 0.01051
Train [67][340/3239]	Time 2.493 (0.753)	Data Time 0.001 (0.122)	Loss 2.6138 (2.4780)	Entropy 1.04207 (1.04256)	Top-1 acc 63.281 (64.564)	Top-5 acc 79.297 (84.091)	lr 0.01051
Train [67][350/3239]	Time 0.215 (0.738)	Data Time 0.001 (0.119)	Loss 2.4830 (2.4783)	Entropy 1.04203 (1.04255)	Top-1 acc 64.844 (64.572)	Top-5 acc 87.500 (84.088)	lr 0.01051
Train [67][360/3239]	Time 0.237 (0.731)	Data Time 0.001 (0.115)	Loss 2.2532 (2.4793)	Entropy 1.04202 (1.04253)	Top-1 acc 70.703 (64.532)	Top-5 acc 90.234 (84.071)	lr 0.01051
Train [67][370/3239]	Time 0.213 (0.724)	Data Time 0.001 (0.112)	Loss 2.3577 (2.4769)	Entropy 1.04195 (1.04252)	Top-1 acc 67.188 (64.596)	Top-5 acc 87.109 (84.113)	lr 0.01051
Train [67][380/3239]	Time 0.211 (0.717)	Data Time 0.001 (0.109)	Loss 2.4744 (2.4762)	Entropy 1.04190 (1.04250)	Top-1 acc 65.625 (64.613)	Top-5 acc 83.203 (84.128)	lr 0.01051
Train [67][390/3239]	Time 0.218 (0.711)	Data Time 0.002 (0.107)	Loss 2.3634 (2.4756)	Entropy 1.04181 (1.04249)	Top-1 acc 64.844 (64.609)	Top-5 acc 88.672 (84.142)	lr 0.01051
Train [67][400/3239]	Time 0.229 (0.704)	Data Time 0.001 (0.104)	Loss 2.3259 (2.4732)	Entropy 1.04179 (1.04247)	Top-1 acc 64.844 (64.659)	Top-5 acc 89.844 (84.182)	lr 0.01050
Train [67][410/3239]	Time 0.531 (0.828)	Data Time 0.003 (0.102)	Loss 2.4806 (2.4737)	Entropy 1.04175 (1.04245)	Top-1 acc 65.234 (64.657)	Top-5 acc 83.594 (84.177)	lr 0.01050
Train [67][420/3239]	Time 0.223 (0.819)	Data Time 0.002 (0.099)	Loss 2.5460 (2.4735)	Entropy 1.04169 (1.04243)	Top-1 acc 61.719 (64.639)	Top-5 acc 82.812 (84.168)	lr 0.01050
Train [67][430/3239]	Time 0.239 (0.811)	Data Time 0.001 (0.097)	Loss 2.5186 (2.4731)	Entropy 1.04176 (1.04242)	Top-1 acc 62.500 (64.641)	Top-5 acc 82.031 (84.174)	lr 0.01050
Train [67][440/3239]	Time 0.215 (0.804)	Data Time 0.001 (0.095)	Loss 2.5113 (2.4749)	Entropy 1.04176 (1.04240)	Top-1 acc 63.672 (64.585)	Top-5 acc 83.984 (84.132)	lr 0.01050
Train [67][450/3239]	Time 2.541 (0.796)	Data Time 0.002 (0.093)	Loss 2.4921 (2.4762)	Entropy 1.04176 (1.04239)	Top-1 acc 65.234 (64.546)	Top-5 acc 85.156 (84.113)	lr 0.01050
Train [67][460/3239]	Time 0.226 (0.785)	Data Time 0.001 (0.091)	Loss 2.5979 (2.4769)	Entropy 1.04167 (1.04237)	Top-1 acc 62.500 (64.541)	Top-5 acc 81.250 (84.103)	lr 0.01050
Train [67][470/3239]	Time 0.230 (0.778)	Data Time 0.001 (0.089)	Loss 2.5971 (2.4774)	Entropy 1.04163 (1.04236)	Top-1 acc 59.375 (64.534)	Top-5 acc 83.203 (84.112)	lr 0.01050
Train [67][480/3239]	Time 0.259 (0.771)	Data Time 0.001 (0.087)	Loss 2.5022 (2.4777)	Entropy 1.04159 (1.04234)	Top-1 acc 64.062 (64.532)	Top-5 acc 84.766 (84.105)	lr 0.01050
Train [67][490/3239]	Time 0.229 (0.765)	Data Time 0.001 (0.085)	Loss 2.5712 (2.4784)	Entropy 1.04155 (1.04233)	Top-1 acc 62.109 (64.506)	Top-5 acc 82.422 (84.101)	lr 0.01050
Train [67][500/3239]	Time 0.226 (0.759)	Data Time 0.001 (0.084)	Loss 2.5370 (2.4786)	Entropy 1.04140 (1.04231)	Top-1 acc 62.500 (64.481)	Top-5 acc 83.203 (84.092)	lr 0.01049
Train [67][510/3239]	Time 0.258 (0.754)	Data Time 0.001 (0.082)	Loss 2.2758 (2.4778)	Entropy 1.04134 (1.04229)	Top-1 acc 66.406 (64.478)	Top-5 acc 87.109 (84.115)	lr 0.01049
Train [67][520/3239]	Time 0.229 (0.748)	Data Time 0.001 (0.081)	Loss 2.4601 (2.4773)	Entropy 1.04133 (1.04227)	Top-1 acc 64.453 (64.484)	Top-5 acc 83.203 (84.126)	lr 0.01049
Train [67][530/3239]	Time 0.219 (0.743)	Data Time 0.001 (0.079)	Loss 2.5215 (2.4778)	Entropy 1.04132 (1.04225)	Top-1 acc 62.891 (64.471)	Top-5 acc 82.422 (84.119)	lr 0.01049
Train [67][540/3239]	Time 0.265 (0.738)	Data Time 0.002 (0.078)	Loss 2.5217 (2.4779)	Entropy 1.04134 (1.04224)	Top-1 acc 61.719 (64.450)	Top-5 acc 85.547 (84.123)	lr 0.01049
Train [67][550/3239]	Time 0.381 (0.733)	Data Time 0.001 (0.076)	Loss 2.2365 (2.4774)	Entropy 1.04133 (1.04222)	Top-1 acc 70.312 (64.452)	Top-5 acc 87.891 (84.136)	lr 0.01049
Train [67][560/3239]	Time 2.375 (0.728)	Data Time 0.001 (0.075)	Loss 2.5588 (2.4775)	Entropy 1.04133 (1.04220)	Top-1 acc 60.156 (64.455)	Top-5 acc 82.422 (84.124)	lr 0.01049
Train [67][570/3239]	Time 0.201 (0.720)	Data Time 0.001 (0.074)	Loss 2.5362 (2.4796)	Entropy 1.04126 (1.04219)	Top-1 acc 64.844 (64.408)	Top-5 acc 82.422 (84.096)	lr 0.01049
Train [67][580/3239]	Time 0.218 (0.715)	Data Time 0.001 (0.072)	Loss 2.4881 (2.4804)	Entropy 1.04124 (1.04217)	Top-1 acc 64.062 (64.387)	Top-5 acc 85.156 (84.089)	lr 0.01049
Train [67][590/3239]	Time 0.226 (0.711)	Data Time 0.001 (0.071)	Loss 2.6764 (2.4816)	Entropy 1.04121 (1.04216)	Top-1 acc 59.375 (64.353)	Top-5 acc 79.297 (84.058)	lr 0.01049
Train [67][600/3239]	Time 0.233 (0.707)	Data Time 0.001 (0.070)	Loss 2.3776 (2.4818)	Entropy 1.04109 (1.04214)	Top-1 acc 66.797 (64.352)	Top-5 acc 86.328 (84.044)	lr 0.01048
Train [67][610/3239]	Time 0.234 (0.703)	Data Time 0.001 (0.069)	Loss 2.4980 (2.4820)	Entropy 1.04111 (1.04212)	Top-1 acc 60.938 (64.350)	Top-5 acc 83.594 (84.037)	lr 0.01048
Train [67][620/3239]	Time 0.221 (0.699)	Data Time 0.001 (0.068)	Loss 2.5789 (2.4820)	Entropy 1.04106 (1.04211)	Top-1 acc 60.938 (64.338)	Top-5 acc 83.203 (84.040)	lr 0.01048
Train [67][630/3239]	Time 0.217 (0.695)	Data Time 0.001 (0.067)	Loss 2.5103 (2.4818)	Entropy 1.04101 (1.04209)	Top-1 acc 66.406 (64.345)	Top-5 acc 84.766 (84.052)	lr 0.01048
Train [67][640/3239]	Time 0.337 (0.692)	Data Time 0.001 (0.066)	Loss 2.6743 (2.4827)	Entropy 1.04097 (1.04207)	Top-1 acc 58.984 (64.312)	Top-5 acc 78.125 (84.022)	lr 0.01048
Train [67][650/3239]	Time 0.229 (0.689)	Data Time 0.001 (0.065)	Loss 2.4616 (2.4837)	Entropy 1.04093 (1.04205)	Top-1 acc 64.062 (64.289)	Top-5 acc 84.375 (83.995)	lr 0.01048
Train [67][660/3239]	Time 0.242 (0.685)	Data Time 0.001 (0.064)	Loss 2.4629 (2.4834)	Entropy 1.04092 (1.04204)	Top-1 acc 65.625 (64.294)	Top-5 acc 84.766 (84.000)	lr 0.01048
Train [67][670/3239]	Time 2.538 (0.682)	Data Time 0.001 (0.063)	Loss 2.4491 (2.4832)	Entropy 1.04092 (1.04202)	Top-1 acc 64.844 (64.290)	Top-5 acc 84.375 (84.005)	lr 0.01048
Train [67][680/3239]	Time 0.239 (0.675)	Data Time 0.001 (0.062)	Loss 2.5334 (2.4834)	Entropy 1.04089 (1.04200)	Top-1 acc 67.578 (64.283)	Top-5 acc 81.641 (84.003)	lr 0.01048
Train [67][690/3239]	Time 0.237 (0.673)	Data Time 0.001 (0.061)	Loss 2.5728 (2.4845)	Entropy 1.04081 (1.04199)	Top-1 acc 64.062 (64.258)	Top-5 acc 82.812 (83.990)	lr 0.01048
Train [67][700/3239]	Time 0.235 (0.670)	Data Time 0.001 (0.060)	Loss 2.4039 (2.4851)	Entropy 1.04074 (1.04197)	Top-1 acc 64.453 (64.244)	Top-5 acc 82.031 (83.971)	lr 0.01047
Train [67][710/3239]	Time 0.235 (0.667)	Data Time 0.001 (0.059)	Loss 2.5248 (2.4852)	Entropy 1.04068 (1.04195)	Top-1 acc 63.281 (64.245)	Top-5 acc 82.422 (83.971)	lr 0.01047
Train [67][720/3239]	Time 0.232 (0.664)	Data Time 0.001 (0.059)	Loss 2.4446 (2.4854)	Entropy 1.04068 (1.04193)	Top-1 acc 61.719 (64.240)	Top-5 acc 87.109 (83.966)	lr 0.01047
Train [67][730/3239]	Time 0.319 (0.662)	Data Time 0.002 (0.058)	Loss 2.4746 (2.4863)	Entropy 1.04068 (1.04192)	Top-1 acc 64.453 (64.226)	Top-5 acc 84.375 (83.952)	lr 0.01047
Train [67][740/3239]	Time 0.223 (0.659)	Data Time 0.001 (0.057)	Loss 2.4866 (2.4862)	Entropy 1.04062 (1.04190)	Top-1 acc 66.016 (64.233)	Top-5 acc 83.594 (83.954)	lr 0.01047
Train [67][750/3239]	Time 0.234 (0.657)	Data Time 0.001 (0.056)	Loss 2.3411 (2.4861)	Entropy 1.04067 (1.04188)	Top-1 acc 68.750 (64.253)	Top-5 acc 87.109 (83.963)	lr 0.01047
Train [67][760/3239]	Time 0.215 (0.654)	Data Time 0.001 (0.056)	Loss 2.5184 (2.4865)	Entropy 1.04067 (1.04187)	Top-1 acc 65.625 (64.251)	Top-5 acc 86.328 (83.969)	lr 0.01047
Train [67][770/3239]	Time 0.411 (0.722)	Data Time 0.004 (0.055)	Loss 2.4603 (2.4867)	Entropy 1.04064 (1.04185)	Top-1 acc 65.625 (64.239)	Top-5 acc 86.328 (83.979)	lr 0.01047
Train [67][780/3239]	Time 2.530 (0.719)	Data Time 0.002 (0.054)	Loss 2.4656 (2.4865)	Entropy 1.04064 (1.04184)	Top-1 acc 67.578 (64.248)	Top-5 acc 85.156 (83.986)	lr 0.01047
Train [67][790/3239]	Time 0.236 (0.713)	Data Time 0.002 (0.054)	Loss 2.6367 (2.4870)	Entropy 1.04062 (1.04182)	Top-1 acc 62.500 (64.246)	Top-5 acc 80.469 (83.978)	lr 0.01047
Train [67][800/3239]	Time 0.223 (0.710)	Data Time 0.001 (0.053)	Loss 2.5493 (2.4866)	Entropy 1.04054 (1.04180)	Top-1 acc 61.719 (64.264)	Top-5 acc 83.203 (83.981)	lr 0.01046
Train [67][810/3239]	Time 0.229 (0.707)	Data Time 0.001 (0.052)	Loss 2.4980 (2.4865)	Entropy 1.04058 (1.04179)	Top-1 acc 59.766 (64.272)	Top-5 acc 82.812 (83.979)	lr 0.01046
Train [67][820/3239]	Time 0.328 (0.704)	Data Time 0.001 (0.052)	Loss 2.6021 (2.4867)	Entropy 1.04054 (1.04177)	Top-1 acc 62.500 (64.268)	Top-5 acc 81.250 (83.977)	lr 0.01046
Train [67][830/3239]	Time 0.243 (0.701)	Data Time 0.001 (0.051)	Loss 2.4419 (2.4871)	Entropy 1.04050 (1.04176)	Top-1 acc 66.406 (64.268)	Top-5 acc 85.156 (83.973)	lr 0.01046
Train [67][840/3239]	Time 0.221 (0.699)	Data Time 0.001 (0.051)	Loss 2.4362 (2.4868)	Entropy 1.04048 (1.04174)	Top-1 acc 62.891 (64.273)	Top-5 acc 85.156 (83.989)	lr 0.01046
Train [67][850/3239]	Time 0.234 (0.696)	Data Time 0.001 (0.050)	Loss 2.4778 (2.4860)	Entropy 1.04051 (1.04173)	Top-1 acc 62.891 (64.285)	Top-5 acc 85.938 (84.005)	lr 0.01046
Train [67][860/3239]	Time 0.245 (0.693)	Data Time 0.001 (0.049)	Loss 2.3487 (2.4866)	Entropy 1.04054 (1.04171)	Top-1 acc 63.672 (64.272)	Top-5 acc 88.672 (84.005)	lr 0.01046
Train [67][870/3239]	Time 0.253 (0.691)	Data Time 0.001 (0.049)	Loss 2.5775 (2.4865)	Entropy 1.04042 (1.04170)	Top-1 acc 64.062 (64.271)	Top-5 acc 83.203 (84.014)	lr 0.01046
Train [67][880/3239]	Time 0.221 (0.688)	Data Time 0.001 (0.048)	Loss 2.5456 (2.4863)	Entropy 1.04034 (1.04169)	Top-1 acc 61.719 (64.264)	Top-5 acc 82.812 (84.016)	lr 0.01046
Train [67][890/3239]	Time 2.405 (0.686)	Data Time 0.001 (0.048)	Loss 2.4626 (2.4863)	Entropy 1.04034 (1.04167)	Top-1 acc 63.672 (64.272)	Top-5 acc 84.375 (84.021)	lr 0.01046
Train [67][900/3239]	Time 0.235 (0.681)	Data Time 0.001 (0.047)	Loss 2.5366 (2.4861)	Entropy 1.04036 (1.04166)	Top-1 acc 62.891 (64.285)	Top-5 acc 83.984 (84.022)	lr 0.01045
Train [67][910/3239]	Time 0.336 (0.679)	Data Time 0.001 (0.047)	Loss 2.6569 (2.4862)	Entropy 1.04028 (1.04164)	Top-1 acc 60.156 (64.276)	Top-5 acc 82.422 (84.024)	lr 0.01045
Train [67][920/3239]	Time 0.245 (0.676)	Data Time 0.001 (0.046)	Loss 2.3109 (2.4864)	Entropy 1.04025 (1.04163)	Top-1 acc 68.750 (64.267)	Top-5 acc 87.109 (84.015)	lr 0.01045
Train [67][930/3239]	Time 0.229 (0.674)	Data Time 0.001 (0.046)	Loss 2.8239 (2.4871)	Entropy 1.04025 (1.04161)	Top-1 acc 58.203 (64.245)	Top-5 acc 79.297 (84.012)	lr 0.01045
Train [67][940/3239]	Time 0.262 (0.672)	Data Time 0.001 (0.045)	Loss 2.4945 (2.4873)	Entropy 1.04021 (1.04160)	Top-1 acc 64.062 (64.243)	Top-5 acc 85.547 (84.006)	lr 0.01045
Train [67][950/3239]	Time 0.224 (0.670)	Data Time 0.001 (0.045)	Loss 2.5120 (2.4873)	Entropy 1.04021 (1.04158)	Top-1 acc 64.062 (64.237)	Top-5 acc 82.812 (84.005)	lr 0.01045
Train [67][960/3239]	Time 0.248 (0.668)	Data Time 0.001 (0.044)	Loss 2.4089 (2.4871)	Entropy 1.04014 (1.04157)	Top-1 acc 69.141 (64.240)	Top-5 acc 85.938 (84.010)	lr 0.01045
Train [67][970/3239]	Time 0.236 (0.666)	Data Time 0.001 (0.044)	Loss 2.6786 (2.4877)	Entropy 1.04013 (1.04155)	Top-1 acc 60.156 (64.226)	Top-5 acc 80.859 (84.002)	lr 0.01045
Train [67][980/3239]	Time 0.231 (0.664)	Data Time 0.001 (0.044)	Loss 2.5955 (2.4882)	Entropy 1.03994 (1.04154)	Top-1 acc 58.594 (64.204)	Top-5 acc 82.422 (83.997)	lr 0.01045
Train [67][990/3239]	Time 0.234 (0.662)	Data Time 0.001 (0.043)	Loss 2.3416 (2.4880)	Entropy 1.03991 (1.04152)	Top-1 acc 67.969 (64.212)	Top-5 acc 86.328 (83.996)	lr 0.01045
Train [67][1000/3239]	Time 2.489 (0.660)	Data Time 0.001 (0.043)	Loss 2.3516 (2.4877)	Entropy 1.03991 (1.04151)	Top-1 acc 67.969 (64.230)	Top-5 acc 86.328 (84.003)	lr 0.01044
Train [67][1010/3239]	Time 0.250 (0.656)	Data Time 0.001 (0.042)	Loss 2.5865 (2.4878)	Entropy 1.03991 (1.04149)	Top-1 acc 62.891 (64.222)	Top-5 acc 80.859 (84.004)	lr 0.01044
Train [67][1020/3239]	Time 0.236 (0.654)	Data Time 0.001 (0.042)	Loss 2.3899 (2.4875)	Entropy 1.03987 (1.04147)	Top-1 acc 62.891 (64.230)	Top-5 acc 86.328 (84.015)	lr 0.01044
Train [67][1030/3239]	Time 0.246 (0.652)	Data Time 0.001 (0.041)	Loss 2.4876 (2.4877)	Entropy 1.03984 (1.04146)	Top-1 acc 67.578 (64.221)	Top-5 acc 85.547 (84.015)	lr 0.01044
Train [67][1040/3239]	Time 0.225 (0.650)	Data Time 0.001 (0.041)	Loss 2.7637 (2.4880)	Entropy 1.03981 (1.04144)	Top-1 acc 58.594 (64.213)	Top-5 acc 78.125 (84.016)	lr 0.01044
Train [67][1050/3239]	Time 0.226 (0.649)	Data Time 0.001 (0.041)	Loss 2.4818 (2.4882)	Entropy 1.03982 (1.04143)	Top-1 acc 66.797 (64.200)	Top-5 acc 82.422 (84.017)	lr 0.01044
Train [67][1060/3239]	Time 0.226 (0.647)	Data Time 0.001 (0.040)	Loss 2.6566 (2.4884)	Entropy 1.03984 (1.04141)	Top-1 acc 57.812 (64.194)	Top-5 acc 80.859 (84.011)	lr 0.01044
Train [67][1070/3239]	Time 0.214 (0.645)	Data Time 0.001 (0.040)	Loss 2.6295 (2.4887)	Entropy 1.03978 (1.04140)	Top-1 acc 60.547 (64.183)	Top-5 acc 80.469 (84.007)	lr 0.01044
Train [67][1080/3239]	Time 0.236 (0.644)	Data Time 0.001 (0.040)	Loss 2.4303 (2.4886)	Entropy 1.03978 (1.04138)	Top-1 acc 64.844 (64.176)	Top-5 acc 86.328 (84.015)	lr 0.01044
Train [67][1090/3239]	Time 0.335 (0.642)	Data Time 0.001 (0.039)	Loss 2.2853 (2.4882)	Entropy 1.03971 (1.04137)	Top-1 acc 66.797 (64.185)	Top-5 acc 87.500 (84.022)	lr 0.01044
Train [67][1100/3239]	Time 0.220 (0.641)	Data Time 0.001 (0.039)	Loss 2.6585 (2.4887)	Entropy 1.03970 (1.04135)	Top-1 acc 58.984 (64.167)	Top-5 acc 79.297 (84.017)	lr 0.01043
Train [67][1110/3239]	Time 2.437 (0.639)	Data Time 0.001 (0.039)	Loss 2.4762 (2.4886)	Entropy 1.03970 (1.04134)	Top-1 acc 65.234 (64.166)	Top-5 acc 85.156 (84.021)	lr 0.01043
Train [67][1120/3239]	Time 0.211 (0.635)	Data Time 0.001 (0.038)	Loss 2.4279 (2.4883)	Entropy 1.03970 (1.04132)	Top-1 acc 62.891 (64.167)	Top-5 acc 86.719 (84.024)	lr 0.01043
Train [67][1130/3239]	Time 0.225 (0.634)	Data Time 0.001 (0.038)	Loss 2.5738 (2.4882)	Entropy 1.03970 (1.04131)	Top-1 acc 64.453 (64.163)	Top-5 acc 82.422 (84.025)	lr 0.01043
Train [67][1140/3239]	Time 0.245 (0.682)	Data Time 0.002 (0.038)	Loss 2.5833 (2.4883)	Entropy 1.03965 (1.04129)	Top-1 acc 61.328 (64.157)	Top-5 acc 81.250 (84.028)	lr 0.01043
Train [67][1150/3239]	Time 0.213 (0.680)	Data Time 0.002 (0.037)	Loss 2.4705 (2.4887)	Entropy 1.03957 (1.04128)	Top-1 acc 65.234 (64.147)	Top-5 acc 84.375 (84.021)	lr 0.01043
Train [67][1160/3239]	Time 0.217 (0.678)	Data Time 0.001 (0.037)	Loss 2.5710 (2.4888)	Entropy 1.03952 (1.04126)	Top-1 acc 64.062 (64.141)	Top-5 acc 80.469 (84.021)	lr 0.01043
Train [67][1170/3239]	Time 0.224 (0.677)	Data Time 0.001 (0.037)	Loss 2.5649 (2.4885)	Entropy 1.03945 (1.04125)	Top-1 acc 62.500 (64.144)	Top-5 acc 82.812 (84.029)	lr 0.01043
Train [67][1180/3239]	Time 0.336 (0.675)	Data Time 0.001 (0.036)	Loss 2.4301 (2.4883)	Entropy 1.03936 (1.04123)	Top-1 acc 64.062 (64.147)	Top-5 acc 83.203 (84.027)	lr 0.01043
Train [67][1190/3239]	Time 0.213 (0.673)	Data Time 0.001 (0.036)	Loss 2.5349 (2.4883)	Entropy 1.03936 (1.04122)	Top-1 acc 63.281 (64.155)	Top-5 acc 85.156 (84.026)	lr 0.01043
Train [67][1200/3239]	Time 0.224 (0.671)	Data Time 0.001 (0.036)	Loss 2.3175 (2.4880)	Entropy 1.03931 (1.04120)	Top-1 acc 67.969 (64.154)	Top-5 acc 88.281 (84.035)	lr 0.01042
Train [67][1210/3239]	Time 0.251 (0.670)	Data Time 0.001 (0.036)	Loss 2.5075 (2.4884)	Entropy 1.03929 (1.04119)	Top-1 acc 61.328 (64.151)	Top-5 acc 86.328 (84.026)	lr 0.01042
Train [67][1220/3239]	Time 2.466 (0.668)	Data Time 0.001 (0.035)	Loss 2.3406 (2.4883)	Entropy 1.03929 (1.04117)	Top-1 acc 72.656 (64.156)	Top-5 acc 86.719 (84.029)	lr 0.01042
Train [67][1230/3239]	Time 0.229 (0.665)	Data Time 0.001 (0.035)	Loss 2.4943 (2.4883)	Entropy 1.03926 (1.04116)	Top-1 acc 62.109 (64.155)	Top-5 acc 83.984 (84.026)	lr 0.01042
Train [67][1240/3239]	Time 0.223 (0.663)	Data Time 0.001 (0.035)	Loss 2.3991 (2.4881)	Entropy 1.03924 (1.04114)	Top-1 acc 66.406 (64.158)	Top-5 acc 85.938 (84.023)	lr 0.01042
Train [67][1250/3239]	Time 0.224 (0.661)	Data Time 0.001 (0.034)	Loss 2.4528 (2.4882)	Entropy 1.03921 (1.04112)	Top-1 acc 64.062 (64.156)	Top-5 acc 85.547 (84.027)	lr 0.01042
Train [67][1260/3239]	Time 0.242 (0.660)	Data Time 0.002 (0.034)	Loss 2.5871 (2.4883)	Entropy 1.03922 (1.04111)	Top-1 acc 65.234 (64.156)	Top-5 acc 82.422 (84.026)	lr 0.01042
Train [67][1270/3239]	Time 0.349 (0.659)	Data Time 0.001 (0.034)	Loss 2.5192 (2.4883)	Entropy 1.03922 (1.04109)	Top-1 acc 63.281 (64.153)	Top-5 acc 83.203 (84.024)	lr 0.01042
Train [67][1280/3239]	Time 0.228 (0.657)	Data Time 0.001 (0.034)	Loss 2.4443 (2.4878)	Entropy 1.03912 (1.04108)	Top-1 acc 67.969 (64.166)	Top-5 acc 83.594 (84.033)	lr 0.01042
Train [67][1290/3239]	Time 0.224 (0.656)	Data Time 0.001 (0.033)	Loss 2.3778 (2.4877)	Entropy 1.03911 (1.04106)	Top-1 acc 66.016 (64.171)	Top-5 acc 85.156 (84.032)	lr 0.01042
Train [67][1300/3239]	Time 0.212 (0.654)	Data Time 0.001 (0.033)	Loss 2.5673 (2.4876)	Entropy 1.03911 (1.04105)	Top-1 acc 59.766 (64.177)	Top-5 acc 85.938 (84.035)	lr 0.01041
Train [67][1310/3239]	Time 0.234 (0.653)	Data Time 0.001 (0.033)	Loss 2.5734 (2.4875)	Entropy 1.03908 (1.04103)	Top-1 acc 61.719 (64.178)	Top-5 acc 82.422 (84.044)	lr 0.01041
Train [67][1320/3239]	Time 0.358 (0.651)	Data Time 0.001 (0.033)	Loss 2.3275 (2.4871)	Entropy 1.03915 (1.04102)	Top-1 acc 67.578 (64.188)	Top-5 acc 85.547 (84.049)	lr 0.01041
Train [67][1330/3239]	Time 2.576 (0.650)	Data Time 0.001 (0.033)	Loss 2.4170 (2.4874)	Entropy 1.03915 (1.04101)	Top-1 acc 58.203 (64.175)	Top-5 acc 89.062 (84.045)	lr 0.01041
Train [67][1340/3239]	Time 0.231 (0.647)	Data Time 0.001 (0.032)	Loss 2.2857 (2.4872)	Entropy 1.03916 (1.04099)	Top-1 acc 69.141 (64.183)	Top-5 acc 88.672 (84.051)	lr 0.01041
Train [67][1350/3239]	Time 0.219 (0.646)	Data Time 0.001 (0.032)	Loss 2.5090 (2.4874)	Entropy 1.03906 (1.04098)	Top-1 acc 64.062 (64.180)	Top-5 acc 84.766 (84.050)	lr 0.01041
Train [67][1360/3239]	Time 0.229 (0.644)	Data Time 0.001 (0.032)	Loss 2.5571 (2.4878)	Entropy 1.03900 (1.04096)	Top-1 acc 64.062 (64.172)	Top-5 acc 82.422 (84.045)	lr 0.01041
Train [67][1370/3239]	Time 0.244 (0.643)	Data Time 0.001 (0.032)	Loss 2.5783 (2.4882)	Entropy 1.03898 (1.04095)	Top-1 acc 60.156 (64.156)	Top-5 acc 84.375 (84.034)	lr 0.01041
Train [67][1380/3239]	Time 0.225 (0.642)	Data Time 0.001 (0.031)	Loss 2.5983 (2.4881)	Entropy 1.03893 (1.04093)	Top-1 acc 59.375 (64.156)	Top-5 acc 81.250 (84.035)	lr 0.01041
Train [67][1390/3239]	Time 0.261 (0.641)	Data Time 0.001 (0.031)	Loss 2.5031 (2.4881)	Entropy 1.03893 (1.04092)	Top-1 acc 66.406 (64.155)	Top-5 acc 83.594 (84.035)	lr 0.01041
Train [67][1400/3239]	Time 0.217 (0.639)	Data Time 0.001 (0.031)	Loss 2.4346 (2.4880)	Entropy 1.03891 (1.04091)	Top-1 acc 67.578 (64.158)	Top-5 acc 83.984 (84.040)	lr 0.01041
Train [67][1410/3239]	Time 0.321 (0.638)	Data Time 0.001 (0.031)	Loss 2.7070 (2.4880)	Entropy 1.03882 (1.04089)	Top-1 acc 57.812 (64.160)	Top-5 acc 78.516 (84.038)	lr 0.01040
Train [67][1420/3239]	Time 0.223 (0.637)	Data Time 0.001 (0.031)	Loss 2.5778 (2.4879)	Entropy 1.03880 (1.04088)	Top-1 acc 62.109 (64.166)	Top-5 acc 80.469 (84.041)	lr 0.01040
Train [67][1430/3239]	Time 0.221 (0.636)	Data Time 0.001 (0.030)	Loss 2.5093 (2.4881)	Entropy 1.03880 (1.04086)	Top-1 acc 63.281 (64.152)	Top-5 acc 84.375 (84.038)	lr 0.01040
Train [67][1440/3239]	Time 2.512 (0.634)	Data Time 0.001 (0.030)	Loss 2.5138 (2.4879)	Entropy 1.03880 (1.04085)	Top-1 acc 61.719 (64.163)	Top-5 acc 82.031 (84.037)	lr 0.01040
Train [67][1450/3239]	Time 0.219 (0.632)	Data Time 0.001 (0.030)	Loss 2.5333 (2.4877)	Entropy 1.03879 (1.04083)	Top-1 acc 61.719 (64.170)	Top-5 acc 83.203 (84.041)	lr 0.01040
Train [67][1460/3239]	Time 0.240 (0.631)	Data Time 0.001 (0.030)	Loss 2.4515 (2.4877)	Entropy 1.03872 (1.04082)	Top-1 acc 65.234 (64.161)	Top-5 acc 83.984 (84.043)	lr 0.01040
Train [67][1470/3239]	Time 0.223 (0.630)	Data Time 0.001 (0.030)	Loss 2.5328 (2.4878)	Entropy 1.03871 (1.04081)	Top-1 acc 65.625 (64.158)	Top-5 acc 82.031 (84.043)	lr 0.01040
Train [67][1480/3239]	Time 0.233 (0.628)	Data Time 0.001 (0.029)	Loss 2.5430 (2.4879)	Entropy 1.03863 (1.04079)	Top-1 acc 63.672 (64.163)	Top-5 acc 83.203 (84.040)	lr 0.01040
Train [67][1490/3239]	Time 0.218 (0.627)	Data Time 0.001 (0.029)	Loss 2.4081 (2.4881)	Entropy 1.03859 (1.04078)	Top-1 acc 68.359 (64.160)	Top-5 acc 82.422 (84.030)	lr 0.01040
Train [67][1500/3239]	Time 0.466 (0.660)	Data Time 0.002 (0.029)	Loss 2.3792 (2.4880)	Entropy 1.03859 (1.04076)	Top-1 acc 67.188 (64.168)	Top-5 acc 86.328 (84.033)	lr 0.01040
Train [67][1510/3239]	Time 0.238 (0.660)	Data Time 0.002 (0.029)	Loss 2.4689 (2.4879)	Entropy 1.03857 (1.04075)	Top-1 acc 62.891 (64.165)	Top-5 acc 84.375 (84.036)	lr 0.01039
Train [67][1520/3239]	Time 0.229 (0.658)	Data Time 0.001 (0.029)	Loss 2.5938 (2.4883)	Entropy 1.03858 (1.04073)	Top-1 acc 60.156 (64.153)	Top-5 acc 83.203 (84.030)	lr 0.01039
Train [67][1530/3239]	Time 0.221 (0.657)	Data Time 0.002 (0.029)	Loss 2.6788 (2.4879)	Entropy 1.03854 (1.04072)	Top-1 acc 59.766 (64.169)	Top-5 acc 80.469 (84.039)	lr 0.01039
Train [67][1540/3239]	Time 0.305 (0.656)	Data Time 0.001 (0.028)	Loss 2.3019 (2.4876)	Entropy 1.03850 (1.04070)	Top-1 acc 66.406 (64.169)	Top-5 acc 85.938 (84.042)	lr 0.01039
Train [67][1550/3239]	Time 2.447 (0.655)	Data Time 0.002 (0.028)	Loss 2.4007 (2.4875)	Entropy 1.03850 (1.04069)	Top-1 acc 64.453 (64.172)	Top-5 acc 85.547 (84.044)	lr 0.01039
Train [67][1560/3239]	Time 0.218 (0.652)	Data Time 0.001 (0.028)	Loss 2.4789 (2.4878)	Entropy 1.03848 (1.04068)	Top-1 acc 62.891 (64.164)	Top-5 acc 82.812 (84.037)	lr 0.01039
Train [67][1570/3239]	Time 0.251 (0.651)	Data Time 0.001 (0.028)	Loss 2.4539 (2.4875)	Entropy 1.03845 (1.04066)	Top-1 acc 64.062 (64.169)	Top-5 acc 84.766 (84.044)	lr 0.01039
Train [67][1580/3239]	Time 0.245 (0.650)	Data Time 0.002 (0.028)	Loss 2.6660 (2.4874)	Entropy 1.03847 (1.04065)	Top-1 acc 64.062 (64.175)	Top-5 acc 82.812 (84.049)	lr 0.01039
Train [67][1590/3239]	Time 0.336 (0.649)	Data Time 0.001 (0.028)	Loss 2.4633 (2.4873)	Entropy 1.03844 (1.04063)	Top-1 acc 61.328 (64.179)	Top-5 acc 83.594 (84.050)	lr 0.01039
Train [67][1600/3239]	Time 0.227 (0.648)	Data Time 0.001 (0.027)	Loss 2.1973 (2.4871)	Entropy 1.03842 (1.04062)	Top-1 acc 70.312 (64.184)	Top-5 acc 89.844 (84.055)	lr 0.01039
Train [67][1610/3239]	Time 0.224 (0.647)	Data Time 0.001 (0.027)	Loss 2.6585 (2.4870)	Entropy 1.03841 (1.04061)	Top-1 acc 62.500 (64.183)	Top-5 acc 80.078 (84.057)	lr 0.01038
Train [67][1620/3239]	Time 0.216 (0.645)	Data Time 0.001 (0.027)	Loss 2.3937 (2.4868)	Entropy 1.03841 (1.04059)	Top-1 acc 68.750 (64.185)	Top-5 acc 85.156 (84.060)	lr 0.01038
Train [67][1630/3239]	Time 0.255 (0.644)	Data Time 0.001 (0.027)	Loss 2.4319 (2.4870)	Entropy 1.03829 (1.04058)	Top-1 acc 66.406 (64.183)	Top-5 acc 82.812 (84.053)	lr 0.01038
Train [67][1640/3239]	Time 0.219 (0.643)	Data Time 0.001 (0.027)	Loss 2.4533 (2.4869)	Entropy 1.03841 (1.04057)	Top-1 acc 65.234 (64.188)	Top-5 acc 83.203 (84.050)	lr 0.01038
Train [67][1650/3239]	Time 0.279 (0.642)	Data Time 0.001 (0.027)	Loss 2.4647 (2.4872)	Entropy 1.03837 (1.04055)	Top-1 acc 66.406 (64.186)	Top-5 acc 85.156 (84.044)	lr 0.01038
Train [67][1660/3239]	Time 2.421 (0.641)	Data Time 0.001 (0.026)	Loss 2.4452 (2.4874)	Entropy 1.03837 (1.04054)	Top-1 acc 67.188 (64.187)	Top-5 acc 86.719 (84.043)	lr 0.01038
Train [67][1670/3239]	Time 0.246 (0.639)	Data Time 0.001 (0.026)	Loss 2.4412 (2.4873)	Entropy 1.03840 (1.04053)	Top-1 acc 66.797 (64.197)	Top-5 acc 85.547 (84.044)	lr 0.01038
Train [67][1680/3239]	Time 0.234 (0.638)	Data Time 0.001 (0.026)	Loss 2.5100 (2.4874)	Entropy 1.03836 (1.04051)	Top-1 acc 62.891 (64.194)	Top-5 acc 83.984 (84.043)	lr 0.01038
Train [67][1690/3239]	Time 0.243 (0.637)	Data Time 0.001 (0.026)	Loss 2.5001 (2.4879)	Entropy 1.03843 (1.04050)	Top-1 acc 60.547 (64.178)	Top-5 acc 85.938 (84.034)	lr 0.01038
Train [67][1700/3239]	Time 0.218 (0.636)	Data Time 0.001 (0.026)	Loss 2.3109 (2.4874)	Entropy 1.03841 (1.04049)	Top-1 acc 67.188 (64.189)	Top-5 acc 89.453 (84.047)	lr 0.01038
Train [67][1710/3239]	Time 0.243 (0.635)	Data Time 0.001 (0.026)	Loss 2.4065 (2.4871)	Entropy 1.03842 (1.04048)	Top-1 acc 64.062 (64.191)	Top-5 acc 86.719 (84.052)	lr 0.01037
Train [67][1720/3239]	Time 0.333 (0.634)	Data Time 0.001 (0.026)	Loss 2.6557 (2.4878)	Entropy 1.03842 (1.04047)	Top-1 acc 59.766 (64.175)	Top-5 acc 81.250 (84.042)	lr 0.01037
Train [67][1730/3239]	Time 0.224 (0.633)	Data Time 0.001 (0.025)	Loss 2.6018 (2.4879)	Entropy 1.03843 (1.04045)	Top-1 acc 62.500 (64.175)	Top-5 acc 82.812 (84.040)	lr 0.01037
Train [67][1740/3239]	Time 0.231 (0.632)	Data Time 0.001 (0.025)	Loss 2.4191 (2.4880)	Entropy 1.03841 (1.04044)	Top-1 acc 65.625 (64.168)	Top-5 acc 86.328 (84.038)	lr 0.01037
Train [67][1750/3239]	Time 0.254 (0.631)	Data Time 0.001 (0.025)	Loss 2.3705 (2.4879)	Entropy 1.03835 (1.04043)	Top-1 acc 66.797 (64.168)	Top-5 acc 83.984 (84.038)	lr 0.01037
Train [67][1760/3239]	Time 0.280 (0.630)	Data Time 0.001 (0.025)	Loss 2.6427 (2.4879)	Entropy 1.03836 (1.04042)	Top-1 acc 61.328 (64.172)	Top-5 acc 80.859 (84.035)	lr 0.01037
Train [67][1770/3239]	Time 2.625 (0.629)	Data Time 0.001 (0.025)	Loss 2.5624 (2.4884)	Entropy 1.03836 (1.04041)	Top-1 acc 61.719 (64.158)	Top-5 acc 83.203 (84.030)	lr 0.01037
Train [67][1780/3239]	Time 0.213 (0.627)	Data Time 0.001 (0.025)	Loss 2.5403 (2.4884)	Entropy 1.03825 (1.04039)	Top-1 acc 57.031 (64.153)	Top-5 acc 84.766 (84.033)	lr 0.01037
Train [67][1790/3239]	Time 0.225 (0.626)	Data Time 0.001 (0.025)	Loss 2.4882 (2.4886)	Entropy 1.03832 (1.04038)	Top-1 acc 64.844 (64.144)	Top-5 acc 82.812 (84.031)	lr 0.01037
Train [67][1800/3239]	Time 0.229 (0.625)	Data Time 0.001 (0.024)	Loss 2.4803 (2.4887)	Entropy 1.03831 (1.04037)	Top-1 acc 65.625 (64.141)	Top-5 acc 85.156 (84.030)	lr 0.01037
Train [67][1810/3239]	Time 0.227 (0.624)	Data Time 0.001 (0.024)	Loss 2.4763 (2.4885)	Entropy 1.03826 (1.04036)	Top-1 acc 62.891 (64.144)	Top-5 acc 82.422 (84.028)	lr 0.01036
Train [67][1820/3239]	Time 0.214 (0.624)	Data Time 0.001 (0.024)	Loss 2.4306 (2.4883)	Entropy 1.03826 (1.04035)	Top-1 acc 62.891 (64.146)	Top-5 acc 84.375 (84.030)	lr 0.01036
Train [67][1830/3239]	Time 0.223 (0.623)	Data Time 0.001 (0.024)	Loss 2.4371 (2.4885)	Entropy 1.03821 (1.04034)	Top-1 acc 64.453 (64.136)	Top-5 acc 87.500 (84.029)	lr 0.01036
Train [67][1840/3239]	Time 0.207 (0.622)	Data Time 0.001 (0.024)	Loss 2.5355 (2.4889)	Entropy 1.03818 (1.04033)	Top-1 acc 67.188 (64.129)	Top-5 acc 83.594 (84.020)	lr 0.01036
Train [67][1850/3239]	Time 0.237 (0.621)	Data Time 0.002 (0.024)	Loss 2.7080 (2.4889)	Entropy 1.03825 (1.04031)	Top-1 acc 58.203 (64.127)	Top-5 acc 79.297 (84.023)	lr 0.01036
Train [67][1860/3239]	Time 0.467 (0.649)	Data Time 0.003 (0.024)	Loss 2.7200 (2.4890)	Entropy 1.03829 (1.04030)	Top-1 acc 60.938 (64.122)	Top-5 acc 81.250 (84.023)	lr 0.01036
Train [67][1870/3239]	Time 0.223 (0.648)	Data Time 0.002 (0.024)	Loss 2.4865 (2.4889)	Entropy 1.03819 (1.04029)	Top-1 acc 64.062 (64.119)	Top-5 acc 83.984 (84.026)	lr 0.01036
Train [67][1880/3239]	Time 2.426 (0.647)	Data Time 0.002 (0.024)	Loss 2.4464 (2.4884)	Entropy 1.03819 (1.04028)	Top-1 acc 65.625 (64.129)	Top-5 acc 84.375 (84.032)	lr 0.01036
Train [67][1890/3239]	Time 0.251 (0.645)	Data Time 0.002 (0.023)	Loss 2.5108 (2.4886)	Entropy 1.03819 (1.04027)	Top-1 acc 67.188 (64.125)	Top-5 acc 82.812 (84.030)	lr 0.01036
Train [67][1900/3239]	Time 0.224 (0.644)	Data Time 0.001 (0.023)	Loss 2.4098 (2.4887)	Entropy 1.03812 (1.04026)	Top-1 acc 65.625 (64.120)	Top-5 acc 85.938 (84.029)	lr 0.01036
Train [67][1910/3239]	Time 0.223 (0.644)	Data Time 0.001 (0.023)	Loss 2.4233 (2.4885)	Entropy 1.03805 (1.04025)	Top-1 acc 63.672 (64.126)	Top-5 acc 86.328 (84.037)	lr 0.01035
Train [67][1920/3239]	Time 0.225 (0.643)	Data Time 0.001 (0.023)	Loss 2.5996 (2.4887)	Entropy 1.03807 (1.04024)	Top-1 acc 61.328 (64.122)	Top-5 acc 80.078 (84.030)	lr 0.01035
Train [67][1930/3239]	Time 0.211 (0.642)	Data Time 0.001 (0.023)	Loss 2.7025 (2.4889)	Entropy 1.03806 (1.04022)	Top-1 acc 58.984 (64.121)	Top-5 acc 80.469 (84.022)	lr 0.01035
Train [67][1940/3239]	Time 0.234 (0.641)	Data Time 0.001 (0.023)	Loss 2.4646 (2.4891)	Entropy 1.03791 (1.04021)	Top-1 acc 64.844 (64.119)	Top-5 acc 83.984 (84.018)	lr 0.01035
Train [67][1950/3239]	Time 0.220 (0.640)	Data Time 0.001 (0.023)	Loss 2.5620 (2.4892)	Entropy 1.03792 (1.04020)	Top-1 acc 62.500 (64.117)	Top-5 acc 81.250 (84.018)	lr 0.01035
Train [67][1960/3239]	Time 0.203 (0.639)	Data Time 0.001 (0.023)	Loss 2.5502 (2.4890)	Entropy 1.03782 (1.04019)	Top-1 acc 63.281 (64.124)	Top-5 acc 81.641 (84.024)	lr 0.01035
Train [67][1970/3239]	Time 0.257 (0.638)	Data Time 0.001 (0.023)	Loss 2.4250 (2.4892)	Entropy 1.03774 (1.04018)	Top-1 acc 69.141 (64.124)	Top-5 acc 85.938 (84.024)	lr 0.01035
Train [67][1980/3239]	Time 0.256 (0.637)	Data Time 0.001 (0.022)	Loss 2.4203 (2.4891)	Entropy 1.03771 (1.04016)	Top-1 acc 66.016 (64.128)	Top-5 acc 86.328 (84.024)	lr 0.01035
Train [67][1990/3239]	Time 2.397 (0.637)	Data Time 0.001 (0.022)	Loss 2.5870 (2.4892)	Entropy 1.03771 (1.04015)	Top-1 acc 61.328 (64.130)	Top-5 acc 82.812 (84.022)	lr 0.01035
Train [67][2000/3239]	Time 0.405 (0.635)	Data Time 0.002 (0.022)	Loss 2.5125 (2.4894)	Entropy 1.03771 (1.04014)	Top-1 acc 64.844 (64.127)	Top-5 acc 82.031 (84.020)	lr 0.01035
Train [67][2010/3239]	Time 0.276 (0.634)	Data Time 0.001 (0.022)	Loss 2.2978 (2.4894)	Entropy 1.03771 (1.04013)	Top-1 acc 69.922 (64.127)	Top-5 acc 87.891 (84.020)	lr 0.01034
Train [67][2020/3239]	Time 0.233 (0.633)	Data Time 0.001 (0.022)	Loss 2.6272 (2.4897)	Entropy 1.03765 (1.04012)	Top-1 acc 58.594 (64.122)	Top-5 acc 80.469 (84.013)	lr 0.01034
Train [67][2030/3239]	Time 0.234 (0.632)	Data Time 0.001 (0.022)	Loss 2.3710 (2.4896)	Entropy 1.03766 (1.04010)	Top-1 acc 68.359 (64.126)	Top-5 acc 87.891 (84.013)	lr 0.01034
Train [67][2040/3239]	Time 0.223 (0.631)	Data Time 0.001 (0.022)	Loss 2.4233 (2.4898)	Entropy 1.03759 (1.04009)	Top-1 acc 62.891 (64.119)	Top-5 acc 83.984 (84.008)	lr 0.01034
Train [67][2050/3239]	Time 0.231 (0.631)	Data Time 0.001 (0.022)	Loss 2.5078 (2.4898)	Entropy 1.03755 (1.04008)	Top-1 acc 62.109 (64.118)	Top-5 acc 83.203 (84.009)	lr 0.01034
Train [67][2060/3239]	Time 0.265 (0.630)	Data Time 0.001 (0.022)	Loss 2.6183 (2.4899)	Entropy 1.03754 (1.04007)	Top-1 acc 63.281 (64.113)	Top-5 acc 82.812 (84.008)	lr 0.01034
Train [67][2070/3239]	Time 0.250 (0.629)	Data Time 0.001 (0.022)	Loss 2.5404 (2.4900)	Entropy 1.03749 (1.04005)	Top-1 acc 63.672 (64.110)	Top-5 acc 83.594 (84.008)	lr 0.01034
Train [67][2080/3239]	Time 0.227 (0.628)	Data Time 0.001 (0.021)	Loss 2.4256 (2.4901)	Entropy 1.03748 (1.04004)	Top-1 acc 68.359 (64.111)	Top-5 acc 85.156 (84.007)	lr 0.01034
Train [67][2090/3239]	Time 0.343 (0.628)	Data Time 0.001 (0.021)	Loss 2.6185 (2.4903)	Entropy 1.03744 (1.04003)	Top-1 acc 61.719 (64.108)	Top-5 acc 79.688 (84.000)	lr 0.01034
Train [67][2100/3239]	Time 2.426 (0.627)	Data Time 0.001 (0.021)	Loss 2.5388 (2.4903)	Entropy 1.03744 (1.04002)	Top-1 acc 58.203 (64.108)	Top-5 acc 82.812 (83.998)	lr 0.01034
Train [67][2110/3239]	Time 0.235 (0.625)	Data Time 0.001 (0.021)	Loss 2.4733 (2.4905)	Entropy 1.03743 (1.04001)	Top-1 acc 63.672 (64.107)	Top-5 acc 85.938 (83.997)	lr 0.01033
Train [67][2120/3239]	Time 0.241 (0.624)	Data Time 0.002 (0.021)	Loss 2.2801 (2.4903)	Entropy 1.03737 (1.03999)	Top-1 acc 70.703 (64.113)	Top-5 acc 89.062 (83.997)	lr 0.01033
Train [67][2130/3239]	Time 0.233 (0.624)	Data Time 0.001 (0.021)	Loss 2.6283 (2.4904)	Entropy 1.03731 (1.03998)	Top-1 acc 59.375 (64.110)	Top-5 acc 84.766 (83.998)	lr 0.01033
Train [67][2140/3239]	Time 0.323 (0.623)	Data Time 0.001 (0.021)	Loss 2.5705 (2.4907)	Entropy 1.03724 (1.03997)	Top-1 acc 64.062 (64.107)	Top-5 acc 82.812 (83.992)	lr 0.01033
Train [67][2150/3239]	Time 0.226 (0.622)	Data Time 0.001 (0.021)	Loss 2.5503 (2.4905)	Entropy 1.03720 (1.03996)	Top-1 acc 62.500 (64.112)	Top-5 acc 82.031 (83.994)	lr 0.01033
Train [67][2160/3239]	Time 0.236 (0.622)	Data Time 0.001 (0.021)	Loss 2.6865 (2.4907)	Entropy 1.03721 (1.03994)	Top-1 acc 61.328 (64.103)	Top-5 acc 78.125 (83.991)	lr 0.01033
Train [67][2170/3239]	Time 0.250 (0.621)	Data Time 0.001 (0.021)	Loss 2.3816 (2.4906)	Entropy 1.03718 (1.03993)	Top-1 acc 66.406 (64.105)	Top-5 acc 85.156 (83.991)	lr 0.01033
Train [67][2180/3239]	Time 0.343 (0.620)	Data Time 0.001 (0.021)	Loss 2.5802 (2.4908)	Entropy 1.03717 (1.03992)	Top-1 acc 65.625 (64.100)	Top-5 acc 82.812 (83.987)	lr 0.01033
Train [67][2190/3239]	Time 0.242 (0.620)	Data Time 0.001 (0.020)	Loss 2.4022 (2.4908)	Entropy 1.03713 (1.03990)	Top-1 acc 66.406 (64.102)	Top-5 acc 85.547 (83.984)	lr 0.01033
Train [67][2200/3239]	Time 0.270 (0.619)	Data Time 0.001 (0.020)	Loss 2.4979 (2.4909)	Entropy 1.03712 (1.03989)	Top-1 acc 64.844 (64.102)	Top-5 acc 84.375 (83.984)	lr 0.01033
Train [67][2210/3239]	Time 2.586 (0.618)	Data Time 0.001 (0.020)	Loss 2.4396 (2.4908)	Entropy 1.03712 (1.03988)	Top-1 acc 68.750 (64.109)	Top-5 acc 83.984 (83.983)	lr 0.01032
Train [67][2220/3239]	Time 0.266 (0.617)	Data Time 0.001 (0.020)	Loss 2.5466 (2.4910)	Entropy 1.03715 (1.03987)	Top-1 acc 64.844 (64.099)	Top-5 acc 84.375 (83.978)	lr 0.01032
Train [67][2230/3239]	Time 0.229 (0.640)	Data Time 0.002 (0.020)	Loss 2.4037 (2.4910)	Entropy 1.03715 (1.03985)	Top-1 acc 67.188 (64.099)	Top-5 acc 85.547 (83.979)	lr 0.01032
Train [67][2240/3239]	Time 0.231 (0.639)	Data Time 0.002 (0.020)	Loss 2.3918 (2.4912)	Entropy 1.03712 (1.03984)	Top-1 acc 67.578 (64.093)	Top-5 acc 82.031 (83.973)	lr 0.01032
Train [67][2250/3239]	Time 0.294 (0.639)	Data Time 0.001 (0.020)	Loss 2.5279 (2.4912)	Entropy 1.03706 (1.03983)	Top-1 acc 59.766 (64.095)	Top-5 acc 83.984 (83.970)	lr 0.01032
Train [67][2260/3239]	Time 0.229 (0.638)	Data Time 0.001 (0.020)	Loss 2.4797 (2.4915)	Entropy 1.03709 (1.03982)	Top-1 acc 66.797 (64.091)	Top-5 acc 83.984 (83.964)	lr 0.01032
Train [67][2270/3239]	Time 0.333 (0.637)	Data Time 0.001 (0.020)	Loss 2.6241 (2.4915)	Entropy 1.03708 (1.03981)	Top-1 acc 61.328 (64.091)	Top-5 acc 80.469 (83.965)	lr 0.01032
Train [67][2280/3239]	Time 0.245 (0.637)	Data Time 0.001 (0.020)	Loss 2.4049 (2.4914)	Entropy 1.03706 (1.03979)	Top-1 acc 67.188 (64.099)	Top-5 acc 86.328 (83.964)	lr 0.01032
Train [67][2290/3239]	Time 0.231 (0.636)	Data Time 0.001 (0.020)	Loss 2.4800 (2.4915)	Entropy 1.03694 (1.03978)	Top-1 acc 62.891 (64.090)	Top-5 acc 83.594 (83.964)	lr 0.01032
Train [67][2300/3239]	Time 0.261 (0.635)	Data Time 0.002 (0.020)	Loss 2.6186 (2.4916)	Entropy 1.03699 (1.03977)	Top-1 acc 62.500 (64.089)	Top-5 acc 81.250 (83.964)	lr 0.01032
Train [67][2310/3239]	Time 0.245 (0.635)	Data Time 0.001 (0.019)	Loss 2.6177 (2.4919)	Entropy 1.03700 (1.03976)	Top-1 acc 60.938 (64.083)	Top-5 acc 84.766 (83.960)	lr 0.01031
Train [67][2320/3239]	Time 2.563 (0.634)	Data Time 0.001 (0.019)	Loss 2.4137 (2.4918)	Entropy 1.03700 (1.03975)	Top-1 acc 65.234 (64.083)	Top-5 acc 83.594 (83.961)	lr 0.01031
Train [67][2330/3239]	Time 0.262 (0.632)	Data Time 0.001 (0.019)	Loss 2.4294 (2.4919)	Entropy 1.03697 (1.03973)	Top-1 acc 67.578 (64.080)	Top-5 acc 84.766 (83.959)	lr 0.01031
Train [67][2340/3239]	Time 0.249 (0.632)	Data Time 0.001 (0.019)	Loss 2.5325 (2.4919)	Entropy 1.03693 (1.03972)	Top-1 acc 60.156 (64.076)	Top-5 acc 83.984 (83.959)	lr 0.01031
Train [67][2350/3239]	Time 0.226 (0.631)	Data Time 0.001 (0.019)	Loss 2.3126 (2.4920)	Entropy 1.03692 (1.03971)	Top-1 acc 68.359 (64.071)	Top-5 acc 83.984 (83.957)	lr 0.01031
Train [67][2360/3239]	Time 0.339 (0.631)	Data Time 0.001 (0.019)	Loss 2.5450 (2.4922)	Entropy 1.03692 (1.03970)	Top-1 acc 61.328 (64.060)	Top-5 acc 84.766 (83.952)	lr 0.01031
Train [67][2370/3239]	Time 0.230 (0.630)	Data Time 0.001 (0.019)	Loss 2.4027 (2.4921)	Entropy 1.03687 (1.03969)	Top-1 acc 65.625 (64.060)	Top-5 acc 83.594 (83.952)	lr 0.01031
Train [67][2380/3239]	Time 0.236 (0.629)	Data Time 0.001 (0.019)	Loss 2.3413 (2.4918)	Entropy 1.03684 (1.03967)	Top-1 acc 69.531 (64.068)	Top-5 acc 87.109 (83.957)	lr 0.01031
Train [67][2390/3239]	Time 0.245 (0.629)	Data Time 0.001 (0.019)	Loss 2.5603 (2.4921)	Entropy 1.03680 (1.03966)	Top-1 acc 64.844 (64.068)	Top-5 acc 83.203 (83.952)	lr 0.01031
Train [67][2400/3239]	Time 0.249 (0.628)	Data Time 0.001 (0.019)	Loss 2.4607 (2.4922)	Entropy 1.03672 (1.03965)	Top-1 acc 67.188 (64.067)	Top-5 acc 83.203 (83.950)	lr 0.01031
Train [67][2410/3239]	Time 0.230 (0.628)	Data Time 0.001 (0.019)	Loss 2.7261 (2.4921)	Entropy 1.03667 (1.03964)	Top-1 acc 58.594 (64.065)	Top-5 acc 76.562 (83.951)	lr 0.01030
Train [67][2420/3239]	Time 0.229 (0.627)	Data Time 0.001 (0.019)	Loss 2.2965 (2.4922)	Entropy 1.03669 (1.03963)	Top-1 acc 66.016 (64.058)	Top-5 acc 87.500 (83.947)	lr 0.01030
Train [67][2430/3239]	Time 2.577 (0.626)	Data Time 0.001 (0.019)	Loss 2.4018 (2.4922)	Entropy 1.03669 (1.03961)	Top-1 acc 67.969 (64.063)	Top-5 acc 85.547 (83.945)	lr 0.01030
Train [67][2440/3239]	Time 0.238 (0.625)	Data Time 0.001 (0.018)	Loss 2.5806 (2.4924)	Entropy 1.03674 (1.03960)	Top-1 acc 63.672 (64.060)	Top-5 acc 81.250 (83.938)	lr 0.01030
Train [67][2450/3239]	Time 0.248 (0.624)	Data Time 0.001 (0.018)	Loss 2.4529 (2.4923)	Entropy 1.03671 (1.03959)	Top-1 acc 64.844 (64.058)	Top-5 acc 83.984 (83.937)	lr 0.01030
Train [67][2460/3239]	Time 0.227 (0.624)	Data Time 0.001 (0.018)	Loss 2.5044 (2.4924)	Entropy 1.03669 (1.03958)	Top-1 acc 66.016 (64.059)	Top-5 acc 84.375 (83.937)	lr 0.01030
Train [67][2470/3239]	Time 0.223 (0.623)	Data Time 0.001 (0.018)	Loss 2.4323 (2.4926)	Entropy 1.03672 (1.03957)	Top-1 acc 66.797 (64.053)	Top-5 acc 83.594 (83.931)	lr 0.01030
Train [67][2480/3239]	Time 0.243 (0.622)	Data Time 0.002 (0.018)	Loss 2.5947 (2.4926)	Entropy 1.03662 (1.03956)	Top-1 acc 61.328 (64.052)	Top-5 acc 81.250 (83.927)	lr 0.01030
Train [67][2490/3239]	Time 0.229 (0.622)	Data Time 0.001 (0.018)	Loss 2.6306 (2.4926)	Entropy 1.03660 (1.03954)	Top-1 acc 60.547 (64.054)	Top-5 acc 78.906 (83.926)	lr 0.01030
Train [67][2500/3239]	Time 0.239 (0.621)	Data Time 0.001 (0.018)	Loss 2.5928 (2.4925)	Entropy 1.03653 (1.03953)	Top-1 acc 64.844 (64.056)	Top-5 acc 82.812 (83.927)	lr 0.01030
Train [67][2510/3239]	Time 0.219 (0.621)	Data Time 0.001 (0.018)	Loss 2.5360 (2.4925)	Entropy 1.03653 (1.03952)	Top-1 acc 62.109 (64.056)	Top-5 acc 86.328 (83.927)	lr 0.01029
Train [67][2520/3239]	Time 0.238 (0.620)	Data Time 0.001 (0.018)	Loss 2.4352 (2.4926)	Entropy 1.03646 (1.03951)	Top-1 acc 64.062 (64.057)	Top-5 acc 83.984 (83.925)	lr 0.01029
Train [67][2530/3239]	Time 0.318 (0.620)	Data Time 0.001 (0.018)	Loss 2.6849 (2.4926)	Entropy 1.03645 (1.03950)	Top-1 acc 57.031 (64.058)	Top-5 acc 83.594 (83.926)	lr 0.01029
Train [67][2540/3239]	Time 2.489 (0.619)	Data Time 0.002 (0.018)	Loss 2.5187 (2.4927)	Entropy 1.03645 (1.03948)	Top-1 acc 63.281 (64.054)	Top-5 acc 84.375 (83.925)	lr 0.01029
Train [67][2550/3239]	Time 0.243 (0.618)	Data Time 0.001 (0.018)	Loss 2.5217 (2.4927)	Entropy 1.03637 (1.03947)	Top-1 acc 62.891 (64.055)	Top-5 acc 83.984 (83.926)	lr 0.01029
Train [67][2560/3239]	Time 0.236 (0.617)	Data Time 0.001 (0.018)	Loss 2.4464 (2.4927)	Entropy 1.03635 (1.03946)	Top-1 acc 63.281 (64.052)	Top-5 acc 85.547 (83.926)	lr 0.01029
Train [67][2570/3239]	Time 0.218 (0.617)	Data Time 0.001 (0.018)	Loss 2.7935 (2.4929)	Entropy 1.03633 (1.03945)	Top-1 acc 58.203 (64.046)	Top-5 acc 81.250 (83.926)	lr 0.01029
Train [67][2580/3239]	Time 0.245 (0.616)	Data Time 0.001 (0.018)	Loss 2.3911 (2.4930)	Entropy 1.03627 (1.03944)	Top-1 acc 66.406 (64.045)	Top-5 acc 88.281 (83.926)	lr 0.01029
Train [67][2590/3239]	Time 0.240 (0.637)	Data Time 0.002 (0.018)	Loss 2.4178 (2.4930)	Entropy 1.03622 (1.03942)	Top-1 acc 61.328 (64.046)	Top-5 acc 85.938 (83.923)	lr 0.01029
Train [67][2600/3239]	Time 0.234 (0.636)	Data Time 0.002 (0.017)	Loss 2.5652 (2.4928)	Entropy 1.03624 (1.03941)	Top-1 acc 62.500 (64.050)	Top-5 acc 82.812 (83.926)	lr 0.01029
Train [67][2610/3239]	Time 0.232 (0.635)	Data Time 0.002 (0.017)	Loss 2.2905 (2.4925)	Entropy 1.03618 (1.03940)	Top-1 acc 67.578 (64.057)	Top-5 acc 87.500 (83.932)	lr 0.01028
Train [67][2620/3239]	Time 0.202 (0.635)	Data Time 0.001 (0.017)	Loss 2.6881 (2.4927)	Entropy 1.03620 (1.03939)	Top-1 acc 58.984 (64.054)	Top-5 acc 80.469 (83.927)	lr 0.01028
Train [67][2630/3239]	Time 0.279 (0.634)	Data Time 0.001 (0.017)	Loss 2.4940 (2.4925)	Entropy 1.03611 (1.03937)	Top-1 acc 62.891 (64.056)	Top-5 acc 82.031 (83.930)	lr 0.01028
Train [67][2640/3239]	Time 0.261 (0.634)	Data Time 0.001 (0.017)	Loss 2.3464 (2.4926)	Entropy 1.03605 (1.03936)	Top-1 acc 66.406 (64.052)	Top-5 acc 87.109 (83.929)	lr 0.01028
Train [67][2650/3239]	Time 0.251 (0.633)	Data Time 0.001 (0.017)	Loss 2.5587 (2.4924)	Entropy 1.03603 (1.03935)	Top-1 acc 56.641 (64.054)	Top-5 acc 83.203 (83.934)	lr 0.01028
Train [67][2660/3239]	Time 0.304 (0.633)	Data Time 0.001 (0.017)	Loss 2.4873 (2.4923)	Entropy 1.03592 (1.03934)	Top-1 acc 64.453 (64.057)	Top-5 acc 85.547 (83.937)	lr 0.01028
Train [67][2670/3239]	Time 0.249 (0.632)	Data Time 0.001 (0.017)	Loss 2.4188 (2.4925)	Entropy 1.03584 (1.03932)	Top-1 acc 66.797 (64.055)	Top-5 acc 85.156 (83.933)	lr 0.01028
Train [67][2680/3239]	Time 0.248 (0.631)	Data Time 0.001 (0.017)	Loss 2.3260 (2.4925)	Entropy 1.03587 (1.03931)	Top-1 acc 66.016 (64.052)	Top-5 acc 86.719 (83.936)	lr 0.01028
Train [67][2690/3239]	Time 0.256 (0.631)	Data Time 0.001 (0.017)	Loss 2.5268 (2.4924)	Entropy 1.03584 (1.03930)	Top-1 acc 63.281 (64.057)	Top-5 acc 83.594 (83.936)	lr 0.01028
Train [67][2700/3239]	Time 0.265 (0.630)	Data Time 0.001 (0.017)	Loss 2.6892 (2.4926)	Entropy 1.03577 (1.03929)	Top-1 acc 62.109 (64.054)	Top-5 acc 80.859 (83.931)	lr 0.01028
Train [67][2710/3239]	Time 0.217 (0.630)	Data Time 0.001 (0.017)	Loss 2.4940 (2.4927)	Entropy 1.03566 (1.03927)	Top-1 acc 62.891 (64.054)	Top-5 acc 82.812 (83.927)	lr 0.01027
Train [67][2720/3239]	Time 0.349 (0.629)	Data Time 0.001 (0.017)	Loss 2.4095 (2.4928)	Entropy 1.03567 (1.03926)	Top-1 acc 66.016 (64.049)	Top-5 acc 84.766 (83.924)	lr 0.01027
Train [67][2730/3239]	Time 0.223 (0.628)	Data Time 0.001 (0.017)	Loss 2.6600 (2.4931)	Entropy 1.03554 (1.03925)	Top-1 acc 54.688 (64.038)	Top-5 acc 80.078 (83.920)	lr 0.01027
Train [67][2740/3239]	Time 0.269 (0.628)	Data Time 0.001 (0.017)	Loss 2.3809 (2.4933)	Entropy 1.03554 (1.03923)	Top-1 acc 67.188 (64.037)	Top-5 acc 87.500 (83.919)	lr 0.01027
Train [67][2750/3239]	Time 0.238 (0.627)	Data Time 0.001 (0.017)	Loss 2.5576 (2.4934)	Entropy 1.03548 (1.03922)	Top-1 acc 63.672 (64.034)	Top-5 acc 82.031 (83.912)	lr 0.01027
Train [67][2760/3239]	Time 0.330 (0.627)	Data Time 0.001 (0.017)	Loss 2.6669 (2.4935)	Entropy 1.03544 (1.03921)	Top-1 acc 60.547 (64.030)	Top-5 acc 81.641 (83.909)	lr 0.01027
Train [67][2770/3239]	Time 0.301 (0.626)	Data Time 0.001 (0.017)	Loss 2.3873 (2.4935)	Entropy 1.03545 (1.03919)	Top-1 acc 66.016 (64.033)	Top-5 acc 86.328 (83.911)	lr 0.01027
Train [67][2780/3239]	Time 0.264 (0.626)	Data Time 0.001 (0.016)	Loss 2.4014 (2.4936)	Entropy 1.03542 (1.03918)	Top-1 acc 63.281 (64.027)	Top-5 acc 85.156 (83.911)	lr 0.01027
Train [67][2790/3239]	Time 0.237 (0.625)	Data Time 0.001 (0.016)	Loss 2.6124 (2.4935)	Entropy 1.03535 (1.03916)	Top-1 acc 60.547 (64.026)	Top-5 acc 82.031 (83.914)	lr 0.01027
Train [67][2800/3239]	Time 0.235 (0.625)	Data Time 0.001 (0.016)	Loss 2.4775 (2.4935)	Entropy 1.03537 (1.03915)	Top-1 acc 59.766 (64.023)	Top-5 acc 84.375 (83.914)	lr 0.01027
Train [67][2810/3239]	Time 0.231 (0.624)	Data Time 0.001 (0.016)	Loss 2.4762 (2.4934)	Entropy 1.03530 (1.03914)	Top-1 acc 66.016 (64.024)	Top-5 acc 85.156 (83.916)	lr 0.01026
Train [67][2820/3239]	Time 0.271 (0.623)	Data Time 0.001 (0.016)	Loss 2.6385 (2.4934)	Entropy 1.03526 (1.03912)	Top-1 acc 60.547 (64.021)	Top-5 acc 80.469 (83.915)	lr 0.01026
Train [67][2830/3239]	Time 0.226 (0.623)	Data Time 0.001 (0.016)	Loss 2.6719 (2.4935)	Entropy 1.03520 (1.03911)	Top-1 acc 60.156 (64.018)	Top-5 acc 81.250 (83.911)	lr 0.01026
Train [67][2840/3239]	Time 0.277 (0.622)	Data Time 0.001 (0.016)	Loss 2.5673 (2.4936)	Entropy 1.03519 (1.03910)	Top-1 acc 60.156 (64.014)	Top-5 acc 80.859 (83.909)	lr 0.01026
Train [67][2850/3239]	Time 0.346 (0.622)	Data Time 0.001 (0.016)	Loss 2.4350 (2.4936)	Entropy 1.03517 (1.03908)	Top-1 acc 66.797 (64.017)	Top-5 acc 83.203 (83.910)	lr 0.01026
Train [67][2860/3239]	Time 0.256 (0.621)	Data Time 0.001 (0.016)	Loss 2.3932 (2.4936)	Entropy 1.03511 (1.03907)	Top-1 acc 64.844 (64.018)	Top-5 acc 85.938 (83.913)	lr 0.01026
Train [67][2870/3239]	Time 0.255 (0.621)	Data Time 0.001 (0.016)	Loss 2.3295 (2.4937)	Entropy 1.03513 (1.03906)	Top-1 acc 71.875 (64.017)	Top-5 acc 87.891 (83.911)	lr 0.01026
Train [67][2880/3239]	Time 0.228 (0.620)	Data Time 0.003 (0.016)	Loss 2.3757 (2.4937)	Entropy 1.03504 (1.03904)	Top-1 acc 65.625 (64.019)	Top-5 acc 86.719 (83.908)	lr 0.01026
Train [67][2890/3239]	Time 0.224 (0.620)	Data Time 0.001 (0.016)	Loss 2.3895 (2.4937)	Entropy 1.03500 (1.03903)	Top-1 acc 67.578 (64.019)	Top-5 acc 86.328 (83.908)	lr 0.01026
Train [67][2900/3239]	Time 0.225 (0.619)	Data Time 0.001 (0.016)	Loss 2.5232 (2.4937)	Entropy 1.03499 (1.03901)	Top-1 acc 62.109 (64.020)	Top-5 acc 82.812 (83.908)	lr 0.01026
Train [67][2910/3239]	Time 0.252 (0.619)	Data Time 0.001 (0.016)	Loss 2.5049 (2.4936)	Entropy 1.03496 (1.03900)	Top-1 acc 66.797 (64.024)	Top-5 acc 83.594 (83.908)	lr 0.01025
Train [67][2920/3239]	Time 0.306 (0.636)	Data Time 0.005 (0.016)	Loss 2.5755 (2.4937)	Entropy 1.03494 (1.03899)	Top-1 acc 63.672 (64.023)	Top-5 acc 82.422 (83.905)	lr 0.01025
Train [67][2930/3239]	Time 0.245 (0.636)	Data Time 0.002 (0.016)	Loss 2.6052 (2.4936)	Entropy 1.03490 (1.03897)	Top-1 acc 62.109 (64.027)	Top-5 acc 82.031 (83.906)	lr 0.01025
Train [67][2940/3239]	Time 0.331 (0.635)	Data Time 0.002 (0.016)	Loss 2.6603 (2.4935)	Entropy 1.03484 (1.03896)	Top-1 acc 60.156 (64.030)	Top-5 acc 81.641 (83.907)	lr 0.01025
Train [67][2950/3239]	Time 0.255 (0.635)	Data Time 0.002 (0.016)	Loss 2.4671 (2.4936)	Entropy 1.03482 (1.03894)	Top-1 acc 63.281 (64.024)	Top-5 acc 84.375 (83.904)	lr 0.01025
Train [67][2960/3239]	Time 0.300 (0.634)	Data Time 0.001 (0.016)	Loss 2.7128 (2.4937)	Entropy 1.03467 (1.03893)	Top-1 acc 58.984 (64.023)	Top-5 acc 79.688 (83.902)	lr 0.01025
Train [67][2970/3239]	Time 0.233 (0.634)	Data Time 0.001 (0.016)	Loss 2.5034 (2.4937)	Entropy 1.03465 (1.03892)	Top-1 acc 64.844 (64.021)	Top-5 acc 83.594 (83.900)	lr 0.01025
Train [67][2980/3239]	Time 0.228 (0.633)	Data Time 0.001 (0.015)	Loss 2.5752 (2.4939)	Entropy 1.03471 (1.03890)	Top-1 acc 59.375 (64.012)	Top-5 acc 82.031 (83.896)	lr 0.01025
Train [67][2990/3239]	Time 0.395 (0.633)	Data Time 0.001 (0.015)	Loss 2.5010 (2.4941)	Entropy 1.03467 (1.03889)	Top-1 acc 65.625 (64.009)	Top-5 acc 84.375 (83.895)	lr 0.01025
Train [67][3000/3239]	Time 0.208 (0.632)	Data Time 0.002 (0.015)	Loss 2.4809 (2.4942)	Entropy 1.03465 (1.03887)	Top-1 acc 67.969 (64.008)	Top-5 acc 83.203 (83.891)	lr 0.01025
Train [67][3010/3239]	Time 0.267 (0.632)	Data Time 0.001 (0.015)	Loss 2.6387 (2.4944)	Entropy 1.03459 (1.03886)	Top-1 acc 61.328 (64.005)	Top-5 acc 82.812 (83.885)	lr 0.01024
Train [67][3020/3239]	Time 0.271 (0.631)	Data Time 0.001 (0.015)	Loss 2.6407 (2.4946)	Entropy 1.03455 (1.03885)	Top-1 acc 64.062 (64.001)	Top-5 acc 81.250 (83.881)	lr 0.01024
Train [67][3030/3239]	Time 0.233 (0.631)	Data Time 0.001 (0.015)	Loss 2.5128 (2.4947)	Entropy 1.03451 (1.03883)	Top-1 acc 65.625 (63.998)	Top-5 acc 83.203 (83.880)	lr 0.01024
Train [67][3040/3239]	Time 0.226 (0.630)	Data Time 0.001 (0.015)	Loss 2.5066 (2.4947)	Entropy 1.03448 (1.03882)	Top-1 acc 61.719 (64.001)	Top-5 acc 82.812 (83.880)	lr 0.01024
Train [67][3050/3239]	Time 0.247 (0.630)	Data Time 0.001 (0.015)	Loss 2.3180 (2.4948)	Entropy 1.03403 (1.03880)	Top-1 acc 66.797 (63.999)	Top-5 acc 87.109 (83.878)	lr 0.01024
Train [67][3060/3239]	Time 0.233 (0.629)	Data Time 0.001 (0.015)	Loss 2.4192 (2.4947)	Entropy 1.03405 (1.03879)	Top-1 acc 64.453 (63.999)	Top-5 acc 85.938 (83.882)	lr 0.01024
Train [67][3070/3239]	Time 0.209 (0.629)	Data Time 0.001 (0.015)	Loss 2.5650 (2.4946)	Entropy 1.03400 (1.03877)	Top-1 acc 62.891 (63.998)	Top-5 acc 82.031 (83.882)	lr 0.01024
Train [67][3080/3239]	Time 0.348 (0.628)	Data Time 0.002 (0.015)	Loss 2.5066 (2.4946)	Entropy 1.03398 (1.03876)	Top-1 acc 61.328 (63.998)	Top-5 acc 84.766 (83.884)	lr 0.01024
Train [67][3090/3239]	Time 0.231 (0.627)	Data Time 0.001 (0.015)	Loss 2.5961 (2.4945)	Entropy 1.03396 (1.03874)	Top-1 acc 62.891 (64.001)	Top-5 acc 82.031 (83.886)	lr 0.01024
Train [67][3100/3239]	Time 0.286 (0.627)	Data Time 0.001 (0.015)	Loss 2.4132 (2.4945)	Entropy 1.03392 (1.03873)	Top-1 acc 66.016 (64.002)	Top-5 acc 84.375 (83.884)	lr 0.01024
Train [67][3110/3239]	Time 0.267 (0.627)	Data Time 0.001 (0.015)	Loss 2.6710 (2.4945)	Entropy 1.03393 (1.03871)	Top-1 acc 60.156 (64.003)	Top-5 acc 82.422 (83.885)	lr 0.01023
Train [67][3120/3239]	Time 0.233 (0.626)	Data Time 0.001 (0.015)	Loss 2.6025 (2.4946)	Entropy 1.03388 (1.03869)	Top-1 acc 59.766 (63.999)	Top-5 acc 79.688 (83.882)	lr 0.01023
Train [67][3130/3239]	Time 0.256 (0.626)	Data Time 0.001 (0.015)	Loss 2.4904 (2.4947)	Entropy 1.03387 (1.03868)	Top-1 acc 62.891 (63.998)	Top-5 acc 84.766 (83.880)	lr 0.01023
Train [67][3140/3239]	Time 0.237 (0.625)	Data Time 0.001 (0.015)	Loss 2.7317 (2.4953)	Entropy 1.03377 (1.03866)	Top-1 acc 55.078 (63.986)	Top-5 acc 80.078 (83.868)	lr 0.01023
Train [67][3150/3239]	Time 0.213 (0.625)	Data Time 0.001 (0.015)	Loss 2.5415 (2.4954)	Entropy 1.03380 (1.03865)	Top-1 acc 61.328 (63.983)	Top-5 acc 83.594 (83.866)	lr 0.01023
Train [67][3160/3239]	Time 0.225 (0.624)	Data Time 0.001 (0.015)	Loss 2.4559 (2.4952)	Entropy 1.03382 (1.03863)	Top-1 acc 66.406 (63.991)	Top-5 acc 86.719 (83.871)	lr 0.01023
Train [67][3170/3239]	Time 0.267 (0.624)	Data Time 0.001 (0.015)	Loss 2.4525 (2.4954)	Entropy 1.03376 (1.03862)	Top-1 acc 64.453 (63.985)	Top-5 acc 83.594 (83.866)	lr 0.01023
Train [67][3180/3239]	Time 0.226 (0.623)	Data Time 0.000 (0.015)	Loss 2.5671 (2.4954)	Entropy 1.03372 (1.03860)	Top-1 acc 64.844 (63.988)	Top-5 acc 84.375 (83.868)	lr 0.01023
Train [67][3190/3239]	Time 0.226 (0.623)	Data Time 0.000 (0.015)	Loss 2.5330 (2.4955)	Entropy 1.03367 (1.03859)	Top-1 acc 61.719 (63.987)	Top-5 acc 83.984 (83.867)	lr 0.01023
Train [67][3200/3239]	Time 0.234 (0.622)	Data Time 0.000 (0.015)	Loss 2.3887 (2.4954)	Entropy 1.03356 (1.03857)	Top-1 acc 66.016 (63.990)	Top-5 acc 84.766 (83.867)	lr 0.01023
Train [67][3210/3239]	Time 0.225 (0.622)	Data Time 0.000 (0.014)	Loss 2.6201 (2.4954)	Entropy 1.03357 (1.03856)	Top-1 acc 60.156 (63.988)	Top-5 acc 80.078 (83.865)	lr 0.01023
Train [67][3220/3239]	Time 0.334 (0.621)	Data Time 0.000 (0.014)	Loss 2.4495 (2.4954)	Entropy 1.03357 (1.03854)	Top-1 acc 65.625 (63.991)	Top-5 acc 84.766 (83.864)	lr 0.01022
Train [67][3230/3239]	Time 0.231 (0.621)	Data Time 0.000 (0.014)	Loss 2.4853 (2.4955)	Entropy 1.03345 (1.03853)	Top-1 acc 60.938 (63.989)	Top-5 acc 83.594 (83.859)	lr 0.01022
Train [67][3239/3239]	Time 2.295 (0.620)	Data Time 0.000 (0.014)	Loss 2.6977 (2.4954)	Entropy 1.03345 (1.03851)	Top-1 acc 59.259 (63.990)	Top-5 acc 81.481 (83.862)	lr 0.01022
==========Valid [67/120]	loss 1.389	top-1 acc 68.297 (68.297)	top-5 acc 87.519	Train top-1 63.990	top-5 83.862	Entropy 1.03345	Latency-None: 0.000ms	Flops: 546.53M
Train [68][0/3239]	Time 40.525 (40.525)	Data Time 38.348 (38.348)	Loss 2.4123 (2.4123)	Entropy 1.03343 (1.03343)	Top-1 acc 66.016 (66.016)	Top-5 acc 86.328 (86.328)	lr 0.01022
Train [68][10/3239]	Time 55.146 (9.129)	Data Time 0.002 (3.602)	Loss 2.4992 (2.4703)	Entropy 1.03343 (1.03343)	Top-1 acc 64.453 (64.453)	Top-5 acc 84.766 (84.837)	lr 0.01022
Train [68][20/3239]	Time 0.250 (4.933)	Data Time 0.002 (1.888)	Loss 2.6092 (2.4803)	Entropy 1.03338 (1.03341)	Top-1 acc 59.375 (63.858)	Top-5 acc 80.469 (84.356)	lr 0.01022
Train [68][30/3239]	Time 0.360 (3.515)	Data Time 0.002 (1.280)	Loss 2.5492 (2.4830)	Entropy 1.03331 (1.03338)	Top-1 acc 62.891 (63.924)	Top-5 acc 81.641 (84.110)	lr 0.01022
Train [68][40/3239]	Time 0.238 (2.773)	Data Time 0.001 (0.968)	Loss 2.5530 (2.4918)	Entropy 1.03319 (1.03334)	Top-1 acc 59.375 (63.577)	Top-5 acc 83.984 (84.099)	lr 0.01022
Train [68][50/3239]	Time 0.235 (2.326)	Data Time 0.002 (0.779)	Loss 2.3987 (2.4837)	Entropy 1.03308 (1.03329)	Top-1 acc 67.578 (63.771)	Top-5 acc 85.547 (84.130)	lr 0.01022
Train [68][60/3239]	Time 0.218 (2.020)	Data Time 0.001 (0.651)	Loss 2.3171 (2.4799)	Entropy 1.03305 (1.03326)	Top-1 acc 66.406 (63.954)	Top-5 acc 86.719 (84.074)	lr 0.01022
Train [68][70/3239]	Time 0.241 (1.802)	Data Time 0.001 (0.560)	Loss 2.4352 (2.4769)	Entropy 1.03298 (1.03322)	Top-1 acc 65.234 (64.051)	Top-5 acc 84.766 (84.171)	lr 0.01022
Train [68][80/3239]	Time 0.231 (1.637)	Data Time 0.001 (0.491)	Loss 2.5116 (2.4764)	Entropy 1.03306 (1.03320)	Top-1 acc 60.156 (64.067)	Top-5 acc 83.594 (84.206)	lr 0.01021
Train [68][90/3239]	Time 0.229 (1.509)	Data Time 0.001 (0.437)	Loss 2.5444 (2.4741)	Entropy 1.03301 (1.03318)	Top-1 acc 63.672 (64.161)	Top-5 acc 83.203 (84.281)	lr 0.01021
Train [68][100/3239]	Time 0.208 (1.407)	Data Time 0.001 (0.394)	Loss 2.5430 (2.4742)	Entropy 1.03297 (1.03316)	Top-1 acc 60.547 (64.151)	Top-5 acc 83.594 (84.367)	lr 0.01021
Train [68][110/3239]	Time 0.251 (1.321)	Data Time 0.001 (0.359)	Loss 2.4455 (2.4755)	Entropy 1.03292 (1.03314)	Top-1 acc 66.016 (64.126)	Top-5 acc 85.938 (84.350)	lr 0.01021
Train [68][120/3239]	Time 2.710 (1.252)	Data Time 0.002 (0.329)	Loss 2.3822 (2.4717)	Entropy 1.03292 (1.03313)	Top-1 acc 66.016 (64.208)	Top-5 acc 85.547 (84.381)	lr 0.01021
Train [68][130/3239]	Time 0.228 (1.176)	Data Time 0.001 (0.304)	Loss 2.3863 (2.4678)	Entropy 1.03282 (1.03310)	Top-1 acc 67.969 (64.355)	Top-5 acc 83.984 (84.399)	lr 0.01021
Train [68][140/3239]	Time 0.229 (1.126)	Data Time 0.001 (0.283)	Loss 2.3889 (2.4631)	Entropy 1.03283 (1.03308)	Top-1 acc 67.578 (64.489)	Top-5 acc 85.938 (84.502)	lr 0.01021
Train [68][150/3239]	Time 0.219 (1.084)	Data Time 0.001 (0.264)	Loss 2.4441 (2.4615)	Entropy 1.03315 (1.03308)	Top-1 acc 63.672 (64.536)	Top-5 acc 85.156 (84.533)	lr 0.01021
Train [68][160/3239]	Time 0.216 (1.045)	Data Time 0.001 (0.248)	Loss 2.4067 (2.4602)	Entropy 1.03310 (1.03308)	Top-1 acc 66.016 (64.584)	Top-5 acc 83.984 (84.538)	lr 0.01021
Train [68][170/3239]	Time 0.225 (1.012)	Data Time 0.001 (0.233)	Loss 2.3991 (2.4576)	Entropy 1.03311 (1.03309)	Top-1 acc 63.672 (64.707)	Top-5 acc 87.891 (84.603)	lr 0.01021
Train [68][180/3239]	Time 0.224 (0.982)	Data Time 0.001 (0.221)	Loss 2.2916 (2.4576)	Entropy 1.03307 (1.03309)	Top-1 acc 69.922 (64.740)	Top-5 acc 87.109 (84.610)	lr 0.01020
Train [68][190/3239]	Time 0.217 (0.955)	Data Time 0.001 (0.209)	Loss 2.4806 (2.4587)	Entropy 1.03305 (1.03308)	Top-1 acc 64.453 (64.688)	Top-5 acc 82.031 (84.573)	lr 0.01020
Train [68][200/3239]	Time 0.220 (0.931)	Data Time 0.001 (0.199)	Loss 2.4496 (2.4608)	Entropy 1.03299 (1.03308)	Top-1 acc 64.844 (64.659)	Top-5 acc 85.547 (84.567)	lr 0.01020
Train [68][210/3239]	Time 0.225 (0.909)	Data Time 0.001 (0.189)	Loss 2.5647 (2.4609)	Entropy 1.03301 (1.03308)	Top-1 acc 64.453 (64.672)	Top-5 acc 83.594 (84.582)	lr 0.01020
Train [68][220/3239]	Time 0.221 (0.888)	Data Time 0.001 (0.181)	Loss 2.4514 (2.4616)	Entropy 1.03286 (1.03307)	Top-1 acc 66.406 (64.679)	Top-5 acc 85.547 (84.592)	lr 0.01020
Train [68][230/3239]	Time 2.485 (0.870)	Data Time 0.001 (0.173)	Loss 2.3041 (2.4618)	Entropy 1.03286 (1.03306)	Top-1 acc 66.797 (64.656)	Top-5 acc 87.109 (84.563)	lr 0.01020
Train [68][240/3239]	Time 0.240 (0.844)	Data Time 0.001 (0.166)	Loss 2.6196 (2.4630)	Entropy 1.03287 (1.03306)	Top-1 acc 59.375 (64.646)	Top-5 acc 79.688 (84.540)	lr 0.01020
Train [68][250/3239]	Time 0.289 (0.830)	Data Time 0.001 (0.159)	Loss 2.4997 (2.4652)	Entropy 1.03305 (1.03306)	Top-1 acc 64.062 (64.613)	Top-5 acc 83.594 (84.523)	lr 0.01020
Train [68][260/3239]	Time 0.236 (0.816)	Data Time 0.001 (0.153)	Loss 2.3409 (2.4656)	Entropy 1.03303 (1.03305)	Top-1 acc 69.922 (64.628)	Top-5 acc 82.422 (84.490)	lr 0.01020
Train [68][270/3239]	Time 0.214 (0.803)	Data Time 0.001 (0.148)	Loss 2.3748 (2.4639)	Entropy 1.03306 (1.03305)	Top-1 acc 67.188 (64.656)	Top-5 acc 86.719 (84.519)	lr 0.01020
Train [68][280/3239]	Time 0.207 (0.790)	Data Time 0.001 (0.143)	Loss 2.4216 (2.4628)	Entropy 1.03304 (1.03305)	Top-1 acc 65.234 (64.695)	Top-5 acc 85.938 (84.532)	lr 0.01019
Train [68][290/3239]	Time 0.239 (0.779)	Data Time 0.001 (0.138)	Loss 2.4533 (2.4637)	Entropy 1.03302 (1.03305)	Top-1 acc 64.062 (64.667)	Top-5 acc 84.766 (84.520)	lr 0.01019
Train [68][300/3239]	Time 0.211 (0.769)	Data Time 0.001 (0.133)	Loss 2.5472 (2.4637)	Entropy 1.03295 (1.03305)	Top-1 acc 62.500 (64.636)	Top-5 acc 82.812 (84.532)	lr 0.01019
Train [68][310/3239]	Time 0.226 (0.759)	Data Time 0.001 (0.129)	Loss 2.4844 (2.4633)	Entropy 1.03292 (1.03305)	Top-1 acc 62.891 (64.624)	Top-5 acc 83.203 (84.548)	lr 0.01019
Train [68][320/3239]	Time 0.212 (0.749)	Data Time 0.001 (0.125)	Loss 2.4746 (2.4631)	Entropy 1.03276 (1.03304)	Top-1 acc 63.281 (64.638)	Top-5 acc 85.156 (84.528)	lr 0.01019
Train [68][330/3239]	Time 0.249 (0.741)	Data Time 0.001 (0.121)	Loss 2.4918 (2.4625)	Entropy 1.03268 (1.03303)	Top-1 acc 67.969 (64.676)	Top-5 acc 81.641 (84.514)	lr 0.01019
Train [68][340/3239]	Time 2.687 (0.734)	Data Time 0.001 (0.118)	Loss 2.3394 (2.4615)	Entropy 1.03268 (1.03302)	Top-1 acc 69.922 (64.691)	Top-5 acc 88.281 (84.534)	lr 0.01019
Train [68][350/3239]	Time 0.215 (0.719)	Data Time 0.001 (0.114)	Loss 2.4240 (2.4614)	Entropy 1.03262 (1.03301)	Top-1 acc 66.016 (64.688)	Top-5 acc 86.719 (84.539)	lr 0.01019
Train [68][360/3239]	Time 0.227 (0.712)	Data Time 0.001 (0.111)	Loss 2.4443 (2.4611)	Entropy 1.03266 (1.03300)	Top-1 acc 66.406 (64.684)	Top-5 acc 84.766 (84.545)	lr 0.01019
Train [68][370/3239]	Time 0.240 (0.706)	Data Time 0.001 (0.108)	Loss 2.4493 (2.4615)	Entropy 1.03261 (1.03299)	Top-1 acc 60.938 (64.657)	Top-5 acc 83.203 (84.528)	lr 0.01019
Train [68][380/3239]	Time 0.232 (0.846)	Data Time 0.002 (0.106)	Loss 2.4676 (2.4604)	Entropy 1.03253 (1.03298)	Top-1 acc 62.109 (64.665)	Top-5 acc 84.766 (84.537)	lr 0.01018
Train [68][390/3239]	Time 0.230 (0.836)	Data Time 0.002 (0.103)	Loss 2.4434 (2.4617)	Entropy 1.03257 (1.03297)	Top-1 acc 64.062 (64.621)	Top-5 acc 86.328 (84.513)	lr 0.01018
Train [68][400/3239]	Time 0.219 (0.827)	Data Time 0.001 (0.100)	Loss 2.5830 (2.4625)	Entropy 1.03253 (1.03296)	Top-1 acc 63.281 (64.617)	Top-5 acc 78.906 (84.477)	lr 0.01018
Train [68][410/3239]	Time 0.216 (0.818)	Data Time 0.001 (0.098)	Loss 2.4957 (2.4631)	Entropy 1.03250 (1.03295)	Top-1 acc 62.109 (64.598)	Top-5 acc 84.766 (84.473)	lr 0.01018
Train [68][420/3239]	Time 0.224 (0.810)	Data Time 0.001 (0.096)	Loss 2.5477 (2.4643)	Entropy 1.03239 (1.03294)	Top-1 acc 63.672 (64.590)	Top-5 acc 84.375 (84.467)	lr 0.01018
Train [68][430/3239]	Time 0.326 (0.802)	Data Time 0.002 (0.094)	Loss 2.4990 (2.4644)	Entropy 1.03234 (1.03292)	Top-1 acc 67.578 (64.605)	Top-5 acc 83.984 (84.452)	lr 0.01018
Train [68][440/3239]	Time 0.259 (0.794)	Data Time 0.001 (0.091)	Loss 2.4722 (2.4636)	Entropy 1.03231 (1.03291)	Top-1 acc 66.016 (64.622)	Top-5 acc 81.641 (84.470)	lr 0.01018
Train [68][450/3239]	Time 2.518 (0.787)	Data Time 0.001 (0.089)	Loss 2.5308 (2.4645)	Entropy 1.03231 (1.03290)	Top-1 acc 62.891 (64.610)	Top-5 acc 82.812 (84.450)	lr 0.01018
Train [68][460/3239]	Time 0.221 (0.775)	Data Time 0.001 (0.088)	Loss 2.3960 (2.4643)	Entropy 1.03229 (1.03288)	Top-1 acc 65.234 (64.603)	Top-5 acc 85.938 (84.461)	lr 0.01018
Train [68][470/3239]	Time 0.239 (0.769)	Data Time 0.001 (0.086)	Loss 2.5049 (2.4650)	Entropy 1.03226 (1.03287)	Top-1 acc 64.453 (64.593)	Top-5 acc 82.422 (84.446)	lr 0.01018
Train [68][480/3239]	Time 0.223 (0.763)	Data Time 0.001 (0.084)	Loss 2.5559 (2.4638)	Entropy 1.03230 (1.03286)	Top-1 acc 64.453 (64.654)	Top-5 acc 83.203 (84.464)	lr 0.01017
Train [68][490/3239]	Time 0.214 (0.756)	Data Time 0.001 (0.082)	Loss 2.6517 (2.4649)	Entropy 1.03229 (1.03285)	Top-1 acc 60.938 (64.627)	Top-5 acc 82.422 (84.457)	lr 0.01017
Train [68][500/3239]	Time 0.222 (0.751)	Data Time 0.001 (0.081)	Loss 2.4019 (2.4646)	Entropy 1.03214 (1.03283)	Top-1 acc 64.844 (64.650)	Top-5 acc 87.109 (84.465)	lr 0.01017
Train [68][510/3239]	Time 0.254 (0.745)	Data Time 0.002 (0.079)	Loss 2.4632 (2.4650)	Entropy 1.03204 (1.03282)	Top-1 acc 62.891 (64.643)	Top-5 acc 83.594 (84.465)	lr 0.01017
Train [68][520/3239]	Time 0.325 (0.740)	Data Time 0.001 (0.078)	Loss 2.4893 (2.4654)	Entropy 1.03208 (1.03281)	Top-1 acc 66.406 (64.653)	Top-5 acc 83.984 (84.470)	lr 0.01017
Train [68][530/3239]	Time 0.224 (0.735)	Data Time 0.001 (0.076)	Loss 2.5219 (2.4653)	Entropy 1.03199 (1.03279)	Top-1 acc 60.938 (64.672)	Top-5 acc 82.812 (84.454)	lr 0.01017
Train [68][540/3239]	Time 0.217 (0.730)	Data Time 0.001 (0.075)	Loss 2.3383 (2.4656)	Entropy 1.03190 (1.03278)	Top-1 acc 67.578 (64.657)	Top-5 acc 85.938 (84.451)	lr 0.01017
Train [68][550/3239]	Time 0.237 (0.725)	Data Time 0.001 (0.073)	Loss 2.6565 (2.4657)	Entropy 1.03185 (1.03276)	Top-1 acc 58.984 (64.620)	Top-5 acc 83.594 (84.457)	lr 0.01017
Train [68][560/3239]	Time 2.597 (0.721)	Data Time 0.001 (0.072)	Loss 2.4252 (2.4657)	Entropy 1.03185 (1.03274)	Top-1 acc 65.234 (64.643)	Top-5 acc 84.375 (84.445)	lr 0.01017
Train [68][570/3239]	Time 0.227 (0.713)	Data Time 0.001 (0.071)	Loss 2.3993 (2.4653)	Entropy 1.03181 (1.03273)	Top-1 acc 65.234 (64.642)	Top-5 acc 86.328 (84.450)	lr 0.01017
Train [68][580/3239]	Time 0.225 (0.709)	Data Time 0.001 (0.070)	Loss 2.3218 (2.4654)	Entropy 1.03177 (1.03271)	Top-1 acc 67.188 (64.635)	Top-5 acc 86.719 (84.458)	lr 0.01016
Train [68][590/3239]	Time 0.229 (0.704)	Data Time 0.001 (0.069)	Loss 2.4896 (2.4655)	Entropy 1.03177 (1.03270)	Top-1 acc 64.062 (64.633)	Top-5 acc 86.328 (84.460)	lr 0.01016
Train [68][600/3239]	Time 0.214 (0.700)	Data Time 0.001 (0.067)	Loss 2.4816 (2.4651)	Entropy 1.03165 (1.03268)	Top-1 acc 64.062 (64.640)	Top-5 acc 83.203 (84.472)	lr 0.01016
Train [68][610/3239]	Time 0.324 (0.697)	Data Time 0.001 (0.066)	Loss 2.7728 (2.4655)	Entropy 1.03166 (1.03266)	Top-1 acc 56.641 (64.615)	Top-5 acc 78.125 (84.459)	lr 0.01016
Train [68][620/3239]	Time 0.235 (0.693)	Data Time 0.002 (0.065)	Loss 2.5068 (2.4668)	Entropy 1.03158 (1.03264)	Top-1 acc 64.062 (64.574)	Top-5 acc 82.812 (84.441)	lr 0.01016
Train [68][630/3239]	Time 0.219 (0.690)	Data Time 0.001 (0.064)	Loss 2.3653 (2.4660)	Entropy 1.03161 (1.03263)	Top-1 acc 65.625 (64.614)	Top-5 acc 84.766 (84.438)	lr 0.01016
Train [68][640/3239]	Time 0.230 (0.686)	Data Time 0.001 (0.063)	Loss 2.5140 (2.4668)	Entropy 1.03156 (1.03261)	Top-1 acc 66.406 (64.603)	Top-5 acc 82.812 (84.409)	lr 0.01016
Train [68][650/3239]	Time 0.211 (0.683)	Data Time 0.001 (0.062)	Loss 2.3051 (2.4676)	Entropy 1.03152 (1.03260)	Top-1 acc 67.578 (64.574)	Top-5 acc 84.766 (84.392)	lr 0.01016
Train [68][660/3239]	Time 0.220 (0.679)	Data Time 0.001 (0.062)	Loss 2.5172 (2.4681)	Entropy 1.03143 (1.03258)	Top-1 acc 62.891 (64.555)	Top-5 acc 82.422 (84.377)	lr 0.01016
Train [68][670/3239]	Time 2.482 (0.676)	Data Time 0.001 (0.061)	Loss 2.5870 (2.4679)	Entropy 1.03143 (1.03256)	Top-1 acc 60.156 (64.557)	Top-5 acc 84.766 (84.394)	lr 0.01016
Train [68][680/3239]	Time 0.235 (0.670)	Data Time 0.001 (0.060)	Loss 2.4332 (2.4678)	Entropy 1.03137 (1.03254)	Top-1 acc 64.062 (64.559)	Top-5 acc 85.938 (84.390)	lr 0.01015
Train [68][690/3239]	Time 0.227 (0.667)	Data Time 0.001 (0.059)	Loss 2.5497 (2.4704)	Entropy 1.03140 (1.03253)	Top-1 acc 63.281 (64.492)	Top-5 acc 82.422 (84.338)	lr 0.01015
Train [68][700/3239]	Time 0.342 (0.664)	Data Time 0.001 (0.058)	Loss 2.5985 (2.4706)	Entropy 1.03135 (1.03251)	Top-1 acc 62.109 (64.480)	Top-5 acc 80.859 (84.334)	lr 0.01015
Train [68][710/3239]	Time 0.224 (0.661)	Data Time 0.001 (0.057)	Loss 2.5089 (2.4706)	Entropy 1.03125 (1.03249)	Top-1 acc 64.844 (64.484)	Top-5 acc 83.984 (84.340)	lr 0.01015
Train [68][720/3239]	Time 0.244 (0.658)	Data Time 0.001 (0.057)	Loss 2.5678 (2.4709)	Entropy 1.03122 (1.03248)	Top-1 acc 58.594 (64.478)	Top-5 acc 82.422 (84.334)	lr 0.01015
Train [68][730/3239]	Time 0.223 (0.655)	Data Time 0.001 (0.056)	Loss 2.2629 (2.4707)	Entropy 1.03118 (1.03246)	Top-1 acc 70.312 (64.481)	Top-5 acc 88.281 (84.340)	lr 0.01015
Train [68][740/3239]	Time 0.234 (0.728)	Data Time 0.002 (0.055)	Loss 2.2766 (2.4703)	Entropy 1.03111 (1.03244)	Top-1 acc 66.406 (64.494)	Top-5 acc 90.234 (84.345)	lr 0.01015
Train [68][750/3239]	Time 0.228 (0.726)	Data Time 0.002 (0.054)	Loss 2.3230 (2.4707)	Entropy 1.03110 (1.03242)	Top-1 acc 66.797 (64.483)	Top-5 acc 89.062 (84.337)	lr 0.01015
Train [68][760/3239]	Time 0.225 (0.722)	Data Time 0.001 (0.054)	Loss 2.4055 (2.4700)	Entropy 1.03104 (1.03241)	Top-1 acc 67.188 (64.489)	Top-5 acc 85.938 (84.359)	lr 0.01015
Train [68][770/3239]	Time 0.217 (0.719)	Data Time 0.001 (0.053)	Loss 2.4064 (2.4696)	Entropy 1.03096 (1.03239)	Top-1 acc 65.234 (64.497)	Top-5 acc 85.938 (84.371)	lr 0.01015
Train [68][780/3239]	Time 2.515 (0.716)	Data Time 0.001 (0.052)	Loss 2.6009 (2.4700)	Entropy 1.03096 (1.03237)	Top-1 acc 62.891 (64.489)	Top-5 acc 80.469 (84.360)	lr 0.01014
Train [68][790/3239]	Time 0.337 (0.710)	Data Time 0.001 (0.052)	Loss 2.5703 (2.4712)	Entropy 1.03089 (1.03235)	Top-1 acc 62.109 (64.462)	Top-5 acc 81.641 (84.339)	lr 0.01014
Train [68][800/3239]	Time 0.224 (0.707)	Data Time 0.001 (0.051)	Loss 2.4680 (2.4715)	Entropy 1.03086 (1.03233)	Top-1 acc 63.281 (64.454)	Top-5 acc 85.938 (84.337)	lr 0.01014
Train [68][810/3239]	Time 0.241 (0.704)	Data Time 0.001 (0.050)	Loss 2.4731 (2.4714)	Entropy 1.03086 (1.03231)	Top-1 acc 68.750 (64.449)	Top-5 acc 82.422 (84.337)	lr 0.01014
Train [68][820/3239]	Time 0.222 (0.701)	Data Time 0.001 (0.050)	Loss 2.5607 (2.4719)	Entropy 1.03082 (1.03230)	Top-1 acc 58.203 (64.437)	Top-5 acc 83.594 (84.324)	lr 0.01014
Train [68][830/3239]	Time 0.203 (0.698)	Data Time 0.001 (0.049)	Loss 2.7868 (2.4727)	Entropy 1.03078 (1.03228)	Top-1 acc 58.984 (64.428)	Top-5 acc 76.953 (84.312)	lr 0.01014
Train [68][840/3239]	Time 0.259 (0.696)	Data Time 0.001 (0.049)	Loss 2.4119 (2.4721)	Entropy 1.03079 (1.03226)	Top-1 acc 66.016 (64.455)	Top-5 acc 85.156 (84.331)	lr 0.01014
Train [68][850/3239]	Time 0.229 (0.693)	Data Time 0.001 (0.048)	Loss 2.3789 (2.4724)	Entropy 1.03082 (1.03224)	Top-1 acc 71.094 (64.454)	Top-5 acc 85.547 (84.325)	lr 0.01014
Train [68][860/3239]	Time 0.209 (0.691)	Data Time 0.001 (0.048)	Loss 2.4727 (2.4725)	Entropy 1.03080 (1.03223)	Top-1 acc 66.406 (64.449)	Top-5 acc 83.984 (84.316)	lr 0.01014
Train [68][870/3239]	Time 0.232 (0.688)	Data Time 0.001 (0.047)	Loss 2.3021 (2.4727)	Entropy 1.03080 (1.03221)	Top-1 acc 70.703 (64.441)	Top-5 acc 86.719 (84.316)	lr 0.01014
Train [68][880/3239]	Time 0.311 (0.686)	Data Time 0.001 (0.047)	Loss 2.6274 (2.4734)	Entropy 1.03084 (1.03219)	Top-1 acc 60.938 (64.428)	Top-5 acc 81.641 (84.300)	lr 0.01013
Train [68][890/3239]	Time 2.499 (0.683)	Data Time 0.001 (0.046)	Loss 2.3213 (2.4733)	Entropy 1.03084 (1.03218)	Top-1 acc 71.875 (64.443)	Top-5 acc 85.156 (84.294)	lr 0.01013
Train [68][900/3239]	Time 0.246 (0.678)	Data Time 0.001 (0.046)	Loss 2.4672 (2.4731)	Entropy 1.03085 (1.03216)	Top-1 acc 67.188 (64.460)	Top-5 acc 83.203 (84.284)	lr 0.01013
Train [68][910/3239]	Time 0.241 (0.676)	Data Time 0.001 (0.045)	Loss 2.4891 (2.4733)	Entropy 1.03085 (1.03215)	Top-1 acc 62.500 (64.449)	Top-5 acc 82.812 (84.279)	lr 0.01013
Train [68][920/3239]	Time 0.244 (0.674)	Data Time 0.001 (0.045)	Loss 2.3449 (2.4728)	Entropy 1.03079 (1.03214)	Top-1 acc 67.578 (64.456)	Top-5 acc 85.547 (84.289)	lr 0.01013
Train [68][930/3239]	Time 0.217 (0.671)	Data Time 0.001 (0.044)	Loss 2.6072 (2.4731)	Entropy 1.03079 (1.03212)	Top-1 acc 62.109 (64.450)	Top-5 acc 82.422 (84.286)	lr 0.01013
Train [68][940/3239]	Time 0.234 (0.670)	Data Time 0.001 (0.044)	Loss 2.4461 (2.4727)	Entropy 1.03069 (1.03211)	Top-1 acc 65.234 (64.468)	Top-5 acc 87.500 (84.297)	lr 0.01013
Train [68][950/3239]	Time 0.226 (0.668)	Data Time 0.001 (0.043)	Loss 2.3910 (2.4730)	Entropy 1.03060 (1.03209)	Top-1 acc 65.625 (64.456)	Top-5 acc 84.766 (84.281)	lr 0.01013
Train [68][960/3239]	Time 0.223 (0.665)	Data Time 0.001 (0.043)	Loss 2.3671 (2.4725)	Entropy 1.03058 (1.03208)	Top-1 acc 67.969 (64.464)	Top-5 acc 84.766 (84.287)	lr 0.01013
Train [68][970/3239]	Time 0.313 (0.664)	Data Time 0.001 (0.042)	Loss 2.5170 (2.4725)	Entropy 1.03056 (1.03206)	Top-1 acc 63.672 (64.462)	Top-5 acc 84.766 (84.290)	lr 0.01013
Train [68][980/3239]	Time 0.243 (0.662)	Data Time 0.001 (0.042)	Loss 2.3493 (2.4727)	Entropy 1.03055 (1.03204)	Top-1 acc 67.578 (64.454)	Top-5 acc 87.500 (84.289)	lr 0.01012
Train [68][990/3239]	Time 0.238 (0.660)	Data Time 0.001 (0.042)	Loss 2.2965 (2.4727)	Entropy 1.03049 (1.03203)	Top-1 acc 68.750 (64.463)	Top-5 acc 85.547 (84.289)	lr 0.01012
Train [68][1000/3239]	Time 2.488 (0.658)	Data Time 0.001 (0.041)	Loss 2.5686 (2.4730)	Entropy 1.03049 (1.03201)	Top-1 acc 61.719 (64.459)	Top-5 acc 82.031 (84.279)	lr 0.01012
Train [68][1010/3239]	Time 0.215 (0.654)	Data Time 0.001 (0.041)	Loss 2.4357 (2.4732)	Entropy 1.03045 (1.03200)	Top-1 acc 65.234 (64.449)	Top-5 acc 86.328 (84.285)	lr 0.01012
Train [68][1020/3239]	Time 0.224 (0.652)	Data Time 0.001 (0.040)	Loss 2.4984 (2.4732)	Entropy 1.03060 (1.03198)	Top-1 acc 60.547 (64.445)	Top-5 acc 84.766 (84.285)	lr 0.01012
Train [68][1030/3239]	Time 0.243 (0.651)	Data Time 0.001 (0.040)	Loss 2.4613 (2.4735)	Entropy 1.03060 (1.03197)	Top-1 acc 65.234 (64.445)	Top-5 acc 83.203 (84.279)	lr 0.01012
Train [68][1040/3239]	Time 0.235 (0.649)	Data Time 0.001 (0.040)	Loss 2.4794 (2.4733)	Entropy 1.03060 (1.03196)	Top-1 acc 64.062 (64.455)	Top-5 acc 84.766 (84.281)	lr 0.01012
Train [68][1050/3239]	Time 0.220 (0.647)	Data Time 0.003 (0.039)	Loss 2.5010 (2.4731)	Entropy 1.03064 (1.03195)	Top-1 acc 63.281 (64.457)	Top-5 acc 82.031 (84.287)	lr 0.01012
Train [68][1060/3239]	Time 0.227 (0.646)	Data Time 0.001 (0.039)	Loss 2.4444 (2.4736)	Entropy 1.03064 (1.03193)	Top-1 acc 66.016 (64.444)	Top-5 acc 85.938 (84.277)	lr 0.01012
Train [68][1070/3239]	Time 0.206 (0.644)	Data Time 0.001 (0.039)	Loss 2.5257 (2.4735)	Entropy 1.03053 (1.03192)	Top-1 acc 62.109 (64.444)	Top-5 acc 84.375 (84.271)	lr 0.01012
Train [68][1080/3239]	Time 0.230 (0.642)	Data Time 0.001 (0.038)	Loss 2.4267 (2.4735)	Entropy 1.03048 (1.03191)	Top-1 acc 68.750 (64.450)	Top-5 acc 83.984 (84.269)	lr 0.01011
Train [68][1090/3239]	Time 0.213 (0.641)	Data Time 0.001 (0.038)	Loss 2.4809 (2.4733)	Entropy 1.03046 (1.03189)	Top-1 acc 62.891 (64.449)	Top-5 acc 86.719 (84.275)	lr 0.01011
Train [68][1100/3239]	Time 0.457 (0.689)	Data Time 0.004 (0.038)	Loss 2.3428 (2.4728)	Entropy 1.03039 (1.03188)	Top-1 acc 66.406 (64.455)	Top-5 acc 85.547 (84.285)	lr 0.01011
Train [68][1110/3239]	Time 2.634 (0.687)	Data Time 0.002 (0.037)	Loss 2.3306 (2.4730)	Entropy 1.03039 (1.03187)	Top-1 acc 64.844 (64.440)	Top-5 acc 85.938 (84.278)	lr 0.01011
Train [68][1120/3239]	Time 0.243 (0.683)	Data Time 0.002 (0.037)	Loss 2.6213 (2.4733)	Entropy 1.03036 (1.03185)	Top-1 acc 61.719 (64.431)	Top-5 acc 80.859 (84.271)	lr 0.01011
Train [68][1130/3239]	Time 0.233 (0.681)	Data Time 0.001 (0.037)	Loss 2.4927 (2.4733)	Entropy 1.03036 (1.03184)	Top-1 acc 64.453 (64.419)	Top-5 acc 82.422 (84.265)	lr 0.01011
Train [68][1140/3239]	Time 0.222 (0.679)	Data Time 0.001 (0.036)	Loss 2.3983 (2.4733)	Entropy 1.03036 (1.03183)	Top-1 acc 69.531 (64.426)	Top-5 acc 86.328 (84.267)	lr 0.01011
Train [68][1150/3239]	Time 0.224 (0.678)	Data Time 0.002 (0.036)	Loss 2.2367 (2.4729)	Entropy 1.03039 (1.03182)	Top-1 acc 69.531 (64.443)	Top-5 acc 89.062 (84.271)	lr 0.01011
Train [68][1160/3239]	Time 0.220 (0.676)	Data Time 0.001 (0.036)	Loss 2.5319 (2.4730)	Entropy 1.03038 (1.03180)	Top-1 acc 66.016 (64.448)	Top-5 acc 82.812 (84.268)	lr 0.01011
Train [68][1170/3239]	Time 0.214 (0.674)	Data Time 0.001 (0.035)	Loss 2.5450 (2.4729)	Entropy 1.03038 (1.03179)	Top-1 acc 62.891 (64.455)	Top-5 acc 81.641 (84.266)	lr 0.01011
Train [68][1180/3239]	Time 0.218 (0.672)	Data Time 0.001 (0.035)	Loss 2.6049 (2.4733)	Entropy 1.03036 (1.03178)	Top-1 acc 61.328 (64.449)	Top-5 acc 84.375 (84.255)	lr 0.01010
Train [68][1190/3239]	Time 0.232 (0.671)	Data Time 0.001 (0.035)	Loss 2.4284 (2.4739)	Entropy 1.03034 (1.03177)	Top-1 acc 62.109 (64.427)	Top-5 acc 86.328 (84.242)	lr 0.01010
Train [68][1200/3239]	Time 0.217 (0.669)	Data Time 0.001 (0.035)	Loss 2.6548 (2.4742)	Entropy 1.03027 (1.03176)	Top-1 acc 60.156 (64.431)	Top-5 acc 82.422 (84.234)	lr 0.01010
Train [68][1210/3239]	Time 0.220 (0.667)	Data Time 0.001 (0.034)	Loss 2.5427 (2.4746)	Entropy 1.03018 (1.03174)	Top-1 acc 63.672 (64.418)	Top-5 acc 81.641 (84.229)	lr 0.01010
Train [68][1220/3239]	Time 2.381 (0.666)	Data Time 0.001 (0.034)	Loss 2.4100 (2.4745)	Entropy 1.03018 (1.03173)	Top-1 acc 65.234 (64.418)	Top-5 acc 86.328 (84.232)	lr 0.01010
Train [68][1230/3239]	Time 0.268 (0.662)	Data Time 0.001 (0.034)	Loss 2.5282 (2.4745)	Entropy 1.03009 (1.03172)	Top-1 acc 62.109 (64.411)	Top-5 acc 85.938 (84.231)	lr 0.01010
Train [68][1240/3239]	Time 0.226 (0.661)	Data Time 0.001 (0.034)	Loss 2.5382 (2.4749)	Entropy 1.03012 (1.03170)	Top-1 acc 62.500 (64.396)	Top-5 acc 83.203 (84.222)	lr 0.01010
Train [68][1250/3239]	Time 0.232 (0.659)	Data Time 0.001 (0.033)	Loss 2.3991 (2.4753)	Entropy 1.03011 (1.03169)	Top-1 acc 66.406 (64.388)	Top-5 acc 89.062 (84.219)	lr 0.01010
Train [68][1260/3239]	Time 0.223 (0.657)	Data Time 0.001 (0.033)	Loss 2.4705 (2.4752)	Entropy 1.03009 (1.03168)	Top-1 acc 62.500 (64.391)	Top-5 acc 85.156 (84.225)	lr 0.01010
Train [68][1270/3239]	Time 0.228 (0.656)	Data Time 0.001 (0.033)	Loss 2.4332 (2.4754)	Entropy 1.03005 (1.03167)	Top-1 acc 64.453 (64.391)	Top-5 acc 82.812 (84.218)	lr 0.01010
Train [68][1280/3239]	Time 0.351 (0.654)	Data Time 0.001 (0.033)	Loss 2.4729 (2.4756)	Entropy 1.02996 (1.03165)	Top-1 acc 66.016 (64.385)	Top-5 acc 83.984 (84.212)	lr 0.01010
Train [68][1290/3239]	Time 0.221 (0.653)	Data Time 0.001 (0.032)	Loss 2.3929 (2.4755)	Entropy 1.03003 (1.03164)	Top-1 acc 69.922 (64.397)	Top-5 acc 85.938 (84.213)	lr 0.01009
Train [68][1300/3239]	Time 0.216 (0.652)	Data Time 0.001 (0.032)	Loss 2.4840 (2.4757)	Entropy 1.03003 (1.03163)	Top-1 acc 63.672 (64.396)	Top-5 acc 85.156 (84.218)	lr 0.01009
Train [68][1310/3239]	Time 0.229 (0.650)	Data Time 0.001 (0.032)	Loss 2.6908 (2.4759)	Entropy 1.03003 (1.03162)	Top-1 acc 60.156 (64.395)	Top-5 acc 80.078 (84.211)	lr 0.01009
Train [68][1320/3239]	Time 0.251 (0.649)	Data Time 0.001 (0.032)	Loss 2.5509 (2.4763)	Entropy 1.03006 (1.03160)	Top-1 acc 64.453 (64.389)	Top-5 acc 83.594 (84.200)	lr 0.01009
Train [68][1330/3239]	Time 2.550 (0.648)	Data Time 0.001 (0.031)	Loss 2.4886 (2.4766)	Entropy 1.03006 (1.03159)	Top-1 acc 64.844 (64.379)	Top-5 acc 85.156 (84.193)	lr 0.01009
Train [68][1340/3239]	Time 0.219 (0.645)	Data Time 0.001 (0.031)	Loss 2.5350 (2.4767)	Entropy 1.03004 (1.03158)	Top-1 acc 64.844 (64.379)	Top-5 acc 82.031 (84.188)	lr 0.01009
Train [68][1350/3239]	Time 0.273 (0.643)	Data Time 0.001 (0.031)	Loss 2.4567 (2.4770)	Entropy 1.03003 (1.03157)	Top-1 acc 66.797 (64.379)	Top-5 acc 83.594 (84.183)	lr 0.01009
Train [68][1360/3239]	Time 0.231 (0.642)	Data Time 0.001 (0.031)	Loss 2.4717 (2.4768)	Entropy 1.03002 (1.03156)	Top-1 acc 63.281 (64.379)	Top-5 acc 82.812 (84.180)	lr 0.01009
Train [68][1370/3239]	Time 0.318 (0.641)	Data Time 0.001 (0.030)	Loss 2.3445 (2.4766)	Entropy 1.02999 (1.03155)	Top-1 acc 67.188 (64.392)	Top-5 acc 85.938 (84.184)	lr 0.01009
Train [68][1380/3239]	Time 0.220 (0.639)	Data Time 0.001 (0.030)	Loss 2.5926 (2.4767)	Entropy 1.02990 (1.03153)	Top-1 acc 58.594 (64.384)	Top-5 acc 82.031 (84.184)	lr 0.01009
Train [68][1390/3239]	Time 0.231 (0.638)	Data Time 0.001 (0.030)	Loss 2.4502 (2.4771)	Entropy 1.02988 (1.03152)	Top-1 acc 69.531 (64.380)	Top-5 acc 84.766 (84.173)	lr 0.01008
Train [68][1400/3239]	Time 0.233 (0.637)	Data Time 0.001 (0.030)	Loss 2.4735 (2.4772)	Entropy 1.02988 (1.03151)	Top-1 acc 64.062 (64.376)	Top-5 acc 82.812 (84.169)	lr 0.01008
Train [68][1410/3239]	Time 0.223 (0.636)	Data Time 0.001 (0.030)	Loss 2.6234 (2.4776)	Entropy 1.02987 (1.03150)	Top-1 acc 63.281 (64.371)	Top-5 acc 81.641 (84.161)	lr 0.01008
Train [68][1420/3239]	Time 0.239 (0.635)	Data Time 0.001 (0.029)	Loss 2.5516 (2.4775)	Entropy 1.02986 (1.03149)	Top-1 acc 63.672 (64.378)	Top-5 acc 81.641 (84.164)	lr 0.01008
Train [68][1430/3239]	Time 0.263 (0.634)	Data Time 0.001 (0.029)	Loss 2.4323 (2.4774)	Entropy 1.02997 (1.03148)	Top-1 acc 71.484 (64.382)	Top-5 acc 84.375 (84.161)	lr 0.01008
Train [68][1440/3239]	Time 2.454 (0.633)	Data Time 0.001 (0.029)	Loss 2.4695 (2.4774)	Entropy 1.02997 (1.03147)	Top-1 acc 61.719 (64.379)	Top-5 acc 83.984 (84.160)	lr 0.01008
Train [68][1450/3239]	Time 0.215 (0.630)	Data Time 0.001 (0.029)	Loss 2.5036 (2.4775)	Entropy 1.02995 (1.03146)	Top-1 acc 65.234 (64.377)	Top-5 acc 83.594 (84.155)	lr 0.01008
Train [68][1460/3239]	Time 0.345 (0.629)	Data Time 0.001 (0.029)	Loss 2.4677 (2.4774)	Entropy 1.02988 (1.03144)	Top-1 acc 60.938 (64.376)	Top-5 acc 85.547 (84.157)	lr 0.01008
Train [68][1470/3239]	Time 0.312 (0.664)	Data Time 0.004 (0.029)	Loss 2.4574 (2.4776)	Entropy 1.02991 (1.03143)	Top-1 acc 64.844 (64.374)	Top-5 acc 82.812 (84.148)	lr 0.01008
Train [68][1480/3239]	Time 0.239 (0.663)	Data Time 0.002 (0.028)	Loss 2.4774 (2.4777)	Entropy 1.02993 (1.03142)	Top-1 acc 67.969 (64.371)	Top-5 acc 85.156 (84.148)	lr 0.01008
Train [68][1490/3239]	Time 0.246 (0.662)	Data Time 0.001 (0.028)	Loss 2.4813 (2.4781)	Entropy 1.02993 (1.03141)	Top-1 acc 66.016 (64.359)	Top-5 acc 83.594 (84.146)	lr 0.01007
Train [68][1500/3239]	Time 0.273 (0.660)	Data Time 0.001 (0.028)	Loss 2.4101 (2.4779)	Entropy 1.02989 (1.03140)	Top-1 acc 66.797 (64.368)	Top-5 acc 84.766 (84.146)	lr 0.01007
Train [68][1510/3239]	Time 0.227 (0.659)	Data Time 0.001 (0.028)	Loss 2.5012 (2.4779)	Entropy 1.02982 (1.03139)	Top-1 acc 62.500 (64.370)	Top-5 acc 85.938 (84.144)	lr 0.01007
Train [68][1520/3239]	Time 0.221 (0.658)	Data Time 0.001 (0.028)	Loss 2.5634 (2.4781)	Entropy 1.02983 (1.03138)	Top-1 acc 63.281 (64.369)	Top-5 acc 85.156 (84.140)	lr 0.01007
Train [68][1530/3239]	Time 0.239 (0.657)	Data Time 0.001 (0.027)	Loss 2.3805 (2.4781)	Entropy 1.02977 (1.03137)	Top-1 acc 69.531 (64.371)	Top-5 acc 85.156 (84.136)	lr 0.01007
Train [68][1540/3239]	Time 0.247 (0.655)	Data Time 0.001 (0.027)	Loss 2.6265 (2.4783)	Entropy 1.02972 (1.03136)	Top-1 acc 60.156 (64.361)	Top-5 acc 79.688 (84.130)	lr 0.01007
Train [68][1550/3239]	Time 2.619 (0.654)	Data Time 0.001 (0.027)	Loss 2.5473 (2.4782)	Entropy 1.02972 (1.03135)	Top-1 acc 60.938 (64.365)	Top-5 acc 83.594 (84.127)	lr 0.01007
Train [68][1560/3239]	Time 0.240 (0.652)	Data Time 0.001 (0.027)	Loss 2.5111 (2.4784)	Entropy 1.02957 (1.03134)	Top-1 acc 62.891 (64.367)	Top-5 acc 85.547 (84.118)	lr 0.01007
Train [68][1570/3239]	Time 0.218 (0.650)	Data Time 0.001 (0.027)	Loss 2.5215 (2.4782)	Entropy 1.02956 (1.03133)	Top-1 acc 62.500 (64.373)	Top-5 acc 83.984 (84.123)	lr 0.01007
Train [68][1580/3239]	Time 0.229 (0.649)	Data Time 0.001 (0.027)	Loss 2.6373 (2.4784)	Entropy 1.02952 (1.03132)	Top-1 acc 61.719 (64.369)	Top-5 acc 80.078 (84.118)	lr 0.01007
Train [68][1590/3239]	Time 0.327 (0.648)	Data Time 0.001 (0.027)	Loss 2.5237 (2.4785)	Entropy 1.02956 (1.03131)	Top-1 acc 64.453 (64.366)	Top-5 acc 82.031 (84.108)	lr 0.01006
Train [68][1600/3239]	Time 0.230 (0.647)	Data Time 0.001 (0.026)	Loss 2.5796 (2.4784)	Entropy 1.02950 (1.03130)	Top-1 acc 61.719 (64.375)	Top-5 acc 84.375 (84.109)	lr 0.01006
Train [68][1610/3239]	Time 0.226 (0.646)	Data Time 0.001 (0.026)	Loss 2.2707 (2.4784)	Entropy 1.02947 (1.03128)	Top-1 acc 72.266 (64.380)	Top-5 acc 84.766 (84.106)	lr 0.01006
Train [68][1620/3239]	Time 0.219 (0.645)	Data Time 0.001 (0.026)	Loss 2.5096 (2.4783)	Entropy 1.02941 (1.03127)	Top-1 acc 64.062 (64.376)	Top-5 acc 83.594 (84.112)	lr 0.01006
Train [68][1630/3239]	Time 0.227 (0.644)	Data Time 0.001 (0.026)	Loss 2.7047 (2.4782)	Entropy 1.02942 (1.03126)	Top-1 acc 59.375 (64.386)	Top-5 acc 80.469 (84.118)	lr 0.01006
Train [68][1640/3239]	Time 0.246 (0.643)	Data Time 0.001 (0.026)	Loss 2.5740 (2.4780)	Entropy 1.02938 (1.03125)	Top-1 acc 59.766 (64.391)	Top-5 acc 82.422 (84.119)	lr 0.01006
Train [68][1650/3239]	Time 0.220 (0.642)	Data Time 0.001 (0.026)	Loss 2.5004 (2.4779)	Entropy 1.02936 (1.03124)	Top-1 acc 63.281 (64.388)	Top-5 acc 82.031 (84.120)	lr 0.01006
Train [68][1660/3239]	Time 2.673 (0.641)	Data Time 0.001 (0.025)	Loss 2.8707 (2.4780)	Entropy 1.02936 (1.03123)	Top-1 acc 55.859 (64.385)	Top-5 acc 76.562 (84.115)	lr 0.01006
Train [68][1670/3239]	Time 0.219 (0.639)	Data Time 0.001 (0.025)	Loss 2.3072 (2.4778)	Entropy 1.02932 (1.03122)	Top-1 acc 69.531 (64.383)	Top-5 acc 85.938 (84.117)	lr 0.01006
Train [68][1680/3239]	Time 0.324 (0.638)	Data Time 0.001 (0.025)	Loss 2.4850 (2.4781)	Entropy 1.02914 (1.03120)	Top-1 acc 64.844 (64.379)	Top-5 acc 84.375 (84.110)	lr 0.01006
Train [68][1690/3239]	Time 0.250 (0.637)	Data Time 0.001 (0.025)	Loss 2.7263 (2.4780)	Entropy 1.02897 (1.03119)	Top-1 acc 59.375 (64.374)	Top-5 acc 78.516 (84.111)	lr 0.01005
Train [68][1700/3239]	Time 0.229 (0.636)	Data Time 0.001 (0.025)	Loss 2.4588 (2.4780)	Entropy 1.02893 (1.03118)	Top-1 acc 64.062 (64.375)	Top-5 acc 84.766 (84.113)	lr 0.01005
Train [68][1710/3239]	Time 0.220 (0.635)	Data Time 0.001 (0.025)	Loss 2.6565 (2.4780)	Entropy 1.02890 (1.03116)	Top-1 acc 59.375 (64.376)	Top-5 acc 79.688 (84.111)	lr 0.01005
Train [68][1720/3239]	Time 0.161 (0.634)	Data Time 0.001 (0.025)	Loss 2.4165 (2.4783)	Entropy 1.02889 (1.03115)	Top-1 acc 65.625 (64.371)	Top-5 acc 83.984 (84.103)	lr 0.01005
Train [68][1730/3239]	Time 0.337 (0.633)	Data Time 0.001 (0.025)	Loss 2.3829 (2.4781)	Entropy 1.02884 (1.03114)	Top-1 acc 66.797 (64.375)	Top-5 acc 84.375 (84.106)	lr 0.01005
Train [68][1740/3239]	Time 0.228 (0.632)	Data Time 0.001 (0.024)	Loss 2.3312 (2.4782)	Entropy 1.02878 (1.03113)	Top-1 acc 71.875 (64.373)	Top-5 acc 83.594 (84.101)	lr 0.01005
Train [68][1750/3239]	Time 0.218 (0.631)	Data Time 0.001 (0.024)	Loss 2.4676 (2.4782)	Entropy 1.02878 (1.03111)	Top-1 acc 64.062 (64.371)	Top-5 acc 83.984 (84.099)	lr 0.01005
Train [68][1760/3239]	Time 0.253 (0.630)	Data Time 0.001 (0.024)	Loss 2.5531 (2.4780)	Entropy 1.02876 (1.03110)	Top-1 acc 61.328 (64.377)	Top-5 acc 84.375 (84.104)	lr 0.01005
Train [68][1770/3239]	Time 2.615 (0.629)	Data Time 0.001 (0.024)	Loss 2.4599 (2.4780)	Entropy 1.02876 (1.03109)	Top-1 acc 66.406 (64.376)	Top-5 acc 83.984 (84.104)	lr 0.01005
Train [68][1780/3239]	Time 0.219 (0.627)	Data Time 0.001 (0.024)	Loss 2.4226 (2.4779)	Entropy 1.02870 (1.03107)	Top-1 acc 68.359 (64.384)	Top-5 acc 84.766 (84.107)	lr 0.01005
Train [68][1790/3239]	Time 0.226 (0.626)	Data Time 0.001 (0.024)	Loss 2.4644 (2.4781)	Entropy 1.02867 (1.03106)	Top-1 acc 60.156 (64.379)	Top-5 acc 86.328 (84.106)	lr 0.01004
Train [68][1800/3239]	Time 0.228 (0.626)	Data Time 0.001 (0.024)	Loss 2.4693 (2.4781)	Entropy 1.02868 (1.03105)	Top-1 acc 64.062 (64.380)	Top-5 acc 83.984 (84.103)	lr 0.01004
Train [68][1810/3239]	Time 0.242 (0.625)	Data Time 0.001 (0.023)	Loss 2.4115 (2.4783)	Entropy 1.02864 (1.03103)	Top-1 acc 67.188 (64.380)	Top-5 acc 83.203 (84.093)	lr 0.01004
Train [68][1820/3239]	Time 0.333 (0.624)	Data Time 0.001 (0.023)	Loss 2.5360 (2.4783)	Entropy 1.02864 (1.03102)	Top-1 acc 62.891 (64.378)	Top-5 acc 84.766 (84.092)	lr 0.01004
Train [68][1830/3239]	Time 0.216 (0.653)	Data Time 0.002 (0.023)	Loss 2.4297 (2.4781)	Entropy 1.02864 (1.03101)	Top-1 acc 66.406 (64.380)	Top-5 acc 85.938 (84.094)	lr 0.01004
Train [68][1840/3239]	Time 0.239 (0.653)	Data Time 0.002 (0.023)	Loss 2.3716 (2.4781)	Entropy 1.02860 (1.03099)	Top-1 acc 66.797 (64.381)	Top-5 acc 85.547 (84.094)	lr 0.01004
Train [68][1850/3239]	Time 0.219 (0.652)	Data Time 0.002 (0.023)	Loss 2.6491 (2.4782)	Entropy 1.02872 (1.03098)	Top-1 acc 59.375 (64.379)	Top-5 acc 80.078 (84.091)	lr 0.01004
Train [68][1860/3239]	Time 0.361 (0.651)	Data Time 0.001 (0.023)	Loss 2.5505 (2.4781)	Entropy 1.02871 (1.03097)	Top-1 acc 64.062 (64.383)	Top-5 acc 82.422 (84.091)	lr 0.01004
Train [68][1870/3239]	Time 0.296 (0.650)	Data Time 0.002 (0.023)	Loss 2.5373 (2.4785)	Entropy 1.02857 (1.03096)	Top-1 acc 66.016 (64.378)	Top-5 acc 80.859 (84.083)	lr 0.01004
Train [68][1880/3239]	Time 2.448 (0.649)	Data Time 0.006 (0.023)	Loss 2.6266 (2.4786)	Entropy 1.02857 (1.03094)	Top-1 acc 60.938 (64.374)	Top-5 acc 83.203 (84.085)	lr 0.01004
Train [68][1890/3239]	Time 0.236 (0.647)	Data Time 0.001 (0.023)	Loss 2.6668 (2.4788)	Entropy 1.02861 (1.03093)	Top-1 acc 58.984 (64.369)	Top-5 acc 83.594 (84.082)	lr 0.01003
Train [68][1900/3239]	Time 0.277 (0.646)	Data Time 0.001 (0.022)	Loss 2.5021 (2.4789)	Entropy 1.02865 (1.03092)	Top-1 acc 62.500 (64.368)	Top-5 acc 83.203 (84.079)	lr 0.01003
Train [68][1910/3239]	Time 0.392 (0.645)	Data Time 0.002 (0.022)	Loss 2.4786 (2.4789)	Entropy 1.02859 (1.03091)	Top-1 acc 64.062 (64.364)	Top-5 acc 85.156 (84.077)	lr 0.01003
Train [68][1920/3239]	Time 0.228 (0.644)	Data Time 0.001 (0.022)	Loss 2.5285 (2.4790)	Entropy 1.02862 (1.03089)	Top-1 acc 64.453 (64.358)	Top-5 acc 83.594 (84.075)	lr 0.01003
Train [68][1930/3239]	Time 0.227 (0.643)	Data Time 0.001 (0.022)	Loss 2.4470 (2.4786)	Entropy 1.02855 (1.03088)	Top-1 acc 67.188 (64.370)	Top-5 acc 84.375 (84.083)	lr 0.01003
Train [68][1940/3239]	Time 0.228 (0.642)	Data Time 0.001 (0.022)	Loss 2.2818 (2.4781)	Entropy 1.02848 (1.03087)	Top-1 acc 70.312 (64.386)	Top-5 acc 89.062 (84.090)	lr 0.01003
Train [68][1950/3239]	Time 0.220 (0.641)	Data Time 0.002 (0.022)	Loss 2.7420 (2.4782)	Entropy 1.02846 (1.03086)	Top-1 acc 59.375 (64.388)	Top-5 acc 75.000 (84.086)	lr 0.01003
Train [68][1960/3239]	Time 0.233 (0.641)	Data Time 0.001 (0.022)	Loss 2.4566 (2.4781)	Entropy 1.02847 (1.03085)	Top-1 acc 64.062 (64.386)	Top-5 acc 82.812 (84.087)	lr 0.01003
Train [68][1970/3239]	Time 0.210 (0.640)	Data Time 0.001 (0.022)	Loss 2.4227 (2.4781)	Entropy 1.02846 (1.03083)	Top-1 acc 64.844 (64.392)	Top-5 acc 83.984 (84.087)	lr 0.01003
Train [68][1980/3239]	Time 0.285 (0.639)	Data Time 0.001 (0.022)	Loss 2.4018 (2.4783)	Entropy 1.02838 (1.03082)	Top-1 acc 67.188 (64.392)	Top-5 acc 85.156 (84.083)	lr 0.01003
Train [68][1990/3239]	Time 2.601 (0.638)	Data Time 0.001 (0.022)	Loss 2.4242 (2.4784)	Entropy 1.02838 (1.03081)	Top-1 acc 64.844 (64.395)	Top-5 acc 86.328 (84.084)	lr 0.01002
Train [68][2000/3239]	Time 0.214 (0.636)	Data Time 0.001 (0.021)	Loss 2.6041 (2.4784)	Entropy 1.02836 (1.03080)	Top-1 acc 57.422 (64.393)	Top-5 acc 83.203 (84.088)	lr 0.01002
Train [68][2010/3239]	Time 0.259 (0.635)	Data Time 0.001 (0.021)	Loss 2.2543 (2.4785)	Entropy 1.02840 (1.03079)	Top-1 acc 72.266 (64.392)	Top-5 acc 89.453 (84.089)	lr 0.01002
Train [68][2020/3239]	Time 0.218 (0.635)	Data Time 0.001 (0.021)	Loss 2.3024 (2.4786)	Entropy 1.02839 (1.03077)	Top-1 acc 68.359 (64.396)	Top-5 acc 87.109 (84.085)	lr 0.01002
Train [68][2030/3239]	Time 0.214 (0.634)	Data Time 0.001 (0.021)	Loss 2.4888 (2.4789)	Entropy 1.02842 (1.03076)	Top-1 acc 64.844 (64.390)	Top-5 acc 84.766 (84.080)	lr 0.01002
Train [68][2040/3239]	Time 0.224 (0.633)	Data Time 0.001 (0.021)	Loss 2.4307 (2.4790)	Entropy 1.02834 (1.03075)	Top-1 acc 65.625 (64.389)	Top-5 acc 84.766 (84.076)	lr 0.01002
Train [68][2050/3239]	Time 0.337 (0.632)	Data Time 0.001 (0.021)	Loss 2.4504 (2.4790)	Entropy 1.02827 (1.03074)	Top-1 acc 66.797 (64.393)	Top-5 acc 83.203 (84.075)	lr 0.01002
Train [68][2060/3239]	Time 0.228 (0.631)	Data Time 0.001 (0.021)	Loss 2.3986 (2.4792)	Entropy 1.02821 (1.03073)	Top-1 acc 64.844 (64.390)	Top-5 acc 85.938 (84.070)	lr 0.01002
Train [68][2070/3239]	Time 0.227 (0.631)	Data Time 0.001 (0.021)	Loss 2.3802 (2.4792)	Entropy 1.02820 (1.03071)	Top-1 acc 67.969 (64.387)	Top-5 acc 86.719 (84.072)	lr 0.01002
Train [68][2080/3239]	Time 0.220 (0.630)	Data Time 0.001 (0.021)	Loss 2.4292 (2.4796)	Entropy 1.02815 (1.03070)	Top-1 acc 63.672 (64.377)	Top-5 acc 86.719 (84.065)	lr 0.01002
Train [68][2090/3239]	Time 0.245 (0.629)	Data Time 0.001 (0.021)	Loss 2.4878 (2.4795)	Entropy 1.02805 (1.03069)	Top-1 acc 65.234 (64.382)	Top-5 acc 84.375 (84.067)	lr 0.01001
Train [68][2100/3239]	Time 2.518 (0.628)	Data Time 0.001 (0.020)	Loss 2.5961 (2.4797)	Entropy 1.02805 (1.03068)	Top-1 acc 63.281 (64.380)	Top-5 acc 80.469 (84.063)	lr 0.01001
Train [68][2110/3239]	Time 0.235 (0.627)	Data Time 0.001 (0.020)	Loss 2.5336 (2.4797)	Entropy 1.02800 (1.03066)	Top-1 acc 63.281 (64.384)	Top-5 acc 83.984 (84.063)	lr 0.01001
Train [68][2120/3239]	Time 0.244 (0.626)	Data Time 0.002 (0.020)	Loss 2.4895 (2.4798)	Entropy 1.02799 (1.03065)	Top-1 acc 64.453 (64.380)	Top-5 acc 83.594 (84.057)	lr 0.01001
Train [68][2130/3239]	Time 0.219 (0.625)	Data Time 0.001 (0.020)	Loss 2.5524 (2.4800)	Entropy 1.02798 (1.03064)	Top-1 acc 61.328 (64.370)	Top-5 acc 82.422 (84.054)	lr 0.01001
Train [68][2140/3239]	Time 0.373 (0.624)	Data Time 0.001 (0.020)	Loss 2.6312 (2.4801)	Entropy 1.02793 (1.03063)	Top-1 acc 58.984 (64.363)	Top-5 acc 81.641 (84.056)	lr 0.01001
Train [68][2150/3239]	Time 0.225 (0.624)	Data Time 0.001 (0.020)	Loss 2.4582 (2.4800)	Entropy 1.02790 (1.03061)	Top-1 acc 63.672 (64.363)	Top-5 acc 86.719 (84.058)	lr 0.01001
Train [68][2160/3239]	Time 0.235 (0.623)	Data Time 0.001 (0.020)	Loss 2.5007 (2.4799)	Entropy 1.02789 (1.03060)	Top-1 acc 62.891 (64.365)	Top-5 acc 84.375 (84.059)	lr 0.01001
Train [68][2170/3239]	Time 0.231 (0.622)	Data Time 0.001 (0.020)	Loss 2.8262 (2.4802)	Entropy 1.02777 (1.03059)	Top-1 acc 55.469 (64.356)	Top-5 acc 77.344 (84.055)	lr 0.01001
Train [68][2180/3239]	Time 0.217 (0.622)	Data Time 0.001 (0.020)	Loss 2.6021 (2.4804)	Entropy 1.02773 (1.03058)	Top-1 acc 60.156 (64.354)	Top-5 acc 80.469 (84.052)	lr 0.01001
Train [68][2190/3239]	Time 0.259 (0.646)	Data Time 0.002 (0.020)	Loss 2.5054 (2.4806)	Entropy 1.02769 (1.03056)	Top-1 acc 62.109 (64.348)	Top-5 acc 82.031 (84.047)	lr 0.01000
Train [68][2200/3239]	Time 0.265 (0.645)	Data Time 0.002 (0.020)	Loss 2.5283 (2.4806)	Entropy 1.02773 (1.03055)	Top-1 acc 66.406 (64.351)	Top-5 acc 82.812 (84.050)	lr 0.01000
Train [68][2210/3239]	Time 2.508 (0.644)	Data Time 0.002 (0.020)	Loss 2.6283 (2.4805)	Entropy 1.02773 (1.03054)	Top-1 acc 61.328 (64.352)	Top-5 acc 80.859 (84.051)	lr 0.01000
Train [68][2220/3239]	Time 0.238 (0.643)	Data Time 0.001 (0.019)	Loss 2.5264 (2.4806)	Entropy 1.02769 (1.03052)	Top-1 acc 64.844 (64.353)	Top-5 acc 85.547 (84.050)	lr 0.01000
Train [68][2230/3239]	Time 0.228 (0.642)	Data Time 0.001 (0.019)	Loss 2.4875 (2.4804)	Entropy 1.02761 (1.03051)	Top-1 acc 65.234 (64.358)	Top-5 acc 84.766 (84.055)	lr 0.01000
Train [68][2240/3239]	Time 0.251 (0.641)	Data Time 0.001 (0.019)	Loss 2.5953 (2.4801)	Entropy 1.02758 (1.03050)	Top-1 acc 63.672 (64.365)	Top-5 acc 80.078 (84.061)	lr 0.01000
Train [68][2250/3239]	Time 0.236 (0.641)	Data Time 0.001 (0.019)	Loss 2.3831 (2.4801)	Entropy 1.02764 (1.03049)	Top-1 acc 67.969 (64.371)	Top-5 acc 85.938 (84.061)	lr 0.01000
Train [68][2260/3239]	Time 0.254 (0.640)	Data Time 0.001 (0.019)	Loss 2.4621 (2.4801)	Entropy 1.02754 (1.03047)	Top-1 acc 66.406 (64.370)	Top-5 acc 82.812 (84.061)	lr 0.01000
Train [68][2270/3239]	Time 0.222 (0.639)	Data Time 0.001 (0.019)	Loss 2.5226 (2.4802)	Entropy 1.02752 (1.03046)	Top-1 acc 63.281 (64.367)	Top-5 acc 83.594 (84.062)	lr 0.01000
Train [68][2280/3239]	Time 0.238 (0.638)	Data Time 0.001 (0.019)	Loss 2.3500 (2.4803)	Entropy 1.02769 (1.03045)	Top-1 acc 69.922 (64.366)	Top-5 acc 87.500 (84.059)	lr 0.01000
Train [68][2290/3239]	Time 0.240 (0.638)	Data Time 0.001 (0.019)	Loss 2.6047 (2.4806)	Entropy 1.02763 (1.03044)	Top-1 acc 64.062 (64.359)	Top-5 acc 80.078 (84.051)	lr 0.01000
Train [68][2300/3239]	Time 0.208 (0.637)	Data Time 0.001 (0.019)	Loss 2.5497 (2.4805)	Entropy 1.02757 (1.03042)	Top-1 acc 64.844 (64.363)	Top-5 acc 83.984 (84.052)	lr 0.00999
Train [68][2310/3239]	Time 0.214 (0.636)	Data Time 0.001 (0.019)	Loss 2.3352 (2.4805)	Entropy 1.02752 (1.03041)	Top-1 acc 66.016 (64.366)	Top-5 acc 87.109 (84.051)	lr 0.00999
Train [68][2320/3239]	Time 2.603 (0.636)	Data Time 0.001 (0.019)	Loss 2.3877 (2.4804)	Entropy 1.02752 (1.03040)	Top-1 acc 64.062 (64.364)	Top-5 acc 85.547 (84.053)	lr 0.00999
Train [68][2330/3239]	Time 0.251 (0.634)	Data Time 0.001 (0.019)	Loss 2.4989 (2.4806)	Entropy 1.02753 (1.03039)	Top-1 acc 63.672 (64.360)	Top-5 acc 84.766 (84.053)	lr 0.00999
Train [68][2340/3239]	Time 0.233 (0.633)	Data Time 0.001 (0.019)	Loss 2.5721 (2.4806)	Entropy 1.02750 (1.03037)	Top-1 acc 58.984 (64.358)	Top-5 acc 83.594 (84.056)	lr 0.00999
Train [68][2350/3239]	Time 0.211 (0.633)	Data Time 0.001 (0.018)	Loss 2.4076 (2.4806)	Entropy 1.02749 (1.03036)	Top-1 acc 62.891 (64.355)	Top-5 acc 85.156 (84.059)	lr 0.00999
Train [68][2360/3239]	Time 0.229 (0.632)	Data Time 0.001 (0.018)	Loss 2.4447 (2.4805)	Entropy 1.02742 (1.03035)	Top-1 acc 64.844 (64.357)	Top-5 acc 83.594 (84.061)	lr 0.00999
Train [68][2370/3239]	Time 0.224 (0.631)	Data Time 0.001 (0.018)	Loss 2.4057 (2.4807)	Entropy 1.02740 (1.03034)	Top-1 acc 64.844 (64.348)	Top-5 acc 83.984 (84.058)	lr 0.00999
Train [68][2380/3239]	Time 0.240 (0.631)	Data Time 0.001 (0.018)	Loss 2.3896 (2.4807)	Entropy 1.02738 (1.03032)	Top-1 acc 63.672 (64.346)	Top-5 acc 84.766 (84.058)	lr 0.00999
Train [68][2390/3239]	Time 0.213 (0.630)	Data Time 0.001 (0.018)	Loss 2.5843 (2.4808)	Entropy 1.02739 (1.03031)	Top-1 acc 65.234 (64.344)	Top-5 acc 81.641 (84.055)	lr 0.00999
Train [68][2400/3239]	Time 0.212 (0.629)	Data Time 0.001 (0.018)	Loss 2.5688 (2.4809)	Entropy 1.02734 (1.03030)	Top-1 acc 61.719 (64.344)	Top-5 acc 83.203 (84.053)	lr 0.00998
Train [68][2410/3239]	Time 0.367 (0.629)	Data Time 0.001 (0.018)	Loss 2.5099 (2.4809)	Entropy 1.02735 (1.03029)	Top-1 acc 62.500 (64.340)	Top-5 acc 81.250 (84.053)	lr 0.00998
Train [68][2420/3239]	Time 0.247 (0.628)	Data Time 0.002 (0.018)	Loss 2.4376 (2.4809)	Entropy 1.02733 (1.03028)	Top-1 acc 62.109 (64.338)	Top-5 acc 85.938 (84.056)	lr 0.00998
Train [68][2430/3239]	Time 2.506 (0.627)	Data Time 0.001 (0.018)	Loss 2.4919 (2.4813)	Entropy 1.02733 (1.03026)	Top-1 acc 64.453 (64.329)	Top-5 acc 83.594 (84.047)	lr 0.00998
Train [68][2440/3239]	Time 0.223 (0.626)	Data Time 0.001 (0.018)	Loss 2.5862 (2.4814)	Entropy 1.02713 (1.03025)	Top-1 acc 62.109 (64.329)	Top-5 acc 82.031 (84.046)	lr 0.00998
Train [68][2450/3239]	Time 0.233 (0.625)	Data Time 0.001 (0.018)	Loss 2.2591 (2.4815)	Entropy 1.02712 (1.03024)	Top-1 acc 69.922 (64.328)	Top-5 acc 89.453 (84.044)	lr 0.00998
Train [68][2460/3239]	Time 0.354 (0.625)	Data Time 0.001 (0.018)	Loss 2.4737 (2.4815)	Entropy 1.02704 (1.03022)	Top-1 acc 64.844 (64.329)	Top-5 acc 85.547 (84.045)	lr 0.00998
Train [68][2470/3239]	Time 0.240 (0.624)	Data Time 0.001 (0.018)	Loss 2.4095 (2.4815)	Entropy 1.02699 (1.03021)	Top-1 acc 68.359 (64.330)	Top-5 acc 83.984 (84.047)	lr 0.00998
Train [68][2480/3239]	Time 0.243 (0.624)	Data Time 0.001 (0.018)	Loss 2.6281 (2.4819)	Entropy 1.02697 (1.03020)	Top-1 acc 60.938 (64.321)	Top-5 acc 83.203 (84.044)	lr 0.00998
Train [68][2490/3239]	Time 0.215 (0.623)	Data Time 0.001 (0.018)	Loss 2.3808 (2.4819)	Entropy 1.02690 (1.03019)	Top-1 acc 68.359 (64.319)	Top-5 acc 87.109 (84.042)	lr 0.00998
Train [68][2500/3239]	Time 0.376 (0.622)	Data Time 0.001 (0.017)	Loss 2.4899 (2.4820)	Entropy 1.02687 (1.03017)	Top-1 acc 63.281 (64.318)	Top-5 acc 82.812 (84.040)	lr 0.00997
Train [68][2510/3239]	Time 0.242 (0.622)	Data Time 0.001 (0.017)	Loss 2.3262 (2.4819)	Entropy 1.02678 (1.03016)	Top-1 acc 64.844 (64.322)	Top-5 acc 88.281 (84.045)	lr 0.00997
Train [68][2520/3239]	Time 0.224 (0.621)	Data Time 0.001 (0.017)	Loss 2.5165 (2.4819)	Entropy 1.02673 (1.03015)	Top-1 acc 63.672 (64.323)	Top-5 acc 80.078 (84.040)	lr 0.00997
Train [68][2530/3239]	Time 0.220 (0.621)	Data Time 0.001 (0.017)	Loss 2.6643 (2.4819)	Entropy 1.02667 (1.03013)	Top-1 acc 60.938 (64.323)	Top-5 acc 80.078 (84.042)	lr 0.00997
Train [68][2540/3239]	Time 2.540 (0.620)	Data Time 0.001 (0.017)	Loss 2.3905 (2.4820)	Entropy 1.02667 (1.03012)	Top-1 acc 63.281 (64.320)	Top-5 acc 87.500 (84.040)	lr 0.00997
Train [68][2550/3239]	Time 0.347 (0.619)	Data Time 0.001 (0.017)	Loss 2.4142 (2.4820)	Entropy 1.02670 (1.03010)	Top-1 acc 64.844 (64.317)	Top-5 acc 83.203 (84.037)	lr 0.00997
Train [68][2560/3239]	Time 0.351 (0.640)	Data Time 0.003 (0.017)	Loss 2.4900 (2.4818)	Entropy 1.02670 (1.03009)	Top-1 acc 64.844 (64.321)	Top-5 acc 84.375 (84.041)	lr 0.00997
Train [68][2570/3239]	Time 0.226 (0.639)	Data Time 0.002 (0.017)	Loss 2.5732 (2.4819)	Entropy 1.02661 (1.03008)	Top-1 acc 65.625 (64.322)	Top-5 acc 80.469 (84.036)	lr 0.00997
Train [68][2580/3239]	Time 0.250 (0.638)	Data Time 0.002 (0.017)	Loss 2.4360 (2.4819)	Entropy 1.02663 (1.03006)	Top-1 acc 66.406 (64.325)	Top-5 acc 84.766 (84.036)	lr 0.00997
Train [68][2590/3239]	Time 0.332 (0.638)	Data Time 0.001 (0.017)	Loss 2.3631 (2.4820)	Entropy 1.02660 (1.03005)	Top-1 acc 68.359 (64.322)	Top-5 acc 85.547 (84.033)	lr 0.00997
Train [68][2600/3239]	Time 0.218 (0.637)	Data Time 0.001 (0.017)	Loss 2.4257 (2.4819)	Entropy 1.02656 (1.03004)	Top-1 acc 63.672 (64.321)	Top-5 acc 85.156 (84.035)	lr 0.00996
Train [68][2610/3239]	Time 0.239 (0.637)	Data Time 0.001 (0.017)	Loss 2.7040 (2.4821)	Entropy 1.02652 (1.03002)	Top-1 acc 58.203 (64.313)	Top-5 acc 79.688 (84.031)	lr 0.00996
Train [68][2620/3239]	Time 0.226 (0.636)	Data Time 0.001 (0.017)	Loss 2.5084 (2.4820)	Entropy 1.02642 (1.03001)	Top-1 acc 62.109 (64.310)	Top-5 acc 83.594 (84.034)	lr 0.00996
Train [68][2630/3239]	Time 0.250 (0.635)	Data Time 0.001 (0.017)	Loss 2.7187 (2.4822)	Entropy 1.02641 (1.03000)	Top-1 acc 60.938 (64.311)	Top-5 acc 81.250 (84.034)	lr 0.00996
Train [68][2640/3239]	Time 0.283 (0.635)	Data Time 0.001 (0.017)	Loss 2.2801 (2.4827)	Entropy 1.02637 (1.02998)	Top-1 acc 69.922 (64.299)	Top-5 acc 85.938 (84.026)	lr 0.00996
Train [68][2650/3239]	Time 0.236 (0.634)	Data Time 0.001 (0.017)	Loss 2.4056 (2.4829)	Entropy 1.02635 (1.02997)	Top-1 acc 68.359 (64.297)	Top-5 acc 84.375 (84.022)	lr 0.00996
Train [68][2660/3239]	Time 0.280 (0.634)	Data Time 0.001 (0.017)	Loss 2.3738 (2.4828)	Entropy 1.02625 (1.02996)	Top-1 acc 67.188 (64.296)	Top-5 acc 85.547 (84.027)	lr 0.00996
Train [68][2670/3239]	Time 0.230 (0.633)	Data Time 0.001 (0.016)	Loss 2.3947 (2.4830)	Entropy 1.02626 (1.02994)	Top-1 acc 67.188 (64.294)	Top-5 acc 83.984 (84.024)	lr 0.00996
Train [68][2680/3239]	Time 0.280 (0.632)	Data Time 0.001 (0.016)	Loss 2.4130 (2.4831)	Entropy 1.02628 (1.02993)	Top-1 acc 62.500 (64.290)	Top-5 acc 84.766 (84.022)	lr 0.00996
Train [68][2690/3239]	Time 0.229 (0.632)	Data Time 0.001 (0.016)	Loss 2.3828 (2.4831)	Entropy 1.02622 (1.02992)	Top-1 acc 67.188 (64.288)	Top-5 acc 86.719 (84.023)	lr 0.00996
Train [68][2700/3239]	Time 0.247 (0.631)	Data Time 0.001 (0.016)	Loss 2.5998 (2.4832)	Entropy 1.02615 (1.02990)	Top-1 acc 65.625 (64.291)	Top-5 acc 82.422 (84.022)	lr 0.00995
Train [68][2710/3239]	Time 0.264 (0.631)	Data Time 0.001 (0.016)	Loss 2.5477 (2.4831)	Entropy 1.02616 (1.02989)	Top-1 acc 62.891 (64.292)	Top-5 acc 82.812 (84.023)	lr 0.00995
Train [68][2720/3239]	Time 0.229 (0.630)	Data Time 0.001 (0.016)	Loss 2.3809 (2.4831)	Entropy 1.02612 (1.02987)	Top-1 acc 66.016 (64.293)	Top-5 acc 86.719 (84.025)	lr 0.00995
Train [68][2730/3239]	Time 0.308 (0.630)	Data Time 0.001 (0.016)	Loss 2.3933 (2.4833)	Entropy 1.02608 (1.02986)	Top-1 acc 65.234 (64.294)	Top-5 acc 84.766 (84.021)	lr 0.00995
Train [68][2740/3239]	Time 0.200 (0.629)	Data Time 0.001 (0.016)	Loss 2.4561 (2.4835)	Entropy 1.02598 (1.02985)	Top-1 acc 65.234 (64.286)	Top-5 acc 83.984 (84.015)	lr 0.00995
Train [68][2750/3239]	Time 0.254 (0.628)	Data Time 0.001 (0.016)	Loss 2.6422 (2.4836)	Entropy 1.02608 (1.02983)	Top-1 acc 62.109 (64.285)	Top-5 acc 82.422 (84.014)	lr 0.00995
Train [68][2760/3239]	Time 0.249 (0.628)	Data Time 0.001 (0.016)	Loss 2.4407 (2.4836)	Entropy 1.02600 (1.02982)	Top-1 acc 67.578 (64.282)	Top-5 acc 85.156 (84.013)	lr 0.00995
Train [68][2770/3239]	Time 0.235 (0.627)	Data Time 0.001 (0.016)	Loss 2.5544 (2.4837)	Entropy 1.02595 (1.02981)	Top-1 acc 61.719 (64.276)	Top-5 acc 81.641 (84.012)	lr 0.00995
Train [68][2780/3239]	Time 0.214 (0.627)	Data Time 0.001 (0.016)	Loss 2.4180 (2.4838)	Entropy 1.02597 (1.02979)	Top-1 acc 67.578 (64.274)	Top-5 acc 83.984 (84.011)	lr 0.00995
Train [68][2790/3239]	Time 0.231 (0.626)	Data Time 0.001 (0.016)	Loss 2.5786 (2.4837)	Entropy 1.02601 (1.02978)	Top-1 acc 61.328 (64.275)	Top-5 acc 80.469 (84.014)	lr 0.00995
Train [68][2800/3239]	Time 0.228 (0.626)	Data Time 0.001 (0.016)	Loss 2.4114 (2.4837)	Entropy 1.02600 (1.02976)	Top-1 acc 66.406 (64.274)	Top-5 acc 83.203 (84.013)	lr 0.00994
Train [68][2810/3239]	Time 0.224 (0.625)	Data Time 0.001 (0.016)	Loss 2.6755 (2.4838)	Entropy 1.02597 (1.02975)	Top-1 acc 60.156 (64.276)	Top-5 acc 82.031 (84.014)	lr 0.00994
Train [68][2820/3239]	Time 0.346 (0.624)	Data Time 0.001 (0.016)	Loss 2.4199 (2.4836)	Entropy 1.02597 (1.02974)	Top-1 acc 64.844 (64.282)	Top-5 acc 85.547 (84.016)	lr 0.00994
Train [68][2830/3239]	Time 0.253 (0.624)	Data Time 0.001 (0.016)	Loss 2.6121 (2.4835)	Entropy 1.02598 (1.02972)	Top-1 acc 62.891 (64.283)	Top-5 acc 83.203 (84.017)	lr 0.00994
Train [68][2840/3239]	Time 0.168 (0.623)	Data Time 0.001 (0.016)	Loss 2.5400 (2.4840)	Entropy 1.02599 (1.02971)	Top-1 acc 60.938 (64.270)	Top-5 acc 83.984 (84.010)	lr 0.00994
Train [68][2850/3239]	Time 0.223 (0.623)	Data Time 0.001 (0.016)	Loss 2.4673 (2.4842)	Entropy 1.02593 (1.02970)	Top-1 acc 62.500 (64.265)	Top-5 acc 85.547 (84.009)	lr 0.00994
Train [68][2860/3239]	Time 0.218 (0.622)	Data Time 0.001 (0.015)	Loss 2.4585 (2.4843)	Entropy 1.02591 (1.02968)	Top-1 acc 64.062 (64.262)	Top-5 acc 86.719 (84.009)	lr 0.00994
Train [68][2870/3239]	Time 0.217 (0.622)	Data Time 0.001 (0.015)	Loss 2.5355 (2.4844)	Entropy 1.02589 (1.02967)	Top-1 acc 64.453 (64.257)	Top-5 acc 83.594 (84.006)	lr 0.00994
Train [68][2880/3239]	Time 0.248 (0.621)	Data Time 0.001 (0.015)	Loss 2.5242 (2.4844)	Entropy 1.02585 (1.02966)	Top-1 acc 61.328 (64.255)	Top-5 acc 81.641 (84.007)	lr 0.00994
Train [68][2890/3239]	Time 0.245 (0.640)	Data Time 0.004 (0.015)	Loss 2.4742 (2.4842)	Entropy 1.02581 (1.02965)	Top-1 acc 67.188 (64.260)	Top-5 acc 84.375 (84.009)	lr 0.00994
Train [68][2900/3239]	Time 0.267 (0.639)	Data Time 0.002 (0.015)	Loss 2.5148 (2.4842)	Entropy 1.02576 (1.02963)	Top-1 acc 61.328 (64.259)	Top-5 acc 83.203 (84.009)	lr 0.00993
Train [68][2910/3239]	Time 0.387 (0.639)	Data Time 0.002 (0.015)	Loss 2.5173 (2.4841)	Entropy 1.02575 (1.02962)	Top-1 acc 62.109 (64.260)	Top-5 acc 83.984 (84.010)	lr 0.00993
Train [68][2920/3239]	Time 0.241 (0.638)	Data Time 0.001 (0.015)	Loss 2.3442 (2.4840)	Entropy 1.02568 (1.02961)	Top-1 acc 69.922 (64.265)	Top-5 acc 84.375 (84.013)	lr 0.00993
Train [68][2930/3239]	Time 0.234 (0.638)	Data Time 0.001 (0.015)	Loss 2.4628 (2.4840)	Entropy 1.02568 (1.02959)	Top-1 acc 65.234 (64.264)	Top-5 acc 88.281 (84.017)	lr 0.00993
Train [68][2940/3239]	Time 0.264 (0.637)	Data Time 0.001 (0.015)	Loss 2.4505 (2.4842)	Entropy 1.02571 (1.02958)	Top-1 acc 63.672 (64.258)	Top-5 acc 84.375 (84.013)	lr 0.00993
Train [68][2950/3239]	Time 0.235 (0.637)	Data Time 0.001 (0.015)	Loss 2.3360 (2.4840)	Entropy 1.02576 (1.02957)	Top-1 acc 68.750 (64.260)	Top-5 acc 86.328 (84.015)	lr 0.00993
Train [68][2960/3239]	Time 0.228 (0.636)	Data Time 0.001 (0.015)	Loss 2.5801 (2.4840)	Entropy 1.02576 (1.02955)	Top-1 acc 61.328 (64.264)	Top-5 acc 84.766 (84.016)	lr 0.00993
Train [68][2970/3239]	Time 0.292 (0.636)	Data Time 0.001 (0.015)	Loss 2.3402 (2.4838)	Entropy 1.02575 (1.02954)	Top-1 acc 68.750 (64.270)	Top-5 acc 88.672 (84.020)	lr 0.00993
Train [68][2980/3239]	Time 0.228 (0.635)	Data Time 0.001 (0.015)	Loss 2.3891 (2.4838)	Entropy 1.02575 (1.02953)	Top-1 acc 64.844 (64.272)	Top-5 acc 84.766 (84.019)	lr 0.00993
Train [68][2990/3239]	Time 0.245 (0.635)	Data Time 0.001 (0.015)	Loss 2.3938 (2.4839)	Entropy 1.02575 (1.02951)	Top-1 acc 69.141 (64.271)	Top-5 acc 86.328 (84.018)	lr 0.00993
Train [68][3000/3239]	Time 0.217 (0.634)	Data Time 0.001 (0.015)	Loss 2.5168 (2.4839)	Entropy 1.02577 (1.02950)	Top-1 acc 62.891 (64.271)	Top-5 acc 81.250 (84.018)	lr 0.00992
Train [68][3010/3239]	Time 0.252 (0.633)	Data Time 0.001 (0.015)	Loss 2.6327 (2.4840)	Entropy 1.02580 (1.02949)	Top-1 acc 59.766 (64.268)	Top-5 acc 81.250 (84.017)	lr 0.00992
Train [68][3020/3239]	Time 0.301 (0.633)	Data Time 0.001 (0.015)	Loss 2.5552 (2.4841)	Entropy 1.02572 (1.02948)	Top-1 acc 62.109 (64.264)	Top-5 acc 83.594 (84.013)	lr 0.00992
Train [68][3030/3239]	Time 0.257 (0.632)	Data Time 0.001 (0.015)	Loss 2.7131 (2.4843)	Entropy 1.02572 (1.02946)	Top-1 acc 60.156 (64.260)	Top-5 acc 76.953 (84.010)	lr 0.00992
Train [68][3040/3239]	Time 0.337 (0.632)	Data Time 0.001 (0.015)	Loss 2.5575 (2.4845)	Entropy 1.02576 (1.02945)	Top-1 acc 63.281 (64.251)	Top-5 acc 82.812 (84.007)	lr 0.00992
Train [68][3050/3239]	Time 0.232 (0.631)	Data Time 0.001 (0.015)	Loss 2.3065 (2.4848)	Entropy 1.02574 (1.02944)	Top-1 acc 67.578 (64.241)	Top-5 acc 86.328 (83.998)	lr 0.00992
Train [68][3060/3239]	Time 0.224 (0.631)	Data Time 0.001 (0.015)	Loss 2.5205 (2.4848)	Entropy 1.02577 (1.02943)	Top-1 acc 66.016 (64.244)	Top-5 acc 82.812 (84.001)	lr 0.00992
Train [68][3070/3239]	Time 0.232 (0.630)	Data Time 0.001 (0.015)	Loss 2.4388 (2.4848)	Entropy 1.02579 (1.02942)	Top-1 acc 65.234 (64.243)	Top-5 acc 84.766 (84.001)	lr 0.00992
Train [68][3080/3239]	Time 0.269 (0.630)	Data Time 0.001 (0.014)	Loss 2.4362 (2.4849)	Entropy 1.02575 (1.02940)	Top-1 acc 64.453 (64.240)	Top-5 acc 85.938 (83.998)	lr 0.00992
Train [68][3090/3239]	Time 0.228 (0.629)	Data Time 0.001 (0.014)	Loss 2.3435 (2.4848)	Entropy 1.02579 (1.02939)	Top-1 acc 67.969 (64.243)	Top-5 acc 86.719 (84.001)	lr 0.00992
Train [68][3100/3239]	Time 0.223 (0.629)	Data Time 0.001 (0.014)	Loss 2.5878 (2.4851)	Entropy 1.02577 (1.02938)	Top-1 acc 62.109 (64.234)	Top-5 acc 83.203 (83.995)	lr 0.00991
Train [68][3110/3239]	Time 0.210 (0.628)	Data Time 0.001 (0.014)	Loss 2.3076 (2.4849)	Entropy 1.02574 (1.02937)	Top-1 acc 68.750 (64.236)	Top-5 acc 88.281 (83.999)	lr 0.00991
Train [68][3120/3239]	Time 0.232 (0.628)	Data Time 0.001 (0.014)	Loss 2.5697 (2.4850)	Entropy 1.02568 (1.02936)	Top-1 acc 60.156 (64.232)	Top-5 acc 83.594 (83.999)	lr 0.00991
Train [68][3130/3239]	Time 0.366 (0.627)	Data Time 0.001 (0.014)	Loss 2.3558 (2.4851)	Entropy 1.02566 (1.02935)	Top-1 acc 66.406 (64.228)	Top-5 acc 85.156 (83.994)	lr 0.00991
Train [68][3140/3239]	Time 0.267 (0.627)	Data Time 0.001 (0.014)	Loss 2.5868 (2.4852)	Entropy 1.02560 (1.02933)	Top-1 acc 62.891 (64.225)	Top-5 acc 82.031 (83.989)	lr 0.00991
Train [68][3150/3239]	Time 0.228 (0.626)	Data Time 0.001 (0.014)	Loss 2.3192 (2.4851)	Entropy 1.02562 (1.02932)	Top-1 acc 66.016 (64.225)	Top-5 acc 88.281 (83.993)	lr 0.00991
Train [68][3160/3239]	Time 0.253 (0.626)	Data Time 0.001 (0.014)	Loss 2.4278 (2.4850)	Entropy 1.02567 (1.02931)	Top-1 acc 65.625 (64.228)	Top-5 acc 83.203 (83.995)	lr 0.00991
Train [68][3170/3239]	Time 0.229 (0.625)	Data Time 0.001 (0.014)	Loss 2.4718 (2.4849)	Entropy 1.02570 (1.02930)	Top-1 acc 64.844 (64.227)	Top-5 acc 82.812 (83.998)	lr 0.00991
Train [68][3180/3239]	Time 0.226 (0.625)	Data Time 0.000 (0.014)	Loss 2.4741 (2.4849)	Entropy 1.02520 (1.02929)	Top-1 acc 66.797 (64.228)	Top-5 acc 86.328 (84.001)	lr 0.00991
Train [68][3190/3239]	Time 0.202 (0.624)	Data Time 0.000 (0.014)	Loss 2.7229 (2.4848)	Entropy 1.02512 (1.02928)	Top-1 acc 57.031 (64.226)	Top-5 acc 79.688 (84.002)	lr 0.00991
Train [68][3200/3239]	Time 0.230 (0.624)	Data Time 0.000 (0.014)	Loss 2.4872 (2.4849)	Entropy 1.02501 (1.02926)	Top-1 acc 62.891 (64.226)	Top-5 acc 84.375 (84.002)	lr 0.00991
Train [68][3210/3239]	Time 0.200 (0.623)	Data Time 0.000 (0.014)	Loss 2.5259 (2.4850)	Entropy 1.02495 (1.02925)	Top-1 acc 61.328 (64.217)	Top-5 acc 82.812 (83.998)	lr 0.00990
Train [68][3220/3239]	Time 0.551 (0.639)	Data Time 0.000 (0.014)	Loss 2.4678 (2.4850)	Entropy 1.02497 (1.02924)	Top-1 acc 63.281 (64.221)	Top-5 acc 85.547 (83.999)	lr 0.00990
Train [68][3230/3239]	Time 0.232 (0.639)	Data Time 0.000 (0.014)	Loss 2.5572 (2.4851)	Entropy 1.02494 (1.02922)	Top-1 acc 61.719 (64.218)	Top-5 acc 82.812 (83.996)	lr 0.00990
Train [68][3239/3239]	Time 2.279 (0.638)	Data Time 0.000 (0.014)	Loss 2.7848 (2.4852)	Entropy 1.02494 (1.02921)	Top-1 acc 56.790 (64.214)	Top-5 acc 76.543 (83.995)	lr 0.00990
==========Valid [68/120]	loss 1.391	top-1 acc 68.199 (68.297)	top-5 acc 87.490	Train top-1 64.214	top-5 83.995	Entropy 1.02494	Latency-None: 0.000ms	Flops: 546.53M
Train [69][0/3239]	Time 43.760 (43.760)	Data Time 40.082 (40.082)	Loss 2.5409 (2.5409)	Entropy 1.02494 (1.02494)	Top-1 acc 61.719 (61.719)	Top-5 acc 84.766 (84.766)	lr 0.00990
Train [69][10/3239]	Time 2.576 (4.585)	Data Time 0.002 (3.679)	Loss 2.4274 (2.4533)	Entropy 1.02494 (1.02494)	Top-1 acc 66.797 (64.808)	Top-5 acc 86.719 (84.730)	lr 0.00990
Train [69][20/3239]	Time 0.242 (2.518)	Data Time 0.002 (1.928)	Loss 2.5744 (2.4501)	Entropy 1.02492 (1.02493)	Top-1 acc 63.281 (64.509)	Top-5 acc 82.812 (84.952)	lr 0.00990
Train [69][30/3239]	Time 0.344 (1.862)	Data Time 0.001 (1.306)	Loss 2.3502 (2.4437)	Entropy 1.02486 (1.02491)	Top-1 acc 66.797 (65.020)	Top-5 acc 87.891 (85.370)	lr 0.00990
Train [69][40/3239]	Time 0.239 (1.525)	Data Time 0.002 (0.988)	Loss 2.3060 (2.4539)	Entropy 1.02481 (1.02489)	Top-1 acc 73.047 (64.796)	Top-5 acc 87.109 (85.042)	lr 0.00990
Train [69][50/3239]	Time 0.215 (1.318)	Data Time 0.001 (0.795)	Loss 2.3819 (2.4529)	Entropy 1.02484 (1.02488)	Top-1 acc 61.719 (64.844)	Top-5 acc 85.938 (84.957)	lr 0.00990
Train [69][60/3239]	Time 0.238 (1.180)	Data Time 0.001 (0.665)	Loss 2.5339 (2.4512)	Entropy 1.02485 (1.02487)	Top-1 acc 57.812 (64.889)	Top-5 acc 83.594 (84.932)	lr 0.00990
Train [69][70/3239]	Time 0.217 (1.076)	Data Time 0.001 (0.571)	Loss 2.6154 (2.4479)	Entropy 1.02488 (1.02487)	Top-1 acc 58.203 (64.855)	Top-5 acc 82.422 (85.008)	lr 0.00989
Train [69][80/3239]	Time 0.225 (1.004)	Data Time 0.001 (0.501)	Loss 2.6159 (2.4502)	Entropy 1.02487 (1.02487)	Top-1 acc 62.109 (64.892)	Top-5 acc 82.422 (84.910)	lr 0.00989
Train [69][90/3239]	Time 0.229 (0.945)	Data Time 0.001 (0.446)	Loss 2.4862 (2.4549)	Entropy 1.02467 (1.02487)	Top-1 acc 61.719 (64.852)	Top-5 acc 84.375 (84.766)	lr 0.00989
Train [69][100/3239]	Time 0.219 (0.897)	Data Time 0.001 (0.402)	Loss 2.5768 (2.4579)	Entropy 1.02469 (1.02485)	Top-1 acc 64.453 (64.910)	Top-5 acc 80.859 (84.692)	lr 0.00989
Train [69][110/3239]	Time 0.223 (0.859)	Data Time 0.001 (0.366)	Loss 2.4961 (2.4593)	Entropy 1.02465 (1.02483)	Top-1 acc 62.891 (64.865)	Top-5 acc 82.812 (84.632)	lr 0.00989
Train [69][120/3239]	Time 2.615 (0.827)	Data Time 0.001 (0.336)	Loss 2.6155 (2.4617)	Entropy 1.02465 (1.02482)	Top-1 acc 60.938 (64.853)	Top-5 acc 81.250 (84.562)	lr 0.00989
Train [69][130/3239]	Time 0.236 (0.782)	Data Time 0.001 (0.310)	Loss 2.3733 (2.4602)	Entropy 1.02462 (1.02480)	Top-1 acc 64.453 (64.918)	Top-5 acc 87.109 (84.584)	lr 0.00989
Train [69][140/3239]	Time 0.218 (0.760)	Data Time 0.001 (0.288)	Loss 2.5309 (2.4561)	Entropy 1.02463 (1.02479)	Top-1 acc 67.188 (65.007)	Top-5 acc 83.594 (84.680)	lr 0.00989
Train [69][150/3239]	Time 0.244 (0.741)	Data Time 0.001 (0.269)	Loss 2.4354 (2.4548)	Entropy 1.02465 (1.02478)	Top-1 acc 67.578 (65.048)	Top-5 acc 86.328 (84.706)	lr 0.00989
Train [69][160/3239]	Time 0.233 (0.725)	Data Time 0.001 (0.253)	Loss 2.4454 (2.4521)	Entropy 1.02469 (1.02477)	Top-1 acc 67.188 (65.077)	Top-5 acc 85.156 (84.746)	lr 0.00989
Train [69][170/3239]	Time 0.234 (0.711)	Data Time 0.002 (0.238)	Loss 2.3591 (2.4497)	Entropy 1.02453 (1.02476)	Top-1 acc 67.578 (65.164)	Top-5 acc 85.938 (84.745)	lr 0.00988
Train [69][180/3239]	Time 0.229 (0.699)	Data Time 0.001 (0.225)	Loss 2.4262 (2.4539)	Entropy 1.02442 (1.02475)	Top-1 acc 64.453 (65.064)	Top-5 acc 85.938 (84.673)	lr 0.00988
Train [69][190/3239]	Time 0.227 (0.686)	Data Time 0.001 (0.213)	Loss 2.4623 (2.4551)	Entropy 1.02440 (1.02473)	Top-1 acc 63.672 (65.036)	Top-5 acc 87.109 (84.637)	lr 0.00988
Train [69][200/3239]	Time 0.223 (0.675)	Data Time 0.001 (0.203)	Loss 2.4453 (2.4575)	Entropy 1.02434 (1.02471)	Top-1 acc 64.062 (64.927)	Top-5 acc 83.984 (84.579)	lr 0.00988
Train [69][210/3239]	Time 0.362 (0.667)	Data Time 0.001 (0.193)	Loss 2.5223 (2.4557)	Entropy 1.02425 (1.02469)	Top-1 acc 63.281 (64.979)	Top-5 acc 82.031 (84.592)	lr 0.00988
Train [69][220/3239]	Time 0.251 (0.658)	Data Time 0.001 (0.185)	Loss 2.4305 (2.4561)	Entropy 1.02417 (1.02467)	Top-1 acc 63.672 (64.994)	Top-5 acc 87.891 (84.585)	lr 0.00988
Train [69][230/3239]	Time 2.509 (0.650)	Data Time 0.002 (0.177)	Loss 2.4779 (2.4564)	Entropy 1.02417 (1.02465)	Top-1 acc 65.234 (65.025)	Top-5 acc 82.422 (84.580)	lr 0.00988
Train [69][240/3239]	Time 0.229 (0.633)	Data Time 0.001 (0.169)	Loss 2.4562 (2.4558)	Entropy 1.02413 (1.02463)	Top-1 acc 67.578 (65.059)	Top-5 acc 84.375 (84.591)	lr 0.00988
Train [69][250/3239]	Time 0.227 (0.626)	Data Time 0.001 (0.163)	Loss 2.4830 (2.4567)	Entropy 1.02403 (1.02461)	Top-1 acc 64.844 (65.016)	Top-5 acc 84.375 (84.591)	lr 0.00988
Train [69][260/3239]	Time 0.228 (0.621)	Data Time 0.001 (0.157)	Loss 2.4559 (2.4572)	Entropy 1.02402 (1.02459)	Top-1 acc 66.406 (65.025)	Top-5 acc 85.547 (84.592)	lr 0.00988
Train [69][270/3239]	Time 0.243 (0.615)	Data Time 0.001 (0.151)	Loss 2.5961 (2.4577)	Entropy 1.02407 (1.02457)	Top-1 acc 62.109 (65.048)	Top-5 acc 82.812 (84.567)	lr 0.00987
Train [69][280/3239]	Time 0.229 (0.610)	Data Time 0.001 (0.145)	Loss 2.4323 (2.4601)	Entropy 1.02413 (1.02455)	Top-1 acc 65.234 (64.973)	Top-5 acc 84.766 (84.511)	lr 0.00987
Train [69][290/3239]	Time 0.241 (0.605)	Data Time 0.001 (0.141)	Loss 2.4146 (2.4615)	Entropy 1.02408 (1.02453)	Top-1 acc 63.672 (64.908)	Top-5 acc 85.156 (84.481)	lr 0.00987
Train [69][300/3239]	Time 0.226 (0.601)	Data Time 0.001 (0.136)	Loss 2.5061 (2.4629)	Entropy 1.02406 (1.02452)	Top-1 acc 67.578 (64.900)	Top-5 acc 82.812 (84.435)	lr 0.00987
Train [69][310/3239]	Time 0.221 (0.597)	Data Time 0.001 (0.132)	Loss 2.4932 (2.4647)	Entropy 1.02405 (1.02450)	Top-1 acc 64.453 (64.835)	Top-5 acc 84.766 (84.408)	lr 0.00987
Train [69][320/3239]	Time 0.237 (0.594)	Data Time 0.001 (0.128)	Loss 2.4106 (2.4643)	Entropy 1.02406 (1.02449)	Top-1 acc 67.969 (64.836)	Top-5 acc 84.766 (84.410)	lr 0.00987
Train [69][330/3239]	Time 0.217 (0.590)	Data Time 0.001 (0.124)	Loss 2.4027 (2.4638)	Entropy 1.02399 (1.02448)	Top-1 acc 69.531 (64.873)	Top-5 acc 83.594 (84.382)	lr 0.00987
Train [69][340/3239]	Time 57.419 (0.747)	Data Time 0.002 (0.120)	Loss 2.3092 (2.4629)	Entropy 1.02399 (1.02446)	Top-1 acc 68.359 (64.849)	Top-5 acc 87.109 (84.400)	lr 0.00987
Train [69][350/3239]	Time 0.277 (0.736)	Data Time 0.002 (0.117)	Loss 2.4980 (2.4636)	Entropy 1.02387 (1.02444)	Top-1 acc 58.203 (64.817)	Top-5 acc 85.547 (84.401)	lr 0.00987
Train [69][360/3239]	Time 0.222 (0.729)	Data Time 0.002 (0.114)	Loss 2.5386 (2.4635)	Entropy 1.02374 (1.02443)	Top-1 acc 58.594 (64.829)	Top-5 acc 84.375 (84.387)	lr 0.00987
Train [69][370/3239]	Time 0.246 (0.722)	Data Time 0.002 (0.111)	Loss 2.4340 (2.4631)	Entropy 1.02373 (1.02441)	Top-1 acc 63.281 (64.840)	Top-5 acc 87.500 (84.399)	lr 0.00986
Train [69][380/3239]	Time 0.244 (0.715)	Data Time 0.001 (0.108)	Loss 2.4571 (2.4637)	Entropy 1.02369 (1.02439)	Top-1 acc 67.188 (64.835)	Top-5 acc 85.938 (84.391)	lr 0.00986
Train [69][390/3239]	Time 0.347 (0.709)	Data Time 0.001 (0.105)	Loss 2.6166 (2.4634)	Entropy 1.02371 (1.02437)	Top-1 acc 62.500 (64.849)	Top-5 acc 80.078 (84.377)	lr 0.00986
Train [69][400/3239]	Time 0.217 (0.704)	Data Time 0.001 (0.103)	Loss 2.4131 (2.4622)	Entropy 1.02365 (1.02435)	Top-1 acc 66.406 (64.881)	Top-5 acc 87.500 (84.415)	lr 0.00986
Train [69][410/3239]	Time 0.240 (0.699)	Data Time 0.001 (0.100)	Loss 2.4385 (2.4620)	Entropy 1.02365 (1.02434)	Top-1 acc 66.406 (64.888)	Top-5 acc 83.984 (84.409)	lr 0.00986
Train [69][420/3239]	Time 0.216 (0.694)	Data Time 0.001 (0.098)	Loss 2.3042 (2.4616)	Entropy 1.02364 (1.02432)	Top-1 acc 71.094 (64.906)	Top-5 acc 86.719 (84.418)	lr 0.00986
Train [69][430/3239]	Time 0.310 (0.688)	Data Time 0.001 (0.095)	Loss 2.5401 (2.4616)	Entropy 1.02364 (1.02430)	Top-1 acc 61.719 (64.928)	Top-5 acc 81.641 (84.417)	lr 0.00986
Train [69][440/3239]	Time 0.221 (0.683)	Data Time 0.001 (0.093)	Loss 2.6251 (2.4625)	Entropy 1.02363 (1.02429)	Top-1 acc 60.547 (64.901)	Top-5 acc 81.250 (84.394)	lr 0.00986
Train [69][450/3239]	Time 2.530 (0.679)	Data Time 0.001 (0.091)	Loss 2.4454 (2.4631)	Entropy 1.02363 (1.02427)	Top-1 acc 66.406 (64.892)	Top-5 acc 83.203 (84.384)	lr 0.00986
Train [69][460/3239]	Time 0.241 (0.669)	Data Time 0.001 (0.089)	Loss 2.5181 (2.4634)	Entropy 1.02356 (1.02426)	Top-1 acc 64.453 (64.885)	Top-5 acc 83.984 (84.362)	lr 0.00986
Train [69][470/3239]	Time 0.237 (0.665)	Data Time 0.001 (0.087)	Loss 2.4981 (2.4632)	Entropy 1.02355 (1.02424)	Top-1 acc 65.625 (64.888)	Top-5 acc 84.766 (84.369)	lr 0.00985
Train [69][480/3239]	Time 0.334 (0.661)	Data Time 0.001 (0.086)	Loss 2.4457 (2.4623)	Entropy 1.02354 (1.02423)	Top-1 acc 64.062 (64.910)	Top-5 acc 82.812 (84.374)	lr 0.00985
Train [69][490/3239]	Time 0.240 (0.657)	Data Time 0.001 (0.084)	Loss 2.3546 (2.4618)	Entropy 1.02351 (1.02422)	Top-1 acc 66.406 (64.904)	Top-5 acc 85.547 (84.385)	lr 0.00985
Train [69][500/3239]	Time 0.223 (0.653)	Data Time 0.001 (0.082)	Loss 2.4152 (2.4622)	Entropy 1.02353 (1.02420)	Top-1 acc 66.797 (64.915)	Top-5 acc 83.984 (84.375)	lr 0.00985
Train [69][510/3239]	Time 0.228 (0.650)	Data Time 0.001 (0.081)	Loss 2.4366 (2.4611)	Entropy 1.02346 (1.02419)	Top-1 acc 66.797 (64.941)	Top-5 acc 83.984 (84.393)	lr 0.00985
Train [69][520/3239]	Time 0.316 (0.647)	Data Time 0.001 (0.079)	Loss 2.5066 (2.4612)	Entropy 1.02352 (1.02417)	Top-1 acc 66.406 (64.934)	Top-5 acc 83.203 (84.391)	lr 0.00985
Train [69][530/3239]	Time 0.234 (0.643)	Data Time 0.001 (0.078)	Loss 2.4725 (2.4614)	Entropy 1.02344 (1.02416)	Top-1 acc 66.016 (64.942)	Top-5 acc 82.422 (84.378)	lr 0.00985
Train [69][540/3239]	Time 0.232 (0.640)	Data Time 0.001 (0.076)	Loss 2.3997 (2.4609)	Entropy 1.02342 (1.02415)	Top-1 acc 65.234 (64.953)	Top-5 acc 85.156 (84.382)	lr 0.00985
Train [69][550/3239]	Time 0.228 (0.637)	Data Time 0.001 (0.075)	Loss 2.3339 (2.4595)	Entropy 1.02348 (1.02413)	Top-1 acc 66.016 (64.988)	Top-5 acc 88.281 (84.415)	lr 0.00985
Train [69][560/3239]	Time 2.520 (0.634)	Data Time 0.001 (0.074)	Loss 2.3743 (2.4597)	Entropy 1.02348 (1.02412)	Top-1 acc 65.625 (64.974)	Top-5 acc 84.766 (84.413)	lr 0.00985
Train [69][570/3239]	Time 0.266 (0.627)	Data Time 0.001 (0.072)	Loss 2.4247 (2.4599)	Entropy 1.02344 (1.02411)	Top-1 acc 66.406 (64.969)	Top-5 acc 85.156 (84.397)	lr 0.00984
Train [69][580/3239]	Time 0.227 (0.624)	Data Time 0.001 (0.071)	Loss 2.5342 (2.4602)	Entropy 1.02341 (1.02410)	Top-1 acc 62.500 (64.968)	Top-5 acc 83.984 (84.390)	lr 0.00984
Train [69][590/3239]	Time 0.228 (0.622)	Data Time 0.001 (0.070)	Loss 2.4466 (2.4607)	Entropy 1.02340 (1.02409)	Top-1 acc 65.234 (64.959)	Top-5 acc 87.109 (84.388)	lr 0.00984
Train [69][600/3239]	Time 0.245 (0.619)	Data Time 0.001 (0.069)	Loss 2.2888 (2.4596)	Entropy 1.02340 (1.02408)	Top-1 acc 72.656 (64.998)	Top-5 acc 85.938 (84.411)	lr 0.00984
Train [69][610/3239]	Time 0.325 (0.617)	Data Time 0.001 (0.068)	Loss 2.7861 (2.4600)	Entropy 1.02337 (1.02406)	Top-1 acc 58.594 (64.986)	Top-5 acc 80.469 (84.414)	lr 0.00984
Train [69][620/3239]	Time 0.229 (0.614)	Data Time 0.001 (0.067)	Loss 2.5841 (2.4598)	Entropy 1.02316 (1.02405)	Top-1 acc 64.453 (64.988)	Top-5 acc 80.859 (84.423)	lr 0.00984
Train [69][630/3239]	Time 0.225 (0.612)	Data Time 0.001 (0.066)	Loss 2.3981 (2.4600)	Entropy 1.02312 (1.02404)	Top-1 acc 64.844 (64.964)	Top-5 acc 87.500 (84.417)	lr 0.00984
Train [69][640/3239]	Time 0.237 (0.610)	Data Time 0.001 (0.065)	Loss 2.5326 (2.4600)	Entropy 1.02316 (1.02402)	Top-1 acc 59.766 (64.970)	Top-5 acc 81.641 (84.416)	lr 0.00984
Train [69][650/3239]	Time 0.217 (0.608)	Data Time 0.001 (0.064)	Loss 2.5031 (2.4602)	Entropy 1.02306 (1.02401)	Top-1 acc 65.234 (64.967)	Top-5 acc 83.984 (84.424)	lr 0.00984
Train [69][660/3239]	Time 0.326 (0.606)	Data Time 0.001 (0.063)	Loss 2.3515 (2.4599)	Entropy 1.02302 (1.02399)	Top-1 acc 69.141 (64.975)	Top-5 acc 87.500 (84.445)	lr 0.00984
Train [69][670/3239]	Time 2.544 (0.604)	Data Time 0.002 (0.062)	Loss 2.5473 (2.4597)	Entropy 1.02302 (1.02398)	Top-1 acc 62.500 (64.984)	Top-5 acc 83.203 (84.452)	lr 0.00983
Train [69][680/3239]	Time 0.243 (0.599)	Data Time 0.001 (0.061)	Loss 2.3522 (2.4596)	Entropy 1.02305 (1.02397)	Top-1 acc 65.625 (64.984)	Top-5 acc 85.547 (84.442)	lr 0.00983
Train [69][690/3239]	Time 0.238 (0.597)	Data Time 0.001 (0.060)	Loss 2.4849 (2.4591)	Entropy 1.02305 (1.02395)	Top-1 acc 65.625 (64.994)	Top-5 acc 85.547 (84.444)	lr 0.00983
Train [69][700/3239]	Time 0.244 (0.595)	Data Time 0.001 (0.059)	Loss 2.4173 (2.4595)	Entropy 1.02301 (1.02394)	Top-1 acc 65.234 (64.981)	Top-5 acc 86.719 (84.426)	lr 0.00983
Train [69][710/3239]	Time 0.451 (0.670)	Data Time 0.004 (0.058)	Loss 2.3323 (2.4589)	Entropy 1.02302 (1.02393)	Top-1 acc 68.750 (64.990)	Top-5 acc 85.156 (84.429)	lr 0.00983
Train [69][720/3239]	Time 0.235 (0.667)	Data Time 0.002 (0.058)	Loss 2.5369 (2.4586)	Entropy 1.02300 (1.02391)	Top-1 acc 64.062 (65.006)	Top-5 acc 83.594 (84.435)	lr 0.00983
Train [69][730/3239]	Time 0.234 (0.664)	Data Time 0.002 (0.057)	Loss 2.5679 (2.4588)	Entropy 1.02297 (1.02390)	Top-1 acc 60.156 (64.986)	Top-5 acc 82.812 (84.437)	lr 0.00983
Train [69][740/3239]	Time 0.240 (0.662)	Data Time 0.001 (0.056)	Loss 2.4851 (2.4583)	Entropy 1.02295 (1.02389)	Top-1 acc 64.453 (65.003)	Top-5 acc 85.156 (84.449)	lr 0.00983
Train [69][750/3239]	Time 0.249 (0.660)	Data Time 0.001 (0.055)	Loss 2.4813 (2.4588)	Entropy 1.02289 (1.02388)	Top-1 acc 66.016 (65.002)	Top-5 acc 82.812 (84.434)	lr 0.00983
Train [69][760/3239]	Time 0.232 (0.657)	Data Time 0.004 (0.055)	Loss 2.5604 (2.4588)	Entropy 1.02290 (1.02386)	Top-1 acc 65.625 (64.985)	Top-5 acc 83.594 (84.434)	lr 0.00983
Train [69][770/3239]	Time 0.219 (0.655)	Data Time 0.001 (0.054)	Loss 2.3960 (2.4589)	Entropy 1.02283 (1.02385)	Top-1 acc 66.797 (64.979)	Top-5 acc 84.375 (84.439)	lr 0.00983
Train [69][780/3239]	Time 2.471 (0.653)	Data Time 0.001 (0.053)	Loss 2.5515 (2.4586)	Entropy 1.02283 (1.02384)	Top-1 acc 66.016 (64.992)	Top-5 acc 79.688 (84.441)	lr 0.00982
Train [69][790/3239]	Time 0.261 (0.647)	Data Time 0.001 (0.053)	Loss 2.3801 (2.4579)	Entropy 1.02283 (1.02382)	Top-1 acc 62.109 (65.004)	Top-5 acc 87.891 (84.457)	lr 0.00982
Train [69][800/3239]	Time 0.223 (0.646)	Data Time 0.001 (0.052)	Loss 2.6383 (2.4583)	Entropy 1.02280 (1.02381)	Top-1 acc 61.328 (64.992)	Top-5 acc 80.078 (84.453)	lr 0.00982
Train [69][810/3239]	Time 0.232 (0.643)	Data Time 0.001 (0.051)	Loss 2.4462 (2.4587)	Entropy 1.02276 (1.02380)	Top-1 acc 62.109 (64.982)	Top-5 acc 85.938 (84.450)	lr 0.00982
Train [69][820/3239]	Time 0.206 (0.641)	Data Time 0.001 (0.051)	Loss 2.3914 (2.4604)	Entropy 1.02273 (1.02379)	Top-1 acc 67.188 (64.945)	Top-5 acc 85.156 (84.425)	lr 0.00982
Train [69][830/3239]	Time 0.211 (0.639)	Data Time 0.001 (0.050)	Loss 2.5181 (2.4605)	Entropy 1.02269 (1.02377)	Top-1 acc 62.500 (64.933)	Top-5 acc 83.984 (84.425)	lr 0.00982
Train [69][840/3239]	Time 0.339 (0.637)	Data Time 0.001 (0.050)	Loss 2.4504 (2.4609)	Entropy 1.02259 (1.02376)	Top-1 acc 66.016 (64.927)	Top-5 acc 84.375 (84.421)	lr 0.00982
Train [69][850/3239]	Time 0.243 (0.635)	Data Time 0.001 (0.049)	Loss 2.3054 (2.4608)	Entropy 1.02262 (1.02375)	Top-1 acc 66.797 (64.932)	Top-5 acc 89.062 (84.420)	lr 0.00982
Train [69][860/3239]	Time 0.246 (0.634)	Data Time 0.001 (0.049)	Loss 2.4266 (2.4605)	Entropy 1.02264 (1.02373)	Top-1 acc 67.188 (64.932)	Top-5 acc 85.938 (84.424)	lr 0.00982
Train [69][870/3239]	Time 0.233 (0.632)	Data Time 0.001 (0.048)	Loss 2.4517 (2.4603)	Entropy 1.02264 (1.02372)	Top-1 acc 66.016 (64.934)	Top-5 acc 83.203 (84.418)	lr 0.00982
Train [69][880/3239]	Time 0.228 (0.630)	Data Time 0.001 (0.048)	Loss 2.4547 (2.4607)	Entropy 1.02262 (1.02371)	Top-1 acc 64.844 (64.931)	Top-5 acc 88.281 (84.406)	lr 0.00981
Train [69][890/3239]	Time 2.533 (0.629)	Data Time 0.001 (0.047)	Loss 2.3550 (2.4604)	Entropy 1.02262 (1.02370)	Top-1 acc 64.062 (64.943)	Top-5 acc 87.109 (84.414)	lr 0.00981
Train [69][900/3239]	Time 0.241 (0.624)	Data Time 0.001 (0.047)	Loss 2.4500 (2.4599)	Entropy 1.02249 (1.02368)	Top-1 acc 66.406 (64.947)	Top-5 acc 83.984 (84.426)	lr 0.00981
Train [69][910/3239]	Time 0.219 (0.622)	Data Time 0.001 (0.046)	Loss 2.3449 (2.4598)	Entropy 1.02245 (1.02367)	Top-1 acc 68.750 (64.963)	Top-5 acc 87.891 (84.421)	lr 0.00981
Train [69][920/3239]	Time 0.231 (0.621)	Data Time 0.001 (0.046)	Loss 2.3343 (2.4596)	Entropy 1.02238 (1.02366)	Top-1 acc 67.188 (64.970)	Top-5 acc 85.547 (84.427)	lr 0.00981
Train [69][930/3239]	Time 0.373 (0.620)	Data Time 0.001 (0.045)	Loss 2.5144 (2.4601)	Entropy 1.02238 (1.02364)	Top-1 acc 63.672 (64.955)	Top-5 acc 82.422 (84.414)	lr 0.00981
Train [69][940/3239]	Time 0.237 (0.618)	Data Time 0.001 (0.045)	Loss 2.3251 (2.4601)	Entropy 1.02236 (1.02363)	Top-1 acc 69.141 (64.951)	Top-5 acc 89.062 (84.425)	lr 0.00981
Train [69][950/3239]	Time 0.229 (0.617)	Data Time 0.001 (0.044)	Loss 2.3586 (2.4597)	Entropy 1.02225 (1.02361)	Top-1 acc 66.016 (64.954)	Top-5 acc 86.328 (84.428)	lr 0.00981
Train [69][960/3239]	Time 0.228 (0.615)	Data Time 0.001 (0.044)	Loss 2.5697 (2.4599)	Entropy 1.02220 (1.02360)	Top-1 acc 59.375 (64.932)	Top-5 acc 80.078 (84.427)	lr 0.00981
Train [69][970/3239]	Time 0.238 (0.614)	Data Time 0.001 (0.043)	Loss 3.5618 (2.4610)	Entropy 1.02216 (1.02359)	Top-1 acc 42.578 (64.906)	Top-5 acc 68.750 (84.407)	lr 0.00981
Train [69][980/3239]	Time 0.225 (0.612)	Data Time 0.002 (0.043)	Loss 2.3649 (2.4612)	Entropy 1.02220 (1.02357)	Top-1 acc 67.578 (64.912)	Top-5 acc 87.891 (84.407)	lr 0.00980
Train [69][990/3239]	Time 0.260 (0.611)	Data Time 0.001 (0.042)	Loss 2.7222 (2.4612)	Entropy 1.02219 (1.02356)	Top-1 acc 61.328 (64.912)	Top-5 acc 78.516 (84.400)	lr 0.00980
Train [69][1000/3239]	Time 2.472 (0.609)	Data Time 0.001 (0.042)	Loss 2.5416 (2.4607)	Entropy 1.02219 (1.02354)	Top-1 acc 57.422 (64.924)	Top-5 acc 82.812 (84.410)	lr 0.00980
Train [69][1010/3239]	Time 0.228 (0.606)	Data Time 0.001 (0.042)	Loss 2.5993 (2.4610)	Entropy 1.02212 (1.02353)	Top-1 acc 62.500 (64.916)	Top-5 acc 82.031 (84.408)	lr 0.00980
Train [69][1020/3239]	Time 0.337 (0.604)	Data Time 0.001 (0.041)	Loss 2.2546 (2.4607)	Entropy 1.02209 (1.02351)	Top-1 acc 72.266 (64.923)	Top-5 acc 88.672 (84.416)	lr 0.00980
Train [69][1030/3239]	Time 0.226 (0.603)	Data Time 0.001 (0.041)	Loss 2.5575 (2.4607)	Entropy 1.02207 (1.02350)	Top-1 acc 64.062 (64.920)	Top-5 acc 85.156 (84.420)	lr 0.00980
Train [69][1040/3239]	Time 0.181 (0.602)	Data Time 0.001 (0.040)	Loss 2.4062 (2.4605)	Entropy 1.02205 (1.02349)	Top-1 acc 62.500 (64.918)	Top-5 acc 86.719 (84.426)	lr 0.00980
Train [69][1050/3239]	Time 0.247 (0.601)	Data Time 0.001 (0.040)	Loss 2.5388 (2.4607)	Entropy 1.02208 (1.02347)	Top-1 acc 66.016 (64.907)	Top-5 acc 81.641 (84.420)	lr 0.00980
Train [69][1060/3239]	Time 0.231 (0.600)	Data Time 0.001 (0.040)	Loss 2.5451 (2.4609)	Entropy 1.02204 (1.02346)	Top-1 acc 66.406 (64.913)	Top-5 acc 80.859 (84.410)	lr 0.00980
Train [69][1070/3239]	Time 0.297 (0.649)	Data Time 0.004 (0.039)	Loss 2.4092 (2.4605)	Entropy 1.02195 (1.02345)	Top-1 acc 65.625 (64.923)	Top-5 acc 88.281 (84.412)	lr 0.00980
Train [69][1080/3239]	Time 0.235 (0.648)	Data Time 0.002 (0.039)	Loss 2.4892 (2.4604)	Entropy 1.02192 (1.02343)	Top-1 acc 62.891 (64.917)	Top-5 acc 83.594 (84.411)	lr 0.00979
Train [69][1090/3239]	Time 0.228 (0.646)	Data Time 0.002 (0.039)	Loss 2.4005 (2.4606)	Entropy 1.02193 (1.02342)	Top-1 acc 64.062 (64.913)	Top-5 acc 85.938 (84.405)	lr 0.00979
Train [69][1100/3239]	Time 0.270 (0.645)	Data Time 0.002 (0.038)	Loss 2.3810 (2.4608)	Entropy 1.02195 (1.02341)	Top-1 acc 67.578 (64.908)	Top-5 acc 82.812 (84.395)	lr 0.00979
Train [69][1110/3239]	Time 2.568 (0.643)	Data Time 0.001 (0.038)	Loss 2.4238 (2.4609)	Entropy 1.02195 (1.02339)	Top-1 acc 64.453 (64.908)	Top-5 acc 84.766 (84.394)	lr 0.00979
Train [69][1120/3239]	Time 0.227 (0.639)	Data Time 0.001 (0.038)	Loss 2.7754 (2.4614)	Entropy 1.02194 (1.02338)	Top-1 acc 58.594 (64.895)	Top-5 acc 77.734 (84.386)	lr 0.00979
Train [69][1130/3239]	Time 0.238 (0.638)	Data Time 0.001 (0.037)	Loss 2.5649 (2.4614)	Entropy 1.02182 (1.02337)	Top-1 acc 63.281 (64.896)	Top-5 acc 80.469 (84.385)	lr 0.00979
Train [69][1140/3239]	Time 0.239 (0.636)	Data Time 0.001 (0.037)	Loss 2.5162 (2.4616)	Entropy 1.02177 (1.02335)	Top-1 acc 64.453 (64.891)	Top-5 acc 82.031 (84.387)	lr 0.00979
Train [69][1150/3239]	Time 0.232 (0.635)	Data Time 0.001 (0.037)	Loss 2.3689 (2.4613)	Entropy 1.02174 (1.02334)	Top-1 acc 67.969 (64.895)	Top-5 acc 85.547 (84.398)	lr 0.00979
Train [69][1160/3239]	Time 0.238 (0.633)	Data Time 0.001 (0.037)	Loss 2.5685 (2.4618)	Entropy 1.02151 (1.02332)	Top-1 acc 60.156 (64.881)	Top-5 acc 80.859 (84.390)	lr 0.00979
Train [69][1170/3239]	Time 0.217 (0.632)	Data Time 0.002 (0.036)	Loss 2.4625 (2.4617)	Entropy 1.02141 (1.02331)	Top-1 acc 62.500 (64.880)	Top-5 acc 84.766 (84.394)	lr 0.00979
Train [69][1180/3239]	Time 0.227 (0.631)	Data Time 0.001 (0.036)	Loss 2.5647 (2.4619)	Entropy 1.02139 (1.02329)	Top-1 acc 59.375 (64.873)	Top-5 acc 83.203 (84.391)	lr 0.00978
Train [69][1190/3239]	Time 0.220 (0.629)	Data Time 0.001 (0.036)	Loss 2.4846 (2.4619)	Entropy 1.02136 (1.02327)	Top-1 acc 64.453 (64.878)	Top-5 acc 83.594 (84.393)	lr 0.00978
Train [69][1200/3239]	Time 0.325 (0.628)	Data Time 0.001 (0.035)	Loss 2.4449 (2.4617)	Entropy 1.02132 (1.02326)	Top-1 acc 66.406 (64.885)	Top-5 acc 85.156 (84.398)	lr 0.00978
Train [69][1210/3239]	Time 0.275 (0.627)	Data Time 0.001 (0.035)	Loss 2.4578 (2.4618)	Entropy 1.02126 (1.02324)	Top-1 acc 64.062 (64.882)	Top-5 acc 83.594 (84.397)	lr 0.00978
Train [69][1220/3239]	Time 2.488 (0.625)	Data Time 0.001 (0.035)	Loss 2.4885 (2.4620)	Entropy 1.02126 (1.02323)	Top-1 acc 64.062 (64.879)	Top-5 acc 86.328 (84.397)	lr 0.00978
Train [69][1230/3239]	Time 0.237 (0.622)	Data Time 0.001 (0.035)	Loss 2.3737 (2.4617)	Entropy 1.02123 (1.02321)	Top-1 acc 70.703 (64.890)	Top-5 acc 86.719 (84.400)	lr 0.00978
Train [69][1240/3239]	Time 0.248 (0.621)	Data Time 0.002 (0.034)	Loss 2.4201 (2.4612)	Entropy 1.02121 (1.02319)	Top-1 acc 63.672 (64.893)	Top-5 acc 85.156 (84.406)	lr 0.00978
Train [69][1250/3239]	Time 0.231 (0.620)	Data Time 0.001 (0.034)	Loss 2.5423 (2.4614)	Entropy 1.02116 (1.02318)	Top-1 acc 66.016 (64.891)	Top-5 acc 82.422 (84.402)	lr 0.00978
Train [69][1260/3239]	Time 0.231 (0.619)	Data Time 0.001 (0.034)	Loss 2.5300 (2.4614)	Entropy 1.02118 (1.02316)	Top-1 acc 63.281 (64.888)	Top-5 acc 82.812 (84.403)	lr 0.00978
Train [69][1270/3239]	Time 0.255 (0.618)	Data Time 0.002 (0.033)	Loss 2.3159 (2.4614)	Entropy 1.02117 (1.02315)	Top-1 acc 64.844 (64.880)	Top-5 acc 87.109 (84.408)	lr 0.00978
Train [69][1280/3239]	Time 0.234 (0.617)	Data Time 0.001 (0.033)	Loss 2.3215 (2.4614)	Entropy 1.02110 (1.02313)	Top-1 acc 66.406 (64.875)	Top-5 acc 85.938 (84.406)	lr 0.00977
Train [69][1290/3239]	Time 0.260 (0.616)	Data Time 0.001 (0.033)	Loss 2.4757 (2.4613)	Entropy 1.02109 (1.02312)	Top-1 acc 63.672 (64.876)	Top-5 acc 85.156 (84.413)	lr 0.00977
Train [69][1300/3239]	Time 0.229 (0.614)	Data Time 0.001 (0.033)	Loss 2.5508 (2.4614)	Entropy 1.02108 (1.02310)	Top-1 acc 61.719 (64.870)	Top-5 acc 82.422 (84.412)	lr 0.00977
Train [69][1310/3239]	Time 0.215 (0.613)	Data Time 0.001 (0.033)	Loss 2.5826 (2.4617)	Entropy 1.02094 (1.02308)	Top-1 acc 62.500 (64.860)	Top-5 acc 83.203 (84.410)	lr 0.00977
Train [69][1320/3239]	Time 0.250 (0.612)	Data Time 0.003 (0.032)	Loss 2.5344 (2.4619)	Entropy 1.02094 (1.02307)	Top-1 acc 60.547 (64.851)	Top-5 acc 84.375 (84.408)	lr 0.00977
Train [69][1330/3239]	Time 2.529 (0.611)	Data Time 0.001 (0.032)	Loss 2.6751 (2.4622)	Entropy 1.02094 (1.02305)	Top-1 acc 61.328 (64.846)	Top-5 acc 79.688 (84.405)	lr 0.00977
Train [69][1340/3239]	Time 0.346 (0.608)	Data Time 0.001 (0.032)	Loss 2.4355 (2.4624)	Entropy 1.02096 (1.02304)	Top-1 acc 66.016 (64.838)	Top-5 acc 83.594 (84.405)	lr 0.00977
Train [69][1350/3239]	Time 0.222 (0.607)	Data Time 0.001 (0.032)	Loss 2.4572 (2.4623)	Entropy 1.02092 (1.02302)	Top-1 acc 61.328 (64.839)	Top-5 acc 83.594 (84.404)	lr 0.00977
Train [69][1360/3239]	Time 0.223 (0.606)	Data Time 0.001 (0.031)	Loss 2.5917 (2.4633)	Entropy 1.02087 (1.02300)	Top-1 acc 58.984 (64.819)	Top-5 acc 83.203 (84.387)	lr 0.00977
Train [69][1370/3239]	Time 0.245 (0.605)	Data Time 0.001 (0.031)	Loss 2.5266 (2.4636)	Entropy 1.02088 (1.02299)	Top-1 acc 62.500 (64.808)	Top-5 acc 83.203 (84.381)	lr 0.00977
Train [69][1380/3239]	Time 0.337 (0.605)	Data Time 0.001 (0.031)	Loss 2.3747 (2.4635)	Entropy 1.02088 (1.02297)	Top-1 acc 69.141 (64.812)	Top-5 acc 83.594 (84.383)	lr 0.00976
Train [69][1390/3239]	Time 0.234 (0.604)	Data Time 0.001 (0.031)	Loss 2.5506 (2.4638)	Entropy 1.02093 (1.02296)	Top-1 acc 63.281 (64.806)	Top-5 acc 83.594 (84.376)	lr 0.00976
Train [69][1400/3239]	Time 0.230 (0.603)	Data Time 0.001 (0.031)	Loss 2.6401 (2.4643)	Entropy 1.02088 (1.02294)	Top-1 acc 60.547 (64.785)	Top-5 acc 82.812 (84.370)	lr 0.00976
Train [69][1410/3239]	Time 0.220 (0.602)	Data Time 0.001 (0.030)	Loss 2.5934 (2.4643)	Entropy 1.02090 (1.02293)	Top-1 acc 63.672 (64.781)	Top-5 acc 82.031 (84.373)	lr 0.00976
Train [69][1420/3239]	Time 0.226 (0.601)	Data Time 0.001 (0.030)	Loss 2.6063 (2.4646)	Entropy 1.02087 (1.02292)	Top-1 acc 60.938 (64.784)	Top-5 acc 79.688 (84.366)	lr 0.00976
Train [69][1430/3239]	Time 0.239 (0.639)	Data Time 0.003 (0.030)	Loss 2.2212 (2.4646)	Entropy 1.02087 (1.02290)	Top-1 acc 71.094 (64.786)	Top-5 acc 88.281 (84.370)	lr 0.00976
Train [69][1440/3239]	Time 2.628 (0.638)	Data Time 0.002 (0.030)	Loss 2.4951 (2.4643)	Entropy 1.02087 (1.02289)	Top-1 acc 65.234 (64.793)	Top-5 acc 84.375 (84.377)	lr 0.00976
Train [69][1450/3239]	Time 0.236 (0.636)	Data Time 0.002 (0.030)	Loss 2.3798 (2.4642)	Entropy 1.02086 (1.02287)	Top-1 acc 65.234 (64.794)	Top-5 acc 86.328 (84.377)	lr 0.00976
Train [69][1460/3239]	Time 0.260 (0.635)	Data Time 0.002 (0.029)	Loss 2.6419 (2.4650)	Entropy 1.02092 (1.02286)	Top-1 acc 60.547 (64.777)	Top-5 acc 80.078 (84.365)	lr 0.00976
Train [69][1470/3239]	Time 0.360 (0.634)	Data Time 0.001 (0.029)	Loss 2.6450 (2.4654)	Entropy 1.02087 (1.02285)	Top-1 acc 60.547 (64.765)	Top-5 acc 78.906 (84.355)	lr 0.00976
Train [69][1480/3239]	Time 0.241 (0.633)	Data Time 0.001 (0.029)	Loss 2.4855 (2.4656)	Entropy 1.02081 (1.02283)	Top-1 acc 62.891 (64.762)	Top-5 acc 86.328 (84.356)	lr 0.00976
Train [69][1490/3239]	Time 0.218 (0.631)	Data Time 0.001 (0.029)	Loss 2.4971 (2.4654)	Entropy 1.02080 (1.02282)	Top-1 acc 66.016 (64.770)	Top-5 acc 83.203 (84.361)	lr 0.00975
Train [69][1500/3239]	Time 0.221 (0.630)	Data Time 0.001 (0.029)	Loss 2.5760 (2.4654)	Entropy 1.02078 (1.02281)	Top-1 acc 62.109 (64.767)	Top-5 acc 82.812 (84.360)	lr 0.00975
Train [69][1510/3239]	Time 0.219 (0.629)	Data Time 0.001 (0.028)	Loss 2.6031 (2.4655)	Entropy 1.02071 (1.02279)	Top-1 acc 61.328 (64.761)	Top-5 acc 78.516 (84.356)	lr 0.00975
Train [69][1520/3239]	Time 0.247 (0.628)	Data Time 0.001 (0.028)	Loss 2.2304 (2.4656)	Entropy 1.02070 (1.02278)	Top-1 acc 71.094 (64.764)	Top-5 acc 89.062 (84.353)	lr 0.00975
Train [69][1530/3239]	Time 0.248 (0.628)	Data Time 0.001 (0.028)	Loss 2.4870 (2.4658)	Entropy 1.02066 (1.02276)	Top-1 acc 66.406 (64.760)	Top-5 acc 82.031 (84.347)	lr 0.00975
Train [69][1540/3239]	Time 0.222 (0.627)	Data Time 0.001 (0.028)	Loss 2.5023 (2.4660)	Entropy 1.02059 (1.02275)	Top-1 acc 65.625 (64.757)	Top-5 acc 81.641 (84.344)	lr 0.00975
Train [69][1550/3239]	Time 2.559 (0.626)	Data Time 0.001 (0.028)	Loss 2.4895 (2.4666)	Entropy 1.02059 (1.02274)	Top-1 acc 63.672 (64.741)	Top-5 acc 83.594 (84.335)	lr 0.00975
Train [69][1560/3239]	Time 0.238 (0.623)	Data Time 0.001 (0.028)	Loss 2.5324 (2.4668)	Entropy 1.02058 (1.02272)	Top-1 acc 63.281 (64.735)	Top-5 acc 80.859 (84.328)	lr 0.00975
Train [69][1570/3239]	Time 0.241 (0.622)	Data Time 0.001 (0.027)	Loss 2.4334 (2.4666)	Entropy 1.02058 (1.02271)	Top-1 acc 65.625 (64.740)	Top-5 acc 83.984 (84.332)	lr 0.00975
Train [69][1580/3239]	Time 0.241 (0.621)	Data Time 0.002 (0.027)	Loss 2.5185 (2.4665)	Entropy 1.02048 (1.02270)	Top-1 acc 62.891 (64.740)	Top-5 acc 82.422 (84.330)	lr 0.00975
Train [69][1590/3239]	Time 0.223 (0.621)	Data Time 0.001 (0.027)	Loss 2.4455 (2.4667)	Entropy 1.02038 (1.02268)	Top-1 acc 65.234 (64.738)	Top-5 acc 84.766 (84.324)	lr 0.00974
Train [69][1600/3239]	Time 0.341 (0.620)	Data Time 0.001 (0.027)	Loss 2.4593 (2.4669)	Entropy 1.02034 (1.02267)	Top-1 acc 64.453 (64.734)	Top-5 acc 83.594 (84.324)	lr 0.00974
Train [69][1610/3239]	Time 0.229 (0.619)	Data Time 0.001 (0.027)	Loss 2.4650 (2.4668)	Entropy 1.02031 (1.02265)	Top-1 acc 64.062 (64.735)	Top-5 acc 84.766 (84.329)	lr 0.00974
Train [69][1620/3239]	Time 0.231 (0.618)	Data Time 0.001 (0.027)	Loss 2.6482 (2.4673)	Entropy 1.02042 (1.02264)	Top-1 acc 58.594 (64.722)	Top-5 acc 78.906 (84.320)	lr 0.00974
Train [69][1630/3239]	Time 0.235 (0.617)	Data Time 0.001 (0.026)	Loss 2.4081 (2.4673)	Entropy 1.02037 (1.02262)	Top-1 acc 66.406 (64.721)	Top-5 acc 85.156 (84.318)	lr 0.00974
Train [69][1640/3239]	Time 0.349 (0.616)	Data Time 0.001 (0.026)	Loss 2.3754 (2.4673)	Entropy 1.02034 (1.02261)	Top-1 acc 69.531 (64.727)	Top-5 acc 85.156 (84.316)	lr 0.00974
Train [69][1650/3239]	Time 0.262 (0.616)	Data Time 0.001 (0.026)	Loss 2.4086 (2.4676)	Entropy 1.02023 (1.02260)	Top-1 acc 66.406 (64.718)	Top-5 acc 85.156 (84.311)	lr 0.00974
Train [69][1660/3239]	Time 2.563 (0.615)	Data Time 0.002 (0.026)	Loss 2.3304 (2.4677)	Entropy 1.02023 (1.02258)	Top-1 acc 68.750 (64.714)	Top-5 acc 87.109 (84.312)	lr 0.00974
Train [69][1670/3239]	Time 0.245 (0.613)	Data Time 0.001 (0.026)	Loss 2.3561 (2.4675)	Entropy 1.02040 (1.02257)	Top-1 acc 65.625 (64.715)	Top-5 acc 85.547 (84.317)	lr 0.00974
Train [69][1680/3239]	Time 0.231 (0.612)	Data Time 0.001 (0.026)	Loss 2.4973 (2.4674)	Entropy 1.02038 (1.02256)	Top-1 acc 63.281 (64.712)	Top-5 acc 85.156 (84.317)	lr 0.00974
Train [69][1690/3239]	Time 0.361 (0.611)	Data Time 0.001 (0.026)	Loss 2.7681 (2.4679)	Entropy 1.02030 (1.02254)	Top-1 acc 59.375 (64.701)	Top-5 acc 78.125 (84.305)	lr 0.00973
Train [69][1700/3239]	Time 0.238 (0.611)	Data Time 0.001 (0.025)	Loss 2.4450 (2.4676)	Entropy 1.02028 (1.02253)	Top-1 acc 67.578 (64.710)	Top-5 acc 84.375 (84.312)	lr 0.00973
Train [69][1710/3239]	Time 0.252 (0.610)	Data Time 0.001 (0.025)	Loss 2.4219 (2.4674)	Entropy 1.02025 (1.02252)	Top-1 acc 67.578 (64.712)	Top-5 acc 84.375 (84.317)	lr 0.00973
Train [69][1720/3239]	Time 0.256 (0.609)	Data Time 0.001 (0.025)	Loss 2.3397 (2.4673)	Entropy 1.02026 (1.02250)	Top-1 acc 64.453 (64.716)	Top-5 acc 89.062 (84.318)	lr 0.00973
Train [69][1730/3239]	Time 0.243 (0.608)	Data Time 0.001 (0.025)	Loss 2.4812 (2.4671)	Entropy 1.02024 (1.02249)	Top-1 acc 64.453 (64.716)	Top-5 acc 82.812 (84.321)	lr 0.00973
Train [69][1740/3239]	Time 0.238 (0.608)	Data Time 0.001 (0.025)	Loss 2.5774 (2.4671)	Entropy 1.02022 (1.02248)	Top-1 acc 63.281 (64.715)	Top-5 acc 85.156 (84.323)	lr 0.00973
Train [69][1750/3239]	Time 0.224 (0.607)	Data Time 0.001 (0.025)	Loss 2.3980 (2.4669)	Entropy 1.02020 (1.02246)	Top-1 acc 68.750 (64.724)	Top-5 acc 85.938 (84.329)	lr 0.00973
Train [69][1760/3239]	Time 0.229 (0.606)	Data Time 0.001 (0.025)	Loss 2.4117 (2.4670)	Entropy 1.02021 (1.02245)	Top-1 acc 63.672 (64.723)	Top-5 acc 86.719 (84.329)	lr 0.00973
Train [69][1770/3239]	Time 2.564 (0.605)	Data Time 0.001 (0.025)	Loss 2.5973 (2.4671)	Entropy 1.02021 (1.02244)	Top-1 acc 62.891 (64.722)	Top-5 acc 83.594 (84.327)	lr 0.00973
Train [69][1780/3239]	Time 0.222 (0.603)	Data Time 0.001 (0.024)	Loss 2.6754 (2.4675)	Entropy 1.02017 (1.02243)	Top-1 acc 59.766 (64.714)	Top-5 acc 80.469 (84.320)	lr 0.00973
Train [69][1790/3239]	Time 0.232 (0.603)	Data Time 0.001 (0.024)	Loss 2.5584 (2.4679)	Entropy 1.01966 (1.02241)	Top-1 acc 61.328 (64.707)	Top-5 acc 83.984 (84.314)	lr 0.00972
Train [69][1800/3239]	Time 0.374 (0.630)	Data Time 0.004 (0.024)	Loss 2.5664 (2.4681)	Entropy 1.01966 (1.02240)	Top-1 acc 60.938 (64.697)	Top-5 acc 81.641 (84.309)	lr 0.00972
Train [69][1810/3239]	Time 0.243 (0.630)	Data Time 0.011 (0.024)	Loss 2.4915 (2.4682)	Entropy 1.01965 (1.02238)	Top-1 acc 63.672 (64.695)	Top-5 acc 84.375 (84.308)	lr 0.00972
Train [69][1820/3239]	Time 0.236 (0.629)	Data Time 0.002 (0.024)	Loss 2.4184 (2.4682)	Entropy 1.01963 (1.02237)	Top-1 acc 66.406 (64.692)	Top-5 acc 84.766 (84.309)	lr 0.00972
Train [69][1830/3239]	Time 0.235 (0.628)	Data Time 0.001 (0.024)	Loss 2.4969 (2.4684)	Entropy 1.01955 (1.02235)	Top-1 acc 64.062 (64.692)	Top-5 acc 83.984 (84.307)	lr 0.00972
Train [69][1840/3239]	Time 0.231 (0.628)	Data Time 0.001 (0.024)	Loss 2.4159 (2.4679)	Entropy 1.01951 (1.02234)	Top-1 acc 64.844 (64.701)	Top-5 acc 84.766 (84.313)	lr 0.00972
Train [69][1850/3239]	Time 0.212 (0.627)	Data Time 0.001 (0.024)	Loss 2.4008 (2.4680)	Entropy 1.01940 (1.02232)	Top-1 acc 68.750 (64.703)	Top-5 acc 83.203 (84.314)	lr 0.00972
Train [69][1860/3239]	Time 0.230 (0.626)	Data Time 0.001 (0.023)	Loss 2.3528 (2.4678)	Entropy 1.01941 (1.02230)	Top-1 acc 66.797 (64.707)	Top-5 acc 87.109 (84.318)	lr 0.00972
Train [69][1870/3239]	Time 0.350 (0.625)	Data Time 0.001 (0.023)	Loss 2.5468 (2.4680)	Entropy 1.01935 (1.02229)	Top-1 acc 62.891 (64.703)	Top-5 acc 83.594 (84.315)	lr 0.00972
Train [69][1880/3239]	Time 2.511 (0.624)	Data Time 0.002 (0.023)	Loss 2.3590 (2.4679)	Entropy 1.01935 (1.02227)	Top-1 acc 63.672 (64.702)	Top-5 acc 85.938 (84.314)	lr 0.00972
Train [69][1890/3239]	Time 0.243 (0.622)	Data Time 0.001 (0.023)	Loss 2.6141 (2.4680)	Entropy 1.01934 (1.02226)	Top-1 acc 61.719 (64.704)	Top-5 acc 82.031 (84.311)	lr 0.00971
Train [69][1900/3239]	Time 0.234 (0.621)	Data Time 0.001 (0.023)	Loss 2.3482 (2.4682)	Entropy 1.01936 (1.02224)	Top-1 acc 67.969 (64.695)	Top-5 acc 85.547 (84.310)	lr 0.00971
Train [69][1910/3239]	Time 0.232 (0.620)	Data Time 0.001 (0.023)	Loss 2.5061 (2.4683)	Entropy 1.01937 (1.02223)	Top-1 acc 62.109 (64.695)	Top-5 acc 82.422 (84.307)	lr 0.00971
Train [69][1920/3239]	Time 0.228 (0.620)	Data Time 0.001 (0.023)	Loss 2.4284 (2.4682)	Entropy 1.01936 (1.02221)	Top-1 acc 64.062 (64.693)	Top-5 acc 86.328 (84.309)	lr 0.00971
Train [69][1930/3239]	Time 0.232 (0.619)	Data Time 0.001 (0.023)	Loss 2.4005 (2.4684)	Entropy 1.01940 (1.02220)	Top-1 acc 72.266 (64.688)	Top-5 acc 85.156 (84.302)	lr 0.00971
Train [69][1940/3239]	Time 0.228 (0.618)	Data Time 0.001 (0.023)	Loss 2.4263 (2.4684)	Entropy 1.01938 (1.02218)	Top-1 acc 66.406 (64.687)	Top-5 acc 86.328 (84.302)	lr 0.00971
Train [69][1950/3239]	Time 0.237 (0.617)	Data Time 0.001 (0.022)	Loss 2.5267 (2.4688)	Entropy 1.01937 (1.02217)	Top-1 acc 62.109 (64.674)	Top-5 acc 83.203 (84.294)	lr 0.00971
Train [69][1960/3239]	Time 0.236 (0.617)	Data Time 0.001 (0.022)	Loss 2.5078 (2.4689)	Entropy 1.01936 (1.02215)	Top-1 acc 65.234 (64.668)	Top-5 acc 84.766 (84.291)	lr 0.00971
Train [69][1970/3239]	Time 0.219 (0.616)	Data Time 0.001 (0.022)	Loss 2.5801 (2.4689)	Entropy 1.01933 (1.02214)	Top-1 acc 63.281 (64.671)	Top-5 acc 81.641 (84.292)	lr 0.00971
Train [69][1980/3239]	Time 0.271 (0.615)	Data Time 0.001 (0.022)	Loss 2.4400 (2.4690)	Entropy 1.01921 (1.02213)	Top-1 acc 66.797 (64.669)	Top-5 acc 85.938 (84.294)	lr 0.00971
Train [69][1990/3239]	Time 2.493 (0.615)	Data Time 0.001 (0.022)	Loss 2.7067 (2.4693)	Entropy 1.01921 (1.02211)	Top-1 acc 60.156 (64.668)	Top-5 acc 81.641 (84.290)	lr 0.00970
Train [69][2000/3239]	Time 0.325 (0.613)	Data Time 0.001 (0.022)	Loss 2.5216 (2.4692)	Entropy 1.01922 (1.02210)	Top-1 acc 60.547 (64.669)	Top-5 acc 83.984 (84.294)	lr 0.00970
Train [69][2010/3239]	Time 0.235 (0.612)	Data Time 0.001 (0.022)	Loss 2.6857 (2.4691)	Entropy 1.01915 (1.02208)	Top-1 acc 64.453 (64.676)	Top-5 acc 79.688 (84.302)	lr 0.00970
Train [69][2020/3239]	Time 0.230 (0.611)	Data Time 0.001 (0.022)	Loss 2.4066 (2.4689)	Entropy 1.01911 (1.02207)	Top-1 acc 64.844 (64.679)	Top-5 acc 84.375 (84.303)	lr 0.00970
Train [69][2030/3239]	Time 0.237 (0.611)	Data Time 0.001 (0.022)	Loss 2.3566 (2.4690)	Entropy 1.01908 (1.02205)	Top-1 acc 68.750 (64.681)	Top-5 acc 88.672 (84.303)	lr 0.00970
Train [69][2040/3239]	Time 0.253 (0.610)	Data Time 0.002 (0.021)	Loss 2.6694 (2.4694)	Entropy 1.01908 (1.02204)	Top-1 acc 59.766 (64.671)	Top-5 acc 78.906 (84.293)	lr 0.00970
Train [69][2050/3239]	Time 0.231 (0.609)	Data Time 0.001 (0.021)	Loss 2.4799 (2.4693)	Entropy 1.01907 (1.02202)	Top-1 acc 67.969 (64.676)	Top-5 acc 85.938 (84.296)	lr 0.00970
Train [69][2060/3239]	Time 0.225 (0.609)	Data Time 0.001 (0.021)	Loss 2.4076 (2.4695)	Entropy 1.01899 (1.02201)	Top-1 acc 67.969 (64.676)	Top-5 acc 84.766 (84.294)	lr 0.00970
Train [69][2070/3239]	Time 0.234 (0.608)	Data Time 0.001 (0.021)	Loss 2.4454 (2.4693)	Entropy 1.01896 (1.02199)	Top-1 acc 63.672 (64.680)	Top-5 acc 82.422 (84.292)	lr 0.00970
Train [69][2080/3239]	Time 0.231 (0.608)	Data Time 0.001 (0.021)	Loss 2.5587 (2.4696)	Entropy 1.01891 (1.02198)	Top-1 acc 62.500 (64.671)	Top-5 acc 79.688 (84.288)	lr 0.00970
Train [69][2090/3239]	Time 0.392 (0.607)	Data Time 0.001 (0.021)	Loss 2.3526 (2.4696)	Entropy 1.01885 (1.02197)	Top-1 acc 68.359 (64.677)	Top-5 acc 86.328 (84.292)	lr 0.00970
Train [69][2100/3239]	Time 2.565 (0.606)	Data Time 0.001 (0.021)	Loss 2.4590 (2.4695)	Entropy 1.01885 (1.02195)	Top-1 acc 66.016 (64.675)	Top-5 acc 82.812 (84.292)	lr 0.00969
Train [69][2110/3239]	Time 0.271 (0.605)	Data Time 0.002 (0.021)	Loss 2.3054 (2.4696)	Entropy 1.01886 (1.02194)	Top-1 acc 69.141 (64.671)	Top-5 acc 87.500 (84.288)	lr 0.00969
Train [69][2120/3239]	Time 0.229 (0.604)	Data Time 0.001 (0.021)	Loss 2.4474 (2.4695)	Entropy 1.01881 (1.02192)	Top-1 acc 69.141 (64.675)	Top-5 acc 83.594 (84.287)	lr 0.00969
Train [69][2130/3239]	Time 0.232 (0.603)	Data Time 0.001 (0.021)	Loss 2.4588 (2.4697)	Entropy 1.01878 (1.02191)	Top-1 acc 65.625 (64.673)	Top-5 acc 83.594 (84.284)	lr 0.00969
Train [69][2140/3239]	Time 0.226 (0.603)	Data Time 0.001 (0.021)	Loss 2.7343 (2.4697)	Entropy 1.01872 (1.02189)	Top-1 acc 57.031 (64.672)	Top-5 acc 78.125 (84.282)	lr 0.00969
Train [69][2150/3239]	Time 0.225 (0.602)	Data Time 0.001 (0.020)	Loss 2.5047 (2.4694)	Entropy 1.01868 (1.02188)	Top-1 acc 68.359 (64.683)	Top-5 acc 83.984 (84.288)	lr 0.00969
Train [69][2160/3239]	Time 0.250 (0.627)	Data Time 0.002 (0.020)	Loss 2.4828 (2.4697)	Entropy 1.01864 (1.02186)	Top-1 acc 62.500 (64.673)	Top-5 acc 84.766 (84.282)	lr 0.00969
Train [69][2170/3239]	Time 0.218 (0.626)	Data Time 0.002 (0.020)	Loss 2.5113 (2.4695)	Entropy 1.01863 (1.02185)	Top-1 acc 60.547 (64.675)	Top-5 acc 82.812 (84.279)	lr 0.00969
Train [69][2180/3239]	Time 0.339 (0.626)	Data Time 0.001 (0.020)	Loss 2.2950 (2.4695)	Entropy 1.01866 (1.02183)	Top-1 acc 66.016 (64.676)	Top-5 acc 88.281 (84.278)	lr 0.00969
Train [69][2190/3239]	Time 0.223 (0.625)	Data Time 0.001 (0.020)	Loss 2.6217 (2.4695)	Entropy 1.01864 (1.02182)	Top-1 acc 59.766 (64.677)	Top-5 acc 82.812 (84.283)	lr 0.00969
Train [69][2200/3239]	Time 0.237 (0.624)	Data Time 0.001 (0.020)	Loss 2.4121 (2.4693)	Entropy 1.01862 (1.02180)	Top-1 acc 66.797 (64.679)	Top-5 acc 83.984 (84.288)	lr 0.00968
Train [69][2210/3239]	Time 2.482 (0.624)	Data Time 0.001 (0.020)	Loss 2.2975 (2.4692)	Entropy 1.01862 (1.02179)	Top-1 acc 69.531 (64.683)	Top-5 acc 88.281 (84.290)	lr 0.00968
Train [69][2220/3239]	Time 0.246 (0.622)	Data Time 0.001 (0.020)	Loss 2.4547 (2.4692)	Entropy 1.01858 (1.02177)	Top-1 acc 66.406 (64.686)	Top-5 acc 83.594 (84.289)	lr 0.00968
Train [69][2230/3239]	Time 0.223 (0.621)	Data Time 0.001 (0.020)	Loss 2.3895 (2.4692)	Entropy 1.01855 (1.02176)	Top-1 acc 67.188 (64.680)	Top-5 acc 85.156 (84.291)	lr 0.00968
Train [69][2240/3239]	Time 0.227 (0.621)	Data Time 0.001 (0.020)	Loss 2.4188 (2.4693)	Entropy 1.01850 (1.02175)	Top-1 acc 67.578 (64.679)	Top-5 acc 86.719 (84.289)	lr 0.00968
Train [69][2250/3239]	Time 0.224 (0.620)	Data Time 0.001 (0.020)	Loss 2.8090 (2.4695)	Entropy 1.01856 (1.02173)	Top-1 acc 55.469 (64.669)	Top-5 acc 81.250 (84.286)	lr 0.00968
Train [69][2260/3239]	Time 0.198 (0.619)	Data Time 0.001 (0.020)	Loss 2.4212 (2.4695)	Entropy 1.01856 (1.02172)	Top-1 acc 64.844 (64.668)	Top-5 acc 84.766 (84.285)	lr 0.00968
Train [69][2270/3239]	Time 0.337 (0.619)	Data Time 0.002 (0.019)	Loss 2.5297 (2.4697)	Entropy 1.01858 (1.02170)	Top-1 acc 62.500 (64.662)	Top-5 acc 79.688 (84.281)	lr 0.00968
Train [69][2280/3239]	Time 0.242 (0.618)	Data Time 0.001 (0.019)	Loss 2.4266 (2.4698)	Entropy 1.01853 (1.02169)	Top-1 acc 66.406 (64.660)	Top-5 acc 86.328 (84.280)	lr 0.00968
Train [69][2290/3239]	Time 0.224 (0.617)	Data Time 0.001 (0.019)	Loss 2.3446 (2.4698)	Entropy 1.01850 (1.02168)	Top-1 acc 68.750 (64.666)	Top-5 acc 87.109 (84.280)	lr 0.00968
Train [69][2300/3239]	Time 0.243 (0.617)	Data Time 0.001 (0.019)	Loss 2.3822 (2.4699)	Entropy 1.01850 (1.02166)	Top-1 acc 66.797 (64.659)	Top-5 acc 88.672 (84.281)	lr 0.00967
Train [69][2310/3239]	Time 0.255 (0.616)	Data Time 0.001 (0.019)	Loss 2.2941 (2.4699)	Entropy 1.01847 (1.02165)	Top-1 acc 67.578 (64.657)	Top-5 acc 88.672 (84.284)	lr 0.00967
Train [69][2320/3239]	Time 2.532 (0.616)	Data Time 0.001 (0.019)	Loss 2.3840 (2.4699)	Entropy 1.01847 (1.02163)	Top-1 acc 69.141 (64.651)	Top-5 acc 82.812 (84.284)	lr 0.00967
Train [69][2330/3239]	Time 0.238 (0.614)	Data Time 0.001 (0.019)	Loss 2.6736 (2.4702)	Entropy 1.01844 (1.02162)	Top-1 acc 66.016 (64.645)	Top-5 acc 82.422 (84.283)	lr 0.00967
Train [69][2340/3239]	Time 0.233 (0.613)	Data Time 0.001 (0.019)	Loss 2.3069 (2.4701)	Entropy 1.01847 (1.02161)	Top-1 acc 67.969 (64.647)	Top-5 acc 85.156 (84.281)	lr 0.00967
Train [69][2350/3239]	Time 0.238 (0.613)	Data Time 0.001 (0.019)	Loss 2.4678 (2.4703)	Entropy 1.01856 (1.02159)	Top-1 acc 66.016 (64.643)	Top-5 acc 83.594 (84.279)	lr 0.00967
Train [69][2360/3239]	Time 0.343 (0.612)	Data Time 0.001 (0.019)	Loss 2.6361 (2.4706)	Entropy 1.01847 (1.02158)	Top-1 acc 61.719 (64.636)	Top-5 acc 78.516 (84.272)	lr 0.00967
Train [69][2370/3239]	Time 0.219 (0.611)	Data Time 0.001 (0.019)	Loss 2.4611 (2.4710)	Entropy 1.01845 (1.02157)	Top-1 acc 67.188 (64.631)	Top-5 acc 82.031 (84.261)	lr 0.00967
Train [69][2380/3239]	Time 0.223 (0.611)	Data Time 0.001 (0.019)	Loss 2.3678 (2.4709)	Entropy 1.01841 (1.02156)	Top-1 acc 67.188 (64.636)	Top-5 acc 88.672 (84.263)	lr 0.00967
Train [69][2390/3239]	Time 0.215 (0.610)	Data Time 0.001 (0.019)	Loss 2.5455 (2.4711)	Entropy 1.01834 (1.02154)	Top-1 acc 61.328 (64.630)	Top-5 acc 81.250 (84.259)	lr 0.00967
Train [69][2400/3239]	Time 0.219 (0.610)	Data Time 0.001 (0.018)	Loss 2.7134 (2.4713)	Entropy 1.01832 (1.02153)	Top-1 acc 58.203 (64.627)	Top-5 acc 77.734 (84.255)	lr 0.00966
Train [69][2410/3239]	Time 0.226 (0.609)	Data Time 0.001 (0.018)	Loss 2.4862 (2.4711)	Entropy 1.01827 (1.02152)	Top-1 acc 64.453 (64.628)	Top-5 acc 84.375 (84.261)	lr 0.00966
Train [69][2420/3239]	Time 0.218 (0.608)	Data Time 0.001 (0.018)	Loss 2.3846 (2.4710)	Entropy 1.01823 (1.02150)	Top-1 acc 65.625 (64.629)	Top-5 acc 87.109 (84.264)	lr 0.00966
Train [69][2430/3239]	Time 2.614 (0.608)	Data Time 0.001 (0.018)	Loss 2.4593 (2.4713)	Entropy 1.01823 (1.02149)	Top-1 acc 64.453 (64.621)	Top-5 acc 85.156 (84.258)	lr 0.00966
Train [69][2440/3239]	Time 0.245 (0.606)	Data Time 0.001 (0.018)	Loss 2.4805 (2.4713)	Entropy 1.01822 (1.02147)	Top-1 acc 62.109 (64.623)	Top-5 acc 83.594 (84.257)	lr 0.00966
Train [69][2450/3239]	Time 0.331 (0.606)	Data Time 0.001 (0.018)	Loss 2.4883 (2.4713)	Entropy 1.01820 (1.02146)	Top-1 acc 63.672 (64.621)	Top-5 acc 81.250 (84.258)	lr 0.00966
Train [69][2460/3239]	Time 0.236 (0.605)	Data Time 0.001 (0.018)	Loss 2.4611 (2.4711)	Entropy 1.01814 (1.02145)	Top-1 acc 64.844 (64.624)	Top-5 acc 84.766 (84.264)	lr 0.00966
Train [69][2470/3239]	Time 0.243 (0.605)	Data Time 0.001 (0.018)	Loss 2.4478 (2.4710)	Entropy 1.01800 (1.02143)	Top-1 acc 64.453 (64.623)	Top-5 acc 86.719 (84.265)	lr 0.00966
Train [69][2480/3239]	Time 0.238 (0.604)	Data Time 0.001 (0.018)	Loss 2.5258 (2.4712)	Entropy 1.01798 (1.02142)	Top-1 acc 63.281 (64.616)	Top-5 acc 83.984 (84.263)	lr 0.00966
Train [69][2490/3239]	Time 0.282 (0.604)	Data Time 0.001 (0.018)	Loss 2.4812 (2.4712)	Entropy 1.01801 (1.02141)	Top-1 acc 64.062 (64.617)	Top-5 acc 86.719 (84.260)	lr 0.00966
Train [69][2500/3239]	Time 0.230 (0.603)	Data Time 0.001 (0.018)	Loss 2.3691 (2.4711)	Entropy 1.01795 (1.02139)	Top-1 acc 66.016 (64.621)	Top-5 acc 84.766 (84.260)	lr 0.00965
Train [69][2510/3239]	Time 0.220 (0.603)	Data Time 0.001 (0.018)	Loss 2.4628 (2.4712)	Entropy 1.01794 (1.02138)	Top-1 acc 67.969 (64.618)	Top-5 acc 84.766 (84.260)	lr 0.00965
Train [69][2520/3239]	Time 0.254 (0.625)	Data Time 0.002 (0.018)	Loss 2.4506 (2.4711)	Entropy 1.01791 (1.02137)	Top-1 acc 66.016 (64.618)	Top-5 acc 85.156 (84.264)	lr 0.00965
Train [69][2530/3239]	Time 0.229 (0.624)	Data Time 0.002 (0.018)	Loss 2.4941 (2.4711)	Entropy 1.01786 (1.02135)	Top-1 acc 62.891 (64.620)	Top-5 acc 85.547 (84.260)	lr 0.00965
Train [69][2540/3239]	Time 2.498 (0.623)	Data Time 0.002 (0.018)	Loss 2.5179 (2.4711)	Entropy 1.01786 (1.02134)	Top-1 acc 64.453 (64.619)	Top-5 acc 83.203 (84.261)	lr 0.00965
Train [69][2550/3239]	Time 0.248 (0.622)	Data Time 0.002 (0.018)	Loss 2.4245 (2.4711)	Entropy 1.01785 (1.02132)	Top-1 acc 65.234 (64.620)	Top-5 acc 85.547 (84.262)	lr 0.00965
Train [69][2560/3239]	Time 0.216 (0.621)	Data Time 0.001 (0.017)	Loss 2.4654 (2.4710)	Entropy 1.01781 (1.02131)	Top-1 acc 66.016 (64.622)	Top-5 acc 84.375 (84.265)	lr 0.00965
Train [69][2570/3239]	Time 0.230 (0.621)	Data Time 0.001 (0.017)	Loss 2.4377 (2.4711)	Entropy 1.01780 (1.02130)	Top-1 acc 67.188 (64.621)	Top-5 acc 85.547 (84.263)	lr 0.00965
Train [69][2580/3239]	Time 0.329 (0.620)	Data Time 0.002 (0.017)	Loss 2.4483 (2.4712)	Entropy 1.01776 (1.02128)	Top-1 acc 64.062 (64.616)	Top-5 acc 84.766 (84.261)	lr 0.00965
Train [69][2590/3239]	Time 0.234 (0.620)	Data Time 0.001 (0.017)	Loss 2.3831 (2.4715)	Entropy 1.01775 (1.02127)	Top-1 acc 70.312 (64.611)	Top-5 acc 84.766 (84.254)	lr 0.00965
Train [69][2600/3239]	Time 0.238 (0.619)	Data Time 0.002 (0.017)	Loss 2.2766 (2.4716)	Entropy 1.01775 (1.02126)	Top-1 acc 66.797 (64.611)	Top-5 acc 85.938 (84.254)	lr 0.00964
Train [69][2610/3239]	Time 0.226 (0.619)	Data Time 0.001 (0.017)	Loss 2.4829 (2.4715)	Entropy 1.01765 (1.02124)	Top-1 acc 62.109 (64.611)	Top-5 acc 83.203 (84.254)	lr 0.00964
Train [69][2620/3239]	Time 0.226 (0.618)	Data Time 0.001 (0.017)	Loss 2.4128 (2.4715)	Entropy 1.01761 (1.02123)	Top-1 acc 63.281 (64.611)	Top-5 acc 86.719 (84.255)	lr 0.00964
Train [69][2630/3239]	Time 0.226 (0.618)	Data Time 0.001 (0.017)	Loss 2.4307 (2.4717)	Entropy 1.01770 (1.02122)	Top-1 acc 64.453 (64.605)	Top-5 acc 84.375 (84.251)	lr 0.00964
Train [69][2640/3239]	Time 0.255 (0.617)	Data Time 0.001 (0.017)	Loss 2.5301 (2.4718)	Entropy 1.01770 (1.02120)	Top-1 acc 64.453 (64.604)	Top-5 acc 83.203 (84.251)	lr 0.00964
Train [69][2650/3239]	Time 0.231 (0.617)	Data Time 0.001 (0.017)	Loss 2.4550 (2.4717)	Entropy 1.01768 (1.02119)	Top-1 acc 64.453 (64.602)	Top-5 acc 82.031 (84.252)	lr 0.00964
Train [69][2660/3239]	Time 0.226 (0.616)	Data Time 0.001 (0.017)	Loss 2.4278 (2.4719)	Entropy 1.01761 (1.02118)	Top-1 acc 63.281 (64.594)	Top-5 acc 86.328 (84.249)	lr 0.00964
Train [69][2670/3239]	Time 0.329 (0.615)	Data Time 0.001 (0.017)	Loss 2.4120 (2.4719)	Entropy 1.01751 (1.02116)	Top-1 acc 67.188 (64.594)	Top-5 acc 84.766 (84.251)	lr 0.00964
Train [69][2680/3239]	Time 0.255 (0.615)	Data Time 0.001 (0.017)	Loss 2.4392 (2.4720)	Entropy 1.01746 (1.02115)	Top-1 acc 65.234 (64.594)	Top-5 acc 85.938 (84.249)	lr 0.00964
Train [69][2690/3239]	Time 0.270 (0.614)	Data Time 0.001 (0.017)	Loss 2.3643 (2.4719)	Entropy 1.01750 (1.02113)	Top-1 acc 65.234 (64.599)	Top-5 acc 86.328 (84.250)	lr 0.00964
Train [69][2700/3239]	Time 0.240 (0.614)	Data Time 0.001 (0.017)	Loss 2.7929 (2.4722)	Entropy 1.01745 (1.02112)	Top-1 acc 59.375 (64.589)	Top-5 acc 80.078 (84.247)	lr 0.00963
Train [69][2710/3239]	Time 0.212 (0.613)	Data Time 0.001 (0.017)	Loss 2.3415 (2.4723)	Entropy 1.01744 (1.02111)	Top-1 acc 67.969 (64.587)	Top-5 acc 87.109 (84.247)	lr 0.00963
Train [69][2720/3239]	Time 0.335 (0.613)	Data Time 0.001 (0.017)	Loss 2.5120 (2.4723)	Entropy 1.01739 (1.02109)	Top-1 acc 60.938 (64.585)	Top-5 acc 83.203 (84.248)	lr 0.00963
Train [69][2730/3239]	Time 0.257 (0.612)	Data Time 0.001 (0.016)	Loss 2.3817 (2.4725)	Entropy 1.01733 (1.02108)	Top-1 acc 64.844 (64.579)	Top-5 acc 86.719 (84.246)	lr 0.00963
Train [69][2740/3239]	Time 0.225 (0.612)	Data Time 0.001 (0.016)	Loss 2.4564 (2.4728)	Entropy 1.01730 (1.02107)	Top-1 acc 62.109 (64.568)	Top-5 acc 84.766 (84.239)	lr 0.00963
Train [69][2750/3239]	Time 0.216 (0.611)	Data Time 0.001 (0.016)	Loss 2.3714 (2.4728)	Entropy 1.01733 (1.02105)	Top-1 acc 66.797 (64.569)	Top-5 acc 87.500 (84.242)	lr 0.00963
Train [69][2760/3239]	Time 0.217 (0.611)	Data Time 0.001 (0.016)	Loss 2.4458 (2.4729)	Entropy 1.01727 (1.02104)	Top-1 acc 66.016 (64.566)	Top-5 acc 85.156 (84.241)	lr 0.00963
Train [69][2770/3239]	Time 0.256 (0.610)	Data Time 0.001 (0.016)	Loss 2.4099 (2.4730)	Entropy 1.01721 (1.02103)	Top-1 acc 67.188 (64.559)	Top-5 acc 86.328 (84.237)	lr 0.00963
Train [69][2780/3239]	Time 0.231 (0.610)	Data Time 0.001 (0.016)	Loss 2.6477 (2.4730)	Entropy 1.01721 (1.02101)	Top-1 acc 61.719 (64.559)	Top-5 acc 82.422 (84.240)	lr 0.00963
Train [69][2790/3239]	Time 0.254 (0.609)	Data Time 0.001 (0.016)	Loss 2.5276 (2.4731)	Entropy 1.01720 (1.02100)	Top-1 acc 60.547 (64.555)	Top-5 acc 81.250 (84.241)	lr 0.00963
Train [69][2800/3239]	Time 0.230 (0.609)	Data Time 0.001 (0.016)	Loss 2.6514 (2.4731)	Entropy 1.01721 (1.02099)	Top-1 acc 60.547 (64.554)	Top-5 acc 82.812 (84.241)	lr 0.00963
Train [69][2810/3239]	Time 0.362 (0.608)	Data Time 0.001 (0.016)	Loss 2.6122 (2.4732)	Entropy 1.01717 (1.02097)	Top-1 acc 60.156 (64.553)	Top-5 acc 80.078 (84.239)	lr 0.00962
Train [69][2820/3239]	Time 0.230 (0.608)	Data Time 0.001 (0.016)	Loss 2.2758 (2.4732)	Entropy 1.01716 (1.02096)	Top-1 acc 66.016 (64.553)	Top-5 acc 86.328 (84.234)	lr 0.00962
Train [69][2830/3239]	Time 0.251 (0.607)	Data Time 0.001 (0.016)	Loss 2.4677 (2.4734)	Entropy 1.01709 (1.02094)	Top-1 acc 63.672 (64.550)	Top-5 acc 85.547 (84.231)	lr 0.00962
Train [69][2840/3239]	Time 0.225 (0.607)	Data Time 0.001 (0.016)	Loss 2.3300 (2.4733)	Entropy 1.01711 (1.02093)	Top-1 acc 69.141 (64.550)	Top-5 acc 88.281 (84.233)	lr 0.00962
Train [69][2850/3239]	Time 0.211 (0.606)	Data Time 0.001 (0.016)	Loss 2.4799 (2.4732)	Entropy 1.01707 (1.02092)	Top-1 acc 65.234 (64.554)	Top-5 acc 85.547 (84.235)	lr 0.00962
Train [69][2860/3239]	Time 0.382 (0.623)	Data Time 0.004 (0.016)	Loss 2.3765 (2.4731)	Entropy 1.01702 (1.02090)	Top-1 acc 68.750 (64.558)	Top-5 acc 85.547 (84.237)	lr 0.00962
Train [69][2870/3239]	Time 0.220 (0.623)	Data Time 0.002 (0.016)	Loss 2.7298 (2.4733)	Entropy 1.01696 (1.02089)	Top-1 acc 60.547 (64.557)	Top-5 acc 78.906 (84.232)	lr 0.00962
Train [69][2880/3239]	Time 0.218 (0.623)	Data Time 0.002 (0.016)	Loss 2.3707 (2.4733)	Entropy 1.01689 (1.02088)	Top-1 acc 66.406 (64.556)	Top-5 acc 87.109 (84.231)	lr 0.00962
Train [69][2890/3239]	Time 0.222 (0.622)	Data Time 0.001 (0.016)	Loss 2.5888 (2.4733)	Entropy 1.01682 (1.02086)	Top-1 acc 60.156 (64.556)	Top-5 acc 82.422 (84.233)	lr 0.00962
Train [69][2900/3239]	Time 0.240 (0.622)	Data Time 0.001 (0.016)	Loss 2.5084 (2.4733)	Entropy 1.01684 (1.02085)	Top-1 acc 63.281 (64.555)	Top-5 acc 83.594 (84.231)	lr 0.00962
Train [69][2910/3239]	Time 0.223 (0.621)	Data Time 0.001 (0.016)	Loss 2.5068 (2.4733)	Entropy 1.01686 (1.02084)	Top-1 acc 63.281 (64.559)	Top-5 acc 82.812 (84.231)	lr 0.00961
Train [69][2920/3239]	Time 0.229 (0.621)	Data Time 0.001 (0.016)	Loss 2.5202 (2.4734)	Entropy 1.01683 (1.02082)	Top-1 acc 64.844 (64.558)	Top-5 acc 84.375 (84.230)	lr 0.00961
Train [69][2930/3239]	Time 0.292 (0.620)	Data Time 0.001 (0.015)	Loss 2.5963 (2.4736)	Entropy 1.01683 (1.02081)	Top-1 acc 62.891 (64.555)	Top-5 acc 83.984 (84.226)	lr 0.00961
Train [69][2940/3239]	Time 0.394 (0.620)	Data Time 0.001 (0.015)	Loss 2.5519 (2.4735)	Entropy 1.01681 (1.02079)	Top-1 acc 61.328 (64.556)	Top-5 acc 82.812 (84.227)	lr 0.00961
Train [69][2950/3239]	Time 0.249 (0.619)	Data Time 0.001 (0.015)	Loss 2.3618 (2.4736)	Entropy 1.01676 (1.02078)	Top-1 acc 67.969 (64.555)	Top-5 acc 88.281 (84.229)	lr 0.00961
Train [69][2960/3239]	Time 0.232 (0.619)	Data Time 0.001 (0.015)	Loss 2.3735 (2.4738)	Entropy 1.01664 (1.02077)	Top-1 acc 63.672 (64.548)	Top-5 acc 84.766 (84.222)	lr 0.00961
Train [69][2970/3239]	Time 0.230 (0.618)	Data Time 0.001 (0.015)	Loss 2.3718 (2.4737)	Entropy 1.01657 (1.02075)	Top-1 acc 64.453 (64.548)	Top-5 acc 85.938 (84.224)	lr 0.00961
Train [69][2980/3239]	Time 0.272 (0.618)	Data Time 0.001 (0.015)	Loss 2.4396 (2.4739)	Entropy 1.01656 (1.02074)	Top-1 acc 64.062 (64.545)	Top-5 acc 84.375 (84.221)	lr 0.00961
Train [69][2990/3239]	Time 0.253 (0.617)	Data Time 0.001 (0.015)	Loss 2.5744 (2.4739)	Entropy 1.01654 (1.02073)	Top-1 acc 61.328 (64.543)	Top-5 acc 82.031 (84.221)	lr 0.00961
Train [69][3000/3239]	Time 0.217 (0.617)	Data Time 0.001 (0.015)	Loss 2.3666 (2.4738)	Entropy 1.01647 (1.02071)	Top-1 acc 66.016 (64.545)	Top-5 acc 87.500 (84.221)	lr 0.00961
Train [69][3010/3239]	Time 0.262 (0.616)	Data Time 0.001 (0.015)	Loss 2.5047 (2.4738)	Entropy 1.01646 (1.02070)	Top-1 acc 62.500 (64.541)	Top-5 acc 81.641 (84.222)	lr 0.00960
Train [69][3020/3239]	Time 0.287 (0.616)	Data Time 0.001 (0.015)	Loss 2.6849 (2.4740)	Entropy 1.01644 (1.02068)	Top-1 acc 58.984 (64.536)	Top-5 acc 80.078 (84.218)	lr 0.00960
Train [69][3030/3239]	Time 0.237 (0.615)	Data Time 0.001 (0.015)	Loss 2.5404 (2.4741)	Entropy 1.01642 (1.02067)	Top-1 acc 63.281 (64.534)	Top-5 acc 82.031 (84.216)	lr 0.00960
Train [69][3040/3239]	Time 0.203 (0.615)	Data Time 0.001 (0.015)	Loss 2.4615 (2.4741)	Entropy 1.01631 (1.02066)	Top-1 acc 65.625 (64.536)	Top-5 acc 83.203 (84.216)	lr 0.00960
Train [69][3050/3239]	Time 0.255 (0.614)	Data Time 0.001 (0.015)	Loss 2.2803 (2.4740)	Entropy 1.01631 (1.02064)	Top-1 acc 69.141 (64.538)	Top-5 acc 87.109 (84.218)	lr 0.00960
Train [69][3060/3239]	Time 0.225 (0.614)	Data Time 0.001 (0.015)	Loss 2.4945 (2.4740)	Entropy 1.01629 (1.02063)	Top-1 acc 65.234 (64.537)	Top-5 acc 85.547 (84.220)	lr 0.00960
Train [69][3070/3239]	Time 0.361 (0.613)	Data Time 0.001 (0.015)	Loss 2.5722 (2.4740)	Entropy 1.01625 (1.02061)	Top-1 acc 63.281 (64.537)	Top-5 acc 85.156 (84.221)	lr 0.00960
Train [69][3080/3239]	Time 0.244 (0.613)	Data Time 0.001 (0.015)	Loss 2.4830 (2.4741)	Entropy 1.01564 (1.02060)	Top-1 acc 62.500 (64.536)	Top-5 acc 83.984 (84.221)	lr 0.00960
Train [69][3090/3239]	Time 0.251 (0.612)	Data Time 0.001 (0.015)	Loss 2.4398 (2.4742)	Entropy 1.01557 (1.02058)	Top-1 acc 64.844 (64.536)	Top-5 acc 85.156 (84.220)	lr 0.00960
Train [69][3100/3239]	Time 0.233 (0.612)	Data Time 0.001 (0.015)	Loss 2.3592 (2.4743)	Entropy 1.01558 (1.02057)	Top-1 acc 66.797 (64.533)	Top-5 acc 87.500 (84.220)	lr 0.00960
Train [69][3110/3239]	Time 0.226 (0.611)	Data Time 0.001 (0.015)	Loss 2.5653 (2.4744)	Entropy 1.01546 (1.02055)	Top-1 acc 62.109 (64.532)	Top-5 acc 80.859 (84.217)	lr 0.00959
Train [69][3120/3239]	Time 0.234 (0.611)	Data Time 0.001 (0.015)	Loss 2.3951 (2.4743)	Entropy 1.01555 (1.02053)	Top-1 acc 67.969 (64.534)	Top-5 acc 86.328 (84.219)	lr 0.00959
Train [69][3130/3239]	Time 0.229 (0.611)	Data Time 0.001 (0.015)	Loss 2.3554 (2.4742)	Entropy 1.01552 (1.02052)	Top-1 acc 64.453 (64.536)	Top-5 acc 87.500 (84.218)	lr 0.00959
Train [69][3140/3239]	Time 0.213 (0.610)	Data Time 0.001 (0.015)	Loss 2.5360 (2.4743)	Entropy 1.01547 (1.02050)	Top-1 acc 62.891 (64.533)	Top-5 acc 81.641 (84.216)	lr 0.00959
Train [69][3150/3239]	Time 0.233 (0.610)	Data Time 0.001 (0.014)	Loss 2.5021 (2.4744)	Entropy 1.01544 (1.02049)	Top-1 acc 64.844 (64.532)	Top-5 acc 83.594 (84.213)	lr 0.00959
Train [69][3160/3239]	Time 0.238 (0.609)	Data Time 0.001 (0.014)	Loss 2.4096 (2.4744)	Entropy 1.01541 (1.02047)	Top-1 acc 68.359 (64.535)	Top-5 acc 84.766 (84.211)	lr 0.00959
Train [69][3170/3239]	Time 0.251 (0.609)	Data Time 0.001 (0.014)	Loss 2.4872 (2.4745)	Entropy 1.01540 (1.02045)	Top-1 acc 65.234 (64.535)	Top-5 acc 86.328 (84.212)	lr 0.00959
Train [69][3180/3239]	Time 0.218 (0.608)	Data Time 0.000 (0.014)	Loss 2.5792 (2.4744)	Entropy 1.01537 (1.02044)	Top-1 acc 62.891 (64.536)	Top-5 acc 83.594 (84.212)	lr 0.00959
Train [69][3190/3239]	Time 0.285 (0.624)	Data Time 0.000 (0.014)	Loss 2.4810 (2.4745)	Entropy 1.01535 (1.02042)	Top-1 acc 61.719 (64.533)	Top-5 acc 84.766 (84.209)	lr 0.00959
Train [69][3200/3239]	Time 0.318 (0.624)	Data Time 0.000 (0.014)	Loss 2.4866 (2.4747)	Entropy 1.01532 (1.02041)	Top-1 acc 62.500 (64.531)	Top-5 acc 82.812 (84.205)	lr 0.00959
Train [69][3210/3239]	Time 0.230 (0.623)	Data Time 0.000 (0.014)	Loss 2.4345 (2.4745)	Entropy 1.01531 (1.02039)	Top-1 acc 66.797 (64.537)	Top-5 acc 84.766 (84.208)	lr 0.00958
Train [69][3220/3239]	Time 0.223 (0.623)	Data Time 0.000 (0.014)	Loss 2.4675 (2.4744)	Entropy 1.01531 (1.02038)	Top-1 acc 65.625 (64.541)	Top-5 acc 83.594 (84.209)	lr 0.00958
Train [69][3230/3239]	Time 0.249 (0.622)	Data Time 0.000 (0.014)	Loss 2.4784 (2.4744)	Entropy 1.01530 (1.02036)	Top-1 acc 64.453 (64.538)	Top-5 acc 84.766 (84.210)	lr 0.00958
Train [69][3239/3239]	Time 2.308 (0.622)	Data Time 0.000 (0.014)	Loss 3.5334 (2.4746)	Entropy 1.01530 (1.02035)	Top-1 acc 38.272 (64.530)	Top-5 acc 67.901 (84.209)	lr 0.00958
==========Valid [69/120]	loss 1.381	top-1 acc 68.599 (68.599)	top-5 acc 87.617	Train top-1 64.530	top-5 84.209	Entropy 1.01530	Latency-None: 0.000ms	Flops: 546.53M
Train [70][0/3239]	Time 42.383 (42.383)	Data Time 41.780 (41.780)	Loss 2.6459 (2.6459)	Entropy 1.01529 (1.01529)	Top-1 acc 60.938 (60.938)	Top-5 acc 80.469 (80.469)	lr 0.00958
Train [70][10/3239]	Time 2.668 (4.416)	Data Time 0.001 (3.812)	Loss 2.3572 (2.4359)	Entropy 1.01529 (1.01529)	Top-1 acc 66.016 (64.879)	Top-5 acc 86.719 (85.192)	lr 0.00958
Train [70][20/3239]	Time 0.240 (2.432)	Data Time 0.001 (1.997)	Loss 2.3704 (2.4337)	Entropy 1.01528 (1.01529)	Top-1 acc 67.188 (65.346)	Top-5 acc 86.719 (84.821)	lr 0.00958
Train [70][30/3239]	Time 0.229 (1.798)	Data Time 0.001 (1.353)	Loss 2.2681 (2.4496)	Entropy 1.01514 (1.01525)	Top-1 acc 70.703 (65.360)	Top-5 acc 87.500 (84.640)	lr 0.00958
Train [70][40/3239]	Time 0.228 (1.471)	Data Time 0.001 (1.024)	Loss 2.4098 (2.4402)	Entropy 1.01514 (1.01522)	Top-1 acc 68.359 (65.539)	Top-5 acc 85.156 (84.870)	lr 0.00958
Train [70][50/3239]	Time 0.283 (1.277)	Data Time 0.001 (0.823)	Loss 2.5111 (2.4427)	Entropy 1.01508 (1.01520)	Top-1 acc 66.797 (65.694)	Top-5 acc 82.812 (84.666)	lr 0.00958
Train [70][60/3239]	Time 0.224 (1.143)	Data Time 0.001 (0.689)	Loss 2.3338 (2.4435)	Entropy 1.01507 (1.01518)	Top-1 acc 71.484 (65.510)	Top-5 acc 86.719 (84.574)	lr 0.00958
Train [70][70/3239]	Time 0.248 (1.047)	Data Time 0.001 (0.592)	Loss 2.4775 (2.4434)	Entropy 1.01514 (1.01517)	Top-1 acc 65.234 (65.493)	Top-5 acc 83.984 (84.716)	lr 0.00958
Train [70][80/3239]	Time 0.245 (0.976)	Data Time 0.001 (0.519)	Loss 2.3949 (2.4444)	Entropy 1.01509 (1.01516)	Top-1 acc 66.797 (65.408)	Top-5 acc 85.938 (84.688)	lr 0.00957
Train [70][90/3239]	Time 0.237 (0.922)	Data Time 0.001 (0.462)	Loss 2.6359 (2.4500)	Entropy 1.01507 (1.01515)	Top-1 acc 58.203 (65.282)	Top-5 acc 83.203 (84.620)	lr 0.00957
Train [70][100/3239]	Time 0.218 (0.877)	Data Time 0.001 (0.416)	Loss 2.4551 (2.4477)	Entropy 1.01511 (1.01514)	Top-1 acc 60.547 (65.265)	Top-5 acc 83.594 (84.673)	lr 0.00957
Train [70][110/3239]	Time 0.238 (0.841)	Data Time 0.001 (0.379)	Loss 2.4758 (2.4469)	Entropy 1.01509 (1.01514)	Top-1 acc 66.406 (65.231)	Top-5 acc 82.812 (84.706)	lr 0.00957
Train [70][120/3239]	Time 2.610 (0.811)	Data Time 0.001 (0.348)	Loss 2.3273 (2.4436)	Entropy 1.01509 (1.01514)	Top-1 acc 66.797 (65.234)	Top-5 acc 86.328 (84.785)	lr 0.00957
Train [70][130/3239]	Time 0.239 (0.767)	Data Time 0.001 (0.321)	Loss 2.5130 (2.4417)	Entropy 1.01509 (1.01513)	Top-1 acc 65.234 (65.273)	Top-5 acc 83.594 (84.810)	lr 0.00957
Train [70][140/3239]	Time 0.242 (0.746)	Data Time 0.001 (0.299)	Loss 2.3860 (2.4409)	Entropy 1.01504 (1.01513)	Top-1 acc 67.578 (65.279)	Top-5 acc 85.938 (84.802)	lr 0.00957
Train [70][150/3239]	Time 0.248 (0.729)	Data Time 0.001 (0.279)	Loss 2.6316 (2.4413)	Entropy 1.01496 (1.01512)	Top-1 acc 60.547 (65.234)	Top-5 acc 81.250 (84.810)	lr 0.00957
Train [70][160/3239]	Time 0.245 (0.713)	Data Time 0.001 (0.262)	Loss 2.5550 (2.4411)	Entropy 1.01493 (1.01511)	Top-1 acc 65.234 (65.283)	Top-5 acc 83.203 (84.795)	lr 0.00957
Train [70][170/3239]	Time 0.241 (0.700)	Data Time 0.001 (0.247)	Loss 2.3879 (2.4419)	Entropy 1.01492 (1.01510)	Top-1 acc 65.625 (65.257)	Top-5 acc 85.547 (84.809)	lr 0.00957
Train [70][180/3239]	Time 0.229 (0.687)	Data Time 0.001 (0.233)	Loss 2.3804 (2.4385)	Entropy 1.01487 (1.01508)	Top-1 acc 65.625 (65.327)	Top-5 acc 85.547 (84.830)	lr 0.00956
Train [70][190/3239]	Time 0.313 (0.675)	Data Time 0.002 (0.221)	Loss 2.4324 (2.4394)	Entropy 1.01485 (1.01507)	Top-1 acc 67.188 (65.333)	Top-5 acc 85.938 (84.815)	lr 0.00956
Train [70][200/3239]	Time 0.217 (0.665)	Data Time 0.001 (0.210)	Loss 2.2093 (2.4369)	Entropy 1.01483 (1.01506)	Top-1 acc 70.703 (65.341)	Top-5 acc 89.844 (84.898)	lr 0.00956
Train [70][210/3239]	Time 0.231 (0.656)	Data Time 0.001 (0.200)	Loss 2.4387 (2.4357)	Entropy 1.01481 (1.01505)	Top-1 acc 62.500 (65.325)	Top-5 acc 86.328 (84.934)	lr 0.00956
Train [70][220/3239]	Time 0.259 (0.648)	Data Time 0.001 (0.191)	Loss 2.4977 (2.4355)	Entropy 1.01482 (1.01504)	Top-1 acc 62.500 (65.351)	Top-5 acc 83.984 (84.914)	lr 0.00956
Train [70][230/3239]	Time 2.630 (0.641)	Data Time 0.001 (0.183)	Loss 2.3497 (2.4367)	Entropy 1.01482 (1.01503)	Top-1 acc 66.797 (65.341)	Top-5 acc 86.328 (84.904)	lr 0.00956
Train [70][240/3239]	Time 0.246 (0.625)	Data Time 0.001 (0.175)	Loss 2.3907 (2.4360)	Entropy 1.01483 (1.01502)	Top-1 acc 67.188 (65.372)	Top-5 acc 85.547 (84.902)	lr 0.00956
Train [70][250/3239]	Time 0.240 (0.619)	Data Time 0.001 (0.168)	Loss 2.4633 (2.4384)	Entropy 1.01472 (1.01501)	Top-1 acc 66.016 (65.328)	Top-5 acc 83.203 (84.845)	lr 0.00956
Train [70][260/3239]	Time 0.242 (0.614)	Data Time 0.001 (0.162)	Loss 2.5275 (2.4407)	Entropy 1.01476 (1.01500)	Top-1 acc 61.719 (65.294)	Top-5 acc 82.031 (84.808)	lr 0.00956
Train [70][270/3239]	Time 0.237 (0.609)	Data Time 0.001 (0.156)	Loss 2.4510 (2.4410)	Entropy 1.01486 (1.01499)	Top-1 acc 64.844 (65.260)	Top-5 acc 85.938 (84.812)	lr 0.00956
Train [70][280/3239]	Time 0.232 (0.605)	Data Time 0.001 (0.151)	Loss 2.3673 (2.4398)	Entropy 1.01484 (1.01499)	Top-1 acc 68.750 (65.301)	Top-5 acc 85.547 (84.842)	lr 0.00955
Train [70][290/3239]	Time 0.222 (0.600)	Data Time 0.001 (0.146)	Loss 2.4230 (2.4406)	Entropy 1.01468 (1.01498)	Top-1 acc 62.891 (65.265)	Top-5 acc 87.891 (84.823)	lr 0.00955
Train [70][300/3239]	Time 0.222 (0.596)	Data Time 0.001 (0.141)	Loss 2.4822 (2.4422)	Entropy 1.01463 (1.01497)	Top-1 acc 62.109 (65.227)	Top-5 acc 82.031 (84.782)	lr 0.00955
Train [70][310/3239]	Time 0.252 (0.762)	Data Time 0.003 (0.136)	Loss 2.2935 (2.4426)	Entropy 1.01454 (1.01496)	Top-1 acc 65.625 (65.184)	Top-5 acc 88.281 (84.763)	lr 0.00955
Train [70][320/3239]	Time 0.372 (0.758)	Data Time 0.002 (0.132)	Loss 2.5589 (2.4419)	Entropy 1.01444 (1.01494)	Top-1 acc 63.672 (65.217)	Top-5 acc 78.906 (84.760)	lr 0.00955
Train [70][330/3239]	Time 0.238 (0.750)	Data Time 0.001 (0.128)	Loss 2.3453 (2.4397)	Entropy 1.01442 (1.01493)	Top-1 acc 67.188 (65.271)	Top-5 acc 88.672 (84.793)	lr 0.00955
Train [70][340/3239]	Time 2.619 (0.742)	Data Time 0.001 (0.125)	Loss 2.5552 (2.4406)	Entropy 1.01442 (1.01491)	Top-1 acc 64.062 (65.263)	Top-5 acc 83.203 (84.781)	lr 0.00955
Train [70][350/3239]	Time 0.232 (0.728)	Data Time 0.001 (0.121)	Loss 3.3164 (2.4433)	Entropy 1.01441 (1.01490)	Top-1 acc 48.438 (65.213)	Top-5 acc 75.391 (84.757)	lr 0.00955
Train [70][360/3239]	Time 0.253 (0.721)	Data Time 0.001 (0.118)	Loss 2.3447 (2.4429)	Entropy 1.01439 (1.01489)	Top-1 acc 67.188 (65.245)	Top-5 acc 86.719 (84.746)	lr 0.00955
Train [70][370/3239]	Time 0.335 (0.716)	Data Time 0.002 (0.115)	Loss 2.3723 (2.4426)	Entropy 1.01435 (1.01487)	Top-1 acc 68.750 (65.234)	Top-5 acc 86.719 (84.747)	lr 0.00955
Train [70][380/3239]	Time 0.176 (0.709)	Data Time 0.001 (0.112)	Loss 2.4958 (2.4436)	Entropy 1.01431 (1.01486)	Top-1 acc 65.625 (65.230)	Top-5 acc 83.984 (84.741)	lr 0.00954
Train [70][390/3239]	Time 0.234 (0.704)	Data Time 0.001 (0.109)	Loss 2.4102 (2.4442)	Entropy 1.01425 (1.01484)	Top-1 acc 62.109 (65.206)	Top-5 acc 87.500 (84.738)	lr 0.00954
Train [70][400/3239]	Time 0.247 (0.698)	Data Time 0.001 (0.106)	Loss 2.3678 (2.4429)	Entropy 1.01422 (1.01483)	Top-1 acc 66.016 (65.245)	Top-5 acc 87.109 (84.758)	lr 0.00954
Train [70][410/3239]	Time 0.289 (0.693)	Data Time 0.001 (0.104)	Loss 2.3668 (2.4434)	Entropy 1.01421 (1.01481)	Top-1 acc 63.672 (65.238)	Top-5 acc 87.891 (84.748)	lr 0.00954
Train [70][420/3239]	Time 0.212 (0.688)	Data Time 0.001 (0.101)	Loss 2.5119 (2.4426)	Entropy 1.01421 (1.01480)	Top-1 acc 64.453 (65.264)	Top-5 acc 83.203 (84.757)	lr 0.00954
Train [70][430/3239]	Time 0.236 (0.683)	Data Time 0.001 (0.099)	Loss 2.4245 (2.4429)	Entropy 1.01420 (1.01478)	Top-1 acc 63.281 (65.270)	Top-5 acc 88.281 (84.753)	lr 0.00954
Train [70][440/3239]	Time 0.226 (0.679)	Data Time 0.001 (0.097)	Loss 2.3882 (2.4441)	Entropy 1.01424 (1.01477)	Top-1 acc 66.406 (65.246)	Top-5 acc 88.281 (84.725)	lr 0.00954
Train [70][450/3239]	Time 2.519 (0.674)	Data Time 0.001 (0.095)	Loss 2.3971 (2.4433)	Entropy 1.01424 (1.01476)	Top-1 acc 67.188 (65.257)	Top-5 acc 87.109 (84.732)	lr 0.00954
Train [70][460/3239]	Time 0.344 (0.665)	Data Time 0.002 (0.093)	Loss 2.3590 (2.4431)	Entropy 1.01427 (1.01475)	Top-1 acc 67.969 (65.254)	Top-5 acc 86.719 (84.736)	lr 0.00954
Train [70][470/3239]	Time 0.230 (0.661)	Data Time 0.001 (0.091)	Loss 2.5503 (2.4433)	Entropy 1.01425 (1.01474)	Top-1 acc 65.234 (65.242)	Top-5 acc 82.812 (84.733)	lr 0.00954
Train [70][480/3239]	Time 0.212 (0.657)	Data Time 0.001 (0.089)	Loss 2.5884 (2.4433)	Entropy 1.01422 (1.01473)	Top-1 acc 62.891 (65.240)	Top-5 acc 82.031 (84.736)	lr 0.00953
Train [70][490/3239]	Time 0.263 (0.653)	Data Time 0.001 (0.087)	Loss 2.5975 (2.4439)	Entropy 1.01419 (1.01472)	Top-1 acc 62.500 (65.247)	Top-5 acc 81.250 (84.710)	lr 0.00953
Train [70][500/3239]	Time 0.236 (0.650)	Data Time 0.001 (0.085)	Loss 2.4798 (2.4448)	Entropy 1.01412 (1.01471)	Top-1 acc 65.625 (65.234)	Top-5 acc 83.594 (84.703)	lr 0.00953
Train [70][510/3239]	Time 0.235 (0.647)	Data Time 0.001 (0.084)	Loss 2.3862 (2.4441)	Entropy 1.01402 (1.01469)	Top-1 acc 65.625 (65.244)	Top-5 acc 86.719 (84.714)	lr 0.00953
Train [70][520/3239]	Time 0.262 (0.644)	Data Time 0.002 (0.082)	Loss 2.3879 (2.4444)	Entropy 1.01399 (1.01468)	Top-1 acc 67.188 (65.237)	Top-5 acc 85.156 (84.708)	lr 0.00953
Train [70][530/3239]	Time 0.231 (0.641)	Data Time 0.001 (0.081)	Loss 2.4298 (2.4447)	Entropy 1.01403 (1.01467)	Top-1 acc 64.844 (65.247)	Top-5 acc 82.812 (84.691)	lr 0.00953
Train [70][540/3239]	Time 0.223 (0.638)	Data Time 0.001 (0.079)	Loss 2.6216 (2.4449)	Entropy 1.01407 (1.01466)	Top-1 acc 62.109 (65.249)	Top-5 acc 80.859 (84.689)	lr 0.00953
Train [70][550/3239]	Time 0.344 (0.635)	Data Time 0.001 (0.078)	Loss 2.4165 (2.4450)	Entropy 1.01405 (1.01465)	Top-1 acc 65.625 (65.235)	Top-5 acc 85.547 (84.702)	lr 0.00953
Train [70][560/3239]	Time 2.482 (0.632)	Data Time 0.002 (0.076)	Loss 2.4640 (2.4455)	Entropy 1.01405 (1.01463)	Top-1 acc 65.234 (65.233)	Top-5 acc 85.156 (84.698)	lr 0.00953
Train [70][570/3239]	Time 0.229 (0.625)	Data Time 0.001 (0.075)	Loss 2.4689 (2.4460)	Entropy 1.01405 (1.01462)	Top-1 acc 64.453 (65.218)	Top-5 acc 82.812 (84.679)	lr 0.00953
Train [70][580/3239]	Time 0.220 (0.622)	Data Time 0.001 (0.074)	Loss 2.3931 (2.4444)	Entropy 1.01396 (1.01461)	Top-1 acc 64.062 (65.248)	Top-5 acc 84.766 (84.708)	lr 0.00953
Train [70][590/3239]	Time 0.210 (0.619)	Data Time 0.001 (0.073)	Loss 2.4028 (2.4446)	Entropy 1.01401 (1.01460)	Top-1 acc 63.281 (65.230)	Top-5 acc 86.719 (84.711)	lr 0.00952
Train [70][600/3239]	Time 0.231 (0.617)	Data Time 0.001 (0.071)	Loss 2.4348 (2.4443)	Entropy 1.01401 (1.01459)	Top-1 acc 64.453 (65.236)	Top-5 acc 83.594 (84.719)	lr 0.00952
Train [70][610/3239]	Time 0.223 (0.615)	Data Time 0.001 (0.070)	Loss 2.4084 (2.4445)	Entropy 1.01399 (1.01458)	Top-1 acc 64.453 (65.233)	Top-5 acc 87.891 (84.713)	lr 0.00952
Train [70][620/3239]	Time 0.207 (0.612)	Data Time 0.001 (0.069)	Loss 2.5768 (2.4443)	Entropy 1.01397 (1.01457)	Top-1 acc 62.109 (65.235)	Top-5 acc 81.641 (84.720)	lr 0.00952
Train [70][630/3239]	Time 0.219 (0.610)	Data Time 0.001 (0.068)	Loss 2.4718 (2.4445)	Entropy 1.01388 (1.01456)	Top-1 acc 66.406 (65.243)	Top-5 acc 84.766 (84.713)	lr 0.00952
Train [70][640/3239]	Time 0.336 (0.608)	Data Time 0.001 (0.067)	Loss 2.2943 (2.4445)	Entropy 1.01385 (1.01455)	Top-1 acc 69.922 (65.247)	Top-5 acc 86.719 (84.711)	lr 0.00952
Train [70][650/3239]	Time 0.238 (0.606)	Data Time 0.001 (0.066)	Loss 2.4230 (2.4445)	Entropy 1.01381 (1.01454)	Top-1 acc 64.453 (65.256)	Top-5 acc 85.547 (84.709)	lr 0.00952
Train [70][660/3239]	Time 0.284 (0.604)	Data Time 0.001 (0.065)	Loss 2.4660 (2.4445)	Entropy 1.01372 (1.01453)	Top-1 acc 65.625 (65.264)	Top-5 acc 83.984 (84.705)	lr 0.00952
Train [70][670/3239]	Time 52.591 (0.677)	Data Time 0.001 (0.064)	Loss 2.4612 (2.4446)	Entropy 1.01372 (1.01452)	Top-1 acc 63.281 (65.258)	Top-5 acc 84.766 (84.713)	lr 0.00952
Train [70][680/3239]	Time 0.355 (0.673)	Data Time 0.002 (0.063)	Loss 2.5256 (2.4454)	Entropy 1.01370 (1.01451)	Top-1 acc 63.672 (65.225)	Top-5 acc 81.641 (84.699)	lr 0.00952
Train [70][690/3239]	Time 0.229 (0.672)	Data Time 0.002 (0.062)	Loss 2.4726 (2.4452)	Entropy 1.01364 (1.01449)	Top-1 acc 66.016 (65.233)	Top-5 acc 86.328 (84.709)	lr 0.00951
Train [70][700/3239]	Time 0.254 (0.669)	Data Time 0.002 (0.061)	Loss 2.4344 (2.4453)	Entropy 1.01363 (1.01448)	Top-1 acc 66.016 (65.235)	Top-5 acc 84.375 (84.708)	lr 0.00951
Train [70][710/3239]	Time 0.249 (0.667)	Data Time 0.002 (0.061)	Loss 2.5936 (2.4454)	Entropy 1.01362 (1.01447)	Top-1 acc 60.938 (65.235)	Top-5 acc 82.422 (84.707)	lr 0.00951
Train [70][720/3239]	Time 0.244 (0.664)	Data Time 0.001 (0.060)	Loss 2.4579 (2.4450)	Entropy 1.01363 (1.01446)	Top-1 acc 68.359 (65.268)	Top-5 acc 83.594 (84.713)	lr 0.00951
Train [70][730/3239]	Time 0.242 (0.662)	Data Time 0.001 (0.059)	Loss 2.4487 (2.4446)	Entropy 1.01348 (1.01445)	Top-1 acc 67.188 (65.274)	Top-5 acc 84.375 (84.718)	lr 0.00951
Train [70][740/3239]	Time 0.221 (0.659)	Data Time 0.001 (0.058)	Loss 2.4488 (2.4441)	Entropy 1.01345 (1.01443)	Top-1 acc 65.234 (65.293)	Top-5 acc 82.812 (84.726)	lr 0.00951
Train [70][750/3239]	Time 0.234 (0.657)	Data Time 0.001 (0.058)	Loss 2.4400 (2.4439)	Entropy 1.01340 (1.01442)	Top-1 acc 67.578 (65.315)	Top-5 acc 84.375 (84.729)	lr 0.00951
Train [70][760/3239]	Time 0.224 (0.655)	Data Time 0.001 (0.057)	Loss 2.4551 (2.4440)	Entropy 1.01337 (1.01441)	Top-1 acc 67.969 (65.319)	Top-5 acc 80.859 (84.720)	lr 0.00951
Train [70][770/3239]	Time 0.251 (0.652)	Data Time 0.001 (0.056)	Loss 2.4441 (2.4445)	Entropy 1.01332 (1.01439)	Top-1 acc 66.016 (65.304)	Top-5 acc 82.812 (84.710)	lr 0.00951
Train [70][780/3239]	Time 2.531 (0.650)	Data Time 0.001 (0.055)	Loss 2.3779 (2.4447)	Entropy 1.01332 (1.01438)	Top-1 acc 65.625 (65.298)	Top-5 acc 85.547 (84.707)	lr 0.00951
Train [70][790/3239]	Time 0.267 (0.645)	Data Time 0.001 (0.055)	Loss 2.7054 (2.4451)	Entropy 1.01324 (1.01436)	Top-1 acc 60.156 (65.288)	Top-5 acc 81.250 (84.700)	lr 0.00950
Train [70][800/3239]	Time 0.237 (0.643)	Data Time 0.002 (0.054)	Loss 2.4735 (2.4459)	Entropy 1.01317 (1.01435)	Top-1 acc 63.281 (65.278)	Top-5 acc 83.984 (84.682)	lr 0.00950
Train [70][810/3239]	Time 0.242 (0.641)	Data Time 0.001 (0.053)	Loss 2.3844 (2.4460)	Entropy 1.01320 (1.01433)	Top-1 acc 65.234 (65.258)	Top-5 acc 86.719 (84.681)	lr 0.00950
Train [70][820/3239]	Time 0.271 (0.640)	Data Time 0.001 (0.053)	Loss 2.4306 (2.4464)	Entropy 1.01323 (1.01432)	Top-1 acc 65.234 (65.248)	Top-5 acc 85.547 (84.674)	lr 0.00950
Train [70][830/3239]	Time 0.257 (0.638)	Data Time 0.001 (0.052)	Loss 2.2458 (2.4462)	Entropy 1.01317 (1.01431)	Top-1 acc 71.484 (65.249)	Top-5 acc 88.672 (84.679)	lr 0.00950
Train [70][840/3239]	Time 0.236 (0.636)	Data Time 0.001 (0.052)	Loss 2.3042 (2.4467)	Entropy 1.01316 (1.01429)	Top-1 acc 67.188 (65.214)	Top-5 acc 86.328 (84.675)	lr 0.00950
Train [70][850/3239]	Time 0.218 (0.634)	Data Time 0.001 (0.051)	Loss 2.3951 (2.4466)	Entropy 1.01314 (1.01428)	Top-1 acc 67.969 (65.226)	Top-5 acc 84.766 (84.666)	lr 0.00950
Train [70][860/3239]	Time 0.344 (0.632)	Data Time 0.001 (0.050)	Loss 2.2817 (2.4466)	Entropy 1.01311 (1.01427)	Top-1 acc 67.578 (65.226)	Top-5 acc 90.234 (84.672)	lr 0.00950
Train [70][870/3239]	Time 0.243 (0.630)	Data Time 0.001 (0.050)	Loss 2.3262 (2.4468)	Entropy 1.01288 (1.01425)	Top-1 acc 67.969 (65.216)	Top-5 acc 84.766 (84.671)	lr 0.00950
Train [70][880/3239]	Time 0.261 (0.628)	Data Time 0.001 (0.049)	Loss 2.4803 (2.4469)	Entropy 1.01280 (1.01424)	Top-1 acc 63.281 (65.214)	Top-5 acc 86.328 (84.669)	lr 0.00950
Train [70][890/3239]	Time 2.460 (0.627)	Data Time 0.003 (0.049)	Loss 2.5743 (2.4476)	Entropy 1.01280 (1.01422)	Top-1 acc 61.328 (65.206)	Top-5 acc 82.031 (84.649)	lr 0.00949
Train [70][900/3239]	Time 0.266 (0.623)	Data Time 0.001 (0.048)	Loss 2.4753 (2.4474)	Entropy 1.01275 (1.01421)	Top-1 acc 67.578 (65.212)	Top-5 acc 83.594 (84.656)	lr 0.00949
Train [70][910/3239]	Time 0.375 (0.621)	Data Time 0.001 (0.048)	Loss 2.6471 (2.4474)	Entropy 1.01275 (1.01419)	Top-1 acc 61.328 (65.210)	Top-5 acc 78.906 (84.658)	lr 0.00949
Train [70][920/3239]	Time 0.234 (0.620)	Data Time 0.001 (0.047)	Loss 2.3593 (2.4471)	Entropy 1.01264 (1.01417)	Top-1 acc 67.969 (65.216)	Top-5 acc 85.938 (84.660)	lr 0.00949
Train [70][930/3239]	Time 0.229 (0.618)	Data Time 0.001 (0.047)	Loss 2.3958 (2.4470)	Entropy 1.01264 (1.01416)	Top-1 acc 65.625 (65.220)	Top-5 acc 83.203 (84.658)	lr 0.00949
Train [70][940/3239]	Time 0.218 (0.617)	Data Time 0.001 (0.046)	Loss 2.4460 (2.4466)	Entropy 1.01260 (1.01414)	Top-1 acc 67.578 (65.233)	Top-5 acc 83.203 (84.667)	lr 0.00949
Train [70][950/3239]	Time 0.357 (0.616)	Data Time 0.001 (0.046)	Loss 2.5207 (2.4465)	Entropy 1.01255 (1.01412)	Top-1 acc 63.672 (65.245)	Top-5 acc 81.250 (84.668)	lr 0.00949
Train [70][960/3239]	Time 0.230 (0.614)	Data Time 0.001 (0.045)	Loss 2.3807 (2.4471)	Entropy 1.01241 (1.01411)	Top-1 acc 66.406 (65.231)	Top-5 acc 87.500 (84.654)	lr 0.00949
Train [70][970/3239]	Time 0.202 (0.613)	Data Time 0.001 (0.045)	Loss 2.4942 (2.4474)	Entropy 1.01238 (1.01409)	Top-1 acc 62.109 (65.230)	Top-5 acc 84.375 (84.645)	lr 0.00949
Train [70][980/3239]	Time 0.239 (0.611)	Data Time 0.001 (0.044)	Loss 2.4274 (2.4475)	Entropy 1.01242 (1.01407)	Top-1 acc 66.406 (65.226)	Top-5 acc 85.547 (84.645)	lr 0.00949
Train [70][990/3239]	Time 0.229 (0.610)	Data Time 0.001 (0.044)	Loss 2.5362 (2.4472)	Entropy 1.01240 (1.01405)	Top-1 acc 66.797 (65.241)	Top-5 acc 80.859 (84.642)	lr 0.00948
Train [70][1000/3239]	Time 2.740 (0.609)	Data Time 0.001 (0.044)	Loss 2.3743 (2.4471)	Entropy 1.01240 (1.01404)	Top-1 acc 67.188 (65.241)	Top-5 acc 85.156 (84.635)	lr 0.00948
Train [70][1010/3239]	Time 0.238 (0.605)	Data Time 0.001 (0.043)	Loss 2.4491 (2.4471)	Entropy 1.01239 (1.01402)	Top-1 acc 65.625 (65.240)	Top-5 acc 83.594 (84.632)	lr 0.00948
Train [70][1020/3239]	Time 0.233 (0.604)	Data Time 0.001 (0.043)	Loss 2.3470 (2.4474)	Entropy 1.01249 (1.01401)	Top-1 acc 69.141 (65.240)	Top-5 acc 87.109 (84.626)	lr 0.00948
Train [70][1030/3239]	Time 0.228 (0.603)	Data Time 0.001 (0.042)	Loss 2.2298 (2.4473)	Entropy 1.01250 (1.01399)	Top-1 acc 68.750 (65.238)	Top-5 acc 88.281 (84.628)	lr 0.00948
Train [70][1040/3239]	Time 0.363 (0.655)	Data Time 0.002 (0.042)	Loss 2.4603 (2.4473)	Entropy 1.01245 (1.01398)	Top-1 acc 64.844 (65.232)	Top-5 acc 83.203 (84.623)	lr 0.00948
Train [70][1050/3239]	Time 0.245 (0.653)	Data Time 0.002 (0.042)	Loss 2.4501 (2.4475)	Entropy 1.01248 (1.01396)	Top-1 acc 64.453 (65.231)	Top-5 acc 82.031 (84.614)	lr 0.00948
Train [70][1060/3239]	Time 0.201 (0.651)	Data Time 0.002 (0.041)	Loss 2.5689 (2.4479)	Entropy 1.01244 (1.01395)	Top-1 acc 60.547 (65.210)	Top-5 acc 82.422 (84.607)	lr 0.00948
Train [70][1070/3239]	Time 0.229 (0.650)	Data Time 0.001 (0.041)	Loss 2.6318 (2.4481)	Entropy 1.01244 (1.01393)	Top-1 acc 60.547 (65.200)	Top-5 acc 81.250 (84.601)	lr 0.00948
Train [70][1080/3239]	Time 0.240 (0.648)	Data Time 0.001 (0.040)	Loss 2.4468 (2.4480)	Entropy 1.01241 (1.01392)	Top-1 acc 64.062 (65.208)	Top-5 acc 86.328 (84.603)	lr 0.00948
Train [70][1090/3239]	Time 0.261 (0.647)	Data Time 0.001 (0.040)	Loss 4.0177 (2.4497)	Entropy 1.01229 (1.01391)	Top-1 acc 37.500 (65.171)	Top-5 acc 60.938 (84.577)	lr 0.00947
Train [70][1100/3239]	Time 0.283 (0.645)	Data Time 0.001 (0.040)	Loss 2.4007 (2.4497)	Entropy 1.01230 (1.01389)	Top-1 acc 66.406 (65.156)	Top-5 acc 85.938 (84.576)	lr 0.00947
Train [70][1110/3239]	Time 2.431 (0.644)	Data Time 0.001 (0.039)	Loss 2.4947 (2.4496)	Entropy 1.01230 (1.01388)	Top-1 acc 66.016 (65.162)	Top-5 acc 83.203 (84.586)	lr 0.00947
Train [70][1120/3239]	Time 0.231 (0.640)	Data Time 0.001 (0.039)	Loss 2.6202 (2.4501)	Entropy 1.01226 (1.01386)	Top-1 acc 61.328 (65.154)	Top-5 acc 83.594 (84.577)	lr 0.00947
Train [70][1130/3239]	Time 0.343 (0.639)	Data Time 0.001 (0.039)	Loss 2.3618 (2.4500)	Entropy 1.01220 (1.01385)	Top-1 acc 63.281 (65.162)	Top-5 acc 87.500 (84.578)	lr 0.00947
Train [70][1140/3239]	Time 0.245 (0.637)	Data Time 0.001 (0.038)	Loss 2.3765 (2.4500)	Entropy 1.01221 (1.01383)	Top-1 acc 66.016 (65.149)	Top-5 acc 87.109 (84.578)	lr 0.00947
Train [70][1150/3239]	Time 0.241 (0.636)	Data Time 0.001 (0.038)	Loss 2.3916 (2.4503)	Entropy 1.01222 (1.01382)	Top-1 acc 66.406 (65.139)	Top-5 acc 86.719 (84.576)	lr 0.00947
Train [70][1160/3239]	Time 0.213 (0.635)	Data Time 0.001 (0.038)	Loss 2.4664 (2.4502)	Entropy 1.01219 (1.01381)	Top-1 acc 66.016 (65.138)	Top-5 acc 83.594 (84.582)	lr 0.00947
Train [70][1170/3239]	Time 0.221 (0.633)	Data Time 0.001 (0.037)	Loss 2.4098 (2.4505)	Entropy 1.01226 (1.01379)	Top-1 acc 64.844 (65.136)	Top-5 acc 85.156 (84.573)	lr 0.00947
Train [70][1180/3239]	Time 0.349 (0.632)	Data Time 0.001 (0.037)	Loss 2.4423 (2.4505)	Entropy 1.01225 (1.01378)	Top-1 acc 64.453 (65.133)	Top-5 acc 86.328 (84.574)	lr 0.00947
Train [70][1190/3239]	Time 0.230 (0.631)	Data Time 0.001 (0.037)	Loss 2.5532 (2.4506)	Entropy 1.01221 (1.01377)	Top-1 acc 62.500 (65.131)	Top-5 acc 80.859 (84.570)	lr 0.00947
Train [70][1200/3239]	Time 0.293 (0.630)	Data Time 0.001 (0.037)	Loss 2.4201 (2.4508)	Entropy 1.01220 (1.01375)	Top-1 acc 64.844 (65.127)	Top-5 acc 86.328 (84.563)	lr 0.00946
Train [70][1210/3239]	Time 0.214 (0.628)	Data Time 0.001 (0.036)	Loss 2.5277 (2.4507)	Entropy 1.01219 (1.01374)	Top-1 acc 62.109 (65.128)	Top-5 acc 83.594 (84.571)	lr 0.00946
Train [70][1220/3239]	Time 2.697 (0.627)	Data Time 0.001 (0.036)	Loss 2.4914 (2.4507)	Entropy 1.01219 (1.01373)	Top-1 acc 62.500 (65.128)	Top-5 acc 83.203 (84.577)	lr 0.00946
Train [70][1230/3239]	Time 0.225 (0.624)	Data Time 0.001 (0.036)	Loss 2.4177 (2.4506)	Entropy 1.01220 (1.01372)	Top-1 acc 65.234 (65.130)	Top-5 acc 87.109 (84.580)	lr 0.00946
Train [70][1240/3239]	Time 0.230 (0.623)	Data Time 0.001 (0.035)	Loss 2.2360 (2.4502)	Entropy 1.01210 (1.01370)	Top-1 acc 72.656 (65.136)	Top-5 acc 90.625 (84.588)	lr 0.00946
Train [70][1250/3239]	Time 0.226 (0.621)	Data Time 0.001 (0.035)	Loss 2.5878 (2.4502)	Entropy 1.01209 (1.01369)	Top-1 acc 62.109 (65.134)	Top-5 acc 83.203 (84.590)	lr 0.00946
Train [70][1260/3239]	Time 0.248 (0.620)	Data Time 0.001 (0.035)	Loss 2.4529 (2.4504)	Entropy 1.01201 (1.01368)	Top-1 acc 67.188 (65.131)	Top-5 acc 84.375 (84.593)	lr 0.00946
Train [70][1270/3239]	Time 0.236 (0.619)	Data Time 0.001 (0.035)	Loss 2.5499 (2.4503)	Entropy 1.01202 (1.01366)	Top-1 acc 60.156 (65.126)	Top-5 acc 82.031 (84.601)	lr 0.00946
Train [70][1280/3239]	Time 0.233 (0.618)	Data Time 0.001 (0.034)	Loss 2.4688 (2.4504)	Entropy 1.01195 (1.01365)	Top-1 acc 65.625 (65.126)	Top-5 acc 83.594 (84.600)	lr 0.00946
Train [70][1290/3239]	Time 0.219 (0.617)	Data Time 0.001 (0.034)	Loss 2.3497 (2.4501)	Entropy 1.01195 (1.01364)	Top-1 acc 67.969 (65.128)	Top-5 acc 84.766 (84.606)	lr 0.00946
Train [70][1300/3239]	Time 0.228 (0.616)	Data Time 0.001 (0.034)	Loss 2.4164 (2.4505)	Entropy 1.01184 (1.01362)	Top-1 acc 66.797 (65.116)	Top-5 acc 83.203 (84.595)	lr 0.00945
Train [70][1310/3239]	Time 0.339 (0.615)	Data Time 0.002 (0.034)	Loss 2.4247 (2.4507)	Entropy 1.01179 (1.01361)	Top-1 acc 66.797 (65.111)	Top-5 acc 86.328 (84.594)	lr 0.00945
Train [70][1320/3239]	Time 0.227 (0.614)	Data Time 0.001 (0.033)	Loss 2.5134 (2.4507)	Entropy 1.01171 (1.01360)	Top-1 acc 62.891 (65.104)	Top-5 acc 84.766 (84.589)	lr 0.00945
Train [70][1330/3239]	Time 2.657 (0.613)	Data Time 0.001 (0.033)	Loss 2.3991 (2.4509)	Entropy 1.01171 (1.01358)	Top-1 acc 67.578 (65.096)	Top-5 acc 85.156 (84.585)	lr 0.00945
Train [70][1340/3239]	Time 0.242 (0.610)	Data Time 0.001 (0.033)	Loss 2.4534 (2.4510)	Entropy 1.01170 (1.01357)	Top-1 acc 66.797 (65.087)	Top-5 acc 82.422 (84.585)	lr 0.00945
Train [70][1350/3239]	Time 0.219 (0.609)	Data Time 0.001 (0.033)	Loss 2.4987 (2.4506)	Entropy 1.01167 (1.01355)	Top-1 acc 62.500 (65.094)	Top-5 acc 83.594 (84.594)	lr 0.00945
Train [70][1360/3239]	Time 0.343 (0.608)	Data Time 0.001 (0.032)	Loss 2.2014 (2.4500)	Entropy 1.01169 (1.01354)	Top-1 acc 71.094 (65.105)	Top-5 acc 87.109 (84.607)	lr 0.00945
Train [70][1370/3239]	Time 0.242 (0.607)	Data Time 0.001 (0.032)	Loss 2.3827 (2.4499)	Entropy 1.01165 (1.01353)	Top-1 acc 67.969 (65.102)	Top-5 acc 86.328 (84.618)	lr 0.00945
Train [70][1380/3239]	Time 0.231 (0.606)	Data Time 0.001 (0.032)	Loss 2.2654 (2.4499)	Entropy 1.01165 (1.01351)	Top-1 acc 70.312 (65.109)	Top-5 acc 89.062 (84.617)	lr 0.00945
Train [70][1390/3239]	Time 0.269 (0.606)	Data Time 0.001 (0.032)	Loss 2.4729 (2.4498)	Entropy 1.01160 (1.01350)	Top-1 acc 65.234 (65.114)	Top-5 acc 84.375 (84.615)	lr 0.00945
Train [70][1400/3239]	Time 0.436 (0.641)	Data Time 0.004 (0.032)	Loss 2.5303 (2.4503)	Entropy 1.01143 (1.01349)	Top-1 acc 63.672 (65.101)	Top-5 acc 81.641 (84.607)	lr 0.00944
Train [70][1410/3239]	Time 0.239 (0.641)	Data Time 0.002 (0.031)	Loss 2.4996 (2.4500)	Entropy 1.01142 (1.01347)	Top-1 acc 62.109 (65.108)	Top-5 acc 82.812 (84.608)	lr 0.00944
Train [70][1420/3239]	Time 0.240 (0.639)	Data Time 0.001 (0.031)	Loss 2.4663 (2.4503)	Entropy 1.01137 (1.01346)	Top-1 acc 64.453 (65.101)	Top-5 acc 86.328 (84.607)	lr 0.00944
Train [70][1430/3239]	Time 0.270 (0.638)	Data Time 0.001 (0.031)	Loss 2.2644 (2.4502)	Entropy 1.01136 (1.01344)	Top-1 acc 65.625 (65.099)	Top-5 acc 88.281 (84.613)	lr 0.00944
Train [70][1440/3239]	Time 2.456 (0.637)	Data Time 0.002 (0.031)	Loss 2.4690 (2.4502)	Entropy 1.01136 (1.01343)	Top-1 acc 63.672 (65.099)	Top-5 acc 82.422 (84.608)	lr 0.00944
Train [70][1450/3239]	Time 0.359 (0.635)	Data Time 0.001 (0.031)	Loss 2.8046 (2.4506)	Entropy 1.01136 (1.01341)	Top-1 acc 56.250 (65.090)	Top-5 acc 77.734 (84.602)	lr 0.00944
Train [70][1460/3239]	Time 0.231 (0.633)	Data Time 0.001 (0.030)	Loss 2.3052 (2.4506)	Entropy 1.01134 (1.01340)	Top-1 acc 65.625 (65.083)	Top-5 acc 87.500 (84.601)	lr 0.00944
Train [70][1470/3239]	Time 0.227 (0.632)	Data Time 0.001 (0.030)	Loss 2.7023 (2.4505)	Entropy 1.01131 (1.01338)	Top-1 acc 55.469 (65.088)	Top-5 acc 80.078 (84.600)	lr 0.00944
Train [70][1480/3239]	Time 0.241 (0.631)	Data Time 0.001 (0.030)	Loss 2.5186 (2.4507)	Entropy 1.01129 (1.01337)	Top-1 acc 64.453 (65.086)	Top-5 acc 83.203 (84.598)	lr 0.00944
Train [70][1490/3239]	Time 0.335 (0.630)	Data Time 0.001 (0.030)	Loss 2.5649 (2.4508)	Entropy 1.01127 (1.01336)	Top-1 acc 63.672 (65.083)	Top-5 acc 82.812 (84.597)	lr 0.00944
Train [70][1500/3239]	Time 0.245 (0.629)	Data Time 0.001 (0.030)	Loss 2.3125 (2.4506)	Entropy 1.01121 (1.01334)	Top-1 acc 67.969 (65.085)	Top-5 acc 85.547 (84.602)	lr 0.00943
Train [70][1510/3239]	Time 0.258 (0.628)	Data Time 0.001 (0.029)	Loss 2.5072 (2.4510)	Entropy 1.01127 (1.01333)	Top-1 acc 62.891 (65.074)	Top-5 acc 84.766 (84.595)	lr 0.00943
Train [70][1520/3239]	Time 0.243 (0.628)	Data Time 0.001 (0.029)	Loss 2.5928 (2.4508)	Entropy 1.01125 (1.01332)	Top-1 acc 63.672 (65.078)	Top-5 acc 82.422 (84.602)	lr 0.00943
Train [70][1530/3239]	Time 0.275 (0.627)	Data Time 0.002 (0.029)	Loss 2.6097 (2.4506)	Entropy 1.01111 (1.01330)	Top-1 acc 62.500 (65.087)	Top-5 acc 82.031 (84.609)	lr 0.00943
Train [70][1540/3239]	Time 0.302 (0.626)	Data Time 0.001 (0.029)	Loss 2.4428 (2.4505)	Entropy 1.01111 (1.01329)	Top-1 acc 66.406 (65.092)	Top-5 acc 84.375 (84.610)	lr 0.00943
Train [70][1550/3239]	Time 2.530 (0.625)	Data Time 0.001 (0.029)	Loss 2.3851 (2.4506)	Entropy 1.01111 (1.01327)	Top-1 acc 67.578 (65.091)	Top-5 acc 85.156 (84.612)	lr 0.00943
Train [70][1560/3239]	Time 0.215 (0.623)	Data Time 0.001 (0.029)	Loss 2.5418 (2.4505)	Entropy 1.01109 (1.01326)	Top-1 acc 66.797 (65.098)	Top-5 acc 83.203 (84.614)	lr 0.00943
Train [70][1570/3239]	Time 0.231 (0.622)	Data Time 0.001 (0.028)	Loss 2.4548 (2.4505)	Entropy 1.01106 (1.01325)	Top-1 acc 65.234 (65.100)	Top-5 acc 83.203 (84.612)	lr 0.00943
Train [70][1580/3239]	Time 0.385 (0.621)	Data Time 0.001 (0.028)	Loss 2.5172 (2.4506)	Entropy 1.01098 (1.01323)	Top-1 acc 63.281 (65.097)	Top-5 acc 82.031 (84.611)	lr 0.00943
Train [70][1590/3239]	Time 0.242 (0.620)	Data Time 0.001 (0.028)	Loss 2.5152 (2.4509)	Entropy 1.01091 (1.01322)	Top-1 acc 63.281 (65.086)	Top-5 acc 83.984 (84.606)	lr 0.00943
Train [70][1600/3239]	Time 0.250 (0.619)	Data Time 0.001 (0.028)	Loss 2.4925 (2.4522)	Entropy 1.01089 (1.01320)	Top-1 acc 65.625 (65.055)	Top-5 acc 84.766 (84.585)	lr 0.00943
Train [70][1610/3239]	Time 0.213 (0.618)	Data Time 0.001 (0.028)	Loss 2.4776 (2.4521)	Entropy 1.01096 (1.01319)	Top-1 acc 62.109 (65.058)	Top-5 acc 85.156 (84.584)	lr 0.00942
Train [70][1620/3239]	Time 0.238 (0.617)	Data Time 0.001 (0.028)	Loss 2.4475 (2.4520)	Entropy 1.01092 (1.01317)	Top-1 acc 66.016 (65.060)	Top-5 acc 83.203 (84.587)	lr 0.00942
Train [70][1630/3239]	Time 0.242 (0.616)	Data Time 0.002 (0.027)	Loss 2.6480 (2.4521)	Entropy 1.01088 (1.01316)	Top-1 acc 60.938 (65.054)	Top-5 acc 79.297 (84.582)	lr 0.00942
Train [70][1640/3239]	Time 0.222 (0.616)	Data Time 0.001 (0.027)	Loss 2.5189 (2.4523)	Entropy 1.01089 (1.01315)	Top-1 acc 61.328 (65.048)	Top-5 acc 83.203 (84.579)	lr 0.00942
Train [70][1650/3239]	Time 0.264 (0.615)	Data Time 0.001 (0.027)	Loss 2.3564 (2.4523)	Entropy 1.01083 (1.01313)	Top-1 acc 66.406 (65.046)	Top-5 acc 85.547 (84.578)	lr 0.00942
Train [70][1660/3239]	Time 2.542 (0.614)	Data Time 0.001 (0.027)	Loss 2.2767 (2.4523)	Entropy 1.01083 (1.01312)	Top-1 acc 70.703 (65.042)	Top-5 acc 87.500 (84.579)	lr 0.00942
Train [70][1670/3239]	Time 0.335 (0.612)	Data Time 0.001 (0.027)	Loss 2.4314 (2.4524)	Entropy 1.01079 (1.01310)	Top-1 acc 69.141 (65.042)	Top-5 acc 82.031 (84.575)	lr 0.00942
Train [70][1680/3239]	Time 0.233 (0.611)	Data Time 0.001 (0.027)	Loss 2.4315 (2.4527)	Entropy 1.01078 (1.01309)	Top-1 acc 62.500 (65.033)	Top-5 acc 83.594 (84.570)	lr 0.00942
Train [70][1690/3239]	Time 0.242 (0.610)	Data Time 0.002 (0.026)	Loss 2.5528 (2.4530)	Entropy 1.01074 (1.01308)	Top-1 acc 64.453 (65.024)	Top-5 acc 83.203 (84.564)	lr 0.00942
Train [70][1700/3239]	Time 0.225 (0.609)	Data Time 0.001 (0.026)	Loss 2.5942 (2.4528)	Entropy 1.01075 (1.01306)	Top-1 acc 63.281 (65.033)	Top-5 acc 80.078 (84.564)	lr 0.00942
Train [70][1710/3239]	Time 0.228 (0.609)	Data Time 0.001 (0.026)	Loss 2.6264 (2.4532)	Entropy 1.01079 (1.01305)	Top-1 acc 58.984 (65.017)	Top-5 acc 84.375 (84.562)	lr 0.00941
Train [70][1720/3239]	Time 0.238 (0.608)	Data Time 0.001 (0.026)	Loss 2.6487 (2.4532)	Entropy 1.01076 (1.01304)	Top-1 acc 60.156 (65.021)	Top-5 acc 83.594 (84.563)	lr 0.00941
Train [70][1730/3239]	Time 0.226 (0.607)	Data Time 0.001 (0.026)	Loss 2.4880 (2.4530)	Entropy 1.01051 (1.01302)	Top-1 acc 64.453 (65.031)	Top-5 acc 83.984 (84.567)	lr 0.00941
Train [70][1740/3239]	Time 0.218 (0.606)	Data Time 0.001 (0.026)	Loss 2.3729 (2.4528)	Entropy 1.01047 (1.01301)	Top-1 acc 69.922 (65.036)	Top-5 acc 87.109 (84.573)	lr 0.00941
Train [70][1750/3239]	Time 0.240 (0.606)	Data Time 0.001 (0.026)	Loss 2.5209 (2.4530)	Entropy 1.01048 (1.01299)	Top-1 acc 61.328 (65.030)	Top-5 acc 85.156 (84.570)	lr 0.00941
Train [70][1760/3239]	Time 0.452 (0.634)	Data Time 0.004 (0.025)	Loss 2.4561 (2.4530)	Entropy 1.01028 (1.01298)	Top-1 acc 63.672 (65.031)	Top-5 acc 84.375 (84.568)	lr 0.00941
Train [70][1770/3239]	Time 2.826 (0.634)	Data Time 0.003 (0.025)	Loss 2.5398 (2.4529)	Entropy 1.01028 (1.01296)	Top-1 acc 65.625 (65.035)	Top-5 acc 81.641 (84.568)	lr 0.00941
Train [70][1780/3239]	Time 0.239 (0.631)	Data Time 0.002 (0.025)	Loss 2.3305 (2.4530)	Entropy 1.01039 (1.01295)	Top-1 acc 69.141 (65.032)	Top-5 acc 85.938 (84.565)	lr 0.00941
Train [70][1790/3239]	Time 0.228 (0.631)	Data Time 0.002 (0.025)	Loss 2.4902 (2.4528)	Entropy 1.01042 (1.01294)	Top-1 acc 62.891 (65.034)	Top-5 acc 83.203 (84.566)	lr 0.00941
Train [70][1800/3239]	Time 0.227 (0.630)	Data Time 0.001 (0.025)	Loss 2.6703 (2.4530)	Entropy 1.01043 (1.01292)	Top-1 acc 58.203 (65.024)	Top-5 acc 81.250 (84.565)	lr 0.00941
Train [70][1810/3239]	Time 0.239 (0.629)	Data Time 0.001 (0.025)	Loss 2.3955 (2.4533)	Entropy 1.01032 (1.01291)	Top-1 acc 69.141 (65.015)	Top-5 acc 85.938 (84.564)	lr 0.00940
Train [70][1820/3239]	Time 0.245 (0.628)	Data Time 0.001 (0.025)	Loss 2.5931 (2.4533)	Entropy 1.01029 (1.01289)	Top-1 acc 61.719 (65.015)	Top-5 acc 83.203 (84.564)	lr 0.00940
Train [70][1830/3239]	Time 0.220 (0.627)	Data Time 0.001 (0.025)	Loss 2.3777 (2.4531)	Entropy 1.01025 (1.01288)	Top-1 acc 66.406 (65.026)	Top-5 acc 87.109 (84.568)	lr 0.00940
Train [70][1840/3239]	Time 0.275 (0.626)	Data Time 0.001 (0.025)	Loss 2.3378 (2.4529)	Entropy 1.01016 (1.01286)	Top-1 acc 68.359 (65.029)	Top-5 acc 86.328 (84.572)	lr 0.00940
Train [70][1850/3239]	Time 0.357 (0.625)	Data Time 0.002 (0.024)	Loss 2.1974 (2.4528)	Entropy 1.01015 (1.01285)	Top-1 acc 73.438 (65.034)	Top-5 acc 88.281 (84.577)	lr 0.00940
Train [70][1860/3239]	Time 0.221 (0.625)	Data Time 0.001 (0.024)	Loss 2.6056 (2.4532)	Entropy 1.01009 (1.01284)	Top-1 acc 64.062 (65.024)	Top-5 acc 80.469 (84.569)	lr 0.00940
Train [70][1870/3239]	Time 0.225 (0.624)	Data Time 0.001 (0.024)	Loss 2.4361 (2.4531)	Entropy 1.00958 (1.01282)	Top-1 acc 67.578 (65.029)	Top-5 acc 86.719 (84.577)	lr 0.00940
Train [70][1880/3239]	Time 2.495 (0.623)	Data Time 0.001 (0.024)	Loss 2.4200 (2.4532)	Entropy 1.00958 (1.01280)	Top-1 acc 66.016 (65.033)	Top-5 acc 87.109 (84.575)	lr 0.00940
Train [70][1890/3239]	Time 0.221 (0.621)	Data Time 0.001 (0.024)	Loss 2.3428 (2.4535)	Entropy 1.00951 (1.01279)	Top-1 acc 64.062 (65.025)	Top-5 acc 87.891 (84.568)	lr 0.00940
Train [70][1900/3239]	Time 0.217 (0.620)	Data Time 0.001 (0.024)	Loss 2.5655 (2.4535)	Entropy 1.00949 (1.01277)	Top-1 acc 63.281 (65.021)	Top-5 acc 80.469 (84.565)	lr 0.00940
Train [70][1910/3239]	Time 0.252 (0.620)	Data Time 0.001 (0.024)	Loss 2.3681 (2.4533)	Entropy 1.00947 (1.01275)	Top-1 acc 66.406 (65.020)	Top-5 acc 87.500 (84.569)	lr 0.00939
Train [70][1920/3239]	Time 0.247 (0.619)	Data Time 0.001 (0.024)	Loss 2.3331 (2.4534)	Entropy 1.00946 (1.01273)	Top-1 acc 70.703 (65.021)	Top-5 acc 87.891 (84.568)	lr 0.00939
Train [70][1930/3239]	Time 0.248 (0.618)	Data Time 0.001 (0.023)	Loss 2.5085 (2.4532)	Entropy 1.00945 (1.01272)	Top-1 acc 62.891 (65.026)	Top-5 acc 81.641 (84.570)	lr 0.00939
Train [70][1940/3239]	Time 0.329 (0.617)	Data Time 0.001 (0.023)	Loss 2.4307 (2.4533)	Entropy 1.00940 (1.01270)	Top-1 acc 65.625 (65.023)	Top-5 acc 85.938 (84.571)	lr 0.00939
Train [70][1950/3239]	Time 0.228 (0.617)	Data Time 0.001 (0.023)	Loss 2.3479 (2.4532)	Entropy 1.00941 (1.01268)	Top-1 acc 67.188 (65.025)	Top-5 acc 86.328 (84.576)	lr 0.00939
Train [70][1960/3239]	Time 0.236 (0.616)	Data Time 0.001 (0.023)	Loss 2.4197 (2.4533)	Entropy 1.00939 (1.01267)	Top-1 acc 64.453 (65.022)	Top-5 acc 86.328 (84.573)	lr 0.00939
Train [70][1970/3239]	Time 0.247 (0.615)	Data Time 0.002 (0.023)	Loss 2.3687 (2.4534)	Entropy 1.00934 (1.01265)	Top-1 acc 65.234 (65.020)	Top-5 acc 87.500 (84.574)	lr 0.00939
Train [70][1980/3239]	Time 0.262 (0.614)	Data Time 0.001 (0.023)	Loss 2.5078 (2.4533)	Entropy 1.00938 (1.01263)	Top-1 acc 63.672 (65.023)	Top-5 acc 83.984 (84.571)	lr 0.00939
Train [70][1990/3239]	Time 2.441 (0.614)	Data Time 0.001 (0.023)	Loss 2.3196 (2.4533)	Entropy 1.00938 (1.01262)	Top-1 acc 66.797 (65.024)	Top-5 acc 86.719 (84.569)	lr 0.00939
Train [70][2000/3239]	Time 0.227 (0.612)	Data Time 0.001 (0.023)	Loss 2.4583 (2.4534)	Entropy 1.00931 (1.01260)	Top-1 acc 60.938 (65.023)	Top-5 acc 83.203 (84.567)	lr 0.00939
Train [70][2010/3239]	Time 0.231 (0.611)	Data Time 0.001 (0.023)	Loss 2.6093 (2.4534)	Entropy 1.00929 (1.01258)	Top-1 acc 61.719 (65.026)	Top-5 acc 81.250 (84.568)	lr 0.00938
Train [70][2020/3239]	Time 0.223 (0.610)	Data Time 0.001 (0.022)	Loss 2.5239 (2.4535)	Entropy 1.00931 (1.01257)	Top-1 acc 64.453 (65.028)	Top-5 acc 80.469 (84.565)	lr 0.00938
Train [70][2030/3239]	Time 0.302 (0.610)	Data Time 0.001 (0.022)	Loss 2.4938 (2.4537)	Entropy 1.00930 (1.01255)	Top-1 acc 64.844 (65.025)	Top-5 acc 81.250 (84.562)	lr 0.00938
Train [70][2040/3239]	Time 0.230 (0.609)	Data Time 0.001 (0.022)	Loss 2.2939 (2.4539)	Entropy 1.00922 (1.01254)	Top-1 acc 69.141 (65.025)	Top-5 acc 88.281 (84.560)	lr 0.00938
Train [70][2050/3239]	Time 0.243 (0.608)	Data Time 0.001 (0.022)	Loss 2.4008 (2.4539)	Entropy 1.00923 (1.01252)	Top-1 acc 65.234 (65.026)	Top-5 acc 85.156 (84.561)	lr 0.00938
Train [70][2060/3239]	Time 0.217 (0.607)	Data Time 0.001 (0.022)	Loss 2.5587 (2.4541)	Entropy 1.00920 (1.01250)	Top-1 acc 64.453 (65.023)	Top-5 acc 83.203 (84.558)	lr 0.00938
Train [70][2070/3239]	Time 0.246 (0.607)	Data Time 0.002 (0.022)	Loss 2.5465 (2.4541)	Entropy 1.00919 (1.01249)	Top-1 acc 61.719 (65.020)	Top-5 acc 83.203 (84.560)	lr 0.00938
Train [70][2080/3239]	Time 0.280 (0.606)	Data Time 0.001 (0.022)	Loss 2.4305 (2.4541)	Entropy 1.00907 (1.01247)	Top-1 acc 66.016 (65.022)	Top-5 acc 85.547 (84.556)	lr 0.00938
Train [70][2090/3239]	Time 0.258 (0.606)	Data Time 0.001 (0.022)	Loss 2.5869 (2.4542)	Entropy 1.00902 (1.01246)	Top-1 acc 60.547 (65.018)	Top-5 acc 82.031 (84.557)	lr 0.00938
Train [70][2100/3239]	Time 2.405 (0.605)	Data Time 0.001 (0.022)	Loss 2.3582 (2.4542)	Entropy 1.00902 (1.01244)	Top-1 acc 69.141 (65.024)	Top-5 acc 86.719 (84.556)	lr 0.00938
Train [70][2110/3239]	Time 0.221 (0.603)	Data Time 0.001 (0.022)	Loss 2.3259 (2.4543)	Entropy 1.00907 (1.01242)	Top-1 acc 65.234 (65.020)	Top-5 acc 87.891 (84.554)	lr 0.00938
Train [70][2120/3239]	Time 0.327 (0.603)	Data Time 0.001 (0.021)	Loss 2.4679 (2.4543)	Entropy 1.00900 (1.01241)	Top-1 acc 64.453 (65.021)	Top-5 acc 84.375 (84.554)	lr 0.00937
Train [70][2130/3239]	Time 0.231 (0.627)	Data Time 0.002 (0.021)	Loss 2.5823 (2.4545)	Entropy 1.00896 (1.01239)	Top-1 acc 64.844 (65.015)	Top-5 acc 81.250 (84.550)	lr 0.00937
Train [70][2140/3239]	Time 0.230 (0.626)	Data Time 0.002 (0.021)	Loss 2.4686 (2.4543)	Entropy 1.00909 (1.01237)	Top-1 acc 65.625 (65.019)	Top-5 acc 83.984 (84.549)	lr 0.00937
Train [70][2150/3239]	Time 0.236 (0.626)	Data Time 0.002 (0.021)	Loss 2.5515 (2.4545)	Entropy 1.00905 (1.01236)	Top-1 acc 63.281 (65.014)	Top-5 acc 82.422 (84.547)	lr 0.00937
Train [70][2160/3239]	Time 0.249 (0.625)	Data Time 0.001 (0.021)	Loss 2.5920 (2.4547)	Entropy 1.00903 (1.01234)	Top-1 acc 60.156 (65.008)	Top-5 acc 82.422 (84.544)	lr 0.00937
Train [70][2170/3239]	Time 0.244 (0.624)	Data Time 0.002 (0.021)	Loss 2.4032 (2.4545)	Entropy 1.00901 (1.01233)	Top-1 acc 67.969 (65.012)	Top-5 acc 85.547 (84.551)	lr 0.00937
Train [70][2180/3239]	Time 0.268 (0.623)	Data Time 0.002 (0.021)	Loss 2.6358 (2.4545)	Entropy 1.00893 (1.01231)	Top-1 acc 63.672 (65.017)	Top-5 acc 81.641 (84.550)	lr 0.00937
Train [70][2190/3239]	Time 0.274 (0.623)	Data Time 0.001 (0.021)	Loss 2.3778 (2.4545)	Entropy 1.00889 (1.01230)	Top-1 acc 65.625 (65.017)	Top-5 acc 85.156 (84.547)	lr 0.00937
Train [70][2200/3239]	Time 0.241 (0.622)	Data Time 0.001 (0.021)	Loss 2.3414 (2.4544)	Entropy 1.00888 (1.01228)	Top-1 acc 68.750 (65.019)	Top-5 acc 88.281 (84.552)	lr 0.00937
Train [70][2210/3239]	Time 2.608 (0.622)	Data Time 0.001 (0.021)	Loss 2.4575 (2.4544)	Entropy 1.00888 (1.01227)	Top-1 acc 64.062 (65.017)	Top-5 acc 86.328 (84.550)	lr 0.00937
Train [70][2220/3239]	Time 0.223 (0.620)	Data Time 0.001 (0.021)	Loss 2.3753 (2.4544)	Entropy 1.00893 (1.01225)	Top-1 acc 64.453 (65.015)	Top-5 acc 85.547 (84.551)	lr 0.00936
Train [70][2230/3239]	Time 0.238 (0.619)	Data Time 0.002 (0.021)	Loss 2.6997 (2.4548)	Entropy 1.00878 (1.01224)	Top-1 acc 56.641 (65.000)	Top-5 acc 82.031 (84.547)	lr 0.00936
Train [70][2240/3239]	Time 0.239 (0.618)	Data Time 0.001 (0.020)	Loss 2.5187 (2.4548)	Entropy 1.00879 (1.01222)	Top-1 acc 64.062 (64.998)	Top-5 acc 83.203 (84.545)	lr 0.00936
Train [70][2250/3239]	Time 0.212 (0.618)	Data Time 0.001 (0.020)	Loss 2.4185 (2.4548)	Entropy 1.00880 (1.01221)	Top-1 acc 68.359 (64.998)	Top-5 acc 82.031 (84.548)	lr 0.00936
Train [70][2260/3239]	Time 0.239 (0.617)	Data Time 0.002 (0.020)	Loss 2.4913 (2.4547)	Entropy 1.00879 (1.01219)	Top-1 acc 63.672 (65.001)	Top-5 acc 83.203 (84.551)	lr 0.00936
Train [70][2270/3239]	Time 0.231 (0.616)	Data Time 0.001 (0.020)	Loss 2.3983 (2.4547)	Entropy 1.00881 (1.01218)	Top-1 acc 63.672 (65.000)	Top-5 acc 83.594 (84.549)	lr 0.00936
Train [70][2280/3239]	Time 0.279 (0.616)	Data Time 0.001 (0.020)	Loss 2.3996 (2.4548)	Entropy 1.00881 (1.01216)	Top-1 acc 67.188 (65.002)	Top-5 acc 83.203 (84.544)	lr 0.00936
Train [70][2290/3239]	Time 0.240 (0.615)	Data Time 0.001 (0.020)	Loss 2.3086 (2.4549)	Entropy 1.00879 (1.01215)	Top-1 acc 69.922 (65.002)	Top-5 acc 85.938 (84.540)	lr 0.00936
Train [70][2300/3239]	Time 0.422 (0.615)	Data Time 0.001 (0.020)	Loss 2.4633 (2.4549)	Entropy 1.00881 (1.01213)	Top-1 acc 62.891 (65.002)	Top-5 acc 82.812 (84.540)	lr 0.00936
Train [70][2310/3239]	Time 0.278 (0.614)	Data Time 0.001 (0.020)	Loss 2.3771 (2.4549)	Entropy 1.00879 (1.01212)	Top-1 acc 67.969 (65.005)	Top-5 acc 84.375 (84.541)	lr 0.00936
Train [70][2320/3239]	Time 2.526 (0.614)	Data Time 0.001 (0.020)	Loss 2.4426 (2.4549)	Entropy 1.00879 (1.01210)	Top-1 acc 66.406 (65.004)	Top-5 acc 83.203 (84.538)	lr 0.00935
Train [70][2330/3239]	Time 0.223 (0.612)	Data Time 0.001 (0.020)	Loss 2.4940 (2.4549)	Entropy 1.00875 (1.01209)	Top-1 acc 66.797 (65.007)	Top-5 acc 82.422 (84.538)	lr 0.00935
Train [70][2340/3239]	Time 0.231 (0.611)	Data Time 0.001 (0.020)	Loss 2.4748 (2.4549)	Entropy 1.00867 (1.01207)	Top-1 acc 63.672 (65.004)	Top-5 acc 84.766 (84.535)	lr 0.00935
Train [70][2350/3239]	Time 0.217 (0.611)	Data Time 0.001 (0.020)	Loss 2.4238 (2.4549)	Entropy 1.00862 (1.01206)	Top-1 acc 64.453 (65.005)	Top-5 acc 83.594 (84.536)	lr 0.00935
Train [70][2360/3239]	Time 0.239 (0.610)	Data Time 0.001 (0.019)	Loss 2.7909 (2.4553)	Entropy 1.00857 (1.01204)	Top-1 acc 60.938 (65.001)	Top-5 acc 77.344 (84.528)	lr 0.00935
Train [70][2370/3239]	Time 0.253 (0.610)	Data Time 0.001 (0.019)	Loss 2.5766 (2.4553)	Entropy 1.00856 (1.01203)	Top-1 acc 60.938 (64.998)	Top-5 acc 82.031 (84.524)	lr 0.00935
Train [70][2380/3239]	Time 0.217 (0.609)	Data Time 0.001 (0.019)	Loss 2.6933 (2.4554)	Entropy 1.00852 (1.01202)	Top-1 acc 60.938 (64.995)	Top-5 acc 82.031 (84.521)	lr 0.00935
Train [70][2390/3239]	Time 0.371 (0.609)	Data Time 0.001 (0.019)	Loss 2.5281 (2.4554)	Entropy 1.00846 (1.01200)	Top-1 acc 63.672 (64.992)	Top-5 acc 82.031 (84.520)	lr 0.00935
Train [70][2400/3239]	Time 0.237 (0.608)	Data Time 0.001 (0.019)	Loss 2.5168 (2.4556)	Entropy 1.00844 (1.01199)	Top-1 acc 62.891 (64.985)	Top-5 acc 83.594 (84.518)	lr 0.00935
Train [70][2410/3239]	Time 0.315 (0.607)	Data Time 0.001 (0.019)	Loss 2.5339 (2.4558)	Entropy 1.00830 (1.01197)	Top-1 acc 63.672 (64.981)	Top-5 acc 84.375 (84.513)	lr 0.00935
Train [70][2420/3239]	Time 0.226 (0.607)	Data Time 0.002 (0.019)	Loss 2.3528 (2.4556)	Entropy 1.00833 (1.01196)	Top-1 acc 69.141 (64.983)	Top-5 acc 85.938 (84.513)	lr 0.00934
Train [70][2430/3239]	Time 2.559 (0.606)	Data Time 0.001 (0.019)	Loss 2.3781 (2.4558)	Entropy 1.00833 (1.01194)	Top-1 acc 66.016 (64.977)	Top-5 acc 85.547 (84.511)	lr 0.00934
Train [70][2440/3239]	Time 0.382 (0.605)	Data Time 0.001 (0.019)	Loss 2.5935 (2.4559)	Entropy 1.00840 (1.01193)	Top-1 acc 61.719 (64.977)	Top-5 acc 83.203 (84.510)	lr 0.00934
Train [70][2450/3239]	Time 0.240 (0.604)	Data Time 0.001 (0.019)	Loss 2.4011 (2.4560)	Entropy 1.00840 (1.01191)	Top-1 acc 69.141 (64.976)	Top-5 acc 85.156 (84.507)	lr 0.00934
Train [70][2460/3239]	Time 0.223 (0.604)	Data Time 0.001 (0.019)	Loss 2.4308 (2.4561)	Entropy 1.00841 (1.01190)	Top-1 acc 64.844 (64.973)	Top-5 acc 87.109 (84.508)	lr 0.00934
Train [70][2470/3239]	Time 0.224 (0.603)	Data Time 0.001 (0.019)	Loss 2.5228 (2.4564)	Entropy 1.00839 (1.01188)	Top-1 acc 61.719 (64.962)	Top-5 acc 81.250 (84.506)	lr 0.00934
Train [70][2480/3239]	Time 0.213 (0.603)	Data Time 0.001 (0.019)	Loss 2.5562 (2.4566)	Entropy 1.00837 (1.01187)	Top-1 acc 63.672 (64.961)	Top-5 acc 82.031 (84.504)	lr 0.00934
Train [70][2490/3239]	Time 0.390 (0.624)	Data Time 0.002 (0.019)	Loss 2.3442 (2.4565)	Entropy 1.00831 (1.01186)	Top-1 acc 65.625 (64.962)	Top-5 acc 85.547 (84.508)	lr 0.00934
Train [70][2500/3239]	Time 0.231 (0.623)	Data Time 0.002 (0.018)	Loss 2.3997 (2.4566)	Entropy 1.00836 (1.01184)	Top-1 acc 66.797 (64.966)	Top-5 acc 87.109 (84.504)	lr 0.00934
Train [70][2510/3239]	Time 0.224 (0.623)	Data Time 0.002 (0.018)	Loss 2.4573 (2.4569)	Entropy 1.00816 (1.01183)	Top-1 acc 66.016 (64.960)	Top-5 acc 85.547 (84.500)	lr 0.00934
Train [70][2520/3239]	Time 0.261 (0.622)	Data Time 0.001 (0.018)	Loss 2.5341 (2.4569)	Entropy 1.00816 (1.01181)	Top-1 acc 63.281 (64.962)	Top-5 acc 83.594 (84.503)	lr 0.00934
Train [70][2530/3239]	Time 0.239 (0.621)	Data Time 0.001 (0.018)	Loss 2.3299 (2.4566)	Entropy 1.00812 (1.01180)	Top-1 acc 66.016 (64.966)	Top-5 acc 88.281 (84.507)	lr 0.00933
Train [70][2540/3239]	Time 2.467 (0.621)	Data Time 0.001 (0.018)	Loss 2.5495 (2.4569)	Entropy 1.00812 (1.01178)	Top-1 acc 60.938 (64.957)	Top-5 acc 82.812 (84.503)	lr 0.00933
Train [70][2550/3239]	Time 0.218 (0.619)	Data Time 0.001 (0.018)	Loss 2.4650 (2.4570)	Entropy 1.00811 (1.01177)	Top-1 acc 62.891 (64.956)	Top-5 acc 82.812 (84.500)	lr 0.00933
Train [70][2560/3239]	Time 0.219 (0.619)	Data Time 0.001 (0.018)	Loss 2.4346 (2.4569)	Entropy 1.00812 (1.01176)	Top-1 acc 66.406 (64.953)	Top-5 acc 85.547 (84.503)	lr 0.00933
Train [70][2570/3239]	Time 0.338 (0.618)	Data Time 0.001 (0.018)	Loss 2.5204 (2.4571)	Entropy 1.00809 (1.01174)	Top-1 acc 64.062 (64.948)	Top-5 acc 83.203 (84.500)	lr 0.00933
Train [70][2580/3239]	Time 0.232 (0.617)	Data Time 0.002 (0.018)	Loss 2.5140 (2.4571)	Entropy 1.00803 (1.01173)	Top-1 acc 62.891 (64.942)	Top-5 acc 84.375 (84.502)	lr 0.00933
Train [70][2590/3239]	Time 0.237 (0.617)	Data Time 0.001 (0.018)	Loss 2.5777 (2.4573)	Entropy 1.00796 (1.01171)	Top-1 acc 62.891 (64.943)	Top-5 acc 82.031 (84.495)	lr 0.00933
Train [70][2600/3239]	Time 0.221 (0.616)	Data Time 0.001 (0.018)	Loss 2.3721 (2.4572)	Entropy 1.00796 (1.01170)	Top-1 acc 67.578 (64.946)	Top-5 acc 85.938 (84.500)	lr 0.00933
Train [70][2610/3239]	Time 0.234 (0.616)	Data Time 0.001 (0.018)	Loss 2.3991 (2.4573)	Entropy 1.00796 (1.01168)	Top-1 acc 67.188 (64.944)	Top-5 acc 85.547 (84.496)	lr 0.00933
Train [70][2620/3239]	Time 0.259 (0.615)	Data Time 0.001 (0.018)	Loss 2.3023 (2.4572)	Entropy 1.00792 (1.01167)	Top-1 acc 68.750 (64.946)	Top-5 acc 87.109 (84.501)	lr 0.00933
Train [70][2630/3239]	Time 0.285 (0.615)	Data Time 0.002 (0.018)	Loss 2.4454 (2.4571)	Entropy 1.00793 (1.01166)	Top-1 acc 62.891 (64.946)	Top-5 acc 83.984 (84.502)	lr 0.00932
Train [70][2640/3239]	Time 0.276 (0.614)	Data Time 0.001 (0.018)	Loss 2.4509 (2.4571)	Entropy 1.00794 (1.01164)	Top-1 acc 70.312 (64.950)	Top-5 acc 84.375 (84.503)	lr 0.00932
Train [70][2650/3239]	Time 0.224 (0.614)	Data Time 0.001 (0.018)	Loss 2.5517 (2.4572)	Entropy 1.00796 (1.01163)	Top-1 acc 64.062 (64.944)	Top-5 acc 81.250 (84.497)	lr 0.00932
Train [70][2660/3239]	Time 0.392 (0.613)	Data Time 0.001 (0.017)	Loss 2.6042 (2.4573)	Entropy 1.00795 (1.01161)	Top-1 acc 63.281 (64.943)	Top-5 acc 81.641 (84.495)	lr 0.00932
Train [70][2670/3239]	Time 0.252 (0.613)	Data Time 0.001 (0.017)	Loss 2.4073 (2.4578)	Entropy 1.00797 (1.01160)	Top-1 acc 64.062 (64.929)	Top-5 acc 85.156 (84.486)	lr 0.00932
Train [70][2680/3239]	Time 0.258 (0.612)	Data Time 0.001 (0.017)	Loss 2.4859 (2.4577)	Entropy 1.00776 (1.01159)	Top-1 acc 67.188 (64.930)	Top-5 acc 83.594 (84.488)	lr 0.00932
Train [70][2690/3239]	Time 0.271 (0.612)	Data Time 0.001 (0.017)	Loss 2.4489 (2.4575)	Entropy 1.00775 (1.01157)	Top-1 acc 63.281 (64.939)	Top-5 acc 85.547 (84.492)	lr 0.00932
Train [70][2700/3239]	Time 0.273 (0.611)	Data Time 0.001 (0.017)	Loss 2.7809 (2.4575)	Entropy 1.00772 (1.01156)	Top-1 acc 58.594 (64.940)	Top-5 acc 77.734 (84.493)	lr 0.00932
Train [70][2710/3239]	Time 0.224 (0.610)	Data Time 0.001 (0.017)	Loss 2.6526 (2.4576)	Entropy 1.00772 (1.01154)	Top-1 acc 59.375 (64.934)	Top-5 acc 82.812 (84.491)	lr 0.00932
Train [70][2720/3239]	Time 0.284 (0.610)	Data Time 0.001 (0.017)	Loss 2.5512 (2.4577)	Entropy 1.00772 (1.01153)	Top-1 acc 61.328 (64.931)	Top-5 acc 83.984 (84.490)	lr 0.00932
Train [70][2730/3239]	Time 0.276 (0.609)	Data Time 0.001 (0.017)	Loss 2.4282 (2.4578)	Entropy 1.00771 (1.01152)	Top-1 acc 62.109 (64.928)	Top-5 acc 85.547 (84.489)	lr 0.00931
Train [70][2740/3239]	Time 0.233 (0.609)	Data Time 0.001 (0.017)	Loss 2.3639 (2.4577)	Entropy 1.00769 (1.01150)	Top-1 acc 68.359 (64.933)	Top-5 acc 85.156 (84.488)	lr 0.00931
Train [70][2750/3239]	Time 0.265 (0.608)	Data Time 0.001 (0.017)	Loss 2.4608 (2.4577)	Entropy 1.00767 (1.01149)	Top-1 acc 64.453 (64.933)	Top-5 acc 87.109 (84.486)	lr 0.00931
Train [70][2760/3239]	Time 0.291 (0.608)	Data Time 0.001 (0.017)	Loss 2.3377 (2.4576)	Entropy 1.00770 (1.01147)	Top-1 acc 67.969 (64.933)	Top-5 acc 87.500 (84.489)	lr 0.00931
Train [70][2770/3239]	Time 0.251 (0.607)	Data Time 0.001 (0.017)	Loss 2.5507 (2.4576)	Entropy 1.00773 (1.01146)	Top-1 acc 60.156 (64.932)	Top-5 acc 83.984 (84.487)	lr 0.00931
Train [70][2780/3239]	Time 0.268 (0.607)	Data Time 0.001 (0.017)	Loss 2.6366 (2.4578)	Entropy 1.00771 (1.01145)	Top-1 acc 61.328 (64.924)	Top-5 acc 79.688 (84.483)	lr 0.00931
Train [70][2790/3239]	Time 0.334 (0.607)	Data Time 0.001 (0.017)	Loss 2.5014 (2.4579)	Entropy 1.00777 (1.01143)	Top-1 acc 62.109 (64.924)	Top-5 acc 85.938 (84.482)	lr 0.00931
Train [70][2800/3239]	Time 0.219 (0.606)	Data Time 0.001 (0.017)	Loss 2.5130 (2.4579)	Entropy 1.00732 (1.01142)	Top-1 acc 66.016 (64.924)	Top-5 acc 82.812 (84.482)	lr 0.00931
Train [70][2810/3239]	Time 0.238 (0.606)	Data Time 0.001 (0.017)	Loss 2.4125 (2.4579)	Entropy 1.00732 (1.01141)	Top-1 acc 68.359 (64.923)	Top-5 acc 84.766 (84.481)	lr 0.00931
Train [70][2820/3239]	Time 0.221 (0.605)	Data Time 0.001 (0.017)	Loss 2.4431 (2.4583)	Entropy 1.00724 (1.01139)	Top-1 acc 62.500 (64.914)	Top-5 acc 87.891 (84.477)	lr 0.00931
Train [70][2830/3239]	Time 0.260 (0.625)	Data Time 0.003 (0.017)	Loss 2.5672 (2.4584)	Entropy 1.00720 (1.01138)	Top-1 acc 64.062 (64.916)	Top-5 acc 83.594 (84.476)	lr 0.00930
Train [70][2840/3239]	Time 0.233 (0.624)	Data Time 0.002 (0.016)	Loss 2.6220 (2.4584)	Entropy 1.00720 (1.01136)	Top-1 acc 60.547 (64.914)	Top-5 acc 80.859 (84.474)	lr 0.00930
Train [70][2850/3239]	Time 0.229 (0.624)	Data Time 0.001 (0.016)	Loss 2.5075 (2.4584)	Entropy 1.00722 (1.01135)	Top-1 acc 61.719 (64.915)	Top-5 acc 81.250 (84.475)	lr 0.00930
Train [70][2860/3239]	Time 0.264 (0.623)	Data Time 0.001 (0.016)	Loss 2.3507 (2.4586)	Entropy 1.00720 (1.01133)	Top-1 acc 64.844 (64.911)	Top-5 acc 86.328 (84.474)	lr 0.00930
Train [70][2870/3239]	Time 0.244 (0.623)	Data Time 0.001 (0.016)	Loss 2.6137 (2.4586)	Entropy 1.00720 (1.01132)	Top-1 acc 60.938 (64.908)	Top-5 acc 81.250 (84.474)	lr 0.00930
Train [70][2880/3239]	Time 0.273 (0.622)	Data Time 0.001 (0.016)	Loss 2.4932 (2.4587)	Entropy 1.00724 (1.01130)	Top-1 acc 62.500 (64.906)	Top-5 acc 82.422 (84.472)	lr 0.00930
Train [70][2890/3239]	Time 0.276 (0.622)	Data Time 0.001 (0.016)	Loss 2.3054 (2.4585)	Entropy 1.00721 (1.01129)	Top-1 acc 72.266 (64.910)	Top-5 acc 88.281 (84.474)	lr 0.00930
Train [70][2900/3239]	Time 0.230 (0.621)	Data Time 0.001 (0.016)	Loss 2.6106 (2.4587)	Entropy 1.00717 (1.01128)	Top-1 acc 59.766 (64.904)	Top-5 acc 82.422 (84.470)	lr 0.00930
Train [70][2910/3239]	Time 0.259 (0.621)	Data Time 0.001 (0.016)	Loss 2.2259 (2.4588)	Entropy 1.00715 (1.01126)	Top-1 acc 71.094 (64.906)	Top-5 acc 87.891 (84.467)	lr 0.00930
Train [70][2920/3239]	Time 0.324 (0.620)	Data Time 0.001 (0.016)	Loss 2.4152 (2.4588)	Entropy 1.00714 (1.01125)	Top-1 acc 70.312 (64.911)	Top-5 acc 85.938 (84.468)	lr 0.00930
Train [70][2930/3239]	Time 0.228 (0.620)	Data Time 0.001 (0.016)	Loss 2.5734 (2.4590)	Entropy 1.00700 (1.01123)	Top-1 acc 58.984 (64.908)	Top-5 acc 82.031 (84.466)	lr 0.00930
Train [70][2940/3239]	Time 0.299 (0.619)	Data Time 0.001 (0.016)	Loss 2.5690 (2.4592)	Entropy 1.00694 (1.01122)	Top-1 acc 61.328 (64.904)	Top-5 acc 83.594 (84.464)	lr 0.00929
Train [70][2950/3239]	Time 0.269 (0.619)	Data Time 0.001 (0.016)	Loss 2.4761 (2.4593)	Entropy 1.00695 (1.01120)	Top-1 acc 62.891 (64.896)	Top-5 acc 84.375 (84.461)	lr 0.00929
Train [70][2960/3239]	Time 0.273 (0.618)	Data Time 0.001 (0.016)	Loss 2.4759 (2.4593)	Entropy 1.00688 (1.01119)	Top-1 acc 61.719 (64.896)	Top-5 acc 85.547 (84.462)	lr 0.00929
Train [70][2970/3239]	Time 0.222 (0.618)	Data Time 0.001 (0.016)	Loss 2.3670 (2.4593)	Entropy 1.00686 (1.01118)	Top-1 acc 65.625 (64.893)	Top-5 acc 86.719 (84.463)	lr 0.00929
Train [70][2980/3239]	Time 0.251 (0.617)	Data Time 0.001 (0.016)	Loss 2.3908 (2.4593)	Entropy 1.00679 (1.01116)	Top-1 acc 69.141 (64.895)	Top-5 acc 87.891 (84.465)	lr 0.00929
Train [70][2990/3239]	Time 0.235 (0.617)	Data Time 0.001 (0.016)	Loss 2.4682 (2.4591)	Entropy 1.00673 (1.01115)	Top-1 acc 65.625 (64.895)	Top-5 acc 80.078 (84.467)	lr 0.00929
Train [70][3000/3239]	Time 0.224 (0.616)	Data Time 0.001 (0.016)	Loss 2.4535 (2.4592)	Entropy 1.00668 (1.01113)	Top-1 acc 65.234 (64.894)	Top-5 acc 86.719 (84.470)	lr 0.00929
Train [70][3010/3239]	Time 0.366 (0.616)	Data Time 0.001 (0.016)	Loss 2.4054 (2.4593)	Entropy 1.00678 (1.01112)	Top-1 acc 65.234 (64.890)	Top-5 acc 84.375 (84.468)	lr 0.00929
Train [70][3020/3239]	Time 0.335 (0.616)	Data Time 0.002 (0.016)	Loss 2.4324 (2.4592)	Entropy 1.00687 (1.01110)	Top-1 acc 66.406 (64.895)	Top-5 acc 84.375 (84.469)	lr 0.00929
Train [70][3030/3239]	Time 0.215 (0.615)	Data Time 0.001 (0.016)	Loss 2.5217 (2.4594)	Entropy 1.00679 (1.01109)	Top-1 acc 62.891 (64.891)	Top-5 acc 84.375 (84.469)	lr 0.00929
Train [70][3040/3239]	Time 0.275 (0.615)	Data Time 0.001 (0.016)	Loss 2.3356 (2.4594)	Entropy 1.00675 (1.01107)	Top-1 acc 66.797 (64.891)	Top-5 acc 87.500 (84.469)	lr 0.00928
Train [70][3050/3239]	Time 0.238 (0.614)	Data Time 0.001 (0.015)	Loss 2.5931 (2.4593)	Entropy 1.00678 (1.01106)	Top-1 acc 64.062 (64.896)	Top-5 acc 82.031 (84.469)	lr 0.00928
Train [70][3060/3239]	Time 0.276 (0.614)	Data Time 0.001 (0.015)	Loss 2.4000 (2.4595)	Entropy 1.00678 (1.01105)	Top-1 acc 65.625 (64.894)	Top-5 acc 86.328 (84.466)	lr 0.00928
Train [70][3070/3239]	Time 0.262 (0.613)	Data Time 0.001 (0.015)	Loss 2.4533 (2.4597)	Entropy 1.00674 (1.01103)	Top-1 acc 65.625 (64.886)	Top-5 acc 82.812 (84.465)	lr 0.00928
Train [70][3080/3239]	Time 0.272 (0.613)	Data Time 0.001 (0.015)	Loss 2.5799 (2.4597)	Entropy 1.00682 (1.01102)	Top-1 acc 64.844 (64.892)	Top-5 acc 81.641 (84.462)	lr 0.00928
Train [70][3090/3239]	Time 0.289 (0.612)	Data Time 0.001 (0.015)	Loss 2.4442 (2.4597)	Entropy 1.00684 (1.01101)	Top-1 acc 65.234 (64.890)	Top-5 acc 83.203 (84.461)	lr 0.00928
Train [70][3100/3239]	Time 0.314 (0.612)	Data Time 0.001 (0.015)	Loss 2.4447 (2.4598)	Entropy 1.00684 (1.01099)	Top-1 acc 66.797 (64.892)	Top-5 acc 85.156 (84.461)	lr 0.00928
Train [70][3110/3239]	Time 0.246 (0.612)	Data Time 0.001 (0.015)	Loss 2.5210 (2.4598)	Entropy 1.00684 (1.01098)	Top-1 acc 60.156 (64.893)	Top-5 acc 85.156 (84.462)	lr 0.00928
Train [70][3120/3239]	Time 0.217 (0.611)	Data Time 0.001 (0.015)	Loss 2.3492 (2.4600)	Entropy 1.00678 (1.01097)	Top-1 acc 66.797 (64.888)	Top-5 acc 86.328 (84.461)	lr 0.00928
Train [70][3130/3239]	Time 0.223 (0.611)	Data Time 0.001 (0.015)	Loss 2.4673 (2.4601)	Entropy 1.00674 (1.01095)	Top-1 acc 59.766 (64.882)	Top-5 acc 83.594 (84.459)	lr 0.00928
Train [70][3140/3239]	Time 0.313 (0.610)	Data Time 0.001 (0.015)	Loss 2.4185 (2.4601)	Entropy 1.00673 (1.01094)	Top-1 acc 68.359 (64.880)	Top-5 acc 84.375 (84.458)	lr 0.00927
Train [70][3150/3239]	Time 0.213 (0.610)	Data Time 0.001 (0.015)	Loss 2.5479 (2.4600)	Entropy 1.00671 (1.01092)	Top-1 acc 62.891 (64.881)	Top-5 acc 80.859 (84.459)	lr 0.00927
Train [70][3160/3239]	Time 0.277 (0.625)	Data Time 0.005 (0.015)	Loss 2.5208 (2.4601)	Entropy 1.00675 (1.01091)	Top-1 acc 64.453 (64.881)	Top-5 acc 81.250 (84.460)	lr 0.00927
Train [70][3170/3239]	Time 0.260 (0.625)	Data Time 0.002 (0.015)	Loss 2.4338 (2.4601)	Entropy 1.00663 (1.01090)	Top-1 acc 63.672 (64.878)	Top-5 acc 85.938 (84.458)	lr 0.00927
Train [70][3180/3239]	Time 0.259 (0.625)	Data Time 0.000 (0.015)	Loss 2.5974 (2.4601)	Entropy 1.00657 (1.01088)	Top-1 acc 62.891 (64.876)	Top-5 acc 82.422 (84.458)	lr 0.00927
Train [70][3190/3239]	Time 0.331 (0.624)	Data Time 0.000 (0.015)	Loss 2.4836 (2.4601)	Entropy 1.00658 (1.01087)	Top-1 acc 60.156 (64.875)	Top-5 acc 83.984 (84.459)	lr 0.00927
Train [70][3200/3239]	Time 0.218 (0.624)	Data Time 0.000 (0.015)	Loss 2.4554 (2.4602)	Entropy 1.00656 (1.01086)	Top-1 acc 63.672 (64.876)	Top-5 acc 86.328 (84.456)	lr 0.00927
Train [70][3210/3239]	Time 0.223 (0.623)	Data Time 0.000 (0.015)	Loss 2.4286 (2.4601)	Entropy 1.00648 (1.01084)	Top-1 acc 65.234 (64.876)	Top-5 acc 86.328 (84.457)	lr 0.00927
Train [70][3220/3239]	Time 0.231 (0.623)	Data Time 0.000 (0.015)	Loss 2.5256 (2.4602)	Entropy 1.00642 (1.01083)	Top-1 acc 64.062 (64.873)	Top-5 acc 82.422 (84.453)	lr 0.00927
Train [70][3230/3239]	Time 0.260 (0.622)	Data Time 0.000 (0.015)	Loss 2.3767 (2.4602)	Entropy 1.00654 (1.01082)	Top-1 acc 66.016 (64.872)	Top-5 acc 85.547 (84.453)	lr 0.00927
Train [70][3239/3239]	Time 2.486 (0.622)	Data Time 0.000 (0.015)	Loss 2.8733 (2.4604)	Entropy 1.00654 (1.01081)	Top-1 acc 59.259 (64.867)	Top-5 acc 74.074 (84.449)	lr 0.00926
==========Valid [70/120]	loss 1.379	top-1 acc 68.552 (68.599)	top-5 acc 87.558	Train top-1 64.867	top-5 84.449	Entropy 1.00654	Latency-None: 0.000ms	Flops: 546.53M
Train [71][0/3239]	Time 43.315 (43.315)	Data Time 40.547 (40.547)	Loss 2.4277 (2.4277)	Entropy 1.00650 (1.00650)	Top-1 acc 63.672 (63.672)	Top-5 acc 83.594 (83.594)	lr 0.00926
Train [71][10/3239]	Time 2.527 (4.442)	Data Time 0.002 (3.700)	Loss 2.4040 (2.4335)	Entropy 1.00650 (1.00650)	Top-1 acc 66.406 (65.270)	Top-5 acc 85.156 (84.482)	lr 0.00926
Train [71][20/3239]	Time 0.244 (2.453)	Data Time 0.001 (1.939)	Loss 2.3202 (2.4350)	Entropy 1.00646 (1.00648)	Top-1 acc 69.141 (64.955)	Top-5 acc 86.719 (84.487)	lr 0.00926
Train [71][30/3239]	Time 0.232 (1.816)	Data Time 0.001 (1.314)	Loss 2.4637 (2.4340)	Entropy 1.00643 (1.00647)	Top-1 acc 62.500 (65.134)	Top-5 acc 83.984 (84.350)	lr 0.00926
Train [71][40/3239]	Time 0.333 (1.492)	Data Time 0.002 (0.994)	Loss 2.4312 (2.4406)	Entropy 1.00643 (1.00646)	Top-1 acc 68.359 (64.977)	Top-5 acc 84.766 (84.308)	lr 0.00926
Train [71][50/3239]	Time 0.233 (1.292)	Data Time 0.001 (0.799)	Loss 2.6155 (2.4579)	Entropy 1.00643 (1.00645)	Top-1 acc 62.109 (64.583)	Top-5 acc 80.859 (84.084)	lr 0.00926
Train [71][60/3239]	Time 0.227 (1.161)	Data Time 0.001 (0.669)	Loss 2.5945 (2.4480)	Entropy 1.00642 (1.00645)	Top-1 acc 64.453 (64.946)	Top-5 acc 82.031 (84.356)	lr 0.00926
Train [71][70/3239]	Time 0.218 (1.064)	Data Time 0.001 (0.575)	Loss 2.4461 (2.4411)	Entropy 1.00641 (1.00644)	Top-1 acc 65.625 (65.069)	Top-5 acc 83.594 (84.491)	lr 0.00926
Train [71][80/3239]	Time 0.240 (0.990)	Data Time 0.001 (0.504)	Loss 2.4903 (2.4452)	Entropy 1.00629 (1.00643)	Top-1 acc 64.062 (64.868)	Top-5 acc 80.469 (84.433)	lr 0.00926
Train [71][90/3239]	Time 0.214 (0.934)	Data Time 0.001 (0.449)	Loss 2.5313 (2.4460)	Entropy 1.00629 (1.00642)	Top-1 acc 60.938 (64.882)	Top-5 acc 84.766 (84.487)	lr 0.00926
Train [71][100/3239]	Time 0.227 (0.886)	Data Time 0.001 (0.404)	Loss 2.3998 (2.4424)	Entropy 1.00629 (1.00640)	Top-1 acc 66.797 (65.068)	Top-5 acc 87.109 (84.557)	lr 0.00926
Train [71][110/3239]	Time 0.253 (0.849)	Data Time 0.001 (0.368)	Loss 2.2976 (2.4407)	Entropy 1.00626 (1.00639)	Top-1 acc 69.531 (65.171)	Top-5 acc 89.062 (84.660)	lr 0.00925
Train [71][120/3239]	Time 2.572 (0.818)	Data Time 0.001 (0.338)	Loss 2.3795 (2.4379)	Entropy 1.00626 (1.00638)	Top-1 acc 62.891 (65.202)	Top-5 acc 86.719 (84.811)	lr 0.00925
Train [71][130/3239]	Time 0.360 (0.775)	Data Time 0.001 (0.312)	Loss 2.5332 (2.4382)	Entropy 1.00621 (1.00637)	Top-1 acc 64.062 (65.211)	Top-5 acc 78.516 (84.807)	lr 0.00925
Train [71][140/3239]	Time 0.225 (0.754)	Data Time 0.001 (0.290)	Loss 2.3172 (2.4346)	Entropy 1.00621 (1.00636)	Top-1 acc 68.359 (65.337)	Top-5 acc 84.375 (84.835)	lr 0.00925
Train [71][150/3239]	Time 0.221 (0.735)	Data Time 0.001 (0.271)	Loss 2.2785 (2.4286)	Entropy 1.00624 (1.00635)	Top-1 acc 67.578 (65.413)	Top-5 acc 88.672 (84.965)	lr 0.00925
Train [71][160/3239]	Time 0.228 (0.719)	Data Time 0.001 (0.254)	Loss 2.4498 (2.4296)	Entropy 1.00626 (1.00635)	Top-1 acc 64.844 (65.431)	Top-5 acc 84.766 (84.955)	lr 0.00925
Train [71][170/3239]	Time 0.247 (0.704)	Data Time 0.001 (0.240)	Loss 2.3652 (2.4321)	Entropy 1.00621 (1.00634)	Top-1 acc 67.578 (65.365)	Top-5 acc 85.938 (84.878)	lr 0.00925
Train [71][180/3239]	Time 0.237 (0.693)	Data Time 0.001 (0.226)	Loss 2.5338 (2.4341)	Entropy 1.00618 (1.00633)	Top-1 acc 63.672 (65.267)	Top-5 acc 82.031 (84.854)	lr 0.00925
Train [71][190/3239]	Time 0.222 (0.681)	Data Time 0.001 (0.215)	Loss 2.4226 (2.4328)	Entropy 1.00622 (1.00632)	Top-1 acc 66.406 (65.304)	Top-5 acc 84.766 (84.870)	lr 0.00925
Train [71][200/3239]	Time 0.242 (0.671)	Data Time 0.001 (0.204)	Loss 2.6719 (2.4347)	Entropy 1.00617 (1.00632)	Top-1 acc 58.594 (65.295)	Top-5 acc 79.688 (84.832)	lr 0.00925
Train [71][210/3239]	Time 0.248 (0.661)	Data Time 0.001 (0.195)	Loss 2.5103 (2.4336)	Entropy 1.00614 (1.00631)	Top-1 acc 64.453 (65.325)	Top-5 acc 82.422 (84.843)	lr 0.00924
Train [71][220/3239]	Time 0.225 (0.653)	Data Time 0.003 (0.186)	Loss 2.4135 (2.4333)	Entropy 1.00605 (1.00630)	Top-1 acc 62.891 (65.321)	Top-5 acc 85.938 (84.866)	lr 0.00924
Train [71][230/3239]	Time 2.554 (0.645)	Data Time 0.001 (0.178)	Loss 2.3175 (2.4345)	Entropy 1.00605 (1.00629)	Top-1 acc 65.234 (65.256)	Top-5 acc 85.938 (84.869)	lr 0.00924
Train [71][240/3239]	Time 0.234 (0.628)	Data Time 0.002 (0.170)	Loss 2.1876 (2.4315)	Entropy 1.00603 (1.00628)	Top-1 acc 71.875 (65.332)	Top-5 acc 92.578 (84.950)	lr 0.00924
Train [71][250/3239]	Time 0.228 (0.622)	Data Time 0.001 (0.164)	Loss 2.4522 (2.4324)	Entropy 1.00602 (1.00627)	Top-1 acc 67.188 (65.343)	Top-5 acc 85.547 (84.921)	lr 0.00924
Train [71][260/3239]	Time 0.225 (0.616)	Data Time 0.001 (0.158)	Loss 2.3599 (2.4327)	Entropy 1.00603 (1.00626)	Top-1 acc 68.750 (65.321)	Top-5 acc 86.328 (84.915)	lr 0.00924
Train [71][270/3239]	Time 0.243 (0.611)	Data Time 0.001 (0.152)	Loss 2.4786 (2.4344)	Entropy 1.00597 (1.00625)	Top-1 acc 64.844 (65.302)	Top-5 acc 83.594 (84.891)	lr 0.00924
Train [71][280/3239]	Time 0.256 (0.788)	Data Time 0.003 (0.146)	Loss 2.5777 (2.4342)	Entropy 1.00584 (1.00624)	Top-1 acc 61.719 (65.328)	Top-5 acc 83.984 (84.889)	lr 0.00924
Train [71][290/3239]	Time 0.220 (0.786)	Data Time 0.002 (0.142)	Loss 2.4917 (2.4341)	Entropy 1.00581 (1.00622)	Top-1 acc 60.547 (65.320)	Top-5 acc 85.938 (84.923)	lr 0.00924
Train [71][300/3239]	Time 0.224 (0.776)	Data Time 0.001 (0.137)	Loss 2.3681 (2.4336)	Entropy 1.00578 (1.00621)	Top-1 acc 64.062 (65.314)	Top-5 acc 87.109 (84.919)	lr 0.00924
Train [71][310/3239]	Time 0.224 (0.766)	Data Time 0.001 (0.133)	Loss 2.4585 (2.4345)	Entropy 1.00576 (1.00620)	Top-1 acc 62.891 (65.291)	Top-5 acc 86.719 (84.914)	lr 0.00923
Train [71][320/3239]	Time 0.221 (0.757)	Data Time 0.002 (0.128)	Loss 2.5435 (2.4362)	Entropy 1.00573 (1.00618)	Top-1 acc 62.891 (65.243)	Top-5 acc 83.984 (84.901)	lr 0.00923
Train [71][330/3239]	Time 0.235 (0.749)	Data Time 0.001 (0.125)	Loss 2.6899 (2.4370)	Entropy 1.00571 (1.00617)	Top-1 acc 58.984 (65.243)	Top-5 acc 79.688 (84.878)	lr 0.00923
Train [71][340/3239]	Time 2.512 (0.740)	Data Time 0.001 (0.121)	Loss 2.3406 (2.4372)	Entropy 1.00571 (1.00615)	Top-1 acc 67.969 (65.269)	Top-5 acc 87.109 (84.865)	lr 0.00923
Train [71][350/3239]	Time 0.245 (0.726)	Data Time 0.001 (0.118)	Loss 2.3592 (2.4366)	Entropy 1.00567 (1.00614)	Top-1 acc 69.531 (65.306)	Top-5 acc 86.719 (84.878)	lr 0.00923
Train [71][360/3239]	Time 0.241 (0.720)	Data Time 0.001 (0.114)	Loss 2.5481 (2.4363)	Entropy 1.00564 (1.00613)	Top-1 acc 62.891 (65.308)	Top-5 acc 82.031 (84.869)	lr 0.00923
Train [71][370/3239]	Time 0.221 (0.713)	Data Time 0.001 (0.111)	Loss 2.4806 (2.4351)	Entropy 1.00559 (1.00611)	Top-1 acc 61.328 (65.333)	Top-5 acc 84.766 (84.877)	lr 0.00923
Train [71][380/3239]	Time 0.210 (0.706)	Data Time 0.001 (0.108)	Loss 2.5593 (2.4360)	Entropy 1.00557 (1.00610)	Top-1 acc 60.156 (65.312)	Top-5 acc 82.031 (84.855)	lr 0.00923
Train [71][390/3239]	Time 0.223 (0.700)	Data Time 0.001 (0.106)	Loss 2.5324 (2.4371)	Entropy 1.00550 (1.00608)	Top-1 acc 62.891 (65.284)	Top-5 acc 83.203 (84.829)	lr 0.00923
Train [71][400/3239]	Time 0.223 (0.695)	Data Time 0.001 (0.103)	Loss 2.5189 (2.4375)	Entropy 1.00539 (1.00607)	Top-1 acc 65.234 (65.284)	Top-5 acc 82.031 (84.825)	lr 0.00923
Train [71][410/3239]	Time 0.214 (0.689)	Data Time 0.001 (0.101)	Loss 2.4552 (2.4380)	Entropy 1.00539 (1.00605)	Top-1 acc 63.672 (65.265)	Top-5 acc 83.984 (84.805)	lr 0.00922
Train [71][420/3239]	Time 0.233 (0.684)	Data Time 0.001 (0.098)	Loss 2.5831 (2.4377)	Entropy 1.00536 (1.00604)	Top-1 acc 61.328 (65.267)	Top-5 acc 82.031 (84.809)	lr 0.00922
Train [71][430/3239]	Time 0.215 (0.679)	Data Time 0.001 (0.096)	Loss 2.3919 (2.4373)	Entropy 1.00536 (1.00602)	Top-1 acc 68.359 (65.306)	Top-5 acc 84.766 (84.815)	lr 0.00922
Train [71][440/3239]	Time 0.398 (0.675)	Data Time 0.001 (0.094)	Loss 2.5119 (2.4381)	Entropy 1.00537 (1.00601)	Top-1 acc 62.891 (65.330)	Top-5 acc 84.766 (84.805)	lr 0.00922
Train [71][450/3239]	Time 2.569 (0.671)	Data Time 0.002 (0.092)	Loss 2.2492 (2.4355)	Entropy 1.00537 (1.00599)	Top-1 acc 72.266 (65.399)	Top-5 acc 85.547 (84.849)	lr 0.00922
Train [71][460/3239]	Time 0.258 (0.661)	Data Time 0.001 (0.090)	Loss 2.6074 (2.4369)	Entropy 1.00538 (1.00598)	Top-1 acc 64.062 (65.383)	Top-5 acc 83.594 (84.845)	lr 0.00922
Train [71][470/3239]	Time 0.268 (0.658)	Data Time 0.001 (0.088)	Loss 2.4567 (2.4377)	Entropy 1.00527 (1.00596)	Top-1 acc 64.062 (65.359)	Top-5 acc 84.375 (84.828)	lr 0.00922
Train [71][480/3239]	Time 0.257 (0.654)	Data Time 0.002 (0.086)	Loss 2.3987 (2.4376)	Entropy 1.00526 (1.00595)	Top-1 acc 66.797 (65.355)	Top-5 acc 86.328 (84.822)	lr 0.00922
Train [71][490/3239]	Time 0.237 (0.651)	Data Time 0.001 (0.085)	Loss 2.4317 (2.4368)	Entropy 1.00521 (1.00593)	Top-1 acc 64.062 (65.383)	Top-5 acc 85.547 (84.837)	lr 0.00922
Train [71][500/3239]	Time 0.223 (0.648)	Data Time 0.001 (0.083)	Loss 2.5654 (2.4387)	Entropy 1.00515 (1.00592)	Top-1 acc 60.938 (65.335)	Top-5 acc 83.203 (84.797)	lr 0.00922
Train [71][510/3239]	Time 0.223 (0.644)	Data Time 0.001 (0.081)	Loss 2.3949 (2.4385)	Entropy 1.00513 (1.00590)	Top-1 acc 62.891 (65.346)	Top-5 acc 85.938 (84.807)	lr 0.00922
Train [71][520/3239]	Time 0.221 (0.641)	Data Time 0.001 (0.080)	Loss 2.4106 (2.4386)	Entropy 1.00511 (1.00589)	Top-1 acc 64.453 (65.339)	Top-5 acc 83.984 (84.785)	lr 0.00921
Train [71][530/3239]	Time 0.362 (0.638)	Data Time 0.001 (0.078)	Loss 2.5598 (2.4393)	Entropy 1.00510 (1.00587)	Top-1 acc 62.891 (65.316)	Top-5 acc 82.031 (84.767)	lr 0.00921
Train [71][540/3239]	Time 0.211 (0.635)	Data Time 0.001 (0.077)	Loss 2.4519 (2.4395)	Entropy 1.00509 (1.00586)	Top-1 acc 66.406 (65.317)	Top-5 acc 85.156 (84.768)	lr 0.00921
Train [71][550/3239]	Time 0.255 (0.632)	Data Time 0.001 (0.076)	Loss 2.4531 (2.4405)	Entropy 1.00506 (1.00585)	Top-1 acc 64.453 (65.293)	Top-5 acc 82.812 (84.748)	lr 0.00921
Train [71][560/3239]	Time 2.532 (0.629)	Data Time 0.001 (0.074)	Loss 2.5130 (2.4412)	Entropy 1.00506 (1.00583)	Top-1 acc 61.328 (65.278)	Top-5 acc 83.984 (84.736)	lr 0.00921
Train [71][570/3239]	Time 0.238 (0.622)	Data Time 0.001 (0.073)	Loss 2.3475 (2.4405)	Entropy 1.00503 (1.00582)	Top-1 acc 65.625 (65.303)	Top-5 acc 87.500 (84.744)	lr 0.00921
Train [71][580/3239]	Time 0.224 (0.620)	Data Time 0.001 (0.072)	Loss 2.4696 (2.4413)	Entropy 1.00500 (1.00580)	Top-1 acc 69.141 (65.289)	Top-5 acc 85.547 (84.735)	lr 0.00921
Train [71][590/3239]	Time 0.232 (0.617)	Data Time 0.001 (0.071)	Loss 2.4661 (2.4409)	Entropy 1.00488 (1.00579)	Top-1 acc 63.281 (65.291)	Top-5 acc 84.375 (84.739)	lr 0.00921
Train [71][600/3239]	Time 0.235 (0.615)	Data Time 0.001 (0.069)	Loss 2.5001 (2.4412)	Entropy 1.00485 (1.00577)	Top-1 acc 66.406 (65.286)	Top-5 acc 84.766 (84.734)	lr 0.00921
Train [71][610/3239]	Time 0.223 (0.612)	Data Time 0.001 (0.068)	Loss 2.4506 (2.4415)	Entropy 1.00484 (1.00576)	Top-1 acc 63.672 (65.275)	Top-5 acc 84.375 (84.730)	lr 0.00921
Train [71][620/3239]	Time 0.339 (0.610)	Data Time 0.001 (0.067)	Loss 2.5051 (2.4416)	Entropy 1.00479 (1.00574)	Top-1 acc 61.719 (65.268)	Top-5 acc 84.766 (84.725)	lr 0.00920
Train [71][630/3239]	Time 0.240 (0.608)	Data Time 0.001 (0.066)	Loss 2.5216 (2.4418)	Entropy 1.00478 (1.00573)	Top-1 acc 64.844 (65.267)	Top-5 acc 82.812 (84.718)	lr 0.00920
Train [71][640/3239]	Time 0.450 (0.686)	Data Time 0.002 (0.065)	Loss 2.4214 (2.4415)	Entropy 1.00476 (1.00571)	Top-1 acc 67.578 (65.289)	Top-5 acc 85.156 (84.729)	lr 0.00920
Train [71][650/3239]	Time 0.229 (0.684)	Data Time 0.002 (0.064)	Loss 2.5084 (2.4417)	Entropy 1.00476 (1.00570)	Top-1 acc 64.453 (65.296)	Top-5 acc 83.984 (84.712)	lr 0.00920
Train [71][660/3239]	Time 0.308 (0.681)	Data Time 0.002 (0.063)	Loss 2.6728 (2.4420)	Entropy 1.00476 (1.00568)	Top-1 acc 59.766 (65.281)	Top-5 acc 78.906 (84.712)	lr 0.00920
Train [71][670/3239]	Time 2.601 (0.678)	Data Time 0.001 (0.062)	Loss 2.4277 (2.4414)	Entropy 1.00476 (1.00567)	Top-1 acc 66.016 (65.290)	Top-5 acc 88.281 (84.725)	lr 0.00920
Train [71][680/3239]	Time 0.231 (0.672)	Data Time 0.002 (0.061)	Loss 2.3067 (2.4413)	Entropy 1.00473 (1.00566)	Top-1 acc 66.797 (65.300)	Top-5 acc 87.891 (84.724)	lr 0.00920
Train [71][690/3239]	Time 0.236 (0.669)	Data Time 0.001 (0.061)	Loss 2.5419 (2.4415)	Entropy 1.00473 (1.00564)	Top-1 acc 64.844 (65.292)	Top-5 acc 82.422 (84.713)	lr 0.00920
Train [71][700/3239]	Time 0.223 (0.666)	Data Time 0.001 (0.060)	Loss 2.5206 (2.4410)	Entropy 1.00470 (1.00563)	Top-1 acc 62.500 (65.305)	Top-5 acc 82.812 (84.719)	lr 0.00920
Train [71][710/3239]	Time 0.249 (0.664)	Data Time 0.001 (0.059)	Loss 2.4926 (2.4414)	Entropy 1.00469 (1.00562)	Top-1 acc 64.453 (65.291)	Top-5 acc 82.422 (84.720)	lr 0.00920
Train [71][720/3239]	Time 0.229 (0.661)	Data Time 0.001 (0.058)	Loss 2.3323 (2.4415)	Entropy 1.00468 (1.00560)	Top-1 acc 68.359 (65.279)	Top-5 acc 87.109 (84.724)	lr 0.00919
Train [71][730/3239]	Time 0.230 (0.659)	Data Time 0.002 (0.057)	Loss 2.5434 (2.4422)	Entropy 1.00463 (1.00559)	Top-1 acc 59.375 (65.253)	Top-5 acc 84.375 (84.707)	lr 0.00919
Train [71][740/3239]	Time 0.242 (0.656)	Data Time 0.001 (0.057)	Loss 2.5611 (2.4427)	Entropy 1.00464 (1.00558)	Top-1 acc 64.453 (65.236)	Top-5 acc 83.203 (84.696)	lr 0.00919
Train [71][750/3239]	Time 0.257 (0.654)	Data Time 0.001 (0.056)	Loss 2.3959 (2.4424)	Entropy 1.00460 (1.00556)	Top-1 acc 68.750 (65.255)	Top-5 acc 84.766 (84.704)	lr 0.00919
Train [71][760/3239]	Time 0.206 (0.652)	Data Time 0.001 (0.055)	Loss 2.6641 (2.4448)	Entropy 1.00454 (1.00555)	Top-1 acc 57.031 (65.196)	Top-5 acc 81.641 (84.663)	lr 0.00919
Train [71][770/3239]	Time 0.278 (0.649)	Data Time 0.001 (0.054)	Loss 2.4713 (2.4440)	Entropy 1.00458 (1.00554)	Top-1 acc 67.578 (65.221)	Top-5 acc 83.594 (84.679)	lr 0.00919
Train [71][780/3239]	Time 2.636 (0.647)	Data Time 0.001 (0.054)	Loss 2.5097 (2.4445)	Entropy 1.00458 (1.00553)	Top-1 acc 66.016 (65.222)	Top-5 acc 82.422 (84.668)	lr 0.00919
Train [71][790/3239]	Time 0.254 (0.642)	Data Time 0.001 (0.053)	Loss 2.5370 (2.4447)	Entropy 1.00454 (1.00551)	Top-1 acc 64.453 (65.215)	Top-5 acc 80.859 (84.671)	lr 0.00919
Train [71][800/3239]	Time 0.254 (0.640)	Data Time 0.001 (0.052)	Loss 2.5823 (2.4447)	Entropy 1.00451 (1.00550)	Top-1 acc 61.719 (65.216)	Top-5 acc 80.469 (84.672)	lr 0.00919
Train [71][810/3239]	Time 0.222 (0.638)	Data Time 0.001 (0.052)	Loss 2.4286 (2.4446)	Entropy 1.00447 (1.00549)	Top-1 acc 66.406 (65.222)	Top-5 acc 86.328 (84.670)	lr 0.00919
Train [71][820/3239]	Time 0.213 (0.636)	Data Time 0.001 (0.051)	Loss 2.5196 (2.4443)	Entropy 1.00448 (1.00548)	Top-1 acc 62.109 (65.226)	Top-5 acc 82.812 (84.674)	lr 0.00918
Train [71][830/3239]	Time 0.235 (0.634)	Data Time 0.001 (0.051)	Loss 2.5573 (2.4449)	Entropy 1.00438 (1.00546)	Top-1 acc 61.719 (65.205)	Top-5 acc 84.766 (84.667)	lr 0.00918
Train [71][840/3239]	Time 0.349 (0.633)	Data Time 0.001 (0.050)	Loss 2.3535 (2.4445)	Entropy 1.00434 (1.00545)	Top-1 acc 69.141 (65.226)	Top-5 acc 85.938 (84.676)	lr 0.00918
Train [71][850/3239]	Time 0.226 (0.631)	Data Time 0.001 (0.049)	Loss 2.4081 (2.4445)	Entropy 1.00440 (1.00544)	Top-1 acc 62.891 (65.220)	Top-5 acc 85.547 (84.673)	lr 0.00918
Train [71][860/3239]	Time 0.218 (0.629)	Data Time 0.001 (0.049)	Loss 2.2640 (2.4446)	Entropy 1.00437 (1.00543)	Top-1 acc 67.578 (65.223)	Top-5 acc 86.719 (84.675)	lr 0.00918
Train [71][870/3239]	Time 0.203 (0.627)	Data Time 0.001 (0.048)	Loss 2.4024 (2.4437)	Entropy 1.00435 (1.00541)	Top-1 acc 67.578 (65.258)	Top-5 acc 86.328 (84.696)	lr 0.00918
Train [71][880/3239]	Time 0.261 (0.625)	Data Time 0.012 (0.048)	Loss 2.3644 (2.4427)	Entropy 1.00434 (1.00540)	Top-1 acc 66.797 (65.274)	Top-5 acc 86.328 (84.721)	lr 0.00918
Train [71][890/3239]	Time 2.583 (0.623)	Data Time 0.001 (0.047)	Loss 2.3126 (2.4427)	Entropy 1.00434 (1.00539)	Top-1 acc 66.016 (65.279)	Top-5 acc 87.500 (84.722)	lr 0.00918
Train [71][900/3239]	Time 0.212 (0.619)	Data Time 0.001 (0.047)	Loss 2.3916 (2.4423)	Entropy 1.00430 (1.00538)	Top-1 acc 67.188 (65.294)	Top-5 acc 83.594 (84.722)	lr 0.00918
Train [71][910/3239]	Time 0.228 (0.617)	Data Time 0.001 (0.046)	Loss 2.3136 (2.4419)	Entropy 1.00429 (1.00537)	Top-1 acc 70.312 (65.293)	Top-5 acc 87.500 (84.728)	lr 0.00918
Train [71][920/3239]	Time 0.264 (0.616)	Data Time 0.001 (0.046)	Loss 2.4876 (2.4417)	Entropy 1.00420 (1.00535)	Top-1 acc 62.500 (65.295)	Top-5 acc 85.938 (84.730)	lr 0.00918
Train [71][930/3239]	Time 0.261 (0.615)	Data Time 0.002 (0.045)	Loss 2.7801 (2.4419)	Entropy 1.00421 (1.00534)	Top-1 acc 58.594 (65.290)	Top-5 acc 78.906 (84.731)	lr 0.00917
Train [71][940/3239]	Time 0.254 (0.613)	Data Time 0.001 (0.045)	Loss 2.5380 (2.4414)	Entropy 1.00424 (1.00533)	Top-1 acc 64.844 (65.305)	Top-5 acc 82.031 (84.744)	lr 0.00917
Train [71][950/3239]	Time 0.222 (0.612)	Data Time 0.001 (0.044)	Loss 2.3391 (2.4420)	Entropy 1.00421 (1.00532)	Top-1 acc 64.453 (65.282)	Top-5 acc 88.281 (84.729)	lr 0.00917
Train [71][960/3239]	Time 0.218 (0.610)	Data Time 0.001 (0.044)	Loss 2.4188 (2.4416)	Entropy 1.00422 (1.00531)	Top-1 acc 66.016 (65.284)	Top-5 acc 87.109 (84.732)	lr 0.00917
Train [71][970/3239]	Time 0.231 (0.609)	Data Time 0.001 (0.044)	Loss 2.4851 (2.4419)	Entropy 1.00419 (1.00530)	Top-1 acc 63.281 (65.270)	Top-5 acc 83.594 (84.718)	lr 0.00917
Train [71][980/3239]	Time 0.358 (0.607)	Data Time 0.001 (0.043)	Loss 2.4205 (2.4424)	Entropy 1.00416 (1.00528)	Top-1 acc 68.359 (65.263)	Top-5 acc 84.766 (84.711)	lr 0.00917
Train [71][990/3239]	Time 0.224 (0.606)	Data Time 0.001 (0.043)	Loss 2.5942 (2.4425)	Entropy 1.00421 (1.00527)	Top-1 acc 62.500 (65.262)	Top-5 acc 80.859 (84.708)	lr 0.00917
Train [71][1000/3239]	Time 51.604 (0.654)	Data Time 0.001 (0.042)	Loss 2.4329 (2.4427)	Entropy 1.00421 (1.00526)	Top-1 acc 66.016 (65.257)	Top-5 acc 85.547 (84.701)	lr 0.00917
Train [71][1010/3239]	Time 0.305 (0.651)	Data Time 0.003 (0.042)	Loss 2.4354 (2.4426)	Entropy 1.00417 (1.00525)	Top-1 acc 65.625 (65.256)	Top-5 acc 84.375 (84.705)	lr 0.00917
Train [71][1020/3239]	Time 0.227 (0.650)	Data Time 0.001 (0.042)	Loss 2.5068 (2.4430)	Entropy 1.00419 (1.00524)	Top-1 acc 68.750 (65.256)	Top-5 acc 81.641 (84.694)	lr 0.00917
Train [71][1030/3239]	Time 0.232 (0.649)	Data Time 0.001 (0.041)	Loss 2.5739 (2.4429)	Entropy 1.00418 (1.00523)	Top-1 acc 62.109 (65.262)	Top-5 acc 82.812 (84.699)	lr 0.00916
Train [71][1040/3239]	Time 0.212 (0.647)	Data Time 0.001 (0.041)	Loss 2.4232 (2.4429)	Entropy 1.00417 (1.00522)	Top-1 acc 64.844 (65.263)	Top-5 acc 83.984 (84.705)	lr 0.00916
Train [71][1050/3239]	Time 0.238 (0.645)	Data Time 0.001 (0.040)	Loss 2.5746 (2.4430)	Entropy 1.00418 (1.00521)	Top-1 acc 60.156 (65.255)	Top-5 acc 80.469 (84.701)	lr 0.00916
Train [71][1060/3239]	Time 0.257 (0.644)	Data Time 0.002 (0.040)	Loss 2.4197 (2.4429)	Entropy 1.00417 (1.00520)	Top-1 acc 67.969 (65.260)	Top-5 acc 81.641 (84.698)	lr 0.00916
Train [71][1070/3239]	Time 0.318 (0.642)	Data Time 0.001 (0.040)	Loss 2.4661 (2.4431)	Entropy 1.00422 (1.00519)	Top-1 acc 63.281 (65.254)	Top-5 acc 86.328 (84.698)	lr 0.00916
Train [71][1080/3239]	Time 0.224 (0.641)	Data Time 0.001 (0.039)	Loss 2.2798 (2.4438)	Entropy 1.00418 (1.00518)	Top-1 acc 70.312 (65.241)	Top-5 acc 85.156 (84.677)	lr 0.00916
Train [71][1090/3239]	Time 0.236 (0.639)	Data Time 0.002 (0.039)	Loss 2.5355 (2.4441)	Entropy 1.00415 (1.00517)	Top-1 acc 60.938 (65.243)	Top-5 acc 83.203 (84.673)	lr 0.00916
Train [71][1100/3239]	Time 0.280 (0.638)	Data Time 0.001 (0.039)	Loss 2.4502 (2.4443)	Entropy 1.00414 (1.00516)	Top-1 acc 66.406 (65.241)	Top-5 acc 83.984 (84.668)	lr 0.00916
Train [71][1110/3239]	Time 2.585 (0.637)	Data Time 0.001 (0.038)	Loss 2.8025 (2.4449)	Entropy 1.00414 (1.00515)	Top-1 acc 55.078 (65.232)	Top-5 acc 80.859 (84.665)	lr 0.00916
Train [71][1120/3239]	Time 0.262 (0.633)	Data Time 0.001 (0.038)	Loss 2.5886 (2.4451)	Entropy 1.00408 (1.00514)	Top-1 acc 62.891 (65.232)	Top-5 acc 82.031 (84.667)	lr 0.00916
Train [71][1130/3239]	Time 0.231 (0.632)	Data Time 0.001 (0.038)	Loss 2.6322 (2.4449)	Entropy 1.00404 (1.00513)	Top-1 acc 58.594 (65.234)	Top-5 acc 79.688 (84.673)	lr 0.00915
Train [71][1140/3239]	Time 0.243 (0.630)	Data Time 0.001 (0.037)	Loss 2.4575 (2.4454)	Entropy 1.00401 (1.00513)	Top-1 acc 68.750 (65.223)	Top-5 acc 83.984 (84.664)	lr 0.00915
Train [71][1150/3239]	Time 0.237 (0.629)	Data Time 0.001 (0.037)	Loss 2.3992 (2.4450)	Entropy 1.00395 (1.00511)	Top-1 acc 64.453 (65.234)	Top-5 acc 84.766 (84.673)	lr 0.00915
Train [71][1160/3239]	Time 0.224 (0.627)	Data Time 0.001 (0.037)	Loss 2.5535 (2.4452)	Entropy 1.00389 (1.00510)	Top-1 acc 62.109 (65.228)	Top-5 acc 84.375 (84.670)	lr 0.00915
Train [71][1170/3239]	Time 0.266 (0.626)	Data Time 0.001 (0.036)	Loss 2.2592 (2.4452)	Entropy 1.00383 (1.00509)	Top-1 acc 69.922 (65.229)	Top-5 acc 89.062 (84.663)	lr 0.00915
Train [71][1180/3239]	Time 0.242 (0.625)	Data Time 0.001 (0.036)	Loss 2.5710 (2.4450)	Entropy 1.00382 (1.00508)	Top-1 acc 63.672 (65.238)	Top-5 acc 84.375 (84.667)	lr 0.00915
Train [71][1190/3239]	Time 0.228 (0.624)	Data Time 0.001 (0.036)	Loss 2.5813 (2.4455)	Entropy 1.00378 (1.00507)	Top-1 acc 59.766 (65.224)	Top-5 acc 82.422 (84.660)	lr 0.00915
Train [71][1200/3239]	Time 0.317 (0.623)	Data Time 0.001 (0.036)	Loss 2.5908 (2.4461)	Entropy 1.00384 (1.00506)	Top-1 acc 60.938 (65.213)	Top-5 acc 80.078 (84.648)	lr 0.00915
Train [71][1210/3239]	Time 0.246 (0.621)	Data Time 0.001 (0.035)	Loss 2.4040 (2.4461)	Entropy 1.00384 (1.00505)	Top-1 acc 70.312 (65.223)	Top-5 acc 86.328 (84.651)	lr 0.00915
Train [71][1220/3239]	Time 2.518 (0.620)	Data Time 0.002 (0.035)	Loss 2.2875 (2.4460)	Entropy 1.00384 (1.00504)	Top-1 acc 67.578 (65.233)	Top-5 acc 87.891 (84.653)	lr 0.00915
Train [71][1230/3239]	Time 0.252 (0.617)	Data Time 0.002 (0.035)	Loss 2.5765 (2.4455)	Entropy 1.00383 (1.00503)	Top-1 acc 60.938 (65.252)	Top-5 acc 83.984 (84.662)	lr 0.00914
Train [71][1240/3239]	Time 0.213 (0.616)	Data Time 0.001 (0.034)	Loss 2.3708 (2.4456)	Entropy 1.00383 (1.00502)	Top-1 acc 63.281 (65.242)	Top-5 acc 88.281 (84.660)	lr 0.00914
Train [71][1250/3239]	Time 0.227 (0.615)	Data Time 0.001 (0.034)	Loss 2.3712 (2.4451)	Entropy 1.00375 (1.00501)	Top-1 acc 65.234 (65.250)	Top-5 acc 87.109 (84.671)	lr 0.00914
Train [71][1260/3239]	Time 0.231 (0.614)	Data Time 0.001 (0.034)	Loss 2.5161 (2.4454)	Entropy 1.00371 (1.00500)	Top-1 acc 61.719 (65.246)	Top-5 acc 83.594 (84.664)	lr 0.00914
Train [71][1270/3239]	Time 0.224 (0.613)	Data Time 0.001 (0.034)	Loss 2.4926 (2.4451)	Entropy 1.00367 (1.00499)	Top-1 acc 64.844 (65.245)	Top-5 acc 82.812 (84.669)	lr 0.00914
Train [71][1280/3239]	Time 0.235 (0.611)	Data Time 0.001 (0.033)	Loss 2.5311 (2.4454)	Entropy 1.00362 (1.00498)	Top-1 acc 55.469 (65.235)	Top-5 acc 84.375 (84.666)	lr 0.00914
Train [71][1290/3239]	Time 0.318 (0.610)	Data Time 0.001 (0.033)	Loss 2.3835 (2.4447)	Entropy 1.00362 (1.00497)	Top-1 acc 66.016 (65.249)	Top-5 acc 87.500 (84.681)	lr 0.00914
Train [71][1300/3239]	Time 0.221 (0.609)	Data Time 0.001 (0.033)	Loss 2.3799 (2.4448)	Entropy 1.00360 (1.00496)	Top-1 acc 67.969 (65.247)	Top-5 acc 85.547 (84.678)	lr 0.00914
Train [71][1310/3239]	Time 0.255 (0.608)	Data Time 0.002 (0.033)	Loss 2.3269 (2.4449)	Entropy 1.00358 (1.00495)	Top-1 acc 70.703 (65.248)	Top-5 acc 86.719 (84.683)	lr 0.00914
Train [71][1320/3239]	Time 0.246 (0.607)	Data Time 0.001 (0.033)	Loss 2.4963 (2.4450)	Entropy 1.00357 (1.00494)	Top-1 acc 65.234 (65.248)	Top-5 acc 83.984 (84.683)	lr 0.00914
Train [71][1330/3239]	Time 2.653 (0.606)	Data Time 0.001 (0.032)	Loss 2.4497 (2.4447)	Entropy 1.00357 (1.00493)	Top-1 acc 63.281 (65.255)	Top-5 acc 87.109 (84.687)	lr 0.00914
Train [71][1340/3239]	Time 0.257 (0.604)	Data Time 0.001 (0.032)	Loss 2.5452 (2.4448)	Entropy 1.00356 (1.00492)	Top-1 acc 60.547 (65.250)	Top-5 acc 83.984 (84.681)	lr 0.00913
Train [71][1350/3239]	Time 0.242 (0.603)	Data Time 0.001 (0.032)	Loss 2.4449 (2.4455)	Entropy 1.00350 (1.00491)	Top-1 acc 61.328 (65.228)	Top-5 acc 86.328 (84.674)	lr 0.00913
Train [71][1360/3239]	Time 0.264 (0.602)	Data Time 0.002 (0.032)	Loss 2.5208 (2.4458)	Entropy 1.00339 (1.00490)	Top-1 acc 62.891 (65.214)	Top-5 acc 83.984 (84.671)	lr 0.00913
Train [71][1370/3239]	Time 0.390 (0.637)	Data Time 0.004 (0.031)	Loss 2.4345 (2.4460)	Entropy 1.00337 (1.00489)	Top-1 acc 63.672 (65.201)	Top-5 acc 83.984 (84.666)	lr 0.00913
Train [71][1380/3239]	Time 0.328 (0.637)	Data Time 0.002 (0.031)	Loss 2.4350 (2.4457)	Entropy 1.00390 (1.00488)	Top-1 acc 67.188 (65.208)	Top-5 acc 85.156 (84.674)	lr 0.00913
Train [71][1390/3239]	Time 0.226 (0.636)	Data Time 0.001 (0.031)	Loss 2.4794 (2.4453)	Entropy 1.00381 (1.00487)	Top-1 acc 62.500 (65.217)	Top-5 acc 83.984 (84.680)	lr 0.00913
Train [71][1400/3239]	Time 0.247 (0.635)	Data Time 0.001 (0.031)	Loss 2.4996 (2.4450)	Entropy 1.00380 (1.00486)	Top-1 acc 65.234 (65.227)	Top-5 acc 84.375 (84.689)	lr 0.00913
Train [71][1410/3239]	Time 0.226 (0.634)	Data Time 0.001 (0.031)	Loss 2.3606 (2.4450)	Entropy 1.00381 (1.00486)	Top-1 acc 67.578 (65.226)	Top-5 acc 86.328 (84.685)	lr 0.00913
Train [71][1420/3239]	Time 0.218 (0.633)	Data Time 0.001 (0.030)	Loss 2.2633 (2.4449)	Entropy 1.00375 (1.00485)	Top-1 acc 71.094 (65.225)	Top-5 acc 85.547 (84.689)	lr 0.00913
Train [71][1430/3239]	Time 0.269 (0.632)	Data Time 0.001 (0.030)	Loss 2.5164 (2.4448)	Entropy 1.00359 (1.00484)	Top-1 acc 68.359 (65.231)	Top-5 acc 85.938 (84.688)	lr 0.00913
Train [71][1440/3239]	Time 2.486 (0.631)	Data Time 0.001 (0.030)	Loss 2.4645 (2.4451)	Entropy 1.00359 (1.00483)	Top-1 acc 67.188 (65.229)	Top-5 acc 82.812 (84.682)	lr 0.00912
Train [71][1450/3239]	Time 0.296 (0.628)	Data Time 0.001 (0.030)	Loss 2.3082 (2.4455)	Entropy 1.00350 (1.00482)	Top-1 acc 67.578 (65.220)	Top-5 acc 87.500 (84.674)	lr 0.00912
Train [71][1460/3239]	Time 0.313 (0.627)	Data Time 0.001 (0.030)	Loss 2.4305 (2.4454)	Entropy 1.00346 (1.00481)	Top-1 acc 65.234 (65.224)	Top-5 acc 85.938 (84.678)	lr 0.00912
Train [71][1470/3239]	Time 0.334 (0.626)	Data Time 0.002 (0.029)	Loss 2.4788 (2.4456)	Entropy 1.00339 (1.00480)	Top-1 acc 61.719 (65.214)	Top-5 acc 84.766 (84.677)	lr 0.00912
Train [71][1480/3239]	Time 0.244 (0.625)	Data Time 0.001 (0.029)	Loss 2.3255 (2.4451)	Entropy 1.00334 (1.00479)	Top-1 acc 68.359 (65.226)	Top-5 acc 87.891 (84.687)	lr 0.00912
Train [71][1490/3239]	Time 0.262 (0.624)	Data Time 0.001 (0.029)	Loss 2.5628 (2.4451)	Entropy 1.00328 (1.00478)	Top-1 acc 62.891 (65.231)	Top-5 acc 83.203 (84.686)	lr 0.00912
Train [71][1500/3239]	Time 0.236 (0.623)	Data Time 0.001 (0.029)	Loss 2.5075 (2.4452)	Entropy 1.00328 (1.00477)	Top-1 acc 67.188 (65.236)	Top-5 acc 83.203 (84.684)	lr 0.00912
Train [71][1510/3239]	Time 0.235 (0.622)	Data Time 0.001 (0.029)	Loss 2.4069 (2.4451)	Entropy 1.00326 (1.00476)	Top-1 acc 67.969 (65.238)	Top-5 acc 85.938 (84.685)	lr 0.00912
Train [71][1520/3239]	Time 0.252 (0.621)	Data Time 0.001 (0.028)	Loss 2.4478 (2.4452)	Entropy 1.00323 (1.00475)	Top-1 acc 66.406 (65.244)	Top-5 acc 82.422 (84.675)	lr 0.00912
Train [71][1530/3239]	Time 0.237 (0.621)	Data Time 0.001 (0.028)	Loss 2.6362 (2.4457)	Entropy 1.00314 (1.00474)	Top-1 acc 63.281 (65.234)	Top-5 acc 81.250 (84.668)	lr 0.00912
Train [71][1540/3239]	Time 0.240 (0.620)	Data Time 0.001 (0.028)	Loss 2.4809 (2.4458)	Entropy 1.00316 (1.00473)	Top-1 acc 66.797 (65.226)	Top-5 acc 85.156 (84.667)	lr 0.00911
Train [71][1550/3239]	Time 2.471 (0.619)	Data Time 0.001 (0.028)	Loss 2.5194 (2.4459)	Entropy 1.00316 (1.00472)	Top-1 acc 64.453 (65.227)	Top-5 acc 83.984 (84.662)	lr 0.00911
Train [71][1560/3239]	Time 0.347 (0.616)	Data Time 0.001 (0.028)	Loss 2.4770 (2.4461)	Entropy 1.00312 (1.00471)	Top-1 acc 67.188 (65.222)	Top-5 acc 83.203 (84.660)	lr 0.00911
Train [71][1570/3239]	Time 0.232 (0.615)	Data Time 0.001 (0.028)	Loss 2.4027 (2.4465)	Entropy 1.00307 (1.00470)	Top-1 acc 67.188 (65.209)	Top-5 acc 85.547 (84.652)	lr 0.00911
Train [71][1580/3239]	Time 0.228 (0.614)	Data Time 0.001 (0.027)	Loss 2.4227 (2.4465)	Entropy 1.00303 (1.00469)	Top-1 acc 65.234 (65.204)	Top-5 acc 84.375 (84.649)	lr 0.00911
Train [71][1590/3239]	Time 0.222 (0.613)	Data Time 0.001 (0.027)	Loss 2.3283 (2.4464)	Entropy 1.00295 (1.00468)	Top-1 acc 68.359 (65.209)	Top-5 acc 86.719 (84.649)	lr 0.00911
Train [71][1600/3239]	Time 0.276 (0.612)	Data Time 0.001 (0.027)	Loss 2.4944 (2.4465)	Entropy 1.00288 (1.00467)	Top-1 acc 60.156 (65.209)	Top-5 acc 83.594 (84.647)	lr 0.00911
Train [71][1610/3239]	Time 0.214 (0.612)	Data Time 0.001 (0.027)	Loss 2.5204 (2.4468)	Entropy 1.00285 (1.00466)	Top-1 acc 61.328 (65.202)	Top-5 acc 85.156 (84.650)	lr 0.00911
Train [71][1620/3239]	Time 0.225 (0.611)	Data Time 0.001 (0.027)	Loss 2.5038 (2.4470)	Entropy 1.00283 (1.00465)	Top-1 acc 64.844 (65.199)	Top-5 acc 82.812 (84.649)	lr 0.00911
Train [71][1630/3239]	Time 0.226 (0.610)	Data Time 0.001 (0.027)	Loss 2.5040 (2.4470)	Entropy 1.00283 (1.00464)	Top-1 acc 66.797 (65.207)	Top-5 acc 79.688 (84.647)	lr 0.00911
Train [71][1640/3239]	Time 0.288 (0.609)	Data Time 0.001 (0.027)	Loss 2.2606 (2.4469)	Entropy 1.00280 (1.00463)	Top-1 acc 67.969 (65.199)	Top-5 acc 87.109 (84.651)	lr 0.00911
Train [71][1650/3239]	Time 0.266 (0.608)	Data Time 0.001 (0.026)	Loss 2.5278 (2.4471)	Entropy 1.00278 (1.00461)	Top-1 acc 61.719 (65.193)	Top-5 acc 82.422 (84.650)	lr 0.00910
Train [71][1660/3239]	Time 2.535 (0.608)	Data Time 0.001 (0.026)	Loss 2.3461 (2.4469)	Entropy 1.00278 (1.00460)	Top-1 acc 70.312 (65.196)	Top-5 acc 89.062 (84.653)	lr 0.00910
Train [71][1670/3239]	Time 0.271 (0.606)	Data Time 0.002 (0.026)	Loss 2.5405 (2.4470)	Entropy 1.00279 (1.00459)	Top-1 acc 66.797 (65.196)	Top-5 acc 83.984 (84.652)	lr 0.00910
Train [71][1680/3239]	Time 0.230 (0.605)	Data Time 0.001 (0.026)	Loss 2.4805 (2.4473)	Entropy 1.00267 (1.00458)	Top-1 acc 66.797 (65.184)	Top-5 acc 83.984 (84.644)	lr 0.00910
Train [71][1690/3239]	Time 0.273 (0.604)	Data Time 0.001 (0.026)	Loss 2.3820 (2.4473)	Entropy 1.00264 (1.00457)	Top-1 acc 66.016 (65.187)	Top-5 acc 87.891 (84.649)	lr 0.00910
Train [71][1700/3239]	Time 0.236 (0.603)	Data Time 0.001 (0.026)	Loss 2.5288 (2.4473)	Entropy 1.00257 (1.00456)	Top-1 acc 63.672 (65.188)	Top-5 acc 83.203 (84.649)	lr 0.00910
Train [71][1710/3239]	Time 0.214 (0.602)	Data Time 0.001 (0.025)	Loss 2.3243 (2.4474)	Entropy 1.00256 (1.00455)	Top-1 acc 67.578 (65.181)	Top-5 acc 85.547 (84.642)	lr 0.00910
Train [71][1720/3239]	Time 0.241 (0.602)	Data Time 0.001 (0.025)	Loss 2.5894 (2.4480)	Entropy 1.00263 (1.00454)	Top-1 acc 61.719 (65.165)	Top-5 acc 83.984 (84.632)	lr 0.00910
Train [71][1730/3239]	Time 0.277 (0.630)	Data Time 0.004 (0.025)	Loss 2.3361 (2.4480)	Entropy 1.00263 (1.00452)	Top-1 acc 69.922 (65.168)	Top-5 acc 86.328 (84.630)	lr 0.00910
Train [71][1740/3239]	Time 0.235 (0.630)	Data Time 0.002 (0.025)	Loss 2.5616 (2.4482)	Entropy 1.00254 (1.00451)	Top-1 acc 65.234 (65.168)	Top-5 acc 82.031 (84.628)	lr 0.00910
Train [71][1750/3239]	Time 0.232 (0.629)	Data Time 0.002 (0.025)	Loss 2.3328 (2.4480)	Entropy 1.00250 (1.00450)	Top-1 acc 66.016 (65.171)	Top-5 acc 85.938 (84.631)	lr 0.00909
Train [71][1760/3239]	Time 0.260 (0.628)	Data Time 0.002 (0.025)	Loss 2.4172 (2.4480)	Entropy 1.00250 (1.00449)	Top-1 acc 67.969 (65.171)	Top-5 acc 82.031 (84.627)	lr 0.00909
Train [71][1770/3239]	Time 2.598 (0.628)	Data Time 0.001 (0.025)	Loss 2.3734 (2.4482)	Entropy 1.00250 (1.00448)	Top-1 acc 66.406 (65.168)	Top-5 acc 85.547 (84.625)	lr 0.00909
Train [71][1780/3239]	Time 0.350 (0.626)	Data Time 0.002 (0.025)	Loss 2.5204 (2.4481)	Entropy 1.00252 (1.00447)	Top-1 acc 61.328 (65.169)	Top-5 acc 81.641 (84.625)	lr 0.00909
Train [71][1790/3239]	Time 0.227 (0.625)	Data Time 0.001 (0.024)	Loss 2.5647 (2.4484)	Entropy 1.00250 (1.00446)	Top-1 acc 60.938 (65.163)	Top-5 acc 82.812 (84.621)	lr 0.00909
Train [71][1800/3239]	Time 0.226 (0.624)	Data Time 0.001 (0.024)	Loss 2.3445 (2.4483)	Entropy 1.00243 (1.00445)	Top-1 acc 69.141 (65.167)	Top-5 acc 87.109 (84.623)	lr 0.00909
Train [71][1810/3239]	Time 0.242 (0.623)	Data Time 0.001 (0.024)	Loss 2.4822 (2.4484)	Entropy 1.00241 (1.00443)	Top-1 acc 66.016 (65.161)	Top-5 acc 83.203 (84.620)	lr 0.00909
Train [71][1820/3239]	Time 0.217 (0.622)	Data Time 0.001 (0.024)	Loss 2.5036 (2.4482)	Entropy 1.00245 (1.00442)	Top-1 acc 64.062 (65.167)	Top-5 acc 85.938 (84.625)	lr 0.00909
Train [71][1830/3239]	Time 0.234 (0.621)	Data Time 0.001 (0.024)	Loss 3.7960 (2.4489)	Entropy 1.00244 (1.00441)	Top-1 acc 36.328 (65.158)	Top-5 acc 63.281 (84.614)	lr 0.00909
Train [71][1840/3239]	Time 0.237 (0.621)	Data Time 0.001 (0.024)	Loss 2.5477 (2.4490)	Entropy 1.00241 (1.00440)	Top-1 acc 58.984 (65.157)	Top-5 acc 84.375 (84.616)	lr 0.00909
Train [71][1850/3239]	Time 0.214 (0.620)	Data Time 0.001 (0.024)	Loss 2.4716 (2.4493)	Entropy 1.00239 (1.00439)	Top-1 acc 61.328 (65.147)	Top-5 acc 82.422 (84.614)	lr 0.00908
Train [71][1860/3239]	Time 0.233 (0.619)	Data Time 0.001 (0.024)	Loss 2.5385 (2.4491)	Entropy 1.00237 (1.00438)	Top-1 acc 64.062 (65.150)	Top-5 acc 80.078 (84.613)	lr 0.00908
Train [71][1870/3239]	Time 0.330 (0.618)	Data Time 0.001 (0.023)	Loss 2.4083 (2.4489)	Entropy 1.00233 (1.00437)	Top-1 acc 64.844 (65.157)	Top-5 acc 86.719 (84.617)	lr 0.00908
Train [71][1880/3239]	Time 2.494 (0.618)	Data Time 0.002 (0.023)	Loss 2.3566 (2.4490)	Entropy 1.00233 (1.00436)	Top-1 acc 65.625 (65.149)	Top-5 acc 86.328 (84.616)	lr 0.00908
Train [71][1890/3239]	Time 0.250 (0.616)	Data Time 0.001 (0.023)	Loss 2.3393 (2.4490)	Entropy 1.00232 (1.00435)	Top-1 acc 71.875 (65.152)	Top-5 acc 85.938 (84.616)	lr 0.00908
Train [71][1900/3239]	Time 0.304 (0.615)	Data Time 0.002 (0.023)	Loss 2.4229 (2.4492)	Entropy 1.00217 (1.00434)	Top-1 acc 66.016 (65.149)	Top-5 acc 84.766 (84.615)	lr 0.00908
Train [71][1910/3239]	Time 0.232 (0.614)	Data Time 0.001 (0.023)	Loss 2.4453 (2.4491)	Entropy 1.00211 (1.00433)	Top-1 acc 67.578 (65.156)	Top-5 acc 85.547 (84.617)	lr 0.00908
Train [71][1920/3239]	Time 0.329 (0.613)	Data Time 0.001 (0.023)	Loss 2.2753 (2.4489)	Entropy 1.00210 (1.00431)	Top-1 acc 70.703 (65.160)	Top-5 acc 87.891 (84.620)	lr 0.00908
Train [71][1930/3239]	Time 0.208 (0.613)	Data Time 0.001 (0.023)	Loss 2.3894 (2.4493)	Entropy 1.00207 (1.00430)	Top-1 acc 66.797 (65.149)	Top-5 acc 85.938 (84.614)	lr 0.00908
Train [71][1940/3239]	Time 0.227 (0.612)	Data Time 0.001 (0.023)	Loss 2.4775 (2.4493)	Entropy 1.00197 (1.00429)	Top-1 acc 60.547 (65.145)	Top-5 acc 84.766 (84.619)	lr 0.00908
Train [71][1950/3239]	Time 0.230 (0.611)	Data Time 0.001 (0.023)	Loss 2.3645 (2.4493)	Entropy 1.00194 (1.00428)	Top-1 acc 64.062 (65.138)	Top-5 acc 87.500 (84.619)	lr 0.00907
Train [71][1960/3239]	Time 0.251 (0.610)	Data Time 0.001 (0.022)	Loss 2.4681 (2.4493)	Entropy 1.00192 (1.00427)	Top-1 acc 58.594 (65.133)	Top-5 acc 87.109 (84.621)	lr 0.00907
Train [71][1970/3239]	Time 0.232 (0.610)	Data Time 0.001 (0.022)	Loss 2.5026 (2.4494)	Entropy 1.00196 (1.00425)	Top-1 acc 64.062 (65.128)	Top-5 acc 81.250 (84.615)	lr 0.00907
Train [71][1980/3239]	Time 0.254 (0.609)	Data Time 0.002 (0.022)	Loss 2.4872 (2.4492)	Entropy 1.00198 (1.00424)	Top-1 acc 62.500 (65.128)	Top-5 acc 83.984 (84.621)	lr 0.00907
Train [71][1990/3239]	Time 2.521 (0.608)	Data Time 0.001 (0.022)	Loss 2.3388 (2.4492)	Entropy 1.00198 (1.00423)	Top-1 acc 67.578 (65.127)	Top-5 acc 86.719 (84.624)	lr 0.00907
Train [71][2000/3239]	Time 0.226 (0.606)	Data Time 0.001 (0.022)	Loss 2.5124 (2.4493)	Entropy 1.00194 (1.00422)	Top-1 acc 61.719 (65.124)	Top-5 acc 83.594 (84.623)	lr 0.00907
Train [71][2010/3239]	Time 0.214 (0.606)	Data Time 0.001 (0.022)	Loss 2.5002 (2.4493)	Entropy 1.00194 (1.00421)	Top-1 acc 63.672 (65.127)	Top-5 acc 82.812 (84.625)	lr 0.00907
Train [71][2020/3239]	Time 0.213 (0.605)	Data Time 0.001 (0.022)	Loss 2.4898 (2.4491)	Entropy 1.00187 (1.00420)	Top-1 acc 64.453 (65.131)	Top-5 acc 80.859 (84.627)	lr 0.00907
Train [71][2030/3239]	Time 0.224 (0.604)	Data Time 0.001 (0.022)	Loss 2.3556 (2.4493)	Entropy 1.00182 (1.00419)	Top-1 acc 67.578 (65.131)	Top-5 acc 84.375 (84.622)	lr 0.00907
Train [71][2040/3239]	Time 0.234 (0.604)	Data Time 0.001 (0.022)	Loss 2.5253 (2.4491)	Entropy 1.00184 (1.00417)	Top-1 acc 60.547 (65.131)	Top-5 acc 83.984 (84.627)	lr 0.00907
Train [71][2050/3239]	Time 0.365 (0.603)	Data Time 0.001 (0.022)	Loss 2.4230 (2.4491)	Entropy 1.00184 (1.00416)	Top-1 acc 67.578 (65.137)	Top-5 acc 83.984 (84.628)	lr 0.00907
Train [71][2060/3239]	Time 0.211 (0.602)	Data Time 0.001 (0.021)	Loss 2.2821 (2.4490)	Entropy 1.00181 (1.00415)	Top-1 acc 67.578 (65.143)	Top-5 acc 88.672 (84.629)	lr 0.00906
Train [71][2070/3239]	Time 0.223 (0.602)	Data Time 0.001 (0.021)	Loss 2.4577 (2.4491)	Entropy 1.00182 (1.00414)	Top-1 acc 65.625 (65.141)	Top-5 acc 83.594 (84.627)	lr 0.00906
Train [71][2080/3239]	Time 0.223 (0.601)	Data Time 0.001 (0.021)	Loss 2.5130 (2.4491)	Entropy 1.00180 (1.00413)	Top-1 acc 63.672 (65.143)	Top-5 acc 85.156 (84.627)	lr 0.00906
Train [71][2090/3239]	Time 0.262 (0.628)	Data Time 0.003 (0.021)	Loss 2.3484 (2.4493)	Entropy 1.00175 (1.00412)	Top-1 acc 68.359 (65.137)	Top-5 acc 85.547 (84.624)	lr 0.00906
Train [71][2100/3239]	Time 2.535 (0.627)	Data Time 0.002 (0.021)	Loss 2.4299 (2.4491)	Entropy 1.00175 (1.00411)	Top-1 acc 65.234 (65.143)	Top-5 acc 85.156 (84.628)	lr 0.00906
Train [71][2110/3239]	Time 0.230 (0.625)	Data Time 0.002 (0.021)	Loss 2.4345 (2.4492)	Entropy 1.00171 (1.00410)	Top-1 acc 67.969 (65.140)	Top-5 acc 86.328 (84.625)	lr 0.00906
Train [71][2120/3239]	Time 0.235 (0.625)	Data Time 0.001 (0.021)	Loss 2.4264 (2.4493)	Entropy 1.00167 (1.00408)	Top-1 acc 69.531 (65.145)	Top-5 acc 83.203 (84.626)	lr 0.00906
Train [71][2130/3239]	Time 0.230 (0.624)	Data Time 0.001 (0.021)	Loss 2.3694 (2.4495)	Entropy 1.00161 (1.00407)	Top-1 acc 66.016 (65.139)	Top-5 acc 85.938 (84.623)	lr 0.00906
Train [71][2140/3239]	Time 0.330 (0.623)	Data Time 0.001 (0.021)	Loss 2.4431 (2.4495)	Entropy 1.00160 (1.00406)	Top-1 acc 64.453 (65.139)	Top-5 acc 82.031 (84.622)	lr 0.00906
Train [71][2150/3239]	Time 0.220 (0.622)	Data Time 0.001 (0.021)	Loss 2.5280 (2.4497)	Entropy 1.00143 (1.00405)	Top-1 acc 58.984 (65.131)	Top-5 acc 83.203 (84.619)	lr 0.00906
Train [71][2160/3239]	Time 0.224 (0.622)	Data Time 0.001 (0.021)	Loss 2.3426 (2.4495)	Entropy 1.00136 (1.00404)	Top-1 acc 66.797 (65.134)	Top-5 acc 85.938 (84.621)	lr 0.00905
Train [71][2170/3239]	Time 0.227 (0.621)	Data Time 0.001 (0.020)	Loss 2.4879 (2.4495)	Entropy 1.00135 (1.00402)	Top-1 acc 64.844 (65.131)	Top-5 acc 83.594 (84.623)	lr 0.00905
Train [71][2180/3239]	Time 0.239 (0.620)	Data Time 0.001 (0.020)	Loss 2.6262 (2.4497)	Entropy 1.00138 (1.00401)	Top-1 acc 58.203 (65.129)	Top-5 acc 80.469 (84.617)	lr 0.00905
Train [71][2190/3239]	Time 0.244 (0.620)	Data Time 0.002 (0.020)	Loss 2.5053 (2.4495)	Entropy 1.00136 (1.00400)	Top-1 acc 62.109 (65.131)	Top-5 acc 83.984 (84.622)	lr 0.00905
Train [71][2200/3239]	Time 0.264 (0.619)	Data Time 0.001 (0.020)	Loss 2.5373 (2.4496)	Entropy 1.00138 (1.00399)	Top-1 acc 59.375 (65.128)	Top-5 acc 85.156 (84.622)	lr 0.00905
Train [71][2210/3239]	Time 2.496 (0.619)	Data Time 0.001 (0.020)	Loss 2.4496 (2.4496)	Entropy 1.00138 (1.00398)	Top-1 acc 66.406 (65.127)	Top-5 acc 84.766 (84.625)	lr 0.00905
Train [71][2220/3239]	Time 0.235 (0.617)	Data Time 0.001 (0.020)	Loss 2.4634 (2.4496)	Entropy 1.00133 (1.00396)	Top-1 acc 63.281 (65.121)	Top-5 acc 84.375 (84.624)	lr 0.00905
Train [71][2230/3239]	Time 0.409 (0.616)	Data Time 0.001 (0.020)	Loss 2.6011 (2.4497)	Entropy 1.00131 (1.00395)	Top-1 acc 63.672 (65.124)	Top-5 acc 80.469 (84.622)	lr 0.00905
Train [71][2240/3239]	Time 0.237 (0.616)	Data Time 0.001 (0.020)	Loss 2.5526 (2.4500)	Entropy 1.00132 (1.00394)	Top-1 acc 65.234 (65.115)	Top-5 acc 80.859 (84.614)	lr 0.00905
Train [71][2250/3239]	Time 0.215 (0.615)	Data Time 0.001 (0.020)	Loss 2.5518 (2.4501)	Entropy 1.00129 (1.00393)	Top-1 acc 61.719 (65.113)	Top-5 acc 82.422 (84.610)	lr 0.00905
Train [71][2260/3239]	Time 0.224 (0.614)	Data Time 0.001 (0.020)	Loss 2.5199 (2.4502)	Entropy 1.00122 (1.00392)	Top-1 acc 65.234 (65.110)	Top-5 acc 82.422 (84.611)	lr 0.00904
Train [71][2270/3239]	Time 0.291 (0.614)	Data Time 0.003 (0.020)	Loss 2.5115 (2.4501)	Entropy 1.00118 (1.00390)	Top-1 acc 65.234 (65.111)	Top-5 acc 82.812 (84.613)	lr 0.00904
Train [71][2280/3239]	Time 0.211 (0.613)	Data Time 0.001 (0.020)	Loss 2.5849 (2.4501)	Entropy 1.00117 (1.00389)	Top-1 acc 64.062 (65.116)	Top-5 acc 79.297 (84.610)	lr 0.00904
Train [71][2290/3239]	Time 0.225 (0.613)	Data Time 0.001 (0.019)	Loss 2.5267 (2.4503)	Entropy 1.00118 (1.00388)	Top-1 acc 65.234 (65.105)	Top-5 acc 82.422 (84.606)	lr 0.00904
Train [71][2300/3239]	Time 0.229 (0.612)	Data Time 0.001 (0.019)	Loss 2.4400 (2.4502)	Entropy 1.00114 (1.00387)	Top-1 acc 62.891 (65.108)	Top-5 acc 82.812 (84.608)	lr 0.00904
Train [71][2310/3239]	Time 0.261 (0.612)	Data Time 0.001 (0.019)	Loss 2.5615 (2.4504)	Entropy 1.00109 (1.00386)	Top-1 acc 64.844 (65.106)	Top-5 acc 81.641 (84.605)	lr 0.00904
Train [71][2320/3239]	Time 2.653 (0.611)	Data Time 0.001 (0.019)	Loss 2.5573 (2.4504)	Entropy 1.00109 (1.00385)	Top-1 acc 62.109 (65.107)	Top-5 acc 85.156 (84.606)	lr 0.00904
Train [71][2330/3239]	Time 0.233 (0.609)	Data Time 0.001 (0.019)	Loss 2.4434 (2.4503)	Entropy 1.00099 (1.00383)	Top-1 acc 68.750 (65.107)	Top-5 acc 86.719 (84.607)	lr 0.00904
Train [71][2340/3239]	Time 0.238 (0.609)	Data Time 0.001 (0.019)	Loss 2.4522 (2.4503)	Entropy 1.00101 (1.00382)	Top-1 acc 64.453 (65.108)	Top-5 acc 85.547 (84.608)	lr 0.00904
Train [71][2350/3239]	Time 0.231 (0.608)	Data Time 0.001 (0.019)	Loss 2.4020 (2.4504)	Entropy 1.00099 (1.00381)	Top-1 acc 67.188 (65.103)	Top-5 acc 87.891 (84.608)	lr 0.00904
Train [71][2360/3239]	Time 0.229 (0.608)	Data Time 0.001 (0.019)	Loss 2.5588 (2.4504)	Entropy 1.00098 (1.00380)	Top-1 acc 63.281 (65.104)	Top-5 acc 84.766 (84.608)	lr 0.00904
Train [71][2370/3239]	Time 0.244 (0.607)	Data Time 0.001 (0.019)	Loss 2.4122 (2.4504)	Entropy 1.00094 (1.00379)	Top-1 acc 64.844 (65.106)	Top-5 acc 87.109 (84.609)	lr 0.00903
Train [71][2380/3239]	Time 0.222 (0.607)	Data Time 0.001 (0.019)	Loss 2.3852 (2.4506)	Entropy 1.00089 (1.00377)	Top-1 acc 67.188 (65.104)	Top-5 acc 87.109 (84.607)	lr 0.00903
Train [71][2390/3239]	Time 0.203 (0.606)	Data Time 0.001 (0.019)	Loss 2.5072 (2.4506)	Entropy 1.00076 (1.00376)	Top-1 acc 60.547 (65.099)	Top-5 acc 82.812 (84.608)	lr 0.00903
Train [71][2400/3239]	Time 0.237 (0.605)	Data Time 0.001 (0.019)	Loss 2.4121 (2.4506)	Entropy 1.00075 (1.00375)	Top-1 acc 67.188 (65.104)	Top-5 acc 83.594 (84.609)	lr 0.00903
Train [71][2410/3239]	Time 0.354 (0.605)	Data Time 0.002 (0.019)	Loss 2.5561 (2.4506)	Entropy 1.00078 (1.00374)	Top-1 acc 62.500 (65.103)	Top-5 acc 81.641 (84.610)	lr 0.00903
Train [71][2420/3239]	Time 0.282 (0.604)	Data Time 0.001 (0.019)	Loss 2.4919 (2.4504)	Entropy 1.00071 (1.00372)	Top-1 acc 64.062 (65.107)	Top-5 acc 85.156 (84.609)	lr 0.00903
Train [71][2430/3239]	Time 2.433 (0.604)	Data Time 0.001 (0.018)	Loss 2.4866 (2.4504)	Entropy 1.00071 (1.00371)	Top-1 acc 64.844 (65.109)	Top-5 acc 82.812 (84.607)	lr 0.00903
Train [71][2440/3239]	Time 0.228 (0.602)	Data Time 0.001 (0.018)	Loss 2.3494 (2.4504)	Entropy 1.00074 (1.00370)	Top-1 acc 66.406 (65.110)	Top-5 acc 86.328 (84.607)	lr 0.00903
Train [71][2450/3239]	Time 0.227 (0.602)	Data Time 0.001 (0.018)	Loss 2.4494 (2.4502)	Entropy 1.00069 (1.00369)	Top-1 acc 64.844 (65.113)	Top-5 acc 83.984 (84.610)	lr 0.00903
Train [71][2460/3239]	Time 0.277 (0.624)	Data Time 0.002 (0.018)	Loss 2.5495 (2.4504)	Entropy 1.00065 (1.00367)	Top-1 acc 60.156 (65.110)	Top-5 acc 85.156 (84.611)	lr 0.00903
Train [71][2470/3239]	Time 0.232 (0.623)	Data Time 0.002 (0.018)	Loss 2.2125 (2.4501)	Entropy 1.00064 (1.00366)	Top-1 acc 73.828 (65.119)	Top-5 acc 89.844 (84.615)	lr 0.00902
Train [71][2480/3239]	Time 0.251 (0.623)	Data Time 0.001 (0.018)	Loss 2.4650 (2.4501)	Entropy 1.00066 (1.00365)	Top-1 acc 69.531 (65.121)	Top-5 acc 82.812 (84.615)	lr 0.00902
Train [71][2490/3239]	Time 0.234 (0.622)	Data Time 0.001 (0.018)	Loss 2.4352 (2.4502)	Entropy 1.00064 (1.00364)	Top-1 acc 67.188 (65.120)	Top-5 acc 83.984 (84.615)	lr 0.00902
Train [71][2500/3239]	Time 0.348 (0.622)	Data Time 0.001 (0.018)	Loss 2.3837 (2.4502)	Entropy 1.00060 (1.00363)	Top-1 acc 67.578 (65.118)	Top-5 acc 86.719 (84.614)	lr 0.00902
Train [71][2510/3239]	Time 0.240 (0.621)	Data Time 0.001 (0.018)	Loss 2.6442 (2.4505)	Entropy 1.00062 (1.00361)	Top-1 acc 63.281 (65.107)	Top-5 acc 81.641 (84.608)	lr 0.00902
Train [71][2520/3239]	Time 0.219 (0.620)	Data Time 0.001 (0.018)	Loss 2.3368 (2.4504)	Entropy 1.00048 (1.00360)	Top-1 acc 64.844 (65.107)	Top-5 acc 85.938 (84.609)	lr 0.00902
Train [71][2530/3239]	Time 0.279 (0.620)	Data Time 0.001 (0.018)	Loss 2.5190 (2.4506)	Entropy 1.00030 (1.00359)	Top-1 acc 63.281 (65.106)	Top-5 acc 83.203 (84.608)	lr 0.00902
Train [71][2540/3239]	Time 2.554 (0.619)	Data Time 0.001 (0.018)	Loss 2.3688 (2.4507)	Entropy 1.00030 (1.00358)	Top-1 acc 67.578 (65.105)	Top-5 acc 87.109 (84.606)	lr 0.00902
Train [71][2550/3239]	Time 0.234 (0.618)	Data Time 0.001 (0.018)	Loss 2.4222 (2.4507)	Entropy 1.00025 (1.00356)	Top-1 acc 62.891 (65.108)	Top-5 acc 86.328 (84.607)	lr 0.00902
Train [71][2560/3239]	Time 0.232 (0.617)	Data Time 0.001 (0.018)	Loss 2.5504 (2.4507)	Entropy 1.00027 (1.00355)	Top-1 acc 62.891 (65.109)	Top-5 acc 82.812 (84.606)	lr 0.00902
Train [71][2570/3239]	Time 0.251 (0.617)	Data Time 0.001 (0.018)	Loss 2.5003 (2.4508)	Entropy 1.00030 (1.00354)	Top-1 acc 62.500 (65.105)	Top-5 acc 85.156 (84.605)	lr 0.00901
Train [71][2580/3239]	Time 0.237 (0.616)	Data Time 0.001 (0.017)	Loss 2.5977 (2.4508)	Entropy 1.00030 (1.00353)	Top-1 acc 59.766 (65.105)	Top-5 acc 82.031 (84.607)	lr 0.00901
Train [71][2590/3239]	Time 0.370 (0.616)	Data Time 0.001 (0.017)	Loss 2.3911 (2.4505)	Entropy 1.00034 (1.00351)	Top-1 acc 65.234 (65.110)	Top-5 acc 85.938 (84.612)	lr 0.00901
Train [71][2600/3239]	Time 0.225 (0.615)	Data Time 0.001 (0.017)	Loss 2.4900 (2.4503)	Entropy 1.00028 (1.00350)	Top-1 acc 62.891 (65.114)	Top-5 acc 82.812 (84.613)	lr 0.00901
Train [71][2610/3239]	Time 0.269 (0.615)	Data Time 0.001 (0.017)	Loss 2.3683 (2.4503)	Entropy 1.00027 (1.00349)	Top-1 acc 67.188 (65.112)	Top-5 acc 87.891 (84.614)	lr 0.00901
Train [71][2620/3239]	Time 0.236 (0.614)	Data Time 0.001 (0.017)	Loss 2.4972 (2.4502)	Entropy 1.00024 (1.00348)	Top-1 acc 66.406 (65.112)	Top-5 acc 83.594 (84.616)	lr 0.00901
Train [71][2630/3239]	Time 0.218 (0.614)	Data Time 0.001 (0.017)	Loss 2.5700 (2.4503)	Entropy 1.00022 (1.00346)	Top-1 acc 64.453 (65.114)	Top-5 acc 81.250 (84.614)	lr 0.00901
Train [71][2640/3239]	Time 0.217 (0.613)	Data Time 0.001 (0.017)	Loss 2.6856 (2.4506)	Entropy 1.00017 (1.00345)	Top-1 acc 58.594 (65.107)	Top-5 acc 80.469 (84.607)	lr 0.00901
Train [71][2650/3239]	Time 0.238 (0.613)	Data Time 0.001 (0.017)	Loss 2.6223 (2.4507)	Entropy 1.00005 (1.00344)	Top-1 acc 60.156 (65.106)	Top-5 acc 80.859 (84.605)	lr 0.00901
Train [71][2660/3239]	Time 0.226 (0.612)	Data Time 0.001 (0.017)	Loss 2.5512 (2.4506)	Entropy 1.00003 (1.00343)	Top-1 acc 61.719 (65.108)	Top-5 acc 82.031 (84.604)	lr 0.00901
Train [71][2670/3239]	Time 0.265 (0.612)	Data Time 0.001 (0.017)	Loss 2.2935 (2.4507)	Entropy 0.99998 (1.00341)	Top-1 acc 73.047 (65.106)	Top-5 acc 87.500 (84.604)	lr 0.00901
Train [71][2680/3239]	Time 0.235 (0.611)	Data Time 0.001 (0.017)	Loss 2.5896 (2.4509)	Entropy 0.99981 (1.00340)	Top-1 acc 62.891 (65.104)	Top-5 acc 82.422 (84.601)	lr 0.00900
Train [71][2690/3239]	Time 0.263 (0.611)	Data Time 0.001 (0.017)	Loss 2.4676 (2.4510)	Entropy 0.99981 (1.00339)	Top-1 acc 65.234 (65.100)	Top-5 acc 83.594 (84.599)	lr 0.00900
Train [71][2700/3239]	Time 0.232 (0.610)	Data Time 0.001 (0.017)	Loss 2.5227 (2.4512)	Entropy 0.99983 (1.00337)	Top-1 acc 64.062 (65.097)	Top-5 acc 81.641 (84.600)	lr 0.00900
Train [71][2710/3239]	Time 0.268 (0.610)	Data Time 0.001 (0.017)	Loss 2.3462 (2.4512)	Entropy 0.99979 (1.00336)	Top-1 acc 65.234 (65.094)	Top-5 acc 87.109 (84.601)	lr 0.00900
Train [71][2720/3239]	Time 0.398 (0.609)	Data Time 0.001 (0.017)	Loss 2.3665 (2.4513)	Entropy 0.99988 (1.00335)	Top-1 acc 65.625 (65.094)	Top-5 acc 86.328 (84.600)	lr 0.00900
Train [71][2730/3239]	Time 0.237 (0.609)	Data Time 0.001 (0.017)	Loss 2.5244 (2.4512)	Entropy 0.99983 (1.00334)	Top-1 acc 64.453 (65.095)	Top-5 acc 84.766 (84.598)	lr 0.00900
Train [71][2740/3239]	Time 0.253 (0.608)	Data Time 0.001 (0.017)	Loss 2.5075 (2.4512)	Entropy 0.99981 (1.00332)	Top-1 acc 62.891 (65.095)	Top-5 acc 83.984 (84.600)	lr 0.00900
Train [71][2750/3239]	Time 0.242 (0.608)	Data Time 0.001 (0.016)	Loss 2.3771 (2.4511)	Entropy 0.99960 (1.00331)	Top-1 acc 65.625 (65.096)	Top-5 acc 84.375 (84.602)	lr 0.00900
Train [71][2760/3239]	Time 0.233 (0.607)	Data Time 0.001 (0.016)	Loss 2.5120 (2.4512)	Entropy 0.99962 (1.00330)	Top-1 acc 62.891 (65.088)	Top-5 acc 83.984 (84.601)	lr 0.00900
Train [71][2770/3239]	Time 0.202 (0.607)	Data Time 0.001 (0.016)	Loss 2.4754 (2.4510)	Entropy 0.99961 (1.00328)	Top-1 acc 61.719 (65.092)	Top-5 acc 83.203 (84.607)	lr 0.00900
Train [71][2780/3239]	Time 0.206 (0.606)	Data Time 0.001 (0.016)	Loss 2.5116 (2.4512)	Entropy 0.99957 (1.00327)	Top-1 acc 62.500 (65.087)	Top-5 acc 85.156 (84.604)	lr 0.00899
Train [71][2790/3239]	Time 0.243 (0.606)	Data Time 0.001 (0.016)	Loss 2.4366 (2.4513)	Entropy 0.99949 (1.00326)	Top-1 acc 66.797 (65.087)	Top-5 acc 83.984 (84.603)	lr 0.00899
Train [71][2800/3239]	Time 0.313 (0.624)	Data Time 0.005 (0.016)	Loss 2.5040 (2.4513)	Entropy 0.99946 (1.00324)	Top-1 acc 62.109 (65.087)	Top-5 acc 86.328 (84.601)	lr 0.00899
Train [71][2810/3239]	Time 0.358 (0.624)	Data Time 0.002 (0.016)	Loss 2.5192 (2.4513)	Entropy 0.99942 (1.00323)	Top-1 acc 65.234 (65.086)	Top-5 acc 82.031 (84.601)	lr 0.00899
Train [71][2820/3239]	Time 0.229 (0.623)	Data Time 0.001 (0.016)	Loss 2.4332 (2.4512)	Entropy 0.99937 (1.00322)	Top-1 acc 65.234 (65.085)	Top-5 acc 84.766 (84.602)	lr 0.00899
Train [71][2830/3239]	Time 0.262 (0.623)	Data Time 0.002 (0.016)	Loss 2.5051 (2.4513)	Entropy 0.99934 (1.00320)	Top-1 acc 62.500 (65.086)	Top-5 acc 81.641 (84.600)	lr 0.00899
Train [71][2840/3239]	Time 0.258 (0.622)	Data Time 0.001 (0.016)	Loss 2.7004 (2.4513)	Entropy 0.99929 (1.00319)	Top-1 acc 57.812 (65.082)	Top-5 acc 80.469 (84.601)	lr 0.00899
Train [71][2850/3239]	Time 0.243 (0.622)	Data Time 0.001 (0.016)	Loss 2.5186 (2.4514)	Entropy 0.99922 (1.00318)	Top-1 acc 64.844 (65.079)	Top-5 acc 82.422 (84.598)	lr 0.00899
Train [71][2860/3239]	Time 0.238 (0.621)	Data Time 0.001 (0.016)	Loss 2.4372 (2.4514)	Entropy 0.99920 (1.00316)	Top-1 acc 63.672 (65.077)	Top-5 acc 85.156 (84.598)	lr 0.00899
Train [71][2870/3239]	Time 0.220 (0.621)	Data Time 0.001 (0.016)	Loss 2.4033 (2.4515)	Entropy 0.99911 (1.00315)	Top-1 acc 66.016 (65.076)	Top-5 acc 85.938 (84.595)	lr 0.00899
Train [71][2880/3239]	Time 0.226 (0.620)	Data Time 0.001 (0.016)	Loss 2.4553 (2.4516)	Entropy 0.99907 (1.00313)	Top-1 acc 64.453 (65.073)	Top-5 acc 83.203 (84.596)	lr 0.00898
Train [71][2890/3239]	Time 0.286 (0.620)	Data Time 0.001 (0.016)	Loss 2.3882 (2.4515)	Entropy 0.99907 (1.00312)	Top-1 acc 69.141 (65.075)	Top-5 acc 84.766 (84.596)	lr 0.00898
Train [71][2900/3239]	Time 0.382 (0.619)	Data Time 0.001 (0.016)	Loss 2.3539 (2.4516)	Entropy 0.99899 (1.00311)	Top-1 acc 67.578 (65.073)	Top-5 acc 87.109 (84.595)	lr 0.00898
Train [71][2910/3239]	Time 0.236 (0.619)	Data Time 0.001 (0.016)	Loss 2.3517 (2.4515)	Entropy 0.99883 (1.00309)	Top-1 acc 71.484 (65.077)	Top-5 acc 85.156 (84.596)	lr 0.00898
Train [71][2920/3239]	Time 0.232 (0.618)	Data Time 0.001 (0.016)	Loss 2.4590 (2.4515)	Entropy 0.99882 (1.00308)	Top-1 acc 67.969 (65.077)	Top-5 acc 85.547 (84.597)	lr 0.00898
Train [71][2930/3239]	Time 0.294 (0.618)	Data Time 0.001 (0.016)	Loss 2.4965 (2.4516)	Entropy 0.99881 (1.00306)	Top-1 acc 64.844 (65.071)	Top-5 acc 85.938 (84.595)	lr 0.00898
Train [71][2940/3239]	Time 0.272 (0.617)	Data Time 0.001 (0.016)	Loss 2.4856 (2.4516)	Entropy 0.99881 (1.00305)	Top-1 acc 62.500 (65.068)	Top-5 acc 83.984 (84.596)	lr 0.00898
Train [71][2950/3239]	Time 0.270 (0.617)	Data Time 0.001 (0.015)	Loss 2.5428 (2.4517)	Entropy 0.99876 (1.00303)	Top-1 acc 63.672 (65.071)	Top-5 acc 82.422 (84.594)	lr 0.00898
Train [71][2960/3239]	Time 0.295 (0.616)	Data Time 0.001 (0.015)	Loss 2.6972 (2.4518)	Entropy 0.99875 (1.00302)	Top-1 acc 61.719 (65.071)	Top-5 acc 78.125 (84.592)	lr 0.00898
Train [71][2970/3239]	Time 0.226 (0.616)	Data Time 0.001 (0.015)	Loss 2.4740 (2.4518)	Entropy 0.99878 (1.00300)	Top-1 acc 63.281 (65.070)	Top-5 acc 83.203 (84.592)	lr 0.00898
Train [71][2980/3239]	Time 0.309 (0.615)	Data Time 0.001 (0.015)	Loss 2.5389 (2.4518)	Entropy 0.99878 (1.00299)	Top-1 acc 61.328 (65.068)	Top-5 acc 81.641 (84.590)	lr 0.00897
Train [71][2990/3239]	Time 0.329 (0.615)	Data Time 0.001 (0.015)	Loss 2.5657 (2.4519)	Entropy 0.99869 (1.00298)	Top-1 acc 61.328 (65.064)	Top-5 acc 80.859 (84.589)	lr 0.00897
Train [71][3000/3239]	Time 0.254 (0.614)	Data Time 0.001 (0.015)	Loss 3.9081 (2.4525)	Entropy 0.99859 (1.00296)	Top-1 acc 40.234 (65.050)	Top-5 acc 65.625 (84.578)	lr 0.00897
Train [71][3010/3239]	Time 0.220 (0.614)	Data Time 0.001 (0.015)	Loss 2.5176 (2.4528)	Entropy 0.99857 (1.00295)	Top-1 acc 58.594 (65.042)	Top-5 acc 83.203 (84.577)	lr 0.00897
Train [71][3020/3239]	Time 0.249 (0.613)	Data Time 0.001 (0.015)	Loss 2.4681 (2.4528)	Entropy 0.99854 (1.00293)	Top-1 acc 66.016 (65.040)	Top-5 acc 83.984 (84.579)	lr 0.00897
Train [71][3030/3239]	Time 0.232 (0.613)	Data Time 0.001 (0.015)	Loss 2.3509 (2.4528)	Entropy 0.99854 (1.00292)	Top-1 acc 65.234 (65.036)	Top-5 acc 88.281 (84.579)	lr 0.00897
Train [71][3040/3239]	Time 0.258 (0.612)	Data Time 0.001 (0.015)	Loss 2.3266 (2.4527)	Entropy 0.99854 (1.00290)	Top-1 acc 67.969 (65.037)	Top-5 acc 88.672 (84.582)	lr 0.00897
Train [71][3050/3239]	Time 0.221 (0.612)	Data Time 0.001 (0.015)	Loss 2.3897 (2.4527)	Entropy 0.99841 (1.00289)	Top-1 acc 65.234 (65.037)	Top-5 acc 85.156 (84.582)	lr 0.00897
Train [71][3060/3239]	Time 0.217 (0.611)	Data Time 0.001 (0.015)	Loss 2.4158 (2.4527)	Entropy 0.99828 (1.00288)	Top-1 acc 65.234 (65.037)	Top-5 acc 83.984 (84.583)	lr 0.00897
Train [71][3070/3239]	Time 0.245 (0.611)	Data Time 0.001 (0.015)	Loss 2.5989 (2.4527)	Entropy 0.99826 (1.00286)	Top-1 acc 63.281 (65.036)	Top-5 acc 80.859 (84.581)	lr 0.00897
Train [71][3080/3239]	Time 0.327 (0.611)	Data Time 0.001 (0.015)	Loss 2.4331 (2.4526)	Entropy 0.99824 (1.00285)	Top-1 acc 66.406 (65.036)	Top-5 acc 86.719 (84.584)	lr 0.00897
Train [71][3090/3239]	Time 0.274 (0.610)	Data Time 0.001 (0.015)	Loss 2.4805 (2.4526)	Entropy 0.99819 (1.00283)	Top-1 acc 65.625 (65.039)	Top-5 acc 86.328 (84.582)	lr 0.00896
Train [71][3100/3239]	Time 0.262 (0.610)	Data Time 0.001 (0.015)	Loss 2.3703 (2.4526)	Entropy 0.99814 (1.00282)	Top-1 acc 65.234 (65.037)	Top-5 acc 85.156 (84.581)	lr 0.00896
Train [71][3110/3239]	Time 0.224 (0.609)	Data Time 0.001 (0.015)	Loss 2.3838 (2.4527)	Entropy 0.99811 (1.00280)	Top-1 acc 64.844 (65.036)	Top-5 acc 86.719 (84.581)	lr 0.00896
Train [71][3120/3239]	Time 0.286 (0.609)	Data Time 0.001 (0.015)	Loss 2.4664 (2.4527)	Entropy 0.99813 (1.00279)	Top-1 acc 65.234 (65.038)	Top-5 acc 83.984 (84.582)	lr 0.00896
Train [71][3130/3239]	Time 0.358 (0.626)	Data Time 0.004 (0.015)	Loss 2.4083 (2.4527)	Entropy 0.99807 (1.00277)	Top-1 acc 64.453 (65.039)	Top-5 acc 86.328 (84.583)	lr 0.00896
Train [71][3140/3239]	Time 0.229 (0.625)	Data Time 0.002 (0.015)	Loss 2.3806 (2.4529)	Entropy 0.99803 (1.00276)	Top-1 acc 66.016 (65.035)	Top-5 acc 85.547 (84.578)	lr 0.00896
Train [71][3150/3239]	Time 0.257 (0.625)	Data Time 0.001 (0.015)	Loss 2.3158 (2.4529)	Entropy 0.99801 (1.00274)	Top-1 acc 66.406 (65.034)	Top-5 acc 87.891 (84.578)	lr 0.00896
Train [71][3160/3239]	Time 0.223 (0.624)	Data Time 0.001 (0.015)	Loss 2.4254 (2.4531)	Entropy 0.99800 (1.00273)	Top-1 acc 67.578 (65.031)	Top-5 acc 83.594 (84.575)	lr 0.00896
Train [71][3170/3239]	Time 0.267 (0.624)	Data Time 0.001 (0.015)	Loss 2.2921 (2.4530)	Entropy 0.99798 (1.00271)	Top-1 acc 69.531 (65.031)	Top-5 acc 87.109 (84.576)	lr 0.00896
Train [71][3180/3239]	Time 0.251 (0.624)	Data Time 0.000 (0.014)	Loss 2.4063 (2.4530)	Entropy 0.99790 (1.00270)	Top-1 acc 67.969 (65.028)	Top-5 acc 85.938 (84.577)	lr 0.00896
Train [71][3190/3239]	Time 0.225 (0.623)	Data Time 0.000 (0.014)	Loss 2.2603 (2.4530)	Entropy 0.99790 (1.00268)	Top-1 acc 69.531 (65.028)	Top-5 acc 87.109 (84.576)	lr 0.00895
Train [71][3200/3239]	Time 0.221 (0.623)	Data Time 0.000 (0.014)	Loss 2.4028 (2.4531)	Entropy 0.99792 (1.00267)	Top-1 acc 64.453 (65.026)	Top-5 acc 83.984 (84.573)	lr 0.00895
Train [71][3210/3239]	Time 0.333 (0.622)	Data Time 0.000 (0.014)	Loss 2.5264 (2.4532)	Entropy 0.99789 (1.00265)	Top-1 acc 61.328 (65.026)	Top-5 acc 83.203 (84.571)	lr 0.00895
Train [71][3220/3239]	Time 0.223 (0.622)	Data Time 0.000 (0.014)	Loss 2.5791 (2.4533)	Entropy 0.99788 (1.00264)	Top-1 acc 61.328 (65.022)	Top-5 acc 83.203 (84.569)	lr 0.00895
Train [71][3230/3239]	Time 0.253 (0.621)	Data Time 0.000 (0.014)	Loss 2.4504 (2.4532)	Entropy 0.99786 (1.00262)	Top-1 acc 64.453 (65.021)	Top-5 acc 86.328 (84.572)	lr 0.00895
Train [71][3239/3239]	Time 2.321 (0.621)	Data Time 0.000 (0.014)	Loss 2.4235 (2.4532)	Entropy 0.99786 (1.00261)	Top-1 acc 66.667 (65.019)	Top-5 acc 86.420 (84.571)	lr 0.00895
==========Valid [71/120]	loss 1.368	top-1 acc 68.377 (68.599)	top-5 acc 87.878	Train top-1 65.019	top-5 84.571	Entropy 0.99786	Latency-None: 0.000ms	Flops: 546.53M
Train [72][0/3239]	Time 40.588 (40.588)	Data Time 37.991 (37.991)	Loss 2.5195 (2.5195)	Entropy 0.99769 (0.99769)	Top-1 acc 61.328 (61.328)	Top-5 acc 84.766 (84.766)	lr 0.00895
Train [72][10/3239]	Time 2.435 (4.364)	Data Time 0.001 (3.611)	Loss 2.5237 (2.4329)	Entropy 0.99769 (0.99769)	Top-1 acc 64.453 (65.661)	Top-5 acc 80.859 (84.339)	lr 0.00895
Train [72][20/3239]	Time 0.404 (2.416)	Data Time 0.001 (1.892)	Loss 2.3673 (2.4361)	Entropy 0.99765 (0.99767)	Top-1 acc 67.578 (65.606)	Top-5 acc 88.281 (84.617)	lr 0.00895
Train [72][30/3239]	Time 0.233 (1.794)	Data Time 0.001 (1.282)	Loss 2.4286 (2.4265)	Entropy 0.99766 (0.99767)	Top-1 acc 62.891 (65.814)	Top-5 acc 85.156 (84.879)	lr 0.00895
Train [72][40/3239]	Time 0.239 (1.472)	Data Time 0.001 (0.970)	Loss 2.4211 (2.4221)	Entropy 0.99757 (0.99765)	Top-1 acc 63.672 (65.920)	Top-5 acc 84.375 (84.870)	lr 0.00895
Train [72][50/3239]	Time 0.248 (1.275)	Data Time 0.001 (0.780)	Loss 2.4038 (2.4236)	Entropy 0.99760 (0.99764)	Top-1 acc 66.016 (65.847)	Top-5 acc 84.766 (84.835)	lr 0.00894
Train [72][60/3239]	Time 0.210 (1.142)	Data Time 0.001 (0.652)	Loss 2.4217 (2.4299)	Entropy 0.99760 (0.99763)	Top-1 acc 67.188 (65.779)	Top-5 acc 83.594 (84.753)	lr 0.00894
Train [72][70/3239]	Time 0.226 (1.047)	Data Time 0.001 (0.561)	Loss 2.2037 (2.4304)	Entropy 0.99758 (0.99762)	Top-1 acc 73.828 (65.779)	Top-5 acc 89.062 (84.766)	lr 0.00894
Train [72][80/3239]	Time 0.291 (0.975)	Data Time 0.002 (0.492)	Loss 2.3454 (2.4333)	Entropy 0.99757 (0.99762)	Top-1 acc 67.188 (65.664)	Top-5 acc 86.719 (84.713)	lr 0.00894
Train [72][90/3239]	Time 0.239 (0.921)	Data Time 0.001 (0.438)	Loss 2.3596 (2.4317)	Entropy 0.99754 (0.99761)	Top-1 acc 68.359 (65.724)	Top-5 acc 86.328 (84.744)	lr 0.00894
Train [72][100/3239]	Time 0.263 (0.879)	Data Time 0.001 (0.395)	Loss 2.4446 (2.4361)	Entropy 0.99741 (0.99760)	Top-1 acc 63.672 (65.664)	Top-5 acc 83.594 (84.607)	lr 0.00894
Train [72][110/3239]	Time 0.357 (0.843)	Data Time 0.001 (0.359)	Loss 2.3920 (2.4360)	Entropy 0.99733 (0.99758)	Top-1 acc 66.797 (65.688)	Top-5 acc 85.156 (84.593)	lr 0.00894
Train [72][120/3239]	Time 2.586 (0.814)	Data Time 0.001 (0.330)	Loss 2.2762 (2.4292)	Entropy 0.99733 (0.99756)	Top-1 acc 67.969 (65.819)	Top-5 acc 88.672 (84.753)	lr 0.00894
Train [72][130/3239]	Time 0.253 (0.771)	Data Time 0.001 (0.305)	Loss 2.3630 (2.4278)	Entropy 0.99732 (0.99755)	Top-1 acc 64.844 (65.884)	Top-5 acc 88.281 (84.819)	lr 0.00894
Train [72][140/3239]	Time 0.232 (0.751)	Data Time 0.001 (0.283)	Loss 2.3102 (2.4233)	Entropy 0.99726 (0.99753)	Top-1 acc 67.969 (66.041)	Top-5 acc 89.062 (84.924)	lr 0.00894
Train [72][150/3239]	Time 0.221 (0.734)	Data Time 0.001 (0.264)	Loss 2.3576 (2.4254)	Entropy 0.99722 (0.99751)	Top-1 acc 70.703 (65.910)	Top-5 acc 85.938 (84.877)	lr 0.00894
Train [72][160/3239]	Time 0.239 (0.718)	Data Time 0.001 (0.248)	Loss 2.3477 (2.4358)	Entropy 0.99721 (0.99749)	Top-1 acc 64.453 (65.669)	Top-5 acc 85.547 (84.758)	lr 0.00893
Train [72][170/3239]	Time 0.225 (0.704)	Data Time 0.001 (0.234)	Loss 2.3191 (2.4355)	Entropy 0.99714 (0.99747)	Top-1 acc 68.750 (65.684)	Top-5 acc 86.719 (84.738)	lr 0.00893
Train [72][180/3239]	Time 0.237 (0.691)	Data Time 0.002 (0.221)	Loss 2.4452 (2.4369)	Entropy 0.99704 (0.99745)	Top-1 acc 64.453 (65.597)	Top-5 acc 83.203 (84.718)	lr 0.00893
Train [72][190/3239]	Time 0.241 (0.680)	Data Time 0.001 (0.209)	Loss 2.3803 (2.4362)	Entropy 0.99706 (0.99743)	Top-1 acc 69.141 (65.619)	Top-5 acc 85.938 (84.715)	lr 0.00893
Train [72][200/3239]	Time 0.284 (0.670)	Data Time 0.001 (0.199)	Loss 2.5875 (2.4367)	Entropy 0.99700 (0.99741)	Top-1 acc 60.156 (65.590)	Top-5 acc 80.469 (84.715)	lr 0.00893
Train [72][210/3239]	Time 0.239 (0.661)	Data Time 0.001 (0.190)	Loss 2.5116 (2.4346)	Entropy 0.99695 (0.99739)	Top-1 acc 63.672 (65.642)	Top-5 acc 84.375 (84.788)	lr 0.00893
Train [72][220/3239]	Time 0.237 (0.652)	Data Time 0.003 (0.181)	Loss 2.3722 (2.4366)	Entropy 0.99691 (0.99737)	Top-1 acc 70.312 (65.595)	Top-5 acc 85.938 (84.769)	lr 0.00893
Train [72][230/3239]	Time 2.534 (0.645)	Data Time 0.004 (0.174)	Loss 2.4332 (2.4381)	Entropy 0.99691 (0.99735)	Top-1 acc 65.625 (65.569)	Top-5 acc 84.766 (84.733)	lr 0.00893
Train [72][240/3239]	Time 0.221 (0.628)	Data Time 0.001 (0.166)	Loss 2.5990 (2.4388)	Entropy 0.99691 (0.99733)	Top-1 acc 59.766 (65.520)	Top-5 acc 83.984 (84.707)	lr 0.00893
Train [72][250/3239]	Time 0.423 (0.831)	Data Time 0.003 (0.160)	Loss 2.4791 (2.4377)	Entropy 0.99685 (0.99731)	Top-1 acc 63.672 (65.544)	Top-5 acc 84.766 (84.714)	lr 0.00893
Train [72][260/3239]	Time 0.210 (0.820)	Data Time 0.002 (0.154)	Loss 2.4434 (2.4379)	Entropy 0.99675 (0.99729)	Top-1 acc 65.234 (65.546)	Top-5 acc 84.766 (84.706)	lr 0.00892
Train [72][270/3239]	Time 0.257 (0.808)	Data Time 0.002 (0.148)	Loss 2.4159 (2.4378)	Entropy 0.99672 (0.99727)	Top-1 acc 65.625 (65.533)	Top-5 acc 86.328 (84.727)	lr 0.00892
Train [72][280/3239]	Time 0.239 (0.796)	Data Time 0.001 (0.143)	Loss 2.3293 (2.4348)	Entropy 0.99661 (0.99725)	Top-1 acc 66.016 (65.600)	Top-5 acc 88.672 (84.786)	lr 0.00892
Train [72][290/3239]	Time 0.342 (0.786)	Data Time 0.001 (0.138)	Loss 2.4591 (2.4353)	Entropy 0.99652 (0.99722)	Top-1 acc 65.625 (65.561)	Top-5 acc 85.547 (84.801)	lr 0.00892
Train [72][300/3239]	Time 0.248 (0.775)	Data Time 0.001 (0.134)	Loss 2.3002 (2.4358)	Entropy 0.99654 (0.99720)	Top-1 acc 67.188 (65.558)	Top-5 acc 86.719 (84.792)	lr 0.00892
Train [72][310/3239]	Time 0.223 (0.765)	Data Time 0.001 (0.129)	Loss 2.2592 (2.4348)	Entropy 0.99651 (0.99718)	Top-1 acc 67.578 (65.550)	Top-5 acc 87.891 (84.820)	lr 0.00892
Train [72][320/3239]	Time 0.211 (0.756)	Data Time 0.001 (0.125)	Loss 2.4677 (2.4369)	Entropy 0.99653 (0.99716)	Top-1 acc 63.672 (65.483)	Top-5 acc 85.547 (84.802)	lr 0.00892
Train [72][330/3239]	Time 0.241 (0.748)	Data Time 0.002 (0.122)	Loss 2.3405 (2.4373)	Entropy 0.99649 (0.99714)	Top-1 acc 67.578 (65.472)	Top-5 acc 85.938 (84.796)	lr 0.00892
Train [72][340/3239]	Time 2.650 (0.740)	Data Time 0.002 (0.118)	Loss 2.4297 (2.4385)	Entropy 0.99649 (0.99712)	Top-1 acc 66.016 (65.455)	Top-5 acc 85.547 (84.783)	lr 0.00892
Train [72][350/3239]	Time 0.240 (0.726)	Data Time 0.002 (0.115)	Loss 2.4796 (2.4385)	Entropy 0.99640 (0.99710)	Top-1 acc 62.891 (65.447)	Top-5 acc 82.422 (84.785)	lr 0.00892
Train [72][360/3239]	Time 0.247 (0.719)	Data Time 0.001 (0.112)	Loss 2.3639 (2.4381)	Entropy 0.99635 (0.99708)	Top-1 acc 66.406 (65.418)	Top-5 acc 85.156 (84.803)	lr 0.00891
Train [72][370/3239]	Time 0.245 (0.713)	Data Time 0.001 (0.109)	Loss 2.4917 (2.4377)	Entropy 0.99630 (0.99706)	Top-1 acc 65.234 (65.431)	Top-5 acc 86.719 (84.808)	lr 0.00891
Train [72][380/3239]	Time 0.367 (0.707)	Data Time 0.001 (0.106)	Loss 2.2915 (2.4369)	Entropy 0.99644 (0.99704)	Top-1 acc 73.828 (65.447)	Top-5 acc 86.719 (84.832)	lr 0.00891
Train [72][390/3239]	Time 0.211 (0.701)	Data Time 0.001 (0.103)	Loss 2.4132 (2.4359)	Entropy 0.99639 (0.99703)	Top-1 acc 66.016 (65.468)	Top-5 acc 82.812 (84.836)	lr 0.00891
Train [72][400/3239]	Time 0.231 (0.695)	Data Time 0.001 (0.101)	Loss 2.3335 (2.4349)	Entropy 0.99643 (0.99701)	Top-1 acc 64.844 (65.454)	Top-5 acc 87.891 (84.860)	lr 0.00891
Train [72][410/3239]	Time 0.224 (0.690)	Data Time 0.001 (0.098)	Loss 2.4454 (2.4353)	Entropy 0.99641 (0.99700)	Top-1 acc 63.672 (65.428)	Top-5 acc 85.547 (84.863)	lr 0.00891
Train [72][420/3239]	Time 0.247 (0.685)	Data Time 0.001 (0.096)	Loss 2.5187 (2.4363)	Entropy 0.99641 (0.99698)	Top-1 acc 64.844 (65.411)	Top-5 acc 82.031 (84.829)	lr 0.00891
Train [72][430/3239]	Time 0.227 (0.680)	Data Time 0.001 (0.094)	Loss 2.5621 (2.4357)	Entropy 0.99638 (0.99697)	Top-1 acc 64.062 (65.428)	Top-5 acc 82.031 (84.835)	lr 0.00891
Train [72][440/3239]	Time 0.222 (0.674)	Data Time 0.001 (0.092)	Loss 2.3976 (2.4359)	Entropy 0.99637 (0.99696)	Top-1 acc 67.578 (65.426)	Top-5 acc 85.938 (84.844)	lr 0.00891
Train [72][450/3239]	Time 2.528 (0.670)	Data Time 0.001 (0.090)	Loss 2.3427 (2.4361)	Entropy 0.99637 (0.99694)	Top-1 acc 66.016 (65.408)	Top-5 acc 86.719 (84.837)	lr 0.00891
Train [72][460/3239]	Time 0.224 (0.660)	Data Time 0.001 (0.088)	Loss 2.4517 (2.4372)	Entropy 0.99629 (0.99693)	Top-1 acc 66.406 (65.400)	Top-5 acc 83.984 (84.826)	lr 0.00891
Train [72][470/3239]	Time 0.290 (0.657)	Data Time 0.001 (0.086)	Loss 2.3469 (2.4364)	Entropy 0.99627 (0.99691)	Top-1 acc 67.578 (65.425)	Top-5 acc 87.500 (84.852)	lr 0.00890
Train [72][480/3239]	Time 0.213 (0.652)	Data Time 0.001 (0.084)	Loss 2.2161 (2.4358)	Entropy 0.99624 (0.99690)	Top-1 acc 71.094 (65.466)	Top-5 acc 89.453 (84.856)	lr 0.00890
Train [72][490/3239]	Time 0.223 (0.649)	Data Time 0.001 (0.083)	Loss 2.4820 (2.4364)	Entropy 0.99634 (0.99689)	Top-1 acc 65.234 (65.453)	Top-5 acc 84.766 (84.852)	lr 0.00890
Train [72][500/3239]	Time 0.233 (0.645)	Data Time 0.001 (0.081)	Loss 2.5559 (2.4357)	Entropy 0.99631 (0.99688)	Top-1 acc 61.719 (65.462)	Top-5 acc 83.203 (84.872)	lr 0.00890
Train [72][510/3239]	Time 0.234 (0.641)	Data Time 0.001 (0.079)	Loss 2.4750 (2.4354)	Entropy 0.99633 (0.99687)	Top-1 acc 67.188 (65.461)	Top-5 acc 82.422 (84.875)	lr 0.00890
Train [72][520/3239]	Time 0.323 (0.639)	Data Time 0.001 (0.078)	Loss 2.5263 (2.4363)	Entropy 0.99630 (0.99686)	Top-1 acc 63.281 (65.428)	Top-5 acc 83.203 (84.860)	lr 0.00890
Train [72][530/3239]	Time 0.218 (0.635)	Data Time 0.001 (0.076)	Loss 2.4111 (2.4357)	Entropy 0.99629 (0.99684)	Top-1 acc 67.188 (65.434)	Top-5 acc 83.203 (84.853)	lr 0.00890
Train [72][540/3239]	Time 0.245 (0.632)	Data Time 0.001 (0.075)	Loss 2.4790 (2.4354)	Entropy 0.99628 (0.99683)	Top-1 acc 64.062 (65.446)	Top-5 acc 85.156 (84.869)	lr 0.00890
Train [72][550/3239]	Time 0.186 (0.629)	Data Time 0.001 (0.074)	Loss 2.3322 (2.4346)	Entropy 0.99593 (0.99682)	Top-1 acc 68.750 (65.474)	Top-5 acc 84.766 (84.878)	lr 0.00890
Train [72][560/3239]	Time 2.523 (0.626)	Data Time 0.002 (0.073)	Loss 2.4845 (2.4350)	Entropy 0.99593 (0.99681)	Top-1 acc 64.844 (65.469)	Top-5 acc 85.938 (84.869)	lr 0.00890
Train [72][570/3239]	Time 0.237 (0.620)	Data Time 0.001 (0.071)	Loss 2.4213 (2.4350)	Entropy 0.99594 (0.99679)	Top-1 acc 61.328 (65.457)	Top-5 acc 87.891 (84.867)	lr 0.00889
Train [72][580/3239]	Time 0.209 (0.617)	Data Time 0.001 (0.070)	Loss 2.5437 (2.4351)	Entropy 0.99590 (0.99678)	Top-1 acc 60.938 (65.458)	Top-5 acc 84.375 (84.865)	lr 0.00889
Train [72][590/3239]	Time 0.229 (0.614)	Data Time 0.001 (0.069)	Loss 2.3689 (2.4359)	Entropy 0.99584 (0.99676)	Top-1 acc 66.797 (65.441)	Top-5 acc 87.109 (84.851)	lr 0.00889
Train [72][600/3239]	Time 0.233 (0.612)	Data Time 0.001 (0.068)	Loss 2.3816 (2.4366)	Entropy 0.99587 (0.99675)	Top-1 acc 67.969 (65.424)	Top-5 acc 86.719 (84.831)	lr 0.00889
Train [72][610/3239]	Time 0.442 (0.693)	Data Time 0.004 (0.067)	Loss 2.5500 (2.4369)	Entropy 0.99590 (0.99673)	Top-1 acc 63.672 (65.405)	Top-5 acc 81.641 (84.828)	lr 0.00889
Train [72][620/3239]	Time 0.240 (0.692)	Data Time 0.002 (0.066)	Loss 2.3665 (2.4366)	Entropy 0.99589 (0.99672)	Top-1 acc 64.844 (65.410)	Top-5 acc 86.328 (84.840)	lr 0.00889
Train [72][630/3239]	Time 0.257 (0.689)	Data Time 0.002 (0.065)	Loss 2.3982 (2.4365)	Entropy 0.99590 (0.99671)	Top-1 acc 64.062 (65.399)	Top-5 acc 85.156 (84.849)	lr 0.00889
Train [72][640/3239]	Time 0.228 (0.685)	Data Time 0.002 (0.064)	Loss 2.5573 (2.4367)	Entropy 0.99586 (0.99669)	Top-1 acc 58.984 (65.389)	Top-5 acc 81.641 (84.851)	lr 0.00889
Train [72][650/3239]	Time 0.306 (0.682)	Data Time 0.001 (0.063)	Loss 2.5800 (2.4373)	Entropy 0.99583 (0.99668)	Top-1 acc 61.328 (65.380)	Top-5 acc 82.031 (84.833)	lr 0.00889
Train [72][660/3239]	Time 0.223 (0.679)	Data Time 0.001 (0.062)	Loss 2.5921 (2.4376)	Entropy 0.99561 (0.99667)	Top-1 acc 60.156 (65.371)	Top-5 acc 81.641 (84.831)	lr 0.00889
Train [72][670/3239]	Time 2.527 (0.676)	Data Time 0.001 (0.061)	Loss 2.5275 (2.4377)	Entropy 0.99561 (0.99665)	Top-1 acc 63.672 (65.369)	Top-5 acc 82.812 (84.826)	lr 0.00888
Train [72][680/3239]	Time 0.236 (0.669)	Data Time 0.001 (0.060)	Loss 2.3923 (2.4379)	Entropy 0.99561 (0.99664)	Top-1 acc 68.359 (65.359)	Top-5 acc 83.203 (84.814)	lr 0.00888
Train [72][690/3239]	Time 0.230 (0.666)	Data Time 0.001 (0.059)	Loss 2.5178 (2.4382)	Entropy 0.99557 (0.99662)	Top-1 acc 63.672 (65.350)	Top-5 acc 83.594 (84.806)	lr 0.00888
Train [72][700/3239]	Time 0.330 (0.664)	Data Time 0.001 (0.058)	Loss 2.5165 (2.4388)	Entropy 0.99554 (0.99661)	Top-1 acc 66.406 (65.348)	Top-5 acc 82.422 (84.791)	lr 0.00888
Train [72][710/3239]	Time 0.215 (0.661)	Data Time 0.001 (0.058)	Loss 2.4202 (2.4387)	Entropy 0.99552 (0.99659)	Top-1 acc 68.359 (65.357)	Top-5 acc 83.203 (84.788)	lr 0.00888
Train [72][720/3239]	Time 0.251 (0.658)	Data Time 0.001 (0.057)	Loss 2.4082 (2.4390)	Entropy 0.99549 (0.99658)	Top-1 acc 66.797 (65.354)	Top-5 acc 84.766 (84.781)	lr 0.00888
Train [72][730/3239]	Time 0.218 (0.656)	Data Time 0.001 (0.056)	Loss 2.3033 (2.4385)	Entropy 0.99548 (0.99656)	Top-1 acc 71.875 (65.377)	Top-5 acc 85.547 (84.796)	lr 0.00888
Train [72][740/3239]	Time 0.319 (0.654)	Data Time 0.001 (0.055)	Loss 2.5669 (2.4391)	Entropy 0.99548 (0.99655)	Top-1 acc 59.766 (65.349)	Top-5 acc 82.422 (84.779)	lr 0.00888
Train [72][750/3239]	Time 0.228 (0.651)	Data Time 0.001 (0.055)	Loss 2.4417 (2.4388)	Entropy 0.99548 (0.99653)	Top-1 acc 64.844 (65.364)	Top-5 acc 83.984 (84.782)	lr 0.00888
Train [72][760/3239]	Time 0.231 (0.649)	Data Time 0.001 (0.054)	Loss 2.2747 (2.4387)	Entropy 0.99537 (0.99652)	Top-1 acc 69.922 (65.372)	Top-5 acc 87.891 (84.774)	lr 0.00888
Train [72][770/3239]	Time 0.281 (0.647)	Data Time 0.001 (0.053)	Loss 2.4914 (2.4392)	Entropy 0.99535 (0.99650)	Top-1 acc 64.062 (65.361)	Top-5 acc 83.203 (84.757)	lr 0.00888
Train [72][780/3239]	Time 2.526 (0.644)	Data Time 0.001 (0.053)	Loss 2.4111 (2.4393)	Entropy 0.99535 (0.99649)	Top-1 acc 68.359 (65.363)	Top-5 acc 83.984 (84.748)	lr 0.00887
Train [72][790/3239]	Time 0.235 (0.639)	Data Time 0.001 (0.052)	Loss 2.6005 (2.4399)	Entropy 0.99529 (0.99647)	Top-1 acc 62.109 (65.365)	Top-5 acc 81.250 (84.741)	lr 0.00887
Train [72][800/3239]	Time 0.232 (0.637)	Data Time 0.001 (0.051)	Loss 2.6158 (2.4403)	Entropy 0.99526 (0.99646)	Top-1 acc 62.500 (65.361)	Top-5 acc 84.766 (84.735)	lr 0.00887
Train [72][810/3239]	Time 0.229 (0.635)	Data Time 0.001 (0.051)	Loss 2.2242 (2.4402)	Entropy 0.99521 (0.99644)	Top-1 acc 71.875 (65.377)	Top-5 acc 86.719 (84.743)	lr 0.00887
Train [72][820/3239]	Time 0.218 (0.633)	Data Time 0.001 (0.050)	Loss 2.4949 (2.4408)	Entropy 0.99517 (0.99643)	Top-1 acc 64.844 (65.366)	Top-5 acc 85.547 (84.736)	lr 0.00887
Train [72][830/3239]	Time 0.309 (0.631)	Data Time 0.001 (0.050)	Loss 2.3872 (2.4405)	Entropy 0.99507 (0.99641)	Top-1 acc 67.578 (65.373)	Top-5 acc 84.766 (84.733)	lr 0.00887
Train [72][840/3239]	Time 0.230 (0.629)	Data Time 0.001 (0.049)	Loss 2.3937 (2.4404)	Entropy 0.99502 (0.99640)	Top-1 acc 66.406 (65.372)	Top-5 acc 86.328 (84.731)	lr 0.00887
Train [72][850/3239]	Time 0.243 (0.628)	Data Time 0.001 (0.048)	Loss 2.5745 (2.4401)	Entropy 0.99503 (0.99638)	Top-1 acc 61.719 (65.382)	Top-5 acc 82.031 (84.731)	lr 0.00887
Train [72][860/3239]	Time 0.231 (0.626)	Data Time 0.001 (0.048)	Loss 2.4692 (2.4402)	Entropy 0.99495 (0.99636)	Top-1 acc 64.062 (65.379)	Top-5 acc 83.984 (84.733)	lr 0.00887
Train [72][870/3239]	Time 0.216 (0.624)	Data Time 0.001 (0.047)	Loss 2.4271 (2.4406)	Entropy 0.99497 (0.99635)	Top-1 acc 64.062 (65.370)	Top-5 acc 84.375 (84.725)	lr 0.00887
Train [72][880/3239]	Time 0.223 (0.622)	Data Time 0.001 (0.047)	Loss 2.5311 (2.4404)	Entropy 0.99487 (0.99633)	Top-1 acc 64.453 (65.380)	Top-5 acc 83.594 (84.724)	lr 0.00886
Train [72][890/3239]	Time 2.480 (0.620)	Data Time 0.001 (0.046)	Loss 2.4270 (2.4400)	Entropy 0.99487 (0.99631)	Top-1 acc 62.500 (65.391)	Top-5 acc 84.766 (84.736)	lr 0.00886
Train [72][900/3239]	Time 0.240 (0.616)	Data Time 0.001 (0.046)	Loss 2.3361 (2.4397)	Entropy 0.99481 (0.99630)	Top-1 acc 70.312 (65.406)	Top-5 acc 85.938 (84.733)	lr 0.00886
Train [72][910/3239]	Time 0.239 (0.615)	Data Time 0.001 (0.045)	Loss 2.5717 (2.4396)	Entropy 0.99479 (0.99628)	Top-1 acc 61.719 (65.406)	Top-5 acc 83.594 (84.733)	lr 0.00886
Train [72][920/3239]	Time 0.275 (0.613)	Data Time 0.001 (0.045)	Loss 2.2796 (2.4392)	Entropy 0.99477 (0.99627)	Top-1 acc 69.531 (65.410)	Top-5 acc 88.672 (84.738)	lr 0.00886
Train [72][930/3239]	Time 0.225 (0.612)	Data Time 0.001 (0.044)	Loss 2.3665 (2.4390)	Entropy 0.99476 (0.99625)	Top-1 acc 65.625 (65.413)	Top-5 acc 86.328 (84.745)	lr 0.00886
Train [72][940/3239]	Time 0.232 (0.610)	Data Time 0.001 (0.044)	Loss 2.3727 (2.4392)	Entropy 0.99478 (0.99623)	Top-1 acc 70.312 (65.422)	Top-5 acc 86.328 (84.744)	lr 0.00886
Train [72][950/3239]	Time 0.238 (0.608)	Data Time 0.001 (0.043)	Loss 2.4904 (2.4393)	Entropy 0.99481 (0.99622)	Top-1 acc 61.328 (65.414)	Top-5 acc 85.547 (84.739)	lr 0.00886
Train [72][960/3239]	Time 0.246 (0.607)	Data Time 0.001 (0.043)	Loss 2.3163 (2.4394)	Entropy 0.99472 (0.99620)	Top-1 acc 67.188 (65.414)	Top-5 acc 88.672 (84.738)	lr 0.00886
Train [72][970/3239]	Time 0.249 (0.665)	Data Time 0.002 (0.043)	Loss 2.5249 (2.4397)	Entropy 0.99474 (0.99619)	Top-1 acc 62.500 (65.413)	Top-5 acc 81.641 (84.733)	lr 0.00886
Train [72][980/3239]	Time 0.235 (0.664)	Data Time 0.002 (0.042)	Loss 2.4577 (2.4398)	Entropy 0.99469 (0.99617)	Top-1 acc 64.062 (65.416)	Top-5 acc 84.766 (84.727)	lr 0.00886
Train [72][990/3239]	Time 0.232 (0.662)	Data Time 0.002 (0.042)	Loss 2.4925 (2.4404)	Entropy 0.99464 (0.99616)	Top-1 acc 64.062 (65.399)	Top-5 acc 84.375 (84.718)	lr 0.00885
Train [72][1000/3239]	Time 2.640 (0.660)	Data Time 0.001 (0.041)	Loss 2.3219 (2.4403)	Entropy 0.99464 (0.99614)	Top-1 acc 69.531 (65.396)	Top-5 acc 87.500 (84.720)	lr 0.00885
Train [72][1010/3239]	Time 0.369 (0.656)	Data Time 0.003 (0.041)	Loss 2.4842 (2.4405)	Entropy 0.99463 (0.99613)	Top-1 acc 61.719 (65.380)	Top-5 acc 83.984 (84.718)	lr 0.00885
Train [72][1020/3239]	Time 0.227 (0.654)	Data Time 0.001 (0.041)	Loss 2.3050 (2.4404)	Entropy 0.99463 (0.99611)	Top-1 acc 73.047 (65.403)	Top-5 acc 87.891 (84.719)	lr 0.00885
Train [72][1030/3239]	Time 0.237 (0.653)	Data Time 0.001 (0.040)	Loss 2.4912 (2.4405)	Entropy 0.99457 (0.99610)	Top-1 acc 65.234 (65.402)	Top-5 acc 83.594 (84.719)	lr 0.00885
Train [72][1040/3239]	Time 0.241 (0.651)	Data Time 0.001 (0.040)	Loss 2.3907 (2.4407)	Entropy 0.99454 (0.99608)	Top-1 acc 66.016 (65.384)	Top-5 acc 83.984 (84.716)	lr 0.00885
Train [72][1050/3239]	Time 0.232 (0.650)	Data Time 0.001 (0.040)	Loss 2.3592 (2.4403)	Entropy 0.99455 (0.99607)	Top-1 acc 67.188 (65.390)	Top-5 acc 86.719 (84.725)	lr 0.00885
Train [72][1060/3239]	Time 0.225 (0.648)	Data Time 0.001 (0.039)	Loss 2.4851 (2.4400)	Entropy 0.99450 (0.99605)	Top-1 acc 60.938 (65.402)	Top-5 acc 86.328 (84.729)	lr 0.00885
Train [72][1070/3239]	Time 0.239 (0.646)	Data Time 0.001 (0.039)	Loss 2.4822 (2.4399)	Entropy 0.99448 (0.99604)	Top-1 acc 64.844 (65.401)	Top-5 acc 81.641 (84.731)	lr 0.00885
Train [72][1080/3239]	Time 0.163 (0.645)	Data Time 0.001 (0.038)	Loss 2.4778 (2.4400)	Entropy 0.99448 (0.99603)	Top-1 acc 63.672 (65.397)	Top-5 acc 82.422 (84.725)	lr 0.00885
Train [72][1090/3239]	Time 0.226 (0.643)	Data Time 0.001 (0.038)	Loss 2.3816 (2.4400)	Entropy 0.99445 (0.99601)	Top-1 acc 67.969 (65.389)	Top-5 acc 83.984 (84.720)	lr 0.00884
Train [72][1100/3239]	Time 0.358 (0.642)	Data Time 0.001 (0.038)	Loss 2.4104 (2.4396)	Entropy 0.99446 (0.99600)	Top-1 acc 62.109 (65.393)	Top-5 acc 85.938 (84.723)	lr 0.00884
Train [72][1110/3239]	Time 2.553 (0.641)	Data Time 0.001 (0.037)	Loss 2.4345 (2.4394)	Entropy 0.99446 (0.99598)	Top-1 acc 67.578 (65.396)	Top-5 acc 85.156 (84.730)	lr 0.00884
Train [72][1120/3239]	Time 0.249 (0.637)	Data Time 0.001 (0.037)	Loss 2.4254 (2.4392)	Entropy 0.99442 (0.99597)	Top-1 acc 66.406 (65.399)	Top-5 acc 85.156 (84.728)	lr 0.00884
Train [72][1130/3239]	Time 0.221 (0.636)	Data Time 0.001 (0.037)	Loss 2.3581 (2.4388)	Entropy 0.99438 (0.99596)	Top-1 acc 64.453 (65.403)	Top-5 acc 86.328 (84.740)	lr 0.00884
Train [72][1140/3239]	Time 0.265 (0.634)	Data Time 0.003 (0.037)	Loss 2.4744 (2.4388)	Entropy 0.99438 (0.99594)	Top-1 acc 62.891 (65.409)	Top-5 acc 83.203 (84.737)	lr 0.00884
Train [72][1150/3239]	Time 0.223 (0.633)	Data Time 0.001 (0.036)	Loss 2.3920 (2.4393)	Entropy 0.99434 (0.99593)	Top-1 acc 66.016 (65.389)	Top-5 acc 85.156 (84.728)	lr 0.00884
Train [72][1160/3239]	Time 0.214 (0.632)	Data Time 0.001 (0.036)	Loss 2.4215 (2.4392)	Entropy 0.99434 (0.99591)	Top-1 acc 69.531 (65.389)	Top-5 acc 84.766 (84.725)	lr 0.00884
Train [72][1170/3239]	Time 0.221 (0.631)	Data Time 0.001 (0.036)	Loss 2.4064 (2.4392)	Entropy 0.99433 (0.99590)	Top-1 acc 66.016 (65.385)	Top-5 acc 84.375 (84.727)	lr 0.00884
Train [72][1180/3239]	Time 0.234 (0.629)	Data Time 0.001 (0.035)	Loss 2.4870 (2.4396)	Entropy 0.99433 (0.99589)	Top-1 acc 64.062 (65.379)	Top-5 acc 82.031 (84.718)	lr 0.00884
Train [72][1190/3239]	Time 0.388 (0.628)	Data Time 0.001 (0.035)	Loss 2.4244 (2.4403)	Entropy 0.99434 (0.99587)	Top-1 acc 63.281 (65.367)	Top-5 acc 86.719 (84.707)	lr 0.00883
Train [72][1200/3239]	Time 0.224 (0.627)	Data Time 0.001 (0.035)	Loss 2.4530 (2.4403)	Entropy 0.99427 (0.99586)	Top-1 acc 65.234 (65.365)	Top-5 acc 85.547 (84.705)	lr 0.00883
Train [72][1210/3239]	Time 0.215 (0.626)	Data Time 0.001 (0.035)	Loss 2.5277 (2.4404)	Entropy 0.99424 (0.99585)	Top-1 acc 62.500 (65.364)	Top-5 acc 83.594 (84.702)	lr 0.00883
Train [72][1220/3239]	Time 2.569 (0.624)	Data Time 0.002 (0.034)	Loss 2.4406 (2.4406)	Entropy 0.99424 (0.99583)	Top-1 acc 63.281 (65.352)	Top-5 acc 83.984 (84.704)	lr 0.00883
Train [72][1230/3239]	Time 0.234 (0.621)	Data Time 0.001 (0.034)	Loss 2.3085 (2.4407)	Entropy 0.99432 (0.99582)	Top-1 acc 68.750 (65.350)	Top-5 acc 86.328 (84.705)	lr 0.00883
Train [72][1240/3239]	Time 0.233 (0.620)	Data Time 0.001 (0.034)	Loss 2.4290 (2.4404)	Entropy 0.99429 (0.99581)	Top-1 acc 66.016 (65.359)	Top-5 acc 84.766 (84.709)	lr 0.00883
Train [72][1250/3239]	Time 0.214 (0.619)	Data Time 0.001 (0.034)	Loss 2.4121 (2.4404)	Entropy 0.99431 (0.99580)	Top-1 acc 67.578 (65.354)	Top-5 acc 84.375 (84.707)	lr 0.00883
Train [72][1260/3239]	Time 0.222 (0.618)	Data Time 0.001 (0.033)	Loss 2.4204 (2.4408)	Entropy 0.99428 (0.99579)	Top-1 acc 65.625 (65.338)	Top-5 acc 83.203 (84.702)	lr 0.00883
Train [72][1270/3239]	Time 0.228 (0.617)	Data Time 0.001 (0.033)	Loss 2.5888 (2.4411)	Entropy 0.99432 (0.99577)	Top-1 acc 63.672 (65.334)	Top-5 acc 81.641 (84.696)	lr 0.00883
Train [72][1280/3239]	Time 0.355 (0.616)	Data Time 0.001 (0.033)	Loss 2.5404 (2.4411)	Entropy 0.99434 (0.99576)	Top-1 acc 62.500 (65.326)	Top-5 acc 83.984 (84.701)	lr 0.00883
Train [72][1290/3239]	Time 0.236 (0.615)	Data Time 0.001 (0.033)	Loss 2.4925 (2.4412)	Entropy 0.99430 (0.99575)	Top-1 acc 67.578 (65.329)	Top-5 acc 85.156 (84.699)	lr 0.00883
Train [72][1300/3239]	Time 0.221 (0.614)	Data Time 0.001 (0.032)	Loss 2.7334 (2.4413)	Entropy 0.99425 (0.99574)	Top-1 acc 57.031 (65.323)	Top-5 acc 78.906 (84.698)	lr 0.00882
Train [72][1310/3239]	Time 0.228 (0.613)	Data Time 0.001 (0.032)	Loss 2.4072 (2.4407)	Entropy 0.99414 (0.99573)	Top-1 acc 66.797 (65.335)	Top-5 acc 85.156 (84.706)	lr 0.00882
Train [72][1320/3239]	Time 0.244 (0.611)	Data Time 0.001 (0.032)	Loss 2.3193 (2.4410)	Entropy 0.99411 (0.99572)	Top-1 acc 69.531 (65.335)	Top-5 acc 85.938 (84.700)	lr 0.00882
Train [72][1330/3239]	Time 58.012 (0.652)	Data Time 0.002 (0.032)	Loss 2.5210 (2.4409)	Entropy 0.99411 (0.99571)	Top-1 acc 64.844 (65.341)	Top-5 acc 82.812 (84.703)	lr 0.00882
Train [72][1340/3239]	Time 0.390 (0.649)	Data Time 0.003 (0.031)	Loss 2.3935 (2.4408)	Entropy 0.99412 (0.99569)	Top-1 acc 69.141 (65.344)	Top-5 acc 86.328 (84.700)	lr 0.00882
Train [72][1350/3239]	Time 0.261 (0.648)	Data Time 0.002 (0.031)	Loss 2.5586 (2.4410)	Entropy 0.99410 (0.99568)	Top-1 acc 61.328 (65.332)	Top-5 acc 82.812 (84.695)	lr 0.00882
Train [72][1360/3239]	Time 0.233 (0.647)	Data Time 0.001 (0.031)	Loss 2.4058 (2.4408)	Entropy 0.99411 (0.99567)	Top-1 acc 65.625 (65.337)	Top-5 acc 84.375 (84.699)	lr 0.00882
Train [72][1370/3239]	Time 0.320 (0.646)	Data Time 0.002 (0.031)	Loss 2.5253 (2.4409)	Entropy 0.99409 (0.99566)	Top-1 acc 62.891 (65.331)	Top-5 acc 84.375 (84.698)	lr 0.00882
Train [72][1380/3239]	Time 0.220 (0.645)	Data Time 0.001 (0.031)	Loss 2.3025 (2.4410)	Entropy 0.99409 (0.99565)	Top-1 acc 72.266 (65.334)	Top-5 acc 87.109 (84.697)	lr 0.00882
Train [72][1390/3239]	Time 0.238 (0.643)	Data Time 0.001 (0.030)	Loss 2.3037 (2.4412)	Entropy 0.99410 (0.99564)	Top-1 acc 67.188 (65.321)	Top-5 acc 85.156 (84.695)	lr 0.00882
Train [72][1400/3239]	Time 0.208 (0.642)	Data Time 0.003 (0.030)	Loss 2.2823 (2.4414)	Entropy 0.99406 (0.99562)	Top-1 acc 71.484 (65.317)	Top-5 acc 87.109 (84.693)	lr 0.00881
Train [72][1410/3239]	Time 0.242 (0.641)	Data Time 0.001 (0.030)	Loss 2.4120 (2.4412)	Entropy 0.99405 (0.99561)	Top-1 acc 66.797 (65.321)	Top-5 acc 86.328 (84.698)	lr 0.00881
Train [72][1420/3239]	Time 0.233 (0.640)	Data Time 0.001 (0.030)	Loss 2.3712 (2.4409)	Entropy 0.99408 (0.99560)	Top-1 acc 66.406 (65.326)	Top-5 acc 87.109 (84.704)	lr 0.00881
Train [72][1430/3239]	Time 0.243 (0.639)	Data Time 0.001 (0.030)	Loss 2.3962 (2.4405)	Entropy 0.99408 (0.99559)	Top-1 acc 68.750 (65.335)	Top-5 acc 86.719 (84.714)	lr 0.00881
Train [72][1440/3239]	Time 2.560 (0.638)	Data Time 0.002 (0.029)	Loss 2.4745 (2.4404)	Entropy 0.99408 (0.99558)	Top-1 acc 69.922 (65.339)	Top-5 acc 85.156 (84.718)	lr 0.00881
Train [72][1450/3239]	Time 0.213 (0.635)	Data Time 0.001 (0.029)	Loss 2.2190 (2.4400)	Entropy 0.99406 (0.99557)	Top-1 acc 72.266 (65.356)	Top-5 acc 88.281 (84.724)	lr 0.00881
Train [72][1460/3239]	Time 0.316 (0.634)	Data Time 0.001 (0.029)	Loss 2.3811 (2.4398)	Entropy 0.99407 (0.99556)	Top-1 acc 69.922 (65.367)	Top-5 acc 87.109 (84.731)	lr 0.00881
Train [72][1470/3239]	Time 0.244 (0.633)	Data Time 0.001 (0.029)	Loss 2.5037 (2.4399)	Entropy 0.99411 (0.99555)	Top-1 acc 66.406 (65.367)	Top-5 acc 81.641 (84.723)	lr 0.00881
Train [72][1480/3239]	Time 0.218 (0.632)	Data Time 0.001 (0.029)	Loss 2.4774 (2.4400)	Entropy 0.99413 (0.99554)	Top-1 acc 67.188 (65.370)	Top-5 acc 82.031 (84.721)	lr 0.00881
Train [72][1490/3239]	Time 0.229 (0.630)	Data Time 0.001 (0.028)	Loss 2.4557 (2.4400)	Entropy 0.99411 (0.99553)	Top-1 acc 62.109 (65.365)	Top-5 acc 83.984 (84.721)	lr 0.00881
Train [72][1500/3239]	Time 0.365 (0.630)	Data Time 0.001 (0.028)	Loss 2.4307 (2.4401)	Entropy 0.99410 (0.99552)	Top-1 acc 67.188 (65.364)	Top-5 acc 82.422 (84.719)	lr 0.00880
Train [72][1510/3239]	Time 0.253 (0.629)	Data Time 0.001 (0.028)	Loss 2.4037 (2.4404)	Entropy 0.99406 (0.99551)	Top-1 acc 67.188 (65.362)	Top-5 acc 85.547 (84.713)	lr 0.00880
Train [72][1520/3239]	Time 0.222 (0.628)	Data Time 0.001 (0.028)	Loss 2.3796 (2.4403)	Entropy 0.99406 (0.99550)	Top-1 acc 69.531 (65.367)	Top-5 acc 84.375 (84.716)	lr 0.00880
Train [72][1530/3239]	Time 0.229 (0.627)	Data Time 0.001 (0.028)	Loss 2.5071 (2.4402)	Entropy 0.99402 (0.99549)	Top-1 acc 64.453 (65.366)	Top-5 acc 85.156 (84.723)	lr 0.00880
Train [72][1540/3239]	Time 0.254 (0.625)	Data Time 0.001 (0.028)	Loss 2.3628 (2.4397)	Entropy 0.99405 (0.99548)	Top-1 acc 66.406 (65.376)	Top-5 acc 84.766 (84.732)	lr 0.00880
Train [72][1550/3239]	Time 2.480 (0.624)	Data Time 0.001 (0.027)	Loss 2.4479 (2.4398)	Entropy 0.99405 (0.99547)	Top-1 acc 63.281 (65.371)	Top-5 acc 86.328 (84.734)	lr 0.00880
Train [72][1560/3239]	Time 0.239 (0.622)	Data Time 0.001 (0.027)	Loss 2.2637 (2.4396)	Entropy 0.99399 (0.99547)	Top-1 acc 69.922 (65.376)	Top-5 acc 88.281 (84.734)	lr 0.00880
Train [72][1570/3239]	Time 0.231 (0.621)	Data Time 0.001 (0.027)	Loss 2.5311 (2.4398)	Entropy 0.99395 (0.99546)	Top-1 acc 58.203 (65.366)	Top-5 acc 86.719 (84.731)	lr 0.00880
Train [72][1580/3239]	Time 0.217 (0.620)	Data Time 0.001 (0.027)	Loss 2.5035 (2.4398)	Entropy 0.99399 (0.99545)	Top-1 acc 65.234 (65.371)	Top-5 acc 80.078 (84.726)	lr 0.00880
Train [72][1590/3239]	Time 0.318 (0.619)	Data Time 0.001 (0.027)	Loss 2.3916 (2.4401)	Entropy 0.99392 (0.99544)	Top-1 acc 67.969 (65.367)	Top-5 acc 87.500 (84.719)	lr 0.00880
Train [72][1600/3239]	Time 0.218 (0.618)	Data Time 0.001 (0.027)	Loss 2.4683 (2.4402)	Entropy 0.99392 (0.99543)	Top-1 acc 67.578 (65.364)	Top-5 acc 86.328 (84.719)	lr 0.00880
Train [72][1610/3239]	Time 0.224 (0.617)	Data Time 0.001 (0.026)	Loss 2.3595 (2.4399)	Entropy 0.99392 (0.99542)	Top-1 acc 63.281 (65.369)	Top-5 acc 85.156 (84.725)	lr 0.00879
Train [72][1620/3239]	Time 0.257 (0.616)	Data Time 0.002 (0.026)	Loss 2.5626 (2.4396)	Entropy 0.99390 (0.99541)	Top-1 acc 61.719 (65.371)	Top-5 acc 83.984 (84.728)	lr 0.00879
Train [72][1630/3239]	Time 0.221 (0.615)	Data Time 0.001 (0.026)	Loss 2.3443 (2.4396)	Entropy 0.99381 (0.99540)	Top-1 acc 68.359 (65.368)	Top-5 acc 89.062 (84.734)	lr 0.00879
Train [72][1640/3239]	Time 0.233 (0.614)	Data Time 0.001 (0.026)	Loss 2.4044 (2.4398)	Entropy 0.99381 (0.99539)	Top-1 acc 66.406 (65.369)	Top-5 acc 85.547 (84.728)	lr 0.00879
Train [72][1650/3239]	Time 0.262 (0.613)	Data Time 0.001 (0.026)	Loss 2.4195 (2.4400)	Entropy 0.99373 (0.99538)	Top-1 acc 66.406 (65.362)	Top-5 acc 82.031 (84.725)	lr 0.00879
Train [72][1660/3239]	Time 2.596 (0.613)	Data Time 0.001 (0.026)	Loss 2.4822 (2.4401)	Entropy 0.99373 (0.99537)	Top-1 acc 61.719 (65.359)	Top-5 acc 84.766 (84.724)	lr 0.00879
Train [72][1670/3239]	Time 0.233 (0.610)	Data Time 0.001 (0.025)	Loss 2.5068 (2.4399)	Entropy 0.99367 (0.99536)	Top-1 acc 64.062 (65.363)	Top-5 acc 84.375 (84.728)	lr 0.00879
Train [72][1680/3239]	Time 0.225 (0.610)	Data Time 0.001 (0.025)	Loss 2.2905 (2.4396)	Entropy 0.99368 (0.99535)	Top-1 acc 70.312 (65.374)	Top-5 acc 87.891 (84.736)	lr 0.00879
Train [72][1690/3239]	Time 0.241 (0.609)	Data Time 0.002 (0.025)	Loss 2.6870 (2.4400)	Entropy 0.99368 (0.99534)	Top-1 acc 59.766 (65.366)	Top-5 acc 78.906 (84.730)	lr 0.00879
Train [72][1700/3239]	Time 0.220 (0.640)	Data Time 0.002 (0.025)	Loss 2.4411 (2.4402)	Entropy 0.99358 (0.99533)	Top-1 acc 66.797 (65.368)	Top-5 acc 83.594 (84.723)	lr 0.00879
Train [72][1710/3239]	Time 0.227 (0.640)	Data Time 0.002 (0.025)	Loss 2.5306 (2.4401)	Entropy 0.99357 (0.99532)	Top-1 acc 61.719 (65.363)	Top-5 acc 81.250 (84.720)	lr 0.00878
Train [72][1720/3239]	Time 0.317 (0.639)	Data Time 0.001 (0.025)	Loss 2.4521 (2.4396)	Entropy 0.99355 (0.99531)	Top-1 acc 64.453 (65.372)	Top-5 acc 83.594 (84.727)	lr 0.00878
Train [72][1730/3239]	Time 0.236 (0.638)	Data Time 0.001 (0.025)	Loss 2.3101 (2.4398)	Entropy 0.99351 (0.99530)	Top-1 acc 67.188 (65.362)	Top-5 acc 85.938 (84.723)	lr 0.00878
Train [72][1740/3239]	Time 0.235 (0.637)	Data Time 0.001 (0.025)	Loss 2.5663 (2.4398)	Entropy 0.99344 (0.99529)	Top-1 acc 60.156 (65.365)	Top-5 acc 82.031 (84.724)	lr 0.00878
Train [72][1750/3239]	Time 0.223 (0.636)	Data Time 0.001 (0.024)	Loss 2.5600 (2.4398)	Entropy 0.99340 (0.99528)	Top-1 acc 61.328 (65.366)	Top-5 acc 84.766 (84.724)	lr 0.00878
Train [72][1760/3239]	Time 0.220 (0.635)	Data Time 0.001 (0.024)	Loss 2.3557 (2.4399)	Entropy 0.99343 (0.99527)	Top-1 acc 67.188 (65.366)	Top-5 acc 84.375 (84.721)	lr 0.00878
Train [72][1770/3239]	Time 2.525 (0.634)	Data Time 0.001 (0.024)	Loss 2.4689 (2.4399)	Entropy 0.99343 (0.99526)	Top-1 acc 66.016 (65.373)	Top-5 acc 85.547 (84.721)	lr 0.00878
Train [72][1780/3239]	Time 0.243 (0.632)	Data Time 0.001 (0.024)	Loss 2.4763 (2.4401)	Entropy 0.99337 (0.99525)	Top-1 acc 67.578 (65.365)	Top-5 acc 83.984 (84.719)	lr 0.00878
Train [72][1790/3239]	Time 0.231 (0.631)	Data Time 0.001 (0.024)	Loss 2.2874 (2.4398)	Entropy 0.99365 (0.99524)	Top-1 acc 66.406 (65.375)	Top-5 acc 85.938 (84.722)	lr 0.00878
Train [72][1800/3239]	Time 0.229 (0.630)	Data Time 0.001 (0.024)	Loss 2.3818 (2.4397)	Entropy 0.99360 (0.99523)	Top-1 acc 67.578 (65.379)	Top-5 acc 85.156 (84.717)	lr 0.00878
Train [72][1810/3239]	Time 0.334 (0.629)	Data Time 0.002 (0.024)	Loss 2.4947 (2.4396)	Entropy 0.99353 (0.99522)	Top-1 acc 64.453 (65.382)	Top-5 acc 85.156 (84.721)	lr 0.00877
Train [72][1820/3239]	Time 0.233 (0.628)	Data Time 0.001 (0.024)	Loss 2.3091 (2.4399)	Entropy 0.99350 (0.99521)	Top-1 acc 68.359 (65.374)	Top-5 acc 87.500 (84.716)	lr 0.00877
Train [72][1830/3239]	Time 0.233 (0.627)	Data Time 0.001 (0.023)	Loss 2.6759 (2.4400)	Entropy 0.99348 (0.99520)	Top-1 acc 58.984 (65.375)	Top-5 acc 77.734 (84.710)	lr 0.00877
Train [72][1840/3239]	Time 0.214 (0.627)	Data Time 0.001 (0.023)	Loss 2.4674 (2.4402)	Entropy 0.99340 (0.99519)	Top-1 acc 61.719 (65.366)	Top-5 acc 84.375 (84.708)	lr 0.00877
Train [72][1850/3239]	Time 0.231 (0.626)	Data Time 0.001 (0.023)	Loss 2.4152 (2.4402)	Entropy 0.99337 (0.99518)	Top-1 acc 65.234 (65.361)	Top-5 acc 84.375 (84.710)	lr 0.00877
Train [72][1860/3239]	Time 0.227 (0.625)	Data Time 0.001 (0.023)	Loss 2.5225 (2.4402)	Entropy 0.99330 (0.99517)	Top-1 acc 63.672 (65.364)	Top-5 acc 82.422 (84.709)	lr 0.00877
Train [72][1870/3239]	Time 0.219 (0.624)	Data Time 0.001 (0.023)	Loss 2.3870 (2.4401)	Entropy 0.99333 (0.99516)	Top-1 acc 68.359 (65.368)	Top-5 acc 83.203 (84.705)	lr 0.00877
Train [72][1880/3239]	Time 2.480 (0.623)	Data Time 0.001 (0.023)	Loss 2.5749 (2.4402)	Entropy 0.99333 (0.99515)	Top-1 acc 64.844 (65.370)	Top-5 acc 81.250 (84.706)	lr 0.00877
Train [72][1890/3239]	Time 0.229 (0.621)	Data Time 0.001 (0.023)	Loss 2.4781 (2.4403)	Entropy 0.99331 (0.99514)	Top-1 acc 65.625 (65.365)	Top-5 acc 83.984 (84.702)	lr 0.00877
Train [72][1900/3239]	Time 0.227 (0.620)	Data Time 0.001 (0.023)	Loss 2.5027 (2.4400)	Entropy 0.99332 (0.99513)	Top-1 acc 65.234 (65.373)	Top-5 acc 83.594 (84.704)	lr 0.00877
Train [72][1910/3239]	Time 0.256 (0.619)	Data Time 0.001 (0.022)	Loss 2.4396 (2.4398)	Entropy 0.99338 (0.99512)	Top-1 acc 69.531 (65.377)	Top-5 acc 83.594 (84.710)	lr 0.00877
Train [72][1920/3239]	Time 0.265 (0.619)	Data Time 0.002 (0.022)	Loss 2.5319 (2.4400)	Entropy 0.99329 (0.99511)	Top-1 acc 61.328 (65.370)	Top-5 acc 83.594 (84.705)	lr 0.00876
Train [72][1930/3239]	Time 0.280 (0.618)	Data Time 0.001 (0.022)	Loss 2.5997 (2.4402)	Entropy 0.99330 (0.99510)	Top-1 acc 61.328 (65.359)	Top-5 acc 78.906 (84.701)	lr 0.00876
Train [72][1940/3239]	Time 0.252 (0.618)	Data Time 0.001 (0.022)	Loss 2.4429 (2.4401)	Entropy 0.99310 (0.99509)	Top-1 acc 65.625 (65.358)	Top-5 acc 84.766 (84.706)	lr 0.00876
Train [72][1950/3239]	Time 0.220 (0.617)	Data Time 0.001 (0.022)	Loss 2.4016 (2.4403)	Entropy 0.99310 (0.99508)	Top-1 acc 65.625 (65.352)	Top-5 acc 85.547 (84.703)	lr 0.00876
Train [72][1960/3239]	Time 0.223 (0.616)	Data Time 0.001 (0.022)	Loss 2.4682 (2.4398)	Entropy 0.99309 (0.99507)	Top-1 acc 61.328 (65.361)	Top-5 acc 83.984 (84.713)	lr 0.00876
Train [72][1970/3239]	Time 0.216 (0.616)	Data Time 0.001 (0.022)	Loss 2.4697 (2.4398)	Entropy 0.99306 (0.99506)	Top-1 acc 66.797 (65.358)	Top-5 acc 83.203 (84.710)	lr 0.00876
Train [72][1980/3239]	Time 0.298 (0.615)	Data Time 0.002 (0.022)	Loss 2.4591 (2.4399)	Entropy 0.99291 (0.99505)	Top-1 acc 68.750 (65.355)	Top-5 acc 83.594 (84.704)	lr 0.00876
Train [72][1990/3239]	Time 2.482 (0.614)	Data Time 0.001 (0.022)	Loss 2.3030 (2.4404)	Entropy 0.99291 (0.99504)	Top-1 acc 66.797 (65.341)	Top-5 acc 89.062 (84.697)	lr 0.00876
Train [72][2000/3239]	Time 0.242 (0.612)	Data Time 0.001 (0.022)	Loss 2.3736 (2.4403)	Entropy 0.99287 (0.99503)	Top-1 acc 68.359 (65.342)	Top-5 acc 85.938 (84.700)	lr 0.00876
Train [72][2010/3239]	Time 0.217 (0.612)	Data Time 0.001 (0.021)	Loss 2.6206 (2.4402)	Entropy 0.99276 (0.99502)	Top-1 acc 62.891 (65.343)	Top-5 acc 81.641 (84.703)	lr 0.00876
Train [72][2020/3239]	Time 0.231 (0.611)	Data Time 0.001 (0.021)	Loss 2.5171 (2.4402)	Entropy 0.99273 (0.99501)	Top-1 acc 64.844 (65.341)	Top-5 acc 84.766 (84.704)	lr 0.00875
Train [72][2030/3239]	Time 0.315 (0.610)	Data Time 0.002 (0.021)	Loss 2.4611 (2.4402)	Entropy 0.99270 (0.99500)	Top-1 acc 62.109 (65.336)	Top-5 acc 85.547 (84.703)	lr 0.00875
Train [72][2040/3239]	Time 0.218 (0.610)	Data Time 0.001 (0.021)	Loss 2.4414 (2.4401)	Entropy 0.99265 (0.99499)	Top-1 acc 65.234 (65.340)	Top-5 acc 85.156 (84.707)	lr 0.00875
Train [72][2050/3239]	Time 0.224 (0.609)	Data Time 0.001 (0.021)	Loss 2.5089 (2.4402)	Entropy 0.99253 (0.99498)	Top-1 acc 63.672 (65.337)	Top-5 acc 81.641 (84.700)	lr 0.00875
Train [72][2060/3239]	Time 0.247 (0.635)	Data Time 0.003 (0.021)	Loss 2.5754 (2.4403)	Entropy 0.99252 (0.99496)	Top-1 acc 61.328 (65.338)	Top-5 acc 82.812 (84.700)	lr 0.00875
Train [72][2070/3239]	Time 0.261 (0.635)	Data Time 0.002 (0.021)	Loss 2.4487 (2.4404)	Entropy 0.99248 (0.99495)	Top-1 acc 62.500 (65.335)	Top-5 acc 84.766 (84.696)	lr 0.00875
Train [72][2080/3239]	Time 0.242 (0.634)	Data Time 0.002 (0.021)	Loss 2.3878 (2.4406)	Entropy 0.99244 (0.99494)	Top-1 acc 64.453 (65.327)	Top-5 acc 85.938 (84.692)	lr 0.00875
Train [72][2090/3239]	Time 0.229 (0.633)	Data Time 0.001 (0.021)	Loss 2.3767 (2.4407)	Entropy 0.99234 (0.99493)	Top-1 acc 66.797 (65.325)	Top-5 acc 84.766 (84.690)	lr 0.00875
Train [72][2100/3239]	Time 2.526 (0.633)	Data Time 0.001 (0.021)	Loss 2.4303 (2.4407)	Entropy 0.99234 (0.99492)	Top-1 acc 69.922 (65.333)	Top-5 acc 83.984 (84.686)	lr 0.00875
Train [72][2110/3239]	Time 0.233 (0.631)	Data Time 0.001 (0.021)	Loss 2.4141 (2.4405)	Entropy 0.99233 (0.99490)	Top-1 acc 66.797 (65.337)	Top-5 acc 86.719 (84.691)	lr 0.00875
Train [72][2120/3239]	Time 0.335 (0.630)	Data Time 0.001 (0.020)	Loss 2.5043 (2.4405)	Entropy 0.99221 (0.99489)	Top-1 acc 61.328 (65.334)	Top-5 acc 85.547 (84.691)	lr 0.00875
Train [72][2130/3239]	Time 0.221 (0.629)	Data Time 0.001 (0.020)	Loss 2.3390 (2.4404)	Entropy 0.99207 (0.99488)	Top-1 acc 68.359 (65.335)	Top-5 acc 88.672 (84.694)	lr 0.00874
Train [72][2140/3239]	Time 0.233 (0.629)	Data Time 0.002 (0.020)	Loss 2.6655 (2.4407)	Entropy 0.99205 (0.99486)	Top-1 acc 60.938 (65.336)	Top-5 acc 79.688 (84.690)	lr 0.00874
Train [72][2150/3239]	Time 0.216 (0.628)	Data Time 0.001 (0.020)	Loss 2.3181 (2.4405)	Entropy 0.99204 (0.99485)	Top-1 acc 66.406 (65.339)	Top-5 acc 87.109 (84.692)	lr 0.00874
Train [72][2160/3239]	Time 0.252 (0.627)	Data Time 0.001 (0.020)	Loss 2.6063 (2.4409)	Entropy 0.99198 (0.99484)	Top-1 acc 60.156 (65.332)	Top-5 acc 81.250 (84.688)	lr 0.00874
Train [72][2170/3239]	Time 0.227 (0.626)	Data Time 0.001 (0.020)	Loss 2.4299 (2.4411)	Entropy 0.99194 (0.99482)	Top-1 acc 65.234 (65.330)	Top-5 acc 85.938 (84.688)	lr 0.00874
Train [72][2180/3239]	Time 0.220 (0.626)	Data Time 0.001 (0.020)	Loss 2.3573 (2.4410)	Entropy 0.99188 (0.99481)	Top-1 acc 69.141 (65.334)	Top-5 acc 85.156 (84.691)	lr 0.00874
Train [72][2190/3239]	Time 0.219 (0.625)	Data Time 0.001 (0.020)	Loss 2.3998 (2.4410)	Entropy 0.99191 (0.99480)	Top-1 acc 65.625 (65.331)	Top-5 acc 85.156 (84.692)	lr 0.00874
Train [72][2200/3239]	Time 0.262 (0.624)	Data Time 0.001 (0.020)	Loss 2.4864 (2.4412)	Entropy 0.99191 (0.99478)	Top-1 acc 64.062 (65.327)	Top-5 acc 84.766 (84.685)	lr 0.00874
Train [72][2210/3239]	Time 2.556 (0.623)	Data Time 0.001 (0.020)	Loss 2.6041 (2.4412)	Entropy 0.99191 (0.99477)	Top-1 acc 64.453 (65.324)	Top-5 acc 80.859 (84.689)	lr 0.00874
Train [72][2220/3239]	Time 0.255 (0.622)	Data Time 0.001 (0.020)	Loss 2.5108 (2.4412)	Entropy 0.99181 (0.99476)	Top-1 acc 63.672 (65.323)	Top-5 acc 83.203 (84.688)	lr 0.00874
Train [72][2230/3239]	Time 0.236 (0.621)	Data Time 0.002 (0.020)	Loss 2.4713 (2.4419)	Entropy 0.99182 (0.99475)	Top-1 acc 65.234 (65.309)	Top-5 acc 80.859 (84.681)	lr 0.00873
Train [72][2240/3239]	Time 0.228 (0.620)	Data Time 0.001 (0.019)	Loss 2.4955 (2.4417)	Entropy 0.99181 (0.99473)	Top-1 acc 62.891 (65.312)	Top-5 acc 83.594 (84.683)	lr 0.00873
Train [72][2250/3239]	Time 0.296 (0.620)	Data Time 0.001 (0.019)	Loss 2.4656 (2.4418)	Entropy 0.99174 (0.99472)	Top-1 acc 64.453 (65.316)	Top-5 acc 83.984 (84.680)	lr 0.00873
Train [72][2260/3239]	Time 0.238 (0.619)	Data Time 0.001 (0.019)	Loss 2.4434 (2.4419)	Entropy 0.99168 (0.99471)	Top-1 acc 60.938 (65.311)	Top-5 acc 86.328 (84.680)	lr 0.00873
Train [72][2270/3239]	Time 0.254 (0.618)	Data Time 0.001 (0.019)	Loss 2.2954 (2.4417)	Entropy 0.99166 (0.99469)	Top-1 acc 67.578 (65.320)	Top-5 acc 89.844 (84.685)	lr 0.00873
Train [72][2280/3239]	Time 0.232 (0.618)	Data Time 0.001 (0.019)	Loss 2.6080 (2.4417)	Entropy 0.99154 (0.99468)	Top-1 acc 62.891 (65.320)	Top-5 acc 80.469 (84.685)	lr 0.00873
Train [72][2290/3239]	Time 0.227 (0.617)	Data Time 0.001 (0.019)	Loss 2.6229 (2.4417)	Entropy 0.99152 (0.99467)	Top-1 acc 63.281 (65.319)	Top-5 acc 81.250 (84.682)	lr 0.00873
Train [72][2300/3239]	Time 0.228 (0.617)	Data Time 0.001 (0.019)	Loss 2.4113 (2.4418)	Entropy 0.99144 (0.99465)	Top-1 acc 65.234 (65.318)	Top-5 acc 84.766 (84.679)	lr 0.00873
Train [72][2310/3239]	Time 0.246 (0.616)	Data Time 0.001 (0.019)	Loss 2.2913 (2.4417)	Entropy 0.99137 (0.99464)	Top-1 acc 70.703 (65.318)	Top-5 acc 87.500 (84.682)	lr 0.00873
Train [72][2320/3239]	Time 2.536 (0.615)	Data Time 0.003 (0.019)	Loss 2.2784 (2.4417)	Entropy 0.99137 (0.99462)	Top-1 acc 72.656 (65.326)	Top-5 acc 89.453 (84.686)	lr 0.00873
Train [72][2330/3239]	Time 0.218 (0.614)	Data Time 0.001 (0.019)	Loss 2.3554 (2.4416)	Entropy 0.99132 (0.99461)	Top-1 acc 64.844 (65.323)	Top-5 acc 87.891 (84.688)	lr 0.00872
Train [72][2340/3239]	Time 0.325 (0.613)	Data Time 0.001 (0.019)	Loss 2.3302 (2.4419)	Entropy 0.99124 (0.99459)	Top-1 acc 69.922 (65.319)	Top-5 acc 87.109 (84.683)	lr 0.00872
Train [72][2350/3239]	Time 0.233 (0.612)	Data Time 0.001 (0.019)	Loss 2.5409 (2.4418)	Entropy 0.99119 (0.99458)	Top-1 acc 64.453 (65.320)	Top-5 acc 81.250 (84.682)	lr 0.00872
Train [72][2360/3239]	Time 0.219 (0.612)	Data Time 0.001 (0.019)	Loss 2.4575 (2.4419)	Entropy 0.99112 (0.99457)	Top-1 acc 67.578 (65.321)	Top-5 acc 83.984 (84.681)	lr 0.00872
Train [72][2370/3239]	Time 0.245 (0.611)	Data Time 0.001 (0.018)	Loss 2.5315 (2.4421)	Entropy 0.99094 (0.99455)	Top-1 acc 65.625 (65.316)	Top-5 acc 83.203 (84.678)	lr 0.00872
Train [72][2380/3239]	Time 0.226 (0.611)	Data Time 0.001 (0.018)	Loss 2.4630 (2.4419)	Entropy 0.99093 (0.99454)	Top-1 acc 65.234 (65.322)	Top-5 acc 82.812 (84.682)	lr 0.00872
Train [72][2390/3239]	Time 0.233 (0.610)	Data Time 0.001 (0.018)	Loss 2.4946 (2.4421)	Entropy 0.99091 (0.99452)	Top-1 acc 66.406 (65.316)	Top-5 acc 83.984 (84.681)	lr 0.00872
Train [72][2400/3239]	Time 0.222 (0.609)	Data Time 0.001 (0.018)	Loss 2.3878 (2.4421)	Entropy 0.99089 (0.99451)	Top-1 acc 68.359 (65.318)	Top-5 acc 85.938 (84.680)	lr 0.00872
Train [72][2410/3239]	Time 0.230 (0.609)	Data Time 0.001 (0.018)	Loss 2.6251 (2.4422)	Entropy 0.99085 (0.99449)	Top-1 acc 58.984 (65.319)	Top-5 acc 81.250 (84.678)	lr 0.00872
Train [72][2420/3239]	Time 0.328 (0.629)	Data Time 0.005 (0.018)	Loss 2.4984 (2.4422)	Entropy 0.99081 (0.99448)	Top-1 acc 64.062 (65.320)	Top-5 acc 81.250 (84.676)	lr 0.00872
Train [72][2430/3239]	Time 3.656 (0.629)	Data Time 0.004 (0.018)	Loss 2.5480 (2.4425)	Entropy 0.99081 (0.99446)	Top-1 acc 63.281 (65.315)	Top-5 acc 82.031 (84.670)	lr 0.00872
Train [72][2440/3239]	Time 0.230 (0.627)	Data Time 0.002 (0.018)	Loss 2.3664 (2.4426)	Entropy 0.99082 (0.99445)	Top-1 acc 68.359 (65.309)	Top-5 acc 86.328 (84.667)	lr 0.00871
Train [72][2450/3239]	Time 0.247 (0.627)	Data Time 0.002 (0.018)	Loss 2.4007 (2.4428)	Entropy 0.99080 (0.99443)	Top-1 acc 69.922 (65.309)	Top-5 acc 85.547 (84.663)	lr 0.00871
Train [72][2460/3239]	Time 0.240 (0.626)	Data Time 0.001 (0.018)	Loss 2.4358 (2.4429)	Entropy 0.99080 (0.99442)	Top-1 acc 62.500 (65.307)	Top-5 acc 87.500 (84.663)	lr 0.00871
Train [72][2470/3239]	Time 0.335 (0.626)	Data Time 0.001 (0.018)	Loss 2.4497 (2.4432)	Entropy 0.99078 (0.99440)	Top-1 acc 66.406 (65.297)	Top-5 acc 83.594 (84.657)	lr 0.00871
Train [72][2480/3239]	Time 0.245 (0.625)	Data Time 0.001 (0.018)	Loss 2.4569 (2.4434)	Entropy 0.99074 (0.99439)	Top-1 acc 64.844 (65.294)	Top-5 acc 84.766 (84.654)	lr 0.00871
Train [72][2490/3239]	Time 0.232 (0.624)	Data Time 0.001 (0.018)	Loss 2.5811 (2.4436)	Entropy 0.99076 (0.99437)	Top-1 acc 63.281 (65.291)	Top-5 acc 81.641 (84.652)	lr 0.00871
Train [72][2500/3239]	Time 0.228 (0.624)	Data Time 0.001 (0.018)	Loss 2.6553 (2.4439)	Entropy 0.99074 (0.99436)	Top-1 acc 58.984 (65.280)	Top-5 acc 80.859 (84.644)	lr 0.00871
Train [72][2510/3239]	Time 0.226 (0.623)	Data Time 0.001 (0.018)	Loss 2.6312 (2.4440)	Entropy 0.99070 (0.99434)	Top-1 acc 58.594 (65.277)	Top-5 acc 81.641 (84.641)	lr 0.00871
Train [72][2520/3239]	Time 0.229 (0.623)	Data Time 0.001 (0.017)	Loss 2.3711 (2.4440)	Entropy 0.99082 (0.99433)	Top-1 acc 67.969 (65.278)	Top-5 acc 86.328 (84.645)	lr 0.00871
Train [72][2530/3239]	Time 0.218 (0.622)	Data Time 0.001 (0.017)	Loss 2.6541 (2.4440)	Entropy 0.99072 (0.99431)	Top-1 acc 62.109 (65.278)	Top-5 acc 76.953 (84.642)	lr 0.00871
Train [72][2540/3239]	Time 2.478 (0.621)	Data Time 0.001 (0.017)	Loss 2.4426 (2.4441)	Entropy 0.99072 (0.99430)	Top-1 acc 66.016 (65.275)	Top-5 acc 85.156 (84.641)	lr 0.00870
Train [72][2550/3239]	Time 0.247 (0.620)	Data Time 0.001 (0.017)	Loss 2.6180 (2.4443)	Entropy 0.99074 (0.99429)	Top-1 acc 59.766 (65.270)	Top-5 acc 81.250 (84.638)	lr 0.00870
Train [72][2560/3239]	Time 0.336 (0.619)	Data Time 0.001 (0.017)	Loss 2.3317 (2.4442)	Entropy 0.99073 (0.99427)	Top-1 acc 67.578 (65.269)	Top-5 acc 86.328 (84.638)	lr 0.00870
Train [72][2570/3239]	Time 0.241 (0.619)	Data Time 0.001 (0.017)	Loss 2.4087 (2.4442)	Entropy 0.99067 (0.99426)	Top-1 acc 69.922 (65.271)	Top-5 acc 83.203 (84.635)	lr 0.00870
Train [72][2580/3239]	Time 0.222 (0.618)	Data Time 0.001 (0.017)	Loss 2.4231 (2.4443)	Entropy 0.99065 (0.99424)	Top-1 acc 67.969 (65.272)	Top-5 acc 83.594 (84.635)	lr 0.00870
Train [72][2590/3239]	Time 0.242 (0.618)	Data Time 0.001 (0.017)	Loss 2.5883 (2.4443)	Entropy 0.99063 (0.99423)	Top-1 acc 62.109 (65.273)	Top-5 acc 83.594 (84.636)	lr 0.00870
Train [72][2600/3239]	Time 0.333 (0.617)	Data Time 0.001 (0.017)	Loss 2.3980 (2.4443)	Entropy 0.99061 (0.99422)	Top-1 acc 66.797 (65.268)	Top-5 acc 84.375 (84.633)	lr 0.00870
Train [72][2610/3239]	Time 0.214 (0.617)	Data Time 0.001 (0.017)	Loss 2.4366 (2.4444)	Entropy 0.99057 (0.99420)	Top-1 acc 64.062 (65.264)	Top-5 acc 85.156 (84.632)	lr 0.00870
Train [72][2620/3239]	Time 0.240 (0.616)	Data Time 0.001 (0.017)	Loss 2.3357 (2.4445)	Entropy 0.99049 (0.99419)	Top-1 acc 67.188 (65.260)	Top-5 acc 87.109 (84.631)	lr 0.00870
Train [72][2630/3239]	Time 0.234 (0.616)	Data Time 0.001 (0.017)	Loss 2.5914 (2.4445)	Entropy 0.99045 (0.99417)	Top-1 acc 62.891 (65.263)	Top-5 acc 83.203 (84.631)	lr 0.00870
Train [72][2640/3239]	Time 0.245 (0.615)	Data Time 0.001 (0.017)	Loss 2.6061 (2.4446)	Entropy 0.99030 (0.99416)	Top-1 acc 61.719 (65.262)	Top-5 acc 83.594 (84.629)	lr 0.00869
Train [72][2650/3239]	Time 0.255 (0.615)	Data Time 0.001 (0.017)	Loss 2.3465 (2.4447)	Entropy 0.99024 (0.99415)	Top-1 acc 67.578 (65.258)	Top-5 acc 84.766 (84.625)	lr 0.00869
Train [72][2660/3239]	Time 0.226 (0.614)	Data Time 0.001 (0.017)	Loss 2.4877 (2.4451)	Entropy 0.99019 (0.99413)	Top-1 acc 66.406 (65.252)	Top-5 acc 83.203 (84.617)	lr 0.00869
Train [72][2670/3239]	Time 0.245 (0.613)	Data Time 0.001 (0.017)	Loss 2.6724 (2.4451)	Entropy 0.99020 (0.99412)	Top-1 acc 58.203 (65.249)	Top-5 acc 79.297 (84.617)	lr 0.00869
Train [72][2680/3239]	Time 0.221 (0.613)	Data Time 0.001 (0.017)	Loss 2.3350 (2.4451)	Entropy 0.99016 (0.99410)	Top-1 acc 67.578 (65.249)	Top-5 acc 85.938 (84.618)	lr 0.00869
Train [72][2690/3239]	Time 0.328 (0.612)	Data Time 0.001 (0.016)	Loss 2.4221 (2.4450)	Entropy 0.99019 (0.99409)	Top-1 acc 67.578 (65.250)	Top-5 acc 82.422 (84.615)	lr 0.00869
Train [72][2700/3239]	Time 0.253 (0.612)	Data Time 0.001 (0.016)	Loss 2.3843 (2.4450)	Entropy 0.99017 (0.99407)	Top-1 acc 66.016 (65.251)	Top-5 acc 85.938 (84.619)	lr 0.00869
Train [72][2710/3239]	Time 0.257 (0.611)	Data Time 0.001 (0.016)	Loss 2.3560 (2.4451)	Entropy 0.99014 (0.99406)	Top-1 acc 69.141 (65.248)	Top-5 acc 86.719 (84.619)	lr 0.00869
Train [72][2720/3239]	Time 0.284 (0.611)	Data Time 0.001 (0.016)	Loss 2.4040 (2.4450)	Entropy 0.99013 (0.99404)	Top-1 acc 63.672 (65.252)	Top-5 acc 85.547 (84.619)	lr 0.00869
Train [72][2730/3239]	Time 0.226 (0.610)	Data Time 0.002 (0.016)	Loss 2.4746 (2.4449)	Entropy 0.99009 (0.99403)	Top-1 acc 62.500 (65.253)	Top-5 acc 84.766 (84.622)	lr 0.00869
Train [72][2740/3239]	Time 0.242 (0.610)	Data Time 0.001 (0.016)	Loss 2.5225 (2.4448)	Entropy 0.99005 (0.99402)	Top-1 acc 64.844 (65.254)	Top-5 acc 83.984 (84.623)	lr 0.00869
Train [72][2750/3239]	Time 0.224 (0.609)	Data Time 0.001 (0.016)	Loss 2.3915 (2.4448)	Entropy 0.99004 (0.99400)	Top-1 acc 67.188 (65.255)	Top-5 acc 87.109 (84.623)	lr 0.00868
Train [72][2760/3239]	Time 0.268 (0.609)	Data Time 0.003 (0.016)	Loss 2.4554 (2.4447)	Entropy 0.99001 (0.99399)	Top-1 acc 63.672 (65.256)	Top-5 acc 82.812 (84.622)	lr 0.00868
Train [72][2770/3239]	Time 0.267 (0.629)	Data Time 0.004 (0.016)	Loss 2.4349 (2.4446)	Entropy 0.98996 (0.99397)	Top-1 acc 64.062 (65.257)	Top-5 acc 83.594 (84.626)	lr 0.00868
Train [72][2780/3239]	Time 0.329 (0.628)	Data Time 0.002 (0.016)	Loss 2.5663 (2.4448)	Entropy 0.98996 (0.99396)	Top-1 acc 61.719 (65.254)	Top-5 acc 83.984 (84.622)	lr 0.00868
Train [72][2790/3239]	Time 0.224 (0.627)	Data Time 0.002 (0.016)	Loss 2.3306 (2.4451)	Entropy 0.98968 (0.99394)	Top-1 acc 68.359 (65.246)	Top-5 acc 87.891 (84.618)	lr 0.00868
Train [72][2800/3239]	Time 0.228 (0.627)	Data Time 0.001 (0.016)	Loss 2.3507 (2.4450)	Entropy 0.98968 (0.99393)	Top-1 acc 65.625 (65.245)	Top-5 acc 87.109 (84.620)	lr 0.00868
Train [72][2810/3239]	Time 0.253 (0.626)	Data Time 0.001 (0.016)	Loss 2.4687 (2.4452)	Entropy 0.98966 (0.99391)	Top-1 acc 64.062 (65.240)	Top-5 acc 82.812 (84.616)	lr 0.00868
Train [72][2820/3239]	Time 0.268 (0.626)	Data Time 0.001 (0.016)	Loss 2.3698 (2.4453)	Entropy 0.98957 (0.99390)	Top-1 acc 66.797 (65.238)	Top-5 acc 87.500 (84.615)	lr 0.00868
Train [72][2830/3239]	Time 0.233 (0.625)	Data Time 0.001 (0.016)	Loss 2.4673 (2.4455)	Entropy 0.98951 (0.99388)	Top-1 acc 66.016 (65.236)	Top-5 acc 85.156 (84.611)	lr 0.00868
Train [72][2840/3239]	Time 0.304 (0.625)	Data Time 0.001 (0.016)	Loss 2.3428 (2.4453)	Entropy 0.98953 (0.99387)	Top-1 acc 66.797 (65.240)	Top-5 acc 85.938 (84.614)	lr 0.00868
Train [72][2850/3239]	Time 0.237 (0.624)	Data Time 0.001 (0.016)	Loss 2.4799 (2.4453)	Entropy 0.98953 (0.99385)	Top-1 acc 65.625 (65.240)	Top-5 acc 82.812 (84.611)	lr 0.00867
Train [72][2860/3239]	Time 0.237 (0.624)	Data Time 0.001 (0.016)	Loss 2.2667 (2.4451)	Entropy 0.98954 (0.99384)	Top-1 acc 69.141 (65.247)	Top-5 acc 87.109 (84.617)	lr 0.00867
Train [72][2870/3239]	Time 0.386 (0.623)	Data Time 0.001 (0.016)	Loss 2.6572 (2.4453)	Entropy 0.98952 (0.99382)	Top-1 acc 60.938 (65.243)	Top-5 acc 79.688 (84.614)	lr 0.00867
Train [72][2880/3239]	Time 0.278 (0.623)	Data Time 0.001 (0.015)	Loss 2.6011 (2.4453)	Entropy 0.98946 (0.99381)	Top-1 acc 60.547 (65.239)	Top-5 acc 80.469 (84.615)	lr 0.00867
Train [72][2890/3239]	Time 0.233 (0.622)	Data Time 0.001 (0.015)	Loss 2.4722 (2.4452)	Entropy 0.98946 (0.99379)	Top-1 acc 65.625 (65.243)	Top-5 acc 85.156 (84.616)	lr 0.00867
Train [72][2900/3239]	Time 0.228 (0.622)	Data Time 0.002 (0.015)	Loss 2.3018 (2.4454)	Entropy 0.98932 (0.99378)	Top-1 acc 68.359 (65.241)	Top-5 acc 87.109 (84.614)	lr 0.00867
Train [72][2910/3239]	Time 0.250 (0.621)	Data Time 0.001 (0.015)	Loss 2.5001 (2.4456)	Entropy 0.98924 (0.99376)	Top-1 acc 62.891 (65.238)	Top-5 acc 85.547 (84.611)	lr 0.00867
Train [72][2920/3239]	Time 0.245 (0.621)	Data Time 0.001 (0.015)	Loss 2.3310 (2.4453)	Entropy 0.98921 (0.99375)	Top-1 acc 71.484 (65.245)	Top-5 acc 88.281 (84.617)	lr 0.00867
Train [72][2930/3239]	Time 0.244 (0.620)	Data Time 0.002 (0.015)	Loss 2.6025 (2.4454)	Entropy 0.98919 (0.99373)	Top-1 acc 62.500 (65.242)	Top-5 acc 80.469 (84.617)	lr 0.00867
Train [72][2940/3239]	Time 0.231 (0.620)	Data Time 0.001 (0.015)	Loss 2.4219 (2.4454)	Entropy 0.98918 (0.99372)	Top-1 acc 67.578 (65.236)	Top-5 acc 83.984 (84.616)	lr 0.00867
Train [72][2950/3239]	Time 0.254 (0.619)	Data Time 0.001 (0.015)	Loss 2.3604 (2.4456)	Entropy 0.98912 (0.99370)	Top-1 acc 65.234 (65.231)	Top-5 acc 86.719 (84.613)	lr 0.00867
Train [72][2960/3239]	Time 0.323 (0.619)	Data Time 0.002 (0.015)	Loss 2.5156 (2.4456)	Entropy 0.98914 (0.99368)	Top-1 acc 64.062 (65.233)	Top-5 acc 83.203 (84.612)	lr 0.00866
Train [72][2970/3239]	Time 0.222 (0.618)	Data Time 0.001 (0.015)	Loss 2.4742 (2.4455)	Entropy 0.98910 (0.99367)	Top-1 acc 62.500 (65.236)	Top-5 acc 83.594 (84.614)	lr 0.00866
Train [72][2980/3239]	Time 0.275 (0.618)	Data Time 0.001 (0.015)	Loss 2.4945 (2.4456)	Entropy 0.98907 (0.99365)	Top-1 acc 65.625 (65.234)	Top-5 acc 83.984 (84.609)	lr 0.00866
Train [72][2990/3239]	Time 0.222 (0.617)	Data Time 0.001 (0.015)	Loss 2.4303 (2.4455)	Entropy 0.98901 (0.99364)	Top-1 acc 64.062 (65.237)	Top-5 acc 85.938 (84.612)	lr 0.00866
Train [72][3000/3239]	Time 0.226 (0.617)	Data Time 0.001 (0.015)	Loss 2.4534 (2.4454)	Entropy 0.98901 (0.99362)	Top-1 acc 64.844 (65.238)	Top-5 acc 83.203 (84.614)	lr 0.00866
Train [72][3010/3239]	Time 0.267 (0.616)	Data Time 0.001 (0.015)	Loss 2.3744 (2.4454)	Entropy 0.98901 (0.99361)	Top-1 acc 68.359 (65.239)	Top-5 acc 87.500 (84.615)	lr 0.00866
Train [72][3020/3239]	Time 0.279 (0.616)	Data Time 0.001 (0.015)	Loss 2.5538 (2.4458)	Entropy 0.98895 (0.99359)	Top-1 acc 59.766 (65.228)	Top-5 acc 81.250 (84.607)	lr 0.00866
Train [72][3030/3239]	Time 0.250 (0.615)	Data Time 0.001 (0.015)	Loss 2.5027 (2.4457)	Entropy 0.98900 (0.99358)	Top-1 acc 65.234 (65.236)	Top-5 acc 83.984 (84.611)	lr 0.00866
Train [72][3040/3239]	Time 0.229 (0.615)	Data Time 0.002 (0.015)	Loss 2.4017 (2.4456)	Entropy 0.98896 (0.99356)	Top-1 acc 67.969 (65.239)	Top-5 acc 83.984 (84.612)	lr 0.00866
Train [72][3050/3239]	Time 0.352 (0.614)	Data Time 0.001 (0.015)	Loss 2.4730 (2.4456)	Entropy 0.98908 (0.99355)	Top-1 acc 65.234 (65.238)	Top-5 acc 83.984 (84.613)	lr 0.00866
Train [72][3060/3239]	Time 0.228 (0.614)	Data Time 0.001 (0.015)	Loss 2.2668 (2.4464)	Entropy 0.98900 (0.99353)	Top-1 acc 70.312 (65.221)	Top-5 acc 89.062 (84.602)	lr 0.00865
Train [72][3070/3239]	Time 0.231 (0.613)	Data Time 0.001 (0.015)	Loss 2.4526 (2.4463)	Entropy 0.98897 (0.99352)	Top-1 acc 64.453 (65.224)	Top-5 acc 83.203 (84.603)	lr 0.00865
Train [72][3080/3239]	Time 0.269 (0.613)	Data Time 0.001 (0.015)	Loss 2.5649 (2.4463)	Entropy 0.98898 (0.99350)	Top-1 acc 60.938 (65.222)	Top-5 acc 83.203 (84.600)	lr 0.00865
Train [72][3090/3239]	Time 0.238 (0.612)	Data Time 0.001 (0.015)	Loss 2.6277 (2.4466)	Entropy 0.98897 (0.99349)	Top-1 acc 55.859 (65.216)	Top-5 acc 81.250 (84.595)	lr 0.00865
Train [72][3100/3239]	Time 0.246 (0.630)	Data Time 0.003 (0.015)	Loss 2.5330 (2.4466)	Entropy 0.98894 (0.99347)	Top-1 acc 65.234 (65.212)	Top-5 acc 83.594 (84.594)	lr 0.00865
Train [72][3110/3239]	Time 0.261 (0.630)	Data Time 0.002 (0.014)	Loss 2.5375 (2.4466)	Entropy 0.98890 (0.99346)	Top-1 acc 62.500 (65.212)	Top-5 acc 82.812 (84.591)	lr 0.00865
Train [72][3120/3239]	Time 0.259 (0.629)	Data Time 0.002 (0.014)	Loss 2.5368 (2.4466)	Entropy 0.98894 (0.99344)	Top-1 acc 66.797 (65.212)	Top-5 acc 81.641 (84.590)	lr 0.00865
Train [72][3130/3239]	Time 0.228 (0.629)	Data Time 0.001 (0.014)	Loss 2.2938 (2.4466)	Entropy 0.98891 (0.99343)	Top-1 acc 71.484 (65.212)	Top-5 acc 85.938 (84.587)	lr 0.00865
Train [72][3140/3239]	Time 0.393 (0.628)	Data Time 0.002 (0.014)	Loss 2.5725 (2.4466)	Entropy 0.98890 (0.99342)	Top-1 acc 63.281 (65.212)	Top-5 acc 83.984 (84.591)	lr 0.00865
Train [72][3150/3239]	Time 0.218 (0.628)	Data Time 0.001 (0.014)	Loss 2.4427 (2.4465)	Entropy 0.98890 (0.99340)	Top-1 acc 65.625 (65.212)	Top-5 acc 83.984 (84.594)	lr 0.00865
Train [72][3160/3239]	Time 0.233 (0.627)	Data Time 0.002 (0.014)	Loss 2.5066 (2.4465)	Entropy 0.98889 (0.99339)	Top-1 acc 64.062 (65.215)	Top-5 acc 80.859 (84.592)	lr 0.00864
Train [72][3170/3239]	Time 0.237 (0.627)	Data Time 0.004 (0.014)	Loss 2.2691 (2.4466)	Entropy 0.98885 (0.99337)	Top-1 acc 68.750 (65.215)	Top-5 acc 90.234 (84.593)	lr 0.00864
Train [72][3180/3239]	Time 0.351 (0.626)	Data Time 0.000 (0.014)	Loss 2.5084 (2.4466)	Entropy 0.98882 (0.99336)	Top-1 acc 62.500 (65.217)	Top-5 acc 83.984 (84.593)	lr 0.00864
Train [72][3190/3239]	Time 0.221 (0.626)	Data Time 0.000 (0.014)	Loss 2.3594 (2.4465)	Entropy 0.98875 (0.99334)	Top-1 acc 65.234 (65.217)	Top-5 acc 87.500 (84.592)	lr 0.00864
Train [72][3200/3239]	Time 0.238 (0.625)	Data Time 0.000 (0.014)	Loss 2.5123 (2.4466)	Entropy 0.98875 (0.99333)	Top-1 acc 63.672 (65.212)	Top-5 acc 84.375 (84.592)	lr 0.00864
Train [72][3210/3239]	Time 0.228 (0.625)	Data Time 0.000 (0.014)	Loss 2.4074 (2.4467)	Entropy 0.98879 (0.99332)	Top-1 acc 67.188 (65.211)	Top-5 acc 87.109 (84.591)	lr 0.00864
Train [72][3220/3239]	Time 0.240 (0.624)	Data Time 0.000 (0.014)	Loss 2.3472 (2.4466)	Entropy 0.98871 (0.99330)	Top-1 acc 67.578 (65.209)	Top-5 acc 87.500 (84.595)	lr 0.00864
Train [72][3230/3239]	Time 0.235 (0.624)	Data Time 0.000 (0.014)	Loss 2.5266 (2.4467)	Entropy 0.98865 (0.99329)	Top-1 acc 63.672 (65.206)	Top-5 acc 80.078 (84.594)	lr 0.00864
Train [72][3239/3239]	Time 2.311 (0.623)	Data Time 0.000 (0.014)	Loss 2.6970 (2.4467)	Entropy 0.98865 (0.99327)	Top-1 acc 61.728 (65.207)	Top-5 acc 75.309 (84.593)	lr 0.00864
==========Valid [72/120]	loss 1.349	top-1 acc 69.246 (69.246)	top-5 acc 87.884	Train top-1 65.207	top-5 84.593	Entropy 0.98865	Latency-None: 0.000ms	Flops: 546.53M
Train [73][0/3239]	Time 42.486 (42.486)	Data Time 40.787 (40.787)	Loss 2.2820 (2.2820)	Entropy 0.98854 (0.98854)	Top-1 acc 67.188 (67.188)	Top-5 acc 87.109 (87.109)	lr 0.00864
Train [73][10/3239]	Time 2.709 (4.381)	Data Time 0.003 (3.712)	Loss 2.3917 (2.4450)	Entropy 0.98854 (0.98854)	Top-1 acc 65.625 (65.305)	Top-5 acc 86.328 (84.624)	lr 0.00864
Train [73][20/3239]	Time 0.236 (2.414)	Data Time 0.001 (1.945)	Loss 2.5661 (2.4539)	Entropy 0.98854 (0.98854)	Top-1 acc 63.672 (65.141)	Top-5 acc 83.594 (84.505)	lr 0.00864
Train [73][30/3239]	Time 0.231 (1.788)	Data Time 0.002 (1.318)	Loss 2.4283 (2.4366)	Entropy 0.98851 (0.98853)	Top-1 acc 64.453 (65.461)	Top-5 acc 83.984 (84.539)	lr 0.00863
Train [73][40/3239]	Time 0.235 (1.470)	Data Time 0.002 (0.997)	Loss 2.4758 (2.4321)	Entropy 0.98843 (0.98851)	Top-1 acc 64.062 (65.701)	Top-5 acc 85.547 (84.832)	lr 0.00863
Train [73][50/3239]	Time 0.229 (1.275)	Data Time 0.001 (0.802)	Loss 2.4854 (2.4299)	Entropy 0.98839 (0.98849)	Top-1 acc 64.844 (65.740)	Top-5 acc 83.203 (84.942)	lr 0.00863
Train [73][60/3239]	Time 0.226 (1.143)	Data Time 0.001 (0.671)	Loss 2.3677 (2.4306)	Entropy 0.98837 (0.98847)	Top-1 acc 66.016 (65.657)	Top-5 acc 84.766 (84.932)	lr 0.00863
Train [73][70/3239]	Time 0.239 (1.049)	Data Time 0.001 (0.577)	Loss 2.4659 (2.4323)	Entropy 0.98835 (0.98846)	Top-1 acc 64.844 (65.697)	Top-5 acc 83.203 (84.892)	lr 0.00863
Train [73][80/3239]	Time 0.319 (0.977)	Data Time 0.001 (0.506)	Loss 2.4376 (2.4273)	Entropy 0.98831 (0.98844)	Top-1 acc 61.719 (65.750)	Top-5 acc 85.156 (85.012)	lr 0.00863
Train [73][90/3239]	Time 0.241 (0.922)	Data Time 0.001 (0.450)	Loss 2.5553 (2.4317)	Entropy 0.98828 (0.98843)	Top-1 acc 64.453 (65.719)	Top-5 acc 82.422 (84.869)	lr 0.00863
Train [73][100/3239]	Time 0.217 (0.878)	Data Time 0.001 (0.406)	Loss 2.4325 (2.4294)	Entropy 0.98826 (0.98841)	Top-1 acc 64.062 (65.880)	Top-5 acc 87.500 (84.916)	lr 0.00863
Train [73][110/3239]	Time 0.268 (0.841)	Data Time 0.003 (0.369)	Loss 2.4061 (2.4297)	Entropy 0.98825 (0.98840)	Top-1 acc 66.797 (65.875)	Top-5 acc 85.938 (84.945)	lr 0.00863
Train [73][120/3239]	Time 2.641 (0.812)	Data Time 0.001 (0.339)	Loss 2.4154 (2.4286)	Entropy 0.98825 (0.98839)	Top-1 acc 65.234 (65.916)	Top-5 acc 87.500 (84.933)	lr 0.00863
Train [73][130/3239]	Time 0.275 (0.769)	Data Time 0.001 (0.313)	Loss 2.4846 (2.4278)	Entropy 0.98818 (0.98837)	Top-1 acc 66.797 (65.998)	Top-5 acc 82.812 (84.933)	lr 0.00862
Train [73][140/3239]	Time 0.237 (0.749)	Data Time 0.001 (0.291)	Loss 2.3271 (2.4205)	Entropy 0.98818 (0.98836)	Top-1 acc 69.141 (66.102)	Top-5 acc 87.500 (85.040)	lr 0.00862
Train [73][150/3239]	Time 0.240 (0.731)	Data Time 0.001 (0.272)	Loss 2.2201 (2.4224)	Entropy 0.98813 (0.98834)	Top-1 acc 69.531 (65.995)	Top-5 acc 87.500 (85.006)	lr 0.00862
Train [73][160/3239]	Time 0.230 (0.715)	Data Time 0.001 (0.255)	Loss 2.3062 (2.4220)	Entropy 0.98809 (0.98833)	Top-1 acc 67.578 (65.926)	Top-5 acc 88.281 (85.016)	lr 0.00862
Train [73][170/3239]	Time 0.164 (0.701)	Data Time 0.001 (0.240)	Loss 2.6304 (2.4229)	Entropy 0.98805 (0.98831)	Top-1 acc 64.062 (65.906)	Top-5 acc 80.859 (85.017)	lr 0.00862
Train [73][180/3239]	Time 0.228 (0.689)	Data Time 0.001 (0.227)	Loss 2.2974 (2.4210)	Entropy 0.98808 (0.98830)	Top-1 acc 66.797 (65.985)	Top-5 acc 86.328 (85.053)	lr 0.00862
Train [73][190/3239]	Time 0.239 (0.677)	Data Time 0.001 (0.215)	Loss 2.2711 (2.4291)	Entropy 0.98804 (0.98829)	Top-1 acc 67.969 (65.758)	Top-5 acc 88.672 (84.925)	lr 0.00862
Train [73][200/3239]	Time 0.167 (0.667)	Data Time 0.001 (0.205)	Loss 2.3166 (2.4325)	Entropy 0.98791 (0.98827)	Top-1 acc 67.188 (65.664)	Top-5 acc 87.109 (84.915)	lr 0.00862
Train [73][210/3239]	Time 0.581 (0.913)	Data Time 0.003 (0.195)	Loss 2.4160 (2.4306)	Entropy 0.98784 (0.98825)	Top-1 acc 66.016 (65.719)	Top-5 acc 86.328 (84.960)	lr 0.00862
Train [73][220/3239]	Time 0.235 (0.897)	Data Time 0.002 (0.187)	Loss 2.4695 (2.4280)	Entropy 0.98781 (0.98823)	Top-1 acc 63.672 (65.768)	Top-5 acc 85.156 (85.040)	lr 0.00862
Train [73][230/3239]	Time 2.504 (0.879)	Data Time 0.002 (0.179)	Loss 2.3125 (2.4263)	Entropy 0.98781 (0.98822)	Top-1 acc 69.531 (65.770)	Top-5 acc 86.328 (85.056)	lr 0.00862
Train [73][240/3239]	Time 0.235 (0.852)	Data Time 0.002 (0.171)	Loss 2.4024 (2.4254)	Entropy 0.98779 (0.98820)	Top-1 acc 66.406 (65.815)	Top-5 acc 83.203 (85.051)	lr 0.00861
Train [73][250/3239]	Time 0.233 (0.837)	Data Time 0.001 (0.165)	Loss 2.3799 (2.4264)	Entropy 0.98777 (0.98818)	Top-1 acc 67.969 (65.754)	Top-5 acc 85.156 (85.021)	lr 0.00861
Train [73][260/3239]	Time 0.236 (0.823)	Data Time 0.001 (0.158)	Loss 2.4952 (2.4248)	Entropy 0.98772 (0.98816)	Top-1 acc 63.281 (65.830)	Top-5 acc 83.594 (85.031)	lr 0.00861
Train [73][270/3239]	Time 0.201 (0.810)	Data Time 0.002 (0.153)	Loss 2.3262 (2.4261)	Entropy 0.98765 (0.98815)	Top-1 acc 68.359 (65.817)	Top-5 acc 85.547 (85.011)	lr 0.00861
Train [73][280/3239]	Time 0.241 (0.798)	Data Time 0.001 (0.147)	Loss 2.4731 (2.4270)	Entropy 0.98758 (0.98813)	Top-1 acc 64.062 (65.804)	Top-5 acc 85.156 (84.977)	lr 0.00861
Train [73][290/3239]	Time 0.208 (0.786)	Data Time 0.001 (0.142)	Loss 2.2382 (2.4246)	Entropy 0.98749 (0.98811)	Top-1 acc 71.484 (65.849)	Top-5 acc 89.062 (85.026)	lr 0.00861
Train [73][300/3239]	Time 0.349 (0.777)	Data Time 0.001 (0.138)	Loss 2.3444 (2.4230)	Entropy 0.98748 (0.98809)	Top-1 acc 67.969 (65.872)	Top-5 acc 85.547 (85.043)	lr 0.00861
Train [73][310/3239]	Time 0.233 (0.768)	Data Time 0.001 (0.133)	Loss 2.4341 (2.4222)	Entropy 0.98752 (0.98807)	Top-1 acc 67.969 (65.904)	Top-5 acc 83.203 (85.024)	lr 0.00861
Train [73][320/3239]	Time 0.239 (0.760)	Data Time 0.001 (0.129)	Loss 2.6205 (2.4230)	Entropy 0.98753 (0.98805)	Top-1 acc 60.938 (65.860)	Top-5 acc 82.812 (85.010)	lr 0.00861
Train [73][330/3239]	Time 0.288 (0.753)	Data Time 0.001 (0.125)	Loss 2.3316 (2.4225)	Entropy 0.98748 (0.98803)	Top-1 acc 68.750 (65.866)	Top-5 acc 87.500 (85.026)	lr 0.00861
Train [73][340/3239]	Time 2.554 (0.745)	Data Time 0.002 (0.122)	Loss 2.5483 (2.4238)	Entropy 0.98748 (0.98802)	Top-1 acc 63.281 (65.839)	Top-5 acc 82.812 (84.996)	lr 0.00860
Train [73][350/3239]	Time 0.231 (0.730)	Data Time 0.001 (0.118)	Loss 2.3490 (2.4240)	Entropy 0.98748 (0.98800)	Top-1 acc 67.969 (65.815)	Top-5 acc 88.281 (84.996)	lr 0.00860
Train [73][360/3239]	Time 0.232 (0.723)	Data Time 0.001 (0.115)	Loss 2.2918 (2.4247)	Entropy 0.98743 (0.98799)	Top-1 acc 68.750 (65.811)	Top-5 acc 87.500 (84.979)	lr 0.00860
Train [73][370/3239]	Time 0.270 (0.716)	Data Time 0.001 (0.112)	Loss 2.4634 (2.4252)	Entropy 0.98735 (0.98797)	Top-1 acc 65.625 (65.815)	Top-5 acc 85.938 (84.972)	lr 0.00860
Train [73][380/3239]	Time 0.239 (0.711)	Data Time 0.001 (0.109)	Loss 2.5116 (2.4247)	Entropy 0.98736 (0.98795)	Top-1 acc 64.844 (65.829)	Top-5 acc 86.328 (84.998)	lr 0.00860
Train [73][390/3239]	Time 0.351 (0.705)	Data Time 0.001 (0.106)	Loss 2.3467 (2.4231)	Entropy 0.98735 (0.98794)	Top-1 acc 66.797 (65.875)	Top-5 acc 87.109 (85.039)	lr 0.00860
Train [73][400/3239]	Time 0.239 (0.700)	Data Time 0.001 (0.104)	Loss 2.4004 (2.4208)	Entropy 0.98733 (0.98792)	Top-1 acc 67.578 (65.924)	Top-5 acc 86.719 (85.079)	lr 0.00860
Train [73][410/3239]	Time 0.274 (0.695)	Data Time 0.002 (0.101)	Loss 2.4981 (2.4210)	Entropy 0.98731 (0.98791)	Top-1 acc 62.109 (65.924)	Top-5 acc 84.766 (85.074)	lr 0.00860
Train [73][420/3239]	Time 0.208 (0.690)	Data Time 0.001 (0.099)	Loss 2.3539 (2.4210)	Entropy 0.98731 (0.98789)	Top-1 acc 67.969 (65.931)	Top-5 acc 88.672 (85.085)	lr 0.00860
Train [73][430/3239]	Time 0.267 (0.685)	Data Time 0.001 (0.097)	Loss 2.4351 (2.4210)	Entropy 0.98733 (0.98788)	Top-1 acc 66.797 (65.932)	Top-5 acc 83.984 (85.088)	lr 0.00860
Train [73][440/3239]	Time 0.229 (0.681)	Data Time 0.001 (0.094)	Loss 2.5416 (2.4212)	Entropy 0.98731 (0.98787)	Top-1 acc 60.547 (65.916)	Top-5 acc 82.031 (85.095)	lr 0.00860
Train [73][450/3239]	Time 2.692 (0.677)	Data Time 0.002 (0.092)	Loss 2.4549 (2.4219)	Entropy 0.98731 (0.98786)	Top-1 acc 64.062 (65.884)	Top-5 acc 85.156 (85.090)	lr 0.00859
Train [73][460/3239]	Time 0.244 (0.667)	Data Time 0.002 (0.090)	Loss 2.3884 (2.4213)	Entropy 0.98730 (0.98784)	Top-1 acc 69.141 (65.911)	Top-5 acc 87.500 (85.099)	lr 0.00859
Train [73][470/3239]	Time 0.220 (0.663)	Data Time 0.001 (0.089)	Loss 2.4007 (2.4211)	Entropy 0.98728 (0.98783)	Top-1 acc 68.359 (65.934)	Top-5 acc 82.812 (85.089)	lr 0.00859
Train [73][480/3239]	Time 0.375 (0.660)	Data Time 0.001 (0.087)	Loss 2.4857 (2.4205)	Entropy 0.98728 (0.98782)	Top-1 acc 65.625 (65.944)	Top-5 acc 86.719 (85.111)	lr 0.00859
Train [73][490/3239]	Time 0.232 (0.656)	Data Time 0.001 (0.085)	Loss 2.4175 (2.4201)	Entropy 0.98724 (0.98781)	Top-1 acc 64.453 (65.945)	Top-5 acc 86.328 (85.118)	lr 0.00859
Train [73][500/3239]	Time 0.234 (0.652)	Data Time 0.001 (0.083)	Loss 2.3809 (2.4196)	Entropy 0.98723 (0.98780)	Top-1 acc 69.531 (65.972)	Top-5 acc 86.719 (85.131)	lr 0.00859
Train [73][510/3239]	Time 0.226 (0.649)	Data Time 0.001 (0.082)	Loss 2.4927 (2.4193)	Entropy 0.98715 (0.98779)	Top-1 acc 64.062 (65.969)	Top-5 acc 85.156 (85.146)	lr 0.00859
Train [73][520/3239]	Time 0.262 (0.645)	Data Time 0.001 (0.080)	Loss 2.6880 (2.4199)	Entropy 0.98699 (0.98777)	Top-1 acc 58.984 (65.953)	Top-5 acc 81.641 (85.135)	lr 0.00859
Train [73][530/3239]	Time 0.223 (0.642)	Data Time 0.001 (0.079)	Loss 2.3244 (2.4194)	Entropy 0.98697 (0.98776)	Top-1 acc 67.969 (65.962)	Top-5 acc 86.328 (85.149)	lr 0.00859
Train [73][540/3239]	Time 0.211 (0.639)	Data Time 0.001 (0.077)	Loss 2.5228 (2.4194)	Entropy 0.98697 (0.98774)	Top-1 acc 62.500 (65.946)	Top-5 acc 82.031 (85.149)	lr 0.00859
Train [73][550/3239]	Time 0.238 (0.636)	Data Time 0.001 (0.076)	Loss 2.3627 (2.4196)	Entropy 0.98695 (0.98773)	Top-1 acc 70.312 (65.950)	Top-5 acc 85.938 (85.156)	lr 0.00858
Train [73][560/3239]	Time 2.557 (0.633)	Data Time 0.001 (0.075)	Loss 2.3519 (2.4192)	Entropy 0.98695 (0.98771)	Top-1 acc 67.969 (65.954)	Top-5 acc 85.547 (85.158)	lr 0.00858
Train [73][570/3239]	Time 0.243 (0.626)	Data Time 0.001 (0.073)	Loss 2.2950 (2.4199)	Entropy 0.98695 (0.98770)	Top-1 acc 69.922 (65.940)	Top-5 acc 86.328 (85.138)	lr 0.00858
Train [73][580/3239]	Time 0.418 (0.713)	Data Time 0.002 (0.072)	Loss 2.1625 (2.4200)	Entropy 0.98695 (0.98769)	Top-1 acc 69.531 (65.921)	Top-5 acc 91.016 (85.141)	lr 0.00858
Train [73][590/3239]	Time 0.231 (0.710)	Data Time 0.002 (0.071)	Loss 2.4850 (2.4208)	Entropy 0.98694 (0.98768)	Top-1 acc 65.234 (65.886)	Top-5 acc 82.422 (85.118)	lr 0.00858
Train [73][600/3239]	Time 0.223 (0.706)	Data Time 0.001 (0.070)	Loss 2.4331 (2.4206)	Entropy 0.98690 (0.98766)	Top-1 acc 64.453 (65.871)	Top-5 acc 85.547 (85.128)	lr 0.00858
Train [73][610/3239]	Time 0.326 (0.703)	Data Time 0.002 (0.069)	Loss 2.5044 (2.4204)	Entropy 0.98685 (0.98765)	Top-1 acc 61.719 (65.892)	Top-5 acc 82.812 (85.134)	lr 0.00858
Train [73][620/3239]	Time 0.242 (0.699)	Data Time 0.001 (0.068)	Loss 2.2431 (2.4209)	Entropy 0.98684 (0.98764)	Top-1 acc 67.969 (65.884)	Top-5 acc 89.062 (85.119)	lr 0.00858
Train [73][630/3239]	Time 0.230 (0.695)	Data Time 0.001 (0.066)	Loss 2.3470 (2.4209)	Entropy 0.98678 (0.98762)	Top-1 acc 66.797 (65.888)	Top-5 acc 85.938 (85.125)	lr 0.00858
Train [73][640/3239]	Time 0.225 (0.692)	Data Time 0.002 (0.065)	Loss 2.3611 (2.4216)	Entropy 0.98679 (0.98761)	Top-1 acc 66.406 (65.876)	Top-5 acc 85.938 (85.101)	lr 0.00858
Train [73][650/3239]	Time 0.229 (0.688)	Data Time 0.002 (0.065)	Loss 2.4744 (2.4220)	Entropy 0.98678 (0.98760)	Top-1 acc 63.672 (65.869)	Top-5 acc 83.984 (85.090)	lr 0.00857
Train [73][660/3239]	Time 0.226 (0.685)	Data Time 0.001 (0.064)	Loss 2.3747 (2.4218)	Entropy 0.98678 (0.98759)	Top-1 acc 69.141 (65.868)	Top-5 acc 84.766 (85.090)	lr 0.00857
Train [73][670/3239]	Time 2.518 (0.682)	Data Time 0.001 (0.063)	Loss 2.4431 (2.4219)	Entropy 0.98678 (0.98757)	Top-1 acc 64.453 (65.868)	Top-5 acc 83.984 (85.088)	lr 0.00857
Train [73][680/3239]	Time 0.235 (0.675)	Data Time 0.001 (0.062)	Loss 2.4358 (2.4206)	Entropy 0.98676 (0.98756)	Top-1 acc 64.844 (65.895)	Top-5 acc 84.766 (85.110)	lr 0.00857
Train [73][690/3239]	Time 0.218 (0.672)	Data Time 0.001 (0.061)	Loss 2.3792 (2.4204)	Entropy 0.98675 (0.98755)	Top-1 acc 66.406 (65.883)	Top-5 acc 84.375 (85.113)	lr 0.00857
Train [73][700/3239]	Time 0.239 (0.670)	Data Time 0.001 (0.060)	Loss 2.4341 (2.4198)	Entropy 0.98664 (0.98754)	Top-1 acc 64.844 (65.903)	Top-5 acc 83.203 (85.116)	lr 0.00857
Train [73][710/3239]	Time 0.234 (0.667)	Data Time 0.001 (0.059)	Loss 2.3749 (2.4195)	Entropy 0.98668 (0.98753)	Top-1 acc 67.188 (65.908)	Top-5 acc 86.328 (85.126)	lr 0.00857
Train [73][720/3239]	Time 0.221 (0.664)	Data Time 0.001 (0.058)	Loss 2.3497 (2.4191)	Entropy 0.98666 (0.98751)	Top-1 acc 65.234 (65.912)	Top-5 acc 86.328 (85.143)	lr 0.00857
Train [73][730/3239]	Time 0.247 (0.662)	Data Time 0.001 (0.058)	Loss 2.3576 (2.4195)	Entropy 0.98662 (0.98750)	Top-1 acc 69.922 (65.903)	Top-5 acc 85.156 (85.135)	lr 0.00857
Train [73][740/3239]	Time 0.216 (0.659)	Data Time 0.001 (0.057)	Loss 2.2372 (2.4198)	Entropy 0.98659 (0.98749)	Top-1 acc 71.875 (65.886)	Top-5 acc 90.234 (85.137)	lr 0.00857
Train [73][750/3239]	Time 0.345 (0.657)	Data Time 0.001 (0.056)	Loss 2.3482 (2.4196)	Entropy 0.98661 (0.98748)	Top-1 acc 66.016 (65.890)	Top-5 acc 87.500 (85.142)	lr 0.00857
Train [73][760/3239]	Time 0.224 (0.654)	Data Time 0.002 (0.055)	Loss 2.2480 (2.4189)	Entropy 0.98656 (0.98747)	Top-1 acc 70.703 (65.887)	Top-5 acc 86.328 (85.153)	lr 0.00856
Train [73][770/3239]	Time 0.253 (0.652)	Data Time 0.001 (0.055)	Loss 2.4627 (2.4202)	Entropy 0.98646 (0.98745)	Top-1 acc 67.578 (65.874)	Top-5 acc 82.812 (85.127)	lr 0.00856
Train [73][780/3239]	Time 2.476 (0.649)	Data Time 0.002 (0.054)	Loss 2.4306 (2.4200)	Entropy 0.98646 (0.98744)	Top-1 acc 64.453 (65.883)	Top-5 acc 83.203 (85.135)	lr 0.00856
Train [73][790/3239]	Time 0.261 (0.644)	Data Time 0.001 (0.053)	Loss 2.3173 (2.4205)	Entropy 0.98644 (0.98743)	Top-1 acc 69.141 (65.867)	Top-5 acc 86.328 (85.128)	lr 0.00856
Train [73][800/3239]	Time 0.217 (0.642)	Data Time 0.001 (0.053)	Loss 2.3085 (2.4199)	Entropy 0.98641 (0.98742)	Top-1 acc 66.797 (65.883)	Top-5 acc 88.281 (85.133)	lr 0.00856
Train [73][810/3239]	Time 0.232 (0.640)	Data Time 0.001 (0.052)	Loss 2.7179 (2.4203)	Entropy 0.98639 (0.98740)	Top-1 acc 59.375 (65.875)	Top-5 acc 79.688 (85.132)	lr 0.00856
Train [73][820/3239]	Time 0.215 (0.638)	Data Time 0.001 (0.051)	Loss 2.4706 (2.4206)	Entropy 0.98638 (0.98739)	Top-1 acc 65.234 (65.872)	Top-5 acc 84.375 (85.124)	lr 0.00856
Train [73][830/3239]	Time 0.238 (0.636)	Data Time 0.002 (0.051)	Loss 2.3609 (2.4203)	Entropy 0.98635 (0.98738)	Top-1 acc 70.312 (65.891)	Top-5 acc 84.375 (85.123)	lr 0.00856
Train [73][840/3239]	Time 0.322 (0.634)	Data Time 0.001 (0.050)	Loss 2.7245 (2.4205)	Entropy 0.98625 (0.98737)	Top-1 acc 57.031 (65.883)	Top-5 acc 78.906 (85.117)	lr 0.00856
Train [73][850/3239]	Time 0.208 (0.632)	Data Time 0.001 (0.050)	Loss 2.5154 (2.4210)	Entropy 0.98620 (0.98735)	Top-1 acc 65.234 (65.870)	Top-5 acc 83.203 (85.112)	lr 0.00856
Train [73][860/3239]	Time 0.223 (0.630)	Data Time 0.001 (0.049)	Loss 2.5482 (2.4212)	Entropy 0.98623 (0.98734)	Top-1 acc 62.500 (65.861)	Top-5 acc 82.812 (85.106)	lr 0.00855
Train [73][870/3239]	Time 0.219 (0.628)	Data Time 0.001 (0.049)	Loss 2.2805 (2.4212)	Entropy 0.98622 (0.98733)	Top-1 acc 69.531 (65.862)	Top-5 acc 88.672 (85.114)	lr 0.00855
Train [73][880/3239]	Time 0.322 (0.626)	Data Time 0.001 (0.048)	Loss 2.3855 (2.4209)	Entropy 0.98621 (0.98731)	Top-1 acc 64.844 (65.869)	Top-5 acc 86.719 (85.119)	lr 0.00855
Train [73][890/3239]	Time 2.556 (0.625)	Data Time 0.001 (0.048)	Loss 2.4662 (2.4207)	Entropy 0.98621 (0.98730)	Top-1 acc 65.234 (65.886)	Top-5 acc 85.547 (85.123)	lr 0.00855
Train [73][900/3239]	Time 0.218 (0.620)	Data Time 0.001 (0.047)	Loss 2.3930 (2.4211)	Entropy 0.98622 (0.98729)	Top-1 acc 67.188 (65.882)	Top-5 acc 85.547 (85.116)	lr 0.00855
Train [73][910/3239]	Time 0.237 (0.619)	Data Time 0.001 (0.047)	Loss 2.4078 (2.4216)	Entropy 0.98620 (0.98728)	Top-1 acc 64.844 (65.867)	Top-5 acc 85.156 (85.110)	lr 0.00855
Train [73][920/3239]	Time 0.230 (0.617)	Data Time 0.001 (0.046)	Loss 2.4319 (2.4214)	Entropy 0.98612 (0.98727)	Top-1 acc 60.938 (65.860)	Top-5 acc 87.500 (85.122)	lr 0.00855
Train [73][930/3239]	Time 0.228 (0.616)	Data Time 0.001 (0.046)	Loss 2.5515 (2.4213)	Entropy 0.98594 (0.98725)	Top-1 acc 65.234 (65.857)	Top-5 acc 83.594 (85.130)	lr 0.00855
Train [73][940/3239]	Time 0.370 (0.668)	Data Time 0.004 (0.045)	Loss 2.3923 (2.4212)	Entropy 0.98592 (0.98724)	Top-1 acc 67.188 (65.868)	Top-5 acc 83.984 (85.125)	lr 0.00855
Train [73][950/3239]	Time 0.240 (0.668)	Data Time 0.002 (0.045)	Loss 2.3635 (2.4210)	Entropy 0.98592 (0.98722)	Top-1 acc 66.406 (65.874)	Top-5 acc 87.109 (85.131)	lr 0.00855
Train [73][960/3239]	Time 0.244 (0.666)	Data Time 0.002 (0.044)	Loss 2.3199 (2.4206)	Entropy 0.98589 (0.98721)	Top-1 acc 65.625 (65.873)	Top-5 acc 85.938 (85.139)	lr 0.00855
Train [73][970/3239]	Time 0.319 (0.664)	Data Time 0.002 (0.044)	Loss 2.4065 (2.4203)	Entropy 0.98587 (0.98720)	Top-1 acc 64.062 (65.879)	Top-5 acc 84.766 (85.133)	lr 0.00854
Train [73][980/3239]	Time 0.214 (0.662)	Data Time 0.001 (0.044)	Loss 2.4035 (2.4204)	Entropy 0.98583 (0.98718)	Top-1 acc 68.359 (65.878)	Top-5 acc 87.109 (85.144)	lr 0.00854
Train [73][990/3239]	Time 0.246 (0.660)	Data Time 0.001 (0.043)	Loss 2.4348 (2.4203)	Entropy 0.98577 (0.98717)	Top-1 acc 63.672 (65.882)	Top-5 acc 84.375 (85.140)	lr 0.00854
Train [73][1000/3239]	Time 2.539 (0.658)	Data Time 0.001 (0.043)	Loss 2.4419 (2.4200)	Entropy 0.98577 (0.98716)	Top-1 acc 62.891 (65.888)	Top-5 acc 85.938 (85.148)	lr 0.00854
Train [73][1010/3239]	Time 0.243 (0.654)	Data Time 0.001 (0.042)	Loss 2.4501 (2.4206)	Entropy 0.98565 (0.98714)	Top-1 acc 64.062 (65.873)	Top-5 acc 83.594 (85.136)	lr 0.00854
Train [73][1020/3239]	Time 0.232 (0.653)	Data Time 0.001 (0.042)	Loss 2.4262 (2.4211)	Entropy 0.98569 (0.98713)	Top-1 acc 66.016 (65.857)	Top-5 acc 84.375 (85.128)	lr 0.00854
Train [73][1030/3239]	Time 0.236 (0.651)	Data Time 0.001 (0.042)	Loss 2.2308 (2.4207)	Entropy 0.98556 (0.98711)	Top-1 acc 70.703 (65.868)	Top-5 acc 88.281 (85.134)	lr 0.00854
Train [73][1040/3239]	Time 0.250 (0.649)	Data Time 0.002 (0.041)	Loss 2.7281 (2.4212)	Entropy 0.98555 (0.98710)	Top-1 acc 55.078 (65.836)	Top-5 acc 81.250 (85.131)	lr 0.00854
Train [73][1050/3239]	Time 0.226 (0.647)	Data Time 0.001 (0.041)	Loss 2.4817 (2.4209)	Entropy 0.98548 (0.98708)	Top-1 acc 64.062 (65.839)	Top-5 acc 82.812 (85.137)	lr 0.00854
Train [73][1060/3239]	Time 0.328 (0.645)	Data Time 0.001 (0.040)	Loss 2.5129 (2.4213)	Entropy 0.98550 (0.98707)	Top-1 acc 62.891 (65.829)	Top-5 acc 82.812 (85.128)	lr 0.00854
Train [73][1070/3239]	Time 0.241 (0.644)	Data Time 0.001 (0.040)	Loss 2.5166 (2.4215)	Entropy 0.98557 (0.98705)	Top-1 acc 64.453 (65.820)	Top-5 acc 84.375 (85.123)	lr 0.00853
Train [73][1080/3239]	Time 0.264 (0.642)	Data Time 0.003 (0.040)	Loss 2.5375 (2.4218)	Entropy 0.98549 (0.98704)	Top-1 acc 64.062 (65.816)	Top-5 acc 82.422 (85.117)	lr 0.00853
Train [73][1090/3239]	Time 0.209 (0.641)	Data Time 0.001 (0.039)	Loss 2.3480 (2.4218)	Entropy 0.98545 (0.98702)	Top-1 acc 66.797 (65.814)	Top-5 acc 86.328 (85.118)	lr 0.00853
Train [73][1100/3239]	Time 0.251 (0.639)	Data Time 0.001 (0.039)	Loss 2.4327 (2.4219)	Entropy 0.98542 (0.98701)	Top-1 acc 65.625 (65.811)	Top-5 acc 85.938 (85.117)	lr 0.00853
Train [73][1110/3239]	Time 2.476 (0.638)	Data Time 0.001 (0.039)	Loss 2.3490 (2.4218)	Entropy 0.98542 (0.98699)	Top-1 acc 71.094 (65.813)	Top-5 acc 87.500 (85.116)	lr 0.00853
Train [73][1120/3239]	Time 0.238 (0.634)	Data Time 0.001 (0.038)	Loss 2.4335 (2.4215)	Entropy 0.98543 (0.98698)	Top-1 acc 68.750 (65.824)	Top-5 acc 86.328 (85.124)	lr 0.00853
Train [73][1130/3239]	Time 0.241 (0.633)	Data Time 0.001 (0.038)	Loss 2.3988 (2.4213)	Entropy 0.98543 (0.98697)	Top-1 acc 64.453 (65.833)	Top-5 acc 85.938 (85.128)	lr 0.00853
Train [73][1140/3239]	Time 0.220 (0.631)	Data Time 0.001 (0.038)	Loss 2.4392 (2.4221)	Entropy 0.98542 (0.98695)	Top-1 acc 67.969 (65.809)	Top-5 acc 81.641 (85.109)	lr 0.00853
Train [73][1150/3239]	Time 0.314 (0.630)	Data Time 0.001 (0.037)	Loss 2.4267 (2.4224)	Entropy 0.98543 (0.98694)	Top-1 acc 62.109 (65.795)	Top-5 acc 83.203 (85.103)	lr 0.00853
Train [73][1160/3239]	Time 0.239 (0.629)	Data Time 0.001 (0.037)	Loss 2.2742 (2.4228)	Entropy 0.98548 (0.98693)	Top-1 acc 68.359 (65.776)	Top-5 acc 87.500 (85.095)	lr 0.00853
Train [73][1170/3239]	Time 0.228 (0.627)	Data Time 0.001 (0.037)	Loss 2.3525 (2.4229)	Entropy 0.98543 (0.98692)	Top-1 acc 68.359 (65.777)	Top-5 acc 87.500 (85.092)	lr 0.00853
Train [73][1180/3239]	Time 0.229 (0.626)	Data Time 0.001 (0.036)	Loss 2.5114 (2.4228)	Entropy 0.98581 (0.98690)	Top-1 acc 64.844 (65.785)	Top-5 acc 83.203 (85.097)	lr 0.00852
Train [73][1190/3239]	Time 0.232 (0.625)	Data Time 0.002 (0.036)	Loss 2.4899 (2.4230)	Entropy 0.98583 (0.98689)	Top-1 acc 64.844 (65.776)	Top-5 acc 82.422 (85.091)	lr 0.00852
Train [73][1200/3239]	Time 0.225 (0.624)	Data Time 0.001 (0.036)	Loss 2.4415 (2.4229)	Entropy 0.98581 (0.98689)	Top-1 acc 65.234 (65.776)	Top-5 acc 83.984 (85.091)	lr 0.00852
Train [73][1210/3239]	Time 0.237 (0.622)	Data Time 0.001 (0.036)	Loss 2.2527 (2.4226)	Entropy 0.98582 (0.98688)	Top-1 acc 68.750 (65.783)	Top-5 acc 87.109 (85.095)	lr 0.00852
Train [73][1220/3239]	Time 2.534 (0.621)	Data Time 0.001 (0.035)	Loss 2.5655 (2.4227)	Entropy 0.98582 (0.98687)	Top-1 acc 62.500 (65.785)	Top-5 acc 80.078 (85.096)	lr 0.00852
Train [73][1230/3239]	Time 0.242 (0.618)	Data Time 0.001 (0.035)	Loss 2.5186 (2.4231)	Entropy 0.98575 (0.98686)	Top-1 acc 63.281 (65.777)	Top-5 acc 82.031 (85.089)	lr 0.00852
Train [73][1240/3239]	Time 0.324 (0.617)	Data Time 0.001 (0.035)	Loss 2.3895 (2.4230)	Entropy 0.98576 (0.98685)	Top-1 acc 69.141 (65.780)	Top-5 acc 85.938 (85.091)	lr 0.00852
Train [73][1250/3239]	Time 0.227 (0.616)	Data Time 0.001 (0.035)	Loss 2.6023 (2.4232)	Entropy 0.98570 (0.98684)	Top-1 acc 61.719 (65.776)	Top-5 acc 83.594 (85.088)	lr 0.00852
Train [73][1260/3239]	Time 0.225 (0.614)	Data Time 0.001 (0.034)	Loss 2.4364 (2.4234)	Entropy 0.98572 (0.98683)	Top-1 acc 66.016 (65.773)	Top-5 acc 84.766 (85.080)	lr 0.00852
Train [73][1270/3239]	Time 0.233 (0.613)	Data Time 0.001 (0.034)	Loss 2.3791 (2.4237)	Entropy 0.98568 (0.98682)	Top-1 acc 63.672 (65.769)	Top-5 acc 86.328 (85.076)	lr 0.00852
Train [73][1280/3239]	Time 0.227 (0.612)	Data Time 0.001 (0.034)	Loss 2.3564 (2.4239)	Entropy 0.98573 (0.98681)	Top-1 acc 68.750 (65.771)	Top-5 acc 85.938 (85.074)	lr 0.00851
Train [73][1290/3239]	Time 0.235 (0.611)	Data Time 0.001 (0.033)	Loss 2.3848 (2.4235)	Entropy 0.98568 (0.98681)	Top-1 acc 68.750 (65.784)	Top-5 acc 85.156 (85.082)	lr 0.00851
Train [73][1300/3239]	Time 0.296 (0.650)	Data Time 0.003 (0.033)	Loss 2.3569 (2.4231)	Entropy 0.98560 (0.98680)	Top-1 acc 68.359 (65.797)	Top-5 acc 89.062 (85.093)	lr 0.00851
Train [73][1310/3239]	Time 0.240 (0.649)	Data Time 0.002 (0.033)	Loss 2.3174 (2.4235)	Entropy 0.98556 (0.98679)	Top-1 acc 67.969 (65.787)	Top-5 acc 88.672 (85.084)	lr 0.00851
Train [73][1320/3239]	Time 0.268 (0.648)	Data Time 0.001 (0.033)	Loss 2.4053 (2.4237)	Entropy 0.98553 (0.98678)	Top-1 acc 68.359 (65.784)	Top-5 acc 85.156 (85.076)	lr 0.00851
Train [73][1330/3239]	Time 2.631 (0.647)	Data Time 0.002 (0.033)	Loss 2.3845 (2.4235)	Entropy 0.98553 (0.98677)	Top-1 acc 64.844 (65.787)	Top-5 acc 85.938 (85.081)	lr 0.00851
Train [73][1340/3239]	Time 0.260 (0.644)	Data Time 0.001 (0.032)	Loss 2.3437 (2.4232)	Entropy 0.98553 (0.98676)	Top-1 acc 66.797 (65.799)	Top-5 acc 87.109 (85.089)	lr 0.00851
Train [73][1350/3239]	Time 0.221 (0.643)	Data Time 0.001 (0.032)	Loss 2.2911 (2.4230)	Entropy 0.98557 (0.98675)	Top-1 acc 69.141 (65.802)	Top-5 acc 90.234 (85.097)	lr 0.00851
Train [73][1360/3239]	Time 0.236 (0.642)	Data Time 0.001 (0.032)	Loss 2.3315 (2.4227)	Entropy 0.98553 (0.98674)	Top-1 acc 69.141 (65.813)	Top-5 acc 87.500 (85.102)	lr 0.00851
Train [73][1370/3239]	Time 0.258 (0.640)	Data Time 0.001 (0.032)	Loss 2.4155 (2.4227)	Entropy 0.98550 (0.98673)	Top-1 acc 64.453 (65.810)	Top-5 acc 87.500 (85.105)	lr 0.00851
Train [73][1380/3239]	Time 0.253 (0.639)	Data Time 0.001 (0.031)	Loss 2.4717 (2.4231)	Entropy 0.98543 (0.98672)	Top-1 acc 67.188 (65.799)	Top-5 acc 85.547 (85.098)	lr 0.00850
Train [73][1390/3239]	Time 0.210 (0.638)	Data Time 0.001 (0.031)	Loss 2.4879 (2.4240)	Entropy 0.98542 (0.98671)	Top-1 acc 63.672 (65.779)	Top-5 acc 84.766 (85.086)	lr 0.00850
Train [73][1400/3239]	Time 0.227 (0.637)	Data Time 0.001 (0.031)	Loss 2.5063 (2.4242)	Entropy 0.98545 (0.98671)	Top-1 acc 65.625 (65.773)	Top-5 acc 83.594 (85.078)	lr 0.00850
Train [73][1410/3239]	Time 0.212 (0.636)	Data Time 0.001 (0.031)	Loss 2.4744 (2.4242)	Entropy 0.98537 (0.98670)	Top-1 acc 62.891 (65.773)	Top-5 acc 84.375 (85.077)	lr 0.00850
Train [73][1420/3239]	Time 0.228 (0.635)	Data Time 0.001 (0.031)	Loss 2.5518 (2.4247)	Entropy 0.98537 (0.98669)	Top-1 acc 65.625 (65.765)	Top-5 acc 79.688 (85.067)	lr 0.00850
Train [73][1430/3239]	Time 0.243 (0.633)	Data Time 0.001 (0.030)	Loss 2.4878 (2.4245)	Entropy 0.98553 (0.98668)	Top-1 acc 66.406 (65.775)	Top-5 acc 82.812 (85.064)	lr 0.00850
Train [73][1440/3239]	Time 2.501 (0.632)	Data Time 0.001 (0.030)	Loss 2.4304 (2.4248)	Entropy 0.98553 (0.98667)	Top-1 acc 62.500 (65.764)	Top-5 acc 85.156 (85.061)	lr 0.00850
Train [73][1450/3239]	Time 0.290 (0.630)	Data Time 0.001 (0.030)	Loss 2.4340 (2.4244)	Entropy 0.98547 (0.98666)	Top-1 acc 65.234 (65.769)	Top-5 acc 86.328 (85.072)	lr 0.00850
Train [73][1460/3239]	Time 0.213 (0.628)	Data Time 0.001 (0.030)	Loss 2.5160 (2.4244)	Entropy 0.98543 (0.98665)	Top-1 acc 63.672 (65.772)	Top-5 acc 83.594 (85.068)	lr 0.00850
Train [73][1470/3239]	Time 0.238 (0.627)	Data Time 0.001 (0.030)	Loss 2.2409 (2.4242)	Entropy 0.98539 (0.98665)	Top-1 acc 69.141 (65.772)	Top-5 acc 87.500 (85.070)	lr 0.00850
Train [73][1480/3239]	Time 0.240 (0.626)	Data Time 0.001 (0.029)	Loss 2.3367 (2.4243)	Entropy 0.98536 (0.98664)	Top-1 acc 69.141 (65.776)	Top-5 acc 86.719 (85.071)	lr 0.00850
Train [73][1490/3239]	Time 0.240 (0.625)	Data Time 0.001 (0.029)	Loss 2.5811 (2.4246)	Entropy 0.98538 (0.98663)	Top-1 acc 64.062 (65.775)	Top-5 acc 81.641 (85.060)	lr 0.00849
Train [73][1500/3239]	Time 0.255 (0.624)	Data Time 0.001 (0.029)	Loss 2.3458 (2.4242)	Entropy 0.98533 (0.98662)	Top-1 acc 67.578 (65.781)	Top-5 acc 86.719 (85.062)	lr 0.00849
Train [73][1510/3239]	Time 0.312 (0.623)	Data Time 0.001 (0.029)	Loss 2.4649 (2.4241)	Entropy 0.98531 (0.98661)	Top-1 acc 66.797 (65.781)	Top-5 acc 82.422 (85.060)	lr 0.00849
Train [73][1520/3239]	Time 0.227 (0.622)	Data Time 0.001 (0.029)	Loss 2.5391 (2.4241)	Entropy 0.98529 (0.98660)	Top-1 acc 64.062 (65.774)	Top-5 acc 81.641 (85.060)	lr 0.00849
Train [73][1530/3239]	Time 0.223 (0.621)	Data Time 0.001 (0.029)	Loss 2.3168 (2.4240)	Entropy 0.98525 (0.98659)	Top-1 acc 66.797 (65.773)	Top-5 acc 87.109 (85.061)	lr 0.00849
Train [73][1540/3239]	Time 0.234 (0.620)	Data Time 0.001 (0.028)	Loss 2.6312 (2.4246)	Entropy 0.98523 (0.98659)	Top-1 acc 63.672 (65.769)	Top-5 acc 78.906 (85.047)	lr 0.00849
Train [73][1550/3239]	Time 2.584 (0.619)	Data Time 0.001 (0.028)	Loss 2.3952 (2.4246)	Entropy 0.98523 (0.98658)	Top-1 acc 64.844 (65.771)	Top-5 acc 86.328 (85.040)	lr 0.00849
Train [73][1560/3239]	Time 0.231 (0.617)	Data Time 0.001 (0.028)	Loss 2.4447 (2.4246)	Entropy 0.98521 (0.98657)	Top-1 acc 67.578 (65.774)	Top-5 acc 84.375 (85.038)	lr 0.00849
Train [73][1570/3239]	Time 0.227 (0.616)	Data Time 0.001 (0.028)	Loss 2.3029 (2.4247)	Entropy 0.98521 (0.98656)	Top-1 acc 72.656 (65.773)	Top-5 acc 87.109 (85.036)	lr 0.00849
Train [73][1580/3239]	Time 0.239 (0.615)	Data Time 0.001 (0.028)	Loss 2.3843 (2.4246)	Entropy 0.98519 (0.98655)	Top-1 acc 62.500 (65.775)	Top-5 acc 90.625 (85.042)	lr 0.00849
Train [73][1590/3239]	Time 0.244 (0.614)	Data Time 0.002 (0.028)	Loss 2.3567 (2.4246)	Entropy 0.98515 (0.98654)	Top-1 acc 66.406 (65.776)	Top-5 acc 83.984 (85.041)	lr 0.00848
Train [73][1600/3239]	Time 0.223 (0.613)	Data Time 0.001 (0.027)	Loss 2.6044 (2.4249)	Entropy 0.98513 (0.98653)	Top-1 acc 63.281 (65.770)	Top-5 acc 82.422 (85.040)	lr 0.00848
Train [73][1610/3239]	Time 0.225 (0.612)	Data Time 0.001 (0.027)	Loss 2.4325 (2.4249)	Entropy 0.98513 (0.98652)	Top-1 acc 68.359 (65.771)	Top-5 acc 84.375 (85.041)	lr 0.00848
Train [73][1620/3239]	Time 0.216 (0.611)	Data Time 0.001 (0.027)	Loss 2.5096 (2.4250)	Entropy 0.98509 (0.98652)	Top-1 acc 65.625 (65.776)	Top-5 acc 83.594 (85.042)	lr 0.00848
Train [73][1630/3239]	Time 0.230 (0.610)	Data Time 0.001 (0.027)	Loss 2.5094 (2.4252)	Entropy 0.98505 (0.98651)	Top-1 acc 67.188 (65.770)	Top-5 acc 82.422 (85.039)	lr 0.00848
Train [73][1640/3239]	Time 0.317 (0.610)	Data Time 0.001 (0.027)	Loss 2.3409 (2.4249)	Entropy 0.98501 (0.98650)	Top-1 acc 69.922 (65.778)	Top-5 acc 85.156 (85.049)	lr 0.00848
Train [73][1650/3239]	Time 0.289 (0.609)	Data Time 0.001 (0.027)	Loss 2.4113 (2.4253)	Entropy 0.98493 (0.98649)	Top-1 acc 65.234 (65.767)	Top-5 acc 85.156 (85.042)	lr 0.00848
Train [73][1660/3239]	Time 53.084 (0.639)	Data Time 0.001 (0.026)	Loss 2.2277 (2.4252)	Entropy 0.98493 (0.98648)	Top-1 acc 68.750 (65.767)	Top-5 acc 91.797 (85.048)	lr 0.00848
Train [73][1670/3239]	Time 0.274 (0.637)	Data Time 0.005 (0.026)	Loss 2.4456 (2.4252)	Entropy 0.98482 (0.98647)	Top-1 acc 63.672 (65.766)	Top-5 acc 85.156 (85.045)	lr 0.00848
Train [73][1680/3239]	Time 0.262 (0.636)	Data Time 0.002 (0.026)	Loss 2.4581 (2.4248)	Entropy 0.98482 (0.98646)	Top-1 acc 66.016 (65.778)	Top-5 acc 86.328 (85.055)	lr 0.00848
Train [73][1690/3239]	Time 0.241 (0.635)	Data Time 0.001 (0.026)	Loss 2.3121 (2.4250)	Entropy 0.98477 (0.98645)	Top-1 acc 65.234 (65.775)	Top-5 acc 89.062 (85.052)	lr 0.00848
Train [73][1700/3239]	Time 0.230 (0.634)	Data Time 0.001 (0.026)	Loss 2.3495 (2.4249)	Entropy 0.98466 (0.98644)	Top-1 acc 68.750 (65.783)	Top-5 acc 87.500 (85.054)	lr 0.00847
Train [73][1710/3239]	Time 0.234 (0.633)	Data Time 0.001 (0.026)	Loss 2.3694 (2.4247)	Entropy 0.98463 (0.98643)	Top-1 acc 62.891 (65.789)	Top-5 acc 87.500 (85.059)	lr 0.00847
Train [73][1720/3239]	Time 0.229 (0.632)	Data Time 0.001 (0.026)	Loss 2.3270 (2.4249)	Entropy 0.98455 (0.98642)	Top-1 acc 66.406 (65.777)	Top-5 acc 87.500 (85.055)	lr 0.00847
Train [73][1730/3239]	Time 0.324 (0.631)	Data Time 0.001 (0.025)	Loss 2.4446 (2.4246)	Entropy 0.98451 (0.98641)	Top-1 acc 64.062 (65.781)	Top-5 acc 82.422 (85.058)	lr 0.00847
Train [73][1740/3239]	Time 0.212 (0.630)	Data Time 0.001 (0.025)	Loss 2.5069 (2.4245)	Entropy 0.98448 (0.98640)	Top-1 acc 64.062 (65.784)	Top-5 acc 84.375 (85.059)	lr 0.00847
Train [73][1750/3239]	Time 0.220 (0.629)	Data Time 0.001 (0.025)	Loss 2.3609 (2.4245)	Entropy 0.98445 (0.98639)	Top-1 acc 68.359 (65.785)	Top-5 acc 88.281 (85.059)	lr 0.00847
Train [73][1760/3239]	Time 0.233 (0.629)	Data Time 0.001 (0.025)	Loss 2.5278 (2.4248)	Entropy 0.98439 (0.98637)	Top-1 acc 66.016 (65.778)	Top-5 acc 82.812 (85.053)	lr 0.00847
Train [73][1770/3239]	Time 2.567 (0.628)	Data Time 0.001 (0.025)	Loss 2.4719 (2.4251)	Entropy 0.98439 (0.98636)	Top-1 acc 67.578 (65.773)	Top-5 acc 82.422 (85.048)	lr 0.00847
Train [73][1780/3239]	Time 0.240 (0.626)	Data Time 0.001 (0.025)	Loss 2.5635 (2.4254)	Entropy 0.98439 (0.98635)	Top-1 acc 60.938 (65.764)	Top-5 acc 81.641 (85.041)	lr 0.00847
Train [73][1790/3239]	Time 0.235 (0.625)	Data Time 0.001 (0.025)	Loss 2.5142 (2.4255)	Entropy 0.98439 (0.98634)	Top-1 acc 61.719 (65.760)	Top-5 acc 85.547 (85.040)	lr 0.00847
Train [73][1800/3239]	Time 0.240 (0.624)	Data Time 0.001 (0.024)	Loss 2.4733 (2.4256)	Entropy 0.98437 (0.98633)	Top-1 acc 62.500 (65.754)	Top-5 acc 85.547 (85.038)	lr 0.00846
Train [73][1810/3239]	Time 0.253 (0.623)	Data Time 0.001 (0.024)	Loss 2.6449 (2.4259)	Entropy 0.98430 (0.98632)	Top-1 acc 62.891 (65.751)	Top-5 acc 78.906 (85.031)	lr 0.00846
Train [73][1820/3239]	Time 0.312 (0.622)	Data Time 0.001 (0.024)	Loss 2.6159 (2.4259)	Entropy 0.98431 (0.98631)	Top-1 acc 63.672 (65.754)	Top-5 acc 80.078 (85.030)	lr 0.00846
Train [73][1830/3239]	Time 0.205 (0.621)	Data Time 0.001 (0.024)	Loss 2.3607 (2.4261)	Entropy 0.98424 (0.98630)	Top-1 acc 68.359 (65.747)	Top-5 acc 88.281 (85.029)	lr 0.00846
Train [73][1840/3239]	Time 0.260 (0.620)	Data Time 0.001 (0.024)	Loss 2.4422 (2.4261)	Entropy 0.98417 (0.98629)	Top-1 acc 63.672 (65.743)	Top-5 acc 83.984 (85.027)	lr 0.00846
Train [73][1850/3239]	Time 0.243 (0.620)	Data Time 0.001 (0.024)	Loss 2.3397 (2.4262)	Entropy 0.98414 (0.98627)	Top-1 acc 68.359 (65.743)	Top-5 acc 86.328 (85.024)	lr 0.00846
Train [73][1860/3239]	Time 0.259 (0.619)	Data Time 0.002 (0.024)	Loss 2.3466 (2.4263)	Entropy 0.98411 (0.98626)	Top-1 acc 66.797 (65.743)	Top-5 acc 87.500 (85.024)	lr 0.00846
Train [73][1870/3239]	Time 0.223 (0.618)	Data Time 0.002 (0.024)	Loss 2.6590 (2.4265)	Entropy 0.98408 (0.98625)	Top-1 acc 60.156 (65.735)	Top-5 acc 78.906 (85.019)	lr 0.00846
Train [73][1880/3239]	Time 2.517 (0.617)	Data Time 0.001 (0.024)	Loss 2.3931 (2.4263)	Entropy 0.98408 (0.98624)	Top-1 acc 67.188 (65.743)	Top-5 acc 85.547 (85.019)	lr 0.00846
Train [73][1890/3239]	Time 0.236 (0.615)	Data Time 0.001 (0.023)	Loss 2.4964 (2.4267)	Entropy 0.98407 (0.98623)	Top-1 acc 63.281 (65.730)	Top-5 acc 87.109 (85.011)	lr 0.00846
Train [73][1900/3239]	Time 0.218 (0.615)	Data Time 0.001 (0.023)	Loss 2.4870 (2.4268)	Entropy 0.98406 (0.98622)	Top-1 acc 60.938 (65.723)	Top-5 acc 83.984 (85.007)	lr 0.00846
Train [73][1910/3239]	Time 0.330 (0.614)	Data Time 0.001 (0.023)	Loss 2.4952 (2.4268)	Entropy 0.98402 (0.98621)	Top-1 acc 66.406 (65.727)	Top-5 acc 82.812 (85.007)	lr 0.00845
Train [73][1920/3239]	Time 0.222 (0.613)	Data Time 0.001 (0.023)	Loss 2.5156 (2.4270)	Entropy 0.98399 (0.98619)	Top-1 acc 61.719 (65.720)	Top-5 acc 83.984 (85.003)	lr 0.00845
Train [73][1930/3239]	Time 0.224 (0.612)	Data Time 0.001 (0.023)	Loss 2.4842 (2.4271)	Entropy 0.98398 (0.98618)	Top-1 acc 64.844 (65.718)	Top-5 acc 85.156 (85.001)	lr 0.00845
Train [73][1940/3239]	Time 0.218 (0.612)	Data Time 0.001 (0.023)	Loss 2.4576 (2.4271)	Entropy 0.98401 (0.98617)	Top-1 acc 65.234 (65.719)	Top-5 acc 86.328 (85.005)	lr 0.00845
Train [73][1950/3239]	Time 0.327 (0.611)	Data Time 0.001 (0.023)	Loss 2.4776 (2.4271)	Entropy 0.98397 (0.98616)	Top-1 acc 63.281 (65.719)	Top-5 acc 85.156 (85.002)	lr 0.00845
Train [73][1960/3239]	Time 0.223 (0.610)	Data Time 0.001 (0.023)	Loss 2.5606 (2.4274)	Entropy 0.98399 (0.98615)	Top-1 acc 62.891 (65.707)	Top-5 acc 83.594 (84.997)	lr 0.00845
Train [73][1970/3239]	Time 0.224 (0.610)	Data Time 0.001 (0.023)	Loss 2.3618 (2.4277)	Entropy 0.98383 (0.98614)	Top-1 acc 64.062 (65.694)	Top-5 acc 85.156 (84.990)	lr 0.00845
Train [73][1980/3239]	Time 0.276 (0.609)	Data Time 0.002 (0.022)	Loss 2.4049 (2.4278)	Entropy 0.98380 (0.98613)	Top-1 acc 66.406 (65.696)	Top-5 acc 84.375 (84.988)	lr 0.00845
Train [73][1990/3239]	Time 2.546 (0.608)	Data Time 0.002 (0.022)	Loss 2.3774 (2.4277)	Entropy 0.98380 (0.98611)	Top-1 acc 64.062 (65.691)	Top-5 acc 86.719 (84.992)	lr 0.00845
Train [73][2000/3239]	Time 0.268 (0.607)	Data Time 0.001 (0.022)	Loss 2.5066 (2.4276)	Entropy 0.98378 (0.98610)	Top-1 acc 61.328 (65.689)	Top-5 acc 82.031 (84.995)	lr 0.00845
Train [73][2010/3239]	Time 0.211 (0.606)	Data Time 0.001 (0.022)	Loss 2.3391 (2.4278)	Entropy 0.98370 (0.98609)	Top-1 acc 66.406 (65.686)	Top-5 acc 87.109 (84.999)	lr 0.00844
Train [73][2020/3239]	Time 0.246 (0.605)	Data Time 0.001 (0.022)	Loss 2.3797 (2.4278)	Entropy 0.98370 (0.98608)	Top-1 acc 67.188 (65.685)	Top-5 acc 85.547 (84.997)	lr 0.00844
Train [73][2030/3239]	Time 0.263 (0.630)	Data Time 0.002 (0.022)	Loss 2.3190 (2.4277)	Entropy 0.98364 (0.98607)	Top-1 acc 67.969 (65.690)	Top-5 acc 87.109 (84.998)	lr 0.00844
Train [73][2040/3239]	Time 0.326 (0.629)	Data Time 0.002 (0.022)	Loss 2.3723 (2.4278)	Entropy 0.98359 (0.98605)	Top-1 acc 64.844 (65.688)	Top-5 acc 87.109 (84.996)	lr 0.00844
Train [73][2050/3239]	Time 0.226 (0.629)	Data Time 0.002 (0.022)	Loss 2.5181 (2.4281)	Entropy 0.98351 (0.98604)	Top-1 acc 58.594 (65.683)	Top-5 acc 83.594 (84.989)	lr 0.00844
Train [73][2060/3239]	Time 0.238 (0.628)	Data Time 0.002 (0.022)	Loss 2.3582 (2.4281)	Entropy 0.98347 (0.98603)	Top-1 acc 66.797 (65.685)	Top-5 acc 87.109 (84.990)	lr 0.00844
Train [73][2070/3239]	Time 0.216 (0.627)	Data Time 0.002 (0.022)	Loss 2.5018 (2.4281)	Entropy 0.98345 (0.98602)	Top-1 acc 64.453 (65.685)	Top-5 acc 83.203 (84.989)	lr 0.00844
Train [73][2080/3239]	Time 0.241 (0.627)	Data Time 0.001 (0.021)	Loss 2.3828 (2.4283)	Entropy 0.98337 (0.98601)	Top-1 acc 70.703 (65.680)	Top-5 acc 84.375 (84.985)	lr 0.00844
Train [73][2090/3239]	Time 0.226 (0.626)	Data Time 0.001 (0.021)	Loss 2.3318 (2.4283)	Entropy 0.98341 (0.98599)	Top-1 acc 64.844 (65.680)	Top-5 acc 87.109 (84.984)	lr 0.00844
Train [73][2100/3239]	Time 2.518 (0.625)	Data Time 0.001 (0.021)	Loss 2.2756 (2.4284)	Entropy 0.98341 (0.98598)	Top-1 acc 69.922 (65.674)	Top-5 acc 87.500 (84.980)	lr 0.00844
Train [73][2110/3239]	Time 0.235 (0.623)	Data Time 0.002 (0.021)	Loss 2.3086 (2.4283)	Entropy 0.98340 (0.98597)	Top-1 acc 67.969 (65.678)	Top-5 acc 83.984 (84.978)	lr 0.00844
Train [73][2120/3239]	Time 0.236 (0.622)	Data Time 0.001 (0.021)	Loss 2.3426 (2.4284)	Entropy 0.98332 (0.98596)	Top-1 acc 68.750 (65.674)	Top-5 acc 87.109 (84.977)	lr 0.00843
Train [73][2130/3239]	Time 0.310 (0.622)	Data Time 0.001 (0.021)	Loss 2.4608 (2.4282)	Entropy 0.98326 (0.98594)	Top-1 acc 65.234 (65.677)	Top-5 acc 83.984 (84.981)	lr 0.00843
Train [73][2140/3239]	Time 0.228 (0.621)	Data Time 0.001 (0.021)	Loss 2.3114 (2.4282)	Entropy 0.98325 (0.98593)	Top-1 acc 69.922 (65.675)	Top-5 acc 84.766 (84.980)	lr 0.00843
Train [73][2150/3239]	Time 0.252 (0.620)	Data Time 0.001 (0.021)	Loss 2.4617 (2.4281)	Entropy 0.98321 (0.98592)	Top-1 acc 62.891 (65.676)	Top-5 acc 84.375 (84.978)	lr 0.00843
Train [73][2160/3239]	Time 0.204 (0.620)	Data Time 0.001 (0.021)	Loss 2.4673 (2.4279)	Entropy 0.98313 (0.98591)	Top-1 acc 62.109 (65.679)	Top-5 acc 85.156 (84.976)	lr 0.00843
Train [73][2170/3239]	Time 0.235 (0.619)	Data Time 0.001 (0.021)	Loss 2.4944 (2.4280)	Entropy 0.98310 (0.98589)	Top-1 acc 67.188 (65.678)	Top-5 acc 83.203 (84.977)	lr 0.00843
Train [73][2180/3239]	Time 0.222 (0.618)	Data Time 0.001 (0.021)	Loss 2.1919 (2.4278)	Entropy 0.98298 (0.98588)	Top-1 acc 70.312 (65.681)	Top-5 acc 91.406 (84.984)	lr 0.00843
Train [73][2190/3239]	Time 0.241 (0.618)	Data Time 0.002 (0.020)	Loss 2.4316 (2.4277)	Entropy 0.98296 (0.98587)	Top-1 acc 66.797 (65.684)	Top-5 acc 85.156 (84.987)	lr 0.00843
Train [73][2200/3239]	Time 0.214 (0.617)	Data Time 0.001 (0.020)	Loss 2.6101 (2.4277)	Entropy 0.98294 (0.98585)	Top-1 acc 60.938 (65.684)	Top-5 acc 80.469 (84.985)	lr 0.00843
Train [73][2210/3239]	Time 2.527 (0.616)	Data Time 0.001 (0.020)	Loss 2.6259 (2.4277)	Entropy 0.98294 (0.98584)	Top-1 acc 63.672 (65.689)	Top-5 acc 80.078 (84.985)	lr 0.00843
Train [73][2220/3239]	Time 0.234 (0.614)	Data Time 0.001 (0.020)	Loss 2.2334 (2.4276)	Entropy 0.98293 (0.98583)	Top-1 acc 73.047 (65.694)	Top-5 acc 90.625 (84.986)	lr 0.00842
Train [73][2230/3239]	Time 0.237 (0.614)	Data Time 0.001 (0.020)	Loss 2.5679 (2.4278)	Entropy 0.98291 (0.98581)	Top-1 acc 62.891 (65.689)	Top-5 acc 82.422 (84.983)	lr 0.00842
Train [73][2240/3239]	Time 0.278 (0.613)	Data Time 0.002 (0.020)	Loss 2.5071 (2.4280)	Entropy 0.98287 (0.98580)	Top-1 acc 64.453 (65.687)	Top-5 acc 83.984 (84.978)	lr 0.00842
Train [73][2250/3239]	Time 0.270 (0.613)	Data Time 0.002 (0.020)	Loss 2.4690 (2.4283)	Entropy 0.98288 (0.98579)	Top-1 acc 63.281 (65.682)	Top-5 acc 84.766 (84.974)	lr 0.00842
Train [73][2260/3239]	Time 0.435 (0.613)	Data Time 0.002 (0.020)	Loss 2.5227 (2.4284)	Entropy 0.98290 (0.98578)	Top-1 acc 63.281 (65.679)	Top-5 acc 82.812 (84.975)	lr 0.00842
Train [73][2270/3239]	Time 0.241 (0.613)	Data Time 0.002 (0.020)	Loss 2.4231 (2.4283)	Entropy 0.98287 (0.98576)	Top-1 acc 64.062 (65.678)	Top-5 acc 85.938 (84.980)	lr 0.00842
Train [73][2280/3239]	Time 0.239 (0.612)	Data Time 0.001 (0.020)	Loss 2.4450 (2.4283)	Entropy 0.98289 (0.98575)	Top-1 acc 63.672 (65.676)	Top-5 acc 85.938 (84.980)	lr 0.00842
Train [73][2290/3239]	Time 0.214 (0.611)	Data Time 0.001 (0.020)	Loss 2.3280 (2.4283)	Entropy 0.98288 (0.98574)	Top-1 acc 68.359 (65.677)	Top-5 acc 86.719 (84.979)	lr 0.00842
Train [73][2300/3239]	Time 0.236 (0.611)	Data Time 0.001 (0.020)	Loss 2.3678 (2.4281)	Entropy 0.98289 (0.98572)	Top-1 acc 67.969 (65.681)	Top-5 acc 87.500 (84.982)	lr 0.00842
Train [73][2310/3239]	Time 0.228 (0.610)	Data Time 0.001 (0.019)	Loss 2.3504 (2.4282)	Entropy 0.98284 (0.98571)	Top-1 acc 64.844 (65.678)	Top-5 acc 87.109 (84.983)	lr 0.00842
Train [73][2320/3239]	Time 2.591 (0.610)	Data Time 0.002 (0.019)	Loss 2.4736 (2.4283)	Entropy 0.98284 (0.98570)	Top-1 acc 64.844 (65.677)	Top-5 acc 83.984 (84.981)	lr 0.00842
Train [73][2330/3239]	Time 0.281 (0.608)	Data Time 0.002 (0.019)	Loss 2.6027 (2.4285)	Entropy 0.98281 (0.98569)	Top-1 acc 63.672 (65.673)	Top-5 acc 81.250 (84.976)	lr 0.00841
Train [73][2340/3239]	Time 0.273 (0.608)	Data Time 0.002 (0.019)	Loss 2.3734 (2.4288)	Entropy 0.98274 (0.98568)	Top-1 acc 66.797 (65.668)	Top-5 acc 87.891 (84.972)	lr 0.00841
Train [73][2350/3239]	Time 0.326 (0.607)	Data Time 0.001 (0.019)	Loss 2.4126 (2.4287)	Entropy 0.98275 (0.98566)	Top-1 acc 67.188 (65.669)	Top-5 acc 82.422 (84.972)	lr 0.00841
Train [73][2360/3239]	Time 0.214 (0.607)	Data Time 0.001 (0.019)	Loss 2.5811 (2.4289)	Entropy 0.98274 (0.98565)	Top-1 acc 62.891 (65.665)	Top-5 acc 82.812 (84.969)	lr 0.00841
Train [73][2370/3239]	Time 0.226 (0.606)	Data Time 0.001 (0.019)	Loss 2.3554 (2.4287)	Entropy 0.98274 (0.98564)	Top-1 acc 67.969 (65.671)	Top-5 acc 85.938 (84.971)	lr 0.00841
Train [73][2380/3239]	Time 0.228 (0.606)	Data Time 0.001 (0.019)	Loss 2.4849 (2.4288)	Entropy 0.98271 (0.98563)	Top-1 acc 63.672 (65.669)	Top-5 acc 81.641 (84.967)	lr 0.00841
Train [73][2390/3239]	Time 0.443 (0.626)	Data Time 0.002 (0.019)	Loss 2.3484 (2.4288)	Entropy 0.98271 (0.98561)	Top-1 acc 68.359 (65.670)	Top-5 acc 87.500 (84.967)	lr 0.00841
Train [73][2400/3239]	Time 0.215 (0.626)	Data Time 0.002 (0.019)	Loss 2.7229 (2.4288)	Entropy 0.98265 (0.98560)	Top-1 acc 55.078 (65.666)	Top-5 acc 79.688 (84.966)	lr 0.00841
Train [73][2410/3239]	Time 0.222 (0.626)	Data Time 0.001 (0.019)	Loss 2.4241 (2.4289)	Entropy 0.98264 (0.98559)	Top-1 acc 66.797 (65.667)	Top-5 acc 84.375 (84.963)	lr 0.00841
Train [73][2420/3239]	Time 0.252 (0.625)	Data Time 0.001 (0.019)	Loss 2.3898 (2.4289)	Entropy 0.98264 (0.98558)	Top-1 acc 63.672 (65.666)	Top-5 acc 85.547 (84.963)	lr 0.00841
Train [73][2430/3239]	Time 2.519 (0.624)	Data Time 0.001 (0.019)	Loss 2.3686 (2.4290)	Entropy 0.98264 (0.98557)	Top-1 acc 63.281 (65.661)	Top-5 acc 85.938 (84.963)	lr 0.00840
Train [73][2440/3239]	Time 0.342 (0.623)	Data Time 0.001 (0.019)	Loss 2.5930 (2.4292)	Entropy 0.98260 (0.98555)	Top-1 acc 58.594 (65.656)	Top-5 acc 82.812 (84.957)	lr 0.00840
Train [73][2450/3239]	Time 0.214 (0.622)	Data Time 0.002 (0.018)	Loss 2.6492 (2.4296)	Entropy 0.98256 (0.98554)	Top-1 acc 59.375 (65.648)	Top-5 acc 82.031 (84.950)	lr 0.00840
Train [73][2460/3239]	Time 0.214 (0.622)	Data Time 0.002 (0.018)	Loss 2.5874 (2.4296)	Entropy 0.98257 (0.98553)	Top-1 acc 60.938 (65.647)	Top-5 acc 84.375 (84.952)	lr 0.00840
Train [73][2470/3239]	Time 0.263 (0.621)	Data Time 0.002 (0.018)	Loss 2.4905 (2.4296)	Entropy 0.98254 (0.98552)	Top-1 acc 62.891 (65.646)	Top-5 acc 82.031 (84.949)	lr 0.00840
Train [73][2480/3239]	Time 0.328 (0.621)	Data Time 0.001 (0.018)	Loss 2.5449 (2.4295)	Entropy 0.98267 (0.98550)	Top-1 acc 63.281 (65.646)	Top-5 acc 83.594 (84.953)	lr 0.00840
Train [73][2490/3239]	Time 0.227 (0.620)	Data Time 0.001 (0.018)	Loss 2.3219 (2.4295)	Entropy 0.98267 (0.98549)	Top-1 acc 67.969 (65.646)	Top-5 acc 85.938 (84.953)	lr 0.00840
Train [73][2500/3239]	Time 0.225 (0.619)	Data Time 0.002 (0.018)	Loss 2.4792 (2.4296)	Entropy 0.98269 (0.98548)	Top-1 acc 63.281 (65.645)	Top-5 acc 85.156 (84.952)	lr 0.00840
Train [73][2510/3239]	Time 0.242 (0.619)	Data Time 0.001 (0.018)	Loss 2.4028 (2.4296)	Entropy 0.98263 (0.98547)	Top-1 acc 66.016 (65.642)	Top-5 acc 84.375 (84.952)	lr 0.00840
Train [73][2520/3239]	Time 0.224 (0.618)	Data Time 0.001 (0.018)	Loss 2.4811 (2.4295)	Entropy 0.98242 (0.98546)	Top-1 acc 64.844 (65.648)	Top-5 acc 84.766 (84.952)	lr 0.00840
Train [73][2530/3239]	Time 0.338 (0.618)	Data Time 0.001 (0.018)	Loss 2.4533 (2.4295)	Entropy 0.98237 (0.98545)	Top-1 acc 63.672 (65.650)	Top-5 acc 85.547 (84.951)	lr 0.00840
Train [73][2540/3239]	Time 2.458 (0.617)	Data Time 0.001 (0.018)	Loss 2.4814 (2.4297)	Entropy 0.98237 (0.98544)	Top-1 acc 61.328 (65.645)	Top-5 acc 84.375 (84.949)	lr 0.00839
Train [73][2550/3239]	Time 0.280 (0.616)	Data Time 0.001 (0.018)	Loss 2.4170 (2.4298)	Entropy 0.98234 (0.98542)	Top-1 acc 66.016 (65.644)	Top-5 acc 84.375 (84.946)	lr 0.00839
Train [73][2560/3239]	Time 0.238 (0.615)	Data Time 0.001 (0.018)	Loss 2.4606 (2.4297)	Entropy 0.98228 (0.98541)	Top-1 acc 66.016 (65.642)	Top-5 acc 83.203 (84.949)	lr 0.00839
Train [73][2570/3239]	Time 0.231 (0.615)	Data Time 0.001 (0.018)	Loss 2.3996 (2.4297)	Entropy 0.98226 (0.98540)	Top-1 acc 66.797 (65.641)	Top-5 acc 85.156 (84.948)	lr 0.00839
Train [73][2580/3239]	Time 0.224 (0.614)	Data Time 0.001 (0.018)	Loss 2.4984 (2.4298)	Entropy 0.98221 (0.98539)	Top-1 acc 64.062 (65.644)	Top-5 acc 85.547 (84.945)	lr 0.00839
Train [73][2590/3239]	Time 0.255 (0.614)	Data Time 0.002 (0.018)	Loss 2.3348 (2.4298)	Entropy 0.98222 (0.98537)	Top-1 acc 69.922 (65.645)	Top-5 acc 85.938 (84.943)	lr 0.00839
Train [73][2600/3239]	Time 0.236 (0.613)	Data Time 0.001 (0.017)	Loss 2.3753 (2.4297)	Entropy 0.98223 (0.98536)	Top-1 acc 69.531 (65.646)	Top-5 acc 84.766 (84.949)	lr 0.00839
Train [73][2610/3239]	Time 0.223 (0.613)	Data Time 0.001 (0.017)	Loss 2.5554 (2.4298)	Entropy 0.98217 (0.98535)	Top-1 acc 61.328 (65.641)	Top-5 acc 80.859 (84.945)	lr 0.00839
Train [73][2620/3239]	Time 0.348 (0.612)	Data Time 0.001 (0.017)	Loss 2.3629 (2.4300)	Entropy 0.98237 (0.98534)	Top-1 acc 66.406 (65.632)	Top-5 acc 85.938 (84.942)	lr 0.00839
Train [73][2630/3239]	Time 0.223 (0.612)	Data Time 0.001 (0.017)	Loss 2.4297 (2.4299)	Entropy 0.98235 (0.98533)	Top-1 acc 65.625 (65.636)	Top-5 acc 84.375 (84.944)	lr 0.00839
Train [73][2640/3239]	Time 0.226 (0.611)	Data Time 0.001 (0.017)	Loss 2.3797 (2.4298)	Entropy 0.98221 (0.98532)	Top-1 acc 66.406 (65.638)	Top-5 acc 85.156 (84.948)	lr 0.00838
Train [73][2650/3239]	Time 0.231 (0.611)	Data Time 0.001 (0.017)	Loss 2.4211 (2.4301)	Entropy 0.98215 (0.98530)	Top-1 acc 64.062 (65.631)	Top-5 acc 87.500 (84.945)	lr 0.00838
Train [73][2660/3239]	Time 0.266 (0.610)	Data Time 0.001 (0.017)	Loss 2.4526 (2.4300)	Entropy 0.98211 (0.98529)	Top-1 acc 66.406 (65.632)	Top-5 acc 83.984 (84.946)	lr 0.00838
Train [73][2670/3239]	Time 0.245 (0.610)	Data Time 0.001 (0.017)	Loss 2.3780 (2.4301)	Entropy 0.98211 (0.98528)	Top-1 acc 64.453 (65.630)	Top-5 acc 86.328 (84.943)	lr 0.00838
Train [73][2680/3239]	Time 0.220 (0.609)	Data Time 0.001 (0.017)	Loss 2.3273 (2.4302)	Entropy 0.98205 (0.98527)	Top-1 acc 67.578 (65.632)	Top-5 acc 86.719 (84.942)	lr 0.00838
Train [73][2690/3239]	Time 0.224 (0.609)	Data Time 0.001 (0.017)	Loss 2.4008 (2.4302)	Entropy 0.98204 (0.98526)	Top-1 acc 62.500 (65.633)	Top-5 acc 85.156 (84.941)	lr 0.00838
Train [73][2700/3239]	Time 0.229 (0.608)	Data Time 0.002 (0.017)	Loss 2.4350 (2.4303)	Entropy 0.98205 (0.98524)	Top-1 acc 64.453 (65.630)	Top-5 acc 83.984 (84.940)	lr 0.00838
Train [73][2710/3239]	Time 0.238 (0.608)	Data Time 0.001 (0.017)	Loss 2.5180 (2.4302)	Entropy 0.98201 (0.98523)	Top-1 acc 63.672 (65.629)	Top-5 acc 85.547 (84.941)	lr 0.00838
Train [73][2720/3239]	Time 0.257 (0.607)	Data Time 0.001 (0.017)	Loss 2.5720 (2.4302)	Entropy 0.98197 (0.98522)	Top-1 acc 62.500 (65.632)	Top-5 acc 82.031 (84.941)	lr 0.00838
Train [73][2730/3239]	Time 0.255 (0.607)	Data Time 0.002 (0.017)	Loss 2.3979 (2.4304)	Entropy 0.98180 (0.98521)	Top-1 acc 64.062 (65.625)	Top-5 acc 88.281 (84.940)	lr 0.00838
Train [73][2740/3239]	Time 0.260 (0.626)	Data Time 0.004 (0.017)	Loss 2.3800 (2.4303)	Entropy 0.98174 (0.98520)	Top-1 acc 67.188 (65.630)	Top-5 acc 86.719 (84.942)	lr 0.00838
Train [73][2750/3239]	Time 0.326 (0.625)	Data Time 0.002 (0.017)	Loss 2.3795 (2.4302)	Entropy 0.98170 (0.98518)	Top-1 acc 61.719 (65.631)	Top-5 acc 87.500 (84.945)	lr 0.00837
Train [73][2760/3239]	Time 0.229 (0.625)	Data Time 0.001 (0.017)	Loss 2.4959 (2.4302)	Entropy 0.98168 (0.98517)	Top-1 acc 64.062 (65.627)	Top-5 acc 80.859 (84.940)	lr 0.00837
Train [73][2770/3239]	Time 0.229 (0.624)	Data Time 0.001 (0.017)	Loss 2.4631 (2.4303)	Entropy 0.98165 (0.98516)	Top-1 acc 64.062 (65.623)	Top-5 acc 83.984 (84.937)	lr 0.00837
Train [73][2780/3239]	Time 0.225 (0.624)	Data Time 0.001 (0.016)	Loss 2.5366 (2.4304)	Entropy 0.98158 (0.98515)	Top-1 acc 63.672 (65.626)	Top-5 acc 83.984 (84.936)	lr 0.00837
Train [73][2790/3239]	Time 0.264 (0.623)	Data Time 0.001 (0.016)	Loss 2.4374 (2.4303)	Entropy 0.98152 (0.98513)	Top-1 acc 64.062 (65.629)	Top-5 acc 83.203 (84.936)	lr 0.00837
Train [73][2800/3239]	Time 0.217 (0.623)	Data Time 0.001 (0.016)	Loss 2.3815 (2.4303)	Entropy 0.98149 (0.98512)	Top-1 acc 70.312 (65.631)	Top-5 acc 85.547 (84.938)	lr 0.00837
Train [73][2810/3239]	Time 0.259 (0.622)	Data Time 0.001 (0.016)	Loss 2.4352 (2.4304)	Entropy 0.98148 (0.98511)	Top-1 acc 64.844 (65.627)	Top-5 acc 84.766 (84.936)	lr 0.00837
Train [73][2820/3239]	Time 0.246 (0.622)	Data Time 0.001 (0.016)	Loss 2.5865 (2.4305)	Entropy 0.98141 (0.98509)	Top-1 acc 62.500 (65.625)	Top-5 acc 82.422 (84.934)	lr 0.00837
Train [73][2830/3239]	Time 0.282 (0.621)	Data Time 0.001 (0.016)	Loss 2.4429 (2.4304)	Entropy 0.98137 (0.98508)	Top-1 acc 65.234 (65.624)	Top-5 acc 84.375 (84.935)	lr 0.00837
Train [73][2840/3239]	Time 0.375 (0.621)	Data Time 0.001 (0.016)	Loss 2.4772 (2.4305)	Entropy 0.98130 (0.98507)	Top-1 acc 60.938 (65.618)	Top-5 acc 85.938 (84.934)	lr 0.00837
Train [73][2850/3239]	Time 0.247 (0.620)	Data Time 0.001 (0.016)	Loss 2.4908 (2.4309)	Entropy 0.98125 (0.98505)	Top-1 acc 71.484 (65.612)	Top-5 acc 84.375 (84.928)	lr 0.00836
Train [73][2860/3239]	Time 0.261 (0.620)	Data Time 0.001 (0.016)	Loss 2.4259 (2.4309)	Entropy 0.98121 (0.98504)	Top-1 acc 66.797 (65.607)	Top-5 acc 86.328 (84.928)	lr 0.00836
Train [73][2870/3239]	Time 0.273 (0.619)	Data Time 0.001 (0.016)	Loss 2.2541 (2.4308)	Entropy 0.98115 (0.98503)	Top-1 acc 71.094 (65.613)	Top-5 acc 87.891 (84.928)	lr 0.00836
Train [73][2880/3239]	Time 0.334 (0.619)	Data Time 0.001 (0.016)	Loss 2.5751 (2.4309)	Entropy 0.98116 (0.98501)	Top-1 acc 60.938 (65.609)	Top-5 acc 80.859 (84.927)	lr 0.00836
Train [73][2890/3239]	Time 0.241 (0.618)	Data Time 0.001 (0.016)	Loss 2.5950 (2.4310)	Entropy 0.98118 (0.98500)	Top-1 acc 62.109 (65.605)	Top-5 acc 80.469 (84.924)	lr 0.00836
Train [73][2900/3239]	Time 0.227 (0.618)	Data Time 0.001 (0.016)	Loss 2.5031 (2.4312)	Entropy 0.98115 (0.98499)	Top-1 acc 62.891 (65.600)	Top-5 acc 82.422 (84.920)	lr 0.00836
Train [73][2910/3239]	Time 0.238 (0.617)	Data Time 0.001 (0.016)	Loss 2.4208 (2.4312)	Entropy 0.98116 (0.98497)	Top-1 acc 64.844 (65.599)	Top-5 acc 84.375 (84.919)	lr 0.00836
Train [73][2920/3239]	Time 0.252 (0.617)	Data Time 0.001 (0.016)	Loss 2.3949 (2.4314)	Entropy 0.98108 (0.98496)	Top-1 acc 66.016 (65.594)	Top-5 acc 86.328 (84.914)	lr 0.00836
Train [73][2930/3239]	Time 0.221 (0.616)	Data Time 0.001 (0.016)	Loss 2.4784 (2.4315)	Entropy 0.98105 (0.98495)	Top-1 acc 64.062 (65.592)	Top-5 acc 83.984 (84.912)	lr 0.00836
Train [73][2940/3239]	Time 0.286 (0.616)	Data Time 0.001 (0.016)	Loss 2.4961 (2.4318)	Entropy 0.98102 (0.98494)	Top-1 acc 63.281 (65.586)	Top-5 acc 81.641 (84.906)	lr 0.00836
Train [73][2950/3239]	Time 0.229 (0.615)	Data Time 0.001 (0.016)	Loss 2.5096 (2.4319)	Entropy 0.98105 (0.98492)	Top-1 acc 67.188 (65.585)	Top-5 acc 83.594 (84.906)	lr 0.00836
Train [73][2960/3239]	Time 0.243 (0.615)	Data Time 0.001 (0.016)	Loss 2.3828 (2.4318)	Entropy 0.98101 (0.98491)	Top-1 acc 67.969 (65.586)	Top-5 acc 83.984 (84.906)	lr 0.00835
Train [73][2970/3239]	Time 0.289 (0.615)	Data Time 0.001 (0.016)	Loss 2.4362 (2.4319)	Entropy 0.98100 (0.98490)	Top-1 acc 63.672 (65.586)	Top-5 acc 86.328 (84.905)	lr 0.00835
Train [73][2980/3239]	Time 0.217 (0.614)	Data Time 0.001 (0.015)	Loss 2.3774 (2.4319)	Entropy 0.98098 (0.98488)	Top-1 acc 66.406 (65.581)	Top-5 acc 90.625 (84.908)	lr 0.00835
Train [73][2990/3239]	Time 0.245 (0.614)	Data Time 0.001 (0.015)	Loss 2.5291 (2.4317)	Entropy 0.98095 (0.98487)	Top-1 acc 62.891 (65.583)	Top-5 acc 83.984 (84.912)	lr 0.00835
Train [73][3000/3239]	Time 0.219 (0.613)	Data Time 0.001 (0.015)	Loss 2.5324 (2.4318)	Entropy 0.98093 (0.98486)	Top-1 acc 61.719 (65.583)	Top-5 acc 83.594 (84.912)	lr 0.00835
Train [73][3010/3239]	Time 0.236 (0.613)	Data Time 0.001 (0.015)	Loss 2.5348 (2.4318)	Entropy 0.98091 (0.98484)	Top-1 acc 63.672 (65.584)	Top-5 acc 82.031 (84.912)	lr 0.00835
Train [73][3020/3239]	Time 0.364 (0.612)	Data Time 0.001 (0.015)	Loss 2.3317 (2.4317)	Entropy 0.98089 (0.98483)	Top-1 acc 68.750 (65.585)	Top-5 acc 87.891 (84.913)	lr 0.00835
Train [73][3030/3239]	Time 0.230 (0.612)	Data Time 0.001 (0.015)	Loss 2.5794 (2.4317)	Entropy 0.98073 (0.98482)	Top-1 acc 62.500 (65.583)	Top-5 acc 82.812 (84.910)	lr 0.00835
Train [73][3040/3239]	Time 0.250 (0.611)	Data Time 0.001 (0.015)	Loss 2.4035 (2.4318)	Entropy 0.98072 (0.98480)	Top-1 acc 66.016 (65.578)	Top-5 acc 85.938 (84.912)	lr 0.00835
Train [73][3050/3239]	Time 0.242 (0.611)	Data Time 0.001 (0.015)	Loss 2.3465 (2.4319)	Entropy 0.98069 (0.98479)	Top-1 acc 69.922 (65.579)	Top-5 acc 87.109 (84.911)	lr 0.00835
Train [73][3060/3239]	Time 0.327 (0.610)	Data Time 0.001 (0.015)	Loss 2.5290 (2.4320)	Entropy 0.98066 (0.98478)	Top-1 acc 62.500 (65.576)	Top-5 acc 82.812 (84.910)	lr 0.00834
Train [73][3070/3239]	Time 0.249 (0.628)	Data Time 0.004 (0.015)	Loss 2.5516 (2.4321)	Entropy 0.98062 (0.98476)	Top-1 acc 60.547 (65.575)	Top-5 acc 82.812 (84.909)	lr 0.00834
Train [73][3080/3239]	Time 0.275 (0.628)	Data Time 0.002 (0.015)	Loss 2.4092 (2.4322)	Entropy 0.98063 (0.98475)	Top-1 acc 66.016 (65.573)	Top-5 acc 87.109 (84.910)	lr 0.00834
Train [73][3090/3239]	Time 0.258 (0.627)	Data Time 0.002 (0.015)	Loss 2.3781 (2.4323)	Entropy 0.98067 (0.98474)	Top-1 acc 66.016 (65.569)	Top-5 acc 86.719 (84.911)	lr 0.00834
Train [73][3100/3239]	Time 0.326 (0.627)	Data Time 0.002 (0.015)	Loss 2.3469 (2.4323)	Entropy 0.98073 (0.98472)	Top-1 acc 64.062 (65.568)	Top-5 acc 86.719 (84.913)	lr 0.00834
Train [73][3110/3239]	Time 0.251 (0.626)	Data Time 0.002 (0.015)	Loss 2.2850 (2.4322)	Entropy 0.98074 (0.98471)	Top-1 acc 69.141 (65.571)	Top-5 acc 87.891 (84.913)	lr 0.00834
Train [73][3120/3239]	Time 0.227 (0.626)	Data Time 0.001 (0.015)	Loss 2.3574 (2.4321)	Entropy 0.98065 (0.98470)	Top-1 acc 67.188 (65.570)	Top-5 acc 88.281 (84.916)	lr 0.00834
Train [73][3130/3239]	Time 0.253 (0.625)	Data Time 0.001 (0.015)	Loss 2.1823 (2.4321)	Entropy 0.98061 (0.98469)	Top-1 acc 70.312 (65.570)	Top-5 acc 89.062 (84.917)	lr 0.00834
Train [73][3140/3239]	Time 0.244 (0.625)	Data Time 0.003 (0.015)	Loss 2.3570 (2.4322)	Entropy 0.98054 (0.98467)	Top-1 acc 67.578 (65.567)	Top-5 acc 83.203 (84.912)	lr 0.00834
Train [73][3150/3239]	Time 0.367 (0.624)	Data Time 0.001 (0.015)	Loss 2.6105 (2.4324)	Entropy 0.98045 (0.98466)	Top-1 acc 61.328 (65.562)	Top-5 acc 81.250 (84.907)	lr 0.00834
Train [73][3160/3239]	Time 0.298 (0.624)	Data Time 0.002 (0.015)	Loss 2.4244 (2.4324)	Entropy 0.98040 (0.98465)	Top-1 acc 62.891 (65.563)	Top-5 acc 85.938 (84.907)	lr 0.00834
Train [73][3170/3239]	Time 0.235 (0.623)	Data Time 0.002 (0.015)	Loss 2.5086 (2.4323)	Entropy 0.98037 (0.98463)	Top-1 acc 63.672 (65.565)	Top-5 acc 81.250 (84.908)	lr 0.00833
Train [73][3180/3239]	Time 0.228 (0.623)	Data Time 0.000 (0.015)	Loss 2.4117 (2.4323)	Entropy 0.98033 (0.98462)	Top-1 acc 65.234 (65.566)	Top-5 acc 85.547 (84.910)	lr 0.00833
Train [73][3190/3239]	Time 0.235 (0.622)	Data Time 0.000 (0.015)	Loss 2.6322 (2.4325)	Entropy 0.98031 (0.98461)	Top-1 acc 63.672 (65.559)	Top-5 acc 80.859 (84.905)	lr 0.00833
Train [73][3200/3239]	Time 0.225 (0.622)	Data Time 0.000 (0.015)	Loss 2.3328 (2.4324)	Entropy 0.98018 (0.98459)	Top-1 acc 65.625 (65.559)	Top-5 acc 89.062 (84.906)	lr 0.00833
Train [73][3210/3239]	Time 0.222 (0.621)	Data Time 0.000 (0.014)	Loss 2.3950 (2.4326)	Entropy 0.98009 (0.98458)	Top-1 acc 67.188 (65.555)	Top-5 acc 83.594 (84.903)	lr 0.00833
Train [73][3220/3239]	Time 0.238 (0.621)	Data Time 0.000 (0.014)	Loss 2.6680 (2.4325)	Entropy 0.98007 (0.98456)	Top-1 acc 58.203 (65.552)	Top-5 acc 83.203 (84.905)	lr 0.00833
Train [73][3230/3239]	Time 0.227 (0.620)	Data Time 0.000 (0.014)	Loss 2.3456 (2.4325)	Entropy 0.98004 (0.98455)	Top-1 acc 67.578 (65.552)	Top-5 acc 84.375 (84.907)	lr 0.00833
Train [73][3239/3239]	Time 2.396 (0.620)	Data Time 0.000 (0.014)	Loss 2.5761 (2.4325)	Entropy 0.98004 (0.98454)	Top-1 acc 66.667 (65.551)	Top-5 acc 80.247 (84.905)	lr 0.00833
==========Valid [73/120]	loss 1.350	top-1 acc 69.009 (69.246)	top-5 acc 88.018	Train top-1 65.551	top-5 84.905	Entropy 0.98004	Latency-None: 0.000ms	Flops: 546.53M
Train [74][0/3239]	Time 42.733 (42.733)	Data Time 40.455 (40.455)	Loss 2.3351 (2.3351)	Entropy 0.98002 (0.98002)	Top-1 acc 66.016 (66.016)	Top-5 acc 87.109 (87.109)	lr 0.00833
Train [74][10/3239]	Time 2.573 (4.416)	Data Time 0.002 (3.685)	Loss 2.3761 (2.4145)	Entropy 0.98002 (0.98002)	Top-1 acc 69.922 (66.300)	Top-5 acc 84.766 (85.085)	lr 0.00833
Train [74][20/3239]	Time 0.239 (2.433)	Data Time 0.001 (1.931)	Loss 2.2815 (2.4110)	Entropy 0.97999 (0.98000)	Top-1 acc 69.531 (65.867)	Top-5 acc 87.500 (85.249)	lr 0.00833
Train [74][30/3239]	Time 0.240 (1.802)	Data Time 0.001 (1.308)	Loss 2.2605 (2.4106)	Entropy 0.97992 (0.97998)	Top-1 acc 71.875 (66.104)	Top-5 acc 87.109 (85.295)	lr 0.00832
Train [74][40/3239]	Time 0.354 (1.482)	Data Time 0.001 (0.990)	Loss 2.5731 (2.4273)	Entropy 0.97988 (0.97996)	Top-1 acc 61.328 (65.682)	Top-5 acc 81.250 (85.042)	lr 0.00832
Train [74][50/3239]	Time 0.231 (1.283)	Data Time 0.001 (0.796)	Loss 2.5215 (2.4191)	Entropy 0.97985 (0.97994)	Top-1 acc 62.500 (65.931)	Top-5 acc 83.984 (85.156)	lr 0.00832
Train [74][60/3239]	Time 0.218 (1.150)	Data Time 0.001 (0.666)	Loss 2.5528 (2.4209)	Entropy 0.97985 (0.97992)	Top-1 acc 61.719 (65.766)	Top-5 acc 83.594 (85.207)	lr 0.00832
Train [74][70/3239]	Time 0.259 (1.054)	Data Time 0.002 (0.572)	Loss 2.5146 (2.4136)	Entropy 0.97979 (0.97991)	Top-1 acc 61.719 (65.972)	Top-5 acc 84.766 (85.310)	lr 0.00832
Train [74][80/3239]	Time 0.234 (0.985)	Data Time 0.001 (0.502)	Loss 2.3648 (2.4292)	Entropy 0.97978 (0.97989)	Top-1 acc 67.969 (65.856)	Top-5 acc 83.594 (85.045)	lr 0.00832
Train [74][90/3239]	Time 0.239 (0.931)	Data Time 0.001 (0.447)	Loss 2.4062 (2.4244)	Entropy 0.97977 (0.97988)	Top-1 acc 67.969 (65.990)	Top-5 acc 84.766 (85.096)	lr 0.00832
Train [74][100/3239]	Time 0.229 (0.888)	Data Time 0.001 (0.403)	Loss 2.3931 (2.4240)	Entropy 0.97968 (0.97987)	Top-1 acc 67.578 (66.008)	Top-5 acc 85.547 (85.079)	lr 0.00832
Train [74][110/3239]	Time 0.245 (0.853)	Data Time 0.001 (0.367)	Loss 2.3888 (2.4201)	Entropy 0.97960 (0.97985)	Top-1 acc 67.578 (66.086)	Top-5 acc 86.328 (85.184)	lr 0.00832
Train [74][120/3239]	Time 2.567 (0.822)	Data Time 0.001 (0.336)	Loss 2.4581 (2.4195)	Entropy 0.97960 (0.97983)	Top-1 acc 65.234 (66.119)	Top-5 acc 83.594 (85.137)	lr 0.00832
Train [74][130/3239]	Time 0.266 (0.778)	Data Time 0.001 (0.311)	Loss 2.4256 (2.4187)	Entropy 0.97960 (0.97981)	Top-1 acc 67.188 (66.129)	Top-5 acc 85.156 (85.123)	lr 0.00832
Train [74][140/3239]	Time 0.230 (0.757)	Data Time 0.001 (0.289)	Loss 2.4230 (2.4164)	Entropy 0.97956 (0.97979)	Top-1 acc 66.016 (66.259)	Top-5 acc 87.500 (85.176)	lr 0.00831
Train [74][150/3239]	Time 0.227 (0.738)	Data Time 0.001 (0.270)	Loss 2.4010 (2.4170)	Entropy 0.97957 (0.97978)	Top-1 acc 65.625 (66.292)	Top-5 acc 85.938 (85.148)	lr 0.00831
Train [74][160/3239]	Time 0.239 (0.722)	Data Time 0.001 (0.253)	Loss 2.4577 (2.4163)	Entropy 0.97955 (0.97977)	Top-1 acc 68.359 (66.273)	Top-5 acc 83.203 (85.156)	lr 0.00831
Train [74][170/3239]	Time 0.241 (0.709)	Data Time 0.001 (0.238)	Loss 2.3328 (2.4142)	Entropy 0.97949 (0.97975)	Top-1 acc 66.406 (66.331)	Top-5 acc 85.938 (85.188)	lr 0.00831
Train [74][180/3239]	Time 0.254 (1.016)	Data Time 0.002 (0.225)	Loss 2.3430 (2.4134)	Entropy 0.97952 (0.97974)	Top-1 acc 66.406 (66.354)	Top-5 acc 88.672 (85.178)	lr 0.00831
Train [74][190/3239]	Time 0.236 (0.988)	Data Time 0.002 (0.214)	Loss 2.3063 (2.4111)	Entropy 0.97950 (0.97972)	Top-1 acc 66.016 (66.378)	Top-5 acc 88.672 (85.207)	lr 0.00831
Train [74][200/3239]	Time 0.234 (0.963)	Data Time 0.002 (0.203)	Loss 2.5512 (2.4093)	Entropy 0.97944 (0.97971)	Top-1 acc 62.500 (66.416)	Top-5 acc 80.469 (85.232)	lr 0.00831
Train [74][210/3239]	Time 0.231 (0.941)	Data Time 0.002 (0.194)	Loss 2.5016 (2.4113)	Entropy 0.97939 (0.97970)	Top-1 acc 65.234 (66.382)	Top-5 acc 85.547 (85.225)	lr 0.00831
Train [74][220/3239]	Time 0.320 (0.920)	Data Time 0.001 (0.185)	Loss 2.3863 (2.4126)	Entropy 0.97933 (0.97968)	Top-1 acc 66.016 (66.297)	Top-5 acc 83.984 (85.176)	lr 0.00831
Train [74][230/3239]	Time 2.449 (0.901)	Data Time 0.001 (0.177)	Loss 2.3409 (2.4142)	Entropy 0.97933 (0.97967)	Top-1 acc 66.797 (66.256)	Top-5 acc 86.719 (85.128)	lr 0.00831
Train [74][240/3239]	Time 0.226 (0.873)	Data Time 0.001 (0.170)	Loss 2.4616 (2.4143)	Entropy 0.97922 (0.97965)	Top-1 acc 65.625 (66.281)	Top-5 acc 85.156 (85.106)	lr 0.00830
Train [74][250/3239]	Time 0.233 (0.858)	Data Time 0.001 (0.163)	Loss 2.3970 (2.4132)	Entropy 0.97919 (0.97963)	Top-1 acc 64.062 (66.311)	Top-5 acc 87.109 (85.127)	lr 0.00830
Train [74][260/3239]	Time 0.228 (0.844)	Data Time 0.001 (0.157)	Loss 2.3530 (2.4121)	Entropy 0.97912 (0.97961)	Top-1 acc 65.234 (66.330)	Top-5 acc 87.109 (85.117)	lr 0.00830
Train [74][270/3239]	Time 0.223 (0.830)	Data Time 0.002 (0.151)	Loss 2.3352 (2.4123)	Entropy 0.97912 (0.97960)	Top-1 acc 69.141 (66.320)	Top-5 acc 86.328 (85.114)	lr 0.00830
Train [74][280/3239]	Time 0.254 (0.817)	Data Time 0.001 (0.146)	Loss 2.4061 (2.4119)	Entropy 0.97915 (0.97958)	Top-1 acc 64.062 (66.308)	Top-5 acc 86.328 (85.130)	lr 0.00830
Train [74][290/3239]	Time 0.215 (0.806)	Data Time 0.001 (0.141)	Loss 2.3104 (2.4108)	Entropy 0.97914 (0.97956)	Top-1 acc 68.750 (66.370)	Top-5 acc 87.109 (85.146)	lr 0.00830
Train [74][300/3239]	Time 0.229 (0.795)	Data Time 0.001 (0.136)	Loss 2.3160 (2.4104)	Entropy 0.97913 (0.97955)	Top-1 acc 68.750 (66.396)	Top-5 acc 89.062 (85.158)	lr 0.00830
Train [74][310/3239]	Time 0.284 (0.785)	Data Time 0.001 (0.132)	Loss 2.3981 (2.4103)	Entropy 0.97912 (0.97954)	Top-1 acc 65.625 (66.394)	Top-5 acc 85.938 (85.158)	lr 0.00830
Train [74][320/3239]	Time 0.232 (0.776)	Data Time 0.001 (0.128)	Loss 2.4662 (2.4092)	Entropy 0.97913 (0.97952)	Top-1 acc 66.016 (66.443)	Top-5 acc 85.156 (85.160)	lr 0.00830
Train [74][330/3239]	Time 0.266 (0.767)	Data Time 0.001 (0.124)	Loss 2.3907 (2.4090)	Entropy 0.97910 (0.97951)	Top-1 acc 66.406 (66.423)	Top-5 acc 84.766 (85.150)	lr 0.00830
Train [74][340/3239]	Time 2.421 (0.758)	Data Time 0.001 (0.120)	Loss 2.3695 (2.4084)	Entropy 0.97910 (0.97950)	Top-1 acc 65.625 (66.406)	Top-5 acc 86.328 (85.173)	lr 0.00830
Train [74][350/3239]	Time 0.235 (0.743)	Data Time 0.002 (0.117)	Loss 2.5000 (2.4139)	Entropy 0.97899 (0.97948)	Top-1 acc 62.500 (66.279)	Top-5 acc 81.641 (85.087)	lr 0.00829
Train [74][360/3239]	Time 0.360 (0.735)	Data Time 0.001 (0.114)	Loss 2.2810 (2.4140)	Entropy 0.97896 (0.97947)	Top-1 acc 69.922 (66.264)	Top-5 acc 86.719 (85.079)	lr 0.00829
Train [74][370/3239]	Time 0.233 (0.729)	Data Time 0.001 (0.111)	Loss 2.3354 (2.4138)	Entropy 0.97892 (0.97946)	Top-1 acc 66.016 (66.263)	Top-5 acc 87.500 (85.094)	lr 0.00829
Train [74][380/3239]	Time 0.201 (0.722)	Data Time 0.001 (0.108)	Loss 2.5608 (2.4134)	Entropy 0.97883 (0.97944)	Top-1 acc 58.594 (66.272)	Top-5 acc 84.766 (85.107)	lr 0.00829
Train [74][390/3239]	Time 0.240 (0.716)	Data Time 0.002 (0.105)	Loss 2.5596 (2.4149)	Entropy 0.97868 (0.97942)	Top-1 acc 62.500 (66.242)	Top-5 acc 83.594 (85.083)	lr 0.00829
Train [74][400/3239]	Time 0.322 (0.709)	Data Time 0.001 (0.103)	Loss 2.3886 (2.4156)	Entropy 0.97867 (0.97940)	Top-1 acc 68.750 (66.207)	Top-5 acc 85.547 (85.072)	lr 0.00829
Train [74][410/3239]	Time 0.232 (0.703)	Data Time 0.001 (0.100)	Loss 2.3535 (2.4153)	Entropy 0.97863 (0.97939)	Top-1 acc 71.094 (66.215)	Top-5 acc 85.938 (85.099)	lr 0.00829
Train [74][420/3239]	Time 0.239 (0.698)	Data Time 0.001 (0.098)	Loss 2.3685 (2.4152)	Entropy 0.97860 (0.97937)	Top-1 acc 69.531 (66.214)	Top-5 acc 85.156 (85.100)	lr 0.00829
Train [74][430/3239]	Time 0.240 (0.693)	Data Time 0.002 (0.096)	Loss 2.5554 (2.4154)	Entropy 0.97859 (0.97935)	Top-1 acc 63.281 (66.184)	Top-5 acc 82.031 (85.093)	lr 0.00829
Train [74][440/3239]	Time 0.223 (0.688)	Data Time 0.002 (0.094)	Loss 2.4272 (2.4150)	Entropy 0.97852 (0.97933)	Top-1 acc 65.234 (66.186)	Top-5 acc 84.375 (85.103)	lr 0.00829
Train [74][450/3239]	Time 2.506 (0.684)	Data Time 0.004 (0.092)	Loss 2.3828 (2.4158)	Entropy 0.97852 (0.97931)	Top-1 acc 64.062 (66.142)	Top-5 acc 86.719 (85.090)	lr 0.00828
Train [74][460/3239]	Time 0.273 (0.675)	Data Time 0.001 (0.090)	Loss 2.4812 (2.4153)	Entropy 0.97845 (0.97930)	Top-1 acc 65.625 (66.159)	Top-5 acc 83.594 (85.091)	lr 0.00828
Train [74][470/3239]	Time 0.237 (0.671)	Data Time 0.001 (0.088)	Loss 2.4874 (2.4147)	Entropy 0.97841 (0.97928)	Top-1 acc 61.328 (66.173)	Top-5 acc 83.594 (85.118)	lr 0.00828
Train [74][480/3239]	Time 0.226 (0.667)	Data Time 0.001 (0.086)	Loss 2.3900 (2.4160)	Entropy 0.97841 (0.97926)	Top-1 acc 66.797 (66.139)	Top-5 acc 85.156 (85.094)	lr 0.00828
Train [74][490/3239]	Time 0.385 (0.663)	Data Time 0.002 (0.084)	Loss 2.5424 (2.4159)	Entropy 0.97836 (0.97924)	Top-1 acc 63.281 (66.146)	Top-5 acc 81.250 (85.099)	lr 0.00828
Train [74][500/3239]	Time 0.278 (0.660)	Data Time 0.001 (0.083)	Loss 2.4508 (2.4167)	Entropy 0.97836 (0.97922)	Top-1 acc 64.062 (66.095)	Top-5 acc 83.203 (85.088)	lr 0.00828
Train [74][510/3239]	Time 0.229 (0.656)	Data Time 0.001 (0.081)	Loss 2.4247 (2.4171)	Entropy 0.97833 (0.97921)	Top-1 acc 63.672 (66.088)	Top-5 acc 85.938 (85.074)	lr 0.00828
Train [74][520/3239]	Time 0.244 (0.652)	Data Time 0.001 (0.079)	Loss 2.4404 (2.4179)	Entropy 0.97824 (0.97919)	Top-1 acc 67.188 (66.061)	Top-5 acc 86.328 (85.065)	lr 0.00828
Train [74][530/3239]	Time 0.245 (0.649)	Data Time 0.001 (0.078)	Loss 2.3489 (2.4172)	Entropy 0.97821 (0.97917)	Top-1 acc 68.359 (66.083)	Top-5 acc 86.328 (85.081)	lr 0.00828
Train [74][540/3239]	Time 0.418 (0.740)	Data Time 0.003 (0.077)	Loss 2.3885 (2.4175)	Entropy 0.97815 (0.97915)	Top-1 acc 68.750 (66.066)	Top-5 acc 88.281 (85.080)	lr 0.00828
Train [74][550/3239]	Time 0.224 (0.738)	Data Time 0.002 (0.075)	Loss 2.3629 (2.4167)	Entropy 0.97815 (0.97913)	Top-1 acc 67.188 (66.087)	Top-5 acc 88.672 (85.090)	lr 0.00828
Train [74][560/3239]	Time 2.458 (0.733)	Data Time 0.002 (0.074)	Loss 2.3199 (2.4179)	Entropy 0.97815 (0.97912)	Top-1 acc 69.531 (66.076)	Top-5 acc 87.500 (85.066)	lr 0.00827
Train [74][570/3239]	Time 0.222 (0.725)	Data Time 0.001 (0.073)	Loss 2.3216 (2.4169)	Entropy 0.97819 (0.97910)	Top-1 acc 67.578 (66.109)	Top-5 acc 86.719 (85.078)	lr 0.00827
Train [74][580/3239]	Time 0.338 (0.721)	Data Time 0.001 (0.071)	Loss 2.4722 (2.4173)	Entropy 0.97821 (0.97908)	Top-1 acc 62.500 (66.088)	Top-5 acc 84.766 (85.079)	lr 0.00827
Train [74][590/3239]	Time 0.224 (0.717)	Data Time 0.001 (0.070)	Loss 2.4002 (2.4173)	Entropy 0.97819 (0.97907)	Top-1 acc 66.797 (66.091)	Top-5 acc 85.938 (85.078)	lr 0.00827
Train [74][600/3239]	Time 0.228 (0.712)	Data Time 0.001 (0.069)	Loss 2.5344 (2.4175)	Entropy 0.97811 (0.97905)	Top-1 acc 64.453 (66.070)	Top-5 acc 82.812 (85.073)	lr 0.00827
Train [74][610/3239]	Time 0.250 (0.708)	Data Time 0.001 (0.068)	Loss 2.3864 (2.4184)	Entropy 0.97798 (0.97904)	Top-1 acc 67.578 (66.041)	Top-5 acc 85.938 (85.065)	lr 0.00827
Train [74][620/3239]	Time 0.243 (0.705)	Data Time 0.001 (0.067)	Loss 2.3325 (2.4177)	Entropy 0.97794 (0.97902)	Top-1 acc 68.750 (66.055)	Top-5 acc 87.891 (85.082)	lr 0.00827
Train [74][630/3239]	Time 0.286 (0.702)	Data Time 0.002 (0.066)	Loss 2.3505 (2.4178)	Entropy 0.97790 (0.97900)	Top-1 acc 67.578 (66.056)	Top-5 acc 86.328 (85.077)	lr 0.00827
Train [74][640/3239]	Time 0.300 (0.699)	Data Time 0.003 (0.065)	Loss 2.5492 (2.4188)	Entropy 0.97787 (0.97899)	Top-1 acc 61.719 (66.036)	Top-5 acc 80.859 (85.067)	lr 0.00827
Train [74][650/3239]	Time 0.243 (0.698)	Data Time 0.002 (0.064)	Loss 2.3540 (2.4180)	Entropy 0.97762 (0.97897)	Top-1 acc 67.969 (66.064)	Top-5 acc 85.156 (85.076)	lr 0.00827
Train [74][660/3239]	Time 0.290 (0.694)	Data Time 0.001 (0.063)	Loss 2.4298 (2.4182)	Entropy 0.97761 (0.97895)	Top-1 acc 66.797 (66.039)	Top-5 acc 85.547 (85.068)	lr 0.00826
Train [74][670/3239]	Time 2.570 (0.691)	Data Time 0.001 (0.062)	Loss 2.4863 (2.4180)	Entropy 0.97761 (0.97893)	Top-1 acc 64.844 (66.044)	Top-5 acc 86.328 (85.074)	lr 0.00826
Train [74][680/3239]	Time 0.219 (0.684)	Data Time 0.001 (0.061)	Loss 2.3560 (2.4181)	Entropy 0.97760 (0.97891)	Top-1 acc 65.625 (66.035)	Top-5 acc 85.938 (85.068)	lr 0.00826
Train [74][690/3239]	Time 0.225 (0.681)	Data Time 0.001 (0.060)	Loss 2.3961 (2.4183)	Entropy 0.97760 (0.97889)	Top-1 acc 70.312 (66.037)	Top-5 acc 85.938 (85.057)	lr 0.00826
Train [74][700/3239]	Time 0.275 (0.679)	Data Time 0.002 (0.060)	Loss 2.4534 (2.4185)	Entropy 0.97761 (0.97887)	Top-1 acc 63.672 (66.027)	Top-5 acc 83.203 (85.055)	lr 0.00826
Train [74][710/3239]	Time 0.397 (0.677)	Data Time 0.002 (0.059)	Loss 2.2782 (2.4189)	Entropy 0.97757 (0.97885)	Top-1 acc 69.141 (66.018)	Top-5 acc 87.500 (85.048)	lr 0.00826
Train [74][720/3239]	Time 0.283 (0.675)	Data Time 0.003 (0.058)	Loss 2.2860 (2.4191)	Entropy 0.97754 (0.97883)	Top-1 acc 69.922 (66.003)	Top-5 acc 88.672 (85.042)	lr 0.00826
Train [74][730/3239]	Time 0.239 (0.672)	Data Time 0.001 (0.057)	Loss 2.5033 (2.4189)	Entropy 0.97754 (0.97882)	Top-1 acc 62.891 (66.013)	Top-5 acc 83.984 (85.047)	lr 0.00826
Train [74][740/3239]	Time 0.232 (0.670)	Data Time 0.001 (0.056)	Loss 2.2922 (2.4191)	Entropy 0.97753 (0.97880)	Top-1 acc 69.531 (66.009)	Top-5 acc 88.281 (85.037)	lr 0.00826
Train [74][750/3239]	Time 0.210 (0.668)	Data Time 0.001 (0.056)	Loss 2.4720 (2.4186)	Entropy 0.97739 (0.97878)	Top-1 acc 63.672 (66.011)	Top-5 acc 83.984 (85.054)	lr 0.00826
Train [74][760/3239]	Time 0.224 (0.665)	Data Time 0.001 (0.055)	Loss 2.3998 (2.4190)	Entropy 0.97739 (0.97876)	Top-1 acc 64.844 (66.010)	Top-5 acc 83.594 (85.043)	lr 0.00826
Train [74][770/3239]	Time 0.232 (0.663)	Data Time 0.001 (0.054)	Loss 2.4148 (2.4190)	Entropy 0.97761 (0.97875)	Top-1 acc 65.625 (66.004)	Top-5 acc 84.766 (85.045)	lr 0.00825
Train [74][780/3239]	Time 2.481 (0.660)	Data Time 0.002 (0.054)	Loss 2.3793 (2.4191)	Entropy 0.97761 (0.97873)	Top-1 acc 66.797 (66.000)	Top-5 acc 84.766 (85.045)	lr 0.00825
Train [74][790/3239]	Time 0.237 (0.655)	Data Time 0.001 (0.053)	Loss 2.7155 (2.4199)	Entropy 0.97761 (0.97872)	Top-1 acc 59.766 (65.991)	Top-5 acc 79.688 (85.029)	lr 0.00825
Train [74][800/3239]	Time 0.319 (0.653)	Data Time 0.001 (0.052)	Loss 2.3956 (2.4195)	Entropy 0.97764 (0.97870)	Top-1 acc 64.453 (65.992)	Top-5 acc 85.938 (85.039)	lr 0.00825
Train [74][810/3239]	Time 0.216 (0.651)	Data Time 0.001 (0.052)	Loss 2.6860 (2.4200)	Entropy 0.97764 (0.97869)	Top-1 acc 59.766 (65.974)	Top-5 acc 79.297 (85.033)	lr 0.00825
Train [74][820/3239]	Time 0.221 (0.648)	Data Time 0.001 (0.051)	Loss 2.3410 (2.4196)	Entropy 0.97761 (0.97868)	Top-1 acc 66.406 (65.981)	Top-5 acc 87.891 (85.045)	lr 0.00825
Train [74][830/3239]	Time 0.221 (0.646)	Data Time 0.001 (0.051)	Loss 2.2504 (2.4196)	Entropy 0.97758 (0.97866)	Top-1 acc 71.094 (65.991)	Top-5 acc 89.453 (85.051)	lr 0.00825
Train [74][840/3239]	Time 0.240 (0.644)	Data Time 0.001 (0.050)	Loss 2.4770 (2.4200)	Entropy 0.97750 (0.97865)	Top-1 acc 66.016 (65.988)	Top-5 acc 83.203 (85.045)	lr 0.00825
Train [74][850/3239]	Time 0.227 (0.642)	Data Time 0.002 (0.049)	Loss 2.3273 (2.4201)	Entropy 0.97750 (0.97864)	Top-1 acc 66.406 (65.976)	Top-5 acc 86.328 (85.041)	lr 0.00825
Train [74][860/3239]	Time 0.234 (0.640)	Data Time 0.001 (0.049)	Loss 2.4308 (2.4209)	Entropy 0.97745 (0.97862)	Top-1 acc 62.891 (65.961)	Top-5 acc 83.984 (85.022)	lr 0.00825
Train [74][870/3239]	Time 0.244 (0.638)	Data Time 0.001 (0.048)	Loss 2.5619 (2.4211)	Entropy 0.97743 (0.97861)	Top-1 acc 65.234 (65.961)	Top-5 acc 79.688 (85.010)	lr 0.00824
Train [74][880/3239]	Time 0.243 (0.636)	Data Time 0.001 (0.048)	Loss 2.3992 (2.4214)	Entropy 0.97740 (0.97860)	Top-1 acc 67.578 (65.951)	Top-5 acc 85.156 (85.006)	lr 0.00824
Train [74][890/3239]	Time 2.610 (0.635)	Data Time 0.001 (0.047)	Loss 2.3449 (2.4213)	Entropy 0.97740 (0.97858)	Top-1 acc 66.406 (65.963)	Top-5 acc 86.719 (85.012)	lr 0.00824
Train [74][900/3239]	Time 0.222 (0.630)	Data Time 0.001 (0.047)	Loss 2.2524 (2.4207)	Entropy 0.97739 (0.97857)	Top-1 acc 67.578 (65.975)	Top-5 acc 89.062 (85.024)	lr 0.00824
Train [74][910/3239]	Time 0.294 (0.685)	Data Time 0.003 (0.046)	Loss 2.4441 (2.4202)	Entropy 0.97732 (0.97856)	Top-1 acc 67.969 (65.980)	Top-5 acc 83.984 (85.038)	lr 0.00824
Train [74][920/3239]	Time 0.223 (0.683)	Data Time 0.002 (0.046)	Loss 2.1086 (2.4200)	Entropy 0.97730 (0.97854)	Top-1 acc 75.000 (65.988)	Top-5 acc 92.578 (85.046)	lr 0.00824
Train [74][930/3239]	Time 0.240 (0.681)	Data Time 0.002 (0.045)	Loss 2.4709 (2.4203)	Entropy 0.97724 (0.97853)	Top-1 acc 64.844 (65.979)	Top-5 acc 82.422 (85.048)	lr 0.00824
Train [74][940/3239]	Time 0.254 (0.679)	Data Time 0.002 (0.045)	Loss 2.3887 (2.4203)	Entropy 0.97711 (0.97852)	Top-1 acc 66.406 (65.974)	Top-5 acc 87.109 (85.040)	lr 0.00824
Train [74][950/3239]	Time 0.246 (0.677)	Data Time 0.001 (0.044)	Loss 2.4184 (2.4201)	Entropy 0.97707 (0.97850)	Top-1 acc 62.891 (65.979)	Top-5 acc 85.938 (85.046)	lr 0.00824
Train [74][960/3239]	Time 0.205 (0.674)	Data Time 0.001 (0.044)	Loss 2.4814 (2.4202)	Entropy 0.97707 (0.97849)	Top-1 acc 64.844 (65.975)	Top-5 acc 84.766 (85.046)	lr 0.00824
Train [74][970/3239]	Time 0.237 (0.672)	Data Time 0.001 (0.044)	Loss 2.4780 (2.4206)	Entropy 0.97709 (0.97847)	Top-1 acc 63.672 (65.960)	Top-5 acc 83.984 (85.039)	lr 0.00824
Train [74][980/3239]	Time 0.355 (0.670)	Data Time 0.001 (0.043)	Loss 2.3042 (2.4201)	Entropy 0.97709 (0.97846)	Top-1 acc 66.797 (65.980)	Top-5 acc 88.672 (85.049)	lr 0.00823
Train [74][990/3239]	Time 0.219 (0.668)	Data Time 0.001 (0.043)	Loss 2.4652 (2.4201)	Entropy 0.97708 (0.97844)	Top-1 acc 62.891 (65.976)	Top-5 acc 84.375 (85.047)	lr 0.00823
Train [74][1000/3239]	Time 2.511 (0.666)	Data Time 0.001 (0.042)	Loss 2.3245 (2.4200)	Entropy 0.97708 (0.97843)	Top-1 acc 65.625 (65.976)	Top-5 acc 85.938 (85.047)	lr 0.00823
Train [74][1010/3239]	Time 0.247 (0.662)	Data Time 0.001 (0.042)	Loss 2.2916 (2.4200)	Entropy 0.97697 (0.97841)	Top-1 acc 68.359 (65.970)	Top-5 acc 87.500 (85.052)	lr 0.00823
Train [74][1020/3239]	Time 0.247 (0.660)	Data Time 0.001 (0.041)	Loss 2.4146 (2.4200)	Entropy 0.97689 (0.97840)	Top-1 acc 64.453 (65.969)	Top-5 acc 87.109 (85.058)	lr 0.00823
Train [74][1030/3239]	Time 0.227 (0.658)	Data Time 0.001 (0.041)	Loss 2.5090 (2.4207)	Entropy 0.97688 (0.97839)	Top-1 acc 66.797 (65.946)	Top-5 acc 80.469 (85.041)	lr 0.00823
Train [74][1040/3239]	Time 0.232 (0.656)	Data Time 0.001 (0.041)	Loss 2.4024 (2.4208)	Entropy 0.97688 (0.97837)	Top-1 acc 65.625 (65.941)	Top-5 acc 86.719 (85.047)	lr 0.00823
Train [74][1050/3239]	Time 0.222 (0.655)	Data Time 0.001 (0.040)	Loss 2.5808 (2.4205)	Entropy 0.97690 (0.97836)	Top-1 acc 64.453 (65.947)	Top-5 acc 79.297 (85.047)	lr 0.00823
Train [74][1060/3239]	Time 0.222 (0.653)	Data Time 0.001 (0.040)	Loss 2.4932 (2.4206)	Entropy 0.97678 (0.97834)	Top-1 acc 63.281 (65.953)	Top-5 acc 84.766 (85.045)	lr 0.00823
Train [74][1070/3239]	Time 0.339 (0.651)	Data Time 0.001 (0.040)	Loss 2.4850 (2.4208)	Entropy 0.97672 (0.97833)	Top-1 acc 64.453 (65.945)	Top-5 acc 83.203 (85.035)	lr 0.00823
Train [74][1080/3239]	Time 0.235 (0.650)	Data Time 0.001 (0.039)	Loss 2.3696 (2.4207)	Entropy 0.97671 (0.97831)	Top-1 acc 64.062 (65.941)	Top-5 acc 86.328 (85.032)	lr 0.00822
Train [74][1090/3239]	Time 0.216 (0.648)	Data Time 0.001 (0.039)	Loss 2.2742 (2.4201)	Entropy 0.97665 (0.97830)	Top-1 acc 68.359 (65.960)	Top-5 acc 87.109 (85.043)	lr 0.00822
Train [74][1100/3239]	Time 0.262 (0.646)	Data Time 0.002 (0.039)	Loss 2.4003 (2.4214)	Entropy 0.97654 (0.97828)	Top-1 acc 71.484 (65.934)	Top-5 acc 86.328 (85.027)	lr 0.00822
Train [74][1110/3239]	Time 2.478 (0.645)	Data Time 0.001 (0.038)	Loss 2.1666 (2.4212)	Entropy 0.97654 (0.97827)	Top-1 acc 69.141 (65.938)	Top-5 acc 89.844 (85.026)	lr 0.00822
Train [74][1120/3239]	Time 0.242 (0.641)	Data Time 0.001 (0.038)	Loss 2.3537 (2.4217)	Entropy 0.97652 (0.97825)	Top-1 acc 64.453 (65.926)	Top-5 acc 87.109 (85.022)	lr 0.00822
Train [74][1130/3239]	Time 0.231 (0.639)	Data Time 0.001 (0.038)	Loss 2.4420 (2.4216)	Entropy 0.97650 (0.97824)	Top-1 acc 66.016 (65.922)	Top-5 acc 83.594 (85.021)	lr 0.00822
Train [74][1140/3239]	Time 0.231 (0.638)	Data Time 0.001 (0.037)	Loss 2.4321 (2.4218)	Entropy 0.97647 (0.97822)	Top-1 acc 64.062 (65.917)	Top-5 acc 83.594 (85.018)	lr 0.00822
Train [74][1150/3239]	Time 0.219 (0.637)	Data Time 0.001 (0.037)	Loss 2.3994 (2.4215)	Entropy 0.97647 (0.97821)	Top-1 acc 64.062 (65.918)	Top-5 acc 85.547 (85.027)	lr 0.00822
Train [74][1160/3239]	Time 0.330 (0.635)	Data Time 0.001 (0.037)	Loss 2.3290 (2.4214)	Entropy 0.97646 (0.97819)	Top-1 acc 69.141 (65.911)	Top-5 acc 85.156 (85.032)	lr 0.00822
Train [74][1170/3239]	Time 0.220 (0.634)	Data Time 0.001 (0.036)	Loss 2.4930 (2.4211)	Entropy 0.97640 (0.97818)	Top-1 acc 66.797 (65.919)	Top-5 acc 83.984 (85.041)	lr 0.00822
Train [74][1180/3239]	Time 0.231 (0.632)	Data Time 0.001 (0.036)	Loss 2.6260 (2.4211)	Entropy 0.97629 (0.97816)	Top-1 acc 62.891 (65.923)	Top-5 acc 80.078 (85.042)	lr 0.00822
Train [74][1190/3239]	Time 0.224 (0.631)	Data Time 0.001 (0.036)	Loss 2.2904 (2.4209)	Entropy 0.97632 (0.97814)	Top-1 acc 66.797 (65.930)	Top-5 acc 88.281 (85.048)	lr 0.00821
Train [74][1200/3239]	Time 0.221 (0.630)	Data Time 0.001 (0.035)	Loss 2.5193 (2.4211)	Entropy 0.97626 (0.97813)	Top-1 acc 65.625 (65.926)	Top-5 acc 83.203 (85.041)	lr 0.00821
Train [74][1210/3239]	Time 0.282 (0.628)	Data Time 0.001 (0.035)	Loss 2.3453 (2.4216)	Entropy 0.97626 (0.97811)	Top-1 acc 67.578 (65.913)	Top-5 acc 86.328 (85.031)	lr 0.00821
Train [74][1220/3239]	Time 2.464 (0.627)	Data Time 0.001 (0.035)	Loss 2.4473 (2.4218)	Entropy 0.97626 (0.97810)	Top-1 acc 64.844 (65.908)	Top-5 acc 84.375 (85.029)	lr 0.00821
Train [74][1230/3239]	Time 0.227 (0.624)	Data Time 0.001 (0.035)	Loss 2.4642 (2.4218)	Entropy 0.97632 (0.97808)	Top-1 acc 65.625 (65.906)	Top-5 acc 83.984 (85.028)	lr 0.00821
Train [74][1240/3239]	Time 0.216 (0.623)	Data Time 0.001 (0.034)	Loss 2.3094 (2.4219)	Entropy 0.97630 (0.97807)	Top-1 acc 68.359 (65.897)	Top-5 acc 87.500 (85.027)	lr 0.00821
Train [74][1250/3239]	Time 0.279 (0.622)	Data Time 0.001 (0.034)	Loss 2.3707 (2.4220)	Entropy 0.97630 (0.97806)	Top-1 acc 68.750 (65.896)	Top-5 acc 86.719 (85.024)	lr 0.00821
Train [74][1260/3239]	Time 0.231 (0.620)	Data Time 0.001 (0.034)	Loss 2.2400 (2.4219)	Entropy 0.97622 (0.97804)	Top-1 acc 70.312 (65.890)	Top-5 acc 87.891 (85.023)	lr 0.00821
Train [74][1270/3239]	Time 0.242 (0.660)	Data Time 0.003 (0.034)	Loss 2.3965 (2.4215)	Entropy 0.97619 (0.97803)	Top-1 acc 66.016 (65.899)	Top-5 acc 85.547 (85.031)	lr 0.00821
Train [74][1280/3239]	Time 0.220 (0.659)	Data Time 0.003 (0.033)	Loss 2.5461 (2.4215)	Entropy 0.97618 (0.97801)	Top-1 acc 61.719 (65.899)	Top-5 acc 86.328 (85.039)	lr 0.00821
Train [74][1290/3239]	Time 0.225 (0.658)	Data Time 0.001 (0.033)	Loss 2.4540 (2.4213)	Entropy 0.97611 (0.97800)	Top-1 acc 67.188 (65.901)	Top-5 acc 85.547 (85.045)	lr 0.00820
Train [74][1300/3239]	Time 0.226 (0.657)	Data Time 0.001 (0.033)	Loss 2.4492 (2.4208)	Entropy 0.97606 (0.97798)	Top-1 acc 64.844 (65.917)	Top-5 acc 85.156 (85.051)	lr 0.00820
Train [74][1310/3239]	Time 0.183 (0.655)	Data Time 0.001 (0.033)	Loss 2.6428 (2.4212)	Entropy 0.97606 (0.97797)	Top-1 acc 59.766 (65.914)	Top-5 acc 81.250 (85.042)	lr 0.00820
Train [74][1320/3239]	Time 0.246 (0.654)	Data Time 0.001 (0.032)	Loss 2.5317 (2.4212)	Entropy 0.97605 (0.97795)	Top-1 acc 64.844 (65.909)	Top-5 acc 82.812 (85.042)	lr 0.00820
Train [74][1330/3239]	Time 2.428 (0.653)	Data Time 0.002 (0.032)	Loss 2.3659 (2.4215)	Entropy 0.97605 (0.97794)	Top-1 acc 67.578 (65.900)	Top-5 acc 87.109 (85.037)	lr 0.00820
Train [74][1340/3239]	Time 0.301 (0.650)	Data Time 0.001 (0.032)	Loss 2.5073 (2.4216)	Entropy 0.97596 (0.97792)	Top-1 acc 66.016 (65.902)	Top-5 acc 82.812 (85.041)	lr 0.00820
Train [74][1350/3239]	Time 0.240 (0.648)	Data Time 0.001 (0.032)	Loss 2.5537 (2.4215)	Entropy 0.97580 (0.97791)	Top-1 acc 64.062 (65.907)	Top-5 acc 80.469 (85.041)	lr 0.00820
Train [74][1360/3239]	Time 0.233 (0.647)	Data Time 0.001 (0.032)	Loss 2.3714 (2.4215)	Entropy 0.97582 (0.97789)	Top-1 acc 66.797 (65.906)	Top-5 acc 86.719 (85.041)	lr 0.00820
Train [74][1370/3239]	Time 0.225 (0.646)	Data Time 0.001 (0.031)	Loss 2.4949 (2.4219)	Entropy 0.97580 (0.97788)	Top-1 acc 63.672 (65.893)	Top-5 acc 83.203 (85.032)	lr 0.00820
Train [74][1380/3239]	Time 0.221 (0.644)	Data Time 0.001 (0.031)	Loss 2.3695 (2.4221)	Entropy 0.97576 (0.97786)	Top-1 acc 65.234 (65.883)	Top-5 acc 84.375 (85.029)	lr 0.00820
Train [74][1390/3239]	Time 0.234 (0.643)	Data Time 0.001 (0.031)	Loss 2.6700 (2.4223)	Entropy 0.97572 (0.97785)	Top-1 acc 59.375 (65.874)	Top-5 acc 78.906 (85.026)	lr 0.00820
Train [74][1400/3239]	Time 0.215 (0.642)	Data Time 0.001 (0.031)	Loss 2.4477 (2.4224)	Entropy 0.97577 (0.97783)	Top-1 acc 65.625 (65.870)	Top-5 acc 83.594 (85.026)	lr 0.00819
Train [74][1410/3239]	Time 0.230 (0.641)	Data Time 0.001 (0.030)	Loss 2.3449 (2.4224)	Entropy 0.97578 (0.97782)	Top-1 acc 66.797 (65.865)	Top-5 acc 87.500 (85.029)	lr 0.00819
Train [74][1420/3239]	Time 0.221 (0.639)	Data Time 0.001 (0.030)	Loss 2.5173 (2.4223)	Entropy 0.97573 (0.97780)	Top-1 acc 65.625 (65.870)	Top-5 acc 82.812 (85.030)	lr 0.00819
Train [74][1430/3239]	Time 0.317 (0.638)	Data Time 0.001 (0.030)	Loss 2.5639 (2.4222)	Entropy 0.97571 (0.97779)	Top-1 acc 64.844 (65.869)	Top-5 acc 83.203 (85.026)	lr 0.00819
Train [74][1440/3239]	Time 2.672 (0.637)	Data Time 0.001 (0.030)	Loss 2.3733 (2.4220)	Entropy 0.97571 (0.97778)	Top-1 acc 68.750 (65.873)	Top-5 acc 85.938 (85.028)	lr 0.00819
Train [74][1450/3239]	Time 0.244 (0.634)	Data Time 0.001 (0.030)	Loss 2.5867 (2.4221)	Entropy 0.97570 (0.97776)	Top-1 acc 62.500 (65.864)	Top-5 acc 79.297 (85.025)	lr 0.00819
Train [74][1460/3239]	Time 0.223 (0.633)	Data Time 0.001 (0.029)	Loss 2.4680 (2.4218)	Entropy 0.97566 (0.97775)	Top-1 acc 66.797 (65.868)	Top-5 acc 85.938 (85.030)	lr 0.00819
Train [74][1470/3239]	Time 0.326 (0.632)	Data Time 0.001 (0.029)	Loss 2.6686 (2.4220)	Entropy 0.97561 (0.97773)	Top-1 acc 59.766 (65.862)	Top-5 acc 77.344 (85.022)	lr 0.00819
Train [74][1480/3239]	Time 0.232 (0.631)	Data Time 0.001 (0.029)	Loss 2.4765 (2.4218)	Entropy 0.97553 (0.97772)	Top-1 acc 65.234 (65.868)	Top-5 acc 85.156 (85.028)	lr 0.00819
Train [74][1490/3239]	Time 0.246 (0.630)	Data Time 0.001 (0.029)	Loss 2.4538 (2.4219)	Entropy 0.97548 (0.97770)	Top-1 acc 64.453 (65.866)	Top-5 acc 85.156 (85.026)	lr 0.00819
Train [74][1500/3239]	Time 0.237 (0.629)	Data Time 0.001 (0.029)	Loss 2.5135 (2.4213)	Entropy 0.97551 (0.97769)	Top-1 acc 65.625 (65.883)	Top-5 acc 82.422 (85.040)	lr 0.00818
Train [74][1510/3239]	Time 0.259 (0.628)	Data Time 0.002 (0.029)	Loss 2.5407 (2.4215)	Entropy 0.97546 (0.97767)	Top-1 acc 66.016 (65.887)	Top-5 acc 81.641 (85.037)	lr 0.00818
Train [74][1520/3239]	Time 0.227 (0.627)	Data Time 0.001 (0.028)	Loss 2.3936 (2.4214)	Entropy 0.97542 (0.97766)	Top-1 acc 66.016 (65.886)	Top-5 acc 86.328 (85.040)	lr 0.00818
Train [74][1530/3239]	Time 0.231 (0.626)	Data Time 0.001 (0.028)	Loss 2.5066 (2.4217)	Entropy 0.97544 (0.97764)	Top-1 acc 63.672 (65.882)	Top-5 acc 82.812 (85.036)	lr 0.00818
Train [74][1540/3239]	Time 0.274 (0.625)	Data Time 0.001 (0.028)	Loss 2.3653 (2.4214)	Entropy 0.97546 (0.97763)	Top-1 acc 66.016 (65.884)	Top-5 acc 85.156 (85.039)	lr 0.00818
Train [74][1550/3239]	Time 2.609 (0.624)	Data Time 0.002 (0.028)	Loss 2.3227 (2.4213)	Entropy 0.97546 (0.97762)	Top-1 acc 69.531 (65.882)	Top-5 acc 86.328 (85.042)	lr 0.00818
Train [74][1560/3239]	Time 0.346 (0.622)	Data Time 0.001 (0.028)	Loss 2.2698 (2.4211)	Entropy 0.97543 (0.97760)	Top-1 acc 66.797 (65.880)	Top-5 acc 90.234 (85.045)	lr 0.00818
Train [74][1570/3239]	Time 0.232 (0.621)	Data Time 0.001 (0.028)	Loss 2.6997 (2.4213)	Entropy 0.97544 (0.97759)	Top-1 acc 59.766 (65.879)	Top-5 acc 81.250 (85.041)	lr 0.00818
Train [74][1580/3239]	Time 0.246 (0.620)	Data Time 0.001 (0.027)	Loss 2.4256 (2.4213)	Entropy 0.97545 (0.97757)	Top-1 acc 63.672 (65.878)	Top-5 acc 87.891 (85.045)	lr 0.00818
Train [74][1590/3239]	Time 0.234 (0.619)	Data Time 0.001 (0.027)	Loss 2.6066 (2.4214)	Entropy 0.97542 (0.97756)	Top-1 acc 59.375 (65.877)	Top-5 acc 81.250 (85.038)	lr 0.00818
Train [74][1600/3239]	Time 0.286 (0.618)	Data Time 0.001 (0.027)	Loss 2.3853 (2.4216)	Entropy 0.97536 (0.97755)	Top-1 acc 69.922 (65.871)	Top-5 acc 86.719 (85.035)	lr 0.00818
Train [74][1610/3239]	Time 0.220 (0.617)	Data Time 0.002 (0.027)	Loss 2.3441 (2.4214)	Entropy 0.97528 (0.97753)	Top-1 acc 66.406 (65.874)	Top-5 acc 87.500 (85.041)	lr 0.00817
Train [74][1620/3239]	Time 0.234 (0.616)	Data Time 0.001 (0.027)	Loss 2.4029 (2.4213)	Entropy 0.97515 (0.97752)	Top-1 acc 64.453 (65.878)	Top-5 acc 85.547 (85.041)	lr 0.00817
Train [74][1630/3239]	Time 0.229 (0.649)	Data Time 0.002 (0.027)	Loss 2.4199 (2.4215)	Entropy 0.97514 (0.97750)	Top-1 acc 67.188 (65.874)	Top-5 acc 85.156 (85.035)	lr 0.00817
Train [74][1640/3239]	Time 0.231 (0.648)	Data Time 0.002 (0.026)	Loss 2.3214 (2.4215)	Entropy 0.97512 (0.97749)	Top-1 acc 68.359 (65.870)	Top-5 acc 88.672 (85.041)	lr 0.00817
Train [74][1650/3239]	Time 0.222 (0.647)	Data Time 0.001 (0.026)	Loss 2.5208 (2.4215)	Entropy 0.97510 (0.97748)	Top-1 acc 61.719 (65.864)	Top-5 acc 82.422 (85.046)	lr 0.00817
Train [74][1660/3239]	Time 2.495 (0.646)	Data Time 0.001 (0.026)	Loss 2.4620 (2.4215)	Entropy 0.97510 (0.97746)	Top-1 acc 66.016 (65.860)	Top-5 acc 83.203 (85.047)	lr 0.00817
Train [74][1670/3239]	Time 0.246 (0.643)	Data Time 0.002 (0.026)	Loss 2.3850 (2.4215)	Entropy 0.97511 (0.97745)	Top-1 acc 66.406 (65.856)	Top-5 acc 84.375 (85.045)	lr 0.00817
Train [74][1680/3239]	Time 0.272 (0.642)	Data Time 0.002 (0.026)	Loss 2.4272 (2.4215)	Entropy 0.97506 (0.97743)	Top-1 acc 69.141 (65.856)	Top-5 acc 84.375 (85.046)	lr 0.00817
Train [74][1690/3239]	Time 0.302 (0.641)	Data Time 0.001 (0.026)	Loss 2.4145 (2.4218)	Entropy 0.97499 (0.97742)	Top-1 acc 64.844 (65.847)	Top-5 acc 87.500 (85.042)	lr 0.00817
Train [74][1700/3239]	Time 0.247 (0.641)	Data Time 0.001 (0.026)	Loss 2.3296 (2.4220)	Entropy 0.97495 (0.97740)	Top-1 acc 67.188 (65.844)	Top-5 acc 88.672 (85.036)	lr 0.00817
Train [74][1710/3239]	Time 0.229 (0.640)	Data Time 0.001 (0.025)	Loss 2.4901 (2.4218)	Entropy 0.97495 (0.97739)	Top-1 acc 63.281 (65.849)	Top-5 acc 83.594 (85.042)	lr 0.00817
Train [74][1720/3239]	Time 0.228 (0.639)	Data Time 0.001 (0.025)	Loss 2.4943 (2.4222)	Entropy 0.97488 (0.97738)	Top-1 acc 62.500 (65.835)	Top-5 acc 83.984 (85.035)	lr 0.00816
Train [74][1730/3239]	Time 0.227 (0.638)	Data Time 0.001 (0.025)	Loss 2.5171 (2.4224)	Entropy 0.97489 (0.97736)	Top-1 acc 64.844 (65.831)	Top-5 acc 82.812 (85.034)	lr 0.00816
Train [74][1740/3239]	Time 0.354 (0.637)	Data Time 0.001 (0.025)	Loss 2.3933 (2.4224)	Entropy 0.97489 (0.97735)	Top-1 acc 66.797 (65.833)	Top-5 acc 83.984 (85.034)	lr 0.00816
Train [74][1750/3239]	Time 0.211 (0.636)	Data Time 0.001 (0.025)	Loss 2.4015 (2.4222)	Entropy 0.97487 (0.97733)	Top-1 acc 66.406 (65.837)	Top-5 acc 84.375 (85.037)	lr 0.00816
Train [74][1760/3239]	Time 0.306 (0.635)	Data Time 0.001 (0.025)	Loss 2.4291 (2.4222)	Entropy 0.97486 (0.97732)	Top-1 acc 66.797 (65.841)	Top-5 acc 85.938 (85.040)	lr 0.00816
Train [74][1770/3239]	Time 2.531 (0.634)	Data Time 0.002 (0.025)	Loss 2.4574 (2.4220)	Entropy 0.97486 (0.97731)	Top-1 acc 66.797 (65.843)	Top-5 acc 83.594 (85.043)	lr 0.00816
Train [74][1780/3239]	Time 0.240 (0.632)	Data Time 0.001 (0.024)	Loss 2.4704 (2.4219)	Entropy 0.97489 (0.97729)	Top-1 acc 66.016 (65.849)	Top-5 acc 83.984 (85.042)	lr 0.00816
Train [74][1790/3239]	Time 0.240 (0.631)	Data Time 0.001 (0.024)	Loss 2.2557 (2.4219)	Entropy 0.97486 (0.97728)	Top-1 acc 70.312 (65.849)	Top-5 acc 87.891 (85.041)	lr 0.00816
Train [74][1800/3239]	Time 0.245 (0.630)	Data Time 0.001 (0.024)	Loss 2.1909 (2.4219)	Entropy 0.97471 (0.97726)	Top-1 acc 72.656 (65.849)	Top-5 acc 88.672 (85.040)	lr 0.00816
Train [74][1810/3239]	Time 0.230 (0.630)	Data Time 0.001 (0.024)	Loss 2.4968 (2.4216)	Entropy 0.97462 (0.97725)	Top-1 acc 61.719 (65.852)	Top-5 acc 81.641 (85.044)	lr 0.00816
Train [74][1820/3239]	Time 0.250 (0.629)	Data Time 0.001 (0.024)	Loss 2.6121 (2.4215)	Entropy 0.97464 (0.97724)	Top-1 acc 61.719 (65.855)	Top-5 acc 80.078 (85.045)	lr 0.00815
Train [74][1830/3239]	Time 0.228 (0.628)	Data Time 0.001 (0.024)	Loss 2.7261 (2.4219)	Entropy 0.97466 (0.97722)	Top-1 acc 57.422 (65.846)	Top-5 acc 80.469 (85.038)	lr 0.00815
Train [74][1840/3239]	Time 0.231 (0.627)	Data Time 0.001 (0.024)	Loss 2.4458 (2.4217)	Entropy 0.97462 (0.97721)	Top-1 acc 64.062 (65.849)	Top-5 acc 84.375 (85.042)	lr 0.00815
Train [74][1850/3239]	Time 0.229 (0.627)	Data Time 0.001 (0.024)	Loss 2.2796 (2.4215)	Entropy 0.97451 (0.97719)	Top-1 acc 71.484 (65.858)	Top-5 acc 88.672 (85.043)	lr 0.00815
Train [74][1860/3239]	Time 0.247 (0.626)	Data Time 0.002 (0.023)	Loss 2.3471 (2.4215)	Entropy 0.97438 (0.97718)	Top-1 acc 68.359 (65.856)	Top-5 acc 85.547 (85.043)	lr 0.00815
Train [74][1870/3239]	Time 0.315 (0.625)	Data Time 0.002 (0.023)	Loss 2.3299 (2.4216)	Entropy 0.97430 (0.97716)	Top-1 acc 68.750 (65.853)	Top-5 acc 86.719 (85.039)	lr 0.00815
Train [74][1880/3239]	Time 2.535 (0.624)	Data Time 0.001 (0.023)	Loss 2.5170 (2.4217)	Entropy 0.97430 (0.97715)	Top-1 acc 62.500 (65.850)	Top-5 acc 80.859 (85.039)	lr 0.00815
Train [74][1890/3239]	Time 0.239 (0.622)	Data Time 0.001 (0.023)	Loss 2.4534 (2.4217)	Entropy 0.97423 (0.97713)	Top-1 acc 64.844 (65.854)	Top-5 acc 83.984 (85.041)	lr 0.00815
Train [74][1900/3239]	Time 0.222 (0.621)	Data Time 0.001 (0.023)	Loss 2.2788 (2.4218)	Entropy 0.97423 (0.97712)	Top-1 acc 68.750 (65.851)	Top-5 acc 89.453 (85.043)	lr 0.00815
Train [74][1910/3239]	Time 0.222 (0.621)	Data Time 0.002 (0.023)	Loss 2.3316 (2.4217)	Entropy 0.97427 (0.97710)	Top-1 acc 64.844 (65.850)	Top-5 acc 86.719 (85.044)	lr 0.00815
Train [74][1920/3239]	Time 0.230 (0.620)	Data Time 0.001 (0.023)	Loss 2.1905 (2.4213)	Entropy 0.97417 (0.97709)	Top-1 acc 71.484 (65.857)	Top-5 acc 87.109 (85.052)	lr 0.00815
Train [74][1930/3239]	Time 0.239 (0.619)	Data Time 0.001 (0.023)	Loss 2.4176 (2.4214)	Entropy 0.97417 (0.97707)	Top-1 acc 66.016 (65.849)	Top-5 acc 83.984 (85.050)	lr 0.00814
Train [74][1940/3239]	Time 0.237 (0.618)	Data Time 0.001 (0.023)	Loss 2.3101 (2.4216)	Entropy 0.97414 (0.97706)	Top-1 acc 67.188 (65.844)	Top-5 acc 88.281 (85.050)	lr 0.00814
Train [74][1950/3239]	Time 0.254 (0.618)	Data Time 0.002 (0.022)	Loss 2.4160 (2.4214)	Entropy 0.97406 (0.97704)	Top-1 acc 67.578 (65.852)	Top-5 acc 82.422 (85.051)	lr 0.00814
Train [74][1960/3239]	Time 0.338 (0.617)	Data Time 0.002 (0.022)	Loss 2.2282 (2.4212)	Entropy 0.97407 (0.97703)	Top-1 acc 69.531 (65.857)	Top-5 acc 89.844 (85.053)	lr 0.00814
Train [74][1970/3239]	Time 0.235 (0.616)	Data Time 0.001 (0.022)	Loss 2.4033 (2.4212)	Entropy 0.97402 (0.97701)	Top-1 acc 66.016 (65.859)	Top-5 acc 87.109 (85.054)	lr 0.00814
Train [74][1980/3239]	Time 0.256 (0.616)	Data Time 0.001 (0.022)	Loss 2.3467 (2.4213)	Entropy 0.97399 (0.97700)	Top-1 acc 66.016 (65.848)	Top-5 acc 85.938 (85.056)	lr 0.00814
Train [74][1990/3239]	Time 54.956 (0.641)	Data Time 0.002 (0.022)	Loss 2.4591 (2.4214)	Entropy 0.97399 (0.97698)	Top-1 acc 66.016 (65.852)	Top-5 acc 83.984 (85.052)	lr 0.00814
Train [74][2000/3239]	Time 0.317 (0.640)	Data Time 0.002 (0.022)	Loss 2.4643 (2.4211)	Entropy 0.97398 (0.97697)	Top-1 acc 65.625 (65.857)	Top-5 acc 83.984 (85.057)	lr 0.00814
Train [74][2010/3239]	Time 0.241 (0.639)	Data Time 0.001 (0.022)	Loss 2.5042 (2.4211)	Entropy 0.97395 (0.97695)	Top-1 acc 64.844 (65.859)	Top-5 acc 85.156 (85.058)	lr 0.00814
Train [74][2020/3239]	Time 0.230 (0.638)	Data Time 0.001 (0.022)	Loss 2.4942 (2.4212)	Entropy 0.97397 (0.97694)	Top-1 acc 63.281 (65.853)	Top-5 acc 82.422 (85.058)	lr 0.00814
Train [74][2030/3239]	Time 0.237 (0.637)	Data Time 0.001 (0.022)	Loss 2.5208 (2.4214)	Entropy 0.97393 (0.97692)	Top-1 acc 64.062 (65.843)	Top-5 acc 83.984 (85.056)	lr 0.00813
Train [74][2040/3239]	Time 0.224 (0.636)	Data Time 0.001 (0.022)	Loss 2.2504 (2.4213)	Entropy 0.97387 (0.97691)	Top-1 acc 72.266 (65.847)	Top-5 acc 86.719 (85.057)	lr 0.00813
Train [74][2050/3239]	Time 0.231 (0.636)	Data Time 0.001 (0.021)	Loss 2.5073 (2.4211)	Entropy 0.97381 (0.97689)	Top-1 acc 66.797 (65.854)	Top-5 acc 84.375 (85.063)	lr 0.00813
Train [74][2060/3239]	Time 0.233 (0.635)	Data Time 0.001 (0.021)	Loss 2.4236 (2.4211)	Entropy 0.97379 (0.97688)	Top-1 acc 62.109 (65.852)	Top-5 acc 84.766 (85.063)	lr 0.00813
Train [74][2070/3239]	Time 0.228 (0.634)	Data Time 0.001 (0.021)	Loss 2.4574 (2.4213)	Entropy 0.97378 (0.97686)	Top-1 acc 65.234 (65.846)	Top-5 acc 83.203 (85.057)	lr 0.00813
Train [74][2080/3239]	Time 0.215 (0.633)	Data Time 0.001 (0.021)	Loss 2.3479 (2.4215)	Entropy 0.97377 (0.97685)	Top-1 acc 66.406 (65.844)	Top-5 acc 83.594 (85.049)	lr 0.00813
Train [74][2090/3239]	Time 0.329 (0.633)	Data Time 0.001 (0.021)	Loss 2.4650 (2.4216)	Entropy 0.97374 (0.97683)	Top-1 acc 65.625 (65.845)	Top-5 acc 85.156 (85.044)	lr 0.00813
Train [74][2100/3239]	Time 2.454 (0.632)	Data Time 0.001 (0.021)	Loss 2.3539 (2.4215)	Entropy 0.97374 (0.97682)	Top-1 acc 67.969 (65.851)	Top-5 acc 85.938 (85.045)	lr 0.00813
Train [74][2110/3239]	Time 0.231 (0.630)	Data Time 0.001 (0.021)	Loss 2.3270 (2.4214)	Entropy 0.97371 (0.97680)	Top-1 acc 67.578 (65.852)	Top-5 acc 86.719 (85.044)	lr 0.00813
Train [74][2120/3239]	Time 0.227 (0.629)	Data Time 0.001 (0.021)	Loss 2.3226 (2.4210)	Entropy 0.97367 (0.97679)	Top-1 acc 67.188 (65.857)	Top-5 acc 86.328 (85.053)	lr 0.00813
Train [74][2130/3239]	Time 0.242 (0.628)	Data Time 0.001 (0.021)	Loss 2.3144 (2.4209)	Entropy 0.97358 (0.97677)	Top-1 acc 67.969 (65.857)	Top-5 acc 89.062 (85.055)	lr 0.00813
Train [74][2140/3239]	Time 0.211 (0.628)	Data Time 0.002 (0.021)	Loss 2.4033 (2.4207)	Entropy 0.97359 (0.97676)	Top-1 acc 67.969 (65.863)	Top-5 acc 86.328 (85.055)	lr 0.00812
Train [74][2150/3239]	Time 0.227 (0.627)	Data Time 0.001 (0.021)	Loss 2.4520 (2.4207)	Entropy 0.97356 (0.97674)	Top-1 acc 64.844 (65.865)	Top-5 acc 83.984 (85.055)	lr 0.00812
Train [74][2160/3239]	Time 0.263 (0.626)	Data Time 0.002 (0.020)	Loss 2.6204 (2.4209)	Entropy 0.97352 (0.97673)	Top-1 acc 63.281 (65.861)	Top-5 acc 81.250 (85.052)	lr 0.00812
Train [74][2170/3239]	Time 0.223 (0.626)	Data Time 0.002 (0.020)	Loss 2.4765 (2.4206)	Entropy 0.97348 (0.97671)	Top-1 acc 63.281 (65.865)	Top-5 acc 84.766 (85.057)	lr 0.00812
Train [74][2180/3239]	Time 0.326 (0.625)	Data Time 0.001 (0.020)	Loss 2.4915 (2.4206)	Entropy 0.97345 (0.97670)	Top-1 acc 65.625 (65.866)	Top-5 acc 82.031 (85.055)	lr 0.00812
Train [74][2190/3239]	Time 0.216 (0.624)	Data Time 0.001 (0.020)	Loss 2.4342 (2.4205)	Entropy 0.97341 (0.97668)	Top-1 acc 64.453 (65.870)	Top-5 acc 86.328 (85.057)	lr 0.00812
Train [74][2200/3239]	Time 0.263 (0.623)	Data Time 0.001 (0.020)	Loss 2.3668 (2.4208)	Entropy 0.97337 (0.97667)	Top-1 acc 66.016 (65.863)	Top-5 acc 85.938 (85.056)	lr 0.00812
Train [74][2210/3239]	Time 2.614 (0.623)	Data Time 0.037 (0.020)	Loss 2.3449 (2.4207)	Entropy 0.97337 (0.97666)	Top-1 acc 66.797 (65.865)	Top-5 acc 86.328 (85.055)	lr 0.00812
Train [74][2220/3239]	Time 0.297 (0.621)	Data Time 0.002 (0.020)	Loss 2.4086 (2.4207)	Entropy 0.97334 (0.97664)	Top-1 acc 66.016 (65.864)	Top-5 acc 85.547 (85.055)	lr 0.00812
Train [74][2230/3239]	Time 0.328 (0.621)	Data Time 0.001 (0.020)	Loss 2.5812 (2.4210)	Entropy 0.97330 (0.97663)	Top-1 acc 62.109 (65.854)	Top-5 acc 83.203 (85.052)	lr 0.00812
Train [74][2240/3239]	Time 0.229 (0.620)	Data Time 0.001 (0.020)	Loss 2.4699 (2.4211)	Entropy 0.97332 (0.97661)	Top-1 acc 62.109 (65.846)	Top-5 acc 85.547 (85.054)	lr 0.00811
Train [74][2250/3239]	Time 0.249 (0.619)	Data Time 0.002 (0.020)	Loss 2.4503 (2.4211)	Entropy 0.97332 (0.97660)	Top-1 acc 67.578 (65.849)	Top-5 acc 83.984 (85.054)	lr 0.00811
Train [74][2260/3239]	Time 0.239 (0.618)	Data Time 0.001 (0.020)	Loss 2.2947 (2.4211)	Entropy 0.97324 (0.97658)	Top-1 acc 68.359 (65.854)	Top-5 acc 88.672 (85.053)	lr 0.00811
Train [74][2270/3239]	Time 0.241 (0.618)	Data Time 0.001 (0.020)	Loss 2.4265 (2.4209)	Entropy 0.97327 (0.97657)	Top-1 acc 67.578 (65.859)	Top-5 acc 82.812 (85.053)	lr 0.00811
Train [74][2280/3239]	Time 0.228 (0.617)	Data Time 0.001 (0.019)	Loss 2.6512 (2.4212)	Entropy 0.97326 (0.97655)	Top-1 acc 61.328 (65.855)	Top-5 acc 79.688 (85.050)	lr 0.00811
Train [74][2290/3239]	Time 0.210 (0.617)	Data Time 0.001 (0.019)	Loss 2.5126 (2.4213)	Entropy 0.97326 (0.97654)	Top-1 acc 61.719 (65.850)	Top-5 acc 82.422 (85.048)	lr 0.00811
Train [74][2300/3239]	Time 0.223 (0.616)	Data Time 0.001 (0.019)	Loss 2.3366 (2.4214)	Entropy 0.97320 (0.97652)	Top-1 acc 67.578 (65.846)	Top-5 acc 87.109 (85.049)	lr 0.00811
Train [74][2310/3239]	Time 0.213 (0.616)	Data Time 0.001 (0.019)	Loss 2.3134 (2.4211)	Entropy 0.97326 (0.97651)	Top-1 acc 68.750 (65.850)	Top-5 acc 87.500 (85.054)	lr 0.00811
Train [74][2320/3239]	Time 2.553 (0.615)	Data Time 0.001 (0.019)	Loss 2.4097 (2.4214)	Entropy 0.97326 (0.97649)	Top-1 acc 66.797 (65.848)	Top-5 acc 85.156 (85.048)	lr 0.00811
Train [74][2330/3239]	Time 0.220 (0.613)	Data Time 0.001 (0.019)	Loss 2.3325 (2.4214)	Entropy 0.97318 (0.97648)	Top-1 acc 72.266 (65.850)	Top-5 acc 88.281 (85.050)	lr 0.00811
Train [74][2340/3239]	Time 0.234 (0.613)	Data Time 0.007 (0.019)	Loss 2.5491 (2.4214)	Entropy 0.97321 (0.97647)	Top-1 acc 66.016 (65.850)	Top-5 acc 81.250 (85.050)	lr 0.00811
Train [74][2350/3239]	Time 0.256 (0.612)	Data Time 0.001 (0.019)	Loss 2.4962 (2.4215)	Entropy 0.97318 (0.97645)	Top-1 acc 68.750 (65.852)	Top-5 acc 83.203 (85.048)	lr 0.00810
Train [74][2360/3239]	Time 0.345 (0.635)	Data Time 0.002 (0.019)	Loss 2.3632 (2.4212)	Entropy 0.97314 (0.97644)	Top-1 acc 67.188 (65.860)	Top-5 acc 88.672 (85.050)	lr 0.00810
Train [74][2370/3239]	Time 0.214 (0.634)	Data Time 0.002 (0.019)	Loss 2.3688 (2.4212)	Entropy 0.97314 (0.97642)	Top-1 acc 67.578 (65.860)	Top-5 acc 87.891 (85.050)	lr 0.00810
Train [74][2380/3239]	Time 0.228 (0.633)	Data Time 0.001 (0.019)	Loss 2.3170 (2.4214)	Entropy 0.97311 (0.97641)	Top-1 acc 67.578 (65.853)	Top-5 acc 86.719 (85.046)	lr 0.00810
Train [74][2390/3239]	Time 0.233 (0.633)	Data Time 0.001 (0.019)	Loss 2.3971 (2.4213)	Entropy 0.97306 (0.97640)	Top-1 acc 67.969 (65.856)	Top-5 acc 86.328 (85.050)	lr 0.00810
Train [74][2400/3239]	Time 0.232 (0.632)	Data Time 0.001 (0.019)	Loss 2.4643 (2.4213)	Entropy 0.97304 (0.97638)	Top-1 acc 62.891 (65.854)	Top-5 acc 82.812 (85.050)	lr 0.00810
Train [74][2410/3239]	Time 0.239 (0.631)	Data Time 0.002 (0.019)	Loss 2.3242 (2.4212)	Entropy 0.97305 (0.97637)	Top-1 acc 67.188 (65.851)	Top-5 acc 87.891 (85.053)	lr 0.00810
Train [74][2420/3239]	Time 0.269 (0.631)	Data Time 0.001 (0.018)	Loss 2.4383 (2.4210)	Entropy 0.97301 (0.97636)	Top-1 acc 67.578 (65.859)	Top-5 acc 83.203 (85.056)	lr 0.00810
Train [74][2430/3239]	Time 2.507 (0.630)	Data Time 0.001 (0.018)	Loss 2.4040 (2.4209)	Entropy 0.97301 (0.97634)	Top-1 acc 68.359 (65.863)	Top-5 acc 86.719 (85.059)	lr 0.00810
Train [74][2440/3239]	Time 0.238 (0.629)	Data Time 0.001 (0.018)	Loss 2.3335 (2.4210)	Entropy 0.97298 (0.97633)	Top-1 acc 67.188 (65.864)	Top-5 acc 86.328 (85.059)	lr 0.00810
Train [74][2450/3239]	Time 0.339 (0.628)	Data Time 0.001 (0.018)	Loss 2.2497 (2.4212)	Entropy 0.97294 (0.97631)	Top-1 acc 69.141 (65.858)	Top-5 acc 87.891 (85.058)	lr 0.00809
Train [74][2460/3239]	Time 0.223 (0.627)	Data Time 0.001 (0.018)	Loss 2.5123 (2.4214)	Entropy 0.97293 (0.97630)	Top-1 acc 63.281 (65.855)	Top-5 acc 83.594 (85.055)	lr 0.00809
Train [74][2470/3239]	Time 0.263 (0.627)	Data Time 0.002 (0.018)	Loss 2.5774 (2.4216)	Entropy 0.97289 (0.97629)	Top-1 acc 60.938 (65.846)	Top-5 acc 83.594 (85.051)	lr 0.00809
Train [74][2480/3239]	Time 0.239 (0.626)	Data Time 0.001 (0.018)	Loss 2.6467 (2.4218)	Entropy 0.97288 (0.97627)	Top-1 acc 60.156 (65.841)	Top-5 acc 82.031 (85.048)	lr 0.00809
Train [74][2490/3239]	Time 0.317 (0.626)	Data Time 0.001 (0.018)	Loss 2.4113 (2.4219)	Entropy 0.97283 (0.97626)	Top-1 acc 65.625 (65.839)	Top-5 acc 85.156 (85.047)	lr 0.00809
Train [74][2500/3239]	Time 0.239 (0.625)	Data Time 0.001 (0.018)	Loss 2.3540 (2.4218)	Entropy 0.97285 (0.97625)	Top-1 acc 66.016 (65.840)	Top-5 acc 85.547 (85.048)	lr 0.00809
Train [74][2510/3239]	Time 0.233 (0.624)	Data Time 0.002 (0.018)	Loss 2.5518 (2.4220)	Entropy 0.97286 (0.97623)	Top-1 acc 62.891 (65.834)	Top-5 acc 82.812 (85.047)	lr 0.00809
Train [74][2520/3239]	Time 0.240 (0.624)	Data Time 0.001 (0.018)	Loss 2.3959 (2.4221)	Entropy 0.97280 (0.97622)	Top-1 acc 65.625 (65.832)	Top-5 acc 87.500 (85.045)	lr 0.00809
Train [74][2530/3239]	Time 0.241 (0.623)	Data Time 0.001 (0.018)	Loss 2.4039 (2.4219)	Entropy 0.97276 (0.97621)	Top-1 acc 64.844 (65.834)	Top-5 acc 85.938 (85.045)	lr 0.00809
Train [74][2540/3239]	Time 2.514 (0.623)	Data Time 0.001 (0.018)	Loss 2.4773 (2.4220)	Entropy 0.97276 (0.97619)	Top-1 acc 61.719 (65.830)	Top-5 acc 84.766 (85.043)	lr 0.00809
Train [74][2550/3239]	Time 0.185 (0.621)	Data Time 0.001 (0.018)	Loss 2.4041 (2.4222)	Entropy 0.97269 (0.97618)	Top-1 acc 61.719 (65.823)	Top-5 acc 85.156 (85.038)	lr 0.00809
Train [74][2560/3239]	Time 0.250 (0.621)	Data Time 0.001 (0.018)	Loss 2.2951 (2.4221)	Entropy 0.97271 (0.97616)	Top-1 acc 68.359 (65.826)	Top-5 acc 87.109 (85.039)	lr 0.00808
Train [74][2570/3239]	Time 0.218 (0.620)	Data Time 0.001 (0.017)	Loss 2.4875 (2.4219)	Entropy 0.97271 (0.97615)	Top-1 acc 63.281 (65.826)	Top-5 acc 84.766 (85.041)	lr 0.00808
Train [74][2580/3239]	Time 0.249 (0.620)	Data Time 0.001 (0.017)	Loss 2.3903 (2.4220)	Entropy 0.97258 (0.97614)	Top-1 acc 66.016 (65.826)	Top-5 acc 84.375 (85.039)	lr 0.00808
Train [74][2590/3239]	Time 0.213 (0.619)	Data Time 0.002 (0.017)	Loss 2.2959 (2.4219)	Entropy 0.97255 (0.97612)	Top-1 acc 66.406 (65.826)	Top-5 acc 88.281 (85.042)	lr 0.00808
Train [74][2600/3239]	Time 0.235 (0.619)	Data Time 0.002 (0.017)	Loss 2.2525 (2.4218)	Entropy 0.97254 (0.97611)	Top-1 acc 70.312 (65.827)	Top-5 acc 88.281 (85.044)	lr 0.00808
Train [74][2610/3239]	Time 0.236 (0.618)	Data Time 0.001 (0.017)	Loss 2.5004 (2.4219)	Entropy 0.97251 (0.97610)	Top-1 acc 60.938 (65.821)	Top-5 acc 84.766 (85.043)	lr 0.00808
Train [74][2620/3239]	Time 0.228 (0.618)	Data Time 0.001 (0.017)	Loss 2.5278 (2.4220)	Entropy 0.97242 (0.97608)	Top-1 acc 62.891 (65.819)	Top-5 acc 82.812 (85.041)	lr 0.00808
Train [74][2630/3239]	Time 0.228 (0.617)	Data Time 0.001 (0.017)	Loss 2.4061 (2.4220)	Entropy 0.97239 (0.97607)	Top-1 acc 64.062 (65.818)	Top-5 acc 85.938 (85.039)	lr 0.00808
Train [74][2640/3239]	Time 0.259 (0.617)	Data Time 0.002 (0.017)	Loss 2.5425 (2.4220)	Entropy 0.97238 (0.97605)	Top-1 acc 64.062 (65.817)	Top-5 acc 81.641 (85.039)	lr 0.00808
Train [74][2650/3239]	Time 0.255 (0.616)	Data Time 0.002 (0.017)	Loss 2.4312 (2.4220)	Entropy 0.97237 (0.97604)	Top-1 acc 66.406 (65.817)	Top-5 acc 84.375 (85.040)	lr 0.00808
Train [74][2660/3239]	Time 0.243 (0.616)	Data Time 0.001 (0.017)	Loss 2.3404 (2.4220)	Entropy 0.97229 (0.97603)	Top-1 acc 65.625 (65.818)	Top-5 acc 87.500 (85.039)	lr 0.00808
Train [74][2670/3239]	Time 0.403 (0.615)	Data Time 0.001 (0.017)	Loss 2.3693 (2.4220)	Entropy 0.97228 (0.97601)	Top-1 acc 66.797 (65.814)	Top-5 acc 85.547 (85.039)	lr 0.00807
Train [74][2680/3239]	Time 0.217 (0.615)	Data Time 0.001 (0.017)	Loss 2.4053 (2.4219)	Entropy 0.97226 (0.97600)	Top-1 acc 68.750 (65.817)	Top-5 acc 85.547 (85.041)	lr 0.00807
Train [74][2690/3239]	Time 0.229 (0.614)	Data Time 0.001 (0.017)	Loss 2.4909 (2.4222)	Entropy 0.97224 (0.97598)	Top-1 acc 63.281 (65.811)	Top-5 acc 83.984 (85.036)	lr 0.00807
Train [74][2700/3239]	Time 0.254 (0.614)	Data Time 0.001 (0.017)	Loss 2.2924 (2.4221)	Entropy 0.97219 (0.97597)	Top-1 acc 66.797 (65.812)	Top-5 acc 88.281 (85.037)	lr 0.00807
Train [74][2710/3239]	Time 0.480 (0.632)	Data Time 0.006 (0.017)	Loss 2.4770 (2.4220)	Entropy 0.97218 (0.97596)	Top-1 acc 66.016 (65.816)	Top-5 acc 81.250 (85.035)	lr 0.00807
Train [74][2720/3239]	Time 0.257 (0.632)	Data Time 0.006 (0.017)	Loss 2.3218 (2.4219)	Entropy 0.97212 (0.97594)	Top-1 acc 69.141 (65.817)	Top-5 acc 85.938 (85.039)	lr 0.00807
Train [74][2730/3239]	Time 0.305 (0.631)	Data Time 0.002 (0.017)	Loss 2.5245 (2.4220)	Entropy 0.97203 (0.97593)	Top-1 acc 64.062 (65.812)	Top-5 acc 82.812 (85.037)	lr 0.00807
Train [74][2740/3239]	Time 0.244 (0.631)	Data Time 0.001 (0.017)	Loss 2.5273 (2.4222)	Entropy 0.97243 (0.97592)	Top-1 acc 64.844 (65.809)	Top-5 acc 82.422 (85.033)	lr 0.00807
Train [74][2750/3239]	Time 0.261 (0.630)	Data Time 0.001 (0.016)	Loss 2.6835 (2.4223)	Entropy 0.97240 (0.97590)	Top-1 acc 62.500 (65.808)	Top-5 acc 82.422 (85.037)	lr 0.00807
Train [74][2760/3239]	Time 0.328 (0.630)	Data Time 0.001 (0.016)	Loss 2.3815 (2.4223)	Entropy 0.97233 (0.97589)	Top-1 acc 67.969 (65.813)	Top-5 acc 87.500 (85.035)	lr 0.00807
Train [74][2770/3239]	Time 0.215 (0.629)	Data Time 0.001 (0.016)	Loss 2.3847 (2.4222)	Entropy 0.97230 (0.97588)	Top-1 acc 65.625 (65.812)	Top-5 acc 85.156 (85.038)	lr 0.00806
Train [74][2780/3239]	Time 0.222 (0.629)	Data Time 0.001 (0.016)	Loss 2.5057 (2.4222)	Entropy 0.97222 (0.97586)	Top-1 acc 63.672 (65.812)	Top-5 acc 82.812 (85.038)	lr 0.00806
Train [74][2790/3239]	Time 0.252 (0.628)	Data Time 0.001 (0.016)	Loss 2.3681 (2.4222)	Entropy 0.97227 (0.97585)	Top-1 acc 70.312 (65.813)	Top-5 acc 84.375 (85.037)	lr 0.00806
Train [74][2800/3239]	Time 0.243 (0.628)	Data Time 0.001 (0.016)	Loss 2.4015 (2.4223)	Entropy 0.97225 (0.97584)	Top-1 acc 69.531 (65.811)	Top-5 acc 84.766 (85.037)	lr 0.00806
Train [74][2810/3239]	Time 0.226 (0.627)	Data Time 0.001 (0.016)	Loss 2.5355 (2.4225)	Entropy 0.97228 (0.97583)	Top-1 acc 63.281 (65.807)	Top-5 acc 83.203 (85.036)	lr 0.00806
Train [74][2820/3239]	Time 0.296 (0.627)	Data Time 0.001 (0.016)	Loss 2.5226 (2.4226)	Entropy 0.97227 (0.97581)	Top-1 acc 64.844 (65.805)	Top-5 acc 82.812 (85.035)	lr 0.00806
Train [74][2830/3239]	Time 0.240 (0.626)	Data Time 0.005 (0.016)	Loss 2.4145 (2.4225)	Entropy 0.97224 (0.97580)	Top-1 acc 64.844 (65.805)	Top-5 acc 85.938 (85.036)	lr 0.00806
Train [74][2840/3239]	Time 0.233 (0.625)	Data Time 0.001 (0.016)	Loss 2.4424 (2.4226)	Entropy 0.97214 (0.97579)	Top-1 acc 65.625 (65.803)	Top-5 acc 84.375 (85.033)	lr 0.00806
Train [74][2850/3239]	Time 0.397 (0.625)	Data Time 0.001 (0.016)	Loss 2.2713 (2.4225)	Entropy 0.97213 (0.97577)	Top-1 acc 67.188 (65.803)	Top-5 acc 91.016 (85.033)	lr 0.00806
Train [74][2860/3239]	Time 0.249 (0.624)	Data Time 0.001 (0.016)	Loss 2.5242 (2.4226)	Entropy 0.97215 (0.97576)	Top-1 acc 65.625 (65.798)	Top-5 acc 83.203 (85.032)	lr 0.00806
Train [74][2870/3239]	Time 0.237 (0.624)	Data Time 0.001 (0.016)	Loss 2.4400 (2.4226)	Entropy 0.97207 (0.97575)	Top-1 acc 67.188 (65.796)	Top-5 acc 82.422 (85.031)	lr 0.00806
Train [74][2880/3239]	Time 0.271 (0.623)	Data Time 0.001 (0.016)	Loss 2.6485 (2.4228)	Entropy 0.97228 (0.97574)	Top-1 acc 61.328 (65.795)	Top-5 acc 80.078 (85.026)	lr 0.00805
Train [74][2890/3239]	Time 0.283 (0.623)	Data Time 0.001 (0.016)	Loss 2.2541 (2.4227)	Entropy 0.97214 (0.97572)	Top-1 acc 67.578 (65.799)	Top-5 acc 86.328 (85.028)	lr 0.00805
Train [74][2900/3239]	Time 0.247 (0.623)	Data Time 0.002 (0.016)	Loss 2.3781 (2.4228)	Entropy 0.97215 (0.97571)	Top-1 acc 70.312 (65.796)	Top-5 acc 82.031 (85.024)	lr 0.00805
Train [74][2910/3239]	Time 0.260 (0.622)	Data Time 0.001 (0.016)	Loss 2.2983 (2.4227)	Entropy 0.97208 (0.97570)	Top-1 acc 68.359 (65.798)	Top-5 acc 84.766 (85.026)	lr 0.00805
Train [74][2920/3239]	Time 0.238 (0.622)	Data Time 0.001 (0.016)	Loss 2.4731 (2.4225)	Entropy 0.97208 (0.97569)	Top-1 acc 66.406 (65.802)	Top-5 acc 82.812 (85.030)	lr 0.00805
Train [74][2930/3239]	Time 0.221 (0.621)	Data Time 0.001 (0.016)	Loss 2.1829 (2.4225)	Entropy 0.97209 (0.97568)	Top-1 acc 70.703 (65.803)	Top-5 acc 90.625 (85.031)	lr 0.00805
Train [74][2940/3239]	Time 0.342 (0.621)	Data Time 0.001 (0.016)	Loss 2.5567 (2.4226)	Entropy 0.97206 (0.97566)	Top-1 acc 62.109 (65.799)	Top-5 acc 83.203 (85.029)	lr 0.00805
Train [74][2950/3239]	Time 0.232 (0.620)	Data Time 0.001 (0.015)	Loss 2.5366 (2.4226)	Entropy 0.97205 (0.97565)	Top-1 acc 63.672 (65.801)	Top-5 acc 82.422 (85.029)	lr 0.00805
Train [74][2960/3239]	Time 0.226 (0.620)	Data Time 0.001 (0.015)	Loss 2.3310 (2.4226)	Entropy 0.97207 (0.97564)	Top-1 acc 67.578 (65.800)	Top-5 acc 88.281 (85.026)	lr 0.00805
Train [74][2970/3239]	Time 0.259 (0.619)	Data Time 0.001 (0.015)	Loss 2.5354 (2.4227)	Entropy 0.97203 (0.97563)	Top-1 acc 65.625 (65.798)	Top-5 acc 83.203 (85.025)	lr 0.00805
Train [74][2980/3239]	Time 0.228 (0.619)	Data Time 0.001 (0.015)	Loss 2.5044 (2.4228)	Entropy 0.97201 (0.97561)	Top-1 acc 64.844 (65.796)	Top-5 acc 82.422 (85.025)	lr 0.00804
Train [74][2990/3239]	Time 0.227 (0.618)	Data Time 0.001 (0.015)	Loss 2.4210 (2.4229)	Entropy 0.97203 (0.97560)	Top-1 acc 65.234 (65.795)	Top-5 acc 87.500 (85.023)	lr 0.00804
Train [74][3000/3239]	Time 0.211 (0.618)	Data Time 0.001 (0.015)	Loss 2.5159 (2.4228)	Entropy 0.97200 (0.97559)	Top-1 acc 67.578 (65.801)	Top-5 acc 83.594 (85.027)	lr 0.00804
Train [74][3010/3239]	Time 0.245 (0.617)	Data Time 0.001 (0.015)	Loss 2.2522 (2.4227)	Entropy 0.97201 (0.97558)	Top-1 acc 70.703 (65.802)	Top-5 acc 88.672 (85.028)	lr 0.00804
Train [74][3020/3239]	Time 0.264 (0.617)	Data Time 0.001 (0.015)	Loss 2.4426 (2.4226)	Entropy 0.97195 (0.97557)	Top-1 acc 60.156 (65.805)	Top-5 acc 84.766 (85.030)	lr 0.00804
Train [74][3030/3239]	Time 0.367 (0.617)	Data Time 0.001 (0.015)	Loss 2.3955 (2.4226)	Entropy 0.97192 (0.97556)	Top-1 acc 66.406 (65.807)	Top-5 acc 85.547 (85.031)	lr 0.00804
Train [74][3040/3239]	Time 0.250 (0.634)	Data Time 0.003 (0.015)	Loss 2.3358 (2.4224)	Entropy 0.97182 (0.97554)	Top-1 acc 65.625 (65.814)	Top-5 acc 86.719 (85.033)	lr 0.00804
Train [74][3050/3239]	Time 0.229 (0.633)	Data Time 0.002 (0.015)	Loss 2.4199 (2.4224)	Entropy 0.97179 (0.97553)	Top-1 acc 65.234 (65.814)	Top-5 acc 85.938 (85.036)	lr 0.00804
Train [74][3060/3239]	Time 0.260 (0.633)	Data Time 0.002 (0.015)	Loss 2.2376 (2.4223)	Entropy 0.97172 (0.97552)	Top-1 acc 68.359 (65.817)	Top-5 acc 87.500 (85.036)	lr 0.00804
Train [74][3070/3239]	Time 0.265 (0.632)	Data Time 0.001 (0.015)	Loss 2.1145 (2.4223)	Entropy 0.97169 (0.97551)	Top-1 acc 73.828 (65.817)	Top-5 acc 91.797 (85.034)	lr 0.00804
Train [74][3080/3239]	Time 0.246 (0.632)	Data Time 0.001 (0.015)	Loss 2.4208 (2.4223)	Entropy 0.97163 (0.97549)	Top-1 acc 63.672 (65.816)	Top-5 acc 85.547 (85.035)	lr 0.00804
Train [74][3090/3239]	Time 0.226 (0.631)	Data Time 0.001 (0.015)	Loss 2.2941 (2.4224)	Entropy 0.97164 (0.97548)	Top-1 acc 67.578 (65.812)	Top-5 acc 85.938 (85.031)	lr 0.00803
Train [74][3100/3239]	Time 0.219 (0.631)	Data Time 0.001 (0.015)	Loss 2.5646 (2.4225)	Entropy 0.97163 (0.97547)	Top-1 acc 65.625 (65.813)	Top-5 acc 82.422 (85.029)	lr 0.00803
Train [74][3110/3239]	Time 0.219 (0.630)	Data Time 0.001 (0.015)	Loss 2.5187 (2.4228)	Entropy 0.97149 (0.97546)	Top-1 acc 59.766 (65.805)	Top-5 acc 83.203 (85.025)	lr 0.00803
Train [74][3120/3239]	Time 0.369 (0.630)	Data Time 0.001 (0.015)	Loss 2.5831 (2.4228)	Entropy 0.97148 (0.97544)	Top-1 acc 61.719 (65.807)	Top-5 acc 82.422 (85.024)	lr 0.00803
Train [74][3130/3239]	Time 0.231 (0.629)	Data Time 0.001 (0.015)	Loss 2.2829 (2.4228)	Entropy 0.97146 (0.97543)	Top-1 acc 71.875 (65.810)	Top-5 acc 87.500 (85.023)	lr 0.00803
Train [74][3140/3239]	Time 0.226 (0.629)	Data Time 0.001 (0.015)	Loss 2.5049 (2.4229)	Entropy 0.97144 (0.97542)	Top-1 acc 62.891 (65.807)	Top-5 acc 85.156 (85.025)	lr 0.00803
Train [74][3150/3239]	Time 0.279 (0.628)	Data Time 0.001 (0.015)	Loss 2.6133 (2.4230)	Entropy 0.97145 (0.97541)	Top-1 acc 60.547 (65.804)	Top-5 acc 81.641 (85.023)	lr 0.00803
Train [74][3160/3239]	Time 0.244 (0.628)	Data Time 0.001 (0.015)	Loss 2.6261 (2.4231)	Entropy 0.97139 (0.97539)	Top-1 acc 62.891 (65.806)	Top-5 acc 81.250 (85.023)	lr 0.00803
Train [74][3170/3239]	Time 0.236 (0.627)	Data Time 0.001 (0.015)	Loss 2.3481 (2.4232)	Entropy 0.97135 (0.97538)	Top-1 acc 66.016 (65.801)	Top-5 acc 87.891 (85.024)	lr 0.00803
Train [74][3180/3239]	Time 0.236 (0.627)	Data Time 0.000 (0.014)	Loss 2.3770 (2.4233)	Entropy 0.97128 (0.97537)	Top-1 acc 68.750 (65.799)	Top-5 acc 85.547 (85.024)	lr 0.00803
Train [74][3190/3239]	Time 0.229 (0.626)	Data Time 0.000 (0.014)	Loss 2.3872 (2.4233)	Entropy 0.97124 (0.97536)	Top-1 acc 64.453 (65.799)	Top-5 acc 84.766 (85.025)	lr 0.00803
Train [74][3200/3239]	Time 0.228 (0.626)	Data Time 0.000 (0.014)	Loss 2.3670 (2.4232)	Entropy 0.97124 (0.97534)	Top-1 acc 68.750 (65.803)	Top-5 acc 85.547 (85.023)	lr 0.00802
Train [74][3210/3239]	Time 0.347 (0.625)	Data Time 0.000 (0.014)	Loss 2.2111 (2.4231)	Entropy 0.97123 (0.97533)	Top-1 acc 70.312 (65.804)	Top-5 acc 85.938 (85.024)	lr 0.00802
Train [74][3220/3239]	Time 0.229 (0.625)	Data Time 0.000 (0.014)	Loss 2.5302 (2.4231)	Entropy 0.97120 (0.97532)	Top-1 acc 65.234 (65.804)	Top-5 acc 83.203 (85.024)	lr 0.00802
Train [74][3230/3239]	Time 0.231 (0.624)	Data Time 0.000 (0.014)	Loss 2.5490 (2.4232)	Entropy 0.97115 (0.97530)	Top-1 acc 59.766 (65.804)	Top-5 acc 81.250 (85.021)	lr 0.00802
Train [74][3239/3239]	Time 2.382 (0.624)	Data Time 0.000 (0.014)	Loss 2.6701 (2.4232)	Entropy 0.97115 (0.97529)	Top-1 acc 55.556 (65.801)	Top-5 acc 80.247 (85.023)	lr 0.00802
==========Valid [74/120]	loss 1.343	top-1 acc 69.445 (69.445)	top-5 acc 88.142	Train top-1 65.801	top-5 85.023	Entropy 0.97115	Latency-None: 0.000ms	Flops: 546.53M
Train [75][0/3239]	Time 38.912 (38.912)	Data Time 36.842 (36.842)	Loss 2.5559 (2.5559)	Entropy 0.97110 (0.97110)	Top-1 acc 62.891 (62.891)	Top-5 acc 82.031 (82.031)	lr 0.00802
Train [75][10/3239]	Time 2.840 (4.121)	Data Time 0.001 (3.351)	Loss 2.3697 (2.4383)	Entropy 0.97110 (0.97110)	Top-1 acc 67.969 (65.199)	Top-5 acc 84.766 (84.197)	lr 0.00802
Train [75][20/3239]	Time 0.285 (2.285)	Data Time 0.006 (1.756)	Loss 2.5883 (2.4538)	Entropy 0.97108 (0.97109)	Top-1 acc 64.062 (65.030)	Top-5 acc 82.812 (84.598)	lr 0.00802
Train [75][30/3239]	Time 0.245 (1.702)	Data Time 0.001 (1.190)	Loss 2.3716 (2.4212)	Entropy 0.97106 (0.97108)	Top-1 acc 68.359 (65.965)	Top-5 acc 85.156 (84.929)	lr 0.00802
Train [75][40/3239]	Time 0.244 (1.407)	Data Time 0.001 (0.900)	Loss 2.4786 (2.4169)	Entropy 0.97090 (0.97104)	Top-1 acc 61.719 (66.301)	Top-5 acc 83.203 (84.975)	lr 0.00802
Train [75][50/3239]	Time 0.241 (1.227)	Data Time 0.001 (0.724)	Loss 2.2839 (2.4111)	Entropy 0.97090 (0.97102)	Top-1 acc 68.359 (66.406)	Top-5 acc 86.719 (85.034)	lr 0.00802
Train [75][60/3239]	Time 0.329 (1.104)	Data Time 0.002 (0.606)	Loss 2.4970 (2.4117)	Entropy 0.97091 (0.97100)	Top-1 acc 59.375 (66.201)	Top-5 acc 81.641 (85.015)	lr 0.00801
Train [75][70/3239]	Time 0.240 (1.016)	Data Time 0.001 (0.521)	Loss 2.4050 (2.4162)	Entropy 0.97087 (0.97098)	Top-1 acc 67.188 (66.115)	Top-5 acc 85.156 (84.865)	lr 0.00801
Train [75][80/3239]	Time 0.249 (0.949)	Data Time 0.001 (0.457)	Loss 2.3243 (2.4165)	Entropy 0.97082 (0.97097)	Top-1 acc 71.094 (66.102)	Top-5 acc 86.328 (84.963)	lr 0.00801
Train [75][90/3239]	Time 0.242 (0.897)	Data Time 0.001 (0.407)	Loss 2.4678 (2.4174)	Entropy 0.97077 (0.97095)	Top-1 acc 62.109 (66.076)	Top-5 acc 87.891 (84.877)	lr 0.00801
Train [75][100/3239]	Time 0.235 (0.855)	Data Time 0.001 (0.366)	Loss 2.3513 (2.4132)	Entropy 0.97077 (0.97093)	Top-1 acc 66.406 (66.186)	Top-5 acc 84.766 (84.913)	lr 0.00801
Train [75][110/3239]	Time 0.247 (0.824)	Data Time 0.001 (0.334)	Loss 2.3402 (2.4105)	Entropy 0.97065 (0.97091)	Top-1 acc 67.188 (66.297)	Top-5 acc 87.500 (85.023)	lr 0.00801
Train [75][120/3239]	Time 2.529 (0.796)	Data Time 0.001 (0.306)	Loss 2.4310 (2.4086)	Entropy 0.97065 (0.97089)	Top-1 acc 68.750 (66.261)	Top-5 acc 82.812 (85.037)	lr 0.00801
Train [75][130/3239]	Time 0.227 (0.754)	Data Time 0.001 (0.283)	Loss 2.4307 (2.4083)	Entropy 0.97051 (0.97086)	Top-1 acc 64.453 (66.284)	Top-5 acc 83.594 (85.061)	lr 0.00801
Train [75][140/3239]	Time 0.254 (0.735)	Data Time 0.001 (0.263)	Loss 2.4054 (2.4053)	Entropy 0.97051 (0.97084)	Top-1 acc 64.453 (66.290)	Top-5 acc 85.547 (85.165)	lr 0.00801
Train [75][150/3239]	Time 0.223 (1.115)	Data Time 0.002 (0.246)	Loss 2.2468 (2.3998)	Entropy 0.97048 (0.97082)	Top-1 acc 71.484 (66.453)	Top-5 acc 87.500 (85.278)	lr 0.00801
Train [75][160/3239]	Time 0.228 (1.075)	Data Time 0.002 (0.231)	Loss 2.2447 (2.4001)	Entropy 0.97044 (0.97079)	Top-1 acc 73.438 (66.409)	Top-5 acc 89.062 (85.333)	lr 0.00801
Train [75][170/3239]	Time 0.241 (1.040)	Data Time 0.002 (0.217)	Loss 2.4125 (2.3999)	Entropy 0.97042 (0.97077)	Top-1 acc 66.016 (66.434)	Top-5 acc 85.156 (85.337)	lr 0.00800
Train [75][180/3239]	Time 0.237 (1.010)	Data Time 0.001 (0.205)	Loss 2.4404 (2.3984)	Entropy 0.97037 (0.97075)	Top-1 acc 69.922 (66.456)	Top-5 acc 82.812 (85.370)	lr 0.00800
Train [75][190/3239]	Time 0.309 (0.982)	Data Time 0.002 (0.195)	Loss 2.4437 (2.4007)	Entropy 0.97032 (0.97073)	Top-1 acc 62.109 (66.429)	Top-5 acc 85.547 (85.324)	lr 0.00800
Train [75][200/3239]	Time 0.221 (0.957)	Data Time 0.001 (0.185)	Loss 2.3611 (2.3992)	Entropy 0.97065 (0.97071)	Top-1 acc 65.234 (66.430)	Top-5 acc 88.281 (85.407)	lr 0.00800
Train [75][210/3239]	Time 0.230 (0.934)	Data Time 0.002 (0.176)	Loss 2.5300 (2.4021)	Entropy 0.97063 (0.97071)	Top-1 acc 63.672 (66.349)	Top-5 acc 82.812 (85.380)	lr 0.00800
Train [75][220/3239]	Time 0.288 (0.915)	Data Time 0.002 (0.169)	Loss 2.4753 (2.4034)	Entropy 0.97068 (0.97071)	Top-1 acc 64.844 (66.309)	Top-5 acc 84.766 (85.356)	lr 0.00800
Train [75][230/3239]	Time 2.525 (0.896)	Data Time 0.001 (0.161)	Loss 2.4245 (2.4038)	Entropy 0.97068 (0.97071)	Top-1 acc 64.453 (66.361)	Top-5 acc 85.547 (85.327)	lr 0.00800
Train [75][240/3239]	Time 0.225 (0.869)	Data Time 0.001 (0.155)	Loss 2.4099 (2.4057)	Entropy 0.97068 (0.97070)	Top-1 acc 66.406 (66.337)	Top-5 acc 85.938 (85.284)	lr 0.00800
Train [75][250/3239]	Time 0.232 (0.853)	Data Time 0.001 (0.149)	Loss 2.4334 (2.4037)	Entropy 0.97066 (0.97070)	Top-1 acc 65.625 (66.372)	Top-5 acc 85.547 (85.345)	lr 0.00800
Train [75][260/3239]	Time 0.244 (0.839)	Data Time 0.001 (0.143)	Loss 2.4052 (2.4039)	Entropy 0.97056 (0.97070)	Top-1 acc 65.234 (66.376)	Top-5 acc 85.938 (85.364)	lr 0.00800
Train [75][270/3239]	Time 0.241 (0.826)	Data Time 0.001 (0.138)	Loss 2.4164 (2.4061)	Entropy 0.97052 (0.97069)	Top-1 acc 62.500 (66.321)	Top-5 acc 86.328 (85.310)	lr 0.00799
Train [75][280/3239]	Time 0.336 (0.814)	Data Time 0.001 (0.133)	Loss 2.3951 (2.4055)	Entropy 0.97049 (0.97069)	Top-1 acc 65.234 (66.321)	Top-5 acc 85.938 (85.311)	lr 0.00799
Train [75][290/3239]	Time 0.212 (0.802)	Data Time 0.001 (0.128)	Loss 2.4410 (2.4059)	Entropy 0.97045 (0.97068)	Top-1 acc 65.625 (66.296)	Top-5 acc 85.938 (85.304)	lr 0.00799
Train [75][300/3239]	Time 0.226 (0.792)	Data Time 0.001 (0.124)	Loss 2.4449 (2.4067)	Entropy 0.97039 (0.97067)	Top-1 acc 63.281 (66.279)	Top-5 acc 84.766 (85.277)	lr 0.00799
Train [75][310/3239]	Time 0.232 (0.782)	Data Time 0.001 (0.120)	Loss 2.2377 (2.4080)	Entropy 0.97036 (0.97066)	Top-1 acc 69.531 (66.247)	Top-5 acc 87.891 (85.259)	lr 0.00799
Train [75][320/3239]	Time 0.330 (0.773)	Data Time 0.001 (0.117)	Loss 2.5038 (2.4093)	Entropy 0.97028 (0.97065)	Top-1 acc 65.625 (66.244)	Top-5 acc 83.594 (85.254)	lr 0.00799
Train [75][330/3239]	Time 0.214 (0.765)	Data Time 0.001 (0.113)	Loss 2.4717 (2.4093)	Entropy 0.97029 (0.97064)	Top-1 acc 62.500 (66.232)	Top-5 acc 83.984 (85.249)	lr 0.00799
Train [75][340/3239]	Time 2.491 (0.757)	Data Time 0.001 (0.110)	Loss 2.5830 (2.4088)	Entropy 0.97029 (0.97063)	Top-1 acc 59.766 (66.249)	Top-5 acc 82.422 (85.269)	lr 0.00799
Train [75][350/3239]	Time 0.239 (0.742)	Data Time 0.001 (0.107)	Loss 2.3350 (2.4078)	Entropy 0.97029 (0.97062)	Top-1 acc 64.844 (66.254)	Top-5 acc 88.672 (85.295)	lr 0.00799
Train [75][360/3239]	Time 0.240 (0.734)	Data Time 0.001 (0.104)	Loss 2.4629 (2.4075)	Entropy 0.97030 (0.97061)	Top-1 acc 63.672 (66.258)	Top-5 acc 83.984 (85.288)	lr 0.00799
Train [75][370/3239]	Time 0.220 (0.728)	Data Time 0.001 (0.101)	Loss 2.4299 (2.4071)	Entropy 0.97026 (0.97060)	Top-1 acc 66.797 (66.255)	Top-5 acc 84.766 (85.300)	lr 0.00799
Train [75][380/3239]	Time 0.229 (0.721)	Data Time 0.001 (0.098)	Loss 2.1567 (2.4068)	Entropy 0.97025 (0.97059)	Top-1 acc 70.703 (66.288)	Top-5 acc 90.234 (85.290)	lr 0.00798
Train [75][390/3239]	Time 0.222 (0.715)	Data Time 0.001 (0.096)	Loss 2.4718 (2.4084)	Entropy 0.97027 (0.97058)	Top-1 acc 63.281 (66.235)	Top-5 acc 86.328 (85.270)	lr 0.00798
Train [75][400/3239]	Time 0.252 (0.709)	Data Time 0.001 (0.094)	Loss 2.6354 (2.4092)	Entropy 0.97026 (0.97058)	Top-1 acc 60.938 (66.222)	Top-5 acc 82.031 (85.256)	lr 0.00798
Train [75][410/3239]	Time 0.323 (0.704)	Data Time 0.001 (0.091)	Loss 2.3368 (2.4085)	Entropy 0.97022 (0.97057)	Top-1 acc 68.359 (66.238)	Top-5 acc 85.938 (85.259)	lr 0.00798
Train [75][420/3239]	Time 0.233 (0.698)	Data Time 0.001 (0.089)	Loss 2.3582 (2.4082)	Entropy 0.97022 (0.97056)	Top-1 acc 67.578 (66.249)	Top-5 acc 87.109 (85.267)	lr 0.00798
Train [75][430/3239]	Time 0.225 (0.693)	Data Time 0.001 (0.087)	Loss 2.4599 (2.4091)	Entropy 0.97020 (0.97055)	Top-1 acc 67.969 (66.221)	Top-5 acc 85.156 (85.249)	lr 0.00798
Train [75][440/3239]	Time 0.230 (0.689)	Data Time 0.001 (0.085)	Loss 2.4391 (2.4084)	Entropy 0.97011 (0.97054)	Top-1 acc 65.234 (66.249)	Top-5 acc 86.328 (85.272)	lr 0.00798
Train [75][450/3239]	Time 2.610 (0.685)	Data Time 0.003 (0.083)	Loss 2.4538 (2.4074)	Entropy 0.97011 (0.97053)	Top-1 acc 64.844 (66.272)	Top-5 acc 83.594 (85.302)	lr 0.00798
Train [75][460/3239]	Time 0.255 (0.675)	Data Time 0.001 (0.082)	Loss 2.2780 (2.4087)	Entropy 0.97012 (0.97052)	Top-1 acc 69.531 (66.224)	Top-5 acc 87.500 (85.275)	lr 0.00798
Train [75][470/3239]	Time 0.248 (0.671)	Data Time 0.001 (0.080)	Loss 2.3582 (2.4087)	Entropy 0.97011 (0.97052)	Top-1 acc 69.531 (66.215)	Top-5 acc 86.328 (85.277)	lr 0.00798
Train [75][480/3239]	Time 0.251 (0.667)	Data Time 0.001 (0.078)	Loss 2.5279 (2.4086)	Entropy 0.97009 (0.97051)	Top-1 acc 66.797 (66.235)	Top-5 acc 83.594 (85.289)	lr 0.00798
Train [75][490/3239]	Time 0.223 (0.663)	Data Time 0.001 (0.077)	Loss 2.4714 (2.4093)	Entropy 0.97006 (0.97050)	Top-1 acc 61.328 (66.204)	Top-5 acc 86.328 (85.283)	lr 0.00797
Train [75][500/3239]	Time 0.235 (0.660)	Data Time 0.001 (0.075)	Loss 2.4229 (2.4092)	Entropy 0.97002 (0.97049)	Top-1 acc 64.453 (66.213)	Top-5 acc 84.375 (85.276)	lr 0.00797
Train [75][510/3239]	Time 0.314 (0.761)	Data Time 0.004 (0.074)	Loss 2.4941 (2.4087)	Entropy 0.96996 (0.97048)	Top-1 acc 63.281 (66.230)	Top-5 acc 83.984 (85.282)	lr 0.00797
Train [75][520/3239]	Time 0.232 (0.756)	Data Time 0.002 (0.072)	Loss 2.3241 (2.4086)	Entropy 0.96997 (0.97047)	Top-1 acc 65.625 (66.229)	Top-5 acc 85.938 (85.290)	lr 0.00797
Train [75][530/3239]	Time 0.223 (0.751)	Data Time 0.001 (0.071)	Loss 2.3641 (2.4077)	Entropy 0.96991 (0.97046)	Top-1 acc 65.234 (66.234)	Top-5 acc 85.156 (85.311)	lr 0.00797
Train [75][540/3239]	Time 0.316 (0.745)	Data Time 0.001 (0.070)	Loss 2.3664 (2.4069)	Entropy 0.96986 (0.97045)	Top-1 acc 62.891 (66.241)	Top-5 acc 87.891 (85.323)	lr 0.00797
Train [75][550/3239]	Time 0.212 (0.740)	Data Time 0.001 (0.069)	Loss 2.4755 (2.4065)	Entropy 0.96985 (0.97044)	Top-1 acc 64.453 (66.255)	Top-5 acc 83.203 (85.321)	lr 0.00797
Train [75][560/3239]	Time 2.580 (0.736)	Data Time 0.001 (0.067)	Loss 2.4173 (2.4064)	Entropy 0.96985 (0.97043)	Top-1 acc 64.453 (66.246)	Top-5 acc 84.375 (85.328)	lr 0.00797
Train [75][570/3239]	Time 0.243 (0.727)	Data Time 0.001 (0.066)	Loss 2.4324 (2.4058)	Entropy 0.96982 (0.97042)	Top-1 acc 64.453 (66.252)	Top-5 acc 83.984 (85.341)	lr 0.00797
Train [75][580/3239]	Time 0.218 (0.723)	Data Time 0.001 (0.065)	Loss 2.2887 (2.4057)	Entropy 0.96971 (0.97041)	Top-1 acc 70.703 (66.264)	Top-5 acc 85.547 (85.340)	lr 0.00797
Train [75][590/3239]	Time 0.327 (0.718)	Data Time 0.001 (0.064)	Loss 2.3627 (2.4063)	Entropy 0.96968 (0.97039)	Top-1 acc 67.578 (66.254)	Top-5 acc 84.375 (85.327)	lr 0.00796
Train [75][600/3239]	Time 0.249 (0.714)	Data Time 0.002 (0.063)	Loss 2.4229 (2.4070)	Entropy 0.96960 (0.97038)	Top-1 acc 64.844 (66.230)	Top-5 acc 85.547 (85.325)	lr 0.00796
Train [75][610/3239]	Time 0.215 (0.710)	Data Time 0.003 (0.062)	Loss 2.2671 (2.4067)	Entropy 0.96920 (0.97036)	Top-1 acc 69.141 (66.227)	Top-5 acc 87.109 (85.333)	lr 0.00796
Train [75][620/3239]	Time 0.231 (0.707)	Data Time 0.001 (0.061)	Loss 2.2850 (2.4070)	Entropy 0.96914 (0.97034)	Top-1 acc 68.750 (66.211)	Top-5 acc 87.109 (85.327)	lr 0.00796
Train [75][630/3239]	Time 0.334 (0.703)	Data Time 0.001 (0.060)	Loss 2.3465 (2.4079)	Entropy 0.96915 (0.97033)	Top-1 acc 69.141 (66.194)	Top-5 acc 86.328 (85.306)	lr 0.00796
Train [75][640/3239]	Time 0.239 (0.699)	Data Time 0.001 (0.059)	Loss 2.2696 (2.4075)	Entropy 0.96916 (0.97031)	Top-1 acc 70.703 (66.199)	Top-5 acc 89.062 (85.320)	lr 0.00796
Train [75][650/3239]	Time 0.226 (0.696)	Data Time 0.001 (0.058)	Loss 2.3837 (2.4088)	Entropy 0.96911 (0.97029)	Top-1 acc 65.625 (66.173)	Top-5 acc 83.594 (85.297)	lr 0.00796
Train [75][660/3239]	Time 0.226 (0.692)	Data Time 0.001 (0.057)	Loss 2.3227 (2.4083)	Entropy 0.96907 (0.97027)	Top-1 acc 65.234 (66.183)	Top-5 acc 87.109 (85.311)	lr 0.00796
Train [75][670/3239]	Time 2.551 (0.689)	Data Time 0.001 (0.057)	Loss 2.3379 (2.4081)	Entropy 0.96907 (0.97025)	Top-1 acc 68.359 (66.187)	Top-5 acc 87.500 (85.311)	lr 0.00796
Train [75][680/3239]	Time 0.234 (0.683)	Data Time 0.001 (0.056)	Loss 2.3508 (2.4078)	Entropy 0.96907 (0.97024)	Top-1 acc 67.969 (66.185)	Top-5 acc 86.328 (85.326)	lr 0.00796
Train [75][690/3239]	Time 0.223 (0.679)	Data Time 0.001 (0.055)	Loss 2.5113 (2.4080)	Entropy 0.96909 (0.97022)	Top-1 acc 61.719 (66.182)	Top-5 acc 83.203 (85.326)	lr 0.00796
Train [75][700/3239]	Time 0.218 (0.677)	Data Time 0.001 (0.054)	Loss 2.4817 (2.4086)	Entropy 0.96901 (0.97020)	Top-1 acc 66.797 (66.160)	Top-5 acc 83.203 (85.319)	lr 0.00795
Train [75][710/3239]	Time 0.220 (0.674)	Data Time 0.001 (0.054)	Loss 2.5895 (2.4092)	Entropy 0.96909 (0.97019)	Top-1 acc 62.500 (66.144)	Top-5 acc 81.641 (85.319)	lr 0.00795
Train [75][720/3239]	Time 0.337 (0.671)	Data Time 0.001 (0.053)	Loss 2.5377 (2.4095)	Entropy 0.96908 (0.97017)	Top-1 acc 59.766 (66.130)	Top-5 acc 81.250 (85.319)	lr 0.00795
Train [75][730/3239]	Time 0.233 (0.668)	Data Time 0.001 (0.052)	Loss 2.4571 (2.4095)	Entropy 0.96907 (0.97016)	Top-1 acc 64.062 (66.120)	Top-5 acc 84.766 (85.321)	lr 0.00795
Train [75][740/3239]	Time 0.232 (0.666)	Data Time 0.001 (0.051)	Loss 2.3056 (2.4095)	Entropy 0.96901 (0.97014)	Top-1 acc 66.406 (66.116)	Top-5 acc 89.844 (85.320)	lr 0.00795
Train [75][750/3239]	Time 0.234 (0.663)	Data Time 0.001 (0.051)	Loss 2.4112 (2.4096)	Entropy 0.96897 (0.97013)	Top-1 acc 64.453 (66.110)	Top-5 acc 84.375 (85.335)	lr 0.00795
Train [75][760/3239]	Time 0.220 (0.661)	Data Time 0.001 (0.050)	Loss 2.5123 (2.4103)	Entropy 0.96897 (0.97011)	Top-1 acc 62.500 (66.078)	Top-5 acc 81.641 (85.325)	lr 0.00795
Train [75][770/3239]	Time 0.272 (0.658)	Data Time 0.001 (0.049)	Loss 2.3835 (2.4099)	Entropy 0.96892 (0.97010)	Top-1 acc 68.359 (66.094)	Top-5 acc 87.500 (85.332)	lr 0.00795
Train [75][780/3239]	Time 2.524 (0.656)	Data Time 0.001 (0.049)	Loss 2.4591 (2.4106)	Entropy 0.96892 (0.97008)	Top-1 acc 64.453 (66.063)	Top-5 acc 81.250 (85.308)	lr 0.00795
Train [75][790/3239]	Time 0.222 (0.651)	Data Time 0.001 (0.048)	Loss 2.4141 (2.4103)	Entropy 0.96888 (0.97007)	Top-1 acc 64.453 (66.057)	Top-5 acc 85.938 (85.320)	lr 0.00795
Train [75][800/3239]	Time 0.217 (0.648)	Data Time 0.001 (0.048)	Loss 2.4522 (2.4101)	Entropy 0.96887 (0.97005)	Top-1 acc 63.672 (66.056)	Top-5 acc 84.766 (85.318)	lr 0.00795
Train [75][810/3239]	Time 0.307 (0.646)	Data Time 0.001 (0.047)	Loss 2.3829 (2.4101)	Entropy 0.96888 (0.97004)	Top-1 acc 67.969 (66.068)	Top-5 acc 85.547 (85.323)	lr 0.00794
Train [75][820/3239]	Time 0.176 (0.644)	Data Time 0.001 (0.047)	Loss 2.5019 (2.4093)	Entropy 0.96888 (0.97002)	Top-1 acc 66.797 (66.095)	Top-5 acc 83.203 (85.345)	lr 0.00794
Train [75][830/3239]	Time 0.225 (0.642)	Data Time 0.001 (0.046)	Loss 2.3810 (2.4085)	Entropy 0.96894 (0.97001)	Top-1 acc 67.969 (66.110)	Top-5 acc 85.156 (85.358)	lr 0.00794
Train [75][840/3239]	Time 0.248 (0.640)	Data Time 0.005 (0.046)	Loss 2.4147 (2.4085)	Entropy 0.96888 (0.97000)	Top-1 acc 68.359 (66.109)	Top-5 acc 82.812 (85.354)	lr 0.00794
Train [75][850/3239]	Time 0.247 (0.638)	Data Time 0.001 (0.045)	Loss 2.3623 (2.4085)	Entropy 0.96886 (0.96998)	Top-1 acc 64.844 (66.108)	Top-5 acc 87.109 (85.350)	lr 0.00794
Train [75][860/3239]	Time 0.223 (0.636)	Data Time 0.001 (0.045)	Loss 2.3339 (2.4081)	Entropy 0.96887 (0.96997)	Top-1 acc 68.359 (66.117)	Top-5 acc 88.281 (85.353)	lr 0.00794
Train [75][870/3239]	Time 0.271 (0.698)	Data Time 0.002 (0.044)	Loss 2.3284 (2.4084)	Entropy 0.96890 (0.96996)	Top-1 acc 67.969 (66.123)	Top-5 acc 87.109 (85.347)	lr 0.00794
Train [75][880/3239]	Time 0.279 (0.696)	Data Time 0.002 (0.044)	Loss 2.5117 (2.4085)	Entropy 0.96887 (0.96995)	Top-1 acc 63.672 (66.124)	Top-5 acc 84.375 (85.342)	lr 0.00794
Train [75][890/3239]	Time 2.664 (0.693)	Data Time 0.002 (0.043)	Loss 2.3166 (2.4092)	Entropy 0.96887 (0.96993)	Top-1 acc 68.359 (66.110)	Top-5 acc 85.938 (85.328)	lr 0.00794
Train [75][900/3239]	Time 0.353 (0.688)	Data Time 0.001 (0.043)	Loss 2.3239 (2.4095)	Entropy 0.96888 (0.96992)	Top-1 acc 67.578 (66.099)	Top-5 acc 85.547 (85.324)	lr 0.00794
Train [75][910/3239]	Time 0.232 (0.686)	Data Time 0.001 (0.042)	Loss 2.3147 (2.4095)	Entropy 0.96892 (0.96991)	Top-1 acc 66.406 (66.089)	Top-5 acc 88.672 (85.321)	lr 0.00793
Train [75][920/3239]	Time 0.219 (0.683)	Data Time 0.001 (0.042)	Loss 2.4348 (2.4096)	Entropy 0.96881 (0.96990)	Top-1 acc 65.234 (66.099)	Top-5 acc 85.938 (85.321)	lr 0.00793
Train [75][930/3239]	Time 0.246 (0.681)	Data Time 0.001 (0.041)	Loss 2.3295 (2.4101)	Entropy 0.96876 (0.96989)	Top-1 acc 67.969 (66.097)	Top-5 acc 85.156 (85.317)	lr 0.00793
Train [75][940/3239]	Time 0.191 (0.679)	Data Time 0.001 (0.041)	Loss 2.5325 (2.4102)	Entropy 0.96871 (0.96987)	Top-1 acc 64.453 (66.098)	Top-5 acc 82.422 (85.317)	lr 0.00793
Train [75][950/3239]	Time 0.232 (0.677)	Data Time 0.001 (0.041)	Loss 2.3473 (2.4098)	Entropy 0.96867 (0.96986)	Top-1 acc 70.312 (66.107)	Top-5 acc 85.156 (85.315)	lr 0.00793
Train [75][960/3239]	Time 0.242 (0.675)	Data Time 0.001 (0.040)	Loss 2.4580 (2.4097)	Entropy 0.96853 (0.96985)	Top-1 acc 65.625 (66.116)	Top-5 acc 82.812 (85.315)	lr 0.00793
Train [75][970/3239]	Time 0.230 (0.673)	Data Time 0.002 (0.040)	Loss 2.4150 (2.4095)	Entropy 0.96853 (0.96984)	Top-1 acc 62.500 (66.116)	Top-5 acc 83.984 (85.323)	lr 0.00793
Train [75][980/3239]	Time 0.231 (0.671)	Data Time 0.001 (0.039)	Loss 2.3813 (2.4090)	Entropy 0.96848 (0.96982)	Top-1 acc 63.281 (66.124)	Top-5 acc 84.375 (85.331)	lr 0.00793
Train [75][990/3239]	Time 0.363 (0.669)	Data Time 0.001 (0.039)	Loss 2.2969 (2.4091)	Entropy 0.96849 (0.96981)	Top-1 acc 68.359 (66.120)	Top-5 acc 86.328 (85.335)	lr 0.00793
Train [75][1000/3239]	Time 2.533 (0.667)	Data Time 0.001 (0.039)	Loss 2.2730 (2.4087)	Entropy 0.96849 (0.96980)	Top-1 acc 71.875 (66.133)	Top-5 acc 87.891 (85.340)	lr 0.00793
Train [75][1010/3239]	Time 0.216 (0.663)	Data Time 0.001 (0.038)	Loss 2.4937 (2.4088)	Entropy 0.96851 (0.96978)	Top-1 acc 65.234 (66.128)	Top-5 acc 83.984 (85.337)	lr 0.00793
Train [75][1020/3239]	Time 0.221 (0.661)	Data Time 0.001 (0.038)	Loss 2.4410 (2.4092)	Entropy 0.96853 (0.96977)	Top-1 acc 63.672 (66.101)	Top-5 acc 84.766 (85.332)	lr 0.00792
Train [75][1030/3239]	Time 0.220 (0.659)	Data Time 0.001 (0.038)	Loss 2.4190 (2.4092)	Entropy 0.96864 (0.96976)	Top-1 acc 67.578 (66.104)	Top-5 acc 86.719 (85.332)	lr 0.00792
Train [75][1040/3239]	Time 0.209 (0.658)	Data Time 0.001 (0.037)	Loss 2.3285 (2.4096)	Entropy 0.96857 (0.96975)	Top-1 acc 70.703 (66.097)	Top-5 acc 85.547 (85.325)	lr 0.00792
Train [75][1050/3239]	Time 0.234 (0.656)	Data Time 0.001 (0.037)	Loss 2.3284 (2.4099)	Entropy 0.96847 (0.96974)	Top-1 acc 67.969 (66.092)	Top-5 acc 87.891 (85.319)	lr 0.00792
Train [75][1060/3239]	Time 0.234 (0.654)	Data Time 0.001 (0.036)	Loss 2.4665 (2.4105)	Entropy 0.96843 (0.96972)	Top-1 acc 67.578 (66.083)	Top-5 acc 82.422 (85.307)	lr 0.00792
Train [75][1070/3239]	Time 0.232 (0.653)	Data Time 0.001 (0.036)	Loss 2.3524 (2.4110)	Entropy 0.96842 (0.96971)	Top-1 acc 65.625 (66.071)	Top-5 acc 88.281 (85.303)	lr 0.00792
Train [75][1080/3239]	Time 0.323 (0.651)	Data Time 0.001 (0.036)	Loss 2.3048 (2.4113)	Entropy 0.96842 (0.96970)	Top-1 acc 67.969 (66.064)	Top-5 acc 87.109 (85.298)	lr 0.00792
Train [75][1090/3239]	Time 0.246 (0.650)	Data Time 0.001 (0.036)	Loss 2.5231 (2.4116)	Entropy 0.96846 (0.96969)	Top-1 acc 62.500 (66.057)	Top-5 acc 82.031 (85.294)	lr 0.00792
Train [75][1100/3239]	Time 0.252 (0.648)	Data Time 0.001 (0.035)	Loss 2.3337 (2.4118)	Entropy 0.96837 (0.96968)	Top-1 acc 68.750 (66.061)	Top-5 acc 86.719 (85.288)	lr 0.00792
Train [75][1110/3239]	Time 2.467 (0.646)	Data Time 0.001 (0.035)	Loss 2.3206 (2.4110)	Entropy 0.96837 (0.96967)	Top-1 acc 68.359 (66.083)	Top-5 acc 85.156 (85.301)	lr 0.00792
Train [75][1120/3239]	Time 0.192 (0.643)	Data Time 0.001 (0.035)	Loss 2.3236 (2.4108)	Entropy 0.96833 (0.96965)	Top-1 acc 66.406 (66.089)	Top-5 acc 85.547 (85.298)	lr 0.00791
Train [75][1130/3239]	Time 0.232 (0.641)	Data Time 0.001 (0.034)	Loss 2.4361 (2.4110)	Entropy 0.96831 (0.96964)	Top-1 acc 65.625 (66.093)	Top-5 acc 83.984 (85.296)	lr 0.00791
Train [75][1140/3239]	Time 0.235 (0.640)	Data Time 0.001 (0.034)	Loss 2.4477 (2.4107)	Entropy 0.96834 (0.96963)	Top-1 acc 64.453 (66.101)	Top-5 acc 84.375 (85.305)	lr 0.00791
Train [75][1150/3239]	Time 0.222 (0.638)	Data Time 0.001 (0.034)	Loss 2.3587 (2.4107)	Entropy 0.96833 (0.96962)	Top-1 acc 67.578 (66.097)	Top-5 acc 87.109 (85.307)	lr 0.00791
Train [75][1160/3239]	Time 0.230 (0.637)	Data Time 0.001 (0.033)	Loss 2.4680 (2.4106)	Entropy 0.96831 (0.96961)	Top-1 acc 62.891 (66.101)	Top-5 acc 83.203 (85.308)	lr 0.00791
Train [75][1170/3239]	Time 0.259 (0.635)	Data Time 0.001 (0.033)	Loss 2.3343 (2.4106)	Entropy 0.96829 (0.96960)	Top-1 acc 67.969 (66.099)	Top-5 acc 85.938 (85.308)	lr 0.00791
Train [75][1180/3239]	Time 0.227 (0.634)	Data Time 0.001 (0.033)	Loss 2.4220 (2.4106)	Entropy 0.96818 (0.96958)	Top-1 acc 67.578 (66.094)	Top-5 acc 84.766 (85.305)	lr 0.00791
Train [75][1190/3239]	Time 0.228 (0.633)	Data Time 0.001 (0.033)	Loss 2.4729 (2.4108)	Entropy 0.96813 (0.96957)	Top-1 acc 63.672 (66.085)	Top-5 acc 83.594 (85.302)	lr 0.00791
Train [75][1200/3239]	Time 0.223 (0.631)	Data Time 0.001 (0.032)	Loss 2.4241 (2.4114)	Entropy 0.96800 (0.96956)	Top-1 acc 67.969 (66.074)	Top-5 acc 85.547 (85.291)	lr 0.00791
Train [75][1210/3239]	Time 0.280 (0.630)	Data Time 0.001 (0.032)	Loss 2.3156 (2.4116)	Entropy 0.96799 (0.96955)	Top-1 acc 69.531 (66.066)	Top-5 acc 87.891 (85.288)	lr 0.00791
Train [75][1220/3239]	Time 2.449 (0.629)	Data Time 0.001 (0.032)	Loss 2.3177 (2.4111)	Entropy 0.96799 (0.96954)	Top-1 acc 71.094 (66.089)	Top-5 acc 85.547 (85.294)	lr 0.00791
Train [75][1230/3239]	Time 0.216 (0.626)	Data Time 0.001 (0.032)	Loss 2.5190 (2.4114)	Entropy 0.96799 (0.96952)	Top-1 acc 64.062 (66.087)	Top-5 acc 83.594 (85.288)	lr 0.00790
Train [75][1240/3239]	Time 0.258 (0.669)	Data Time 0.003 (0.031)	Loss 2.3325 (2.4112)	Entropy 0.96794 (0.96951)	Top-1 acc 68.750 (66.090)	Top-5 acc 88.281 (85.296)	lr 0.00790
Train [75][1250/3239]	Time 0.234 (0.667)	Data Time 0.002 (0.031)	Loss 2.5104 (2.4110)	Entropy 0.96796 (0.96950)	Top-1 acc 63.672 (66.093)	Top-5 acc 82.812 (85.302)	lr 0.00790
Train [75][1260/3239]	Time 0.314 (0.666)	Data Time 0.002 (0.031)	Loss 2.4058 (2.4108)	Entropy 0.96796 (0.96949)	Top-1 acc 67.969 (66.110)	Top-5 acc 87.109 (85.304)	lr 0.00790
Train [75][1270/3239]	Time 0.230 (0.665)	Data Time 0.001 (0.031)	Loss 2.3139 (2.4105)	Entropy 0.96796 (0.96947)	Top-1 acc 69.922 (66.115)	Top-5 acc 87.109 (85.309)	lr 0.00790
Train [75][1280/3239]	Time 0.246 (0.663)	Data Time 0.001 (0.031)	Loss 2.3617 (2.4105)	Entropy 0.96796 (0.96946)	Top-1 acc 68.750 (66.118)	Top-5 acc 86.719 (85.308)	lr 0.00790
Train [75][1290/3239]	Time 0.230 (0.662)	Data Time 0.002 (0.030)	Loss 2.3518 (2.4110)	Entropy 0.96795 (0.96945)	Top-1 acc 69.141 (66.105)	Top-5 acc 85.938 (85.299)	lr 0.00790
Train [75][1300/3239]	Time 0.345 (0.660)	Data Time 0.002 (0.030)	Loss 2.4469 (2.4109)	Entropy 0.96797 (0.96944)	Top-1 acc 63.672 (66.111)	Top-5 acc 83.984 (85.298)	lr 0.00790
Train [75][1310/3239]	Time 0.219 (0.659)	Data Time 0.001 (0.030)	Loss 2.4314 (2.4110)	Entropy 0.96793 (0.96943)	Top-1 acc 68.359 (66.111)	Top-5 acc 85.156 (85.298)	lr 0.00790
Train [75][1320/3239]	Time 0.234 (0.657)	Data Time 0.001 (0.030)	Loss 2.2718 (2.4108)	Entropy 0.96786 (0.96942)	Top-1 acc 70.703 (66.117)	Top-5 acc 87.109 (85.298)	lr 0.00790
Train [75][1330/3239]	Time 2.474 (0.656)	Data Time 0.001 (0.029)	Loss 2.2782 (2.4104)	Entropy 0.96786 (0.96940)	Top-1 acc 68.750 (66.127)	Top-5 acc 89.844 (85.307)	lr 0.00790
Train [75][1340/3239]	Time 0.223 (0.653)	Data Time 0.001 (0.029)	Loss 2.1916 (2.4102)	Entropy 0.96790 (0.96939)	Top-1 acc 75.391 (66.132)	Top-5 acc 88.281 (85.302)	lr 0.00789
Train [75][1350/3239]	Time 0.216 (0.652)	Data Time 0.001 (0.029)	Loss 2.3158 (2.4104)	Entropy 0.96790 (0.96938)	Top-1 acc 67.188 (66.126)	Top-5 acc 87.109 (85.303)	lr 0.00789
Train [75][1360/3239]	Time 0.235 (0.650)	Data Time 0.001 (0.029)	Loss 2.5320 (2.4102)	Entropy 0.96790 (0.96937)	Top-1 acc 63.672 (66.138)	Top-5 acc 83.203 (85.303)	lr 0.00789
Train [75][1370/3239]	Time 0.232 (0.649)	Data Time 0.001 (0.029)	Loss 2.4308 (2.4104)	Entropy 0.96785 (0.96936)	Top-1 acc 64.844 (66.136)	Top-5 acc 84.375 (85.295)	lr 0.00789
Train [75][1380/3239]	Time 0.225 (0.648)	Data Time 0.001 (0.028)	Loss 2.2236 (2.4099)	Entropy 0.96785 (0.96935)	Top-1 acc 72.266 (66.150)	Top-5 acc 87.891 (85.305)	lr 0.00789
Train [75][1390/3239]	Time 0.317 (0.646)	Data Time 0.001 (0.028)	Loss 2.2637 (2.4094)	Entropy 0.96779 (0.96934)	Top-1 acc 67.969 (66.165)	Top-5 acc 88.281 (85.313)	lr 0.00789
Train [75][1400/3239]	Time 0.235 (0.645)	Data Time 0.001 (0.028)	Loss 2.3274 (2.4094)	Entropy 0.96785 (0.96933)	Top-1 acc 67.969 (66.164)	Top-5 acc 89.062 (85.317)	lr 0.00789
Train [75][1410/3239]	Time 0.207 (0.644)	Data Time 0.001 (0.028)	Loss 2.6776 (2.4096)	Entropy 0.96788 (0.96932)	Top-1 acc 60.547 (66.160)	Top-5 acc 80.469 (85.312)	lr 0.00789
Train [75][1420/3239]	Time 0.276 (0.643)	Data Time 0.001 (0.028)	Loss 2.4439 (2.4097)	Entropy 0.96788 (0.96931)	Top-1 acc 65.625 (66.158)	Top-5 acc 86.328 (85.310)	lr 0.00789
Train [75][1430/3239]	Time 0.216 (0.641)	Data Time 0.002 (0.027)	Loss 2.5349 (2.4095)	Entropy 0.96788 (0.96930)	Top-1 acc 60.547 (66.158)	Top-5 acc 85.938 (85.316)	lr 0.00789
Train [75][1440/3239]	Time 2.694 (0.640)	Data Time 0.001 (0.027)	Loss 2.5374 (2.4094)	Entropy 0.96788 (0.96929)	Top-1 acc 65.625 (66.160)	Top-5 acc 84.375 (85.318)	lr 0.00788
Train [75][1450/3239]	Time 0.227 (0.638)	Data Time 0.001 (0.027)	Loss 2.4243 (2.4092)	Entropy 0.96789 (0.96928)	Top-1 acc 65.625 (66.166)	Top-5 acc 88.281 (85.325)	lr 0.00788
Train [75][1460/3239]	Time 0.221 (0.637)	Data Time 0.001 (0.027)	Loss 2.4930 (2.4090)	Entropy 0.96783 (0.96927)	Top-1 acc 65.625 (66.170)	Top-5 acc 83.203 (85.327)	lr 0.00788
Train [75][1470/3239]	Time 0.277 (0.635)	Data Time 0.001 (0.027)	Loss 2.1552 (2.4090)	Entropy 0.96784 (0.96926)	Top-1 acc 71.094 (66.175)	Top-5 acc 89.062 (85.328)	lr 0.00788
Train [75][1480/3239]	Time 0.328 (0.635)	Data Time 0.001 (0.027)	Loss 2.5178 (2.4090)	Entropy 0.96782 (0.96925)	Top-1 acc 62.500 (66.169)	Top-5 acc 82.812 (85.326)	lr 0.00788
Train [75][1490/3239]	Time 0.239 (0.633)	Data Time 0.001 (0.026)	Loss 2.6064 (2.4088)	Entropy 0.96778 (0.96924)	Top-1 acc 60.547 (66.174)	Top-5 acc 82.031 (85.327)	lr 0.00788
Train [75][1500/3239]	Time 0.232 (0.632)	Data Time 0.001 (0.026)	Loss 2.3887 (2.4089)	Entropy 0.96772 (0.96923)	Top-1 acc 68.359 (66.171)	Top-5 acc 83.594 (85.320)	lr 0.00788
Train [75][1510/3239]	Time 0.243 (0.631)	Data Time 0.001 (0.026)	Loss 2.4786 (2.4091)	Entropy 0.96759 (0.96922)	Top-1 acc 66.016 (66.175)	Top-5 acc 83.594 (85.312)	lr 0.00788
Train [75][1520/3239]	Time 0.238 (0.630)	Data Time 0.001 (0.026)	Loss 2.4913 (2.4093)	Entropy 0.96757 (0.96921)	Top-1 acc 66.016 (66.171)	Top-5 acc 83.984 (85.308)	lr 0.00788
Train [75][1530/3239]	Time 0.234 (0.629)	Data Time 0.001 (0.026)	Loss 2.4748 (2.4094)	Entropy 0.96749 (0.96920)	Top-1 acc 66.406 (66.171)	Top-5 acc 85.156 (85.309)	lr 0.00788
Train [75][1540/3239]	Time 0.304 (0.628)	Data Time 0.002 (0.026)	Loss 2.2658 (2.4095)	Entropy 0.96747 (0.96919)	Top-1 acc 66.797 (66.171)	Top-5 acc 89.844 (85.306)	lr 0.00788
Train [75][1550/3239]	Time 2.514 (0.627)	Data Time 0.001 (0.025)	Loss 2.4275 (2.4095)	Entropy 0.96747 (0.96917)	Top-1 acc 65.234 (66.179)	Top-5 acc 85.547 (85.302)	lr 0.00787
Train [75][1560/3239]	Time 0.228 (0.625)	Data Time 0.001 (0.025)	Loss 2.3756 (2.4096)	Entropy 0.96743 (0.96916)	Top-1 acc 68.359 (66.173)	Top-5 acc 83.984 (85.304)	lr 0.00787
Train [75][1570/3239]	Time 0.403 (0.624)	Data Time 0.002 (0.025)	Loss 2.4471 (2.4097)	Entropy 0.96737 (0.96915)	Top-1 acc 60.156 (66.161)	Top-5 acc 86.328 (85.306)	lr 0.00787
Train [75][1580/3239]	Time 0.256 (0.623)	Data Time 0.001 (0.025)	Loss 2.3673 (2.4098)	Entropy 0.96732 (0.96914)	Top-1 acc 64.453 (66.160)	Top-5 acc 86.719 (85.306)	lr 0.00787
Train [75][1590/3239]	Time 0.221 (0.622)	Data Time 0.001 (0.025)	Loss 2.3157 (2.4096)	Entropy 0.96730 (0.96913)	Top-1 acc 66.016 (66.155)	Top-5 acc 85.938 (85.308)	lr 0.00787
Train [75][1600/3239]	Time 0.234 (0.655)	Data Time 0.002 (0.025)	Loss 2.3331 (2.4096)	Entropy 0.96726 (0.96912)	Top-1 acc 67.578 (66.154)	Top-5 acc 88.672 (85.315)	lr 0.00787
Train [75][1610/3239]	Time 0.233 (0.654)	Data Time 0.002 (0.025)	Loss 2.4077 (2.4097)	Entropy 0.96723 (0.96911)	Top-1 acc 67.969 (66.157)	Top-5 acc 85.156 (85.316)	lr 0.00787
Train [75][1620/3239]	Time 0.229 (0.653)	Data Time 0.001 (0.024)	Loss 2.4433 (2.4100)	Entropy 0.96719 (0.96909)	Top-1 acc 65.625 (66.145)	Top-5 acc 84.766 (85.305)	lr 0.00787
Train [75][1630/3239]	Time 0.263 (0.652)	Data Time 0.001 (0.024)	Loss 2.4302 (2.4101)	Entropy 0.96719 (0.96908)	Top-1 acc 64.062 (66.137)	Top-5 acc 87.500 (85.305)	lr 0.00787
Train [75][1640/3239]	Time 0.229 (0.651)	Data Time 0.001 (0.024)	Loss 2.4875 (2.4101)	Entropy 0.96717 (0.96907)	Top-1 acc 66.406 (66.139)	Top-5 acc 84.766 (85.300)	lr 0.00787
Train [75][1650/3239]	Time 0.260 (0.650)	Data Time 0.001 (0.024)	Loss 2.4002 (2.4104)	Entropy 0.96715 (0.96906)	Top-1 acc 62.500 (66.133)	Top-5 acc 87.109 (85.297)	lr 0.00787
Train [75][1660/3239]	Time 2.589 (0.649)	Data Time 0.002 (0.024)	Loss 2.4824 (2.4104)	Entropy 0.96715 (0.96905)	Top-1 acc 62.500 (66.129)	Top-5 acc 84.375 (85.299)	lr 0.00786
Train [75][1670/3239]	Time 0.244 (0.646)	Data Time 0.002 (0.024)	Loss 2.5296 (2.4103)	Entropy 0.96716 (0.96904)	Top-1 acc 65.625 (66.132)	Top-5 acc 81.641 (85.305)	lr 0.00786
Train [75][1680/3239]	Time 0.243 (0.645)	Data Time 0.001 (0.024)	Loss 2.5695 (2.4103)	Entropy 0.96716 (0.96902)	Top-1 acc 64.844 (66.133)	Top-5 acc 81.250 (85.301)	lr 0.00786
Train [75][1690/3239]	Time 0.232 (0.644)	Data Time 0.001 (0.023)	Loss 2.4924 (2.4105)	Entropy 0.96711 (0.96901)	Top-1 acc 65.234 (66.130)	Top-5 acc 84.375 (85.299)	lr 0.00786
Train [75][1700/3239]	Time 0.242 (0.643)	Data Time 0.001 (0.023)	Loss 2.3337 (2.4108)	Entropy 0.96710 (0.96900)	Top-1 acc 67.578 (66.121)	Top-5 acc 87.109 (85.291)	lr 0.00786
Train [75][1710/3239]	Time 0.238 (0.642)	Data Time 0.001 (0.023)	Loss 2.2932 (2.4106)	Entropy 0.96707 (0.96899)	Top-1 acc 70.703 (66.132)	Top-5 acc 88.281 (85.293)	lr 0.00786
Train [75][1720/3239]	Time 0.233 (0.641)	Data Time 0.001 (0.023)	Loss 2.5030 (2.4107)	Entropy 0.96698 (0.96898)	Top-1 acc 62.891 (66.125)	Top-5 acc 84.375 (85.293)	lr 0.00786
Train [75][1730/3239]	Time 0.237 (0.640)	Data Time 0.002 (0.023)	Loss 2.6124 (2.4112)	Entropy 0.96701 (0.96897)	Top-1 acc 62.891 (66.115)	Top-5 acc 81.250 (85.284)	lr 0.00786
Train [75][1740/3239]	Time 0.228 (0.639)	Data Time 0.001 (0.023)	Loss 2.4450 (2.4112)	Entropy 0.96697 (0.96896)	Top-1 acc 63.672 (66.115)	Top-5 acc 85.156 (85.287)	lr 0.00786
Train [75][1750/3239]	Time 0.367 (0.638)	Data Time 0.002 (0.023)	Loss 2.4532 (2.4115)	Entropy 0.96701 (0.96895)	Top-1 acc 68.750 (66.107)	Top-5 acc 84.766 (85.281)	lr 0.00786
Train [75][1760/3239]	Time 0.226 (0.637)	Data Time 0.002 (0.023)	Loss 2.3707 (2.4114)	Entropy 0.96697 (0.96893)	Top-1 acc 64.844 (66.107)	Top-5 acc 83.203 (85.283)	lr 0.00785
Train [75][1770/3239]	Time 2.552 (0.636)	Data Time 0.002 (0.023)	Loss 2.3727 (2.4115)	Entropy 0.96697 (0.96892)	Top-1 acc 67.578 (66.103)	Top-5 acc 87.891 (85.283)	lr 0.00785
Train [75][1780/3239]	Time 0.219 (0.634)	Data Time 0.001 (0.022)	Loss 2.5920 (2.4118)	Entropy 0.96697 (0.96891)	Top-1 acc 58.984 (66.091)	Top-5 acc 81.250 (85.276)	lr 0.00785
Train [75][1790/3239]	Time 0.237 (0.633)	Data Time 0.001 (0.022)	Loss 2.4143 (2.4117)	Entropy 0.96695 (0.96890)	Top-1 acc 66.406 (66.095)	Top-5 acc 85.156 (85.277)	lr 0.00785
Train [75][1800/3239]	Time 0.234 (0.632)	Data Time 0.001 (0.022)	Loss 2.3407 (2.4117)	Entropy 0.96693 (0.96889)	Top-1 acc 70.312 (66.094)	Top-5 acc 87.500 (85.277)	lr 0.00785
Train [75][1810/3239]	Time 0.234 (0.631)	Data Time 0.001 (0.022)	Loss 2.4044 (2.4115)	Entropy 0.96693 (0.96888)	Top-1 acc 66.016 (66.098)	Top-5 acc 87.109 (85.277)	lr 0.00785
Train [75][1820/3239]	Time 0.228 (0.631)	Data Time 0.001 (0.022)	Loss 2.3136 (2.4117)	Entropy 0.96694 (0.96887)	Top-1 acc 67.969 (66.098)	Top-5 acc 86.328 (85.273)	lr 0.00785
Train [75][1830/3239]	Time 0.218 (0.630)	Data Time 0.001 (0.022)	Loss 2.3631 (2.4117)	Entropy 0.96688 (0.96886)	Top-1 acc 67.969 (66.101)	Top-5 acc 87.109 (85.276)	lr 0.00785
Train [75][1840/3239]	Time 0.255 (0.629)	Data Time 0.001 (0.022)	Loss 2.3653 (2.4116)	Entropy 0.96694 (0.96885)	Top-1 acc 66.406 (66.100)	Top-5 acc 82.422 (85.275)	lr 0.00785
Train [75][1850/3239]	Time 0.228 (0.628)	Data Time 0.002 (0.022)	Loss 2.4750 (2.4118)	Entropy 0.96688 (0.96884)	Top-1 acc 64.844 (66.099)	Top-5 acc 84.375 (85.266)	lr 0.00785
Train [75][1860/3239]	Time 0.222 (0.627)	Data Time 0.001 (0.021)	Loss 2.4642 (2.4121)	Entropy 0.96686 (0.96883)	Top-1 acc 65.234 (66.093)	Top-5 acc 83.594 (85.255)	lr 0.00785
Train [75][1870/3239]	Time 0.249 (0.627)	Data Time 0.002 (0.021)	Loss 2.5343 (2.4124)	Entropy 0.96685 (0.96882)	Top-1 acc 62.500 (66.090)	Top-5 acc 84.766 (85.251)	lr 0.00784
Train [75][1880/3239]	Time 2.593 (0.626)	Data Time 0.002 (0.021)	Loss 2.4955 (2.4127)	Entropy 0.96685 (0.96881)	Top-1 acc 63.672 (66.084)	Top-5 acc 82.031 (85.247)	lr 0.00784
Train [75][1890/3239]	Time 0.231 (0.624)	Data Time 0.001 (0.021)	Loss 2.5289 (2.4129)	Entropy 0.96676 (0.96880)	Top-1 acc 64.844 (66.073)	Top-5 acc 81.641 (85.239)	lr 0.00784
Train [75][1900/3239]	Time 0.232 (0.623)	Data Time 0.001 (0.021)	Loss 2.4197 (2.4131)	Entropy 0.96675 (0.96878)	Top-1 acc 66.406 (66.065)	Top-5 acc 85.156 (85.235)	lr 0.00784
Train [75][1910/3239]	Time 0.251 (0.622)	Data Time 0.002 (0.021)	Loss 2.3383 (2.4128)	Entropy 0.96668 (0.96877)	Top-1 acc 68.750 (66.076)	Top-5 acc 85.547 (85.243)	lr 0.00784
Train [75][1920/3239]	Time 0.232 (0.621)	Data Time 0.001 (0.021)	Loss 2.4099 (2.4131)	Entropy 0.96671 (0.96876)	Top-1 acc 65.625 (66.062)	Top-5 acc 85.156 (85.239)	lr 0.00784
Train [75][1930/3239]	Time 0.227 (0.621)	Data Time 0.001 (0.021)	Loss 2.4219 (2.4132)	Entropy 0.96667 (0.96875)	Top-1 acc 67.188 (66.059)	Top-5 acc 86.328 (85.237)	lr 0.00784
Train [75][1940/3239]	Time 0.226 (0.620)	Data Time 0.001 (0.021)	Loss 2.4027 (2.4131)	Entropy 0.96664 (0.96874)	Top-1 acc 64.844 (66.061)	Top-5 acc 85.938 (85.237)	lr 0.00784
Train [75][1950/3239]	Time 0.219 (0.619)	Data Time 0.001 (0.021)	Loss 2.6304 (2.4132)	Entropy 0.96660 (0.96873)	Top-1 acc 58.984 (66.055)	Top-5 acc 80.859 (85.235)	lr 0.00784
Train [75][1960/3239]	Time 0.257 (0.645)	Data Time 0.002 (0.020)	Loss 2.4464 (2.4134)	Entropy 0.96658 (0.96872)	Top-1 acc 67.188 (66.052)	Top-5 acc 83.203 (85.231)	lr 0.00784
Train [75][1970/3239]	Time 0.368 (0.644)	Data Time 0.002 (0.020)	Loss 2.3981 (2.4132)	Entropy 0.96656 (0.96871)	Top-1 acc 70.703 (66.059)	Top-5 acc 85.156 (85.233)	lr 0.00784
Train [75][1980/3239]	Time 0.215 (0.644)	Data Time 0.002 (0.020)	Loss 2.4595 (2.4130)	Entropy 0.96657 (0.96870)	Top-1 acc 66.797 (66.063)	Top-5 acc 82.422 (85.237)	lr 0.00783
Train [75][1990/3239]	Time 2.444 (0.643)	Data Time 0.002 (0.020)	Loss 2.3607 (2.4129)	Entropy 0.96657 (0.96869)	Top-1 acc 69.141 (66.066)	Top-5 acc 87.891 (85.239)	lr 0.00783
Train [75][2000/3239]	Time 0.245 (0.641)	Data Time 0.002 (0.020)	Loss 2.3890 (2.4127)	Entropy 0.96661 (0.96868)	Top-1 acc 67.188 (66.073)	Top-5 acc 85.156 (85.240)	lr 0.00783
Train [75][2010/3239]	Time 0.177 (0.640)	Data Time 0.001 (0.020)	Loss 2.3041 (2.4128)	Entropy 0.96655 (0.96867)	Top-1 acc 74.609 (66.071)	Top-5 acc 86.719 (85.241)	lr 0.00783
Train [75][2020/3239]	Time 0.247 (0.639)	Data Time 0.001 (0.020)	Loss 2.4698 (2.4128)	Entropy 0.96649 (0.96866)	Top-1 acc 63.672 (66.070)	Top-5 acc 83.984 (85.238)	lr 0.00783
Train [75][2030/3239]	Time 0.233 (0.638)	Data Time 0.001 (0.020)	Loss 2.4585 (2.4128)	Entropy 0.96651 (0.96864)	Top-1 acc 67.188 (66.075)	Top-5 acc 83.594 (85.236)	lr 0.00783
Train [75][2040/3239]	Time 0.217 (0.637)	Data Time 0.001 (0.020)	Loss 2.3930 (2.4126)	Entropy 0.96647 (0.96863)	Top-1 acc 66.016 (66.081)	Top-5 acc 85.156 (85.239)	lr 0.00783
Train [75][2050/3239]	Time 0.220 (0.636)	Data Time 0.001 (0.020)	Loss 2.3899 (2.4126)	Entropy 0.96647 (0.96862)	Top-1 acc 66.406 (66.082)	Top-5 acc 84.375 (85.239)	lr 0.00783
Train [75][2060/3239]	Time 0.336 (0.636)	Data Time 0.003 (0.020)	Loss 2.3972 (2.4127)	Entropy 0.96645 (0.96861)	Top-1 acc 64.844 (66.077)	Top-5 acc 84.375 (85.239)	lr 0.00783
Train [75][2070/3239]	Time 0.227 (0.635)	Data Time 0.001 (0.019)	Loss 2.3365 (2.4125)	Entropy 0.96638 (0.96860)	Top-1 acc 68.359 (66.075)	Top-5 acc 85.547 (85.244)	lr 0.00783
Train [75][2080/3239]	Time 0.224 (0.634)	Data Time 0.001 (0.019)	Loss 2.4308 (2.4125)	Entropy 0.96636 (0.96859)	Top-1 acc 68.359 (66.080)	Top-5 acc 84.766 (85.242)	lr 0.00782
Train [75][2090/3239]	Time 0.271 (0.633)	Data Time 0.001 (0.019)	Loss 2.3189 (2.4128)	Entropy 0.96634 (0.96858)	Top-1 acc 67.578 (66.068)	Top-5 acc 84.766 (85.236)	lr 0.00782
Train [75][2100/3239]	Time 2.376 (0.632)	Data Time 0.001 (0.019)	Loss 2.2752 (2.4126)	Entropy 0.96634 (0.96857)	Top-1 acc 73.047 (66.073)	Top-5 acc 87.500 (85.241)	lr 0.00782
Train [75][2110/3239]	Time 0.230 (0.631)	Data Time 0.001 (0.019)	Loss 2.3917 (2.4124)	Entropy 0.96631 (0.96856)	Top-1 acc 68.750 (66.077)	Top-5 acc 83.984 (85.243)	lr 0.00782
Train [75][2120/3239]	Time 0.223 (0.630)	Data Time 0.001 (0.019)	Loss 2.3461 (2.4122)	Entropy 0.96624 (0.96855)	Top-1 acc 67.969 (66.084)	Top-5 acc 85.938 (85.247)	lr 0.00782
Train [75][2130/3239]	Time 0.244 (0.629)	Data Time 0.001 (0.019)	Loss 2.4949 (2.4123)	Entropy 0.96619 (0.96854)	Top-1 acc 67.578 (66.083)	Top-5 acc 82.422 (85.246)	lr 0.00782
Train [75][2140/3239]	Time 0.231 (0.629)	Data Time 0.002 (0.019)	Loss 2.3662 (2.4121)	Entropy 0.96614 (0.96853)	Top-1 acc 65.625 (66.092)	Top-5 acc 87.109 (85.251)	lr 0.00782
Train [75][2150/3239]	Time 0.336 (0.628)	Data Time 0.001 (0.019)	Loss 2.5287 (2.4121)	Entropy 0.96612 (0.96852)	Top-1 acc 63.672 (66.093)	Top-5 acc 83.984 (85.253)	lr 0.00782
Train [75][2160/3239]	Time 0.222 (0.627)	Data Time 0.001 (0.019)	Loss 2.5652 (2.4120)	Entropy 0.96606 (0.96850)	Top-1 acc 59.766 (66.097)	Top-5 acc 82.422 (85.253)	lr 0.00782
Train [75][2170/3239]	Time 0.225 (0.626)	Data Time 0.001 (0.019)	Loss 2.4468 (2.4119)	Entropy 0.96604 (0.96849)	Top-1 acc 64.453 (66.098)	Top-5 acc 84.766 (85.252)	lr 0.00782
Train [75][2180/3239]	Time 0.229 (0.626)	Data Time 0.001 (0.019)	Loss 2.4658 (2.4118)	Entropy 0.96601 (0.96848)	Top-1 acc 62.891 (66.099)	Top-5 acc 82.422 (85.250)	lr 0.00782
Train [75][2190/3239]	Time 0.245 (0.625)	Data Time 0.001 (0.019)	Loss 2.5351 (2.4119)	Entropy 0.96596 (0.96847)	Top-1 acc 63.672 (66.103)	Top-5 acc 82.812 (85.249)	lr 0.00781
Train [75][2200/3239]	Time 0.259 (0.624)	Data Time 0.001 (0.018)	Loss 2.3209 (2.4118)	Entropy 0.96592 (0.96846)	Top-1 acc 69.531 (66.106)	Top-5 acc 86.719 (85.248)	lr 0.00781
Train [75][2210/3239]	Time 2.436 (0.624)	Data Time 0.001 (0.018)	Loss 2.3371 (2.4120)	Entropy 0.96592 (0.96845)	Top-1 acc 67.188 (66.095)	Top-5 acc 85.156 (85.243)	lr 0.00781
Train [75][2220/3239]	Time 0.226 (0.622)	Data Time 0.001 (0.018)	Loss 2.4630 (2.4122)	Entropy 0.96593 (0.96844)	Top-1 acc 67.969 (66.092)	Top-5 acc 84.375 (85.238)	lr 0.00781
Train [75][2230/3239]	Time 0.232 (0.621)	Data Time 0.001 (0.018)	Loss 2.3475 (2.4121)	Entropy 0.96590 (0.96842)	Top-1 acc 67.969 (66.096)	Top-5 acc 86.719 (85.237)	lr 0.00781
Train [75][2240/3239]	Time 0.451 (0.621)	Data Time 0.001 (0.018)	Loss 2.6476 (2.4121)	Entropy 0.96583 (0.96841)	Top-1 acc 57.812 (66.096)	Top-5 acc 79.688 (85.235)	lr 0.00781
Train [75][2250/3239]	Time 0.237 (0.620)	Data Time 0.001 (0.018)	Loss 2.3763 (2.4120)	Entropy 0.96575 (0.96840)	Top-1 acc 67.578 (66.100)	Top-5 acc 84.766 (85.236)	lr 0.00781
Train [75][2260/3239]	Time 0.252 (0.620)	Data Time 0.001 (0.018)	Loss 2.3265 (2.4122)	Entropy 0.96574 (0.96839)	Top-1 acc 67.969 (66.095)	Top-5 acc 85.938 (85.235)	lr 0.00781
Train [75][2270/3239]	Time 0.227 (0.619)	Data Time 0.001 (0.018)	Loss 2.3071 (2.4122)	Entropy 0.96573 (0.96838)	Top-1 acc 68.359 (66.096)	Top-5 acc 85.547 (85.233)	lr 0.00781
Train [75][2280/3239]	Time 0.222 (0.618)	Data Time 0.001 (0.018)	Loss 2.4822 (2.4123)	Entropy 0.96566 (0.96837)	Top-1 acc 66.406 (66.094)	Top-5 acc 84.766 (85.228)	lr 0.00781
Train [75][2290/3239]	Time 0.261 (0.618)	Data Time 0.002 (0.018)	Loss 2.5344 (2.4122)	Entropy 0.96560 (0.96835)	Top-1 acc 62.109 (66.099)	Top-5 acc 83.594 (85.230)	lr 0.00781
Train [75][2300/3239]	Time 0.263 (0.617)	Data Time 0.001 (0.018)	Loss 2.4993 (2.4121)	Entropy 0.96559 (0.96834)	Top-1 acc 62.891 (66.102)	Top-5 acc 80.078 (85.231)	lr 0.00780
Train [75][2310/3239]	Time 0.289 (0.616)	Data Time 0.001 (0.018)	Loss 2.4128 (2.4122)	Entropy 0.96556 (0.96833)	Top-1 acc 64.453 (66.100)	Top-5 acc 85.938 (85.228)	lr 0.00780
Train [75][2320/3239]	Time 50.727 (0.637)	Data Time 0.001 (0.018)	Loss 2.4461 (2.4122)	Entropy 0.96556 (0.96832)	Top-1 acc 67.188 (66.103)	Top-5 acc 85.938 (85.227)	lr 0.00780
Train [75][2330/3239]	Time 0.533 (0.635)	Data Time 0.002 (0.017)	Loss 2.3469 (2.4123)	Entropy 0.96557 (0.96831)	Top-1 acc 66.797 (66.096)	Top-5 acc 87.891 (85.227)	lr 0.00780
Train [75][2340/3239]	Time 0.256 (0.635)	Data Time 0.001 (0.017)	Loss 2.5350 (2.4124)	Entropy 0.96556 (0.96830)	Top-1 acc 62.891 (66.094)	Top-5 acc 81.250 (85.225)	lr 0.00780
Train [75][2350/3239]	Time 0.186 (0.634)	Data Time 0.001 (0.017)	Loss 2.3798 (2.4125)	Entropy 0.96553 (0.96828)	Top-1 acc 64.453 (66.093)	Top-5 acc 89.453 (85.222)	lr 0.00780
Train [75][2360/3239]	Time 0.248 (0.634)	Data Time 0.002 (0.017)	Loss 2.2906 (2.4124)	Entropy 0.96545 (0.96827)	Top-1 acc 71.484 (66.094)	Top-5 acc 85.156 (85.223)	lr 0.00780
Train [75][2370/3239]	Time 0.233 (0.633)	Data Time 0.001 (0.017)	Loss 2.4291 (2.4123)	Entropy 0.96535 (0.96826)	Top-1 acc 64.453 (66.096)	Top-5 acc 86.719 (85.226)	lr 0.00780
Train [75][2380/3239]	Time 0.257 (0.632)	Data Time 0.001 (0.017)	Loss 2.5214 (2.4125)	Entropy 0.96518 (0.96825)	Top-1 acc 61.719 (66.091)	Top-5 acc 84.766 (85.225)	lr 0.00780
Train [75][2390/3239]	Time 0.228 (0.632)	Data Time 0.001 (0.017)	Loss 2.5500 (2.4125)	Entropy 0.96518 (0.96823)	Top-1 acc 63.281 (66.089)	Top-5 acc 84.375 (85.226)	lr 0.00780
Train [75][2400/3239]	Time 0.221 (0.631)	Data Time 0.001 (0.017)	Loss 2.4055 (2.4125)	Entropy 0.96516 (0.96822)	Top-1 acc 64.062 (66.086)	Top-5 acc 84.375 (85.224)	lr 0.00779
Train [75][2410/3239]	Time 0.288 (0.631)	Data Time 0.001 (0.017)	Loss 2.4462 (2.4127)	Entropy 0.96512 (0.96821)	Top-1 acc 64.062 (66.082)	Top-5 acc 81.250 (85.223)	lr 0.00779
Train [75][2420/3239]	Time 0.248 (0.630)	Data Time 0.001 (0.017)	Loss 2.3990 (2.4128)	Entropy 0.96509 (0.96820)	Top-1 acc 67.969 (66.080)	Top-5 acc 83.594 (85.220)	lr 0.00779
Train [75][2430/3239]	Time 2.453 (0.629)	Data Time 0.002 (0.017)	Loss 2.3836 (2.4128)	Entropy 0.96509 (0.96818)	Top-1 acc 66.406 (66.084)	Top-5 acc 85.156 (85.220)	lr 0.00779
Train [75][2440/3239]	Time 0.230 (0.628)	Data Time 0.001 (0.017)	Loss 2.6645 (2.4129)	Entropy 0.96509 (0.96817)	Top-1 acc 60.938 (66.080)	Top-5 acc 81.250 (85.219)	lr 0.00779
Train [75][2450/3239]	Time 0.230 (0.627)	Data Time 0.002 (0.017)	Loss 2.3169 (2.4128)	Entropy 0.96507 (0.96816)	Top-1 acc 68.750 (66.081)	Top-5 acc 88.281 (85.225)	lr 0.00779
Train [75][2460/3239]	Time 0.261 (0.626)	Data Time 0.001 (0.017)	Loss 2.3504 (2.4129)	Entropy 0.96500 (0.96814)	Top-1 acc 63.281 (66.077)	Top-5 acc 87.891 (85.224)	lr 0.00779
Train [75][2470/3239]	Time 0.223 (0.626)	Data Time 0.001 (0.017)	Loss 2.4389 (2.4129)	Entropy 0.96498 (0.96813)	Top-1 acc 64.844 (66.072)	Top-5 acc 85.547 (85.226)	lr 0.00779
Train [75][2480/3239]	Time 0.224 (0.625)	Data Time 0.001 (0.017)	Loss 2.4482 (2.4128)	Entropy 0.96497 (0.96812)	Top-1 acc 66.406 (66.075)	Top-5 acc 83.203 (85.228)	lr 0.00779
Train [75][2490/3239]	Time 0.248 (0.625)	Data Time 0.001 (0.017)	Loss 2.4945 (2.4128)	Entropy 0.96495 (0.96811)	Top-1 acc 62.891 (66.073)	Top-5 acc 81.641 (85.227)	lr 0.00779
Train [75][2500/3239]	Time 0.298 (0.624)	Data Time 0.001 (0.016)	Loss 2.5393 (2.4128)	Entropy 0.96486 (0.96809)	Top-1 acc 62.891 (66.074)	Top-5 acc 81.641 (85.226)	lr 0.00779
Train [75][2510/3239]	Time 0.235 (0.623)	Data Time 0.001 (0.016)	Loss 2.4206 (2.4128)	Entropy 0.96483 (0.96808)	Top-1 acc 65.234 (66.072)	Top-5 acc 83.203 (85.224)	lr 0.00778
Train [75][2520/3239]	Time 0.233 (0.623)	Data Time 0.001 (0.016)	Loss 2.2780 (2.4127)	Entropy 0.96461 (0.96807)	Top-1 acc 69.141 (66.077)	Top-5 acc 85.547 (85.226)	lr 0.00778
Train [75][2530/3239]	Time 0.307 (0.622)	Data Time 0.001 (0.016)	Loss 2.6022 (2.4127)	Entropy 0.96449 (0.96805)	Top-1 acc 60.547 (66.074)	Top-5 acc 81.641 (85.230)	lr 0.00778
Train [75][2540/3239]	Time 2.445 (0.622)	Data Time 0.001 (0.016)	Loss 2.4732 (2.4127)	Entropy 0.96449 (0.96804)	Top-1 acc 64.453 (66.075)	Top-5 acc 83.984 (85.229)	lr 0.00778
Train [75][2550/3239]	Time 0.251 (0.620)	Data Time 0.001 (0.016)	Loss 2.3216 (2.4125)	Entropy 0.96448 (0.96803)	Top-1 acc 67.969 (66.080)	Top-5 acc 87.500 (85.234)	lr 0.00778
Train [75][2560/3239]	Time 0.245 (0.620)	Data Time 0.002 (0.016)	Loss 2.3727 (2.4125)	Entropy 0.96445 (0.96801)	Top-1 acc 66.406 (66.078)	Top-5 acc 85.547 (85.236)	lr 0.00778
Train [75][2570/3239]	Time 0.234 (0.619)	Data Time 0.001 (0.016)	Loss 2.4220 (2.4126)	Entropy 0.96445 (0.96800)	Top-1 acc 65.625 (66.073)	Top-5 acc 82.422 (85.231)	lr 0.00778
Train [75][2580/3239]	Time 0.227 (0.618)	Data Time 0.002 (0.016)	Loss 2.2745 (2.4126)	Entropy 0.96442 (0.96798)	Top-1 acc 69.922 (66.070)	Top-5 acc 86.719 (85.231)	lr 0.00778
Train [75][2590/3239]	Time 0.266 (0.618)	Data Time 0.001 (0.016)	Loss 2.5469 (2.4126)	Entropy 0.96443 (0.96797)	Top-1 acc 62.500 (66.072)	Top-5 acc 82.812 (85.229)	lr 0.00778
Train [75][2600/3239]	Time 0.306 (0.617)	Data Time 0.001 (0.016)	Loss 2.3544 (2.4124)	Entropy 0.96439 (0.96796)	Top-1 acc 67.188 (66.077)	Top-5 acc 85.938 (85.234)	lr 0.00778
Train [75][2610/3239]	Time 0.278 (0.617)	Data Time 0.001 (0.016)	Loss 2.3914 (2.4123)	Entropy 0.96431 (0.96794)	Top-1 acc 65.625 (66.077)	Top-5 acc 85.547 (85.233)	lr 0.00778
Train [75][2620/3239]	Time 0.227 (0.616)	Data Time 0.001 (0.016)	Loss 2.3970 (2.4124)	Entropy 0.96431 (0.96793)	Top-1 acc 65.625 (66.075)	Top-5 acc 85.156 (85.231)	lr 0.00777
Train [75][2630/3239]	Time 0.223 (0.616)	Data Time 0.001 (0.016)	Loss 2.5149 (2.4125)	Entropy 0.96427 (0.96792)	Top-1 acc 63.672 (66.071)	Top-5 acc 80.859 (85.231)	lr 0.00777
Train [75][2640/3239]	Time 0.317 (0.615)	Data Time 0.001 (0.016)	Loss 2.4798 (2.4124)	Entropy 0.96425 (0.96790)	Top-1 acc 60.938 (66.070)	Top-5 acc 85.156 (85.230)	lr 0.00777
Train [75][2650/3239]	Time 0.243 (0.615)	Data Time 0.001 (0.016)	Loss 2.3366 (2.4123)	Entropy 0.96421 (0.96789)	Top-1 acc 68.359 (66.071)	Top-5 acc 84.766 (85.232)	lr 0.00777
Train [75][2660/3239]	Time 0.233 (0.614)	Data Time 0.001 (0.016)	Loss 2.4425 (2.4123)	Entropy 0.96419 (0.96787)	Top-1 acc 64.844 (66.072)	Top-5 acc 84.766 (85.232)	lr 0.00777
Train [75][2670/3239]	Time 0.235 (0.614)	Data Time 0.001 (0.015)	Loss 2.4563 (2.4122)	Entropy 0.96416 (0.96786)	Top-1 acc 65.625 (66.077)	Top-5 acc 87.500 (85.234)	lr 0.00777
Train [75][2680/3239]	Time 0.360 (0.632)	Data Time 0.004 (0.015)	Loss 2.3696 (2.4122)	Entropy 0.96416 (0.96785)	Top-1 acc 66.797 (66.080)	Top-5 acc 86.719 (85.236)	lr 0.00777
Train [75][2690/3239]	Time 0.258 (0.632)	Data Time 0.002 (0.015)	Loss 2.5354 (2.4122)	Entropy 0.96416 (0.96783)	Top-1 acc 62.500 (66.078)	Top-5 acc 80.859 (85.233)	lr 0.00777
Train [75][2700/3239]	Time 0.242 (0.631)	Data Time 0.001 (0.015)	Loss 2.4721 (2.4124)	Entropy 0.96412 (0.96782)	Top-1 acc 65.234 (66.071)	Top-5 acc 83.984 (85.232)	lr 0.00777
Train [75][2710/3239]	Time 0.262 (0.631)	Data Time 0.002 (0.015)	Loss 2.3341 (2.4123)	Entropy 0.96409 (0.96781)	Top-1 acc 66.797 (66.073)	Top-5 acc 85.547 (85.232)	lr 0.00777
Train [75][2720/3239]	Time 0.276 (0.630)	Data Time 0.002 (0.015)	Loss 2.4452 (2.4122)	Entropy 0.96411 (0.96779)	Top-1 acc 65.625 (66.077)	Top-5 acc 85.938 (85.236)	lr 0.00777
Train [75][2730/3239]	Time 0.300 (0.630)	Data Time 0.001 (0.015)	Loss 2.2192 (2.4122)	Entropy 0.96411 (0.96778)	Top-1 acc 69.922 (66.076)	Top-5 acc 89.844 (85.236)	lr 0.00776
Train [75][2740/3239]	Time 0.267 (0.629)	Data Time 0.001 (0.015)	Loss 2.3178 (2.4122)	Entropy 0.96410 (0.96777)	Top-1 acc 63.672 (66.073)	Top-5 acc 86.328 (85.233)	lr 0.00776
Train [75][2750/3239]	Time 0.225 (0.629)	Data Time 0.001 (0.015)	Loss 3.8899 (2.4128)	Entropy 0.96410 (0.96775)	Top-1 acc 41.406 (66.064)	Top-5 acc 66.406 (85.225)	lr 0.00776
Train [75][2760/3239]	Time 0.221 (0.628)	Data Time 0.001 (0.015)	Loss 2.5281 (2.4129)	Entropy 0.96402 (0.96774)	Top-1 acc 62.891 (66.060)	Top-5 acc 83.984 (85.224)	lr 0.00776
Train [75][2770/3239]	Time 0.336 (0.628)	Data Time 0.001 (0.015)	Loss 2.5552 (2.4129)	Entropy 0.96401 (0.96773)	Top-1 acc 61.328 (66.062)	Top-5 acc 83.984 (85.226)	lr 0.00776
Train [75][2780/3239]	Time 0.226 (0.627)	Data Time 0.001 (0.015)	Loss 2.6338 (2.4131)	Entropy 0.96395 (0.96771)	Top-1 acc 58.594 (66.061)	Top-5 acc 81.641 (85.223)	lr 0.00776
Train [75][2790/3239]	Time 0.232 (0.627)	Data Time 0.001 (0.015)	Loss 2.5253 (2.4131)	Entropy 0.96390 (0.96770)	Top-1 acc 65.625 (66.060)	Top-5 acc 83.594 (85.222)	lr 0.00776
Train [75][2800/3239]	Time 0.254 (0.626)	Data Time 0.001 (0.015)	Loss 2.4884 (2.4132)	Entropy 0.96389 (0.96768)	Top-1 acc 65.234 (66.060)	Top-5 acc 82.422 (85.219)	lr 0.00776
Train [75][2810/3239]	Time 0.250 (0.625)	Data Time 0.002 (0.015)	Loss 2.2543 (2.4132)	Entropy 0.96386 (0.96767)	Top-1 acc 69.922 (66.062)	Top-5 acc 88.672 (85.220)	lr 0.00776
Train [75][2820/3239]	Time 0.269 (0.625)	Data Time 0.001 (0.015)	Loss 2.7075 (2.4132)	Entropy 0.96391 (0.96766)	Top-1 acc 57.812 (66.064)	Top-5 acc 79.688 (85.219)	lr 0.00776
Train [75][2830/3239]	Time 0.262 (0.624)	Data Time 0.001 (0.015)	Loss 2.2419 (2.4132)	Entropy 0.96390 (0.96764)	Top-1 acc 72.656 (66.065)	Top-5 acc 88.281 (85.221)	lr 0.00775
Train [75][2840/3239]	Time 0.233 (0.624)	Data Time 0.001 (0.015)	Loss 2.5175 (2.4132)	Entropy 0.96386 (0.96763)	Top-1 acc 63.672 (66.065)	Top-5 acc 85.156 (85.223)	lr 0.00775
Train [75][2850/3239]	Time 0.276 (0.623)	Data Time 0.001 (0.015)	Loss 2.5174 (2.4130)	Entropy 0.96387 (0.96762)	Top-1 acc 63.281 (66.068)	Top-5 acc 82.812 (85.226)	lr 0.00775
Train [75][2860/3239]	Time 0.245 (0.623)	Data Time 0.001 (0.015)	Loss 2.3491 (2.4129)	Entropy 0.96381 (0.96761)	Top-1 acc 67.969 (66.072)	Top-5 acc 87.109 (85.229)	lr 0.00775
Train [75][2870/3239]	Time 0.246 (0.622)	Data Time 0.002 (0.015)	Loss 2.5140 (2.4129)	Entropy 0.96377 (0.96759)	Top-1 acc 65.234 (66.073)	Top-5 acc 84.766 (85.231)	lr 0.00775
Train [75][2880/3239]	Time 0.268 (0.622)	Data Time 0.002 (0.014)	Loss 2.5611 (2.4129)	Entropy 0.96373 (0.96758)	Top-1 acc 63.281 (66.069)	Top-5 acc 82.422 (85.233)	lr 0.00775
Train [75][2890/3239]	Time 0.231 (0.622)	Data Time 0.001 (0.014)	Loss 2.4589 (2.4130)	Entropy 0.96367 (0.96757)	Top-1 acc 66.406 (66.066)	Top-5 acc 85.156 (85.233)	lr 0.00775
Train [75][2900/3239]	Time 0.361 (0.621)	Data Time 0.001 (0.014)	Loss 2.3173 (2.4129)	Entropy 0.96367 (0.96755)	Top-1 acc 71.875 (66.068)	Top-5 acc 85.547 (85.232)	lr 0.00775
Train [75][2910/3239]	Time 0.249 (0.621)	Data Time 0.001 (0.014)	Loss 2.4675 (2.4130)	Entropy 0.96363 (0.96754)	Top-1 acc 63.672 (66.067)	Top-5 acc 79.688 (85.231)	lr 0.00775
Train [75][2920/3239]	Time 0.228 (0.620)	Data Time 0.001 (0.014)	Loss 2.4826 (2.4130)	Entropy 0.96352 (0.96753)	Top-1 acc 65.234 (66.067)	Top-5 acc 84.375 (85.231)	lr 0.00775
Train [75][2930/3239]	Time 0.301 (0.619)	Data Time 0.001 (0.014)	Loss 2.4191 (2.4132)	Entropy 0.96354 (0.96751)	Top-1 acc 68.359 (66.061)	Top-5 acc 83.594 (85.228)	lr 0.00775
Train [75][2940/3239]	Time 0.314 (0.619)	Data Time 0.001 (0.014)	Loss 2.3441 (2.4129)	Entropy 0.96354 (0.96750)	Top-1 acc 67.969 (66.069)	Top-5 acc 86.328 (85.234)	lr 0.00774
Train [75][2950/3239]	Time 0.270 (0.619)	Data Time 0.001 (0.014)	Loss 2.3571 (2.4129)	Entropy 0.96347 (0.96748)	Top-1 acc 66.406 (66.068)	Top-5 acc 86.719 (85.233)	lr 0.00774
Train [75][2960/3239]	Time 0.254 (0.618)	Data Time 0.002 (0.014)	Loss 2.4498 (2.4130)	Entropy 0.96349 (0.96747)	Top-1 acc 66.797 (66.065)	Top-5 acc 85.547 (85.233)	lr 0.00774
Train [75][2970/3239]	Time 0.271 (0.618)	Data Time 0.001 (0.014)	Loss 2.4731 (2.4130)	Entropy 0.96356 (0.96746)	Top-1 acc 65.625 (66.063)	Top-5 acc 84.766 (85.234)	lr 0.00774
Train [75][2980/3239]	Time 0.279 (0.617)	Data Time 0.001 (0.014)	Loss 2.4293 (2.4131)	Entropy 0.96351 (0.96744)	Top-1 acc 67.578 (66.059)	Top-5 acc 83.984 (85.231)	lr 0.00774
Train [75][2990/3239]	Time 0.228 (0.617)	Data Time 0.001 (0.014)	Loss 2.4983 (2.4132)	Entropy 0.96354 (0.96743)	Top-1 acc 62.500 (66.055)	Top-5 acc 85.938 (85.230)	lr 0.00774
Train [75][3000/3239]	Time 0.240 (0.616)	Data Time 0.001 (0.014)	Loss 2.2375 (2.4134)	Entropy 0.96348 (0.96742)	Top-1 acc 69.141 (66.052)	Top-5 acc 91.016 (85.228)	lr 0.00774
Train [75][3010/3239]	Time 0.274 (0.633)	Data Time 0.004 (0.014)	Loss 2.5030 (2.4133)	Entropy 0.96348 (0.96741)	Top-1 acc 66.016 (66.055)	Top-5 acc 82.422 (85.231)	lr 0.00774
Train [75][3020/3239]	Time 0.229 (0.633)	Data Time 0.002 (0.014)	Loss 2.5037 (2.4134)	Entropy 0.96339 (0.96739)	Top-1 acc 65.625 (66.056)	Top-5 acc 83.203 (85.227)	lr 0.00774
Train [75][3030/3239]	Time 0.324 (0.632)	Data Time 0.002 (0.014)	Loss 2.4669 (2.4136)	Entropy 0.96330 (0.96738)	Top-1 acc 65.625 (66.052)	Top-5 acc 83.203 (85.221)	lr 0.00774
Train [75][3040/3239]	Time 0.266 (0.632)	Data Time 0.002 (0.014)	Loss 2.5737 (2.4137)	Entropy 0.96332 (0.96737)	Top-1 acc 63.281 (66.052)	Top-5 acc 80.859 (85.220)	lr 0.00774
Train [75][3050/3239]	Time 0.302 (0.632)	Data Time 0.001 (0.014)	Loss 2.4251 (2.4138)	Entropy 0.96319 (0.96735)	Top-1 acc 67.578 (66.053)	Top-5 acc 85.547 (85.217)	lr 0.00773
Train [75][3060/3239]	Time 0.273 (0.631)	Data Time 0.001 (0.014)	Loss 2.6815 (2.4139)	Entropy 0.96320 (0.96734)	Top-1 acc 58.984 (66.047)	Top-5 acc 80.078 (85.214)	lr 0.00773
Train [75][3070/3239]	Time 0.350 (0.631)	Data Time 0.001 (0.014)	Loss 2.5122 (2.4140)	Entropy 0.96316 (0.96733)	Top-1 acc 63.672 (66.046)	Top-5 acc 82.422 (85.213)	lr 0.00773
Train [75][3080/3239]	Time 0.262 (0.630)	Data Time 0.001 (0.014)	Loss 2.3986 (2.4138)	Entropy 0.96311 (0.96731)	Top-1 acc 67.578 (66.050)	Top-5 acc 85.938 (85.216)	lr 0.00773
Train [75][3090/3239]	Time 0.281 (0.630)	Data Time 0.001 (0.014)	Loss 2.5823 (2.4140)	Entropy 0.96306 (0.96730)	Top-1 acc 61.328 (66.044)	Top-5 acc 83.203 (85.215)	lr 0.00773
Train [75][3100/3239]	Time 0.234 (0.629)	Data Time 0.001 (0.014)	Loss 2.5279 (2.4141)	Entropy 0.96302 (0.96728)	Top-1 acc 61.719 (66.045)	Top-5 acc 83.203 (85.213)	lr 0.00773
Train [75][3110/3239]	Time 0.264 (0.629)	Data Time 0.001 (0.014)	Loss 2.2884 (2.4141)	Entropy 0.96304 (0.96727)	Top-1 acc 69.531 (66.045)	Top-5 acc 87.109 (85.211)	lr 0.00773
Train [75][3120/3239]	Time 0.251 (0.628)	Data Time 0.001 (0.014)	Loss 2.7561 (2.4141)	Entropy 0.96300 (0.96726)	Top-1 acc 56.641 (66.042)	Top-5 acc 79.688 (85.211)	lr 0.00773
Train [75][3130/3239]	Time 0.278 (0.628)	Data Time 0.003 (0.013)	Loss 2.3917 (2.4141)	Entropy 0.96294 (0.96724)	Top-1 acc 66.797 (66.044)	Top-5 acc 86.719 (85.211)	lr 0.00773
Train [75][3140/3239]	Time 0.249 (0.627)	Data Time 0.001 (0.013)	Loss 2.4603 (2.4141)	Entropy 0.96293 (0.96723)	Top-1 acc 64.844 (66.045)	Top-5 acc 84.375 (85.209)	lr 0.00773
Train [75][3150/3239]	Time 0.219 (0.627)	Data Time 0.001 (0.013)	Loss 2.3302 (2.4140)	Entropy 0.96292 (0.96722)	Top-1 acc 69.141 (66.046)	Top-5 acc 86.719 (85.211)	lr 0.00772
Train [75][3160/3239]	Time 0.345 (0.626)	Data Time 0.001 (0.013)	Loss 2.4058 (2.4140)	Entropy 0.96293 (0.96720)	Top-1 acc 64.062 (66.044)	Top-5 acc 83.594 (85.211)	lr 0.00772
Train [75][3170/3239]	Time 0.232 (0.626)	Data Time 0.003 (0.013)	Loss 2.5099 (2.4140)	Entropy 0.96292 (0.96719)	Top-1 acc 62.891 (66.043)	Top-5 acc 83.203 (85.208)	lr 0.00772
Train [75][3180/3239]	Time 0.247 (0.625)	Data Time 0.000 (0.013)	Loss 2.3194 (2.4143)	Entropy 0.96289 (0.96718)	Top-1 acc 68.359 (66.034)	Top-5 acc 87.109 (85.203)	lr 0.00772
Train [75][3190/3239]	Time 0.232 (0.625)	Data Time 0.000 (0.013)	Loss 2.3989 (2.4143)	Entropy 0.96282 (0.96716)	Top-1 acc 67.188 (66.033)	Top-5 acc 83.984 (85.204)	lr 0.00772
Train [75][3200/3239]	Time 0.216 (0.624)	Data Time 0.000 (0.013)	Loss 2.4729 (2.4142)	Entropy 0.96281 (0.96715)	Top-1 acc 64.453 (66.036)	Top-5 acc 84.375 (85.204)	lr 0.00772
Train [75][3210/3239]	Time 0.226 (0.624)	Data Time 0.000 (0.013)	Loss 2.4249 (2.4144)	Entropy 0.96278 (0.96714)	Top-1 acc 67.188 (66.032)	Top-5 acc 84.375 (85.200)	lr 0.00772
Train [75][3220/3239]	Time 0.239 (0.623)	Data Time 0.000 (0.013)	Loss 2.5076 (2.4144)	Entropy 0.96273 (0.96712)	Top-1 acc 61.719 (66.029)	Top-5 acc 82.812 (85.200)	lr 0.00772
Train [75][3230/3239]	Time 0.217 (0.623)	Data Time 0.000 (0.013)	Loss 2.1818 (2.4144)	Entropy 0.96272 (0.96711)	Top-1 acc 73.047 (66.028)	Top-5 acc 91.797 (85.202)	lr 0.00772
Train [75][3239/3239]	Time 2.264 (0.622)	Data Time 0.000 (0.013)	Loss 2.5316 (2.4144)	Entropy 0.96272 (0.96710)	Top-1 acc 61.728 (66.031)	Top-5 acc 83.951 (85.202)	lr 0.00772
==========Valid [75/120]	loss 1.335	top-1 acc 69.433 (69.445)	top-5 acc 88.234	Train top-1 66.031	top-5 85.202	Entropy 0.96272	Latency-None: 0.000ms	Flops: 546.53M
Train [76][0/3239]	Time 41.360 (41.360)	Data Time 39.439 (39.439)	Loss 2.4021 (2.4021)	Entropy 0.96266 (0.96266)	Top-1 acc 69.141 (69.141)	Top-5 acc 86.719 (86.719)	lr 0.00772
Train [76][10/3239]	Time 2.749 (4.307)	Data Time 0.002 (3.590)	Loss 2.4311 (2.4129)	Entropy 0.96266 (0.96266)	Top-1 acc 65.234 (65.732)	Top-5 acc 83.984 (85.476)	lr 0.00772
Train [76][20/3239]	Time 0.252 (2.379)	Data Time 0.001 (1.881)	Loss 2.3592 (2.3666)	Entropy 0.96255 (0.96261)	Top-1 acc 70.312 (67.299)	Top-5 acc 87.500 (86.217)	lr 0.00771
Train [76][30/3239]	Time 0.224 (1.768)	Data Time 0.001 (1.275)	Loss 2.2897 (2.3863)	Entropy 0.96253 (0.96259)	Top-1 acc 70.703 (66.595)	Top-5 acc 84.766 (85.610)	lr 0.00771
Train [76][40/3239]	Time 0.251 (1.453)	Data Time 0.001 (0.964)	Loss 2.2712 (2.3833)	Entropy 0.96249 (0.96257)	Top-1 acc 70.312 (66.673)	Top-5 acc 86.719 (85.661)	lr 0.00771
Train [76][50/3239]	Time 0.219 (1.261)	Data Time 0.002 (0.776)	Loss 2.5731 (2.3853)	Entropy 0.96249 (0.96255)	Top-1 acc 61.719 (66.858)	Top-5 acc 83.594 (85.600)	lr 0.00771
Train [76][60/3239]	Time 0.317 (1.132)	Data Time 0.001 (0.649)	Loss 2.5027 (2.3893)	Entropy 0.96242 (0.96253)	Top-1 acc 62.500 (66.752)	Top-5 acc 83.594 (85.483)	lr 0.00771
Train [76][70/3239]	Time 0.244 (1.039)	Data Time 0.001 (0.558)	Loss 2.4109 (2.3903)	Entropy 0.96240 (0.96252)	Top-1 acc 62.891 (66.764)	Top-5 acc 88.281 (85.475)	lr 0.00771
Train [76][80/3239]	Time 0.218 (0.970)	Data Time 0.001 (0.489)	Loss 2.4066 (2.3892)	Entropy 0.96225 (0.96249)	Top-1 acc 64.844 (66.782)	Top-5 acc 86.328 (85.499)	lr 0.00771
Train [76][90/3239]	Time 0.240 (0.914)	Data Time 0.001 (0.435)	Loss 2.4859 (2.3937)	Entropy 0.96224 (0.96247)	Top-1 acc 64.844 (66.655)	Top-5 acc 84.766 (85.427)	lr 0.00771
Train [76][100/3239]	Time 0.289 (0.870)	Data Time 0.001 (0.392)	Loss 2.2992 (2.3912)	Entropy 0.96224 (0.96244)	Top-1 acc 70.703 (66.801)	Top-5 acc 85.938 (85.446)	lr 0.00771
Train [76][110/3239]	Time 0.294 (1.298)	Data Time 0.004 (0.357)	Loss 2.4520 (2.3921)	Entropy 0.96225 (0.96243)	Top-1 acc 67.188 (66.846)	Top-5 acc 83.984 (85.420)	lr 0.00771
Train [76][120/3239]	Time 3.117 (1.241)	Data Time 0.003 (0.328)	Loss 2.5082 (2.3896)	Entropy 0.96225 (0.96241)	Top-1 acc 63.672 (66.878)	Top-5 acc 85.938 (85.547)	lr 0.00771
Train [76][130/3239]	Time 0.227 (1.165)	Data Time 0.001 (0.303)	Loss 2.4023 (2.3871)	Entropy 0.96220 (0.96240)	Top-1 acc 66.406 (66.859)	Top-5 acc 86.719 (85.636)	lr 0.00770
Train [76][140/3239]	Time 0.245 (1.115)	Data Time 0.002 (0.282)	Loss 2.3410 (2.3866)	Entropy 0.96215 (0.96238)	Top-1 acc 70.703 (66.927)	Top-5 acc 85.547 (85.630)	lr 0.00770
Train [76][150/3239]	Time 0.342 (1.074)	Data Time 0.003 (0.263)	Loss 2.2431 (2.3841)	Entropy 0.96213 (0.96236)	Top-1 acc 71.094 (66.968)	Top-5 acc 89.453 (85.648)	lr 0.00770
Train [76][160/3239]	Time 0.224 (1.037)	Data Time 0.001 (0.247)	Loss 2.5130 (2.3883)	Entropy 0.96209 (0.96235)	Top-1 acc 60.938 (66.858)	Top-5 acc 83.594 (85.554)	lr 0.00770
Train [76][170/3239]	Time 0.285 (1.004)	Data Time 0.002 (0.233)	Loss 2.3634 (2.3873)	Entropy 0.96208 (0.96233)	Top-1 acc 66.406 (66.904)	Top-5 acc 88.281 (85.590)	lr 0.00770
Train [76][180/3239]	Time 0.254 (0.977)	Data Time 0.001 (0.220)	Loss 2.2426 (2.3842)	Entropy 0.96203 (0.96232)	Top-1 acc 68.750 (66.896)	Top-5 acc 88.672 (85.640)	lr 0.00770
Train [76][190/3239]	Time 0.270 (0.951)	Data Time 0.001 (0.208)	Loss 2.4308 (2.3846)	Entropy 0.96201 (0.96230)	Top-1 acc 66.016 (66.852)	Top-5 acc 85.547 (85.639)	lr 0.00770
Train [76][200/3239]	Time 0.221 (0.926)	Data Time 0.001 (0.198)	Loss 2.4459 (2.3837)	Entropy 0.96192 (0.96228)	Top-1 acc 65.234 (66.886)	Top-5 acc 85.156 (85.644)	lr 0.00770
Train [76][210/3239]	Time 0.245 (0.905)	Data Time 0.001 (0.189)	Loss 2.5763 (2.3843)	Entropy 0.96191 (0.96227)	Top-1 acc 59.766 (66.897)	Top-5 acc 82.422 (85.639)	lr 0.00770
Train [76][220/3239]	Time 0.252 (0.886)	Data Time 0.001 (0.180)	Loss 2.2165 (2.3865)	Entropy 0.96190 (0.96225)	Top-1 acc 71.094 (66.811)	Top-5 acc 89.062 (85.611)	lr 0.00770
Train [76][230/3239]	Time 2.481 (0.867)	Data Time 0.002 (0.173)	Loss 2.3554 (2.3870)	Entropy 0.96190 (0.96224)	Top-1 acc 68.359 (66.788)	Top-5 acc 85.547 (85.604)	lr 0.00770
Train [76][240/3239]	Time 0.314 (0.842)	Data Time 0.001 (0.165)	Loss 2.2223 (2.3877)	Entropy 0.96186 (0.96222)	Top-1 acc 68.359 (66.747)	Top-5 acc 87.109 (85.596)	lr 0.00769
Train [76][250/3239]	Time 0.223 (0.826)	Data Time 0.001 (0.159)	Loss 2.3146 (2.3861)	Entropy 0.96180 (0.96220)	Top-1 acc 67.969 (66.764)	Top-5 acc 86.719 (85.634)	lr 0.00769
Train [76][260/3239]	Time 0.226 (0.813)	Data Time 0.002 (0.153)	Loss 2.4161 (2.3860)	Entropy 0.96183 (0.96219)	Top-1 acc 62.891 (66.743)	Top-5 acc 85.938 (85.649)	lr 0.00769
Train [76][270/3239]	Time 0.229 (0.801)	Data Time 0.001 (0.147)	Loss 2.4197 (2.3877)	Entropy 0.96174 (0.96217)	Top-1 acc 63.672 (66.713)	Top-5 acc 84.375 (85.630)	lr 0.00769
Train [76][280/3239]	Time 0.217 (0.789)	Data Time 0.001 (0.142)	Loss 2.2692 (2.3880)	Entropy 0.96174 (0.96216)	Top-1 acc 69.531 (66.698)	Top-5 acc 89.062 (85.639)	lr 0.00769
Train [76][290/3239]	Time 0.232 (0.777)	Data Time 0.001 (0.137)	Loss 2.2530 (2.3876)	Entropy 0.96172 (0.96214)	Top-1 acc 70.703 (66.763)	Top-5 acc 89.062 (85.650)	lr 0.00769
Train [76][300/3239]	Time 0.237 (0.767)	Data Time 0.001 (0.133)	Loss 2.4058 (2.3856)	Entropy 0.96171 (0.96213)	Top-1 acc 60.938 (66.811)	Top-5 acc 85.938 (85.697)	lr 0.00769
Train [76][310/3239]	Time 0.236 (0.758)	Data Time 0.002 (0.129)	Loss 2.4553 (2.3861)	Entropy 0.96165 (0.96212)	Top-1 acc 66.406 (66.768)	Top-5 acc 83.594 (85.675)	lr 0.00769
Train [76][320/3239]	Time 0.219 (0.749)	Data Time 0.001 (0.125)	Loss 2.2824 (2.3851)	Entropy 0.96162 (0.96210)	Top-1 acc 71.094 (66.816)	Top-5 acc 87.109 (85.673)	lr 0.00769
Train [76][330/3239]	Time 0.319 (0.741)	Data Time 0.001 (0.121)	Loss 2.3734 (2.3870)	Entropy 0.96160 (0.96209)	Top-1 acc 66.016 (66.782)	Top-5 acc 87.109 (85.648)	lr 0.00769
Train [76][340/3239]	Time 2.450 (0.733)	Data Time 0.001 (0.117)	Loss 2.4056 (2.3868)	Entropy 0.96160 (0.96207)	Top-1 acc 66.016 (66.800)	Top-5 acc 85.938 (85.671)	lr 0.00768
Train [76][350/3239]	Time 0.267 (0.719)	Data Time 0.002 (0.114)	Loss 2.4240 (2.3883)	Entropy 0.96153 (0.96206)	Top-1 acc 68.750 (66.778)	Top-5 acc 83.203 (85.618)	lr 0.00768
Train [76][360/3239]	Time 0.217 (0.712)	Data Time 0.001 (0.111)	Loss 2.2394 (2.3879)	Entropy 0.96155 (0.96204)	Top-1 acc 67.969 (66.776)	Top-5 acc 88.672 (85.638)	lr 0.00768
Train [76][370/3239]	Time 0.296 (0.706)	Data Time 0.001 (0.108)	Loss 2.4626 (2.3875)	Entropy 0.96153 (0.96203)	Top-1 acc 63.672 (66.787)	Top-5 acc 83.203 (85.645)	lr 0.00768
Train [76][380/3239]	Time 0.224 (0.699)	Data Time 0.001 (0.105)	Loss 2.4354 (2.3865)	Entropy 0.96153 (0.96202)	Top-1 acc 61.719 (66.794)	Top-5 acc 87.109 (85.680)	lr 0.00768
Train [76][390/3239]	Time 0.254 (0.693)	Data Time 0.001 (0.103)	Loss 2.3297 (2.3863)	Entropy 0.96161 (0.96200)	Top-1 acc 67.188 (66.808)	Top-5 acc 85.547 (85.692)	lr 0.00768
Train [76][400/3239]	Time 0.237 (0.688)	Data Time 0.001 (0.100)	Loss 2.3662 (2.3875)	Entropy 0.96160 (0.96199)	Top-1 acc 67.969 (66.780)	Top-5 acc 87.891 (85.659)	lr 0.00768
Train [76][410/3239]	Time 0.234 (0.683)	Data Time 0.001 (0.098)	Loss 2.3088 (2.3870)	Entropy 0.96131 (0.96198)	Top-1 acc 71.484 (66.819)	Top-5 acc 85.156 (85.659)	lr 0.00768
Train [76][420/3239]	Time 0.238 (0.679)	Data Time 0.001 (0.095)	Loss 2.3288 (2.3871)	Entropy 0.96130 (0.96197)	Top-1 acc 70.312 (66.855)	Top-5 acc 88.281 (85.669)	lr 0.00768
Train [76][430/3239]	Time 0.231 (0.675)	Data Time 0.001 (0.093)	Loss 2.4718 (2.3890)	Entropy 0.96125 (0.96195)	Top-1 acc 64.453 (66.801)	Top-5 acc 84.766 (85.621)	lr 0.00768
Train [76][440/3239]	Time 0.272 (0.670)	Data Time 0.002 (0.091)	Loss 2.3223 (2.3887)	Entropy 0.96120 (0.96193)	Top-1 acc 66.406 (66.819)	Top-5 acc 84.375 (85.616)	lr 0.00768
Train [76][450/3239]	Time 2.553 (0.665)	Data Time 0.001 (0.089)	Loss 2.7184 (2.3897)	Entropy 0.96120 (0.96192)	Top-1 acc 57.812 (66.807)	Top-5 acc 79.297 (85.581)	lr 0.00767
Train [76][460/3239]	Time 0.218 (0.656)	Data Time 0.001 (0.087)	Loss 2.4322 (2.3903)	Entropy 0.96116 (0.96190)	Top-1 acc 65.234 (66.791)	Top-5 acc 85.938 (85.562)	lr 0.00767
Train [76][470/3239]	Time 0.222 (0.652)	Data Time 0.001 (0.085)	Loss 2.3184 (2.3901)	Entropy 0.96114 (0.96189)	Top-1 acc 70.312 (66.776)	Top-5 acc 87.109 (85.568)	lr 0.00767
Train [76][480/3239]	Time 0.241 (0.762)	Data Time 0.002 (0.084)	Loss 2.2685 (2.3903)	Entropy 0.96113 (0.96187)	Top-1 acc 69.531 (66.773)	Top-5 acc 87.500 (85.577)	lr 0.00767
Train [76][490/3239]	Time 0.218 (0.756)	Data Time 0.002 (0.082)	Loss 2.3294 (2.3896)	Entropy 0.96106 (0.96185)	Top-1 acc 67.188 (66.768)	Top-5 acc 86.328 (85.591)	lr 0.00767
Train [76][500/3239]	Time 0.238 (0.751)	Data Time 0.002 (0.081)	Loss 2.2443 (2.3902)	Entropy 0.96102 (0.96184)	Top-1 acc 73.438 (66.766)	Top-5 acc 87.109 (85.575)	lr 0.00767
Train [76][510/3239]	Time 0.239 (0.747)	Data Time 0.001 (0.079)	Loss 2.3923 (2.3904)	Entropy 0.96099 (0.96182)	Top-1 acc 67.969 (66.778)	Top-5 acc 85.547 (85.577)	lr 0.00767
Train [76][520/3239]	Time 0.239 (0.742)	Data Time 0.001 (0.078)	Loss 2.3924 (2.3903)	Entropy 0.96099 (0.96181)	Top-1 acc 66.797 (66.771)	Top-5 acc 84.766 (85.573)	lr 0.00767
Train [76][530/3239]	Time 0.239 (0.737)	Data Time 0.001 (0.076)	Loss 2.2723 (2.3900)	Entropy 0.96095 (0.96179)	Top-1 acc 71.484 (66.784)	Top-5 acc 89.062 (85.573)	lr 0.00767
Train [76][540/3239]	Time 0.218 (0.732)	Data Time 0.001 (0.075)	Loss 2.5380 (2.3912)	Entropy 0.96095 (0.96177)	Top-1 acc 63.672 (66.750)	Top-5 acc 82.031 (85.550)	lr 0.00767
Train [76][550/3239]	Time 0.224 (0.728)	Data Time 0.001 (0.074)	Loss 2.3776 (2.3916)	Entropy 0.96089 (0.96176)	Top-1 acc 65.625 (66.733)	Top-5 acc 85.547 (85.538)	lr 0.00767
Train [76][560/3239]	Time 2.504 (0.723)	Data Time 0.001 (0.072)	Loss 2.5722 (2.3920)	Entropy 0.96089 (0.96174)	Top-1 acc 66.406 (66.729)	Top-5 acc 81.641 (85.530)	lr 0.00766
Train [76][570/3239]	Time 0.233 (0.715)	Data Time 0.001 (0.071)	Loss 2.4478 (2.3925)	Entropy 0.96092 (0.96173)	Top-1 acc 67.578 (66.723)	Top-5 acc 83.984 (85.524)	lr 0.00766
Train [76][580/3239]	Time 0.259 (0.711)	Data Time 0.003 (0.070)	Loss 2.4887 (2.3932)	Entropy 0.96091 (0.96171)	Top-1 acc 64.062 (66.687)	Top-5 acc 83.594 (85.513)	lr 0.00766
Train [76][590/3239]	Time 0.365 (0.707)	Data Time 0.001 (0.069)	Loss 2.4929 (2.3937)	Entropy 0.96089 (0.96170)	Top-1 acc 64.844 (66.671)	Top-5 acc 84.766 (85.511)	lr 0.00766
Train [76][600/3239]	Time 0.238 (0.704)	Data Time 0.001 (0.068)	Loss 2.2101 (2.3937)	Entropy 0.96089 (0.96169)	Top-1 acc 70.312 (66.672)	Top-5 acc 89.453 (85.517)	lr 0.00766
Train [76][610/3239]	Time 0.214 (0.700)	Data Time 0.001 (0.066)	Loss 2.4012 (2.3935)	Entropy 0.96085 (0.96167)	Top-1 acc 66.406 (66.670)	Top-5 acc 84.375 (85.519)	lr 0.00766
Train [76][620/3239]	Time 0.218 (0.696)	Data Time 0.001 (0.065)	Loss 2.3727 (2.3932)	Entropy 0.96083 (0.96166)	Top-1 acc 67.578 (66.668)	Top-5 acc 87.500 (85.519)	lr 0.00766
Train [76][630/3239]	Time 0.222 (0.693)	Data Time 0.001 (0.064)	Loss 2.4416 (2.3937)	Entropy 0.96078 (0.96165)	Top-1 acc 63.281 (66.652)	Top-5 acc 85.156 (85.511)	lr 0.00766
Train [76][640/3239]	Time 0.230 (0.689)	Data Time 0.011 (0.063)	Loss 3.2449 (2.3945)	Entropy 0.96074 (0.96163)	Top-1 acc 48.047 (66.640)	Top-5 acc 70.703 (85.489)	lr 0.00766
Train [76][650/3239]	Time 0.277 (0.686)	Data Time 0.001 (0.062)	Loss 2.4509 (2.3954)	Entropy 0.96064 (0.96162)	Top-1 acc 65.234 (66.613)	Top-5 acc 86.719 (85.486)	lr 0.00766
Train [76][660/3239]	Time 0.270 (0.683)	Data Time 0.001 (0.062)	Loss 2.3705 (2.3954)	Entropy 0.96091 (0.96161)	Top-1 acc 69.531 (66.610)	Top-5 acc 86.719 (85.487)	lr 0.00765
Train [76][670/3239]	Time 2.568 (0.680)	Data Time 0.001 (0.061)	Loss 2.3700 (2.3947)	Entropy 0.96091 (0.96159)	Top-1 acc 69.922 (66.637)	Top-5 acc 85.938 (85.500)	lr 0.00765
Train [76][680/3239]	Time 0.341 (0.674)	Data Time 0.001 (0.060)	Loss 2.3790 (2.3945)	Entropy 0.96145 (0.96159)	Top-1 acc 65.234 (66.637)	Top-5 acc 85.938 (85.503)	lr 0.00765
Train [76][690/3239]	Time 0.243 (0.671)	Data Time 0.001 (0.059)	Loss 2.3787 (2.3945)	Entropy 0.96143 (0.96159)	Top-1 acc 66.016 (66.637)	Top-5 acc 87.500 (85.505)	lr 0.00765
Train [76][700/3239]	Time 0.239 (0.669)	Data Time 0.001 (0.058)	Loss 2.6559 (2.3958)	Entropy 0.96144 (0.96159)	Top-1 acc 60.938 (66.607)	Top-5 acc 81.250 (85.481)	lr 0.00765
Train [76][710/3239]	Time 0.229 (0.666)	Data Time 0.001 (0.057)	Loss 2.3909 (2.3958)	Entropy 0.96149 (0.96159)	Top-1 acc 67.188 (66.599)	Top-5 acc 85.938 (85.486)	lr 0.00765
Train [76][720/3239]	Time 0.369 (0.664)	Data Time 0.002 (0.057)	Loss 2.3869 (2.3957)	Entropy 0.96149 (0.96159)	Top-1 acc 67.969 (66.608)	Top-5 acc 86.719 (85.488)	lr 0.00765
Train [76][730/3239]	Time 0.239 (0.661)	Data Time 0.002 (0.056)	Loss 2.2767 (2.3958)	Entropy 0.96147 (0.96158)	Top-1 acc 67.578 (66.605)	Top-5 acc 86.719 (85.488)	lr 0.00765
Train [76][740/3239]	Time 0.226 (0.659)	Data Time 0.001 (0.055)	Loss 2.4719 (2.3957)	Entropy 0.96143 (0.96158)	Top-1 acc 63.281 (66.612)	Top-5 acc 83.594 (85.481)	lr 0.00765
Train [76][750/3239]	Time 0.219 (0.657)	Data Time 0.002 (0.054)	Loss 2.3786 (2.3958)	Entropy 0.96138 (0.96158)	Top-1 acc 66.016 (66.604)	Top-5 acc 85.156 (85.484)	lr 0.00765
Train [76][760/3239]	Time 0.285 (0.654)	Data Time 0.001 (0.054)	Loss 2.3138 (2.3957)	Entropy 0.96132 (0.96158)	Top-1 acc 66.797 (66.604)	Top-5 acc 87.500 (85.480)	lr 0.00765
Train [76][770/3239]	Time 0.253 (0.652)	Data Time 0.001 (0.053)	Loss 2.3561 (2.3953)	Entropy 0.96127 (0.96157)	Top-1 acc 67.188 (66.609)	Top-5 acc 86.719 (85.492)	lr 0.00764
Train [76][780/3239]	Time 2.522 (0.650)	Data Time 0.001 (0.052)	Loss 2.3198 (2.3954)	Entropy 0.96127 (0.96157)	Top-1 acc 66.797 (66.602)	Top-5 acc 85.938 (85.484)	lr 0.00764
Train [76][790/3239]	Time 0.223 (0.645)	Data Time 0.001 (0.052)	Loss 2.4437 (2.3956)	Entropy 0.96132 (0.96157)	Top-1 acc 61.719 (66.583)	Top-5 acc 84.766 (85.478)	lr 0.00764
Train [76][800/3239]	Time 0.245 (0.643)	Data Time 0.001 (0.051)	Loss 2.3320 (2.3952)	Entropy 0.96131 (0.96156)	Top-1 acc 67.969 (66.592)	Top-5 acc 83.594 (85.477)	lr 0.00764
Train [76][810/3239]	Time 0.344 (0.640)	Data Time 0.001 (0.050)	Loss 2.3908 (2.3955)	Entropy 0.96122 (0.96156)	Top-1 acc 66.016 (66.589)	Top-5 acc 85.156 (85.472)	lr 0.00764
Train [76][820/3239]	Time 0.234 (0.639)	Data Time 0.002 (0.050)	Loss 2.4074 (2.3956)	Entropy 0.96122 (0.96156)	Top-1 acc 63.281 (66.584)	Top-5 acc 86.328 (85.469)	lr 0.00764
Train [76][830/3239]	Time 0.238 (0.636)	Data Time 0.001 (0.049)	Loss 2.4425 (2.3959)	Entropy 0.96112 (0.96155)	Top-1 acc 61.719 (66.564)	Top-5 acc 84.375 (85.462)	lr 0.00764
Train [76][840/3239]	Time 0.233 (0.700)	Data Time 0.002 (0.049)	Loss 2.3419 (2.3960)	Entropy 0.96118 (0.96155)	Top-1 acc 68.359 (66.562)	Top-5 acc 87.500 (85.455)	lr 0.00764
Train [76][850/3239]	Time 0.235 (0.698)	Data Time 0.002 (0.048)	Loss 2.5584 (2.3964)	Entropy 0.96116 (0.96154)	Top-1 acc 64.062 (66.554)	Top-5 acc 82.422 (85.443)	lr 0.00764
Train [76][860/3239]	Time 0.263 (0.696)	Data Time 0.001 (0.048)	Loss 2.4147 (2.3962)	Entropy 0.96109 (0.96154)	Top-1 acc 65.625 (66.553)	Top-5 acc 88.281 (85.457)	lr 0.00764
Train [76][870/3239]	Time 0.271 (0.693)	Data Time 0.001 (0.047)	Loss 2.3720 (2.3963)	Entropy 0.96103 (0.96153)	Top-1 acc 65.234 (66.546)	Top-5 acc 85.938 (85.458)	lr 0.00764
Train [76][880/3239]	Time 0.243 (0.691)	Data Time 0.002 (0.047)	Loss 2.2802 (2.3961)	Entropy 0.96101 (0.96153)	Top-1 acc 68.359 (66.551)	Top-5 acc 87.891 (85.462)	lr 0.00763
Train [76][890/3239]	Time 2.473 (0.688)	Data Time 0.002 (0.046)	Loss 2.4862 (2.3966)	Entropy 0.96101 (0.96152)	Top-1 acc 63.672 (66.532)	Top-5 acc 83.984 (85.459)	lr 0.00763
Train [76][900/3239]	Time 0.341 (0.684)	Data Time 0.001 (0.046)	Loss 2.3707 (2.3966)	Entropy 0.96104 (0.96151)	Top-1 acc 64.844 (66.521)	Top-5 acc 87.500 (85.455)	lr 0.00763
Train [76][910/3239]	Time 0.241 (0.681)	Data Time 0.002 (0.045)	Loss 2.4790 (2.3966)	Entropy 0.96103 (0.96151)	Top-1 acc 67.188 (66.525)	Top-5 acc 83.203 (85.456)	lr 0.00763
Train [76][920/3239]	Time 0.232 (0.679)	Data Time 0.001 (0.045)	Loss 2.4269 (2.3965)	Entropy 0.96104 (0.96150)	Top-1 acc 64.453 (66.533)	Top-5 acc 85.938 (85.458)	lr 0.00763
Train [76][930/3239]	Time 0.228 (0.677)	Data Time 0.001 (0.044)	Loss 2.5107 (2.3965)	Entropy 0.96097 (0.96150)	Top-1 acc 64.453 (66.528)	Top-5 acc 82.422 (85.455)	lr 0.00763
Train [76][940/3239]	Time 0.213 (0.675)	Data Time 0.001 (0.044)	Loss 2.5413 (2.3967)	Entropy 0.96098 (0.96149)	Top-1 acc 63.672 (66.524)	Top-5 acc 82.812 (85.460)	lr 0.00763
Train [76][950/3239]	Time 0.240 (0.673)	Data Time 0.001 (0.043)	Loss 2.2904 (2.3965)	Entropy 0.96093 (0.96149)	Top-1 acc 67.188 (66.539)	Top-5 acc 90.234 (85.467)	lr 0.00763
Train [76][960/3239]	Time 0.249 (0.671)	Data Time 0.001 (0.043)	Loss 2.5836 (2.3975)	Entropy 0.96091 (0.96148)	Top-1 acc 61.328 (66.508)	Top-5 acc 82.031 (85.444)	lr 0.00763
Train [76][970/3239]	Time 0.274 (0.669)	Data Time 0.001 (0.042)	Loss 2.4340 (2.3974)	Entropy 0.96091 (0.96148)	Top-1 acc 61.328 (66.512)	Top-5 acc 86.328 (85.454)	lr 0.00763
Train [76][980/3239]	Time 0.213 (0.667)	Data Time 0.001 (0.042)	Loss 2.3001 (2.3969)	Entropy 0.96082 (0.96147)	Top-1 acc 67.969 (66.534)	Top-5 acc 87.891 (85.459)	lr 0.00763
Train [76][990/3239]	Time 0.325 (0.665)	Data Time 0.001 (0.042)	Loss 2.2399 (2.3967)	Entropy 0.96073 (0.96146)	Top-1 acc 67.969 (66.524)	Top-5 acc 89.453 (85.467)	lr 0.00762
Train [76][1000/3239]	Time 2.606 (0.663)	Data Time 0.001 (0.041)	Loss 2.2936 (2.3966)	Entropy 0.96073 (0.96146)	Top-1 acc 67.578 (66.523)	Top-5 acc 87.500 (85.465)	lr 0.00762
Train [76][1010/3239]	Time 0.229 (0.659)	Data Time 0.001 (0.041)	Loss 2.2584 (2.3968)	Entropy 0.96068 (0.96145)	Top-1 acc 69.531 (66.521)	Top-5 acc 86.328 (85.459)	lr 0.00762
Train [76][1020/3239]	Time 0.225 (0.657)	Data Time 0.001 (0.040)	Loss 3.1523 (2.3973)	Entropy 0.96058 (0.96144)	Top-1 acc 46.875 (66.502)	Top-5 acc 72.266 (85.452)	lr 0.00762
Train [76][1030/3239]	Time 0.236 (0.656)	Data Time 0.001 (0.040)	Loss 2.4996 (2.3977)	Entropy 0.96053 (0.96143)	Top-1 acc 64.062 (66.491)	Top-5 acc 83.203 (85.446)	lr 0.00762
Train [76][1040/3239]	Time 0.255 (0.654)	Data Time 0.001 (0.040)	Loss 2.3788 (2.3980)	Entropy 0.96041 (0.96142)	Top-1 acc 64.453 (66.473)	Top-5 acc 85.938 (85.444)	lr 0.00762
Train [76][1050/3239]	Time 0.241 (0.652)	Data Time 0.001 (0.039)	Loss 2.3414 (2.3984)	Entropy 0.96038 (0.96141)	Top-1 acc 67.188 (66.465)	Top-5 acc 86.719 (85.435)	lr 0.00762
Train [76][1060/3239]	Time 0.239 (0.651)	Data Time 0.002 (0.039)	Loss 2.3006 (2.3986)	Entropy 0.96031 (0.96140)	Top-1 acc 67.188 (66.451)	Top-5 acc 88.281 (85.427)	lr 0.00762
Train [76][1070/3239]	Time 0.242 (0.649)	Data Time 0.002 (0.039)	Loss 2.7041 (2.3985)	Entropy 0.96032 (0.96139)	Top-1 acc 57.812 (66.451)	Top-5 acc 82.031 (85.438)	lr 0.00762
Train [76][1080/3239]	Time 0.329 (0.648)	Data Time 0.002 (0.038)	Loss 2.6320 (2.3994)	Entropy 0.96033 (0.96138)	Top-1 acc 60.156 (66.430)	Top-5 acc 81.250 (85.428)	lr 0.00762
Train [76][1090/3239]	Time 0.230 (0.646)	Data Time 0.001 (0.038)	Loss 2.5267 (2.3997)	Entropy 0.96033 (0.96137)	Top-1 acc 64.844 (66.427)	Top-5 acc 80.859 (85.431)	lr 0.00761
Train [76][1100/3239]	Time 0.298 (0.645)	Data Time 0.001 (0.038)	Loss 2.3724 (2.3996)	Entropy 0.96029 (0.96136)	Top-1 acc 66.406 (66.426)	Top-5 acc 87.891 (85.436)	lr 0.00761
Train [76][1110/3239]	Time 2.547 (0.643)	Data Time 0.002 (0.037)	Loss 2.4470 (2.3998)	Entropy 0.96029 (0.96135)	Top-1 acc 65.234 (66.425)	Top-5 acc 84.766 (85.434)	lr 0.00761
Train [76][1120/3239]	Time 0.236 (0.640)	Data Time 0.001 (0.037)	Loss 2.5062 (2.3999)	Entropy 0.96031 (0.96134)	Top-1 acc 62.109 (66.420)	Top-5 acc 85.547 (85.429)	lr 0.00761
Train [76][1130/3239]	Time 0.240 (0.639)	Data Time 0.001 (0.037)	Loss 2.2028 (2.3999)	Entropy 0.96034 (0.96133)	Top-1 acc 71.484 (66.422)	Top-5 acc 89.844 (85.430)	lr 0.00761
Train [76][1140/3239]	Time 0.228 (0.637)	Data Time 0.001 (0.036)	Loss 2.2442 (2.3996)	Entropy 0.96032 (0.96133)	Top-1 acc 70.312 (66.420)	Top-5 acc 88.281 (85.439)	lr 0.00761
Train [76][1150/3239]	Time 0.242 (0.636)	Data Time 0.002 (0.036)	Loss 2.3170 (2.4002)	Entropy 0.96031 (0.96132)	Top-1 acc 71.094 (66.407)	Top-5 acc 86.719 (85.429)	lr 0.00761
Train [76][1160/3239]	Time 0.226 (0.634)	Data Time 0.001 (0.036)	Loss 2.2865 (2.4002)	Entropy 0.96024 (0.96131)	Top-1 acc 70.703 (66.414)	Top-5 acc 89.062 (85.432)	lr 0.00761
Train [76][1170/3239]	Time 0.319 (0.633)	Data Time 0.001 (0.035)	Loss 2.4055 (2.3998)	Entropy 0.96023 (0.96130)	Top-1 acc 68.359 (66.423)	Top-5 acc 84.766 (85.432)	lr 0.00761
Train [76][1180/3239]	Time 0.243 (0.632)	Data Time 0.001 (0.035)	Loss 2.4961 (2.4000)	Entropy 0.96018 (0.96129)	Top-1 acc 64.062 (66.411)	Top-5 acc 83.984 (85.432)	lr 0.00761
Train [76][1190/3239]	Time 0.216 (0.631)	Data Time 0.001 (0.035)	Loss 2.4419 (2.4001)	Entropy 0.96016 (0.96128)	Top-1 acc 62.891 (66.406)	Top-5 acc 85.938 (85.426)	lr 0.00761
Train [76][1200/3239]	Time 0.263 (0.673)	Data Time 0.002 (0.035)	Loss 2.3356 (2.4008)	Entropy 0.96013 (0.96127)	Top-1 acc 69.141 (66.390)	Top-5 acc 87.109 (85.416)	lr 0.00760
Train [76][1210/3239]	Time 0.273 (0.672)	Data Time 0.003 (0.034)	Loss 2.3826 (2.4007)	Entropy 0.96012 (0.96126)	Top-1 acc 65.625 (66.386)	Top-5 acc 84.766 (85.415)	lr 0.00760
Train [76][1220/3239]	Time 2.548 (0.670)	Data Time 0.002 (0.034)	Loss 2.4691 (2.4008)	Entropy 0.96012 (0.96125)	Top-1 acc 65.625 (66.385)	Top-5 acc 83.594 (85.412)	lr 0.00760
Train [76][1230/3239]	Time 0.232 (0.667)	Data Time 0.002 (0.034)	Loss 2.4749 (2.4011)	Entropy 0.96013 (0.96124)	Top-1 acc 64.844 (66.372)	Top-5 acc 83.594 (85.405)	lr 0.00760
Train [76][1240/3239]	Time 0.238 (0.665)	Data Time 0.002 (0.034)	Loss 2.4887 (2.4008)	Entropy 0.96011 (0.96123)	Top-1 acc 65.234 (66.378)	Top-5 acc 83.203 (85.413)	lr 0.00760
Train [76][1250/3239]	Time 0.220 (0.664)	Data Time 0.002 (0.033)	Loss 2.3019 (2.4011)	Entropy 0.96009 (0.96122)	Top-1 acc 71.484 (66.373)	Top-5 acc 86.328 (85.406)	lr 0.00760
Train [76][1260/3239]	Time 0.264 (0.662)	Data Time 0.001 (0.033)	Loss 2.2979 (2.4010)	Entropy 0.96006 (0.96122)	Top-1 acc 70.703 (66.380)	Top-5 acc 88.281 (85.410)	lr 0.00760
Train [76][1270/3239]	Time 0.252 (0.661)	Data Time 0.001 (0.033)	Loss 2.6168 (2.4012)	Entropy 0.96004 (0.96121)	Top-1 acc 61.719 (66.373)	Top-5 acc 81.641 (85.409)	lr 0.00760
Train [76][1280/3239]	Time 0.233 (0.660)	Data Time 0.001 (0.033)	Loss 2.3065 (2.4008)	Entropy 0.96001 (0.96120)	Top-1 acc 66.016 (66.387)	Top-5 acc 88.281 (85.416)	lr 0.00760
Train [76][1290/3239]	Time 0.245 (0.659)	Data Time 0.001 (0.032)	Loss 2.4357 (2.4009)	Entropy 0.95991 (0.96119)	Top-1 acc 67.578 (66.391)	Top-5 acc 85.547 (85.411)	lr 0.00760
Train [76][1300/3239]	Time 0.264 (0.657)	Data Time 0.001 (0.032)	Loss 2.3520 (2.4008)	Entropy 0.95990 (0.96118)	Top-1 acc 67.578 (66.391)	Top-5 acc 85.156 (85.410)	lr 0.00760
Train [76][1310/3239]	Time 0.229 (0.656)	Data Time 0.001 (0.032)	Loss 2.5609 (2.4010)	Entropy 0.95984 (0.96117)	Top-1 acc 58.203 (66.383)	Top-5 acc 82.422 (85.410)	lr 0.00759
Train [76][1320/3239]	Time 0.233 (0.655)	Data Time 0.001 (0.032)	Loss 2.3258 (2.4012)	Entropy 0.95981 (0.96116)	Top-1 acc 68.750 (66.378)	Top-5 acc 86.328 (85.411)	lr 0.00759
Train [76][1330/3239]	Time 2.525 (0.653)	Data Time 0.001 (0.031)	Loss 2.5691 (2.4016)	Entropy 0.95981 (0.96115)	Top-1 acc 63.281 (66.364)	Top-5 acc 83.594 (85.404)	lr 0.00759
Train [76][1340/3239]	Time 0.276 (0.650)	Data Time 0.002 (0.031)	Loss 2.3705 (2.4017)	Entropy 0.95979 (0.96114)	Top-1 acc 67.188 (66.366)	Top-5 acc 85.156 (85.397)	lr 0.00759
Train [76][1350/3239]	Time 0.412 (0.649)	Data Time 0.005 (0.031)	Loss 2.3996 (2.4014)	Entropy 0.95984 (0.96113)	Top-1 acc 67.969 (66.368)	Top-5 acc 83.594 (85.401)	lr 0.00759
Train [76][1360/3239]	Time 0.273 (0.648)	Data Time 0.001 (0.031)	Loss 2.3695 (2.4017)	Entropy 0.95985 (0.96112)	Top-1 acc 71.094 (66.361)	Top-5 acc 86.719 (85.396)	lr 0.00759
Train [76][1370/3239]	Time 0.218 (0.647)	Data Time 0.001 (0.031)	Loss 2.5899 (2.4014)	Entropy 0.95981 (0.96111)	Top-1 acc 61.328 (66.365)	Top-5 acc 82.812 (85.398)	lr 0.00759
Train [76][1380/3239]	Time 0.288 (0.646)	Data Time 0.002 (0.030)	Loss 2.4812 (2.4012)	Entropy 0.95967 (0.96110)	Top-1 acc 64.844 (66.371)	Top-5 acc 84.766 (85.406)	lr 0.00759
Train [76][1390/3239]	Time 0.351 (0.645)	Data Time 0.003 (0.030)	Loss 2.2990 (2.4008)	Entropy 0.95957 (0.96109)	Top-1 acc 68.359 (66.381)	Top-5 acc 87.500 (85.411)	lr 0.00759
Train [76][1400/3239]	Time 0.205 (0.644)	Data Time 0.001 (0.030)	Loss 2.4428 (2.4012)	Entropy 0.95957 (0.96108)	Top-1 acc 67.969 (66.369)	Top-5 acc 83.984 (85.401)	lr 0.00759
Train [76][1410/3239]	Time 0.242 (0.642)	Data Time 0.001 (0.030)	Loss 2.3807 (2.4012)	Entropy 0.95953 (0.96107)	Top-1 acc 66.016 (66.365)	Top-5 acc 82.812 (85.402)	lr 0.00759
Train [76][1420/3239]	Time 0.231 (0.641)	Data Time 0.001 (0.030)	Loss 2.3183 (2.4013)	Entropy 0.95952 (0.96106)	Top-1 acc 71.875 (66.371)	Top-5 acc 83.203 (85.399)	lr 0.00758
Train [76][1430/3239]	Time 0.269 (0.640)	Data Time 0.001 (0.029)	Loss 2.3300 (2.4013)	Entropy 0.95948 (0.96105)	Top-1 acc 67.578 (66.374)	Top-5 acc 87.891 (85.401)	lr 0.00758
Train [76][1440/3239]	Time 2.485 (0.639)	Data Time 0.001 (0.029)	Loss 2.3172 (2.4015)	Entropy 0.95948 (0.96103)	Top-1 acc 69.531 (66.375)	Top-5 acc 84.766 (85.397)	lr 0.00758
Train [76][1450/3239]	Time 0.230 (0.636)	Data Time 0.001 (0.029)	Loss 2.4540 (2.4014)	Entropy 0.95944 (0.96102)	Top-1 acc 66.406 (66.373)	Top-5 acc 86.719 (85.401)	lr 0.00758
Train [76][1460/3239]	Time 0.229 (0.635)	Data Time 0.001 (0.029)	Loss 2.4519 (2.4015)	Entropy 0.95946 (0.96101)	Top-1 acc 63.281 (66.378)	Top-5 acc 82.812 (85.397)	lr 0.00758
Train [76][1470/3239]	Time 0.234 (0.634)	Data Time 0.001 (0.029)	Loss 2.3517 (2.4018)	Entropy 0.95946 (0.96100)	Top-1 acc 69.141 (66.372)	Top-5 acc 85.156 (85.393)	lr 0.00758
Train [76][1480/3239]	Time 0.264 (0.633)	Data Time 0.001 (0.028)	Loss 2.3642 (2.4018)	Entropy 0.95943 (0.96099)	Top-1 acc 65.625 (66.376)	Top-5 acc 87.500 (85.397)	lr 0.00758
Train [76][1490/3239]	Time 0.239 (0.632)	Data Time 0.001 (0.028)	Loss 2.3002 (2.4014)	Entropy 0.95942 (0.96098)	Top-1 acc 67.969 (66.387)	Top-5 acc 89.844 (85.405)	lr 0.00758
Train [76][1500/3239]	Time 0.230 (0.631)	Data Time 0.001 (0.028)	Loss 2.2062 (2.4013)	Entropy 0.95941 (0.96097)	Top-1 acc 70.703 (66.392)	Top-5 acc 89.062 (85.407)	lr 0.00758
Train [76][1510/3239]	Time 0.226 (0.630)	Data Time 0.002 (0.028)	Loss 2.4303 (2.4011)	Entropy 0.95943 (0.96096)	Top-1 acc 59.766 (66.388)	Top-5 acc 86.328 (85.412)	lr 0.00758
Train [76][1520/3239]	Time 0.210 (0.629)	Data Time 0.001 (0.028)	Loss 2.4688 (2.4011)	Entropy 0.95937 (0.96095)	Top-1 acc 65.234 (66.387)	Top-5 acc 83.594 (85.412)	lr 0.00757
Train [76][1530/3239]	Time 0.228 (0.628)	Data Time 0.001 (0.028)	Loss 2.2717 (2.4012)	Entropy 0.95927 (0.96094)	Top-1 acc 69.922 (66.386)	Top-5 acc 90.234 (85.410)	lr 0.00757
Train [76][1540/3239]	Time 0.327 (0.627)	Data Time 0.002 (0.027)	Loss 2.4212 (2.4013)	Entropy 0.95932 (0.96093)	Top-1 acc 64.844 (66.378)	Top-5 acc 85.547 (85.409)	lr 0.00757
Train [76][1550/3239]	Time 2.552 (0.626)	Data Time 0.001 (0.027)	Loss 2.3962 (2.4013)	Entropy 0.95932 (0.96092)	Top-1 acc 63.672 (66.372)	Top-5 acc 87.109 (85.412)	lr 0.00757
Train [76][1560/3239]	Time 0.245 (0.624)	Data Time 0.001 (0.027)	Loss 2.4014 (2.4011)	Entropy 0.95935 (0.96091)	Top-1 acc 65.625 (66.374)	Top-5 acc 85.547 (85.419)	lr 0.00757
Train [76][1570/3239]	Time 0.473 (0.657)	Data Time 0.003 (0.027)	Loss 2.4079 (2.4012)	Entropy 0.95934 (0.96090)	Top-1 acc 68.750 (66.377)	Top-5 acc 84.375 (85.420)	lr 0.00757
Train [76][1580/3239]	Time 0.235 (0.656)	Data Time 0.002 (0.027)	Loss 2.4202 (2.4013)	Entropy 0.95936 (0.96089)	Top-1 acc 67.188 (66.372)	Top-5 acc 85.547 (85.414)	lr 0.00757
Train [76][1590/3239]	Time 0.234 (0.654)	Data Time 0.001 (0.027)	Loss 2.4464 (2.4013)	Entropy 0.95931 (0.96088)	Top-1 acc 63.281 (66.375)	Top-5 acc 82.812 (85.415)	lr 0.00757
Train [76][1600/3239]	Time 0.209 (0.653)	Data Time 0.001 (0.026)	Loss 2.4395 (2.4013)	Entropy 0.95932 (0.96087)	Top-1 acc 66.016 (66.372)	Top-5 acc 83.594 (85.418)	lr 0.00757
Train [76][1610/3239]	Time 0.254 (0.652)	Data Time 0.001 (0.026)	Loss 2.5638 (2.4013)	Entropy 0.95932 (0.96086)	Top-1 acc 61.328 (66.367)	Top-5 acc 83.984 (85.419)	lr 0.00757
Train [76][1620/3239]	Time 0.218 (0.651)	Data Time 0.001 (0.026)	Loss 2.4741 (2.4013)	Entropy 0.95931 (0.96085)	Top-1 acc 66.016 (66.359)	Top-5 acc 82.812 (85.421)	lr 0.00757
Train [76][1630/3239]	Time 0.234 (0.650)	Data Time 0.001 (0.026)	Loss 2.2446 (2.4013)	Entropy 0.95921 (0.96084)	Top-1 acc 69.922 (66.362)	Top-5 acc 86.719 (85.417)	lr 0.00756
Train [76][1640/3239]	Time 0.229 (0.649)	Data Time 0.001 (0.026)	Loss 2.5971 (2.4014)	Entropy 0.95905 (0.96083)	Top-1 acc 62.891 (66.366)	Top-5 acc 79.688 (85.411)	lr 0.00756
Train [76][1650/3239]	Time 0.230 (0.648)	Data Time 0.001 (0.026)	Loss 2.2573 (2.4014)	Entropy 0.95905 (0.96082)	Top-1 acc 68.750 (66.370)	Top-5 acc 90.625 (85.413)	lr 0.00756
Train [76][1660/3239]	Time 2.674 (0.647)	Data Time 0.001 (0.026)	Loss 2.2872 (2.4013)	Entropy 0.95905 (0.96081)	Top-1 acc 67.969 (66.367)	Top-5 acc 87.891 (85.415)	lr 0.00756
Train [76][1670/3239]	Time 0.225 (0.645)	Data Time 0.001 (0.025)	Loss 2.5038 (2.4017)	Entropy 0.95898 (0.96080)	Top-1 acc 62.891 (66.349)	Top-5 acc 82.422 (85.408)	lr 0.00756
Train [76][1680/3239]	Time 0.237 (0.644)	Data Time 0.001 (0.025)	Loss 2.3051 (2.4014)	Entropy 0.95890 (0.96079)	Top-1 acc 67.969 (66.351)	Top-5 acc 88.281 (85.417)	lr 0.00756
Train [76][1690/3239]	Time 0.241 (0.643)	Data Time 0.002 (0.025)	Loss 2.4128 (2.4016)	Entropy 0.95887 (0.96078)	Top-1 acc 65.625 (66.345)	Top-5 acc 87.109 (85.416)	lr 0.00756
Train [76][1700/3239]	Time 0.332 (0.642)	Data Time 0.001 (0.025)	Loss 2.3816 (2.4013)	Entropy 0.95886 (0.96076)	Top-1 acc 67.578 (66.356)	Top-5 acc 85.156 (85.421)	lr 0.00756
Train [76][1710/3239]	Time 0.239 (0.641)	Data Time 0.002 (0.025)	Loss 2.2919 (2.4013)	Entropy 0.95885 (0.96075)	Top-1 acc 70.312 (66.359)	Top-5 acc 89.062 (85.418)	lr 0.00756
Train [76][1720/3239]	Time 0.211 (0.640)	Data Time 0.001 (0.025)	Loss 2.2495 (2.4014)	Entropy 0.95879 (0.96074)	Top-1 acc 69.922 (66.358)	Top-5 acc 89.062 (85.421)	lr 0.00756
Train [76][1730/3239]	Time 0.235 (0.639)	Data Time 0.001 (0.025)	Loss 2.4043 (2.4015)	Entropy 0.95867 (0.96073)	Top-1 acc 64.844 (66.352)	Top-5 acc 84.375 (85.419)	lr 0.00756
Train [76][1740/3239]	Time 0.257 (0.638)	Data Time 0.001 (0.024)	Loss 2.4146 (2.4014)	Entropy 0.95864 (0.96072)	Top-1 acc 65.234 (66.352)	Top-5 acc 85.938 (85.419)	lr 0.00755
Train [76][1750/3239]	Time 0.246 (0.637)	Data Time 0.001 (0.024)	Loss 2.4818 (2.4015)	Entropy 0.95858 (0.96071)	Top-1 acc 64.453 (66.349)	Top-5 acc 83.594 (85.416)	lr 0.00755
Train [76][1760/3239]	Time 0.214 (0.636)	Data Time 0.001 (0.024)	Loss 2.4088 (2.4015)	Entropy 0.95855 (0.96069)	Top-1 acc 67.969 (66.357)	Top-5 acc 83.594 (85.414)	lr 0.00755
Train [76][1770/3239]	Time 2.534 (0.635)	Data Time 0.002 (0.024)	Loss 2.4057 (2.4018)	Entropy 0.95855 (0.96068)	Top-1 acc 66.406 (66.347)	Top-5 acc 85.547 (85.411)	lr 0.00755
Train [76][1780/3239]	Time 0.227 (0.633)	Data Time 0.001 (0.024)	Loss 2.3686 (2.4014)	Entropy 0.95849 (0.96067)	Top-1 acc 69.141 (66.356)	Top-5 acc 85.156 (85.416)	lr 0.00755
Train [76][1790/3239]	Time 0.313 (0.632)	Data Time 0.001 (0.024)	Loss 2.4335 (2.4013)	Entropy 0.95838 (0.96066)	Top-1 acc 65.234 (66.356)	Top-5 acc 85.547 (85.420)	lr 0.00755
Train [76][1800/3239]	Time 0.256 (0.631)	Data Time 0.001 (0.024)	Loss 2.4706 (2.4012)	Entropy 0.95816 (0.96064)	Top-1 acc 63.281 (66.359)	Top-5 acc 85.938 (85.426)	lr 0.00755
Train [76][1810/3239]	Time 0.225 (0.630)	Data Time 0.001 (0.024)	Loss 2.3347 (2.4011)	Entropy 0.95816 (0.96063)	Top-1 acc 68.359 (66.360)	Top-5 acc 87.891 (85.427)	lr 0.00755
Train [76][1820/3239]	Time 0.253 (0.629)	Data Time 0.001 (0.023)	Loss 2.2662 (2.4011)	Entropy 0.95806 (0.96062)	Top-1 acc 72.656 (66.355)	Top-5 acc 88.672 (85.425)	lr 0.00755
Train [76][1830/3239]	Time 0.234 (0.629)	Data Time 0.001 (0.023)	Loss 2.5556 (2.4009)	Entropy 0.95796 (0.96060)	Top-1 acc 61.328 (66.362)	Top-5 acc 81.250 (85.427)	lr 0.00755
Train [76][1840/3239]	Time 0.240 (0.628)	Data Time 0.001 (0.023)	Loss 2.4888 (2.4010)	Entropy 0.95793 (0.96059)	Top-1 acc 59.375 (66.353)	Top-5 acc 84.375 (85.426)	lr 0.00755
Train [76][1850/3239]	Time 0.226 (0.627)	Data Time 0.001 (0.023)	Loss 2.4970 (2.4011)	Entropy 0.95789 (0.96057)	Top-1 acc 63.281 (66.351)	Top-5 acc 85.938 (85.426)	lr 0.00754
Train [76][1860/3239]	Time 0.244 (0.626)	Data Time 0.002 (0.023)	Loss 2.1972 (2.4012)	Entropy 0.95783 (0.96056)	Top-1 acc 73.438 (66.350)	Top-5 acc 89.062 (85.422)	lr 0.00754
Train [76][1870/3239]	Time 0.210 (0.625)	Data Time 0.001 (0.023)	Loss 2.4844 (2.4012)	Entropy 0.95778 (0.96054)	Top-1 acc 65.625 (66.352)	Top-5 acc 82.031 (85.416)	lr 0.00754
Train [76][1880/3239]	Time 2.689 (0.625)	Data Time 0.001 (0.023)	Loss 2.3123 (2.4012)	Entropy 0.95778 (0.96053)	Top-1 acc 68.359 (66.352)	Top-5 acc 87.109 (85.414)	lr 0.00754
Train [76][1890/3239]	Time 0.234 (0.623)	Data Time 0.001 (0.023)	Loss 2.4124 (2.4011)	Entropy 0.95776 (0.96051)	Top-1 acc 62.891 (66.355)	Top-5 acc 85.156 (85.413)	lr 0.00754
Train [76][1900/3239]	Time 0.233 (0.622)	Data Time 0.001 (0.022)	Loss 2.4573 (2.4012)	Entropy 0.95776 (0.96050)	Top-1 acc 63.672 (66.351)	Top-5 acc 84.375 (85.407)	lr 0.00754
Train [76][1910/3239]	Time 0.215 (0.621)	Data Time 0.001 (0.022)	Loss 2.4818 (2.4012)	Entropy 0.95770 (0.96049)	Top-1 acc 60.547 (66.348)	Top-5 acc 83.984 (85.406)	lr 0.00754
Train [76][1920/3239]	Time 0.249 (0.620)	Data Time 0.001 (0.022)	Loss 2.4946 (2.4012)	Entropy 0.95769 (0.96047)	Top-1 acc 64.062 (66.349)	Top-5 acc 82.812 (85.403)	lr 0.00754
Train [76][1930/3239]	Time 0.240 (0.647)	Data Time 0.002 (0.022)	Loss 2.4849 (2.4013)	Entropy 0.95767 (0.96046)	Top-1 acc 65.234 (66.350)	Top-5 acc 84.766 (85.401)	lr 0.00754
Train [76][1940/3239]	Time 0.223 (0.647)	Data Time 0.002 (0.022)	Loss 2.4532 (2.4017)	Entropy 0.95767 (0.96044)	Top-1 acc 66.016 (66.342)	Top-5 acc 85.156 (85.393)	lr 0.00754
Train [76][1950/3239]	Time 0.225 (0.646)	Data Time 0.002 (0.022)	Loss 2.3324 (2.4018)	Entropy 0.95767 (0.96043)	Top-1 acc 67.188 (66.338)	Top-5 acc 87.500 (85.390)	lr 0.00754
Train [76][1960/3239]	Time 0.230 (0.645)	Data Time 0.002 (0.022)	Loss 2.4678 (2.4020)	Entropy 0.95766 (0.96041)	Top-1 acc 65.234 (66.329)	Top-5 acc 85.547 (85.386)	lr 0.00753
Train [76][1970/3239]	Time 0.243 (0.644)	Data Time 0.002 (0.022)	Loss 2.4016 (2.4019)	Entropy 0.95761 (0.96040)	Top-1 acc 65.234 (66.326)	Top-5 acc 84.375 (85.386)	lr 0.00753
Train [76][1980/3239]	Time 0.242 (0.643)	Data Time 0.001 (0.022)	Loss 2.5683 (2.4020)	Entropy 0.95753 (0.96039)	Top-1 acc 63.281 (66.323)	Top-5 acc 82.031 (85.384)	lr 0.00753
Train [76][1990/3239]	Time 2.491 (0.642)	Data Time 0.001 (0.022)	Loss 2.2515 (2.4023)	Entropy 0.95753 (0.96037)	Top-1 acc 70.703 (66.318)	Top-5 acc 88.672 (85.378)	lr 0.00753
Train [76][2000/3239]	Time 0.247 (0.640)	Data Time 0.002 (0.021)	Loss 2.6282 (2.4024)	Entropy 0.95750 (0.96036)	Top-1 acc 59.375 (66.313)	Top-5 acc 82.812 (85.376)	lr 0.00753
Train [76][2010/3239]	Time 0.325 (0.639)	Data Time 0.001 (0.021)	Loss 2.6634 (2.4022)	Entropy 0.95744 (0.96034)	Top-1 acc 57.422 (66.320)	Top-5 acc 82.031 (85.382)	lr 0.00753
Train [76][2020/3239]	Time 0.203 (0.639)	Data Time 0.001 (0.021)	Loss 2.4273 (2.4022)	Entropy 0.95747 (0.96033)	Top-1 acc 65.234 (66.316)	Top-5 acc 83.984 (85.378)	lr 0.00753
Train [76][2030/3239]	Time 0.233 (0.638)	Data Time 0.006 (0.021)	Loss 2.4774 (2.4026)	Entropy 0.95745 (0.96031)	Top-1 acc 65.234 (66.313)	Top-5 acc 82.422 (85.367)	lr 0.00753
Train [76][2040/3239]	Time 0.264 (0.637)	Data Time 0.001 (0.021)	Loss 2.3269 (2.4025)	Entropy 0.95740 (0.96030)	Top-1 acc 66.797 (66.316)	Top-5 acc 87.109 (85.371)	lr 0.00753
Train [76][2050/3239]	Time 0.224 (0.636)	Data Time 0.001 (0.021)	Loss 2.4761 (2.4025)	Entropy 0.95742 (0.96029)	Top-1 acc 62.500 (66.310)	Top-5 acc 85.156 (85.371)	lr 0.00753
Train [76][2060/3239]	Time 0.224 (0.635)	Data Time 0.001 (0.021)	Loss 2.3947 (2.4024)	Entropy 0.95733 (0.96027)	Top-1 acc 65.625 (66.310)	Top-5 acc 83.984 (85.371)	lr 0.00752
Train [76][2070/3239]	Time 0.234 (0.635)	Data Time 0.002 (0.021)	Loss 2.5222 (2.4023)	Entropy 0.95722 (0.96026)	Top-1 acc 64.844 (66.308)	Top-5 acc 82.031 (85.373)	lr 0.00752
Train [76][2080/3239]	Time 0.279 (0.634)	Data Time 0.001 (0.021)	Loss 2.5301 (2.4023)	Entropy 0.95718 (0.96024)	Top-1 acc 64.062 (66.302)	Top-5 acc 82.422 (85.374)	lr 0.00752
Train [76][2090/3239]	Time 0.265 (0.633)	Data Time 0.001 (0.021)	Loss 2.3012 (2.4025)	Entropy 0.95717 (0.96023)	Top-1 acc 66.016 (66.300)	Top-5 acc 87.500 (85.370)	lr 0.00752
Train [76][2100/3239]	Time 2.539 (0.632)	Data Time 0.002 (0.021)	Loss 2.4277 (2.4025)	Entropy 0.95717 (0.96021)	Top-1 acc 65.234 (66.308)	Top-5 acc 85.156 (85.368)	lr 0.00752
Train [76][2110/3239]	Time 0.229 (0.631)	Data Time 0.001 (0.020)	Loss 2.3888 (2.4026)	Entropy 0.95699 (0.96020)	Top-1 acc 67.188 (66.301)	Top-5 acc 85.156 (85.363)	lr 0.00752
Train [76][2120/3239]	Time 0.237 (0.630)	Data Time 0.001 (0.020)	Loss 2.2472 (2.4024)	Entropy 0.95696 (0.96018)	Top-1 acc 69.531 (66.309)	Top-5 acc 87.500 (85.368)	lr 0.00752
Train [76][2130/3239]	Time 0.233 (0.629)	Data Time 0.001 (0.020)	Loss 2.3578 (2.4026)	Entropy 0.95694 (0.96017)	Top-1 acc 67.969 (66.302)	Top-5 acc 85.156 (85.362)	lr 0.00752
Train [76][2140/3239]	Time 0.241 (0.628)	Data Time 0.002 (0.020)	Loss 2.4342 (2.4028)	Entropy 0.95690 (0.96015)	Top-1 acc 63.281 (66.296)	Top-5 acc 85.938 (85.358)	lr 0.00752
Train [76][2150/3239]	Time 0.241 (0.628)	Data Time 0.001 (0.020)	Loss 2.5693 (2.4029)	Entropy 0.95688 (0.96014)	Top-1 acc 63.281 (66.292)	Top-5 acc 83.594 (85.358)	lr 0.00752
Train [76][2160/3239]	Time 0.229 (0.627)	Data Time 0.001 (0.020)	Loss 2.4709 (2.4031)	Entropy 0.95675 (0.96012)	Top-1 acc 63.672 (66.289)	Top-5 acc 81.641 (85.353)	lr 0.00752
Train [76][2170/3239]	Time 0.259 (0.626)	Data Time 0.001 (0.020)	Loss 2.4644 (2.4032)	Entropy 0.95676 (0.96011)	Top-1 acc 63.672 (66.289)	Top-5 acc 82.812 (85.351)	lr 0.00751
Train [76][2180/3239]	Time 0.223 (0.626)	Data Time 0.001 (0.020)	Loss 2.4410 (2.4033)	Entropy 0.95668 (0.96009)	Top-1 acc 62.500 (66.282)	Top-5 acc 84.375 (85.351)	lr 0.00751
Train [76][2190/3239]	Time 0.251 (0.625)	Data Time 0.001 (0.020)	Loss 2.2785 (2.4032)	Entropy 0.95657 (0.96008)	Top-1 acc 67.969 (66.285)	Top-5 acc 87.500 (85.353)	lr 0.00751
Train [76][2200/3239]	Time 0.245 (0.624)	Data Time 0.001 (0.020)	Loss 2.2235 (2.4033)	Entropy 0.95652 (0.96006)	Top-1 acc 73.828 (66.285)	Top-5 acc 87.500 (85.349)	lr 0.00751
Train [76][2210/3239]	Time 2.494 (0.624)	Data Time 0.001 (0.020)	Loss 2.5452 (2.4034)	Entropy 0.95652 (0.96004)	Top-1 acc 62.109 (66.282)	Top-5 acc 83.203 (85.347)	lr 0.00751
Train [76][2220/3239]	Time 0.231 (0.622)	Data Time 0.001 (0.019)	Loss 2.5125 (2.4035)	Entropy 0.95649 (0.96003)	Top-1 acc 64.844 (66.278)	Top-5 acc 83.594 (85.348)	lr 0.00751
Train [76][2230/3239]	Time 0.290 (0.621)	Data Time 0.002 (0.019)	Loss 2.4367 (2.4034)	Entropy 0.95647 (0.96001)	Top-1 acc 65.625 (66.274)	Top-5 acc 83.594 (85.353)	lr 0.00751
Train [76][2240/3239]	Time 0.256 (0.621)	Data Time 0.001 (0.019)	Loss 2.6016 (2.4037)	Entropy 0.95650 (0.96000)	Top-1 acc 63.281 (66.263)	Top-5 acc 82.031 (85.344)	lr 0.00751
Train [76][2250/3239]	Time 0.269 (0.620)	Data Time 0.001 (0.019)	Loss 2.3173 (2.4037)	Entropy 0.95654 (0.95998)	Top-1 acc 70.703 (66.265)	Top-5 acc 87.109 (85.346)	lr 0.00751
Train [76][2260/3239]	Time 0.229 (0.619)	Data Time 0.001 (0.019)	Loss 2.2640 (2.4036)	Entropy 0.95651 (0.95997)	Top-1 acc 68.750 (66.267)	Top-5 acc 89.453 (85.348)	lr 0.00751
Train [76][2270/3239]	Time 0.213 (0.619)	Data Time 0.001 (0.019)	Loss 2.4780 (2.4034)	Entropy 0.95653 (0.95995)	Top-1 acc 64.062 (66.269)	Top-5 acc 82.812 (85.351)	lr 0.00751
Train [76][2280/3239]	Time 0.356 (0.618)	Data Time 0.002 (0.019)	Loss 2.3292 (2.4033)	Entropy 0.95647 (0.95993)	Top-1 acc 66.797 (66.269)	Top-5 acc 87.109 (85.352)	lr 0.00750
Train [76][2290/3239]	Time 0.272 (0.640)	Data Time 0.003 (0.019)	Loss 2.4113 (2.4034)	Entropy 0.95645 (0.95992)	Top-1 acc 66.406 (66.266)	Top-5 acc 85.156 (85.351)	lr 0.00750
Train [76][2300/3239]	Time 0.233 (0.639)	Data Time 0.002 (0.019)	Loss 2.3930 (2.4034)	Entropy 0.95634 (0.95990)	Top-1 acc 67.969 (66.267)	Top-5 acc 84.766 (85.353)	lr 0.00750
Train [76][2310/3239]	Time 0.251 (0.639)	Data Time 0.001 (0.019)	Loss 2.4995 (2.4034)	Entropy 0.95634 (0.95989)	Top-1 acc 64.062 (66.266)	Top-5 acc 82.422 (85.352)	lr 0.00750
Train [76][2320/3239]	Time 2.505 (0.638)	Data Time 0.001 (0.019)	Loss 2.4260 (2.4032)	Entropy 0.95634 (0.95987)	Top-1 acc 62.891 (66.271)	Top-5 acc 85.938 (85.355)	lr 0.00750
Train [76][2330/3239]	Time 0.232 (0.636)	Data Time 0.001 (0.019)	Loss 2.3561 (2.4031)	Entropy 0.95634 (0.95986)	Top-1 acc 69.922 (66.273)	Top-5 acc 85.938 (85.358)	lr 0.00750
Train [76][2340/3239]	Time 0.237 (0.636)	Data Time 0.001 (0.019)	Loss 2.5838 (2.4031)	Entropy 0.95635 (0.95984)	Top-1 acc 59.375 (66.271)	Top-5 acc 83.594 (85.360)	lr 0.00750
Train [76][2350/3239]	Time 0.233 (0.635)	Data Time 0.001 (0.019)	Loss 2.3833 (2.4031)	Entropy 0.95630 (0.95983)	Top-1 acc 63.672 (66.271)	Top-5 acc 85.938 (85.362)	lr 0.00750
Train [76][2360/3239]	Time 0.230 (0.634)	Data Time 0.001 (0.018)	Loss 2.4197 (2.4031)	Entropy 0.95630 (0.95981)	Top-1 acc 67.578 (66.270)	Top-5 acc 83.203 (85.359)	lr 0.00750
Train [76][2370/3239]	Time 0.256 (0.634)	Data Time 0.002 (0.018)	Loss 2.4446 (2.4031)	Entropy 0.95628 (0.95980)	Top-1 acc 65.234 (66.271)	Top-5 acc 85.547 (85.360)	lr 0.00750
Train [76][2380/3239]	Time 0.225 (0.633)	Data Time 0.001 (0.018)	Loss 2.3762 (2.4031)	Entropy 0.95624 (0.95978)	Top-1 acc 64.844 (66.273)	Top-5 acc 86.328 (85.361)	lr 0.00750
Train [76][2390/3239]	Time 0.238 (0.632)	Data Time 0.001 (0.018)	Loss 2.4738 (2.4033)	Entropy 0.95618 (0.95977)	Top-1 acc 64.844 (66.269)	Top-5 acc 82.422 (85.356)	lr 0.00749
Train [76][2400/3239]	Time 0.256 (0.632)	Data Time 0.001 (0.018)	Loss 2.3883 (2.4034)	Entropy 0.95614 (0.95975)	Top-1 acc 69.531 (66.268)	Top-5 acc 85.938 (85.353)	lr 0.00749
Train [76][2410/3239]	Time 0.357 (0.631)	Data Time 0.001 (0.018)	Loss 2.2789 (2.4033)	Entropy 0.95604 (0.95974)	Top-1 acc 66.016 (66.270)	Top-5 acc 88.281 (85.355)	lr 0.00749
Train [76][2420/3239]	Time 0.266 (0.630)	Data Time 0.001 (0.018)	Loss 2.4669 (2.4034)	Entropy 0.95589 (0.95972)	Top-1 acc 66.406 (66.271)	Top-5 acc 85.938 (85.354)	lr 0.00749
Train [76][2430/3239]	Time 2.444 (0.630)	Data Time 0.001 (0.018)	Loss 2.3895 (2.4032)	Entropy 0.95589 (0.95971)	Top-1 acc 68.359 (66.276)	Top-5 acc 86.328 (85.358)	lr 0.00749
Train [76][2440/3239]	Time 0.231 (0.628)	Data Time 0.001 (0.018)	Loss 2.4958 (2.4030)	Entropy 0.95590 (0.95969)	Top-1 acc 64.062 (66.280)	Top-5 acc 82.031 (85.362)	lr 0.00749
Train [76][2450/3239]	Time 0.312 (0.628)	Data Time 0.001 (0.018)	Loss 2.4548 (2.4030)	Entropy 0.95586 (0.95968)	Top-1 acc 67.969 (66.282)	Top-5 acc 83.203 (85.359)	lr 0.00749
Train [76][2460/3239]	Time 0.236 (0.627)	Data Time 0.001 (0.018)	Loss 2.3369 (2.4028)	Entropy 0.95578 (0.95966)	Top-1 acc 69.922 (66.286)	Top-5 acc 86.719 (85.364)	lr 0.00749
Train [76][2470/3239]	Time 0.251 (0.626)	Data Time 0.001 (0.018)	Loss 2.4560 (2.4029)	Entropy 0.95563 (0.95964)	Top-1 acc 64.844 (66.282)	Top-5 acc 83.984 (85.365)	lr 0.00749
Train [76][2480/3239]	Time 0.241 (0.626)	Data Time 0.005 (0.018)	Loss 2.4557 (2.4029)	Entropy 0.95562 (0.95963)	Top-1 acc 62.891 (66.283)	Top-5 acc 82.422 (85.363)	lr 0.00749
Train [76][2490/3239]	Time 0.223 (0.625)	Data Time 0.001 (0.018)	Loss 2.3832 (2.4027)	Entropy 0.95556 (0.95961)	Top-1 acc 67.578 (66.284)	Top-5 acc 85.938 (85.366)	lr 0.00749
Train [76][2500/3239]	Time 0.276 (0.625)	Data Time 0.001 (0.017)	Loss 2.4120 (2.4027)	Entropy 0.95546 (0.95960)	Top-1 acc 65.234 (66.284)	Top-5 acc 85.156 (85.367)	lr 0.00748
Train [76][2510/3239]	Time 0.239 (0.624)	Data Time 0.001 (0.017)	Loss 2.4079 (2.4027)	Entropy 0.95543 (0.95958)	Top-1 acc 67.188 (66.285)	Top-5 acc 83.984 (85.369)	lr 0.00748
Train [76][2520/3239]	Time 0.261 (0.623)	Data Time 0.001 (0.017)	Loss 2.4767 (2.4027)	Entropy 0.95542 (0.95956)	Top-1 acc 64.453 (66.287)	Top-5 acc 85.156 (85.370)	lr 0.00748
Train [76][2530/3239]	Time 0.229 (0.623)	Data Time 0.001 (0.017)	Loss 2.3478 (2.4027)	Entropy 0.95538 (0.95955)	Top-1 acc 62.500 (66.282)	Top-5 acc 89.062 (85.371)	lr 0.00748
Train [76][2540/3239]	Time 2.659 (0.622)	Data Time 0.002 (0.017)	Loss 2.4395 (2.4028)	Entropy 0.95538 (0.95953)	Top-1 acc 62.891 (66.277)	Top-5 acc 84.375 (85.370)	lr 0.00748
Train [76][2550/3239]	Time 0.234 (0.621)	Data Time 0.001 (0.017)	Loss 2.3330 (2.4026)	Entropy 0.95531 (0.95951)	Top-1 acc 67.188 (66.279)	Top-5 acc 86.328 (85.372)	lr 0.00748
Train [76][2560/3239]	Time 0.257 (0.620)	Data Time 0.001 (0.017)	Loss 2.3157 (2.4023)	Entropy 0.95526 (0.95950)	Top-1 acc 68.750 (66.287)	Top-5 acc 86.719 (85.373)	lr 0.00748
Train [76][2570/3239]	Time 0.228 (0.620)	Data Time 0.001 (0.017)	Loss 2.4023 (2.4024)	Entropy 0.95519 (0.95948)	Top-1 acc 64.844 (66.283)	Top-5 acc 86.719 (85.374)	lr 0.00748
Train [76][2580/3239]	Time 0.247 (0.619)	Data Time 0.001 (0.017)	Loss 2.5895 (2.4024)	Entropy 0.95516 (0.95946)	Top-1 acc 61.719 (66.283)	Top-5 acc 82.422 (85.373)	lr 0.00748
Train [76][2590/3239]	Time 0.248 (0.619)	Data Time 0.001 (0.017)	Loss 2.2665 (2.4024)	Entropy 0.95506 (0.95945)	Top-1 acc 72.656 (66.284)	Top-5 acc 87.109 (85.374)	lr 0.00748
Train [76][2600/3239]	Time 0.277 (0.618)	Data Time 0.002 (0.017)	Loss 2.4853 (2.4024)	Entropy 0.95501 (0.95943)	Top-1 acc 64.844 (66.284)	Top-5 acc 84.766 (85.376)	lr 0.00747
Train [76][2610/3239]	Time 0.231 (0.618)	Data Time 0.002 (0.017)	Loss 2.3161 (2.4023)	Entropy 0.95493 (0.95941)	Top-1 acc 69.141 (66.283)	Top-5 acc 86.719 (85.379)	lr 0.00747
Train [76][2620/3239]	Time 0.253 (0.617)	Data Time 0.002 (0.017)	Loss 2.1842 (2.4023)	Entropy 0.95491 (0.95940)	Top-1 acc 74.219 (66.285)	Top-5 acc 88.672 (85.379)	lr 0.00747
Train [76][2630/3239]	Time 0.226 (0.617)	Data Time 0.001 (0.017)	Loss 2.4743 (2.4024)	Entropy 0.95491 (0.95938)	Top-1 acc 61.719 (66.278)	Top-5 acc 83.594 (85.379)	lr 0.00747
Train [76][2640/3239]	Time 0.229 (0.616)	Data Time 0.001 (0.017)	Loss 2.4826 (2.4024)	Entropy 0.95486 (0.95936)	Top-1 acc 64.453 (66.284)	Top-5 acc 84.766 (85.380)	lr 0.00747
Train [76][2650/3239]	Time 0.252 (0.636)	Data Time 0.003 (0.017)	Loss 2.3402 (2.4025)	Entropy 0.95484 (0.95934)	Top-1 acc 67.969 (66.280)	Top-5 acc 87.891 (85.378)	lr 0.00747
Train [76][2660/3239]	Time 0.273 (0.635)	Data Time 0.002 (0.017)	Loss 2.3939 (2.4025)	Entropy 0.95483 (0.95933)	Top-1 acc 67.578 (66.281)	Top-5 acc 86.719 (85.377)	lr 0.00747
Train [76][2670/3239]	Time 0.447 (0.635)	Data Time 0.002 (0.017)	Loss 2.7217 (2.4027)	Entropy 0.95487 (0.95931)	Top-1 acc 57.422 (66.278)	Top-5 acc 80.078 (85.374)	lr 0.00747
Train [76][2680/3239]	Time 0.263 (0.634)	Data Time 0.001 (0.016)	Loss 2.4365 (2.4027)	Entropy 0.95479 (0.95929)	Top-1 acc 63.672 (66.276)	Top-5 acc 83.203 (85.372)	lr 0.00747
Train [76][2690/3239]	Time 0.249 (0.634)	Data Time 0.001 (0.016)	Loss 2.3087 (2.4028)	Entropy 0.95481 (0.95928)	Top-1 acc 69.922 (66.277)	Top-5 acc 86.328 (85.370)	lr 0.00747
Train [76][2700/3239]	Time 0.254 (0.633)	Data Time 0.001 (0.016)	Loss 2.4597 (2.4031)	Entropy 0.95472 (0.95926)	Top-1 acc 65.625 (66.268)	Top-5 acc 83.984 (85.366)	lr 0.00747
Train [76][2710/3239]	Time 0.314 (0.633)	Data Time 0.001 (0.016)	Loss 2.3721 (2.4031)	Entropy 0.95469 (0.95924)	Top-1 acc 66.797 (66.267)	Top-5 acc 86.719 (85.365)	lr 0.00746
Train [76][2720/3239]	Time 0.260 (0.632)	Data Time 0.001 (0.016)	Loss 2.3654 (2.4032)	Entropy 0.95458 (0.95923)	Top-1 acc 64.062 (66.266)	Top-5 acc 85.938 (85.363)	lr 0.00746
Train [76][2730/3239]	Time 0.227 (0.632)	Data Time 0.001 (0.016)	Loss 2.3475 (2.4032)	Entropy 0.95459 (0.95921)	Top-1 acc 67.969 (66.265)	Top-5 acc 86.328 (85.364)	lr 0.00746
Train [76][2740/3239]	Time 0.339 (0.631)	Data Time 0.001 (0.016)	Loss 2.3036 (2.4032)	Entropy 0.95455 (0.95919)	Top-1 acc 69.922 (66.269)	Top-5 acc 86.719 (85.364)	lr 0.00746
Train [76][2750/3239]	Time 0.255 (0.630)	Data Time 0.001 (0.016)	Loss 2.4152 (2.4033)	Entropy 0.95454 (0.95918)	Top-1 acc 63.672 (66.268)	Top-5 acc 87.109 (85.363)	lr 0.00746
Train [76][2760/3239]	Time 0.255 (0.630)	Data Time 0.001 (0.016)	Loss 2.4142 (2.4035)	Entropy 0.95449 (0.95916)	Top-1 acc 66.016 (66.264)	Top-5 acc 87.500 (85.359)	lr 0.00746
Train [76][2770/3239]	Time 0.231 (0.629)	Data Time 0.001 (0.016)	Loss 2.3665 (2.4034)	Entropy 0.95455 (0.95914)	Top-1 acc 64.844 (66.268)	Top-5 acc 86.719 (85.360)	lr 0.00746
Train [76][2780/3239]	Time 0.276 (0.629)	Data Time 0.001 (0.016)	Loss 2.4625 (2.4036)	Entropy 0.95447 (0.95913)	Top-1 acc 66.016 (66.265)	Top-5 acc 84.766 (85.357)	lr 0.00746
Train [76][2790/3239]	Time 0.344 (0.628)	Data Time 0.002 (0.016)	Loss 2.5872 (2.4034)	Entropy 0.95445 (0.95911)	Top-1 acc 60.938 (66.269)	Top-5 acc 83.594 (85.360)	lr 0.00746
Train [76][2800/3239]	Time 0.330 (0.628)	Data Time 0.001 (0.016)	Loss 2.4345 (2.4035)	Entropy 0.95442 (0.95909)	Top-1 acc 68.750 (66.266)	Top-5 acc 84.375 (85.357)	lr 0.00746
Train [76][2810/3239]	Time 0.222 (0.627)	Data Time 0.001 (0.016)	Loss 2.5203 (2.4036)	Entropy 0.95435 (0.95908)	Top-1 acc 64.062 (66.263)	Top-5 acc 81.250 (85.354)	lr 0.00746
Train [76][2820/3239]	Time 0.261 (0.627)	Data Time 0.001 (0.016)	Loss 2.2841 (2.4036)	Entropy 0.95431 (0.95906)	Top-1 acc 69.141 (66.264)	Top-5 acc 88.672 (85.355)	lr 0.00745
Train [76][2830/3239]	Time 0.310 (0.626)	Data Time 0.001 (0.016)	Loss 2.2811 (2.4036)	Entropy 0.95401 (0.95904)	Top-1 acc 69.141 (66.264)	Top-5 acc 86.719 (85.355)	lr 0.00745
Train [76][2840/3239]	Time 0.262 (0.626)	Data Time 0.001 (0.016)	Loss 2.4532 (2.4036)	Entropy 0.95395 (0.95903)	Top-1 acc 65.234 (66.265)	Top-5 acc 84.375 (85.356)	lr 0.00745
Train [76][2850/3239]	Time 0.257 (0.625)	Data Time 0.001 (0.016)	Loss 2.3922 (2.4036)	Entropy 0.95394 (0.95901)	Top-1 acc 66.406 (66.263)	Top-5 acc 83.984 (85.355)	lr 0.00745
Train [76][2860/3239]	Time 0.244 (0.625)	Data Time 0.002 (0.016)	Loss 2.4697 (2.4037)	Entropy 0.95387 (0.95899)	Top-1 acc 64.844 (66.264)	Top-5 acc 86.328 (85.352)	lr 0.00745
Train [76][2870/3239]	Time 0.232 (0.624)	Data Time 0.001 (0.015)	Loss 2.5179 (2.4039)	Entropy 0.95377 (0.95897)	Top-1 acc 62.891 (66.260)	Top-5 acc 83.594 (85.347)	lr 0.00745
Train [76][2880/3239]	Time 0.279 (0.624)	Data Time 0.001 (0.015)	Loss 2.5064 (2.4042)	Entropy 0.95389 (0.95895)	Top-1 acc 65.234 (66.253)	Top-5 acc 85.547 (85.341)	lr 0.00745
Train [76][2890/3239]	Time 0.242 (0.623)	Data Time 0.001 (0.015)	Loss 2.5790 (2.4042)	Entropy 0.95378 (0.95894)	Top-1 acc 59.766 (66.248)	Top-5 acc 83.594 (85.342)	lr 0.00745
Train [76][2900/3239]	Time 0.314 (0.623)	Data Time 0.002 (0.015)	Loss 2.4098 (2.4043)	Entropy 0.95375 (0.95892)	Top-1 acc 66.016 (66.249)	Top-5 acc 85.547 (85.343)	lr 0.00745
Train [76][2910/3239]	Time 0.273 (0.622)	Data Time 0.001 (0.015)	Loss 2.4695 (2.4044)	Entropy 0.95377 (0.95890)	Top-1 acc 65.625 (66.246)	Top-5 acc 84.766 (85.340)	lr 0.00745
Train [76][2920/3239]	Time 0.239 (0.622)	Data Time 0.001 (0.015)	Loss 2.4922 (2.4044)	Entropy 0.95374 (0.95888)	Top-1 acc 63.672 (66.243)	Top-5 acc 84.766 (85.342)	lr 0.00745
Train [76][2930/3239]	Time 0.419 (0.621)	Data Time 0.001 (0.015)	Loss 2.4101 (2.4046)	Entropy 0.95344 (0.95887)	Top-1 acc 70.312 (66.240)	Top-5 acc 85.156 (85.341)	lr 0.00744
Train [76][2940/3239]	Time 0.233 (0.621)	Data Time 0.001 (0.015)	Loss 2.5134 (2.4046)	Entropy 0.95351 (0.95885)	Top-1 acc 64.062 (66.236)	Top-5 acc 83.594 (85.340)	lr 0.00744
Train [76][2950/3239]	Time 0.206 (0.620)	Data Time 0.001 (0.015)	Loss 2.5419 (2.4047)	Entropy 0.95346 (0.95883)	Top-1 acc 60.156 (66.235)	Top-5 acc 81.250 (85.337)	lr 0.00744
Train [76][2960/3239]	Time 0.288 (0.620)	Data Time 0.001 (0.015)	Loss 2.3608 (2.4048)	Entropy 0.95340 (0.95881)	Top-1 acc 66.797 (66.238)	Top-5 acc 84.766 (85.335)	lr 0.00744
Train [76][2970/3239]	Time 0.201 (0.619)	Data Time 0.002 (0.015)	Loss 2.4439 (2.4048)	Entropy 0.95335 (0.95879)	Top-1 acc 66.016 (66.240)	Top-5 acc 85.547 (85.334)	lr 0.00744
Train [76][2980/3239]	Time 0.251 (0.638)	Data Time 0.003 (0.015)	Loss 2.5767 (2.4050)	Entropy 0.95327 (0.95877)	Top-1 acc 59.375 (66.232)	Top-5 acc 82.812 (85.331)	lr 0.00744
Train [76][2990/3239]	Time 0.311 (0.637)	Data Time 0.002 (0.015)	Loss 2.4483 (2.4050)	Entropy 0.95327 (0.95876)	Top-1 acc 63.672 (66.232)	Top-5 acc 84.766 (85.331)	lr 0.00744
Train [76][3000/3239]	Time 0.232 (0.637)	Data Time 0.001 (0.015)	Loss 2.5842 (2.4054)	Entropy 0.95329 (0.95874)	Top-1 acc 60.156 (66.222)	Top-5 acc 83.203 (85.324)	lr 0.00744
Train [76][3010/3239]	Time 0.235 (0.636)	Data Time 0.002 (0.015)	Loss 2.5757 (2.4054)	Entropy 0.95325 (0.95872)	Top-1 acc 61.719 (66.224)	Top-5 acc 81.250 (85.320)	lr 0.00744
Train [76][3020/3239]	Time 0.328 (0.636)	Data Time 0.001 (0.015)	Loss 2.5820 (2.4055)	Entropy 0.95329 (0.95870)	Top-1 acc 62.891 (66.226)	Top-5 acc 80.859 (85.318)	lr 0.00744
Train [76][3030/3239]	Time 0.235 (0.635)	Data Time 0.001 (0.015)	Loss 2.3727 (2.4055)	Entropy 0.95327 (0.95868)	Top-1 acc 70.312 (66.227)	Top-5 acc 85.156 (85.317)	lr 0.00744
Train [76][3040/3239]	Time 0.233 (0.635)	Data Time 0.001 (0.015)	Loss 2.5180 (2.4056)	Entropy 0.95328 (0.95867)	Top-1 acc 64.062 (66.226)	Top-5 acc 83.594 (85.317)	lr 0.00743
Train [76][3050/3239]	Time 0.238 (0.634)	Data Time 0.001 (0.015)	Loss 2.4083 (2.4055)	Entropy 0.95323 (0.95865)	Top-1 acc 66.797 (66.228)	Top-5 acc 83.984 (85.320)	lr 0.00743
Train [76][3060/3239]	Time 0.227 (0.634)	Data Time 0.001 (0.015)	Loss 2.4107 (2.4056)	Entropy 0.95326 (0.95863)	Top-1 acc 66.406 (66.225)	Top-5 acc 84.375 (85.315)	lr 0.00743
Train [76][3070/3239]	Time 0.220 (0.633)	Data Time 0.001 (0.015)	Loss 2.2419 (2.4055)	Entropy 0.95321 (0.95861)	Top-1 acc 73.828 (66.230)	Top-5 acc 87.500 (85.315)	lr 0.00743
Train [76][3080/3239]	Time 0.216 (0.633)	Data Time 0.001 (0.015)	Loss 2.4092 (2.4061)	Entropy 0.95320 (0.95860)	Top-1 acc 66.406 (66.217)	Top-5 acc 83.594 (85.304)	lr 0.00743
Train [76][3090/3239]	Time 0.283 (0.632)	Data Time 0.001 (0.015)	Loss 2.2617 (2.4062)	Entropy 0.95312 (0.95858)	Top-1 acc 71.094 (66.216)	Top-5 acc 87.500 (85.300)	lr 0.00743
Train [76][3100/3239]	Time 0.247 (0.632)	Data Time 0.001 (0.014)	Loss 2.4873 (2.4060)	Entropy 0.95311 (0.95856)	Top-1 acc 60.156 (66.218)	Top-5 acc 84.766 (85.304)	lr 0.00743
Train [76][3110/3239]	Time 0.327 (0.631)	Data Time 0.001 (0.014)	Loss 2.3461 (2.4059)	Entropy 0.95305 (0.95854)	Top-1 acc 69.531 (66.221)	Top-5 acc 85.547 (85.305)	lr 0.00743
Train [76][3120/3239]	Time 0.197 (0.631)	Data Time 0.002 (0.014)	Loss 2.5009 (2.4060)	Entropy 0.95298 (0.95853)	Top-1 acc 64.062 (66.222)	Top-5 acc 83.203 (85.306)	lr 0.00743
Train [76][3130/3239]	Time 0.297 (0.630)	Data Time 0.002 (0.014)	Loss 2.5529 (2.4060)	Entropy 0.95293 (0.95851)	Top-1 acc 61.328 (66.226)	Top-5 acc 81.250 (85.304)	lr 0.00743
Train [76][3140/3239]	Time 0.229 (0.630)	Data Time 0.001 (0.014)	Loss 2.3577 (2.4059)	Entropy 0.95286 (0.95849)	Top-1 acc 65.625 (66.229)	Top-5 acc 87.891 (85.304)	lr 0.00743
Train [76][3150/3239]	Time 0.298 (0.630)	Data Time 0.002 (0.014)	Loss 2.4814 (2.4061)	Entropy 0.95276 (0.95847)	Top-1 acc 62.891 (66.224)	Top-5 acc 83.594 (85.299)	lr 0.00742
Train [76][3160/3239]	Time 0.288 (0.629)	Data Time 0.001 (0.014)	Loss 2.1807 (2.4062)	Entropy 0.95276 (0.95845)	Top-1 acc 72.656 (66.226)	Top-5 acc 89.844 (85.299)	lr 0.00742
Train [76][3170/3239]	Time 0.245 (0.629)	Data Time 0.001 (0.014)	Loss 2.4427 (2.4063)	Entropy 0.95273 (0.95844)	Top-1 acc 68.750 (66.225)	Top-5 acc 83.203 (85.299)	lr 0.00742
Train [76][3180/3239]	Time 0.218 (0.628)	Data Time 0.000 (0.014)	Loss 2.4191 (2.4065)	Entropy 0.95267 (0.95842)	Top-1 acc 63.672 (66.218)	Top-5 acc 84.375 (85.294)	lr 0.00742
Train [76][3190/3239]	Time 0.200 (0.628)	Data Time 0.000 (0.014)	Loss 2.3171 (2.4066)	Entropy 0.95262 (0.95840)	Top-1 acc 66.016 (66.215)	Top-5 acc 89.062 (85.293)	lr 0.00742
Train [76][3200/3239]	Time 0.215 (0.627)	Data Time 0.000 (0.014)	Loss 2.3575 (2.4065)	Entropy 0.95261 (0.95838)	Top-1 acc 69.141 (66.220)	Top-5 acc 84.766 (85.293)	lr 0.00742
Train [76][3210/3239]	Time 0.251 (0.627)	Data Time 0.000 (0.014)	Loss 2.3497 (2.4064)	Entropy 0.95253 (0.95836)	Top-1 acc 69.922 (66.220)	Top-5 acc 85.547 (85.293)	lr 0.00742
Train [76][3220/3239]	Time 0.226 (0.626)	Data Time 0.000 (0.014)	Loss 2.4004 (2.4064)	Entropy 0.95250 (0.95835)	Top-1 acc 65.625 (66.222)	Top-5 acc 86.328 (85.294)	lr 0.00742
Train [76][3230/3239]	Time 0.229 (0.626)	Data Time 0.000 (0.014)	Loss 2.4477 (2.4062)	Entropy 0.95254 (0.95833)	Top-1 acc 62.109 (66.226)	Top-5 acc 82.422 (85.299)	lr 0.00742
Train [76][3239/3239]	Time 2.302 (0.625)	Data Time 0.000 (0.014)	Loss 2.7336 (2.4063)	Entropy 0.95254 (0.95831)	Top-1 acc 61.728 (66.227)	Top-5 acc 79.012 (85.297)	lr 0.00742
==========Valid [76/120]	loss 1.332	top-1 acc 69.579 (69.579)	top-5 acc 88.139	Train top-1 66.227	top-5 85.297	Entropy 0.95254	Latency-None: 0.000ms	Flops: 546.53M
Train [77][0/3239]	Time 43.144 (43.144)	Data Time 41.751 (41.751)	Loss 2.4289 (2.4289)	Entropy 0.95256 (0.95256)	Top-1 acc 66.016 (66.016)	Top-5 acc 87.500 (87.500)	lr 0.00742
Train [77][10/3239]	Time 2.712 (4.456)	Data Time 0.026 (3.799)	Loss 2.4280 (2.3661)	Entropy 0.95256 (0.95256)	Top-1 acc 69.141 (67.294)	Top-5 acc 84.766 (86.364)	lr 0.00741
Train [77][20/3239]	Time 0.235 (2.452)	Data Time 0.001 (1.991)	Loss 2.3130 (2.3591)	Entropy 0.95255 (0.95255)	Top-1 acc 67.969 (67.597)	Top-5 acc 85.938 (86.533)	lr 0.00741
Train [77][30/3239]	Time 0.242 (1.814)	Data Time 0.001 (1.349)	Loss 2.2775 (2.3535)	Entropy 0.95247 (0.95253)	Top-1 acc 71.484 (67.780)	Top-5 acc 87.109 (86.429)	lr 0.00741
Train [77][40/3239]	Time 0.234 (1.488)	Data Time 0.001 (1.020)	Loss 2.4179 (2.3597)	Entropy 0.95248 (0.95252)	Top-1 acc 64.453 (67.559)	Top-5 acc 85.938 (86.366)	lr 0.00741
Train [77][50/3239]	Time 0.316 (1.291)	Data Time 0.001 (0.821)	Loss 2.3460 (2.3537)	Entropy 0.95245 (0.95250)	Top-1 acc 67.969 (67.417)	Top-5 acc 86.719 (86.619)	lr 0.00741
Train [77][60/3239]	Time 0.214 (1.155)	Data Time 0.001 (0.686)	Loss 2.2914 (2.3471)	Entropy 0.95244 (0.95250)	Top-1 acc 69.141 (67.617)	Top-5 acc 88.281 (86.642)	lr 0.00741
Train [77][70/3239]	Time 0.244 (1.059)	Data Time 0.001 (0.590)	Loss 2.4348 (2.3489)	Entropy 0.95237 (0.95248)	Top-1 acc 65.625 (67.441)	Top-5 acc 85.938 (86.669)	lr 0.00741
Train [77][80/3239]	Time 0.286 (1.664)	Data Time 0.003 (0.517)	Loss 2.3478 (2.3454)	Entropy 0.95230 (0.95247)	Top-1 acc 71.875 (67.617)	Top-5 acc 86.328 (86.728)	lr 0.00741
Train [77][90/3239]	Time 0.264 (1.537)	Data Time 0.002 (0.461)	Loss 2.2647 (2.3495)	Entropy 0.95222 (0.95244)	Top-1 acc 69.141 (67.509)	Top-5 acc 87.109 (86.551)	lr 0.00741
Train [77][100/3239]	Time 0.233 (1.433)	Data Time 0.002 (0.415)	Loss 2.4858 (2.3580)	Entropy 0.95217 (0.95242)	Top-1 acc 66.016 (67.350)	Top-5 acc 83.594 (86.336)	lr 0.00741
Train [77][110/3239]	Time 0.257 (1.347)	Data Time 0.001 (0.378)	Loss 2.4283 (2.3658)	Entropy 0.95218 (0.95240)	Top-1 acc 64.453 (67.145)	Top-5 acc 85.938 (86.219)	lr 0.00741
Train [77][120/3239]	Time 2.674 (1.275)	Data Time 0.001 (0.347)	Loss 2.4922 (2.3635)	Entropy 0.95218 (0.95238)	Top-1 acc 65.625 (67.181)	Top-5 acc 80.859 (86.247)	lr 0.00740
Train [77][130/3239]	Time 0.225 (1.196)	Data Time 0.001 (0.321)	Loss 2.4083 (2.3634)	Entropy 0.95216 (0.95236)	Top-1 acc 67.188 (67.238)	Top-5 acc 83.984 (86.248)	lr 0.00740
Train [77][140/3239]	Time 0.493 (1.147)	Data Time 0.007 (0.298)	Loss 2.2419 (2.3627)	Entropy 0.95213 (0.95235)	Top-1 acc 69.141 (67.201)	Top-5 acc 87.109 (86.245)	lr 0.00740
Train [77][150/3239]	Time 0.233 (1.103)	Data Time 0.001 (0.279)	Loss 2.2894 (2.3603)	Entropy 0.95206 (0.95233)	Top-1 acc 68.359 (67.257)	Top-5 acc 89.062 (86.307)	lr 0.00740
Train [77][160/3239]	Time 0.221 (1.064)	Data Time 0.001 (0.261)	Loss 2.3067 (2.3593)	Entropy 0.95206 (0.95231)	Top-1 acc 68.750 (67.234)	Top-5 acc 88.281 (86.396)	lr 0.00740
Train [77][170/3239]	Time 0.231 (1.029)	Data Time 0.001 (0.246)	Loss 2.3386 (2.3616)	Entropy 0.95207 (0.95230)	Top-1 acc 65.234 (67.238)	Top-5 acc 87.109 (86.321)	lr 0.00740
Train [77][180/3239]	Time 0.326 (0.999)	Data Time 0.001 (0.233)	Loss 2.6436 (2.3647)	Entropy 0.95206 (0.95229)	Top-1 acc 59.375 (67.179)	Top-5 acc 81.641 (86.246)	lr 0.00740
Train [77][190/3239]	Time 0.232 (0.971)	Data Time 0.001 (0.221)	Loss 2.5082 (2.3659)	Entropy 0.95206 (0.95227)	Top-1 acc 67.969 (67.165)	Top-5 acc 82.812 (86.205)	lr 0.00740
Train [77][200/3239]	Time 0.226 (0.946)	Data Time 0.001 (0.210)	Loss 2.3303 (2.3679)	Entropy 0.95204 (0.95226)	Top-1 acc 69.141 (67.147)	Top-5 acc 87.109 (86.136)	lr 0.00740
Train [77][210/3239]	Time 0.233 (0.924)	Data Time 0.001 (0.200)	Loss 2.3350 (2.3675)	Entropy 0.95199 (0.95225)	Top-1 acc 67.969 (67.138)	Top-5 acc 89.062 (86.152)	lr 0.00740
Train [77][220/3239]	Time 0.290 (0.903)	Data Time 0.001 (0.191)	Loss 2.5066 (2.3695)	Entropy 0.95203 (0.95224)	Top-1 acc 62.891 (67.067)	Top-5 acc 82.812 (86.093)	lr 0.00740
Train [77][230/3239]	Time 2.620 (0.885)	Data Time 0.001 (0.183)	Loss 2.4460 (2.3708)	Entropy 0.95203 (0.95223)	Top-1 acc 67.969 (67.073)	Top-5 acc 83.203 (86.083)	lr 0.00739
Train [77][240/3239]	Time 0.255 (0.858)	Data Time 0.001 (0.175)	Loss 2.4531 (2.3719)	Entropy 0.95194 (0.95222)	Top-1 acc 68.359 (67.040)	Top-5 acc 84.375 (86.072)	lr 0.00739
Train [77][250/3239]	Time 0.225 (0.843)	Data Time 0.001 (0.168)	Loss 2.2348 (2.3736)	Entropy 0.95194 (0.95221)	Top-1 acc 69.531 (67.007)	Top-5 acc 85.547 (86.026)	lr 0.00739
Train [77][260/3239]	Time 0.233 (0.828)	Data Time 0.001 (0.162)	Loss 2.3614 (2.3731)	Entropy 0.95190 (0.95220)	Top-1 acc 70.703 (67.059)	Top-5 acc 85.547 (86.018)	lr 0.00739
Train [77][270/3239]	Time 0.337 (0.816)	Data Time 0.001 (0.156)	Loss 2.4025 (2.3723)	Entropy 0.95190 (0.95219)	Top-1 acc 63.672 (67.088)	Top-5 acc 85.156 (86.018)	lr 0.00739
Train [77][280/3239]	Time 0.241 (0.804)	Data Time 0.001 (0.150)	Loss 2.5421 (2.3724)	Entropy 0.95187 (0.95217)	Top-1 acc 61.719 (67.017)	Top-5 acc 84.375 (86.042)	lr 0.00739
Train [77][290/3239]	Time 0.221 (0.792)	Data Time 0.001 (0.145)	Loss 2.5219 (2.3718)	Entropy 0.95182 (0.95216)	Top-1 acc 62.891 (67.048)	Top-5 acc 83.984 (86.042)	lr 0.00739
Train [77][300/3239]	Time 0.230 (0.782)	Data Time 0.001 (0.141)	Loss 2.4591 (2.3729)	Entropy 0.95180 (0.95215)	Top-1 acc 64.453 (67.015)	Top-5 acc 84.766 (86.013)	lr 0.00739
Train [77][310/3239]	Time 0.226 (0.772)	Data Time 0.002 (0.136)	Loss 2.4555 (2.3733)	Entropy 0.95180 (0.95214)	Top-1 acc 65.234 (67.012)	Top-5 acc 83.203 (85.984)	lr 0.00739
Train [77][320/3239]	Time 0.260 (0.764)	Data Time 0.001 (0.132)	Loss 2.3966 (2.3737)	Entropy 0.95176 (0.95213)	Top-1 acc 68.750 (67.034)	Top-5 acc 87.109 (86.008)	lr 0.00739
Train [77][330/3239]	Time 0.231 (0.755)	Data Time 0.001 (0.128)	Loss 2.5238 (2.3748)	Entropy 0.95176 (0.95212)	Top-1 acc 64.062 (67.026)	Top-5 acc 82.422 (85.962)	lr 0.00739
Train [77][340/3239]	Time 2.644 (0.747)	Data Time 0.001 (0.124)	Loss 2.5302 (2.3757)	Entropy 0.95176 (0.95211)	Top-1 acc 60.156 (66.981)	Top-5 acc 82.812 (85.948)	lr 0.00738
Train [77][350/3239]	Time 0.234 (0.732)	Data Time 0.001 (0.121)	Loss 2.3616 (2.3742)	Entropy 0.95166 (0.95209)	Top-1 acc 67.578 (67.036)	Top-5 acc 87.109 (85.962)	lr 0.00738
Train [77][360/3239]	Time 0.376 (0.726)	Data Time 0.004 (0.117)	Loss 2.5168 (2.3772)	Entropy 0.95171 (0.95208)	Top-1 acc 64.062 (66.961)	Top-5 acc 85.156 (85.928)	lr 0.00738
Train [77][370/3239]	Time 0.238 (0.719)	Data Time 0.001 (0.114)	Loss 2.2086 (2.3762)	Entropy 0.95168 (0.95207)	Top-1 acc 70.703 (66.986)	Top-5 acc 89.844 (85.935)	lr 0.00738
Train [77][380/3239]	Time 0.242 (0.713)	Data Time 0.001 (0.111)	Loss 2.2523 (2.3762)	Entropy 0.95166 (0.95206)	Top-1 acc 73.438 (66.994)	Top-5 acc 89.062 (85.946)	lr 0.00738
Train [77][390/3239]	Time 0.251 (0.707)	Data Time 0.002 (0.109)	Loss 2.4233 (2.3778)	Entropy 0.95166 (0.95205)	Top-1 acc 71.094 (66.958)	Top-5 acc 84.766 (85.907)	lr 0.00738
Train [77][400/3239]	Time 0.228 (0.702)	Data Time 0.001 (0.106)	Loss 2.3636 (2.3775)	Entropy 0.95161 (0.95204)	Top-1 acc 66.406 (66.975)	Top-5 acc 84.375 (85.900)	lr 0.00738
Train [77][410/3239]	Time 0.220 (0.696)	Data Time 0.001 (0.103)	Loss 2.5374 (2.3768)	Entropy 0.95166 (0.95203)	Top-1 acc 61.719 (66.977)	Top-5 acc 81.250 (85.914)	lr 0.00738
Train [77][420/3239]	Time 0.232 (0.691)	Data Time 0.001 (0.101)	Loss 2.3344 (2.3765)	Entropy 0.95163 (0.95202)	Top-1 acc 65.625 (66.986)	Top-5 acc 87.109 (85.923)	lr 0.00738
Train [77][430/3239]	Time 0.221 (0.685)	Data Time 0.001 (0.099)	Loss 2.2918 (2.3764)	Entropy 0.95159 (0.95201)	Top-1 acc 69.531 (66.981)	Top-5 acc 87.891 (85.912)	lr 0.00738
Train [77][440/3239]	Time 0.261 (0.808)	Data Time 0.003 (0.096)	Loss 2.4387 (2.3769)	Entropy 0.95159 (0.95200)	Top-1 acc 65.234 (66.963)	Top-5 acc 80.859 (85.878)	lr 0.00738
Train [77][450/3239]	Time 2.610 (0.801)	Data Time 0.003 (0.094)	Loss 2.3794 (2.3768)	Entropy 0.95159 (0.95200)	Top-1 acc 67.188 (66.986)	Top-5 acc 85.938 (85.888)	lr 0.00737
Train [77][460/3239]	Time 0.174 (0.789)	Data Time 0.002 (0.092)	Loss 2.4992 (2.3784)	Entropy 0.95156 (0.95199)	Top-1 acc 65.234 (66.935)	Top-5 acc 82.812 (85.852)	lr 0.00737
Train [77][470/3239]	Time 0.233 (0.782)	Data Time 0.001 (0.091)	Loss 2.3830 (2.3782)	Entropy 0.95149 (0.95198)	Top-1 acc 67.188 (66.945)	Top-5 acc 86.328 (85.862)	lr 0.00737
Train [77][480/3239]	Time 0.212 (0.775)	Data Time 0.001 (0.089)	Loss 2.3191 (2.3787)	Entropy 0.95141 (0.95196)	Top-1 acc 68.359 (66.932)	Top-5 acc 86.328 (85.850)	lr 0.00737
Train [77][490/3239]	Time 0.240 (0.769)	Data Time 0.001 (0.087)	Loss 2.3414 (2.3788)	Entropy 0.95164 (0.95196)	Top-1 acc 69.531 (66.921)	Top-5 acc 88.281 (85.855)	lr 0.00737
Train [77][500/3239]	Time 0.239 (0.763)	Data Time 0.001 (0.085)	Loss 2.4219 (2.3788)	Entropy 0.95161 (0.95195)	Top-1 acc 64.844 (66.907)	Top-5 acc 83.594 (85.842)	lr 0.00737
Train [77][510/3239]	Time 0.226 (0.757)	Data Time 0.001 (0.084)	Loss 2.5026 (2.3793)	Entropy 0.95157 (0.95194)	Top-1 acc 64.062 (66.896)	Top-5 acc 84.375 (85.838)	lr 0.00737
Train [77][520/3239]	Time 0.218 (0.752)	Data Time 0.001 (0.082)	Loss 2.2585 (2.3795)	Entropy 0.95155 (0.95194)	Top-1 acc 68.359 (66.896)	Top-5 acc 89.844 (85.842)	lr 0.00737
Train [77][530/3239]	Time 0.222 (0.746)	Data Time 0.001 (0.080)	Loss 2.1621 (2.3798)	Entropy 0.95153 (0.95193)	Top-1 acc 75.000 (66.876)	Top-5 acc 87.500 (85.848)	lr 0.00737
Train [77][540/3239]	Time 0.383 (0.741)	Data Time 0.001 (0.079)	Loss 2.3699 (2.3796)	Entropy 0.95156 (0.95192)	Top-1 acc 64.844 (66.884)	Top-5 acc 84.766 (85.852)	lr 0.00737
Train [77][550/3239]	Time 0.267 (0.736)	Data Time 0.001 (0.078)	Loss 2.2974 (2.3803)	Entropy 0.95156 (0.95191)	Top-1 acc 71.094 (66.864)	Top-5 acc 85.156 (85.827)	lr 0.00737
Train [77][560/3239]	Time 2.542 (0.732)	Data Time 0.001 (0.076)	Loss 2.4975 (2.3807)	Entropy 0.95156 (0.95191)	Top-1 acc 66.797 (66.842)	Top-5 acc 84.375 (85.825)	lr 0.00736
Train [77][570/3239]	Time 0.233 (0.723)	Data Time 0.002 (0.075)	Loss 2.4517 (2.3814)	Entropy 0.95155 (0.95190)	Top-1 acc 67.578 (66.816)	Top-5 acc 83.203 (85.814)	lr 0.00736
Train [77][580/3239]	Time 0.219 (0.719)	Data Time 0.001 (0.074)	Loss 2.3467 (2.3812)	Entropy 0.95151 (0.95189)	Top-1 acc 71.094 (66.832)	Top-5 acc 84.766 (85.823)	lr 0.00736
Train [77][590/3239]	Time 0.225 (0.715)	Data Time 0.001 (0.072)	Loss 2.3668 (2.3819)	Entropy 0.95142 (0.95189)	Top-1 acc 67.969 (66.815)	Top-5 acc 85.156 (85.812)	lr 0.00736
Train [77][600/3239]	Time 0.224 (0.711)	Data Time 0.001 (0.071)	Loss 2.3068 (2.3821)	Entropy 0.95135 (0.95188)	Top-1 acc 68.750 (66.812)	Top-5 acc 87.891 (85.809)	lr 0.00736
Train [77][610/3239]	Time 0.238 (0.707)	Data Time 0.001 (0.070)	Loss 2.3056 (2.3818)	Entropy 0.95136 (0.95187)	Top-1 acc 70.703 (66.826)	Top-5 acc 87.109 (85.810)	lr 0.00736
Train [77][620/3239]	Time 0.254 (0.703)	Data Time 0.001 (0.069)	Loss 2.4378 (2.3818)	Entropy 0.95136 (0.95186)	Top-1 acc 62.891 (66.825)	Top-5 acc 83.984 (85.806)	lr 0.00736
Train [77][630/3239]	Time 0.304 (0.699)	Data Time 0.001 (0.068)	Loss 2.4239 (2.3820)	Entropy 0.95130 (0.95185)	Top-1 acc 64.453 (66.812)	Top-5 acc 84.766 (85.800)	lr 0.00736
Train [77][640/3239]	Time 0.226 (0.695)	Data Time 0.001 (0.067)	Loss 2.3558 (2.3831)	Entropy 0.95122 (0.95184)	Top-1 acc 62.109 (66.780)	Top-5 acc 87.109 (85.785)	lr 0.00736
Train [77][650/3239]	Time 0.238 (0.692)	Data Time 0.001 (0.066)	Loss 2.3958 (2.3835)	Entropy 0.95115 (0.95183)	Top-1 acc 66.406 (66.757)	Top-5 acc 85.547 (85.778)	lr 0.00736
Train [77][660/3239]	Time 0.229 (0.688)	Data Time 0.001 (0.065)	Loss 2.3829 (2.3841)	Entropy 0.95116 (0.95182)	Top-1 acc 65.234 (66.759)	Top-5 acc 86.719 (85.769)	lr 0.00735
Train [77][670/3239]	Time 2.543 (0.685)	Data Time 0.001 (0.064)	Loss 2.2790 (2.3837)	Entropy 0.95116 (0.95181)	Top-1 acc 72.266 (66.778)	Top-5 acc 86.328 (85.767)	lr 0.00735
Train [77][680/3239]	Time 0.219 (0.679)	Data Time 0.001 (0.063)	Loss 2.5833 (2.3840)	Entropy 0.95114 (0.95180)	Top-1 acc 59.375 (66.772)	Top-5 acc 82.031 (85.767)	lr 0.00735
Train [77][690/3239]	Time 0.230 (0.676)	Data Time 0.001 (0.062)	Loss 2.2661 (2.3839)	Entropy 0.95114 (0.95179)	Top-1 acc 67.969 (66.760)	Top-5 acc 89.062 (85.773)	lr 0.00735
Train [77][700/3239]	Time 0.230 (0.673)	Data Time 0.001 (0.061)	Loss 2.3551 (2.3840)	Entropy 0.95113 (0.95179)	Top-1 acc 66.406 (66.769)	Top-5 acc 87.109 (85.774)	lr 0.00735
Train [77][710/3239]	Time 0.230 (0.670)	Data Time 0.001 (0.060)	Loss 2.3662 (2.3835)	Entropy 0.95108 (0.95178)	Top-1 acc 65.625 (66.791)	Top-5 acc 88.281 (85.784)	lr 0.00735
Train [77][720/3239]	Time 0.359 (0.667)	Data Time 0.001 (0.060)	Loss 2.5872 (2.3839)	Entropy 0.95101 (0.95177)	Top-1 acc 65.625 (66.786)	Top-5 acc 80.859 (85.784)	lr 0.00735
Train [77][730/3239]	Time 0.211 (0.664)	Data Time 0.001 (0.059)	Loss 2.5812 (2.3840)	Entropy 0.95100 (0.95176)	Top-1 acc 58.984 (66.768)	Top-5 acc 83.984 (85.788)	lr 0.00735
Train [77][740/3239]	Time 0.221 (0.662)	Data Time 0.001 (0.058)	Loss 2.3147 (2.3843)	Entropy 0.95098 (0.95174)	Top-1 acc 68.359 (66.770)	Top-5 acc 88.281 (85.785)	lr 0.00735
Train [77][750/3239]	Time 0.223 (0.659)	Data Time 0.001 (0.057)	Loss 2.4617 (2.3845)	Entropy 0.95098 (0.95173)	Top-1 acc 64.453 (66.767)	Top-5 acc 81.641 (85.774)	lr 0.00735
Train [77][760/3239]	Time 0.239 (0.657)	Data Time 0.001 (0.057)	Loss 2.4131 (2.3845)	Entropy 0.95094 (0.95172)	Top-1 acc 62.109 (66.755)	Top-5 acc 87.109 (85.775)	lr 0.00735
Train [77][770/3239]	Time 0.237 (0.654)	Data Time 0.001 (0.056)	Loss 2.3364 (2.3844)	Entropy 0.95093 (0.95171)	Top-1 acc 71.094 (66.764)	Top-5 acc 86.719 (85.779)	lr 0.00734
Train [77][780/3239]	Time 2.509 (0.652)	Data Time 0.001 (0.055)	Loss 2.4993 (2.3846)	Entropy 0.95093 (0.95170)	Top-1 acc 64.844 (66.762)	Top-5 acc 83.594 (85.775)	lr 0.00734
Train [77][790/3239]	Time 0.234 (0.647)	Data Time 0.001 (0.055)	Loss 2.4821 (2.3849)	Entropy 0.95090 (0.95169)	Top-1 acc 66.016 (66.760)	Top-5 acc 84.766 (85.776)	lr 0.00734
Train [77][800/3239]	Time 0.246 (0.645)	Data Time 0.001 (0.054)	Loss 2.2722 (2.3843)	Entropy 0.95086 (0.95168)	Top-1 acc 71.094 (66.770)	Top-5 acc 87.891 (85.790)	lr 0.00734
Train [77][810/3239]	Time 0.236 (0.709)	Data Time 0.002 (0.053)	Loss 2.3449 (2.3845)	Entropy 0.95076 (0.95167)	Top-1 acc 67.578 (66.769)	Top-5 acc 84.766 (85.788)	lr 0.00734
Train [77][820/3239]	Time 0.233 (0.706)	Data Time 0.002 (0.053)	Loss 2.3140 (2.3840)	Entropy 0.95069 (0.95166)	Top-1 acc 68.359 (66.789)	Top-5 acc 87.109 (85.798)	lr 0.00734
Train [77][830/3239]	Time 0.244 (0.703)	Data Time 0.002 (0.052)	Loss 2.4024 (2.3841)	Entropy 0.95060 (0.95165)	Top-1 acc 66.016 (66.787)	Top-5 acc 85.547 (85.800)	lr 0.00734
Train [77][840/3239]	Time 0.213 (0.700)	Data Time 0.001 (0.051)	Loss 2.2582 (2.3846)	Entropy 0.95061 (0.95164)	Top-1 acc 68.359 (66.780)	Top-5 acc 88.672 (85.786)	lr 0.00734
Train [77][850/3239]	Time 0.364 (0.698)	Data Time 0.001 (0.051)	Loss 2.5254 (2.3846)	Entropy 0.95059 (0.95162)	Top-1 acc 60.547 (66.772)	Top-5 acc 81.250 (85.782)	lr 0.00734
Train [77][860/3239]	Time 0.227 (0.695)	Data Time 0.002 (0.050)	Loss 2.3190 (2.3842)	Entropy 0.95046 (0.95161)	Top-1 acc 67.578 (66.780)	Top-5 acc 86.328 (85.783)	lr 0.00734
Train [77][870/3239]	Time 0.231 (0.693)	Data Time 0.001 (0.050)	Loss 2.4091 (2.3850)	Entropy 0.95048 (0.95160)	Top-1 acc 64.844 (66.751)	Top-5 acc 87.109 (85.777)	lr 0.00734
Train [77][880/3239]	Time 0.251 (0.690)	Data Time 0.001 (0.049)	Loss 2.4242 (2.3852)	Entropy 0.95050 (0.95159)	Top-1 acc 63.672 (66.731)	Top-5 acc 86.719 (85.775)	lr 0.00733
Train [77][890/3239]	Time 2.455 (0.687)	Data Time 0.001 (0.049)	Loss 2.4381 (2.3850)	Entropy 0.95050 (0.95157)	Top-1 acc 64.062 (66.738)	Top-5 acc 83.984 (85.778)	lr 0.00733
Train [77][900/3239]	Time 0.228 (0.683)	Data Time 0.001 (0.048)	Loss 2.5693 (2.3858)	Entropy 0.95053 (0.95156)	Top-1 acc 62.109 (66.721)	Top-5 acc 81.250 (85.768)	lr 0.00733
Train [77][910/3239]	Time 0.223 (0.680)	Data Time 0.001 (0.048)	Loss 2.2283 (2.3853)	Entropy 0.95049 (0.95155)	Top-1 acc 71.484 (66.742)	Top-5 acc 86.719 (85.775)	lr 0.00733
Train [77][920/3239]	Time 0.236 (0.678)	Data Time 0.001 (0.047)	Loss 2.4201 (2.3858)	Entropy 0.95046 (0.95154)	Top-1 acc 66.016 (66.735)	Top-5 acc 84.375 (85.767)	lr 0.00733
Train [77][930/3239]	Time 0.242 (0.676)	Data Time 0.001 (0.047)	Loss 2.4145 (2.3853)	Entropy 0.95042 (0.95153)	Top-1 acc 66.797 (66.751)	Top-5 acc 85.547 (85.783)	lr 0.00733
Train [77][940/3239]	Time 0.333 (0.674)	Data Time 0.002 (0.046)	Loss 2.3260 (2.3850)	Entropy 0.95042 (0.95151)	Top-1 acc 70.703 (66.749)	Top-5 acc 84.766 (85.788)	lr 0.00733
Train [77][950/3239]	Time 0.255 (0.672)	Data Time 0.001 (0.046)	Loss 2.4474 (2.3853)	Entropy 0.95042 (0.95150)	Top-1 acc 64.844 (66.741)	Top-5 acc 83.594 (85.781)	lr 0.00733
Train [77][960/3239]	Time 0.231 (0.670)	Data Time 0.001 (0.045)	Loss 2.4538 (2.3851)	Entropy 0.95038 (0.95149)	Top-1 acc 62.109 (66.752)	Top-5 acc 85.547 (85.783)	lr 0.00733
Train [77][970/3239]	Time 0.217 (0.668)	Data Time 0.001 (0.045)	Loss 2.4784 (2.3849)	Entropy 0.95036 (0.95148)	Top-1 acc 64.844 (66.747)	Top-5 acc 84.766 (85.782)	lr 0.00733
Train [77][980/3239]	Time 0.294 (0.666)	Data Time 0.001 (0.044)	Loss 2.3841 (2.3847)	Entropy 0.95029 (0.95147)	Top-1 acc 68.359 (66.760)	Top-5 acc 84.766 (85.789)	lr 0.00733
Train [77][990/3239]	Time 0.251 (0.664)	Data Time 0.002 (0.044)	Loss 2.3175 (2.3848)	Entropy 0.95027 (0.95146)	Top-1 acc 67.578 (66.757)	Top-5 acc 86.328 (85.780)	lr 0.00732
Train [77][1000/3239]	Time 2.504 (0.662)	Data Time 0.002 (0.043)	Loss 2.5389 (2.3854)	Entropy 0.95027 (0.95145)	Top-1 acc 64.062 (66.741)	Top-5 acc 82.812 (85.770)	lr 0.00732
Train [77][1010/3239]	Time 0.249 (0.658)	Data Time 0.001 (0.043)	Loss 2.3695 (2.3853)	Entropy 0.95031 (0.95143)	Top-1 acc 67.578 (66.741)	Top-5 acc 84.375 (85.776)	lr 0.00732
Train [77][1020/3239]	Time 0.248 (0.656)	Data Time 0.001 (0.043)	Loss 2.3077 (2.3851)	Entropy 0.95032 (0.95142)	Top-1 acc 64.062 (66.741)	Top-5 acc 89.844 (85.778)	lr 0.00732
Train [77][1030/3239]	Time 0.329 (0.655)	Data Time 0.001 (0.042)	Loss 2.3731 (2.3854)	Entropy 0.95023 (0.95141)	Top-1 acc 65.625 (66.730)	Top-5 acc 85.938 (85.774)	lr 0.00732
Train [77][1040/3239]	Time 0.233 (0.653)	Data Time 0.001 (0.042)	Loss 2.2533 (2.3853)	Entropy 0.95021 (0.95140)	Top-1 acc 67.188 (66.721)	Top-5 acc 87.500 (85.779)	lr 0.00732
Train [77][1050/3239]	Time 0.240 (0.651)	Data Time 0.001 (0.041)	Loss 2.4625 (2.3851)	Entropy 0.95018 (0.95139)	Top-1 acc 67.188 (66.727)	Top-5 acc 83.984 (85.783)	lr 0.00732
Train [77][1060/3239]	Time 0.223 (0.649)	Data Time 0.001 (0.041)	Loss 2.3587 (2.3856)	Entropy 0.95018 (0.95138)	Top-1 acc 69.531 (66.719)	Top-5 acc 86.719 (85.778)	lr 0.00732
Train [77][1070/3239]	Time 0.219 (0.648)	Data Time 0.001 (0.041)	Loss 2.4865 (2.3858)	Entropy 0.95018 (0.95137)	Top-1 acc 60.156 (66.710)	Top-5 acc 85.938 (85.773)	lr 0.00732
Train [77][1080/3239]	Time 0.256 (0.646)	Data Time 0.001 (0.040)	Loss 2.5086 (2.3858)	Entropy 0.95012 (0.95136)	Top-1 acc 64.844 (66.720)	Top-5 acc 83.203 (85.771)	lr 0.00732
Train [77][1090/3239]	Time 0.222 (0.644)	Data Time 0.001 (0.040)	Loss 2.3515 (2.3860)	Entropy 0.95009 (0.95134)	Top-1 acc 65.625 (66.715)	Top-5 acc 85.547 (85.762)	lr 0.00732
Train [77][1100/3239]	Time 0.290 (0.643)	Data Time 0.001 (0.040)	Loss 2.5028 (2.3861)	Entropy 0.95011 (0.95133)	Top-1 acc 67.578 (66.707)	Top-5 acc 84.375 (85.762)	lr 0.00731
Train [77][1110/3239]	Time 2.463 (0.641)	Data Time 0.002 (0.039)	Loss 2.4638 (2.3864)	Entropy 0.95011 (0.95132)	Top-1 acc 63.281 (66.700)	Top-5 acc 85.156 (85.755)	lr 0.00731
Train [77][1120/3239]	Time 0.338 (0.638)	Data Time 0.002 (0.039)	Loss 2.4427 (2.3867)	Entropy 0.95021 (0.95131)	Top-1 acc 69.141 (66.696)	Top-5 acc 82.812 (85.740)	lr 0.00731
Train [77][1130/3239]	Time 0.235 (0.636)	Data Time 0.001 (0.039)	Loss 2.4843 (2.3871)	Entropy 0.95003 (0.95130)	Top-1 acc 62.500 (66.683)	Top-5 acc 85.156 (85.736)	lr 0.00731
Train [77][1140/3239]	Time 0.239 (0.635)	Data Time 0.002 (0.038)	Loss 2.4695 (2.3874)	Entropy 0.94998 (0.95129)	Top-1 acc 68.750 (66.679)	Top-5 acc 84.766 (85.730)	lr 0.00731
Train [77][1150/3239]	Time 0.235 (0.634)	Data Time 0.001 (0.038)	Loss 2.4105 (2.3876)	Entropy 0.94997 (0.95128)	Top-1 acc 64.453 (66.667)	Top-5 acc 85.547 (85.727)	lr 0.00731
Train [77][1160/3239]	Time 0.346 (0.632)	Data Time 0.001 (0.038)	Loss 2.3215 (2.3873)	Entropy 0.94991 (0.95127)	Top-1 acc 70.312 (66.673)	Top-5 acc 86.328 (85.732)	lr 0.00731
Train [77][1170/3239]	Time 0.412 (0.676)	Data Time 0.003 (0.037)	Loss 2.4418 (2.3877)	Entropy 0.94984 (0.95125)	Top-1 acc 66.016 (66.661)	Top-5 acc 85.938 (85.723)	lr 0.00731
Train [77][1180/3239]	Time 0.222 (0.675)	Data Time 0.002 (0.037)	Loss 2.3832 (2.3880)	Entropy 0.94976 (0.95124)	Top-1 acc 65.625 (66.648)	Top-5 acc 88.281 (85.719)	lr 0.00731
Train [77][1190/3239]	Time 0.223 (0.673)	Data Time 0.001 (0.037)	Loss 2.4847 (2.3879)	Entropy 0.94961 (0.95123)	Top-1 acc 66.797 (66.656)	Top-5 acc 83.984 (85.722)	lr 0.00731
Train [77][1200/3239]	Time 0.283 (0.671)	Data Time 0.001 (0.037)	Loss 2.4255 (2.3884)	Entropy 0.94959 (0.95122)	Top-1 acc 66.406 (66.642)	Top-5 acc 83.984 (85.709)	lr 0.00731
Train [77][1210/3239]	Time 0.242 (0.670)	Data Time 0.001 (0.036)	Loss 2.3622 (2.3886)	Entropy 0.94953 (0.95120)	Top-1 acc 64.844 (66.638)	Top-5 acc 85.547 (85.706)	lr 0.00730
Train [77][1220/3239]	Time 2.504 (0.668)	Data Time 0.001 (0.036)	Loss 2.2210 (2.3886)	Entropy 0.94953 (0.95119)	Top-1 acc 70.312 (66.644)	Top-5 acc 87.891 (85.707)	lr 0.00730
Train [77][1230/3239]	Time 0.244 (0.665)	Data Time 0.001 (0.036)	Loss 2.4278 (2.3891)	Entropy 0.94940 (0.95117)	Top-1 acc 67.188 (66.630)	Top-5 acc 84.375 (85.697)	lr 0.00730
Train [77][1240/3239]	Time 0.219 (0.663)	Data Time 0.001 (0.035)	Loss 2.3924 (2.3893)	Entropy 0.94938 (0.95116)	Top-1 acc 66.016 (66.628)	Top-5 acc 85.938 (85.690)	lr 0.00730
Train [77][1250/3239]	Time 0.339 (0.662)	Data Time 0.002 (0.035)	Loss 2.3227 (2.3891)	Entropy 0.94932 (0.95114)	Top-1 acc 66.406 (66.634)	Top-5 acc 85.547 (85.694)	lr 0.00730
Train [77][1260/3239]	Time 0.227 (0.660)	Data Time 0.001 (0.035)	Loss 2.6444 (2.3892)	Entropy 0.94934 (0.95113)	Top-1 acc 62.500 (66.635)	Top-5 acc 80.859 (85.694)	lr 0.00730
Train [77][1270/3239]	Time 0.227 (0.659)	Data Time 0.001 (0.035)	Loss 2.4718 (2.3892)	Entropy 0.94932 (0.95112)	Top-1 acc 63.672 (66.634)	Top-5 acc 84.766 (85.694)	lr 0.00730
Train [77][1280/3239]	Time 0.222 (0.657)	Data Time 0.001 (0.034)	Loss 2.5060 (2.3895)	Entropy 0.94924 (0.95110)	Top-1 acc 61.719 (66.628)	Top-5 acc 85.547 (85.698)	lr 0.00730
Train [77][1290/3239]	Time 0.241 (0.656)	Data Time 0.002 (0.034)	Loss 2.3010 (2.3894)	Entropy 0.94921 (0.95109)	Top-1 acc 66.016 (66.633)	Top-5 acc 89.062 (85.699)	lr 0.00730
Train [77][1300/3239]	Time 0.212 (0.654)	Data Time 0.001 (0.034)	Loss 2.5148 (2.3894)	Entropy 0.94917 (0.95107)	Top-1 acc 63.672 (66.630)	Top-5 acc 83.594 (85.688)	lr 0.00730
Train [77][1310/3239]	Time 0.235 (0.653)	Data Time 0.001 (0.034)	Loss 2.5057 (2.3897)	Entropy 0.94910 (0.95106)	Top-1 acc 62.109 (66.627)	Top-5 acc 86.328 (85.684)	lr 0.00730
Train [77][1320/3239]	Time 0.212 (0.652)	Data Time 0.001 (0.033)	Loss 2.3376 (2.3893)	Entropy 0.94912 (0.95104)	Top-1 acc 67.969 (66.636)	Top-5 acc 87.500 (85.696)	lr 0.00729
Train [77][1330/3239]	Time 2.457 (0.650)	Data Time 0.001 (0.033)	Loss 2.3286 (2.3896)	Entropy 0.94912 (0.95103)	Top-1 acc 66.016 (66.636)	Top-5 acc 88.281 (85.691)	lr 0.00729
Train [77][1340/3239]	Time 0.244 (0.647)	Data Time 0.002 (0.033)	Loss 2.3775 (2.3895)	Entropy 0.94891 (0.95101)	Top-1 acc 64.844 (66.644)	Top-5 acc 84.375 (85.695)	lr 0.00729
Train [77][1350/3239]	Time 0.262 (0.646)	Data Time 0.002 (0.033)	Loss 2.3445 (2.3893)	Entropy 0.94890 (0.95100)	Top-1 acc 67.188 (66.653)	Top-5 acc 87.109 (85.695)	lr 0.00729
Train [77][1360/3239]	Time 0.231 (0.645)	Data Time 0.001 (0.032)	Loss 2.3300 (2.3893)	Entropy 0.94895 (0.95098)	Top-1 acc 68.750 (66.647)	Top-5 acc 85.156 (85.694)	lr 0.00729
Train [77][1370/3239]	Time 0.234 (0.643)	Data Time 0.001 (0.032)	Loss 2.3017 (2.3896)	Entropy 0.94894 (0.95097)	Top-1 acc 71.094 (66.640)	Top-5 acc 87.500 (85.690)	lr 0.00729
Train [77][1380/3239]	Time 0.363 (0.642)	Data Time 0.001 (0.032)	Loss 2.3707 (2.3899)	Entropy 0.94884 (0.95095)	Top-1 acc 65.625 (66.633)	Top-5 acc 87.109 (85.681)	lr 0.00729
Train [77][1390/3239]	Time 0.219 (0.641)	Data Time 0.001 (0.032)	Loss 2.2995 (2.3902)	Entropy 0.94890 (0.95094)	Top-1 acc 66.797 (66.626)	Top-5 acc 88.281 (85.675)	lr 0.00729
Train [77][1400/3239]	Time 0.226 (0.640)	Data Time 0.001 (0.032)	Loss 2.3281 (2.3905)	Entropy 0.94885 (0.95092)	Top-1 acc 69.141 (66.629)	Top-5 acc 86.328 (85.667)	lr 0.00729
Train [77][1410/3239]	Time 0.222 (0.638)	Data Time 0.001 (0.031)	Loss 2.3379 (2.3901)	Entropy 0.94884 (0.95091)	Top-1 acc 67.969 (66.635)	Top-5 acc 87.109 (85.673)	lr 0.00729
Train [77][1420/3239]	Time 0.213 (0.637)	Data Time 0.001 (0.031)	Loss 2.4776 (2.3898)	Entropy 0.94882 (0.95089)	Top-1 acc 63.281 (66.648)	Top-5 acc 84.766 (85.681)	lr 0.00729
Train [77][1430/3239]	Time 0.232 (0.636)	Data Time 0.001 (0.031)	Loss 2.3843 (2.3897)	Entropy 0.94867 (0.95088)	Top-1 acc 65.625 (66.649)	Top-5 acc 85.156 (85.686)	lr 0.00728
Train [77][1440/3239]	Time 2.558 (0.635)	Data Time 0.001 (0.031)	Loss 2.3217 (2.3895)	Entropy 0.94867 (0.95086)	Top-1 acc 66.797 (66.654)	Top-5 acc 86.328 (85.690)	lr 0.00728
Train [77][1450/3239]	Time 0.221 (0.632)	Data Time 0.001 (0.031)	Loss 2.3598 (2.3906)	Entropy 0.94864 (0.95085)	Top-1 acc 66.016 (66.626)	Top-5 acc 87.500 (85.670)	lr 0.00728
Train [77][1460/3239]	Time 0.231 (0.631)	Data Time 0.001 (0.030)	Loss 2.2323 (2.3906)	Entropy 0.94860 (0.95083)	Top-1 acc 71.094 (66.626)	Top-5 acc 89.062 (85.666)	lr 0.00728
Train [77][1470/3239]	Time 0.324 (0.630)	Data Time 0.001 (0.030)	Loss 2.4026 (2.3910)	Entropy 0.94852 (0.95082)	Top-1 acc 67.188 (66.624)	Top-5 acc 88.281 (85.658)	lr 0.00728
Train [77][1480/3239]	Time 0.230 (0.629)	Data Time 0.001 (0.030)	Loss 2.4192 (2.3913)	Entropy 0.94842 (0.95080)	Top-1 acc 64.844 (66.618)	Top-5 acc 83.594 (85.650)	lr 0.00728
Train [77][1490/3239]	Time 0.241 (0.628)	Data Time 0.001 (0.030)	Loss 2.2710 (2.3910)	Entropy 0.94840 (0.95079)	Top-1 acc 67.188 (66.618)	Top-5 acc 87.500 (85.657)	lr 0.00728
Train [77][1500/3239]	Time 0.226 (0.627)	Data Time 0.001 (0.030)	Loss 2.4186 (2.3911)	Entropy 0.94841 (0.95077)	Top-1 acc 66.016 (66.619)	Top-5 acc 83.984 (85.651)	lr 0.00728
Train [77][1510/3239]	Time 0.289 (0.626)	Data Time 0.001 (0.029)	Loss 2.1965 (2.3911)	Entropy 0.94840 (0.95075)	Top-1 acc 72.656 (66.620)	Top-5 acc 89.453 (85.651)	lr 0.00728
Train [77][1520/3239]	Time 0.242 (0.625)	Data Time 0.001 (0.029)	Loss 2.3798 (2.3911)	Entropy 0.94837 (0.95074)	Top-1 acc 66.406 (66.622)	Top-5 acc 84.375 (85.650)	lr 0.00728
Train [77][1530/3239]	Time 0.219 (0.660)	Data Time 0.002 (0.029)	Loss 2.4317 (2.3912)	Entropy 0.94836 (0.95072)	Top-1 acc 67.188 (66.618)	Top-5 acc 85.156 (85.650)	lr 0.00728
Train [77][1540/3239]	Time 0.234 (0.659)	Data Time 0.002 (0.029)	Loss 2.4231 (2.3914)	Entropy 0.94831 (0.95071)	Top-1 acc 66.016 (66.616)	Top-5 acc 83.594 (85.645)	lr 0.00727
Train [77][1550/3239]	Time 2.554 (0.658)	Data Time 0.002 (0.029)	Loss 2.3733 (2.3914)	Entropy 0.94831 (0.95069)	Top-1 acc 67.578 (66.618)	Top-5 acc 84.375 (85.642)	lr 0.00727
Train [77][1560/3239]	Time 0.289 (0.656)	Data Time 0.002 (0.029)	Loss 2.4362 (2.3915)	Entropy 0.94836 (0.95068)	Top-1 acc 65.625 (66.615)	Top-5 acc 84.375 (85.638)	lr 0.00727
Train [77][1570/3239]	Time 0.259 (0.655)	Data Time 0.002 (0.028)	Loss 2.2746 (2.3913)	Entropy 0.94839 (0.95066)	Top-1 acc 68.359 (66.626)	Top-5 acc 86.719 (85.636)	lr 0.00727
Train [77][1580/3239]	Time 0.369 (0.654)	Data Time 0.002 (0.028)	Loss 2.6289 (2.3918)	Entropy 0.94835 (0.95065)	Top-1 acc 58.594 (66.617)	Top-5 acc 81.250 (85.625)	lr 0.00727
Train [77][1590/3239]	Time 0.292 (0.652)	Data Time 0.002 (0.028)	Loss 2.2665 (2.3915)	Entropy 0.94832 (0.95063)	Top-1 acc 72.656 (66.626)	Top-5 acc 86.328 (85.628)	lr 0.00727
Train [77][1600/3239]	Time 0.366 (0.651)	Data Time 0.001 (0.028)	Loss 2.2672 (2.3915)	Entropy 0.94827 (0.95062)	Top-1 acc 67.188 (66.628)	Top-5 acc 87.891 (85.629)	lr 0.00727
Train [77][1610/3239]	Time 0.242 (0.650)	Data Time 0.001 (0.028)	Loss 2.4007 (2.3915)	Entropy 0.94828 (0.95060)	Top-1 acc 66.016 (66.626)	Top-5 acc 85.547 (85.630)	lr 0.00727
Train [77][1620/3239]	Time 0.340 (0.649)	Data Time 0.001 (0.028)	Loss 2.2166 (2.3914)	Entropy 0.94792 (0.95059)	Top-1 acc 73.828 (66.627)	Top-5 acc 88.672 (85.630)	lr 0.00727
Train [77][1630/3239]	Time 0.280 (0.649)	Data Time 0.001 (0.027)	Loss 2.3349 (2.3915)	Entropy 0.94792 (0.95057)	Top-1 acc 67.969 (66.623)	Top-5 acc 86.328 (85.632)	lr 0.00727
Train [77][1640/3239]	Time 0.256 (0.647)	Data Time 0.001 (0.027)	Loss 2.2670 (2.3917)	Entropy 0.94796 (0.95056)	Top-1 acc 71.484 (66.618)	Top-5 acc 88.281 (85.631)	lr 0.00726
Train [77][1650/3239]	Time 0.225 (0.646)	Data Time 0.001 (0.027)	Loss 2.4352 (2.3915)	Entropy 0.94791 (0.95054)	Top-1 acc 62.109 (66.618)	Top-5 acc 83.984 (85.637)	lr 0.00726
Train [77][1660/3239]	Time 2.412 (0.645)	Data Time 0.002 (0.027)	Loss 2.4285 (2.3918)	Entropy 0.94791 (0.95052)	Top-1 acc 66.406 (66.610)	Top-5 acc 83.594 (85.629)	lr 0.00726
Train [77][1670/3239]	Time 0.277 (0.643)	Data Time 0.002 (0.027)	Loss 2.5464 (2.3922)	Entropy 0.94787 (0.95051)	Top-1 acc 64.453 (66.600)	Top-5 acc 83.594 (85.620)	lr 0.00726
Train [77][1680/3239]	Time 0.270 (0.642)	Data Time 0.001 (0.027)	Loss 2.3892 (2.3920)	Entropy 0.94787 (0.95049)	Top-1 acc 67.578 (66.612)	Top-5 acc 87.500 (85.627)	lr 0.00726
Train [77][1690/3239]	Time 0.314 (0.641)	Data Time 0.001 (0.026)	Loss 2.4110 (2.3918)	Entropy 0.94781 (0.95048)	Top-1 acc 67.578 (66.619)	Top-5 acc 85.547 (85.628)	lr 0.00726
Train [77][1700/3239]	Time 0.223 (0.640)	Data Time 0.001 (0.026)	Loss 2.3432 (2.3922)	Entropy 0.94778 (0.95046)	Top-1 acc 70.312 (66.610)	Top-5 acc 89.453 (85.623)	lr 0.00726
Train [77][1710/3239]	Time 0.226 (0.639)	Data Time 0.001 (0.026)	Loss 2.3641 (2.3924)	Entropy 0.94774 (0.95045)	Top-1 acc 68.359 (66.606)	Top-5 acc 85.156 (85.621)	lr 0.00726
Train [77][1720/3239]	Time 0.276 (0.638)	Data Time 0.001 (0.026)	Loss 2.4783 (2.3930)	Entropy 0.94776 (0.95043)	Top-1 acc 64.844 (66.590)	Top-5 acc 82.422 (85.612)	lr 0.00726
Train [77][1730/3239]	Time 0.302 (0.637)	Data Time 0.001 (0.026)	Loss 2.3484 (2.3928)	Entropy 0.94771 (0.95041)	Top-1 acc 67.188 (66.601)	Top-5 acc 88.672 (85.616)	lr 0.00726
Train [77][1740/3239]	Time 0.317 (0.637)	Data Time 0.037 (0.026)	Loss 2.2337 (2.3927)	Entropy 0.94765 (0.95040)	Top-1 acc 65.625 (66.601)	Top-5 acc 90.625 (85.618)	lr 0.00726
Train [77][1750/3239]	Time 0.246 (0.636)	Data Time 0.002 (0.026)	Loss 2.3981 (2.3926)	Entropy 0.94760 (0.95038)	Top-1 acc 64.844 (66.604)	Top-5 acc 84.766 (85.619)	lr 0.00725
Train [77][1760/3239]	Time 0.284 (0.635)	Data Time 0.002 (0.026)	Loss 2.4719 (2.3927)	Entropy 0.94762 (0.95037)	Top-1 acc 63.281 (66.603)	Top-5 acc 83.203 (85.619)	lr 0.00725
Train [77][1770/3239]	Time 2.427 (0.634)	Data Time 0.001 (0.025)	Loss 2.5010 (2.3930)	Entropy 0.94762 (0.95035)	Top-1 acc 67.969 (66.601)	Top-5 acc 83.203 (85.612)	lr 0.00725
Train [77][1780/3239]	Time 0.349 (0.632)	Data Time 0.001 (0.025)	Loss 2.5104 (2.3930)	Entropy 0.94760 (0.95034)	Top-1 acc 62.500 (66.597)	Top-5 acc 83.594 (85.610)	lr 0.00725
Train [77][1790/3239]	Time 0.239 (0.631)	Data Time 0.001 (0.025)	Loss 2.3468 (2.3928)	Entropy 0.94756 (0.95032)	Top-1 acc 64.844 (66.608)	Top-5 acc 85.156 (85.614)	lr 0.00725
Train [77][1800/3239]	Time 0.232 (0.630)	Data Time 0.002 (0.025)	Loss 2.2823 (2.3923)	Entropy 0.94746 (0.95031)	Top-1 acc 67.969 (66.619)	Top-5 acc 87.891 (85.623)	lr 0.00725
Train [77][1810/3239]	Time 0.233 (0.629)	Data Time 0.001 (0.025)	Loss 2.2138 (2.3923)	Entropy 0.94743 (0.95029)	Top-1 acc 69.141 (66.620)	Top-5 acc 89.844 (85.622)	lr 0.00725
Train [77][1820/3239]	Time 0.289 (0.629)	Data Time 0.003 (0.025)	Loss 2.2907 (2.3918)	Entropy 0.94743 (0.95027)	Top-1 acc 71.094 (66.631)	Top-5 acc 87.109 (85.628)	lr 0.00725
Train [77][1830/3239]	Time 0.263 (0.628)	Data Time 0.001 (0.025)	Loss 2.2487 (2.3917)	Entropy 0.94734 (0.95026)	Top-1 acc 70.312 (66.636)	Top-5 acc 88.672 (85.633)	lr 0.00725
Train [77][1840/3239]	Time 0.258 (0.627)	Data Time 0.001 (0.024)	Loss 2.2580 (2.3918)	Entropy 0.94732 (0.95024)	Top-1 acc 68.750 (66.631)	Top-5 acc 87.500 (85.628)	lr 0.00725
Train [77][1850/3239]	Time 0.271 (0.626)	Data Time 0.001 (0.024)	Loss 2.2832 (2.3916)	Entropy 0.94731 (0.95023)	Top-1 acc 67.578 (66.631)	Top-5 acc 87.500 (85.629)	lr 0.00725
Train [77][1860/3239]	Time 0.231 (0.626)	Data Time 0.001 (0.024)	Loss 2.3330 (2.3916)	Entropy 0.94723 (0.95021)	Top-1 acc 64.844 (66.634)	Top-5 acc 86.328 (85.630)	lr 0.00724
Train [77][1870/3239]	Time 0.354 (0.625)	Data Time 0.001 (0.024)	Loss 2.2280 (2.3915)	Entropy 0.94719 (0.95019)	Top-1 acc 67.969 (66.632)	Top-5 acc 87.891 (85.629)	lr 0.00724
Train [77][1880/3239]	Time 2.557 (0.624)	Data Time 0.001 (0.024)	Loss 2.2965 (2.3917)	Entropy 0.94719 (0.95018)	Top-1 acc 69.531 (66.624)	Top-5 acc 85.938 (85.625)	lr 0.00724
Train [77][1890/3239]	Time 0.242 (0.622)	Data Time 0.001 (0.024)	Loss 2.3019 (2.3916)	Entropy 0.94716 (0.95016)	Top-1 acc 66.797 (66.627)	Top-5 acc 85.938 (85.627)	lr 0.00724
Train [77][1900/3239]	Time 0.265 (0.649)	Data Time 0.003 (0.024)	Loss 2.5179 (2.3919)	Entropy 0.94723 (0.95015)	Top-1 acc 63.281 (66.619)	Top-5 acc 82.812 (85.623)	lr 0.00724
Train [77][1910/3239]	Time 0.334 (0.648)	Data Time 0.002 (0.024)	Loss 2.2809 (2.3916)	Entropy 0.94716 (0.95013)	Top-1 acc 67.188 (66.631)	Top-5 acc 90.234 (85.630)	lr 0.00724
Train [77][1920/3239]	Time 0.226 (0.647)	Data Time 0.002 (0.024)	Loss 2.5141 (2.3920)	Entropy 0.94704 (0.95012)	Top-1 acc 60.938 (66.626)	Top-5 acc 83.984 (85.622)	lr 0.00724
Train [77][1930/3239]	Time 0.236 (0.646)	Data Time 0.002 (0.023)	Loss 2.3970 (2.3919)	Entropy 0.94704 (0.95010)	Top-1 acc 63.281 (66.628)	Top-5 acc 85.156 (85.622)	lr 0.00724
Train [77][1940/3239]	Time 0.279 (0.645)	Data Time 0.001 (0.023)	Loss 2.4582 (2.3922)	Entropy 0.94702 (0.95008)	Top-1 acc 65.625 (66.622)	Top-5 acc 82.812 (85.619)	lr 0.00724
Train [77][1950/3239]	Time 0.280 (0.645)	Data Time 0.001 (0.023)	Loss 2.2974 (2.3921)	Entropy 0.94700 (0.95007)	Top-1 acc 69.922 (66.627)	Top-5 acc 85.547 (85.619)	lr 0.00724
Train [77][1960/3239]	Time 0.271 (0.644)	Data Time 0.001 (0.023)	Loss 2.5073 (2.3922)	Entropy 0.94702 (0.95005)	Top-1 acc 62.500 (66.624)	Top-5 acc 86.328 (85.620)	lr 0.00724
Train [77][1970/3239]	Time 0.262 (0.643)	Data Time 0.001 (0.023)	Loss 2.4298 (2.3920)	Entropy 0.94696 (0.95004)	Top-1 acc 67.969 (66.633)	Top-5 acc 85.547 (85.622)	lr 0.00723
Train [77][1980/3239]	Time 0.274 (0.642)	Data Time 0.001 (0.023)	Loss 2.4212 (2.3922)	Entropy 0.94687 (0.95002)	Top-1 acc 64.062 (66.627)	Top-5 acc 86.328 (85.624)	lr 0.00723
Train [77][1990/3239]	Time 2.668 (0.642)	Data Time 0.001 (0.023)	Loss 2.4596 (2.3922)	Entropy 0.94687 (0.95001)	Top-1 acc 67.188 (66.620)	Top-5 acc 82.812 (85.622)	lr 0.00723
Train [77][2000/3239]	Time 0.348 (0.640)	Data Time 0.001 (0.023)	Loss 2.1458 (2.3924)	Entropy 0.94689 (0.94999)	Top-1 acc 73.047 (66.612)	Top-5 acc 91.406 (85.619)	lr 0.00723
Train [77][2010/3239]	Time 0.227 (0.639)	Data Time 0.001 (0.023)	Loss 2.5102 (2.3923)	Entropy 0.94670 (0.94997)	Top-1 acc 64.453 (66.613)	Top-5 acc 82.812 (85.623)	lr 0.00723
Train [77][2020/3239]	Time 0.242 (0.638)	Data Time 0.001 (0.022)	Loss 2.2499 (2.3922)	Entropy 0.94662 (0.94996)	Top-1 acc 70.703 (66.619)	Top-5 acc 87.500 (85.622)	lr 0.00723
Train [77][2030/3239]	Time 0.223 (0.637)	Data Time 0.001 (0.022)	Loss 2.2988 (2.3923)	Entropy 0.94655 (0.94994)	Top-1 acc 68.359 (66.616)	Top-5 acc 88.281 (85.622)	lr 0.00723
Train [77][2040/3239]	Time 0.256 (0.636)	Data Time 0.002 (0.022)	Loss 2.3589 (2.3924)	Entropy 0.94651 (0.94992)	Top-1 acc 64.844 (66.609)	Top-5 acc 86.328 (85.621)	lr 0.00723
Train [77][2050/3239]	Time 0.246 (0.636)	Data Time 0.001 (0.022)	Loss 2.4258 (2.3923)	Entropy 0.94645 (0.94991)	Top-1 acc 67.969 (66.605)	Top-5 acc 84.375 (85.621)	lr 0.00723
Train [77][2060/3239]	Time 0.223 (0.635)	Data Time 0.001 (0.022)	Loss 3.7126 (2.3928)	Entropy 0.94639 (0.94989)	Top-1 acc 35.938 (66.593)	Top-5 acc 64.844 (85.614)	lr 0.00723
Train [77][2070/3239]	Time 0.240 (0.634)	Data Time 0.001 (0.022)	Loss 2.5380 (2.3930)	Entropy 0.94641 (0.94987)	Top-1 acc 62.891 (66.588)	Top-5 acc 82.812 (85.609)	lr 0.00723
Train [77][2080/3239]	Time 0.285 (0.633)	Data Time 0.001 (0.022)	Loss 2.4099 (2.3930)	Entropy 0.94623 (0.94986)	Top-1 acc 66.016 (66.587)	Top-5 acc 86.328 (85.607)	lr 0.00722
Train [77][2090/3239]	Time 0.252 (0.633)	Data Time 0.001 (0.022)	Loss 2.4620 (2.3930)	Entropy 0.94620 (0.94984)	Top-1 acc 65.625 (66.588)	Top-5 acc 84.766 (85.608)	lr 0.00722
Train [77][2100/3239]	Time 2.451 (0.632)	Data Time 0.001 (0.022)	Loss 2.5751 (2.3932)	Entropy 0.94620 (0.94982)	Top-1 acc 58.203 (66.578)	Top-5 acc 82.812 (85.605)	lr 0.00722
Train [77][2110/3239]	Time 0.237 (0.630)	Data Time 0.001 (0.022)	Loss 2.3512 (2.3932)	Entropy 0.94612 (0.94980)	Top-1 acc 65.625 (66.574)	Top-5 acc 88.672 (85.607)	lr 0.00722
Train [77][2120/3239]	Time 0.249 (0.629)	Data Time 0.001 (0.021)	Loss 2.3997 (2.3933)	Entropy 0.94611 (0.94979)	Top-1 acc 68.750 (66.574)	Top-5 acc 83.203 (85.602)	lr 0.00722
Train [77][2130/3239]	Time 0.325 (0.629)	Data Time 0.001 (0.021)	Loss 2.3529 (2.3933)	Entropy 0.94610 (0.94977)	Top-1 acc 64.844 (66.573)	Top-5 acc 85.547 (85.603)	lr 0.00722
Train [77][2140/3239]	Time 0.229 (0.628)	Data Time 0.001 (0.021)	Loss 2.4338 (2.3935)	Entropy 0.94606 (0.94975)	Top-1 acc 66.016 (66.569)	Top-5 acc 83.984 (85.594)	lr 0.00722
Train [77][2150/3239]	Time 0.236 (0.627)	Data Time 0.003 (0.021)	Loss 2.4509 (2.3933)	Entropy 0.94597 (0.94973)	Top-1 acc 64.453 (66.573)	Top-5 acc 85.156 (85.599)	lr 0.00722
Train [77][2160/3239]	Time 0.230 (0.626)	Data Time 0.001 (0.021)	Loss 2.3364 (2.3934)	Entropy 0.94595 (0.94972)	Top-1 acc 68.359 (66.574)	Top-5 acc 83.984 (85.596)	lr 0.00722
Train [77][2170/3239]	Time 0.307 (0.626)	Data Time 0.001 (0.021)	Loss 2.5032 (2.3934)	Entropy 0.94597 (0.94970)	Top-1 acc 65.625 (66.570)	Top-5 acc 82.422 (85.596)	lr 0.00722
Train [77][2180/3239]	Time 0.273 (0.625)	Data Time 0.002 (0.021)	Loss 2.2777 (2.3932)	Entropy 0.94586 (0.94968)	Top-1 acc 67.969 (66.576)	Top-5 acc 89.062 (85.600)	lr 0.00722
Train [77][2190/3239]	Time 0.243 (0.624)	Data Time 0.002 (0.021)	Loss 2.4348 (2.3933)	Entropy 0.94585 (0.94967)	Top-1 acc 64.844 (66.577)	Top-5 acc 84.375 (85.594)	lr 0.00721
Train [77][2200/3239]	Time 0.259 (0.624)	Data Time 0.001 (0.021)	Loss 2.2954 (2.3933)	Entropy 0.94579 (0.94965)	Top-1 acc 69.141 (66.578)	Top-5 acc 87.109 (85.594)	lr 0.00721
Train [77][2210/3239]	Time 2.380 (0.623)	Data Time 0.001 (0.021)	Loss 2.4505 (2.3933)	Entropy 0.94579 (0.94963)	Top-1 acc 66.016 (66.578)	Top-5 acc 85.547 (85.594)	lr 0.00721
Train [77][2220/3239]	Time 0.321 (0.621)	Data Time 0.001 (0.021)	Loss 2.2608 (2.3930)	Entropy 0.94578 (0.94961)	Top-1 acc 70.312 (66.583)	Top-5 acc 87.891 (85.595)	lr 0.00721
Train [77][2230/3239]	Time 0.224 (0.621)	Data Time 0.001 (0.021)	Loss 2.4536 (2.3930)	Entropy 0.94580 (0.94960)	Top-1 acc 63.672 (66.583)	Top-5 acc 85.547 (85.599)	lr 0.00721
Train [77][2240/3239]	Time 0.243 (0.620)	Data Time 0.001 (0.020)	Loss 2.4090 (2.3932)	Entropy 0.94608 (0.94958)	Top-1 acc 67.969 (66.580)	Top-5 acc 86.719 (85.596)	lr 0.00721
Train [77][2250/3239]	Time 0.254 (0.619)	Data Time 0.002 (0.020)	Loss 2.5131 (2.3933)	Entropy 0.94605 (0.94956)	Top-1 acc 64.453 (66.580)	Top-5 acc 85.156 (85.594)	lr 0.00721
Train [77][2260/3239]	Time 0.413 (0.641)	Data Time 0.002 (0.020)	Loss 2.2684 (2.3933)	Entropy 0.94604 (0.94955)	Top-1 acc 72.266 (66.579)	Top-5 acc 87.500 (85.594)	lr 0.00721
Train [77][2270/3239]	Time 0.228 (0.641)	Data Time 0.002 (0.020)	Loss 2.3105 (2.3933)	Entropy 0.94609 (0.94953)	Top-1 acc 67.578 (66.576)	Top-5 acc 85.938 (85.592)	lr 0.00721
Train [77][2280/3239]	Time 0.245 (0.640)	Data Time 0.002 (0.020)	Loss 2.3625 (2.3933)	Entropy 0.94599 (0.94952)	Top-1 acc 67.188 (66.575)	Top-5 acc 88.672 (85.595)	lr 0.00721
Train [77][2290/3239]	Time 0.229 (0.640)	Data Time 0.001 (0.020)	Loss 2.5205 (2.3932)	Entropy 0.94596 (0.94950)	Top-1 acc 63.672 (66.578)	Top-5 acc 83.984 (85.595)	lr 0.00721
Train [77][2300/3239]	Time 0.229 (0.639)	Data Time 0.001 (0.020)	Loss 2.4623 (2.3933)	Entropy 0.94595 (0.94949)	Top-1 acc 63.281 (66.573)	Top-5 acc 83.984 (85.596)	lr 0.00720
Train [77][2310/3239]	Time 0.225 (0.638)	Data Time 0.001 (0.020)	Loss 2.4768 (2.3933)	Entropy 0.94597 (0.94947)	Top-1 acc 62.109 (66.568)	Top-5 acc 85.938 (85.597)	lr 0.00720
Train [77][2320/3239]	Time 2.415 (0.637)	Data Time 0.001 (0.020)	Loss 2.2875 (2.3930)	Entropy 0.94597 (0.94946)	Top-1 acc 71.094 (66.577)	Top-5 acc 87.109 (85.602)	lr 0.00720
Train [77][2330/3239]	Time 0.240 (0.636)	Data Time 0.001 (0.020)	Loss 2.2896 (2.3930)	Entropy 0.94596 (0.94944)	Top-1 acc 69.141 (66.580)	Top-5 acc 85.156 (85.601)	lr 0.00720
Train [77][2340/3239]	Time 0.254 (0.635)	Data Time 0.002 (0.020)	Loss 2.4847 (2.3927)	Entropy 0.94582 (0.94943)	Top-1 acc 64.062 (66.586)	Top-5 acc 83.984 (85.606)	lr 0.00720
Train [77][2350/3239]	Time 0.325 (0.634)	Data Time 0.001 (0.020)	Loss 2.4145 (2.3927)	Entropy 0.94579 (0.94941)	Top-1 acc 67.188 (66.589)	Top-5 acc 86.328 (85.608)	lr 0.00720
Train [77][2360/3239]	Time 0.252 (0.634)	Data Time 0.002 (0.019)	Loss 2.3113 (2.3926)	Entropy 0.94573 (0.94940)	Top-1 acc 67.969 (66.588)	Top-5 acc 86.719 (85.610)	lr 0.00720
Train [77][2370/3239]	Time 0.230 (0.633)	Data Time 0.001 (0.019)	Loss 2.3456 (2.3922)	Entropy 0.94573 (0.94938)	Top-1 acc 67.969 (66.594)	Top-5 acc 87.500 (85.618)	lr 0.00720
Train [77][2380/3239]	Time 0.226 (0.632)	Data Time 0.001 (0.019)	Loss 2.3419 (2.3920)	Entropy 0.94569 (0.94936)	Top-1 acc 67.578 (66.601)	Top-5 acc 85.156 (85.619)	lr 0.00720
Train [77][2390/3239]	Time 0.332 (0.632)	Data Time 0.001 (0.019)	Loss 2.3555 (2.3922)	Entropy 0.94569 (0.94935)	Top-1 acc 68.750 (66.594)	Top-5 acc 86.328 (85.617)	lr 0.00720
Train [77][2400/3239]	Time 0.296 (0.631)	Data Time 0.001 (0.019)	Loss 2.5226 (2.3923)	Entropy 0.94562 (0.94933)	Top-1 acc 59.766 (66.593)	Top-5 acc 82.031 (85.615)	lr 0.00720
Train [77][2410/3239]	Time 0.291 (0.631)	Data Time 0.002 (0.019)	Loss 2.3378 (2.3924)	Entropy 0.94558 (0.94932)	Top-1 acc 67.188 (66.592)	Top-5 acc 87.500 (85.612)	lr 0.00719
Train [77][2420/3239]	Time 0.289 (0.630)	Data Time 0.001 (0.019)	Loss 2.4324 (2.3926)	Entropy 0.94557 (0.94930)	Top-1 acc 65.234 (66.586)	Top-5 acc 85.547 (85.608)	lr 0.00719
Train [77][2430/3239]	Time 2.483 (0.629)	Data Time 0.001 (0.019)	Loss 2.6226 (2.3926)	Entropy 0.94557 (0.94929)	Top-1 acc 60.156 (66.587)	Top-5 acc 82.031 (85.606)	lr 0.00719
Train [77][2440/3239]	Time 0.234 (0.628)	Data Time 0.001 (0.019)	Loss 2.3699 (2.3926)	Entropy 0.94552 (0.94927)	Top-1 acc 67.578 (66.585)	Top-5 acc 84.766 (85.608)	lr 0.00719
Train [77][2450/3239]	Time 0.251 (0.627)	Data Time 0.002 (0.019)	Loss 2.2250 (2.3927)	Entropy 0.94554 (0.94926)	Top-1 acc 67.188 (66.582)	Top-5 acc 89.453 (85.608)	lr 0.00719
Train [77][2460/3239]	Time 0.219 (0.626)	Data Time 0.001 (0.019)	Loss 2.5275 (2.3927)	Entropy 0.94550 (0.94924)	Top-1 acc 64.062 (66.580)	Top-5 acc 82.812 (85.610)	lr 0.00719
Train [77][2470/3239]	Time 0.228 (0.626)	Data Time 0.001 (0.019)	Loss 2.4120 (2.3927)	Entropy 0.94538 (0.94923)	Top-1 acc 65.234 (66.583)	Top-5 acc 85.938 (85.613)	lr 0.00719
Train [77][2480/3239]	Time 0.229 (0.625)	Data Time 0.001 (0.019)	Loss 2.4916 (2.3928)	Entropy 0.94536 (0.94921)	Top-1 acc 62.891 (66.582)	Top-5 acc 85.156 (85.612)	lr 0.00719
Train [77][2490/3239]	Time 0.224 (0.625)	Data Time 0.001 (0.019)	Loss 2.4553 (2.3928)	Entropy 0.94539 (0.94920)	Top-1 acc 64.453 (66.581)	Top-5 acc 86.719 (85.614)	lr 0.00719
Train [77][2500/3239]	Time 0.216 (0.624)	Data Time 0.001 (0.018)	Loss 2.3881 (2.3927)	Entropy 0.94542 (0.94918)	Top-1 acc 64.453 (66.583)	Top-5 acc 86.719 (85.614)	lr 0.00719
Train [77][2510/3239]	Time 0.219 (0.623)	Data Time 0.001 (0.018)	Loss 2.4403 (2.3928)	Entropy 0.94538 (0.94917)	Top-1 acc 64.844 (66.582)	Top-5 acc 83.594 (85.612)	lr 0.00719
Train [77][2520/3239]	Time 0.380 (0.623)	Data Time 0.001 (0.018)	Loss 2.3735 (2.3928)	Entropy 0.94533 (0.94915)	Top-1 acc 66.406 (66.584)	Top-5 acc 86.328 (85.613)	lr 0.00718
Train [77][2530/3239]	Time 0.220 (0.622)	Data Time 0.001 (0.018)	Loss 2.5335 (2.3928)	Entropy 0.94521 (0.94914)	Top-1 acc 61.328 (66.581)	Top-5 acc 83.203 (85.612)	lr 0.00718
Train [77][2540/3239]	Time 2.466 (0.622)	Data Time 0.002 (0.018)	Loss 2.4385 (2.3927)	Entropy 0.94521 (0.94912)	Top-1 acc 64.844 (66.584)	Top-5 acc 85.547 (85.615)	lr 0.00718
Train [77][2550/3239]	Time 0.213 (0.620)	Data Time 0.001 (0.018)	Loss 2.3535 (2.3926)	Entropy 0.94520 (0.94910)	Top-1 acc 65.234 (66.586)	Top-5 acc 86.328 (85.616)	lr 0.00718
Train [77][2560/3239]	Time 0.244 (0.619)	Data Time 0.001 (0.018)	Loss 2.2929 (2.3925)	Entropy 0.94523 (0.94909)	Top-1 acc 71.484 (66.588)	Top-5 acc 87.500 (85.618)	lr 0.00718
Train [77][2570/3239]	Time 0.217 (0.619)	Data Time 0.001 (0.018)	Loss 2.4021 (2.3924)	Entropy 0.94524 (0.94907)	Top-1 acc 67.969 (66.594)	Top-5 acc 85.938 (85.619)	lr 0.00718
Train [77][2580/3239]	Time 0.259 (0.618)	Data Time 0.001 (0.018)	Loss 2.5021 (2.3926)	Entropy 0.94524 (0.94906)	Top-1 acc 64.062 (66.592)	Top-5 acc 82.812 (85.615)	lr 0.00718
Train [77][2590/3239]	Time 0.229 (0.618)	Data Time 0.001 (0.018)	Loss 2.2979 (2.3926)	Entropy 0.94524 (0.94904)	Top-1 acc 69.922 (66.590)	Top-5 acc 87.891 (85.618)	lr 0.00718
Train [77][2600/3239]	Time 0.227 (0.617)	Data Time 0.001 (0.018)	Loss 2.3918 (2.3926)	Entropy 0.94515 (0.94903)	Top-1 acc 64.844 (66.586)	Top-5 acc 86.719 (85.617)	lr 0.00718
Train [77][2610/3239]	Time 0.389 (0.617)	Data Time 0.002 (0.018)	Loss 2.4774 (2.3928)	Entropy 0.94516 (0.94901)	Top-1 acc 64.062 (66.581)	Top-5 acc 84.766 (85.617)	lr 0.00718
Train [77][2620/3239]	Time 0.236 (0.637)	Data Time 0.002 (0.018)	Loss 2.3690 (2.3929)	Entropy 0.94514 (0.94900)	Top-1 acc 64.844 (66.580)	Top-5 acc 86.328 (85.615)	lr 0.00718
Train [77][2630/3239]	Time 0.293 (0.636)	Data Time 0.002 (0.018)	Loss 2.3916 (2.3929)	Entropy 0.94512 (0.94899)	Top-1 acc 69.141 (66.582)	Top-5 acc 84.375 (85.613)	lr 0.00717
Train [77][2640/3239]	Time 0.271 (0.636)	Data Time 0.002 (0.018)	Loss 2.4773 (2.3931)	Entropy 0.94508 (0.94897)	Top-1 acc 60.938 (66.574)	Top-5 acc 85.156 (85.612)	lr 0.00717
Train [77][2650/3239]	Time 0.251 (0.635)	Data Time 0.001 (0.018)	Loss 2.4842 (2.3931)	Entropy 0.94509 (0.94896)	Top-1 acc 60.156 (66.573)	Top-5 acc 84.766 (85.611)	lr 0.00717
Train [77][2660/3239]	Time 0.269 (0.635)	Data Time 0.001 (0.017)	Loss 2.4298 (2.3932)	Entropy 0.94513 (0.94894)	Top-1 acc 64.453 (66.571)	Top-5 acc 84.766 (85.609)	lr 0.00717
Train [77][2670/3239]	Time 0.232 (0.634)	Data Time 0.001 (0.017)	Loss 2.4053 (2.3931)	Entropy 0.94507 (0.94893)	Top-1 acc 62.500 (66.571)	Top-5 acc 86.328 (85.612)	lr 0.00717
Train [77][2680/3239]	Time 0.238 (0.633)	Data Time 0.001 (0.017)	Loss 2.3463 (2.3929)	Entropy 0.94508 (0.94891)	Top-1 acc 69.531 (66.577)	Top-5 acc 87.109 (85.614)	lr 0.00717
Train [77][2690/3239]	Time 0.263 (0.633)	Data Time 0.001 (0.017)	Loss 2.2479 (2.3929)	Entropy 0.94513 (0.94890)	Top-1 acc 70.312 (66.576)	Top-5 acc 89.844 (85.615)	lr 0.00717
Train [77][2700/3239]	Time 0.372 (0.632)	Data Time 0.001 (0.017)	Loss 2.4642 (2.3930)	Entropy 0.94514 (0.94888)	Top-1 acc 65.625 (66.572)	Top-5 acc 83.984 (85.613)	lr 0.00717
Train [77][2710/3239]	Time 0.236 (0.632)	Data Time 0.001 (0.017)	Loss 2.3281 (2.3930)	Entropy 0.94511 (0.94887)	Top-1 acc 71.094 (66.573)	Top-5 acc 86.328 (85.615)	lr 0.00717
Train [77][2720/3239]	Time 0.291 (0.631)	Data Time 0.001 (0.017)	Loss 2.3968 (2.3930)	Entropy 0.94504 (0.94886)	Top-1 acc 65.234 (66.570)	Top-5 acc 87.109 (85.616)	lr 0.00717
Train [77][2730/3239]	Time 0.235 (0.631)	Data Time 0.001 (0.017)	Loss 2.3499 (2.3931)	Entropy 0.94504 (0.94884)	Top-1 acc 64.844 (66.566)	Top-5 acc 87.500 (85.615)	lr 0.00717
Train [77][2740/3239]	Time 0.330 (0.630)	Data Time 0.002 (0.017)	Loss 2.3476 (2.3933)	Entropy 0.94503 (0.94883)	Top-1 acc 69.141 (66.561)	Top-5 acc 87.109 (85.610)	lr 0.00716
Train [77][2750/3239]	Time 0.285 (0.630)	Data Time 0.001 (0.017)	Loss 2.4253 (2.3933)	Entropy 0.94497 (0.94882)	Top-1 acc 64.062 (66.559)	Top-5 acc 85.938 (85.610)	lr 0.00716
Train [77][2760/3239]	Time 0.273 (0.629)	Data Time 0.001 (0.017)	Loss 2.3370 (2.3935)	Entropy 0.94495 (0.94880)	Top-1 acc 65.234 (66.557)	Top-5 acc 88.672 (85.609)	lr 0.00716
Train [77][2770/3239]	Time 0.253 (0.629)	Data Time 0.001 (0.017)	Loss 2.3520 (2.3937)	Entropy 0.94497 (0.94879)	Top-1 acc 64.453 (66.551)	Top-5 acc 87.109 (85.605)	lr 0.00716
Train [77][2780/3239]	Time 0.259 (0.628)	Data Time 0.001 (0.017)	Loss 2.4816 (2.3938)	Entropy 0.94496 (0.94877)	Top-1 acc 66.016 (66.549)	Top-5 acc 83.984 (85.606)	lr 0.00716
Train [77][2790/3239]	Time 0.264 (0.628)	Data Time 0.001 (0.017)	Loss 2.5587 (2.3939)	Entropy 0.94500 (0.94876)	Top-1 acc 60.156 (66.543)	Top-5 acc 82.812 (85.605)	lr 0.00716
Train [77][2800/3239]	Time 0.237 (0.627)	Data Time 0.001 (0.017)	Loss 2.2203 (2.3940)	Entropy 0.94499 (0.94875)	Top-1 acc 71.484 (66.538)	Top-5 acc 87.891 (85.599)	lr 0.00716
Train [77][2810/3239]	Time 0.303 (0.627)	Data Time 0.001 (0.017)	Loss 2.4251 (2.3940)	Entropy 0.94501 (0.94873)	Top-1 acc 64.062 (66.534)	Top-5 acc 85.156 (85.602)	lr 0.00716
Train [77][2820/3239]	Time 0.259 (0.626)	Data Time 0.001 (0.017)	Loss 2.3363 (2.3940)	Entropy 0.94502 (0.94872)	Top-1 acc 67.578 (66.536)	Top-5 acc 87.109 (85.603)	lr 0.00716
Train [77][2830/3239]	Time 0.450 (0.626)	Data Time 0.001 (0.017)	Loss 2.3293 (2.3937)	Entropy 0.94510 (0.94871)	Top-1 acc 69.531 (66.539)	Top-5 acc 86.719 (85.608)	lr 0.00716
Train [77][2840/3239]	Time 0.256 (0.625)	Data Time 0.001 (0.016)	Loss 2.4590 (2.3940)	Entropy 0.94504 (0.94869)	Top-1 acc 65.234 (66.533)	Top-5 acc 84.375 (85.604)	lr 0.00716
Train [77][2850/3239]	Time 0.242 (0.625)	Data Time 0.001 (0.016)	Loss 2.2783 (2.3938)	Entropy 0.94495 (0.94868)	Top-1 acc 66.797 (66.536)	Top-5 acc 88.672 (85.608)	lr 0.00715
Train [77][2860/3239]	Time 0.286 (0.624)	Data Time 0.001 (0.016)	Loss 2.3988 (2.3939)	Entropy 0.94492 (0.94867)	Top-1 acc 64.062 (66.533)	Top-5 acc 85.547 (85.604)	lr 0.00715
Train [77][2870/3239]	Time 0.250 (0.624)	Data Time 0.001 (0.016)	Loss 2.3068 (2.3939)	Entropy 0.94493 (0.94866)	Top-1 acc 69.141 (66.532)	Top-5 acc 86.719 (85.606)	lr 0.00715
Train [77][2880/3239]	Time 0.282 (0.623)	Data Time 0.002 (0.016)	Loss 2.5256 (2.3940)	Entropy 0.94486 (0.94864)	Top-1 acc 62.891 (66.526)	Top-5 acc 84.766 (85.602)	lr 0.00715
Train [77][2890/3239]	Time 0.223 (0.622)	Data Time 0.001 (0.016)	Loss 2.2628 (2.3940)	Entropy 0.94478 (0.94863)	Top-1 acc 67.578 (66.529)	Top-5 acc 87.109 (85.601)	lr 0.00715
Train [77][2900/3239]	Time 0.264 (0.622)	Data Time 0.001 (0.016)	Loss 2.2567 (2.3941)	Entropy 0.94485 (0.94862)	Top-1 acc 71.484 (66.527)	Top-5 acc 88.281 (85.600)	lr 0.00715
Train [77][2910/3239]	Time 0.235 (0.622)	Data Time 0.001 (0.016)	Loss 2.3869 (2.3941)	Entropy 0.94479 (0.94860)	Top-1 acc 67.188 (66.526)	Top-5 acc 83.203 (85.600)	lr 0.00715
Train [77][2920/3239]	Time 0.232 (0.621)	Data Time 0.001 (0.016)	Loss 2.5722 (2.3943)	Entropy 0.94479 (0.94859)	Top-1 acc 64.062 (66.524)	Top-5 acc 83.984 (85.596)	lr 0.00715
Train [77][2930/3239]	Time 0.224 (0.620)	Data Time 0.001 (0.016)	Loss 2.4613 (2.3943)	Entropy 0.94474 (0.94858)	Top-1 acc 64.453 (66.520)	Top-5 acc 85.547 (85.595)	lr 0.00715
Train [77][2940/3239]	Time 0.262 (0.620)	Data Time 0.001 (0.016)	Loss 2.3849 (2.3944)	Entropy 0.94466 (0.94856)	Top-1 acc 69.922 (66.519)	Top-5 acc 83.984 (85.593)	lr 0.00715
Train [77][2950/3239]	Time 0.270 (0.638)	Data Time 0.004 (0.016)	Loss 2.4305 (2.3945)	Entropy 0.94454 (0.94855)	Top-1 acc 65.625 (66.516)	Top-5 acc 83.984 (85.591)	lr 0.00715
Train [77][2960/3239]	Time 0.361 (0.637)	Data Time 0.002 (0.016)	Loss 2.3370 (2.3944)	Entropy 0.94451 (0.94854)	Top-1 acc 66.406 (66.515)	Top-5 acc 85.547 (85.590)	lr 0.00714
Train [77][2970/3239]	Time 0.229 (0.637)	Data Time 0.002 (0.016)	Loss 2.3632 (2.3947)	Entropy 0.94453 (0.94852)	Top-1 acc 65.234 (66.506)	Top-5 acc 87.500 (85.584)	lr 0.00714
Train [77][2980/3239]	Time 0.245 (0.636)	Data Time 0.001 (0.016)	Loss 2.5816 (2.3948)	Entropy 0.94455 (0.94851)	Top-1 acc 63.672 (66.506)	Top-5 acc 80.859 (85.578)	lr 0.00714
Train [77][2990/3239]	Time 0.251 (0.636)	Data Time 0.001 (0.016)	Loss 2.2645 (2.3949)	Entropy 0.94457 (0.94850)	Top-1 acc 74.609 (66.506)	Top-5 acc 87.500 (85.576)	lr 0.00714
Train [77][3000/3239]	Time 0.384 (0.635)	Data Time 0.001 (0.016)	Loss 2.2909 (2.3950)	Entropy 0.94457 (0.94848)	Top-1 acc 71.875 (66.504)	Top-5 acc 88.281 (85.573)	lr 0.00714
Train [77][3010/3239]	Time 0.282 (0.635)	Data Time 0.001 (0.016)	Loss 2.6165 (2.3950)	Entropy 0.94454 (0.94847)	Top-1 acc 62.500 (66.505)	Top-5 acc 80.469 (85.573)	lr 0.00714
Train [77][3020/3239]	Time 0.244 (0.634)	Data Time 0.001 (0.016)	Loss 2.4091 (2.3948)	Entropy 0.94453 (0.94846)	Top-1 acc 62.891 (66.510)	Top-5 acc 84.766 (85.576)	lr 0.00714
Train [77][3030/3239]	Time 0.299 (0.634)	Data Time 0.001 (0.016)	Loss 2.3857 (2.3949)	Entropy 0.94449 (0.94845)	Top-1 acc 67.188 (66.506)	Top-5 acc 85.547 (85.574)	lr 0.00714
Train [77][3040/3239]	Time 0.280 (0.633)	Data Time 0.001 (0.016)	Loss 2.3689 (2.3950)	Entropy 0.94447 (0.94843)	Top-1 acc 69.531 (66.503)	Top-5 acc 85.938 (85.573)	lr 0.00714
Train [77][3050/3239]	Time 0.228 (0.633)	Data Time 0.002 (0.015)	Loss 2.1960 (2.3951)	Entropy 0.94443 (0.94842)	Top-1 acc 74.219 (66.502)	Top-5 acc 89.453 (85.571)	lr 0.00714
Train [77][3060/3239]	Time 0.216 (0.632)	Data Time 0.001 (0.015)	Loss 2.6722 (2.3951)	Entropy 0.94444 (0.94841)	Top-1 acc 64.844 (66.503)	Top-5 acc 79.297 (85.568)	lr 0.00714
Train [77][3070/3239]	Time 0.289 (0.632)	Data Time 0.001 (0.015)	Loss 2.3379 (2.3951)	Entropy 0.94445 (0.94839)	Top-1 acc 68.750 (66.503)	Top-5 acc 87.891 (85.569)	lr 0.00713
Train [77][3080/3239]	Time 0.276 (0.631)	Data Time 0.002 (0.015)	Loss 2.5472 (2.3952)	Entropy 0.94437 (0.94838)	Top-1 acc 61.719 (66.500)	Top-5 acc 83.203 (85.568)	lr 0.00713
Train [77][3090/3239]	Time 0.254 (0.631)	Data Time 0.002 (0.015)	Loss 2.3241 (2.3953)	Entropy 0.94442 (0.94837)	Top-1 acc 69.141 (66.500)	Top-5 acc 87.109 (85.565)	lr 0.00713
Train [77][3100/3239]	Time 0.230 (0.630)	Data Time 0.001 (0.015)	Loss 2.4023 (2.3954)	Entropy 0.94438 (0.94835)	Top-1 acc 69.922 (66.497)	Top-5 acc 84.375 (85.564)	lr 0.00713
Train [77][3110/3239]	Time 0.279 (0.630)	Data Time 0.001 (0.015)	Loss 2.3145 (2.3953)	Entropy 0.94437 (0.94834)	Top-1 acc 69.531 (66.499)	Top-5 acc 88.672 (85.564)	lr 0.00713
Train [77][3120/3239]	Time 0.227 (0.629)	Data Time 0.001 (0.015)	Loss 2.4817 (2.3955)	Entropy 0.94431 (0.94833)	Top-1 acc 64.062 (66.494)	Top-5 acc 85.156 (85.564)	lr 0.00713
Train [77][3130/3239]	Time 0.359 (0.629)	Data Time 0.001 (0.015)	Loss 2.5314 (2.3956)	Entropy 0.94428 (0.94832)	Top-1 acc 66.797 (66.495)	Top-5 acc 82.422 (85.560)	lr 0.00713
Train [77][3140/3239]	Time 0.285 (0.628)	Data Time 0.001 (0.015)	Loss 2.4723 (2.3956)	Entropy 0.94419 (0.94830)	Top-1 acc 64.453 (66.490)	Top-5 acc 84.375 (85.561)	lr 0.00713
Train [77][3150/3239]	Time 0.227 (0.628)	Data Time 0.001 (0.015)	Loss 2.4438 (2.3956)	Entropy 0.94416 (0.94829)	Top-1 acc 65.625 (66.492)	Top-5 acc 83.594 (85.562)	lr 0.00713
Train [77][3160/3239]	Time 0.235 (0.627)	Data Time 0.001 (0.015)	Loss 2.3972 (2.3957)	Entropy 0.94414 (0.94828)	Top-1 acc 65.625 (66.490)	Top-5 acc 86.328 (85.559)	lr 0.00713
Train [77][3170/3239]	Time 0.252 (0.627)	Data Time 0.001 (0.015)	Loss 2.5295 (2.3957)	Entropy 0.94402 (0.94826)	Top-1 acc 64.062 (66.487)	Top-5 acc 82.422 (85.560)	lr 0.00712
Train [77][3180/3239]	Time 0.277 (0.626)	Data Time 0.000 (0.015)	Loss 2.4456 (2.3958)	Entropy 0.94401 (0.94825)	Top-1 acc 64.062 (66.484)	Top-5 acc 85.938 (85.560)	lr 0.00712
Train [77][3190/3239]	Time 0.243 (0.626)	Data Time 0.000 (0.015)	Loss 2.4226 (2.3959)	Entropy 0.94392 (0.94824)	Top-1 acc 60.938 (66.479)	Top-5 acc 89.453 (85.560)	lr 0.00712
Train [77][3200/3239]	Time 0.223 (0.625)	Data Time 0.000 (0.015)	Loss 2.3978 (2.3960)	Entropy 0.94392 (0.94822)	Top-1 acc 65.234 (66.476)	Top-5 acc 86.719 (85.560)	lr 0.00712
Train [77][3210/3239]	Time 0.227 (0.625)	Data Time 0.000 (0.015)	Loss 2.4141 (2.3962)	Entropy 0.94391 (0.94821)	Top-1 acc 65.625 (66.471)	Top-5 acc 84.766 (85.555)	lr 0.00712
Train [77][3220/3239]	Time 0.227 (0.624)	Data Time 0.000 (0.015)	Loss 2.2871 (2.3962)	Entropy 0.94389 (0.94820)	Top-1 acc 67.969 (66.474)	Top-5 acc 88.281 (85.555)	lr 0.00712
Train [77][3230/3239]	Time 0.230 (0.624)	Data Time 0.000 (0.015)	Loss 2.2899 (2.3963)	Entropy 0.94380 (0.94818)	Top-1 acc 67.969 (66.474)	Top-5 acc 87.891 (85.552)	lr 0.00712
Train [77][3239/3239]	Time 2.312 (0.623)	Data Time 0.000 (0.015)	Loss 2.7778 (2.3964)	Entropy 0.94380 (0.94817)	Top-1 acc 59.259 (66.472)	Top-5 acc 80.247 (85.552)	lr 0.00712
==========Valid [77/120]	loss 1.321	top-1 acc 69.872 (69.872)	top-5 acc 88.481	Train top-1 66.472	top-5 85.552	Entropy 0.94380	Latency-None: 0.000ms	Flops: 546.53M
Train [78][0/3239]	Time 42.289 (42.289)	Data Time 40.562 (40.562)	Loss 2.4524 (2.4524)	Entropy 0.94373 (0.94373)	Top-1 acc 63.672 (63.672)	Top-5 acc 82.422 (82.422)	lr 0.00712
Train [78][10/3239]	Time 2.562 (4.341)	Data Time 0.002 (3.689)	Loss 2.5494 (2.3280)	Entropy 0.94373 (0.94373)	Top-1 acc 62.891 (67.685)	Top-5 acc 84.375 (86.719)	lr 0.00712
Train [78][20/3239]	Time 0.246 (2.390)	Data Time 0.001 (1.933)	Loss 2.4703 (2.3412)	Entropy 0.94373 (0.94373)	Top-1 acc 66.406 (67.504)	Top-5 acc 85.156 (86.458)	lr 0.00712
Train [78][30/3239]	Time 0.275 (1.777)	Data Time 0.001 (1.310)	Loss 2.4472 (2.3497)	Entropy 0.94370 (0.94372)	Top-1 acc 64.844 (67.692)	Top-5 acc 83.984 (86.064)	lr 0.00712
Train [78][40/3239]	Time 0.255 (1.459)	Data Time 0.002 (0.991)	Loss 2.3793 (2.3549)	Entropy 0.94346 (0.94367)	Top-1 acc 66.016 (67.426)	Top-5 acc 86.719 (86.109)	lr 0.00711
Train [78][50/3239]	Time 0.287 (2.249)	Data Time 0.003 (0.797)	Loss 2.2740 (2.3587)	Entropy 0.94344 (0.94363)	Top-1 acc 68.750 (67.402)	Top-5 acc 89.062 (86.121)	lr 0.00711
Train [78][60/3239]	Time 0.228 (1.975)	Data Time 0.001 (0.667)	Loss 2.2751 (2.3556)	Entropy 0.94342 (0.94359)	Top-1 acc 70.312 (67.456)	Top-5 acc 89.062 (86.174)	lr 0.00711
Train [78][70/3239]	Time 0.304 (1.762)	Data Time 0.001 (0.573)	Loss 2.4326 (2.3584)	Entropy 0.94339 (0.94357)	Top-1 acc 63.672 (67.463)	Top-5 acc 83.984 (86.207)	lr 0.00711
Train [78][80/3239]	Time 0.217 (1.605)	Data Time 0.001 (0.503)	Loss 2.4915 (2.3660)	Entropy 0.94339 (0.94355)	Top-1 acc 64.453 (67.279)	Top-5 acc 84.766 (86.116)	lr 0.00711
Train [78][90/3239]	Time 0.227 (1.483)	Data Time 0.001 (0.448)	Loss 2.2670 (2.3640)	Entropy 0.94338 (0.94353)	Top-1 acc 67.969 (67.286)	Top-5 acc 87.891 (86.212)	lr 0.00711
Train [78][100/3239]	Time 0.213 (1.383)	Data Time 0.001 (0.403)	Loss 2.4050 (2.3663)	Entropy 0.94335 (0.94351)	Top-1 acc 67.188 (67.315)	Top-5 acc 84.766 (86.139)	lr 0.00711
Train [78][110/3239]	Time 0.312 (1.305)	Data Time 0.001 (0.367)	Loss 2.2516 (2.3689)	Entropy 0.94336 (0.94350)	Top-1 acc 67.578 (67.254)	Top-5 acc 89.062 (86.103)	lr 0.00711
Train [78][120/3239]	Time 2.641 (1.237)	Data Time 0.005 (0.337)	Loss 2.3504 (2.3808)	Entropy 0.94336 (0.94349)	Top-1 acc 67.578 (66.968)	Top-5 acc 86.719 (85.925)	lr 0.00711
Train [78][130/3239]	Time 0.249 (1.162)	Data Time 0.001 (0.311)	Loss 2.3423 (2.3807)	Entropy 0.94332 (0.94347)	Top-1 acc 69.531 (66.991)	Top-5 acc 86.719 (85.911)	lr 0.00711
Train [78][140/3239]	Time 0.233 (1.112)	Data Time 0.001 (0.289)	Loss 2.3960 (2.3788)	Entropy 0.94331 (0.94346)	Top-1 acc 66.797 (66.977)	Top-5 acc 85.938 (85.926)	lr 0.00711
Train [78][150/3239]	Time 0.239 (1.070)	Data Time 0.002 (0.270)	Loss 2.2460 (2.3749)	Entropy 0.94332 (0.94345)	Top-1 acc 66.797 (67.019)	Top-5 acc 90.625 (86.018)	lr 0.00710
Train [78][160/3239]	Time 0.242 (1.035)	Data Time 0.001 (0.254)	Loss 2.3499 (2.3748)	Entropy 0.94329 (0.94344)	Top-1 acc 65.625 (67.044)	Top-5 acc 85.547 (85.998)	lr 0.00710
Train [78][170/3239]	Time 0.231 (1.003)	Data Time 0.001 (0.239)	Loss 2.5328 (2.3749)	Entropy 0.94325 (0.94343)	Top-1 acc 64.844 (67.030)	Top-5 acc 82.422 (86.001)	lr 0.00710
Train [78][180/3239]	Time 0.241 (0.975)	Data Time 0.001 (0.226)	Loss 2.2105 (2.3735)	Entropy 0.94318 (0.94342)	Top-1 acc 71.875 (67.062)	Top-5 acc 89.062 (86.035)	lr 0.00710
Train [78][190/3239]	Time 0.233 (0.950)	Data Time 0.001 (0.214)	Loss 2.2681 (2.3739)	Entropy 0.94318 (0.94341)	Top-1 acc 70.703 (67.102)	Top-5 acc 88.281 (86.001)	lr 0.00710
Train [78][200/3239]	Time 0.328 (0.927)	Data Time 0.001 (0.204)	Loss 2.3225 (2.3750)	Entropy 0.94316 (0.94340)	Top-1 acc 66.016 (67.061)	Top-5 acc 88.281 (85.976)	lr 0.00710
Train [78][210/3239]	Time 0.224 (0.906)	Data Time 0.002 (0.194)	Loss 2.3751 (2.3725)	Entropy 0.94316 (0.94338)	Top-1 acc 66.797 (67.102)	Top-5 acc 84.375 (86.004)	lr 0.00710
Train [78][220/3239]	Time 0.220 (0.887)	Data Time 0.002 (0.185)	Loss 2.3962 (2.3737)	Entropy 0.94316 (0.94337)	Top-1 acc 68.359 (67.064)	Top-5 acc 84.766 (85.980)	lr 0.00710
Train [78][230/3239]	Time 2.531 (0.869)	Data Time 0.002 (0.177)	Loss 2.4876 (2.3739)	Entropy 0.94316 (0.94337)	Top-1 acc 63.672 (67.076)	Top-5 acc 83.594 (85.968)	lr 0.00710
Train [78][240/3239]	Time 0.268 (0.843)	Data Time 0.001 (0.170)	Loss 2.2381 (2.3741)	Entropy 0.94310 (0.94335)	Top-1 acc 68.750 (67.076)	Top-5 acc 88.672 (85.981)	lr 0.00710
Train [78][250/3239]	Time 0.220 (0.829)	Data Time 0.001 (0.163)	Loss 2.2423 (2.3728)	Entropy 0.94307 (0.94334)	Top-1 acc 69.922 (67.096)	Top-5 acc 87.891 (86.023)	lr 0.00710
Train [78][260/3239]	Time 0.239 (0.816)	Data Time 0.001 (0.157)	Loss 2.3606 (2.3705)	Entropy 0.94298 (0.94333)	Top-1 acc 64.453 (67.138)	Top-5 acc 85.547 (86.074)	lr 0.00709
Train [78][270/3239]	Time 0.234 (0.803)	Data Time 0.001 (0.151)	Loss 2.4441 (2.3735)	Entropy 0.94296 (0.94332)	Top-1 acc 65.625 (67.097)	Top-5 acc 84.375 (85.999)	lr 0.00709
Train [78][280/3239]	Time 0.239 (0.791)	Data Time 0.001 (0.146)	Loss 2.4789 (2.3743)	Entropy 0.94297 (0.94330)	Top-1 acc 63.672 (67.048)	Top-5 acc 83.984 (86.000)	lr 0.00709
Train [78][290/3239]	Time 0.322 (0.781)	Data Time 0.001 (0.141)	Loss 2.2494 (2.3742)	Entropy 0.94298 (0.94329)	Top-1 acc 69.922 (67.059)	Top-5 acc 89.062 (85.983)	lr 0.00709
Train [78][300/3239]	Time 0.244 (0.771)	Data Time 0.001 (0.137)	Loss 2.4267 (2.3717)	Entropy 0.94293 (0.94328)	Top-1 acc 63.672 (67.115)	Top-5 acc 84.766 (86.019)	lr 0.00709
Train [78][310/3239]	Time 0.236 (0.761)	Data Time 0.003 (0.132)	Loss 2.1905 (2.3706)	Entropy 0.94287 (0.94327)	Top-1 acc 69.922 (67.150)	Top-5 acc 89.453 (86.053)	lr 0.00709
Train [78][320/3239]	Time 0.221 (0.752)	Data Time 0.001 (0.128)	Loss 2.3631 (2.3693)	Entropy 0.94281 (0.94326)	Top-1 acc 64.844 (67.173)	Top-5 acc 82.812 (86.066)	lr 0.00709
Train [78][330/3239]	Time 0.242 (0.744)	Data Time 0.001 (0.124)	Loss 2.4577 (2.3693)	Entropy 0.94277 (0.94324)	Top-1 acc 62.500 (67.172)	Top-5 acc 84.766 (86.047)	lr 0.00709
Train [78][340/3239]	Time 2.564 (0.736)	Data Time 0.001 (0.121)	Loss 2.3281 (2.3693)	Entropy 0.94277 (0.94323)	Top-1 acc 68.359 (67.190)	Top-5 acc 87.109 (86.033)	lr 0.00709
Train [78][350/3239]	Time 0.234 (0.722)	Data Time 0.001 (0.117)	Loss 2.5375 (2.3699)	Entropy 0.94274 (0.94322)	Top-1 acc 64.453 (67.186)	Top-5 acc 85.156 (86.018)	lr 0.00709
Train [78][360/3239]	Time 0.226 (0.715)	Data Time 0.001 (0.114)	Loss 2.4828 (2.3694)	Entropy 0.94271 (0.94320)	Top-1 acc 66.016 (67.209)	Top-5 acc 83.594 (86.033)	lr 0.00709
Train [78][370/3239]	Time 0.224 (0.708)	Data Time 0.001 (0.111)	Loss 2.2284 (2.3688)	Entropy 0.94268 (0.94319)	Top-1 acc 72.266 (67.229)	Top-5 acc 87.891 (86.032)	lr 0.00708
Train [78][380/3239]	Time 0.291 (0.702)	Data Time 0.001 (0.108)	Loss 2.5275 (2.3690)	Entropy 0.94260 (0.94317)	Top-1 acc 60.547 (67.201)	Top-5 acc 83.984 (86.038)	lr 0.00708
Train [78][390/3239]	Time 0.227 (0.696)	Data Time 0.002 (0.105)	Loss 2.4036 (2.3688)	Entropy 0.94254 (0.94316)	Top-1 acc 64.453 (67.192)	Top-5 acc 82.422 (86.038)	lr 0.00708
Train [78][400/3239]	Time 0.243 (0.690)	Data Time 0.001 (0.103)	Loss 2.4012 (2.3683)	Entropy 0.94254 (0.94314)	Top-1 acc 67.188 (67.194)	Top-5 acc 86.328 (86.053)	lr 0.00708
Train [78][410/3239]	Time 0.245 (0.820)	Data Time 0.002 (0.100)	Loss 2.3394 (2.3675)	Entropy 0.94250 (0.94313)	Top-1 acc 68.359 (67.218)	Top-5 acc 86.719 (86.073)	lr 0.00708
Train [78][420/3239]	Time 0.268 (0.812)	Data Time 0.002 (0.098)	Loss 2.3114 (2.3680)	Entropy 0.94248 (0.94311)	Top-1 acc 66.406 (67.218)	Top-5 acc 89.062 (86.056)	lr 0.00708
Train [78][430/3239]	Time 0.223 (0.805)	Data Time 0.002 (0.096)	Loss 2.3529 (2.3681)	Entropy 0.94245 (0.94310)	Top-1 acc 68.750 (67.234)	Top-5 acc 85.156 (86.038)	lr 0.00708
Train [78][440/3239]	Time 0.219 (0.797)	Data Time 0.001 (0.094)	Loss 2.5379 (2.3691)	Entropy 0.94250 (0.94308)	Top-1 acc 63.281 (67.214)	Top-5 acc 82.812 (86.023)	lr 0.00708
Train [78][450/3239]	Time 2.543 (0.790)	Data Time 0.001 (0.092)	Loss 2.4244 (2.3691)	Entropy 0.94250 (0.94307)	Top-1 acc 64.453 (67.221)	Top-5 acc 86.719 (86.019)	lr 0.00708
Train [78][460/3239]	Time 0.283 (0.778)	Data Time 0.002 (0.090)	Loss 2.2092 (2.3695)	Entropy 0.94246 (0.94306)	Top-1 acc 68.750 (67.220)	Top-5 acc 89.453 (86.013)	lr 0.00708
Train [78][470/3239]	Time 0.382 (0.773)	Data Time 0.003 (0.088)	Loss 2.4760 (2.3691)	Entropy 0.94244 (0.94304)	Top-1 acc 65.234 (67.217)	Top-5 acc 83.984 (86.024)	lr 0.00708
Train [78][480/3239]	Time 0.225 (0.767)	Data Time 0.001 (0.086)	Loss 2.4367 (2.3701)	Entropy 0.94237 (0.94303)	Top-1 acc 68.359 (67.187)	Top-5 acc 82.422 (85.991)	lr 0.00707
Train [78][490/3239]	Time 0.229 (0.761)	Data Time 0.002 (0.084)	Loss 2.4822 (2.3707)	Entropy 0.94243 (0.94302)	Top-1 acc 64.062 (67.159)	Top-5 acc 82.812 (85.982)	lr 0.00707
Train [78][500/3239]	Time 0.227 (0.755)	Data Time 0.002 (0.083)	Loss 2.4598 (2.3704)	Entropy 0.94236 (0.94300)	Top-1 acc 62.891 (67.150)	Top-5 acc 84.375 (85.987)	lr 0.00707
Train [78][510/3239]	Time 0.268 (0.750)	Data Time 0.001 (0.081)	Loss 2.3648 (2.3709)	Entropy 0.94229 (0.94299)	Top-1 acc 67.188 (67.139)	Top-5 acc 85.156 (85.960)	lr 0.00707
Train [78][520/3239]	Time 0.228 (0.745)	Data Time 0.001 (0.080)	Loss 2.5219 (2.3708)	Entropy 0.94226 (0.94298)	Top-1 acc 62.891 (67.128)	Top-5 acc 82.812 (85.964)	lr 0.00707
Train [78][530/3239]	Time 0.257 (0.740)	Data Time 0.001 (0.078)	Loss 2.1429 (2.3698)	Entropy 0.94215 (0.94296)	Top-1 acc 72.656 (67.156)	Top-5 acc 90.234 (85.987)	lr 0.00707
Train [78][540/3239]	Time 0.228 (0.736)	Data Time 0.001 (0.077)	Loss 2.3086 (2.3688)	Entropy 0.94206 (0.94295)	Top-1 acc 69.531 (67.198)	Top-5 acc 89.062 (85.998)	lr 0.00707
Train [78][550/3239]	Time 0.257 (0.731)	Data Time 0.001 (0.075)	Loss 2.3883 (2.3693)	Entropy 0.94202 (0.94293)	Top-1 acc 67.969 (67.197)	Top-5 acc 85.156 (85.990)	lr 0.00707
Train [78][560/3239]	Time 2.662 (0.727)	Data Time 0.001 (0.074)	Loss 2.2959 (2.3692)	Entropy 0.94202 (0.94292)	Top-1 acc 68.359 (67.202)	Top-5 acc 85.156 (85.989)	lr 0.00707
Train [78][570/3239]	Time 0.282 (0.719)	Data Time 0.001 (0.073)	Loss 2.2414 (2.3683)	Entropy 0.94217 (0.94290)	Top-1 acc 70.703 (67.207)	Top-5 acc 89.453 (86.005)	lr 0.00707
Train [78][580/3239]	Time 0.240 (0.714)	Data Time 0.001 (0.072)	Loss 2.2224 (2.3676)	Entropy 0.94212 (0.94289)	Top-1 acc 70.312 (67.218)	Top-5 acc 87.891 (86.015)	lr 0.00707
Train [78][590/3239]	Time 0.210 (0.711)	Data Time 0.001 (0.070)	Loss 2.4247 (2.3676)	Entropy 0.94208 (0.94288)	Top-1 acc 64.844 (67.204)	Top-5 acc 85.938 (86.022)	lr 0.00706
Train [78][600/3239]	Time 0.358 (0.707)	Data Time 0.001 (0.069)	Loss 2.3563 (2.3681)	Entropy 0.94210 (0.94286)	Top-1 acc 70.312 (67.211)	Top-5 acc 85.547 (86.007)	lr 0.00706
Train [78][610/3239]	Time 0.229 (0.703)	Data Time 0.001 (0.068)	Loss 2.3720 (2.3682)	Entropy 0.94201 (0.94285)	Top-1 acc 67.969 (67.200)	Top-5 acc 86.328 (86.007)	lr 0.00706
Train [78][620/3239]	Time 0.233 (0.700)	Data Time 0.001 (0.067)	Loss 2.3975 (2.3688)	Entropy 0.94197 (0.94284)	Top-1 acc 64.844 (67.192)	Top-5 acc 84.375 (85.982)	lr 0.00706
Train [78][630/3239]	Time 0.252 (0.696)	Data Time 0.002 (0.066)	Loss 2.3568 (2.3706)	Entropy 0.94192 (0.94282)	Top-1 acc 66.797 (67.142)	Top-5 acc 85.938 (85.957)	lr 0.00706
Train [78][640/3239]	Time 0.224 (0.693)	Data Time 0.001 (0.065)	Loss 2.5746 (2.3709)	Entropy 0.94192 (0.94281)	Top-1 acc 59.375 (67.125)	Top-5 acc 84.766 (85.950)	lr 0.00706
Train [78][650/3239]	Time 0.228 (0.690)	Data Time 0.001 (0.064)	Loss 2.4706 (2.3709)	Entropy 0.94201 (0.94279)	Top-1 acc 64.844 (67.123)	Top-5 acc 84.375 (85.943)	lr 0.00706
Train [78][660/3239]	Time 0.232 (0.687)	Data Time 0.001 (0.063)	Loss 2.4370 (2.3708)	Entropy 0.94187 (0.94278)	Top-1 acc 67.969 (67.130)	Top-5 acc 84.375 (85.941)	lr 0.00706
Train [78][670/3239]	Time 2.618 (0.684)	Data Time 0.001 (0.062)	Loss 2.2916 (2.3710)	Entropy 0.94187 (0.94277)	Top-1 acc 69.531 (67.119)	Top-5 acc 87.109 (85.942)	lr 0.00706
Train [78][680/3239]	Time 0.225 (0.677)	Data Time 0.001 (0.061)	Loss 2.4302 (2.3717)	Entropy 0.94185 (0.94275)	Top-1 acc 65.234 (67.111)	Top-5 acc 83.594 (85.915)	lr 0.00706
Train [78][690/3239]	Time 0.332 (0.674)	Data Time 0.001 (0.060)	Loss 2.4576 (2.3718)	Entropy 0.94187 (0.94274)	Top-1 acc 62.500 (67.095)	Top-5 acc 85.938 (85.905)	lr 0.00706
Train [78][700/3239]	Time 0.229 (0.671)	Data Time 0.001 (0.060)	Loss 2.3347 (2.3724)	Entropy 0.94185 (0.94273)	Top-1 acc 66.797 (67.064)	Top-5 acc 86.328 (85.893)	lr 0.00705
Train [78][710/3239]	Time 0.219 (0.668)	Data Time 0.001 (0.059)	Loss 2.4493 (2.3730)	Entropy 0.94175 (0.94272)	Top-1 acc 64.062 (67.042)	Top-5 acc 85.938 (85.888)	lr 0.00705
Train [78][720/3239]	Time 0.232 (0.666)	Data Time 0.001 (0.058)	Loss 2.4563 (2.3732)	Entropy 0.94172 (0.94270)	Top-1 acc 64.453 (67.038)	Top-5 acc 84.375 (85.878)	lr 0.00705
Train [78][730/3239]	Time 0.281 (0.663)	Data Time 0.001 (0.057)	Loss 2.3586 (2.3733)	Entropy 0.94178 (0.94269)	Top-1 acc 67.578 (67.045)	Top-5 acc 85.938 (85.872)	lr 0.00705
Train [78][740/3239]	Time 0.247 (0.661)	Data Time 0.002 (0.056)	Loss 2.6075 (2.3735)	Entropy 0.94175 (0.94268)	Top-1 acc 61.719 (67.038)	Top-5 acc 79.688 (85.866)	lr 0.00705
Train [78][750/3239]	Time 0.229 (0.658)	Data Time 0.001 (0.056)	Loss 2.5157 (2.3737)	Entropy 0.94166 (0.94266)	Top-1 acc 64.062 (67.039)	Top-5 acc 83.594 (85.859)	lr 0.00705
Train [78][760/3239]	Time 0.229 (0.656)	Data Time 0.001 (0.055)	Loss 2.2608 (2.3737)	Entropy 0.94168 (0.94265)	Top-1 acc 69.922 (67.032)	Top-5 acc 87.891 (85.863)	lr 0.00705
Train [78][770/3239]	Time 0.259 (0.726)	Data Time 0.003 (0.054)	Loss 2.2564 (2.3742)	Entropy 0.94161 (0.94264)	Top-1 acc 71.094 (67.017)	Top-5 acc 85.938 (85.840)	lr 0.00705
Train [78][780/3239]	Time 2.536 (0.723)	Data Time 0.002 (0.054)	Loss 2.4083 (2.3740)	Entropy 0.94161 (0.94263)	Top-1 acc 67.578 (67.020)	Top-5 acc 85.547 (85.850)	lr 0.00705
Train [78][790/3239]	Time 0.230 (0.717)	Data Time 0.002 (0.053)	Loss 2.3419 (2.3739)	Entropy 0.94160 (0.94261)	Top-1 acc 67.578 (67.017)	Top-5 acc 85.938 (85.858)	lr 0.00705
Train [78][800/3239]	Time 0.238 (0.714)	Data Time 0.001 (0.052)	Loss 2.6174 (2.3741)	Entropy 0.94156 (0.94260)	Top-1 acc 58.984 (67.008)	Top-5 acc 82.422 (85.855)	lr 0.00705
Train [78][810/3239]	Time 0.226 (0.711)	Data Time 0.001 (0.052)	Loss 2.1737 (2.3739)	Entropy 0.94136 (0.94258)	Top-1 acc 70.312 (67.019)	Top-5 acc 89.453 (85.862)	lr 0.00704
Train [78][820/3239]	Time 0.329 (0.708)	Data Time 0.001 (0.051)	Loss 2.4773 (2.3742)	Entropy 0.94132 (0.94257)	Top-1 acc 66.406 (67.020)	Top-5 acc 85.547 (85.859)	lr 0.00704
Train [78][830/3239]	Time 0.228 (0.705)	Data Time 0.001 (0.051)	Loss 2.2633 (2.3743)	Entropy 0.94128 (0.94255)	Top-1 acc 68.359 (67.024)	Top-5 acc 88.281 (85.859)	lr 0.00704
Train [78][840/3239]	Time 0.258 (0.703)	Data Time 0.006 (0.050)	Loss 2.3521 (2.3741)	Entropy 0.94126 (0.94254)	Top-1 acc 69.531 (67.023)	Top-5 acc 87.500 (85.867)	lr 0.00704
Train [78][850/3239]	Time 0.226 (0.700)	Data Time 0.001 (0.049)	Loss 2.2146 (2.3741)	Entropy 0.94123 (0.94252)	Top-1 acc 70.703 (67.016)	Top-5 acc 90.625 (85.873)	lr 0.00704
Train [78][860/3239]	Time 0.220 (0.698)	Data Time 0.001 (0.049)	Loss 2.3568 (2.3741)	Entropy 0.94121 (0.94251)	Top-1 acc 67.578 (67.017)	Top-5 acc 89.062 (85.874)	lr 0.00704
Train [78][870/3239]	Time 0.241 (0.695)	Data Time 0.001 (0.048)	Loss 2.6101 (2.3741)	Entropy 0.94102 (0.94249)	Top-1 acc 61.719 (67.016)	Top-5 acc 82.031 (85.877)	lr 0.00704
Train [78][880/3239]	Time 0.224 (0.693)	Data Time 0.001 (0.048)	Loss 2.3423 (2.3739)	Entropy 0.94103 (0.94248)	Top-1 acc 66.797 (67.029)	Top-5 acc 85.938 (85.886)	lr 0.00704
Train [78][890/3239]	Time 2.562 (0.691)	Data Time 0.001 (0.047)	Loss 2.2943 (2.3738)	Entropy 0.94103 (0.94246)	Top-1 acc 67.188 (67.039)	Top-5 acc 86.719 (85.884)	lr 0.00704
Train [78][900/3239]	Time 0.299 (0.686)	Data Time 0.001 (0.047)	Loss 2.3025 (2.3735)	Entropy 0.94104 (0.94244)	Top-1 acc 68.750 (67.047)	Top-5 acc 86.719 (85.894)	lr 0.00704
Train [78][910/3239]	Time 0.331 (0.683)	Data Time 0.001 (0.046)	Loss 2.4201 (2.3736)	Entropy 0.94102 (0.94243)	Top-1 acc 67.188 (67.045)	Top-5 acc 85.547 (85.889)	lr 0.00704
Train [78][920/3239]	Time 0.235 (0.681)	Data Time 0.003 (0.046)	Loss 2.3743 (2.3738)	Entropy 0.94095 (0.94241)	Top-1 acc 66.797 (67.044)	Top-5 acc 87.109 (85.883)	lr 0.00703
Train [78][930/3239]	Time 0.232 (0.679)	Data Time 0.001 (0.045)	Loss 2.4638 (2.3740)	Entropy 0.94090 (0.94240)	Top-1 acc 61.719 (67.032)	Top-5 acc 86.719 (85.879)	lr 0.00703
Train [78][940/3239]	Time 0.224 (0.677)	Data Time 0.001 (0.045)	Loss 2.3735 (2.3744)	Entropy 0.94089 (0.94238)	Top-1 acc 67.969 (67.024)	Top-5 acc 84.766 (85.873)	lr 0.00703
Train [78][950/3239]	Time 0.348 (0.675)	Data Time 0.001 (0.044)	Loss 2.6035 (2.3746)	Entropy 0.94088 (0.94237)	Top-1 acc 63.672 (67.016)	Top-5 acc 80.469 (85.862)	lr 0.00703
Train [78][960/3239]	Time 0.228 (0.673)	Data Time 0.001 (0.044)	Loss 2.3482 (2.3748)	Entropy 0.94087 (0.94235)	Top-1 acc 68.359 (67.009)	Top-5 acc 86.328 (85.865)	lr 0.00703
Train [78][970/3239]	Time 0.229 (0.670)	Data Time 0.001 (0.044)	Loss 2.2721 (2.3750)	Entropy 0.94082 (0.94233)	Top-1 acc 70.703 (67.011)	Top-5 acc 86.328 (85.859)	lr 0.00703
Train [78][980/3239]	Time 0.236 (0.668)	Data Time 0.001 (0.043)	Loss 2.4036 (2.3752)	Entropy 0.94081 (0.94232)	Top-1 acc 64.844 (67.007)	Top-5 acc 85.547 (85.854)	lr 0.00703
Train [78][990/3239]	Time 0.251 (0.666)	Data Time 0.001 (0.043)	Loss 2.5006 (2.3751)	Entropy 0.94073 (0.94230)	Top-1 acc 62.500 (67.006)	Top-5 acc 84.375 (85.851)	lr 0.00703
Train [78][1000/3239]	Time 2.650 (0.665)	Data Time 0.001 (0.042)	Loss 2.4896 (2.3753)	Entropy 0.94073 (0.94229)	Top-1 acc 62.891 (67.003)	Top-5 acc 82.422 (85.838)	lr 0.00703
Train [78][1010/3239]	Time 0.271 (0.660)	Data Time 0.002 (0.042)	Loss 2.2683 (2.3748)	Entropy 0.94065 (0.94227)	Top-1 acc 69.922 (67.014)	Top-5 acc 87.891 (85.844)	lr 0.00703
Train [78][1020/3239]	Time 0.224 (0.659)	Data Time 0.001 (0.041)	Loss 2.3113 (2.3750)	Entropy 0.94067 (0.94226)	Top-1 acc 68.359 (67.001)	Top-5 acc 85.547 (85.835)	lr 0.00703
Train [78][1030/3239]	Time 0.217 (0.657)	Data Time 0.001 (0.041)	Loss 2.4291 (2.3751)	Entropy 0.94064 (0.94224)	Top-1 acc 66.797 (67.002)	Top-5 acc 85.156 (85.831)	lr 0.00702
Train [78][1040/3239]	Time 0.239 (0.655)	Data Time 0.001 (0.041)	Loss 2.2555 (2.3750)	Entropy 0.94064 (0.94222)	Top-1 acc 69.922 (67.003)	Top-5 acc 87.500 (85.832)	lr 0.00702
Train [78][1050/3239]	Time 0.230 (0.653)	Data Time 0.001 (0.040)	Loss 2.2520 (2.3753)	Entropy 0.94063 (0.94221)	Top-1 acc 68.359 (66.994)	Top-5 acc 89.453 (85.833)	lr 0.00702
Train [78][1060/3239]	Time 0.238 (0.651)	Data Time 0.001 (0.040)	Loss 2.4753 (2.3759)	Entropy 0.94060 (0.94219)	Top-1 acc 64.844 (66.977)	Top-5 acc 87.109 (85.826)	lr 0.00702
Train [78][1070/3239]	Time 0.224 (0.650)	Data Time 0.001 (0.040)	Loss 2.2372 (2.3760)	Entropy 0.94054 (0.94218)	Top-1 acc 69.141 (66.977)	Top-5 acc 88.281 (85.821)	lr 0.00702
Train [78][1080/3239]	Time 0.333 (0.648)	Data Time 0.001 (0.039)	Loss 2.3969 (2.3759)	Entropy 0.94059 (0.94216)	Top-1 acc 68.359 (66.976)	Top-5 acc 84.766 (85.826)	lr 0.00702
Train [78][1090/3239]	Time 0.215 (0.647)	Data Time 0.001 (0.039)	Loss 2.3983 (2.3762)	Entropy 0.94059 (0.94215)	Top-1 acc 66.406 (66.972)	Top-5 acc 83.984 (85.821)	lr 0.00702
Train [78][1100/3239]	Time 0.231 (0.645)	Data Time 0.001 (0.039)	Loss 2.3465 (2.3763)	Entropy 0.94055 (0.94214)	Top-1 acc 68.750 (66.980)	Top-5 acc 85.156 (85.817)	lr 0.00702
Train [78][1110/3239]	Time 2.541 (0.643)	Data Time 0.001 (0.038)	Loss 2.4756 (2.3764)	Entropy 0.94055 (0.94212)	Top-1 acc 65.234 (66.978)	Top-5 acc 85.938 (85.817)	lr 0.00702
Train [78][1120/3239]	Time 0.224 (0.640)	Data Time 0.001 (0.038)	Loss 2.2375 (2.3764)	Entropy 0.94051 (0.94211)	Top-1 acc 70.312 (66.978)	Top-5 acc 88.672 (85.814)	lr 0.00702
Train [78][1130/3239]	Time 0.213 (0.638)	Data Time 0.001 (0.038)	Loss 2.2216 (2.3762)	Entropy 0.94034 (0.94209)	Top-1 acc 76.172 (66.984)	Top-5 acc 89.453 (85.825)	lr 0.00702
Train [78][1140/3239]	Time 0.236 (0.687)	Data Time 0.002 (0.037)	Loss 2.4260 (2.3762)	Entropy 0.94032 (0.94208)	Top-1 acc 65.625 (66.987)	Top-5 acc 82.812 (85.821)	lr 0.00701
Train [78][1150/3239]	Time 0.232 (0.685)	Data Time 0.002 (0.037)	Loss 2.1625 (2.3760)	Entropy 0.94050 (0.94206)	Top-1 acc 71.484 (66.990)	Top-5 acc 89.453 (85.827)	lr 0.00701
Train [78][1160/3239]	Time 0.231 (0.683)	Data Time 0.001 (0.037)	Loss 2.3194 (2.3762)	Entropy 0.94053 (0.94205)	Top-1 acc 68.359 (66.984)	Top-5 acc 86.719 (85.824)	lr 0.00701
Train [78][1170/3239]	Time 0.215 (0.681)	Data Time 0.001 (0.036)	Loss 2.5417 (2.3761)	Entropy 0.94038 (0.94204)	Top-1 acc 63.281 (66.989)	Top-5 acc 82.812 (85.822)	lr 0.00701
Train [78][1180/3239]	Time 0.206 (0.679)	Data Time 0.001 (0.036)	Loss 2.4384 (2.3759)	Entropy 0.94036 (0.94202)	Top-1 acc 64.844 (66.993)	Top-5 acc 83.594 (85.826)	lr 0.00701
Train [78][1190/3239]	Time 0.237 (0.678)	Data Time 0.001 (0.036)	Loss 2.5176 (2.3761)	Entropy 0.94027 (0.94201)	Top-1 acc 61.719 (66.978)	Top-5 acc 84.766 (85.823)	lr 0.00701
Train [78][1200/3239]	Time 0.233 (0.676)	Data Time 0.001 (0.035)	Loss 2.3721 (2.3762)	Entropy 0.94025 (0.94199)	Top-1 acc 67.969 (66.977)	Top-5 acc 86.719 (85.825)	lr 0.00701
Train [78][1210/3239]	Time 0.350 (0.674)	Data Time 0.001 (0.035)	Loss 2.4469 (2.3765)	Entropy 0.94025 (0.94198)	Top-1 acc 64.844 (66.965)	Top-5 acc 87.109 (85.819)	lr 0.00701
Train [78][1220/3239]	Time 2.563 (0.673)	Data Time 0.001 (0.035)	Loss 2.4549 (2.3768)	Entropy 0.94025 (0.94196)	Top-1 acc 66.016 (66.964)	Top-5 acc 86.328 (85.817)	lr 0.00701
Train [78][1230/3239]	Time 0.270 (0.669)	Data Time 0.001 (0.035)	Loss 2.3099 (2.3771)	Entropy 0.94022 (0.94195)	Top-1 acc 68.359 (66.963)	Top-5 acc 87.891 (85.809)	lr 0.00701
Train [78][1240/3239]	Time 0.231 (0.668)	Data Time 0.001 (0.034)	Loss 2.4914 (2.3773)	Entropy 0.94017 (0.94194)	Top-1 acc 63.281 (66.960)	Top-5 acc 81.641 (85.806)	lr 0.00701
Train [78][1250/3239]	Time 0.266 (0.666)	Data Time 0.002 (0.034)	Loss 2.4358 (2.3773)	Entropy 0.94002 (0.94192)	Top-1 acc 65.234 (66.955)	Top-5 acc 83.594 (85.799)	lr 0.00700
Train [78][1260/3239]	Time 0.236 (0.665)	Data Time 0.002 (0.034)	Loss 2.3254 (2.3773)	Entropy 0.93997 (0.94190)	Top-1 acc 67.188 (66.946)	Top-5 acc 86.719 (85.797)	lr 0.00700
Train [78][1270/3239]	Time 0.250 (0.663)	Data Time 0.002 (0.034)	Loss 2.2784 (2.3773)	Entropy 0.93991 (0.94189)	Top-1 acc 71.094 (66.943)	Top-5 acc 87.500 (85.796)	lr 0.00700
Train [78][1280/3239]	Time 0.230 (0.662)	Data Time 0.001 (0.033)	Loss 2.4256 (2.3776)	Entropy 0.93989 (0.94187)	Top-1 acc 65.234 (66.935)	Top-5 acc 87.500 (85.789)	lr 0.00700
Train [78][1290/3239]	Time 0.226 (0.660)	Data Time 0.001 (0.033)	Loss 2.4731 (2.3776)	Entropy 0.93974 (0.94186)	Top-1 acc 67.578 (66.935)	Top-5 acc 83.984 (85.790)	lr 0.00700
Train [78][1300/3239]	Time 0.232 (0.659)	Data Time 0.002 (0.033)	Loss 2.4910 (2.3781)	Entropy 0.93961 (0.94184)	Top-1 acc 60.938 (66.922)	Top-5 acc 84.766 (85.780)	lr 0.00700
Train [78][1310/3239]	Time 0.233 (0.657)	Data Time 0.001 (0.033)	Loss 2.3282 (2.3779)	Entropy 0.93959 (0.94182)	Top-1 acc 67.969 (66.927)	Top-5 acc 85.547 (85.785)	lr 0.00700
Train [78][1320/3239]	Time 0.250 (0.656)	Data Time 0.001 (0.032)	Loss 2.4855 (2.3781)	Entropy 0.93957 (0.94181)	Top-1 acc 64.844 (66.924)	Top-5 acc 83.203 (85.776)	lr 0.00700
Train [78][1330/3239]	Time 2.509 (0.655)	Data Time 0.001 (0.032)	Loss 2.2997 (2.3781)	Entropy 0.93957 (0.94179)	Top-1 acc 67.578 (66.923)	Top-5 acc 85.547 (85.777)	lr 0.00700
Train [78][1340/3239]	Time 0.338 (0.652)	Data Time 0.001 (0.032)	Loss 2.4564 (2.3781)	Entropy 0.93947 (0.94177)	Top-1 acc 63.672 (66.926)	Top-5 acc 83.984 (85.773)	lr 0.00700
Train [78][1350/3239]	Time 0.293 (0.650)	Data Time 0.001 (0.032)	Loss 2.2875 (2.3778)	Entropy 0.93952 (0.94176)	Top-1 acc 71.094 (66.936)	Top-5 acc 87.109 (85.783)	lr 0.00700
Train [78][1360/3239]	Time 0.219 (0.649)	Data Time 0.001 (0.032)	Loss 2.3991 (2.3779)	Entropy 0.93951 (0.94174)	Top-1 acc 67.969 (66.939)	Top-5 acc 85.547 (85.782)	lr 0.00699
Train [78][1370/3239]	Time 0.237 (0.648)	Data Time 0.001 (0.031)	Loss 2.3686 (2.3778)	Entropy 0.93953 (0.94172)	Top-1 acc 67.969 (66.945)	Top-5 acc 87.891 (85.783)	lr 0.00699
Train [78][1380/3239]	Time 0.227 (0.646)	Data Time 0.001 (0.031)	Loss 2.2492 (2.3778)	Entropy 0.93955 (0.94171)	Top-1 acc 69.531 (66.953)	Top-5 acc 88.672 (85.788)	lr 0.00699
Train [78][1390/3239]	Time 0.226 (0.645)	Data Time 0.001 (0.031)	Loss 2.3304 (2.3778)	Entropy 0.93954 (0.94169)	Top-1 acc 67.969 (66.949)	Top-5 acc 86.328 (85.790)	lr 0.00699
Train [78][1400/3239]	Time 0.216 (0.644)	Data Time 0.001 (0.031)	Loss 2.3570 (2.3782)	Entropy 0.93952 (0.94168)	Top-1 acc 68.359 (66.940)	Top-5 acc 86.719 (85.786)	lr 0.00699
Train [78][1410/3239]	Time 0.227 (0.643)	Data Time 0.001 (0.030)	Loss 2.4255 (2.3784)	Entropy 0.93960 (0.94166)	Top-1 acc 68.750 (66.934)	Top-5 acc 83.203 (85.781)	lr 0.00699
Train [78][1420/3239]	Time 0.214 (0.641)	Data Time 0.001 (0.030)	Loss 2.4280 (2.3787)	Entropy 0.93953 (0.94165)	Top-1 acc 65.625 (66.927)	Top-5 acc 85.156 (85.776)	lr 0.00699
Train [78][1430/3239]	Time 0.314 (0.640)	Data Time 0.001 (0.030)	Loss 2.5904 (2.3787)	Entropy 0.93973 (0.94163)	Top-1 acc 64.453 (66.931)	Top-5 acc 82.031 (85.773)	lr 0.00699
Train [78][1440/3239]	Time 2.516 (0.639)	Data Time 0.001 (0.030)	Loss 2.3976 (2.3794)	Entropy 0.93973 (0.94162)	Top-1 acc 69.531 (66.921)	Top-5 acc 87.500 (85.771)	lr 0.00699
Train [78][1450/3239]	Time 0.234 (0.636)	Data Time 0.001 (0.030)	Loss 2.4916 (2.3796)	Entropy 0.93973 (0.94161)	Top-1 acc 64.062 (66.917)	Top-5 acc 82.422 (85.768)	lr 0.00699
Train [78][1460/3239]	Time 0.238 (0.635)	Data Time 0.001 (0.029)	Loss 2.5020 (2.3800)	Entropy 0.93973 (0.94159)	Top-1 acc 64.844 (66.907)	Top-5 acc 83.984 (85.760)	lr 0.00699
Train [78][1470/3239]	Time 0.290 (0.634)	Data Time 0.001 (0.029)	Loss 2.4411 (2.3803)	Entropy 0.93969 (0.94158)	Top-1 acc 65.234 (66.905)	Top-5 acc 85.547 (85.755)	lr 0.00698
Train [78][1480/3239]	Time 0.234 (0.633)	Data Time 0.001 (0.029)	Loss 2.3250 (2.3802)	Entropy 0.93964 (0.94157)	Top-1 acc 67.188 (66.910)	Top-5 acc 87.891 (85.759)	lr 0.00698
Train [78][1490/3239]	Time 0.213 (0.632)	Data Time 0.001 (0.029)	Loss 2.5121 (2.3806)	Entropy 0.93964 (0.94156)	Top-1 acc 63.281 (66.902)	Top-5 acc 82.812 (85.749)	lr 0.00698
Train [78][1500/3239]	Time 0.238 (0.666)	Data Time 0.002 (0.029)	Loss 2.5826 (2.3807)	Entropy 0.93965 (0.94154)	Top-1 acc 65.625 (66.907)	Top-5 acc 82.422 (85.745)	lr 0.00698
Train [78][1510/3239]	Time 0.233 (0.665)	Data Time 0.002 (0.029)	Loss 2.5074 (2.3805)	Entropy 0.93960 (0.94153)	Top-1 acc 61.328 (66.908)	Top-5 acc 83.203 (85.745)	lr 0.00698
Train [78][1520/3239]	Time 0.223 (0.664)	Data Time 0.002 (0.028)	Loss 2.3264 (2.3805)	Entropy 0.93955 (0.94152)	Top-1 acc 68.750 (66.909)	Top-5 acc 87.109 (85.744)	lr 0.00698
Train [78][1530/3239]	Time 0.239 (0.663)	Data Time 0.001 (0.028)	Loss 2.2992 (2.3803)	Entropy 0.93960 (0.94150)	Top-1 acc 67.188 (66.908)	Top-5 acc 88.281 (85.751)	lr 0.00698
Train [78][1540/3239]	Time 0.258 (0.662)	Data Time 0.001 (0.028)	Loss 2.4085 (2.3805)	Entropy 0.93956 (0.94149)	Top-1 acc 67.188 (66.900)	Top-5 acc 82.031 (85.744)	lr 0.00698
Train [78][1550/3239]	Time 2.550 (0.660)	Data Time 0.002 (0.028)	Loss 2.3806 (2.3802)	Entropy 0.93956 (0.94148)	Top-1 acc 65.625 (66.903)	Top-5 acc 87.109 (85.748)	lr 0.00698
Train [78][1560/3239]	Time 0.337 (0.658)	Data Time 0.001 (0.028)	Loss 2.3735 (2.3801)	Entropy 0.93956 (0.94147)	Top-1 acc 66.406 (66.902)	Top-5 acc 86.719 (85.750)	lr 0.00698
Train [78][1570/3239]	Time 0.228 (0.657)	Data Time 0.001 (0.028)	Loss 2.4287 (2.3802)	Entropy 0.93950 (0.94145)	Top-1 acc 64.844 (66.894)	Top-5 acc 85.156 (85.754)	lr 0.00698
Train [78][1580/3239]	Time 0.237 (0.655)	Data Time 0.001 (0.027)	Loss 2.2375 (2.3802)	Entropy 0.93947 (0.94144)	Top-1 acc 70.312 (66.889)	Top-5 acc 90.625 (85.756)	lr 0.00698
Train [78][1590/3239]	Time 0.262 (0.654)	Data Time 0.001 (0.027)	Loss 2.4121 (2.3812)	Entropy 0.93948 (0.94143)	Top-1 acc 66.406 (66.863)	Top-5 acc 82.812 (85.741)	lr 0.00697
Train [78][1600/3239]	Time 0.231 (0.653)	Data Time 0.001 (0.027)	Loss 2.5239 (2.3814)	Entropy 0.93949 (0.94142)	Top-1 acc 67.578 (66.861)	Top-5 acc 82.812 (85.735)	lr 0.00697
Train [78][1610/3239]	Time 0.228 (0.652)	Data Time 0.001 (0.027)	Loss 2.3566 (2.3823)	Entropy 0.93945 (0.94141)	Top-1 acc 64.062 (66.836)	Top-5 acc 87.109 (85.718)	lr 0.00697
Train [78][1620/3239]	Time 0.223 (0.651)	Data Time 0.002 (0.027)	Loss 2.5742 (2.3825)	Entropy 0.93947 (0.94139)	Top-1 acc 62.109 (66.826)	Top-5 acc 84.766 (85.716)	lr 0.00697
Train [78][1630/3239]	Time 0.229 (0.650)	Data Time 0.001 (0.027)	Loss 2.3282 (2.3825)	Entropy 0.93944 (0.94138)	Top-1 acc 68.359 (66.825)	Top-5 acc 87.891 (85.717)	lr 0.00697
Train [78][1640/3239]	Time 0.223 (0.649)	Data Time 0.001 (0.026)	Loss 3.3941 (2.3829)	Entropy 0.93939 (0.94137)	Top-1 acc 44.141 (66.814)	Top-5 acc 68.750 (85.712)	lr 0.00697
Train [78][1650/3239]	Time 0.324 (0.648)	Data Time 0.001 (0.026)	Loss 2.4704 (2.3829)	Entropy 0.93939 (0.94136)	Top-1 acc 66.016 (66.815)	Top-5 acc 84.375 (85.710)	lr 0.00697
Train [78][1660/3239]	Time 2.583 (0.647)	Data Time 0.001 (0.026)	Loss 2.3429 (2.3830)	Entropy 0.93939 (0.94135)	Top-1 acc 67.578 (66.818)	Top-5 acc 89.062 (85.710)	lr 0.00697
Train [78][1670/3239]	Time 0.243 (0.644)	Data Time 0.001 (0.026)	Loss 2.3906 (2.3840)	Entropy 0.93933 (0.94133)	Top-1 acc 64.844 (66.798)	Top-5 acc 85.547 (85.699)	lr 0.00697
Train [78][1680/3239]	Time 0.219 (0.643)	Data Time 0.001 (0.026)	Loss 2.3284 (2.3841)	Entropy 0.93929 (0.94132)	Top-1 acc 65.625 (66.799)	Top-5 acc 87.891 (85.697)	lr 0.00697
Train [78][1690/3239]	Time 0.233 (0.642)	Data Time 0.001 (0.026)	Loss 2.3874 (2.3839)	Entropy 0.93932 (0.94131)	Top-1 acc 66.406 (66.796)	Top-5 acc 83.984 (85.699)	lr 0.00697
Train [78][1700/3239]	Time 0.223 (0.641)	Data Time 0.001 (0.026)	Loss 2.1889 (2.3835)	Entropy 0.93934 (0.94130)	Top-1 acc 71.484 (66.801)	Top-5 acc 88.672 (85.705)	lr 0.00696
Train [78][1710/3239]	Time 0.226 (0.640)	Data Time 0.001 (0.025)	Loss 2.4131 (2.3836)	Entropy 0.93927 (0.94129)	Top-1 acc 68.750 (66.799)	Top-5 acc 80.469 (85.697)	lr 0.00696
Train [78][1720/3239]	Time 0.213 (0.639)	Data Time 0.001 (0.025)	Loss 2.5781 (2.3837)	Entropy 0.93925 (0.94127)	Top-1 acc 60.547 (66.792)	Top-5 acc 78.125 (85.695)	lr 0.00696
Train [78][1730/3239]	Time 0.249 (0.638)	Data Time 0.001 (0.025)	Loss 2.4543 (2.3836)	Entropy 0.93920 (0.94126)	Top-1 acc 66.016 (66.791)	Top-5 acc 83.594 (85.699)	lr 0.00696
Train [78][1740/3239]	Time 0.310 (0.637)	Data Time 0.001 (0.025)	Loss 2.6000 (2.3835)	Entropy 0.93923 (0.94125)	Top-1 acc 58.203 (66.786)	Top-5 acc 83.203 (85.706)	lr 0.00696
Train [78][1750/3239]	Time 0.238 (0.636)	Data Time 0.001 (0.025)	Loss 2.3528 (2.3836)	Entropy 0.93908 (0.94124)	Top-1 acc 70.312 (66.785)	Top-5 acc 86.328 (85.699)	lr 0.00696
Train [78][1760/3239]	Time 0.231 (0.636)	Data Time 0.001 (0.025)	Loss 2.4118 (2.3838)	Entropy 0.93910 (0.94123)	Top-1 acc 65.234 (66.782)	Top-5 acc 85.938 (85.697)	lr 0.00696
Train [78][1770/3239]	Time 2.561 (0.635)	Data Time 0.001 (0.025)	Loss 2.4672 (2.3838)	Entropy 0.93910 (0.94121)	Top-1 acc 65.625 (66.787)	Top-5 acc 85.547 (85.698)	lr 0.00696
Train [78][1780/3239]	Time 0.215 (0.632)	Data Time 0.001 (0.024)	Loss 2.2322 (2.3835)	Entropy 0.93905 (0.94120)	Top-1 acc 71.484 (66.791)	Top-5 acc 91.016 (85.707)	lr 0.00696
Train [78][1790/3239]	Time 0.223 (0.632)	Data Time 0.001 (0.024)	Loss 2.4357 (2.3837)	Entropy 0.93897 (0.94119)	Top-1 acc 67.969 (66.781)	Top-5 acc 83.594 (85.704)	lr 0.00696
Train [78][1800/3239]	Time 0.225 (0.631)	Data Time 0.001 (0.024)	Loss 2.2619 (2.3839)	Entropy 0.93897 (0.94118)	Top-1 acc 71.484 (66.781)	Top-5 acc 90.234 (85.699)	lr 0.00696
Train [78][1810/3239]	Time 0.284 (0.630)	Data Time 0.001 (0.024)	Loss 2.4423 (2.3839)	Entropy 0.93892 (0.94117)	Top-1 acc 67.578 (66.788)	Top-5 acc 83.203 (85.697)	lr 0.00695
Train [78][1820/3239]	Time 0.240 (0.629)	Data Time 0.001 (0.024)	Loss 2.4097 (2.3838)	Entropy 0.93891 (0.94115)	Top-1 acc 65.625 (66.787)	Top-5 acc 84.766 (85.697)	lr 0.00695
Train [78][1830/3239]	Time 0.337 (0.628)	Data Time 0.001 (0.024)	Loss 2.4710 (2.3837)	Entropy 0.93886 (0.94114)	Top-1 acc 67.188 (66.787)	Top-5 acc 85.156 (85.698)	lr 0.00695
Train [78][1840/3239]	Time 0.224 (0.627)	Data Time 0.001 (0.024)	Loss 2.4198 (2.3838)	Entropy 0.93887 (0.94113)	Top-1 acc 64.062 (66.783)	Top-5 acc 84.375 (85.698)	lr 0.00695
Train [78][1850/3239]	Time 0.198 (0.627)	Data Time 0.001 (0.024)	Loss 2.4093 (2.3836)	Entropy 0.93866 (0.94112)	Top-1 acc 69.141 (66.782)	Top-5 acc 83.984 (85.699)	lr 0.00695
Train [78][1860/3239]	Time 0.284 (0.655)	Data Time 0.002 (0.023)	Loss 2.4848 (2.3837)	Entropy 0.93866 (0.94110)	Top-1 acc 66.016 (66.788)	Top-5 acc 85.938 (85.701)	lr 0.00695
Train [78][1870/3239]	Time 0.255 (0.654)	Data Time 0.002 (0.023)	Loss 2.5870 (2.3836)	Entropy 0.93864 (0.94109)	Top-1 acc 62.891 (66.791)	Top-5 acc 80.469 (85.701)	lr 0.00695
Train [78][1880/3239]	Time 2.470 (0.653)	Data Time 0.002 (0.023)	Loss 2.3773 (2.3836)	Entropy 0.93864 (0.94108)	Top-1 acc 67.578 (66.790)	Top-5 acc 86.328 (85.701)	lr 0.00695
Train [78][1890/3239]	Time 0.253 (0.650)	Data Time 0.001 (0.023)	Loss 2.2862 (2.3835)	Entropy 0.93864 (0.94106)	Top-1 acc 69.922 (66.793)	Top-5 acc 85.547 (85.701)	lr 0.00695
Train [78][1900/3239]	Time 0.229 (0.650)	Data Time 0.001 (0.023)	Loss 2.4300 (2.3836)	Entropy 0.93862 (0.94105)	Top-1 acc 63.281 (66.793)	Top-5 acc 84.766 (85.699)	lr 0.00695
Train [78][1910/3239]	Time 0.219 (0.649)	Data Time 0.001 (0.023)	Loss 2.2495 (2.3833)	Entropy 0.93862 (0.94104)	Top-1 acc 69.531 (66.802)	Top-5 acc 87.500 (85.703)	lr 0.00695
Train [78][1920/3239]	Time 0.237 (0.648)	Data Time 0.001 (0.023)	Loss 2.4881 (2.3836)	Entropy 0.93866 (0.94103)	Top-1 acc 62.891 (66.796)	Top-5 acc 85.938 (85.696)	lr 0.00694
Train [78][1930/3239]	Time 0.230 (0.647)	Data Time 0.001 (0.023)	Loss 2.6429 (2.3838)	Entropy 0.93868 (0.94101)	Top-1 acc 62.109 (66.795)	Top-5 acc 80.469 (85.695)	lr 0.00694
Train [78][1940/3239]	Time 0.231 (0.646)	Data Time 0.001 (0.023)	Loss 2.3518 (2.3836)	Entropy 0.93861 (0.94100)	Top-1 acc 66.406 (66.797)	Top-5 acc 85.156 (85.700)	lr 0.00694
Train [78][1950/3239]	Time 0.223 (0.645)	Data Time 0.001 (0.022)	Loss 2.3588 (2.3834)	Entropy 0.93858 (0.94099)	Top-1 acc 65.625 (66.801)	Top-5 acc 85.547 (85.701)	lr 0.00694
Train [78][1960/3239]	Time 0.321 (0.644)	Data Time 0.001 (0.022)	Loss 2.3743 (2.3834)	Entropy 0.93858 (0.94098)	Top-1 acc 65.234 (66.802)	Top-5 acc 87.500 (85.701)	lr 0.00694
Train [78][1970/3239]	Time 0.236 (0.643)	Data Time 0.001 (0.022)	Loss 2.3470 (2.3835)	Entropy 0.93858 (0.94096)	Top-1 acc 66.797 (66.799)	Top-5 acc 83.984 (85.697)	lr 0.00694
Train [78][1980/3239]	Time 0.226 (0.642)	Data Time 0.001 (0.022)	Loss 2.3050 (2.3834)	Entropy 0.93860 (0.94095)	Top-1 acc 66.406 (66.807)	Top-5 acc 87.500 (85.700)	lr 0.00694
Train [78][1990/3239]	Time 2.936 (0.642)	Data Time 0.002 (0.022)	Loss 2.3703 (2.3834)	Entropy 0.93860 (0.94094)	Top-1 acc 65.625 (66.810)	Top-5 acc 85.547 (85.699)	lr 0.00694
Train [78][2000/3239]	Time 0.276 (0.640)	Data Time 0.002 (0.022)	Loss 2.4998 (2.3838)	Entropy 0.93857 (0.94093)	Top-1 acc 64.062 (66.798)	Top-5 acc 83.594 (85.693)	lr 0.00694
Train [78][2010/3239]	Time 0.227 (0.640)	Data Time 0.001 (0.022)	Loss 2.3701 (2.3838)	Entropy 0.93855 (0.94092)	Top-1 acc 67.969 (66.800)	Top-5 acc 83.594 (85.691)	lr 0.00694
Train [78][2020/3239]	Time 0.236 (0.639)	Data Time 0.001 (0.022)	Loss 2.3983 (2.3842)	Entropy 0.93855 (0.94091)	Top-1 acc 66.797 (66.790)	Top-5 acc 85.156 (85.684)	lr 0.00694
Train [78][2030/3239]	Time 0.248 (0.638)	Data Time 0.002 (0.022)	Loss 2.3853 (2.3843)	Entropy 0.93843 (0.94089)	Top-1 acc 66.016 (66.785)	Top-5 acc 84.766 (85.681)	lr 0.00693
Train [78][2040/3239]	Time 0.225 (0.637)	Data Time 0.001 (0.022)	Loss 2.3392 (2.3843)	Entropy 0.93841 (0.94088)	Top-1 acc 66.797 (66.783)	Top-5 acc 85.156 (85.683)	lr 0.00693
Train [78][2050/3239]	Time 0.324 (0.636)	Data Time 0.001 (0.021)	Loss 2.2383 (2.3842)	Entropy 0.93836 (0.94087)	Top-1 acc 71.484 (66.784)	Top-5 acc 89.844 (85.685)	lr 0.00693
Train [78][2060/3239]	Time 0.221 (0.636)	Data Time 0.001 (0.021)	Loss 2.4325 (2.3843)	Entropy 0.93834 (0.94086)	Top-1 acc 68.359 (66.787)	Top-5 acc 86.328 (85.683)	lr 0.00693
Train [78][2070/3239]	Time 0.226 (0.635)	Data Time 0.001 (0.021)	Loss 2.3209 (2.3844)	Entropy 0.93835 (0.94084)	Top-1 acc 69.531 (66.785)	Top-5 acc 87.109 (85.681)	lr 0.00693
Train [78][2080/3239]	Time 0.235 (0.634)	Data Time 0.001 (0.021)	Loss 2.3275 (2.3842)	Entropy 0.93827 (0.94083)	Top-1 acc 67.578 (66.787)	Top-5 acc 85.547 (85.684)	lr 0.00693
Train [78][2090/3239]	Time 0.356 (0.634)	Data Time 0.001 (0.021)	Loss 2.3491 (2.3844)	Entropy 0.93824 (0.94082)	Top-1 acc 68.359 (66.785)	Top-5 acc 86.719 (85.682)	lr 0.00693
Train [78][2100/3239]	Time 2.565 (0.633)	Data Time 0.001 (0.021)	Loss 2.5051 (2.3843)	Entropy 0.93824 (0.94081)	Top-1 acc 63.281 (66.785)	Top-5 acc 83.984 (85.686)	lr 0.00693
Train [78][2110/3239]	Time 0.226 (0.631)	Data Time 0.001 (0.021)	Loss 2.3903 (2.3842)	Entropy 0.93822 (0.94080)	Top-1 acc 67.188 (66.787)	Top-5 acc 85.938 (85.688)	lr 0.00693
Train [78][2120/3239]	Time 0.261 (0.630)	Data Time 0.002 (0.021)	Loss 2.2771 (2.3843)	Entropy 0.93808 (0.94078)	Top-1 acc 72.656 (66.787)	Top-5 acc 85.938 (85.685)	lr 0.00693
Train [78][2130/3239]	Time 0.233 (0.629)	Data Time 0.001 (0.021)	Loss 2.3915 (2.3844)	Entropy 0.93807 (0.94077)	Top-1 acc 62.109 (66.783)	Top-5 acc 88.281 (85.687)	lr 0.00693
Train [78][2140/3239]	Time 0.322 (0.629)	Data Time 0.002 (0.021)	Loss 2.5014 (2.3845)	Entropy 0.93803 (0.94076)	Top-1 acc 59.766 (66.784)	Top-5 acc 87.500 (85.684)	lr 0.00692
Train [78][2150/3239]	Time 0.241 (0.628)	Data Time 0.001 (0.021)	Loss 2.3757 (2.3844)	Entropy 0.93809 (0.94074)	Top-1 acc 62.109 (66.783)	Top-5 acc 86.328 (85.685)	lr 0.00692
Train [78][2160/3239]	Time 0.234 (0.627)	Data Time 0.001 (0.020)	Loss 2.3404 (2.3844)	Entropy 0.93806 (0.94073)	Top-1 acc 64.844 (66.778)	Top-5 acc 87.500 (85.687)	lr 0.00692
Train [78][2170/3239]	Time 0.222 (0.627)	Data Time 0.001 (0.020)	Loss 2.4731 (2.3844)	Entropy 0.93801 (0.94072)	Top-1 acc 62.109 (66.776)	Top-5 acc 84.766 (85.687)	lr 0.00692
Train [78][2180/3239]	Time 0.259 (0.626)	Data Time 0.001 (0.020)	Loss 2.4351 (2.3844)	Entropy 0.93799 (0.94071)	Top-1 acc 66.406 (66.778)	Top-5 acc 81.641 (85.686)	lr 0.00692
Train [78][2190/3239]	Time 0.224 (0.625)	Data Time 0.001 (0.020)	Loss 2.3954 (2.3845)	Entropy 0.93797 (0.94070)	Top-1 acc 66.797 (66.775)	Top-5 acc 87.109 (85.683)	lr 0.00692
Train [78][2200/3239]	Time 0.256 (0.624)	Data Time 0.001 (0.020)	Loss 2.3611 (2.3845)	Entropy 0.93794 (0.94068)	Top-1 acc 66.406 (66.774)	Top-5 acc 85.156 (85.682)	lr 0.00692
Train [78][2210/3239]	Time 2.540 (0.624)	Data Time 0.002 (0.020)	Loss 2.3800 (2.3846)	Entropy 0.93794 (0.94067)	Top-1 acc 66.016 (66.771)	Top-5 acc 85.547 (85.678)	lr 0.00692
Train [78][2220/3239]	Time 0.249 (0.622)	Data Time 0.001 (0.020)	Loss 2.4505 (2.3849)	Entropy 0.93786 (0.94066)	Top-1 acc 65.234 (66.760)	Top-5 acc 84.766 (85.674)	lr 0.00692
Train [78][2230/3239]	Time 0.236 (0.645)	Data Time 0.002 (0.020)	Loss 2.4031 (2.3850)	Entropy 0.93781 (0.94065)	Top-1 acc 68.359 (66.757)	Top-5 acc 86.328 (85.672)	lr 0.00692
Train [78][2240/3239]	Time 0.237 (0.644)	Data Time 0.002 (0.020)	Loss 2.2330 (2.3850)	Entropy 0.93769 (0.94063)	Top-1 acc 71.094 (66.760)	Top-5 acc 87.109 (85.669)	lr 0.00692
Train [78][2250/3239]	Time 0.255 (0.643)	Data Time 0.002 (0.020)	Loss 2.3755 (2.3852)	Entropy 0.93765 (0.94062)	Top-1 acc 66.797 (66.760)	Top-5 acc 85.547 (85.667)	lr 0.00691
Train [78][2260/3239]	Time 0.254 (0.643)	Data Time 0.002 (0.020)	Loss 2.4144 (2.3850)	Entropy 0.93763 (0.94061)	Top-1 acc 66.406 (66.763)	Top-5 acc 84.766 (85.669)	lr 0.00691
Train [78][2270/3239]	Time 0.330 (0.642)	Data Time 0.001 (0.020)	Loss 2.2506 (2.3851)	Entropy 0.93765 (0.94059)	Top-1 acc 70.312 (66.759)	Top-5 acc 86.719 (85.667)	lr 0.00691
Train [78][2280/3239]	Time 0.233 (0.641)	Data Time 0.001 (0.019)	Loss 2.4679 (2.3851)	Entropy 0.93768 (0.94058)	Top-1 acc 63.281 (66.761)	Top-5 acc 82.422 (85.667)	lr 0.00691
Train [78][2290/3239]	Time 0.235 (0.640)	Data Time 0.001 (0.019)	Loss 2.2361 (2.3851)	Entropy 0.93761 (0.94057)	Top-1 acc 67.578 (66.762)	Top-5 acc 88.281 (85.664)	lr 0.00691
Train [78][2300/3239]	Time 0.227 (0.640)	Data Time 0.002 (0.019)	Loss 2.3788 (2.3852)	Entropy 0.93746 (0.94055)	Top-1 acc 70.312 (66.761)	Top-5 acc 87.891 (85.666)	lr 0.00691
Train [78][2310/3239]	Time 0.237 (0.639)	Data Time 0.001 (0.019)	Loss 2.5517 (2.3851)	Entropy 0.93752 (0.94054)	Top-1 acc 60.938 (66.764)	Top-5 acc 82.031 (85.666)	lr 0.00691
Train [78][2320/3239]	Time 2.507 (0.638)	Data Time 0.001 (0.019)	Loss 2.4667 (2.3850)	Entropy 0.93752 (0.94053)	Top-1 acc 64.453 (66.765)	Top-5 acc 84.375 (85.668)	lr 0.00691
Train [78][2330/3239]	Time 0.235 (0.637)	Data Time 0.001 (0.019)	Loss 2.2902 (2.3848)	Entropy 0.93755 (0.94051)	Top-1 acc 66.016 (66.768)	Top-5 acc 89.453 (85.672)	lr 0.00691
Train [78][2340/3239]	Time 0.254 (0.636)	Data Time 0.001 (0.019)	Loss 2.4374 (2.3849)	Entropy 0.93758 (0.94050)	Top-1 acc 67.578 (66.769)	Top-5 acc 85.547 (85.673)	lr 0.00691
Train [78][2350/3239]	Time 0.235 (0.635)	Data Time 0.001 (0.019)	Loss 2.5672 (2.3849)	Entropy 0.93751 (0.94049)	Top-1 acc 61.328 (66.767)	Top-5 acc 80.859 (85.672)	lr 0.00691
Train [78][2360/3239]	Time 0.344 (0.634)	Data Time 0.001 (0.019)	Loss 2.3230 (2.3848)	Entropy 0.93748 (0.94048)	Top-1 acc 67.969 (66.768)	Top-5 acc 88.672 (85.676)	lr 0.00690
Train [78][2370/3239]	Time 0.229 (0.634)	Data Time 0.001 (0.019)	Loss 2.5814 (2.3849)	Entropy 0.93749 (0.94046)	Top-1 acc 65.234 (66.764)	Top-5 acc 81.641 (85.673)	lr 0.00690
Train [78][2380/3239]	Time 0.240 (0.633)	Data Time 0.001 (0.019)	Loss 2.4148 (2.3849)	Entropy 0.93746 (0.94045)	Top-1 acc 66.406 (66.765)	Top-5 acc 86.328 (85.676)	lr 0.00690
Train [78][2390/3239]	Time 0.234 (0.633)	Data Time 0.001 (0.019)	Loss 2.4838 (2.3850)	Entropy 0.93752 (0.94044)	Top-1 acc 62.891 (66.764)	Top-5 acc 84.766 (85.675)	lr 0.00690
Train [78][2400/3239]	Time 0.306 (0.632)	Data Time 0.001 (0.019)	Loss 2.3300 (2.3849)	Entropy 0.93750 (0.94043)	Top-1 acc 66.797 (66.763)	Top-5 acc 87.500 (85.677)	lr 0.00690
Train [78][2410/3239]	Time 0.225 (0.631)	Data Time 0.001 (0.018)	Loss 2.4082 (2.3850)	Entropy 0.93745 (0.94041)	Top-1 acc 65.625 (66.763)	Top-5 acc 83.203 (85.675)	lr 0.00690
Train [78][2420/3239]	Time 0.235 (0.631)	Data Time 0.001 (0.018)	Loss 2.3660 (2.3849)	Entropy 0.93735 (0.94040)	Top-1 acc 70.703 (66.764)	Top-5 acc 85.156 (85.676)	lr 0.00690
Train [78][2430/3239]	Time 2.519 (0.630)	Data Time 0.002 (0.018)	Loss 2.5105 (2.3848)	Entropy 0.93735 (0.94039)	Top-1 acc 65.234 (66.772)	Top-5 acc 82.422 (85.679)	lr 0.00690
Train [78][2440/3239]	Time 0.246 (0.628)	Data Time 0.001 (0.018)	Loss 2.2442 (2.3847)	Entropy 0.93730 (0.94038)	Top-1 acc 70.312 (66.772)	Top-5 acc 89.453 (85.678)	lr 0.00690
Train [78][2450/3239]	Time 0.233 (0.628)	Data Time 0.001 (0.018)	Loss 2.3765 (2.3846)	Entropy 0.93722 (0.94036)	Top-1 acc 64.844 (66.772)	Top-5 acc 85.156 (85.679)	lr 0.00690
Train [78][2460/3239]	Time 0.232 (0.627)	Data Time 0.001 (0.018)	Loss 2.1342 (2.3844)	Entropy 0.93715 (0.94035)	Top-1 acc 72.656 (66.775)	Top-5 acc 90.625 (85.686)	lr 0.00690
Train [78][2470/3239]	Time 0.271 (0.626)	Data Time 0.001 (0.018)	Loss 2.2262 (2.3845)	Entropy 0.93712 (0.94034)	Top-1 acc 70.312 (66.774)	Top-5 acc 88.672 (85.686)	lr 0.00689
Train [78][2480/3239]	Time 0.241 (0.626)	Data Time 0.002 (0.018)	Loss 2.6029 (2.3842)	Entropy 0.93711 (0.94033)	Top-1 acc 60.938 (66.780)	Top-5 acc 81.250 (85.688)	lr 0.00689
Train [78][2490/3239]	Time 0.317 (0.625)	Data Time 0.001 (0.018)	Loss 2.3112 (2.3842)	Entropy 0.93707 (0.94031)	Top-1 acc 71.094 (66.782)	Top-5 acc 86.719 (85.688)	lr 0.00689
Train [78][2500/3239]	Time 0.223 (0.625)	Data Time 0.002 (0.018)	Loss 2.4017 (2.3844)	Entropy 0.93706 (0.94030)	Top-1 acc 68.359 (66.774)	Top-5 acc 83.984 (85.686)	lr 0.00689
Train [78][2510/3239]	Time 0.229 (0.624)	Data Time 0.001 (0.018)	Loss 2.2291 (2.3843)	Entropy 0.93707 (0.94029)	Top-1 acc 71.484 (66.777)	Top-5 acc 87.500 (85.686)	lr 0.00689
Train [78][2520/3239]	Time 0.219 (0.623)	Data Time 0.001 (0.018)	Loss 2.5543 (2.3846)	Entropy 0.93700 (0.94027)	Top-1 acc 62.500 (66.769)	Top-5 acc 83.203 (85.680)	lr 0.00689
Train [78][2530/3239]	Time 0.220 (0.623)	Data Time 0.001 (0.018)	Loss 2.3446 (2.3848)	Entropy 0.93701 (0.94026)	Top-1 acc 69.141 (66.767)	Top-5 acc 85.156 (85.675)	lr 0.00689
Train [78][2540/3239]	Time 2.511 (0.622)	Data Time 0.001 (0.018)	Loss 2.2985 (2.3848)	Entropy 0.93701 (0.94025)	Top-1 acc 66.797 (66.766)	Top-5 acc 87.891 (85.677)	lr 0.00689
Train [78][2550/3239]	Time 0.234 (0.621)	Data Time 0.001 (0.018)	Loss 2.4030 (2.3847)	Entropy 0.93701 (0.94024)	Top-1 acc 69.922 (66.771)	Top-5 acc 85.938 (85.677)	lr 0.00689
Train [78][2560/3239]	Time 0.222 (0.620)	Data Time 0.001 (0.017)	Loss 2.4429 (2.3847)	Entropy 0.93701 (0.94022)	Top-1 acc 62.109 (66.766)	Top-5 acc 83.594 (85.679)	lr 0.00689
Train [78][2570/3239]	Time 0.216 (0.620)	Data Time 0.001 (0.017)	Loss 2.4264 (2.3849)	Entropy 0.93697 (0.94021)	Top-1 acc 65.625 (66.761)	Top-5 acc 84.766 (85.679)	lr 0.00689
Train [78][2580/3239]	Time 0.278 (0.619)	Data Time 0.002 (0.017)	Loss 2.4859 (2.3851)	Entropy 0.93705 (0.94020)	Top-1 acc 62.500 (66.749)	Top-5 acc 85.156 (85.672)	lr 0.00688
Train [78][2590/3239]	Time 0.273 (0.639)	Data Time 0.002 (0.017)	Loss 2.2828 (2.3851)	Entropy 0.93705 (0.94019)	Top-1 acc 67.578 (66.747)	Top-5 acc 87.891 (85.672)	lr 0.00688
Train [78][2600/3239]	Time 0.224 (0.638)	Data Time 0.002 (0.017)	Loss 2.5422 (2.3853)	Entropy 0.93700 (0.94017)	Top-1 acc 62.500 (66.744)	Top-5 acc 83.203 (85.668)	lr 0.00688
Train [78][2610/3239]	Time 0.233 (0.638)	Data Time 0.001 (0.017)	Loss 2.3882 (2.3853)	Entropy 0.93704 (0.94016)	Top-1 acc 65.234 (66.739)	Top-5 acc 86.719 (85.668)	lr 0.00688
Train [78][2620/3239]	Time 0.319 (0.637)	Data Time 0.001 (0.017)	Loss 2.3536 (2.3853)	Entropy 0.93705 (0.94015)	Top-1 acc 69.531 (66.738)	Top-5 acc 85.156 (85.668)	lr 0.00688
Train [78][2630/3239]	Time 0.238 (0.636)	Data Time 0.001 (0.017)	Loss 2.4389 (2.3852)	Entropy 0.93705 (0.94014)	Top-1 acc 65.625 (66.737)	Top-5 acc 84.766 (85.669)	lr 0.00688
Train [78][2640/3239]	Time 0.214 (0.636)	Data Time 0.001 (0.017)	Loss 2.5562 (2.3854)	Entropy 0.93712 (0.94013)	Top-1 acc 61.719 (66.732)	Top-5 acc 81.250 (85.665)	lr 0.00688
Train [78][2650/3239]	Time 0.248 (0.635)	Data Time 0.001 (0.017)	Loss 2.4929 (2.3855)	Entropy 0.93726 (0.94011)	Top-1 acc 66.406 (66.730)	Top-5 acc 82.812 (85.662)	lr 0.00688
Train [78][2660/3239]	Time 0.293 (0.634)	Data Time 0.001 (0.017)	Loss 2.3027 (2.3854)	Entropy 0.93721 (0.94010)	Top-1 acc 68.359 (66.730)	Top-5 acc 86.328 (85.663)	lr 0.00688
Train [78][2670/3239]	Time 0.288 (0.634)	Data Time 0.001 (0.017)	Loss 2.3973 (2.3853)	Entropy 0.93718 (0.94009)	Top-1 acc 68.359 (66.731)	Top-5 acc 83.984 (85.663)	lr 0.00688
Train [78][2680/3239]	Time 0.240 (0.633)	Data Time 0.001 (0.017)	Loss 2.1884 (2.3852)	Entropy 0.93713 (0.94008)	Top-1 acc 69.141 (66.733)	Top-5 acc 89.453 (85.665)	lr 0.00688
Train [78][2690/3239]	Time 0.222 (0.633)	Data Time 0.001 (0.017)	Loss 2.5375 (2.3854)	Entropy 0.93707 (0.94007)	Top-1 acc 63.672 (66.730)	Top-5 acc 85.547 (85.663)	lr 0.00687
Train [78][2700/3239]	Time 0.257 (0.632)	Data Time 0.001 (0.017)	Loss 2.4171 (2.3854)	Entropy 0.93707 (0.94006)	Top-1 acc 64.844 (66.732)	Top-5 acc 85.156 (85.661)	lr 0.00687
Train [78][2710/3239]	Time 0.227 (0.632)	Data Time 0.002 (0.017)	Loss 2.3155 (2.3853)	Entropy 0.93694 (0.94005)	Top-1 acc 71.875 (66.734)	Top-5 acc 85.547 (85.661)	lr 0.00687
Train [78][2720/3239]	Time 0.238 (0.631)	Data Time 0.002 (0.017)	Loss 2.4540 (2.3853)	Entropy 0.93697 (0.94004)	Top-1 acc 64.844 (66.735)	Top-5 acc 85.547 (85.662)	lr 0.00687
Train [78][2730/3239]	Time 0.302 (0.630)	Data Time 0.001 (0.017)	Loss 2.5306 (2.3856)	Entropy 0.93636 (0.94003)	Top-1 acc 64.453 (66.728)	Top-5 acc 83.203 (85.657)	lr 0.00687
Train [78][2740/3239]	Time 0.285 (0.630)	Data Time 0.001 (0.016)	Loss 2.3393 (2.3856)	Entropy 0.93627 (0.94001)	Top-1 acc 68.359 (66.728)	Top-5 acc 86.719 (85.658)	lr 0.00687
Train [78][2750/3239]	Time 0.347 (0.629)	Data Time 0.001 (0.016)	Loss 2.4929 (2.3855)	Entropy 0.93617 (0.94000)	Top-1 acc 64.453 (66.729)	Top-5 acc 82.812 (85.659)	lr 0.00687
Train [78][2760/3239]	Time 0.228 (0.629)	Data Time 0.001 (0.016)	Loss 2.4334 (2.3854)	Entropy 0.93611 (0.93999)	Top-1 acc 68.359 (66.730)	Top-5 acc 87.891 (85.661)	lr 0.00687
Train [78][2770/3239]	Time 0.229 (0.628)	Data Time 0.001 (0.016)	Loss 2.5452 (2.3855)	Entropy 0.93607 (0.93997)	Top-1 acc 64.062 (66.729)	Top-5 acc 83.203 (85.662)	lr 0.00687
Train [78][2780/3239]	Time 0.221 (0.628)	Data Time 0.005 (0.016)	Loss 2.3564 (2.3855)	Entropy 0.93605 (0.93996)	Top-1 acc 65.625 (66.721)	Top-5 acc 87.891 (85.661)	lr 0.00687
Train [78][2790/3239]	Time 0.253 (0.627)	Data Time 0.001 (0.016)	Loss 2.4159 (2.3855)	Entropy 0.93594 (0.93994)	Top-1 acc 65.234 (66.719)	Top-5 acc 87.500 (85.660)	lr 0.00687
Train [78][2800/3239]	Time 0.279 (0.627)	Data Time 0.001 (0.016)	Loss 2.3217 (2.3855)	Entropy 0.93589 (0.93993)	Top-1 acc 69.922 (66.721)	Top-5 acc 87.500 (85.662)	lr 0.00686
Train [78][2810/3239]	Time 0.256 (0.626)	Data Time 0.001 (0.016)	Loss 2.3068 (2.3854)	Entropy 0.93585 (0.93991)	Top-1 acc 66.406 (66.722)	Top-5 acc 89.062 (85.663)	lr 0.00686
Train [78][2820/3239]	Time 0.279 (0.626)	Data Time 0.001 (0.016)	Loss 2.3555 (2.3856)	Entropy 0.93585 (0.93990)	Top-1 acc 67.188 (66.722)	Top-5 acc 87.500 (85.661)	lr 0.00686
Train [78][2830/3239]	Time 0.238 (0.625)	Data Time 0.001 (0.016)	Loss 2.4102 (2.3857)	Entropy 0.93582 (0.93989)	Top-1 acc 65.625 (66.719)	Top-5 acc 84.766 (85.658)	lr 0.00686
Train [78][2840/3239]	Time 0.316 (0.624)	Data Time 0.001 (0.016)	Loss 2.4092 (2.3857)	Entropy 0.93589 (0.93987)	Top-1 acc 69.531 (66.720)	Top-5 acc 85.547 (85.655)	lr 0.00686
Train [78][2850/3239]	Time 0.231 (0.624)	Data Time 0.001 (0.016)	Loss 2.5402 (2.3859)	Entropy 0.93586 (0.93986)	Top-1 acc 62.891 (66.716)	Top-5 acc 82.031 (85.650)	lr 0.00686
Train [78][2860/3239]	Time 0.230 (0.623)	Data Time 0.001 (0.016)	Loss 2.4717 (2.3860)	Entropy 0.93593 (0.93984)	Top-1 acc 66.797 (66.714)	Top-5 acc 85.938 (85.650)	lr 0.00686
Train [78][2870/3239]	Time 0.273 (0.623)	Data Time 0.001 (0.016)	Loss 2.4983 (2.3862)	Entropy 0.93581 (0.93983)	Top-1 acc 64.062 (66.709)	Top-5 acc 82.031 (85.646)	lr 0.00686
Train [78][2880/3239]	Time 0.266 (0.622)	Data Time 0.001 (0.016)	Loss 2.5283 (2.3862)	Entropy 0.93579 (0.93982)	Top-1 acc 63.281 (66.708)	Top-5 acc 82.422 (85.645)	lr 0.00686
Train [78][2890/3239]	Time 0.262 (0.622)	Data Time 0.001 (0.016)	Loss 2.4412 (2.3862)	Entropy 0.93581 (0.93980)	Top-1 acc 64.844 (66.711)	Top-5 acc 83.984 (85.646)	lr 0.00686
Train [78][2900/3239]	Time 0.274 (0.621)	Data Time 0.001 (0.016)	Loss 2.3312 (2.3861)	Entropy 0.93564 (0.93979)	Top-1 acc 66.016 (66.711)	Top-5 acc 86.328 (85.646)	lr 0.00686
Train [78][2910/3239]	Time 0.262 (0.621)	Data Time 0.001 (0.016)	Loss 2.4166 (2.3861)	Entropy 0.93561 (0.93977)	Top-1 acc 65.234 (66.711)	Top-5 acc 83.594 (85.647)	lr 0.00685
Train [78][2920/3239]	Time 0.472 (0.638)	Data Time 0.004 (0.016)	Loss 2.5128 (2.3862)	Entropy 0.93556 (0.93976)	Top-1 acc 64.844 (66.707)	Top-5 acc 82.422 (85.646)	lr 0.00685
Train [78][2930/3239]	Time 0.233 (0.638)	Data Time 0.002 (0.015)	Loss 2.3973 (2.3861)	Entropy 0.93551 (0.93975)	Top-1 acc 68.359 (66.709)	Top-5 acc 84.766 (85.647)	lr 0.00685
Train [78][2940/3239]	Time 0.238 (0.637)	Data Time 0.002 (0.015)	Loss 2.4961 (2.3861)	Entropy 0.93544 (0.93973)	Top-1 acc 63.281 (66.709)	Top-5 acc 81.641 (85.645)	lr 0.00685
Train [78][2950/3239]	Time 0.231 (0.637)	Data Time 0.001 (0.015)	Loss 2.3638 (2.3862)	Entropy 0.93545 (0.93972)	Top-1 acc 65.625 (66.704)	Top-5 acc 87.109 (85.645)	lr 0.00685
Train [78][2960/3239]	Time 0.256 (0.636)	Data Time 0.001 (0.015)	Loss 2.3932 (2.3864)	Entropy 0.93545 (0.93970)	Top-1 acc 64.844 (66.697)	Top-5 acc 87.891 (85.642)	lr 0.00685
Train [78][2970/3239]	Time 0.352 (0.635)	Data Time 0.001 (0.015)	Loss 2.2841 (2.3864)	Entropy 0.93529 (0.93969)	Top-1 acc 70.703 (66.702)	Top-5 acc 86.328 (85.639)	lr 0.00685
Train [78][2980/3239]	Time 0.280 (0.635)	Data Time 0.001 (0.015)	Loss 2.4279 (2.3863)	Entropy 0.93530 (0.93967)	Top-1 acc 65.234 (66.704)	Top-5 acc 85.547 (85.639)	lr 0.00685
Train [78][2990/3239]	Time 0.249 (0.634)	Data Time 0.002 (0.015)	Loss 2.3469 (2.3865)	Entropy 0.93524 (0.93966)	Top-1 acc 65.234 (66.699)	Top-5 acc 86.719 (85.639)	lr 0.00685
Train [78][3000/3239]	Time 0.266 (0.634)	Data Time 0.002 (0.015)	Loss 2.3315 (2.3864)	Entropy 0.93516 (0.93964)	Top-1 acc 67.578 (66.705)	Top-5 acc 86.719 (85.641)	lr 0.00685
Train [78][3010/3239]	Time 0.290 (0.633)	Data Time 0.002 (0.015)	Loss 2.3834 (2.3863)	Entropy 0.93515 (0.93963)	Top-1 acc 66.797 (66.706)	Top-5 acc 84.766 (85.643)	lr 0.00685
Train [78][3020/3239]	Time 0.214 (0.633)	Data Time 0.001 (0.015)	Loss 2.3598 (2.3863)	Entropy 0.93514 (0.93961)	Top-1 acc 66.406 (66.707)	Top-5 acc 83.594 (85.640)	lr 0.00684
Train [78][3030/3239]	Time 0.255 (0.632)	Data Time 0.001 (0.015)	Loss 2.5048 (2.3864)	Entropy 0.93518 (0.93960)	Top-1 acc 67.578 (66.708)	Top-5 acc 81.641 (85.638)	lr 0.00684
Train [78][3040/3239]	Time 0.254 (0.632)	Data Time 0.001 (0.015)	Loss 2.1957 (2.3864)	Entropy 0.93514 (0.93958)	Top-1 acc 70.312 (66.708)	Top-5 acc 89.062 (85.637)	lr 0.00684
Train [78][3050/3239]	Time 0.267 (0.631)	Data Time 0.001 (0.015)	Loss 2.2960 (2.3864)	Entropy 0.93511 (0.93957)	Top-1 acc 67.188 (66.709)	Top-5 acc 90.625 (85.637)	lr 0.00684
Train [78][3060/3239]	Time 0.235 (0.631)	Data Time 0.001 (0.015)	Loss 2.2569 (2.3865)	Entropy 0.93511 (0.93956)	Top-1 acc 71.875 (66.704)	Top-5 acc 88.672 (85.635)	lr 0.00684
Train [78][3070/3239]	Time 0.227 (0.630)	Data Time 0.001 (0.015)	Loss 2.2365 (2.3864)	Entropy 0.93506 (0.93954)	Top-1 acc 68.359 (66.707)	Top-5 acc 89.062 (85.639)	lr 0.00684
Train [78][3080/3239]	Time 0.240 (0.630)	Data Time 0.002 (0.015)	Loss 2.1700 (2.3864)	Entropy 0.93502 (0.93953)	Top-1 acc 69.531 (66.702)	Top-5 acc 93.359 (85.640)	lr 0.00684
Train [78][3090/3239]	Time 0.247 (0.629)	Data Time 0.001 (0.015)	Loss 2.5264 (2.3865)	Entropy 0.93504 (0.93951)	Top-1 acc 66.016 (66.700)	Top-5 acc 80.859 (85.641)	lr 0.00684
Train [78][3100/3239]	Time 0.332 (0.629)	Data Time 0.001 (0.015)	Loss 2.3535 (2.3865)	Entropy 0.93501 (0.93950)	Top-1 acc 67.188 (66.701)	Top-5 acc 87.109 (85.639)	lr 0.00684
Train [78][3110/3239]	Time 0.217 (0.628)	Data Time 0.001 (0.015)	Loss 2.4820 (2.3864)	Entropy 0.93495 (0.93948)	Top-1 acc 64.453 (66.702)	Top-5 acc 85.547 (85.640)	lr 0.00684
Train [78][3120/3239]	Time 0.236 (0.628)	Data Time 0.001 (0.015)	Loss 2.4152 (2.3864)	Entropy 0.93489 (0.93947)	Top-1 acc 70.312 (66.705)	Top-5 acc 84.375 (85.642)	lr 0.00684
Train [78][3130/3239]	Time 0.264 (0.628)	Data Time 0.001 (0.015)	Loss 2.4373 (2.3865)	Entropy 0.93483 (0.93945)	Top-1 acc 64.453 (66.703)	Top-5 acc 84.375 (85.641)	lr 0.00684
Train [78][3140/3239]	Time 0.229 (0.627)	Data Time 0.001 (0.015)	Loss 2.3171 (2.3866)	Entropy 0.93480 (0.93944)	Top-1 acc 66.797 (66.701)	Top-5 acc 87.500 (85.638)	lr 0.00683
Train [78][3150/3239]	Time 0.255 (0.627)	Data Time 0.001 (0.015)	Loss 2.3632 (2.3868)	Entropy 0.93472 (0.93942)	Top-1 acc 68.359 (66.697)	Top-5 acc 85.156 (85.635)	lr 0.00683
Train [78][3160/3239]	Time 0.279 (0.626)	Data Time 0.002 (0.014)	Loss 2.2567 (2.3868)	Entropy 0.93470 (0.93941)	Top-1 acc 70.703 (66.696)	Top-5 acc 87.500 (85.636)	lr 0.00683
Train [78][3170/3239]	Time 0.266 (0.626)	Data Time 0.001 (0.014)	Loss 2.3763 (2.3868)	Entropy 0.93476 (0.93939)	Top-1 acc 66.797 (66.698)	Top-5 acc 83.984 (85.634)	lr 0.00683
Train [78][3180/3239]	Time 0.229 (0.625)	Data Time 0.000 (0.014)	Loss 2.3623 (2.3868)	Entropy 0.93479 (0.93938)	Top-1 acc 68.359 (66.695)	Top-5 acc 87.891 (85.634)	lr 0.00683
Train [78][3190/3239]	Time 0.347 (0.625)	Data Time 0.000 (0.014)	Loss 2.3533 (2.3869)	Entropy 0.93477 (0.93937)	Top-1 acc 67.969 (66.696)	Top-5 acc 84.375 (85.632)	lr 0.00683
Train [78][3200/3239]	Time 0.242 (0.624)	Data Time 0.000 (0.014)	Loss 2.3651 (2.3868)	Entropy 0.93477 (0.93935)	Top-1 acc 66.797 (66.698)	Top-5 acc 87.891 (85.635)	lr 0.00683
Train [78][3210/3239]	Time 0.222 (0.624)	Data Time 0.000 (0.014)	Loss 2.4322 (2.3868)	Entropy 0.93480 (0.93934)	Top-1 acc 64.453 (66.699)	Top-5 acc 83.594 (85.634)	lr 0.00683
Train [78][3220/3239]	Time 0.233 (0.623)	Data Time 0.000 (0.014)	Loss 2.6303 (2.3868)	Entropy 0.93479 (0.93932)	Top-1 acc 58.203 (66.700)	Top-5 acc 82.422 (85.634)	lr 0.00683
Train [78][3230/3239]	Time 0.349 (0.623)	Data Time 0.000 (0.014)	Loss 2.2991 (2.3868)	Entropy 0.93478 (0.93931)	Top-1 acc 66.797 (66.698)	Top-5 acc 86.719 (85.632)	lr 0.00683
Train [78][3239/3239]	Time 2.401 (0.622)	Data Time 0.000 (0.014)	Loss 2.3306 (2.3869)	Entropy 0.93478 (0.93930)	Top-1 acc 62.963 (66.694)	Top-5 acc 90.123 (85.631)	lr 0.00683
==========Valid [78/120]	loss 1.318	top-1 acc 69.935 (69.935)	top-5 acc 88.472	Train top-1 66.694	top-5 85.631	Entropy 0.93478	Latency-None: 0.000ms	Flops: 546.53M
Train [79][0/3239]	Time 40.564 (40.564)	Data Time 38.039 (38.039)	Loss 2.5515 (2.5515)	Entropy 0.93473 (0.93473)	Top-1 acc 64.062 (64.062)	Top-5 acc 83.594 (83.594)	lr 0.00683
Train [79][10/3239]	Time 59.790 (9.535)	Data Time 0.002 (3.460)	Loss 2.2356 (2.3656)	Entropy 0.93473 (0.93473)	Top-1 acc 71.094 (67.969)	Top-5 acc 89.062 (86.151)	lr 0.00682
Train [79][20/3239]	Time 0.225 (5.115)	Data Time 0.002 (1.814)	Loss 2.4479 (2.3593)	Entropy 0.93470 (0.93471)	Top-1 acc 64.062 (67.411)	Top-5 acc 84.766 (86.235)	lr 0.00682
Train [79][30/3239]	Time 0.260 (3.625)	Data Time 0.002 (1.229)	Loss 2.3857 (2.3524)	Entropy 0.93459 (0.93468)	Top-1 acc 67.578 (67.717)	Top-5 acc 85.938 (86.253)	lr 0.00682
Train [79][40/3239]	Time 0.338 (2.859)	Data Time 0.002 (0.930)	Loss 2.3166 (2.3582)	Entropy 0.93458 (0.93465)	Top-1 acc 66.406 (67.521)	Top-5 acc 87.500 (86.204)	lr 0.00682
Train [79][50/3239]	Time 0.203 (2.390)	Data Time 0.001 (0.748)	Loss 2.5619 (2.3631)	Entropy 0.93453 (0.93463)	Top-1 acc 64.453 (67.226)	Top-5 acc 81.641 (86.114)	lr 0.00682
Train [79][60/3239]	Time 0.229 (2.075)	Data Time 0.001 (0.626)	Loss 2.3517 (2.3653)	Entropy 0.93447 (0.93461)	Top-1 acc 66.797 (67.328)	Top-5 acc 86.328 (86.066)	lr 0.00682
Train [79][70/3239]	Time 0.227 (1.849)	Data Time 0.001 (0.538)	Loss 2.3789 (2.3643)	Entropy 0.93448 (0.93459)	Top-1 acc 66.797 (67.320)	Top-5 acc 86.719 (85.954)	lr 0.00682
Train [79][80/3239]	Time 0.231 (1.678)	Data Time 0.001 (0.472)	Loss 2.3456 (2.3643)	Entropy 0.93449 (0.93458)	Top-1 acc 66.797 (67.342)	Top-5 acc 87.109 (85.962)	lr 0.00682
Train [79][90/3239]	Time 0.265 (1.547)	Data Time 0.001 (0.420)	Loss 2.3385 (2.3634)	Entropy 0.93451 (0.93457)	Top-1 acc 68.750 (67.406)	Top-5 acc 86.328 (85.959)	lr 0.00682
Train [79][100/3239]	Time 0.228 (1.440)	Data Time 0.001 (0.379)	Loss 2.3121 (2.3691)	Entropy 0.93457 (0.93456)	Top-1 acc 65.625 (67.180)	Top-5 acc 87.891 (85.903)	lr 0.00682
Train [79][110/3239]	Time 0.234 (1.353)	Data Time 0.001 (0.345)	Loss 2.3263 (2.3727)	Entropy 0.93458 (0.93456)	Top-1 acc 69.141 (67.092)	Top-5 acc 85.938 (85.871)	lr 0.00682
Train [79][120/3239]	Time 2.527 (1.279)	Data Time 0.001 (0.316)	Loss 2.1618 (2.3677)	Entropy 0.93458 (0.93456)	Top-1 acc 75.391 (67.255)	Top-5 acc 88.672 (85.950)	lr 0.00681
Train [79][130/3239]	Time 0.366 (1.201)	Data Time 0.001 (0.292)	Loss 2.3060 (2.3677)	Entropy 0.93459 (0.93457)	Top-1 acc 69.922 (67.322)	Top-5 acc 87.891 (85.970)	lr 0.00681
Train [79][140/3239]	Time 0.228 (1.150)	Data Time 0.001 (0.272)	Loss 2.3493 (2.3685)	Entropy 0.93451 (0.93456)	Top-1 acc 71.484 (67.307)	Top-5 acc 87.891 (85.998)	lr 0.00681
Train [79][150/3239]	Time 0.219 (1.105)	Data Time 0.001 (0.254)	Loss 2.3336 (2.3675)	Entropy 0.93443 (0.93456)	Top-1 acc 66.016 (67.312)	Top-5 acc 87.500 (85.997)	lr 0.00681
Train [79][160/3239]	Time 0.236 (1.066)	Data Time 0.001 (0.238)	Loss 2.2679 (2.3677)	Entropy 0.93437 (0.93454)	Top-1 acc 70.312 (67.323)	Top-5 acc 86.328 (85.957)	lr 0.00681
Train [79][170/3239]	Time 0.252 (1.032)	Data Time 0.002 (0.224)	Loss 2.6003 (2.3717)	Entropy 0.93433 (0.93453)	Top-1 acc 62.500 (67.254)	Top-5 acc 79.297 (85.837)	lr 0.00681
Train [79][180/3239]	Time 0.226 (1.002)	Data Time 0.001 (0.212)	Loss 2.3572 (2.3697)	Entropy 0.93439 (0.93452)	Top-1 acc 73.438 (67.349)	Top-5 acc 84.766 (85.894)	lr 0.00681
Train [79][190/3239]	Time 0.237 (0.974)	Data Time 0.001 (0.201)	Loss 2.4118 (2.3691)	Entropy 0.93439 (0.93452)	Top-1 acc 62.891 (67.333)	Top-5 acc 85.156 (85.870)	lr 0.00681
Train [79][200/3239]	Time 0.250 (0.950)	Data Time 0.001 (0.191)	Loss 2.2478 (2.3657)	Entropy 0.93437 (0.93451)	Top-1 acc 73.047 (67.413)	Top-5 acc 87.109 (85.953)	lr 0.00681
Train [79][210/3239]	Time 0.240 (0.927)	Data Time 0.001 (0.182)	Loss 2.2781 (2.3660)	Entropy 0.93433 (0.93450)	Top-1 acc 69.531 (67.417)	Top-5 acc 87.109 (85.949)	lr 0.00681
Train [79][220/3239]	Time 0.242 (0.908)	Data Time 0.001 (0.174)	Loss 2.3052 (2.3657)	Entropy 0.93429 (0.93449)	Top-1 acc 68.750 (67.407)	Top-5 acc 87.891 (85.969)	lr 0.00681
Train [79][230/3239]	Time 2.552 (0.889)	Data Time 0.001 (0.166)	Loss 2.2772 (2.3649)	Entropy 0.93429 (0.93449)	Top-1 acc 68.359 (67.394)	Top-5 acc 87.500 (85.981)	lr 0.00680
Train [79][240/3239]	Time 0.227 (0.862)	Data Time 0.001 (0.160)	Loss 2.6147 (2.3668)	Entropy 0.93429 (0.93448)	Top-1 acc 61.719 (67.345)	Top-5 acc 81.250 (85.962)	lr 0.00680
Train [79][250/3239]	Time 0.232 (0.847)	Data Time 0.001 (0.153)	Loss 2.3089 (2.3691)	Entropy 0.93429 (0.93447)	Top-1 acc 68.359 (67.300)	Top-5 acc 87.109 (85.917)	lr 0.00680
Train [79][260/3239]	Time 0.343 (0.834)	Data Time 0.001 (0.147)	Loss 2.2424 (2.3694)	Entropy 0.93423 (0.93446)	Top-1 acc 71.875 (67.283)	Top-5 acc 87.109 (85.923)	lr 0.00680
Train [79][270/3239]	Time 0.210 (0.820)	Data Time 0.001 (0.142)	Loss 2.3521 (2.3689)	Entropy 0.93424 (0.93445)	Top-1 acc 70.312 (67.277)	Top-5 acc 85.156 (85.950)	lr 0.00680
Train [79][280/3239]	Time 0.231 (0.808)	Data Time 0.001 (0.137)	Loss 2.2718 (2.3673)	Entropy 0.93421 (0.93444)	Top-1 acc 69.922 (67.360)	Top-5 acc 87.109 (85.963)	lr 0.00680
Train [79][290/3239]	Time 0.240 (0.798)	Data Time 0.001 (0.132)	Loss 2.3868 (2.3675)	Entropy 0.93415 (0.93444)	Top-1 acc 65.625 (67.335)	Top-5 acc 86.719 (85.966)	lr 0.00680
Train [79][300/3239]	Time 0.238 (0.788)	Data Time 0.001 (0.128)	Loss 2.1575 (2.3657)	Entropy 0.93408 (0.93443)	Top-1 acc 71.094 (67.382)	Top-5 acc 89.453 (85.997)	lr 0.00680
Train [79][310/3239]	Time 0.229 (0.778)	Data Time 0.001 (0.124)	Loss 2.4618 (2.3648)	Entropy 0.93413 (0.93441)	Top-1 acc 63.281 (67.414)	Top-5 acc 84.375 (86.023)	lr 0.00680
Train [79][320/3239]	Time 0.233 (0.769)	Data Time 0.001 (0.120)	Loss 2.3931 (2.3637)	Entropy 0.93416 (0.93441)	Top-1 acc 65.234 (67.417)	Top-5 acc 86.328 (86.043)	lr 0.00680
Train [79][330/3239]	Time 0.283 (0.760)	Data Time 0.001 (0.117)	Loss 2.3992 (2.3636)	Entropy 0.93416 (0.93440)	Top-1 acc 67.188 (67.416)	Top-5 acc 86.328 (86.058)	lr 0.00680
Train [79][340/3239]	Time 2.702 (0.753)	Data Time 0.001 (0.113)	Loss 2.2741 (2.3630)	Entropy 0.93416 (0.93439)	Top-1 acc 68.750 (67.457)	Top-5 acc 88.281 (86.049)	lr 0.00679
Train [79][350/3239]	Time 0.372 (0.739)	Data Time 0.001 (0.110)	Loss 2.3237 (2.3627)	Entropy 0.93503 (0.93441)	Top-1 acc 67.969 (67.460)	Top-5 acc 87.109 (86.074)	lr 0.00679
Train [79][360/3239]	Time 0.222 (0.731)	Data Time 0.001 (0.107)	Loss 2.3004 (2.3627)	Entropy 0.93494 (0.93442)	Top-1 acc 71.484 (67.466)	Top-5 acc 88.281 (86.080)	lr 0.00679
Train [79][370/3239]	Time 0.221 (0.724)	Data Time 0.001 (0.104)	Loss 2.2900 (2.3627)	Entropy 0.93492 (0.93444)	Top-1 acc 70.312 (67.471)	Top-5 acc 85.938 (86.080)	lr 0.00679
Train [79][380/3239]	Time 0.277 (0.854)	Data Time 0.002 (0.102)	Loss 2.4271 (2.3627)	Entropy 0.93490 (0.93445)	Top-1 acc 63.672 (67.479)	Top-5 acc 84.766 (86.087)	lr 0.00679
Train [79][390/3239]	Time 0.232 (0.846)	Data Time 0.002 (0.099)	Loss 2.3874 (2.3640)	Entropy 0.93489 (0.93446)	Top-1 acc 66.406 (67.437)	Top-5 acc 88.281 (86.056)	lr 0.00679
Train [79][400/3239]	Time 0.216 (0.838)	Data Time 0.002 (0.097)	Loss 2.2346 (2.3630)	Entropy 0.93486 (0.93447)	Top-1 acc 71.875 (67.481)	Top-5 acc 88.281 (86.064)	lr 0.00679
Train [79][410/3239]	Time 0.232 (0.830)	Data Time 0.001 (0.094)	Loss 2.3375 (2.3632)	Entropy 0.93480 (0.93448)	Top-1 acc 66.406 (67.470)	Top-5 acc 86.328 (86.052)	lr 0.00679
Train [79][420/3239]	Time 0.223 (0.821)	Data Time 0.001 (0.092)	Loss 2.5631 (2.3632)	Entropy 0.93477 (0.93449)	Top-1 acc 62.109 (67.450)	Top-5 acc 80.859 (86.059)	lr 0.00679
Train [79][430/3239]	Time 0.211 (0.813)	Data Time 0.001 (0.090)	Loss 2.4058 (2.3634)	Entropy 0.93472 (0.93449)	Top-1 acc 65.625 (67.437)	Top-5 acc 85.156 (86.044)	lr 0.00679
Train [79][440/3239]	Time 0.401 (0.805)	Data Time 0.001 (0.088)	Loss 2.3520 (2.3642)	Entropy 0.93470 (0.93450)	Top-1 acc 66.406 (67.391)	Top-5 acc 85.156 (86.037)	lr 0.00679
Train [79][450/3239]	Time 2.610 (0.798)	Data Time 0.001 (0.086)	Loss 2.3675 (2.3640)	Entropy 0.93470 (0.93450)	Top-1 acc 67.188 (67.390)	Top-5 acc 86.328 (86.048)	lr 0.00678
Train [79][460/3239]	Time 0.269 (0.786)	Data Time 0.046 (0.084)	Loss 2.3274 (2.3637)	Entropy 0.93470 (0.93451)	Top-1 acc 68.359 (67.377)	Top-5 acc 88.281 (86.044)	lr 0.00678
Train [79][470/3239]	Time 0.223 (0.780)	Data Time 0.001 (0.083)	Loss 2.5772 (2.3634)	Entropy 0.93470 (0.93451)	Top-1 acc 63.281 (67.378)	Top-5 acc 81.250 (86.049)	lr 0.00678
Train [79][480/3239]	Time 0.420 (0.774)	Data Time 0.002 (0.081)	Loss 2.4099 (2.3645)	Entropy 0.93464 (0.93452)	Top-1 acc 67.188 (67.348)	Top-5 acc 83.984 (86.031)	lr 0.00678
Train [79][490/3239]	Time 0.240 (0.768)	Data Time 0.001 (0.079)	Loss 2.3167 (2.3644)	Entropy 0.93461 (0.93452)	Top-1 acc 67.969 (67.344)	Top-5 acc 87.109 (86.029)	lr 0.00678
Train [79][500/3239]	Time 0.242 (0.763)	Data Time 0.001 (0.078)	Loss 2.2251 (2.3642)	Entropy 0.93461 (0.93452)	Top-1 acc 69.922 (67.354)	Top-5 acc 88.672 (86.033)	lr 0.00678
Train [79][510/3239]	Time 0.233 (0.758)	Data Time 0.001 (0.076)	Loss 2.3546 (2.3647)	Entropy 0.93454 (0.93452)	Top-1 acc 69.531 (67.334)	Top-5 acc 86.719 (86.023)	lr 0.00678
Train [79][520/3239]	Time 0.236 (0.753)	Data Time 0.001 (0.075)	Loss 2.3264 (2.3632)	Entropy 0.93444 (0.93452)	Top-1 acc 71.875 (67.370)	Top-5 acc 86.328 (86.055)	lr 0.00678
Train [79][530/3239]	Time 0.236 (0.748)	Data Time 0.001 (0.073)	Loss 2.3110 (2.3625)	Entropy 0.93438 (0.93452)	Top-1 acc 67.969 (67.386)	Top-5 acc 87.109 (86.068)	lr 0.00678
Train [79][540/3239]	Time 0.264 (0.743)	Data Time 0.001 (0.072)	Loss 2.3832 (2.3629)	Entropy 0.93444 (0.93452)	Top-1 acc 65.625 (67.364)	Top-5 acc 87.891 (86.066)	lr 0.00678
Train [79][550/3239]	Time 0.228 (0.738)	Data Time 0.001 (0.071)	Loss 2.4589 (2.3638)	Entropy 0.93444 (0.93451)	Top-1 acc 63.281 (67.305)	Top-5 acc 82.812 (86.044)	lr 0.00678
Train [79][560/3239]	Time 2.502 (0.733)	Data Time 0.001 (0.070)	Loss 2.4136 (2.3645)	Entropy 0.93444 (0.93451)	Top-1 acc 67.969 (67.284)	Top-5 acc 83.984 (86.027)	lr 0.00677
Train [79][570/3239]	Time 0.406 (0.725)	Data Time 0.001 (0.068)	Loss 2.3721 (2.3637)	Entropy 0.93440 (0.93451)	Top-1 acc 66.797 (67.317)	Top-5 acc 86.328 (86.025)	lr 0.00677
Train [79][580/3239]	Time 0.240 (0.721)	Data Time 0.001 (0.067)	Loss 2.3066 (2.3637)	Entropy 0.93442 (0.93451)	Top-1 acc 67.578 (67.320)	Top-5 acc 87.109 (86.024)	lr 0.00677
Train [79][590/3239]	Time 0.262 (0.717)	Data Time 0.002 (0.066)	Loss 2.3668 (2.3627)	Entropy 0.93440 (0.93451)	Top-1 acc 65.625 (67.333)	Top-5 acc 87.500 (86.043)	lr 0.00677
Train [79][600/3239]	Time 0.232 (0.713)	Data Time 0.001 (0.065)	Loss 2.5305 (2.3635)	Entropy 0.93443 (0.93451)	Top-1 acc 62.500 (67.321)	Top-5 acc 83.594 (86.029)	lr 0.00677
Train [79][610/3239]	Time 0.248 (0.709)	Data Time 0.001 (0.064)	Loss 2.4625 (2.3633)	Entropy 0.93439 (0.93450)	Top-1 acc 64.062 (67.329)	Top-5 acc 82.812 (86.035)	lr 0.00677
Train [79][620/3239]	Time 0.255 (0.706)	Data Time 0.001 (0.063)	Loss 2.3489 (2.3642)	Entropy 0.93437 (0.93450)	Top-1 acc 67.969 (67.290)	Top-5 acc 85.938 (86.014)	lr 0.00677
Train [79][630/3239]	Time 0.242 (0.703)	Data Time 0.001 (0.062)	Loss 2.2753 (2.3649)	Entropy 0.93435 (0.93450)	Top-1 acc 69.531 (67.277)	Top-5 acc 87.500 (86.004)	lr 0.00677
Train [79][640/3239]	Time 0.228 (0.699)	Data Time 0.001 (0.061)	Loss 2.3490 (2.3648)	Entropy 0.93433 (0.93450)	Top-1 acc 67.578 (67.288)	Top-5 acc 86.328 (86.004)	lr 0.00677
Train [79][650/3239]	Time 0.237 (0.695)	Data Time 0.001 (0.060)	Loss 2.2445 (2.3646)	Entropy 0.93435 (0.93450)	Top-1 acc 69.531 (67.301)	Top-5 acc 89.844 (86.013)	lr 0.00677
Train [79][660/3239]	Time 0.233 (0.692)	Data Time 0.001 (0.059)	Loss 2.3090 (2.3646)	Entropy 0.93432 (0.93449)	Top-1 acc 70.312 (67.306)	Top-5 acc 87.500 (86.019)	lr 0.00677
Train [79][670/3239]	Time 2.685 (0.689)	Data Time 0.002 (0.058)	Loss 2.3106 (2.3651)	Entropy 0.93432 (0.93449)	Top-1 acc 72.656 (67.298)	Top-5 acc 84.375 (86.000)	lr 0.00676
Train [79][680/3239]	Time 0.283 (0.683)	Data Time 0.001 (0.058)	Loss 2.3667 (2.3649)	Entropy 0.93428 (0.93449)	Top-1 acc 70.703 (67.309)	Top-5 acc 87.500 (86.003)	lr 0.00676
Train [79][690/3239]	Time 0.257 (0.680)	Data Time 0.002 (0.057)	Loss 2.2827 (2.3651)	Entropy 0.93422 (0.93448)	Top-1 acc 66.016 (67.297)	Top-5 acc 87.109 (86.000)	lr 0.00676
Train [79][700/3239]	Time 0.344 (0.677)	Data Time 0.002 (0.056)	Loss 2.3266 (2.3648)	Entropy 0.93420 (0.93448)	Top-1 acc 67.578 (67.317)	Top-5 acc 86.719 (86.005)	lr 0.00676
Train [79][710/3239]	Time 0.236 (0.674)	Data Time 0.001 (0.055)	Loss 2.3059 (2.3649)	Entropy 0.93417 (0.93448)	Top-1 acc 67.188 (67.305)	Top-5 acc 87.500 (86.000)	lr 0.00676
Train [79][720/3239]	Time 0.241 (0.672)	Data Time 0.001 (0.055)	Loss 2.3858 (2.3647)	Entropy 0.93412 (0.93447)	Top-1 acc 68.750 (67.319)	Top-5 acc 83.984 (85.988)	lr 0.00676
Train [79][730/3239]	Time 0.238 (0.669)	Data Time 0.001 (0.054)	Loss 2.2622 (2.3640)	Entropy 0.93416 (0.93447)	Top-1 acc 70.703 (67.332)	Top-5 acc 89.844 (86.002)	lr 0.00676
Train [79][740/3239]	Time 0.260 (0.737)	Data Time 0.003 (0.053)	Loss 2.3364 (2.3642)	Entropy 0.93407 (0.93446)	Top-1 acc 69.531 (67.325)	Top-5 acc 85.547 (86.002)	lr 0.00676
Train [79][750/3239]	Time 0.228 (0.735)	Data Time 0.002 (0.052)	Loss 2.4421 (2.3642)	Entropy 0.93406 (0.93446)	Top-1 acc 70.312 (67.336)	Top-5 acc 84.375 (85.999)	lr 0.00676
Train [79][760/3239]	Time 0.243 (0.732)	Data Time 0.002 (0.052)	Loss 2.3827 (2.3642)	Entropy 0.93403 (0.93445)	Top-1 acc 62.109 (67.326)	Top-5 acc 86.719 (85.998)	lr 0.00676
Train [79][770/3239]	Time 0.235 (0.729)	Data Time 0.001 (0.051)	Loss 2.5209 (2.3649)	Entropy 0.93403 (0.93445)	Top-1 acc 63.672 (67.301)	Top-5 acc 83.594 (85.989)	lr 0.00676
Train [79][780/3239]	Time 2.640 (0.726)	Data Time 0.002 (0.051)	Loss 2.3559 (2.3652)	Entropy 0.93403 (0.93444)	Top-1 acc 67.578 (67.306)	Top-5 acc 87.109 (85.990)	lr 0.00676
Train [79][790/3239]	Time 0.367 (0.720)	Data Time 0.001 (0.050)	Loss 2.1989 (2.3652)	Entropy 0.93409 (0.93444)	Top-1 acc 71.484 (67.295)	Top-5 acc 90.234 (85.990)	lr 0.00675
Train [79][800/3239]	Time 0.236 (0.717)	Data Time 0.001 (0.049)	Loss 2.3414 (2.3650)	Entropy 0.93409 (0.93443)	Top-1 acc 67.578 (67.296)	Top-5 acc 86.328 (85.996)	lr 0.00675
Train [79][810/3239]	Time 0.253 (0.714)	Data Time 0.001 (0.049)	Loss 2.2652 (2.3647)	Entropy 0.93404 (0.93443)	Top-1 acc 71.875 (67.305)	Top-5 acc 87.500 (85.999)	lr 0.00675
Train [79][820/3239]	Time 0.231 (0.711)	Data Time 0.001 (0.048)	Loss 2.2938 (2.3645)	Entropy 0.93402 (0.93442)	Top-1 acc 67.188 (67.309)	Top-5 acc 85.938 (86.000)	lr 0.00675
Train [79][830/3239]	Time 0.324 (0.709)	Data Time 0.001 (0.048)	Loss 2.3480 (2.3647)	Entropy 0.93401 (0.93442)	Top-1 acc 67.188 (67.303)	Top-5 acc 86.328 (85.991)	lr 0.00675
Train [79][840/3239]	Time 0.232 (0.706)	Data Time 0.001 (0.047)	Loss 2.2227 (2.3642)	Entropy 0.93401 (0.93441)	Top-1 acc 72.266 (67.316)	Top-5 acc 86.328 (85.998)	lr 0.00675
Train [79][850/3239]	Time 0.235 (0.703)	Data Time 0.001 (0.046)	Loss 2.3578 (2.3642)	Entropy 0.93398 (0.93441)	Top-1 acc 67.188 (67.324)	Top-5 acc 86.719 (85.993)	lr 0.00675
Train [79][860/3239]	Time 0.227 (0.701)	Data Time 0.001 (0.046)	Loss 2.4766 (2.3642)	Entropy 0.93396 (0.93440)	Top-1 acc 64.844 (67.326)	Top-5 acc 83.594 (85.991)	lr 0.00675
Train [79][870/3239]	Time 0.215 (0.698)	Data Time 0.001 (0.045)	Loss 2.3880 (2.3639)	Entropy 0.93392 (0.93440)	Top-1 acc 66.016 (67.339)	Top-5 acc 85.938 (85.994)	lr 0.00675
Train [79][880/3239]	Time 0.240 (0.696)	Data Time 0.001 (0.045)	Loss 2.4289 (2.3634)	Entropy 0.93387 (0.93439)	Top-1 acc 65.234 (67.346)	Top-5 acc 83.594 (86.002)	lr 0.00675
Train [79][890/3239]	Time 2.574 (0.694)	Data Time 0.002 (0.044)	Loss 2.3068 (2.3636)	Entropy 0.93387 (0.93439)	Top-1 acc 68.750 (67.342)	Top-5 acc 87.500 (85.984)	lr 0.00675
Train [79][900/3239]	Time 0.226 (0.689)	Data Time 0.001 (0.044)	Loss 2.2962 (2.3634)	Entropy 0.93386 (0.93438)	Top-1 acc 69.531 (67.351)	Top-5 acc 89.844 (85.986)	lr 0.00674
Train [79][910/3239]	Time 0.222 (0.686)	Data Time 0.001 (0.044)	Loss 2.3443 (2.3637)	Entropy 0.93385 (0.93437)	Top-1 acc 67.969 (67.343)	Top-5 acc 86.719 (85.980)	lr 0.00674
Train [79][920/3239]	Time 0.331 (0.684)	Data Time 0.001 (0.043)	Loss 2.1454 (2.3634)	Entropy 0.93380 (0.93437)	Top-1 acc 71.875 (67.352)	Top-5 acc 90.625 (85.982)	lr 0.00674
Train [79][930/3239]	Time 0.238 (0.682)	Data Time 0.001 (0.043)	Loss 2.3989 (2.3639)	Entropy 0.93377 (0.93436)	Top-1 acc 66.406 (67.340)	Top-5 acc 85.938 (85.976)	lr 0.00674
Train [79][940/3239]	Time 0.216 (0.680)	Data Time 0.001 (0.042)	Loss 2.4298 (2.3637)	Entropy 0.93379 (0.93436)	Top-1 acc 65.625 (67.341)	Top-5 acc 85.156 (85.983)	lr 0.00674
Train [79][950/3239]	Time 0.210 (0.677)	Data Time 0.001 (0.042)	Loss 2.3598 (2.3636)	Entropy 0.93374 (0.93435)	Top-1 acc 68.359 (67.356)	Top-5 acc 87.109 (85.987)	lr 0.00674
Train [79][960/3239]	Time 0.233 (0.675)	Data Time 0.001 (0.041)	Loss 2.4462 (2.3637)	Entropy 0.93374 (0.93434)	Top-1 acc 63.672 (67.355)	Top-5 acc 84.375 (85.984)	lr 0.00674
Train [79][970/3239]	Time 0.230 (0.673)	Data Time 0.001 (0.041)	Loss 2.3195 (2.3637)	Entropy 0.93371 (0.93434)	Top-1 acc 64.453 (67.356)	Top-5 acc 87.500 (85.986)	lr 0.00674
Train [79][980/3239]	Time 0.227 (0.671)	Data Time 0.001 (0.041)	Loss 2.3276 (2.3636)	Entropy 0.93366 (0.93433)	Top-1 acc 72.656 (67.362)	Top-5 acc 86.719 (85.989)	lr 0.00674
Train [79][990/3239]	Time 0.264 (0.669)	Data Time 0.002 (0.040)	Loss 2.3172 (2.3637)	Entropy 0.93267 (0.93432)	Top-1 acc 69.531 (67.362)	Top-5 acc 87.500 (85.988)	lr 0.00674
Train [79][1000/3239]	Time 2.522 (0.667)	Data Time 0.002 (0.040)	Loss 2.4227 (2.3638)	Entropy 0.93267 (0.93431)	Top-1 acc 67.188 (67.359)	Top-5 acc 84.375 (85.994)	lr 0.00674
Train [79][1010/3239]	Time 0.355 (0.663)	Data Time 0.001 (0.039)	Loss 2.4006 (2.3639)	Entropy 0.93252 (0.93429)	Top-1 acc 64.062 (67.361)	Top-5 acc 85.547 (85.994)	lr 0.00673
Train [79][1020/3239]	Time 0.250 (0.661)	Data Time 0.001 (0.039)	Loss 2.4046 (2.3640)	Entropy 0.93248 (0.93427)	Top-1 acc 64.844 (67.348)	Top-5 acc 85.938 (85.993)	lr 0.00673
Train [79][1030/3239]	Time 0.276 (0.659)	Data Time 0.001 (0.039)	Loss 2.2985 (2.3643)	Entropy 0.93235 (0.93425)	Top-1 acc 70.703 (67.339)	Top-5 acc 87.500 (85.986)	lr 0.00673
Train [79][1040/3239]	Time 0.233 (0.657)	Data Time 0.002 (0.038)	Loss 2.3096 (2.3640)	Entropy 0.93233 (0.93423)	Top-1 acc 71.484 (67.347)	Top-5 acc 87.891 (85.991)	lr 0.00673
Train [79][1050/3239]	Time 0.318 (0.656)	Data Time 0.001 (0.038)	Loss 2.4594 (2.3642)	Entropy 0.93217 (0.93421)	Top-1 acc 64.062 (67.345)	Top-5 acc 84.375 (85.988)	lr 0.00673
Train [79][1060/3239]	Time 0.227 (0.654)	Data Time 0.001 (0.038)	Loss 2.2098 (2.3648)	Entropy 0.93212 (0.93420)	Top-1 acc 71.875 (67.334)	Top-5 acc 89.062 (85.971)	lr 0.00673
Train [79][1070/3239]	Time 0.218 (0.652)	Data Time 0.001 (0.037)	Loss 2.2280 (2.3645)	Entropy 0.93214 (0.93418)	Top-1 acc 72.266 (67.340)	Top-5 acc 88.672 (85.981)	lr 0.00673
Train [79][1080/3239]	Time 0.202 (0.651)	Data Time 0.001 (0.037)	Loss 2.3201 (2.3643)	Entropy 0.93211 (0.93416)	Top-1 acc 71.875 (67.346)	Top-5 acc 86.328 (85.985)	lr 0.00673
Train [79][1090/3239]	Time 0.235 (0.649)	Data Time 0.001 (0.037)	Loss 2.2379 (2.3638)	Entropy 0.93212 (0.93414)	Top-1 acc 69.531 (67.355)	Top-5 acc 87.500 (85.997)	lr 0.00673
Train [79][1100/3239]	Time 0.420 (0.693)	Data Time 0.004 (0.036)	Loss 2.3437 (2.3638)	Entropy 0.93210 (0.93412)	Top-1 acc 69.531 (67.353)	Top-5 acc 84.375 (85.998)	lr 0.00673
Train [79][1110/3239]	Time 3.367 (0.692)	Data Time 0.003 (0.036)	Loss 2.2850 (2.3639)	Entropy 0.93210 (0.93410)	Top-1 acc 66.797 (67.353)	Top-5 acc 87.109 (85.993)	lr 0.00673
Train [79][1120/3239]	Time 0.239 (0.688)	Data Time 0.001 (0.036)	Loss 2.2704 (2.3634)	Entropy 0.93206 (0.93408)	Top-1 acc 69.922 (67.365)	Top-5 acc 88.281 (86.004)	lr 0.00672
Train [79][1130/3239]	Time 0.232 (0.686)	Data Time 0.001 (0.035)	Loss 2.4424 (2.3635)	Entropy 0.93207 (0.93407)	Top-1 acc 62.500 (67.354)	Top-5 acc 85.547 (86.004)	lr 0.00672
Train [79][1140/3239]	Time 0.316 (0.684)	Data Time 0.001 (0.035)	Loss 2.4999 (2.3637)	Entropy 0.93203 (0.93405)	Top-1 acc 67.969 (67.345)	Top-5 acc 84.766 (86.003)	lr 0.00672
Train [79][1150/3239]	Time 0.233 (0.682)	Data Time 0.001 (0.035)	Loss 2.5603 (2.3642)	Entropy 0.93188 (0.93403)	Top-1 acc 63.672 (67.334)	Top-5 acc 82.422 (85.989)	lr 0.00672
Train [79][1160/3239]	Time 0.224 (0.680)	Data Time 0.001 (0.035)	Loss 2.4665 (2.3649)	Entropy 0.93181 (0.93401)	Top-1 acc 64.844 (67.309)	Top-5 acc 85.156 (85.982)	lr 0.00672
Train [79][1170/3239]	Time 0.211 (0.679)	Data Time 0.001 (0.034)	Loss 2.3610 (2.3648)	Entropy 0.93167 (0.93399)	Top-1 acc 68.359 (67.314)	Top-5 acc 86.719 (85.985)	lr 0.00672
Train [79][1180/3239]	Time 0.322 (0.677)	Data Time 0.001 (0.034)	Loss 2.5546 (2.3651)	Entropy 0.93164 (0.93397)	Top-1 acc 60.938 (67.305)	Top-5 acc 85.547 (85.980)	lr 0.00672
Train [79][1190/3239]	Time 0.264 (0.675)	Data Time 0.002 (0.034)	Loss 2.3650 (2.3654)	Entropy 0.93160 (0.93395)	Top-1 acc 66.797 (67.303)	Top-5 acc 87.891 (85.975)	lr 0.00672
Train [79][1200/3239]	Time 0.222 (0.674)	Data Time 0.001 (0.033)	Loss 2.3659 (2.3655)	Entropy 0.93152 (0.93393)	Top-1 acc 66.016 (67.291)	Top-5 acc 86.328 (85.974)	lr 0.00672
Train [79][1210/3239]	Time 0.261 (0.672)	Data Time 0.001 (0.033)	Loss 2.3566 (2.3656)	Entropy 0.93155 (0.93391)	Top-1 acc 66.797 (67.291)	Top-5 acc 85.156 (85.972)	lr 0.00672
Train [79][1220/3239]	Time 2.721 (0.671)	Data Time 0.002 (0.033)	Loss 2.2939 (2.3657)	Entropy 0.93155 (0.93389)	Top-1 acc 66.797 (67.288)	Top-5 acc 87.109 (85.970)	lr 0.00672
Train [79][1230/3239]	Time 0.310 (0.667)	Data Time 0.001 (0.033)	Loss 2.4515 (2.3658)	Entropy 0.93151 (0.93387)	Top-1 acc 65.234 (67.279)	Top-5 acc 85.156 (85.971)	lr 0.00671
Train [79][1240/3239]	Time 0.231 (0.666)	Data Time 0.001 (0.032)	Loss 2.3026 (2.3657)	Entropy 0.93150 (0.93385)	Top-1 acc 68.359 (67.285)	Top-5 acc 85.547 (85.971)	lr 0.00671
Train [79][1250/3239]	Time 0.197 (0.664)	Data Time 0.001 (0.032)	Loss 2.2378 (2.3656)	Entropy 0.93153 (0.93384)	Top-1 acc 73.828 (67.293)	Top-5 acc 86.719 (85.974)	lr 0.00671
Train [79][1260/3239]	Time 0.229 (0.662)	Data Time 0.001 (0.032)	Loss 2.1610 (2.3658)	Entropy 0.93146 (0.93382)	Top-1 acc 71.094 (67.285)	Top-5 acc 90.234 (85.973)	lr 0.00671
Train [79][1270/3239]	Time 0.223 (0.661)	Data Time 0.001 (0.032)	Loss 2.5189 (2.3660)	Entropy 0.93145 (0.93380)	Top-1 acc 66.016 (67.277)	Top-5 acc 83.594 (85.966)	lr 0.00671
Train [79][1280/3239]	Time 0.212 (0.659)	Data Time 0.001 (0.031)	Loss 2.4995 (2.3663)	Entropy 0.93141 (0.93378)	Top-1 acc 59.375 (67.268)	Top-5 acc 84.375 (85.963)	lr 0.00671
Train [79][1290/3239]	Time 0.229 (0.658)	Data Time 0.001 (0.031)	Loss 2.3036 (2.3662)	Entropy 0.93141 (0.93376)	Top-1 acc 67.969 (67.265)	Top-5 acc 86.719 (85.964)	lr 0.00671
Train [79][1300/3239]	Time 0.229 (0.657)	Data Time 0.001 (0.031)	Loss 2.3174 (2.3658)	Entropy 0.93146 (0.93374)	Top-1 acc 67.969 (67.272)	Top-5 acc 86.719 (85.976)	lr 0.00671
Train [79][1310/3239]	Time 0.219 (0.655)	Data Time 0.001 (0.031)	Loss 2.4060 (2.3657)	Entropy 0.93144 (0.93373)	Top-1 acc 65.234 (67.270)	Top-5 acc 85.938 (85.980)	lr 0.00671
Train [79][1320/3239]	Time 0.271 (0.654)	Data Time 0.001 (0.031)	Loss 2.4088 (2.3660)	Entropy 0.93141 (0.93371)	Top-1 acc 65.625 (67.261)	Top-5 acc 85.156 (85.976)	lr 0.00671
Train [79][1330/3239]	Time 2.553 (0.652)	Data Time 0.002 (0.030)	Loss 2.2839 (2.3655)	Entropy 0.93141 (0.93369)	Top-1 acc 64.062 (67.269)	Top-5 acc 87.891 (85.982)	lr 0.00671
Train [79][1340/3239]	Time 0.220 (0.649)	Data Time 0.001 (0.030)	Loss 2.4258 (2.3659)	Entropy 0.93140 (0.93367)	Top-1 acc 67.578 (67.268)	Top-5 acc 83.984 (85.970)	lr 0.00670
Train [79][1350/3239]	Time 0.229 (0.648)	Data Time 0.001 (0.030)	Loss 2.2624 (2.3657)	Entropy 0.93139 (0.93366)	Top-1 acc 70.703 (67.276)	Top-5 acc 87.500 (85.971)	lr 0.00670
Train [79][1360/3239]	Time 0.380 (0.647)	Data Time 0.001 (0.030)	Loss 2.3299 (2.3658)	Entropy 0.93140 (0.93364)	Top-1 acc 66.016 (67.275)	Top-5 acc 85.938 (85.966)	lr 0.00670
Train [79][1370/3239]	Time 0.221 (0.646)	Data Time 0.001 (0.029)	Loss 2.3293 (2.3657)	Entropy 0.93137 (0.93362)	Top-1 acc 67.969 (67.276)	Top-5 acc 84.766 (85.971)	lr 0.00670
Train [79][1380/3239]	Time 0.226 (0.644)	Data Time 0.001 (0.029)	Loss 2.3047 (2.3656)	Entropy 0.93134 (0.93361)	Top-1 acc 67.578 (67.273)	Top-5 acc 87.891 (85.969)	lr 0.00670
Train [79][1390/3239]	Time 0.218 (0.643)	Data Time 0.001 (0.029)	Loss 2.3313 (2.3657)	Entropy 0.93126 (0.93359)	Top-1 acc 68.359 (67.269)	Top-5 acc 87.109 (85.969)	lr 0.00670
Train [79][1400/3239]	Time 0.235 (0.642)	Data Time 0.001 (0.029)	Loss 2.3082 (2.3655)	Entropy 0.93128 (0.93357)	Top-1 acc 67.578 (67.277)	Top-5 acc 85.938 (85.973)	lr 0.00670
Train [79][1410/3239]	Time 0.231 (0.641)	Data Time 0.001 (0.029)	Loss 2.3056 (2.3657)	Entropy 0.93118 (0.93356)	Top-1 acc 68.359 (67.267)	Top-5 acc 86.328 (85.970)	lr 0.00670
Train [79][1420/3239]	Time 0.234 (0.640)	Data Time 0.002 (0.028)	Loss 2.3799 (2.3656)	Entropy 0.93111 (0.93354)	Top-1 acc 66.797 (67.271)	Top-5 acc 86.719 (85.971)	lr 0.00670
Train [79][1430/3239]	Time 0.286 (0.638)	Data Time 0.001 (0.028)	Loss 2.4276 (2.3660)	Entropy 0.93113 (0.93352)	Top-1 acc 66.016 (67.259)	Top-5 acc 85.938 (85.958)	lr 0.00670
Train [79][1440/3239]	Time 2.438 (0.637)	Data Time 0.001 (0.028)	Loss 2.4324 (2.3660)	Entropy 0.93113 (0.93351)	Top-1 acc 65.625 (67.258)	Top-5 acc 83.594 (85.958)	lr 0.00670
Train [79][1450/3239]	Time 0.324 (0.634)	Data Time 0.001 (0.028)	Loss 2.5614 (2.3662)	Entropy 0.93114 (0.93349)	Top-1 acc 60.547 (67.254)	Top-5 acc 80.078 (85.953)	lr 0.00670
Train [79][1460/3239]	Time 0.234 (0.633)	Data Time 0.001 (0.028)	Loss 2.4732 (2.3662)	Entropy 0.93111 (0.93348)	Top-1 acc 64.062 (67.245)	Top-5 acc 84.766 (85.954)	lr 0.00669
Train [79][1470/3239]	Time 0.260 (0.670)	Data Time 0.002 (0.028)	Loss 2.4594 (2.3662)	Entropy 0.93111 (0.93346)	Top-1 acc 66.797 (67.246)	Top-5 acc 85.938 (85.952)	lr 0.00669
Train [79][1480/3239]	Time 0.229 (0.669)	Data Time 0.002 (0.027)	Loss 2.4823 (2.3665)	Entropy 0.93105 (0.93344)	Top-1 acc 67.969 (67.239)	Top-5 acc 83.203 (85.945)	lr 0.00669
Train [79][1490/3239]	Time 0.338 (0.668)	Data Time 0.002 (0.027)	Loss 2.4416 (2.3666)	Entropy 0.93101 (0.93343)	Top-1 acc 67.969 (67.240)	Top-5 acc 84.375 (85.945)	lr 0.00669
Train [79][1500/3239]	Time 0.235 (0.667)	Data Time 0.001 (0.027)	Loss 2.4056 (2.3666)	Entropy 0.93092 (0.93341)	Top-1 acc 66.406 (67.242)	Top-5 acc 86.328 (85.944)	lr 0.00669
Train [79][1510/3239]	Time 0.235 (0.665)	Data Time 0.001 (0.027)	Loss 2.4993 (2.3667)	Entropy 0.93089 (0.93339)	Top-1 acc 69.531 (67.248)	Top-5 acc 84.766 (85.944)	lr 0.00669
Train [79][1520/3239]	Time 0.229 (0.664)	Data Time 0.001 (0.027)	Loss 2.4657 (2.3666)	Entropy 0.93086 (0.93338)	Top-1 acc 66.016 (67.245)	Top-5 acc 81.250 (85.952)	lr 0.00669
Train [79][1530/3239]	Time 0.212 (0.663)	Data Time 0.001 (0.027)	Loss 2.3282 (2.3667)	Entropy 0.93083 (0.93336)	Top-1 acc 69.141 (67.239)	Top-5 acc 85.547 (85.951)	lr 0.00669
Train [79][1540/3239]	Time 0.217 (0.662)	Data Time 0.001 (0.026)	Loss 2.2793 (2.3666)	Entropy 0.93076 (0.93334)	Top-1 acc 69.531 (67.245)	Top-5 acc 87.500 (85.957)	lr 0.00669
Train [79][1550/3239]	Time 2.434 (0.661)	Data Time 0.003 (0.026)	Loss 2.1893 (2.3669)	Entropy 0.93076 (0.93333)	Top-1 acc 71.094 (67.237)	Top-5 acc 88.672 (85.952)	lr 0.00669
Train [79][1560/3239]	Time 0.226 (0.658)	Data Time 0.001 (0.026)	Loss 2.4150 (2.3670)	Entropy 0.93075 (0.93331)	Top-1 acc 66.016 (67.234)	Top-5 acc 84.375 (85.943)	lr 0.00669
Train [79][1570/3239]	Time 0.267 (0.657)	Data Time 0.001 (0.026)	Loss 2.3617 (2.3671)	Entropy 0.93074 (0.93329)	Top-1 acc 67.188 (67.234)	Top-5 acc 86.719 (85.945)	lr 0.00668
Train [79][1580/3239]	Time 0.330 (0.656)	Data Time 0.001 (0.026)	Loss 2.3482 (2.3671)	Entropy 0.93066 (0.93328)	Top-1 acc 67.578 (67.230)	Top-5 acc 87.500 (85.944)	lr 0.00668
Train [79][1590/3239]	Time 0.237 (0.654)	Data Time 0.001 (0.026)	Loss 2.3056 (2.3671)	Entropy 0.93064 (0.93326)	Top-1 acc 69.922 (67.230)	Top-5 acc 87.109 (85.948)	lr 0.00668
Train [79][1600/3239]	Time 0.230 (0.653)	Data Time 0.001 (0.025)	Loss 2.3776 (2.3674)	Entropy 0.93063 (0.93325)	Top-1 acc 69.141 (67.226)	Top-5 acc 82.812 (85.935)	lr 0.00668
Train [79][1610/3239]	Time 0.249 (0.652)	Data Time 0.001 (0.025)	Loss 2.3931 (2.3676)	Entropy 0.93067 (0.93323)	Top-1 acc 66.016 (67.223)	Top-5 acc 83.594 (85.931)	lr 0.00668
Train [79][1620/3239]	Time 0.246 (0.651)	Data Time 0.001 (0.025)	Loss 2.4867 (2.3676)	Entropy 0.93062 (0.93321)	Top-1 acc 62.500 (67.226)	Top-5 acc 84.766 (85.930)	lr 0.00668
Train [79][1630/3239]	Time 0.222 (0.650)	Data Time 0.001 (0.025)	Loss 2.4227 (2.3677)	Entropy 0.93061 (0.93320)	Top-1 acc 65.625 (67.219)	Top-5 acc 84.375 (85.931)	lr 0.00668
Train [79][1640/3239]	Time 0.218 (0.649)	Data Time 0.001 (0.025)	Loss 2.2172 (2.3675)	Entropy 0.93067 (0.93318)	Top-1 acc 71.875 (67.222)	Top-5 acc 89.453 (85.934)	lr 0.00668
Train [79][1650/3239]	Time 0.243 (0.648)	Data Time 0.001 (0.025)	Loss 2.3746 (2.3676)	Entropy 0.93063 (0.93317)	Top-1 acc 66.797 (67.219)	Top-5 acc 86.328 (85.933)	lr 0.00668
Train [79][1660/3239]	Time 2.521 (0.647)	Data Time 0.001 (0.025)	Loss 2.3608 (2.3678)	Entropy 0.93063 (0.93315)	Top-1 acc 66.797 (67.214)	Top-5 acc 83.984 (85.932)	lr 0.00668
Train [79][1670/3239]	Time 0.231 (0.645)	Data Time 0.001 (0.024)	Loss 2.5033 (2.3680)	Entropy 0.93056 (0.93314)	Top-1 acc 60.547 (67.202)	Top-5 acc 82.422 (85.926)	lr 0.00668
Train [79][1680/3239]	Time 0.224 (0.644)	Data Time 0.001 (0.024)	Loss 2.3366 (2.3677)	Entropy 0.93057 (0.93312)	Top-1 acc 69.141 (67.211)	Top-5 acc 85.938 (85.929)	lr 0.00667
Train [79][1690/3239]	Time 0.225 (0.643)	Data Time 0.001 (0.024)	Loss 2.6109 (2.3681)	Entropy 0.93056 (0.93311)	Top-1 acc 62.109 (67.204)	Top-5 acc 80.859 (85.923)	lr 0.00667
Train [79][1700/3239]	Time 0.215 (0.642)	Data Time 0.001 (0.024)	Loss 2.4789 (2.3679)	Entropy 0.93055 (0.93309)	Top-1 acc 62.891 (67.206)	Top-5 acc 82.422 (85.925)	lr 0.00667
Train [79][1710/3239]	Time 0.305 (0.641)	Data Time 0.001 (0.024)	Loss 2.2995 (2.3676)	Entropy 0.93054 (0.93308)	Top-1 acc 69.141 (67.214)	Top-5 acc 87.109 (85.935)	lr 0.00667
Train [79][1720/3239]	Time 0.257 (0.640)	Data Time 0.001 (0.024)	Loss 2.2994 (2.3677)	Entropy 0.93055 (0.93306)	Top-1 acc 65.625 (67.208)	Top-5 acc 88.672 (85.934)	lr 0.00667
Train [79][1730/3239]	Time 0.230 (0.639)	Data Time 0.001 (0.024)	Loss 2.6375 (2.3680)	Entropy 0.93053 (0.93305)	Top-1 acc 64.844 (67.201)	Top-5 acc 79.688 (85.931)	lr 0.00667
Train [79][1740/3239]	Time 0.214 (0.638)	Data Time 0.001 (0.024)	Loss 2.3567 (2.3681)	Entropy 0.93052 (0.93303)	Top-1 acc 68.750 (67.198)	Top-5 acc 87.109 (85.931)	lr 0.00667
Train [79][1750/3239]	Time 0.221 (0.637)	Data Time 0.001 (0.023)	Loss 2.2676 (2.3681)	Entropy 0.93053 (0.93302)	Top-1 acc 66.016 (67.195)	Top-5 acc 85.156 (85.933)	lr 0.00667
Train [79][1760/3239]	Time 0.263 (0.636)	Data Time 0.001 (0.023)	Loss 2.3289 (2.3679)	Entropy 0.93051 (0.93300)	Top-1 acc 65.234 (67.197)	Top-5 acc 85.547 (85.934)	lr 0.00667
Train [79][1770/3239]	Time 2.635 (0.635)	Data Time 0.001 (0.023)	Loss 2.3222 (2.3679)	Entropy 0.93051 (0.93299)	Top-1 acc 67.578 (67.199)	Top-5 acc 87.109 (85.933)	lr 0.00667
Train [79][1780/3239]	Time 0.265 (0.633)	Data Time 0.002 (0.023)	Loss 2.2087 (2.3681)	Entropy 0.93051 (0.93298)	Top-1 acc 71.875 (67.194)	Top-5 acc 89.844 (85.929)	lr 0.00667
Train [79][1790/3239]	Time 0.229 (0.632)	Data Time 0.001 (0.023)	Loss 2.3944 (2.3681)	Entropy 0.93047 (0.93296)	Top-1 acc 66.406 (67.187)	Top-5 acc 84.375 (85.929)	lr 0.00666
Train [79][1800/3239]	Time 0.340 (0.631)	Data Time 0.001 (0.023)	Loss 2.4248 (2.3682)	Entropy 0.93049 (0.93295)	Top-1 acc 66.016 (67.188)	Top-5 acc 84.766 (85.930)	lr 0.00666
Train [79][1810/3239]	Time 0.269 (0.631)	Data Time 0.001 (0.023)	Loss 2.2415 (2.3683)	Entropy 0.93040 (0.93293)	Top-1 acc 73.047 (67.187)	Top-5 acc 88.672 (85.930)	lr 0.00666
Train [79][1820/3239]	Time 0.228 (0.630)	Data Time 0.001 (0.023)	Loss 2.2405 (2.3683)	Entropy 0.93036 (0.93292)	Top-1 acc 73.047 (67.192)	Top-5 acc 85.938 (85.929)	lr 0.00666
Train [79][1830/3239]	Time 0.239 (0.659)	Data Time 0.002 (0.022)	Loss 2.3809 (2.3687)	Entropy 0.93025 (0.93291)	Top-1 acc 68.750 (67.185)	Top-5 acc 84.375 (85.922)	lr 0.00666
Train [79][1840/3239]	Time 0.323 (0.658)	Data Time 0.002 (0.022)	Loss 2.2710 (2.3683)	Entropy 0.93024 (0.93289)	Top-1 acc 69.922 (67.191)	Top-5 acc 88.281 (85.931)	lr 0.00666
Train [79][1850/3239]	Time 0.238 (0.657)	Data Time 0.001 (0.022)	Loss 2.3613 (2.3685)	Entropy 0.93028 (0.93288)	Top-1 acc 66.016 (67.187)	Top-5 acc 84.375 (85.928)	lr 0.00666
Train [79][1860/3239]	Time 0.229 (0.656)	Data Time 0.002 (0.022)	Loss 2.3782 (2.3687)	Entropy 0.93030 (0.93286)	Top-1 acc 66.797 (67.180)	Top-5 acc 86.719 (85.926)	lr 0.00666
Train [79][1870/3239]	Time 0.259 (0.655)	Data Time 0.001 (0.022)	Loss 2.3692 (2.3687)	Entropy 0.93026 (0.93285)	Top-1 acc 67.578 (67.178)	Top-5 acc 85.156 (85.927)	lr 0.00666
Train [79][1880/3239]	Time 2.664 (0.654)	Data Time 0.001 (0.022)	Loss 2.4553 (2.3688)	Entropy 0.93026 (0.93284)	Top-1 acc 66.016 (67.176)	Top-5 acc 84.766 (85.926)	lr 0.00666
Train [79][1890/3239]	Time 0.241 (0.652)	Data Time 0.001 (0.022)	Loss 2.3592 (2.3687)	Entropy 0.93015 (0.93282)	Top-1 acc 69.531 (67.177)	Top-5 acc 86.719 (85.928)	lr 0.00666
Train [79][1900/3239]	Time 0.228 (0.651)	Data Time 0.001 (0.022)	Loss 2.5898 (2.3689)	Entropy 0.93013 (0.93281)	Top-1 acc 64.062 (67.173)	Top-5 acc 82.812 (85.926)	lr 0.00665
Train [79][1910/3239]	Time 0.226 (0.650)	Data Time 0.001 (0.022)	Loss 2.2567 (2.3689)	Entropy 0.93007 (0.93279)	Top-1 acc 72.656 (67.176)	Top-5 acc 88.672 (85.925)	lr 0.00665
Train [79][1920/3239]	Time 0.221 (0.649)	Data Time 0.001 (0.021)	Loss 2.2572 (2.3691)	Entropy 0.93007 (0.93278)	Top-1 acc 69.922 (67.171)	Top-5 acc 89.844 (85.924)	lr 0.00665
Train [79][1930/3239]	Time 0.340 (0.649)	Data Time 0.001 (0.021)	Loss 2.2332 (2.3691)	Entropy 0.93000 (0.93276)	Top-1 acc 69.922 (67.175)	Top-5 acc 87.500 (85.922)	lr 0.00665
Train [79][1940/3239]	Time 0.230 (0.648)	Data Time 0.001 (0.021)	Loss 2.2454 (2.3689)	Entropy 0.92993 (0.93275)	Top-1 acc 69.531 (67.180)	Top-5 acc 88.281 (85.924)	lr 0.00665
Train [79][1950/3239]	Time 0.224 (0.647)	Data Time 0.001 (0.021)	Loss 2.3461 (2.3691)	Entropy 0.92991 (0.93274)	Top-1 acc 65.234 (67.168)	Top-5 acc 87.500 (85.921)	lr 0.00665
Train [79][1960/3239]	Time 0.223 (0.646)	Data Time 0.001 (0.021)	Loss 2.4107 (2.3693)	Entropy 0.92989 (0.93272)	Top-1 acc 66.406 (67.167)	Top-5 acc 84.375 (85.916)	lr 0.00665
Train [79][1970/3239]	Time 0.226 (0.645)	Data Time 0.001 (0.021)	Loss 2.7336 (2.3696)	Entropy 0.92989 (0.93271)	Top-1 acc 59.766 (67.158)	Top-5 acc 80.078 (85.912)	lr 0.00665
Train [79][1980/3239]	Time 0.233 (0.644)	Data Time 0.001 (0.021)	Loss 2.5368 (2.3701)	Entropy 0.92991 (0.93269)	Top-1 acc 62.500 (67.151)	Top-5 acc 84.766 (85.903)	lr 0.00665
Train [79][1990/3239]	Time 2.532 (0.643)	Data Time 0.001 (0.021)	Loss 2.2624 (2.3702)	Entropy 0.92991 (0.93268)	Top-1 acc 66.016 (67.145)	Top-5 acc 88.281 (85.904)	lr 0.00665
Train [79][2000/3239]	Time 0.228 (0.641)	Data Time 0.001 (0.021)	Loss 2.3640 (2.3701)	Entropy 0.92995 (0.93266)	Top-1 acc 67.969 (67.145)	Top-5 acc 84.766 (85.905)	lr 0.00665
Train [79][2010/3239]	Time 0.232 (0.641)	Data Time 0.001 (0.021)	Loss 2.4161 (2.3701)	Entropy 0.92991 (0.93265)	Top-1 acc 66.797 (67.147)	Top-5 acc 86.719 (85.904)	lr 0.00664
Train [79][2020/3239]	Time 0.326 (0.640)	Data Time 0.001 (0.021)	Loss 2.2245 (2.3703)	Entropy 0.92990 (0.93264)	Top-1 acc 70.703 (67.143)	Top-5 acc 87.891 (85.898)	lr 0.00664
Train [79][2030/3239]	Time 0.208 (0.639)	Data Time 0.001 (0.020)	Loss 2.3476 (2.3700)	Entropy 0.92987 (0.93262)	Top-1 acc 65.625 (67.153)	Top-5 acc 85.156 (85.901)	lr 0.00664
Train [79][2040/3239]	Time 0.265 (0.638)	Data Time 0.001 (0.020)	Loss 2.3990 (2.3703)	Entropy 0.92985 (0.93261)	Top-1 acc 66.406 (67.145)	Top-5 acc 86.328 (85.898)	lr 0.00664
Train [79][2050/3239]	Time 0.234 (0.637)	Data Time 0.001 (0.020)	Loss 2.5392 (2.3702)	Entropy 0.92985 (0.93260)	Top-1 acc 61.719 (67.143)	Top-5 acc 83.984 (85.900)	lr 0.00664
Train [79][2060/3239]	Time 0.276 (0.637)	Data Time 0.001 (0.020)	Loss 2.3009 (2.3700)	Entropy 0.92977 (0.93258)	Top-1 acc 67.188 (67.147)	Top-5 acc 88.281 (85.904)	lr 0.00664
Train [79][2070/3239]	Time 0.232 (0.636)	Data Time 0.002 (0.020)	Loss 2.2661 (2.3701)	Entropy 0.92971 (0.93257)	Top-1 acc 66.016 (67.143)	Top-5 acc 90.234 (85.898)	lr 0.00664
Train [79][2080/3239]	Time 0.219 (0.635)	Data Time 0.001 (0.020)	Loss 2.1975 (2.3701)	Entropy 0.92975 (0.93256)	Top-1 acc 76.172 (67.145)	Top-5 acc 89.844 (85.903)	lr 0.00664
Train [79][2090/3239]	Time 0.257 (0.634)	Data Time 0.001 (0.020)	Loss 2.3527 (2.3704)	Entropy 0.92973 (0.93254)	Top-1 acc 66.797 (67.140)	Top-5 acc 85.938 (85.900)	lr 0.00664
Train [79][2100/3239]	Time 2.568 (0.634)	Data Time 0.001 (0.020)	Loss 2.1758 (2.3703)	Entropy 0.92973 (0.93253)	Top-1 acc 74.219 (67.142)	Top-5 acc 89.453 (85.903)	lr 0.00664
Train [79][2110/3239]	Time 0.248 (0.632)	Data Time 0.001 (0.020)	Loss 2.2589 (2.3705)	Entropy 0.92975 (0.93252)	Top-1 acc 69.531 (67.133)	Top-5 acc 88.672 (85.901)	lr 0.00664
Train [79][2120/3239]	Time 0.236 (0.631)	Data Time 0.001 (0.020)	Loss 2.3378 (2.3708)	Entropy 0.92970 (0.93250)	Top-1 acc 68.750 (67.125)	Top-5 acc 85.547 (85.895)	lr 0.00664
Train [79][2130/3239]	Time 0.219 (0.630)	Data Time 0.001 (0.020)	Loss 2.3914 (2.3707)	Entropy 0.92971 (0.93249)	Top-1 acc 64.844 (67.121)	Top-5 acc 85.547 (85.895)	lr 0.00663
Train [79][2140/3239]	Time 0.253 (0.629)	Data Time 0.001 (0.019)	Loss 2.2483 (2.3706)	Entropy 0.92970 (0.93248)	Top-1 acc 72.656 (67.124)	Top-5 acc 87.891 (85.899)	lr 0.00663
Train [79][2150/3239]	Time 0.332 (0.629)	Data Time 0.001 (0.019)	Loss 2.3084 (2.3708)	Entropy 0.92956 (0.93246)	Top-1 acc 68.359 (67.120)	Top-5 acc 87.891 (85.896)	lr 0.00663
Train [79][2160/3239]	Time 0.238 (0.628)	Data Time 0.001 (0.019)	Loss 2.3572 (2.3707)	Entropy 0.92956 (0.93245)	Top-1 acc 67.188 (67.121)	Top-5 acc 85.938 (85.899)	lr 0.00663
Train [79][2170/3239]	Time 0.218 (0.627)	Data Time 0.001 (0.019)	Loss 2.2693 (2.3706)	Entropy 0.92953 (0.93244)	Top-1 acc 69.922 (67.123)	Top-5 acc 88.281 (85.901)	lr 0.00663
Train [79][2180/3239]	Time 0.217 (0.627)	Data Time 0.001 (0.019)	Loss 2.5033 (2.3709)	Entropy 0.92950 (0.93242)	Top-1 acc 66.016 (67.114)	Top-5 acc 82.422 (85.898)	lr 0.00663
Train [79][2190/3239]	Time 0.341 (0.650)	Data Time 0.003 (0.019)	Loss 2.4652 (2.3710)	Entropy 0.92924 (0.93241)	Top-1 acc 63.281 (67.109)	Top-5 acc 81.250 (85.895)	lr 0.00663
Train [79][2200/3239]	Time 0.232 (0.649)	Data Time 0.002 (0.019)	Loss 2.3500 (2.3714)	Entropy 0.92920 (0.93240)	Top-1 acc 66.406 (67.097)	Top-5 acc 85.938 (85.888)	lr 0.00663
Train [79][2210/3239]	Time 2.462 (0.649)	Data Time 0.003 (0.019)	Loss 2.2629 (2.3713)	Entropy 0.92920 (0.93238)	Top-1 acc 67.969 (67.100)	Top-5 acc 87.500 (85.887)	lr 0.00663
Train [79][2220/3239]	Time 0.242 (0.647)	Data Time 0.002 (0.019)	Loss 2.6043 (2.3713)	Entropy 0.92918 (0.93237)	Top-1 acc 63.281 (67.108)	Top-5 acc 80.469 (85.889)	lr 0.00663
Train [79][2230/3239]	Time 0.240 (0.646)	Data Time 0.002 (0.019)	Loss 2.3190 (2.3714)	Entropy 0.92905 (0.93235)	Top-1 acc 68.750 (67.105)	Top-5 acc 87.109 (85.885)	lr 0.00663
Train [79][2240/3239]	Time 0.370 (0.645)	Data Time 0.002 (0.019)	Loss 2.3414 (2.3715)	Entropy 0.92896 (0.93234)	Top-1 acc 66.797 (67.100)	Top-5 acc 85.547 (85.883)	lr 0.00662
Train [79][2250/3239]	Time 0.230 (0.645)	Data Time 0.001 (0.019)	Loss 2.4188 (2.3718)	Entropy 0.92893 (0.93232)	Top-1 acc 67.188 (67.094)	Top-5 acc 85.547 (85.879)	lr 0.00662
Train [79][2260/3239]	Time 0.234 (0.644)	Data Time 0.001 (0.019)	Loss 2.2252 (2.3719)	Entropy 0.92882 (0.93231)	Top-1 acc 69.922 (67.091)	Top-5 acc 90.234 (85.878)	lr 0.00662
Train [79][2270/3239]	Time 0.233 (0.643)	Data Time 0.001 (0.018)	Loss 2.3786 (2.3718)	Entropy 0.92879 (0.93229)	Top-1 acc 66.016 (67.093)	Top-5 acc 86.719 (85.877)	lr 0.00662
Train [79][2280/3239]	Time 0.256 (0.643)	Data Time 0.001 (0.018)	Loss 2.1167 (2.3718)	Entropy 0.92873 (0.93228)	Top-1 acc 74.609 (67.093)	Top-5 acc 89.062 (85.882)	lr 0.00662
Train [79][2290/3239]	Time 0.231 (0.642)	Data Time 0.001 (0.018)	Loss 2.3773 (2.3719)	Entropy 0.92865 (0.93226)	Top-1 acc 66.797 (67.087)	Top-5 acc 86.328 (85.878)	lr 0.00662
Train [79][2300/3239]	Time 0.249 (0.641)	Data Time 0.001 (0.018)	Loss 2.3597 (2.3721)	Entropy 0.92868 (0.93224)	Top-1 acc 66.797 (67.082)	Top-5 acc 85.547 (85.874)	lr 0.00662
Train [79][2310/3239]	Time 0.222 (0.641)	Data Time 0.001 (0.018)	Loss 2.3967 (2.3720)	Entropy 0.92867 (0.93223)	Top-1 acc 67.188 (67.087)	Top-5 acc 86.328 (85.876)	lr 0.00662
Train [79][2320/3239]	Time 2.537 (0.640)	Data Time 0.001 (0.018)	Loss 2.2638 (2.3720)	Entropy 0.92867 (0.93221)	Top-1 acc 67.578 (67.086)	Top-5 acc 87.109 (85.877)	lr 0.00662
Train [79][2330/3239]	Time 0.252 (0.638)	Data Time 0.001 (0.018)	Loss 2.2086 (2.3719)	Entropy 0.92866 (0.93220)	Top-1 acc 72.266 (67.086)	Top-5 acc 86.719 (85.877)	lr 0.00662
Train [79][2340/3239]	Time 0.227 (0.637)	Data Time 0.001 (0.018)	Loss 2.4794 (2.3721)	Entropy 0.92862 (0.93218)	Top-1 acc 64.844 (67.081)	Top-5 acc 82.812 (85.870)	lr 0.00662
Train [79][2350/3239]	Time 0.243 (0.637)	Data Time 0.001 (0.018)	Loss 2.3471 (2.3719)	Entropy 0.92860 (0.93217)	Top-1 acc 71.094 (67.090)	Top-5 acc 85.547 (85.875)	lr 0.00661
Train [79][2360/3239]	Time 0.224 (0.636)	Data Time 0.001 (0.018)	Loss 2.3109 (2.3717)	Entropy 0.92861 (0.93215)	Top-1 acc 65.625 (67.094)	Top-5 acc 87.891 (85.881)	lr 0.00661
Train [79][2370/3239]	Time 0.328 (0.636)	Data Time 0.001 (0.018)	Loss 2.2304 (2.3716)	Entropy 0.92853 (0.93214)	Top-1 acc 69.531 (67.095)	Top-5 acc 88.672 (85.885)	lr 0.00661
Train [79][2380/3239]	Time 0.215 (0.635)	Data Time 0.001 (0.018)	Loss 2.2830 (2.3715)	Entropy 0.92851 (0.93212)	Top-1 acc 70.703 (67.098)	Top-5 acc 85.938 (85.884)	lr 0.00661
Train [79][2390/3239]	Time 0.232 (0.635)	Data Time 0.001 (0.018)	Loss 2.3496 (2.3717)	Entropy 0.92843 (0.93211)	Top-1 acc 69.922 (67.092)	Top-5 acc 85.938 (85.881)	lr 0.00661
Train [79][2400/3239]	Time 0.198 (0.634)	Data Time 0.001 (0.018)	Loss 2.4099 (2.3717)	Entropy 0.92817 (0.93209)	Top-1 acc 68.359 (67.092)	Top-5 acc 85.547 (85.881)	lr 0.00661
Train [79][2410/3239]	Time 0.231 (0.633)	Data Time 0.001 (0.017)	Loss 2.2293 (2.3716)	Entropy 0.92814 (0.93208)	Top-1 acc 71.875 (67.094)	Top-5 acc 88.672 (85.884)	lr 0.00661
Train [79][2420/3239]	Time 0.292 (0.633)	Data Time 0.001 (0.017)	Loss 2.3700 (2.3716)	Entropy 0.92815 (0.93206)	Top-1 acc 67.188 (67.089)	Top-5 acc 87.109 (85.887)	lr 0.00661
Train [79][2430/3239]	Time 2.597 (0.632)	Data Time 0.001 (0.017)	Loss 2.2663 (2.3715)	Entropy 0.92815 (0.93204)	Top-1 acc 69.922 (67.094)	Top-5 acc 87.109 (85.888)	lr 0.00661
Train [79][2440/3239]	Time 0.248 (0.631)	Data Time 0.001 (0.017)	Loss 2.2685 (2.3713)	Entropy 0.92811 (0.93203)	Top-1 acc 68.750 (67.096)	Top-5 acc 86.328 (85.887)	lr 0.00661
Train [79][2450/3239]	Time 0.235 (0.630)	Data Time 0.001 (0.017)	Loss 2.2100 (2.3714)	Entropy 0.92808 (0.93201)	Top-1 acc 69.141 (67.092)	Top-5 acc 90.625 (85.888)	lr 0.00661
Train [79][2460/3239]	Time 0.401 (0.629)	Data Time 0.001 (0.017)	Loss 2.2938 (2.3711)	Entropy 0.92800 (0.93199)	Top-1 acc 66.016 (67.095)	Top-5 acc 86.719 (85.891)	lr 0.00660
Train [79][2470/3239]	Time 0.242 (0.629)	Data Time 0.001 (0.017)	Loss 2.4739 (2.3713)	Entropy 0.92796 (0.93198)	Top-1 acc 63.672 (67.092)	Top-5 acc 84.375 (85.887)	lr 0.00660
Train [79][2480/3239]	Time 0.227 (0.628)	Data Time 0.001 (0.017)	Loss 2.4610 (2.3714)	Entropy 0.92793 (0.93196)	Top-1 acc 66.016 (67.092)	Top-5 acc 84.766 (85.885)	lr 0.00660
Train [79][2490/3239]	Time 0.252 (0.628)	Data Time 0.001 (0.017)	Loss 2.3519 (2.3715)	Entropy 0.92792 (0.93195)	Top-1 acc 66.406 (67.090)	Top-5 acc 86.328 (85.885)	lr 0.00660
Train [79][2500/3239]	Time 0.223 (0.627)	Data Time 0.001 (0.017)	Loss 2.4979 (2.3717)	Entropy 0.92790 (0.93193)	Top-1 acc 67.578 (67.086)	Top-5 acc 83.594 (85.879)	lr 0.00660
Train [79][2510/3239]	Time 0.236 (0.627)	Data Time 0.001 (0.017)	Loss 2.3543 (2.3717)	Entropy 0.92785 (0.93191)	Top-1 acc 69.531 (67.089)	Top-5 acc 88.281 (85.880)	lr 0.00660
Train [79][2520/3239]	Time 0.236 (0.626)	Data Time 0.002 (0.017)	Loss 2.3926 (2.3716)	Entropy 0.92781 (0.93190)	Top-1 acc 66.406 (67.091)	Top-5 acc 87.109 (85.882)	lr 0.00660
Train [79][2530/3239]	Time 0.227 (0.626)	Data Time 0.002 (0.017)	Loss 2.4002 (2.3721)	Entropy 0.92778 (0.93188)	Top-1 acc 62.891 (67.075)	Top-5 acc 83.594 (85.875)	lr 0.00660
Train [79][2540/3239]	Time 2.733 (0.625)	Data Time 0.001 (0.017)	Loss 2.2095 (2.3719)	Entropy 0.92778 (0.93186)	Top-1 acc 71.094 (67.082)	Top-5 acc 88.672 (85.880)	lr 0.00660
Train [79][2550/3239]	Time 0.268 (0.624)	Data Time 0.001 (0.017)	Loss 2.2519 (2.3719)	Entropy 0.92776 (0.93185)	Top-1 acc 67.578 (67.083)	Top-5 acc 89.453 (85.879)	lr 0.00660
Train [79][2560/3239]	Time 0.387 (0.643)	Data Time 0.002 (0.017)	Loss 2.2215 (2.3719)	Entropy 0.92773 (0.93183)	Top-1 acc 71.094 (67.083)	Top-5 acc 88.672 (85.880)	lr 0.00660
Train [79][2570/3239]	Time 0.271 (0.643)	Data Time 0.007 (0.017)	Loss 2.3393 (2.3720)	Entropy 0.92764 (0.93182)	Top-1 acc 68.750 (67.080)	Top-5 acc 87.891 (85.877)	lr 0.00660
Train [79][2580/3239]	Time 0.246 (0.642)	Data Time 0.002 (0.016)	Loss 2.5315 (2.3722)	Entropy 0.92763 (0.93180)	Top-1 acc 60.156 (67.078)	Top-5 acc 85.156 (85.874)	lr 0.00659
Train [79][2590/3239]	Time 0.342 (0.641)	Data Time 0.001 (0.016)	Loss 2.5015 (2.3722)	Entropy 0.92762 (0.93178)	Top-1 acc 62.891 (67.076)	Top-5 acc 83.594 (85.874)	lr 0.00659
Train [79][2600/3239]	Time 0.233 (0.641)	Data Time 0.001 (0.016)	Loss 2.3130 (2.3720)	Entropy 0.92752 (0.93177)	Top-1 acc 67.578 (67.078)	Top-5 acc 88.281 (85.878)	lr 0.00659
Train [79][2610/3239]	Time 0.227 (0.640)	Data Time 0.001 (0.016)	Loss 2.3044 (2.3720)	Entropy 0.92740 (0.93175)	Top-1 acc 68.359 (67.079)	Top-5 acc 86.719 (85.878)	lr 0.00659
Train [79][2620/3239]	Time 0.223 (0.640)	Data Time 0.001 (0.016)	Loss 2.5706 (2.3722)	Entropy 0.92739 (0.93173)	Top-1 acc 63.281 (67.069)	Top-5 acc 83.594 (85.875)	lr 0.00659
Train [79][2630/3239]	Time 0.303 (0.639)	Data Time 0.001 (0.016)	Loss 2.3037 (2.3722)	Entropy 0.92730 (0.93172)	Top-1 acc 72.656 (67.068)	Top-5 acc 85.156 (85.874)	lr 0.00659
Train [79][2640/3239]	Time 0.231 (0.638)	Data Time 0.001 (0.016)	Loss 2.3055 (2.3725)	Entropy 0.92723 (0.93170)	Top-1 acc 69.141 (67.061)	Top-5 acc 87.109 (85.869)	lr 0.00659
Train [79][2650/3239]	Time 0.227 (0.638)	Data Time 0.001 (0.016)	Loss 2.3809 (2.3724)	Entropy 0.92727 (0.93168)	Top-1 acc 64.844 (67.064)	Top-5 acc 84.766 (85.869)	lr 0.00659
Train [79][2660/3239]	Time 0.230 (0.637)	Data Time 0.001 (0.016)	Loss 2.3430 (2.3723)	Entropy 0.92712 (0.93167)	Top-1 acc 67.578 (67.065)	Top-5 acc 88.672 (85.874)	lr 0.00659
Train [79][2670/3239]	Time 0.275 (0.636)	Data Time 0.001 (0.016)	Loss 2.3805 (2.3722)	Entropy 0.92708 (0.93165)	Top-1 acc 65.625 (67.066)	Top-5 acc 87.109 (85.875)	lr 0.00659
Train [79][2680/3239]	Time 0.229 (0.636)	Data Time 0.001 (0.016)	Loss 2.3754 (2.3724)	Entropy 0.92715 (0.93163)	Top-1 acc 66.406 (67.058)	Top-5 acc 82.812 (85.871)	lr 0.00659
Train [79][2690/3239]	Time 0.237 (0.635)	Data Time 0.001 (0.016)	Loss 2.2960 (2.3724)	Entropy 0.92710 (0.93162)	Top-1 acc 69.141 (67.055)	Top-5 acc 89.453 (85.872)	lr 0.00658
Train [79][2700/3239]	Time 0.262 (0.635)	Data Time 0.002 (0.016)	Loss 2.4162 (2.3725)	Entropy 0.92707 (0.93160)	Top-1 acc 68.359 (67.056)	Top-5 acc 83.984 (85.870)	lr 0.00658
Train [79][2710/3239]	Time 0.222 (0.634)	Data Time 0.002 (0.016)	Loss 2.2561 (2.3723)	Entropy 0.92706 (0.93158)	Top-1 acc 71.484 (67.067)	Top-5 acc 88.281 (85.873)	lr 0.00658
Train [79][2720/3239]	Time 0.315 (0.634)	Data Time 0.003 (0.016)	Loss 2.4088 (2.3722)	Entropy 0.92690 (0.93157)	Top-1 acc 62.891 (67.073)	Top-5 acc 84.766 (85.874)	lr 0.00658
Train [79][2730/3239]	Time 0.242 (0.633)	Data Time 0.001 (0.016)	Loss 2.3315 (2.3724)	Entropy 0.92689 (0.93155)	Top-1 acc 70.703 (67.071)	Top-5 acc 84.375 (85.870)	lr 0.00658
Train [79][2740/3239]	Time 0.228 (0.633)	Data Time 0.001 (0.016)	Loss 2.3444 (2.3724)	Entropy 0.92686 (0.93153)	Top-1 acc 69.531 (67.072)	Top-5 acc 87.109 (85.870)	lr 0.00658
Train [79][2750/3239]	Time 0.219 (0.632)	Data Time 0.001 (0.016)	Loss 2.4014 (2.3725)	Entropy 0.92688 (0.93152)	Top-1 acc 65.625 (67.071)	Top-5 acc 84.375 (85.867)	lr 0.00658
Train [79][2760/3239]	Time 0.313 (0.631)	Data Time 0.001 (0.016)	Loss 2.4340 (2.3725)	Entropy 0.92684 (0.93150)	Top-1 acc 64.062 (67.070)	Top-5 acc 87.500 (85.869)	lr 0.00658
Train [79][2770/3239]	Time 0.228 (0.631)	Data Time 0.006 (0.015)	Loss 2.3535 (2.3727)	Entropy 0.92677 (0.93148)	Top-1 acc 67.969 (67.064)	Top-5 acc 85.938 (85.864)	lr 0.00658
Train [79][2780/3239]	Time 0.234 (0.630)	Data Time 0.002 (0.015)	Loss 2.3186 (2.3728)	Entropy 0.92679 (0.93147)	Top-1 acc 69.922 (67.061)	Top-5 acc 88.672 (85.861)	lr 0.00658
Train [79][2790/3239]	Time 0.228 (0.630)	Data Time 0.001 (0.015)	Loss 2.3663 (2.3729)	Entropy 0.92677 (0.93145)	Top-1 acc 69.922 (67.056)	Top-5 acc 86.328 (85.860)	lr 0.00658
Train [79][2800/3239]	Time 0.234 (0.629)	Data Time 0.001 (0.015)	Loss 2.4156 (2.3728)	Entropy 0.92676 (0.93143)	Top-1 acc 66.797 (67.058)	Top-5 acc 86.328 (85.863)	lr 0.00657
Train [79][2810/3239]	Time 0.218 (0.629)	Data Time 0.001 (0.015)	Loss 2.3315 (2.3728)	Entropy 0.92678 (0.93142)	Top-1 acc 65.625 (67.056)	Top-5 acc 86.328 (85.862)	lr 0.00657
Train [79][2820/3239]	Time 0.250 (0.628)	Data Time 0.001 (0.015)	Loss 2.2981 (2.3729)	Entropy 0.92681 (0.93140)	Top-1 acc 68.750 (67.053)	Top-5 acc 87.500 (85.859)	lr 0.00657
Train [79][2830/3239]	Time 0.258 (0.628)	Data Time 0.001 (0.015)	Loss 2.2486 (2.3728)	Entropy 0.92668 (0.93138)	Top-1 acc 72.266 (67.057)	Top-5 acc 88.672 (85.860)	lr 0.00657
Train [79][2840/3239]	Time 0.280 (0.627)	Data Time 0.001 (0.015)	Loss 2.3565 (2.3729)	Entropy 0.92665 (0.93137)	Top-1 acc 69.141 (67.059)	Top-5 acc 85.547 (85.860)	lr 0.00657
Train [79][2850/3239]	Time 0.234 (0.627)	Data Time 0.001 (0.015)	Loss 2.4856 (2.3731)	Entropy 0.92664 (0.93135)	Top-1 acc 63.281 (67.054)	Top-5 acc 82.422 (85.854)	lr 0.00657
Train [79][2860/3239]	Time 0.265 (0.626)	Data Time 0.001 (0.015)	Loss 2.3289 (2.3729)	Entropy 0.92664 (0.93133)	Top-1 acc 67.969 (67.058)	Top-5 acc 86.719 (85.857)	lr 0.00657
Train [79][2870/3239]	Time 0.234 (0.625)	Data Time 0.001 (0.015)	Loss 2.4661 (2.3731)	Entropy 0.92665 (0.93132)	Top-1 acc 68.359 (67.054)	Top-5 acc 85.938 (85.853)	lr 0.00657
Train [79][2880/3239]	Time 0.234 (0.625)	Data Time 0.001 (0.015)	Loss 2.5103 (2.3732)	Entropy 0.92665 (0.93130)	Top-1 acc 66.406 (67.053)	Top-5 acc 83.984 (85.848)	lr 0.00657
Train [79][2890/3239]	Time 0.356 (0.644)	Data Time 0.003 (0.015)	Loss 2.2575 (2.3734)	Entropy 0.92663 (0.93128)	Top-1 acc 72.266 (67.048)	Top-5 acc 85.156 (85.844)	lr 0.00657
Train [79][2900/3239]	Time 0.251 (0.643)	Data Time 0.002 (0.015)	Loss 2.4768 (2.3737)	Entropy 0.92659 (0.93127)	Top-1 acc 62.109 (67.040)	Top-5 acc 86.328 (85.840)	lr 0.00657
Train [79][2910/3239]	Time 0.248 (0.643)	Data Time 0.001 (0.015)	Loss 2.3960 (2.3738)	Entropy 0.92662 (0.93125)	Top-1 acc 67.578 (67.037)	Top-5 acc 86.328 (85.839)	lr 0.00656
Train [79][2920/3239]	Time 0.240 (0.642)	Data Time 0.001 (0.015)	Loss 2.3200 (2.3739)	Entropy 0.92659 (0.93124)	Top-1 acc 67.969 (67.032)	Top-5 acc 88.281 (85.841)	lr 0.00656
Train [79][2930/3239]	Time 0.342 (0.641)	Data Time 0.002 (0.015)	Loss 2.3215 (2.3739)	Entropy 0.92656 (0.93122)	Top-1 acc 66.406 (67.026)	Top-5 acc 86.328 (85.841)	lr 0.00656
Train [79][2940/3239]	Time 0.236 (0.641)	Data Time 0.001 (0.015)	Loss 2.4010 (2.3739)	Entropy 0.92639 (0.93120)	Top-1 acc 65.625 (67.023)	Top-5 acc 88.281 (85.843)	lr 0.00656
Train [79][2950/3239]	Time 0.219 (0.640)	Data Time 0.001 (0.015)	Loss 2.5083 (2.3740)	Entropy 0.92634 (0.93119)	Top-1 acc 64.844 (67.022)	Top-5 acc 82.812 (85.843)	lr 0.00656
Train [79][2960/3239]	Time 0.228 (0.640)	Data Time 0.001 (0.015)	Loss 2.2839 (2.3739)	Entropy 0.92626 (0.93117)	Top-1 acc 69.922 (67.023)	Top-5 acc 86.719 (85.844)	lr 0.00656
Train [79][2970/3239]	Time 0.308 (0.639)	Data Time 0.001 (0.015)	Loss 2.2394 (2.3739)	Entropy 0.92623 (0.93116)	Top-1 acc 71.875 (67.021)	Top-5 acc 89.453 (85.843)	lr 0.00656
Train [79][2980/3239]	Time 0.223 (0.639)	Data Time 0.001 (0.014)	Loss 2.3725 (2.3740)	Entropy 0.92615 (0.93114)	Top-1 acc 70.312 (67.021)	Top-5 acc 85.938 (85.843)	lr 0.00656
Train [79][2990/3239]	Time 0.244 (0.638)	Data Time 0.001 (0.014)	Loss 2.4396 (2.3739)	Entropy 0.92615 (0.93112)	Top-1 acc 66.797 (67.022)	Top-5 acc 82.812 (85.846)	lr 0.00656
Train [79][3000/3239]	Time 0.234 (0.638)	Data Time 0.002 (0.014)	Loss 2.5352 (2.3743)	Entropy 0.92610 (0.93111)	Top-1 acc 60.547 (67.011)	Top-5 acc 84.375 (85.840)	lr 0.00656
Train [79][3010/3239]	Time 0.230 (0.637)	Data Time 0.001 (0.014)	Loss 2.5303 (2.3744)	Entropy 0.92608 (0.93109)	Top-1 acc 64.844 (67.010)	Top-5 acc 83.203 (85.841)	lr 0.00656
Train [79][3020/3239]	Time 0.295 (0.637)	Data Time 0.001 (0.014)	Loss 2.2359 (2.3744)	Entropy 0.92607 (0.93107)	Top-1 acc 71.484 (67.009)	Top-5 acc 90.234 (85.838)	lr 0.00656
Train [79][3030/3239]	Time 0.231 (0.636)	Data Time 0.001 (0.014)	Loss 2.5383 (2.3745)	Entropy 0.92594 (0.93106)	Top-1 acc 63.281 (67.007)	Top-5 acc 82.031 (85.835)	lr 0.00655
Train [79][3040/3239]	Time 0.272 (0.636)	Data Time 0.001 (0.014)	Loss 2.4127 (2.3745)	Entropy 0.92593 (0.93104)	Top-1 acc 64.062 (67.008)	Top-5 acc 87.500 (85.838)	lr 0.00655
Train [79][3050/3239]	Time 0.241 (0.635)	Data Time 0.001 (0.014)	Loss 2.3007 (2.3744)	Entropy 0.92590 (0.93102)	Top-1 acc 69.531 (67.011)	Top-5 acc 86.719 (85.839)	lr 0.00655
Train [79][3060/3239]	Time 0.373 (0.635)	Data Time 0.001 (0.014)	Loss 2.2670 (2.3745)	Entropy 0.92588 (0.93101)	Top-1 acc 69.922 (67.009)	Top-5 acc 88.281 (85.836)	lr 0.00655
Train [79][3070/3239]	Time 0.239 (0.634)	Data Time 0.001 (0.014)	Loss 2.3042 (2.3745)	Entropy 0.92592 (0.93099)	Top-1 acc 68.750 (67.012)	Top-5 acc 84.766 (85.836)	lr 0.00655
Train [79][3080/3239]	Time 0.239 (0.634)	Data Time 0.001 (0.014)	Loss 2.3906 (2.3744)	Entropy 0.92594 (0.93097)	Top-1 acc 70.312 (67.014)	Top-5 acc 84.375 (85.836)	lr 0.00655
Train [79][3090/3239]	Time 0.226 (0.633)	Data Time 0.001 (0.014)	Loss 2.4171 (2.3746)	Entropy 0.92591 (0.93096)	Top-1 acc 65.625 (67.008)	Top-5 acc 85.547 (85.832)	lr 0.00655
Train [79][3100/3239]	Time 0.239 (0.632)	Data Time 0.001 (0.014)	Loss 2.3828 (2.3746)	Entropy 0.92591 (0.93094)	Top-1 acc 69.141 (67.008)	Top-5 acc 84.766 (85.833)	lr 0.00655
Train [79][3110/3239]	Time 0.215 (0.632)	Data Time 0.001 (0.014)	Loss 2.4551 (2.3746)	Entropy 0.92585 (0.93092)	Top-1 acc 66.797 (67.007)	Top-5 acc 85.156 (85.833)	lr 0.00655
Train [79][3120/3239]	Time 0.233 (0.631)	Data Time 0.001 (0.014)	Loss 2.4729 (2.3747)	Entropy 0.92584 (0.93091)	Top-1 acc 62.891 (67.002)	Top-5 acc 82.812 (85.831)	lr 0.00655
Train [79][3130/3239]	Time 0.259 (0.631)	Data Time 0.001 (0.014)	Loss 2.4251 (2.3747)	Entropy 0.92582 (0.93089)	Top-1 acc 65.234 (67.003)	Top-5 acc 83.594 (85.829)	lr 0.00655
Train [79][3140/3239]	Time 0.228 (0.630)	Data Time 0.001 (0.014)	Loss 2.4128 (2.3749)	Entropy 0.92579 (0.93088)	Top-1 acc 64.844 (66.999)	Top-5 acc 85.156 (85.825)	lr 0.00654
Train [79][3150/3239]	Time 0.335 (0.630)	Data Time 0.001 (0.014)	Loss 2.4686 (2.3748)	Entropy 0.92572 (0.93086)	Top-1 acc 67.188 (67.003)	Top-5 acc 83.984 (85.828)	lr 0.00654
Train [79][3160/3239]	Time 0.257 (0.630)	Data Time 0.001 (0.014)	Loss 2.3723 (2.3748)	Entropy 0.92567 (0.93084)	Top-1 acc 68.359 (66.999)	Top-5 acc 84.375 (85.826)	lr 0.00654
Train [79][3170/3239]	Time 0.217 (0.629)	Data Time 0.001 (0.014)	Loss 2.4743 (2.3750)	Entropy 0.92564 (0.93083)	Top-1 acc 65.625 (66.995)	Top-5 acc 82.422 (85.821)	lr 0.00654
Train [79][3180/3239]	Time 0.209 (0.628)	Data Time 0.000 (0.014)	Loss 2.2460 (2.3749)	Entropy 0.92565 (0.93081)	Top-1 acc 73.438 (66.996)	Top-5 acc 87.500 (85.823)	lr 0.00654
Train [79][3190/3239]	Time 0.218 (0.628)	Data Time 0.000 (0.014)	Loss 2.2729 (2.3750)	Entropy 0.92562 (0.93079)	Top-1 acc 69.922 (66.992)	Top-5 acc 87.109 (85.821)	lr 0.00654
Train [79][3200/3239]	Time 0.214 (0.627)	Data Time 0.000 (0.014)	Loss 2.3825 (2.3749)	Entropy 0.92561 (0.93078)	Top-1 acc 70.703 (66.995)	Top-5 acc 85.938 (85.824)	lr 0.00654
Train [79][3210/3239]	Time 0.213 (0.627)	Data Time 0.000 (0.014)	Loss 2.3570 (2.3750)	Entropy 0.92561 (0.93076)	Top-1 acc 67.969 (66.995)	Top-5 acc 85.547 (85.822)	lr 0.00654
Train [79][3220/3239]	Time 0.291 (0.642)	Data Time 0.000 (0.014)	Loss 2.3936 (2.3750)	Entropy 0.92562 (0.93075)	Top-1 acc 66.016 (66.995)	Top-5 acc 84.375 (85.821)	lr 0.00654
Train [79][3230/3239]	Time 0.244 (0.642)	Data Time 0.000 (0.013)	Loss 2.3370 (2.3750)	Entropy 0.92557 (0.93073)	Top-1 acc 69.531 (66.992)	Top-5 acc 86.328 (85.821)	lr 0.00654
Train [79][3239/3239]	Time 2.385 (0.641)	Data Time 0.000 (0.013)	Loss 2.9778 (2.3750)	Entropy 0.92557 (0.93072)	Top-1 acc 51.852 (66.991)	Top-5 acc 74.074 (85.821)	lr 0.00654
==========Valid [79/120]	loss 1.314	top-1 acc 69.947 (69.947)	top-5 acc 88.398	Train top-1 66.991	top-5 85.821	Entropy 0.92557	Latency-None: 0.000ms	Flops: 546.53M
Train [80][0/3239]	Time 40.394 (40.394)	Data Time 39.091 (39.091)	Loss 2.2777 (2.2777)	Entropy 0.92556 (0.92556)	Top-1 acc 68.359 (68.359)	Top-5 acc 89.062 (89.062)	lr 0.00654
Train [80][10/3239]	Time 2.699 (4.290)	Data Time 0.002 (3.622)	Loss 2.2462 (2.3475)	Entropy 0.92556 (0.92556)	Top-1 acc 68.750 (67.827)	Top-5 acc 88.672 (86.328)	lr 0.00653
Train [80][20/3239]	Time 0.246 (2.366)	Data Time 0.001 (1.898)	Loss 2.3536 (2.3483)	Entropy 0.92554 (0.92555)	Top-1 acc 68.359 (67.913)	Top-5 acc 88.672 (86.347)	lr 0.00653
Train [80][30/3239]	Time 0.230 (1.764)	Data Time 0.001 (1.286)	Loss 2.2117 (2.3399)	Entropy 0.92551 (0.92554)	Top-1 acc 73.438 (68.032)	Top-5 acc 89.453 (86.605)	lr 0.00653
Train [80][40/3239]	Time 0.244 (1.451)	Data Time 0.001 (0.973)	Loss 2.2907 (2.3493)	Entropy 0.92547 (0.92552)	Top-1 acc 67.969 (67.588)	Top-5 acc 88.672 (86.423)	lr 0.00653
Train [80][50/3239]	Time 0.239 (1.263)	Data Time 0.001 (0.783)	Loss 2.3620 (2.3418)	Entropy 0.92549 (0.92551)	Top-1 acc 66.797 (67.739)	Top-5 acc 86.328 (86.520)	lr 0.00653
Train [80][60/3239]	Time 0.241 (1.135)	Data Time 0.001 (0.654)	Loss 2.1461 (2.3367)	Entropy 0.92551 (0.92551)	Top-1 acc 75.000 (68.033)	Top-5 acc 89.844 (86.597)	lr 0.00653
Train [80][70/3239]	Time 0.280 (1.044)	Data Time 0.001 (0.563)	Loss 2.3227 (2.3333)	Entropy 0.92544 (0.92551)	Top-1 acc 66.406 (68.029)	Top-5 acc 85.938 (86.647)	lr 0.00653
Train [80][80/3239]	Time 0.242 (0.973)	Data Time 0.001 (0.493)	Loss 2.4690 (2.3372)	Entropy 0.92544 (0.92550)	Top-1 acc 63.672 (67.930)	Top-5 acc 83.984 (86.564)	lr 0.00653
Train [80][90/3239]	Time 0.233 (0.922)	Data Time 0.001 (0.439)	Loss 2.3095 (2.3346)	Entropy 0.92540 (0.92549)	Top-1 acc 68.750 (67.930)	Top-5 acc 87.109 (86.603)	lr 0.00653
Train [80][100/3239]	Time 0.231 (0.878)	Data Time 0.001 (0.396)	Loss 2.3796 (2.3336)	Entropy 0.92535 (0.92548)	Top-1 acc 66.016 (67.853)	Top-5 acc 84.375 (86.672)	lr 0.00653
Train [80][110/3239]	Time 0.264 (0.841)	Data Time 0.001 (0.360)	Loss 2.4563 (2.3403)	Entropy 0.92534 (0.92547)	Top-1 acc 64.453 (67.740)	Top-5 acc 85.938 (86.578)	lr 0.00653
Train [80][120/3239]	Time 2.396 (0.809)	Data Time 0.001 (0.331)	Loss 2.2728 (2.3392)	Entropy 0.92534 (0.92546)	Top-1 acc 68.750 (67.752)	Top-5 acc 89.062 (86.628)	lr 0.00652
Train [80][130/3239]	Time 0.329 (0.766)	Data Time 0.001 (0.306)	Loss 2.3935 (2.3387)	Entropy 0.92527 (0.92544)	Top-1 acc 68.750 (67.787)	Top-5 acc 84.766 (86.650)	lr 0.00652
Train [80][140/3239]	Time 0.230 (0.745)	Data Time 0.001 (0.284)	Loss 2.4657 (2.3368)	Entropy 0.92515 (0.92542)	Top-1 acc 64.062 (67.883)	Top-5 acc 84.766 (86.649)	lr 0.00652
Train [80][150/3239]	Time 0.245 (0.727)	Data Time 0.002 (0.265)	Loss 2.4282 (2.3379)	Entropy 0.92512 (0.92540)	Top-1 acc 66.016 (67.826)	Top-5 acc 85.938 (86.649)	lr 0.00652
Train [80][160/3239]	Time 0.228 (0.711)	Data Time 0.001 (0.249)	Loss 2.4303 (2.3401)	Entropy 0.92510 (0.92539)	Top-1 acc 64.844 (67.818)	Top-5 acc 82.812 (86.590)	lr 0.00652
Train [80][170/3239]	Time 0.215 (0.696)	Data Time 0.001 (0.234)	Loss 2.3583 (2.3376)	Entropy 0.92509 (0.92537)	Top-1 acc 71.484 (67.898)	Top-5 acc 87.891 (86.632)	lr 0.00652
Train [80][180/3239]	Time 0.233 (0.685)	Data Time 0.002 (0.222)	Loss 2.1819 (2.3374)	Entropy 0.92507 (0.92535)	Top-1 acc 74.219 (67.874)	Top-5 acc 88.672 (86.650)	lr 0.00652
Train [80][190/3239]	Time 0.232 (0.675)	Data Time 0.001 (0.210)	Loss 2.3317 (2.3374)	Entropy 0.92502 (0.92534)	Top-1 acc 67.188 (67.842)	Top-5 acc 86.328 (86.668)	lr 0.00652
Train [80][200/3239]	Time 0.219 (0.665)	Data Time 0.001 (0.200)	Loss 2.5016 (2.3370)	Entropy 0.92501 (0.92532)	Top-1 acc 63.672 (67.837)	Top-5 acc 84.766 (86.690)	lr 0.00652
Train [80][210/3239]	Time 0.226 (0.656)	Data Time 0.002 (0.190)	Loss 2.2914 (2.3369)	Entropy 0.92499 (0.92531)	Top-1 acc 69.141 (67.860)	Top-5 acc 88.672 (86.672)	lr 0.00652
Train [80][220/3239]	Time 0.392 (0.649)	Data Time 0.001 (0.182)	Loss 2.2366 (2.3376)	Entropy 0.92489 (0.92529)	Top-1 acc 69.922 (67.857)	Top-5 acc 89.062 (86.669)	lr 0.00652
Train [80][230/3239]	Time 2.612 (0.642)	Data Time 0.001 (0.174)	Loss 2.5575 (2.3391)	Entropy 0.92489 (0.92527)	Top-1 acc 61.328 (67.784)	Top-5 acc 82.812 (86.629)	lr 0.00652
Train [80][240/3239]	Time 0.250 (0.626)	Data Time 0.001 (0.167)	Loss 2.1931 (2.3381)	Entropy 0.92485 (0.92526)	Top-1 acc 71.484 (67.805)	Top-5 acc 88.672 (86.602)	lr 0.00651
Train [80][250/3239]	Time 0.260 (0.620)	Data Time 0.001 (0.160)	Loss 2.3163 (2.3409)	Entropy 0.92484 (0.92524)	Top-1 acc 69.531 (67.768)	Top-5 acc 85.547 (86.560)	lr 0.00651
Train [80][260/3239]	Time 0.223 (0.615)	Data Time 0.001 (0.154)	Loss 2.3295 (2.3406)	Entropy 0.92483 (0.92522)	Top-1 acc 67.578 (67.767)	Top-5 acc 85.938 (86.563)	lr 0.00651
Train [80][270/3239]	Time 0.214 (0.610)	Data Time 0.001 (0.149)	Loss 2.3575 (2.3420)	Entropy 0.92480 (0.92521)	Top-1 acc 65.625 (67.728)	Top-5 acc 87.891 (86.547)	lr 0.00651
Train [80][280/3239]	Time 0.269 (0.606)	Data Time 0.001 (0.143)	Loss 2.2305 (2.3420)	Entropy 0.92480 (0.92519)	Top-1 acc 70.312 (67.707)	Top-5 acc 87.109 (86.546)	lr 0.00651
Train [80][290/3239]	Time 0.219 (0.601)	Data Time 0.001 (0.138)	Loss 2.3150 (2.3420)	Entropy 0.92479 (0.92518)	Top-1 acc 70.703 (67.695)	Top-5 acc 87.891 (86.544)	lr 0.00651
Train [80][300/3239]	Time 0.228 (0.597)	Data Time 0.001 (0.134)	Loss 2.4879 (2.3424)	Entropy 0.92473 (0.92517)	Top-1 acc 63.672 (67.687)	Top-5 acc 82.422 (86.516)	lr 0.00651
Train [80][310/3239]	Time 0.376 (0.594)	Data Time 0.002 (0.130)	Loss 2.1669 (2.3430)	Entropy 0.92466 (0.92515)	Top-1 acc 73.047 (67.680)	Top-5 acc 90.625 (86.506)	lr 0.00651
Train [80][320/3239]	Time 0.213 (0.591)	Data Time 0.001 (0.126)	Loss 2.4972 (2.3440)	Entropy 0.92462 (0.92514)	Top-1 acc 62.500 (67.661)	Top-5 acc 82.422 (86.478)	lr 0.00651
Train [80][330/3239]	Time 0.221 (0.588)	Data Time 0.001 (0.122)	Loss 2.4804 (2.3454)	Entropy 0.92461 (0.92512)	Top-1 acc 64.062 (67.627)	Top-5 acc 85.938 (86.439)	lr 0.00651
Train [80][340/3239]	Time 58.097 (0.747)	Data Time 0.001 (0.118)	Loss 2.3998 (2.3450)	Entropy 0.92461 (0.92511)	Top-1 acc 69.531 (67.638)	Top-5 acc 85.938 (86.453)	lr 0.00651
Train [80][350/3239]	Time 0.272 (0.735)	Data Time 0.002 (0.115)	Loss 2.2799 (2.3441)	Entropy 0.92459 (0.92509)	Top-1 acc 70.703 (67.698)	Top-5 acc 86.328 (86.465)	lr 0.00650
Train [80][360/3239]	Time 0.214 (0.728)	Data Time 0.002 (0.112)	Loss 2.3628 (2.3452)	Entropy 0.92452 (0.92508)	Top-1 acc 68.750 (67.698)	Top-5 acc 85.156 (86.423)	lr 0.00650
Train [80][370/3239]	Time 0.237 (0.721)	Data Time 0.002 (0.109)	Loss 2.3453 (2.3460)	Entropy 0.92456 (0.92506)	Top-1 acc 67.578 (67.667)	Top-5 acc 85.938 (86.413)	lr 0.00650
Train [80][380/3239]	Time 0.223 (0.715)	Data Time 0.001 (0.106)	Loss 2.4495 (2.3474)	Entropy 0.92457 (0.92505)	Top-1 acc 67.578 (67.640)	Top-5 acc 82.812 (86.371)	lr 0.00650
Train [80][390/3239]	Time 0.226 (0.709)	Data Time 0.001 (0.104)	Loss 2.2864 (2.3482)	Entropy 0.92449 (0.92503)	Top-1 acc 69.141 (67.604)	Top-5 acc 87.109 (86.370)	lr 0.00650
Train [80][400/3239]	Time 0.225 (0.704)	Data Time 0.001 (0.101)	Loss 2.5518 (2.3481)	Entropy 0.92436 (0.92502)	Top-1 acc 66.016 (67.615)	Top-5 acc 81.641 (86.376)	lr 0.00650
Train [80][410/3239]	Time 0.216 (0.698)	Data Time 0.001 (0.099)	Loss 2.3369 (2.3464)	Entropy 0.92444 (0.92500)	Top-1 acc 65.625 (67.656)	Top-5 acc 86.328 (86.400)	lr 0.00650
Train [80][420/3239]	Time 0.245 (0.693)	Data Time 0.001 (0.096)	Loss 2.4953 (2.3457)	Entropy 0.92418 (0.92499)	Top-1 acc 66.406 (67.700)	Top-5 acc 85.156 (86.395)	lr 0.00650
Train [80][430/3239]	Time 0.221 (0.688)	Data Time 0.001 (0.094)	Loss 2.3377 (2.3465)	Entropy 0.92413 (0.92497)	Top-1 acc 69.141 (67.671)	Top-5 acc 86.328 (86.378)	lr 0.00650
Train [80][440/3239]	Time 0.301 (0.683)	Data Time 0.001 (0.092)	Loss 2.5094 (2.3463)	Entropy 0.92405 (0.92495)	Top-1 acc 63.672 (67.685)	Top-5 acc 83.203 (86.366)	lr 0.00650
Train [80][450/3239]	Time 2.468 (0.678)	Data Time 0.001 (0.090)	Loss 2.2053 (2.3459)	Entropy 0.92405 (0.92493)	Top-1 acc 70.703 (67.676)	Top-5 acc 89.453 (86.388)	lr 0.00650
Train [80][460/3239]	Time 0.249 (0.669)	Data Time 0.002 (0.088)	Loss 2.3486 (2.3456)	Entropy 0.92404 (0.92491)	Top-1 acc 67.969 (67.689)	Top-5 acc 86.328 (86.379)	lr 0.00649
Train [80][470/3239]	Time 0.269 (0.665)	Data Time 0.013 (0.086)	Loss 2.2528 (2.3460)	Entropy 0.92405 (0.92489)	Top-1 acc 66.406 (67.682)	Top-5 acc 89.844 (86.374)	lr 0.00649
Train [80][480/3239]	Time 0.229 (0.662)	Data Time 0.001 (0.085)	Loss 2.4113 (2.3449)	Entropy 0.92395 (0.92487)	Top-1 acc 67.969 (67.705)	Top-5 acc 84.766 (86.399)	lr 0.00649
Train [80][490/3239]	Time 0.236 (0.658)	Data Time 0.001 (0.083)	Loss 2.3807 (2.3454)	Entropy 0.92408 (0.92486)	Top-1 acc 67.188 (67.694)	Top-5 acc 85.938 (86.402)	lr 0.00649
Train [80][500/3239]	Time 0.240 (0.655)	Data Time 0.001 (0.081)	Loss 2.2664 (2.3463)	Entropy 0.92406 (0.92484)	Top-1 acc 70.703 (67.659)	Top-5 acc 87.109 (86.380)	lr 0.00649
Train [80][510/3239]	Time 0.222 (0.651)	Data Time 0.001 (0.080)	Loss 2.3230 (2.3460)	Entropy 0.92398 (0.92483)	Top-1 acc 66.797 (67.667)	Top-5 acc 85.938 (86.375)	lr 0.00649
Train [80][520/3239]	Time 0.225 (0.648)	Data Time 0.001 (0.078)	Loss 2.4337 (2.3452)	Entropy 0.92391 (0.92481)	Top-1 acc 64.453 (67.683)	Top-5 acc 82.422 (86.381)	lr 0.00649
Train [80][530/3239]	Time 0.228 (0.644)	Data Time 0.001 (0.077)	Loss 2.3816 (2.3458)	Entropy 0.92382 (0.92479)	Top-1 acc 67.578 (67.675)	Top-5 acc 86.719 (86.375)	lr 0.00649
Train [80][540/3239]	Time 0.238 (0.641)	Data Time 0.001 (0.075)	Loss 2.4122 (2.3456)	Entropy 0.92375 (0.92477)	Top-1 acc 64.453 (67.672)	Top-5 acc 85.938 (86.380)	lr 0.00649
Train [80][550/3239]	Time 0.229 (0.638)	Data Time 0.001 (0.074)	Loss 2.2870 (2.3454)	Entropy 0.92370 (0.92475)	Top-1 acc 68.750 (67.673)	Top-5 acc 86.328 (86.384)	lr 0.00649
Train [80][560/3239]	Time 2.541 (0.635)	Data Time 0.001 (0.073)	Loss 2.1465 (2.3455)	Entropy 0.92370 (0.92474)	Top-1 acc 74.219 (67.674)	Top-5 acc 90.625 (86.386)	lr 0.00649
Train [80][570/3239]	Time 0.399 (0.629)	Data Time 0.001 (0.071)	Loss 2.3870 (2.3458)	Entropy 0.92359 (0.92472)	Top-1 acc 62.109 (67.676)	Top-5 acc 83.594 (86.377)	lr 0.00648
Train [80][580/3239]	Time 0.220 (0.626)	Data Time 0.001 (0.070)	Loss 2.3400 (2.3451)	Entropy 0.92359 (0.92470)	Top-1 acc 69.922 (67.707)	Top-5 acc 86.719 (86.405)	lr 0.00648
Train [80][590/3239]	Time 0.221 (0.623)	Data Time 0.001 (0.069)	Loss 2.2813 (2.3444)	Entropy 0.92354 (0.92468)	Top-1 acc 67.578 (67.708)	Top-5 acc 88.672 (86.418)	lr 0.00648
Train [80][600/3239]	Time 0.228 (0.620)	Data Time 0.001 (0.068)	Loss 2.3302 (2.3440)	Entropy 0.92348 (0.92466)	Top-1 acc 69.531 (67.723)	Top-5 acc 86.719 (86.416)	lr 0.00648
Train [80][610/3239]	Time 0.220 (0.618)	Data Time 0.001 (0.067)	Loss 2.3921 (2.3442)	Entropy 0.92348 (0.92464)	Top-1 acc 64.453 (67.703)	Top-5 acc 87.500 (86.420)	lr 0.00648
Train [80][620/3239]	Time 0.226 (0.616)	Data Time 0.002 (0.066)	Loss 2.3166 (2.3445)	Entropy 0.92348 (0.92462)	Top-1 acc 70.703 (67.701)	Top-5 acc 85.938 (86.413)	lr 0.00648
Train [80][630/3239]	Time 0.230 (0.614)	Data Time 0.001 (0.065)	Loss 2.2906 (2.3448)	Entropy 0.92343 (0.92460)	Top-1 acc 68.750 (67.701)	Top-5 acc 86.328 (86.409)	lr 0.00648
Train [80][640/3239]	Time 0.231 (0.611)	Data Time 0.002 (0.064)	Loss 2.3780 (2.3444)	Entropy 0.92341 (0.92458)	Top-1 acc 68.750 (67.720)	Top-5 acc 86.719 (86.421)	lr 0.00648
Train [80][650/3239]	Time 0.223 (0.609)	Data Time 0.001 (0.063)	Loss 2.4194 (2.3444)	Entropy 0.92341 (0.92456)	Top-1 acc 66.016 (67.716)	Top-5 acc 85.547 (86.413)	lr 0.00648
Train [80][660/3239]	Time 0.245 (0.607)	Data Time 0.001 (0.062)	Loss 2.4220 (2.3444)	Entropy 0.92338 (0.92455)	Top-1 acc 65.625 (67.718)	Top-5 acc 85.156 (86.411)	lr 0.00648
Train [80][670/3239]	Time 2.558 (0.605)	Data Time 0.001 (0.061)	Loss 2.4835 (2.3447)	Entropy 0.92338 (0.92453)	Top-1 acc 64.062 (67.707)	Top-5 acc 84.766 (86.411)	lr 0.00648
Train [80][680/3239]	Time 0.238 (0.600)	Data Time 0.001 (0.060)	Loss 2.4423 (2.3455)	Entropy 0.92340 (0.92451)	Top-1 acc 65.625 (67.673)	Top-5 acc 86.719 (86.394)	lr 0.00648
Train [80][690/3239]	Time 0.236 (0.598)	Data Time 0.001 (0.059)	Loss 2.4966 (2.3461)	Entropy 0.92334 (0.92450)	Top-1 acc 63.672 (67.656)	Top-5 acc 83.984 (86.386)	lr 0.00647
Train [80][700/3239]	Time 0.336 (0.596)	Data Time 0.001 (0.058)	Loss 2.3416 (2.3466)	Entropy 0.92335 (0.92448)	Top-1 acc 69.922 (67.659)	Top-5 acc 85.547 (86.366)	lr 0.00647
Train [80][710/3239]	Time 0.244 (0.675)	Data Time 0.002 (0.058)	Loss 2.3513 (2.3476)	Entropy 0.92330 (0.92446)	Top-1 acc 67.969 (67.646)	Top-5 acc 85.547 (86.349)	lr 0.00647
Train [80][720/3239]	Time 0.224 (0.672)	Data Time 0.002 (0.057)	Loss 2.2199 (2.3482)	Entropy 0.92327 (0.92445)	Top-1 acc 69.922 (67.622)	Top-5 acc 89.062 (86.321)	lr 0.00647
Train [80][730/3239]	Time 0.282 (0.670)	Data Time 0.002 (0.056)	Loss 2.3501 (2.3484)	Entropy 0.92323 (0.92443)	Top-1 acc 67.578 (67.630)	Top-5 acc 84.766 (86.315)	lr 0.00647
Train [80][740/3239]	Time 0.288 (0.668)	Data Time 0.002 (0.055)	Loss 2.3014 (2.3480)	Entropy 0.92323 (0.92441)	Top-1 acc 69.141 (67.639)	Top-5 acc 83.594 (86.311)	lr 0.00647
Train [80][750/3239]	Time 0.260 (0.667)	Data Time 0.001 (0.055)	Loss 2.3684 (2.3488)	Entropy 0.92318 (0.92440)	Top-1 acc 66.406 (67.615)	Top-5 acc 85.938 (86.308)	lr 0.00647
Train [80][760/3239]	Time 0.257 (0.665)	Data Time 0.001 (0.054)	Loss 2.3444 (2.3497)	Entropy 0.92320 (0.92438)	Top-1 acc 68.750 (67.602)	Top-5 acc 86.328 (86.292)	lr 0.00647
Train [80][770/3239]	Time 0.244 (0.663)	Data Time 0.001 (0.053)	Loss 2.4579 (2.3501)	Entropy 0.92319 (0.92437)	Top-1 acc 62.109 (67.584)	Top-5 acc 85.938 (86.282)	lr 0.00647
Train [80][780/3239]	Time 2.605 (0.661)	Data Time 0.002 (0.053)	Loss 2.4938 (2.3506)	Entropy 0.92319 (0.92435)	Top-1 acc 65.625 (67.578)	Top-5 acc 86.719 (86.278)	lr 0.00647
Train [80][790/3239]	Time 0.405 (0.656)	Data Time 0.002 (0.052)	Loss 2.4773 (2.3507)	Entropy 0.92315 (0.92434)	Top-1 acc 67.188 (67.587)	Top-5 acc 84.375 (86.276)	lr 0.00647
Train [80][800/3239]	Time 0.246 (0.654)	Data Time 0.002 (0.051)	Loss 2.4774 (2.3512)	Entropy 0.92316 (0.92432)	Top-1 acc 62.500 (67.587)	Top-5 acc 85.938 (86.269)	lr 0.00646
Train [80][810/3239]	Time 0.243 (0.652)	Data Time 0.002 (0.051)	Loss 2.2761 (2.3519)	Entropy 0.92315 (0.92431)	Top-1 acc 68.750 (67.576)	Top-5 acc 85.938 (86.248)	lr 0.00646
Train [80][820/3239]	Time 0.228 (0.650)	Data Time 0.001 (0.050)	Loss 2.2045 (2.3517)	Entropy 0.92307 (0.92429)	Top-1 acc 71.484 (67.579)	Top-5 acc 85.156 (86.240)	lr 0.00646
Train [80][830/3239]	Time 0.243 (0.648)	Data Time 0.001 (0.050)	Loss 2.5409 (2.3523)	Entropy 0.92307 (0.92428)	Top-1 acc 64.453 (67.558)	Top-5 acc 82.812 (86.235)	lr 0.00646
Train [80][840/3239]	Time 0.226 (0.646)	Data Time 0.001 (0.049)	Loss 2.3049 (2.3526)	Entropy 0.92297 (0.92426)	Top-1 acc 71.484 (67.541)	Top-5 acc 87.109 (86.237)	lr 0.00646
Train [80][850/3239]	Time 0.236 (0.644)	Data Time 0.001 (0.049)	Loss 2.3490 (2.3524)	Entropy 0.92296 (0.92425)	Top-1 acc 67.578 (67.544)	Top-5 acc 84.375 (86.234)	lr 0.00646
Train [80][860/3239]	Time 0.217 (0.642)	Data Time 0.002 (0.048)	Loss 2.3782 (2.3528)	Entropy 0.92293 (0.92423)	Top-1 acc 66.797 (67.547)	Top-5 acc 83.984 (86.222)	lr 0.00646
Train [80][870/3239]	Time 0.221 (0.640)	Data Time 0.001 (0.047)	Loss 2.2955 (2.3528)	Entropy 0.92293 (0.92422)	Top-1 acc 69.141 (67.549)	Top-5 acc 87.500 (86.225)	lr 0.00646
Train [80][880/3239]	Time 0.234 (0.638)	Data Time 0.002 (0.047)	Loss 2.2707 (2.3520)	Entropy 0.92299 (0.92420)	Top-1 acc 67.969 (67.558)	Top-5 acc 88.281 (86.243)	lr 0.00646
Train [80][890/3239]	Time 2.569 (0.637)	Data Time 0.001 (0.046)	Loss 2.5330 (2.3527)	Entropy 0.92299 (0.92419)	Top-1 acc 65.625 (67.551)	Top-5 acc 83.984 (86.230)	lr 0.00646
Train [80][900/3239]	Time 0.246 (0.632)	Data Time 0.001 (0.046)	Loss 2.4872 (2.3524)	Entropy 0.92294 (0.92418)	Top-1 acc 65.625 (67.554)	Top-5 acc 82.031 (86.237)	lr 0.00646
Train [80][910/3239]	Time 0.224 (0.631)	Data Time 0.001 (0.045)	Loss 2.5011 (2.3525)	Entropy 0.92284 (0.92416)	Top-1 acc 63.281 (67.557)	Top-5 acc 82.031 (86.229)	lr 0.00645
Train [80][920/3239]	Time 0.241 (0.629)	Data Time 0.001 (0.045)	Loss 2.2394 (2.3522)	Entropy 0.92281 (0.92415)	Top-1 acc 72.656 (67.576)	Top-5 acc 85.938 (86.226)	lr 0.00645
Train [80][930/3239]	Time 0.232 (0.627)	Data Time 0.001 (0.044)	Loss 2.3259 (2.3526)	Entropy 0.92278 (0.92413)	Top-1 acc 68.750 (67.554)	Top-5 acc 87.500 (86.224)	lr 0.00645
Train [80][940/3239]	Time 0.229 (0.626)	Data Time 0.001 (0.044)	Loss 2.3254 (2.3526)	Entropy 0.92269 (0.92412)	Top-1 acc 66.797 (67.551)	Top-5 acc 89.062 (86.224)	lr 0.00645
Train [80][950/3239]	Time 0.220 (0.624)	Data Time 0.001 (0.044)	Loss 2.3255 (2.3528)	Entropy 0.92265 (0.92410)	Top-1 acc 69.531 (67.551)	Top-5 acc 87.109 (86.218)	lr 0.00645
Train [80][960/3239]	Time 0.248 (0.622)	Data Time 0.001 (0.043)	Loss 2.4198 (2.3528)	Entropy 0.92255 (0.92409)	Top-1 acc 65.625 (67.553)	Top-5 acc 84.375 (86.224)	lr 0.00645
Train [80][970/3239]	Time 0.226 (0.621)	Data Time 0.001 (0.043)	Loss 2.4005 (2.3532)	Entropy 0.92246 (0.92407)	Top-1 acc 66.797 (67.544)	Top-5 acc 85.547 (86.220)	lr 0.00645
Train [80][980/3239]	Time 0.242 (0.619)	Data Time 0.001 (0.042)	Loss 2.3365 (2.3536)	Entropy 0.92242 (0.92405)	Top-1 acc 67.969 (67.533)	Top-5 acc 85.938 (86.212)	lr 0.00645
Train [80][990/3239]	Time 0.229 (0.618)	Data Time 0.001 (0.042)	Loss 2.4842 (2.3537)	Entropy 0.92237 (0.92404)	Top-1 acc 64.062 (67.530)	Top-5 acc 81.250 (86.211)	lr 0.00645
Train [80][1000/3239]	Time 2.482 (0.616)	Data Time 0.001 (0.041)	Loss 2.3996 (2.3537)	Entropy 0.92237 (0.92402)	Top-1 acc 68.750 (67.523)	Top-5 acc 84.766 (86.207)	lr 0.00645
Train [80][1010/3239]	Time 0.262 (0.613)	Data Time 0.001 (0.041)	Loss 2.1397 (2.3538)	Entropy 0.92237 (0.92400)	Top-1 acc 75.000 (67.522)	Top-5 acc 90.625 (86.205)	lr 0.00645
Train [80][1020/3239]	Time 0.243 (0.611)	Data Time 0.001 (0.041)	Loss 2.3867 (2.3537)	Entropy 0.92233 (0.92399)	Top-1 acc 70.703 (67.530)	Top-5 acc 85.547 (86.205)	lr 0.00645
Train [80][1030/3239]	Time 0.249 (0.610)	Data Time 0.001 (0.040)	Loss 2.3555 (2.3541)	Entropy 0.92231 (0.92397)	Top-1 acc 69.922 (67.514)	Top-5 acc 83.984 (86.192)	lr 0.00644
Train [80][1040/3239]	Time 0.239 (0.609)	Data Time 0.001 (0.040)	Loss 2.4005 (2.3554)	Entropy 0.92228 (0.92396)	Top-1 acc 67.969 (67.481)	Top-5 acc 83.594 (86.166)	lr 0.00644
Train [80][1050/3239]	Time 0.240 (0.608)	Data Time 0.001 (0.040)	Loss 2.3364 (2.3557)	Entropy 0.92226 (0.92394)	Top-1 acc 70.312 (67.474)	Top-5 acc 85.938 (86.162)	lr 0.00644
Train [80][1060/3239]	Time 0.224 (0.607)	Data Time 0.001 (0.039)	Loss 2.3282 (2.3561)	Entropy 0.92226 (0.92392)	Top-1 acc 68.359 (67.457)	Top-5 acc 85.938 (86.154)	lr 0.00644
Train [80][1070/3239]	Time 0.225 (0.658)	Data Time 0.002 (0.039)	Loss 2.3407 (2.3559)	Entropy 0.92226 (0.92391)	Top-1 acc 67.969 (67.460)	Top-5 acc 85.938 (86.153)	lr 0.00644
Train [80][1080/3239]	Time 0.232 (0.656)	Data Time 0.002 (0.039)	Loss 2.4900 (2.3562)	Entropy 0.92222 (0.92389)	Top-1 acc 64.453 (67.455)	Top-5 acc 83.984 (86.149)	lr 0.00644
Train [80][1090/3239]	Time 0.242 (0.655)	Data Time 0.002 (0.038)	Loss 2.3471 (2.3563)	Entropy 0.92217 (0.92388)	Top-1 acc 66.406 (67.451)	Top-5 acc 87.109 (86.149)	lr 0.00644
Train [80][1100/3239]	Time 0.238 (0.653)	Data Time 0.001 (0.038)	Loss 2.4605 (2.3567)	Entropy 0.92215 (0.92386)	Top-1 acc 65.234 (67.435)	Top-5 acc 84.375 (86.141)	lr 0.00644
Train [80][1110/3239]	Time 2.515 (0.652)	Data Time 0.001 (0.038)	Loss 2.1335 (2.3567)	Entropy 0.92215 (0.92385)	Top-1 acc 73.047 (67.430)	Top-5 acc 92.188 (86.141)	lr 0.00644
Train [80][1120/3239]	Time 0.231 (0.648)	Data Time 0.001 (0.037)	Loss 2.5271 (2.3569)	Entropy 0.92211 (0.92383)	Top-1 acc 61.719 (67.432)	Top-5 acc 83.594 (86.130)	lr 0.00644
Train [80][1130/3239]	Time 0.221 (0.646)	Data Time 0.001 (0.037)	Loss 2.3991 (2.3570)	Entropy 0.92206 (0.92382)	Top-1 acc 63.672 (67.432)	Top-5 acc 85.547 (86.133)	lr 0.00644
Train [80][1140/3239]	Time 0.332 (0.645)	Data Time 0.001 (0.037)	Loss 2.3248 (2.3572)	Entropy 0.92203 (0.92380)	Top-1 acc 67.188 (67.433)	Top-5 acc 86.719 (86.129)	lr 0.00643
Train [80][1150/3239]	Time 0.222 (0.644)	Data Time 0.001 (0.036)	Loss 2.2939 (2.3568)	Entropy 0.92201 (0.92378)	Top-1 acc 68.359 (67.446)	Top-5 acc 85.938 (86.134)	lr 0.00643
Train [80][1160/3239]	Time 0.214 (0.642)	Data Time 0.001 (0.036)	Loss 2.3331 (2.3567)	Entropy 0.92206 (0.92377)	Top-1 acc 69.531 (67.450)	Top-5 acc 85.938 (86.133)	lr 0.00643
Train [80][1170/3239]	Time 0.230 (0.641)	Data Time 0.001 (0.036)	Loss 2.4342 (2.3567)	Entropy 0.92203 (0.92375)	Top-1 acc 65.625 (67.446)	Top-5 acc 85.547 (86.137)	lr 0.00643
Train [80][1180/3239]	Time 0.260 (0.639)	Data Time 0.001 (0.035)	Loss 2.3768 (2.3566)	Entropy 0.92194 (0.92374)	Top-1 acc 70.312 (67.452)	Top-5 acc 84.766 (86.134)	lr 0.00643
Train [80][1190/3239]	Time 0.226 (0.638)	Data Time 0.001 (0.035)	Loss 2.3661 (2.3563)	Entropy 0.92193 (0.92372)	Top-1 acc 71.484 (67.460)	Top-5 acc 85.547 (86.137)	lr 0.00643
Train [80][1200/3239]	Time 0.231 (0.637)	Data Time 0.001 (0.035)	Loss 2.3553 (2.3566)	Entropy 0.92189 (0.92371)	Top-1 acc 66.797 (67.456)	Top-5 acc 85.156 (86.133)	lr 0.00643
Train [80][1210/3239]	Time 0.224 (0.635)	Data Time 0.001 (0.035)	Loss 2.2798 (2.3566)	Entropy 0.92187 (0.92369)	Top-1 acc 69.141 (67.460)	Top-5 acc 89.062 (86.132)	lr 0.00643
Train [80][1220/3239]	Time 2.452 (0.634)	Data Time 0.001 (0.034)	Loss 2.2413 (2.3567)	Entropy 0.92187 (0.92368)	Top-1 acc 68.359 (67.456)	Top-5 acc 86.328 (86.130)	lr 0.00643
Train [80][1230/3239]	Time 0.278 (0.631)	Data Time 0.001 (0.034)	Loss 2.2483 (2.3563)	Entropy 0.92182 (0.92366)	Top-1 acc 67.578 (67.467)	Top-5 acc 88.672 (86.142)	lr 0.00643
Train [80][1240/3239]	Time 0.232 (0.629)	Data Time 0.002 (0.034)	Loss 2.3186 (2.3560)	Entropy 0.92181 (0.92365)	Top-1 acc 67.578 (67.472)	Top-5 acc 88.281 (86.151)	lr 0.00643
Train [80][1250/3239]	Time 0.235 (0.628)	Data Time 0.001 (0.033)	Loss 2.2863 (2.3558)	Entropy 0.92177 (0.92363)	Top-1 acc 69.141 (67.475)	Top-5 acc 85.938 (86.154)	lr 0.00642
Train [80][1260/3239]	Time 0.237 (0.627)	Data Time 0.001 (0.033)	Loss 2.2511 (2.3556)	Entropy 0.92179 (0.92362)	Top-1 acc 70.312 (67.488)	Top-5 acc 85.938 (86.153)	lr 0.00642
Train [80][1270/3239]	Time 0.334 (0.626)	Data Time 0.001 (0.033)	Loss 2.3605 (2.3556)	Entropy 0.92177 (0.92361)	Top-1 acc 64.453 (67.482)	Top-5 acc 86.328 (86.156)	lr 0.00642
Train [80][1280/3239]	Time 0.228 (0.625)	Data Time 0.001 (0.033)	Loss 2.3649 (2.3556)	Entropy 0.92175 (0.92359)	Top-1 acc 69.141 (67.481)	Top-5 acc 85.938 (86.155)	lr 0.00642
Train [80][1290/3239]	Time 0.227 (0.623)	Data Time 0.001 (0.032)	Loss 2.5398 (2.3558)	Entropy 0.92172 (0.92358)	Top-1 acc 60.938 (67.475)	Top-5 acc 82.812 (86.146)	lr 0.00642
Train [80][1300/3239]	Time 0.231 (0.622)	Data Time 0.001 (0.032)	Loss 2.3138 (2.3557)	Entropy 0.92171 (0.92356)	Top-1 acc 67.188 (67.480)	Top-5 acc 87.891 (86.146)	lr 0.00642
Train [80][1310/3239]	Time 0.252 (0.621)	Data Time 0.002 (0.032)	Loss 2.3938 (2.3560)	Entropy 0.92169 (0.92355)	Top-1 acc 65.234 (67.469)	Top-5 acc 87.109 (86.141)	lr 0.00642
Train [80][1320/3239]	Time 0.221 (0.620)	Data Time 0.001 (0.032)	Loss 2.3222 (2.3559)	Entropy 0.92162 (0.92353)	Top-1 acc 71.484 (67.479)	Top-5 acc 87.109 (86.142)	lr 0.00642
Train [80][1330/3239]	Time 2.457 (0.618)	Data Time 0.001 (0.032)	Loss 2.2934 (2.3559)	Entropy 0.92162 (0.92352)	Top-1 acc 69.141 (67.478)	Top-5 acc 86.719 (86.146)	lr 0.00642
Train [80][1340/3239]	Time 0.217 (0.616)	Data Time 0.001 (0.031)	Loss 2.4362 (2.3560)	Entropy 0.92160 (0.92351)	Top-1 acc 62.500 (67.470)	Top-5 acc 83.984 (86.141)	lr 0.00642
Train [80][1350/3239]	Time 0.239 (0.614)	Data Time 0.001 (0.031)	Loss 2.3119 (2.3560)	Entropy 0.92155 (0.92349)	Top-1 acc 69.141 (67.466)	Top-5 acc 85.938 (86.139)	lr 0.00642
Train [80][1360/3239]	Time 0.318 (0.613)	Data Time 0.001 (0.031)	Loss 2.4141 (2.3563)	Entropy 0.92157 (0.92348)	Top-1 acc 66.406 (67.454)	Top-5 acc 85.156 (86.130)	lr 0.00642
Train [80][1370/3239]	Time 0.228 (0.612)	Data Time 0.001 (0.031)	Loss 2.3586 (2.3563)	Entropy 0.92156 (0.92346)	Top-1 acc 68.359 (67.454)	Top-5 acc 85.547 (86.129)	lr 0.00641
Train [80][1380/3239]	Time 0.271 (0.611)	Data Time 0.001 (0.030)	Loss 2.2560 (2.3563)	Entropy 0.92156 (0.92345)	Top-1 acc 68.750 (67.457)	Top-5 acc 86.328 (86.132)	lr 0.00641
Train [80][1390/3239]	Time 0.222 (0.610)	Data Time 0.001 (0.030)	Loss 2.3672 (2.3562)	Entropy 0.92145 (0.92343)	Top-1 acc 66.797 (67.466)	Top-5 acc 84.375 (86.135)	lr 0.00641
Train [80][1400/3239]	Time 0.322 (0.609)	Data Time 0.001 (0.030)	Loss 2.2664 (2.3562)	Entropy 0.92150 (0.92342)	Top-1 acc 66.797 (67.454)	Top-5 acc 87.109 (86.133)	lr 0.00641
Train [80][1410/3239]	Time 0.227 (0.608)	Data Time 0.001 (0.030)	Loss 2.2997 (2.3560)	Entropy 0.92147 (0.92341)	Top-1 acc 67.188 (67.459)	Top-5 acc 87.891 (86.138)	lr 0.00641
Train [80][1420/3239]	Time 0.233 (0.607)	Data Time 0.001 (0.030)	Loss 2.4376 (2.3563)	Entropy 0.92142 (0.92339)	Top-1 acc 67.188 (67.452)	Top-5 acc 83.594 (86.130)	lr 0.00641
Train [80][1430/3239]	Time 0.240 (0.645)	Data Time 0.003 (0.029)	Loss 2.2726 (2.3558)	Entropy 0.92145 (0.92338)	Top-1 acc 68.359 (67.459)	Top-5 acc 86.328 (86.139)	lr 0.00641
Train [80][1440/3239]	Time 2.582 (0.644)	Data Time 0.002 (0.029)	Loss 2.2683 (2.3559)	Entropy 0.92145 (0.92337)	Top-1 acc 69.922 (67.462)	Top-5 acc 88.281 (86.139)	lr 0.00641
Train [80][1450/3239]	Time 0.272 (0.641)	Data Time 0.002 (0.029)	Loss 2.3949 (2.3561)	Entropy 0.92144 (0.92335)	Top-1 acc 68.359 (67.458)	Top-5 acc 82.031 (86.134)	lr 0.00641
Train [80][1460/3239]	Time 0.229 (0.640)	Data Time 0.002 (0.029)	Loss 2.3986 (2.3561)	Entropy 0.92127 (0.92334)	Top-1 acc 70.312 (67.460)	Top-5 acc 83.594 (86.133)	lr 0.00641
Train [80][1470/3239]	Time 0.225 (0.639)	Data Time 0.001 (0.029)	Loss 2.3176 (2.3560)	Entropy 0.92125 (0.92332)	Top-1 acc 66.016 (67.466)	Top-5 acc 87.109 (86.133)	lr 0.00641
Train [80][1480/3239]	Time 0.221 (0.638)	Data Time 0.001 (0.029)	Loss 2.3351 (2.3559)	Entropy 0.92116 (0.92331)	Top-1 acc 67.969 (67.469)	Top-5 acc 87.500 (86.137)	lr 0.00640
Train [80][1490/3239]	Time 0.322 (0.637)	Data Time 0.001 (0.028)	Loss 2.4409 (2.3563)	Entropy 0.92114 (0.92330)	Top-1 acc 66.797 (67.465)	Top-5 acc 86.719 (86.130)	lr 0.00640
Train [80][1500/3239]	Time 0.241 (0.635)	Data Time 0.001 (0.028)	Loss 2.4291 (2.3567)	Entropy 0.92113 (0.92328)	Top-1 acc 64.844 (67.450)	Top-5 acc 84.375 (86.123)	lr 0.00640
Train [80][1510/3239]	Time 0.213 (0.634)	Data Time 0.001 (0.028)	Loss 2.4466 (2.3568)	Entropy 0.92108 (0.92327)	Top-1 acc 64.844 (67.447)	Top-5 acc 83.984 (86.121)	lr 0.00640
Train [80][1520/3239]	Time 0.225 (0.633)	Data Time 0.001 (0.028)	Loss 2.3757 (2.3565)	Entropy 0.92107 (0.92325)	Top-1 acc 66.406 (67.455)	Top-5 acc 85.156 (86.125)	lr 0.00640
Train [80][1530/3239]	Time 0.379 (0.632)	Data Time 0.001 (0.028)	Loss 2.4249 (2.3566)	Entropy 0.92091 (0.92324)	Top-1 acc 66.797 (67.453)	Top-5 acc 83.203 (86.119)	lr 0.00640
Train [80][1540/3239]	Time 0.229 (0.631)	Data Time 0.001 (0.027)	Loss 2.3404 (2.3564)	Entropy 0.92095 (0.92322)	Top-1 acc 68.750 (67.460)	Top-5 acc 86.719 (86.123)	lr 0.00640
Train [80][1550/3239]	Time 2.515 (0.630)	Data Time 0.001 (0.027)	Loss 2.4846 (2.3565)	Entropy 0.92095 (0.92321)	Top-1 acc 64.062 (67.455)	Top-5 acc 84.766 (86.119)	lr 0.00640
Train [80][1560/3239]	Time 0.226 (0.628)	Data Time 0.001 (0.027)	Loss 2.2164 (2.3566)	Entropy 0.92087 (0.92319)	Top-1 acc 68.750 (67.443)	Top-5 acc 90.625 (86.115)	lr 0.00640
Train [80][1570/3239]	Time 0.229 (0.627)	Data Time 0.001 (0.027)	Loss 2.3653 (2.3568)	Entropy 0.92080 (0.92318)	Top-1 acc 66.016 (67.441)	Top-5 acc 87.891 (86.116)	lr 0.00640
Train [80][1580/3239]	Time 0.237 (0.626)	Data Time 0.001 (0.027)	Loss 2.3621 (2.3572)	Entropy 0.92064 (0.92316)	Top-1 acc 68.750 (67.438)	Top-5 acc 85.547 (86.109)	lr 0.00640
Train [80][1590/3239]	Time 0.231 (0.625)	Data Time 0.001 (0.027)	Loss 2.4495 (2.3576)	Entropy 0.92075 (0.92315)	Top-1 acc 63.672 (67.424)	Top-5 acc 82.422 (86.100)	lr 0.00639
Train [80][1600/3239]	Time 0.233 (0.624)	Data Time 0.001 (0.026)	Loss 2.5065 (2.3578)	Entropy 0.92072 (0.92313)	Top-1 acc 67.969 (67.425)	Top-5 acc 83.984 (86.097)	lr 0.00639
Train [80][1610/3239]	Time 0.220 (0.623)	Data Time 0.001 (0.026)	Loss 2.2288 (2.3580)	Entropy 0.92069 (0.92312)	Top-1 acc 71.094 (67.418)	Top-5 acc 87.891 (86.097)	lr 0.00639
Train [80][1620/3239]	Time 0.326 (0.622)	Data Time 0.001 (0.026)	Loss 2.3943 (2.3580)	Entropy 0.92071 (0.92310)	Top-1 acc 66.797 (67.417)	Top-5 acc 85.156 (86.096)	lr 0.00639
Train [80][1630/3239]	Time 0.226 (0.621)	Data Time 0.001 (0.026)	Loss 2.4108 (2.3582)	Entropy 0.92066 (0.92309)	Top-1 acc 66.016 (67.413)	Top-5 acc 86.719 (86.092)	lr 0.00639
Train [80][1640/3239]	Time 0.227 (0.620)	Data Time 0.002 (0.026)	Loss 2.3841 (2.3585)	Entropy 0.92066 (0.92307)	Top-1 acc 69.141 (67.404)	Top-5 acc 85.156 (86.092)	lr 0.00639
Train [80][1650/3239]	Time 0.208 (0.619)	Data Time 0.001 (0.026)	Loss 2.5557 (2.3582)	Entropy 0.92065 (0.92306)	Top-1 acc 60.938 (67.414)	Top-5 acc 85.156 (86.102)	lr 0.00639
Train [80][1660/3239]	Time 2.579 (0.619)	Data Time 0.001 (0.026)	Loss 2.3732 (2.3580)	Entropy 0.92065 (0.92304)	Top-1 acc 67.969 (67.419)	Top-5 acc 82.422 (86.103)	lr 0.00639
Train [80][1670/3239]	Time 0.267 (0.616)	Data Time 0.001 (0.025)	Loss 2.5183 (2.3580)	Entropy 0.92065 (0.92303)	Top-1 acc 60.547 (67.417)	Top-5 acc 83.984 (86.105)	lr 0.00639
Train [80][1680/3239]	Time 0.296 (0.615)	Data Time 0.001 (0.025)	Loss 2.4145 (2.3581)	Entropy 0.92063 (0.92301)	Top-1 acc 64.453 (67.415)	Top-5 acc 82.422 (86.100)	lr 0.00639
Train [80][1690/3239]	Time 0.224 (0.615)	Data Time 0.001 (0.025)	Loss 2.3453 (2.3582)	Entropy 0.92062 (0.92300)	Top-1 acc 61.328 (67.412)	Top-5 acc 89.062 (86.101)	lr 0.00639
Train [80][1700/3239]	Time 0.238 (0.614)	Data Time 0.001 (0.025)	Loss 2.2389 (2.3581)	Entropy 0.92060 (0.92299)	Top-1 acc 70.312 (67.413)	Top-5 acc 87.891 (86.103)	lr 0.00639
Train [80][1710/3239]	Time 0.226 (0.613)	Data Time 0.001 (0.025)	Loss 2.1780 (2.3579)	Entropy 0.92054 (0.92297)	Top-1 acc 72.266 (67.419)	Top-5 acc 87.891 (86.108)	lr 0.00638
Train [80][1720/3239]	Time 0.222 (0.612)	Data Time 0.001 (0.025)	Loss 2.4162 (2.3582)	Entropy 0.92056 (0.92296)	Top-1 acc 64.453 (67.407)	Top-5 acc 83.984 (86.106)	lr 0.00638
Train [80][1730/3239]	Time 0.214 (0.611)	Data Time 0.001 (0.025)	Loss 2.3481 (2.3585)	Entropy 0.92053 (0.92294)	Top-1 acc 66.797 (67.395)	Top-5 acc 87.891 (86.098)	lr 0.00638
Train [80][1740/3239]	Time 0.235 (0.610)	Data Time 0.001 (0.024)	Loss 2.3720 (2.3583)	Entropy 0.92054 (0.92293)	Top-1 acc 65.625 (67.398)	Top-5 acc 86.328 (86.105)	lr 0.00638
Train [80][1750/3239]	Time 0.307 (0.610)	Data Time 0.001 (0.024)	Loss 2.4754 (2.3583)	Entropy 0.92048 (0.92292)	Top-1 acc 65.625 (67.401)	Top-5 acc 85.156 (86.103)	lr 0.00638
Train [80][1760/3239]	Time 0.259 (0.609)	Data Time 0.001 (0.024)	Loss 2.4176 (2.3586)	Entropy 0.92045 (0.92290)	Top-1 acc 66.797 (67.388)	Top-5 acc 83.203 (86.098)	lr 0.00638
Train [80][1770/3239]	Time 2.508 (0.608)	Data Time 0.001 (0.024)	Loss 2.3348 (2.3589)	Entropy 0.92045 (0.92289)	Top-1 acc 70.703 (67.382)	Top-5 acc 85.156 (86.094)	lr 0.00638
Train [80][1780/3239]	Time 0.244 (0.606)	Data Time 0.001 (0.024)	Loss 2.3041 (2.3588)	Entropy 0.92035 (0.92287)	Top-1 acc 67.578 (67.381)	Top-5 acc 87.109 (86.092)	lr 0.00638
Train [80][1790/3239]	Time 0.262 (0.605)	Data Time 0.002 (0.024)	Loss 2.5437 (2.3590)	Entropy 0.92040 (0.92286)	Top-1 acc 62.891 (67.375)	Top-5 acc 82.031 (86.089)	lr 0.00638
Train [80][1800/3239]	Time 0.444 (0.634)	Data Time 0.003 (0.024)	Loss 2.4072 (2.3592)	Entropy 0.92036 (0.92285)	Top-1 acc 66.016 (67.368)	Top-5 acc 83.984 (86.082)	lr 0.00638
Train [80][1810/3239]	Time 0.256 (0.633)	Data Time 0.002 (0.024)	Loss 2.2878 (2.3592)	Entropy 0.92032 (0.92283)	Top-1 acc 68.750 (67.364)	Top-5 acc 87.891 (86.083)	lr 0.00638
Train [80][1820/3239]	Time 0.261 (0.633)	Data Time 0.002 (0.024)	Loss 2.2672 (2.3590)	Entropy 0.92029 (0.92282)	Top-1 acc 71.094 (67.370)	Top-5 acc 89.062 (86.089)	lr 0.00637
Train [80][1830/3239]	Time 0.224 (0.632)	Data Time 0.001 (0.023)	Loss 2.3585 (2.3590)	Entropy 0.92020 (0.92281)	Top-1 acc 66.406 (67.370)	Top-5 acc 84.766 (86.089)	lr 0.00637
Train [80][1840/3239]	Time 0.317 (0.631)	Data Time 0.001 (0.023)	Loss 2.3034 (2.3592)	Entropy 0.92015 (0.92279)	Top-1 acc 68.359 (67.367)	Top-5 acc 88.672 (86.088)	lr 0.00637
Train [80][1850/3239]	Time 0.237 (0.630)	Data Time 0.001 (0.023)	Loss 2.4403 (2.3593)	Entropy 0.92017 (0.92278)	Top-1 acc 64.453 (67.367)	Top-5 acc 82.812 (86.082)	lr 0.00637
Train [80][1860/3239]	Time 0.227 (0.629)	Data Time 0.001 (0.023)	Loss 2.2910 (2.3592)	Entropy 0.92016 (0.92276)	Top-1 acc 69.141 (67.365)	Top-5 acc 86.719 (86.086)	lr 0.00637
Train [80][1870/3239]	Time 0.220 (0.629)	Data Time 0.001 (0.023)	Loss 2.3127 (2.3595)	Entropy 0.92016 (0.92275)	Top-1 acc 67.578 (67.359)	Top-5 acc 85.938 (86.080)	lr 0.00637
Train [80][1880/3239]	Time 2.581 (0.628)	Data Time 0.001 (0.023)	Loss 2.4484 (2.3596)	Entropy 0.92016 (0.92274)	Top-1 acc 64.844 (67.351)	Top-5 acc 85.938 (86.076)	lr 0.00637
Train [80][1890/3239]	Time 0.224 (0.626)	Data Time 0.002 (0.023)	Loss 2.2504 (2.3598)	Entropy 0.92019 (0.92272)	Top-1 acc 69.141 (67.349)	Top-5 acc 87.109 (86.075)	lr 0.00637
Train [80][1900/3239]	Time 0.234 (0.625)	Data Time 0.001 (0.023)	Loss 2.3947 (2.3601)	Entropy 0.92014 (0.92271)	Top-1 acc 62.891 (67.338)	Top-5 acc 84.375 (86.063)	lr 0.00637
Train [80][1910/3239]	Time 0.233 (0.624)	Data Time 0.001 (0.022)	Loss 2.4661 (2.3602)	Entropy 0.92006 (0.92269)	Top-1 acc 64.453 (67.337)	Top-5 acc 84.766 (86.061)	lr 0.00637
Train [80][1920/3239]	Time 0.233 (0.624)	Data Time 0.001 (0.022)	Loss 2.5506 (2.3602)	Entropy 0.91995 (0.92268)	Top-1 acc 60.547 (67.336)	Top-5 acc 82.812 (86.062)	lr 0.00637
Train [80][1930/3239]	Time 0.229 (0.623)	Data Time 0.001 (0.022)	Loss 2.3087 (2.3601)	Entropy 0.91994 (0.92267)	Top-1 acc 71.484 (67.342)	Top-5 acc 87.109 (86.063)	lr 0.00636
Train [80][1940/3239]	Time 0.259 (0.622)	Data Time 0.001 (0.022)	Loss 2.2472 (2.3599)	Entropy 0.91996 (0.92265)	Top-1 acc 70.703 (67.348)	Top-5 acc 89.062 (86.067)	lr 0.00636
Train [80][1950/3239]	Time 0.226 (0.621)	Data Time 0.001 (0.022)	Loss 2.3168 (2.3600)	Entropy 0.91986 (0.92264)	Top-1 acc 68.359 (67.342)	Top-5 acc 85.547 (86.069)	lr 0.00636
Train [80][1960/3239]	Time 0.230 (0.621)	Data Time 0.001 (0.022)	Loss 2.3083 (2.3600)	Entropy 0.91984 (0.92262)	Top-1 acc 67.969 (67.339)	Top-5 acc 85.938 (86.069)	lr 0.00636
Train [80][1970/3239]	Time 0.382 (0.620)	Data Time 0.001 (0.022)	Loss 2.3628 (2.3600)	Entropy 0.91984 (0.92261)	Top-1 acc 67.188 (67.341)	Top-5 acc 85.156 (86.070)	lr 0.00636
Train [80][1980/3239]	Time 0.250 (0.619)	Data Time 0.001 (0.022)	Loss 2.3276 (2.3600)	Entropy 0.91983 (0.92260)	Top-1 acc 68.359 (67.340)	Top-5 acc 85.938 (86.071)	lr 0.00636
Train [80][1990/3239]	Time 2.633 (0.619)	Data Time 0.001 (0.022)	Loss 2.3425 (2.3600)	Entropy 0.91983 (0.92258)	Top-1 acc 71.094 (67.349)	Top-5 acc 85.547 (86.069)	lr 0.00636
Train [80][2000/3239]	Time 0.219 (0.617)	Data Time 0.001 (0.022)	Loss 2.3684 (2.3600)	Entropy 0.91942 (0.92257)	Top-1 acc 70.703 (67.349)	Top-5 acc 86.328 (86.070)	lr 0.00636
Train [80][2010/3239]	Time 0.251 (0.616)	Data Time 0.001 (0.021)	Loss 2.3467 (2.3598)	Entropy 0.91933 (0.92255)	Top-1 acc 66.016 (67.351)	Top-5 acc 87.500 (86.073)	lr 0.00636
Train [80][2020/3239]	Time 0.234 (0.615)	Data Time 0.002 (0.021)	Loss 2.3195 (2.3599)	Entropy 0.91929 (0.92253)	Top-1 acc 69.922 (67.348)	Top-5 acc 86.719 (86.073)	lr 0.00636
Train [80][2030/3239]	Time 0.235 (0.615)	Data Time 0.002 (0.021)	Loss 2.3055 (2.3599)	Entropy 0.91932 (0.92252)	Top-1 acc 68.359 (67.352)	Top-5 acc 86.328 (86.071)	lr 0.00636
Train [80][2040/3239]	Time 0.228 (0.614)	Data Time 0.001 (0.021)	Loss 2.3287 (2.3599)	Entropy 0.91927 (0.92250)	Top-1 acc 69.141 (67.349)	Top-5 acc 87.109 (86.071)	lr 0.00636
Train [80][2050/3239]	Time 0.230 (0.613)	Data Time 0.001 (0.021)	Loss 2.2780 (2.3601)	Entropy 0.91925 (0.92249)	Top-1 acc 67.969 (67.344)	Top-5 acc 86.328 (86.067)	lr 0.00635
Train [80][2060/3239]	Time 0.234 (0.613)	Data Time 0.001 (0.021)	Loss 2.3796 (2.3600)	Entropy 0.91928 (0.92247)	Top-1 acc 66.406 (67.344)	Top-5 acc 84.375 (86.070)	lr 0.00635
Train [80][2070/3239]	Time 0.203 (0.612)	Data Time 0.001 (0.021)	Loss 2.4198 (2.3598)	Entropy 0.91928 (0.92246)	Top-1 acc 63.672 (67.351)	Top-5 acc 85.938 (86.074)	lr 0.00635
Train [80][2080/3239]	Time 0.261 (0.611)	Data Time 0.001 (0.021)	Loss 2.4156 (2.3598)	Entropy 0.91926 (0.92244)	Top-1 acc 67.578 (67.354)	Top-5 acc 84.375 (86.070)	lr 0.00635
Train [80][2090/3239]	Time 0.225 (0.611)	Data Time 0.002 (0.021)	Loss 2.3979 (2.3598)	Entropy 0.91923 (0.92243)	Top-1 acc 63.672 (67.352)	Top-5 acc 85.156 (86.072)	lr 0.00635
Train [80][2100/3239]	Time 2.595 (0.610)	Data Time 0.001 (0.021)	Loss 2.1916 (2.3596)	Entropy 0.91923 (0.92241)	Top-1 acc 74.609 (67.358)	Top-5 acc 88.672 (86.075)	lr 0.00635
Train [80][2110/3239]	Time 0.245 (0.608)	Data Time 0.001 (0.020)	Loss 2.3090 (2.3598)	Entropy 0.91909 (0.92239)	Top-1 acc 68.359 (67.349)	Top-5 acc 87.109 (86.073)	lr 0.00635
Train [80][2120/3239]	Time 0.276 (0.608)	Data Time 0.002 (0.020)	Loss 2.3882 (2.3601)	Entropy 0.91908 (0.92238)	Top-1 acc 67.969 (67.341)	Top-5 acc 85.156 (86.064)	lr 0.00635
Train [80][2130/3239]	Time 0.240 (0.607)	Data Time 0.001 (0.020)	Loss 2.2519 (2.3601)	Entropy 0.91905 (0.92236)	Top-1 acc 69.531 (67.335)	Top-5 acc 86.719 (86.064)	lr 0.00635
Train [80][2140/3239]	Time 0.223 (0.607)	Data Time 0.002 (0.020)	Loss 2.1806 (2.3599)	Entropy 0.91899 (0.92235)	Top-1 acc 72.656 (67.339)	Top-5 acc 87.891 (86.066)	lr 0.00635
Train [80][2150/3239]	Time 0.242 (0.606)	Data Time 0.001 (0.020)	Loss 2.3712 (2.3601)	Entropy 0.91894 (0.92233)	Top-1 acc 69.531 (67.334)	Top-5 acc 86.328 (86.066)	lr 0.00635
Train [80][2160/3239]	Time 0.361 (0.628)	Data Time 0.003 (0.020)	Loss 2.3699 (2.3603)	Entropy 0.91889 (0.92232)	Top-1 acc 67.188 (67.327)	Top-5 acc 83.203 (86.065)	lr 0.00634
Train [80][2170/3239]	Time 0.220 (0.628)	Data Time 0.002 (0.020)	Loss 2.4477 (2.3605)	Entropy 0.91889 (0.92230)	Top-1 acc 64.453 (67.318)	Top-5 acc 83.594 (86.060)	lr 0.00634
Train [80][2180/3239]	Time 0.248 (0.628)	Data Time 0.002 (0.020)	Loss 2.0674 (2.3604)	Entropy 0.91891 (0.92228)	Top-1 acc 74.609 (67.318)	Top-5 acc 92.578 (86.059)	lr 0.00634
Train [80][2190/3239]	Time 0.318 (0.627)	Data Time 0.001 (0.020)	Loss 2.1800 (2.3603)	Entropy 0.91889 (0.92227)	Top-1 acc 70.312 (67.319)	Top-5 acc 89.844 (86.061)	lr 0.00634
Train [80][2200/3239]	Time 0.242 (0.626)	Data Time 0.001 (0.020)	Loss 2.5279 (2.3602)	Entropy 0.91882 (0.92225)	Top-1 acc 63.672 (67.319)	Top-5 acc 84.375 (86.058)	lr 0.00634
Train [80][2210/3239]	Time 2.541 (0.626)	Data Time 0.001 (0.020)	Loss 2.3615 (2.3603)	Entropy 0.91882 (0.92224)	Top-1 acc 69.141 (67.316)	Top-5 acc 85.156 (86.057)	lr 0.00634
Train [80][2220/3239]	Time 0.253 (0.624)	Data Time 0.002 (0.020)	Loss 2.3462 (2.3603)	Entropy 0.91882 (0.92222)	Top-1 acc 67.969 (67.317)	Top-5 acc 86.328 (86.058)	lr 0.00634
Train [80][2230/3239]	Time 0.333 (0.623)	Data Time 0.003 (0.019)	Loss 2.1937 (2.3603)	Entropy 0.91881 (0.92221)	Top-1 acc 70.703 (67.320)	Top-5 acc 88.281 (86.056)	lr 0.00634
Train [80][2240/3239]	Time 0.218 (0.623)	Data Time 0.001 (0.019)	Loss 2.3551 (2.3603)	Entropy 0.91883 (0.92219)	Top-1 acc 67.578 (67.321)	Top-5 acc 85.938 (86.056)	lr 0.00634
Train [80][2250/3239]	Time 0.268 (0.622)	Data Time 0.001 (0.019)	Loss 2.3075 (2.3602)	Entropy 0.91884 (0.92218)	Top-1 acc 67.578 (67.321)	Top-5 acc 86.328 (86.057)	lr 0.00634
Train [80][2260/3239]	Time 0.247 (0.621)	Data Time 0.002 (0.019)	Loss 2.2805 (2.3604)	Entropy 0.91878 (0.92216)	Top-1 acc 67.969 (67.316)	Top-5 acc 89.062 (86.053)	lr 0.00634
Train [80][2270/3239]	Time 0.218 (0.621)	Data Time 0.001 (0.019)	Loss 2.2512 (2.3604)	Entropy 0.91882 (0.92215)	Top-1 acc 70.703 (67.312)	Top-5 acc 86.328 (86.055)	lr 0.00634
Train [80][2280/3239]	Time 0.260 (0.620)	Data Time 0.001 (0.019)	Loss 2.5734 (2.3603)	Entropy 0.91878 (0.92213)	Top-1 acc 62.109 (67.311)	Top-5 acc 81.641 (86.056)	lr 0.00633
Train [80][2290/3239]	Time 0.229 (0.620)	Data Time 0.001 (0.019)	Loss 2.1854 (2.3604)	Entropy 0.91882 (0.92212)	Top-1 acc 69.141 (67.311)	Top-5 acc 89.844 (86.056)	lr 0.00633
Train [80][2300/3239]	Time 0.215 (0.619)	Data Time 0.001 (0.019)	Loss 2.3102 (2.3606)	Entropy 0.91877 (0.92210)	Top-1 acc 69.922 (67.309)	Top-5 acc 86.328 (86.049)	lr 0.00633
Train [80][2310/3239]	Time 0.260 (0.618)	Data Time 0.001 (0.019)	Loss 2.4013 (2.3609)	Entropy 0.91878 (0.92209)	Top-1 acc 67.578 (67.302)	Top-5 acc 84.766 (86.041)	lr 0.00633
Train [80][2320/3239]	Time 2.687 (0.618)	Data Time 0.001 (0.019)	Loss 2.4356 (2.3611)	Entropy 0.91878 (0.92208)	Top-1 acc 64.844 (67.296)	Top-5 acc 86.328 (86.040)	lr 0.00633
Train [80][2330/3239]	Time 0.246 (0.616)	Data Time 0.001 (0.019)	Loss 2.3465 (2.3611)	Entropy 0.91877 (0.92206)	Top-1 acc 66.016 (67.292)	Top-5 acc 89.453 (86.040)	lr 0.00633
Train [80][2340/3239]	Time 0.229 (0.616)	Data Time 0.001 (0.019)	Loss 2.2519 (2.3612)	Entropy 0.91873 (0.92205)	Top-1 acc 69.141 (67.289)	Top-5 acc 89.453 (86.039)	lr 0.00633
Train [80][2350/3239]	Time 0.252 (0.615)	Data Time 0.001 (0.019)	Loss 2.5313 (2.3618)	Entropy 0.91866 (0.92203)	Top-1 acc 66.406 (67.278)	Top-5 acc 82.422 (86.030)	lr 0.00633
Train [80][2360/3239]	Time 0.252 (0.614)	Data Time 0.001 (0.019)	Loss 2.3211 (2.3617)	Entropy 0.91865 (0.92202)	Top-1 acc 67.578 (67.282)	Top-5 acc 87.109 (86.030)	lr 0.00633
Train [80][2370/3239]	Time 0.233 (0.614)	Data Time 0.001 (0.018)	Loss 2.2804 (2.3618)	Entropy 0.91867 (0.92200)	Top-1 acc 69.922 (67.275)	Top-5 acc 89.062 (86.031)	lr 0.00633
Train [80][2380/3239]	Time 0.247 (0.613)	Data Time 0.001 (0.018)	Loss 2.2944 (2.3617)	Entropy 0.91869 (0.92199)	Top-1 acc 69.531 (67.275)	Top-5 acc 86.328 (86.032)	lr 0.00633
Train [80][2390/3239]	Time 0.216 (0.613)	Data Time 0.001 (0.018)	Loss 2.5341 (2.3616)	Entropy 0.91869 (0.92198)	Top-1 acc 62.891 (67.281)	Top-5 acc 82.812 (86.034)	lr 0.00632
Train [80][2400/3239]	Time 0.237 (0.612)	Data Time 0.001 (0.018)	Loss 2.6458 (2.3618)	Entropy 0.91866 (0.92196)	Top-1 acc 58.203 (67.274)	Top-5 acc 80.859 (86.030)	lr 0.00632
Train [80][2410/3239]	Time 0.222 (0.612)	Data Time 0.001 (0.018)	Loss 2.3594 (2.3618)	Entropy 0.91870 (0.92195)	Top-1 acc 65.625 (67.275)	Top-5 acc 86.719 (86.029)	lr 0.00632
Train [80][2420/3239]	Time 0.227 (0.611)	Data Time 0.001 (0.018)	Loss 2.4094 (2.3621)	Entropy 0.91868 (0.92194)	Top-1 acc 66.797 (67.270)	Top-5 acc 84.375 (86.021)	lr 0.00632
Train [80][2430/3239]	Time 2.594 (0.610)	Data Time 0.001 (0.018)	Loss 2.3613 (2.3620)	Entropy 0.91868 (0.92192)	Top-1 acc 66.406 (67.272)	Top-5 acc 88.281 (86.024)	lr 0.00632
Train [80][2440/3239]	Time 0.255 (0.609)	Data Time 0.002 (0.018)	Loss 2.3317 (2.3619)	Entropy 0.91897 (0.92191)	Top-1 acc 68.359 (67.274)	Top-5 acc 87.109 (86.025)	lr 0.00632
Train [80][2450/3239]	Time 0.359 (0.608)	Data Time 0.001 (0.018)	Loss 2.4581 (2.3617)	Entropy 0.91893 (0.92190)	Top-1 acc 64.844 (67.273)	Top-5 acc 83.984 (86.026)	lr 0.00632
Train [80][2460/3239]	Time 0.248 (0.608)	Data Time 0.001 (0.018)	Loss 2.3355 (2.3618)	Entropy 0.91892 (0.92189)	Top-1 acc 65.625 (67.268)	Top-5 acc 84.766 (86.023)	lr 0.00632
Train [80][2470/3239]	Time 0.220 (0.608)	Data Time 0.001 (0.018)	Loss 2.2690 (2.3619)	Entropy 0.91885 (0.92187)	Top-1 acc 71.094 (67.268)	Top-5 acc 86.719 (86.021)	lr 0.00632
Train [80][2480/3239]	Time 0.234 (0.607)	Data Time 0.001 (0.018)	Loss 2.3720 (2.3620)	Entropy 0.91881 (0.92186)	Top-1 acc 67.969 (67.264)	Top-5 acc 86.328 (86.017)	lr 0.00632
Train [80][2490/3239]	Time 0.296 (0.606)	Data Time 0.001 (0.018)	Loss 2.2540 (2.3618)	Entropy 0.91876 (0.92185)	Top-1 acc 70.312 (67.270)	Top-5 acc 87.109 (86.023)	lr 0.00632
Train [80][2500/3239]	Time 0.224 (0.606)	Data Time 0.001 (0.018)	Loss 2.3847 (2.3619)	Entropy 0.91878 (0.92184)	Top-1 acc 67.969 (67.270)	Top-5 acc 86.328 (86.021)	lr 0.00631
Train [80][2510/3239]	Time 0.238 (0.605)	Data Time 0.001 (0.017)	Loss 2.3080 (2.3623)	Entropy 0.91876 (0.92182)	Top-1 acc 67.578 (67.260)	Top-5 acc 88.281 (86.017)	lr 0.00631
Train [80][2520/3239]	Time 0.263 (0.625)	Data Time 0.002 (0.017)	Loss 2.2420 (2.3623)	Entropy 0.91877 (0.92181)	Top-1 acc 68.750 (67.258)	Top-5 acc 85.938 (86.017)	lr 0.00631
Train [80][2530/3239]	Time 0.258 (0.625)	Data Time 0.002 (0.017)	Loss 2.3314 (2.3623)	Entropy 0.91869 (0.92180)	Top-1 acc 68.359 (67.265)	Top-5 acc 85.938 (86.015)	lr 0.00631
Train [80][2540/3239]	Time 2.592 (0.624)	Data Time 0.002 (0.017)	Loss 2.3776 (2.3622)	Entropy 0.91869 (0.92179)	Top-1 acc 68.750 (67.269)	Top-5 acc 87.109 (86.019)	lr 0.00631
Train [80][2550/3239]	Time 0.237 (0.623)	Data Time 0.002 (0.017)	Loss 2.3351 (2.3621)	Entropy 0.91871 (0.92178)	Top-1 acc 69.141 (67.274)	Top-5 acc 87.891 (86.022)	lr 0.00631
Train [80][2560/3239]	Time 0.279 (0.623)	Data Time 0.002 (0.017)	Loss 2.4927 (2.3622)	Entropy 0.91864 (0.92176)	Top-1 acc 63.281 (67.272)	Top-5 acc 85.547 (86.020)	lr 0.00631
Train [80][2570/3239]	Time 0.269 (0.622)	Data Time 0.002 (0.017)	Loss 2.4160 (2.3622)	Entropy 0.91863 (0.92175)	Top-1 acc 63.672 (67.268)	Top-5 acc 84.766 (86.022)	lr 0.00631
Train [80][2580/3239]	Time 0.232 (0.622)	Data Time 0.001 (0.017)	Loss 2.3307 (2.3621)	Entropy 0.91859 (0.92174)	Top-1 acc 65.625 (67.268)	Top-5 acc 85.938 (86.023)	lr 0.00631
Train [80][2590/3239]	Time 0.248 (0.621)	Data Time 0.001 (0.017)	Loss 2.1568 (2.3625)	Entropy 0.91859 (0.92173)	Top-1 acc 71.094 (67.262)	Top-5 acc 86.719 (86.015)	lr 0.00631
Train [80][2600/3239]	Time 0.235 (0.621)	Data Time 0.001 (0.017)	Loss 2.3273 (2.3625)	Entropy 0.91860 (0.92172)	Top-1 acc 70.312 (67.262)	Top-5 acc 87.500 (86.014)	lr 0.00631
Train [80][2610/3239]	Time 0.234 (0.620)	Data Time 0.001 (0.017)	Loss 2.3518 (2.3627)	Entropy 0.91858 (0.92170)	Top-1 acc 68.750 (67.257)	Top-5 acc 85.547 (86.011)	lr 0.00631
Train [80][2620/3239]	Time 0.321 (0.620)	Data Time 0.001 (0.017)	Loss 2.4632 (2.3627)	Entropy 0.91847 (0.92169)	Top-1 acc 64.844 (67.254)	Top-5 acc 81.250 (86.010)	lr 0.00630
Train [80][2630/3239]	Time 0.224 (0.619)	Data Time 0.001 (0.017)	Loss 2.3522 (2.3627)	Entropy 0.91849 (0.92168)	Top-1 acc 67.578 (67.258)	Top-5 acc 86.328 (86.011)	lr 0.00630
Train [80][2640/3239]	Time 0.317 (0.619)	Data Time 0.001 (0.017)	Loss 2.4424 (2.3627)	Entropy 0.91851 (0.92167)	Top-1 acc 68.750 (67.262)	Top-5 acc 82.812 (86.008)	lr 0.00630
Train [80][2650/3239]	Time 0.289 (0.618)	Data Time 0.001 (0.017)	Loss 2.5978 (2.3629)	Entropy 0.91846 (0.92165)	Top-1 acc 62.891 (67.257)	Top-5 acc 76.953 (86.001)	lr 0.00630
Train [80][2660/3239]	Time 0.280 (0.618)	Data Time 0.001 (0.017)	Loss 2.2925 (2.3631)	Entropy 0.91842 (0.92164)	Top-1 acc 67.969 (67.254)	Top-5 acc 88.281 (86.000)	lr 0.00630
Train [80][2670/3239]	Time 0.234 (0.617)	Data Time 0.001 (0.017)	Loss 2.2548 (2.3629)	Entropy 0.91846 (0.92163)	Top-1 acc 68.750 (67.257)	Top-5 acc 89.453 (86.002)	lr 0.00630
Train [80][2680/3239]	Time 0.249 (0.617)	Data Time 0.001 (0.016)	Loss 2.3534 (2.3631)	Entropy 0.91838 (0.92162)	Top-1 acc 69.531 (67.251)	Top-5 acc 87.109 (86.000)	lr 0.00630
Train [80][2690/3239]	Time 0.223 (0.616)	Data Time 0.001 (0.016)	Loss 2.2023 (2.3629)	Entropy 0.91820 (0.92161)	Top-1 acc 73.828 (67.255)	Top-5 acc 87.109 (86.001)	lr 0.00630
Train [80][2700/3239]	Time 0.264 (0.616)	Data Time 0.001 (0.016)	Loss 2.3608 (2.3630)	Entropy 0.91808 (0.92159)	Top-1 acc 69.141 (67.252)	Top-5 acc 86.328 (86.002)	lr 0.00630
Train [80][2710/3239]	Time 0.368 (0.615)	Data Time 0.001 (0.016)	Loss 2.4258 (2.3631)	Entropy 0.91802 (0.92158)	Top-1 acc 66.797 (67.248)	Top-5 acc 85.938 (86.002)	lr 0.00630
Train [80][2720/3239]	Time 0.244 (0.615)	Data Time 0.001 (0.016)	Loss 2.2970 (2.3631)	Entropy 0.91809 (0.92157)	Top-1 acc 69.531 (67.250)	Top-5 acc 85.156 (86.004)	lr 0.00630
Train [80][2730/3239]	Time 0.254 (0.615)	Data Time 0.001 (0.016)	Loss 2.1948 (2.3632)	Entropy 0.91813 (0.92156)	Top-1 acc 72.266 (67.245)	Top-5 acc 91.016 (86.000)	lr 0.00629
Train [80][2740/3239]	Time 0.249 (0.614)	Data Time 0.001 (0.016)	Loss 2.4337 (2.3632)	Entropy 0.91815 (0.92154)	Top-1 acc 65.234 (67.244)	Top-5 acc 83.594 (85.998)	lr 0.00629
Train [80][2750/3239]	Time 0.335 (0.614)	Data Time 0.001 (0.016)	Loss 2.3893 (2.3632)	Entropy 0.91818 (0.92153)	Top-1 acc 65.234 (67.242)	Top-5 acc 85.156 (86.001)	lr 0.00629
Train [80][2760/3239]	Time 0.215 (0.613)	Data Time 0.001 (0.016)	Loss 2.4076 (2.3634)	Entropy 0.91812 (0.92152)	Top-1 acc 64.453 (67.237)	Top-5 acc 83.984 (85.996)	lr 0.00629
Train [80][2770/3239]	Time 0.263 (0.613)	Data Time 0.001 (0.016)	Loss 2.3702 (2.3635)	Entropy 0.91806 (0.92151)	Top-1 acc 66.016 (67.235)	Top-5 acc 87.891 (85.993)	lr 0.00629
Train [80][2780/3239]	Time 0.217 (0.612)	Data Time 0.001 (0.016)	Loss 2.4898 (2.3637)	Entropy 0.91804 (0.92149)	Top-1 acc 63.672 (67.229)	Top-5 acc 83.594 (85.988)	lr 0.00629
Train [80][2790/3239]	Time 0.237 (0.612)	Data Time 0.001 (0.016)	Loss 2.4649 (2.3639)	Entropy 0.91801 (0.92148)	Top-1 acc 66.016 (67.224)	Top-5 acc 83.984 (85.985)	lr 0.00629
Train [80][2800/3239]	Time 0.264 (0.611)	Data Time 0.001 (0.016)	Loss 2.3577 (2.3639)	Entropy 0.91798 (0.92147)	Top-1 acc 67.188 (67.226)	Top-5 acc 86.719 (85.984)	lr 0.00629
Train [80][2810/3239]	Time 0.229 (0.611)	Data Time 0.001 (0.016)	Loss 2.4821 (2.3640)	Entropy 0.91799 (0.92146)	Top-1 acc 64.844 (67.223)	Top-5 acc 83.594 (85.979)	lr 0.00629
Train [80][2820/3239]	Time 0.327 (0.610)	Data Time 0.001 (0.016)	Loss 2.3052 (2.3640)	Entropy 0.91798 (0.92144)	Top-1 acc 69.922 (67.224)	Top-5 acc 87.500 (85.981)	lr 0.00629
Train [80][2830/3239]	Time 0.230 (0.610)	Data Time 0.001 (0.016)	Loss 2.3843 (2.3641)	Entropy 0.91792 (0.92143)	Top-1 acc 64.062 (67.222)	Top-5 acc 87.500 (85.982)	lr 0.00629
Train [80][2840/3239]	Time 0.325 (0.609)	Data Time 0.001 (0.016)	Loss 2.1205 (2.3640)	Entropy 0.91790 (0.92142)	Top-1 acc 74.219 (67.225)	Top-5 acc 91.406 (85.982)	lr 0.00629
Train [80][2850/3239]	Time 0.211 (0.609)	Data Time 0.001 (0.016)	Loss 2.3146 (2.3641)	Entropy 0.91780 (0.92141)	Top-1 acc 67.578 (67.221)	Top-5 acc 85.938 (85.982)	lr 0.00628
Train [80][2860/3239]	Time 0.307 (0.628)	Data Time 0.004 (0.016)	Loss 2.4667 (2.3641)	Entropy 0.91773 (0.92139)	Top-1 acc 62.109 (67.223)	Top-5 acc 83.984 (85.980)	lr 0.00628
Train [80][2870/3239]	Time 0.257 (0.627)	Data Time 0.002 (0.016)	Loss 2.2627 (2.3640)	Entropy 0.91774 (0.92138)	Top-1 acc 68.750 (67.225)	Top-5 acc 88.281 (85.981)	lr 0.00628
Train [80][2880/3239]	Time 0.349 (0.627)	Data Time 0.002 (0.015)	Loss 2.2752 (2.3641)	Entropy 0.91770 (0.92137)	Top-1 acc 69.531 (67.225)	Top-5 acc 85.156 (85.979)	lr 0.00628
Train [80][2890/3239]	Time 0.210 (0.626)	Data Time 0.001 (0.015)	Loss 2.3220 (2.3641)	Entropy 0.91769 (0.92136)	Top-1 acc 67.969 (67.227)	Top-5 acc 87.109 (85.982)	lr 0.00628
Train [80][2900/3239]	Time 0.238 (0.626)	Data Time 0.001 (0.015)	Loss 2.3692 (2.3641)	Entropy 0.91768 (0.92134)	Top-1 acc 67.188 (67.228)	Top-5 acc 87.500 (85.981)	lr 0.00628
Train [80][2910/3239]	Time 0.267 (0.625)	Data Time 0.001 (0.015)	Loss 2.4938 (2.3641)	Entropy 0.91767 (0.92133)	Top-1 acc 65.234 (67.229)	Top-5 acc 84.766 (85.981)	lr 0.00628
Train [80][2920/3239]	Time 0.376 (0.625)	Data Time 0.001 (0.015)	Loss 2.3346 (2.3641)	Entropy 0.91769 (0.92132)	Top-1 acc 68.750 (67.231)	Top-5 acc 86.719 (85.983)	lr 0.00628
Train [80][2930/3239]	Time 0.220 (0.624)	Data Time 0.001 (0.015)	Loss 2.4871 (2.3642)	Entropy 0.91773 (0.92131)	Top-1 acc 62.891 (67.227)	Top-5 acc 84.766 (85.980)	lr 0.00628
Train [80][2940/3239]	Time 0.239 (0.624)	Data Time 0.001 (0.015)	Loss 2.4448 (2.3643)	Entropy 0.91763 (0.92129)	Top-1 acc 65.625 (67.224)	Top-5 acc 85.938 (85.976)	lr 0.00628
Train [80][2950/3239]	Time 0.250 (0.623)	Data Time 0.001 (0.015)	Loss 2.4872 (2.3645)	Entropy 0.91761 (0.92128)	Top-1 acc 64.844 (67.219)	Top-5 acc 83.203 (85.974)	lr 0.00628
Train [80][2960/3239]	Time 0.253 (0.623)	Data Time 0.001 (0.015)	Loss 2.3775 (2.3645)	Entropy 0.91752 (0.92127)	Top-1 acc 68.359 (67.218)	Top-5 acc 87.109 (85.974)	lr 0.00627
Train [80][2970/3239]	Time 0.239 (0.622)	Data Time 0.001 (0.015)	Loss 2.4192 (2.3646)	Entropy 0.91752 (0.92126)	Top-1 acc 67.188 (67.215)	Top-5 acc 87.109 (85.974)	lr 0.00627
Train [80][2980/3239]	Time 0.235 (0.622)	Data Time 0.001 (0.015)	Loss 2.2172 (2.3646)	Entropy 0.91745 (0.92124)	Top-1 acc 74.609 (67.216)	Top-5 acc 87.500 (85.974)	lr 0.00627
Train [80][2990/3239]	Time 0.282 (0.621)	Data Time 0.006 (0.015)	Loss 2.4688 (2.3646)	Entropy 0.91743 (0.92123)	Top-1 acc 64.844 (67.215)	Top-5 acc 86.719 (85.974)	lr 0.00627
Train [80][3000/3239]	Time 0.232 (0.621)	Data Time 0.001 (0.015)	Loss 2.4115 (2.3647)	Entropy 0.91741 (0.92122)	Top-1 acc 66.406 (67.214)	Top-5 acc 85.156 (85.974)	lr 0.00627
Train [80][3010/3239]	Time 0.349 (0.620)	Data Time 0.001 (0.015)	Loss 2.3025 (2.3646)	Entropy 0.91738 (0.92121)	Top-1 acc 66.406 (67.212)	Top-5 acc 88.281 (85.976)	lr 0.00627
Train [80][3020/3239]	Time 0.296 (0.620)	Data Time 0.001 (0.015)	Loss 2.2385 (2.3646)	Entropy 0.91731 (0.92119)	Top-1 acc 69.922 (67.211)	Top-5 acc 86.328 (85.975)	lr 0.00627
Train [80][3030/3239]	Time 0.269 (0.619)	Data Time 0.001 (0.015)	Loss 2.3107 (2.3646)	Entropy 0.91720 (0.92118)	Top-1 acc 68.750 (67.210)	Top-5 acc 85.547 (85.975)	lr 0.00627
Train [80][3040/3239]	Time 0.228 (0.619)	Data Time 0.001 (0.015)	Loss 2.4592 (2.3648)	Entropy 0.91716 (0.92117)	Top-1 acc 69.531 (67.208)	Top-5 acc 84.375 (85.973)	lr 0.00627
Train [80][3050/3239]	Time 0.375 (0.619)	Data Time 0.001 (0.015)	Loss 2.4501 (2.3649)	Entropy 0.91720 (0.92115)	Top-1 acc 67.188 (67.205)	Top-5 acc 84.375 (85.971)	lr 0.00627
Train [80][3060/3239]	Time 0.234 (0.618)	Data Time 0.001 (0.015)	Loss 2.2573 (2.3649)	Entropy 0.91722 (0.92114)	Top-1 acc 73.828 (67.206)	Top-5 acc 87.109 (85.971)	lr 0.00627
Train [80][3070/3239]	Time 0.284 (0.618)	Data Time 0.001 (0.015)	Loss 2.3600 (2.3648)	Entropy 0.91720 (0.92113)	Top-1 acc 67.578 (67.210)	Top-5 acc 86.719 (85.971)	lr 0.00626
Train [80][3080/3239]	Time 0.263 (0.617)	Data Time 0.001 (0.015)	Loss 2.3324 (2.3648)	Entropy 0.91725 (0.92112)	Top-1 acc 66.406 (67.209)	Top-5 acc 84.766 (85.969)	lr 0.00626
Train [80][3090/3239]	Time 0.287 (0.617)	Data Time 0.002 (0.015)	Loss 2.2766 (2.3648)	Entropy 0.91724 (0.92110)	Top-1 acc 71.094 (67.211)	Top-5 acc 88.281 (85.973)	lr 0.00626
Train [80][3100/3239]	Time 0.220 (0.616)	Data Time 0.002 (0.014)	Loss 2.5176 (2.3650)	Entropy 0.91723 (0.92109)	Top-1 acc 61.719 (67.207)	Top-5 acc 82.031 (85.968)	lr 0.00626
Train [80][3110/3239]	Time 0.276 (0.616)	Data Time 0.001 (0.014)	Loss 2.3337 (2.3650)	Entropy 0.91721 (0.92108)	Top-1 acc 66.016 (67.204)	Top-5 acc 86.328 (85.968)	lr 0.00626
Train [80][3120/3239]	Time 0.250 (0.615)	Data Time 0.001 (0.014)	Loss 2.2650 (2.3649)	Entropy 0.91713 (0.92107)	Top-1 acc 69.141 (67.208)	Top-5 acc 88.281 (85.970)	lr 0.00626
Train [80][3130/3239]	Time 0.234 (0.615)	Data Time 0.001 (0.014)	Loss 2.4252 (2.3650)	Entropy 0.91707 (0.92105)	Top-1 acc 68.750 (67.204)	Top-5 acc 85.156 (85.968)	lr 0.00626
Train [80][3140/3239]	Time 0.312 (0.615)	Data Time 0.001 (0.014)	Loss 2.4265 (2.3650)	Entropy 0.91705 (0.92104)	Top-1 acc 64.453 (67.199)	Top-5 acc 86.328 (85.969)	lr 0.00626
Train [80][3150/3239]	Time 0.216 (0.614)	Data Time 0.001 (0.014)	Loss 2.2441 (2.3653)	Entropy 0.91696 (0.92103)	Top-1 acc 69.531 (67.194)	Top-5 acc 87.891 (85.965)	lr 0.00626
Train [80][3160/3239]	Time 0.268 (0.614)	Data Time 0.001 (0.014)	Loss 2.5911 (2.3653)	Entropy 0.91689 (0.92102)	Top-1 acc 61.719 (67.191)	Top-5 acc 81.641 (85.963)	lr 0.00626
Train [80][3170/3239]	Time 0.261 (0.613)	Data Time 0.001 (0.014)	Loss 2.3463 (2.3652)	Entropy 0.91682 (0.92100)	Top-1 acc 65.234 (67.192)	Top-5 acc 87.109 (85.966)	lr 0.00626
Train [80][3180/3239]	Time 0.208 (0.613)	Data Time 0.000 (0.014)	Loss 2.4113 (2.3652)	Entropy 0.91679 (0.92099)	Top-1 acc 66.016 (67.191)	Top-5 acc 86.328 (85.968)	lr 0.00626
Train [80][3190/3239]	Time 0.246 (0.630)	Data Time 0.000 (0.014)	Loss 2.2816 (2.3652)	Entropy 0.91680 (0.92098)	Top-1 acc 70.703 (67.193)	Top-5 acc 86.328 (85.966)	lr 0.00625
Train [80][3200/3239]	Time 0.235 (0.629)	Data Time 0.000 (0.014)	Loss 2.4153 (2.3653)	Entropy 0.91678 (0.92096)	Top-1 acc 64.844 (67.189)	Top-5 acc 84.375 (85.964)	lr 0.00625
Train [80][3210/3239]	Time 0.239 (0.629)	Data Time 0.000 (0.014)	Loss 2.6230 (2.3655)	Entropy 0.91661 (0.92095)	Top-1 acc 60.938 (67.185)	Top-5 acc 82.812 (85.962)	lr 0.00625
Train [80][3220/3239]	Time 0.232 (0.628)	Data Time 0.000 (0.014)	Loss 2.3813 (2.3655)	Entropy 0.91657 (0.92094)	Top-1 acc 67.969 (67.186)	Top-5 acc 86.719 (85.960)	lr 0.00625
Train [80][3230/3239]	Time 0.245 (0.628)	Data Time 0.000 (0.014)	Loss 2.3861 (2.3655)	Entropy 0.91657 (0.92092)	Top-1 acc 64.844 (67.187)	Top-5 acc 87.500 (85.959)	lr 0.00625
Train [80][3239/3239]	Time 2.281 (0.627)	Data Time 0.000 (0.014)	Loss 2.4754 (2.3655)	Entropy 0.91657 (0.92091)	Top-1 acc 61.728 (67.187)	Top-5 acc 87.654 (85.958)	lr 0.00625
==========Valid [80/120]	loss 1.308	top-1 acc 70.279 (70.279)	top-5 acc 88.519	Train top-1 67.187	top-5 85.958	Entropy 0.91657	Latency-None: 0.000ms	Flops: 546.53M
Train [81][0/3239]	Time 42.331 (42.331)	Data Time 40.829 (40.829)	Loss 2.4699 (2.4699)	Entropy 0.91661 (0.91661)	Top-1 acc 67.578 (67.578)	Top-5 acc 82.422 (82.422)	lr 0.00625
Train [81][10/3239]	Time 3.282 (4.448)	Data Time 0.002 (3.717)	Loss 2.2018 (2.3340)	Entropy 0.91661 (0.91661)	Top-1 acc 71.875 (68.466)	Top-5 acc 87.891 (86.435)	lr 0.00625
Train [81][20/3239]	Time 0.229 (2.448)	Data Time 0.001 (1.948)	Loss 2.2720 (2.3147)	Entropy 0.91656 (0.91659)	Top-1 acc 69.141 (69.085)	Top-5 acc 86.719 (86.607)	lr 0.00625
Train [81][30/3239]	Time 0.322 (1.815)	Data Time 0.001 (1.320)	Loss 2.3832 (2.3195)	Entropy 0.91654 (0.91658)	Top-1 acc 65.234 (68.700)	Top-5 acc 83.594 (86.542)	lr 0.00625
Train [81][40/3239]	Time 0.300 (1.492)	Data Time 0.002 (0.998)	Loss 2.2709 (2.3268)	Entropy 0.91633 (0.91653)	Top-1 acc 70.312 (68.636)	Top-5 acc 86.328 (86.519)	lr 0.00625
Train [81][50/3239]	Time 0.233 (1.295)	Data Time 0.001 (0.803)	Loss 2.2692 (2.3244)	Entropy 0.91632 (0.91649)	Top-1 acc 67.188 (68.589)	Top-5 acc 88.672 (86.535)	lr 0.00625
Train [81][60/3239]	Time 0.233 (1.161)	Data Time 0.001 (0.672)	Loss 2.1832 (2.3253)	Entropy 0.91623 (0.91645)	Top-1 acc 69.531 (68.449)	Top-5 acc 91.406 (86.616)	lr 0.00624
Train [81][70/3239]	Time 0.222 (1.064)	Data Time 0.001 (0.578)	Loss 2.3999 (2.3270)	Entropy 0.91621 (0.91642)	Top-1 acc 67.578 (68.502)	Top-5 acc 86.719 (86.603)	lr 0.00624
Train [81][80/3239]	Time 0.242 (0.993)	Data Time 0.001 (0.506)	Loss 2.4159 (2.3326)	Entropy 0.91624 (0.91640)	Top-1 acc 66.016 (68.244)	Top-5 acc 83.594 (86.574)	lr 0.00624
Train [81][90/3239]	Time 0.235 (0.937)	Data Time 0.001 (0.451)	Loss 2.2205 (2.3315)	Entropy 0.91639 (0.91638)	Top-1 acc 70.312 (68.269)	Top-5 acc 89.453 (86.603)	lr 0.00624
Train [81][100/3239]	Time 0.237 (0.892)	Data Time 0.001 (0.406)	Loss 2.4327 (2.3363)	Entropy 0.91634 (0.91638)	Top-1 acc 66.797 (68.139)	Top-5 acc 83.594 (86.521)	lr 0.00624
Train [81][110/3239]	Time 0.226 (0.855)	Data Time 0.001 (0.370)	Loss 2.3043 (2.3357)	Entropy 0.91630 (0.91638)	Top-1 acc 67.969 (68.138)	Top-5 acc 85.156 (86.582)	lr 0.00624
Train [81][120/3239]	Time 2.728 (0.826)	Data Time 0.002 (0.340)	Loss 2.2348 (2.3378)	Entropy 0.91630 (0.91637)	Top-1 acc 71.094 (68.043)	Top-5 acc 86.719 (86.525)	lr 0.00624
Train [81][130/3239]	Time 0.225 (0.781)	Data Time 0.001 (0.314)	Loss 2.3608 (2.3373)	Entropy 0.91626 (0.91636)	Top-1 acc 69.141 (68.070)	Top-5 acc 86.328 (86.540)	lr 0.00624
Train [81][140/3239]	Time 0.245 (0.760)	Data Time 0.001 (0.292)	Loss 2.3486 (2.3373)	Entropy 0.91623 (0.91636)	Top-1 acc 68.359 (68.063)	Top-5 acc 86.719 (86.533)	lr 0.00624
Train [81][150/3239]	Time 0.245 (0.741)	Data Time 0.001 (0.272)	Loss 2.5232 (2.3400)	Entropy 0.91612 (0.91634)	Top-1 acc 66.797 (68.057)	Top-5 acc 82.031 (86.509)	lr 0.00624
Train [81][160/3239]	Time 0.321 (0.725)	Data Time 0.001 (0.256)	Loss 2.3895 (2.3397)	Entropy 0.91608 (0.91633)	Top-1 acc 63.672 (68.046)	Top-5 acc 81.641 (86.464)	lr 0.00624
Train [81][170/3239]	Time 0.225 (0.710)	Data Time 0.001 (0.241)	Loss 2.3339 (2.3401)	Entropy 0.91606 (0.91631)	Top-1 acc 71.875 (68.056)	Top-5 acc 87.109 (86.461)	lr 0.00624
Train [81][180/3239]	Time 0.223 (0.697)	Data Time 0.001 (0.227)	Loss 2.3076 (2.3394)	Entropy 0.91607 (0.91630)	Top-1 acc 67.578 (68.079)	Top-5 acc 87.109 (86.477)	lr 0.00623
Train [81][190/3239]	Time 0.234 (0.686)	Data Time 0.001 (0.216)	Loss 2.2881 (2.3412)	Entropy 0.91600 (0.91628)	Top-1 acc 70.312 (68.040)	Top-5 acc 87.500 (86.473)	lr 0.00623
Train [81][200/3239]	Time 0.276 (0.676)	Data Time 0.001 (0.205)	Loss 2.2921 (2.3406)	Entropy 0.91596 (0.91627)	Top-1 acc 71.094 (68.066)	Top-5 acc 87.500 (86.449)	lr 0.00623
Train [81][210/3239]	Time 0.230 (0.667)	Data Time 0.001 (0.195)	Loss 2.2962 (2.3413)	Entropy 0.91596 (0.91625)	Top-1 acc 70.703 (68.061)	Top-5 acc 88.281 (86.447)	lr 0.00623
Train [81][220/3239]	Time 0.235 (0.659)	Data Time 0.002 (0.187)	Loss 2.0945 (2.3383)	Entropy 0.91591 (0.91624)	Top-1 acc 71.484 (68.112)	Top-5 acc 90.234 (86.512)	lr 0.00623
Train [81][230/3239]	Time 2.416 (0.650)	Data Time 0.001 (0.179)	Loss 2.2786 (2.3410)	Entropy 0.91591 (0.91623)	Top-1 acc 65.625 (68.038)	Top-5 acc 89.062 (86.457)	lr 0.00623
Train [81][240/3239]	Time 0.249 (0.634)	Data Time 0.001 (0.171)	Loss 2.3018 (2.3410)	Entropy 0.91590 (0.91621)	Top-1 acc 70.703 (68.019)	Top-5 acc 87.109 (86.469)	lr 0.00623
Train [81][250/3239]	Time 0.228 (0.628)	Data Time 0.001 (0.164)	Loss 2.2296 (2.3394)	Entropy 0.91583 (0.91620)	Top-1 acc 73.828 (68.056)	Top-5 acc 87.500 (86.488)	lr 0.00623
Train [81][260/3239]	Time 0.223 (0.623)	Data Time 0.001 (0.158)	Loss 2.2646 (2.3411)	Entropy 0.91581 (0.91618)	Top-1 acc 69.141 (68.047)	Top-5 acc 89.062 (86.460)	lr 0.00623
Train [81][270/3239]	Time 0.235 (0.617)	Data Time 0.001 (0.152)	Loss 2.2662 (2.3404)	Entropy 0.91561 (0.91616)	Top-1 acc 69.531 (68.051)	Top-5 acc 87.500 (86.439)	lr 0.00623
Train [81][280/3239]	Time 0.247 (0.613)	Data Time 0.001 (0.147)	Loss 2.2939 (2.3412)	Entropy 0.91560 (0.91614)	Top-1 acc 68.750 (68.015)	Top-5 acc 85.938 (86.403)	lr 0.00623
Train [81][290/3239]	Time 0.315 (0.608)	Data Time 0.001 (0.142)	Loss 2.2549 (2.3408)	Entropy 0.91554 (0.91612)	Top-1 acc 71.094 (68.045)	Top-5 acc 88.281 (86.389)	lr 0.00622
Train [81][300/3239]	Time 0.236 (0.604)	Data Time 0.001 (0.137)	Loss 2.5041 (2.3430)	Entropy 0.91553 (0.91610)	Top-1 acc 62.891 (67.988)	Top-5 acc 83.984 (86.364)	lr 0.00622
Train [81][310/3239]	Time 0.272 (0.772)	Data Time 0.003 (0.133)	Loss 2.4042 (2.3448)	Entropy 0.91555 (0.91609)	Top-1 acc 69.531 (67.926)	Top-5 acc 84.375 (86.318)	lr 0.00622
Train [81][320/3239]	Time 0.230 (0.766)	Data Time 0.002 (0.129)	Loss 2.0778 (2.3430)	Entropy 0.91562 (0.91607)	Top-1 acc 75.000 (67.957)	Top-5 acc 90.234 (86.355)	lr 0.00622
Train [81][330/3239]	Time 0.394 (0.758)	Data Time 0.002 (0.125)	Loss 2.4519 (2.3426)	Entropy 0.91560 (0.91606)	Top-1 acc 65.625 (67.960)	Top-5 acc 85.156 (86.353)	lr 0.00622
Train [81][340/3239]	Time 2.630 (0.750)	Data Time 0.002 (0.122)	Loss 2.2465 (2.3437)	Entropy 0.91560 (0.91604)	Top-1 acc 72.266 (67.924)	Top-5 acc 89.453 (86.328)	lr 0.00622
Train [81][350/3239]	Time 0.242 (0.736)	Data Time 0.006 (0.118)	Loss 2.3090 (2.3432)	Entropy 0.91563 (0.91603)	Top-1 acc 67.578 (67.941)	Top-5 acc 85.156 (86.317)	lr 0.00622
Train [81][360/3239]	Time 0.233 (0.729)	Data Time 0.001 (0.115)	Loss 2.2323 (2.3438)	Entropy 0.91565 (0.91602)	Top-1 acc 71.875 (67.940)	Top-5 acc 87.891 (86.316)	lr 0.00622
Train [81][370/3239]	Time 0.225 (0.722)	Data Time 0.001 (0.112)	Loss 2.3207 (2.3414)	Entropy 0.91562 (0.91601)	Top-1 acc 67.188 (68.000)	Top-5 acc 85.156 (86.347)	lr 0.00622
Train [81][380/3239]	Time 0.237 (0.716)	Data Time 0.002 (0.109)	Loss 2.4422 (2.3419)	Entropy 0.91559 (0.91600)	Top-1 acc 61.328 (68.003)	Top-5 acc 84.766 (86.335)	lr 0.00622
Train [81][390/3239]	Time 0.222 (0.710)	Data Time 0.001 (0.106)	Loss 2.5462 (2.3431)	Entropy 0.91512 (0.91598)	Top-1 acc 62.891 (67.980)	Top-5 acc 83.203 (86.309)	lr 0.00622
Train [81][400/3239]	Time 0.230 (0.705)	Data Time 0.001 (0.104)	Loss 2.3766 (2.3429)	Entropy 0.91506 (0.91596)	Top-1 acc 66.016 (67.981)	Top-5 acc 83.594 (86.299)	lr 0.00622
Train [81][410/3239]	Time 0.221 (0.699)	Data Time 0.001 (0.101)	Loss 2.4585 (2.3441)	Entropy 0.91503 (0.91594)	Top-1 acc 62.891 (67.931)	Top-5 acc 85.156 (86.286)	lr 0.00621
Train [81][420/3239]	Time 0.328 (0.694)	Data Time 0.001 (0.099)	Loss 2.4348 (2.3448)	Entropy 0.91504 (0.91592)	Top-1 acc 68.359 (67.897)	Top-5 acc 84.766 (86.259)	lr 0.00621
Train [81][430/3239]	Time 0.242 (0.689)	Data Time 0.001 (0.097)	Loss 2.2876 (2.3448)	Entropy 0.91502 (0.91590)	Top-1 acc 70.312 (67.905)	Top-5 acc 87.500 (86.261)	lr 0.00621
Train [81][440/3239]	Time 0.282 (0.685)	Data Time 0.001 (0.094)	Loss 2.3326 (2.3442)	Entropy 0.91497 (0.91588)	Top-1 acc 67.578 (67.921)	Top-5 acc 87.891 (86.257)	lr 0.00621
Train [81][450/3239]	Time 2.506 (0.680)	Data Time 0.002 (0.092)	Loss 2.4073 (2.3441)	Entropy 0.91497 (0.91586)	Top-1 acc 62.891 (67.899)	Top-5 acc 83.594 (86.253)	lr 0.00621
Train [81][460/3239]	Time 0.229 (0.671)	Data Time 0.001 (0.090)	Loss 2.3771 (2.3445)	Entropy 0.91489 (0.91583)	Top-1 acc 69.531 (67.902)	Top-5 acc 86.719 (86.247)	lr 0.00621
Train [81][470/3239]	Time 0.252 (0.667)	Data Time 0.001 (0.088)	Loss 2.4179 (2.3457)	Entropy 0.91485 (0.91581)	Top-1 acc 64.453 (67.882)	Top-5 acc 86.328 (86.228)	lr 0.00621
Train [81][480/3239]	Time 0.208 (0.663)	Data Time 0.001 (0.087)	Loss 2.2312 (2.3462)	Entropy 0.91485 (0.91579)	Top-1 acc 69.141 (67.865)	Top-5 acc 89.062 (86.228)	lr 0.00621
Train [81][490/3239]	Time 0.235 (0.660)	Data Time 0.001 (0.085)	Loss 2.3583 (2.3459)	Entropy 0.91481 (0.91577)	Top-1 acc 67.578 (67.867)	Top-5 acc 85.547 (86.237)	lr 0.00621
Train [81][500/3239]	Time 0.234 (0.656)	Data Time 0.001 (0.083)	Loss 2.2157 (2.3460)	Entropy 0.91482 (0.91575)	Top-1 acc 71.094 (67.853)	Top-5 acc 88.281 (86.238)	lr 0.00621
Train [81][510/3239]	Time 0.257 (0.653)	Data Time 0.001 (0.082)	Loss 2.4277 (2.3454)	Entropy 0.91482 (0.91574)	Top-1 acc 65.625 (67.872)	Top-5 acc 83.203 (86.249)	lr 0.00621
Train [81][520/3239]	Time 0.227 (0.650)	Data Time 0.001 (0.080)	Loss 2.3075 (2.3454)	Entropy 0.91470 (0.91572)	Top-1 acc 67.578 (67.886)	Top-5 acc 85.156 (86.249)	lr 0.00620
Train [81][530/3239]	Time 0.228 (0.646)	Data Time 0.001 (0.079)	Loss 2.4753 (2.3451)	Entropy 0.91467 (0.91570)	Top-1 acc 66.406 (67.894)	Top-5 acc 85.547 (86.251)	lr 0.00620
Train [81][540/3239]	Time 0.235 (0.643)	Data Time 0.001 (0.077)	Loss 2.3354 (2.3441)	Entropy 0.91465 (0.91568)	Top-1 acc 67.969 (67.914)	Top-5 acc 86.719 (86.283)	lr 0.00620
Train [81][550/3239]	Time 0.336 (0.640)	Data Time 0.001 (0.076)	Loss 2.2819 (2.3451)	Entropy 0.91472 (0.91566)	Top-1 acc 67.578 (67.894)	Top-5 acc 88.281 (86.262)	lr 0.00620
Train [81][560/3239]	Time 2.536 (0.638)	Data Time 0.001 (0.075)	Loss 2.2941 (2.3443)	Entropy 0.91472 (0.91564)	Top-1 acc 68.359 (67.903)	Top-5 acc 88.281 (86.279)	lr 0.00620
Train [81][570/3239]	Time 0.228 (0.631)	Data Time 0.001 (0.073)	Loss 2.2634 (2.3442)	Entropy 0.91471 (0.91563)	Top-1 acc 69.922 (67.916)	Top-5 acc 88.672 (86.280)	lr 0.00620
Train [81][580/3239]	Time 0.235 (0.628)	Data Time 0.001 (0.072)	Loss 2.3044 (2.3439)	Entropy 0.91466 (0.91561)	Top-1 acc 69.141 (67.937)	Top-5 acc 86.719 (86.284)	lr 0.00620
Train [81][590/3239]	Time 0.330 (0.626)	Data Time 0.001 (0.071)	Loss 2.3211 (2.3441)	Entropy 0.91464 (0.91559)	Top-1 acc 68.750 (67.926)	Top-5 acc 85.938 (86.269)	lr 0.00620
Train [81][600/3239]	Time 0.233 (0.623)	Data Time 0.001 (0.070)	Loss 2.3398 (2.3444)	Entropy 0.91466 (0.91558)	Top-1 acc 67.969 (67.914)	Top-5 acc 86.328 (86.264)	lr 0.00620
Train [81][610/3239]	Time 0.235 (0.621)	Data Time 0.001 (0.069)	Loss 2.4077 (2.3453)	Entropy 0.91468 (0.91556)	Top-1 acc 67.188 (67.900)	Top-5 acc 83.203 (86.235)	lr 0.00620
Train [81][620/3239]	Time 0.221 (0.618)	Data Time 0.001 (0.068)	Loss 2.2873 (2.3464)	Entropy 0.91469 (0.91555)	Top-1 acc 69.531 (67.862)	Top-5 acc 87.500 (86.217)	lr 0.00620
Train [81][630/3239]	Time 0.239 (0.616)	Data Time 0.003 (0.066)	Loss 2.2519 (2.3463)	Entropy 0.91464 (0.91554)	Top-1 acc 71.484 (67.867)	Top-5 acc 87.891 (86.212)	lr 0.00619
Train [81][640/3239]	Time 0.244 (0.614)	Data Time 0.002 (0.065)	Loss 2.3943 (2.3469)	Entropy 0.91465 (0.91552)	Top-1 acc 65.234 (67.847)	Top-5 acc 85.938 (86.203)	lr 0.00619
Train [81][650/3239]	Time 0.235 (0.612)	Data Time 0.001 (0.064)	Loss 2.3268 (2.3472)	Entropy 0.91468 (0.91551)	Top-1 acc 67.578 (67.824)	Top-5 acc 85.938 (86.195)	lr 0.00619
Train [81][660/3239]	Time 0.259 (0.610)	Data Time 0.001 (0.064)	Loss 2.3627 (2.3468)	Entropy 0.91465 (0.91550)	Top-1 acc 68.750 (67.834)	Top-5 acc 85.156 (86.204)	lr 0.00619
Train [81][670/3239]	Time 55.151 (0.687)	Data Time 0.001 (0.063)	Loss 2.4034 (2.3474)	Entropy 0.91465 (0.91548)	Top-1 acc 67.578 (67.817)	Top-5 acc 84.375 (86.195)	lr 0.00619
Train [81][680/3239]	Time 0.332 (0.681)	Data Time 0.002 (0.062)	Loss 2.3910 (2.3485)	Entropy 0.91466 (0.91547)	Top-1 acc 68.359 (67.789)	Top-5 acc 84.375 (86.176)	lr 0.00619
Train [81][690/3239]	Time 0.251 (0.678)	Data Time 0.002 (0.061)	Loss 2.2472 (2.3483)	Entropy 0.91518 (0.91547)	Top-1 acc 69.531 (67.791)	Top-5 acc 89.453 (86.181)	lr 0.00619
Train [81][700/3239]	Time 0.233 (0.675)	Data Time 0.001 (0.060)	Loss 2.3763 (2.3484)	Entropy 0.91515 (0.91546)	Top-1 acc 62.500 (67.773)	Top-5 acc 85.938 (86.185)	lr 0.00619
Train [81][710/3239]	Time 0.256 (0.673)	Data Time 0.002 (0.059)	Loss 2.2469 (2.3487)	Entropy 0.91500 (0.91546)	Top-1 acc 69.922 (67.759)	Top-5 acc 89.453 (86.188)	lr 0.00619
Train [81][720/3239]	Time 0.236 (0.670)	Data Time 0.001 (0.058)	Loss 2.2672 (2.3478)	Entropy 0.91501 (0.91545)	Top-1 acc 68.750 (67.799)	Top-5 acc 88.281 (86.206)	lr 0.00619
Train [81][730/3239]	Time 0.224 (0.668)	Data Time 0.002 (0.058)	Loss 2.2776 (2.3477)	Entropy 0.91503 (0.91544)	Top-1 acc 67.578 (67.790)	Top-5 acc 85.938 (86.204)	lr 0.00619
Train [81][740/3239]	Time 0.225 (0.666)	Data Time 0.001 (0.057)	Loss 2.3738 (2.3483)	Entropy 0.91503 (0.91544)	Top-1 acc 68.750 (67.775)	Top-5 acc 86.328 (86.193)	lr 0.00619
Train [81][750/3239]	Time 0.231 (0.663)	Data Time 0.001 (0.056)	Loss 2.3893 (2.3482)	Entropy 0.91497 (0.91543)	Top-1 acc 66.406 (67.782)	Top-5 acc 83.984 (86.196)	lr 0.00618
Train [81][760/3239]	Time 0.221 (0.661)	Data Time 0.001 (0.055)	Loss 2.3452 (2.3482)	Entropy 0.91500 (0.91543)	Top-1 acc 69.141 (67.788)	Top-5 acc 86.719 (86.198)	lr 0.00618
Train [81][770/3239]	Time 0.241 (0.658)	Data Time 0.001 (0.055)	Loss 2.2705 (2.3478)	Entropy 0.91502 (0.91542)	Top-1 acc 72.266 (67.798)	Top-5 acc 87.109 (86.196)	lr 0.00618
Train [81][780/3239]	Time 2.645 (0.656)	Data Time 0.001 (0.054)	Loss 2.2720 (2.3481)	Entropy 0.91502 (0.91542)	Top-1 acc 71.484 (67.795)	Top-5 acc 85.547 (86.195)	lr 0.00618
Train [81][790/3239]	Time 0.237 (0.651)	Data Time 0.001 (0.053)	Loss 2.1793 (2.3478)	Entropy 0.91500 (0.91541)	Top-1 acc 72.656 (67.803)	Top-5 acc 87.109 (86.193)	lr 0.00618
Train [81][800/3239]	Time 0.229 (0.649)	Data Time 0.001 (0.053)	Loss 2.2125 (2.3476)	Entropy 0.91498 (0.91541)	Top-1 acc 71.875 (67.814)	Top-5 acc 89.844 (86.201)	lr 0.00618
Train [81][810/3239]	Time 0.375 (0.647)	Data Time 0.001 (0.052)	Loss 2.3171 (2.3469)	Entropy 0.91506 (0.91540)	Top-1 acc 69.531 (67.829)	Top-5 acc 87.891 (86.220)	lr 0.00618
Train [81][820/3239]	Time 0.245 (0.645)	Data Time 0.001 (0.052)	Loss 2.3552 (2.3465)	Entropy 0.91507 (0.91540)	Top-1 acc 68.359 (67.832)	Top-5 acc 87.109 (86.230)	lr 0.00618
Train [81][830/3239]	Time 0.211 (0.643)	Data Time 0.001 (0.051)	Loss 2.3151 (2.3466)	Entropy 0.91506 (0.91539)	Top-1 acc 69.531 (67.831)	Top-5 acc 84.375 (86.222)	lr 0.00618
Train [81][840/3239]	Time 0.224 (0.641)	Data Time 0.001 (0.050)	Loss 2.2865 (2.3467)	Entropy 0.91504 (0.91539)	Top-1 acc 68.750 (67.829)	Top-5 acc 89.844 (86.232)	lr 0.00618
Train [81][850/3239]	Time 0.268 (0.639)	Data Time 0.001 (0.050)	Loss 2.3955 (2.3468)	Entropy 0.91502 (0.91538)	Top-1 acc 68.359 (67.824)	Top-5 acc 84.766 (86.228)	lr 0.00618
Train [81][860/3239]	Time 0.187 (0.637)	Data Time 0.001 (0.049)	Loss 2.4875 (2.3469)	Entropy 0.91499 (0.91538)	Top-1 acc 64.062 (67.811)	Top-5 acc 85.156 (86.234)	lr 0.00617
Train [81][870/3239]	Time 0.224 (0.636)	Data Time 0.001 (0.049)	Loss 2.2981 (2.3467)	Entropy 0.91469 (0.91537)	Top-1 acc 69.141 (67.813)	Top-5 acc 85.547 (86.239)	lr 0.00617
Train [81][880/3239]	Time 0.234 (0.634)	Data Time 0.001 (0.048)	Loss 2.5076 (2.3458)	Entropy 0.91470 (0.91537)	Top-1 acc 66.406 (67.836)	Top-5 acc 82.812 (86.259)	lr 0.00617
Train [81][890/3239]	Time 2.565 (0.632)	Data Time 0.002 (0.048)	Loss 2.3664 (2.3467)	Entropy 0.91470 (0.91536)	Top-1 acc 67.578 (67.814)	Top-5 acc 89.844 (86.246)	lr 0.00617
Train [81][900/3239]	Time 0.371 (0.628)	Data Time 0.001 (0.047)	Loss 2.3289 (2.3465)	Entropy 0.91465 (0.91535)	Top-1 acc 69.141 (67.809)	Top-5 acc 85.547 (86.248)	lr 0.00617
Train [81][910/3239]	Time 0.250 (0.626)	Data Time 0.001 (0.047)	Loss 2.4058 (2.3467)	Entropy 0.91464 (0.91534)	Top-1 acc 67.578 (67.805)	Top-5 acc 83.594 (86.245)	lr 0.00617
Train [81][920/3239]	Time 0.227 (0.625)	Data Time 0.001 (0.046)	Loss 2.3112 (2.3465)	Entropy 0.91462 (0.91534)	Top-1 acc 71.875 (67.816)	Top-5 acc 88.672 (86.243)	lr 0.00617
Train [81][930/3239]	Time 0.239 (0.623)	Data Time 0.001 (0.046)	Loss 2.3223 (2.3463)	Entropy 0.91453 (0.91533)	Top-1 acc 67.188 (67.821)	Top-5 acc 87.891 (86.244)	lr 0.00617
Train [81][940/3239]	Time 0.222 (0.621)	Data Time 0.001 (0.045)	Loss 2.3417 (2.3468)	Entropy 0.91451 (0.91532)	Top-1 acc 67.188 (67.808)	Top-5 acc 87.109 (86.229)	lr 0.00617
Train [81][950/3239]	Time 0.237 (0.620)	Data Time 0.001 (0.045)	Loss 2.3914 (2.3464)	Entropy 0.91451 (0.91531)	Top-1 acc 65.625 (67.810)	Top-5 acc 85.156 (86.242)	lr 0.00617
Train [81][960/3239]	Time 0.221 (0.618)	Data Time 0.001 (0.044)	Loss 2.3941 (2.3466)	Entropy 0.91447 (0.91530)	Top-1 acc 64.453 (67.804)	Top-5 acc 85.938 (86.237)	lr 0.00617
Train [81][970/3239]	Time 0.231 (0.617)	Data Time 0.001 (0.044)	Loss 2.3164 (2.3469)	Entropy 0.91443 (0.91529)	Top-1 acc 70.703 (67.803)	Top-5 acc 86.719 (86.227)	lr 0.00617
Train [81][980/3239]	Time 0.216 (0.615)	Data Time 0.001 (0.043)	Loss 2.5357 (2.3470)	Entropy 0.91439 (0.91528)	Top-1 acc 61.719 (67.795)	Top-5 acc 82.812 (86.231)	lr 0.00616
Train [81][990/3239]	Time 0.337 (0.614)	Data Time 0.001 (0.043)	Loss 2.1932 (2.3467)	Entropy 0.91426 (0.91528)	Top-1 acc 72.266 (67.804)	Top-5 acc 89.844 (86.240)	lr 0.00616
Train [81][1000/3239]	Time 2.434 (0.612)	Data Time 0.001 (0.043)	Loss 2.4856 (2.3471)	Entropy 0.91426 (0.91527)	Top-1 acc 67.188 (67.787)	Top-5 acc 82.031 (86.232)	lr 0.00616
Train [81][1010/3239]	Time 0.227 (0.608)	Data Time 0.001 (0.042)	Loss 2.2794 (2.3475)	Entropy 0.91420 (0.91525)	Top-1 acc 68.750 (67.781)	Top-5 acc 87.109 (86.219)	lr 0.00616
Train [81][1020/3239]	Time 0.237 (0.607)	Data Time 0.001 (0.042)	Loss 2.4262 (2.3473)	Entropy 0.91416 (0.91524)	Top-1 acc 65.625 (67.784)	Top-5 acc 83.594 (86.223)	lr 0.00616
Train [81][1030/3239]	Time 0.329 (0.606)	Data Time 0.001 (0.041)	Loss 2.3717 (2.3477)	Entropy 0.91413 (0.91523)	Top-1 acc 65.625 (67.772)	Top-5 acc 85.938 (86.211)	lr 0.00616
Train [81][1040/3239]	Time 0.290 (0.658)	Data Time 0.003 (0.041)	Loss 2.3466 (2.3476)	Entropy 0.91417 (0.91522)	Top-1 acc 67.188 (67.768)	Top-5 acc 86.719 (86.217)	lr 0.00616
Train [81][1050/3239]	Time 0.240 (0.657)	Data Time 0.002 (0.041)	Loss 2.3063 (2.3476)	Entropy 0.91413 (0.91521)	Top-1 acc 65.234 (67.762)	Top-5 acc 87.500 (86.216)	lr 0.00616
Train [81][1060/3239]	Time 0.245 (0.655)	Data Time 0.001 (0.040)	Loss 2.2599 (2.3474)	Entropy 0.91418 (0.91520)	Top-1 acc 67.969 (67.772)	Top-5 acc 89.062 (86.216)	lr 0.00616
Train [81][1070/3239]	Time 0.219 (0.654)	Data Time 0.002 (0.040)	Loss 2.3605 (2.3476)	Entropy 0.91409 (0.91519)	Top-1 acc 66.406 (67.756)	Top-5 acc 85.156 (86.208)	lr 0.00616
Train [81][1080/3239]	Time 0.229 (0.652)	Data Time 0.001 (0.040)	Loss 2.3000 (2.3478)	Entropy 0.91405 (0.91518)	Top-1 acc 67.969 (67.754)	Top-5 acc 85.938 (86.199)	lr 0.00616
Train [81][1090/3239]	Time 0.168 (0.651)	Data Time 0.001 (0.039)	Loss 2.3578 (2.3479)	Entropy 0.91407 (0.91517)	Top-1 acc 67.969 (67.742)	Top-5 acc 85.938 (86.198)	lr 0.00615
Train [81][1100/3239]	Time 0.281 (0.649)	Data Time 0.001 (0.039)	Loss 2.6375 (2.3482)	Entropy 0.91399 (0.91516)	Top-1 acc 60.938 (67.729)	Top-5 acc 83.984 (86.196)	lr 0.00615
Train [81][1110/3239]	Time 2.489 (0.648)	Data Time 0.001 (0.039)	Loss 2.3748 (2.3482)	Entropy 0.91399 (0.91515)	Top-1 acc 69.141 (67.738)	Top-5 acc 83.984 (86.194)	lr 0.00615
Train [81][1120/3239]	Time 0.225 (0.644)	Data Time 0.001 (0.038)	Loss 2.3889 (2.3482)	Entropy 0.91390 (0.91514)	Top-1 acc 67.578 (67.737)	Top-5 acc 86.719 (86.200)	lr 0.00615
Train [81][1130/3239]	Time 0.224 (0.642)	Data Time 0.001 (0.038)	Loss 2.3468 (2.3483)	Entropy 0.91388 (0.91513)	Top-1 acc 67.969 (67.733)	Top-5 acc 88.281 (86.199)	lr 0.00615
Train [81][1140/3239]	Time 0.240 (0.641)	Data Time 0.002 (0.038)	Loss 2.3166 (2.3484)	Entropy 0.91382 (0.91512)	Top-1 acc 69.141 (67.731)	Top-5 acc 85.938 (86.196)	lr 0.00615
Train [81][1150/3239]	Time 0.236 (0.640)	Data Time 0.001 (0.037)	Loss 2.3042 (2.3486)	Entropy 0.91374 (0.91511)	Top-1 acc 71.484 (67.736)	Top-5 acc 87.109 (86.187)	lr 0.00615
Train [81][1160/3239]	Time 0.327 (0.638)	Data Time 0.001 (0.037)	Loss 2.2477 (2.3487)	Entropy 0.91361 (0.91509)	Top-1 acc 71.875 (67.736)	Top-5 acc 88.281 (86.191)	lr 0.00615
Train [81][1170/3239]	Time 0.232 (0.637)	Data Time 0.001 (0.037)	Loss 2.3076 (2.3488)	Entropy 0.91363 (0.91508)	Top-1 acc 68.750 (67.734)	Top-5 acc 86.328 (86.186)	lr 0.00615
Train [81][1180/3239]	Time 0.226 (0.636)	Data Time 0.001 (0.036)	Loss 2.2977 (2.3490)	Entropy 0.91364 (0.91507)	Top-1 acc 69.531 (67.734)	Top-5 acc 86.719 (86.177)	lr 0.00615
Train [81][1190/3239]	Time 0.242 (0.635)	Data Time 0.001 (0.036)	Loss 2.2305 (2.3491)	Entropy 0.91358 (0.91506)	Top-1 acc 69.531 (67.728)	Top-5 acc 89.062 (86.172)	lr 0.00615
Train [81][1200/3239]	Time 0.330 (0.633)	Data Time 0.001 (0.036)	Loss 2.2806 (2.3492)	Entropy 0.91360 (0.91504)	Top-1 acc 69.141 (67.725)	Top-5 acc 87.109 (86.174)	lr 0.00615
Train [81][1210/3239]	Time 0.229 (0.632)	Data Time 0.001 (0.036)	Loss 2.3079 (2.3488)	Entropy 0.91356 (0.91503)	Top-1 acc 71.094 (67.738)	Top-5 acc 87.109 (86.183)	lr 0.00614
Train [81][1220/3239]	Time 2.508 (0.631)	Data Time 0.002 (0.035)	Loss 2.2896 (2.3484)	Entropy 0.91356 (0.91502)	Top-1 acc 70.312 (67.743)	Top-5 acc 86.328 (86.188)	lr 0.00614
Train [81][1230/3239]	Time 0.205 (0.628)	Data Time 0.001 (0.035)	Loss 2.2077 (2.3484)	Entropy 0.91358 (0.91501)	Top-1 acc 69.531 (67.744)	Top-5 acc 90.234 (86.185)	lr 0.00614
Train [81][1240/3239]	Time 0.232 (0.626)	Data Time 0.001 (0.035)	Loss 2.4771 (2.3483)	Entropy 0.91355 (0.91500)	Top-1 acc 64.844 (67.752)	Top-5 acc 84.375 (86.192)	lr 0.00614
Train [81][1250/3239]	Time 0.225 (0.625)	Data Time 0.001 (0.034)	Loss 2.4408 (2.3484)	Entropy 0.91352 (0.91499)	Top-1 acc 66.406 (67.760)	Top-5 acc 84.766 (86.193)	lr 0.00614
Train [81][1260/3239]	Time 0.239 (0.624)	Data Time 0.001 (0.034)	Loss 2.3975 (2.3487)	Entropy 0.91347 (0.91497)	Top-1 acc 66.406 (67.753)	Top-5 acc 87.500 (86.189)	lr 0.00614
Train [81][1270/3239]	Time 0.229 (0.623)	Data Time 0.001 (0.034)	Loss 2.2678 (2.3484)	Entropy 0.91344 (0.91496)	Top-1 acc 70.703 (67.764)	Top-5 acc 88.281 (86.197)	lr 0.00614
Train [81][1280/3239]	Time 0.221 (0.622)	Data Time 0.001 (0.034)	Loss 2.4711 (2.3486)	Entropy 0.91340 (0.91495)	Top-1 acc 64.453 (67.761)	Top-5 acc 83.594 (86.194)	lr 0.00614
Train [81][1290/3239]	Time 0.321 (0.621)	Data Time 0.001 (0.033)	Loss 2.1848 (2.3482)	Entropy 0.91336 (0.91494)	Top-1 acc 72.656 (67.768)	Top-5 acc 88.672 (86.205)	lr 0.00614
Train [81][1300/3239]	Time 0.220 (0.620)	Data Time 0.001 (0.033)	Loss 2.3375 (2.3480)	Entropy 0.91334 (0.91493)	Top-1 acc 68.359 (67.768)	Top-5 acc 86.719 (86.209)	lr 0.00614
Train [81][1310/3239]	Time 0.229 (0.619)	Data Time 0.002 (0.033)	Loss 2.3291 (2.3482)	Entropy 0.91326 (0.91491)	Top-1 acc 69.922 (67.763)	Top-5 acc 86.328 (86.203)	lr 0.00614
Train [81][1320/3239]	Time 0.230 (0.617)	Data Time 0.001 (0.033)	Loss 2.2350 (2.3480)	Entropy 0.91328 (0.91490)	Top-1 acc 69.531 (67.765)	Top-5 acc 88.672 (86.207)	lr 0.00613
Train [81][1330/3239]	Time 2.547 (0.616)	Data Time 0.001 (0.032)	Loss 2.4649 (2.3485)	Entropy 0.91328 (0.91489)	Top-1 acc 64.453 (67.756)	Top-5 acc 83.594 (86.196)	lr 0.00613
Train [81][1340/3239]	Time 0.267 (0.614)	Data Time 0.001 (0.032)	Loss 2.1955 (2.3484)	Entropy 0.91325 (0.91488)	Top-1 acc 72.656 (67.757)	Top-5 acc 90.234 (86.203)	lr 0.00613
Train [81][1350/3239]	Time 0.226 (0.612)	Data Time 0.001 (0.032)	Loss 2.5396 (2.3488)	Entropy 0.91308 (0.91486)	Top-1 acc 62.891 (67.760)	Top-5 acc 82.812 (86.200)	lr 0.00613
Train [81][1360/3239]	Time 0.222 (0.611)	Data Time 0.001 (0.032)	Loss 2.4053 (2.3484)	Entropy 0.91307 (0.91485)	Top-1 acc 62.891 (67.767)	Top-5 acc 84.766 (86.203)	lr 0.00613
Train [81][1370/3239]	Time 0.245 (0.610)	Data Time 0.001 (0.032)	Loss 2.3077 (2.3486)	Entropy 0.91305 (0.91484)	Top-1 acc 69.531 (67.762)	Top-5 acc 85.547 (86.202)	lr 0.00613
Train [81][1380/3239]	Time 0.326 (0.609)	Data Time 0.001 (0.031)	Loss 2.3087 (2.3487)	Entropy 0.91309 (0.91482)	Top-1 acc 66.406 (67.757)	Top-5 acc 87.109 (86.207)	lr 0.00613
Train [81][1390/3239]	Time 0.238 (0.608)	Data Time 0.001 (0.031)	Loss 2.5345 (2.3490)	Entropy 0.91310 (0.91481)	Top-1 acc 64.453 (67.744)	Top-5 acc 82.031 (86.203)	lr 0.00613
Train [81][1400/3239]	Time 0.251 (0.646)	Data Time 0.003 (0.031)	Loss 2.1831 (2.3489)	Entropy 0.91308 (0.91480)	Top-1 acc 73.047 (67.749)	Top-5 acc 90.625 (86.207)	lr 0.00613
Train [81][1410/3239]	Time 0.230 (0.645)	Data Time 0.002 (0.031)	Loss 2.4366 (2.3490)	Entropy 0.91305 (0.91479)	Top-1 acc 64.844 (67.755)	Top-5 acc 83.594 (86.203)	lr 0.00613
Train [81][1420/3239]	Time 0.333 (0.644)	Data Time 0.002 (0.031)	Loss 2.7680 (2.3490)	Entropy 0.91303 (0.91477)	Top-1 acc 54.297 (67.755)	Top-5 acc 79.688 (86.200)	lr 0.00613
Train [81][1430/3239]	Time 0.218 (0.643)	Data Time 0.001 (0.030)	Loss 2.4099 (2.3492)	Entropy 0.91297 (0.91476)	Top-1 acc 66.797 (67.755)	Top-5 acc 84.766 (86.199)	lr 0.00613
Train [81][1440/3239]	Time 2.574 (0.641)	Data Time 0.001 (0.030)	Loss 2.3628 (2.3494)	Entropy 0.91297 (0.91475)	Top-1 acc 65.625 (67.749)	Top-5 acc 84.766 (86.194)	lr 0.00612
Train [81][1450/3239]	Time 0.216 (0.639)	Data Time 0.001 (0.030)	Loss 2.4043 (2.3497)	Entropy 0.91300 (0.91474)	Top-1 acc 68.359 (67.743)	Top-5 acc 84.766 (86.189)	lr 0.00612
Train [81][1460/3239]	Time 0.240 (0.638)	Data Time 0.001 (0.030)	Loss 2.2761 (2.3496)	Entropy 0.91300 (0.91473)	Top-1 acc 69.141 (67.742)	Top-5 acc 88.281 (86.194)	lr 0.00612
Train [81][1470/3239]	Time 0.219 (0.637)	Data Time 0.004 (0.030)	Loss 2.2742 (2.3494)	Entropy 0.91294 (0.91471)	Top-1 acc 69.141 (67.745)	Top-5 acc 88.281 (86.201)	lr 0.00612
Train [81][1480/3239]	Time 0.235 (0.635)	Data Time 0.001 (0.029)	Loss 2.0923 (2.3494)	Entropy 0.91293 (0.91470)	Top-1 acc 73.438 (67.748)	Top-5 acc 90.625 (86.200)	lr 0.00612
Train [81][1490/3239]	Time 0.224 (0.634)	Data Time 0.001 (0.029)	Loss 2.3015 (2.3497)	Entropy 0.91291 (0.91469)	Top-1 acc 69.531 (67.743)	Top-5 acc 85.938 (86.199)	lr 0.00612
Train [81][1500/3239]	Time 0.244 (0.633)	Data Time 0.001 (0.029)	Loss 2.1961 (2.3497)	Entropy 0.91282 (0.91468)	Top-1 acc 69.531 (67.740)	Top-5 acc 89.062 (86.200)	lr 0.00612
Train [81][1510/3239]	Time 0.335 (0.632)	Data Time 0.002 (0.029)	Loss 2.1847 (2.3495)	Entropy 0.91278 (0.91467)	Top-1 acc 74.609 (67.745)	Top-5 acc 89.453 (86.203)	lr 0.00612
Train [81][1520/3239]	Time 0.221 (0.631)	Data Time 0.001 (0.029)	Loss 2.2490 (2.3497)	Entropy 0.91265 (0.91465)	Top-1 acc 67.578 (67.743)	Top-5 acc 90.625 (86.200)	lr 0.00612
Train [81][1530/3239]	Time 0.229 (0.630)	Data Time 0.001 (0.028)	Loss 2.6486 (2.3497)	Entropy 0.91266 (0.91464)	Top-1 acc 60.938 (67.741)	Top-5 acc 79.688 (86.201)	lr 0.00612
Train [81][1540/3239]	Time 0.224 (0.629)	Data Time 0.002 (0.028)	Loss 2.2426 (2.3496)	Entropy 0.91265 (0.91463)	Top-1 acc 72.266 (67.752)	Top-5 acc 87.109 (86.200)	lr 0.00612
Train [81][1550/3239]	Time 2.559 (0.628)	Data Time 0.001 (0.028)	Loss 2.3481 (2.3496)	Entropy 0.91265 (0.91461)	Top-1 acc 66.797 (67.753)	Top-5 acc 86.328 (86.205)	lr 0.00611
Train [81][1560/3239]	Time 0.250 (0.626)	Data Time 0.001 (0.028)	Loss 2.3099 (2.3495)	Entropy 0.91259 (0.91460)	Top-1 acc 68.359 (67.752)	Top-5 acc 85.938 (86.202)	lr 0.00611
Train [81][1570/3239]	Time 0.228 (0.625)	Data Time 0.001 (0.028)	Loss 2.3756 (2.3498)	Entropy 0.91258 (0.91459)	Top-1 acc 65.625 (67.742)	Top-5 acc 85.156 (86.202)	lr 0.00611
Train [81][1580/3239]	Time 0.244 (0.624)	Data Time 0.001 (0.028)	Loss 2.3293 (2.3497)	Entropy 0.91265 (0.91458)	Top-1 acc 68.359 (67.753)	Top-5 acc 85.547 (86.202)	lr 0.00611
Train [81][1590/3239]	Time 0.252 (0.623)	Data Time 0.001 (0.027)	Loss 2.3730 (2.3499)	Entropy 0.91261 (0.91456)	Top-1 acc 63.672 (67.751)	Top-5 acc 84.766 (86.194)	lr 0.00611
Train [81][1600/3239]	Time 0.321 (0.622)	Data Time 0.001 (0.027)	Loss 2.3786 (2.3498)	Entropy 0.91258 (0.91455)	Top-1 acc 67.188 (67.757)	Top-5 acc 87.109 (86.202)	lr 0.00611
Train [81][1610/3239]	Time 0.226 (0.621)	Data Time 0.001 (0.027)	Loss 2.2511 (2.3500)	Entropy 0.91252 (0.91454)	Top-1 acc 71.484 (67.749)	Top-5 acc 89.062 (86.204)	lr 0.00611
Train [81][1620/3239]	Time 0.226 (0.620)	Data Time 0.001 (0.027)	Loss 2.2739 (2.3502)	Entropy 0.91241 (0.91453)	Top-1 acc 66.797 (67.740)	Top-5 acc 88.672 (86.197)	lr 0.00611
Train [81][1630/3239]	Time 0.224 (0.619)	Data Time 0.001 (0.027)	Loss 2.3481 (2.3502)	Entropy 0.91238 (0.91451)	Top-1 acc 65.234 (67.736)	Top-5 acc 85.547 (86.192)	lr 0.00611
Train [81][1640/3239]	Time 0.326 (0.618)	Data Time 0.001 (0.027)	Loss 2.3857 (2.3504)	Entropy 0.91232 (0.91450)	Top-1 acc 66.406 (67.730)	Top-5 acc 82.031 (86.187)	lr 0.00611
Train [81][1650/3239]	Time 0.253 (0.617)	Data Time 0.001 (0.026)	Loss 2.3048 (2.3506)	Entropy 0.91230 (0.91449)	Top-1 acc 69.141 (67.726)	Top-5 acc 87.500 (86.183)	lr 0.00611
Train [81][1660/3239]	Time 2.431 (0.616)	Data Time 0.001 (0.026)	Loss 2.2899 (2.3505)	Entropy 0.91230 (0.91447)	Top-1 acc 70.312 (67.729)	Top-5 acc 87.109 (86.187)	lr 0.00611
Train [81][1670/3239]	Time 0.218 (0.614)	Data Time 0.001 (0.026)	Loss 2.3554 (2.3504)	Entropy 0.91230 (0.91446)	Top-1 acc 63.281 (67.723)	Top-5 acc 86.719 (86.187)	lr 0.00610
Train [81][1680/3239]	Time 0.236 (0.613)	Data Time 0.002 (0.026)	Loss 2.3195 (2.3504)	Entropy 0.91231 (0.91445)	Top-1 acc 68.750 (67.724)	Top-5 acc 85.938 (86.188)	lr 0.00610
Train [81][1690/3239]	Time 0.234 (0.613)	Data Time 0.001 (0.026)	Loss 2.3774 (2.3504)	Entropy 0.91228 (0.91443)	Top-1 acc 66.016 (67.718)	Top-5 acc 85.547 (86.189)	lr 0.00610
Train [81][1700/3239]	Time 0.246 (0.612)	Data Time 0.001 (0.026)	Loss 2.3781 (2.3504)	Entropy 0.91217 (0.91442)	Top-1 acc 65.625 (67.717)	Top-5 acc 87.109 (86.191)	lr 0.00610
Train [81][1710/3239]	Time 0.242 (0.611)	Data Time 0.001 (0.026)	Loss 2.3529 (2.3503)	Entropy 0.91214 (0.91441)	Top-1 acc 68.750 (67.718)	Top-5 acc 85.938 (86.195)	lr 0.00610
Train [81][1720/3239]	Time 0.232 (0.610)	Data Time 0.001 (0.025)	Loss 2.4258 (2.3503)	Entropy 0.91200 (0.91439)	Top-1 acc 66.406 (67.722)	Top-5 acc 84.375 (86.192)	lr 0.00610
Train [81][1730/3239]	Time 0.308 (0.609)	Data Time 0.001 (0.025)	Loss 2.4910 (2.3502)	Entropy 0.91201 (0.91438)	Top-1 acc 63.672 (67.724)	Top-5 acc 84.766 (86.193)	lr 0.00610
Train [81][1740/3239]	Time 0.263 (0.609)	Data Time 0.001 (0.025)	Loss 2.3738 (2.3504)	Entropy 0.91195 (0.91437)	Top-1 acc 65.234 (67.726)	Top-5 acc 85.547 (86.187)	lr 0.00610
Train [81][1750/3239]	Time 0.296 (0.608)	Data Time 0.003 (0.025)	Loss 2.2181 (2.3507)	Entropy 0.91196 (0.91435)	Top-1 acc 69.922 (67.717)	Top-5 acc 87.109 (86.186)	lr 0.00610
Train [81][1760/3239]	Time 0.265 (0.635)	Data Time 0.004 (0.025)	Loss 2.2820 (2.3507)	Entropy 0.91192 (0.91434)	Top-1 acc 66.406 (67.715)	Top-5 acc 88.672 (86.183)	lr 0.00610
Train [81][1770/3239]	Time 3.437 (0.635)	Data Time 0.005 (0.025)	Loss 2.4893 (2.3508)	Entropy 0.91192 (0.91433)	Top-1 acc 66.797 (67.715)	Top-5 acc 83.984 (86.179)	lr 0.00610
Train [81][1780/3239]	Time 0.227 (0.633)	Data Time 0.002 (0.025)	Loss 2.2592 (2.3506)	Entropy 0.91185 (0.91431)	Top-1 acc 68.359 (67.716)	Top-5 acc 88.672 (86.188)	lr 0.00609
Train [81][1790/3239]	Time 0.220 (0.632)	Data Time 0.001 (0.025)	Loss 2.3568 (2.3508)	Entropy 0.91181 (0.91430)	Top-1 acc 65.625 (67.711)	Top-5 acc 85.547 (86.184)	lr 0.00609
Train [81][1800/3239]	Time 0.239 (0.631)	Data Time 0.001 (0.024)	Loss 2.3505 (2.3508)	Entropy 0.91186 (0.91428)	Top-1 acc 65.625 (67.707)	Top-5 acc 88.281 (86.188)	lr 0.00609
Train [81][1810/3239]	Time 0.226 (0.630)	Data Time 0.001 (0.024)	Loss 2.3202 (2.3510)	Entropy 0.91186 (0.91427)	Top-1 acc 65.625 (67.697)	Top-5 acc 85.938 (86.187)	lr 0.00609
Train [81][1820/3239]	Time 0.230 (0.629)	Data Time 0.001 (0.024)	Loss 2.3484 (2.3512)	Entropy 0.91185 (0.91426)	Top-1 acc 68.359 (67.688)	Top-5 acc 86.719 (86.180)	lr 0.00609
Train [81][1830/3239]	Time 0.239 (0.629)	Data Time 0.001 (0.024)	Loss 2.2863 (2.3515)	Entropy 0.91180 (0.91425)	Top-1 acc 69.531 (67.688)	Top-5 acc 86.328 (86.175)	lr 0.00609
Train [81][1840/3239]	Time 0.223 (0.628)	Data Time 0.001 (0.024)	Loss 2.3705 (2.3516)	Entropy 0.91176 (0.91423)	Top-1 acc 65.625 (67.686)	Top-5 acc 85.938 (86.173)	lr 0.00609
Train [81][1850/3239]	Time 0.227 (0.627)	Data Time 0.001 (0.024)	Loss 2.3569 (2.3519)	Entropy 0.91174 (0.91422)	Top-1 acc 67.578 (67.675)	Top-5 acc 86.719 (86.168)	lr 0.00609
Train [81][1860/3239]	Time 0.309 (0.626)	Data Time 0.001 (0.024)	Loss 2.2272 (2.3516)	Entropy 0.91171 (0.91420)	Top-1 acc 71.484 (67.685)	Top-5 acc 86.328 (86.172)	lr 0.00609
Train [81][1870/3239]	Time 0.225 (0.626)	Data Time 0.001 (0.024)	Loss 2.5418 (2.3515)	Entropy 0.91165 (0.91419)	Top-1 acc 61.328 (67.685)	Top-5 acc 84.766 (86.176)	lr 0.00609
Train [81][1880/3239]	Time 2.502 (0.625)	Data Time 0.001 (0.023)	Loss 2.2424 (2.3513)	Entropy 0.91165 (0.91418)	Top-1 acc 69.531 (67.687)	Top-5 acc 89.453 (86.180)	lr 0.00609
Train [81][1890/3239]	Time 0.244 (0.623)	Data Time 0.002 (0.023)	Loss 2.3577 (2.3514)	Entropy 0.91156 (0.91416)	Top-1 acc 66.797 (67.688)	Top-5 acc 87.500 (86.178)	lr 0.00609
Train [81][1900/3239]	Time 0.226 (0.622)	Data Time 0.001 (0.023)	Loss 2.4146 (2.3514)	Entropy 0.91154 (0.91415)	Top-1 acc 62.500 (67.688)	Top-5 acc 85.156 (86.177)	lr 0.00608
Train [81][1910/3239]	Time 0.222 (0.621)	Data Time 0.001 (0.023)	Loss 2.5012 (2.3516)	Entropy 0.91139 (0.91414)	Top-1 acc 64.844 (67.686)	Top-5 acc 84.766 (86.174)	lr 0.00608
Train [81][1920/3239]	Time 0.231 (0.621)	Data Time 0.001 (0.023)	Loss 2.3157 (2.3518)	Entropy 0.91137 (0.91412)	Top-1 acc 68.750 (67.680)	Top-5 acc 87.109 (86.171)	lr 0.00608
Train [81][1930/3239]	Time 0.219 (0.620)	Data Time 0.001 (0.023)	Loss 2.4692 (2.3520)	Entropy 0.91117 (0.91411)	Top-1 acc 61.328 (67.673)	Top-5 acc 84.375 (86.164)	lr 0.00608
Train [81][1940/3239]	Time 0.231 (0.619)	Data Time 0.001 (0.023)	Loss 2.3940 (2.3522)	Entropy 0.91085 (0.91409)	Top-1 acc 65.625 (67.669)	Top-5 acc 85.938 (86.159)	lr 0.00608
Train [81][1950/3239]	Time 0.311 (0.618)	Data Time 0.001 (0.023)	Loss 2.4816 (2.3524)	Entropy 0.91082 (0.91407)	Top-1 acc 65.625 (67.657)	Top-5 acc 84.766 (86.159)	lr 0.00608
Train [81][1960/3239]	Time 0.217 (0.617)	Data Time 0.001 (0.023)	Loss 2.2648 (2.3523)	Entropy 0.91076 (0.91406)	Top-1 acc 68.750 (67.659)	Top-5 acc 87.109 (86.159)	lr 0.00608
Train [81][1970/3239]	Time 0.223 (0.617)	Data Time 0.001 (0.022)	Loss 2.3221 (2.3520)	Entropy 0.91074 (0.91404)	Top-1 acc 67.188 (67.664)	Top-5 acc 85.938 (86.167)	lr 0.00608
Train [81][1980/3239]	Time 0.252 (0.616)	Data Time 0.001 (0.022)	Loss 2.4576 (2.3523)	Entropy 0.91078 (0.91402)	Top-1 acc 69.531 (67.661)	Top-5 acc 86.719 (86.167)	lr 0.00608
Train [81][1990/3239]	Time 2.707 (0.615)	Data Time 0.002 (0.022)	Loss 2.3292 (2.3524)	Entropy 0.91078 (0.91401)	Top-1 acc 70.703 (67.659)	Top-5 acc 86.719 (86.162)	lr 0.00608
Train [81][2000/3239]	Time 0.253 (0.613)	Data Time 0.001 (0.022)	Loss 2.3851 (2.3523)	Entropy 0.91073 (0.91399)	Top-1 acc 69.531 (67.663)	Top-5 acc 87.500 (86.165)	lr 0.00608
Train [81][2010/3239]	Time 0.221 (0.613)	Data Time 0.001 (0.022)	Loss 2.4259 (2.3525)	Entropy 0.91072 (0.91398)	Top-1 acc 66.797 (67.659)	Top-5 acc 85.156 (86.165)	lr 0.00608
Train [81][2020/3239]	Time 0.209 (0.612)	Data Time 0.001 (0.022)	Loss 2.3440 (2.3527)	Entropy 0.91076 (0.91396)	Top-1 acc 67.578 (67.658)	Top-5 acc 85.547 (86.159)	lr 0.00607
Train [81][2030/3239]	Time 0.226 (0.611)	Data Time 0.001 (0.022)	Loss 2.2602 (2.3524)	Entropy 0.91070 (0.91394)	Top-1 acc 71.094 (67.662)	Top-5 acc 88.281 (86.164)	lr 0.00607
Train [81][2040/3239]	Time 0.321 (0.610)	Data Time 0.001 (0.022)	Loss 2.2844 (2.3526)	Entropy 0.91059 (0.91393)	Top-1 acc 67.969 (67.654)	Top-5 acc 89.062 (86.161)	lr 0.00607
Train [81][2050/3239]	Time 0.217 (0.610)	Data Time 0.001 (0.022)	Loss 2.3660 (2.3525)	Entropy 0.91037 (0.91391)	Top-1 acc 67.969 (67.656)	Top-5 acc 87.109 (86.161)	lr 0.00607
Train [81][2060/3239]	Time 0.235 (0.609)	Data Time 0.001 (0.022)	Loss 2.2041 (2.3525)	Entropy 0.91032 (0.91389)	Top-1 acc 73.047 (67.660)	Top-5 acc 87.109 (86.158)	lr 0.00607
Train [81][2070/3239]	Time 0.231 (0.608)	Data Time 0.001 (0.021)	Loss 2.4797 (2.3526)	Entropy 0.91025 (0.91388)	Top-1 acc 60.938 (67.651)	Top-5 acc 83.203 (86.155)	lr 0.00607
Train [81][2080/3239]	Time 0.224 (0.608)	Data Time 0.001 (0.021)	Loss 2.2115 (2.3534)	Entropy 0.91018 (0.91386)	Top-1 acc 71.484 (67.633)	Top-5 acc 89.062 (86.145)	lr 0.00607
Train [81][2090/3239]	Time 0.255 (0.607)	Data Time 0.002 (0.021)	Loss 2.2779 (2.3532)	Entropy 0.91019 (0.91384)	Top-1 acc 70.703 (67.641)	Top-5 acc 86.328 (86.147)	lr 0.00607
Train [81][2100/3239]	Time 2.501 (0.606)	Data Time 0.001 (0.021)	Loss 2.3742 (2.3533)	Entropy 0.91019 (0.91382)	Top-1 acc 69.531 (67.639)	Top-5 acc 84.766 (86.149)	lr 0.00607
Train [81][2110/3239]	Time 0.233 (0.605)	Data Time 0.001 (0.021)	Loss 2.3955 (2.3533)	Entropy 0.91015 (0.91381)	Top-1 acc 67.578 (67.639)	Top-5 acc 84.766 (86.148)	lr 0.00607
Train [81][2120/3239]	Time 0.238 (0.604)	Data Time 0.001 (0.021)	Loss 2.2631 (2.3531)	Entropy 0.91008 (0.91379)	Top-1 acc 69.141 (67.644)	Top-5 acc 89.453 (86.151)	lr 0.00607
Train [81][2130/3239]	Time 0.235 (0.629)	Data Time 0.002 (0.021)	Loss 2.4706 (2.3531)	Entropy 0.91005 (0.91377)	Top-1 acc 61.328 (67.638)	Top-5 acc 85.547 (86.155)	lr 0.00606
Train [81][2140/3239]	Time 0.245 (0.628)	Data Time 0.002 (0.021)	Loss 2.3178 (2.3533)	Entropy 0.90994 (0.91375)	Top-1 acc 67.578 (67.632)	Top-5 acc 88.281 (86.151)	lr 0.00606
Train [81][2150/3239]	Time 0.218 (0.628)	Data Time 0.001 (0.021)	Loss 2.2088 (2.3532)	Entropy 0.90994 (0.91374)	Top-1 acc 73.047 (67.635)	Top-5 acc 89.062 (86.154)	lr 0.00606
Train [81][2160/3239]	Time 0.222 (0.627)	Data Time 0.001 (0.021)	Loss 2.3457 (2.3532)	Entropy 0.90991 (0.91372)	Top-1 acc 67.188 (67.636)	Top-5 acc 88.672 (86.157)	lr 0.00606
Train [81][2170/3239]	Time 0.315 (0.626)	Data Time 0.001 (0.021)	Loss 2.3831 (2.3532)	Entropy 0.90984 (0.91370)	Top-1 acc 65.234 (67.632)	Top-5 acc 85.156 (86.158)	lr 0.00606
Train [81][2180/3239]	Time 0.234 (0.625)	Data Time 0.001 (0.020)	Loss 2.2727 (2.3531)	Entropy 0.90977 (0.91368)	Top-1 acc 67.188 (67.632)	Top-5 acc 87.891 (86.158)	lr 0.00606
Train [81][2190/3239]	Time 0.229 (0.625)	Data Time 0.001 (0.020)	Loss 2.3212 (2.3534)	Entropy 0.90975 (0.91366)	Top-1 acc 68.750 (67.621)	Top-5 acc 85.547 (86.156)	lr 0.00606
Train [81][2200/3239]	Time 0.236 (0.624)	Data Time 0.001 (0.020)	Loss 2.4578 (2.3540)	Entropy 0.90977 (0.91365)	Top-1 acc 62.500 (67.606)	Top-5 acc 83.984 (86.149)	lr 0.00606
Train [81][2210/3239]	Time 2.610 (0.623)	Data Time 0.001 (0.020)	Loss 2.3894 (2.3543)	Entropy 0.90977 (0.91363)	Top-1 acc 67.188 (67.593)	Top-5 acc 84.766 (86.145)	lr 0.00606
Train [81][2220/3239]	Time 0.231 (0.622)	Data Time 0.001 (0.020)	Loss 2.4848 (2.3544)	Entropy 0.90971 (0.91361)	Top-1 acc 66.797 (67.592)	Top-5 acc 83.203 (86.144)	lr 0.00606
Train [81][2230/3239]	Time 0.233 (0.621)	Data Time 0.001 (0.020)	Loss 2.3152 (2.3545)	Entropy 0.90968 (0.91359)	Top-1 acc 68.359 (67.590)	Top-5 acc 89.062 (86.138)	lr 0.00606
Train [81][2240/3239]	Time 0.223 (0.620)	Data Time 0.001 (0.020)	Loss 2.2916 (2.3546)	Entropy 0.90963 (0.91358)	Top-1 acc 69.141 (67.591)	Top-5 acc 85.547 (86.134)	lr 0.00606
Train [81][2250/3239]	Time 0.227 (0.620)	Data Time 0.001 (0.020)	Loss 2.3232 (2.3547)	Entropy 0.90958 (0.91356)	Top-1 acc 67.188 (67.586)	Top-5 acc 84.375 (86.130)	lr 0.00605
Train [81][2260/3239]	Time 0.221 (0.619)	Data Time 0.001 (0.020)	Loss 2.3459 (2.3547)	Entropy 0.90958 (0.91354)	Top-1 acc 67.969 (67.588)	Top-5 acc 86.719 (86.133)	lr 0.00605
Train [81][2270/3239]	Time 0.224 (0.618)	Data Time 0.001 (0.020)	Loss 2.3450 (2.3547)	Entropy 0.90957 (0.91352)	Top-1 acc 69.531 (67.586)	Top-5 acc 87.109 (86.134)	lr 0.00605
Train [81][2280/3239]	Time 0.219 (0.618)	Data Time 0.001 (0.020)	Loss 2.4711 (2.3549)	Entropy 0.90946 (0.91351)	Top-1 acc 65.625 (67.585)	Top-5 acc 82.812 (86.128)	lr 0.00605
Train [81][2290/3239]	Time 0.234 (0.617)	Data Time 0.001 (0.020)	Loss 2.2311 (2.3549)	Entropy 0.90948 (0.91349)	Top-1 acc 71.875 (67.587)	Top-5 acc 89.844 (86.126)	lr 0.00605
Train [81][2300/3239]	Time 0.320 (0.616)	Data Time 0.001 (0.019)	Loss 2.3263 (2.3550)	Entropy 0.90941 (0.91347)	Top-1 acc 68.750 (67.583)	Top-5 acc 86.328 (86.125)	lr 0.00605
Train [81][2310/3239]	Time 0.212 (0.616)	Data Time 0.002 (0.019)	Loss 2.3367 (2.3554)	Entropy 0.90943 (0.91345)	Top-1 acc 67.188 (67.572)	Top-5 acc 84.375 (86.119)	lr 0.00605
Train [81][2320/3239]	Time 2.585 (0.615)	Data Time 0.001 (0.019)	Loss 2.3760 (2.3554)	Entropy 0.90943 (0.91344)	Top-1 acc 64.844 (67.570)	Top-5 acc 85.938 (86.117)	lr 0.00605
Train [81][2330/3239]	Time 0.271 (0.614)	Data Time 0.002 (0.019)	Loss 2.4395 (2.3558)	Entropy 0.90934 (0.91342)	Top-1 acc 63.281 (67.558)	Top-5 acc 82.812 (86.108)	lr 0.00605
Train [81][2340/3239]	Time 0.232 (0.613)	Data Time 0.003 (0.019)	Loss 2.2995 (2.3556)	Entropy 0.90928 (0.91340)	Top-1 acc 68.750 (67.561)	Top-5 acc 87.891 (86.110)	lr 0.00605
Train [81][2350/3239]	Time 0.236 (0.612)	Data Time 0.001 (0.019)	Loss 2.3991 (2.3557)	Entropy 0.90915 (0.91338)	Top-1 acc 65.625 (67.559)	Top-5 acc 86.719 (86.108)	lr 0.00605
Train [81][2360/3239]	Time 0.233 (0.612)	Data Time 0.001 (0.019)	Loss 2.4259 (2.3558)	Entropy 0.90912 (0.91336)	Top-1 acc 64.844 (67.561)	Top-5 acc 86.328 (86.108)	lr 0.00604
Train [81][2370/3239]	Time 0.229 (0.611)	Data Time 0.001 (0.019)	Loss 2.4134 (2.3559)	Entropy 0.90909 (0.91335)	Top-1 acc 66.797 (67.556)	Top-5 acc 85.938 (86.105)	lr 0.00604
Train [81][2380/3239]	Time 0.227 (0.611)	Data Time 0.002 (0.019)	Loss 2.3467 (2.3559)	Entropy 0.90909 (0.91333)	Top-1 acc 69.141 (67.551)	Top-5 acc 85.938 (86.103)	lr 0.00604
Train [81][2390/3239]	Time 0.219 (0.610)	Data Time 0.001 (0.019)	Loss 2.3924 (2.3559)	Entropy 0.90907 (0.91331)	Top-1 acc 63.281 (67.552)	Top-5 acc 87.109 (86.101)	lr 0.00604
Train [81][2400/3239]	Time 0.227 (0.609)	Data Time 0.001 (0.019)	Loss 2.2897 (2.3559)	Entropy 0.90907 (0.91329)	Top-1 acc 67.188 (67.552)	Top-5 acc 85.938 (86.101)	lr 0.00604
Train [81][2410/3239]	Time 0.223 (0.609)	Data Time 0.001 (0.019)	Loss 2.1775 (2.3559)	Entropy 0.90902 (0.91328)	Top-1 acc 72.266 (67.553)	Top-5 acc 89.453 (86.101)	lr 0.00604
Train [81][2420/3239]	Time 0.266 (0.608)	Data Time 0.002 (0.019)	Loss 2.3689 (2.3559)	Entropy 0.90900 (0.91326)	Top-1 acc 67.969 (67.554)	Top-5 acc 85.547 (86.100)	lr 0.00604
Train [81][2430/3239]	Time 2.547 (0.608)	Data Time 0.001 (0.019)	Loss 2.4404 (2.3558)	Entropy 0.90900 (0.91324)	Top-1 acc 65.625 (67.556)	Top-5 acc 83.984 (86.100)	lr 0.00604
Train [81][2440/3239]	Time 0.230 (0.606)	Data Time 0.001 (0.018)	Loss 2.3854 (2.3559)	Entropy 0.90895 (0.91322)	Top-1 acc 67.188 (67.557)	Top-5 acc 87.109 (86.100)	lr 0.00604
Train [81][2450/3239]	Time 0.222 (0.606)	Data Time 0.001 (0.018)	Loss 2.3447 (2.3557)	Entropy 0.90890 (0.91321)	Top-1 acc 67.578 (67.559)	Top-5 acc 87.109 (86.105)	lr 0.00604
Train [81][2460/3239]	Time 0.226 (0.605)	Data Time 0.001 (0.018)	Loss 2.3455 (2.3556)	Entropy 0.90893 (0.91319)	Top-1 acc 67.188 (67.564)	Top-5 acc 86.328 (86.107)	lr 0.00604
Train [81][2470/3239]	Time 0.240 (0.605)	Data Time 0.001 (0.018)	Loss 2.3117 (2.3559)	Entropy 0.90892 (0.91317)	Top-1 acc 67.969 (67.557)	Top-5 acc 86.719 (86.102)	lr 0.00604
Train [81][2480/3239]	Time 0.224 (0.604)	Data Time 0.001 (0.018)	Loss 2.4401 (2.3558)	Entropy 0.90890 (0.91315)	Top-1 acc 66.406 (67.556)	Top-5 acc 84.766 (86.102)	lr 0.00603
Train [81][2490/3239]	Time 0.272 (0.624)	Data Time 0.003 (0.018)	Loss 2.1198 (2.3559)	Entropy 0.90891 (0.91314)	Top-1 acc 76.953 (67.556)	Top-5 acc 91.016 (86.101)	lr 0.00603
Train [81][2500/3239]	Time 0.241 (0.624)	Data Time 0.002 (0.018)	Loss 2.2260 (2.3560)	Entropy 0.90896 (0.91312)	Top-1 acc 69.141 (67.554)	Top-5 acc 88.281 (86.100)	lr 0.00603
Train [81][2510/3239]	Time 0.238 (0.623)	Data Time 0.001 (0.018)	Loss 2.2435 (2.3560)	Entropy 0.90899 (0.91310)	Top-1 acc 68.750 (67.550)	Top-5 acc 88.672 (86.101)	lr 0.00603
Train [81][2520/3239]	Time 0.231 (0.622)	Data Time 0.001 (0.018)	Loss 2.3092 (2.3560)	Entropy 0.90899 (0.91309)	Top-1 acc 69.141 (67.548)	Top-5 acc 87.500 (86.099)	lr 0.00603
Train [81][2530/3239]	Time 0.224 (0.622)	Data Time 0.001 (0.018)	Loss 2.2589 (2.3559)	Entropy 0.90893 (0.91307)	Top-1 acc 68.750 (67.547)	Top-5 acc 89.453 (86.104)	lr 0.00603
Train [81][2540/3239]	Time 2.474 (0.621)	Data Time 0.001 (0.018)	Loss 2.3864 (2.3559)	Entropy 0.90893 (0.91305)	Top-1 acc 67.969 (67.549)	Top-5 acc 83.984 (86.104)	lr 0.00603
Train [81][2550/3239]	Time 0.212 (0.620)	Data Time 0.001 (0.018)	Loss 2.5400 (2.3561)	Entropy 0.90884 (0.91304)	Top-1 acc 64.062 (67.547)	Top-5 acc 80.859 (86.098)	lr 0.00603
Train [81][2560/3239]	Time 0.223 (0.619)	Data Time 0.001 (0.018)	Loss 2.2144 (2.3561)	Entropy 0.90884 (0.91302)	Top-1 acc 72.656 (67.549)	Top-5 acc 88.672 (86.097)	lr 0.00603
Train [81][2570/3239]	Time 0.225 (0.619)	Data Time 0.001 (0.018)	Loss 2.5495 (2.3563)	Entropy 0.90875 (0.91300)	Top-1 acc 62.500 (67.538)	Top-5 acc 82.422 (86.096)	lr 0.00603
Train [81][2580/3239]	Time 0.230 (0.618)	Data Time 0.001 (0.018)	Loss 2.3505 (2.3563)	Entropy 0.90880 (0.91299)	Top-1 acc 68.750 (67.537)	Top-5 acc 86.328 (86.097)	lr 0.00603
Train [81][2590/3239]	Time 0.233 (0.617)	Data Time 0.001 (0.017)	Loss 2.2560 (2.3561)	Entropy 0.90886 (0.91297)	Top-1 acc 71.484 (67.539)	Top-5 acc 88.281 (86.101)	lr 0.00602
Train [81][2600/3239]	Time 0.235 (0.617)	Data Time 0.002 (0.017)	Loss 2.4007 (2.3561)	Entropy 0.90882 (0.91296)	Top-1 acc 66.016 (67.539)	Top-5 acc 84.766 (86.104)	lr 0.00602
Train [81][2610/3239]	Time 0.234 (0.616)	Data Time 0.001 (0.017)	Loss 2.4666 (2.3563)	Entropy 0.90883 (0.91294)	Top-1 acc 64.453 (67.535)	Top-5 acc 84.375 (86.101)	lr 0.00602
Train [81][2620/3239]	Time 0.241 (0.616)	Data Time 0.001 (0.017)	Loss 2.4565 (2.3563)	Entropy 0.90881 (0.91293)	Top-1 acc 61.719 (67.532)	Top-5 acc 86.328 (86.102)	lr 0.00602
Train [81][2630/3239]	Time 0.232 (0.615)	Data Time 0.002 (0.017)	Loss 2.3384 (2.3563)	Entropy 0.90882 (0.91291)	Top-1 acc 68.359 (67.530)	Top-5 acc 85.938 (86.100)	lr 0.00602
Train [81][2640/3239]	Time 0.245 (0.615)	Data Time 0.001 (0.017)	Loss 2.3277 (2.3565)	Entropy 0.90882 (0.91289)	Top-1 acc 69.141 (67.528)	Top-5 acc 83.594 (86.096)	lr 0.00602
Train [81][2650/3239]	Time 0.361 (0.614)	Data Time 0.001 (0.017)	Loss 2.3690 (2.3564)	Entropy 0.90882 (0.91288)	Top-1 acc 66.406 (67.528)	Top-5 acc 87.500 (86.099)	lr 0.00602
Train [81][2660/3239]	Time 0.258 (0.614)	Data Time 0.001 (0.017)	Loss 2.5760 (2.3565)	Entropy 0.90885 (0.91286)	Top-1 acc 62.500 (67.528)	Top-5 acc 80.078 (86.096)	lr 0.00602
Train [81][2670/3239]	Time 0.255 (0.613)	Data Time 0.001 (0.017)	Loss 2.3264 (2.3563)	Entropy 0.90883 (0.91285)	Top-1 acc 66.016 (67.529)	Top-5 acc 88.281 (86.103)	lr 0.00602
Train [81][2680/3239]	Time 0.278 (0.613)	Data Time 0.001 (0.017)	Loss 2.3107 (2.3562)	Entropy 0.90882 (0.91283)	Top-1 acc 65.234 (67.532)	Top-5 acc 89.062 (86.106)	lr 0.00602
Train [81][2690/3239]	Time 0.322 (0.612)	Data Time 0.001 (0.017)	Loss 2.4432 (2.3563)	Entropy 0.90878 (0.91282)	Top-1 acc 67.188 (67.529)	Top-5 acc 85.938 (86.108)	lr 0.00602
Train [81][2700/3239]	Time 0.263 (0.612)	Data Time 0.001 (0.017)	Loss 2.3873 (2.3563)	Entropy 0.90880 (0.91280)	Top-1 acc 65.234 (67.528)	Top-5 acc 87.109 (86.107)	lr 0.00602
Train [81][2710/3239]	Time 0.225 (0.611)	Data Time 0.001 (0.017)	Loss 2.2711 (2.3565)	Entropy 0.90883 (0.91279)	Top-1 acc 69.922 (67.526)	Top-5 acc 88.672 (86.102)	lr 0.00601
Train [81][2720/3239]	Time 0.265 (0.611)	Data Time 0.001 (0.017)	Loss 2.3712 (2.3568)	Entropy 0.90884 (0.91277)	Top-1 acc 69.922 (67.520)	Top-5 acc 84.766 (86.098)	lr 0.00601
Train [81][2730/3239]	Time 0.222 (0.610)	Data Time 0.001 (0.017)	Loss 2.4482 (2.3567)	Entropy 0.90887 (0.91276)	Top-1 acc 64.062 (67.519)	Top-5 acc 82.031 (86.097)	lr 0.00601
Train [81][2740/3239]	Time 0.222 (0.610)	Data Time 0.001 (0.017)	Loss 2.3679 (2.3566)	Entropy 0.90879 (0.91275)	Top-1 acc 67.188 (67.520)	Top-5 acc 86.719 (86.099)	lr 0.00601
Train [81][2750/3239]	Time 0.243 (0.609)	Data Time 0.001 (0.017)	Loss 2.4579 (2.3566)	Entropy 0.90880 (0.91273)	Top-1 acc 64.453 (67.517)	Top-5 acc 87.500 (86.100)	lr 0.00601
Train [81][2760/3239]	Time 0.243 (0.609)	Data Time 0.001 (0.016)	Loss 2.3552 (2.3566)	Entropy 0.90877 (0.91272)	Top-1 acc 67.578 (67.513)	Top-5 acc 84.375 (86.101)	lr 0.00601
Train [81][2770/3239]	Time 0.250 (0.608)	Data Time 0.001 (0.016)	Loss 2.2937 (2.3567)	Entropy 0.90877 (0.91270)	Top-1 acc 67.578 (67.511)	Top-5 acc 87.500 (86.102)	lr 0.00601
Train [81][2780/3239]	Time 0.376 (0.608)	Data Time 0.001 (0.016)	Loss 2.3631 (2.3569)	Entropy 0.90874 (0.91269)	Top-1 acc 68.359 (67.511)	Top-5 acc 86.328 (86.099)	lr 0.00601
Train [81][2790/3239]	Time 0.260 (0.607)	Data Time 0.001 (0.016)	Loss 2.4921 (2.3572)	Entropy 0.90878 (0.91267)	Top-1 acc 62.109 (67.509)	Top-5 acc 85.156 (86.098)	lr 0.00601
Train [81][2800/3239]	Time 0.220 (0.607)	Data Time 0.002 (0.016)	Loss 2.5095 (2.3572)	Entropy 0.90874 (0.91266)	Top-1 acc 64.453 (67.508)	Top-5 acc 81.641 (86.098)	lr 0.00601
Train [81][2810/3239]	Time 0.296 (0.606)	Data Time 0.001 (0.016)	Loss 2.4123 (2.3571)	Entropy 0.90872 (0.91265)	Top-1 acc 66.406 (67.510)	Top-5 acc 84.375 (86.100)	lr 0.00601
Train [81][2820/3239]	Time 0.281 (0.606)	Data Time 0.001 (0.016)	Loss 2.5718 (2.3572)	Entropy 0.90868 (0.91263)	Top-1 acc 63.281 (67.507)	Top-5 acc 78.906 (86.094)	lr 0.00600
Train [81][2830/3239]	Time 0.242 (0.624)	Data Time 0.003 (0.016)	Loss 2.3945 (2.3573)	Entropy 0.90868 (0.91262)	Top-1 acc 66.016 (67.502)	Top-5 acc 86.719 (86.095)	lr 0.00600
Train [81][2840/3239]	Time 0.206 (0.624)	Data Time 0.002 (0.016)	Loss 2.3279 (2.3575)	Entropy 0.90857 (0.91260)	Top-1 acc 68.359 (67.497)	Top-5 acc 85.547 (86.092)	lr 0.00600
Train [81][2850/3239]	Time 0.289 (0.623)	Data Time 0.002 (0.016)	Loss 2.3535 (2.3576)	Entropy 0.90860 (0.91259)	Top-1 acc 65.625 (67.494)	Top-5 acc 87.891 (86.091)	lr 0.00600
Train [81][2860/3239]	Time 0.232 (0.623)	Data Time 0.002 (0.016)	Loss 2.2759 (2.3577)	Entropy 0.90850 (0.91258)	Top-1 acc 69.531 (67.495)	Top-5 acc 87.500 (86.092)	lr 0.00600
Train [81][2870/3239]	Time 0.365 (0.622)	Data Time 0.002 (0.016)	Loss 2.3332 (2.3578)	Entropy 0.90844 (0.91256)	Top-1 acc 67.969 (67.493)	Top-5 acc 86.719 (86.088)	lr 0.00600
Train [81][2880/3239]	Time 0.307 (0.622)	Data Time 0.002 (0.016)	Loss 2.2768 (2.3578)	Entropy 0.90835 (0.91255)	Top-1 acc 71.094 (67.492)	Top-5 acc 85.156 (86.089)	lr 0.00600
Train [81][2890/3239]	Time 0.262 (0.622)	Data Time 0.001 (0.016)	Loss 2.3300 (2.3579)	Entropy 0.90834 (0.91253)	Top-1 acc 66.406 (67.486)	Top-5 acc 87.500 (86.087)	lr 0.00600
Train [81][2900/3239]	Time 0.222 (0.621)	Data Time 0.001 (0.016)	Loss 2.4207 (2.3578)	Entropy 0.90834 (0.91252)	Top-1 acc 70.312 (67.488)	Top-5 acc 85.938 (86.090)	lr 0.00600
Train [81][2910/3239]	Time 0.343 (0.620)	Data Time 0.001 (0.016)	Loss 2.3892 (2.3579)	Entropy 0.90821 (0.91250)	Top-1 acc 63.281 (67.486)	Top-5 acc 83.203 (86.088)	lr 0.00600
Train [81][2920/3239]	Time 0.253 (0.620)	Data Time 0.001 (0.016)	Loss 2.2986 (2.3579)	Entropy 0.90814 (0.91249)	Top-1 acc 67.969 (67.487)	Top-5 acc 86.719 (86.087)	lr 0.00600
Train [81][2930/3239]	Time 0.289 (0.620)	Data Time 0.001 (0.016)	Loss 2.4291 (2.3580)	Entropy 0.90814 (0.91247)	Top-1 acc 67.969 (67.485)	Top-5 acc 82.812 (86.085)	lr 0.00600
Train [81][2940/3239]	Time 0.282 (0.619)	Data Time 0.001 (0.016)	Loss 2.3078 (2.3580)	Entropy 0.90806 (0.91246)	Top-1 acc 71.484 (67.484)	Top-5 acc 87.109 (86.086)	lr 0.00599
Train [81][2950/3239]	Time 0.225 (0.619)	Data Time 0.001 (0.016)	Loss 2.4622 (2.3580)	Entropy 0.90795 (0.91245)	Top-1 acc 65.625 (67.484)	Top-5 acc 83.984 (86.084)	lr 0.00599
Train [81][2960/3239]	Time 0.245 (0.618)	Data Time 0.001 (0.015)	Loss 2.4170 (2.3580)	Entropy 0.90808 (0.91243)	Top-1 acc 67.188 (67.487)	Top-5 acc 83.594 (86.084)	lr 0.00599
Train [81][2970/3239]	Time 0.237 (0.618)	Data Time 0.001 (0.015)	Loss 2.3088 (2.3580)	Entropy 0.90802 (0.91242)	Top-1 acc 71.094 (67.484)	Top-5 acc 87.109 (86.087)	lr 0.00599
Train [81][2980/3239]	Time 0.229 (0.617)	Data Time 0.001 (0.015)	Loss 2.4312 (2.3582)	Entropy 0.90801 (0.91240)	Top-1 acc 63.672 (67.476)	Top-5 acc 83.594 (86.081)	lr 0.00599
Train [81][2990/3239]	Time 0.247 (0.617)	Data Time 0.001 (0.015)	Loss 2.5008 (2.3582)	Entropy 0.90801 (0.91239)	Top-1 acc 60.547 (67.475)	Top-5 acc 84.375 (86.081)	lr 0.00599
Train [81][3000/3239]	Time 0.427 (0.617)	Data Time 0.001 (0.015)	Loss 2.3805 (2.3582)	Entropy 0.90805 (0.91237)	Top-1 acc 67.969 (67.477)	Top-5 acc 86.719 (86.082)	lr 0.00599
Train [81][3010/3239]	Time 0.251 (0.616)	Data Time 0.001 (0.015)	Loss 2.3759 (2.3581)	Entropy 0.90804 (0.91236)	Top-1 acc 66.016 (67.481)	Top-5 acc 82.812 (86.083)	lr 0.00599
Train [81][3020/3239]	Time 0.251 (0.616)	Data Time 0.001 (0.015)	Loss 2.4170 (2.3580)	Entropy 0.90796 (0.91234)	Top-1 acc 63.672 (67.481)	Top-5 acc 83.203 (86.085)	lr 0.00599
Train [81][3030/3239]	Time 0.241 (0.615)	Data Time 0.002 (0.015)	Loss 2.3305 (2.3580)	Entropy 0.90794 (0.91233)	Top-1 acc 67.188 (67.480)	Top-5 acc 87.109 (86.086)	lr 0.00599
Train [81][3040/3239]	Time 0.380 (0.615)	Data Time 0.001 (0.015)	Loss 2.3030 (2.3581)	Entropy 0.90794 (0.91231)	Top-1 acc 67.969 (67.479)	Top-5 acc 89.453 (86.084)	lr 0.00599
Train [81][3050/3239]	Time 0.256 (0.614)	Data Time 0.001 (0.015)	Loss 2.4479 (2.3582)	Entropy 0.90796 (0.91230)	Top-1 acc 66.406 (67.478)	Top-5 acc 83.594 (86.083)	lr 0.00599
Train [81][3060/3239]	Time 0.253 (0.614)	Data Time 0.001 (0.015)	Loss 2.5747 (2.3582)	Entropy 0.90787 (0.91229)	Top-1 acc 61.328 (67.478)	Top-5 acc 83.203 (86.082)	lr 0.00598
Train [81][3070/3239]	Time 0.241 (0.613)	Data Time 0.001 (0.015)	Loss 2.2879 (2.3581)	Entropy 0.90789 (0.91227)	Top-1 acc 67.578 (67.479)	Top-5 acc 88.672 (86.086)	lr 0.00598
Train [81][3080/3239]	Time 0.275 (0.613)	Data Time 0.001 (0.015)	Loss 2.2985 (2.3581)	Entropy 0.90777 (0.91226)	Top-1 acc 68.359 (67.482)	Top-5 acc 87.891 (86.083)	lr 0.00598
Train [81][3090/3239]	Time 0.262 (0.613)	Data Time 0.002 (0.015)	Loss 2.4602 (2.3581)	Entropy 0.90769 (0.91224)	Top-1 acc 64.062 (67.483)	Top-5 acc 83.984 (86.083)	lr 0.00598
Train [81][3100/3239]	Time 0.288 (0.612)	Data Time 0.001 (0.015)	Loss 2.5607 (2.3582)	Entropy 0.90766 (0.91223)	Top-1 acc 58.203 (67.478)	Top-5 acc 82.422 (86.082)	lr 0.00598
Train [81][3110/3239]	Time 0.210 (0.612)	Data Time 0.001 (0.015)	Loss 2.3340 (2.3581)	Entropy 0.90762 (0.91221)	Top-1 acc 67.188 (67.478)	Top-5 acc 87.109 (86.082)	lr 0.00598
Train [81][3120/3239]	Time 0.247 (0.611)	Data Time 0.001 (0.015)	Loss 2.4615 (2.3582)	Entropy 0.90760 (0.91220)	Top-1 acc 64.062 (67.475)	Top-5 acc 83.594 (86.083)	lr 0.00598
Train [81][3130/3239]	Time 0.404 (0.611)	Data Time 0.001 (0.015)	Loss 2.3247 (2.3582)	Entropy 0.90749 (0.91218)	Top-1 acc 68.750 (67.473)	Top-5 acc 86.328 (86.081)	lr 0.00598
Train [81][3140/3239]	Time 0.249 (0.610)	Data Time 0.001 (0.015)	Loss 2.2303 (2.3583)	Entropy 0.90751 (0.91217)	Top-1 acc 67.578 (67.469)	Top-5 acc 91.016 (86.081)	lr 0.00598
Train [81][3150/3239]	Time 0.247 (0.610)	Data Time 0.001 (0.015)	Loss 2.3496 (2.3584)	Entropy 0.90752 (0.91215)	Top-1 acc 67.969 (67.472)	Top-5 acc 86.719 (86.078)	lr 0.00598
Train [81][3160/3239]	Time 0.250 (0.627)	Data Time 0.003 (0.015)	Loss 2.2161 (2.3583)	Entropy 0.90749 (0.91214)	Top-1 acc 73.828 (67.476)	Top-5 acc 87.891 (86.080)	lr 0.00598
Train [81][3170/3239]	Time 0.394 (0.627)	Data Time 0.002 (0.015)	Loss 2.4706 (2.3584)	Entropy 0.90739 (0.91212)	Top-1 acc 65.234 (67.476)	Top-5 acc 83.203 (86.077)	lr 0.00597
Train [81][3180/3239]	Time 0.238 (0.626)	Data Time 0.000 (0.015)	Loss 2.3843 (2.3584)	Entropy 0.90741 (0.91211)	Top-1 acc 69.531 (67.476)	Top-5 acc 86.328 (86.076)	lr 0.00597
Train [81][3190/3239]	Time 0.228 (0.626)	Data Time 0.000 (0.014)	Loss 2.4170 (2.3586)	Entropy 0.90732 (0.91209)	Top-1 acc 64.453 (67.471)	Top-5 acc 86.328 (86.073)	lr 0.00597
Train [81][3200/3239]	Time 0.222 (0.625)	Data Time 0.000 (0.014)	Loss 2.2449 (2.3585)	Entropy 0.90732 (0.91208)	Top-1 acc 71.875 (67.474)	Top-5 acc 89.453 (86.076)	lr 0.00597
Train [81][3210/3239]	Time 0.225 (0.624)	Data Time 0.000 (0.014)	Loss 2.3418 (2.3586)	Entropy 0.90734 (0.91206)	Top-1 acc 66.016 (67.470)	Top-5 acc 87.500 (86.075)	lr 0.00597
Train [81][3220/3239]	Time 0.235 (0.624)	Data Time 0.000 (0.014)	Loss 2.2450 (2.3586)	Entropy 0.90734 (0.91205)	Top-1 acc 70.703 (67.471)	Top-5 acc 89.844 (86.076)	lr 0.00597
Train [81][3230/3239]	Time 0.232 (0.623)	Data Time 0.000 (0.014)	Loss 2.2790 (2.3585)	Entropy 0.90733 (0.91204)	Top-1 acc 71.875 (67.476)	Top-5 acc 85.938 (86.074)	lr 0.00597
Train [81][3239/3239]	Time 2.289 (0.623)	Data Time 0.000 (0.014)	Loss 2.4629 (2.3586)	Entropy 0.90733 (0.91202)	Top-1 acc 71.605 (67.471)	Top-5 acc 81.481 (86.072)	lr 0.00597
==========Valid [81/120]	loss 1.307	top-1 acc 70.104 (70.279)	top-5 acc 88.424	Train top-1 67.471	top-5 86.072	Entropy 0.90733	Latency-None: 0.000ms	Flops: 546.53M
Train [82][0/3239]	Time 41.742 (41.742)	Data Time 38.917 (38.917)	Loss 2.2409 (2.2409)	Entropy 0.90730 (0.90730)	Top-1 acc 67.969 (67.969)	Top-5 acc 88.281 (88.281)	lr 0.00597
Train [82][10/3239]	Time 2.864 (4.374)	Data Time 0.001 (3.573)	Loss 2.3759 (2.3010)	Entropy 0.90730 (0.90730)	Top-1 acc 66.016 (68.075)	Top-5 acc 85.938 (87.251)	lr 0.00597
Train [82][20/3239]	Time 0.333 (2.413)	Data Time 0.003 (1.873)	Loss 2.2334 (2.2931)	Entropy 0.90720 (0.90725)	Top-1 acc 67.578 (68.285)	Top-5 acc 89.453 (87.537)	lr 0.00597
Train [82][30/3239]	Time 0.238 (1.783)	Data Time 0.001 (1.269)	Loss 2.3226 (2.3090)	Entropy 0.90719 (0.90723)	Top-1 acc 67.578 (68.259)	Top-5 acc 85.156 (87.135)	lr 0.00597
Train [82][40/3239]	Time 0.217 (1.462)	Data Time 0.001 (0.960)	Loss 2.4573 (2.3235)	Entropy 0.90727 (0.90724)	Top-1 acc 64.844 (67.978)	Top-5 acc 83.203 (86.766)	lr 0.00597
Train [82][50/3239]	Time 0.241 (1.268)	Data Time 0.001 (0.772)	Loss 2.2965 (2.3327)	Entropy 0.90724 (0.90724)	Top-1 acc 70.703 (67.930)	Top-5 acc 87.109 (86.581)	lr 0.00596
Train [82][60/3239]	Time 0.230 (1.140)	Data Time 0.001 (0.646)	Loss 2.2260 (2.3274)	Entropy 0.90723 (0.90724)	Top-1 acc 69.531 (68.186)	Top-5 acc 87.891 (86.610)	lr 0.00596
Train [82][70/3239]	Time 0.236 (1.047)	Data Time 0.001 (0.555)	Loss 2.3739 (2.3231)	Entropy 0.90720 (0.90724)	Top-1 acc 64.844 (68.310)	Top-5 acc 87.891 (86.675)	lr 0.00596
Train [82][80/3239]	Time 0.220 (0.975)	Data Time 0.001 (0.487)	Loss 2.2800 (2.3217)	Entropy 0.90722 (0.90723)	Top-1 acc 71.875 (68.465)	Top-5 acc 86.328 (86.728)	lr 0.00596
Train [82][90/3239]	Time 0.248 (0.922)	Data Time 0.001 (0.433)	Loss 2.4108 (2.3256)	Entropy 0.90719 (0.90723)	Top-1 acc 64.844 (68.389)	Top-5 acc 84.375 (86.689)	lr 0.00596
Train [82][100/3239]	Time 0.258 (0.879)	Data Time 0.001 (0.391)	Loss 2.4573 (2.3281)	Entropy 0.90725 (0.90723)	Top-1 acc 66.797 (68.390)	Top-5 acc 85.938 (86.665)	lr 0.00596
Train [82][110/3239]	Time 0.330 (0.845)	Data Time 0.002 (0.356)	Loss 2.3491 (2.3268)	Entropy 0.90722 (0.90723)	Top-1 acc 70.312 (68.468)	Top-5 acc 85.156 (86.596)	lr 0.00596
Train [82][120/3239]	Time 2.494 (0.813)	Data Time 0.001 (0.326)	Loss 2.4740 (2.3248)	Entropy 0.90722 (0.90723)	Top-1 acc 63.672 (68.456)	Top-5 acc 83.203 (86.628)	lr 0.00596
Train [82][130/3239]	Time 0.246 (0.769)	Data Time 0.002 (0.302)	Loss 2.3387 (2.3252)	Entropy 0.90719 (0.90723)	Top-1 acc 69.922 (68.511)	Top-5 acc 85.547 (86.611)	lr 0.00596
Train [82][140/3239]	Time 0.251 (0.748)	Data Time 0.001 (0.280)	Loss 2.3458 (2.3239)	Entropy 0.90720 (0.90722)	Top-1 acc 66.797 (68.578)	Top-5 acc 85.547 (86.602)	lr 0.00596
Train [82][150/3239]	Time 0.211 (0.731)	Data Time 0.001 (0.262)	Loss 2.5043 (2.3262)	Entropy 0.90715 (0.90722)	Top-1 acc 63.281 (68.520)	Top-5 acc 80.469 (86.543)	lr 0.00596
Train [82][160/3239]	Time 0.228 (0.716)	Data Time 0.001 (0.246)	Loss 2.2310 (2.3299)	Entropy 0.90710 (0.90721)	Top-1 acc 70.312 (68.410)	Top-5 acc 89.062 (86.471)	lr 0.00595
Train [82][170/3239]	Time 0.235 (0.702)	Data Time 0.001 (0.231)	Loss 2.4842 (2.3301)	Entropy 0.90708 (0.90721)	Top-1 acc 65.625 (68.412)	Top-5 acc 82.422 (86.472)	lr 0.00595
Train [82][180/3239]	Time 0.217 (0.689)	Data Time 0.001 (0.219)	Loss 2.5784 (2.3336)	Entropy 0.90707 (0.90720)	Top-1 acc 62.891 (68.338)	Top-5 acc 81.641 (86.406)	lr 0.00595
Train [82][190/3239]	Time 0.221 (0.677)	Data Time 0.001 (0.207)	Loss 2.2525 (2.3323)	Entropy 0.90705 (0.90719)	Top-1 acc 71.875 (68.384)	Top-5 acc 87.500 (86.424)	lr 0.00595
Train [82][200/3239]	Time 0.321 (0.668)	Data Time 0.002 (0.197)	Loss 2.4057 (2.3323)	Entropy 0.90704 (0.90718)	Top-1 acc 63.672 (68.352)	Top-5 acc 85.938 (86.423)	lr 0.00595
Train [82][210/3239]	Time 0.229 (0.658)	Data Time 0.002 (0.188)	Loss 2.4020 (2.3322)	Entropy 0.90709 (0.90718)	Top-1 acc 70.312 (68.378)	Top-5 acc 85.547 (86.423)	lr 0.00595
Train [82][220/3239]	Time 0.219 (0.650)	Data Time 0.001 (0.179)	Loss 2.3712 (2.3330)	Entropy 0.90703 (0.90717)	Top-1 acc 66.406 (68.336)	Top-5 acc 86.719 (86.429)	lr 0.00595
Train [82][230/3239]	Time 2.546 (0.642)	Data Time 0.001 (0.172)	Loss 2.3178 (2.3344)	Entropy 0.90703 (0.90717)	Top-1 acc 67.578 (68.248)	Top-5 acc 87.109 (86.406)	lr 0.00595
Train [82][240/3239]	Time 0.250 (0.626)	Data Time 0.001 (0.165)	Loss 2.4336 (2.3371)	Entropy 0.90701 (0.90716)	Top-1 acc 64.844 (68.165)	Top-5 acc 86.328 (86.396)	lr 0.00595
Train [82][250/3239]	Time 0.233 (0.620)	Data Time 0.001 (0.158)	Loss 2.2648 (2.3351)	Entropy 0.90703 (0.90716)	Top-1 acc 70.703 (68.207)	Top-5 acc 88.281 (86.429)	lr 0.00595
Train [82][260/3239]	Time 0.240 (0.614)	Data Time 0.001 (0.152)	Loss 2.2404 (2.3350)	Entropy 0.90694 (0.90715)	Top-1 acc 69.141 (68.211)	Top-5 acc 87.891 (86.430)	lr 0.00595
Train [82][270/3239]	Time 0.236 (0.609)	Data Time 0.001 (0.147)	Loss 2.3095 (2.3371)	Entropy 0.90687 (0.90714)	Top-1 acc 67.188 (68.166)	Top-5 acc 86.328 (86.387)	lr 0.00595
Train [82][280/3239]	Time 0.260 (0.806)	Data Time 0.004 (0.141)	Loss 2.1917 (2.3357)	Entropy 0.90689 (0.90713)	Top-1 acc 71.094 (68.213)	Top-5 acc 89.062 (86.435)	lr 0.00594
Train [82][290/3239]	Time 0.228 (0.795)	Data Time 0.002 (0.137)	Loss 2.2956 (2.3361)	Entropy 0.90687 (0.90712)	Top-1 acc 67.578 (68.213)	Top-5 acc 87.500 (86.425)	lr 0.00594
Train [82][300/3239]	Time 0.229 (0.785)	Data Time 0.001 (0.132)	Loss 2.2915 (2.3353)	Entropy 0.90681 (0.90711)	Top-1 acc 70.703 (68.259)	Top-5 acc 87.891 (86.440)	lr 0.00594
Train [82][310/3239]	Time 0.236 (0.775)	Data Time 0.001 (0.128)	Loss 2.2817 (2.3349)	Entropy 0.90668 (0.90710)	Top-1 acc 71.094 (68.270)	Top-5 acc 85.938 (86.449)	lr 0.00594
Train [82][320/3239]	Time 0.225 (0.766)	Data Time 0.001 (0.124)	Loss 2.5014 (2.3357)	Entropy 0.90664 (0.90709)	Top-1 acc 63.672 (68.245)	Top-5 acc 82.812 (86.439)	lr 0.00594
Train [82][330/3239]	Time 0.221 (0.757)	Data Time 0.001 (0.120)	Loss 2.2377 (2.3357)	Entropy 0.90667 (0.90707)	Top-1 acc 71.875 (68.241)	Top-5 acc 89.453 (86.428)	lr 0.00594
Train [82][340/3239]	Time 2.614 (0.750)	Data Time 0.002 (0.117)	Loss 2.3226 (2.3417)	Entropy 0.90667 (0.90706)	Top-1 acc 71.094 (68.123)	Top-5 acc 87.500 (86.326)	lr 0.00594
Train [82][350/3239]	Time 0.237 (0.735)	Data Time 0.001 (0.114)	Loss 2.3633 (2.3404)	Entropy 0.90660 (0.90705)	Top-1 acc 66.016 (68.149)	Top-5 acc 85.156 (86.336)	lr 0.00594
Train [82][360/3239]	Time 0.237 (0.728)	Data Time 0.001 (0.110)	Loss 2.4530 (2.3415)	Entropy 0.90662 (0.90704)	Top-1 acc 62.891 (68.108)	Top-5 acc 84.766 (86.309)	lr 0.00594
Train [82][370/3239]	Time 0.327 (0.722)	Data Time 0.001 (0.108)	Loss 2.5169 (2.3431)	Entropy 0.90640 (0.90702)	Top-1 acc 62.109 (68.065)	Top-5 acc 83.984 (86.289)	lr 0.00594
Train [82][380/3239]	Time 0.232 (0.715)	Data Time 0.001 (0.105)	Loss 2.2342 (2.3428)	Entropy 0.90638 (0.90700)	Top-1 acc 71.484 (68.059)	Top-5 acc 87.891 (86.291)	lr 0.00594
Train [82][390/3239]	Time 0.223 (0.709)	Data Time 0.001 (0.102)	Loss 2.4269 (2.3424)	Entropy 0.90635 (0.90699)	Top-1 acc 65.234 (68.053)	Top-5 acc 84.766 (86.314)	lr 0.00594
Train [82][400/3239]	Time 0.242 (0.703)	Data Time 0.002 (0.100)	Loss 2.3430 (2.3428)	Entropy 0.90625 (0.90697)	Top-1 acc 69.141 (68.044)	Top-5 acc 85.156 (86.307)	lr 0.00593
Train [82][410/3239]	Time 0.221 (0.697)	Data Time 0.001 (0.097)	Loss 2.4163 (2.3429)	Entropy 0.90629 (0.90695)	Top-1 acc 67.969 (68.044)	Top-5 acc 84.766 (86.281)	lr 0.00593
Train [82][420/3239]	Time 0.227 (0.692)	Data Time 0.001 (0.095)	Loss 2.2190 (2.3425)	Entropy 0.90631 (0.90694)	Top-1 acc 73.438 (68.048)	Top-5 acc 90.234 (86.299)	lr 0.00593
Train [82][430/3239]	Time 0.224 (0.687)	Data Time 0.001 (0.093)	Loss 2.2765 (2.3417)	Entropy 0.90638 (0.90692)	Top-1 acc 69.141 (68.045)	Top-5 acc 86.328 (86.310)	lr 0.00593
Train [82][440/3239]	Time 0.323 (0.682)	Data Time 0.002 (0.091)	Loss 2.2757 (2.3412)	Entropy 0.90635 (0.90691)	Top-1 acc 69.141 (68.048)	Top-5 acc 87.109 (86.326)	lr 0.00593
Train [82][450/3239]	Time 2.642 (0.678)	Data Time 0.001 (0.089)	Loss 2.3024 (2.3414)	Entropy 0.90635 (0.90690)	Top-1 acc 66.016 (68.003)	Top-5 acc 86.719 (86.328)	lr 0.00593
Train [82][460/3239]	Time 0.227 (0.668)	Data Time 0.001 (0.087)	Loss 2.4189 (2.3431)	Entropy 0.90634 (0.90689)	Top-1 acc 66.406 (67.942)	Top-5 acc 85.938 (86.300)	lr 0.00593
Train [82][470/3239]	Time 0.226 (0.664)	Data Time 0.001 (0.085)	Loss 2.2384 (2.3428)	Entropy 0.90633 (0.90687)	Top-1 acc 66.016 (67.943)	Top-5 acc 88.672 (86.313)	lr 0.00593
Train [82][480/3239]	Time 0.234 (0.660)	Data Time 0.001 (0.083)	Loss 2.3963 (2.3434)	Entropy 0.90632 (0.90686)	Top-1 acc 66.016 (67.920)	Top-5 acc 85.938 (86.306)	lr 0.00593
Train [82][490/3239]	Time 0.226 (0.657)	Data Time 0.001 (0.082)	Loss 2.2436 (2.3434)	Entropy 0.90623 (0.90685)	Top-1 acc 71.875 (67.949)	Top-5 acc 86.719 (86.303)	lr 0.00593
Train [82][500/3239]	Time 0.307 (0.653)	Data Time 0.001 (0.080)	Loss 2.2575 (2.3443)	Entropy 0.90631 (0.90684)	Top-1 acc 69.531 (67.924)	Top-5 acc 87.109 (86.284)	lr 0.00593
Train [82][510/3239]	Time 0.233 (0.650)	Data Time 0.001 (0.078)	Loss 2.3120 (2.3445)	Entropy 0.90627 (0.90683)	Top-1 acc 66.797 (67.900)	Top-5 acc 89.844 (86.295)	lr 0.00592
Train [82][520/3239]	Time 0.223 (0.647)	Data Time 0.001 (0.077)	Loss 2.2466 (2.3437)	Entropy 0.90624 (0.90682)	Top-1 acc 71.094 (67.926)	Top-5 acc 89.062 (86.321)	lr 0.00592
Train [82][530/3239]	Time 0.206 (0.643)	Data Time 0.001 (0.076)	Loss 2.4104 (2.3431)	Entropy 0.90625 (0.90681)	Top-1 acc 67.188 (67.943)	Top-5 acc 87.500 (86.342)	lr 0.00592
Train [82][540/3239]	Time 0.232 (0.640)	Data Time 0.001 (0.074)	Loss 2.2399 (2.3434)	Entropy 0.90617 (0.90680)	Top-1 acc 70.312 (67.938)	Top-5 acc 86.719 (86.332)	lr 0.00592
Train [82][550/3239]	Time 0.236 (0.637)	Data Time 0.001 (0.073)	Loss 2.3832 (2.3429)	Entropy 0.90615 (0.90679)	Top-1 acc 67.969 (67.954)	Top-5 acc 85.547 (86.349)	lr 0.00592
Train [82][560/3239]	Time 2.533 (0.635)	Data Time 0.001 (0.072)	Loss 2.5376 (2.3430)	Entropy 0.90615 (0.90677)	Top-1 acc 60.938 (67.949)	Top-5 acc 82.812 (86.339)	lr 0.00592
Train [82][570/3239]	Time 0.311 (0.628)	Data Time 0.001 (0.070)	Loss 2.3042 (2.3433)	Entropy 0.90611 (0.90676)	Top-1 acc 70.703 (67.968)	Top-5 acc 87.500 (86.320)	lr 0.00592
Train [82][580/3239]	Time 0.261 (0.625)	Data Time 0.001 (0.069)	Loss 2.4797 (2.3426)	Entropy 0.90607 (0.90675)	Top-1 acc 64.062 (67.988)	Top-5 acc 81.641 (86.340)	lr 0.00592
Train [82][590/3239]	Time 0.311 (0.623)	Data Time 0.001 (0.068)	Loss 2.2348 (2.3410)	Entropy 0.90599 (0.90674)	Top-1 acc 69.922 (68.022)	Top-5 acc 89.844 (86.375)	lr 0.00592
Train [82][600/3239]	Time 0.223 (0.620)	Data Time 0.001 (0.067)	Loss 2.4477 (2.3418)	Entropy 0.90599 (0.90673)	Top-1 acc 69.531 (68.010)	Top-5 acc 83.594 (86.353)	lr 0.00592
Train [82][610/3239]	Time 0.225 (0.618)	Data Time 0.001 (0.066)	Loss 2.4685 (2.3426)	Entropy 0.90597 (0.90671)	Top-1 acc 66.016 (67.988)	Top-5 acc 84.375 (86.335)	lr 0.00592
Train [82][620/3239]	Time 0.229 (0.615)	Data Time 0.001 (0.065)	Loss 2.3042 (2.3432)	Entropy 0.90594 (0.90670)	Top-1 acc 68.359 (67.976)	Top-5 acc 85.547 (86.315)	lr 0.00592
Train [82][630/3239]	Time 0.224 (0.613)	Data Time 0.001 (0.064)	Loss 2.4707 (2.3430)	Entropy 0.90602 (0.90669)	Top-1 acc 58.594 (67.962)	Top-5 acc 82.422 (86.309)	lr 0.00591
Train [82][640/3239]	Time 0.245 (0.696)	Data Time 0.002 (0.063)	Loss 2.1626 (2.3430)	Entropy 0.90602 (0.90668)	Top-1 acc 72.656 (67.961)	Top-5 acc 89.062 (86.316)	lr 0.00591
Train [82][650/3239]	Time 0.231 (0.693)	Data Time 0.002 (0.062)	Loss 2.4356 (2.3432)	Entropy 0.90594 (0.90667)	Top-1 acc 68.750 (67.968)	Top-5 acc 83.594 (86.306)	lr 0.00591
Train [82][660/3239]	Time 0.242 (0.689)	Data Time 0.002 (0.061)	Loss 2.3137 (2.3433)	Entropy 0.90589 (0.90666)	Top-1 acc 66.797 (67.958)	Top-5 acc 86.328 (86.307)	lr 0.00591
Train [82][670/3239]	Time 2.606 (0.686)	Data Time 0.002 (0.060)	Loss 2.2405 (2.3425)	Entropy 0.90589 (0.90665)	Top-1 acc 68.750 (67.965)	Top-5 acc 87.891 (86.326)	lr 0.00591
Train [82][680/3239]	Time 0.230 (0.680)	Data Time 0.001 (0.059)	Loss 2.2824 (2.3426)	Entropy 0.90589 (0.90664)	Top-1 acc 69.531 (67.960)	Top-5 acc 86.719 (86.322)	lr 0.00591
Train [82][690/3239]	Time 0.230 (0.677)	Data Time 0.001 (0.059)	Loss 2.4187 (2.3430)	Entropy 0.90592 (0.90662)	Top-1 acc 65.625 (67.955)	Top-5 acc 84.375 (86.319)	lr 0.00591
Train [82][700/3239]	Time 0.236 (0.674)	Data Time 0.001 (0.058)	Loss 2.2704 (2.3430)	Entropy 0.90596 (0.90662)	Top-1 acc 68.359 (67.944)	Top-5 acc 88.281 (86.318)	lr 0.00591
Train [82][710/3239]	Time 0.230 (0.671)	Data Time 0.001 (0.057)	Loss 2.5178 (2.3434)	Entropy 0.90594 (0.90661)	Top-1 acc 64.062 (67.941)	Top-5 acc 83.984 (86.320)	lr 0.00591
Train [82][720/3239]	Time 0.235 (0.669)	Data Time 0.001 (0.056)	Loss 2.2797 (2.3439)	Entropy 0.90592 (0.90660)	Top-1 acc 71.094 (67.925)	Top-5 acc 87.109 (86.312)	lr 0.00591
Train [82][730/3239]	Time 0.232 (0.666)	Data Time 0.001 (0.055)	Loss 2.3829 (2.3433)	Entropy 0.90590 (0.90659)	Top-1 acc 71.484 (67.946)	Top-5 acc 83.594 (86.319)	lr 0.00591
Train [82][740/3239]	Time 0.221 (0.663)	Data Time 0.001 (0.055)	Loss 2.3480 (2.3436)	Entropy 0.90572 (0.90658)	Top-1 acc 66.797 (67.936)	Top-5 acc 86.719 (86.311)	lr 0.00591
Train [82][750/3239]	Time 0.229 (0.661)	Data Time 0.001 (0.054)	Loss 2.2188 (2.3433)	Entropy 0.90571 (0.90657)	Top-1 acc 72.656 (67.941)	Top-5 acc 88.672 (86.322)	lr 0.00590
Train [82][760/3239]	Time 0.352 (0.658)	Data Time 0.001 (0.053)	Loss 2.2175 (2.3432)	Entropy 0.90569 (0.90655)	Top-1 acc 68.750 (67.936)	Top-5 acc 90.625 (86.328)	lr 0.00590
Train [82][770/3239]	Time 0.267 (0.656)	Data Time 0.001 (0.053)	Loss 2.2352 (2.3426)	Entropy 0.90571 (0.90654)	Top-1 acc 69.531 (67.940)	Top-5 acc 85.938 (86.333)	lr 0.00590
Train [82][780/3239]	Time 2.479 (0.654)	Data Time 0.001 (0.052)	Loss 2.4349 (2.3429)	Entropy 0.90571 (0.90653)	Top-1 acc 66.406 (67.933)	Top-5 acc 84.375 (86.326)	lr 0.00590
Train [82][790/3239]	Time 0.228 (0.648)	Data Time 0.001 (0.051)	Loss 2.1898 (2.3426)	Entropy 0.90565 (0.90652)	Top-1 acc 72.656 (67.934)	Top-5 acc 88.672 (86.333)	lr 0.00590
Train [82][800/3239]	Time 0.238 (0.646)	Data Time 0.001 (0.051)	Loss 2.4965 (2.3432)	Entropy 0.90563 (0.90651)	Top-1 acc 63.672 (67.912)	Top-5 acc 81.641 (86.327)	lr 0.00590
Train [82][810/3239]	Time 0.231 (0.644)	Data Time 0.001 (0.050)	Loss 2.3119 (2.3432)	Entropy 0.90560 (0.90650)	Top-1 acc 71.875 (67.923)	Top-5 acc 87.109 (86.326)	lr 0.00590
Train [82][820/3239]	Time 0.248 (0.642)	Data Time 0.001 (0.049)	Loss 2.3253 (2.3433)	Entropy 0.90559 (0.90649)	Top-1 acc 69.531 (67.925)	Top-5 acc 87.500 (86.327)	lr 0.00590
Train [82][830/3239]	Time 0.239 (0.640)	Data Time 0.001 (0.049)	Loss 2.2381 (2.3433)	Entropy 0.90550 (0.90648)	Top-1 acc 70.703 (67.922)	Top-5 acc 85.547 (86.329)	lr 0.00590
Train [82][840/3239]	Time 0.225 (0.638)	Data Time 0.001 (0.048)	Loss 2.2991 (2.3429)	Entropy 0.90547 (0.90646)	Top-1 acc 67.969 (67.927)	Top-5 acc 90.625 (86.343)	lr 0.00590
Train [82][850/3239]	Time 0.309 (0.636)	Data Time 0.001 (0.048)	Loss 2.0310 (2.3422)	Entropy 0.90549 (0.90645)	Top-1 acc 76.562 (67.947)	Top-5 acc 91.406 (86.357)	lr 0.00590
Train [82][860/3239]	Time 0.225 (0.634)	Data Time 0.001 (0.047)	Loss 2.2632 (2.3420)	Entropy 0.90542 (0.90644)	Top-1 acc 67.969 (67.947)	Top-5 acc 85.938 (86.358)	lr 0.00589
Train [82][870/3239]	Time 0.246 (0.632)	Data Time 0.001 (0.047)	Loss 2.5938 (2.3428)	Entropy 0.90534 (0.90643)	Top-1 acc 62.891 (67.928)	Top-5 acc 83.594 (86.351)	lr 0.00589
Train [82][880/3239]	Time 0.270 (0.630)	Data Time 0.001 (0.046)	Loss 2.3082 (2.3419)	Entropy 0.90539 (0.90642)	Top-1 acc 71.094 (67.963)	Top-5 acc 86.328 (86.367)	lr 0.00589
Train [82][890/3239]	Time 2.605 (0.629)	Data Time 0.001 (0.046)	Loss 2.4553 (2.3422)	Entropy 0.90539 (0.90641)	Top-1 acc 64.844 (67.947)	Top-5 acc 81.250 (86.356)	lr 0.00589
Train [82][900/3239]	Time 0.259 (0.624)	Data Time 0.002 (0.045)	Loss 2.3086 (2.3420)	Entropy 0.90536 (0.90639)	Top-1 acc 70.703 (67.944)	Top-5 acc 87.891 (86.361)	lr 0.00589
Train [82][910/3239]	Time 0.238 (0.623)	Data Time 0.001 (0.045)	Loss 2.3672 (2.3415)	Entropy 0.90536 (0.90638)	Top-1 acc 64.844 (67.952)	Top-5 acc 86.719 (86.370)	lr 0.00589
Train [82][920/3239]	Time 0.239 (0.621)	Data Time 0.001 (0.044)	Loss 2.3750 (2.3414)	Entropy 0.90535 (0.90637)	Top-1 acc 66.406 (67.968)	Top-5 acc 87.891 (86.375)	lr 0.00589
Train [82][930/3239]	Time 0.216 (0.619)	Data Time 0.002 (0.044)	Loss 2.3788 (2.3413)	Entropy 0.90527 (0.90636)	Top-1 acc 69.141 (67.959)	Top-5 acc 83.594 (86.381)	lr 0.00589
Train [82][940/3239]	Time 0.220 (0.618)	Data Time 0.001 (0.043)	Loss 2.3467 (2.3413)	Entropy 0.90534 (0.90635)	Top-1 acc 66.016 (67.958)	Top-5 acc 84.766 (86.381)	lr 0.00589
Train [82][950/3239]	Time 0.221 (0.616)	Data Time 0.001 (0.043)	Loss 2.3031 (2.3416)	Entropy 0.90535 (0.90634)	Top-1 acc 66.406 (67.946)	Top-5 acc 87.109 (86.377)	lr 0.00589
Train [82][960/3239]	Time 0.237 (0.615)	Data Time 0.001 (0.043)	Loss 2.3184 (2.3419)	Entropy 0.90522 (0.90633)	Top-1 acc 69.141 (67.948)	Top-5 acc 85.547 (86.368)	lr 0.00589
Train [82][970/3239]	Time 0.228 (0.613)	Data Time 0.001 (0.042)	Loss 2.1821 (2.3413)	Entropy 0.90511 (0.90632)	Top-1 acc 72.266 (67.967)	Top-5 acc 90.234 (86.384)	lr 0.00589
Train [82][980/3239]	Time 0.229 (0.612)	Data Time 0.001 (0.042)	Loss 2.2297 (2.3407)	Entropy 0.90509 (0.90630)	Top-1 acc 70.312 (67.980)	Top-5 acc 86.719 (86.394)	lr 0.00588
Train [82][990/3239]	Time 0.265 (0.611)	Data Time 0.002 (0.041)	Loss 2.3399 (2.3410)	Entropy 0.90504 (0.90629)	Top-1 acc 65.625 (67.981)	Top-5 acc 85.156 (86.381)	lr 0.00588
Train [82][1000/3239]	Time 57.777 (0.664)	Data Time 0.001 (0.041)	Loss 2.4557 (2.3411)	Entropy 0.90504 (0.90628)	Top-1 acc 64.062 (67.972)	Top-5 acc 85.156 (86.381)	lr 0.00588
Train [82][1010/3239]	Time 0.222 (0.661)	Data Time 0.002 (0.040)	Loss 2.2632 (2.3411)	Entropy 0.90505 (0.90627)	Top-1 acc 70.703 (67.975)	Top-5 acc 87.109 (86.383)	lr 0.00588
Train [82][1020/3239]	Time 0.367 (0.659)	Data Time 0.002 (0.040)	Loss 2.3393 (2.3407)	Entropy 0.90507 (0.90625)	Top-1 acc 67.578 (67.983)	Top-5 acc 86.719 (86.390)	lr 0.00588
Train [82][1030/3239]	Time 0.247 (0.657)	Data Time 0.001 (0.040)	Loss 2.4522 (2.3408)	Entropy 0.90505 (0.90624)	Top-1 acc 68.359 (67.982)	Top-5 acc 84.375 (86.391)	lr 0.00588
Train [82][1040/3239]	Time 0.228 (0.656)	Data Time 0.001 (0.039)	Loss 2.4029 (2.3408)	Entropy 0.90500 (0.90623)	Top-1 acc 67.578 (67.979)	Top-5 acc 87.109 (86.389)	lr 0.00588
Train [82][1050/3239]	Time 0.236 (0.654)	Data Time 0.001 (0.039)	Loss 2.2197 (2.3405)	Entropy 0.90497 (0.90622)	Top-1 acc 71.484 (67.978)	Top-5 acc 87.500 (86.395)	lr 0.00588
Train [82][1060/3239]	Time 0.231 (0.652)	Data Time 0.001 (0.039)	Loss 2.2771 (2.3400)	Entropy 0.90496 (0.90621)	Top-1 acc 71.094 (67.999)	Top-5 acc 85.156 (86.404)	lr 0.00588
Train [82][1070/3239]	Time 0.235 (0.651)	Data Time 0.001 (0.038)	Loss 2.4840 (2.3400)	Entropy 0.90500 (0.90620)	Top-1 acc 62.891 (68.001)	Top-5 acc 85.547 (86.413)	lr 0.00588
Train [82][1080/3239]	Time 0.238 (0.649)	Data Time 0.001 (0.038)	Loss 2.2639 (2.3399)	Entropy 0.90498 (0.90618)	Top-1 acc 73.047 (68.014)	Top-5 acc 88.281 (86.416)	lr 0.00588
Train [82][1090/3239]	Time 0.253 (0.648)	Data Time 0.001 (0.038)	Loss 2.3241 (2.3399)	Entropy 0.90499 (0.90617)	Top-1 acc 71.484 (68.016)	Top-5 acc 86.328 (86.410)	lr 0.00588
Train [82][1100/3239]	Time 0.258 (0.646)	Data Time 0.002 (0.037)	Loss 2.3463 (2.3399)	Entropy 0.90489 (0.90616)	Top-1 acc 64.453 (68.010)	Top-5 acc 84.766 (86.409)	lr 0.00587
Train [82][1110/3239]	Time 2.679 (0.645)	Data Time 0.002 (0.037)	Loss 2.3512 (2.3399)	Entropy 0.90489 (0.90615)	Top-1 acc 66.797 (68.002)	Top-5 acc 87.109 (86.414)	lr 0.00587
Train [82][1120/3239]	Time 0.233 (0.641)	Data Time 0.001 (0.037)	Loss 2.1822 (2.3395)	Entropy 0.90488 (0.90614)	Top-1 acc 73.047 (68.014)	Top-5 acc 87.109 (86.418)	lr 0.00587
Train [82][1130/3239]	Time 0.246 (0.640)	Data Time 0.001 (0.036)	Loss 2.3354 (2.3399)	Entropy 0.90486 (0.90613)	Top-1 acc 64.453 (68.004)	Top-5 acc 87.109 (86.413)	lr 0.00587
Train [82][1140/3239]	Time 0.234 (0.638)	Data Time 0.001 (0.036)	Loss 2.3721 (2.3401)	Entropy 0.90489 (0.90612)	Top-1 acc 65.234 (67.996)	Top-5 acc 85.938 (86.416)	lr 0.00587
Train [82][1150/3239]	Time 0.285 (0.637)	Data Time 0.002 (0.036)	Loss 2.3897 (2.3398)	Entropy 0.90485 (0.90611)	Top-1 acc 66.797 (67.998)	Top-5 acc 87.109 (86.418)	lr 0.00587
Train [82][1160/3239]	Time 0.237 (0.636)	Data Time 0.001 (0.035)	Loss 2.4777 (2.3403)	Entropy 0.90491 (0.90610)	Top-1 acc 66.797 (67.990)	Top-5 acc 83.984 (86.409)	lr 0.00587
Train [82][1170/3239]	Time 0.230 (0.635)	Data Time 0.001 (0.035)	Loss 2.2390 (2.3407)	Entropy 0.90486 (0.90609)	Top-1 acc 70.312 (67.979)	Top-5 acc 89.062 (86.407)	lr 0.00587
Train [82][1180/3239]	Time 0.233 (0.633)	Data Time 0.001 (0.035)	Loss 2.1794 (2.3407)	Entropy 0.90479 (0.90608)	Top-1 acc 71.484 (67.981)	Top-5 acc 89.062 (86.405)	lr 0.00587
Train [82][1190/3239]	Time 0.226 (0.632)	Data Time 0.002 (0.035)	Loss 2.2824 (2.3405)	Entropy 0.90477 (0.90606)	Top-1 acc 68.750 (67.983)	Top-5 acc 87.891 (86.410)	lr 0.00587
Train [82][1200/3239]	Time 0.226 (0.631)	Data Time 0.001 (0.034)	Loss 2.2822 (2.3401)	Entropy 0.90469 (0.90605)	Top-1 acc 71.875 (67.999)	Top-5 acc 87.500 (86.418)	lr 0.00587
Train [82][1210/3239]	Time 0.221 (0.630)	Data Time 0.001 (0.034)	Loss 2.5513 (2.3400)	Entropy 0.90475 (0.90604)	Top-1 acc 59.766 (68.000)	Top-5 acc 82.812 (86.420)	lr 0.00586
Train [82][1220/3239]	Time 2.537 (0.629)	Data Time 0.001 (0.034)	Loss 2.3968 (2.3404)	Entropy 0.90475 (0.90603)	Top-1 acc 65.234 (67.989)	Top-5 acc 87.891 (86.414)	lr 0.00586
Train [82][1230/3239]	Time 0.255 (0.625)	Data Time 0.001 (0.034)	Loss 2.3386 (2.3407)	Entropy 0.90473 (0.90602)	Top-1 acc 70.703 (67.976)	Top-5 acc 87.109 (86.410)	lr 0.00586
Train [82][1240/3239]	Time 0.345 (0.624)	Data Time 0.001 (0.033)	Loss 2.3251 (2.3408)	Entropy 0.90463 (0.90601)	Top-1 acc 69.531 (67.975)	Top-5 acc 87.891 (86.404)	lr 0.00586
Train [82][1250/3239]	Time 0.254 (0.623)	Data Time 0.002 (0.033)	Loss 2.4391 (2.3411)	Entropy 0.90458 (0.90600)	Top-1 acc 66.797 (67.970)	Top-5 acc 84.766 (86.394)	lr 0.00586
Train [82][1260/3239]	Time 0.293 (0.622)	Data Time 0.001 (0.033)	Loss 2.3966 (2.3410)	Entropy 0.90455 (0.90599)	Top-1 acc 65.234 (67.981)	Top-5 acc 83.984 (86.394)	lr 0.00586
Train [82][1270/3239]	Time 0.235 (0.621)	Data Time 0.001 (0.033)	Loss 2.2592 (2.3409)	Entropy 0.90459 (0.90598)	Top-1 acc 67.188 (67.976)	Top-5 acc 87.891 (86.396)	lr 0.00586
Train [82][1280/3239]	Time 0.344 (0.620)	Data Time 0.001 (0.032)	Loss 2.3637 (2.3414)	Entropy 0.90461 (0.90597)	Top-1 acc 64.844 (67.962)	Top-5 acc 87.500 (86.391)	lr 0.00586
Train [82][1290/3239]	Time 0.228 (0.619)	Data Time 0.001 (0.032)	Loss 2.3261 (2.3411)	Entropy 0.90457 (0.90596)	Top-1 acc 68.750 (67.968)	Top-5 acc 85.156 (86.401)	lr 0.00586
Train [82][1300/3239]	Time 0.216 (0.618)	Data Time 0.001 (0.032)	Loss 2.3201 (2.3413)	Entropy 0.90458 (0.90594)	Top-1 acc 68.750 (67.958)	Top-5 acc 85.938 (86.396)	lr 0.00586
Train [82][1310/3239]	Time 0.237 (0.617)	Data Time 0.001 (0.032)	Loss 2.3982 (2.3417)	Entropy 0.90445 (0.90593)	Top-1 acc 65.625 (67.952)	Top-5 acc 85.938 (86.387)	lr 0.00586
Train [82][1320/3239]	Time 0.239 (0.616)	Data Time 0.001 (0.031)	Loss 2.2513 (2.3417)	Entropy 0.90449 (0.90592)	Top-1 acc 71.875 (67.950)	Top-5 acc 88.281 (86.385)	lr 0.00586
Train [82][1330/3239]	Time 2.491 (0.614)	Data Time 0.001 (0.031)	Loss 2.2288 (2.3419)	Entropy 0.90449 (0.90591)	Top-1 acc 70.703 (67.940)	Top-5 acc 88.672 (86.383)	lr 0.00585
Train [82][1340/3239]	Time 0.209 (0.612)	Data Time 0.001 (0.031)	Loss 2.4888 (2.3421)	Entropy 0.90445 (0.90590)	Top-1 acc 62.891 (67.928)	Top-5 acc 85.156 (86.381)	lr 0.00585
Train [82][1350/3239]	Time 0.237 (0.611)	Data Time 0.001 (0.031)	Loss 2.3848 (2.3420)	Entropy 0.90442 (0.90589)	Top-1 acc 69.141 (67.931)	Top-5 acc 85.156 (86.387)	lr 0.00585
Train [82][1360/3239]	Time 0.228 (0.610)	Data Time 0.001 (0.030)	Loss 2.2613 (2.3420)	Entropy 0.90441 (0.90588)	Top-1 acc 67.188 (67.929)	Top-5 acc 88.281 (86.389)	lr 0.00585
Train [82][1370/3239]	Time 0.229 (0.649)	Data Time 0.002 (0.030)	Loss 2.3506 (2.3420)	Entropy 0.90438 (0.90587)	Top-1 acc 68.359 (67.933)	Top-5 acc 87.109 (86.388)	lr 0.00585
Train [82][1380/3239]	Time 0.226 (0.648)	Data Time 0.002 (0.030)	Loss 2.2728 (2.3420)	Entropy 0.90420 (0.90586)	Top-1 acc 67.969 (67.935)	Top-5 acc 89.062 (86.386)	lr 0.00585
Train [82][1390/3239]	Time 0.277 (0.646)	Data Time 0.005 (0.030)	Loss 2.2286 (2.3420)	Entropy 0.90420 (0.90584)	Top-1 acc 68.750 (67.936)	Top-5 acc 89.062 (86.381)	lr 0.00585
Train [82][1400/3239]	Time 0.228 (0.645)	Data Time 0.001 (0.030)	Loss 2.3445 (2.3420)	Entropy 0.90420 (0.90583)	Top-1 acc 70.312 (67.943)	Top-5 acc 87.109 (86.381)	lr 0.00585
Train [82][1410/3239]	Time 0.392 (0.644)	Data Time 0.002 (0.029)	Loss 2.4245 (2.3423)	Entropy 0.90422 (0.90582)	Top-1 acc 60.156 (67.926)	Top-5 acc 84.766 (86.377)	lr 0.00585
Train [82][1420/3239]	Time 0.238 (0.643)	Data Time 0.002 (0.029)	Loss 2.3139 (2.3421)	Entropy 0.90423 (0.90581)	Top-1 acc 66.016 (67.921)	Top-5 acc 85.938 (86.384)	lr 0.00585
Train [82][1430/3239]	Time 0.270 (0.642)	Data Time 0.001 (0.029)	Loss 2.2746 (2.3422)	Entropy 0.90422 (0.90580)	Top-1 acc 68.750 (67.920)	Top-5 acc 87.500 (86.386)	lr 0.00585
Train [82][1440/3239]	Time 2.584 (0.641)	Data Time 0.001 (0.029)	Loss 2.5277 (2.3422)	Entropy 0.90422 (0.90579)	Top-1 acc 61.328 (67.914)	Top-5 acc 82.812 (86.384)	lr 0.00585
Train [82][1450/3239]	Time 0.222 (0.638)	Data Time 0.001 (0.029)	Loss 2.4047 (2.3429)	Entropy 0.90423 (0.90578)	Top-1 acc 65.625 (67.901)	Top-5 acc 86.328 (86.370)	lr 0.00584
Train [82][1460/3239]	Time 0.239 (0.637)	Data Time 0.001 (0.029)	Loss 2.2171 (2.3429)	Entropy 0.90423 (0.90577)	Top-1 acc 67.578 (67.902)	Top-5 acc 87.891 (86.367)	lr 0.00584
Train [82][1470/3239]	Time 0.226 (0.636)	Data Time 0.001 (0.028)	Loss 2.4741 (2.3431)	Entropy 0.90423 (0.90576)	Top-1 acc 63.281 (67.893)	Top-5 acc 86.719 (86.362)	lr 0.00584
Train [82][1480/3239]	Time 0.243 (0.635)	Data Time 0.001 (0.028)	Loss 2.3328 (2.3431)	Entropy 0.90423 (0.90575)	Top-1 acc 71.094 (67.891)	Top-5 acc 84.766 (86.357)	lr 0.00584
Train [82][1490/3239]	Time 0.229 (0.634)	Data Time 0.001 (0.028)	Loss 2.2185 (2.3434)	Entropy 0.90424 (0.90574)	Top-1 acc 71.484 (67.882)	Top-5 acc 86.328 (86.351)	lr 0.00584
Train [82][1500/3239]	Time 0.326 (0.633)	Data Time 0.001 (0.028)	Loss 2.3746 (2.3436)	Entropy 0.90418 (0.90573)	Top-1 acc 66.016 (67.877)	Top-5 acc 86.719 (86.349)	lr 0.00584
Train [82][1510/3239]	Time 0.222 (0.632)	Data Time 0.001 (0.028)	Loss 2.2582 (2.3438)	Entropy 0.90410 (0.90572)	Top-1 acc 67.969 (67.858)	Top-5 acc 87.109 (86.343)	lr 0.00584
Train [82][1520/3239]	Time 0.215 (0.631)	Data Time 0.001 (0.027)	Loss 2.6345 (2.3441)	Entropy 0.90411 (0.90570)	Top-1 acc 62.109 (67.851)	Top-5 acc 82.422 (86.339)	lr 0.00584
Train [82][1530/3239]	Time 0.239 (0.630)	Data Time 0.001 (0.027)	Loss 2.2786 (2.3438)	Entropy 0.90411 (0.90569)	Top-1 acc 69.922 (67.860)	Top-5 acc 87.891 (86.344)	lr 0.00584
Train [82][1540/3239]	Time 0.250 (0.629)	Data Time 0.001 (0.027)	Loss 2.3338 (2.3439)	Entropy 0.90411 (0.90568)	Top-1 acc 66.406 (67.853)	Top-5 acc 86.328 (86.342)	lr 0.00584
Train [82][1550/3239]	Time 2.458 (0.628)	Data Time 0.001 (0.027)	Loss 2.3786 (2.3437)	Entropy 0.90411 (0.90567)	Top-1 acc 66.016 (67.860)	Top-5 acc 87.109 (86.343)	lr 0.00584
Train [82][1560/3239]	Time 0.231 (0.625)	Data Time 0.001 (0.027)	Loss 2.3960 (2.3435)	Entropy 0.90403 (0.90566)	Top-1 acc 67.188 (67.867)	Top-5 acc 85.547 (86.341)	lr 0.00583
Train [82][1570/3239]	Time 0.235 (0.624)	Data Time 0.001 (0.027)	Loss 2.3060 (2.3435)	Entropy 0.90397 (0.90565)	Top-1 acc 70.703 (67.870)	Top-5 acc 87.500 (86.343)	lr 0.00583
Train [82][1580/3239]	Time 0.220 (0.623)	Data Time 0.001 (0.026)	Loss 2.2305 (2.3434)	Entropy 0.90395 (0.90564)	Top-1 acc 73.438 (67.874)	Top-5 acc 87.891 (86.344)	lr 0.00583
Train [82][1590/3239]	Time 0.335 (0.622)	Data Time 0.002 (0.026)	Loss 2.3978 (2.3437)	Entropy 0.90388 (0.90563)	Top-1 acc 63.281 (67.863)	Top-5 acc 86.328 (86.339)	lr 0.00583
Train [82][1600/3239]	Time 0.221 (0.622)	Data Time 0.001 (0.026)	Loss 2.3351 (2.3438)	Entropy 0.90382 (0.90562)	Top-1 acc 71.875 (67.863)	Top-5 acc 85.547 (86.334)	lr 0.00583
Train [82][1610/3239]	Time 0.231 (0.621)	Data Time 0.001 (0.026)	Loss 2.3453 (2.3437)	Entropy 0.90388 (0.90561)	Top-1 acc 67.188 (67.865)	Top-5 acc 87.500 (86.338)	lr 0.00583
Train [82][1620/3239]	Time 0.232 (0.620)	Data Time 0.001 (0.026)	Loss 2.3661 (2.3436)	Entropy 0.90394 (0.90560)	Top-1 acc 64.844 (67.864)	Top-5 acc 87.500 (86.340)	lr 0.00583
Train [82][1630/3239]	Time 0.325 (0.619)	Data Time 0.001 (0.026)	Loss 2.3400 (2.3437)	Entropy 0.90395 (0.90559)	Top-1 acc 67.578 (67.862)	Top-5 acc 87.109 (86.342)	lr 0.00583
Train [82][1640/3239]	Time 0.285 (0.618)	Data Time 0.001 (0.026)	Loss 2.3855 (2.3441)	Entropy 0.90389 (0.90558)	Top-1 acc 68.750 (67.855)	Top-5 acc 86.719 (86.339)	lr 0.00583
Train [82][1650/3239]	Time 0.286 (0.617)	Data Time 0.001 (0.025)	Loss 2.2860 (2.3441)	Entropy 0.90385 (0.90557)	Top-1 acc 69.922 (67.855)	Top-5 acc 88.281 (86.341)	lr 0.00583
Train [82][1660/3239]	Time 2.499 (0.616)	Data Time 0.002 (0.025)	Loss 2.3868 (2.3440)	Entropy 0.90385 (0.90556)	Top-1 acc 64.844 (67.850)	Top-5 acc 84.766 (86.341)	lr 0.00583
Train [82][1670/3239]	Time 0.236 (0.614)	Data Time 0.001 (0.025)	Loss 2.3072 (2.3441)	Entropy 0.90384 (0.90555)	Top-1 acc 64.844 (67.850)	Top-5 acc 83.594 (86.338)	lr 0.00583
Train [82][1680/3239]	Time 0.222 (0.613)	Data Time 0.001 (0.025)	Loss 2.5026 (2.3442)	Entropy 0.90384 (0.90554)	Top-1 acc 64.453 (67.848)	Top-5 acc 83.203 (86.338)	lr 0.00582
Train [82][1690/3239]	Time 0.240 (0.612)	Data Time 0.001 (0.025)	Loss 2.3631 (2.3442)	Entropy 0.90379 (0.90553)	Top-1 acc 67.188 (67.845)	Top-5 acc 86.719 (86.338)	lr 0.00582
Train [82][1700/3239]	Time 0.232 (0.611)	Data Time 0.001 (0.025)	Loss 2.3787 (2.3442)	Entropy 0.90365 (0.90552)	Top-1 acc 66.016 (67.847)	Top-5 acc 85.156 (86.339)	lr 0.00582
Train [82][1710/3239]	Time 0.230 (0.611)	Data Time 0.001 (0.025)	Loss 2.1449 (2.3440)	Entropy 0.90359 (0.90551)	Top-1 acc 73.438 (67.856)	Top-5 acc 88.281 (86.341)	lr 0.00582
Train [82][1720/3239]	Time 0.327 (0.610)	Data Time 0.001 (0.024)	Loss 2.4752 (2.3438)	Entropy 0.90355 (0.90549)	Top-1 acc 62.500 (67.856)	Top-5 acc 85.547 (86.348)	lr 0.00582
Train [82][1730/3239]	Time 0.225 (0.641)	Data Time 0.002 (0.024)	Loss 2.3201 (2.3437)	Entropy 0.90353 (0.90548)	Top-1 acc 72.266 (67.862)	Top-5 acc 85.938 (86.349)	lr 0.00582
Train [82][1740/3239]	Time 0.251 (0.640)	Data Time 0.002 (0.024)	Loss 2.3370 (2.3440)	Entropy 0.90353 (0.90547)	Top-1 acc 67.969 (67.857)	Top-5 acc 86.328 (86.345)	lr 0.00582
Train [82][1750/3239]	Time 0.242 (0.639)	Data Time 0.002 (0.024)	Loss 2.2653 (2.3438)	Entropy 0.90335 (0.90546)	Top-1 acc 69.141 (67.860)	Top-5 acc 86.328 (86.347)	lr 0.00582
Train [82][1760/3239]	Time 0.361 (0.638)	Data Time 0.001 (0.024)	Loss 2.4121 (2.3437)	Entropy 0.90331 (0.90545)	Top-1 acc 66.406 (67.861)	Top-5 acc 85.547 (86.349)	lr 0.00582
Train [82][1770/3239]	Time 2.630 (0.637)	Data Time 0.002 (0.024)	Loss 2.5535 (2.3440)	Entropy 0.90331 (0.90544)	Top-1 acc 61.328 (67.852)	Top-5 acc 82.422 (86.344)	lr 0.00582
Train [82][1780/3239]	Time 0.248 (0.635)	Data Time 0.001 (0.024)	Loss 2.4018 (2.3442)	Entropy 0.90325 (0.90542)	Top-1 acc 66.797 (67.845)	Top-5 acc 86.328 (86.337)	lr 0.00582
Train [82][1790/3239]	Time 0.220 (0.634)	Data Time 0.001 (0.024)	Loss 2.3606 (2.3441)	Entropy 0.90329 (0.90541)	Top-1 acc 69.141 (67.855)	Top-5 acc 84.766 (86.338)	lr 0.00582
Train [82][1800/3239]	Time 0.241 (0.633)	Data Time 0.001 (0.023)	Loss 2.3123 (2.3442)	Entropy 0.90322 (0.90540)	Top-1 acc 69.141 (67.850)	Top-5 acc 86.719 (86.334)	lr 0.00581
Train [82][1810/3239]	Time 0.236 (0.633)	Data Time 0.001 (0.023)	Loss 2.3734 (2.3441)	Entropy 0.90317 (0.90539)	Top-1 acc 69.531 (67.852)	Top-5 acc 83.594 (86.332)	lr 0.00581
Train [82][1820/3239]	Time 0.230 (0.632)	Data Time 0.001 (0.023)	Loss 2.3900 (2.3442)	Entropy 0.90313 (0.90538)	Top-1 acc 67.578 (67.851)	Top-5 acc 85.156 (86.329)	lr 0.00581
Train [82][1830/3239]	Time 0.210 (0.631)	Data Time 0.001 (0.023)	Loss 2.3347 (2.3442)	Entropy 0.90304 (0.90536)	Top-1 acc 68.750 (67.849)	Top-5 acc 86.719 (86.328)	lr 0.00581
Train [82][1840/3239]	Time 0.231 (0.630)	Data Time 0.001 (0.023)	Loss 2.2196 (2.3441)	Entropy 0.90302 (0.90535)	Top-1 acc 69.531 (67.850)	Top-5 acc 89.453 (86.333)	lr 0.00581
Train [82][1850/3239]	Time 0.235 (0.629)	Data Time 0.001 (0.023)	Loss 2.4068 (2.3441)	Entropy 0.90304 (0.90534)	Top-1 acc 67.188 (67.851)	Top-5 acc 83.203 (86.330)	lr 0.00581
Train [82][1860/3239]	Time 0.287 (0.629)	Data Time 0.002 (0.023)	Loss 2.2732 (2.3438)	Entropy 0.90306 (0.90533)	Top-1 acc 71.875 (67.853)	Top-5 acc 87.500 (86.337)	lr 0.00581
Train [82][1870/3239]	Time 0.237 (0.628)	Data Time 0.001 (0.023)	Loss 2.3968 (2.3440)	Entropy 0.90298 (0.90531)	Top-1 acc 65.234 (67.847)	Top-5 acc 85.938 (86.335)	lr 0.00581
Train [82][1880/3239]	Time 2.502 (0.627)	Data Time 0.001 (0.023)	Loss 2.3827 (2.3441)	Entropy 0.90298 (0.90530)	Top-1 acc 66.016 (67.847)	Top-5 acc 85.156 (86.328)	lr 0.00581
Train [82][1890/3239]	Time 0.216 (0.625)	Data Time 0.001 (0.022)	Loss 2.3504 (2.3441)	Entropy 0.90289 (0.90529)	Top-1 acc 69.141 (67.847)	Top-5 acc 85.938 (86.328)	lr 0.00581
Train [82][1900/3239]	Time 0.234 (0.624)	Data Time 0.001 (0.022)	Loss 2.3732 (2.3442)	Entropy 0.90290 (0.90528)	Top-1 acc 68.750 (67.839)	Top-5 acc 86.328 (86.325)	lr 0.00581
Train [82][1910/3239]	Time 0.248 (0.623)	Data Time 0.001 (0.022)	Loss 2.4966 (2.3443)	Entropy 0.90291 (0.90526)	Top-1 acc 62.109 (67.839)	Top-5 acc 82.812 (86.324)	lr 0.00581
Train [82][1920/3239]	Time 0.233 (0.622)	Data Time 0.001 (0.022)	Loss 2.2170 (2.3442)	Entropy 0.90284 (0.90525)	Top-1 acc 72.266 (67.847)	Top-5 acc 87.891 (86.321)	lr 0.00580
Train [82][1930/3239]	Time 0.238 (0.622)	Data Time 0.001 (0.022)	Loss 2.5611 (2.3446)	Entropy 0.90276 (0.90524)	Top-1 acc 60.938 (67.838)	Top-5 acc 82.422 (86.315)	lr 0.00580
Train [82][1940/3239]	Time 0.202 (0.621)	Data Time 0.001 (0.022)	Loss 2.3303 (2.3445)	Entropy 0.90275 (0.90522)	Top-1 acc 66.797 (67.842)	Top-5 acc 85.547 (86.316)	lr 0.00580
Train [82][1950/3239]	Time 0.230 (0.620)	Data Time 0.001 (0.022)	Loss 2.3492 (2.3445)	Entropy 0.90269 (0.90521)	Top-1 acc 69.141 (67.844)	Top-5 acc 86.328 (86.316)	lr 0.00580
Train [82][1960/3239]	Time 0.251 (0.619)	Data Time 0.001 (0.022)	Loss 2.4554 (2.3445)	Entropy 0.90269 (0.90520)	Top-1 acc 64.844 (67.846)	Top-5 acc 85.547 (86.316)	lr 0.00580
Train [82][1970/3239]	Time 0.238 (0.619)	Data Time 0.001 (0.022)	Loss 2.1844 (2.3444)	Entropy 0.90268 (0.90519)	Top-1 acc 71.094 (67.848)	Top-5 acc 87.500 (86.316)	lr 0.00580
Train [82][1980/3239]	Time 0.359 (0.618)	Data Time 0.002 (0.021)	Loss 2.4679 (2.3446)	Entropy 0.90267 (0.90517)	Top-1 acc 66.016 (67.842)	Top-5 acc 82.422 (86.312)	lr 0.00580
Train [82][1990/3239]	Time 2.468 (0.617)	Data Time 0.001 (0.021)	Loss 2.6259 (2.3446)	Entropy 0.90267 (0.90516)	Top-1 acc 62.109 (67.839)	Top-5 acc 80.859 (86.312)	lr 0.00580
Train [82][2000/3239]	Time 0.243 (0.616)	Data Time 0.002 (0.021)	Loss 2.4486 (2.3446)	Entropy 0.90261 (0.90515)	Top-1 acc 67.188 (67.843)	Top-5 acc 83.984 (86.314)	lr 0.00580
Train [82][2010/3239]	Time 0.228 (0.615)	Data Time 0.001 (0.021)	Loss 2.4465 (2.3445)	Entropy 0.90255 (0.90514)	Top-1 acc 64.062 (67.845)	Top-5 acc 85.938 (86.314)	lr 0.00580
Train [82][2020/3239]	Time 0.232 (0.614)	Data Time 0.001 (0.021)	Loss 2.2984 (2.3444)	Entropy 0.90249 (0.90512)	Top-1 acc 67.578 (67.847)	Top-5 acc 86.719 (86.316)	lr 0.00580
Train [82][2030/3239]	Time 0.237 (0.613)	Data Time 0.001 (0.021)	Loss 2.3310 (2.3446)	Entropy 0.90250 (0.90511)	Top-1 acc 65.234 (67.837)	Top-5 acc 88.281 (86.311)	lr 0.00579
Train [82][2040/3239]	Time 0.222 (0.613)	Data Time 0.001 (0.021)	Loss 2.3229 (2.3447)	Entropy 0.90247 (0.90510)	Top-1 acc 64.453 (67.832)	Top-5 acc 86.719 (86.310)	lr 0.00579
Train [82][2050/3239]	Time 0.236 (0.612)	Data Time 0.001 (0.021)	Loss 2.4490 (2.3447)	Entropy 0.90244 (0.90508)	Top-1 acc 67.969 (67.835)	Top-5 acc 83.203 (86.312)	lr 0.00579
Train [82][2060/3239]	Time 0.234 (0.611)	Data Time 0.001 (0.021)	Loss 2.1870 (2.3446)	Entropy 0.90245 (0.90507)	Top-1 acc 70.312 (67.833)	Top-5 acc 89.453 (86.312)	lr 0.00579
Train [82][2070/3239]	Time 0.229 (0.611)	Data Time 0.001 (0.021)	Loss 2.4136 (2.3446)	Entropy 0.90248 (0.90506)	Top-1 acc 69.922 (67.832)	Top-5 acc 87.109 (86.315)	lr 0.00579
Train [82][2080/3239]	Time 0.244 (0.610)	Data Time 0.001 (0.020)	Loss 2.0622 (2.3443)	Entropy 0.90249 (0.90505)	Top-1 acc 73.828 (67.838)	Top-5 acc 91.406 (86.323)	lr 0.00579
Train [82][2090/3239]	Time 0.291 (0.635)	Data Time 0.004 (0.020)	Loss 2.3530 (2.3442)	Entropy 0.90248 (0.90503)	Top-1 acc 65.625 (67.840)	Top-5 acc 88.672 (86.327)	lr 0.00579
Train [82][2100/3239]	Time 2.519 (0.634)	Data Time 0.002 (0.020)	Loss 2.2457 (2.3443)	Entropy 0.90248 (0.90502)	Top-1 acc 69.141 (67.835)	Top-5 acc 87.109 (86.324)	lr 0.00579
Train [82][2110/3239]	Time 0.375 (0.633)	Data Time 0.002 (0.020)	Loss 2.3404 (2.3445)	Entropy 0.90247 (0.90501)	Top-1 acc 68.359 (67.831)	Top-5 acc 86.328 (86.319)	lr 0.00579
Train [82][2120/3239]	Time 0.234 (0.632)	Data Time 0.001 (0.020)	Loss 2.1814 (2.3449)	Entropy 0.90245 (0.90500)	Top-1 acc 68.750 (67.821)	Top-5 acc 89.062 (86.313)	lr 0.00579
Train [82][2130/3239]	Time 0.230 (0.631)	Data Time 0.001 (0.020)	Loss 2.3132 (2.3450)	Entropy 0.90247 (0.90499)	Top-1 acc 69.141 (67.816)	Top-5 acc 86.328 (86.308)	lr 0.00579
Train [82][2140/3239]	Time 0.233 (0.630)	Data Time 0.001 (0.020)	Loss 2.2972 (2.3448)	Entropy 0.90243 (0.90497)	Top-1 acc 70.312 (67.820)	Top-5 acc 90.625 (86.316)	lr 0.00579
Train [82][2150/3239]	Time 0.227 (0.630)	Data Time 0.001 (0.020)	Loss 2.4041 (2.3449)	Entropy 0.90235 (0.90496)	Top-1 acc 66.016 (67.813)	Top-5 acc 83.984 (86.312)	lr 0.00578
Train [82][2160/3239]	Time 0.241 (0.629)	Data Time 0.002 (0.020)	Loss 2.2043 (2.3449)	Entropy 0.90233 (0.90495)	Top-1 acc 73.438 (67.814)	Top-5 acc 89.453 (86.311)	lr 0.00578
Train [82][2170/3239]	Time 0.228 (0.628)	Data Time 0.001 (0.020)	Loss 2.4801 (2.3450)	Entropy 0.90232 (0.90494)	Top-1 acc 62.500 (67.809)	Top-5 acc 85.938 (86.308)	lr 0.00578
Train [82][2180/3239]	Time 0.244 (0.628)	Data Time 0.002 (0.020)	Loss 2.2979 (2.3451)	Entropy 0.90231 (0.90493)	Top-1 acc 67.188 (67.809)	Top-5 acc 87.891 (86.301)	lr 0.00578
Train [82][2190/3239]	Time 0.278 (0.627)	Data Time 0.001 (0.020)	Loss 2.2916 (2.3450)	Entropy 0.90231 (0.90491)	Top-1 acc 69.141 (67.815)	Top-5 acc 88.281 (86.302)	lr 0.00578
Train [82][2200/3239]	Time 0.231 (0.626)	Data Time 0.002 (0.019)	Loss 2.1975 (2.3451)	Entropy 0.90230 (0.90490)	Top-1 acc 72.266 (67.809)	Top-5 acc 88.281 (86.298)	lr 0.00578
Train [82][2210/3239]	Time 2.539 (0.626)	Data Time 0.001 (0.019)	Loss 2.2767 (2.3451)	Entropy 0.90230 (0.90489)	Top-1 acc 69.922 (67.811)	Top-5 acc 86.328 (86.297)	lr 0.00578
Train [82][2220/3239]	Time 0.229 (0.624)	Data Time 0.001 (0.019)	Loss 2.2686 (2.3454)	Entropy 0.90234 (0.90488)	Top-1 acc 69.922 (67.807)	Top-5 acc 87.500 (86.293)	lr 0.00578
Train [82][2230/3239]	Time 0.235 (0.623)	Data Time 0.001 (0.019)	Loss 2.3050 (2.3454)	Entropy 0.90232 (0.90487)	Top-1 acc 69.922 (67.803)	Top-5 acc 88.672 (86.296)	lr 0.00578
Train [82][2240/3239]	Time 0.330 (0.623)	Data Time 0.001 (0.019)	Loss 2.4591 (2.3454)	Entropy 0.90226 (0.90486)	Top-1 acc 65.234 (67.806)	Top-5 acc 85.938 (86.297)	lr 0.00578
Train [82][2250/3239]	Time 0.231 (0.622)	Data Time 0.001 (0.019)	Loss 2.4013 (2.3455)	Entropy 0.90222 (0.90484)	Top-1 acc 70.312 (67.807)	Top-5 acc 85.547 (86.296)	lr 0.00578
Train [82][2260/3239]	Time 0.224 (0.621)	Data Time 0.001 (0.019)	Loss 2.3982 (2.3455)	Entropy 0.90213 (0.90483)	Top-1 acc 69.141 (67.809)	Top-5 acc 85.547 (86.298)	lr 0.00578
Train [82][2270/3239]	Time 0.253 (0.621)	Data Time 0.001 (0.019)	Loss 2.3137 (2.3454)	Entropy 0.90213 (0.90482)	Top-1 acc 68.359 (67.809)	Top-5 acc 86.328 (86.296)	lr 0.00577
Train [82][2280/3239]	Time 0.369 (0.620)	Data Time 0.001 (0.019)	Loss 2.4609 (2.3454)	Entropy 0.90216 (0.90481)	Top-1 acc 62.500 (67.808)	Top-5 acc 83.984 (86.298)	lr 0.00577
Train [82][2290/3239]	Time 0.230 (0.620)	Data Time 0.001 (0.019)	Loss 2.6257 (2.3456)	Entropy 0.90220 (0.90480)	Top-1 acc 60.938 (67.799)	Top-5 acc 79.297 (86.296)	lr 0.00577
Train [82][2300/3239]	Time 0.251 (0.619)	Data Time 0.001 (0.019)	Loss 2.4605 (2.3456)	Entropy 0.90219 (0.90479)	Top-1 acc 66.016 (67.797)	Top-5 acc 83.203 (86.294)	lr 0.00577
Train [82][2310/3239]	Time 0.307 (0.618)	Data Time 0.002 (0.019)	Loss 2.4449 (2.3457)	Entropy 0.90221 (0.90477)	Top-1 acc 64.453 (67.794)	Top-5 acc 85.547 (86.293)	lr 0.00577
Train [82][2320/3239]	Time 2.569 (0.618)	Data Time 0.001 (0.019)	Loss 2.3160 (2.3456)	Entropy 0.90221 (0.90476)	Top-1 acc 66.406 (67.797)	Top-5 acc 87.109 (86.296)	lr 0.00577
Train [82][2330/3239]	Time 0.233 (0.616)	Data Time 0.001 (0.018)	Loss 2.3177 (2.3459)	Entropy 0.90221 (0.90475)	Top-1 acc 66.406 (67.790)	Top-5 acc 88.672 (86.294)	lr 0.00577
Train [82][2340/3239]	Time 0.230 (0.616)	Data Time 0.001 (0.018)	Loss 2.3696 (2.3459)	Entropy 0.90221 (0.90474)	Top-1 acc 64.844 (67.791)	Top-5 acc 86.719 (86.295)	lr 0.00577
Train [82][2350/3239]	Time 0.222 (0.615)	Data Time 0.001 (0.018)	Loss 2.3012 (2.3466)	Entropy 0.90223 (0.90473)	Top-1 acc 69.141 (67.777)	Top-5 acc 85.547 (86.287)	lr 0.00577
Train [82][2360/3239]	Time 0.250 (0.614)	Data Time 0.001 (0.018)	Loss 2.2887 (2.3464)	Entropy 0.90220 (0.90472)	Top-1 acc 66.016 (67.777)	Top-5 acc 88.281 (86.290)	lr 0.00577
Train [82][2370/3239]	Time 0.312 (0.614)	Data Time 0.001 (0.018)	Loss 2.3678 (2.3466)	Entropy 0.90214 (0.90471)	Top-1 acc 66.016 (67.763)	Top-5 acc 86.719 (86.291)	lr 0.00577
Train [82][2380/3239]	Time 0.225 (0.613)	Data Time 0.001 (0.018)	Loss 2.4003 (2.3468)	Entropy 0.90214 (0.90470)	Top-1 acc 68.750 (67.760)	Top-5 acc 85.547 (86.289)	lr 0.00577
Train [82][2390/3239]	Time 0.222 (0.613)	Data Time 0.001 (0.018)	Loss 2.3261 (2.3468)	Entropy 0.90204 (0.90469)	Top-1 acc 68.359 (67.760)	Top-5 acc 86.719 (86.290)	lr 0.00576
Train [82][2400/3239]	Time 0.235 (0.612)	Data Time 0.002 (0.018)	Loss 2.3120 (2.3468)	Entropy 0.90198 (0.90468)	Top-1 acc 70.312 (67.761)	Top-5 acc 87.109 (86.290)	lr 0.00576
Train [82][2410/3239]	Time 0.309 (0.612)	Data Time 0.001 (0.018)	Loss 2.3785 (2.3467)	Entropy 0.90210 (0.90467)	Top-1 acc 69.141 (67.764)	Top-5 acc 85.938 (86.291)	lr 0.00576
Train [82][2420/3239]	Time 0.295 (0.611)	Data Time 0.001 (0.018)	Loss 2.3265 (2.3467)	Entropy 0.90205 (0.90465)	Top-1 acc 69.531 (67.766)	Top-5 acc 85.938 (86.288)	lr 0.00576
Train [82][2430/3239]	Time 2.455 (0.610)	Data Time 0.001 (0.018)	Loss 2.3587 (2.3468)	Entropy 0.90205 (0.90464)	Top-1 acc 67.578 (67.764)	Top-5 acc 84.375 (86.287)	lr 0.00576
Train [82][2440/3239]	Time 0.214 (0.609)	Data Time 0.001 (0.018)	Loss 2.4437 (2.3470)	Entropy 0.90201 (0.90463)	Top-1 acc 66.406 (67.757)	Top-5 acc 84.375 (86.283)	lr 0.00576
Train [82][2450/3239]	Time 0.286 (0.608)	Data Time 0.001 (0.018)	Loss 2.2661 (2.3468)	Entropy 0.90201 (0.90462)	Top-1 acc 69.141 (67.760)	Top-5 acc 90.234 (86.289)	lr 0.00576
Train [82][2460/3239]	Time 0.266 (0.628)	Data Time 0.003 (0.018)	Loss 2.3761 (2.3466)	Entropy 0.90205 (0.90461)	Top-1 acc 67.188 (67.765)	Top-5 acc 85.938 (86.293)	lr 0.00576
Train [82][2470/3239]	Time 0.228 (0.628)	Data Time 0.002 (0.018)	Loss 2.1899 (2.3465)	Entropy 0.90202 (0.90460)	Top-1 acc 70.703 (67.766)	Top-5 acc 88.672 (86.295)	lr 0.00576
Train [82][2480/3239]	Time 0.234 (0.627)	Data Time 0.001 (0.017)	Loss 2.4108 (2.3465)	Entropy 0.90199 (0.90459)	Top-1 acc 67.969 (67.767)	Top-5 acc 85.156 (86.293)	lr 0.00576
Train [82][2490/3239]	Time 0.227 (0.626)	Data Time 0.001 (0.017)	Loss 2.2319 (2.3464)	Entropy 0.90199 (0.90458)	Top-1 acc 70.312 (67.766)	Top-5 acc 87.891 (86.297)	lr 0.00576
Train [82][2500/3239]	Time 0.221 (0.626)	Data Time 0.001 (0.017)	Loss 2.2072 (2.3465)	Entropy 0.90197 (0.90457)	Top-1 acc 71.094 (67.763)	Top-5 acc 87.891 (86.293)	lr 0.00575
Train [82][2510/3239]	Time 0.267 (0.625)	Data Time 0.001 (0.017)	Loss 2.2587 (2.3464)	Entropy 0.90200 (0.90456)	Top-1 acc 70.312 (67.768)	Top-5 acc 85.547 (86.294)	lr 0.00575
Train [82][2520/3239]	Time 0.261 (0.625)	Data Time 0.001 (0.017)	Loss 2.3645 (2.3464)	Entropy 0.90194 (0.90455)	Top-1 acc 68.359 (67.769)	Top-5 acc 86.328 (86.290)	lr 0.00575
Train [82][2530/3239]	Time 0.271 (0.624)	Data Time 0.001 (0.017)	Loss 2.4541 (2.3466)	Entropy 0.90192 (0.90454)	Top-1 acc 66.797 (67.767)	Top-5 acc 85.547 (86.285)	lr 0.00575
Train [82][2540/3239]	Time 2.614 (0.623)	Data Time 0.001 (0.017)	Loss 2.2301 (2.3468)	Entropy 0.90192 (0.90453)	Top-1 acc 67.188 (67.763)	Top-5 acc 87.891 (86.278)	lr 0.00575
Train [82][2550/3239]	Time 0.257 (0.622)	Data Time 0.002 (0.017)	Loss 2.3112 (2.3469)	Entropy 0.90181 (0.90452)	Top-1 acc 67.578 (67.758)	Top-5 acc 85.547 (86.277)	lr 0.00575
Train [82][2560/3239]	Time 0.222 (0.621)	Data Time 0.001 (0.017)	Loss 2.4235 (2.3470)	Entropy 0.90187 (0.90451)	Top-1 acc 66.797 (67.756)	Top-5 acc 86.719 (86.276)	lr 0.00575
Train [82][2570/3239]	Time 0.232 (0.621)	Data Time 0.001 (0.017)	Loss 2.5402 (2.3472)	Entropy 0.90188 (0.90450)	Top-1 acc 60.938 (67.749)	Top-5 acc 83.203 (86.275)	lr 0.00575
Train [82][2580/3239]	Time 0.236 (0.620)	Data Time 0.001 (0.017)	Loss 2.4540 (2.3473)	Entropy 0.90192 (0.90449)	Top-1 acc 64.844 (67.747)	Top-5 acc 85.156 (86.272)	lr 0.00575
Train [82][2590/3239]	Time 0.231 (0.620)	Data Time 0.001 (0.017)	Loss 2.6434 (2.3474)	Entropy 0.90189 (0.90448)	Top-1 acc 60.547 (67.744)	Top-5 acc 80.469 (86.271)	lr 0.00575
Train [82][2600/3239]	Time 0.219 (0.619)	Data Time 0.001 (0.017)	Loss 2.2868 (2.3474)	Entropy 0.90186 (0.90447)	Top-1 acc 71.094 (67.748)	Top-5 acc 85.547 (86.271)	lr 0.00575
Train [82][2610/3239]	Time 0.230 (0.619)	Data Time 0.001 (0.017)	Loss 2.4223 (2.3475)	Entropy 0.90183 (0.90446)	Top-1 acc 67.969 (67.743)	Top-5 acc 84.766 (86.268)	lr 0.00575
Train [82][2620/3239]	Time 0.234 (0.618)	Data Time 0.001 (0.017)	Loss 2.2647 (2.3475)	Entropy 0.90184 (0.90445)	Top-1 acc 69.531 (67.744)	Top-5 acc 87.500 (86.268)	lr 0.00574
Train [82][2630/3239]	Time 0.233 (0.617)	Data Time 0.001 (0.017)	Loss 2.3596 (2.3474)	Entropy 0.90181 (0.90444)	Top-1 acc 66.016 (67.744)	Top-5 acc 85.156 (86.272)	lr 0.00574
Train [82][2640/3239]	Time 0.243 (0.617)	Data Time 0.001 (0.016)	Loss 2.4697 (2.3474)	Entropy 0.90175 (0.90443)	Top-1 acc 67.578 (67.742)	Top-5 acc 84.766 (86.273)	lr 0.00574
Train [82][2650/3239]	Time 0.233 (0.616)	Data Time 0.001 (0.016)	Loss 2.1510 (2.3474)	Entropy 0.90173 (0.90442)	Top-1 acc 73.438 (67.744)	Top-5 acc 91.797 (86.273)	lr 0.00574
Train [82][2660/3239]	Time 0.259 (0.616)	Data Time 0.001 (0.016)	Loss 2.2148 (2.3473)	Entropy 0.90171 (0.90441)	Top-1 acc 71.094 (67.744)	Top-5 acc 88.281 (86.272)	lr 0.00574
Train [82][2670/3239]	Time 0.388 (0.615)	Data Time 0.001 (0.016)	Loss 2.4745 (2.3473)	Entropy 0.90168 (0.90440)	Top-1 acc 66.797 (67.744)	Top-5 acc 83.203 (86.273)	lr 0.00574
Train [82][2680/3239]	Time 0.276 (0.615)	Data Time 0.001 (0.016)	Loss 2.3192 (2.3472)	Entropy 0.90168 (0.90439)	Top-1 acc 67.578 (67.745)	Top-5 acc 85.938 (86.274)	lr 0.00574
Train [82][2690/3239]	Time 0.275 (0.614)	Data Time 0.001 (0.016)	Loss 2.4814 (2.3474)	Entropy 0.90173 (0.90438)	Top-1 acc 65.234 (67.739)	Top-5 acc 85.547 (86.273)	lr 0.00574
Train [82][2700/3239]	Time 0.261 (0.614)	Data Time 0.001 (0.016)	Loss 2.3825 (2.3475)	Entropy 0.90167 (0.90437)	Top-1 acc 63.672 (67.737)	Top-5 acc 85.156 (86.272)	lr 0.00574
Train [82][2710/3239]	Time 0.264 (0.613)	Data Time 0.001 (0.016)	Loss 2.1855 (2.3475)	Entropy 0.90168 (0.90436)	Top-1 acc 69.922 (67.736)	Top-5 acc 89.844 (86.271)	lr 0.00574
Train [82][2720/3239]	Time 0.282 (0.613)	Data Time 0.001 (0.016)	Loss 2.2024 (2.3475)	Entropy 0.90160 (0.90435)	Top-1 acc 70.312 (67.738)	Top-5 acc 87.500 (86.270)	lr 0.00574
Train [82][2730/3239]	Time 0.300 (0.612)	Data Time 0.001 (0.016)	Loss 2.3232 (2.3475)	Entropy 0.90155 (0.90434)	Top-1 acc 65.234 (67.733)	Top-5 acc 89.062 (86.273)	lr 0.00574
Train [82][2740/3239]	Time 0.245 (0.612)	Data Time 0.001 (0.016)	Loss 2.2187 (2.3475)	Entropy 0.90157 (0.90433)	Top-1 acc 72.266 (67.734)	Top-5 acc 89.062 (86.274)	lr 0.00573
Train [82][2750/3239]	Time 0.275 (0.611)	Data Time 0.001 (0.016)	Loss 2.2924 (2.3475)	Entropy 0.90156 (0.90432)	Top-1 acc 70.312 (67.735)	Top-5 acc 87.109 (86.274)	lr 0.00573
Train [82][2760/3239]	Time 0.382 (0.611)	Data Time 0.001 (0.016)	Loss 2.2056 (2.3475)	Entropy 0.90150 (0.90431)	Top-1 acc 71.484 (67.738)	Top-5 acc 87.500 (86.273)	lr 0.00573
Train [82][2770/3239]	Time 0.238 (0.610)	Data Time 0.002 (0.016)	Loss 2.4157 (2.3475)	Entropy 0.90142 (0.90430)	Top-1 acc 64.062 (67.739)	Top-5 acc 85.547 (86.272)	lr 0.00573
Train [82][2780/3239]	Time 0.230 (0.610)	Data Time 0.001 (0.016)	Loss 2.3367 (2.3476)	Entropy 0.90138 (0.90429)	Top-1 acc 69.531 (67.735)	Top-5 acc 86.719 (86.272)	lr 0.00573
Train [82][2790/3239]	Time 0.234 (0.609)	Data Time 0.001 (0.016)	Loss 2.3750 (2.3474)	Entropy 0.90133 (0.90428)	Top-1 acc 63.281 (67.738)	Top-5 acc 85.156 (86.276)	lr 0.00573
Train [82][2800/3239]	Time 0.340 (0.628)	Data Time 0.004 (0.016)	Loss 2.3042 (2.3475)	Entropy 0.90133 (0.90427)	Top-1 acc 71.875 (67.739)	Top-5 acc 84.375 (86.274)	lr 0.00573
Train [82][2810/3239]	Time 0.284 (0.627)	Data Time 0.002 (0.016)	Loss 2.2753 (2.3475)	Entropy 0.90129 (0.90426)	Top-1 acc 69.141 (67.736)	Top-5 acc 87.109 (86.272)	lr 0.00573
Train [82][2820/3239]	Time 0.234 (0.627)	Data Time 0.002 (0.016)	Loss 2.3331 (2.3476)	Entropy 0.90138 (0.90425)	Top-1 acc 68.359 (67.731)	Top-5 acc 84.766 (86.271)	lr 0.00573
Train [82][2830/3239]	Time 0.263 (0.626)	Data Time 0.002 (0.015)	Loss 2.1178 (2.3474)	Entropy 0.90139 (0.90424)	Top-1 acc 73.438 (67.734)	Top-5 acc 87.891 (86.275)	lr 0.00573
Train [82][2840/3239]	Time 0.331 (0.626)	Data Time 0.001 (0.015)	Loss 2.2634 (2.3475)	Entropy 0.90135 (0.90422)	Top-1 acc 67.188 (67.730)	Top-5 acc 88.281 (86.274)	lr 0.00573
Train [82][2850/3239]	Time 0.295 (0.625)	Data Time 0.001 (0.015)	Loss 2.3770 (2.3475)	Entropy 0.90123 (0.90421)	Top-1 acc 70.703 (67.730)	Top-5 acc 87.109 (86.278)	lr 0.00573
Train [82][2860/3239]	Time 0.316 (0.625)	Data Time 0.002 (0.015)	Loss 2.3584 (2.3475)	Entropy 0.90120 (0.90420)	Top-1 acc 67.188 (67.729)	Top-5 acc 84.766 (86.278)	lr 0.00572
Train [82][2870/3239]	Time 0.225 (0.624)	Data Time 0.001 (0.015)	Loss 2.2656 (2.3476)	Entropy 0.90113 (0.90419)	Top-1 acc 73.047 (67.723)	Top-5 acc 85.547 (86.272)	lr 0.00572
Train [82][2880/3239]	Time 0.243 (0.624)	Data Time 0.001 (0.015)	Loss 2.4828 (2.3475)	Entropy 0.90116 (0.90418)	Top-1 acc 64.062 (67.724)	Top-5 acc 85.156 (86.274)	lr 0.00572
Train [82][2890/3239]	Time 0.334 (0.623)	Data Time 0.001 (0.015)	Loss 2.1305 (2.3474)	Entropy 0.90121 (0.90417)	Top-1 acc 72.266 (67.727)	Top-5 acc 88.672 (86.277)	lr 0.00572
Train [82][2900/3239]	Time 0.291 (0.623)	Data Time 0.001 (0.015)	Loss 2.3667 (2.3476)	Entropy 0.90116 (0.90416)	Top-1 acc 67.188 (67.725)	Top-5 acc 86.328 (86.275)	lr 0.00572
Train [82][2910/3239]	Time 0.286 (0.622)	Data Time 0.001 (0.015)	Loss 2.2348 (2.3477)	Entropy 0.90112 (0.90415)	Top-1 acc 70.312 (67.724)	Top-5 acc 88.281 (86.271)	lr 0.00572
Train [82][2920/3239]	Time 0.240 (0.622)	Data Time 0.001 (0.015)	Loss 2.0849 (2.3476)	Entropy 0.90110 (0.90414)	Top-1 acc 75.391 (67.728)	Top-5 acc 90.625 (86.274)	lr 0.00572
Train [82][2930/3239]	Time 0.255 (0.621)	Data Time 0.001 (0.015)	Loss 2.2276 (2.3477)	Entropy 0.90107 (0.90413)	Top-1 acc 67.969 (67.722)	Top-5 acc 89.453 (86.275)	lr 0.00572
Train [82][2940/3239]	Time 0.279 (0.621)	Data Time 0.001 (0.015)	Loss 2.3353 (2.3476)	Entropy 0.90102 (0.90412)	Top-1 acc 67.969 (67.722)	Top-5 acc 85.547 (86.276)	lr 0.00572
Train [82][2950/3239]	Time 0.256 (0.620)	Data Time 0.001 (0.015)	Loss 2.3763 (2.3476)	Entropy 0.90103 (0.90411)	Top-1 acc 64.453 (67.725)	Top-5 acc 86.719 (86.276)	lr 0.00572
Train [82][2960/3239]	Time 0.271 (0.620)	Data Time 0.001 (0.015)	Loss 2.4574 (2.3477)	Entropy 0.90088 (0.90410)	Top-1 acc 65.625 (67.722)	Top-5 acc 82.422 (86.275)	lr 0.00572
Train [82][2970/3239]	Time 0.330 (0.620)	Data Time 0.001 (0.015)	Loss 2.5360 (2.3477)	Entropy 0.90090 (0.90409)	Top-1 acc 60.547 (67.720)	Top-5 acc 83.984 (86.274)	lr 0.00571
Train [82][2980/3239]	Time 0.224 (0.619)	Data Time 0.001 (0.015)	Loss 2.3422 (2.3478)	Entropy 0.90085 (0.90408)	Top-1 acc 64.844 (67.716)	Top-5 acc 83.984 (86.273)	lr 0.00571
Train [82][2990/3239]	Time 0.281 (0.619)	Data Time 0.001 (0.015)	Loss 2.4203 (2.3479)	Entropy 0.90083 (0.90407)	Top-1 acc 62.891 (67.712)	Top-5 acc 85.156 (86.272)	lr 0.00571
Train [82][3000/3239]	Time 0.297 (0.618)	Data Time 0.002 (0.015)	Loss 2.5135 (2.3479)	Entropy 0.90081 (0.90406)	Top-1 acc 62.500 (67.715)	Top-5 acc 85.938 (86.272)	lr 0.00571
Train [82][3010/3239]	Time 0.257 (0.618)	Data Time 0.001 (0.015)	Loss 2.3944 (2.3480)	Entropy 0.90079 (0.90405)	Top-1 acc 66.797 (67.711)	Top-5 acc 85.938 (86.268)	lr 0.00571
Train [82][3020/3239]	Time 0.315 (0.617)	Data Time 0.001 (0.015)	Loss 2.3666 (2.3482)	Entropy 0.90083 (0.90404)	Top-1 acc 70.703 (67.709)	Top-5 acc 85.156 (86.264)	lr 0.00571
Train [82][3030/3239]	Time 0.214 (0.617)	Data Time 0.001 (0.015)	Loss 2.5696 (2.3484)	Entropy 0.90086 (0.90402)	Top-1 acc 64.062 (67.705)	Top-5 acc 80.469 (86.260)	lr 0.00571
Train [82][3040/3239]	Time 0.266 (0.616)	Data Time 0.002 (0.015)	Loss 2.3324 (2.3483)	Entropy 0.90078 (0.90401)	Top-1 acc 69.922 (67.706)	Top-5 acc 86.328 (86.261)	lr 0.00571
Train [82][3050/3239]	Time 0.232 (0.616)	Data Time 0.001 (0.014)	Loss 2.2543 (2.3483)	Entropy 0.90075 (0.90400)	Top-1 acc 73.438 (67.707)	Top-5 acc 89.062 (86.261)	lr 0.00571
Train [82][3060/3239]	Time 0.305 (0.615)	Data Time 0.001 (0.014)	Loss 2.1913 (2.3483)	Entropy 0.90072 (0.90399)	Top-1 acc 72.266 (67.709)	Top-5 acc 89.453 (86.261)	lr 0.00571
Train [82][3070/3239]	Time 0.252 (0.615)	Data Time 0.001 (0.014)	Loss 2.4270 (2.3482)	Entropy 0.90071 (0.90398)	Top-1 acc 66.797 (67.709)	Top-5 acc 83.203 (86.262)	lr 0.00571
Train [82][3080/3239]	Time 0.216 (0.614)	Data Time 0.001 (0.014)	Loss 2.3941 (2.3484)	Entropy 0.90071 (0.90397)	Top-1 acc 68.359 (67.702)	Top-5 acc 86.328 (86.263)	lr 0.00571
Train [82][3090/3239]	Time 0.228 (0.614)	Data Time 0.002 (0.014)	Loss 2.2445 (2.3483)	Entropy 0.90076 (0.90396)	Top-1 acc 70.312 (67.706)	Top-5 acc 87.109 (86.262)	lr 0.00570
Train [82][3100/3239]	Time 0.229 (0.613)	Data Time 0.001 (0.014)	Loss 2.2804 (2.3483)	Entropy 0.90067 (0.90395)	Top-1 acc 70.312 (67.708)	Top-5 acc 87.891 (86.263)	lr 0.00570
Train [82][3110/3239]	Time 0.293 (0.613)	Data Time 0.001 (0.014)	Loss 2.5045 (2.3483)	Entropy 0.90053 (0.90394)	Top-1 acc 62.891 (67.705)	Top-5 acc 84.766 (86.262)	lr 0.00570
Train [82][3120/3239]	Time 0.281 (0.613)	Data Time 0.021 (0.014)	Loss 2.2542 (2.3484)	Entropy 0.90060 (0.90393)	Top-1 acc 65.625 (67.706)	Top-5 acc 90.625 (86.260)	lr 0.00570
Train [82][3130/3239]	Time 0.326 (0.628)	Data Time 0.004 (0.014)	Loss 2.3861 (2.3484)	Entropy 0.90061 (0.90392)	Top-1 acc 67.188 (67.709)	Top-5 acc 85.547 (86.259)	lr 0.00570
Train [82][3140/3239]	Time 0.247 (0.628)	Data Time 0.002 (0.014)	Loss 2.2950 (2.3484)	Entropy 0.90059 (0.90391)	Top-1 acc 66.797 (67.710)	Top-5 acc 87.500 (86.258)	lr 0.00570
Train [82][3150/3239]	Time 0.370 (0.627)	Data Time 0.002 (0.014)	Loss 2.4472 (2.3484)	Entropy 0.90057 (0.90390)	Top-1 acc 63.672 (67.707)	Top-5 acc 82.031 (86.256)	lr 0.00570
Train [82][3160/3239]	Time 0.287 (0.627)	Data Time 0.001 (0.014)	Loss 2.3224 (2.3484)	Entropy 0.90055 (0.90389)	Top-1 acc 66.797 (67.706)	Top-5 acc 87.109 (86.257)	lr 0.00570
Train [82][3170/3239]	Time 0.260 (0.626)	Data Time 0.001 (0.014)	Loss 2.3198 (2.3484)	Entropy 0.90060 (0.90388)	Top-1 acc 67.578 (67.705)	Top-5 acc 87.109 (86.257)	lr 0.00570
Train [82][3180/3239]	Time 0.205 (0.626)	Data Time 0.000 (0.014)	Loss 2.7634 (2.3486)	Entropy 0.90056 (0.90387)	Top-1 acc 62.109 (67.705)	Top-5 acc 77.734 (86.253)	lr 0.00570
Train [82][3190/3239]	Time 0.312 (0.625)	Data Time 0.000 (0.014)	Loss 2.2664 (2.3485)	Entropy 0.90048 (0.90386)	Top-1 acc 69.141 (67.704)	Top-5 acc 87.109 (86.255)	lr 0.00570
Train [82][3200/3239]	Time 0.222 (0.625)	Data Time 0.000 (0.014)	Loss 2.3052 (2.3485)	Entropy 0.90021 (0.90385)	Top-1 acc 68.359 (67.703)	Top-5 acc 87.500 (86.254)	lr 0.00570
Train [82][3210/3239]	Time 0.221 (0.624)	Data Time 0.000 (0.014)	Loss 2.4406 (2.3486)	Entropy 0.90017 (0.90383)	Top-1 acc 64.844 (67.702)	Top-5 acc 84.766 (86.254)	lr 0.00569
Train [82][3220/3239]	Time 0.240 (0.624)	Data Time 0.000 (0.014)	Loss 2.2608 (2.3486)	Entropy 0.90009 (0.90382)	Top-1 acc 71.484 (67.700)	Top-5 acc 88.281 (86.256)	lr 0.00569
Train [82][3230/3239]	Time 0.227 (0.623)	Data Time 0.000 (0.014)	Loss 2.2913 (2.3486)	Entropy 0.90006 (0.90381)	Top-1 acc 68.359 (67.698)	Top-5 acc 87.891 (86.255)	lr 0.00569
Train [82][3239/3239]	Time 2.436 (0.623)	Data Time 0.000 (0.014)	Loss 2.8686 (2.3488)	Entropy 0.90006 (0.90380)	Top-1 acc 55.556 (67.696)	Top-5 acc 76.543 (86.253)	lr 0.00569
==========Valid [82/120]	loss 1.299	top-1 acc 70.377 (70.377)	top-5 acc 88.608	Train top-1 67.696	top-5 86.253	Entropy 0.90006	Latency-None: 0.000ms	Flops: 546.53M
Train [83][0/3239]	Time 40.796 (40.796)	Data Time 39.202 (39.202)	Loss 2.1968 (2.1968)	Entropy 0.90001 (0.90001)	Top-1 acc 70.703 (70.703)	Top-5 acc 90.234 (90.234)	lr 0.00569
Train [83][10/3239]	Time 2.699 (4.257)	Data Time 0.002 (3.566)	Loss 2.3542 (2.3079)	Entropy 0.90001 (0.90001)	Top-1 acc 63.672 (68.182)	Top-5 acc 86.328 (86.612)	lr 0.00569
Train [83][20/3239]	Time 0.236 (2.345)	Data Time 0.001 (1.868)	Loss 2.2891 (2.3270)	Entropy 0.90005 (0.90003)	Top-1 acc 67.969 (68.062)	Top-5 acc 88.281 (86.458)	lr 0.00569
Train [83][30/3239]	Time 0.267 (1.743)	Data Time 0.001 (1.266)	Loss 2.4596 (2.3346)	Entropy 0.89994 (0.90000)	Top-1 acc 67.188 (67.918)	Top-5 acc 83.984 (86.416)	lr 0.00569
Train [83][40/3239]	Time 0.377 (1.439)	Data Time 0.001 (0.958)	Loss 2.1946 (2.3148)	Entropy 0.89992 (0.89998)	Top-1 acc 72.656 (68.750)	Top-5 acc 89.062 (86.652)	lr 0.00569
Train [83][50/3239]	Time 0.227 (1.251)	Data Time 0.001 (0.770)	Loss 2.4655 (2.3227)	Entropy 0.90000 (0.89998)	Top-1 acc 63.281 (68.482)	Top-5 acc 83.594 (86.497)	lr 0.00569
Train [83][60/3239]	Time 0.232 (1.124)	Data Time 0.001 (0.644)	Loss 2.2948 (2.3216)	Entropy 0.89983 (0.89997)	Top-1 acc 69.922 (68.603)	Top-5 acc 85.547 (86.520)	lr 0.00569
Train [83][70/3239]	Time 0.225 (1.031)	Data Time 0.001 (0.554)	Loss 2.1273 (2.3198)	Entropy 0.89987 (0.89995)	Top-1 acc 73.047 (68.607)	Top-5 acc 89.062 (86.488)	lr 0.00569
Train [83][80/3239]	Time 0.227 (0.962)	Data Time 0.001 (0.486)	Loss 2.2067 (2.3186)	Entropy 0.89981 (0.89994)	Top-1 acc 67.969 (68.456)	Top-5 acc 86.328 (86.507)	lr 0.00569
Train [83][90/3239]	Time 0.223 (0.910)	Data Time 0.001 (0.432)	Loss 2.2459 (2.3227)	Entropy 0.89978 (0.89992)	Top-1 acc 72.266 (68.462)	Top-5 acc 87.891 (86.487)	lr 0.00568
Train [83][100/3239]	Time 0.245 (0.867)	Data Time 0.001 (0.390)	Loss 2.2566 (2.3251)	Entropy 0.89978 (0.89991)	Top-1 acc 68.359 (68.441)	Top-5 acc 87.109 (86.402)	lr 0.00568
Train [83][110/3239]	Time 0.264 (0.832)	Data Time 0.001 (0.355)	Loss 2.1126 (2.3257)	Entropy 0.89984 (0.89990)	Top-1 acc 74.219 (68.377)	Top-5 acc 90.625 (86.462)	lr 0.00568
Train [83][120/3239]	Time 2.500 (0.803)	Data Time 0.001 (0.326)	Loss 2.3385 (2.3242)	Entropy 0.89984 (0.89989)	Top-1 acc 66.406 (68.401)	Top-5 acc 83.594 (86.467)	lr 0.00568
Train [83][130/3239]	Time 0.379 (0.761)	Data Time 0.001 (0.301)	Loss 2.3538 (2.3245)	Entropy 0.89981 (0.89989)	Top-1 acc 69.141 (68.362)	Top-5 acc 84.766 (86.477)	lr 0.00568
Train [83][140/3239]	Time 0.232 (0.742)	Data Time 0.001 (0.280)	Loss 2.3026 (2.3213)	Entropy 0.89977 (0.89988)	Top-1 acc 67.188 (68.440)	Top-5 acc 89.844 (86.580)	lr 0.00568
Train [83][150/3239]	Time 0.251 (0.725)	Data Time 0.001 (0.261)	Loss 2.4025 (2.3264)	Entropy 0.89978 (0.89987)	Top-1 acc 66.797 (68.295)	Top-5 acc 83.984 (86.489)	lr 0.00568
Train [83][160/3239]	Time 0.229 (0.709)	Data Time 0.001 (0.245)	Loss 2.4198 (2.3233)	Entropy 0.89962 (0.89986)	Top-1 acc 62.891 (68.393)	Top-5 acc 85.156 (86.595)	lr 0.00568
Train [83][170/3239]	Time 0.231 (0.696)	Data Time 0.001 (0.231)	Loss 2.4497 (2.3225)	Entropy 0.89961 (0.89985)	Top-1 acc 62.500 (68.407)	Top-5 acc 84.766 (86.591)	lr 0.00568
Train [83][180/3239]	Time 0.231 (0.684)	Data Time 0.001 (0.218)	Loss 2.3854 (2.3230)	Entropy 0.89965 (0.89983)	Top-1 acc 70.312 (68.398)	Top-5 acc 87.109 (86.619)	lr 0.00568
Train [83][190/3239]	Time 0.230 (0.674)	Data Time 0.001 (0.207)	Loss 2.2513 (2.3210)	Entropy 0.89961 (0.89982)	Top-1 acc 69.531 (68.472)	Top-5 acc 85.938 (86.653)	lr 0.00568
Train [83][200/3239]	Time 0.230 (0.663)	Data Time 0.001 (0.197)	Loss 2.3564 (2.3217)	Entropy 0.89959 (0.89981)	Top-1 acc 67.578 (68.503)	Top-5 acc 87.500 (86.651)	lr 0.00568
Train [83][210/3239]	Time 0.247 (0.654)	Data Time 0.002 (0.187)	Loss 2.3157 (2.3214)	Entropy 0.89955 (0.89980)	Top-1 acc 68.750 (68.507)	Top-5 acc 85.938 (86.660)	lr 0.00567
Train [83][220/3239]	Time 0.235 (0.647)	Data Time 0.001 (0.179)	Loss 2.2808 (2.3216)	Entropy 0.89951 (0.89979)	Top-1 acc 66.797 (68.492)	Top-5 acc 85.547 (86.641)	lr 0.00567
Train [83][230/3239]	Time 2.625 (0.640)	Data Time 0.001 (0.171)	Loss 2.2697 (2.3197)	Entropy 0.89951 (0.89978)	Top-1 acc 70.312 (68.562)	Top-5 acc 87.500 (86.661)	lr 0.00567
Train [83][240/3239]	Time 0.266 (0.624)	Data Time 0.002 (0.164)	Loss 2.1787 (2.3185)	Entropy 0.89953 (0.89977)	Top-1 acc 75.000 (68.598)	Top-5 acc 89.453 (86.678)	lr 0.00567
Train [83][250/3239]	Time 0.220 (0.843)	Data Time 0.002 (0.158)	Loss 2.2221 (2.3188)	Entropy 0.89944 (0.89975)	Top-1 acc 72.656 (68.597)	Top-5 acc 86.328 (86.663)	lr 0.00567
Train [83][260/3239]	Time 0.314 (0.829)	Data Time 0.002 (0.152)	Loss 2.4199 (2.3215)	Entropy 0.89945 (0.89974)	Top-1 acc 67.188 (68.560)	Top-5 acc 85.938 (86.609)	lr 0.00567
Train [83][270/3239]	Time 0.224 (0.817)	Data Time 0.001 (0.146)	Loss 2.2979 (2.3219)	Entropy 0.89940 (0.89973)	Top-1 acc 70.703 (68.542)	Top-5 acc 87.109 (86.616)	lr 0.00567
Train [83][280/3239]	Time 0.230 (0.804)	Data Time 0.001 (0.141)	Loss 2.5570 (2.3237)	Entropy 0.89938 (0.89972)	Top-1 acc 62.500 (68.476)	Top-5 acc 83.984 (86.603)	lr 0.00567
Train [83][290/3239]	Time 0.278 (0.794)	Data Time 0.002 (0.136)	Loss 2.2862 (2.3243)	Entropy 0.89934 (0.89971)	Top-1 acc 66.406 (68.444)	Top-5 acc 90.625 (86.626)	lr 0.00567
Train [83][300/3239]	Time 0.325 (0.784)	Data Time 0.001 (0.132)	Loss 2.3354 (2.3232)	Entropy 0.89932 (0.89969)	Top-1 acc 65.234 (68.440)	Top-5 acc 89.062 (86.647)	lr 0.00567
Train [83][310/3239]	Time 0.237 (0.775)	Data Time 0.001 (0.128)	Loss 2.1856 (2.3216)	Entropy 0.89937 (0.89968)	Top-1 acc 70.312 (68.462)	Top-5 acc 91.797 (86.686)	lr 0.00567
Train [83][320/3239]	Time 0.243 (0.765)	Data Time 0.001 (0.124)	Loss 2.1862 (2.3225)	Entropy 0.89939 (0.89967)	Top-1 acc 73.438 (68.434)	Top-5 acc 88.672 (86.680)	lr 0.00566
Train [83][330/3239]	Time 0.254 (0.757)	Data Time 0.001 (0.120)	Loss 2.3942 (2.3225)	Entropy 0.89937 (0.89966)	Top-1 acc 64.453 (68.423)	Top-5 acc 85.156 (86.679)	lr 0.00566
Train [83][340/3239]	Time 2.547 (0.749)	Data Time 0.002 (0.117)	Loss 2.3216 (2.3220)	Entropy 0.89937 (0.89966)	Top-1 acc 69.531 (68.424)	Top-5 acc 87.891 (86.705)	lr 0.00566
Train [83][350/3239]	Time 0.274 (0.735)	Data Time 0.001 (0.113)	Loss 2.5253 (2.3218)	Entropy 0.89933 (0.89965)	Top-1 acc 65.625 (68.451)	Top-5 acc 82.031 (86.711)	lr 0.00566
Train [83][360/3239]	Time 0.251 (0.728)	Data Time 0.001 (0.110)	Loss 2.2655 (2.3220)	Entropy 0.89928 (0.89964)	Top-1 acc 74.219 (68.454)	Top-5 acc 87.109 (86.712)	lr 0.00566
Train [83][370/3239]	Time 0.213 (0.722)	Data Time 0.001 (0.107)	Loss 2.3300 (2.3226)	Entropy 0.89921 (0.89962)	Top-1 acc 69.531 (68.466)	Top-5 acc 87.109 (86.687)	lr 0.00566
Train [83][380/3239]	Time 0.236 (0.716)	Data Time 0.002 (0.105)	Loss 2.3305 (2.3226)	Entropy 0.89914 (0.89961)	Top-1 acc 67.578 (68.469)	Top-5 acc 85.938 (86.682)	lr 0.00566
Train [83][390/3239]	Time 0.228 (0.710)	Data Time 0.001 (0.102)	Loss 2.2958 (2.3222)	Entropy 0.89912 (0.89960)	Top-1 acc 68.750 (68.459)	Top-5 acc 85.547 (86.687)	lr 0.00566
Train [83][400/3239]	Time 0.242 (0.704)	Data Time 0.001 (0.099)	Loss 2.5721 (2.3225)	Entropy 0.89911 (0.89959)	Top-1 acc 61.328 (68.430)	Top-5 acc 81.641 (86.694)	lr 0.00566
Train [83][410/3239]	Time 0.238 (0.699)	Data Time 0.001 (0.097)	Loss 2.3548 (2.3225)	Entropy 0.89913 (0.89958)	Top-1 acc 67.578 (68.417)	Top-5 acc 85.547 (86.688)	lr 0.00566
Train [83][420/3239]	Time 0.221 (0.694)	Data Time 0.001 (0.095)	Loss 2.2720 (2.3220)	Entropy 0.89912 (0.89957)	Top-1 acc 69.531 (68.419)	Top-5 acc 87.109 (86.686)	lr 0.00566
Train [83][430/3239]	Time 0.220 (0.688)	Data Time 0.001 (0.093)	Loss 2.2044 (2.3222)	Entropy 0.89907 (0.89956)	Top-1 acc 71.484 (68.412)	Top-5 acc 88.281 (86.691)	lr 0.00566
Train [83][440/3239]	Time 0.229 (0.684)	Data Time 0.001 (0.091)	Loss 2.3592 (2.3221)	Entropy 0.89883 (0.89954)	Top-1 acc 69.922 (68.431)	Top-5 acc 84.375 (86.703)	lr 0.00565
Train [83][450/3239]	Time 2.641 (0.679)	Data Time 0.004 (0.089)	Loss 2.3671 (2.3230)	Entropy 0.89883 (0.89953)	Top-1 acc 67.188 (68.408)	Top-5 acc 85.938 (86.692)	lr 0.00565
Train [83][460/3239]	Time 0.261 (0.670)	Data Time 0.001 (0.087)	Loss 2.3187 (2.3227)	Entropy 0.89882 (0.89951)	Top-1 acc 69.531 (68.411)	Top-5 acc 85.938 (86.695)	lr 0.00565
Train [83][470/3239]	Time 0.308 (0.666)	Data Time 0.001 (0.085)	Loss 2.3986 (2.3223)	Entropy 0.89878 (0.89950)	Top-1 acc 67.969 (68.448)	Top-5 acc 83.984 (86.691)	lr 0.00565
Train [83][480/3239]	Time 0.225 (0.662)	Data Time 0.001 (0.083)	Loss 2.3736 (2.3242)	Entropy 0.89871 (0.89948)	Top-1 acc 66.797 (68.407)	Top-5 acc 85.156 (86.649)	lr 0.00565
Train [83][490/3239]	Time 0.221 (0.658)	Data Time 0.001 (0.081)	Loss 2.3192 (2.3258)	Entropy 0.89864 (0.89946)	Top-1 acc 69.141 (68.375)	Top-5 acc 89.453 (86.626)	lr 0.00565
Train [83][500/3239]	Time 0.215 (0.654)	Data Time 0.001 (0.080)	Loss 2.4541 (2.3267)	Entropy 0.89855 (0.89945)	Top-1 acc 65.234 (68.350)	Top-5 acc 85.156 (86.618)	lr 0.00565
Train [83][510/3239]	Time 0.209 (0.651)	Data Time 0.001 (0.078)	Loss 2.3152 (2.3274)	Entropy 0.89855 (0.89943)	Top-1 acc 68.750 (68.351)	Top-5 acc 85.938 (86.603)	lr 0.00565
Train [83][520/3239]	Time 0.237 (0.648)	Data Time 0.002 (0.077)	Loss 2.3708 (2.3280)	Entropy 0.89852 (0.89941)	Top-1 acc 68.359 (68.341)	Top-5 acc 84.375 (86.592)	lr 0.00565
Train [83][530/3239]	Time 0.228 (0.645)	Data Time 0.001 (0.075)	Loss 2.3059 (2.3278)	Entropy 0.89858 (0.89940)	Top-1 acc 69.531 (68.346)	Top-5 acc 87.891 (86.600)	lr 0.00565
Train [83][540/3239]	Time 0.212 (0.641)	Data Time 0.001 (0.074)	Loss 2.4016 (2.3277)	Entropy 0.89867 (0.89938)	Top-1 acc 68.750 (68.350)	Top-5 acc 83.984 (86.597)	lr 0.00565
Train [83][550/3239]	Time 0.249 (0.638)	Data Time 0.001 (0.073)	Loss 2.2477 (2.3267)	Entropy 0.89865 (0.89937)	Top-1 acc 67.188 (68.393)	Top-5 acc 88.281 (86.606)	lr 0.00565
Train [83][560/3239]	Time 2.686 (0.636)	Data Time 0.002 (0.072)	Loss 2.2382 (2.3267)	Entropy 0.89865 (0.89936)	Top-1 acc 71.484 (68.394)	Top-5 acc 89.062 (86.604)	lr 0.00564
Train [83][570/3239]	Time 0.237 (0.629)	Data Time 0.001 (0.070)	Loss 2.3175 (2.3270)	Entropy 0.89863 (0.89934)	Top-1 acc 69.922 (68.398)	Top-5 acc 85.156 (86.602)	lr 0.00564
Train [83][580/3239]	Time 0.215 (0.626)	Data Time 0.001 (0.069)	Loss 2.4457 (2.3270)	Entropy 0.89871 (0.89933)	Top-1 acc 62.109 (68.377)	Top-5 acc 87.109 (86.609)	lr 0.00564
Train [83][590/3239]	Time 0.232 (0.623)	Data Time 0.001 (0.068)	Loss 2.2844 (2.3270)	Entropy 0.89868 (0.89932)	Top-1 acc 66.797 (68.385)	Top-5 acc 85.938 (86.601)	lr 0.00564
Train [83][600/3239]	Time 0.373 (0.622)	Data Time 0.001 (0.067)	Loss 2.2849 (2.3268)	Entropy 0.89871 (0.89931)	Top-1 acc 68.750 (68.396)	Top-5 acc 87.500 (86.613)	lr 0.00564
Train [83][610/3239]	Time 0.283 (0.700)	Data Time 0.005 (0.066)	Loss 2.3681 (2.3269)	Entropy 0.89873 (0.89930)	Top-1 acc 68.750 (68.389)	Top-5 acc 83.984 (86.613)	lr 0.00564
Train [83][620/3239]	Time 0.235 (0.699)	Data Time 0.002 (0.065)	Loss 2.2434 (2.3270)	Entropy 0.89871 (0.89929)	Top-1 acc 69.141 (68.382)	Top-5 acc 85.938 (86.601)	lr 0.00564
Train [83][630/3239]	Time 0.231 (0.695)	Data Time 0.002 (0.064)	Loss 2.2917 (2.3262)	Entropy 0.89870 (0.89928)	Top-1 acc 69.141 (68.403)	Top-5 acc 88.281 (86.612)	lr 0.00564
Train [83][640/3239]	Time 0.266 (0.692)	Data Time 0.001 (0.063)	Loss 2.1536 (2.3260)	Entropy 0.89863 (0.89927)	Top-1 acc 68.750 (68.403)	Top-5 acc 90.234 (86.617)	lr 0.00564
Train [83][650/3239]	Time 0.221 (0.689)	Data Time 0.001 (0.062)	Loss 2.3741 (2.3257)	Entropy 0.89865 (0.89926)	Top-1 acc 63.672 (68.402)	Top-5 acc 85.938 (86.616)	lr 0.00564
Train [83][660/3239]	Time 0.295 (0.685)	Data Time 0.001 (0.061)	Loss 2.4958 (2.3262)	Entropy 0.89865 (0.89925)	Top-1 acc 66.016 (68.383)	Top-5 acc 83.203 (86.608)	lr 0.00564
Train [83][670/3239]	Time 2.447 (0.682)	Data Time 0.001 (0.060)	Loss 2.2600 (2.3261)	Entropy 0.89865 (0.89924)	Top-1 acc 67.969 (68.376)	Top-5 acc 87.891 (86.619)	lr 0.00564
Train [83][680/3239]	Time 0.249 (0.676)	Data Time 0.002 (0.059)	Loss 2.4014 (2.3261)	Entropy 0.89861 (0.89924)	Top-1 acc 66.016 (68.379)	Top-5 acc 84.766 (86.617)	lr 0.00563
Train [83][690/3239]	Time 0.352 (0.673)	Data Time 0.002 (0.058)	Loss 2.1271 (2.3262)	Entropy 0.89858 (0.89923)	Top-1 acc 74.219 (68.383)	Top-5 acc 90.234 (86.614)	lr 0.00563
Train [83][700/3239]	Time 0.237 (0.670)	Data Time 0.001 (0.058)	Loss 2.3563 (2.3259)	Entropy 0.89856 (0.89922)	Top-1 acc 68.750 (68.396)	Top-5 acc 86.328 (86.612)	lr 0.00563
Train [83][710/3239]	Time 0.226 (0.667)	Data Time 0.001 (0.057)	Loss 2.2954 (2.3262)	Entropy 0.89858 (0.89921)	Top-1 acc 67.969 (68.386)	Top-5 acc 87.109 (86.608)	lr 0.00563
Train [83][720/3239]	Time 0.230 (0.665)	Data Time 0.001 (0.056)	Loss 2.5482 (2.3262)	Entropy 0.89852 (0.89920)	Top-1 acc 62.109 (68.388)	Top-5 acc 83.984 (86.610)	lr 0.00563
Train [83][730/3239]	Time 0.332 (0.663)	Data Time 0.001 (0.055)	Loss 2.1669 (2.3262)	Entropy 0.89852 (0.89919)	Top-1 acc 75.391 (68.395)	Top-5 acc 89.453 (86.605)	lr 0.00563
Train [83][740/3239]	Time 0.230 (0.660)	Data Time 0.001 (0.055)	Loss 2.5364 (2.3272)	Entropy 0.89845 (0.89918)	Top-1 acc 66.016 (68.368)	Top-5 acc 83.203 (86.593)	lr 0.00563
Train [83][750/3239]	Time 0.242 (0.658)	Data Time 0.001 (0.054)	Loss 2.3258 (2.3276)	Entropy 0.89843 (0.89917)	Top-1 acc 66.797 (68.357)	Top-5 acc 90.234 (86.592)	lr 0.00563
Train [83][760/3239]	Time 0.236 (0.655)	Data Time 0.001 (0.053)	Loss 2.4906 (2.3276)	Entropy 0.89838 (0.89916)	Top-1 acc 66.406 (68.359)	Top-5 acc 85.156 (86.590)	lr 0.00563
Train [83][770/3239]	Time 0.237 (0.653)	Data Time 0.001 (0.053)	Loss 2.3240 (2.3277)	Entropy 0.89827 (0.89915)	Top-1 acc 66.797 (68.361)	Top-5 acc 86.719 (86.579)	lr 0.00563
Train [83][780/3239]	Time 2.423 (0.651)	Data Time 0.002 (0.052)	Loss 2.3638 (2.3280)	Entropy 0.89827 (0.89914)	Top-1 acc 65.234 (68.348)	Top-5 acc 84.766 (86.569)	lr 0.00563
Train [83][790/3239]	Time 0.244 (0.645)	Data Time 0.001 (0.051)	Loss 2.3101 (2.3280)	Entropy 0.89818 (0.89913)	Top-1 acc 68.750 (68.347)	Top-5 acc 86.719 (86.572)	lr 0.00563
Train [83][800/3239]	Time 0.225 (0.643)	Data Time 0.001 (0.051)	Loss 2.2893 (2.3281)	Entropy 0.89810 (0.89911)	Top-1 acc 69.531 (68.356)	Top-5 acc 85.547 (86.570)	lr 0.00562
Train [83][810/3239]	Time 0.225 (0.641)	Data Time 0.001 (0.050)	Loss 2.4270 (2.3287)	Entropy 0.89803 (0.89910)	Top-1 acc 66.797 (68.342)	Top-5 acc 85.938 (86.558)	lr 0.00562
Train [83][820/3239]	Time 0.330 (0.639)	Data Time 0.001 (0.049)	Loss 2.4298 (2.3290)	Entropy 0.89798 (0.89909)	Top-1 acc 64.062 (68.326)	Top-5 acc 83.984 (86.543)	lr 0.00562
Train [83][830/3239]	Time 0.225 (0.637)	Data Time 0.001 (0.049)	Loss 2.2723 (2.3286)	Entropy 0.89795 (0.89907)	Top-1 acc 68.359 (68.333)	Top-5 acc 89.062 (86.552)	lr 0.00562
Train [83][840/3239]	Time 0.241 (0.635)	Data Time 0.001 (0.048)	Loss 2.3567 (2.3294)	Entropy 0.89791 (0.89906)	Top-1 acc 62.891 (68.313)	Top-5 acc 89.453 (86.538)	lr 0.00562
Train [83][850/3239]	Time 0.221 (0.633)	Data Time 0.001 (0.048)	Loss 2.4972 (2.3298)	Entropy 0.89781 (0.89905)	Top-1 acc 65.234 (68.300)	Top-5 acc 83.203 (86.530)	lr 0.00562
Train [83][860/3239]	Time 0.316 (0.632)	Data Time 0.001 (0.047)	Loss 2.3257 (2.3300)	Entropy 0.89780 (0.89903)	Top-1 acc 66.797 (68.293)	Top-5 acc 87.109 (86.517)	lr 0.00562
Train [83][870/3239]	Time 0.237 (0.630)	Data Time 0.001 (0.047)	Loss 2.4558 (2.3299)	Entropy 0.89779 (0.89902)	Top-1 acc 65.625 (68.302)	Top-5 acc 85.938 (86.517)	lr 0.00562
Train [83][880/3239]	Time 0.274 (0.628)	Data Time 0.001 (0.046)	Loss 2.3418 (2.3302)	Entropy 0.89781 (0.89900)	Top-1 acc 68.359 (68.302)	Top-5 acc 89.062 (86.514)	lr 0.00562
Train [83][890/3239]	Time 2.586 (0.627)	Data Time 0.001 (0.046)	Loss 2.3350 (2.3304)	Entropy 0.89781 (0.89899)	Top-1 acc 63.281 (68.286)	Top-5 acc 86.328 (86.509)	lr 0.00562
Train [83][900/3239]	Time 0.283 (0.622)	Data Time 0.001 (0.045)	Loss 2.3172 (2.3305)	Entropy 0.89778 (0.89898)	Top-1 acc 70.312 (68.284)	Top-5 acc 87.500 (86.504)	lr 0.00562
Train [83][910/3239]	Time 0.229 (0.621)	Data Time 0.001 (0.045)	Loss 2.4034 (2.3300)	Entropy 0.89782 (0.89896)	Top-1 acc 66.016 (68.289)	Top-5 acc 84.766 (86.518)	lr 0.00562
Train [83][920/3239]	Time 0.225 (0.619)	Data Time 0.001 (0.044)	Loss 2.3080 (2.3300)	Entropy 0.89779 (0.89895)	Top-1 acc 67.188 (68.291)	Top-5 acc 88.672 (86.520)	lr 0.00561
Train [83][930/3239]	Time 0.231 (0.618)	Data Time 0.001 (0.044)	Loss 2.5075 (2.3299)	Entropy 0.89774 (0.89894)	Top-1 acc 65.234 (68.299)	Top-5 acc 83.984 (86.524)	lr 0.00561
Train [83][940/3239]	Time 0.219 (0.616)	Data Time 0.001 (0.043)	Loss 2.2605 (2.3301)	Entropy 0.89769 (0.89893)	Top-1 acc 70.703 (68.295)	Top-5 acc 86.719 (86.511)	lr 0.00561
Train [83][950/3239]	Time 0.305 (0.614)	Data Time 0.001 (0.043)	Loss 2.3013 (2.3302)	Entropy 0.89763 (0.89891)	Top-1 acc 67.578 (68.273)	Top-5 acc 87.109 (86.516)	lr 0.00561
Train [83][960/3239]	Time 0.217 (0.613)	Data Time 0.001 (0.042)	Loss 2.3089 (2.3304)	Entropy 0.89756 (0.89890)	Top-1 acc 68.750 (68.276)	Top-5 acc 84.766 (86.513)	lr 0.00561
Train [83][970/3239]	Time 0.299 (0.663)	Data Time 0.003 (0.042)	Loss 2.1659 (2.3309)	Entropy 0.89754 (0.89888)	Top-1 acc 73.047 (68.268)	Top-5 acc 88.672 (86.516)	lr 0.00561
Train [83][980/3239]	Time 0.229 (0.662)	Data Time 0.002 (0.042)	Loss 2.3302 (2.3310)	Entropy 0.89754 (0.89887)	Top-1 acc 69.531 (68.270)	Top-5 acc 86.719 (86.513)	lr 0.00561
Train [83][990/3239]	Time 0.360 (0.660)	Data Time 0.002 (0.041)	Loss 2.3500 (2.3311)	Entropy 0.89754 (0.89886)	Top-1 acc 67.969 (68.260)	Top-5 acc 85.938 (86.517)	lr 0.00561
Train [83][1000/3239]	Time 2.519 (0.658)	Data Time 0.001 (0.041)	Loss 2.4023 (2.3311)	Entropy 0.89754 (0.89884)	Top-1 acc 67.188 (68.263)	Top-5 acc 86.328 (86.522)	lr 0.00561
Train [83][1010/3239]	Time 0.251 (0.654)	Data Time 0.001 (0.040)	Loss 2.2741 (2.3309)	Entropy 0.89748 (0.89883)	Top-1 acc 67.969 (68.268)	Top-5 acc 89.453 (86.522)	lr 0.00561
Train [83][1020/3239]	Time 0.229 (0.652)	Data Time 0.001 (0.040)	Loss 2.3379 (2.3309)	Entropy 0.89746 (0.89882)	Top-1 acc 71.094 (68.268)	Top-5 acc 88.672 (86.525)	lr 0.00561
Train [83][1030/3239]	Time 0.217 (0.651)	Data Time 0.001 (0.040)	Loss 2.4769 (2.3316)	Entropy 0.89740 (0.89880)	Top-1 acc 64.062 (68.244)	Top-5 acc 83.594 (86.512)	lr 0.00560
Train [83][1040/3239]	Time 0.247 (0.649)	Data Time 0.002 (0.039)	Loss 2.2704 (2.3315)	Entropy 0.89737 (0.89879)	Top-1 acc 69.531 (68.243)	Top-5 acc 88.281 (86.518)	lr 0.00560
Train [83][1050/3239]	Time 0.235 (0.648)	Data Time 0.001 (0.039)	Loss 2.4837 (2.3317)	Entropy 0.89730 (0.89878)	Top-1 acc 63.281 (68.241)	Top-5 acc 82.812 (86.515)	lr 0.00560
Train [83][1060/3239]	Time 0.225 (0.646)	Data Time 0.001 (0.039)	Loss 2.3005 (2.3316)	Entropy 0.89728 (0.89876)	Top-1 acc 65.234 (68.238)	Top-5 acc 87.891 (86.514)	lr 0.00560
Train [83][1070/3239]	Time 0.238 (0.644)	Data Time 0.002 (0.038)	Loss 2.3868 (2.3311)	Entropy 0.89730 (0.89875)	Top-1 acc 67.578 (68.258)	Top-5 acc 86.328 (86.525)	lr 0.00560
Train [83][1080/3239]	Time 0.325 (0.643)	Data Time 0.001 (0.038)	Loss 2.3146 (2.3307)	Entropy 0.89729 (0.89873)	Top-1 acc 69.531 (68.277)	Top-5 acc 86.328 (86.534)	lr 0.00560
Train [83][1090/3239]	Time 0.245 (0.641)	Data Time 0.001 (0.038)	Loss 2.3167 (2.3305)	Entropy 0.89715 (0.89872)	Top-1 acc 70.312 (68.279)	Top-5 acc 87.500 (86.540)	lr 0.00560
Train [83][1100/3239]	Time 0.260 (0.639)	Data Time 0.001 (0.037)	Loss 2.3726 (2.3307)	Entropy 0.89710 (0.89871)	Top-1 acc 64.844 (68.261)	Top-5 acc 87.500 (86.537)	lr 0.00560
Train [83][1110/3239]	Time 2.419 (0.638)	Data Time 0.001 (0.037)	Loss 2.4227 (2.3309)	Entropy 0.89710 (0.89869)	Top-1 acc 68.359 (68.260)	Top-5 acc 82.812 (86.529)	lr 0.00560
Train [83][1120/3239]	Time 0.234 (0.634)	Data Time 0.001 (0.037)	Loss 2.2797 (2.3309)	Entropy 0.89706 (0.89868)	Top-1 acc 66.016 (68.265)	Top-5 acc 87.500 (86.533)	lr 0.00560
Train [83][1130/3239]	Time 0.250 (0.633)	Data Time 0.001 (0.036)	Loss 2.3002 (2.3311)	Entropy 0.89709 (0.89866)	Top-1 acc 68.750 (68.259)	Top-5 acc 86.719 (86.524)	lr 0.00560
Train [83][1140/3239]	Time 0.229 (0.631)	Data Time 0.001 (0.036)	Loss 2.4267 (2.3313)	Entropy 0.89706 (0.89865)	Top-1 acc 62.500 (68.251)	Top-5 acc 82.812 (86.517)	lr 0.00560
Train [83][1150/3239]	Time 0.218 (0.630)	Data Time 0.001 (0.036)	Loss 2.2698 (2.3315)	Entropy 0.89700 (0.89864)	Top-1 acc 68.359 (68.235)	Top-5 acc 87.500 (86.516)	lr 0.00559
Train [83][1160/3239]	Time 0.227 (0.629)	Data Time 0.001 (0.035)	Loss 2.3428 (2.3312)	Entropy 0.89694 (0.89862)	Top-1 acc 69.141 (68.247)	Top-5 acc 84.375 (86.524)	lr 0.00559
Train [83][1170/3239]	Time 0.338 (0.627)	Data Time 0.001 (0.035)	Loss 2.3110 (2.3313)	Entropy 0.89697 (0.89861)	Top-1 acc 67.969 (68.235)	Top-5 acc 87.891 (86.525)	lr 0.00559
Train [83][1180/3239]	Time 0.229 (0.626)	Data Time 0.001 (0.035)	Loss 2.3747 (2.3315)	Entropy 0.89699 (0.89859)	Top-1 acc 66.406 (68.230)	Top-5 acc 89.453 (86.524)	lr 0.00559
Train [83][1190/3239]	Time 0.233 (0.625)	Data Time 0.001 (0.035)	Loss 2.2960 (2.3313)	Entropy 0.89681 (0.89858)	Top-1 acc 69.531 (68.236)	Top-5 acc 86.719 (86.527)	lr 0.00559
Train [83][1200/3239]	Time 0.221 (0.624)	Data Time 0.001 (0.034)	Loss 2.3437 (2.3313)	Entropy 0.89684 (0.89856)	Top-1 acc 70.703 (68.235)	Top-5 acc 87.891 (86.530)	lr 0.00559
Train [83][1210/3239]	Time 0.236 (0.622)	Data Time 0.001 (0.034)	Loss 2.4554 (2.3318)	Entropy 0.89679 (0.89855)	Top-1 acc 65.234 (68.220)	Top-5 acc 83.203 (86.523)	lr 0.00559
Train [83][1220/3239]	Time 2.663 (0.621)	Data Time 0.001 (0.034)	Loss 2.5033 (2.3323)	Entropy 0.89679 (0.89854)	Top-1 acc 65.625 (68.208)	Top-5 acc 85.547 (86.513)	lr 0.00559
Train [83][1230/3239]	Time 0.246 (0.618)	Data Time 0.001 (0.034)	Loss 2.4531 (2.3324)	Entropy 0.89671 (0.89852)	Top-1 acc 64.453 (68.205)	Top-5 acc 85.156 (86.511)	lr 0.00559
Train [83][1240/3239]	Time 0.223 (0.617)	Data Time 0.001 (0.033)	Loss 2.2592 (2.3324)	Entropy 0.89670 (0.89851)	Top-1 acc 66.797 (68.200)	Top-5 acc 87.500 (86.513)	lr 0.00559
Train [83][1250/3239]	Time 0.229 (0.616)	Data Time 0.001 (0.033)	Loss 2.4993 (2.3328)	Entropy 0.89670 (0.89849)	Top-1 acc 63.281 (68.191)	Top-5 acc 83.984 (86.501)	lr 0.00559
Train [83][1260/3239]	Time 0.241 (0.615)	Data Time 0.001 (0.033)	Loss 2.3989 (2.3327)	Entropy 0.89671 (0.89848)	Top-1 acc 64.844 (68.192)	Top-5 acc 85.156 (86.504)	lr 0.00559
Train [83][1270/3239]	Time 0.245 (0.613)	Data Time 0.002 (0.033)	Loss 2.3059 (2.3332)	Entropy 0.89658 (0.89846)	Top-1 acc 69.141 (68.178)	Top-5 acc 88.672 (86.499)	lr 0.00558
Train [83][1280/3239]	Time 0.228 (0.612)	Data Time 0.001 (0.032)	Loss 2.2598 (2.3333)	Entropy 0.89661 (0.89845)	Top-1 acc 69.141 (68.176)	Top-5 acc 89.062 (86.490)	lr 0.00558
Train [83][1290/3239]	Time 0.245 (0.611)	Data Time 0.001 (0.032)	Loss 2.5874 (2.3332)	Entropy 0.89659 (0.89843)	Top-1 acc 60.938 (68.167)	Top-5 acc 80.078 (86.490)	lr 0.00558
Train [83][1300/3239]	Time 0.322 (0.610)	Data Time 0.001 (0.032)	Loss 2.2120 (2.3330)	Entropy 0.89657 (0.89842)	Top-1 acc 72.656 (68.174)	Top-5 acc 88.672 (86.498)	lr 0.00558
Train [83][1310/3239]	Time 0.223 (0.609)	Data Time 0.001 (0.032)	Loss 2.1700 (2.3327)	Entropy 0.89650 (0.89841)	Top-1 acc 69.531 (68.172)	Top-5 acc 89.453 (86.507)	lr 0.00558
Train [83][1320/3239]	Time 0.236 (0.608)	Data Time 0.001 (0.031)	Loss 2.2572 (2.3328)	Entropy 0.89648 (0.89839)	Top-1 acc 69.531 (68.173)	Top-5 acc 88.672 (86.505)	lr 0.00558
Train [83][1330/3239]	Time 51.468 (0.644)	Data Time 0.001 (0.031)	Loss 2.4050 (2.3329)	Entropy 0.89648 (0.89838)	Top-1 acc 65.625 (68.164)	Top-5 acc 83.594 (86.498)	lr 0.00558
Train [83][1340/3239]	Time 0.436 (0.642)	Data Time 0.093 (0.031)	Loss 2.2525 (2.3332)	Entropy 0.89647 (0.89836)	Top-1 acc 69.141 (68.158)	Top-5 acc 88.281 (86.495)	lr 0.00558
Train [83][1350/3239]	Time 0.254 (0.641)	Data Time 0.002 (0.031)	Loss 2.2813 (2.3330)	Entropy 0.89644 (0.89835)	Top-1 acc 69.141 (68.163)	Top-5 acc 85.938 (86.500)	lr 0.00558
Train [83][1360/3239]	Time 0.269 (0.640)	Data Time 0.001 (0.031)	Loss 2.2247 (2.3331)	Entropy 0.89634 (0.89833)	Top-1 acc 68.750 (68.158)	Top-5 acc 88.281 (86.497)	lr 0.00558
Train [83][1370/3239]	Time 0.241 (0.639)	Data Time 0.001 (0.030)	Loss 2.2476 (2.3330)	Entropy 0.89634 (0.89832)	Top-1 acc 72.266 (68.162)	Top-5 acc 88.672 (86.501)	lr 0.00558
Train [83][1380/3239]	Time 0.224 (0.638)	Data Time 0.001 (0.030)	Loss 2.1771 (2.3332)	Entropy 0.89627 (0.89830)	Top-1 acc 74.219 (68.153)	Top-5 acc 89.453 (86.502)	lr 0.00558
Train [83][1390/3239]	Time 0.231 (0.636)	Data Time 0.001 (0.030)	Loss 2.4924 (2.3337)	Entropy 0.89629 (0.89829)	Top-1 acc 67.188 (68.150)	Top-5 acc 80.859 (86.486)	lr 0.00557
Train [83][1400/3239]	Time 0.238 (0.635)	Data Time 0.001 (0.030)	Loss 2.4556 (2.3339)	Entropy 0.89626 (0.89828)	Top-1 acc 64.453 (68.146)	Top-5 acc 82.812 (86.482)	lr 0.00557
Train [83][1410/3239]	Time 0.222 (0.634)	Data Time 0.001 (0.030)	Loss 2.4616 (2.3342)	Entropy 0.89620 (0.89826)	Top-1 acc 65.234 (68.138)	Top-5 acc 87.109 (86.477)	lr 0.00557
Train [83][1420/3239]	Time 0.221 (0.633)	Data Time 0.001 (0.029)	Loss 2.3105 (2.3345)	Entropy 0.89620 (0.89825)	Top-1 acc 69.141 (68.125)	Top-5 acc 85.938 (86.471)	lr 0.00557
Train [83][1430/3239]	Time 0.307 (0.632)	Data Time 0.001 (0.029)	Loss 2.2255 (2.3341)	Entropy 0.89616 (0.89823)	Top-1 acc 71.484 (68.132)	Top-5 acc 85.547 (86.479)	lr 0.00557
Train [83][1440/3239]	Time 2.523 (0.631)	Data Time 0.001 (0.029)	Loss 2.2362 (2.3342)	Entropy 0.89616 (0.89822)	Top-1 acc 69.141 (68.126)	Top-5 acc 90.234 (86.477)	lr 0.00557
Train [83][1450/3239]	Time 0.233 (0.628)	Data Time 0.001 (0.029)	Loss 2.3008 (2.3342)	Entropy 0.89614 (0.89820)	Top-1 acc 68.359 (68.129)	Top-5 acc 86.719 (86.479)	lr 0.00557
Train [83][1460/3239]	Time 0.232 (0.627)	Data Time 0.001 (0.029)	Loss 2.3997 (2.3342)	Entropy 0.89612 (0.89819)	Top-1 acc 66.797 (68.126)	Top-5 acc 85.938 (86.485)	lr 0.00557
Train [83][1470/3239]	Time 0.246 (0.626)	Data Time 0.001 (0.028)	Loss 2.3275 (2.3342)	Entropy 0.89610 (0.89818)	Top-1 acc 70.703 (68.127)	Top-5 acc 86.719 (86.485)	lr 0.00557
Train [83][1480/3239]	Time 0.235 (0.625)	Data Time 0.001 (0.028)	Loss 2.2081 (2.3342)	Entropy 0.89610 (0.89816)	Top-1 acc 70.312 (68.124)	Top-5 acc 89.844 (86.486)	lr 0.00557
Train [83][1490/3239]	Time 0.231 (0.624)	Data Time 0.001 (0.028)	Loss 2.5282 (2.3343)	Entropy 0.89607 (0.89815)	Top-1 acc 64.844 (68.120)	Top-5 acc 84.766 (86.481)	lr 0.00557
Train [83][1500/3239]	Time 0.240 (0.623)	Data Time 0.001 (0.028)	Loss 2.3017 (2.3342)	Entropy 0.89596 (0.89813)	Top-1 acc 69.922 (68.125)	Top-5 acc 86.719 (86.480)	lr 0.00557
Train [83][1510/3239]	Time 0.235 (0.622)	Data Time 0.001 (0.028)	Loss 2.2372 (2.3339)	Entropy 0.89593 (0.89812)	Top-1 acc 69.531 (68.130)	Top-5 acc 87.891 (86.481)	lr 0.00556
Train [83][1520/3239]	Time 0.310 (0.621)	Data Time 0.001 (0.028)	Loss 2.3598 (2.3341)	Entropy 0.89591 (0.89810)	Top-1 acc 66.016 (68.123)	Top-5 acc 85.938 (86.478)	lr 0.00556
Train [83][1530/3239]	Time 0.200 (0.620)	Data Time 0.001 (0.027)	Loss 2.3708 (2.3343)	Entropy 0.89590 (0.89809)	Top-1 acc 66.406 (68.116)	Top-5 acc 85.156 (86.474)	lr 0.00556
Train [83][1540/3239]	Time 0.259 (0.619)	Data Time 0.001 (0.027)	Loss 2.4315 (2.3344)	Entropy 0.89587 (0.89808)	Top-1 acc 65.625 (68.111)	Top-5 acc 83.594 (86.468)	lr 0.00556
Train [83][1550/3239]	Time 2.516 (0.618)	Data Time 0.001 (0.027)	Loss 2.4146 (2.3343)	Entropy 0.89587 (0.89806)	Top-1 acc 64.062 (68.116)	Top-5 acc 85.938 (86.473)	lr 0.00556
Train [83][1560/3239]	Time 0.282 (0.616)	Data Time 0.001 (0.027)	Loss 2.4544 (2.3343)	Entropy 0.89583 (0.89805)	Top-1 acc 64.453 (68.118)	Top-5 acc 84.375 (86.479)	lr 0.00556
Train [83][1570/3239]	Time 0.227 (0.615)	Data Time 0.001 (0.027)	Loss 2.3850 (2.3347)	Entropy 0.89583 (0.89803)	Top-1 acc 68.750 (68.110)	Top-5 acc 85.156 (86.472)	lr 0.00556
Train [83][1580/3239]	Time 0.222 (0.614)	Data Time 0.001 (0.027)	Loss 2.3087 (2.3345)	Entropy 0.89579 (0.89802)	Top-1 acc 68.359 (68.111)	Top-5 acc 87.891 (86.478)	lr 0.00556
Train [83][1590/3239]	Time 0.213 (0.613)	Data Time 0.001 (0.026)	Loss 2.4115 (2.3345)	Entropy 0.89578 (0.89801)	Top-1 acc 65.625 (68.112)	Top-5 acc 86.328 (86.477)	lr 0.00556
Train [83][1600/3239]	Time 0.224 (0.612)	Data Time 0.001 (0.026)	Loss 2.3424 (2.3343)	Entropy 0.89575 (0.89799)	Top-1 acc 69.922 (68.119)	Top-5 acc 87.891 (86.482)	lr 0.00556
Train [83][1610/3239]	Time 0.336 (0.611)	Data Time 0.001 (0.026)	Loss 2.1081 (2.3341)	Entropy 0.89572 (0.89798)	Top-1 acc 70.703 (68.120)	Top-5 acc 92.969 (86.488)	lr 0.00556
Train [83][1620/3239]	Time 0.214 (0.610)	Data Time 0.001 (0.026)	Loss 2.3170 (2.3339)	Entropy 0.89574 (0.89796)	Top-1 acc 66.406 (68.120)	Top-5 acc 87.109 (86.489)	lr 0.00556
Train [83][1630/3239]	Time 0.232 (0.610)	Data Time 0.001 (0.026)	Loss 2.3960 (2.3340)	Entropy 0.89574 (0.89795)	Top-1 acc 66.797 (68.117)	Top-5 acc 88.672 (86.490)	lr 0.00555
Train [83][1640/3239]	Time 0.217 (0.609)	Data Time 0.001 (0.026)	Loss 2.1501 (2.3342)	Entropy 0.89570 (0.89794)	Top-1 acc 71.094 (68.113)	Top-5 acc 89.844 (86.486)	lr 0.00555
Train [83][1650/3239]	Time 0.335 (0.608)	Data Time 0.001 (0.025)	Loss 2.3739 (2.3342)	Entropy 0.89560 (0.89792)	Top-1 acc 69.922 (68.114)	Top-5 acc 85.547 (86.490)	lr 0.00555
Train [83][1660/3239]	Time 2.510 (0.607)	Data Time 0.001 (0.025)	Loss 2.3674 (2.3345)	Entropy 0.89560 (0.89791)	Top-1 acc 67.969 (68.109)	Top-5 acc 87.891 (86.487)	lr 0.00555
Train [83][1670/3239]	Time 0.293 (0.605)	Data Time 0.001 (0.025)	Loss 2.5511 (2.3346)	Entropy 0.89548 (0.89789)	Top-1 acc 67.578 (68.110)	Top-5 acc 80.469 (86.482)	lr 0.00555
Train [83][1680/3239]	Time 0.229 (0.604)	Data Time 0.001 (0.025)	Loss 2.4267 (2.3344)	Entropy 0.89544 (0.89788)	Top-1 acc 67.188 (68.119)	Top-5 acc 84.766 (86.485)	lr 0.00555
Train [83][1690/3239]	Time 0.280 (0.604)	Data Time 0.002 (0.025)	Loss 2.1487 (2.3345)	Entropy 0.89544 (0.89787)	Top-1 acc 73.438 (68.116)	Top-5 acc 91.406 (86.488)	lr 0.00555
Train [83][1700/3239]	Time 0.224 (0.636)	Data Time 0.002 (0.025)	Loss 2.3548 (2.3345)	Entropy 0.89539 (0.89785)	Top-1 acc 66.797 (68.115)	Top-5 acc 87.891 (86.489)	lr 0.00555
Train [83][1710/3239]	Time 0.232 (0.635)	Data Time 0.002 (0.025)	Loss 2.3672 (2.3343)	Entropy 0.89533 (0.89784)	Top-1 acc 68.359 (68.120)	Top-5 acc 85.938 (86.490)	lr 0.00555
Train [83][1720/3239]	Time 0.241 (0.634)	Data Time 0.001 (0.024)	Loss 2.4945 (2.3345)	Entropy 0.89530 (0.89782)	Top-1 acc 65.234 (68.116)	Top-5 acc 84.766 (86.487)	lr 0.00555
Train [83][1730/3239]	Time 0.230 (0.633)	Data Time 0.001 (0.024)	Loss 2.4231 (2.3348)	Entropy 0.89529 (0.89781)	Top-1 acc 66.406 (68.105)	Top-5 acc 85.547 (86.478)	lr 0.00555
Train [83][1740/3239]	Time 0.227 (0.632)	Data Time 0.001 (0.024)	Loss 2.2665 (2.3345)	Entropy 0.89529 (0.89779)	Top-1 acc 72.656 (68.112)	Top-5 acc 85.938 (86.483)	lr 0.00555
Train [83][1750/3239]	Time 0.218 (0.632)	Data Time 0.001 (0.024)	Loss 2.3047 (2.3345)	Entropy 0.89527 (0.89778)	Top-1 acc 71.484 (68.120)	Top-5 acc 85.938 (86.478)	lr 0.00554
Train [83][1760/3239]	Time 0.229 (0.631)	Data Time 0.001 (0.024)	Loss 2.4702 (2.3344)	Entropy 0.89527 (0.89776)	Top-1 acc 65.625 (68.125)	Top-5 acc 84.375 (86.478)	lr 0.00554
Train [83][1770/3239]	Time 2.534 (0.630)	Data Time 0.001 (0.024)	Loss 2.3175 (2.3344)	Entropy 0.89527 (0.89775)	Top-1 acc 68.750 (68.120)	Top-5 acc 88.281 (86.481)	lr 0.00554
Train [83][1780/3239]	Time 0.229 (0.628)	Data Time 0.001 (0.024)	Loss 2.3362 (2.3345)	Entropy 0.89513 (0.89774)	Top-1 acc 68.750 (68.121)	Top-5 acc 87.109 (86.482)	lr 0.00554
Train [83][1790/3239]	Time 0.221 (0.627)	Data Time 0.001 (0.024)	Loss 2.2830 (2.3343)	Entropy 0.89513 (0.89772)	Top-1 acc 67.578 (68.124)	Top-5 acc 86.328 (86.486)	lr 0.00554
Train [83][1800/3239]	Time 0.226 (0.626)	Data Time 0.001 (0.023)	Loss 2.2586 (2.3342)	Entropy 0.89499 (0.89771)	Top-1 acc 70.312 (68.124)	Top-5 acc 87.500 (86.491)	lr 0.00554
Train [83][1810/3239]	Time 0.235 (0.625)	Data Time 0.001 (0.023)	Loss 2.1027 (2.3339)	Entropy 0.89498 (0.89769)	Top-1 acc 75.000 (68.135)	Top-5 acc 90.234 (86.497)	lr 0.00554
Train [83][1820/3239]	Time 0.347 (0.624)	Data Time 0.001 (0.023)	Loss 2.3783 (2.3338)	Entropy 0.89499 (0.89768)	Top-1 acc 67.969 (68.138)	Top-5 acc 88.281 (86.498)	lr 0.00554
Train [83][1830/3239]	Time 0.232 (0.624)	Data Time 0.001 (0.023)	Loss 2.2932 (2.3341)	Entropy 0.89489 (0.89766)	Top-1 acc 69.141 (68.134)	Top-5 acc 87.500 (86.490)	lr 0.00554
Train [83][1840/3239]	Time 0.235 (0.623)	Data Time 0.001 (0.023)	Loss 2.2961 (2.3341)	Entropy 0.89462 (0.89764)	Top-1 acc 67.188 (68.131)	Top-5 acc 86.719 (86.487)	lr 0.00554
Train [83][1850/3239]	Time 0.229 (0.622)	Data Time 0.001 (0.023)	Loss 2.3905 (2.3344)	Entropy 0.89464 (0.89763)	Top-1 acc 66.406 (68.123)	Top-5 acc 85.938 (86.483)	lr 0.00554
Train [83][1860/3239]	Time 0.371 (0.621)	Data Time 0.001 (0.023)	Loss 2.2853 (2.3341)	Entropy 0.89460 (0.89761)	Top-1 acc 68.750 (68.128)	Top-5 acc 87.500 (86.487)	lr 0.00554
Train [83][1870/3239]	Time 0.222 (0.620)	Data Time 0.001 (0.023)	Loss 2.3215 (2.3345)	Entropy 0.89465 (0.89760)	Top-1 acc 67.578 (68.118)	Top-5 acc 87.500 (86.481)	lr 0.00553
Train [83][1880/3239]	Time 2.531 (0.620)	Data Time 0.001 (0.023)	Loss 2.3354 (2.3344)	Entropy 0.89465 (0.89758)	Top-1 acc 69.141 (68.118)	Top-5 acc 86.719 (86.484)	lr 0.00553
Train [83][1890/3239]	Time 0.243 (0.618)	Data Time 0.001 (0.022)	Loss 2.2358 (2.3345)	Entropy 0.89463 (0.89757)	Top-1 acc 73.438 (68.117)	Top-5 acc 89.844 (86.485)	lr 0.00553
Train [83][1900/3239]	Time 0.239 (0.617)	Data Time 0.001 (0.022)	Loss 2.3078 (2.3347)	Entropy 0.89458 (0.89755)	Top-1 acc 67.578 (68.106)	Top-5 acc 87.891 (86.481)	lr 0.00553
Train [83][1910/3239]	Time 0.232 (0.616)	Data Time 0.001 (0.022)	Loss 2.4968 (2.3348)	Entropy 0.89459 (0.89753)	Top-1 acc 67.578 (68.104)	Top-5 acc 82.812 (86.479)	lr 0.00553
Train [83][1920/3239]	Time 0.245 (0.615)	Data Time 0.001 (0.022)	Loss 2.4498 (2.3350)	Entropy 0.89459 (0.89752)	Top-1 acc 64.844 (68.101)	Top-5 acc 83.984 (86.476)	lr 0.00553
Train [83][1930/3239]	Time 0.239 (0.615)	Data Time 0.001 (0.022)	Loss 2.2506 (2.3351)	Entropy 0.89466 (0.89750)	Top-1 acc 67.969 (68.095)	Top-5 acc 88.281 (86.476)	lr 0.00553
Train [83][1940/3239]	Time 0.248 (0.614)	Data Time 0.001 (0.022)	Loss 2.3079 (2.3351)	Entropy 0.89463 (0.89749)	Top-1 acc 67.578 (68.094)	Top-5 acc 86.719 (86.476)	lr 0.00553
Train [83][1950/3239]	Time 0.237 (0.613)	Data Time 0.001 (0.022)	Loss 2.1572 (2.3347)	Entropy 0.89464 (0.89747)	Top-1 acc 73.438 (68.105)	Top-5 acc 89.062 (86.484)	lr 0.00553
Train [83][1960/3239]	Time 0.226 (0.613)	Data Time 0.001 (0.022)	Loss 2.2751 (2.3346)	Entropy 0.89462 (0.89746)	Top-1 acc 68.359 (68.106)	Top-5 acc 90.234 (86.485)	lr 0.00553
Train [83][1970/3239]	Time 0.235 (0.612)	Data Time 0.002 (0.022)	Loss 2.3320 (2.3348)	Entropy 0.89462 (0.89745)	Top-1 acc 69.922 (68.103)	Top-5 acc 85.547 (86.483)	lr 0.00553
Train [83][1980/3239]	Time 0.265 (0.611)	Data Time 0.001 (0.021)	Loss 2.4864 (2.3350)	Entropy 0.89463 (0.89743)	Top-1 acc 64.844 (68.101)	Top-5 acc 83.594 (86.478)	lr 0.00553
Train [83][1990/3239]	Time 2.599 (0.610)	Data Time 0.002 (0.021)	Loss 2.2052 (2.3349)	Entropy 0.89463 (0.89742)	Top-1 acc 70.703 (68.102)	Top-5 acc 88.672 (86.481)	lr 0.00552
Train [83][2000/3239]	Time 0.238 (0.609)	Data Time 0.001 (0.021)	Loss 2.4470 (2.3354)	Entropy 0.89461 (0.89740)	Top-1 acc 64.844 (68.091)	Top-5 acc 84.766 (86.469)	lr 0.00552
Train [83][2010/3239]	Time 0.207 (0.608)	Data Time 0.001 (0.021)	Loss 2.4686 (2.3355)	Entropy 0.89460 (0.89739)	Top-1 acc 64.844 (68.091)	Top-5 acc 87.109 (86.466)	lr 0.00552
Train [83][2020/3239]	Time 0.231 (0.607)	Data Time 0.002 (0.021)	Loss 2.2582 (2.3354)	Entropy 0.89454 (0.89738)	Top-1 acc 69.531 (68.090)	Top-5 acc 87.500 (86.469)	lr 0.00552
Train [83][2030/3239]	Time 0.220 (0.607)	Data Time 0.001 (0.021)	Loss 2.4270 (2.3357)	Entropy 0.89457 (0.89736)	Top-1 acc 66.797 (68.085)	Top-5 acc 84.375 (86.462)	lr 0.00552
Train [83][2040/3239]	Time 0.222 (0.606)	Data Time 0.001 (0.021)	Loss 2.3289 (2.3356)	Entropy 0.89464 (0.89735)	Top-1 acc 66.016 (68.089)	Top-5 acc 87.500 (86.463)	lr 0.00552
Train [83][2050/3239]	Time 0.233 (0.605)	Data Time 0.001 (0.021)	Loss 2.2752 (2.3354)	Entropy 0.89460 (0.89733)	Top-1 acc 67.969 (68.096)	Top-5 acc 85.156 (86.462)	lr 0.00552
Train [83][2060/3239]	Time 0.262 (0.629)	Data Time 0.003 (0.021)	Loss 2.2957 (2.3356)	Entropy 0.89467 (0.89732)	Top-1 acc 68.750 (68.089)	Top-5 acc 88.281 (86.458)	lr 0.00552
Train [83][2070/3239]	Time 0.220 (0.629)	Data Time 0.002 (0.021)	Loss 2.2428 (2.3356)	Entropy 0.89457 (0.89731)	Top-1 acc 74.219 (68.092)	Top-5 acc 85.938 (86.456)	lr 0.00552
Train [83][2080/3239]	Time 0.234 (0.628)	Data Time 0.001 (0.021)	Loss 2.2629 (2.3358)	Entropy 0.89455 (0.89730)	Top-1 acc 71.875 (68.092)	Top-5 acc 87.109 (86.454)	lr 0.00552
Train [83][2090/3239]	Time 0.256 (0.627)	Data Time 0.001 (0.020)	Loss 2.6715 (2.3361)	Entropy 0.89453 (0.89728)	Top-1 acc 59.375 (68.083)	Top-5 acc 77.344 (86.446)	lr 0.00552
Train [83][2100/3239]	Time 2.564 (0.627)	Data Time 0.002 (0.020)	Loss 2.3331 (2.3362)	Entropy 0.89453 (0.89727)	Top-1 acc 67.188 (68.085)	Top-5 acc 86.719 (86.448)	lr 0.00552
Train [83][2110/3239]	Time 0.227 (0.625)	Data Time 0.001 (0.020)	Loss 2.3320 (2.3364)	Entropy 0.89446 (0.89726)	Top-1 acc 67.578 (68.083)	Top-5 acc 85.938 (86.443)	lr 0.00551
Train [83][2120/3239]	Time 0.315 (0.624)	Data Time 0.001 (0.020)	Loss 2.2386 (2.3363)	Entropy 0.89449 (0.89724)	Top-1 acc 69.531 (68.084)	Top-5 acc 89.453 (86.445)	lr 0.00551
Train [83][2130/3239]	Time 0.228 (0.623)	Data Time 0.001 (0.020)	Loss 2.4060 (2.3364)	Entropy 0.89450 (0.89723)	Top-1 acc 64.844 (68.081)	Top-5 acc 83.594 (86.445)	lr 0.00551
Train [83][2140/3239]	Time 0.223 (0.623)	Data Time 0.001 (0.020)	Loss 2.3807 (2.3362)	Entropy 0.89453 (0.89722)	Top-1 acc 66.016 (68.086)	Top-5 acc 85.938 (86.448)	lr 0.00551
Train [83][2150/3239]	Time 0.229 (0.622)	Data Time 0.001 (0.020)	Loss 2.3046 (2.3362)	Entropy 0.89453 (0.89720)	Top-1 acc 68.750 (68.086)	Top-5 acc 84.375 (86.446)	lr 0.00551
Train [83][2160/3239]	Time 0.335 (0.621)	Data Time 0.001 (0.020)	Loss 2.3576 (2.3362)	Entropy 0.89451 (0.89719)	Top-1 acc 66.016 (68.087)	Top-5 acc 87.109 (86.449)	lr 0.00551
Train [83][2170/3239]	Time 0.225 (0.621)	Data Time 0.001 (0.020)	Loss 2.3740 (2.3363)	Entropy 0.89451 (0.89718)	Top-1 acc 64.844 (68.084)	Top-5 acc 84.375 (86.448)	lr 0.00551
Train [83][2180/3239]	Time 0.222 (0.620)	Data Time 0.001 (0.020)	Loss 2.1985 (2.3364)	Entropy 0.89501 (0.89717)	Top-1 acc 67.969 (68.080)	Top-5 acc 88.672 (86.445)	lr 0.00551
Train [83][2190/3239]	Time 0.232 (0.619)	Data Time 0.001 (0.020)	Loss 2.3556 (2.3365)	Entropy 0.89501 (0.89716)	Top-1 acc 67.578 (68.076)	Top-5 acc 85.938 (86.444)	lr 0.00551
Train [83][2200/3239]	Time 0.355 (0.619)	Data Time 0.001 (0.020)	Loss 2.3200 (2.3366)	Entropy 0.89496 (0.89715)	Top-1 acc 68.750 (68.077)	Top-5 acc 87.891 (86.440)	lr 0.00551
Train [83][2210/3239]	Time 2.607 (0.618)	Data Time 0.001 (0.019)	Loss 2.2776 (2.3365)	Entropy 0.89496 (0.89714)	Top-1 acc 69.531 (68.081)	Top-5 acc 88.281 (86.443)	lr 0.00551
Train [83][2220/3239]	Time 0.221 (0.616)	Data Time 0.001 (0.019)	Loss 2.1665 (2.3367)	Entropy 0.89493 (0.89713)	Top-1 acc 69.531 (68.076)	Top-5 acc 91.016 (86.441)	lr 0.00551
Train [83][2230/3239]	Time 0.243 (0.616)	Data Time 0.001 (0.019)	Loss 2.4610 (2.3365)	Entropy 0.89488 (0.89712)	Top-1 acc 63.281 (68.075)	Top-5 acc 82.812 (86.444)	lr 0.00550
Train [83][2240/3239]	Time 0.232 (0.615)	Data Time 0.001 (0.019)	Loss 2.5126 (2.3365)	Entropy 0.89489 (0.89711)	Top-1 acc 63.672 (68.079)	Top-5 acc 82.422 (86.445)	lr 0.00550
Train [83][2250/3239]	Time 0.226 (0.615)	Data Time 0.001 (0.019)	Loss 2.2502 (2.3365)	Entropy 0.89483 (0.89710)	Top-1 acc 68.750 (68.079)	Top-5 acc 90.234 (86.448)	lr 0.00550
Train [83][2260/3239]	Time 0.258 (0.614)	Data Time 0.001 (0.019)	Loss 2.2968 (2.3364)	Entropy 0.89480 (0.89709)	Top-1 acc 69.141 (68.081)	Top-5 acc 87.109 (86.448)	lr 0.00550
Train [83][2270/3239]	Time 0.219 (0.613)	Data Time 0.001 (0.019)	Loss 2.2371 (2.3363)	Entropy 0.89473 (0.89708)	Top-1 acc 67.578 (68.081)	Top-5 acc 87.891 (86.452)	lr 0.00550
Train [83][2280/3239]	Time 0.229 (0.613)	Data Time 0.001 (0.019)	Loss 2.5290 (2.3366)	Entropy 0.89475 (0.89707)	Top-1 acc 65.625 (68.075)	Top-5 acc 83.594 (86.448)	lr 0.00550
Train [83][2290/3239]	Time 0.222 (0.612)	Data Time 0.001 (0.019)	Loss 2.5314 (2.3367)	Entropy 0.89476 (0.89706)	Top-1 acc 60.938 (68.070)	Top-5 acc 84.766 (86.447)	lr 0.00550
Train [83][2300/3239]	Time 0.231 (0.612)	Data Time 0.001 (0.019)	Loss 2.3895 (2.3365)	Entropy 0.89475 (0.89705)	Top-1 acc 69.922 (68.077)	Top-5 acc 86.719 (86.451)	lr 0.00550
Train [83][2310/3239]	Time 0.224 (0.611)	Data Time 0.001 (0.019)	Loss 2.2182 (2.3364)	Entropy 0.89473 (0.89704)	Top-1 acc 67.188 (68.074)	Top-5 acc 90.234 (86.452)	lr 0.00550
Train [83][2320/3239]	Time 2.429 (0.610)	Data Time 0.001 (0.019)	Loss 2.4147 (2.3362)	Entropy 0.89473 (0.89703)	Top-1 acc 67.969 (68.075)	Top-5 acc 85.156 (86.454)	lr 0.00550
Train [83][2330/3239]	Time 0.339 (0.609)	Data Time 0.001 (0.018)	Loss 2.3793 (2.3362)	Entropy 0.89465 (0.89702)	Top-1 acc 66.016 (68.074)	Top-5 acc 85.938 (86.455)	lr 0.00550
Train [83][2340/3239]	Time 0.245 (0.608)	Data Time 0.001 (0.018)	Loss 2.3544 (2.3362)	Entropy 0.89463 (0.89701)	Top-1 acc 66.797 (68.074)	Top-5 acc 85.938 (86.453)	lr 0.00550
Train [83][2350/3239]	Time 0.222 (0.608)	Data Time 0.001 (0.018)	Loss 2.4971 (2.3364)	Entropy 0.89465 (0.89700)	Top-1 acc 62.500 (68.065)	Top-5 acc 83.203 (86.449)	lr 0.00549
Train [83][2360/3239]	Time 0.217 (0.607)	Data Time 0.001 (0.018)	Loss 2.4589 (2.3364)	Entropy 0.89464 (0.89699)	Top-1 acc 64.453 (68.068)	Top-5 acc 84.375 (86.449)	lr 0.00549
Train [83][2370/3239]	Time 0.231 (0.607)	Data Time 0.001 (0.018)	Loss 2.2258 (2.3365)	Entropy 0.89454 (0.89698)	Top-1 acc 70.312 (68.064)	Top-5 acc 89.453 (86.444)	lr 0.00549
Train [83][2380/3239]	Time 0.212 (0.606)	Data Time 0.001 (0.018)	Loss 2.3367 (2.3367)	Entropy 0.89453 (0.89697)	Top-1 acc 68.359 (68.062)	Top-5 acc 85.156 (86.442)	lr 0.00549
Train [83][2390/3239]	Time 0.225 (0.606)	Data Time 0.001 (0.018)	Loss 2.3614 (2.3368)	Entropy 0.89447 (0.89696)	Top-1 acc 64.844 (68.057)	Top-5 acc 86.328 (86.439)	lr 0.00549
Train [83][2400/3239]	Time 0.262 (0.605)	Data Time 0.001 (0.018)	Loss 2.3213 (2.3367)	Entropy 0.89437 (0.89695)	Top-1 acc 66.797 (68.057)	Top-5 acc 86.328 (86.439)	lr 0.00549
Train [83][2410/3239]	Time 0.228 (0.605)	Data Time 0.001 (0.018)	Loss 2.4229 (2.3369)	Entropy 0.89432 (0.89694)	Top-1 acc 69.531 (68.053)	Top-5 acc 84.375 (86.436)	lr 0.00549
Train [83][2420/3239]	Time 0.248 (0.627)	Data Time 0.003 (0.018)	Loss 2.3277 (2.3370)	Entropy 0.89425 (0.89693)	Top-1 acc 67.578 (68.050)	Top-5 acc 86.719 (86.436)	lr 0.00549
Train [83][2430/3239]	Time 2.652 (0.626)	Data Time 0.002 (0.018)	Loss 2.2557 (2.3369)	Entropy 0.89425 (0.89691)	Top-1 acc 70.703 (68.049)	Top-5 acc 86.719 (86.437)	lr 0.00549
Train [83][2440/3239]	Time 0.269 (0.625)	Data Time 0.002 (0.018)	Loss 2.2563 (2.3369)	Entropy 0.89423 (0.89690)	Top-1 acc 68.359 (68.051)	Top-5 acc 90.234 (86.441)	lr 0.00549
Train [83][2450/3239]	Time 0.271 (0.624)	Data Time 0.002 (0.018)	Loss 2.5692 (2.3372)	Entropy 0.89411 (0.89689)	Top-1 acc 60.938 (68.042)	Top-5 acc 82.422 (86.436)	lr 0.00549
Train [83][2460/3239]	Time 0.240 (0.623)	Data Time 0.001 (0.018)	Loss 2.4730 (2.3374)	Entropy 0.89406 (0.89688)	Top-1 acc 64.844 (68.034)	Top-5 acc 83.984 (86.433)	lr 0.00548
Train [83][2470/3239]	Time 0.292 (0.623)	Data Time 0.002 (0.018)	Loss 2.2813 (2.3374)	Entropy 0.89409 (0.89687)	Top-1 acc 68.359 (68.033)	Top-5 acc 87.891 (86.433)	lr 0.00548
Train [83][2480/3239]	Time 0.256 (0.622)	Data Time 0.002 (0.017)	Loss 2.4422 (2.3373)	Entropy 0.89413 (0.89686)	Top-1 acc 61.719 (68.034)	Top-5 acc 88.281 (86.435)	lr 0.00548
Train [83][2490/3239]	Time 0.276 (0.622)	Data Time 0.001 (0.017)	Loss 2.3942 (2.3372)	Entropy 0.89410 (0.89685)	Top-1 acc 67.969 (68.037)	Top-5 acc 87.891 (86.436)	lr 0.00548
Train [83][2500/3239]	Time 0.233 (0.621)	Data Time 0.006 (0.017)	Loss 2.2729 (2.3370)	Entropy 0.89408 (0.89684)	Top-1 acc 67.188 (68.040)	Top-5 acc 89.062 (86.440)	lr 0.00548
Train [83][2510/3239]	Time 0.232 (0.621)	Data Time 0.001 (0.017)	Loss 2.3230 (2.3371)	Entropy 0.89405 (0.89682)	Top-1 acc 66.016 (68.035)	Top-5 acc 87.891 (86.442)	lr 0.00548
Train [83][2520/3239]	Time 0.256 (0.620)	Data Time 0.001 (0.017)	Loss 2.3139 (2.3373)	Entropy 0.89405 (0.89681)	Top-1 acc 68.750 (68.032)	Top-5 acc 85.547 (86.439)	lr 0.00548
Train [83][2530/3239]	Time 0.232 (0.620)	Data Time 0.001 (0.017)	Loss 2.4869 (2.3374)	Entropy 0.89404 (0.89680)	Top-1 acc 65.625 (68.029)	Top-5 acc 84.375 (86.438)	lr 0.00548
Train [83][2540/3239]	Time 2.616 (0.619)	Data Time 0.001 (0.017)	Loss 2.2936 (2.3374)	Entropy 0.89404 (0.89679)	Top-1 acc 68.359 (68.026)	Top-5 acc 87.109 (86.441)	lr 0.00548
Train [83][2550/3239]	Time 0.259 (0.618)	Data Time 0.002 (0.017)	Loss 2.2895 (2.3375)	Entropy 0.89403 (0.89678)	Top-1 acc 69.531 (68.024)	Top-5 acc 85.938 (86.440)	lr 0.00548
Train [83][2560/3239]	Time 0.237 (0.617)	Data Time 0.001 (0.017)	Loss 2.3857 (2.3376)	Entropy 0.89401 (0.89677)	Top-1 acc 62.500 (68.020)	Top-5 acc 85.547 (86.439)	lr 0.00548
Train [83][2570/3239]	Time 0.256 (0.617)	Data Time 0.001 (0.017)	Loss 2.3938 (2.3377)	Entropy 0.89401 (0.89676)	Top-1 acc 66.016 (68.020)	Top-5 acc 85.156 (86.437)	lr 0.00548
Train [83][2580/3239]	Time 0.226 (0.616)	Data Time 0.001 (0.017)	Loss 2.3098 (2.3383)	Entropy 0.89401 (0.89675)	Top-1 acc 67.188 (68.006)	Top-5 acc 85.938 (86.426)	lr 0.00547
Train [83][2590/3239]	Time 0.323 (0.616)	Data Time 0.001 (0.017)	Loss 2.2263 (2.3383)	Entropy 0.89395 (0.89674)	Top-1 acc 69.922 (68.008)	Top-5 acc 88.281 (86.425)	lr 0.00547
Train [83][2600/3239]	Time 0.229 (0.615)	Data Time 0.001 (0.017)	Loss 2.2729 (2.3385)	Entropy 0.89395 (0.89673)	Top-1 acc 69.922 (68.000)	Top-5 acc 87.891 (86.422)	lr 0.00547
Train [83][2610/3239]	Time 0.219 (0.614)	Data Time 0.001 (0.017)	Loss 2.5180 (2.3384)	Entropy 0.89389 (0.89672)	Top-1 acc 64.062 (68.004)	Top-5 acc 82.812 (86.422)	lr 0.00547
Train [83][2620/3239]	Time 0.241 (0.614)	Data Time 0.001 (0.017)	Loss 2.3246 (2.3385)	Entropy 0.89388 (0.89671)	Top-1 acc 67.188 (68.001)	Top-5 acc 84.375 (86.420)	lr 0.00547
Train [83][2630/3239]	Time 0.263 (0.614)	Data Time 0.001 (0.017)	Loss 2.2846 (2.3386)	Entropy 0.89382 (0.89670)	Top-1 acc 69.531 (67.998)	Top-5 acc 89.062 (86.423)	lr 0.00547
Train [83][2640/3239]	Time 0.224 (0.613)	Data Time 0.001 (0.017)	Loss 2.4127 (2.3384)	Entropy 0.89380 (0.89668)	Top-1 acc 65.625 (68.001)	Top-5 acc 84.375 (86.426)	lr 0.00547
Train [83][2650/3239]	Time 0.225 (0.612)	Data Time 0.001 (0.016)	Loss 2.3346 (2.3382)	Entropy 0.89374 (0.89667)	Top-1 acc 69.531 (68.003)	Top-5 acc 88.281 (86.430)	lr 0.00547
Train [83][2660/3239]	Time 0.242 (0.612)	Data Time 0.001 (0.016)	Loss 2.4519 (2.3382)	Entropy 0.89364 (0.89666)	Top-1 acc 65.234 (68.000)	Top-5 acc 83.203 (86.429)	lr 0.00547
Train [83][2670/3239]	Time 0.248 (0.611)	Data Time 0.001 (0.016)	Loss 2.3160 (2.3382)	Entropy 0.89353 (0.89665)	Top-1 acc 66.406 (68.002)	Top-5 acc 86.719 (86.429)	lr 0.00547
Train [83][2680/3239]	Time 0.215 (0.611)	Data Time 0.001 (0.016)	Loss 2.1850 (2.3383)	Entropy 0.89353 (0.89664)	Top-1 acc 69.531 (68.003)	Top-5 acc 89.844 (86.430)	lr 0.00547
Train [83][2690/3239]	Time 0.260 (0.610)	Data Time 0.001 (0.016)	Loss 2.3802 (2.3382)	Entropy 0.89347 (0.89663)	Top-1 acc 64.844 (68.005)	Top-5 acc 84.375 (86.432)	lr 0.00547
Train [83][2700/3239]	Time 0.221 (0.610)	Data Time 0.001 (0.016)	Loss 2.3021 (2.3382)	Entropy 0.89347 (0.89662)	Top-1 acc 73.438 (68.000)	Top-5 acc 86.328 (86.433)	lr 0.00546
Train [83][2710/3239]	Time 0.254 (0.609)	Data Time 0.001 (0.016)	Loss 2.2472 (2.3379)	Entropy 0.89347 (0.89660)	Top-1 acc 67.969 (68.007)	Top-5 acc 86.719 (86.437)	lr 0.00546
Train [83][2720/3239]	Time 0.346 (0.609)	Data Time 0.001 (0.016)	Loss 2.2816 (2.3377)	Entropy 0.89342 (0.89659)	Top-1 acc 70.703 (68.012)	Top-5 acc 88.281 (86.444)	lr 0.00546
Train [83][2730/3239]	Time 0.228 (0.608)	Data Time 0.001 (0.016)	Loss 2.3939 (2.3377)	Entropy 0.89341 (0.89658)	Top-1 acc 69.141 (68.013)	Top-5 acc 87.500 (86.442)	lr 0.00546
Train [83][2740/3239]	Time 0.278 (0.608)	Data Time 0.001 (0.016)	Loss 2.3661 (2.3377)	Entropy 0.89338 (0.89657)	Top-1 acc 70.312 (68.011)	Top-5 acc 83.984 (86.441)	lr 0.00546
Train [83][2750/3239]	Time 0.234 (0.607)	Data Time 0.001 (0.016)	Loss 2.3969 (2.3377)	Entropy 0.89333 (0.89656)	Top-1 acc 66.406 (68.010)	Top-5 acc 84.375 (86.442)	lr 0.00546
Train [83][2760/3239]	Time 0.341 (0.607)	Data Time 0.002 (0.016)	Loss 2.4678 (2.3379)	Entropy 0.89328 (0.89655)	Top-1 acc 63.672 (68.004)	Top-5 acc 83.984 (86.438)	lr 0.00546
Train [83][2770/3239]	Time 0.244 (0.626)	Data Time 0.003 (0.016)	Loss 2.5599 (2.3381)	Entropy 0.89322 (0.89653)	Top-1 acc 64.453 (67.997)	Top-5 acc 78.906 (86.435)	lr 0.00546
Train [83][2780/3239]	Time 0.242 (0.626)	Data Time 0.002 (0.016)	Loss 2.4364 (2.3381)	Entropy 0.89324 (0.89652)	Top-1 acc 63.672 (67.999)	Top-5 acc 83.203 (86.435)	lr 0.00546
Train [83][2790/3239]	Time 0.241 (0.625)	Data Time 0.001 (0.016)	Loss 2.3050 (2.3382)	Entropy 0.89324 (0.89651)	Top-1 acc 66.406 (67.994)	Top-5 acc 88.281 (86.432)	lr 0.00546
Train [83][2800/3239]	Time 0.231 (0.625)	Data Time 0.001 (0.016)	Loss 2.4543 (2.3384)	Entropy 0.89318 (0.89650)	Top-1 acc 65.234 (67.985)	Top-5 acc 85.938 (86.431)	lr 0.00546
Train [83][2810/3239]	Time 0.228 (0.624)	Data Time 0.001 (0.016)	Loss 2.1949 (2.3383)	Entropy 0.89317 (0.89649)	Top-1 acc 70.703 (67.987)	Top-5 acc 88.672 (86.435)	lr 0.00546
Train [83][2820/3239]	Time 0.235 (0.624)	Data Time 0.001 (0.016)	Loss 2.3389 (2.3384)	Entropy 0.89315 (0.89648)	Top-1 acc 67.578 (67.984)	Top-5 acc 86.719 (86.434)	lr 0.00545
Train [83][2830/3239]	Time 0.227 (0.623)	Data Time 0.001 (0.016)	Loss 2.3407 (2.3386)	Entropy 0.89316 (0.89646)	Top-1 acc 71.875 (67.980)	Top-5 acc 85.547 (86.431)	lr 0.00545
Train [83][2840/3239]	Time 0.229 (0.623)	Data Time 0.002 (0.015)	Loss 2.2601 (2.3386)	Entropy 0.89312 (0.89645)	Top-1 acc 71.484 (67.980)	Top-5 acc 87.891 (86.432)	lr 0.00545
Train [83][2850/3239]	Time 0.226 (0.622)	Data Time 0.001 (0.015)	Loss 2.2584 (2.3386)	Entropy 0.89317 (0.89644)	Top-1 acc 70.312 (67.980)	Top-5 acc 88.672 (86.433)	lr 0.00545
Train [83][2860/3239]	Time 0.228 (0.622)	Data Time 0.001 (0.015)	Loss 2.2898 (2.3384)	Entropy 0.89312 (0.89643)	Top-1 acc 69.922 (67.984)	Top-5 acc 86.719 (86.435)	lr 0.00545
Train [83][2870/3239]	Time 0.263 (0.621)	Data Time 0.001 (0.015)	Loss 2.2945 (2.3384)	Entropy 0.89311 (0.89642)	Top-1 acc 72.266 (67.985)	Top-5 acc 87.500 (86.435)	lr 0.00545
Train [83][2880/3239]	Time 0.243 (0.621)	Data Time 0.001 (0.015)	Loss 2.3966 (2.3385)	Entropy 0.89306 (0.89641)	Top-1 acc 69.922 (67.982)	Top-5 acc 84.766 (86.431)	lr 0.00545
Train [83][2890/3239]	Time 0.382 (0.621)	Data Time 0.002 (0.015)	Loss 2.4097 (2.3386)	Entropy 0.89298 (0.89639)	Top-1 acc 68.750 (67.979)	Top-5 acc 83.594 (86.432)	lr 0.00545
Train [83][2900/3239]	Time 0.231 (0.620)	Data Time 0.001 (0.015)	Loss 2.3568 (2.3385)	Entropy 0.89294 (0.89638)	Top-1 acc 69.531 (67.980)	Top-5 acc 85.156 (86.430)	lr 0.00545
Train [83][2910/3239]	Time 0.282 (0.620)	Data Time 0.001 (0.015)	Loss 2.2255 (2.3385)	Entropy 0.89294 (0.89637)	Top-1 acc 72.266 (67.979)	Top-5 acc 87.500 (86.429)	lr 0.00545
Train [83][2920/3239]	Time 0.285 (0.619)	Data Time 0.001 (0.015)	Loss 2.3055 (2.3386)	Entropy 0.89288 (0.89636)	Top-1 acc 67.969 (67.975)	Top-5 acc 85.156 (86.425)	lr 0.00545
Train [83][2930/3239]	Time 0.223 (0.619)	Data Time 0.001 (0.015)	Loss 2.3919 (2.3385)	Entropy 0.89289 (0.89635)	Top-1 acc 66.797 (67.975)	Top-5 acc 85.938 (86.428)	lr 0.00545
Train [83][2940/3239]	Time 0.234 (0.618)	Data Time 0.001 (0.015)	Loss 2.5653 (2.3385)	Entropy 0.89292 (0.89634)	Top-1 acc 64.453 (67.978)	Top-5 acc 80.859 (86.431)	lr 0.00544
Train [83][2950/3239]	Time 0.223 (0.618)	Data Time 0.001 (0.015)	Loss 2.3244 (2.3385)	Entropy 0.89285 (0.89632)	Top-1 acc 67.188 (67.977)	Top-5 acc 87.891 (86.431)	lr 0.00544
Train [83][2960/3239]	Time 0.226 (0.617)	Data Time 0.001 (0.015)	Loss 2.3858 (2.3385)	Entropy 0.89267 (0.89631)	Top-1 acc 66.797 (67.979)	Top-5 acc 85.156 (86.429)	lr 0.00544
Train [83][2970/3239]	Time 0.296 (0.617)	Data Time 0.001 (0.015)	Loss 2.2685 (2.3385)	Entropy 0.89259 (0.89630)	Top-1 acc 69.531 (67.978)	Top-5 acc 88.281 (86.429)	lr 0.00544
Train [83][2980/3239]	Time 0.222 (0.616)	Data Time 0.002 (0.015)	Loss 2.3076 (2.3386)	Entropy 0.89260 (0.89629)	Top-1 acc 69.531 (67.976)	Top-5 acc 87.500 (86.432)	lr 0.00544
Train [83][2990/3239]	Time 0.234 (0.616)	Data Time 0.002 (0.015)	Loss 2.3460 (2.3386)	Entropy 0.89264 (0.89628)	Top-1 acc 63.672 (67.972)	Top-5 acc 86.328 (86.431)	lr 0.00544
Train [83][3000/3239]	Time 0.216 (0.615)	Data Time 0.001 (0.015)	Loss 2.3020 (2.3389)	Entropy 0.89255 (0.89626)	Top-1 acc 70.312 (67.968)	Top-5 acc 85.156 (86.425)	lr 0.00544
Train [83][3010/3239]	Time 0.219 (0.615)	Data Time 0.001 (0.015)	Loss 2.2029 (2.3389)	Entropy 0.89255 (0.89625)	Top-1 acc 73.438 (67.967)	Top-5 acc 87.500 (86.425)	lr 0.00544
Train [83][3020/3239]	Time 0.344 (0.615)	Data Time 0.001 (0.015)	Loss 2.3924 (2.3389)	Entropy 0.89256 (0.89624)	Top-1 acc 69.531 (67.965)	Top-5 acc 85.938 (86.424)	lr 0.00544
Train [83][3030/3239]	Time 0.249 (0.614)	Data Time 0.001 (0.015)	Loss 2.5298 (2.3390)	Entropy 0.89254 (0.89623)	Top-1 acc 62.891 (67.965)	Top-5 acc 82.422 (86.421)	lr 0.00544
Train [83][3040/3239]	Time 0.257 (0.614)	Data Time 0.001 (0.015)	Loss 2.5672 (2.3392)	Entropy 0.89254 (0.89621)	Top-1 acc 62.500 (67.962)	Top-5 acc 80.859 (86.417)	lr 0.00544
Train [83][3050/3239]	Time 0.229 (0.613)	Data Time 0.001 (0.015)	Loss 2.4378 (2.3391)	Entropy 0.89252 (0.89620)	Top-1 acc 63.281 (67.964)	Top-5 acc 83.594 (86.420)	lr 0.00544
Train [83][3060/3239]	Time 0.249 (0.613)	Data Time 0.001 (0.014)	Loss 2.3614 (2.3394)	Entropy 0.89250 (0.89619)	Top-1 acc 68.359 (67.958)	Top-5 acc 87.109 (86.417)	lr 0.00543
Train [83][3070/3239]	Time 0.241 (0.612)	Data Time 0.001 (0.014)	Loss 2.4283 (2.3395)	Entropy 0.89241 (0.89618)	Top-1 acc 66.797 (67.955)	Top-5 acc 83.203 (86.412)	lr 0.00543
Train [83][3080/3239]	Time 0.229 (0.612)	Data Time 0.001 (0.014)	Loss 2.2246 (2.3395)	Entropy 0.89241 (0.89617)	Top-1 acc 70.703 (67.955)	Top-5 acc 89.062 (86.413)	lr 0.00543
Train [83][3090/3239]	Time 0.252 (0.611)	Data Time 0.001 (0.014)	Loss 2.2891 (2.3395)	Entropy 0.89240 (0.89615)	Top-1 acc 70.703 (67.959)	Top-5 acc 88.281 (86.412)	lr 0.00543
Train [83][3100/3239]	Time 0.300 (0.627)	Data Time 0.004 (0.014)	Loss 2.4312 (2.3395)	Entropy 0.89239 (0.89614)	Top-1 acc 66.016 (67.961)	Top-5 acc 85.547 (86.412)	lr 0.00543
Train [83][3110/3239]	Time 0.323 (0.627)	Data Time 0.002 (0.014)	Loss 2.4528 (2.3397)	Entropy 0.89256 (0.89613)	Top-1 acc 64.453 (67.957)	Top-5 acc 82.812 (86.407)	lr 0.00543
Train [83][3120/3239]	Time 0.281 (0.626)	Data Time 0.002 (0.014)	Loss 2.5176 (2.3398)	Entropy 0.89246 (0.89612)	Top-1 acc 61.328 (67.954)	Top-5 acc 83.594 (86.405)	lr 0.00543
Train [83][3130/3239]	Time 0.252 (0.626)	Data Time 0.002 (0.014)	Loss 2.4673 (2.3400)	Entropy 0.89242 (0.89611)	Top-1 acc 62.500 (67.948)	Top-5 acc 83.594 (86.401)	lr 0.00543
Train [83][3140/3239]	Time 0.245 (0.625)	Data Time 0.001 (0.014)	Loss 2.2445 (2.3398)	Entropy 0.89238 (0.89609)	Top-1 acc 67.578 (67.950)	Top-5 acc 89.844 (86.405)	lr 0.00543
Train [83][3150/3239]	Time 0.260 (0.625)	Data Time 0.002 (0.014)	Loss 2.3384 (2.3400)	Entropy 0.89233 (0.89608)	Top-1 acc 67.188 (67.947)	Top-5 acc 86.719 (86.403)	lr 0.00543
Train [83][3160/3239]	Time 0.229 (0.624)	Data Time 0.001 (0.014)	Loss 2.1816 (2.3399)	Entropy 0.89223 (0.89607)	Top-1 acc 69.531 (67.948)	Top-5 acc 90.625 (86.404)	lr 0.00543
Train [83][3170/3239]	Time 0.229 (0.624)	Data Time 0.001 (0.014)	Loss 2.3163 (2.3400)	Entropy 0.89216 (0.89606)	Top-1 acc 67.188 (67.942)	Top-5 acc 85.938 (86.404)	lr 0.00543
Train [83][3180/3239]	Time 0.231 (0.623)	Data Time 0.000 (0.014)	Loss 2.5714 (2.3401)	Entropy 0.89211 (0.89605)	Top-1 acc 62.109 (67.935)	Top-5 acc 81.641 (86.401)	lr 0.00542
Train [83][3190/3239]	Time 0.223 (0.623)	Data Time 0.000 (0.014)	Loss 2.3012 (2.3400)	Entropy 0.89206 (0.89603)	Top-1 acc 67.969 (67.941)	Top-5 acc 86.328 (86.402)	lr 0.00542
Train [83][3200/3239]	Time 0.227 (0.622)	Data Time 0.000 (0.014)	Loss 2.3752 (2.3403)	Entropy 0.89212 (0.89602)	Top-1 acc 66.406 (67.932)	Top-5 acc 87.109 (86.395)	lr 0.00542
Train [83][3210/3239]	Time 0.222 (0.622)	Data Time 0.000 (0.014)	Loss 2.4196 (2.3403)	Entropy 0.89205 (0.89601)	Top-1 acc 69.141 (67.933)	Top-5 acc 83.984 (86.396)	lr 0.00542
Train [83][3220/3239]	Time 0.231 (0.621)	Data Time 0.000 (0.014)	Loss 2.6339 (2.3404)	Entropy 0.89205 (0.89600)	Top-1 acc 59.766 (67.929)	Top-5 acc 80.859 (86.395)	lr 0.00542
Train [83][3230/3239]	Time 0.232 (0.621)	Data Time 0.000 (0.014)	Loss 2.3189 (2.3405)	Entropy 0.89205 (0.89598)	Top-1 acc 69.922 (67.926)	Top-5 acc 86.719 (86.395)	lr 0.00542
Train [83][3239/3239]	Time 2.331 (0.620)	Data Time 0.000 (0.014)	Loss 2.6536 (2.3405)	Entropy 0.89205 (0.89597)	Top-1 acc 64.198 (67.924)	Top-5 acc 86.420 (86.396)	lr 0.00542
==========Valid [83/120]	loss 1.289	top-1 acc 70.789 (70.789)	top-5 acc 88.558	Train top-1 67.924	top-5 86.396	Entropy 0.89205	Latency-None: 0.000ms	Flops: 546.53M
Train [84][0/3239]	Time 38.467 (38.467)	Data Time 37.131 (37.131)	Loss 2.1321 (2.1321)	Entropy 0.89195 (0.89195)	Top-1 acc 72.266 (72.266)	Top-5 acc 90.625 (90.625)	lr 0.00542
Train [84][10/3239]	Time 2.717 (4.152)	Data Time 0.002 (3.480)	Loss 2.3692 (2.3026)	Entropy 0.89195 (0.89195)	Top-1 acc 68.750 (69.283)	Top-5 acc 85.156 (87.571)	lr 0.00542
Train [84][20/3239]	Time 0.228 (2.288)	Data Time 0.001 (1.823)	Loss 2.4118 (2.3183)	Entropy 0.89190 (0.89192)	Top-1 acc 64.453 (68.248)	Top-5 acc 83.984 (87.202)	lr 0.00542
Train [84][30/3239]	Time 0.225 (1.699)	Data Time 0.001 (1.237)	Loss 2.3674 (2.3331)	Entropy 0.89191 (0.89192)	Top-1 acc 68.359 (67.906)	Top-5 acc 85.156 (86.530)	lr 0.00542
Train [84][40/3239]	Time 0.253 (1.397)	Data Time 0.001 (0.935)	Loss 2.3377 (2.3398)	Entropy 0.89185 (0.89190)	Top-1 acc 71.094 (68.016)	Top-5 acc 86.328 (86.385)	lr 0.00542
Train [84][50/3239]	Time 0.345 (1.219)	Data Time 0.001 (0.752)	Loss 2.4582 (2.3322)	Entropy 0.89175 (0.89188)	Top-1 acc 66.797 (68.321)	Top-5 acc 85.156 (86.566)	lr 0.00542
Train [84][60/3239]	Time 0.236 (1.096)	Data Time 0.001 (0.629)	Loss 2.1817 (2.3255)	Entropy 0.89167 (0.89185)	Top-1 acc 71.094 (68.334)	Top-5 acc 88.281 (86.680)	lr 0.00541
Train [84][70/3239]	Time 0.236 (1.010)	Data Time 0.001 (0.541)	Loss 2.4178 (2.3194)	Entropy 0.89169 (0.89183)	Top-1 acc 64.844 (68.403)	Top-5 acc 83.984 (86.785)	lr 0.00541
Train [84][80/3239]	Time 0.238 (0.946)	Data Time 0.001 (0.474)	Loss 2.2236 (2.3194)	Entropy 0.89162 (0.89181)	Top-1 acc 69.141 (68.374)	Top-5 acc 89.062 (86.839)	lr 0.00541
Train [84][90/3239]	Time 0.261 (0.896)	Data Time 0.001 (0.422)	Loss 2.3185 (2.3170)	Entropy 0.89157 (0.89178)	Top-1 acc 67.969 (68.458)	Top-5 acc 88.281 (86.899)	lr 0.00541
Train [84][100/3239]	Time 0.223 (0.857)	Data Time 0.001 (0.381)	Loss 2.3822 (2.3170)	Entropy 0.89141 (0.89176)	Top-1 acc 66.016 (68.429)	Top-5 acc 86.328 (86.908)	lr 0.00541
Train [84][110/3239]	Time 0.237 (0.824)	Data Time 0.002 (0.346)	Loss 2.3944 (2.3177)	Entropy 0.89142 (0.89173)	Top-1 acc 66.406 (68.366)	Top-5 acc 85.156 (86.881)	lr 0.00541
Train [84][120/3239]	Time 2.523 (0.795)	Data Time 0.001 (0.318)	Loss 2.2286 (2.3132)	Entropy 0.89142 (0.89170)	Top-1 acc 69.922 (68.511)	Top-5 acc 91.016 (86.974)	lr 0.00541
Train [84][130/3239]	Time 0.229 (0.752)	Data Time 0.001 (0.294)	Loss 2.3813 (2.3115)	Entropy 0.89140 (0.89168)	Top-1 acc 67.969 (68.601)	Top-5 acc 85.156 (86.999)	lr 0.00541
Train [84][140/3239]	Time 0.224 (0.732)	Data Time 0.001 (0.273)	Loss 2.4612 (2.3153)	Entropy 0.89136 (0.89166)	Top-1 acc 64.453 (68.498)	Top-5 acc 82.812 (86.918)	lr 0.00541
Train [84][150/3239]	Time 0.264 (0.717)	Data Time 0.002 (0.255)	Loss 2.2814 (2.3166)	Entropy 0.89130 (0.89163)	Top-1 acc 70.703 (68.414)	Top-5 acc 88.672 (86.900)	lr 0.00541
Train [84][160/3239]	Time 0.260 (0.706)	Data Time 0.001 (0.239)	Loss 2.1418 (2.3183)	Entropy 0.89124 (0.89161)	Top-1 acc 71.484 (68.379)	Top-5 acc 92.188 (86.867)	lr 0.00541
Train [84][170/3239]	Time 0.239 (0.696)	Data Time 0.001 (0.225)	Loss 2.2863 (2.3218)	Entropy 0.89127 (0.89159)	Top-1 acc 70.703 (68.279)	Top-5 acc 87.109 (86.826)	lr 0.00541
Train [84][180/3239]	Time 0.330 (0.686)	Data Time 0.002 (0.213)	Loss 2.4445 (2.3220)	Entropy 0.89126 (0.89157)	Top-1 acc 65.625 (68.249)	Top-5 acc 83.203 (86.855)	lr 0.00540
Train [84][190/3239]	Time 0.240 (0.676)	Data Time 0.001 (0.202)	Loss 2.2990 (2.3201)	Entropy 0.89123 (0.89156)	Top-1 acc 69.922 (68.329)	Top-5 acc 89.453 (86.872)	lr 0.00540
Train [84][200/3239]	Time 0.237 (0.667)	Data Time 0.001 (0.192)	Loss 2.3110 (2.3212)	Entropy 0.89120 (0.89154)	Top-1 acc 68.750 (68.289)	Top-5 acc 88.672 (86.865)	lr 0.00540
Train [84][210/3239]	Time 0.273 (0.926)	Data Time 0.002 (0.183)	Loss 2.2278 (2.3208)	Entropy 0.89118 (0.89152)	Top-1 acc 69.531 (68.221)	Top-5 acc 86.328 (86.887)	lr 0.00540
Train [84][220/3239]	Time 0.369 (0.907)	Data Time 0.002 (0.175)	Loss 2.2604 (2.3221)	Entropy 0.89116 (0.89151)	Top-1 acc 68.750 (68.172)	Top-5 acc 87.891 (86.858)	lr 0.00540
Train [84][230/3239]	Time 2.550 (0.889)	Data Time 0.002 (0.167)	Loss 2.4946 (2.3236)	Entropy 0.89116 (0.89149)	Top-1 acc 64.844 (68.119)	Top-5 acc 85.938 (86.849)	lr 0.00540
Train [84][240/3239]	Time 0.255 (0.862)	Data Time 0.002 (0.161)	Loss 2.2415 (2.3225)	Entropy 0.89113 (0.89148)	Top-1 acc 72.656 (68.139)	Top-5 acc 87.891 (86.850)	lr 0.00540
Train [84][250/3239]	Time 0.235 (0.847)	Data Time 0.001 (0.154)	Loss 2.4621 (2.3242)	Entropy 0.89112 (0.89146)	Top-1 acc 60.547 (68.132)	Top-5 acc 85.547 (86.807)	lr 0.00540
Train [84][260/3239]	Time 0.220 (0.833)	Data Time 0.001 (0.148)	Loss 2.4658 (2.3246)	Entropy 0.89112 (0.89145)	Top-1 acc 61.719 (68.100)	Top-5 acc 86.328 (86.809)	lr 0.00540
Train [84][270/3239]	Time 0.232 (0.820)	Data Time 0.001 (0.143)	Loss 2.2603 (2.3249)	Entropy 0.89100 (0.89143)	Top-1 acc 69.141 (68.106)	Top-5 acc 89.844 (86.784)	lr 0.00540
Train [84][280/3239]	Time 0.225 (0.808)	Data Time 0.001 (0.138)	Loss 2.2946 (2.3236)	Entropy 0.89098 (0.89142)	Top-1 acc 71.094 (68.174)	Top-5 acc 85.938 (86.780)	lr 0.00540
Train [84][290/3239]	Time 0.228 (0.797)	Data Time 0.001 (0.133)	Loss 2.3200 (2.3245)	Entropy 0.89097 (0.89140)	Top-1 acc 67.969 (68.163)	Top-5 acc 85.938 (86.752)	lr 0.00540
Train [84][300/3239]	Time 0.239 (0.787)	Data Time 0.001 (0.129)	Loss 2.3567 (2.3291)	Entropy 0.89094 (0.89139)	Top-1 acc 64.062 (68.041)	Top-5 acc 87.500 (86.679)	lr 0.00539
Train [84][310/3239]	Time 0.234 (0.778)	Data Time 0.001 (0.125)	Loss 2.3622 (2.3302)	Entropy 0.89092 (0.89137)	Top-1 acc 69.141 (68.038)	Top-5 acc 82.812 (86.657)	lr 0.00539
Train [84][320/3239]	Time 0.239 (0.769)	Data Time 0.001 (0.121)	Loss 2.3270 (2.3291)	Entropy 0.89096 (0.89136)	Top-1 acc 69.141 (68.054)	Top-5 acc 86.328 (86.673)	lr 0.00539
Train [84][330/3239]	Time 0.245 (0.760)	Data Time 0.001 (0.117)	Loss 2.3338 (2.3272)	Entropy 0.89092 (0.89135)	Top-1 acc 67.969 (68.121)	Top-5 acc 86.328 (86.708)	lr 0.00539
Train [84][340/3239]	Time 2.474 (0.752)	Data Time 0.002 (0.114)	Loss 2.3630 (2.3271)	Entropy 0.89092 (0.89134)	Top-1 acc 67.969 (68.138)	Top-5 acc 84.375 (86.699)	lr 0.00539
Train [84][350/3239]	Time 0.385 (0.737)	Data Time 0.001 (0.111)	Loss 2.2912 (2.3255)	Entropy 0.89090 (0.89132)	Top-1 acc 70.703 (68.209)	Top-5 acc 86.328 (86.733)	lr 0.00539
Train [84][360/3239]	Time 0.276 (0.731)	Data Time 0.002 (0.108)	Loss 2.3941 (2.3262)	Entropy 0.89091 (0.89131)	Top-1 acc 64.453 (68.181)	Top-5 acc 85.938 (86.711)	lr 0.00539
Train [84][370/3239]	Time 0.228 (0.724)	Data Time 0.001 (0.105)	Loss 2.1299 (2.3267)	Entropy 0.89086 (0.89130)	Top-1 acc 75.000 (68.180)	Top-5 acc 89.062 (86.700)	lr 0.00539
Train [84][380/3239]	Time 0.223 (0.718)	Data Time 0.001 (0.102)	Loss 2.3883 (2.3254)	Entropy 0.89084 (0.89129)	Top-1 acc 65.625 (68.194)	Top-5 acc 86.328 (86.728)	lr 0.00539
Train [84][390/3239]	Time 0.218 (0.712)	Data Time 0.001 (0.100)	Loss 2.2429 (2.3246)	Entropy 0.89086 (0.89128)	Top-1 acc 69.531 (68.203)	Top-5 acc 89.062 (86.743)	lr 0.00539
Train [84][400/3239]	Time 0.249 (0.707)	Data Time 0.001 (0.097)	Loss 2.2788 (2.3245)	Entropy 0.89082 (0.89127)	Top-1 acc 71.094 (68.230)	Top-5 acc 86.719 (86.736)	lr 0.00539
Train [84][410/3239]	Time 0.229 (0.701)	Data Time 0.001 (0.095)	Loss 2.1947 (2.3242)	Entropy 0.89059 (0.89125)	Top-1 acc 69.141 (68.249)	Top-5 acc 89.844 (86.739)	lr 0.00539
Train [84][420/3239]	Time 0.228 (0.696)	Data Time 0.001 (0.093)	Loss 2.2375 (2.3251)	Entropy 0.89059 (0.89124)	Top-1 acc 69.922 (68.240)	Top-5 acc 89.453 (86.733)	lr 0.00539
Train [84][430/3239]	Time 0.236 (0.691)	Data Time 0.001 (0.091)	Loss 2.4076 (2.3252)	Entropy 0.89056 (0.89122)	Top-1 acc 69.141 (68.248)	Top-5 acc 82.812 (86.719)	lr 0.00538
Train [84][440/3239]	Time 0.230 (0.686)	Data Time 0.001 (0.089)	Loss 2.4242 (2.3245)	Entropy 0.89053 (0.89121)	Top-1 acc 66.406 (68.271)	Top-5 acc 84.766 (86.728)	lr 0.00538
Train [84][450/3239]	Time 2.624 (0.682)	Data Time 0.001 (0.087)	Loss 2.3789 (2.3275)	Entropy 0.89053 (0.89119)	Top-1 acc 65.625 (68.198)	Top-5 acc 85.938 (86.688)	lr 0.00538
Train [84][460/3239]	Time 0.240 (0.672)	Data Time 0.001 (0.085)	Loss 2.6071 (2.3270)	Entropy 0.89046 (0.89118)	Top-1 acc 59.375 (68.202)	Top-5 acc 82.422 (86.700)	lr 0.00538
Train [84][470/3239]	Time 0.239 (0.668)	Data Time 0.001 (0.083)	Loss 2.2915 (2.3299)	Entropy 0.89044 (0.89116)	Top-1 acc 67.969 (68.136)	Top-5 acc 88.672 (86.656)	lr 0.00538
Train [84][480/3239]	Time 0.367 (0.664)	Data Time 0.001 (0.081)	Loss 2.3645 (2.3298)	Entropy 0.89036 (0.89114)	Top-1 acc 68.359 (68.146)	Top-5 acc 83.984 (86.654)	lr 0.00538
Train [84][490/3239]	Time 0.240 (0.661)	Data Time 0.001 (0.080)	Loss 2.4313 (2.3302)	Entropy 0.89030 (0.89113)	Top-1 acc 64.453 (68.129)	Top-5 acc 86.719 (86.656)	lr 0.00538
Train [84][500/3239]	Time 0.243 (0.657)	Data Time 0.001 (0.078)	Loss 2.3380 (2.3296)	Entropy 0.89030 (0.89111)	Top-1 acc 68.359 (68.122)	Top-5 acc 86.328 (86.672)	lr 0.00538
Train [84][510/3239]	Time 0.242 (0.654)	Data Time 0.001 (0.077)	Loss 2.2784 (2.3295)	Entropy 0.89030 (0.89109)	Top-1 acc 67.578 (68.124)	Top-5 acc 87.891 (86.677)	lr 0.00538
Train [84][520/3239]	Time 0.255 (0.651)	Data Time 0.001 (0.075)	Loss 2.2892 (2.3296)	Entropy 0.89025 (0.89108)	Top-1 acc 70.703 (68.122)	Top-5 acc 87.500 (86.676)	lr 0.00538
Train [84][530/3239]	Time 0.255 (0.648)	Data Time 0.002 (0.074)	Loss 2.3507 (2.3295)	Entropy 0.89025 (0.89106)	Top-1 acc 68.359 (68.128)	Top-5 acc 86.328 (86.673)	lr 0.00538
Train [84][540/3239]	Time 0.224 (0.645)	Data Time 0.001 (0.072)	Loss 2.4901 (2.3305)	Entropy 0.89023 (0.89105)	Top-1 acc 62.109 (68.107)	Top-5 acc 83.203 (86.651)	lr 0.00538
Train [84][550/3239]	Time 0.216 (0.642)	Data Time 0.001 (0.071)	Loss 2.3142 (2.3303)	Entropy 0.89018 (0.89103)	Top-1 acc 64.844 (68.106)	Top-5 acc 88.281 (86.662)	lr 0.00537
Train [84][560/3239]	Time 2.559 (0.639)	Data Time 0.001 (0.070)	Loss 2.2437 (2.3302)	Entropy 0.89018 (0.89102)	Top-1 acc 70.312 (68.099)	Top-5 acc 85.156 (86.660)	lr 0.00537
Train [84][570/3239]	Time 0.322 (0.632)	Data Time 0.001 (0.069)	Loss 2.5428 (2.3298)	Entropy 0.89006 (0.89100)	Top-1 acc 61.719 (68.106)	Top-5 acc 81.641 (86.660)	lr 0.00537
Train [84][580/3239]	Time 0.245 (0.723)	Data Time 0.002 (0.068)	Loss 2.3322 (2.3300)	Entropy 0.88992 (0.89098)	Top-1 acc 66.797 (68.094)	Top-5 acc 85.156 (86.654)	lr 0.00537
Train [84][590/3239]	Time 0.251 (0.719)	Data Time 0.002 (0.066)	Loss 2.0933 (2.3292)	Entropy 0.88988 (0.89096)	Top-1 acc 74.219 (68.111)	Top-5 acc 90.234 (86.669)	lr 0.00537
Train [84][600/3239]	Time 0.231 (0.715)	Data Time 0.001 (0.065)	Loss 2.1684 (2.3291)	Entropy 0.88987 (0.89095)	Top-1 acc 73.047 (68.127)	Top-5 acc 89.453 (86.670)	lr 0.00537
Train [84][610/3239]	Time 0.316 (0.711)	Data Time 0.001 (0.064)	Loss 2.4131 (2.3284)	Entropy 0.88986 (0.89093)	Top-1 acc 67.969 (68.143)	Top-5 acc 83.203 (86.681)	lr 0.00537
Train [84][620/3239]	Time 0.229 (0.707)	Data Time 0.001 (0.063)	Loss 2.2698 (2.3286)	Entropy 0.88998 (0.89091)	Top-1 acc 69.531 (68.136)	Top-5 acc 88.281 (86.672)	lr 0.00537
Train [84][630/3239]	Time 0.223 (0.703)	Data Time 0.001 (0.062)	Loss 2.3377 (2.3282)	Entropy 0.88996 (0.89090)	Top-1 acc 68.750 (68.158)	Top-5 acc 87.109 (86.681)	lr 0.00537
Train [84][640/3239]	Time 0.231 (0.700)	Data Time 0.001 (0.061)	Loss 2.2372 (2.3283)	Entropy 0.88992 (0.89088)	Top-1 acc 69.531 (68.150)	Top-5 acc 87.500 (86.674)	lr 0.00537
Train [84][650/3239]	Time 0.238 (0.696)	Data Time 0.002 (0.061)	Loss 2.3165 (2.3277)	Entropy 0.88988 (0.89087)	Top-1 acc 68.750 (68.167)	Top-5 acc 85.547 (86.684)	lr 0.00537
Train [84][660/3239]	Time 0.221 (0.693)	Data Time 0.001 (0.060)	Loss 2.4126 (2.3272)	Entropy 0.88980 (0.89085)	Top-1 acc 66.016 (68.184)	Top-5 acc 84.766 (86.693)	lr 0.00537
Train [84][670/3239]	Time 2.499 (0.690)	Data Time 0.002 (0.059)	Loss 2.1775 (2.3269)	Entropy 0.88980 (0.89084)	Top-1 acc 73.828 (68.178)	Top-5 acc 86.328 (86.702)	lr 0.00536
Train [84][680/3239]	Time 0.265 (0.683)	Data Time 0.001 (0.058)	Loss 2.1594 (2.3268)	Entropy 0.88977 (0.89082)	Top-1 acc 72.656 (68.198)	Top-5 acc 90.234 (86.710)	lr 0.00536
Train [84][690/3239]	Time 0.234 (0.680)	Data Time 0.001 (0.057)	Loss 2.4553 (2.3263)	Entropy 0.88979 (0.89081)	Top-1 acc 66.406 (68.215)	Top-5 acc 84.766 (86.720)	lr 0.00536
Train [84][700/3239]	Time 0.344 (0.677)	Data Time 0.001 (0.056)	Loss 2.1500 (2.3259)	Entropy 0.88966 (0.89079)	Top-1 acc 73.438 (68.225)	Top-5 acc 89.062 (86.727)	lr 0.00536
Train [84][710/3239]	Time 0.234 (0.674)	Data Time 0.001 (0.056)	Loss 2.3497 (2.3262)	Entropy 0.88969 (0.89077)	Top-1 acc 66.016 (68.216)	Top-5 acc 84.375 (86.714)	lr 0.00536
Train [84][720/3239]	Time 0.225 (0.671)	Data Time 0.001 (0.055)	Loss 2.4962 (2.3261)	Entropy 0.88960 (0.89076)	Top-1 acc 65.625 (68.219)	Top-5 acc 84.375 (86.717)	lr 0.00536
Train [84][730/3239]	Time 0.246 (0.669)	Data Time 0.001 (0.054)	Loss 2.2439 (2.3258)	Entropy 0.88964 (0.89074)	Top-1 acc 69.531 (68.210)	Top-5 acc 87.109 (86.723)	lr 0.00536
Train [84][740/3239]	Time 0.312 (0.666)	Data Time 0.001 (0.053)	Loss 2.3010 (2.3253)	Entropy 0.88957 (0.89073)	Top-1 acc 69.531 (68.218)	Top-5 acc 86.719 (86.729)	lr 0.00536
Train [84][750/3239]	Time 0.212 (0.664)	Data Time 0.001 (0.053)	Loss 2.3128 (2.3253)	Entropy 0.88987 (0.89071)	Top-1 acc 66.016 (68.213)	Top-5 acc 89.844 (86.737)	lr 0.00536
Train [84][760/3239]	Time 0.231 (0.661)	Data Time 0.001 (0.052)	Loss 2.2123 (2.3271)	Entropy 0.88989 (0.89070)	Top-1 acc 70.312 (68.165)	Top-5 acc 89.062 (86.702)	lr 0.00536
Train [84][770/3239]	Time 0.281 (0.659)	Data Time 0.001 (0.051)	Loss 2.3006 (2.3274)	Entropy 0.88989 (0.89069)	Top-1 acc 67.969 (68.149)	Top-5 acc 84.766 (86.695)	lr 0.00536
Train [84][780/3239]	Time 2.629 (0.657)	Data Time 0.001 (0.051)	Loss 2.2550 (2.3264)	Entropy 0.88989 (0.89068)	Top-1 acc 67.969 (68.165)	Top-5 acc 89.062 (86.714)	lr 0.00536
Train [84][790/3239]	Time 0.228 (0.652)	Data Time 0.001 (0.050)	Loss 2.4987 (2.3261)	Entropy 0.88986 (0.89067)	Top-1 acc 63.281 (68.169)	Top-5 acc 82.812 (86.718)	lr 0.00535
Train [84][800/3239]	Time 0.243 (0.650)	Data Time 0.001 (0.049)	Loss 2.4141 (2.3266)	Entropy 0.88985 (0.89066)	Top-1 acc 62.891 (68.152)	Top-5 acc 85.547 (86.712)	lr 0.00535
Train [84][810/3239]	Time 0.229 (0.647)	Data Time 0.001 (0.049)	Loss 2.4358 (2.3261)	Entropy 0.88974 (0.89065)	Top-1 acc 63.672 (68.162)	Top-5 acc 83.594 (86.723)	lr 0.00535
Train [84][820/3239]	Time 0.235 (0.645)	Data Time 0.001 (0.048)	Loss 2.3587 (2.3264)	Entropy 0.88973 (0.89064)	Top-1 acc 66.797 (68.156)	Top-5 acc 87.891 (86.721)	lr 0.00535
Train [84][830/3239]	Time 0.219 (0.644)	Data Time 0.001 (0.048)	Loss 2.1954 (2.3260)	Entropy 0.88961 (0.89063)	Top-1 acc 73.438 (68.174)	Top-5 acc 89.844 (86.725)	lr 0.00535
Train [84][840/3239]	Time 0.217 (0.642)	Data Time 0.001 (0.047)	Loss 2.3919 (2.3258)	Entropy 0.88958 (0.89062)	Top-1 acc 65.625 (68.179)	Top-5 acc 86.328 (86.729)	lr 0.00535
Train [84][850/3239]	Time 0.226 (0.639)	Data Time 0.001 (0.047)	Loss 2.2722 (2.3260)	Entropy 0.88950 (0.89060)	Top-1 acc 71.875 (68.183)	Top-5 acc 87.109 (86.725)	lr 0.00535
Train [84][860/3239]	Time 0.232 (0.637)	Data Time 0.001 (0.046)	Loss 2.3366 (2.3255)	Entropy 0.88944 (0.89059)	Top-1 acc 66.406 (68.206)	Top-5 acc 87.500 (86.733)	lr 0.00535
Train [84][870/3239]	Time 0.328 (0.636)	Data Time 0.001 (0.046)	Loss 2.3917 (2.3254)	Entropy 0.88943 (0.89058)	Top-1 acc 65.234 (68.204)	Top-5 acc 84.766 (86.733)	lr 0.00535
Train [84][880/3239]	Time 0.218 (0.634)	Data Time 0.001 (0.045)	Loss 2.2531 (2.3251)	Entropy 0.88941 (0.89056)	Top-1 acc 70.703 (68.214)	Top-5 acc 86.719 (86.744)	lr 0.00535
Train [84][890/3239]	Time 2.470 (0.632)	Data Time 0.002 (0.045)	Loss 2.2842 (2.3245)	Entropy 0.88941 (0.89055)	Top-1 acc 67.578 (68.223)	Top-5 acc 86.719 (86.753)	lr 0.00535
Train [84][900/3239]	Time 0.247 (0.628)	Data Time 0.001 (0.044)	Loss 2.4191 (2.3240)	Entropy 0.88945 (0.89054)	Top-1 acc 69.922 (68.242)	Top-5 acc 84.375 (86.756)	lr 0.00535
Train [84][910/3239]	Time 0.244 (0.626)	Data Time 0.001 (0.044)	Loss 2.5587 (2.3249)	Entropy 0.88937 (0.89053)	Top-1 acc 66.797 (68.220)	Top-5 acc 80.859 (86.734)	lr 0.00534
Train [84][920/3239]	Time 0.220 (0.625)	Data Time 0.001 (0.043)	Loss 2.4221 (2.3255)	Entropy 0.88942 (0.89051)	Top-1 acc 63.281 (68.205)	Top-5 acc 86.328 (86.721)	lr 0.00534
Train [84][930/3239]	Time 0.230 (0.623)	Data Time 0.001 (0.043)	Loss 2.3116 (2.3257)	Entropy 0.88946 (0.89050)	Top-1 acc 70.312 (68.193)	Top-5 acc 84.375 (86.713)	lr 0.00534
Train [84][940/3239]	Time 0.274 (0.673)	Data Time 0.005 (0.042)	Loss 2.4216 (2.3262)	Entropy 0.88949 (0.89049)	Top-1 acc 61.719 (68.171)	Top-5 acc 86.719 (86.709)	lr 0.00534
Train [84][950/3239]	Time 0.235 (0.673)	Data Time 0.002 (0.042)	Loss 2.4680 (2.3260)	Entropy 0.88945 (0.89048)	Top-1 acc 59.766 (68.167)	Top-5 acc 84.766 (86.718)	lr 0.00534
Train [84][960/3239]	Time 0.220 (0.671)	Data Time 0.002 (0.042)	Loss 2.2577 (2.3253)	Entropy 0.88942 (0.89047)	Top-1 acc 70.312 (68.189)	Top-5 acc 87.891 (86.733)	lr 0.00534
Train [84][970/3239]	Time 0.238 (0.669)	Data Time 0.001 (0.041)	Loss 2.4411 (2.3255)	Entropy 0.88938 (0.89046)	Top-1 acc 67.578 (68.183)	Top-5 acc 85.938 (86.734)	lr 0.00534
Train [84][980/3239]	Time 0.214 (0.667)	Data Time 0.001 (0.041)	Loss 2.4447 (2.3258)	Entropy 0.88943 (0.89045)	Top-1 acc 62.500 (68.182)	Top-5 acc 85.938 (86.730)	lr 0.00534
Train [84][990/3239]	Time 0.220 (0.665)	Data Time 0.001 (0.040)	Loss 2.3592 (2.3263)	Entropy 0.88936 (0.89044)	Top-1 acc 62.891 (68.156)	Top-5 acc 87.500 (86.725)	lr 0.00534
Train [84][1000/3239]	Time 2.709 (0.663)	Data Time 0.001 (0.040)	Loss 2.2849 (2.3260)	Entropy 0.88936 (0.89043)	Top-1 acc 69.531 (68.171)	Top-5 acc 89.453 (86.734)	lr 0.00534
Train [84][1010/3239]	Time 0.268 (0.659)	Data Time 0.001 (0.040)	Loss 2.3902 (2.3261)	Entropy 0.88934 (0.89042)	Top-1 acc 67.188 (68.175)	Top-5 acc 85.547 (86.733)	lr 0.00534
Train [84][1020/3239]	Time 0.232 (0.657)	Data Time 0.001 (0.039)	Loss 2.3411 (2.3262)	Entropy 0.88933 (0.89041)	Top-1 acc 67.578 (68.180)	Top-5 acc 86.719 (86.735)	lr 0.00534
Train [84][1030/3239]	Time 0.230 (0.655)	Data Time 0.001 (0.039)	Loss 2.3956 (2.3261)	Entropy 0.88930 (0.89039)	Top-1 acc 63.281 (68.192)	Top-5 acc 86.328 (86.735)	lr 0.00533
Train [84][1040/3239]	Time 0.330 (0.654)	Data Time 0.001 (0.038)	Loss 2.4668 (2.3260)	Entropy 0.88930 (0.89038)	Top-1 acc 63.672 (68.199)	Top-5 acc 82.422 (86.725)	lr 0.00533
Train [84][1050/3239]	Time 0.241 (0.652)	Data Time 0.002 (0.038)	Loss 2.2150 (2.3259)	Entropy 0.88922 (0.89037)	Top-1 acc 71.484 (68.198)	Top-5 acc 87.500 (86.720)	lr 0.00533
Train [84][1060/3239]	Time 0.246 (0.650)	Data Time 0.001 (0.038)	Loss 2.3102 (2.3260)	Entropy 0.88920 (0.89036)	Top-1 acc 65.625 (68.200)	Top-5 acc 85.938 (86.721)	lr 0.00533
Train [84][1070/3239]	Time 0.232 (0.649)	Data Time 0.001 (0.037)	Loss 2.4358 (2.3264)	Entropy 0.89009 (0.89035)	Top-1 acc 66.797 (68.191)	Top-5 acc 85.156 (86.723)	lr 0.00533
Train [84][1080/3239]	Time 0.238 (0.647)	Data Time 0.002 (0.037)	Loss 2.4076 (2.3264)	Entropy 0.89007 (0.89035)	Top-1 acc 63.672 (68.191)	Top-5 acc 84.375 (86.717)	lr 0.00533
Train [84][1090/3239]	Time 0.234 (0.646)	Data Time 0.001 (0.037)	Loss 2.3257 (2.3266)	Entropy 0.89004 (0.89035)	Top-1 acc 68.750 (68.184)	Top-5 acc 87.891 (86.720)	lr 0.00533
Train [84][1100/3239]	Time 0.273 (0.644)	Data Time 0.001 (0.036)	Loss 2.4654 (2.3268)	Entropy 0.89006 (0.89035)	Top-1 acc 63.281 (68.184)	Top-5 acc 84.375 (86.716)	lr 0.00533
Train [84][1110/3239]	Time 2.547 (0.643)	Data Time 0.002 (0.036)	Loss 2.1703 (2.3269)	Entropy 0.89006 (0.89034)	Top-1 acc 74.219 (68.178)	Top-5 acc 88.281 (86.712)	lr 0.00533
Train [84][1120/3239]	Time 0.240 (0.639)	Data Time 0.001 (0.036)	Loss 2.5455 (2.3271)	Entropy 0.89003 (0.89034)	Top-1 acc 57.422 (68.172)	Top-5 acc 82.422 (86.706)	lr 0.00533
Train [84][1130/3239]	Time 0.331 (0.638)	Data Time 0.001 (0.036)	Loss 2.3903 (2.3274)	Entropy 0.89002 (0.89034)	Top-1 acc 68.750 (68.164)	Top-5 acc 83.594 (86.697)	lr 0.00533
Train [84][1140/3239]	Time 0.237 (0.636)	Data Time 0.001 (0.035)	Loss 2.1673 (2.3278)	Entropy 0.89001 (0.89034)	Top-1 acc 71.484 (68.165)	Top-5 acc 91.406 (86.692)	lr 0.00533
Train [84][1150/3239]	Time 0.240 (0.635)	Data Time 0.001 (0.035)	Loss 2.3167 (2.3273)	Entropy 0.89001 (0.89033)	Top-1 acc 66.406 (68.177)	Top-5 acc 86.328 (86.699)	lr 0.00532
Train [84][1160/3239]	Time 0.242 (0.634)	Data Time 0.001 (0.035)	Loss 2.3647 (2.3272)	Entropy 0.88996 (0.89033)	Top-1 acc 66.406 (68.175)	Top-5 acc 86.328 (86.698)	lr 0.00532
Train [84][1170/3239]	Time 0.232 (0.632)	Data Time 0.001 (0.034)	Loss 2.3069 (2.3269)	Entropy 0.88994 (0.89033)	Top-1 acc 69.141 (68.181)	Top-5 acc 85.156 (86.697)	lr 0.00532
Train [84][1180/3239]	Time 0.230 (0.631)	Data Time 0.001 (0.034)	Loss 2.3887 (2.3270)	Entropy 0.88994 (0.89032)	Top-1 acc 64.453 (68.175)	Top-5 acc 85.938 (86.695)	lr 0.00532
Train [84][1190/3239]	Time 0.228 (0.630)	Data Time 0.001 (0.034)	Loss 2.3206 (2.3269)	Entropy 0.88997 (0.89032)	Top-1 acc 64.453 (68.168)	Top-5 acc 87.500 (86.692)	lr 0.00532
Train [84][1200/3239]	Time 0.207 (0.628)	Data Time 0.001 (0.034)	Loss 2.1393 (2.3265)	Entropy 0.88998 (0.89032)	Top-1 acc 73.828 (68.171)	Top-5 acc 90.234 (86.694)	lr 0.00532
Train [84][1210/3239]	Time 0.220 (0.627)	Data Time 0.001 (0.033)	Loss 2.3700 (2.3265)	Entropy 0.88993 (0.89031)	Top-1 acc 67.969 (68.172)	Top-5 acc 86.328 (86.689)	lr 0.00532
Train [84][1220/3239]	Time 2.581 (0.626)	Data Time 0.001 (0.033)	Loss 2.1821 (2.3265)	Entropy 0.88993 (0.89031)	Top-1 acc 70.312 (68.168)	Top-5 acc 89.062 (86.684)	lr 0.00532
Train [84][1230/3239]	Time 0.226 (0.623)	Data Time 0.001 (0.033)	Loss 2.6514 (2.3266)	Entropy 0.88991 (0.89031)	Top-1 acc 63.281 (68.172)	Top-5 acc 82.812 (86.683)	lr 0.00532
Train [84][1240/3239]	Time 0.228 (0.621)	Data Time 0.001 (0.033)	Loss 2.4345 (2.3267)	Entropy 0.88983 (0.89030)	Top-1 acc 63.672 (68.172)	Top-5 acc 84.766 (86.683)	lr 0.00532
Train [84][1250/3239]	Time 0.237 (0.620)	Data Time 0.001 (0.032)	Loss 2.4299 (2.3269)	Entropy 0.88979 (0.89030)	Top-1 acc 67.188 (68.169)	Top-5 acc 83.594 (86.678)	lr 0.00532
Train [84][1260/3239]	Time 0.220 (0.619)	Data Time 0.001 (0.032)	Loss 2.3323 (2.3268)	Entropy 0.88978 (0.89030)	Top-1 acc 66.016 (68.167)	Top-5 acc 87.891 (86.678)	lr 0.00532
Train [84][1270/3239]	Time 0.229 (0.618)	Data Time 0.001 (0.032)	Loss 2.3287 (2.3268)	Entropy 0.88970 (0.89029)	Top-1 acc 68.750 (68.175)	Top-5 acc 87.109 (86.682)	lr 0.00531
Train [84][1280/3239]	Time 0.237 (0.617)	Data Time 0.001 (0.032)	Loss 2.2708 (2.3268)	Entropy 0.88964 (0.89029)	Top-1 acc 68.750 (68.179)	Top-5 acc 88.281 (86.674)	lr 0.00531
Train [84][1290/3239]	Time 0.231 (0.616)	Data Time 0.001 (0.031)	Loss 2.3246 (2.3267)	Entropy 0.88961 (0.89028)	Top-1 acc 67.578 (68.181)	Top-5 acc 87.500 (86.677)	lr 0.00531
Train [84][1300/3239]	Time 0.249 (0.658)	Data Time 0.002 (0.031)	Loss 2.4039 (2.3265)	Entropy 0.88957 (0.89028)	Top-1 acc 63.672 (68.177)	Top-5 acc 85.938 (86.682)	lr 0.00531
Train [84][1310/3239]	Time 0.248 (0.656)	Data Time 0.002 (0.031)	Loss 2.3525 (2.3265)	Entropy 0.88955 (0.89027)	Top-1 acc 66.797 (68.173)	Top-5 acc 87.500 (86.685)	lr 0.00531
Train [84][1320/3239]	Time 0.282 (0.655)	Data Time 0.002 (0.031)	Loss 2.4638 (2.3266)	Entropy 0.88947 (0.89027)	Top-1 acc 67.969 (68.169)	Top-5 acc 84.375 (86.679)	lr 0.00531
Train [84][1330/3239]	Time 2.578 (0.654)	Data Time 0.002 (0.030)	Loss 2.4412 (2.3269)	Entropy 0.88947 (0.89026)	Top-1 acc 68.750 (68.159)	Top-5 acc 85.547 (86.672)	lr 0.00531
Train [84][1340/3239]	Time 0.233 (0.651)	Data Time 0.002 (0.030)	Loss 2.3927 (2.3270)	Entropy 0.88956 (0.89025)	Top-1 acc 66.406 (68.159)	Top-5 acc 86.719 (86.674)	lr 0.00531
Train [84][1350/3239]	Time 0.364 (0.649)	Data Time 0.001 (0.030)	Loss 2.3800 (2.3271)	Entropy 0.88953 (0.89025)	Top-1 acc 64.844 (68.158)	Top-5 acc 85.547 (86.671)	lr 0.00531
Train [84][1360/3239]	Time 0.250 (0.648)	Data Time 0.001 (0.030)	Loss 2.3340 (2.3270)	Entropy 0.88951 (0.89024)	Top-1 acc 65.625 (68.162)	Top-5 acc 86.719 (86.670)	lr 0.00531
Train [84][1370/3239]	Time 0.230 (0.647)	Data Time 0.001 (0.030)	Loss 2.2545 (2.3268)	Entropy 0.88948 (0.89024)	Top-1 acc 68.359 (68.167)	Top-5 acc 87.891 (86.673)	lr 0.00531
Train [84][1380/3239]	Time 0.261 (0.646)	Data Time 0.002 (0.029)	Loss 2.2529 (2.3267)	Entropy 0.88945 (0.89023)	Top-1 acc 70.703 (68.169)	Top-5 acc 88.281 (86.680)	lr 0.00531
Train [84][1390/3239]	Time 0.231 (0.645)	Data Time 0.001 (0.029)	Loss 2.3424 (2.3266)	Entropy 0.88940 (0.89023)	Top-1 acc 67.188 (68.167)	Top-5 acc 85.156 (86.681)	lr 0.00530
Train [84][1400/3239]	Time 0.215 (0.643)	Data Time 0.002 (0.029)	Loss 2.3103 (2.3270)	Entropy 0.88928 (0.89022)	Top-1 acc 68.359 (68.161)	Top-5 acc 89.062 (86.678)	lr 0.00530
Train [84][1410/3239]	Time 0.209 (0.642)	Data Time 0.001 (0.029)	Loss 2.4328 (2.3273)	Entropy 0.88924 (0.89021)	Top-1 acc 61.719 (68.152)	Top-5 acc 85.156 (86.674)	lr 0.00530
Train [84][1420/3239]	Time 0.218 (0.641)	Data Time 0.001 (0.029)	Loss 2.3612 (2.3274)	Entropy 0.88925 (0.89021)	Top-1 acc 66.016 (68.153)	Top-5 acc 85.156 (86.670)	lr 0.00530
Train [84][1430/3239]	Time 0.236 (0.640)	Data Time 0.002 (0.028)	Loss 2.2719 (2.3276)	Entropy 0.88924 (0.89020)	Top-1 acc 69.141 (68.148)	Top-5 acc 85.938 (86.667)	lr 0.00530
Train [84][1440/3239]	Time 2.776 (0.639)	Data Time 0.001 (0.028)	Loss 2.2488 (2.3276)	Entropy 0.88924 (0.89019)	Top-1 acc 71.484 (68.157)	Top-5 acc 89.844 (86.664)	lr 0.00530
Train [84][1450/3239]	Time 0.246 (0.636)	Data Time 0.001 (0.028)	Loss 2.5077 (2.3276)	Entropy 0.88915 (0.89019)	Top-1 acc 64.062 (68.159)	Top-5 acc 82.422 (86.662)	lr 0.00530
Train [84][1460/3239]	Time 0.257 (0.635)	Data Time 0.004 (0.028)	Loss 2.2676 (2.3275)	Entropy 0.88918 (0.89018)	Top-1 acc 73.828 (68.169)	Top-5 acc 87.891 (86.659)	lr 0.00530
Train [84][1470/3239]	Time 0.232 (0.634)	Data Time 0.001 (0.028)	Loss 2.4291 (2.3278)	Entropy 0.88916 (0.89017)	Top-1 acc 66.797 (68.164)	Top-5 acc 86.719 (86.658)	lr 0.00530
Train [84][1480/3239]	Time 0.334 (0.633)	Data Time 0.001 (0.028)	Loss 2.2591 (2.3281)	Entropy 0.88921 (0.89017)	Top-1 acc 69.141 (68.161)	Top-5 acc 89.844 (86.653)	lr 0.00530
Train [84][1490/3239]	Time 0.238 (0.632)	Data Time 0.001 (0.027)	Loss 2.3026 (2.3283)	Entropy 0.88939 (0.89016)	Top-1 acc 69.531 (68.154)	Top-5 acc 85.938 (86.650)	lr 0.00530
Train [84][1500/3239]	Time 0.237 (0.631)	Data Time 0.001 (0.027)	Loss 2.3368 (2.3281)	Entropy 0.88932 (0.89015)	Top-1 acc 67.969 (68.156)	Top-5 acc 87.109 (86.653)	lr 0.00530
Train [84][1510/3239]	Time 0.243 (0.630)	Data Time 0.001 (0.027)	Loss 2.2498 (2.3281)	Entropy 0.88929 (0.89015)	Top-1 acc 73.828 (68.157)	Top-5 acc 85.938 (86.653)	lr 0.00529
Train [84][1520/3239]	Time 0.244 (0.629)	Data Time 0.001 (0.027)	Loss 2.2827 (2.3283)	Entropy 0.88928 (0.89014)	Top-1 acc 66.406 (68.146)	Top-5 acc 85.156 (86.648)	lr 0.00529
Train [84][1530/3239]	Time 0.235 (0.628)	Data Time 0.001 (0.027)	Loss 2.3971 (2.3286)	Entropy 0.88917 (0.89014)	Top-1 acc 66.797 (68.135)	Top-5 acc 86.719 (86.647)	lr 0.00529
Train [84][1540/3239]	Time 0.266 (0.627)	Data Time 0.001 (0.026)	Loss 2.3561 (2.3287)	Entropy 0.88906 (0.89013)	Top-1 acc 68.359 (68.135)	Top-5 acc 87.500 (86.645)	lr 0.00529
Train [84][1550/3239]	Time 2.539 (0.626)	Data Time 0.002 (0.026)	Loss 2.2770 (2.3290)	Entropy 0.88906 (0.89012)	Top-1 acc 67.188 (68.134)	Top-5 acc 87.500 (86.639)	lr 0.00529
Train [84][1560/3239]	Time 0.228 (0.624)	Data Time 0.001 (0.026)	Loss 2.3874 (2.3290)	Entropy 0.88904 (0.89012)	Top-1 acc 64.844 (68.126)	Top-5 acc 84.375 (86.636)	lr 0.00529
Train [84][1570/3239]	Time 0.311 (0.623)	Data Time 0.001 (0.026)	Loss 2.2593 (2.3290)	Entropy 0.88901 (0.89011)	Top-1 acc 68.750 (68.125)	Top-5 acc 89.062 (86.638)	lr 0.00529
Train [84][1580/3239]	Time 0.216 (0.622)	Data Time 0.001 (0.026)	Loss 2.3376 (2.3289)	Entropy 0.88899 (0.89010)	Top-1 acc 68.359 (68.123)	Top-5 acc 87.109 (86.636)	lr 0.00529
Train [84][1590/3239]	Time 0.243 (0.621)	Data Time 0.001 (0.026)	Loss 2.4282 (2.3289)	Entropy 0.88884 (0.89010)	Top-1 acc 66.797 (68.133)	Top-5 acc 84.375 (86.635)	lr 0.00529
Train [84][1600/3239]	Time 0.240 (0.620)	Data Time 0.001 (0.026)	Loss 2.1928 (2.3291)	Entropy 0.88876 (0.89009)	Top-1 acc 71.484 (68.133)	Top-5 acc 87.891 (86.630)	lr 0.00529
Train [84][1610/3239]	Time 0.308 (0.619)	Data Time 0.001 (0.025)	Loss 2.2833 (2.3296)	Entropy 0.88867 (0.89008)	Top-1 acc 67.969 (68.125)	Top-5 acc 87.891 (86.621)	lr 0.00529
Train [84][1620/3239]	Time 0.217 (0.618)	Data Time 0.001 (0.025)	Loss 2.3813 (2.3301)	Entropy 0.88858 (0.89007)	Top-1 acc 67.188 (68.112)	Top-5 acc 85.547 (86.615)	lr 0.00529
Train [84][1630/3239]	Time 0.229 (0.617)	Data Time 0.001 (0.025)	Loss 2.3118 (2.3301)	Entropy 0.88853 (0.89006)	Top-1 acc 68.359 (68.106)	Top-5 acc 85.156 (86.612)	lr 0.00528
Train [84][1640/3239]	Time 0.230 (0.616)	Data Time 0.001 (0.025)	Loss 2.3572 (2.3300)	Entropy 0.88849 (0.89005)	Top-1 acc 68.750 (68.106)	Top-5 acc 85.938 (86.611)	lr 0.00528
Train [84][1650/3239]	Time 0.268 (0.616)	Data Time 0.001 (0.025)	Loss 2.2599 (2.3302)	Entropy 0.88846 (0.89004)	Top-1 acc 71.094 (68.100)	Top-5 acc 88.672 (86.608)	lr 0.00528
Train [84][1660/3239]	Time 57.056 (0.648)	Data Time 0.001 (0.025)	Loss 2.3779 (2.3302)	Entropy 0.88846 (0.89003)	Top-1 acc 65.234 (68.097)	Top-5 acc 85.938 (86.614)	lr 0.00528
Train [84][1670/3239]	Time 0.261 (0.645)	Data Time 0.002 (0.025)	Loss 2.3941 (2.3299)	Entropy 0.88833 (0.89002)	Top-1 acc 68.750 (68.100)	Top-5 acc 86.719 (86.621)	lr 0.00528
Train [84][1680/3239]	Time 0.224 (0.644)	Data Time 0.002 (0.024)	Loss 2.4327 (2.3302)	Entropy 0.88818 (0.89001)	Top-1 acc 66.797 (68.098)	Top-5 acc 86.328 (86.614)	lr 0.00528
Train [84][1690/3239]	Time 0.239 (0.643)	Data Time 0.001 (0.024)	Loss 2.3559 (2.3302)	Entropy 0.88816 (0.89000)	Top-1 acc 70.703 (68.103)	Top-5 acc 88.281 (86.615)	lr 0.00528
Train [84][1700/3239]	Time 0.216 (0.642)	Data Time 0.001 (0.024)	Loss 2.2857 (2.3299)	Entropy 0.88814 (0.88999)	Top-1 acc 70.703 (68.107)	Top-5 acc 85.547 (86.619)	lr 0.00528
Train [84][1710/3239]	Time 0.233 (0.641)	Data Time 0.001 (0.024)	Loss 2.4257 (2.3301)	Entropy 0.88815 (0.88998)	Top-1 acc 66.797 (68.105)	Top-5 acc 84.766 (86.617)	lr 0.00528
Train [84][1720/3239]	Time 0.234 (0.640)	Data Time 0.001 (0.024)	Loss 2.2792 (2.3296)	Entropy 0.88810 (0.88997)	Top-1 acc 68.359 (68.117)	Top-5 acc 87.109 (86.622)	lr 0.00528
Train [84][1730/3239]	Time 0.218 (0.639)	Data Time 0.001 (0.024)	Loss 2.2627 (2.3297)	Entropy 0.88811 (0.88996)	Top-1 acc 68.359 (68.119)	Top-5 acc 89.062 (86.622)	lr 0.00528
Train [84][1740/3239]	Time 0.317 (0.638)	Data Time 0.001 (0.024)	Loss 2.3780 (2.3300)	Entropy 0.88809 (0.88995)	Top-1 acc 67.188 (68.116)	Top-5 acc 86.719 (86.612)	lr 0.00528
Train [84][1750/3239]	Time 0.235 (0.637)	Data Time 0.001 (0.024)	Loss 2.3524 (2.3306)	Entropy 0.88807 (0.88994)	Top-1 acc 68.359 (68.103)	Top-5 acc 88.281 (86.605)	lr 0.00527
Train [84][1760/3239]	Time 0.268 (0.636)	Data Time 0.001 (0.023)	Loss 2.2957 (2.3304)	Entropy 0.88808 (0.88993)	Top-1 acc 62.891 (68.103)	Top-5 acc 87.891 (86.607)	lr 0.00527
Train [84][1770/3239]	Time 2.683 (0.636)	Data Time 0.001 (0.023)	Loss 2.5058 (2.3304)	Entropy 0.88808 (0.88991)	Top-1 acc 60.156 (68.104)	Top-5 acc 83.984 (86.608)	lr 0.00527
Train [84][1780/3239]	Time 0.285 (0.633)	Data Time 0.001 (0.023)	Loss 2.4844 (2.3304)	Entropy 0.88804 (0.88990)	Top-1 acc 64.062 (68.109)	Top-5 acc 84.375 (86.608)	lr 0.00527
Train [84][1790/3239]	Time 0.222 (0.633)	Data Time 0.001 (0.023)	Loss 2.3103 (2.3305)	Entropy 0.88805 (0.88989)	Top-1 acc 72.266 (68.106)	Top-5 acc 87.109 (86.607)	lr 0.00527
Train [84][1800/3239]	Time 0.217 (0.632)	Data Time 0.001 (0.023)	Loss 2.4682 (2.3304)	Entropy 0.88798 (0.88988)	Top-1 acc 61.328 (68.104)	Top-5 acc 83.203 (86.608)	lr 0.00527
Train [84][1810/3239]	Time 0.245 (0.631)	Data Time 0.002 (0.023)	Loss 2.3838 (2.3305)	Entropy 0.88793 (0.88987)	Top-1 acc 66.406 (68.108)	Top-5 acc 87.109 (86.606)	lr 0.00527
Train [84][1820/3239]	Time 0.260 (0.630)	Data Time 0.002 (0.023)	Loss 2.5397 (2.3306)	Entropy 0.88794 (0.88986)	Top-1 acc 63.281 (68.107)	Top-5 acc 83.594 (86.600)	lr 0.00527
Train [84][1830/3239]	Time 0.246 (0.629)	Data Time 0.001 (0.023)	Loss 2.3454 (2.3308)	Entropy 0.88786 (0.88985)	Top-1 acc 71.094 (68.103)	Top-5 acc 86.719 (86.597)	lr 0.00527
Train [84][1840/3239]	Time 0.251 (0.629)	Data Time 0.001 (0.022)	Loss 2.4454 (2.3309)	Entropy 0.88786 (0.88984)	Top-1 acc 65.234 (68.099)	Top-5 acc 84.375 (86.594)	lr 0.00527
Train [84][1850/3239]	Time 0.233 (0.628)	Data Time 0.001 (0.022)	Loss 2.3904 (2.3309)	Entropy 0.88783 (0.88983)	Top-1 acc 64.844 (68.097)	Top-5 acc 86.719 (86.597)	lr 0.00527
Train [84][1860/3239]	Time 0.228 (0.627)	Data Time 0.001 (0.022)	Loss 2.2699 (2.3310)	Entropy 0.88786 (0.88982)	Top-1 acc 67.969 (68.096)	Top-5 acc 85.938 (86.593)	lr 0.00527
Train [84][1870/3239]	Time 0.328 (0.626)	Data Time 0.001 (0.022)	Loss 2.3064 (2.3308)	Entropy 0.88794 (0.88981)	Top-1 acc 69.922 (68.098)	Top-5 acc 87.500 (86.596)	lr 0.00527
Train [84][1880/3239]	Time 2.542 (0.625)	Data Time 0.001 (0.022)	Loss 2.4003 (2.3307)	Entropy 0.88794 (0.88980)	Top-1 acc 65.234 (68.095)	Top-5 acc 85.938 (86.599)	lr 0.00526
Train [84][1890/3239]	Time 0.236 (0.623)	Data Time 0.001 (0.022)	Loss 2.4046 (2.3308)	Entropy 0.88784 (0.88979)	Top-1 acc 66.406 (68.085)	Top-5 acc 85.938 (86.599)	lr 0.00526
Train [84][1900/3239]	Time 0.218 (0.623)	Data Time 0.001 (0.022)	Loss 2.3925 (2.3308)	Entropy 0.88779 (0.88978)	Top-1 acc 66.406 (68.084)	Top-5 acc 85.156 (86.596)	lr 0.00526
Train [84][1910/3239]	Time 0.273 (0.622)	Data Time 0.002 (0.022)	Loss 2.4003 (2.3308)	Entropy 0.88777 (0.88977)	Top-1 acc 67.969 (68.091)	Top-5 acc 83.594 (86.593)	lr 0.00526
Train [84][1920/3239]	Time 0.253 (0.621)	Data Time 0.001 (0.022)	Loss 2.2502 (2.3304)	Entropy 0.88776 (0.88976)	Top-1 acc 71.484 (68.105)	Top-5 acc 85.938 (86.596)	lr 0.00526
Train [84][1930/3239]	Time 0.234 (0.621)	Data Time 0.010 (0.021)	Loss 2.4388 (2.3305)	Entropy 0.88779 (0.88975)	Top-1 acc 62.500 (68.096)	Top-5 acc 88.281 (86.594)	lr 0.00526
Train [84][1940/3239]	Time 0.226 (0.620)	Data Time 0.001 (0.021)	Loss 2.3704 (2.3308)	Entropy 0.88774 (0.88974)	Top-1 acc 68.359 (68.089)	Top-5 acc 85.938 (86.588)	lr 0.00526
Train [84][1950/3239]	Time 0.218 (0.619)	Data Time 0.001 (0.021)	Loss 2.4044 (2.3309)	Entropy 0.88774 (0.88973)	Top-1 acc 65.625 (68.088)	Top-5 acc 83.984 (86.585)	lr 0.00526
Train [84][1960/3239]	Time 0.232 (0.618)	Data Time 0.001 (0.021)	Loss 2.2220 (2.3310)	Entropy 0.88770 (0.88972)	Top-1 acc 70.312 (68.083)	Top-5 acc 87.500 (86.581)	lr 0.00526
Train [84][1970/3239]	Time 0.237 (0.618)	Data Time 0.001 (0.021)	Loss 2.3739 (2.3309)	Entropy 0.88767 (0.88971)	Top-1 acc 66.406 (68.089)	Top-5 acc 84.375 (86.583)	lr 0.00526
Train [84][1980/3239]	Time 0.257 (0.617)	Data Time 0.001 (0.021)	Loss 2.4141 (2.3309)	Entropy 0.88756 (0.88970)	Top-1 acc 69.141 (68.089)	Top-5 acc 83.203 (86.578)	lr 0.00526
Train [84][1990/3239]	Time 2.536 (0.616)	Data Time 0.002 (0.021)	Loss 2.3776 (2.3312)	Entropy 0.88756 (0.88968)	Top-1 acc 65.625 (68.079)	Top-5 acc 88.281 (86.574)	lr 0.00526
Train [84][2000/3239]	Time 0.381 (0.614)	Data Time 0.002 (0.021)	Loss 2.3261 (2.3311)	Entropy 0.88746 (0.88967)	Top-1 acc 69.531 (68.081)	Top-5 acc 84.766 (86.572)	lr 0.00525
Train [84][2010/3239]	Time 0.260 (0.614)	Data Time 0.001 (0.021)	Loss 2.1955 (2.3309)	Entropy 0.88743 (0.88966)	Top-1 acc 70.703 (68.086)	Top-5 acc 89.062 (86.578)	lr 0.00525
Train [84][2020/3239]	Time 0.233 (0.613)	Data Time 0.001 (0.021)	Loss 2.4351 (2.3309)	Entropy 0.88736 (0.88965)	Top-1 acc 65.625 (68.089)	Top-5 acc 83.594 (86.577)	lr 0.00525
Train [84][2030/3239]	Time 0.398 (0.637)	Data Time 0.003 (0.020)	Loss 2.4035 (2.3311)	Entropy 0.88729 (0.88964)	Top-1 acc 65.625 (68.088)	Top-5 acc 83.984 (86.573)	lr 0.00525
Train [84][2040/3239]	Time 0.240 (0.637)	Data Time 0.002 (0.020)	Loss 2.2326 (2.3311)	Entropy 0.88731 (0.88963)	Top-1 acc 70.703 (68.085)	Top-5 acc 88.672 (86.573)	lr 0.00525
Train [84][2050/3239]	Time 0.239 (0.636)	Data Time 0.002 (0.020)	Loss 2.2460 (2.3312)	Entropy 0.88725 (0.88962)	Top-1 acc 71.094 (68.085)	Top-5 acc 89.062 (86.572)	lr 0.00525
Train [84][2060/3239]	Time 0.240 (0.636)	Data Time 0.001 (0.020)	Loss 2.3632 (2.3314)	Entropy 0.88715 (0.88961)	Top-1 acc 69.531 (68.086)	Top-5 acc 85.156 (86.569)	lr 0.00525
Train [84][2070/3239]	Time 0.236 (0.635)	Data Time 0.001 (0.020)	Loss 2.4885 (2.3318)	Entropy 0.88713 (0.88959)	Top-1 acc 60.547 (68.076)	Top-5 acc 82.812 (86.563)	lr 0.00525
Train [84][2080/3239]	Time 0.239 (0.634)	Data Time 0.002 (0.020)	Loss 2.4594 (2.3317)	Entropy 0.88710 (0.88958)	Top-1 acc 64.844 (68.079)	Top-5 acc 83.203 (86.563)	lr 0.00525
Train [84][2090/3239]	Time 0.213 (0.633)	Data Time 0.001 (0.020)	Loss 2.4815 (2.3320)	Entropy 0.88703 (0.88957)	Top-1 acc 60.156 (68.070)	Top-5 acc 88.281 (86.560)	lr 0.00525
Train [84][2100/3239]	Time 2.536 (0.633)	Data Time 0.001 (0.020)	Loss 2.3644 (2.3319)	Entropy 0.88703 (0.88956)	Top-1 acc 66.016 (68.074)	Top-5 acc 87.109 (86.561)	lr 0.00525
Train [84][2110/3239]	Time 0.235 (0.631)	Data Time 0.001 (0.020)	Loss 2.3901 (2.3321)	Entropy 0.88702 (0.88955)	Top-1 acc 65.625 (68.066)	Top-5 acc 83.984 (86.554)	lr 0.00525
Train [84][2120/3239]	Time 0.241 (0.630)	Data Time 0.001 (0.020)	Loss 2.2995 (2.3323)	Entropy 0.88703 (0.88953)	Top-1 acc 69.531 (68.055)	Top-5 acc 87.109 (86.550)	lr 0.00524
Train [84][2130/3239]	Time 0.263 (0.629)	Data Time 0.001 (0.020)	Loss 2.3109 (2.3322)	Entropy 0.88696 (0.88952)	Top-1 acc 66.016 (68.058)	Top-5 acc 88.281 (86.549)	lr 0.00524
Train [84][2140/3239]	Time 0.251 (0.629)	Data Time 0.001 (0.020)	Loss 2.3153 (2.3321)	Entropy 0.88694 (0.88951)	Top-1 acc 66.016 (68.067)	Top-5 acc 87.500 (86.553)	lr 0.00524
Train [84][2150/3239]	Time 0.264 (0.628)	Data Time 0.001 (0.019)	Loss 2.5196 (2.3322)	Entropy 0.88692 (0.88950)	Top-1 acc 64.453 (68.065)	Top-5 acc 84.766 (86.552)	lr 0.00524
Train [84][2160/3239]	Time 0.251 (0.627)	Data Time 0.001 (0.019)	Loss 2.2626 (2.3322)	Entropy 0.88693 (0.88949)	Top-1 acc 72.656 (68.069)	Top-5 acc 87.109 (86.550)	lr 0.00524
Train [84][2170/3239]	Time 0.235 (0.627)	Data Time 0.001 (0.019)	Loss 2.2937 (2.3321)	Entropy 0.88692 (0.88947)	Top-1 acc 66.406 (68.071)	Top-5 acc 89.062 (86.550)	lr 0.00524
Train [84][2180/3239]	Time 0.238 (0.626)	Data Time 0.001 (0.019)	Loss 2.2615 (2.3321)	Entropy 0.88696 (0.88946)	Top-1 acc 67.578 (68.070)	Top-5 acc 88.281 (86.554)	lr 0.00524
Train [84][2190/3239]	Time 0.237 (0.625)	Data Time 0.001 (0.019)	Loss 2.3436 (2.3321)	Entropy 0.88688 (0.88945)	Top-1 acc 65.625 (68.067)	Top-5 acc 85.156 (86.554)	lr 0.00524
Train [84][2200/3239]	Time 0.242 (0.625)	Data Time 0.001 (0.019)	Loss 2.3415 (2.3319)	Entropy 0.88707 (0.88944)	Top-1 acc 67.188 (68.070)	Top-5 acc 83.984 (86.554)	lr 0.00524
Train [84][2210/3239]	Time 2.714 (0.624)	Data Time 0.001 (0.019)	Loss 2.2638 (2.3317)	Entropy 0.88707 (0.88943)	Top-1 acc 69.141 (68.070)	Top-5 acc 87.500 (86.557)	lr 0.00524
Train [84][2220/3239]	Time 0.348 (0.623)	Data Time 0.002 (0.019)	Loss 2.3696 (2.3316)	Entropy 0.88703 (0.88942)	Top-1 acc 66.797 (68.076)	Top-5 acc 87.500 (86.560)	lr 0.00524
Train [84][2230/3239]	Time 0.238 (0.622)	Data Time 0.002 (0.019)	Loss 2.3618 (2.3318)	Entropy 0.88702 (0.88941)	Top-1 acc 66.016 (68.075)	Top-5 acc 85.547 (86.557)	lr 0.00524
Train [84][2240/3239]	Time 0.247 (0.621)	Data Time 0.001 (0.019)	Loss 2.3617 (2.3318)	Entropy 0.88704 (0.88940)	Top-1 acc 65.234 (68.071)	Top-5 acc 86.328 (86.557)	lr 0.00523
Train [84][2250/3239]	Time 0.246 (0.621)	Data Time 0.001 (0.019)	Loss 2.3887 (2.3320)	Entropy 0.88702 (0.88939)	Top-1 acc 69.922 (68.069)	Top-5 acc 84.766 (86.553)	lr 0.00523
Train [84][2260/3239]	Time 0.266 (0.620)	Data Time 0.002 (0.019)	Loss 2.4678 (2.3322)	Entropy 0.88704 (0.88938)	Top-1 acc 63.281 (68.067)	Top-5 acc 84.375 (86.545)	lr 0.00523
Train [84][2270/3239]	Time 0.244 (0.620)	Data Time 0.001 (0.019)	Loss 2.2517 (2.3321)	Entropy 0.88702 (0.88936)	Top-1 acc 68.359 (68.067)	Top-5 acc 87.891 (86.549)	lr 0.00523
Train [84][2280/3239]	Time 0.232 (0.619)	Data Time 0.001 (0.018)	Loss 2.2649 (2.3321)	Entropy 0.88698 (0.88935)	Top-1 acc 74.219 (68.071)	Top-5 acc 87.891 (86.548)	lr 0.00523
Train [84][2290/3239]	Time 0.231 (0.618)	Data Time 0.001 (0.018)	Loss 2.3912 (2.3322)	Entropy 0.88695 (0.88934)	Top-1 acc 62.891 (68.069)	Top-5 acc 85.547 (86.546)	lr 0.00523
Train [84][2300/3239]	Time 0.226 (0.618)	Data Time 0.001 (0.018)	Loss 2.2294 (2.3321)	Entropy 0.88690 (0.88933)	Top-1 acc 71.094 (68.068)	Top-5 acc 88.672 (86.550)	lr 0.00523
Train [84][2310/3239]	Time 0.337 (0.617)	Data Time 0.001 (0.018)	Loss 2.3828 (2.3323)	Entropy 0.88690 (0.88932)	Top-1 acc 70.312 (68.067)	Top-5 acc 85.547 (86.549)	lr 0.00523
Train [84][2320/3239]	Time 2.621 (0.617)	Data Time 0.001 (0.018)	Loss 2.3730 (2.3323)	Entropy 0.88690 (0.88931)	Top-1 acc 69.141 (68.067)	Top-5 acc 83.984 (86.546)	lr 0.00523
Train [84][2330/3239]	Time 0.250 (0.615)	Data Time 0.001 (0.018)	Loss 2.1392 (2.3324)	Entropy 0.88687 (0.88930)	Top-1 acc 70.703 (68.064)	Top-5 acc 89.844 (86.545)	lr 0.00523
Train [84][2340/3239]	Time 0.224 (0.615)	Data Time 0.001 (0.018)	Loss 2.5130 (2.3327)	Entropy 0.88679 (0.88929)	Top-1 acc 67.188 (68.061)	Top-5 acc 82.812 (86.542)	lr 0.00523
Train [84][2350/3239]	Time 0.321 (0.614)	Data Time 0.002 (0.018)	Loss 2.3198 (2.3326)	Entropy 0.88680 (0.88928)	Top-1 acc 67.188 (68.064)	Top-5 acc 87.109 (86.543)	lr 0.00523
Train [84][2360/3239]	Time 0.220 (0.613)	Data Time 0.001 (0.018)	Loss 2.3077 (2.3326)	Entropy 0.88677 (0.88927)	Top-1 acc 69.922 (68.066)	Top-5 acc 87.891 (86.544)	lr 0.00522
Train [84][2370/3239]	Time 0.237 (0.613)	Data Time 0.001 (0.018)	Loss 2.4191 (2.3325)	Entropy 0.88681 (0.88926)	Top-1 acc 66.016 (68.066)	Top-5 acc 83.984 (86.547)	lr 0.00522
Train [84][2380/3239]	Time 0.237 (0.612)	Data Time 0.001 (0.018)	Loss 2.2921 (2.3325)	Entropy 0.88678 (0.88925)	Top-1 acc 69.141 (68.067)	Top-5 acc 87.109 (86.546)	lr 0.00522
Train [84][2390/3239]	Time 0.385 (0.633)	Data Time 0.002 (0.018)	Loss 2.3156 (2.3324)	Entropy 0.88672 (0.88924)	Top-1 acc 69.141 (68.067)	Top-5 acc 86.719 (86.549)	lr 0.00522
Train [84][2400/3239]	Time 0.270 (0.633)	Data Time 0.002 (0.018)	Loss 2.4477 (2.3324)	Entropy 0.88668 (0.88923)	Top-1 acc 65.234 (68.071)	Top-5 acc 82.812 (86.547)	lr 0.00522
Train [84][2410/3239]	Time 0.242 (0.632)	Data Time 0.001 (0.018)	Loss 2.2413 (2.3325)	Entropy 0.88657 (0.88922)	Top-1 acc 70.703 (68.072)	Top-5 acc 88.672 (86.540)	lr 0.00522
Train [84][2420/3239]	Time 0.242 (0.632)	Data Time 0.001 (0.017)	Loss 2.5237 (2.3325)	Entropy 0.88653 (0.88921)	Top-1 acc 64.062 (68.072)	Top-5 acc 83.203 (86.540)	lr 0.00522
Train [84][2430/3239]	Time 2.557 (0.631)	Data Time 0.002 (0.017)	Loss 2.4258 (2.3326)	Entropy 0.88653 (0.88920)	Top-1 acc 67.969 (68.076)	Top-5 acc 83.594 (86.543)	lr 0.00522
Train [84][2440/3239]	Time 0.277 (0.630)	Data Time 0.001 (0.017)	Loss 2.2983 (2.3326)	Entropy 0.88644 (0.88918)	Top-1 acc 66.406 (68.078)	Top-5 acc 87.500 (86.542)	lr 0.00522
Train [84][2450/3239]	Time 0.236 (0.629)	Data Time 0.001 (0.017)	Loss 2.2988 (2.3326)	Entropy 0.88647 (0.88917)	Top-1 acc 69.531 (68.081)	Top-5 acc 86.328 (86.545)	lr 0.00522
Train [84][2460/3239]	Time 0.253 (0.628)	Data Time 0.001 (0.017)	Loss 2.3013 (2.3325)	Entropy 0.88644 (0.88916)	Top-1 acc 66.406 (68.078)	Top-5 acc 87.109 (86.550)	lr 0.00522
Train [84][2470/3239]	Time 0.221 (0.628)	Data Time 0.002 (0.017)	Loss 2.1944 (2.3325)	Entropy 0.88638 (0.88915)	Top-1 acc 73.047 (68.077)	Top-5 acc 88.672 (86.553)	lr 0.00522
Train [84][2480/3239]	Time 0.380 (0.627)	Data Time 0.001 (0.017)	Loss 2.4882 (2.3324)	Entropy 0.88647 (0.88914)	Top-1 acc 65.234 (68.077)	Top-5 acc 85.156 (86.556)	lr 0.00521
Train [84][2490/3239]	Time 0.247 (0.627)	Data Time 0.001 (0.017)	Loss 2.4639 (2.3325)	Entropy 0.88641 (0.88913)	Top-1 acc 64.844 (68.070)	Top-5 acc 83.594 (86.553)	lr 0.00521
Train [84][2500/3239]	Time 0.233 (0.626)	Data Time 0.001 (0.017)	Loss 2.4165 (2.3325)	Entropy 0.88635 (0.88912)	Top-1 acc 64.844 (68.072)	Top-5 acc 85.156 (86.556)	lr 0.00521
Train [84][2510/3239]	Time 0.236 (0.626)	Data Time 0.001 (0.017)	Loss 2.3028 (2.3324)	Entropy 0.88625 (0.88911)	Top-1 acc 67.578 (68.072)	Top-5 acc 88.281 (86.554)	lr 0.00521
Train [84][2520/3239]	Time 0.235 (0.625)	Data Time 0.001 (0.017)	Loss 2.2042 (2.3324)	Entropy 0.88620 (0.88910)	Top-1 acc 74.219 (68.067)	Top-5 acc 87.500 (86.551)	lr 0.00521
Train [84][2530/3239]	Time 0.225 (0.624)	Data Time 0.001 (0.017)	Loss 2.3816 (2.3323)	Entropy 0.88629 (0.88908)	Top-1 acc 64.844 (68.071)	Top-5 acc 89.062 (86.554)	lr 0.00521
Train [84][2540/3239]	Time 2.663 (0.624)	Data Time 0.001 (0.017)	Loss 2.3590 (2.3323)	Entropy 0.88629 (0.88907)	Top-1 acc 66.797 (68.071)	Top-5 acc 84.766 (86.552)	lr 0.00521
Train [84][2550/3239]	Time 0.211 (0.622)	Data Time 0.001 (0.017)	Loss 2.3254 (2.3325)	Entropy 0.88626 (0.88906)	Top-1 acc 69.922 (68.067)	Top-5 acc 87.109 (86.549)	lr 0.00521
Train [84][2560/3239]	Time 0.245 (0.622)	Data Time 0.002 (0.017)	Loss 2.4265 (2.3326)	Entropy 0.88628 (0.88905)	Top-1 acc 67.969 (68.064)	Top-5 acc 83.594 (86.547)	lr 0.00521
Train [84][2570/3239]	Time 0.403 (0.621)	Data Time 0.001 (0.017)	Loss 2.4406 (2.3325)	Entropy 0.88628 (0.88904)	Top-1 acc 63.672 (68.061)	Top-5 acc 83.984 (86.548)	lr 0.00521
Train [84][2580/3239]	Time 0.236 (0.621)	Data Time 0.001 (0.016)	Loss 2.3910 (2.3325)	Entropy 0.88628 (0.88903)	Top-1 acc 63.672 (68.059)	Top-5 acc 86.719 (86.551)	lr 0.00521
Train [84][2590/3239]	Time 0.226 (0.620)	Data Time 0.001 (0.016)	Loss 2.5472 (2.3326)	Entropy 0.88620 (0.88902)	Top-1 acc 62.109 (68.054)	Top-5 acc 82.422 (86.549)	lr 0.00521
Train [84][2600/3239]	Time 0.228 (0.620)	Data Time 0.001 (0.016)	Loss 2.4058 (2.3327)	Entropy 0.88613 (0.88901)	Top-1 acc 66.016 (68.050)	Top-5 acc 85.156 (86.546)	lr 0.00521
Train [84][2610/3239]	Time 0.231 (0.619)	Data Time 0.001 (0.016)	Loss 2.4204 (2.3327)	Entropy 0.88615 (0.88900)	Top-1 acc 66.016 (68.052)	Top-5 acc 85.547 (86.546)	lr 0.00520
Train [84][2620/3239]	Time 0.238 (0.619)	Data Time 0.001 (0.016)	Loss 2.2804 (2.3326)	Entropy 0.88615 (0.88899)	Top-1 acc 72.656 (68.054)	Top-5 acc 87.109 (86.548)	lr 0.00520
Train [84][2630/3239]	Time 0.250 (0.618)	Data Time 0.001 (0.016)	Loss 2.2347 (2.3326)	Entropy 0.88616 (0.88898)	Top-1 acc 71.875 (68.055)	Top-5 acc 88.281 (86.546)	lr 0.00520
Train [84][2640/3239]	Time 0.209 (0.618)	Data Time 0.001 (0.016)	Loss 2.2602 (2.3326)	Entropy 0.88612 (0.88897)	Top-1 acc 66.797 (68.058)	Top-5 acc 89.453 (86.546)	lr 0.00520
Train [84][2650/3239]	Time 0.254 (0.617)	Data Time 0.001 (0.016)	Loss 2.2079 (2.3326)	Entropy 0.88614 (0.88895)	Top-1 acc 72.656 (68.057)	Top-5 acc 87.891 (86.547)	lr 0.00520
Train [84][2660/3239]	Time 0.231 (0.617)	Data Time 0.001 (0.016)	Loss 2.3076 (2.3325)	Entropy 0.88617 (0.88894)	Top-1 acc 65.234 (68.064)	Top-5 acc 86.719 (86.550)	lr 0.00520
Train [84][2670/3239]	Time 0.258 (0.616)	Data Time 0.001 (0.016)	Loss 2.3753 (2.3326)	Entropy 0.88614 (0.88893)	Top-1 acc 66.797 (68.062)	Top-5 acc 85.938 (86.548)	lr 0.00520
Train [84][2680/3239]	Time 0.229 (0.616)	Data Time 0.001 (0.016)	Loss 2.2831 (2.3326)	Entropy 0.88611 (0.88892)	Top-1 acc 69.531 (68.065)	Top-5 acc 85.547 (86.550)	lr 0.00520
Train [84][2690/3239]	Time 0.239 (0.615)	Data Time 0.002 (0.016)	Loss 2.2764 (2.3325)	Entropy 0.88613 (0.88891)	Top-1 acc 69.141 (68.071)	Top-5 acc 89.844 (86.552)	lr 0.00520
Train [84][2700/3239]	Time 0.371 (0.615)	Data Time 0.001 (0.016)	Loss 2.3471 (2.3324)	Entropy 0.88600 (0.88890)	Top-1 acc 67.578 (68.072)	Top-5 acc 88.281 (86.553)	lr 0.00520
Train [84][2710/3239]	Time 0.234 (0.614)	Data Time 0.001 (0.016)	Loss 2.4763 (2.3325)	Entropy 0.88596 (0.88889)	Top-1 acc 61.328 (68.070)	Top-5 acc 85.938 (86.553)	lr 0.00520
Train [84][2720/3239]	Time 0.230 (0.614)	Data Time 0.001 (0.016)	Loss 2.1903 (2.3324)	Entropy 0.88591 (0.88888)	Top-1 acc 72.656 (68.073)	Top-5 acc 89.453 (86.555)	lr 0.00520
Train [84][2730/3239]	Time 0.221 (0.613)	Data Time 0.001 (0.016)	Loss 2.0985 (2.3324)	Entropy 0.88593 (0.88887)	Top-1 acc 73.438 (68.076)	Top-5 acc 91.406 (86.555)	lr 0.00519
Train [84][2740/3239]	Time 0.294 (0.632)	Data Time 0.004 (0.016)	Loss 2.2575 (2.3324)	Entropy 0.88583 (0.88886)	Top-1 acc 69.922 (68.077)	Top-5 acc 87.109 (86.550)	lr 0.00519
Train [84][2750/3239]	Time 0.247 (0.631)	Data Time 0.002 (0.016)	Loss 2.4638 (2.3323)	Entropy 0.88587 (0.88885)	Top-1 acc 67.188 (68.083)	Top-5 acc 83.203 (86.553)	lr 0.00519
Train [84][2760/3239]	Time 0.223 (0.631)	Data Time 0.002 (0.016)	Loss 2.3893 (2.3323)	Entropy 0.88574 (0.88884)	Top-1 acc 67.969 (68.083)	Top-5 acc 85.547 (86.552)	lr 0.00519
Train [84][2770/3239]	Time 0.310 (0.630)	Data Time 0.002 (0.015)	Loss 2.3113 (2.3323)	Entropy 0.88572 (0.88883)	Top-1 acc 71.094 (68.086)	Top-5 acc 85.938 (86.552)	lr 0.00519
Train [84][2780/3239]	Time 0.212 (0.630)	Data Time 0.001 (0.015)	Loss 2.4010 (2.3324)	Entropy 0.88567 (0.88881)	Top-1 acc 66.016 (68.083)	Top-5 acc 85.547 (86.547)	lr 0.00519
Train [84][2790/3239]	Time 0.233 (0.629)	Data Time 0.001 (0.015)	Loss 2.4156 (2.3325)	Entropy 0.88561 (0.88880)	Top-1 acc 65.625 (68.080)	Top-5 acc 83.984 (86.545)	lr 0.00519
Train [84][2800/3239]	Time 0.234 (0.629)	Data Time 0.001 (0.015)	Loss 2.2433 (2.3324)	Entropy 0.88553 (0.88879)	Top-1 acc 69.141 (68.080)	Top-5 acc 89.453 (86.545)	lr 0.00519
Train [84][2810/3239]	Time 0.230 (0.628)	Data Time 0.001 (0.015)	Loss 2.2222 (2.3324)	Entropy 0.88554 (0.88878)	Top-1 acc 69.922 (68.078)	Top-5 acc 87.500 (86.543)	lr 0.00519
Train [84][2820/3239]	Time 0.263 (0.628)	Data Time 0.001 (0.015)	Loss 2.3826 (2.3324)	Entropy 0.88555 (0.88877)	Top-1 acc 66.797 (68.075)	Top-5 acc 82.812 (86.543)	lr 0.00519
Train [84][2830/3239]	Time 0.417 (0.627)	Data Time 0.002 (0.015)	Loss 2.2371 (2.3324)	Entropy 0.88549 (0.88876)	Top-1 acc 69.141 (68.074)	Top-5 acc 88.672 (86.541)	lr 0.00519
Train [84][2840/3239]	Time 0.265 (0.627)	Data Time 0.001 (0.015)	Loss 2.2877 (2.3323)	Entropy 0.88543 (0.88875)	Top-1 acc 69.531 (68.075)	Top-5 acc 87.500 (86.541)	lr 0.00519
Train [84][2850/3239]	Time 0.295 (0.626)	Data Time 0.001 (0.015)	Loss 2.5155 (2.3328)	Entropy 0.88541 (0.88873)	Top-1 acc 63.672 (68.066)	Top-5 acc 85.938 (86.536)	lr 0.00518
Train [84][2860/3239]	Time 0.251 (0.626)	Data Time 0.001 (0.015)	Loss 2.3693 (2.3328)	Entropy 0.88541 (0.88872)	Top-1 acc 68.750 (68.065)	Top-5 acc 87.109 (86.535)	lr 0.00518
Train [84][2870/3239]	Time 0.239 (0.625)	Data Time 0.001 (0.015)	Loss 2.3454 (2.3329)	Entropy 0.88537 (0.88871)	Top-1 acc 69.922 (68.066)	Top-5 acc 85.938 (86.533)	lr 0.00518
Train [84][2880/3239]	Time 0.273 (0.625)	Data Time 0.001 (0.015)	Loss 2.2581 (2.3330)	Entropy 0.88532 (0.88870)	Top-1 acc 71.094 (68.067)	Top-5 acc 87.500 (86.532)	lr 0.00518
Train [84][2890/3239]	Time 0.221 (0.624)	Data Time 0.001 (0.015)	Loss 2.4348 (2.3330)	Entropy 0.88530 (0.88869)	Top-1 acc 64.453 (68.068)	Top-5 acc 83.984 (86.530)	lr 0.00518
Train [84][2900/3239]	Time 0.227 (0.624)	Data Time 0.001 (0.015)	Loss 2.3383 (2.3330)	Entropy 0.88534 (0.88868)	Top-1 acc 65.625 (68.071)	Top-5 acc 85.938 (86.530)	lr 0.00518
Train [84][2910/3239]	Time 0.232 (0.623)	Data Time 0.001 (0.015)	Loss 2.6050 (2.3329)	Entropy 0.88530 (0.88866)	Top-1 acc 62.891 (68.075)	Top-5 acc 85.156 (86.533)	lr 0.00518
Train [84][2920/3239]	Time 0.253 (0.623)	Data Time 0.001 (0.015)	Loss 2.3864 (2.3330)	Entropy 0.88521 (0.88865)	Top-1 acc 64.453 (68.073)	Top-5 acc 85.938 (86.530)	lr 0.00518
Train [84][2930/3239]	Time 0.239 (0.622)	Data Time 0.001 (0.015)	Loss 2.2480 (2.3329)	Entropy 0.88506 (0.88864)	Top-1 acc 70.703 (68.074)	Top-5 acc 87.891 (86.532)	lr 0.00518
Train [84][2940/3239]	Time 0.269 (0.622)	Data Time 0.001 (0.015)	Loss 2.2701 (2.3329)	Entropy 0.88498 (0.88863)	Top-1 acc 69.141 (68.073)	Top-5 acc 87.891 (86.532)	lr 0.00518
Train [84][2950/3239]	Time 0.269 (0.621)	Data Time 0.001 (0.015)	Loss 2.2368 (2.3330)	Entropy 0.88495 (0.88862)	Top-1 acc 71.484 (68.074)	Top-5 acc 87.109 (86.530)	lr 0.00518
Train [84][2960/3239]	Time 0.328 (0.621)	Data Time 0.001 (0.015)	Loss 2.4927 (2.3330)	Entropy 0.88496 (0.88860)	Top-1 acc 63.672 (68.069)	Top-5 acc 80.469 (86.529)	lr 0.00518
Train [84][2970/3239]	Time 0.271 (0.620)	Data Time 0.001 (0.015)	Loss 2.2602 (2.3330)	Entropy 0.88498 (0.88859)	Top-1 acc 71.484 (68.069)	Top-5 acc 87.500 (86.532)	lr 0.00517
Train [84][2980/3239]	Time 0.251 (0.620)	Data Time 0.001 (0.014)	Loss 2.5370 (2.3331)	Entropy 0.88508 (0.88858)	Top-1 acc 60.938 (68.062)	Top-5 acc 83.203 (86.528)	lr 0.00517
Train [84][2990/3239]	Time 0.257 (0.619)	Data Time 0.001 (0.014)	Loss 2.5895 (2.3333)	Entropy 0.88506 (0.88857)	Top-1 acc 62.891 (68.059)	Top-5 acc 83.203 (86.527)	lr 0.00517
Train [84][3000/3239]	Time 0.327 (0.619)	Data Time 0.001 (0.014)	Loss 2.3343 (2.3332)	Entropy 0.88509 (0.88856)	Top-1 acc 67.578 (68.062)	Top-5 acc 87.500 (86.532)	lr 0.00517
Train [84][3010/3239]	Time 0.223 (0.618)	Data Time 0.001 (0.014)	Loss 2.3060 (2.3330)	Entropy 0.88508 (0.88855)	Top-1 acc 70.312 (68.068)	Top-5 acc 87.891 (86.534)	lr 0.00517
Train [84][3020/3239]	Time 0.274 (0.618)	Data Time 0.007 (0.014)	Loss 2.2409 (2.3330)	Entropy 0.88499 (0.88853)	Top-1 acc 72.266 (68.068)	Top-5 acc 87.891 (86.534)	lr 0.00517
Train [84][3030/3239]	Time 0.270 (0.618)	Data Time 0.001 (0.014)	Loss 2.2735 (2.3330)	Entropy 0.88495 (0.88852)	Top-1 acc 70.312 (68.068)	Top-5 acc 87.109 (86.536)	lr 0.00517
Train [84][3040/3239]	Time 0.217 (0.617)	Data Time 0.001 (0.014)	Loss 2.4140 (2.3332)	Entropy 0.88500 (0.88851)	Top-1 acc 67.969 (68.062)	Top-5 acc 85.938 (86.534)	lr 0.00517
Train [84][3050/3239]	Time 0.206 (0.617)	Data Time 0.001 (0.014)	Loss 2.2711 (2.3332)	Entropy 0.88500 (0.88850)	Top-1 acc 69.531 (68.062)	Top-5 acc 85.938 (86.533)	lr 0.00517
Train [84][3060/3239]	Time 0.239 (0.616)	Data Time 0.001 (0.014)	Loss 2.3503 (2.3331)	Entropy 0.88492 (0.88849)	Top-1 acc 64.453 (68.064)	Top-5 acc 83.984 (86.532)	lr 0.00517
Train [84][3070/3239]	Time 0.227 (0.634)	Data Time 0.003 (0.014)	Loss 2.5615 (2.3333)	Entropy 0.88489 (0.88848)	Top-1 acc 58.984 (68.063)	Top-5 acc 83.984 (86.529)	lr 0.00517
Train [84][3080/3239]	Time 0.230 (0.633)	Data Time 0.002 (0.014)	Loss 2.2135 (2.3332)	Entropy 0.88480 (0.88846)	Top-1 acc 70.312 (68.067)	Top-5 acc 89.062 (86.530)	lr 0.00517
Train [84][3090/3239]	Time 0.244 (0.633)	Data Time 0.002 (0.014)	Loss 2.2426 (2.3333)	Entropy 0.88480 (0.88845)	Top-1 acc 70.312 (68.060)	Top-5 acc 88.281 (86.530)	lr 0.00516
Train [84][3100/3239]	Time 0.264 (0.632)	Data Time 0.002 (0.014)	Loss 2.3578 (2.3333)	Entropy 0.88482 (0.88844)	Top-1 acc 69.141 (68.060)	Top-5 acc 83.984 (86.527)	lr 0.00516
Train [84][3110/3239]	Time 0.251 (0.632)	Data Time 0.001 (0.014)	Loss 2.4051 (2.3336)	Entropy 0.88481 (0.88843)	Top-1 acc 69.531 (68.056)	Top-5 acc 83.984 (86.522)	lr 0.00516
Train [84][3120/3239]	Time 0.252 (0.631)	Data Time 0.001 (0.014)	Loss 2.3710 (2.3336)	Entropy 0.88475 (0.88842)	Top-1 acc 66.797 (68.056)	Top-5 acc 85.547 (86.521)	lr 0.00516
Train [84][3130/3239]	Time 0.333 (0.631)	Data Time 0.001 (0.014)	Loss 2.4220 (2.3334)	Entropy 0.88474 (0.88841)	Top-1 acc 67.188 (68.059)	Top-5 acc 84.375 (86.526)	lr 0.00516
Train [84][3140/3239]	Time 0.298 (0.630)	Data Time 0.001 (0.014)	Loss 2.1978 (2.3334)	Entropy 0.88468 (0.88839)	Top-1 acc 68.359 (68.061)	Top-5 acc 91.797 (86.528)	lr 0.00516
Train [84][3150/3239]	Time 0.212 (0.630)	Data Time 0.001 (0.014)	Loss 2.2087 (2.3334)	Entropy 0.88457 (0.88838)	Top-1 acc 73.828 (68.059)	Top-5 acc 87.891 (86.529)	lr 0.00516
Train [84][3160/3239]	Time 0.226 (0.629)	Data Time 0.001 (0.014)	Loss 2.3459 (2.3333)	Entropy 0.88458 (0.88837)	Top-1 acc 69.141 (68.060)	Top-5 acc 85.938 (86.529)	lr 0.00516
Train [84][3170/3239]	Time 0.350 (0.629)	Data Time 0.001 (0.014)	Loss 2.3460 (2.3334)	Entropy 0.88459 (0.88836)	Top-1 acc 68.359 (68.058)	Top-5 acc 87.109 (86.530)	lr 0.00516
Train [84][3180/3239]	Time 0.246 (0.628)	Data Time 0.000 (0.014)	Loss 2.2611 (2.3331)	Entropy 0.88461 (0.88835)	Top-1 acc 73.438 (68.065)	Top-5 acc 88.281 (86.536)	lr 0.00516
Train [84][3190/3239]	Time 0.237 (0.628)	Data Time 0.000 (0.014)	Loss 2.2789 (2.3329)	Entropy 0.88456 (0.88833)	Top-1 acc 71.094 (68.072)	Top-5 acc 86.719 (86.536)	lr 0.00516
Train [84][3200/3239]	Time 0.227 (0.627)	Data Time 0.000 (0.014)	Loss 2.3755 (2.3329)	Entropy 0.88442 (0.88832)	Top-1 acc 65.625 (68.072)	Top-5 acc 85.156 (86.537)	lr 0.00516
Train [84][3210/3239]	Time 0.224 (0.627)	Data Time 0.000 (0.014)	Loss 2.2774 (2.3330)	Entropy 0.88434 (0.88831)	Top-1 acc 67.188 (68.071)	Top-5 acc 87.500 (86.535)	lr 0.00516
Train [84][3220/3239]	Time 0.230 (0.626)	Data Time 0.000 (0.014)	Loss 2.6521 (2.3329)	Entropy 0.88438 (0.88830)	Top-1 acc 59.375 (68.068)	Top-5 acc 83.203 (86.537)	lr 0.00515
Train [84][3230/3239]	Time 0.225 (0.626)	Data Time 0.000 (0.013)	Loss 2.1766 (2.3327)	Entropy 0.88431 (0.88829)	Top-1 acc 75.000 (68.076)	Top-5 acc 90.234 (86.539)	lr 0.00515
Train [84][3239/3239]	Time 2.308 (0.625)	Data Time 0.000 (0.013)	Loss 2.6896 (2.3329)	Entropy 0.88431 (0.88827)	Top-1 acc 56.790 (68.070)	Top-5 acc 85.185 (86.536)	lr 0.00515
==========Valid [84/120]	loss 1.287	top-1 acc 70.555 (70.789)	top-5 acc 88.751	Train top-1 68.070	top-5 86.536	Entropy 0.88431	Latency-None: 0.000ms	Flops: 546.53M
Train [85][0/3239]	Time 42.988 (42.988)	Data Time 40.889 (40.889)	Loss 2.3264 (2.3264)	Entropy 0.88430 (0.88430)	Top-1 acc 68.750 (68.750)	Top-5 acc 87.500 (87.500)	lr 0.00515
Train [85][10/3239]	Time 2.622 (4.418)	Data Time 0.002 (3.719)	Loss 2.2841 (2.3324)	Entropy 0.88430 (0.88430)	Top-1 acc 72.266 (68.430)	Top-5 acc 86.719 (86.577)	lr 0.00515
Train [85][20/3239]	Time 0.220 (2.424)	Data Time 0.001 (1.949)	Loss 2.3629 (2.3239)	Entropy 0.88432 (0.88431)	Top-1 acc 68.359 (68.545)	Top-5 acc 87.109 (86.868)	lr 0.00515
Train [85][30/3239]	Time 0.230 (1.801)	Data Time 0.001 (1.321)	Loss 2.1981 (2.3155)	Entropy 0.88424 (0.88429)	Top-1 acc 71.484 (68.725)	Top-5 acc 88.281 (87.046)	lr 0.00515
Train [85][40/3239]	Time 0.222 (1.478)	Data Time 0.001 (0.999)	Loss 2.2028 (2.3174)	Entropy 0.88422 (0.88428)	Top-1 acc 71.094 (68.740)	Top-5 acc 90.625 (87.005)	lr 0.00515
Train [85][50/3239]	Time 0.248 (1.282)	Data Time 0.002 (0.803)	Loss 2.4116 (2.3195)	Entropy 0.88426 (0.88427)	Top-1 acc 63.672 (68.827)	Top-5 acc 86.328 (87.002)	lr 0.00515
Train [85][60/3239]	Time 0.230 (1.151)	Data Time 0.001 (0.672)	Loss 2.2640 (2.3144)	Entropy 0.88428 (0.88427)	Top-1 acc 71.875 (68.872)	Top-5 acc 86.328 (86.975)	lr 0.00515
Train [85][70/3239]	Time 0.232 (1.061)	Data Time 0.001 (0.577)	Loss 2.2427 (2.3127)	Entropy 0.88426 (0.88427)	Top-1 acc 69.922 (68.827)	Top-5 acc 88.281 (87.060)	lr 0.00515
Train [85][80/3239]	Time 0.231 (0.988)	Data Time 0.001 (0.506)	Loss 2.3001 (2.3178)	Entropy 0.88416 (0.88426)	Top-1 acc 72.266 (68.702)	Top-5 acc 88.281 (86.926)	lr 0.00515
Train [85][90/3239]	Time 0.229 (0.931)	Data Time 0.001 (0.451)	Loss 2.2707 (2.3073)	Entropy 0.88417 (0.88425)	Top-1 acc 73.047 (68.995)	Top-5 acc 88.281 (87.075)	lr 0.00515
Train [85][100/3239]	Time 0.229 (0.887)	Data Time 0.001 (0.406)	Loss 2.3305 (2.3013)	Entropy 0.88415 (0.88424)	Top-1 acc 69.531 (69.121)	Top-5 acc 85.938 (87.133)	lr 0.00514
Train [85][110/3239]	Time 0.317 (0.851)	Data Time 0.002 (0.370)	Loss 2.1633 (2.3026)	Entropy 0.88415 (0.88423)	Top-1 acc 73.828 (68.982)	Top-5 acc 89.844 (87.099)	lr 0.00514
Train [85][120/3239]	Time 2.501 (0.819)	Data Time 0.001 (0.339)	Loss 2.3390 (2.3011)	Entropy 0.88415 (0.88423)	Top-1 acc 70.312 (68.992)	Top-5 acc 85.547 (87.061)	lr 0.00514
Train [85][130/3239]	Time 0.232 (0.775)	Data Time 0.001 (0.314)	Loss 2.3427 (2.3035)	Entropy 0.88406 (0.88421)	Top-1 acc 67.969 (68.875)	Top-5 acc 87.891 (86.987)	lr 0.00514
Train [85][140/3239]	Time 0.242 (0.753)	Data Time 0.001 (0.292)	Loss 2.2424 (2.3071)	Entropy 0.88394 (0.88420)	Top-1 acc 70.703 (68.789)	Top-5 acc 89.062 (86.929)	lr 0.00514
Train [85][150/3239]	Time 0.240 (0.735)	Data Time 0.001 (0.272)	Loss 2.1351 (2.3070)	Entropy 0.88371 (0.88417)	Top-1 acc 73.047 (68.820)	Top-5 acc 91.016 (86.933)	lr 0.00514
Train [85][160/3239]	Time 0.232 (0.720)	Data Time 0.001 (0.255)	Loss 2.2943 (2.3069)	Entropy 0.88370 (0.88414)	Top-1 acc 69.141 (68.871)	Top-5 acc 85.156 (86.923)	lr 0.00514
Train [85][170/3239]	Time 0.227 (0.706)	Data Time 0.001 (0.241)	Loss 2.2672 (2.3040)	Entropy 0.88371 (0.88411)	Top-1 acc 68.750 (68.937)	Top-5 acc 86.719 (86.993)	lr 0.00514
Train [85][180/3239]	Time 0.319 (0.989)	Data Time 0.002 (0.227)	Loss 2.2339 (2.3029)	Entropy 0.88367 (0.88409)	Top-1 acc 71.094 (68.974)	Top-5 acc 87.500 (87.004)	lr 0.00514
Train [85][190/3239]	Time 0.223 (0.966)	Data Time 0.002 (0.216)	Loss 2.3409 (2.3052)	Entropy 0.88368 (0.88407)	Top-1 acc 67.578 (68.940)	Top-5 acc 85.156 (86.970)	lr 0.00514
Train [85][200/3239]	Time 0.233 (0.942)	Data Time 0.001 (0.205)	Loss 2.2183 (2.3052)	Entropy 0.88365 (0.88405)	Top-1 acc 73.047 (68.911)	Top-5 acc 87.891 (86.983)	lr 0.00514
Train [85][210/3239]	Time 0.237 (0.920)	Data Time 0.002 (0.195)	Loss 2.2940 (2.3046)	Entropy 0.88364 (0.88403)	Top-1 acc 68.359 (68.915)	Top-5 acc 87.891 (86.991)	lr 0.00514
Train [85][220/3239]	Time 0.255 (0.899)	Data Time 0.001 (0.187)	Loss 2.4069 (2.3099)	Entropy 0.88362 (0.88401)	Top-1 acc 64.844 (68.789)	Top-5 acc 85.547 (86.941)	lr 0.00513
Train [85][230/3239]	Time 2.469 (0.880)	Data Time 0.001 (0.179)	Loss 2.3117 (2.3109)	Entropy 0.88362 (0.88399)	Top-1 acc 69.531 (68.799)	Top-5 acc 87.109 (86.908)	lr 0.00513
Train [85][240/3239]	Time 0.273 (0.854)	Data Time 0.001 (0.171)	Loss 2.2441 (2.3124)	Entropy 0.88359 (0.88398)	Top-1 acc 69.141 (68.744)	Top-5 acc 87.500 (86.866)	lr 0.00513
Train [85][250/3239]	Time 0.228 (0.839)	Data Time 0.001 (0.165)	Loss 2.5123 (2.3132)	Entropy 0.88359 (0.88396)	Top-1 acc 66.797 (68.736)	Top-5 acc 81.250 (86.815)	lr 0.00513
Train [85][260/3239]	Time 0.245 (0.825)	Data Time 0.002 (0.158)	Loss 2.2611 (2.3123)	Entropy 0.88361 (0.88395)	Top-1 acc 70.312 (68.771)	Top-5 acc 88.281 (86.831)	lr 0.00513
Train [85][270/3239]	Time 0.236 (0.812)	Data Time 0.001 (0.152)	Loss 2.2721 (2.3135)	Entropy 0.88356 (0.88393)	Top-1 acc 71.094 (68.738)	Top-5 acc 87.891 (86.820)	lr 0.00513
Train [85][280/3239]	Time 0.233 (0.800)	Data Time 0.001 (0.147)	Loss 2.4434 (2.3153)	Entropy 0.88338 (0.88392)	Top-1 acc 66.406 (68.699)	Top-5 acc 85.938 (86.802)	lr 0.00513
Train [85][290/3239]	Time 0.223 (0.789)	Data Time 0.001 (0.142)	Loss 2.3337 (2.3157)	Entropy 0.88326 (0.88390)	Top-1 acc 66.016 (68.661)	Top-5 acc 85.156 (86.789)	lr 0.00513
Train [85][300/3239]	Time 0.213 (0.778)	Data Time 0.001 (0.137)	Loss 2.2511 (2.3153)	Entropy 0.88349 (0.88388)	Top-1 acc 71.484 (68.699)	Top-5 acc 87.500 (86.785)	lr 0.00513
Train [85][310/3239]	Time 0.221 (0.768)	Data Time 0.001 (0.133)	Loss 2.4633 (2.3158)	Entropy 0.88358 (0.88387)	Top-1 acc 66.016 (68.696)	Top-5 acc 83.203 (86.788)	lr 0.00513
Train [85][320/3239]	Time 0.222 (0.758)	Data Time 0.001 (0.129)	Loss 2.3127 (2.3151)	Entropy 0.88361 (0.88386)	Top-1 acc 66.797 (68.707)	Top-5 acc 86.719 (86.781)	lr 0.00513
Train [85][330/3239]	Time 0.344 (0.750)	Data Time 0.001 (0.125)	Loss 2.3490 (2.3147)	Entropy 0.88360 (0.88385)	Top-1 acc 69.922 (68.737)	Top-5 acc 87.109 (86.775)	lr 0.00513
Train [85][340/3239]	Time 2.532 (0.742)	Data Time 0.001 (0.122)	Loss 2.5133 (2.3162)	Entropy 0.88360 (0.88384)	Top-1 acc 64.844 (68.709)	Top-5 acc 85.156 (86.745)	lr 0.00512
Train [85][350/3239]	Time 0.232 (0.728)	Data Time 0.002 (0.118)	Loss 2.0791 (2.3148)	Entropy 0.88360 (0.88384)	Top-1 acc 75.000 (68.766)	Top-5 acc 92.969 (86.753)	lr 0.00512
Train [85][360/3239]	Time 0.229 (0.721)	Data Time 0.001 (0.115)	Loss 2.2393 (2.3159)	Entropy 0.88355 (0.88383)	Top-1 acc 68.359 (68.702)	Top-5 acc 89.844 (86.743)	lr 0.00512
Train [85][370/3239]	Time 0.340 (0.715)	Data Time 0.001 (0.112)	Loss 2.4052 (2.3163)	Entropy 0.88352 (0.88382)	Top-1 acc 64.453 (68.682)	Top-5 acc 86.328 (86.742)	lr 0.00512
Train [85][380/3239]	Time 0.241 (0.709)	Data Time 0.001 (0.109)	Loss 2.2856 (2.3162)	Entropy 0.88353 (0.88381)	Top-1 acc 69.141 (68.673)	Top-5 acc 88.281 (86.751)	lr 0.00512
Train [85][390/3239]	Time 0.250 (0.703)	Data Time 0.001 (0.106)	Loss 2.3315 (2.3181)	Entropy 0.88356 (0.88381)	Top-1 acc 66.797 (68.626)	Top-5 acc 87.109 (86.708)	lr 0.00512
Train [85][400/3239]	Time 0.224 (0.698)	Data Time 0.001 (0.104)	Loss 2.3745 (2.3174)	Entropy 0.88344 (0.88380)	Top-1 acc 64.844 (68.644)	Top-5 acc 84.766 (86.722)	lr 0.00512
Train [85][410/3239]	Time 0.218 (0.693)	Data Time 0.002 (0.101)	Loss 2.3195 (2.3176)	Entropy 0.88337 (0.88379)	Top-1 acc 69.922 (68.641)	Top-5 acc 85.156 (86.698)	lr 0.00512
Train [85][420/3239]	Time 0.231 (0.688)	Data Time 0.001 (0.099)	Loss 2.2797 (2.3182)	Entropy 0.88328 (0.88378)	Top-1 acc 72.266 (68.620)	Top-5 acc 88.672 (86.689)	lr 0.00512
Train [85][430/3239]	Time 0.233 (0.683)	Data Time 0.001 (0.097)	Loss 2.3247 (2.3167)	Entropy 0.88305 (0.88377)	Top-1 acc 68.359 (68.645)	Top-5 acc 85.547 (86.711)	lr 0.00512
Train [85][440/3239]	Time 0.267 (0.678)	Data Time 0.001 (0.094)	Loss 2.3556 (2.3198)	Entropy 0.88298 (0.88375)	Top-1 acc 68.359 (68.597)	Top-5 acc 83.984 (86.659)	lr 0.00512
Train [85][450/3239]	Time 2.535 (0.674)	Data Time 0.002 (0.092)	Loss 2.1973 (2.3197)	Entropy 0.88298 (0.88373)	Top-1 acc 71.094 (68.574)	Top-5 acc 89.844 (86.649)	lr 0.00512
Train [85][460/3239]	Time 0.263 (0.665)	Data Time 0.001 (0.090)	Loss 2.2914 (2.3192)	Entropy 0.88296 (0.88372)	Top-1 acc 69.531 (68.584)	Top-5 acc 85.156 (86.665)	lr 0.00512
Train [85][470/3239]	Time 0.240 (0.661)	Data Time 0.001 (0.088)	Loss 2.3122 (2.3190)	Entropy 0.88296 (0.88370)	Top-1 acc 67.969 (68.594)	Top-5 acc 87.500 (86.673)	lr 0.00511
Train [85][480/3239]	Time 0.222 (0.657)	Data Time 0.001 (0.087)	Loss 2.1992 (2.3183)	Entropy 0.88285 (0.88368)	Top-1 acc 71.094 (68.610)	Top-5 acc 88.672 (86.688)	lr 0.00511
Train [85][490/3239]	Time 0.259 (0.653)	Data Time 0.003 (0.085)	Loss 2.2739 (2.3175)	Entropy 0.88281 (0.88367)	Top-1 acc 73.438 (68.646)	Top-5 acc 87.109 (86.708)	lr 0.00511
Train [85][500/3239]	Time 0.319 (0.650)	Data Time 0.001 (0.083)	Loss 2.3081 (2.3166)	Entropy 0.88280 (0.88365)	Top-1 acc 67.578 (68.683)	Top-5 acc 85.938 (86.712)	lr 0.00511
Train [85][510/3239]	Time 0.226 (0.647)	Data Time 0.001 (0.082)	Loss 2.3657 (2.3164)	Entropy 0.88276 (0.88363)	Top-1 acc 68.750 (68.683)	Top-5 acc 84.766 (86.719)	lr 0.00511
Train [85][520/3239]	Time 0.230 (0.643)	Data Time 0.002 (0.080)	Loss 2.2172 (2.3164)	Entropy 0.88275 (0.88361)	Top-1 acc 69.141 (68.674)	Top-5 acc 87.891 (86.722)	lr 0.00511
Train [85][530/3239]	Time 0.224 (0.641)	Data Time 0.002 (0.079)	Loss 2.3548 (2.3156)	Entropy 0.88272 (0.88360)	Top-1 acc 66.797 (68.690)	Top-5 acc 84.766 (86.731)	lr 0.00511
Train [85][540/3239]	Time 0.305 (0.741)	Data Time 0.003 (0.077)	Loss 2.2866 (2.3161)	Entropy 0.88265 (0.88358)	Top-1 acc 73.438 (68.684)	Top-5 acc 85.156 (86.727)	lr 0.00511
Train [85][550/3239]	Time 0.248 (0.736)	Data Time 0.002 (0.076)	Loss 2.1622 (2.3165)	Entropy 0.88266 (0.88356)	Top-1 acc 70.703 (68.654)	Top-5 acc 91.406 (86.722)	lr 0.00511
Train [85][560/3239]	Time 2.527 (0.731)	Data Time 0.001 (0.075)	Loss 2.3362 (2.3165)	Entropy 0.88266 (0.88355)	Top-1 acc 66.797 (68.651)	Top-5 acc 86.719 (86.717)	lr 0.00511
Train [85][570/3239]	Time 0.231 (0.723)	Data Time 0.001 (0.073)	Loss 2.3380 (2.3155)	Entropy 0.88268 (0.88353)	Top-1 acc 67.578 (68.675)	Top-5 acc 87.109 (86.733)	lr 0.00511
Train [85][580/3239]	Time 0.223 (0.718)	Data Time 0.001 (0.072)	Loss 2.3214 (2.3149)	Entropy 0.88270 (0.88352)	Top-1 acc 67.969 (68.679)	Top-5 acc 87.109 (86.744)	lr 0.00511
Train [85][590/3239]	Time 0.230 (0.714)	Data Time 0.001 (0.071)	Loss 2.1562 (2.3151)	Entropy 0.88253 (0.88350)	Top-1 acc 73.438 (68.666)	Top-5 acc 88.672 (86.741)	lr 0.00510
Train [85][600/3239]	Time 0.234 (0.711)	Data Time 0.001 (0.070)	Loss 2.2972 (2.3149)	Entropy 0.88248 (0.88349)	Top-1 acc 71.875 (68.676)	Top-5 acc 87.891 (86.751)	lr 0.00510
Train [85][610/3239]	Time 0.227 (0.707)	Data Time 0.001 (0.069)	Loss 2.3829 (2.3143)	Entropy 0.88251 (0.88347)	Top-1 acc 66.016 (68.693)	Top-5 acc 87.109 (86.759)	lr 0.00510
Train [85][620/3239]	Time 0.220 (0.703)	Data Time 0.001 (0.068)	Loss 2.3562 (2.3143)	Entropy 0.88245 (0.88345)	Top-1 acc 69.922 (68.698)	Top-5 acc 84.766 (86.762)	lr 0.00510
Train [85][630/3239]	Time 0.344 (0.700)	Data Time 0.001 (0.066)	Loss 2.2160 (2.3142)	Entropy 0.88247 (0.88344)	Top-1 acc 72.656 (68.704)	Top-5 acc 89.453 (86.770)	lr 0.00510
Train [85][640/3239]	Time 0.232 (0.697)	Data Time 0.001 (0.065)	Loss 2.4116 (2.3141)	Entropy 0.88247 (0.88342)	Top-1 acc 65.625 (68.705)	Top-5 acc 83.984 (86.773)	lr 0.00510
Train [85][650/3239]	Time 0.235 (0.693)	Data Time 0.001 (0.064)	Loss 2.2968 (2.3134)	Entropy 0.88241 (0.88341)	Top-1 acc 72.656 (68.727)	Top-5 acc 88.281 (86.784)	lr 0.00510
Train [85][660/3239]	Time 0.289 (0.690)	Data Time 0.001 (0.064)	Loss 2.3251 (2.3139)	Entropy 0.88238 (0.88339)	Top-1 acc 69.531 (68.725)	Top-5 acc 89.062 (86.776)	lr 0.00510
Train [85][670/3239]	Time 2.652 (0.687)	Data Time 0.001 (0.063)	Loss 2.4055 (2.3150)	Entropy 0.88238 (0.88338)	Top-1 acc 70.703 (68.710)	Top-5 acc 86.719 (86.752)	lr 0.00510
Train [85][680/3239]	Time 0.237 (0.681)	Data Time 0.001 (0.062)	Loss 2.3426 (2.3150)	Entropy 0.88235 (0.88336)	Top-1 acc 69.141 (68.720)	Top-5 acc 85.547 (86.751)	lr 0.00510
Train [85][690/3239]	Time 0.232 (0.678)	Data Time 0.001 (0.061)	Loss 2.1565 (2.3152)	Entropy 0.88235 (0.88335)	Top-1 acc 72.266 (68.698)	Top-5 acc 89.844 (86.743)	lr 0.00510
Train [85][700/3239]	Time 0.240 (0.675)	Data Time 0.001 (0.060)	Loss 2.1256 (2.3153)	Entropy 0.88228 (0.88333)	Top-1 acc 73.047 (68.700)	Top-5 acc 89.062 (86.737)	lr 0.00510
Train [85][710/3239]	Time 0.246 (0.673)	Data Time 0.001 (0.059)	Loss 2.3638 (2.3153)	Entropy 0.88238 (0.88332)	Top-1 acc 67.578 (68.703)	Top-5 acc 85.156 (86.734)	lr 0.00509
Train [85][720/3239]	Time 0.234 (0.670)	Data Time 0.001 (0.058)	Loss 2.4623 (2.3165)	Entropy 0.88235 (0.88331)	Top-1 acc 63.281 (68.677)	Top-5 acc 83.984 (86.706)	lr 0.00509
Train [85][730/3239]	Time 0.244 (0.667)	Data Time 0.001 (0.058)	Loss 2.5992 (2.3172)	Entropy 0.88234 (0.88329)	Top-1 acc 62.109 (68.665)	Top-5 acc 81.250 (86.691)	lr 0.00509
Train [85][740/3239]	Time 0.234 (0.665)	Data Time 0.001 (0.057)	Loss 2.3636 (2.3174)	Entropy 0.88234 (0.88328)	Top-1 acc 64.844 (68.656)	Top-5 acc 85.547 (86.681)	lr 0.00509
Train [85][750/3239]	Time 0.230 (0.662)	Data Time 0.001 (0.056)	Loss 2.2272 (2.3171)	Entropy 0.88229 (0.88327)	Top-1 acc 72.656 (68.667)	Top-5 acc 87.500 (86.697)	lr 0.00509
Train [85][760/3239]	Time 0.219 (0.660)	Data Time 0.001 (0.055)	Loss 2.2531 (2.3166)	Entropy 0.88222 (0.88325)	Top-1 acc 71.484 (68.686)	Top-5 acc 87.109 (86.700)	lr 0.00509
Train [85][770/3239]	Time 0.278 (0.658)	Data Time 0.001 (0.055)	Loss 2.4509 (2.3166)	Entropy 0.88220 (0.88324)	Top-1 acc 66.016 (68.682)	Top-5 acc 85.938 (86.707)	lr 0.00509
Train [85][780/3239]	Time 2.544 (0.655)	Data Time 0.001 (0.054)	Loss 2.1873 (2.3165)	Entropy 0.88220 (0.88323)	Top-1 acc 77.344 (68.684)	Top-5 acc 85.938 (86.708)	lr 0.00509
Train [85][790/3239]	Time 0.261 (0.650)	Data Time 0.001 (0.053)	Loss 2.4530 (2.3172)	Entropy 0.88217 (0.88321)	Top-1 acc 67.578 (68.666)	Top-5 acc 80.859 (86.688)	lr 0.00509
Train [85][800/3239]	Time 0.222 (0.648)	Data Time 0.001 (0.053)	Loss 2.3288 (2.3176)	Entropy 0.88206 (0.88320)	Top-1 acc 68.750 (68.647)	Top-5 acc 87.500 (86.677)	lr 0.00509
Train [85][810/3239]	Time 0.235 (0.646)	Data Time 0.001 (0.052)	Loss 2.3528 (2.3176)	Entropy 0.88197 (0.88319)	Top-1 acc 68.750 (68.654)	Top-5 acc 83.594 (86.675)	lr 0.00509
Train [85][820/3239]	Time 0.239 (0.644)	Data Time 0.001 (0.051)	Loss 2.3061 (2.3176)	Entropy 0.88195 (0.88317)	Top-1 acc 70.703 (68.660)	Top-5 acc 87.109 (86.676)	lr 0.00509
Train [85][830/3239]	Time 0.241 (0.642)	Data Time 0.002 (0.051)	Loss 2.1322 (2.3170)	Entropy 0.88189 (0.88316)	Top-1 acc 75.391 (68.680)	Top-5 acc 89.844 (86.686)	lr 0.00509
Train [85][840/3239]	Time 0.336 (0.640)	Data Time 0.002 (0.050)	Loss 2.3673 (2.3169)	Entropy 0.88187 (0.88314)	Top-1 acc 67.188 (68.675)	Top-5 acc 85.156 (86.690)	lr 0.00508
Train [85][850/3239]	Time 0.234 (0.639)	Data Time 0.001 (0.050)	Loss 2.2921 (2.3169)	Entropy 0.88178 (0.88312)	Top-1 acc 69.531 (68.671)	Top-5 acc 84.375 (86.689)	lr 0.00508
Train [85][860/3239]	Time 0.244 (0.637)	Data Time 0.001 (0.049)	Loss 2.2599 (2.3173)	Entropy 0.88181 (0.88311)	Top-1 acc 67.188 (68.652)	Top-5 acc 88.672 (86.679)	lr 0.00508
Train [85][870/3239]	Time 0.269 (0.635)	Data Time 0.001 (0.049)	Loss 2.4058 (2.3178)	Entropy 0.88180 (0.88309)	Top-1 acc 67.188 (68.639)	Top-5 acc 83.984 (86.673)	lr 0.00508
Train [85][880/3239]	Time 0.249 (0.633)	Data Time 0.001 (0.048)	Loss 2.3383 (2.3184)	Entropy 0.88171 (0.88308)	Top-1 acc 66.797 (68.626)	Top-5 acc 84.766 (86.659)	lr 0.00508
Train [85][890/3239]	Time 2.555 (0.632)	Data Time 0.001 (0.048)	Loss 2.2141 (2.3183)	Entropy 0.88171 (0.88306)	Top-1 acc 71.875 (68.623)	Top-5 acc 87.500 (86.657)	lr 0.00508
Train [85][900/3239]	Time 0.233 (0.627)	Data Time 0.001 (0.047)	Loss 2.4398 (2.3187)	Entropy 0.88174 (0.88305)	Top-1 acc 67.578 (68.618)	Top-5 acc 85.547 (86.651)	lr 0.00508
Train [85][910/3239]	Time 0.241 (0.687)	Data Time 0.002 (0.047)	Loss 2.3591 (2.3185)	Entropy 0.88172 (0.88303)	Top-1 acc 63.672 (68.611)	Top-5 acc 85.156 (86.654)	lr 0.00508
Train [85][920/3239]	Time 0.225 (0.685)	Data Time 0.001 (0.046)	Loss 2.2456 (2.3189)	Entropy 0.88177 (0.88302)	Top-1 acc 73.047 (68.601)	Top-5 acc 87.109 (86.644)	lr 0.00508
Train [85][930/3239]	Time 0.223 (0.683)	Data Time 0.001 (0.046)	Loss 2.3267 (2.3191)	Entropy 0.88178 (0.88301)	Top-1 acc 69.922 (68.594)	Top-5 acc 84.375 (86.636)	lr 0.00508
Train [85][940/3239]	Time 0.226 (0.681)	Data Time 0.001 (0.045)	Loss 2.5343 (2.3192)	Entropy 0.88175 (0.88299)	Top-1 acc 60.547 (68.597)	Top-5 acc 83.594 (86.632)	lr 0.00508
Train [85][950/3239]	Time 0.244 (0.679)	Data Time 0.001 (0.045)	Loss 2.2777 (2.3197)	Entropy 0.88168 (0.88298)	Top-1 acc 69.531 (68.593)	Top-5 acc 86.719 (86.626)	lr 0.00508
Train [85][960/3239]	Time 0.213 (0.676)	Data Time 0.001 (0.044)	Loss 2.1444 (2.3192)	Entropy 0.88173 (0.88297)	Top-1 acc 71.094 (68.607)	Top-5 acc 92.188 (86.637)	lr 0.00507
Train [85][970/3239]	Time 0.325 (0.674)	Data Time 0.001 (0.044)	Loss 2.3899 (2.3197)	Entropy 0.88174 (0.88295)	Top-1 acc 64.062 (68.581)	Top-5 acc 86.719 (86.631)	lr 0.00507
Train [85][980/3239]	Time 0.222 (0.672)	Data Time 0.001 (0.043)	Loss 2.3139 (2.3195)	Entropy 0.88173 (0.88294)	Top-1 acc 66.406 (68.578)	Top-5 acc 89.453 (86.640)	lr 0.00507
Train [85][990/3239]	Time 0.259 (0.670)	Data Time 0.001 (0.043)	Loss 2.3428 (2.3194)	Entropy 0.88172 (0.88293)	Top-1 acc 70.703 (68.590)	Top-5 acc 85.938 (86.646)	lr 0.00507
Train [85][1000/3239]	Time 2.552 (0.668)	Data Time 0.001 (0.042)	Loss 2.3474 (2.3195)	Entropy 0.88172 (0.88292)	Top-1 acc 67.188 (68.580)	Top-5 acc 87.500 (86.650)	lr 0.00507
Train [85][1010/3239]	Time 0.285 (0.664)	Data Time 0.001 (0.042)	Loss 2.2704 (2.3189)	Entropy 0.88172 (0.88291)	Top-1 acc 68.750 (68.585)	Top-5 acc 87.891 (86.663)	lr 0.00507
Train [85][1020/3239]	Time 0.270 (0.663)	Data Time 0.001 (0.042)	Loss 2.3188 (2.3194)	Entropy 0.88159 (0.88289)	Top-1 acc 70.703 (68.575)	Top-5 acc 84.766 (86.650)	lr 0.00507
Train [85][1030/3239]	Time 0.232 (0.661)	Data Time 0.002 (0.041)	Loss 2.2604 (2.3196)	Entropy 0.88158 (0.88288)	Top-1 acc 71.484 (68.566)	Top-5 acc 86.719 (86.646)	lr 0.00507
Train [85][1040/3239]	Time 0.235 (0.659)	Data Time 0.001 (0.041)	Loss 2.4109 (2.3200)	Entropy 0.88158 (0.88287)	Top-1 acc 62.891 (68.556)	Top-5 acc 85.156 (86.640)	lr 0.00507
Train [85][1050/3239]	Time 0.227 (0.657)	Data Time 0.001 (0.041)	Loss 2.1994 (2.3197)	Entropy 0.88158 (0.88286)	Top-1 acc 71.094 (68.554)	Top-5 acc 89.062 (86.652)	lr 0.00507
Train [85][1060/3239]	Time 0.227 (0.656)	Data Time 0.001 (0.040)	Loss 2.2687 (2.3200)	Entropy 0.88154 (0.88284)	Top-1 acc 71.484 (68.544)	Top-5 acc 87.500 (86.645)	lr 0.00507
Train [85][1070/3239]	Time 0.239 (0.654)	Data Time 0.001 (0.040)	Loss 2.4723 (2.3200)	Entropy 0.88150 (0.88283)	Top-1 acc 65.234 (68.541)	Top-5 acc 83.984 (86.650)	lr 0.00507
Train [85][1080/3239]	Time 0.228 (0.652)	Data Time 0.001 (0.039)	Loss 2.2477 (2.3195)	Entropy 0.88147 (0.88282)	Top-1 acc 69.922 (68.551)	Top-5 acc 87.109 (86.662)	lr 0.00506
Train [85][1090/3239]	Time 0.231 (0.651)	Data Time 0.002 (0.039)	Loss 2.2854 (2.3199)	Entropy 0.88141 (0.88281)	Top-1 acc 70.703 (68.538)	Top-5 acc 87.891 (86.660)	lr 0.00506
Train [85][1100/3239]	Time 0.219 (0.649)	Data Time 0.001 (0.039)	Loss 2.2526 (2.3200)	Entropy 0.88137 (0.88279)	Top-1 acc 67.969 (68.528)	Top-5 acc 87.109 (86.659)	lr 0.00506
Train [85][1110/3239]	Time 2.447 (0.647)	Data Time 0.001 (0.038)	Loss 2.4637 (2.3204)	Entropy 0.88137 (0.88278)	Top-1 acc 65.625 (68.513)	Top-5 acc 83.984 (86.650)	lr 0.00506
Train [85][1120/3239]	Time 0.237 (0.644)	Data Time 0.001 (0.038)	Loss 2.3535 (2.3202)	Entropy 0.88138 (0.88277)	Top-1 acc 66.797 (68.521)	Top-5 acc 88.281 (86.653)	lr 0.00506
Train [85][1130/3239]	Time 0.250 (0.642)	Data Time 0.001 (0.038)	Loss 2.1811 (2.3197)	Entropy 0.88137 (0.88276)	Top-1 acc 72.656 (68.535)	Top-5 acc 89.062 (86.661)	lr 0.00506
Train [85][1140/3239]	Time 0.365 (0.641)	Data Time 0.001 (0.037)	Loss 2.3246 (2.3195)	Entropy 0.88134 (0.88274)	Top-1 acc 70.703 (68.535)	Top-5 acc 85.938 (86.672)	lr 0.00506
Train [85][1150/3239]	Time 0.261 (0.640)	Data Time 0.002 (0.037)	Loss 2.3677 (2.3198)	Entropy 0.88133 (0.88273)	Top-1 acc 67.188 (68.519)	Top-5 acc 82.812 (86.669)	lr 0.00506
Train [85][1160/3239]	Time 0.239 (0.638)	Data Time 0.001 (0.037)	Loss 2.2790 (2.3198)	Entropy 0.88132 (0.88272)	Top-1 acc 71.094 (68.510)	Top-5 acc 87.109 (86.666)	lr 0.00506
Train [85][1170/3239]	Time 0.251 (0.637)	Data Time 0.001 (0.037)	Loss 2.2336 (2.3200)	Entropy 0.88134 (0.88271)	Top-1 acc 67.969 (68.503)	Top-5 acc 89.453 (86.667)	lr 0.00506
Train [85][1180/3239]	Time 0.234 (0.635)	Data Time 0.001 (0.036)	Loss 2.3467 (2.3202)	Entropy 0.88135 (0.88270)	Top-1 acc 67.578 (68.495)	Top-5 acc 86.719 (86.659)	lr 0.00506
Train [85][1190/3239]	Time 0.221 (0.634)	Data Time 0.001 (0.036)	Loss 2.3397 (2.3200)	Entropy 0.88134 (0.88268)	Top-1 acc 67.188 (68.506)	Top-5 acc 85.938 (86.661)	lr 0.00506
Train [85][1200/3239]	Time 0.227 (0.633)	Data Time 0.001 (0.036)	Loss 2.3167 (2.3199)	Entropy 0.88132 (0.88267)	Top-1 acc 68.750 (68.515)	Top-5 acc 84.375 (86.661)	lr 0.00505
Train [85][1210/3239]	Time 0.274 (0.631)	Data Time 0.001 (0.035)	Loss 2.2696 (2.3200)	Entropy 0.88129 (0.88266)	Top-1 acc 67.578 (68.513)	Top-5 acc 87.891 (86.665)	lr 0.00505
Train [85][1220/3239]	Time 2.495 (0.630)	Data Time 0.001 (0.035)	Loss 2.2818 (2.3194)	Entropy 0.88129 (0.88265)	Top-1 acc 66.406 (68.525)	Top-5 acc 89.844 (86.678)	lr 0.00505
Train [85][1230/3239]	Time 0.360 (0.627)	Data Time 0.001 (0.035)	Loss 2.4671 (2.3193)	Entropy 0.88129 (0.88264)	Top-1 acc 62.500 (68.527)	Top-5 acc 85.547 (86.686)	lr 0.00505
Train [85][1240/3239]	Time 0.229 (0.626)	Data Time 0.001 (0.035)	Loss 2.1957 (2.3196)	Entropy 0.88128 (0.88263)	Top-1 acc 75.391 (68.516)	Top-5 acc 88.672 (86.681)	lr 0.00505
Train [85][1250/3239]	Time 0.222 (0.625)	Data Time 0.001 (0.034)	Loss 2.3077 (2.3197)	Entropy 0.88125 (0.88262)	Top-1 acc 69.531 (68.506)	Top-5 acc 88.281 (86.688)	lr 0.00505
Train [85][1260/3239]	Time 0.226 (0.623)	Data Time 0.001 (0.034)	Loss 2.3798 (2.3193)	Entropy 0.88130 (0.88261)	Top-1 acc 66.797 (68.520)	Top-5 acc 83.594 (86.696)	lr 0.00505
Train [85][1270/3239]	Time 0.317 (0.665)	Data Time 0.002 (0.034)	Loss 2.2829 (2.3189)	Entropy 0.88129 (0.88260)	Top-1 acc 71.875 (68.524)	Top-5 acc 87.891 (86.705)	lr 0.00505
Train [85][1280/3239]	Time 0.236 (0.664)	Data Time 0.002 (0.034)	Loss 2.1416 (2.3185)	Entropy 0.88119 (0.88259)	Top-1 acc 71.875 (68.533)	Top-5 acc 91.016 (86.713)	lr 0.00505
Train [85][1290/3239]	Time 0.270 (0.662)	Data Time 0.002 (0.033)	Loss 2.2753 (2.3184)	Entropy 0.88116 (0.88258)	Top-1 acc 70.703 (68.532)	Top-5 acc 88.281 (86.723)	lr 0.00505
Train [85][1300/3239]	Time 0.242 (0.661)	Data Time 0.001 (0.033)	Loss 2.3924 (2.3185)	Entropy 0.88115 (0.88257)	Top-1 acc 63.281 (68.526)	Top-5 acc 87.500 (86.725)	lr 0.00505
Train [85][1310/3239]	Time 0.224 (0.660)	Data Time 0.001 (0.033)	Loss 2.3324 (2.3186)	Entropy 0.88105 (0.88255)	Top-1 acc 67.578 (68.522)	Top-5 acc 87.891 (86.728)	lr 0.00505
Train [85][1320/3239]	Time 0.236 (0.658)	Data Time 0.001 (0.033)	Loss 2.2218 (2.3186)	Entropy 0.88094 (0.88254)	Top-1 acc 67.578 (68.517)	Top-5 acc 89.062 (86.731)	lr 0.00505
Train [85][1330/3239]	Time 2.539 (0.657)	Data Time 0.001 (0.032)	Loss 2.3574 (2.3186)	Entropy 0.88094 (0.88253)	Top-1 acc 66.406 (68.511)	Top-5 acc 85.547 (86.726)	lr 0.00504
Train [85][1340/3239]	Time 0.230 (0.654)	Data Time 0.001 (0.032)	Loss 2.3086 (2.3182)	Entropy 0.88092 (0.88252)	Top-1 acc 69.922 (68.520)	Top-5 acc 86.328 (86.731)	lr 0.00504
Train [85][1350/3239]	Time 0.259 (0.652)	Data Time 0.001 (0.032)	Loss 2.2362 (2.3181)	Entropy 0.88084 (0.88251)	Top-1 acc 70.312 (68.528)	Top-5 acc 85.156 (86.733)	lr 0.00504
Train [85][1360/3239]	Time 0.245 (0.651)	Data Time 0.001 (0.032)	Loss 2.3852 (2.3183)	Entropy 0.88075 (0.88249)	Top-1 acc 67.578 (68.527)	Top-5 acc 85.547 (86.731)	lr 0.00504
Train [85][1370/3239]	Time 0.229 (0.650)	Data Time 0.001 (0.031)	Loss 2.3470 (2.3186)	Entropy 0.88068 (0.88248)	Top-1 acc 69.141 (68.523)	Top-5 acc 85.156 (86.724)	lr 0.00504
Train [85][1380/3239]	Time 0.253 (0.649)	Data Time 0.001 (0.031)	Loss 2.4853 (2.3186)	Entropy 0.88072 (0.88247)	Top-1 acc 63.281 (68.526)	Top-5 acc 84.766 (86.724)	lr 0.00504
Train [85][1390/3239]	Time 0.259 (0.648)	Data Time 0.001 (0.031)	Loss 2.4010 (2.3187)	Entropy 0.88059 (0.88245)	Top-1 acc 63.281 (68.522)	Top-5 acc 86.719 (86.720)	lr 0.00504
Train [85][1400/3239]	Time 0.329 (0.647)	Data Time 0.001 (0.031)	Loss 2.2440 (2.3187)	Entropy 0.88057 (0.88244)	Top-1 acc 70.312 (68.520)	Top-5 acc 87.891 (86.725)	lr 0.00504
Train [85][1410/3239]	Time 0.223 (0.645)	Data Time 0.001 (0.031)	Loss 2.2812 (2.3188)	Entropy 0.88054 (0.88243)	Top-1 acc 69.922 (68.519)	Top-5 acc 87.109 (86.725)	lr 0.00504
Train [85][1420/3239]	Time 0.237 (0.644)	Data Time 0.001 (0.030)	Loss 2.2351 (2.3188)	Entropy 0.88058 (0.88241)	Top-1 acc 72.656 (68.521)	Top-5 acc 87.500 (86.721)	lr 0.00504
Train [85][1430/3239]	Time 0.285 (0.643)	Data Time 0.001 (0.030)	Loss 2.2076 (2.3189)	Entropy 0.88053 (0.88240)	Top-1 acc 70.312 (68.517)	Top-5 acc 87.109 (86.719)	lr 0.00504
Train [85][1440/3239]	Time 2.521 (0.642)	Data Time 0.002 (0.030)	Loss 2.4370 (2.3191)	Entropy 0.88053 (0.88239)	Top-1 acc 64.844 (68.508)	Top-5 acc 82.812 (86.711)	lr 0.00504
Train [85][1450/3239]	Time 0.266 (0.639)	Data Time 0.002 (0.030)	Loss 2.4204 (2.3191)	Entropy 0.88052 (0.88238)	Top-1 acc 66.797 (68.508)	Top-5 acc 84.766 (86.708)	lr 0.00503
Train [85][1460/3239]	Time 0.261 (0.638)	Data Time 0.001 (0.030)	Loss 2.3117 (2.3192)	Entropy 0.88053 (0.88236)	Top-1 acc 68.359 (68.504)	Top-5 acc 85.547 (86.707)	lr 0.00503
Train [85][1470/3239]	Time 0.240 (0.637)	Data Time 0.001 (0.029)	Loss 2.2343 (2.3189)	Entropy 0.88047 (0.88235)	Top-1 acc 69.531 (68.496)	Top-5 acc 89.844 (86.715)	lr 0.00503
Train [85][1480/3239]	Time 0.228 (0.636)	Data Time 0.001 (0.029)	Loss 2.3316 (2.3186)	Entropy 0.88044 (0.88234)	Top-1 acc 66.406 (68.506)	Top-5 acc 87.109 (86.720)	lr 0.00503
Train [85][1490/3239]	Time 0.321 (0.635)	Data Time 0.001 (0.029)	Loss 2.2875 (2.3183)	Entropy 0.88041 (0.88232)	Top-1 acc 69.141 (68.516)	Top-5 acc 89.062 (86.726)	lr 0.00503
Train [85][1500/3239]	Time 0.215 (0.634)	Data Time 0.001 (0.029)	Loss 2.1783 (2.3187)	Entropy 0.88038 (0.88231)	Top-1 acc 70.703 (68.511)	Top-5 acc 88.672 (86.715)	lr 0.00503
Train [85][1510/3239]	Time 0.215 (0.633)	Data Time 0.001 (0.029)	Loss 2.3270 (2.3191)	Entropy 0.88034 (0.88230)	Top-1 acc 65.234 (68.496)	Top-5 acc 88.281 (86.706)	lr 0.00503
Train [85][1520/3239]	Time 0.228 (0.632)	Data Time 0.001 (0.028)	Loss 2.5047 (2.3194)	Entropy 0.88035 (0.88229)	Top-1 acc 63.281 (68.485)	Top-5 acc 84.375 (86.701)	lr 0.00503
Train [85][1530/3239]	Time 0.327 (0.631)	Data Time 0.001 (0.028)	Loss 2.2675 (2.3195)	Entropy 0.88026 (0.88227)	Top-1 acc 72.656 (68.489)	Top-5 acc 87.109 (86.697)	lr 0.00503
Train [85][1540/3239]	Time 0.225 (0.630)	Data Time 0.001 (0.028)	Loss 2.3174 (2.3196)	Entropy 0.88025 (0.88226)	Top-1 acc 67.578 (68.486)	Top-5 acc 88.672 (86.698)	lr 0.00503
Train [85][1550/3239]	Time 2.520 (0.629)	Data Time 0.001 (0.028)	Loss 2.2534 (2.3195)	Entropy 0.88025 (0.88225)	Top-1 acc 69.141 (68.488)	Top-5 acc 87.500 (86.700)	lr 0.00503
Train [85][1560/3239]	Time 0.231 (0.626)	Data Time 0.001 (0.028)	Loss 2.3643 (2.3192)	Entropy 0.88019 (0.88223)	Top-1 acc 68.359 (68.495)	Top-5 acc 86.719 (86.705)	lr 0.00503
Train [85][1570/3239]	Time 0.228 (0.625)	Data Time 0.001 (0.028)	Loss 2.3233 (2.3193)	Entropy 0.88017 (0.88222)	Top-1 acc 69.141 (68.495)	Top-5 acc 89.844 (86.707)	lr 0.00502
Train [85][1580/3239]	Time 0.240 (0.625)	Data Time 0.001 (0.027)	Loss 2.3264 (2.3194)	Entropy 0.88017 (0.88221)	Top-1 acc 67.578 (68.490)	Top-5 acc 85.156 (86.702)	lr 0.00502
Train [85][1590/3239]	Time 0.225 (0.624)	Data Time 0.001 (0.027)	Loss 2.2091 (2.3193)	Entropy 0.88013 (0.88219)	Top-1 acc 71.094 (68.494)	Top-5 acc 88.281 (86.705)	lr 0.00502
Train [85][1600/3239]	Time 0.235 (0.623)	Data Time 0.001 (0.027)	Loss 2.3259 (2.3191)	Entropy 0.88012 (0.88218)	Top-1 acc 69.141 (68.497)	Top-5 acc 87.109 (86.713)	lr 0.00502
Train [85][1610/3239]	Time 0.226 (0.622)	Data Time 0.001 (0.027)	Loss 2.3048 (2.3190)	Entropy 0.88009 (0.88217)	Top-1 acc 64.453 (68.493)	Top-5 acc 89.453 (86.716)	lr 0.00502
Train [85][1620/3239]	Time 0.321 (0.621)	Data Time 0.001 (0.027)	Loss 2.3943 (2.3190)	Entropy 0.88009 (0.88216)	Top-1 acc 65.625 (68.490)	Top-5 acc 86.328 (86.719)	lr 0.00502
Train [85][1630/3239]	Time 0.426 (0.651)	Data Time 0.002 (0.027)	Loss 2.1749 (2.3190)	Entropy 0.88007 (0.88214)	Top-1 acc 74.219 (68.494)	Top-5 acc 88.672 (86.720)	lr 0.00502
Train [85][1640/3239]	Time 0.239 (0.651)	Data Time 0.002 (0.027)	Loss 2.3163 (2.3190)	Entropy 0.88005 (0.88213)	Top-1 acc 66.797 (68.486)	Top-5 acc 85.938 (86.719)	lr 0.00502
Train [85][1650/3239]	Time 0.261 (0.650)	Data Time 0.002 (0.026)	Loss 2.4277 (2.3193)	Entropy 0.88002 (0.88212)	Top-1 acc 62.891 (68.476)	Top-5 acc 86.328 (86.717)	lr 0.00502
Train [85][1660/3239]	Time 2.698 (0.649)	Data Time 0.001 (0.026)	Loss 2.2904 (2.3193)	Entropy 0.88002 (0.88211)	Top-1 acc 71.094 (68.475)	Top-5 acc 88.672 (86.722)	lr 0.00502
Train [85][1670/3239]	Time 0.235 (0.646)	Data Time 0.001 (0.026)	Loss 2.1558 (2.3193)	Entropy 0.87999 (0.88209)	Top-1 acc 68.750 (68.470)	Top-5 acc 89.844 (86.724)	lr 0.00502
Train [85][1680/3239]	Time 0.234 (0.645)	Data Time 0.001 (0.026)	Loss 2.2779 (2.3191)	Entropy 0.88003 (0.88208)	Top-1 acc 69.141 (68.478)	Top-5 acc 88.672 (86.727)	lr 0.00502
Train [85][1690/3239]	Time 0.249 (0.644)	Data Time 0.001 (0.026)	Loss 2.1196 (2.3191)	Entropy 0.88003 (0.88207)	Top-1 acc 72.266 (68.480)	Top-5 acc 91.406 (86.726)	lr 0.00502
Train [85][1700/3239]	Time 0.235 (0.643)	Data Time 0.001 (0.026)	Loss 2.3543 (2.3189)	Entropy 0.88001 (0.88206)	Top-1 acc 66.797 (68.484)	Top-5 acc 86.719 (86.728)	lr 0.00501
Train [85][1710/3239]	Time 0.232 (0.643)	Data Time 0.001 (0.026)	Loss 2.1873 (2.3187)	Entropy 0.87998 (0.88204)	Top-1 acc 73.828 (68.492)	Top-5 acc 87.891 (86.729)	lr 0.00501
Train [85][1720/3239]	Time 0.222 (0.642)	Data Time 0.001 (0.025)	Loss 2.1919 (2.3189)	Entropy 0.87992 (0.88203)	Top-1 acc 71.875 (68.489)	Top-5 acc 90.234 (86.729)	lr 0.00501
Train [85][1730/3239]	Time 0.235 (0.641)	Data Time 0.001 (0.025)	Loss 2.2326 (2.3191)	Entropy 0.87992 (0.88202)	Top-1 acc 72.266 (68.491)	Top-5 acc 87.891 (86.721)	lr 0.00501
Train [85][1740/3239]	Time 0.234 (0.640)	Data Time 0.001 (0.025)	Loss 2.2615 (2.3191)	Entropy 0.87980 (0.88201)	Top-1 acc 66.406 (68.489)	Top-5 acc 89.062 (86.720)	lr 0.00501
Train [85][1750/3239]	Time 0.236 (0.639)	Data Time 0.001 (0.025)	Loss 2.5203 (2.3193)	Entropy 0.87974 (0.88200)	Top-1 acc 62.891 (68.483)	Top-5 acc 83.594 (86.717)	lr 0.00501
Train [85][1760/3239]	Time 0.258 (0.638)	Data Time 0.001 (0.025)	Loss 2.2238 (2.3192)	Entropy 0.87980 (0.88198)	Top-1 acc 70.703 (68.492)	Top-5 acc 88.281 (86.717)	lr 0.00501
Train [85][1770/3239]	Time 2.689 (0.637)	Data Time 0.001 (0.025)	Loss 2.2879 (2.3191)	Entropy 0.87980 (0.88197)	Top-1 acc 67.188 (68.494)	Top-5 acc 87.500 (86.716)	lr 0.00501
Train [85][1780/3239]	Time 0.222 (0.635)	Data Time 0.001 (0.025)	Loss 2.3617 (2.3188)	Entropy 0.87972 (0.88196)	Top-1 acc 68.359 (68.504)	Top-5 acc 86.328 (86.721)	lr 0.00501
Train [85][1790/3239]	Time 0.317 (0.634)	Data Time 0.001 (0.024)	Loss 2.2834 (2.3188)	Entropy 0.87957 (0.88194)	Top-1 acc 66.406 (68.505)	Top-5 acc 88.281 (86.717)	lr 0.00501
Train [85][1800/3239]	Time 0.226 (0.633)	Data Time 0.001 (0.024)	Loss 2.2822 (2.3189)	Entropy 0.87955 (0.88193)	Top-1 acc 72.656 (68.503)	Top-5 acc 84.766 (86.715)	lr 0.00501
Train [85][1810/3239]	Time 0.206 (0.632)	Data Time 0.001 (0.024)	Loss 2.3723 (2.3188)	Entropy 0.87955 (0.88192)	Top-1 acc 66.797 (68.503)	Top-5 acc 86.328 (86.717)	lr 0.00501
Train [85][1820/3239]	Time 0.242 (0.631)	Data Time 0.001 (0.024)	Loss 2.3696 (2.3190)	Entropy 0.87952 (0.88190)	Top-1 acc 67.578 (68.503)	Top-5 acc 86.328 (86.714)	lr 0.00500
Train [85][1830/3239]	Time 0.233 (0.630)	Data Time 0.001 (0.024)	Loss 2.4141 (2.3191)	Entropy 0.87946 (0.88189)	Top-1 acc 67.969 (68.502)	Top-5 acc 86.719 (86.713)	lr 0.00500
Train [85][1840/3239]	Time 0.226 (0.629)	Data Time 0.001 (0.024)	Loss 2.2506 (2.3192)	Entropy 0.87943 (0.88188)	Top-1 acc 67.969 (68.502)	Top-5 acc 88.672 (86.712)	lr 0.00500
Train [85][1850/3239]	Time 0.239 (0.629)	Data Time 0.002 (0.024)	Loss 2.4379 (2.3193)	Entropy 0.87946 (0.88186)	Top-1 acc 67.578 (68.499)	Top-5 acc 83.594 (86.710)	lr 0.00500
Train [85][1860/3239]	Time 0.226 (0.628)	Data Time 0.001 (0.024)	Loss 2.2502 (2.3192)	Entropy 0.87950 (0.88185)	Top-1 acc 69.531 (68.500)	Top-5 acc 87.109 (86.711)	lr 0.00500
Train [85][1870/3239]	Time 0.256 (0.627)	Data Time 0.001 (0.023)	Loss 2.2618 (2.3194)	Entropy 0.87938 (0.88184)	Top-1 acc 67.969 (68.496)	Top-5 acc 87.500 (86.706)	lr 0.00500
Train [85][1880/3239]	Time 2.637 (0.626)	Data Time 0.001 (0.023)	Loss 2.2486 (2.3194)	Entropy 0.87938 (0.88183)	Top-1 acc 67.969 (68.494)	Top-5 acc 87.109 (86.705)	lr 0.00500
Train [85][1890/3239]	Time 0.227 (0.624)	Data Time 0.001 (0.023)	Loss 2.2796 (2.3193)	Entropy 0.87941 (0.88181)	Top-1 acc 67.969 (68.494)	Top-5 acc 87.891 (86.704)	lr 0.00500
Train [85][1900/3239]	Time 0.240 (0.623)	Data Time 0.001 (0.023)	Loss 2.3068 (2.3195)	Entropy 0.87922 (0.88180)	Top-1 acc 69.531 (68.493)	Top-5 acc 87.891 (86.703)	lr 0.00500
Train [85][1910/3239]	Time 0.250 (0.623)	Data Time 0.001 (0.023)	Loss 2.2012 (2.3196)	Entropy 0.87915 (0.88179)	Top-1 acc 70.703 (68.489)	Top-5 acc 89.062 (86.701)	lr 0.00500
Train [85][1920/3239]	Time 0.337 (0.622)	Data Time 0.001 (0.023)	Loss 2.3002 (2.3197)	Entropy 0.87915 (0.88177)	Top-1 acc 71.094 (68.489)	Top-5 acc 87.109 (86.696)	lr 0.00500
Train [85][1930/3239]	Time 0.238 (0.621)	Data Time 0.001 (0.023)	Loss 2.4298 (2.3200)	Entropy 0.87905 (0.88176)	Top-1 acc 63.281 (68.482)	Top-5 acc 84.766 (86.689)	lr 0.00500
Train [85][1940/3239]	Time 0.214 (0.621)	Data Time 0.001 (0.023)	Loss 2.2670 (2.3202)	Entropy 0.87908 (0.88174)	Top-1 acc 69.922 (68.479)	Top-5 acc 88.281 (86.685)	lr 0.00500
Train [85][1950/3239]	Time 0.255 (0.620)	Data Time 0.002 (0.023)	Loss 2.2534 (2.3203)	Entropy 0.87904 (0.88173)	Top-1 acc 71.094 (68.473)	Top-5 acc 87.109 (86.682)	lr 0.00499
Train [85][1960/3239]	Time 0.265 (0.619)	Data Time 0.002 (0.022)	Loss 2.1705 (2.3205)	Entropy 0.87899 (0.88172)	Top-1 acc 70.703 (68.465)	Top-5 acc 88.672 (86.676)	lr 0.00499
Train [85][1970/3239]	Time 0.224 (0.618)	Data Time 0.001 (0.022)	Loss 2.3301 (2.3203)	Entropy 0.87896 (0.88170)	Top-1 acc 66.406 (68.465)	Top-5 acc 88.672 (86.680)	lr 0.00499
Train [85][1980/3239]	Time 0.259 (0.618)	Data Time 0.002 (0.022)	Loss 2.3734 (2.3205)	Entropy 0.87894 (0.88169)	Top-1 acc 64.844 (68.457)	Top-5 acc 87.109 (86.681)	lr 0.00499
Train [85][1990/3239]	Time 56.663 (0.644)	Data Time 0.001 (0.022)	Loss 2.3154 (2.3206)	Entropy 0.87894 (0.88168)	Top-1 acc 66.016 (68.457)	Top-5 acc 87.109 (86.680)	lr 0.00499
Train [85][2000/3239]	Time 0.242 (0.642)	Data Time 0.002 (0.022)	Loss 2.3728 (2.3205)	Entropy 0.87896 (0.88166)	Top-1 acc 66.406 (68.457)	Top-5 acc 86.328 (86.684)	lr 0.00499
Train [85][2010/3239]	Time 0.231 (0.641)	Data Time 0.002 (0.022)	Loss 2.1538 (2.3204)	Entropy 0.87887 (0.88165)	Top-1 acc 73.047 (68.458)	Top-5 acc 89.453 (86.687)	lr 0.00499
Train [85][2020/3239]	Time 0.260 (0.641)	Data Time 0.002 (0.022)	Loss 2.2686 (2.3207)	Entropy 0.87877 (0.88163)	Top-1 acc 69.922 (68.449)	Top-5 acc 87.891 (86.685)	lr 0.00499
Train [85][2030/3239]	Time 0.236 (0.640)	Data Time 0.002 (0.022)	Loss 2.2981 (2.3208)	Entropy 0.87877 (0.88162)	Top-1 acc 69.531 (68.445)	Top-5 acc 85.938 (86.682)	lr 0.00499
Train [85][2040/3239]	Time 0.236 (0.639)	Data Time 0.002 (0.022)	Loss 2.2945 (2.3208)	Entropy 0.87877 (0.88161)	Top-1 acc 71.484 (68.443)	Top-5 acc 86.719 (86.686)	lr 0.00499
Train [85][2050/3239]	Time 0.324 (0.638)	Data Time 0.002 (0.022)	Loss 2.3239 (2.3206)	Entropy 0.87877 (0.88159)	Top-1 acc 71.875 (68.450)	Top-5 acc 85.547 (86.687)	lr 0.00499
Train [85][2060/3239]	Time 0.230 (0.638)	Data Time 0.001 (0.021)	Loss 2.2731 (2.3206)	Entropy 0.87870 (0.88158)	Top-1 acc 69.141 (68.450)	Top-5 acc 88.281 (86.683)	lr 0.00499
Train [85][2070/3239]	Time 0.227 (0.637)	Data Time 0.001 (0.021)	Loss 2.2196 (2.3205)	Entropy 0.87860 (0.88156)	Top-1 acc 67.969 (68.447)	Top-5 acc 89.062 (86.685)	lr 0.00498
Train [85][2080/3239]	Time 0.235 (0.636)	Data Time 0.002 (0.021)	Loss 2.3758 (2.3206)	Entropy 0.87859 (0.88155)	Top-1 acc 65.234 (68.445)	Top-5 acc 86.719 (86.685)	lr 0.00498
Train [85][2090/3239]	Time 0.348 (0.635)	Data Time 0.002 (0.021)	Loss 2.2898 (2.3205)	Entropy 0.87857 (0.88154)	Top-1 acc 63.672 (68.445)	Top-5 acc 87.500 (86.686)	lr 0.00498
Train [85][2100/3239]	Time 2.542 (0.635)	Data Time 0.001 (0.021)	Loss 2.4065 (2.3209)	Entropy 0.87857 (0.88152)	Top-1 acc 67.969 (68.436)	Top-5 acc 83.984 (86.677)	lr 0.00498
Train [85][2110/3239]	Time 0.215 (0.633)	Data Time 0.001 (0.021)	Loss 2.2484 (2.3208)	Entropy 0.87863 (0.88151)	Top-1 acc 73.047 (68.438)	Top-5 acc 87.109 (86.677)	lr 0.00498
Train [85][2120/3239]	Time 0.231 (0.632)	Data Time 0.001 (0.021)	Loss 2.3095 (2.3209)	Entropy 0.87860 (0.88149)	Top-1 acc 69.922 (68.439)	Top-5 acc 86.719 (86.673)	lr 0.00498
Train [85][2130/3239]	Time 0.235 (0.631)	Data Time 0.001 (0.021)	Loss 2.3382 (2.3207)	Entropy 0.87857 (0.88148)	Top-1 acc 70.312 (68.445)	Top-5 acc 87.891 (86.676)	lr 0.00498
Train [85][2140/3239]	Time 0.225 (0.631)	Data Time 0.001 (0.021)	Loss 2.2281 (2.3203)	Entropy 0.87863 (0.88147)	Top-1 acc 71.875 (68.453)	Top-5 acc 88.672 (86.682)	lr 0.00498
Train [85][2150/3239]	Time 0.221 (0.630)	Data Time 0.001 (0.021)	Loss 2.4515 (2.3204)	Entropy 0.87860 (0.88145)	Top-1 acc 62.500 (68.453)	Top-5 acc 85.938 (86.682)	lr 0.00498
Train [85][2160/3239]	Time 0.231 (0.629)	Data Time 0.001 (0.021)	Loss 2.2546 (2.3203)	Entropy 0.87862 (0.88144)	Top-1 acc 67.188 (68.450)	Top-5 acc 88.672 (86.682)	lr 0.00498
Train [85][2170/3239]	Time 0.241 (0.628)	Data Time 0.001 (0.020)	Loss 2.4004 (2.3200)	Entropy 0.87867 (0.88143)	Top-1 acc 67.188 (68.458)	Top-5 acc 85.547 (86.689)	lr 0.00498
Train [85][2180/3239]	Time 0.342 (0.628)	Data Time 0.002 (0.020)	Loss 2.3136 (2.3200)	Entropy 0.87875 (0.88142)	Top-1 acc 66.406 (68.460)	Top-5 acc 87.109 (86.690)	lr 0.00498
Train [85][2190/3239]	Time 0.218 (0.627)	Data Time 0.001 (0.020)	Loss 2.1589 (2.3200)	Entropy 0.87872 (0.88140)	Top-1 acc 73.047 (68.463)	Top-5 acc 88.281 (86.686)	lr 0.00497
Train [85][2200/3239]	Time 0.223 (0.626)	Data Time 0.001 (0.020)	Loss 2.0506 (2.3203)	Entropy 0.87864 (0.88139)	Top-1 acc 77.734 (68.460)	Top-5 acc 90.234 (86.680)	lr 0.00497
Train [85][2210/3239]	Time 2.619 (0.626)	Data Time 0.001 (0.020)	Loss 2.3244 (2.3203)	Entropy 0.87864 (0.88138)	Top-1 acc 68.750 (68.455)	Top-5 acc 87.109 (86.678)	lr 0.00497
Train [85][2220/3239]	Time 0.364 (0.624)	Data Time 0.001 (0.020)	Loss 2.1932 (2.3202)	Entropy 0.87862 (0.88137)	Top-1 acc 71.094 (68.453)	Top-5 acc 89.062 (86.680)	lr 0.00497
Train [85][2230/3239]	Time 0.239 (0.623)	Data Time 0.001 (0.020)	Loss 2.4473 (2.3203)	Entropy 0.87862 (0.88135)	Top-1 acc 65.234 (68.449)	Top-5 acc 85.547 (86.680)	lr 0.00497
Train [85][2240/3239]	Time 0.225 (0.623)	Data Time 0.001 (0.020)	Loss 2.2782 (2.3204)	Entropy 0.87858 (0.88134)	Top-1 acc 68.750 (68.444)	Top-5 acc 88.281 (86.676)	lr 0.00497
Train [85][2250/3239]	Time 0.240 (0.622)	Data Time 0.001 (0.020)	Loss 2.3706 (2.3204)	Entropy 0.87853 (0.88133)	Top-1 acc 67.969 (68.447)	Top-5 acc 85.156 (86.676)	lr 0.00497
Train [85][2260/3239]	Time 0.272 (0.622)	Data Time 0.001 (0.020)	Loss 2.4087 (2.3204)	Entropy 0.87850 (0.88132)	Top-1 acc 67.969 (68.449)	Top-5 acc 87.500 (86.680)	lr 0.00497
Train [85][2270/3239]	Time 0.275 (0.621)	Data Time 0.001 (0.020)	Loss 2.3281 (2.3203)	Entropy 0.87843 (0.88130)	Top-1 acc 68.359 (68.457)	Top-5 acc 88.281 (86.682)	lr 0.00497
Train [85][2280/3239]	Time 0.284 (0.621)	Data Time 0.002 (0.020)	Loss 2.2536 (2.3204)	Entropy 0.87838 (0.88129)	Top-1 acc 69.141 (68.452)	Top-5 acc 86.719 (86.680)	lr 0.00497
Train [85][2290/3239]	Time 0.281 (0.621)	Data Time 0.001 (0.019)	Loss 2.2796 (2.3204)	Entropy 0.87831 (0.88128)	Top-1 acc 69.531 (68.456)	Top-5 acc 88.281 (86.680)	lr 0.00497
Train [85][2300/3239]	Time 0.222 (0.620)	Data Time 0.001 (0.019)	Loss 2.4910 (2.3206)	Entropy 0.87818 (0.88127)	Top-1 acc 62.891 (68.450)	Top-5 acc 83.203 (86.679)	lr 0.00497
Train [85][2310/3239]	Time 0.234 (0.620)	Data Time 0.001 (0.019)	Loss 2.3827 (2.3208)	Entropy 0.87830 (0.88125)	Top-1 acc 63.672 (68.438)	Top-5 acc 86.328 (86.675)	lr 0.00497
Train [85][2320/3239]	Time 2.587 (0.619)	Data Time 0.001 (0.019)	Loss 2.3320 (2.3210)	Entropy 0.87830 (0.88124)	Top-1 acc 68.750 (68.434)	Top-5 acc 85.547 (86.673)	lr 0.00496
Train [85][2330/3239]	Time 0.247 (0.618)	Data Time 0.001 (0.019)	Loss 2.1304 (2.3211)	Entropy 0.87823 (0.88123)	Top-1 acc 73.828 (68.437)	Top-5 acc 87.891 (86.672)	lr 0.00496
Train [85][2340/3239]	Time 0.243 (0.617)	Data Time 0.001 (0.019)	Loss 2.2998 (2.3211)	Entropy 0.87809 (0.88121)	Top-1 acc 69.531 (68.436)	Top-5 acc 86.719 (86.673)	lr 0.00496
Train [85][2350/3239]	Time 0.337 (0.616)	Data Time 0.001 (0.019)	Loss 2.2753 (2.3212)	Entropy 0.87802 (0.88120)	Top-1 acc 70.703 (68.431)	Top-5 acc 87.109 (86.672)	lr 0.00496
Train [85][2360/3239]	Time 0.242 (0.639)	Data Time 0.002 (0.019)	Loss 2.4561 (2.3212)	Entropy 0.87792 (0.88119)	Top-1 acc 65.234 (68.429)	Top-5 acc 84.375 (86.671)	lr 0.00496
Train [85][2370/3239]	Time 0.242 (0.638)	Data Time 0.002 (0.019)	Loss 2.1872 (2.3212)	Entropy 0.87786 (0.88117)	Top-1 acc 75.781 (68.432)	Top-5 acc 87.109 (86.671)	lr 0.00496
Train [85][2380/3239]	Time 0.241 (0.637)	Data Time 0.001 (0.019)	Loss 2.2606 (2.3213)	Entropy 0.87785 (0.88116)	Top-1 acc 67.188 (68.428)	Top-5 acc 86.719 (86.670)	lr 0.00496
Train [85][2390/3239]	Time 0.334 (0.637)	Data Time 0.002 (0.019)	Loss 2.4164 (2.3216)	Entropy 0.87792 (0.88114)	Top-1 acc 66.406 (68.421)	Top-5 acc 86.328 (86.665)	lr 0.00496
Train [85][2400/3239]	Time 0.241 (0.636)	Data Time 0.001 (0.019)	Loss 2.4269 (2.3216)	Entropy 0.87792 (0.88113)	Top-1 acc 63.281 (68.420)	Top-5 acc 85.156 (86.665)	lr 0.00496
Train [85][2410/3239]	Time 0.231 (0.635)	Data Time 0.001 (0.019)	Loss 2.1996 (2.3214)	Entropy 0.87796 (0.88112)	Top-1 acc 68.750 (68.419)	Top-5 acc 90.234 (86.672)	lr 0.00496
Train [85][2420/3239]	Time 0.264 (0.635)	Data Time 0.002 (0.019)	Loss 2.3417 (2.3215)	Entropy 0.87791 (0.88110)	Top-1 acc 67.578 (68.418)	Top-5 acc 85.938 (86.671)	lr 0.00496
Train [85][2430/3239]	Time 2.558 (0.634)	Data Time 0.002 (0.018)	Loss 2.3717 (2.3217)	Entropy 0.87791 (0.88109)	Top-1 acc 65.234 (68.413)	Top-5 acc 86.719 (86.671)	lr 0.00496
Train [85][2440/3239]	Time 0.271 (0.633)	Data Time 0.001 (0.018)	Loss 2.3362 (2.3218)	Entropy 0.87789 (0.88108)	Top-1 acc 66.406 (68.412)	Top-5 acc 87.891 (86.669)	lr 0.00495
Train [85][2450/3239]	Time 0.213 (0.632)	Data Time 0.002 (0.018)	Loss 2.2421 (2.3218)	Entropy 0.87791 (0.88107)	Top-1 acc 71.875 (68.414)	Top-5 acc 88.672 (86.671)	lr 0.00495
Train [85][2460/3239]	Time 0.219 (0.631)	Data Time 0.001 (0.018)	Loss 2.2851 (2.3219)	Entropy 0.87792 (0.88105)	Top-1 acc 70.703 (68.414)	Top-5 acc 88.281 (86.673)	lr 0.00495
Train [85][2470/3239]	Time 0.216 (0.631)	Data Time 0.001 (0.018)	Loss 2.2912 (2.3217)	Entropy 0.87791 (0.88104)	Top-1 acc 69.141 (68.416)	Top-5 acc 87.109 (86.675)	lr 0.00495
Train [85][2480/3239]	Time 0.341 (0.630)	Data Time 0.001 (0.018)	Loss 2.2284 (2.3218)	Entropy 0.87787 (0.88103)	Top-1 acc 69.922 (68.412)	Top-5 acc 88.281 (86.673)	lr 0.00495
Train [85][2490/3239]	Time 0.226 (0.629)	Data Time 0.001 (0.018)	Loss 2.3519 (2.3218)	Entropy 0.87784 (0.88101)	Top-1 acc 66.406 (68.411)	Top-5 acc 85.156 (86.669)	lr 0.00495
Train [85][2500/3239]	Time 0.230 (0.629)	Data Time 0.001 (0.018)	Loss 2.4724 (2.3218)	Entropy 0.87780 (0.88100)	Top-1 acc 63.672 (68.412)	Top-5 acc 85.547 (86.670)	lr 0.00495
Train [85][2510/3239]	Time 0.232 (0.628)	Data Time 0.001 (0.018)	Loss 2.2352 (2.3218)	Entropy 0.87781 (0.88099)	Top-1 acc 71.484 (68.413)	Top-5 acc 86.328 (86.668)	lr 0.00495
Train [85][2520/3239]	Time 0.319 (0.628)	Data Time 0.001 (0.018)	Loss 2.4398 (2.3219)	Entropy 0.87781 (0.88098)	Top-1 acc 63.672 (68.409)	Top-5 acc 83.594 (86.667)	lr 0.00495
Train [85][2530/3239]	Time 0.232 (0.627)	Data Time 0.001 (0.018)	Loss 2.2120 (2.3220)	Entropy 0.87778 (0.88096)	Top-1 acc 72.266 (68.410)	Top-5 acc 88.672 (86.665)	lr 0.00495
Train [85][2540/3239]	Time 2.538 (0.626)	Data Time 0.001 (0.018)	Loss 2.3125 (2.3219)	Entropy 0.87778 (0.88095)	Top-1 acc 68.750 (68.412)	Top-5 acc 87.109 (86.665)	lr 0.00495
Train [85][2550/3239]	Time 0.231 (0.625)	Data Time 0.001 (0.018)	Loss 2.2066 (2.3220)	Entropy 0.87782 (0.88094)	Top-1 acc 70.312 (68.409)	Top-5 acc 88.672 (86.664)	lr 0.00495
Train [85][2560/3239]	Time 0.390 (0.624)	Data Time 0.001 (0.018)	Loss 2.3672 (2.3222)	Entropy 0.87777 (0.88093)	Top-1 acc 66.016 (68.404)	Top-5 acc 86.328 (86.657)	lr 0.00495
Train [85][2570/3239]	Time 0.237 (0.624)	Data Time 0.002 (0.018)	Loss 2.5602 (2.3223)	Entropy 0.87775 (0.88091)	Top-1 acc 61.719 (68.401)	Top-5 acc 80.859 (86.657)	lr 0.00494
Train [85][2580/3239]	Time 0.214 (0.623)	Data Time 0.001 (0.017)	Loss 2.3001 (2.3222)	Entropy 0.87773 (0.88090)	Top-1 acc 67.969 (68.398)	Top-5 acc 87.500 (86.658)	lr 0.00494
Train [85][2590/3239]	Time 0.233 (0.623)	Data Time 0.001 (0.017)	Loss 2.3573 (2.3222)	Entropy 0.87766 (0.88089)	Top-1 acc 68.750 (68.399)	Top-5 acc 84.766 (86.659)	lr 0.00494
Train [85][2600/3239]	Time 0.228 (0.622)	Data Time 0.001 (0.017)	Loss 2.2738 (2.3220)	Entropy 0.87763 (0.88088)	Top-1 acc 70.703 (68.402)	Top-5 acc 88.281 (86.662)	lr 0.00494
Train [85][2610/3239]	Time 0.226 (0.622)	Data Time 0.001 (0.017)	Loss 2.3716 (2.3221)	Entropy 0.87762 (0.88086)	Top-1 acc 66.016 (68.398)	Top-5 acc 87.109 (86.662)	lr 0.00494
Train [85][2620/3239]	Time 0.226 (0.621)	Data Time 0.001 (0.017)	Loss 2.3353 (2.3221)	Entropy 0.87768 (0.88085)	Top-1 acc 67.969 (68.398)	Top-5 acc 87.109 (86.661)	lr 0.00494
Train [85][2630/3239]	Time 0.228 (0.620)	Data Time 0.002 (0.017)	Loss 2.1190 (2.3221)	Entropy 0.87767 (0.88084)	Top-1 acc 75.000 (68.397)	Top-5 acc 91.406 (86.662)	lr 0.00494
Train [85][2640/3239]	Time 0.219 (0.620)	Data Time 0.001 (0.017)	Loss 2.3072 (2.3220)	Entropy 0.87764 (0.88083)	Top-1 acc 70.312 (68.398)	Top-5 acc 87.109 (86.665)	lr 0.00494
Train [85][2650/3239]	Time 0.314 (0.619)	Data Time 0.001 (0.017)	Loss 2.5868 (2.3221)	Entropy 0.87761 (0.88082)	Top-1 acc 64.453 (68.395)	Top-5 acc 83.594 (86.664)	lr 0.00494
Train [85][2660/3239]	Time 0.259 (0.619)	Data Time 0.001 (0.017)	Loss 2.4325 (2.3221)	Entropy 0.87759 (0.88080)	Top-1 acc 64.062 (68.395)	Top-5 acc 84.766 (86.664)	lr 0.00494
Train [85][2670/3239]	Time 0.223 (0.618)	Data Time 0.001 (0.017)	Loss 2.1699 (2.3220)	Entropy 0.87748 (0.88079)	Top-1 acc 73.828 (68.401)	Top-5 acc 89.453 (86.664)	lr 0.00494
Train [85][2680/3239]	Time 0.262 (0.618)	Data Time 0.001 (0.017)	Loss 2.2796 (2.3219)	Entropy 0.87747 (0.88078)	Top-1 acc 71.484 (68.405)	Top-5 acc 86.328 (86.666)	lr 0.00494
Train [85][2690/3239]	Time 0.289 (0.617)	Data Time 0.001 (0.017)	Loss 2.2365 (2.3219)	Entropy 0.87744 (0.88077)	Top-1 acc 68.750 (68.401)	Top-5 acc 85.156 (86.662)	lr 0.00493
Train [85][2700/3239]	Time 0.265 (0.617)	Data Time 0.001 (0.017)	Loss 2.3913 (2.3221)	Entropy 0.87736 (0.88076)	Top-1 acc 66.797 (68.395)	Top-5 acc 85.938 (86.657)	lr 0.00493
Train [85][2710/3239]	Time 0.250 (0.636)	Data Time 0.003 (0.017)	Loss 2.2638 (2.3221)	Entropy 0.87733 (0.88074)	Top-1 acc 68.359 (68.395)	Top-5 acc 88.281 (86.657)	lr 0.00493
Train [85][2720/3239]	Time 0.236 (0.635)	Data Time 0.002 (0.017)	Loss 2.1279 (2.3219)	Entropy 0.87731 (0.88073)	Top-1 acc 72.656 (68.401)	Top-5 acc 91.016 (86.661)	lr 0.00493
Train [85][2730/3239]	Time 0.234 (0.635)	Data Time 0.002 (0.017)	Loss 2.3497 (2.3219)	Entropy 0.87731 (0.88072)	Top-1 acc 67.188 (68.396)	Top-5 acc 86.328 (86.660)	lr 0.00493
Train [85][2740/3239]	Time 0.257 (0.634)	Data Time 0.001 (0.017)	Loss 2.3025 (2.3219)	Entropy 0.87707 (0.88071)	Top-1 acc 65.234 (68.397)	Top-5 acc 88.672 (86.660)	lr 0.00493
Train [85][2750/3239]	Time 0.231 (0.634)	Data Time 0.001 (0.016)	Loss 2.2392 (2.3226)	Entropy 0.87704 (0.88069)	Top-1 acc 68.750 (68.384)	Top-5 acc 87.891 (86.649)	lr 0.00493
Train [85][2760/3239]	Time 0.232 (0.633)	Data Time 0.001 (0.016)	Loss 2.3611 (2.3226)	Entropy 0.87702 (0.88068)	Top-1 acc 67.578 (68.383)	Top-5 acc 85.547 (86.649)	lr 0.00493
Train [85][2770/3239]	Time 0.222 (0.633)	Data Time 0.001 (0.016)	Loss 2.3036 (2.3228)	Entropy 0.87688 (0.88067)	Top-1 acc 67.969 (68.375)	Top-5 acc 84.766 (86.643)	lr 0.00493
Train [85][2780/3239]	Time 0.292 (0.632)	Data Time 0.001 (0.016)	Loss 2.3169 (2.3227)	Entropy 0.87685 (0.88065)	Top-1 acc 69.531 (68.376)	Top-5 acc 88.672 (86.648)	lr 0.00493
Train [85][2790/3239]	Time 0.254 (0.632)	Data Time 0.001 (0.016)	Loss 2.3197 (2.3227)	Entropy 0.87683 (0.88064)	Top-1 acc 67.188 (68.375)	Top-5 acc 87.500 (86.650)	lr 0.00493
Train [85][2800/3239]	Time 0.266 (0.631)	Data Time 0.001 (0.016)	Loss 2.3692 (2.3225)	Entropy 0.87681 (0.88062)	Top-1 acc 66.797 (68.380)	Top-5 acc 86.719 (86.652)	lr 0.00493
Train [85][2810/3239]	Time 0.295 (0.631)	Data Time 0.003 (0.016)	Loss 2.1576 (2.3224)	Entropy 0.87683 (0.88061)	Top-1 acc 71.875 (68.385)	Top-5 acc 89.062 (86.656)	lr 0.00492
Train [85][2820/3239]	Time 0.213 (0.630)	Data Time 0.001 (0.016)	Loss 2.3275 (2.3225)	Entropy 0.87683 (0.88060)	Top-1 acc 66.406 (68.379)	Top-5 acc 88.281 (86.656)	lr 0.00492
Train [85][2830/3239]	Time 0.228 (0.630)	Data Time 0.001 (0.016)	Loss 2.2907 (2.3223)	Entropy 0.87681 (0.88058)	Top-1 acc 67.188 (68.383)	Top-5 acc 85.547 (86.659)	lr 0.00492
Train [85][2840/3239]	Time 0.270 (0.629)	Data Time 0.003 (0.016)	Loss 2.3995 (2.3225)	Entropy 0.87689 (0.88057)	Top-1 acc 67.188 (68.373)	Top-5 acc 83.203 (86.654)	lr 0.00492
Train [85][2850/3239]	Time 0.241 (0.629)	Data Time 0.001 (0.016)	Loss 2.2943 (2.3225)	Entropy 0.87672 (0.88056)	Top-1 acc 69.922 (68.372)	Top-5 acc 87.891 (86.655)	lr 0.00492
Train [85][2860/3239]	Time 0.360 (0.628)	Data Time 0.001 (0.016)	Loss 2.2466 (2.3227)	Entropy 0.87667 (0.88054)	Top-1 acc 70.312 (68.365)	Top-5 acc 89.453 (86.650)	lr 0.00492
Train [85][2870/3239]	Time 0.227 (0.627)	Data Time 0.001 (0.016)	Loss 2.3153 (2.3226)	Entropy 0.87663 (0.88053)	Top-1 acc 69.141 (68.369)	Top-5 acc 85.547 (86.652)	lr 0.00492
Train [85][2880/3239]	Time 0.246 (0.627)	Data Time 0.001 (0.016)	Loss 2.2692 (2.3226)	Entropy 0.87656 (0.88052)	Top-1 acc 70.312 (68.370)	Top-5 acc 88.281 (86.652)	lr 0.00492
Train [85][2890/3239]	Time 0.265 (0.626)	Data Time 0.001 (0.016)	Loss 2.4520 (2.3228)	Entropy 0.87657 (0.88050)	Top-1 acc 65.234 (68.365)	Top-5 acc 83.594 (86.647)	lr 0.00492
Train [85][2900/3239]	Time 0.355 (0.626)	Data Time 0.001 (0.016)	Loss 2.2592 (2.3228)	Entropy 0.87653 (0.88049)	Top-1 acc 69.141 (68.364)	Top-5 acc 89.844 (86.647)	lr 0.00492
Train [85][2910/3239]	Time 0.283 (0.625)	Data Time 0.001 (0.016)	Loss 2.3115 (2.3229)	Entropy 0.87638 (0.88048)	Top-1 acc 65.625 (68.363)	Top-5 acc 86.719 (86.645)	lr 0.00492
Train [85][2920/3239]	Time 0.241 (0.625)	Data Time 0.001 (0.016)	Loss 2.2414 (2.3229)	Entropy 0.87637 (0.88046)	Top-1 acc 71.875 (68.361)	Top-5 acc 89.062 (86.644)	lr 0.00492
Train [85][2930/3239]	Time 0.253 (0.624)	Data Time 0.001 (0.016)	Loss 2.2473 (2.3230)	Entropy 0.87630 (0.88045)	Top-1 acc 69.531 (68.363)	Top-5 acc 88.281 (86.644)	lr 0.00492
Train [85][2940/3239]	Time 0.226 (0.624)	Data Time 0.002 (0.016)	Loss 2.3050 (2.3231)	Entropy 0.87630 (0.88043)	Top-1 acc 72.266 (68.363)	Top-5 acc 88.281 (86.641)	lr 0.00491
Train [85][2950/3239]	Time 0.229 (0.624)	Data Time 0.001 (0.015)	Loss 2.1355 (2.3231)	Entropy 0.87638 (0.88042)	Top-1 acc 71.484 (68.365)	Top-5 acc 88.672 (86.644)	lr 0.00491
Train [85][2960/3239]	Time 0.244 (0.623)	Data Time 0.002 (0.015)	Loss 2.4559 (2.3233)	Entropy 0.87630 (0.88041)	Top-1 acc 65.625 (68.357)	Top-5 acc 83.594 (86.642)	lr 0.00491
Train [85][2970/3239]	Time 0.255 (0.623)	Data Time 0.001 (0.015)	Loss 2.3310 (2.3233)	Entropy 0.87629 (0.88039)	Top-1 acc 71.484 (68.358)	Top-5 acc 86.719 (86.641)	lr 0.00491
Train [85][2980/3239]	Time 0.238 (0.622)	Data Time 0.001 (0.015)	Loss 2.4317 (2.3232)	Entropy 0.87627 (0.88038)	Top-1 acc 65.234 (68.356)	Top-5 acc 84.766 (86.642)	lr 0.00491
Train [85][2990/3239]	Time 0.244 (0.622)	Data Time 0.001 (0.015)	Loss 2.4466 (2.3232)	Entropy 0.87628 (0.88037)	Top-1 acc 65.234 (68.360)	Top-5 acc 86.719 (86.642)	lr 0.00491
Train [85][3000/3239]	Time 0.249 (0.621)	Data Time 0.001 (0.015)	Loss 2.2799 (2.3230)	Entropy 0.87628 (0.88035)	Top-1 acc 70.312 (68.364)	Top-5 acc 87.891 (86.644)	lr 0.00491
Train [85][3010/3239]	Time 0.233 (0.621)	Data Time 0.001 (0.015)	Loss 2.4613 (2.3231)	Entropy 0.87625 (0.88034)	Top-1 acc 65.234 (68.362)	Top-5 acc 83.594 (86.640)	lr 0.00491
Train [85][3020/3239]	Time 0.267 (0.620)	Data Time 0.001 (0.015)	Loss 2.5421 (2.3231)	Entropy 0.87625 (0.88032)	Top-1 acc 63.672 (68.365)	Top-5 acc 83.594 (86.641)	lr 0.00491
Train [85][3030/3239]	Time 0.336 (0.620)	Data Time 0.001 (0.015)	Loss 2.5952 (2.3232)	Entropy 0.87616 (0.88031)	Top-1 acc 63.672 (68.361)	Top-5 acc 79.297 (86.638)	lr 0.00491
Train [85][3040/3239]	Time 0.297 (0.636)	Data Time 0.004 (0.015)	Loss 2.4493 (2.3232)	Entropy 0.87617 (0.88030)	Top-1 acc 64.844 (68.360)	Top-5 acc 84.375 (86.639)	lr 0.00491
Train [85][3050/3239]	Time 0.304 (0.636)	Data Time 0.002 (0.015)	Loss 2.5082 (2.3234)	Entropy 0.87604 (0.88028)	Top-1 acc 67.188 (68.357)	Top-5 acc 83.594 (86.636)	lr 0.00491
Train [85][3060/3239]	Time 0.221 (0.635)	Data Time 0.001 (0.015)	Loss 2.3754 (2.3234)	Entropy 0.87608 (0.88027)	Top-1 acc 64.844 (68.357)	Top-5 acc 83.984 (86.635)	lr 0.00490
Train [85][3070/3239]	Time 0.384 (0.635)	Data Time 0.001 (0.015)	Loss 2.1735 (2.3235)	Entropy 0.87607 (0.88026)	Top-1 acc 73.438 (68.355)	Top-5 acc 88.672 (86.632)	lr 0.00490
Train [85][3080/3239]	Time 0.228 (0.634)	Data Time 0.001 (0.015)	Loss 2.2378 (2.3235)	Entropy 0.87607 (0.88024)	Top-1 acc 69.141 (68.355)	Top-5 acc 89.453 (86.633)	lr 0.00490
Train [85][3090/3239]	Time 0.270 (0.634)	Data Time 0.004 (0.015)	Loss 2.2340 (2.3235)	Entropy 0.87603 (0.88023)	Top-1 acc 71.484 (68.358)	Top-5 acc 87.109 (86.634)	lr 0.00490
Train [85][3100/3239]	Time 0.212 (0.634)	Data Time 0.001 (0.015)	Loss 2.2002 (2.3235)	Entropy 0.87600 (0.88022)	Top-1 acc 71.094 (68.355)	Top-5 acc 89.453 (86.634)	lr 0.00490
Train [85][3110/3239]	Time 0.263 (0.633)	Data Time 0.002 (0.015)	Loss 2.3640 (2.3235)	Entropy 0.87597 (0.88020)	Top-1 acc 68.750 (68.355)	Top-5 acc 86.719 (86.636)	lr 0.00490
Train [85][3120/3239]	Time 0.251 (0.633)	Data Time 0.002 (0.015)	Loss 2.3056 (2.3235)	Entropy 0.87594 (0.88019)	Top-1 acc 67.969 (68.352)	Top-5 acc 87.891 (86.634)	lr 0.00490
Train [85][3130/3239]	Time 0.238 (0.632)	Data Time 0.001 (0.015)	Loss 2.3728 (2.3236)	Entropy 0.87589 (0.88018)	Top-1 acc 68.750 (68.352)	Top-5 acc 85.547 (86.634)	lr 0.00490
Train [85][3140/3239]	Time 0.256 (0.632)	Data Time 0.001 (0.015)	Loss 2.2187 (2.3235)	Entropy 0.87582 (0.88016)	Top-1 acc 70.703 (68.352)	Top-5 acc 89.453 (86.636)	lr 0.00490
Train [85][3150/3239]	Time 0.246 (0.631)	Data Time 0.012 (0.015)	Loss 2.4150 (2.3235)	Entropy 0.87580 (0.88015)	Top-1 acc 64.062 (68.349)	Top-5 acc 84.375 (86.636)	lr 0.00490
Train [85][3160/3239]	Time 0.364 (0.631)	Data Time 0.001 (0.015)	Loss 2.3723 (2.3235)	Entropy 0.87576 (0.88013)	Top-1 acc 65.625 (68.348)	Top-5 acc 85.938 (86.638)	lr 0.00490
Train [85][3170/3239]	Time 0.279 (0.630)	Data Time 0.001 (0.015)	Loss 2.2367 (2.3236)	Entropy 0.87568 (0.88012)	Top-1 acc 71.484 (68.346)	Top-5 acc 87.109 (86.636)	lr 0.00490
Train [85][3180/3239]	Time 0.236 (0.630)	Data Time 0.000 (0.014)	Loss 2.1453 (2.3235)	Entropy 0.87561 (0.88011)	Top-1 acc 73.047 (68.347)	Top-5 acc 91.797 (86.639)	lr 0.00490
Train [85][3190/3239]	Time 0.217 (0.629)	Data Time 0.000 (0.014)	Loss 2.3440 (2.3236)	Entropy 0.87564 (0.88009)	Top-1 acc 67.578 (68.340)	Top-5 acc 86.328 (86.634)	lr 0.00489
Train [85][3200/3239]	Time 0.351 (0.629)	Data Time 0.000 (0.014)	Loss 2.3257 (2.3235)	Entropy 0.87555 (0.88008)	Top-1 acc 65.625 (68.343)	Top-5 acc 86.719 (86.635)	lr 0.00489
Train [85][3210/3239]	Time 0.226 (0.628)	Data Time 0.000 (0.014)	Loss 2.1375 (2.3235)	Entropy 0.87557 (0.88006)	Top-1 acc 73.047 (68.345)	Top-5 acc 89.453 (86.637)	lr 0.00489
Train [85][3220/3239]	Time 0.231 (0.628)	Data Time 0.000 (0.014)	Loss 2.4000 (2.3235)	Entropy 0.87546 (0.88005)	Top-1 acc 68.359 (68.347)	Top-5 acc 85.547 (86.636)	lr 0.00489
Train [85][3230/3239]	Time 0.226 (0.627)	Data Time 0.000 (0.014)	Loss 2.4380 (2.3236)	Entropy 0.87545 (0.88004)	Top-1 acc 64.062 (68.345)	Top-5 acc 83.594 (86.636)	lr 0.00489
Train [85][3239/3239]	Time 2.407 (0.627)	Data Time 0.000 (0.014)	Loss 2.5680 (2.3236)	Entropy 0.87545 (0.88002)	Top-1 acc 65.432 (68.343)	Top-5 acc 79.012 (86.632)	lr 0.00489
==========Valid [85/120]	loss 1.278	top-1 acc 70.671 (70.789)	top-5 acc 88.849	Train top-1 68.343	top-5 86.632	Entropy 0.87545	Latency-None: 0.000ms	Flops: 546.53M
Train [86][0/3239]	Time 41.535 (41.535)	Data Time 38.368 (38.368)	Loss 2.4050 (2.4050)	Entropy 0.87536 (0.87536)	Top-1 acc 67.578 (67.578)	Top-5 acc 85.938 (85.938)	lr 0.00489
Train [86][10/3239]	Time 2.939 (4.416)	Data Time 0.002 (3.596)	Loss 2.4003 (2.3253)	Entropy 0.87536 (0.87536)	Top-1 acc 63.672 (67.720)	Top-5 acc 85.938 (86.222)	lr 0.00489
Train [86][20/3239]	Time 0.281 (2.433)	Data Time 0.002 (1.884)	Loss 2.3508 (2.3053)	Entropy 0.87535 (0.87535)	Top-1 acc 68.359 (68.415)	Top-5 acc 83.203 (86.812)	lr 0.00489
Train [86][30/3239]	Time 0.241 (1.807)	Data Time 0.001 (1.277)	Loss 2.3679 (2.2980)	Entropy 0.87532 (0.87534)	Top-1 acc 69.141 (68.548)	Top-5 acc 82.031 (86.996)	lr 0.00489
Train [86][40/3239]	Time 0.246 (1.486)	Data Time 0.001 (0.966)	Loss 2.3339 (2.3086)	Entropy 0.87526 (0.87532)	Top-1 acc 71.094 (68.493)	Top-5 acc 86.328 (86.700)	lr 0.00489
Train [86][50/3239]	Time 0.237 (1.288)	Data Time 0.001 (0.777)	Loss 2.3376 (2.3146)	Entropy 0.87525 (0.87531)	Top-1 acc 67.969 (68.298)	Top-5 acc 86.719 (86.657)	lr 0.00489
Train [86][60/3239]	Time 0.261 (1.155)	Data Time 0.001 (0.650)	Loss 2.4274 (2.3151)	Entropy 0.87535 (0.87531)	Top-1 acc 65.234 (68.359)	Top-5 acc 86.719 (86.706)	lr 0.00489
Train [86][70/3239]	Time 0.242 (1.063)	Data Time 0.001 (0.558)	Loss 2.2060 (2.3135)	Entropy 0.87530 (0.87531)	Top-1 acc 69.531 (68.409)	Top-5 acc 89.062 (86.757)	lr 0.00488
Train [86][80/3239]	Time 0.225 (0.991)	Data Time 0.002 (0.490)	Loss 2.2861 (2.3115)	Entropy 0.87512 (0.87530)	Top-1 acc 70.312 (68.456)	Top-5 acc 86.719 (86.801)	lr 0.00488
Train [86][90/3239]	Time 0.235 (0.935)	Data Time 0.001 (0.436)	Loss 2.2759 (2.3112)	Entropy 0.87510 (0.87528)	Top-1 acc 68.750 (68.441)	Top-5 acc 86.719 (86.848)	lr 0.00488
Train [86][100/3239]	Time 0.358 (0.891)	Data Time 0.001 (0.393)	Loss 2.2244 (2.3105)	Entropy 0.87509 (0.87526)	Top-1 acc 72.656 (68.591)	Top-5 acc 86.328 (86.870)	lr 0.00488
Train [86][110/3239]	Time 0.236 (0.856)	Data Time 0.002 (0.358)	Loss 2.1706 (2.3089)	Entropy 0.87509 (0.87525)	Top-1 acc 74.609 (68.701)	Top-5 acc 89.062 (86.947)	lr 0.00488
Train [86][120/3239]	Time 2.566 (0.824)	Data Time 0.002 (0.328)	Loss 2.3340 (2.3086)	Entropy 0.87509 (0.87524)	Top-1 acc 66.016 (68.669)	Top-5 acc 83.984 (86.929)	lr 0.00488
Train [86][130/3239]	Time 0.273 (0.780)	Data Time 0.001 (0.303)	Loss 2.3176 (2.3225)	Entropy 0.87508 (0.87522)	Top-1 acc 68.750 (68.350)	Top-5 acc 87.891 (86.701)	lr 0.00488
Train [86][140/3239]	Time 0.214 (0.758)	Data Time 0.001 (0.282)	Loss 2.3360 (2.3198)	Entropy 0.87504 (0.87521)	Top-1 acc 67.188 (68.470)	Top-5 acc 87.109 (86.769)	lr 0.00488
Train [86][150/3239]	Time 0.294 (1.093)	Data Time 0.003 (0.263)	Loss 2.3200 (2.3183)	Entropy 0.87499 (0.87520)	Top-1 acc 64.844 (68.556)	Top-5 acc 86.328 (86.789)	lr 0.00488
Train [86][160/3239]	Time 0.241 (1.057)	Data Time 0.002 (0.247)	Loss 2.2762 (2.3177)	Entropy 0.87499 (0.87518)	Top-1 acc 71.094 (68.607)	Top-5 acc 87.109 (86.777)	lr 0.00488
Train [86][170/3239]	Time 0.241 (1.024)	Data Time 0.002 (0.233)	Loss 2.2478 (2.3142)	Entropy 0.87499 (0.87517)	Top-1 acc 71.484 (68.736)	Top-5 acc 86.719 (86.822)	lr 0.00488
Train [86][180/3239]	Time 0.242 (0.995)	Data Time 0.002 (0.220)	Loss 2.3437 (2.3143)	Entropy 0.87503 (0.87516)	Top-1 acc 66.406 (68.733)	Top-5 acc 86.719 (86.818)	lr 0.00488
Train [86][190/3239]	Time 0.233 (0.969)	Data Time 0.001 (0.209)	Loss 2.3195 (2.3125)	Entropy 0.87501 (0.87516)	Top-1 acc 65.625 (68.764)	Top-5 acc 87.500 (86.868)	lr 0.00488
Train [86][200/3239]	Time 0.225 (0.944)	Data Time 0.001 (0.198)	Loss 2.2871 (2.3110)	Entropy 0.87494 (0.87515)	Top-1 acc 66.016 (68.806)	Top-5 acc 89.453 (86.899)	lr 0.00487
Train [86][210/3239]	Time 0.223 (0.922)	Data Time 0.001 (0.189)	Loss 2.1871 (2.3098)	Entropy 0.87502 (0.87514)	Top-1 acc 71.875 (68.793)	Top-5 acc 88.672 (86.904)	lr 0.00487
Train [86][220/3239]	Time 0.251 (0.902)	Data Time 0.001 (0.181)	Loss 2.2856 (2.3085)	Entropy 0.87498 (0.87513)	Top-1 acc 67.188 (68.833)	Top-5 acc 86.328 (86.911)	lr 0.00487
Train [86][230/3239]	Time 2.585 (0.885)	Data Time 0.002 (0.173)	Loss 2.3573 (2.3084)	Entropy 0.87498 (0.87513)	Top-1 acc 67.969 (68.858)	Top-5 acc 84.766 (86.927)	lr 0.00487
Train [86][240/3239]	Time 0.292 (0.859)	Data Time 0.001 (0.166)	Loss 2.2722 (2.3091)	Entropy 0.87491 (0.87512)	Top-1 acc 71.484 (68.859)	Top-5 acc 85.547 (86.920)	lr 0.00487
Train [86][250/3239]	Time 0.229 (0.844)	Data Time 0.001 (0.159)	Loss 2.2568 (2.3099)	Entropy 0.87487 (0.87511)	Top-1 acc 71.484 (68.790)	Top-5 acc 88.672 (86.923)	lr 0.00487
Train [86][260/3239]	Time 0.226 (0.829)	Data Time 0.001 (0.153)	Loss 2.4868 (2.3115)	Entropy 0.87480 (0.87510)	Top-1 acc 62.891 (68.713)	Top-5 acc 83.203 (86.891)	lr 0.00487
Train [86][270/3239]	Time 0.353 (0.817)	Data Time 0.001 (0.148)	Loss 2.3390 (2.3110)	Entropy 0.87481 (0.87509)	Top-1 acc 67.578 (68.741)	Top-5 acc 85.547 (86.893)	lr 0.00487
Train [86][280/3239]	Time 0.243 (0.805)	Data Time 0.001 (0.142)	Loss 2.3164 (2.3094)	Entropy 0.87480 (0.87508)	Top-1 acc 68.359 (68.815)	Top-5 acc 87.109 (86.922)	lr 0.00487
Train [86][290/3239]	Time 0.232 (0.794)	Data Time 0.003 (0.138)	Loss 2.3887 (2.3087)	Entropy 0.87476 (0.87506)	Top-1 acc 65.625 (68.849)	Top-5 acc 85.547 (86.943)	lr 0.00487
Train [86][300/3239]	Time 0.222 (0.784)	Data Time 0.001 (0.133)	Loss 2.3771 (2.3089)	Entropy 0.87473 (0.87505)	Top-1 acc 70.312 (68.832)	Top-5 acc 86.328 (86.921)	lr 0.00487
Train [86][310/3239]	Time 0.342 (0.775)	Data Time 0.001 (0.129)	Loss 2.2338 (2.3072)	Entropy 0.87469 (0.87504)	Top-1 acc 72.266 (68.867)	Top-5 acc 87.891 (86.950)	lr 0.00487
Train [86][320/3239]	Time 0.232 (0.766)	Data Time 0.001 (0.125)	Loss 2.2794 (2.3074)	Entropy 0.87462 (0.87503)	Top-1 acc 72.266 (68.849)	Top-5 acc 87.109 (86.940)	lr 0.00486
Train [86][330/3239]	Time 0.231 (0.758)	Data Time 0.001 (0.121)	Loss 2.3721 (2.3080)	Entropy 0.87465 (0.87502)	Top-1 acc 65.625 (68.840)	Top-5 acc 87.109 (86.919)	lr 0.00486
Train [86][340/3239]	Time 2.489 (0.749)	Data Time 0.001 (0.118)	Loss 2.3650 (2.3085)	Entropy 0.87465 (0.87501)	Top-1 acc 67.969 (68.823)	Top-5 acc 85.156 (86.911)	lr 0.00486
Train [86][350/3239]	Time 0.252 (0.735)	Data Time 0.001 (0.114)	Loss 2.2626 (2.3083)	Entropy 0.87465 (0.87500)	Top-1 acc 69.922 (68.842)	Top-5 acc 88.281 (86.900)	lr 0.00486
Train [86][360/3239]	Time 0.232 (0.728)	Data Time 0.001 (0.111)	Loss 2.5174 (2.3094)	Entropy 0.87462 (0.87499)	Top-1 acc 64.062 (68.808)	Top-5 acc 82.812 (86.863)	lr 0.00486
Train [86][370/3239]	Time 0.250 (0.722)	Data Time 0.001 (0.108)	Loss 2.1476 (2.3089)	Entropy 0.87462 (0.87498)	Top-1 acc 72.266 (68.798)	Top-5 acc 90.234 (86.868)	lr 0.00486
Train [86][380/3239]	Time 0.251 (0.716)	Data Time 0.001 (0.105)	Loss 2.1961 (2.3075)	Entropy 0.87458 (0.87497)	Top-1 acc 68.750 (68.825)	Top-5 acc 86.719 (86.880)	lr 0.00486
Train [86][390/3239]	Time 0.233 (0.710)	Data Time 0.001 (0.103)	Loss 2.2116 (2.3074)	Entropy 0.87461 (0.87496)	Top-1 acc 73.438 (68.827)	Top-5 acc 88.281 (86.884)	lr 0.00486
Train [86][400/3239]	Time 0.352 (0.705)	Data Time 0.002 (0.100)	Loss 2.2902 (2.3074)	Entropy 0.87459 (0.87495)	Top-1 acc 66.797 (68.829)	Top-5 acc 88.672 (86.892)	lr 0.00486
Train [86][410/3239]	Time 0.232 (0.700)	Data Time 0.001 (0.098)	Loss 2.1749 (2.3062)	Entropy 0.87462 (0.87494)	Top-1 acc 70.703 (68.853)	Top-5 acc 87.891 (86.926)	lr 0.00486
Train [86][420/3239]	Time 0.252 (0.694)	Data Time 0.001 (0.096)	Loss 2.2734 (2.3063)	Entropy 0.87447 (0.87493)	Top-1 acc 68.750 (68.844)	Top-5 acc 86.328 (86.913)	lr 0.00486
Train [86][430/3239]	Time 0.236 (0.690)	Data Time 0.001 (0.093)	Loss 2.3313 (2.3064)	Entropy 0.87442 (0.87492)	Top-1 acc 67.188 (68.845)	Top-5 acc 87.109 (86.910)	lr 0.00486
Train [86][440/3239]	Time 0.309 (0.685)	Data Time 0.001 (0.091)	Loss 2.1965 (2.3060)	Entropy 0.87446 (0.87491)	Top-1 acc 72.656 (68.849)	Top-5 acc 87.891 (86.927)	lr 0.00486
Train [86][450/3239]	Time 2.537 (0.680)	Data Time 0.001 (0.089)	Loss 2.3576 (2.3077)	Entropy 0.87446 (0.87490)	Top-1 acc 66.406 (68.800)	Top-5 acc 86.719 (86.888)	lr 0.00485
Train [86][460/3239]	Time 0.236 (0.671)	Data Time 0.001 (0.087)	Loss 2.2613 (2.3079)	Entropy 0.87440 (0.87489)	Top-1 acc 71.094 (68.792)	Top-5 acc 88.281 (86.895)	lr 0.00485
Train [86][470/3239]	Time 0.235 (0.667)	Data Time 0.001 (0.086)	Loss 2.4761 (2.3083)	Entropy 0.87429 (0.87488)	Top-1 acc 66.797 (68.782)	Top-5 acc 83.203 (86.899)	lr 0.00485
Train [86][480/3239]	Time 0.246 (0.663)	Data Time 0.001 (0.084)	Loss 2.2970 (2.3084)	Entropy 0.87437 (0.87487)	Top-1 acc 67.969 (68.787)	Top-5 acc 87.891 (86.897)	lr 0.00485
Train [86][490/3239]	Time 0.235 (0.660)	Data Time 0.002 (0.082)	Loss 2.3492 (2.3084)	Entropy 0.87432 (0.87486)	Top-1 acc 66.406 (68.775)	Top-5 acc 86.328 (86.895)	lr 0.00485
Train [86][500/3239]	Time 0.241 (0.656)	Data Time 0.001 (0.081)	Loss 2.4490 (2.3089)	Entropy 0.87429 (0.87484)	Top-1 acc 66.797 (68.765)	Top-5 acc 82.812 (86.900)	lr 0.00485
Train [86][510/3239]	Time 0.237 (0.756)	Data Time 0.002 (0.079)	Loss 2.2613 (2.3103)	Entropy 0.87428 (0.87483)	Top-1 acc 71.094 (68.739)	Top-5 acc 85.938 (86.866)	lr 0.00485
Train [86][520/3239]	Time 0.233 (0.752)	Data Time 0.002 (0.078)	Loss 2.2321 (2.3092)	Entropy 0.87424 (0.87482)	Top-1 acc 72.656 (68.777)	Top-5 acc 87.891 (86.887)	lr 0.00485
Train [86][530/3239]	Time 0.243 (0.747)	Data Time 0.002 (0.076)	Loss 2.3124 (2.3087)	Entropy 0.87420 (0.87481)	Top-1 acc 68.359 (68.786)	Top-5 acc 87.891 (86.902)	lr 0.00485
Train [86][540/3239]	Time 0.246 (0.742)	Data Time 0.002 (0.075)	Loss 2.1628 (2.3086)	Entropy 0.87420 (0.87480)	Top-1 acc 71.875 (68.785)	Top-5 acc 88.672 (86.899)	lr 0.00485
Train [86][550/3239]	Time 0.235 (0.737)	Data Time 0.002 (0.073)	Loss 2.3935 (2.3076)	Entropy 0.87419 (0.87479)	Top-1 acc 66.406 (68.801)	Top-5 acc 87.109 (86.923)	lr 0.00485
Train [86][560/3239]	Time 2.470 (0.732)	Data Time 0.001 (0.072)	Loss 2.2776 (2.3072)	Entropy 0.87419 (0.87478)	Top-1 acc 69.531 (68.798)	Top-5 acc 87.109 (86.941)	lr 0.00485
Train [86][570/3239]	Time 0.292 (0.723)	Data Time 0.001 (0.071)	Loss 2.3724 (2.3071)	Entropy 0.87428 (0.87477)	Top-1 acc 71.484 (68.808)	Top-5 acc 85.938 (86.927)	lr 0.00484
Train [86][580/3239]	Time 0.224 (0.719)	Data Time 0.001 (0.070)	Loss 2.4079 (2.3080)	Entropy 0.87426 (0.87476)	Top-1 acc 64.453 (68.777)	Top-5 acc 84.375 (86.900)	lr 0.00484
Train [86][590/3239]	Time 0.236 (0.715)	Data Time 0.001 (0.069)	Loss 2.4217 (2.3075)	Entropy 0.87422 (0.87475)	Top-1 acc 62.891 (68.785)	Top-5 acc 83.984 (86.912)	lr 0.00484
Train [86][600/3239]	Time 0.228 (0.711)	Data Time 0.001 (0.067)	Loss 2.2344 (2.3070)	Entropy 0.87418 (0.87474)	Top-1 acc 73.438 (68.814)	Top-5 acc 88.672 (86.922)	lr 0.00484
Train [86][610/3239]	Time 0.242 (0.707)	Data Time 0.001 (0.066)	Loss 2.4320 (2.3075)	Entropy 0.87422 (0.87473)	Top-1 acc 64.062 (68.792)	Top-5 acc 83.594 (86.907)	lr 0.00484
Train [86][620/3239]	Time 0.236 (0.703)	Data Time 0.001 (0.065)	Loss 2.4278 (2.3077)	Entropy 0.87416 (0.87473)	Top-1 acc 65.625 (68.779)	Top-5 acc 87.109 (86.907)	lr 0.00484
Train [86][630/3239]	Time 0.224 (0.699)	Data Time 0.001 (0.064)	Loss 2.2623 (2.3085)	Entropy 0.87413 (0.87472)	Top-1 acc 69.531 (68.762)	Top-5 acc 87.109 (86.895)	lr 0.00484
Train [86][640/3239]	Time 0.240 (0.696)	Data Time 0.001 (0.063)	Loss 2.4142 (2.3085)	Entropy 0.87415 (0.87471)	Top-1 acc 62.109 (68.753)	Top-5 acc 87.109 (86.897)	lr 0.00484
Train [86][650/3239]	Time 0.243 (0.693)	Data Time 0.001 (0.062)	Loss 2.2968 (2.3082)	Entropy 0.87417 (0.87470)	Top-1 acc 67.188 (68.766)	Top-5 acc 86.719 (86.907)	lr 0.00484
Train [86][660/3239]	Time 0.395 (0.690)	Data Time 0.001 (0.061)	Loss 2.1748 (2.3069)	Entropy 0.87416 (0.87469)	Top-1 acc 71.094 (68.807)	Top-5 acc 89.844 (86.935)	lr 0.00484
Train [86][670/3239]	Time 2.621 (0.688)	Data Time 0.001 (0.061)	Loss 2.3626 (2.3071)	Entropy 0.87416 (0.87468)	Top-1 acc 69.141 (68.795)	Top-5 acc 86.719 (86.937)	lr 0.00484
Train [86][680/3239]	Time 0.273 (0.681)	Data Time 0.002 (0.060)	Loss 2.4484 (2.3072)	Entropy 0.87416 (0.87467)	Top-1 acc 63.672 (68.780)	Top-5 acc 82.812 (86.924)	lr 0.00484
Train [86][690/3239]	Time 0.242 (0.678)	Data Time 0.002 (0.059)	Loss 2.2417 (2.3068)	Entropy 0.87420 (0.87467)	Top-1 acc 68.359 (68.783)	Top-5 acc 89.844 (86.934)	lr 0.00484
Train [86][700/3239]	Time 0.317 (0.676)	Data Time 0.001 (0.058)	Loss 2.0519 (2.3059)	Entropy 0.87424 (0.87466)	Top-1 acc 75.391 (68.814)	Top-5 acc 91.797 (86.949)	lr 0.00483
Train [86][710/3239]	Time 0.221 (0.673)	Data Time 0.001 (0.057)	Loss 2.2597 (2.3057)	Entropy 0.87420 (0.87466)	Top-1 acc 69.141 (68.825)	Top-5 acc 88.672 (86.957)	lr 0.00483
Train [86][720/3239]	Time 0.241 (0.670)	Data Time 0.001 (0.056)	Loss 2.1990 (2.3050)	Entropy 0.87423 (0.87465)	Top-1 acc 71.484 (68.839)	Top-5 acc 87.109 (86.965)	lr 0.00483
Train [86][730/3239]	Time 0.236 (0.668)	Data Time 0.001 (0.056)	Loss 2.2715 (2.3052)	Entropy 0.87422 (0.87464)	Top-1 acc 69.531 (68.826)	Top-5 acc 88.672 (86.959)	lr 0.00483
Train [86][740/3239]	Time 0.231 (0.665)	Data Time 0.001 (0.055)	Loss 2.1560 (2.3051)	Entropy 0.87422 (0.87464)	Top-1 acc 72.656 (68.831)	Top-5 acc 89.453 (86.954)	lr 0.00483
Train [86][750/3239]	Time 0.240 (0.663)	Data Time 0.001 (0.054)	Loss 2.2413 (2.3055)	Entropy 0.87416 (0.87463)	Top-1 acc 71.875 (68.818)	Top-5 acc 86.328 (86.949)	lr 0.00483
Train [86][760/3239]	Time 0.235 (0.661)	Data Time 0.001 (0.054)	Loss 2.3226 (2.3051)	Entropy 0.87400 (0.87463)	Top-1 acc 67.969 (68.831)	Top-5 acc 85.547 (86.954)	lr 0.00483
Train [86][770/3239]	Time 0.240 (0.659)	Data Time 0.001 (0.053)	Loss 2.1806 (2.3052)	Entropy 0.87396 (0.87462)	Top-1 acc 71.094 (68.813)	Top-5 acc 89.062 (86.953)	lr 0.00483
Train [86][780/3239]	Time 2.477 (0.656)	Data Time 0.001 (0.052)	Loss 2.1644 (2.3047)	Entropy 0.87396 (0.87461)	Top-1 acc 73.047 (68.829)	Top-5 acc 89.844 (86.968)	lr 0.00483
Train [86][790/3239]	Time 0.259 (0.651)	Data Time 0.001 (0.052)	Loss 2.2761 (2.3048)	Entropy 0.87394 (0.87460)	Top-1 acc 69.922 (68.821)	Top-5 acc 88.672 (86.971)	lr 0.00483
Train [86][800/3239]	Time 0.257 (0.649)	Data Time 0.001 (0.051)	Loss 2.2812 (2.3043)	Entropy 0.87397 (0.87459)	Top-1 acc 67.188 (68.841)	Top-5 acc 86.719 (86.979)	lr 0.00483
Train [86][810/3239]	Time 0.228 (0.647)	Data Time 0.001 (0.050)	Loss 2.2616 (2.3040)	Entropy 0.87383 (0.87458)	Top-1 acc 70.703 (68.857)	Top-5 acc 86.328 (86.986)	lr 0.00483
Train [86][820/3239]	Time 0.231 (0.645)	Data Time 0.001 (0.050)	Loss 2.3875 (2.3041)	Entropy 0.87380 (0.87457)	Top-1 acc 63.672 (68.851)	Top-5 acc 85.156 (86.980)	lr 0.00482
Train [86][830/3239]	Time 0.344 (0.643)	Data Time 0.001 (0.049)	Loss 2.4727 (2.3044)	Entropy 0.87376 (0.87456)	Top-1 acc 67.578 (68.847)	Top-5 acc 83.203 (86.981)	lr 0.00482
Train [86][840/3239]	Time 0.230 (0.641)	Data Time 0.001 (0.049)	Loss 2.1877 (2.3039)	Entropy 0.87364 (0.87455)	Top-1 acc 71.875 (68.864)	Top-5 acc 89.062 (86.997)	lr 0.00482
Train [86][850/3239]	Time 0.239 (0.639)	Data Time 0.001 (0.048)	Loss 2.2731 (2.3040)	Entropy 0.87369 (0.87454)	Top-1 acc 70.312 (68.860)	Top-5 acc 87.891 (86.997)	lr 0.00482
Train [86][860/3239]	Time 0.211 (0.637)	Data Time 0.001 (0.048)	Loss 2.4001 (2.3041)	Entropy 0.87373 (0.87453)	Top-1 acc 66.797 (68.842)	Top-5 acc 86.328 (86.996)	lr 0.00482
Train [86][870/3239]	Time 0.432 (0.694)	Data Time 0.002 (0.047)	Loss 2.3839 (2.3045)	Entropy 0.87368 (0.87452)	Top-1 acc 66.797 (68.833)	Top-5 acc 84.766 (86.992)	lr 0.00482
Train [86][880/3239]	Time 0.227 (0.693)	Data Time 0.002 (0.047)	Loss 2.3444 (2.3050)	Entropy 0.87371 (0.87451)	Top-1 acc 66.016 (68.818)	Top-5 acc 85.156 (86.983)	lr 0.00482
Train [86][890/3239]	Time 2.455 (0.690)	Data Time 0.002 (0.046)	Loss 2.2505 (2.3046)	Entropy 0.87371 (0.87451)	Top-1 acc 69.922 (68.829)	Top-5 acc 90.234 (86.993)	lr 0.00482
Train [86][900/3239]	Time 0.235 (0.685)	Data Time 0.001 (0.046)	Loss 2.3876 (2.3050)	Entropy 0.87366 (0.87450)	Top-1 acc 66.406 (68.822)	Top-5 acc 85.547 (86.991)	lr 0.00482
Train [86][910/3239]	Time 0.224 (0.683)	Data Time 0.001 (0.045)	Loss 2.3713 (2.3052)	Entropy 0.87365 (0.87449)	Top-1 acc 68.750 (68.810)	Top-5 acc 85.547 (86.988)	lr 0.00482
Train [86][920/3239]	Time 0.308 (0.681)	Data Time 0.001 (0.045)	Loss 2.2707 (2.3051)	Entropy 0.87365 (0.87448)	Top-1 acc 68.359 (68.812)	Top-5 acc 86.719 (86.989)	lr 0.00482
Train [86][930/3239]	Time 0.221 (0.678)	Data Time 0.001 (0.044)	Loss 2.2859 (2.3052)	Entropy 0.87380 (0.87447)	Top-1 acc 71.484 (68.805)	Top-5 acc 84.766 (86.984)	lr 0.00482
Train [86][940/3239]	Time 0.280 (0.676)	Data Time 0.001 (0.044)	Loss 2.2308 (2.3049)	Entropy 0.87377 (0.87446)	Top-1 acc 72.656 (68.811)	Top-5 acc 89.062 (86.991)	lr 0.00482
Train [86][950/3239]	Time 0.229 (0.674)	Data Time 0.001 (0.043)	Loss 2.2433 (2.3047)	Entropy 0.87367 (0.87446)	Top-1 acc 70.703 (68.814)	Top-5 acc 88.281 (86.995)	lr 0.00481
Train [86][960/3239]	Time 0.238 (0.672)	Data Time 0.001 (0.043)	Loss 2.3660 (2.3049)	Entropy 0.87366 (0.87445)	Top-1 acc 66.797 (68.815)	Top-5 acc 86.328 (86.988)	lr 0.00481
Train [86][970/3239]	Time 0.218 (0.670)	Data Time 0.001 (0.042)	Loss 2.2400 (2.3050)	Entropy 0.87363 (0.87444)	Top-1 acc 70.703 (68.818)	Top-5 acc 87.500 (86.980)	lr 0.00481
Train [86][980/3239]	Time 0.223 (0.668)	Data Time 0.001 (0.042)	Loss 2.2577 (2.3044)	Entropy 0.87360 (0.87443)	Top-1 acc 69.141 (68.830)	Top-5 acc 85.156 (86.989)	lr 0.00481
Train [86][990/3239]	Time 0.271 (0.666)	Data Time 0.001 (0.042)	Loss 2.1981 (2.3043)	Entropy 0.87359 (0.87442)	Top-1 acc 74.219 (68.827)	Top-5 acc 88.281 (86.993)	lr 0.00481
Train [86][1000/3239]	Time 2.642 (0.664)	Data Time 0.002 (0.041)	Loss 2.2695 (2.3047)	Entropy 0.87359 (0.87441)	Top-1 acc 69.922 (68.818)	Top-5 acc 85.547 (86.979)	lr 0.00481
Train [86][1010/3239]	Time 0.339 (0.660)	Data Time 0.001 (0.041)	Loss 2.2092 (2.3049)	Entropy 0.87360 (0.87441)	Top-1 acc 69.922 (68.805)	Top-5 acc 89.844 (86.976)	lr 0.00481
Train [86][1020/3239]	Time 0.254 (0.658)	Data Time 0.002 (0.040)	Loss 2.2630 (2.3049)	Entropy 0.87358 (0.87440)	Top-1 acc 69.531 (68.807)	Top-5 acc 87.109 (86.972)	lr 0.00481
Train [86][1030/3239]	Time 0.221 (0.657)	Data Time 0.001 (0.040)	Loss 2.1844 (2.3048)	Entropy 0.87348 (0.87439)	Top-1 acc 71.484 (68.811)	Top-5 acc 89.844 (86.976)	lr 0.00481
Train [86][1040/3239]	Time 0.238 (0.655)	Data Time 0.001 (0.040)	Loss 2.2804 (2.3042)	Entropy 0.87342 (0.87438)	Top-1 acc 67.969 (68.823)	Top-5 acc 87.500 (86.989)	lr 0.00481
Train [86][1050/3239]	Time 0.343 (0.653)	Data Time 0.001 (0.039)	Loss 2.3092 (2.3043)	Entropy 0.87340 (0.87437)	Top-1 acc 65.625 (68.818)	Top-5 acc 86.719 (86.986)	lr 0.00481
Train [86][1060/3239]	Time 0.214 (0.651)	Data Time 0.001 (0.039)	Loss 2.2885 (2.3042)	Entropy 0.87337 (0.87436)	Top-1 acc 68.750 (68.822)	Top-5 acc 87.500 (86.989)	lr 0.00481
Train [86][1070/3239]	Time 0.219 (0.650)	Data Time 0.001 (0.039)	Loss 2.3089 (2.3042)	Entropy 0.87334 (0.87435)	Top-1 acc 68.750 (68.826)	Top-5 acc 87.109 (86.984)	lr 0.00481
Train [86][1080/3239]	Time 0.241 (0.648)	Data Time 0.001 (0.038)	Loss 2.5027 (2.3046)	Entropy 0.87335 (0.87434)	Top-1 acc 64.453 (68.815)	Top-5 acc 85.547 (86.979)	lr 0.00480
Train [86][1090/3239]	Time 0.223 (0.646)	Data Time 0.001 (0.038)	Loss 2.3964 (2.3047)	Entropy 0.87335 (0.87433)	Top-1 acc 66.406 (68.816)	Top-5 acc 85.547 (86.977)	lr 0.00480
Train [86][1100/3239]	Time 0.228 (0.645)	Data Time 0.001 (0.038)	Loss 2.3906 (2.3047)	Entropy 0.87339 (0.87432)	Top-1 acc 65.234 (68.816)	Top-5 acc 84.766 (86.977)	lr 0.00480
Train [86][1110/3239]	Time 2.526 (0.643)	Data Time 0.001 (0.037)	Loss 2.2391 (2.3049)	Entropy 0.87339 (0.87432)	Top-1 acc 71.484 (68.813)	Top-5 acc 89.453 (86.980)	lr 0.00480
Train [86][1120/3239]	Time 0.231 (0.640)	Data Time 0.002 (0.037)	Loss 2.3666 (2.3048)	Entropy 0.87336 (0.87431)	Top-1 acc 66.406 (68.821)	Top-5 acc 87.500 (86.982)	lr 0.00480
Train [86][1130/3239]	Time 0.236 (0.638)	Data Time 0.001 (0.037)	Loss 2.1864 (2.3045)	Entropy 0.87329 (0.87430)	Top-1 acc 73.047 (68.835)	Top-5 acc 89.453 (86.983)	lr 0.00480
Train [86][1140/3239]	Time 0.322 (0.637)	Data Time 0.001 (0.036)	Loss 2.3357 (2.3047)	Entropy 0.87327 (0.87429)	Top-1 acc 71.875 (68.835)	Top-5 acc 85.547 (86.976)	lr 0.00480
Train [86][1150/3239]	Time 0.223 (0.636)	Data Time 0.001 (0.036)	Loss 2.4258 (2.3047)	Entropy 0.87308 (0.87428)	Top-1 acc 71.484 (68.845)	Top-5 acc 84.766 (86.977)	lr 0.00480
Train [86][1160/3239]	Time 0.243 (0.634)	Data Time 0.001 (0.036)	Loss 2.2503 (2.3042)	Entropy 0.87306 (0.87427)	Top-1 acc 70.703 (68.849)	Top-5 acc 88.672 (86.984)	lr 0.00480
Train [86][1170/3239]	Time 0.233 (0.633)	Data Time 0.001 (0.035)	Loss 2.1854 (2.3044)	Entropy 0.87298 (0.87426)	Top-1 acc 72.656 (68.853)	Top-5 acc 90.234 (86.979)	lr 0.00480
Train [86][1180/3239]	Time 0.372 (0.632)	Data Time 0.002 (0.035)	Loss 2.3092 (2.3044)	Entropy 0.87290 (0.87425)	Top-1 acc 67.578 (68.854)	Top-5 acc 88.672 (86.977)	lr 0.00480
Train [86][1190/3239]	Time 0.238 (0.630)	Data Time 0.002 (0.035)	Loss 2.3799 (2.3043)	Entropy 0.87290 (0.87424)	Top-1 acc 66.016 (68.855)	Top-5 acc 87.109 (86.982)	lr 0.00480
Train [86][1200/3239]	Time 0.251 (0.629)	Data Time 0.001 (0.035)	Loss 2.2094 (2.3038)	Entropy 0.87288 (0.87423)	Top-1 acc 69.531 (68.869)	Top-5 acc 90.234 (86.993)	lr 0.00479
Train [86][1210/3239]	Time 0.252 (0.628)	Data Time 0.001 (0.034)	Loss 2.2167 (2.3037)	Entropy 0.87288 (0.87421)	Top-1 acc 69.141 (68.875)	Top-5 acc 88.281 (86.995)	lr 0.00479
Train [86][1220/3239]	Time 2.604 (0.627)	Data Time 0.001 (0.034)	Loss 2.3035 (2.3039)	Entropy 0.87288 (0.87420)	Top-1 acc 67.578 (68.868)	Top-5 acc 88.672 (86.995)	lr 0.00479
Train [86][1230/3239]	Time 0.290 (0.624)	Data Time 0.001 (0.034)	Loss 2.3424 (2.3039)	Entropy 0.87317 (0.87419)	Top-1 acc 66.406 (68.867)	Top-5 acc 85.547 (86.999)	lr 0.00479
Train [86][1240/3239]	Time 0.439 (0.665)	Data Time 0.004 (0.033)	Loss 2.4821 (2.3043)	Entropy 0.87314 (0.87419)	Top-1 acc 63.281 (68.854)	Top-5 acc 82.812 (86.993)	lr 0.00479
Train [86][1250/3239]	Time 0.223 (0.663)	Data Time 0.002 (0.033)	Loss 2.2444 (2.3040)	Entropy 0.87314 (0.87418)	Top-1 acc 70.312 (68.865)	Top-5 acc 85.547 (86.997)	lr 0.00479
Train [86][1260/3239]	Time 0.221 (0.662)	Data Time 0.001 (0.033)	Loss 2.2413 (2.3040)	Entropy 0.87295 (0.87417)	Top-1 acc 68.359 (68.863)	Top-5 acc 88.672 (86.994)	lr 0.00479
Train [86][1270/3239]	Time 0.220 (0.661)	Data Time 0.001 (0.033)	Loss 2.3268 (2.3042)	Entropy 0.87288 (0.87416)	Top-1 acc 67.188 (68.862)	Top-5 acc 87.109 (86.987)	lr 0.00479
Train [86][1280/3239]	Time 0.245 (0.659)	Data Time 0.006 (0.033)	Loss 2.3251 (2.3045)	Entropy 0.87286 (0.87415)	Top-1 acc 67.578 (68.853)	Top-5 acc 85.156 (86.981)	lr 0.00479
Train [86][1290/3239]	Time 0.230 (0.658)	Data Time 0.001 (0.032)	Loss 2.3339 (2.3042)	Entropy 0.87276 (0.87414)	Top-1 acc 70.703 (68.859)	Top-5 acc 86.719 (86.991)	lr 0.00479
Train [86][1300/3239]	Time 0.233 (0.656)	Data Time 0.001 (0.032)	Loss 2.3123 (2.3043)	Entropy 0.87266 (0.87413)	Top-1 acc 68.359 (68.859)	Top-5 acc 88.672 (86.992)	lr 0.00479
Train [86][1310/3239]	Time 0.313 (0.655)	Data Time 0.002 (0.032)	Loss 2.3331 (2.3047)	Entropy 0.87251 (0.87412)	Top-1 acc 69.141 (68.853)	Top-5 acc 87.500 (86.986)	lr 0.00479
Train [86][1320/3239]	Time 0.235 (0.653)	Data Time 0.001 (0.032)	Loss 2.2970 (2.3050)	Entropy 0.87250 (0.87410)	Top-1 acc 68.359 (68.846)	Top-5 acc 86.328 (86.977)	lr 0.00479
Train [86][1330/3239]	Time 2.498 (0.652)	Data Time 0.001 (0.031)	Loss 2.2631 (2.3046)	Entropy 0.87250 (0.87409)	Top-1 acc 70.312 (68.856)	Top-5 acc 87.891 (86.990)	lr 0.00478
Train [86][1340/3239]	Time 0.227 (0.649)	Data Time 0.001 (0.031)	Loss 2.1694 (2.3042)	Entropy 0.87235 (0.87408)	Top-1 acc 73.438 (68.871)	Top-5 acc 88.672 (86.991)	lr 0.00478
Train [86][1350/3239]	Time 0.227 (0.648)	Data Time 0.001 (0.031)	Loss 2.3448 (2.3046)	Entropy 0.87214 (0.87406)	Top-1 acc 70.703 (68.864)	Top-5 acc 86.719 (86.986)	lr 0.00478
Train [86][1360/3239]	Time 0.229 (0.646)	Data Time 0.001 (0.031)	Loss 2.3991 (2.3047)	Entropy 0.87212 (0.87405)	Top-1 acc 65.625 (68.859)	Top-5 acc 83.594 (86.988)	lr 0.00478
Train [86][1370/3239]	Time 0.231 (0.645)	Data Time 0.001 (0.030)	Loss 2.2464 (2.3047)	Entropy 0.87216 (0.87404)	Top-1 acc 67.578 (68.861)	Top-5 acc 90.234 (86.982)	lr 0.00478
Train [86][1380/3239]	Time 0.231 (0.644)	Data Time 0.001 (0.030)	Loss 2.2709 (2.3049)	Entropy 0.87221 (0.87402)	Top-1 acc 71.875 (68.862)	Top-5 acc 87.109 (86.975)	lr 0.00478
Train [86][1390/3239]	Time 0.250 (0.643)	Data Time 0.001 (0.030)	Loss 2.1978 (2.3050)	Entropy 0.87216 (0.87401)	Top-1 acc 72.266 (68.861)	Top-5 acc 89.453 (86.978)	lr 0.00478
Train [86][1400/3239]	Time 0.249 (0.642)	Data Time 0.001 (0.030)	Loss 2.2874 (2.3050)	Entropy 0.87217 (0.87400)	Top-1 acc 70.312 (68.862)	Top-5 acc 86.719 (86.976)	lr 0.00478
Train [86][1410/3239]	Time 0.232 (0.641)	Data Time 0.001 (0.030)	Loss 2.3613 (2.3048)	Entropy 0.87221 (0.87398)	Top-1 acc 67.578 (68.868)	Top-5 acc 85.156 (86.975)	lr 0.00478
Train [86][1420/3239]	Time 0.237 (0.640)	Data Time 0.001 (0.029)	Loss 2.3923 (2.3050)	Entropy 0.87221 (0.87397)	Top-1 acc 69.922 (68.868)	Top-5 acc 85.547 (86.974)	lr 0.00478
Train [86][1430/3239]	Time 0.262 (0.639)	Data Time 0.001 (0.029)	Loss 2.2635 (2.3049)	Entropy 0.87220 (0.87396)	Top-1 acc 70.703 (68.872)	Top-5 acc 89.062 (86.978)	lr 0.00478
Train [86][1440/3239]	Time 2.697 (0.638)	Data Time 0.002 (0.029)	Loss 2.2511 (2.3051)	Entropy 0.87220 (0.87395)	Top-1 acc 71.484 (68.865)	Top-5 acc 88.281 (86.978)	lr 0.00478
Train [86][1450/3239]	Time 0.238 (0.635)	Data Time 0.001 (0.029)	Loss 2.1961 (2.3048)	Entropy 0.87218 (0.87393)	Top-1 acc 69.141 (68.868)	Top-5 acc 90.234 (86.983)	lr 0.00477
Train [86][1460/3239]	Time 0.233 (0.634)	Data Time 0.001 (0.029)	Loss 2.2913 (2.3045)	Entropy 0.87220 (0.87392)	Top-1 acc 70.703 (68.876)	Top-5 acc 87.500 (86.991)	lr 0.00477
Train [86][1470/3239]	Time 0.230 (0.633)	Data Time 0.002 (0.028)	Loss 2.1077 (2.3045)	Entropy 0.87220 (0.87391)	Top-1 acc 72.266 (68.876)	Top-5 acc 92.578 (86.993)	lr 0.00477
Train [86][1480/3239]	Time 0.221 (0.632)	Data Time 0.001 (0.028)	Loss 2.2259 (2.3044)	Entropy 0.87220 (0.87390)	Top-1 acc 69.531 (68.867)	Top-5 acc 89.453 (86.995)	lr 0.00477
Train [86][1490/3239]	Time 0.236 (0.631)	Data Time 0.001 (0.028)	Loss 2.2180 (2.3042)	Entropy 0.87217 (0.87389)	Top-1 acc 69.922 (68.871)	Top-5 acc 90.234 (86.999)	lr 0.00477
Train [86][1500/3239]	Time 0.248 (0.630)	Data Time 0.001 (0.028)	Loss 2.3011 (2.3042)	Entropy 0.87214 (0.87388)	Top-1 acc 70.312 (68.870)	Top-5 acc 86.328 (86.999)	lr 0.00477
Train [86][1510/3239]	Time 0.226 (0.629)	Data Time 0.001 (0.028)	Loss 2.1827 (2.3040)	Entropy 0.87222 (0.87387)	Top-1 acc 69.922 (68.875)	Top-5 acc 90.234 (87.000)	lr 0.00477
Train [86][1520/3239]	Time 0.239 (0.628)	Data Time 0.001 (0.028)	Loss 2.2787 (2.3041)	Entropy 0.87218 (0.87385)	Top-1 acc 69.141 (68.872)	Top-5 acc 86.719 (87.000)	lr 0.00477
Train [86][1530/3239]	Time 0.219 (0.627)	Data Time 0.001 (0.027)	Loss 2.3747 (2.3041)	Entropy 0.87215 (0.87384)	Top-1 acc 69.141 (68.871)	Top-5 acc 83.203 (87.000)	lr 0.00477
Train [86][1540/3239]	Time 0.228 (0.626)	Data Time 0.001 (0.027)	Loss 2.4372 (2.3041)	Entropy 0.87213 (0.87383)	Top-1 acc 65.234 (68.870)	Top-5 acc 82.422 (87.000)	lr 0.00477
Train [86][1550/3239]	Time 2.577 (0.625)	Data Time 0.001 (0.027)	Loss 2.2109 (2.3037)	Entropy 0.87213 (0.87382)	Top-1 acc 69.141 (68.875)	Top-5 acc 87.500 (87.003)	lr 0.00477
Train [86][1560/3239]	Time 0.246 (0.622)	Data Time 0.001 (0.027)	Loss 2.3267 (2.3039)	Entropy 0.87212 (0.87381)	Top-1 acc 68.750 (68.869)	Top-5 acc 85.156 (86.998)	lr 0.00477
Train [86][1570/3239]	Time 0.373 (0.621)	Data Time 0.001 (0.027)	Loss 2.3917 (2.3039)	Entropy 0.87225 (0.87380)	Top-1 acc 68.359 (68.870)	Top-5 acc 85.547 (87.000)	lr 0.00477
Train [86][1580/3239]	Time 0.224 (0.620)	Data Time 0.001 (0.027)	Loss 2.3859 (2.3038)	Entropy 0.87222 (0.87379)	Top-1 acc 64.844 (68.876)	Top-5 acc 84.375 (87.004)	lr 0.00476
Train [86][1590/3239]	Time 0.221 (0.620)	Data Time 0.001 (0.026)	Loss 2.4049 (2.3043)	Entropy 0.87222 (0.87378)	Top-1 acc 67.188 (68.867)	Top-5 acc 85.938 (86.997)	lr 0.00476
Train [86][1600/3239]	Time 0.273 (0.649)	Data Time 0.004 (0.026)	Loss 2.3117 (2.3045)	Entropy 0.87217 (0.87377)	Top-1 acc 71.094 (68.859)	Top-5 acc 86.719 (86.993)	lr 0.00476
Train [86][1610/3239]	Time 0.220 (0.649)	Data Time 0.002 (0.026)	Loss 2.3339 (2.3048)	Entropy 0.87209 (0.87376)	Top-1 acc 69.141 (68.850)	Top-5 acc 84.766 (86.987)	lr 0.00476
Train [86][1620/3239]	Time 0.241 (0.648)	Data Time 0.002 (0.026)	Loss 2.2470 (2.3050)	Entropy 0.87202 (0.87375)	Top-1 acc 69.531 (68.849)	Top-5 acc 87.891 (86.978)	lr 0.00476
Train [86][1630/3239]	Time 0.229 (0.647)	Data Time 0.001 (0.026)	Loss 2.1671 (2.3052)	Entropy 0.87200 (0.87374)	Top-1 acc 73.828 (68.849)	Top-5 acc 88.672 (86.979)	lr 0.00476
Train [86][1640/3239]	Time 0.223 (0.646)	Data Time 0.001 (0.026)	Loss 2.3537 (2.3053)	Entropy 0.87199 (0.87373)	Top-1 acc 69.531 (68.847)	Top-5 acc 85.547 (86.978)	lr 0.00476
Train [86][1650/3239]	Time 0.239 (0.645)	Data Time 0.001 (0.026)	Loss 2.2527 (2.3056)	Entropy 0.87196 (0.87372)	Top-1 acc 67.969 (68.843)	Top-5 acc 87.891 (86.969)	lr 0.00476
Train [86][1660/3239]	Time 2.550 (0.644)	Data Time 0.001 (0.025)	Loss 2.3810 (2.3058)	Entropy 0.87196 (0.87371)	Top-1 acc 66.016 (68.843)	Top-5 acc 86.719 (86.963)	lr 0.00476
Train [86][1670/3239]	Time 0.234 (0.642)	Data Time 0.003 (0.025)	Loss 2.3461 (2.3060)	Entropy 0.87191 (0.87370)	Top-1 acc 67.969 (68.836)	Top-5 acc 85.938 (86.959)	lr 0.00476
Train [86][1680/3239]	Time 0.213 (0.641)	Data Time 0.001 (0.025)	Loss 2.2941 (2.3061)	Entropy 0.87186 (0.87369)	Top-1 acc 70.703 (68.834)	Top-5 acc 85.938 (86.956)	lr 0.00476
Train [86][1690/3239]	Time 0.226 (0.640)	Data Time 0.001 (0.025)	Loss 2.2302 (2.3058)	Entropy 0.87183 (0.87368)	Top-1 acc 70.312 (68.832)	Top-5 acc 87.500 (86.962)	lr 0.00476
Train [86][1700/3239]	Time 0.318 (0.639)	Data Time 0.001 (0.025)	Loss 2.5400 (2.3061)	Entropy 0.87174 (0.87366)	Top-1 acc 60.547 (68.821)	Top-5 acc 81.250 (86.956)	lr 0.00475
Train [86][1710/3239]	Time 0.234 (0.638)	Data Time 0.001 (0.025)	Loss 2.2327 (2.3060)	Entropy 0.87169 (0.87365)	Top-1 acc 69.141 (68.826)	Top-5 acc 87.891 (86.956)	lr 0.00475
Train [86][1720/3239]	Time 0.236 (0.637)	Data Time 0.001 (0.025)	Loss 2.1642 (2.3062)	Entropy 0.87165 (0.87364)	Top-1 acc 73.047 (68.821)	Top-5 acc 89.453 (86.955)	lr 0.00475
Train [86][1730/3239]	Time 0.234 (0.636)	Data Time 0.001 (0.024)	Loss 2.2409 (2.3061)	Entropy 0.87152 (0.87363)	Top-1 acc 67.188 (68.825)	Top-5 acc 88.672 (86.959)	lr 0.00475
Train [86][1740/3239]	Time 0.254 (0.635)	Data Time 0.001 (0.024)	Loss 2.4179 (2.3060)	Entropy 0.87142 (0.87362)	Top-1 acc 67.969 (68.831)	Top-5 acc 87.109 (86.966)	lr 0.00475
Train [86][1750/3239]	Time 0.218 (0.634)	Data Time 0.001 (0.024)	Loss 2.4587 (2.3062)	Entropy 0.87145 (0.87360)	Top-1 acc 60.938 (68.825)	Top-5 acc 84.375 (86.965)	lr 0.00475
Train [86][1760/3239]	Time 0.224 (0.633)	Data Time 0.001 (0.024)	Loss 2.4340 (2.3065)	Entropy 0.87142 (0.87359)	Top-1 acc 65.234 (68.821)	Top-5 acc 86.328 (86.965)	lr 0.00475
Train [86][1770/3239]	Time 2.596 (0.633)	Data Time 0.001 (0.024)	Loss 2.3086 (2.3063)	Entropy 0.87142 (0.87358)	Top-1 acc 67.188 (68.826)	Top-5 acc 85.547 (86.971)	lr 0.00475
Train [86][1780/3239]	Time 0.224 (0.630)	Data Time 0.001 (0.024)	Loss 2.3007 (2.3063)	Entropy 0.87140 (0.87357)	Top-1 acc 70.312 (68.822)	Top-5 acc 87.891 (86.972)	lr 0.00475
Train [86][1790/3239]	Time 0.233 (0.630)	Data Time 0.001 (0.024)	Loss 2.2790 (2.3063)	Entropy 0.87134 (0.87356)	Top-1 acc 68.750 (68.820)	Top-5 acc 87.891 (86.972)	lr 0.00475
Train [86][1800/3239]	Time 0.240 (0.629)	Data Time 0.001 (0.024)	Loss 2.2570 (2.3061)	Entropy 0.87128 (0.87354)	Top-1 acc 69.922 (68.825)	Top-5 acc 88.672 (86.978)	lr 0.00475
Train [86][1810/3239]	Time 0.235 (0.628)	Data Time 0.001 (0.023)	Loss 2.2146 (2.3060)	Entropy 0.87119 (0.87353)	Top-1 acc 70.312 (68.829)	Top-5 acc 89.453 (86.981)	lr 0.00475
Train [86][1820/3239]	Time 0.228 (0.627)	Data Time 0.001 (0.023)	Loss 2.1127 (2.3060)	Entropy 0.87113 (0.87352)	Top-1 acc 71.875 (68.831)	Top-5 acc 91.797 (86.984)	lr 0.00475
Train [86][1830/3239]	Time 0.336 (0.626)	Data Time 0.002 (0.023)	Loss 2.3726 (2.3060)	Entropy 0.87108 (0.87350)	Top-1 acc 62.891 (68.825)	Top-5 acc 87.500 (86.983)	lr 0.00474
Train [86][1840/3239]	Time 0.233 (0.625)	Data Time 0.001 (0.023)	Loss 2.2900 (2.3060)	Entropy 0.87110 (0.87349)	Top-1 acc 68.359 (68.821)	Top-5 acc 87.109 (86.985)	lr 0.00474
Train [86][1850/3239]	Time 0.227 (0.625)	Data Time 0.001 (0.023)	Loss 2.4021 (2.3063)	Entropy 0.87113 (0.87348)	Top-1 acc 65.234 (68.812)	Top-5 acc 87.109 (86.982)	lr 0.00474
Train [86][1860/3239]	Time 0.224 (0.624)	Data Time 0.001 (0.023)	Loss 2.3012 (2.3063)	Entropy 0.87106 (0.87346)	Top-1 acc 69.922 (68.815)	Top-5 acc 86.719 (86.984)	lr 0.00474
Train [86][1870/3239]	Time 0.373 (0.623)	Data Time 0.001 (0.023)	Loss 2.3237 (2.3065)	Entropy 0.87100 (0.87345)	Top-1 acc 67.188 (68.813)	Top-5 acc 88.281 (86.984)	lr 0.00474
Train [86][1880/3239]	Time 2.549 (0.622)	Data Time 0.002 (0.023)	Loss 2.4981 (2.3065)	Entropy 0.87100 (0.87344)	Top-1 acc 64.453 (68.814)	Top-5 acc 84.766 (86.984)	lr 0.00474
Train [86][1890/3239]	Time 0.241 (0.620)	Data Time 0.001 (0.023)	Loss 2.3256 (2.3064)	Entropy 0.87107 (0.87343)	Top-1 acc 69.141 (68.816)	Top-5 acc 87.109 (86.983)	lr 0.00474
Train [86][1900/3239]	Time 0.238 (0.620)	Data Time 0.001 (0.022)	Loss 2.4857 (2.3066)	Entropy 0.87110 (0.87341)	Top-1 acc 65.625 (68.814)	Top-5 acc 83.594 (86.983)	lr 0.00474
Train [86][1910/3239]	Time 0.222 (0.619)	Data Time 0.001 (0.022)	Loss 2.2696 (2.3063)	Entropy 0.87104 (0.87340)	Top-1 acc 69.531 (68.824)	Top-5 acc 86.328 (86.985)	lr 0.00474
Train [86][1920/3239]	Time 0.223 (0.618)	Data Time 0.001 (0.022)	Loss 2.2158 (2.3063)	Entropy 0.87103 (0.87339)	Top-1 acc 71.875 (68.825)	Top-5 acc 87.109 (86.983)	lr 0.00474
Train [86][1930/3239]	Time 0.230 (0.617)	Data Time 0.001 (0.022)	Loss 2.4430 (2.3064)	Entropy 0.87099 (0.87338)	Top-1 acc 66.406 (68.825)	Top-5 acc 84.766 (86.982)	lr 0.00474
Train [86][1940/3239]	Time 0.227 (0.617)	Data Time 0.001 (0.022)	Loss 2.3582 (2.3065)	Entropy 0.87098 (0.87336)	Top-1 acc 67.578 (68.820)	Top-5 acc 85.156 (86.978)	lr 0.00474
Train [86][1950/3239]	Time 0.204 (0.616)	Data Time 0.001 (0.022)	Loss 2.2726 (2.3065)	Entropy 0.87088 (0.87335)	Top-1 acc 69.141 (68.819)	Top-5 acc 85.547 (86.977)	lr 0.00474
Train [86][1960/3239]	Time 0.395 (0.640)	Data Time 0.003 (0.022)	Loss 2.3582 (2.3068)	Entropy 0.87089 (0.87334)	Top-1 acc 66.016 (68.805)	Top-5 acc 84.375 (86.974)	lr 0.00473
Train [86][1970/3239]	Time 0.231 (0.640)	Data Time 0.002 (0.022)	Loss 2.3400 (2.3069)	Entropy 0.87086 (0.87333)	Top-1 acc 64.453 (68.803)	Top-5 acc 85.547 (86.972)	lr 0.00473
Train [86][1980/3239]	Time 0.253 (0.639)	Data Time 0.002 (0.022)	Loss 2.2955 (2.3067)	Entropy 0.87083 (0.87331)	Top-1 acc 70.312 (68.808)	Top-5 acc 85.547 (86.977)	lr 0.00473
Train [86][1990/3239]	Time 2.632 (0.639)	Data Time 0.002 (0.021)	Loss 2.2804 (2.3067)	Entropy 0.87083 (0.87330)	Top-1 acc 68.750 (68.802)	Top-5 acc 85.938 (86.974)	lr 0.00473
Train [86][2000/3239]	Time 0.251 (0.637)	Data Time 0.002 (0.021)	Loss 2.1931 (2.3067)	Entropy 0.87085 (0.87329)	Top-1 acc 72.656 (68.801)	Top-5 acc 88.672 (86.974)	lr 0.00473
Train [86][2010/3239]	Time 0.244 (0.636)	Data Time 0.002 (0.021)	Loss 2.0972 (2.3065)	Entropy 0.87087 (0.87328)	Top-1 acc 74.609 (68.807)	Top-5 acc 90.234 (86.977)	lr 0.00473
Train [86][2020/3239]	Time 0.229 (0.635)	Data Time 0.001 (0.021)	Loss 2.1919 (2.3066)	Entropy 0.87086 (0.87327)	Top-1 acc 73.438 (68.806)	Top-5 acc 89.453 (86.973)	lr 0.00473
Train [86][2030/3239]	Time 0.247 (0.634)	Data Time 0.002 (0.021)	Loss 2.5225 (2.3068)	Entropy 0.87078 (0.87325)	Top-1 acc 59.766 (68.800)	Top-5 acc 82.031 (86.969)	lr 0.00473
Train [86][2040/3239]	Time 0.251 (0.634)	Data Time 0.002 (0.021)	Loss 2.4285 (2.3071)	Entropy 0.87074 (0.87324)	Top-1 acc 66.797 (68.791)	Top-5 acc 83.594 (86.963)	lr 0.00473
Train [86][2050/3239]	Time 0.231 (0.633)	Data Time 0.001 (0.021)	Loss 2.4619 (2.3073)	Entropy 0.87071 (0.87323)	Top-1 acc 67.578 (68.789)	Top-5 acc 82.422 (86.956)	lr 0.00473
Train [86][2060/3239]	Time 0.224 (0.632)	Data Time 0.001 (0.021)	Loss 2.2695 (2.3072)	Entropy 0.87059 (0.87322)	Top-1 acc 70.703 (68.794)	Top-5 acc 85.938 (86.955)	lr 0.00473
Train [86][2070/3239]	Time 0.229 (0.631)	Data Time 0.001 (0.021)	Loss 2.3677 (2.3074)	Entropy 0.87107 (0.87321)	Top-1 acc 64.453 (68.787)	Top-5 acc 85.938 (86.956)	lr 0.00473
Train [86][2080/3239]	Time 0.227 (0.631)	Data Time 0.001 (0.021)	Loss 2.4573 (2.3075)	Entropy 0.87096 (0.87319)	Top-1 acc 61.719 (68.778)	Top-5 acc 83.203 (86.953)	lr 0.00472
Train [86][2090/3239]	Time 0.235 (0.630)	Data Time 0.001 (0.021)	Loss 2.5211 (2.3073)	Entropy 0.87096 (0.87318)	Top-1 acc 64.453 (68.782)	Top-5 acc 83.984 (86.955)	lr 0.00472
Train [86][2100/3239]	Time 2.563 (0.629)	Data Time 0.002 (0.020)	Loss 2.3697 (2.3076)	Entropy 0.87096 (0.87317)	Top-1 acc 68.359 (68.778)	Top-5 acc 83.984 (86.946)	lr 0.00472
Train [86][2110/3239]	Time 0.242 (0.628)	Data Time 0.001 (0.020)	Loss 2.3660 (2.3077)	Entropy 0.87094 (0.87316)	Top-1 acc 69.531 (68.777)	Top-5 acc 86.719 (86.945)	lr 0.00472
Train [86][2120/3239]	Time 0.230 (0.627)	Data Time 0.001 (0.020)	Loss 2.3711 (2.3075)	Entropy 0.87088 (0.87315)	Top-1 acc 69.531 (68.784)	Top-5 acc 85.156 (86.949)	lr 0.00472
Train [86][2130/3239]	Time 0.381 (0.626)	Data Time 0.001 (0.020)	Loss 2.6230 (2.3077)	Entropy 0.87081 (0.87314)	Top-1 acc 64.453 (68.778)	Top-5 acc 81.250 (86.947)	lr 0.00472
Train [86][2140/3239]	Time 0.230 (0.625)	Data Time 0.001 (0.020)	Loss 2.3652 (2.3079)	Entropy 0.87079 (0.87313)	Top-1 acc 67.188 (68.768)	Top-5 acc 85.156 (86.943)	lr 0.00472
Train [86][2150/3239]	Time 0.237 (0.625)	Data Time 0.001 (0.020)	Loss 2.3885 (2.3081)	Entropy 0.87076 (0.87312)	Top-1 acc 64.844 (68.759)	Top-5 acc 86.719 (86.940)	lr 0.00472
Train [86][2160/3239]	Time 0.225 (0.624)	Data Time 0.001 (0.020)	Loss 2.3025 (2.3082)	Entropy 0.87076 (0.87311)	Top-1 acc 69.141 (68.757)	Top-5 acc 86.328 (86.941)	lr 0.00472
Train [86][2170/3239]	Time 0.242 (0.623)	Data Time 0.001 (0.020)	Loss 2.3280 (2.3082)	Entropy 0.87070 (0.87310)	Top-1 acc 72.656 (68.756)	Top-5 acc 86.328 (86.941)	lr 0.00472
Train [86][2180/3239]	Time 0.241 (0.623)	Data Time 0.001 (0.020)	Loss 2.3921 (2.3083)	Entropy 0.87058 (0.87309)	Top-1 acc 67.578 (68.755)	Top-5 acc 84.766 (86.935)	lr 0.00472
Train [86][2190/3239]	Time 0.248 (0.622)	Data Time 0.001 (0.020)	Loss 2.3231 (2.3083)	Entropy 0.87057 (0.87307)	Top-1 acc 66.016 (68.754)	Top-5 acc 85.938 (86.933)	lr 0.00472
Train [86][2200/3239]	Time 0.245 (0.622)	Data Time 0.002 (0.020)	Loss 2.3952 (2.3084)	Entropy 0.87059 (0.87306)	Top-1 acc 66.797 (68.755)	Top-5 acc 86.719 (86.935)	lr 0.00472
Train [86][2210/3239]	Time 2.618 (0.621)	Data Time 0.001 (0.020)	Loss 2.3241 (2.3085)	Entropy 0.87059 (0.87305)	Top-1 acc 69.922 (68.751)	Top-5 acc 88.672 (86.933)	lr 0.00471
Train [86][2220/3239]	Time 0.354 (0.619)	Data Time 0.001 (0.019)	Loss 2.3254 (2.3086)	Entropy 0.87056 (0.87304)	Top-1 acc 69.922 (68.752)	Top-5 acc 88.281 (86.932)	lr 0.00471
Train [86][2230/3239]	Time 0.237 (0.619)	Data Time 0.001 (0.019)	Loss 2.2417 (2.3087)	Entropy 0.87056 (0.87303)	Top-1 acc 69.531 (68.748)	Top-5 acc 89.453 (86.928)	lr 0.00471
Train [86][2240/3239]	Time 0.239 (0.618)	Data Time 0.001 (0.019)	Loss 2.3230 (2.3088)	Entropy 0.87062 (0.87302)	Top-1 acc 67.188 (68.749)	Top-5 acc 86.328 (86.925)	lr 0.00471
Train [86][2250/3239]	Time 0.228 (0.617)	Data Time 0.001 (0.019)	Loss 2.3184 (2.3086)	Entropy 0.87057 (0.87301)	Top-1 acc 67.188 (68.751)	Top-5 acc 86.328 (86.930)	lr 0.00471
Train [86][2260/3239]	Time 0.250 (0.617)	Data Time 0.001 (0.019)	Loss 2.2550 (2.3088)	Entropy 0.87056 (0.87300)	Top-1 acc 69.531 (68.748)	Top-5 acc 87.500 (86.926)	lr 0.00471
Train [86][2270/3239]	Time 0.231 (0.616)	Data Time 0.001 (0.019)	Loss 2.4261 (2.3088)	Entropy 0.87053 (0.87299)	Top-1 acc 64.062 (68.746)	Top-5 acc 85.156 (86.925)	lr 0.00471
Train [86][2280/3239]	Time 0.221 (0.616)	Data Time 0.001 (0.019)	Loss 2.1745 (2.3088)	Entropy 0.87054 (0.87298)	Top-1 acc 70.312 (68.749)	Top-5 acc 88.672 (86.923)	lr 0.00471
Train [86][2290/3239]	Time 0.222 (0.615)	Data Time 0.001 (0.019)	Loss 2.2507 (2.3089)	Entropy 0.87056 (0.87297)	Top-1 acc 71.875 (68.749)	Top-5 acc 85.547 (86.924)	lr 0.00471
Train [86][2300/3239]	Time 0.243 (0.614)	Data Time 0.002 (0.019)	Loss 2.3027 (2.3091)	Entropy 0.87055 (0.87295)	Top-1 acc 66.016 (68.740)	Top-5 acc 89.844 (86.921)	lr 0.00471
Train [86][2310/3239]	Time 0.228 (0.614)	Data Time 0.001 (0.019)	Loss 2.2500 (2.3089)	Entropy 0.87055 (0.87294)	Top-1 acc 69.922 (68.744)	Top-5 acc 89.062 (86.925)	lr 0.00471
Train [86][2320/3239]	Time 56.392 (0.636)	Data Time 0.001 (0.019)	Loss 2.1971 (2.3088)	Entropy 0.87055 (0.87293)	Top-1 acc 69.922 (68.745)	Top-5 acc 89.062 (86.927)	lr 0.00471
Train [86][2330/3239]	Time 0.252 (0.635)	Data Time 0.002 (0.019)	Loss 2.4678 (2.3087)	Entropy 0.87042 (0.87292)	Top-1 acc 63.281 (68.748)	Top-5 acc 85.547 (86.929)	lr 0.00471
Train [86][2340/3239]	Time 0.242 (0.634)	Data Time 0.002 (0.019)	Loss 2.1845 (2.3088)	Entropy 0.87042 (0.87291)	Top-1 acc 70.312 (68.745)	Top-5 acc 89.453 (86.926)	lr 0.00470
Train [86][2350/3239]	Time 0.263 (0.633)	Data Time 0.002 (0.018)	Loss 2.3479 (2.3089)	Entropy 0.87040 (0.87290)	Top-1 acc 66.016 (68.745)	Top-5 acc 85.547 (86.925)	lr 0.00470
Train [86][2360/3239]	Time 0.241 (0.633)	Data Time 0.001 (0.018)	Loss 2.3326 (2.3090)	Entropy 0.87032 (0.87289)	Top-1 acc 67.969 (68.743)	Top-5 acc 85.938 (86.922)	lr 0.00470
Train [86][2370/3239]	Time 0.251 (0.632)	Data Time 0.001 (0.018)	Loss 2.4004 (2.3090)	Entropy 0.87028 (0.87288)	Top-1 acc 66.016 (68.738)	Top-5 acc 85.156 (86.921)	lr 0.00470
Train [86][2380/3239]	Time 0.232 (0.631)	Data Time 0.001 (0.018)	Loss 2.4693 (2.3090)	Entropy 0.87023 (0.87287)	Top-1 acc 60.938 (68.738)	Top-5 acc 86.328 (86.923)	lr 0.00470
Train [86][2390/3239]	Time 0.348 (0.631)	Data Time 0.001 (0.018)	Loss 2.4486 (2.3090)	Entropy 0.87018 (0.87286)	Top-1 acc 66.406 (68.733)	Top-5 acc 82.812 (86.923)	lr 0.00470
Train [86][2400/3239]	Time 0.239 (0.630)	Data Time 0.001 (0.018)	Loss 2.3080 (2.3090)	Entropy 0.87015 (0.87285)	Top-1 acc 67.578 (68.733)	Top-5 acc 87.500 (86.925)	lr 0.00470
Train [86][2410/3239]	Time 0.221 (0.630)	Data Time 0.001 (0.018)	Loss 2.3305 (2.3090)	Entropy 0.87013 (0.87284)	Top-1 acc 70.312 (68.731)	Top-5 acc 86.328 (86.926)	lr 0.00470
Train [86][2420/3239]	Time 0.255 (0.629)	Data Time 0.001 (0.018)	Loss 2.3438 (2.3089)	Entropy 0.87007 (0.87282)	Top-1 acc 67.188 (68.733)	Top-5 acc 87.500 (86.927)	lr 0.00470
Train [86][2430/3239]	Time 2.575 (0.628)	Data Time 0.001 (0.018)	Loss 2.3300 (2.3089)	Entropy 0.87007 (0.87281)	Top-1 acc 67.578 (68.731)	Top-5 acc 86.328 (86.928)	lr 0.00470
Train [86][2440/3239]	Time 0.258 (0.627)	Data Time 0.001 (0.018)	Loss 2.3447 (2.3087)	Entropy 0.86995 (0.87280)	Top-1 acc 67.188 (68.737)	Top-5 acc 84.766 (86.932)	lr 0.00470
Train [86][2450/3239]	Time 0.257 (0.626)	Data Time 0.002 (0.018)	Loss 2.2738 (2.3086)	Entropy 0.86985 (0.87279)	Top-1 acc 69.141 (68.737)	Top-5 acc 88.672 (86.932)	lr 0.00470
Train [86][2460/3239]	Time 0.240 (0.626)	Data Time 0.002 (0.018)	Loss 2.3430 (2.3084)	Entropy 0.86977 (0.87278)	Top-1 acc 68.359 (68.743)	Top-5 acc 86.328 (86.935)	lr 0.00469
Train [86][2470/3239]	Time 0.220 (0.625)	Data Time 0.001 (0.018)	Loss 2.3259 (2.3085)	Entropy 0.86977 (0.87276)	Top-1 acc 67.578 (68.743)	Top-5 acc 87.500 (86.932)	lr 0.00469
Train [86][2480/3239]	Time 0.333 (0.624)	Data Time 0.001 (0.018)	Loss 2.3177 (2.3084)	Entropy 0.86977 (0.87275)	Top-1 acc 67.188 (68.741)	Top-5 acc 86.719 (86.934)	lr 0.00469
Train [86][2490/3239]	Time 0.218 (0.624)	Data Time 0.001 (0.018)	Loss 2.3691 (2.3085)	Entropy 0.86975 (0.87274)	Top-1 acc 66.016 (68.742)	Top-5 acc 86.328 (86.933)	lr 0.00469
Train [86][2500/3239]	Time 0.243 (0.623)	Data Time 0.002 (0.017)	Loss 2.0657 (2.3086)	Entropy 0.86966 (0.87273)	Top-1 acc 73.047 (68.735)	Top-5 acc 91.797 (86.933)	lr 0.00469
Train [86][2510/3239]	Time 0.243 (0.622)	Data Time 0.001 (0.017)	Loss 2.1151 (2.3087)	Entropy 0.86962 (0.87272)	Top-1 acc 71.094 (68.732)	Top-5 acc 92.188 (86.931)	lr 0.00469
Train [86][2520/3239]	Time 0.304 (0.622)	Data Time 0.001 (0.017)	Loss 2.2248 (2.3087)	Entropy 0.86962 (0.87270)	Top-1 acc 69.922 (68.732)	Top-5 acc 88.281 (86.929)	lr 0.00469
Train [86][2530/3239]	Time 0.244 (0.621)	Data Time 0.001 (0.017)	Loss 2.3509 (2.3088)	Entropy 0.86960 (0.87269)	Top-1 acc 65.234 (68.725)	Top-5 acc 87.109 (86.929)	lr 0.00469
Train [86][2540/3239]	Time 2.598 (0.621)	Data Time 0.001 (0.017)	Loss 2.4268 (2.3089)	Entropy 0.86960 (0.87268)	Top-1 acc 68.359 (68.720)	Top-5 acc 85.156 (86.927)	lr 0.00469
Train [86][2550/3239]	Time 0.249 (0.619)	Data Time 0.002 (0.017)	Loss 2.3728 (2.3091)	Entropy 0.86950 (0.87267)	Top-1 acc 66.016 (68.713)	Top-5 acc 84.375 (86.924)	lr 0.00469
Train [86][2560/3239]	Time 0.226 (0.619)	Data Time 0.001 (0.017)	Loss 2.2596 (2.3091)	Entropy 0.86951 (0.87266)	Top-1 acc 70.703 (68.712)	Top-5 acc 88.281 (86.921)	lr 0.00469
Train [86][2570/3239]	Time 0.222 (0.618)	Data Time 0.001 (0.017)	Loss 2.3030 (2.3092)	Entropy 0.86954 (0.87264)	Top-1 acc 71.094 (68.710)	Top-5 acc 85.938 (86.917)	lr 0.00469
Train [86][2580/3239]	Time 0.263 (0.618)	Data Time 0.001 (0.017)	Loss 2.3557 (2.3093)	Entropy 0.86952 (0.87263)	Top-1 acc 67.969 (68.706)	Top-5 acc 83.594 (86.912)	lr 0.00469
Train [86][2590/3239]	Time 0.235 (0.617)	Data Time 0.001 (0.017)	Loss 2.1460 (2.3095)	Entropy 0.86954 (0.87262)	Top-1 acc 72.266 (68.704)	Top-5 acc 88.672 (86.910)	lr 0.00468
Train [86][2600/3239]	Time 0.233 (0.617)	Data Time 0.001 (0.017)	Loss 2.2766 (2.3095)	Entropy 0.86943 (0.87261)	Top-1 acc 70.312 (68.707)	Top-5 acc 86.328 (86.908)	lr 0.00468
Train [86][2610/3239]	Time 0.342 (0.617)	Data Time 0.001 (0.017)	Loss 2.1799 (2.3093)	Entropy 0.86937 (0.87259)	Top-1 acc 73.438 (68.714)	Top-5 acc 87.109 (86.914)	lr 0.00468
Train [86][2620/3239]	Time 0.223 (0.616)	Data Time 0.001 (0.017)	Loss 2.3002 (2.3094)	Entropy 0.86934 (0.87258)	Top-1 acc 70.312 (68.712)	Top-5 acc 86.719 (86.911)	lr 0.00468
Train [86][2630/3239]	Time 0.228 (0.615)	Data Time 0.001 (0.017)	Loss 2.2352 (2.3092)	Entropy 0.86928 (0.87257)	Top-1 acc 71.484 (68.721)	Top-5 acc 88.281 (86.916)	lr 0.00468
Train [86][2640/3239]	Time 0.242 (0.615)	Data Time 0.001 (0.017)	Loss 2.0726 (2.3091)	Entropy 0.86928 (0.87256)	Top-1 acc 75.781 (68.722)	Top-5 acc 92.578 (86.920)	lr 0.00468
Train [86][2650/3239]	Time 0.243 (0.614)	Data Time 0.001 (0.017)	Loss 2.4492 (2.3092)	Entropy 0.86940 (0.87255)	Top-1 acc 69.141 (68.719)	Top-5 acc 84.375 (86.919)	lr 0.00468
Train [86][2660/3239]	Time 0.275 (0.614)	Data Time 0.001 (0.016)	Loss 2.2622 (2.3093)	Entropy 0.86943 (0.87253)	Top-1 acc 67.578 (68.721)	Top-5 acc 88.672 (86.917)	lr 0.00468
Train [86][2670/3239]	Time 0.272 (0.614)	Data Time 0.001 (0.016)	Loss 2.4245 (2.3093)	Entropy 0.86937 (0.87252)	Top-1 acc 67.578 (68.721)	Top-5 acc 82.422 (86.916)	lr 0.00468
Train [86][2680/3239]	Time 0.299 (0.634)	Data Time 0.005 (0.016)	Loss 2.3536 (2.3092)	Entropy 0.86926 (0.87251)	Top-1 acc 65.625 (68.722)	Top-5 acc 85.156 (86.919)	lr 0.00468
Train [86][2690/3239]	Time 0.237 (0.633)	Data Time 0.002 (0.016)	Loss 2.3139 (2.3094)	Entropy 0.86924 (0.87250)	Top-1 acc 69.141 (68.718)	Top-5 acc 87.109 (86.916)	lr 0.00468
Train [86][2700/3239]	Time 0.233 (0.633)	Data Time 0.002 (0.016)	Loss 2.1798 (2.3096)	Entropy 0.86923 (0.87249)	Top-1 acc 73.828 (68.711)	Top-5 acc 88.672 (86.914)	lr 0.00468
Train [86][2710/3239]	Time 0.266 (0.632)	Data Time 0.001 (0.016)	Loss 2.2913 (2.3098)	Entropy 0.86916 (0.87247)	Top-1 acc 69.141 (68.707)	Top-5 acc 87.109 (86.911)	lr 0.00468
Train [86][2720/3239]	Time 0.277 (0.632)	Data Time 0.001 (0.016)	Loss 2.2635 (2.3098)	Entropy 0.86913 (0.87246)	Top-1 acc 69.922 (68.703)	Top-5 acc 88.672 (86.913)	lr 0.00467
Train [86][2730/3239]	Time 0.260 (0.631)	Data Time 0.003 (0.016)	Loss 2.2137 (2.3100)	Entropy 0.86918 (0.87245)	Top-1 acc 66.406 (68.693)	Top-5 acc 91.797 (86.911)	lr 0.00467
Train [86][2740/3239]	Time 0.276 (0.631)	Data Time 0.001 (0.016)	Loss 2.3690 (2.3100)	Entropy 0.86892 (0.87244)	Top-1 acc 66.797 (68.690)	Top-5 acc 85.938 (86.912)	lr 0.00467
Train [86][2750/3239]	Time 0.234 (0.630)	Data Time 0.001 (0.016)	Loss 2.2891 (2.3101)	Entropy 0.86895 (0.87242)	Top-1 acc 68.359 (68.686)	Top-5 acc 86.328 (86.909)	lr 0.00467
Train [86][2760/3239]	Time 0.284 (0.630)	Data Time 0.001 (0.016)	Loss 2.4913 (2.3103)	Entropy 0.86898 (0.87241)	Top-1 acc 64.062 (68.680)	Top-5 acc 82.812 (86.907)	lr 0.00467
Train [86][2770/3239]	Time 0.244 (0.629)	Data Time 0.001 (0.016)	Loss 2.3570 (2.3103)	Entropy 0.86894 (0.87240)	Top-1 acc 66.797 (68.680)	Top-5 acc 86.328 (86.908)	lr 0.00467
Train [86][2780/3239]	Time 0.310 (0.629)	Data Time 0.001 (0.016)	Loss 2.3299 (2.3103)	Entropy 0.86890 (0.87239)	Top-1 acc 65.625 (68.678)	Top-5 acc 86.719 (86.908)	lr 0.00467
Train [86][2790/3239]	Time 0.229 (0.628)	Data Time 0.001 (0.016)	Loss 2.1198 (2.3104)	Entropy 0.86892 (0.87237)	Top-1 acc 75.391 (68.676)	Top-5 acc 89.453 (86.908)	lr 0.00467
Train [86][2800/3239]	Time 0.274 (0.628)	Data Time 0.001 (0.016)	Loss 2.5388 (2.3105)	Entropy 0.86896 (0.87236)	Top-1 acc 62.500 (68.675)	Top-5 acc 81.250 (86.902)	lr 0.00467
Train [86][2810/3239]	Time 0.231 (0.627)	Data Time 0.001 (0.016)	Loss 2.3437 (2.3106)	Entropy 0.86892 (0.87235)	Top-1 acc 64.453 (68.671)	Top-5 acc 89.062 (86.901)	lr 0.00467
Train [86][2820/3239]	Time 0.317 (0.627)	Data Time 0.001 (0.016)	Loss 2.3915 (2.3107)	Entropy 0.86893 (0.87234)	Top-1 acc 62.891 (68.668)	Top-5 acc 85.156 (86.898)	lr 0.00467
Train [86][2830/3239]	Time 0.251 (0.626)	Data Time 0.002 (0.016)	Loss 2.4018 (2.3107)	Entropy 0.86895 (0.87233)	Top-1 acc 68.359 (68.668)	Top-5 acc 85.547 (86.892)	lr 0.00467
Train [86][2840/3239]	Time 0.250 (0.626)	Data Time 0.001 (0.016)	Loss 2.3926 (2.3108)	Entropy 0.86886 (0.87231)	Top-1 acc 63.672 (68.665)	Top-5 acc 83.984 (86.888)	lr 0.00466
Train [86][2850/3239]	Time 0.233 (0.625)	Data Time 0.001 (0.016)	Loss 2.2057 (2.3108)	Entropy 0.86887 (0.87230)	Top-1 acc 73.438 (68.666)	Top-5 acc 88.281 (86.890)	lr 0.00466
Train [86][2860/3239]	Time 0.240 (0.625)	Data Time 0.001 (0.015)	Loss 2.1719 (2.3107)	Entropy 0.86908 (0.87229)	Top-1 acc 72.656 (68.669)	Top-5 acc 88.672 (86.893)	lr 0.00466
Train [86][2870/3239]	Time 0.231 (0.624)	Data Time 0.001 (0.015)	Loss 2.1882 (2.3109)	Entropy 0.86911 (0.87228)	Top-1 acc 70.703 (68.665)	Top-5 acc 89.453 (86.892)	lr 0.00466
Train [86][2880/3239]	Time 0.256 (0.624)	Data Time 0.001 (0.015)	Loss 2.4201 (2.3107)	Entropy 0.86912 (0.87227)	Top-1 acc 65.234 (68.666)	Top-5 acc 85.156 (86.893)	lr 0.00466
Train [86][2890/3239]	Time 0.231 (0.623)	Data Time 0.001 (0.015)	Loss 2.3190 (2.3108)	Entropy 0.86912 (0.87226)	Top-1 acc 69.922 (68.663)	Top-5 acc 88.672 (86.893)	lr 0.00466
Train [86][2900/3239]	Time 0.229 (0.623)	Data Time 0.001 (0.015)	Loss 2.3100 (2.3108)	Entropy 0.86908 (0.87225)	Top-1 acc 68.359 (68.661)	Top-5 acc 87.500 (86.894)	lr 0.00466
Train [86][2910/3239]	Time 0.323 (0.622)	Data Time 0.001 (0.015)	Loss 2.3363 (2.3110)	Entropy 0.86922 (0.87224)	Top-1 acc 67.969 (68.659)	Top-5 acc 83.594 (86.889)	lr 0.00466
Train [86][2920/3239]	Time 0.264 (0.622)	Data Time 0.002 (0.015)	Loss 2.2574 (2.3109)	Entropy 0.86918 (0.87222)	Top-1 acc 67.578 (68.662)	Top-5 acc 90.234 (86.891)	lr 0.00466
Train [86][2930/3239]	Time 0.234 (0.621)	Data Time 0.001 (0.015)	Loss 2.2694 (2.3108)	Entropy 0.86918 (0.87221)	Top-1 acc 70.703 (68.664)	Top-5 acc 85.938 (86.891)	lr 0.00466
Train [86][2940/3239]	Time 0.236 (0.621)	Data Time 0.001 (0.015)	Loss 2.4101 (2.3111)	Entropy 0.86914 (0.87220)	Top-1 acc 67.578 (68.659)	Top-5 acc 86.328 (86.887)	lr 0.00466
Train [86][2950/3239]	Time 0.318 (0.620)	Data Time 0.001 (0.015)	Loss 2.3087 (2.3111)	Entropy 0.86901 (0.87219)	Top-1 acc 70.312 (68.659)	Top-5 acc 86.719 (86.884)	lr 0.00466
Train [86][2960/3239]	Time 0.225 (0.620)	Data Time 0.001 (0.015)	Loss 2.1966 (2.3110)	Entropy 0.86900 (0.87218)	Top-1 acc 71.484 (68.660)	Top-5 acc 89.844 (86.887)	lr 0.00466
Train [86][2970/3239]	Time 0.233 (0.619)	Data Time 0.001 (0.015)	Loss 2.3221 (2.3108)	Entropy 0.86896 (0.87217)	Top-1 acc 70.703 (68.661)	Top-5 acc 86.719 (86.892)	lr 0.00465
Train [86][2980/3239]	Time 0.218 (0.619)	Data Time 0.001 (0.015)	Loss 2.3611 (2.3109)	Entropy 0.86888 (0.87216)	Top-1 acc 69.531 (68.663)	Top-5 acc 86.719 (86.891)	lr 0.00465
Train [86][2990/3239]	Time 0.273 (0.618)	Data Time 0.001 (0.015)	Loss 2.2949 (2.3110)	Entropy 0.86886 (0.87215)	Top-1 acc 67.578 (68.663)	Top-5 acc 88.281 (86.892)	lr 0.00465
Train [86][3000/3239]	Time 0.224 (0.618)	Data Time 0.001 (0.015)	Loss 2.4848 (2.3112)	Entropy 0.86885 (0.87214)	Top-1 acc 66.016 (68.659)	Top-5 acc 84.766 (86.889)	lr 0.00465
Train [86][3010/3239]	Time 0.236 (0.635)	Data Time 0.003 (0.015)	Loss 2.5867 (2.3113)	Entropy 0.86884 (0.87213)	Top-1 acc 59.375 (68.655)	Top-5 acc 81.250 (86.887)	lr 0.00465
Train [86][3020/3239]	Time 0.291 (0.635)	Data Time 0.002 (0.015)	Loss 2.2578 (2.3115)	Entropy 0.86880 (0.87212)	Top-1 acc 69.531 (68.651)	Top-5 acc 87.500 (86.881)	lr 0.00465
Train [86][3030/3239]	Time 0.246 (0.634)	Data Time 0.002 (0.015)	Loss 2.1208 (2.3116)	Entropy 0.86881 (0.87211)	Top-1 acc 71.875 (68.649)	Top-5 acc 89.844 (86.879)	lr 0.00465
Train [86][3040/3239]	Time 0.280 (0.634)	Data Time 0.002 (0.015)	Loss 2.3142 (2.3116)	Entropy 0.86889 (0.87210)	Top-1 acc 67.969 (68.649)	Top-5 acc 86.328 (86.876)	lr 0.00465
Train [86][3050/3239]	Time 0.225 (0.633)	Data Time 0.001 (0.015)	Loss 2.3596 (2.3116)	Entropy 0.86891 (0.87209)	Top-1 acc 68.359 (68.652)	Top-5 acc 86.719 (86.876)	lr 0.00465
Train [86][3060/3239]	Time 0.265 (0.633)	Data Time 0.001 (0.015)	Loss 2.3914 (2.3118)	Entropy 0.86890 (0.87208)	Top-1 acc 67.969 (68.647)	Top-5 acc 83.203 (86.872)	lr 0.00465
Train [86][3070/3239]	Time 0.221 (0.632)	Data Time 0.001 (0.015)	Loss 2.6321 (2.3120)	Entropy 0.86889 (0.87206)	Top-1 acc 60.156 (68.636)	Top-5 acc 80.469 (86.868)	lr 0.00465
Train [86][3080/3239]	Time 0.235 (0.632)	Data Time 0.001 (0.014)	Loss 2.3195 (2.3122)	Entropy 0.86883 (0.87205)	Top-1 acc 68.750 (68.632)	Top-5 acc 90.234 (86.866)	lr 0.00465
Train [86][3090/3239]	Time 0.250 (0.631)	Data Time 0.001 (0.014)	Loss 2.4002 (2.3123)	Entropy 0.86875 (0.87204)	Top-1 acc 64.453 (68.629)	Top-5 acc 84.766 (86.864)	lr 0.00465
Train [86][3100/3239]	Time 0.237 (0.631)	Data Time 0.001 (0.014)	Loss 2.2558 (2.3125)	Entropy 0.86877 (0.87203)	Top-1 acc 69.531 (68.625)	Top-5 acc 86.719 (86.858)	lr 0.00464
Train [86][3110/3239]	Time 0.265 (0.630)	Data Time 0.001 (0.014)	Loss 2.2668 (2.3124)	Entropy 0.86864 (0.87202)	Top-1 acc 69.922 (68.626)	Top-5 acc 88.672 (86.860)	lr 0.00464
Train [86][3120/3239]	Time 0.302 (0.630)	Data Time 0.001 (0.014)	Loss 2.3936 (2.3123)	Entropy 0.86859 (0.87201)	Top-1 acc 66.406 (68.627)	Top-5 acc 85.547 (86.861)	lr 0.00464
Train [86][3130/3239]	Time 0.225 (0.629)	Data Time 0.001 (0.014)	Loss 2.3478 (2.3124)	Entropy 0.86857 (0.87200)	Top-1 acc 69.531 (68.629)	Top-5 acc 87.109 (86.860)	lr 0.00464
Train [86][3140/3239]	Time 0.222 (0.629)	Data Time 0.001 (0.014)	Loss 2.2571 (2.3124)	Entropy 0.86856 (0.87199)	Top-1 acc 71.094 (68.629)	Top-5 acc 86.719 (86.860)	lr 0.00464
Train [86][3150/3239]	Time 0.274 (0.628)	Data Time 0.001 (0.014)	Loss 2.2716 (2.3123)	Entropy 0.86852 (0.87198)	Top-1 acc 71.484 (68.631)	Top-5 acc 86.328 (86.863)	lr 0.00464
Train [86][3160/3239]	Time 0.308 (0.628)	Data Time 0.001 (0.014)	Loss 2.2901 (2.3125)	Entropy 0.86847 (0.87197)	Top-1 acc 69.141 (68.628)	Top-5 acc 86.328 (86.860)	lr 0.00464
Train [86][3170/3239]	Time 0.228 (0.627)	Data Time 0.001 (0.014)	Loss 2.5267 (2.3127)	Entropy 0.86847 (0.87196)	Top-1 acc 64.062 (68.625)	Top-5 acc 82.031 (86.858)	lr 0.00464
Train [86][3180/3239]	Time 0.220 (0.627)	Data Time 0.000 (0.014)	Loss 2.1748 (2.3127)	Entropy 0.86842 (0.87195)	Top-1 acc 71.484 (68.624)	Top-5 acc 89.844 (86.858)	lr 0.00464
Train [86][3190/3239]	Time 0.237 (0.626)	Data Time 0.000 (0.014)	Loss 2.2601 (2.3127)	Entropy 0.86840 (0.87194)	Top-1 acc 68.359 (68.623)	Top-5 acc 88.672 (86.857)	lr 0.00464
Train [86][3200/3239]	Time 0.232 (0.626)	Data Time 0.000 (0.014)	Loss 2.1479 (2.3127)	Entropy 0.86835 (0.87192)	Top-1 acc 73.828 (68.626)	Top-5 acc 91.406 (86.858)	lr 0.00464
Train [86][3210/3239]	Time 0.222 (0.625)	Data Time 0.000 (0.014)	Loss 2.2994 (2.3126)	Entropy 0.86824 (0.87191)	Top-1 acc 70.703 (68.630)	Top-5 acc 90.234 (86.863)	lr 0.00464
Train [86][3220/3239]	Time 0.221 (0.625)	Data Time 0.000 (0.014)	Loss 2.2256 (2.3124)	Entropy 0.86821 (0.87190)	Top-1 acc 72.656 (68.634)	Top-5 acc 89.062 (86.866)	lr 0.00464
Train [86][3230/3239]	Time 0.228 (0.624)	Data Time 0.000 (0.014)	Loss 2.2630 (2.3125)	Entropy 0.86811 (0.87189)	Top-1 acc 69.922 (68.630)	Top-5 acc 85.938 (86.865)	lr 0.00463
Train [86][3239/3239]	Time 2.342 (0.624)	Data Time 0.000 (0.014)	Loss 2.2110 (2.3125)	Entropy 0.86811 (0.87188)	Top-1 acc 71.605 (68.633)	Top-5 acc 91.358 (86.865)	lr 0.00463
==========Valid [86/120]	loss 1.267	top-1 acc 70.757 (70.789)	top-5 acc 89.009	Train top-1 68.633	top-5 86.865	Entropy 0.86811	Latency-None: 0.000ms	Flops: 546.53M
Train [87][0/3239]	Time 39.686 (39.686)	Data Time 38.386 (38.386)	Loss 2.2010 (2.2010)	Entropy 0.86806 (0.86806)	Top-1 acc 70.703 (70.703)	Top-5 acc 91.406 (91.406)	lr 0.00463
Train [87][10/3239]	Time 2.962 (4.328)	Data Time 0.004 (3.632)	Loss 2.4071 (2.2937)	Entropy 0.86806 (0.86806)	Top-1 acc 66.797 (68.963)	Top-5 acc 84.766 (87.038)	lr 0.00463
Train [87][20/3239]	Time 0.227 (2.376)	Data Time 0.001 (1.903)	Loss 2.3723 (2.2988)	Entropy 0.86809 (0.86807)	Top-1 acc 66.797 (69.234)	Top-5 acc 84.766 (86.719)	lr 0.00463
Train [87][30/3239]	Time 0.243 (1.764)	Data Time 0.001 (1.290)	Loss 2.1709 (2.3058)	Entropy 0.86806 (0.86807)	Top-1 acc 72.656 (69.090)	Top-5 acc 90.234 (86.719)	lr 0.00463
Train [87][40/3239]	Time 0.242 (1.452)	Data Time 0.001 (0.975)	Loss 2.2582 (2.2968)	Entropy 0.86808 (0.86807)	Top-1 acc 69.531 (69.379)	Top-5 acc 86.719 (87.062)	lr 0.00463
Train [87][50/3239]	Time 0.226 (1.260)	Data Time 0.001 (0.784)	Loss 2.1404 (2.2912)	Entropy 0.86806 (0.86807)	Top-1 acc 72.656 (69.631)	Top-5 acc 90.234 (87.086)	lr 0.00463
Train [87][60/3239]	Time 0.244 (1.135)	Data Time 0.001 (0.656)	Loss 2.3572 (2.3057)	Entropy 0.86808 (0.86807)	Top-1 acc 66.016 (69.153)	Top-5 acc 88.281 (86.892)	lr 0.00463
Train [87][70/3239]	Time 0.228 (1.044)	Data Time 0.001 (0.564)	Loss 2.1830 (2.2968)	Entropy 0.86813 (0.86808)	Top-1 acc 73.438 (69.273)	Top-5 acc 90.234 (87.027)	lr 0.00463
Train [87][80/3239]	Time 0.223 (0.975)	Data Time 0.001 (0.494)	Loss 2.2797 (2.2965)	Entropy 0.86816 (0.86808)	Top-1 acc 67.188 (69.276)	Top-5 acc 86.328 (86.989)	lr 0.00463
Train [87][90/3239]	Time 0.220 (0.919)	Data Time 0.001 (0.440)	Loss 2.4114 (2.2974)	Entropy 0.86816 (0.86809)	Top-1 acc 62.891 (69.128)	Top-5 acc 87.109 (87.045)	lr 0.00463
Train [87][100/3239]	Time 0.358 (0.877)	Data Time 0.001 (0.397)	Loss 2.3058 (2.2934)	Entropy 0.86817 (0.86810)	Top-1 acc 67.969 (69.226)	Top-5 acc 87.500 (87.160)	lr 0.00463
Train [87][110/3239]	Time 0.276 (1.309)	Data Time 0.004 (0.361)	Loss 2.3163 (2.2891)	Entropy 0.86815 (0.86811)	Top-1 acc 67.969 (69.366)	Top-5 acc 84.766 (87.208)	lr 0.00462
Train [87][120/3239]	Time 3.536 (1.256)	Data Time 0.003 (0.332)	Loss 2.3109 (2.2898)	Entropy 0.86815 (0.86811)	Top-1 acc 69.531 (69.302)	Top-5 acc 85.938 (87.177)	lr 0.00462
Train [87][130/3239]	Time 0.234 (1.178)	Data Time 0.001 (0.306)	Loss 2.4129 (2.2921)	Entropy 0.86811 (0.86811)	Top-1 acc 66.406 (69.144)	Top-5 acc 86.328 (87.187)	lr 0.00462
Train [87][140/3239]	Time 0.372 (1.131)	Data Time 0.002 (0.285)	Loss 2.3243 (2.2932)	Entropy 0.86810 (0.86811)	Top-1 acc 66.797 (69.121)	Top-5 acc 87.891 (87.179)	lr 0.00462
Train [87][150/3239]	Time 0.225 (1.088)	Data Time 0.001 (0.266)	Loss 2.4904 (2.2947)	Entropy 0.86808 (0.86811)	Top-1 acc 67.578 (69.125)	Top-5 acc 83.594 (87.159)	lr 0.00462
Train [87][160/3239]	Time 0.215 (1.051)	Data Time 0.001 (0.250)	Loss 2.2214 (2.2922)	Entropy 0.86817 (0.86811)	Top-1 acc 70.312 (69.187)	Top-5 acc 87.891 (87.219)	lr 0.00462
Train [87][170/3239]	Time 0.242 (1.018)	Data Time 0.001 (0.235)	Loss 2.2759 (2.2926)	Entropy 0.86812 (0.86811)	Top-1 acc 68.750 (69.163)	Top-5 acc 87.891 (87.203)	lr 0.00462
Train [87][180/3239]	Time 0.253 (0.988)	Data Time 0.001 (0.222)	Loss 2.2066 (2.2897)	Entropy 0.86809 (0.86811)	Top-1 acc 69.922 (69.236)	Top-5 acc 87.109 (87.252)	lr 0.00462
Train [87][190/3239]	Time 0.224 (0.963)	Data Time 0.002 (0.211)	Loss 2.3240 (2.2902)	Entropy 0.86808 (0.86811)	Top-1 acc 67.969 (69.259)	Top-5 acc 87.500 (87.234)	lr 0.00462
Train [87][200/3239]	Time 0.262 (0.939)	Data Time 0.001 (0.200)	Loss 3.2192 (2.2948)	Entropy 0.86806 (0.86811)	Top-1 acc 48.438 (69.110)	Top-5 acc 75.391 (87.164)	lr 0.00462
Train [87][210/3239]	Time 0.235 (0.918)	Data Time 0.001 (0.191)	Loss 2.2887 (2.2936)	Entropy 0.86812 (0.86811)	Top-1 acc 69.141 (69.148)	Top-5 acc 85.156 (87.145)	lr 0.00462
Train [87][220/3239]	Time 0.225 (0.898)	Data Time 0.002 (0.182)	Loss 2.1803 (2.2945)	Entropy 0.86813 (0.86811)	Top-1 acc 72.656 (69.107)	Top-5 acc 88.672 (87.132)	lr 0.00462
Train [87][230/3239]	Time 2.615 (0.880)	Data Time 0.001 (0.175)	Loss 2.2572 (2.2931)	Entropy 0.86813 (0.86811)	Top-1 acc 70.312 (69.107)	Top-5 acc 85.938 (87.148)	lr 0.00462
Train [87][240/3239]	Time 0.244 (0.854)	Data Time 0.001 (0.167)	Loss 2.2969 (2.2940)	Entropy 0.86804 (0.86810)	Top-1 acc 72.266 (69.084)	Top-5 acc 88.672 (87.124)	lr 0.00461
Train [87][250/3239]	Time 0.287 (0.839)	Data Time 0.002 (0.161)	Loss 2.3726 (2.2942)	Entropy 0.86803 (0.86810)	Top-1 acc 70.703 (69.113)	Top-5 acc 85.156 (87.089)	lr 0.00461
Train [87][260/3239]	Time 0.251 (0.825)	Data Time 0.001 (0.155)	Loss 2.1846 (2.2934)	Entropy 0.86800 (0.86810)	Top-1 acc 74.219 (69.163)	Top-5 acc 88.281 (87.097)	lr 0.00461
Train [87][270/3239]	Time 0.328 (0.813)	Data Time 0.001 (0.149)	Loss 2.1558 (2.2924)	Entropy 0.86798 (0.86809)	Top-1 acc 72.656 (69.171)	Top-5 acc 88.672 (87.112)	lr 0.00461
Train [87][280/3239]	Time 0.237 (0.801)	Data Time 0.001 (0.144)	Loss 2.1162 (2.2914)	Entropy 0.86793 (0.86809)	Top-1 acc 76.172 (69.184)	Top-5 acc 89.453 (87.134)	lr 0.00461
Train [87][290/3239]	Time 0.231 (0.790)	Data Time 0.001 (0.139)	Loss 2.3674 (2.2923)	Entropy 0.86792 (0.86808)	Top-1 acc 68.359 (69.150)	Top-5 acc 86.328 (87.138)	lr 0.00461
Train [87][300/3239]	Time 0.235 (0.780)	Data Time 0.001 (0.134)	Loss 2.1647 (2.2929)	Entropy 0.86794 (0.86808)	Top-1 acc 72.266 (69.177)	Top-5 acc 88.672 (87.125)	lr 0.00461
Train [87][310/3239]	Time 0.249 (0.769)	Data Time 0.001 (0.130)	Loss 2.1806 (2.2925)	Entropy 0.86789 (0.86807)	Top-1 acc 69.531 (69.177)	Top-5 acc 89.453 (87.119)	lr 0.00461
Train [87][320/3239]	Time 0.254 (0.761)	Data Time 0.001 (0.126)	Loss 2.2807 (2.2930)	Entropy 0.86788 (0.86807)	Top-1 acc 70.312 (69.152)	Top-5 acc 88.281 (87.126)	lr 0.00461
Train [87][330/3239]	Time 0.252 (0.752)	Data Time 0.001 (0.122)	Loss 2.2559 (2.2916)	Entropy 0.86794 (0.86806)	Top-1 acc 72.656 (69.196)	Top-5 acc 87.109 (87.151)	lr 0.00461
Train [87][340/3239]	Time 2.668 (0.744)	Data Time 0.001 (0.119)	Loss 2.2276 (2.2915)	Entropy 0.86794 (0.86806)	Top-1 acc 72.656 (69.192)	Top-5 acc 89.062 (87.162)	lr 0.00461
Train [87][350/3239]	Time 0.245 (0.730)	Data Time 0.001 (0.115)	Loss 2.2300 (2.2918)	Entropy 0.86793 (0.86805)	Top-1 acc 71.094 (69.193)	Top-5 acc 87.891 (87.155)	lr 0.00461
Train [87][360/3239]	Time 0.355 (0.723)	Data Time 0.001 (0.112)	Loss 2.4811 (2.2921)	Entropy 0.86790 (0.86805)	Top-1 acc 64.062 (69.161)	Top-5 acc 85.938 (87.156)	lr 0.00461
Train [87][370/3239]	Time 0.250 (0.717)	Data Time 0.004 (0.109)	Loss 2.2079 (2.2917)	Entropy 0.86791 (0.86805)	Top-1 acc 71.094 (69.180)	Top-5 acc 85.938 (87.154)	lr 0.00460
Train [87][380/3239]	Time 0.241 (0.711)	Data Time 0.001 (0.106)	Loss 2.3430 (2.2925)	Entropy 0.86788 (0.86804)	Top-1 acc 69.922 (69.170)	Top-5 acc 84.766 (87.140)	lr 0.00460
Train [87][390/3239]	Time 0.238 (0.705)	Data Time 0.001 (0.104)	Loss 2.3304 (2.2929)	Entropy 0.86783 (0.86804)	Top-1 acc 71.094 (69.190)	Top-5 acc 84.766 (87.112)	lr 0.00460
Train [87][400/3239]	Time 0.330 (0.699)	Data Time 0.001 (0.101)	Loss 2.2783 (2.2929)	Entropy 0.86788 (0.86803)	Top-1 acc 69.531 (69.183)	Top-5 acc 87.109 (87.122)	lr 0.00460
Train [87][410/3239]	Time 0.223 (0.693)	Data Time 0.001 (0.099)	Loss 2.3966 (2.2930)	Entropy 0.86785 (0.86803)	Top-1 acc 66.797 (69.173)	Top-5 acc 84.766 (87.110)	lr 0.00460
Train [87][420/3239]	Time 0.227 (0.688)	Data Time 0.001 (0.097)	Loss 2.2048 (2.2928)	Entropy 0.86783 (0.86802)	Top-1 acc 72.266 (69.166)	Top-5 acc 88.281 (87.118)	lr 0.00460
Train [87][430/3239]	Time 0.234 (0.683)	Data Time 0.001 (0.094)	Loss 2.4060 (2.2940)	Entropy 0.86784 (0.86802)	Top-1 acc 65.234 (69.124)	Top-5 acc 86.719 (87.104)	lr 0.00460
Train [87][440/3239]	Time 0.221 (0.678)	Data Time 0.001 (0.092)	Loss 2.2086 (2.2941)	Entropy 0.86779 (0.86802)	Top-1 acc 71.875 (69.105)	Top-5 acc 89.453 (87.098)	lr 0.00460
Train [87][450/3239]	Time 2.502 (0.674)	Data Time 0.002 (0.090)	Loss 2.2042 (2.2941)	Entropy 0.86779 (0.86801)	Top-1 acc 73.047 (69.117)	Top-5 acc 89.453 (87.089)	lr 0.00460
Train [87][460/3239]	Time 0.249 (0.664)	Data Time 0.001 (0.088)	Loss 2.3890 (2.2940)	Entropy 0.86778 (0.86801)	Top-1 acc 66.016 (69.136)	Top-5 acc 85.547 (87.096)	lr 0.00460
Train [87][470/3239]	Time 0.224 (0.660)	Data Time 0.001 (0.086)	Loss 2.3521 (2.2942)	Entropy 0.86775 (0.86800)	Top-1 acc 67.578 (69.142)	Top-5 acc 88.672 (87.109)	lr 0.00460
Train [87][480/3239]	Time 0.256 (0.768)	Data Time 0.002 (0.085)	Loss 2.2465 (2.2942)	Entropy 0.86774 (0.86800)	Top-1 acc 71.094 (69.136)	Top-5 acc 89.453 (87.112)	lr 0.00460
Train [87][490/3239]	Time 0.311 (0.763)	Data Time 0.002 (0.083)	Loss 2.2437 (2.2941)	Entropy 0.86767 (0.86799)	Top-1 acc 70.703 (69.133)	Top-5 acc 86.328 (87.104)	lr 0.00460
Train [87][500/3239]	Time 0.233 (0.757)	Data Time 0.002 (0.081)	Loss 2.3737 (2.2948)	Entropy 0.86766 (0.86798)	Top-1 acc 68.359 (69.131)	Top-5 acc 87.109 (87.106)	lr 0.00459
Train [87][510/3239]	Time 0.246 (0.752)	Data Time 0.002 (0.080)	Loss 2.2449 (2.2944)	Entropy 0.86764 (0.86798)	Top-1 acc 72.266 (69.163)	Top-5 acc 87.500 (87.110)	lr 0.00459
Train [87][520/3239]	Time 0.236 (0.747)	Data Time 0.001 (0.078)	Loss 2.3496 (2.2943)	Entropy 0.86763 (0.86797)	Top-1 acc 66.016 (69.164)	Top-5 acc 85.547 (87.106)	lr 0.00459
Train [87][530/3239]	Time 0.247 (0.742)	Data Time 0.001 (0.077)	Loss 2.4804 (2.2943)	Entropy 0.86759 (0.86796)	Top-1 acc 65.234 (69.155)	Top-5 acc 83.203 (87.101)	lr 0.00459
Train [87][540/3239]	Time 0.235 (0.737)	Data Time 0.001 (0.075)	Loss 2.2612 (2.2952)	Entropy 0.86758 (0.86796)	Top-1 acc 71.094 (69.133)	Top-5 acc 89.844 (87.096)	lr 0.00459
Train [87][550/3239]	Time 0.236 (0.732)	Data Time 0.001 (0.074)	Loss 2.3059 (2.2948)	Entropy 0.86761 (0.86795)	Top-1 acc 66.016 (69.119)	Top-5 acc 86.719 (87.105)	lr 0.00459
Train [87][560/3239]	Time 2.544 (0.728)	Data Time 0.003 (0.073)	Loss 2.3797 (2.2953)	Entropy 0.86761 (0.86794)	Top-1 acc 67.188 (69.112)	Top-5 acc 84.766 (87.088)	lr 0.00459
Train [87][570/3239]	Time 0.231 (0.719)	Data Time 0.001 (0.072)	Loss 2.2986 (2.2959)	Entropy 0.86761 (0.86794)	Top-1 acc 71.875 (69.097)	Top-5 acc 85.938 (87.074)	lr 0.00459
Train [87][580/3239]	Time 0.229 (0.715)	Data Time 0.001 (0.070)	Loss 2.4150 (2.2959)	Entropy 0.86757 (0.86793)	Top-1 acc 67.969 (69.108)	Top-5 acc 83.203 (87.085)	lr 0.00459
Train [87][590/3239]	Time 0.219 (0.711)	Data Time 0.001 (0.069)	Loss 2.2624 (2.2955)	Entropy 0.86757 (0.86793)	Top-1 acc 69.141 (69.116)	Top-5 acc 89.062 (87.099)	lr 0.00459
Train [87][600/3239]	Time 0.222 (0.706)	Data Time 0.001 (0.068)	Loss 2.3580 (2.2956)	Entropy 0.86753 (0.86792)	Top-1 acc 67.969 (69.110)	Top-5 acc 86.328 (87.100)	lr 0.00459
Train [87][610/3239]	Time 0.230 (0.703)	Data Time 0.001 (0.067)	Loss 2.3149 (2.2950)	Entropy 0.86749 (0.86791)	Top-1 acc 66.406 (69.123)	Top-5 acc 85.938 (87.110)	lr 0.00459
Train [87][620/3239]	Time 0.375 (0.699)	Data Time 0.001 (0.066)	Loss 2.3934 (2.2951)	Entropy 0.86743 (0.86790)	Top-1 acc 65.625 (69.113)	Top-5 acc 84.766 (87.113)	lr 0.00458
Train [87][630/3239]	Time 0.241 (0.696)	Data Time 0.001 (0.065)	Loss 2.1739 (2.2954)	Entropy 0.86746 (0.86790)	Top-1 acc 71.484 (69.116)	Top-5 acc 89.062 (87.119)	lr 0.00458
Train [87][640/3239]	Time 0.233 (0.692)	Data Time 0.001 (0.064)	Loss 2.3010 (2.2964)	Entropy 0.86738 (0.86789)	Top-1 acc 69.141 (69.099)	Top-5 acc 87.109 (87.094)	lr 0.00458
Train [87][650/3239]	Time 0.240 (0.689)	Data Time 0.001 (0.063)	Loss 2.2945 (2.2962)	Entropy 0.86736 (0.86788)	Top-1 acc 73.828 (69.114)	Top-5 acc 84.766 (87.099)	lr 0.00458
Train [87][660/3239]	Time 0.322 (0.686)	Data Time 0.001 (0.062)	Loss 2.3251 (2.2957)	Entropy 0.86735 (0.86787)	Top-1 acc 64.844 (69.113)	Top-5 acc 86.328 (87.113)	lr 0.00458
Train [87][670/3239]	Time 2.497 (0.683)	Data Time 0.001 (0.061)	Loss 2.3397 (2.2962)	Entropy 0.86735 (0.86787)	Top-1 acc 63.281 (69.102)	Top-5 acc 86.328 (87.107)	lr 0.00458
Train [87][680/3239]	Time 0.238 (0.676)	Data Time 0.001 (0.060)	Loss 2.3428 (2.2961)	Entropy 0.86727 (0.86786)	Top-1 acc 67.578 (69.089)	Top-5 acc 85.938 (87.100)	lr 0.00458
Train [87][690/3239]	Time 0.230 (0.673)	Data Time 0.001 (0.059)	Loss 2.1499 (2.2961)	Entropy 0.86726 (0.86785)	Top-1 acc 67.969 (69.081)	Top-5 acc 92.969 (87.105)	lr 0.00458
Train [87][700/3239]	Time 0.219 (0.670)	Data Time 0.001 (0.059)	Loss 2.2055 (2.2955)	Entropy 0.86727 (0.86784)	Top-1 acc 69.141 (69.079)	Top-5 acc 89.453 (87.122)	lr 0.00458
Train [87][710/3239]	Time 0.255 (0.668)	Data Time 0.002 (0.058)	Loss 2.2774 (2.2960)	Entropy 0.86725 (0.86783)	Top-1 acc 69.922 (69.051)	Top-5 acc 86.328 (87.121)	lr 0.00458
Train [87][720/3239]	Time 0.215 (0.665)	Data Time 0.001 (0.057)	Loss 2.3642 (2.2959)	Entropy 0.86721 (0.86782)	Top-1 acc 65.234 (69.064)	Top-5 acc 86.328 (87.122)	lr 0.00458
Train [87][730/3239]	Time 0.255 (0.663)	Data Time 0.002 (0.056)	Loss 2.3964 (2.2960)	Entropy 0.86718 (0.86782)	Top-1 acc 67.578 (69.068)	Top-5 acc 85.547 (87.123)	lr 0.00458
Train [87][740/3239]	Time 0.283 (0.660)	Data Time 0.001 (0.056)	Loss 2.3034 (2.2959)	Entropy 0.86722 (0.86781)	Top-1 acc 71.094 (69.082)	Top-5 acc 87.500 (87.126)	lr 0.00458
Train [87][750/3239]	Time 0.293 (0.658)	Data Time 0.001 (0.055)	Loss 2.2400 (2.2962)	Entropy 0.86728 (0.86780)	Top-1 acc 71.484 (69.071)	Top-5 acc 87.500 (87.128)	lr 0.00457
Train [87][760/3239]	Time 0.231 (0.655)	Data Time 0.001 (0.054)	Loss 2.2683 (2.2967)	Entropy 0.86724 (0.86779)	Top-1 acc 70.312 (69.077)	Top-5 acc 88.281 (87.114)	lr 0.00457
Train [87][770/3239]	Time 0.244 (0.653)	Data Time 0.001 (0.053)	Loss 2.4205 (2.2965)	Entropy 0.86724 (0.86779)	Top-1 acc 68.750 (69.081)	Top-5 acc 82.812 (87.120)	lr 0.00457
Train [87][780/3239]	Time 2.522 (0.651)	Data Time 0.001 (0.053)	Loss 2.3333 (2.2967)	Entropy 0.86724 (0.86778)	Top-1 acc 68.750 (69.069)	Top-5 acc 86.328 (87.119)	lr 0.00457
Train [87][790/3239]	Time 0.262 (0.645)	Data Time 0.002 (0.052)	Loss 2.3032 (2.2966)	Entropy 0.86722 (0.86777)	Top-1 acc 68.359 (69.071)	Top-5 acc 86.328 (87.118)	lr 0.00457
Train [87][800/3239]	Time 0.239 (0.644)	Data Time 0.001 (0.051)	Loss 2.3112 (2.2969)	Entropy 0.86714 (0.86776)	Top-1 acc 69.141 (69.073)	Top-5 acc 85.547 (87.119)	lr 0.00457
Train [87][810/3239]	Time 0.248 (0.641)	Data Time 0.001 (0.051)	Loss 2.4467 (2.2973)	Entropy 0.86713 (0.86776)	Top-1 acc 68.359 (69.070)	Top-5 acc 83.594 (87.108)	lr 0.00457
Train [87][820/3239]	Time 0.259 (0.639)	Data Time 0.001 (0.050)	Loss 2.3377 (2.2976)	Entropy 0.86714 (0.86775)	Top-1 acc 70.312 (69.057)	Top-5 acc 85.156 (87.092)	lr 0.00457
Train [87][830/3239]	Time 0.230 (0.638)	Data Time 0.001 (0.050)	Loss 2.2914 (2.2972)	Entropy 0.86704 (0.86774)	Top-1 acc 71.875 (69.059)	Top-5 acc 89.453 (87.106)	lr 0.00457
Train [87][840/3239]	Time 0.399 (0.699)	Data Time 0.003 (0.049)	Loss 2.3772 (2.2973)	Entropy 0.86698 (0.86773)	Top-1 acc 67.188 (69.064)	Top-5 acc 86.719 (87.108)	lr 0.00457
Train [87][850/3239]	Time 0.228 (0.697)	Data Time 0.002 (0.049)	Loss 2.2242 (2.2974)	Entropy 0.86698 (0.86772)	Top-1 acc 69.141 (69.053)	Top-5 acc 87.891 (87.100)	lr 0.00457
Train [87][860/3239]	Time 0.229 (0.694)	Data Time 0.002 (0.048)	Loss 2.2949 (2.2977)	Entropy 0.86697 (0.86771)	Top-1 acc 69.531 (69.044)	Top-5 acc 86.719 (87.093)	lr 0.00457
Train [87][870/3239]	Time 0.230 (0.691)	Data Time 0.002 (0.047)	Loss 2.5000 (2.2979)	Entropy 0.86695 (0.86771)	Top-1 acc 60.938 (69.030)	Top-5 acc 83.984 (87.087)	lr 0.00457
Train [87][880/3239]	Time 0.299 (0.689)	Data Time 0.002 (0.047)	Loss 2.2125 (2.2973)	Entropy 0.86694 (0.86770)	Top-1 acc 71.094 (69.042)	Top-5 acc 89.062 (87.095)	lr 0.00456
Train [87][890/3239]	Time 2.490 (0.686)	Data Time 0.001 (0.046)	Loss 2.4417 (2.2974)	Entropy 0.86694 (0.86769)	Top-1 acc 65.234 (69.036)	Top-5 acc 84.375 (87.094)	lr 0.00456
Train [87][900/3239]	Time 0.255 (0.682)	Data Time 0.001 (0.046)	Loss 2.2485 (2.2969)	Entropy 0.86685 (0.86768)	Top-1 acc 70.703 (69.047)	Top-5 acc 87.500 (87.101)	lr 0.00456
Train [87][910/3239]	Time 0.219 (0.679)	Data Time 0.001 (0.045)	Loss 2.2296 (2.2977)	Entropy 0.86685 (0.86767)	Top-1 acc 71.094 (69.028)	Top-5 acc 87.109 (87.085)	lr 0.00456
Train [87][920/3239]	Time 0.353 (0.677)	Data Time 0.001 (0.045)	Loss 2.3456 (2.2975)	Entropy 0.86684 (0.86766)	Top-1 acc 67.969 (69.026)	Top-5 acc 85.938 (87.092)	lr 0.00456
Train [87][930/3239]	Time 0.235 (0.675)	Data Time 0.001 (0.045)	Loss 2.1781 (2.2972)	Entropy 0.86680 (0.86765)	Top-1 acc 73.047 (69.036)	Top-5 acc 87.891 (87.098)	lr 0.00456
Train [87][940/3239]	Time 0.207 (0.673)	Data Time 0.001 (0.044)	Loss 2.1093 (2.2968)	Entropy 0.86671 (0.86764)	Top-1 acc 69.141 (69.038)	Top-5 acc 89.844 (87.112)	lr 0.00456
Train [87][950/3239]	Time 0.237 (0.670)	Data Time 0.001 (0.044)	Loss 2.3548 (2.2967)	Entropy 0.86666 (0.86763)	Top-1 acc 67.969 (69.051)	Top-5 acc 87.500 (87.117)	lr 0.00456
Train [87][960/3239]	Time 0.275 (0.669)	Data Time 0.002 (0.043)	Loss 2.3487 (2.2970)	Entropy 0.86659 (0.86762)	Top-1 acc 68.750 (69.042)	Top-5 acc 85.156 (87.104)	lr 0.00456
Train [87][970/3239]	Time 0.293 (0.668)	Data Time 0.002 (0.043)	Loss 2.2357 (2.2975)	Entropy 0.86665 (0.86761)	Top-1 acc 67.578 (69.039)	Top-5 acc 89.453 (87.096)	lr 0.00456
Train [87][980/3239]	Time 0.272 (0.667)	Data Time 0.002 (0.042)	Loss 2.3614 (2.2975)	Entropy 0.86661 (0.86760)	Top-1 acc 67.578 (69.045)	Top-5 acc 86.328 (87.092)	lr 0.00456
Train [87][990/3239]	Time 0.289 (0.665)	Data Time 0.001 (0.042)	Loss 2.1719 (2.2976)	Entropy 0.86659 (0.86759)	Top-1 acc 74.609 (69.039)	Top-5 acc 88.281 (87.086)	lr 0.00456
Train [87][1000/3239]	Time 2.593 (0.663)	Data Time 0.001 (0.042)	Loss 2.4193 (2.2977)	Entropy 0.86659 (0.86758)	Top-1 acc 64.453 (69.031)	Top-5 acc 85.547 (87.089)	lr 0.00456
Train [87][1010/3239]	Time 0.353 (0.659)	Data Time 0.002 (0.041)	Loss 2.1102 (2.2974)	Entropy 0.86652 (0.86757)	Top-1 acc 75.391 (69.046)	Top-5 acc 90.625 (87.102)	lr 0.00455
Train [87][1020/3239]	Time 0.246 (0.657)	Data Time 0.002 (0.041)	Loss 2.2532 (2.2971)	Entropy 0.86659 (0.86756)	Top-1 acc 67.969 (69.047)	Top-5 acc 89.844 (87.103)	lr 0.00455
Train [87][1030/3239]	Time 0.249 (0.656)	Data Time 0.001 (0.040)	Loss 2.1288 (2.2967)	Entropy 0.86641 (0.86755)	Top-1 acc 72.656 (69.054)	Top-5 acc 89.844 (87.108)	lr 0.00455
Train [87][1040/3239]	Time 0.228 (0.654)	Data Time 0.001 (0.040)	Loss 2.3141 (2.2973)	Entropy 0.86638 (0.86754)	Top-1 acc 68.359 (69.045)	Top-5 acc 85.547 (87.093)	lr 0.00455
Train [87][1050/3239]	Time 0.241 (0.652)	Data Time 0.001 (0.040)	Loss 2.1494 (2.2974)	Entropy 0.86632 (0.86753)	Top-1 acc 73.047 (69.043)	Top-5 acc 89.062 (87.086)	lr 0.00455
Train [87][1060/3239]	Time 0.250 (0.651)	Data Time 0.002 (0.039)	Loss 2.2420 (2.2976)	Entropy 0.86622 (0.86752)	Top-1 acc 69.922 (69.038)	Top-5 acc 86.719 (87.080)	lr 0.00455
Train [87][1070/3239]	Time 0.256 (0.649)	Data Time 0.001 (0.039)	Loss 2.2203 (2.2975)	Entropy 0.86615 (0.86750)	Top-1 acc 71.484 (69.040)	Top-5 acc 89.062 (87.086)	lr 0.00455
Train [87][1080/3239]	Time 0.218 (0.648)	Data Time 0.001 (0.039)	Loss 3.7636 (2.2988)	Entropy 0.86604 (0.86749)	Top-1 acc 39.453 (69.009)	Top-5 acc 66.016 (87.069)	lr 0.00455
Train [87][1090/3239]	Time 0.225 (0.647)	Data Time 0.001 (0.038)	Loss 2.2912 (2.2989)	Entropy 0.86593 (0.86748)	Top-1 acc 62.891 (69.011)	Top-5 acc 87.500 (87.066)	lr 0.00455
Train [87][1100/3239]	Time 0.230 (0.645)	Data Time 0.001 (0.038)	Loss 2.4305 (2.2995)	Entropy 0.86595 (0.86746)	Top-1 acc 66.406 (68.997)	Top-5 acc 83.984 (87.058)	lr 0.00455
Train [87][1110/3239]	Time 2.577 (0.644)	Data Time 0.001 (0.038)	Loss 2.0943 (2.2995)	Entropy 0.86595 (0.86745)	Top-1 acc 77.734 (68.996)	Top-5 acc 87.500 (87.053)	lr 0.00455
Train [87][1120/3239]	Time 0.250 (0.640)	Data Time 0.001 (0.037)	Loss 2.3374 (2.2993)	Entropy 0.86586 (0.86744)	Top-1 acc 68.750 (69.001)	Top-5 acc 85.547 (87.054)	lr 0.00455
Train [87][1130/3239]	Time 0.255 (0.639)	Data Time 0.001 (0.037)	Loss 2.2907 (2.2993)	Entropy 0.86588 (0.86742)	Top-1 acc 70.312 (69.003)	Top-5 acc 85.938 (87.052)	lr 0.00455
Train [87][1140/3239]	Time 0.342 (0.637)	Data Time 0.001 (0.037)	Loss 2.2190 (2.2993)	Entropy 0.86567 (0.86741)	Top-1 acc 74.219 (69.006)	Top-5 acc 89.453 (87.046)	lr 0.00454
Train [87][1150/3239]	Time 0.221 (0.636)	Data Time 0.001 (0.036)	Loss 2.3263 (2.2996)	Entropy 0.86560 (0.86739)	Top-1 acc 67.578 (69.001)	Top-5 acc 85.156 (87.037)	lr 0.00454
Train [87][1160/3239]	Time 0.245 (0.635)	Data Time 0.001 (0.036)	Loss 2.2321 (2.2994)	Entropy 0.86559 (0.86738)	Top-1 acc 71.484 (69.012)	Top-5 acc 87.500 (87.041)	lr 0.00454
Train [87][1170/3239]	Time 0.222 (0.633)	Data Time 0.001 (0.036)	Loss 2.2859 (2.2994)	Entropy 0.86557 (0.86736)	Top-1 acc 67.969 (69.000)	Top-5 acc 86.719 (87.042)	lr 0.00454
Train [87][1180/3239]	Time 0.344 (0.632)	Data Time 0.002 (0.035)	Loss 2.3209 (2.2994)	Entropy 0.86554 (0.86735)	Top-1 acc 70.312 (69.002)	Top-5 acc 85.156 (87.037)	lr 0.00454
Train [87][1190/3239]	Time 0.227 (0.631)	Data Time 0.001 (0.035)	Loss 2.1252 (2.2993)	Entropy 0.86547 (0.86733)	Top-1 acc 72.266 (69.005)	Top-5 acc 92.188 (87.040)	lr 0.00454
Train [87][1200/3239]	Time 0.376 (0.671)	Data Time 0.002 (0.035)	Loss 2.3317 (2.2993)	Entropy 0.86550 (0.86731)	Top-1 acc 68.750 (69.003)	Top-5 acc 86.328 (87.037)	lr 0.00454
Train [87][1210/3239]	Time 0.215 (0.671)	Data Time 0.002 (0.035)	Loss 2.3855 (2.2993)	Entropy 0.86553 (0.86730)	Top-1 acc 67.188 (69.000)	Top-5 acc 82.422 (87.032)	lr 0.00454
Train [87][1220/3239]	Time 2.735 (0.669)	Data Time 0.002 (0.034)	Loss 2.3947 (2.2996)	Entropy 0.86553 (0.86729)	Top-1 acc 64.453 (69.001)	Top-5 acc 84.766 (87.028)	lr 0.00454
Train [87][1230/3239]	Time 0.245 (0.666)	Data Time 0.002 (0.034)	Loss 2.2526 (2.2996)	Entropy 0.86548 (0.86727)	Top-1 acc 70.312 (69.002)	Top-5 acc 87.109 (87.028)	lr 0.00454
Train [87][1240/3239]	Time 0.236 (0.664)	Data Time 0.001 (0.034)	Loss 2.3017 (2.2998)	Entropy 0.86547 (0.86726)	Top-1 acc 70.312 (69.004)	Top-5 acc 87.109 (87.025)	lr 0.00454
Train [87][1250/3239]	Time 0.229 (0.663)	Data Time 0.001 (0.034)	Loss 2.2452 (2.2998)	Entropy 0.86537 (0.86724)	Top-1 acc 69.922 (69.004)	Top-5 acc 89.062 (87.028)	lr 0.00454
Train [87][1260/3239]	Time 0.243 (0.662)	Data Time 0.001 (0.033)	Loss 2.3806 (2.2999)	Entropy 0.86525 (0.86723)	Top-1 acc 64.062 (69.002)	Top-5 acc 87.500 (87.029)	lr 0.00454
Train [87][1270/3239]	Time 0.241 (0.660)	Data Time 0.001 (0.033)	Loss 2.2991 (2.2995)	Entropy 0.86518 (0.86721)	Top-1 acc 69.531 (69.007)	Top-5 acc 88.281 (87.036)	lr 0.00453
Train [87][1280/3239]	Time 0.227 (0.659)	Data Time 0.001 (0.033)	Loss 2.2894 (2.2994)	Entropy 0.86509 (0.86719)	Top-1 acc 70.312 (69.015)	Top-5 acc 87.500 (87.034)	lr 0.00453
Train [87][1290/3239]	Time 0.231 (0.657)	Data Time 0.001 (0.033)	Loss 2.3544 (2.2994)	Entropy 0.86506 (0.86718)	Top-1 acc 67.578 (69.021)	Top-5 acc 87.500 (87.032)	lr 0.00453
Train [87][1300/3239]	Time 0.220 (0.656)	Data Time 0.001 (0.032)	Loss 2.3792 (2.2992)	Entropy 0.86508 (0.86716)	Top-1 acc 64.844 (69.023)	Top-5 acc 85.938 (87.034)	lr 0.00453
Train [87][1310/3239]	Time 0.218 (0.654)	Data Time 0.001 (0.032)	Loss 2.2925 (2.2993)	Entropy 0.86514 (0.86715)	Top-1 acc 70.312 (69.025)	Top-5 acc 87.891 (87.031)	lr 0.00453
Train [87][1320/3239]	Time 0.271 (0.653)	Data Time 0.002 (0.032)	Loss 2.1816 (2.2990)	Entropy 0.86519 (0.86713)	Top-1 acc 75.391 (69.034)	Top-5 acc 88.672 (87.036)	lr 0.00453
Train [87][1330/3239]	Time 2.491 (0.652)	Data Time 0.001 (0.032)	Loss 2.1246 (2.2989)	Entropy 0.86519 (0.86712)	Top-1 acc 77.344 (69.039)	Top-5 acc 88.672 (87.035)	lr 0.00453
Train [87][1340/3239]	Time 0.236 (0.649)	Data Time 0.001 (0.031)	Loss 2.1877 (2.2990)	Entropy 0.86531 (0.86710)	Top-1 acc 70.703 (69.040)	Top-5 acc 88.281 (87.038)	lr 0.00453
Train [87][1350/3239]	Time 0.245 (0.648)	Data Time 0.001 (0.031)	Loss 2.2506 (2.2988)	Entropy 0.86524 (0.86709)	Top-1 acc 71.484 (69.046)	Top-5 acc 87.891 (87.039)	lr 0.00453
Train [87][1360/3239]	Time 0.225 (0.646)	Data Time 0.001 (0.031)	Loss 2.3437 (2.2985)	Entropy 0.86526 (0.86707)	Top-1 acc 68.750 (69.057)	Top-5 acc 88.281 (87.048)	lr 0.00453
Train [87][1370/3239]	Time 0.224 (0.645)	Data Time 0.001 (0.031)	Loss 2.2910 (2.2985)	Entropy 0.86523 (0.86706)	Top-1 acc 67.578 (69.050)	Top-5 acc 85.938 (87.052)	lr 0.00453
Train [87][1380/3239]	Time 0.242 (0.644)	Data Time 0.001 (0.031)	Loss 2.1246 (2.2989)	Entropy 0.86524 (0.86705)	Top-1 acc 72.656 (69.035)	Top-5 acc 91.016 (87.045)	lr 0.00453
Train [87][1390/3239]	Time 0.232 (0.643)	Data Time 0.001 (0.030)	Loss 2.3594 (2.2991)	Entropy 0.86519 (0.86703)	Top-1 acc 64.453 (69.021)	Top-5 acc 86.328 (87.040)	lr 0.00452
Train [87][1400/3239]	Time 0.248 (0.642)	Data Time 0.002 (0.030)	Loss 2.3730 (2.2991)	Entropy 0.86513 (0.86702)	Top-1 acc 67.188 (69.020)	Top-5 acc 84.375 (87.038)	lr 0.00452
Train [87][1410/3239]	Time 0.224 (0.641)	Data Time 0.001 (0.030)	Loss 2.1756 (2.2989)	Entropy 0.86509 (0.86701)	Top-1 acc 71.484 (69.022)	Top-5 acc 89.062 (87.039)	lr 0.00452
Train [87][1420/3239]	Time 0.227 (0.639)	Data Time 0.001 (0.030)	Loss 2.2609 (2.2993)	Entropy 0.86503 (0.86699)	Top-1 acc 71.094 (69.013)	Top-5 acc 88.672 (87.030)	lr 0.00452
Train [87][1430/3239]	Time 0.272 (0.638)	Data Time 0.001 (0.030)	Loss 2.4037 (2.2996)	Entropy 0.86500 (0.86698)	Top-1 acc 68.750 (69.004)	Top-5 acc 86.328 (87.020)	lr 0.00452
Train [87][1440/3239]	Time 2.514 (0.637)	Data Time 0.001 (0.029)	Loss 2.3865 (2.2997)	Entropy 0.86500 (0.86697)	Top-1 acc 64.844 (69.008)	Top-5 acc 86.719 (87.016)	lr 0.00452
Train [87][1450/3239]	Time 0.235 (0.635)	Data Time 0.001 (0.029)	Loss 2.2572 (2.2995)	Entropy 0.86484 (0.86695)	Top-1 acc 69.922 (69.011)	Top-5 acc 87.500 (87.016)	lr 0.00452
Train [87][1460/3239]	Time 0.220 (0.633)	Data Time 0.001 (0.029)	Loss 2.2962 (2.3001)	Entropy 0.86485 (0.86694)	Top-1 acc 69.922 (68.993)	Top-5 acc 89.453 (87.009)	lr 0.00452
Train [87][1470/3239]	Time 0.229 (0.632)	Data Time 0.001 (0.029)	Loss 2.1082 (2.2999)	Entropy 0.86482 (0.86692)	Top-1 acc 75.391 (68.996)	Top-5 acc 90.234 (87.016)	lr 0.00452
Train [87][1480/3239]	Time 0.291 (0.632)	Data Time 0.002 (0.029)	Loss 2.3893 (2.3004)	Entropy 0.86476 (0.86691)	Top-1 acc 64.844 (68.979)	Top-5 acc 86.719 (87.008)	lr 0.00452
Train [87][1490/3239]	Time 0.278 (0.631)	Data Time 0.003 (0.028)	Loss 2.5334 (2.3002)	Entropy 0.86473 (0.86689)	Top-1 acc 63.672 (68.985)	Top-5 acc 82.031 (87.008)	lr 0.00452
Train [87][1500/3239]	Time 0.223 (0.629)	Data Time 0.001 (0.028)	Loss 2.2593 (2.3001)	Entropy 0.86469 (0.86688)	Top-1 acc 67.969 (68.987)	Top-5 acc 88.672 (87.009)	lr 0.00452
Train [87][1510/3239]	Time 0.208 (0.628)	Data Time 0.001 (0.028)	Loss 2.2236 (2.3003)	Entropy 0.86469 (0.86687)	Top-1 acc 69.141 (68.975)	Top-5 acc 88.672 (87.006)	lr 0.00452
Train [87][1520/3239]	Time 0.226 (0.627)	Data Time 0.001 (0.028)	Loss 2.1909 (2.3000)	Entropy 0.86469 (0.86685)	Top-1 acc 72.266 (68.973)	Top-5 acc 89.844 (87.017)	lr 0.00451
Train [87][1530/3239]	Time 0.221 (0.627)	Data Time 0.001 (0.028)	Loss 2.8618 (2.3005)	Entropy 0.86465 (0.86684)	Top-1 acc 55.469 (68.961)	Top-5 acc 76.953 (87.006)	lr 0.00451
Train [87][1540/3239]	Time 0.273 (0.626)	Data Time 0.001 (0.028)	Loss 2.2431 (2.3006)	Entropy 0.86469 (0.86682)	Top-1 acc 70.703 (68.959)	Top-5 acc 87.891 (87.006)	lr 0.00451
Train [87][1550/3239]	Time 2.574 (0.625)	Data Time 0.001 (0.027)	Loss 2.4387 (2.3008)	Entropy 0.86469 (0.86681)	Top-1 acc 66.406 (68.952)	Top-5 acc 84.375 (87.005)	lr 0.00451
Train [87][1560/3239]	Time 0.248 (0.622)	Data Time 0.001 (0.027)	Loss 2.1857 (2.3008)	Entropy 0.86471 (0.86680)	Top-1 acc 71.875 (68.956)	Top-5 acc 90.234 (87.002)	lr 0.00451
Train [87][1570/3239]	Time 0.337 (0.655)	Data Time 0.002 (0.027)	Loss 2.4899 (2.3008)	Entropy 0.86472 (0.86678)	Top-1 acc 64.062 (68.954)	Top-5 acc 83.594 (86.998)	lr 0.00451
Train [87][1580/3239]	Time 0.229 (0.654)	Data Time 0.002 (0.027)	Loss 2.2257 (2.3010)	Entropy 0.86471 (0.86677)	Top-1 acc 70.703 (68.951)	Top-5 acc 89.844 (86.996)	lr 0.00451
Train [87][1590/3239]	Time 0.236 (0.653)	Data Time 0.001 (0.027)	Loss 2.3890 (2.3011)	Entropy 0.86474 (0.86676)	Top-1 acc 68.750 (68.949)	Top-5 acc 85.156 (87.000)	lr 0.00451
Train [87][1600/3239]	Time 0.207 (0.652)	Data Time 0.001 (0.027)	Loss 2.3815 (2.3010)	Entropy 0.86473 (0.86674)	Top-1 acc 68.750 (68.953)	Top-5 acc 83.594 (87.003)	lr 0.00451
Train [87][1610/3239]	Time 0.339 (0.651)	Data Time 0.001 (0.026)	Loss 2.3752 (2.3012)	Entropy 0.86469 (0.86673)	Top-1 acc 65.625 (68.945)	Top-5 acc 85.156 (87.003)	lr 0.00451
Train [87][1620/3239]	Time 0.236 (0.650)	Data Time 0.002 (0.026)	Loss 2.3341 (2.3013)	Entropy 0.86473 (0.86672)	Top-1 acc 66.406 (68.939)	Top-5 acc 87.109 (87.003)	lr 0.00451
Train [87][1630/3239]	Time 0.229 (0.649)	Data Time 0.002 (0.026)	Loss 2.3419 (2.3009)	Entropy 0.86471 (0.86671)	Top-1 acc 66.797 (68.948)	Top-5 acc 89.062 (87.008)	lr 0.00451
Train [87][1640/3239]	Time 0.232 (0.648)	Data Time 0.002 (0.026)	Loss 2.4657 (2.3010)	Entropy 0.86463 (0.86669)	Top-1 acc 65.625 (68.947)	Top-5 acc 82.812 (87.005)	lr 0.00451
Train [87][1650/3239]	Time 0.254 (0.647)	Data Time 0.001 (0.026)	Loss 2.3230 (2.3012)	Entropy 0.86461 (0.86668)	Top-1 acc 67.969 (68.942)	Top-5 acc 85.547 (87.002)	lr 0.00450
Train [87][1660/3239]	Time 2.504 (0.646)	Data Time 0.001 (0.026)	Loss 2.3500 (2.3014)	Entropy 0.86461 (0.86667)	Top-1 acc 68.359 (68.935)	Top-5 acc 87.891 (86.999)	lr 0.00450
Train [87][1670/3239]	Time 0.231 (0.643)	Data Time 0.001 (0.026)	Loss 2.3691 (2.3015)	Entropy 0.86461 (0.86666)	Top-1 acc 69.531 (68.935)	Top-5 acc 84.375 (86.997)	lr 0.00450
Train [87][1680/3239]	Time 0.245 (0.642)	Data Time 0.001 (0.025)	Loss 2.2084 (2.3017)	Entropy 0.86461 (0.86664)	Top-1 acc 69.922 (68.932)	Top-5 acc 88.281 (86.992)	lr 0.00450
Train [87][1690/3239]	Time 0.235 (0.642)	Data Time 0.001 (0.025)	Loss 2.3926 (2.3017)	Entropy 0.86454 (0.86663)	Top-1 acc 69.922 (68.936)	Top-5 acc 85.547 (86.991)	lr 0.00450
Train [87][1700/3239]	Time 0.234 (0.641)	Data Time 0.001 (0.025)	Loss 2.2796 (2.3020)	Entropy 0.86448 (0.86662)	Top-1 acc 67.969 (68.928)	Top-5 acc 85.938 (86.984)	lr 0.00450
Train [87][1710/3239]	Time 0.225 (0.640)	Data Time 0.001 (0.025)	Loss 2.3855 (2.3021)	Entropy 0.86445 (0.86661)	Top-1 acc 67.578 (68.923)	Top-5 acc 83.203 (86.980)	lr 0.00450
Train [87][1720/3239]	Time 0.228 (0.639)	Data Time 0.001 (0.025)	Loss 2.3576 (2.3021)	Entropy 0.86443 (0.86659)	Top-1 acc 68.359 (68.924)	Top-5 acc 84.766 (86.980)	lr 0.00450
Train [87][1730/3239]	Time 0.215 (0.638)	Data Time 0.001 (0.025)	Loss 2.3795 (2.3022)	Entropy 0.86437 (0.86658)	Top-1 acc 66.406 (68.926)	Top-5 acc 86.328 (86.980)	lr 0.00450
Train [87][1740/3239]	Time 0.325 (0.637)	Data Time 0.001 (0.025)	Loss 2.3696 (2.3022)	Entropy 0.86436 (0.86657)	Top-1 acc 67.188 (68.929)	Top-5 acc 87.109 (86.981)	lr 0.00450
Train [87][1750/3239]	Time 0.226 (0.636)	Data Time 0.001 (0.024)	Loss 2.2748 (2.3022)	Entropy 0.86429 (0.86656)	Top-1 acc 67.578 (68.930)	Top-5 acc 88.672 (86.982)	lr 0.00450
Train [87][1760/3239]	Time 0.256 (0.635)	Data Time 0.001 (0.024)	Loss 2.3777 (2.3023)	Entropy 0.86427 (0.86654)	Top-1 acc 65.234 (68.925)	Top-5 acc 86.719 (86.978)	lr 0.00450
Train [87][1770/3239]	Time 2.607 (0.634)	Data Time 0.001 (0.024)	Loss 2.3847 (2.3025)	Entropy 0.86427 (0.86653)	Top-1 acc 68.750 (68.919)	Top-5 acc 85.156 (86.975)	lr 0.00450
Train [87][1780/3239]	Time 0.228 (0.632)	Data Time 0.001 (0.024)	Loss 2.1974 (2.3026)	Entropy 0.86429 (0.86652)	Top-1 acc 71.484 (68.916)	Top-5 acc 89.453 (86.970)	lr 0.00449
Train [87][1790/3239]	Time 0.224 (0.631)	Data Time 0.001 (0.024)	Loss 2.2868 (2.3027)	Entropy 0.86423 (0.86651)	Top-1 acc 70.312 (68.916)	Top-5 acc 85.938 (86.969)	lr 0.00449
Train [87][1800/3239]	Time 0.228 (0.630)	Data Time 0.001 (0.024)	Loss 2.4279 (2.3029)	Entropy 0.86416 (0.86649)	Top-1 acc 66.406 (68.907)	Top-5 acc 82.812 (86.965)	lr 0.00449
Train [87][1810/3239]	Time 0.233 (0.630)	Data Time 0.001 (0.024)	Loss 2.3082 (2.3030)	Entropy 0.86410 (0.86648)	Top-1 acc 67.969 (68.902)	Top-5 acc 87.109 (86.962)	lr 0.00449
Train [87][1820/3239]	Time 0.233 (0.629)	Data Time 0.001 (0.024)	Loss 2.2363 (2.3033)	Entropy 0.86410 (0.86647)	Top-1 acc 67.188 (68.892)	Top-5 acc 88.672 (86.957)	lr 0.00449
Train [87][1830/3239]	Time 0.324 (0.628)	Data Time 0.001 (0.023)	Loss 2.4518 (2.3034)	Entropy 0.86407 (0.86645)	Top-1 acc 68.359 (68.894)	Top-5 acc 85.547 (86.956)	lr 0.00449
Train [87][1840/3239]	Time 0.220 (0.627)	Data Time 0.001 (0.023)	Loss 2.3023 (2.3035)	Entropy 0.86399 (0.86644)	Top-1 acc 66.797 (68.893)	Top-5 acc 87.109 (86.956)	lr 0.00449
Train [87][1850/3239]	Time 0.225 (0.626)	Data Time 0.001 (0.023)	Loss 2.2299 (2.3035)	Entropy 0.86389 (0.86643)	Top-1 acc 71.094 (68.892)	Top-5 acc 87.891 (86.954)	lr 0.00449
Train [87][1860/3239]	Time 0.226 (0.625)	Data Time 0.001 (0.023)	Loss 2.1928 (2.3035)	Entropy 0.86389 (0.86641)	Top-1 acc 70.312 (68.894)	Top-5 acc 87.891 (86.951)	lr 0.00449
Train [87][1870/3239]	Time 0.361 (0.625)	Data Time 0.001 (0.023)	Loss 2.3724 (2.3040)	Entropy 0.86387 (0.86640)	Top-1 acc 69.922 (68.882)	Top-5 acc 82.422 (86.940)	lr 0.00449
Train [87][1880/3239]	Time 2.489 (0.624)	Data Time 0.001 (0.023)	Loss 2.4779 (2.3044)	Entropy 0.86387 (0.86639)	Top-1 acc 64.062 (68.872)	Top-5 acc 83.984 (86.929)	lr 0.00449
Train [87][1890/3239]	Time 0.260 (0.622)	Data Time 0.002 (0.023)	Loss 2.2269 (2.3044)	Entropy 0.86383 (0.86637)	Top-1 acc 73.438 (68.873)	Top-5 acc 88.672 (86.930)	lr 0.00449
Train [87][1900/3239]	Time 0.229 (0.621)	Data Time 0.001 (0.023)	Loss 2.2865 (2.3046)	Entropy 0.86363 (0.86636)	Top-1 acc 70.703 (68.871)	Top-5 acc 89.062 (86.926)	lr 0.00449
Train [87][1910/3239]	Time 0.250 (0.620)	Data Time 0.001 (0.023)	Loss 2.4417 (2.3048)	Entropy 0.86356 (0.86634)	Top-1 acc 68.359 (68.862)	Top-5 acc 82.812 (86.923)	lr 0.00448
Train [87][1920/3239]	Time 0.217 (0.620)	Data Time 0.001 (0.022)	Loss 2.4386 (2.3049)	Entropy 0.86351 (0.86633)	Top-1 acc 63.281 (68.861)	Top-5 acc 82.812 (86.923)	lr 0.00448
Train [87][1930/3239]	Time 0.242 (0.646)	Data Time 0.003 (0.022)	Loss 2.3869 (2.3049)	Entropy 0.86339 (0.86631)	Top-1 acc 70.312 (68.857)	Top-5 acc 86.719 (86.921)	lr 0.00448
Train [87][1940/3239]	Time 0.228 (0.645)	Data Time 0.002 (0.022)	Loss 2.3424 (2.3051)	Entropy 0.86337 (0.86630)	Top-1 acc 70.703 (68.855)	Top-5 acc 87.891 (86.920)	lr 0.00448
Train [87][1950/3239]	Time 0.239 (0.644)	Data Time 0.002 (0.022)	Loss 2.5163 (2.3053)	Entropy 0.86335 (0.86628)	Top-1 acc 65.625 (68.853)	Top-5 acc 81.641 (86.918)	lr 0.00448
Train [87][1960/3239]	Time 0.231 (0.644)	Data Time 0.001 (0.022)	Loss 2.3624 (2.3051)	Entropy 0.86316 (0.86627)	Top-1 acc 69.141 (68.857)	Top-5 acc 87.109 (86.921)	lr 0.00448
Train [87][1970/3239]	Time 0.241 (0.643)	Data Time 0.002 (0.022)	Loss 2.1685 (2.3053)	Entropy 0.86307 (0.86625)	Top-1 acc 72.656 (68.849)	Top-5 acc 92.969 (86.922)	lr 0.00448
Train [87][1980/3239]	Time 0.222 (0.642)	Data Time 0.001 (0.022)	Loss 2.2900 (2.3055)	Entropy 0.86304 (0.86624)	Top-1 acc 69.531 (68.843)	Top-5 acc 88.281 (86.922)	lr 0.00448
Train [87][1990/3239]	Time 2.509 (0.641)	Data Time 0.001 (0.022)	Loss 2.3459 (2.3056)	Entropy 0.86304 (0.86622)	Top-1 acc 68.359 (68.840)	Top-5 acc 86.328 (86.916)	lr 0.00448
Train [87][2000/3239]	Time 0.333 (0.639)	Data Time 0.001 (0.022)	Loss 2.4916 (2.3057)	Entropy 0.86298 (0.86620)	Top-1 acc 65.625 (68.841)	Top-5 acc 83.594 (86.915)	lr 0.00448
Train [87][2010/3239]	Time 0.231 (0.638)	Data Time 0.001 (0.021)	Loss 2.3915 (2.3059)	Entropy 0.86299 (0.86619)	Top-1 acc 65.625 (68.834)	Top-5 acc 83.984 (86.915)	lr 0.00448
Train [87][2020/3239]	Time 0.248 (0.638)	Data Time 0.001 (0.021)	Loss 2.4183 (2.3059)	Entropy 0.86296 (0.86617)	Top-1 acc 66.016 (68.831)	Top-5 acc 82.031 (86.909)	lr 0.00448
Train [87][2030/3239]	Time 0.229 (0.637)	Data Time 0.001 (0.021)	Loss 2.3013 (2.3060)	Entropy 0.86296 (0.86616)	Top-1 acc 69.531 (68.834)	Top-5 acc 87.891 (86.907)	lr 0.00448
Train [87][2040/3239]	Time 0.264 (0.636)	Data Time 0.001 (0.021)	Loss 2.3034 (2.3061)	Entropy 0.86295 (0.86614)	Top-1 acc 68.750 (68.827)	Top-5 acc 87.500 (86.910)	lr 0.00447
Train [87][2050/3239]	Time 0.245 (0.635)	Data Time 0.001 (0.021)	Loss 2.3528 (2.3062)	Entropy 0.86297 (0.86613)	Top-1 acc 67.969 (68.823)	Top-5 acc 85.547 (86.908)	lr 0.00447
Train [87][2060/3239]	Time 0.230 (0.635)	Data Time 0.001 (0.021)	Loss 2.2426 (2.3062)	Entropy 0.86294 (0.86611)	Top-1 acc 71.484 (68.821)	Top-5 acc 85.938 (86.907)	lr 0.00447
Train [87][2070/3239]	Time 0.244 (0.634)	Data Time 0.001 (0.021)	Loss 2.2261 (2.3064)	Entropy 0.86292 (0.86610)	Top-1 acc 67.969 (68.816)	Top-5 acc 89.062 (86.902)	lr 0.00447
Train [87][2080/3239]	Time 0.247 (0.633)	Data Time 0.002 (0.021)	Loss 2.4758 (2.3065)	Entropy 0.86291 (0.86608)	Top-1 acc 65.234 (68.810)	Top-5 acc 81.250 (86.898)	lr 0.00447
Train [87][2090/3239]	Time 0.226 (0.632)	Data Time 0.001 (0.021)	Loss 2.4609 (2.3065)	Entropy 0.86288 (0.86606)	Top-1 acc 65.234 (68.810)	Top-5 acc 84.375 (86.902)	lr 0.00447
Train [87][2100/3239]	Time 2.395 (0.632)	Data Time 0.001 (0.021)	Loss 2.3470 (2.3066)	Entropy 0.86288 (0.86605)	Top-1 acc 65.234 (68.804)	Top-5 acc 88.281 (86.903)	lr 0.00447
Train [87][2110/3239]	Time 0.260 (0.630)	Data Time 0.001 (0.021)	Loss 2.2410 (2.3066)	Entropy 0.86282 (0.86603)	Top-1 acc 73.828 (68.804)	Top-5 acc 88.672 (86.904)	lr 0.00447
Train [87][2120/3239]	Time 0.233 (0.629)	Data Time 0.001 (0.020)	Loss 2.3805 (2.3067)	Entropy 0.86270 (0.86602)	Top-1 acc 68.359 (68.802)	Top-5 acc 85.156 (86.903)	lr 0.00447
Train [87][2130/3239]	Time 0.337 (0.628)	Data Time 0.001 (0.020)	Loss 2.3677 (2.3069)	Entropy 0.86271 (0.86600)	Top-1 acc 65.625 (68.798)	Top-5 acc 85.547 (86.901)	lr 0.00447
Train [87][2140/3239]	Time 0.235 (0.628)	Data Time 0.001 (0.020)	Loss 2.0825 (2.3070)	Entropy 0.86271 (0.86599)	Top-1 acc 72.656 (68.793)	Top-5 acc 90.234 (86.897)	lr 0.00447
Train [87][2150/3239]	Time 0.234 (0.627)	Data Time 0.001 (0.020)	Loss 2.3609 (2.3071)	Entropy 0.86270 (0.86597)	Top-1 acc 66.016 (68.793)	Top-5 acc 87.891 (86.897)	lr 0.00447
Train [87][2160/3239]	Time 0.244 (0.626)	Data Time 0.001 (0.020)	Loss 2.1331 (2.3070)	Entropy 0.86267 (0.86596)	Top-1 acc 72.266 (68.794)	Top-5 acc 88.672 (86.899)	lr 0.00447
Train [87][2170/3239]	Time 0.318 (0.626)	Data Time 0.001 (0.020)	Loss 2.4515 (2.3071)	Entropy 0.86263 (0.86594)	Top-1 acc 67.578 (68.790)	Top-5 acc 84.375 (86.895)	lr 0.00446
Train [87][2180/3239]	Time 0.223 (0.625)	Data Time 0.001 (0.020)	Loss 2.3384 (2.3072)	Entropy 0.86263 (0.86593)	Top-1 acc 68.750 (68.785)	Top-5 acc 85.938 (86.895)	lr 0.00446
Train [87][2190/3239]	Time 0.257 (0.624)	Data Time 0.001 (0.020)	Loss 2.4339 (2.3071)	Entropy 0.86256 (0.86591)	Top-1 acc 64.062 (68.788)	Top-5 acc 83.594 (86.899)	lr 0.00446
Train [87][2200/3239]	Time 0.224 (0.624)	Data Time 0.001 (0.020)	Loss 2.1584 (2.3072)	Entropy 0.86271 (0.86590)	Top-1 acc 71.484 (68.783)	Top-5 acc 91.406 (86.899)	lr 0.00446
Train [87][2210/3239]	Time 2.611 (0.623)	Data Time 0.001 (0.020)	Loss 2.4182 (2.3073)	Entropy 0.86271 (0.86588)	Top-1 acc 68.359 (68.783)	Top-5 acc 82.031 (86.896)	lr 0.00446
Train [87][2220/3239]	Time 0.239 (0.621)	Data Time 0.001 (0.020)	Loss 2.1576 (2.3071)	Entropy 0.86266 (0.86587)	Top-1 acc 72.656 (68.786)	Top-5 acc 87.500 (86.898)	lr 0.00446
Train [87][2230/3239]	Time 0.233 (0.621)	Data Time 0.001 (0.020)	Loss 2.1019 (2.3071)	Entropy 0.86265 (0.86585)	Top-1 acc 76.562 (68.789)	Top-5 acc 89.062 (86.895)	lr 0.00446
Train [87][2240/3239]	Time 0.235 (0.620)	Data Time 0.001 (0.019)	Loss 2.3205 (2.3073)	Entropy 0.86265 (0.86584)	Top-1 acc 66.797 (68.783)	Top-5 acc 86.719 (86.892)	lr 0.00446
Train [87][2250/3239]	Time 0.221 (0.619)	Data Time 0.001 (0.019)	Loss 2.1194 (2.3072)	Entropy 0.86259 (0.86582)	Top-1 acc 76.172 (68.790)	Top-5 acc 91.016 (86.895)	lr 0.00446
Train [87][2260/3239]	Time 0.223 (0.619)	Data Time 0.001 (0.019)	Loss 2.3569 (2.3074)	Entropy 0.86261 (0.86581)	Top-1 acc 65.625 (68.786)	Top-5 acc 84.375 (86.892)	lr 0.00446
Train [87][2270/3239]	Time 0.228 (0.618)	Data Time 0.001 (0.019)	Loss 2.1682 (2.3072)	Entropy 0.86253 (0.86580)	Top-1 acc 73.828 (68.789)	Top-5 acc 90.625 (86.895)	lr 0.00446
Train [87][2280/3239]	Time 0.241 (0.617)	Data Time 0.001 (0.019)	Loss 2.2298 (2.3070)	Entropy 0.86247 (0.86578)	Top-1 acc 68.359 (68.794)	Top-5 acc 89.453 (86.903)	lr 0.00446
Train [87][2290/3239]	Time 0.277 (0.638)	Data Time 0.002 (0.019)	Loss 2.4497 (2.3071)	Entropy 0.86247 (0.86577)	Top-1 acc 63.281 (68.791)	Top-5 acc 85.156 (86.900)	lr 0.00446
Train [87][2300/3239]	Time 0.221 (0.638)	Data Time 0.002 (0.019)	Loss 2.2670 (2.3072)	Entropy 0.86244 (0.86575)	Top-1 acc 71.094 (68.789)	Top-5 acc 87.891 (86.899)	lr 0.00445
Train [87][2310/3239]	Time 0.231 (0.637)	Data Time 0.001 (0.019)	Loss 2.3142 (2.3070)	Entropy 0.86231 (0.86574)	Top-1 acc 67.969 (68.789)	Top-5 acc 87.500 (86.902)	lr 0.00445
Train [87][2320/3239]	Time 2.575 (0.637)	Data Time 0.002 (0.019)	Loss 2.2829 (2.3072)	Entropy 0.86231 (0.86572)	Top-1 acc 69.922 (68.786)	Top-5 acc 87.500 (86.897)	lr 0.00445
Train [87][2330/3239]	Time 0.276 (0.635)	Data Time 0.001 (0.019)	Loss 2.3508 (2.3072)	Entropy 0.86231 (0.86571)	Top-1 acc 69.141 (68.784)	Top-5 acc 86.719 (86.897)	lr 0.00445
Train [87][2340/3239]	Time 0.364 (0.634)	Data Time 0.001 (0.019)	Loss 2.3607 (2.3073)	Entropy 0.86226 (0.86569)	Top-1 acc 65.625 (68.785)	Top-5 acc 87.500 (86.896)	lr 0.00445
Train [87][2350/3239]	Time 0.231 (0.634)	Data Time 0.001 (0.019)	Loss 2.2398 (2.3075)	Entropy 0.86232 (0.86568)	Top-1 acc 75.391 (68.785)	Top-5 acc 85.547 (86.892)	lr 0.00445
Train [87][2360/3239]	Time 0.232 (0.633)	Data Time 0.001 (0.019)	Loss 2.2705 (2.3072)	Entropy 0.86228 (0.86567)	Top-1 acc 71.484 (68.792)	Top-5 acc 84.766 (86.894)	lr 0.00445
Train [87][2370/3239]	Time 0.222 (0.632)	Data Time 0.001 (0.018)	Loss 2.3814 (2.3073)	Entropy 0.86226 (0.86565)	Top-1 acc 67.578 (68.789)	Top-5 acc 85.938 (86.892)	lr 0.00445
Train [87][2380/3239]	Time 0.225 (0.632)	Data Time 0.001 (0.018)	Loss 2.2759 (2.3073)	Entropy 0.86220 (0.86564)	Top-1 acc 68.750 (68.791)	Top-5 acc 88.672 (86.896)	lr 0.00445
Train [87][2390/3239]	Time 0.233 (0.631)	Data Time 0.001 (0.018)	Loss 2.3765 (2.3078)	Entropy 0.86213 (0.86562)	Top-1 acc 69.141 (68.781)	Top-5 acc 85.547 (86.889)	lr 0.00445
Train [87][2400/3239]	Time 0.233 (0.630)	Data Time 0.001 (0.018)	Loss 2.4192 (2.3078)	Entropy 0.86209 (0.86561)	Top-1 acc 67.969 (68.781)	Top-5 acc 82.812 (86.888)	lr 0.00445
Train [87][2410/3239]	Time 0.273 (0.630)	Data Time 0.001 (0.018)	Loss 2.4576 (2.3080)	Entropy 0.86202 (0.86559)	Top-1 acc 64.844 (68.776)	Top-5 acc 84.766 (86.885)	lr 0.00445
Train [87][2420/3239]	Time 0.286 (0.629)	Data Time 0.001 (0.018)	Loss 2.4275 (2.3082)	Entropy 0.86190 (0.86558)	Top-1 acc 65.234 (68.767)	Top-5 acc 84.766 (86.883)	lr 0.00445
Train [87][2430/3239]	Time 2.560 (0.629)	Data Time 0.001 (0.018)	Loss 2.1123 (2.3080)	Entropy 0.86190 (0.86556)	Top-1 acc 76.562 (68.771)	Top-5 acc 89.062 (86.884)	lr 0.00444
Train [87][2440/3239]	Time 0.227 (0.627)	Data Time 0.001 (0.018)	Loss 2.1530 (2.3077)	Entropy 0.86183 (0.86555)	Top-1 acc 69.922 (68.779)	Top-5 acc 89.453 (86.888)	lr 0.00444
Train [87][2450/3239]	Time 0.240 (0.626)	Data Time 0.001 (0.018)	Loss 2.3074 (2.3079)	Entropy 0.86184 (0.86553)	Top-1 acc 70.312 (68.772)	Top-5 acc 85.547 (86.882)	lr 0.00444
Train [87][2460/3239]	Time 0.238 (0.626)	Data Time 0.001 (0.018)	Loss 2.2957 (2.3079)	Entropy 0.86187 (0.86552)	Top-1 acc 66.406 (68.770)	Top-5 acc 85.938 (86.881)	lr 0.00444
Train [87][2470/3239]	Time 0.340 (0.625)	Data Time 0.001 (0.018)	Loss 2.1220 (2.3079)	Entropy 0.86180 (0.86550)	Top-1 acc 73.047 (68.771)	Top-5 acc 92.969 (86.881)	lr 0.00444
Train [87][2480/3239]	Time 0.234 (0.625)	Data Time 0.001 (0.018)	Loss 2.3977 (2.3079)	Entropy 0.86175 (0.86549)	Top-1 acc 67.188 (68.771)	Top-5 acc 83.203 (86.881)	lr 0.00444
Train [87][2490/3239]	Time 0.231 (0.624)	Data Time 0.001 (0.018)	Loss 2.4140 (2.3080)	Entropy 0.86174 (0.86547)	Top-1 acc 66.406 (68.769)	Top-5 acc 84.766 (86.881)	lr 0.00444
Train [87][2500/3239]	Time 0.235 (0.623)	Data Time 0.001 (0.018)	Loss 2.1866 (2.3079)	Entropy 0.86164 (0.86546)	Top-1 acc 69.922 (68.770)	Top-5 acc 89.844 (86.882)	lr 0.00444
Train [87][2510/3239]	Time 0.220 (0.623)	Data Time 0.001 (0.018)	Loss 2.3548 (2.3079)	Entropy 0.86165 (0.86544)	Top-1 acc 70.703 (68.768)	Top-5 acc 85.938 (86.881)	lr 0.00444
Train [87][2520/3239]	Time 0.273 (0.622)	Data Time 0.001 (0.017)	Loss 2.2524 (2.3077)	Entropy 0.86156 (0.86543)	Top-1 acc 69.922 (68.773)	Top-5 acc 90.234 (86.885)	lr 0.00444
Train [87][2530/3239]	Time 0.292 (0.622)	Data Time 0.001 (0.017)	Loss 2.2492 (2.3075)	Entropy 0.86153 (0.86541)	Top-1 acc 67.969 (68.780)	Top-5 acc 90.625 (86.888)	lr 0.00444
Train [87][2540/3239]	Time 2.527 (0.621)	Data Time 0.001 (0.017)	Loss 2.3055 (2.3076)	Entropy 0.86153 (0.86540)	Top-1 acc 70.312 (68.774)	Top-5 acc 87.500 (86.886)	lr 0.00444
Train [87][2550/3239]	Time 0.231 (0.620)	Data Time 0.001 (0.017)	Loss 2.3368 (2.3077)	Entropy 0.86149 (0.86538)	Top-1 acc 67.969 (68.774)	Top-5 acc 85.938 (86.886)	lr 0.00444
Train [87][2560/3239]	Time 0.225 (0.619)	Data Time 0.001 (0.017)	Loss 2.4375 (2.3076)	Entropy 0.86146 (0.86537)	Top-1 acc 64.062 (68.779)	Top-5 acc 85.156 (86.889)	lr 0.00443
Train [87][2570/3239]	Time 0.170 (0.619)	Data Time 0.001 (0.017)	Loss 2.1896 (2.3076)	Entropy 0.86141 (0.86535)	Top-1 acc 69.922 (68.776)	Top-5 acc 89.453 (86.888)	lr 0.00443
Train [87][2580/3239]	Time 0.244 (0.618)	Data Time 0.002 (0.017)	Loss 2.1777 (2.3080)	Entropy 0.86130 (0.86534)	Top-1 acc 70.312 (68.767)	Top-5 acc 90.625 (86.884)	lr 0.00443
Train [87][2590/3239]	Time 0.238 (0.617)	Data Time 0.001 (0.017)	Loss 2.6103 (2.3081)	Entropy 0.86119 (0.86532)	Top-1 acc 60.547 (68.762)	Top-5 acc 82.812 (86.881)	lr 0.00443
Train [87][2600/3239]	Time 0.322 (0.617)	Data Time 0.001 (0.017)	Loss 2.2021 (2.3082)	Entropy 0.86118 (0.86530)	Top-1 acc 75.000 (68.758)	Top-5 acc 90.234 (86.883)	lr 0.00443
Train [87][2610/3239]	Time 0.226 (0.617)	Data Time 0.001 (0.017)	Loss 2.3982 (2.3082)	Entropy 0.86114 (0.86529)	Top-1 acc 68.750 (68.760)	Top-5 acc 85.547 (86.882)	lr 0.00443
Train [87][2620/3239]	Time 0.224 (0.616)	Data Time 0.001 (0.017)	Loss 2.3112 (2.3082)	Entropy 0.86109 (0.86527)	Top-1 acc 68.359 (68.761)	Top-5 acc 87.109 (86.881)	lr 0.00443
Train [87][2630/3239]	Time 0.226 (0.615)	Data Time 0.001 (0.017)	Loss 2.3330 (2.3080)	Entropy 0.86110 (0.86526)	Top-1 acc 67.188 (68.765)	Top-5 acc 86.719 (86.883)	lr 0.00443
Train [87][2640/3239]	Time 0.265 (0.615)	Data Time 0.002 (0.017)	Loss 2.1817 (2.3081)	Entropy 0.86107 (0.86524)	Top-1 acc 72.266 (68.761)	Top-5 acc 90.234 (86.882)	lr 0.00443
Train [87][2650/3239]	Time 0.417 (0.633)	Data Time 0.004 (0.017)	Loss 2.2151 (2.3081)	Entropy 0.86107 (0.86522)	Top-1 acc 72.266 (68.760)	Top-5 acc 91.016 (86.878)	lr 0.00443
Train [87][2660/3239]	Time 0.295 (0.633)	Data Time 0.002 (0.017)	Loss 2.2928 (2.3082)	Entropy 0.86103 (0.86521)	Top-1 acc 72.266 (68.760)	Top-5 acc 86.719 (86.879)	lr 0.00443
Train [87][2670/3239]	Time 0.235 (0.633)	Data Time 0.002 (0.017)	Loss 2.2376 (2.3081)	Entropy 0.86098 (0.86519)	Top-1 acc 71.875 (68.761)	Top-5 acc 87.500 (86.882)	lr 0.00443
Train [87][2680/3239]	Time 0.221 (0.632)	Data Time 0.002 (0.017)	Loss 2.3515 (2.3081)	Entropy 0.86100 (0.86518)	Top-1 acc 67.188 (68.761)	Top-5 acc 85.547 (86.882)	lr 0.00442
Train [87][2690/3239]	Time 0.259 (0.631)	Data Time 0.001 (0.017)	Loss 2.3056 (2.3081)	Entropy 0.86096 (0.86516)	Top-1 acc 70.703 (68.762)	Top-5 acc 85.156 (86.881)	lr 0.00442
Train [87][2700/3239]	Time 0.264 (0.631)	Data Time 0.002 (0.016)	Loss 2.2723 (2.3082)	Entropy 0.86097 (0.86515)	Top-1 acc 68.750 (68.759)	Top-5 acc 88.672 (86.881)	lr 0.00442
Train [87][2710/3239]	Time 0.290 (0.630)	Data Time 0.001 (0.016)	Loss 2.2854 (2.3082)	Entropy 0.86084 (0.86513)	Top-1 acc 69.141 (68.761)	Top-5 acc 87.500 (86.883)	lr 0.00442
Train [87][2720/3239]	Time 0.233 (0.630)	Data Time 0.001 (0.016)	Loss 2.3884 (2.3083)	Entropy 0.86082 (0.86511)	Top-1 acc 66.797 (68.757)	Top-5 acc 85.547 (86.879)	lr 0.00442
Train [87][2730/3239]	Time 0.232 (0.629)	Data Time 0.001 (0.016)	Loss 2.2873 (2.3083)	Entropy 0.86086 (0.86510)	Top-1 acc 68.750 (68.759)	Top-5 acc 87.109 (86.882)	lr 0.00442
Train [87][2740/3239]	Time 0.285 (0.629)	Data Time 0.001 (0.016)	Loss 2.2820 (2.3082)	Entropy 0.86084 (0.86508)	Top-1 acc 67.578 (68.760)	Top-5 acc 89.453 (86.884)	lr 0.00442
Train [87][2750/3239]	Time 0.267 (0.628)	Data Time 0.002 (0.016)	Loss 2.3398 (2.3082)	Entropy 0.86082 (0.86507)	Top-1 acc 68.750 (68.762)	Top-5 acc 86.328 (86.886)	lr 0.00442
Train [87][2760/3239]	Time 0.315 (0.628)	Data Time 0.001 (0.016)	Loss 2.3199 (2.3082)	Entropy 0.86080 (0.86505)	Top-1 acc 69.531 (68.763)	Top-5 acc 85.547 (86.884)	lr 0.00442
Train [87][2770/3239]	Time 0.391 (0.627)	Data Time 0.001 (0.016)	Loss 2.1886 (2.3081)	Entropy 0.86083 (0.86504)	Top-1 acc 71.875 (68.764)	Top-5 acc 87.891 (86.887)	lr 0.00442
Train [87][2780/3239]	Time 0.277 (0.627)	Data Time 0.001 (0.016)	Loss 2.3023 (2.3081)	Entropy 0.86077 (0.86502)	Top-1 acc 66.406 (68.761)	Top-5 acc 87.500 (86.887)	lr 0.00442
Train [87][2790/3239]	Time 0.287 (0.626)	Data Time 0.001 (0.016)	Loss 2.3596 (2.3083)	Entropy 0.86077 (0.86501)	Top-1 acc 65.625 (68.758)	Top-5 acc 83.984 (86.883)	lr 0.00442
Train [87][2800/3239]	Time 0.179 (0.625)	Data Time 0.001 (0.016)	Loss 2.2493 (2.3083)	Entropy 0.86079 (0.86499)	Top-1 acc 69.922 (68.760)	Top-5 acc 87.109 (86.883)	lr 0.00442
Train [87][2810/3239]	Time 0.358 (0.625)	Data Time 0.001 (0.016)	Loss 2.2920 (2.3082)	Entropy 0.86080 (0.86498)	Top-1 acc 69.531 (68.763)	Top-5 acc 86.328 (86.885)	lr 0.00441
Train [87][2820/3239]	Time 0.232 (0.625)	Data Time 0.001 (0.016)	Loss 2.4996 (2.3084)	Entropy 0.86076 (0.86496)	Top-1 acc 65.234 (68.763)	Top-5 acc 83.594 (86.880)	lr 0.00441
Train [87][2830/3239]	Time 0.243 (0.624)	Data Time 0.001 (0.016)	Loss 2.2411 (2.3083)	Entropy 0.86063 (0.86495)	Top-1 acc 69.922 (68.763)	Top-5 acc 87.500 (86.880)	lr 0.00441
Train [87][2840/3239]	Time 0.267 (0.623)	Data Time 0.001 (0.016)	Loss 2.2020 (2.3083)	Entropy 0.86055 (0.86493)	Top-1 acc 72.656 (68.765)	Top-5 acc 87.500 (86.879)	lr 0.00441
Train [87][2850/3239]	Time 0.287 (0.623)	Data Time 0.002 (0.016)	Loss 2.2970 (2.3084)	Entropy 0.86052 (0.86492)	Top-1 acc 67.578 (68.762)	Top-5 acc 85.547 (86.878)	lr 0.00441
Train [87][2860/3239]	Time 0.242 (0.622)	Data Time 0.001 (0.016)	Loss 2.1804 (2.3083)	Entropy 0.86059 (0.86490)	Top-1 acc 72.656 (68.767)	Top-5 acc 90.234 (86.882)	lr 0.00441
Train [87][2870/3239]	Time 0.230 (0.622)	Data Time 0.001 (0.016)	Loss 2.3682 (2.3082)	Entropy 0.86049 (0.86489)	Top-1 acc 64.844 (68.764)	Top-5 acc 85.547 (86.886)	lr 0.00441
Train [87][2880/3239]	Time 0.249 (0.621)	Data Time 0.001 (0.016)	Loss 2.3685 (2.3081)	Entropy 0.86048 (0.86487)	Top-1 acc 70.312 (68.767)	Top-5 acc 84.375 (86.888)	lr 0.00441
Train [87][2890/3239]	Time 0.241 (0.621)	Data Time 0.001 (0.015)	Loss 2.1932 (2.3079)	Entropy 0.86045 (0.86486)	Top-1 acc 71.094 (68.770)	Top-5 acc 87.891 (86.890)	lr 0.00441
Train [87][2900/3239]	Time 0.339 (0.621)	Data Time 0.002 (0.015)	Loss 2.3330 (2.3078)	Entropy 0.86044 (0.86484)	Top-1 acc 67.578 (68.771)	Top-5 acc 86.719 (86.896)	lr 0.00441
Train [87][2910/3239]	Time 0.217 (0.620)	Data Time 0.001 (0.015)	Loss 2.2796 (2.3078)	Entropy 0.86044 (0.86483)	Top-1 acc 69.531 (68.768)	Top-5 acc 88.672 (86.893)	lr 0.00441
Train [87][2920/3239]	Time 0.230 (0.620)	Data Time 0.001 (0.015)	Loss 2.2121 (2.3079)	Entropy 0.86039 (0.86481)	Top-1 acc 69.141 (68.764)	Top-5 acc 87.109 (86.892)	lr 0.00441
Train [87][2930/3239]	Time 0.214 (0.619)	Data Time 0.001 (0.015)	Loss 2.3763 (2.3080)	Entropy 0.86034 (0.86480)	Top-1 acc 67.188 (68.758)	Top-5 acc 84.766 (86.889)	lr 0.00441
Train [87][2940/3239]	Time 0.251 (0.619)	Data Time 0.001 (0.015)	Loss 2.3099 (2.3080)	Entropy 0.86036 (0.86478)	Top-1 acc 68.359 (68.759)	Top-5 acc 86.719 (86.892)	lr 0.00440
Train [87][2950/3239]	Time 0.232 (0.618)	Data Time 0.001 (0.015)	Loss 2.3394 (2.3081)	Entropy 0.86037 (0.86477)	Top-1 acc 66.797 (68.757)	Top-5 acc 87.500 (86.891)	lr 0.00440
Train [87][2960/3239]	Time 0.262 (0.618)	Data Time 0.003 (0.015)	Loss 2.2345 (2.3081)	Entropy 0.86036 (0.86475)	Top-1 acc 70.703 (68.755)	Top-5 acc 88.281 (86.891)	lr 0.00440
Train [87][2970/3239]	Time 0.231 (0.617)	Data Time 0.001 (0.015)	Loss 2.3434 (2.3082)	Entropy 0.86033 (0.86474)	Top-1 acc 65.625 (68.756)	Top-5 acc 87.500 (86.890)	lr 0.00440
Train [87][2980/3239]	Time 0.249 (0.635)	Data Time 0.003 (0.015)	Loss 2.2133 (2.3081)	Entropy 0.86035 (0.86472)	Top-1 acc 71.875 (68.759)	Top-5 acc 89.062 (86.891)	lr 0.00440
Train [87][2990/3239]	Time 0.256 (0.635)	Data Time 0.002 (0.015)	Loss 2.6138 (2.3083)	Entropy 0.86027 (0.86471)	Top-1 acc 61.328 (68.754)	Top-5 acc 81.641 (86.888)	lr 0.00440
Train [87][3000/3239]	Time 0.231 (0.634)	Data Time 0.001 (0.015)	Loss 2.3043 (2.3083)	Entropy 0.86023 (0.86469)	Top-1 acc 70.312 (68.755)	Top-5 acc 87.500 (86.888)	lr 0.00440
Train [87][3010/3239]	Time 0.272 (0.634)	Data Time 0.002 (0.015)	Loss 2.2187 (2.3081)	Entropy 0.86015 (0.86468)	Top-1 acc 67.578 (68.758)	Top-5 acc 89.062 (86.892)	lr 0.00440
Train [87][3020/3239]	Time 0.267 (0.633)	Data Time 0.001 (0.015)	Loss 2.2552 (2.3081)	Entropy 0.86019 (0.86466)	Top-1 acc 67.969 (68.759)	Top-5 acc 87.891 (86.891)	lr 0.00440
Train [87][3030/3239]	Time 0.251 (0.633)	Data Time 0.001 (0.015)	Loss 2.2811 (2.3083)	Entropy 0.86013 (0.86465)	Top-1 acc 70.312 (68.754)	Top-5 acc 83.984 (86.887)	lr 0.00440
Train [87][3040/3239]	Time 0.247 (0.632)	Data Time 0.002 (0.015)	Loss 2.3087 (2.3082)	Entropy 0.86009 (0.86463)	Top-1 acc 68.359 (68.753)	Top-5 acc 87.891 (86.891)	lr 0.00440
Train [87][3050/3239]	Time 0.260 (0.632)	Data Time 0.001 (0.015)	Loss 2.5006 (2.3082)	Entropy 0.86006 (0.86462)	Top-1 acc 66.406 (68.757)	Top-5 acc 82.031 (86.889)	lr 0.00440
Train [87][3060/3239]	Time 0.265 (0.631)	Data Time 0.001 (0.015)	Loss 2.5569 (2.3083)	Entropy 0.86003 (0.86460)	Top-1 acc 64.844 (68.754)	Top-5 acc 83.203 (86.889)	lr 0.00440
Train [87][3070/3239]	Time 0.365 (0.631)	Data Time 0.001 (0.015)	Loss 2.3062 (2.3082)	Entropy 0.85999 (0.86459)	Top-1 acc 69.531 (68.758)	Top-5 acc 88.281 (86.892)	lr 0.00439
Train [87][3080/3239]	Time 0.227 (0.630)	Data Time 0.001 (0.015)	Loss 2.4394 (2.3083)	Entropy 0.85991 (0.86457)	Top-1 acc 67.188 (68.759)	Top-5 acc 82.422 (86.891)	lr 0.00439
Train [87][3090/3239]	Time 0.233 (0.630)	Data Time 0.001 (0.015)	Loss 2.1827 (2.3083)	Entropy 0.85993 (0.86456)	Top-1 acc 71.094 (68.755)	Top-5 acc 88.672 (86.888)	lr 0.00439
Train [87][3100/3239]	Time 0.249 (0.629)	Data Time 0.001 (0.015)	Loss 2.2818 (2.3083)	Entropy 0.85995 (0.86454)	Top-1 acc 66.797 (68.753)	Top-5 acc 86.328 (86.889)	lr 0.00439
Train [87][3110/3239]	Time 0.246 (0.629)	Data Time 0.001 (0.015)	Loss 2.5961 (2.3083)	Entropy 0.85991 (0.86453)	Top-1 acc 62.891 (68.753)	Top-5 acc 82.812 (86.890)	lr 0.00439
Train [87][3120/3239]	Time 0.217 (0.628)	Data Time 0.001 (0.014)	Loss 2.2932 (2.3082)	Entropy 0.85981 (0.86451)	Top-1 acc 68.750 (68.756)	Top-5 acc 87.500 (86.893)	lr 0.00439
Train [87][3130/3239]	Time 0.229 (0.628)	Data Time 0.001 (0.014)	Loss 2.3387 (2.3082)	Entropy 0.85979 (0.86450)	Top-1 acc 68.750 (68.755)	Top-5 acc 85.156 (86.893)	lr 0.00439
Train [87][3140/3239]	Time 0.228 (0.627)	Data Time 0.002 (0.014)	Loss 2.3688 (2.3083)	Entropy 0.85979 (0.86448)	Top-1 acc 66.797 (68.749)	Top-5 acc 85.156 (86.890)	lr 0.00439
Train [87][3150/3239]	Time 0.232 (0.627)	Data Time 0.001 (0.014)	Loss 2.3065 (2.3084)	Entropy 0.85985 (0.86447)	Top-1 acc 66.797 (68.747)	Top-5 acc 88.672 (86.888)	lr 0.00439
Train [87][3160/3239]	Time 0.205 (0.626)	Data Time 0.001 (0.014)	Loss 2.4119 (2.3083)	Entropy 0.85978 (0.86445)	Top-1 acc 66.797 (68.750)	Top-5 acc 84.766 (86.890)	lr 0.00439
Train [87][3170/3239]	Time 0.278 (0.626)	Data Time 0.001 (0.014)	Loss 2.3816 (2.3083)	Entropy 0.85970 (0.86444)	Top-1 acc 69.922 (68.751)	Top-5 acc 85.547 (86.886)	lr 0.00439
Train [87][3180/3239]	Time 0.239 (0.625)	Data Time 0.000 (0.014)	Loss 2.2549 (2.3084)	Entropy 0.85965 (0.86442)	Top-1 acc 70.703 (68.752)	Top-5 acc 87.109 (86.886)	lr 0.00439
Train [87][3190/3239]	Time 0.222 (0.625)	Data Time 0.000 (0.014)	Loss 2.3734 (2.3084)	Entropy 0.85962 (0.86441)	Top-1 acc 66.016 (68.751)	Top-5 acc 86.719 (86.887)	lr 0.00439
Train [87][3200/3239]	Time 0.237 (0.624)	Data Time 0.000 (0.014)	Loss 2.4044 (2.3085)	Entropy 0.85954 (0.86439)	Top-1 acc 68.359 (68.749)	Top-5 acc 83.594 (86.887)	lr 0.00438
Train [87][3210/3239]	Time 0.234 (0.624)	Data Time 0.000 (0.014)	Loss 2.2353 (2.3084)	Entropy 0.85949 (0.86438)	Top-1 acc 69.141 (68.749)	Top-5 acc 89.453 (86.888)	lr 0.00438
Train [87][3220/3239]	Time 0.207 (0.623)	Data Time 0.000 (0.014)	Loss 2.3162 (2.3087)	Entropy 0.85965 (0.86436)	Top-1 acc 70.703 (68.744)	Top-5 acc 87.891 (86.885)	lr 0.00438
Train [87][3230/3239]	Time 0.211 (0.623)	Data Time 0.000 (0.014)	Loss 2.2347 (2.3086)	Entropy 0.85967 (0.86435)	Top-1 acc 71.484 (68.749)	Top-5 acc 89.062 (86.887)	lr 0.00438
Train [87][3239/3239]	Time 2.298 (0.622)	Data Time 0.000 (0.014)	Loss 2.5376 (2.3085)	Entropy 0.85967 (0.86434)	Top-1 acc 55.556 (68.751)	Top-5 acc 86.420 (86.890)	lr 0.00438
==========Valid [87/120]	loss 1.274	top-1 acc 70.772 (70.789)	top-5 acc 88.923	Train top-1 68.751	top-5 86.890	Entropy 0.85967	Latency-None: 0.000ms	Flops: 546.53M
Train [88][0/3239]	Time 43.127 (43.127)	Data Time 40.557 (40.557)	Loss 2.2679 (2.2679)	Entropy 0.85974 (0.85974)	Top-1 acc 69.922 (69.922)	Top-5 acc 88.281 (88.281)	lr 0.00438
Train [88][10/3239]	Time 2.666 (4.432)	Data Time 0.002 (3.689)	Loss 2.3376 (2.2998)	Entropy 0.85974 (0.85974)	Top-1 acc 67.969 (69.034)	Top-5 acc 84.766 (86.825)	lr 0.00438
Train [88][20/3239]	Time 0.250 (2.447)	Data Time 0.003 (1.933)	Loss 2.3603 (2.2975)	Entropy 0.85974 (0.85974)	Top-1 acc 69.531 (68.899)	Top-5 acc 83.984 (86.812)	lr 0.00438
Train [88][30/3239]	Time 0.232 (1.808)	Data Time 0.001 (1.310)	Loss 2.2554 (2.2826)	Entropy 0.85975 (0.85974)	Top-1 acc 71.094 (69.254)	Top-5 acc 89.062 (87.261)	lr 0.00438
Train [88][40/3239]	Time 0.247 (1.482)	Data Time 0.001 (0.991)	Loss 2.4695 (2.2985)	Entropy 0.85974 (0.85974)	Top-1 acc 66.406 (68.731)	Top-5 acc 83.203 (87.062)	lr 0.00438
Train [88][50/3239]	Time 0.237 (1.282)	Data Time 0.002 (0.797)	Loss 2.3304 (2.2952)	Entropy 0.85966 (0.85973)	Top-1 acc 67.578 (69.003)	Top-5 acc 87.109 (87.132)	lr 0.00438
Train [88][60/3239]	Time 0.243 (1.151)	Data Time 0.001 (0.666)	Loss 2.3373 (2.2848)	Entropy 0.85965 (0.85972)	Top-1 acc 71.094 (69.205)	Top-5 acc 85.547 (87.366)	lr 0.00438
Train [88][70/3239]	Time 0.214 (1.054)	Data Time 0.001 (0.573)	Loss 2.0987 (2.2829)	Entropy 0.85959 (0.85970)	Top-1 acc 75.391 (69.300)	Top-5 acc 90.625 (87.313)	lr 0.00438
Train [88][80/3239]	Time 0.408 (1.639)	Data Time 0.003 (0.502)	Loss 2.3429 (2.2818)	Entropy 0.85958 (0.85969)	Top-1 acc 65.234 (69.247)	Top-5 acc 87.109 (87.379)	lr 0.00438
Train [88][90/3239]	Time 0.323 (1.519)	Data Time 0.002 (0.447)	Loss 2.1850 (2.2845)	Entropy 0.85957 (0.85968)	Top-1 acc 71.875 (69.244)	Top-5 acc 88.281 (87.371)	lr 0.00437
Train [88][100/3239]	Time 0.225 (1.416)	Data Time 0.001 (0.403)	Loss 2.3458 (2.2901)	Entropy 0.85963 (0.85967)	Top-1 acc 67.578 (69.129)	Top-5 acc 87.891 (87.198)	lr 0.00437
Train [88][110/3239]	Time 0.287 (1.331)	Data Time 0.001 (0.367)	Loss 2.1629 (2.2962)	Entropy 0.85964 (0.85967)	Top-1 acc 72.656 (68.986)	Top-5 acc 89.453 (87.116)	lr 0.00437
Train [88][120/3239]	Time 2.574 (1.261)	Data Time 0.001 (0.337)	Loss 2.2828 (2.2941)	Entropy 0.85964 (0.85966)	Top-1 acc 67.578 (69.008)	Top-5 acc 88.672 (87.190)	lr 0.00437
Train [88][130/3239]	Time 0.267 (1.185)	Data Time 0.002 (0.311)	Loss 2.2165 (2.2911)	Entropy 0.85960 (0.85966)	Top-1 acc 69.531 (69.072)	Top-5 acc 90.234 (87.288)	lr 0.00437
Train [88][140/3239]	Time 0.240 (1.136)	Data Time 0.001 (0.289)	Loss 2.3599 (2.2899)	Entropy 0.85958 (0.85965)	Top-1 acc 66.406 (69.096)	Top-5 acc 84.375 (87.278)	lr 0.00437
Train [88][150/3239]	Time 0.223 (1.091)	Data Time 0.001 (0.270)	Loss 2.3129 (2.2910)	Entropy 0.85953 (0.85965)	Top-1 acc 69.531 (69.107)	Top-5 acc 87.891 (87.262)	lr 0.00437
Train [88][160/3239]	Time 0.237 (1.053)	Data Time 0.001 (0.254)	Loss 2.2185 (2.2879)	Entropy 0.85950 (0.85964)	Top-1 acc 68.750 (69.175)	Top-5 acc 88.281 (87.301)	lr 0.00437
Train [88][170/3239]	Time 0.231 (1.019)	Data Time 0.001 (0.239)	Loss 2.1447 (2.2911)	Entropy 0.85944 (0.85963)	Top-1 acc 73.828 (69.063)	Top-5 acc 89.062 (87.246)	lr 0.00437
Train [88][180/3239]	Time 0.270 (0.990)	Data Time 0.001 (0.226)	Loss 2.3518 (2.2914)	Entropy 0.85946 (0.85962)	Top-1 acc 67.969 (69.054)	Top-5 acc 85.156 (87.226)	lr 0.00437
Train [88][190/3239]	Time 0.218 (0.964)	Data Time 0.001 (0.214)	Loss 2.3523 (2.2918)	Entropy 0.85945 (0.85961)	Top-1 acc 69.141 (69.047)	Top-5 acc 86.719 (87.216)	lr 0.00437
Train [88][200/3239]	Time 0.221 (0.939)	Data Time 0.001 (0.204)	Loss 2.4336 (2.2921)	Entropy 0.85946 (0.85960)	Top-1 acc 66.016 (69.049)	Top-5 acc 83.203 (87.218)	lr 0.00437
Train [88][210/3239]	Time 0.229 (0.917)	Data Time 0.001 (0.194)	Loss 2.3192 (2.2928)	Entropy 0.85949 (0.85959)	Top-1 acc 68.750 (69.020)	Top-5 acc 86.719 (87.246)	lr 0.00437
Train [88][220/3239]	Time 0.228 (0.897)	Data Time 0.001 (0.185)	Loss 2.1963 (2.2898)	Entropy 0.85950 (0.85959)	Top-1 acc 72.656 (69.123)	Top-5 acc 88.672 (87.281)	lr 0.00437
Train [88][230/3239]	Time 2.510 (0.878)	Data Time 0.001 (0.177)	Loss 2.4024 (2.2904)	Entropy 0.85950 (0.85959)	Top-1 acc 66.797 (69.114)	Top-5 acc 83.984 (87.289)	lr 0.00436
Train [88][240/3239]	Time 0.280 (0.852)	Data Time 0.001 (0.170)	Loss 2.2680 (2.2931)	Entropy 0.85949 (0.85958)	Top-1 acc 72.656 (69.071)	Top-5 acc 86.719 (87.224)	lr 0.00436
Train [88][250/3239]	Time 0.230 (0.838)	Data Time 0.002 (0.163)	Loss 2.2394 (2.2931)	Entropy 0.85945 (0.85958)	Top-1 acc 70.703 (69.071)	Top-5 acc 88.672 (87.207)	lr 0.00436
Train [88][260/3239]	Time 0.327 (0.824)	Data Time 0.001 (0.157)	Loss 2.2271 (2.2924)	Entropy 0.85941 (0.85957)	Top-1 acc 72.266 (69.079)	Top-5 acc 89.844 (87.229)	lr 0.00436
Train [88][270/3239]	Time 0.238 (0.811)	Data Time 0.001 (0.151)	Loss 2.4758 (2.2937)	Entropy 0.85939 (0.85956)	Top-1 acc 65.625 (69.053)	Top-5 acc 84.766 (87.193)	lr 0.00436
Train [88][280/3239]	Time 0.222 (0.799)	Data Time 0.001 (0.146)	Loss 2.5188 (2.2939)	Entropy 0.85940 (0.85956)	Top-1 acc 65.234 (69.032)	Top-5 acc 82.031 (87.175)	lr 0.00436
Train [88][290/3239]	Time 0.227 (0.787)	Data Time 0.001 (0.141)	Loss 2.1133 (2.2943)	Entropy 0.85938 (0.85955)	Top-1 acc 72.656 (69.039)	Top-5 acc 92.578 (87.156)	lr 0.00436
Train [88][300/3239]	Time 0.325 (0.777)	Data Time 0.001 (0.136)	Loss 2.2320 (2.2943)	Entropy 0.85941 (0.85955)	Top-1 acc 69.922 (69.055)	Top-5 acc 89.062 (87.159)	lr 0.00436
Train [88][310/3239]	Time 0.252 (0.768)	Data Time 0.001 (0.132)	Loss 2.2919 (2.2950)	Entropy 0.85920 (0.85954)	Top-1 acc 69.531 (69.055)	Top-5 acc 89.062 (87.145)	lr 0.00436
Train [88][320/3239]	Time 0.229 (0.759)	Data Time 0.001 (0.128)	Loss 2.2596 (2.2944)	Entropy 0.85918 (0.85953)	Top-1 acc 67.188 (69.059)	Top-5 acc 88.672 (87.158)	lr 0.00436
Train [88][330/3239]	Time 0.219 (0.750)	Data Time 0.001 (0.124)	Loss 2.3341 (2.2928)	Entropy 0.85915 (0.85952)	Top-1 acc 67.578 (69.128)	Top-5 acc 86.719 (87.173)	lr 0.00436
Train [88][340/3239]	Time 2.665 (0.742)	Data Time 0.001 (0.121)	Loss 2.3211 (2.2912)	Entropy 0.85915 (0.85951)	Top-1 acc 70.703 (69.189)	Top-5 acc 87.109 (87.201)	lr 0.00436
Train [88][350/3239]	Time 0.244 (0.728)	Data Time 0.001 (0.117)	Loss 2.3216 (2.2918)	Entropy 0.85906 (0.85950)	Top-1 acc 70.312 (69.182)	Top-5 acc 86.719 (87.187)	lr 0.00436
Train [88][360/3239]	Time 0.246 (0.721)	Data Time 0.001 (0.114)	Loss 2.2244 (2.2914)	Entropy 0.85904 (0.85948)	Top-1 acc 69.922 (69.183)	Top-5 acc 87.500 (87.197)	lr 0.00435
Train [88][370/3239]	Time 0.237 (0.715)	Data Time 0.002 (0.111)	Loss 2.3715 (2.2904)	Entropy 0.85901 (0.85947)	Top-1 acc 65.234 (69.182)	Top-5 acc 82.422 (87.182)	lr 0.00435
Train [88][380/3239]	Time 0.216 (0.709)	Data Time 0.001 (0.108)	Loss 2.1669 (2.2900)	Entropy 0.85899 (0.85946)	Top-1 acc 73.828 (69.205)	Top-5 acc 87.891 (87.178)	lr 0.00435
Train [88][390/3239]	Time 0.228 (0.703)	Data Time 0.001 (0.105)	Loss 2.4495 (2.2902)	Entropy 0.85900 (0.85945)	Top-1 acc 65.234 (69.209)	Top-5 acc 81.641 (87.166)	lr 0.00435
Train [88][400/3239]	Time 0.217 (0.697)	Data Time 0.001 (0.103)	Loss 2.3510 (2.2893)	Entropy 0.85897 (0.85943)	Top-1 acc 66.016 (69.238)	Top-5 acc 86.328 (87.198)	lr 0.00435
Train [88][410/3239]	Time 0.229 (0.692)	Data Time 0.001 (0.100)	Loss 2.3561 (2.2898)	Entropy 0.85894 (0.85942)	Top-1 acc 66.797 (69.198)	Top-5 acc 88.281 (87.196)	lr 0.00435
Train [88][420/3239]	Time 0.231 (0.687)	Data Time 0.001 (0.098)	Loss 2.2927 (2.2897)	Entropy 0.85890 (0.85941)	Top-1 acc 67.188 (69.190)	Top-5 acc 88.672 (87.205)	lr 0.00435
Train [88][430/3239]	Time 0.320 (0.682)	Data Time 0.001 (0.096)	Loss 2.2804 (2.2906)	Entropy 0.85887 (0.85940)	Top-1 acc 67.969 (69.165)	Top-5 acc 86.719 (87.189)	lr 0.00435
Train [88][440/3239]	Time 0.355 (0.797)	Data Time 0.004 (0.094)	Loss 2.1625 (2.2904)	Entropy 0.85882 (0.85939)	Top-1 acc 73.438 (69.180)	Top-5 acc 89.062 (87.196)	lr 0.00435
Train [88][450/3239]	Time 2.508 (0.791)	Data Time 0.002 (0.092)	Loss 2.3730 (2.2919)	Entropy 0.85882 (0.85937)	Top-1 acc 68.750 (69.148)	Top-5 acc 86.328 (87.160)	lr 0.00435
Train [88][460/3239]	Time 0.211 (0.778)	Data Time 0.002 (0.090)	Loss 2.2467 (2.2914)	Entropy 0.85881 (0.85936)	Top-1 acc 67.969 (69.169)	Top-5 acc 88.281 (87.165)	lr 0.00435
Train [88][470/3239]	Time 0.241 (0.772)	Data Time 0.001 (0.088)	Loss 2.3688 (2.2907)	Entropy 0.85879 (0.85935)	Top-1 acc 67.969 (69.170)	Top-5 acc 85.938 (87.181)	lr 0.00435
Train [88][480/3239]	Time 0.242 (0.766)	Data Time 0.001 (0.086)	Loss 2.2606 (2.2901)	Entropy 0.85936 (0.85935)	Top-1 acc 67.969 (69.177)	Top-5 acc 90.234 (87.211)	lr 0.00435
Train [88][490/3239]	Time 0.229 (0.760)	Data Time 0.001 (0.084)	Loss 2.2429 (2.2906)	Entropy 0.85934 (0.85935)	Top-1 acc 69.141 (69.164)	Top-5 acc 87.500 (87.205)	lr 0.00434
Train [88][500/3239]	Time 0.232 (0.754)	Data Time 0.001 (0.083)	Loss 2.2529 (2.2904)	Entropy 0.85933 (0.85935)	Top-1 acc 71.094 (69.176)	Top-5 acc 87.891 (87.198)	lr 0.00434
Train [88][510/3239]	Time 0.229 (0.749)	Data Time 0.001 (0.081)	Loss 2.2131 (2.2899)	Entropy 0.85934 (0.85935)	Top-1 acc 72.656 (69.210)	Top-5 acc 90.234 (87.203)	lr 0.00434
Train [88][520/3239]	Time 0.215 (0.743)	Data Time 0.001 (0.080)	Loss 2.2747 (2.2898)	Entropy 0.85938 (0.85935)	Top-1 acc 71.484 (69.206)	Top-5 acc 85.938 (87.202)	lr 0.00434
Train [88][530/3239]	Time 0.232 (0.738)	Data Time 0.001 (0.078)	Loss 2.2115 (2.2895)	Entropy 0.85941 (0.85935)	Top-1 acc 67.969 (69.216)	Top-5 acc 87.891 (87.213)	lr 0.00434
Train [88][540/3239]	Time 0.235 (0.733)	Data Time 0.001 (0.077)	Loss 2.3172 (2.2895)	Entropy 0.85933 (0.85935)	Top-1 acc 69.141 (69.211)	Top-5 acc 86.328 (87.203)	lr 0.00434
Train [88][550/3239]	Time 0.212 (0.728)	Data Time 0.001 (0.075)	Loss 2.2297 (2.2901)	Entropy 0.85931 (0.85935)	Top-1 acc 71.094 (69.185)	Top-5 acc 87.500 (87.179)	lr 0.00434
Train [88][560/3239]	Time 2.566 (0.724)	Data Time 0.001 (0.074)	Loss 2.3230 (2.2897)	Entropy 0.85931 (0.85935)	Top-1 acc 66.406 (69.187)	Top-5 acc 86.719 (87.186)	lr 0.00434
Train [88][570/3239]	Time 0.227 (0.715)	Data Time 0.001 (0.073)	Loss 2.0971 (2.2897)	Entropy 0.85924 (0.85935)	Top-1 acc 74.609 (69.184)	Top-5 acc 89.844 (87.194)	lr 0.00434
Train [88][580/3239]	Time 0.228 (0.711)	Data Time 0.001 (0.071)	Loss 2.4723 (2.2904)	Entropy 0.85909 (0.85934)	Top-1 acc 59.375 (69.151)	Top-5 acc 82.812 (87.183)	lr 0.00434
Train [88][590/3239]	Time 0.228 (0.707)	Data Time 0.001 (0.070)	Loss 2.2009 (2.2908)	Entropy 0.85916 (0.85934)	Top-1 acc 72.656 (69.153)	Top-5 acc 87.109 (87.161)	lr 0.00434
Train [88][600/3239]	Time 0.315 (0.704)	Data Time 0.001 (0.069)	Loss 2.1678 (2.2910)	Entropy 0.85901 (0.85933)	Top-1 acc 71.094 (69.138)	Top-5 acc 89.844 (87.157)	lr 0.00434
Train [88][610/3239]	Time 0.224 (0.700)	Data Time 0.001 (0.068)	Loss 2.1320 (2.2907)	Entropy 0.85901 (0.85933)	Top-1 acc 72.266 (69.153)	Top-5 acc 90.625 (87.164)	lr 0.00434
Train [88][620/3239]	Time 0.227 (0.696)	Data Time 0.001 (0.067)	Loss 2.3976 (2.2907)	Entropy 0.85898 (0.85932)	Top-1 acc 66.797 (69.155)	Top-5 acc 85.938 (87.158)	lr 0.00433
Train [88][630/3239]	Time 0.218 (0.693)	Data Time 0.001 (0.066)	Loss 2.2237 (2.2901)	Entropy 0.85893 (0.85932)	Top-1 acc 69.531 (69.168)	Top-5 acc 89.062 (87.158)	lr 0.00433
Train [88][640/3239]	Time 0.230 (0.689)	Data Time 0.002 (0.065)	Loss 2.3882 (2.2910)	Entropy 0.85879 (0.85931)	Top-1 acc 67.969 (69.158)	Top-5 acc 85.547 (87.142)	lr 0.00433
Train [88][650/3239]	Time 0.213 (0.686)	Data Time 0.001 (0.064)	Loss 2.3043 (2.2903)	Entropy 0.85877 (0.85930)	Top-1 acc 64.062 (69.181)	Top-5 acc 87.891 (87.150)	lr 0.00433
Train [88][660/3239]	Time 0.261 (0.683)	Data Time 0.001 (0.063)	Loss 2.1886 (2.2900)	Entropy 0.85869 (0.85929)	Top-1 acc 73.828 (69.184)	Top-5 acc 87.109 (87.166)	lr 0.00433
Train [88][670/3239]	Time 2.520 (0.680)	Data Time 0.002 (0.062)	Loss 2.3244 (2.2906)	Entropy 0.85869 (0.85929)	Top-1 acc 67.188 (69.173)	Top-5 acc 85.938 (87.152)	lr 0.00433
Train [88][680/3239]	Time 0.233 (0.673)	Data Time 0.001 (0.061)	Loss 2.3277 (2.2906)	Entropy 0.85864 (0.85928)	Top-1 acc 65.625 (69.173)	Top-5 acc 88.672 (87.158)	lr 0.00433
Train [88][690/3239]	Time 0.341 (0.671)	Data Time 0.001 (0.060)	Loss 2.3267 (2.2906)	Entropy 0.85862 (0.85927)	Top-1 acc 69.531 (69.186)	Top-5 acc 87.109 (87.162)	lr 0.00433
Train [88][700/3239]	Time 0.229 (0.668)	Data Time 0.001 (0.059)	Loss 2.2108 (2.2913)	Entropy 0.85849 (0.85926)	Top-1 acc 69.141 (69.165)	Top-5 acc 87.891 (87.151)	lr 0.00433
Train [88][710/3239]	Time 0.234 (0.665)	Data Time 0.001 (0.059)	Loss 2.3651 (2.2908)	Entropy 0.85839 (0.85924)	Top-1 acc 67.578 (69.175)	Top-5 acc 87.109 (87.159)	lr 0.00433
Train [88][720/3239]	Time 0.222 (0.662)	Data Time 0.001 (0.058)	Loss 2.2377 (2.2897)	Entropy 0.85830 (0.85923)	Top-1 acc 72.266 (69.206)	Top-5 acc 87.500 (87.175)	lr 0.00433
Train [88][730/3239]	Time 0.236 (0.660)	Data Time 0.001 (0.057)	Loss 2.3278 (2.2896)	Entropy 0.85826 (0.85922)	Top-1 acc 70.703 (69.202)	Top-5 acc 86.719 (87.180)	lr 0.00433
Train [88][740/3239]	Time 0.234 (0.657)	Data Time 0.001 (0.056)	Loss 2.1093 (2.2891)	Entropy 0.85805 (0.85920)	Top-1 acc 74.219 (69.202)	Top-5 acc 89.844 (87.191)	lr 0.00433
Train [88][750/3239]	Time 0.241 (0.655)	Data Time 0.001 (0.056)	Loss 2.1375 (2.2889)	Entropy 0.85795 (0.85919)	Top-1 acc 69.531 (69.205)	Top-5 acc 90.625 (87.188)	lr 0.00432
Train [88][760/3239]	Time 0.231 (0.652)	Data Time 0.002 (0.055)	Loss 2.2986 (2.2889)	Entropy 0.85783 (0.85917)	Top-1 acc 65.625 (69.216)	Top-5 acc 85.938 (87.185)	lr 0.00432
Train [88][770/3239]	Time 0.269 (0.650)	Data Time 0.002 (0.054)	Loss 2.1331 (2.2886)	Entropy 0.85781 (0.85915)	Top-1 acc 72.266 (69.220)	Top-5 acc 91.016 (87.191)	lr 0.00432
Train [88][780/3239]	Time 2.476 (0.648)	Data Time 0.001 (0.054)	Loss 2.3098 (2.2889)	Entropy 0.85781 (0.85914)	Top-1 acc 69.141 (69.222)	Top-5 acc 86.328 (87.187)	lr 0.00432
Train [88][790/3239]	Time 0.226 (0.643)	Data Time 0.001 (0.053)	Loss 2.1929 (2.2894)	Entropy 0.85782 (0.85912)	Top-1 acc 66.797 (69.209)	Top-5 acc 89.453 (87.181)	lr 0.00432
Train [88][800/3239]	Time 0.228 (0.640)	Data Time 0.001 (0.052)	Loss 2.2753 (2.2892)	Entropy 0.85781 (0.85910)	Top-1 acc 68.750 (69.211)	Top-5 acc 88.281 (87.185)	lr 0.00432
Train [88][810/3239]	Time 0.225 (0.706)	Data Time 0.002 (0.052)	Loss 2.2661 (2.2896)	Entropy 0.85777 (0.85909)	Top-1 acc 68.359 (69.195)	Top-5 acc 88.281 (87.173)	lr 0.00432
Train [88][820/3239]	Time 0.227 (0.703)	Data Time 0.001 (0.051)	Loss 2.3393 (2.2892)	Entropy 0.85767 (0.85907)	Top-1 acc 67.188 (69.205)	Top-5 acc 87.500 (87.177)	lr 0.00432
Train [88][830/3239]	Time 0.221 (0.700)	Data Time 0.001 (0.050)	Loss 2.2227 (2.2890)	Entropy 0.85762 (0.85905)	Top-1 acc 70.312 (69.207)	Top-5 acc 90.234 (87.186)	lr 0.00432
Train [88][840/3239]	Time 0.227 (0.697)	Data Time 0.001 (0.050)	Loss 2.1859 (2.2886)	Entropy 0.85757 (0.85904)	Top-1 acc 69.922 (69.218)	Top-5 acc 88.281 (87.191)	lr 0.00432
Train [88][850/3239]	Time 0.239 (0.695)	Data Time 0.001 (0.049)	Loss 2.2551 (2.2885)	Entropy 0.85758 (0.85902)	Top-1 acc 71.875 (69.221)	Top-5 acc 89.062 (87.189)	lr 0.00432
Train [88][860/3239]	Time 0.316 (0.692)	Data Time 0.001 (0.049)	Loss 2.3306 (2.2888)	Entropy 0.85759 (0.85900)	Top-1 acc 69.531 (69.220)	Top-5 acc 87.500 (87.188)	lr 0.00432
Train [88][870/3239]	Time 0.238 (0.690)	Data Time 0.001 (0.048)	Loss 2.3030 (2.2888)	Entropy 0.85755 (0.85899)	Top-1 acc 71.484 (69.223)	Top-5 acc 86.328 (87.191)	lr 0.00432
Train [88][880/3239]	Time 0.206 (0.687)	Data Time 0.001 (0.048)	Loss 2.3354 (2.2892)	Entropy 0.85746 (0.85897)	Top-1 acc 69.141 (69.208)	Top-5 acc 87.891 (87.191)	lr 0.00431
Train [88][890/3239]	Time 2.565 (0.685)	Data Time 0.001 (0.047)	Loss 2.3074 (2.2888)	Entropy 0.85746 (0.85895)	Top-1 acc 71.875 (69.217)	Top-5 acc 86.719 (87.196)	lr 0.00431
Train [88][900/3239]	Time 0.244 (0.680)	Data Time 0.001 (0.047)	Loss 2.2385 (2.2890)	Entropy 0.85742 (0.85894)	Top-1 acc 65.625 (69.205)	Top-5 acc 86.328 (87.191)	lr 0.00431
Train [88][910/3239]	Time 0.247 (0.678)	Data Time 0.002 (0.046)	Loss 2.2513 (2.2890)	Entropy 0.85730 (0.85892)	Top-1 acc 69.922 (69.196)	Top-5 acc 89.844 (87.186)	lr 0.00431
Train [88][920/3239]	Time 0.234 (0.675)	Data Time 0.001 (0.046)	Loss 2.4595 (2.2894)	Entropy 0.85725 (0.85890)	Top-1 acc 66.016 (69.194)	Top-5 acc 82.422 (87.174)	lr 0.00431
Train [88][930/3239]	Time 0.232 (0.673)	Data Time 0.002 (0.045)	Loss 2.5111 (2.2896)	Entropy 0.85720 (0.85888)	Top-1 acc 62.109 (69.188)	Top-5 acc 84.766 (87.172)	lr 0.00431
Train [88][940/3239]	Time 0.230 (0.671)	Data Time 0.001 (0.045)	Loss 2.1018 (2.2904)	Entropy 0.85717 (0.85886)	Top-1 acc 76.172 (69.188)	Top-5 acc 91.016 (87.160)	lr 0.00431
Train [88][950/3239]	Time 0.224 (0.669)	Data Time 0.001 (0.044)	Loss 2.2526 (2.2900)	Entropy 0.85715 (0.85885)	Top-1 acc 67.578 (69.187)	Top-5 acc 88.281 (87.167)	lr 0.00431
Train [88][960/3239]	Time 0.234 (0.667)	Data Time 0.001 (0.044)	Loss 2.3403 (2.2902)	Entropy 0.85711 (0.85883)	Top-1 acc 66.016 (69.172)	Top-5 acc 87.500 (87.173)	lr 0.00431
Train [88][970/3239]	Time 0.273 (0.665)	Data Time 0.001 (0.043)	Loss 2.2467 (2.2902)	Entropy 0.85710 (0.85881)	Top-1 acc 67.578 (69.167)	Top-5 acc 88.672 (87.169)	lr 0.00431
Train [88][980/3239]	Time 0.214 (0.663)	Data Time 0.001 (0.043)	Loss 2.4060 (2.2903)	Entropy 0.85721 (0.85879)	Top-1 acc 66.797 (69.169)	Top-5 acc 83.984 (87.165)	lr 0.00431
Train [88][990/3239]	Time 0.324 (0.661)	Data Time 0.001 (0.043)	Loss 2.2148 (2.2906)	Entropy 0.85704 (0.85878)	Top-1 acc 69.531 (69.164)	Top-5 acc 90.625 (87.167)	lr 0.00431
Train [88][1000/3239]	Time 2.545 (0.659)	Data Time 0.001 (0.042)	Loss 2.3138 (2.2903)	Entropy 0.85704 (0.85876)	Top-1 acc 67.969 (69.179)	Top-5 acc 88.281 (87.171)	lr 0.00431
Train [88][1010/3239]	Time 0.231 (0.655)	Data Time 0.001 (0.042)	Loss 2.2512 (2.2902)	Entropy 0.85705 (0.85874)	Top-1 acc 69.922 (69.189)	Top-5 acc 88.281 (87.168)	lr 0.00430
Train [88][1020/3239]	Time 0.242 (0.653)	Data Time 0.001 (0.041)	Loss 2.1926 (2.2895)	Entropy 0.85688 (0.85873)	Top-1 acc 73.047 (69.204)	Top-5 acc 87.891 (87.187)	lr 0.00430
Train [88][1030/3239]	Time 0.340 (0.652)	Data Time 0.001 (0.041)	Loss 2.2332 (2.2894)	Entropy 0.85690 (0.85871)	Top-1 acc 71.094 (69.199)	Top-5 acc 89.844 (87.192)	lr 0.00430
Train [88][1040/3239]	Time 0.227 (0.650)	Data Time 0.001 (0.041)	Loss 2.3106 (2.2898)	Entropy 0.85689 (0.85869)	Top-1 acc 72.266 (69.201)	Top-5 acc 85.156 (87.182)	lr 0.00430
Train [88][1050/3239]	Time 0.218 (0.648)	Data Time 0.001 (0.040)	Loss 2.3152 (2.2899)	Entropy 0.85685 (0.85867)	Top-1 acc 69.141 (69.203)	Top-5 acc 87.500 (87.178)	lr 0.00430
Train [88][1060/3239]	Time 0.236 (0.646)	Data Time 0.001 (0.040)	Loss 2.3735 (2.2900)	Entropy 0.85693 (0.85866)	Top-1 acc 68.750 (69.203)	Top-5 acc 83.984 (87.177)	lr 0.00430
Train [88][1070/3239]	Time 0.233 (0.645)	Data Time 0.001 (0.039)	Loss 2.3705 (2.2901)	Entropy 0.85694 (0.85864)	Top-1 acc 67.578 (69.206)	Top-5 acc 85.547 (87.172)	lr 0.00430
Train [88][1080/3239]	Time 0.233 (0.643)	Data Time 0.001 (0.039)	Loss 2.1728 (2.2900)	Entropy 0.85685 (0.85862)	Top-1 acc 69.141 (69.210)	Top-5 acc 90.234 (87.174)	lr 0.00430
Train [88][1090/3239]	Time 0.274 (0.642)	Data Time 0.001 (0.039)	Loss 2.3948 (2.2902)	Entropy 0.85684 (0.85861)	Top-1 acc 65.625 (69.201)	Top-5 acc 86.719 (87.173)	lr 0.00430
Train [88][1100/3239]	Time 0.207 (0.640)	Data Time 0.001 (0.038)	Loss 2.4220 (2.2907)	Entropy 0.85679 (0.85859)	Top-1 acc 66.797 (69.191)	Top-5 acc 83.203 (87.164)	lr 0.00430
Train [88][1110/3239]	Time 2.530 (0.639)	Data Time 0.001 (0.038)	Loss 2.1849 (2.2904)	Entropy 0.85679 (0.85857)	Top-1 acc 72.266 (69.203)	Top-5 acc 89.062 (87.165)	lr 0.00430
Train [88][1120/3239]	Time 0.319 (0.635)	Data Time 0.001 (0.038)	Loss 2.1759 (2.2903)	Entropy 0.85680 (0.85856)	Top-1 acc 73.828 (69.200)	Top-5 acc 88.672 (87.167)	lr 0.00430
Train [88][1130/3239]	Time 0.245 (0.634)	Data Time 0.001 (0.037)	Loss 2.4394 (2.2904)	Entropy 0.85679 (0.85854)	Top-1 acc 67.969 (69.198)	Top-5 acc 84.766 (87.165)	lr 0.00430
Train [88][1140/3239]	Time 0.229 (0.632)	Data Time 0.001 (0.037)	Loss 2.4369 (2.2902)	Entropy 0.85683 (0.85853)	Top-1 acc 63.672 (69.197)	Top-5 acc 85.547 (87.170)	lr 0.00429
Train [88][1150/3239]	Time 0.223 (0.631)	Data Time 0.001 (0.037)	Loss 2.1044 (2.2913)	Entropy 0.85677 (0.85851)	Top-1 acc 71.484 (69.177)	Top-5 acc 90.625 (87.155)	lr 0.00429
Train [88][1160/3239]	Time 0.329 (0.630)	Data Time 0.001 (0.037)	Loss 2.3096 (2.2917)	Entropy 0.85673 (0.85850)	Top-1 acc 69.922 (69.176)	Top-5 acc 88.281 (87.147)	lr 0.00429
Train [88][1170/3239]	Time 0.310 (0.670)	Data Time 0.004 (0.036)	Loss 2.3557 (2.2916)	Entropy 0.85674 (0.85848)	Top-1 acc 67.969 (69.189)	Top-5 acc 86.719 (87.147)	lr 0.00429
Train [88][1180/3239]	Time 0.235 (0.669)	Data Time 0.002 (0.036)	Loss 2.3292 (2.2915)	Entropy 0.85669 (0.85847)	Top-1 acc 70.703 (69.201)	Top-5 acc 85.156 (87.146)	lr 0.00429
Train [88][1190/3239]	Time 0.225 (0.668)	Data Time 0.001 (0.036)	Loss 2.2994 (2.2915)	Entropy 0.85671 (0.85845)	Top-1 acc 71.094 (69.203)	Top-5 acc 85.938 (87.150)	lr 0.00429
Train [88][1200/3239]	Time 0.319 (0.666)	Data Time 0.002 (0.035)	Loss 2.4126 (2.2916)	Entropy 0.85669 (0.85844)	Top-1 acc 66.797 (69.202)	Top-5 acc 84.375 (87.145)	lr 0.00429
Train [88][1210/3239]	Time 0.230 (0.665)	Data Time 0.001 (0.035)	Loss 2.3855 (2.2920)	Entropy 0.85668 (0.85842)	Top-1 acc 65.625 (69.194)	Top-5 acc 85.156 (87.140)	lr 0.00429
Train [88][1220/3239]	Time 2.506 (0.663)	Data Time 0.001 (0.035)	Loss 2.1934 (2.2917)	Entropy 0.85668 (0.85841)	Top-1 acc 69.922 (69.202)	Top-5 acc 89.062 (87.143)	lr 0.00429
Train [88][1230/3239]	Time 0.252 (0.660)	Data Time 0.001 (0.035)	Loss 2.3155 (2.2912)	Entropy 0.85663 (0.85840)	Top-1 acc 69.531 (69.212)	Top-5 acc 85.938 (87.152)	lr 0.00429
Train [88][1240/3239]	Time 0.249 (0.658)	Data Time 0.001 (0.034)	Loss 2.4423 (2.2912)	Entropy 0.85659 (0.85838)	Top-1 acc 65.234 (69.212)	Top-5 acc 85.938 (87.153)	lr 0.00429
Train [88][1250/3239]	Time 0.237 (0.657)	Data Time 0.001 (0.034)	Loss 2.1783 (2.2909)	Entropy 0.85649 (0.85837)	Top-1 acc 73.047 (69.216)	Top-5 acc 89.453 (87.162)	lr 0.00429
Train [88][1260/3239]	Time 0.241 (0.655)	Data Time 0.001 (0.034)	Loss 1.9922 (2.2908)	Entropy 0.85641 (0.85835)	Top-1 acc 74.609 (69.217)	Top-5 acc 92.578 (87.166)	lr 0.00429
Train [88][1270/3239]	Time 0.228 (0.654)	Data Time 0.001 (0.034)	Loss 2.3097 (2.2907)	Entropy 0.85636 (0.85833)	Top-1 acc 71.484 (69.225)	Top-5 acc 86.328 (87.164)	lr 0.00428
Train [88][1280/3239]	Time 0.246 (0.653)	Data Time 0.002 (0.033)	Loss 2.2314 (2.2910)	Entropy 0.85637 (0.85832)	Top-1 acc 71.094 (69.219)	Top-5 acc 87.891 (87.153)	lr 0.00428
Train [88][1290/3239]	Time 0.334 (0.651)	Data Time 0.001 (0.033)	Loss 2.1881 (2.2910)	Entropy 0.85640 (0.85830)	Top-1 acc 69.141 (69.216)	Top-5 acc 89.844 (87.156)	lr 0.00428
Train [88][1300/3239]	Time 0.231 (0.650)	Data Time 0.001 (0.033)	Loss 2.2857 (2.2912)	Entropy 0.85642 (0.85829)	Top-1 acc 68.359 (69.213)	Top-5 acc 87.500 (87.155)	lr 0.00428
Train [88][1310/3239]	Time 0.231 (0.649)	Data Time 0.001 (0.033)	Loss 2.1787 (2.2911)	Entropy 0.85640 (0.85828)	Top-1 acc 73.047 (69.213)	Top-5 acc 86.719 (87.153)	lr 0.00428
Train [88][1320/3239]	Time 0.269 (0.648)	Data Time 0.001 (0.032)	Loss 2.2998 (2.2908)	Entropy 0.85639 (0.85826)	Top-1 acc 66.797 (69.216)	Top-5 acc 87.891 (87.162)	lr 0.00428
Train [88][1330/3239]	Time 2.674 (0.647)	Data Time 0.001 (0.032)	Loss 2.2293 (2.2907)	Entropy 0.85639 (0.85825)	Top-1 acc 68.750 (69.212)	Top-5 acc 90.234 (87.167)	lr 0.00428
Train [88][1340/3239]	Time 0.223 (0.644)	Data Time 0.001 (0.032)	Loss 2.1663 (2.2909)	Entropy 0.85639 (0.85823)	Top-1 acc 71.875 (69.207)	Top-5 acc 89.844 (87.165)	lr 0.00428
Train [88][1350/3239]	Time 0.235 (0.643)	Data Time 0.001 (0.032)	Loss 2.3179 (2.2910)	Entropy 0.85633 (0.85822)	Top-1 acc 70.312 (69.205)	Top-5 acc 87.500 (87.161)	lr 0.00428
Train [88][1360/3239]	Time 0.226 (0.642)	Data Time 0.001 (0.031)	Loss 2.4172 (2.2910)	Entropy 0.85635 (0.85821)	Top-1 acc 65.625 (69.207)	Top-5 acc 83.984 (87.165)	lr 0.00428
Train [88][1370/3239]	Time 0.252 (0.640)	Data Time 0.001 (0.031)	Loss 2.1775 (2.2906)	Entropy 0.85635 (0.85819)	Top-1 acc 73.047 (69.213)	Top-5 acc 89.062 (87.170)	lr 0.00428
Train [88][1380/3239]	Time 0.240 (0.639)	Data Time 0.001 (0.031)	Loss 2.3534 (2.2906)	Entropy 0.85627 (0.85818)	Top-1 acc 69.922 (69.209)	Top-5 acc 88.281 (87.176)	lr 0.00428
Train [88][1390/3239]	Time 0.220 (0.638)	Data Time 0.001 (0.031)	Loss 2.3128 (2.2907)	Entropy 0.85630 (0.85816)	Top-1 acc 66.797 (69.213)	Top-5 acc 87.500 (87.173)	lr 0.00428
Train [88][1400/3239]	Time 0.226 (0.637)	Data Time 0.002 (0.031)	Loss 2.2682 (2.2903)	Entropy 0.85629 (0.85815)	Top-1 acc 68.750 (69.220)	Top-5 acc 87.109 (87.176)	lr 0.00427
Train [88][1410/3239]	Time 0.264 (0.636)	Data Time 0.001 (0.030)	Loss 2.5081 (2.2907)	Entropy 0.85619 (0.85814)	Top-1 acc 62.500 (69.215)	Top-5 acc 83.594 (87.167)	lr 0.00427
Train [88][1420/3239]	Time 0.232 (0.635)	Data Time 0.001 (0.030)	Loss 2.3554 (2.2907)	Entropy 0.85617 (0.85812)	Top-1 acc 71.094 (69.219)	Top-5 acc 83.984 (87.164)	lr 0.00427
Train [88][1430/3239]	Time 0.245 (0.634)	Data Time 0.001 (0.030)	Loss 2.2903 (2.2909)	Entropy 0.85620 (0.85811)	Top-1 acc 69.141 (69.214)	Top-5 acc 87.891 (87.158)	lr 0.00427
Train [88][1440/3239]	Time 2.463 (0.633)	Data Time 0.001 (0.030)	Loss 2.3782 (2.2906)	Entropy 0.85620 (0.85810)	Top-1 acc 64.844 (69.218)	Top-5 acc 86.719 (87.167)	lr 0.00427
Train [88][1450/3239]	Time 0.229 (0.630)	Data Time 0.001 (0.030)	Loss 2.3841 (2.2903)	Entropy 0.85621 (0.85808)	Top-1 acc 66.406 (69.221)	Top-5 acc 86.328 (87.175)	lr 0.00427
Train [88][1460/3239]	Time 0.350 (0.629)	Data Time 0.001 (0.029)	Loss 2.4614 (2.2904)	Entropy 0.85615 (0.85807)	Top-1 acc 65.234 (69.219)	Top-5 acc 85.547 (87.175)	lr 0.00427
Train [88][1470/3239]	Time 0.222 (0.628)	Data Time 0.001 (0.029)	Loss 2.3877 (2.2905)	Entropy 0.85608 (0.85806)	Top-1 acc 64.844 (69.215)	Top-5 acc 84.766 (87.176)	lr 0.00427
Train [88][1480/3239]	Time 0.228 (0.627)	Data Time 0.001 (0.029)	Loss 2.1790 (2.2907)	Entropy 0.85608 (0.85804)	Top-1 acc 73.828 (69.211)	Top-5 acc 88.281 (87.172)	lr 0.00427
Train [88][1490/3239]	Time 0.218 (0.626)	Data Time 0.001 (0.029)	Loss 2.4127 (2.2906)	Entropy 0.85608 (0.85803)	Top-1 acc 65.625 (69.209)	Top-5 acc 85.547 (87.175)	lr 0.00427
Train [88][1500/3239]	Time 0.243 (0.625)	Data Time 0.001 (0.029)	Loss 2.2796 (2.2906)	Entropy 0.85608 (0.85802)	Top-1 acc 67.969 (69.208)	Top-5 acc 88.281 (87.173)	lr 0.00427
Train [88][1510/3239]	Time 0.227 (0.624)	Data Time 0.001 (0.028)	Loss 2.3468 (2.2909)	Entropy 0.85598 (0.85801)	Top-1 acc 69.531 (69.202)	Top-5 acc 85.156 (87.162)	lr 0.00427
Train [88][1520/3239]	Time 0.221 (0.623)	Data Time 0.001 (0.028)	Loss 2.3329 (2.2912)	Entropy 0.85599 (0.85799)	Top-1 acc 64.453 (69.195)	Top-5 acc 85.156 (87.153)	lr 0.00427
Train [88][1530/3239]	Time 0.287 (0.655)	Data Time 0.002 (0.028)	Loss 2.1419 (2.2911)	Entropy 0.85601 (0.85798)	Top-1 acc 72.266 (69.194)	Top-5 acc 91.016 (87.155)	lr 0.00427
Train [88][1540/3239]	Time 0.234 (0.654)	Data Time 0.002 (0.028)	Loss 2.2714 (2.2912)	Entropy 0.85601 (0.85797)	Top-1 acc 69.531 (69.196)	Top-5 acc 85.547 (87.152)	lr 0.00426
Train [88][1550/3239]	Time 2.519 (0.653)	Data Time 0.001 (0.028)	Loss 2.2218 (2.2910)	Entropy 0.85601 (0.85795)	Top-1 acc 71.094 (69.202)	Top-5 acc 87.891 (87.152)	lr 0.00426
Train [88][1560/3239]	Time 0.261 (0.650)	Data Time 0.001 (0.028)	Loss 2.1811 (2.2910)	Entropy 0.85594 (0.85794)	Top-1 acc 72.266 (69.202)	Top-5 acc 90.625 (87.148)	lr 0.00426
Train [88][1570/3239]	Time 0.229 (0.649)	Data Time 0.001 (0.027)	Loss 2.1398 (2.2911)	Entropy 0.85595 (0.85793)	Top-1 acc 70.703 (69.191)	Top-5 acc 91.406 (87.150)	lr 0.00426
Train [88][1580/3239]	Time 0.243 (0.648)	Data Time 0.001 (0.027)	Loss 2.2177 (2.2913)	Entropy 0.85593 (0.85792)	Top-1 acc 69.141 (69.182)	Top-5 acc 88.672 (87.148)	lr 0.00426
Train [88][1590/3239]	Time 0.389 (0.647)	Data Time 0.001 (0.027)	Loss 2.2346 (2.2913)	Entropy 0.85592 (0.85790)	Top-1 acc 67.188 (69.179)	Top-5 acc 91.016 (87.148)	lr 0.00426
Train [88][1600/3239]	Time 0.225 (0.646)	Data Time 0.001 (0.027)	Loss 2.1777 (2.2914)	Entropy 0.85592 (0.85789)	Top-1 acc 73.828 (69.172)	Top-5 acc 86.719 (87.148)	lr 0.00426
Train [88][1610/3239]	Time 0.263 (0.645)	Data Time 0.002 (0.027)	Loss 2.2639 (2.2914)	Entropy 0.85589 (0.85788)	Top-1 acc 67.578 (69.174)	Top-5 acc 87.109 (87.147)	lr 0.00426
Train [88][1620/3239]	Time 0.236 (0.644)	Data Time 0.001 (0.027)	Loss 2.2009 (2.2919)	Entropy 0.85586 (0.85787)	Top-1 acc 70.703 (69.166)	Top-5 acc 89.844 (87.141)	lr 0.00426
Train [88][1630/3239]	Time 0.335 (0.643)	Data Time 0.001 (0.026)	Loss 2.4065 (2.2920)	Entropy 0.85582 (0.85785)	Top-1 acc 67.188 (69.167)	Top-5 acc 84.766 (87.141)	lr 0.00426
Train [88][1640/3239]	Time 0.235 (0.642)	Data Time 0.001 (0.026)	Loss 2.3831 (2.2924)	Entropy 0.85577 (0.85784)	Top-1 acc 66.797 (69.160)	Top-5 acc 86.719 (87.137)	lr 0.00426
Train [88][1650/3239]	Time 0.230 (0.641)	Data Time 0.001 (0.026)	Loss 2.3366 (2.2925)	Entropy 0.85566 (0.85783)	Top-1 acc 67.578 (69.156)	Top-5 acc 84.766 (87.130)	lr 0.00426
Train [88][1660/3239]	Time 2.550 (0.640)	Data Time 0.001 (0.026)	Loss 2.4662 (2.2924)	Entropy 0.85566 (0.85782)	Top-1 acc 62.891 (69.159)	Top-5 acc 85.938 (87.138)	lr 0.00426
Train [88][1670/3239]	Time 0.241 (0.638)	Data Time 0.001 (0.026)	Loss 2.3251 (2.2924)	Entropy 0.85562 (0.85780)	Top-1 acc 67.969 (69.159)	Top-5 acc 86.719 (87.139)	lr 0.00425
Train [88][1680/3239]	Time 0.235 (0.637)	Data Time 0.001 (0.026)	Loss 2.2247 (2.2922)	Entropy 0.85558 (0.85779)	Top-1 acc 66.797 (69.166)	Top-5 acc 89.844 (87.146)	lr 0.00425
Train [88][1690/3239]	Time 0.212 (0.636)	Data Time 0.001 (0.026)	Loss 2.4583 (2.2926)	Entropy 0.85547 (0.85778)	Top-1 acc 64.844 (69.156)	Top-5 acc 85.156 (87.139)	lr 0.00425
Train [88][1700/3239]	Time 0.218 (0.635)	Data Time 0.001 (0.025)	Loss 2.3384 (2.2930)	Entropy 0.85549 (0.85776)	Top-1 acc 68.359 (69.146)	Top-5 acc 85.547 (87.134)	lr 0.00425
Train [88][1710/3239]	Time 0.267 (0.634)	Data Time 0.002 (0.025)	Loss 2.4359 (2.2932)	Entropy 0.85545 (0.85775)	Top-1 acc 63.281 (69.142)	Top-5 acc 84.766 (87.124)	lr 0.00425
Train [88][1720/3239]	Time 0.322 (0.633)	Data Time 0.001 (0.025)	Loss 2.2036 (2.2932)	Entropy 0.85539 (0.85773)	Top-1 acc 71.484 (69.143)	Top-5 acc 87.891 (87.123)	lr 0.00425
Train [88][1730/3239]	Time 0.231 (0.632)	Data Time 0.001 (0.025)	Loss 2.3363 (2.2934)	Entropy 0.85532 (0.85772)	Top-1 acc 67.188 (69.143)	Top-5 acc 86.719 (87.119)	lr 0.00425
Train [88][1740/3239]	Time 0.237 (0.631)	Data Time 0.002 (0.025)	Loss 2.1501 (2.2934)	Entropy 0.85528 (0.85771)	Top-1 acc 71.875 (69.144)	Top-5 acc 87.109 (87.116)	lr 0.00425
Train [88][1750/3239]	Time 0.229 (0.630)	Data Time 0.001 (0.025)	Loss 2.1561 (2.2932)	Entropy 0.85532 (0.85769)	Top-1 acc 72.656 (69.149)	Top-5 acc 89.453 (87.122)	lr 0.00425
Train [88][1760/3239]	Time 0.327 (0.629)	Data Time 0.001 (0.025)	Loss 2.3530 (2.2935)	Entropy 0.85527 (0.85768)	Top-1 acc 62.500 (69.140)	Top-5 acc 85.156 (87.118)	lr 0.00425
Train [88][1770/3239]	Time 2.560 (0.629)	Data Time 0.001 (0.025)	Loss 2.2264 (2.2933)	Entropy 0.85527 (0.85767)	Top-1 acc 69.141 (69.136)	Top-5 acc 88.281 (87.122)	lr 0.00425
Train [88][1780/3239]	Time 0.215 (0.626)	Data Time 0.001 (0.024)	Loss 2.2854 (2.2933)	Entropy 0.85541 (0.85765)	Top-1 acc 68.750 (69.139)	Top-5 acc 89.453 (87.120)	lr 0.00425
Train [88][1790/3239]	Time 0.220 (0.626)	Data Time 0.001 (0.024)	Loss 2.3850 (2.2932)	Entropy 0.85543 (0.85764)	Top-1 acc 66.016 (69.141)	Top-5 acc 86.719 (87.124)	lr 0.00425
Train [88][1800/3239]	Time 0.222 (0.625)	Data Time 0.001 (0.024)	Loss 2.2840 (2.2934)	Entropy 0.85545 (0.85763)	Top-1 acc 67.188 (69.135)	Top-5 acc 86.719 (87.118)	lr 0.00424
Train [88][1810/3239]	Time 0.227 (0.624)	Data Time 0.001 (0.024)	Loss 2.3961 (2.2935)	Entropy 0.85547 (0.85762)	Top-1 acc 67.578 (69.133)	Top-5 acc 85.547 (87.115)	lr 0.00424
Train [88][1820/3239]	Time 0.224 (0.623)	Data Time 0.001 (0.024)	Loss 2.3054 (2.2935)	Entropy 0.85547 (0.85761)	Top-1 acc 69.531 (69.131)	Top-5 acc 85.938 (87.117)	lr 0.00424
Train [88][1830/3239]	Time 0.220 (0.622)	Data Time 0.001 (0.024)	Loss 2.3218 (2.2937)	Entropy 0.85546 (0.85759)	Top-1 acc 67.969 (69.123)	Top-5 acc 85.938 (87.111)	lr 0.00424
Train [88][1840/3239]	Time 0.228 (0.622)	Data Time 0.001 (0.024)	Loss 2.2064 (2.2936)	Entropy 0.85545 (0.85758)	Top-1 acc 70.312 (69.126)	Top-5 acc 89.453 (87.113)	lr 0.00424
Train [88][1850/3239]	Time 0.220 (0.621)	Data Time 0.001 (0.024)	Loss 2.2207 (2.2936)	Entropy 0.85538 (0.85757)	Top-1 acc 73.047 (69.127)	Top-5 acc 87.500 (87.111)	lr 0.00424
Train [88][1860/3239]	Time 0.245 (0.620)	Data Time 0.001 (0.023)	Loss 2.4451 (2.2936)	Entropy 0.85534 (0.85756)	Top-1 acc 64.453 (69.127)	Top-5 acc 83.984 (87.110)	lr 0.00424
Train [88][1870/3239]	Time 0.255 (0.619)	Data Time 0.001 (0.023)	Loss 2.4088 (2.2937)	Entropy 0.85521 (0.85755)	Top-1 acc 66.406 (69.125)	Top-5 acc 82.812 (87.108)	lr 0.00424
Train [88][1880/3239]	Time 2.537 (0.619)	Data Time 0.001 (0.023)	Loss 2.3758 (2.2939)	Entropy 0.85521 (0.85753)	Top-1 acc 64.062 (69.123)	Top-5 acc 85.547 (87.104)	lr 0.00424
Train [88][1890/3239]	Time 0.344 (0.617)	Data Time 0.001 (0.023)	Loss 2.2656 (2.2939)	Entropy 0.85514 (0.85752)	Top-1 acc 69.922 (69.128)	Top-5 acc 87.500 (87.103)	lr 0.00424
Train [88][1900/3239]	Time 0.288 (0.642)	Data Time 0.003 (0.023)	Loss 2.2019 (2.2940)	Entropy 0.85505 (0.85751)	Top-1 acc 73.438 (69.124)	Top-5 acc 89.062 (87.103)	lr 0.00424
Train [88][1910/3239]	Time 0.261 (0.641)	Data Time 0.002 (0.023)	Loss 2.3348 (2.2939)	Entropy 0.85501 (0.85750)	Top-1 acc 66.797 (69.124)	Top-5 acc 85.938 (87.110)	lr 0.00424
Train [88][1920/3239]	Time 0.235 (0.641)	Data Time 0.001 (0.023)	Loss 2.3248 (2.2938)	Entropy 0.85496 (0.85748)	Top-1 acc 68.750 (69.125)	Top-5 acc 86.328 (87.112)	lr 0.00424
Train [88][1930/3239]	Time 0.349 (0.640)	Data Time 0.001 (0.023)	Loss 2.2739 (2.2938)	Entropy 0.85490 (0.85747)	Top-1 acc 70.312 (69.128)	Top-5 acc 88.281 (87.113)	lr 0.00423
Train [88][1940/3239]	Time 0.218 (0.639)	Data Time 0.001 (0.023)	Loss 2.1767 (2.2940)	Entropy 0.85490 (0.85746)	Top-1 acc 72.266 (69.125)	Top-5 acc 87.891 (87.106)	lr 0.00423
Train [88][1950/3239]	Time 0.232 (0.638)	Data Time 0.001 (0.022)	Loss 2.3409 (2.2939)	Entropy 0.85488 (0.85744)	Top-1 acc 66.797 (69.125)	Top-5 acc 87.891 (87.109)	lr 0.00423
Train [88][1960/3239]	Time 0.234 (0.637)	Data Time 0.001 (0.022)	Loss 2.2828 (2.2941)	Entropy 0.85461 (0.85743)	Top-1 acc 69.922 (69.125)	Top-5 acc 87.891 (87.104)	lr 0.00423
Train [88][1970/3239]	Time 0.224 (0.636)	Data Time 0.001 (0.022)	Loss 2.2759 (2.2940)	Entropy 0.85461 (0.85742)	Top-1 acc 70.312 (69.127)	Top-5 acc 88.281 (87.104)	lr 0.00423
Train [88][1980/3239]	Time 0.219 (0.636)	Data Time 0.001 (0.022)	Loss 2.4844 (2.2944)	Entropy 0.85464 (0.85740)	Top-1 acc 63.281 (69.120)	Top-5 acc 85.156 (87.101)	lr 0.00423
Train [88][1990/3239]	Time 2.492 (0.635)	Data Time 0.001 (0.022)	Loss 2.2990 (2.2944)	Entropy 0.85464 (0.85739)	Top-1 acc 67.969 (69.116)	Top-5 acc 88.672 (87.102)	lr 0.00423
Train [88][2000/3239]	Time 0.265 (0.633)	Data Time 0.002 (0.022)	Loss 2.4538 (2.2944)	Entropy 0.85462 (0.85737)	Top-1 acc 66.016 (69.114)	Top-5 acc 83.984 (87.100)	lr 0.00423
Train [88][2010/3239]	Time 0.235 (0.632)	Data Time 0.002 (0.022)	Loss 2.3462 (2.2942)	Entropy 0.85461 (0.85736)	Top-1 acc 66.406 (69.120)	Top-5 acc 85.938 (87.104)	lr 0.00423
Train [88][2020/3239]	Time 0.256 (0.632)	Data Time 0.002 (0.022)	Loss 2.2054 (2.2943)	Entropy 0.85460 (0.85735)	Top-1 acc 71.094 (69.117)	Top-5 acc 87.891 (87.098)	lr 0.00423
Train [88][2030/3239]	Time 0.230 (0.631)	Data Time 0.001 (0.022)	Loss 2.1774 (2.2940)	Entropy 0.85459 (0.85733)	Top-1 acc 73.438 (69.125)	Top-5 acc 87.891 (87.106)	lr 0.00423
Train [88][2040/3239]	Time 0.229 (0.630)	Data Time 0.001 (0.022)	Loss 2.2953 (2.2942)	Entropy 0.85450 (0.85732)	Top-1 acc 69.141 (69.116)	Top-5 acc 87.891 (87.106)	lr 0.00423
Train [88][2050/3239]	Time 0.173 (0.629)	Data Time 0.001 (0.021)	Loss 2.4724 (2.2941)	Entropy 0.85450 (0.85730)	Top-1 acc 64.844 (69.111)	Top-5 acc 81.641 (87.108)	lr 0.00423
Train [88][2060/3239]	Time 0.320 (0.628)	Data Time 0.001 (0.021)	Loss 2.2538 (2.2939)	Entropy 0.85440 (0.85729)	Top-1 acc 71.094 (69.117)	Top-5 acc 87.109 (87.115)	lr 0.00422
Train [88][2070/3239]	Time 0.238 (0.628)	Data Time 0.001 (0.021)	Loss 2.2037 (2.2937)	Entropy 0.85435 (0.85728)	Top-1 acc 73.828 (69.125)	Top-5 acc 87.109 (87.117)	lr 0.00422
Train [88][2080/3239]	Time 0.239 (0.627)	Data Time 0.001 (0.021)	Loss 2.3949 (2.2937)	Entropy 0.85434 (0.85726)	Top-1 acc 67.188 (69.124)	Top-5 acc 86.328 (87.117)	lr 0.00422
Train [88][2090/3239]	Time 0.229 (0.626)	Data Time 0.001 (0.021)	Loss 2.2909 (2.2938)	Entropy 0.85437 (0.85725)	Top-1 acc 69.141 (69.116)	Top-5 acc 85.938 (87.116)	lr 0.00422
Train [88][2100/3239]	Time 2.673 (0.626)	Data Time 0.025 (0.021)	Loss 2.1687 (2.2940)	Entropy 0.85437 (0.85724)	Top-1 acc 72.656 (69.111)	Top-5 acc 91.016 (87.113)	lr 0.00422
Train [88][2110/3239]	Time 0.242 (0.624)	Data Time 0.002 (0.021)	Loss 2.2462 (2.2941)	Entropy 0.85442 (0.85722)	Top-1 acc 69.141 (69.112)	Top-5 acc 89.062 (87.111)	lr 0.00422
Train [88][2120/3239]	Time 0.223 (0.623)	Data Time 0.001 (0.021)	Loss 2.2759 (2.2940)	Entropy 0.85438 (0.85721)	Top-1 acc 67.578 (69.111)	Top-5 acc 87.891 (87.110)	lr 0.00422
Train [88][2130/3239]	Time 0.241 (0.622)	Data Time 0.001 (0.021)	Loss 2.3446 (2.2940)	Entropy 0.85438 (0.85720)	Top-1 acc 69.531 (69.110)	Top-5 acc 87.500 (87.109)	lr 0.00422
Train [88][2140/3239]	Time 0.237 (0.622)	Data Time 0.001 (0.021)	Loss 2.3378 (2.2941)	Entropy 0.85437 (0.85718)	Top-1 acc 66.016 (69.105)	Top-5 acc 87.109 (87.107)	lr 0.00422
Train [88][2150/3239]	Time 0.226 (0.621)	Data Time 0.001 (0.021)	Loss 2.3960 (2.2940)	Entropy 0.85429 (0.85717)	Top-1 acc 67.969 (69.109)	Top-5 acc 84.766 (87.108)	lr 0.00422
Train [88][2160/3239]	Time 0.218 (0.620)	Data Time 0.001 (0.020)	Loss 2.2339 (2.2938)	Entropy 0.85423 (0.85716)	Top-1 acc 71.875 (69.118)	Top-5 acc 89.844 (87.112)	lr 0.00422
Train [88][2170/3239]	Time 0.234 (0.620)	Data Time 0.001 (0.020)	Loss 2.3098 (2.2938)	Entropy 0.85422 (0.85714)	Top-1 acc 70.703 (69.117)	Top-5 acc 87.109 (87.111)	lr 0.00422
Train [88][2180/3239]	Time 0.237 (0.619)	Data Time 0.002 (0.020)	Loss 2.2089 (2.2936)	Entropy 0.85420 (0.85713)	Top-1 acc 71.875 (69.120)	Top-5 acc 89.453 (87.114)	lr 0.00422
Train [88][2190/3239]	Time 0.353 (0.618)	Data Time 0.001 (0.020)	Loss 2.2098 (2.2936)	Entropy 0.85419 (0.85712)	Top-1 acc 70.312 (69.119)	Top-5 acc 89.453 (87.115)	lr 0.00421
Train [88][2200/3239]	Time 0.230 (0.618)	Data Time 0.001 (0.020)	Loss 2.5629 (2.2939)	Entropy 0.85418 (0.85710)	Top-1 acc 62.500 (69.107)	Top-5 acc 81.641 (87.114)	lr 0.00421
Train [88][2210/3239]	Time 2.441 (0.617)	Data Time 0.001 (0.020)	Loss 2.1811 (2.2938)	Entropy 0.85418 (0.85709)	Top-1 acc 71.484 (69.108)	Top-5 acc 87.109 (87.114)	lr 0.00421
Train [88][2220/3239]	Time 0.229 (0.615)	Data Time 0.001 (0.020)	Loss 2.3811 (2.2939)	Entropy 0.85420 (0.85708)	Top-1 acc 64.844 (69.106)	Top-5 acc 85.156 (87.115)	lr 0.00421
Train [88][2230/3239]	Time 0.363 (0.615)	Data Time 0.001 (0.020)	Loss 2.2048 (2.2940)	Entropy 0.85418 (0.85706)	Top-1 acc 71.875 (69.103)	Top-5 acc 87.109 (87.111)	lr 0.00421
Train [88][2240/3239]	Time 0.223 (0.614)	Data Time 0.001 (0.020)	Loss 2.3334 (2.2943)	Entropy 0.85410 (0.85705)	Top-1 acc 66.406 (69.099)	Top-5 acc 86.328 (87.106)	lr 0.00421
Train [88][2250/3239]	Time 0.251 (0.614)	Data Time 0.001 (0.020)	Loss 2.1138 (2.2942)	Entropy 0.85410 (0.85704)	Top-1 acc 72.656 (69.101)	Top-5 acc 89.844 (87.107)	lr 0.00421
Train [88][2260/3239]	Time 0.227 (0.636)	Data Time 0.003 (0.020)	Loss 2.3436 (2.2941)	Entropy 0.85411 (0.85702)	Top-1 acc 68.750 (69.100)	Top-5 acc 87.500 (87.109)	lr 0.00421
Train [88][2270/3239]	Time 0.331 (0.635)	Data Time 0.002 (0.020)	Loss 2.2881 (2.2941)	Entropy 0.85406 (0.85701)	Top-1 acc 69.141 (69.102)	Top-5 acc 86.719 (87.108)	lr 0.00421
Train [88][2280/3239]	Time 0.230 (0.635)	Data Time 0.001 (0.020)	Loss 2.2486 (2.2940)	Entropy 0.85401 (0.85700)	Top-1 acc 69.531 (69.107)	Top-5 acc 90.625 (87.112)	lr 0.00421
Train [88][2290/3239]	Time 0.231 (0.634)	Data Time 0.001 (0.019)	Loss 2.4415 (2.2939)	Entropy 0.85397 (0.85698)	Top-1 acc 64.844 (69.108)	Top-5 acc 86.719 (87.115)	lr 0.00421
Train [88][2300/3239]	Time 0.226 (0.633)	Data Time 0.001 (0.019)	Loss 2.2202 (2.2939)	Entropy 0.85398 (0.85697)	Top-1 acc 75.391 (69.113)	Top-5 acc 88.672 (87.116)	lr 0.00421
Train [88][2310/3239]	Time 0.262 (0.633)	Data Time 0.001 (0.019)	Loss 2.3389 (2.2939)	Entropy 0.85400 (0.85696)	Top-1 acc 68.750 (69.108)	Top-5 acc 86.328 (87.119)	lr 0.00421
Train [88][2320/3239]	Time 2.551 (0.632)	Data Time 0.001 (0.019)	Loss 2.2930 (2.2939)	Entropy 0.85400 (0.85695)	Top-1 acc 67.578 (69.108)	Top-5 acc 87.109 (87.120)	lr 0.00421
Train [88][2330/3239]	Time 0.236 (0.630)	Data Time 0.001 (0.019)	Loss 2.2208 (2.2939)	Entropy 0.85400 (0.85693)	Top-1 acc 69.531 (69.108)	Top-5 acc 85.547 (87.122)	lr 0.00420
Train [88][2340/3239]	Time 0.234 (0.630)	Data Time 0.001 (0.019)	Loss 2.2071 (2.2938)	Entropy 0.85397 (0.85692)	Top-1 acc 68.359 (69.107)	Top-5 acc 89.453 (87.126)	lr 0.00420
Train [88][2350/3239]	Time 0.242 (0.629)	Data Time 0.001 (0.019)	Loss 2.4051 (2.2942)	Entropy 0.85397 (0.85691)	Top-1 acc 66.016 (69.099)	Top-5 acc 85.156 (87.119)	lr 0.00420
Train [88][2360/3239]	Time 0.343 (0.628)	Data Time 0.001 (0.019)	Loss 2.3125 (2.2942)	Entropy 0.85395 (0.85690)	Top-1 acc 67.578 (69.096)	Top-5 acc 86.719 (87.119)	lr 0.00420
Train [88][2370/3239]	Time 0.241 (0.628)	Data Time 0.001 (0.019)	Loss 2.2796 (2.2942)	Entropy 0.85391 (0.85688)	Top-1 acc 70.703 (69.098)	Top-5 acc 85.547 (87.122)	lr 0.00420
Train [88][2380/3239]	Time 0.228 (0.627)	Data Time 0.001 (0.019)	Loss 2.1966 (2.2942)	Entropy 0.85389 (0.85687)	Top-1 acc 70.703 (69.098)	Top-5 acc 87.891 (87.119)	lr 0.00420
Train [88][2390/3239]	Time 0.230 (0.627)	Data Time 0.001 (0.019)	Loss 2.2094 (2.2942)	Entropy 0.85388 (0.85686)	Top-1 acc 72.266 (69.098)	Top-5 acc 86.719 (87.119)	lr 0.00420
Train [88][2400/3239]	Time 0.331 (0.626)	Data Time 0.001 (0.019)	Loss 2.1505 (2.2942)	Entropy 0.85383 (0.85685)	Top-1 acc 71.875 (69.097)	Top-5 acc 91.016 (87.122)	lr 0.00420
Train [88][2410/3239]	Time 0.248 (0.625)	Data Time 0.001 (0.019)	Loss 2.2281 (2.2942)	Entropy 0.85381 (0.85683)	Top-1 acc 69.922 (69.096)	Top-5 acc 90.234 (87.124)	lr 0.00420
Train [88][2420/3239]	Time 0.223 (0.625)	Data Time 0.001 (0.018)	Loss 2.3208 (2.2944)	Entropy 0.85376 (0.85682)	Top-1 acc 68.359 (69.094)	Top-5 acc 86.328 (87.122)	lr 0.00420
Train [88][2430/3239]	Time 2.542 (0.624)	Data Time 0.001 (0.018)	Loss 2.3301 (2.2945)	Entropy 0.85376 (0.85681)	Top-1 acc 68.750 (69.091)	Top-5 acc 88.672 (87.118)	lr 0.00420
Train [88][2440/3239]	Time 0.235 (0.623)	Data Time 0.001 (0.018)	Loss 2.3608 (2.2948)	Entropy 0.85374 (0.85680)	Top-1 acc 69.141 (69.087)	Top-5 acc 84.375 (87.115)	lr 0.00420
Train [88][2450/3239]	Time 0.228 (0.622)	Data Time 0.001 (0.018)	Loss 2.3361 (2.2950)	Entropy 0.85368 (0.85678)	Top-1 acc 67.578 (69.083)	Top-5 acc 85.547 (87.113)	lr 0.00420
Train [88][2460/3239]	Time 0.225 (0.622)	Data Time 0.003 (0.018)	Loss 2.2870 (2.2951)	Entropy 0.85363 (0.85677)	Top-1 acc 67.578 (69.076)	Top-5 acc 84.766 (87.111)	lr 0.00419
Train [88][2470/3239]	Time 0.226 (0.621)	Data Time 0.001 (0.018)	Loss 2.2059 (2.2952)	Entropy 0.85359 (0.85676)	Top-1 acc 70.312 (69.073)	Top-5 acc 88.281 (87.112)	lr 0.00419
Train [88][2480/3239]	Time 0.229 (0.620)	Data Time 0.001 (0.018)	Loss 2.3379 (2.2953)	Entropy 0.85363 (0.85674)	Top-1 acc 68.359 (69.072)	Top-5 acc 86.328 (87.112)	lr 0.00419
Train [88][2490/3239]	Time 0.323 (0.620)	Data Time 0.001 (0.018)	Loss 2.2919 (2.2953)	Entropy 0.85359 (0.85673)	Top-1 acc 67.969 (69.069)	Top-5 acc 86.328 (87.109)	lr 0.00419
Train [88][2500/3239]	Time 0.230 (0.619)	Data Time 0.001 (0.018)	Loss 2.0610 (2.2952)	Entropy 0.85347 (0.85672)	Top-1 acc 76.172 (69.073)	Top-5 acc 91.016 (87.111)	lr 0.00419
Train [88][2510/3239]	Time 0.228 (0.619)	Data Time 0.001 (0.018)	Loss 2.2725 (2.2953)	Entropy 0.85345 (0.85671)	Top-1 acc 69.922 (69.073)	Top-5 acc 88.672 (87.111)	lr 0.00419
Train [88][2520/3239]	Time 0.233 (0.618)	Data Time 0.001 (0.018)	Loss 2.3026 (2.2951)	Entropy 0.85342 (0.85669)	Top-1 acc 68.359 (69.076)	Top-5 acc 86.328 (87.114)	lr 0.00419
Train [88][2530/3239]	Time 0.274 (0.618)	Data Time 0.002 (0.018)	Loss 2.3845 (2.2951)	Entropy 0.85334 (0.85668)	Top-1 acc 67.969 (69.075)	Top-5 acc 85.938 (87.116)	lr 0.00419
Train [88][2540/3239]	Time 2.536 (0.617)	Data Time 0.001 (0.018)	Loss 2.1913 (2.2951)	Entropy 0.85334 (0.85667)	Top-1 acc 72.266 (69.071)	Top-5 acc 86.719 (87.117)	lr 0.00419
Train [88][2550/3239]	Time 0.223 (0.616)	Data Time 0.001 (0.018)	Loss 2.3459 (2.2950)	Entropy 0.85330 (0.85665)	Top-1 acc 66.797 (69.074)	Top-5 acc 85.547 (87.118)	lr 0.00419
Train [88][2560/3239]	Time 0.262 (0.615)	Data Time 0.001 (0.018)	Loss 2.3275 (2.2948)	Entropy 0.85333 (0.85664)	Top-1 acc 69.531 (69.077)	Top-5 acc 87.891 (87.125)	lr 0.00419
Train [88][2570/3239]	Time 0.242 (0.615)	Data Time 0.001 (0.017)	Loss 2.4248 (2.2951)	Entropy 0.85329 (0.85663)	Top-1 acc 66.797 (69.068)	Top-5 acc 84.375 (87.121)	lr 0.00419
Train [88][2580/3239]	Time 0.228 (0.614)	Data Time 0.001 (0.017)	Loss 2.3176 (2.2951)	Entropy 0.85321 (0.85661)	Top-1 acc 68.750 (69.069)	Top-5 acc 89.453 (87.123)	lr 0.00419
Train [88][2590/3239]	Time 0.212 (0.613)	Data Time 0.001 (0.017)	Loss 2.3805 (2.2952)	Entropy 0.85321 (0.85660)	Top-1 acc 63.281 (69.064)	Top-5 acc 84.375 (87.119)	lr 0.00418
Train [88][2600/3239]	Time 0.218 (0.613)	Data Time 0.001 (0.017)	Loss 2.3055 (2.2951)	Entropy 0.85324 (0.85659)	Top-1 acc 65.625 (69.064)	Top-5 acc 85.938 (87.118)	lr 0.00418
Train [88][2610/3239]	Time 0.225 (0.612)	Data Time 0.001 (0.017)	Loss 2.3480 (2.2952)	Entropy 0.85324 (0.85658)	Top-1 acc 68.750 (69.063)	Top-5 acc 85.156 (87.114)	lr 0.00418
Train [88][2620/3239]	Time 0.415 (0.631)	Data Time 0.002 (0.017)	Loss 2.2271 (2.2951)	Entropy 0.85321 (0.85656)	Top-1 acc 69.531 (69.065)	Top-5 acc 87.500 (87.116)	lr 0.00418
Train [88][2630/3239]	Time 0.230 (0.631)	Data Time 0.002 (0.017)	Loss 2.3043 (2.2951)	Entropy 0.85320 (0.85655)	Top-1 acc 69.922 (69.067)	Top-5 acc 88.281 (87.116)	lr 0.00418
Train [88][2640/3239]	Time 0.290 (0.630)	Data Time 0.002 (0.017)	Loss 2.2964 (2.2953)	Entropy 0.85319 (0.85654)	Top-1 acc 68.750 (69.060)	Top-5 acc 87.891 (87.110)	lr 0.00418
Train [88][2650/3239]	Time 0.279 (0.630)	Data Time 0.001 (0.017)	Loss 2.1731 (2.2953)	Entropy 0.85315 (0.85652)	Top-1 acc 73.047 (69.063)	Top-5 acc 89.844 (87.107)	lr 0.00418
Train [88][2660/3239]	Time 0.361 (0.629)	Data Time 0.002 (0.017)	Loss 2.3183 (2.2953)	Entropy 0.85319 (0.85651)	Top-1 acc 68.750 (69.065)	Top-5 acc 85.938 (87.108)	lr 0.00418
Train [88][2670/3239]	Time 0.245 (0.628)	Data Time 0.001 (0.017)	Loss 2.2873 (2.2954)	Entropy 0.85311 (0.85650)	Top-1 acc 67.969 (69.063)	Top-5 acc 86.719 (87.105)	lr 0.00418
Train [88][2680/3239]	Time 0.258 (0.628)	Data Time 0.002 (0.017)	Loss 2.4761 (2.2954)	Entropy 0.85306 (0.85649)	Top-1 acc 64.453 (69.066)	Top-5 acc 85.547 (87.103)	lr 0.00418
Train [88][2690/3239]	Time 0.245 (0.627)	Data Time 0.001 (0.017)	Loss 2.2216 (2.2955)	Entropy 0.85312 (0.85647)	Top-1 acc 74.219 (69.061)	Top-5 acc 88.672 (87.104)	lr 0.00418
Train [88][2700/3239]	Time 0.326 (0.627)	Data Time 0.001 (0.017)	Loss 2.4834 (2.2957)	Entropy 0.85309 (0.85646)	Top-1 acc 67.188 (69.057)	Top-5 acc 83.594 (87.100)	lr 0.00418
Train [88][2710/3239]	Time 0.245 (0.626)	Data Time 0.002 (0.017)	Loss 2.3381 (2.2958)	Entropy 0.85306 (0.85645)	Top-1 acc 69.922 (69.051)	Top-5 acc 83.984 (87.095)	lr 0.00418
Train [88][2720/3239]	Time 0.232 (0.626)	Data Time 0.001 (0.017)	Loss 2.2880 (2.2958)	Entropy 0.85309 (0.85644)	Top-1 acc 68.359 (69.051)	Top-5 acc 87.500 (87.095)	lr 0.00417
Train [88][2730/3239]	Time 0.261 (0.625)	Data Time 0.001 (0.017)	Loss 2.4406 (2.2958)	Entropy 0.85311 (0.85642)	Top-1 acc 67.188 (69.050)	Top-5 acc 84.766 (87.096)	lr 0.00417
Train [88][2740/3239]	Time 0.233 (0.625)	Data Time 0.002 (0.017)	Loss 2.2300 (2.2959)	Entropy 0.85308 (0.85641)	Top-1 acc 68.750 (69.046)	Top-5 acc 86.719 (87.092)	lr 0.00417
Train [88][2750/3239]	Time 0.225 (0.624)	Data Time 0.001 (0.016)	Loss 2.2496 (2.2960)	Entropy 0.85304 (0.85640)	Top-1 acc 71.484 (69.046)	Top-5 acc 86.719 (87.089)	lr 0.00417
Train [88][2760/3239]	Time 0.208 (0.624)	Data Time 0.001 (0.016)	Loss 2.2936 (2.2960)	Entropy 0.85305 (0.85639)	Top-1 acc 72.266 (69.051)	Top-5 acc 88.281 (87.090)	lr 0.00417
Train [88][2770/3239]	Time 0.262 (0.623)	Data Time 0.001 (0.016)	Loss 2.2372 (2.2961)	Entropy 0.85299 (0.85638)	Top-1 acc 71.875 (69.050)	Top-5 acc 86.328 (87.085)	lr 0.00417
Train [88][2780/3239]	Time 0.254 (0.623)	Data Time 0.001 (0.016)	Loss 2.2347 (2.2961)	Entropy 0.85291 (0.85636)	Top-1 acc 70.312 (69.049)	Top-5 acc 88.672 (87.085)	lr 0.00417
Train [88][2790/3239]	Time 0.278 (0.622)	Data Time 0.001 (0.016)	Loss 2.2113 (2.2960)	Entropy 0.85290 (0.85635)	Top-1 acc 71.094 (69.051)	Top-5 acc 90.234 (87.089)	lr 0.00417
Train [88][2800/3239]	Time 0.230 (0.622)	Data Time 0.002 (0.016)	Loss 2.3871 (2.2961)	Entropy 0.85293 (0.85634)	Top-1 acc 66.016 (69.048)	Top-5 acc 87.500 (87.088)	lr 0.00417
Train [88][2810/3239]	Time 0.263 (0.621)	Data Time 0.001 (0.016)	Loss 2.3130 (2.2962)	Entropy 0.85294 (0.85633)	Top-1 acc 70.312 (69.043)	Top-5 acc 86.719 (87.085)	lr 0.00417
Train [88][2820/3239]	Time 0.276 (0.621)	Data Time 0.002 (0.016)	Loss 2.3694 (2.2962)	Entropy 0.85300 (0.85632)	Top-1 acc 64.453 (69.042)	Top-5 acc 86.719 (87.085)	lr 0.00417
Train [88][2830/3239]	Time 0.253 (0.620)	Data Time 0.001 (0.016)	Loss 2.3168 (2.2961)	Entropy 0.85296 (0.85630)	Top-1 acc 69.141 (69.044)	Top-5 acc 86.328 (87.086)	lr 0.00417
Train [88][2840/3239]	Time 0.222 (0.619)	Data Time 0.001 (0.016)	Loss 2.3826 (2.2960)	Entropy 0.85288 (0.85629)	Top-1 acc 66.797 (69.048)	Top-5 acc 85.547 (87.090)	lr 0.00417
Train [88][2850/3239]	Time 0.269 (0.619)	Data Time 0.001 (0.016)	Loss 2.5340 (2.2963)	Entropy 0.85266 (0.85628)	Top-1 acc 64.062 (69.040)	Top-5 acc 83.203 (87.084)	lr 0.00417
Train [88][2860/3239]	Time 0.233 (0.618)	Data Time 0.001 (0.016)	Loss 2.4197 (2.2962)	Entropy 0.85262 (0.85627)	Top-1 acc 64.062 (69.041)	Top-5 acc 85.547 (87.086)	lr 0.00416
Train [88][2870/3239]	Time 0.337 (0.618)	Data Time 0.001 (0.016)	Loss 2.1410 (2.2962)	Entropy 0.85243 (0.85625)	Top-1 acc 72.656 (69.043)	Top-5 acc 87.109 (87.086)	lr 0.00416
Train [88][2880/3239]	Time 0.266 (0.617)	Data Time 0.001 (0.016)	Loss 2.3153 (2.2963)	Entropy 0.85222 (0.85624)	Top-1 acc 70.312 (69.041)	Top-5 acc 87.891 (87.085)	lr 0.00416
Train [88][2890/3239]	Time 0.247 (0.617)	Data Time 0.001 (0.016)	Loss 2.4120 (2.2964)	Entropy 0.85223 (0.85623)	Top-1 acc 66.016 (69.039)	Top-5 acc 86.328 (87.084)	lr 0.00416
Train [88][2900/3239]	Time 0.232 (0.616)	Data Time 0.001 (0.016)	Loss 2.2531 (2.2963)	Entropy 0.85223 (0.85621)	Top-1 acc 68.750 (69.038)	Top-5 acc 87.891 (87.085)	lr 0.00416
Train [88][2910/3239]	Time 0.249 (0.616)	Data Time 0.001 (0.016)	Loss 2.3551 (2.2964)	Entropy 0.85211 (0.85620)	Top-1 acc 66.797 (69.034)	Top-5 acc 84.766 (87.085)	lr 0.00416
Train [88][2920/3239]	Time 0.233 (0.615)	Data Time 0.001 (0.016)	Loss 2.1948 (2.2963)	Entropy 0.85209 (0.85619)	Top-1 acc 73.438 (69.035)	Top-5 acc 89.453 (87.089)	lr 0.00416
Train [88][2930/3239]	Time 0.249 (0.615)	Data Time 0.001 (0.016)	Loss 2.2350 (2.2962)	Entropy 0.85200 (0.85617)	Top-1 acc 72.656 (69.037)	Top-5 acc 86.719 (87.093)	lr 0.00416
Train [88][2940/3239]	Time 0.298 (0.614)	Data Time 0.001 (0.015)	Loss 2.2883 (2.2964)	Entropy 0.85198 (0.85616)	Top-1 acc 69.141 (69.033)	Top-5 acc 88.281 (87.090)	lr 0.00416
Train [88][2950/3239]	Time 0.263 (0.630)	Data Time 0.004 (0.015)	Loss 2.4142 (2.2964)	Entropy 0.85195 (0.85614)	Top-1 acc 67.188 (69.032)	Top-5 acc 84.766 (87.090)	lr 0.00416
Train [88][2960/3239]	Time 0.227 (0.630)	Data Time 0.002 (0.015)	Loss 2.2033 (2.2964)	Entropy 0.85196 (0.85613)	Top-1 acc 72.656 (69.035)	Top-5 acc 89.453 (87.093)	lr 0.00416
Train [88][2970/3239]	Time 0.265 (0.630)	Data Time 0.005 (0.015)	Loss 2.1395 (2.2963)	Entropy 0.85192 (0.85611)	Top-1 acc 75.391 (69.037)	Top-5 acc 87.891 (87.093)	lr 0.00416
Train [88][2980/3239]	Time 0.230 (0.629)	Data Time 0.001 (0.015)	Loss 2.1696 (2.2962)	Entropy 0.85193 (0.85610)	Top-1 acc 75.781 (69.040)	Top-5 acc 87.891 (87.093)	lr 0.00416
Train [88][2990/3239]	Time 0.260 (0.629)	Data Time 0.001 (0.015)	Loss 2.4464 (2.2964)	Entropy 0.85194 (0.85609)	Top-1 acc 66.797 (69.035)	Top-5 acc 87.500 (87.087)	lr 0.00415
Train [88][3000/3239]	Time 0.230 (0.628)	Data Time 0.004 (0.015)	Loss 2.2831 (2.2963)	Entropy 0.85187 (0.85607)	Top-1 acc 70.312 (69.038)	Top-5 acc 86.719 (87.089)	lr 0.00415
Train [88][3010/3239]	Time 0.258 (0.628)	Data Time 0.001 (0.015)	Loss 2.1922 (2.2964)	Entropy 0.85184 (0.85606)	Top-1 acc 73.438 (69.036)	Top-5 acc 87.109 (87.089)	lr 0.00415
Train [88][3020/3239]	Time 0.265 (0.627)	Data Time 0.001 (0.015)	Loss 2.3518 (2.2964)	Entropy 0.85177 (0.85604)	Top-1 acc 67.188 (69.035)	Top-5 acc 87.109 (87.090)	lr 0.00415
Train [88][3030/3239]	Time 0.262 (0.627)	Data Time 0.001 (0.015)	Loss 2.4276 (2.2964)	Entropy 0.85172 (0.85603)	Top-1 acc 64.453 (69.033)	Top-5 acc 85.156 (87.088)	lr 0.00415
Train [88][3040/3239]	Time 0.378 (0.626)	Data Time 0.002 (0.015)	Loss 2.3817 (2.2966)	Entropy 0.85167 (0.85602)	Top-1 acc 68.750 (69.029)	Top-5 acc 87.500 (87.088)	lr 0.00415
Train [88][3050/3239]	Time 0.262 (0.626)	Data Time 0.001 (0.015)	Loss 2.2027 (2.2966)	Entropy 0.85158 (0.85600)	Top-1 acc 70.703 (69.026)	Top-5 acc 87.109 (87.087)	lr 0.00415
Train [88][3060/3239]	Time 0.214 (0.625)	Data Time 0.001 (0.015)	Loss 2.3967 (2.2966)	Entropy 0.85146 (0.85599)	Top-1 acc 69.141 (69.026)	Top-5 acc 85.156 (87.085)	lr 0.00415
Train [88][3070/3239]	Time 0.267 (0.625)	Data Time 0.001 (0.015)	Loss 2.4305 (2.2968)	Entropy 0.85140 (0.85597)	Top-1 acc 63.672 (69.021)	Top-5 acc 83.594 (87.080)	lr 0.00415
Train [88][3080/3239]	Time 0.359 (0.625)	Data Time 0.001 (0.015)	Loss 2.3671 (2.2969)	Entropy 0.85140 (0.85596)	Top-1 acc 67.969 (69.018)	Top-5 acc 86.719 (87.079)	lr 0.00415
Train [88][3090/3239]	Time 0.256 (0.624)	Data Time 0.001 (0.015)	Loss 2.3287 (2.2969)	Entropy 0.85141 (0.85594)	Top-1 acc 67.188 (69.016)	Top-5 acc 87.500 (87.078)	lr 0.00415
Train [88][3100/3239]	Time 0.240 (0.624)	Data Time 0.001 (0.015)	Loss 2.3608 (2.2970)	Entropy 0.85145 (0.85593)	Top-1 acc 69.922 (69.016)	Top-5 acc 85.156 (87.079)	lr 0.00415
Train [88][3110/3239]	Time 0.253 (0.623)	Data Time 0.001 (0.015)	Loss 2.1738 (2.2971)	Entropy 0.85148 (0.85591)	Top-1 acc 75.000 (69.013)	Top-5 acc 89.062 (87.078)	lr 0.00415
Train [88][3120/3239]	Time 0.358 (0.623)	Data Time 0.001 (0.015)	Loss 2.3537 (2.2971)	Entropy 0.85150 (0.85590)	Top-1 acc 66.406 (69.010)	Top-5 acc 85.547 (87.078)	lr 0.00414
Train [88][3130/3239]	Time 0.235 (0.622)	Data Time 0.001 (0.015)	Loss 2.2446 (2.2971)	Entropy 0.85153 (0.85589)	Top-1 acc 70.703 (69.011)	Top-5 acc 87.891 (87.077)	lr 0.00414
Train [88][3140/3239]	Time 0.209 (0.622)	Data Time 0.001 (0.015)	Loss 2.2833 (2.2971)	Entropy 0.85154 (0.85587)	Top-1 acc 67.969 (69.012)	Top-5 acc 86.719 (87.077)	lr 0.00414
Train [88][3150/3239]	Time 0.271 (0.621)	Data Time 0.003 (0.015)	Loss 2.3891 (2.2973)	Entropy 0.85148 (0.85586)	Top-1 acc 67.578 (69.010)	Top-5 acc 83.594 (87.072)	lr 0.00414
Train [88][3160/3239]	Time 0.261 (0.621)	Data Time 0.001 (0.015)	Loss 2.2933 (2.2973)	Entropy 0.85148 (0.85584)	Top-1 acc 67.188 (69.010)	Top-5 acc 86.719 (87.072)	lr 0.00414
Train [88][3170/3239]	Time 0.225 (0.620)	Data Time 0.001 (0.014)	Loss 2.1645 (2.2973)	Entropy 0.85137 (0.85583)	Top-1 acc 72.656 (69.013)	Top-5 acc 90.234 (87.071)	lr 0.00414
Train [88][3180/3239]	Time 0.226 (0.620)	Data Time 0.000 (0.014)	Loss 2.1529 (2.2972)	Entropy 0.85137 (0.85582)	Top-1 acc 74.609 (69.014)	Top-5 acc 91.016 (87.074)	lr 0.00414
Train [88][3190/3239]	Time 0.216 (0.619)	Data Time 0.000 (0.014)	Loss 2.2953 (2.2972)	Entropy 0.85138 (0.85580)	Top-1 acc 69.531 (69.015)	Top-5 acc 87.109 (87.073)	lr 0.00414
Train [88][3200/3239]	Time 0.225 (0.619)	Data Time 0.000 (0.014)	Loss 2.2677 (2.2972)	Entropy 0.85123 (0.85579)	Top-1 acc 68.750 (69.016)	Top-5 acc 86.719 (87.071)	lr 0.00414
Train [88][3210/3239]	Time 0.332 (0.618)	Data Time 0.000 (0.014)	Loss 2.2762 (2.2971)	Entropy 0.85124 (0.85577)	Top-1 acc 72.656 (69.018)	Top-5 acc 87.109 (87.071)	lr 0.00414
Train [88][3220/3239]	Time 0.220 (0.618)	Data Time 0.000 (0.014)	Loss 2.3026 (2.2972)	Entropy 0.85114 (0.85576)	Top-1 acc 71.484 (69.016)	Top-5 acc 89.062 (87.069)	lr 0.00414
Train [88][3230/3239]	Time 0.222 (0.617)	Data Time 0.000 (0.014)	Loss 2.2515 (2.2974)	Entropy 0.85112 (0.85575)	Top-1 acc 71.875 (69.015)	Top-5 acc 88.281 (87.068)	lr 0.00414
Train [88][3239/3239]	Time 2.337 (0.617)	Data Time 0.000 (0.014)	Loss 2.6588 (2.2975)	Entropy 0.85112 (0.85573)	Top-1 acc 58.025 (69.012)	Top-5 acc 81.481 (87.066)	lr 0.00414
==========Valid [88/120]	loss 1.260	top-1 acc 71.098 (71.098)	top-5 acc 89.136	Train top-1 69.012	top-5 87.066	Entropy 0.85112	Latency-None: 0.000ms	Flops: 546.53M
Train [89][0/3239]	Time 42.931 (42.931)	Data Time 40.128 (40.128)	Loss 2.2127 (2.2127)	Entropy 0.85106 (0.85106)	Top-1 acc 73.438 (73.438)	Top-5 acc 88.672 (88.672)	lr 0.00414
Train [89][10/3239]	Time 2.774 (4.437)	Data Time 0.003 (3.653)	Loss 2.3606 (2.3140)	Entropy 0.85106 (0.85106)	Top-1 acc 68.750 (69.105)	Top-5 acc 85.938 (86.293)	lr 0.00414
Train [89][20/3239]	Time 0.242 (2.454)	Data Time 0.001 (1.914)	Loss 2.3252 (2.3307)	Entropy 0.85111 (0.85109)	Top-1 acc 67.969 (68.248)	Top-5 acc 87.109 (86.217)	lr 0.00413
Train [89][30/3239]	Time 0.242 (1.822)	Data Time 0.001 (1.297)	Loss 2.2831 (2.3071)	Entropy 0.85104 (0.85107)	Top-1 acc 68.750 (68.574)	Top-5 acc 87.109 (86.706)	lr 0.00413
Train [89][40/3239]	Time 0.219 (1.499)	Data Time 0.001 (0.981)	Loss 2.3025 (2.2981)	Entropy 0.85096 (0.85105)	Top-1 acc 69.531 (68.760)	Top-5 acc 85.938 (86.909)	lr 0.00413
Train [89][50/3239]	Time 0.269 (2.273)	Data Time 0.003 (0.789)	Loss 2.2556 (2.2967)	Entropy 0.85095 (0.85103)	Top-1 acc 69.531 (68.796)	Top-5 acc 88.281 (87.017)	lr 0.00413
Train [89][60/3239]	Time 0.266 (2.003)	Data Time 0.002 (0.660)	Loss 2.3363 (2.2955)	Entropy 0.85078 (0.85100)	Top-1 acc 66.406 (68.635)	Top-5 acc 85.938 (87.097)	lr 0.00413
Train [89][70/3239]	Time 0.247 (1.793)	Data Time 0.002 (0.568)	Loss 2.2771 (2.2989)	Entropy 0.85074 (0.85097)	Top-1 acc 71.484 (68.645)	Top-5 acc 85.938 (87.016)	lr 0.00413
Train [89][80/3239]	Time 0.246 (1.633)	Data Time 0.002 (0.498)	Loss 2.2946 (2.2928)	Entropy 0.85067 (0.85094)	Top-1 acc 69.141 (68.880)	Top-5 acc 87.109 (87.109)	lr 0.00413
Train [89][90/3239]	Time 0.227 (1.507)	Data Time 0.001 (0.443)	Loss 2.2035 (2.2922)	Entropy 0.85073 (0.85091)	Top-1 acc 71.094 (68.814)	Top-5 acc 89.062 (87.096)	lr 0.00413
Train [89][100/3239]	Time 0.328 (1.407)	Data Time 0.001 (0.400)	Loss 2.2506 (2.2914)	Entropy 0.85065 (0.85089)	Top-1 acc 70.703 (68.796)	Top-5 acc 87.891 (87.109)	lr 0.00413
Train [89][110/3239]	Time 0.237 (1.323)	Data Time 0.001 (0.364)	Loss 2.1386 (2.2930)	Entropy 0.85053 (0.85087)	Top-1 acc 73.828 (68.824)	Top-5 acc 89.453 (87.057)	lr 0.00413
Train [89][120/3239]	Time 2.572 (1.254)	Data Time 0.001 (0.334)	Loss 2.3038 (2.2900)	Entropy 0.85053 (0.85084)	Top-1 acc 71.875 (68.957)	Top-5 acc 86.719 (87.077)	lr 0.00413
Train [89][130/3239]	Time 0.233 (1.177)	Data Time 0.001 (0.308)	Loss 2.2199 (2.2913)	Entropy 0.85047 (0.85081)	Top-1 acc 68.750 (68.908)	Top-5 acc 86.719 (87.005)	lr 0.00413
Train [89][140/3239]	Time 0.241 (1.128)	Data Time 0.002 (0.287)	Loss 2.2430 (2.2931)	Entropy 0.85041 (0.85078)	Top-1 acc 69.531 (68.922)	Top-5 acc 88.672 (86.957)	lr 0.00413
Train [89][150/3239]	Time 0.249 (1.087)	Data Time 0.002 (0.268)	Loss 2.3865 (2.2930)	Entropy 0.85037 (0.85076)	Top-1 acc 66.797 (68.936)	Top-5 acc 83.594 (86.967)	lr 0.00412
Train [89][160/3239]	Time 0.267 (1.050)	Data Time 0.001 (0.251)	Loss 2.3756 (2.2907)	Entropy 0.85040 (0.85073)	Top-1 acc 68.750 (68.990)	Top-5 acc 85.938 (87.022)	lr 0.00412
Train [89][170/3239]	Time 0.255 (1.016)	Data Time 0.001 (0.237)	Loss 2.4149 (2.2905)	Entropy 0.85031 (0.85071)	Top-1 acc 64.062 (69.026)	Top-5 acc 85.156 (87.043)	lr 0.00412
Train [89][180/3239]	Time 0.223 (0.986)	Data Time 0.001 (0.224)	Loss 2.1733 (2.2891)	Entropy 0.85027 (0.85069)	Top-1 acc 69.141 (69.080)	Top-5 acc 89.453 (87.077)	lr 0.00412
Train [89][190/3239]	Time 0.212 (0.960)	Data Time 0.001 (0.212)	Loss 2.1928 (2.2874)	Entropy 0.85026 (0.85066)	Top-1 acc 72.656 (69.130)	Top-5 acc 87.891 (87.122)	lr 0.00412
Train [89][200/3239]	Time 0.224 (0.937)	Data Time 0.001 (0.202)	Loss 2.1850 (2.2872)	Entropy 0.85017 (0.85064)	Top-1 acc 70.703 (69.145)	Top-5 acc 89.062 (87.135)	lr 0.00412
Train [89][210/3239]	Time 0.241 (0.915)	Data Time 0.001 (0.192)	Loss 2.2861 (2.2858)	Entropy 0.85024 (0.85062)	Top-1 acc 69.922 (69.220)	Top-5 acc 87.891 (87.185)	lr 0.00412
Train [89][220/3239]	Time 0.230 (0.895)	Data Time 0.001 (0.183)	Loss 2.1284 (2.2842)	Entropy 0.85016 (0.85060)	Top-1 acc 73.828 (69.236)	Top-5 acc 89.453 (87.230)	lr 0.00412
Train [89][230/3239]	Time 2.822 (0.879)	Data Time 0.002 (0.176)	Loss 2.3223 (2.2828)	Entropy 0.85016 (0.85058)	Top-1 acc 68.750 (69.262)	Top-5 acc 87.109 (87.309)	lr 0.00412
Train [89][240/3239]	Time 0.256 (0.852)	Data Time 0.001 (0.168)	Loss 2.3811 (2.2831)	Entropy 0.85010 (0.85056)	Top-1 acc 66.406 (69.290)	Top-5 acc 86.328 (87.280)	lr 0.00412
Train [89][250/3239]	Time 0.228 (0.837)	Data Time 0.001 (0.162)	Loss 2.2976 (2.2840)	Entropy 0.85006 (0.85054)	Top-1 acc 69.141 (69.293)	Top-5 acc 86.719 (87.268)	lr 0.00412
Train [89][260/3239]	Time 0.228 (0.823)	Data Time 0.001 (0.156)	Loss 2.3506 (2.2837)	Entropy 0.85005 (0.85053)	Top-1 acc 67.969 (69.316)	Top-5 acc 87.109 (87.262)	lr 0.00412
Train [89][270/3239]	Time 0.322 (0.811)	Data Time 0.002 (0.150)	Loss 2.3523 (2.2834)	Entropy 0.85005 (0.85051)	Top-1 acc 67.188 (69.341)	Top-5 acc 89.453 (87.294)	lr 0.00412
Train [89][280/3239]	Time 0.242 (0.799)	Data Time 0.015 (0.145)	Loss 2.2010 (2.2820)	Entropy 0.85005 (0.85049)	Top-1 acc 69.922 (69.369)	Top-5 acc 89.453 (87.318)	lr 0.00411
Train [89][290/3239]	Time 0.233 (0.788)	Data Time 0.001 (0.140)	Loss 2.2500 (2.2821)	Entropy 0.85004 (0.85048)	Top-1 acc 69.141 (69.362)	Top-5 acc 88.281 (87.335)	lr 0.00411
Train [89][300/3239]	Time 0.241 (0.778)	Data Time 0.002 (0.135)	Loss 2.3672 (2.2823)	Entropy 0.84997 (0.85046)	Top-1 acc 68.750 (69.351)	Top-5 acc 84.766 (87.339)	lr 0.00411
Train [89][310/3239]	Time 0.293 (0.768)	Data Time 0.001 (0.131)	Loss 2.2563 (2.2830)	Entropy 0.84992 (0.85044)	Top-1 acc 70.703 (69.344)	Top-5 acc 87.109 (87.333)	lr 0.00411
Train [89][320/3239]	Time 0.230 (0.759)	Data Time 0.001 (0.127)	Loss 2.2843 (2.2825)	Entropy 0.84992 (0.85043)	Top-1 acc 68.750 (69.329)	Top-5 acc 88.672 (87.349)	lr 0.00411
Train [89][330/3239]	Time 0.236 (0.751)	Data Time 0.001 (0.123)	Loss 2.2487 (2.2827)	Entropy 0.84988 (0.85041)	Top-1 acc 68.359 (69.358)	Top-5 acc 87.109 (87.340)	lr 0.00411
Train [89][340/3239]	Time 2.507 (0.743)	Data Time 0.001 (0.119)	Loss 2.3535 (2.2820)	Entropy 0.84988 (0.85040)	Top-1 acc 67.188 (69.389)	Top-5 acc 86.328 (87.351)	lr 0.00411
Train [89][350/3239]	Time 0.250 (0.729)	Data Time 0.001 (0.116)	Loss 2.2783 (2.2819)	Entropy 0.84988 (0.85038)	Top-1 acc 73.438 (69.393)	Top-5 acc 88.281 (87.363)	lr 0.00411
Train [89][360/3239]	Time 0.238 (0.723)	Data Time 0.001 (0.113)	Loss 2.1532 (2.2804)	Entropy 0.84985 (0.85037)	Top-1 acc 69.922 (69.426)	Top-5 acc 88.672 (87.395)	lr 0.00411
Train [89][370/3239]	Time 0.241 (0.716)	Data Time 0.002 (0.110)	Loss 2.2864 (2.2821)	Entropy 0.84986 (0.85035)	Top-1 acc 71.484 (69.400)	Top-5 acc 85.938 (87.347)	lr 0.00411
Train [89][380/3239]	Time 0.274 (0.710)	Data Time 0.002 (0.107)	Loss 2.2832 (2.2831)	Entropy 0.84981 (0.85034)	Top-1 acc 68.750 (69.389)	Top-5 acc 87.109 (87.326)	lr 0.00411
Train [89][390/3239]	Time 0.228 (0.704)	Data Time 0.001 (0.104)	Loss 2.4740 (2.2830)	Entropy 0.84976 (0.85033)	Top-1 acc 65.234 (69.367)	Top-5 acc 83.203 (87.324)	lr 0.00411
Train [89][400/3239]	Time 0.318 (0.698)	Data Time 0.001 (0.102)	Loss 2.2272 (2.2831)	Entropy 0.84963 (0.85031)	Top-1 acc 73.438 (69.408)	Top-5 acc 87.891 (87.327)	lr 0.00411
Train [89][410/3239]	Time 0.229 (0.826)	Data Time 0.002 (0.099)	Loss 2.1968 (2.2821)	Entropy 0.84960 (0.85029)	Top-1 acc 71.094 (69.432)	Top-5 acc 89.844 (87.328)	lr 0.00411
Train [89][420/3239]	Time 0.240 (0.818)	Data Time 0.002 (0.097)	Loss 2.2693 (2.2830)	Entropy 0.84962 (0.85028)	Top-1 acc 69.531 (69.399)	Top-5 acc 87.891 (87.293)	lr 0.00410
Train [89][430/3239]	Time 0.238 (0.810)	Data Time 0.002 (0.095)	Loss 2.2642 (2.2828)	Entropy 0.84953 (0.85026)	Top-1 acc 71.094 (69.406)	Top-5 acc 86.328 (87.315)	lr 0.00410
Train [89][440/3239]	Time 0.428 (0.804)	Data Time 0.002 (0.093)	Loss 2.3445 (2.2832)	Entropy 0.84954 (0.85024)	Top-1 acc 68.750 (69.413)	Top-5 acc 89.062 (87.300)	lr 0.00410
Train [89][450/3239]	Time 2.587 (0.797)	Data Time 0.002 (0.091)	Loss 2.3922 (2.2823)	Entropy 0.84954 (0.85023)	Top-1 acc 68.359 (69.428)	Top-5 acc 87.891 (87.332)	lr 0.00410
Train [89][460/3239]	Time 0.281 (0.785)	Data Time 0.001 (0.089)	Loss 2.3774 (2.2834)	Entropy 0.84954 (0.85021)	Top-1 acc 69.141 (69.392)	Top-5 acc 84.375 (87.309)	lr 0.00410
Train [89][470/3239]	Time 0.241 (0.778)	Data Time 0.001 (0.087)	Loss 2.1481 (2.2833)	Entropy 0.84951 (0.85020)	Top-1 acc 73.047 (69.409)	Top-5 acc 91.797 (87.317)	lr 0.00410
Train [89][480/3239]	Time 0.223 (0.771)	Data Time 0.002 (0.085)	Loss 2.2885 (2.2838)	Entropy 0.84927 (0.85018)	Top-1 acc 66.797 (69.392)	Top-5 acc 89.453 (87.305)	lr 0.00410
Train [89][490/3239]	Time 0.221 (0.766)	Data Time 0.001 (0.084)	Loss 2.2257 (2.2843)	Entropy 0.84923 (0.85016)	Top-1 acc 68.750 (69.382)	Top-5 acc 89.453 (87.289)	lr 0.00410
Train [89][500/3239]	Time 0.226 (0.760)	Data Time 0.001 (0.082)	Loss 2.2740 (2.2840)	Entropy 0.84921 (0.85014)	Top-1 acc 70.703 (69.396)	Top-5 acc 87.109 (87.300)	lr 0.00410
Train [89][510/3239]	Time 0.234 (0.754)	Data Time 0.001 (0.080)	Loss 2.1642 (2.2846)	Entropy 0.84916 (0.85012)	Top-1 acc 74.609 (69.396)	Top-5 acc 87.500 (87.279)	lr 0.00410
Train [89][520/3239]	Time 0.236 (0.749)	Data Time 0.001 (0.079)	Loss 2.1368 (2.2847)	Entropy 0.84912 (0.85011)	Top-1 acc 71.875 (69.383)	Top-5 acc 89.062 (87.289)	lr 0.00410
Train [89][530/3239]	Time 0.360 (0.744)	Data Time 0.001 (0.077)	Loss 2.3019 (2.2844)	Entropy 0.84909 (0.85009)	Top-1 acc 71.484 (69.391)	Top-5 acc 87.109 (87.294)	lr 0.00410
Train [89][540/3239]	Time 0.219 (0.739)	Data Time 0.001 (0.076)	Loss 2.3163 (2.2840)	Entropy 0.84906 (0.85007)	Top-1 acc 71.094 (69.423)	Top-5 acc 85.938 (87.308)	lr 0.00410
Train [89][550/3239]	Time 0.246 (0.735)	Data Time 0.001 (0.075)	Loss 2.1899 (2.2840)	Entropy 0.84894 (0.85005)	Top-1 acc 71.484 (69.423)	Top-5 acc 89.844 (87.311)	lr 0.00409
Train [89][560/3239]	Time 2.554 (0.730)	Data Time 0.001 (0.073)	Loss 2.1009 (2.2836)	Entropy 0.84894 (0.85003)	Top-1 acc 74.219 (69.427)	Top-5 acc 92.188 (87.323)	lr 0.00409
Train [89][570/3239]	Time 0.264 (0.722)	Data Time 0.001 (0.072)	Loss 2.2318 (2.2845)	Entropy 0.84893 (0.85001)	Top-1 acc 71.094 (69.406)	Top-5 acc 89.062 (87.317)	lr 0.00409
Train [89][580/3239]	Time 0.239 (0.717)	Data Time 0.001 (0.071)	Loss 2.1793 (2.2836)	Entropy 0.84890 (0.84999)	Top-1 acc 72.266 (69.421)	Top-5 acc 89.062 (87.328)	lr 0.00409
Train [89][590/3239]	Time 0.236 (0.714)	Data Time 0.001 (0.070)	Loss 2.3033 (2.2839)	Entropy 0.84887 (0.84997)	Top-1 acc 71.484 (69.422)	Top-5 acc 87.891 (87.320)	lr 0.00409
Train [89][600/3239]	Time 0.253 (0.710)	Data Time 0.001 (0.069)	Loss 2.4459 (2.2842)	Entropy 0.84878 (0.84995)	Top-1 acc 68.359 (69.420)	Top-5 acc 84.766 (87.309)	lr 0.00409
Train [89][610/3239]	Time 0.244 (0.706)	Data Time 0.001 (0.067)	Loss 2.4455 (2.2841)	Entropy 0.84874 (0.84993)	Top-1 acc 62.109 (69.410)	Top-5 acc 83.984 (87.307)	lr 0.00409
Train [89][620/3239]	Time 0.238 (0.703)	Data Time 0.001 (0.066)	Loss 2.1710 (2.2837)	Entropy 0.84867 (0.84991)	Top-1 acc 73.828 (69.428)	Top-5 acc 85.938 (87.300)	lr 0.00409
Train [89][630/3239]	Time 0.238 (0.699)	Data Time 0.001 (0.065)	Loss 2.1080 (2.2832)	Entropy 0.84864 (0.84989)	Top-1 acc 71.094 (69.441)	Top-5 acc 90.625 (87.309)	lr 0.00409
Train [89][640/3239]	Time 0.233 (0.696)	Data Time 0.001 (0.064)	Loss 2.2485 (2.2836)	Entropy 0.84855 (0.84987)	Top-1 acc 71.484 (69.433)	Top-5 acc 87.891 (87.301)	lr 0.00409
Train [89][650/3239]	Time 0.230 (0.692)	Data Time 0.001 (0.063)	Loss 2.3531 (2.2837)	Entropy 0.84855 (0.84985)	Top-1 acc 65.234 (69.424)	Top-5 acc 85.156 (87.300)	lr 0.00409
Train [89][660/3239]	Time 0.319 (0.689)	Data Time 0.001 (0.062)	Loss 2.3999 (2.2833)	Entropy 0.84850 (0.84983)	Top-1 acc 68.359 (69.431)	Top-5 acc 85.938 (87.308)	lr 0.00409
Train [89][670/3239]	Time 2.606 (0.686)	Data Time 0.002 (0.062)	Loss 2.2224 (2.2831)	Entropy 0.84850 (0.84981)	Top-1 acc 68.750 (69.435)	Top-5 acc 89.062 (87.306)	lr 0.00409
Train [89][680/3239]	Time 0.226 (0.679)	Data Time 0.001 (0.061)	Loss 2.3232 (2.2831)	Entropy 0.84843 (0.84979)	Top-1 acc 67.969 (69.453)	Top-5 acc 85.938 (87.299)	lr 0.00408
Train [89][690/3239]	Time 0.232 (0.676)	Data Time 0.001 (0.060)	Loss 2.2876 (2.2830)	Entropy 0.84843 (0.84977)	Top-1 acc 69.141 (69.446)	Top-5 acc 85.938 (87.299)	lr 0.00408
Train [89][700/3239]	Time 0.269 (0.673)	Data Time 0.001 (0.059)	Loss 2.2605 (2.2833)	Entropy 0.84840 (0.84975)	Top-1 acc 69.141 (69.448)	Top-5 acc 87.891 (87.279)	lr 0.00408
Train [89][710/3239]	Time 0.225 (0.671)	Data Time 0.001 (0.058)	Loss 2.4012 (2.2829)	Entropy 0.84863 (0.84974)	Top-1 acc 65.234 (69.447)	Top-5 acc 86.719 (87.286)	lr 0.00408
Train [89][720/3239]	Time 0.242 (0.668)	Data Time 0.002 (0.057)	Loss 2.3768 (2.2831)	Entropy 0.84858 (0.84972)	Top-1 acc 68.359 (69.433)	Top-5 acc 86.719 (87.285)	lr 0.00408
Train [89][730/3239]	Time 0.258 (0.666)	Data Time 0.001 (0.057)	Loss 2.3001 (2.2828)	Entropy 0.84847 (0.84971)	Top-1 acc 67.188 (69.437)	Top-5 acc 87.891 (87.288)	lr 0.00408
Train [89][740/3239]	Time 0.234 (0.663)	Data Time 0.001 (0.056)	Loss 2.4233 (2.2826)	Entropy 0.84844 (0.84969)	Top-1 acc 64.062 (69.447)	Top-5 acc 85.547 (87.291)	lr 0.00408
Train [89][750/3239]	Time 0.236 (0.661)	Data Time 0.001 (0.055)	Loss 2.1095 (2.2823)	Entropy 0.84841 (0.84967)	Top-1 acc 73.438 (69.458)	Top-5 acc 92.188 (87.305)	lr 0.00408
Train [89][760/3239]	Time 0.267 (0.659)	Data Time 0.001 (0.054)	Loss 2.2746 (2.2822)	Entropy 0.84835 (0.84965)	Top-1 acc 67.969 (69.461)	Top-5 acc 87.109 (87.310)	lr 0.00408
Train [89][770/3239]	Time 0.283 (0.728)	Data Time 0.003 (0.054)	Loss 2.2006 (2.2814)	Entropy 0.84831 (0.84964)	Top-1 acc 69.531 (69.483)	Top-5 acc 88.672 (87.322)	lr 0.00408
Train [89][780/3239]	Time 2.574 (0.725)	Data Time 0.002 (0.053)	Loss 2.3288 (2.2819)	Entropy 0.84831 (0.84962)	Top-1 acc 70.312 (69.485)	Top-5 acc 86.719 (87.305)	lr 0.00408
Train [89][790/3239]	Time 0.337 (0.719)	Data Time 0.002 (0.052)	Loss 2.2362 (2.2820)	Entropy 0.84828 (0.84960)	Top-1 acc 70.312 (69.487)	Top-5 acc 88.281 (87.300)	lr 0.00408
Train [89][800/3239]	Time 0.246 (0.716)	Data Time 0.002 (0.052)	Loss 2.2135 (2.2822)	Entropy 0.84822 (0.84959)	Top-1 acc 71.484 (69.486)	Top-5 acc 87.891 (87.306)	lr 0.00408
Train [89][810/3239]	Time 0.249 (0.713)	Data Time 0.002 (0.051)	Loss 2.2956 (2.2825)	Entropy 0.84816 (0.84957)	Top-1 acc 67.188 (69.472)	Top-5 acc 88.281 (87.304)	lr 0.00408
Train [89][820/3239]	Time 0.225 (0.710)	Data Time 0.002 (0.051)	Loss 2.3263 (2.2824)	Entropy 0.84812 (0.84955)	Top-1 acc 66.016 (69.473)	Top-5 acc 88.281 (87.315)	lr 0.00407
Train [89][830/3239]	Time 0.226 (0.707)	Data Time 0.001 (0.050)	Loss 2.3001 (2.2825)	Entropy 0.84818 (0.84953)	Top-1 acc 71.875 (69.484)	Top-5 acc 85.547 (87.315)	lr 0.00407
Train [89][840/3239]	Time 0.225 (0.705)	Data Time 0.001 (0.049)	Loss 2.3758 (2.2823)	Entropy 0.84810 (0.84952)	Top-1 acc 65.625 (69.478)	Top-5 acc 86.328 (87.320)	lr 0.00407
Train [89][850/3239]	Time 0.233 (0.702)	Data Time 0.001 (0.049)	Loss 2.4348 (2.2829)	Entropy 0.84814 (0.84950)	Top-1 acc 65.625 (69.461)	Top-5 acc 83.984 (87.308)	lr 0.00407
Train [89][860/3239]	Time 0.263 (0.700)	Data Time 0.002 (0.048)	Loss 2.3188 (2.2831)	Entropy 0.84800 (0.84949)	Top-1 acc 70.312 (69.453)	Top-5 acc 87.109 (87.299)	lr 0.00407
Train [89][870/3239]	Time 0.234 (0.697)	Data Time 0.002 (0.048)	Loss 2.3133 (2.2832)	Entropy 0.84794 (0.84947)	Top-1 acc 68.359 (69.446)	Top-5 acc 87.109 (87.295)	lr 0.00407
Train [89][880/3239]	Time 0.302 (0.695)	Data Time 0.001 (0.047)	Loss 2.4241 (2.2829)	Entropy 0.84789 (0.84945)	Top-1 acc 62.891 (69.456)	Top-5 acc 85.156 (87.295)	lr 0.00407
Train [89][890/3239]	Time 2.571 (0.692)	Data Time 0.001 (0.047)	Loss 2.3160 (2.2830)	Entropy 0.84789 (0.84943)	Top-1 acc 68.359 (69.453)	Top-5 acc 87.109 (87.292)	lr 0.00407
Train [89][900/3239]	Time 0.232 (0.688)	Data Time 0.001 (0.046)	Loss 2.2424 (2.2831)	Entropy 0.84784 (0.84942)	Top-1 acc 71.094 (69.457)	Top-5 acc 88.281 (87.293)	lr 0.00407
Train [89][910/3239]	Time 0.237 (0.685)	Data Time 0.002 (0.046)	Loss 2.3092 (2.2831)	Entropy 0.84780 (0.84940)	Top-1 acc 67.578 (69.456)	Top-5 acc 85.938 (87.290)	lr 0.00407
Train [89][920/3239]	Time 0.332 (0.683)	Data Time 0.001 (0.045)	Loss 2.4236 (2.2836)	Entropy 0.84766 (0.84938)	Top-1 acc 64.453 (69.442)	Top-5 acc 85.156 (87.281)	lr 0.00407
Train [89][930/3239]	Time 0.248 (0.681)	Data Time 0.001 (0.045)	Loss 2.2631 (2.2836)	Entropy 0.84766 (0.84936)	Top-1 acc 70.312 (69.443)	Top-5 acc 89.062 (87.284)	lr 0.00407
Train [89][940/3239]	Time 0.223 (0.679)	Data Time 0.001 (0.044)	Loss 2.3699 (2.2832)	Entropy 0.84764 (0.84934)	Top-1 acc 66.016 (69.449)	Top-5 acc 86.328 (87.295)	lr 0.00407
Train [89][950/3239]	Time 0.235 (0.677)	Data Time 0.001 (0.044)	Loss 2.1493 (2.2834)	Entropy 0.84765 (0.84933)	Top-1 acc 74.219 (69.445)	Top-5 acc 87.891 (87.291)	lr 0.00406
Train [89][960/3239]	Time 0.327 (0.675)	Data Time 0.001 (0.043)	Loss 2.1816 (2.2830)	Entropy 0.84767 (0.84931)	Top-1 acc 73.047 (69.454)	Top-5 acc 90.234 (87.300)	lr 0.00406
Train [89][970/3239]	Time 0.219 (0.672)	Data Time 0.001 (0.043)	Loss 2.3960 (2.2834)	Entropy 0.84770 (0.84929)	Top-1 acc 66.797 (69.448)	Top-5 acc 86.328 (87.298)	lr 0.00406
Train [89][980/3239]	Time 0.230 (0.670)	Data Time 0.001 (0.043)	Loss 2.2463 (2.2834)	Entropy 0.84767 (0.84927)	Top-1 acc 70.703 (69.448)	Top-5 acc 88.672 (87.299)	lr 0.00406
Train [89][990/3239]	Time 0.232 (0.669)	Data Time 0.001 (0.042)	Loss 2.2068 (2.2831)	Entropy 0.84770 (0.84926)	Top-1 acc 69.922 (69.451)	Top-5 acc 90.625 (87.312)	lr 0.00406
Train [89][1000/3239]	Time 2.512 (0.667)	Data Time 0.001 (0.042)	Loss 2.2772 (2.2835)	Entropy 0.84770 (0.84924)	Top-1 acc 73.047 (69.433)	Top-5 acc 88.672 (87.311)	lr 0.00406
Train [89][1010/3239]	Time 0.226 (0.663)	Data Time 0.001 (0.041)	Loss 2.3568 (2.2843)	Entropy 0.84772 (0.84923)	Top-1 acc 66.797 (69.402)	Top-5 acc 84.766 (87.297)	lr 0.00406
Train [89][1020/3239]	Time 0.236 (0.661)	Data Time 0.001 (0.041)	Loss 2.2469 (2.2842)	Entropy 0.84785 (0.84921)	Top-1 acc 70.312 (69.400)	Top-5 acc 87.109 (87.298)	lr 0.00406
Train [89][1030/3239]	Time 0.229 (0.659)	Data Time 0.001 (0.041)	Loss 2.2338 (2.2843)	Entropy 0.84779 (0.84920)	Top-1 acc 70.312 (69.391)	Top-5 acc 87.500 (87.293)	lr 0.00406
Train [89][1040/3239]	Time 0.233 (0.657)	Data Time 0.001 (0.040)	Loss 2.3160 (2.2841)	Entropy 0.84775 (0.84919)	Top-1 acc 67.188 (69.398)	Top-5 acc 85.938 (87.295)	lr 0.00406
Train [89][1050/3239]	Time 0.336 (0.656)	Data Time 0.001 (0.040)	Loss 2.3431 (2.2844)	Entropy 0.84768 (0.84917)	Top-1 acc 69.141 (69.394)	Top-5 acc 85.547 (87.294)	lr 0.00406
Train [89][1060/3239]	Time 0.260 (0.654)	Data Time 0.001 (0.039)	Loss 2.1658 (2.2839)	Entropy 0.84759 (0.84916)	Top-1 acc 72.656 (69.403)	Top-5 acc 89.062 (87.304)	lr 0.00406
Train [89][1070/3239]	Time 0.239 (0.653)	Data Time 0.002 (0.039)	Loss 2.2388 (2.2836)	Entropy 0.84749 (0.84914)	Top-1 acc 72.266 (69.411)	Top-5 acc 87.891 (87.306)	lr 0.00406
Train [89][1080/3239]	Time 0.237 (0.651)	Data Time 0.001 (0.039)	Loss 2.3530 (2.2839)	Entropy 0.84745 (0.84913)	Top-1 acc 68.750 (69.396)	Top-5 acc 84.375 (87.304)	lr 0.00406
Train [89][1090/3239]	Time 0.333 (0.649)	Data Time 0.001 (0.038)	Loss 2.2092 (2.2837)	Entropy 0.84742 (0.84911)	Top-1 acc 69.531 (69.394)	Top-5 acc 91.016 (87.311)	lr 0.00405
Train [89][1100/3239]	Time 0.276 (0.648)	Data Time 0.001 (0.038)	Loss 2.2405 (2.2837)	Entropy 0.84740 (0.84910)	Top-1 acc 69.141 (69.390)	Top-5 acc 90.234 (87.314)	lr 0.00405
Train [89][1110/3239]	Time 2.592 (0.646)	Data Time 0.001 (0.038)	Loss 2.2135 (2.2837)	Entropy 0.84740 (0.84908)	Top-1 acc 68.750 (69.384)	Top-5 acc 91.797 (87.316)	lr 0.00405
Train [89][1120/3239]	Time 0.224 (0.643)	Data Time 0.001 (0.037)	Loss 2.3251 (2.2837)	Entropy 0.84739 (0.84907)	Top-1 acc 68.750 (69.387)	Top-5 acc 86.719 (87.316)	lr 0.00405
Train [89][1130/3239]	Time 0.235 (0.641)	Data Time 0.001 (0.037)	Loss 2.2069 (2.2834)	Entropy 0.84739 (0.84905)	Top-1 acc 70.703 (69.390)	Top-5 acc 89.453 (87.317)	lr 0.00405
Train [89][1140/3239]	Time 0.326 (0.687)	Data Time 0.002 (0.037)	Loss 2.3380 (2.2834)	Entropy 0.84733 (0.84904)	Top-1 acc 66.797 (69.381)	Top-5 acc 84.766 (87.321)	lr 0.00405
Train [89][1150/3239]	Time 0.237 (0.685)	Data Time 0.002 (0.037)	Loss 2.3263 (2.2828)	Entropy 0.84732 (0.84902)	Top-1 acc 68.359 (69.390)	Top-5 acc 85.938 (87.334)	lr 0.00405
Train [89][1160/3239]	Time 0.228 (0.683)	Data Time 0.002 (0.036)	Loss 2.3865 (2.2830)	Entropy 0.84735 (0.84901)	Top-1 acc 66.797 (69.377)	Top-5 acc 83.984 (87.331)	lr 0.00405
Train [89][1170/3239]	Time 0.238 (0.682)	Data Time 0.001 (0.036)	Loss 2.2942 (2.2829)	Entropy 0.84735 (0.84899)	Top-1 acc 71.875 (69.378)	Top-5 acc 87.500 (87.337)	lr 0.00405
Train [89][1180/3239]	Time 0.230 (0.680)	Data Time 0.001 (0.036)	Loss 2.2737 (2.2829)	Entropy 0.84731 (0.84898)	Top-1 acc 70.312 (69.376)	Top-5 acc 89.844 (87.341)	lr 0.00405
Train [89][1190/3239]	Time 0.249 (0.678)	Data Time 0.001 (0.035)	Loss 2.3495 (2.2831)	Entropy 0.84730 (0.84897)	Top-1 acc 67.188 (69.365)	Top-5 acc 87.109 (87.335)	lr 0.00405
Train [89][1200/3239]	Time 0.274 (0.677)	Data Time 0.001 (0.035)	Loss 2.2698 (2.2830)	Entropy 0.84734 (0.84895)	Top-1 acc 70.703 (69.365)	Top-5 acc 86.328 (87.341)	lr 0.00405
Train [89][1210/3239]	Time 0.265 (0.675)	Data Time 0.001 (0.035)	Loss 2.0999 (2.2843)	Entropy 0.84735 (0.84894)	Top-1 acc 75.000 (69.347)	Top-5 acc 90.234 (87.322)	lr 0.00405
Train [89][1220/3239]	Time 2.643 (0.673)	Data Time 0.001 (0.035)	Loss 2.3160 (2.2844)	Entropy 0.84735 (0.84893)	Top-1 acc 65.625 (69.350)	Top-5 acc 88.281 (87.321)	lr 0.00404
Train [89][1230/3239]	Time 0.247 (0.670)	Data Time 0.001 (0.034)	Loss 2.0864 (2.2841)	Entropy 0.84740 (0.84891)	Top-1 acc 73.438 (69.362)	Top-5 acc 90.234 (87.323)	lr 0.00404
Train [89][1240/3239]	Time 0.215 (0.668)	Data Time 0.001 (0.034)	Loss 2.4018 (2.2841)	Entropy 0.84726 (0.84890)	Top-1 acc 66.016 (69.360)	Top-5 acc 85.547 (87.324)	lr 0.00404
Train [89][1250/3239]	Time 0.221 (0.667)	Data Time 0.001 (0.034)	Loss 2.2387 (2.2841)	Entropy 0.84714 (0.84889)	Top-1 acc 69.922 (69.362)	Top-5 acc 86.719 (87.324)	lr 0.00404
Train [89][1260/3239]	Time 0.331 (0.666)	Data Time 0.001 (0.033)	Loss 2.1281 (2.2842)	Entropy 0.84699 (0.84887)	Top-1 acc 76.953 (69.361)	Top-5 acc 91.406 (87.318)	lr 0.00404
Train [89][1270/3239]	Time 0.238 (0.664)	Data Time 0.001 (0.033)	Loss 2.3472 (2.2842)	Entropy 0.84699 (0.84886)	Top-1 acc 67.969 (69.359)	Top-5 acc 85.938 (87.318)	lr 0.00404
Train [89][1280/3239]	Time 0.224 (0.663)	Data Time 0.001 (0.033)	Loss 2.2413 (2.2841)	Entropy 0.84698 (0.84884)	Top-1 acc 68.750 (69.353)	Top-5 acc 89.844 (87.320)	lr 0.00404
Train [89][1290/3239]	Time 0.229 (0.661)	Data Time 0.002 (0.033)	Loss 2.3599 (2.2841)	Entropy 0.84695 (0.84883)	Top-1 acc 69.141 (69.357)	Top-5 acc 87.109 (87.320)	lr 0.00404
Train [89][1300/3239]	Time 0.263 (0.660)	Data Time 0.002 (0.032)	Loss 2.3005 (2.2842)	Entropy 0.84691 (0.84881)	Top-1 acc 69.531 (69.358)	Top-5 acc 87.500 (87.316)	lr 0.00404
Train [89][1310/3239]	Time 0.229 (0.658)	Data Time 0.001 (0.032)	Loss 2.1727 (2.2847)	Entropy 0.84693 (0.84880)	Top-1 acc 70.703 (69.352)	Top-5 acc 88.672 (87.306)	lr 0.00404
Train [89][1320/3239]	Time 0.228 (0.657)	Data Time 0.001 (0.032)	Loss 2.3266 (2.2848)	Entropy 0.84692 (0.84878)	Top-1 acc 67.969 (69.347)	Top-5 acc 87.500 (87.303)	lr 0.00404
Train [89][1330/3239]	Time 2.471 (0.656)	Data Time 0.001 (0.032)	Loss 2.1638 (2.2851)	Entropy 0.84692 (0.84877)	Top-1 acc 72.266 (69.341)	Top-5 acc 89.844 (87.293)	lr 0.00404
Train [89][1340/3239]	Time 0.232 (0.652)	Data Time 0.002 (0.032)	Loss 2.3500 (2.2852)	Entropy 0.84692 (0.84876)	Top-1 acc 66.797 (69.340)	Top-5 acc 86.719 (87.295)	lr 0.00404
Train [89][1350/3239]	Time 0.231 (0.651)	Data Time 0.001 (0.031)	Loss 2.0966 (2.2850)	Entropy 0.84685 (0.84874)	Top-1 acc 74.219 (69.344)	Top-5 acc 90.625 (87.302)	lr 0.00404
Train [89][1360/3239]	Time 0.236 (0.650)	Data Time 0.001 (0.031)	Loss 2.3228 (2.2849)	Entropy 0.84688 (0.84873)	Top-1 acc 69.141 (69.344)	Top-5 acc 88.672 (87.299)	lr 0.00403
Train [89][1370/3239]	Time 0.213 (0.648)	Data Time 0.001 (0.031)	Loss 2.1275 (2.2857)	Entropy 0.84705 (0.84872)	Top-1 acc 76.562 (69.327)	Top-5 acc 90.625 (87.290)	lr 0.00403
Train [89][1380/3239]	Time 0.242 (0.647)	Data Time 0.001 (0.031)	Loss 2.3035 (2.2860)	Entropy 0.84711 (0.84870)	Top-1 acc 68.750 (69.323)	Top-5 acc 89.062 (87.283)	lr 0.00403
Train [89][1390/3239]	Time 0.305 (0.646)	Data Time 0.001 (0.030)	Loss 2.1757 (2.2863)	Entropy 0.84707 (0.84869)	Top-1 acc 70.312 (69.306)	Top-5 acc 89.844 (87.278)	lr 0.00403
Train [89][1400/3239]	Time 0.222 (0.645)	Data Time 0.002 (0.030)	Loss 2.2685 (2.2863)	Entropy 0.84702 (0.84868)	Top-1 acc 72.266 (69.310)	Top-5 acc 89.062 (87.280)	lr 0.00403
Train [89][1410/3239]	Time 0.218 (0.644)	Data Time 0.001 (0.030)	Loss 2.2499 (2.2864)	Entropy 0.84696 (0.84867)	Top-1 acc 68.359 (69.303)	Top-5 acc 87.109 (87.282)	lr 0.00403
Train [89][1420/3239]	Time 0.244 (0.642)	Data Time 0.001 (0.030)	Loss 2.4167 (2.2860)	Entropy 0.84690 (0.84866)	Top-1 acc 69.141 (69.315)	Top-5 acc 85.156 (87.289)	lr 0.00403
Train [89][1430/3239]	Time 0.416 (0.641)	Data Time 0.001 (0.030)	Loss 2.1247 (2.2864)	Entropy 0.84691 (0.84864)	Top-1 acc 75.781 (69.310)	Top-5 acc 89.062 (87.281)	lr 0.00403
Train [89][1440/3239]	Time 2.544 (0.640)	Data Time 0.001 (0.029)	Loss 2.1807 (2.2861)	Entropy 0.84691 (0.84863)	Top-1 acc 72.266 (69.317)	Top-5 acc 88.672 (87.285)	lr 0.00403
Train [89][1450/3239]	Time 0.232 (0.638)	Data Time 0.001 (0.029)	Loss 2.1395 (2.2861)	Entropy 0.84692 (0.84862)	Top-1 acc 71.484 (69.317)	Top-5 acc 91.797 (87.285)	lr 0.00403
Train [89][1460/3239]	Time 0.236 (0.636)	Data Time 0.002 (0.029)	Loss 2.3625 (2.2861)	Entropy 0.84682 (0.84861)	Top-1 acc 66.406 (69.313)	Top-5 acc 86.328 (87.285)	lr 0.00403
Train [89][1470/3239]	Time 0.225 (0.635)	Data Time 0.001 (0.029)	Loss 2.2012 (2.2861)	Entropy 0.84675 (0.84860)	Top-1 acc 67.578 (69.318)	Top-5 acc 91.406 (87.281)	lr 0.00403
Train [89][1480/3239]	Time 0.210 (0.634)	Data Time 0.001 (0.029)	Loss 2.1637 (2.2860)	Entropy 0.84668 (0.84858)	Top-1 acc 72.656 (69.322)	Top-5 acc 88.672 (87.283)	lr 0.00403
Train [89][1490/3239]	Time 0.221 (0.633)	Data Time 0.001 (0.029)	Loss 2.3519 (2.2861)	Entropy 0.84665 (0.84857)	Top-1 acc 66.406 (69.320)	Top-5 acc 85.547 (87.283)	lr 0.00402
Train [89][1500/3239]	Time 0.315 (0.667)	Data Time 0.003 (0.028)	Loss 2.2460 (2.2863)	Entropy 0.84663 (0.84856)	Top-1 acc 66.406 (69.317)	Top-5 acc 87.109 (87.272)	lr 0.00402
Train [89][1510/3239]	Time 0.253 (0.666)	Data Time 0.002 (0.028)	Loss 2.2291 (2.2862)	Entropy 0.84648 (0.84854)	Top-1 acc 69.922 (69.315)	Top-5 acc 88.672 (87.272)	lr 0.00402
Train [89][1520/3239]	Time 0.226 (0.665)	Data Time 0.002 (0.028)	Loss 2.5806 (2.2861)	Entropy 0.84641 (0.84853)	Top-1 acc 63.672 (69.318)	Top-5 acc 80.859 (87.270)	lr 0.00402
Train [89][1530/3239]	Time 0.258 (0.664)	Data Time 0.001 (0.028)	Loss 2.3756 (2.2864)	Entropy 0.84640 (0.84852)	Top-1 acc 66.797 (69.314)	Top-5 acc 87.109 (87.265)	lr 0.00402
Train [89][1540/3239]	Time 0.243 (0.662)	Data Time 0.001 (0.028)	Loss 2.1362 (2.2860)	Entropy 0.84635 (0.84850)	Top-1 acc 71.484 (69.315)	Top-5 acc 89.062 (87.270)	lr 0.00402
Train [89][1550/3239]	Time 2.587 (0.661)	Data Time 0.002 (0.028)	Loss 2.1878 (2.2861)	Entropy 0.84635 (0.84849)	Top-1 acc 69.922 (69.309)	Top-5 acc 90.234 (87.275)	lr 0.00402
Train [89][1560/3239]	Time 0.335 (0.658)	Data Time 0.001 (0.027)	Loss 2.1942 (2.2862)	Entropy 0.84632 (0.84847)	Top-1 acc 73.828 (69.309)	Top-5 acc 87.109 (87.269)	lr 0.00402
Train [89][1570/3239]	Time 0.240 (0.657)	Data Time 0.001 (0.027)	Loss 2.4351 (2.2864)	Entropy 0.84622 (0.84846)	Top-1 acc 66.016 (69.301)	Top-5 acc 83.594 (87.264)	lr 0.00402
Train [89][1580/3239]	Time 0.241 (0.656)	Data Time 0.001 (0.027)	Loss 2.3362 (2.2864)	Entropy 0.84620 (0.84845)	Top-1 acc 67.188 (69.295)	Top-5 acc 88.281 (87.262)	lr 0.00402
Train [89][1590/3239]	Time 0.221 (0.655)	Data Time 0.001 (0.027)	Loss 2.3055 (2.2863)	Entropy 0.84618 (0.84843)	Top-1 acc 72.656 (69.294)	Top-5 acc 87.109 (87.265)	lr 0.00402
Train [89][1600/3239]	Time 0.373 (0.654)	Data Time 0.001 (0.027)	Loss 2.2348 (2.2862)	Entropy 0.84612 (0.84842)	Top-1 acc 68.750 (69.295)	Top-5 acc 89.453 (87.265)	lr 0.00402
Train [89][1610/3239]	Time 0.231 (0.653)	Data Time 0.001 (0.027)	Loss 2.2521 (2.2859)	Entropy 0.84610 (0.84840)	Top-1 acc 67.578 (69.296)	Top-5 acc 87.500 (87.268)	lr 0.00402
Train [89][1620/3239]	Time 0.253 (0.652)	Data Time 0.001 (0.026)	Loss 2.3743 (2.2859)	Entropy 0.84666 (0.84839)	Top-1 acc 65.234 (69.298)	Top-5 acc 82.812 (87.264)	lr 0.00401
Train [89][1630/3239]	Time 0.240 (0.651)	Data Time 0.001 (0.026)	Loss 2.3438 (2.2858)	Entropy 0.84667 (0.84838)	Top-1 acc 68.750 (69.298)	Top-5 acc 85.156 (87.267)	lr 0.00401
Train [89][1640/3239]	Time 0.279 (0.650)	Data Time 0.001 (0.026)	Loss 2.3503 (2.2857)	Entropy 0.84656 (0.84837)	Top-1 acc 67.188 (69.298)	Top-5 acc 83.203 (87.269)	lr 0.00401
Train [89][1650/3239]	Time 0.248 (0.649)	Data Time 0.001 (0.026)	Loss 2.2053 (2.2855)	Entropy 0.84658 (0.84836)	Top-1 acc 71.484 (69.298)	Top-5 acc 88.281 (87.272)	lr 0.00401
Train [89][1660/3239]	Time 2.543 (0.648)	Data Time 0.002 (0.026)	Loss 2.3491 (2.2853)	Entropy 0.84658 (0.84835)	Top-1 acc 67.188 (69.305)	Top-5 acc 87.500 (87.277)	lr 0.00401
Train [89][1670/3239]	Time 0.258 (0.646)	Data Time 0.001 (0.026)	Loss 2.2617 (2.2851)	Entropy 0.84652 (0.84834)	Top-1 acc 69.531 (69.312)	Top-5 acc 86.328 (87.282)	lr 0.00401
Train [89][1680/3239]	Time 0.235 (0.645)	Data Time 0.001 (0.026)	Loss 2.2855 (2.2849)	Entropy 0.84647 (0.84833)	Top-1 acc 70.312 (69.319)	Top-5 acc 87.109 (87.282)	lr 0.00401
Train [89][1690/3239]	Time 0.386 (0.644)	Data Time 0.003 (0.025)	Loss 2.3392 (2.2851)	Entropy 0.84646 (0.84831)	Top-1 acc 69.922 (69.312)	Top-5 acc 86.328 (87.277)	lr 0.00401
Train [89][1700/3239]	Time 0.230 (0.643)	Data Time 0.001 (0.025)	Loss 2.3569 (2.2850)	Entropy 0.84643 (0.84830)	Top-1 acc 69.531 (69.315)	Top-5 acc 85.156 (87.279)	lr 0.00401
Train [89][1710/3239]	Time 0.273 (0.642)	Data Time 0.001 (0.025)	Loss 2.1634 (2.2850)	Entropy 0.84643 (0.84829)	Top-1 acc 72.266 (69.314)	Top-5 acc 87.109 (87.276)	lr 0.00401
Train [89][1720/3239]	Time 0.228 (0.641)	Data Time 0.001 (0.025)	Loss 2.1974 (2.2848)	Entropy 0.84634 (0.84828)	Top-1 acc 72.266 (69.320)	Top-5 acc 88.281 (87.277)	lr 0.00401
Train [89][1730/3239]	Time 0.271 (0.640)	Data Time 0.002 (0.025)	Loss 2.4401 (2.2848)	Entropy 0.84630 (0.84827)	Top-1 acc 62.500 (69.316)	Top-5 acc 82.812 (87.279)	lr 0.00401
Train [89][1740/3239]	Time 0.229 (0.640)	Data Time 0.001 (0.025)	Loss 2.2445 (2.2846)	Entropy 0.84629 (0.84826)	Top-1 acc 71.094 (69.320)	Top-5 acc 88.672 (87.280)	lr 0.00401
Train [89][1750/3239]	Time 0.231 (0.639)	Data Time 0.001 (0.025)	Loss 2.1252 (2.2843)	Entropy 0.84633 (0.84825)	Top-1 acc 75.391 (69.330)	Top-5 acc 90.234 (87.284)	lr 0.00401
Train [89][1760/3239]	Time 0.236 (0.638)	Data Time 0.001 (0.024)	Loss 2.2933 (2.2845)	Entropy 0.84630 (0.84824)	Top-1 acc 70.312 (69.327)	Top-5 acc 85.938 (87.282)	lr 0.00400
Train [89][1770/3239]	Time 2.537 (0.637)	Data Time 0.001 (0.024)	Loss 2.2964 (2.2849)	Entropy 0.84630 (0.84823)	Top-1 acc 67.188 (69.312)	Top-5 acc 87.891 (87.279)	lr 0.00400
Train [89][1780/3239]	Time 0.225 (0.635)	Data Time 0.001 (0.024)	Loss 2.4628 (2.2849)	Entropy 0.84613 (0.84821)	Top-1 acc 66.016 (69.310)	Top-5 acc 85.547 (87.279)	lr 0.00400
Train [89][1790/3239]	Time 0.234 (0.634)	Data Time 0.001 (0.024)	Loss 2.1938 (2.2849)	Entropy 0.84610 (0.84820)	Top-1 acc 70.703 (69.304)	Top-5 acc 88.672 (87.279)	lr 0.00400
Train [89][1800/3239]	Time 0.224 (0.633)	Data Time 0.005 (0.024)	Loss 2.3042 (2.2850)	Entropy 0.84608 (0.84819)	Top-1 acc 67.969 (69.298)	Top-5 acc 85.547 (87.274)	lr 0.00400
Train [89][1810/3239]	Time 0.246 (0.632)	Data Time 0.001 (0.024)	Loss 2.3014 (2.2850)	Entropy 0.84606 (0.84818)	Top-1 acc 67.188 (69.300)	Top-5 acc 87.500 (87.274)	lr 0.00400
Train [89][1820/3239]	Time 0.225 (0.632)	Data Time 0.001 (0.024)	Loss 2.3199 (2.2851)	Entropy 0.84604 (0.84817)	Top-1 acc 67.188 (69.299)	Top-5 acc 87.891 (87.273)	lr 0.00400
Train [89][1830/3239]	Time 0.234 (0.631)	Data Time 0.001 (0.024)	Loss 2.3760 (2.2854)	Entropy 0.84593 (0.84816)	Top-1 acc 66.016 (69.293)	Top-5 acc 83.984 (87.268)	lr 0.00400
Train [89][1840/3239]	Time 0.239 (0.630)	Data Time 0.001 (0.023)	Loss 2.2808 (2.2855)	Entropy 0.84578 (0.84814)	Top-1 acc 67.969 (69.288)	Top-5 acc 86.719 (87.266)	lr 0.00400
Train [89][1850/3239]	Time 0.248 (0.629)	Data Time 0.002 (0.023)	Loss 2.0472 (2.2855)	Entropy 0.84580 (0.84813)	Top-1 acc 74.219 (69.286)	Top-5 acc 92.188 (87.266)	lr 0.00400
Train [89][1860/3239]	Time 0.256 (0.659)	Data Time 0.002 (0.023)	Loss 2.1002 (2.2856)	Entropy 0.84580 (0.84812)	Top-1 acc 73.047 (69.279)	Top-5 acc 92.578 (87.269)	lr 0.00400
Train [89][1870/3239]	Time 0.241 (0.658)	Data Time 0.002 (0.023)	Loss 2.2136 (2.2854)	Entropy 0.84586 (0.84811)	Top-1 acc 68.750 (69.280)	Top-5 acc 88.281 (87.271)	lr 0.00400
Train [89][1880/3239]	Time 2.505 (0.657)	Data Time 0.002 (0.023)	Loss 2.2571 (2.2854)	Entropy 0.84586 (0.84809)	Top-1 acc 72.266 (69.289)	Top-5 acc 87.500 (87.274)	lr 0.00400
Train [89][1890/3239]	Time 0.235 (0.655)	Data Time 0.001 (0.023)	Loss 2.3762 (2.2855)	Entropy 0.84580 (0.84808)	Top-1 acc 64.844 (69.290)	Top-5 acc 85.547 (87.271)	lr 0.00399
Train [89][1900/3239]	Time 0.385 (0.654)	Data Time 0.002 (0.023)	Loss 2.4406 (2.2855)	Entropy 0.84578 (0.84807)	Top-1 acc 63.672 (69.287)	Top-5 acc 85.156 (87.271)	lr 0.00399
Train [89][1910/3239]	Time 0.234 (0.653)	Data Time 0.001 (0.023)	Loss 2.3320 (2.2855)	Entropy 0.84574 (0.84806)	Top-1 acc 66.016 (69.285)	Top-5 acc 86.719 (87.268)	lr 0.00399
Train [89][1920/3239]	Time 0.251 (0.652)	Data Time 0.001 (0.023)	Loss 2.2799 (2.2857)	Entropy 0.84572 (0.84804)	Top-1 acc 71.094 (69.282)	Top-5 acc 90.625 (87.266)	lr 0.00399
Train [89][1930/3239]	Time 0.246 (0.652)	Data Time 0.001 (0.022)	Loss 2.2940 (2.2858)	Entropy 0.84566 (0.84803)	Top-1 acc 70.312 (69.277)	Top-5 acc 87.109 (87.265)	lr 0.00399
Train [89][1940/3239]	Time 0.248 (0.651)	Data Time 0.001 (0.022)	Loss 2.3459 (2.2859)	Entropy 0.84565 (0.84802)	Top-1 acc 70.312 (69.274)	Top-5 acc 87.109 (87.263)	lr 0.00399
Train [89][1950/3239]	Time 0.272 (0.650)	Data Time 0.001 (0.022)	Loss 2.2235 (2.2858)	Entropy 0.84566 (0.84801)	Top-1 acc 73.047 (69.275)	Top-5 acc 86.328 (87.263)	lr 0.00399
Train [89][1960/3239]	Time 0.254 (0.649)	Data Time 0.001 (0.022)	Loss 2.1322 (2.2858)	Entropy 0.84688 (0.84800)	Top-1 acc 73.828 (69.276)	Top-5 acc 89.844 (87.263)	lr 0.00399
Train [89][1970/3239]	Time 0.212 (0.649)	Data Time 0.001 (0.022)	Loss 2.2885 (2.2858)	Entropy 0.84686 (0.84799)	Top-1 acc 71.094 (69.279)	Top-5 acc 88.281 (87.265)	lr 0.00399
Train [89][1980/3239]	Time 0.280 (0.648)	Data Time 0.001 (0.022)	Loss 2.2402 (2.2858)	Entropy 0.84689 (0.84799)	Top-1 acc 73.047 (69.277)	Top-5 acc 87.891 (87.263)	lr 0.00399
Train [89][1990/3239]	Time 2.713 (0.647)	Data Time 0.001 (0.022)	Loss 2.3547 (2.2858)	Entropy 0.84689 (0.84798)	Top-1 acc 68.750 (69.281)	Top-5 acc 87.109 (87.264)	lr 0.00399
Train [89][2000/3239]	Time 0.236 (0.645)	Data Time 0.001 (0.022)	Loss 2.2731 (2.2858)	Entropy 0.84686 (0.84798)	Top-1 acc 68.750 (69.285)	Top-5 acc 87.500 (87.263)	lr 0.00399
Train [89][2010/3239]	Time 0.246 (0.644)	Data Time 0.001 (0.022)	Loss 2.1304 (2.2857)	Entropy 0.84679 (0.84797)	Top-1 acc 71.484 (69.291)	Top-5 acc 91.016 (87.266)	lr 0.00399
Train [89][2020/3239]	Time 0.225 (0.643)	Data Time 0.001 (0.022)	Loss 2.4590 (2.2857)	Entropy 0.84680 (0.84796)	Top-1 acc 64.062 (69.291)	Top-5 acc 82.031 (87.264)	lr 0.00399
Train [89][2030/3239]	Time 0.243 (0.643)	Data Time 0.001 (0.021)	Loss 2.3043 (2.2856)	Entropy 0.84679 (0.84796)	Top-1 acc 71.094 (69.296)	Top-5 acc 87.891 (87.266)	lr 0.00398
Train [89][2040/3239]	Time 0.234 (0.642)	Data Time 0.001 (0.021)	Loss 2.3385 (2.2857)	Entropy 0.84674 (0.84795)	Top-1 acc 67.969 (69.290)	Top-5 acc 85.547 (87.264)	lr 0.00398
Train [89][2050/3239]	Time 0.235 (0.641)	Data Time 0.001 (0.021)	Loss 2.0747 (2.2856)	Entropy 0.84675 (0.84795)	Top-1 acc 71.094 (69.290)	Top-5 acc 92.188 (87.265)	lr 0.00398
Train [89][2060/3239]	Time 0.235 (0.640)	Data Time 0.002 (0.021)	Loss 2.2807 (2.2858)	Entropy 0.84662 (0.84794)	Top-1 acc 70.703 (69.290)	Top-5 acc 88.672 (87.260)	lr 0.00398
Train [89][2070/3239]	Time 0.257 (0.640)	Data Time 0.002 (0.021)	Loss 2.2742 (2.2857)	Entropy 0.84798 (0.84794)	Top-1 acc 67.188 (69.290)	Top-5 acc 87.891 (87.260)	lr 0.00398
Train [89][2080/3239]	Time 0.225 (0.639)	Data Time 0.001 (0.021)	Loss 2.1837 (2.2857)	Entropy 0.84792 (0.84794)	Top-1 acc 74.219 (69.292)	Top-5 acc 88.281 (87.260)	lr 0.00398
Train [89][2090/3239]	Time 0.270 (0.638)	Data Time 0.001 (0.021)	Loss 2.2965 (2.2859)	Entropy 0.84791 (0.84794)	Top-1 acc 66.406 (69.290)	Top-5 acc 87.891 (87.255)	lr 0.00398
Train [89][2100/3239]	Time 2.501 (0.637)	Data Time 0.001 (0.021)	Loss 2.2556 (2.2858)	Entropy 0.84791 (0.84794)	Top-1 acc 69.922 (69.295)	Top-5 acc 89.453 (87.256)	lr 0.00398
Train [89][2110/3239]	Time 0.243 (0.635)	Data Time 0.002 (0.021)	Loss 2.3566 (2.2859)	Entropy 0.84789 (0.84794)	Top-1 acc 67.969 (69.294)	Top-5 acc 83.984 (87.255)	lr 0.00398
Train [89][2120/3239]	Time 0.338 (0.635)	Data Time 0.001 (0.021)	Loss 2.2131 (2.2862)	Entropy 0.84792 (0.84794)	Top-1 acc 71.094 (69.288)	Top-5 acc 88.672 (87.249)	lr 0.00398
Train [89][2130/3239]	Time 0.216 (0.634)	Data Time 0.001 (0.021)	Loss 2.2508 (2.2863)	Entropy 0.84769 (0.84793)	Top-1 acc 71.094 (69.284)	Top-5 acc 88.672 (87.246)	lr 0.00398
Train [89][2140/3239]	Time 0.236 (0.633)	Data Time 0.001 (0.020)	Loss 2.3603 (2.2864)	Entropy 0.84765 (0.84793)	Top-1 acc 68.750 (69.283)	Top-5 acc 87.891 (87.246)	lr 0.00398
Train [89][2150/3239]	Time 0.235 (0.633)	Data Time 0.001 (0.020)	Loss 2.3853 (2.2865)	Entropy 0.84764 (0.84793)	Top-1 acc 65.234 (69.278)	Top-5 acc 85.547 (87.243)	lr 0.00398
Train [89][2160/3239]	Time 0.316 (0.632)	Data Time 0.002 (0.020)	Loss 2.2205 (2.2867)	Entropy 0.84758 (0.84793)	Top-1 acc 70.703 (69.277)	Top-5 acc 91.406 (87.241)	lr 0.00398
Train [89][2170/3239]	Time 0.239 (0.631)	Data Time 0.001 (0.020)	Loss 2.1530 (2.2867)	Entropy 0.84756 (0.84793)	Top-1 acc 69.922 (69.275)	Top-5 acc 89.453 (87.240)	lr 0.00397
Train [89][2180/3239]	Time 0.225 (0.631)	Data Time 0.001 (0.020)	Loss 2.3212 (2.2866)	Entropy 0.84753 (0.84793)	Top-1 acc 68.359 (69.275)	Top-5 acc 87.109 (87.243)	lr 0.00397
Train [89][2190/3239]	Time 0.231 (0.630)	Data Time 0.001 (0.020)	Loss 2.3652 (2.2866)	Entropy 0.84749 (0.84793)	Top-1 acc 65.625 (69.274)	Top-5 acc 87.109 (87.245)	lr 0.00397
Train [89][2200/3239]	Time 0.270 (0.629)	Data Time 0.001 (0.020)	Loss 2.2855 (2.2867)	Entropy 0.84743 (0.84792)	Top-1 acc 66.406 (69.268)	Top-5 acc 86.719 (87.246)	lr 0.00397
Train [89][2210/3239]	Time 2.672 (0.629)	Data Time 0.001 (0.020)	Loss 2.2872 (2.2866)	Entropy 0.84743 (0.84792)	Top-1 acc 65.234 (69.263)	Top-5 acc 86.719 (87.247)	lr 0.00397
Train [89][2220/3239]	Time 0.265 (0.627)	Data Time 0.002 (0.020)	Loss 2.1469 (2.2867)	Entropy 0.84741 (0.84792)	Top-1 acc 72.656 (69.260)	Top-5 acc 90.234 (87.246)	lr 0.00397
Train [89][2230/3239]	Time 0.240 (0.650)	Data Time 0.002 (0.020)	Loss 2.5360 (2.2869)	Entropy 0.84744 (0.84792)	Top-1 acc 62.891 (69.254)	Top-5 acc 79.688 (87.240)	lr 0.00397
Train [89][2240/3239]	Time 0.271 (0.649)	Data Time 0.002 (0.020)	Loss 2.2995 (2.2868)	Entropy 0.84745 (0.84791)	Top-1 acc 69.141 (69.258)	Top-5 acc 87.109 (87.244)	lr 0.00397
Train [89][2250/3239]	Time 0.335 (0.648)	Data Time 0.001 (0.020)	Loss 2.1842 (2.2870)	Entropy 0.84743 (0.84791)	Top-1 acc 67.969 (69.249)	Top-5 acc 86.719 (87.239)	lr 0.00397
Train [89][2260/3239]	Time 0.232 (0.648)	Data Time 0.001 (0.019)	Loss 2.2955 (2.2870)	Entropy 0.84739 (0.84791)	Top-1 acc 66.406 (69.251)	Top-5 acc 87.891 (87.241)	lr 0.00397
Train [89][2270/3239]	Time 0.235 (0.647)	Data Time 0.001 (0.019)	Loss 2.3188 (2.2869)	Entropy 0.84739 (0.84791)	Top-1 acc 67.969 (69.254)	Top-5 acc 87.891 (87.240)	lr 0.00397
Train [89][2280/3239]	Time 0.244 (0.646)	Data Time 0.002 (0.019)	Loss 2.3680 (2.2871)	Entropy 0.84735 (0.84791)	Top-1 acc 66.797 (69.253)	Top-5 acc 85.938 (87.235)	lr 0.00397
Train [89][2290/3239]	Time 0.321 (0.645)	Data Time 0.001 (0.019)	Loss 2.2563 (2.2871)	Entropy 0.84733 (0.84790)	Top-1 acc 71.094 (69.252)	Top-5 acc 86.328 (87.232)	lr 0.00397
Train [89][2300/3239]	Time 0.276 (0.645)	Data Time 0.001 (0.019)	Loss 2.3465 (2.2870)	Entropy 0.84736 (0.84790)	Top-1 acc 67.578 (69.254)	Top-5 acc 86.328 (87.235)	lr 0.00396
Train [89][2310/3239]	Time 0.274 (0.644)	Data Time 0.001 (0.019)	Loss 2.3685 (2.2871)	Entropy 0.84732 (0.84790)	Top-1 acc 66.797 (69.254)	Top-5 acc 84.375 (87.232)	lr 0.00396
Train [89][2320/3239]	Time 2.485 (0.643)	Data Time 0.001 (0.019)	Loss 2.3530 (2.2874)	Entropy 0.84732 (0.84790)	Top-1 acc 67.188 (69.245)	Top-5 acc 87.109 (87.225)	lr 0.00396
Train [89][2330/3239]	Time 0.237 (0.642)	Data Time 0.001 (0.019)	Loss 2.3461 (2.2874)	Entropy 0.84721 (0.84789)	Top-1 acc 67.969 (69.252)	Top-5 acc 86.328 (87.220)	lr 0.00396
Train [89][2340/3239]	Time 0.228 (0.641)	Data Time 0.001 (0.019)	Loss 2.3400 (2.2876)	Entropy 0.84715 (0.84789)	Top-1 acc 69.141 (69.251)	Top-5 acc 87.109 (87.218)	lr 0.00396
Train [89][2350/3239]	Time 0.243 (0.640)	Data Time 0.001 (0.019)	Loss 2.2036 (2.2874)	Entropy 0.84712 (0.84789)	Top-1 acc 70.703 (69.259)	Top-5 acc 89.844 (87.220)	lr 0.00396
Train [89][2360/3239]	Time 0.230 (0.640)	Data Time 0.001 (0.019)	Loss 2.3358 (2.2877)	Entropy 0.84713 (0.84788)	Top-1 acc 67.969 (69.253)	Top-5 acc 85.547 (87.215)	lr 0.00396
Train [89][2370/3239]	Time 0.217 (0.639)	Data Time 0.001 (0.019)	Loss 2.2325 (2.2876)	Entropy 0.84709 (0.84788)	Top-1 acc 69.531 (69.252)	Top-5 acc 87.109 (87.218)	lr 0.00396
Train [89][2380/3239]	Time 0.359 (0.638)	Data Time 0.002 (0.019)	Loss 2.2212 (2.2876)	Entropy 0.84703 (0.84788)	Top-1 acc 71.094 (69.256)	Top-5 acc 88.672 (87.216)	lr 0.00396
Train [89][2390/3239]	Time 0.237 (0.638)	Data Time 0.001 (0.019)	Loss 2.3961 (2.2879)	Entropy 0.84702 (0.84787)	Top-1 acc 66.797 (69.248)	Top-5 acc 84.766 (87.209)	lr 0.00396
Train [89][2400/3239]	Time 0.229 (0.637)	Data Time 0.001 (0.018)	Loss 2.2569 (2.2880)	Entropy 0.84700 (0.84787)	Top-1 acc 68.750 (69.244)	Top-5 acc 89.453 (87.207)	lr 0.00396
Train [89][2410/3239]	Time 0.274 (0.636)	Data Time 0.002 (0.018)	Loss 2.5089 (2.2881)	Entropy 0.84693 (0.84787)	Top-1 acc 65.234 (69.244)	Top-5 acc 80.859 (87.203)	lr 0.00396
Train [89][2420/3239]	Time 0.350 (0.636)	Data Time 0.001 (0.018)	Loss 2.4906 (2.2881)	Entropy 0.84684 (0.84786)	Top-1 acc 65.625 (69.244)	Top-5 acc 84.766 (87.202)	lr 0.00396
Train [89][2430/3239]	Time 2.608 (0.635)	Data Time 0.001 (0.018)	Loss 2.1284 (2.2882)	Entropy 0.84684 (0.84786)	Top-1 acc 69.922 (69.239)	Top-5 acc 91.406 (87.203)	lr 0.00396
Train [89][2440/3239]	Time 0.238 (0.634)	Data Time 0.001 (0.018)	Loss 2.3309 (2.2883)	Entropy 0.84683 (0.84785)	Top-1 acc 65.625 (69.240)	Top-5 acc 87.109 (87.199)	lr 0.00395
Train [89][2450/3239]	Time 0.224 (0.633)	Data Time 0.001 (0.018)	Loss 2.3056 (2.2884)	Entropy 0.84682 (0.84785)	Top-1 acc 69.531 (69.240)	Top-5 acc 87.109 (87.196)	lr 0.00395
Train [89][2460/3239]	Time 0.243 (0.632)	Data Time 0.001 (0.018)	Loss 2.2526 (2.2882)	Entropy 0.84683 (0.84785)	Top-1 acc 68.750 (69.242)	Top-5 acc 86.719 (87.200)	lr 0.00395
Train [89][2470/3239]	Time 0.231 (0.632)	Data Time 0.001 (0.018)	Loss 2.4057 (2.2882)	Entropy 0.84677 (0.84784)	Top-1 acc 67.969 (69.245)	Top-5 acc 85.938 (87.202)	lr 0.00395
Train [89][2480/3239]	Time 0.246 (0.631)	Data Time 0.002 (0.018)	Loss 2.3209 (2.2881)	Entropy 0.84679 (0.84784)	Top-1 acc 68.359 (69.245)	Top-5 acc 86.328 (87.202)	lr 0.00395
Train [89][2490/3239]	Time 0.228 (0.631)	Data Time 0.001 (0.018)	Loss 2.1988 (2.2881)	Entropy 0.84683 (0.84783)	Top-1 acc 69.922 (69.242)	Top-5 acc 87.109 (87.203)	lr 0.00395
Train [89][2500/3239]	Time 0.228 (0.630)	Data Time 0.001 (0.018)	Loss 2.2497 (2.2881)	Entropy 0.84683 (0.84783)	Top-1 acc 69.922 (69.243)	Top-5 acc 86.719 (87.200)	lr 0.00395
Train [89][2510/3239]	Time 0.267 (0.629)	Data Time 0.001 (0.018)	Loss 2.4065 (2.2881)	Entropy 0.84685 (0.84782)	Top-1 acc 66.797 (69.242)	Top-5 acc 83.984 (87.199)	lr 0.00395
Train [89][2520/3239]	Time 0.257 (0.629)	Data Time 0.001 (0.018)	Loss 2.1811 (2.2881)	Entropy 0.84680 (0.84782)	Top-1 acc 71.484 (69.241)	Top-5 acc 91.016 (87.199)	lr 0.00395
Train [89][2530/3239]	Time 0.258 (0.628)	Data Time 0.001 (0.018)	Loss 2.2240 (2.2880)	Entropy 0.84680 (0.84782)	Top-1 acc 72.266 (69.247)	Top-5 acc 86.719 (87.200)	lr 0.00395
Train [89][2540/3239]	Time 2.536 (0.628)	Data Time 0.001 (0.018)	Loss 2.3776 (2.2880)	Entropy 0.84680 (0.84781)	Top-1 acc 64.844 (69.246)	Top-5 acc 87.500 (87.198)	lr 0.00395
Train [89][2550/3239]	Time 0.340 (0.626)	Data Time 0.001 (0.017)	Loss 2.3043 (2.2881)	Entropy 0.84676 (0.84781)	Top-1 acc 66.016 (69.244)	Top-5 acc 85.547 (87.195)	lr 0.00395
Train [89][2560/3239]	Time 0.275 (0.626)	Data Time 0.001 (0.017)	Loss 2.3881 (2.2882)	Entropy 0.84679 (0.84780)	Top-1 acc 67.578 (69.242)	Top-5 acc 82.812 (87.195)	lr 0.00395
Train [89][2570/3239]	Time 0.229 (0.625)	Data Time 0.001 (0.017)	Loss 2.3896 (2.2884)	Entropy 0.84676 (0.84780)	Top-1 acc 69.531 (69.236)	Top-5 acc 85.547 (87.193)	lr 0.00394
Train [89][2580/3239]	Time 0.238 (0.625)	Data Time 0.001 (0.017)	Loss 2.3645 (2.2885)	Entropy 0.84681 (0.84780)	Top-1 acc 71.094 (69.237)	Top-5 acc 85.156 (87.192)	lr 0.00394
Train [89][2590/3239]	Time 0.422 (0.643)	Data Time 0.002 (0.017)	Loss 2.3029 (2.2885)	Entropy 0.84682 (0.84779)	Top-1 acc 71.484 (69.237)	Top-5 acc 86.719 (87.190)	lr 0.00394
Train [89][2600/3239]	Time 0.281 (0.643)	Data Time 0.002 (0.017)	Loss 2.2897 (2.2886)	Entropy 0.84678 (0.84779)	Top-1 acc 71.484 (69.236)	Top-5 acc 89.062 (87.187)	lr 0.00394
Train [89][2610/3239]	Time 0.235 (0.642)	Data Time 0.002 (0.017)	Loss 2.1743 (2.2888)	Entropy 0.84677 (0.84779)	Top-1 acc 70.312 (69.231)	Top-5 acc 88.672 (87.186)	lr 0.00394
Train [89][2620/3239]	Time 0.268 (0.642)	Data Time 0.001 (0.017)	Loss 2.3381 (2.2889)	Entropy 0.84676 (0.84778)	Top-1 acc 65.625 (69.227)	Top-5 acc 88.281 (87.183)	lr 0.00394
Train [89][2630/3239]	Time 0.309 (0.641)	Data Time 0.002 (0.017)	Loss 2.1486 (2.2889)	Entropy 0.84669 (0.84778)	Top-1 acc 70.312 (69.222)	Top-5 acc 91.016 (87.185)	lr 0.00394
Train [89][2640/3239]	Time 0.278 (0.640)	Data Time 0.001 (0.017)	Loss 2.2850 (2.2892)	Entropy 0.84665 (0.84777)	Top-1 acc 69.922 (69.213)	Top-5 acc 85.938 (87.179)	lr 0.00394
Train [89][2650/3239]	Time 0.238 (0.640)	Data Time 0.001 (0.017)	Loss 2.2250 (2.2892)	Entropy 0.84658 (0.84777)	Top-1 acc 70.312 (69.211)	Top-5 acc 87.500 (87.178)	lr 0.00394
Train [89][2660/3239]	Time 0.239 (0.639)	Data Time 0.001 (0.017)	Loss 2.2929 (2.2892)	Entropy 0.84648 (0.84776)	Top-1 acc 67.578 (69.210)	Top-5 acc 87.891 (87.178)	lr 0.00394
Train [89][2670/3239]	Time 0.222 (0.639)	Data Time 0.001 (0.017)	Loss 2.2926 (2.2892)	Entropy 0.84648 (0.84776)	Top-1 acc 70.312 (69.214)	Top-5 acc 85.938 (87.179)	lr 0.00394
Train [89][2680/3239]	Time 0.234 (0.638)	Data Time 0.001 (0.017)	Loss 2.4327 (2.2892)	Entropy 0.84647 (0.84775)	Top-1 acc 67.578 (69.213)	Top-5 acc 82.812 (87.181)	lr 0.00394
Train [89][2690/3239]	Time 0.266 (0.638)	Data Time 0.001 (0.017)	Loss 2.3334 (2.2892)	Entropy 0.84643 (0.84775)	Top-1 acc 67.188 (69.213)	Top-5 acc 84.375 (87.180)	lr 0.00394
Train [89][2700/3239]	Time 0.266 (0.637)	Data Time 0.001 (0.017)	Loss 2.2017 (2.2891)	Entropy 0.84647 (0.84775)	Top-1 acc 68.750 (69.216)	Top-5 acc 88.672 (87.184)	lr 0.00394
Train [89][2710/3239]	Time 0.247 (0.636)	Data Time 0.001 (0.017)	Loss 2.2820 (2.2890)	Entropy 0.84647 (0.84774)	Top-1 acc 67.188 (69.216)	Top-5 acc 87.109 (87.183)	lr 0.00393
Train [89][2720/3239]	Time 0.404 (0.636)	Data Time 0.002 (0.016)	Loss 2.2791 (2.2890)	Entropy 0.84651 (0.84774)	Top-1 acc 69.141 (69.216)	Top-5 acc 88.672 (87.185)	lr 0.00393
Train [89][2730/3239]	Time 0.266 (0.635)	Data Time 0.001 (0.016)	Loss 2.3060 (2.2889)	Entropy 0.84647 (0.84773)	Top-1 acc 68.750 (69.218)	Top-5 acc 87.109 (87.185)	lr 0.00393
Train [89][2740/3239]	Time 0.237 (0.635)	Data Time 0.001 (0.016)	Loss 2.2699 (2.2887)	Entropy 0.84634 (0.84773)	Top-1 acc 66.797 (69.223)	Top-5 acc 84.375 (87.190)	lr 0.00393
Train [89][2750/3239]	Time 0.278 (0.634)	Data Time 0.002 (0.016)	Loss 2.2606 (2.2887)	Entropy 0.84634 (0.84772)	Top-1 acc 70.703 (69.228)	Top-5 acc 86.719 (87.189)	lr 0.00393
Train [89][2760/3239]	Time 0.368 (0.634)	Data Time 0.001 (0.016)	Loss 2.3818 (2.2885)	Entropy 0.84632 (0.84772)	Top-1 acc 66.016 (69.229)	Top-5 acc 83.594 (87.193)	lr 0.00393
Train [89][2770/3239]	Time 0.268 (0.633)	Data Time 0.001 (0.016)	Loss 2.2492 (2.2884)	Entropy 0.84635 (0.84771)	Top-1 acc 68.750 (69.227)	Top-5 acc 87.891 (87.194)	lr 0.00393
Train [89][2780/3239]	Time 0.242 (0.633)	Data Time 0.001 (0.016)	Loss 2.2355 (2.2884)	Entropy 0.84630 (0.84771)	Top-1 acc 71.094 (69.227)	Top-5 acc 88.672 (87.194)	lr 0.00393
Train [89][2790/3239]	Time 0.221 (0.632)	Data Time 0.001 (0.016)	Loss 2.3206 (2.2884)	Entropy 0.84627 (0.84770)	Top-1 acc 66.797 (69.230)	Top-5 acc 87.891 (87.196)	lr 0.00393
Train [89][2800/3239]	Time 0.348 (0.632)	Data Time 0.001 (0.016)	Loss 2.1583 (2.2884)	Entropy 0.84624 (0.84770)	Top-1 acc 67.578 (69.230)	Top-5 acc 90.234 (87.195)	lr 0.00393
Train [89][2810/3239]	Time 0.234 (0.631)	Data Time 0.001 (0.016)	Loss 2.2280 (2.2885)	Entropy 0.84633 (0.84769)	Top-1 acc 66.797 (69.224)	Top-5 acc 88.672 (87.192)	lr 0.00393
Train [89][2820/3239]	Time 0.258 (0.631)	Data Time 0.001 (0.016)	Loss 2.2971 (2.2886)	Entropy 0.84633 (0.84769)	Top-1 acc 69.141 (69.225)	Top-5 acc 85.938 (87.189)	lr 0.00393
Train [89][2830/3239]	Time 0.238 (0.630)	Data Time 0.001 (0.016)	Loss 2.3292 (2.2886)	Entropy 0.84628 (0.84768)	Top-1 acc 68.359 (69.224)	Top-5 acc 84.375 (87.187)	lr 0.00393
Train [89][2840/3239]	Time 0.259 (0.630)	Data Time 0.002 (0.016)	Loss 2.3080 (2.2885)	Entropy 0.84628 (0.84768)	Top-1 acc 64.844 (69.227)	Top-5 acc 87.891 (87.191)	lr 0.00392
Train [89][2850/3239]	Time 0.275 (0.629)	Data Time 0.001 (0.016)	Loss 2.2416 (2.2886)	Entropy 0.84621 (0.84767)	Top-1 acc 71.094 (69.221)	Top-5 acc 89.062 (87.192)	lr 0.00392
Train [89][2860/3239]	Time 0.253 (0.629)	Data Time 0.001 (0.016)	Loss 2.0921 (2.2885)	Entropy 0.84620 (0.84767)	Top-1 acc 72.266 (69.226)	Top-5 acc 91.016 (87.194)	lr 0.00392
Train [89][2870/3239]	Time 0.249 (0.628)	Data Time 0.001 (0.016)	Loss 2.3517 (2.2885)	Entropy 0.84611 (0.84766)	Top-1 acc 68.750 (69.223)	Top-5 acc 85.156 (87.193)	lr 0.00392
Train [89][2880/3239]	Time 0.268 (0.628)	Data Time 0.001 (0.016)	Loss 2.3403 (2.2887)	Entropy 0.84610 (0.84766)	Top-1 acc 69.531 (69.216)	Top-5 acc 85.547 (87.191)	lr 0.00392
Train [89][2890/3239]	Time 0.292 (0.627)	Data Time 0.001 (0.016)	Loss 2.3789 (2.2888)	Entropy 0.84602 (0.84765)	Top-1 acc 67.969 (69.213)	Top-5 acc 87.109 (87.190)	lr 0.00392
Train [89][2900/3239]	Time 0.237 (0.627)	Data Time 0.001 (0.016)	Loss 2.1245 (2.2887)	Entropy 0.84600 (0.84765)	Top-1 acc 73.438 (69.218)	Top-5 acc 90.234 (87.190)	lr 0.00392
Train [89][2910/3239]	Time 0.297 (0.626)	Data Time 0.001 (0.016)	Loss 2.3155 (2.2887)	Entropy 0.84594 (0.84764)	Top-1 acc 69.531 (69.219)	Top-5 acc 86.719 (87.188)	lr 0.00392
Train [89][2920/3239]	Time 0.300 (0.643)	Data Time 0.004 (0.015)	Loss 2.4494 (2.2890)	Entropy 0.84590 (0.84763)	Top-1 acc 64.453 (69.210)	Top-5 acc 85.156 (87.184)	lr 0.00392
Train [89][2930/3239]	Time 0.225 (0.643)	Data Time 0.002 (0.015)	Loss 2.2589 (2.2892)	Entropy 0.84583 (0.84763)	Top-1 acc 69.531 (69.206)	Top-5 acc 87.891 (87.179)	lr 0.00392
Train [89][2940/3239]	Time 0.235 (0.642)	Data Time 0.002 (0.015)	Loss 2.2649 (2.2891)	Entropy 0.84584 (0.84762)	Top-1 acc 68.750 (69.207)	Top-5 acc 89.062 (87.180)	lr 0.00392
Train [89][2950/3239]	Time 0.236 (0.642)	Data Time 0.001 (0.015)	Loss 2.4260 (2.2895)	Entropy 0.84570 (0.84762)	Top-1 acc 64.844 (69.201)	Top-5 acc 83.594 (87.176)	lr 0.00392
Train [89][2960/3239]	Time 0.291 (0.641)	Data Time 0.001 (0.015)	Loss 2.2746 (2.2895)	Entropy 0.84561 (0.84761)	Top-1 acc 71.875 (69.199)	Top-5 acc 87.891 (87.176)	lr 0.00392
Train [89][2970/3239]	Time 0.332 (0.641)	Data Time 0.001 (0.015)	Loss 2.2162 (2.2894)	Entropy 0.84564 (0.84760)	Top-1 acc 71.875 (69.204)	Top-5 acc 88.672 (87.177)	lr 0.00392
Train [89][2980/3239]	Time 0.248 (0.640)	Data Time 0.002 (0.015)	Loss 2.3802 (2.2896)	Entropy 0.84562 (0.84760)	Top-1 acc 66.797 (69.200)	Top-5 acc 85.547 (87.173)	lr 0.00391
Train [89][2990/3239]	Time 0.242 (0.640)	Data Time 0.001 (0.015)	Loss 2.2743 (2.2896)	Entropy 0.84563 (0.84759)	Top-1 acc 71.094 (69.199)	Top-5 acc 87.500 (87.174)	lr 0.00391
Train [89][3000/3239]	Time 0.250 (0.639)	Data Time 0.001 (0.015)	Loss 2.1942 (2.2893)	Entropy 0.84559 (0.84758)	Top-1 acc 68.359 (69.203)	Top-5 acc 89.453 (87.179)	lr 0.00391
Train [89][3010/3239]	Time 0.219 (0.639)	Data Time 0.001 (0.015)	Loss 2.2831 (2.2893)	Entropy 0.84555 (0.84758)	Top-1 acc 69.531 (69.203)	Top-5 acc 88.672 (87.182)	lr 0.00391
Train [89][3020/3239]	Time 0.289 (0.638)	Data Time 0.001 (0.015)	Loss 2.2106 (2.2893)	Entropy 0.84545 (0.84757)	Top-1 acc 71.094 (69.205)	Top-5 acc 89.062 (87.179)	lr 0.00391
Train [89][3030/3239]	Time 0.201 (0.638)	Data Time 0.001 (0.015)	Loss 2.4260 (2.2894)	Entropy 0.84538 (0.84756)	Top-1 acc 66.016 (69.203)	Top-5 acc 82.812 (87.179)	lr 0.00391
Train [89][3040/3239]	Time 0.230 (0.637)	Data Time 0.002 (0.015)	Loss 2.4217 (2.2895)	Entropy 0.84539 (0.84755)	Top-1 acc 66.016 (69.202)	Top-5 acc 85.547 (87.179)	lr 0.00391
Train [89][3050/3239]	Time 0.236 (0.637)	Data Time 0.001 (0.015)	Loss 2.2701 (2.2896)	Entropy 0.84530 (0.84755)	Top-1 acc 68.750 (69.197)	Top-5 acc 87.891 (87.179)	lr 0.00391
Train [89][3060/3239]	Time 0.335 (0.637)	Data Time 0.001 (0.015)	Loss 2.2213 (2.2896)	Entropy 0.84529 (0.84754)	Top-1 acc 68.750 (69.191)	Top-5 acc 89.062 (87.179)	lr 0.00391
Train [89][3070/3239]	Time 0.239 (0.636)	Data Time 0.001 (0.015)	Loss 2.2803 (2.2896)	Entropy 0.84529 (0.84753)	Top-1 acc 70.312 (69.192)	Top-5 acc 86.719 (87.180)	lr 0.00391
Train [89][3080/3239]	Time 0.236 (0.636)	Data Time 0.001 (0.015)	Loss 2.4689 (2.2898)	Entropy 0.84532 (0.84753)	Top-1 acc 61.328 (69.185)	Top-5 acc 83.594 (87.178)	lr 0.00391
Train [89][3090/3239]	Time 0.230 (0.635)	Data Time 0.001 (0.015)	Loss 2.4236 (2.2898)	Entropy 0.84510 (0.84752)	Top-1 acc 62.891 (69.185)	Top-5 acc 84.766 (87.178)	lr 0.00391
Train [89][3100/3239]	Time 0.355 (0.635)	Data Time 0.001 (0.015)	Loss 2.2124 (2.2897)	Entropy 0.84507 (0.84751)	Top-1 acc 71.484 (69.184)	Top-5 acc 86.328 (87.178)	lr 0.00391
Train [89][3110/3239]	Time 0.230 (0.634)	Data Time 0.001 (0.015)	Loss 2.3748 (2.2899)	Entropy 0.84506 (0.84750)	Top-1 acc 67.188 (69.181)	Top-5 acc 85.156 (87.174)	lr 0.00391
Train [89][3120/3239]	Time 0.243 (0.634)	Data Time 0.001 (0.015)	Loss 2.2828 (2.2899)	Entropy 0.84501 (0.84750)	Top-1 acc 71.875 (69.181)	Top-5 acc 87.500 (87.173)	lr 0.00390
Train [89][3130/3239]	Time 0.262 (0.633)	Data Time 0.001 (0.015)	Loss 2.2286 (2.2899)	Entropy 0.84494 (0.84749)	Top-1 acc 70.312 (69.178)	Top-5 acc 89.844 (87.173)	lr 0.00390
Train [89][3140/3239]	Time 0.273 (0.633)	Data Time 0.001 (0.015)	Loss 2.3187 (2.2900)	Entropy 0.84487 (0.84748)	Top-1 acc 65.234 (69.177)	Top-5 acc 87.891 (87.170)	lr 0.00390
Train [89][3150/3239]	Time 0.230 (0.633)	Data Time 0.001 (0.014)	Loss 2.3832 (2.2901)	Entropy 0.84473 (0.84747)	Top-1 acc 65.234 (69.170)	Top-5 acc 83.203 (87.167)	lr 0.00390
Train [89][3160/3239]	Time 0.251 (0.632)	Data Time 0.001 (0.014)	Loss 2.3499 (2.2902)	Entropy 0.84465 (0.84746)	Top-1 acc 70.703 (69.168)	Top-5 acc 86.719 (87.166)	lr 0.00390
Train [89][3170/3239]	Time 0.283 (0.632)	Data Time 0.001 (0.014)	Loss 2.3086 (2.2902)	Entropy 0.84466 (0.84745)	Top-1 acc 69.531 (69.168)	Top-5 acc 86.719 (87.167)	lr 0.00390
Train [89][3180/3239]	Time 0.222 (0.631)	Data Time 0.000 (0.014)	Loss 2.3278 (2.2901)	Entropy 0.84462 (0.84744)	Top-1 acc 66.016 (69.170)	Top-5 acc 87.109 (87.167)	lr 0.00390
Train [89][3190/3239]	Time 0.229 (0.631)	Data Time 0.000 (0.014)	Loss 2.2203 (2.2903)	Entropy 0.84463 (0.84744)	Top-1 acc 71.094 (69.168)	Top-5 acc 87.891 (87.164)	lr 0.00390
Train [89][3200/3239]	Time 0.233 (0.630)	Data Time 0.000 (0.014)	Loss 2.2667 (2.2905)	Entropy 0.84460 (0.84743)	Top-1 acc 71.875 (69.167)	Top-5 acc 86.328 (87.161)	lr 0.00390
Train [89][3210/3239]	Time 0.228 (0.630)	Data Time 0.000 (0.014)	Loss 2.3829 (2.2904)	Entropy 0.84458 (0.84742)	Top-1 acc 65.234 (69.171)	Top-5 acc 85.547 (87.164)	lr 0.00390
Train [89][3220/3239]	Time 0.240 (0.629)	Data Time 0.000 (0.014)	Loss 2.2211 (2.2902)	Entropy 0.84452 (0.84741)	Top-1 acc 69.141 (69.172)	Top-5 acc 87.891 (87.167)	lr 0.00390
Train [89][3230/3239]	Time 0.330 (0.629)	Data Time 0.000 (0.014)	Loss 2.1328 (2.2903)	Entropy 0.84455 (0.84740)	Top-1 acc 70.312 (69.168)	Top-5 acc 90.625 (87.165)	lr 0.00390
Train [89][3239/3239]	Time 2.447 (0.628)	Data Time 0.000 (0.014)	Loss 2.7918 (2.2905)	Entropy 0.84455 (0.84739)	Top-1 acc 56.790 (69.162)	Top-5 acc 80.247 (87.163)	lr 0.00390
==========Valid [89/120]	loss 1.257	top-1 acc 71.134 (71.134)	top-5 acc 89.211	Train top-1 69.162	top-5 87.163	Entropy 0.84455	Latency-None: 0.000ms	Flops: 546.53M
Train [90][0/3239]	Time 42.628 (42.628)	Data Time 39.655 (39.655)	Loss 2.3493 (2.3493)	Entropy 0.84456 (0.84456)	Top-1 acc 67.969 (67.969)	Top-5 acc 84.766 (84.766)	lr 0.00390
Train [90][10/3239]	Time 59.830 (9.585)	Data Time 0.001 (3.607)	Loss 2.2183 (2.2621)	Entropy 0.84456 (0.84456)	Top-1 acc 71.484 (69.922)	Top-5 acc 89.844 (88.104)	lr 0.00389
Train [90][20/3239]	Time 0.242 (5.141)	Data Time 0.002 (1.891)	Loss 2.2398 (2.2466)	Entropy 0.84448 (0.84452)	Top-1 acc 70.312 (70.406)	Top-5 acc 89.062 (88.449)	lr 0.00389
Train [90][30/3239]	Time 0.225 (3.636)	Data Time 0.002 (1.281)	Loss 2.3956 (2.2588)	Entropy 0.84452 (0.84452)	Top-1 acc 67.188 (69.972)	Top-5 acc 85.938 (88.180)	lr 0.00389
Train [90][40/3239]	Time 0.251 (2.866)	Data Time 0.002 (0.969)	Loss 2.2220 (2.2620)	Entropy 0.84443 (0.84450)	Top-1 acc 73.047 (69.922)	Top-5 acc 89.453 (87.957)	lr 0.00389
Train [90][50/3239]	Time 0.229 (2.399)	Data Time 0.002 (0.780)	Loss 2.0396 (2.2676)	Entropy 0.84439 (0.84448)	Top-1 acc 73.438 (69.631)	Top-5 acc 91.406 (87.829)	lr 0.00389
Train [90][60/3239]	Time 0.227 (2.085)	Data Time 0.001 (0.652)	Loss 2.1929 (2.2697)	Entropy 0.84431 (0.84446)	Top-1 acc 73.438 (69.685)	Top-5 acc 89.844 (87.782)	lr 0.00389
Train [90][70/3239]	Time 0.232 (1.858)	Data Time 0.001 (0.561)	Loss 2.3373 (2.2657)	Entropy 0.84427 (0.84444)	Top-1 acc 65.234 (69.735)	Top-5 acc 87.891 (87.781)	lr 0.00389
Train [90][80/3239]	Time 0.339 (1.689)	Data Time 0.001 (0.492)	Loss 2.2474 (2.2753)	Entropy 0.84425 (0.84442)	Top-1 acc 73.438 (69.367)	Top-5 acc 89.062 (87.659)	lr 0.00389
Train [90][90/3239]	Time 0.212 (1.555)	Data Time 0.001 (0.438)	Loss 2.1743 (2.2702)	Entropy 0.84425 (0.84440)	Top-1 acc 74.609 (69.548)	Top-5 acc 90.234 (87.753)	lr 0.00389
Train [90][100/3239]	Time 0.246 (1.451)	Data Time 0.002 (0.395)	Loss 2.1565 (2.2746)	Entropy 0.84426 (0.84438)	Top-1 acc 74.609 (69.551)	Top-5 acc 89.844 (87.651)	lr 0.00389
Train [90][110/3239]	Time 0.276 (1.363)	Data Time 0.001 (0.359)	Loss 2.2271 (2.2774)	Entropy 0.84412 (0.84437)	Top-1 acc 69.922 (69.493)	Top-5 acc 89.062 (87.556)	lr 0.00389
Train [90][120/3239]	Time 2.743 (1.292)	Data Time 0.002 (0.330)	Loss 2.2933 (2.2764)	Entropy 0.84412 (0.84435)	Top-1 acc 70.703 (69.564)	Top-5 acc 87.891 (87.545)	lr 0.00389
Train [90][130/3239]	Time 0.262 (1.214)	Data Time 0.001 (0.305)	Loss 2.3097 (2.2757)	Entropy 0.84404 (0.84433)	Top-1 acc 69.141 (69.627)	Top-5 acc 86.719 (87.503)	lr 0.00389
Train [90][140/3239]	Time 0.229 (1.164)	Data Time 0.001 (0.283)	Loss 2.1948 (2.2728)	Entropy 0.84407 (0.84431)	Top-1 acc 70.703 (69.717)	Top-5 acc 86.328 (87.539)	lr 0.00389
Train [90][150/3239]	Time 0.229 (1.117)	Data Time 0.001 (0.264)	Loss 2.0596 (2.2707)	Entropy 0.84398 (0.84429)	Top-1 acc 73.828 (69.746)	Top-5 acc 91.016 (87.596)	lr 0.00388
Train [90][160/3239]	Time 0.225 (1.076)	Data Time 0.001 (0.248)	Loss 2.1562 (2.2720)	Entropy 0.84393 (0.84427)	Top-1 acc 69.141 (69.706)	Top-5 acc 89.453 (87.604)	lr 0.00388
Train [90][170/3239]	Time 0.223 (1.042)	Data Time 0.001 (0.234)	Loss 2.2354 (2.2733)	Entropy 0.84387 (0.84424)	Top-1 acc 69.922 (69.629)	Top-5 acc 88.281 (87.571)	lr 0.00388
Train [90][180/3239]	Time 0.239 (1.010)	Data Time 0.001 (0.221)	Loss 2.2499 (2.2747)	Entropy 0.84386 (0.84422)	Top-1 acc 69.922 (69.590)	Top-5 acc 85.938 (87.539)	lr 0.00388
Train [90][190/3239]	Time 0.236 (0.983)	Data Time 0.001 (0.209)	Loss 2.2511 (2.2728)	Entropy 0.84383 (0.84420)	Top-1 acc 71.484 (69.603)	Top-5 acc 87.891 (87.527)	lr 0.00388
Train [90][200/3239]	Time 0.270 (0.958)	Data Time 0.001 (0.199)	Loss 2.2561 (2.2747)	Entropy 0.84377 (0.84418)	Top-1 acc 67.969 (69.562)	Top-5 acc 88.672 (87.500)	lr 0.00388
Train [90][210/3239]	Time 0.322 (0.936)	Data Time 0.001 (0.190)	Loss 2.1733 (2.2743)	Entropy 0.84374 (0.84416)	Top-1 acc 72.656 (69.592)	Top-5 acc 89.844 (87.507)	lr 0.00388
Train [90][220/3239]	Time 0.269 (0.917)	Data Time 0.002 (0.181)	Loss 2.4213 (2.2778)	Entropy 0.84372 (0.84415)	Top-1 acc 65.625 (69.517)	Top-5 acc 83.594 (87.458)	lr 0.00388
Train [90][230/3239]	Time 2.602 (0.898)	Data Time 0.001 (0.173)	Loss 2.1573 (2.2773)	Entropy 0.84372 (0.84413)	Top-1 acc 71.484 (69.514)	Top-5 acc 89.844 (87.463)	lr 0.00388
Train [90][240/3239]	Time 0.244 (0.871)	Data Time 0.001 (0.166)	Loss 2.3155 (2.2760)	Entropy 0.84367 (0.84411)	Top-1 acc 70.312 (69.562)	Top-5 acc 87.891 (87.482)	lr 0.00388
Train [90][250/3239]	Time 0.217 (0.855)	Data Time 0.001 (0.160)	Loss 2.2929 (2.2754)	Entropy 0.84359 (0.84409)	Top-1 acc 69.141 (69.583)	Top-5 acc 88.281 (87.500)	lr 0.00388
Train [90][260/3239]	Time 0.245 (0.841)	Data Time 0.001 (0.154)	Loss 2.1985 (2.2765)	Entropy 0.84362 (0.84407)	Top-1 acc 73.438 (69.536)	Top-5 acc 88.281 (87.481)	lr 0.00388
Train [90][270/3239]	Time 0.240 (0.828)	Data Time 0.001 (0.148)	Loss 2.2310 (2.2769)	Entropy 0.84358 (0.84405)	Top-1 acc 72.266 (69.500)	Top-5 acc 89.844 (87.464)	lr 0.00388
Train [90][280/3239]	Time 0.239 (0.815)	Data Time 0.001 (0.143)	Loss 2.3223 (2.2771)	Entropy 0.84358 (0.84403)	Top-1 acc 70.703 (69.537)	Top-5 acc 85.547 (87.458)	lr 0.00388
Train [90][290/3239]	Time 0.243 (0.803)	Data Time 0.001 (0.138)	Loss 2.2737 (2.2785)	Entropy 0.84350 (0.84402)	Top-1 acc 69.531 (69.510)	Top-5 acc 86.719 (87.417)	lr 0.00387
Train [90][300/3239]	Time 0.258 (0.793)	Data Time 0.001 (0.133)	Loss 2.2382 (2.2767)	Entropy 0.84348 (0.84400)	Top-1 acc 69.922 (69.552)	Top-5 acc 88.281 (87.465)	lr 0.00387
Train [90][310/3239]	Time 0.230 (0.783)	Data Time 0.001 (0.129)	Loss 2.3969 (2.2751)	Entropy 0.84345 (0.84398)	Top-1 acc 67.188 (69.593)	Top-5 acc 84.375 (87.486)	lr 0.00387
Train [90][320/3239]	Time 0.235 (0.773)	Data Time 0.001 (0.125)	Loss 2.2611 (2.2745)	Entropy 0.84347 (0.84397)	Top-1 acc 69.922 (69.627)	Top-5 acc 87.891 (87.507)	lr 0.00387
Train [90][330/3239]	Time 0.215 (0.764)	Data Time 0.001 (0.121)	Loss 2.2258 (2.2748)	Entropy 0.84351 (0.84395)	Top-1 acc 72.266 (69.627)	Top-5 acc 89.062 (87.506)	lr 0.00387
Train [90][340/3239]	Time 2.624 (0.756)	Data Time 0.001 (0.118)	Loss 2.4345 (2.2773)	Entropy 0.84351 (0.84394)	Top-1 acc 64.062 (69.575)	Top-5 acc 83.984 (87.462)	lr 0.00387
Train [90][350/3239]	Time 0.226 (0.741)	Data Time 0.001 (0.115)	Loss 2.1988 (2.2785)	Entropy 0.84348 (0.84393)	Top-1 acc 67.969 (69.558)	Top-5 acc 89.062 (87.434)	lr 0.00387
Train [90][360/3239]	Time 0.236 (0.734)	Data Time 0.002 (0.111)	Loss 2.2656 (2.2782)	Entropy 0.84351 (0.84391)	Top-1 acc 69.141 (69.537)	Top-5 acc 87.500 (87.433)	lr 0.00387
Train [90][370/3239]	Time 0.239 (0.727)	Data Time 0.001 (0.109)	Loss 2.3781 (2.2775)	Entropy 0.84350 (0.84390)	Top-1 acc 66.406 (69.541)	Top-5 acc 85.547 (87.457)	lr 0.00387
Train [90][380/3239]	Time 0.459 (0.860)	Data Time 0.003 (0.106)	Loss 2.1747 (2.2754)	Entropy 0.84347 (0.84389)	Top-1 acc 73.438 (69.596)	Top-5 acc 87.500 (87.490)	lr 0.00387
Train [90][390/3239]	Time 0.236 (0.850)	Data Time 0.002 (0.103)	Loss 2.2650 (2.2744)	Entropy 0.84347 (0.84388)	Top-1 acc 71.094 (69.612)	Top-5 acc 87.109 (87.513)	lr 0.00387
Train [90][400/3239]	Time 0.242 (0.841)	Data Time 0.002 (0.101)	Loss 2.2243 (2.2738)	Entropy 0.84347 (0.84387)	Top-1 acc 66.406 (69.628)	Top-5 acc 91.016 (87.526)	lr 0.00387
Train [90][410/3239]	Time 0.234 (0.832)	Data Time 0.001 (0.098)	Loss 2.3105 (2.2730)	Entropy 0.84336 (0.84386)	Top-1 acc 69.922 (69.642)	Top-5 acc 83.594 (87.520)	lr 0.00387
Train [90][420/3239]	Time 0.223 (0.824)	Data Time 0.001 (0.096)	Loss 2.2162 (2.2727)	Entropy 0.84333 (0.84385)	Top-1 acc 72.656 (69.662)	Top-5 acc 87.500 (87.520)	lr 0.00386
Train [90][430/3239]	Time 0.228 (0.816)	Data Time 0.001 (0.094)	Loss 2.0983 (2.2717)	Entropy 0.84335 (0.84384)	Top-1 acc 71.875 (69.684)	Top-5 acc 91.016 (87.535)	lr 0.00386
Train [90][440/3239]	Time 0.285 (0.808)	Data Time 0.001 (0.092)	Loss 2.3879 (2.2716)	Entropy 0.84331 (0.84383)	Top-1 acc 64.062 (69.659)	Top-5 acc 85.156 (87.547)	lr 0.00386
Train [90][450/3239]	Time 2.495 (0.801)	Data Time 0.002 (0.090)	Loss 2.1453 (2.2716)	Entropy 0.84331 (0.84381)	Top-1 acc 73.438 (69.672)	Top-5 acc 89.062 (87.528)	lr 0.00386
Train [90][460/3239]	Time 0.227 (0.788)	Data Time 0.001 (0.088)	Loss 2.3430 (2.2710)	Entropy 0.84322 (0.84380)	Top-1 acc 71.875 (69.679)	Top-5 acc 84.375 (87.535)	lr 0.00386
Train [90][470/3239]	Time 0.323 (0.782)	Data Time 0.001 (0.086)	Loss 2.3458 (2.2706)	Entropy 0.84313 (0.84379)	Top-1 acc 69.141 (69.702)	Top-5 acc 86.719 (87.539)	lr 0.00386
Train [90][480/3239]	Time 0.234 (0.776)	Data Time 0.002 (0.084)	Loss 2.1434 (2.2701)	Entropy 0.84316 (0.84377)	Top-1 acc 72.266 (69.704)	Top-5 acc 91.406 (87.541)	lr 0.00386
Train [90][490/3239]	Time 0.231 (0.770)	Data Time 0.001 (0.083)	Loss 2.3966 (2.2708)	Entropy 0.84284 (0.84376)	Top-1 acc 64.844 (69.702)	Top-5 acc 82.422 (87.514)	lr 0.00386
Train [90][500/3239]	Time 0.250 (0.764)	Data Time 0.001 (0.081)	Loss 2.2970 (2.2707)	Entropy 0.84275 (0.84374)	Top-1 acc 70.703 (69.695)	Top-5 acc 86.328 (87.517)	lr 0.00386
Train [90][510/3239]	Time 0.264 (0.759)	Data Time 0.001 (0.079)	Loss 2.2139 (2.2699)	Entropy 0.84268 (0.84372)	Top-1 acc 72.266 (69.719)	Top-5 acc 89.453 (87.544)	lr 0.00386
Train [90][520/3239]	Time 0.210 (0.753)	Data Time 0.001 (0.078)	Loss 2.1789 (2.2697)	Entropy 0.84260 (0.84370)	Top-1 acc 72.266 (69.734)	Top-5 acc 89.844 (87.546)	lr 0.00386
Train [90][530/3239]	Time 0.235 (0.748)	Data Time 0.001 (0.077)	Loss 2.4310 (2.2701)	Entropy 0.84253 (0.84368)	Top-1 acc 63.672 (69.719)	Top-5 acc 85.547 (87.538)	lr 0.00386
Train [90][540/3239]	Time 0.230 (0.744)	Data Time 0.001 (0.075)	Loss 2.2231 (2.2705)	Entropy 0.84247 (0.84365)	Top-1 acc 69.141 (69.702)	Top-5 acc 89.062 (87.533)	lr 0.00386
Train [90][550/3239]	Time 0.245 (0.739)	Data Time 0.001 (0.074)	Loss 2.3059 (2.2712)	Entropy 0.84247 (0.84363)	Top-1 acc 67.188 (69.689)	Top-5 acc 87.891 (87.518)	lr 0.00386
Train [90][560/3239]	Time 2.569 (0.734)	Data Time 0.001 (0.073)	Loss 2.2063 (2.2708)	Entropy 0.84247 (0.84361)	Top-1 acc 70.703 (69.687)	Top-5 acc 88.281 (87.522)	lr 0.00385
Train [90][570/3239]	Time 0.251 (0.726)	Data Time 0.001 (0.071)	Loss 2.2255 (2.2711)	Entropy 0.84238 (0.84359)	Top-1 acc 72.266 (69.698)	Top-5 acc 89.844 (87.518)	lr 0.00385
Train [90][580/3239]	Time 0.232 (0.722)	Data Time 0.001 (0.070)	Loss 2.2905 (2.2723)	Entropy 0.84241 (0.84357)	Top-1 acc 69.141 (69.672)	Top-5 acc 88.672 (87.501)	lr 0.00385
Train [90][590/3239]	Time 0.211 (0.717)	Data Time 0.001 (0.069)	Loss 2.2909 (2.2724)	Entropy 0.84240 (0.84355)	Top-1 acc 68.359 (69.660)	Top-5 acc 87.500 (87.493)	lr 0.00385
Train [90][600/3239]	Time 0.309 (0.713)	Data Time 0.001 (0.068)	Loss 2.2441 (2.2726)	Entropy 0.84240 (0.84353)	Top-1 acc 70.312 (69.664)	Top-5 acc 86.328 (87.489)	lr 0.00385
Train [90][610/3239]	Time 0.228 (0.709)	Data Time 0.001 (0.067)	Loss 2.1247 (2.2733)	Entropy 0.84236 (0.84351)	Top-1 acc 73.047 (69.653)	Top-5 acc 88.281 (87.471)	lr 0.00385
Train [90][620/3239]	Time 0.232 (0.706)	Data Time 0.001 (0.066)	Loss 2.1929 (2.2736)	Entropy 0.84237 (0.84349)	Top-1 acc 73.047 (69.640)	Top-5 acc 88.672 (87.458)	lr 0.00385
Train [90][630/3239]	Time 0.229 (0.702)	Data Time 0.001 (0.065)	Loss 2.1496 (2.2738)	Entropy 0.84235 (0.84348)	Top-1 acc 72.656 (69.645)	Top-5 acc 88.672 (87.450)	lr 0.00385
Train [90][640/3239]	Time 0.312 (0.699)	Data Time 0.001 (0.064)	Loss 2.3038 (2.2737)	Entropy 0.84230 (0.84346)	Top-1 acc 70.312 (69.635)	Top-5 acc 88.281 (87.463)	lr 0.00385
Train [90][650/3239]	Time 0.227 (0.696)	Data Time 0.001 (0.063)	Loss 2.3117 (2.2744)	Entropy 0.84232 (0.84344)	Top-1 acc 68.359 (69.613)	Top-5 acc 87.500 (87.453)	lr 0.00385
Train [90][660/3239]	Time 0.226 (0.692)	Data Time 0.001 (0.062)	Loss 2.1465 (2.2753)	Entropy 0.84221 (0.84342)	Top-1 acc 72.266 (69.583)	Top-5 acc 89.453 (87.444)	lr 0.00385
Train [90][670/3239]	Time 2.546 (0.689)	Data Time 0.002 (0.061)	Loss 2.4554 (2.2760)	Entropy 0.84221 (0.84341)	Top-1 acc 65.625 (69.572)	Top-5 acc 84.375 (87.438)	lr 0.00385
Train [90][680/3239]	Time 0.233 (0.682)	Data Time 0.001 (0.060)	Loss 2.3149 (2.2762)	Entropy 0.84218 (0.84339)	Top-1 acc 67.578 (69.569)	Top-5 acc 85.938 (87.430)	lr 0.00385
Train [90][690/3239]	Time 0.228 (0.680)	Data Time 0.001 (0.059)	Loss 2.3297 (2.2760)	Entropy 0.84209 (0.84337)	Top-1 acc 66.797 (69.578)	Top-5 acc 88.672 (87.437)	lr 0.00385
Train [90][700/3239]	Time 0.236 (0.676)	Data Time 0.001 (0.058)	Loss 2.2656 (2.2758)	Entropy 0.84205 (0.84335)	Top-1 acc 71.094 (69.592)	Top-5 acc 88.672 (87.442)	lr 0.00384
Train [90][710/3239]	Time 0.219 (0.674)	Data Time 0.001 (0.058)	Loss 2.1951 (2.2762)	Entropy 0.84207 (0.84333)	Top-1 acc 72.266 (69.587)	Top-5 acc 87.500 (87.431)	lr 0.00384
Train [90][720/3239]	Time 0.231 (0.671)	Data Time 0.001 (0.057)	Loss 2.2980 (2.2767)	Entropy 0.84200 (0.84331)	Top-1 acc 67.188 (69.569)	Top-5 acc 86.719 (87.423)	lr 0.00384
Train [90][730/3239]	Time 0.224 (0.668)	Data Time 0.001 (0.056)	Loss 2.2803 (2.2763)	Entropy 0.84192 (0.84330)	Top-1 acc 69.141 (69.601)	Top-5 acc 87.891 (87.439)	lr 0.00384
Train [90][740/3239]	Time 0.269 (0.733)	Data Time 0.002 (0.055)	Loss 2.4300 (2.2756)	Entropy 0.84188 (0.84328)	Top-1 acc 67.578 (69.620)	Top-5 acc 84.766 (87.447)	lr 0.00384
Train [90][750/3239]	Time 0.249 (0.732)	Data Time 0.002 (0.055)	Loss 2.1653 (2.2751)	Entropy 0.84183 (0.84326)	Top-1 acc 73.438 (69.624)	Top-5 acc 89.453 (87.455)	lr 0.00384
Train [90][760/3239]	Time 0.222 (0.729)	Data Time 0.002 (0.054)	Loss 2.1537 (2.2751)	Entropy 0.84183 (0.84324)	Top-1 acc 71.484 (69.619)	Top-5 acc 91.016 (87.464)	lr 0.00384
Train [90][770/3239]	Time 0.249 (0.726)	Data Time 0.002 (0.053)	Loss 2.2396 (2.2751)	Entropy 0.84178 (0.84322)	Top-1 acc 71.875 (69.615)	Top-5 acc 86.328 (87.466)	lr 0.00384
Train [90][780/3239]	Time 2.645 (0.723)	Data Time 0.001 (0.053)	Loss 2.1583 (2.2747)	Entropy 0.84178 (0.84320)	Top-1 acc 73.047 (69.622)	Top-5 acc 90.234 (87.479)	lr 0.00384
Train [90][790/3239]	Time 0.250 (0.717)	Data Time 0.001 (0.052)	Loss 2.2165 (2.2741)	Entropy 0.84171 (0.84318)	Top-1 acc 71.484 (69.640)	Top-5 acc 85.938 (87.489)	lr 0.00384
Train [90][800/3239]	Time 0.220 (0.714)	Data Time 0.001 (0.051)	Loss 2.3275 (2.2744)	Entropy 0.84175 (0.84317)	Top-1 acc 71.484 (69.644)	Top-5 acc 85.156 (87.480)	lr 0.00384
Train [90][810/3239]	Time 0.331 (0.711)	Data Time 0.001 (0.051)	Loss 2.2021 (2.2744)	Entropy 0.84186 (0.84315)	Top-1 acc 75.391 (69.647)	Top-5 acc 88.672 (87.480)	lr 0.00384
Train [90][820/3239]	Time 0.225 (0.708)	Data Time 0.001 (0.050)	Loss 2.9160 (2.2754)	Entropy 0.84176 (0.84313)	Top-1 acc 53.125 (69.615)	Top-5 acc 78.125 (87.464)	lr 0.00384
Train [90][830/3239]	Time 0.232 (0.705)	Data Time 0.001 (0.050)	Loss 2.3369 (2.2755)	Entropy 0.84174 (0.84312)	Top-1 acc 67.578 (69.605)	Top-5 acc 87.891 (87.469)	lr 0.00383
Train [90][840/3239]	Time 0.225 (0.703)	Data Time 0.001 (0.049)	Loss 2.2342 (2.2760)	Entropy 0.84171 (0.84310)	Top-1 acc 70.312 (69.580)	Top-5 acc 87.891 (87.455)	lr 0.00383
Train [90][850/3239]	Time 0.245 (0.700)	Data Time 0.001 (0.048)	Loss 2.2425 (2.2762)	Entropy 0.84172 (0.84308)	Top-1 acc 73.047 (69.584)	Top-5 acc 87.891 (87.450)	lr 0.00383
Train [90][860/3239]	Time 0.230 (0.697)	Data Time 0.001 (0.048)	Loss 2.3203 (2.2761)	Entropy 0.84175 (0.84307)	Top-1 acc 69.531 (69.587)	Top-5 acc 86.328 (87.451)	lr 0.00383
Train [90][870/3239]	Time 0.232 (0.695)	Data Time 0.001 (0.047)	Loss 2.2210 (2.2758)	Entropy 0.84166 (0.84305)	Top-1 acc 70.312 (69.596)	Top-5 acc 87.109 (87.450)	lr 0.00383
Train [90][880/3239]	Time 0.240 (0.693)	Data Time 0.001 (0.047)	Loss 2.1783 (2.2753)	Entropy 0.84165 (0.84304)	Top-1 acc 73.047 (69.611)	Top-5 acc 89.844 (87.453)	lr 0.00383
Train [90][890/3239]	Time 2.545 (0.690)	Data Time 0.001 (0.046)	Loss 2.3663 (2.2752)	Entropy 0.84165 (0.84302)	Top-1 acc 65.625 (69.607)	Top-5 acc 83.594 (87.454)	lr 0.00383
Train [90][900/3239]	Time 0.344 (0.685)	Data Time 0.001 (0.046)	Loss 2.1863 (2.2751)	Entropy 0.84156 (0.84300)	Top-1 acc 67.578 (69.605)	Top-5 acc 90.625 (87.466)	lr 0.00383
Train [90][910/3239]	Time 0.234 (0.683)	Data Time 0.001 (0.045)	Loss 2.3000 (2.2747)	Entropy 0.84152 (0.84299)	Top-1 acc 69.141 (69.624)	Top-5 acc 87.109 (87.479)	lr 0.00383
Train [90][920/3239]	Time 0.229 (0.681)	Data Time 0.001 (0.045)	Loss 2.3325 (2.2742)	Entropy 0.84149 (0.84297)	Top-1 acc 67.578 (69.625)	Top-5 acc 86.719 (87.482)	lr 0.00383
Train [90][930/3239]	Time 0.220 (0.678)	Data Time 0.001 (0.044)	Loss 2.3404 (2.2738)	Entropy 0.84146 (0.84296)	Top-1 acc 67.969 (69.636)	Top-5 acc 83.594 (87.483)	lr 0.00383
Train [90][940/3239]	Time 0.290 (0.676)	Data Time 0.001 (0.044)	Loss 2.3175 (2.2751)	Entropy 0.84143 (0.84294)	Top-1 acc 65.625 (69.604)	Top-5 acc 87.891 (87.461)	lr 0.00383
Train [90][950/3239]	Time 0.233 (0.674)	Data Time 0.001 (0.044)	Loss 2.3732 (2.2751)	Entropy 0.84134 (0.84292)	Top-1 acc 68.750 (69.608)	Top-5 acc 84.766 (87.451)	lr 0.00383
Train [90][960/3239]	Time 0.229 (0.672)	Data Time 0.001 (0.043)	Loss 2.2726 (2.2753)	Entropy 0.84127 (0.84291)	Top-1 acc 69.531 (69.599)	Top-5 acc 87.109 (87.447)	lr 0.00383
Train [90][970/3239]	Time 0.251 (0.670)	Data Time 0.025 (0.043)	Loss 2.2242 (2.2749)	Entropy 0.84104 (0.84289)	Top-1 acc 70.703 (69.598)	Top-5 acc 87.109 (87.454)	lr 0.00382
Train [90][980/3239]	Time 0.269 (0.668)	Data Time 0.001 (0.042)	Loss 2.2728 (2.2750)	Entropy 0.84103 (0.84287)	Top-1 acc 67.578 (69.594)	Top-5 acc 87.891 (87.453)	lr 0.00382
Train [90][990/3239]	Time 0.265 (0.667)	Data Time 0.001 (0.042)	Loss 2.1798 (2.2748)	Entropy 0.84091 (0.84285)	Top-1 acc 71.875 (69.597)	Top-5 acc 88.281 (87.459)	lr 0.00382
Train [90][1000/3239]	Time 2.564 (0.665)	Data Time 0.001 (0.041)	Loss 2.2630 (2.2748)	Entropy 0.84091 (0.84283)	Top-1 acc 67.969 (69.595)	Top-5 acc 85.938 (87.463)	lr 0.00382
Train [90][1010/3239]	Time 0.243 (0.660)	Data Time 0.001 (0.041)	Loss 2.2770 (2.2747)	Entropy 0.84074 (0.84281)	Top-1 acc 69.141 (69.599)	Top-5 acc 89.062 (87.464)	lr 0.00382
Train [90][1020/3239]	Time 0.242 (0.659)	Data Time 0.001 (0.041)	Loss 2.2522 (2.2747)	Entropy 0.84071 (0.84279)	Top-1 acc 68.750 (69.594)	Top-5 acc 85.156 (87.460)	lr 0.00382
Train [90][1030/3239]	Time 0.319 (0.657)	Data Time 0.001 (0.040)	Loss 2.3564 (2.2748)	Entropy 0.84072 (0.84277)	Top-1 acc 69.141 (69.596)	Top-5 acc 89.062 (87.462)	lr 0.00382
Train [90][1040/3239]	Time 0.211 (0.655)	Data Time 0.001 (0.040)	Loss 2.2113 (2.2749)	Entropy 0.84067 (0.84275)	Top-1 acc 71.875 (69.590)	Top-5 acc 88.281 (87.464)	lr 0.00382
Train [90][1050/3239]	Time 0.236 (0.654)	Data Time 0.001 (0.040)	Loss 2.4430 (2.2753)	Entropy 0.84065 (0.84273)	Top-1 acc 67.188 (69.588)	Top-5 acc 86.328 (87.453)	lr 0.00382
Train [90][1060/3239]	Time 0.226 (0.652)	Data Time 0.001 (0.039)	Loss 2.1218 (2.2753)	Entropy 0.84060 (0.84271)	Top-1 acc 69.531 (69.589)	Top-5 acc 90.234 (87.452)	lr 0.00382
Train [90][1070/3239]	Time 0.331 (0.651)	Data Time 0.001 (0.039)	Loss 2.3729 (2.2753)	Entropy 0.84057 (0.84269)	Top-1 acc 63.672 (69.580)	Top-5 acc 84.375 (87.449)	lr 0.00382
Train [90][1080/3239]	Time 0.224 (0.649)	Data Time 0.001 (0.039)	Loss 2.3841 (2.2752)	Entropy 0.84058 (0.84267)	Top-1 acc 66.797 (69.591)	Top-5 acc 85.938 (87.444)	lr 0.00382
Train [90][1090/3239]	Time 0.226 (0.647)	Data Time 0.002 (0.038)	Loss 2.2377 (2.2751)	Entropy 0.84054 (0.84265)	Top-1 acc 74.219 (69.589)	Top-5 acc 88.281 (87.447)	lr 0.00382
Train [90][1100/3239]	Time 0.299 (0.691)	Data Time 0.004 (0.038)	Loss 2.3561 (2.2750)	Entropy 0.84047 (0.84263)	Top-1 acc 67.578 (69.593)	Top-5 acc 86.328 (87.450)	lr 0.00382
Train [90][1110/3239]	Time 3.500 (0.691)	Data Time 0.004 (0.038)	Loss 2.2006 (2.2750)	Entropy 0.84047 (0.84261)	Top-1 acc 67.578 (69.586)	Top-5 acc 91.406 (87.456)	lr 0.00381
Train [90][1120/3239]	Time 0.271 (0.687)	Data Time 0.002 (0.037)	Loss 2.2637 (2.2752)	Entropy 0.84048 (0.84259)	Top-1 acc 69.531 (69.589)	Top-5 acc 86.328 (87.453)	lr 0.00381
Train [90][1130/3239]	Time 0.226 (0.685)	Data Time 0.002 (0.037)	Loss 2.2300 (2.2752)	Entropy 0.84036 (0.84257)	Top-1 acc 68.359 (69.594)	Top-5 acc 89.453 (87.457)	lr 0.00381
Train [90][1140/3239]	Time 0.241 (0.683)	Data Time 0.002 (0.037)	Loss 2.1337 (2.2746)	Entropy 0.84031 (0.84256)	Top-1 acc 71.484 (69.608)	Top-5 acc 91.797 (87.462)	lr 0.00381
Train [90][1150/3239]	Time 0.255 (0.682)	Data Time 0.002 (0.036)	Loss 2.1983 (2.2751)	Entropy 0.84030 (0.84254)	Top-1 acc 71.094 (69.595)	Top-5 acc 87.109 (87.447)	lr 0.00381
Train [90][1160/3239]	Time 0.240 (0.680)	Data Time 0.001 (0.036)	Loss 2.3913 (2.2753)	Entropy 0.84027 (0.84252)	Top-1 acc 64.844 (69.590)	Top-5 acc 85.156 (87.443)	lr 0.00381
Train [90][1170/3239]	Time 0.235 (0.678)	Data Time 0.001 (0.036)	Loss 2.3738 (2.2754)	Entropy 0.84023 (0.84250)	Top-1 acc 66.406 (69.599)	Top-5 acc 85.547 (87.439)	lr 0.00381
Train [90][1180/3239]	Time 0.222 (0.677)	Data Time 0.001 (0.035)	Loss 2.2103 (2.2751)	Entropy 0.84017 (0.84248)	Top-1 acc 73.438 (69.604)	Top-5 acc 88.672 (87.445)	lr 0.00381
Train [90][1190/3239]	Time 0.266 (0.675)	Data Time 0.001 (0.035)	Loss 2.1668 (2.2751)	Entropy 0.84011 (0.84246)	Top-1 acc 69.141 (69.603)	Top-5 acc 91.797 (87.451)	lr 0.00381
Train [90][1200/3239]	Time 0.215 (0.673)	Data Time 0.001 (0.035)	Loss 2.2053 (2.2750)	Entropy 0.84003 (0.84244)	Top-1 acc 69.922 (69.606)	Top-5 acc 88.672 (87.455)	lr 0.00381
Train [90][1210/3239]	Time 0.256 (0.672)	Data Time 0.001 (0.035)	Loss 2.5173 (2.2753)	Entropy 0.84002 (0.84242)	Top-1 acc 61.328 (69.596)	Top-5 acc 84.766 (87.455)	lr 0.00381
Train [90][1220/3239]	Time 2.590 (0.670)	Data Time 0.001 (0.034)	Loss 2.3735 (2.2752)	Entropy 0.84002 (0.84240)	Top-1 acc 69.922 (69.601)	Top-5 acc 86.719 (87.452)	lr 0.00381
Train [90][1230/3239]	Time 0.245 (0.667)	Data Time 0.001 (0.034)	Loss 2.2427 (2.2752)	Entropy 0.83998 (0.84238)	Top-1 acc 70.703 (69.601)	Top-5 acc 87.891 (87.458)	lr 0.00381
Train [90][1240/3239]	Time 0.319 (0.665)	Data Time 0.001 (0.034)	Loss 2.2517 (2.2752)	Entropy 0.83994 (0.84236)	Top-1 acc 68.359 (69.600)	Top-5 acc 87.891 (87.457)	lr 0.00381
Train [90][1250/3239]	Time 0.221 (0.664)	Data Time 0.001 (0.034)	Loss 2.2131 (2.2752)	Entropy 0.83989 (0.84234)	Top-1 acc 66.797 (69.604)	Top-5 acc 90.234 (87.457)	lr 0.00380
Train [90][1260/3239]	Time 0.240 (0.662)	Data Time 0.001 (0.033)	Loss 2.4767 (2.2751)	Entropy 0.83976 (0.84232)	Top-1 acc 64.062 (69.608)	Top-5 acc 83.203 (87.454)	lr 0.00380
Train [90][1270/3239]	Time 0.239 (0.661)	Data Time 0.001 (0.033)	Loss 2.0896 (2.2753)	Entropy 0.83952 (0.84230)	Top-1 acc 75.781 (69.613)	Top-5 acc 89.062 (87.452)	lr 0.00380
Train [90][1280/3239]	Time 0.331 (0.660)	Data Time 0.001 (0.033)	Loss 2.3082 (2.2755)	Entropy 0.83948 (0.84228)	Top-1 acc 70.312 (69.611)	Top-5 acc 87.500 (87.444)	lr 0.00380
Train [90][1290/3239]	Time 0.239 (0.658)	Data Time 0.001 (0.033)	Loss 2.1790 (2.2751)	Entropy 0.83941 (0.84225)	Top-1 acc 74.609 (69.620)	Top-5 acc 89.844 (87.446)	lr 0.00380
Train [90][1300/3239]	Time 0.236 (0.657)	Data Time 0.002 (0.032)	Loss 2.2843 (2.2751)	Entropy 0.83934 (0.84223)	Top-1 acc 68.750 (69.618)	Top-5 acc 86.719 (87.446)	lr 0.00380
Train [90][1310/3239]	Time 0.216 (0.655)	Data Time 0.001 (0.032)	Loss 2.2788 (2.2753)	Entropy 0.83934 (0.84221)	Top-1 acc 69.531 (69.608)	Top-5 acc 85.938 (87.437)	lr 0.00380
Train [90][1320/3239]	Time 0.229 (0.654)	Data Time 0.001 (0.032)	Loss 2.3343 (2.2757)	Entropy 0.83929 (0.84219)	Top-1 acc 68.359 (69.599)	Top-5 acc 86.719 (87.430)	lr 0.00380
Train [90][1330/3239]	Time 2.467 (0.653)	Data Time 0.001 (0.032)	Loss 2.3886 (2.2758)	Entropy 0.83929 (0.84217)	Top-1 acc 66.797 (69.599)	Top-5 acc 85.156 (87.423)	lr 0.00380
Train [90][1340/3239]	Time 0.227 (0.650)	Data Time 0.001 (0.031)	Loss 2.1934 (2.2757)	Entropy 0.83924 (0.84214)	Top-1 acc 74.219 (69.611)	Top-5 acc 88.281 (87.422)	lr 0.00380
Train [90][1350/3239]	Time 0.236 (0.648)	Data Time 0.001 (0.031)	Loss 2.1786 (2.2757)	Entropy 0.83924 (0.84212)	Top-1 acc 70.703 (69.617)	Top-5 acc 89.844 (87.422)	lr 0.00380
Train [90][1360/3239]	Time 0.229 (0.647)	Data Time 0.001 (0.031)	Loss 2.2138 (2.2755)	Entropy 0.83916 (0.84210)	Top-1 acc 73.047 (69.622)	Top-5 acc 90.625 (87.419)	lr 0.00380
Train [90][1370/3239]	Time 0.317 (0.646)	Data Time 0.001 (0.031)	Loss 2.3870 (2.2754)	Entropy 0.83914 (0.84208)	Top-1 acc 66.406 (69.623)	Top-5 acc 84.375 (87.421)	lr 0.00380
Train [90][1380/3239]	Time 0.231 (0.644)	Data Time 0.001 (0.031)	Loss 2.3956 (2.2757)	Entropy 0.83901 (0.84206)	Top-1 acc 68.359 (69.611)	Top-5 acc 87.109 (87.420)	lr 0.00379
Train [90][1390/3239]	Time 0.242 (0.643)	Data Time 0.001 (0.030)	Loss 2.3620 (2.2757)	Entropy 0.83912 (0.84204)	Top-1 acc 69.141 (69.611)	Top-5 acc 85.156 (87.421)	lr 0.00379
Train [90][1400/3239]	Time 0.224 (0.642)	Data Time 0.001 (0.030)	Loss 2.3618 (2.2757)	Entropy 0.83915 (0.84202)	Top-1 acc 64.062 (69.611)	Top-5 acc 87.109 (87.424)	lr 0.00379
Train [90][1410/3239]	Time 0.313 (0.641)	Data Time 0.001 (0.030)	Loss 2.3096 (2.2758)	Entropy 0.83911 (0.84200)	Top-1 acc 67.578 (69.604)	Top-5 acc 87.891 (87.422)	lr 0.00379
Train [90][1420/3239]	Time 0.225 (0.640)	Data Time 0.001 (0.030)	Loss 2.4190 (2.2760)	Entropy 0.83915 (0.84198)	Top-1 acc 63.281 (69.594)	Top-5 acc 85.938 (87.417)	lr 0.00379
Train [90][1430/3239]	Time 0.256 (0.638)	Data Time 0.001 (0.029)	Loss 2.3344 (2.2758)	Entropy 0.83911 (0.84196)	Top-1 acc 65.625 (69.597)	Top-5 acc 88.281 (87.423)	lr 0.00379
Train [90][1440/3239]	Time 2.609 (0.637)	Data Time 0.002 (0.029)	Loss 2.2335 (2.2753)	Entropy 0.83911 (0.84194)	Top-1 acc 66.406 (69.608)	Top-5 acc 88.281 (87.432)	lr 0.00379
Train [90][1450/3239]	Time 0.249 (0.635)	Data Time 0.001 (0.029)	Loss 2.1827 (2.2755)	Entropy 0.83911 (0.84192)	Top-1 acc 69.922 (69.605)	Top-5 acc 88.672 (87.425)	lr 0.00379
Train [90][1460/3239]	Time 0.235 (0.634)	Data Time 0.001 (0.029)	Loss 2.1427 (2.2758)	Entropy 0.83910 (0.84190)	Top-1 acc 72.266 (69.605)	Top-5 acc 91.797 (87.423)	lr 0.00379
Train [90][1470/3239]	Time 0.278 (0.669)	Data Time 0.002 (0.029)	Loss 2.2457 (2.2759)	Entropy 0.83906 (0.84188)	Top-1 acc 71.484 (69.607)	Top-5 acc 86.328 (87.419)	lr 0.00379
Train [90][1480/3239]	Time 0.228 (0.668)	Data Time 0.002 (0.029)	Loss 2.1805 (2.2759)	Entropy 0.83901 (0.84186)	Top-1 acc 73.047 (69.607)	Top-5 acc 89.062 (87.421)	lr 0.00379
Train [90][1490/3239]	Time 0.249 (0.667)	Data Time 0.002 (0.028)	Loss 2.1960 (2.2761)	Entropy 0.83899 (0.84184)	Top-1 acc 71.094 (69.601)	Top-5 acc 86.719 (87.414)	lr 0.00379
Train [90][1500/3239]	Time 0.240 (0.665)	Data Time 0.001 (0.028)	Loss 2.3580 (2.2761)	Entropy 0.83895 (0.84182)	Top-1 acc 67.578 (69.604)	Top-5 acc 87.500 (87.413)	lr 0.00379
Train [90][1510/3239]	Time 0.239 (0.664)	Data Time 0.002 (0.028)	Loss 2.2705 (2.2759)	Entropy 0.83895 (0.84180)	Top-1 acc 71.484 (69.612)	Top-5 acc 87.891 (87.415)	lr 0.00379
Train [90][1520/3239]	Time 0.228 (0.663)	Data Time 0.001 (0.028)	Loss 2.2951 (2.2762)	Entropy 0.83897 (0.84178)	Top-1 acc 69.531 (69.601)	Top-5 acc 87.500 (87.413)	lr 0.00378
Train [90][1530/3239]	Time 0.224 (0.662)	Data Time 0.001 (0.028)	Loss 2.3550 (2.2763)	Entropy 0.83908 (0.84176)	Top-1 acc 67.578 (69.601)	Top-5 acc 87.109 (87.406)	lr 0.00378
Train [90][1540/3239]	Time 0.397 (0.661)	Data Time 0.001 (0.028)	Loss 2.2141 (2.2761)	Entropy 0.83917 (0.84175)	Top-1 acc 71.094 (69.607)	Top-5 acc 87.891 (87.408)	lr 0.00378
Train [90][1550/3239]	Time 2.588 (0.660)	Data Time 0.001 (0.027)	Loss 2.2363 (2.2761)	Entropy 0.83917 (0.84173)	Top-1 acc 69.141 (69.602)	Top-5 acc 86.719 (87.406)	lr 0.00378
Train [90][1560/3239]	Time 0.231 (0.657)	Data Time 0.001 (0.027)	Loss 2.1241 (2.2759)	Entropy 0.83912 (0.84171)	Top-1 acc 74.609 (69.611)	Top-5 acc 89.844 (87.409)	lr 0.00378
Train [90][1570/3239]	Time 0.237 (0.656)	Data Time 0.001 (0.027)	Loss 2.2307 (2.2760)	Entropy 0.83913 (0.84170)	Top-1 acc 74.219 (69.607)	Top-5 acc 89.062 (87.406)	lr 0.00378
Train [90][1580/3239]	Time 0.229 (0.655)	Data Time 0.001 (0.027)	Loss 2.2896 (2.2764)	Entropy 0.83902 (0.84168)	Top-1 acc 67.188 (69.596)	Top-5 acc 87.500 (87.397)	lr 0.00378
Train [90][1590/3239]	Time 0.222 (0.654)	Data Time 0.001 (0.027)	Loss 2.3166 (2.2768)	Entropy 0.83904 (0.84166)	Top-1 acc 67.188 (69.591)	Top-5 acc 85.938 (87.389)	lr 0.00378
Train [90][1600/3239]	Time 0.227 (0.652)	Data Time 0.001 (0.027)	Loss 2.2370 (2.2766)	Entropy 0.83895 (0.84165)	Top-1 acc 69.531 (69.593)	Top-5 acc 89.453 (87.392)	lr 0.00378
Train [90][1610/3239]	Time 0.222 (0.651)	Data Time 0.001 (0.026)	Loss 2.1924 (2.2766)	Entropy 0.83895 (0.84163)	Top-1 acc 71.875 (69.593)	Top-5 acc 89.062 (87.390)	lr 0.00378
Train [90][1620/3239]	Time 0.237 (0.650)	Data Time 0.001 (0.026)	Loss 2.2371 (2.2763)	Entropy 0.83892 (0.84161)	Top-1 acc 72.656 (69.602)	Top-5 acc 87.109 (87.392)	lr 0.00378
Train [90][1630/3239]	Time 0.227 (0.649)	Data Time 0.001 (0.026)	Loss 2.2231 (2.2758)	Entropy 0.83881 (0.84160)	Top-1 acc 73.047 (69.621)	Top-5 acc 87.109 (87.401)	lr 0.00378
Train [90][1640/3239]	Time 0.239 (0.648)	Data Time 0.001 (0.026)	Loss 2.3273 (2.2757)	Entropy 0.83876 (0.84158)	Top-1 acc 69.141 (69.620)	Top-5 acc 85.156 (87.400)	lr 0.00378
Train [90][1650/3239]	Time 0.223 (0.647)	Data Time 0.001 (0.026)	Loss 2.2221 (2.2757)	Entropy 0.83882 (0.84156)	Top-1 acc 71.484 (69.627)	Top-5 acc 87.109 (87.400)	lr 0.00378
Train [90][1660/3239]	Time 2.705 (0.646)	Data Time 0.001 (0.026)	Loss 2.4742 (2.2757)	Entropy 0.83882 (0.84155)	Top-1 acc 64.062 (69.625)	Top-5 acc 83.203 (87.398)	lr 0.00377
Train [90][1670/3239]	Time 0.399 (0.644)	Data Time 0.001 (0.025)	Loss 2.3012 (2.2756)	Entropy 0.83875 (0.84153)	Top-1 acc 71.875 (69.625)	Top-5 acc 87.891 (87.400)	lr 0.00377
Train [90][1680/3239]	Time 0.225 (0.643)	Data Time 0.001 (0.025)	Loss 2.2137 (2.2755)	Entropy 0.83880 (0.84151)	Top-1 acc 72.656 (69.628)	Top-5 acc 85.938 (87.400)	lr 0.00377
Train [90][1690/3239]	Time 0.229 (0.642)	Data Time 0.001 (0.025)	Loss 2.1837 (2.2754)	Entropy 0.83872 (0.84150)	Top-1 acc 73.828 (69.636)	Top-5 acc 87.891 (87.403)	lr 0.00377
Train [90][1700/3239]	Time 0.233 (0.641)	Data Time 0.001 (0.025)	Loss 2.2269 (2.2751)	Entropy 0.83872 (0.84148)	Top-1 acc 70.312 (69.647)	Top-5 acc 88.672 (87.407)	lr 0.00377
Train [90][1710/3239]	Time 0.242 (0.640)	Data Time 0.001 (0.025)	Loss 2.3756 (2.2751)	Entropy 0.83859 (0.84146)	Top-1 acc 69.141 (69.647)	Top-5 acc 87.109 (87.409)	lr 0.00377
Train [90][1720/3239]	Time 0.234 (0.639)	Data Time 0.001 (0.025)	Loss 2.5620 (2.2752)	Entropy 0.83860 (0.84145)	Top-1 acc 63.672 (69.642)	Top-5 acc 81.250 (87.410)	lr 0.00377
Train [90][1730/3239]	Time 0.222 (0.638)	Data Time 0.001 (0.025)	Loss 2.2107 (2.2750)	Entropy 0.83865 (0.84143)	Top-1 acc 68.750 (69.646)	Top-5 acc 90.625 (87.415)	lr 0.00377
Train [90][1740/3239]	Time 0.225 (0.637)	Data Time 0.001 (0.025)	Loss 2.2440 (2.2749)	Entropy 0.83866 (0.84141)	Top-1 acc 71.484 (69.643)	Top-5 acc 86.719 (87.417)	lr 0.00377
Train [90][1750/3239]	Time 0.223 (0.636)	Data Time 0.001 (0.024)	Loss 2.4083 (2.2748)	Entropy 0.83859 (0.84140)	Top-1 acc 63.672 (69.644)	Top-5 acc 85.938 (87.419)	lr 0.00377
Train [90][1760/3239]	Time 0.230 (0.636)	Data Time 0.001 (0.024)	Loss 2.2915 (2.2750)	Entropy 0.83856 (0.84138)	Top-1 acc 69.922 (69.641)	Top-5 acc 87.500 (87.415)	lr 0.00377
Train [90][1770/3239]	Time 2.515 (0.635)	Data Time 0.001 (0.024)	Loss 2.1648 (2.2750)	Entropy 0.83856 (0.84137)	Top-1 acc 74.219 (69.644)	Top-5 acc 88.672 (87.411)	lr 0.00377
Train [90][1780/3239]	Time 0.236 (0.632)	Data Time 0.001 (0.024)	Loss 2.2327 (2.2753)	Entropy 0.83854 (0.84135)	Top-1 acc 71.484 (69.640)	Top-5 acc 87.500 (87.405)	lr 0.00377
Train [90][1790/3239]	Time 0.238 (0.632)	Data Time 0.001 (0.024)	Loss 2.2056 (2.2753)	Entropy 0.83853 (0.84134)	Top-1 acc 70.312 (69.640)	Top-5 acc 90.234 (87.406)	lr 0.00377
Train [90][1800/3239]	Time 0.335 (0.631)	Data Time 0.001 (0.024)	Loss 2.2591 (2.2752)	Entropy 0.83843 (0.84132)	Top-1 acc 71.094 (69.638)	Top-5 acc 87.500 (87.408)	lr 0.00376
Train [90][1810/3239]	Time 0.220 (0.630)	Data Time 0.001 (0.024)	Loss 2.3378 (2.2752)	Entropy 0.83830 (0.84130)	Top-1 acc 63.281 (69.633)	Top-5 acc 88.672 (87.408)	lr 0.00376
Train [90][1820/3239]	Time 0.227 (0.629)	Data Time 0.001 (0.024)	Loss 2.1754 (2.2754)	Entropy 0.83828 (0.84129)	Top-1 acc 72.656 (69.630)	Top-5 acc 88.672 (87.405)	lr 0.00376
Train [90][1830/3239]	Time 0.274 (0.657)	Data Time 0.003 (0.023)	Loss 2.2736 (2.2756)	Entropy 0.83816 (0.84127)	Top-1 acc 71.094 (69.627)	Top-5 acc 87.500 (87.401)	lr 0.00376
Train [90][1840/3239]	Time 0.238 (0.656)	Data Time 0.002 (0.023)	Loss 2.0738 (2.2754)	Entropy 0.83813 (0.84125)	Top-1 acc 74.609 (69.632)	Top-5 acc 91.016 (87.404)	lr 0.00376
Train [90][1850/3239]	Time 0.240 (0.655)	Data Time 0.001 (0.023)	Loss 2.3051 (2.2757)	Entropy 0.83812 (0.84124)	Top-1 acc 65.625 (69.624)	Top-5 acc 87.500 (87.402)	lr 0.00376
Train [90][1860/3239]	Time 0.227 (0.654)	Data Time 0.001 (0.023)	Loss 2.2756 (2.2757)	Entropy 0.83803 (0.84122)	Top-1 acc 71.484 (69.624)	Top-5 acc 87.109 (87.403)	lr 0.00376
Train [90][1870/3239]	Time 0.224 (0.653)	Data Time 0.001 (0.023)	Loss 2.3041 (2.2758)	Entropy 0.83800 (0.84120)	Top-1 acc 67.578 (69.624)	Top-5 acc 86.719 (87.401)	lr 0.00376
Train [90][1880/3239]	Time 2.588 (0.652)	Data Time 0.002 (0.023)	Loss 2.1587 (2.2757)	Entropy 0.83800 (0.84118)	Top-1 acc 73.047 (69.623)	Top-5 acc 89.062 (87.403)	lr 0.00376
Train [90][1890/3239]	Time 0.303 (0.650)	Data Time 0.001 (0.023)	Loss 2.3323 (2.2758)	Entropy 0.83792 (0.84117)	Top-1 acc 69.141 (69.618)	Top-5 acc 87.500 (87.404)	lr 0.00376
Train [90][1900/3239]	Time 0.230 (0.649)	Data Time 0.001 (0.023)	Loss 1.9870 (2.2758)	Entropy 0.83778 (0.84115)	Top-1 acc 75.781 (69.618)	Top-5 acc 92.578 (87.402)	lr 0.00376
Train [90][1910/3239]	Time 0.235 (0.648)	Data Time 0.001 (0.023)	Loss 2.1281 (2.2757)	Entropy 0.83776 (0.84113)	Top-1 acc 72.266 (69.620)	Top-5 acc 88.672 (87.405)	lr 0.00376
Train [90][1920/3239]	Time 0.234 (0.647)	Data Time 0.001 (0.022)	Loss 2.3103 (2.2757)	Entropy 0.83770 (0.84111)	Top-1 acc 69.141 (69.618)	Top-5 acc 86.328 (87.402)	lr 0.00376
Train [90][1930/3239]	Time 0.324 (0.647)	Data Time 0.001 (0.022)	Loss 2.3907 (2.2761)	Entropy 0.83718 (0.84110)	Top-1 acc 64.453 (69.605)	Top-5 acc 86.328 (87.399)	lr 0.00376
Train [90][1940/3239]	Time 0.235 (0.646)	Data Time 0.001 (0.022)	Loss 2.4663 (2.2764)	Entropy 0.83721 (0.84108)	Top-1 acc 69.141 (69.598)	Top-5 acc 83.203 (87.394)	lr 0.00375
Train [90][1950/3239]	Time 0.226 (0.645)	Data Time 0.001 (0.022)	Loss 2.3796 (2.2764)	Entropy 0.83713 (0.84106)	Top-1 acc 67.188 (69.594)	Top-5 acc 86.719 (87.390)	lr 0.00375
Train [90][1960/3239]	Time 0.215 (0.644)	Data Time 0.001 (0.022)	Loss 2.1891 (2.2763)	Entropy 0.83715 (0.84104)	Top-1 acc 69.922 (69.597)	Top-5 acc 89.453 (87.393)	lr 0.00375
Train [90][1970/3239]	Time 0.240 (0.643)	Data Time 0.002 (0.022)	Loss 2.3646 (2.2762)	Entropy 0.83712 (0.84102)	Top-1 acc 68.359 (69.604)	Top-5 acc 86.719 (87.394)	lr 0.00375
Train [90][1980/3239]	Time 0.280 (0.642)	Data Time 0.001 (0.022)	Loss 2.2946 (2.2761)	Entropy 0.83715 (0.84100)	Top-1 acc 67.188 (69.601)	Top-5 acc 87.109 (87.394)	lr 0.00375
Train [90][1990/3239]	Time 2.541 (0.641)	Data Time 0.001 (0.022)	Loss 2.2038 (2.2764)	Entropy 0.83715 (0.84098)	Top-1 acc 73.047 (69.594)	Top-5 acc 87.891 (87.387)	lr 0.00375
Train [90][2000/3239]	Time 0.232 (0.639)	Data Time 0.001 (0.022)	Loss 2.2826 (2.2763)	Entropy 0.83709 (0.84096)	Top-1 acc 67.188 (69.599)	Top-5 acc 87.891 (87.391)	lr 0.00375
Train [90][2010/3239]	Time 0.242 (0.639)	Data Time 0.001 (0.021)	Loss 2.2791 (2.2765)	Entropy 0.83704 (0.84094)	Top-1 acc 67.188 (69.590)	Top-5 acc 87.109 (87.389)	lr 0.00375
Train [90][2020/3239]	Time 0.261 (0.638)	Data Time 0.002 (0.021)	Loss 2.3903 (2.2769)	Entropy 0.83704 (0.84092)	Top-1 acc 67.188 (69.580)	Top-5 acc 83.984 (87.382)	lr 0.00375
Train [90][2030/3239]	Time 0.223 (0.637)	Data Time 0.001 (0.021)	Loss 2.3162 (2.2769)	Entropy 0.83694 (0.84090)	Top-1 acc 67.969 (69.579)	Top-5 acc 86.719 (87.381)	lr 0.00375
Train [90][2040/3239]	Time 0.218 (0.636)	Data Time 0.001 (0.021)	Loss 2.2638 (2.2769)	Entropy 0.83695 (0.84088)	Top-1 acc 69.922 (69.579)	Top-5 acc 89.844 (87.382)	lr 0.00375
Train [90][2050/3239]	Time 0.217 (0.635)	Data Time 0.001 (0.021)	Loss 2.2668 (2.2765)	Entropy 0.83686 (0.84086)	Top-1 acc 69.922 (69.590)	Top-5 acc 87.500 (87.389)	lr 0.00375
Train [90][2060/3239]	Time 0.341 (0.635)	Data Time 0.001 (0.021)	Loss 2.3866 (2.2766)	Entropy 0.83682 (0.84084)	Top-1 acc 66.406 (69.584)	Top-5 acc 83.203 (87.384)	lr 0.00375
Train [90][2070/3239]	Time 0.227 (0.634)	Data Time 0.001 (0.021)	Loss 2.1548 (2.2766)	Entropy 0.83679 (0.84082)	Top-1 acc 71.094 (69.586)	Top-5 acc 88.281 (87.383)	lr 0.00375
Train [90][2080/3239]	Time 0.230 (0.633)	Data Time 0.001 (0.021)	Loss 2.3995 (2.2769)	Entropy 0.83685 (0.84080)	Top-1 acc 66.797 (69.575)	Top-5 acc 83.984 (87.379)	lr 0.00374
Train [90][2090/3239]	Time 0.226 (0.632)	Data Time 0.001 (0.021)	Loss 2.4056 (2.2771)	Entropy 0.83682 (0.84078)	Top-1 acc 66.797 (69.567)	Top-5 acc 86.719 (87.377)	lr 0.00374
Train [90][2100/3239]	Time 2.656 (0.632)	Data Time 0.002 (0.021)	Loss 2.3067 (2.2771)	Entropy 0.83682 (0.84076)	Top-1 acc 69.141 (69.565)	Top-5 acc 86.328 (87.379)	lr 0.00374
Train [90][2110/3239]	Time 0.227 (0.630)	Data Time 0.001 (0.021)	Loss 2.2095 (2.2770)	Entropy 0.83682 (0.84075)	Top-1 acc 74.219 (69.570)	Top-5 acc 85.938 (87.379)	lr 0.00374
Train [90][2120/3239]	Time 0.238 (0.629)	Data Time 0.001 (0.020)	Loss 2.2649 (2.2771)	Entropy 0.83672 (0.84073)	Top-1 acc 70.703 (69.571)	Top-5 acc 87.891 (87.377)	lr 0.00374
Train [90][2130/3239]	Time 0.212 (0.629)	Data Time 0.001 (0.020)	Loss 2.2230 (2.2773)	Entropy 0.83670 (0.84071)	Top-1 acc 70.703 (69.566)	Top-5 acc 88.672 (87.375)	lr 0.00374
Train [90][2140/3239]	Time 0.230 (0.628)	Data Time 0.001 (0.020)	Loss 2.4979 (2.2774)	Entropy 0.83657 (0.84069)	Top-1 acc 63.281 (69.559)	Top-5 acc 85.938 (87.379)	lr 0.00374
Train [90][2150/3239]	Time 0.261 (0.627)	Data Time 0.001 (0.020)	Loss 2.1827 (2.2773)	Entropy 0.83650 (0.84067)	Top-1 acc 71.094 (69.562)	Top-5 acc 87.500 (87.380)	lr 0.00374
Train [90][2160/3239]	Time 0.229 (0.627)	Data Time 0.001 (0.020)	Loss 2.2353 (2.2775)	Entropy 0.83650 (0.84065)	Top-1 acc 73.047 (69.557)	Top-5 acc 88.672 (87.378)	lr 0.00374
Train [90][2170/3239]	Time 0.230 (0.626)	Data Time 0.001 (0.020)	Loss 2.2909 (2.2776)	Entropy 0.83636 (0.84063)	Top-1 acc 69.531 (69.556)	Top-5 acc 86.328 (87.375)	lr 0.00374
Train [90][2180/3239]	Time 0.233 (0.625)	Data Time 0.001 (0.020)	Loss 2.1721 (2.2773)	Entropy 0.83636 (0.84061)	Top-1 acc 71.875 (69.564)	Top-5 acc 89.453 (87.382)	lr 0.00374
Train [90][2190/3239]	Time 0.302 (0.647)	Data Time 0.002 (0.020)	Loss 2.4098 (2.2776)	Entropy 0.83620 (0.84059)	Top-1 acc 67.969 (69.558)	Top-5 acc 83.984 (87.378)	lr 0.00374
Train [90][2200/3239]	Time 0.270 (0.647)	Data Time 0.002 (0.020)	Loss 2.3204 (2.2778)	Entropy 0.83617 (0.84057)	Top-1 acc 66.016 (69.552)	Top-5 acc 86.719 (87.376)	lr 0.00374
Train [90][2210/3239]	Time 2.489 (0.647)	Data Time 0.002 (0.020)	Loss 2.2269 (2.2779)	Entropy 0.83617 (0.84055)	Top-1 acc 69.531 (69.549)	Top-5 acc 89.844 (87.370)	lr 0.00374
Train [90][2220/3239]	Time 0.226 (0.645)	Data Time 0.001 (0.020)	Loss 2.2198 (2.2781)	Entropy 0.83616 (0.84053)	Top-1 acc 71.484 (69.547)	Top-5 acc 86.719 (87.365)	lr 0.00373
Train [90][2230/3239]	Time 0.381 (0.644)	Data Time 0.002 (0.020)	Loss 2.3310 (2.2779)	Entropy 0.83617 (0.84051)	Top-1 acc 66.406 (69.547)	Top-5 acc 88.281 (87.369)	lr 0.00373
Train [90][2240/3239]	Time 0.237 (0.644)	Data Time 0.001 (0.019)	Loss 2.3106 (2.2779)	Entropy 0.83609 (0.84049)	Top-1 acc 71.875 (69.548)	Top-5 acc 84.375 (87.370)	lr 0.00373
Train [90][2250/3239]	Time 0.245 (0.643)	Data Time 0.001 (0.019)	Loss 2.3726 (2.2779)	Entropy 0.83608 (0.84047)	Top-1 acc 68.750 (69.546)	Top-5 acc 87.500 (87.369)	lr 0.00373
Train [90][2260/3239]	Time 0.251 (0.642)	Data Time 0.002 (0.019)	Loss 2.0786 (2.2777)	Entropy 0.83612 (0.84045)	Top-1 acc 76.172 (69.552)	Top-5 acc 90.234 (87.374)	lr 0.00373
Train [90][2270/3239]	Time 0.232 (0.641)	Data Time 0.001 (0.019)	Loss 2.2718 (2.2776)	Entropy 0.83606 (0.84043)	Top-1 acc 69.141 (69.555)	Top-5 acc 89.453 (87.376)	lr 0.00373
Train [90][2280/3239]	Time 0.226 (0.641)	Data Time 0.001 (0.019)	Loss 2.2336 (2.2777)	Entropy 0.83595 (0.84042)	Top-1 acc 69.531 (69.551)	Top-5 acc 88.281 (87.374)	lr 0.00373
Train [90][2290/3239]	Time 0.219 (0.640)	Data Time 0.001 (0.019)	Loss 2.3715 (2.2778)	Entropy 0.83599 (0.84040)	Top-1 acc 68.359 (69.555)	Top-5 acc 85.938 (87.375)	lr 0.00373
Train [90][2300/3239]	Time 0.272 (0.639)	Data Time 0.001 (0.019)	Loss 2.2682 (2.2777)	Entropy 0.83607 (0.84038)	Top-1 acc 69.531 (69.560)	Top-5 acc 85.156 (87.375)	lr 0.00373
Train [90][2310/3239]	Time 0.224 (0.639)	Data Time 0.001 (0.019)	Loss 2.3533 (2.2776)	Entropy 0.83601 (0.84036)	Top-1 acc 69.531 (69.561)	Top-5 acc 86.328 (87.377)	lr 0.00373
Train [90][2320/3239]	Time 2.650 (0.638)	Data Time 0.001 (0.019)	Loss 2.2262 (2.2774)	Entropy 0.83601 (0.84034)	Top-1 acc 67.578 (69.567)	Top-5 acc 89.453 (87.379)	lr 0.00373
Train [90][2330/3239]	Time 0.230 (0.636)	Data Time 0.001 (0.019)	Loss 2.3539 (2.2774)	Entropy 0.83602 (0.84032)	Top-1 acc 67.578 (69.566)	Top-5 acc 85.938 (87.379)	lr 0.00373
Train [90][2340/3239]	Time 0.240 (0.636)	Data Time 0.001 (0.019)	Loss 2.3007 (2.2776)	Entropy 0.83596 (0.84030)	Top-1 acc 69.141 (69.563)	Top-5 acc 88.672 (87.374)	lr 0.00373
Train [90][2350/3239]	Time 0.237 (0.635)	Data Time 0.001 (0.019)	Loss 2.1888 (2.2775)	Entropy 0.83591 (0.84028)	Top-1 acc 71.875 (69.565)	Top-5 acc 88.281 (87.377)	lr 0.00372
Train [90][2360/3239]	Time 0.324 (0.634)	Data Time 0.001 (0.019)	Loss 2.2058 (2.2776)	Entropy 0.83590 (0.84026)	Top-1 acc 70.703 (69.559)	Top-5 acc 87.891 (87.373)	lr 0.00372
Train [90][2370/3239]	Time 0.229 (0.634)	Data Time 0.001 (0.018)	Loss 2.3455 (2.2778)	Entropy 0.83588 (0.84025)	Top-1 acc 69.531 (69.557)	Top-5 acc 84.375 (87.368)	lr 0.00372
Train [90][2380/3239]	Time 0.219 (0.633)	Data Time 0.002 (0.018)	Loss 2.1799 (2.2778)	Entropy 0.83589 (0.84023)	Top-1 acc 73.828 (69.556)	Top-5 acc 87.109 (87.368)	lr 0.00372
Train [90][2390/3239]	Time 0.252 (0.632)	Data Time 0.001 (0.018)	Loss 2.1679 (2.2778)	Entropy 0.83585 (0.84021)	Top-1 acc 73.828 (69.560)	Top-5 acc 89.062 (87.366)	lr 0.00372
Train [90][2400/3239]	Time 0.359 (0.632)	Data Time 0.001 (0.018)	Loss 2.2105 (2.2776)	Entropy 0.83579 (0.84019)	Top-1 acc 69.531 (69.567)	Top-5 acc 89.453 (87.369)	lr 0.00372
Train [90][2410/3239]	Time 0.258 (0.631)	Data Time 0.001 (0.018)	Loss 2.2773 (2.2774)	Entropy 0.83578 (0.84017)	Top-1 acc 70.703 (69.575)	Top-5 acc 88.672 (87.374)	lr 0.00372
Train [90][2420/3239]	Time 0.262 (0.630)	Data Time 0.001 (0.018)	Loss 2.0546 (2.2773)	Entropy 0.83568 (0.84016)	Top-1 acc 73.828 (69.577)	Top-5 acc 91.797 (87.376)	lr 0.00372
Train [90][2430/3239]	Time 2.675 (0.630)	Data Time 0.002 (0.018)	Loss 2.2390 (2.2772)	Entropy 0.83568 (0.84014)	Top-1 acc 70.703 (69.579)	Top-5 acc 87.891 (87.376)	lr 0.00372
Train [90][2440/3239]	Time 0.232 (0.628)	Data Time 0.001 (0.018)	Loss 2.2679 (2.2773)	Entropy 0.83573 (0.84012)	Top-1 acc 66.797 (69.576)	Top-5 acc 88.672 (87.374)	lr 0.00372
Train [90][2450/3239]	Time 0.225 (0.628)	Data Time 0.001 (0.018)	Loss 2.1340 (2.2775)	Entropy 0.83572 (0.84010)	Top-1 acc 74.219 (69.575)	Top-5 acc 89.453 (87.369)	lr 0.00372
Train [90][2460/3239]	Time 0.233 (0.627)	Data Time 0.001 (0.018)	Loss 2.4561 (2.2775)	Entropy 0.83569 (0.84008)	Top-1 acc 63.281 (69.570)	Top-5 acc 86.328 (87.370)	lr 0.00372
Train [90][2470/3239]	Time 0.270 (0.626)	Data Time 0.001 (0.018)	Loss 2.1142 (2.2775)	Entropy 0.83563 (0.84006)	Top-1 acc 73.828 (69.567)	Top-5 acc 91.797 (87.372)	lr 0.00372
Train [90][2480/3239]	Time 0.199 (0.626)	Data Time 0.001 (0.018)	Loss 2.2900 (2.2777)	Entropy 0.83558 (0.84005)	Top-1 acc 67.969 (69.562)	Top-5 acc 87.500 (87.367)	lr 0.00372
Train [90][2490/3239]	Time 0.324 (0.625)	Data Time 0.001 (0.018)	Loss 2.2477 (2.2780)	Entropy 0.83553 (0.84003)	Top-1 acc 72.266 (69.551)	Top-5 acc 89.844 (87.361)	lr 0.00371
Train [90][2500/3239]	Time 0.244 (0.625)	Data Time 0.002 (0.018)	Loss 2.1391 (2.2782)	Entropy 0.83556 (0.84001)	Top-1 acc 71.484 (69.547)	Top-5 acc 89.453 (87.358)	lr 0.00371
Train [90][2510/3239]	Time 0.245 (0.624)	Data Time 0.001 (0.018)	Loss 2.2750 (2.2780)	Entropy 0.83547 (0.83999)	Top-1 acc 67.578 (69.553)	Top-5 acc 89.062 (87.363)	lr 0.00371
Train [90][2520/3239]	Time 0.255 (0.624)	Data Time 0.001 (0.017)	Loss 2.4426 (2.2781)	Entropy 0.83545 (0.83998)	Top-1 acc 66.797 (69.546)	Top-5 acc 84.375 (87.359)	lr 0.00371
Train [90][2530/3239]	Time 0.235 (0.623)	Data Time 0.001 (0.017)	Loss 2.1384 (2.2781)	Entropy 0.83535 (0.83996)	Top-1 acc 72.266 (69.550)	Top-5 acc 89.062 (87.359)	lr 0.00371
Train [90][2540/3239]	Time 2.584 (0.622)	Data Time 0.001 (0.017)	Loss 2.3458 (2.2783)	Entropy 0.83535 (0.83994)	Top-1 acc 68.359 (69.547)	Top-5 acc 84.766 (87.356)	lr 0.00371
Train [90][2550/3239]	Time 0.229 (0.621)	Data Time 0.001 (0.017)	Loss 2.2388 (2.2783)	Entropy 0.83527 (0.83992)	Top-1 acc 69.531 (69.542)	Top-5 acc 88.281 (87.358)	lr 0.00371
Train [90][2560/3239]	Time 0.322 (0.640)	Data Time 0.003 (0.017)	Loss 2.2946 (2.2783)	Entropy 0.83522 (0.83990)	Top-1 acc 66.797 (69.539)	Top-5 acc 87.891 (87.360)	lr 0.00371
Train [90][2570/3239]	Time 0.239 (0.639)	Data Time 0.002 (0.017)	Loss 2.3633 (2.2782)	Entropy 0.83515 (0.83988)	Top-1 acc 66.406 (69.540)	Top-5 acc 84.375 (87.362)	lr 0.00371
Train [90][2580/3239]	Time 0.336 (0.639)	Data Time 0.001 (0.017)	Loss 2.4995 (2.2783)	Entropy 0.83515 (0.83987)	Top-1 acc 64.062 (69.539)	Top-5 acc 82.031 (87.361)	lr 0.00371
Train [90][2590/3239]	Time 0.256 (0.638)	Data Time 0.002 (0.017)	Loss 2.2798 (2.2784)	Entropy 0.83515 (0.83985)	Top-1 acc 71.484 (69.540)	Top-5 acc 89.453 (87.360)	lr 0.00371
Train [90][2600/3239]	Time 0.258 (0.638)	Data Time 0.002 (0.017)	Loss 2.4581 (2.2784)	Entropy 0.83506 (0.83983)	Top-1 acc 64.844 (69.538)	Top-5 acc 83.984 (87.362)	lr 0.00371
Train [90][2610/3239]	Time 0.212 (0.637)	Data Time 0.001 (0.017)	Loss 2.3566 (2.2784)	Entropy 0.83501 (0.83981)	Top-1 acc 70.312 (69.537)	Top-5 acc 87.109 (87.364)	lr 0.00371
Train [90][2620/3239]	Time 0.332 (0.636)	Data Time 0.002 (0.017)	Loss 2.3356 (2.2784)	Entropy 0.83503 (0.83979)	Top-1 acc 69.141 (69.537)	Top-5 acc 85.547 (87.363)	lr 0.00371
Train [90][2630/3239]	Time 0.234 (0.636)	Data Time 0.002 (0.017)	Loss 2.5695 (2.2783)	Entropy 0.83507 (0.83977)	Top-1 acc 61.719 (69.539)	Top-5 acc 82.812 (87.364)	lr 0.00370
Train [90][2640/3239]	Time 0.292 (0.635)	Data Time 0.001 (0.017)	Loss 2.3300 (2.2785)	Entropy 0.83505 (0.83976)	Top-1 acc 68.359 (69.536)	Top-5 acc 87.891 (87.361)	lr 0.00370
Train [90][2650/3239]	Time 0.266 (0.635)	Data Time 0.002 (0.017)	Loss 2.3385 (2.2785)	Entropy 0.83483 (0.83974)	Top-1 acc 66.406 (69.537)	Top-5 acc 85.547 (87.361)	lr 0.00370
Train [90][2660/3239]	Time 0.274 (0.634)	Data Time 0.001 (0.017)	Loss 2.1237 (2.2787)	Entropy 0.83482 (0.83972)	Top-1 acc 73.438 (69.537)	Top-5 acc 89.062 (87.357)	lr 0.00370
Train [90][2670/3239]	Time 0.218 (0.634)	Data Time 0.001 (0.017)	Loss 2.2806 (2.2787)	Entropy 0.83478 (0.83970)	Top-1 acc 69.141 (69.536)	Top-5 acc 84.375 (87.353)	lr 0.00370
Train [90][2680/3239]	Time 0.273 (0.633)	Data Time 0.001 (0.017)	Loss 2.2067 (2.2787)	Entropy 0.83479 (0.83968)	Top-1 acc 69.922 (69.534)	Top-5 acc 90.234 (87.354)	lr 0.00370
Train [90][2690/3239]	Time 0.208 (0.632)	Data Time 0.001 (0.016)	Loss 2.2572 (2.2786)	Entropy 0.83476 (0.83967)	Top-1 acc 70.703 (69.537)	Top-5 acc 89.062 (87.358)	lr 0.00370
Train [90][2700/3239]	Time 0.259 (0.632)	Data Time 0.001 (0.016)	Loss 2.3789 (2.2787)	Entropy 0.83474 (0.83965)	Top-1 acc 64.844 (69.534)	Top-5 acc 85.938 (87.358)	lr 0.00370
Train [90][2710/3239]	Time 0.266 (0.631)	Data Time 0.001 (0.016)	Loss 2.1629 (2.2787)	Entropy 0.83475 (0.83963)	Top-1 acc 71.094 (69.534)	Top-5 acc 90.625 (87.359)	lr 0.00370
Train [90][2720/3239]	Time 0.277 (0.631)	Data Time 0.001 (0.016)	Loss 2.3782 (2.2786)	Entropy 0.83475 (0.83961)	Top-1 acc 67.188 (69.539)	Top-5 acc 86.328 (87.360)	lr 0.00370
Train [90][2730/3239]	Time 0.225 (0.630)	Data Time 0.001 (0.016)	Loss 2.3442 (2.2787)	Entropy 0.83476 (0.83959)	Top-1 acc 69.141 (69.538)	Top-5 acc 87.500 (87.359)	lr 0.00370
Train [90][2740/3239]	Time 0.272 (0.629)	Data Time 0.002 (0.016)	Loss 2.3513 (2.2788)	Entropy 0.83466 (0.83958)	Top-1 acc 64.062 (69.532)	Top-5 acc 85.547 (87.357)	lr 0.00370
Train [90][2750/3239]	Time 0.319 (0.629)	Data Time 0.001 (0.016)	Loss 2.3545 (2.2789)	Entropy 0.83466 (0.83956)	Top-1 acc 66.016 (69.531)	Top-5 acc 87.109 (87.356)	lr 0.00370
Train [90][2760/3239]	Time 0.229 (0.628)	Data Time 0.001 (0.016)	Loss 2.2501 (2.2788)	Entropy 0.83467 (0.83954)	Top-1 acc 71.484 (69.534)	Top-5 acc 86.719 (87.356)	lr 0.00370
Train [90][2770/3239]	Time 0.235 (0.628)	Data Time 0.001 (0.016)	Loss 2.2235 (2.2790)	Entropy 0.83466 (0.83952)	Top-1 acc 72.266 (69.530)	Top-5 acc 87.109 (87.352)	lr 0.00369
Train [90][2780/3239]	Time 0.224 (0.627)	Data Time 0.001 (0.016)	Loss 2.3409 (2.2789)	Entropy 0.83468 (0.83951)	Top-1 acc 70.312 (69.534)	Top-5 acc 85.547 (87.353)	lr 0.00369
Train [90][2790/3239]	Time 0.325 (0.627)	Data Time 0.001 (0.016)	Loss 2.3156 (2.2789)	Entropy 0.83461 (0.83949)	Top-1 acc 69.141 (69.531)	Top-5 acc 86.328 (87.349)	lr 0.00369
Train [90][2800/3239]	Time 0.298 (0.626)	Data Time 0.001 (0.016)	Loss 2.4302 (2.2791)	Entropy 0.83456 (0.83947)	Top-1 acc 67.578 (69.524)	Top-5 acc 84.766 (87.348)	lr 0.00369
Train [90][2810/3239]	Time 0.225 (0.625)	Data Time 0.001 (0.016)	Loss 2.1951 (2.2793)	Entropy 0.83458 (0.83945)	Top-1 acc 74.609 (69.517)	Top-5 acc 87.500 (87.343)	lr 0.00369
Train [90][2820/3239]	Time 0.222 (0.625)	Data Time 0.001 (0.016)	Loss 2.1065 (2.2793)	Entropy 0.83459 (0.83944)	Top-1 acc 73.438 (69.515)	Top-5 acc 91.406 (87.344)	lr 0.00369
Train [90][2830/3239]	Time 0.372 (0.624)	Data Time 0.001 (0.016)	Loss 2.2813 (2.2795)	Entropy 0.83454 (0.83942)	Top-1 acc 71.484 (69.513)	Top-5 acc 87.109 (87.339)	lr 0.00369
Train [90][2840/3239]	Time 0.276 (0.624)	Data Time 0.001 (0.016)	Loss 2.3067 (2.2795)	Entropy 0.83434 (0.83940)	Top-1 acc 69.141 (69.511)	Top-5 acc 86.719 (87.339)	lr 0.00369
Train [90][2850/3239]	Time 0.285 (0.623)	Data Time 0.002 (0.016)	Loss 2.4371 (2.2796)	Entropy 0.83435 (0.83938)	Top-1 acc 67.969 (69.506)	Top-5 acc 84.766 (87.336)	lr 0.00369
Train [90][2860/3239]	Time 0.249 (0.623)	Data Time 0.001 (0.016)	Loss 2.3785 (2.2796)	Entropy 0.83434 (0.83937)	Top-1 acc 67.188 (69.507)	Top-5 acc 83.594 (87.337)	lr 0.00369
Train [90][2870/3239]	Time 0.262 (0.622)	Data Time 0.001 (0.016)	Loss 2.2181 (2.2797)	Entropy 0.83433 (0.83935)	Top-1 acc 70.312 (69.503)	Top-5 acc 88.281 (87.338)	lr 0.00369
Train [90][2880/3239]	Time 0.225 (0.622)	Data Time 0.001 (0.015)	Loss 2.1532 (2.2795)	Entropy 0.83432 (0.83933)	Top-1 acc 71.875 (69.509)	Top-5 acc 89.062 (87.340)	lr 0.00369
Train [90][2890/3239]	Time 0.278 (0.639)	Data Time 0.004 (0.015)	Loss 2.2244 (2.2797)	Entropy 0.83428 (0.83931)	Top-1 acc 69.141 (69.501)	Top-5 acc 89.453 (87.337)	lr 0.00369
Train [90][2900/3239]	Time 0.251 (0.639)	Data Time 0.002 (0.015)	Loss 2.2215 (2.2796)	Entropy 0.83429 (0.83930)	Top-1 acc 73.047 (69.507)	Top-5 acc 89.844 (87.342)	lr 0.00369
Train [90][2910/3239]	Time 0.238 (0.638)	Data Time 0.002 (0.015)	Loss 2.3508 (2.2797)	Entropy 0.83428 (0.83928)	Top-1 acc 69.531 (69.503)	Top-5 acc 85.547 (87.338)	lr 0.00368
Train [90][2920/3239]	Time 0.234 (0.638)	Data Time 0.001 (0.015)	Loss 2.4138 (2.2798)	Entropy 0.83430 (0.83926)	Top-1 acc 66.797 (69.502)	Top-5 acc 83.984 (87.337)	lr 0.00368
Train [90][2930/3239]	Time 0.246 (0.637)	Data Time 0.001 (0.015)	Loss 2.2298 (2.2797)	Entropy 0.83426 (0.83924)	Top-1 acc 69.922 (69.503)	Top-5 acc 89.453 (87.335)	lr 0.00368
Train [90][2940/3239]	Time 0.299 (0.637)	Data Time 0.001 (0.015)	Loss 2.1682 (2.2798)	Entropy 0.83419 (0.83923)	Top-1 acc 70.703 (69.503)	Top-5 acc 91.797 (87.337)	lr 0.00368
Train [90][2950/3239]	Time 0.243 (0.636)	Data Time 0.001 (0.015)	Loss 2.2930 (2.2796)	Entropy 0.83411 (0.83921)	Top-1 acc 67.578 (69.508)	Top-5 acc 87.891 (87.339)	lr 0.00368
Train [90][2960/3239]	Time 0.219 (0.636)	Data Time 0.001 (0.015)	Loss 2.4208 (2.2795)	Entropy 0.83409 (0.83919)	Top-1 acc 68.359 (69.513)	Top-5 acc 82.812 (87.339)	lr 0.00368
Train [90][2970/3239]	Time 0.257 (0.635)	Data Time 0.001 (0.015)	Loss 2.3360 (2.2794)	Entropy 0.83408 (0.83918)	Top-1 acc 65.625 (69.514)	Top-5 acc 88.672 (87.342)	lr 0.00368
Train [90][2980/3239]	Time 0.270 (0.635)	Data Time 0.001 (0.015)	Loss 2.3090 (2.2793)	Entropy 0.83404 (0.83916)	Top-1 acc 71.094 (69.519)	Top-5 acc 85.547 (87.344)	lr 0.00368
Train [90][2990/3239]	Time 0.218 (0.634)	Data Time 0.001 (0.015)	Loss 2.4551 (2.2795)	Entropy 0.83396 (0.83914)	Top-1 acc 65.234 (69.514)	Top-5 acc 84.375 (87.340)	lr 0.00368
Train [90][3000/3239]	Time 0.325 (0.634)	Data Time 0.001 (0.015)	Loss 2.2707 (2.2794)	Entropy 0.83391 (0.83912)	Top-1 acc 66.797 (69.513)	Top-5 acc 87.500 (87.340)	lr 0.00368
Train [90][3010/3239]	Time 0.228 (0.633)	Data Time 0.001 (0.015)	Loss 2.1922 (2.2795)	Entropy 0.83385 (0.83911)	Top-1 acc 71.875 (69.514)	Top-5 acc 90.625 (87.339)	lr 0.00368
Train [90][3020/3239]	Time 0.293 (0.632)	Data Time 0.001 (0.015)	Loss 2.2860 (2.2796)	Entropy 0.83387 (0.83909)	Top-1 acc 69.141 (69.510)	Top-5 acc 87.891 (87.336)	lr 0.00368
Train [90][3030/3239]	Time 0.250 (0.632)	Data Time 0.001 (0.015)	Loss 2.3378 (2.2796)	Entropy 0.83388 (0.83907)	Top-1 acc 68.750 (69.510)	Top-5 acc 87.109 (87.337)	lr 0.00368
Train [90][3040/3239]	Time 0.257 (0.632)	Data Time 0.001 (0.015)	Loss 2.3321 (2.2797)	Entropy 0.83390 (0.83906)	Top-1 acc 67.969 (69.508)	Top-5 acc 85.938 (87.338)	lr 0.00368
Train [90][3050/3239]	Time 0.259 (0.631)	Data Time 0.001 (0.015)	Loss 2.3886 (2.2799)	Entropy 0.83382 (0.83904)	Top-1 acc 68.750 (69.501)	Top-5 acc 83.984 (87.334)	lr 0.00367
Train [90][3060/3239]	Time 0.238 (0.630)	Data Time 0.001 (0.015)	Loss 2.3965 (2.2799)	Entropy 0.83381 (0.83902)	Top-1 acc 64.844 (69.498)	Top-5 acc 84.766 (87.336)	lr 0.00367
Train [90][3070/3239]	Time 0.234 (0.630)	Data Time 0.002 (0.015)	Loss 2.2832 (2.2797)	Entropy 0.83372 (0.83900)	Top-1 acc 67.188 (69.506)	Top-5 acc 88.281 (87.338)	lr 0.00367
Train [90][3080/3239]	Time 0.246 (0.630)	Data Time 0.001 (0.015)	Loss 2.2238 (2.2797)	Entropy 0.83368 (0.83899)	Top-1 acc 73.438 (69.508)	Top-5 acc 88.672 (87.340)	lr 0.00367
Train [90][3090/3239]	Time 0.321 (0.629)	Data Time 0.001 (0.015)	Loss 2.2724 (2.2797)	Entropy 0.83364 (0.83897)	Top-1 acc 68.750 (69.513)	Top-5 acc 85.938 (87.342)	lr 0.00367
Train [90][3100/3239]	Time 0.221 (0.629)	Data Time 0.001 (0.014)	Loss 2.4193 (2.2799)	Entropy 0.83362 (0.83895)	Top-1 acc 69.531 (69.508)	Top-5 acc 85.547 (87.338)	lr 0.00367
Train [90][3110/3239]	Time 0.218 (0.628)	Data Time 0.001 (0.014)	Loss 2.5280 (2.2799)	Entropy 0.83371 (0.83894)	Top-1 acc 62.500 (69.507)	Top-5 acc 82.812 (87.339)	lr 0.00367
Train [90][3120/3239]	Time 0.276 (0.628)	Data Time 0.001 (0.014)	Loss 2.3627 (2.2802)	Entropy 0.83365 (0.83892)	Top-1 acc 67.188 (69.501)	Top-5 acc 84.766 (87.332)	lr 0.00367
Train [90][3130/3239]	Time 0.326 (0.627)	Data Time 0.001 (0.014)	Loss 2.3470 (2.2803)	Entropy 0.83360 (0.83890)	Top-1 acc 67.578 (69.501)	Top-5 acc 83.203 (87.328)	lr 0.00367
Train [90][3140/3239]	Time 0.226 (0.627)	Data Time 0.001 (0.014)	Loss 2.2658 (2.2802)	Entropy 0.83351 (0.83889)	Top-1 acc 71.484 (69.503)	Top-5 acc 85.547 (87.329)	lr 0.00367
Train [90][3150/3239]	Time 0.261 (0.626)	Data Time 0.001 (0.014)	Loss 2.3210 (2.2802)	Entropy 0.83351 (0.83887)	Top-1 acc 67.188 (69.502)	Top-5 acc 86.328 (87.327)	lr 0.00367
Train [90][3160/3239]	Time 0.238 (0.626)	Data Time 0.001 (0.014)	Loss 2.2899 (2.2801)	Entropy 0.83350 (0.83885)	Top-1 acc 67.188 (69.500)	Top-5 acc 86.719 (87.329)	lr 0.00367
Train [90][3170/3239]	Time 0.331 (0.625)	Data Time 0.001 (0.014)	Loss 2.3344 (2.2803)	Entropy 0.83351 (0.83883)	Top-1 acc 67.578 (69.497)	Top-5 acc 85.938 (87.327)	lr 0.00367
Train [90][3180/3239]	Time 0.234 (0.625)	Data Time 0.000 (0.014)	Loss 2.2381 (2.2803)	Entropy 0.83344 (0.83882)	Top-1 acc 68.359 (69.494)	Top-5 acc 87.500 (87.325)	lr 0.00367
Train [90][3190/3239]	Time 0.228 (0.624)	Data Time 0.000 (0.014)	Loss 2.2430 (2.2803)	Entropy 0.83347 (0.83880)	Top-1 acc 69.141 (69.493)	Top-5 acc 90.234 (87.327)	lr 0.00366
Train [90][3200/3239]	Time 0.213 (0.624)	Data Time 0.000 (0.014)	Loss 2.1530 (2.2803)	Entropy 0.83344 (0.83878)	Top-1 acc 70.312 (69.492)	Top-5 acc 92.188 (87.328)	lr 0.00366
Train [90][3210/3239]	Time 0.238 (0.623)	Data Time 0.000 (0.014)	Loss 2.2993 (2.2803)	Entropy 0.83349 (0.83877)	Top-1 acc 71.094 (69.492)	Top-5 acc 84.766 (87.330)	lr 0.00366
Train [90][3220/3239]	Time 0.351 (0.638)	Data Time 0.000 (0.014)	Loss 2.3327 (2.2803)	Entropy 0.83343 (0.83875)	Top-1 acc 66.016 (69.492)	Top-5 acc 86.719 (87.330)	lr 0.00366
Train [90][3230/3239]	Time 0.232 (0.638)	Data Time 0.000 (0.014)	Loss 2.4647 (2.2803)	Entropy 0.83346 (0.83874)	Top-1 acc 66.016 (69.491)	Top-5 acc 82.422 (87.329)	lr 0.00366
Train [90][3239/3239]	Time 2.353 (0.637)	Data Time 0.000 (0.014)	Loss 2.2219 (2.2804)	Entropy 0.83346 (0.83872)	Top-1 acc 71.605 (69.493)	Top-5 acc 88.889 (87.329)	lr 0.00366
==========Valid [90/120]	loss 1.252	top-1 acc 71.234 (71.234)	top-5 acc 89.273	Train top-1 69.493	top-5 87.329	Entropy 0.83346	Latency-None: 0.000ms	Flops: 546.53M
Train [91][0/3239]	Time 39.876 (39.876)	Data Time 38.055 (38.055)	Loss 2.3469 (2.3469)	Entropy 0.83337 (0.83337)	Top-1 acc 69.141 (69.141)	Top-5 acc 86.328 (86.328)	lr 0.00366
Train [91][10/3239]	Time 2.643 (4.136)	Data Time 0.002 (3.462)	Loss 2.2835 (2.2258)	Entropy 0.83337 (0.83337)	Top-1 acc 67.969 (71.200)	Top-5 acc 90.234 (88.707)	lr 0.00366
Train [91][20/3239]	Time 0.338 (2.284)	Data Time 0.001 (1.814)	Loss 2.2028 (2.2344)	Entropy 0.83333 (0.83335)	Top-1 acc 68.359 (70.461)	Top-5 acc 90.625 (88.560)	lr 0.00366
Train [91][30/3239]	Time 0.238 (1.698)	Data Time 0.001 (1.229)	Loss 2.0696 (2.2427)	Entropy 0.83327 (0.83333)	Top-1 acc 74.609 (70.464)	Top-5 acc 89.844 (88.269)	lr 0.00366
Train [91][40/3239]	Time 0.234 (1.400)	Data Time 0.001 (0.930)	Loss 2.3566 (2.2611)	Entropy 0.83324 (0.83331)	Top-1 acc 66.016 (70.055)	Top-5 acc 83.594 (87.891)	lr 0.00366
Train [91][50/3239]	Time 0.233 (1.220)	Data Time 0.002 (0.748)	Loss 2.2365 (2.2667)	Entropy 0.83328 (0.83330)	Top-1 acc 71.094 (69.907)	Top-5 acc 88.672 (87.691)	lr 0.00366
Train [91][60/3239]	Time 0.271 (1.102)	Data Time 0.001 (0.626)	Loss 2.2537 (2.2695)	Entropy 0.83328 (0.83330)	Top-1 acc 73.438 (69.941)	Top-5 acc 87.891 (87.538)	lr 0.00366
Train [91][70/3239]	Time 0.257 (1.015)	Data Time 0.001 (0.538)	Loss 2.2426 (2.2686)	Entropy 0.83328 (0.83329)	Top-1 acc 73.047 (69.993)	Top-5 acc 87.500 (87.539)	lr 0.00366
Train [91][80/3239]	Time 0.227 (0.948)	Data Time 0.001 (0.471)	Loss 2.3425 (2.2643)	Entropy 0.83322 (0.83329)	Top-1 acc 69.922 (70.192)	Top-5 acc 85.156 (87.577)	lr 0.00366
Train [91][90/3239]	Time 0.239 (0.897)	Data Time 0.001 (0.420)	Loss 2.1952 (2.2620)	Entropy 0.83319 (0.83328)	Top-1 acc 73.047 (70.235)	Top-5 acc 88.281 (87.599)	lr 0.00365
Train [91][100/3239]	Time 0.252 (0.857)	Data Time 0.002 (0.378)	Loss 2.3219 (2.2631)	Entropy 0.83313 (0.83327)	Top-1 acc 68.750 (70.251)	Top-5 acc 85.938 (87.581)	lr 0.00365
Train [91][110/3239]	Time 0.247 (0.823)	Data Time 0.002 (0.345)	Loss 2.4314 (2.2630)	Entropy 0.83316 (0.83326)	Top-1 acc 68.750 (70.249)	Top-5 acc 84.375 (87.595)	lr 0.00365
Train [91][120/3239]	Time 2.487 (0.794)	Data Time 0.001 (0.316)	Loss 2.2085 (2.2622)	Entropy 0.83316 (0.83325)	Top-1 acc 69.922 (70.206)	Top-5 acc 86.719 (87.600)	lr 0.00365
Train [91][130/3239]	Time 0.237 (0.752)	Data Time 0.002 (0.292)	Loss 2.3206 (2.2620)	Entropy 0.83311 (0.83324)	Top-1 acc 71.094 (70.280)	Top-5 acc 85.938 (87.586)	lr 0.00365
Train [91][140/3239]	Time 0.230 (0.731)	Data Time 0.001 (0.272)	Loss 2.2699 (2.2613)	Entropy 0.83308 (0.83323)	Top-1 acc 72.656 (70.324)	Top-5 acc 87.891 (87.622)	lr 0.00365
Train [91][150/3239]	Time 0.336 (0.715)	Data Time 0.001 (0.254)	Loss 2.2038 (2.2575)	Entropy 0.83301 (0.83321)	Top-1 acc 73.047 (70.344)	Top-5 acc 90.625 (87.699)	lr 0.00365
Train [91][160/3239]	Time 0.234 (0.700)	Data Time 0.001 (0.238)	Loss 2.3774 (2.2569)	Entropy 0.83291 (0.83320)	Top-1 acc 65.625 (70.397)	Top-5 acc 85.547 (87.711)	lr 0.00365
Train [91][170/3239]	Time 0.226 (0.687)	Data Time 0.001 (0.224)	Loss 2.3817 (2.2560)	Entropy 0.83292 (0.83318)	Top-1 acc 65.625 (70.429)	Top-5 acc 84.766 (87.735)	lr 0.00365
Train [91][180/3239]	Time 0.224 (0.676)	Data Time 0.001 (0.212)	Loss 2.1221 (2.2534)	Entropy 0.83288 (0.83317)	Top-1 acc 72.656 (70.479)	Top-5 acc 89.844 (87.772)	lr 0.00365
Train [91][190/3239]	Time 0.325 (0.666)	Data Time 0.001 (0.201)	Loss 2.2009 (2.2527)	Entropy 0.83287 (0.83315)	Top-1 acc 73.047 (70.484)	Top-5 acc 87.500 (87.768)	lr 0.00365
Train [91][200/3239]	Time 0.228 (0.657)	Data Time 0.001 (0.191)	Loss 2.1615 (2.2541)	Entropy 0.83285 (0.83314)	Top-1 acc 72.266 (70.423)	Top-5 acc 88.672 (87.706)	lr 0.00365
Train [91][210/3239]	Time 0.218 (0.648)	Data Time 0.001 (0.182)	Loss 2.3245 (2.2557)	Entropy 0.83282 (0.83312)	Top-1 acc 67.578 (70.322)	Top-5 acc 89.062 (87.693)	lr 0.00365
Train [91][220/3239]	Time 0.235 (0.640)	Data Time 0.001 (0.174)	Loss 2.3760 (2.2554)	Entropy 0.83286 (0.83311)	Top-1 acc 69.531 (70.309)	Top-5 acc 86.328 (87.700)	lr 0.00365
Train [91][230/3239]	Time 2.734 (0.634)	Data Time 0.001 (0.166)	Loss 2.2999 (2.2559)	Entropy 0.83286 (0.83310)	Top-1 acc 65.234 (70.241)	Top-5 acc 85.938 (87.710)	lr 0.00364
Train [91][240/3239]	Time 0.228 (0.617)	Data Time 0.001 (0.159)	Loss 2.1840 (2.2560)	Entropy 0.83288 (0.83309)	Top-1 acc 71.484 (70.272)	Top-5 acc 89.844 (87.714)	lr 0.00364
Train [91][250/3239]	Time 0.235 (0.611)	Data Time 0.001 (0.153)	Loss 2.3525 (2.2569)	Entropy 0.83283 (0.83308)	Top-1 acc 68.359 (70.250)	Top-5 acc 87.109 (87.723)	lr 0.00364
Train [91][260/3239]	Time 0.233 (0.606)	Data Time 0.001 (0.147)	Loss 2.2155 (2.2587)	Entropy 0.83274 (0.83307)	Top-1 acc 72.656 (70.175)	Top-5 acc 87.891 (87.695)	lr 0.00364
Train [91][270/3239]	Time 0.229 (0.601)	Data Time 0.001 (0.142)	Loss 2.2639 (2.2576)	Entropy 0.83274 (0.83305)	Top-1 acc 69.531 (70.145)	Top-5 acc 87.500 (87.716)	lr 0.00364
Train [91][280/3239]	Time 0.307 (0.597)	Data Time 0.001 (0.137)	Loss 2.1398 (2.2584)	Entropy 0.83222 (0.83303)	Top-1 acc 75.000 (70.130)	Top-5 acc 89.453 (87.704)	lr 0.00364
Train [91][290/3239]	Time 0.222 (0.592)	Data Time 0.001 (0.132)	Loss 2.2183 (2.2583)	Entropy 0.83218 (0.83300)	Top-1 acc 71.484 (70.147)	Top-5 acc 88.672 (87.709)	lr 0.00364
Train [91][300/3239]	Time 0.208 (0.588)	Data Time 0.001 (0.128)	Loss 2.1952 (2.2590)	Entropy 0.83219 (0.83298)	Top-1 acc 74.219 (70.127)	Top-5 acc 88.672 (87.701)	lr 0.00364
Train [91][310/3239]	Time 0.258 (0.585)	Data Time 0.001 (0.124)	Loss 2.1448 (2.2602)	Entropy 0.83219 (0.83295)	Top-1 acc 70.312 (70.105)	Top-5 acc 88.672 (87.653)	lr 0.00364
Train [91][320/3239]	Time 0.321 (0.582)	Data Time 0.001 (0.120)	Loss 2.2450 (2.2603)	Entropy 0.83216 (0.83293)	Top-1 acc 72.656 (70.109)	Top-5 acc 87.891 (87.647)	lr 0.00364
Train [91][330/3239]	Time 0.211 (0.579)	Data Time 0.001 (0.117)	Loss 2.2176 (2.2604)	Entropy 0.83215 (0.83290)	Top-1 acc 68.750 (70.068)	Top-5 acc 88.281 (87.659)	lr 0.00364
Train [91][340/3239]	Time 52.431 (0.723)	Data Time 0.001 (0.113)	Loss 2.1960 (2.2597)	Entropy 0.83215 (0.83288)	Top-1 acc 69.922 (70.096)	Top-5 acc 90.234 (87.670)	lr 0.00364
Train [91][350/3239]	Time 0.272 (0.711)	Data Time 0.003 (0.110)	Loss 2.0524 (2.2606)	Entropy 0.83211 (0.83286)	Top-1 acc 78.125 (70.098)	Top-5 acc 92.188 (87.655)	lr 0.00364
Train [91][360/3239]	Time 0.238 (0.706)	Data Time 0.002 (0.107)	Loss 2.1600 (2.2596)	Entropy 0.83212 (0.83284)	Top-1 acc 74.609 (70.137)	Top-5 acc 89.062 (87.662)	lr 0.00364
Train [91][370/3239]	Time 0.233 (0.700)	Data Time 0.001 (0.104)	Loss 2.2500 (2.2598)	Entropy 0.83214 (0.83282)	Top-1 acc 69.922 (70.144)	Top-5 acc 87.500 (87.662)	lr 0.00363
Train [91][380/3239]	Time 0.231 (0.693)	Data Time 0.001 (0.101)	Loss 2.2629 (2.2593)	Entropy 0.83221 (0.83280)	Top-1 acc 68.750 (70.155)	Top-5 acc 89.844 (87.675)	lr 0.00363
Train [91][390/3239]	Time 0.221 (0.688)	Data Time 0.001 (0.099)	Loss 2.2467 (2.2592)	Entropy 0.83217 (0.83279)	Top-1 acc 67.188 (70.139)	Top-5 acc 87.500 (87.671)	lr 0.00363
Train [91][400/3239]	Time 0.230 (0.682)	Data Time 0.001 (0.097)	Loss 2.1121 (2.2640)	Entropy 0.83216 (0.83277)	Top-1 acc 76.953 (70.053)	Top-5 acc 87.891 (87.597)	lr 0.00363
Train [91][410/3239]	Time 0.222 (0.677)	Data Time 0.001 (0.094)	Loss 2.3629 (2.2659)	Entropy 0.83215 (0.83276)	Top-1 acc 67.188 (70.004)	Top-5 acc 86.328 (87.565)	lr 0.00363
Train [91][420/3239]	Time 0.226 (0.673)	Data Time 0.001 (0.092)	Loss 2.2971 (2.2660)	Entropy 0.83209 (0.83274)	Top-1 acc 67.969 (70.012)	Top-5 acc 86.719 (87.555)	lr 0.00363
Train [91][430/3239]	Time 0.223 (0.668)	Data Time 0.001 (0.090)	Loss 2.2801 (2.2667)	Entropy 0.83207 (0.83273)	Top-1 acc 64.062 (69.969)	Top-5 acc 89.453 (87.537)	lr 0.00363
Train [91][440/3239]	Time 0.213 (0.663)	Data Time 0.001 (0.088)	Loss 2.4909 (2.2670)	Entropy 0.83197 (0.83271)	Top-1 acc 62.500 (69.943)	Top-5 acc 83.984 (87.526)	lr 0.00363
Train [91][450/3239]	Time 2.695 (0.659)	Data Time 0.001 (0.086)	Loss 2.2822 (2.2670)	Entropy 0.83197 (0.83270)	Top-1 acc 71.094 (69.952)	Top-5 acc 86.328 (87.512)	lr 0.00363
Train [91][460/3239]	Time 0.232 (0.650)	Data Time 0.001 (0.084)	Loss 2.2325 (2.2670)	Entropy 0.83184 (0.83268)	Top-1 acc 68.359 (69.953)	Top-5 acc 89.453 (87.518)	lr 0.00363
Train [91][470/3239]	Time 0.266 (0.646)	Data Time 0.002 (0.082)	Loss 2.1512 (2.2671)	Entropy 0.83188 (0.83266)	Top-1 acc 73.438 (69.943)	Top-5 acc 91.406 (87.532)	lr 0.00363
Train [91][480/3239]	Time 0.276 (0.643)	Data Time 0.002 (0.081)	Loss 2.4043 (2.2668)	Entropy 0.83190 (0.83264)	Top-1 acc 66.797 (69.956)	Top-5 acc 84.766 (87.529)	lr 0.00363
Train [91][490/3239]	Time 0.265 (0.641)	Data Time 0.001 (0.079)	Loss 2.3695 (2.2677)	Entropy 0.83184 (0.83263)	Top-1 acc 67.578 (69.944)	Top-5 acc 85.156 (87.516)	lr 0.00363
Train [91][500/3239]	Time 0.227 (0.637)	Data Time 0.001 (0.078)	Loss 2.2810 (2.2673)	Entropy 0.83188 (0.83261)	Top-1 acc 69.922 (69.949)	Top-5 acc 88.281 (87.520)	lr 0.00363
Train [91][510/3239]	Time 0.226 (0.634)	Data Time 0.001 (0.076)	Loss 2.3304 (2.2671)	Entropy 0.83191 (0.83260)	Top-1 acc 66.016 (69.944)	Top-5 acc 85.156 (87.514)	lr 0.00362
Train [91][520/3239]	Time 0.221 (0.630)	Data Time 0.001 (0.075)	Loss 2.2681 (2.2664)	Entropy 0.83193 (0.83259)	Top-1 acc 69.141 (69.957)	Top-5 acc 88.281 (87.521)	lr 0.00362
Train [91][530/3239]	Time 0.218 (0.627)	Data Time 0.001 (0.073)	Loss 2.3641 (2.2660)	Entropy 0.83188 (0.83257)	Top-1 acc 70.312 (69.978)	Top-5 acc 86.719 (87.532)	lr 0.00362
Train [91][540/3239]	Time 0.223 (0.625)	Data Time 0.001 (0.072)	Loss 2.2794 (2.2658)	Entropy 0.83181 (0.83256)	Top-1 acc 69.141 (69.986)	Top-5 acc 86.719 (87.534)	lr 0.00362
Train [91][550/3239]	Time 0.224 (0.622)	Data Time 0.001 (0.071)	Loss 2.3578 (2.2659)	Entropy 0.83183 (0.83255)	Top-1 acc 66.016 (69.984)	Top-5 acc 85.156 (87.524)	lr 0.00362
Train [91][560/3239]	Time 2.584 (0.619)	Data Time 0.001 (0.069)	Loss 2.3234 (2.2663)	Entropy 0.83183 (0.83253)	Top-1 acc 70.312 (69.975)	Top-5 acc 85.156 (87.517)	lr 0.00362
Train [91][570/3239]	Time 0.226 (0.613)	Data Time 0.001 (0.068)	Loss 2.2798 (2.2664)	Entropy 0.83176 (0.83252)	Top-1 acc 72.656 (69.970)	Top-5 acc 87.109 (87.524)	lr 0.00362
Train [91][580/3239]	Time 0.320 (0.611)	Data Time 0.001 (0.067)	Loss 2.3083 (2.2675)	Entropy 0.83153 (0.83250)	Top-1 acc 71.094 (69.956)	Top-5 acc 87.109 (87.497)	lr 0.00362
Train [91][590/3239]	Time 0.259 (0.609)	Data Time 0.003 (0.066)	Loss 2.2462 (2.2679)	Entropy 0.83145 (0.83249)	Top-1 acc 68.359 (69.954)	Top-5 acc 89.062 (87.488)	lr 0.00362
Train [91][600/3239]	Time 0.225 (0.607)	Data Time 0.001 (0.065)	Loss 2.1657 (2.2677)	Entropy 0.83147 (0.83247)	Top-1 acc 71.094 (69.939)	Top-5 acc 92.188 (87.504)	lr 0.00362
Train [91][610/3239]	Time 0.221 (0.604)	Data Time 0.001 (0.064)	Loss 2.2698 (2.2678)	Entropy 0.83144 (0.83245)	Top-1 acc 66.016 (69.921)	Top-5 acc 85.547 (87.497)	lr 0.00362
Train [91][620/3239]	Time 0.241 (0.603)	Data Time 0.001 (0.063)	Loss 2.3404 (2.2692)	Entropy 0.83141 (0.83244)	Top-1 acc 70.312 (69.893)	Top-5 acc 84.766 (87.462)	lr 0.00362
Train [91][630/3239]	Time 0.230 (0.600)	Data Time 0.001 (0.062)	Loss 2.3759 (2.2694)	Entropy 0.83140 (0.83242)	Top-1 acc 66.406 (69.894)	Top-5 acc 86.719 (87.459)	lr 0.00362
Train [91][640/3239]	Time 0.227 (0.598)	Data Time 0.001 (0.061)	Loss 2.3070 (2.2704)	Entropy 0.83128 (0.83240)	Top-1 acc 69.531 (69.869)	Top-5 acc 88.672 (87.439)	lr 0.00362
Train [91][650/3239]	Time 0.219 (0.596)	Data Time 0.001 (0.060)	Loss 2.2495 (2.2701)	Entropy 0.83116 (0.83239)	Top-1 acc 70.703 (69.872)	Top-5 acc 88.281 (87.453)	lr 0.00361
Train [91][660/3239]	Time 0.228 (0.595)	Data Time 0.001 (0.059)	Loss 2.2673 (2.2694)	Entropy 0.83093 (0.83237)	Top-1 acc 68.359 (69.891)	Top-5 acc 86.719 (87.467)	lr 0.00361
Train [91][670/3239]	Time 2.635 (0.593)	Data Time 0.002 (0.058)	Loss 2.2465 (2.2688)	Entropy 0.83093 (0.83235)	Top-1 acc 67.578 (69.896)	Top-5 acc 89.844 (87.477)	lr 0.00361
Train [91][680/3239]	Time 0.247 (0.588)	Data Time 0.001 (0.057)	Loss 2.2708 (2.2686)	Entropy 0.83097 (0.83233)	Top-1 acc 68.750 (69.908)	Top-5 acc 89.844 (87.490)	lr 0.00361
Train [91][690/3239]	Time 0.224 (0.586)	Data Time 0.001 (0.057)	Loss 2.1455 (2.2682)	Entropy 0.83091 (0.83230)	Top-1 acc 71.484 (69.922)	Top-5 acc 89.844 (87.496)	lr 0.00361
Train [91][700/3239]	Time 0.229 (0.585)	Data Time 0.001 (0.056)	Loss 2.4480 (2.2680)	Entropy 0.83088 (0.83228)	Top-1 acc 64.844 (69.924)	Top-5 acc 85.547 (87.509)	lr 0.00361
Train [91][710/3239]	Time 0.259 (0.660)	Data Time 0.002 (0.055)	Loss 2.2752 (2.2680)	Entropy 0.83106 (0.83227)	Top-1 acc 68.750 (69.936)	Top-5 acc 87.109 (87.504)	lr 0.00361
Train [91][720/3239]	Time 0.242 (0.658)	Data Time 0.002 (0.054)	Loss 2.2950 (2.2680)	Entropy 0.83093 (0.83225)	Top-1 acc 68.359 (69.939)	Top-5 acc 85.156 (87.494)	lr 0.00361
Train [91][730/3239]	Time 0.228 (0.656)	Data Time 0.001 (0.054)	Loss 2.3795 (2.2682)	Entropy 0.83091 (0.83223)	Top-1 acc 66.406 (69.926)	Top-5 acc 84.375 (87.498)	lr 0.00361
Train [91][740/3239]	Time 0.245 (0.654)	Data Time 0.001 (0.053)	Loss 2.3980 (2.2676)	Entropy 0.83093 (0.83221)	Top-1 acc 67.578 (69.951)	Top-5 acc 84.375 (87.504)	lr 0.00361
Train [91][750/3239]	Time 0.338 (0.652)	Data Time 0.001 (0.052)	Loss 2.1904 (2.2676)	Entropy 0.83113 (0.83220)	Top-1 acc 70.703 (69.929)	Top-5 acc 89.844 (87.519)	lr 0.00361
Train [91][760/3239]	Time 0.227 (0.650)	Data Time 0.001 (0.052)	Loss 2.2970 (2.2682)	Entropy 0.83109 (0.83218)	Top-1 acc 68.750 (69.913)	Top-5 acc 87.500 (87.505)	lr 0.00361
Train [91][770/3239]	Time 0.225 (0.647)	Data Time 0.001 (0.051)	Loss 2.1540 (2.2680)	Entropy 0.83108 (0.83217)	Top-1 acc 75.391 (69.918)	Top-5 acc 89.453 (87.508)	lr 0.00361
Train [91][780/3239]	Time 2.585 (0.645)	Data Time 0.001 (0.050)	Loss 2.2817 (2.2687)	Entropy 0.83108 (0.83215)	Top-1 acc 69.531 (69.895)	Top-5 acc 89.453 (87.507)	lr 0.00361
Train [91][790/3239]	Time 0.257 (0.640)	Data Time 0.001 (0.050)	Loss 2.3114 (2.2686)	Entropy 0.83103 (0.83214)	Top-1 acc 68.750 (69.903)	Top-5 acc 87.109 (87.508)	lr 0.00360
Train [91][800/3239]	Time 0.228 (0.638)	Data Time 0.001 (0.049)	Loss 2.3463 (2.2688)	Entropy 0.83101 (0.83213)	Top-1 acc 64.062 (69.904)	Top-5 acc 85.938 (87.497)	lr 0.00360
Train [91][810/3239]	Time 0.234 (0.636)	Data Time 0.001 (0.049)	Loss 2.1293 (2.2679)	Entropy 0.83100 (0.83211)	Top-1 acc 74.609 (69.925)	Top-5 acc 89.062 (87.511)	lr 0.00360
Train [91][820/3239]	Time 0.215 (0.634)	Data Time 0.001 (0.048)	Loss 2.2398 (2.2676)	Entropy 0.83097 (0.83210)	Top-1 acc 72.266 (69.944)	Top-5 acc 87.500 (87.513)	lr 0.00360
Train [91][830/3239]	Time 0.227 (0.632)	Data Time 0.001 (0.047)	Loss 2.3167 (2.2682)	Entropy 0.83093 (0.83208)	Top-1 acc 64.062 (69.920)	Top-5 acc 86.328 (87.500)	lr 0.00360
Train [91][840/3239]	Time 0.230 (0.630)	Data Time 0.001 (0.047)	Loss 2.2446 (2.2679)	Entropy 0.83088 (0.83207)	Top-1 acc 71.875 (69.938)	Top-5 acc 87.891 (87.501)	lr 0.00360
Train [91][850/3239]	Time 0.234 (0.629)	Data Time 0.002 (0.046)	Loss 2.4709 (2.2685)	Entropy 0.83085 (0.83206)	Top-1 acc 64.453 (69.919)	Top-5 acc 83.594 (87.487)	lr 0.00360
Train [91][860/3239]	Time 0.232 (0.627)	Data Time 0.001 (0.046)	Loss 2.4127 (2.2683)	Entropy 0.83074 (0.83204)	Top-1 acc 67.188 (69.920)	Top-5 acc 83.203 (87.490)	lr 0.00360
Train [91][870/3239]	Time 0.252 (0.625)	Data Time 0.001 (0.045)	Loss 2.0563 (2.2684)	Entropy 0.83073 (0.83203)	Top-1 acc 78.125 (69.912)	Top-5 acc 92.578 (87.495)	lr 0.00360
Train [91][880/3239]	Time 0.316 (0.624)	Data Time 0.001 (0.045)	Loss 2.4322 (2.2688)	Entropy 0.83065 (0.83201)	Top-1 acc 64.062 (69.898)	Top-5 acc 82.812 (87.484)	lr 0.00360
Train [91][890/3239]	Time 2.495 (0.622)	Data Time 0.002 (0.044)	Loss 2.1124 (2.2684)	Entropy 0.83065 (0.83200)	Top-1 acc 74.609 (69.897)	Top-5 acc 89.062 (87.486)	lr 0.00360
Train [91][900/3239]	Time 0.238 (0.618)	Data Time 0.001 (0.044)	Loss 2.2755 (2.2684)	Entropy 0.83057 (0.83198)	Top-1 acc 72.266 (69.895)	Top-5 acc 87.109 (87.487)	lr 0.00360
Train [91][910/3239]	Time 0.235 (0.616)	Data Time 0.001 (0.043)	Loss 2.3519 (2.2689)	Entropy 0.83055 (0.83197)	Top-1 acc 68.750 (69.884)	Top-5 acc 85.156 (87.480)	lr 0.00360
Train [91][920/3239]	Time 0.243 (0.615)	Data Time 0.001 (0.043)	Loss 2.2280 (2.2686)	Entropy 0.83051 (0.83195)	Top-1 acc 73.047 (69.888)	Top-5 acc 87.109 (87.486)	lr 0.00360
Train [91][930/3239]	Time 0.229 (0.613)	Data Time 0.001 (0.042)	Loss 2.2101 (2.2686)	Entropy 0.83046 (0.83193)	Top-1 acc 70.703 (69.884)	Top-5 acc 87.891 (87.482)	lr 0.00359
Train [91][940/3239]	Time 0.232 (0.612)	Data Time 0.001 (0.042)	Loss 2.1941 (2.2685)	Entropy 0.83054 (0.83192)	Top-1 acc 69.922 (69.882)	Top-5 acc 89.453 (87.483)	lr 0.00359
Train [91][950/3239]	Time 0.228 (0.611)	Data Time 0.001 (0.042)	Loss 2.3337 (2.2683)	Entropy 0.83040 (0.83190)	Top-1 acc 69.922 (69.892)	Top-5 acc 87.109 (87.482)	lr 0.00359
Train [91][960/3239]	Time 0.213 (0.609)	Data Time 0.001 (0.041)	Loss 2.3498 (2.2690)	Entropy 0.83041 (0.83189)	Top-1 acc 64.453 (69.861)	Top-5 acc 85.938 (87.466)	lr 0.00359
Train [91][970/3239]	Time 0.231 (0.608)	Data Time 0.001 (0.041)	Loss 2.1034 (2.2688)	Entropy 0.83036 (0.83187)	Top-1 acc 74.219 (69.872)	Top-5 acc 90.234 (87.472)	lr 0.00359
Train [91][980/3239]	Time 0.222 (0.606)	Data Time 0.001 (0.040)	Loss 2.3065 (2.2689)	Entropy 0.83037 (0.83186)	Top-1 acc 67.969 (69.877)	Top-5 acc 85.938 (87.474)	lr 0.00359
Train [91][990/3239]	Time 0.271 (0.605)	Data Time 0.001 (0.040)	Loss 2.1812 (2.2691)	Entropy 0.83104 (0.83184)	Top-1 acc 68.750 (69.870)	Top-5 acc 89.844 (87.475)	lr 0.00359
Train [91][1000/3239]	Time 2.515 (0.604)	Data Time 0.002 (0.040)	Loss 2.2063 (2.2691)	Entropy 0.83104 (0.83183)	Top-1 acc 71.875 (69.877)	Top-5 acc 88.281 (87.479)	lr 0.00359
Train [91][1010/3239]	Time 0.378 (0.600)	Data Time 0.001 (0.039)	Loss 2.2040 (2.2687)	Entropy 0.83094 (0.83183)	Top-1 acc 69.531 (69.880)	Top-5 acc 89.844 (87.488)	lr 0.00359
Train [91][1020/3239]	Time 0.236 (0.599)	Data Time 0.001 (0.039)	Loss 2.0538 (2.2689)	Entropy 0.83087 (0.83182)	Top-1 acc 75.391 (69.879)	Top-5 acc 91.797 (87.487)	lr 0.00359
Train [91][1030/3239]	Time 0.247 (0.598)	Data Time 0.001 (0.038)	Loss 2.3886 (2.2687)	Entropy 0.83089 (0.83181)	Top-1 acc 67.969 (69.887)	Top-5 acc 82.812 (87.489)	lr 0.00359
Train [91][1040/3239]	Time 0.241 (0.597)	Data Time 0.001 (0.038)	Loss 2.1841 (2.2690)	Entropy 0.83082 (0.83180)	Top-1 acc 68.750 (69.887)	Top-5 acc 89.844 (87.484)	lr 0.00359
Train [91][1050/3239]	Time 0.227 (0.595)	Data Time 0.001 (0.038)	Loss 2.1331 (2.2687)	Entropy 0.83075 (0.83179)	Top-1 acc 72.656 (69.898)	Top-5 acc 91.016 (87.490)	lr 0.00359
Train [91][1060/3239]	Time 0.253 (0.594)	Data Time 0.001 (0.037)	Loss 2.3807 (2.2692)	Entropy 0.83074 (0.83178)	Top-1 acc 66.797 (69.879)	Top-5 acc 85.547 (87.479)	lr 0.00359
Train [91][1070/3239]	Time 0.250 (0.644)	Data Time 0.002 (0.037)	Loss 2.2364 (2.2693)	Entropy 0.83074 (0.83177)	Top-1 acc 69.922 (69.880)	Top-5 acc 87.891 (87.482)	lr 0.00359
Train [91][1080/3239]	Time 0.228 (0.643)	Data Time 0.002 (0.037)	Loss 2.3208 (2.2690)	Entropy 0.83068 (0.83176)	Top-1 acc 67.578 (69.896)	Top-5 acc 86.328 (87.490)	lr 0.00358
Train [91][1090/3239]	Time 0.233 (0.641)	Data Time 0.002 (0.036)	Loss 2.2920 (2.2690)	Entropy 0.83066 (0.83175)	Top-1 acc 69.531 (69.893)	Top-5 acc 87.500 (87.490)	lr 0.00358
Train [91][1100/3239]	Time 0.227 (0.640)	Data Time 0.001 (0.036)	Loss 2.1364 (2.2691)	Entropy 0.83063 (0.83174)	Top-1 acc 73.047 (69.888)	Top-5 acc 89.453 (87.490)	lr 0.00358
Train [91][1110/3239]	Time 2.595 (0.638)	Data Time 0.001 (0.036)	Loss 2.0755 (2.2689)	Entropy 0.83063 (0.83173)	Top-1 acc 74.219 (69.889)	Top-5 acc 91.016 (87.493)	lr 0.00358
Train [91][1120/3239]	Time 0.238 (0.635)	Data Time 0.001 (0.036)	Loss 2.1906 (2.2692)	Entropy 0.83063 (0.83172)	Top-1 acc 71.875 (69.886)	Top-5 acc 90.625 (87.484)	lr 0.00358
Train [91][1130/3239]	Time 0.221 (0.633)	Data Time 0.001 (0.035)	Loss 2.2041 (2.2688)	Entropy 0.83059 (0.83171)	Top-1 acc 70.703 (69.893)	Top-5 acc 89.453 (87.489)	lr 0.00358
Train [91][1140/3239]	Time 0.327 (0.632)	Data Time 0.001 (0.035)	Loss 2.4146 (2.2687)	Entropy 0.83055 (0.83170)	Top-1 acc 67.578 (69.892)	Top-5 acc 81.641 (87.491)	lr 0.00358
Train [91][1150/3239]	Time 0.222 (0.631)	Data Time 0.001 (0.035)	Loss 2.6619 (2.2688)	Entropy 0.83056 (0.83169)	Top-1 acc 58.984 (69.886)	Top-5 acc 81.641 (87.493)	lr 0.00358
Train [91][1160/3239]	Time 0.230 (0.629)	Data Time 0.002 (0.034)	Loss 2.2522 (2.2686)	Entropy 0.83049 (0.83168)	Top-1 acc 69.922 (69.895)	Top-5 acc 85.547 (87.498)	lr 0.00358
Train [91][1170/3239]	Time 0.220 (0.628)	Data Time 0.001 (0.034)	Loss 2.2281 (2.2688)	Entropy 0.83051 (0.83167)	Top-1 acc 69.141 (69.886)	Top-5 acc 89.062 (87.499)	lr 0.00358
Train [91][1180/3239]	Time 0.363 (0.627)	Data Time 0.002 (0.034)	Loss 2.2657 (2.2691)	Entropy 0.83048 (0.83166)	Top-1 acc 69.531 (69.879)	Top-5 acc 87.500 (87.494)	lr 0.00358
Train [91][1190/3239]	Time 0.245 (0.626)	Data Time 0.001 (0.034)	Loss 2.3052 (2.2695)	Entropy 0.83044 (0.83165)	Top-1 acc 69.922 (69.870)	Top-5 acc 85.938 (87.488)	lr 0.00358
Train [91][1200/3239]	Time 0.230 (0.625)	Data Time 0.001 (0.033)	Loss 2.1307 (2.2694)	Entropy 0.83040 (0.83164)	Top-1 acc 72.266 (69.858)	Top-5 acc 90.234 (87.490)	lr 0.00358
Train [91][1210/3239]	Time 0.266 (0.624)	Data Time 0.001 (0.033)	Loss 2.3325 (2.2700)	Entropy 0.83033 (0.83163)	Top-1 acc 66.016 (69.841)	Top-5 acc 87.500 (87.480)	lr 0.00358
Train [91][1220/3239]	Time 2.708 (0.623)	Data Time 0.001 (0.033)	Loss 2.4678 (2.2697)	Entropy 0.83033 (0.83162)	Top-1 acc 65.625 (69.840)	Top-5 acc 84.766 (87.491)	lr 0.00357
Train [91][1230/3239]	Time 0.232 (0.619)	Data Time 0.001 (0.032)	Loss 2.2209 (2.2695)	Entropy 0.83032 (0.83161)	Top-1 acc 71.875 (69.840)	Top-5 acc 89.453 (87.499)	lr 0.00357
Train [91][1240/3239]	Time 0.231 (0.618)	Data Time 0.001 (0.032)	Loss 2.2363 (2.2694)	Entropy 0.83023 (0.83160)	Top-1 acc 69.531 (69.838)	Top-5 acc 91.016 (87.505)	lr 0.00357
Train [91][1250/3239]	Time 0.236 (0.617)	Data Time 0.001 (0.032)	Loss 2.2542 (2.2693)	Entropy 0.83024 (0.83159)	Top-1 acc 69.531 (69.839)	Top-5 acc 89.844 (87.509)	lr 0.00357
Train [91][1260/3239]	Time 0.243 (0.616)	Data Time 0.001 (0.032)	Loss 2.4388 (2.2694)	Entropy 0.83018 (0.83158)	Top-1 acc 67.188 (69.838)	Top-5 acc 83.984 (87.510)	lr 0.00357
Train [91][1270/3239]	Time 0.240 (0.615)	Data Time 0.001 (0.032)	Loss 2.2635 (2.2695)	Entropy 0.83020 (0.83156)	Top-1 acc 70.703 (69.838)	Top-5 acc 85.938 (87.510)	lr 0.00357
Train [91][1280/3239]	Time 0.224 (0.614)	Data Time 0.001 (0.031)	Loss 2.3303 (2.2695)	Entropy 0.83036 (0.83155)	Top-1 acc 69.141 (69.841)	Top-5 acc 85.547 (87.507)	lr 0.00357
Train [91][1290/3239]	Time 0.225 (0.613)	Data Time 0.001 (0.031)	Loss 2.4283 (2.2697)	Entropy 0.83035 (0.83154)	Top-1 acc 66.406 (69.834)	Top-5 acc 83.984 (87.501)	lr 0.00357
Train [91][1300/3239]	Time 0.229 (0.612)	Data Time 0.001 (0.031)	Loss 2.4987 (2.2700)	Entropy 0.83034 (0.83154)	Top-1 acc 63.672 (69.819)	Top-5 acc 82.422 (87.492)	lr 0.00357
Train [91][1310/3239]	Time 0.234 (0.611)	Data Time 0.001 (0.031)	Loss 2.3431 (2.2698)	Entropy 0.83035 (0.83153)	Top-1 acc 69.922 (69.828)	Top-5 acc 87.109 (87.496)	lr 0.00357
Train [91][1320/3239]	Time 0.261 (0.610)	Data Time 0.001 (0.030)	Loss 2.1710 (2.2695)	Entropy 0.83012 (0.83152)	Top-1 acc 73.047 (69.836)	Top-5 acc 89.062 (87.501)	lr 0.00357
Train [91][1330/3239]	Time 2.531 (0.609)	Data Time 0.001 (0.030)	Loss 2.2774 (2.2692)	Entropy 0.83012 (0.83151)	Top-1 acc 68.359 (69.834)	Top-5 acc 89.062 (87.509)	lr 0.00357
Train [91][1340/3239]	Time 0.239 (0.606)	Data Time 0.001 (0.030)	Loss 2.2166 (2.2692)	Entropy 0.83014 (0.83150)	Top-1 acc 72.656 (69.832)	Top-5 acc 87.891 (87.505)	lr 0.00357
Train [91][1350/3239]	Time 0.322 (0.605)	Data Time 0.001 (0.030)	Loss 2.3405 (2.2693)	Entropy 0.83013 (0.83149)	Top-1 acc 69.531 (69.829)	Top-5 acc 84.766 (87.502)	lr 0.00357
Train [91][1360/3239]	Time 0.248 (0.604)	Data Time 0.001 (0.030)	Loss 2.1638 (2.2691)	Entropy 0.83012 (0.83148)	Top-1 acc 70.703 (69.835)	Top-5 acc 88.281 (87.507)	lr 0.00356
Train [91][1370/3239]	Time 0.234 (0.603)	Data Time 0.001 (0.029)	Loss 2.2029 (2.2692)	Entropy 0.83011 (0.83147)	Top-1 acc 73.047 (69.828)	Top-5 acc 91.016 (87.509)	lr 0.00356
Train [91][1380/3239]	Time 0.222 (0.602)	Data Time 0.001 (0.029)	Loss 2.2081 (2.2693)	Entropy 0.83008 (0.83146)	Top-1 acc 72.266 (69.823)	Top-5 acc 87.109 (87.505)	lr 0.00356
Train [91][1390/3239]	Time 0.262 (0.602)	Data Time 0.001 (0.029)	Loss 2.2962 (2.2695)	Entropy 0.82990 (0.83145)	Top-1 acc 67.578 (69.826)	Top-5 acc 87.500 (87.500)	lr 0.00356
Train [91][1400/3239]	Time 0.242 (0.600)	Data Time 0.001 (0.029)	Loss 2.3055 (2.2694)	Entropy 0.82987 (0.83144)	Top-1 acc 70.312 (69.829)	Top-5 acc 87.891 (87.502)	lr 0.00356
Train [91][1410/3239]	Time 0.237 (0.600)	Data Time 0.001 (0.029)	Loss 2.1828 (2.2691)	Entropy 0.82987 (0.83142)	Top-1 acc 69.141 (69.833)	Top-5 acc 85.547 (87.505)	lr 0.00356
Train [91][1420/3239]	Time 0.216 (0.599)	Data Time 0.001 (0.028)	Loss 2.2994 (2.2689)	Entropy 0.82986 (0.83141)	Top-1 acc 69.531 (69.837)	Top-5 acc 85.938 (87.505)	lr 0.00356
Train [91][1430/3239]	Time 0.333 (0.635)	Data Time 0.004 (0.028)	Loss 2.3610 (2.2691)	Entropy 0.82986 (0.83140)	Top-1 acc 66.797 (69.831)	Top-5 acc 84.375 (87.499)	lr 0.00356
Train [91][1440/3239]	Time 2.643 (0.634)	Data Time 0.002 (0.028)	Loss 2.2621 (2.2695)	Entropy 0.82986 (0.83139)	Top-1 acc 69.141 (69.816)	Top-5 acc 87.500 (87.486)	lr 0.00356
Train [91][1450/3239]	Time 0.248 (0.632)	Data Time 0.002 (0.028)	Loss 2.3440 (2.2692)	Entropy 0.82975 (0.83138)	Top-1 acc 69.922 (69.827)	Top-5 acc 83.594 (87.490)	lr 0.00356
Train [91][1460/3239]	Time 0.230 (0.631)	Data Time 0.001 (0.028)	Loss 2.1569 (2.2690)	Entropy 0.82975 (0.83137)	Top-1 acc 72.656 (69.832)	Top-5 acc 89.453 (87.497)	lr 0.00356
Train [91][1470/3239]	Time 0.248 (0.630)	Data Time 0.001 (0.028)	Loss 2.1812 (2.2687)	Entropy 0.82976 (0.83136)	Top-1 acc 73.047 (69.838)	Top-5 acc 88.281 (87.506)	lr 0.00356
Train [91][1480/3239]	Time 0.232 (0.629)	Data Time 0.001 (0.027)	Loss 2.2772 (2.2686)	Entropy 0.82972 (0.83135)	Top-1 acc 69.531 (69.840)	Top-5 acc 87.891 (87.513)	lr 0.00356
Train [91][1490/3239]	Time 0.271 (0.628)	Data Time 0.001 (0.027)	Loss 2.0513 (2.2684)	Entropy 0.82973 (0.83134)	Top-1 acc 74.219 (69.845)	Top-5 acc 90.625 (87.514)	lr 0.00356
Train [91][1500/3239]	Time 0.222 (0.626)	Data Time 0.001 (0.027)	Loss 2.3374 (2.2686)	Entropy 0.82974 (0.83133)	Top-1 acc 65.625 (69.841)	Top-5 acc 86.719 (87.510)	lr 0.00355
Train [91][1510/3239]	Time 0.247 (0.625)	Data Time 0.001 (0.027)	Loss 2.1777 (2.2688)	Entropy 0.82973 (0.83131)	Top-1 acc 73.047 (69.835)	Top-5 acc 86.719 (87.505)	lr 0.00355
Train [91][1520/3239]	Time 0.326 (0.625)	Data Time 0.001 (0.027)	Loss 2.2511 (2.2692)	Entropy 0.82965 (0.83130)	Top-1 acc 69.922 (69.827)	Top-5 acc 87.109 (87.498)	lr 0.00355
Train [91][1530/3239]	Time 0.209 (0.624)	Data Time 0.001 (0.026)	Loss 2.3177 (2.2693)	Entropy 0.82965 (0.83129)	Top-1 acc 70.312 (69.827)	Top-5 acc 85.156 (87.497)	lr 0.00355
Train [91][1540/3239]	Time 0.232 (0.623)	Data Time 0.001 (0.026)	Loss 2.4449 (2.2697)	Entropy 0.82961 (0.83128)	Top-1 acc 66.016 (69.816)	Top-5 acc 83.594 (87.495)	lr 0.00355
Train [91][1550/3239]	Time 2.633 (0.622)	Data Time 0.001 (0.026)	Loss 2.2395 (2.2697)	Entropy 0.82961 (0.83127)	Top-1 acc 72.266 (69.816)	Top-5 acc 87.500 (87.491)	lr 0.00355
Train [91][1560/3239]	Time 0.253 (0.619)	Data Time 0.001 (0.026)	Loss 2.1895 (2.2697)	Entropy 0.82963 (0.83126)	Top-1 acc 74.219 (69.818)	Top-5 acc 89.453 (87.490)	lr 0.00355
Train [91][1570/3239]	Time 0.233 (0.619)	Data Time 0.002 (0.026)	Loss 2.3074 (2.2699)	Entropy 0.82969 (0.83125)	Top-1 acc 68.359 (69.809)	Top-5 acc 85.547 (87.489)	lr 0.00355
Train [91][1580/3239]	Time 0.227 (0.618)	Data Time 0.001 (0.026)	Loss 2.3327 (2.2701)	Entropy 0.82966 (0.83124)	Top-1 acc 67.188 (69.802)	Top-5 acc 86.719 (87.488)	lr 0.00355
Train [91][1590/3239]	Time 0.236 (0.617)	Data Time 0.002 (0.026)	Loss 2.2128 (2.2699)	Entropy 0.82963 (0.83123)	Top-1 acc 71.094 (69.814)	Top-5 acc 89.062 (87.494)	lr 0.00355
Train [91][1600/3239]	Time 0.229 (0.616)	Data Time 0.001 (0.025)	Loss 2.2679 (2.2698)	Entropy 0.82965 (0.83122)	Top-1 acc 69.141 (69.815)	Top-5 acc 88.281 (87.498)	lr 0.00355
Train [91][1610/3239]	Time 0.318 (0.615)	Data Time 0.001 (0.025)	Loss 2.3732 (2.2698)	Entropy 0.82958 (0.83121)	Top-1 acc 69.141 (69.813)	Top-5 acc 85.938 (87.496)	lr 0.00355
Train [91][1620/3239]	Time 0.247 (0.614)	Data Time 0.001 (0.025)	Loss 2.0720 (2.2698)	Entropy 0.82956 (0.83120)	Top-1 acc 75.781 (69.813)	Top-5 acc 89.844 (87.500)	lr 0.00355
Train [91][1630/3239]	Time 0.243 (0.613)	Data Time 0.001 (0.025)	Loss 2.2535 (2.2700)	Entropy 0.82959 (0.83119)	Top-1 acc 73.438 (69.815)	Top-5 acc 89.062 (87.503)	lr 0.00355
Train [91][1640/3239]	Time 0.236 (0.613)	Data Time 0.001 (0.025)	Loss 2.1766 (2.2697)	Entropy 0.82959 (0.83118)	Top-1 acc 73.828 (69.817)	Top-5 acc 89.062 (87.505)	lr 0.00354
Train [91][1650/3239]	Time 0.345 (0.612)	Data Time 0.001 (0.025)	Loss 2.3064 (2.2698)	Entropy 0.82960 (0.83117)	Top-1 acc 71.875 (69.816)	Top-5 acc 85.156 (87.501)	lr 0.00354
Train [91][1660/3239]	Time 2.673 (0.611)	Data Time 0.001 (0.025)	Loss 2.1406 (2.2697)	Entropy 0.82960 (0.83116)	Top-1 acc 69.531 (69.809)	Top-5 acc 92.188 (87.503)	lr 0.00354
Train [91][1670/3239]	Time 0.231 (0.609)	Data Time 0.001 (0.024)	Loss 2.3027 (2.2696)	Entropy 0.82948 (0.83115)	Top-1 acc 71.875 (69.818)	Top-5 acc 85.938 (87.502)	lr 0.00354
Train [91][1680/3239]	Time 0.231 (0.608)	Data Time 0.001 (0.024)	Loss 2.3760 (2.2697)	Entropy 0.82943 (0.83114)	Top-1 acc 67.188 (69.818)	Top-5 acc 88.281 (87.503)	lr 0.00354
Train [91][1690/3239]	Time 0.227 (0.607)	Data Time 0.001 (0.024)	Loss 2.1657 (2.2698)	Entropy 0.82942 (0.83113)	Top-1 acc 73.047 (69.815)	Top-5 acc 89.844 (87.498)	lr 0.00354
Train [91][1700/3239]	Time 0.244 (0.607)	Data Time 0.001 (0.024)	Loss 2.1400 (2.2697)	Entropy 0.82906 (0.83112)	Top-1 acc 71.094 (69.813)	Top-5 acc 87.891 (87.500)	lr 0.00354
Train [91][1710/3239]	Time 0.239 (0.606)	Data Time 0.002 (0.024)	Loss 2.0960 (2.2698)	Entropy 0.82906 (0.83111)	Top-1 acc 72.266 (69.807)	Top-5 acc 90.625 (87.500)	lr 0.00354
Train [91][1720/3239]	Time 0.237 (0.605)	Data Time 0.001 (0.024)	Loss 2.3744 (2.2699)	Entropy 0.82907 (0.83110)	Top-1 acc 64.062 (69.800)	Top-5 acc 87.500 (87.502)	lr 0.00354
Train [91][1730/3239]	Time 0.237 (0.604)	Data Time 0.001 (0.024)	Loss 2.3128 (2.2697)	Entropy 0.82906 (0.83108)	Top-1 acc 68.359 (69.803)	Top-5 acc 87.500 (87.506)	lr 0.00354
Train [91][1740/3239]	Time 0.309 (0.604)	Data Time 0.001 (0.024)	Loss 2.3239 (2.2700)	Entropy 0.82904 (0.83107)	Top-1 acc 71.875 (69.797)	Top-5 acc 86.719 (87.500)	lr 0.00354
Train [91][1750/3239]	Time 0.224 (0.603)	Data Time 0.001 (0.023)	Loss 2.4217 (2.2701)	Entropy 0.82902 (0.83106)	Top-1 acc 66.797 (69.797)	Top-5 acc 85.547 (87.501)	lr 0.00354
Train [91][1760/3239]	Time 0.262 (0.602)	Data Time 0.001 (0.023)	Loss 2.2553 (2.2701)	Entropy 0.82906 (0.83105)	Top-1 acc 71.875 (69.796)	Top-5 acc 89.453 (87.501)	lr 0.00354
Train [91][1770/3239]	Time 2.644 (0.601)	Data Time 0.001 (0.023)	Loss 2.2652 (2.2703)	Entropy 0.82906 (0.83104)	Top-1 acc 69.531 (69.796)	Top-5 acc 89.844 (87.503)	lr 0.00354
Train [91][1780/3239]	Time 0.326 (0.600)	Data Time 0.001 (0.023)	Loss 2.3972 (2.2701)	Entropy 0.82908 (0.83103)	Top-1 acc 65.234 (69.801)	Top-5 acc 85.547 (87.505)	lr 0.00353
Train [91][1790/3239]	Time 0.232 (0.599)	Data Time 0.001 (0.023)	Loss 2.3344 (2.2701)	Entropy 0.82911 (0.83102)	Top-1 acc 68.750 (69.804)	Top-5 acc 86.328 (87.504)	lr 0.00353
Train [91][1800/3239]	Time 0.287 (0.626)	Data Time 0.002 (0.023)	Loss 2.3958 (2.2702)	Entropy 0.82900 (0.83101)	Top-1 acc 62.891 (69.798)	Top-5 acc 85.547 (87.504)	lr 0.00353
Train [91][1810/3239]	Time 0.216 (0.626)	Data Time 0.002 (0.023)	Loss 2.1791 (2.2701)	Entropy 0.82901 (0.83099)	Top-1 acc 73.438 (69.800)	Top-5 acc 89.844 (87.507)	lr 0.00353
Train [91][1820/3239]	Time 0.235 (0.625)	Data Time 0.002 (0.023)	Loss 2.3729 (2.2702)	Entropy 0.82902 (0.83098)	Top-1 acc 67.969 (69.797)	Top-5 acc 84.375 (87.504)	lr 0.00353
Train [91][1830/3239]	Time 0.228 (0.624)	Data Time 0.001 (0.022)	Loss 2.2824 (2.2701)	Entropy 0.82887 (0.83097)	Top-1 acc 69.141 (69.802)	Top-5 acc 87.109 (87.507)	lr 0.00353
Train [91][1840/3239]	Time 0.242 (0.623)	Data Time 0.001 (0.022)	Loss 2.1988 (2.2700)	Entropy 0.82872 (0.83096)	Top-1 acc 73.438 (69.806)	Top-5 acc 88.672 (87.508)	lr 0.00353
Train [91][1850/3239]	Time 0.224 (0.623)	Data Time 0.001 (0.022)	Loss 2.3018 (2.2701)	Entropy 0.82872 (0.83095)	Top-1 acc 71.484 (69.801)	Top-5 acc 85.156 (87.506)	lr 0.00353
Train [91][1860/3239]	Time 0.229 (0.622)	Data Time 0.008 (0.022)	Loss 2.2123 (2.2702)	Entropy 0.82868 (0.83094)	Top-1 acc 73.438 (69.798)	Top-5 acc 86.719 (87.504)	lr 0.00353
Train [91][1870/3239]	Time 0.232 (0.621)	Data Time 0.001 (0.022)	Loss 2.4425 (2.2705)	Entropy 0.82873 (0.83093)	Top-1 acc 66.797 (69.789)	Top-5 acc 83.594 (87.497)	lr 0.00353
Train [91][1880/3239]	Time 2.531 (0.620)	Data Time 0.001 (0.022)	Loss 2.2152 (2.2705)	Entropy 0.82873 (0.83091)	Top-1 acc 74.219 (69.790)	Top-5 acc 87.500 (87.496)	lr 0.00353
Train [91][1890/3239]	Time 0.266 (0.618)	Data Time 0.002 (0.022)	Loss 2.3360 (2.2705)	Entropy 0.82868 (0.83090)	Top-1 acc 67.578 (69.792)	Top-5 acc 87.500 (87.493)	lr 0.00353
Train [91][1900/3239]	Time 0.246 (0.618)	Data Time 0.001 (0.022)	Loss 2.4434 (2.2705)	Entropy 0.82860 (0.83089)	Top-1 acc 66.797 (69.793)	Top-5 acc 82.812 (87.493)	lr 0.00353
Train [91][1910/3239]	Time 0.356 (0.617)	Data Time 0.001 (0.022)	Loss 2.2975 (2.2705)	Entropy 0.82858 (0.83088)	Top-1 acc 67.188 (69.790)	Top-5 acc 89.453 (87.494)	lr 0.00353
Train [91][1920/3239]	Time 0.222 (0.617)	Data Time 0.001 (0.021)	Loss 2.2960 (2.2709)	Entropy 0.82855 (0.83087)	Top-1 acc 69.531 (69.785)	Top-5 acc 86.328 (87.486)	lr 0.00353
Train [91][1930/3239]	Time 0.232 (0.616)	Data Time 0.001 (0.021)	Loss 2.3078 (2.2711)	Entropy 0.82851 (0.83085)	Top-1 acc 67.188 (69.775)	Top-5 acc 87.500 (87.481)	lr 0.00352
Train [91][1940/3239]	Time 0.237 (0.615)	Data Time 0.001 (0.021)	Loss 2.1766 (2.2709)	Entropy 0.82850 (0.83084)	Top-1 acc 74.219 (69.780)	Top-5 acc 89.844 (87.487)	lr 0.00352
Train [91][1950/3239]	Time 0.349 (0.615)	Data Time 0.001 (0.021)	Loss 2.3461 (2.2711)	Entropy 0.82848 (0.83083)	Top-1 acc 68.750 (69.774)	Top-5 acc 85.156 (87.484)	lr 0.00352
Train [91][1960/3239]	Time 0.228 (0.614)	Data Time 0.001 (0.021)	Loss 2.3404 (2.2713)	Entropy 0.82847 (0.83082)	Top-1 acc 64.062 (69.767)	Top-5 acc 87.500 (87.483)	lr 0.00352
Train [91][1970/3239]	Time 0.225 (0.613)	Data Time 0.001 (0.021)	Loss 2.2001 (2.2715)	Entropy 0.82843 (0.83081)	Top-1 acc 69.141 (69.762)	Top-5 acc 88.672 (87.480)	lr 0.00352
Train [91][1980/3239]	Time 0.237 (0.613)	Data Time 0.001 (0.021)	Loss 2.3429 (2.2716)	Entropy 0.82840 (0.83079)	Top-1 acc 70.703 (69.761)	Top-5 acc 84.375 (87.476)	lr 0.00352
Train [91][1990/3239]	Time 2.669 (0.612)	Data Time 0.001 (0.021)	Loss 2.4285 (2.2720)	Entropy 0.82840 (0.83078)	Top-1 acc 70.312 (69.756)	Top-5 acc 84.375 (87.468)	lr 0.00352
Train [91][2000/3239]	Time 0.297 (0.611)	Data Time 0.001 (0.021)	Loss 2.2283 (2.2718)	Entropy 0.82822 (0.83077)	Top-1 acc 67.578 (69.760)	Top-5 acc 90.234 (87.474)	lr 0.00352
Train [91][2010/3239]	Time 0.237 (0.610)	Data Time 0.001 (0.021)	Loss 2.2935 (2.2720)	Entropy 0.82820 (0.83076)	Top-1 acc 70.312 (69.753)	Top-5 acc 86.719 (87.470)	lr 0.00352
Train [91][2020/3239]	Time 0.282 (0.609)	Data Time 0.001 (0.020)	Loss 2.3160 (2.2719)	Entropy 0.82816 (0.83074)	Top-1 acc 67.188 (69.755)	Top-5 acc 87.891 (87.471)	lr 0.00352
Train [91][2030/3239]	Time 0.230 (0.609)	Data Time 0.001 (0.020)	Loss 2.2780 (2.2719)	Entropy 0.82822 (0.83073)	Top-1 acc 68.359 (69.754)	Top-5 acc 87.109 (87.473)	lr 0.00352
Train [91][2040/3239]	Time 0.350 (0.608)	Data Time 0.001 (0.020)	Loss 2.4014 (2.2722)	Entropy 0.82811 (0.83072)	Top-1 acc 65.625 (69.747)	Top-5 acc 86.328 (87.467)	lr 0.00352
Train [91][2050/3239]	Time 0.206 (0.607)	Data Time 0.001 (0.020)	Loss 2.3143 (2.2721)	Entropy 0.82804 (0.83070)	Top-1 acc 67.188 (69.744)	Top-5 acc 86.719 (87.464)	lr 0.00352
Train [91][2060/3239]	Time 0.238 (0.607)	Data Time 0.001 (0.020)	Loss 2.3576 (2.2722)	Entropy 0.82799 (0.83069)	Top-1 acc 66.016 (69.738)	Top-5 acc 85.938 (87.463)	lr 0.00352
Train [91][2070/3239]	Time 0.233 (0.606)	Data Time 0.001 (0.020)	Loss 2.3846 (2.2722)	Entropy 0.82792 (0.83068)	Top-1 acc 61.328 (69.736)	Top-5 acc 84.766 (87.464)	lr 0.00351
Train [91][2080/3239]	Time 0.378 (0.606)	Data Time 0.001 (0.020)	Loss 2.3777 (2.2722)	Entropy 0.82794 (0.83067)	Top-1 acc 66.016 (69.736)	Top-5 acc 85.938 (87.463)	lr 0.00351
Train [91][2090/3239]	Time 0.247 (0.605)	Data Time 0.001 (0.020)	Loss 2.3404 (2.2723)	Entropy 0.82788 (0.83065)	Top-1 acc 63.672 (69.731)	Top-5 acc 84.375 (87.463)	lr 0.00351
Train [91][2100/3239]	Time 2.691 (0.605)	Data Time 0.002 (0.020)	Loss 2.2239 (2.2722)	Entropy 0.82788 (0.83064)	Top-1 acc 72.266 (69.734)	Top-5 acc 88.672 (87.464)	lr 0.00351
Train [91][2110/3239]	Time 0.229 (0.603)	Data Time 0.001 (0.020)	Loss 2.3335 (2.2722)	Entropy 0.82789 (0.83063)	Top-1 acc 71.875 (69.733)	Top-5 acc 84.766 (87.462)	lr 0.00351
Train [91][2120/3239]	Time 0.251 (0.602)	Data Time 0.001 (0.020)	Loss 2.2926 (2.2722)	Entropy 0.82782 (0.83061)	Top-1 acc 69.141 (69.738)	Top-5 acc 87.891 (87.462)	lr 0.00351
Train [91][2130/3239]	Time 0.246 (0.602)	Data Time 0.002 (0.020)	Loss 2.2945 (2.2720)	Entropy 0.82780 (0.83060)	Top-1 acc 71.875 (69.747)	Top-5 acc 86.328 (87.463)	lr 0.00351
Train [91][2140/3239]	Time 0.236 (0.601)	Data Time 0.001 (0.019)	Loss 2.3867 (2.2722)	Entropy 0.82774 (0.83059)	Top-1 acc 63.672 (69.741)	Top-5 acc 86.719 (87.457)	lr 0.00351
Train [91][2150/3239]	Time 0.227 (0.601)	Data Time 0.001 (0.019)	Loss 2.3407 (2.2724)	Entropy 0.82771 (0.83057)	Top-1 acc 67.969 (69.734)	Top-5 acc 86.719 (87.455)	lr 0.00351
Train [91][2160/3239]	Time 0.271 (0.626)	Data Time 0.002 (0.019)	Loss 2.2813 (2.2723)	Entropy 0.82773 (0.83056)	Top-1 acc 72.266 (69.743)	Top-5 acc 86.719 (87.458)	lr 0.00351
Train [91][2170/3239]	Time 0.347 (0.626)	Data Time 0.002 (0.019)	Loss 2.2517 (2.2722)	Entropy 0.82768 (0.83055)	Top-1 acc 70.312 (69.746)	Top-5 acc 86.719 (87.460)	lr 0.00351
Train [91][2180/3239]	Time 0.219 (0.625)	Data Time 0.002 (0.019)	Loss 2.3165 (2.2722)	Entropy 0.82760 (0.83053)	Top-1 acc 69.141 (69.743)	Top-5 acc 87.109 (87.458)	lr 0.00351
Train [91][2190/3239]	Time 0.218 (0.624)	Data Time 0.001 (0.019)	Loss 2.1937 (2.2721)	Entropy 0.82762 (0.83052)	Top-1 acc 71.875 (69.748)	Top-5 acc 89.453 (87.463)	lr 0.00351
Train [91][2200/3239]	Time 0.229 (0.624)	Data Time 0.001 (0.019)	Loss 2.1885 (2.2721)	Entropy 0.82766 (0.83051)	Top-1 acc 69.922 (69.743)	Top-5 acc 89.453 (87.464)	lr 0.00351
Train [91][2210/3239]	Time 2.941 (0.623)	Data Time 0.002 (0.019)	Loss 2.4659 (2.2720)	Entropy 0.82766 (0.83049)	Top-1 acc 60.547 (69.740)	Top-5 acc 85.156 (87.466)	lr 0.00350
Train [91][2220/3239]	Time 0.255 (0.621)	Data Time 0.001 (0.019)	Loss 2.2009 (2.2720)	Entropy 0.82767 (0.83048)	Top-1 acc 72.266 (69.737)	Top-5 acc 89.062 (87.469)	lr 0.00350
Train [91][2230/3239]	Time 0.246 (0.621)	Data Time 0.001 (0.019)	Loss 2.3061 (2.2720)	Entropy 0.82766 (0.83047)	Top-1 acc 69.141 (69.736)	Top-5 acc 86.328 (87.467)	lr 0.00350
Train [91][2240/3239]	Time 0.238 (0.620)	Data Time 0.001 (0.019)	Loss 2.5653 (2.2725)	Entropy 0.82758 (0.83046)	Top-1 acc 62.500 (69.726)	Top-5 acc 82.422 (87.458)	lr 0.00350
Train [91][2250/3239]	Time 0.220 (0.619)	Data Time 0.001 (0.019)	Loss 2.1397 (2.2723)	Entropy 0.82742 (0.83044)	Top-1 acc 70.703 (69.727)	Top-5 acc 91.016 (87.465)	lr 0.00350
Train [91][2260/3239]	Time 0.205 (0.619)	Data Time 0.001 (0.018)	Loss 2.3853 (2.2724)	Entropy 0.82742 (0.83043)	Top-1 acc 67.188 (69.727)	Top-5 acc 85.156 (87.462)	lr 0.00350
Train [91][2270/3239]	Time 0.219 (0.618)	Data Time 0.001 (0.018)	Loss 2.3121 (2.2724)	Entropy 0.82743 (0.83042)	Top-1 acc 66.797 (69.725)	Top-5 acc 88.281 (87.466)	lr 0.00350
Train [91][2280/3239]	Time 0.234 (0.618)	Data Time 0.001 (0.018)	Loss 2.2306 (2.2723)	Entropy 0.82738 (0.83040)	Top-1 acc 67.578 (69.727)	Top-5 acc 88.281 (87.468)	lr 0.00350
Train [91][2290/3239]	Time 0.223 (0.617)	Data Time 0.001 (0.018)	Loss 2.3145 (2.2726)	Entropy 0.82737 (0.83039)	Top-1 acc 69.531 (69.718)	Top-5 acc 85.938 (87.461)	lr 0.00350
Train [91][2300/3239]	Time 0.217 (0.616)	Data Time 0.001 (0.018)	Loss 2.4707 (2.2729)	Entropy 0.82728 (0.83038)	Top-1 acc 65.234 (69.712)	Top-5 acc 83.594 (87.456)	lr 0.00350
Train [91][2310/3239]	Time 0.245 (0.616)	Data Time 0.001 (0.018)	Loss 2.3190 (2.2737)	Entropy 0.82728 (0.83036)	Top-1 acc 67.969 (69.695)	Top-5 acc 88.281 (87.449)	lr 0.00350
Train [91][2320/3239]	Time 2.585 (0.615)	Data Time 0.001 (0.018)	Loss 2.2660 (2.2735)	Entropy 0.82728 (0.83035)	Top-1 acc 71.484 (69.696)	Top-5 acc 88.281 (87.456)	lr 0.00350
Train [91][2330/3239]	Time 0.233 (0.614)	Data Time 0.001 (0.018)	Loss 2.1559 (2.2734)	Entropy 0.82728 (0.83034)	Top-1 acc 71.875 (69.702)	Top-5 acc 89.453 (87.456)	lr 0.00350
Train [91][2340/3239]	Time 0.333 (0.613)	Data Time 0.001 (0.018)	Loss 2.4011 (2.2736)	Entropy 0.82727 (0.83032)	Top-1 acc 66.406 (69.697)	Top-5 acc 85.938 (87.453)	lr 0.00350
Train [91][2350/3239]	Time 0.233 (0.612)	Data Time 0.001 (0.018)	Loss 2.4066 (2.2736)	Entropy 0.82706 (0.83031)	Top-1 acc 66.406 (69.695)	Top-5 acc 85.156 (87.452)	lr 0.00349
Train [91][2360/3239]	Time 0.246 (0.612)	Data Time 0.001 (0.018)	Loss 2.2399 (2.2736)	Entropy 0.82710 (0.83030)	Top-1 acc 68.750 (69.698)	Top-5 acc 87.109 (87.451)	lr 0.00349
Train [91][2370/3239]	Time 0.244 (0.611)	Data Time 0.001 (0.018)	Loss 2.2729 (2.2737)	Entropy 0.82706 (0.83028)	Top-1 acc 71.094 (69.693)	Top-5 acc 85.938 (87.446)	lr 0.00349
Train [91][2380/3239]	Time 0.245 (0.611)	Data Time 0.001 (0.018)	Loss 2.1836 (2.2737)	Entropy 0.82709 (0.83027)	Top-1 acc 71.875 (69.694)	Top-5 acc 88.672 (87.447)	lr 0.00349
Train [91][2390/3239]	Time 0.235 (0.611)	Data Time 0.001 (0.018)	Loss 2.2111 (2.2737)	Entropy 0.82701 (0.83026)	Top-1 acc 73.047 (69.700)	Top-5 acc 89.453 (87.447)	lr 0.00349
Train [91][2400/3239]	Time 0.234 (0.610)	Data Time 0.001 (0.018)	Loss 2.2095 (2.2737)	Entropy 0.82702 (0.83024)	Top-1 acc 71.094 (69.698)	Top-5 acc 89.453 (87.447)	lr 0.00349
Train [91][2410/3239]	Time 0.215 (0.610)	Data Time 0.002 (0.017)	Loss 2.4300 (2.2739)	Entropy 0.82705 (0.83023)	Top-1 acc 66.406 (69.694)	Top-5 acc 83.594 (87.444)	lr 0.00349
Train [91][2420/3239]	Time 0.223 (0.609)	Data Time 0.001 (0.017)	Loss 2.3909 (2.2738)	Entropy 0.82698 (0.83022)	Top-1 acc 67.188 (69.701)	Top-5 acc 82.031 (87.443)	lr 0.00349
Train [91][2430/3239]	Time 2.416 (0.608)	Data Time 0.001 (0.017)	Loss 2.1603 (2.2739)	Entropy 0.82698 (0.83020)	Top-1 acc 73.438 (69.698)	Top-5 acc 90.234 (87.442)	lr 0.00349
Train [91][2440/3239]	Time 0.231 (0.607)	Data Time 0.001 (0.017)	Loss 2.2699 (2.2739)	Entropy 0.82696 (0.83019)	Top-1 acc 70.703 (69.697)	Top-5 acc 88.672 (87.441)	lr 0.00349
Train [91][2450/3239]	Time 0.263 (0.606)	Data Time 0.001 (0.017)	Loss 2.1463 (2.2740)	Entropy 0.82697 (0.83018)	Top-1 acc 72.656 (69.695)	Top-5 acc 90.625 (87.439)	lr 0.00349
Train [91][2460/3239]	Time 0.254 (0.606)	Data Time 0.001 (0.017)	Loss 2.2637 (2.2742)	Entropy 0.82696 (0.83016)	Top-1 acc 69.531 (69.692)	Top-5 acc 89.062 (87.435)	lr 0.00349
Train [91][2470/3239]	Time 0.345 (0.606)	Data Time 0.001 (0.017)	Loss 2.4570 (2.2746)	Entropy 0.82698 (0.83015)	Top-1 acc 66.016 (69.682)	Top-5 acc 83.984 (87.425)	lr 0.00349
Train [91][2480/3239]	Time 0.225 (0.605)	Data Time 0.001 (0.017)	Loss 2.3384 (2.2746)	Entropy 0.82693 (0.83014)	Top-1 acc 69.141 (69.683)	Top-5 acc 84.375 (87.425)	lr 0.00349
Train [91][2490/3239]	Time 0.225 (0.605)	Data Time 0.001 (0.017)	Loss 2.2760 (2.2746)	Entropy 0.82697 (0.83012)	Top-1 acc 71.094 (69.685)	Top-5 acc 86.719 (87.424)	lr 0.00349
Train [91][2500/3239]	Time 0.273 (0.604)	Data Time 0.001 (0.017)	Loss 2.4162 (2.2745)	Entropy 0.82687 (0.83011)	Top-1 acc 66.016 (69.684)	Top-5 acc 86.328 (87.426)	lr 0.00348
Train [91][2510/3239]	Time 0.326 (0.604)	Data Time 0.001 (0.017)	Loss 2.2276 (2.2746)	Entropy 0.82688 (0.83010)	Top-1 acc 73.047 (69.687)	Top-5 acc 88.672 (87.422)	lr 0.00348
Train [91][2520/3239]	Time 0.246 (0.625)	Data Time 0.002 (0.017)	Loss 2.2284 (2.2745)	Entropy 0.82685 (0.83009)	Top-1 acc 69.922 (69.690)	Top-5 acc 87.109 (87.424)	lr 0.00348
Train [91][2530/3239]	Time 0.234 (0.625)	Data Time 0.002 (0.017)	Loss 2.4322 (2.2746)	Entropy 0.82667 (0.83007)	Top-1 acc 68.750 (69.686)	Top-5 acc 84.375 (87.424)	lr 0.00348
Train [91][2540/3239]	Time 2.565 (0.624)	Data Time 0.001 (0.017)	Loss 2.3304 (2.2748)	Entropy 0.82667 (0.83006)	Top-1 acc 70.703 (69.686)	Top-5 acc 87.109 (87.424)	lr 0.00348
Train [91][2550/3239]	Time 0.277 (0.623)	Data Time 0.001 (0.017)	Loss 2.2098 (2.2748)	Entropy 0.82663 (0.83005)	Top-1 acc 68.750 (69.687)	Top-5 acc 92.188 (87.425)	lr 0.00348
Train [91][2560/3239]	Time 0.233 (0.622)	Data Time 0.001 (0.017)	Loss 2.3192 (2.2747)	Entropy 0.82662 (0.83003)	Top-1 acc 69.531 (69.687)	Top-5 acc 85.156 (87.424)	lr 0.00348
Train [91][2570/3239]	Time 0.243 (0.622)	Data Time 0.001 (0.016)	Loss 2.3088 (2.2748)	Entropy 0.82663 (0.83002)	Top-1 acc 69.141 (69.687)	Top-5 acc 85.547 (87.422)	lr 0.00348
Train [91][2580/3239]	Time 0.237 (0.621)	Data Time 0.001 (0.016)	Loss 2.3852 (2.2749)	Entropy 0.82661 (0.83001)	Top-1 acc 70.703 (69.687)	Top-5 acc 82.812 (87.421)	lr 0.00348
Train [91][2590/3239]	Time 0.240 (0.620)	Data Time 0.002 (0.016)	Loss 2.3109 (2.2748)	Entropy 0.82659 (0.82999)	Top-1 acc 71.094 (69.690)	Top-5 acc 85.547 (87.424)	lr 0.00348
Train [91][2600/3239]	Time 0.237 (0.620)	Data Time 0.001 (0.016)	Loss 2.3139 (2.2748)	Entropy 0.82659 (0.82998)	Top-1 acc 64.453 (69.688)	Top-5 acc 87.500 (87.421)	lr 0.00348
Train [91][2610/3239]	Time 0.230 (0.619)	Data Time 0.001 (0.016)	Loss 2.2434 (2.2748)	Entropy 0.82662 (0.82997)	Top-1 acc 71.094 (69.686)	Top-5 acc 89.453 (87.421)	lr 0.00348
Train [91][2620/3239]	Time 0.233 (0.619)	Data Time 0.001 (0.016)	Loss 2.1184 (2.2748)	Entropy 0.82660 (0.82995)	Top-1 acc 73.438 (69.684)	Top-5 acc 91.797 (87.421)	lr 0.00348
Train [91][2630/3239]	Time 0.247 (0.619)	Data Time 0.001 (0.016)	Loss 2.2587 (2.2747)	Entropy 0.82654 (0.82994)	Top-1 acc 69.141 (69.683)	Top-5 acc 88.672 (87.422)	lr 0.00348
Train [91][2640/3239]	Time 0.338 (0.618)	Data Time 0.001 (0.016)	Loss 2.2833 (2.2749)	Entropy 0.82657 (0.82993)	Top-1 acc 70.703 (69.678)	Top-5 acc 87.109 (87.419)	lr 0.00347
Train [91][2650/3239]	Time 0.272 (0.617)	Data Time 0.001 (0.016)	Loss 2.2069 (2.2749)	Entropy 0.82651 (0.82992)	Top-1 acc 71.094 (69.679)	Top-5 acc 87.109 (87.421)	lr 0.00347
Train [91][2660/3239]	Time 0.215 (0.617)	Data Time 0.001 (0.016)	Loss 2.2874 (2.2748)	Entropy 0.82653 (0.82990)	Top-1 acc 68.750 (69.681)	Top-5 acc 86.328 (87.422)	lr 0.00347
Train [91][2670/3239]	Time 0.220 (0.616)	Data Time 0.001 (0.016)	Loss 2.2070 (2.2748)	Entropy 0.82653 (0.82989)	Top-1 acc 66.016 (69.681)	Top-5 acc 90.234 (87.421)	lr 0.00347
Train [91][2680/3239]	Time 0.315 (0.616)	Data Time 0.001 (0.016)	Loss 2.2101 (2.2746)	Entropy 0.82656 (0.82988)	Top-1 acc 69.141 (69.681)	Top-5 acc 89.453 (87.425)	lr 0.00347
Train [91][2690/3239]	Time 0.241 (0.615)	Data Time 0.001 (0.016)	Loss 2.3224 (2.2748)	Entropy 0.82650 (0.82987)	Top-1 acc 71.484 (69.679)	Top-5 acc 84.375 (87.421)	lr 0.00347
Train [91][2700/3239]	Time 0.236 (0.615)	Data Time 0.001 (0.016)	Loss 2.3168 (2.2748)	Entropy 0.82640 (0.82985)	Top-1 acc 67.578 (69.677)	Top-5 acc 87.109 (87.423)	lr 0.00347
Train [91][2710/3239]	Time 0.259 (0.614)	Data Time 0.001 (0.016)	Loss 2.3621 (2.2748)	Entropy 0.82630 (0.82984)	Top-1 acc 67.188 (69.677)	Top-5 acc 85.547 (87.424)	lr 0.00347
Train [91][2720/3239]	Time 0.204 (0.614)	Data Time 0.001 (0.016)	Loss 2.3623 (2.2749)	Entropy 0.82631 (0.82983)	Top-1 acc 66.797 (69.672)	Top-5 acc 85.938 (87.424)	lr 0.00347
Train [91][2730/3239]	Time 0.259 (0.613)	Data Time 0.001 (0.016)	Loss 2.2611 (2.2749)	Entropy 0.82620 (0.82981)	Top-1 acc 71.484 (69.673)	Top-5 acc 88.672 (87.424)	lr 0.00347
Train [91][2740/3239]	Time 0.219 (0.613)	Data Time 0.001 (0.016)	Loss 2.3984 (2.2748)	Entropy 0.82642 (0.82980)	Top-1 acc 65.625 (69.674)	Top-5 acc 83.594 (87.424)	lr 0.00347
Train [91][2750/3239]	Time 0.263 (0.612)	Data Time 0.001 (0.015)	Loss 2.0886 (2.2747)	Entropy 0.82644 (0.82979)	Top-1 acc 73.438 (69.671)	Top-5 acc 89.844 (87.427)	lr 0.00347
Train [91][2760/3239]	Time 0.251 (0.612)	Data Time 0.001 (0.015)	Loss 2.4396 (2.2748)	Entropy 0.82642 (0.82978)	Top-1 acc 65.625 (69.667)	Top-5 acc 84.375 (87.426)	lr 0.00347
Train [91][2770/3239]	Time 0.222 (0.611)	Data Time 0.001 (0.015)	Loss 2.2793 (2.2747)	Entropy 0.82643 (0.82976)	Top-1 acc 68.359 (69.671)	Top-5 acc 87.891 (87.427)	lr 0.00347
Train [91][2780/3239]	Time 0.230 (0.611)	Data Time 0.001 (0.015)	Loss 2.3404 (2.2747)	Entropy 0.82644 (0.82975)	Top-1 acc 68.359 (69.667)	Top-5 acc 87.109 (87.426)	lr 0.00346
Train [91][2790/3239]	Time 0.245 (0.610)	Data Time 0.001 (0.015)	Loss 2.2258 (2.2746)	Entropy 0.82641 (0.82974)	Top-1 acc 72.266 (69.670)	Top-5 acc 89.062 (87.429)	lr 0.00346
Train [91][2800/3239]	Time 0.223 (0.610)	Data Time 0.001 (0.015)	Loss 2.2530 (2.2747)	Entropy 0.82643 (0.82973)	Top-1 acc 69.922 (69.670)	Top-5 acc 87.109 (87.427)	lr 0.00346
Train [91][2810/3239]	Time 0.232 (0.610)	Data Time 0.001 (0.015)	Loss 2.2777 (2.2747)	Entropy 0.82646 (0.82972)	Top-1 acc 69.922 (69.669)	Top-5 acc 83.984 (87.424)	lr 0.00346
Train [91][2820/3239]	Time 0.234 (0.609)	Data Time 0.001 (0.015)	Loss 2.0494 (2.2747)	Entropy 0.82656 (0.82971)	Top-1 acc 73.438 (69.668)	Top-5 acc 92.969 (87.428)	lr 0.00346
Train [91][2830/3239]	Time 0.272 (0.609)	Data Time 0.001 (0.015)	Loss 2.2605 (2.2745)	Entropy 0.82644 (0.82969)	Top-1 acc 66.797 (69.673)	Top-5 acc 88.672 (87.434)	lr 0.00346
Train [91][2840/3239]	Time 0.266 (0.608)	Data Time 0.001 (0.015)	Loss 2.1834 (2.2744)	Entropy 0.82642 (0.82968)	Top-1 acc 67.969 (69.670)	Top-5 acc 90.625 (87.437)	lr 0.00346
Train [91][2850/3239]	Time 0.230 (0.608)	Data Time 0.001 (0.015)	Loss 2.2425 (2.2745)	Entropy 0.82638 (0.82967)	Top-1 acc 68.359 (69.665)	Top-5 acc 89.062 (87.437)	lr 0.00346
Train [91][2860/3239]	Time 0.304 (0.624)	Data Time 0.004 (0.015)	Loss 2.1971 (2.2744)	Entropy 0.82643 (0.82966)	Top-1 acc 69.141 (69.665)	Top-5 acc 88.281 (87.439)	lr 0.00346
Train [91][2870/3239]	Time 0.223 (0.624)	Data Time 0.002 (0.015)	Loss 2.3824 (2.2744)	Entropy 0.82646 (0.82965)	Top-1 acc 66.797 (69.665)	Top-5 acc 83.203 (87.439)	lr 0.00346
Train [91][2880/3239]	Time 0.260 (0.624)	Data Time 0.002 (0.015)	Loss 2.2448 (2.2744)	Entropy 0.82641 (0.82964)	Top-1 acc 68.359 (69.663)	Top-5 acc 87.891 (87.440)	lr 0.00346
Train [91][2890/3239]	Time 0.232 (0.623)	Data Time 0.001 (0.015)	Loss 2.2991 (2.2745)	Entropy 0.82576 (0.82963)	Top-1 acc 66.406 (69.662)	Top-5 acc 87.891 (87.440)	lr 0.00346
Train [91][2900/3239]	Time 0.234 (0.623)	Data Time 0.001 (0.015)	Loss 2.3961 (2.2745)	Entropy 0.82575 (0.82961)	Top-1 acc 69.531 (69.660)	Top-5 acc 83.594 (87.437)	lr 0.00346
Train [91][2910/3239]	Time 0.233 (0.622)	Data Time 0.001 (0.015)	Loss 2.2836 (2.2747)	Entropy 0.82568 (0.82960)	Top-1 acc 69.922 (69.657)	Top-5 acc 86.719 (87.434)	lr 0.00346
Train [91][2920/3239]	Time 0.267 (0.622)	Data Time 0.001 (0.015)	Loss 2.3368 (2.2746)	Entropy 0.82569 (0.82959)	Top-1 acc 69.141 (69.658)	Top-5 acc 85.156 (87.434)	lr 0.00346
Train [91][2930/3239]	Time 0.327 (0.621)	Data Time 0.001 (0.015)	Loss 2.4762 (2.2747)	Entropy 0.82569 (0.82957)	Top-1 acc 66.406 (69.655)	Top-5 acc 83.203 (87.433)	lr 0.00345
Train [91][2940/3239]	Time 0.228 (0.621)	Data Time 0.001 (0.015)	Loss 2.3005 (2.2747)	Entropy 0.82570 (0.82956)	Top-1 acc 69.531 (69.659)	Top-5 acc 86.328 (87.433)	lr 0.00345
Train [91][2950/3239]	Time 0.279 (0.620)	Data Time 0.001 (0.015)	Loss 2.3241 (2.2748)	Entropy 0.82564 (0.82955)	Top-1 acc 71.484 (69.657)	Top-5 acc 86.328 (87.433)	lr 0.00345
Train [91][2960/3239]	Time 0.222 (0.620)	Data Time 0.001 (0.015)	Loss 2.2009 (2.2748)	Entropy 0.82556 (0.82953)	Top-1 acc 68.750 (69.658)	Top-5 acc 88.672 (87.432)	lr 0.00345
Train [91][2970/3239]	Time 0.274 (0.619)	Data Time 0.001 (0.014)	Loss 2.3060 (2.2746)	Entropy 0.82557 (0.82952)	Top-1 acc 73.047 (69.666)	Top-5 acc 84.766 (87.434)	lr 0.00345
Train [91][2980/3239]	Time 0.216 (0.619)	Data Time 0.002 (0.014)	Loss 2.2542 (2.2744)	Entropy 0.82554 (0.82951)	Top-1 acc 69.922 (69.669)	Top-5 acc 87.891 (87.438)	lr 0.00345
Train [91][2990/3239]	Time 0.269 (0.618)	Data Time 0.001 (0.014)	Loss 2.2366 (2.2743)	Entropy 0.82552 (0.82949)	Top-1 acc 70.703 (69.669)	Top-5 acc 89.844 (87.441)	lr 0.00345
Train [91][3000/3239]	Time 0.225 (0.618)	Data Time 0.001 (0.014)	Loss 2.2901 (2.2743)	Entropy 0.82545 (0.82948)	Top-1 acc 68.750 (69.673)	Top-5 acc 88.281 (87.440)	lr 0.00345
Train [91][3010/3239]	Time 0.240 (0.617)	Data Time 0.001 (0.014)	Loss 2.2414 (2.2743)	Entropy 0.82546 (0.82947)	Top-1 acc 72.656 (69.673)	Top-5 acc 85.938 (87.438)	lr 0.00345
Train [91][3020/3239]	Time 0.217 (0.617)	Data Time 0.001 (0.014)	Loss 2.3967 (2.2743)	Entropy 0.82542 (0.82945)	Top-1 acc 62.500 (69.672)	Top-5 acc 85.156 (87.437)	lr 0.00345
Train [91][3030/3239]	Time 0.262 (0.617)	Data Time 0.001 (0.014)	Loss 2.1103 (2.2743)	Entropy 0.82542 (0.82944)	Top-1 acc 73.047 (69.669)	Top-5 acc 90.625 (87.438)	lr 0.00345
Train [91][3040/3239]	Time 0.230 (0.616)	Data Time 0.001 (0.014)	Loss 2.2952 (2.2742)	Entropy 0.82539 (0.82943)	Top-1 acc 67.969 (69.666)	Top-5 acc 85.156 (87.438)	lr 0.00345
Train [91][3050/3239]	Time 0.226 (0.616)	Data Time 0.001 (0.014)	Loss 2.2641 (2.2743)	Entropy 0.82539 (0.82941)	Top-1 acc 72.266 (69.664)	Top-5 acc 87.891 (87.439)	lr 0.00345
Train [91][3060/3239]	Time 0.362 (0.615)	Data Time 0.001 (0.014)	Loss 2.2709 (2.2742)	Entropy 0.82548 (0.82940)	Top-1 acc 68.359 (69.666)	Top-5 acc 86.719 (87.441)	lr 0.00345
Train [91][3070/3239]	Time 0.234 (0.615)	Data Time 0.002 (0.014)	Loss 2.3160 (2.2743)	Entropy 0.82547 (0.82939)	Top-1 acc 69.141 (69.667)	Top-5 acc 86.328 (87.441)	lr 0.00344
Train [91][3080/3239]	Time 0.239 (0.614)	Data Time 0.002 (0.014)	Loss 2.2992 (2.2742)	Entropy 0.82548 (0.82938)	Top-1 acc 69.141 (69.667)	Top-5 acc 86.719 (87.443)	lr 0.00344
Train [91][3090/3239]	Time 0.232 (0.614)	Data Time 0.001 (0.014)	Loss 2.1805 (2.2742)	Entropy 0.82547 (0.82936)	Top-1 acc 72.656 (69.667)	Top-5 acc 88.672 (87.444)	lr 0.00344
Train [91][3100/3239]	Time 0.360 (0.614)	Data Time 0.001 (0.014)	Loss 2.2131 (2.2740)	Entropy 0.82543 (0.82935)	Top-1 acc 68.359 (69.671)	Top-5 acc 86.719 (87.445)	lr 0.00344
Train [91][3110/3239]	Time 0.226 (0.613)	Data Time 0.001 (0.014)	Loss 2.5383 (2.2741)	Entropy 0.82538 (0.82934)	Top-1 acc 63.672 (69.670)	Top-5 acc 83.594 (87.442)	lr 0.00344
Train [91][3120/3239]	Time 0.261 (0.613)	Data Time 0.001 (0.014)	Loss 2.3228 (2.2742)	Entropy 0.82537 (0.82933)	Top-1 acc 69.531 (69.670)	Top-5 acc 87.500 (87.440)	lr 0.00344
Train [91][3130/3239]	Time 0.297 (0.612)	Data Time 0.001 (0.014)	Loss 2.2408 (2.2743)	Entropy 0.82532 (0.82931)	Top-1 acc 69.922 (69.670)	Top-5 acc 89.453 (87.439)	lr 0.00344
Train [91][3140/3239]	Time 0.250 (0.612)	Data Time 0.001 (0.014)	Loss 2.2031 (2.2743)	Entropy 0.82529 (0.82930)	Top-1 acc 72.266 (69.668)	Top-5 acc 89.062 (87.437)	lr 0.00344
Train [91][3150/3239]	Time 0.264 (0.611)	Data Time 0.003 (0.014)	Loss 2.2776 (2.2744)	Entropy 0.82529 (0.82929)	Top-1 acc 69.531 (69.665)	Top-5 acc 87.891 (87.436)	lr 0.00344
Train [91][3160/3239]	Time 0.253 (0.611)	Data Time 0.001 (0.014)	Loss 2.1899 (2.2744)	Entropy 0.82527 (0.82927)	Top-1 acc 71.875 (69.665)	Top-5 acc 86.719 (87.434)	lr 0.00344
Train [91][3170/3239]	Time 0.252 (0.611)	Data Time 0.001 (0.014)	Loss 2.2649 (2.2744)	Entropy 0.82526 (0.82926)	Top-1 acc 68.750 (69.668)	Top-5 acc 87.500 (87.433)	lr 0.00344
Train [91][3180/3239]	Time 0.233 (0.610)	Data Time 0.000 (0.014)	Loss 2.3187 (2.2744)	Entropy 0.82523 (0.82925)	Top-1 acc 69.141 (69.668)	Top-5 acc 86.328 (87.431)	lr 0.00344
Train [91][3190/3239]	Time 0.253 (0.626)	Data Time 0.000 (0.014)	Loss 2.3706 (2.2744)	Entropy 0.82522 (0.82924)	Top-1 acc 66.406 (69.669)	Top-5 acc 84.766 (87.434)	lr 0.00344
Train [91][3200/3239]	Time 0.227 (0.626)	Data Time 0.000 (0.014)	Loss 2.1660 (2.2744)	Entropy 0.82520 (0.82922)	Top-1 acc 72.266 (69.668)	Top-5 acc 86.328 (87.433)	lr 0.00344
Train [91][3210/3239]	Time 0.234 (0.625)	Data Time 0.000 (0.014)	Loss 2.3081 (2.2744)	Entropy 0.82520 (0.82921)	Top-1 acc 71.484 (69.668)	Top-5 acc 85.156 (87.431)	lr 0.00343
Train [91][3220/3239]	Time 0.235 (0.625)	Data Time 0.000 (0.013)	Loss 2.1889 (2.2745)	Entropy 0.82519 (0.82920)	Top-1 acc 70.703 (69.665)	Top-5 acc 87.891 (87.431)	lr 0.00343
Train [91][3230/3239]	Time 0.231 (0.624)	Data Time 0.000 (0.013)	Loss 2.1135 (2.2743)	Entropy 0.82514 (0.82919)	Top-1 acc 74.219 (69.670)	Top-5 acc 89.844 (87.433)	lr 0.00343
Train [91][3239/3239]	Time 2.316 (0.624)	Data Time 0.000 (0.013)	Loss 2.5373 (2.2744)	Entropy 0.82514 (0.82918)	Top-1 acc 64.198 (69.670)	Top-5 acc 82.716 (87.430)	lr 0.00343
==========Valid [91/120]	loss 1.250	top-1 acc 71.469 (71.469)	top-5 acc 89.228	Train top-1 69.670	top-5 87.430	Entropy 0.82514	Latency-None: 0.000ms	Flops: 546.53M
Train [92][0/3239]	Time 39.398 (39.398)	Data Time 37.477 (37.477)	Loss 2.2177 (2.2177)	Entropy 0.82512 (0.82512)	Top-1 acc 71.875 (71.875)	Top-5 acc 87.891 (87.891)	lr 0.00343
Train [92][10/3239]	Time 2.521 (4.098)	Data Time 0.002 (3.411)	Loss 2.4037 (2.2706)	Entropy 0.82512 (0.82512)	Top-1 acc 66.406 (70.490)	Top-5 acc 87.109 (87.926)	lr 0.00343
Train [92][20/3239]	Time 0.263 (2.265)	Data Time 0.001 (1.787)	Loss 2.2648 (2.2581)	Entropy 0.82511 (0.82511)	Top-1 acc 68.359 (70.480)	Top-5 acc 89.844 (87.891)	lr 0.00343
Train [92][30/3239]	Time 0.227 (1.688)	Data Time 0.001 (1.211)	Loss 2.2876 (2.2472)	Entropy 0.82509 (0.82511)	Top-1 acc 66.406 (70.413)	Top-5 acc 85.938 (88.218)	lr 0.00343
Train [92][40/3239]	Time 0.235 (1.395)	Data Time 0.002 (0.916)	Loss 2.3970 (2.2506)	Entropy 0.82505 (0.82510)	Top-1 acc 67.578 (70.370)	Top-5 acc 87.500 (88.157)	lr 0.00343
Train [92][50/3239]	Time 0.246 (1.215)	Data Time 0.002 (0.737)	Loss 2.4746 (2.2582)	Entropy 0.82505 (0.82509)	Top-1 acc 65.625 (70.144)	Top-5 acc 82.812 (87.891)	lr 0.00343
Train [92][60/3239]	Time 0.233 (1.096)	Data Time 0.001 (0.616)	Loss 2.3110 (2.2585)	Entropy 0.82494 (0.82507)	Top-1 acc 67.969 (70.133)	Top-5 acc 86.328 (87.865)	lr 0.00343
Train [92][70/3239]	Time 0.246 (1.011)	Data Time 0.002 (0.530)	Loss 2.3320 (2.2605)	Entropy 0.82483 (0.82504)	Top-1 acc 71.875 (70.208)	Top-5 acc 85.938 (87.709)	lr 0.00343
Train [92][80/3239]	Time 0.219 (0.946)	Data Time 0.001 (0.465)	Loss 2.0818 (2.2549)	Entropy 0.82478 (0.82502)	Top-1 acc 73.828 (70.259)	Top-5 acc 90.625 (87.847)	lr 0.00343
Train [92][90/3239]	Time 0.238 (0.898)	Data Time 0.001 (0.414)	Loss 2.4200 (2.2640)	Entropy 0.82482 (0.82499)	Top-1 acc 65.625 (70.003)	Top-5 acc 83.203 (87.650)	lr 0.00343
Train [92][100/3239]	Time 0.252 (0.862)	Data Time 0.001 (0.373)	Loss 2.2428 (2.2646)	Entropy 0.82479 (0.82497)	Top-1 acc 71.094 (70.022)	Top-5 acc 88.281 (87.624)	lr 0.00343
Train [92][110/3239]	Time 0.267 (0.829)	Data Time 0.001 (0.339)	Loss 2.0238 (2.2611)	Entropy 0.82472 (0.82496)	Top-1 acc 75.391 (70.080)	Top-5 acc 91.797 (87.697)	lr 0.00343
Train [92][120/3239]	Time 2.664 (0.802)	Data Time 0.002 (0.312)	Loss 2.3013 (2.2737)	Entropy 0.82472 (0.82494)	Top-1 acc 71.484 (69.841)	Top-5 acc 89.453 (87.545)	lr 0.00342
Train [92][130/3239]	Time 0.251 (0.759)	Data Time 0.001 (0.288)	Loss 2.2275 (2.2692)	Entropy 0.82472 (0.82492)	Top-1 acc 68.359 (69.925)	Top-5 acc 87.109 (87.575)	lr 0.00342
Train [92][140/3239]	Time 0.246 (0.739)	Data Time 0.001 (0.268)	Loss 2.1848 (2.2677)	Entropy 0.82471 (0.82490)	Top-1 acc 73.047 (69.997)	Top-5 acc 87.891 (87.603)	lr 0.00342
Train [92][150/3239]	Time 0.232 (0.722)	Data Time 0.001 (0.250)	Loss 2.3507 (2.2657)	Entropy 0.82466 (0.82489)	Top-1 acc 66.797 (70.075)	Top-5 acc 87.891 (87.635)	lr 0.00342
Train [92][160/3239]	Time 0.375 (0.707)	Data Time 0.001 (0.235)	Loss 2.2553 (2.2643)	Entropy 0.82468 (0.82488)	Top-1 acc 73.047 (70.164)	Top-5 acc 88.672 (87.667)	lr 0.00342
Train [92][170/3239]	Time 0.225 (0.694)	Data Time 0.001 (0.221)	Loss 2.1404 (2.2646)	Entropy 0.82469 (0.82486)	Top-1 acc 72.266 (70.148)	Top-5 acc 90.625 (87.680)	lr 0.00342
Train [92][180/3239]	Time 0.249 (0.683)	Data Time 0.001 (0.209)	Loss 2.2170 (2.2651)	Entropy 0.82469 (0.82485)	Top-1 acc 69.141 (70.077)	Top-5 acc 89.062 (87.658)	lr 0.00342
Train [92][190/3239]	Time 0.211 (0.672)	Data Time 0.001 (0.198)	Loss 2.4646 (2.2645)	Entropy 0.82469 (0.82485)	Top-1 acc 63.281 (70.106)	Top-5 acc 83.203 (87.635)	lr 0.00342
Train [92][200/3239]	Time 0.237 (0.663)	Data Time 0.003 (0.188)	Loss 2.3323 (2.2620)	Entropy 0.82473 (0.82484)	Top-1 acc 68.359 (70.141)	Top-5 acc 87.109 (87.632)	lr 0.00342
Train [92][210/3239]	Time 0.227 (0.656)	Data Time 0.002 (0.179)	Loss 2.1235 (2.2593)	Entropy 0.82479 (0.82483)	Top-1 acc 72.266 (70.188)	Top-5 acc 89.844 (87.661)	lr 0.00342
Train [92][220/3239]	Time 0.235 (0.648)	Data Time 0.002 (0.171)	Loss 2.1822 (2.2576)	Entropy 0.82476 (0.82483)	Top-1 acc 70.703 (70.224)	Top-5 acc 87.500 (87.691)	lr 0.00342
Train [92][230/3239]	Time 2.509 (0.640)	Data Time 0.001 (0.164)	Loss 2.3033 (2.2567)	Entropy 0.82476 (0.82483)	Top-1 acc 68.359 (70.287)	Top-5 acc 86.719 (87.674)	lr 0.00342
Train [92][240/3239]	Time 0.251 (0.623)	Data Time 0.001 (0.157)	Loss 2.1063 (2.2570)	Entropy 0.82482 (0.82483)	Top-1 acc 72.266 (70.235)	Top-5 acc 91.406 (87.682)	lr 0.00342
Train [92][250/3239]	Time 0.370 (0.619)	Data Time 0.001 (0.151)	Loss 2.3181 (2.2567)	Entropy 0.82482 (0.82483)	Top-1 acc 68.359 (70.218)	Top-5 acc 84.375 (87.704)	lr 0.00342
Train [92][260/3239]	Time 0.227 (0.613)	Data Time 0.001 (0.145)	Loss 2.3658 (2.2566)	Entropy 0.82481 (0.82483)	Top-1 acc 67.969 (70.205)	Top-5 acc 85.938 (87.720)	lr 0.00341
Train [92][270/3239]	Time 0.233 (0.608)	Data Time 0.002 (0.140)	Loss 2.2028 (2.2565)	Entropy 0.82477 (0.82483)	Top-1 acc 71.094 (70.173)	Top-5 acc 88.281 (87.746)	lr 0.00341
Train [92][280/3239]	Time 0.234 (0.604)	Data Time 0.002 (0.135)	Loss 2.3155 (2.2575)	Entropy 0.82475 (0.82482)	Top-1 acc 71.484 (70.157)	Top-5 acc 84.375 (87.736)	lr 0.00341
Train [92][290/3239]	Time 0.243 (0.600)	Data Time 0.001 (0.131)	Loss 2.2828 (2.2566)	Entropy 0.82438 (0.82481)	Top-1 acc 68.750 (70.165)	Top-5 acc 85.156 (87.746)	lr 0.00341
Train [92][300/3239]	Time 0.236 (0.596)	Data Time 0.001 (0.126)	Loss 2.3241 (2.2583)	Entropy 0.82428 (0.82480)	Top-1 acc 66.016 (70.119)	Top-5 acc 87.500 (87.725)	lr 0.00341
Train [92][310/3239]	Time 0.232 (0.769)	Data Time 0.002 (0.122)	Loss 2.2732 (2.2584)	Entropy 0.82426 (0.82478)	Top-1 acc 70.312 (70.107)	Top-5 acc 87.891 (87.726)	lr 0.00341
Train [92][320/3239]	Time 0.241 (0.761)	Data Time 0.002 (0.119)	Loss 2.2361 (2.2584)	Entropy 0.82421 (0.82477)	Top-1 acc 68.750 (70.078)	Top-5 acc 86.328 (87.728)	lr 0.00341
Train [92][330/3239]	Time 0.243 (0.753)	Data Time 0.002 (0.115)	Loss 2.2536 (2.2579)	Entropy 0.82416 (0.82475)	Top-1 acc 70.703 (70.086)	Top-5 acc 86.719 (87.728)	lr 0.00341
Train [92][340/3239]	Time 2.654 (0.745)	Data Time 0.002 (0.112)	Loss 2.3227 (2.2596)	Entropy 0.82416 (0.82473)	Top-1 acc 66.406 (70.038)	Top-5 acc 85.938 (87.712)	lr 0.00341
Train [92][350/3239]	Time 0.229 (0.731)	Data Time 0.001 (0.109)	Loss 2.0674 (2.2591)	Entropy 0.82421 (0.82472)	Top-1 acc 74.219 (70.048)	Top-5 acc 90.625 (87.743)	lr 0.00341
Train [92][360/3239]	Time 0.234 (0.724)	Data Time 0.001 (0.106)	Loss 2.2386 (2.2590)	Entropy 0.82423 (0.82470)	Top-1 acc 69.141 (70.057)	Top-5 acc 87.109 (87.727)	lr 0.00341
Train [92][370/3239]	Time 0.247 (0.718)	Data Time 0.002 (0.103)	Loss 2.2703 (2.2585)	Entropy 0.82414 (0.82469)	Top-1 acc 71.484 (70.083)	Top-5 acc 87.891 (87.747)	lr 0.00341
Train [92][380/3239]	Time 0.245 (0.712)	Data Time 0.002 (0.100)	Loss 2.2545 (2.2580)	Entropy 0.82395 (0.82467)	Top-1 acc 69.141 (70.100)	Top-5 acc 88.281 (87.749)	lr 0.00341
Train [92][390/3239]	Time 0.236 (0.706)	Data Time 0.001 (0.098)	Loss 2.1720 (2.2580)	Entropy 0.82389 (0.82465)	Top-1 acc 73.438 (70.095)	Top-5 acc 88.281 (87.755)	lr 0.00341
Train [92][400/3239]	Time 0.246 (0.700)	Data Time 0.001 (0.095)	Loss 2.3673 (2.2594)	Entropy 0.82380 (0.82463)	Top-1 acc 66.797 (70.052)	Top-5 acc 87.109 (87.742)	lr 0.00341
Train [92][410/3239]	Time 0.227 (0.695)	Data Time 0.001 (0.093)	Loss 2.2300 (2.2609)	Entropy 0.82375 (0.82461)	Top-1 acc 68.359 (70.002)	Top-5 acc 87.109 (87.696)	lr 0.00340
Train [92][420/3239]	Time 0.318 (0.690)	Data Time 0.001 (0.091)	Loss 2.2394 (2.2597)	Entropy 0.82383 (0.82459)	Top-1 acc 71.484 (70.047)	Top-5 acc 90.234 (87.731)	lr 0.00340
Train [92][430/3239]	Time 0.241 (0.685)	Data Time 0.002 (0.089)	Loss 2.3424 (2.2601)	Entropy 0.82381 (0.82457)	Top-1 acc 68.750 (70.046)	Top-5 acc 85.938 (87.718)	lr 0.00340
Train [92][440/3239]	Time 0.281 (0.680)	Data Time 0.001 (0.087)	Loss 2.1773 (2.2612)	Entropy 0.82377 (0.82456)	Top-1 acc 72.656 (70.010)	Top-5 acc 91.406 (87.712)	lr 0.00340
Train [92][450/3239]	Time 2.491 (0.675)	Data Time 0.001 (0.085)	Loss 2.2245 (2.2612)	Entropy 0.82377 (0.82454)	Top-1 acc 68.750 (70.000)	Top-5 acc 87.500 (87.703)	lr 0.00340
Train [92][460/3239]	Time 0.234 (0.666)	Data Time 0.001 (0.083)	Loss 2.3122 (2.2627)	Entropy 0.82370 (0.82452)	Top-1 acc 66.797 (69.963)	Top-5 acc 85.938 (87.679)	lr 0.00340
Train [92][470/3239]	Time 0.231 (0.662)	Data Time 0.001 (0.081)	Loss 2.1810 (2.2647)	Entropy 0.82372 (0.82450)	Top-1 acc 72.656 (69.921)	Top-5 acc 88.672 (87.646)	lr 0.00340
Train [92][480/3239]	Time 0.184 (0.659)	Data Time 0.001 (0.080)	Loss 2.2973 (2.2647)	Entropy 0.82372 (0.82449)	Top-1 acc 67.578 (69.908)	Top-5 acc 85.938 (87.648)	lr 0.00340
Train [92][490/3239]	Time 0.223 (0.655)	Data Time 0.001 (0.078)	Loss 2.2415 (2.2637)	Entropy 0.82370 (0.82447)	Top-1 acc 68.750 (69.923)	Top-5 acc 88.672 (87.665)	lr 0.00340
Train [92][500/3239]	Time 0.231 (0.651)	Data Time 0.002 (0.077)	Loss 2.4625 (2.2631)	Entropy 0.82369 (0.82445)	Top-1 acc 68.359 (69.945)	Top-5 acc 82.422 (87.666)	lr 0.00340
Train [92][510/3239]	Time 0.345 (0.648)	Data Time 0.001 (0.075)	Loss 2.2873 (2.2629)	Entropy 0.82366 (0.82444)	Top-1 acc 70.703 (69.959)	Top-5 acc 86.328 (87.665)	lr 0.00340
Train [92][520/3239]	Time 0.219 (0.645)	Data Time 0.001 (0.074)	Loss 2.3226 (2.2623)	Entropy 0.82362 (0.82442)	Top-1 acc 68.750 (69.971)	Top-5 acc 85.547 (87.680)	lr 0.00340
Train [92][530/3239]	Time 0.233 (0.642)	Data Time 0.001 (0.072)	Loss 2.0850 (2.2601)	Entropy 0.82351 (0.82441)	Top-1 acc 72.656 (70.027)	Top-5 acc 92.969 (87.715)	lr 0.00340
Train [92][540/3239]	Time 0.233 (0.639)	Data Time 0.001 (0.071)	Loss 2.2799 (2.2607)	Entropy 0.82338 (0.82439)	Top-1 acc 67.188 (70.018)	Top-5 acc 87.891 (87.700)	lr 0.00340
Train [92][550/3239]	Time 0.327 (0.636)	Data Time 0.001 (0.070)	Loss 2.4666 (2.2621)	Entropy 0.82331 (0.82437)	Top-1 acc 67.969 (69.988)	Top-5 acc 86.328 (87.681)	lr 0.00339
Train [92][560/3239]	Time 2.622 (0.633)	Data Time 0.001 (0.069)	Loss 2.2619 (2.2616)	Entropy 0.82331 (0.82435)	Top-1 acc 69.141 (69.996)	Top-5 acc 90.234 (87.705)	lr 0.00339
Train [92][570/3239]	Time 0.225 (0.626)	Data Time 0.001 (0.067)	Loss 2.3858 (2.2623)	Entropy 0.82335 (0.82434)	Top-1 acc 69.922 (69.964)	Top-5 acc 85.938 (87.687)	lr 0.00339
Train [92][580/3239]	Time 0.231 (0.623)	Data Time 0.001 (0.066)	Loss 2.1869 (2.2624)	Entropy 0.82328 (0.82432)	Top-1 acc 69.531 (69.964)	Top-5 acc 88.672 (87.680)	lr 0.00339
Train [92][590/3239]	Time 0.265 (0.621)	Data Time 0.002 (0.065)	Loss 2.1330 (2.2615)	Entropy 0.82329 (0.82430)	Top-1 acc 71.484 (69.983)	Top-5 acc 90.234 (87.696)	lr 0.00339
Train [92][600/3239]	Time 0.232 (0.619)	Data Time 0.001 (0.064)	Loss 2.3064 (2.2614)	Entropy 0.82381 (0.82429)	Top-1 acc 67.578 (69.989)	Top-5 acc 87.891 (87.692)	lr 0.00339
Train [92][610/3239]	Time 0.234 (0.617)	Data Time 0.002 (0.063)	Loss 2.3367 (2.2610)	Entropy 0.82375 (0.82428)	Top-1 acc 66.406 (69.980)	Top-5 acc 87.500 (87.707)	lr 0.00339
Train [92][620/3239]	Time 0.234 (0.615)	Data Time 0.001 (0.062)	Loss 2.1827 (2.2601)	Entropy 0.82373 (0.82427)	Top-1 acc 74.219 (70.008)	Top-5 acc 87.500 (87.718)	lr 0.00339
Train [92][630/3239]	Time 0.238 (0.612)	Data Time 0.001 (0.061)	Loss 2.2476 (2.2604)	Entropy 0.82374 (0.82426)	Top-1 acc 71.094 (70.006)	Top-5 acc 87.891 (87.710)	lr 0.00339
Train [92][640/3239]	Time 0.241 (0.610)	Data Time 0.001 (0.060)	Loss 2.1447 (2.2610)	Entropy 0.82371 (0.82426)	Top-1 acc 73.438 (69.998)	Top-5 acc 89.844 (87.698)	lr 0.00339
Train [92][650/3239]	Time 0.227 (0.608)	Data Time 0.001 (0.059)	Loss 2.2066 (2.2615)	Entropy 0.82367 (0.82425)	Top-1 acc 70.312 (69.986)	Top-5 acc 89.844 (87.683)	lr 0.00339
Train [92][660/3239]	Time 0.254 (0.607)	Data Time 0.001 (0.058)	Loss 2.2770 (2.2631)	Entropy 0.82362 (0.82424)	Top-1 acc 67.969 (69.969)	Top-5 acc 86.719 (87.645)	lr 0.00339
Train [92][670/3239]	Time 52.632 (0.679)	Data Time 0.002 (0.058)	Loss 2.2628 (2.2627)	Entropy 0.82362 (0.82423)	Top-1 acc 68.359 (69.981)	Top-5 acc 87.500 (87.647)	lr 0.00339
Train [92][680/3239]	Time 0.566 (0.675)	Data Time 0.002 (0.057)	Loss 2.0458 (2.2622)	Entropy 0.82358 (0.82422)	Top-1 acc 72.266 (69.988)	Top-5 acc 93.359 (87.658)	lr 0.00339
Train [92][690/3239]	Time 0.232 (0.672)	Data Time 0.002 (0.056)	Loss 2.1727 (2.2619)	Entropy 0.82346 (0.82421)	Top-1 acc 73.047 (69.994)	Top-5 acc 91.016 (87.675)	lr 0.00338
Train [92][700/3239]	Time 0.251 (0.669)	Data Time 0.002 (0.055)	Loss 2.2092 (2.2624)	Entropy 0.82345 (0.82420)	Top-1 acc 69.141 (69.979)	Top-5 acc 88.672 (87.672)	lr 0.00338
Train [92][710/3239]	Time 0.224 (0.666)	Data Time 0.001 (0.054)	Loss 2.3076 (2.2633)	Entropy 0.82345 (0.82419)	Top-1 acc 67.578 (69.964)	Top-5 acc 85.938 (87.654)	lr 0.00338
Train [92][720/3239]	Time 0.331 (0.664)	Data Time 0.001 (0.054)	Loss 2.1672 (2.2633)	Entropy 0.82341 (0.82418)	Top-1 acc 72.266 (69.964)	Top-5 acc 89.453 (87.648)	lr 0.00338
Train [92][730/3239]	Time 0.253 (0.661)	Data Time 0.001 (0.053)	Loss 2.3218 (2.2633)	Entropy 0.82338 (0.82417)	Top-1 acc 70.312 (69.955)	Top-5 acc 86.719 (87.650)	lr 0.00338
Train [92][740/3239]	Time 0.217 (0.659)	Data Time 0.001 (0.052)	Loss 2.2294 (2.2632)	Entropy 0.82323 (0.82416)	Top-1 acc 69.922 (69.947)	Top-5 acc 88.672 (87.658)	lr 0.00338
Train [92][750/3239]	Time 0.225 (0.656)	Data Time 0.001 (0.052)	Loss 2.1668 (2.2627)	Entropy 0.82324 (0.82414)	Top-1 acc 71.875 (69.952)	Top-5 acc 91.016 (87.663)	lr 0.00338
Train [92][760/3239]	Time 0.261 (0.654)	Data Time 0.001 (0.051)	Loss 2.3178 (2.2626)	Entropy 0.82320 (0.82413)	Top-1 acc 67.188 (69.952)	Top-5 acc 86.719 (87.661)	lr 0.00338
Train [92][770/3239]	Time 0.229 (0.652)	Data Time 0.001 (0.050)	Loss 2.2779 (2.2627)	Entropy 0.82322 (0.82412)	Top-1 acc 71.094 (69.952)	Top-5 acc 87.109 (87.655)	lr 0.00338
Train [92][780/3239]	Time 2.580 (0.650)	Data Time 0.001 (0.050)	Loss 2.1474 (2.2632)	Entropy 0.82322 (0.82411)	Top-1 acc 74.609 (69.947)	Top-5 acc 91.016 (87.646)	lr 0.00338
Train [92][790/3239]	Time 0.235 (0.645)	Data Time 0.001 (0.049)	Loss 2.2176 (2.2631)	Entropy 0.82319 (0.82410)	Top-1 acc 69.922 (69.948)	Top-5 acc 90.234 (87.649)	lr 0.00338
Train [92][800/3239]	Time 0.241 (0.643)	Data Time 0.001 (0.049)	Loss 2.2477 (2.2630)	Entropy 0.82320 (0.82408)	Top-1 acc 69.531 (69.955)	Top-5 acc 87.891 (87.648)	lr 0.00338
Train [92][810/3239]	Time 0.325 (0.641)	Data Time 0.001 (0.048)	Loss 2.3638 (2.2621)	Entropy 0.82319 (0.82407)	Top-1 acc 65.234 (69.966)	Top-5 acc 83.984 (87.656)	lr 0.00338
Train [92][820/3239]	Time 0.240 (0.639)	Data Time 0.001 (0.047)	Loss 2.0862 (2.2628)	Entropy 0.82314 (0.82406)	Top-1 acc 75.781 (69.946)	Top-5 acc 89.844 (87.642)	lr 0.00338
Train [92][830/3239]	Time 0.228 (0.637)	Data Time 0.001 (0.047)	Loss 2.3787 (2.2635)	Entropy 0.82309 (0.82405)	Top-1 acc 65.234 (69.932)	Top-5 acc 87.109 (87.633)	lr 0.00338
Train [92][840/3239]	Time 0.223 (0.634)	Data Time 0.001 (0.046)	Loss 2.4214 (2.2640)	Entropy 0.82302 (0.82404)	Top-1 acc 68.750 (69.918)	Top-5 acc 84.766 (87.630)	lr 0.00337
Train [92][850/3239]	Time 0.307 (0.633)	Data Time 0.001 (0.046)	Loss 2.2773 (2.2642)	Entropy 0.82301 (0.82403)	Top-1 acc 67.578 (69.908)	Top-5 acc 84.766 (87.628)	lr 0.00337
Train [92][860/3239]	Time 0.217 (0.631)	Data Time 0.001 (0.045)	Loss 2.4214 (2.2647)	Entropy 0.82302 (0.82402)	Top-1 acc 64.453 (69.892)	Top-5 acc 83.984 (87.614)	lr 0.00337
Train [92][870/3239]	Time 0.228 (0.629)	Data Time 0.002 (0.045)	Loss 2.2704 (2.2648)	Entropy 0.82301 (0.82400)	Top-1 acc 67.578 (69.890)	Top-5 acc 88.281 (87.617)	lr 0.00337
Train [92][880/3239]	Time 0.219 (0.628)	Data Time 0.001 (0.044)	Loss 2.0595 (2.2648)	Entropy 0.82299 (0.82399)	Top-1 acc 71.484 (69.892)	Top-5 acc 90.625 (87.615)	lr 0.00337
Train [92][890/3239]	Time 2.640 (0.626)	Data Time 0.002 (0.044)	Loss 2.2589 (2.2645)	Entropy 0.82299 (0.82398)	Top-1 acc 66.797 (69.899)	Top-5 acc 84.766 (87.616)	lr 0.00337
Train [92][900/3239]	Time 0.275 (0.622)	Data Time 0.002 (0.043)	Loss 2.2501 (2.2643)	Entropy 0.82287 (0.82397)	Top-1 acc 71.094 (69.899)	Top-5 acc 89.453 (87.616)	lr 0.00337
Train [92][910/3239]	Time 0.277 (0.621)	Data Time 0.001 (0.043)	Loss 2.5340 (2.2643)	Entropy 0.82289 (0.82396)	Top-1 acc 66.016 (69.899)	Top-5 acc 82.031 (87.618)	lr 0.00337
Train [92][920/3239]	Time 0.247 (0.619)	Data Time 0.001 (0.042)	Loss 2.2291 (2.2639)	Entropy 0.82288 (0.82395)	Top-1 acc 71.484 (69.918)	Top-5 acc 85.938 (87.617)	lr 0.00337
Train [92][930/3239]	Time 0.242 (0.618)	Data Time 0.001 (0.042)	Loss 2.1602 (2.2635)	Entropy 0.82285 (0.82393)	Top-1 acc 73.438 (69.927)	Top-5 acc 89.453 (87.623)	lr 0.00337
Train [92][940/3239]	Time 0.235 (0.616)	Data Time 0.001 (0.042)	Loss 2.1826 (2.2637)	Entropy 0.82274 (0.82392)	Top-1 acc 71.875 (69.924)	Top-5 acc 87.500 (87.617)	lr 0.00337
Train [92][950/3239]	Time 0.232 (0.615)	Data Time 0.001 (0.041)	Loss 2.1260 (2.2637)	Entropy 0.82272 (0.82391)	Top-1 acc 73.828 (69.921)	Top-5 acc 89.453 (87.615)	lr 0.00337
Train [92][960/3239]	Time 0.209 (0.613)	Data Time 0.001 (0.041)	Loss 2.3023 (2.2636)	Entropy 0.82274 (0.82390)	Top-1 acc 70.703 (69.921)	Top-5 acc 84.375 (87.609)	lr 0.00337
Train [92][970/3239]	Time 0.238 (0.612)	Data Time 0.001 (0.040)	Loss 2.2575 (2.2632)	Entropy 0.82267 (0.82388)	Top-1 acc 66.797 (69.925)	Top-5 acc 88.672 (87.616)	lr 0.00337
Train [92][980/3239]	Time 0.325 (0.610)	Data Time 0.001 (0.040)	Loss 2.1777 (2.2634)	Entropy 0.82265 (0.82387)	Top-1 acc 73.047 (69.916)	Top-5 acc 87.500 (87.611)	lr 0.00336
Train [92][990/3239]	Time 0.262 (0.609)	Data Time 0.001 (0.040)	Loss 2.3501 (2.2630)	Entropy 0.82263 (0.82386)	Top-1 acc 65.234 (69.912)	Top-5 acc 87.109 (87.624)	lr 0.00336
Train [92][1000/3239]	Time 2.513 (0.608)	Data Time 0.002 (0.039)	Loss 2.2875 (2.2630)	Entropy 0.82263 (0.82385)	Top-1 acc 69.922 (69.915)	Top-5 acc 84.375 (87.621)	lr 0.00336
Train [92][1010/3239]	Time 0.239 (0.604)	Data Time 0.002 (0.039)	Loss 2.3559 (2.2627)	Entropy 0.82265 (0.82384)	Top-1 acc 72.656 (69.932)	Top-5 acc 85.938 (87.623)	lr 0.00336
Train [92][1020/3239]	Time 0.243 (0.603)	Data Time 0.002 (0.038)	Loss 2.2308 (2.2627)	Entropy 0.82262 (0.82382)	Top-1 acc 69.922 (69.939)	Top-5 acc 88.672 (87.622)	lr 0.00336
Train [92][1030/3239]	Time 0.217 (0.602)	Data Time 0.001 (0.038)	Loss 2.4314 (2.2626)	Entropy 0.82272 (0.82381)	Top-1 acc 67.188 (69.943)	Top-5 acc 83.984 (87.617)	lr 0.00336
Train [92][1040/3239]	Time 0.294 (0.649)	Data Time 0.002 (0.038)	Loss 2.1908 (2.2628)	Entropy 0.82271 (0.82380)	Top-1 acc 70.312 (69.940)	Top-5 acc 89.453 (87.613)	lr 0.00336
Train [92][1050/3239]	Time 0.241 (0.648)	Data Time 0.002 (0.037)	Loss 2.3327 (2.2629)	Entropy 0.82274 (0.82379)	Top-1 acc 66.797 (69.933)	Top-5 acc 84.766 (87.610)	lr 0.00336
Train [92][1060/3239]	Time 0.237 (0.647)	Data Time 0.001 (0.037)	Loss 2.1257 (2.2631)	Entropy 0.82269 (0.82378)	Top-1 acc 70.703 (69.923)	Top-5 acc 90.234 (87.608)	lr 0.00336
Train [92][1070/3239]	Time 0.244 (0.645)	Data Time 0.001 (0.037)	Loss 2.3441 (2.2632)	Entropy 0.82268 (0.82377)	Top-1 acc 69.922 (69.917)	Top-5 acc 85.156 (87.607)	lr 0.00336
Train [92][1080/3239]	Time 0.230 (0.644)	Data Time 0.001 (0.036)	Loss 2.1616 (2.2631)	Entropy 0.82258 (0.82376)	Top-1 acc 71.094 (69.921)	Top-5 acc 89.062 (87.602)	lr 0.00336
Train [92][1090/3239]	Time 0.228 (0.642)	Data Time 0.001 (0.036)	Loss 2.2587 (2.2630)	Entropy 0.82250 (0.82375)	Top-1 acc 69.531 (69.926)	Top-5 acc 89.062 (87.606)	lr 0.00336
Train [92][1100/3239]	Time 0.253 (0.641)	Data Time 0.001 (0.036)	Loss 2.4491 (2.2631)	Entropy 0.82248 (0.82374)	Top-1 acc 64.844 (69.924)	Top-5 acc 83.594 (87.602)	lr 0.00336
Train [92][1110/3239]	Time 2.614 (0.639)	Data Time 0.001 (0.036)	Loss 2.3005 (2.2629)	Entropy 0.82248 (0.82373)	Top-1 acc 69.922 (69.929)	Top-5 acc 87.500 (87.604)	lr 0.00336
Train [92][1120/3239]	Time 0.224 (0.636)	Data Time 0.001 (0.035)	Loss 2.2857 (2.2630)	Entropy 0.82245 (0.82372)	Top-1 acc 68.359 (69.924)	Top-5 acc 87.500 (87.603)	lr 0.00336
Train [92][1130/3239]	Time 0.225 (0.634)	Data Time 0.001 (0.035)	Loss 2.0999 (2.2628)	Entropy 0.82241 (0.82371)	Top-1 acc 75.000 (69.932)	Top-5 acc 90.625 (87.609)	lr 0.00335
Train [92][1140/3239]	Time 0.229 (0.633)	Data Time 0.001 (0.035)	Loss 2.3349 (2.2628)	Entropy 0.82239 (0.82369)	Top-1 acc 72.656 (69.928)	Top-5 acc 85.938 (87.612)	lr 0.00335
Train [92][1150/3239]	Time 0.217 (0.632)	Data Time 0.001 (0.034)	Loss 2.3386 (2.2625)	Entropy 0.82243 (0.82368)	Top-1 acc 66.406 (69.936)	Top-5 acc 86.328 (87.616)	lr 0.00335
Train [92][1160/3239]	Time 0.238 (0.630)	Data Time 0.001 (0.034)	Loss 2.3446 (2.2623)	Entropy 0.82230 (0.82367)	Top-1 acc 69.531 (69.952)	Top-5 acc 87.500 (87.622)	lr 0.00335
Train [92][1170/3239]	Time 0.232 (0.629)	Data Time 0.001 (0.034)	Loss 2.1674 (2.2624)	Entropy 0.82227 (0.82366)	Top-1 acc 70.312 (69.946)	Top-5 acc 91.797 (87.623)	lr 0.00335
Train [92][1180/3239]	Time 0.236 (0.628)	Data Time 0.001 (0.034)	Loss 2.2394 (2.2621)	Entropy 0.82228 (0.82365)	Top-1 acc 71.875 (69.948)	Top-5 acc 90.234 (87.630)	lr 0.00335
Train [92][1190/3239]	Time 0.220 (0.626)	Data Time 0.001 (0.033)	Loss 2.2920 (2.2627)	Entropy 0.82215 (0.82364)	Top-1 acc 68.750 (69.935)	Top-5 acc 86.719 (87.614)	lr 0.00335
Train [92][1200/3239]	Time 0.245 (0.625)	Data Time 0.001 (0.033)	Loss 2.2179 (2.2624)	Entropy 0.82193 (0.82362)	Top-1 acc 70.312 (69.950)	Top-5 acc 89.062 (87.621)	lr 0.00335
Train [92][1210/3239]	Time 0.223 (0.624)	Data Time 0.001 (0.033)	Loss 2.3210 (2.2623)	Entropy 0.82191 (0.82361)	Top-1 acc 69.922 (69.943)	Top-5 acc 87.500 (87.628)	lr 0.00335
Train [92][1220/3239]	Time 2.524 (0.623)	Data Time 0.001 (0.032)	Loss 2.2308 (2.2626)	Entropy 0.82191 (0.82360)	Top-1 acc 72.266 (69.940)	Top-5 acc 87.109 (87.618)	lr 0.00335
Train [92][1230/3239]	Time 0.224 (0.620)	Data Time 0.001 (0.032)	Loss 2.2966 (2.2625)	Entropy 0.82192 (0.82358)	Top-1 acc 69.141 (69.940)	Top-5 acc 88.281 (87.618)	lr 0.00335
Train [92][1240/3239]	Time 0.234 (0.619)	Data Time 0.001 (0.032)	Loss 2.4809 (2.2629)	Entropy 0.82188 (0.82357)	Top-1 acc 65.625 (69.935)	Top-5 acc 83.984 (87.612)	lr 0.00335
Train [92][1250/3239]	Time 0.245 (0.618)	Data Time 0.001 (0.032)	Loss 2.2529 (2.2633)	Entropy 0.82182 (0.82355)	Top-1 acc 70.703 (69.933)	Top-5 acc 87.500 (87.610)	lr 0.00335
Train [92][1260/3239]	Time 0.246 (0.616)	Data Time 0.001 (0.032)	Loss 2.4323 (2.2632)	Entropy 0.82183 (0.82354)	Top-1 acc 64.062 (69.923)	Top-5 acc 85.938 (87.612)	lr 0.00335
Train [92][1270/3239]	Time 0.218 (0.615)	Data Time 0.001 (0.031)	Loss 2.2932 (2.2627)	Entropy 0.82181 (0.82353)	Top-1 acc 71.875 (69.938)	Top-5 acc 86.328 (87.623)	lr 0.00335
Train [92][1280/3239]	Time 0.223 (0.614)	Data Time 0.001 (0.031)	Loss 2.0730 (2.2631)	Entropy 0.82182 (0.82351)	Top-1 acc 75.781 (69.930)	Top-5 acc 90.234 (87.609)	lr 0.00334
Train [92][1290/3239]	Time 0.231 (0.613)	Data Time 0.001 (0.031)	Loss 2.2077 (2.2634)	Entropy 0.82181 (0.82350)	Top-1 acc 71.094 (69.928)	Top-5 acc 88.672 (87.606)	lr 0.00334
Train [92][1300/3239]	Time 0.234 (0.612)	Data Time 0.001 (0.031)	Loss 2.2477 (2.2634)	Entropy 0.82179 (0.82349)	Top-1 acc 69.141 (69.932)	Top-5 acc 87.109 (87.603)	lr 0.00334
Train [92][1310/3239]	Time 0.232 (0.611)	Data Time 0.001 (0.030)	Loss 2.2475 (2.2636)	Entropy 0.82173 (0.82347)	Top-1 acc 68.359 (69.925)	Top-5 acc 88.281 (87.603)	lr 0.00334
Train [92][1320/3239]	Time 0.323 (0.610)	Data Time 0.001 (0.030)	Loss 2.2672 (2.2638)	Entropy 0.82164 (0.82346)	Top-1 acc 68.750 (69.924)	Top-5 acc 88.672 (87.598)	lr 0.00334
Train [92][1330/3239]	Time 2.643 (0.609)	Data Time 0.001 (0.030)	Loss 2.2521 (2.2638)	Entropy 0.82164 (0.82345)	Top-1 acc 71.094 (69.922)	Top-5 acc 89.062 (87.603)	lr 0.00334
Train [92][1340/3239]	Time 0.226 (0.607)	Data Time 0.001 (0.030)	Loss 2.2548 (2.2639)	Entropy 0.82153 (0.82343)	Top-1 acc 67.578 (69.918)	Top-5 acc 89.062 (87.604)	lr 0.00334
Train [92][1350/3239]	Time 0.231 (0.606)	Data Time 0.001 (0.030)	Loss 2.2186 (2.2640)	Entropy 0.82128 (0.82342)	Top-1 acc 70.312 (69.916)	Top-5 acc 86.719 (87.602)	lr 0.00334
Train [92][1360/3239]	Time 0.228 (0.605)	Data Time 0.001 (0.029)	Loss 2.2428 (2.2642)	Entropy 0.82125 (0.82340)	Top-1 acc 71.094 (69.914)	Top-5 acc 87.109 (87.597)	lr 0.00334
Train [92][1370/3239]	Time 0.236 (0.604)	Data Time 0.001 (0.029)	Loss 2.2520 (2.2642)	Entropy 0.82121 (0.82339)	Top-1 acc 71.875 (69.914)	Top-5 acc 89.062 (87.598)	lr 0.00334
Train [92][1380/3239]	Time 0.224 (0.603)	Data Time 0.001 (0.029)	Loss 2.3485 (2.2644)	Entropy 0.82115 (0.82337)	Top-1 acc 68.750 (69.916)	Top-5 acc 86.328 (87.597)	lr 0.00334
Train [92][1390/3239]	Time 0.239 (0.602)	Data Time 0.002 (0.029)	Loss 2.1459 (2.2642)	Entropy 0.82116 (0.82335)	Top-1 acc 73.828 (69.928)	Top-5 acc 90.234 (87.599)	lr 0.00334
Train [92][1400/3239]	Time 0.253 (0.640)	Data Time 0.002 (0.029)	Loss 2.3277 (2.2643)	Entropy 0.82108 (0.82334)	Top-1 acc 69.141 (69.925)	Top-5 acc 85.156 (87.598)	lr 0.00334
Train [92][1410/3239]	Time 0.232 (0.639)	Data Time 0.002 (0.028)	Loss 2.2620 (2.2641)	Entropy 0.82112 (0.82332)	Top-1 acc 67.969 (69.932)	Top-5 acc 86.719 (87.600)	lr 0.00334
Train [92][1420/3239]	Time 0.228 (0.638)	Data Time 0.001 (0.028)	Loss 2.4422 (2.2640)	Entropy 0.82107 (0.82331)	Top-1 acc 66.406 (69.938)	Top-5 acc 85.156 (87.600)	lr 0.00333
Train [92][1430/3239]	Time 0.243 (0.637)	Data Time 0.002 (0.028)	Loss 2.3121 (2.2644)	Entropy 0.82099 (0.82329)	Top-1 acc 68.359 (69.927)	Top-5 acc 84.766 (87.592)	lr 0.00333
Train [92][1440/3239]	Time 2.533 (0.636)	Data Time 0.003 (0.028)	Loss 2.2020 (2.2644)	Entropy 0.82099 (0.82327)	Top-1 acc 71.094 (69.926)	Top-5 acc 89.062 (87.591)	lr 0.00333
Train [92][1450/3239]	Time 0.328 (0.633)	Data Time 0.001 (0.028)	Loss 2.3342 (2.2647)	Entropy 0.82092 (0.82326)	Top-1 acc 65.234 (69.918)	Top-5 acc 85.938 (87.579)	lr 0.00333
Train [92][1460/3239]	Time 0.217 (0.632)	Data Time 0.001 (0.027)	Loss 2.3816 (2.2645)	Entropy 0.82086 (0.82324)	Top-1 acc 68.359 (69.923)	Top-5 acc 85.938 (87.584)	lr 0.00333
Train [92][1470/3239]	Time 0.234 (0.631)	Data Time 0.001 (0.027)	Loss 2.4162 (2.2648)	Entropy 0.82079 (0.82323)	Top-1 acc 69.531 (69.918)	Top-5 acc 82.422 (87.578)	lr 0.00333
Train [92][1480/3239]	Time 0.241 (0.630)	Data Time 0.001 (0.027)	Loss 2.2251 (2.2648)	Entropy 0.82066 (0.82321)	Top-1 acc 75.000 (69.916)	Top-5 acc 87.109 (87.576)	lr 0.00333
Train [92][1490/3239]	Time 0.325 (0.629)	Data Time 0.001 (0.027)	Loss 2.2430 (2.2647)	Entropy 0.82066 (0.82319)	Top-1 acc 70.703 (69.909)	Top-5 acc 87.500 (87.578)	lr 0.00333
Train [92][1500/3239]	Time 0.237 (0.628)	Data Time 0.001 (0.027)	Loss 2.2472 (2.2649)	Entropy 0.82061 (0.82317)	Top-1 acc 70.312 (69.902)	Top-5 acc 87.891 (87.577)	lr 0.00333
Train [92][1510/3239]	Time 0.254 (0.627)	Data Time 0.001 (0.027)	Loss 2.2913 (2.2648)	Entropy 0.82057 (0.82316)	Top-1 acc 68.359 (69.906)	Top-5 acc 86.719 (87.579)	lr 0.00333
Train [92][1520/3239]	Time 0.232 (0.626)	Data Time 0.001 (0.026)	Loss 2.1846 (2.2646)	Entropy 0.82058 (0.82314)	Top-1 acc 70.703 (69.909)	Top-5 acc 88.281 (87.581)	lr 0.00333
Train [92][1530/3239]	Time 0.232 (0.625)	Data Time 0.001 (0.026)	Loss 2.1169 (2.2646)	Entropy 0.82057 (0.82312)	Top-1 acc 75.391 (69.911)	Top-5 acc 90.234 (87.583)	lr 0.00333
Train [92][1540/3239]	Time 0.230 (0.624)	Data Time 0.001 (0.026)	Loss 2.2038 (2.2646)	Entropy 0.82060 (0.82311)	Top-1 acc 67.578 (69.906)	Top-5 acc 89.062 (87.581)	lr 0.00333
Train [92][1550/3239]	Time 2.576 (0.623)	Data Time 0.001 (0.026)	Loss 2.2309 (2.2646)	Entropy 0.82060 (0.82309)	Top-1 acc 70.703 (69.908)	Top-5 acc 87.109 (87.581)	lr 0.00333
Train [92][1560/3239]	Time 0.227 (0.621)	Data Time 0.001 (0.026)	Loss 2.2735 (2.2646)	Entropy 0.82053 (0.82307)	Top-1 acc 66.016 (69.906)	Top-5 acc 86.719 (87.579)	lr 0.00333
Train [92][1570/3239]	Time 0.255 (0.620)	Data Time 0.002 (0.026)	Loss 2.1662 (2.2644)	Entropy 0.82050 (0.82306)	Top-1 acc 73.438 (69.909)	Top-5 acc 87.500 (87.582)	lr 0.00332
Train [92][1580/3239]	Time 0.244 (0.619)	Data Time 0.001 (0.025)	Loss 2.2470 (2.2647)	Entropy 0.82050 (0.82304)	Top-1 acc 69.141 (69.905)	Top-5 acc 87.891 (87.578)	lr 0.00332
Train [92][1590/3239]	Time 0.243 (0.618)	Data Time 0.001 (0.025)	Loss 2.2711 (2.2647)	Entropy 0.82048 (0.82303)	Top-1 acc 69.531 (69.905)	Top-5 acc 86.719 (87.575)	lr 0.00332
Train [92][1600/3239]	Time 0.233 (0.617)	Data Time 0.001 (0.025)	Loss 2.1764 (2.2651)	Entropy 0.82049 (0.82301)	Top-1 acc 71.875 (69.895)	Top-5 acc 89.062 (87.567)	lr 0.00332
Train [92][1610/3239]	Time 0.224 (0.616)	Data Time 0.001 (0.025)	Loss 2.3020 (2.2650)	Entropy 0.82048 (0.82299)	Top-1 acc 67.969 (69.898)	Top-5 acc 88.281 (87.569)	lr 0.00332
Train [92][1620/3239]	Time 0.217 (0.615)	Data Time 0.001 (0.025)	Loss 2.2637 (2.2651)	Entropy 0.82028 (0.82298)	Top-1 acc 68.359 (69.890)	Top-5 acc 89.062 (87.563)	lr 0.00332
Train [92][1630/3239]	Time 0.222 (0.615)	Data Time 0.001 (0.025)	Loss 2.2803 (2.2654)	Entropy 0.82019 (0.82296)	Top-1 acc 71.484 (69.887)	Top-5 acc 86.328 (87.557)	lr 0.00332
Train [92][1640/3239]	Time 0.225 (0.614)	Data Time 0.001 (0.025)	Loss 2.1850 (2.2649)	Entropy 0.82007 (0.82294)	Top-1 acc 72.656 (69.895)	Top-5 acc 88.672 (87.567)	lr 0.00332
Train [92][1650/3239]	Time 0.272 (0.613)	Data Time 0.001 (0.024)	Loss 2.1992 (2.2649)	Entropy 0.82005 (0.82293)	Top-1 acc 70.312 (69.892)	Top-5 acc 89.062 (87.571)	lr 0.00332
Train [92][1660/3239]	Time 2.698 (0.612)	Data Time 0.001 (0.024)	Loss 2.3327 (2.2649)	Entropy 0.82005 (0.82291)	Top-1 acc 68.750 (69.889)	Top-5 acc 87.891 (87.572)	lr 0.00332
Train [92][1670/3239]	Time 0.237 (0.610)	Data Time 0.001 (0.024)	Loss 2.4227 (2.2650)	Entropy 0.82003 (0.82289)	Top-1 acc 64.844 (69.888)	Top-5 acc 81.250 (87.570)	lr 0.00332
Train [92][1680/3239]	Time 0.275 (0.609)	Data Time 0.001 (0.024)	Loss 2.1135 (2.2651)	Entropy 0.82006 (0.82288)	Top-1 acc 72.266 (69.883)	Top-5 acc 89.062 (87.569)	lr 0.00332
Train [92][1690/3239]	Time 0.242 (0.609)	Data Time 0.001 (0.024)	Loss 2.2821 (2.2651)	Entropy 0.81999 (0.82286)	Top-1 acc 64.453 (69.887)	Top-5 acc 89.062 (87.570)	lr 0.00332
Train [92][1700/3239]	Time 0.343 (0.608)	Data Time 0.001 (0.024)	Loss 2.4655 (2.2652)	Entropy 0.81991 (0.82284)	Top-1 acc 65.625 (69.888)	Top-5 acc 85.547 (87.569)	lr 0.00332
Train [92][1710/3239]	Time 0.254 (0.607)	Data Time 0.001 (0.024)	Loss 2.1229 (2.2652)	Entropy 0.81989 (0.82282)	Top-1 acc 74.219 (69.892)	Top-5 acc 88.672 (87.565)	lr 0.00331
Train [92][1720/3239]	Time 0.235 (0.606)	Data Time 0.001 (0.023)	Loss 2.2766 (2.2653)	Entropy 0.81989 (0.82281)	Top-1 acc 69.531 (69.887)	Top-5 acc 88.672 (87.561)	lr 0.00331
Train [92][1730/3239]	Time 0.205 (0.606)	Data Time 0.001 (0.023)	Loss 2.3696 (2.2655)	Entropy 0.82002 (0.82279)	Top-1 acc 68.750 (69.883)	Top-5 acc 85.938 (87.554)	lr 0.00331
Train [92][1740/3239]	Time 0.229 (0.605)	Data Time 0.001 (0.023)	Loss 2.3919 (2.2659)	Entropy 0.82003 (0.82277)	Top-1 acc 68.359 (69.874)	Top-5 acc 85.938 (87.549)	lr 0.00331
Train [92][1750/3239]	Time 0.217 (0.604)	Data Time 0.001 (0.023)	Loss 2.2823 (2.2657)	Entropy 0.81998 (0.82276)	Top-1 acc 69.922 (69.881)	Top-5 acc 87.891 (87.547)	lr 0.00331
Train [92][1760/3239]	Time 0.348 (0.632)	Data Time 0.004 (0.023)	Loss 2.0297 (2.2658)	Entropy 0.81998 (0.82274)	Top-1 acc 74.219 (69.877)	Top-5 acc 92.188 (87.549)	lr 0.00331
Train [92][1770/3239]	Time 2.679 (0.632)	Data Time 0.002 (0.023)	Loss 2.1190 (2.2659)	Entropy 0.81998 (0.82273)	Top-1 acc 73.438 (69.871)	Top-5 acc 89.062 (87.544)	lr 0.00331
Train [92][1780/3239]	Time 0.239 (0.630)	Data Time 0.001 (0.023)	Loss 2.1518 (2.2658)	Entropy 0.82004 (0.82271)	Top-1 acc 71.094 (69.870)	Top-5 acc 88.672 (87.545)	lr 0.00331
Train [92][1790/3239]	Time 0.221 (0.629)	Data Time 0.001 (0.023)	Loss 2.1710 (2.2657)	Entropy 0.81998 (0.82270)	Top-1 acc 73.047 (69.873)	Top-5 acc 88.281 (87.541)	lr 0.00331
Train [92][1800/3239]	Time 0.238 (0.628)	Data Time 0.001 (0.023)	Loss 2.2375 (2.2661)	Entropy 0.81996 (0.82268)	Top-1 acc 74.219 (69.862)	Top-5 acc 85.547 (87.538)	lr 0.00331
Train [92][1810/3239]	Time 0.254 (0.628)	Data Time 0.002 (0.022)	Loss 2.2477 (2.2662)	Entropy 0.81997 (0.82267)	Top-1 acc 72.656 (69.859)	Top-5 acc 87.109 (87.536)	lr 0.00331
Train [92][1820/3239]	Time 0.226 (0.627)	Data Time 0.001 (0.022)	Loss 2.2117 (2.2662)	Entropy 0.81993 (0.82265)	Top-1 acc 71.484 (69.857)	Top-5 acc 89.062 (87.536)	lr 0.00331
Train [92][1830/3239]	Time 0.241 (0.626)	Data Time 0.001 (0.022)	Loss 2.2500 (2.2666)	Entropy 0.81982 (0.82264)	Top-1 acc 71.484 (69.847)	Top-5 acc 89.062 (87.531)	lr 0.00331
Train [92][1840/3239]	Time 0.227 (0.625)	Data Time 0.001 (0.022)	Loss 2.3164 (2.2664)	Entropy 0.81990 (0.82262)	Top-1 acc 67.578 (69.851)	Top-5 acc 85.547 (87.536)	lr 0.00331
Train [92][1850/3239]	Time 0.240 (0.625)	Data Time 0.002 (0.022)	Loss 2.2295 (2.2663)	Entropy 0.81990 (0.82261)	Top-1 acc 71.875 (69.855)	Top-5 acc 85.938 (87.534)	lr 0.00331
Train [92][1860/3239]	Time 0.237 (0.624)	Data Time 0.001 (0.022)	Loss 2.2933 (2.2662)	Entropy 0.81993 (0.82259)	Top-1 acc 69.141 (69.856)	Top-5 acc 87.500 (87.537)	lr 0.00330
Train [92][1870/3239]	Time 0.319 (0.623)	Data Time 0.001 (0.022)	Loss 2.3306 (2.2663)	Entropy 0.81993 (0.82258)	Top-1 acc 70.312 (69.850)	Top-5 acc 87.109 (87.535)	lr 0.00330
Train [92][1880/3239]	Time 2.641 (0.622)	Data Time 0.001 (0.022)	Loss 2.2651 (2.2666)	Entropy 0.81993 (0.82256)	Top-1 acc 66.016 (69.841)	Top-5 acc 89.453 (87.532)	lr 0.00330
Train [92][1890/3239]	Time 0.232 (0.620)	Data Time 0.001 (0.022)	Loss 2.2111 (2.2665)	Entropy 0.81992 (0.82255)	Top-1 acc 72.266 (69.842)	Top-5 acc 87.109 (87.535)	lr 0.00330
Train [92][1900/3239]	Time 0.248 (0.620)	Data Time 0.001 (0.021)	Loss 2.1537 (2.2662)	Entropy 0.81991 (0.82254)	Top-1 acc 72.266 (69.856)	Top-5 acc 89.844 (87.539)	lr 0.00330
Train [92][1910/3239]	Time 0.342 (0.619)	Data Time 0.001 (0.021)	Loss 2.4861 (2.2664)	Entropy 0.81989 (0.82252)	Top-1 acc 63.281 (69.853)	Top-5 acc 85.156 (87.535)	lr 0.00330
Train [92][1920/3239]	Time 0.242 (0.618)	Data Time 0.001 (0.021)	Loss 2.2881 (2.2666)	Entropy 0.81991 (0.82251)	Top-1 acc 68.750 (69.845)	Top-5 acc 88.672 (87.534)	lr 0.00330
Train [92][1930/3239]	Time 0.231 (0.618)	Data Time 0.001 (0.021)	Loss 2.1649 (2.2665)	Entropy 0.81991 (0.82250)	Top-1 acc 75.000 (69.848)	Top-5 acc 89.453 (87.537)	lr 0.00330
Train [92][1940/3239]	Time 0.225 (0.617)	Data Time 0.001 (0.021)	Loss 2.2602 (2.2665)	Entropy 0.81991 (0.82248)	Top-1 acc 71.484 (69.844)	Top-5 acc 87.891 (87.538)	lr 0.00330
Train [92][1950/3239]	Time 0.336 (0.616)	Data Time 0.001 (0.021)	Loss 2.1877 (2.2664)	Entropy 0.81965 (0.82247)	Top-1 acc 72.266 (69.846)	Top-5 acc 89.062 (87.539)	lr 0.00330
Train [92][1960/3239]	Time 0.220 (0.616)	Data Time 0.001 (0.021)	Loss 2.2176 (2.2662)	Entropy 0.81967 (0.82245)	Top-1 acc 66.797 (69.846)	Top-5 acc 90.234 (87.549)	lr 0.00330
Train [92][1970/3239]	Time 0.225 (0.615)	Data Time 0.001 (0.021)	Loss 2.1971 (2.2661)	Entropy 0.81967 (0.82244)	Top-1 acc 70.312 (69.850)	Top-5 acc 90.234 (87.550)	lr 0.00330
Train [92][1980/3239]	Time 0.262 (0.614)	Data Time 0.001 (0.021)	Loss 2.2505 (2.2660)	Entropy 0.81959 (0.82243)	Top-1 acc 71.484 (69.851)	Top-5 acc 87.891 (87.553)	lr 0.00330
Train [92][1990/3239]	Time 2.556 (0.614)	Data Time 0.002 (0.021)	Loss 2.2374 (2.2659)	Entropy 0.81959 (0.82241)	Top-1 acc 72.656 (69.853)	Top-5 acc 90.234 (87.558)	lr 0.00330
Train [92][2000/3239]	Time 0.228 (0.612)	Data Time 0.001 (0.020)	Loss 2.2154 (2.2657)	Entropy 0.81950 (0.82240)	Top-1 acc 69.922 (69.858)	Top-5 acc 89.453 (87.567)	lr 0.00329
Train [92][2010/3239]	Time 0.225 (0.611)	Data Time 0.001 (0.020)	Loss 2.2747 (2.2659)	Entropy 0.81948 (0.82238)	Top-1 acc 69.922 (69.853)	Top-5 acc 87.891 (87.566)	lr 0.00329
Train [92][2020/3239]	Time 0.241 (0.610)	Data Time 0.002 (0.020)	Loss 2.3188 (2.2661)	Entropy 0.81943 (0.82237)	Top-1 acc 66.016 (69.849)	Top-5 acc 87.500 (87.563)	lr 0.00329
Train [92][2030/3239]	Time 0.242 (0.610)	Data Time 0.001 (0.020)	Loss 2.3334 (2.2661)	Entropy 0.81935 (0.82235)	Top-1 acc 67.969 (69.848)	Top-5 acc 85.547 (87.562)	lr 0.00329
Train [92][2040/3239]	Time 0.238 (0.609)	Data Time 0.001 (0.020)	Loss 2.3140 (2.2665)	Entropy 0.81937 (0.82234)	Top-1 acc 70.312 (69.842)	Top-5 acc 86.328 (87.553)	lr 0.00329
Train [92][2050/3239]	Time 0.242 (0.608)	Data Time 0.001 (0.020)	Loss 2.2699 (2.2666)	Entropy 0.81931 (0.82232)	Top-1 acc 68.750 (69.840)	Top-5 acc 87.109 (87.550)	lr 0.00329
Train [92][2060/3239]	Time 0.235 (0.608)	Data Time 0.001 (0.020)	Loss 2.2640 (2.2666)	Entropy 0.81935 (0.82231)	Top-1 acc 70.312 (69.842)	Top-5 acc 85.547 (87.549)	lr 0.00329
Train [92][2070/3239]	Time 0.229 (0.607)	Data Time 0.001 (0.020)	Loss 2.4191 (2.2667)	Entropy 0.81931 (0.82230)	Top-1 acc 67.969 (69.838)	Top-5 acc 83.594 (87.551)	lr 0.00329
Train [92][2080/3239]	Time 0.329 (0.607)	Data Time 0.001 (0.020)	Loss 2.1993 (2.2664)	Entropy 0.81926 (0.82228)	Top-1 acc 71.484 (69.847)	Top-5 acc 88.672 (87.555)	lr 0.00329
Train [92][2090/3239]	Time 0.217 (0.606)	Data Time 0.001 (0.020)	Loss 2.2433 (2.2666)	Entropy 0.81920 (0.82227)	Top-1 acc 68.750 (69.839)	Top-5 acc 89.453 (87.553)	lr 0.00329
Train [92][2100/3239]	Time 2.581 (0.605)	Data Time 0.002 (0.020)	Loss 2.3020 (2.2665)	Entropy 0.81920 (0.82225)	Top-1 acc 67.578 (69.840)	Top-5 acc 89.453 (87.555)	lr 0.00329
Train [92][2110/3239]	Time 0.240 (0.604)	Data Time 0.001 (0.019)	Loss 2.3767 (2.2666)	Entropy 0.81913 (0.82224)	Top-1 acc 64.453 (69.840)	Top-5 acc 85.547 (87.552)	lr 0.00329
Train [92][2120/3239]	Time 0.221 (0.603)	Data Time 0.001 (0.019)	Loss 2.2221 (2.2664)	Entropy 0.81910 (0.82222)	Top-1 acc 71.094 (69.845)	Top-5 acc 87.891 (87.553)	lr 0.00329
Train [92][2130/3239]	Time 0.453 (0.626)	Data Time 0.003 (0.019)	Loss 2.2360 (2.2664)	Entropy 0.81915 (0.82221)	Top-1 acc 69.922 (69.846)	Top-5 acc 87.500 (87.554)	lr 0.00329
Train [92][2140/3239]	Time 0.254 (0.626)	Data Time 0.002 (0.019)	Loss 2.2182 (2.2666)	Entropy 0.81914 (0.82219)	Top-1 acc 72.266 (69.844)	Top-5 acc 88.672 (87.549)	lr 0.00329
Train [92][2150/3239]	Time 0.243 (0.625)	Data Time 0.002 (0.019)	Loss 2.4112 (2.2668)	Entropy 0.81916 (0.82218)	Top-1 acc 69.531 (69.841)	Top-5 acc 84.375 (87.548)	lr 0.00328
Train [92][2160/3239]	Time 0.225 (0.625)	Data Time 0.001 (0.019)	Loss 2.1825 (2.2665)	Entropy 0.81909 (0.82217)	Top-1 acc 73.047 (69.848)	Top-5 acc 89.844 (87.557)	lr 0.00328
Train [92][2170/3239]	Time 0.243 (0.624)	Data Time 0.001 (0.019)	Loss 2.1661 (2.2665)	Entropy 0.81934 (0.82215)	Top-1 acc 71.484 (69.848)	Top-5 acc 89.844 (87.556)	lr 0.00328
Train [92][2180/3239]	Time 0.217 (0.623)	Data Time 0.001 (0.019)	Loss 2.2521 (2.2664)	Entropy 0.81935 (0.82214)	Top-1 acc 67.188 (69.846)	Top-5 acc 88.281 (87.559)	lr 0.00328
Train [92][2190/3239]	Time 0.238 (0.623)	Data Time 0.001 (0.019)	Loss 2.3207 (2.2664)	Entropy 0.81929 (0.82213)	Top-1 acc 67.188 (69.850)	Top-5 acc 87.500 (87.557)	lr 0.00328
Train [92][2200/3239]	Time 0.267 (0.622)	Data Time 0.001 (0.019)	Loss 2.3142 (2.2665)	Entropy 0.81927 (0.82211)	Top-1 acc 70.703 (69.845)	Top-5 acc 87.109 (87.554)	lr 0.00328
Train [92][2210/3239]	Time 2.446 (0.621)	Data Time 0.001 (0.019)	Loss 2.2543 (2.2666)	Entropy 0.81927 (0.82210)	Top-1 acc 71.484 (69.843)	Top-5 acc 88.672 (87.553)	lr 0.00328
Train [92][2220/3239]	Time 0.236 (0.620)	Data Time 0.001 (0.019)	Loss 2.2741 (2.2665)	Entropy 0.81921 (0.82209)	Top-1 acc 69.531 (69.845)	Top-5 acc 86.719 (87.553)	lr 0.00328
Train [92][2230/3239]	Time 0.258 (0.619)	Data Time 0.002 (0.019)	Loss 2.4196 (2.2666)	Entropy 0.81910 (0.82207)	Top-1 acc 66.406 (69.843)	Top-5 acc 83.594 (87.550)	lr 0.00328
Train [92][2240/3239]	Time 0.246 (0.618)	Data Time 0.001 (0.018)	Loss 2.2859 (2.2666)	Entropy 0.81905 (0.82206)	Top-1 acc 69.141 (69.841)	Top-5 acc 87.500 (87.550)	lr 0.00328
Train [92][2250/3239]	Time 0.321 (0.618)	Data Time 0.001 (0.018)	Loss 2.3294 (2.2665)	Entropy 0.81905 (0.82205)	Top-1 acc 70.312 (69.847)	Top-5 acc 86.719 (87.553)	lr 0.00328
Train [92][2260/3239]	Time 0.260 (0.617)	Data Time 0.001 (0.018)	Loss 2.3330 (2.2664)	Entropy 0.81906 (0.82203)	Top-1 acc 67.969 (69.848)	Top-5 acc 85.938 (87.554)	lr 0.00328
Train [92][2270/3239]	Time 0.230 (0.617)	Data Time 0.001 (0.018)	Loss 2.1729 (2.2663)	Entropy 0.81906 (0.82202)	Top-1 acc 73.438 (69.850)	Top-5 acc 89.844 (87.557)	lr 0.00328
Train [92][2280/3239]	Time 0.220 (0.616)	Data Time 0.001 (0.018)	Loss 2.1395 (2.2662)	Entropy 0.81897 (0.82201)	Top-1 acc 73.828 (69.855)	Top-5 acc 88.281 (87.558)	lr 0.00328
Train [92][2290/3239]	Time 0.222 (0.615)	Data Time 0.002 (0.018)	Loss 2.2626 (2.2662)	Entropy 0.81889 (0.82199)	Top-1 acc 69.922 (69.855)	Top-5 acc 88.672 (87.558)	lr 0.00328
Train [92][2300/3239]	Time 0.248 (0.615)	Data Time 0.001 (0.018)	Loss 2.2131 (2.2663)	Entropy 0.81884 (0.82198)	Top-1 acc 68.359 (69.851)	Top-5 acc 90.625 (87.557)	lr 0.00327
Train [92][2310/3239]	Time 0.229 (0.614)	Data Time 0.001 (0.018)	Loss 2.2037 (2.2665)	Entropy 0.81876 (0.82197)	Top-1 acc 73.047 (69.847)	Top-5 acc 89.062 (87.555)	lr 0.00327
Train [92][2320/3239]	Time 2.635 (0.614)	Data Time 0.001 (0.018)	Loss 2.1985 (2.2665)	Entropy 0.81876 (0.82195)	Top-1 acc 70.703 (69.848)	Top-5 acc 89.062 (87.558)	lr 0.00327
Train [92][2330/3239]	Time 0.228 (0.612)	Data Time 0.001 (0.018)	Loss 2.1931 (2.2665)	Entropy 0.81878 (0.82194)	Top-1 acc 71.094 (69.847)	Top-5 acc 89.844 (87.559)	lr 0.00327
Train [92][2340/3239]	Time 0.235 (0.612)	Data Time 0.001 (0.018)	Loss 2.2149 (2.2665)	Entropy 0.81867 (0.82193)	Top-1 acc 74.219 (69.848)	Top-5 acc 85.547 (87.560)	lr 0.00327
Train [92][2350/3239]	Time 0.244 (0.611)	Data Time 0.001 (0.018)	Loss 2.3157 (2.2665)	Entropy 0.81867 (0.82191)	Top-1 acc 66.797 (69.847)	Top-5 acc 85.547 (87.562)	lr 0.00327
Train [92][2360/3239]	Time 0.221 (0.610)	Data Time 0.001 (0.018)	Loss 2.4191 (2.2666)	Entropy 0.81853 (0.82190)	Top-1 acc 66.797 (69.845)	Top-5 acc 87.500 (87.560)	lr 0.00327
Train [92][2370/3239]	Time 0.240 (0.610)	Data Time 0.002 (0.017)	Loss 2.2808 (2.2668)	Entropy 0.81846 (0.82188)	Top-1 acc 67.969 (69.838)	Top-5 acc 87.891 (87.556)	lr 0.00327
Train [92][2380/3239]	Time 0.336 (0.609)	Data Time 0.001 (0.017)	Loss 2.3037 (2.2669)	Entropy 0.81839 (0.82187)	Top-1 acc 71.875 (69.838)	Top-5 acc 85.547 (87.554)	lr 0.00327
Train [92][2390/3239]	Time 0.232 (0.609)	Data Time 0.002 (0.017)	Loss 2.2196 (2.2668)	Entropy 0.81840 (0.82185)	Top-1 acc 68.750 (69.839)	Top-5 acc 88.672 (87.555)	lr 0.00327
Train [92][2400/3239]	Time 0.234 (0.608)	Data Time 0.002 (0.017)	Loss 2.1017 (2.2667)	Entropy 0.81839 (0.82184)	Top-1 acc 73.828 (69.841)	Top-5 acc 90.625 (87.558)	lr 0.00327
Train [92][2410/3239]	Time 0.223 (0.608)	Data Time 0.001 (0.017)	Loss 2.3039 (2.2669)	Entropy 0.81838 (0.82183)	Top-1 acc 68.750 (69.837)	Top-5 acc 86.328 (87.557)	lr 0.00327
Train [92][2420/3239]	Time 0.394 (0.607)	Data Time 0.001 (0.017)	Loss 2.2594 (2.2669)	Entropy 0.81833 (0.82181)	Top-1 acc 73.828 (69.836)	Top-5 acc 88.672 (87.557)	lr 0.00327
Train [92][2430/3239]	Time 2.511 (0.607)	Data Time 0.001 (0.017)	Loss 2.2452 (2.2670)	Entropy 0.81833 (0.82180)	Top-1 acc 69.922 (69.837)	Top-5 acc 88.281 (87.556)	lr 0.00327
Train [92][2440/3239]	Time 0.237 (0.605)	Data Time 0.001 (0.017)	Loss 2.2216 (2.2671)	Entropy 0.81832 (0.82178)	Top-1 acc 75.000 (69.831)	Top-5 acc 88.281 (87.553)	lr 0.00326
Train [92][2450/3239]	Time 0.226 (0.605)	Data Time 0.001 (0.017)	Loss 2.4548 (2.2672)	Entropy 0.81836 (0.82177)	Top-1 acc 65.234 (69.834)	Top-5 acc 86.328 (87.551)	lr 0.00326
Train [92][2460/3239]	Time 0.232 (0.604)	Data Time 0.001 (0.017)	Loss 2.2283 (2.2671)	Entropy 0.81837 (0.82175)	Top-1 acc 70.312 (69.839)	Top-5 acc 87.891 (87.551)	lr 0.00326
Train [92][2470/3239]	Time 0.235 (0.604)	Data Time 0.001 (0.017)	Loss 2.1821 (2.2670)	Entropy 0.81835 (0.82174)	Top-1 acc 71.484 (69.842)	Top-5 acc 89.453 (87.551)	lr 0.00326
Train [92][2480/3239]	Time 0.240 (0.603)	Data Time 0.006 (0.017)	Loss 2.2771 (2.2671)	Entropy 0.81835 (0.82173)	Top-1 acc 70.312 (69.841)	Top-5 acc 87.109 (87.550)	lr 0.00326
Train [92][2490/3239]	Time 0.251 (0.624)	Data Time 0.002 (0.017)	Loss 2.2298 (2.2672)	Entropy 0.81826 (0.82171)	Top-1 acc 71.875 (69.838)	Top-5 acc 88.672 (87.550)	lr 0.00326
Train [92][2500/3239]	Time 0.238 (0.624)	Data Time 0.002 (0.017)	Loss 2.3283 (2.2670)	Entropy 0.81820 (0.82170)	Top-1 acc 68.359 (69.841)	Top-5 acc 84.766 (87.552)	lr 0.00326
Train [92][2510/3239]	Time 0.228 (0.623)	Data Time 0.002 (0.017)	Loss 2.2646 (2.2670)	Entropy 0.81820 (0.82169)	Top-1 acc 69.922 (69.847)	Top-5 acc 87.109 (87.551)	lr 0.00326
Train [92][2520/3239]	Time 0.251 (0.623)	Data Time 0.002 (0.017)	Loss 2.3384 (2.2670)	Entropy 0.81820 (0.82167)	Top-1 acc 65.625 (69.842)	Top-5 acc 85.547 (87.548)	lr 0.00326
Train [92][2530/3239]	Time 0.236 (0.622)	Data Time 0.002 (0.017)	Loss 2.2753 (2.2671)	Entropy 0.81822 (0.82166)	Top-1 acc 68.750 (69.843)	Top-5 acc 87.109 (87.546)	lr 0.00326
Train [92][2540/3239]	Time 2.705 (0.621)	Data Time 0.001 (0.016)	Loss 2.4408 (2.2673)	Entropy 0.81822 (0.82164)	Top-1 acc 65.234 (69.839)	Top-5 acc 85.547 (87.543)	lr 0.00326
Train [92][2550/3239]	Time 0.333 (0.620)	Data Time 0.002 (0.016)	Loss 2.3155 (2.2674)	Entropy 0.81815 (0.82163)	Top-1 acc 67.188 (69.834)	Top-5 acc 86.719 (87.541)	lr 0.00326
Train [92][2560/3239]	Time 0.243 (0.619)	Data Time 0.001 (0.016)	Loss 2.3266 (2.2675)	Entropy 0.81812 (0.82162)	Top-1 acc 67.578 (69.833)	Top-5 acc 85.156 (87.538)	lr 0.00326
Train [92][2570/3239]	Time 0.241 (0.619)	Data Time 0.001 (0.016)	Loss 2.3691 (2.2674)	Entropy 0.81813 (0.82160)	Top-1 acc 66.406 (69.837)	Top-5 acc 84.766 (87.539)	lr 0.00326
Train [92][2580/3239]	Time 0.244 (0.618)	Data Time 0.001 (0.016)	Loss 2.2435 (2.2673)	Entropy 0.81812 (0.82159)	Top-1 acc 73.047 (69.843)	Top-5 acc 85.938 (87.540)	lr 0.00326
Train [92][2590/3239]	Time 0.329 (0.618)	Data Time 0.001 (0.016)	Loss 2.3429 (2.2671)	Entropy 0.81813 (0.82158)	Top-1 acc 66.797 (69.846)	Top-5 acc 86.328 (87.544)	lr 0.00325
Train [92][2600/3239]	Time 0.223 (0.617)	Data Time 0.001 (0.016)	Loss 2.2340 (2.2672)	Entropy 0.81812 (0.82156)	Top-1 acc 67.969 (69.844)	Top-5 acc 89.453 (87.543)	lr 0.00325
Train [92][2610/3239]	Time 0.236 (0.617)	Data Time 0.001 (0.016)	Loss 2.2313 (2.2672)	Entropy 0.81815 (0.82155)	Top-1 acc 70.703 (69.842)	Top-5 acc 88.281 (87.542)	lr 0.00325
Train [92][2620/3239]	Time 0.243 (0.616)	Data Time 0.001 (0.016)	Loss 2.1753 (2.2671)	Entropy 0.81817 (0.82154)	Top-1 acc 71.875 (69.845)	Top-5 acc 87.109 (87.542)	lr 0.00325
Train [92][2630/3239]	Time 0.245 (0.616)	Data Time 0.001 (0.016)	Loss 2.3941 (2.2672)	Entropy 0.81811 (0.82152)	Top-1 acc 65.625 (69.836)	Top-5 acc 85.938 (87.539)	lr 0.00325
Train [92][2640/3239]	Time 0.222 (0.615)	Data Time 0.001 (0.016)	Loss 2.1884 (2.2672)	Entropy 0.81812 (0.82151)	Top-1 acc 72.656 (69.839)	Top-5 acc 86.719 (87.540)	lr 0.00325
Train [92][2650/3239]	Time 0.257 (0.615)	Data Time 0.002 (0.016)	Loss 2.3208 (2.2671)	Entropy 0.81808 (0.82150)	Top-1 acc 67.188 (69.836)	Top-5 acc 86.328 (87.543)	lr 0.00325
Train [92][2660/3239]	Time 0.225 (0.614)	Data Time 0.001 (0.016)	Loss 2.2742 (2.2672)	Entropy 0.81800 (0.82149)	Top-1 acc 70.703 (69.834)	Top-5 acc 88.672 (87.543)	lr 0.00325
Train [92][2670/3239]	Time 0.238 (0.614)	Data Time 0.001 (0.016)	Loss 2.2772 (2.2672)	Entropy 0.81799 (0.82147)	Top-1 acc 67.578 (69.837)	Top-5 acc 88.281 (87.543)	lr 0.00325
Train [92][2680/3239]	Time 0.207 (0.613)	Data Time 0.001 (0.016)	Loss 2.3039 (2.2672)	Entropy 0.81796 (0.82146)	Top-1 acc 69.141 (69.835)	Top-5 acc 87.891 (87.541)	lr 0.00325
Train [92][2690/3239]	Time 0.232 (0.612)	Data Time 0.001 (0.016)	Loss 2.3045 (2.2671)	Entropy 0.81798 (0.82145)	Top-1 acc 68.359 (69.835)	Top-5 acc 87.891 (87.544)	lr 0.00325
Train [92][2700/3239]	Time 0.257 (0.612)	Data Time 0.001 (0.016)	Loss 2.4376 (2.2672)	Entropy 0.81803 (0.82143)	Top-1 acc 67.578 (69.835)	Top-5 acc 83.594 (87.542)	lr 0.00325
Train [92][2710/3239]	Time 0.231 (0.611)	Data Time 0.001 (0.016)	Loss 2.3345 (2.2671)	Entropy 0.81795 (0.82142)	Top-1 acc 67.969 (69.835)	Top-5 acc 86.719 (87.542)	lr 0.00325
Train [92][2720/3239]	Time 0.309 (0.611)	Data Time 0.001 (0.015)	Loss 2.1155 (2.2670)	Entropy 0.81799 (0.82141)	Top-1 acc 73.828 (69.836)	Top-5 acc 91.797 (87.544)	lr 0.00325
Train [92][2730/3239]	Time 0.209 (0.610)	Data Time 0.001 (0.015)	Loss 2.2235 (2.2671)	Entropy 0.81797 (0.82140)	Top-1 acc 68.359 (69.832)	Top-5 acc 89.453 (87.543)	lr 0.00325
Train [92][2740/3239]	Time 0.223 (0.610)	Data Time 0.001 (0.015)	Loss 2.1814 (2.2672)	Entropy 0.81789 (0.82138)	Top-1 acc 72.266 (69.828)	Top-5 acc 89.844 (87.542)	lr 0.00324
Train [92][2750/3239]	Time 0.220 (0.610)	Data Time 0.001 (0.015)	Loss 2.1962 (2.2672)	Entropy 0.81787 (0.82137)	Top-1 acc 72.656 (69.829)	Top-5 acc 88.281 (87.542)	lr 0.00324
Train [92][2760/3239]	Time 0.331 (0.609)	Data Time 0.002 (0.015)	Loss 2.2100 (2.2671)	Entropy 0.81778 (0.82136)	Top-1 acc 73.438 (69.832)	Top-5 acc 88.672 (87.543)	lr 0.00324
Train [92][2770/3239]	Time 0.279 (0.609)	Data Time 0.001 (0.015)	Loss 2.1917 (2.2669)	Entropy 0.81778 (0.82135)	Top-1 acc 73.047 (69.836)	Top-5 acc 87.891 (87.546)	lr 0.00324
Train [92][2780/3239]	Time 0.258 (0.608)	Data Time 0.001 (0.015)	Loss 2.2757 (2.2669)	Entropy 0.81776 (0.82133)	Top-1 acc 69.141 (69.834)	Top-5 acc 88.281 (87.546)	lr 0.00324
Train [92][2790/3239]	Time 0.247 (0.608)	Data Time 0.001 (0.015)	Loss 2.0961 (2.2669)	Entropy 0.81773 (0.82132)	Top-1 acc 74.219 (69.835)	Top-5 acc 90.234 (87.545)	lr 0.00324
Train [92][2800/3239]	Time 0.265 (0.607)	Data Time 0.001 (0.015)	Loss 2.1869 (2.2668)	Entropy 0.81772 (0.82131)	Top-1 acc 68.359 (69.836)	Top-5 acc 89.062 (87.547)	lr 0.00324
Train [92][2810/3239]	Time 0.267 (0.607)	Data Time 0.001 (0.015)	Loss 2.1923 (2.2667)	Entropy 0.81767 (0.82129)	Top-1 acc 72.656 (69.838)	Top-5 acc 90.234 (87.548)	lr 0.00324
Train [92][2820/3239]	Time 0.246 (0.606)	Data Time 0.001 (0.015)	Loss 2.2298 (2.2666)	Entropy 0.81764 (0.82128)	Top-1 acc 70.703 (69.839)	Top-5 acc 88.281 (87.550)	lr 0.00324
Train [92][2830/3239]	Time 0.286 (0.623)	Data Time 0.005 (0.015)	Loss 2.2808 (2.2666)	Entropy 0.81762 (0.82127)	Top-1 acc 70.703 (69.839)	Top-5 acc 86.719 (87.551)	lr 0.00324
Train [92][2840/3239]	Time 0.231 (0.623)	Data Time 0.002 (0.015)	Loss 2.2008 (2.2666)	Entropy 0.81758 (0.82126)	Top-1 acc 69.531 (69.838)	Top-5 acc 88.281 (87.550)	lr 0.00324
Train [92][2850/3239]	Time 0.236 (0.622)	Data Time 0.001 (0.015)	Loss 2.3493 (2.2667)	Entropy 0.81759 (0.82124)	Top-1 acc 66.406 (69.834)	Top-5 acc 87.500 (87.549)	lr 0.00324
Train [92][2860/3239]	Time 0.267 (0.622)	Data Time 0.001 (0.015)	Loss 2.2338 (2.2667)	Entropy 0.81758 (0.82123)	Top-1 acc 72.266 (69.834)	Top-5 acc 91.016 (87.548)	lr 0.00324
Train [92][2870/3239]	Time 0.246 (0.622)	Data Time 0.002 (0.015)	Loss 2.3475 (2.2668)	Entropy 0.81752 (0.82122)	Top-1 acc 67.578 (69.831)	Top-5 acc 87.109 (87.548)	lr 0.00324
Train [92][2880/3239]	Time 0.237 (0.621)	Data Time 0.001 (0.015)	Loss 2.3006 (2.2667)	Entropy 0.81746 (0.82120)	Top-1 acc 67.578 (69.831)	Top-5 acc 88.281 (87.550)	lr 0.00324
Train [92][2890/3239]	Time 0.233 (0.621)	Data Time 0.001 (0.015)	Loss 2.3340 (2.2667)	Entropy 0.81744 (0.82119)	Top-1 acc 66.016 (69.832)	Top-5 acc 87.109 (87.551)	lr 0.00323
Train [92][2900/3239]	Time 0.232 (0.620)	Data Time 0.002 (0.015)	Loss 2.1454 (2.2667)	Entropy 0.81736 (0.82118)	Top-1 acc 71.094 (69.833)	Top-5 acc 89.844 (87.551)	lr 0.00323
Train [92][2910/3239]	Time 0.232 (0.620)	Data Time 0.001 (0.015)	Loss 2.2419 (2.2668)	Entropy 0.81730 (0.82117)	Top-1 acc 71.875 (69.828)	Top-5 acc 87.891 (87.547)	lr 0.00323
Train [92][2920/3239]	Time 0.233 (0.619)	Data Time 0.001 (0.015)	Loss 2.1987 (2.2669)	Entropy 0.81732 (0.82115)	Top-1 acc 71.875 (69.824)	Top-5 acc 88.281 (87.544)	lr 0.00323
Train [92][2930/3239]	Time 0.227 (0.619)	Data Time 0.001 (0.014)	Loss 2.1323 (2.2669)	Entropy 0.81730 (0.82114)	Top-1 acc 71.484 (69.820)	Top-5 acc 91.016 (87.546)	lr 0.00323
Train [92][2940/3239]	Time 0.233 (0.618)	Data Time 0.001 (0.014)	Loss 2.1417 (2.2669)	Entropy 0.81725 (0.82113)	Top-1 acc 71.875 (69.818)	Top-5 acc 90.625 (87.545)	lr 0.00323
Train [92][2950/3239]	Time 0.246 (0.618)	Data Time 0.001 (0.014)	Loss 2.4036 (2.2669)	Entropy 0.81705 (0.82111)	Top-1 acc 67.188 (69.817)	Top-5 acc 84.766 (87.545)	lr 0.00323
Train [92][2960/3239]	Time 0.224 (0.617)	Data Time 0.001 (0.014)	Loss 2.3543 (2.2669)	Entropy 0.81703 (0.82110)	Top-1 acc 65.234 (69.815)	Top-5 acc 86.719 (87.544)	lr 0.00323
Train [92][2970/3239]	Time 0.332 (0.617)	Data Time 0.001 (0.014)	Loss 2.3499 (2.2669)	Entropy 0.81693 (0.82109)	Top-1 acc 67.578 (69.815)	Top-5 acc 86.719 (87.545)	lr 0.00323
Train [92][2980/3239]	Time 0.241 (0.616)	Data Time 0.001 (0.014)	Loss 2.2531 (2.2669)	Entropy 0.81692 (0.82107)	Top-1 acc 73.047 (69.818)	Top-5 acc 87.109 (87.547)	lr 0.00323
Train [92][2990/3239]	Time 0.247 (0.616)	Data Time 0.001 (0.014)	Loss 2.2159 (2.2668)	Entropy 0.81689 (0.82106)	Top-1 acc 68.750 (69.819)	Top-5 acc 91.016 (87.550)	lr 0.00323
Train [92][3000/3239]	Time 0.236 (0.615)	Data Time 0.001 (0.014)	Loss 2.3002 (2.2668)	Entropy 0.81696 (0.82104)	Top-1 acc 69.922 (69.821)	Top-5 acc 87.500 (87.549)	lr 0.00323
Train [92][3010/3239]	Time 0.323 (0.615)	Data Time 0.002 (0.014)	Loss 2.1915 (2.2667)	Entropy 0.81700 (0.82103)	Top-1 acc 73.438 (69.825)	Top-5 acc 87.891 (87.551)	lr 0.00323
Train [92][3020/3239]	Time 0.264 (0.615)	Data Time 0.001 (0.014)	Loss 2.2640 (2.2668)	Entropy 0.81705 (0.82102)	Top-1 acc 69.141 (69.826)	Top-5 acc 88.281 (87.552)	lr 0.00323
Train [92][3030/3239]	Time 0.255 (0.614)	Data Time 0.001 (0.014)	Loss 2.3948 (2.2668)	Entropy 0.81701 (0.82100)	Top-1 acc 64.062 (69.819)	Top-5 acc 84.766 (87.552)	lr 0.00322
Train [92][3040/3239]	Time 0.233 (0.614)	Data Time 0.001 (0.014)	Loss 2.4026 (2.2668)	Entropy 0.81680 (0.82099)	Top-1 acc 65.234 (69.819)	Top-5 acc 82.812 (87.552)	lr 0.00322
Train [92][3050/3239]	Time 0.216 (0.613)	Data Time 0.001 (0.014)	Loss 2.3370 (2.2669)	Entropy 0.81686 (0.82098)	Top-1 acc 68.750 (69.816)	Top-5 acc 87.109 (87.550)	lr 0.00322
Train [92][3060/3239]	Time 0.198 (0.613)	Data Time 0.001 (0.014)	Loss 2.2375 (2.2669)	Entropy 0.81683 (0.82096)	Top-1 acc 73.438 (69.819)	Top-5 acc 86.328 (87.550)	lr 0.00322
Train [92][3070/3239]	Time 0.257 (0.612)	Data Time 0.001 (0.014)	Loss 2.2928 (2.2669)	Entropy 0.81689 (0.82095)	Top-1 acc 68.750 (69.821)	Top-5 acc 87.109 (87.551)	lr 0.00322
Train [92][3080/3239]	Time 0.232 (0.612)	Data Time 0.001 (0.014)	Loss 2.3065 (2.2670)	Entropy 0.81684 (0.82094)	Top-1 acc 68.359 (69.817)	Top-5 acc 87.891 (87.549)	lr 0.00322
Train [92][3090/3239]	Time 0.224 (0.611)	Data Time 0.001 (0.014)	Loss 2.2559 (2.2670)	Entropy 0.81680 (0.82092)	Top-1 acc 71.875 (69.817)	Top-5 acc 86.328 (87.545)	lr 0.00322
Train [92][3100/3239]	Time 0.258 (0.611)	Data Time 0.001 (0.014)	Loss 2.2939 (2.2670)	Entropy 0.81677 (0.82091)	Top-1 acc 69.531 (69.818)	Top-5 acc 85.156 (87.545)	lr 0.00322
Train [92][3110/3239]	Time 0.226 (0.611)	Data Time 0.001 (0.014)	Loss 2.1895 (2.2671)	Entropy 0.81673 (0.82090)	Top-1 acc 71.094 (69.815)	Top-5 acc 91.406 (87.544)	lr 0.00322
Train [92][3120/3239]	Time 0.241 (0.610)	Data Time 0.001 (0.014)	Loss 2.2755 (2.2670)	Entropy 0.81667 (0.82088)	Top-1 acc 70.312 (69.816)	Top-5 acc 85.156 (87.546)	lr 0.00322
Train [92][3130/3239]	Time 0.243 (0.610)	Data Time 0.001 (0.014)	Loss 2.3302 (2.2670)	Entropy 0.81656 (0.82087)	Top-1 acc 67.188 (69.813)	Top-5 acc 88.672 (87.548)	lr 0.00322
Train [92][3140/3239]	Time 0.319 (0.609)	Data Time 0.001 (0.014)	Loss 2.3523 (2.2671)	Entropy 0.81651 (0.82086)	Top-1 acc 71.094 (69.815)	Top-5 acc 84.766 (87.546)	lr 0.00322
Train [92][3150/3239]	Time 0.266 (0.609)	Data Time 0.001 (0.014)	Loss 2.2214 (2.2672)	Entropy 0.81651 (0.82084)	Top-1 acc 69.141 (69.813)	Top-5 acc 89.844 (87.544)	lr 0.00322
Train [92][3160/3239]	Time 0.263 (0.625)	Data Time 0.004 (0.014)	Loss 2.2653 (2.2672)	Entropy 0.81643 (0.82083)	Top-1 acc 71.484 (69.813)	Top-5 acc 87.109 (87.546)	lr 0.00322
Train [92][3170/3239]	Time 0.237 (0.625)	Data Time 0.002 (0.014)	Loss 2.3516 (2.2670)	Entropy 0.81621 (0.82081)	Top-1 acc 69.141 (69.815)	Top-5 acc 85.156 (87.550)	lr 0.00322
Train [92][3180/3239]	Time 0.319 (0.624)	Data Time 0.000 (0.013)	Loss 2.3759 (2.2671)	Entropy 0.81615 (0.82080)	Top-1 acc 64.453 (69.812)	Top-5 acc 87.891 (87.551)	lr 0.00321
Train [92][3190/3239]	Time 0.241 (0.624)	Data Time 0.000 (0.013)	Loss 2.2479 (2.2673)	Entropy 0.81623 (0.82079)	Top-1 acc 69.531 (69.805)	Top-5 acc 88.281 (87.549)	lr 0.00321
Train [92][3200/3239]	Time 0.226 (0.623)	Data Time 0.000 (0.013)	Loss 2.4973 (2.2674)	Entropy 0.81614 (0.82077)	Top-1 acc 65.625 (69.804)	Top-5 acc 83.984 (87.548)	lr 0.00321
Train [92][3210/3239]	Time 0.221 (0.623)	Data Time 0.000 (0.013)	Loss 2.2372 (2.2673)	Entropy 0.81618 (0.82076)	Top-1 acc 71.484 (69.805)	Top-5 acc 89.062 (87.548)	lr 0.00321
Train [92][3220/3239]	Time 0.324 (0.622)	Data Time 0.000 (0.013)	Loss 2.2894 (2.2673)	Entropy 0.81613 (0.82074)	Top-1 acc 69.922 (69.805)	Top-5 acc 87.500 (87.549)	lr 0.00321
Train [92][3230/3239]	Time 0.227 (0.622)	Data Time 0.000 (0.013)	Loss 2.1921 (2.2673)	Entropy 0.81614 (0.82073)	Top-1 acc 72.266 (69.805)	Top-5 acc 91.016 (87.548)	lr 0.00321
Train [92][3239/3239]	Time 2.328 (0.621)	Data Time 0.000 (0.013)	Loss 2.5949 (2.2673)	Entropy 0.81614 (0.82072)	Top-1 acc 65.432 (69.807)	Top-5 acc 77.778 (87.547)	lr 0.00321
==========Valid [92/120]	loss 1.246	top-1 acc 71.579 (71.579)	top-5 acc 89.412	Train top-1 69.807	top-5 87.547	Entropy 0.81614	Latency-None: 0.000ms	Flops: 546.53M
Train [93][0/3239]	Time 37.055 (37.055)	Data Time 35.589 (35.589)	Loss 2.2195 (2.2195)	Entropy 0.81607 (0.81607)	Top-1 acc 71.875 (71.875)	Top-5 acc 89.844 (89.844)	lr 0.00321
Train [93][10/3239]	Time 2.887 (4.023)	Data Time 0.003 (3.270)	Loss 2.2103 (2.2277)	Entropy 0.81607 (0.81607)	Top-1 acc 71.875 (71.058)	Top-5 acc 88.672 (88.068)	lr 0.00321
Train [93][20/3239]	Time 0.231 (2.224)	Data Time 0.001 (1.714)	Loss 2.2240 (2.2491)	Entropy 0.81600 (0.81604)	Top-1 acc 69.141 (69.959)	Top-5 acc 88.672 (88.132)	lr 0.00321
Train [93][30/3239]	Time 0.224 (1.660)	Data Time 0.001 (1.161)	Loss 2.1492 (2.2417)	Entropy 0.81724 (0.81639)	Top-1 acc 74.219 (70.653)	Top-5 acc 92.969 (88.218)	lr 0.00321
Train [93][40/3239]	Time 0.239 (1.373)	Data Time 0.002 (0.879)	Loss 2.3369 (2.2455)	Entropy 0.81722 (0.81659)	Top-1 acc 64.453 (70.617)	Top-5 acc 86.719 (88.196)	lr 0.00321
Train [93][50/3239]	Time 0.232 (1.195)	Data Time 0.001 (0.707)	Loss 2.1807 (2.2447)	Entropy 0.81723 (0.81672)	Top-1 acc 70.312 (70.642)	Top-5 acc 89.062 (88.074)	lr 0.00321
Train [93][60/3239]	Time 0.239 (1.077)	Data Time 0.001 (0.591)	Loss 2.2257 (2.2440)	Entropy 0.81729 (0.81681)	Top-1 acc 71.094 (70.575)	Top-5 acc 87.891 (87.961)	lr 0.00321
Train [93][70/3239]	Time 0.315 (0.994)	Data Time 0.001 (0.508)	Loss 2.1141 (2.2432)	Entropy 0.81728 (0.81687)	Top-1 acc 75.391 (70.483)	Top-5 acc 89.453 (88.006)	lr 0.00321
Train [93][80/3239]	Time 0.232 (0.928)	Data Time 0.001 (0.445)	Loss 2.2321 (2.2400)	Entropy 0.81726 (0.81692)	Top-1 acc 72.656 (70.549)	Top-5 acc 87.891 (88.059)	lr 0.00321
Train [93][90/3239]	Time 0.223 (0.879)	Data Time 0.001 (0.397)	Loss 2.3860 (2.2441)	Entropy 0.81722 (0.81696)	Top-1 acc 67.578 (70.497)	Top-5 acc 86.328 (88.041)	lr 0.00320
Train [93][100/3239]	Time 0.230 (0.839)	Data Time 0.001 (0.358)	Loss 2.3164 (2.2458)	Entropy 0.81739 (0.81699)	Top-1 acc 67.188 (70.359)	Top-5 acc 89.453 (87.972)	lr 0.00320
Train [93][110/3239]	Time 0.376 (0.807)	Data Time 0.002 (0.326)	Loss 2.3088 (2.2485)	Entropy 0.81732 (0.81702)	Top-1 acc 69.531 (70.260)	Top-5 acc 85.938 (87.901)	lr 0.00320
Train [93][120/3239]	Time 2.670 (0.781)	Data Time 0.001 (0.299)	Loss 2.4645 (2.2521)	Entropy 0.81732 (0.81705)	Top-1 acc 64.844 (70.203)	Top-5 acc 85.156 (87.865)	lr 0.00320
Train [93][130/3239]	Time 0.230 (0.739)	Data Time 0.001 (0.276)	Loss 2.2474 (2.2539)	Entropy 0.81734 (0.81707)	Top-1 acc 67.969 (70.125)	Top-5 acc 89.453 (87.864)	lr 0.00320
Train [93][140/3239]	Time 0.233 (0.720)	Data Time 0.001 (0.257)	Loss 2.2814 (2.2533)	Entropy 0.81730 (0.81709)	Top-1 acc 70.312 (70.202)	Top-5 acc 86.328 (87.821)	lr 0.00320
Train [93][150/3239]	Time 0.232 (0.704)	Data Time 0.001 (0.240)	Loss 2.2595 (2.2550)	Entropy 0.81725 (0.81710)	Top-1 acc 74.219 (70.168)	Top-5 acc 87.109 (87.821)	lr 0.00320
Train [93][160/3239]	Time 0.232 (0.690)	Data Time 0.001 (0.225)	Loss 2.1251 (2.2504)	Entropy 0.81715 (0.81710)	Top-1 acc 74.219 (70.225)	Top-5 acc 89.844 (87.905)	lr 0.00320
Train [93][170/3239]	Time 0.229 (0.677)	Data Time 0.001 (0.212)	Loss 2.2817 (2.2514)	Entropy 0.81718 (0.81711)	Top-1 acc 67.578 (70.150)	Top-5 acc 87.109 (87.836)	lr 0.00320
Train [93][180/3239]	Time 0.230 (0.665)	Data Time 0.001 (0.200)	Loss 2.1498 (2.2511)	Entropy 0.81721 (0.81711)	Top-1 acc 73.438 (70.181)	Top-5 acc 89.844 (87.858)	lr 0.00320
Train [93][190/3239]	Time 0.231 (0.655)	Data Time 0.001 (0.190)	Loss 2.3928 (2.2516)	Entropy 0.81716 (0.81712)	Top-1 acc 64.844 (70.133)	Top-5 acc 84.766 (87.852)	lr 0.00320
Train [93][200/3239]	Time 0.252 (0.646)	Data Time 0.001 (0.180)	Loss 2.3561 (2.2515)	Entropy 0.81713 (0.81712)	Top-1 acc 70.312 (70.200)	Top-5 acc 85.938 (87.832)	lr 0.00320
Train [93][210/3239]	Time 0.232 (0.638)	Data Time 0.001 (0.172)	Loss 2.2207 (2.2543)	Entropy 0.81708 (0.81712)	Top-1 acc 69.531 (70.100)	Top-5 acc 88.672 (87.780)	lr 0.00320
Train [93][220/3239]	Time 0.239 (0.631)	Data Time 0.001 (0.164)	Loss 2.1518 (2.2566)	Entropy 0.81711 (0.81712)	Top-1 acc 70.703 (70.046)	Top-5 acc 90.625 (87.756)	lr 0.00320
Train [93][230/3239]	Time 2.530 (0.624)	Data Time 0.001 (0.157)	Loss 2.2799 (2.2571)	Entropy 0.81711 (0.81712)	Top-1 acc 68.359 (70.050)	Top-5 acc 87.891 (87.732)	lr 0.00320
Train [93][240/3239]	Time 0.321 (0.608)	Data Time 0.001 (0.151)	Loss 2.3637 (2.2582)	Entropy 0.81705 (0.81711)	Top-1 acc 67.188 (69.993)	Top-5 acc 87.500 (87.719)	lr 0.00319
Train [93][250/3239]	Time 0.222 (0.602)	Data Time 0.001 (0.145)	Loss 2.4005 (2.2580)	Entropy 0.81707 (0.81711)	Top-1 acc 64.453 (70.007)	Top-5 acc 85.938 (87.716)	lr 0.00319
Train [93][260/3239]	Time 0.232 (0.597)	Data Time 0.001 (0.139)	Loss 2.3292 (2.2576)	Entropy 0.81702 (0.81711)	Top-1 acc 67.578 (70.009)	Top-5 acc 86.328 (87.723)	lr 0.00319
Train [93][270/3239]	Time 0.236 (0.593)	Data Time 0.001 (0.134)	Loss 2.2054 (2.2578)	Entropy 0.81704 (0.81711)	Top-1 acc 72.266 (69.967)	Top-5 acc 85.938 (87.708)	lr 0.00319
Train [93][280/3239]	Time 0.362 (0.789)	Data Time 0.002 (0.129)	Loss 2.2167 (2.2576)	Entropy 0.81701 (0.81710)	Top-1 acc 70.312 (69.979)	Top-5 acc 88.672 (87.706)	lr 0.00319
Train [93][290/3239]	Time 0.278 (0.778)	Data Time 0.002 (0.125)	Loss 2.3108 (2.2566)	Entropy 0.81698 (0.81710)	Top-1 acc 66.797 (69.993)	Top-5 acc 87.109 (87.716)	lr 0.00319
Train [93][300/3239]	Time 0.225 (0.768)	Data Time 0.001 (0.121)	Loss 2.4115 (2.2569)	Entropy 0.81694 (0.81710)	Top-1 acc 64.062 (69.967)	Top-5 acc 85.938 (87.713)	lr 0.00319
Train [93][310/3239]	Time 0.230 (0.759)	Data Time 0.001 (0.117)	Loss 2.1501 (2.2567)	Entropy 0.81684 (0.81709)	Top-1 acc 78.516 (69.980)	Top-5 acc 89.844 (87.721)	lr 0.00319
Train [93][320/3239]	Time 0.263 (0.750)	Data Time 0.001 (0.114)	Loss 2.2412 (2.2567)	Entropy 0.81681 (0.81708)	Top-1 acc 70.312 (70.023)	Top-5 acc 87.500 (87.737)	lr 0.00319
Train [93][330/3239]	Time 0.243 (0.743)	Data Time 0.002 (0.110)	Loss 2.3303 (2.2572)	Entropy 0.81684 (0.81707)	Top-1 acc 69.531 (70.008)	Top-5 acc 85.156 (87.716)	lr 0.00319
Train [93][340/3239]	Time 2.529 (0.735)	Data Time 0.002 (0.107)	Loss 2.2072 (2.2564)	Entropy 0.81684 (0.81707)	Top-1 acc 71.484 (70.019)	Top-5 acc 89.062 (87.725)	lr 0.00319
Train [93][350/3239]	Time 0.247 (0.722)	Data Time 0.001 (0.104)	Loss 2.2536 (2.2564)	Entropy 0.81680 (0.81706)	Top-1 acc 72.656 (70.029)	Top-5 acc 89.062 (87.716)	lr 0.00319
Train [93][360/3239]	Time 0.241 (0.715)	Data Time 0.001 (0.101)	Loss 2.4298 (2.2567)	Entropy 0.81665 (0.81705)	Top-1 acc 66.016 (70.019)	Top-5 acc 86.719 (87.723)	lr 0.00319
Train [93][370/3239]	Time 0.226 (0.708)	Data Time 0.001 (0.098)	Loss 2.1630 (2.2564)	Entropy 0.81658 (0.81704)	Top-1 acc 73.438 (70.019)	Top-5 acc 88.672 (87.715)	lr 0.00319
Train [93][380/3239]	Time 0.214 (0.702)	Data Time 0.001 (0.096)	Loss 2.4824 (2.2568)	Entropy 0.81657 (0.81702)	Top-1 acc 65.625 (70.004)	Top-5 acc 84.375 (87.701)	lr 0.00319
Train [93][390/3239]	Time 0.252 (0.697)	Data Time 0.001 (0.094)	Loss 2.2103 (2.2578)	Entropy 0.81657 (0.81701)	Top-1 acc 70.703 (69.986)	Top-5 acc 87.109 (87.665)	lr 0.00318
Train [93][400/3239]	Time 0.238 (0.691)	Data Time 0.001 (0.091)	Loss 2.1867 (2.2568)	Entropy 0.81660 (0.81700)	Top-1 acc 73.828 (70.045)	Top-5 acc 88.281 (87.679)	lr 0.00318
Train [93][410/3239]	Time 0.323 (0.686)	Data Time 0.001 (0.089)	Loss 2.2159 (2.2564)	Entropy 0.81661 (0.81699)	Top-1 acc 71.094 (70.064)	Top-5 acc 86.719 (87.676)	lr 0.00318
Train [93][420/3239]	Time 0.239 (0.681)	Data Time 0.001 (0.087)	Loss 2.2301 (2.2550)	Entropy 0.81666 (0.81698)	Top-1 acc 66.797 (70.107)	Top-5 acc 89.453 (87.699)	lr 0.00318
Train [93][430/3239]	Time 0.223 (0.676)	Data Time 0.001 (0.085)	Loss 2.3228 (2.2547)	Entropy 0.81676 (0.81698)	Top-1 acc 72.266 (70.123)	Top-5 acc 85.547 (87.699)	lr 0.00318
Train [93][440/3239]	Time 0.220 (0.671)	Data Time 0.001 (0.083)	Loss 2.3475 (2.2542)	Entropy 0.81680 (0.81697)	Top-1 acc 69.922 (70.135)	Top-5 acc 84.766 (87.713)	lr 0.00318
Train [93][450/3239]	Time 2.665 (0.667)	Data Time 0.001 (0.081)	Loss 2.2478 (2.2538)	Entropy 0.81680 (0.81697)	Top-1 acc 70.312 (70.156)	Top-5 acc 86.328 (87.717)	lr 0.00318
Train [93][460/3239]	Time 0.260 (0.658)	Data Time 0.001 (0.080)	Loss 2.3613 (2.2533)	Entropy 0.81672 (0.81696)	Top-1 acc 68.359 (70.166)	Top-5 acc 84.375 (87.728)	lr 0.00318
Train [93][470/3239]	Time 0.222 (0.654)	Data Time 0.001 (0.078)	Loss 2.3061 (2.2529)	Entropy 0.81673 (0.81696)	Top-1 acc 70.312 (70.195)	Top-5 acc 85.547 (87.747)	lr 0.00318
Train [93][480/3239]	Time 0.223 (0.650)	Data Time 0.001 (0.076)	Loss 2.3252 (2.2534)	Entropy 0.81659 (0.81695)	Top-1 acc 67.578 (70.185)	Top-5 acc 87.500 (87.723)	lr 0.00318
Train [93][490/3239]	Time 0.240 (0.647)	Data Time 0.001 (0.075)	Loss 2.2384 (2.2534)	Entropy 0.81658 (0.81694)	Top-1 acc 72.656 (70.208)	Top-5 acc 85.547 (87.726)	lr 0.00318
Train [93][500/3239]	Time 0.239 (0.644)	Data Time 0.001 (0.073)	Loss 2.3319 (2.2535)	Entropy 0.81649 (0.81693)	Top-1 acc 69.141 (70.210)	Top-5 acc 85.938 (87.718)	lr 0.00318
Train [93][510/3239]	Time 0.235 (0.640)	Data Time 0.001 (0.072)	Loss 2.4121 (2.2535)	Entropy 0.81643 (0.81693)	Top-1 acc 67.578 (70.197)	Top-5 acc 84.375 (87.721)	lr 0.00318
Train [93][520/3239]	Time 0.213 (0.637)	Data Time 0.001 (0.071)	Loss 2.3101 (2.2544)	Entropy 0.81638 (0.81692)	Top-1 acc 68.359 (70.163)	Top-5 acc 86.719 (87.711)	lr 0.00318
Train [93][530/3239]	Time 0.229 (0.634)	Data Time 0.001 (0.069)	Loss 2.2550 (2.2555)	Entropy 0.81642 (0.81691)	Top-1 acc 69.922 (70.145)	Top-5 acc 87.500 (87.692)	lr 0.00317
Train [93][540/3239]	Time 0.222 (0.631)	Data Time 0.001 (0.068)	Loss 2.1135 (2.2544)	Entropy 0.81641 (0.81690)	Top-1 acc 72.656 (70.162)	Top-5 acc 90.234 (87.704)	lr 0.00317
Train [93][550/3239]	Time 0.227 (0.628)	Data Time 0.001 (0.067)	Loss 2.1637 (2.2545)	Entropy 0.81644 (0.81689)	Top-1 acc 73.438 (70.159)	Top-5 acc 89.062 (87.714)	lr 0.00317
Train [93][560/3239]	Time 2.542 (0.626)	Data Time 0.001 (0.066)	Loss 2.2735 (2.2542)	Entropy 0.81644 (0.81688)	Top-1 acc 67.188 (70.167)	Top-5 acc 90.625 (87.717)	lr 0.00317
Train [93][570/3239]	Time 0.322 (0.619)	Data Time 0.001 (0.065)	Loss 2.1212 (2.2542)	Entropy 0.81645 (0.81687)	Top-1 acc 73.828 (70.165)	Top-5 acc 90.234 (87.716)	lr 0.00317
Train [93][580/3239]	Time 0.374 (0.617)	Data Time 0.001 (0.063)	Loss 2.1829 (2.2538)	Entropy 0.81638 (0.81686)	Top-1 acc 70.312 (70.164)	Top-5 acc 89.844 (87.732)	lr 0.00317
Train [93][590/3239]	Time 0.223 (0.615)	Data Time 0.001 (0.062)	Loss 2.2997 (2.2535)	Entropy 0.81636 (0.81686)	Top-1 acc 69.531 (70.160)	Top-5 acc 87.891 (87.749)	lr 0.00317
Train [93][600/3239]	Time 0.278 (0.612)	Data Time 0.001 (0.061)	Loss 2.3159 (2.2545)	Entropy 0.81630 (0.81685)	Top-1 acc 65.234 (70.129)	Top-5 acc 84.375 (87.716)	lr 0.00317
Train [93][610/3239]	Time 0.240 (0.610)	Data Time 0.001 (0.060)	Loss 2.2167 (2.2544)	Entropy 0.81628 (0.81684)	Top-1 acc 71.875 (70.127)	Top-5 acc 85.547 (87.705)	lr 0.00317
Train [93][620/3239]	Time 0.303 (0.608)	Data Time 0.001 (0.059)	Loss 2.2623 (2.2543)	Entropy 0.81618 (0.81683)	Top-1 acc 69.531 (70.134)	Top-5 acc 87.109 (87.717)	lr 0.00317
Train [93][630/3239]	Time 0.279 (0.606)	Data Time 0.001 (0.059)	Loss 2.2712 (2.2545)	Entropy 0.81612 (0.81682)	Top-1 acc 71.484 (70.131)	Top-5 acc 85.938 (87.715)	lr 0.00317
Train [93][640/3239]	Time 0.291 (0.686)	Data Time 0.002 (0.058)	Loss 2.3605 (2.2548)	Entropy 0.81610 (0.81681)	Top-1 acc 67.578 (70.127)	Top-5 acc 84.766 (87.712)	lr 0.00317
Train [93][650/3239]	Time 0.238 (0.683)	Data Time 0.002 (0.057)	Loss 2.1149 (2.2553)	Entropy 0.81604 (0.81679)	Top-1 acc 77.734 (70.141)	Top-5 acc 90.625 (87.709)	lr 0.00317
Train [93][660/3239]	Time 0.318 (0.681)	Data Time 0.001 (0.056)	Loss 2.1776 (2.2554)	Entropy 0.81599 (0.81678)	Top-1 acc 75.000 (70.152)	Top-5 acc 85.547 (87.707)	lr 0.00317
Train [93][670/3239]	Time 2.537 (0.678)	Data Time 0.001 (0.055)	Loss 2.0619 (2.2552)	Entropy 0.81599 (0.81677)	Top-1 acc 73.047 (70.156)	Top-5 acc 91.406 (87.711)	lr 0.00317
Train [93][680/3239]	Time 0.244 (0.671)	Data Time 0.002 (0.054)	Loss 2.2551 (2.2560)	Entropy 0.81600 (0.81676)	Top-1 acc 73.438 (70.147)	Top-5 acc 85.938 (87.700)	lr 0.00316
Train [93][690/3239]	Time 0.243 (0.669)	Data Time 0.001 (0.054)	Loss 2.2407 (2.2565)	Entropy 0.81608 (0.81675)	Top-1 acc 69.531 (70.152)	Top-5 acc 87.500 (87.687)	lr 0.00316
Train [93][700/3239]	Time 0.238 (0.666)	Data Time 0.001 (0.053)	Loss 2.3112 (2.2572)	Entropy 0.81595 (0.81674)	Top-1 acc 69.141 (70.134)	Top-5 acc 86.719 (87.678)	lr 0.00316
Train [93][710/3239]	Time 0.228 (0.663)	Data Time 0.001 (0.052)	Loss 2.2539 (2.2574)	Entropy 0.81590 (0.81673)	Top-1 acc 67.578 (70.133)	Top-5 acc 86.328 (87.671)	lr 0.00316
Train [93][720/3239]	Time 0.230 (0.661)	Data Time 0.001 (0.051)	Loss 2.4026 (2.2584)	Entropy 0.81587 (0.81672)	Top-1 acc 62.500 (70.098)	Top-5 acc 85.547 (87.656)	lr 0.00316
Train [93][730/3239]	Time 0.230 (0.658)	Data Time 0.001 (0.051)	Loss 2.3284 (2.2586)	Entropy 0.81584 (0.81670)	Top-1 acc 69.922 (70.102)	Top-5 acc 84.766 (87.652)	lr 0.00316
Train [93][740/3239]	Time 0.238 (0.656)	Data Time 0.001 (0.050)	Loss 2.2325 (2.2581)	Entropy 0.81578 (0.81669)	Top-1 acc 71.484 (70.110)	Top-5 acc 88.672 (87.663)	lr 0.00316
Train [93][750/3239]	Time 0.238 (0.654)	Data Time 0.001 (0.049)	Loss 2.3945 (2.2587)	Entropy 0.81574 (0.81668)	Top-1 acc 69.531 (70.089)	Top-5 acc 85.156 (87.651)	lr 0.00316
Train [93][760/3239]	Time 0.224 (0.651)	Data Time 0.001 (0.049)	Loss 2.2045 (2.2584)	Entropy 0.81576 (0.81667)	Top-1 acc 71.875 (70.094)	Top-5 acc 87.891 (87.647)	lr 0.00316
Train [93][770/3239]	Time 0.228 (0.649)	Data Time 0.001 (0.048)	Loss 2.1178 (2.2591)	Entropy 0.81576 (0.81666)	Top-1 acc 72.656 (70.081)	Top-5 acc 91.406 (87.640)	lr 0.00316
Train [93][780/3239]	Time 2.584 (0.647)	Data Time 0.001 (0.048)	Loss 2.1649 (2.2587)	Entropy 0.81576 (0.81664)	Top-1 acc 71.094 (70.088)	Top-5 acc 88.281 (87.648)	lr 0.00316
Train [93][790/3239]	Time 0.338 (0.642)	Data Time 0.001 (0.047)	Loss 2.2535 (2.2586)	Entropy 0.81549 (0.81663)	Top-1 acc 70.703 (70.081)	Top-5 acc 88.672 (87.647)	lr 0.00316
Train [93][800/3239]	Time 0.240 (0.640)	Data Time 0.001 (0.046)	Loss 2.2542 (2.2585)	Entropy 0.81545 (0.81662)	Top-1 acc 71.484 (70.076)	Top-5 acc 87.891 (87.658)	lr 0.00316
Train [93][810/3239]	Time 0.222 (0.638)	Data Time 0.001 (0.046)	Loss 2.2957 (2.2586)	Entropy 0.81536 (0.81660)	Top-1 acc 68.750 (70.070)	Top-5 acc 88.281 (87.656)	lr 0.00316
Train [93][820/3239]	Time 0.257 (0.636)	Data Time 0.001 (0.045)	Loss 2.3839 (2.2586)	Entropy 0.81533 (0.81658)	Top-1 acc 65.625 (70.070)	Top-5 acc 83.594 (87.646)	lr 0.00316
Train [93][830/3239]	Time 0.315 (0.634)	Data Time 0.001 (0.045)	Loss 2.3750 (2.2592)	Entropy 0.81528 (0.81657)	Top-1 acc 68.750 (70.060)	Top-5 acc 85.547 (87.639)	lr 0.00315
Train [93][840/3239]	Time 0.237 (0.632)	Data Time 0.001 (0.044)	Loss 2.3239 (2.2592)	Entropy 0.81530 (0.81655)	Top-1 acc 69.922 (70.070)	Top-5 acc 87.109 (87.637)	lr 0.00315
Train [93][850/3239]	Time 0.214 (0.630)	Data Time 0.001 (0.044)	Loss 2.2471 (2.2596)	Entropy 0.81524 (0.81654)	Top-1 acc 70.312 (70.067)	Top-5 acc 87.500 (87.629)	lr 0.00315
Train [93][860/3239]	Time 0.233 (0.629)	Data Time 0.001 (0.043)	Loss 2.2352 (2.2595)	Entropy 0.81521 (0.81652)	Top-1 acc 71.484 (70.072)	Top-5 acc 91.016 (87.632)	lr 0.00315
Train [93][870/3239]	Time 0.231 (0.627)	Data Time 0.001 (0.043)	Loss 2.2705 (2.2597)	Entropy 0.81510 (0.81651)	Top-1 acc 72.656 (70.070)	Top-5 acc 87.109 (87.629)	lr 0.00315
Train [93][880/3239]	Time 0.225 (0.625)	Data Time 0.001 (0.042)	Loss 2.1946 (2.2591)	Entropy 0.81507 (0.81649)	Top-1 acc 70.312 (70.080)	Top-5 acc 88.672 (87.639)	lr 0.00315
Train [93][890/3239]	Time 2.645 (0.624)	Data Time 0.002 (0.042)	Loss 2.3048 (2.2590)	Entropy 0.81507 (0.81648)	Top-1 acc 69.922 (70.077)	Top-5 acc 85.938 (87.632)	lr 0.00315
Train [93][900/3239]	Time 0.226 (0.619)	Data Time 0.002 (0.042)	Loss 2.2149 (2.2589)	Entropy 0.81510 (0.81646)	Top-1 acc 69.141 (70.078)	Top-5 acc 89.844 (87.634)	lr 0.00315
Train [93][910/3239]	Time 0.237 (0.618)	Data Time 0.001 (0.041)	Loss 2.4705 (2.2596)	Entropy 0.81500 (0.81645)	Top-1 acc 66.016 (70.056)	Top-5 acc 87.109 (87.634)	lr 0.00315
Train [93][920/3239]	Time 0.226 (0.616)	Data Time 0.001 (0.041)	Loss 2.1376 (2.2591)	Entropy 0.81509 (0.81643)	Top-1 acc 70.703 (70.063)	Top-5 acc 89.453 (87.644)	lr 0.00315
Train [93][930/3239]	Time 0.231 (0.615)	Data Time 0.001 (0.040)	Loss 2.1419 (2.2589)	Entropy 0.81507 (0.81642)	Top-1 acc 69.531 (70.070)	Top-5 acc 89.453 (87.644)	lr 0.00315
Train [93][940/3239]	Time 0.227 (0.614)	Data Time 0.001 (0.040)	Loss 2.1520 (2.2588)	Entropy 0.81504 (0.81640)	Top-1 acc 70.312 (70.069)	Top-5 acc 91.406 (87.648)	lr 0.00315
Train [93][950/3239]	Time 0.233 (0.612)	Data Time 0.001 (0.039)	Loss 2.2571 (2.2587)	Entropy 0.81497 (0.81639)	Top-1 acc 69.531 (70.071)	Top-5 acc 86.719 (87.654)	lr 0.00315
Train [93][960/3239]	Time 0.321 (0.611)	Data Time 0.001 (0.039)	Loss 2.2645 (2.2585)	Entropy 0.81501 (0.81637)	Top-1 acc 71.484 (70.076)	Top-5 acc 87.500 (87.659)	lr 0.00315
Train [93][970/3239]	Time 0.219 (0.609)	Data Time 0.001 (0.039)	Loss 2.2281 (2.2588)	Entropy 0.81503 (0.81636)	Top-1 acc 73.047 (70.067)	Top-5 acc 88.281 (87.658)	lr 0.00315
Train [93][980/3239]	Time 0.220 (0.608)	Data Time 0.001 (0.038)	Loss 2.1256 (2.2586)	Entropy 0.81498 (0.81634)	Top-1 acc 76.562 (70.084)	Top-5 acc 89.062 (87.661)	lr 0.00314
Train [93][990/3239]	Time 0.259 (0.607)	Data Time 0.001 (0.038)	Loss 2.1186 (2.2586)	Entropy 0.81495 (0.81633)	Top-1 acc 73.828 (70.083)	Top-5 acc 89.453 (87.660)	lr 0.00314
Train [93][1000/3239]	Time 52.325 (0.655)	Data Time 0.001 (0.038)	Loss 2.1824 (2.2584)	Entropy 0.81495 (0.81632)	Top-1 acc 73.438 (70.094)	Top-5 acc 87.500 (87.660)	lr 0.00314
Train [93][1010/3239]	Time 0.329 (0.652)	Data Time 0.003 (0.037)	Loss 2.3598 (2.2583)	Entropy 0.81491 (0.81630)	Top-1 acc 66.406 (70.090)	Top-5 acc 85.938 (87.666)	lr 0.00314
Train [93][1020/3239]	Time 0.220 (0.650)	Data Time 0.002 (0.037)	Loss 2.3167 (2.2580)	Entropy 0.81486 (0.81629)	Top-1 acc 67.578 (70.097)	Top-5 acc 86.328 (87.668)	lr 0.00314
Train [93][1030/3239]	Time 0.236 (0.649)	Data Time 0.001 (0.036)	Loss 2.2033 (2.2578)	Entropy 0.81483 (0.81627)	Top-1 acc 71.094 (70.102)	Top-5 acc 87.109 (87.664)	lr 0.00314
Train [93][1040/3239]	Time 0.235 (0.647)	Data Time 0.001 (0.036)	Loss 2.2326 (2.2575)	Entropy 0.81475 (0.81626)	Top-1 acc 73.828 (70.116)	Top-5 acc 87.500 (87.661)	lr 0.00314
Train [93][1050/3239]	Time 0.221 (0.645)	Data Time 0.001 (0.036)	Loss 2.3344 (2.2574)	Entropy 0.81471 (0.81625)	Top-1 acc 69.141 (70.119)	Top-5 acc 86.719 (87.667)	lr 0.00314
Train [93][1060/3239]	Time 0.228 (0.644)	Data Time 0.001 (0.035)	Loss 2.2992 (2.2570)	Entropy 0.81467 (0.81623)	Top-1 acc 69.922 (70.124)	Top-5 acc 85.938 (87.675)	lr 0.00314
Train [93][1070/3239]	Time 0.221 (0.642)	Data Time 0.001 (0.035)	Loss 2.3934 (2.2573)	Entropy 0.81465 (0.81622)	Top-1 acc 68.359 (70.114)	Top-5 acc 83.984 (87.669)	lr 0.00314
Train [93][1080/3239]	Time 0.236 (0.641)	Data Time 0.001 (0.035)	Loss 2.2517 (2.2574)	Entropy 0.81467 (0.81620)	Top-1 acc 69.141 (70.105)	Top-5 acc 87.891 (87.664)	lr 0.00314
Train [93][1090/3239]	Time 0.222 (0.639)	Data Time 0.001 (0.035)	Loss 2.1582 (2.2575)	Entropy 0.81463 (0.81619)	Top-1 acc 71.875 (70.101)	Top-5 acc 87.891 (87.660)	lr 0.00314
Train [93][1100/3239]	Time 0.246 (0.638)	Data Time 0.001 (0.034)	Loss 2.3237 (2.2571)	Entropy 0.81455 (0.81617)	Top-1 acc 69.922 (70.118)	Top-5 acc 87.891 (87.670)	lr 0.00314
Train [93][1110/3239]	Time 2.643 (0.636)	Data Time 0.001 (0.034)	Loss 2.3613 (2.2570)	Entropy 0.81455 (0.81616)	Top-1 acc 70.312 (70.119)	Top-5 acc 85.156 (87.676)	lr 0.00314
Train [93][1120/3239]	Time 0.235 (0.633)	Data Time 0.001 (0.034)	Loss 2.2670 (2.2573)	Entropy 0.81446 (0.81614)	Top-1 acc 68.750 (70.110)	Top-5 acc 89.062 (87.670)	lr 0.00314
Train [93][1130/3239]	Time 0.334 (0.631)	Data Time 0.001 (0.033)	Loss 2.2639 (2.2571)	Entropy 0.81446 (0.81613)	Top-1 acc 69.922 (70.109)	Top-5 acc 85.547 (87.673)	lr 0.00313
Train [93][1140/3239]	Time 0.242 (0.630)	Data Time 0.001 (0.033)	Loss 2.2375 (2.2571)	Entropy 0.81442 (0.81611)	Top-1 acc 69.141 (70.112)	Top-5 acc 88.672 (87.677)	lr 0.00313
Train [93][1150/3239]	Time 0.245 (0.629)	Data Time 0.001 (0.033)	Loss 2.1783 (2.2569)	Entropy 0.81433 (0.81610)	Top-1 acc 73.047 (70.112)	Top-5 acc 90.234 (87.681)	lr 0.00313
Train [93][1160/3239]	Time 0.227 (0.628)	Data Time 0.001 (0.033)	Loss 2.3612 (2.2578)	Entropy 0.81455 (0.81608)	Top-1 acc 69.141 (70.089)	Top-5 acc 86.328 (87.667)	lr 0.00313
Train [93][1170/3239]	Time 0.232 (0.626)	Data Time 0.001 (0.032)	Loss 2.4471 (2.2579)	Entropy 0.81456 (0.81607)	Top-1 acc 68.750 (70.090)	Top-5 acc 85.156 (87.666)	lr 0.00313
Train [93][1180/3239]	Time 0.223 (0.625)	Data Time 0.001 (0.032)	Loss 2.3016 (2.2576)	Entropy 0.81451 (0.81606)	Top-1 acc 68.359 (70.094)	Top-5 acc 86.719 (87.674)	lr 0.00313
Train [93][1190/3239]	Time 0.229 (0.623)	Data Time 0.001 (0.032)	Loss 2.2409 (2.2575)	Entropy 0.81445 (0.81605)	Top-1 acc 72.266 (70.101)	Top-5 acc 89.844 (87.682)	lr 0.00313
Train [93][1200/3239]	Time 0.221 (0.622)	Data Time 0.001 (0.032)	Loss 2.3397 (2.2572)	Entropy 0.81445 (0.81603)	Top-1 acc 69.141 (70.094)	Top-5 acc 84.766 (87.693)	lr 0.00313
Train [93][1210/3239]	Time 0.259 (0.621)	Data Time 0.001 (0.031)	Loss 2.0640 (2.2566)	Entropy 0.81443 (0.81602)	Top-1 acc 73.438 (70.107)	Top-5 acc 91.406 (87.705)	lr 0.00313
Train [93][1220/3239]	Time 2.614 (0.620)	Data Time 0.002 (0.031)	Loss 2.1314 (2.2565)	Entropy 0.81443 (0.81601)	Top-1 acc 71.484 (70.106)	Top-5 acc 90.234 (87.710)	lr 0.00313
Train [93][1230/3239]	Time 0.282 (0.617)	Data Time 0.001 (0.031)	Loss 2.2914 (2.2559)	Entropy 0.81444 (0.81599)	Top-1 acc 72.656 (70.131)	Top-5 acc 85.156 (87.723)	lr 0.00313
Train [93][1240/3239]	Time 0.259 (0.616)	Data Time 0.002 (0.031)	Loss 2.4134 (2.2569)	Entropy 0.81442 (0.81598)	Top-1 acc 67.188 (70.109)	Top-5 acc 83.594 (87.707)	lr 0.00313
Train [93][1250/3239]	Time 0.237 (0.615)	Data Time 0.001 (0.030)	Loss 2.1165 (2.2569)	Entropy 0.81454 (0.81597)	Top-1 acc 73.047 (70.108)	Top-5 acc 89.062 (87.710)	lr 0.00313
Train [93][1260/3239]	Time 0.349 (0.615)	Data Time 0.003 (0.030)	Loss 2.2544 (2.2569)	Entropy 0.81442 (0.81596)	Top-1 acc 69.531 (70.107)	Top-5 acc 88.672 (87.713)	lr 0.00313
Train [93][1270/3239]	Time 0.220 (0.614)	Data Time 0.002 (0.030)	Loss 2.1161 (2.2568)	Entropy 0.81438 (0.81594)	Top-1 acc 74.219 (70.111)	Top-5 acc 89.062 (87.713)	lr 0.00313
Train [93][1280/3239]	Time 0.235 (0.613)	Data Time 0.001 (0.030)	Loss 2.2621 (2.2568)	Entropy 0.81425 (0.81593)	Top-1 acc 71.094 (70.114)	Top-5 acc 86.719 (87.714)	lr 0.00312
Train [93][1290/3239]	Time 0.233 (0.612)	Data Time 0.001 (0.029)	Loss 2.1734 (2.2567)	Entropy 0.81416 (0.81592)	Top-1 acc 69.141 (70.110)	Top-5 acc 90.625 (87.717)	lr 0.00312
Train [93][1300/3239]	Time 0.247 (0.611)	Data Time 0.001 (0.029)	Loss 2.2584 (2.2566)	Entropy 0.81415 (0.81590)	Top-1 acc 69.922 (70.116)	Top-5 acc 89.062 (87.723)	lr 0.00312
Train [93][1310/3239]	Time 0.240 (0.610)	Data Time 0.001 (0.029)	Loss 2.0930 (2.2565)	Entropy 0.81414 (0.81589)	Top-1 acc 74.609 (70.116)	Top-5 acc 91.016 (87.721)	lr 0.00312
Train [93][1320/3239]	Time 0.258 (0.609)	Data Time 0.001 (0.029)	Loss 2.2423 (2.2566)	Entropy 0.81415 (0.81588)	Top-1 acc 71.875 (70.109)	Top-5 acc 85.547 (87.718)	lr 0.00312
Train [93][1330/3239]	Time 2.709 (0.608)	Data Time 0.002 (0.029)	Loss 2.2161 (2.2567)	Entropy 0.81415 (0.81587)	Top-1 acc 69.922 (70.104)	Top-5 acc 86.328 (87.713)	lr 0.00312
Train [93][1340/3239]	Time 0.228 (0.605)	Data Time 0.001 (0.028)	Loss 2.2013 (2.2567)	Entropy 0.81412 (0.81585)	Top-1 acc 71.484 (70.106)	Top-5 acc 89.844 (87.718)	lr 0.00312
Train [93][1350/3239]	Time 0.232 (0.604)	Data Time 0.001 (0.028)	Loss 2.4208 (2.2569)	Entropy 0.81416 (0.81584)	Top-1 acc 63.281 (70.101)	Top-5 acc 85.938 (87.715)	lr 0.00312
Train [93][1360/3239]	Time 0.238 (0.603)	Data Time 0.001 (0.028)	Loss 2.2911 (2.2569)	Entropy 0.81418 (0.81583)	Top-1 acc 71.484 (70.103)	Top-5 acc 87.109 (87.711)	lr 0.00312
Train [93][1370/3239]	Time 0.238 (0.642)	Data Time 0.002 (0.028)	Loss 2.1435 (2.2567)	Entropy 0.81412 (0.81582)	Top-1 acc 73.047 (70.109)	Top-5 acc 89.062 (87.713)	lr 0.00312
Train [93][1380/3239]	Time 0.245 (0.641)	Data Time 0.002 (0.028)	Loss 2.2566 (2.2567)	Entropy 0.81411 (0.81580)	Top-1 acc 66.797 (70.110)	Top-5 acc 87.500 (87.715)	lr 0.00312
Train [93][1390/3239]	Time 0.331 (0.640)	Data Time 0.002 (0.027)	Loss 2.3978 (2.2565)	Entropy 0.81414 (0.81579)	Top-1 acc 63.281 (70.115)	Top-5 acc 85.547 (87.716)	lr 0.00312
Train [93][1400/3239]	Time 0.235 (0.639)	Data Time 0.001 (0.027)	Loss 2.4004 (2.2571)	Entropy 0.81411 (0.81578)	Top-1 acc 68.750 (70.110)	Top-5 acc 83.203 (87.702)	lr 0.00312
Train [93][1410/3239]	Time 0.238 (0.637)	Data Time 0.001 (0.027)	Loss 2.0666 (2.2569)	Entropy 0.81412 (0.81577)	Top-1 acc 73.438 (70.110)	Top-5 acc 91.406 (87.707)	lr 0.00312
Train [93][1420/3239]	Time 0.233 (0.636)	Data Time 0.001 (0.027)	Loss 2.1697 (2.2569)	Entropy 0.81405 (0.81576)	Top-1 acc 71.484 (70.104)	Top-5 acc 90.625 (87.710)	lr 0.00312
Train [93][1430/3239]	Time 0.328 (0.635)	Data Time 0.001 (0.027)	Loss 2.3610 (2.2572)	Entropy 0.81402 (0.81574)	Top-1 acc 68.750 (70.098)	Top-5 acc 84.766 (87.703)	lr 0.00311
Train [93][1440/3239]	Time 2.596 (0.634)	Data Time 0.001 (0.027)	Loss 2.2177 (2.2572)	Entropy 0.81402 (0.81573)	Top-1 acc 70.703 (70.099)	Top-5 acc 89.062 (87.701)	lr 0.00311
Train [93][1450/3239]	Time 0.234 (0.632)	Data Time 0.001 (0.026)	Loss 2.3425 (2.2573)	Entropy 0.81398 (0.81572)	Top-1 acc 67.969 (70.095)	Top-5 acc 84.766 (87.701)	lr 0.00311
Train [93][1460/3239]	Time 0.243 (0.631)	Data Time 0.001 (0.026)	Loss 2.3607 (2.2575)	Entropy 0.81398 (0.81571)	Top-1 acc 68.750 (70.087)	Top-5 acc 83.594 (87.694)	lr 0.00311
Train [93][1470/3239]	Time 0.273 (0.630)	Data Time 0.001 (0.026)	Loss 2.0886 (2.2571)	Entropy 0.81395 (0.81570)	Top-1 acc 76.562 (70.093)	Top-5 acc 91.016 (87.702)	lr 0.00311
Train [93][1480/3239]	Time 0.231 (0.629)	Data Time 0.002 (0.026)	Loss 2.4296 (2.2567)	Entropy 0.81393 (0.81568)	Top-1 acc 68.359 (70.106)	Top-5 acc 82.812 (87.708)	lr 0.00311
Train [93][1490/3239]	Time 0.236 (0.628)	Data Time 0.001 (0.026)	Loss 2.1362 (2.2570)	Entropy 0.81388 (0.81567)	Top-1 acc 73.047 (70.103)	Top-5 acc 90.234 (87.697)	lr 0.00311
Train [93][1500/3239]	Time 0.236 (0.627)	Data Time 0.001 (0.026)	Loss 2.2773 (2.2572)	Entropy 0.81383 (0.81566)	Top-1 acc 71.094 (70.095)	Top-5 acc 87.109 (87.694)	lr 0.00311
Train [93][1510/3239]	Time 0.230 (0.626)	Data Time 0.001 (0.025)	Loss 2.2019 (2.2571)	Entropy 0.81382 (0.81565)	Top-1 acc 69.922 (70.098)	Top-5 acc 88.281 (87.696)	lr 0.00311
Train [93][1520/3239]	Time 0.233 (0.625)	Data Time 0.001 (0.025)	Loss 2.2525 (2.2573)	Entropy 0.81375 (0.81564)	Top-1 acc 71.094 (70.092)	Top-5 acc 89.062 (87.692)	lr 0.00311
Train [93][1530/3239]	Time 0.228 (0.624)	Data Time 0.001 (0.025)	Loss 2.2292 (2.2570)	Entropy 0.81370 (0.81562)	Top-1 acc 72.266 (70.099)	Top-5 acc 88.281 (87.696)	lr 0.00311
Train [93][1540/3239]	Time 0.241 (0.623)	Data Time 0.002 (0.025)	Loss 2.2653 (2.2571)	Entropy 0.81359 (0.81561)	Top-1 acc 69.922 (70.092)	Top-5 acc 86.719 (87.698)	lr 0.00311
Train [93][1550/3239]	Time 2.533 (0.622)	Data Time 0.002 (0.025)	Loss 2.1355 (2.2572)	Entropy 0.81359 (0.81560)	Top-1 acc 74.219 (70.087)	Top-5 acc 89.844 (87.696)	lr 0.00311
Train [93][1560/3239]	Time 0.338 (0.619)	Data Time 0.001 (0.025)	Loss 2.3044 (2.2573)	Entropy 0.81359 (0.81558)	Top-1 acc 71.484 (70.088)	Top-5 acc 89.062 (87.692)	lr 0.00311
Train [93][1570/3239]	Time 0.264 (0.619)	Data Time 0.001 (0.024)	Loss 2.2095 (2.2572)	Entropy 0.81360 (0.81557)	Top-1 acc 69.531 (70.085)	Top-5 acc 89.062 (87.698)	lr 0.00311
Train [93][1580/3239]	Time 0.238 (0.618)	Data Time 0.001 (0.024)	Loss 2.2145 (2.2574)	Entropy 0.81357 (0.81556)	Top-1 acc 72.656 (70.083)	Top-5 acc 88.281 (87.695)	lr 0.00310
Train [93][1590/3239]	Time 0.236 (0.617)	Data Time 0.001 (0.024)	Loss 2.5430 (2.2577)	Entropy 0.81341 (0.81555)	Top-1 acc 62.891 (70.071)	Top-5 acc 81.250 (87.689)	lr 0.00310
Train [93][1600/3239]	Time 0.248 (0.616)	Data Time 0.001 (0.024)	Loss 2.1090 (2.2575)	Entropy 0.81363 (0.81553)	Top-1 acc 74.609 (70.078)	Top-5 acc 92.188 (87.695)	lr 0.00310
Train [93][1610/3239]	Time 0.221 (0.615)	Data Time 0.001 (0.024)	Loss 2.0843 (2.2576)	Entropy 0.81363 (0.81552)	Top-1 acc 75.391 (70.077)	Top-5 acc 89.453 (87.692)	lr 0.00310
Train [93][1620/3239]	Time 0.225 (0.614)	Data Time 0.001 (0.024)	Loss 2.3650 (2.2575)	Entropy 0.81360 (0.81551)	Top-1 acc 70.312 (70.080)	Top-5 acc 84.766 (87.691)	lr 0.00310
Train [93][1630/3239]	Time 0.227 (0.613)	Data Time 0.001 (0.024)	Loss 2.1958 (2.2576)	Entropy 0.81346 (0.81550)	Top-1 acc 73.438 (70.082)	Top-5 acc 87.891 (87.688)	lr 0.00310
Train [93][1640/3239]	Time 0.227 (0.612)	Data Time 0.001 (0.023)	Loss 2.0938 (2.2577)	Entropy 0.81348 (0.81549)	Top-1 acc 72.266 (70.083)	Top-5 acc 90.234 (87.685)	lr 0.00310
Train [93][1650/3239]	Time 0.240 (0.612)	Data Time 0.001 (0.023)	Loss 2.1938 (2.2577)	Entropy 0.81334 (0.81547)	Top-1 acc 69.922 (70.074)	Top-5 acc 88.281 (87.684)	lr 0.00310
Train [93][1660/3239]	Time 2.620 (0.611)	Data Time 0.001 (0.023)	Loss 2.1218 (2.2577)	Entropy 0.81334 (0.81546)	Top-1 acc 72.656 (70.081)	Top-5 acc 90.234 (87.686)	lr 0.00310
Train [93][1670/3239]	Time 0.246 (0.609)	Data Time 0.001 (0.023)	Loss 2.2289 (2.2575)	Entropy 0.81324 (0.81545)	Top-1 acc 72.266 (70.086)	Top-5 acc 86.719 (87.685)	lr 0.00310
Train [93][1680/3239]	Time 0.239 (0.608)	Data Time 0.001 (0.023)	Loss 2.1566 (2.2577)	Entropy 0.81321 (0.81543)	Top-1 acc 70.312 (70.080)	Top-5 acc 89.062 (87.679)	lr 0.00310
Train [93][1690/3239]	Time 0.231 (0.607)	Data Time 0.001 (0.023)	Loss 2.1793 (2.2577)	Entropy 0.81322 (0.81542)	Top-1 acc 71.094 (70.078)	Top-5 acc 88.672 (87.685)	lr 0.00310
Train [93][1700/3239]	Time 0.228 (0.606)	Data Time 0.001 (0.023)	Loss 2.2691 (2.2576)	Entropy 0.81321 (0.81541)	Top-1 acc 71.875 (70.082)	Top-5 acc 87.891 (87.686)	lr 0.00310
Train [93][1710/3239]	Time 0.273 (0.606)	Data Time 0.001 (0.023)	Loss 2.2403 (2.2581)	Entropy 0.81323 (0.81540)	Top-1 acc 70.703 (70.078)	Top-5 acc 87.109 (87.677)	lr 0.00310
Train [93][1720/3239]	Time 0.225 (0.605)	Data Time 0.001 (0.022)	Loss 2.3349 (2.2583)	Entropy 0.81319 (0.81538)	Top-1 acc 67.188 (70.071)	Top-5 acc 86.328 (87.670)	lr 0.00310
Train [93][1730/3239]	Time 0.331 (0.635)	Data Time 0.002 (0.022)	Loss 2.0642 (2.2582)	Entropy 0.81313 (0.81537)	Top-1 acc 76.562 (70.081)	Top-5 acc 92.969 (87.674)	lr 0.00309
Train [93][1740/3239]	Time 0.233 (0.634)	Data Time 0.002 (0.022)	Loss 2.0992 (2.2579)	Entropy 0.81312 (0.81536)	Top-1 acc 75.391 (70.085)	Top-5 acc 89.844 (87.677)	lr 0.00309
Train [93][1750/3239]	Time 0.235 (0.633)	Data Time 0.002 (0.022)	Loss 2.3861 (2.2582)	Entropy 0.81310 (0.81534)	Top-1 acc 66.797 (70.081)	Top-5 acc 84.766 (87.668)	lr 0.00309
Train [93][1760/3239]	Time 0.232 (0.633)	Data Time 0.001 (0.022)	Loss 2.4505 (2.2582)	Entropy 0.81308 (0.81533)	Top-1 acc 65.625 (70.084)	Top-5 acc 86.328 (87.671)	lr 0.00309
Train [93][1770/3239]	Time 2.621 (0.632)	Data Time 0.002 (0.022)	Loss 2.1742 (2.2583)	Entropy 0.81308 (0.81532)	Top-1 acc 73.828 (70.081)	Top-5 acc 87.109 (87.668)	lr 0.00309
Train [93][1780/3239]	Time 0.240 (0.629)	Data Time 0.001 (0.022)	Loss 2.2051 (2.2583)	Entropy 0.81308 (0.81531)	Top-1 acc 67.188 (70.080)	Top-5 acc 87.109 (87.672)	lr 0.00309
Train [93][1790/3239]	Time 0.230 (0.629)	Data Time 0.001 (0.022)	Loss 2.4609 (2.2584)	Entropy 0.81303 (0.81529)	Top-1 acc 65.234 (70.077)	Top-5 acc 82.031 (87.672)	lr 0.00309
Train [93][1800/3239]	Time 0.229 (0.628)	Data Time 0.002 (0.022)	Loss 2.2497 (2.2585)	Entropy 0.81299 (0.81528)	Top-1 acc 70.312 (70.073)	Top-5 acc 86.719 (87.666)	lr 0.00309
Train [93][1810/3239]	Time 0.229 (0.627)	Data Time 0.001 (0.021)	Loss 2.2277 (2.2586)	Entropy 0.81285 (0.81527)	Top-1 acc 71.875 (70.073)	Top-5 acc 86.719 (87.665)	lr 0.00309
Train [93][1820/3239]	Time 0.234 (0.626)	Data Time 0.001 (0.021)	Loss 2.2851 (2.2587)	Entropy 0.81280 (0.81525)	Top-1 acc 68.750 (70.070)	Top-5 acc 87.109 (87.665)	lr 0.00309
Train [93][1830/3239]	Time 0.228 (0.625)	Data Time 0.001 (0.021)	Loss 2.3704 (2.2585)	Entropy 0.81275 (0.81524)	Top-1 acc 67.578 (70.071)	Top-5 acc 86.719 (87.665)	lr 0.00309
Train [93][1840/3239]	Time 0.230 (0.625)	Data Time 0.001 (0.021)	Loss 2.2027 (2.2588)	Entropy 0.81274 (0.81523)	Top-1 acc 71.875 (70.066)	Top-5 acc 89.062 (87.661)	lr 0.00309
Train [93][1850/3239]	Time 0.224 (0.624)	Data Time 0.001 (0.021)	Loss 2.2907 (2.2590)	Entropy 0.81272 (0.81521)	Top-1 acc 67.969 (70.059)	Top-5 acc 87.500 (87.658)	lr 0.00309
Train [93][1860/3239]	Time 0.255 (0.623)	Data Time 0.001 (0.021)	Loss 2.5337 (2.2592)	Entropy 0.81269 (0.81520)	Top-1 acc 64.062 (70.057)	Top-5 acc 82.422 (87.654)	lr 0.00309
Train [93][1870/3239]	Time 0.225 (0.622)	Data Time 0.001 (0.021)	Loss 2.1752 (2.2590)	Entropy 0.81269 (0.81519)	Top-1 acc 73.438 (70.061)	Top-5 acc 87.109 (87.661)	lr 0.00309
Train [93][1880/3239]	Time 2.644 (0.621)	Data Time 0.001 (0.021)	Loss 2.2235 (2.2587)	Entropy 0.81269 (0.81517)	Top-1 acc 71.875 (70.067)	Top-5 acc 88.281 (87.667)	lr 0.00308
Train [93][1890/3239]	Time 0.237 (0.619)	Data Time 0.001 (0.021)	Loss 2.4482 (2.2589)	Entropy 0.81269 (0.81516)	Top-1 acc 68.359 (70.065)	Top-5 acc 83.594 (87.665)	lr 0.00308
Train [93][1900/3239]	Time 0.372 (0.619)	Data Time 0.002 (0.021)	Loss 2.0522 (2.2588)	Entropy 0.81267 (0.81515)	Top-1 acc 76.172 (70.066)	Top-5 acc 92.578 (87.668)	lr 0.00308
Train [93][1910/3239]	Time 0.244 (0.618)	Data Time 0.001 (0.020)	Loss 2.3350 (2.2589)	Entropy 0.81269 (0.81513)	Top-1 acc 69.922 (70.064)	Top-5 acc 82.812 (87.663)	lr 0.00308
Train [93][1920/3239]	Time 0.235 (0.617)	Data Time 0.001 (0.020)	Loss 2.3699 (2.2591)	Entropy 0.81268 (0.81512)	Top-1 acc 66.797 (70.059)	Top-5 acc 83.594 (87.661)	lr 0.00308
Train [93][1930/3239]	Time 0.228 (0.617)	Data Time 0.001 (0.020)	Loss 2.1335 (2.2589)	Entropy 0.81262 (0.81511)	Top-1 acc 76.172 (70.067)	Top-5 acc 87.891 (87.659)	lr 0.00308
Train [93][1940/3239]	Time 0.333 (0.616)	Data Time 0.001 (0.020)	Loss 2.2006 (2.2590)	Entropy 0.81259 (0.81510)	Top-1 acc 69.141 (70.066)	Top-5 acc 87.109 (87.656)	lr 0.00308
Train [93][1950/3239]	Time 0.237 (0.615)	Data Time 0.002 (0.020)	Loss 2.3139 (2.2589)	Entropy 0.81264 (0.81508)	Top-1 acc 69.922 (70.068)	Top-5 acc 85.938 (87.655)	lr 0.00308
Train [93][1960/3239]	Time 0.219 (0.615)	Data Time 0.001 (0.020)	Loss 2.2721 (2.2589)	Entropy 0.81265 (0.81507)	Top-1 acc 67.969 (70.069)	Top-5 acc 88.281 (87.657)	lr 0.00308
Train [93][1970/3239]	Time 0.222 (0.614)	Data Time 0.001 (0.020)	Loss 2.0850 (2.2588)	Entropy 0.81268 (0.81506)	Top-1 acc 73.438 (70.072)	Top-5 acc 92.969 (87.657)	lr 0.00308
Train [93][1980/3239]	Time 0.242 (0.613)	Data Time 0.001 (0.020)	Loss 2.2286 (2.2587)	Entropy 0.81264 (0.81505)	Top-1 acc 74.219 (70.079)	Top-5 acc 87.891 (87.659)	lr 0.00308
Train [93][1990/3239]	Time 2.605 (0.613)	Data Time 0.001 (0.020)	Loss 2.1998 (2.2589)	Entropy 0.81264 (0.81503)	Top-1 acc 73.047 (70.072)	Top-5 acc 87.891 (87.655)	lr 0.00308
Train [93][2000/3239]	Time 0.234 (0.611)	Data Time 0.001 (0.020)	Loss 2.2044 (2.2590)	Entropy 0.81261 (0.81502)	Top-1 acc 69.141 (70.070)	Top-5 acc 89.062 (87.655)	lr 0.00308
Train [93][2010/3239]	Time 0.244 (0.610)	Data Time 0.001 (0.019)	Loss 2.3523 (2.2590)	Entropy 0.81261 (0.81501)	Top-1 acc 66.797 (70.071)	Top-5 acc 83.984 (87.653)	lr 0.00308
Train [93][2020/3239]	Time 0.228 (0.610)	Data Time 0.001 (0.019)	Loss 2.3541 (2.2592)	Entropy 0.81256 (0.81500)	Top-1 acc 70.703 (70.072)	Top-5 acc 85.156 (87.649)	lr 0.00308
Train [93][2030/3239]	Time 0.220 (0.609)	Data Time 0.001 (0.019)	Loss 2.2795 (2.2593)	Entropy 0.81256 (0.81499)	Top-1 acc 71.484 (70.070)	Top-5 acc 87.109 (87.648)	lr 0.00307
Train [93][2040/3239]	Time 0.231 (0.608)	Data Time 0.001 (0.019)	Loss 2.3360 (2.2596)	Entropy 0.81261 (0.81497)	Top-1 acc 70.312 (70.066)	Top-5 acc 85.156 (87.642)	lr 0.00307
Train [93][2050/3239]	Time 0.231 (0.608)	Data Time 0.001 (0.019)	Loss 2.2067 (2.2595)	Entropy 0.81256 (0.81496)	Top-1 acc 69.922 (70.071)	Top-5 acc 87.891 (87.641)	lr 0.00307
Train [93][2060/3239]	Time 0.234 (0.607)	Data Time 0.001 (0.019)	Loss 2.2650 (2.2597)	Entropy 0.81252 (0.81495)	Top-1 acc 67.969 (70.067)	Top-5 acc 87.891 (87.642)	lr 0.00307
Train [93][2070/3239]	Time 0.319 (0.606)	Data Time 0.001 (0.019)	Loss 2.3282 (2.2596)	Entropy 0.81264 (0.81494)	Top-1 acc 71.094 (70.073)	Top-5 acc 85.547 (87.644)	lr 0.00307
Train [93][2080/3239]	Time 0.234 (0.606)	Data Time 0.002 (0.019)	Loss 2.1624 (2.2596)	Entropy 0.81265 (0.81493)	Top-1 acc 69.922 (70.072)	Top-5 acc 90.625 (87.644)	lr 0.00307
Train [93][2090/3239]	Time 0.254 (0.631)	Data Time 0.003 (0.019)	Loss 2.5170 (2.2596)	Entropy 0.81255 (0.81492)	Top-1 acc 67.188 (70.075)	Top-5 acc 82.031 (87.643)	lr 0.00307
Train [93][2100/3239]	Time 2.645 (0.630)	Data Time 0.002 (0.019)	Loss 2.2640 (2.2599)	Entropy 0.81255 (0.81491)	Top-1 acc 72.266 (70.063)	Top-5 acc 87.891 (87.637)	lr 0.00307
Train [93][2110/3239]	Time 0.326 (0.628)	Data Time 0.002 (0.019)	Loss 2.4626 (2.2601)	Entropy 0.81259 (0.81489)	Top-1 acc 64.062 (70.060)	Top-5 acc 84.766 (87.634)	lr 0.00307
Train [93][2120/3239]	Time 0.224 (0.627)	Data Time 0.001 (0.019)	Loss 2.2641 (2.2599)	Entropy 0.81257 (0.81488)	Top-1 acc 69.141 (70.063)	Top-5 acc 87.109 (87.638)	lr 0.00307
Train [93][2130/3239]	Time 0.268 (0.627)	Data Time 0.001 (0.018)	Loss 2.1736 (2.2598)	Entropy 0.81248 (0.81487)	Top-1 acc 73.828 (70.063)	Top-5 acc 89.062 (87.643)	lr 0.00307
Train [93][2140/3239]	Time 0.222 (0.626)	Data Time 0.001 (0.018)	Loss 2.3783 (2.2596)	Entropy 0.81251 (0.81486)	Top-1 acc 63.672 (70.068)	Top-5 acc 85.938 (87.646)	lr 0.00307
Train [93][2150/3239]	Time 0.343 (0.625)	Data Time 0.001 (0.018)	Loss 2.3702 (2.2596)	Entropy 0.81253 (0.81485)	Top-1 acc 68.359 (70.070)	Top-5 acc 85.156 (87.645)	lr 0.00307
Train [93][2160/3239]	Time 0.219 (0.625)	Data Time 0.001 (0.018)	Loss 2.3070 (2.2595)	Entropy 0.81256 (0.81484)	Top-1 acc 70.312 (70.070)	Top-5 acc 86.328 (87.646)	lr 0.00307
Train [93][2170/3239]	Time 0.228 (0.624)	Data Time 0.001 (0.018)	Loss 2.2268 (2.2594)	Entropy 0.81270 (0.81483)	Top-1 acc 68.359 (70.070)	Top-5 acc 89.453 (87.648)	lr 0.00307
Train [93][2180/3239]	Time 0.223 (0.623)	Data Time 0.001 (0.018)	Loss 2.3528 (2.2596)	Entropy 0.81274 (0.81482)	Top-1 acc 68.750 (70.069)	Top-5 acc 84.766 (87.645)	lr 0.00306
Train [93][2190/3239]	Time 0.226 (0.623)	Data Time 0.001 (0.018)	Loss 2.3633 (2.2596)	Entropy 0.81272 (0.81481)	Top-1 acc 67.969 (70.066)	Top-5 acc 83.984 (87.644)	lr 0.00306
Train [93][2200/3239]	Time 0.211 (0.622)	Data Time 0.001 (0.018)	Loss 2.2392 (2.2596)	Entropy 0.81275 (0.81480)	Top-1 acc 69.922 (70.068)	Top-5 acc 88.281 (87.643)	lr 0.00306
Train [93][2210/3239]	Time 2.540 (0.621)	Data Time 0.001 (0.018)	Loss 2.2572 (2.2595)	Entropy 0.81275 (0.81479)	Top-1 acc 70.312 (70.074)	Top-5 acc 88.281 (87.643)	lr 0.00306
Train [93][2220/3239]	Time 0.229 (0.620)	Data Time 0.001 (0.018)	Loss 2.4407 (2.2596)	Entropy 0.81280 (0.81478)	Top-1 acc 67.188 (70.067)	Top-5 acc 84.375 (87.642)	lr 0.00306
Train [93][2230/3239]	Time 0.265 (0.619)	Data Time 0.002 (0.018)	Loss 2.1339 (2.2597)	Entropy 0.81281 (0.81477)	Top-1 acc 72.266 (70.062)	Top-5 acc 90.234 (87.641)	lr 0.00306
Train [93][2240/3239]	Time 0.333 (0.619)	Data Time 0.001 (0.018)	Loss 2.2333 (2.2597)	Entropy 0.81280 (0.81477)	Top-1 acc 70.312 (70.063)	Top-5 acc 89.453 (87.641)	lr 0.00306
Train [93][2250/3239]	Time 0.240 (0.618)	Data Time 0.002 (0.018)	Loss 2.2475 (2.2599)	Entropy 0.81279 (0.81476)	Top-1 acc 69.922 (70.056)	Top-5 acc 90.234 (87.639)	lr 0.00306
Train [93][2260/3239]	Time 0.236 (0.617)	Data Time 0.001 (0.018)	Loss 2.1857 (2.2599)	Entropy 0.81278 (0.81475)	Top-1 acc 69.922 (70.055)	Top-5 acc 89.844 (87.638)	lr 0.00306
Train [93][2270/3239]	Time 0.288 (0.617)	Data Time 0.001 (0.017)	Loss 2.2689 (2.2601)	Entropy 0.81275 (0.81474)	Top-1 acc 70.312 (70.050)	Top-5 acc 87.500 (87.635)	lr 0.00306
Train [93][2280/3239]	Time 0.359 (0.616)	Data Time 0.001 (0.017)	Loss 2.2894 (2.2600)	Entropy 0.81278 (0.81473)	Top-1 acc 68.750 (70.056)	Top-5 acc 88.281 (87.639)	lr 0.00306
Train [93][2290/3239]	Time 0.223 (0.616)	Data Time 0.001 (0.017)	Loss 2.1829 (2.2600)	Entropy 0.81275 (0.81472)	Top-1 acc 71.875 (70.055)	Top-5 acc 88.281 (87.642)	lr 0.00306
Train [93][2300/3239]	Time 0.239 (0.615)	Data Time 0.001 (0.017)	Loss 2.1430 (2.2600)	Entropy 0.81274 (0.81471)	Top-1 acc 72.266 (70.054)	Top-5 acc 89.844 (87.640)	lr 0.00306
Train [93][2310/3239]	Time 0.223 (0.615)	Data Time 0.001 (0.017)	Loss 2.4634 (2.2602)	Entropy 0.81270 (0.81470)	Top-1 acc 63.281 (70.047)	Top-5 acc 84.375 (87.636)	lr 0.00306
Train [93][2320/3239]	Time 2.694 (0.614)	Data Time 0.001 (0.017)	Loss 2.1539 (2.2601)	Entropy 0.81270 (0.81470)	Top-1 acc 73.438 (70.049)	Top-5 acc 89.453 (87.638)	lr 0.00306
Train [93][2330/3239]	Time 0.239 (0.612)	Data Time 0.001 (0.017)	Loss 2.3474 (2.2600)	Entropy 0.81267 (0.81469)	Top-1 acc 69.531 (70.056)	Top-5 acc 85.547 (87.640)	lr 0.00305
Train [93][2340/3239]	Time 0.231 (0.612)	Data Time 0.001 (0.017)	Loss 2.2624 (2.2600)	Entropy 0.81270 (0.81468)	Top-1 acc 66.797 (70.053)	Top-5 acc 88.281 (87.638)	lr 0.00305
Train [93][2350/3239]	Time 0.227 (0.611)	Data Time 0.001 (0.017)	Loss 2.1239 (2.2596)	Entropy 0.81271 (0.81467)	Top-1 acc 72.656 (70.065)	Top-5 acc 90.234 (87.645)	lr 0.00305
Train [93][2360/3239]	Time 0.252 (0.611)	Data Time 0.001 (0.017)	Loss 2.2979 (2.2598)	Entropy 0.81270 (0.81466)	Top-1 acc 66.016 (70.060)	Top-5 acc 85.938 (87.642)	lr 0.00305
Train [93][2370/3239]	Time 0.220 (0.610)	Data Time 0.001 (0.017)	Loss 2.2742 (2.2598)	Entropy 0.81266 (0.81465)	Top-1 acc 72.266 (70.062)	Top-5 acc 87.500 (87.640)	lr 0.00305
Train [93][2380/3239]	Time 0.222 (0.610)	Data Time 0.001 (0.017)	Loss 2.1319 (2.2599)	Entropy 0.81262 (0.81465)	Top-1 acc 72.656 (70.058)	Top-5 acc 89.453 (87.638)	lr 0.00305
Train [93][2390/3239]	Time 0.230 (0.609)	Data Time 0.001 (0.017)	Loss 2.2689 (2.2600)	Entropy 0.81263 (0.81464)	Top-1 acc 73.047 (70.055)	Top-5 acc 85.547 (87.638)	lr 0.00305
Train [93][2400/3239]	Time 0.227 (0.608)	Data Time 0.001 (0.017)	Loss 2.3174 (2.2600)	Entropy 0.81258 (0.81463)	Top-1 acc 71.094 (70.056)	Top-5 acc 85.547 (87.638)	lr 0.00305
Train [93][2410/3239]	Time 0.227 (0.608)	Data Time 0.001 (0.017)	Loss 2.1975 (2.2599)	Entropy 0.81254 (0.81462)	Top-1 acc 72.266 (70.063)	Top-5 acc 89.062 (87.640)	lr 0.00305
Train [93][2420/3239]	Time 0.243 (0.607)	Data Time 0.001 (0.016)	Loss 2.3118 (2.2599)	Entropy 0.81251 (0.81461)	Top-1 acc 71.875 (70.062)	Top-5 acc 85.156 (87.638)	lr 0.00305
Train [93][2430/3239]	Time 2.537 (0.607)	Data Time 0.001 (0.016)	Loss 2.2944 (2.2599)	Entropy 0.81251 (0.81460)	Top-1 acc 65.625 (70.061)	Top-5 acc 88.281 (87.640)	lr 0.00305
Train [93][2440/3239]	Time 0.243 (0.605)	Data Time 0.001 (0.016)	Loss 2.2379 (2.2600)	Entropy 0.81247 (0.81459)	Top-1 acc 69.141 (70.060)	Top-5 acc 88.672 (87.638)	lr 0.00305
Train [93][2450/3239]	Time 0.331 (0.605)	Data Time 0.001 (0.016)	Loss 2.2060 (2.2600)	Entropy 0.81246 (0.81459)	Top-1 acc 72.656 (70.062)	Top-5 acc 88.672 (87.639)	lr 0.00305
Train [93][2460/3239]	Time 0.253 (0.626)	Data Time 0.002 (0.016)	Loss 2.2909 (2.2599)	Entropy 0.81242 (0.81458)	Top-1 acc 71.094 (70.064)	Top-5 acc 87.891 (87.642)	lr 0.00305
Train [93][2470/3239]	Time 0.232 (0.626)	Data Time 0.002 (0.016)	Loss 2.2177 (2.2600)	Entropy 0.81241 (0.81457)	Top-1 acc 68.359 (70.057)	Top-5 acc 88.281 (87.641)	lr 0.00305
Train [93][2480/3239]	Time 0.220 (0.625)	Data Time 0.001 (0.016)	Loss 2.2444 (2.2598)	Entropy 0.81242 (0.81456)	Top-1 acc 69.141 (70.061)	Top-5 acc 89.453 (87.645)	lr 0.00304
Train [93][2490/3239]	Time 0.328 (0.625)	Data Time 0.001 (0.016)	Loss 2.2111 (2.2596)	Entropy 0.81242 (0.81455)	Top-1 acc 70.703 (70.067)	Top-5 acc 87.891 (87.648)	lr 0.00304
Train [93][2500/3239]	Time 0.219 (0.624)	Data Time 0.001 (0.016)	Loss 2.2607 (2.2598)	Entropy 0.81242 (0.81454)	Top-1 acc 66.406 (70.062)	Top-5 acc 88.281 (87.645)	lr 0.00304
Train [93][2510/3239]	Time 0.233 (0.623)	Data Time 0.001 (0.016)	Loss 2.2867 (2.2597)	Entropy 0.81241 (0.81453)	Top-1 acc 70.703 (70.064)	Top-5 acc 87.109 (87.643)	lr 0.00304
Train [93][2520/3239]	Time 0.219 (0.623)	Data Time 0.001 (0.016)	Loss 2.2927 (2.2596)	Entropy 0.81242 (0.81452)	Top-1 acc 71.484 (70.067)	Top-5 acc 86.719 (87.645)	lr 0.00304
Train [93][2530/3239]	Time 0.320 (0.622)	Data Time 0.001 (0.016)	Loss 2.1810 (2.2595)	Entropy 0.81243 (0.81452)	Top-1 acc 71.094 (70.071)	Top-5 acc 87.500 (87.648)	lr 0.00304
Train [93][2540/3239]	Time 2.662 (0.622)	Data Time 0.001 (0.016)	Loss 2.4090 (2.2594)	Entropy 0.81243 (0.81451)	Top-1 acc 67.188 (70.071)	Top-5 acc 85.547 (87.650)	lr 0.00304
Train [93][2550/3239]	Time 0.248 (0.620)	Data Time 0.001 (0.016)	Loss 2.3104 (2.2594)	Entropy 0.81235 (0.81450)	Top-1 acc 70.312 (70.072)	Top-5 acc 85.156 (87.647)	lr 0.00304
Train [93][2560/3239]	Time 0.233 (0.620)	Data Time 0.002 (0.016)	Loss 2.3830 (2.2594)	Entropy 0.81225 (0.81449)	Top-1 acc 64.844 (70.070)	Top-5 acc 85.938 (87.649)	lr 0.00304
Train [93][2570/3239]	Time 0.242 (0.619)	Data Time 0.001 (0.016)	Loss 2.3895 (2.2595)	Entropy 0.81225 (0.81448)	Top-1 acc 66.016 (70.067)	Top-5 acc 85.547 (87.644)	lr 0.00304
Train [93][2580/3239]	Time 0.168 (0.619)	Data Time 0.001 (0.016)	Loss 2.3952 (2.2596)	Entropy 0.81221 (0.81447)	Top-1 acc 65.625 (70.059)	Top-5 acc 87.500 (87.646)	lr 0.00304
Train [93][2590/3239]	Time 0.231 (0.618)	Data Time 0.001 (0.015)	Loss 2.1726 (2.2596)	Entropy 0.81213 (0.81446)	Top-1 acc 69.141 (70.055)	Top-5 acc 89.062 (87.645)	lr 0.00304
Train [93][2600/3239]	Time 0.236 (0.618)	Data Time 0.001 (0.015)	Loss 2.1750 (2.2598)	Entropy 0.81201 (0.81446)	Top-1 acc 74.609 (70.054)	Top-5 acc 88.281 (87.642)	lr 0.00304
Train [93][2610/3239]	Time 0.229 (0.617)	Data Time 0.001 (0.015)	Loss 2.1453 (2.2597)	Entropy 0.81214 (0.81445)	Top-1 acc 69.531 (70.051)	Top-5 acc 90.234 (87.645)	lr 0.00304
Train [93][2620/3239]	Time 0.217 (0.617)	Data Time 0.002 (0.015)	Loss 2.3719 (2.2597)	Entropy 0.81214 (0.81444)	Top-1 acc 67.578 (70.049)	Top-5 acc 87.109 (87.645)	lr 0.00304
Train [93][2630/3239]	Time 0.210 (0.616)	Data Time 0.001 (0.015)	Loss 2.1120 (2.2598)	Entropy 0.81215 (0.81443)	Top-1 acc 73.438 (70.049)	Top-5 acc 90.625 (87.646)	lr 0.00304
Train [93][2640/3239]	Time 0.229 (0.616)	Data Time 0.001 (0.015)	Loss 2.2248 (2.2597)	Entropy 0.81213 (0.81442)	Top-1 acc 70.312 (70.054)	Top-5 acc 88.672 (87.646)	lr 0.00303
Train [93][2650/3239]	Time 0.223 (0.615)	Data Time 0.002 (0.015)	Loss 2.3079 (2.2597)	Entropy 0.81213 (0.81441)	Top-1 acc 71.094 (70.053)	Top-5 acc 85.938 (87.647)	lr 0.00303
Train [93][2660/3239]	Time 0.224 (0.615)	Data Time 0.001 (0.015)	Loss 2.2385 (2.2597)	Entropy 0.81206 (0.81440)	Top-1 acc 68.750 (70.053)	Top-5 acc 87.109 (87.647)	lr 0.00303
Train [93][2670/3239]	Time 0.225 (0.614)	Data Time 0.001 (0.015)	Loss 2.2730 (2.2598)	Entropy 0.81204 (0.81439)	Top-1 acc 68.750 (70.049)	Top-5 acc 89.062 (87.645)	lr 0.00303
Train [93][2680/3239]	Time 0.233 (0.614)	Data Time 0.001 (0.015)	Loss 2.2862 (2.2598)	Entropy 0.81203 (0.81439)	Top-1 acc 72.266 (70.046)	Top-5 acc 88.672 (87.644)	lr 0.00303
Train [93][2690/3239]	Time 0.228 (0.613)	Data Time 0.001 (0.015)	Loss 2.1717 (2.2599)	Entropy 0.81203 (0.81438)	Top-1 acc 71.484 (70.045)	Top-5 acc 91.797 (87.642)	lr 0.00303
Train [93][2700/3239]	Time 0.316 (0.612)	Data Time 0.001 (0.015)	Loss 2.2491 (2.2598)	Entropy 0.81206 (0.81437)	Top-1 acc 68.750 (70.042)	Top-5 acc 87.109 (87.643)	lr 0.00303
Train [93][2710/3239]	Time 0.229 (0.612)	Data Time 0.001 (0.015)	Loss 2.2514 (2.2600)	Entropy 0.81201 (0.81436)	Top-1 acc 70.703 (70.038)	Top-5 acc 88.281 (87.637)	lr 0.00303
Train [93][2720/3239]	Time 0.211 (0.612)	Data Time 0.001 (0.015)	Loss 2.4121 (2.2600)	Entropy 0.81200 (0.81435)	Top-1 acc 67.969 (70.039)	Top-5 acc 83.594 (87.636)	lr 0.00303
Train [93][2730/3239]	Time 0.260 (0.611)	Data Time 0.001 (0.015)	Loss 2.0832 (2.2598)	Entropy 0.81198 (0.81434)	Top-1 acc 74.609 (70.044)	Top-5 acc 89.062 (87.638)	lr 0.00303
Train [93][2740/3239]	Time 0.370 (0.611)	Data Time 0.001 (0.015)	Loss 2.3243 (2.2599)	Entropy 0.81196 (0.81433)	Top-1 acc 70.312 (70.041)	Top-5 acc 86.328 (87.635)	lr 0.00303
Train [93][2750/3239]	Time 0.227 (0.610)	Data Time 0.001 (0.015)	Loss 2.2604 (2.2599)	Entropy 0.81198 (0.81433)	Top-1 acc 69.922 (70.039)	Top-5 acc 87.109 (87.638)	lr 0.00303
Train [93][2760/3239]	Time 0.227 (0.610)	Data Time 0.001 (0.015)	Loss 2.2719 (2.2600)	Entropy 0.81195 (0.81432)	Top-1 acc 67.969 (70.037)	Top-5 acc 89.453 (87.637)	lr 0.00303
Train [93][2770/3239]	Time 0.232 (0.609)	Data Time 0.001 (0.015)	Loss 2.2449 (2.2600)	Entropy 0.81181 (0.81431)	Top-1 acc 73.047 (70.038)	Top-5 acc 85.938 (87.637)	lr 0.00303
Train [93][2780/3239]	Time 0.230 (0.609)	Data Time 0.001 (0.015)	Loss 2.1412 (2.2599)	Entropy 0.81182 (0.81430)	Top-1 acc 73.438 (70.042)	Top-5 acc 89.453 (87.638)	lr 0.00303
Train [93][2790/3239]	Time 0.229 (0.608)	Data Time 0.001 (0.014)	Loss 2.2943 (2.2600)	Entropy 0.81184 (0.81429)	Top-1 acc 67.578 (70.036)	Top-5 acc 87.500 (87.636)	lr 0.00302
Train [93][2800/3239]	Time 0.272 (0.627)	Data Time 0.003 (0.014)	Loss 2.2457 (2.2600)	Entropy 0.81179 (0.81428)	Top-1 acc 73.438 (70.038)	Top-5 acc 84.766 (87.635)	lr 0.00302
Train [93][2810/3239]	Time 0.268 (0.627)	Data Time 0.002 (0.014)	Loss 2.0792 (2.2598)	Entropy 0.81182 (0.81427)	Top-1 acc 74.609 (70.046)	Top-5 acc 90.625 (87.637)	lr 0.00302
Train [93][2820/3239]	Time 0.245 (0.626)	Data Time 0.002 (0.014)	Loss 2.3897 (2.2599)	Entropy 0.81174 (0.81426)	Top-1 acc 66.797 (70.043)	Top-5 acc 85.156 (87.635)	lr 0.00302
Train [93][2830/3239]	Time 0.228 (0.626)	Data Time 0.001 (0.014)	Loss 2.3029 (2.2599)	Entropy 0.81151 (0.81425)	Top-1 acc 68.359 (70.044)	Top-5 acc 87.109 (87.634)	lr 0.00302
Train [93][2840/3239]	Time 0.223 (0.625)	Data Time 0.001 (0.014)	Loss 2.1646 (2.2600)	Entropy 0.81149 (0.81425)	Top-1 acc 72.656 (70.039)	Top-5 acc 89.453 (87.635)	lr 0.00302
Train [93][2850/3239]	Time 0.255 (0.625)	Data Time 0.001 (0.014)	Loss 2.2420 (2.2599)	Entropy 0.81145 (0.81424)	Top-1 acc 69.531 (70.042)	Top-5 acc 87.500 (87.638)	lr 0.00302
Train [93][2860/3239]	Time 0.231 (0.624)	Data Time 0.001 (0.014)	Loss 2.3717 (2.2601)	Entropy 0.81146 (0.81423)	Top-1 acc 66.797 (70.041)	Top-5 acc 86.328 (87.635)	lr 0.00302
Train [93][2870/3239]	Time 0.311 (0.624)	Data Time 0.001 (0.014)	Loss 2.0421 (2.2600)	Entropy 0.81149 (0.81422)	Top-1 acc 76.172 (70.039)	Top-5 acc 90.234 (87.637)	lr 0.00302
Train [93][2880/3239]	Time 0.278 (0.623)	Data Time 0.001 (0.014)	Loss 2.3368 (2.2599)	Entropy 0.81145 (0.81421)	Top-1 acc 69.922 (70.039)	Top-5 acc 87.500 (87.642)	lr 0.00302
Train [93][2890/3239]	Time 0.241 (0.623)	Data Time 0.001 (0.014)	Loss 2.2672 (2.2598)	Entropy 0.81142 (0.81420)	Top-1 acc 64.453 (70.041)	Top-5 acc 90.234 (87.644)	lr 0.00302
Train [93][2900/3239]	Time 0.232 (0.622)	Data Time 0.001 (0.014)	Loss 2.3264 (2.2599)	Entropy 0.81149 (0.81419)	Top-1 acc 69.922 (70.039)	Top-5 acc 86.719 (87.643)	lr 0.00302
Train [93][2910/3239]	Time 0.342 (0.622)	Data Time 0.002 (0.014)	Loss 2.3068 (2.2599)	Entropy 0.81147 (0.81418)	Top-1 acc 73.047 (70.042)	Top-5 acc 87.500 (87.646)	lr 0.00302
Train [93][2920/3239]	Time 0.218 (0.621)	Data Time 0.001 (0.014)	Loss 2.2146 (2.2599)	Entropy 0.81149 (0.81417)	Top-1 acc 70.703 (70.040)	Top-5 acc 87.891 (87.646)	lr 0.00302
Train [93][2930/3239]	Time 0.219 (0.621)	Data Time 0.001 (0.014)	Loss 2.1781 (2.2599)	Entropy 0.81148 (0.81416)	Top-1 acc 71.875 (70.042)	Top-5 acc 88.281 (87.644)	lr 0.00302
Train [93][2940/3239]	Time 0.223 (0.620)	Data Time 0.001 (0.014)	Loss 2.3061 (2.2599)	Entropy 0.81149 (0.81415)	Top-1 acc 67.969 (70.039)	Top-5 acc 85.938 (87.642)	lr 0.00301
Train [93][2950/3239]	Time 0.313 (0.620)	Data Time 0.001 (0.014)	Loss 2.3865 (2.2600)	Entropy 0.81141 (0.81414)	Top-1 acc 62.891 (70.036)	Top-5 acc 87.109 (87.642)	lr 0.00301
Train [93][2960/3239]	Time 0.228 (0.619)	Data Time 0.001 (0.014)	Loss 2.3150 (2.2601)	Entropy 0.81143 (0.81413)	Top-1 acc 70.312 (70.035)	Top-5 acc 85.156 (87.640)	lr 0.00301
Train [93][2970/3239]	Time 0.212 (0.619)	Data Time 0.001 (0.014)	Loss 2.2639 (2.2601)	Entropy 0.81139 (0.81412)	Top-1 acc 69.922 (70.036)	Top-5 acc 89.453 (87.639)	lr 0.00301
Train [93][2980/3239]	Time 0.225 (0.618)	Data Time 0.001 (0.014)	Loss 2.4502 (2.2601)	Entropy 0.81133 (0.81411)	Top-1 acc 64.062 (70.035)	Top-5 acc 82.422 (87.640)	lr 0.00301
Train [93][2990/3239]	Time 0.215 (0.617)	Data Time 0.001 (0.014)	Loss 2.0845 (2.2600)	Entropy 0.81127 (0.81410)	Top-1 acc 73.828 (70.037)	Top-5 acc 90.234 (87.641)	lr 0.00301
Train [93][3000/3239]	Time 0.263 (0.617)	Data Time 0.001 (0.014)	Loss 2.2224 (2.2599)	Entropy 0.81123 (0.81410)	Top-1 acc 68.750 (70.036)	Top-5 acc 87.109 (87.644)	lr 0.00301
Train [93][3010/3239]	Time 0.271 (0.617)	Data Time 0.001 (0.014)	Loss 2.1930 (2.2599)	Entropy 0.81119 (0.81409)	Top-1 acc 72.656 (70.037)	Top-5 acc 89.844 (87.644)	lr 0.00301
Train [93][3020/3239]	Time 0.226 (0.616)	Data Time 0.001 (0.014)	Loss 2.2401 (2.2598)	Entropy 0.81126 (0.81408)	Top-1 acc 72.266 (70.043)	Top-5 acc 86.328 (87.644)	lr 0.00301
Train [93][3030/3239]	Time 0.272 (0.616)	Data Time 0.001 (0.013)	Loss 2.1541 (2.2597)	Entropy 0.81124 (0.81407)	Top-1 acc 73.828 (70.044)	Top-5 acc 89.453 (87.647)	lr 0.00301
Train [93][3040/3239]	Time 0.222 (0.615)	Data Time 0.001 (0.013)	Loss 2.3631 (2.2597)	Entropy 0.81124 (0.81406)	Top-1 acc 69.141 (70.042)	Top-5 acc 84.375 (87.646)	lr 0.00301
Train [93][3050/3239]	Time 0.223 (0.615)	Data Time 0.001 (0.013)	Loss 2.1976 (2.2596)	Entropy 0.81123 (0.81405)	Top-1 acc 71.484 (70.044)	Top-5 acc 88.281 (87.647)	lr 0.00301
Train [93][3060/3239]	Time 0.224 (0.614)	Data Time 0.001 (0.013)	Loss 2.1634 (2.2597)	Entropy 0.81119 (0.81404)	Top-1 acc 72.656 (70.042)	Top-5 acc 88.281 (87.646)	lr 0.00301
Train [93][3070/3239]	Time 0.216 (0.614)	Data Time 0.001 (0.013)	Loss 2.2711 (2.2597)	Entropy 0.81116 (0.81403)	Top-1 acc 71.484 (70.040)	Top-5 acc 86.719 (87.648)	lr 0.00301
Train [93][3080/3239]	Time 0.329 (0.613)	Data Time 0.001 (0.013)	Loss 2.1296 (2.2596)	Entropy 0.81110 (0.81402)	Top-1 acc 75.391 (70.044)	Top-5 acc 89.062 (87.648)	lr 0.00301
Train [93][3090/3239]	Time 0.239 (0.613)	Data Time 0.001 (0.013)	Loss 2.2507 (2.2598)	Entropy 0.81110 (0.81401)	Top-1 acc 69.531 (70.039)	Top-5 acc 89.844 (87.649)	lr 0.00300
Train [93][3100/3239]	Time 0.229 (0.613)	Data Time 0.001 (0.013)	Loss 2.3307 (2.2600)	Entropy 0.81112 (0.81400)	Top-1 acc 68.750 (70.037)	Top-5 acc 86.719 (87.647)	lr 0.00300
Train [93][3110/3239]	Time 0.262 (0.612)	Data Time 0.001 (0.013)	Loss 2.1416 (2.2598)	Entropy 0.81103 (0.81399)	Top-1 acc 71.875 (70.038)	Top-5 acc 89.844 (87.648)	lr 0.00300
Train [93][3120/3239]	Time 0.356 (0.612)	Data Time 0.001 (0.013)	Loss 2.1665 (2.2599)	Entropy 0.81091 (0.81398)	Top-1 acc 71.484 (70.034)	Top-5 acc 87.891 (87.646)	lr 0.00300
Train [93][3130/3239]	Time 0.312 (0.627)	Data Time 0.004 (0.013)	Loss 2.2728 (2.2599)	Entropy 0.81091 (0.81397)	Top-1 acc 68.750 (70.034)	Top-5 acc 88.281 (87.645)	lr 0.00300
Train [93][3140/3239]	Time 0.212 (0.627)	Data Time 0.002 (0.013)	Loss 2.2352 (2.2601)	Entropy 0.81089 (0.81396)	Top-1 acc 70.703 (70.029)	Top-5 acc 89.062 (87.642)	lr 0.00300
Train [93][3150/3239]	Time 0.250 (0.626)	Data Time 0.002 (0.013)	Loss 2.2019 (2.2601)	Entropy 0.81093 (0.81395)	Top-1 acc 71.484 (70.030)	Top-5 acc 87.891 (87.642)	lr 0.00300
Train [93][3160/3239]	Time 0.249 (0.626)	Data Time 0.001 (0.013)	Loss 2.3301 (2.2602)	Entropy 0.81095 (0.81394)	Top-1 acc 64.453 (70.026)	Top-5 acc 87.109 (87.640)	lr 0.00300
Train [93][3170/3239]	Time 0.237 (0.626)	Data Time 0.001 (0.013)	Loss 2.3232 (2.2603)	Entropy 0.81093 (0.81393)	Top-1 acc 69.922 (70.025)	Top-5 acc 87.109 (87.640)	lr 0.00300
Train [93][3180/3239]	Time 0.231 (0.625)	Data Time 0.000 (0.013)	Loss 2.2883 (2.2603)	Entropy 0.81090 (0.81393)	Top-1 acc 71.875 (70.023)	Top-5 acc 88.281 (87.639)	lr 0.00300
Train [93][3190/3239]	Time 0.234 (0.624)	Data Time 0.000 (0.013)	Loss 2.3057 (2.2602)	Entropy 0.81074 (0.81392)	Top-1 acc 70.312 (70.026)	Top-5 acc 87.891 (87.640)	lr 0.00300
Train [93][3200/3239]	Time 0.229 (0.624)	Data Time 0.000 (0.013)	Loss 2.1546 (2.2603)	Entropy 0.81063 (0.81391)	Top-1 acc 74.609 (70.024)	Top-5 acc 87.891 (87.639)	lr 0.00300
Train [93][3210/3239]	Time 0.230 (0.623)	Data Time 0.000 (0.013)	Loss 2.3556 (2.2603)	Entropy 0.81061 (0.81390)	Top-1 acc 69.922 (70.023)	Top-5 acc 85.547 (87.641)	lr 0.00300
Train [93][3220/3239]	Time 0.236 (0.623)	Data Time 0.000 (0.013)	Loss 2.2691 (2.2601)	Entropy 0.81058 (0.81389)	Top-1 acc 69.531 (70.026)	Top-5 acc 87.891 (87.644)	lr 0.00300
Train [93][3230/3239]	Time 0.239 (0.622)	Data Time 0.000 (0.013)	Loss 2.3435 (2.2602)	Entropy 0.81049 (0.81387)	Top-1 acc 69.922 (70.025)	Top-5 acc 82.812 (87.642)	lr 0.00300
Train [93][3239/3239]	Time 2.330 (0.622)	Data Time 0.000 (0.013)	Loss 2.1262 (2.2601)	Entropy 0.81049 (0.81387)	Top-1 acc 72.840 (70.028)	Top-5 acc 91.358 (87.644)	lr 0.00299
==========Valid [93/120]	loss 1.242	top-1 acc 71.724 (71.724)	top-5 acc 89.368	Train top-1 70.028	top-5 87.644	Entropy 0.81049	Latency-None: 0.000ms	Flops: 546.53M
Train [94][0/3239]	Time 42.143 (42.143)	Data Time 37.335 (37.335)	Loss 2.4143 (2.4143)	Entropy 0.81045 (0.81045)	Top-1 acc 62.109 (62.109)	Top-5 acc 85.547 (85.547)	lr 0.00299
Train [94][10/3239]	Time 2.864 (4.357)	Data Time 0.002 (3.396)	Loss 2.3277 (2.2518)	Entropy 0.81045 (0.81045)	Top-1 acc 67.969 (69.602)	Top-5 acc 87.891 (87.749)	lr 0.00299
Train [94][20/3239]	Time 0.236 (2.403)	Data Time 0.001 (1.779)	Loss 2.4428 (2.2627)	Entropy 0.81045 (0.81045)	Top-1 acc 67.578 (69.717)	Top-5 acc 83.984 (87.723)	lr 0.00299
Train [94][30/3239]	Time 0.226 (1.783)	Data Time 0.001 (1.206)	Loss 2.2508 (2.2885)	Entropy 0.81040 (0.81044)	Top-1 acc 71.484 (69.178)	Top-5 acc 89.062 (87.399)	lr 0.00299
Train [94][40/3239]	Time 0.244 (1.467)	Data Time 0.001 (0.912)	Loss 2.2663 (2.2842)	Entropy 0.81038 (0.81042)	Top-1 acc 73.047 (69.131)	Top-5 acc 87.891 (87.605)	lr 0.00299
Train [94][50/3239]	Time 0.245 (1.273)	Data Time 0.001 (0.734)	Loss 2.4684 (2.2690)	Entropy 0.81037 (0.81041)	Top-1 acc 66.406 (69.539)	Top-5 acc 85.547 (87.883)	lr 0.00299
Train [94][60/3239]	Time 0.237 (1.147)	Data Time 0.001 (0.614)	Loss 2.1876 (2.2641)	Entropy 0.81037 (0.81041)	Top-1 acc 72.266 (69.666)	Top-5 acc 89.453 (87.891)	lr 0.00299
Train [94][70/3239]	Time 0.236 (1.052)	Data Time 0.001 (0.527)	Loss 2.2431 (2.2635)	Entropy 0.81016 (0.81039)	Top-1 acc 71.094 (69.746)	Top-5 acc 86.328 (87.902)	lr 0.00299
Train [94][80/3239]	Time 0.218 (0.980)	Data Time 0.001 (0.462)	Loss 2.1348 (2.2594)	Entropy 0.81019 (0.81036)	Top-1 acc 72.266 (69.816)	Top-5 acc 89.062 (87.948)	lr 0.00299
Train [94][90/3239]	Time 0.216 (0.925)	Data Time 0.001 (0.412)	Loss 2.3046 (2.2650)	Entropy 0.81021 (0.81034)	Top-1 acc 67.969 (69.686)	Top-5 acc 87.109 (87.805)	lr 0.00299
Train [94][100/3239]	Time 0.363 (0.883)	Data Time 0.001 (0.371)	Loss 2.1866 (2.2677)	Entropy 0.81017 (0.81033)	Top-1 acc 72.656 (69.713)	Top-5 acc 87.891 (87.740)	lr 0.00299
Train [94][110/3239]	Time 0.216 (0.848)	Data Time 0.001 (0.338)	Loss 2.3424 (2.2623)	Entropy 0.81019 (0.81031)	Top-1 acc 68.750 (69.985)	Top-5 acc 86.328 (87.834)	lr 0.00299
Train [94][120/3239]	Time 2.600 (0.818)	Data Time 0.001 (0.310)	Loss 2.2078 (2.2629)	Entropy 0.81019 (0.81030)	Top-1 acc 70.312 (70.012)	Top-5 acc 87.891 (87.865)	lr 0.00299
Train [94][130/3239]	Time 0.247 (0.775)	Data Time 0.001 (0.287)	Loss 2.3835 (2.2621)	Entropy 0.81018 (0.81029)	Top-1 acc 69.141 (70.047)	Top-5 acc 83.203 (87.858)	lr 0.00299
Train [94][140/3239]	Time 0.350 (0.756)	Data Time 0.001 (0.266)	Loss 2.1574 (2.2599)	Entropy 0.81017 (0.81029)	Top-1 acc 72.266 (70.024)	Top-5 acc 90.625 (87.893)	lr 0.00299
Train [94][150/3239]	Time 0.180 (0.736)	Data Time 0.001 (0.249)	Loss 2.2237 (2.2578)	Entropy 0.81017 (0.81028)	Top-1 acc 69.141 (70.056)	Top-5 acc 91.016 (87.955)	lr 0.00299
Train [94][160/3239]	Time 0.219 (0.720)	Data Time 0.001 (0.234)	Loss 2.2266 (2.2549)	Entropy 0.81019 (0.81027)	Top-1 acc 71.484 (70.109)	Top-5 acc 87.891 (87.983)	lr 0.00298
Train [94][170/3239]	Time 0.222 (0.705)	Data Time 0.001 (0.220)	Loss 2.1654 (2.2535)	Entropy 0.81009 (0.81026)	Top-1 acc 74.219 (70.230)	Top-5 acc 88.281 (87.989)	lr 0.00298
Train [94][180/3239]	Time 0.276 (0.692)	Data Time 0.001 (0.208)	Loss 2.2628 (2.2520)	Entropy 0.81008 (0.81025)	Top-1 acc 70.312 (70.261)	Top-5 acc 85.547 (87.999)	lr 0.00298
Train [94][190/3239]	Time 0.253 (0.681)	Data Time 0.001 (0.197)	Loss 2.4096 (2.2528)	Entropy 0.80992 (0.81024)	Top-1 acc 65.625 (70.223)	Top-5 acc 84.375 (87.972)	lr 0.00298
Train [94][200/3239]	Time 0.239 (0.671)	Data Time 0.001 (0.187)	Loss 2.4069 (2.2523)	Entropy 0.80993 (0.81023)	Top-1 acc 65.234 (70.254)	Top-5 acc 84.766 (87.953)	lr 0.00298
Train [94][210/3239]	Time 0.227 (0.662)	Data Time 0.001 (0.179)	Loss 2.2772 (2.2519)	Entropy 0.80991 (0.81021)	Top-1 acc 69.141 (70.263)	Top-5 acc 88.672 (87.972)	lr 0.00298
Train [94][220/3239]	Time 0.270 (0.654)	Data Time 0.002 (0.171)	Loss 2.1530 (2.2508)	Entropy 0.80998 (0.81020)	Top-1 acc 73.047 (70.302)	Top-5 acc 89.844 (87.984)	lr 0.00298
Train [94][230/3239]	Time 2.537 (0.647)	Data Time 0.001 (0.163)	Loss 2.2293 (2.2523)	Entropy 0.80998 (0.81019)	Top-1 acc 73.438 (70.314)	Top-5 acc 89.062 (87.941)	lr 0.00298
Train [94][240/3239]	Time 0.214 (0.630)	Data Time 0.001 (0.157)	Loss 2.2575 (2.2525)	Entropy 0.81135 (0.81024)	Top-1 acc 69.141 (70.347)	Top-5 acc 88.672 (87.954)	lr 0.00298
Train [94][250/3239]	Time 0.240 (0.841)	Data Time 0.002 (0.150)	Loss 2.2540 (2.2528)	Entropy 0.81128 (0.81028)	Top-1 acc 70.312 (70.297)	Top-5 acc 88.672 (87.984)	lr 0.00298
Train [94][260/3239]	Time 0.230 (0.828)	Data Time 0.002 (0.145)	Loss 2.1807 (2.2534)	Entropy 0.81128 (0.81032)	Top-1 acc 74.609 (70.283)	Top-5 acc 91.797 (87.950)	lr 0.00298
Train [94][270/3239]	Time 0.223 (0.815)	Data Time 0.002 (0.139)	Loss 2.2096 (2.2510)	Entropy 0.81124 (0.81035)	Top-1 acc 67.969 (70.334)	Top-5 acc 88.281 (87.973)	lr 0.00298
Train [94][280/3239]	Time 0.240 (0.803)	Data Time 0.001 (0.135)	Loss 2.1091 (2.2502)	Entropy 0.81117 (0.81038)	Top-1 acc 74.609 (70.368)	Top-5 acc 87.500 (87.948)	lr 0.00298
Train [94][290/3239]	Time 0.225 (0.791)	Data Time 0.001 (0.130)	Loss 2.1199 (2.2491)	Entropy 0.81110 (0.81041)	Top-1 acc 74.609 (70.390)	Top-5 acc 91.797 (87.967)	lr 0.00298
Train [94][300/3239]	Time 0.226 (0.781)	Data Time 0.001 (0.126)	Loss 2.1388 (2.2477)	Entropy 0.81109 (0.81043)	Top-1 acc 75.781 (70.420)	Top-5 acc 90.234 (87.994)	lr 0.00298
Train [94][310/3239]	Time 0.324 (0.771)	Data Time 0.001 (0.122)	Loss 2.0632 (2.2474)	Entropy 0.81109 (0.81045)	Top-1 acc 75.391 (70.414)	Top-5 acc 90.625 (88.002)	lr 0.00297
Train [94][320/3239]	Time 0.226 (0.762)	Data Time 0.001 (0.118)	Loss 2.0399 (2.2470)	Entropy 0.81102 (0.81047)	Top-1 acc 74.219 (70.407)	Top-5 acc 90.234 (87.994)	lr 0.00297
Train [94][330/3239]	Time 0.226 (0.753)	Data Time 0.001 (0.115)	Loss 2.4024 (2.2486)	Entropy 0.81085 (0.81049)	Top-1 acc 63.672 (70.344)	Top-5 acc 83.203 (87.953)	lr 0.00297
Train [94][340/3239]	Time 2.576 (0.745)	Data Time 0.001 (0.111)	Loss 2.1279 (2.2473)	Entropy 0.81085 (0.81050)	Top-1 acc 75.000 (70.390)	Top-5 acc 89.844 (87.964)	lr 0.00297
Train [94][350/3239]	Time 0.340 (0.731)	Data Time 0.001 (0.108)	Loss 2.1249 (2.2461)	Entropy 0.81077 (0.81051)	Top-1 acc 76.172 (70.424)	Top-5 acc 89.062 (87.989)	lr 0.00297
Train [94][360/3239]	Time 0.255 (0.724)	Data Time 0.001 (0.105)	Loss 2.4296 (2.2464)	Entropy 0.81076 (0.81051)	Top-1 acc 65.234 (70.426)	Top-5 acc 86.719 (87.970)	lr 0.00297
Train [94][370/3239]	Time 0.240 (0.717)	Data Time 0.001 (0.102)	Loss 2.1791 (2.2452)	Entropy 0.81077 (0.81052)	Top-1 acc 73.438 (70.458)	Top-5 acc 89.062 (87.977)	lr 0.00297
Train [94][380/3239]	Time 0.270 (0.711)	Data Time 0.002 (0.100)	Loss 2.4506 (2.2452)	Entropy 0.81079 (0.81053)	Top-1 acc 67.578 (70.482)	Top-5 acc 83.984 (87.986)	lr 0.00297
Train [94][390/3239]	Time 0.226 (0.705)	Data Time 0.001 (0.097)	Loss 2.2325 (2.2460)	Entropy 0.81079 (0.81053)	Top-1 acc 72.266 (70.451)	Top-5 acc 88.672 (87.978)	lr 0.00297
Train [94][400/3239]	Time 0.232 (0.700)	Data Time 0.001 (0.095)	Loss 2.1137 (2.2464)	Entropy 0.81072 (0.81054)	Top-1 acc 74.609 (70.472)	Top-5 acc 90.625 (87.960)	lr 0.00297
Train [94][410/3239]	Time 0.230 (0.694)	Data Time 0.001 (0.093)	Loss 2.2755 (2.2461)	Entropy 0.81064 (0.81054)	Top-1 acc 71.094 (70.468)	Top-5 acc 87.109 (87.976)	lr 0.00297
Train [94][420/3239]	Time 0.228 (0.689)	Data Time 0.001 (0.090)	Loss 2.2326 (2.2454)	Entropy 0.81062 (0.81054)	Top-1 acc 69.141 (70.501)	Top-5 acc 89.062 (87.989)	lr 0.00297
Train [94][430/3239]	Time 0.212 (0.684)	Data Time 0.001 (0.088)	Loss 2.1483 (2.2455)	Entropy 0.81059 (0.81055)	Top-1 acc 75.391 (70.516)	Top-5 acc 89.844 (87.985)	lr 0.00297
Train [94][440/3239]	Time 0.319 (0.680)	Data Time 0.001 (0.086)	Loss 2.2742 (2.2452)	Entropy 0.81066 (0.81055)	Top-1 acc 69.141 (70.518)	Top-5 acc 87.500 (87.986)	lr 0.00297
Train [94][450/3239]	Time 2.490 (0.675)	Data Time 0.001 (0.084)	Loss 2.2617 (2.2450)	Entropy 0.81066 (0.81055)	Top-1 acc 67.578 (70.511)	Top-5 acc 87.109 (87.993)	lr 0.00297
Train [94][460/3239]	Time 0.221 (0.666)	Data Time 0.001 (0.083)	Loss 2.3246 (2.2461)	Entropy 0.81069 (0.81055)	Top-1 acc 67.578 (70.462)	Top-5 acc 87.109 (87.990)	lr 0.00296
Train [94][470/3239]	Time 0.244 (0.661)	Data Time 0.001 (0.081)	Loss 2.3814 (2.2464)	Entropy 0.81068 (0.81056)	Top-1 acc 69.141 (70.439)	Top-5 acc 85.547 (87.983)	lr 0.00296
Train [94][480/3239]	Time 0.335 (0.658)	Data Time 0.001 (0.079)	Loss 2.1487 (2.2455)	Entropy 0.81061 (0.81056)	Top-1 acc 72.266 (70.455)	Top-5 acc 89.062 (87.991)	lr 0.00296
Train [94][490/3239]	Time 0.231 (0.654)	Data Time 0.001 (0.078)	Loss 2.1558 (2.2460)	Entropy 0.81058 (0.81056)	Top-1 acc 73.047 (70.445)	Top-5 acc 89.062 (87.980)	lr 0.00296
Train [94][500/3239]	Time 0.245 (0.650)	Data Time 0.001 (0.076)	Loss 2.2265 (2.2456)	Entropy 0.81054 (0.81056)	Top-1 acc 72.656 (70.472)	Top-5 acc 89.453 (87.998)	lr 0.00296
Train [94][510/3239]	Time 0.225 (0.647)	Data Time 0.001 (0.075)	Loss 2.0364 (2.2461)	Entropy 0.81052 (0.81056)	Top-1 acc 76.953 (70.463)	Top-5 acc 90.625 (87.990)	lr 0.00296
Train [94][520/3239]	Time 0.239 (0.644)	Data Time 0.001 (0.073)	Loss 2.0955 (2.2456)	Entropy 0.81052 (0.81056)	Top-1 acc 74.609 (70.462)	Top-5 acc 90.625 (87.989)	lr 0.00296
Train [94][530/3239]	Time 0.226 (0.640)	Data Time 0.001 (0.072)	Loss 2.2407 (2.2452)	Entropy 0.81045 (0.81056)	Top-1 acc 70.703 (70.474)	Top-5 acc 89.062 (87.991)	lr 0.00296
Train [94][540/3239]	Time 0.221 (0.637)	Data Time 0.001 (0.071)	Loss 2.4902 (2.2465)	Entropy 0.81043 (0.81055)	Top-1 acc 65.234 (70.443)	Top-5 acc 81.641 (87.964)	lr 0.00296
Train [94][550/3239]	Time 0.216 (0.634)	Data Time 0.001 (0.069)	Loss 2.2909 (2.2471)	Entropy 0.81042 (0.81055)	Top-1 acc 71.875 (70.437)	Top-5 acc 88.672 (87.946)	lr 0.00296
Train [94][560/3239]	Time 2.587 (0.631)	Data Time 0.001 (0.068)	Loss 2.2704 (2.2469)	Entropy 0.81042 (0.81055)	Top-1 acc 69.531 (70.436)	Top-5 acc 88.672 (87.959)	lr 0.00296
Train [94][570/3239]	Time 0.230 (0.625)	Data Time 0.001 (0.067)	Loss 2.1346 (2.2470)	Entropy 0.81038 (0.81055)	Top-1 acc 74.609 (70.441)	Top-5 acc 89.844 (87.954)	lr 0.00296
Train [94][580/3239]	Time 0.231 (0.622)	Data Time 0.001 (0.066)	Loss 2.3280 (2.2469)	Entropy 0.81037 (0.81054)	Top-1 acc 69.141 (70.451)	Top-5 acc 86.328 (87.942)	lr 0.00296
Train [94][590/3239]	Time 0.229 (0.620)	Data Time 0.001 (0.065)	Loss 2.1823 (2.2466)	Entropy 0.81031 (0.81054)	Top-1 acc 71.484 (70.446)	Top-5 acc 87.891 (87.948)	lr 0.00296
Train [94][600/3239]	Time 0.223 (0.617)	Data Time 0.001 (0.064)	Loss 2.2198 (2.2475)	Entropy 0.81032 (0.81054)	Top-1 acc 68.750 (70.433)	Top-5 acc 87.500 (87.930)	lr 0.00296
Train [94][610/3239]	Time 0.397 (0.697)	Data Time 0.005 (0.063)	Loss 2.0494 (2.2465)	Entropy 0.81030 (0.81053)	Top-1 acc 73.828 (70.442)	Top-5 acc 90.625 (87.942)	lr 0.00296
Train [94][620/3239]	Time 0.238 (0.694)	Data Time 0.002 (0.062)	Loss 2.2635 (2.2462)	Entropy 0.81019 (0.81053)	Top-1 acc 70.312 (70.450)	Top-5 acc 87.500 (87.949)	lr 0.00295
Train [94][630/3239]	Time 0.252 (0.691)	Data Time 0.002 (0.061)	Loss 2.2165 (2.2464)	Entropy 0.81014 (0.81052)	Top-1 acc 69.531 (70.448)	Top-5 acc 88.281 (87.947)	lr 0.00295
Train [94][640/3239]	Time 0.228 (0.688)	Data Time 0.001 (0.060)	Loss 2.4075 (2.2470)	Entropy 0.81011 (0.81052)	Top-1 acc 68.359 (70.432)	Top-5 acc 85.547 (87.932)	lr 0.00295
Train [94][650/3239]	Time 0.237 (0.685)	Data Time 0.002 (0.059)	Loss 2.2408 (2.2466)	Entropy 0.81010 (0.81051)	Top-1 acc 72.266 (70.449)	Top-5 acc 86.719 (87.930)	lr 0.00295
Train [94][660/3239]	Time 0.218 (0.682)	Data Time 0.001 (0.058)	Loss 2.2786 (2.2460)	Entropy 0.81010 (0.81050)	Top-1 acc 67.188 (70.466)	Top-5 acc 88.672 (87.942)	lr 0.00295
Train [94][670/3239]	Time 2.576 (0.679)	Data Time 0.001 (0.057)	Loss 2.2982 (2.2458)	Entropy 0.81010 (0.81050)	Top-1 acc 67.969 (70.456)	Top-5 acc 88.281 (87.948)	lr 0.00295
Train [94][680/3239]	Time 0.263 (0.672)	Data Time 0.001 (0.057)	Loss 2.2424 (2.2460)	Entropy 0.81013 (0.81049)	Top-1 acc 71.484 (70.439)	Top-5 acc 87.891 (87.947)	lr 0.00295
Train [94][690/3239]	Time 0.335 (0.670)	Data Time 0.001 (0.056)	Loss 2.1846 (2.2462)	Entropy 0.81004 (0.81049)	Top-1 acc 71.875 (70.419)	Top-5 acc 88.672 (87.940)	lr 0.00295
Train [94][700/3239]	Time 0.230 (0.667)	Data Time 0.001 (0.055)	Loss 2.2161 (2.2460)	Entropy 0.81003 (0.81048)	Top-1 acc 68.750 (70.425)	Top-5 acc 89.844 (87.949)	lr 0.00295
Train [94][710/3239]	Time 0.241 (0.664)	Data Time 0.001 (0.054)	Loss 2.3172 (2.2460)	Entropy 0.80998 (0.81047)	Top-1 acc 71.094 (70.438)	Top-5 acc 86.719 (87.955)	lr 0.00295
Train [94][720/3239]	Time 0.267 (0.662)	Data Time 0.001 (0.054)	Loss 2.3619 (2.2465)	Entropy 0.80999 (0.81047)	Top-1 acc 64.062 (70.409)	Top-5 acc 86.719 (87.946)	lr 0.00295
Train [94][730/3239]	Time 0.242 (0.659)	Data Time 0.001 (0.053)	Loss 2.2172 (2.2467)	Entropy 0.80999 (0.81046)	Top-1 acc 72.656 (70.403)	Top-5 acc 88.281 (87.944)	lr 0.00295
Train [94][740/3239]	Time 0.225 (0.657)	Data Time 0.001 (0.052)	Loss 2.2061 (2.2467)	Entropy 0.80994 (0.81045)	Top-1 acc 73.828 (70.413)	Top-5 acc 91.016 (87.942)	lr 0.00295
Train [94][750/3239]	Time 0.242 (0.654)	Data Time 0.001 (0.051)	Loss 2.4458 (2.2468)	Entropy 0.80976 (0.81044)	Top-1 acc 63.672 (70.406)	Top-5 acc 84.375 (87.933)	lr 0.00295
Train [94][760/3239]	Time 0.232 (0.652)	Data Time 0.001 (0.051)	Loss 2.1067 (2.2473)	Entropy 0.80973 (0.81044)	Top-1 acc 73.828 (70.400)	Top-5 acc 92.969 (87.921)	lr 0.00295
Train [94][770/3239]	Time 0.232 (0.650)	Data Time 0.001 (0.050)	Loss 2.2858 (2.2468)	Entropy 0.80972 (0.81043)	Top-1 acc 70.312 (70.403)	Top-5 acc 88.281 (87.927)	lr 0.00294
Train [94][780/3239]	Time 2.531 (0.648)	Data Time 0.002 (0.050)	Loss 2.2632 (2.2470)	Entropy 0.80972 (0.81042)	Top-1 acc 71.094 (70.388)	Top-5 acc 87.500 (87.920)	lr 0.00294
Train [94][790/3239]	Time 0.228 (0.643)	Data Time 0.001 (0.049)	Loss 2.2493 (2.2470)	Entropy 0.80970 (0.81041)	Top-1 acc 67.969 (70.382)	Top-5 acc 88.672 (87.917)	lr 0.00294
Train [94][800/3239]	Time 0.264 (0.641)	Data Time 0.002 (0.048)	Loss 2.4314 (2.2471)	Entropy 0.80967 (0.81040)	Top-1 acc 67.578 (70.393)	Top-5 acc 85.156 (87.913)	lr 0.00294
Train [94][810/3239]	Time 0.242 (0.639)	Data Time 0.001 (0.048)	Loss 2.2794 (2.2474)	Entropy 0.80961 (0.81039)	Top-1 acc 69.922 (70.383)	Top-5 acc 85.547 (87.902)	lr 0.00294
Train [94][820/3239]	Time 0.327 (0.637)	Data Time 0.001 (0.047)	Loss 2.2269 (2.2476)	Entropy 0.80964 (0.81038)	Top-1 acc 71.875 (70.385)	Top-5 acc 87.891 (87.906)	lr 0.00294
Train [94][830/3239]	Time 0.242 (0.635)	Data Time 0.001 (0.047)	Loss 2.1294 (2.2467)	Entropy 0.80961 (0.81037)	Top-1 acc 75.000 (70.415)	Top-5 acc 91.797 (87.925)	lr 0.00294
Train [94][840/3239]	Time 0.228 (0.633)	Data Time 0.001 (0.046)	Loss 2.1863 (2.2461)	Entropy 0.80958 (0.81036)	Top-1 acc 70.312 (70.430)	Top-5 acc 91.016 (87.938)	lr 0.00294
Train [94][850/3239]	Time 0.224 (0.631)	Data Time 0.001 (0.046)	Loss 2.4501 (2.2461)	Entropy 0.80958 (0.81035)	Top-1 acc 67.578 (70.429)	Top-5 acc 84.766 (87.937)	lr 0.00294
Train [94][860/3239]	Time 0.324 (0.629)	Data Time 0.001 (0.045)	Loss 2.4477 (2.2463)	Entropy 0.80956 (0.81034)	Top-1 acc 64.844 (70.425)	Top-5 acc 83.984 (87.933)	lr 0.00294
Train [94][870/3239]	Time 0.206 (0.628)	Data Time 0.001 (0.045)	Loss 2.2883 (2.2465)	Entropy 0.80955 (0.81033)	Top-1 acc 67.578 (70.419)	Top-5 acc 89.062 (87.935)	lr 0.00294
Train [94][880/3239]	Time 0.260 (0.626)	Data Time 0.001 (0.044)	Loss 2.1301 (2.2463)	Entropy 0.80957 (0.81033)	Top-1 acc 76.562 (70.427)	Top-5 acc 89.844 (87.941)	lr 0.00294
Train [94][890/3239]	Time 2.641 (0.624)	Data Time 0.001 (0.044)	Loss 2.1798 (2.2463)	Entropy 0.80957 (0.81032)	Top-1 acc 73.047 (70.432)	Top-5 acc 89.844 (87.939)	lr 0.00294
Train [94][900/3239]	Time 0.235 (0.620)	Data Time 0.001 (0.043)	Loss 2.1884 (2.2459)	Entropy 0.80951 (0.81031)	Top-1 acc 71.484 (70.446)	Top-5 acc 89.062 (87.951)	lr 0.00294
Train [94][910/3239]	Time 0.232 (0.619)	Data Time 0.001 (0.043)	Loss 2.0608 (2.2456)	Entropy 0.80943 (0.81030)	Top-1 acc 75.781 (70.456)	Top-5 acc 91.797 (87.959)	lr 0.00294
Train [94][920/3239]	Time 0.225 (0.617)	Data Time 0.001 (0.042)	Loss 2.1007 (2.2448)	Entropy 0.80929 (0.81029)	Top-1 acc 72.656 (70.476)	Top-5 acc 90.625 (87.968)	lr 0.00293
Train [94][930/3239]	Time 0.233 (0.616)	Data Time 0.001 (0.042)	Loss 2.2027 (2.2450)	Entropy 0.80924 (0.81028)	Top-1 acc 69.531 (70.464)	Top-5 acc 91.016 (87.962)	lr 0.00293
Train [94][940/3239]	Time 0.175 (0.614)	Data Time 0.001 (0.041)	Loss 2.2357 (2.2446)	Entropy 0.80923 (0.81027)	Top-1 acc 74.609 (70.483)	Top-5 acc 86.719 (87.967)	lr 0.00293
Train [94][950/3239]	Time 0.244 (0.613)	Data Time 0.001 (0.041)	Loss 2.3003 (2.2449)	Entropy 0.80919 (0.81026)	Top-1 acc 71.094 (70.476)	Top-5 acc 85.938 (87.958)	lr 0.00293
Train [94][960/3239]	Time 0.238 (0.611)	Data Time 0.001 (0.041)	Loss 2.3302 (2.2450)	Entropy 0.80912 (0.81024)	Top-1 acc 69.141 (70.477)	Top-5 acc 86.328 (87.948)	lr 0.00293
Train [94][970/3239]	Time 0.227 (0.666)	Data Time 0.002 (0.040)	Loss 2.1864 (2.2445)	Entropy 0.80909 (0.81023)	Top-1 acc 71.094 (70.492)	Top-5 acc 89.453 (87.953)	lr 0.00293
Train [94][980/3239]	Time 0.225 (0.664)	Data Time 0.002 (0.040)	Loss 2.1365 (2.2445)	Entropy 0.80900 (0.81022)	Top-1 acc 73.438 (70.480)	Top-5 acc 90.234 (87.952)	lr 0.00293
Train [94][990/3239]	Time 0.353 (0.662)	Data Time 0.001 (0.039)	Loss 2.3394 (2.2445)	Entropy 0.80896 (0.81021)	Top-1 acc 69.531 (70.486)	Top-5 acc 86.719 (87.951)	lr 0.00293
Train [94][1000/3239]	Time 2.503 (0.660)	Data Time 0.001 (0.039)	Loss 2.2613 (2.2445)	Entropy 0.80896 (0.81020)	Top-1 acc 71.875 (70.494)	Top-5 acc 85.547 (87.951)	lr 0.00293
Train [94][1010/3239]	Time 0.224 (0.656)	Data Time 0.001 (0.039)	Loss 2.3071 (2.2443)	Entropy 0.80897 (0.81018)	Top-1 acc 64.453 (70.489)	Top-5 acc 87.109 (87.957)	lr 0.00293
Train [94][1020/3239]	Time 0.241 (0.655)	Data Time 0.001 (0.038)	Loss 2.1959 (2.2444)	Entropy 0.80897 (0.81017)	Top-1 acc 72.266 (70.486)	Top-5 acc 87.500 (87.951)	lr 0.00293
Train [94][1030/3239]	Time 0.240 (0.653)	Data Time 0.001 (0.038)	Loss 2.1086 (2.2441)	Entropy 0.80881 (0.81016)	Top-1 acc 73.047 (70.504)	Top-5 acc 89.062 (87.948)	lr 0.00293
Train [94][1040/3239]	Time 0.237 (0.651)	Data Time 0.001 (0.038)	Loss 2.4665 (2.2440)	Entropy 0.80873 (0.81015)	Top-1 acc 64.453 (70.500)	Top-5 acc 84.375 (87.944)	lr 0.00293
Train [94][1050/3239]	Time 0.233 (0.650)	Data Time 0.001 (0.037)	Loss 2.2237 (2.2436)	Entropy 0.80875 (0.81013)	Top-1 acc 72.266 (70.508)	Top-5 acc 87.500 (87.946)	lr 0.00293
Train [94][1060/3239]	Time 0.221 (0.648)	Data Time 0.001 (0.037)	Loss 2.2105 (2.2441)	Entropy 0.80875 (0.81012)	Top-1 acc 69.141 (70.492)	Top-5 acc 90.234 (87.936)	lr 0.00293
Train [94][1070/3239]	Time 0.229 (0.646)	Data Time 0.001 (0.037)	Loss 2.1239 (2.2443)	Entropy 0.80871 (0.81011)	Top-1 acc 74.609 (70.486)	Top-5 acc 89.844 (87.931)	lr 0.00293
Train [94][1080/3239]	Time 0.225 (0.645)	Data Time 0.001 (0.036)	Loss 2.3456 (2.2444)	Entropy 0.80866 (0.81009)	Top-1 acc 66.406 (70.481)	Top-5 acc 86.328 (87.934)	lr 0.00292
Train [94][1090/3239]	Time 0.226 (0.643)	Data Time 0.001 (0.036)	Loss 2.2649 (2.2444)	Entropy 0.80859 (0.81008)	Top-1 acc 69.531 (70.475)	Top-5 acc 89.062 (87.938)	lr 0.00292
Train [94][1100/3239]	Time 0.239 (0.642)	Data Time 0.001 (0.036)	Loss 2.2016 (2.2445)	Entropy 0.80862 (0.81007)	Top-1 acc 69.531 (70.476)	Top-5 acc 87.891 (87.935)	lr 0.00292
Train [94][1110/3239]	Time 2.626 (0.641)	Data Time 0.001 (0.035)	Loss 2.2387 (2.2445)	Entropy 0.80862 (0.81005)	Top-1 acc 69.531 (70.470)	Top-5 acc 87.500 (87.939)	lr 0.00292
Train [94][1120/3239]	Time 0.402 (0.637)	Data Time 0.002 (0.035)	Loss 2.2008 (2.2451)	Entropy 0.80862 (0.81004)	Top-1 acc 73.438 (70.448)	Top-5 acc 87.891 (87.931)	lr 0.00292
Train [94][1130/3239]	Time 0.233 (0.636)	Data Time 0.001 (0.035)	Loss 2.2644 (2.2454)	Entropy 0.80864 (0.81003)	Top-1 acc 67.969 (70.437)	Top-5 acc 87.109 (87.925)	lr 0.00292
Train [94][1140/3239]	Time 0.218 (0.634)	Data Time 0.001 (0.034)	Loss 2.1860 (2.2455)	Entropy 0.80859 (0.81002)	Top-1 acc 71.484 (70.438)	Top-5 acc 89.453 (87.926)	lr 0.00292
Train [94][1150/3239]	Time 0.232 (0.633)	Data Time 0.001 (0.034)	Loss 2.2337 (2.2455)	Entropy 0.80850 (0.81000)	Top-1 acc 69.531 (70.442)	Top-5 acc 87.891 (87.923)	lr 0.00292
Train [94][1160/3239]	Time 0.334 (0.632)	Data Time 0.001 (0.034)	Loss 2.1845 (2.2454)	Entropy 0.80845 (0.80999)	Top-1 acc 70.312 (70.438)	Top-5 acc 89.062 (87.921)	lr 0.00292
Train [94][1170/3239]	Time 0.234 (0.631)	Data Time 0.001 (0.034)	Loss 2.2576 (2.2452)	Entropy 0.80839 (0.80998)	Top-1 acc 68.750 (70.439)	Top-5 acc 90.234 (87.924)	lr 0.00292
Train [94][1180/3239]	Time 0.247 (0.629)	Data Time 0.001 (0.033)	Loss 2.2718 (2.2451)	Entropy 0.80838 (0.80996)	Top-1 acc 69.531 (70.441)	Top-5 acc 85.938 (87.924)	lr 0.00292
Train [94][1190/3239]	Time 0.231 (0.628)	Data Time 0.001 (0.033)	Loss 2.1492 (2.2451)	Entropy 0.80833 (0.80995)	Top-1 acc 71.094 (70.444)	Top-5 acc 91.797 (87.921)	lr 0.00292
Train [94][1200/3239]	Time 0.293 (0.627)	Data Time 0.001 (0.033)	Loss 2.4513 (2.2455)	Entropy 0.80831 (0.80994)	Top-1 acc 62.891 (70.427)	Top-5 acc 85.156 (87.913)	lr 0.00292
Train [94][1210/3239]	Time 0.231 (0.626)	Data Time 0.001 (0.033)	Loss 2.1485 (2.2454)	Entropy 0.80828 (0.80992)	Top-1 acc 69.922 (70.424)	Top-5 acc 90.234 (87.915)	lr 0.00292
Train [94][1220/3239]	Time 2.512 (0.625)	Data Time 0.002 (0.032)	Loss 2.3140 (2.2454)	Entropy 0.80828 (0.80991)	Top-1 acc 69.922 (70.426)	Top-5 acc 85.156 (87.916)	lr 0.00292
Train [94][1230/3239]	Time 0.223 (0.621)	Data Time 0.001 (0.032)	Loss 2.3953 (2.2456)	Entropy 0.80825 (0.80989)	Top-1 acc 66.406 (70.419)	Top-5 acc 85.156 (87.909)	lr 0.00291
Train [94][1240/3239]	Time 0.238 (0.620)	Data Time 0.001 (0.032)	Loss 2.3371 (2.2455)	Entropy 0.80826 (0.80988)	Top-1 acc 66.016 (70.416)	Top-5 acc 85.938 (87.910)	lr 0.00291
Train [94][1250/3239]	Time 0.236 (0.619)	Data Time 0.001 (0.032)	Loss 2.1786 (2.2457)	Entropy 0.80821 (0.80987)	Top-1 acc 68.750 (70.408)	Top-5 acc 89.453 (87.911)	lr 0.00291
Train [94][1260/3239]	Time 0.226 (0.618)	Data Time 0.001 (0.031)	Loss 2.3242 (2.2456)	Entropy 0.80814 (0.80985)	Top-1 acc 68.359 (70.408)	Top-5 acc 87.109 (87.910)	lr 0.00291
Train [94][1270/3239]	Time 0.223 (0.617)	Data Time 0.001 (0.031)	Loss 2.2119 (2.2456)	Entropy 0.80810 (0.80984)	Top-1 acc 67.969 (70.410)	Top-5 acc 90.234 (87.910)	lr 0.00291
Train [94][1280/3239]	Time 0.231 (0.616)	Data Time 0.001 (0.031)	Loss 2.3013 (2.2457)	Entropy 0.80802 (0.80983)	Top-1 acc 69.141 (70.412)	Top-5 acc 87.500 (87.908)	lr 0.00291
Train [94][1290/3239]	Time 0.225 (0.615)	Data Time 0.001 (0.031)	Loss 2.1636 (2.2461)	Entropy 0.80811 (0.80981)	Top-1 acc 71.875 (70.403)	Top-5 acc 89.453 (87.904)	lr 0.00291
Train [94][1300/3239]	Time 0.230 (0.614)	Data Time 0.001 (0.030)	Loss 2.2037 (2.2465)	Entropy 0.80803 (0.80980)	Top-1 acc 67.188 (70.388)	Top-5 acc 89.453 (87.896)	lr 0.00291
Train [94][1310/3239]	Time 0.232 (0.613)	Data Time 0.001 (0.030)	Loss 2.1560 (2.2460)	Entropy 0.80802 (0.80979)	Top-1 acc 75.000 (70.400)	Top-5 acc 89.453 (87.907)	lr 0.00291
Train [94][1320/3239]	Time 0.222 (0.612)	Data Time 0.001 (0.030)	Loss 2.1768 (2.2459)	Entropy 0.80803 (0.80977)	Top-1 acc 70.312 (70.404)	Top-5 acc 88.281 (87.907)	lr 0.00291
Train [94][1330/3239]	Time 51.509 (0.647)	Data Time 0.001 (0.030)	Loss 2.0734 (2.2458)	Entropy 0.80803 (0.80976)	Top-1 acc 73.438 (70.399)	Top-5 acc 91.797 (87.913)	lr 0.00291
Train [94][1340/3239]	Time 0.307 (0.645)	Data Time 0.002 (0.030)	Loss 2.3232 (2.2458)	Entropy 0.80806 (0.80975)	Top-1 acc 69.141 (70.400)	Top-5 acc 85.938 (87.914)	lr 0.00291
Train [94][1350/3239]	Time 0.254 (0.644)	Data Time 0.002 (0.029)	Loss 2.2535 (2.2459)	Entropy 0.80800 (0.80973)	Top-1 acc 72.656 (70.398)	Top-5 acc 89.062 (87.910)	lr 0.00291
Train [94][1360/3239]	Time 0.232 (0.643)	Data Time 0.001 (0.029)	Loss 2.1719 (2.2457)	Entropy 0.80789 (0.80972)	Top-1 acc 70.312 (70.408)	Top-5 acc 90.234 (87.910)	lr 0.00291
Train [94][1370/3239]	Time 0.321 (0.641)	Data Time 0.001 (0.029)	Loss 2.1809 (2.2452)	Entropy 0.80776 (0.80971)	Top-1 acc 74.219 (70.420)	Top-5 acc 86.328 (87.917)	lr 0.00291
Train [94][1380/3239]	Time 0.223 (0.640)	Data Time 0.001 (0.029)	Loss 2.4317 (2.2451)	Entropy 0.80775 (0.80969)	Top-1 acc 69.141 (70.424)	Top-5 acc 83.594 (87.916)	lr 0.00290
Train [94][1390/3239]	Time 0.241 (0.639)	Data Time 0.001 (0.029)	Loss 2.2121 (2.2451)	Entropy 0.80777 (0.80968)	Top-1 acc 67.969 (70.420)	Top-5 acc 87.891 (87.912)	lr 0.00290
Train [94][1400/3239]	Time 0.231 (0.638)	Data Time 0.001 (0.028)	Loss 2.1931 (2.2450)	Entropy 0.80770 (0.80967)	Top-1 acc 66.406 (70.418)	Top-5 acc 91.797 (87.915)	lr 0.00290
Train [94][1410/3239]	Time 0.229 (0.637)	Data Time 0.001 (0.028)	Loss 2.1588 (2.2450)	Entropy 0.80766 (0.80965)	Top-1 acc 71.875 (70.418)	Top-5 acc 89.062 (87.912)	lr 0.00290
Train [94][1420/3239]	Time 0.233 (0.635)	Data Time 0.001 (0.028)	Loss 2.1908 (2.2446)	Entropy 0.80762 (0.80964)	Top-1 acc 71.484 (70.428)	Top-5 acc 88.672 (87.918)	lr 0.00290
Train [94][1430/3239]	Time 0.222 (0.634)	Data Time 0.001 (0.028)	Loss 2.3221 (2.2449)	Entropy 0.80759 (0.80962)	Top-1 acc 69.922 (70.421)	Top-5 acc 87.891 (87.915)	lr 0.00290
Train [94][1440/3239]	Time 2.603 (0.633)	Data Time 0.001 (0.028)	Loss 2.4388 (2.2452)	Entropy 0.80759 (0.80961)	Top-1 acc 66.797 (70.411)	Top-5 acc 83.984 (87.908)	lr 0.00290
Train [94][1450/3239]	Time 0.229 (0.630)	Data Time 0.001 (0.027)	Loss 2.2774 (2.2450)	Entropy 0.80759 (0.80960)	Top-1 acc 69.531 (70.428)	Top-5 acc 85.938 (87.909)	lr 0.00290
Train [94][1460/3239]	Time 0.243 (0.630)	Data Time 0.001 (0.027)	Loss 2.3245 (2.2450)	Entropy 0.80754 (0.80958)	Top-1 acc 69.922 (70.432)	Top-5 acc 85.938 (87.912)	lr 0.00290
Train [94][1470/3239]	Time 0.241 (0.629)	Data Time 0.001 (0.027)	Loss 2.1885 (2.2449)	Entropy 0.80753 (0.80957)	Top-1 acc 70.312 (70.434)	Top-5 acc 86.719 (87.913)	lr 0.00290
Train [94][1480/3239]	Time 0.237 (0.628)	Data Time 0.001 (0.027)	Loss 2.2900 (2.2449)	Entropy 0.80752 (0.80955)	Top-1 acc 68.750 (70.429)	Top-5 acc 85.547 (87.913)	lr 0.00290
Train [94][1490/3239]	Time 0.234 (0.627)	Data Time 0.001 (0.027)	Loss 2.3107 (2.2454)	Entropy 0.80748 (0.80954)	Top-1 acc 67.188 (70.416)	Top-5 acc 86.328 (87.906)	lr 0.00290
Train [94][1500/3239]	Time 0.320 (0.626)	Data Time 0.001 (0.027)	Loss 2.2017 (2.2452)	Entropy 0.80746 (0.80953)	Top-1 acc 71.094 (70.419)	Top-5 acc 90.234 (87.912)	lr 0.00290
Train [94][1510/3239]	Time 0.231 (0.625)	Data Time 0.001 (0.026)	Loss 2.5148 (2.2454)	Entropy 0.80749 (0.80951)	Top-1 acc 64.453 (70.419)	Top-5 acc 82.422 (87.908)	lr 0.00290
Train [94][1520/3239]	Time 0.243 (0.623)	Data Time 0.001 (0.026)	Loss 2.3932 (2.2456)	Entropy 0.80752 (0.80950)	Top-1 acc 67.969 (70.415)	Top-5 acc 86.328 (87.902)	lr 0.00290
Train [94][1530/3239]	Time 0.230 (0.623)	Data Time 0.001 (0.026)	Loss 2.1823 (2.2455)	Entropy 0.80750 (0.80949)	Top-1 acc 74.609 (70.420)	Top-5 acc 88.672 (87.906)	lr 0.00290
Train [94][1540/3239]	Time 0.257 (0.622)	Data Time 0.001 (0.026)	Loss 2.2220 (2.2456)	Entropy 0.80743 (0.80947)	Top-1 acc 71.094 (70.416)	Top-5 acc 86.328 (87.904)	lr 0.00289
Train [94][1550/3239]	Time 2.497 (0.621)	Data Time 0.002 (0.026)	Loss 2.1728 (2.2455)	Entropy 0.80743 (0.80946)	Top-1 acc 73.828 (70.420)	Top-5 acc 89.844 (87.905)	lr 0.00289
Train [94][1560/3239]	Time 0.238 (0.618)	Data Time 0.001 (0.026)	Loss 2.3784 (2.2456)	Entropy 0.80734 (0.80945)	Top-1 acc 66.406 (70.418)	Top-5 acc 83.203 (87.904)	lr 0.00289
Train [94][1570/3239]	Time 0.264 (0.618)	Data Time 0.001 (0.025)	Loss 2.2816 (2.2459)	Entropy 0.80736 (0.80943)	Top-1 acc 70.312 (70.412)	Top-5 acc 86.719 (87.900)	lr 0.00289
Train [94][1580/3239]	Time 0.259 (0.617)	Data Time 0.001 (0.025)	Loss 2.2137 (2.2458)	Entropy 0.80722 (0.80942)	Top-1 acc 72.656 (70.415)	Top-5 acc 87.109 (87.898)	lr 0.00289
Train [94][1590/3239]	Time 0.271 (0.616)	Data Time 0.001 (0.025)	Loss 2.0393 (2.2458)	Entropy 0.80720 (0.80941)	Top-1 acc 71.484 (70.413)	Top-5 acc 92.969 (87.894)	lr 0.00289
Train [94][1600/3239]	Time 0.241 (0.615)	Data Time 0.001 (0.025)	Loss 2.3472 (2.2458)	Entropy 0.80711 (0.80939)	Top-1 acc 63.672 (70.415)	Top-5 acc 87.500 (87.889)	lr 0.00289
Train [94][1610/3239]	Time 0.238 (0.614)	Data Time 0.001 (0.025)	Loss 2.3455 (2.2458)	Entropy 0.80713 (0.80938)	Top-1 acc 67.188 (70.410)	Top-5 acc 85.938 (87.890)	lr 0.00289
Train [94][1620/3239]	Time 0.227 (0.614)	Data Time 0.001 (0.025)	Loss 2.1833 (2.2457)	Entropy 0.80716 (0.80936)	Top-1 acc 72.266 (70.414)	Top-5 acc 89.062 (87.891)	lr 0.00289
Train [94][1630/3239]	Time 0.224 (0.613)	Data Time 0.001 (0.025)	Loss 2.4072 (2.2458)	Entropy 0.80712 (0.80935)	Top-1 acc 67.578 (70.408)	Top-5 acc 85.156 (87.893)	lr 0.00289
Train [94][1640/3239]	Time 0.205 (0.612)	Data Time 0.001 (0.024)	Loss 2.1911 (2.2460)	Entropy 0.80712 (0.80934)	Top-1 acc 67.969 (70.407)	Top-5 acc 87.891 (87.892)	lr 0.00289
Train [94][1650/3239]	Time 0.208 (0.611)	Data Time 0.001 (0.024)	Loss 2.3342 (2.2458)	Entropy 0.80709 (0.80932)	Top-1 acc 71.094 (70.408)	Top-5 acc 83.594 (87.892)	lr 0.00289
Train [94][1660/3239]	Time 2.610 (0.610)	Data Time 0.001 (0.024)	Loss 2.2310 (2.2455)	Entropy 0.80709 (0.80931)	Top-1 acc 72.266 (70.411)	Top-5 acc 89.062 (87.897)	lr 0.00289
Train [94][1670/3239]	Time 0.297 (0.608)	Data Time 0.002 (0.024)	Loss 2.2376 (2.2456)	Entropy 0.80716 (0.80930)	Top-1 acc 71.094 (70.412)	Top-5 acc 87.891 (87.895)	lr 0.00289
Train [94][1680/3239]	Time 0.237 (0.607)	Data Time 0.001 (0.024)	Loss 2.1501 (2.2456)	Entropy 0.80753 (0.80929)	Top-1 acc 71.094 (70.411)	Top-5 acc 91.406 (87.895)	lr 0.00289
Train [94][1690/3239]	Time 0.249 (0.607)	Data Time 0.001 (0.024)	Loss 2.2385 (2.2457)	Entropy 0.80750 (0.80928)	Top-1 acc 71.875 (70.411)	Top-5 acc 89.062 (87.894)	lr 0.00288
Train [94][1700/3239]	Time 0.299 (0.636)	Data Time 0.003 (0.024)	Loss 2.2048 (2.2456)	Entropy 0.80746 (0.80926)	Top-1 acc 72.656 (70.417)	Top-5 acc 87.891 (87.897)	lr 0.00288
Train [94][1710/3239]	Time 0.251 (0.635)	Data Time 0.002 (0.023)	Loss 2.2545 (2.2458)	Entropy 0.80737 (0.80925)	Top-1 acc 71.875 (70.412)	Top-5 acc 87.109 (87.893)	lr 0.00288
Train [94][1720/3239]	Time 0.234 (0.634)	Data Time 0.001 (0.023)	Loss 2.2508 (2.2459)	Entropy 0.80739 (0.80924)	Top-1 acc 70.703 (70.409)	Top-5 acc 87.891 (87.892)	lr 0.00288
Train [94][1730/3239]	Time 0.233 (0.633)	Data Time 0.001 (0.023)	Loss 2.1480 (2.2460)	Entropy 0.80736 (0.80923)	Top-1 acc 75.000 (70.408)	Top-5 acc 89.062 (87.893)	lr 0.00288
Train [94][1740/3239]	Time 0.232 (0.633)	Data Time 0.001 (0.023)	Loss 2.5283 (2.2459)	Entropy 0.80732 (0.80922)	Top-1 acc 64.062 (70.408)	Top-5 acc 82.031 (87.892)	lr 0.00288
Train [94][1750/3239]	Time 0.236 (0.632)	Data Time 0.001 (0.023)	Loss 2.3121 (2.2462)	Entropy 0.80732 (0.80921)	Top-1 acc 69.141 (70.400)	Top-5 acc 88.281 (87.888)	lr 0.00288
Train [94][1760/3239]	Time 0.239 (0.631)	Data Time 0.001 (0.023)	Loss 2.2530 (2.2463)	Entropy 0.80729 (0.80920)	Top-1 acc 71.484 (70.398)	Top-5 acc 88.672 (87.887)	lr 0.00288
Train [94][1770/3239]	Time 2.541 (0.630)	Data Time 0.001 (0.023)	Loss 2.3276 (2.2463)	Entropy 0.80729 (0.80919)	Top-1 acc 69.141 (70.396)	Top-5 acc 85.938 (87.888)	lr 0.00288
Train [94][1780/3239]	Time 0.259 (0.628)	Data Time 0.001 (0.023)	Loss 2.3316 (2.2464)	Entropy 0.80732 (0.80918)	Top-1 acc 68.750 (70.394)	Top-5 acc 83.984 (87.886)	lr 0.00288
Train [94][1790/3239]	Time 0.258 (0.627)	Data Time 0.002 (0.023)	Loss 2.3042 (2.2462)	Entropy 0.80732 (0.80917)	Top-1 acc 67.969 (70.398)	Top-5 acc 87.891 (87.890)	lr 0.00288
Train [94][1800/3239]	Time 0.328 (0.626)	Data Time 0.001 (0.022)	Loss 2.1368 (2.2461)	Entropy 0.80727 (0.80916)	Top-1 acc 75.391 (70.403)	Top-5 acc 89.453 (87.892)	lr 0.00288
Train [94][1810/3239]	Time 0.234 (0.625)	Data Time 0.001 (0.022)	Loss 2.1259 (2.2459)	Entropy 0.80714 (0.80915)	Top-1 acc 73.438 (70.409)	Top-5 acc 91.406 (87.895)	lr 0.00288
Train [94][1820/3239]	Time 0.234 (0.625)	Data Time 0.001 (0.022)	Loss 2.2862 (2.2465)	Entropy 0.80717 (0.80914)	Top-1 acc 69.531 (70.397)	Top-5 acc 88.281 (87.880)	lr 0.00288
Train [94][1830/3239]	Time 0.248 (0.624)	Data Time 0.002 (0.022)	Loss 2.1801 (2.2466)	Entropy 0.80714 (0.80912)	Top-1 acc 71.484 (70.394)	Top-5 acc 87.109 (87.879)	lr 0.00288
Train [94][1840/3239]	Time 0.333 (0.623)	Data Time 0.001 (0.022)	Loss 2.3272 (2.2465)	Entropy 0.80712 (0.80911)	Top-1 acc 66.797 (70.395)	Top-5 acc 85.156 (87.882)	lr 0.00288
Train [94][1850/3239]	Time 0.217 (0.623)	Data Time 0.001 (0.022)	Loss 2.1930 (2.2467)	Entropy 0.80706 (0.80910)	Top-1 acc 71.875 (70.386)	Top-5 acc 88.672 (87.881)	lr 0.00287
Train [94][1860/3239]	Time 0.232 (0.622)	Data Time 0.001 (0.022)	Loss 2.2852 (2.2467)	Entropy 0.80708 (0.80909)	Top-1 acc 66.406 (70.387)	Top-5 acc 88.281 (87.882)	lr 0.00287
Train [94][1870/3239]	Time 0.235 (0.621)	Data Time 0.001 (0.022)	Loss 2.1833 (2.2465)	Entropy 0.80701 (0.80908)	Top-1 acc 74.609 (70.389)	Top-5 acc 87.891 (87.885)	lr 0.00287
Train [94][1880/3239]	Time 2.608 (0.620)	Data Time 0.001 (0.022)	Loss 2.2562 (2.2467)	Entropy 0.80701 (0.80907)	Top-1 acc 70.312 (70.385)	Top-5 acc 87.891 (87.883)	lr 0.00287
Train [94][1890/3239]	Time 0.261 (0.618)	Data Time 0.001 (0.021)	Loss 2.1230 (2.2467)	Entropy 0.80707 (0.80906)	Top-1 acc 73.047 (70.384)	Top-5 acc 89.062 (87.881)	lr 0.00287
Train [94][1900/3239]	Time 0.242 (0.618)	Data Time 0.001 (0.021)	Loss 2.3187 (2.2470)	Entropy 0.80708 (0.80905)	Top-1 acc 69.141 (70.369)	Top-5 acc 85.938 (87.873)	lr 0.00287
Train [94][1910/3239]	Time 0.239 (0.617)	Data Time 0.001 (0.021)	Loss 2.2295 (2.2470)	Entropy 0.80709 (0.80904)	Top-1 acc 68.359 (70.364)	Top-5 acc 86.719 (87.877)	lr 0.00287
Train [94][1920/3239]	Time 0.243 (0.616)	Data Time 0.001 (0.021)	Loss 2.4519 (2.2471)	Entropy 0.80706 (0.80903)	Top-1 acc 63.672 (70.363)	Top-5 acc 83.984 (87.873)	lr 0.00287
Train [94][1930/3239]	Time 0.230 (0.616)	Data Time 0.001 (0.021)	Loss 2.4040 (2.2473)	Entropy 0.80693 (0.80902)	Top-1 acc 70.312 (70.360)	Top-5 acc 83.594 (87.867)	lr 0.00287
Train [94][1940/3239]	Time 0.241 (0.615)	Data Time 0.002 (0.021)	Loss 2.3322 (2.2474)	Entropy 0.80675 (0.80901)	Top-1 acc 66.797 (70.356)	Top-5 acc 85.547 (87.863)	lr 0.00287
Train [94][1950/3239]	Time 0.232 (0.614)	Data Time 0.001 (0.021)	Loss 2.3609 (2.2474)	Entropy 0.80667 (0.80900)	Top-1 acc 66.406 (70.354)	Top-5 acc 85.547 (87.862)	lr 0.00287
Train [94][1960/3239]	Time 0.225 (0.613)	Data Time 0.001 (0.021)	Loss 2.1016 (2.2473)	Entropy 0.80669 (0.80898)	Top-1 acc 71.094 (70.353)	Top-5 acc 90.625 (87.863)	lr 0.00287
Train [94][1970/3239]	Time 0.226 (0.613)	Data Time 0.001 (0.021)	Loss 2.1783 (2.2473)	Entropy 0.80663 (0.80897)	Top-1 acc 72.656 (70.356)	Top-5 acc 89.453 (87.865)	lr 0.00287
Train [94][1980/3239]	Time 0.216 (0.612)	Data Time 0.001 (0.020)	Loss 2.2982 (2.2474)	Entropy 0.80665 (0.80896)	Top-1 acc 69.922 (70.346)	Top-5 acc 88.672 (87.862)	lr 0.00287
Train [94][1990/3239]	Time 2.575 (0.611)	Data Time 0.001 (0.020)	Loss 2.1516 (2.2471)	Entropy 0.80665 (0.80895)	Top-1 acc 72.656 (70.352)	Top-5 acc 91.016 (87.869)	lr 0.00287
Train [94][2000/3239]	Time 0.237 (0.610)	Data Time 0.001 (0.020)	Loss 2.3522 (2.2472)	Entropy 0.80662 (0.80894)	Top-1 acc 65.625 (70.350)	Top-5 acc 87.891 (87.870)	lr 0.00286
Train [94][2010/3239]	Time 0.354 (0.609)	Data Time 0.002 (0.020)	Loss 2.4700 (2.2473)	Entropy 0.80659 (0.80893)	Top-1 acc 64.844 (70.343)	Top-5 acc 83.594 (87.869)	lr 0.00286
Train [94][2020/3239]	Time 0.242 (0.608)	Data Time 0.001 (0.020)	Loss 2.2417 (2.2473)	Entropy 0.80648 (0.80891)	Top-1 acc 69.922 (70.341)	Top-5 acc 88.281 (87.871)	lr 0.00286
Train [94][2030/3239]	Time 0.237 (0.607)	Data Time 0.001 (0.020)	Loss 2.2711 (2.2471)	Entropy 0.80643 (0.80890)	Top-1 acc 67.578 (70.346)	Top-5 acc 89.062 (87.872)	lr 0.00286
Train [94][2040/3239]	Time 0.229 (0.607)	Data Time 0.001 (0.020)	Loss 2.0336 (2.2471)	Entropy 0.80647 (0.80889)	Top-1 acc 76.172 (70.346)	Top-5 acc 92.969 (87.872)	lr 0.00286
Train [94][2050/3239]	Time 0.321 (0.606)	Data Time 0.001 (0.020)	Loss 1.9931 (2.2468)	Entropy 0.80647 (0.80888)	Top-1 acc 78.906 (70.356)	Top-5 acc 92.969 (87.877)	lr 0.00286
Train [94][2060/3239]	Time 0.383 (0.631)	Data Time 0.003 (0.020)	Loss 2.3169 (2.2468)	Entropy 0.80641 (0.80887)	Top-1 acc 66.016 (70.351)	Top-5 acc 87.891 (87.881)	lr 0.00286
Train [94][2070/3239]	Time 0.230 (0.630)	Data Time 0.002 (0.020)	Loss 2.5604 (2.2473)	Entropy 0.80634 (0.80885)	Top-1 acc 64.453 (70.341)	Top-5 acc 81.641 (87.871)	lr 0.00286
Train [94][2080/3239]	Time 0.234 (0.629)	Data Time 0.001 (0.020)	Loss 2.3768 (2.2476)	Entropy 0.80632 (0.80884)	Top-1 acc 65.234 (70.337)	Top-5 acc 86.328 (87.864)	lr 0.00286
Train [94][2090/3239]	Time 0.231 (0.629)	Data Time 0.001 (0.020)	Loss 2.1437 (2.2476)	Entropy 0.80621 (0.80883)	Top-1 acc 73.047 (70.344)	Top-5 acc 89.062 (87.865)	lr 0.00286
Train [94][2100/3239]	Time 2.582 (0.628)	Data Time 0.001 (0.019)	Loss 2.2226 (2.2475)	Entropy 0.80621 (0.80882)	Top-1 acc 71.094 (70.347)	Top-5 acc 86.328 (87.867)	lr 0.00286
Train [94][2110/3239]	Time 0.229 (0.626)	Data Time 0.002 (0.019)	Loss 2.3308 (2.2475)	Entropy 0.80618 (0.80880)	Top-1 acc 66.016 (70.345)	Top-5 acc 86.719 (87.866)	lr 0.00286
Train [94][2120/3239]	Time 0.234 (0.626)	Data Time 0.002 (0.019)	Loss 2.3206 (2.2475)	Entropy 0.80618 (0.80879)	Top-1 acc 69.922 (70.343)	Top-5 acc 85.938 (87.866)	lr 0.00286
Train [94][2130/3239]	Time 0.253 (0.625)	Data Time 0.001 (0.019)	Loss 2.1016 (2.2476)	Entropy 0.80618 (0.80878)	Top-1 acc 73.828 (70.344)	Top-5 acc 89.844 (87.863)	lr 0.00286
Train [94][2140/3239]	Time 0.236 (0.624)	Data Time 0.001 (0.019)	Loss 2.3365 (2.2477)	Entropy 0.80612 (0.80877)	Top-1 acc 70.312 (70.340)	Top-5 acc 85.156 (87.860)	lr 0.00286
Train [94][2150/3239]	Time 0.231 (0.624)	Data Time 0.001 (0.019)	Loss 2.0937 (2.2479)	Entropy 0.80603 (0.80875)	Top-1 acc 75.781 (70.337)	Top-5 acc 90.625 (87.858)	lr 0.00286
Train [94][2160/3239]	Time 0.255 (0.623)	Data Time 0.001 (0.019)	Loss 2.2428 (2.2479)	Entropy 0.80596 (0.80874)	Top-1 acc 71.094 (70.341)	Top-5 acc 89.062 (87.857)	lr 0.00285
Train [94][2170/3239]	Time 0.231 (0.622)	Data Time 0.001 (0.019)	Loss 2.2343 (2.2478)	Entropy 0.80596 (0.80873)	Top-1 acc 70.703 (70.342)	Top-5 acc 86.719 (87.858)	lr 0.00285
Train [94][2180/3239]	Time 0.323 (0.622)	Data Time 0.001 (0.019)	Loss 2.2168 (2.2481)	Entropy 0.80595 (0.80872)	Top-1 acc 72.266 (70.336)	Top-5 acc 89.062 (87.855)	lr 0.00285
Train [94][2190/3239]	Time 0.222 (0.621)	Data Time 0.001 (0.019)	Loss 2.2864 (2.2483)	Entropy 0.80580 (0.80870)	Top-1 acc 67.578 (70.330)	Top-5 acc 88.281 (87.853)	lr 0.00285
Train [94][2200/3239]	Time 0.233 (0.620)	Data Time 0.001 (0.019)	Loss 2.1763 (2.2484)	Entropy 0.80578 (0.80869)	Top-1 acc 71.484 (70.326)	Top-5 acc 88.672 (87.850)	lr 0.00285
Train [94][2210/3239]	Time 2.657 (0.620)	Data Time 0.001 (0.019)	Loss 2.2498 (2.2482)	Entropy 0.80578 (0.80868)	Top-1 acc 69.141 (70.329)	Top-5 acc 88.281 (87.853)	lr 0.00285
Train [94][2220/3239]	Time 0.279 (0.618)	Data Time 0.002 (0.018)	Loss 2.3781 (2.2485)	Entropy 0.80578 (0.80866)	Top-1 acc 66.797 (70.320)	Top-5 acc 85.547 (87.848)	lr 0.00285
Train [94][2230/3239]	Time 0.229 (0.618)	Data Time 0.001 (0.018)	Loss 2.2367 (2.2485)	Entropy 0.80582 (0.80865)	Top-1 acc 70.703 (70.321)	Top-5 acc 89.453 (87.850)	lr 0.00285
Train [94][2240/3239]	Time 0.236 (0.617)	Data Time 0.002 (0.018)	Loss 2.2118 (2.2485)	Entropy 0.80581 (0.80864)	Top-1 acc 70.312 (70.321)	Top-5 acc 89.453 (87.851)	lr 0.00285
Train [94][2250/3239]	Time 0.255 (0.616)	Data Time 0.001 (0.018)	Loss 2.2673 (2.2486)	Entropy 0.80579 (0.80863)	Top-1 acc 71.484 (70.317)	Top-5 acc 87.891 (87.848)	lr 0.00285
Train [94][2260/3239]	Time 0.238 (0.616)	Data Time 0.001 (0.018)	Loss 2.2772 (2.2486)	Entropy 0.80579 (0.80861)	Top-1 acc 69.141 (70.315)	Top-5 acc 89.062 (87.848)	lr 0.00285
Train [94][2270/3239]	Time 0.221 (0.615)	Data Time 0.001 (0.018)	Loss 2.3198 (2.2488)	Entropy 0.80580 (0.80860)	Top-1 acc 67.578 (70.303)	Top-5 acc 86.719 (87.845)	lr 0.00285
Train [94][2280/3239]	Time 0.227 (0.614)	Data Time 0.001 (0.018)	Loss 2.3420 (2.2488)	Entropy 0.80573 (0.80859)	Top-1 acc 68.359 (70.304)	Top-5 acc 85.547 (87.844)	lr 0.00285
Train [94][2290/3239]	Time 0.255 (0.614)	Data Time 0.001 (0.018)	Loss 2.3231 (2.2489)	Entropy 0.80569 (0.80858)	Top-1 acc 70.312 (70.303)	Top-5 acc 87.109 (87.845)	lr 0.00285
Train [94][2300/3239]	Time 0.226 (0.613)	Data Time 0.001 (0.018)	Loss 1.9989 (2.2486)	Entropy 0.80571 (0.80856)	Top-1 acc 74.609 (70.315)	Top-5 acc 94.531 (87.852)	lr 0.00285
Train [94][2310/3239]	Time 0.317 (0.613)	Data Time 0.001 (0.018)	Loss 2.2456 (2.2486)	Entropy 0.80568 (0.80855)	Top-1 acc 70.703 (70.314)	Top-5 acc 87.109 (87.851)	lr 0.00285
Train [94][2320/3239]	Time 2.679 (0.612)	Data Time 0.001 (0.018)	Loss 2.2390 (2.2486)	Entropy 0.80568 (0.80854)	Top-1 acc 69.922 (70.317)	Top-5 acc 89.062 (87.852)	lr 0.00284
Train [94][2330/3239]	Time 0.268 (0.611)	Data Time 0.001 (0.018)	Loss 2.1458 (2.2485)	Entropy 0.80567 (0.80853)	Top-1 acc 69.922 (70.313)	Top-5 acc 89.844 (87.853)	lr 0.00284
Train [94][2340/3239]	Time 0.225 (0.610)	Data Time 0.001 (0.018)	Loss 2.2122 (2.2486)	Entropy 0.80557 (0.80851)	Top-1 acc 69.531 (70.305)	Top-5 acc 89.453 (87.855)	lr 0.00284
Train [94][2350/3239]	Time 0.240 (0.610)	Data Time 0.001 (0.018)	Loss 2.3894 (2.2486)	Entropy 0.80554 (0.80850)	Top-1 acc 67.188 (70.307)	Top-5 acc 86.328 (87.854)	lr 0.00284
Train [94][2360/3239]	Time 0.220 (0.609)	Data Time 0.001 (0.017)	Loss 2.1739 (2.2486)	Entropy 0.80548 (0.80849)	Top-1 acc 74.219 (70.312)	Top-5 acc 91.406 (87.856)	lr 0.00284
Train [94][2370/3239]	Time 0.237 (0.609)	Data Time 0.001 (0.017)	Loss 2.2726 (2.2485)	Entropy 0.80544 (0.80848)	Top-1 acc 68.750 (70.316)	Top-5 acc 87.500 (87.855)	lr 0.00284
Train [94][2380/3239]	Time 0.219 (0.608)	Data Time 0.001 (0.017)	Loss 2.3013 (2.2487)	Entropy 0.80539 (0.80846)	Top-1 acc 68.750 (70.314)	Top-5 acc 87.891 (87.855)	lr 0.00284
Train [94][2390/3239]	Time 0.239 (0.608)	Data Time 0.001 (0.017)	Loss 2.4359 (2.2487)	Entropy 0.80530 (0.80845)	Top-1 acc 65.625 (70.310)	Top-5 acc 83.594 (87.855)	lr 0.00284
Train [94][2400/3239]	Time 0.213 (0.607)	Data Time 0.001 (0.017)	Loss 2.2725 (2.2488)	Entropy 0.80526 (0.80844)	Top-1 acc 68.359 (70.308)	Top-5 acc 87.500 (87.852)	lr 0.00284
Train [94][2410/3239]	Time 0.229 (0.607)	Data Time 0.002 (0.017)	Loss 2.1836 (2.2491)	Entropy 0.80527 (0.80842)	Top-1 acc 70.703 (70.297)	Top-5 acc 91.406 (87.851)	lr 0.00284
Train [94][2420/3239]	Time 0.286 (0.626)	Data Time 0.004 (0.017)	Loss 2.3779 (2.2492)	Entropy 0.80515 (0.80841)	Top-1 acc 66.797 (70.298)	Top-5 acc 85.547 (87.850)	lr 0.00284
Train [94][2430/3239]	Time 3.028 (0.626)	Data Time 0.002 (0.017)	Loss 2.3425 (2.2493)	Entropy 0.80515 (0.80840)	Top-1 acc 68.359 (70.297)	Top-5 acc 87.500 (87.846)	lr 0.00284
Train [94][2440/3239]	Time 0.362 (0.625)	Data Time 0.002 (0.017)	Loss 2.2599 (2.2493)	Entropy 0.80508 (0.80838)	Top-1 acc 67.969 (70.296)	Top-5 acc 88.281 (87.846)	lr 0.00284
Train [94][2450/3239]	Time 0.244 (0.624)	Data Time 0.002 (0.017)	Loss 2.2786 (2.2497)	Entropy 0.80508 (0.80837)	Top-1 acc 68.750 (70.287)	Top-5 acc 88.281 (87.838)	lr 0.00284
Train [94][2460/3239]	Time 0.250 (0.624)	Data Time 0.001 (0.017)	Loss 2.2305 (2.2498)	Entropy 0.80498 (0.80836)	Top-1 acc 69.531 (70.283)	Top-5 acc 86.719 (87.836)	lr 0.00284
Train [94][2470/3239]	Time 0.229 (0.623)	Data Time 0.002 (0.017)	Loss 2.2764 (2.2498)	Entropy 0.80497 (0.80834)	Top-1 acc 69.141 (70.283)	Top-5 acc 87.500 (87.835)	lr 0.00283
Train [94][2480/3239]	Time 0.346 (0.623)	Data Time 0.001 (0.017)	Loss 2.3073 (2.2496)	Entropy 0.80490 (0.80833)	Top-1 acc 70.312 (70.289)	Top-5 acc 87.109 (87.837)	lr 0.00283
Train [94][2490/3239]	Time 0.269 (0.622)	Data Time 0.001 (0.017)	Loss 2.0729 (2.2497)	Entropy 0.80492 (0.80832)	Top-1 acc 77.734 (70.292)	Top-5 acc 88.281 (87.835)	lr 0.00283
Train [94][2500/3239]	Time 0.242 (0.622)	Data Time 0.001 (0.017)	Loss 2.2440 (2.2496)	Entropy 0.80484 (0.80830)	Top-1 acc 69.922 (70.297)	Top-5 acc 87.109 (87.837)	lr 0.00283
Train [94][2510/3239]	Time 0.238 (0.621)	Data Time 0.001 (0.017)	Loss 2.0978 (2.2495)	Entropy 0.80481 (0.80829)	Top-1 acc 73.047 (70.297)	Top-5 acc 90.234 (87.839)	lr 0.00283
Train [94][2520/3239]	Time 0.219 (0.621)	Data Time 0.001 (0.016)	Loss 2.1571 (2.2494)	Entropy 0.80471 (0.80827)	Top-1 acc 72.266 (70.301)	Top-5 acc 90.625 (87.841)	lr 0.00283
Train [94][2530/3239]	Time 0.231 (0.620)	Data Time 0.001 (0.016)	Loss 2.1547 (2.2494)	Entropy 0.80466 (0.80826)	Top-1 acc 73.828 (70.302)	Top-5 acc 89.453 (87.841)	lr 0.00283
Train [94][2540/3239]	Time 2.554 (0.620)	Data Time 0.001 (0.016)	Loss 2.0995 (2.2493)	Entropy 0.80466 (0.80825)	Top-1 acc 75.000 (70.311)	Top-5 acc 90.625 (87.840)	lr 0.00283
Train [94][2550/3239]	Time 0.232 (0.618)	Data Time 0.001 (0.016)	Loss 2.0967 (2.2491)	Entropy 0.80462 (0.80823)	Top-1 acc 74.219 (70.315)	Top-5 acc 91.016 (87.843)	lr 0.00283
Train [94][2560/3239]	Time 0.236 (0.618)	Data Time 0.001 (0.016)	Loss 2.2026 (2.2491)	Entropy 0.80460 (0.80822)	Top-1 acc 68.359 (70.310)	Top-5 acc 89.453 (87.845)	lr 0.00283
Train [94][2570/3239]	Time 0.230 (0.617)	Data Time 0.001 (0.016)	Loss 2.2232 (2.2492)	Entropy 0.80454 (0.80820)	Top-1 acc 70.703 (70.308)	Top-5 acc 88.281 (87.841)	lr 0.00283
Train [94][2580/3239]	Time 0.231 (0.617)	Data Time 0.001 (0.016)	Loss 2.3448 (2.2490)	Entropy 0.80446 (0.80819)	Top-1 acc 68.359 (70.317)	Top-5 acc 86.719 (87.843)	lr 0.00283
Train [94][2590/3239]	Time 0.228 (0.616)	Data Time 0.001 (0.016)	Loss 2.4008 (2.2490)	Entropy 0.80442 (0.80817)	Top-1 acc 67.188 (70.316)	Top-5 acc 85.156 (87.842)	lr 0.00283
Train [94][2600/3239]	Time 0.235 (0.616)	Data Time 0.001 (0.016)	Loss 2.2450 (2.2492)	Entropy 0.80437 (0.80816)	Top-1 acc 69.141 (70.311)	Top-5 acc 89.844 (87.841)	lr 0.00283
Train [94][2610/3239]	Time 0.325 (0.615)	Data Time 0.001 (0.016)	Loss 2.2544 (2.2493)	Entropy 0.80435 (0.80815)	Top-1 acc 67.969 (70.307)	Top-5 acc 90.234 (87.837)	lr 0.00283
Train [94][2620/3239]	Time 0.229 (0.615)	Data Time 0.001 (0.016)	Loss 2.4035 (2.2493)	Entropy 0.80432 (0.80813)	Top-1 acc 66.797 (70.308)	Top-5 acc 86.719 (87.837)	lr 0.00283
Train [94][2630/3239]	Time 0.244 (0.614)	Data Time 0.001 (0.016)	Loss 2.3569 (2.2493)	Entropy 0.80427 (0.80812)	Top-1 acc 69.141 (70.308)	Top-5 acc 84.766 (87.835)	lr 0.00282
Train [94][2640/3239]	Time 0.250 (0.614)	Data Time 0.001 (0.016)	Loss 2.2652 (2.2492)	Entropy 0.80418 (0.80810)	Top-1 acc 72.656 (70.309)	Top-5 acc 86.328 (87.834)	lr 0.00282
Train [94][2650/3239]	Time 0.329 (0.613)	Data Time 0.001 (0.016)	Loss 2.3597 (2.2494)	Entropy 0.80448 (0.80809)	Top-1 acc 66.406 (70.302)	Top-5 acc 85.156 (87.831)	lr 0.00282
Train [94][2660/3239]	Time 0.232 (0.613)	Data Time 0.001 (0.016)	Loss 2.4493 (2.2495)	Entropy 0.80446 (0.80807)	Top-1 acc 64.062 (70.300)	Top-5 acc 83.594 (87.829)	lr 0.00282
Train [94][2670/3239]	Time 0.260 (0.612)	Data Time 0.001 (0.016)	Loss 2.2911 (2.2496)	Entropy 0.80448 (0.80806)	Top-1 acc 66.797 (70.298)	Top-5 acc 88.672 (87.831)	lr 0.00282
Train [94][2680/3239]	Time 0.231 (0.611)	Data Time 0.001 (0.016)	Loss 2.2526 (2.2496)	Entropy 0.80448 (0.80805)	Top-1 acc 71.484 (70.297)	Top-5 acc 89.062 (87.832)	lr 0.00282
Train [94][2690/3239]	Time 0.235 (0.611)	Data Time 0.001 (0.016)	Loss 2.1767 (2.2496)	Entropy 0.80443 (0.80803)	Top-1 acc 71.484 (70.299)	Top-5 acc 88.672 (87.831)	lr 0.00282
Train [94][2700/3239]	Time 0.215 (0.611)	Data Time 0.001 (0.015)	Loss 2.2398 (2.2496)	Entropy 0.80438 (0.80802)	Top-1 acc 69.531 (70.301)	Top-5 acc 88.281 (87.832)	lr 0.00282
Train [94][2710/3239]	Time 0.224 (0.610)	Data Time 0.001 (0.015)	Loss 2.2204 (2.2496)	Entropy 0.80434 (0.80801)	Top-1 acc 72.656 (70.302)	Top-5 acc 88.672 (87.829)	lr 0.00282
Train [94][2720/3239]	Time 0.274 (0.610)	Data Time 0.001 (0.015)	Loss 2.2518 (2.2497)	Entropy 0.80434 (0.80799)	Top-1 acc 69.922 (70.300)	Top-5 acc 90.234 (87.828)	lr 0.00282
Train [94][2730/3239]	Time 0.226 (0.609)	Data Time 0.001 (0.015)	Loss 2.2870 (2.2496)	Entropy 0.80444 (0.80798)	Top-1 acc 71.094 (70.301)	Top-5 acc 85.938 (87.828)	lr 0.00282
Train [94][2740/3239]	Time 0.234 (0.609)	Data Time 0.001 (0.015)	Loss 2.4386 (2.2496)	Entropy 0.80440 (0.80797)	Top-1 acc 66.406 (70.305)	Top-5 acc 83.594 (87.827)	lr 0.00282
Train [94][2750/3239]	Time 0.230 (0.608)	Data Time 0.001 (0.015)	Loss 2.1056 (2.2495)	Entropy 0.80440 (0.80795)	Top-1 acc 76.562 (70.305)	Top-5 acc 89.062 (87.827)	lr 0.00282
Train [94][2760/3239]	Time 0.239 (0.608)	Data Time 0.001 (0.015)	Loss 2.0963 (2.2495)	Entropy 0.80440 (0.80794)	Top-1 acc 74.219 (70.306)	Top-5 acc 89.844 (87.825)	lr 0.00282
Train [94][2770/3239]	Time 0.283 (0.627)	Data Time 0.005 (0.015)	Loss 2.2313 (2.2494)	Entropy 0.80443 (0.80793)	Top-1 acc 73.047 (70.312)	Top-5 acc 89.453 (87.829)	lr 0.00282
Train [94][2780/3239]	Time 0.360 (0.626)	Data Time 0.002 (0.015)	Loss 2.2492 (2.2494)	Entropy 0.80442 (0.80792)	Top-1 acc 70.312 (70.309)	Top-5 acc 87.891 (87.831)	lr 0.00281
Train [94][2790/3239]	Time 0.234 (0.626)	Data Time 0.002 (0.015)	Loss 2.2125 (2.2494)	Entropy 0.80433 (0.80790)	Top-1 acc 72.266 (70.313)	Top-5 acc 87.109 (87.829)	lr 0.00281
Train [94][2800/3239]	Time 0.244 (0.625)	Data Time 0.002 (0.015)	Loss 2.4523 (2.2494)	Entropy 0.80434 (0.80789)	Top-1 acc 62.891 (70.311)	Top-5 acc 86.719 (87.830)	lr 0.00281
Train [94][2810/3239]	Time 0.234 (0.625)	Data Time 0.001 (0.015)	Loss 2.1127 (2.2495)	Entropy 0.80430 (0.80788)	Top-1 acc 73.047 (70.309)	Top-5 acc 89.453 (87.827)	lr 0.00281
Train [94][2820/3239]	Time 0.319 (0.624)	Data Time 0.001 (0.015)	Loss 2.4337 (2.2497)	Entropy 0.80424 (0.80786)	Top-1 acc 67.578 (70.304)	Top-5 acc 83.984 (87.824)	lr 0.00281
Train [94][2830/3239]	Time 0.220 (0.624)	Data Time 0.001 (0.015)	Loss 2.3087 (2.2499)	Entropy 0.80426 (0.80785)	Top-1 acc 69.922 (70.299)	Top-5 acc 85.938 (87.821)	lr 0.00281
Train [94][2840/3239]	Time 0.264 (0.623)	Data Time 0.001 (0.015)	Loss 2.2272 (2.2499)	Entropy 0.80419 (0.80784)	Top-1 acc 70.312 (70.298)	Top-5 acc 87.500 (87.818)	lr 0.00281
Train [94][2850/3239]	Time 0.227 (0.623)	Data Time 0.001 (0.015)	Loss 2.2050 (2.2498)	Entropy 0.80419 (0.80783)	Top-1 acc 68.750 (70.299)	Top-5 acc 87.891 (87.818)	lr 0.00281
Train [94][2860/3239]	Time 0.369 (0.622)	Data Time 0.001 (0.015)	Loss 2.3479 (2.2499)	Entropy 0.80412 (0.80781)	Top-1 acc 67.578 (70.297)	Top-5 acc 85.156 (87.815)	lr 0.00281
Train [94][2870/3239]	Time 0.266 (0.622)	Data Time 0.001 (0.015)	Loss 2.2939 (2.2499)	Entropy 0.80410 (0.80780)	Top-1 acc 69.141 (70.298)	Top-5 acc 85.547 (87.816)	lr 0.00281
Train [94][2880/3239]	Time 0.230 (0.621)	Data Time 0.001 (0.015)	Loss 2.2113 (2.2500)	Entropy 0.80418 (0.80779)	Top-1 acc 71.094 (70.292)	Top-5 acc 89.453 (87.814)	lr 0.00281
Train [94][2890/3239]	Time 0.235 (0.621)	Data Time 0.001 (0.015)	Loss 2.2537 (2.2500)	Entropy 0.80422 (0.80778)	Top-1 acc 74.609 (70.293)	Top-5 acc 85.938 (87.816)	lr 0.00281
Train [94][2900/3239]	Time 0.277 (0.620)	Data Time 0.001 (0.015)	Loss 2.3527 (2.2499)	Entropy 0.80424 (0.80776)	Top-1 acc 66.797 (70.291)	Top-5 acc 87.500 (87.818)	lr 0.00281
Train [94][2910/3239]	Time 0.235 (0.620)	Data Time 0.001 (0.014)	Loss 2.3027 (2.2501)	Entropy 0.80415 (0.80775)	Top-1 acc 68.359 (70.288)	Top-5 acc 87.500 (87.816)	lr 0.00281
Train [94][2920/3239]	Time 0.233 (0.620)	Data Time 0.001 (0.014)	Loss 2.1540 (2.2500)	Entropy 0.80413 (0.80774)	Top-1 acc 72.656 (70.289)	Top-5 acc 89.844 (87.817)	lr 0.00281
Train [94][2930/3239]	Time 0.260 (0.619)	Data Time 0.001 (0.014)	Loss 2.2853 (2.2499)	Entropy 0.80410 (0.80773)	Top-1 acc 69.531 (70.288)	Top-5 acc 87.500 (87.820)	lr 0.00281
Train [94][2940/3239]	Time 0.234 (0.619)	Data Time 0.001 (0.014)	Loss 2.1451 (2.2497)	Entropy 0.80414 (0.80771)	Top-1 acc 71.484 (70.291)	Top-5 acc 89.062 (87.822)	lr 0.00280
Train [94][2950/3239]	Time 0.227 (0.618)	Data Time 0.001 (0.014)	Loss 2.3009 (2.2498)	Entropy 0.80410 (0.80770)	Top-1 acc 67.969 (70.289)	Top-5 acc 88.281 (87.822)	lr 0.00280
Train [94][2960/3239]	Time 0.243 (0.618)	Data Time 0.001 (0.014)	Loss 2.3241 (2.2498)	Entropy 0.80410 (0.80769)	Top-1 acc 69.531 (70.289)	Top-5 acc 86.328 (87.822)	lr 0.00280
Train [94][2970/3239]	Time 0.259 (0.617)	Data Time 0.001 (0.014)	Loss 2.1804 (2.2499)	Entropy 0.80415 (0.80768)	Top-1 acc 70.312 (70.288)	Top-5 acc 87.891 (87.819)	lr 0.00280
Train [94][2980/3239]	Time 0.235 (0.617)	Data Time 0.001 (0.014)	Loss 2.3893 (2.2499)	Entropy 0.80411 (0.80767)	Top-1 acc 65.625 (70.283)	Top-5 acc 83.984 (87.820)	lr 0.00280
Train [94][2990/3239]	Time 0.338 (0.616)	Data Time 0.001 (0.014)	Loss 2.4160 (2.2500)	Entropy 0.80411 (0.80765)	Top-1 acc 66.016 (70.282)	Top-5 acc 85.547 (87.821)	lr 0.00280
Train [94][3000/3239]	Time 0.228 (0.616)	Data Time 0.001 (0.014)	Loss 2.1566 (2.2501)	Entropy 0.80408 (0.80764)	Top-1 acc 71.484 (70.283)	Top-5 acc 88.281 (87.819)	lr 0.00280
Train [94][3010/3239]	Time 0.247 (0.615)	Data Time 0.001 (0.014)	Loss 2.2432 (2.2500)	Entropy 0.80402 (0.80763)	Top-1 acc 69.141 (70.286)	Top-5 acc 88.672 (87.820)	lr 0.00280
Train [94][3020/3239]	Time 0.230 (0.615)	Data Time 0.001 (0.014)	Loss 2.2821 (2.2501)	Entropy 0.80405 (0.80762)	Top-1 acc 69.141 (70.281)	Top-5 acc 85.938 (87.820)	lr 0.00280
Train [94][3030/3239]	Time 0.261 (0.614)	Data Time 0.001 (0.014)	Loss 2.2608 (2.2500)	Entropy 0.80400 (0.80761)	Top-1 acc 69.141 (70.285)	Top-5 acc 87.891 (87.822)	lr 0.00280
Train [94][3040/3239]	Time 0.229 (0.614)	Data Time 0.001 (0.014)	Loss 2.1923 (2.2500)	Entropy 0.80396 (0.80759)	Top-1 acc 73.047 (70.288)	Top-5 acc 88.672 (87.823)	lr 0.00280
Train [94][3050/3239]	Time 0.231 (0.613)	Data Time 0.001 (0.014)	Loss 2.3113 (2.2501)	Entropy 0.80400 (0.80758)	Top-1 acc 69.531 (70.284)	Top-5 acc 85.156 (87.820)	lr 0.00280
Train [94][3060/3239]	Time 0.227 (0.613)	Data Time 0.001 (0.014)	Loss 2.4150 (2.2503)	Entropy 0.80402 (0.80757)	Top-1 acc 66.016 (70.275)	Top-5 acc 84.766 (87.818)	lr 0.00280
Train [94][3070/3239]	Time 0.240 (0.613)	Data Time 0.001 (0.014)	Loss 2.2644 (2.2504)	Entropy 0.80402 (0.80756)	Top-1 acc 71.094 (70.275)	Top-5 acc 87.500 (87.815)	lr 0.00280
Train [94][3080/3239]	Time 0.230 (0.612)	Data Time 0.001 (0.014)	Loss 2.2813 (2.2504)	Entropy 0.80396 (0.80755)	Top-1 acc 69.531 (70.277)	Top-5 acc 86.328 (87.817)	lr 0.00280
Train [94][3090/3239]	Time 0.237 (0.612)	Data Time 0.001 (0.014)	Loss 2.2091 (2.2503)	Entropy 0.80392 (0.80754)	Top-1 acc 71.094 (70.282)	Top-5 acc 88.281 (87.819)	lr 0.00280
Train [94][3100/3239]	Time 0.303 (0.628)	Data Time 0.004 (0.014)	Loss 2.2662 (2.2503)	Entropy 0.80393 (0.80752)	Top-1 acc 72.656 (70.279)	Top-5 acc 86.328 (87.819)	lr 0.00279
Train [94][3110/3239]	Time 0.270 (0.627)	Data Time 0.002 (0.014)	Loss 2.2475 (2.2503)	Entropy 0.80386 (0.80751)	Top-1 acc 69.531 (70.277)	Top-5 acc 89.453 (87.820)	lr 0.00279
Train [94][3120/3239]	Time 0.369 (0.627)	Data Time 0.002 (0.014)	Loss 2.0827 (2.2504)	Entropy 0.80390 (0.80750)	Top-1 acc 76.562 (70.273)	Top-5 acc 91.016 (87.819)	lr 0.00279
Train [94][3130/3239]	Time 0.221 (0.626)	Data Time 0.001 (0.014)	Loss 2.0682 (2.2504)	Entropy 0.80392 (0.80749)	Top-1 acc 76.953 (70.269)	Top-5 acc 89.844 (87.819)	lr 0.00279
Train [94][3140/3239]	Time 0.230 (0.626)	Data Time 0.001 (0.014)	Loss 2.1910 (2.2503)	Entropy 0.80396 (0.80748)	Top-1 acc 73.438 (70.270)	Top-5 acc 88.281 (87.818)	lr 0.00279
Train [94][3150/3239]	Time 0.234 (0.626)	Data Time 0.001 (0.014)	Loss 2.2317 (2.2502)	Entropy 0.80392 (0.80747)	Top-1 acc 71.484 (70.274)	Top-5 acc 88.281 (87.821)	lr 0.00279
Train [94][3160/3239]	Time 0.328 (0.625)	Data Time 0.001 (0.013)	Loss 2.2842 (2.2503)	Entropy 0.80390 (0.80746)	Top-1 acc 69.531 (70.272)	Top-5 acc 87.500 (87.820)	lr 0.00279
Train [94][3170/3239]	Time 0.210 (0.625)	Data Time 0.001 (0.013)	Loss 2.2686 (2.2502)	Entropy 0.80380 (0.80745)	Top-1 acc 71.484 (70.278)	Top-5 acc 85.938 (87.821)	lr 0.00279
Train [94][3180/3239]	Time 0.239 (0.624)	Data Time 0.000 (0.013)	Loss 2.1867 (2.2501)	Entropy 0.80378 (0.80743)	Top-1 acc 70.703 (70.278)	Top-5 acc 89.062 (87.824)	lr 0.00279
Train [94][3190/3239]	Time 0.245 (0.624)	Data Time 0.000 (0.013)	Loss 2.1352 (2.2499)	Entropy 0.80375 (0.80742)	Top-1 acc 73.438 (70.280)	Top-5 acc 89.062 (87.826)	lr 0.00279
Train [94][3200/3239]	Time 0.285 (0.623)	Data Time 0.000 (0.013)	Loss 2.3010 (2.2501)	Entropy 0.80374 (0.80741)	Top-1 acc 70.703 (70.277)	Top-5 acc 87.891 (87.825)	lr 0.00279
Train [94][3210/3239]	Time 0.236 (0.622)	Data Time 0.000 (0.013)	Loss 2.1924 (2.2500)	Entropy 0.80370 (0.80740)	Top-1 acc 71.875 (70.277)	Top-5 acc 88.281 (87.824)	lr 0.00279
Train [94][3220/3239]	Time 0.225 (0.622)	Data Time 0.000 (0.013)	Loss 2.3124 (2.2500)	Entropy 0.80368 (0.80739)	Top-1 acc 70.312 (70.276)	Top-5 acc 86.719 (87.825)	lr 0.00279
Train [94][3230/3239]	Time 0.239 (0.621)	Data Time 0.000 (0.013)	Loss 2.3019 (2.2500)	Entropy 0.80363 (0.80738)	Top-1 acc 73.438 (70.278)	Top-5 acc 87.891 (87.826)	lr 0.00279
Train [94][3239/3239]	Time 2.386 (0.621)	Data Time 0.000 (0.013)	Loss 2.4970 (2.2500)	Entropy 0.80363 (0.80737)	Top-1 acc 67.901 (70.277)	Top-5 acc 83.951 (87.827)	lr 0.00279
==========Valid [94/120]	loss 1.241	top-1 acc 71.685 (71.724)	top-5 acc 89.315	Train top-1 70.277	top-5 87.827	Entropy 0.80363	Latency-None: 0.000ms	Flops: 546.53M
Train [95][0/3239]	Time 41.199 (41.199)	Data Time 38.272 (38.272)	Loss 2.1807 (2.1807)	Entropy 0.80356 (0.80356)	Top-1 acc 74.219 (74.219)	Top-5 acc 90.234 (90.234)	lr 0.00279
Train [95][10/3239]	Time 3.204 (4.384)	Data Time 0.001 (3.481)	Loss 2.3369 (2.2653)	Entropy 0.80356 (0.80356)	Top-1 acc 67.188 (69.531)	Top-5 acc 85.547 (87.678)	lr 0.00279
Train [95][20/3239]	Time 0.246 (2.424)	Data Time 0.001 (1.824)	Loss 2.1237 (2.2118)	Entropy 0.80355 (0.80355)	Top-1 acc 73.047 (70.815)	Top-5 acc 86.719 (88.486)	lr 0.00278
Train [95][30/3239]	Time 0.242 (1.801)	Data Time 0.001 (1.236)	Loss 2.2290 (2.2109)	Entropy 0.80352 (0.80354)	Top-1 acc 70.703 (71.031)	Top-5 acc 87.109 (88.697)	lr 0.00278
Train [95][40/3239]	Time 0.234 (1.479)	Data Time 0.001 (0.935)	Loss 2.2205 (2.2064)	Entropy 0.80338 (0.80351)	Top-1 acc 67.969 (71.246)	Top-5 acc 91.406 (88.758)	lr 0.00278
Train [95][50/3239]	Time 0.323 (1.284)	Data Time 0.001 (0.752)	Loss 2.0762 (2.2083)	Entropy 0.80336 (0.80348)	Top-1 acc 72.656 (71.255)	Top-5 acc 91.016 (88.680)	lr 0.00278
Train [95][60/3239]	Time 0.259 (1.152)	Data Time 0.002 (0.629)	Loss 2.3251 (2.2120)	Entropy 0.80336 (0.80346)	Top-1 acc 65.234 (71.119)	Top-5 acc 87.500 (88.608)	lr 0.00278
Train [95][70/3239]	Time 0.214 (1.056)	Data Time 0.001 (0.541)	Loss 2.2370 (2.2158)	Entropy 0.80339 (0.80345)	Top-1 acc 69.922 (70.962)	Top-5 acc 86.328 (88.490)	lr 0.00278
Train [95][80/3239]	Time 0.255 (0.987)	Data Time 0.001 (0.474)	Loss 2.1248 (2.2103)	Entropy 0.80338 (0.80344)	Top-1 acc 73.047 (71.214)	Top-5 acc 90.234 (88.585)	lr 0.00278
Train [95][90/3239]	Time 0.239 (0.932)	Data Time 0.001 (0.422)	Loss 2.3058 (2.2156)	Entropy 0.80333 (0.80343)	Top-1 acc 71.875 (71.124)	Top-5 acc 88.672 (88.440)	lr 0.00278
Train [95][100/3239]	Time 0.228 (0.885)	Data Time 0.001 (0.381)	Loss 2.2920 (2.2164)	Entropy 0.80327 (0.80342)	Top-1 acc 71.875 (71.160)	Top-5 acc 85.547 (88.397)	lr 0.00278
Train [95][110/3239]	Time 0.225 (0.849)	Data Time 0.001 (0.346)	Loss 2.2582 (2.2215)	Entropy 0.80325 (0.80341)	Top-1 acc 68.750 (71.055)	Top-5 acc 88.672 (88.306)	lr 0.00278
Train [95][120/3239]	Time 2.615 (0.818)	Data Time 0.001 (0.318)	Loss 2.2057 (2.2251)	Entropy 0.80325 (0.80340)	Top-1 acc 70.703 (70.858)	Top-5 acc 87.891 (88.236)	lr 0.00278
Train [95][130/3239]	Time 0.232 (0.774)	Data Time 0.001 (0.294)	Loss 2.2576 (2.2304)	Entropy 0.80321 (0.80338)	Top-1 acc 65.625 (70.754)	Top-5 acc 87.891 (88.147)	lr 0.00278
Train [95][140/3239]	Time 0.313 (0.752)	Data Time 0.001 (0.273)	Loss 2.2695 (2.2309)	Entropy 0.80314 (0.80336)	Top-1 acc 70.703 (70.714)	Top-5 acc 86.328 (88.170)	lr 0.00278
Train [95][150/3239]	Time 0.267 (0.734)	Data Time 0.001 (0.255)	Loss 2.2198 (2.2296)	Entropy 0.80310 (0.80335)	Top-1 acc 72.656 (70.695)	Top-5 acc 89.844 (88.201)	lr 0.00278
Train [95][160/3239]	Time 0.229 (0.717)	Data Time 0.001 (0.239)	Loss 2.1565 (2.2298)	Entropy 0.80307 (0.80333)	Top-1 acc 73.828 (70.764)	Top-5 acc 87.891 (88.182)	lr 0.00278
Train [95][170/3239]	Time 0.222 (0.703)	Data Time 0.002 (0.225)	Loss 2.2041 (2.2279)	Entropy 0.80303 (0.80331)	Top-1 acc 70.703 (70.879)	Top-5 acc 91.016 (88.222)	lr 0.00277
Train [95][180/3239]	Time 0.325 (0.691)	Data Time 0.002 (0.213)	Loss 2.1867 (2.2297)	Entropy 0.80300 (0.80330)	Top-1 acc 73.047 (70.846)	Top-5 acc 88.672 (88.210)	lr 0.00277
Train [95][190/3239]	Time 0.225 (0.679)	Data Time 0.001 (0.202)	Loss 2.1616 (2.2302)	Entropy 0.80300 (0.80328)	Top-1 acc 73.047 (70.840)	Top-5 acc 91.016 (88.197)	lr 0.00277
Train [95][200/3239]	Time 0.228 (0.670)	Data Time 0.001 (0.192)	Loss 2.2517 (2.2318)	Entropy 0.80297 (0.80327)	Top-1 acc 67.969 (70.791)	Top-5 acc 87.500 (88.130)	lr 0.00277
Train [95][210/3239]	Time 0.373 (0.916)	Data Time 0.003 (0.183)	Loss 2.3852 (2.2324)	Entropy 0.80287 (0.80325)	Top-1 acc 68.359 (70.746)	Top-5 acc 82.422 (88.144)	lr 0.00277
Train [95][220/3239]	Time 0.372 (0.898)	Data Time 0.002 (0.175)	Loss 2.2114 (2.2300)	Entropy 0.80289 (0.80324)	Top-1 acc 70.703 (70.830)	Top-5 acc 88.281 (88.179)	lr 0.00277
Train [95][230/3239]	Time 2.598 (0.880)	Data Time 0.002 (0.167)	Loss 2.1070 (2.2293)	Entropy 0.80289 (0.80322)	Top-1 acc 76.172 (70.859)	Top-5 acc 89.844 (88.188)	lr 0.00277
Train [95][240/3239]	Time 0.238 (0.854)	Data Time 0.002 (0.160)	Loss 2.1706 (2.2277)	Entropy 0.80285 (0.80321)	Top-1 acc 72.656 (70.924)	Top-5 acc 87.500 (88.184)	lr 0.00277
Train [95][250/3239]	Time 0.244 (0.839)	Data Time 0.002 (0.154)	Loss 2.1326 (2.2284)	Entropy 0.80286 (0.80319)	Top-1 acc 71.875 (70.907)	Top-5 acc 91.797 (88.175)	lr 0.00277
Train [95][260/3239]	Time 0.220 (0.825)	Data Time 0.001 (0.148)	Loss 2.3129 (2.2281)	Entropy 0.80284 (0.80318)	Top-1 acc 68.359 (70.940)	Top-5 acc 86.719 (88.166)	lr 0.00277
Train [95][270/3239]	Time 0.231 (0.812)	Data Time 0.001 (0.143)	Loss 2.0871 (2.2268)	Entropy 0.80280 (0.80316)	Top-1 acc 71.484 (70.944)	Top-5 acc 91.406 (88.202)	lr 0.00277
Train [95][280/3239]	Time 0.230 (0.800)	Data Time 0.001 (0.138)	Loss 2.1539 (2.2260)	Entropy 0.80278 (0.80315)	Top-1 acc 72.656 (70.960)	Top-5 acc 88.281 (88.209)	lr 0.00277
Train [95][290/3239]	Time 0.224 (0.789)	Data Time 0.001 (0.133)	Loss 2.2186 (2.2269)	Entropy 0.80274 (0.80314)	Top-1 acc 71.484 (70.921)	Top-5 acc 87.109 (88.178)	lr 0.00277
Train [95][300/3239]	Time 0.232 (0.778)	Data Time 0.002 (0.129)	Loss 2.2918 (2.2287)	Entropy 0.80272 (0.80312)	Top-1 acc 71.484 (70.884)	Top-5 acc 87.500 (88.158)	lr 0.00277
Train [95][310/3239]	Time 0.250 (0.768)	Data Time 0.001 (0.125)	Loss 2.3095 (2.2292)	Entropy 0.80270 (0.80311)	Top-1 acc 69.531 (70.878)	Top-5 acc 88.672 (88.167)	lr 0.00277
Train [95][320/3239]	Time 0.207 (0.760)	Data Time 0.001 (0.121)	Loss 2.2445 (2.2315)	Entropy 0.80259 (0.80310)	Top-1 acc 69.922 (70.811)	Top-5 acc 86.719 (88.132)	lr 0.00277
Train [95][330/3239]	Time 0.233 (0.751)	Data Time 0.001 (0.117)	Loss 2.1764 (2.2304)	Entropy 0.80263 (0.80308)	Top-1 acc 73.047 (70.833)	Top-5 acc 87.891 (88.140)	lr 0.00276
Train [95][340/3239]	Time 2.502 (0.743)	Data Time 0.002 (0.114)	Loss 2.2109 (2.2304)	Entropy 0.80263 (0.80307)	Top-1 acc 73.438 (70.839)	Top-5 acc 89.062 (88.138)	lr 0.00276
Train [95][350/3239]	Time 0.319 (0.729)	Data Time 0.001 (0.111)	Loss 2.2007 (2.2305)	Entropy 0.80255 (0.80305)	Top-1 acc 69.922 (70.836)	Top-5 acc 88.672 (88.139)	lr 0.00276
Train [95][360/3239]	Time 0.251 (0.721)	Data Time 0.001 (0.108)	Loss 2.1425 (2.2304)	Entropy 0.80252 (0.80304)	Top-1 acc 72.266 (70.821)	Top-5 acc 91.406 (88.151)	lr 0.00276
Train [95][370/3239]	Time 0.239 (0.715)	Data Time 0.001 (0.105)	Loss 2.3121 (2.2306)	Entropy 0.80248 (0.80302)	Top-1 acc 69.531 (70.833)	Top-5 acc 88.672 (88.160)	lr 0.00276
Train [95][380/3239]	Time 0.229 (0.708)	Data Time 0.001 (0.102)	Loss 2.1101 (2.2305)	Entropy 0.80234 (0.80301)	Top-1 acc 76.562 (70.847)	Top-5 acc 90.234 (88.165)	lr 0.00276
Train [95][390/3239]	Time 0.235 (0.702)	Data Time 0.001 (0.100)	Loss 2.2492 (2.2303)	Entropy 0.80235 (0.80299)	Top-1 acc 66.797 (70.838)	Top-5 acc 89.062 (88.189)	lr 0.00276
Train [95][400/3239]	Time 0.223 (0.697)	Data Time 0.001 (0.097)	Loss 2.1057 (2.2311)	Entropy 0.80245 (0.80298)	Top-1 acc 77.344 (70.828)	Top-5 acc 88.672 (88.177)	lr 0.00276
Train [95][410/3239]	Time 0.234 (0.692)	Data Time 0.001 (0.095)	Loss 2.2560 (2.2318)	Entropy 0.80243 (0.80296)	Top-1 acc 67.188 (70.796)	Top-5 acc 88.672 (88.155)	lr 0.00276
Train [95][420/3239]	Time 0.243 (0.687)	Data Time 0.001 (0.093)	Loss 2.1896 (2.2325)	Entropy 0.80246 (0.80295)	Top-1 acc 71.484 (70.791)	Top-5 acc 89.453 (88.137)	lr 0.00276
Train [95][430/3239]	Time 0.237 (0.682)	Data Time 0.001 (0.090)	Loss 2.2526 (2.2331)	Entropy 0.80239 (0.80294)	Top-1 acc 67.188 (70.778)	Top-5 acc 87.109 (88.127)	lr 0.00276
Train [95][440/3239]	Time 0.307 (0.678)	Data Time 0.001 (0.088)	Loss 2.2168 (2.2328)	Entropy 0.80237 (0.80293)	Top-1 acc 69.531 (70.781)	Top-5 acc 87.891 (88.126)	lr 0.00276
Train [95][450/3239]	Time 2.538 (0.673)	Data Time 0.001 (0.087)	Loss 2.2419 (2.2327)	Entropy 0.80237 (0.80291)	Top-1 acc 69.141 (70.770)	Top-5 acc 88.672 (88.141)	lr 0.00276
Train [95][460/3239]	Time 0.284 (0.664)	Data Time 0.001 (0.085)	Loss 2.3014 (2.2333)	Entropy 0.80233 (0.80290)	Top-1 acc 68.359 (70.745)	Top-5 acc 87.891 (88.131)	lr 0.00276
Train [95][470/3239]	Time 0.250 (0.660)	Data Time 0.001 (0.083)	Loss 2.2889 (2.2326)	Entropy 0.80234 (0.80289)	Top-1 acc 69.141 (70.761)	Top-5 acc 87.500 (88.146)	lr 0.00276
Train [95][480/3239]	Time 0.328 (0.656)	Data Time 0.001 (0.081)	Loss 2.0665 (2.2330)	Entropy 0.80223 (0.80288)	Top-1 acc 75.000 (70.741)	Top-5 acc 91.016 (88.129)	lr 0.00276
Train [95][490/3239]	Time 0.268 (0.653)	Data Time 0.002 (0.080)	Loss 2.2314 (2.2332)	Entropy 0.80219 (0.80286)	Top-1 acc 72.266 (70.747)	Top-5 acc 87.109 (88.115)	lr 0.00275
Train [95][500/3239]	Time 0.232 (0.649)	Data Time 0.002 (0.078)	Loss 2.1429 (2.2323)	Entropy 0.80220 (0.80285)	Top-1 acc 70.703 (70.759)	Top-5 acc 89.062 (88.123)	lr 0.00275
Train [95][510/3239]	Time 0.237 (0.646)	Data Time 0.001 (0.077)	Loss 2.3398 (2.2315)	Entropy 0.80214 (0.80284)	Top-1 acc 67.969 (70.794)	Top-5 acc 85.156 (88.152)	lr 0.00275
Train [95][520/3239]	Time 0.227 (0.642)	Data Time 0.001 (0.075)	Loss 2.1651 (2.2312)	Entropy 0.80212 (0.80282)	Top-1 acc 68.359 (70.805)	Top-5 acc 90.234 (88.152)	lr 0.00275
Train [95][530/3239]	Time 0.242 (0.640)	Data Time 0.001 (0.074)	Loss 2.4052 (2.2324)	Entropy 0.80201 (0.80281)	Top-1 acc 65.625 (70.772)	Top-5 acc 85.547 (88.128)	lr 0.00275
Train [95][540/3239]	Time 0.224 (0.637)	Data Time 0.001 (0.072)	Loss 2.3460 (2.2328)	Entropy 0.80198 (0.80279)	Top-1 acc 66.406 (70.753)	Top-5 acc 84.766 (88.124)	lr 0.00275
Train [95][550/3239]	Time 0.257 (0.634)	Data Time 0.001 (0.071)	Loss 2.2070 (2.2334)	Entropy 0.80196 (0.80278)	Top-1 acc 73.438 (70.747)	Top-5 acc 87.891 (88.124)	lr 0.00275
Train [95][560/3239]	Time 2.580 (0.631)	Data Time 0.001 (0.070)	Loss 2.0944 (2.2331)	Entropy 0.80196 (0.80276)	Top-1 acc 74.219 (70.737)	Top-5 acc 90.625 (88.132)	lr 0.00275
Train [95][570/3239]	Time 0.356 (0.625)	Data Time 0.002 (0.069)	Loss 2.4527 (2.2331)	Entropy 0.80195 (0.80275)	Top-1 acc 64.844 (70.743)	Top-5 acc 83.984 (88.123)	lr 0.00275
Train [95][580/3239]	Time 0.298 (0.710)	Data Time 0.003 (0.068)	Loss 2.1672 (2.2341)	Entropy 0.80195 (0.80274)	Top-1 acc 72.266 (70.732)	Top-5 acc 88.281 (88.110)	lr 0.00275
Train [95][590/3239]	Time 0.240 (0.707)	Data Time 0.002 (0.066)	Loss 2.3275 (2.2343)	Entropy 0.80195 (0.80272)	Top-1 acc 66.797 (70.739)	Top-5 acc 87.109 (88.113)	lr 0.00275
Train [95][600/3239]	Time 0.218 (0.703)	Data Time 0.002 (0.065)	Loss 2.2323 (2.2345)	Entropy 0.80191 (0.80271)	Top-1 acc 71.484 (70.735)	Top-5 acc 88.281 (88.110)	lr 0.00275
Train [95][610/3239]	Time 0.316 (0.699)	Data Time 0.001 (0.064)	Loss 2.1566 (2.2344)	Entropy 0.80185 (0.80270)	Top-1 acc 71.094 (70.733)	Top-5 acc 89.844 (88.116)	lr 0.00275
Train [95][620/3239]	Time 0.237 (0.696)	Data Time 0.001 (0.063)	Loss 2.2258 (2.2337)	Entropy 0.80183 (0.80268)	Top-1 acc 71.094 (70.741)	Top-5 acc 90.234 (88.132)	lr 0.00275
Train [95][630/3239]	Time 0.224 (0.692)	Data Time 0.001 (0.062)	Loss 2.3583 (2.2349)	Entropy 0.80177 (0.80267)	Top-1 acc 67.578 (70.717)	Top-5 acc 85.547 (88.106)	lr 0.00275
Train [95][640/3239]	Time 0.248 (0.689)	Data Time 0.001 (0.061)	Loss 2.1182 (2.2349)	Entropy 0.80179 (0.80265)	Top-1 acc 73.047 (70.720)	Top-5 acc 91.016 (88.105)	lr 0.00275
Train [95][650/3239]	Time 0.236 (0.686)	Data Time 0.001 (0.060)	Loss 2.3959 (2.2349)	Entropy 0.80172 (0.80264)	Top-1 acc 62.500 (70.736)	Top-5 acc 85.547 (88.098)	lr 0.00274
Train [95][660/3239]	Time 0.221 (0.682)	Data Time 0.001 (0.060)	Loss 2.2355 (2.2344)	Entropy 0.80174 (0.80263)	Top-1 acc 72.266 (70.742)	Top-5 acc 86.328 (88.103)	lr 0.00274
Train [95][670/3239]	Time 2.544 (0.679)	Data Time 0.009 (0.059)	Loss 2.2986 (2.2335)	Entropy 0.80174 (0.80261)	Top-1 acc 69.922 (70.772)	Top-5 acc 87.109 (88.122)	lr 0.00274
Train [95][680/3239]	Time 0.262 (0.673)	Data Time 0.001 (0.058)	Loss 2.2523 (2.2335)	Entropy 0.80174 (0.80260)	Top-1 acc 74.219 (70.798)	Top-5 acc 87.500 (88.110)	lr 0.00274
Train [95][690/3239]	Time 0.248 (0.670)	Data Time 0.001 (0.057)	Loss 2.3181 (2.2337)	Entropy 0.80171 (0.80259)	Top-1 acc 67.969 (70.782)	Top-5 acc 87.500 (88.108)	lr 0.00274
Train [95][700/3239]	Time 0.315 (0.667)	Data Time 0.001 (0.056)	Loss 2.2159 (2.2338)	Entropy 0.80162 (0.80257)	Top-1 acc 69.922 (70.777)	Top-5 acc 87.891 (88.096)	lr 0.00274
Train [95][710/3239]	Time 0.237 (0.665)	Data Time 0.001 (0.055)	Loss 2.3214 (2.2340)	Entropy 0.80163 (0.80256)	Top-1 acc 67.188 (70.780)	Top-5 acc 86.719 (88.099)	lr 0.00274
Train [95][720/3239]	Time 0.225 (0.662)	Data Time 0.001 (0.055)	Loss 2.2931 (2.2337)	Entropy 0.80162 (0.80255)	Top-1 acc 69.141 (70.787)	Top-5 acc 86.328 (88.098)	lr 0.00274
Train [95][730/3239]	Time 0.242 (0.660)	Data Time 0.001 (0.054)	Loss 2.2489 (2.2344)	Entropy 0.80172 (0.80254)	Top-1 acc 71.094 (70.769)	Top-5 acc 87.891 (88.095)	lr 0.00274
Train [95][740/3239]	Time 0.317 (0.657)	Data Time 0.001 (0.053)	Loss 2.3861 (2.2346)	Entropy 0.80174 (0.80253)	Top-1 acc 65.234 (70.756)	Top-5 acc 83.984 (88.084)	lr 0.00274
Train [95][750/3239]	Time 0.220 (0.655)	Data Time 0.001 (0.053)	Loss 2.5300 (2.2352)	Entropy 0.80170 (0.80251)	Top-1 acc 65.234 (70.743)	Top-5 acc 84.766 (88.078)	lr 0.00274
Train [95][760/3239]	Time 0.228 (0.653)	Data Time 0.001 (0.052)	Loss 2.1262 (2.2363)	Entropy 0.80171 (0.80250)	Top-1 acc 73.438 (70.713)	Top-5 acc 88.281 (88.056)	lr 0.00274
Train [95][770/3239]	Time 0.221 (0.650)	Data Time 0.001 (0.051)	Loss 2.2444 (2.2361)	Entropy 0.80172 (0.80249)	Top-1 acc 69.141 (70.716)	Top-5 acc 89.062 (88.057)	lr 0.00274
Train [95][780/3239]	Time 2.625 (0.648)	Data Time 0.002 (0.051)	Loss 2.2264 (2.2362)	Entropy 0.80172 (0.80248)	Top-1 acc 69.922 (70.717)	Top-5 acc 87.500 (88.060)	lr 0.00274
Train [95][790/3239]	Time 0.250 (0.643)	Data Time 0.001 (0.050)	Loss 2.2148 (2.2365)	Entropy 0.80163 (0.80247)	Top-1 acc 68.359 (70.714)	Top-5 acc 88.281 (88.057)	lr 0.00274
Train [95][800/3239]	Time 0.222 (0.641)	Data Time 0.002 (0.049)	Loss 2.2158 (2.2366)	Entropy 0.80160 (0.80246)	Top-1 acc 70.703 (70.707)	Top-5 acc 87.109 (88.052)	lr 0.00274
Train [95][810/3239]	Time 0.242 (0.639)	Data Time 0.001 (0.049)	Loss 2.3486 (2.2364)	Entropy 0.80155 (0.80245)	Top-1 acc 66.406 (70.719)	Top-5 acc 88.281 (88.054)	lr 0.00273
Train [95][820/3239]	Time 0.247 (0.637)	Data Time 0.001 (0.048)	Loss 2.3309 (2.2368)	Entropy 0.80157 (0.80244)	Top-1 acc 67.969 (70.712)	Top-5 acc 85.547 (88.047)	lr 0.00273
Train [95][830/3239]	Time 0.319 (0.635)	Data Time 0.001 (0.048)	Loss 2.2515 (2.2362)	Entropy 0.80162 (0.80243)	Top-1 acc 70.312 (70.733)	Top-5 acc 87.109 (88.056)	lr 0.00273
Train [95][840/3239]	Time 0.225 (0.633)	Data Time 0.001 (0.047)	Loss 2.2941 (2.2366)	Entropy 0.80142 (0.80242)	Top-1 acc 70.312 (70.723)	Top-5 acc 87.109 (88.047)	lr 0.00273
Train [95][850/3239]	Time 0.245 (0.631)	Data Time 0.001 (0.047)	Loss 2.2340 (2.2364)	Entropy 0.80135 (0.80241)	Top-1 acc 70.312 (70.711)	Top-5 acc 87.109 (88.056)	lr 0.00273
Train [95][860/3239]	Time 0.216 (0.630)	Data Time 0.001 (0.046)	Loss 2.2511 (2.2373)	Entropy 0.80128 (0.80239)	Top-1 acc 71.094 (70.690)	Top-5 acc 87.109 (88.040)	lr 0.00273
Train [95][870/3239]	Time 0.326 (0.628)	Data Time 0.001 (0.046)	Loss 2.4702 (2.2378)	Entropy 0.80132 (0.80238)	Top-1 acc 66.797 (70.678)	Top-5 acc 85.156 (88.037)	lr 0.00273
Train [95][880/3239]	Time 0.231 (0.626)	Data Time 0.001 (0.045)	Loss 2.4170 (2.2375)	Entropy 0.80126 (0.80237)	Top-1 acc 67.188 (70.682)	Top-5 acc 83.203 (88.042)	lr 0.00273
Train [95][890/3239]	Time 2.600 (0.624)	Data Time 0.001 (0.045)	Loss 2.2710 (2.2375)	Entropy 0.80126 (0.80236)	Top-1 acc 69.141 (70.686)	Top-5 acc 89.062 (88.048)	lr 0.00273
Train [95][900/3239]	Time 0.245 (0.620)	Data Time 0.001 (0.044)	Loss 2.2633 (2.2370)	Entropy 0.80121 (0.80234)	Top-1 acc 71.094 (70.698)	Top-5 acc 88.281 (88.060)	lr 0.00273
Train [95][910/3239]	Time 0.227 (0.619)	Data Time 0.001 (0.044)	Loss 2.1984 (2.2364)	Entropy 0.80113 (0.80233)	Top-1 acc 73.828 (70.724)	Top-5 acc 89.844 (88.074)	lr 0.00273
Train [95][920/3239]	Time 0.230 (0.617)	Data Time 0.001 (0.043)	Loss 2.2321 (2.2362)	Entropy 0.80114 (0.80232)	Top-1 acc 69.531 (70.732)	Top-5 acc 87.109 (88.074)	lr 0.00273
Train [95][930/3239]	Time 0.267 (0.616)	Data Time 0.001 (0.043)	Loss 2.2131 (2.2362)	Entropy 0.80109 (0.80231)	Top-1 acc 71.875 (70.728)	Top-5 acc 86.719 (88.070)	lr 0.00273
Train [95][940/3239]	Time 0.272 (0.669)	Data Time 0.002 (0.042)	Loss 2.1346 (2.2361)	Entropy 0.80105 (0.80229)	Top-1 acc 73.438 (70.729)	Top-5 acc 87.891 (88.068)	lr 0.00273
Train [95][950/3239]	Time 0.234 (0.667)	Data Time 0.002 (0.042)	Loss 2.3093 (2.2360)	Entropy 0.80097 (0.80228)	Top-1 acc 68.359 (70.726)	Top-5 acc 87.500 (88.075)	lr 0.00273
Train [95][960/3239]	Time 0.256 (0.666)	Data Time 0.001 (0.041)	Loss 2.2830 (2.2359)	Entropy 0.80090 (0.80226)	Top-1 acc 72.656 (70.728)	Top-5 acc 89.844 (88.073)	lr 0.00272
Train [95][970/3239]	Time 0.283 (0.664)	Data Time 0.001 (0.041)	Loss 2.2790 (2.2359)	Entropy 0.80094 (0.80225)	Top-1 acc 69.922 (70.723)	Top-5 acc 87.891 (88.072)	lr 0.00272
Train [95][980/3239]	Time 0.233 (0.662)	Data Time 0.001 (0.041)	Loss 2.1484 (2.2360)	Entropy 0.80095 (0.80224)	Top-1 acc 74.219 (70.723)	Top-5 acc 89.453 (88.074)	lr 0.00272
Train [95][990/3239]	Time 0.258 (0.660)	Data Time 0.001 (0.040)	Loss 2.2297 (2.2356)	Entropy 0.80098 (0.80222)	Top-1 acc 73.438 (70.727)	Top-5 acc 86.719 (88.075)	lr 0.00272
Train [95][1000/3239]	Time 2.680 (0.658)	Data Time 0.002 (0.040)	Loss 2.1962 (2.2356)	Entropy 0.80098 (0.80221)	Top-1 acc 73.047 (70.728)	Top-5 acc 88.281 (88.076)	lr 0.00272
Train [95][1010/3239]	Time 0.228 (0.654)	Data Time 0.001 (0.039)	Loss 2.1916 (2.2361)	Entropy 0.80091 (0.80220)	Top-1 acc 67.578 (70.715)	Top-5 acc 89.062 (88.065)	lr 0.00272
Train [95][1020/3239]	Time 0.234 (0.653)	Data Time 0.001 (0.039)	Loss 2.2129 (2.2356)	Entropy 0.80086 (0.80219)	Top-1 acc 73.047 (70.732)	Top-5 acc 89.062 (88.079)	lr 0.00272
Train [95][1030/3239]	Time 0.248 (0.651)	Data Time 0.001 (0.039)	Loss 2.2032 (2.2358)	Entropy 0.80077 (0.80217)	Top-1 acc 71.094 (70.727)	Top-5 acc 87.500 (88.080)	lr 0.00272
Train [95][1040/3239]	Time 0.380 (0.650)	Data Time 0.001 (0.038)	Loss 2.2286 (2.2358)	Entropy 0.80068 (0.80216)	Top-1 acc 69.531 (70.725)	Top-5 acc 88.281 (88.078)	lr 0.00272
Train [95][1050/3239]	Time 0.222 (0.648)	Data Time 0.002 (0.038)	Loss 2.1997 (2.2358)	Entropy 0.80073 (0.80214)	Top-1 acc 69.531 (70.727)	Top-5 acc 90.234 (88.075)	lr 0.00272
Train [95][1060/3239]	Time 0.227 (0.646)	Data Time 0.001 (0.038)	Loss 2.1973 (2.2354)	Entropy 0.80072 (0.80213)	Top-1 acc 72.656 (70.729)	Top-5 acc 89.844 (88.078)	lr 0.00272
Train [95][1070/3239]	Time 0.216 (0.645)	Data Time 0.001 (0.037)	Loss 2.0785 (2.2353)	Entropy 0.80082 (0.80212)	Top-1 acc 73.047 (70.731)	Top-5 acc 90.625 (88.078)	lr 0.00272
Train [95][1080/3239]	Time 0.329 (0.643)	Data Time 0.001 (0.037)	Loss 2.2704 (2.2355)	Entropy 0.80082 (0.80211)	Top-1 acc 68.359 (70.722)	Top-5 acc 87.500 (88.074)	lr 0.00272
Train [95][1090/3239]	Time 0.227 (0.642)	Data Time 0.002 (0.037)	Loss 2.6665 (2.2358)	Entropy 0.80081 (0.80209)	Top-1 acc 59.766 (70.709)	Top-5 acc 80.469 (88.067)	lr 0.00272
Train [95][1100/3239]	Time 0.235 (0.640)	Data Time 0.001 (0.036)	Loss 2.3238 (2.2358)	Entropy 0.80076 (0.80208)	Top-1 acc 68.359 (70.707)	Top-5 acc 85.938 (88.062)	lr 0.00272
Train [95][1110/3239]	Time 2.653 (0.639)	Data Time 0.001 (0.036)	Loss 2.3263 (2.2362)	Entropy 0.80076 (0.80207)	Top-1 acc 69.141 (70.696)	Top-5 acc 86.328 (88.054)	lr 0.00272
Train [95][1120/3239]	Time 0.242 (0.635)	Data Time 0.001 (0.036)	Loss 2.0515 (2.2362)	Entropy 0.80073 (0.80206)	Top-1 acc 75.391 (70.699)	Top-5 acc 89.062 (88.048)	lr 0.00271
Train [95][1130/3239]	Time 0.245 (0.634)	Data Time 0.001 (0.035)	Loss 2.1809 (2.2362)	Entropy 0.80071 (0.80205)	Top-1 acc 71.875 (70.700)	Top-5 acc 86.328 (88.048)	lr 0.00271
Train [95][1140/3239]	Time 0.249 (0.633)	Data Time 0.002 (0.035)	Loss 2.1870 (2.2366)	Entropy 0.80070 (0.80204)	Top-1 acc 69.922 (70.686)	Top-5 acc 89.844 (88.042)	lr 0.00271
Train [95][1150/3239]	Time 0.237 (0.631)	Data Time 0.002 (0.035)	Loss 2.1843 (2.2365)	Entropy 0.80068 (0.80202)	Top-1 acc 71.875 (70.684)	Top-5 acc 89.062 (88.042)	lr 0.00271
Train [95][1160/3239]	Time 0.200 (0.630)	Data Time 0.001 (0.035)	Loss 2.3851 (2.2367)	Entropy 0.80069 (0.80201)	Top-1 acc 65.625 (70.672)	Top-5 acc 85.938 (88.039)	lr 0.00271
Train [95][1170/3239]	Time 0.235 (0.629)	Data Time 0.001 (0.034)	Loss 2.2145 (2.2366)	Entropy 0.80069 (0.80200)	Top-1 acc 70.703 (70.676)	Top-5 acc 90.234 (88.040)	lr 0.00271
Train [95][1180/3239]	Time 0.241 (0.627)	Data Time 0.001 (0.034)	Loss 2.1371 (2.2365)	Entropy 0.80065 (0.80199)	Top-1 acc 73.438 (70.684)	Top-5 acc 88.672 (88.041)	lr 0.00271
Train [95][1190/3239]	Time 0.228 (0.626)	Data Time 0.001 (0.034)	Loss 2.2869 (2.2366)	Entropy 0.80065 (0.80198)	Top-1 acc 68.750 (70.682)	Top-5 acc 86.719 (88.037)	lr 0.00271
Train [95][1200/3239]	Time 0.244 (0.625)	Data Time 0.001 (0.034)	Loss 2.1698 (2.2368)	Entropy 0.80069 (0.80197)	Top-1 acc 70.703 (70.679)	Top-5 acc 89.453 (88.033)	lr 0.00271
Train [95][1210/3239]	Time 0.327 (0.624)	Data Time 0.001 (0.033)	Loss 2.2141 (2.2365)	Entropy 0.80064 (0.80196)	Top-1 acc 69.922 (70.679)	Top-5 acc 87.109 (88.036)	lr 0.00271
Train [95][1220/3239]	Time 2.635 (0.623)	Data Time 0.001 (0.033)	Loss 2.0532 (2.2363)	Entropy 0.80064 (0.80195)	Top-1 acc 76.172 (70.689)	Top-5 acc 90.234 (88.046)	lr 0.00271
Train [95][1230/3239]	Time 0.252 (0.620)	Data Time 0.001 (0.033)	Loss 2.0694 (2.2365)	Entropy 0.80067 (0.80194)	Top-1 acc 71.094 (70.684)	Top-5 acc 92.578 (88.045)	lr 0.00271
Train [95][1240/3239]	Time 0.206 (0.619)	Data Time 0.001 (0.033)	Loss 2.0301 (2.2362)	Entropy 0.80053 (0.80192)	Top-1 acc 73.438 (70.686)	Top-5 acc 92.188 (88.050)	lr 0.00271
Train [95][1250/3239]	Time 0.251 (0.617)	Data Time 0.002 (0.032)	Loss 2.1055 (2.2363)	Entropy 0.80054 (0.80191)	Top-1 acc 70.703 (70.677)	Top-5 acc 91.797 (88.046)	lr 0.00271
Train [95][1260/3239]	Time 0.233 (0.616)	Data Time 0.002 (0.032)	Loss 2.2581 (2.2362)	Entropy 0.80054 (0.80190)	Top-1 acc 70.312 (70.682)	Top-5 acc 88.281 (88.053)	lr 0.00271
Train [95][1270/3239]	Time 0.215 (0.615)	Data Time 0.001 (0.032)	Loss 2.4094 (2.2360)	Entropy 0.80053 (0.80189)	Top-1 acc 70.312 (70.690)	Top-5 acc 83.594 (88.052)	lr 0.00271
Train [95][1280/3239]	Time 0.258 (0.614)	Data Time 0.001 (0.032)	Loss 2.3818 (2.2361)	Entropy 0.80050 (0.80188)	Top-1 acc 67.188 (70.686)	Top-5 acc 85.938 (88.056)	lr 0.00270
Train [95][1290/3239]	Time 0.229 (0.613)	Data Time 0.002 (0.031)	Loss 2.2052 (2.2364)	Entropy 0.80050 (0.80187)	Top-1 acc 73.828 (70.681)	Top-5 acc 88.672 (88.049)	lr 0.00270
Train [95][1300/3239]	Time 0.235 (0.654)	Data Time 0.002 (0.031)	Loss 2.1251 (2.2364)	Entropy 0.80050 (0.80186)	Top-1 acc 72.266 (70.680)	Top-5 acc 91.016 (88.050)	lr 0.00270
Train [95][1310/3239]	Time 0.232 (0.652)	Data Time 0.002 (0.031)	Loss 2.2762 (2.2364)	Entropy 0.80052 (0.80185)	Top-1 acc 63.672 (70.668)	Top-5 acc 88.281 (88.050)	lr 0.00270
Train [95][1320/3239]	Time 0.235 (0.651)	Data Time 0.002 (0.031)	Loss 2.2010 (2.2365)	Entropy 0.80048 (0.80184)	Top-1 acc 70.703 (70.667)	Top-5 acc 88.672 (88.049)	lr 0.00270
Train [95][1330/3239]	Time 2.578 (0.650)	Data Time 0.002 (0.030)	Loss 2.1441 (2.2365)	Entropy 0.80048 (0.80183)	Top-1 acc 69.531 (70.660)	Top-5 acc 89.062 (88.049)	lr 0.00270
Train [95][1340/3239]	Time 0.326 (0.647)	Data Time 0.001 (0.030)	Loss 2.1853 (2.2365)	Entropy 0.80049 (0.80182)	Top-1 acc 71.875 (70.660)	Top-5 acc 89.844 (88.047)	lr 0.00270
Train [95][1350/3239]	Time 0.261 (0.646)	Data Time 0.001 (0.030)	Loss 2.1598 (2.2366)	Entropy 0.80041 (0.80181)	Top-1 acc 74.609 (70.663)	Top-5 acc 87.891 (88.041)	lr 0.00270
Train [95][1360/3239]	Time 0.226 (0.644)	Data Time 0.001 (0.030)	Loss 2.2350 (2.2364)	Entropy 0.80040 (0.80180)	Top-1 acc 69.531 (70.663)	Top-5 acc 86.719 (88.041)	lr 0.00270
Train [95][1370/3239]	Time 0.235 (0.643)	Data Time 0.001 (0.030)	Loss 2.2378 (2.2366)	Entropy 0.80036 (0.80179)	Top-1 acc 70.703 (70.661)	Top-5 acc 87.109 (88.036)	lr 0.00270
Train [95][1380/3239]	Time 0.361 (0.642)	Data Time 0.001 (0.029)	Loss 2.2108 (2.2366)	Entropy 0.80029 (0.80178)	Top-1 acc 67.578 (70.655)	Top-5 acc 86.719 (88.032)	lr 0.00270
Train [95][1390/3239]	Time 0.241 (0.641)	Data Time 0.002 (0.029)	Loss 2.1348 (2.2368)	Entropy 0.80033 (0.80177)	Top-1 acc 72.656 (70.651)	Top-5 acc 89.844 (88.028)	lr 0.00270
Train [95][1400/3239]	Time 0.219 (0.640)	Data Time 0.001 (0.029)	Loss 2.2073 (2.2368)	Entropy 0.80033 (0.80176)	Top-1 acc 71.484 (70.657)	Top-5 acc 87.891 (88.023)	lr 0.00270
Train [95][1410/3239]	Time 0.245 (0.638)	Data Time 0.001 (0.029)	Loss 2.4238 (2.2370)	Entropy 0.80035 (0.80175)	Top-1 acc 61.719 (70.647)	Top-5 acc 83.203 (88.015)	lr 0.00270
Train [95][1420/3239]	Time 0.252 (0.637)	Data Time 0.001 (0.029)	Loss 2.2842 (2.2370)	Entropy 0.80033 (0.80174)	Top-1 acc 68.750 (70.644)	Top-5 acc 87.500 (88.017)	lr 0.00270
Train [95][1430/3239]	Time 0.238 (0.636)	Data Time 0.001 (0.028)	Loss 3.3844 (2.2377)	Entropy 0.80033 (0.80173)	Top-1 acc 44.531 (70.629)	Top-5 acc 72.656 (88.004)	lr 0.00270
Train [95][1440/3239]	Time 2.708 (0.635)	Data Time 0.001 (0.028)	Loss 2.1373 (2.2380)	Entropy 0.80033 (0.80172)	Top-1 acc 74.609 (70.622)	Top-5 acc 90.625 (87.998)	lr 0.00269
Train [95][1450/3239]	Time 0.223 (0.632)	Data Time 0.001 (0.028)	Loss 2.1966 (2.2378)	Entropy 0.80032 (0.80171)	Top-1 acc 74.609 (70.629)	Top-5 acc 89.062 (88.001)	lr 0.00269
Train [95][1460/3239]	Time 0.246 (0.631)	Data Time 0.001 (0.028)	Loss 2.1465 (2.2379)	Entropy 0.80024 (0.80170)	Top-1 acc 73.438 (70.625)	Top-5 acc 91.797 (88.003)	lr 0.00269
Train [95][1470/3239]	Time 0.330 (0.630)	Data Time 0.002 (0.028)	Loss 2.2495 (2.2377)	Entropy 0.80011 (0.80169)	Top-1 acc 69.141 (70.628)	Top-5 acc 91.016 (88.008)	lr 0.00269
Train [95][1480/3239]	Time 0.231 (0.629)	Data Time 0.001 (0.027)	Loss 2.3093 (2.2379)	Entropy 0.80005 (0.80168)	Top-1 acc 71.484 (70.623)	Top-5 acc 83.984 (88.000)	lr 0.00269
Train [95][1490/3239]	Time 0.225 (0.628)	Data Time 0.001 (0.027)	Loss 2.2689 (2.2381)	Entropy 0.80005 (0.80167)	Top-1 acc 69.922 (70.616)	Top-5 acc 86.719 (87.996)	lr 0.00269
Train [95][1500/3239]	Time 0.224 (0.627)	Data Time 0.001 (0.027)	Loss 2.3176 (2.2383)	Entropy 0.79997 (0.80165)	Top-1 acc 70.312 (70.611)	Top-5 acc 87.109 (87.990)	lr 0.00269
Train [95][1510/3239]	Time 0.320 (0.626)	Data Time 0.001 (0.027)	Loss 2.0840 (2.2382)	Entropy 0.79996 (0.80164)	Top-1 acc 74.609 (70.617)	Top-5 acc 90.625 (87.991)	lr 0.00269
Train [95][1520/3239]	Time 0.228 (0.625)	Data Time 0.001 (0.027)	Loss 2.1214 (2.2378)	Entropy 0.79999 (0.80163)	Top-1 acc 74.609 (70.627)	Top-5 acc 89.844 (87.996)	lr 0.00269
Train [95][1530/3239]	Time 0.225 (0.624)	Data Time 0.001 (0.027)	Loss 2.1240 (2.2376)	Entropy 0.79997 (0.80162)	Top-1 acc 69.141 (70.630)	Top-5 acc 90.625 (88.001)	lr 0.00269
Train [95][1540/3239]	Time 0.232 (0.623)	Data Time 0.001 (0.026)	Loss 2.2614 (2.2375)	Entropy 0.79970 (0.80161)	Top-1 acc 72.266 (70.644)	Top-5 acc 87.109 (88.006)	lr 0.00269
Train [95][1550/3239]	Time 2.674 (0.623)	Data Time 0.001 (0.026)	Loss 2.2538 (2.2373)	Entropy 0.79970 (0.80160)	Top-1 acc 69.141 (70.644)	Top-5 acc 89.062 (88.013)	lr 0.00269
Train [95][1560/3239]	Time 0.229 (0.620)	Data Time 0.001 (0.026)	Loss 2.2361 (2.2373)	Entropy 0.79973 (0.80159)	Top-1 acc 67.188 (70.646)	Top-5 acc 87.500 (88.012)	lr 0.00269
Train [95][1570/3239]	Time 0.272 (0.619)	Data Time 0.001 (0.026)	Loss 2.3699 (2.2378)	Entropy 0.79972 (0.80157)	Top-1 acc 66.797 (70.635)	Top-5 acc 89.062 (88.002)	lr 0.00269
Train [95][1580/3239]	Time 0.264 (0.618)	Data Time 0.001 (0.026)	Loss 2.2506 (2.2380)	Entropy 0.79969 (0.80156)	Top-1 acc 72.656 (70.632)	Top-5 acc 87.109 (87.997)	lr 0.00269
Train [95][1590/3239]	Time 0.260 (0.617)	Data Time 0.001 (0.026)	Loss 2.3683 (2.2383)	Entropy 0.79967 (0.80155)	Top-1 acc 69.531 (70.625)	Top-5 acc 84.766 (87.993)	lr 0.00269
Train [95][1600/3239]	Time 0.230 (0.617)	Data Time 0.001 (0.026)	Loss 2.3524 (2.2383)	Entropy 0.79950 (0.80154)	Top-1 acc 68.750 (70.622)	Top-5 acc 85.547 (87.993)	lr 0.00268
Train [95][1610/3239]	Time 0.235 (0.616)	Data Time 0.001 (0.025)	Loss 2.2800 (2.2384)	Entropy 0.79946 (0.80153)	Top-1 acc 68.359 (70.617)	Top-5 acc 87.500 (87.989)	lr 0.00268
Train [95][1620/3239]	Time 0.225 (0.615)	Data Time 0.001 (0.025)	Loss 2.1861 (2.2385)	Entropy 0.79948 (0.80151)	Top-1 acc 72.656 (70.619)	Top-5 acc 89.453 (87.985)	lr 0.00268
Train [95][1630/3239]	Time 0.222 (0.614)	Data Time 0.001 (0.025)	Loss 2.3514 (2.2386)	Entropy 0.79948 (0.80150)	Top-1 acc 67.969 (70.618)	Top-5 acc 85.156 (87.980)	lr 0.00268
Train [95][1640/3239]	Time 0.227 (0.613)	Data Time 0.001 (0.025)	Loss 2.3011 (2.2385)	Entropy 0.79946 (0.80149)	Top-1 acc 70.312 (70.623)	Top-5 acc 87.891 (87.983)	lr 0.00268
Train [95][1650/3239]	Time 0.260 (0.612)	Data Time 0.001 (0.025)	Loss 2.3651 (2.2389)	Entropy 0.79945 (0.80148)	Top-1 acc 70.312 (70.615)	Top-5 acc 85.938 (87.976)	lr 0.00268
Train [95][1660/3239]	Time 55.919 (0.643)	Data Time 0.001 (0.025)	Loss 2.3518 (2.2392)	Entropy 0.79945 (0.80146)	Top-1 acc 70.703 (70.611)	Top-5 acc 84.766 (87.972)	lr 0.00268
Train [95][1670/3239]	Time 0.270 (0.641)	Data Time 0.002 (0.025)	Loss 2.1746 (2.2394)	Entropy 0.79955 (0.80145)	Top-1 acc 72.266 (70.600)	Top-5 acc 88.672 (87.970)	lr 0.00268
Train [95][1680/3239]	Time 0.245 (0.640)	Data Time 0.002 (0.024)	Loss 2.3147 (2.2394)	Entropy 0.79960 (0.80144)	Top-1 acc 69.141 (70.602)	Top-5 acc 85.938 (87.969)	lr 0.00268
Train [95][1690/3239]	Time 0.249 (0.639)	Data Time 0.002 (0.024)	Loss 2.3159 (2.2396)	Entropy 0.79955 (0.80143)	Top-1 acc 69.922 (70.599)	Top-5 acc 87.500 (87.962)	lr 0.00268
Train [95][1700/3239]	Time 0.231 (0.638)	Data Time 0.005 (0.024)	Loss 2.1464 (2.2399)	Entropy 0.79957 (0.80142)	Top-1 acc 74.219 (70.596)	Top-5 acc 89.844 (87.958)	lr 0.00268
Train [95][1710/3239]	Time 0.242 (0.637)	Data Time 0.002 (0.024)	Loss 2.0991 (2.2398)	Entropy 0.79950 (0.80141)	Top-1 acc 75.000 (70.596)	Top-5 acc 89.844 (87.960)	lr 0.00268
Train [95][1720/3239]	Time 0.307 (0.636)	Data Time 0.001 (0.024)	Loss 2.0919 (2.2397)	Entropy 0.79946 (0.80140)	Top-1 acc 75.391 (70.603)	Top-5 acc 90.234 (87.961)	lr 0.00268
Train [95][1730/3239]	Time 0.220 (0.635)	Data Time 0.001 (0.024)	Loss 2.2261 (2.2396)	Entropy 0.79938 (0.80139)	Top-1 acc 70.312 (70.604)	Top-5 acc 88.672 (87.962)	lr 0.00268
Train [95][1740/3239]	Time 0.219 (0.634)	Data Time 0.001 (0.024)	Loss 2.1958 (2.2397)	Entropy 0.79938 (0.80137)	Top-1 acc 75.391 (70.597)	Top-5 acc 88.281 (87.963)	lr 0.00268
Train [95][1750/3239]	Time 0.223 (0.634)	Data Time 0.001 (0.024)	Loss 2.1587 (2.2401)	Entropy 0.79936 (0.80136)	Top-1 acc 73.438 (70.592)	Top-5 acc 90.625 (87.954)	lr 0.00268
Train [95][1760/3239]	Time 0.221 (0.633)	Data Time 0.001 (0.023)	Loss 2.2198 (2.2402)	Entropy 0.79938 (0.80135)	Top-1 acc 70.703 (70.585)	Top-5 acc 89.453 (87.953)	lr 0.00267
Train [95][1770/3239]	Time 2.708 (0.632)	Data Time 0.002 (0.023)	Loss 2.3333 (2.2403)	Entropy 0.79938 (0.80134)	Top-1 acc 66.797 (70.579)	Top-5 acc 85.547 (87.949)	lr 0.00267
Train [95][1780/3239]	Time 0.257 (0.630)	Data Time 0.002 (0.023)	Loss 2.2625 (2.2402)	Entropy 0.79936 (0.80133)	Top-1 acc 69.531 (70.584)	Top-5 acc 87.500 (87.953)	lr 0.00267
Train [95][1790/3239]	Time 0.231 (0.629)	Data Time 0.001 (0.023)	Loss 2.1574 (2.2402)	Entropy 0.79937 (0.80132)	Top-1 acc 71.484 (70.583)	Top-5 acc 89.453 (87.954)	lr 0.00267
Train [95][1800/3239]	Time 0.237 (0.628)	Data Time 0.001 (0.023)	Loss 2.0477 (2.2404)	Entropy 0.79929 (0.80131)	Top-1 acc 75.391 (70.574)	Top-5 acc 90.234 (87.949)	lr 0.00267
Train [95][1810/3239]	Time 0.324 (0.628)	Data Time 0.001 (0.023)	Loss 2.2394 (2.2405)	Entropy 0.79926 (0.80130)	Top-1 acc 72.266 (70.573)	Top-5 acc 86.328 (87.945)	lr 0.00267
Train [95][1820/3239]	Time 0.223 (0.627)	Data Time 0.002 (0.023)	Loss 2.2551 (2.2403)	Entropy 0.79923 (0.80128)	Top-1 acc 69.141 (70.577)	Top-5 acc 87.109 (87.951)	lr 0.00267
Train [95][1830/3239]	Time 0.232 (0.626)	Data Time 0.001 (0.023)	Loss 2.1029 (2.2403)	Entropy 0.79914 (0.80127)	Top-1 acc 75.000 (70.574)	Top-5 acc 89.844 (87.952)	lr 0.00267
Train [95][1840/3239]	Time 0.230 (0.625)	Data Time 0.001 (0.022)	Loss 2.1816 (2.2405)	Entropy 0.79915 (0.80126)	Top-1 acc 71.094 (70.576)	Top-5 acc 89.062 (87.949)	lr 0.00267
Train [95][1850/3239]	Time 0.278 (0.624)	Data Time 0.001 (0.022)	Loss 2.2086 (2.2405)	Entropy 0.79915 (0.80125)	Top-1 acc 68.750 (70.572)	Top-5 acc 90.625 (87.950)	lr 0.00267
Train [95][1860/3239]	Time 0.226 (0.623)	Data Time 0.001 (0.022)	Loss 2.1578 (2.2406)	Entropy 0.79909 (0.80124)	Top-1 acc 73.047 (70.571)	Top-5 acc 89.844 (87.948)	lr 0.00267
Train [95][1870/3239]	Time 0.246 (0.623)	Data Time 0.001 (0.022)	Loss 2.1609 (2.2407)	Entropy 0.79902 (0.80123)	Top-1 acc 71.484 (70.568)	Top-5 acc 91.016 (87.948)	lr 0.00267
Train [95][1880/3239]	Time 2.929 (0.622)	Data Time 0.002 (0.022)	Loss 2.0608 (2.2404)	Entropy 0.79902 (0.80122)	Top-1 acc 74.609 (70.572)	Top-5 acc 91.406 (87.953)	lr 0.00267
Train [95][1890/3239]	Time 0.260 (0.620)	Data Time 0.002 (0.022)	Loss 2.3100 (2.2414)	Entropy 0.79901 (0.80120)	Top-1 acc 68.359 (70.546)	Top-5 acc 86.328 (87.939)	lr 0.00267
Train [95][1900/3239]	Time 0.234 (0.620)	Data Time 0.001 (0.022)	Loss 2.3259 (2.2414)	Entropy 0.79894 (0.80119)	Top-1 acc 67.969 (70.547)	Top-5 acc 86.719 (87.941)	lr 0.00267
Train [95][1910/3239]	Time 0.240 (0.619)	Data Time 0.001 (0.022)	Loss 2.2986 (2.2415)	Entropy 0.79888 (0.80118)	Top-1 acc 70.703 (70.543)	Top-5 acc 87.109 (87.939)	lr 0.00267
Train [95][1920/3239]	Time 0.228 (0.618)	Data Time 0.001 (0.022)	Loss 2.0926 (2.2413)	Entropy 0.79885 (0.80117)	Top-1 acc 75.781 (70.541)	Top-5 acc 88.672 (87.943)	lr 0.00266
Train [95][1930/3239]	Time 0.229 (0.618)	Data Time 0.001 (0.021)	Loss 2.2950 (2.2414)	Entropy 0.79883 (0.80116)	Top-1 acc 69.531 (70.538)	Top-5 acc 86.328 (87.940)	lr 0.00266
Train [95][1940/3239]	Time 0.326 (0.617)	Data Time 0.001 (0.021)	Loss 2.2148 (2.2413)	Entropy 0.79880 (0.80114)	Top-1 acc 71.094 (70.535)	Top-5 acc 88.281 (87.943)	lr 0.00266
Train [95][1950/3239]	Time 0.232 (0.616)	Data Time 0.001 (0.021)	Loss 2.3239 (2.2413)	Entropy 0.79875 (0.80113)	Top-1 acc 69.531 (70.540)	Top-5 acc 87.109 (87.940)	lr 0.00266
Train [95][1960/3239]	Time 0.231 (0.616)	Data Time 0.001 (0.021)	Loss 2.1302 (2.2412)	Entropy 0.79872 (0.80112)	Top-1 acc 73.047 (70.540)	Top-5 acc 89.844 (87.943)	lr 0.00266
Train [95][1970/3239]	Time 0.226 (0.615)	Data Time 0.001 (0.021)	Loss 2.3098 (2.2415)	Entropy 0.79867 (0.80111)	Top-1 acc 68.750 (70.539)	Top-5 acc 87.891 (87.940)	lr 0.00266
Train [95][1980/3239]	Time 0.253 (0.614)	Data Time 0.001 (0.021)	Loss 2.3246 (2.2413)	Entropy 0.79863 (0.80109)	Top-1 acc 66.797 (70.542)	Top-5 acc 87.500 (87.943)	lr 0.00266
Train [95][1990/3239]	Time 2.578 (0.614)	Data Time 0.002 (0.021)	Loss 2.0733 (2.2412)	Entropy 0.79863 (0.80108)	Top-1 acc 73.828 (70.548)	Top-5 acc 90.625 (87.943)	lr 0.00266
Train [95][2000/3239]	Time 0.230 (0.612)	Data Time 0.001 (0.021)	Loss 2.1915 (2.2409)	Entropy 0.79866 (0.80107)	Top-1 acc 74.609 (70.555)	Top-5 acc 87.500 (87.946)	lr 0.00266
Train [95][2010/3239]	Time 0.237 (0.611)	Data Time 0.001 (0.021)	Loss 2.3110 (2.2410)	Entropy 0.79865 (0.80106)	Top-1 acc 69.141 (70.554)	Top-5 acc 87.500 (87.943)	lr 0.00266
Train [95][2020/3239]	Time 0.246 (0.611)	Data Time 0.001 (0.021)	Loss 2.2549 (2.2413)	Entropy 0.79864 (0.80105)	Top-1 acc 68.359 (70.544)	Top-5 acc 87.109 (87.941)	lr 0.00266
Train [95][2030/3239]	Time 0.486 (0.635)	Data Time 0.002 (0.020)	Loss 2.1642 (2.2415)	Entropy 0.79849 (0.80103)	Top-1 acc 71.094 (70.538)	Top-5 acc 90.234 (87.936)	lr 0.00266
Train [95][2040/3239]	Time 0.240 (0.634)	Data Time 0.002 (0.020)	Loss 2.2349 (2.2414)	Entropy 0.79848 (0.80102)	Top-1 acc 68.750 (70.540)	Top-5 acc 87.109 (87.935)	lr 0.00266
Train [95][2050/3239]	Time 0.231 (0.634)	Data Time 0.001 (0.020)	Loss 2.2358 (2.2415)	Entropy 0.79837 (0.80101)	Top-1 acc 73.047 (70.541)	Top-5 acc 87.109 (87.935)	lr 0.00266
Train [95][2060/3239]	Time 0.236 (0.633)	Data Time 0.001 (0.020)	Loss 2.2852 (2.2413)	Entropy 0.79837 (0.80100)	Top-1 acc 69.922 (70.545)	Top-5 acc 86.328 (87.934)	lr 0.00266
Train [95][2070/3239]	Time 0.229 (0.632)	Data Time 0.001 (0.020)	Loss 2.2540 (2.2412)	Entropy 0.79835 (0.80098)	Top-1 acc 66.797 (70.547)	Top-5 acc 89.453 (87.936)	lr 0.00266
Train [95][2080/3239]	Time 0.272 (0.632)	Data Time 0.001 (0.020)	Loss 2.3047 (2.2412)	Entropy 0.79831 (0.80097)	Top-1 acc 66.406 (70.546)	Top-5 acc 86.328 (87.935)	lr 0.00265
Train [95][2090/3239]	Time 0.262 (0.631)	Data Time 0.001 (0.020)	Loss 2.2034 (2.2412)	Entropy 0.79827 (0.80096)	Top-1 acc 72.266 (70.545)	Top-5 acc 88.672 (87.936)	lr 0.00265
Train [95][2100/3239]	Time 2.489 (0.630)	Data Time 0.002 (0.020)	Loss 2.2689 (2.2414)	Entropy 0.79827 (0.80094)	Top-1 acc 71.875 (70.543)	Top-5 acc 87.891 (87.930)	lr 0.00265
Train [95][2110/3239]	Time 0.322 (0.628)	Data Time 0.002 (0.020)	Loss 2.3108 (2.2415)	Entropy 0.79828 (0.80093)	Top-1 acc 67.578 (70.541)	Top-5 acc 87.109 (87.929)	lr 0.00265
Train [95][2120/3239]	Time 0.242 (0.628)	Data Time 0.002 (0.020)	Loss 2.3062 (2.2413)	Entropy 0.79824 (0.80092)	Top-1 acc 69.141 (70.550)	Top-5 acc 87.109 (87.932)	lr 0.00265
Train [95][2130/3239]	Time 0.258 (0.627)	Data Time 0.001 (0.020)	Loss 2.1940 (2.2411)	Entropy 0.79823 (0.80091)	Top-1 acc 72.266 (70.553)	Top-5 acc 89.062 (87.935)	lr 0.00265
Train [95][2140/3239]	Time 0.245 (0.626)	Data Time 0.001 (0.020)	Loss 2.2588 (2.2411)	Entropy 0.79818 (0.80089)	Top-1 acc 70.312 (70.557)	Top-5 acc 87.891 (87.935)	lr 0.00265
Train [95][2150/3239]	Time 0.378 (0.626)	Data Time 0.001 (0.019)	Loss 2.1726 (2.2407)	Entropy 0.79814 (0.80088)	Top-1 acc 71.484 (70.561)	Top-5 acc 91.406 (87.940)	lr 0.00265
Train [95][2160/3239]	Time 0.239 (0.625)	Data Time 0.001 (0.019)	Loss 2.3022 (2.2411)	Entropy 0.79811 (0.80087)	Top-1 acc 69.531 (70.551)	Top-5 acc 88.281 (87.934)	lr 0.00265
Train [95][2170/3239]	Time 0.235 (0.625)	Data Time 0.001 (0.019)	Loss 2.2168 (2.2411)	Entropy 0.79810 (0.80086)	Top-1 acc 69.922 (70.550)	Top-5 acc 89.844 (87.935)	lr 0.00265
Train [95][2180/3239]	Time 0.231 (0.624)	Data Time 0.001 (0.019)	Loss 2.2472 (2.2411)	Entropy 0.79805 (0.80084)	Top-1 acc 70.312 (70.551)	Top-5 acc 87.891 (87.934)	lr 0.00265
Train [95][2190/3239]	Time 0.344 (0.623)	Data Time 0.001 (0.019)	Loss 2.3352 (2.2411)	Entropy 0.79787 (0.80083)	Top-1 acc 67.969 (70.551)	Top-5 acc 85.938 (87.935)	lr 0.00265
Train [95][2200/3239]	Time 0.203 (0.623)	Data Time 0.001 (0.019)	Loss 2.3339 (2.2411)	Entropy 0.79784 (0.80082)	Top-1 acc 69.531 (70.553)	Top-5 acc 85.938 (87.934)	lr 0.00265
Train [95][2210/3239]	Time 2.647 (0.622)	Data Time 0.001 (0.019)	Loss 2.3212 (2.2412)	Entropy 0.79784 (0.80080)	Top-1 acc 67.969 (70.552)	Top-5 acc 85.938 (87.932)	lr 0.00265
Train [95][2220/3239]	Time 0.220 (0.621)	Data Time 0.001 (0.019)	Loss 2.2332 (2.2415)	Entropy 0.79791 (0.80079)	Top-1 acc 70.312 (70.545)	Top-5 acc 89.844 (87.929)	lr 0.00265
Train [95][2230/3239]	Time 0.228 (0.620)	Data Time 0.001 (0.019)	Loss 2.1771 (2.2415)	Entropy 0.79781 (0.80078)	Top-1 acc 73.828 (70.546)	Top-5 acc 89.062 (87.927)	lr 0.00265
Train [95][2240/3239]	Time 0.229 (0.619)	Data Time 0.001 (0.019)	Loss 2.2875 (2.2415)	Entropy 0.79782 (0.80076)	Top-1 acc 68.359 (70.544)	Top-5 acc 85.547 (87.928)	lr 0.00264
Train [95][2250/3239]	Time 0.241 (0.619)	Data Time 0.001 (0.019)	Loss 2.2239 (2.2415)	Entropy 0.79770 (0.80075)	Top-1 acc 69.531 (70.543)	Top-5 acc 89.062 (87.926)	lr 0.00264
Train [95][2260/3239]	Time 0.249 (0.618)	Data Time 0.001 (0.019)	Loss 2.3100 (2.2415)	Entropy 0.79758 (0.80074)	Top-1 acc 68.359 (70.542)	Top-5 acc 87.109 (87.924)	lr 0.00264
Train [95][2270/3239]	Time 0.230 (0.618)	Data Time 0.001 (0.018)	Loss 2.1021 (2.2413)	Entropy 0.79761 (0.80072)	Top-1 acc 71.094 (70.543)	Top-5 acc 90.625 (87.928)	lr 0.00264
Train [95][2280/3239]	Time 0.340 (0.617)	Data Time 0.001 (0.018)	Loss 2.2457 (2.2415)	Entropy 0.79760 (0.80071)	Top-1 acc 69.922 (70.537)	Top-5 acc 87.109 (87.925)	lr 0.00264
Train [95][2290/3239]	Time 0.240 (0.616)	Data Time 0.001 (0.018)	Loss 2.1236 (2.2414)	Entropy 0.79753 (0.80070)	Top-1 acc 74.219 (70.539)	Top-5 acc 89.453 (87.926)	lr 0.00264
Train [95][2300/3239]	Time 0.238 (0.616)	Data Time 0.001 (0.018)	Loss 2.3396 (2.2414)	Entropy 0.79754 (0.80068)	Top-1 acc 65.234 (70.534)	Top-5 acc 83.984 (87.925)	lr 0.00264
Train [95][2310/3239]	Time 0.233 (0.615)	Data Time 0.001 (0.018)	Loss 2.2550 (2.2414)	Entropy 0.79745 (0.80067)	Top-1 acc 68.750 (70.529)	Top-5 acc 87.500 (87.927)	lr 0.00264
Train [95][2320/3239]	Time 2.693 (0.615)	Data Time 0.001 (0.018)	Loss 2.2440 (2.2415)	Entropy 0.79745 (0.80065)	Top-1 acc 69.922 (70.525)	Top-5 acc 90.625 (87.927)	lr 0.00264
Train [95][2330/3239]	Time 0.285 (0.613)	Data Time 0.001 (0.018)	Loss 2.0914 (2.2415)	Entropy 0.79746 (0.80064)	Top-1 acc 74.219 (70.522)	Top-5 acc 91.406 (87.926)	lr 0.00264
Train [95][2340/3239]	Time 0.242 (0.613)	Data Time 0.002 (0.018)	Loss 2.2556 (2.2417)	Entropy 0.79749 (0.80063)	Top-1 acc 72.266 (70.514)	Top-5 acc 85.547 (87.924)	lr 0.00264
Train [95][2350/3239]	Time 0.240 (0.612)	Data Time 0.001 (0.018)	Loss 2.3327 (2.2418)	Entropy 0.79750 (0.80061)	Top-1 acc 69.141 (70.515)	Top-5 acc 85.938 (87.922)	lr 0.00264
Train [95][2360/3239]	Time 0.224 (0.612)	Data Time 0.001 (0.018)	Loss 2.3924 (2.2418)	Entropy 0.79742 (0.80060)	Top-1 acc 66.797 (70.512)	Top-5 acc 86.719 (87.919)	lr 0.00264
Train [95][2370/3239]	Time 0.231 (0.611)	Data Time 0.002 (0.018)	Loss 2.1852 (2.2420)	Entropy 0.79741 (0.80059)	Top-1 acc 71.094 (70.504)	Top-5 acc 90.625 (87.919)	lr 0.00264
Train [95][2380/3239]	Time 0.236 (0.611)	Data Time 0.001 (0.018)	Loss 2.2234 (2.2419)	Entropy 0.79740 (0.80057)	Top-1 acc 67.578 (70.500)	Top-5 acc 89.453 (87.919)	lr 0.00264
Train [95][2390/3239]	Time 0.290 (0.632)	Data Time 0.003 (0.018)	Loss 2.1805 (2.2419)	Entropy 0.79738 (0.80056)	Top-1 acc 73.047 (70.505)	Top-5 acc 87.891 (87.919)	lr 0.00264
Train [95][2400/3239]	Time 0.208 (0.631)	Data Time 0.002 (0.018)	Loss 2.3231 (2.2419)	Entropy 0.79735 (0.80055)	Top-1 acc 64.453 (70.501)	Top-5 acc 89.062 (87.918)	lr 0.00263
Train [95][2410/3239]	Time 0.220 (0.631)	Data Time 0.001 (0.018)	Loss 2.1563 (2.2420)	Entropy 0.79733 (0.80053)	Top-1 acc 75.781 (70.504)	Top-5 acc 88.672 (87.915)	lr 0.00263
Train [95][2420/3239]	Time 0.224 (0.630)	Data Time 0.001 (0.017)	Loss 2.0885 (2.2419)	Entropy 0.79731 (0.80052)	Top-1 acc 73.047 (70.501)	Top-5 acc 90.234 (87.917)	lr 0.00263
Train [95][2430/3239]	Time 2.531 (0.629)	Data Time 0.002 (0.017)	Loss 2.2148 (2.2418)	Entropy 0.79731 (0.80051)	Top-1 acc 71.094 (70.500)	Top-5 acc 85.938 (87.921)	lr 0.00263
Train [95][2440/3239]	Time 0.249 (0.628)	Data Time 0.001 (0.017)	Loss 2.2721 (2.2420)	Entropy 0.79727 (0.80049)	Top-1 acc 68.750 (70.496)	Top-5 acc 86.328 (87.918)	lr 0.00263
Train [95][2450/3239]	Time 0.357 (0.627)	Data Time 0.002 (0.017)	Loss 2.1572 (2.2419)	Entropy 0.79715 (0.80048)	Top-1 acc 73.438 (70.494)	Top-5 acc 89.844 (87.921)	lr 0.00263
Train [95][2460/3239]	Time 0.280 (0.627)	Data Time 0.002 (0.017)	Loss 2.3321 (2.2418)	Entropy 0.79716 (0.80047)	Top-1 acc 68.359 (70.500)	Top-5 acc 85.547 (87.922)	lr 0.00263
Train [95][2470/3239]	Time 0.241 (0.626)	Data Time 0.001 (0.017)	Loss 2.2528 (2.2417)	Entropy 0.79724 (0.80045)	Top-1 acc 70.312 (70.502)	Top-5 acc 87.891 (87.923)	lr 0.00263
Train [95][2480/3239]	Time 0.235 (0.626)	Data Time 0.001 (0.017)	Loss 2.3763 (2.2416)	Entropy 0.79723 (0.80044)	Top-1 acc 64.844 (70.504)	Top-5 acc 85.938 (87.925)	lr 0.00263
Train [95][2490/3239]	Time 0.335 (0.625)	Data Time 0.001 (0.017)	Loss 2.2740 (2.2418)	Entropy 0.79717 (0.80043)	Top-1 acc 71.094 (70.502)	Top-5 acc 87.891 (87.922)	lr 0.00263
Train [95][2500/3239]	Time 0.243 (0.625)	Data Time 0.002 (0.017)	Loss 2.3401 (2.2417)	Entropy 0.79715 (0.80041)	Top-1 acc 68.750 (70.505)	Top-5 acc 84.375 (87.923)	lr 0.00263
Train [95][2510/3239]	Time 0.238 (0.624)	Data Time 0.001 (0.017)	Loss 2.2809 (2.2418)	Entropy 0.79709 (0.80040)	Top-1 acc 69.922 (70.506)	Top-5 acc 87.109 (87.921)	lr 0.00263
Train [95][2520/3239]	Time 0.215 (0.623)	Data Time 0.001 (0.017)	Loss 2.1686 (2.2419)	Entropy 0.79705 (0.80039)	Top-1 acc 73.047 (70.505)	Top-5 acc 90.625 (87.920)	lr 0.00263
Train [95][2530/3239]	Time 0.335 (0.623)	Data Time 0.001 (0.017)	Loss 2.1737 (2.2418)	Entropy 0.79688 (0.80038)	Top-1 acc 71.875 (70.507)	Top-5 acc 89.844 (87.920)	lr 0.00263
Train [95][2540/3239]	Time 2.535 (0.622)	Data Time 0.001 (0.017)	Loss 2.2195 (2.2419)	Entropy 0.79688 (0.80036)	Top-1 acc 73.438 (70.505)	Top-5 acc 87.891 (87.916)	lr 0.00263
Train [95][2550/3239]	Time 0.256 (0.621)	Data Time 0.001 (0.017)	Loss 2.2547 (2.2420)	Entropy 0.79764 (0.80035)	Top-1 acc 69.531 (70.502)	Top-5 acc 89.062 (87.914)	lr 0.00263
Train [95][2560/3239]	Time 0.233 (0.620)	Data Time 0.002 (0.017)	Loss 2.3377 (2.2421)	Entropy 0.79760 (0.80034)	Top-1 acc 68.359 (70.502)	Top-5 acc 85.938 (87.910)	lr 0.00263
Train [95][2570/3239]	Time 0.241 (0.620)	Data Time 0.001 (0.017)	Loss 2.1801 (2.2422)	Entropy 0.79754 (0.80033)	Top-1 acc 70.703 (70.499)	Top-5 acc 91.016 (87.909)	lr 0.00262
Train [95][2580/3239]	Time 0.265 (0.619)	Data Time 0.001 (0.016)	Loss 2.1733 (2.2422)	Entropy 0.79755 (0.80032)	Top-1 acc 72.266 (70.498)	Top-5 acc 89.062 (87.908)	lr 0.00262
Train [95][2590/3239]	Time 0.233 (0.619)	Data Time 0.001 (0.016)	Loss 2.2117 (2.2423)	Entropy 0.79749 (0.80031)	Top-1 acc 72.656 (70.493)	Top-5 acc 90.625 (87.912)	lr 0.00262
Train [95][2600/3239]	Time 0.220 (0.618)	Data Time 0.001 (0.016)	Loss 2.2792 (2.2423)	Entropy 0.79749 (0.80030)	Top-1 acc 67.188 (70.492)	Top-5 acc 87.500 (87.910)	lr 0.00262
Train [95][2610/3239]	Time 0.267 (0.618)	Data Time 0.001 (0.016)	Loss 2.1495 (2.2424)	Entropy 0.79739 (0.80029)	Top-1 acc 72.266 (70.489)	Top-5 acc 88.672 (87.909)	lr 0.00262
Train [95][2620/3239]	Time 0.312 (0.617)	Data Time 0.001 (0.016)	Loss 2.2832 (2.2424)	Entropy 0.79772 (0.80028)	Top-1 acc 71.094 (70.490)	Top-5 acc 87.891 (87.910)	lr 0.00262
Train [95][2630/3239]	Time 0.233 (0.617)	Data Time 0.001 (0.016)	Loss 2.1660 (2.2425)	Entropy 0.79770 (0.80027)	Top-1 acc 70.312 (70.489)	Top-5 acc 90.625 (87.908)	lr 0.00262
Train [95][2640/3239]	Time 0.292 (0.616)	Data Time 0.001 (0.016)	Loss 2.3113 (2.2423)	Entropy 0.79767 (0.80026)	Top-1 acc 71.094 (70.498)	Top-5 acc 87.109 (87.913)	lr 0.00262
Train [95][2650/3239]	Time 0.310 (0.616)	Data Time 0.001 (0.016)	Loss 2.2898 (2.2424)	Entropy 0.79761 (0.80025)	Top-1 acc 71.094 (70.497)	Top-5 acc 88.672 (87.914)	lr 0.00262
Train [95][2660/3239]	Time 0.212 (0.615)	Data Time 0.001 (0.016)	Loss 2.1896 (2.2424)	Entropy 0.79753 (0.80024)	Top-1 acc 74.219 (70.499)	Top-5 acc 89.453 (87.913)	lr 0.00262
Train [95][2670/3239]	Time 0.237 (0.615)	Data Time 0.001 (0.016)	Loss 2.0818 (2.2425)	Entropy 0.79750 (0.80023)	Top-1 acc 73.047 (70.496)	Top-5 acc 89.844 (87.911)	lr 0.00262
Train [95][2680/3239]	Time 0.216 (0.614)	Data Time 0.001 (0.016)	Loss 2.1178 (2.2424)	Entropy 0.79749 (0.80022)	Top-1 acc 75.781 (70.498)	Top-5 acc 90.234 (87.913)	lr 0.00262
Train [95][2690/3239]	Time 0.247 (0.614)	Data Time 0.001 (0.016)	Loss 2.3999 (2.2424)	Entropy 0.79747 (0.80021)	Top-1 acc 69.531 (70.499)	Top-5 acc 84.766 (87.912)	lr 0.00262
Train [95][2700/3239]	Time 0.232 (0.613)	Data Time 0.001 (0.016)	Loss 2.1933 (2.2424)	Entropy 0.79744 (0.80020)	Top-1 acc 70.703 (70.497)	Top-5 acc 89.453 (87.915)	lr 0.00262
Train [95][2710/3239]	Time 0.271 (0.613)	Data Time 0.001 (0.016)	Loss 2.3044 (2.2425)	Entropy 0.79736 (0.80019)	Top-1 acc 67.188 (70.495)	Top-5 acc 86.719 (87.912)	lr 0.00262
Train [95][2720/3239]	Time 0.236 (0.612)	Data Time 0.001 (0.016)	Loss 2.2926 (2.2426)	Entropy 0.79735 (0.80017)	Top-1 acc 68.359 (70.499)	Top-5 acc 86.328 (87.910)	lr 0.00262
Train [95][2730/3239]	Time 0.227 (0.612)	Data Time 0.001 (0.016)	Loss 2.2601 (2.2426)	Entropy 0.79719 (0.80016)	Top-1 acc 68.750 (70.495)	Top-5 acc 87.500 (87.906)	lr 0.00261
Train [95][2740/3239]	Time 0.282 (0.629)	Data Time 0.004 (0.016)	Loss 2.2786 (2.2427)	Entropy 0.79716 (0.80015)	Top-1 acc 71.484 (70.499)	Top-5 acc 85.156 (87.906)	lr 0.00261
Train [95][2750/3239]	Time 0.236 (0.629)	Data Time 0.002 (0.016)	Loss 2.2171 (2.2428)	Entropy 0.79721 (0.80014)	Top-1 acc 71.094 (70.497)	Top-5 acc 89.453 (87.906)	lr 0.00261
Train [95][2760/3239]	Time 0.241 (0.628)	Data Time 0.002 (0.015)	Loss 2.1794 (2.2428)	Entropy 0.79720 (0.80013)	Top-1 acc 71.875 (70.495)	Top-5 acc 89.062 (87.904)	lr 0.00261
Train [95][2770/3239]	Time 0.223 (0.628)	Data Time 0.001 (0.015)	Loss 2.2246 (2.2430)	Entropy 0.79719 (0.80012)	Top-1 acc 71.875 (70.491)	Top-5 acc 87.500 (87.899)	lr 0.00261
Train [95][2780/3239]	Time 0.233 (0.627)	Data Time 0.001 (0.015)	Loss 2.2351 (2.2429)	Entropy 0.79727 (0.80011)	Top-1 acc 71.875 (70.492)	Top-5 acc 88.281 (87.900)	lr 0.00261
Train [95][2790/3239]	Time 0.237 (0.627)	Data Time 0.002 (0.015)	Loss 2.1408 (2.2429)	Entropy 0.79722 (0.80010)	Top-1 acc 70.703 (70.493)	Top-5 acc 90.234 (87.900)	lr 0.00261
Train [95][2800/3239]	Time 0.283 (0.626)	Data Time 0.001 (0.015)	Loss 2.3502 (2.2430)	Entropy 0.79719 (0.80009)	Top-1 acc 72.656 (70.491)	Top-5 acc 84.766 (87.897)	lr 0.00261
Train [95][2810/3239]	Time 0.264 (0.626)	Data Time 0.001 (0.015)	Loss 2.2760 (2.2429)	Entropy 0.79709 (0.80008)	Top-1 acc 68.359 (70.494)	Top-5 acc 89.844 (87.902)	lr 0.00261
Train [95][2820/3239]	Time 0.240 (0.625)	Data Time 0.001 (0.015)	Loss 2.4057 (2.2429)	Entropy 0.79715 (0.80007)	Top-1 acc 68.750 (70.497)	Top-5 acc 83.203 (87.900)	lr 0.00261
Train [95][2830/3239]	Time 0.360 (0.625)	Data Time 0.001 (0.015)	Loss 2.2046 (2.2430)	Entropy 0.79716 (0.80006)	Top-1 acc 75.391 (70.493)	Top-5 acc 89.062 (87.899)	lr 0.00261
Train [95][2840/3239]	Time 0.262 (0.624)	Data Time 0.001 (0.015)	Loss 2.2810 (2.2429)	Entropy 0.79709 (0.80005)	Top-1 acc 70.703 (70.497)	Top-5 acc 86.719 (87.900)	lr 0.00261
Train [95][2850/3239]	Time 0.224 (0.624)	Data Time 0.001 (0.015)	Loss 2.1945 (2.2429)	Entropy 0.79693 (0.80004)	Top-1 acc 71.875 (70.494)	Top-5 acc 88.672 (87.900)	lr 0.00261
Train [95][2860/3239]	Time 0.233 (0.623)	Data Time 0.001 (0.015)	Loss 2.2351 (2.2429)	Entropy 0.79694 (0.80003)	Top-1 acc 69.141 (70.492)	Top-5 acc 85.938 (87.900)	lr 0.00261
Train [95][2870/3239]	Time 0.337 (0.623)	Data Time 0.001 (0.015)	Loss 2.2777 (2.2431)	Entropy 0.79695 (0.80002)	Top-1 acc 67.578 (70.489)	Top-5 acc 87.891 (87.896)	lr 0.00261
Train [95][2880/3239]	Time 0.243 (0.622)	Data Time 0.001 (0.015)	Loss 2.1556 (2.2430)	Entropy 0.79691 (0.80001)	Top-1 acc 72.266 (70.489)	Top-5 acc 87.891 (87.899)	lr 0.00261
Train [95][2890/3239]	Time 0.243 (0.622)	Data Time 0.001 (0.015)	Loss 2.1976 (2.2431)	Entropy 0.79689 (0.80000)	Top-1 acc 74.609 (70.488)	Top-5 acc 89.453 (87.899)	lr 0.00260
Train [95][2900/3239]	Time 0.231 (0.622)	Data Time 0.001 (0.015)	Loss 2.3290 (2.2430)	Entropy 0.79682 (0.79998)	Top-1 acc 67.188 (70.488)	Top-5 acc 85.938 (87.898)	lr 0.00260
Train [95][2910/3239]	Time 0.229 (0.621)	Data Time 0.001 (0.015)	Loss 2.2102 (2.2431)	Entropy 0.79681 (0.79997)	Top-1 acc 68.750 (70.484)	Top-5 acc 87.891 (87.897)	lr 0.00260
Train [95][2920/3239]	Time 0.244 (0.621)	Data Time 0.001 (0.015)	Loss 2.2736 (2.2430)	Entropy 0.79745 (0.79996)	Top-1 acc 68.750 (70.486)	Top-5 acc 89.062 (87.896)	lr 0.00260
Train [95][2930/3239]	Time 0.253 (0.620)	Data Time 0.001 (0.015)	Loss 2.3216 (2.2431)	Entropy 0.79734 (0.79995)	Top-1 acc 67.969 (70.487)	Top-5 acc 87.500 (87.895)	lr 0.00260
Train [95][2940/3239]	Time 0.259 (0.620)	Data Time 0.001 (0.015)	Loss 2.1784 (2.2432)	Entropy 0.79731 (0.79995)	Top-1 acc 73.047 (70.485)	Top-5 acc 89.453 (87.894)	lr 0.00260
Train [95][2950/3239]	Time 0.235 (0.619)	Data Time 0.001 (0.015)	Loss 2.3170 (2.2432)	Entropy 0.79728 (0.79994)	Top-1 acc 67.578 (70.485)	Top-5 acc 86.719 (87.893)	lr 0.00260
Train [95][2960/3239]	Time 0.260 (0.619)	Data Time 0.001 (0.015)	Loss 2.3208 (2.2433)	Entropy 0.79724 (0.79993)	Top-1 acc 69.531 (70.480)	Top-5 acc 87.500 (87.891)	lr 0.00260
Train [95][2970/3239]	Time 0.242 (0.618)	Data Time 0.001 (0.015)	Loss 2.1322 (2.2436)	Entropy 0.79721 (0.79992)	Top-1 acc 73.828 (70.471)	Top-5 acc 89.453 (87.884)	lr 0.00260
Train [95][2980/3239]	Time 0.247 (0.618)	Data Time 0.001 (0.014)	Loss 2.2554 (2.2435)	Entropy 0.79723 (0.79991)	Top-1 acc 68.750 (70.475)	Top-5 acc 87.891 (87.885)	lr 0.00260
Train [95][2990/3239]	Time 0.221 (0.618)	Data Time 0.001 (0.014)	Loss 2.4323 (2.2437)	Entropy 0.79726 (0.79990)	Top-1 acc 66.016 (70.471)	Top-5 acc 87.109 (87.885)	lr 0.00260
Train [95][3000/3239]	Time 0.233 (0.617)	Data Time 0.001 (0.014)	Loss 2.2348 (2.2437)	Entropy 0.79722 (0.79989)	Top-1 acc 71.094 (70.473)	Top-5 acc 86.328 (87.883)	lr 0.00260
Train [95][3010/3239]	Time 0.248 (0.617)	Data Time 0.001 (0.014)	Loss 2.2129 (2.2437)	Entropy 0.79719 (0.79988)	Top-1 acc 72.656 (70.471)	Top-5 acc 88.281 (87.884)	lr 0.00260
Train [95][3020/3239]	Time 0.238 (0.616)	Data Time 0.001 (0.014)	Loss 2.2621 (2.2437)	Entropy 0.79722 (0.79987)	Top-1 acc 67.188 (70.471)	Top-5 acc 87.109 (87.882)	lr 0.00260
Train [95][3030/3239]	Time 0.261 (0.616)	Data Time 0.002 (0.014)	Loss 2.2475 (2.2437)	Entropy 0.79720 (0.79987)	Top-1 acc 69.141 (70.470)	Top-5 acc 86.719 (87.881)	lr 0.00260
Train [95][3040/3239]	Time 0.215 (0.615)	Data Time 0.001 (0.014)	Loss 2.1758 (2.2437)	Entropy 0.79723 (0.79986)	Top-1 acc 72.266 (70.473)	Top-5 acc 88.672 (87.882)	lr 0.00260
Train [95][3050/3239]	Time 0.256 (0.615)	Data Time 0.001 (0.014)	Loss 2.1860 (2.2439)	Entropy 0.79719 (0.79985)	Top-1 acc 67.578 (70.467)	Top-5 acc 90.625 (87.881)	lr 0.00259
Train [95][3060/3239]	Time 0.266 (0.614)	Data Time 0.001 (0.014)	Loss 2.2259 (2.2439)	Entropy 0.79718 (0.79984)	Top-1 acc 73.438 (70.468)	Top-5 acc 86.719 (87.882)	lr 0.00259
Train [95][3070/3239]	Time 0.301 (0.630)	Data Time 0.005 (0.014)	Loss 2.2486 (2.2439)	Entropy 0.79718 (0.79983)	Top-1 acc 72.266 (70.463)	Top-5 acc 86.328 (87.881)	lr 0.00259
Train [95][3080/3239]	Time 0.330 (0.630)	Data Time 0.002 (0.014)	Loss 2.1509 (2.2440)	Entropy 0.79720 (0.79982)	Top-1 acc 69.141 (70.461)	Top-5 acc 89.453 (87.879)	lr 0.00259
Train [95][3090/3239]	Time 0.216 (0.629)	Data Time 0.002 (0.014)	Loss 2.1336 (2.2439)	Entropy 0.79706 (0.79981)	Top-1 acc 75.000 (70.464)	Top-5 acc 89.844 (87.881)	lr 0.00259
Train [95][3100/3239]	Time 0.299 (0.629)	Data Time 0.003 (0.014)	Loss 2.1273 (2.2439)	Entropy 0.79701 (0.79980)	Top-1 acc 70.703 (70.465)	Top-5 acc 89.844 (87.882)	lr 0.00259
Train [95][3110/3239]	Time 0.245 (0.629)	Data Time 0.002 (0.014)	Loss 2.3264 (2.2439)	Entropy 0.79698 (0.79980)	Top-1 acc 69.141 (70.465)	Top-5 acc 88.281 (87.884)	lr 0.00259
Train [95][3120/3239]	Time 0.207 (0.628)	Data Time 0.001 (0.014)	Loss 2.1341 (2.2438)	Entropy 0.79698 (0.79979)	Top-1 acc 70.703 (70.469)	Top-5 acc 92.188 (87.887)	lr 0.00259
Train [95][3130/3239]	Time 0.227 (0.628)	Data Time 0.001 (0.014)	Loss 2.1306 (2.2439)	Entropy 0.79696 (0.79978)	Top-1 acc 73.438 (70.465)	Top-5 acc 91.406 (87.885)	lr 0.00259
Train [95][3140/3239]	Time 0.243 (0.627)	Data Time 0.001 (0.014)	Loss 2.3225 (2.2438)	Entropy 0.79686 (0.79977)	Top-1 acc 64.844 (70.464)	Top-5 acc 85.156 (87.885)	lr 0.00259
Train [95][3150/3239]	Time 0.231 (0.627)	Data Time 0.001 (0.014)	Loss 2.3107 (2.2438)	Entropy 0.79681 (0.79976)	Top-1 acc 67.188 (70.465)	Top-5 acc 87.891 (87.887)	lr 0.00259
Train [95][3160/3239]	Time 0.255 (0.626)	Data Time 0.001 (0.014)	Loss 2.1690 (2.2437)	Entropy 0.79678 (0.79975)	Top-1 acc 69.531 (70.469)	Top-5 acc 89.453 (87.891)	lr 0.00259
Train [95][3170/3239]	Time 0.236 (0.626)	Data Time 0.001 (0.014)	Loss 2.1506 (2.2436)	Entropy 0.79674 (0.79974)	Top-1 acc 75.391 (70.471)	Top-5 acc 89.062 (87.892)	lr 0.00259
Train [95][3180/3239]	Time 0.216 (0.625)	Data Time 0.000 (0.014)	Loss 2.2589 (2.2436)	Entropy 0.79671 (0.79973)	Top-1 acc 71.484 (70.471)	Top-5 acc 85.938 (87.895)	lr 0.00259
Train [95][3190/3239]	Time 0.232 (0.625)	Data Time 0.000 (0.014)	Loss 2.2145 (2.2436)	Entropy 0.79666 (0.79972)	Top-1 acc 72.656 (70.470)	Top-5 acc 87.500 (87.893)	lr 0.00259
Train [95][3200/3239]	Time 0.241 (0.624)	Data Time 0.000 (0.014)	Loss 2.1730 (2.2436)	Entropy 0.79662 (0.79971)	Top-1 acc 72.266 (70.473)	Top-5 acc 88.672 (87.895)	lr 0.00259
Train [95][3210/3239]	Time 0.319 (0.624)	Data Time 0.000 (0.014)	Loss 2.2864 (2.2435)	Entropy 0.79660 (0.79970)	Top-1 acc 71.484 (70.475)	Top-5 acc 85.156 (87.894)	lr 0.00258
Train [95][3220/3239]	Time 0.226 (0.623)	Data Time 0.000 (0.014)	Loss 2.2430 (2.2435)	Entropy 0.79660 (0.79969)	Top-1 acc 68.750 (70.478)	Top-5 acc 88.281 (87.897)	lr 0.00258
Train [95][3230/3239]	Time 0.224 (0.623)	Data Time 0.000 (0.013)	Loss 2.2976 (2.2435)	Entropy 0.79655 (0.79968)	Top-1 acc 67.578 (70.479)	Top-5 acc 87.109 (87.895)	lr 0.00258
Train [95][3239/3239]	Time 2.371 (0.623)	Data Time 0.000 (0.013)	Loss 2.3671 (2.2437)	Entropy 0.79655 (0.79967)	Top-1 acc 59.259 (70.474)	Top-5 acc 90.123 (87.892)	lr 0.00258
==========Valid [95/120]	loss 1.233	top-1 acc 71.754 (71.754)	top-5 acc 89.588	Train top-1 70.474	top-5 87.892	Entropy 0.79655	Latency-None: 0.000ms	Flops: 546.53M
Train [96][0/3239]	Time 40.986 (40.986)	Data Time 39.080 (39.080)	Loss 2.2672 (2.2672)	Entropy 0.79653 (0.79653)	Top-1 acc 66.797 (66.797)	Top-5 acc 87.891 (87.891)	lr 0.00258
Train [96][10/3239]	Time 2.842 (4.369)	Data Time 0.002 (3.610)	Loss 2.0170 (2.2317)	Entropy 0.79653 (0.79653)	Top-1 acc 74.609 (71.058)	Top-5 acc 92.188 (88.743)	lr 0.00258
Train [96][20/3239]	Time 0.260 (2.422)	Data Time 0.002 (1.892)	Loss 2.3694 (2.2357)	Entropy 0.79650 (0.79652)	Top-1 acc 67.578 (70.796)	Top-5 acc 84.375 (88.244)	lr 0.00258
Train [96][30/3239]	Time 0.244 (1.799)	Data Time 0.001 (1.282)	Loss 2.1554 (2.2267)	Entropy 0.79643 (0.79649)	Top-1 acc 73.438 (70.917)	Top-5 acc 89.844 (88.470)	lr 0.00258
Train [96][40/3239]	Time 0.245 (1.484)	Data Time 0.001 (0.970)	Loss 2.2269 (2.2324)	Entropy 0.79649 (0.79649)	Top-1 acc 73.438 (70.998)	Top-5 acc 86.719 (88.167)	lr 0.00258
Train [96][50/3239]	Time 0.252 (1.291)	Data Time 0.001 (0.780)	Loss 2.1187 (2.2182)	Entropy 0.79649 (0.79649)	Top-1 acc 75.391 (71.193)	Top-5 acc 90.625 (88.442)	lr 0.00258
Train [96][60/3239]	Time 0.326 (1.161)	Data Time 0.001 (0.652)	Loss 2.2320 (2.2250)	Entropy 0.79642 (0.79648)	Top-1 acc 68.750 (71.030)	Top-5 acc 88.672 (88.345)	lr 0.00258
Train [96][70/3239]	Time 0.233 (1.067)	Data Time 0.001 (0.561)	Loss 2.3202 (2.2261)	Entropy 0.79641 (0.79647)	Top-1 acc 67.578 (71.039)	Top-5 acc 85.156 (88.243)	lr 0.00258
Train [96][80/3239]	Time 0.250 (0.997)	Data Time 0.002 (0.492)	Loss 2.4194 (2.2303)	Entropy 0.79638 (0.79646)	Top-1 acc 67.578 (70.925)	Top-5 acc 83.984 (88.190)	lr 0.00258
Train [96][90/3239]	Time 0.242 (0.940)	Data Time 0.001 (0.438)	Loss 2.1439 (2.2276)	Entropy 0.79648 (0.79646)	Top-1 acc 73.047 (70.986)	Top-5 acc 89.453 (88.311)	lr 0.00258
Train [96][100/3239]	Time 0.323 (0.896)	Data Time 0.001 (0.395)	Loss 2.2062 (2.2304)	Entropy 0.79648 (0.79646)	Top-1 acc 74.219 (70.955)	Top-5 acc 89.453 (88.281)	lr 0.00258
Train [96][110/3239]	Time 0.217 (0.858)	Data Time 0.001 (0.359)	Loss 2.2853 (2.2342)	Entropy 0.79651 (0.79646)	Top-1 acc 66.016 (70.759)	Top-5 acc 88.672 (88.221)	lr 0.00258
Train [96][120/3239]	Time 2.683 (0.828)	Data Time 0.001 (0.330)	Loss 2.1602 (2.2282)	Entropy 0.79651 (0.79647)	Top-1 acc 74.219 (70.942)	Top-5 acc 88.672 (88.307)	lr 0.00258
Train [96][130/3239]	Time 0.263 (0.784)	Data Time 0.001 (0.305)	Loss 2.4183 (2.2301)	Entropy 0.79646 (0.79647)	Top-1 acc 66.016 (70.849)	Top-5 acc 85.938 (88.284)	lr 0.00258
Train [96][140/3239]	Time 0.244 (0.762)	Data Time 0.001 (0.283)	Loss 2.3042 (2.2308)	Entropy 0.79644 (0.79646)	Top-1 acc 69.531 (70.872)	Top-5 acc 87.500 (88.278)	lr 0.00257
Train [96][150/3239]	Time 0.234 (0.745)	Data Time 0.001 (0.264)	Loss 2.1974 (2.2325)	Entropy 0.79640 (0.79646)	Top-1 acc 71.094 (70.915)	Top-5 acc 89.844 (88.209)	lr 0.00257
Train [96][160/3239]	Time 0.245 (0.728)	Data Time 0.001 (0.248)	Loss 2.1347 (2.2284)	Entropy 0.79638 (0.79646)	Top-1 acc 74.609 (71.014)	Top-5 acc 89.844 (88.276)	lr 0.00257
Train [96][170/3239]	Time 0.225 (0.713)	Data Time 0.001 (0.234)	Loss 2.1571 (2.2254)	Entropy 0.79635 (0.79645)	Top-1 acc 73.828 (71.069)	Top-5 acc 88.281 (88.327)	lr 0.00257
Train [96][180/3239]	Time 0.320 (1.001)	Data Time 0.004 (0.221)	Loss 2.2887 (2.2246)	Entropy 0.79635 (0.79644)	Top-1 acc 70.703 (71.064)	Top-5 acc 84.766 (88.340)	lr 0.00257
Train [96][190/3239]	Time 0.340 (0.976)	Data Time 0.002 (0.210)	Loss 2.0981 (2.2238)	Entropy 0.79636 (0.79644)	Top-1 acc 70.703 (71.039)	Top-5 acc 92.578 (88.375)	lr 0.00257
Train [96][200/3239]	Time 0.236 (0.952)	Data Time 0.002 (0.199)	Loss 2.2359 (2.2257)	Entropy 0.79632 (0.79644)	Top-1 acc 70.312 (71.008)	Top-5 acc 85.156 (88.338)	lr 0.00257
Train [96][210/3239]	Time 0.237 (0.930)	Data Time 0.001 (0.190)	Loss 2.1658 (2.2289)	Entropy 0.79625 (0.79643)	Top-1 acc 69.922 (70.942)	Top-5 acc 89.062 (88.246)	lr 0.00257
Train [96][220/3239]	Time 0.211 (0.909)	Data Time 0.001 (0.181)	Loss 2.2887 (2.2295)	Entropy 0.79613 (0.79642)	Top-1 acc 70.703 (70.926)	Top-5 acc 89.062 (88.251)	lr 0.00257
Train [96][230/3239]	Time 2.628 (0.891)	Data Time 0.001 (0.174)	Loss 2.2941 (2.2296)	Entropy 0.79613 (0.79641)	Top-1 acc 67.188 (70.915)	Top-5 acc 88.281 (88.241)	lr 0.00257
Train [96][240/3239]	Time 0.281 (0.864)	Data Time 0.001 (0.166)	Loss 2.0538 (2.2292)	Entropy 0.79616 (0.79640)	Top-1 acc 77.734 (70.927)	Top-5 acc 92.188 (88.246)	lr 0.00257
Train [96][250/3239]	Time 0.238 (0.848)	Data Time 0.001 (0.160)	Loss 2.2501 (2.2284)	Entropy 0.79616 (0.79639)	Top-1 acc 69.531 (70.943)	Top-5 acc 89.062 (88.261)	lr 0.00257
Train [96][260/3239]	Time 0.247 (0.835)	Data Time 0.001 (0.154)	Loss 2.2798 (2.2293)	Entropy 0.79614 (0.79638)	Top-1 acc 66.797 (70.913)	Top-5 acc 89.453 (88.242)	lr 0.00257
Train [96][270/3239]	Time 0.250 (0.822)	Data Time 0.001 (0.148)	Loss 2.2593 (2.2299)	Entropy 0.79614 (0.79637)	Top-1 acc 66.406 (70.891)	Top-5 acc 86.719 (88.228)	lr 0.00257
Train [96][280/3239]	Time 0.245 (0.811)	Data Time 0.001 (0.143)	Loss 2.2966 (2.2283)	Entropy 0.79615 (0.79636)	Top-1 acc 70.312 (70.931)	Top-5 acc 88.281 (88.273)	lr 0.00257
Train [96][290/3239]	Time 0.237 (0.800)	Data Time 0.001 (0.138)	Loss 2.2059 (2.2284)	Entropy 0.79612 (0.79635)	Top-1 acc 70.703 (70.946)	Top-5 acc 86.719 (88.268)	lr 0.00257
Train [96][300/3239]	Time 0.245 (0.790)	Data Time 0.001 (0.134)	Loss 2.1661 (2.2276)	Entropy 0.79617 (0.79635)	Top-1 acc 71.875 (70.959)	Top-5 acc 89.062 (88.276)	lr 0.00256
Train [96][310/3239]	Time 0.229 (0.781)	Data Time 0.001 (0.129)	Loss 2.1470 (2.2287)	Entropy 0.79614 (0.79634)	Top-1 acc 74.219 (70.944)	Top-5 acc 91.016 (88.259)	lr 0.00256
Train [96][320/3239]	Time 0.240 (0.772)	Data Time 0.001 (0.125)	Loss 2.2353 (2.2301)	Entropy 0.79616 (0.79633)	Top-1 acc 69.922 (70.895)	Top-5 acc 88.672 (88.237)	lr 0.00256
Train [96][330/3239]	Time 0.265 (0.764)	Data Time 0.001 (0.122)	Loss 2.2946 (2.2306)	Entropy 0.79614 (0.79633)	Top-1 acc 71.484 (70.870)	Top-5 acc 87.500 (88.221)	lr 0.00256
Train [96][340/3239]	Time 2.545 (0.756)	Data Time 0.001 (0.118)	Loss 2.2656 (2.2298)	Entropy 0.79614 (0.79632)	Top-1 acc 67.188 (70.896)	Top-5 acc 88.672 (88.238)	lr 0.00256
Train [96][350/3239]	Time 0.255 (0.741)	Data Time 0.001 (0.115)	Loss 2.4389 (2.2298)	Entropy 0.79612 (0.79632)	Top-1 acc 66.406 (70.877)	Top-5 acc 85.547 (88.265)	lr 0.00256
Train [96][360/3239]	Time 0.339 (0.734)	Data Time 0.001 (0.112)	Loss 2.3698 (2.2296)	Entropy 0.79611 (0.79631)	Top-1 acc 68.750 (70.898)	Top-5 acc 86.328 (88.268)	lr 0.00256
Train [96][370/3239]	Time 0.228 (0.727)	Data Time 0.001 (0.109)	Loss 2.3108 (2.2305)	Entropy 0.79618 (0.79631)	Top-1 acc 70.703 (70.894)	Top-5 acc 85.547 (88.235)	lr 0.00256
Train [96][380/3239]	Time 0.236 (0.721)	Data Time 0.001 (0.106)	Loss 2.1295 (2.2313)	Entropy 0.79612 (0.79630)	Top-1 acc 73.047 (70.908)	Top-5 acc 91.016 (88.213)	lr 0.00256
Train [96][390/3239]	Time 0.231 (0.714)	Data Time 0.001 (0.103)	Loss 2.2039 (2.2321)	Entropy 0.79612 (0.79630)	Top-1 acc 69.531 (70.908)	Top-5 acc 89.844 (88.198)	lr 0.00256
Train [96][400/3239]	Time 0.321 (0.709)	Data Time 0.001 (0.101)	Loss 2.2592 (2.2327)	Entropy 0.79607 (0.79629)	Top-1 acc 68.359 (70.887)	Top-5 acc 88.672 (88.193)	lr 0.00256
Train [96][410/3239]	Time 0.252 (0.703)	Data Time 0.001 (0.098)	Loss 2.1921 (2.2328)	Entropy 0.79601 (0.79629)	Top-1 acc 71.875 (70.913)	Top-5 acc 89.844 (88.196)	lr 0.00256
Train [96][420/3239]	Time 0.226 (0.698)	Data Time 0.001 (0.096)	Loss 2.1869 (2.2330)	Entropy 0.79600 (0.79628)	Top-1 acc 71.484 (70.896)	Top-5 acc 87.891 (88.201)	lr 0.00256
Train [96][430/3239]	Time 0.221 (0.692)	Data Time 0.001 (0.094)	Loss 2.3301 (2.2323)	Entropy 0.79599 (0.79627)	Top-1 acc 67.188 (70.904)	Top-5 acc 88.672 (88.221)	lr 0.00256
Train [96][440/3239]	Time 0.219 (0.688)	Data Time 0.001 (0.092)	Loss 2.1634 (2.2324)	Entropy 0.79595 (0.79627)	Top-1 acc 71.875 (70.918)	Top-5 acc 89.453 (88.211)	lr 0.00256
Train [96][450/3239]	Time 2.675 (0.683)	Data Time 0.001 (0.090)	Loss 2.2480 (2.2312)	Entropy 0.79595 (0.79626)	Top-1 acc 72.266 (70.956)	Top-5 acc 87.500 (88.235)	lr 0.00256
Train [96][460/3239]	Time 0.254 (0.674)	Data Time 0.001 (0.088)	Loss 2.3058 (2.2319)	Entropy 0.79592 (0.79625)	Top-1 acc 66.797 (70.930)	Top-5 acc 86.719 (88.220)	lr 0.00255
Train [96][470/3239]	Time 0.246 (0.670)	Data Time 0.002 (0.086)	Loss 2.2153 (2.2338)	Entropy 0.79589 (0.79625)	Top-1 acc 70.703 (70.868)	Top-5 acc 87.500 (88.193)	lr 0.00255
Train [96][480/3239]	Time 0.238 (0.666)	Data Time 0.001 (0.084)	Loss 2.2100 (2.2348)	Entropy 0.79585 (0.79624)	Top-1 acc 68.359 (70.840)	Top-5 acc 90.234 (88.181)	lr 0.00255
Train [96][490/3239]	Time 0.225 (0.662)	Data Time 0.001 (0.082)	Loss 2.1641 (2.2339)	Entropy 0.79583 (0.79623)	Top-1 acc 74.219 (70.863)	Top-5 acc 89.844 (88.202)	lr 0.00255
Train [96][500/3239]	Time 0.245 (0.659)	Data Time 0.001 (0.081)	Loss 2.2087 (2.2333)	Entropy 0.79583 (0.79622)	Top-1 acc 72.656 (70.886)	Top-5 acc 87.500 (88.195)	lr 0.00255
Train [96][510/3239]	Time 0.230 (0.655)	Data Time 0.002 (0.079)	Loss 2.2863 (2.2333)	Entropy 0.79583 (0.79621)	Top-1 acc 69.922 (70.879)	Top-5 acc 86.719 (88.193)	lr 0.00255
Train [96][520/3239]	Time 0.218 (0.652)	Data Time 0.001 (0.078)	Loss 2.1625 (2.2330)	Entropy 0.79578 (0.79621)	Top-1 acc 71.875 (70.882)	Top-5 acc 90.234 (88.191)	lr 0.00255
Train [96][530/3239]	Time 0.319 (0.649)	Data Time 0.001 (0.076)	Loss 2.3348 (2.2334)	Entropy 0.79577 (0.79620)	Top-1 acc 68.750 (70.871)	Top-5 acc 87.500 (88.185)	lr 0.00255
Train [96][540/3239]	Time 0.290 (0.740)	Data Time 0.003 (0.075)	Loss 2.1169 (2.2326)	Entropy 0.79568 (0.79619)	Top-1 acc 73.828 (70.892)	Top-5 acc 90.234 (88.187)	lr 0.00255
Train [96][550/3239]	Time 0.221 (0.737)	Data Time 0.002 (0.074)	Loss 2.3508 (2.2322)	Entropy 0.79567 (0.79618)	Top-1 acc 69.141 (70.902)	Top-5 acc 87.500 (88.200)	lr 0.00255
Train [96][560/3239]	Time 2.675 (0.732)	Data Time 0.002 (0.072)	Loss 2.1821 (2.2330)	Entropy 0.79567 (0.79617)	Top-1 acc 67.578 (70.868)	Top-5 acc 87.891 (88.185)	lr 0.00255
Train [96][570/3239]	Time 0.337 (0.724)	Data Time 0.001 (0.071)	Loss 2.1690 (2.2337)	Entropy 0.79564 (0.79616)	Top-1 acc 74.219 (70.867)	Top-5 acc 89.062 (88.170)	lr 0.00255
Train [96][580/3239]	Time 0.248 (0.720)	Data Time 0.002 (0.070)	Loss 2.2490 (2.2329)	Entropy 0.79565 (0.79615)	Top-1 acc 69.531 (70.874)	Top-5 acc 87.891 (88.182)	lr 0.00255
Train [96][590/3239]	Time 0.220 (0.716)	Data Time 0.002 (0.069)	Loss 2.1051 (2.2324)	Entropy 0.79565 (0.79615)	Top-1 acc 72.656 (70.886)	Top-5 acc 91.016 (88.186)	lr 0.00255
Train [96][600/3239]	Time 0.230 (0.712)	Data Time 0.001 (0.068)	Loss 2.2153 (2.2321)	Entropy 0.79559 (0.79614)	Top-1 acc 67.188 (70.886)	Top-5 acc 89.844 (88.190)	lr 0.00255
Train [96][610/3239]	Time 0.330 (0.709)	Data Time 0.001 (0.067)	Loss 2.3792 (2.2323)	Entropy 0.79560 (0.79613)	Top-1 acc 66.016 (70.876)	Top-5 acc 85.938 (88.191)	lr 0.00255
Train [96][620/3239]	Time 0.228 (0.705)	Data Time 0.002 (0.066)	Loss 2.2855 (2.2330)	Entropy 0.79562 (0.79612)	Top-1 acc 69.922 (70.867)	Top-5 acc 87.109 (88.177)	lr 0.00255
Train [96][630/3239]	Time 0.224 (0.702)	Data Time 0.001 (0.065)	Loss 2.1980 (2.2329)	Entropy 0.79565 (0.79611)	Top-1 acc 70.703 (70.874)	Top-5 acc 89.844 (88.187)	lr 0.00254
Train [96][640/3239]	Time 0.238 (0.698)	Data Time 0.001 (0.064)	Loss 2.2721 (2.2327)	Entropy 0.79560 (0.79610)	Top-1 acc 69.141 (70.891)	Top-5 acc 87.109 (88.184)	lr 0.00254
Train [96][650/3239]	Time 0.326 (0.695)	Data Time 0.001 (0.063)	Loss 2.1369 (2.2327)	Entropy 0.79556 (0.79610)	Top-1 acc 75.391 (70.898)	Top-5 acc 90.234 (88.187)	lr 0.00254
Train [96][660/3239]	Time 0.236 (0.692)	Data Time 0.002 (0.062)	Loss 2.4324 (2.2329)	Entropy 0.79557 (0.79609)	Top-1 acc 66.016 (70.894)	Top-5 acc 85.547 (88.183)	lr 0.00254
Train [96][670/3239]	Time 2.612 (0.689)	Data Time 0.001 (0.061)	Loss 2.1714 (2.2320)	Entropy 0.79557 (0.79608)	Top-1 acc 70.703 (70.920)	Top-5 acc 91.016 (88.196)	lr 0.00254
Train [96][680/3239]	Time 0.275 (0.682)	Data Time 0.001 (0.060)	Loss 2.1843 (2.2317)	Entropy 0.79551 (0.79607)	Top-1 acc 69.922 (70.914)	Top-5 acc 89.062 (88.195)	lr 0.00254
Train [96][690/3239]	Time 0.243 (0.679)	Data Time 0.001 (0.059)	Loss 2.3695 (2.2326)	Entropy 0.79550 (0.79606)	Top-1 acc 66.797 (70.882)	Top-5 acc 86.328 (88.181)	lr 0.00254
Train [96][700/3239]	Time 0.227 (0.677)	Data Time 0.001 (0.058)	Loss 2.2334 (2.2317)	Entropy 0.79551 (0.79606)	Top-1 acc 69.922 (70.898)	Top-5 acc 85.938 (88.200)	lr 0.00254
Train [96][710/3239]	Time 0.242 (0.674)	Data Time 0.001 (0.057)	Loss 2.3042 (2.2320)	Entropy 0.79549 (0.79605)	Top-1 acc 68.750 (70.893)	Top-5 acc 86.328 (88.199)	lr 0.00254
Train [96][720/3239]	Time 0.239 (0.671)	Data Time 0.001 (0.057)	Loss 2.1643 (2.2317)	Entropy 0.79549 (0.79604)	Top-1 acc 71.484 (70.905)	Top-5 acc 91.016 (88.203)	lr 0.00254
Train [96][730/3239]	Time 0.254 (0.669)	Data Time 0.001 (0.056)	Loss 2.2363 (2.2307)	Entropy 0.79552 (0.79603)	Top-1 acc 70.312 (70.935)	Top-5 acc 86.328 (88.221)	lr 0.00254
Train [96][740/3239]	Time 0.229 (0.666)	Data Time 0.001 (0.055)	Loss 2.1983 (2.2310)	Entropy 0.79546 (0.79603)	Top-1 acc 69.531 (70.918)	Top-5 acc 87.109 (88.213)	lr 0.00254
Train [96][750/3239]	Time 0.237 (0.664)	Data Time 0.001 (0.055)	Loss 2.1202 (2.2307)	Entropy 0.79537 (0.79602)	Top-1 acc 74.219 (70.925)	Top-5 acc 89.453 (88.216)	lr 0.00254
Train [96][760/3239]	Time 0.234 (0.662)	Data Time 0.001 (0.054)	Loss 2.5258 (2.2314)	Entropy 0.79535 (0.79601)	Top-1 acc 63.281 (70.905)	Top-5 acc 81.641 (88.208)	lr 0.00254
Train [96][770/3239]	Time 0.225 (0.660)	Data Time 0.001 (0.053)	Loss 2.1621 (2.2313)	Entropy 0.79525 (0.79600)	Top-1 acc 73.438 (70.905)	Top-5 acc 89.062 (88.206)	lr 0.00254
Train [96][780/3239]	Time 2.727 (0.658)	Data Time 0.001 (0.052)	Loss 2.5095 (2.2316)	Entropy 0.79525 (0.79599)	Top-1 acc 64.062 (70.903)	Top-5 acc 82.031 (88.204)	lr 0.00254
Train [96][790/3239]	Time 0.254 (0.652)	Data Time 0.001 (0.052)	Loss 2.2927 (2.2320)	Entropy 0.79520 (0.79598)	Top-1 acc 67.969 (70.900)	Top-5 acc 84.766 (88.188)	lr 0.00253
Train [96][800/3239]	Time 0.234 (0.650)	Data Time 0.002 (0.051)	Loss 2.1351 (2.2317)	Entropy 0.79520 (0.79597)	Top-1 acc 74.609 (70.906)	Top-5 acc 87.109 (88.192)	lr 0.00253
Train [96][810/3239]	Time 0.237 (0.648)	Data Time 0.001 (0.051)	Loss 2.1978 (2.2319)	Entropy 0.79513 (0.79596)	Top-1 acc 69.531 (70.893)	Top-5 acc 89.844 (88.190)	lr 0.00253
Train [96][820/3239]	Time 0.340 (0.646)	Data Time 0.001 (0.050)	Loss 2.1858 (2.2321)	Entropy 0.79511 (0.79595)	Top-1 acc 70.312 (70.887)	Top-5 acc 88.672 (88.184)	lr 0.00253
Train [96][830/3239]	Time 0.244 (0.644)	Data Time 0.001 (0.049)	Loss 2.2832 (2.2323)	Entropy 0.79511 (0.79594)	Top-1 acc 69.531 (70.881)	Top-5 acc 88.672 (88.181)	lr 0.00253
Train [96][840/3239]	Time 0.232 (0.642)	Data Time 0.001 (0.049)	Loss 2.2501 (2.2324)	Entropy 0.79504 (0.79593)	Top-1 acc 71.875 (70.892)	Top-5 acc 88.281 (88.174)	lr 0.00253
Train [96][850/3239]	Time 0.222 (0.640)	Data Time 0.001 (0.048)	Loss 2.4542 (2.2328)	Entropy 0.79501 (0.79592)	Top-1 acc 66.797 (70.877)	Top-5 acc 85.547 (88.167)	lr 0.00253
Train [96][860/3239]	Time 0.229 (0.638)	Data Time 0.001 (0.048)	Loss 2.1596 (2.2325)	Entropy 0.79500 (0.79591)	Top-1 acc 72.266 (70.882)	Top-5 acc 91.406 (88.172)	lr 0.00253
Train [96][870/3239]	Time 0.238 (0.637)	Data Time 0.001 (0.047)	Loss 2.3378 (2.2326)	Entropy 0.79493 (0.79590)	Top-1 acc 67.969 (70.879)	Top-5 acc 85.156 (88.167)	lr 0.00253
Train [96][880/3239]	Time 0.246 (0.635)	Data Time 0.001 (0.047)	Loss 2.0776 (2.2327)	Entropy 0.79485 (0.79589)	Top-1 acc 75.781 (70.884)	Top-5 acc 90.234 (88.163)	lr 0.00253
Train [96][890/3239]	Time 2.653 (0.633)	Data Time 0.001 (0.046)	Loss 2.2338 (2.2326)	Entropy 0.79485 (0.79588)	Top-1 acc 69.531 (70.891)	Top-5 acc 87.500 (88.161)	lr 0.00253
Train [96][900/3239]	Time 0.218 (0.629)	Data Time 0.001 (0.046)	Loss 2.1667 (2.2325)	Entropy 0.79482 (0.79586)	Top-1 acc 69.141 (70.886)	Top-5 acc 90.234 (88.162)	lr 0.00253
Train [96][910/3239]	Time 0.351 (0.686)	Data Time 0.003 (0.045)	Loss 2.0696 (2.2322)	Entropy 0.79482 (0.79585)	Top-1 acc 75.391 (70.893)	Top-5 acc 92.969 (88.175)	lr 0.00253
Train [96][920/3239]	Time 0.247 (0.684)	Data Time 0.002 (0.045)	Loss 2.2144 (2.2323)	Entropy 0.79484 (0.79584)	Top-1 acc 68.750 (70.891)	Top-5 acc 89.844 (88.172)	lr 0.00253
Train [96][930/3239]	Time 0.243 (0.682)	Data Time 0.002 (0.044)	Loss 2.3081 (2.2324)	Entropy 0.79476 (0.79583)	Top-1 acc 69.922 (70.892)	Top-5 acc 87.109 (88.181)	lr 0.00253
Train [96][940/3239]	Time 0.237 (0.680)	Data Time 0.001 (0.044)	Loss 2.2258 (2.2339)	Entropy 0.79475 (0.79582)	Top-1 acc 72.266 (70.855)	Top-5 acc 87.891 (88.154)	lr 0.00253
Train [96][950/3239]	Time 0.333 (0.678)	Data Time 0.001 (0.043)	Loss 2.2519 (2.2339)	Entropy 0.79471 (0.79581)	Top-1 acc 68.359 (70.856)	Top-5 acc 88.281 (88.150)	lr 0.00252
Train [96][960/3239]	Time 0.223 (0.676)	Data Time 0.001 (0.043)	Loss 2.0856 (2.2335)	Entropy 0.79468 (0.79580)	Top-1 acc 73.828 (70.868)	Top-5 acc 91.016 (88.155)	lr 0.00252
Train [96][970/3239]	Time 0.226 (0.674)	Data Time 0.001 (0.043)	Loss 2.2969 (2.2335)	Entropy 0.79453 (0.79578)	Top-1 acc 71.094 (70.867)	Top-5 acc 87.500 (88.155)	lr 0.00252
Train [96][980/3239]	Time 0.240 (0.672)	Data Time 0.001 (0.042)	Loss 2.1936 (2.2331)	Entropy 0.79454 (0.79577)	Top-1 acc 73.828 (70.875)	Top-5 acc 89.453 (88.161)	lr 0.00252
Train [96][990/3239]	Time 0.332 (0.670)	Data Time 0.001 (0.042)	Loss 2.2366 (2.2335)	Entropy 0.79461 (0.79576)	Top-1 acc 67.969 (70.861)	Top-5 acc 87.500 (88.152)	lr 0.00252
Train [96][1000/3239]	Time 2.616 (0.668)	Data Time 0.001 (0.041)	Loss 2.3370 (2.2337)	Entropy 0.79461 (0.79575)	Top-1 acc 71.094 (70.858)	Top-5 acc 86.719 (88.152)	lr 0.00252
Train [96][1010/3239]	Time 0.285 (0.664)	Data Time 0.001 (0.041)	Loss 2.3347 (2.2341)	Entropy 0.79458 (0.79574)	Top-1 acc 70.703 (70.854)	Top-5 acc 86.328 (88.144)	lr 0.00252
Train [96][1020/3239]	Time 0.256 (0.662)	Data Time 0.001 (0.041)	Loss 2.2232 (2.2340)	Entropy 0.79459 (0.79572)	Top-1 acc 71.484 (70.855)	Top-5 acc 87.500 (88.144)	lr 0.00252
Train [96][1030/3239]	Time 0.241 (0.660)	Data Time 0.001 (0.040)	Loss 2.2416 (2.2334)	Entropy 0.79462 (0.79571)	Top-1 acc 70.312 (70.868)	Top-5 acc 88.672 (88.149)	lr 0.00252
Train [96][1040/3239]	Time 0.241 (0.659)	Data Time 0.001 (0.040)	Loss 2.4000 (2.2334)	Entropy 0.79464 (0.79570)	Top-1 acc 66.016 (70.873)	Top-5 acc 84.766 (88.153)	lr 0.00252
Train [96][1050/3239]	Time 0.226 (0.657)	Data Time 0.001 (0.039)	Loss 2.2170 (2.2336)	Entropy 0.79461 (0.79569)	Top-1 acc 71.484 (70.860)	Top-5 acc 86.328 (88.146)	lr 0.00252
Train [96][1060/3239]	Time 0.219 (0.655)	Data Time 0.001 (0.039)	Loss 2.3197 (2.2338)	Entropy 0.79461 (0.79568)	Top-1 acc 65.625 (70.849)	Top-5 acc 86.719 (88.146)	lr 0.00252
Train [96][1070/3239]	Time 0.219 (0.654)	Data Time 0.001 (0.039)	Loss 2.2820 (2.2338)	Entropy 0.79448 (0.79567)	Top-1 acc 69.141 (70.848)	Top-5 acc 88.672 (88.146)	lr 0.00252
Train [96][1080/3239]	Time 0.240 (0.652)	Data Time 0.001 (0.038)	Loss 2.3405 (2.2337)	Entropy 0.79444 (0.79566)	Top-1 acc 66.797 (70.844)	Top-5 acc 87.109 (88.145)	lr 0.00252
Train [96][1090/3239]	Time 0.229 (0.651)	Data Time 0.001 (0.038)	Loss 2.1133 (2.2340)	Entropy 0.79443 (0.79565)	Top-1 acc 72.656 (70.833)	Top-5 acc 90.234 (88.137)	lr 0.00252
Train [96][1100/3239]	Time 0.238 (0.649)	Data Time 0.001 (0.038)	Loss 2.2414 (2.2343)	Entropy 0.79438 (0.79564)	Top-1 acc 72.656 (70.826)	Top-5 acc 87.891 (88.134)	lr 0.00252
Train [96][1110/3239]	Time 2.702 (0.648)	Data Time 0.001 (0.037)	Loss 2.0982 (2.2344)	Entropy 0.79438 (0.79563)	Top-1 acc 73.047 (70.816)	Top-5 acc 89.062 (88.132)	lr 0.00252
Train [96][1120/3239]	Time 0.322 (0.644)	Data Time 0.001 (0.037)	Loss 2.4743 (2.2344)	Entropy 0.79426 (0.79561)	Top-1 acc 65.625 (70.818)	Top-5 acc 85.156 (88.131)	lr 0.00251
Train [96][1130/3239]	Time 0.244 (0.643)	Data Time 0.002 (0.037)	Loss 2.3490 (2.2346)	Entropy 0.79423 (0.79560)	Top-1 acc 67.188 (70.803)	Top-5 acc 88.281 (88.136)	lr 0.00251
Train [96][1140/3239]	Time 0.248 (0.641)	Data Time 0.001 (0.036)	Loss 2.3252 (2.2349)	Entropy 0.79418 (0.79559)	Top-1 acc 70.312 (70.799)	Top-5 acc 86.719 (88.128)	lr 0.00251
Train [96][1150/3239]	Time 0.227 (0.640)	Data Time 0.001 (0.036)	Loss 2.2729 (2.2348)	Entropy 0.79414 (0.79558)	Top-1 acc 69.922 (70.798)	Top-5 acc 89.062 (88.130)	lr 0.00251
Train [96][1160/3239]	Time 0.324 (0.639)	Data Time 0.001 (0.036)	Loss 2.1592 (2.2348)	Entropy 0.79411 (0.79557)	Top-1 acc 72.656 (70.799)	Top-5 acc 89.844 (88.134)	lr 0.00251
Train [96][1170/3239]	Time 0.213 (0.637)	Data Time 0.001 (0.036)	Loss 2.3117 (2.2347)	Entropy 0.79404 (0.79555)	Top-1 acc 68.750 (70.799)	Top-5 acc 89.453 (88.139)	lr 0.00251
Train [96][1180/3239]	Time 0.223 (0.636)	Data Time 0.001 (0.035)	Loss 2.2140 (2.2346)	Entropy 0.79402 (0.79554)	Top-1 acc 69.531 (70.801)	Top-5 acc 88.672 (88.136)	lr 0.00251
Train [96][1190/3239]	Time 0.227 (0.635)	Data Time 0.001 (0.035)	Loss 2.1660 (2.2343)	Entropy 0.79398 (0.79553)	Top-1 acc 71.875 (70.803)	Top-5 acc 89.844 (88.137)	lr 0.00251
Train [96][1200/3239]	Time 0.337 (0.633)	Data Time 0.001 (0.035)	Loss 2.3021 (2.2342)	Entropy 0.79404 (0.79551)	Top-1 acc 73.438 (70.810)	Top-5 acc 87.500 (88.138)	lr 0.00251
Train [96][1210/3239]	Time 0.223 (0.632)	Data Time 0.001 (0.034)	Loss 2.2546 (2.2342)	Entropy 0.79406 (0.79550)	Top-1 acc 69.531 (70.811)	Top-5 acc 88.672 (88.138)	lr 0.00251
Train [96][1220/3239]	Time 2.503 (0.631)	Data Time 0.001 (0.034)	Loss 2.2783 (2.2344)	Entropy 0.79406 (0.79549)	Top-1 acc 71.094 (70.800)	Top-5 acc 85.938 (88.132)	lr 0.00251
Train [96][1230/3239]	Time 0.244 (0.628)	Data Time 0.001 (0.034)	Loss 2.3253 (2.2344)	Entropy 0.79403 (0.79548)	Top-1 acc 68.750 (70.802)	Top-5 acc 87.500 (88.134)	lr 0.00251
Train [96][1240/3239]	Time 0.207 (0.626)	Data Time 0.001 (0.034)	Loss 2.2510 (2.2344)	Entropy 0.79402 (0.79547)	Top-1 acc 71.484 (70.811)	Top-5 acc 87.109 (88.134)	lr 0.00251
Train [96][1250/3239]	Time 0.239 (0.625)	Data Time 0.001 (0.033)	Loss 2.2383 (2.2345)	Entropy 0.79387 (0.79545)	Top-1 acc 69.922 (70.805)	Top-5 acc 88.672 (88.130)	lr 0.00251
Train [96][1260/3239]	Time 0.239 (0.624)	Data Time 0.002 (0.033)	Loss 2.3485 (2.2346)	Entropy 0.79379 (0.79544)	Top-1 acc 66.406 (70.803)	Top-5 acc 85.156 (88.124)	lr 0.00251
Train [96][1270/3239]	Time 0.251 (0.666)	Data Time 0.002 (0.033)	Loss 2.2912 (2.2347)	Entropy 0.79379 (0.79543)	Top-1 acc 67.578 (70.796)	Top-5 acc 85.938 (88.125)	lr 0.00251
Train [96][1280/3239]	Time 0.229 (0.665)	Data Time 0.002 (0.033)	Loss 2.3496 (2.2350)	Entropy 0.79371 (0.79541)	Top-1 acc 70.312 (70.790)	Top-5 acc 85.547 (88.122)	lr 0.00250
Train [96][1290/3239]	Time 0.244 (0.663)	Data Time 0.002 (0.032)	Loss 2.3779 (2.2352)	Entropy 0.79369 (0.79540)	Top-1 acc 64.062 (70.788)	Top-5 acc 84.766 (88.119)	lr 0.00250
Train [96][1300/3239]	Time 0.212 (0.662)	Data Time 0.001 (0.032)	Loss 2.1416 (2.2350)	Entropy 0.79374 (0.79539)	Top-1 acc 73.047 (70.793)	Top-5 acc 87.891 (88.121)	lr 0.00250
Train [96][1310/3239]	Time 0.226 (0.660)	Data Time 0.001 (0.032)	Loss 2.3144 (2.2349)	Entropy 0.79373 (0.79538)	Top-1 acc 69.141 (70.795)	Top-5 acc 86.719 (88.120)	lr 0.00250
Train [96][1320/3239]	Time 0.260 (0.659)	Data Time 0.001 (0.032)	Loss 2.1748 (2.2346)	Entropy 0.79324 (0.79536)	Top-1 acc 72.656 (70.804)	Top-5 acc 88.672 (88.123)	lr 0.00250
Train [96][1330/3239]	Time 2.589 (0.658)	Data Time 0.002 (0.031)	Loss 2.3684 (2.2346)	Entropy 0.79324 (0.79535)	Top-1 acc 67.969 (70.807)	Top-5 acc 85.938 (88.122)	lr 0.00250
Train [96][1340/3239]	Time 0.257 (0.655)	Data Time 0.003 (0.031)	Loss 2.2117 (2.2346)	Entropy 0.79325 (0.79533)	Top-1 acc 69.922 (70.807)	Top-5 acc 86.719 (88.121)	lr 0.00250
Train [96][1350/3239]	Time 0.239 (0.653)	Data Time 0.001 (0.031)	Loss 2.3479 (2.2348)	Entropy 0.79322 (0.79532)	Top-1 acc 67.578 (70.797)	Top-5 acc 84.375 (88.114)	lr 0.00250
Train [96][1360/3239]	Time 0.227 (0.652)	Data Time 0.001 (0.031)	Loss 2.1550 (2.2349)	Entropy 0.79312 (0.79530)	Top-1 acc 71.094 (70.793)	Top-5 acc 89.844 (88.113)	lr 0.00250
Train [96][1370/3239]	Time 0.320 (0.651)	Data Time 0.001 (0.031)	Loss 2.4017 (2.2349)	Entropy 0.79312 (0.79528)	Top-1 acc 65.234 (70.789)	Top-5 acc 83.594 (88.115)	lr 0.00250
Train [96][1380/3239]	Time 0.239 (0.650)	Data Time 0.001 (0.030)	Loss 2.2346 (2.2352)	Entropy 0.79313 (0.79527)	Top-1 acc 70.703 (70.783)	Top-5 acc 89.453 (88.109)	lr 0.00250
Train [96][1390/3239]	Time 0.223 (0.648)	Data Time 0.001 (0.030)	Loss 2.0757 (2.2354)	Entropy 0.79304 (0.79525)	Top-1 acc 75.000 (70.779)	Top-5 acc 91.406 (88.096)	lr 0.00250
Train [96][1400/3239]	Time 0.218 (0.647)	Data Time 0.001 (0.030)	Loss 2.2833 (2.2355)	Entropy 0.79300 (0.79524)	Top-1 acc 66.406 (70.772)	Top-5 acc 86.328 (88.095)	lr 0.00250
Train [96][1410/3239]	Time 0.231 (0.646)	Data Time 0.001 (0.030)	Loss 2.1791 (2.2358)	Entropy 0.79300 (0.79522)	Top-1 acc 73.047 (70.762)	Top-5 acc 89.844 (88.089)	lr 0.00250
Train [96][1420/3239]	Time 0.230 (0.645)	Data Time 0.001 (0.030)	Loss 2.4120 (2.2362)	Entropy 0.79303 (0.79521)	Top-1 acc 64.844 (70.745)	Top-5 acc 85.938 (88.078)	lr 0.00250
Train [96][1430/3239]	Time 0.263 (0.643)	Data Time 0.001 (0.029)	Loss 2.2969 (2.2360)	Entropy 0.79307 (0.79519)	Top-1 acc 69.141 (70.745)	Top-5 acc 86.719 (88.081)	lr 0.00250
Train [96][1440/3239]	Time 2.565 (0.642)	Data Time 0.001 (0.029)	Loss 2.2586 (2.2359)	Entropy 0.79307 (0.79518)	Top-1 acc 68.750 (70.746)	Top-5 acc 88.281 (88.085)	lr 0.00250
Train [96][1450/3239]	Time 0.271 (0.640)	Data Time 0.001 (0.029)	Loss 2.1145 (2.2357)	Entropy 0.79306 (0.79516)	Top-1 acc 74.219 (70.746)	Top-5 acc 89.453 (88.091)	lr 0.00249
Train [96][1460/3239]	Time 0.325 (0.638)	Data Time 0.001 (0.029)	Loss 2.1392 (2.2356)	Entropy 0.79302 (0.79515)	Top-1 acc 74.609 (70.754)	Top-5 acc 89.453 (88.091)	lr 0.00249
Train [96][1470/3239]	Time 0.235 (0.637)	Data Time 0.001 (0.029)	Loss 2.2204 (2.2354)	Entropy 0.79298 (0.79513)	Top-1 acc 69.531 (70.762)	Top-5 acc 87.891 (88.096)	lr 0.00249
Train [96][1480/3239]	Time 0.242 (0.636)	Data Time 0.001 (0.028)	Loss 2.2278 (2.2351)	Entropy 0.79294 (0.79512)	Top-1 acc 71.875 (70.772)	Top-5 acc 85.938 (88.103)	lr 0.00249
Train [96][1490/3239]	Time 0.219 (0.635)	Data Time 0.001 (0.028)	Loss 2.3234 (2.2351)	Entropy 0.79285 (0.79510)	Top-1 acc 67.188 (70.770)	Top-5 acc 87.891 (88.101)	lr 0.00249
Train [96][1500/3239]	Time 0.364 (0.634)	Data Time 0.001 (0.028)	Loss 2.1025 (2.2350)	Entropy 0.79277 (0.79509)	Top-1 acc 76.953 (70.771)	Top-5 acc 91.797 (88.103)	lr 0.00249
Train [96][1510/3239]	Time 0.233 (0.633)	Data Time 0.001 (0.028)	Loss 2.3183 (2.2350)	Entropy 0.79277 (0.79507)	Top-1 acc 69.141 (70.770)	Top-5 acc 85.938 (88.101)	lr 0.00249
Train [96][1520/3239]	Time 0.163 (0.632)	Data Time 0.001 (0.028)	Loss 2.1169 (2.2348)	Entropy 0.79280 (0.79506)	Top-1 acc 73.828 (70.775)	Top-5 acc 88.672 (88.104)	lr 0.00249
Train [96][1530/3239]	Time 0.220 (0.631)	Data Time 0.001 (0.028)	Loss 2.1730 (2.2348)	Entropy 0.79283 (0.79504)	Top-1 acc 70.312 (70.781)	Top-5 acc 88.281 (88.105)	lr 0.00249
Train [96][1540/3239]	Time 0.230 (0.630)	Data Time 0.002 (0.027)	Loss 2.2459 (2.2347)	Entropy 0.79293 (0.79503)	Top-1 acc 71.484 (70.775)	Top-5 acc 88.672 (88.107)	lr 0.00249
Train [96][1550/3239]	Time 2.537 (0.629)	Data Time 0.001 (0.027)	Loss 2.1468 (2.2350)	Entropy 0.79293 (0.79501)	Top-1 acc 75.000 (70.766)	Top-5 acc 91.016 (88.103)	lr 0.00249
Train [96][1560/3239]	Time 0.252 (0.627)	Data Time 0.001 (0.027)	Loss 2.3481 (2.2355)	Entropy 0.79286 (0.79500)	Top-1 acc 69.531 (70.756)	Top-5 acc 84.766 (88.092)	lr 0.00249
Train [96][1570/3239]	Time 0.235 (0.626)	Data Time 0.001 (0.027)	Loss 2.2930 (2.2354)	Entropy 0.79278 (0.79499)	Top-1 acc 67.188 (70.756)	Top-5 acc 86.719 (88.096)	lr 0.00249
Train [96][1580/3239]	Time 0.263 (0.625)	Data Time 0.001 (0.027)	Loss 2.1970 (2.2352)	Entropy 0.79297 (0.79497)	Top-1 acc 70.312 (70.760)	Top-5 acc 85.156 (88.100)	lr 0.00249
Train [96][1590/3239]	Time 0.234 (0.624)	Data Time 0.001 (0.027)	Loss 2.5223 (2.2354)	Entropy 0.79296 (0.79496)	Top-1 acc 64.844 (70.757)	Top-5 acc 82.031 (88.094)	lr 0.00249
Train [96][1600/3239]	Time 0.230 (0.623)	Data Time 0.001 (0.026)	Loss 2.2610 (2.2352)	Entropy 0.79295 (0.79495)	Top-1 acc 73.828 (70.761)	Top-5 acc 86.328 (88.097)	lr 0.00249
Train [96][1610/3239]	Time 0.227 (0.622)	Data Time 0.001 (0.026)	Loss 2.3308 (2.2351)	Entropy 0.79294 (0.79494)	Top-1 acc 71.484 (70.763)	Top-5 acc 87.500 (88.100)	lr 0.00248
Train [96][1620/3239]	Time 0.258 (0.621)	Data Time 0.001 (0.026)	Loss 2.2422 (2.2352)	Entropy 0.79295 (0.79492)	Top-1 acc 72.656 (70.765)	Top-5 acc 89.062 (88.099)	lr 0.00248
Train [96][1630/3239]	Time 0.240 (0.653)	Data Time 0.002 (0.026)	Loss 2.1866 (2.2353)	Entropy 0.79291 (0.79491)	Top-1 acc 70.312 (70.758)	Top-5 acc 87.891 (88.098)	lr 0.00248
Train [96][1640/3239]	Time 0.210 (0.652)	Data Time 0.002 (0.026)	Loss 2.2743 (2.2358)	Entropy 0.79287 (0.79490)	Top-1 acc 69.922 (70.745)	Top-5 acc 85.938 (88.088)	lr 0.00248
Train [96][1650/3239]	Time 0.237 (0.651)	Data Time 0.002 (0.026)	Loss 2.5947 (2.2359)	Entropy 0.79283 (0.79489)	Top-1 acc 63.281 (70.744)	Top-5 acc 83.594 (88.084)	lr 0.00248
Train [96][1660/3239]	Time 2.503 (0.650)	Data Time 0.001 (0.026)	Loss 2.1163 (2.2360)	Entropy 0.79283 (0.79487)	Top-1 acc 73.438 (70.736)	Top-5 acc 89.062 (88.082)	lr 0.00248
Train [96][1670/3239]	Time 0.341 (0.648)	Data Time 0.001 (0.025)	Loss 2.2851 (2.2362)	Entropy 0.79282 (0.79486)	Top-1 acc 71.875 (70.734)	Top-5 acc 86.328 (88.074)	lr 0.00248
Train [96][1680/3239]	Time 0.220 (0.647)	Data Time 0.001 (0.025)	Loss 2.0896 (2.2361)	Entropy 0.79260 (0.79485)	Top-1 acc 71.484 (70.737)	Top-5 acc 90.234 (88.075)	lr 0.00248
Train [96][1690/3239]	Time 0.236 (0.646)	Data Time 0.001 (0.025)	Loss 2.2147 (2.2361)	Entropy 0.79257 (0.79483)	Top-1 acc 70.312 (70.730)	Top-5 acc 88.281 (88.077)	lr 0.00248
Train [96][1700/3239]	Time 0.231 (0.645)	Data Time 0.001 (0.025)	Loss 2.2657 (2.2361)	Entropy 0.79260 (0.79482)	Top-1 acc 70.312 (70.726)	Top-5 acc 86.719 (88.075)	lr 0.00248
Train [96][1710/3239]	Time 0.227 (0.644)	Data Time 0.001 (0.025)	Loss 2.1380 (2.2360)	Entropy 0.79267 (0.79481)	Top-1 acc 71.484 (70.728)	Top-5 acc 91.016 (88.079)	lr 0.00248
Train [96][1720/3239]	Time 0.231 (0.643)	Data Time 0.001 (0.025)	Loss 2.4353 (2.2362)	Entropy 0.79266 (0.79480)	Top-1 acc 65.234 (70.720)	Top-5 acc 86.328 (88.074)	lr 0.00248
Train [96][1730/3239]	Time 0.218 (0.642)	Data Time 0.001 (0.025)	Loss 2.2348 (2.2361)	Entropy 0.79264 (0.79478)	Top-1 acc 71.094 (70.718)	Top-5 acc 87.500 (88.079)	lr 0.00248
Train [96][1740/3239]	Time 0.233 (0.641)	Data Time 0.001 (0.024)	Loss 2.1552 (2.2361)	Entropy 0.79262 (0.79477)	Top-1 acc 73.828 (70.715)	Top-5 acc 91.406 (88.081)	lr 0.00248
Train [96][1750/3239]	Time 0.231 (0.640)	Data Time 0.001 (0.024)	Loss 2.3910 (2.2360)	Entropy 0.79259 (0.79476)	Top-1 acc 70.703 (70.717)	Top-5 acc 84.766 (88.082)	lr 0.00248
Train [96][1760/3239]	Time 0.228 (0.639)	Data Time 0.001 (0.024)	Loss 2.3616 (2.2361)	Entropy 0.79247 (0.79475)	Top-1 acc 70.312 (70.713)	Top-5 acc 86.328 (88.082)	lr 0.00248
Train [96][1770/3239]	Time 2.597 (0.638)	Data Time 0.001 (0.024)	Loss 2.1875 (2.2363)	Entropy 0.79247 (0.79473)	Top-1 acc 72.656 (70.705)	Top-5 acc 88.281 (88.075)	lr 0.00248
Train [96][1780/3239]	Time 0.228 (0.636)	Data Time 0.001 (0.024)	Loss 2.3505 (2.2364)	Entropy 0.79243 (0.79472)	Top-1 acc 73.047 (70.711)	Top-5 acc 83.984 (88.071)	lr 0.00247
Train [96][1790/3239]	Time 0.247 (0.635)	Data Time 0.001 (0.024)	Loss 2.3142 (2.2362)	Entropy 0.79247 (0.79471)	Top-1 acc 69.531 (70.716)	Top-5 acc 86.719 (88.072)	lr 0.00247
Train [96][1800/3239]	Time 0.336 (0.634)	Data Time 0.001 (0.024)	Loss 2.3591 (2.2363)	Entropy 0.79243 (0.79470)	Top-1 acc 66.406 (70.714)	Top-5 acc 84.766 (88.068)	lr 0.00247
Train [96][1810/3239]	Time 0.223 (0.633)	Data Time 0.001 (0.024)	Loss 2.3466 (2.2364)	Entropy 0.79233 (0.79468)	Top-1 acc 65.625 (70.708)	Top-5 acc 87.891 (88.070)	lr 0.00247
Train [96][1820/3239]	Time 0.239 (0.633)	Data Time 0.001 (0.023)	Loss 2.1663 (2.2365)	Entropy 0.79239 (0.79467)	Top-1 acc 73.047 (70.702)	Top-5 acc 88.281 (88.068)	lr 0.00247
Train [96][1830/3239]	Time 0.234 (0.632)	Data Time 0.001 (0.023)	Loss 2.3201 (2.2363)	Entropy 0.79246 (0.79466)	Top-1 acc 72.266 (70.708)	Top-5 acc 85.938 (88.070)	lr 0.00247
Train [96][1840/3239]	Time 0.328 (0.631)	Data Time 0.001 (0.023)	Loss 2.3221 (2.2367)	Entropy 0.79232 (0.79465)	Top-1 acc 67.578 (70.692)	Top-5 acc 88.672 (88.063)	lr 0.00247
Train [96][1850/3239]	Time 0.229 (0.630)	Data Time 0.001 (0.023)	Loss 2.2840 (2.2367)	Entropy 0.79227 (0.79463)	Top-1 acc 69.141 (70.691)	Top-5 acc 88.281 (88.065)	lr 0.00247
Train [96][1860/3239]	Time 0.232 (0.629)	Data Time 0.001 (0.023)	Loss 2.2566 (2.2368)	Entropy 0.79218 (0.79462)	Top-1 acc 69.141 (70.690)	Top-5 acc 89.062 (88.064)	lr 0.00247
Train [96][1870/3239]	Time 0.247 (0.628)	Data Time 0.001 (0.023)	Loss 2.3996 (2.2368)	Entropy 0.79235 (0.79461)	Top-1 acc 65.625 (70.692)	Top-5 acc 84.375 (88.063)	lr 0.00247
Train [96][1880/3239]	Time 2.671 (0.628)	Data Time 0.002 (0.023)	Loss 2.1878 (2.2367)	Entropy 0.79235 (0.79460)	Top-1 acc 72.656 (70.692)	Top-5 acc 88.281 (88.063)	lr 0.00247
Train [96][1890/3239]	Time 0.266 (0.626)	Data Time 0.001 (0.023)	Loss 2.1887 (2.2365)	Entropy 0.79236 (0.79458)	Top-1 acc 73.438 (70.696)	Top-5 acc 88.281 (88.071)	lr 0.00247
Train [96][1900/3239]	Time 0.230 (0.625)	Data Time 0.001 (0.022)	Loss 2.3279 (2.2366)	Entropy 0.79229 (0.79457)	Top-1 acc 68.750 (70.695)	Top-5 acc 84.766 (88.069)	lr 0.00247
Train [96][1910/3239]	Time 0.221 (0.624)	Data Time 0.001 (0.022)	Loss 2.2936 (2.2366)	Entropy 0.79224 (0.79456)	Top-1 acc 67.969 (70.697)	Top-5 acc 87.891 (88.069)	lr 0.00247
Train [96][1920/3239]	Time 0.223 (0.623)	Data Time 0.001 (0.022)	Loss 2.1191 (2.2363)	Entropy 0.79228 (0.79455)	Top-1 acc 73.828 (70.696)	Top-5 acc 91.406 (88.074)	lr 0.00247
Train [96][1930/3239]	Time 0.249 (0.623)	Data Time 0.001 (0.022)	Loss 2.2431 (2.2359)	Entropy 0.79218 (0.79454)	Top-1 acc 69.922 (70.708)	Top-5 acc 88.281 (88.082)	lr 0.00247
Train [96][1940/3239]	Time 0.242 (0.622)	Data Time 0.002 (0.022)	Loss 2.2435 (2.2358)	Entropy 0.79216 (0.79452)	Top-1 acc 69.922 (70.705)	Top-5 acc 86.719 (88.087)	lr 0.00247
Train [96][1950/3239]	Time 0.228 (0.621)	Data Time 0.001 (0.022)	Loss 2.3624 (2.2360)	Entropy 0.79211 (0.79451)	Top-1 acc 67.188 (70.698)	Top-5 acc 83.984 (88.083)	lr 0.00246
Train [96][1960/3239]	Time 0.229 (0.620)	Data Time 0.001 (0.022)	Loss 2.3694 (2.2361)	Entropy 0.79205 (0.79450)	Top-1 acc 67.969 (70.696)	Top-5 acc 85.938 (88.082)	lr 0.00246
Train [96][1970/3239]	Time 0.237 (0.620)	Data Time 0.001 (0.022)	Loss 2.1876 (2.2358)	Entropy 0.79210 (0.79449)	Top-1 acc 75.391 (70.705)	Top-5 acc 87.891 (88.085)	lr 0.00246
Train [96][1980/3239]	Time 0.235 (0.619)	Data Time 0.001 (0.022)	Loss 2.1631 (2.2358)	Entropy 0.79205 (0.79447)	Top-1 acc 72.656 (70.705)	Top-5 acc 89.062 (88.087)	lr 0.00246
Train [96][1990/3239]	Time 54.330 (0.644)	Data Time 0.001 (0.022)	Loss 2.1947 (2.2358)	Entropy 0.79205 (0.79446)	Top-1 acc 71.484 (70.705)	Top-5 acc 88.281 (88.086)	lr 0.00246
Train [96][2000/3239]	Time 0.261 (0.642)	Data Time 0.002 (0.021)	Loss 2.2819 (2.2358)	Entropy 0.79193 (0.79445)	Top-1 acc 69.531 (70.706)	Top-5 acc 85.156 (88.086)	lr 0.00246
Train [96][2010/3239]	Time 0.328 (0.642)	Data Time 0.002 (0.021)	Loss 2.1962 (2.2358)	Entropy 0.79183 (0.79444)	Top-1 acc 73.828 (70.709)	Top-5 acc 89.062 (88.087)	lr 0.00246
Train [96][2020/3239]	Time 0.240 (0.641)	Data Time 0.002 (0.021)	Loss 2.0332 (2.2358)	Entropy 0.79183 (0.79442)	Top-1 acc 75.781 (70.711)	Top-5 acc 92.188 (88.086)	lr 0.00246
Train [96][2030/3239]	Time 0.224 (0.640)	Data Time 0.001 (0.021)	Loss 2.3625 (2.2356)	Entropy 0.79185 (0.79441)	Top-1 acc 64.453 (70.718)	Top-5 acc 86.328 (88.087)	lr 0.00246
Train [96][2040/3239]	Time 0.231 (0.639)	Data Time 0.001 (0.021)	Loss 2.3771 (2.2356)	Entropy 0.79190 (0.79440)	Top-1 acc 67.188 (70.722)	Top-5 acc 84.375 (88.081)	lr 0.00246
Train [96][2050/3239]	Time 0.356 (0.638)	Data Time 0.001 (0.021)	Loss 2.1344 (2.2355)	Entropy 0.79183 (0.79439)	Top-1 acc 74.609 (70.729)	Top-5 acc 90.234 (88.085)	lr 0.00246
Train [96][2060/3239]	Time 0.233 (0.638)	Data Time 0.001 (0.021)	Loss 2.3907 (2.2357)	Entropy 0.79181 (0.79437)	Top-1 acc 68.359 (70.728)	Top-5 acc 82.422 (88.079)	lr 0.00246
Train [96][2070/3239]	Time 0.227 (0.637)	Data Time 0.001 (0.021)	Loss 2.1833 (2.2358)	Entropy 0.79181 (0.79436)	Top-1 acc 70.312 (70.728)	Top-5 acc 87.109 (88.075)	lr 0.00246
Train [96][2080/3239]	Time 0.233 (0.636)	Data Time 0.001 (0.021)	Loss 2.2389 (2.2358)	Entropy 0.79178 (0.79435)	Top-1 acc 69.922 (70.726)	Top-5 acc 88.672 (88.072)	lr 0.00246
Train [96][2090/3239]	Time 0.271 (0.635)	Data Time 0.001 (0.021)	Loss 2.2129 (2.2357)	Entropy 0.79178 (0.79434)	Top-1 acc 70.703 (70.729)	Top-5 acc 86.328 (88.075)	lr 0.00246
Train [96][2100/3239]	Time 2.619 (0.635)	Data Time 0.001 (0.021)	Loss 2.2693 (2.2357)	Entropy 0.79178 (0.79432)	Top-1 acc 73.047 (70.729)	Top-5 acc 85.938 (88.075)	lr 0.00246
Train [96][2110/3239]	Time 0.250 (0.633)	Data Time 0.001 (0.020)	Loss 2.2313 (2.2357)	Entropy 0.79163 (0.79431)	Top-1 acc 70.312 (70.727)	Top-5 acc 86.719 (88.075)	lr 0.00245
Train [96][2120/3239]	Time 0.228 (0.632)	Data Time 0.001 (0.020)	Loss 2.1509 (2.2357)	Entropy 0.79163 (0.79430)	Top-1 acc 69.531 (70.724)	Top-5 acc 89.844 (88.075)	lr 0.00245
Train [96][2130/3239]	Time 0.245 (0.631)	Data Time 0.001 (0.020)	Loss 2.2333 (2.2356)	Entropy 0.79160 (0.79429)	Top-1 acc 71.484 (70.723)	Top-5 acc 87.500 (88.082)	lr 0.00245
Train [96][2140/3239]	Time 0.230 (0.631)	Data Time 0.001 (0.020)	Loss 2.2471 (2.2356)	Entropy 0.79155 (0.79427)	Top-1 acc 70.703 (70.723)	Top-5 acc 86.328 (88.078)	lr 0.00245
Train [96][2150/3239]	Time 0.235 (0.630)	Data Time 0.001 (0.020)	Loss 2.2940 (2.2356)	Entropy 0.79157 (0.79426)	Top-1 acc 68.359 (70.723)	Top-5 acc 87.500 (88.077)	lr 0.00245
Train [96][2160/3239]	Time 0.243 (0.629)	Data Time 0.001 (0.020)	Loss 2.2127 (2.2356)	Entropy 0.79148 (0.79425)	Top-1 acc 73.438 (70.725)	Top-5 acc 89.453 (88.079)	lr 0.00245
Train [96][2170/3239]	Time 0.233 (0.629)	Data Time 0.001 (0.020)	Loss 2.4018 (2.2354)	Entropy 0.79148 (0.79424)	Top-1 acc 66.406 (70.728)	Top-5 acc 86.328 (88.085)	lr 0.00245
Train [96][2180/3239]	Time 0.313 (0.628)	Data Time 0.001 (0.020)	Loss 2.1505 (2.2353)	Entropy 0.79133 (0.79422)	Top-1 acc 74.219 (70.731)	Top-5 acc 90.625 (88.086)	lr 0.00245
Train [96][2190/3239]	Time 0.238 (0.627)	Data Time 0.001 (0.020)	Loss 2.2916 (2.2353)	Entropy 0.79126 (0.79421)	Top-1 acc 67.969 (70.729)	Top-5 acc 86.328 (88.087)	lr 0.00245
Train [96][2200/3239]	Time 0.230 (0.627)	Data Time 0.001 (0.020)	Loss 2.2133 (2.2353)	Entropy 0.79117 (0.79420)	Top-1 acc 69.531 (70.727)	Top-5 acc 90.234 (88.086)	lr 0.00245
Train [96][2210/3239]	Time 2.566 (0.626)	Data Time 0.001 (0.020)	Loss 2.0147 (2.2352)	Entropy 0.79117 (0.79418)	Top-1 acc 76.172 (70.726)	Top-5 acc 92.578 (88.089)	lr 0.00245
Train [96][2220/3239]	Time 0.281 (0.624)	Data Time 0.001 (0.019)	Loss 2.1781 (2.2353)	Entropy 0.79119 (0.79417)	Top-1 acc 72.656 (70.725)	Top-5 acc 87.500 (88.085)	lr 0.00245
Train [96][2230/3239]	Time 0.231 (0.624)	Data Time 0.001 (0.019)	Loss 2.2373 (2.2355)	Entropy 0.79115 (0.79416)	Top-1 acc 69.922 (70.722)	Top-5 acc 89.844 (88.084)	lr 0.00245
Train [96][2240/3239]	Time 0.239 (0.623)	Data Time 0.001 (0.019)	Loss 2.2087 (2.2354)	Entropy 0.79106 (0.79414)	Top-1 acc 72.266 (70.725)	Top-5 acc 90.625 (88.084)	lr 0.00245
Train [96][2250/3239]	Time 0.219 (0.623)	Data Time 0.001 (0.019)	Loss 2.3557 (2.2353)	Entropy 0.79102 (0.79413)	Top-1 acc 67.969 (70.727)	Top-5 acc 85.938 (88.086)	lr 0.00245
Train [96][2260/3239]	Time 0.231 (0.622)	Data Time 0.001 (0.019)	Loss 2.3854 (2.2354)	Entropy 0.79096 (0.79411)	Top-1 acc 65.234 (70.722)	Top-5 acc 85.156 (88.085)	lr 0.00245
Train [96][2270/3239]	Time 0.239 (0.621)	Data Time 0.001 (0.019)	Loss 2.1141 (2.2354)	Entropy 0.79081 (0.79410)	Top-1 acc 76.172 (70.720)	Top-5 acc 89.453 (88.088)	lr 0.00245
Train [96][2280/3239]	Time 0.233 (0.621)	Data Time 0.001 (0.019)	Loss 2.2874 (2.2352)	Entropy 0.79069 (0.79409)	Top-1 acc 69.141 (70.725)	Top-5 acc 86.328 (88.088)	lr 0.00244
Train [96][2290/3239]	Time 0.223 (0.620)	Data Time 0.001 (0.019)	Loss 2.2231 (2.2353)	Entropy 0.79071 (0.79407)	Top-1 acc 71.484 (70.725)	Top-5 acc 89.844 (88.087)	lr 0.00244
Train [96][2300/3239]	Time 0.231 (0.619)	Data Time 0.001 (0.019)	Loss 2.2895 (2.2354)	Entropy 0.79072 (0.79406)	Top-1 acc 67.969 (70.723)	Top-5 acc 87.891 (88.085)	lr 0.00244
Train [96][2310/3239]	Time 0.258 (0.619)	Data Time 0.001 (0.019)	Loss 2.2666 (2.2354)	Entropy 0.79073 (0.79404)	Top-1 acc 71.094 (70.723)	Top-5 acc 87.500 (88.087)	lr 0.00244
Train [96][2320/3239]	Time 2.561 (0.618)	Data Time 0.001 (0.019)	Loss 2.1491 (2.2352)	Entropy 0.79073 (0.79403)	Top-1 acc 69.531 (70.726)	Top-5 acc 90.234 (88.089)	lr 0.00244
Train [96][2330/3239]	Time 0.260 (0.617)	Data Time 0.001 (0.019)	Loss 2.2274 (2.2351)	Entropy 0.79075 (0.79401)	Top-1 acc 72.656 (70.725)	Top-5 acc 91.016 (88.092)	lr 0.00244
Train [96][2340/3239]	Time 0.234 (0.616)	Data Time 0.001 (0.019)	Loss 2.1744 (2.2353)	Entropy 0.79072 (0.79400)	Top-1 acc 72.266 (70.721)	Top-5 acc 89.062 (88.091)	lr 0.00244
Train [96][2350/3239]	Time 0.327 (0.615)	Data Time 0.001 (0.018)	Loss 2.2706 (2.2350)	Entropy 0.79061 (0.79398)	Top-1 acc 72.656 (70.731)	Top-5 acc 85.547 (88.093)	lr 0.00244
Train [96][2360/3239]	Time 0.238 (0.637)	Data Time 0.002 (0.018)	Loss 2.3268 (2.2350)	Entropy 0.79046 (0.79397)	Top-1 acc 65.625 (70.731)	Top-5 acc 87.109 (88.095)	lr 0.00244
Train [96][2370/3239]	Time 0.227 (0.637)	Data Time 0.002 (0.018)	Loss 2.1968 (2.2349)	Entropy 0.79050 (0.79396)	Top-1 acc 71.875 (70.735)	Top-5 acc 89.844 (88.098)	lr 0.00244
Train [96][2380/3239]	Time 0.236 (0.636)	Data Time 0.002 (0.018)	Loss 2.1180 (2.2347)	Entropy 0.79049 (0.79394)	Top-1 acc 75.000 (70.736)	Top-5 acc 89.453 (88.097)	lr 0.00244
Train [96][2390/3239]	Time 0.383 (0.635)	Data Time 0.002 (0.018)	Loss 2.3325 (2.2357)	Entropy 0.79042 (0.79393)	Top-1 acc 71.094 (70.719)	Top-5 acc 85.938 (88.083)	lr 0.00244
Train [96][2400/3239]	Time 0.230 (0.635)	Data Time 0.001 (0.018)	Loss 2.4780 (2.2360)	Entropy 0.79042 (0.79391)	Top-1 acc 62.109 (70.708)	Top-5 acc 83.203 (88.078)	lr 0.00244
Train [96][2410/3239]	Time 0.233 (0.634)	Data Time 0.001 (0.018)	Loss 2.2264 (2.2361)	Entropy 0.79039 (0.79390)	Top-1 acc 73.438 (70.707)	Top-5 acc 87.109 (88.076)	lr 0.00244
Train [96][2420/3239]	Time 0.251 (0.634)	Data Time 0.001 (0.018)	Loss 2.1902 (2.2362)	Entropy 0.79039 (0.79388)	Top-1 acc 74.219 (70.702)	Top-5 acc 89.453 (88.076)	lr 0.00244
Train [96][2430/3239]	Time 2.569 (0.633)	Data Time 0.001 (0.018)	Loss 2.2703 (2.2363)	Entropy 0.79039 (0.79387)	Top-1 acc 67.969 (70.694)	Top-5 acc 87.500 (88.075)	lr 0.00244
Train [96][2440/3239]	Time 0.236 (0.631)	Data Time 0.001 (0.018)	Loss 2.1218 (2.2361)	Entropy 0.79038 (0.79385)	Top-1 acc 72.266 (70.701)	Top-5 acc 88.281 (88.080)	lr 0.00243
Train [96][2450/3239]	Time 0.226 (0.631)	Data Time 0.001 (0.018)	Loss 2.2275 (2.2360)	Entropy 0.79033 (0.79384)	Top-1 acc 72.656 (70.703)	Top-5 acc 87.109 (88.082)	lr 0.00243
Train [96][2460/3239]	Time 0.227 (0.630)	Data Time 0.001 (0.018)	Loss 2.4119 (2.2361)	Entropy 0.79102 (0.79383)	Top-1 acc 64.453 (70.700)	Top-5 acc 84.375 (88.079)	lr 0.00243
Train [96][2470/3239]	Time 0.235 (0.630)	Data Time 0.001 (0.018)	Loss 2.1896 (2.2361)	Entropy 0.79101 (0.79382)	Top-1 acc 71.094 (70.697)	Top-5 acc 90.625 (88.079)	lr 0.00243
Train [96][2480/3239]	Time 0.253 (0.629)	Data Time 0.002 (0.018)	Loss 2.2189 (2.2361)	Entropy 0.79101 (0.79380)	Top-1 acc 69.141 (70.691)	Top-5 acc 87.891 (88.078)	lr 0.00243
Train [96][2490/3239]	Time 0.277 (0.629)	Data Time 0.001 (0.018)	Loss 2.2694 (2.2364)	Entropy 0.79097 (0.79379)	Top-1 acc 68.750 (70.684)	Top-5 acc 89.062 (88.074)	lr 0.00243
Train [96][2500/3239]	Time 0.222 (0.628)	Data Time 0.001 (0.017)	Loss 2.2487 (2.2363)	Entropy 0.79094 (0.79378)	Top-1 acc 67.188 (70.686)	Top-5 acc 87.109 (88.072)	lr 0.00243
Train [96][2510/3239]	Time 0.241 (0.627)	Data Time 0.001 (0.017)	Loss 2.1906 (2.2363)	Entropy 0.79095 (0.79377)	Top-1 acc 70.312 (70.685)	Top-5 acc 90.234 (88.073)	lr 0.00243
Train [96][2520/3239]	Time 0.219 (0.627)	Data Time 0.001 (0.017)	Loss 2.1905 (2.2362)	Entropy 0.79091 (0.79376)	Top-1 acc 75.391 (70.689)	Top-5 acc 88.281 (88.074)	lr 0.00243
Train [96][2530/3239]	Time 0.229 (0.626)	Data Time 0.001 (0.017)	Loss 2.1220 (2.2360)	Entropy 0.79091 (0.79375)	Top-1 acc 77.734 (70.695)	Top-5 acc 90.234 (88.077)	lr 0.00243
Train [96][2540/3239]	Time 2.646 (0.626)	Data Time 0.003 (0.017)	Loss 2.1929 (2.2360)	Entropy 0.79091 (0.79374)	Top-1 acc 70.312 (70.697)	Top-5 acc 89.062 (88.078)	lr 0.00243
Train [96][2550/3239]	Time 0.198 (0.624)	Data Time 0.001 (0.017)	Loss 2.2267 (2.2359)	Entropy 0.79094 (0.79373)	Top-1 acc 70.312 (70.696)	Top-5 acc 88.281 (88.080)	lr 0.00243
Train [96][2560/3239]	Time 0.344 (0.624)	Data Time 0.001 (0.017)	Loss 2.4146 (2.2360)	Entropy 0.79095 (0.79372)	Top-1 acc 65.234 (70.690)	Top-5 acc 86.328 (88.078)	lr 0.00243
Train [96][2570/3239]	Time 0.242 (0.623)	Data Time 0.001 (0.017)	Loss 2.2537 (2.2359)	Entropy 0.79096 (0.79370)	Top-1 acc 70.312 (70.693)	Top-5 acc 87.891 (88.079)	lr 0.00243
Train [96][2580/3239]	Time 0.231 (0.622)	Data Time 0.001 (0.017)	Loss 2.2049 (2.2359)	Entropy 0.79097 (0.79369)	Top-1 acc 69.141 (70.691)	Top-5 acc 87.500 (88.077)	lr 0.00243
Train [96][2590/3239]	Time 0.242 (0.622)	Data Time 0.001 (0.017)	Loss 2.1633 (2.2362)	Entropy 0.79097 (0.79368)	Top-1 acc 72.656 (70.687)	Top-5 acc 86.719 (88.072)	lr 0.00243
Train [96][2600/3239]	Time 0.226 (0.621)	Data Time 0.001 (0.017)	Loss 2.1557 (2.2363)	Entropy 0.79094 (0.79367)	Top-1 acc 74.609 (70.687)	Top-5 acc 89.062 (88.068)	lr 0.00243
Train [96][2610/3239]	Time 0.226 (0.621)	Data Time 0.001 (0.017)	Loss 2.1795 (2.2361)	Entropy 0.79090 (0.79366)	Top-1 acc 70.703 (70.694)	Top-5 acc 87.109 (88.070)	lr 0.00242
Train [96][2620/3239]	Time 0.246 (0.620)	Data Time 0.001 (0.017)	Loss 2.2707 (2.2359)	Entropy 0.79078 (0.79365)	Top-1 acc 68.359 (70.701)	Top-5 acc 89.062 (88.072)	lr 0.00242
Train [96][2630/3239]	Time 0.230 (0.620)	Data Time 0.001 (0.017)	Loss 2.2194 (2.2359)	Entropy 0.79078 (0.79364)	Top-1 acc 71.484 (70.702)	Top-5 acc 89.062 (88.074)	lr 0.00242
Train [96][2640/3239]	Time 0.236 (0.619)	Data Time 0.001 (0.017)	Loss 2.2218 (2.2360)	Entropy 0.79076 (0.79363)	Top-1 acc 71.484 (70.698)	Top-5 acc 88.672 (88.070)	lr 0.00242
Train [96][2650/3239]	Time 0.215 (0.619)	Data Time 0.001 (0.017)	Loss 2.3736 (2.2361)	Entropy 0.79075 (0.79362)	Top-1 acc 66.406 (70.695)	Top-5 acc 89.062 (88.067)	lr 0.00242
Train [96][2660/3239]	Time 0.266 (0.618)	Data Time 0.001 (0.017)	Loss 2.1375 (2.2362)	Entropy 0.79075 (0.79361)	Top-1 acc 73.438 (70.694)	Top-5 acc 91.406 (88.065)	lr 0.00242
Train [96][2670/3239]	Time 0.243 (0.617)	Data Time 0.002 (0.016)	Loss 2.2681 (2.2362)	Entropy 0.79071 (0.79360)	Top-1 acc 67.969 (70.694)	Top-5 acc 86.719 (88.065)	lr 0.00242
Train [96][2680/3239]	Time 0.234 (0.617)	Data Time 0.001 (0.016)	Loss 2.1888 (2.2362)	Entropy 0.79065 (0.79359)	Top-1 acc 69.922 (70.695)	Top-5 acc 89.062 (88.065)	lr 0.00242
Train [96][2690/3239]	Time 0.360 (0.616)	Data Time 0.001 (0.016)	Loss 2.4011 (2.2363)	Entropy 0.79064 (0.79358)	Top-1 acc 69.141 (70.692)	Top-5 acc 82.422 (88.062)	lr 0.00242
Train [96][2700/3239]	Time 0.211 (0.616)	Data Time 0.001 (0.016)	Loss 2.1559 (2.2362)	Entropy 0.79067 (0.79357)	Top-1 acc 73.828 (70.695)	Top-5 acc 88.672 (88.061)	lr 0.00242
Train [96][2710/3239]	Time 0.301 (0.634)	Data Time 0.004 (0.016)	Loss 2.3030 (2.2362)	Entropy 0.79071 (0.79355)	Top-1 acc 67.188 (70.692)	Top-5 acc 88.281 (88.063)	lr 0.00242
Train [96][2720/3239]	Time 0.233 (0.634)	Data Time 0.002 (0.016)	Loss 2.2497 (2.2361)	Entropy 0.79062 (0.79354)	Top-1 acc 71.094 (70.694)	Top-5 acc 88.281 (88.065)	lr 0.00242
Train [96][2730/3239]	Time 0.273 (0.634)	Data Time 0.001 (0.016)	Loss 2.0527 (2.2361)	Entropy 0.79059 (0.79353)	Top-1 acc 72.656 (70.692)	Top-5 acc 92.578 (88.065)	lr 0.00242
Train [96][2740/3239]	Time 0.234 (0.633)	Data Time 0.001 (0.016)	Loss 2.1624 (2.2361)	Entropy 0.79058 (0.79352)	Top-1 acc 71.875 (70.693)	Top-5 acc 88.281 (88.065)	lr 0.00242
Train [96][2750/3239]	Time 0.264 (0.632)	Data Time 0.001 (0.016)	Loss 2.1654 (2.2361)	Entropy 0.79055 (0.79351)	Top-1 acc 74.609 (70.689)	Top-5 acc 87.891 (88.063)	lr 0.00242
Train [96][2760/3239]	Time 0.252 (0.632)	Data Time 0.001 (0.016)	Loss 2.1378 (2.2363)	Entropy 0.79056 (0.79350)	Top-1 acc 73.438 (70.685)	Top-5 acc 91.797 (88.061)	lr 0.00242
Train [96][2770/3239]	Time 0.379 (0.631)	Data Time 0.001 (0.016)	Loss 2.0566 (2.2364)	Entropy 0.79055 (0.79349)	Top-1 acc 73.047 (70.681)	Top-5 acc 93.750 (88.060)	lr 0.00242
Train [96][2780/3239]	Time 0.224 (0.631)	Data Time 0.001 (0.016)	Loss 2.1484 (2.2363)	Entropy 0.79061 (0.79348)	Top-1 acc 72.656 (70.687)	Top-5 acc 89.844 (88.062)	lr 0.00241
Train [96][2790/3239]	Time 0.235 (0.630)	Data Time 0.001 (0.016)	Loss 2.2244 (2.2363)	Entropy 0.79059 (0.79347)	Top-1 acc 69.922 (70.685)	Top-5 acc 89.453 (88.062)	lr 0.00241
Train [96][2800/3239]	Time 0.291 (0.630)	Data Time 0.001 (0.016)	Loss 2.2169 (2.2363)	Entropy 0.79058 (0.79346)	Top-1 acc 70.312 (70.683)	Top-5 acc 87.109 (88.062)	lr 0.00241
Train [96][2810/3239]	Time 0.209 (0.629)	Data Time 0.001 (0.016)	Loss 2.2866 (2.2364)	Entropy 0.79059 (0.79345)	Top-1 acc 68.359 (70.680)	Top-5 acc 88.281 (88.058)	lr 0.00241
Train [96][2820/3239]	Time 0.232 (0.629)	Data Time 0.002 (0.016)	Loss 2.2739 (2.2365)	Entropy 0.79051 (0.79344)	Top-1 acc 71.875 (70.679)	Top-5 acc 89.453 (88.057)	lr 0.00241
Train [96][2830/3239]	Time 0.223 (0.628)	Data Time 0.001 (0.016)	Loss 2.2632 (2.2367)	Entropy 0.79048 (0.79343)	Top-1 acc 69.141 (70.673)	Top-5 acc 87.500 (88.053)	lr 0.00241
Train [96][2840/3239]	Time 0.250 (0.628)	Data Time 0.001 (0.016)	Loss 2.2017 (2.2367)	Entropy 0.79043 (0.79342)	Top-1 acc 68.750 (70.669)	Top-5 acc 89.844 (88.054)	lr 0.00241
Train [96][2850/3239]	Time 0.222 (0.627)	Data Time 0.001 (0.016)	Loss 2.3164 (2.2367)	Entropy 0.79033 (0.79341)	Top-1 acc 68.750 (70.667)	Top-5 acc 86.328 (88.052)	lr 0.00241
Train [96][2860/3239]	Time 0.273 (0.626)	Data Time 0.001 (0.015)	Loss 2.3549 (2.2368)	Entropy 0.79028 (0.79340)	Top-1 acc 67.969 (70.666)	Top-5 acc 83.594 (88.050)	lr 0.00241
Train [96][2870/3239]	Time 0.241 (0.626)	Data Time 0.001 (0.015)	Loss 2.3342 (2.2368)	Entropy 0.79025 (0.79339)	Top-1 acc 67.578 (70.666)	Top-5 acc 83.203 (88.051)	lr 0.00241
Train [96][2880/3239]	Time 0.232 (0.625)	Data Time 0.001 (0.015)	Loss 2.2296 (2.2367)	Entropy 0.79025 (0.79338)	Top-1 acc 72.656 (70.667)	Top-5 acc 89.844 (88.053)	lr 0.00241
Train [96][2890/3239]	Time 0.250 (0.625)	Data Time 0.001 (0.015)	Loss 2.1737 (2.2367)	Entropy 0.79017 (0.79336)	Top-1 acc 73.047 (70.664)	Top-5 acc 87.500 (88.050)	lr 0.00241
Train [96][2900/3239]	Time 0.345 (0.624)	Data Time 0.001 (0.015)	Loss 2.3331 (2.2366)	Entropy 0.79019 (0.79335)	Top-1 acc 66.016 (70.668)	Top-5 acc 87.891 (88.054)	lr 0.00241
Train [96][2910/3239]	Time 0.257 (0.624)	Data Time 0.001 (0.015)	Loss 2.2845 (2.2366)	Entropy 0.79020 (0.79334)	Top-1 acc 71.094 (70.669)	Top-5 acc 87.109 (88.055)	lr 0.00241
Train [96][2920/3239]	Time 0.241 (0.623)	Data Time 0.001 (0.015)	Loss 2.2828 (2.2367)	Entropy 0.79020 (0.79333)	Top-1 acc 67.188 (70.666)	Top-5 acc 86.328 (88.055)	lr 0.00241
Train [96][2930/3239]	Time 0.230 (0.623)	Data Time 0.001 (0.015)	Loss 2.2085 (2.2366)	Entropy 0.79016 (0.79332)	Top-1 acc 70.312 (70.671)	Top-5 acc 90.625 (88.057)	lr 0.00241
Train [96][2940/3239]	Time 0.271 (0.622)	Data Time 0.001 (0.015)	Loss 2.1325 (2.2366)	Entropy 0.79002 (0.79331)	Top-1 acc 74.609 (70.670)	Top-5 acc 89.062 (88.057)	lr 0.00241
Train [96][2950/3239]	Time 0.225 (0.622)	Data Time 0.001 (0.015)	Loss 2.3534 (2.2366)	Entropy 0.78994 (0.79330)	Top-1 acc 66.406 (70.672)	Top-5 acc 87.109 (88.059)	lr 0.00240
Train [96][2960/3239]	Time 0.256 (0.621)	Data Time 0.001 (0.015)	Loss 2.3231 (2.2367)	Entropy 0.78988 (0.79329)	Top-1 acc 66.406 (70.672)	Top-5 acc 89.453 (88.057)	lr 0.00240
Train [96][2970/3239]	Time 0.266 (0.621)	Data Time 0.001 (0.015)	Loss 2.2872 (2.2367)	Entropy 0.78985 (0.79328)	Top-1 acc 69.922 (70.668)	Top-5 acc 85.938 (88.055)	lr 0.00240
Train [96][2980/3239]	Time 0.218 (0.621)	Data Time 0.001 (0.015)	Loss 2.4362 (2.2368)	Entropy 0.78978 (0.79326)	Top-1 acc 68.359 (70.666)	Top-5 acc 83.203 (88.052)	lr 0.00240
Train [96][2990/3239]	Time 0.233 (0.620)	Data Time 0.001 (0.015)	Loss 2.2889 (2.2368)	Entropy 0.78977 (0.79325)	Top-1 acc 69.531 (70.669)	Top-5 acc 89.062 (88.052)	lr 0.00240
Train [96][3000/3239]	Time 0.225 (0.620)	Data Time 0.001 (0.015)	Loss 2.0994 (2.2367)	Entropy 0.78977 (0.79324)	Top-1 acc 75.391 (70.671)	Top-5 acc 89.453 (88.055)	lr 0.00240
Train [96][3010/3239]	Time 0.234 (0.619)	Data Time 0.002 (0.015)	Loss 2.1851 (2.2367)	Entropy 0.78970 (0.79323)	Top-1 acc 73.047 (70.675)	Top-5 acc 89.062 (88.055)	lr 0.00240
Train [96][3020/3239]	Time 0.228 (0.619)	Data Time 0.001 (0.015)	Loss 2.2348 (2.2369)	Entropy 0.78970 (0.79322)	Top-1 acc 72.266 (70.669)	Top-5 acc 87.891 (88.052)	lr 0.00240
Train [96][3030/3239]	Time 0.347 (0.618)	Data Time 0.001 (0.015)	Loss 2.2032 (2.2370)	Entropy 0.78965 (0.79321)	Top-1 acc 70.312 (70.667)	Top-5 acc 89.844 (88.051)	lr 0.00240
Train [96][3040/3239]	Time 0.343 (0.635)	Data Time 0.004 (0.015)	Loss 2.3286 (2.2371)	Entropy 0.78967 (0.79320)	Top-1 acc 68.359 (70.658)	Top-5 acc 87.500 (88.050)	lr 0.00240
Train [96][3050/3239]	Time 0.227 (0.634)	Data Time 0.002 (0.015)	Loss 2.1358 (2.2370)	Entropy 0.78968 (0.79318)	Top-1 acc 74.609 (70.662)	Top-5 acc 91.016 (88.054)	lr 0.00240
Train [96][3060/3239]	Time 0.289 (0.634)	Data Time 0.002 (0.015)	Loss 2.2075 (2.2368)	Entropy 0.78972 (0.79317)	Top-1 acc 71.875 (70.663)	Top-5 acc 87.500 (88.058)	lr 0.00240
Train [96][3070/3239]	Time 0.365 (0.633)	Data Time 0.002 (0.015)	Loss 2.1429 (2.2368)	Entropy 0.78978 (0.79316)	Top-1 acc 71.875 (70.665)	Top-5 acc 90.625 (88.056)	lr 0.00240
Train [96][3080/3239]	Time 0.239 (0.633)	Data Time 0.001 (0.015)	Loss 2.1532 (2.2369)	Entropy 0.78967 (0.79315)	Top-1 acc 74.609 (70.660)	Top-5 acc 89.844 (88.051)	lr 0.00240
Train [96][3090/3239]	Time 0.244 (0.632)	Data Time 0.001 (0.014)	Loss 2.1742 (2.2369)	Entropy 0.78959 (0.79314)	Top-1 acc 69.141 (70.663)	Top-5 acc 89.453 (88.052)	lr 0.00240
Train [96][3100/3239]	Time 0.274 (0.632)	Data Time 0.001 (0.014)	Loss 2.2844 (2.2369)	Entropy 0.78958 (0.79313)	Top-1 acc 70.312 (70.668)	Top-5 acc 87.500 (88.052)	lr 0.00240
Train [96][3110/3239]	Time 0.233 (0.631)	Data Time 0.001 (0.014)	Loss 2.3846 (2.2369)	Entropy 0.78953 (0.79312)	Top-1 acc 68.359 (70.664)	Top-5 acc 84.375 (88.052)	lr 0.00240
Train [96][3120/3239]	Time 0.240 (0.631)	Data Time 0.001 (0.014)	Loss 2.2833 (2.2370)	Entropy 0.78956 (0.79310)	Top-1 acc 69.141 (70.664)	Top-5 acc 87.891 (88.050)	lr 0.00239
Train [96][3130/3239]	Time 0.266 (0.631)	Data Time 0.001 (0.014)	Loss 2.4001 (2.2370)	Entropy 0.78954 (0.79309)	Top-1 acc 66.406 (70.663)	Top-5 acc 85.547 (88.051)	lr 0.00239
Train [96][3140/3239]	Time 0.254 (0.630)	Data Time 0.001 (0.014)	Loss 2.1790 (2.2370)	Entropy 0.78954 (0.79308)	Top-1 acc 74.219 (70.662)	Top-5 acc 89.062 (88.050)	lr 0.00239
Train [96][3150/3239]	Time 0.223 (0.630)	Data Time 0.001 (0.014)	Loss 2.1933 (2.2371)	Entropy 0.78944 (0.79307)	Top-1 acc 72.266 (70.658)	Top-5 acc 88.281 (88.048)	lr 0.00239
Train [96][3160/3239]	Time 0.220 (0.629)	Data Time 0.001 (0.014)	Loss 2.3644 (2.2371)	Entropy 0.78947 (0.79306)	Top-1 acc 67.188 (70.657)	Top-5 acc 86.719 (88.046)	lr 0.00239
Train [96][3170/3239]	Time 0.229 (0.629)	Data Time 0.001 (0.014)	Loss 2.2391 (2.2371)	Entropy 0.78944 (0.79305)	Top-1 acc 69.922 (70.658)	Top-5 acc 86.328 (88.048)	lr 0.00239
Train [96][3180/3239]	Time 0.224 (0.628)	Data Time 0.000 (0.014)	Loss 2.2264 (2.2372)	Entropy 0.78944 (0.79304)	Top-1 acc 73.438 (70.655)	Top-5 acc 89.062 (88.047)	lr 0.00239
Train [96][3190/3239]	Time 0.244 (0.628)	Data Time 0.000 (0.014)	Loss 2.2003 (2.2372)	Entropy 0.78930 (0.79302)	Top-1 acc 74.219 (70.653)	Top-5 acc 90.234 (88.051)	lr 0.00239
Train [96][3200/3239]	Time 0.327 (0.627)	Data Time 0.000 (0.014)	Loss 2.3043 (2.2373)	Entropy 0.78931 (0.79301)	Top-1 acc 67.969 (70.645)	Top-5 acc 87.891 (88.050)	lr 0.00239
Train [96][3210/3239]	Time 0.231 (0.627)	Data Time 0.000 (0.014)	Loss 2.0913 (2.2372)	Entropy 0.78939 (0.79300)	Top-1 acc 73.047 (70.646)	Top-5 acc 90.625 (88.053)	lr 0.00239
Train [96][3220/3239]	Time 0.236 (0.626)	Data Time 0.000 (0.014)	Loss 2.2449 (2.2371)	Entropy 0.78943 (0.79299)	Top-1 acc 71.875 (70.649)	Top-5 acc 87.891 (88.055)	lr 0.00239
Train [96][3230/3239]	Time 0.251 (0.626)	Data Time 0.000 (0.014)	Loss 2.1015 (2.2370)	Entropy 0.78937 (0.79298)	Top-1 acc 73.828 (70.653)	Top-5 acc 90.234 (88.057)	lr 0.00239
Train [96][3239/3239]	Time 2.366 (0.625)	Data Time 0.000 (0.014)	Loss 2.3096 (2.2372)	Entropy 0.78937 (0.79297)	Top-1 acc 71.605 (70.646)	Top-5 acc 87.654 (88.054)	lr 0.00239
==========Valid [96/120]	loss 1.234	top-1 acc 71.754 (71.754)	top-5 acc 89.469	Train top-1 70.646	top-5 88.054	Entropy 0.78937	Latency-None: 0.000ms	Flops: 546.53M
Train [97][0/3239]	Time 43.389 (43.389)	Data Time 40.007 (40.007)	Loss 2.2076 (2.2076)	Entropy 0.78940 (0.78940)	Top-1 acc 71.875 (71.875)	Top-5 acc 87.891 (87.891)	lr 0.00239
Train [97][10/3239]	Time 2.660 (4.460)	Data Time 0.001 (3.639)	Loss 2.2365 (2.2323)	Entropy 0.78940 (0.78940)	Top-1 acc 68.359 (71.129)	Top-5 acc 87.109 (87.997)	lr 0.00239
Train [97][20/3239]	Time 0.242 (2.454)	Data Time 0.001 (1.907)	Loss 2.2200 (2.2337)	Entropy 0.78934 (0.78937)	Top-1 acc 72.656 (71.150)	Top-5 acc 87.109 (87.909)	lr 0.00239
Train [97][30/3239]	Time 0.224 (1.820)	Data Time 0.001 (1.292)	Loss 2.1812 (2.2161)	Entropy 0.78931 (0.78935)	Top-1 acc 73.438 (71.623)	Top-5 acc 89.453 (88.155)	lr 0.00239
Train [97][40/3239]	Time 0.251 (1.500)	Data Time 0.001 (0.977)	Loss 2.1911 (2.2132)	Entropy 0.78928 (0.78934)	Top-1 acc 71.094 (71.618)	Top-5 acc 90.625 (88.300)	lr 0.00238
Train [97][50/3239]	Time 0.331 (1.301)	Data Time 0.002 (0.786)	Loss 2.3492 (2.2139)	Entropy 0.78929 (0.78933)	Top-1 acc 67.578 (71.653)	Top-5 acc 84.766 (88.189)	lr 0.00238
Train [97][60/3239]	Time 0.243 (1.166)	Data Time 0.001 (0.658)	Loss 2.1396 (2.2163)	Entropy 0.78921 (0.78931)	Top-1 acc 75.391 (71.619)	Top-5 acc 91.797 (88.192)	lr 0.00238
Train [97][70/3239]	Time 0.242 (1.070)	Data Time 0.001 (0.565)	Loss 2.1622 (2.2193)	Entropy 0.78924 (0.78930)	Top-1 acc 73.438 (71.468)	Top-5 acc 89.062 (88.116)	lr 0.00238
Train [97][80/3239]	Time 0.242 (0.998)	Data Time 0.002 (0.496)	Loss 2.2880 (2.2183)	Entropy 0.78922 (0.78929)	Top-1 acc 69.141 (71.311)	Top-5 acc 86.719 (88.127)	lr 0.00238
Train [97][90/3239]	Time 0.313 (0.942)	Data Time 0.001 (0.441)	Loss 2.1652 (2.2166)	Entropy 0.78924 (0.78928)	Top-1 acc 71.094 (71.257)	Top-5 acc 88.672 (88.165)	lr 0.00238
Train [97][100/3239]	Time 0.234 (0.897)	Data Time 0.001 (0.398)	Loss 2.2527 (2.2118)	Entropy 0.78925 (0.78928)	Top-1 acc 71.094 (71.357)	Top-5 acc 84.766 (88.204)	lr 0.00238
Train [97][110/3239]	Time 0.249 (0.859)	Data Time 0.001 (0.362)	Loss 2.2288 (2.2127)	Entropy 0.78922 (0.78928)	Top-1 acc 70.703 (71.305)	Top-5 acc 88.672 (88.239)	lr 0.00238
Train [97][120/3239]	Time 2.618 (0.827)	Data Time 0.002 (0.332)	Loss 2.3047 (2.2168)	Entropy 0.78922 (0.78927)	Top-1 acc 68.359 (71.184)	Top-5 acc 86.328 (88.236)	lr 0.00238
Train [97][130/3239]	Time 0.235 (0.783)	Data Time 0.001 (0.307)	Loss 2.3157 (2.2234)	Entropy 0.78915 (0.78926)	Top-1 acc 71.094 (70.998)	Top-5 acc 87.500 (88.162)	lr 0.00238
Train [97][140/3239]	Time 0.224 (0.762)	Data Time 0.001 (0.285)	Loss 2.3492 (2.2245)	Entropy 0.78918 (0.78926)	Top-1 acc 69.141 (71.036)	Top-5 acc 88.281 (88.170)	lr 0.00238
Train [97][150/3239]	Time 0.261 (1.096)	Data Time 0.003 (0.267)	Loss 2.0519 (2.2228)	Entropy 0.78908 (0.78925)	Top-1 acc 75.000 (71.065)	Top-5 acc 92.188 (88.232)	lr 0.00238
Train [97][160/3239]	Time 0.227 (1.059)	Data Time 0.002 (0.250)	Loss 2.1165 (2.2211)	Entropy 0.78909 (0.78924)	Top-1 acc 73.438 (71.065)	Top-5 acc 87.500 (88.269)	lr 0.00238
Train [97][170/3239]	Time 0.218 (1.025)	Data Time 0.001 (0.236)	Loss 2.1089 (2.2218)	Entropy 0.78908 (0.78923)	Top-1 acc 72.656 (71.053)	Top-5 acc 89.844 (88.238)	lr 0.00238
Train [97][180/3239]	Time 0.225 (0.994)	Data Time 0.001 (0.223)	Loss 2.2135 (2.2253)	Entropy 0.78903 (0.78922)	Top-1 acc 71.094 (70.941)	Top-5 acc 89.453 (88.163)	lr 0.00238
Train [97][190/3239]	Time 0.215 (0.968)	Data Time 0.001 (0.211)	Loss 2.3207 (2.2251)	Entropy 0.78894 (0.78921)	Top-1 acc 67.578 (70.928)	Top-5 acc 86.328 (88.112)	lr 0.00238
Train [97][200/3239]	Time 0.233 (0.943)	Data Time 0.001 (0.201)	Loss 2.1530 (2.2244)	Entropy 0.78895 (0.78919)	Top-1 acc 72.266 (70.956)	Top-5 acc 91.016 (88.165)	lr 0.00238
Train [97][210/3239]	Time 0.234 (0.922)	Data Time 0.001 (0.191)	Loss 2.3077 (2.2255)	Entropy 0.78896 (0.78918)	Top-1 acc 67.969 (70.907)	Top-5 acc 88.672 (88.161)	lr 0.00237
Train [97][220/3239]	Time 0.229 (0.902)	Data Time 0.001 (0.183)	Loss 2.4674 (2.2241)	Entropy 0.78893 (0.78917)	Top-1 acc 64.062 (70.959)	Top-5 acc 83.984 (88.207)	lr 0.00237
Train [97][230/3239]	Time 2.487 (0.883)	Data Time 0.001 (0.175)	Loss 2.2063 (2.2239)	Entropy 0.78893 (0.78916)	Top-1 acc 68.359 (70.982)	Top-5 acc 89.453 (88.210)	lr 0.00237
Train [97][240/3239]	Time 0.239 (0.857)	Data Time 0.002 (0.168)	Loss 2.4391 (2.2239)	Entropy 0.78895 (0.78915)	Top-1 acc 66.016 (70.980)	Top-5 acc 85.156 (88.239)	lr 0.00237
Train [97][250/3239]	Time 0.238 (0.842)	Data Time 0.001 (0.161)	Loss 2.2222 (2.2246)	Entropy 0.78893 (0.78914)	Top-1 acc 72.266 (70.980)	Top-5 acc 90.234 (88.217)	lr 0.00237
Train [97][260/3239]	Time 0.311 (0.829)	Data Time 0.001 (0.155)	Loss 2.0675 (2.2241)	Entropy 0.78886 (0.78913)	Top-1 acc 76.172 (71.016)	Top-5 acc 90.625 (88.215)	lr 0.00237
Train [97][270/3239]	Time 0.231 (0.815)	Data Time 0.002 (0.149)	Loss 2.1827 (2.2240)	Entropy 0.78897 (0.78913)	Top-1 acc 71.875 (71.051)	Top-5 acc 86.719 (88.209)	lr 0.00237
Train [97][280/3239]	Time 0.226 (0.803)	Data Time 0.001 (0.144)	Loss 2.2196 (2.2232)	Entropy 0.78889 (0.78912)	Top-1 acc 72.266 (71.059)	Top-5 acc 88.281 (88.233)	lr 0.00237
Train [97][290/3239]	Time 0.229 (0.792)	Data Time 0.001 (0.139)	Loss 2.1439 (2.2226)	Entropy 0.78891 (0.78911)	Top-1 acc 73.828 (71.075)	Top-5 acc 88.672 (88.233)	lr 0.00237
Train [97][300/3239]	Time 0.230 (0.781)	Data Time 0.001 (0.135)	Loss 2.2711 (2.2217)	Entropy 0.78873 (0.78910)	Top-1 acc 69.922 (71.131)	Top-5 acc 88.281 (88.249)	lr 0.00237
Train [97][310/3239]	Time 0.233 (0.772)	Data Time 0.001 (0.130)	Loss 2.1663 (2.2227)	Entropy 0.78878 (0.78909)	Top-1 acc 72.656 (71.114)	Top-5 acc 88.281 (88.234)	lr 0.00237
Train [97][320/3239]	Time 0.252 (0.763)	Data Time 0.001 (0.126)	Loss 2.2780 (2.2231)	Entropy 0.78876 (0.78908)	Top-1 acc 68.750 (71.093)	Top-5 acc 89.453 (88.241)	lr 0.00237
Train [97][330/3239]	Time 0.224 (0.755)	Data Time 0.001 (0.122)	Loss 2.1431 (2.2230)	Entropy 0.78877 (0.78907)	Top-1 acc 72.656 (71.113)	Top-5 acc 89.453 (88.228)	lr 0.00237
Train [97][340/3239]	Time 2.613 (0.747)	Data Time 0.001 (0.119)	Loss 2.4738 (2.2231)	Entropy 0.78877 (0.78906)	Top-1 acc 64.453 (71.106)	Top-5 acc 82.422 (88.227)	lr 0.00237
Train [97][350/3239]	Time 0.344 (0.733)	Data Time 0.002 (0.116)	Loss 2.2888 (2.2230)	Entropy 0.78869 (0.78905)	Top-1 acc 70.703 (71.126)	Top-5 acc 85.938 (88.222)	lr 0.00237
Train [97][360/3239]	Time 0.249 (0.726)	Data Time 0.001 (0.112)	Loss 2.1365 (2.2215)	Entropy 0.78867 (0.78904)	Top-1 acc 74.609 (71.159)	Top-5 acc 89.844 (88.244)	lr 0.00237
Train [97][370/3239]	Time 0.253 (0.719)	Data Time 0.002 (0.109)	Loss 2.0833 (2.2229)	Entropy 0.78869 (0.78903)	Top-1 acc 73.828 (71.108)	Top-5 acc 91.406 (88.230)	lr 0.00237
Train [97][380/3239]	Time 0.211 (0.713)	Data Time 0.001 (0.107)	Loss 2.3216 (2.2228)	Entropy 0.78865 (0.78902)	Top-1 acc 71.875 (71.111)	Top-5 acc 89.453 (88.234)	lr 0.00236
Train [97][390/3239]	Time 0.316 (0.707)	Data Time 0.001 (0.104)	Loss 2.1496 (2.2237)	Entropy 0.78860 (0.78901)	Top-1 acc 69.141 (71.091)	Top-5 acc 90.234 (88.215)	lr 0.00236
Train [97][400/3239]	Time 0.246 (0.701)	Data Time 0.001 (0.101)	Loss 2.3004 (2.2245)	Entropy 0.78858 (0.78900)	Top-1 acc 69.531 (71.071)	Top-5 acc 84.766 (88.195)	lr 0.00236
Train [97][410/3239]	Time 0.247 (0.695)	Data Time 0.002 (0.099)	Loss 2.3227 (2.2252)	Entropy 0.78865 (0.78899)	Top-1 acc 66.797 (71.042)	Top-5 acc 87.109 (88.168)	lr 0.00236
Train [97][420/3239]	Time 0.226 (0.690)	Data Time 0.001 (0.097)	Loss 2.1567 (2.2255)	Entropy 0.78860 (0.78898)	Top-1 acc 73.438 (71.057)	Top-5 acc 88.281 (88.162)	lr 0.00236
Train [97][430/3239]	Time 0.243 (0.685)	Data Time 0.002 (0.094)	Loss 2.1632 (2.2258)	Entropy 0.78853 (0.78897)	Top-1 acc 71.094 (71.057)	Top-5 acc 90.234 (88.146)	lr 0.00236
Train [97][440/3239]	Time 0.223 (0.681)	Data Time 0.001 (0.092)	Loss 2.1789 (2.2262)	Entropy 0.78854 (0.78896)	Top-1 acc 71.484 (71.039)	Top-5 acc 90.625 (88.127)	lr 0.00236
Train [97][450/3239]	Time 2.440 (0.676)	Data Time 0.002 (0.090)	Loss 2.0431 (2.2251)	Entropy 0.78854 (0.78895)	Top-1 acc 77.344 (71.097)	Top-5 acc 89.453 (88.144)	lr 0.00236
Train [97][460/3239]	Time 0.230 (0.667)	Data Time 0.001 (0.088)	Loss 2.4876 (2.2257)	Entropy 0.78849 (0.78894)	Top-1 acc 66.406 (71.069)	Top-5 acc 83.203 (88.131)	lr 0.00236
Train [97][470/3239]	Time 0.262 (0.663)	Data Time 0.001 (0.087)	Loss 2.4328 (2.2265)	Entropy 0.78847 (0.78893)	Top-1 acc 67.578 (71.055)	Top-5 acc 84.375 (88.111)	lr 0.00236
Train [97][480/3239]	Time 0.247 (0.659)	Data Time 0.001 (0.085)	Loss 2.2222 (2.2272)	Entropy 0.78847 (0.78892)	Top-1 acc 72.656 (71.045)	Top-5 acc 87.891 (88.103)	lr 0.00236
Train [97][490/3239]	Time 0.230 (0.656)	Data Time 0.001 (0.083)	Loss 2.1850 (2.2273)	Entropy 0.78826 (0.78891)	Top-1 acc 71.875 (71.048)	Top-5 acc 86.719 (88.102)	lr 0.00236
Train [97][500/3239]	Time 0.231 (0.652)	Data Time 0.001 (0.081)	Loss 2.1257 (2.2265)	Entropy 0.78825 (0.78890)	Top-1 acc 73.047 (71.049)	Top-5 acc 90.625 (88.129)	lr 0.00236
Train [97][510/3239]	Time 0.221 (0.758)	Data Time 0.002 (0.080)	Loss 2.3747 (2.2273)	Entropy 0.78812 (0.78889)	Top-1 acc 66.406 (71.024)	Top-5 acc 85.938 (88.121)	lr 0.00236
Train [97][520/3239]	Time 0.215 (0.752)	Data Time 0.002 (0.078)	Loss 2.2165 (2.2270)	Entropy 0.78813 (0.78887)	Top-1 acc 70.703 (71.029)	Top-5 acc 88.281 (88.126)	lr 0.00236
Train [97][530/3239]	Time 0.241 (0.747)	Data Time 0.002 (0.077)	Loss 2.1351 (2.2256)	Entropy 0.78807 (0.78886)	Top-1 acc 72.656 (71.050)	Top-5 acc 88.672 (88.141)	lr 0.00236
Train [97][540/3239]	Time 0.224 (0.743)	Data Time 0.001 (0.076)	Loss 2.1901 (2.2255)	Entropy 0.78794 (0.78884)	Top-1 acc 75.000 (71.059)	Top-5 acc 89.453 (88.150)	lr 0.00236
Train [97][550/3239]	Time 0.245 (0.738)	Data Time 0.001 (0.074)	Loss 2.2355 (2.2251)	Entropy 0.78794 (0.78882)	Top-1 acc 72.266 (71.062)	Top-5 acc 85.938 (88.149)	lr 0.00235
Train [97][560/3239]	Time 2.725 (0.733)	Data Time 0.001 (0.073)	Loss 2.3001 (2.2253)	Entropy 0.78794 (0.78881)	Top-1 acc 67.578 (71.058)	Top-5 acc 85.547 (88.143)	lr 0.00235
Train [97][570/3239]	Time 0.301 (0.725)	Data Time 0.002 (0.072)	Loss 2.3054 (2.2245)	Entropy 0.78796 (0.78879)	Top-1 acc 69.531 (71.068)	Top-5 acc 86.328 (88.159)	lr 0.00235
Train [97][580/3239]	Time 0.240 (0.721)	Data Time 0.001 (0.071)	Loss 2.1861 (2.2240)	Entropy 0.78781 (0.78878)	Top-1 acc 75.391 (71.107)	Top-5 acc 88.281 (88.164)	lr 0.00235
Train [97][590/3239]	Time 0.235 (0.717)	Data Time 0.001 (0.069)	Loss 2.0842 (2.2236)	Entropy 0.78777 (0.78876)	Top-1 acc 72.656 (71.114)	Top-5 acc 91.406 (88.164)	lr 0.00235
Train [97][600/3239]	Time 0.357 (0.713)	Data Time 0.001 (0.068)	Loss 2.1997 (2.2237)	Entropy 0.78779 (0.78874)	Top-1 acc 71.875 (71.100)	Top-5 acc 89.453 (88.176)	lr 0.00235
Train [97][610/3239]	Time 0.238 (0.710)	Data Time 0.001 (0.067)	Loss 2.2181 (2.2228)	Entropy 0.78780 (0.78873)	Top-1 acc 69.922 (71.123)	Top-5 acc 89.844 (88.195)	lr 0.00235
Train [97][620/3239]	Time 0.229 (0.706)	Data Time 0.001 (0.066)	Loss 2.3285 (2.2230)	Entropy 0.78775 (0.78871)	Top-1 acc 71.484 (71.138)	Top-5 acc 84.766 (88.180)	lr 0.00235
Train [97][630/3239]	Time 0.234 (0.702)	Data Time 0.001 (0.065)	Loss 2.2715 (2.2233)	Entropy 0.78769 (0.78870)	Top-1 acc 67.969 (71.125)	Top-5 acc 87.109 (88.169)	lr 0.00235
Train [97][640/3239]	Time 0.231 (0.699)	Data Time 0.001 (0.064)	Loss 2.2007 (2.2227)	Entropy 0.78765 (0.78868)	Top-1 acc 71.484 (71.147)	Top-5 acc 87.500 (88.176)	lr 0.00235
Train [97][650/3239]	Time 0.220 (0.696)	Data Time 0.001 (0.063)	Loss 2.1747 (2.2224)	Entropy 0.78759 (0.78867)	Top-1 acc 73.438 (71.147)	Top-5 acc 86.328 (88.182)	lr 0.00235
Train [97][660/3239]	Time 0.230 (0.692)	Data Time 0.001 (0.062)	Loss 2.2734 (2.2229)	Entropy 0.78755 (0.78865)	Top-1 acc 72.266 (71.133)	Top-5 acc 87.891 (88.177)	lr 0.00235
Train [97][670/3239]	Time 2.597 (0.689)	Data Time 0.002 (0.061)	Loss 2.2817 (2.2221)	Entropy 0.78755 (0.78863)	Top-1 acc 68.359 (71.144)	Top-5 acc 86.719 (88.195)	lr 0.00235
Train [97][680/3239]	Time 0.288 (0.683)	Data Time 0.001 (0.060)	Loss 2.0976 (2.2217)	Entropy 0.78753 (0.78862)	Top-1 acc 75.391 (71.156)	Top-5 acc 88.281 (88.200)	lr 0.00235
Train [97][690/3239]	Time 0.247 (0.680)	Data Time 0.001 (0.060)	Loss 2.3887 (2.2219)	Entropy 0.78756 (0.78860)	Top-1 acc 67.578 (71.155)	Top-5 acc 86.328 (88.195)	lr 0.00235
Train [97][700/3239]	Time 0.246 (0.677)	Data Time 0.001 (0.059)	Loss 2.1747 (2.2218)	Entropy 0.78758 (0.78859)	Top-1 acc 69.922 (71.168)	Top-5 acc 90.625 (88.196)	lr 0.00235
Train [97][710/3239]	Time 0.229 (0.674)	Data Time 0.001 (0.058)	Loss 2.1682 (2.2212)	Entropy 0.78757 (0.78857)	Top-1 acc 74.609 (71.190)	Top-5 acc 90.234 (88.205)	lr 0.00235
Train [97][720/3239]	Time 0.222 (0.672)	Data Time 0.001 (0.057)	Loss 2.1993 (2.2220)	Entropy 0.78755 (0.78856)	Top-1 acc 71.094 (71.175)	Top-5 acc 88.672 (88.188)	lr 0.00234
Train [97][730/3239]	Time 0.332 (0.669)	Data Time 0.001 (0.056)	Loss 2.1793 (2.2219)	Entropy 0.78746 (0.78854)	Top-1 acc 73.047 (71.172)	Top-5 acc 87.500 (88.185)	lr 0.00234
Train [97][740/3239]	Time 0.219 (0.666)	Data Time 0.001 (0.056)	Loss 2.2194 (2.2226)	Entropy 0.78740 (0.78853)	Top-1 acc 73.438 (71.168)	Top-5 acc 88.281 (88.175)	lr 0.00234
Train [97][750/3239]	Time 0.215 (0.664)	Data Time 0.001 (0.055)	Loss 2.3249 (2.2225)	Entropy 0.78735 (0.78851)	Top-1 acc 71.484 (71.155)	Top-5 acc 83.594 (88.180)	lr 0.00234
Train [97][760/3239]	Time 0.241 (0.661)	Data Time 0.001 (0.054)	Loss 2.3411 (2.2220)	Entropy 0.78742 (0.78850)	Top-1 acc 67.188 (71.158)	Top-5 acc 85.938 (88.189)	lr 0.00234
Train [97][770/3239]	Time 0.331 (0.659)	Data Time 0.001 (0.054)	Loss 2.2983 (2.2225)	Entropy 0.78734 (0.78849)	Top-1 acc 67.188 (71.145)	Top-5 acc 88.281 (88.177)	lr 0.00234
Train [97][780/3239]	Time 2.506 (0.657)	Data Time 0.001 (0.053)	Loss 2.1884 (2.2227)	Entropy 0.78734 (0.78847)	Top-1 acc 70.703 (71.135)	Top-5 acc 88.281 (88.169)	lr 0.00234
Train [97][790/3239]	Time 0.249 (0.652)	Data Time 0.001 (0.052)	Loss 2.1016 (2.2232)	Entropy 0.78730 (0.78846)	Top-1 acc 72.266 (71.118)	Top-5 acc 89.062 (88.162)	lr 0.00234
Train [97][800/3239]	Time 0.237 (0.650)	Data Time 0.001 (0.052)	Loss 2.2697 (2.2236)	Entropy 0.78731 (0.78844)	Top-1 acc 71.094 (71.108)	Top-5 acc 85.938 (88.152)	lr 0.00234
Train [97][810/3239]	Time 0.237 (0.648)	Data Time 0.001 (0.051)	Loss 2.4327 (2.2246)	Entropy 0.78729 (0.78843)	Top-1 acc 66.406 (71.077)	Top-5 acc 82.812 (88.124)	lr 0.00234
Train [97][820/3239]	Time 0.221 (0.645)	Data Time 0.001 (0.050)	Loss 2.2140 (2.2237)	Entropy 0.78727 (0.78841)	Top-1 acc 75.391 (71.109)	Top-5 acc 87.891 (88.142)	lr 0.00234
Train [97][830/3239]	Time 0.231 (0.644)	Data Time 0.001 (0.050)	Loss 2.2959 (2.2240)	Entropy 0.78736 (0.78840)	Top-1 acc 69.141 (71.103)	Top-5 acc 85.938 (88.142)	lr 0.00234
Train [97][840/3239]	Time 0.234 (0.642)	Data Time 0.001 (0.049)	Loss 2.1175 (2.2242)	Entropy 0.78739 (0.78839)	Top-1 acc 74.219 (71.100)	Top-5 acc 89.453 (88.141)	lr 0.00234
Train [97][850/3239]	Time 0.230 (0.640)	Data Time 0.001 (0.049)	Loss 2.1929 (2.2244)	Entropy 0.78729 (0.78838)	Top-1 acc 73.438 (71.085)	Top-5 acc 87.891 (88.141)	lr 0.00234
Train [97][860/3239]	Time 0.227 (0.638)	Data Time 0.001 (0.048)	Loss 2.2860 (2.2244)	Entropy 0.78724 (0.78836)	Top-1 acc 67.969 (71.081)	Top-5 acc 85.547 (88.139)	lr 0.00234
Train [97][870/3239]	Time 0.242 (0.698)	Data Time 0.002 (0.048)	Loss 2.2786 (2.2243)	Entropy 0.78722 (0.78835)	Top-1 acc 71.094 (71.080)	Top-5 acc 86.328 (88.135)	lr 0.00234
Train [97][880/3239]	Time 0.228 (0.696)	Data Time 0.002 (0.047)	Loss 2.1877 (2.2244)	Entropy 0.78724 (0.78834)	Top-1 acc 75.000 (71.089)	Top-5 acc 90.625 (88.137)	lr 0.00234
Train [97][890/3239]	Time 2.604 (0.694)	Data Time 0.002 (0.047)	Loss 2.2887 (2.2246)	Entropy 0.78724 (0.78832)	Top-1 acc 68.359 (71.089)	Top-5 acc 86.719 (88.135)	lr 0.00233
Train [97][900/3239]	Time 0.330 (0.689)	Data Time 0.002 (0.046)	Loss 2.0814 (2.2242)	Entropy 0.78722 (0.78831)	Top-1 acc 75.781 (71.097)	Top-5 acc 89.844 (88.136)	lr 0.00233
Train [97][910/3239]	Time 0.230 (0.686)	Data Time 0.001 (0.046)	Loss 2.2478 (2.2240)	Entropy 0.78721 (0.78830)	Top-1 acc 66.406 (71.098)	Top-5 acc 87.891 (88.140)	lr 0.00233
Train [97][920/3239]	Time 0.236 (0.684)	Data Time 0.001 (0.045)	Loss 2.2045 (2.2239)	Entropy 0.78712 (0.78829)	Top-1 acc 70.312 (71.093)	Top-5 acc 90.234 (88.141)	lr 0.00233
Train [97][930/3239]	Time 0.281 (0.682)	Data Time 0.001 (0.045)	Loss 2.2168 (2.2241)	Entropy 0.78709 (0.78827)	Top-1 acc 68.359 (71.083)	Top-5 acc 87.500 (88.134)	lr 0.00233
Train [97][940/3239]	Time 0.335 (0.680)	Data Time 0.001 (0.044)	Loss 2.1070 (2.2241)	Entropy 0.78702 (0.78826)	Top-1 acc 73.047 (71.095)	Top-5 acc 91.797 (88.133)	lr 0.00233
Train [97][950/3239]	Time 0.233 (0.678)	Data Time 0.001 (0.044)	Loss 2.3301 (2.2242)	Entropy 0.78697 (0.78825)	Top-1 acc 68.359 (71.096)	Top-5 acc 83.984 (88.131)	lr 0.00233
Train [97][960/3239]	Time 0.250 (0.676)	Data Time 0.002 (0.043)	Loss 2.1101 (2.2247)	Entropy 0.78689 (0.78823)	Top-1 acc 71.484 (71.077)	Top-5 acc 90.234 (88.126)	lr 0.00233
Train [97][970/3239]	Time 0.238 (0.674)	Data Time 0.001 (0.043)	Loss 2.2621 (2.2246)	Entropy 0.78688 (0.78822)	Top-1 acc 71.484 (71.072)	Top-5 acc 86.328 (88.132)	lr 0.00233
Train [97][980/3239]	Time 0.246 (0.672)	Data Time 0.001 (0.042)	Loss 2.3494 (2.2248)	Entropy 0.78682 (0.78821)	Top-1 acc 68.359 (71.067)	Top-5 acc 85.156 (88.126)	lr 0.00233
Train [97][990/3239]	Time 0.227 (0.670)	Data Time 0.001 (0.042)	Loss 2.1642 (2.2247)	Entropy 0.78670 (0.78819)	Top-1 acc 70.703 (71.060)	Top-5 acc 89.453 (88.129)	lr 0.00233
Train [97][1000/3239]	Time 2.513 (0.668)	Data Time 0.001 (0.042)	Loss 2.3078 (2.2255)	Entropy 0.78670 (0.78818)	Top-1 acc 69.922 (71.051)	Top-5 acc 84.766 (88.108)	lr 0.00233
Train [97][1010/3239]	Time 0.240 (0.664)	Data Time 0.001 (0.041)	Loss 2.0973 (2.2257)	Entropy 0.78666 (0.78816)	Top-1 acc 73.047 (71.042)	Top-5 acc 90.625 (88.104)	lr 0.00233
Train [97][1020/3239]	Time 0.229 (0.662)	Data Time 0.001 (0.041)	Loss 2.2115 (2.2255)	Entropy 0.78667 (0.78815)	Top-1 acc 71.875 (71.042)	Top-5 acc 87.500 (88.109)	lr 0.00233
Train [97][1030/3239]	Time 0.231 (0.660)	Data Time 0.001 (0.040)	Loss 2.0925 (2.2248)	Entropy 0.78667 (0.78813)	Top-1 acc 73.047 (71.062)	Top-5 acc 91.406 (88.132)	lr 0.00233
Train [97][1040/3239]	Time 0.227 (0.658)	Data Time 0.001 (0.040)	Loss 2.2358 (2.2252)	Entropy 0.78654 (0.78812)	Top-1 acc 70.703 (71.057)	Top-5 acc 87.109 (88.124)	lr 0.00233
Train [97][1050/3239]	Time 0.231 (0.656)	Data Time 0.001 (0.040)	Loss 2.2212 (2.2249)	Entropy 0.78649 (0.78810)	Top-1 acc 73.438 (71.058)	Top-5 acc 87.500 (88.124)	lr 0.00233
Train [97][1060/3239]	Time 0.236 (0.655)	Data Time 0.001 (0.039)	Loss 2.2509 (2.2252)	Entropy 0.78640 (0.78809)	Top-1 acc 69.922 (71.048)	Top-5 acc 89.062 (88.120)	lr 0.00232
Train [97][1070/3239]	Time 0.221 (0.653)	Data Time 0.001 (0.039)	Loss 2.1979 (2.2256)	Entropy 0.78634 (0.78807)	Top-1 acc 69.922 (71.033)	Top-5 acc 88.672 (88.115)	lr 0.00232
Train [97][1080/3239]	Time 0.225 (0.652)	Data Time 0.001 (0.039)	Loss 2.3056 (2.2258)	Entropy 0.78631 (0.78806)	Top-1 acc 71.484 (71.032)	Top-5 acc 86.328 (88.118)	lr 0.00232
Train [97][1090/3239]	Time 0.230 (0.650)	Data Time 0.001 (0.038)	Loss 2.4636 (2.2261)	Entropy 0.78628 (0.78804)	Top-1 acc 67.578 (71.026)	Top-5 acc 85.156 (88.107)	lr 0.00232
Train [97][1100/3239]	Time 0.231 (0.648)	Data Time 0.001 (0.038)	Loss 2.1756 (2.2264)	Entropy 0.78624 (0.78802)	Top-1 acc 70.703 (71.013)	Top-5 acc 88.672 (88.103)	lr 0.00232
Train [97][1110/3239]	Time 2.669 (0.647)	Data Time 0.001 (0.038)	Loss 2.1588 (2.2261)	Entropy 0.78624 (0.78801)	Top-1 acc 72.266 (71.019)	Top-5 acc 89.844 (88.110)	lr 0.00232
Train [97][1120/3239]	Time 0.278 (0.643)	Data Time 0.002 (0.037)	Loss 2.1217 (2.2260)	Entropy 0.78626 (0.78799)	Top-1 acc 71.875 (71.010)	Top-5 acc 90.234 (88.117)	lr 0.00232
Train [97][1130/3239]	Time 0.229 (0.642)	Data Time 0.001 (0.037)	Loss 2.3228 (2.2263)	Entropy 0.78595 (0.78797)	Top-1 acc 65.234 (71.005)	Top-5 acc 89.844 (88.115)	lr 0.00232
Train [97][1140/3239]	Time 0.238 (0.641)	Data Time 0.001 (0.037)	Loss 2.2665 (2.2265)	Entropy 0.78591 (0.78796)	Top-1 acc 68.750 (70.992)	Top-5 acc 88.281 (88.117)	lr 0.00232
Train [97][1150/3239]	Time 0.344 (0.639)	Data Time 0.001 (0.036)	Loss 2.2072 (2.2271)	Entropy 0.78585 (0.78794)	Top-1 acc 73.047 (70.978)	Top-5 acc 89.844 (88.112)	lr 0.00232
Train [97][1160/3239]	Time 0.217 (0.638)	Data Time 0.001 (0.036)	Loss 2.1725 (2.2269)	Entropy 0.78587 (0.78792)	Top-1 acc 72.266 (70.977)	Top-5 acc 90.234 (88.118)	lr 0.00232
Train [97][1170/3239]	Time 0.239 (0.637)	Data Time 0.002 (0.036)	Loss 2.1008 (2.2265)	Entropy 0.78596 (0.78790)	Top-1 acc 75.391 (70.981)	Top-5 acc 91.406 (88.130)	lr 0.00232
Train [97][1180/3239]	Time 0.230 (0.635)	Data Time 0.002 (0.035)	Loss 2.3408 (2.2264)	Entropy 0.78596 (0.78789)	Top-1 acc 68.359 (70.985)	Top-5 acc 85.938 (88.136)	lr 0.00232
Train [97][1190/3239]	Time 0.230 (0.634)	Data Time 0.001 (0.035)	Loss 2.1605 (2.2262)	Entropy 0.78593 (0.78787)	Top-1 acc 69.141 (70.986)	Top-5 acc 89.844 (88.139)	lr 0.00232
Train [97][1200/3239]	Time 0.225 (0.633)	Data Time 0.001 (0.035)	Loss 2.3034 (2.2264)	Entropy 0.78583 (0.78785)	Top-1 acc 69.922 (70.973)	Top-5 acc 88.281 (88.133)	lr 0.00232
Train [97][1210/3239]	Time 0.246 (0.631)	Data Time 0.001 (0.035)	Loss 2.3063 (2.2262)	Entropy 0.78578 (0.78784)	Top-1 acc 72.266 (70.979)	Top-5 acc 88.672 (88.138)	lr 0.00232
Train [97][1220/3239]	Time 2.434 (0.630)	Data Time 0.001 (0.034)	Loss 2.2699 (2.2262)	Entropy 0.78578 (0.78782)	Top-1 acc 71.094 (70.977)	Top-5 acc 87.109 (88.141)	lr 0.00232
Train [97][1230/3239]	Time 0.249 (0.627)	Data Time 0.001 (0.034)	Loss 2.2223 (2.2262)	Entropy 0.78553 (0.78780)	Top-1 acc 69.922 (70.971)	Top-5 acc 87.500 (88.139)	lr 0.00231
Train [97][1240/3239]	Time 0.360 (0.666)	Data Time 0.003 (0.034)	Loss 2.2198 (2.2262)	Entropy 0.78549 (0.78778)	Top-1 acc 70.312 (70.965)	Top-5 acc 89.844 (88.144)	lr 0.00231
Train [97][1250/3239]	Time 0.256 (0.665)	Data Time 0.002 (0.034)	Loss 2.3324 (2.2261)	Entropy 0.78543 (0.78777)	Top-1 acc 69.922 (70.965)	Top-5 acc 85.156 (88.146)	lr 0.00231
Train [97][1260/3239]	Time 0.231 (0.663)	Data Time 0.001 (0.033)	Loss 2.1866 (2.2262)	Entropy 0.78543 (0.78775)	Top-1 acc 70.703 (70.963)	Top-5 acc 89.844 (88.143)	lr 0.00231
Train [97][1270/3239]	Time 0.245 (0.662)	Data Time 0.002 (0.033)	Loss 2.2193 (2.2260)	Entropy 0.78540 (0.78773)	Top-1 acc 69.922 (70.964)	Top-5 acc 87.891 (88.141)	lr 0.00231
Train [97][1280/3239]	Time 0.336 (0.660)	Data Time 0.001 (0.033)	Loss 2.1454 (2.2258)	Entropy 0.78542 (0.78771)	Top-1 acc 74.609 (70.976)	Top-5 acc 88.672 (88.146)	lr 0.00231
Train [97][1290/3239]	Time 0.252 (0.659)	Data Time 0.002 (0.033)	Loss 2.2574 (2.2258)	Entropy 0.78538 (0.78769)	Top-1 acc 70.312 (70.976)	Top-5 acc 87.500 (88.147)	lr 0.00231
Train [97][1300/3239]	Time 0.243 (0.658)	Data Time 0.002 (0.032)	Loss 2.1034 (2.2260)	Entropy 0.78540 (0.78767)	Top-1 acc 75.000 (70.971)	Top-5 acc 89.844 (88.145)	lr 0.00231
Train [97][1310/3239]	Time 0.238 (0.656)	Data Time 0.002 (0.032)	Loss 2.2635 (2.2261)	Entropy 0.78531 (0.78766)	Top-1 acc 71.484 (70.971)	Top-5 acc 88.672 (88.140)	lr 0.00231
Train [97][1320/3239]	Time 0.253 (0.655)	Data Time 0.001 (0.032)	Loss 2.2218 (2.2262)	Entropy 0.78525 (0.78764)	Top-1 acc 70.703 (70.968)	Top-5 acc 89.062 (88.140)	lr 0.00231
Train [97][1330/3239]	Time 2.579 (0.654)	Data Time 0.001 (0.032)	Loss 2.2302 (2.2264)	Entropy 0.78525 (0.78762)	Top-1 acc 69.141 (70.960)	Top-5 acc 87.891 (88.136)	lr 0.00231
Train [97][1340/3239]	Time 0.236 (0.651)	Data Time 0.001 (0.032)	Loss 2.2350 (2.2262)	Entropy 0.78518 (0.78760)	Top-1 acc 66.797 (70.969)	Top-5 acc 88.281 (88.140)	lr 0.00231
Train [97][1350/3239]	Time 0.247 (0.649)	Data Time 0.002 (0.031)	Loss 2.4653 (2.2262)	Entropy 0.78508 (0.78758)	Top-1 acc 65.234 (70.965)	Top-5 acc 82.422 (88.138)	lr 0.00231
Train [97][1360/3239]	Time 0.236 (0.648)	Data Time 0.001 (0.031)	Loss 2.1756 (2.2260)	Entropy 0.78500 (0.78757)	Top-1 acc 73.438 (70.970)	Top-5 acc 91.016 (88.141)	lr 0.00231
Train [97][1370/3239]	Time 0.335 (0.647)	Data Time 0.001 (0.031)	Loss 2.3389 (2.2261)	Entropy 0.78512 (0.78755)	Top-1 acc 69.531 (70.964)	Top-5 acc 86.719 (88.142)	lr 0.00231
Train [97][1380/3239]	Time 0.234 (0.646)	Data Time 0.001 (0.031)	Loss 2.1348 (2.2260)	Entropy 0.78632 (0.78754)	Top-1 acc 72.266 (70.956)	Top-5 acc 91.406 (88.143)	lr 0.00231
Train [97][1390/3239]	Time 0.230 (0.644)	Data Time 0.001 (0.030)	Loss 2.2509 (2.2261)	Entropy 0.78628 (0.78753)	Top-1 acc 71.094 (70.957)	Top-5 acc 89.844 (88.143)	lr 0.00231
Train [97][1400/3239]	Time 0.259 (0.643)	Data Time 0.001 (0.030)	Loss 2.1988 (2.2262)	Entropy 0.78632 (0.78752)	Top-1 acc 69.922 (70.951)	Top-5 acc 90.625 (88.144)	lr 0.00230
Train [97][1410/3239]	Time 0.331 (0.642)	Data Time 0.001 (0.030)	Loss 2.3731 (2.2262)	Entropy 0.78631 (0.78751)	Top-1 acc 65.234 (70.943)	Top-5 acc 82.812 (88.144)	lr 0.00230
Train [97][1420/3239]	Time 0.236 (0.641)	Data Time 0.001 (0.030)	Loss 2.2987 (2.2263)	Entropy 0.78629 (0.78750)	Top-1 acc 70.703 (70.940)	Top-5 acc 85.938 (88.141)	lr 0.00230
Train [97][1430/3239]	Time 0.259 (0.640)	Data Time 0.001 (0.030)	Loss 2.3930 (2.2266)	Entropy 0.78626 (0.78749)	Top-1 acc 67.188 (70.932)	Top-5 acc 83.203 (88.136)	lr 0.00230
Train [97][1440/3239]	Time 2.575 (0.639)	Data Time 0.001 (0.029)	Loss 2.3083 (2.2269)	Entropy 0.78626 (0.78748)	Top-1 acc 70.703 (70.932)	Top-5 acc 86.328 (88.130)	lr 0.00230
Train [97][1450/3239]	Time 0.238 (0.636)	Data Time 0.001 (0.029)	Loss 2.1380 (2.2269)	Entropy 0.78626 (0.78748)	Top-1 acc 73.047 (70.934)	Top-5 acc 90.234 (88.129)	lr 0.00230
Train [97][1460/3239]	Time 0.237 (0.635)	Data Time 0.002 (0.029)	Loss 2.1576 (2.2268)	Entropy 0.78627 (0.78747)	Top-1 acc 71.484 (70.940)	Top-5 acc 90.625 (88.126)	lr 0.00230
Train [97][1470/3239]	Time 0.238 (0.634)	Data Time 0.002 (0.029)	Loss 2.2336 (2.2269)	Entropy 0.78626 (0.78746)	Top-1 acc 69.922 (70.939)	Top-5 acc 86.719 (88.123)	lr 0.00230
Train [97][1480/3239]	Time 0.246 (0.633)	Data Time 0.001 (0.029)	Loss 2.2603 (2.2268)	Entropy 0.78613 (0.78745)	Top-1 acc 68.750 (70.941)	Top-5 acc 86.719 (88.123)	lr 0.00230
Train [97][1490/3239]	Time 0.227 (0.632)	Data Time 0.001 (0.029)	Loss 2.3442 (2.2269)	Entropy 0.78607 (0.78744)	Top-1 acc 68.750 (70.937)	Top-5 acc 87.109 (88.123)	lr 0.00230
Train [97][1500/3239]	Time 0.232 (0.631)	Data Time 0.001 (0.028)	Loss 2.3111 (2.2271)	Entropy 0.78601 (0.78743)	Top-1 acc 66.797 (70.935)	Top-5 acc 87.500 (88.120)	lr 0.00230
Train [97][1510/3239]	Time 0.224 (0.630)	Data Time 0.001 (0.028)	Loss 2.1104 (2.2271)	Entropy 0.78600 (0.78742)	Top-1 acc 75.000 (70.934)	Top-5 acc 90.234 (88.120)	lr 0.00230
Train [97][1520/3239]	Time 0.221 (0.629)	Data Time 0.001 (0.028)	Loss 2.1739 (2.2271)	Entropy 0.78602 (0.78741)	Top-1 acc 71.875 (70.928)	Top-5 acc 87.109 (88.121)	lr 0.00230
Train [97][1530/3239]	Time 0.238 (0.628)	Data Time 0.001 (0.028)	Loss 2.2112 (2.2273)	Entropy 0.78600 (0.78740)	Top-1 acc 72.266 (70.917)	Top-5 acc 88.672 (88.121)	lr 0.00230
Train [97][1540/3239]	Time 0.314 (0.627)	Data Time 0.001 (0.028)	Loss 2.3517 (2.2274)	Entropy 0.78600 (0.78739)	Top-1 acc 67.578 (70.915)	Top-5 acc 84.766 (88.120)	lr 0.00230
Train [97][1550/3239]	Time 2.533 (0.626)	Data Time 0.001 (0.027)	Loss 2.1350 (2.2273)	Entropy 0.78600 (0.78739)	Top-1 acc 72.656 (70.920)	Top-5 acc 91.016 (88.123)	lr 0.00230
Train [97][1560/3239]	Time 0.225 (0.624)	Data Time 0.001 (0.027)	Loss 2.2826 (2.2274)	Entropy 0.78603 (0.78738)	Top-1 acc 69.531 (70.920)	Top-5 acc 87.109 (88.119)	lr 0.00230
Train [97][1570/3239]	Time 0.233 (0.623)	Data Time 0.001 (0.027)	Loss 2.2646 (2.2277)	Entropy 0.78604 (0.78737)	Top-1 acc 67.969 (70.909)	Top-5 acc 85.547 (88.112)	lr 0.00229
Train [97][1580/3239]	Time 0.334 (0.622)	Data Time 0.001 (0.027)	Loss 2.0986 (2.2277)	Entropy 0.78605 (0.78736)	Top-1 acc 74.609 (70.904)	Top-5 acc 90.234 (88.111)	lr 0.00229
Train [97][1590/3239]	Time 0.234 (0.621)	Data Time 0.001 (0.027)	Loss 2.2746 (2.2277)	Entropy 0.78610 (0.78735)	Top-1 acc 66.797 (70.901)	Top-5 acc 88.672 (88.115)	lr 0.00229
Train [97][1600/3239]	Time 0.306 (0.652)	Data Time 0.003 (0.027)	Loss 2.0591 (2.2272)	Entropy 0.78609 (0.78734)	Top-1 acc 77.734 (70.916)	Top-5 acc 92.188 (88.123)	lr 0.00229
Train [97][1610/3239]	Time 0.235 (0.651)	Data Time 0.002 (0.027)	Loss 2.1086 (2.2273)	Entropy 0.78610 (0.78734)	Top-1 acc 74.219 (70.914)	Top-5 acc 89.062 (88.121)	lr 0.00229
Train [97][1620/3239]	Time 0.348 (0.650)	Data Time 0.002 (0.026)	Loss 2.2408 (2.2269)	Entropy 0.78608 (0.78733)	Top-1 acc 73.438 (70.925)	Top-5 acc 85.938 (88.127)	lr 0.00229
Train [97][1630/3239]	Time 0.229 (0.649)	Data Time 0.001 (0.026)	Loss 2.3217 (2.2273)	Entropy 0.78591 (0.78732)	Top-1 acc 69.922 (70.921)	Top-5 acc 86.328 (88.123)	lr 0.00229
Train [97][1640/3239]	Time 0.246 (0.648)	Data Time 0.001 (0.026)	Loss 2.3130 (2.2278)	Entropy 0.78593 (0.78731)	Top-1 acc 66.797 (70.913)	Top-5 acc 85.547 (88.113)	lr 0.00229
Train [97][1650/3239]	Time 0.232 (0.647)	Data Time 0.002 (0.026)	Loss 2.2780 (2.2277)	Entropy 0.78595 (0.78730)	Top-1 acc 69.141 (70.916)	Top-5 acc 86.328 (88.111)	lr 0.00229
Train [97][1660/3239]	Time 2.615 (0.646)	Data Time 0.001 (0.026)	Loss 2.1247 (2.2274)	Entropy 0.78595 (0.78730)	Top-1 acc 73.828 (70.925)	Top-5 acc 90.625 (88.116)	lr 0.00229
Train [97][1670/3239]	Time 0.228 (0.644)	Data Time 0.001 (0.026)	Loss 2.9447 (2.2280)	Entropy 0.78597 (0.78729)	Top-1 acc 55.859 (70.913)	Top-5 acc 76.953 (88.106)	lr 0.00229
Train [97][1680/3239]	Time 0.253 (0.643)	Data Time 0.002 (0.026)	Loss 2.1360 (2.2280)	Entropy 0.78584 (0.78728)	Top-1 acc 73.828 (70.917)	Top-5 acc 88.672 (88.106)	lr 0.00229
Train [97][1690/3239]	Time 0.238 (0.642)	Data Time 0.001 (0.025)	Loss 2.2534 (2.2284)	Entropy 0.78585 (0.78727)	Top-1 acc 73.438 (70.912)	Top-5 acc 86.719 (88.095)	lr 0.00229
Train [97][1700/3239]	Time 0.239 (0.641)	Data Time 0.001 (0.025)	Loss 2.2600 (2.2286)	Entropy 0.78583 (0.78726)	Top-1 acc 67.578 (70.902)	Top-5 acc 88.672 (88.090)	lr 0.00229
Train [97][1710/3239]	Time 0.217 (0.640)	Data Time 0.001 (0.025)	Loss 2.1699 (2.2287)	Entropy 0.78582 (0.78725)	Top-1 acc 75.391 (70.898)	Top-5 acc 87.500 (88.087)	lr 0.00229
Train [97][1720/3239]	Time 0.229 (0.639)	Data Time 0.001 (0.025)	Loss 2.2375 (2.2288)	Entropy 0.78577 (0.78725)	Top-1 acc 69.531 (70.900)	Top-5 acc 87.500 (88.083)	lr 0.00229
Train [97][1730/3239]	Time 0.227 (0.638)	Data Time 0.001 (0.025)	Loss 2.0665 (2.2288)	Entropy 0.78571 (0.78724)	Top-1 acc 74.219 (70.899)	Top-5 acc 89.844 (88.083)	lr 0.00229
Train [97][1740/3239]	Time 0.229 (0.637)	Data Time 0.001 (0.025)	Loss 2.2241 (2.2286)	Entropy 0.78567 (0.78723)	Top-1 acc 69.531 (70.900)	Top-5 acc 89.844 (88.090)	lr 0.00228
Train [97][1750/3239]	Time 0.312 (0.636)	Data Time 0.001 (0.025)	Loss 2.3482 (2.2286)	Entropy 0.78557 (0.78722)	Top-1 acc 67.578 (70.897)	Top-5 acc 84.375 (88.087)	lr 0.00228
Train [97][1760/3239]	Time 0.256 (0.636)	Data Time 0.001 (0.024)	Loss 2.2477 (2.2288)	Entropy 0.78557 (0.78721)	Top-1 acc 71.875 (70.887)	Top-5 acc 87.109 (88.082)	lr 0.00228
Train [97][1770/3239]	Time 2.510 (0.635)	Data Time 0.002 (0.024)	Loss 2.2201 (2.2287)	Entropy 0.78557 (0.78720)	Top-1 acc 69.141 (70.891)	Top-5 acc 88.672 (88.084)	lr 0.00228
Train [97][1780/3239]	Time 0.219 (0.633)	Data Time 0.001 (0.024)	Loss 2.2328 (2.2286)	Entropy 0.78549 (0.78719)	Top-1 acc 69.141 (70.893)	Top-5 acc 89.062 (88.084)	lr 0.00228
Train [97][1790/3239]	Time 0.329 (0.632)	Data Time 0.001 (0.024)	Loss 2.1280 (2.2287)	Entropy 0.78545 (0.78718)	Top-1 acc 72.266 (70.889)	Top-5 acc 89.453 (88.083)	lr 0.00228
Train [97][1800/3239]	Time 0.233 (0.631)	Data Time 0.001 (0.024)	Loss 2.1495 (2.2285)	Entropy 0.78541 (0.78717)	Top-1 acc 72.656 (70.890)	Top-5 acc 88.672 (88.089)	lr 0.00228
Train [97][1810/3239]	Time 0.229 (0.630)	Data Time 0.001 (0.024)	Loss 2.1472 (2.2285)	Entropy 0.78534 (0.78716)	Top-1 acc 75.781 (70.894)	Top-5 acc 90.234 (88.087)	lr 0.00228
Train [97][1820/3239]	Time 0.235 (0.629)	Data Time 0.001 (0.024)	Loss 2.2990 (2.2286)	Entropy 0.78530 (0.78715)	Top-1 acc 69.141 (70.894)	Top-5 acc 85.938 (88.085)	lr 0.00228
Train [97][1830/3239]	Time 0.320 (0.628)	Data Time 0.001 (0.024)	Loss 2.1896 (2.2287)	Entropy 0.78527 (0.78714)	Top-1 acc 70.703 (70.889)	Top-5 acc 92.188 (88.086)	lr 0.00228
Train [97][1840/3239]	Time 0.240 (0.628)	Data Time 0.001 (0.023)	Loss 2.3605 (2.2288)	Entropy 0.78519 (0.78713)	Top-1 acc 69.141 (70.886)	Top-5 acc 84.375 (88.084)	lr 0.00228
Train [97][1850/3239]	Time 0.241 (0.627)	Data Time 0.001 (0.023)	Loss 2.2648 (2.2290)	Entropy 0.78515 (0.78712)	Top-1 acc 70.312 (70.882)	Top-5 acc 86.719 (88.081)	lr 0.00228
Train [97][1860/3239]	Time 0.246 (0.626)	Data Time 0.002 (0.023)	Loss 2.2367 (2.2289)	Entropy 0.78493 (0.78711)	Top-1 acc 70.312 (70.885)	Top-5 acc 86.328 (88.083)	lr 0.00228
Train [97][1870/3239]	Time 0.233 (0.625)	Data Time 0.001 (0.023)	Loss 2.2219 (2.2291)	Entropy 0.78488 (0.78710)	Top-1 acc 69.922 (70.877)	Top-5 acc 87.109 (88.080)	lr 0.00228
Train [97][1880/3239]	Time 2.589 (0.624)	Data Time 0.001 (0.023)	Loss 2.1638 (2.2290)	Entropy 0.78488 (0.78709)	Top-1 acc 71.484 (70.875)	Top-5 acc 90.234 (88.080)	lr 0.00228
Train [97][1890/3239]	Time 0.221 (0.622)	Data Time 0.001 (0.023)	Loss 2.2242 (2.2290)	Entropy 0.78490 (0.78707)	Top-1 acc 71.094 (70.875)	Top-5 acc 89.453 (88.083)	lr 0.00228
Train [97][1900/3239]	Time 0.222 (0.622)	Data Time 0.001 (0.023)	Loss 2.1346 (2.2290)	Entropy 0.78483 (0.78706)	Top-1 acc 72.656 (70.874)	Top-5 acc 90.625 (88.086)	lr 0.00228
Train [97][1910/3239]	Time 0.224 (0.621)	Data Time 0.001 (0.023)	Loss 2.3446 (2.2290)	Entropy 0.78474 (0.78705)	Top-1 acc 68.359 (70.871)	Top-5 acc 85.938 (88.088)	lr 0.00228
Train [97][1920/3239]	Time 0.232 (0.620)	Data Time 0.001 (0.022)	Loss 2.0657 (2.2289)	Entropy 0.78475 (0.78704)	Top-1 acc 73.828 (70.873)	Top-5 acc 92.188 (88.090)	lr 0.00227
Train [97][1930/3239]	Time 0.265 (0.619)	Data Time 0.001 (0.022)	Loss 2.3342 (2.2290)	Entropy 0.78467 (0.78703)	Top-1 acc 67.969 (70.867)	Top-5 acc 85.938 (88.088)	lr 0.00227
Train [97][1940/3239]	Time 0.249 (0.619)	Data Time 0.001 (0.022)	Loss 2.2851 (2.2291)	Entropy 0.78474 (0.78701)	Top-1 acc 69.141 (70.866)	Top-5 acc 85.938 (88.086)	lr 0.00227
Train [97][1950/3239]	Time 0.224 (0.618)	Data Time 0.001 (0.022)	Loss 2.3226 (2.2291)	Entropy 0.78473 (0.78700)	Top-1 acc 67.188 (70.862)	Top-5 acc 87.891 (88.089)	lr 0.00227
Train [97][1960/3239]	Time 0.270 (0.644)	Data Time 0.002 (0.022)	Loss 2.2772 (2.2292)	Entropy 0.78463 (0.78699)	Top-1 acc 69.141 (70.864)	Top-5 acc 85.547 (88.084)	lr 0.00227
Train [97][1970/3239]	Time 0.229 (0.643)	Data Time 0.002 (0.022)	Loss 2.3276 (2.2293)	Entropy 0.78460 (0.78698)	Top-1 acc 72.656 (70.860)	Top-5 acc 86.328 (88.082)	lr 0.00227
Train [97][1980/3239]	Time 0.238 (0.642)	Data Time 0.002 (0.022)	Loss 2.2920 (2.2293)	Entropy 0.78458 (0.78697)	Top-1 acc 66.016 (70.852)	Top-5 acc 87.109 (88.084)	lr 0.00227
Train [97][1990/3239]	Time 2.676 (0.642)	Data Time 0.002 (0.022)	Loss 2.1348 (2.2292)	Entropy 0.78458 (0.78695)	Top-1 acc 69.922 (70.850)	Top-5 acc 91.406 (88.087)	lr 0.00227
Train [97][2000/3239]	Time 0.318 (0.640)	Data Time 0.001 (0.022)	Loss 2.3197 (2.2292)	Entropy 0.78454 (0.78694)	Top-1 acc 66.016 (70.847)	Top-5 acc 86.328 (88.088)	lr 0.00227
Train [97][2010/3239]	Time 0.241 (0.639)	Data Time 0.002 (0.022)	Loss 2.3175 (2.2293)	Entropy 0.78455 (0.78693)	Top-1 acc 69.922 (70.846)	Top-5 acc 86.719 (88.087)	lr 0.00227
Train [97][2020/3239]	Time 0.225 (0.638)	Data Time 0.001 (0.021)	Loss 2.3747 (2.2294)	Entropy 0.78455 (0.78692)	Top-1 acc 67.578 (70.847)	Top-5 acc 84.375 (88.084)	lr 0.00227
Train [97][2030/3239]	Time 0.236 (0.637)	Data Time 0.001 (0.021)	Loss 2.2815 (2.2294)	Entropy 0.78454 (0.78691)	Top-1 acc 70.703 (70.847)	Top-5 acc 87.109 (88.085)	lr 0.00227
Train [97][2040/3239]	Time 0.330 (0.637)	Data Time 0.002 (0.021)	Loss 2.2968 (2.2294)	Entropy 0.78457 (0.78690)	Top-1 acc 70.312 (70.846)	Top-5 acc 87.109 (88.085)	lr 0.00227
Train [97][2050/3239]	Time 0.227 (0.636)	Data Time 0.001 (0.021)	Loss 2.2012 (2.2294)	Entropy 0.78450 (0.78688)	Top-1 acc 73.438 (70.854)	Top-5 acc 88.281 (88.084)	lr 0.00227
Train [97][2060/3239]	Time 0.229 (0.635)	Data Time 0.001 (0.021)	Loss 2.1607 (2.2294)	Entropy 0.78440 (0.78687)	Top-1 acc 75.000 (70.849)	Top-5 acc 87.891 (88.082)	lr 0.00227
Train [97][2070/3239]	Time 0.244 (0.634)	Data Time 0.001 (0.021)	Loss 2.0500 (2.2295)	Entropy 0.78441 (0.78686)	Top-1 acc 75.781 (70.849)	Top-5 acc 91.797 (88.082)	lr 0.00227
Train [97][2080/3239]	Time 0.233 (0.633)	Data Time 0.001 (0.021)	Loss 2.0801 (2.2294)	Entropy 0.78443 (0.78685)	Top-1 acc 75.391 (70.850)	Top-5 acc 91.406 (88.084)	lr 0.00227
Train [97][2090/3239]	Time 0.229 (0.633)	Data Time 0.002 (0.021)	Loss 2.2543 (2.2292)	Entropy 0.78440 (0.78684)	Top-1 acc 68.359 (70.857)	Top-5 acc 89.062 (88.089)	lr 0.00226
Train [97][2100/3239]	Time 2.536 (0.632)	Data Time 0.001 (0.021)	Loss 2.1671 (2.2291)	Entropy 0.78440 (0.78683)	Top-1 acc 71.484 (70.855)	Top-5 acc 89.453 (88.088)	lr 0.00226
Train [97][2110/3239]	Time 0.285 (0.630)	Data Time 0.001 (0.021)	Loss 2.3723 (2.2290)	Entropy 0.78443 (0.78681)	Top-1 acc 66.406 (70.854)	Top-5 acc 87.109 (88.093)	lr 0.00226
Train [97][2120/3239]	Time 0.241 (0.630)	Data Time 0.001 (0.021)	Loss 2.3276 (2.2289)	Entropy 0.78443 (0.78680)	Top-1 acc 69.531 (70.857)	Top-5 acc 86.719 (88.092)	lr 0.00226
Train [97][2130/3239]	Time 0.236 (0.629)	Data Time 0.001 (0.020)	Loss 2.3002 (2.2291)	Entropy 0.78442 (0.78679)	Top-1 acc 69.922 (70.855)	Top-5 acc 85.547 (88.087)	lr 0.00226
Train [97][2140/3239]	Time 0.230 (0.628)	Data Time 0.001 (0.020)	Loss 2.2031 (2.2293)	Entropy 0.78441 (0.78678)	Top-1 acc 71.875 (70.851)	Top-5 acc 88.281 (88.085)	lr 0.00226
Train [97][2150/3239]	Time 0.229 (0.627)	Data Time 0.001 (0.020)	Loss 2.1880 (2.2294)	Entropy 0.78442 (0.78677)	Top-1 acc 75.000 (70.848)	Top-5 acc 91.406 (88.087)	lr 0.00226
Train [97][2160/3239]	Time 0.249 (0.627)	Data Time 0.001 (0.020)	Loss 2.2173 (2.2293)	Entropy 0.78442 (0.78676)	Top-1 acc 70.703 (70.847)	Top-5 acc 89.844 (88.090)	lr 0.00226
Train [97][2170/3239]	Time 0.228 (0.626)	Data Time 0.001 (0.020)	Loss 2.0611 (2.2290)	Entropy 0.78425 (0.78675)	Top-1 acc 76.562 (70.856)	Top-5 acc 89.844 (88.095)	lr 0.00226
Train [97][2180/3239]	Time 0.225 (0.625)	Data Time 0.001 (0.020)	Loss 2.2698 (2.2290)	Entropy 0.78422 (0.78674)	Top-1 acc 70.703 (70.859)	Top-5 acc 88.281 (88.098)	lr 0.00226
Train [97][2190/3239]	Time 0.221 (0.625)	Data Time 0.001 (0.020)	Loss 2.2197 (2.2290)	Entropy 0.78419 (0.78672)	Top-1 acc 76.562 (70.859)	Top-5 acc 88.672 (88.101)	lr 0.00226
Train [97][2200/3239]	Time 0.226 (0.624)	Data Time 0.001 (0.020)	Loss 2.0395 (2.2291)	Entropy 0.78420 (0.78671)	Top-1 acc 74.609 (70.858)	Top-5 acc 90.234 (88.099)	lr 0.00226
Train [97][2210/3239]	Time 2.671 (0.623)	Data Time 0.001 (0.020)	Loss 2.2274 (2.2293)	Entropy 0.78420 (0.78670)	Top-1 acc 70.312 (70.855)	Top-5 acc 87.891 (88.096)	lr 0.00226
Train [97][2220/3239]	Time 0.256 (0.622)	Data Time 0.001 (0.020)	Loss 2.2538 (2.2292)	Entropy 0.78414 (0.78669)	Top-1 acc 72.656 (70.860)	Top-5 acc 87.109 (88.098)	lr 0.00226
Train [97][2230/3239]	Time 0.231 (0.621)	Data Time 0.002 (0.020)	Loss 2.1914 (2.2291)	Entropy 0.78419 (0.78668)	Top-1 acc 70.703 (70.864)	Top-5 acc 87.891 (88.101)	lr 0.00226
Train [97][2240/3239]	Time 0.224 (0.620)	Data Time 0.001 (0.020)	Loss 2.2150 (2.2291)	Entropy 0.78417 (0.78667)	Top-1 acc 71.484 (70.859)	Top-5 acc 86.328 (88.099)	lr 0.00226
Train [97][2250/3239]	Time 0.259 (0.620)	Data Time 0.001 (0.019)	Loss 2.2848 (2.2291)	Entropy 0.78418 (0.78666)	Top-1 acc 69.531 (70.859)	Top-5 acc 84.766 (88.097)	lr 0.00226
Train [97][2260/3239]	Time 0.209 (0.619)	Data Time 0.001 (0.019)	Loss 2.3801 (2.2291)	Entropy 0.78418 (0.78665)	Top-1 acc 67.969 (70.856)	Top-5 acc 85.938 (88.096)	lr 0.00225
Train [97][2270/3239]	Time 0.230 (0.618)	Data Time 0.001 (0.019)	Loss 2.1556 (2.2291)	Entropy 0.78412 (0.78664)	Top-1 acc 72.656 (70.859)	Top-5 acc 89.062 (88.096)	lr 0.00225
Train [97][2280/3239]	Time 0.234 (0.618)	Data Time 0.001 (0.019)	Loss 2.2520 (2.2292)	Entropy 0.78412 (0.78662)	Top-1 acc 69.531 (70.860)	Top-5 acc 86.328 (88.095)	lr 0.00225
Train [97][2290/3239]	Time 0.232 (0.617)	Data Time 0.001 (0.019)	Loss 2.3189 (2.2293)	Entropy 0.78406 (0.78661)	Top-1 acc 65.234 (70.852)	Top-5 acc 88.281 (88.094)	lr 0.00225
Train [97][2300/3239]	Time 0.230 (0.617)	Data Time 0.001 (0.019)	Loss 2.0762 (2.2290)	Entropy 0.78395 (0.78660)	Top-1 acc 75.391 (70.862)	Top-5 acc 88.672 (88.094)	lr 0.00225
Train [97][2310/3239]	Time 0.219 (0.616)	Data Time 0.001 (0.019)	Loss 2.3329 (2.2291)	Entropy 0.78397 (0.78659)	Top-1 acc 67.188 (70.858)	Top-5 acc 88.281 (88.092)	lr 0.00225
Train [97][2320/3239]	Time 56.405 (0.639)	Data Time 0.001 (0.019)	Loss 2.2201 (2.2291)	Entropy 0.78397 (0.78658)	Top-1 acc 72.266 (70.862)	Top-5 acc 87.109 (88.093)	lr 0.00225
Train [97][2330/3239]	Time 0.234 (0.637)	Data Time 0.002 (0.019)	Loss 2.2104 (2.2290)	Entropy 0.78395 (0.78657)	Top-1 acc 69.922 (70.861)	Top-5 acc 87.891 (88.097)	lr 0.00225
Train [97][2340/3239]	Time 0.226 (0.636)	Data Time 0.002 (0.019)	Loss 2.2212 (2.2291)	Entropy 0.78399 (0.78656)	Top-1 acc 68.750 (70.857)	Top-5 acc 87.109 (88.099)	lr 0.00225
Train [97][2350/3239]	Time 0.232 (0.636)	Data Time 0.001 (0.019)	Loss 2.2345 (2.2290)	Entropy 0.78401 (0.78655)	Top-1 acc 70.312 (70.860)	Top-5 acc 87.891 (88.098)	lr 0.00225
Train [97][2360/3239]	Time 0.208 (0.635)	Data Time 0.001 (0.019)	Loss 2.2465 (2.2290)	Entropy 0.78401 (0.78654)	Top-1 acc 71.484 (70.857)	Top-5 acc 89.062 (88.097)	lr 0.00225
Train [97][2370/3239]	Time 0.228 (0.634)	Data Time 0.001 (0.019)	Loss 2.1861 (2.2289)	Entropy 0.78396 (0.78652)	Top-1 acc 73.047 (70.859)	Top-5 acc 89.844 (88.100)	lr 0.00225
Train [97][2380/3239]	Time 0.326 (0.634)	Data Time 0.001 (0.018)	Loss 2.4052 (2.2292)	Entropy 0.78393 (0.78651)	Top-1 acc 67.188 (70.853)	Top-5 acc 86.719 (88.099)	lr 0.00225
Train [97][2390/3239]	Time 0.227 (0.633)	Data Time 0.001 (0.018)	Loss 2.0959 (2.2293)	Entropy 0.78389 (0.78650)	Top-1 acc 73.438 (70.847)	Top-5 acc 90.625 (88.095)	lr 0.00225
Train [97][2400/3239]	Time 0.233 (0.632)	Data Time 0.001 (0.018)	Loss 2.4061 (2.2295)	Entropy 0.78391 (0.78649)	Top-1 acc 64.844 (70.844)	Top-5 acc 82.422 (88.094)	lr 0.00225
Train [97][2410/3239]	Time 0.231 (0.632)	Data Time 0.001 (0.018)	Loss 2.1173 (2.2295)	Entropy 0.78391 (0.78648)	Top-1 acc 73.828 (70.842)	Top-5 acc 90.234 (88.091)	lr 0.00225
Train [97][2420/3239]	Time 0.360 (0.631)	Data Time 0.001 (0.018)	Loss 2.3652 (2.2297)	Entropy 0.78388 (0.78647)	Top-1 acc 68.750 (70.837)	Top-5 acc 84.766 (88.089)	lr 0.00225
Train [97][2430/3239]	Time 2.574 (0.631)	Data Time 0.001 (0.018)	Loss 2.2993 (2.2298)	Entropy 0.78388 (0.78646)	Top-1 acc 70.703 (70.839)	Top-5 acc 86.719 (88.087)	lr 0.00224
Train [97][2440/3239]	Time 0.236 (0.629)	Data Time 0.001 (0.018)	Loss 2.2075 (2.2299)	Entropy 0.78375 (0.78645)	Top-1 acc 69.531 (70.836)	Top-5 acc 89.062 (88.088)	lr 0.00224
Train [97][2450/3239]	Time 0.251 (0.628)	Data Time 0.001 (0.018)	Loss 2.2698 (2.2300)	Entropy 0.78362 (0.78644)	Top-1 acc 69.922 (70.830)	Top-5 acc 87.109 (88.084)	lr 0.00224
Train [97][2460/3239]	Time 0.297 (0.628)	Data Time 0.002 (0.018)	Loss 2.2248 (2.2299)	Entropy 0.78367 (0.78643)	Top-1 acc 72.656 (70.834)	Top-5 acc 85.938 (88.083)	lr 0.00224
Train [97][2470/3239]	Time 0.253 (0.627)	Data Time 0.001 (0.018)	Loss 2.2287 (2.2300)	Entropy 0.78370 (0.78641)	Top-1 acc 72.266 (70.834)	Top-5 acc 88.281 (88.082)	lr 0.00224
Train [97][2480/3239]	Time 0.233 (0.627)	Data Time 0.001 (0.018)	Loss 2.2634 (2.2301)	Entropy 0.78371 (0.78640)	Top-1 acc 69.922 (70.828)	Top-5 acc 87.500 (88.082)	lr 0.00224
Train [97][2490/3239]	Time 0.231 (0.626)	Data Time 0.001 (0.018)	Loss 2.2395 (2.2301)	Entropy 0.78367 (0.78639)	Top-1 acc 66.797 (70.825)	Top-5 acc 88.672 (88.081)	lr 0.00224
Train [97][2500/3239]	Time 0.239 (0.626)	Data Time 0.001 (0.018)	Loss 2.1258 (2.2301)	Entropy 0.78360 (0.78638)	Top-1 acc 71.484 (70.824)	Top-5 acc 91.797 (88.081)	lr 0.00224
Train [97][2510/3239]	Time 0.234 (0.625)	Data Time 0.001 (0.018)	Loss 2.2322 (2.2301)	Entropy 0.78362 (0.78637)	Top-1 acc 69.531 (70.820)	Top-5 acc 87.109 (88.082)	lr 0.00224
Train [97][2520/3239]	Time 0.227 (0.625)	Data Time 0.001 (0.018)	Loss 2.1226 (2.2302)	Entropy 0.78364 (0.78636)	Top-1 acc 74.609 (70.815)	Top-5 acc 89.453 (88.081)	lr 0.00224
Train [97][2530/3239]	Time 0.276 (0.624)	Data Time 0.001 (0.017)	Loss 2.1366 (2.2299)	Entropy 0.78368 (0.78635)	Top-1 acc 74.609 (70.821)	Top-5 acc 88.672 (88.085)	lr 0.00224
Train [97][2540/3239]	Time 2.465 (0.624)	Data Time 0.001 (0.017)	Loss 2.4255 (2.2298)	Entropy 0.78368 (0.78634)	Top-1 acc 64.844 (70.821)	Top-5 acc 87.109 (88.087)	lr 0.00224
Train [97][2550/3239]	Time 0.329 (0.622)	Data Time 0.001 (0.017)	Loss 2.1956 (2.2298)	Entropy 0.78369 (0.78633)	Top-1 acc 73.047 (70.820)	Top-5 acc 88.281 (88.086)	lr 0.00224
Train [97][2560/3239]	Time 0.231 (0.622)	Data Time 0.002 (0.017)	Loss 2.2986 (2.2297)	Entropy 0.78374 (0.78632)	Top-1 acc 69.141 (70.828)	Top-5 acc 87.109 (88.089)	lr 0.00224
Train [97][2570/3239]	Time 0.242 (0.621)	Data Time 0.001 (0.017)	Loss 2.2537 (2.2296)	Entropy 0.78376 (0.78631)	Top-1 acc 71.875 (70.829)	Top-5 acc 87.500 (88.089)	lr 0.00224
Train [97][2580/3239]	Time 0.234 (0.621)	Data Time 0.002 (0.017)	Loss 2.2185 (2.2296)	Entropy 0.78379 (0.78630)	Top-1 acc 71.484 (70.829)	Top-5 acc 87.500 (88.090)	lr 0.00224
Train [97][2590/3239]	Time 0.348 (0.620)	Data Time 0.001 (0.017)	Loss 2.1578 (2.2295)	Entropy 0.78380 (0.78629)	Top-1 acc 73.047 (70.829)	Top-5 acc 88.672 (88.092)	lr 0.00224
Train [97][2600/3239]	Time 0.246 (0.620)	Data Time 0.001 (0.017)	Loss 2.2904 (2.2295)	Entropy 0.78366 (0.78628)	Top-1 acc 71.484 (70.829)	Top-5 acc 87.891 (88.091)	lr 0.00224
Train [97][2610/3239]	Time 0.238 (0.619)	Data Time 0.001 (0.017)	Loss 2.6027 (2.2298)	Entropy 0.78365 (0.78627)	Top-1 acc 66.016 (70.823)	Top-5 acc 82.812 (88.085)	lr 0.00223
Train [97][2620/3239]	Time 0.250 (0.619)	Data Time 0.001 (0.017)	Loss 2.2059 (2.2297)	Entropy 0.78358 (0.78626)	Top-1 acc 71.875 (70.825)	Top-5 acc 89.062 (88.088)	lr 0.00223
Train [97][2630/3239]	Time 0.342 (0.618)	Data Time 0.001 (0.017)	Loss 2.4298 (2.2297)	Entropy 0.78350 (0.78625)	Top-1 acc 67.188 (70.829)	Top-5 acc 85.156 (88.089)	lr 0.00223
Train [97][2640/3239]	Time 0.236 (0.618)	Data Time 0.001 (0.017)	Loss 2.1961 (2.2298)	Entropy 0.78347 (0.78624)	Top-1 acc 71.875 (70.828)	Top-5 acc 87.500 (88.086)	lr 0.00223
Train [97][2650/3239]	Time 0.237 (0.617)	Data Time 0.002 (0.017)	Loss 2.2767 (2.2298)	Entropy 0.78340 (0.78623)	Top-1 acc 72.266 (70.829)	Top-5 acc 85.938 (88.086)	lr 0.00223
Train [97][2660/3239]	Time 0.234 (0.617)	Data Time 0.001 (0.017)	Loss 2.2407 (2.2300)	Entropy 0.78335 (0.78622)	Top-1 acc 71.875 (70.826)	Top-5 acc 90.234 (88.082)	lr 0.00223
Train [97][2670/3239]	Time 0.262 (0.616)	Data Time 0.001 (0.017)	Loss 2.0866 (2.2299)	Entropy 0.78336 (0.78621)	Top-1 acc 74.219 (70.824)	Top-5 acc 90.234 (88.083)	lr 0.00223
Train [97][2680/3239]	Time 0.280 (0.634)	Data Time 0.004 (0.017)	Loss 2.1924 (2.2299)	Entropy 0.78341 (0.78620)	Top-1 acc 71.875 (70.826)	Top-5 acc 88.281 (88.084)	lr 0.00223
Train [97][2690/3239]	Time 0.240 (0.634)	Data Time 0.002 (0.017)	Loss 2.3729 (2.2299)	Entropy 0.78341 (0.78619)	Top-1 acc 67.969 (70.826)	Top-5 acc 85.547 (88.084)	lr 0.00223
Train [97][2700/3239]	Time 0.230 (0.633)	Data Time 0.001 (0.016)	Loss 2.0892 (2.2302)	Entropy 0.78340 (0.78618)	Top-1 acc 72.656 (70.818)	Top-5 acc 91.406 (88.079)	lr 0.00223
Train [97][2710/3239]	Time 0.264 (0.633)	Data Time 0.002 (0.016)	Loss 2.3129 (2.2303)	Entropy 0.78341 (0.78616)	Top-1 acc 72.656 (70.815)	Top-5 acc 88.281 (88.075)	lr 0.00223
Train [97][2720/3239]	Time 0.230 (0.632)	Data Time 0.001 (0.016)	Loss 2.3695 (2.2305)	Entropy 0.78335 (0.78615)	Top-1 acc 68.359 (70.813)	Top-5 acc 85.156 (88.073)	lr 0.00223
Train [97][2730/3239]	Time 0.232 (0.632)	Data Time 0.001 (0.016)	Loss 2.1111 (2.2305)	Entropy 0.78335 (0.78614)	Top-1 acc 73.438 (70.812)	Top-5 acc 92.578 (88.072)	lr 0.00223
Train [97][2740/3239]	Time 0.240 (0.631)	Data Time 0.001 (0.016)	Loss 2.0960 (2.2304)	Entropy 0.78335 (0.78613)	Top-1 acc 73.438 (70.817)	Top-5 acc 89.844 (88.075)	lr 0.00223
Train [97][2750/3239]	Time 0.221 (0.630)	Data Time 0.001 (0.016)	Loss 2.1182 (2.2303)	Entropy 0.78334 (0.78612)	Top-1 acc 71.875 (70.817)	Top-5 acc 88.281 (88.078)	lr 0.00223
Train [97][2760/3239]	Time 0.230 (0.630)	Data Time 0.001 (0.016)	Loss 2.3510 (2.2304)	Entropy 0.78334 (0.78611)	Top-1 acc 65.625 (70.814)	Top-5 acc 86.719 (88.074)	lr 0.00223
Train [97][2770/3239]	Time 0.226 (0.629)	Data Time 0.001 (0.016)	Loss 2.0694 (2.2304)	Entropy 0.78334 (0.78610)	Top-1 acc 74.609 (70.812)	Top-5 acc 90.234 (88.076)	lr 0.00223
Train [97][2780/3239]	Time 0.230 (0.629)	Data Time 0.001 (0.016)	Loss 2.4781 (2.2307)	Entropy 0.78356 (0.78609)	Top-1 acc 65.625 (70.799)	Top-5 acc 83.984 (88.073)	lr 0.00222
Train [97][2790/3239]	Time 0.221 (0.628)	Data Time 0.001 (0.016)	Loss 2.2051 (2.2308)	Entropy 0.78365 (0.78609)	Top-1 acc 69.531 (70.795)	Top-5 acc 87.891 (88.073)	lr 0.00222
Train [97][2800/3239]	Time 0.222 (0.628)	Data Time 0.001 (0.016)	Loss 2.1691 (2.2308)	Entropy 0.78365 (0.78608)	Top-1 acc 71.484 (70.793)	Top-5 acc 89.453 (88.073)	lr 0.00222
Train [97][2810/3239]	Time 0.231 (0.627)	Data Time 0.001 (0.016)	Loss 2.2436 (2.2310)	Entropy 0.78373 (0.78607)	Top-1 acc 68.359 (70.784)	Top-5 acc 88.672 (88.070)	lr 0.00222
Train [97][2820/3239]	Time 0.232 (0.627)	Data Time 0.001 (0.016)	Loss 2.2567 (2.2310)	Entropy 0.78374 (0.78606)	Top-1 acc 69.531 (70.780)	Top-5 acc 87.891 (88.072)	lr 0.00222
Train [97][2830/3239]	Time 0.247 (0.626)	Data Time 0.001 (0.016)	Loss 2.3733 (2.2310)	Entropy 0.78375 (0.78605)	Top-1 acc 68.359 (70.783)	Top-5 acc 83.594 (88.070)	lr 0.00222
Train [97][2840/3239]	Time 0.239 (0.626)	Data Time 0.001 (0.016)	Loss 2.2322 (2.2311)	Entropy 0.78374 (0.78604)	Top-1 acc 71.094 (70.785)	Top-5 acc 87.891 (88.068)	lr 0.00222
Train [97][2850/3239]	Time 0.228 (0.625)	Data Time 0.001 (0.016)	Loss 2.2779 (2.2313)	Entropy 0.78369 (0.78604)	Top-1 acc 69.922 (70.778)	Top-5 acc 86.328 (88.061)	lr 0.00222
Train [97][2860/3239]	Time 0.233 (0.625)	Data Time 0.001 (0.016)	Loss 2.2943 (2.2314)	Entropy 0.78364 (0.78603)	Top-1 acc 66.016 (70.772)	Top-5 acc 86.719 (88.059)	lr 0.00222
Train [97][2870/3239]	Time 0.247 (0.624)	Data Time 0.002 (0.016)	Loss 2.1882 (2.2314)	Entropy 0.78350 (0.78602)	Top-1 acc 71.094 (70.774)	Top-5 acc 89.453 (88.062)	lr 0.00222
Train [97][2880/3239]	Time 0.324 (0.624)	Data Time 0.001 (0.016)	Loss 2.2901 (2.2313)	Entropy 0.78345 (0.78601)	Top-1 acc 68.359 (70.773)	Top-5 acc 86.719 (88.063)	lr 0.00222
Train [97][2890/3239]	Time 0.231 (0.623)	Data Time 0.001 (0.015)	Loss 2.0769 (2.2313)	Entropy 0.78343 (0.78600)	Top-1 acc 75.781 (70.773)	Top-5 acc 91.406 (88.065)	lr 0.00222
Train [97][2900/3239]	Time 0.243 (0.623)	Data Time 0.001 (0.015)	Loss 2.2266 (2.2313)	Entropy 0.78337 (0.78599)	Top-1 acc 70.703 (70.768)	Top-5 acc 91.797 (88.067)	lr 0.00222
Train [97][2910/3239]	Time 0.274 (0.622)	Data Time 0.001 (0.015)	Loss 2.0624 (2.2311)	Entropy 0.78330 (0.78598)	Top-1 acc 76.172 (70.777)	Top-5 acc 89.453 (88.069)	lr 0.00222
Train [97][2920/3239]	Time 0.249 (0.622)	Data Time 0.001 (0.015)	Loss 2.0698 (2.2311)	Entropy 0.78323 (0.78597)	Top-1 acc 75.781 (70.778)	Top-5 acc 90.625 (88.068)	lr 0.00222
Train [97][2930/3239]	Time 0.273 (0.621)	Data Time 0.001 (0.015)	Loss 2.2865 (2.2312)	Entropy 0.78315 (0.78596)	Top-1 acc 69.922 (70.774)	Top-5 acc 83.984 (88.065)	lr 0.00222
Train [97][2940/3239]	Time 0.280 (0.621)	Data Time 0.001 (0.015)	Loss 2.3036 (2.2313)	Entropy 0.78318 (0.78596)	Top-1 acc 71.484 (70.775)	Top-5 acc 87.109 (88.064)	lr 0.00222
Train [97][2950/3239]	Time 0.251 (0.621)	Data Time 0.001 (0.015)	Loss 2.2014 (2.2311)	Entropy 0.78307 (0.78595)	Top-1 acc 70.703 (70.781)	Top-5 acc 88.281 (88.065)	lr 0.00222
Train [97][2960/3239]	Time 0.258 (0.620)	Data Time 0.001 (0.015)	Loss 2.2339 (2.2310)	Entropy 0.78306 (0.78594)	Top-1 acc 70.312 (70.784)	Top-5 acc 89.062 (88.067)	lr 0.00221
Train [97][2970/3239]	Time 0.228 (0.620)	Data Time 0.001 (0.015)	Loss 2.2041 (2.2309)	Entropy 0.78306 (0.78593)	Top-1 acc 71.094 (70.782)	Top-5 acc 89.062 (88.069)	lr 0.00221
Train [97][2980/3239]	Time 0.258 (0.619)	Data Time 0.001 (0.015)	Loss 2.2324 (2.2308)	Entropy 0.78309 (0.78592)	Top-1 acc 72.266 (70.786)	Top-5 acc 86.719 (88.071)	lr 0.00221
Train [97][2990/3239]	Time 0.239 (0.619)	Data Time 0.001 (0.015)	Loss 2.2144 (2.2308)	Entropy 0.78311 (0.78591)	Top-1 acc 71.875 (70.789)	Top-5 acc 88.281 (88.071)	lr 0.00221
Train [97][3000/3239]	Time 0.250 (0.618)	Data Time 0.002 (0.015)	Loss 2.2698 (2.2309)	Entropy 0.78310 (0.78590)	Top-1 acc 67.969 (70.787)	Top-5 acc 87.500 (88.069)	lr 0.00221
Train [97][3010/3239]	Time 0.271 (0.636)	Data Time 0.003 (0.015)	Loss 2.3657 (2.2309)	Entropy 0.78308 (0.78589)	Top-1 acc 68.750 (70.786)	Top-5 acc 87.109 (88.067)	lr 0.00221
Train [97][3020/3239]	Time 0.237 (0.635)	Data Time 0.002 (0.015)	Loss 2.2503 (2.2308)	Entropy 0.78306 (0.78588)	Top-1 acc 70.703 (70.790)	Top-5 acc 86.328 (88.068)	lr 0.00221
Train [97][3030/3239]	Time 0.229 (0.635)	Data Time 0.001 (0.015)	Loss 2.1463 (2.2308)	Entropy 0.78300 (0.78587)	Top-1 acc 74.609 (70.789)	Top-5 acc 88.281 (88.069)	lr 0.00221
Train [97][3040/3239]	Time 0.260 (0.634)	Data Time 0.002 (0.015)	Loss 2.2450 (2.2308)	Entropy 0.78299 (0.78586)	Top-1 acc 68.750 (70.792)	Top-5 acc 87.891 (88.066)	lr 0.00221
Train [97][3050/3239]	Time 0.321 (0.634)	Data Time 0.001 (0.015)	Loss 2.3127 (2.2309)	Entropy 0.78297 (0.78585)	Top-1 acc 66.406 (70.788)	Top-5 acc 87.891 (88.066)	lr 0.00221
Train [97][3060/3239]	Time 0.274 (0.633)	Data Time 0.001 (0.015)	Loss 2.2599 (2.2308)	Entropy 0.78294 (0.78584)	Top-1 acc 69.922 (70.788)	Top-5 acc 86.328 (88.066)	lr 0.00221
Train [97][3070/3239]	Time 0.260 (0.633)	Data Time 0.003 (0.015)	Loss 2.3014 (2.2307)	Entropy 0.78289 (0.78583)	Top-1 acc 69.922 (70.791)	Top-5 acc 85.156 (88.066)	lr 0.00221
Train [97][3080/3239]	Time 0.228 (0.632)	Data Time 0.001 (0.015)	Loss 2.1840 (2.2306)	Entropy 0.78293 (0.78582)	Top-1 acc 73.828 (70.792)	Top-5 acc 89.453 (88.069)	lr 0.00221
Train [97][3090/3239]	Time 0.432 (0.632)	Data Time 0.002 (0.015)	Loss 2.2124 (2.2307)	Entropy 0.78285 (0.78581)	Top-1 acc 72.656 (70.790)	Top-5 acc 89.062 (88.067)	lr 0.00221
Train [97][3100/3239]	Time 0.259 (0.631)	Data Time 0.001 (0.015)	Loss 2.1636 (2.2308)	Entropy 0.78278 (0.78580)	Top-1 acc 71.875 (70.788)	Top-5 acc 90.234 (88.065)	lr 0.00221
Train [97][3110/3239]	Time 0.251 (0.631)	Data Time 0.001 (0.015)	Loss 2.2475 (2.2309)	Entropy 0.78276 (0.78579)	Top-1 acc 70.703 (70.785)	Top-5 acc 87.500 (88.063)	lr 0.00221
Train [97][3120/3239]	Time 0.232 (0.631)	Data Time 0.001 (0.014)	Loss 2.3266 (2.2309)	Entropy 0.78273 (0.78578)	Top-1 acc 67.969 (70.786)	Top-5 acc 86.719 (88.063)	lr 0.00221
Train [97][3130/3239]	Time 0.265 (0.630)	Data Time 0.002 (0.014)	Loss 2.1625 (2.2309)	Entropy 0.78260 (0.78577)	Top-1 acc 72.656 (70.791)	Top-5 acc 89.453 (88.061)	lr 0.00220
Train [97][3140/3239]	Time 0.264 (0.630)	Data Time 0.001 (0.014)	Loss 2.3112 (2.2309)	Entropy 0.78261 (0.78576)	Top-1 acc 68.359 (70.789)	Top-5 acc 86.719 (88.062)	lr 0.00220
Train [97][3150/3239]	Time 0.233 (0.629)	Data Time 0.001 (0.014)	Loss 2.1138 (2.2308)	Entropy 0.78256 (0.78575)	Top-1 acc 75.000 (70.795)	Top-5 acc 89.453 (88.066)	lr 0.00220
Train [97][3160/3239]	Time 0.222 (0.629)	Data Time 0.001 (0.014)	Loss 2.1017 (2.2308)	Entropy 0.78254 (0.78574)	Top-1 acc 73.828 (70.795)	Top-5 acc 92.578 (88.068)	lr 0.00220
Train [97][3170/3239]	Time 0.245 (0.628)	Data Time 0.002 (0.014)	Loss 2.4067 (2.2309)	Entropy 0.78247 (0.78573)	Top-1 acc 64.844 (70.793)	Top-5 acc 85.938 (88.065)	lr 0.00220
Train [97][3180/3239]	Time 0.225 (0.628)	Data Time 0.000 (0.014)	Loss 2.3183 (2.2308)	Entropy 0.78244 (0.78572)	Top-1 acc 66.406 (70.793)	Top-5 acc 86.719 (88.067)	lr 0.00220
Train [97][3190/3239]	Time 0.233 (0.627)	Data Time 0.000 (0.014)	Loss 2.2541 (2.2309)	Entropy 0.78228 (0.78571)	Top-1 acc 71.094 (70.790)	Top-5 acc 86.328 (88.064)	lr 0.00220
Train [97][3200/3239]	Time 0.226 (0.627)	Data Time 0.000 (0.014)	Loss 2.2726 (2.2310)	Entropy 0.78231 (0.78570)	Top-1 acc 70.312 (70.788)	Top-5 acc 88.281 (88.065)	lr 0.00220
Train [97][3210/3239]	Time 0.225 (0.626)	Data Time 0.000 (0.014)	Loss 2.0203 (2.2309)	Entropy 0.78225 (0.78569)	Top-1 acc 73.828 (70.789)	Top-5 acc 92.188 (88.064)	lr 0.00220
Train [97][3220/3239]	Time 0.221 (0.626)	Data Time 0.000 (0.014)	Loss 2.3893 (2.2315)	Entropy 0.78220 (0.78568)	Top-1 acc 66.406 (70.779)	Top-5 acc 85.547 (88.055)	lr 0.00220
Train [97][3230/3239]	Time 0.225 (0.625)	Data Time 0.000 (0.014)	Loss 2.2947 (2.2315)	Entropy 0.78216 (0.78567)	Top-1 acc 69.141 (70.781)	Top-5 acc 87.109 (88.055)	lr 0.00220
Train [97][3239/3239]	Time 2.323 (0.625)	Data Time 0.000 (0.014)	Loss 2.3033 (2.2315)	Entropy 0.78216 (0.78566)	Top-1 acc 69.136 (70.782)	Top-5 acc 87.654 (88.054)	lr 0.00220
==========Valid [97/120]	loss 1.228	top-1 acc 71.855 (71.855)	top-5 acc 89.499	Train top-1 70.782	top-5 88.054	Entropy 0.78216	Latency-None: 0.000ms	Flops: 546.53M
Train [98][0/3239]	Time 40.923 (40.923)	Data Time 39.251 (39.251)	Loss 2.4337 (2.4337)	Entropy 0.78211 (0.78211)	Top-1 acc 66.016 (66.016)	Top-5 acc 83.984 (83.984)	lr 0.00220
Train [98][10/3239]	Time 2.803 (4.361)	Data Time 0.027 (3.626)	Loss 2.4920 (2.2606)	Entropy 0.78211 (0.78211)	Top-1 acc 64.062 (70.170)	Top-5 acc 83.594 (87.464)	lr 0.00220
Train [98][20/3239]	Time 0.286 (2.406)	Data Time 0.002 (1.900)	Loss 2.1338 (2.2145)	Entropy 0.78211 (0.78211)	Top-1 acc 73.438 (71.019)	Top-5 acc 87.891 (88.021)	lr 0.00220
Train [98][30/3239]	Time 0.246 (1.784)	Data Time 0.001 (1.288)	Loss 2.2939 (2.2180)	Entropy 0.78213 (0.78211)	Top-1 acc 69.922 (70.955)	Top-5 acc 88.672 (88.168)	lr 0.00220
Train [98][40/3239]	Time 0.229 (1.463)	Data Time 0.001 (0.974)	Loss 2.2910 (2.2193)	Entropy 0.78212 (0.78212)	Top-1 acc 67.578 (70.894)	Top-5 acc 87.109 (88.300)	lr 0.00220
Train [98][50/3239]	Time 0.240 (1.268)	Data Time 0.001 (0.783)	Loss 2.2630 (2.2229)	Entropy 0.78300 (0.78224)	Top-1 acc 66.797 (70.688)	Top-5 acc 87.109 (88.343)	lr 0.00220
Train [98][60/3239]	Time 0.223 (1.138)	Data Time 0.001 (0.655)	Loss 2.1580 (2.2154)	Entropy 0.78286 (0.78235)	Top-1 acc 70.703 (70.882)	Top-5 acc 89.453 (88.537)	lr 0.00219
Train [98][70/3239]	Time 0.229 (1.047)	Data Time 0.001 (0.563)	Loss 2.1072 (2.2140)	Entropy 0.78281 (0.78242)	Top-1 acc 76.172 (70.874)	Top-5 acc 91.797 (88.496)	lr 0.00219
Train [98][80/3239]	Time 0.234 (0.977)	Data Time 0.002 (0.494)	Loss 2.2498 (2.2103)	Entropy 0.78282 (0.78247)	Top-1 acc 70.312 (71.041)	Top-5 acc 88.281 (88.518)	lr 0.00219
Train [98][90/3239]	Time 0.222 (0.922)	Data Time 0.001 (0.440)	Loss 2.2968 (2.2101)	Entropy 0.78285 (0.78251)	Top-1 acc 73.047 (71.175)	Top-5 acc 84.375 (88.449)	lr 0.00219
Train [98][100/3239]	Time 0.234 (0.878)	Data Time 0.002 (0.396)	Loss 2.2399 (2.2061)	Entropy 0.78287 (0.78254)	Top-1 acc 70.703 (71.318)	Top-5 acc 89.844 (88.517)	lr 0.00219
Train [98][110/3239]	Time 0.226 (1.352)	Data Time 0.003 (0.361)	Loss 2.1765 (2.2056)	Entropy 0.78284 (0.78257)	Top-1 acc 67.969 (71.340)	Top-5 acc 89.844 (88.570)	lr 0.00219
Train [98][120/3239]	Time 2.680 (1.281)	Data Time 0.002 (0.331)	Loss 2.2681 (2.2062)	Entropy 0.78284 (0.78259)	Top-1 acc 69.531 (71.278)	Top-5 acc 89.062 (88.601)	lr 0.00219
Train [98][130/3239]	Time 0.231 (1.202)	Data Time 0.002 (0.306)	Loss 2.1914 (2.2057)	Entropy 0.78284 (0.78261)	Top-1 acc 70.703 (71.297)	Top-5 acc 89.844 (88.648)	lr 0.00219
Train [98][140/3239]	Time 0.278 (1.150)	Data Time 0.002 (0.284)	Loss 2.2352 (2.2048)	Entropy 0.78278 (0.78262)	Top-1 acc 71.094 (71.293)	Top-5 acc 89.062 (88.672)	lr 0.00219
Train [98][150/3239]	Time 0.351 (1.107)	Data Time 0.002 (0.266)	Loss 2.1886 (2.2033)	Entropy 0.78277 (0.78263)	Top-1 acc 73.047 (71.378)	Top-5 acc 87.109 (88.630)	lr 0.00219
Train [98][160/3239]	Time 0.238 (1.068)	Data Time 0.001 (0.249)	Loss 2.2292 (2.2032)	Entropy 0.78273 (0.78264)	Top-1 acc 71.094 (71.365)	Top-5 acc 88.672 (88.662)	lr 0.00219
Train [98][170/3239]	Time 0.230 (1.034)	Data Time 0.001 (0.235)	Loss 2.1519 (2.2041)	Entropy 0.78270 (0.78264)	Top-1 acc 73.438 (71.290)	Top-5 acc 88.281 (88.638)	lr 0.00219
Train [98][180/3239]	Time 0.173 (1.003)	Data Time 0.001 (0.222)	Loss 2.2265 (2.2073)	Entropy 0.78273 (0.78265)	Top-1 acc 69.922 (71.167)	Top-5 acc 88.672 (88.594)	lr 0.00219
Train [98][190/3239]	Time 0.242 (0.977)	Data Time 0.002 (0.211)	Loss 2.2957 (2.2078)	Entropy 0.78275 (0.78265)	Top-1 acc 69.922 (71.182)	Top-5 acc 87.500 (88.592)	lr 0.00219
Train [98][200/3239]	Time 0.230 (0.952)	Data Time 0.001 (0.200)	Loss 2.1480 (2.2096)	Entropy 0.78273 (0.78266)	Top-1 acc 72.656 (71.109)	Top-5 acc 89.062 (88.569)	lr 0.00219
Train [98][210/3239]	Time 0.217 (0.930)	Data Time 0.001 (0.191)	Loss 2.1371 (2.2095)	Entropy 0.78271 (0.78266)	Top-1 acc 71.875 (71.107)	Top-5 acc 89.062 (88.526)	lr 0.00219
Train [98][220/3239]	Time 0.228 (0.909)	Data Time 0.001 (0.182)	Loss 2.1970 (2.2095)	Entropy 0.78282 (0.78266)	Top-1 acc 69.922 (71.110)	Top-5 acc 89.062 (88.507)	lr 0.00219
Train [98][230/3239]	Time 2.573 (0.891)	Data Time 0.001 (0.174)	Loss 2.1838 (2.2089)	Entropy 0.78282 (0.78267)	Top-1 acc 71.875 (71.146)	Top-5 acc 85.547 (88.488)	lr 0.00219
Train [98][240/3239]	Time 0.274 (0.865)	Data Time 0.001 (0.167)	Loss 2.2818 (2.2106)	Entropy 0.78281 (0.78268)	Top-1 acc 70.312 (71.116)	Top-5 acc 84.375 (88.476)	lr 0.00218
Train [98][250/3239]	Time 0.228 (0.849)	Data Time 0.001 (0.161)	Loss 2.2321 (2.2107)	Entropy 0.78285 (0.78268)	Top-1 acc 68.359 (71.103)	Top-5 acc 91.016 (88.468)	lr 0.00218
Train [98][260/3239]	Time 0.224 (0.835)	Data Time 0.001 (0.155)	Loss 2.3104 (2.2139)	Entropy 0.78283 (0.78269)	Top-1 acc 68.359 (71.023)	Top-5 acc 87.500 (88.395)	lr 0.00218
Train [98][270/3239]	Time 0.220 (0.821)	Data Time 0.001 (0.149)	Loss 2.3084 (2.2157)	Entropy 0.78284 (0.78269)	Top-1 acc 69.922 (70.999)	Top-5 acc 87.500 (88.376)	lr 0.00218
Train [98][280/3239]	Time 0.329 (0.809)	Data Time 0.001 (0.144)	Loss 2.2038 (2.2163)	Entropy 0.78282 (0.78270)	Top-1 acc 71.484 (70.989)	Top-5 acc 89.453 (88.344)	lr 0.00218
Train [98][290/3239]	Time 0.225 (0.798)	Data Time 0.001 (0.139)	Loss 2.0680 (2.2146)	Entropy 0.78276 (0.78270)	Top-1 acc 73.047 (71.009)	Top-5 acc 91.406 (88.379)	lr 0.00218
Train [98][300/3239]	Time 0.227 (0.787)	Data Time 0.001 (0.134)	Loss 2.1986 (2.2140)	Entropy 0.78276 (0.78270)	Top-1 acc 72.266 (71.028)	Top-5 acc 89.453 (88.411)	lr 0.00218
Train [98][310/3239]	Time 0.230 (0.777)	Data Time 0.001 (0.130)	Loss 2.1698 (2.2146)	Entropy 0.78277 (0.78271)	Top-1 acc 71.484 (71.021)	Top-5 acc 88.672 (88.398)	lr 0.00218
Train [98][320/3239]	Time 0.361 (0.768)	Data Time 0.001 (0.126)	Loss 2.3300 (2.2151)	Entropy 0.78271 (0.78271)	Top-1 acc 65.625 (71.017)	Top-5 acc 86.719 (88.404)	lr 0.00218
Train [98][330/3239]	Time 0.222 (0.759)	Data Time 0.001 (0.122)	Loss 2.0365 (2.2125)	Entropy 0.78266 (0.78271)	Top-1 acc 75.000 (71.103)	Top-5 acc 93.750 (88.452)	lr 0.00218
Train [98][340/3239]	Time 2.562 (0.751)	Data Time 0.001 (0.119)	Loss 2.2033 (2.2123)	Entropy 0.78266 (0.78271)	Top-1 acc 71.875 (71.124)	Top-5 acc 88.672 (88.446)	lr 0.00218
Train [98][350/3239]	Time 0.276 (0.737)	Data Time 0.006 (0.115)	Loss 2.1446 (2.2118)	Entropy 0.78264 (0.78270)	Top-1 acc 73.047 (71.145)	Top-5 acc 89.453 (88.458)	lr 0.00218
Train [98][360/3239]	Time 0.258 (0.730)	Data Time 0.001 (0.112)	Loss 2.1994 (2.2117)	Entropy 0.78244 (0.78270)	Top-1 acc 75.391 (71.187)	Top-5 acc 89.844 (88.455)	lr 0.00218
Train [98][370/3239]	Time 0.239 (0.724)	Data Time 0.002 (0.109)	Loss 2.2892 (2.2129)	Entropy 0.78243 (0.78269)	Top-1 acc 67.188 (71.166)	Top-5 acc 89.062 (88.443)	lr 0.00218
Train [98][380/3239]	Time 0.252 (0.717)	Data Time 0.002 (0.106)	Loss 2.1429 (2.2126)	Entropy 0.78241 (0.78268)	Top-1 acc 72.266 (71.188)	Top-5 acc 88.672 (88.453)	lr 0.00218
Train [98][390/3239]	Time 0.237 (0.711)	Data Time 0.002 (0.104)	Loss 2.1572 (2.2127)	Entropy 0.78234 (0.78267)	Top-1 acc 71.484 (71.173)	Top-5 acc 89.453 (88.458)	lr 0.00218
Train [98][400/3239]	Time 0.267 (0.706)	Data Time 0.001 (0.101)	Loss 2.4043 (2.2143)	Entropy 0.78232 (0.78267)	Top-1 acc 67.578 (71.144)	Top-5 acc 82.422 (88.426)	lr 0.00218
Train [98][410/3239]	Time 0.224 (0.701)	Data Time 0.001 (0.099)	Loss 2.0879 (2.2146)	Entropy 0.78233 (0.78266)	Top-1 acc 71.484 (71.152)	Top-5 acc 92.188 (88.418)	lr 0.00218
Train [98][420/3239]	Time 0.218 (0.696)	Data Time 0.001 (0.096)	Loss 2.1204 (2.2124)	Entropy 0.78219 (0.78265)	Top-1 acc 71.094 (71.210)	Top-5 acc 91.016 (88.458)	lr 0.00217
Train [98][430/3239]	Time 0.227 (0.691)	Data Time 0.001 (0.094)	Loss 2.2208 (2.2129)	Entropy 0.78218 (0.78264)	Top-1 acc 71.875 (71.226)	Top-5 acc 88.672 (88.443)	lr 0.00217
Train [98][440/3239]	Time 0.232 (0.687)	Data Time 0.001 (0.092)	Loss 2.2891 (2.2121)	Entropy 0.78210 (0.78263)	Top-1 acc 70.312 (71.243)	Top-5 acc 88.672 (88.466)	lr 0.00217
Train [98][450/3239]	Time 2.632 (0.682)	Data Time 0.001 (0.090)	Loss 2.2387 (2.2133)	Entropy 0.78210 (0.78262)	Top-1 acc 73.438 (71.215)	Top-5 acc 89.062 (88.443)	lr 0.00217
Train [98][460/3239]	Time 0.233 (0.672)	Data Time 0.001 (0.088)	Loss 2.2475 (2.2143)	Entropy 0.78202 (0.78260)	Top-1 acc 69.531 (71.189)	Top-5 acc 89.062 (88.424)	lr 0.00217
Train [98][470/3239]	Time 0.242 (0.668)	Data Time 0.001 (0.086)	Loss 2.2658 (2.2149)	Entropy 0.78200 (0.78259)	Top-1 acc 69.922 (71.158)	Top-5 acc 87.109 (88.411)	lr 0.00217
Train [98][480/3239]	Time 0.246 (0.780)	Data Time 0.002 (0.085)	Loss 2.1707 (2.2162)	Entropy 0.78199 (0.78258)	Top-1 acc 73.438 (71.100)	Top-5 acc 91.406 (88.393)	lr 0.00217
Train [98][490/3239]	Time 0.324 (0.775)	Data Time 0.002 (0.083)	Loss 2.2154 (2.2164)	Entropy 0.78194 (0.78257)	Top-1 acc 69.141 (71.110)	Top-5 acc 86.328 (88.374)	lr 0.00217
Train [98][500/3239]	Time 0.245 (0.769)	Data Time 0.002 (0.081)	Loss 2.2342 (2.2159)	Entropy 0.78188 (0.78255)	Top-1 acc 67.969 (71.138)	Top-5 acc 87.891 (88.379)	lr 0.00217
Train [98][510/3239]	Time 0.220 (0.763)	Data Time 0.001 (0.080)	Loss 2.1983 (2.2151)	Entropy 0.78188 (0.78254)	Top-1 acc 73.438 (71.156)	Top-5 acc 88.672 (88.392)	lr 0.00217
Train [98][520/3239]	Time 0.227 (0.758)	Data Time 0.001 (0.078)	Loss 2.0792 (2.2152)	Entropy 0.78187 (0.78253)	Top-1 acc 73.047 (71.169)	Top-5 acc 90.234 (88.390)	lr 0.00217
Train [98][530/3239]	Time 0.376 (0.753)	Data Time 0.002 (0.077)	Loss 2.2017 (2.2155)	Entropy 0.78192 (0.78251)	Top-1 acc 71.875 (71.157)	Top-5 acc 86.719 (88.389)	lr 0.00217
Train [98][540/3239]	Time 0.217 (0.748)	Data Time 0.001 (0.076)	Loss 2.2754 (2.2150)	Entropy 0.78190 (0.78250)	Top-1 acc 71.094 (71.163)	Top-5 acc 88.281 (88.410)	lr 0.00217
Train [98][550/3239]	Time 0.228 (0.743)	Data Time 0.001 (0.074)	Loss 2.1266 (2.2142)	Entropy 0.78186 (0.78249)	Top-1 acc 76.562 (71.186)	Top-5 acc 90.625 (88.424)	lr 0.00217
Train [98][560/3239]	Time 2.589 (0.738)	Data Time 0.001 (0.073)	Loss 2.1648 (2.2142)	Entropy 0.78186 (0.78248)	Top-1 acc 72.266 (71.186)	Top-5 acc 90.234 (88.428)	lr 0.00217
Train [98][570/3239]	Time 0.266 (0.730)	Data Time 0.001 (0.072)	Loss 2.3280 (2.2144)	Entropy 0.78186 (0.78247)	Top-1 acc 68.359 (71.183)	Top-5 acc 85.938 (88.423)	lr 0.00217
Train [98][580/3239]	Time 0.237 (0.725)	Data Time 0.001 (0.070)	Loss 2.2164 (2.2137)	Entropy 0.78172 (0.78246)	Top-1 acc 69.922 (71.205)	Top-5 acc 86.328 (88.428)	lr 0.00217
Train [98][590/3239]	Time 0.261 (0.721)	Data Time 0.002 (0.069)	Loss 2.2350 (2.2132)	Entropy 0.78166 (0.78244)	Top-1 acc 70.703 (71.221)	Top-5 acc 87.891 (88.437)	lr 0.00216
Train [98][600/3239]	Time 0.232 (0.717)	Data Time 0.002 (0.068)	Loss 2.1505 (2.2127)	Entropy 0.78167 (0.78243)	Top-1 acc 72.656 (71.235)	Top-5 acc 89.453 (88.442)	lr 0.00216
Train [98][610/3239]	Time 0.230 (0.713)	Data Time 0.001 (0.067)	Loss 2.2017 (2.2130)	Entropy 0.78144 (0.78242)	Top-1 acc 75.000 (71.225)	Top-5 acc 89.062 (88.438)	lr 0.00216
Train [98][620/3239]	Time 0.236 (0.710)	Data Time 0.001 (0.066)	Loss 2.2671 (2.2128)	Entropy 0.78142 (0.78240)	Top-1 acc 67.578 (71.232)	Top-5 acc 87.500 (88.438)	lr 0.00216
Train [98][630/3239]	Time 0.232 (0.706)	Data Time 0.001 (0.065)	Loss 2.1507 (2.2134)	Entropy 0.78139 (0.78238)	Top-1 acc 74.609 (71.222)	Top-5 acc 90.625 (88.425)	lr 0.00216
Train [98][640/3239]	Time 0.218 (0.703)	Data Time 0.001 (0.064)	Loss 2.2920 (2.2132)	Entropy 0.78139 (0.78237)	Top-1 acc 70.312 (71.214)	Top-5 acc 87.891 (88.438)	lr 0.00216
Train [98][650/3239]	Time 0.234 (0.699)	Data Time 0.001 (0.063)	Loss 2.3229 (2.2138)	Entropy 0.78138 (0.78235)	Top-1 acc 63.672 (71.202)	Top-5 acc 88.281 (88.426)	lr 0.00216
Train [98][660/3239]	Time 0.287 (0.696)	Data Time 0.001 (0.062)	Loss 2.1512 (2.2139)	Entropy 0.78145 (0.78234)	Top-1 acc 73.047 (71.192)	Top-5 acc 89.844 (88.427)	lr 0.00216
Train [98][670/3239]	Time 2.535 (0.693)	Data Time 0.001 (0.061)	Loss 2.1849 (2.2142)	Entropy 0.78145 (0.78233)	Top-1 acc 69.531 (71.173)	Top-5 acc 87.891 (88.427)	lr 0.00216
Train [98][680/3239]	Time 0.235 (0.686)	Data Time 0.001 (0.060)	Loss 2.2455 (2.2149)	Entropy 0.78134 (0.78231)	Top-1 acc 72.656 (71.161)	Top-5 acc 87.891 (88.401)	lr 0.00216
Train [98][690/3239]	Time 0.246 (0.683)	Data Time 0.001 (0.059)	Loss 2.3200 (2.2150)	Entropy 0.78117 (0.78230)	Top-1 acc 71.094 (71.170)	Top-5 acc 84.766 (88.406)	lr 0.00216
Train [98][700/3239]	Time 0.346 (0.680)	Data Time 0.001 (0.059)	Loss 2.2034 (2.2145)	Entropy 0.78118 (0.78228)	Top-1 acc 70.703 (71.186)	Top-5 acc 89.844 (88.419)	lr 0.00216
Train [98][710/3239]	Time 0.231 (0.678)	Data Time 0.001 (0.058)	Loss 2.2316 (2.2148)	Entropy 0.78105 (0.78226)	Top-1 acc 69.531 (71.183)	Top-5 acc 87.109 (88.410)	lr 0.00216
Train [98][720/3239]	Time 0.227 (0.675)	Data Time 0.001 (0.057)	Loss 2.3359 (2.2151)	Entropy 0.78101 (0.78225)	Top-1 acc 69.922 (71.185)	Top-5 acc 86.328 (88.407)	lr 0.00216
Train [98][730/3239]	Time 0.245 (0.672)	Data Time 0.001 (0.056)	Loss 2.2483 (2.2154)	Entropy 0.78093 (0.78223)	Top-1 acc 72.266 (71.181)	Top-5 acc 87.891 (88.404)	lr 0.00216
Train [98][740/3239]	Time 0.328 (0.670)	Data Time 0.002 (0.056)	Loss 2.1792 (2.2156)	Entropy 0.78092 (0.78221)	Top-1 acc 71.875 (71.175)	Top-5 acc 90.625 (88.405)	lr 0.00216
Train [98][750/3239]	Time 0.235 (0.667)	Data Time 0.001 (0.055)	Loss 2.1859 (2.2159)	Entropy 0.78079 (0.78219)	Top-1 acc 73.047 (71.160)	Top-5 acc 89.453 (88.406)	lr 0.00216
Train [98][760/3239]	Time 0.221 (0.664)	Data Time 0.001 (0.054)	Loss 2.2352 (2.2160)	Entropy 0.78077 (0.78217)	Top-1 acc 70.312 (71.170)	Top-5 acc 87.891 (88.404)	lr 0.00216
Train [98][770/3239]	Time 0.229 (0.662)	Data Time 0.001 (0.053)	Loss 2.2270 (2.2165)	Entropy 0.78080 (0.78216)	Top-1 acc 69.922 (71.157)	Top-5 acc 87.109 (88.394)	lr 0.00215
Train [98][780/3239]	Time 2.583 (0.659)	Data Time 0.002 (0.053)	Loss 2.2704 (2.2167)	Entropy 0.78080 (0.78214)	Top-1 acc 71.875 (71.151)	Top-5 acc 84.766 (88.387)	lr 0.00215
Train [98][790/3239]	Time 0.226 (0.654)	Data Time 0.001 (0.052)	Loss 2.2974 (2.2167)	Entropy 0.78079 (0.78212)	Top-1 acc 66.797 (71.149)	Top-5 acc 87.500 (88.392)	lr 0.00215
Train [98][800/3239]	Time 0.221 (0.651)	Data Time 0.001 (0.051)	Loss 2.2076 (2.2170)	Entropy 0.78073 (0.78210)	Top-1 acc 70.312 (71.133)	Top-5 acc 90.625 (88.394)	lr 0.00215
Train [98][810/3239]	Time 0.228 (0.649)	Data Time 0.001 (0.051)	Loss 2.2184 (2.2168)	Entropy 0.78071 (0.78209)	Top-1 acc 72.266 (71.147)	Top-5 acc 89.062 (88.408)	lr 0.00215
Train [98][820/3239]	Time 0.219 (0.647)	Data Time 0.001 (0.050)	Loss 2.2538 (2.2173)	Entropy 0.78066 (0.78207)	Top-1 acc 73.047 (71.140)	Top-5 acc 89.062 (88.405)	lr 0.00215
Train [98][830/3239]	Time 0.222 (0.645)	Data Time 0.001 (0.050)	Loss 2.0620 (2.2168)	Entropy 0.78068 (0.78205)	Top-1 acc 74.219 (71.153)	Top-5 acc 92.578 (88.412)	lr 0.00215
Train [98][840/3239]	Time 0.236 (0.707)	Data Time 0.003 (0.049)	Loss 2.3587 (2.2175)	Entropy 0.78065 (0.78204)	Top-1 acc 70.312 (71.134)	Top-5 acc 85.938 (88.394)	lr 0.00215
Train [98][850/3239]	Time 0.236 (0.705)	Data Time 0.002 (0.049)	Loss 2.1334 (2.2176)	Entropy 0.78064 (0.78202)	Top-1 acc 73.047 (71.123)	Top-5 acc 89.844 (88.394)	lr 0.00215
Train [98][860/3239]	Time 0.229 (0.703)	Data Time 0.002 (0.048)	Loss 2.1810 (2.2184)	Entropy 0.78063 (0.78200)	Top-1 acc 75.000 (71.100)	Top-5 acc 89.062 (88.377)	lr 0.00215
Train [98][870/3239]	Time 0.233 (0.700)	Data Time 0.001 (0.048)	Loss 2.1491 (2.2184)	Entropy 0.78057 (0.78199)	Top-1 acc 71.484 (71.096)	Top-5 acc 90.234 (88.382)	lr 0.00215
Train [98][880/3239]	Time 0.241 (0.698)	Data Time 0.001 (0.047)	Loss 2.3251 (2.2186)	Entropy 0.78057 (0.78197)	Top-1 acc 68.750 (71.088)	Top-5 acc 87.891 (88.380)	lr 0.00215
Train [98][890/3239]	Time 2.563 (0.695)	Data Time 0.002 (0.046)	Loss 2.2309 (2.2185)	Entropy 0.78057 (0.78196)	Top-1 acc 68.750 (71.090)	Top-5 acc 89.844 (88.382)	lr 0.00215
Train [98][900/3239]	Time 0.238 (0.690)	Data Time 0.001 (0.046)	Loss 2.3021 (2.2185)	Entropy 0.78046 (0.78194)	Top-1 acc 64.453 (71.087)	Top-5 acc 89.844 (88.385)	lr 0.00215
Train [98][910/3239]	Time 0.324 (0.688)	Data Time 0.001 (0.045)	Loss 2.2735 (2.2189)	Entropy 0.78045 (0.78192)	Top-1 acc 66.797 (71.078)	Top-5 acc 85.938 (88.370)	lr 0.00215
Train [98][920/3239]	Time 0.241 (0.686)	Data Time 0.001 (0.045)	Loss 2.2499 (2.2191)	Entropy 0.78043 (0.78191)	Top-1 acc 68.359 (71.067)	Top-5 acc 89.062 (88.376)	lr 0.00215
Train [98][930/3239]	Time 0.243 (0.683)	Data Time 0.001 (0.045)	Loss 2.4036 (2.2189)	Entropy 0.78044 (0.78189)	Top-1 acc 68.359 (71.075)	Top-5 acc 83.203 (88.382)	lr 0.00215
Train [98][940/3239]	Time 0.239 (0.681)	Data Time 0.001 (0.044)	Loss 2.1262 (2.2186)	Entropy 0.78043 (0.78188)	Top-1 acc 75.391 (71.082)	Top-5 acc 90.234 (88.383)	lr 0.00214
Train [98][950/3239]	Time 0.316 (0.679)	Data Time 0.001 (0.044)	Loss 2.2273 (2.2194)	Entropy 0.78042 (0.78186)	Top-1 acc 73.828 (71.058)	Top-5 acc 87.891 (88.369)	lr 0.00214
Train [98][960/3239]	Time 0.219 (0.677)	Data Time 0.001 (0.043)	Loss 2.3116 (2.2197)	Entropy 0.78028 (0.78185)	Top-1 acc 66.797 (71.050)	Top-5 acc 87.891 (88.359)	lr 0.00214
Train [98][970/3239]	Time 0.231 (0.675)	Data Time 0.001 (0.043)	Loss 2.1820 (2.2192)	Entropy 0.78026 (0.78183)	Top-1 acc 72.266 (71.078)	Top-5 acc 87.500 (88.361)	lr 0.00214
Train [98][980/3239]	Time 0.222 (0.673)	Data Time 0.001 (0.042)	Loss 2.2099 (2.2194)	Entropy 0.78023 (0.78181)	Top-1 acc 73.828 (71.074)	Top-5 acc 88.281 (88.364)	lr 0.00214
Train [98][990/3239]	Time 0.265 (0.671)	Data Time 0.002 (0.042)	Loss 2.3082 (2.2193)	Entropy 0.78020 (0.78180)	Top-1 acc 68.750 (71.075)	Top-5 acc 87.109 (88.359)	lr 0.00214
Train [98][1000/3239]	Time 2.680 (0.669)	Data Time 0.001 (0.042)	Loss 2.2514 (2.2195)	Entropy 0.78020 (0.78178)	Top-1 acc 69.141 (71.054)	Top-5 acc 89.844 (88.361)	lr 0.00214
Train [98][1010/3239]	Time 0.288 (0.665)	Data Time 0.002 (0.041)	Loss 2.2529 (2.2199)	Entropy 0.78022 (0.78177)	Top-1 acc 68.750 (71.040)	Top-5 acc 87.500 (88.349)	lr 0.00214
Train [98][1020/3239]	Time 0.272 (0.663)	Data Time 0.001 (0.041)	Loss 2.2024 (2.2196)	Entropy 0.78019 (0.78175)	Top-1 acc 71.875 (71.042)	Top-5 acc 88.281 (88.350)	lr 0.00214
Train [98][1030/3239]	Time 0.235 (0.661)	Data Time 0.001 (0.040)	Loss 2.2052 (2.2198)	Entropy 0.78019 (0.78174)	Top-1 acc 71.484 (71.032)	Top-5 acc 87.109 (88.346)	lr 0.00214
Train [98][1040/3239]	Time 0.232 (0.659)	Data Time 0.002 (0.040)	Loss 2.2315 (2.2207)	Entropy 0.78015 (0.78172)	Top-1 acc 70.703 (71.014)	Top-5 acc 87.500 (88.327)	lr 0.00214
Train [98][1050/3239]	Time 0.231 (0.658)	Data Time 0.001 (0.040)	Loss 2.1429 (2.2205)	Entropy 0.78016 (0.78171)	Top-1 acc 73.438 (71.019)	Top-5 acc 90.234 (88.331)	lr 0.00214
Train [98][1060/3239]	Time 0.214 (0.656)	Data Time 0.001 (0.039)	Loss 2.1614 (2.2209)	Entropy 0.78015 (0.78169)	Top-1 acc 73.828 (71.013)	Top-5 acc 89.453 (88.324)	lr 0.00214
Train [98][1070/3239]	Time 0.205 (0.654)	Data Time 0.001 (0.039)	Loss 2.2858 (2.2208)	Entropy 0.78004 (0.78168)	Top-1 acc 66.406 (71.014)	Top-5 acc 88.281 (88.320)	lr 0.00214
Train [98][1080/3239]	Time 0.226 (0.652)	Data Time 0.001 (0.039)	Loss 2.1113 (2.2201)	Entropy 0.77996 (0.78166)	Top-1 acc 73.047 (71.022)	Top-5 acc 90.234 (88.334)	lr 0.00214
Train [98][1090/3239]	Time 0.224 (0.651)	Data Time 0.001 (0.038)	Loss 2.1579 (2.2205)	Entropy 0.77987 (0.78164)	Top-1 acc 70.312 (71.016)	Top-5 acc 91.016 (88.326)	lr 0.00214
Train [98][1100/3239]	Time 0.255 (0.649)	Data Time 0.001 (0.038)	Loss 2.2425 (2.2207)	Entropy 0.77988 (0.78163)	Top-1 acc 71.484 (71.003)	Top-5 acc 88.672 (88.321)	lr 0.00214
Train [98][1110/3239]	Time 2.480 (0.647)	Data Time 0.001 (0.038)	Loss 2.1283 (2.2210)	Entropy 0.77988 (0.78161)	Top-1 acc 73.438 (70.992)	Top-5 acc 89.844 (88.314)	lr 0.00214
Train [98][1120/3239]	Time 0.356 (0.644)	Data Time 0.001 (0.037)	Loss 2.1498 (2.2208)	Entropy 0.77986 (0.78160)	Top-1 acc 74.219 (71.003)	Top-5 acc 88.281 (88.311)	lr 0.00213
Train [98][1130/3239]	Time 0.229 (0.642)	Data Time 0.001 (0.037)	Loss 2.2053 (2.2211)	Entropy 0.77986 (0.78158)	Top-1 acc 71.094 (70.996)	Top-5 acc 88.672 (88.303)	lr 0.00213
Train [98][1140/3239]	Time 0.232 (0.641)	Data Time 0.002 (0.037)	Loss 2.2692 (2.2210)	Entropy 0.77978 (0.78157)	Top-1 acc 67.578 (70.998)	Top-5 acc 88.672 (88.305)	lr 0.00213
Train [98][1150/3239]	Time 0.229 (0.640)	Data Time 0.001 (0.036)	Loss 2.1777 (2.2213)	Entropy 0.77973 (0.78155)	Top-1 acc 68.359 (70.993)	Top-5 acc 90.234 (88.302)	lr 0.00213
Train [98][1160/3239]	Time 0.267 (0.638)	Data Time 0.001 (0.036)	Loss 2.2159 (2.2210)	Entropy 0.77961 (0.78153)	Top-1 acc 69.922 (71.002)	Top-5 acc 89.844 (88.310)	lr 0.00213
Train [98][1170/3239]	Time 0.227 (0.637)	Data Time 0.001 (0.036)	Loss 2.1624 (2.2211)	Entropy 0.77949 (0.78152)	Top-1 acc 75.781 (71.006)	Top-5 acc 89.844 (88.309)	lr 0.00213
Train [98][1180/3239]	Time 0.227 (0.635)	Data Time 0.001 (0.035)	Loss 2.1942 (2.2212)	Entropy 0.77942 (0.78150)	Top-1 acc 70.703 (70.999)	Top-5 acc 88.672 (88.312)	lr 0.00213
Train [98][1190/3239]	Time 0.219 (0.634)	Data Time 0.002 (0.035)	Loss 2.3197 (2.2214)	Entropy 0.77937 (0.78148)	Top-1 acc 66.797 (70.986)	Top-5 acc 87.891 (88.309)	lr 0.00213
Train [98][1200/3239]	Time 0.302 (0.678)	Data Time 0.004 (0.035)	Loss 2.3193 (2.2212)	Entropy 0.77937 (0.78146)	Top-1 acc 65.234 (70.983)	Top-5 acc 87.109 (88.312)	lr 0.00213
Train [98][1210/3239]	Time 0.259 (0.677)	Data Time 0.002 (0.035)	Loss 2.2993 (2.2215)	Entropy 0.77938 (0.78145)	Top-1 acc 70.312 (70.978)	Top-5 acc 86.719 (88.303)	lr 0.00213
Train [98][1220/3239]	Time 2.668 (0.675)	Data Time 0.002 (0.034)	Loss 2.2449 (2.2214)	Entropy 0.77938 (0.78143)	Top-1 acc 72.656 (70.978)	Top-5 acc 86.719 (88.310)	lr 0.00213
Train [98][1230/3239]	Time 0.246 (0.672)	Data Time 0.001 (0.034)	Loss 2.1114 (2.2214)	Entropy 0.77937 (0.78141)	Top-1 acc 74.609 (70.981)	Top-5 acc 89.453 (88.314)	lr 0.00213
Train [98][1240/3239]	Time 0.242 (0.670)	Data Time 0.001 (0.034)	Loss 2.3018 (2.2217)	Entropy 0.77936 (0.78140)	Top-1 acc 71.484 (70.976)	Top-5 acc 87.500 (88.312)	lr 0.00213
Train [98][1250/3239]	Time 0.228 (0.669)	Data Time 0.001 (0.034)	Loss 2.2118 (2.2216)	Entropy 0.77933 (0.78138)	Top-1 acc 69.922 (70.976)	Top-5 acc 89.453 (88.317)	lr 0.00213
Train [98][1260/3239]	Time 0.230 (0.667)	Data Time 0.001 (0.033)	Loss 2.2771 (2.2220)	Entropy 0.77922 (0.78136)	Top-1 acc 69.922 (70.969)	Top-5 acc 88.672 (88.311)	lr 0.00213
Train [98][1270/3239]	Time 0.221 (0.666)	Data Time 0.001 (0.033)	Loss 2.3097 (2.2219)	Entropy 0.77918 (0.78135)	Top-1 acc 66.797 (70.965)	Top-5 acc 86.719 (88.312)	lr 0.00213
Train [98][1280/3239]	Time 0.229 (0.664)	Data Time 0.001 (0.033)	Loss 2.1643 (2.2221)	Entropy 0.77918 (0.78133)	Top-1 acc 73.047 (70.963)	Top-5 acc 89.453 (88.308)	lr 0.00213
Train [98][1290/3239]	Time 0.328 (0.663)	Data Time 0.001 (0.033)	Loss 2.2644 (2.2222)	Entropy 0.77919 (0.78131)	Top-1 acc 71.094 (70.959)	Top-5 acc 88.281 (88.303)	lr 0.00213
Train [98][1300/3239]	Time 0.232 (0.662)	Data Time 0.001 (0.032)	Loss 2.2609 (2.2225)	Entropy 0.77922 (0.78130)	Top-1 acc 71.094 (70.955)	Top-5 acc 87.109 (88.298)	lr 0.00212
Train [98][1310/3239]	Time 0.240 (0.660)	Data Time 0.001 (0.032)	Loss 2.3381 (2.2228)	Entropy 0.77916 (0.78128)	Top-1 acc 68.359 (70.953)	Top-5 acc 87.109 (88.289)	lr 0.00212
Train [98][1320/3239]	Time 0.240 (0.659)	Data Time 0.001 (0.032)	Loss 2.1232 (2.2225)	Entropy 0.77915 (0.78127)	Top-1 acc 72.656 (70.961)	Top-5 acc 87.500 (88.290)	lr 0.00212
Train [98][1330/3239]	Time 2.567 (0.658)	Data Time 0.001 (0.032)	Loss 2.1448 (2.2225)	Entropy 0.77915 (0.78125)	Top-1 acc 73.438 (70.963)	Top-5 acc 89.844 (88.295)	lr 0.00212
Train [98][1340/3239]	Time 0.238 (0.655)	Data Time 0.001 (0.031)	Loss 2.1563 (2.2220)	Entropy 0.77908 (0.78123)	Top-1 acc 72.656 (70.971)	Top-5 acc 89.453 (88.301)	lr 0.00212
Train [98][1350/3239]	Time 0.230 (0.653)	Data Time 0.001 (0.031)	Loss 2.1851 (2.2217)	Entropy 0.77904 (0.78122)	Top-1 acc 72.656 (70.976)	Top-5 acc 88.672 (88.302)	lr 0.00212
Train [98][1360/3239]	Time 0.244 (0.652)	Data Time 0.001 (0.031)	Loss 2.2598 (2.2217)	Entropy 0.77898 (0.78120)	Top-1 acc 72.266 (70.986)	Top-5 acc 87.891 (88.301)	lr 0.00212
Train [98][1370/3239]	Time 0.384 (0.651)	Data Time 0.002 (0.031)	Loss 2.1180 (2.2219)	Entropy 0.77907 (0.78118)	Top-1 acc 73.047 (70.979)	Top-5 acc 89.062 (88.298)	lr 0.00212
Train [98][1380/3239]	Time 0.244 (0.650)	Data Time 0.001 (0.031)	Loss 2.0421 (2.2214)	Entropy 0.77910 (0.78117)	Top-1 acc 74.609 (70.989)	Top-5 acc 93.359 (88.309)	lr 0.00212
Train [98][1390/3239]	Time 0.233 (0.649)	Data Time 0.002 (0.030)	Loss 2.3343 (2.2216)	Entropy 0.77909 (0.78115)	Top-1 acc 67.188 (70.983)	Top-5 acc 85.547 (88.308)	lr 0.00212
Train [98][1400/3239]	Time 0.236 (0.648)	Data Time 0.001 (0.030)	Loss 2.2042 (2.2216)	Entropy 0.77905 (0.78114)	Top-1 acc 69.531 (70.979)	Top-5 acc 89.844 (88.306)	lr 0.00212
Train [98][1410/3239]	Time 0.249 (0.646)	Data Time 0.001 (0.030)	Loss 2.2761 (2.2216)	Entropy 0.77903 (0.78112)	Top-1 acc 67.188 (70.981)	Top-5 acc 85.156 (88.309)	lr 0.00212
Train [98][1420/3239]	Time 0.252 (0.645)	Data Time 0.001 (0.030)	Loss 2.1079 (2.2218)	Entropy 0.77903 (0.78111)	Top-1 acc 71.875 (70.974)	Top-5 acc 91.797 (88.306)	lr 0.00212
Train [98][1430/3239]	Time 0.253 (0.644)	Data Time 0.001 (0.030)	Loss 2.0766 (2.2219)	Entropy 0.77894 (0.78110)	Top-1 acc 74.609 (70.977)	Top-5 acc 92.578 (88.304)	lr 0.00212
Train [98][1440/3239]	Time 2.575 (0.643)	Data Time 0.001 (0.029)	Loss 2.1810 (2.2221)	Entropy 0.77894 (0.78108)	Top-1 acc 71.094 (70.967)	Top-5 acc 89.062 (88.295)	lr 0.00212
Train [98][1450/3239]	Time 0.242 (0.640)	Data Time 0.001 (0.029)	Loss 2.1281 (2.2222)	Entropy 0.77889 (0.78107)	Top-1 acc 72.266 (70.962)	Top-5 acc 89.453 (88.291)	lr 0.00212
Train [98][1460/3239]	Time 0.368 (0.639)	Data Time 0.001 (0.029)	Loss 2.1760 (2.2221)	Entropy 0.77884 (0.78105)	Top-1 acc 71.094 (70.966)	Top-5 acc 88.672 (88.291)	lr 0.00212
Train [98][1470/3239]	Time 0.241 (0.638)	Data Time 0.001 (0.029)	Loss 2.1045 (2.2220)	Entropy 0.77889 (0.78104)	Top-1 acc 74.219 (70.976)	Top-5 acc 92.188 (88.291)	lr 0.00212
Train [98][1480/3239]	Time 0.238 (0.637)	Data Time 0.001 (0.029)	Loss 2.1257 (2.2219)	Entropy 0.77882 (0.78102)	Top-1 acc 74.219 (70.976)	Top-5 acc 89.844 (88.292)	lr 0.00211
Train [98][1490/3239]	Time 0.243 (0.636)	Data Time 0.001 (0.028)	Loss 2.2488 (2.2219)	Entropy 0.77885 (0.78101)	Top-1 acc 69.141 (70.973)	Top-5 acc 87.500 (88.292)	lr 0.00211
Train [98][1500/3239]	Time 0.330 (0.635)	Data Time 0.001 (0.028)	Loss 2.1458 (2.2217)	Entropy 0.77882 (0.78099)	Top-1 acc 71.484 (70.977)	Top-5 acc 89.453 (88.296)	lr 0.00211
Train [98][1510/3239]	Time 0.238 (0.634)	Data Time 0.001 (0.028)	Loss 2.3341 (2.2219)	Entropy 0.77881 (0.78098)	Top-1 acc 67.578 (70.976)	Top-5 acc 86.328 (88.288)	lr 0.00211
Train [98][1520/3239]	Time 0.242 (0.633)	Data Time 0.002 (0.028)	Loss 2.1247 (2.2221)	Entropy 0.77879 (0.78096)	Top-1 acc 73.438 (70.972)	Top-5 acc 88.281 (88.281)	lr 0.00211
Train [98][1530/3239]	Time 0.237 (0.632)	Data Time 0.001 (0.028)	Loss 2.3448 (2.2223)	Entropy 0.77882 (0.78095)	Top-1 acc 68.359 (70.969)	Top-5 acc 86.719 (88.279)	lr 0.00211
Train [98][1540/3239]	Time 0.328 (0.631)	Data Time 0.001 (0.028)	Loss 2.2996 (2.2219)	Entropy 0.77875 (0.78094)	Top-1 acc 73.047 (70.981)	Top-5 acc 87.500 (88.289)	lr 0.00211
Train [98][1550/3239]	Time 2.601 (0.630)	Data Time 0.001 (0.027)	Loss 2.1749 (2.2218)	Entropy 0.77875 (0.78092)	Top-1 acc 70.312 (70.984)	Top-5 acc 89.844 (88.291)	lr 0.00211
Train [98][1560/3239]	Time 0.232 (0.628)	Data Time 0.001 (0.027)	Loss 2.2519 (2.2217)	Entropy 0.77872 (0.78091)	Top-1 acc 71.875 (70.987)	Top-5 acc 87.500 (88.290)	lr 0.00211
Train [98][1570/3239]	Time 0.252 (0.662)	Data Time 0.003 (0.027)	Loss 2.1867 (2.2221)	Entropy 0.77858 (0.78089)	Top-1 acc 71.484 (70.974)	Top-5 acc 87.109 (88.280)	lr 0.00211
Train [98][1580/3239]	Time 0.245 (0.661)	Data Time 0.002 (0.027)	Loss 2.2320 (2.2221)	Entropy 0.77854 (0.78088)	Top-1 acc 69.922 (70.976)	Top-5 acc 90.625 (88.284)	lr 0.00211
Train [98][1590/3239]	Time 0.224 (0.659)	Data Time 0.001 (0.027)	Loss 2.2973 (2.2222)	Entropy 0.77853 (0.78086)	Top-1 acc 68.750 (70.975)	Top-5 acc 87.109 (88.284)	lr 0.00211
Train [98][1600/3239]	Time 0.229 (0.658)	Data Time 0.001 (0.027)	Loss 2.2851 (2.2224)	Entropy 0.77847 (0.78085)	Top-1 acc 71.875 (70.972)	Top-5 acc 87.891 (88.282)	lr 0.00211
Train [98][1610/3239]	Time 0.244 (0.657)	Data Time 0.001 (0.026)	Loss 2.3516 (2.2223)	Entropy 0.77841 (0.78083)	Top-1 acc 67.578 (70.969)	Top-5 acc 87.109 (88.285)	lr 0.00211
Train [98][1620/3239]	Time 0.240 (0.656)	Data Time 0.001 (0.026)	Loss 2.2251 (2.2222)	Entropy 0.77841 (0.78082)	Top-1 acc 71.094 (70.968)	Top-5 acc 87.109 (88.280)	lr 0.00211
Train [98][1630/3239]	Time 0.236 (0.655)	Data Time 0.001 (0.026)	Loss 2.1388 (2.2221)	Entropy 0.77842 (0.78080)	Top-1 acc 69.922 (70.973)	Top-5 acc 90.625 (88.278)	lr 0.00211
Train [98][1640/3239]	Time 0.231 (0.654)	Data Time 0.001 (0.026)	Loss 2.1621 (2.2224)	Entropy 0.77842 (0.78079)	Top-1 acc 73.047 (70.959)	Top-5 acc 89.453 (88.273)	lr 0.00211
Train [98][1650/3239]	Time 0.252 (0.653)	Data Time 0.001 (0.026)	Loss 2.2293 (2.2227)	Entropy 0.77833 (0.78077)	Top-1 acc 71.484 (70.954)	Top-5 acc 87.500 (88.266)	lr 0.00210
Train [98][1660/3239]	Time 2.664 (0.652)	Data Time 0.001 (0.026)	Loss 2.1727 (2.2228)	Entropy 0.77833 (0.78076)	Top-1 acc 70.703 (70.947)	Top-5 acc 88.281 (88.263)	lr 0.00210
Train [98][1670/3239]	Time 0.330 (0.649)	Data Time 0.001 (0.026)	Loss 2.0896 (2.2223)	Entropy 0.77835 (0.78075)	Top-1 acc 73.047 (70.966)	Top-5 acc 90.234 (88.273)	lr 0.00210
Train [98][1680/3239]	Time 0.203 (0.648)	Data Time 0.001 (0.025)	Loss 2.3125 (2.2226)	Entropy 0.77833 (0.78073)	Top-1 acc 69.531 (70.959)	Top-5 acc 86.719 (88.268)	lr 0.00210
Train [98][1690/3239]	Time 0.237 (0.647)	Data Time 0.002 (0.025)	Loss 2.4294 (2.2225)	Entropy 0.77815 (0.78072)	Top-1 acc 65.234 (70.952)	Top-5 acc 85.938 (88.266)	lr 0.00210
Train [98][1700/3239]	Time 0.222 (0.646)	Data Time 0.001 (0.025)	Loss 2.2081 (2.2227)	Entropy 0.77817 (0.78070)	Top-1 acc 69.531 (70.947)	Top-5 acc 89.062 (88.268)	lr 0.00210
Train [98][1710/3239]	Time 0.303 (0.645)	Data Time 0.002 (0.025)	Loss 2.4387 (2.2227)	Entropy 0.77817 (0.78069)	Top-1 acc 65.625 (70.943)	Top-5 acc 85.156 (88.267)	lr 0.00210
Train [98][1720/3239]	Time 0.242 (0.644)	Data Time 0.001 (0.025)	Loss 2.2748 (2.2228)	Entropy 0.77810 (0.78067)	Top-1 acc 72.656 (70.943)	Top-5 acc 85.156 (88.264)	lr 0.00210
Train [98][1730/3239]	Time 0.244 (0.643)	Data Time 0.001 (0.025)	Loss 2.1245 (2.2229)	Entropy 0.77810 (0.78066)	Top-1 acc 73.438 (70.938)	Top-5 acc 89.453 (88.261)	lr 0.00210
Train [98][1740/3239]	Time 0.229 (0.642)	Data Time 0.001 (0.025)	Loss 2.2514 (2.2230)	Entropy 0.77809 (0.78064)	Top-1 acc 72.266 (70.937)	Top-5 acc 87.500 (88.258)	lr 0.00210
Train [98][1750/3239]	Time 0.230 (0.642)	Data Time 0.001 (0.024)	Loss 2.3398 (2.2228)	Entropy 0.77809 (0.78063)	Top-1 acc 68.750 (70.941)	Top-5 acc 86.328 (88.262)	lr 0.00210
Train [98][1760/3239]	Time 0.225 (0.641)	Data Time 0.001 (0.024)	Loss 2.0660 (2.2226)	Entropy 0.77807 (0.78061)	Top-1 acc 75.000 (70.949)	Top-5 acc 91.406 (88.264)	lr 0.00210
Train [98][1770/3239]	Time 2.523 (0.640)	Data Time 0.002 (0.024)	Loss 2.2702 (2.2230)	Entropy 0.77807 (0.78060)	Top-1 acc 71.094 (70.942)	Top-5 acc 87.891 (88.258)	lr 0.00210
Train [98][1780/3239]	Time 0.263 (0.638)	Data Time 0.002 (0.024)	Loss 2.1385 (2.2231)	Entropy 0.77803 (0.78058)	Top-1 acc 76.953 (70.940)	Top-5 acc 89.453 (88.256)	lr 0.00210
Train [98][1790/3239]	Time 0.231 (0.637)	Data Time 0.002 (0.024)	Loss 2.3145 (2.2229)	Entropy 0.77795 (0.78057)	Top-1 acc 69.922 (70.947)	Top-5 acc 86.719 (88.262)	lr 0.00210
Train [98][1800/3239]	Time 0.328 (0.636)	Data Time 0.001 (0.024)	Loss 2.2395 (2.2228)	Entropy 0.77795 (0.78055)	Top-1 acc 69.531 (70.952)	Top-5 acc 87.500 (88.261)	lr 0.00210
Train [98][1810/3239]	Time 0.241 (0.635)	Data Time 0.002 (0.024)	Loss 2.0902 (2.2228)	Entropy 0.77799 (0.78054)	Top-1 acc 73.438 (70.953)	Top-5 acc 90.234 (88.263)	lr 0.00210
Train [98][1820/3239]	Time 0.240 (0.634)	Data Time 0.001 (0.024)	Loss 2.1619 (2.2228)	Entropy 0.77798 (0.78053)	Top-1 acc 71.875 (70.949)	Top-5 acc 89.453 (88.262)	lr 0.00210
Train [98][1830/3239]	Time 0.222 (0.633)	Data Time 0.001 (0.023)	Loss 2.2578 (2.2225)	Entropy 0.77788 (0.78051)	Top-1 acc 68.750 (70.960)	Top-5 acc 85.938 (88.266)	lr 0.00209
Train [98][1840/3239]	Time 0.327 (0.632)	Data Time 0.001 (0.023)	Loss 2.2360 (2.2225)	Entropy 0.77787 (0.78050)	Top-1 acc 69.922 (70.958)	Top-5 acc 87.891 (88.266)	lr 0.00209
Train [98][1850/3239]	Time 0.227 (0.631)	Data Time 0.001 (0.023)	Loss 2.2093 (2.2225)	Entropy 0.77785 (0.78048)	Top-1 acc 69.141 (70.961)	Top-5 acc 89.844 (88.272)	lr 0.00209
Train [98][1860/3239]	Time 0.231 (0.631)	Data Time 0.001 (0.023)	Loss 2.1508 (2.2225)	Entropy 0.77772 (0.78047)	Top-1 acc 73.438 (70.959)	Top-5 acc 88.672 (88.269)	lr 0.00209
Train [98][1870/3239]	Time 0.223 (0.630)	Data Time 0.001 (0.023)	Loss 2.1694 (2.2224)	Entropy 0.77771 (0.78045)	Top-1 acc 69.531 (70.961)	Top-5 acc 89.453 (88.273)	lr 0.00209
Train [98][1880/3239]	Time 2.572 (0.629)	Data Time 0.001 (0.023)	Loss 2.2169 (2.2222)	Entropy 0.77771 (0.78044)	Top-1 acc 72.266 (70.968)	Top-5 acc 90.234 (88.280)	lr 0.00209
Train [98][1890/3239]	Time 0.241 (0.627)	Data Time 0.001 (0.023)	Loss 2.1561 (2.2221)	Entropy 0.77771 (0.78043)	Top-1 acc 74.219 (70.974)	Top-5 acc 87.891 (88.279)	lr 0.00209
Train [98][1900/3239]	Time 0.235 (0.626)	Data Time 0.001 (0.023)	Loss 2.2140 (2.2220)	Entropy 0.77770 (0.78041)	Top-1 acc 69.141 (70.982)	Top-5 acc 89.844 (88.283)	lr 0.00209
Train [98][1910/3239]	Time 0.236 (0.625)	Data Time 0.001 (0.023)	Loss 2.3655 (2.2221)	Entropy 0.77766 (0.78040)	Top-1 acc 69.141 (70.981)	Top-5 acc 86.328 (88.284)	lr 0.00209
Train [98][1920/3239]	Time 0.241 (0.625)	Data Time 0.002 (0.022)	Loss 2.2858 (2.2222)	Entropy 0.77769 (0.78038)	Top-1 acc 68.750 (70.981)	Top-5 acc 87.500 (88.282)	lr 0.00209
Train [98][1930/3239]	Time 0.276 (0.650)	Data Time 0.003 (0.022)	Loss 2.1779 (2.2224)	Entropy 0.77771 (0.78037)	Top-1 acc 71.875 (70.982)	Top-5 acc 89.062 (88.277)	lr 0.00209
Train [98][1940/3239]	Time 0.249 (0.649)	Data Time 0.002 (0.022)	Loss 2.2605 (2.2225)	Entropy 0.77762 (0.78035)	Top-1 acc 70.312 (70.977)	Top-5 acc 85.938 (88.275)	lr 0.00209
Train [98][1950/3239]	Time 0.224 (0.648)	Data Time 0.002 (0.022)	Loss 2.1615 (2.2224)	Entropy 0.77758 (0.78034)	Top-1 acc 73.047 (70.976)	Top-5 acc 89.453 (88.272)	lr 0.00209
Train [98][1960/3239]	Time 0.239 (0.647)	Data Time 0.001 (0.022)	Loss 2.1175 (2.2223)	Entropy 0.77755 (0.78033)	Top-1 acc 77.344 (70.976)	Top-5 acc 89.844 (88.274)	lr 0.00209
Train [98][1970/3239]	Time 0.320 (0.647)	Data Time 0.001 (0.022)	Loss 2.3709 (2.2223)	Entropy 0.77753 (0.78031)	Top-1 acc 65.625 (70.977)	Top-5 acc 85.938 (88.277)	lr 0.00209
Train [98][1980/3239]	Time 0.272 (0.646)	Data Time 0.001 (0.022)	Loss 2.3276 (2.2227)	Entropy 0.77749 (0.78030)	Top-1 acc 67.188 (70.967)	Top-5 acc 88.281 (88.271)	lr 0.00209
Train [98][1990/3239]	Time 2.686 (0.645)	Data Time 0.001 (0.022)	Loss 2.2718 (2.2226)	Entropy 0.77749 (0.78028)	Top-1 acc 69.922 (70.965)	Top-5 acc 86.719 (88.271)	lr 0.00209
Train [98][2000/3239]	Time 0.250 (0.643)	Data Time 0.002 (0.022)	Loss 2.3300 (2.2225)	Entropy 0.77754 (0.78027)	Top-1 acc 68.359 (70.971)	Top-5 acc 85.938 (88.269)	lr 0.00209
Train [98][2010/3239]	Time 0.399 (0.642)	Data Time 0.001 (0.021)	Loss 2.2127 (2.2226)	Entropy 0.77752 (0.78026)	Top-1 acc 71.875 (70.970)	Top-5 acc 86.328 (88.266)	lr 0.00208
Train [98][2020/3239]	Time 0.271 (0.642)	Data Time 0.001 (0.021)	Loss 2.1070 (2.2225)	Entropy 0.77750 (0.78024)	Top-1 acc 74.609 (70.972)	Top-5 acc 91.406 (88.268)	lr 0.00208
Train [98][2030/3239]	Time 0.247 (0.641)	Data Time 0.001 (0.021)	Loss 2.5988 (2.2226)	Entropy 0.77742 (0.78023)	Top-1 acc 60.938 (70.973)	Top-5 acc 83.594 (88.268)	lr 0.00208
Train [98][2040/3239]	Time 0.228 (0.640)	Data Time 0.001 (0.021)	Loss 2.2691 (2.2225)	Entropy 0.77738 (0.78022)	Top-1 acc 71.094 (70.976)	Top-5 acc 86.719 (88.270)	lr 0.00208
Train [98][2050/3239]	Time 0.336 (0.639)	Data Time 0.001 (0.021)	Loss 2.1479 (2.2226)	Entropy 0.77741 (0.78020)	Top-1 acc 72.266 (70.975)	Top-5 acc 89.844 (88.265)	lr 0.00208
Train [98][2060/3239]	Time 0.224 (0.639)	Data Time 0.001 (0.021)	Loss 2.2040 (2.2227)	Entropy 0.77741 (0.78019)	Top-1 acc 71.484 (70.974)	Top-5 acc 89.062 (88.262)	lr 0.00208
Train [98][2070/3239]	Time 0.228 (0.638)	Data Time 0.001 (0.021)	Loss 2.2487 (2.2229)	Entropy 0.77746 (0.78017)	Top-1 acc 67.969 (70.970)	Top-5 acc 87.500 (88.256)	lr 0.00208
Train [98][2080/3239]	Time 0.234 (0.637)	Data Time 0.001 (0.021)	Loss 2.3684 (2.2228)	Entropy 0.77730 (0.78016)	Top-1 acc 65.234 (70.972)	Top-5 acc 85.547 (88.257)	lr 0.00208
Train [98][2090/3239]	Time 0.335 (0.636)	Data Time 0.001 (0.021)	Loss 2.2691 (2.2229)	Entropy 0.77728 (0.78015)	Top-1 acc 72.266 (70.973)	Top-5 acc 85.547 (88.257)	lr 0.00208
Train [98][2100/3239]	Time 2.548 (0.636)	Data Time 0.001 (0.021)	Loss 2.2092 (2.2229)	Entropy 0.77728 (0.78013)	Top-1 acc 69.141 (70.974)	Top-5 acc 88.281 (88.256)	lr 0.00208
Train [98][2110/3239]	Time 0.228 (0.634)	Data Time 0.001 (0.021)	Loss 2.2170 (2.2228)	Entropy 0.77723 (0.78012)	Top-1 acc 71.875 (70.979)	Top-5 acc 87.891 (88.257)	lr 0.00208
Train [98][2120/3239]	Time 0.224 (0.633)	Data Time 0.001 (0.020)	Loss 2.2276 (2.2228)	Entropy 0.77718 (0.78011)	Top-1 acc 72.266 (70.978)	Top-5 acc 86.719 (88.255)	lr 0.00208
Train [98][2130/3239]	Time 0.223 (0.632)	Data Time 0.001 (0.020)	Loss 2.1028 (2.2228)	Entropy 0.77707 (0.78009)	Top-1 acc 71.484 (70.978)	Top-5 acc 92.578 (88.261)	lr 0.00208
Train [98][2140/3239]	Time 0.227 (0.631)	Data Time 0.001 (0.020)	Loss 2.1500 (2.2226)	Entropy 0.77710 (0.78008)	Top-1 acc 72.656 (70.982)	Top-5 acc 89.844 (88.267)	lr 0.00208
Train [98][2150/3239]	Time 0.214 (0.631)	Data Time 0.001 (0.020)	Loss 2.2419 (2.2223)	Entropy 0.77705 (0.78006)	Top-1 acc 71.094 (70.993)	Top-5 acc 88.281 (88.276)	lr 0.00208
Train [98][2160/3239]	Time 0.228 (0.630)	Data Time 0.001 (0.020)	Loss 2.1038 (2.2224)	Entropy 0.77699 (0.78005)	Top-1 acc 75.000 (70.994)	Top-5 acc 90.625 (88.274)	lr 0.00208
Train [98][2170/3239]	Time 0.224 (0.629)	Data Time 0.001 (0.020)	Loss 2.1957 (2.2223)	Entropy 0.77693 (0.78004)	Top-1 acc 71.484 (70.998)	Top-5 acc 86.719 (88.273)	lr 0.00208
Train [98][2180/3239]	Time 0.326 (0.629)	Data Time 0.001 (0.020)	Loss 2.1698 (2.2220)	Entropy 0.77680 (0.78002)	Top-1 acc 69.531 (71.002)	Top-5 acc 87.891 (88.279)	lr 0.00208
Train [98][2190/3239]	Time 0.231 (0.628)	Data Time 0.001 (0.020)	Loss 2.2586 (2.2221)	Entropy 0.77678 (0.78001)	Top-1 acc 69.141 (71.003)	Top-5 acc 85.938 (88.275)	lr 0.00207
Train [98][2200/3239]	Time 0.262 (0.627)	Data Time 0.001 (0.020)	Loss 2.1490 (2.2220)	Entropy 0.77674 (0.77999)	Top-1 acc 75.000 (71.008)	Top-5 acc 88.672 (88.274)	lr 0.00207
Train [98][2210/3239]	Time 2.712 (0.627)	Data Time 0.001 (0.020)	Loss 2.4217 (2.2222)	Entropy 0.77674 (0.77998)	Top-1 acc 64.844 (71.004)	Top-5 acc 83.984 (88.272)	lr 0.00207
Train [98][2220/3239]	Time 0.269 (0.625)	Data Time 0.001 (0.020)	Loss 2.2221 (2.2223)	Entropy 0.77671 (0.77996)	Top-1 acc 71.875 (71.001)	Top-5 acc 87.891 (88.270)	lr 0.00207
Train [98][2230/3239]	Time 0.233 (0.624)	Data Time 0.001 (0.020)	Loss 2.2005 (2.2222)	Entropy 0.77674 (0.77995)	Top-1 acc 71.094 (71.004)	Top-5 acc 88.281 (88.272)	lr 0.00207
Train [98][2240/3239]	Time 0.253 (0.624)	Data Time 0.001 (0.019)	Loss 2.2020 (2.2222)	Entropy 0.77676 (0.77993)	Top-1 acc 71.094 (70.999)	Top-5 acc 87.891 (88.271)	lr 0.00207
Train [98][2250/3239]	Time 0.222 (0.623)	Data Time 0.001 (0.019)	Loss 2.0274 (2.2221)	Entropy 0.77674 (0.77992)	Top-1 acc 77.344 (71.001)	Top-5 acc 91.016 (88.273)	lr 0.00207
Train [98][2260/3239]	Time 0.226 (0.622)	Data Time 0.001 (0.019)	Loss 2.3894 (2.2222)	Entropy 0.77670 (0.77991)	Top-1 acc 68.359 (71.002)	Top-5 acc 84.375 (88.272)	lr 0.00207
Train [98][2270/3239]	Time 0.219 (0.622)	Data Time 0.001 (0.019)	Loss 2.2525 (2.2224)	Entropy 0.77670 (0.77989)	Top-1 acc 69.141 (70.998)	Top-5 acc 87.500 (88.269)	lr 0.00207
Train [98][2280/3239]	Time 0.229 (0.621)	Data Time 0.001 (0.019)	Loss 2.2509 (2.2224)	Entropy 0.77664 (0.77988)	Top-1 acc 70.312 (70.996)	Top-5 acc 87.500 (88.269)	lr 0.00207
Train [98][2290/3239]	Time 0.230 (0.645)	Data Time 0.002 (0.019)	Loss 2.2643 (2.2225)	Entropy 0.77659 (0.77986)	Top-1 acc 71.484 (70.994)	Top-5 acc 86.719 (88.267)	lr 0.00207
Train [98][2300/3239]	Time 0.260 (0.644)	Data Time 0.002 (0.019)	Loss 2.3373 (2.2224)	Entropy 0.77652 (0.77985)	Top-1 acc 69.922 (70.997)	Top-5 acc 85.547 (88.270)	lr 0.00207
Train [98][2310/3239]	Time 0.232 (0.643)	Data Time 0.002 (0.019)	Loss 2.1912 (2.2224)	Entropy 0.77645 (0.77983)	Top-1 acc 66.797 (70.994)	Top-5 acc 89.453 (88.271)	lr 0.00207
Train [98][2320/3239]	Time 2.553 (0.643)	Data Time 0.001 (0.019)	Loss 2.5414 (2.2226)	Entropy 0.77645 (0.77982)	Top-1 acc 65.234 (70.989)	Top-5 acc 83.984 (88.268)	lr 0.00207
Train [98][2330/3239]	Time 0.226 (0.641)	Data Time 0.002 (0.019)	Loss 2.2270 (2.2228)	Entropy 0.77636 (0.77981)	Top-1 acc 72.656 (70.987)	Top-5 acc 87.109 (88.264)	lr 0.00207
Train [98][2340/3239]	Time 0.236 (0.640)	Data Time 0.001 (0.019)	Loss 2.1450 (2.2229)	Entropy 0.77633 (0.77979)	Top-1 acc 72.656 (70.986)	Top-5 acc 91.016 (88.262)	lr 0.00207
Train [98][2350/3239]	Time 0.398 (0.640)	Data Time 0.002 (0.019)	Loss 2.1849 (2.2229)	Entropy 0.77631 (0.77978)	Top-1 acc 72.656 (70.987)	Top-5 acc 90.625 (88.263)	lr 0.00207
Train [98][2360/3239]	Time 0.229 (0.639)	Data Time 0.001 (0.019)	Loss 2.1337 (2.2229)	Entropy 0.77626 (0.77976)	Top-1 acc 74.219 (70.986)	Top-5 acc 90.625 (88.263)	lr 0.00207
Train [98][2370/3239]	Time 0.227 (0.638)	Data Time 0.001 (0.018)	Loss 2.2513 (2.2228)	Entropy 0.77629 (0.77975)	Top-1 acc 71.094 (70.992)	Top-5 acc 88.672 (88.265)	lr 0.00206
Train [98][2380/3239]	Time 0.260 (0.638)	Data Time 0.002 (0.018)	Loss 2.3580 (2.2229)	Entropy 0.77625 (0.77973)	Top-1 acc 67.578 (70.990)	Top-5 acc 85.547 (88.264)	lr 0.00206
Train [98][2390/3239]	Time 0.254 (0.637)	Data Time 0.001 (0.018)	Loss 2.2308 (2.2230)	Entropy 0.77621 (0.77972)	Top-1 acc 67.188 (70.986)	Top-5 acc 91.406 (88.262)	lr 0.00206
Train [98][2400/3239]	Time 0.224 (0.636)	Data Time 0.001 (0.018)	Loss 2.0799 (2.2230)	Entropy 0.77616 (0.77970)	Top-1 acc 75.000 (70.984)	Top-5 acc 91.797 (88.263)	lr 0.00206
Train [98][2410/3239]	Time 0.233 (0.636)	Data Time 0.001 (0.018)	Loss 2.1107 (2.2230)	Entropy 0.77616 (0.77969)	Top-1 acc 73.047 (70.986)	Top-5 acc 88.672 (88.263)	lr 0.00206
Train [98][2420/3239]	Time 0.278 (0.635)	Data Time 0.001 (0.018)	Loss 2.3538 (2.2229)	Entropy 0.77627 (0.77967)	Top-1 acc 66.016 (70.986)	Top-5 acc 86.719 (88.266)	lr 0.00206
Train [98][2430/3239]	Time 2.606 (0.635)	Data Time 0.001 (0.018)	Loss 2.2669 (2.2232)	Entropy 0.77627 (0.77966)	Top-1 acc 72.266 (70.981)	Top-5 acc 88.281 (88.262)	lr 0.00206
Train [98][2440/3239]	Time 0.247 (0.633)	Data Time 0.002 (0.018)	Loss 2.1425 (2.2232)	Entropy 0.77619 (0.77964)	Top-1 acc 73.828 (70.979)	Top-5 acc 89.453 (88.260)	lr 0.00206
Train [98][2450/3239]	Time 0.239 (0.632)	Data Time 0.001 (0.018)	Loss 2.1936 (2.2232)	Entropy 0.77617 (0.77963)	Top-1 acc 71.875 (70.980)	Top-5 acc 89.453 (88.260)	lr 0.00206
Train [98][2460/3239]	Time 0.244 (0.632)	Data Time 0.001 (0.018)	Loss 2.1913 (2.2232)	Entropy 0.77618 (0.77962)	Top-1 acc 72.266 (70.980)	Top-5 acc 87.891 (88.261)	lr 0.00206
Train [98][2470/3239]	Time 0.266 (0.631)	Data Time 0.002 (0.018)	Loss 2.0799 (2.2230)	Entropy 0.77606 (0.77960)	Top-1 acc 73.438 (70.985)	Top-5 acc 90.625 (88.261)	lr 0.00206
Train [98][2480/3239]	Time 0.346 (0.631)	Data Time 0.001 (0.018)	Loss 2.2761 (2.2231)	Entropy 0.77605 (0.77959)	Top-1 acc 69.531 (70.984)	Top-5 acc 86.719 (88.259)	lr 0.00206
Train [98][2490/3239]	Time 0.226 (0.630)	Data Time 0.001 (0.018)	Loss 2.1643 (2.2232)	Entropy 0.77603 (0.77957)	Top-1 acc 67.969 (70.979)	Top-5 acc 90.234 (88.259)	lr 0.00206
Train [98][2500/3239]	Time 0.234 (0.629)	Data Time 0.001 (0.018)	Loss 2.0310 (2.2230)	Entropy 0.77590 (0.77956)	Top-1 acc 76.172 (70.982)	Top-5 acc 92.578 (88.263)	lr 0.00206
Train [98][2510/3239]	Time 0.237 (0.629)	Data Time 0.002 (0.018)	Loss 2.2790 (2.2230)	Entropy 0.77591 (0.77955)	Top-1 acc 66.797 (70.980)	Top-5 acc 88.281 (88.263)	lr 0.00206
Train [98][2520/3239]	Time 0.320 (0.628)	Data Time 0.001 (0.017)	Loss 2.1269 (2.2228)	Entropy 0.77591 (0.77953)	Top-1 acc 72.266 (70.984)	Top-5 acc 90.625 (88.265)	lr 0.00206
Train [98][2530/3239]	Time 0.230 (0.628)	Data Time 0.001 (0.017)	Loss 2.2086 (2.2229)	Entropy 0.77583 (0.77952)	Top-1 acc 69.922 (70.980)	Top-5 acc 86.719 (88.264)	lr 0.00206
Train [98][2540/3239]	Time 2.506 (0.627)	Data Time 0.001 (0.017)	Loss 2.1597 (2.2228)	Entropy 0.77583 (0.77950)	Top-1 acc 75.000 (70.983)	Top-5 acc 89.453 (88.268)	lr 0.00206
Train [98][2550/3239]	Time 0.242 (0.626)	Data Time 0.001 (0.017)	Loss 2.2817 (2.2227)	Entropy 0.77578 (0.77949)	Top-1 acc 70.312 (70.986)	Top-5 acc 89.062 (88.271)	lr 0.00205
Train [98][2560/3239]	Time 0.224 (0.625)	Data Time 0.001 (0.017)	Loss 2.1742 (2.2227)	Entropy 0.77579 (0.77947)	Top-1 acc 72.656 (70.984)	Top-5 acc 87.891 (88.274)	lr 0.00205
Train [98][2570/3239]	Time 0.235 (0.625)	Data Time 0.001 (0.017)	Loss 2.1665 (2.2227)	Entropy 0.77584 (0.77946)	Top-1 acc 75.781 (70.990)	Top-5 acc 87.891 (88.274)	lr 0.00205
Train [98][2580/3239]	Time 0.223 (0.624)	Data Time 0.001 (0.017)	Loss 2.2182 (2.2226)	Entropy 0.77585 (0.77944)	Top-1 acc 68.359 (70.991)	Top-5 acc 88.281 (88.273)	lr 0.00205
Train [98][2590/3239]	Time 0.235 (0.623)	Data Time 0.001 (0.017)	Loss 2.1910 (2.2226)	Entropy 0.77580 (0.77943)	Top-1 acc 73.438 (70.990)	Top-5 acc 89.062 (88.272)	lr 0.00205
Train [98][2600/3239]	Time 0.237 (0.623)	Data Time 0.002 (0.017)	Loss 2.1940 (2.2226)	Entropy 0.77578 (0.77942)	Top-1 acc 71.094 (70.992)	Top-5 acc 89.062 (88.271)	lr 0.00205
Train [98][2610/3239]	Time 0.231 (0.622)	Data Time 0.001 (0.017)	Loss 2.2782 (2.2225)	Entropy 0.77580 (0.77940)	Top-1 acc 69.141 (70.995)	Top-5 acc 85.547 (88.273)	lr 0.00205
Train [98][2620/3239]	Time 0.237 (0.622)	Data Time 0.001 (0.017)	Loss 2.1973 (2.2227)	Entropy 0.77572 (0.77939)	Top-1 acc 71.484 (70.994)	Top-5 acc 89.062 (88.268)	lr 0.00205
Train [98][2630/3239]	Time 0.235 (0.621)	Data Time 0.001 (0.017)	Loss 2.2388 (2.2227)	Entropy 0.77573 (0.77937)	Top-1 acc 71.875 (70.994)	Top-5 acc 87.500 (88.269)	lr 0.00205
Train [98][2640/3239]	Time 0.286 (0.621)	Data Time 0.001 (0.017)	Loss 2.2588 (2.2227)	Entropy 0.77563 (0.77936)	Top-1 acc 70.703 (70.993)	Top-5 acc 86.328 (88.265)	lr 0.00205
Train [98][2650/3239]	Time 0.489 (0.639)	Data Time 0.004 (0.017)	Loss 2.2392 (2.2226)	Entropy 0.77553 (0.77935)	Top-1 acc 73.047 (70.996)	Top-5 acc 86.719 (88.264)	lr 0.00205
Train [98][2660/3239]	Time 0.279 (0.639)	Data Time 0.002 (0.017)	Loss 2.2011 (2.2225)	Entropy 0.77553 (0.77933)	Top-1 acc 71.484 (71.001)	Top-5 acc 89.844 (88.268)	lr 0.00205
Train [98][2670/3239]	Time 0.229 (0.639)	Data Time 0.002 (0.017)	Loss 2.3584 (2.2225)	Entropy 0.77561 (0.77932)	Top-1 acc 67.578 (71.001)	Top-5 acc 85.547 (88.266)	lr 0.00205
Train [98][2680/3239]	Time 0.224 (0.638)	Data Time 0.001 (0.017)	Loss 2.1388 (2.2224)	Entropy 0.77565 (0.77930)	Top-1 acc 76.953 (71.004)	Top-5 acc 89.453 (88.269)	lr 0.00205
Train [98][2690/3239]	Time 0.316 (0.637)	Data Time 0.001 (0.016)	Loss 2.2840 (2.2223)	Entropy 0.77561 (0.77929)	Top-1 acc 68.750 (71.005)	Top-5 acc 87.109 (88.271)	lr 0.00205
Train [98][2700/3239]	Time 0.222 (0.637)	Data Time 0.001 (0.016)	Loss 2.1615 (2.2224)	Entropy 0.77565 (0.77928)	Top-1 acc 75.000 (71.004)	Top-5 acc 88.281 (88.267)	lr 0.00205
Train [98][2710/3239]	Time 0.254 (0.636)	Data Time 0.001 (0.016)	Loss 2.1916 (2.2225)	Entropy 0.77566 (0.77926)	Top-1 acc 73.047 (71.005)	Top-5 acc 89.844 (88.265)	lr 0.00205
Train [98][2720/3239]	Time 0.304 (0.636)	Data Time 0.001 (0.016)	Loss 2.3391 (2.2228)	Entropy 0.77562 (0.77925)	Top-1 acc 69.141 (70.999)	Top-5 acc 87.500 (88.258)	lr 0.00205
Train [98][2730/3239]	Time 0.303 (0.635)	Data Time 0.001 (0.016)	Loss 2.2906 (2.2229)	Entropy 0.77555 (0.77924)	Top-1 acc 66.797 (70.994)	Top-5 acc 86.328 (88.257)	lr 0.00204
Train [98][2740/3239]	Time 0.266 (0.635)	Data Time 0.004 (0.016)	Loss 2.3901 (2.2229)	Entropy 0.77556 (0.77922)	Top-1 acc 68.359 (70.993)	Top-5 acc 86.719 (88.258)	lr 0.00204
Train [98][2750/3239]	Time 0.239 (0.634)	Data Time 0.002 (0.016)	Loss 2.2766 (2.2230)	Entropy 0.77552 (0.77921)	Top-1 acc 67.578 (70.993)	Top-5 acc 85.938 (88.255)	lr 0.00204
Train [98][2760/3239]	Time 0.288 (0.634)	Data Time 0.001 (0.016)	Loss 2.2379 (2.2230)	Entropy 0.77548 (0.77920)	Top-1 acc 71.875 (70.993)	Top-5 acc 87.109 (88.254)	lr 0.00204
Train [98][2770/3239]	Time 0.253 (0.633)	Data Time 0.001 (0.016)	Loss 2.1735 (2.2232)	Entropy 0.77552 (0.77918)	Top-1 acc 73.828 (70.989)	Top-5 acc 87.891 (88.249)	lr 0.00204
Train [98][2780/3239]	Time 0.237 (0.633)	Data Time 0.004 (0.016)	Loss 2.2103 (2.2232)	Entropy 0.77545 (0.77917)	Top-1 acc 70.312 (70.989)	Top-5 acc 91.016 (88.250)	lr 0.00204
Train [98][2790/3239]	Time 0.254 (0.632)	Data Time 0.001 (0.016)	Loss 2.1416 (2.2233)	Entropy 0.77540 (0.77916)	Top-1 acc 72.656 (70.986)	Top-5 acc 89.062 (88.246)	lr 0.00204
Train [98][2800/3239]	Time 0.271 (0.632)	Data Time 0.001 (0.016)	Loss 2.4076 (2.2233)	Entropy 0.77548 (0.77914)	Top-1 acc 67.969 (70.990)	Top-5 acc 86.719 (88.249)	lr 0.00204
Train [98][2810/3239]	Time 0.215 (0.631)	Data Time 0.001 (0.016)	Loss 2.3689 (2.2234)	Entropy 0.77547 (0.77913)	Top-1 acc 66.797 (70.988)	Top-5 acc 85.156 (88.246)	lr 0.00204
Train [98][2820/3239]	Time 0.216 (0.630)	Data Time 0.001 (0.016)	Loss 2.2832 (2.2235)	Entropy 0.77546 (0.77912)	Top-1 acc 67.969 (70.986)	Top-5 acc 86.328 (88.243)	lr 0.00204
Train [98][2830/3239]	Time 0.252 (0.630)	Data Time 0.002 (0.016)	Loss 2.1196 (2.2237)	Entropy 0.77539 (0.77910)	Top-1 acc 74.609 (70.983)	Top-5 acc 87.891 (88.240)	lr 0.00204
Train [98][2840/3239]	Time 0.213 (0.629)	Data Time 0.001 (0.016)	Loss 2.1956 (2.2237)	Entropy 0.77532 (0.77909)	Top-1 acc 73.828 (70.984)	Top-5 acc 88.281 (88.238)	lr 0.00204
Train [98][2850/3239]	Time 0.245 (0.629)	Data Time 0.001 (0.016)	Loss 2.2850 (2.2238)	Entropy 0.77526 (0.77908)	Top-1 acc 71.484 (70.985)	Top-5 acc 85.938 (88.233)	lr 0.00204
Train [98][2860/3239]	Time 0.350 (0.628)	Data Time 0.001 (0.016)	Loss 2.2682 (2.2238)	Entropy 0.77520 (0.77907)	Top-1 acc 70.703 (70.985)	Top-5 acc 88.672 (88.233)	lr 0.00204
Train [98][2870/3239]	Time 0.298 (0.628)	Data Time 0.001 (0.016)	Loss 2.3074 (2.2237)	Entropy 0.77519 (0.77905)	Top-1 acc 70.312 (70.990)	Top-5 acc 85.547 (88.234)	lr 0.00204
Train [98][2880/3239]	Time 0.248 (0.627)	Data Time 0.016 (0.015)	Loss 2.1667 (2.2238)	Entropy 0.77516 (0.77904)	Top-1 acc 72.266 (70.989)	Top-5 acc 87.109 (88.232)	lr 0.00204
Train [98][2890/3239]	Time 0.255 (0.627)	Data Time 0.001 (0.015)	Loss 2.3063 (2.2238)	Entropy 0.77513 (0.77902)	Top-1 acc 67.969 (70.991)	Top-5 acc 85.938 (88.232)	lr 0.00204
Train [98][2900/3239]	Time 0.323 (0.626)	Data Time 0.001 (0.015)	Loss 2.2033 (2.2238)	Entropy 0.77511 (0.77901)	Top-1 acc 69.531 (70.990)	Top-5 acc 89.062 (88.232)	lr 0.00204
Train [98][2910/3239]	Time 0.219 (0.626)	Data Time 0.001 (0.015)	Loss 2.1762 (2.2238)	Entropy 0.77516 (0.77900)	Top-1 acc 69.141 (70.985)	Top-5 acc 89.844 (88.233)	lr 0.00203
Train [98][2920/3239]	Time 0.232 (0.625)	Data Time 0.001 (0.015)	Loss 2.2162 (2.2237)	Entropy 0.77512 (0.77898)	Top-1 acc 73.438 (70.986)	Top-5 acc 88.281 (88.233)	lr 0.00203
Train [98][2930/3239]	Time 0.222 (0.625)	Data Time 0.001 (0.015)	Loss 2.3750 (2.2238)	Entropy 0.77510 (0.77897)	Top-1 acc 68.750 (70.983)	Top-5 acc 86.328 (88.231)	lr 0.00203
Train [98][2940/3239]	Time 0.320 (0.624)	Data Time 0.001 (0.015)	Loss 2.3145 (2.2239)	Entropy 0.77515 (0.77896)	Top-1 acc 69.141 (70.984)	Top-5 acc 86.719 (88.229)	lr 0.00203
Train [98][2950/3239]	Time 0.214 (0.624)	Data Time 0.001 (0.015)	Loss 2.1489 (2.2238)	Entropy 0.77513 (0.77895)	Top-1 acc 72.656 (70.987)	Top-5 acc 88.281 (88.232)	lr 0.00203
Train [98][2960/3239]	Time 0.264 (0.623)	Data Time 0.002 (0.015)	Loss 2.1761 (2.2238)	Entropy 0.77522 (0.77893)	Top-1 acc 73.438 (70.987)	Top-5 acc 87.500 (88.232)	lr 0.00203
Train [98][2970/3239]	Time 0.231 (0.623)	Data Time 0.001 (0.015)	Loss 2.1828 (2.2237)	Entropy 0.77521 (0.77892)	Top-1 acc 72.266 (70.992)	Top-5 acc 90.625 (88.232)	lr 0.00203
Train [98][2980/3239]	Time 0.464 (0.639)	Data Time 0.004 (0.015)	Loss 2.1972 (2.2235)	Entropy 0.77520 (0.77891)	Top-1 acc 71.875 (70.997)	Top-5 acc 88.281 (88.236)	lr 0.00203
Train [98][2990/3239]	Time 0.229 (0.639)	Data Time 0.002 (0.015)	Loss 2.2612 (2.2234)	Entropy 0.77522 (0.77890)	Top-1 acc 70.312 (71.001)	Top-5 acc 88.281 (88.238)	lr 0.00203
Train [98][3000/3239]	Time 0.261 (0.638)	Data Time 0.002 (0.015)	Loss 2.0486 (2.2233)	Entropy 0.77521 (0.77888)	Top-1 acc 75.391 (71.007)	Top-5 acc 90.625 (88.241)	lr 0.00203
Train [98][3010/3239]	Time 0.218 (0.637)	Data Time 0.001 (0.015)	Loss 2.1296 (2.2232)	Entropy 0.77520 (0.77887)	Top-1 acc 72.656 (71.010)	Top-5 acc 90.234 (88.243)	lr 0.00203
Train [98][3020/3239]	Time 0.386 (0.637)	Data Time 0.002 (0.015)	Loss 2.2065 (2.2232)	Entropy 0.77520 (0.77886)	Top-1 acc 73.438 (71.010)	Top-5 acc 89.453 (88.242)	lr 0.00203
Train [98][3030/3239]	Time 0.251 (0.637)	Data Time 0.002 (0.015)	Loss 2.1087 (2.2231)	Entropy 0.77520 (0.77885)	Top-1 acc 75.000 (71.013)	Top-5 acc 91.016 (88.244)	lr 0.00203
Train [98][3040/3239]	Time 0.223 (0.636)	Data Time 0.001 (0.015)	Loss 2.1649 (2.2232)	Entropy 0.77519 (0.77883)	Top-1 acc 71.094 (71.008)	Top-5 acc 87.109 (88.241)	lr 0.00203
Train [98][3050/3239]	Time 0.261 (0.636)	Data Time 0.001 (0.015)	Loss 2.3397 (2.2232)	Entropy 0.77520 (0.77882)	Top-1 acc 67.969 (71.003)	Top-5 acc 85.938 (88.240)	lr 0.00203
Train [98][3060/3239]	Time 0.333 (0.635)	Data Time 0.002 (0.015)	Loss 2.0839 (2.2233)	Entropy 0.77519 (0.77881)	Top-1 acc 73.438 (71.003)	Top-5 acc 93.750 (88.241)	lr 0.00203
Train [98][3070/3239]	Time 0.233 (0.635)	Data Time 0.001 (0.015)	Loss 2.1171 (2.2233)	Entropy 0.77506 (0.77880)	Top-1 acc 73.047 (71.003)	Top-5 acc 89.062 (88.239)	lr 0.00203
Train [98][3080/3239]	Time 0.230 (0.634)	Data Time 0.001 (0.015)	Loss 2.2059 (2.2232)	Entropy 0.77508 (0.77879)	Top-1 acc 69.922 (71.006)	Top-5 acc 88.281 (88.241)	lr 0.00203
Train [98][3090/3239]	Time 0.229 (0.634)	Data Time 0.001 (0.015)	Loss 2.2583 (2.2233)	Entropy 0.77507 (0.77878)	Top-1 acc 70.703 (71.005)	Top-5 acc 88.281 (88.240)	lr 0.00202
Train [98][3100/3239]	Time 0.241 (0.633)	Data Time 0.001 (0.015)	Loss 2.0800 (2.2233)	Entropy 0.77507 (0.77876)	Top-1 acc 73.047 (71.004)	Top-5 acc 88.672 (88.240)	lr 0.00202
Train [98][3110/3239]	Time 0.230 (0.633)	Data Time 0.001 (0.015)	Loss 2.1014 (2.2233)	Entropy 0.77503 (0.77875)	Top-1 acc 71.484 (71.006)	Top-5 acc 89.844 (88.239)	lr 0.00202
Train [98][3120/3239]	Time 0.259 (0.632)	Data Time 0.001 (0.014)	Loss 2.2602 (2.2231)	Entropy 0.77493 (0.77874)	Top-1 acc 69.141 (71.008)	Top-5 acc 86.719 (88.243)	lr 0.00202
Train [98][3130/3239]	Time 0.259 (0.632)	Data Time 0.001 (0.014)	Loss 2.3096 (2.2232)	Entropy 0.77494 (0.77873)	Top-1 acc 67.578 (71.008)	Top-5 acc 88.672 (88.243)	lr 0.00202
Train [98][3140/3239]	Time 0.243 (0.631)	Data Time 0.001 (0.014)	Loss 2.0608 (2.2231)	Entropy 0.77489 (0.77872)	Top-1 acc 73.828 (71.007)	Top-5 acc 91.797 (88.243)	lr 0.00202
Train [98][3150/3239]	Time 0.349 (0.631)	Data Time 0.001 (0.014)	Loss 2.2239 (2.2231)	Entropy 0.77488 (0.77870)	Top-1 acc 70.312 (71.011)	Top-5 acc 89.844 (88.243)	lr 0.00202
Train [98][3160/3239]	Time 0.267 (0.630)	Data Time 0.002 (0.014)	Loss 2.3135 (2.2230)	Entropy 0.77477 (0.77869)	Top-1 acc 68.750 (71.010)	Top-5 acc 85.938 (88.244)	lr 0.00202
Train [98][3170/3239]	Time 0.215 (0.630)	Data Time 0.001 (0.014)	Loss 2.0945 (2.2230)	Entropy 0.77460 (0.77868)	Top-1 acc 72.656 (71.009)	Top-5 acc 90.234 (88.244)	lr 0.00202
Train [98][3180/3239]	Time 0.226 (0.629)	Data Time 0.000 (0.014)	Loss 2.1550 (2.2227)	Entropy 0.77460 (0.77867)	Top-1 acc 73.828 (71.018)	Top-5 acc 89.453 (88.249)	lr 0.00202
Train [98][3190/3239]	Time 0.323 (0.629)	Data Time 0.000 (0.014)	Loss 2.1818 (2.2227)	Entropy 0.77454 (0.77865)	Top-1 acc 71.484 (71.016)	Top-5 acc 88.672 (88.249)	lr 0.00202
Train [98][3200/3239]	Time 0.232 (0.628)	Data Time 0.000 (0.014)	Loss 2.0913 (2.2228)	Entropy 0.77457 (0.77864)	Top-1 acc 77.734 (71.015)	Top-5 acc 91.016 (88.248)	lr 0.00202
Train [98][3210/3239]	Time 0.229 (0.628)	Data Time 0.000 (0.014)	Loss 2.0862 (2.2228)	Entropy 0.77456 (0.77863)	Top-1 acc 73.828 (71.013)	Top-5 acc 91.406 (88.249)	lr 0.00202
Train [98][3220/3239]	Time 0.234 (0.627)	Data Time 0.000 (0.014)	Loss 2.1357 (2.2230)	Entropy 0.77449 (0.77861)	Top-1 acc 72.656 (71.008)	Top-5 acc 91.797 (88.246)	lr 0.00202
Train [98][3230/3239]	Time 0.320 (0.627)	Data Time 0.000 (0.014)	Loss 2.1670 (2.2231)	Entropy 0.77446 (0.77860)	Top-1 acc 71.094 (71.001)	Top-5 acc 87.500 (88.244)	lr 0.00202
Train [98][3239/3239]	Time 2.404 (0.626)	Data Time 0.000 (0.014)	Loss 2.7670 (2.2233)	Entropy 0.77446 (0.77859)	Top-1 acc 59.259 (70.998)	Top-5 acc 77.778 (88.241)	lr 0.00202
==========Valid [98/120]	loss 1.228	top-1 acc 71.825 (71.855)	top-5 acc 89.599	Train top-1 70.998	top-5 88.241	Entropy 0.77446	Latency-None: 0.000ms	Flops: 546.53M
Train [99][0/3239]	Time 43.080 (43.080)	Data Time 40.563 (40.563)	Loss 2.2228 (2.2228)	Entropy 0.77443 (0.77443)	Top-1 acc 71.094 (71.094)	Top-5 acc 87.109 (87.109)	lr 0.00202
Train [99][10/3239]	Time 2.581 (4.412)	Data Time 0.002 (3.689)	Loss 2.1588 (2.2703)	Entropy 0.77443 (0.77443)	Top-1 acc 74.609 (69.105)	Top-5 acc 88.672 (87.749)	lr 0.00202
Train [99][20/3239]	Time 0.297 (2.440)	Data Time 0.001 (1.933)	Loss 2.2245 (2.2384)	Entropy 0.77443 (0.77443)	Top-1 acc 73.828 (70.126)	Top-5 acc 87.109 (88.263)	lr 0.00202
Train [99][30/3239]	Time 0.232 (1.810)	Data Time 0.001 (1.310)	Loss 2.1807 (2.2328)	Entropy 0.77436 (0.77441)	Top-1 acc 72.266 (70.665)	Top-5 acc 89.453 (88.105)	lr 0.00201
Train [99][40/3239]	Time 0.242 (1.497)	Data Time 0.001 (0.992)	Loss 2.3133 (2.2319)	Entropy 0.77437 (0.77440)	Top-1 acc 70.312 (70.770)	Top-5 acc 85.547 (88.205)	lr 0.00201
Train [99][50/3239]	Time 0.230 (1.294)	Data Time 0.001 (0.798)	Loss 2.2753 (2.2272)	Entropy 0.77436 (0.77439)	Top-1 acc 74.219 (70.933)	Top-5 acc 88.281 (88.327)	lr 0.00201
Train [99][60/3239]	Time 0.235 (1.163)	Data Time 0.002 (0.667)	Loss 2.3207 (2.2249)	Entropy 0.77433 (0.77438)	Top-1 acc 71.875 (71.049)	Top-5 acc 83.594 (88.204)	lr 0.00201
Train [99][70/3239]	Time 0.240 (1.068)	Data Time 0.001 (0.574)	Loss 2.1048 (2.2250)	Entropy 0.77428 (0.77437)	Top-1 acc 74.609 (71.077)	Top-5 acc 91.016 (88.149)	lr 0.00201
Train [99][80/3239]	Time 0.322 (1.706)	Data Time 0.003 (0.503)	Loss 2.1950 (2.2235)	Entropy 0.77419 (0.77436)	Top-1 acc 71.094 (71.200)	Top-5 acc 86.719 (88.194)	lr 0.00201
Train [99][90/3239]	Time 0.221 (1.572)	Data Time 0.002 (0.448)	Loss 2.1859 (2.2244)	Entropy 0.77416 (0.77434)	Top-1 acc 71.875 (71.278)	Top-5 acc 89.062 (88.152)	lr 0.00201
Train [99][100/3239]	Time 0.224 (1.465)	Data Time 0.001 (0.404)	Loss 2.1998 (2.2239)	Entropy 0.77413 (0.77432)	Top-1 acc 74.219 (71.276)	Top-5 acc 87.500 (88.165)	lr 0.00201
Train [99][110/3239]	Time 0.232 (1.376)	Data Time 0.001 (0.368)	Loss 2.2391 (2.2251)	Entropy 0.77414 (0.77430)	Top-1 acc 72.266 (71.270)	Top-5 acc 88.281 (88.144)	lr 0.00201
Train [99][120/3239]	Time 2.815 (1.304)	Data Time 0.002 (0.337)	Loss 2.1860 (2.2229)	Entropy 0.77414 (0.77429)	Top-1 acc 71.094 (71.287)	Top-5 acc 89.062 (88.204)	lr 0.00201
Train [99][130/3239]	Time 0.253 (1.223)	Data Time 0.001 (0.312)	Loss 2.1714 (2.2216)	Entropy 0.77409 (0.77427)	Top-1 acc 69.531 (71.267)	Top-5 acc 88.281 (88.237)	lr 0.00201
Train [99][140/3239]	Time 0.236 (1.170)	Data Time 0.001 (0.290)	Loss 2.2844 (2.2191)	Entropy 0.77410 (0.77426)	Top-1 acc 69.141 (71.340)	Top-5 acc 86.719 (88.273)	lr 0.00201
Train [99][150/3239]	Time 0.232 (1.125)	Data Time 0.001 (0.271)	Loss 2.2348 (2.2214)	Entropy 0.77404 (0.77425)	Top-1 acc 73.438 (71.275)	Top-5 acc 88.281 (88.242)	lr 0.00201
Train [99][160/3239]	Time 0.283 (1.085)	Data Time 0.001 (0.254)	Loss 2.1118 (2.2199)	Entropy 0.77397 (0.77423)	Top-1 acc 75.000 (71.346)	Top-5 acc 91.797 (88.223)	lr 0.00201
Train [99][170/3239]	Time 0.234 (1.049)	Data Time 0.001 (0.239)	Loss 2.2490 (2.2204)	Entropy 0.77398 (0.77422)	Top-1 acc 71.484 (71.409)	Top-5 acc 90.625 (88.222)	lr 0.00201
Train [99][180/3239]	Time 0.240 (1.018)	Data Time 0.001 (0.226)	Loss 2.1126 (2.2177)	Entropy 0.77390 (0.77420)	Top-1 acc 73.438 (71.452)	Top-5 acc 91.406 (88.281)	lr 0.00201
Train [99][190/3239]	Time 0.284 (0.990)	Data Time 0.001 (0.214)	Loss 2.2560 (2.2198)	Entropy 0.77382 (0.77418)	Top-1 acc 74.219 (71.448)	Top-5 acc 87.109 (88.201)	lr 0.00201
Train [99][200/3239]	Time 0.237 (0.965)	Data Time 0.002 (0.204)	Loss 2.1860 (2.2194)	Entropy 0.77385 (0.77417)	Top-1 acc 73.438 (71.420)	Top-5 acc 89.453 (88.227)	lr 0.00201
Train [99][210/3239]	Time 0.245 (0.944)	Data Time 0.001 (0.194)	Loss 2.0826 (2.2165)	Entropy 0.77390 (0.77415)	Top-1 acc 75.000 (71.460)	Top-5 acc 89.844 (88.298)	lr 0.00201
Train [99][220/3239]	Time 0.249 (0.923)	Data Time 0.002 (0.185)	Loss 2.1417 (2.2155)	Entropy 0.77377 (0.77414)	Top-1 acc 75.781 (71.488)	Top-5 acc 87.891 (88.295)	lr 0.00200
Train [99][230/3239]	Time 2.476 (0.904)	Data Time 0.002 (0.178)	Loss 2.1923 (2.2167)	Entropy 0.77377 (0.77412)	Top-1 acc 71.875 (71.454)	Top-5 acc 88.281 (88.285)	lr 0.00200
Train [99][240/3239]	Time 0.236 (0.876)	Data Time 0.001 (0.170)	Loss 2.4027 (2.2177)	Entropy 0.77374 (0.77411)	Top-1 acc 65.625 (71.410)	Top-5 acc 85.938 (88.263)	lr 0.00200
Train [99][250/3239]	Time 0.232 (0.861)	Data Time 0.001 (0.163)	Loss 2.2361 (2.2176)	Entropy 0.77374 (0.77409)	Top-1 acc 69.531 (71.377)	Top-5 acc 86.328 (88.253)	lr 0.00200
Train [99][260/3239]	Time 0.242 (0.847)	Data Time 0.001 (0.157)	Loss 2.4891 (2.2171)	Entropy 0.77368 (0.77408)	Top-1 acc 63.672 (71.360)	Top-5 acc 84.375 (88.268)	lr 0.00200
Train [99][270/3239]	Time 0.244 (0.833)	Data Time 0.002 (0.152)	Loss 2.1316 (2.2165)	Entropy 0.77369 (0.77406)	Top-1 acc 73.828 (71.368)	Top-5 acc 91.016 (88.274)	lr 0.00200
Train [99][280/3239]	Time 0.226 (0.821)	Data Time 0.001 (0.146)	Loss 2.3519 (2.2170)	Entropy 0.77362 (0.77405)	Top-1 acc 65.625 (71.365)	Top-5 acc 86.719 (88.265)	lr 0.00200
Train [99][290/3239]	Time 0.324 (0.810)	Data Time 0.001 (0.141)	Loss 2.4260 (2.2175)	Entropy 0.77341 (0.77403)	Top-1 acc 68.359 (71.400)	Top-5 acc 83.203 (88.238)	lr 0.00200
Train [99][300/3239]	Time 0.213 (0.799)	Data Time 0.001 (0.137)	Loss 2.1617 (2.2176)	Entropy 0.77325 (0.77401)	Top-1 acc 70.703 (71.405)	Top-5 acc 89.844 (88.235)	lr 0.00200
Train [99][310/3239]	Time 0.223 (0.788)	Data Time 0.001 (0.132)	Loss 2.0301 (2.2162)	Entropy 0.77328 (0.77398)	Top-1 acc 76.953 (71.437)	Top-5 acc 90.625 (88.260)	lr 0.00200
Train [99][320/3239]	Time 0.219 (0.779)	Data Time 0.001 (0.128)	Loss 2.1781 (2.2172)	Entropy 0.77324 (0.77396)	Top-1 acc 73.828 (71.397)	Top-5 acc 87.109 (88.245)	lr 0.00200
Train [99][330/3239]	Time 0.353 (0.770)	Data Time 0.002 (0.124)	Loss 2.2190 (2.2173)	Entropy 0.77309 (0.77394)	Top-1 acc 66.797 (71.366)	Top-5 acc 87.109 (88.236)	lr 0.00200
Train [99][340/3239]	Time 2.587 (0.761)	Data Time 0.001 (0.121)	Loss 2.2456 (2.2154)	Entropy 0.77309 (0.77391)	Top-1 acc 67.969 (71.426)	Top-5 acc 87.891 (88.276)	lr 0.00200
Train [99][350/3239]	Time 0.243 (0.747)	Data Time 0.001 (0.117)	Loss 2.3723 (2.2154)	Entropy 0.77312 (0.77389)	Top-1 acc 67.188 (71.440)	Top-5 acc 86.719 (88.273)	lr 0.00200
Train [99][360/3239]	Time 0.259 (0.740)	Data Time 0.001 (0.114)	Loss 2.1157 (2.2147)	Entropy 0.77307 (0.77387)	Top-1 acc 71.484 (71.428)	Top-5 acc 89.844 (88.278)	lr 0.00200
Train [99][370/3239]	Time 0.258 (0.733)	Data Time 0.001 (0.111)	Loss 2.1842 (2.2153)	Entropy 0.77304 (0.77385)	Top-1 acc 72.266 (71.398)	Top-5 acc 87.500 (88.279)	lr 0.00200
Train [99][380/3239]	Time 0.235 (0.726)	Data Time 0.002 (0.108)	Loss 2.1730 (2.2155)	Entropy 0.77301 (0.77382)	Top-1 acc 75.391 (71.412)	Top-5 acc 86.719 (88.287)	lr 0.00200
Train [99][390/3239]	Time 0.267 (0.720)	Data Time 0.001 (0.105)	Loss 2.2007 (2.2141)	Entropy 0.77290 (0.77380)	Top-1 acc 70.312 (71.434)	Top-5 acc 88.281 (88.322)	lr 0.00200
Train [99][400/3239]	Time 0.244 (0.715)	Data Time 0.001 (0.103)	Loss 2.2121 (2.2144)	Entropy 0.77285 (0.77378)	Top-1 acc 71.094 (71.426)	Top-5 acc 87.891 (88.308)	lr 0.00199
Train [99][410/3239]	Time 0.231 (0.709)	Data Time 0.001 (0.100)	Loss 2.0897 (2.2142)	Entropy 0.77281 (0.77376)	Top-1 acc 73.828 (71.424)	Top-5 acc 88.281 (88.296)	lr 0.00199
Train [99][420/3239]	Time 0.238 (0.704)	Data Time 0.001 (0.098)	Loss 2.0843 (2.2137)	Entropy 0.77274 (0.77373)	Top-1 acc 75.000 (71.451)	Top-5 acc 90.234 (88.310)	lr 0.00199
Train [99][430/3239]	Time 0.260 (0.699)	Data Time 0.001 (0.096)	Loss 2.2101 (2.2133)	Entropy 0.77272 (0.77371)	Top-1 acc 71.875 (71.454)	Top-5 acc 88.281 (88.319)	lr 0.00199
Train [99][440/3239]	Time 0.267 (0.816)	Data Time 0.004 (0.094)	Loss 2.1295 (2.2130)	Entropy 0.77260 (0.77369)	Top-1 acc 73.438 (71.468)	Top-5 acc 90.234 (88.323)	lr 0.00199
Train [99][450/3239]	Time 2.701 (0.809)	Data Time 0.002 (0.092)	Loss 2.2155 (2.2132)	Entropy 0.77260 (0.77366)	Top-1 acc 71.875 (71.483)	Top-5 acc 88.281 (88.317)	lr 0.00199
Train [99][460/3239]	Time 0.237 (0.797)	Data Time 0.002 (0.090)	Loss 2.2400 (2.2131)	Entropy 0.77254 (0.77364)	Top-1 acc 71.875 (71.469)	Top-5 acc 88.672 (88.322)	lr 0.00199
Train [99][470/3239]	Time 0.234 (0.790)	Data Time 0.001 (0.088)	Loss 2.3135 (2.2139)	Entropy 0.77253 (0.77362)	Top-1 acc 70.312 (71.450)	Top-5 acc 85.938 (88.305)	lr 0.00199
Train [99][480/3239]	Time 0.232 (0.784)	Data Time 0.001 (0.086)	Loss 2.2462 (2.2137)	Entropy 0.77252 (0.77359)	Top-1 acc 70.703 (71.454)	Top-5 acc 86.719 (88.311)	lr 0.00199
Train [99][490/3239]	Time 0.238 (0.777)	Data Time 0.002 (0.084)	Loss 2.0637 (2.2136)	Entropy 0.77252 (0.77357)	Top-1 acc 77.734 (71.464)	Top-5 acc 89.844 (88.284)	lr 0.00199
Train [99][500/3239]	Time 0.228 (0.772)	Data Time 0.001 (0.083)	Loss 2.2411 (2.2140)	Entropy 0.77242 (0.77355)	Top-1 acc 70.312 (71.458)	Top-5 acc 87.500 (88.274)	lr 0.00199
Train [99][510/3239]	Time 0.248 (0.766)	Data Time 0.001 (0.081)	Loss 2.1528 (2.2138)	Entropy 0.77242 (0.77353)	Top-1 acc 71.484 (71.474)	Top-5 acc 89.844 (88.279)	lr 0.00199
Train [99][520/3239]	Time 0.228 (0.760)	Data Time 0.001 (0.080)	Loss 2.1480 (2.2140)	Entropy 0.77244 (0.77351)	Top-1 acc 73.047 (71.472)	Top-5 acc 89.844 (88.271)	lr 0.00199
Train [99][530/3239]	Time 0.223 (0.755)	Data Time 0.001 (0.078)	Loss 2.2854 (2.2140)	Entropy 0.77244 (0.77349)	Top-1 acc 67.578 (71.453)	Top-5 acc 86.719 (88.282)	lr 0.00199
Train [99][540/3239]	Time 0.325 (0.750)	Data Time 0.001 (0.077)	Loss 2.2346 (2.2139)	Entropy 0.77243 (0.77347)	Top-1 acc 69.922 (71.448)	Top-5 acc 89.453 (88.302)	lr 0.00199
Train [99][550/3239]	Time 0.234 (0.745)	Data Time 0.001 (0.075)	Loss 2.1065 (2.2137)	Entropy 0.77239 (0.77345)	Top-1 acc 74.219 (71.452)	Top-5 acc 90.234 (88.294)	lr 0.00199
Train [99][560/3239]	Time 2.566 (0.740)	Data Time 0.001 (0.074)	Loss 2.2566 (2.2137)	Entropy 0.77239 (0.77343)	Top-1 acc 65.625 (71.442)	Top-5 acc 89.453 (88.301)	lr 0.00199
Train [99][570/3239]	Time 0.246 (0.732)	Data Time 0.001 (0.073)	Loss 2.1281 (2.2138)	Entropy 0.77237 (0.77341)	Top-1 acc 75.391 (71.441)	Top-5 acc 89.062 (88.303)	lr 0.00199
Train [99][580/3239]	Time 0.334 (0.727)	Data Time 0.001 (0.072)	Loss 2.1692 (2.2141)	Entropy 0.77234 (0.77339)	Top-1 acc 73.828 (71.442)	Top-5 acc 87.891 (88.297)	lr 0.00198
Train [99][590/3239]	Time 0.241 (0.723)	Data Time 0.001 (0.070)	Loss 2.3784 (2.2141)	Entropy 0.77232 (0.77337)	Top-1 acc 65.625 (71.443)	Top-5 acc 84.766 (88.298)	lr 0.00198
Train [99][600/3239]	Time 0.226 (0.719)	Data Time 0.001 (0.069)	Loss 2.2430 (2.2144)	Entropy 0.77230 (0.77336)	Top-1 acc 67.578 (71.436)	Top-5 acc 88.281 (88.303)	lr 0.00198
Train [99][610/3239]	Time 0.226 (0.715)	Data Time 0.001 (0.068)	Loss 2.1884 (2.2144)	Entropy 0.77235 (0.77334)	Top-1 acc 74.219 (71.442)	Top-5 acc 88.281 (88.293)	lr 0.00198
Train [99][620/3239]	Time 0.230 (0.711)	Data Time 0.001 (0.067)	Loss 2.6341 (2.2149)	Entropy 0.77239 (0.77332)	Top-1 acc 62.109 (71.430)	Top-5 acc 82.031 (88.289)	lr 0.00198
Train [99][630/3239]	Time 0.243 (0.708)	Data Time 0.001 (0.066)	Loss 2.2434 (2.2149)	Entropy 0.77230 (0.77331)	Top-1 acc 71.094 (71.414)	Top-5 acc 87.891 (88.302)	lr 0.00198
Train [99][640/3239]	Time 0.225 (0.705)	Data Time 0.001 (0.065)	Loss 2.0945 (2.2146)	Entropy 0.77226 (0.77329)	Top-1 acc 73.828 (71.416)	Top-5 acc 90.234 (88.303)	lr 0.00198
Train [99][650/3239]	Time 0.233 (0.701)	Data Time 0.001 (0.064)	Loss 2.2197 (2.2147)	Entropy 0.77226 (0.77328)	Top-1 acc 70.703 (71.420)	Top-5 acc 89.453 (88.315)	lr 0.00198
Train [99][660/3239]	Time 0.249 (0.698)	Data Time 0.001 (0.063)	Loss 2.2315 (2.2148)	Entropy 0.77221 (0.77326)	Top-1 acc 71.875 (71.415)	Top-5 acc 88.281 (88.306)	lr 0.00198
Train [99][670/3239]	Time 2.655 (0.695)	Data Time 0.001 (0.062)	Loss 2.1638 (2.2145)	Entropy 0.77221 (0.77325)	Top-1 acc 71.484 (71.411)	Top-5 acc 89.453 (88.318)	lr 0.00198
Train [99][680/3239]	Time 0.243 (0.689)	Data Time 0.001 (0.061)	Loss 2.2201 (2.2148)	Entropy 0.77216 (0.77323)	Top-1 acc 69.922 (71.395)	Top-5 acc 88.281 (88.307)	lr 0.00198
Train [99][690/3239]	Time 0.242 (0.686)	Data Time 0.002 (0.060)	Loss 2.2742 (2.2148)	Entropy 0.77221 (0.77321)	Top-1 acc 69.922 (71.379)	Top-5 acc 84.375 (88.306)	lr 0.00198
Train [99][700/3239]	Time 0.234 (0.683)	Data Time 0.001 (0.060)	Loss 2.2986 (2.2151)	Entropy 0.77214 (0.77320)	Top-1 acc 67.969 (71.365)	Top-5 acc 85.156 (88.304)	lr 0.00198
Train [99][710/3239]	Time 0.347 (0.680)	Data Time 0.001 (0.059)	Loss 2.2412 (2.2150)	Entropy 0.77208 (0.77318)	Top-1 acc 69.922 (71.366)	Top-5 acc 87.500 (88.297)	lr 0.00198
Train [99][720/3239]	Time 0.237 (0.677)	Data Time 0.002 (0.058)	Loss 2.1664 (2.2157)	Entropy 0.77207 (0.77317)	Top-1 acc 71.875 (71.349)	Top-5 acc 90.625 (88.296)	lr 0.00198
Train [99][730/3239]	Time 0.231 (0.674)	Data Time 0.001 (0.057)	Loss 2.1920 (2.2152)	Entropy 0.77225 (0.77315)	Top-1 acc 71.484 (71.358)	Top-5 acc 91.016 (88.312)	lr 0.00198
Train [99][740/3239]	Time 0.225 (0.672)	Data Time 0.001 (0.056)	Loss 2.3428 (2.2161)	Entropy 0.77223 (0.77314)	Top-1 acc 64.844 (71.329)	Top-5 acc 84.766 (88.297)	lr 0.00198
Train [99][750/3239]	Time 0.323 (0.669)	Data Time 0.001 (0.056)	Loss 2.1620 (2.2163)	Entropy 0.77214 (0.77313)	Top-1 acc 71.875 (71.328)	Top-5 acc 89.062 (88.283)	lr 0.00198
Train [99][760/3239]	Time 0.227 (0.666)	Data Time 0.001 (0.055)	Loss 2.3356 (2.2166)	Entropy 0.77216 (0.77312)	Top-1 acc 63.672 (71.314)	Top-5 acc 87.109 (88.281)	lr 0.00198
Train [99][770/3239]	Time 0.216 (0.664)	Data Time 0.002 (0.054)	Loss 2.3002 (2.2166)	Entropy 0.77217 (0.77310)	Top-1 acc 67.578 (71.314)	Top-5 acc 87.891 (88.285)	lr 0.00197
Train [99][780/3239]	Time 2.520 (0.661)	Data Time 0.002 (0.054)	Loss 2.4253 (2.2175)	Entropy 0.77217 (0.77309)	Top-1 acc 64.453 (71.285)	Top-5 acc 82.812 (88.273)	lr 0.00197
Train [99][790/3239]	Time 0.237 (0.656)	Data Time 0.001 (0.053)	Loss 2.1556 (2.2178)	Entropy 0.77214 (0.77308)	Top-1 acc 72.266 (71.265)	Top-5 acc 89.844 (88.269)	lr 0.00197
Train [99][800/3239]	Time 0.237 (0.654)	Data Time 0.001 (0.052)	Loss 2.2533 (2.2181)	Entropy 0.77218 (0.77307)	Top-1 acc 69.922 (71.265)	Top-5 acc 84.766 (88.262)	lr 0.00197
Train [99][810/3239]	Time 0.234 (0.717)	Data Time 0.002 (0.052)	Loss 2.1690 (2.2180)	Entropy 0.77218 (0.77306)	Top-1 acc 73.438 (71.270)	Top-5 acc 91.406 (88.273)	lr 0.00197
Train [99][820/3239]	Time 0.253 (0.714)	Data Time 0.002 (0.051)	Loss 2.3396 (2.2182)	Entropy 0.77209 (0.77305)	Top-1 acc 66.797 (71.264)	Top-5 acc 84.375 (88.267)	lr 0.00197
Train [99][830/3239]	Time 0.236 (0.712)	Data Time 0.002 (0.050)	Loss 2.2622 (2.2182)	Entropy 0.77208 (0.77303)	Top-1 acc 71.094 (71.267)	Top-5 acc 87.500 (88.263)	lr 0.00197
Train [99][840/3239]	Time 0.324 (0.709)	Data Time 0.001 (0.050)	Loss 2.1051 (2.2181)	Entropy 0.77212 (0.77302)	Top-1 acc 74.219 (71.267)	Top-5 acc 89.844 (88.267)	lr 0.00197
Train [99][850/3239]	Time 0.235 (0.706)	Data Time 0.001 (0.049)	Loss 2.1527 (2.2175)	Entropy 0.77209 (0.77301)	Top-1 acc 73.828 (71.292)	Top-5 acc 88.281 (88.276)	lr 0.00197
Train [99][860/3239]	Time 0.235 (0.703)	Data Time 0.001 (0.049)	Loss 2.2043 (2.2173)	Entropy 0.77205 (0.77300)	Top-1 acc 72.266 (71.298)	Top-5 acc 89.844 (88.284)	lr 0.00197
Train [99][870/3239]	Time 0.226 (0.701)	Data Time 0.001 (0.048)	Loss 2.1975 (2.2175)	Entropy 0.77210 (0.77299)	Top-1 acc 71.094 (71.286)	Top-5 acc 88.672 (88.282)	lr 0.00197
Train [99][880/3239]	Time 0.395 (0.699)	Data Time 0.001 (0.048)	Loss 2.2503 (2.2175)	Entropy 0.77207 (0.77298)	Top-1 acc 70.312 (71.281)	Top-5 acc 87.109 (88.284)	lr 0.00197
Train [99][890/3239]	Time 2.537 (0.696)	Data Time 0.001 (0.047)	Loss 2.3225 (2.2177)	Entropy 0.77207 (0.77297)	Top-1 acc 71.875 (71.270)	Top-5 acc 86.719 (88.283)	lr 0.00197
Train [99][900/3239]	Time 0.235 (0.691)	Data Time 0.001 (0.047)	Loss 2.0554 (2.2173)	Entropy 0.77208 (0.77296)	Top-1 acc 73.438 (71.275)	Top-5 acc 91.016 (88.285)	lr 0.00197
Train [99][910/3239]	Time 0.231 (0.689)	Data Time 0.001 (0.046)	Loss 2.0523 (2.2167)	Entropy 0.77206 (0.77295)	Top-1 acc 75.000 (71.285)	Top-5 acc 91.797 (88.296)	lr 0.00197
Train [99][920/3239]	Time 0.250 (0.687)	Data Time 0.001 (0.046)	Loss 2.1288 (2.2171)	Entropy 0.77202 (0.77294)	Top-1 acc 72.656 (71.264)	Top-5 acc 89.453 (88.290)	lr 0.00197
Train [99][930/3239]	Time 0.223 (0.684)	Data Time 0.001 (0.045)	Loss 2.1150 (2.2169)	Entropy 0.77201 (0.77293)	Top-1 acc 70.703 (71.248)	Top-5 acc 90.625 (88.299)	lr 0.00197
Train [99][940/3239]	Time 0.250 (0.682)	Data Time 0.001 (0.045)	Loss 2.0724 (2.2164)	Entropy 0.77195 (0.77292)	Top-1 acc 76.172 (71.262)	Top-5 acc 91.406 (88.309)	lr 0.00197
Train [99][950/3239]	Time 0.224 (0.680)	Data Time 0.001 (0.044)	Loss 2.1377 (2.2159)	Entropy 0.77195 (0.77291)	Top-1 acc 70.703 (71.262)	Top-5 acc 89.453 (88.322)	lr 0.00196
Train [99][960/3239]	Time 0.233 (0.678)	Data Time 0.001 (0.044)	Loss 2.1442 (2.2156)	Entropy 0.77194 (0.77290)	Top-1 acc 71.484 (71.271)	Top-5 acc 91.016 (88.328)	lr 0.00196
Train [99][970/3239]	Time 0.236 (0.676)	Data Time 0.001 (0.043)	Loss 2.0913 (2.2157)	Entropy 0.77197 (0.77289)	Top-1 acc 73.438 (71.267)	Top-5 acc 91.016 (88.330)	lr 0.00196
Train [99][980/3239]	Time 0.259 (0.674)	Data Time 0.002 (0.043)	Loss 2.2097 (2.2159)	Entropy 0.77198 (0.77288)	Top-1 acc 74.609 (71.273)	Top-5 acc 87.891 (88.333)	lr 0.00196
Train [99][990/3239]	Time 0.263 (0.673)	Data Time 0.001 (0.043)	Loss 2.2124 (2.2156)	Entropy 0.77198 (0.77287)	Top-1 acc 69.922 (71.274)	Top-5 acc 89.453 (88.339)	lr 0.00196
Train [99][1000/3239]	Time 2.560 (0.671)	Data Time 0.001 (0.042)	Loss 2.1932 (2.2154)	Entropy 0.77198 (0.77286)	Top-1 acc 70.312 (71.275)	Top-5 acc 88.672 (88.346)	lr 0.00196
Train [99][1010/3239]	Time 0.337 (0.667)	Data Time 0.001 (0.042)	Loss 2.5136 (2.2159)	Entropy 0.77197 (0.77285)	Top-1 acc 67.188 (71.268)	Top-5 acc 83.984 (88.338)	lr 0.00196
Train [99][1020/3239]	Time 0.239 (0.665)	Data Time 0.001 (0.041)	Loss 2.2779 (2.2161)	Entropy 0.77194 (0.77285)	Top-1 acc 69.531 (71.265)	Top-5 acc 87.500 (88.336)	lr 0.00196
Train [99][1030/3239]	Time 0.232 (0.663)	Data Time 0.001 (0.041)	Loss 2.1554 (2.2168)	Entropy 0.77191 (0.77284)	Top-1 acc 70.703 (71.258)	Top-5 acc 89.453 (88.317)	lr 0.00196
Train [99][1040/3239]	Time 0.242 (0.661)	Data Time 0.002 (0.041)	Loss 2.2724 (2.2167)	Entropy 0.77195 (0.77283)	Top-1 acc 70.703 (71.264)	Top-5 acc 86.719 (88.314)	lr 0.00196
Train [99][1050/3239]	Time 0.334 (0.660)	Data Time 0.001 (0.040)	Loss 2.2230 (2.2167)	Entropy 0.77192 (0.77282)	Top-1 acc 71.875 (71.262)	Top-5 acc 87.891 (88.312)	lr 0.00196
Train [99][1060/3239]	Time 0.231 (0.658)	Data Time 0.002 (0.040)	Loss 2.1037 (2.2172)	Entropy 0.77185 (0.77281)	Top-1 acc 73.438 (71.252)	Top-5 acc 91.406 (88.304)	lr 0.00196
Train [99][1070/3239]	Time 0.230 (0.656)	Data Time 0.001 (0.040)	Loss 2.2420 (2.2173)	Entropy 0.77186 (0.77280)	Top-1 acc 75.000 (71.260)	Top-5 acc 87.891 (88.301)	lr 0.00196
Train [99][1080/3239]	Time 0.229 (0.654)	Data Time 0.002 (0.039)	Loss 2.1440 (2.2175)	Entropy 0.77185 (0.77279)	Top-1 acc 74.219 (71.264)	Top-5 acc 88.281 (88.296)	lr 0.00196
Train [99][1090/3239]	Time 0.330 (0.653)	Data Time 0.001 (0.039)	Loss 2.2152 (2.2178)	Entropy 0.77185 (0.77278)	Top-1 acc 74.219 (71.258)	Top-5 acc 89.062 (88.288)	lr 0.00196
Train [99][1100/3239]	Time 0.216 (0.651)	Data Time 0.001 (0.039)	Loss 2.1742 (2.2179)	Entropy 0.77181 (0.77278)	Top-1 acc 73.828 (71.257)	Top-5 acc 87.109 (88.284)	lr 0.00196
Train [99][1110/3239]	Time 2.535 (0.649)	Data Time 0.001 (0.038)	Loss 2.2400 (2.2178)	Entropy 0.77181 (0.77277)	Top-1 acc 68.359 (71.257)	Top-5 acc 87.109 (88.288)	lr 0.00196
Train [99][1120/3239]	Time 0.218 (0.646)	Data Time 0.001 (0.038)	Loss 2.1224 (2.2182)	Entropy 0.77173 (0.77276)	Top-1 acc 72.266 (71.247)	Top-5 acc 91.797 (88.281)	lr 0.00196
Train [99][1130/3239]	Time 0.230 (0.644)	Data Time 0.001 (0.038)	Loss 2.1830 (2.2179)	Entropy 0.77180 (0.77275)	Top-1 acc 71.484 (71.258)	Top-5 acc 88.672 (88.283)	lr 0.00195
Train [99][1140/3239]	Time 0.227 (0.643)	Data Time 0.001 (0.037)	Loss 2.1189 (2.2178)	Entropy 0.77168 (0.77274)	Top-1 acc 72.656 (71.255)	Top-5 acc 88.281 (88.283)	lr 0.00195
Train [99][1150/3239]	Time 0.234 (0.641)	Data Time 0.001 (0.037)	Loss 2.2680 (2.2180)	Entropy 0.77168 (0.77273)	Top-1 acc 70.703 (71.254)	Top-5 acc 87.109 (88.273)	lr 0.00195
Train [99][1160/3239]	Time 0.238 (0.640)	Data Time 0.001 (0.037)	Loss 2.1597 (2.2180)	Entropy 0.77156 (0.77272)	Top-1 acc 73.828 (71.258)	Top-5 acc 89.844 (88.276)	lr 0.00195
Train [99][1170/3239]	Time 0.396 (0.681)	Data Time 0.003 (0.036)	Loss 2.1461 (2.2176)	Entropy 0.77152 (0.77271)	Top-1 acc 71.484 (71.271)	Top-5 acc 89.844 (88.275)	lr 0.00195
Train [99][1180/3239]	Time 0.236 (0.682)	Data Time 0.002 (0.036)	Loss 2.2825 (2.2174)	Entropy 0.77156 (0.77270)	Top-1 acc 69.531 (71.267)	Top-5 acc 88.281 (88.281)	lr 0.00195
Train [99][1190/3239]	Time 0.239 (0.680)	Data Time 0.001 (0.036)	Loss 2.3609 (2.2176)	Entropy 0.77156 (0.77269)	Top-1 acc 67.578 (71.266)	Top-5 acc 85.156 (88.278)	lr 0.00195
Train [99][1200/3239]	Time 0.217 (0.678)	Data Time 0.001 (0.035)	Loss 2.0607 (2.2177)	Entropy 0.77149 (0.77268)	Top-1 acc 77.734 (71.259)	Top-5 acc 89.844 (88.273)	lr 0.00195
Train [99][1210/3239]	Time 0.292 (0.677)	Data Time 0.001 (0.035)	Loss 2.1895 (2.2175)	Entropy 0.77148 (0.77267)	Top-1 acc 72.656 (71.263)	Top-5 acc 88.281 (88.279)	lr 0.00195
Train [99][1220/3239]	Time 2.700 (0.675)	Data Time 0.001 (0.035)	Loss 2.2174 (2.2173)	Entropy 0.77148 (0.77266)	Top-1 acc 72.656 (71.265)	Top-5 acc 89.062 (88.284)	lr 0.00195
Train [99][1230/3239]	Time 0.248 (0.671)	Data Time 0.001 (0.035)	Loss 2.0418 (2.2172)	Entropy 0.77145 (0.77265)	Top-1 acc 73.828 (71.264)	Top-5 acc 91.797 (88.283)	lr 0.00195
Train [99][1240/3239]	Time 0.235 (0.670)	Data Time 0.001 (0.034)	Loss 2.2456 (2.2174)	Entropy 0.77140 (0.77264)	Top-1 acc 70.312 (71.252)	Top-5 acc 88.672 (88.283)	lr 0.00195
Train [99][1250/3239]	Time 0.242 (0.668)	Data Time 0.001 (0.034)	Loss 2.2603 (2.2172)	Entropy 0.77123 (0.77263)	Top-1 acc 71.875 (71.256)	Top-5 acc 87.109 (88.281)	lr 0.00195
Train [99][1260/3239]	Time 0.325 (0.667)	Data Time 0.001 (0.034)	Loss 2.1373 (2.2170)	Entropy 0.77114 (0.77262)	Top-1 acc 74.219 (71.262)	Top-5 acc 88.672 (88.282)	lr 0.00195
Train [99][1270/3239]	Time 0.233 (0.666)	Data Time 0.001 (0.034)	Loss 2.2492 (2.2170)	Entropy 0.77117 (0.77261)	Top-1 acc 70.312 (71.263)	Top-5 acc 87.500 (88.285)	lr 0.00195
Train [99][1280/3239]	Time 0.222 (0.664)	Data Time 0.001 (0.033)	Loss 2.1532 (2.2169)	Entropy 0.77110 (0.77260)	Top-1 acc 71.484 (71.254)	Top-5 acc 89.062 (88.289)	lr 0.00195
Train [99][1290/3239]	Time 0.226 (0.663)	Data Time 0.001 (0.033)	Loss 2.1914 (2.2168)	Entropy 0.77087 (0.77258)	Top-1 acc 68.750 (71.258)	Top-5 acc 91.016 (88.289)	lr 0.00195
Train [99][1300/3239]	Time 0.235 (0.661)	Data Time 0.001 (0.033)	Loss 2.2487 (2.2168)	Entropy 0.77086 (0.77257)	Top-1 acc 69.531 (71.261)	Top-5 acc 87.109 (88.288)	lr 0.00195
Train [99][1310/3239]	Time 0.227 (0.660)	Data Time 0.001 (0.033)	Loss 2.2415 (2.2167)	Entropy 0.77088 (0.77256)	Top-1 acc 70.312 (71.260)	Top-5 acc 87.109 (88.281)	lr 0.00195
Train [99][1320/3239]	Time 0.229 (0.658)	Data Time 0.001 (0.032)	Loss 2.2636 (2.2167)	Entropy 0.77092 (0.77255)	Top-1 acc 68.359 (71.260)	Top-5 acc 87.891 (88.282)	lr 0.00194
Train [99][1330/3239]	Time 2.658 (0.657)	Data Time 0.001 (0.032)	Loss 2.3066 (2.2169)	Entropy 0.77092 (0.77253)	Top-1 acc 69.141 (71.257)	Top-5 acc 85.547 (88.274)	lr 0.00194
Train [99][1340/3239]	Time 0.230 (0.654)	Data Time 0.001 (0.032)	Loss 2.2770 (2.2175)	Entropy 0.77093 (0.77252)	Top-1 acc 66.016 (71.239)	Top-5 acc 90.234 (88.267)	lr 0.00194
Train [99][1350/3239]	Time 0.231 (0.653)	Data Time 0.001 (0.032)	Loss 2.1967 (2.2175)	Entropy 0.77092 (0.77251)	Top-1 acc 70.703 (71.239)	Top-5 acc 89.062 (88.272)	lr 0.00194
Train [99][1360/3239]	Time 0.287 (0.652)	Data Time 0.001 (0.031)	Loss 2.1772 (2.2177)	Entropy 0.77094 (0.77250)	Top-1 acc 68.359 (71.230)	Top-5 acc 90.625 (88.269)	lr 0.00194
Train [99][1370/3239]	Time 0.227 (0.650)	Data Time 0.001 (0.031)	Loss 2.2568 (2.2178)	Entropy 0.77091 (0.77249)	Top-1 acc 71.484 (71.231)	Top-5 acc 87.109 (88.267)	lr 0.00194
Train [99][1380/3239]	Time 0.243 (0.649)	Data Time 0.001 (0.031)	Loss 2.1901 (2.2177)	Entropy 0.77082 (0.77247)	Top-1 acc 70.703 (71.229)	Top-5 acc 88.672 (88.268)	lr 0.00194
Train [99][1390/3239]	Time 0.331 (0.648)	Data Time 0.006 (0.031)	Loss 2.1863 (2.2178)	Entropy 0.77086 (0.77246)	Top-1 acc 72.656 (71.226)	Top-5 acc 89.453 (88.266)	lr 0.00194
Train [99][1400/3239]	Time 0.235 (0.647)	Data Time 0.002 (0.031)	Loss 2.4116 (2.2179)	Entropy 0.77080 (0.77245)	Top-1 acc 71.875 (71.225)	Top-5 acc 83.984 (88.261)	lr 0.00194
Train [99][1410/3239]	Time 0.256 (0.645)	Data Time 0.001 (0.030)	Loss 2.2310 (2.2176)	Entropy 0.77075 (0.77244)	Top-1 acc 71.094 (71.236)	Top-5 acc 87.109 (88.261)	lr 0.00194
Train [99][1420/3239]	Time 0.222 (0.644)	Data Time 0.001 (0.030)	Loss 2.0715 (2.2176)	Entropy 0.77069 (0.77243)	Top-1 acc 73.828 (71.234)	Top-5 acc 91.797 (88.261)	lr 0.00194
Train [99][1430/3239]	Time 0.400 (0.643)	Data Time 0.002 (0.030)	Loss 2.2109 (2.2176)	Entropy 0.77064 (0.77242)	Top-1 acc 71.484 (71.228)	Top-5 acc 90.234 (88.258)	lr 0.00194
Train [99][1440/3239]	Time 2.604 (0.642)	Data Time 0.001 (0.030)	Loss 2.1508 (2.2174)	Entropy 0.77064 (0.77240)	Top-1 acc 74.219 (71.228)	Top-5 acc 88.672 (88.258)	lr 0.00194
Train [99][1450/3239]	Time 0.255 (0.639)	Data Time 0.001 (0.030)	Loss 2.1732 (2.2171)	Entropy 0.77053 (0.77239)	Top-1 acc 74.219 (71.241)	Top-5 acc 88.281 (88.260)	lr 0.00194
Train [99][1460/3239]	Time 0.227 (0.638)	Data Time 0.001 (0.029)	Loss 2.3971 (2.2174)	Entropy 0.77054 (0.77238)	Top-1 acc 66.016 (71.229)	Top-5 acc 87.500 (88.255)	lr 0.00194
Train [99][1470/3239]	Time 0.241 (0.637)	Data Time 0.001 (0.029)	Loss 2.1119 (2.2173)	Entropy 0.77050 (0.77237)	Top-1 acc 73.828 (71.232)	Top-5 acc 90.625 (88.257)	lr 0.00194
Train [99][1480/3239]	Time 0.258 (0.636)	Data Time 0.001 (0.029)	Loss 2.2630 (2.2174)	Entropy 0.77063 (0.77235)	Top-1 acc 69.922 (71.230)	Top-5 acc 87.500 (88.256)	lr 0.00194
Train [99][1490/3239]	Time 0.236 (0.635)	Data Time 0.001 (0.029)	Loss 2.0726 (2.2169)	Entropy 0.77061 (0.77234)	Top-1 acc 75.000 (71.237)	Top-5 acc 91.406 (88.263)	lr 0.00194
Train [99][1500/3239]	Time 0.231 (0.634)	Data Time 0.001 (0.029)	Loss 2.2157 (2.2169)	Entropy 0.77038 (0.77233)	Top-1 acc 71.094 (71.235)	Top-5 acc 89.062 (88.266)	lr 0.00193
Train [99][1510/3239]	Time 0.240 (0.633)	Data Time 0.001 (0.029)	Loss 2.1193 (2.2167)	Entropy 0.77036 (0.77232)	Top-1 acc 74.609 (71.243)	Top-5 acc 91.406 (88.269)	lr 0.00193
Train [99][1520/3239]	Time 0.238 (0.632)	Data Time 0.001 (0.028)	Loss 2.1114 (2.2167)	Entropy 0.77042 (0.77230)	Top-1 acc 75.781 (71.245)	Top-5 acc 90.234 (88.268)	lr 0.00193
Train [99][1530/3239]	Time 0.384 (0.667)	Data Time 0.003 (0.028)	Loss 2.1664 (2.2170)	Entropy 0.77033 (0.77229)	Top-1 acc 71.875 (71.241)	Top-5 acc 90.625 (88.262)	lr 0.00193
Train [99][1540/3239]	Time 0.253 (0.666)	Data Time 0.002 (0.028)	Loss 2.1323 (2.2166)	Entropy 0.77035 (0.77228)	Top-1 acc 70.703 (71.246)	Top-5 acc 90.234 (88.269)	lr 0.00193
Train [99][1550/3239]	Time 2.520 (0.664)	Data Time 0.002 (0.028)	Loss 2.2259 (2.2166)	Entropy 0.77035 (0.77227)	Top-1 acc 69.141 (71.246)	Top-5 acc 86.719 (88.266)	lr 0.00193
Train [99][1560/3239]	Time 0.292 (0.662)	Data Time 0.002 (0.028)	Loss 2.0165 (2.2164)	Entropy 0.77034 (0.77225)	Top-1 acc 75.000 (71.246)	Top-5 acc 91.797 (88.270)	lr 0.00193
Train [99][1570/3239]	Time 0.225 (0.661)	Data Time 0.001 (0.028)	Loss 2.3541 (2.2166)	Entropy 0.77027 (0.77224)	Top-1 acc 67.188 (71.236)	Top-5 acc 83.984 (88.264)	lr 0.00193
Train [99][1580/3239]	Time 0.235 (0.660)	Data Time 0.002 (0.027)	Loss 2.1772 (2.2165)	Entropy 0.77024 (0.77223)	Top-1 acc 73.438 (71.245)	Top-5 acc 89.062 (88.261)	lr 0.00193
Train [99][1590/3239]	Time 0.242 (0.658)	Data Time 0.002 (0.027)	Loss 2.1769 (2.2166)	Entropy 0.77016 (0.77222)	Top-1 acc 71.484 (71.247)	Top-5 acc 86.328 (88.259)	lr 0.00193
Train [99][1600/3239]	Time 0.320 (0.657)	Data Time 0.002 (0.027)	Loss 2.2797 (2.2167)	Entropy 0.77019 (0.77220)	Top-1 acc 70.703 (71.247)	Top-5 acc 87.891 (88.256)	lr 0.00193
Train [99][1610/3239]	Time 0.237 (0.656)	Data Time 0.001 (0.027)	Loss 2.1419 (2.2164)	Entropy 0.77002 (0.77219)	Top-1 acc 73.047 (71.256)	Top-5 acc 90.234 (88.262)	lr 0.00193
Train [99][1620/3239]	Time 0.223 (0.655)	Data Time 0.001 (0.027)	Loss 2.1892 (2.2165)	Entropy 0.76985 (0.77218)	Top-1 acc 70.703 (71.258)	Top-5 acc 90.234 (88.258)	lr 0.00193
Train [99][1630/3239]	Time 0.240 (0.654)	Data Time 0.001 (0.027)	Loss 1.9945 (2.2162)	Entropy 0.76969 (0.77216)	Top-1 acc 75.781 (71.265)	Top-5 acc 93.359 (88.262)	lr 0.00193
Train [99][1640/3239]	Time 0.332 (0.653)	Data Time 0.001 (0.026)	Loss 2.4157 (2.2164)	Entropy 0.76968 (0.77215)	Top-1 acc 68.750 (71.262)	Top-5 acc 83.203 (88.256)	lr 0.00193
Train [99][1650/3239]	Time 0.221 (0.652)	Data Time 0.001 (0.026)	Loss 2.1542 (2.2162)	Entropy 0.76959 (0.77213)	Top-1 acc 73.047 (71.262)	Top-5 acc 90.234 (88.260)	lr 0.00193
Train [99][1660/3239]	Time 2.585 (0.651)	Data Time 0.001 (0.026)	Loss 2.2114 (2.2161)	Entropy 0.76959 (0.77212)	Top-1 acc 72.266 (71.265)	Top-5 acc 91.016 (88.265)	lr 0.00193
Train [99][1670/3239]	Time 0.242 (0.649)	Data Time 0.001 (0.026)	Loss 2.3861 (2.2160)	Entropy 0.76950 (0.77210)	Top-1 acc 67.578 (71.270)	Top-5 acc 87.500 (88.269)	lr 0.00193
Train [99][1680/3239]	Time 0.298 (0.648)	Data Time 0.002 (0.026)	Loss 2.1067 (2.2158)	Entropy 0.76944 (0.77208)	Top-1 acc 68.750 (71.269)	Top-5 acc 92.578 (88.273)	lr 0.00193
Train [99][1690/3239]	Time 0.244 (0.647)	Data Time 0.001 (0.026)	Loss 2.1561 (2.2158)	Entropy 0.76939 (0.77207)	Top-1 acc 74.219 (71.269)	Top-5 acc 90.234 (88.270)	lr 0.00192
Train [99][1700/3239]	Time 0.234 (0.646)	Data Time 0.001 (0.026)	Loss 2.2580 (2.2160)	Entropy 0.76933 (0.77205)	Top-1 acc 69.922 (71.268)	Top-5 acc 87.500 (88.268)	lr 0.00192
Train [99][1710/3239]	Time 0.222 (0.645)	Data Time 0.001 (0.025)	Loss 2.3509 (2.2158)	Entropy 0.76932 (0.77204)	Top-1 acc 70.312 (71.270)	Top-5 acc 85.156 (88.273)	lr 0.00192
Train [99][1720/3239]	Time 0.233 (0.644)	Data Time 0.001 (0.025)	Loss 2.3842 (2.2158)	Entropy 0.76936 (0.77202)	Top-1 acc 67.969 (71.272)	Top-5 acc 84.375 (88.273)	lr 0.00192
Train [99][1730/3239]	Time 0.241 (0.643)	Data Time 0.001 (0.025)	Loss 2.3363 (2.2160)	Entropy 0.76931 (0.77201)	Top-1 acc 64.453 (71.269)	Top-5 acc 86.719 (88.271)	lr 0.00192
Train [99][1740/3239]	Time 0.232 (0.642)	Data Time 0.001 (0.025)	Loss 2.1920 (2.2160)	Entropy 0.76933 (0.77199)	Top-1 acc 70.312 (71.276)	Top-5 acc 90.625 (88.274)	lr 0.00192
Train [99][1750/3239]	Time 0.234 (0.641)	Data Time 0.001 (0.025)	Loss 2.1075 (2.2161)	Entropy 0.76935 (0.77197)	Top-1 acc 73.047 (71.272)	Top-5 acc 90.625 (88.273)	lr 0.00192
Train [99][1760/3239]	Time 0.232 (0.640)	Data Time 0.001 (0.025)	Loss 2.1697 (2.2161)	Entropy 0.76934 (0.77196)	Top-1 acc 69.531 (71.271)	Top-5 acc 91.406 (88.275)	lr 0.00192
Train [99][1770/3239]	Time 2.458 (0.639)	Data Time 0.001 (0.025)	Loss 2.1839 (2.2162)	Entropy 0.76934 (0.77195)	Top-1 acc 73.828 (71.272)	Top-5 acc 87.891 (88.276)	lr 0.00192
Train [99][1780/3239]	Time 0.247 (0.637)	Data Time 0.001 (0.024)	Loss 2.2289 (2.2161)	Entropy 0.76940 (0.77193)	Top-1 acc 71.094 (71.275)	Top-5 acc 86.719 (88.273)	lr 0.00192
Train [99][1790/3239]	Time 0.245 (0.636)	Data Time 0.001 (0.024)	Loss 2.1461 (2.2157)	Entropy 0.76927 (0.77192)	Top-1 acc 72.656 (71.290)	Top-5 acc 87.891 (88.280)	lr 0.00192
Train [99][1800/3239]	Time 0.227 (0.635)	Data Time 0.001 (0.024)	Loss 2.1778 (2.2156)	Entropy 0.76920 (0.77190)	Top-1 acc 70.703 (71.291)	Top-5 acc 89.844 (88.283)	lr 0.00192
Train [99][1810/3239]	Time 0.323 (0.635)	Data Time 0.001 (0.024)	Loss 2.2015 (2.2155)	Entropy 0.76916 (0.77189)	Top-1 acc 70.312 (71.293)	Top-5 acc 87.109 (88.288)	lr 0.00192
Train [99][1820/3239]	Time 0.225 (0.634)	Data Time 0.001 (0.024)	Loss 2.2313 (2.2158)	Entropy 0.76916 (0.77187)	Top-1 acc 69.922 (71.281)	Top-5 acc 89.062 (88.282)	lr 0.00192
Train [99][1830/3239]	Time 0.238 (0.633)	Data Time 0.001 (0.024)	Loss 2.2600 (2.2159)	Entropy 0.76907 (0.77186)	Top-1 acc 69.922 (71.281)	Top-5 acc 89.062 (88.282)	lr 0.00192
Train [99][1840/3239]	Time 0.230 (0.632)	Data Time 0.002 (0.024)	Loss 2.1423 (2.2158)	Entropy 0.76895 (0.77184)	Top-1 acc 73.047 (71.280)	Top-5 acc 87.109 (88.284)	lr 0.00192
Train [99][1850/3239]	Time 0.316 (0.631)	Data Time 0.001 (0.024)	Loss 2.1624 (2.2158)	Entropy 0.76883 (0.77182)	Top-1 acc 71.875 (71.276)	Top-5 acc 89.844 (88.284)	lr 0.00192
Train [99][1860/3239]	Time 0.227 (0.630)	Data Time 0.001 (0.023)	Loss 2.0832 (2.2159)	Entropy 0.76875 (0.77181)	Top-1 acc 75.391 (71.274)	Top-5 acc 90.234 (88.283)	lr 0.00192
Train [99][1870/3239]	Time 0.222 (0.629)	Data Time 0.001 (0.023)	Loss 2.3337 (2.2159)	Entropy 0.76868 (0.77179)	Top-1 acc 68.359 (71.272)	Top-5 acc 87.109 (88.278)	lr 0.00191
Train [99][1880/3239]	Time 2.660 (0.629)	Data Time 0.001 (0.023)	Loss 2.2790 (2.2164)	Entropy 0.76868 (0.77178)	Top-1 acc 70.312 (71.268)	Top-5 acc 88.281 (88.269)	lr 0.00191
Train [99][1890/3239]	Time 0.266 (0.627)	Data Time 0.001 (0.023)	Loss 2.3062 (2.2165)	Entropy 0.76864 (0.77176)	Top-1 acc 69.531 (71.263)	Top-5 acc 86.328 (88.269)	lr 0.00191
Train [99][1900/3239]	Time 0.242 (0.654)	Data Time 0.002 (0.023)	Loss 2.1840 (2.2167)	Entropy 0.76859 (0.77174)	Top-1 acc 71.484 (71.250)	Top-5 acc 89.062 (88.266)	lr 0.00191
Train [99][1910/3239]	Time 0.223 (0.653)	Data Time 0.002 (0.023)	Loss 2.3699 (2.2167)	Entropy 0.76856 (0.77173)	Top-1 acc 67.578 (71.250)	Top-5 acc 83.984 (88.264)	lr 0.00191
Train [99][1920/3239]	Time 0.231 (0.652)	Data Time 0.001 (0.023)	Loss 2.1191 (2.2162)	Entropy 0.76862 (0.77171)	Top-1 acc 73.828 (71.264)	Top-5 acc 90.234 (88.273)	lr 0.00191
Train [99][1930/3239]	Time 0.262 (0.651)	Data Time 0.001 (0.023)	Loss 2.2549 (2.2163)	Entropy 0.76864 (0.77169)	Top-1 acc 67.969 (71.255)	Top-5 acc 88.672 (88.271)	lr 0.00191
Train [99][1940/3239]	Time 0.229 (0.650)	Data Time 0.001 (0.023)	Loss 2.0354 (2.2162)	Entropy 0.76865 (0.77168)	Top-1 acc 76.172 (71.259)	Top-5 acc 92.188 (88.274)	lr 0.00191
Train [99][1950/3239]	Time 0.227 (0.649)	Data Time 0.001 (0.022)	Loss 2.3166 (2.2161)	Entropy 0.76864 (0.77166)	Top-1 acc 67.188 (71.254)	Top-5 acc 88.281 (88.279)	lr 0.00191
Train [99][1960/3239]	Time 0.235 (0.648)	Data Time 0.001 (0.022)	Loss 2.2745 (2.2160)	Entropy 0.76868 (0.77165)	Top-1 acc 70.312 (71.258)	Top-5 acc 85.938 (88.277)	lr 0.00191
Train [99][1970/3239]	Time 0.240 (0.647)	Data Time 0.002 (0.022)	Loss 2.1899 (2.2160)	Entropy 0.76869 (0.77163)	Top-1 acc 73.828 (71.254)	Top-5 acc 89.453 (88.278)	lr 0.00191
Train [99][1980/3239]	Time 0.229 (0.647)	Data Time 0.001 (0.022)	Loss 2.1636 (2.2161)	Entropy 0.76865 (0.77162)	Top-1 acc 72.656 (71.252)	Top-5 acc 89.453 (88.275)	lr 0.00191
Train [99][1990/3239]	Time 2.577 (0.646)	Data Time 0.002 (0.022)	Loss 2.0621 (2.2163)	Entropy 0.76865 (0.77160)	Top-1 acc 73.438 (71.251)	Top-5 acc 91.406 (88.270)	lr 0.00191
Train [99][2000/3239]	Time 0.242 (0.644)	Data Time 0.001 (0.022)	Loss 2.2245 (2.2163)	Entropy 0.76854 (0.77159)	Top-1 acc 71.094 (71.251)	Top-5 acc 87.891 (88.269)	lr 0.00191
Train [99][2010/3239]	Time 0.238 (0.643)	Data Time 0.002 (0.022)	Loss 2.0704 (2.2161)	Entropy 0.76863 (0.77157)	Top-1 acc 75.781 (71.253)	Top-5 acc 90.625 (88.273)	lr 0.00191
Train [99][2020/3239]	Time 0.340 (0.642)	Data Time 0.001 (0.022)	Loss 2.1629 (2.2161)	Entropy 0.76851 (0.77156)	Top-1 acc 68.359 (71.253)	Top-5 acc 89.062 (88.275)	lr 0.00191
Train [99][2030/3239]	Time 0.232 (0.641)	Data Time 0.002 (0.022)	Loss 2.2462 (2.2158)	Entropy 0.76847 (0.77154)	Top-1 acc 68.750 (71.261)	Top-5 acc 89.453 (88.282)	lr 0.00191
Train [99][2040/3239]	Time 0.239 (0.641)	Data Time 0.001 (0.022)	Loss 2.2702 (2.2158)	Entropy 0.76847 (0.77153)	Top-1 acc 73.828 (71.259)	Top-5 acc 87.109 (88.275)	lr 0.00191
Train [99][2050/3239]	Time 0.219 (0.640)	Data Time 0.001 (0.021)	Loss 2.1559 (2.2158)	Entropy 0.76845 (0.77151)	Top-1 acc 72.266 (71.265)	Top-5 acc 88.281 (88.272)	lr 0.00191
Train [99][2060/3239]	Time 0.239 (0.639)	Data Time 0.001 (0.021)	Loss 2.3646 (2.2160)	Entropy 0.76842 (0.77150)	Top-1 acc 69.531 (71.260)	Top-5 acc 87.109 (88.269)	lr 0.00190
Train [99][2070/3239]	Time 0.220 (0.638)	Data Time 0.001 (0.021)	Loss 2.2296 (2.2159)	Entropy 0.76841 (0.77148)	Top-1 acc 71.094 (71.259)	Top-5 acc 90.625 (88.270)	lr 0.00190
Train [99][2080/3239]	Time 0.284 (0.638)	Data Time 0.001 (0.021)	Loss 2.1485 (2.2159)	Entropy 0.76822 (0.77147)	Top-1 acc 69.531 (71.258)	Top-5 acc 90.234 (88.267)	lr 0.00190
Train [99][2090/3239]	Time 0.240 (0.637)	Data Time 0.002 (0.021)	Loss 2.1567 (2.2161)	Entropy 0.76836 (0.77145)	Top-1 acc 70.312 (71.253)	Top-5 acc 89.844 (88.266)	lr 0.00190
Train [99][2100/3239]	Time 2.526 (0.636)	Data Time 0.001 (0.021)	Loss 2.1603 (2.2161)	Entropy 0.76836 (0.77144)	Top-1 acc 71.484 (71.254)	Top-5 acc 89.844 (88.265)	lr 0.00190
Train [99][2110/3239]	Time 0.263 (0.634)	Data Time 0.001 (0.021)	Loss 2.0597 (2.2160)	Entropy 0.76836 (0.77142)	Top-1 acc 76.562 (71.258)	Top-5 acc 90.625 (88.265)	lr 0.00190
Train [99][2120/3239]	Time 0.238 (0.634)	Data Time 0.001 (0.021)	Loss 2.2900 (2.2162)	Entropy 0.76826 (0.77141)	Top-1 acc 69.141 (71.247)	Top-5 acc 85.938 (88.262)	lr 0.00190
Train [99][2130/3239]	Time 0.242 (0.633)	Data Time 0.001 (0.021)	Loss 2.1959 (2.2162)	Entropy 0.76824 (0.77139)	Top-1 acc 72.266 (71.247)	Top-5 acc 89.062 (88.263)	lr 0.00190
Train [99][2140/3239]	Time 0.239 (0.632)	Data Time 0.001 (0.021)	Loss 2.2896 (2.2161)	Entropy 0.76823 (0.77138)	Top-1 acc 68.359 (71.246)	Top-5 acc 87.891 (88.266)	lr 0.00190
Train [99][2150/3239]	Time 0.329 (0.631)	Data Time 0.001 (0.021)	Loss 2.2699 (2.2164)	Entropy 0.76813 (0.77136)	Top-1 acc 69.922 (71.235)	Top-5 acc 85.938 (88.261)	lr 0.00190
Train [99][2160/3239]	Time 0.225 (0.631)	Data Time 0.001 (0.020)	Loss 2.0841 (2.2164)	Entropy 0.76814 (0.77135)	Top-1 acc 75.000 (71.238)	Top-5 acc 89.844 (88.263)	lr 0.00190
Train [99][2170/3239]	Time 0.227 (0.630)	Data Time 0.001 (0.020)	Loss 2.1454 (2.2162)	Entropy 0.76809 (0.77133)	Top-1 acc 76.172 (71.246)	Top-5 acc 90.234 (88.263)	lr 0.00190
Train [99][2180/3239]	Time 0.220 (0.629)	Data Time 0.001 (0.020)	Loss 2.3293 (2.2164)	Entropy 0.76803 (0.77132)	Top-1 acc 69.141 (71.241)	Top-5 acc 83.984 (88.262)	lr 0.00190
Train [99][2190/3239]	Time 0.362 (0.629)	Data Time 0.001 (0.020)	Loss 2.3363 (2.2164)	Entropy 0.76800 (0.77130)	Top-1 acc 71.094 (71.243)	Top-5 acc 85.547 (88.262)	lr 0.00190
Train [99][2200/3239]	Time 0.259 (0.628)	Data Time 0.001 (0.020)	Loss 2.2718 (2.2163)	Entropy 0.76780 (0.77129)	Top-1 acc 69.922 (71.244)	Top-5 acc 87.109 (88.263)	lr 0.00190
Train [99][2210/3239]	Time 2.613 (0.627)	Data Time 0.001 (0.020)	Loss 2.3259 (2.2162)	Entropy 0.76780 (0.77127)	Top-1 acc 68.750 (71.250)	Top-5 acc 86.719 (88.264)	lr 0.00190
Train [99][2220/3239]	Time 0.269 (0.626)	Data Time 0.001 (0.020)	Loss 2.2513 (2.2160)	Entropy 0.76777 (0.77126)	Top-1 acc 66.797 (71.250)	Top-5 acc 89.062 (88.270)	lr 0.00190
Train [99][2230/3239]	Time 0.225 (0.625)	Data Time 0.001 (0.020)	Loss 2.2070 (2.2161)	Entropy 0.76789 (0.77124)	Top-1 acc 71.094 (71.247)	Top-5 acc 88.281 (88.267)	lr 0.00190
Train [99][2240/3239]	Time 0.269 (0.624)	Data Time 0.001 (0.020)	Loss 2.1409 (2.2160)	Entropy 0.76790 (0.77123)	Top-1 acc 71.875 (71.246)	Top-5 acc 87.891 (88.270)	lr 0.00190
Train [99][2250/3239]	Time 0.234 (0.624)	Data Time 0.001 (0.020)	Loss 2.2369 (2.2159)	Entropy 0.76788 (0.77121)	Top-1 acc 68.359 (71.243)	Top-5 acc 89.453 (88.272)	lr 0.00189
Train [99][2260/3239]	Time 0.255 (0.646)	Data Time 0.002 (0.020)	Loss 2.0682 (2.2156)	Entropy 0.76779 (0.77120)	Top-1 acc 74.609 (71.249)	Top-5 acc 90.234 (88.277)	lr 0.00189
Train [99][2270/3239]	Time 0.232 (0.645)	Data Time 0.002 (0.020)	Loss 2.1696 (2.2158)	Entropy 0.76779 (0.77118)	Top-1 acc 72.266 (71.245)	Top-5 acc 89.062 (88.276)	lr 0.00189
Train [99][2280/3239]	Time 0.226 (0.644)	Data Time 0.001 (0.019)	Loss 2.0581 (2.2157)	Entropy 0.76778 (0.77117)	Top-1 acc 75.000 (71.252)	Top-5 acc 91.016 (88.276)	lr 0.00189
Train [99][2290/3239]	Time 0.230 (0.644)	Data Time 0.001 (0.019)	Loss 2.1046 (2.2156)	Entropy 0.76767 (0.77115)	Top-1 acc 75.391 (71.256)	Top-5 acc 88.672 (88.277)	lr 0.00189
Train [99][2300/3239]	Time 0.222 (0.643)	Data Time 0.001 (0.019)	Loss 2.2255 (2.2156)	Entropy 0.76762 (0.77114)	Top-1 acc 71.875 (71.261)	Top-5 acc 87.500 (88.279)	lr 0.00189
Train [99][2310/3239]	Time 0.226 (0.642)	Data Time 0.001 (0.019)	Loss 2.0110 (2.2155)	Entropy 0.76777 (0.77112)	Top-1 acc 75.391 (71.264)	Top-5 acc 91.016 (88.281)	lr 0.00189
Train [99][2320/3239]	Time 2.630 (0.642)	Data Time 0.001 (0.019)	Loss 2.1709 (2.2155)	Entropy 0.76777 (0.77111)	Top-1 acc 70.703 (71.265)	Top-5 acc 90.234 (88.279)	lr 0.00189
Train [99][2330/3239]	Time 0.262 (0.640)	Data Time 0.001 (0.019)	Loss 2.2333 (2.2156)	Entropy 0.76775 (0.77109)	Top-1 acc 70.312 (71.264)	Top-5 acc 86.719 (88.273)	lr 0.00189
Train [99][2340/3239]	Time 0.237 (0.639)	Data Time 0.001 (0.019)	Loss 2.0718 (2.2156)	Entropy 0.76772 (0.77108)	Top-1 acc 76.172 (71.270)	Top-5 acc 89.844 (88.272)	lr 0.00189
Train [99][2350/3239]	Time 0.240 (0.639)	Data Time 0.001 (0.019)	Loss 2.1450 (2.2155)	Entropy 0.76770 (0.77106)	Top-1 acc 75.000 (71.272)	Top-5 acc 88.672 (88.275)	lr 0.00189
Train [99][2360/3239]	Time 0.318 (0.638)	Data Time 0.001 (0.019)	Loss 2.3068 (2.2156)	Entropy 0.76761 (0.77105)	Top-1 acc 69.531 (71.268)	Top-5 acc 87.500 (88.273)	lr 0.00189
Train [99][2370/3239]	Time 0.235 (0.637)	Data Time 0.001 (0.019)	Loss 2.3421 (2.2159)	Entropy 0.76748 (0.77103)	Top-1 acc 66.797 (71.260)	Top-5 acc 86.719 (88.267)	lr 0.00189
Train [99][2380/3239]	Time 0.226 (0.637)	Data Time 0.001 (0.019)	Loss 2.0765 (2.2161)	Entropy 0.76738 (0.77102)	Top-1 acc 75.391 (71.254)	Top-5 acc 91.406 (88.265)	lr 0.00189
Train [99][2390/3239]	Time 0.215 (0.636)	Data Time 0.001 (0.019)	Loss 2.1602 (2.2162)	Entropy 0.76742 (0.77100)	Top-1 acc 68.750 (71.250)	Top-5 acc 88.672 (88.262)	lr 0.00189
Train [99][2400/3239]	Time 0.234 (0.635)	Data Time 0.001 (0.019)	Loss 2.1707 (2.2162)	Entropy 0.76746 (0.77099)	Top-1 acc 70.312 (71.250)	Top-5 acc 89.453 (88.265)	lr 0.00189
Train [99][2410/3239]	Time 0.233 (0.634)	Data Time 0.001 (0.018)	Loss 2.2977 (2.2164)	Entropy 0.76740 (0.77097)	Top-1 acc 70.703 (71.241)	Top-5 acc 86.328 (88.262)	lr 0.00189
Train [99][2420/3239]	Time 0.238 (0.634)	Data Time 0.001 (0.018)	Loss 2.3354 (2.2165)	Entropy 0.76732 (0.77096)	Top-1 acc 69.922 (71.242)	Top-5 acc 85.156 (88.260)	lr 0.00189
Train [99][2430/3239]	Time 2.591 (0.633)	Data Time 0.001 (0.018)	Loss 2.1826 (2.2165)	Entropy 0.76732 (0.77095)	Top-1 acc 74.609 (71.242)	Top-5 acc 87.500 (88.259)	lr 0.00188
Train [99][2440/3239]	Time 0.259 (0.632)	Data Time 0.001 (0.018)	Loss 2.3438 (2.2164)	Entropy 0.76727 (0.77093)	Top-1 acc 68.359 (71.246)	Top-5 acc 87.500 (88.260)	lr 0.00188
Train [99][2450/3239]	Time 0.226 (0.631)	Data Time 0.001 (0.018)	Loss 2.2928 (2.2166)	Entropy 0.76732 (0.77092)	Top-1 acc 72.266 (71.238)	Top-5 acc 86.719 (88.259)	lr 0.00188
Train [99][2460/3239]	Time 0.239 (0.630)	Data Time 0.001 (0.018)	Loss 2.2007 (2.2167)	Entropy 0.76731 (0.77090)	Top-1 acc 71.875 (71.235)	Top-5 acc 88.281 (88.255)	lr 0.00188
Train [99][2470/3239]	Time 0.244 (0.630)	Data Time 0.001 (0.018)	Loss 2.2450 (2.2168)	Entropy 0.76722 (0.77089)	Top-1 acc 70.703 (71.232)	Top-5 acc 87.109 (88.251)	lr 0.00188
Train [99][2480/3239]	Time 0.231 (0.629)	Data Time 0.001 (0.018)	Loss 2.3462 (2.2168)	Entropy 0.76717 (0.77087)	Top-1 acc 64.062 (71.232)	Top-5 acc 85.156 (88.253)	lr 0.00188
Train [99][2490/3239]	Time 0.334 (0.629)	Data Time 0.001 (0.018)	Loss 2.3367 (2.2168)	Entropy 0.76717 (0.77086)	Top-1 acc 67.969 (71.232)	Top-5 acc 87.891 (88.251)	lr 0.00188
Train [99][2500/3239]	Time 0.238 (0.628)	Data Time 0.001 (0.018)	Loss 2.1278 (2.2166)	Entropy 0.76715 (0.77084)	Top-1 acc 73.438 (71.234)	Top-5 acc 90.625 (88.256)	lr 0.00188
Train [99][2510/3239]	Time 0.235 (0.627)	Data Time 0.001 (0.018)	Loss 2.2484 (2.2167)	Entropy 0.76719 (0.77083)	Top-1 acc 67.969 (71.233)	Top-5 acc 87.500 (88.253)	lr 0.00188
Train [99][2520/3239]	Time 0.224 (0.627)	Data Time 0.001 (0.018)	Loss 2.2098 (2.2167)	Entropy 0.76720 (0.77081)	Top-1 acc 71.094 (71.230)	Top-5 acc 88.281 (88.252)	lr 0.00188
Train [99][2530/3239]	Time 0.321 (0.626)	Data Time 0.001 (0.018)	Loss 2.2758 (2.2168)	Entropy 0.76710 (0.77080)	Top-1 acc 66.016 (71.227)	Top-5 acc 88.672 (88.253)	lr 0.00188
Train [99][2540/3239]	Time 2.619 (0.626)	Data Time 0.001 (0.018)	Loss 2.2558 (2.2168)	Entropy 0.76710 (0.77078)	Top-1 acc 69.922 (71.229)	Top-5 acc 89.062 (88.255)	lr 0.00188
Train [99][2550/3239]	Time 0.258 (0.624)	Data Time 0.001 (0.018)	Loss 2.3517 (2.2169)	Entropy 0.76702 (0.77077)	Top-1 acc 67.578 (71.225)	Top-5 acc 87.500 (88.252)	lr 0.00188
Train [99][2560/3239]	Time 0.238 (0.624)	Data Time 0.001 (0.017)	Loss 2.3028 (2.2171)	Entropy 0.76693 (0.77075)	Top-1 acc 66.797 (71.218)	Top-5 acc 87.500 (88.250)	lr 0.00188
Train [99][2570/3239]	Time 0.262 (0.623)	Data Time 0.001 (0.017)	Loss 2.3048 (2.2170)	Entropy 0.76689 (0.77074)	Top-1 acc 66.797 (71.220)	Top-5 acc 87.500 (88.251)	lr 0.00188
Train [99][2580/3239]	Time 0.241 (0.623)	Data Time 0.001 (0.017)	Loss 2.1219 (2.2169)	Entropy 0.76682 (0.77072)	Top-1 acc 77.344 (71.223)	Top-5 acc 88.672 (88.252)	lr 0.00188
Train [99][2590/3239]	Time 0.234 (0.622)	Data Time 0.001 (0.017)	Loss 2.1396 (2.2168)	Entropy 0.76675 (0.77071)	Top-1 acc 71.094 (71.224)	Top-5 acc 89.062 (88.255)	lr 0.00188
Train [99][2600/3239]	Time 0.225 (0.622)	Data Time 0.001 (0.017)	Loss 2.1502 (2.2169)	Entropy 0.76674 (0.77069)	Top-1 acc 72.656 (71.218)	Top-5 acc 89.453 (88.256)	lr 0.00188
Train [99][2610/3239]	Time 0.239 (0.621)	Data Time 0.001 (0.017)	Loss 2.1196 (2.2169)	Entropy 0.76670 (0.77068)	Top-1 acc 74.609 (71.221)	Top-5 acc 92.969 (88.258)	lr 0.00188
Train [99][2620/3239]	Time 0.243 (0.641)	Data Time 0.003 (0.017)	Loss 2.3332 (2.2169)	Entropy 0.76671 (0.77066)	Top-1 acc 69.531 (71.218)	Top-5 acc 87.500 (88.257)	lr 0.00187
Train [99][2630/3239]	Time 0.227 (0.641)	Data Time 0.002 (0.017)	Loss 2.0848 (2.2169)	Entropy 0.76664 (0.77065)	Top-1 acc 71.484 (71.218)	Top-5 acc 89.844 (88.258)	lr 0.00187
Train [99][2640/3239]	Time 0.266 (0.640)	Data Time 0.002 (0.017)	Loss 2.1069 (2.2169)	Entropy 0.76670 (0.77063)	Top-1 acc 73.828 (71.222)	Top-5 acc 91.406 (88.258)	lr 0.00187
Train [99][2650/3239]	Time 0.265 (0.640)	Data Time 0.002 (0.017)	Loss 2.3192 (2.2169)	Entropy 0.76671 (0.77062)	Top-1 acc 66.406 (71.216)	Top-5 acc 86.719 (88.254)	lr 0.00187
Train [99][2660/3239]	Time 0.338 (0.639)	Data Time 0.002 (0.017)	Loss 2.2272 (2.2171)	Entropy 0.76673 (0.77060)	Top-1 acc 68.750 (71.209)	Top-5 acc 89.062 (88.254)	lr 0.00187
Train [99][2670/3239]	Time 0.258 (0.639)	Data Time 0.001 (0.017)	Loss 2.2287 (2.2172)	Entropy 0.76652 (0.77059)	Top-1 acc 72.266 (71.206)	Top-5 acc 86.328 (88.252)	lr 0.00187
Train [99][2680/3239]	Time 0.227 (0.638)	Data Time 0.001 (0.017)	Loss 2.1574 (2.2172)	Entropy 0.76652 (0.77057)	Top-1 acc 73.047 (71.206)	Top-5 acc 88.281 (88.253)	lr 0.00187
Train [99][2690/3239]	Time 0.229 (0.638)	Data Time 0.002 (0.017)	Loss 2.3180 (2.2174)	Entropy 0.76645 (0.77056)	Top-1 acc 64.453 (71.198)	Top-5 acc 87.500 (88.247)	lr 0.00187
Train [99][2700/3239]	Time 0.333 (0.637)	Data Time 0.001 (0.017)	Loss 2.0726 (2.2175)	Entropy 0.76644 (0.77054)	Top-1 acc 71.875 (71.196)	Top-5 acc 91.016 (88.246)	lr 0.00187
Train [99][2710/3239]	Time 0.242 (0.636)	Data Time 0.001 (0.017)	Loss 2.1760 (2.2174)	Entropy 0.76639 (0.77053)	Top-1 acc 74.219 (71.196)	Top-5 acc 89.062 (88.249)	lr 0.00187
Train [99][2720/3239]	Time 0.223 (0.636)	Data Time 0.001 (0.017)	Loss 2.1751 (2.2175)	Entropy 0.76633 (0.77051)	Top-1 acc 73.438 (71.195)	Top-5 acc 89.062 (88.249)	lr 0.00187
Train [99][2730/3239]	Time 0.282 (0.635)	Data Time 0.001 (0.017)	Loss 2.1823 (2.2175)	Entropy 0.76635 (0.77050)	Top-1 acc 70.703 (71.191)	Top-5 acc 87.109 (88.249)	lr 0.00187
Train [99][2740/3239]	Time 0.315 (0.635)	Data Time 0.001 (0.016)	Loss 2.1755 (2.2176)	Entropy 0.76627 (0.77048)	Top-1 acc 74.609 (71.190)	Top-5 acc 88.672 (88.248)	lr 0.00187
Train [99][2750/3239]	Time 0.256 (0.634)	Data Time 0.001 (0.016)	Loss 2.3057 (2.2175)	Entropy 0.76627 (0.77047)	Top-1 acc 72.656 (71.192)	Top-5 acc 84.766 (88.246)	lr 0.00187
Train [99][2760/3239]	Time 0.226 (0.634)	Data Time 0.001 (0.016)	Loss 2.2319 (2.2177)	Entropy 0.76621 (0.77045)	Top-1 acc 69.141 (71.184)	Top-5 acc 89.062 (88.246)	lr 0.00187
Train [99][2770/3239]	Time 0.250 (0.633)	Data Time 0.001 (0.016)	Loss 2.1696 (2.2176)	Entropy 0.76601 (0.77044)	Top-1 acc 71.484 (71.186)	Top-5 acc 88.281 (88.247)	lr 0.00187
Train [99][2780/3239]	Time 0.228 (0.632)	Data Time 0.001 (0.016)	Loss 2.1643 (2.2178)	Entropy 0.76597 (0.77042)	Top-1 acc 72.656 (71.180)	Top-5 acc 91.016 (88.246)	lr 0.00187
Train [99][2790/3239]	Time 0.222 (0.632)	Data Time 0.001 (0.016)	Loss 2.3475 (2.2177)	Entropy 0.76596 (0.77040)	Top-1 acc 67.188 (71.182)	Top-5 acc 89.062 (88.250)	lr 0.00187
Train [99][2800/3239]	Time 0.231 (0.631)	Data Time 0.001 (0.016)	Loss 2.3200 (2.2176)	Entropy 0.76587 (0.77039)	Top-1 acc 66.797 (71.188)	Top-5 acc 87.891 (88.252)	lr 0.00187
Train [99][2810/3239]	Time 0.233 (0.631)	Data Time 0.001 (0.016)	Loss 2.2153 (2.2177)	Entropy 0.76577 (0.77037)	Top-1 acc 67.578 (71.185)	Top-5 acc 90.234 (88.251)	lr 0.00186
Train [99][2820/3239]	Time 0.247 (0.630)	Data Time 0.001 (0.016)	Loss 2.2105 (2.2177)	Entropy 0.76575 (0.77036)	Top-1 acc 70.703 (71.180)	Top-5 acc 89.844 (88.252)	lr 0.00186
Train [99][2830/3239]	Time 0.241 (0.630)	Data Time 0.001 (0.016)	Loss 2.2097 (2.2179)	Entropy 0.76571 (0.77034)	Top-1 acc 70.703 (71.179)	Top-5 acc 89.062 (88.249)	lr 0.00186
Train [99][2840/3239]	Time 0.271 (0.629)	Data Time 0.001 (0.016)	Loss 2.3960 (2.2181)	Entropy 0.76553 (0.77032)	Top-1 acc 68.750 (71.177)	Top-5 acc 82.812 (88.246)	lr 0.00186
Train [99][2850/3239]	Time 0.226 (0.629)	Data Time 0.002 (0.016)	Loss 2.0156 (2.2179)	Entropy 0.76557 (0.77031)	Top-1 acc 73.828 (71.180)	Top-5 acc 92.578 (88.247)	lr 0.00186
Train [99][2860/3239]	Time 0.252 (0.628)	Data Time 0.001 (0.016)	Loss 2.1600 (2.2177)	Entropy 0.76558 (0.77029)	Top-1 acc 72.266 (71.188)	Top-5 acc 85.938 (88.251)	lr 0.00186
Train [99][2870/3239]	Time 0.274 (0.628)	Data Time 0.002 (0.016)	Loss 2.0148 (2.2177)	Entropy 0.76550 (0.77027)	Top-1 acc 78.906 (71.188)	Top-5 acc 90.625 (88.250)	lr 0.00186
Train [99][2880/3239]	Time 0.236 (0.627)	Data Time 0.001 (0.016)	Loss 2.2458 (2.2177)	Entropy 0.76550 (0.77026)	Top-1 acc 72.656 (71.189)	Top-5 acc 87.500 (88.248)	lr 0.00186
Train [99][2890/3239]	Time 0.243 (0.627)	Data Time 0.001 (0.016)	Loss 2.1590 (2.2176)	Entropy 0.76550 (0.77024)	Top-1 acc 69.922 (71.190)	Top-5 acc 88.281 (88.250)	lr 0.00186
Train [99][2900/3239]	Time 0.225 (0.626)	Data Time 0.001 (0.016)	Loss 2.0670 (2.2177)	Entropy 0.76541 (0.77022)	Top-1 acc 73.047 (71.189)	Top-5 acc 92.969 (88.251)	lr 0.00186
Train [99][2910/3239]	Time 0.304 (0.626)	Data Time 0.001 (0.016)	Loss 2.1769 (2.2177)	Entropy 0.76537 (0.77021)	Top-1 acc 74.219 (71.190)	Top-5 acc 90.234 (88.249)	lr 0.00186
Train [99][2920/3239]	Time 0.235 (0.625)	Data Time 0.001 (0.016)	Loss 2.2156 (2.2178)	Entropy 0.76528 (0.77019)	Top-1 acc 69.922 (71.189)	Top-5 acc 87.500 (88.248)	lr 0.00186
Train [99][2930/3239]	Time 0.226 (0.625)	Data Time 0.001 (0.015)	Loss 2.1993 (2.2178)	Entropy 0.76528 (0.77017)	Top-1 acc 73.828 (71.193)	Top-5 acc 88.672 (88.250)	lr 0.00186
Train [99][2940/3239]	Time 0.271 (0.624)	Data Time 0.001 (0.015)	Loss 2.2016 (2.2179)	Entropy 0.76513 (0.77016)	Top-1 acc 71.875 (71.188)	Top-5 acc 91.016 (88.249)	lr 0.00186
Train [99][2950/3239]	Time 0.538 (0.640)	Data Time 0.004 (0.015)	Loss 2.0636 (2.2178)	Entropy 0.76506 (0.77014)	Top-1 acc 76.562 (71.191)	Top-5 acc 89.844 (88.251)	lr 0.00186
Train [99][2960/3239]	Time 0.240 (0.640)	Data Time 0.002 (0.015)	Loss 2.0229 (2.2176)	Entropy 0.76501 (0.77012)	Top-1 acc 76.953 (71.198)	Top-5 acc 88.281 (88.253)	lr 0.00186
Train [99][2970/3239]	Time 0.245 (0.639)	Data Time 0.002 (0.015)	Loss 2.2053 (2.2175)	Entropy 0.76504 (0.77011)	Top-1 acc 72.266 (71.199)	Top-5 acc 89.844 (88.257)	lr 0.00186
Train [99][2980/3239]	Time 0.242 (0.639)	Data Time 0.001 (0.015)	Loss 2.1904 (2.2175)	Entropy 0.76499 (0.77009)	Top-1 acc 71.875 (71.199)	Top-5 acc 87.500 (88.259)	lr 0.00186
Train [99][2990/3239]	Time 0.312 (0.638)	Data Time 0.001 (0.015)	Loss 2.1811 (2.2178)	Entropy 0.76492 (0.77007)	Top-1 acc 72.656 (71.193)	Top-5 acc 89.062 (88.257)	lr 0.00186
Train [99][3000/3239]	Time 0.238 (0.638)	Data Time 0.001 (0.015)	Loss 2.3831 (2.2180)	Entropy 0.76487 (0.77005)	Top-1 acc 62.891 (71.186)	Top-5 acc 87.500 (88.255)	lr 0.00185
Train [99][3010/3239]	Time 0.222 (0.637)	Data Time 0.002 (0.015)	Loss 2.2267 (2.2179)	Entropy 0.76489 (0.77004)	Top-1 acc 68.750 (71.187)	Top-5 acc 90.625 (88.256)	lr 0.00185
Train [99][3020/3239]	Time 0.251 (0.637)	Data Time 0.001 (0.015)	Loss 2.4744 (2.2180)	Entropy 0.76484 (0.77002)	Top-1 acc 63.672 (71.183)	Top-5 acc 84.766 (88.255)	lr 0.00185
Train [99][3030/3239]	Time 0.283 (0.636)	Data Time 0.001 (0.015)	Loss 2.0624 (2.2181)	Entropy 0.76488 (0.77000)	Top-1 acc 74.609 (71.182)	Top-5 acc 90.625 (88.254)	lr 0.00185
Train [99][3040/3239]	Time 0.228 (0.636)	Data Time 0.001 (0.015)	Loss 2.0933 (2.2181)	Entropy 0.76488 (0.76999)	Top-1 acc 75.000 (71.180)	Top-5 acc 89.844 (88.253)	lr 0.00185
Train [99][3050/3239]	Time 0.261 (0.635)	Data Time 0.001 (0.015)	Loss 2.2892 (2.2183)	Entropy 0.76490 (0.76997)	Top-1 acc 70.312 (71.178)	Top-5 acc 86.719 (88.250)	lr 0.00185
Train [99][3060/3239]	Time 0.234 (0.635)	Data Time 0.001 (0.015)	Loss 2.3626 (2.2183)	Entropy 0.76484 (0.76995)	Top-1 acc 68.359 (71.175)	Top-5 acc 85.938 (88.251)	lr 0.00185
Train [99][3070/3239]	Time 0.222 (0.634)	Data Time 0.001 (0.015)	Loss 2.1470 (2.2183)	Entropy 0.76479 (0.76994)	Top-1 acc 70.703 (71.174)	Top-5 acc 89.453 (88.250)	lr 0.00185
Train [99][3080/3239]	Time 0.268 (0.634)	Data Time 0.002 (0.015)	Loss 2.1432 (2.2184)	Entropy 0.76474 (0.76992)	Top-1 acc 74.609 (71.176)	Top-5 acc 87.109 (88.248)	lr 0.00185
Train [99][3090/3239]	Time 0.231 (0.633)	Data Time 0.001 (0.015)	Loss 2.2006 (2.2183)	Entropy 0.76476 (0.76990)	Top-1 acc 74.219 (71.180)	Top-5 acc 89.062 (88.251)	lr 0.00185
Train [99][3100/3239]	Time 0.261 (0.633)	Data Time 0.001 (0.015)	Loss 2.2218 (2.2183)	Entropy 0.76475 (0.76989)	Top-1 acc 71.484 (71.175)	Top-5 acc 86.719 (88.249)	lr 0.00185
Train [99][3110/3239]	Time 0.299 (0.632)	Data Time 0.002 (0.015)	Loss 2.0548 (2.2183)	Entropy 0.76476 (0.76987)	Top-1 acc 75.000 (71.175)	Top-5 acc 92.188 (88.246)	lr 0.00185
Train [99][3120/3239]	Time 0.325 (0.632)	Data Time 0.001 (0.015)	Loss 2.1624 (2.2183)	Entropy 0.76470 (0.76985)	Top-1 acc 74.219 (71.175)	Top-5 acc 89.453 (88.247)	lr 0.00185
Train [99][3130/3239]	Time 0.269 (0.631)	Data Time 0.002 (0.015)	Loss 2.2080 (2.2185)	Entropy 0.76438 (0.76984)	Top-1 acc 67.969 (71.167)	Top-5 acc 90.234 (88.245)	lr 0.00185
Train [99][3140/3239]	Time 0.230 (0.631)	Data Time 0.002 (0.015)	Loss 2.1309 (2.2184)	Entropy 0.76432 (0.76982)	Top-1 acc 74.219 (71.169)	Top-5 acc 89.844 (88.245)	lr 0.00185
Train [99][3150/3239]	Time 0.230 (0.631)	Data Time 0.001 (0.015)	Loss 2.2312 (2.2185)	Entropy 0.76428 (0.76980)	Top-1 acc 67.969 (71.163)	Top-5 acc 87.109 (88.243)	lr 0.00185
Train [99][3160/3239]	Time 0.333 (0.630)	Data Time 0.001 (0.014)	Loss 2.0888 (2.2184)	Entropy 0.76421 (0.76978)	Top-1 acc 72.266 (71.164)	Top-5 acc 92.578 (88.244)	lr 0.00185
Train [99][3170/3239]	Time 0.236 (0.630)	Data Time 0.001 (0.014)	Loss 2.1632 (2.2184)	Entropy 0.76417 (0.76977)	Top-1 acc 71.875 (71.165)	Top-5 acc 91.797 (88.246)	lr 0.00185
Train [99][3180/3239]	Time 0.223 (0.629)	Data Time 0.000 (0.014)	Loss 2.0862 (2.2184)	Entropy 0.76412 (0.76975)	Top-1 acc 75.391 (71.163)	Top-5 acc 92.188 (88.247)	lr 0.00185
Train [99][3190/3239]	Time 0.228 (0.629)	Data Time 0.000 (0.014)	Loss 2.3036 (2.2185)	Entropy 0.76395 (0.76973)	Top-1 acc 71.094 (71.162)	Top-5 acc 85.938 (88.247)	lr 0.00184
Train [99][3200/3239]	Time 0.261 (0.628)	Data Time 0.000 (0.014)	Loss 2.3348 (2.2187)	Entropy 0.76386 (0.76971)	Top-1 acc 70.312 (71.155)	Top-5 acc 86.328 (88.244)	lr 0.00184
Train [99][3210/3239]	Time 0.226 (0.628)	Data Time 0.000 (0.014)	Loss 2.1691 (2.2188)	Entropy 0.76385 (0.76970)	Top-1 acc 73.828 (71.151)	Top-5 acc 87.109 (88.243)	lr 0.00184
Train [99][3220/3239]	Time 0.243 (0.627)	Data Time 0.000 (0.014)	Loss 2.2240 (2.2188)	Entropy 0.76383 (0.76968)	Top-1 acc 71.094 (71.149)	Top-5 acc 87.891 (88.243)	lr 0.00184
Train [99][3230/3239]	Time 0.221 (0.627)	Data Time 0.000 (0.014)	Loss 2.2422 (2.2187)	Entropy 0.76379 (0.76966)	Top-1 acc 68.750 (71.148)	Top-5 acc 89.453 (88.246)	lr 0.00184
Train [99][3239/3239]	Time 2.407 (0.626)	Data Time 0.000 (0.014)	Loss 2.7326 (2.2187)	Entropy 0.76379 (0.76964)	Top-1 acc 56.790 (71.147)	Top-5 acc 81.481 (88.246)	lr 0.00184
==========Valid [99/120]	loss 1.226	top-1 acc 71.887 (71.887)	top-5 acc 89.466	Train top-1 71.147	top-5 88.246	Entropy 0.76379	Latency-None: 0.000ms	Flops: 546.53M
Train [100][0/3239]	Time 42.298 (42.298)	Data Time 40.471 (40.471)	Loss 2.1207 (2.1207)	Entropy 0.76370 (0.76370)	Top-1 acc 74.219 (74.219)	Top-5 acc 89.453 (89.453)	lr 0.00184
Train [100][10/3239]	Time 2.757 (4.508)	Data Time 0.002 (3.779)	Loss 2.2749 (2.1842)	Entropy 0.76370 (0.76370)	Top-1 acc 70.312 (72.159)	Top-5 acc 84.766 (89.027)	lr 0.00184
Train [100][20/3239]	Time 0.279 (2.482)	Data Time 0.001 (1.980)	Loss 2.1997 (2.1957)	Entropy 0.76370 (0.76370)	Top-1 acc 67.578 (71.633)	Top-5 acc 88.672 (88.579)	lr 0.00184
Train [100][30/3239]	Time 0.230 (1.835)	Data Time 0.001 (1.342)	Loss 2.1753 (2.1999)	Entropy 0.76368 (0.76370)	Top-1 acc 73.828 (71.447)	Top-5 acc 89.844 (88.710)	lr 0.00184
Train [100][40/3239]	Time 0.245 (1.509)	Data Time 0.001 (1.015)	Loss 2.2753 (2.2074)	Entropy 0.76364 (0.76368)	Top-1 acc 69.922 (70.960)	Top-5 acc 89.062 (88.710)	lr 0.00184
Train [100][50/3239]	Time 0.338 (2.386)	Data Time 0.002 (0.817)	Loss 2.0102 (2.2123)	Entropy 0.76343 (0.76365)	Top-1 acc 78.125 (70.833)	Top-5 acc 92.969 (88.611)	lr 0.00184
Train [100][60/3239]	Time 0.236 (2.075)	Data Time 0.002 (0.683)	Loss 2.1551 (2.2201)	Entropy 0.76339 (0.76361)	Top-1 acc 74.609 (70.857)	Top-5 acc 91.016 (88.397)	lr 0.00184
Train [100][70/3239]	Time 0.233 (1.853)	Data Time 0.002 (0.587)	Loss 2.0556 (2.2232)	Entropy 0.76328 (0.76357)	Top-1 acc 76.953 (70.824)	Top-5 acc 91.406 (88.314)	lr 0.00184
Train [100][80/3239]	Time 0.256 (1.685)	Data Time 0.002 (0.515)	Loss 2.2550 (2.2198)	Entropy 0.76332 (0.76354)	Top-1 acc 69.922 (71.031)	Top-5 acc 90.625 (88.325)	lr 0.00184
Train [100][90/3239]	Time 0.323 (1.554)	Data Time 0.001 (0.459)	Loss 2.2841 (2.2152)	Entropy 0.76335 (0.76351)	Top-1 acc 69.531 (71.167)	Top-5 acc 85.938 (88.393)	lr 0.00184
Train [100][100/3239]	Time 0.230 (1.449)	Data Time 0.001 (0.413)	Loss 2.0919 (2.2145)	Entropy 0.76336 (0.76350)	Top-1 acc 75.391 (71.190)	Top-5 acc 91.406 (88.397)	lr 0.00184
Train [100][110/3239]	Time 0.240 (1.363)	Data Time 0.001 (0.376)	Loss 2.4107 (2.2156)	Entropy 0.76333 (0.76348)	Top-1 acc 65.234 (71.055)	Top-5 acc 87.891 (88.408)	lr 0.00184
Train [100][120/3239]	Time 2.800 (1.292)	Data Time 0.002 (0.345)	Loss 2.1569 (2.2121)	Entropy 0.76333 (0.76347)	Top-1 acc 74.219 (71.107)	Top-5 acc 88.672 (88.465)	lr 0.00184
Train [100][130/3239]	Time 0.238 (1.213)	Data Time 0.001 (0.319)	Loss 2.3229 (2.2124)	Entropy 0.76328 (0.76346)	Top-1 acc 70.312 (71.138)	Top-5 acc 87.891 (88.502)	lr 0.00184
Train [100][140/3239]	Time 0.248 (1.162)	Data Time 0.001 (0.297)	Loss 2.2424 (2.2094)	Entropy 0.76333 (0.76345)	Top-1 acc 71.484 (71.224)	Top-5 acc 85.938 (88.536)	lr 0.00183
Train [100][150/3239]	Time 0.242 (1.119)	Data Time 0.003 (0.277)	Loss 2.2049 (2.2076)	Entropy 0.76337 (0.76344)	Top-1 acc 68.750 (71.285)	Top-5 acc 89.062 (88.540)	lr 0.00183
Train [100][160/3239]	Time 0.233 (1.079)	Data Time 0.001 (0.260)	Loss 2.0722 (2.2059)	Entropy 0.76333 (0.76344)	Top-1 acc 76.172 (71.365)	Top-5 acc 91.016 (88.575)	lr 0.00183
Train [100][170/3239]	Time 0.230 (1.044)	Data Time 0.001 (0.245)	Loss 2.1391 (2.2043)	Entropy 0.76328 (0.76343)	Top-1 acc 73.047 (71.446)	Top-5 acc 88.672 (88.590)	lr 0.00183
Train [100][180/3239]	Time 0.231 (1.013)	Data Time 0.001 (0.231)	Loss 2.2193 (2.2038)	Entropy 0.76327 (0.76342)	Top-1 acc 70.703 (71.484)	Top-5 acc 86.328 (88.601)	lr 0.00183
Train [100][190/3239]	Time 0.224 (0.985)	Data Time 0.001 (0.219)	Loss 2.2962 (2.2063)	Entropy 0.76324 (0.76341)	Top-1 acc 68.359 (71.450)	Top-5 acc 86.719 (88.510)	lr 0.00183
Train [100][200/3239]	Time 0.231 (0.959)	Data Time 0.001 (0.209)	Loss 2.2591 (2.2083)	Entropy 0.76323 (0.76340)	Top-1 acc 69.141 (71.393)	Top-5 acc 87.109 (88.485)	lr 0.00183
Train [100][210/3239]	Time 0.230 (0.937)	Data Time 0.001 (0.199)	Loss 2.1059 (2.2101)	Entropy 0.76322 (0.76339)	Top-1 acc 72.266 (71.314)	Top-5 acc 89.453 (88.453)	lr 0.00183
Train [100][220/3239]	Time 0.229 (0.916)	Data Time 0.001 (0.190)	Loss 2.1260 (2.2117)	Entropy 0.76323 (0.76339)	Top-1 acc 70.703 (71.286)	Top-5 acc 89.453 (88.424)	lr 0.00183
Train [100][230/3239]	Time 2.550 (0.897)	Data Time 0.001 (0.182)	Loss 2.2184 (2.2111)	Entropy 0.76323 (0.76338)	Top-1 acc 72.266 (71.287)	Top-5 acc 87.109 (88.439)	lr 0.00183
Train [100][240/3239]	Time 0.291 (0.870)	Data Time 0.002 (0.174)	Loss 2.2464 (2.2096)	Entropy 0.76321 (0.76337)	Top-1 acc 69.141 (71.309)	Top-5 acc 87.891 (88.463)	lr 0.00183
Train [100][250/3239]	Time 0.241 (0.855)	Data Time 0.001 (0.167)	Loss 2.1845 (2.2100)	Entropy 0.76311 (0.76336)	Top-1 acc 70.703 (71.291)	Top-5 acc 90.625 (88.459)	lr 0.00183
Train [100][260/3239]	Time 0.349 (0.842)	Data Time 0.001 (0.161)	Loss 2.2108 (2.2081)	Entropy 0.76309 (0.76335)	Top-1 acc 71.484 (71.362)	Top-5 acc 87.500 (88.470)	lr 0.00183
Train [100][270/3239]	Time 0.229 (0.829)	Data Time 0.001 (0.155)	Loss 2.1191 (2.2091)	Entropy 0.76308 (0.76334)	Top-1 acc 75.781 (71.349)	Top-5 acc 87.500 (88.440)	lr 0.00183
Train [100][280/3239]	Time 0.226 (0.817)	Data Time 0.002 (0.150)	Loss 2.2116 (2.2094)	Entropy 0.76309 (0.76333)	Top-1 acc 66.406 (71.327)	Top-5 acc 89.844 (88.459)	lr 0.00183
Train [100][290/3239]	Time 0.236 (0.805)	Data Time 0.001 (0.145)	Loss 2.2546 (2.2103)	Entropy 0.76317 (0.76333)	Top-1 acc 67.188 (71.283)	Top-5 acc 88.281 (88.456)	lr 0.00183
Train [100][300/3239]	Time 0.330 (0.795)	Data Time 0.001 (0.140)	Loss 2.1916 (2.2106)	Entropy 0.76309 (0.76332)	Top-1 acc 71.875 (71.294)	Top-5 acc 91.406 (88.458)	lr 0.00183
Train [100][310/3239]	Time 0.215 (0.784)	Data Time 0.001 (0.135)	Loss 2.1576 (2.2094)	Entropy 0.76301 (0.76331)	Top-1 acc 74.609 (71.347)	Top-5 acc 87.500 (88.457)	lr 0.00183
Train [100][320/3239]	Time 0.222 (0.774)	Data Time 0.001 (0.131)	Loss 2.0561 (2.2068)	Entropy 0.76301 (0.76330)	Top-1 acc 78.516 (71.432)	Top-5 acc 90.234 (88.497)	lr 0.00183
Train [100][330/3239]	Time 0.270 (0.765)	Data Time 0.001 (0.127)	Loss 2.1373 (2.2069)	Entropy 0.76304 (0.76329)	Top-1 acc 73.047 (71.419)	Top-5 acc 89.844 (88.511)	lr 0.00182
Train [100][340/3239]	Time 2.610 (0.756)	Data Time 0.001 (0.124)	Loss 2.2043 (2.2073)	Entropy 0.76304 (0.76329)	Top-1 acc 67.969 (71.384)	Top-5 acc 90.234 (88.521)	lr 0.00182
Train [100][350/3239]	Time 0.230 (0.742)	Data Time 0.001 (0.120)	Loss 2.2144 (2.2085)	Entropy 0.76303 (0.76328)	Top-1 acc 71.875 (71.362)	Top-5 acc 87.500 (88.480)	lr 0.00182
Train [100][360/3239]	Time 0.240 (0.735)	Data Time 0.001 (0.117)	Loss 2.3585 (2.2086)	Entropy 0.76307 (0.76327)	Top-1 acc 67.969 (71.339)	Top-5 acc 84.766 (88.484)	lr 0.00182
Train [100][370/3239]	Time 0.245 (0.728)	Data Time 0.001 (0.114)	Loss 2.1174 (2.2080)	Entropy 0.76298 (0.76326)	Top-1 acc 75.000 (71.378)	Top-5 acc 90.625 (88.486)	lr 0.00182
Train [100][380/3239]	Time 0.242 (0.721)	Data Time 0.001 (0.111)	Loss 2.1670 (2.2086)	Entropy 0.76286 (0.76326)	Top-1 acc 68.750 (71.339)	Top-5 acc 89.453 (88.477)	lr 0.00182
Train [100][390/3239]	Time 0.328 (0.715)	Data Time 0.002 (0.108)	Loss 2.2601 (2.2099)	Entropy 0.76288 (0.76325)	Top-1 acc 67.188 (71.278)	Top-5 acc 88.281 (88.459)	lr 0.00182
Train [100][400/3239]	Time 0.247 (0.709)	Data Time 0.001 (0.105)	Loss 2.0982 (2.2093)	Entropy 0.76289 (0.76324)	Top-1 acc 76.562 (71.314)	Top-5 acc 90.234 (88.469)	lr 0.00182
Train [100][410/3239]	Time 0.253 (0.829)	Data Time 0.002 (0.103)	Loss 2.2166 (2.2087)	Entropy 0.76290 (0.76323)	Top-1 acc 69.922 (71.327)	Top-5 acc 88.281 (88.477)	lr 0.00182
Train [100][420/3239]	Time 0.231 (0.822)	Data Time 0.002 (0.100)	Loss 2.1075 (2.2086)	Entropy 0.76294 (0.76322)	Top-1 acc 73.047 (71.320)	Top-5 acc 89.453 (88.471)	lr 0.00182
Train [100][430/3239]	Time 0.320 (0.814)	Data Time 0.001 (0.098)	Loss 2.1074 (2.2086)	Entropy 0.76288 (0.76321)	Top-1 acc 73.828 (71.320)	Top-5 acc 91.406 (88.473)	lr 0.00182
Train [100][440/3239]	Time 0.279 (0.807)	Data Time 0.001 (0.096)	Loss 2.1402 (2.2083)	Entropy 0.76289 (0.76321)	Top-1 acc 71.094 (71.317)	Top-5 acc 90.234 (88.482)	lr 0.00182
Train [100][450/3239]	Time 2.483 (0.799)	Data Time 0.002 (0.094)	Loss 2.2953 (2.2084)	Entropy 0.76289 (0.76320)	Top-1 acc 69.922 (71.337)	Top-5 acc 87.500 (88.480)	lr 0.00182
Train [100][460/3239]	Time 0.268 (0.787)	Data Time 0.001 (0.092)	Loss 2.3267 (2.2088)	Entropy 0.76291 (0.76319)	Top-1 acc 73.828 (71.336)	Top-5 acc 85.547 (88.468)	lr 0.00182
Train [100][470/3239]	Time 0.325 (0.781)	Data Time 0.001 (0.090)	Loss 2.1834 (2.2086)	Entropy 0.76292 (0.76319)	Top-1 acc 72.656 (71.359)	Top-5 acc 88.672 (88.473)	lr 0.00182
Train [100][480/3239]	Time 0.243 (0.775)	Data Time 0.001 (0.088)	Loss 2.1578 (2.2082)	Entropy 0.76285 (0.76318)	Top-1 acc 74.219 (71.369)	Top-5 acc 90.234 (88.479)	lr 0.00182
Train [100][490/3239]	Time 0.220 (0.769)	Data Time 0.001 (0.086)	Loss 2.1602 (2.2074)	Entropy 0.76288 (0.76317)	Top-1 acc 75.000 (71.387)	Top-5 acc 88.672 (88.494)	lr 0.00182
Train [100][500/3239]	Time 0.245 (0.763)	Data Time 0.001 (0.085)	Loss 2.1073 (2.2074)	Entropy 0.76278 (0.76317)	Top-1 acc 74.219 (71.386)	Top-5 acc 91.406 (88.492)	lr 0.00182
Train [100][510/3239]	Time 0.246 (0.758)	Data Time 0.001 (0.083)	Loss 2.1872 (2.2079)	Entropy 0.76269 (0.76316)	Top-1 acc 71.484 (71.383)	Top-5 acc 92.188 (88.474)	lr 0.00182
Train [100][520/3239]	Time 0.245 (0.752)	Data Time 0.001 (0.081)	Loss 2.2108 (2.2084)	Entropy 0.76259 (0.76315)	Top-1 acc 68.750 (71.377)	Top-5 acc 88.281 (88.465)	lr 0.00181
Train [100][530/3239]	Time 0.219 (0.747)	Data Time 0.001 (0.080)	Loss 2.0911 (2.2079)	Entropy 0.76257 (0.76314)	Top-1 acc 75.391 (71.398)	Top-5 acc 90.625 (88.473)	lr 0.00181
Train [100][540/3239]	Time 0.219 (0.742)	Data Time 0.001 (0.078)	Loss 2.1503 (2.2079)	Entropy 0.76255 (0.76313)	Top-1 acc 74.219 (71.386)	Top-5 acc 87.500 (88.474)	lr 0.00181
Train [100][550/3239]	Time 0.274 (0.737)	Data Time 0.001 (0.077)	Loss 2.1288 (2.2071)	Entropy 0.76257 (0.76312)	Top-1 acc 73.828 (71.405)	Top-5 acc 90.234 (88.482)	lr 0.00181
Train [100][560/3239]	Time 2.553 (0.733)	Data Time 0.001 (0.076)	Loss 2.2805 (2.2069)	Entropy 0.76257 (0.76311)	Top-1 acc 68.359 (71.400)	Top-5 acc 87.500 (88.470)	lr 0.00181
Train [100][570/3239]	Time 0.241 (0.724)	Data Time 0.001 (0.074)	Loss 2.3521 (2.2074)	Entropy 0.76262 (0.76310)	Top-1 acc 64.844 (71.369)	Top-5 acc 84.375 (88.467)	lr 0.00181
Train [100][580/3239]	Time 0.221 (0.720)	Data Time 0.001 (0.073)	Loss 2.1575 (2.2076)	Entropy 0.76267 (0.76309)	Top-1 acc 75.000 (71.365)	Top-5 acc 90.234 (88.465)	lr 0.00181
Train [100][590/3239]	Time 0.228 (0.716)	Data Time 0.001 (0.072)	Loss 2.1660 (2.2075)	Entropy 0.76268 (0.76308)	Top-1 acc 71.875 (71.365)	Top-5 acc 89.844 (88.467)	lr 0.00181
Train [100][600/3239]	Time 0.324 (0.712)	Data Time 0.001 (0.071)	Loss 2.2211 (2.2069)	Entropy 0.76263 (0.76308)	Top-1 acc 69.531 (71.391)	Top-5 acc 87.500 (88.470)	lr 0.00181
Train [100][610/3239]	Time 0.231 (0.708)	Data Time 0.001 (0.070)	Loss 2.3441 (2.2071)	Entropy 0.76265 (0.76307)	Top-1 acc 68.750 (71.396)	Top-5 acc 84.375 (88.461)	lr 0.00181
Train [100][620/3239]	Time 0.228 (0.705)	Data Time 0.001 (0.069)	Loss 2.2781 (2.2074)	Entropy 0.76261 (0.76306)	Top-1 acc 69.531 (71.394)	Top-5 acc 87.500 (88.460)	lr 0.00181
Train [100][630/3239]	Time 0.240 (0.701)	Data Time 0.001 (0.068)	Loss 2.4014 (2.2081)	Entropy 0.76260 (0.76306)	Top-1 acc 65.234 (71.382)	Top-5 acc 83.594 (88.438)	lr 0.00181
Train [100][640/3239]	Time 0.308 (0.698)	Data Time 0.001 (0.066)	Loss 2.3335 (2.2086)	Entropy 0.76260 (0.76305)	Top-1 acc 67.969 (71.378)	Top-5 acc 85.547 (88.437)	lr 0.00181
Train [100][650/3239]	Time 0.230 (0.694)	Data Time 0.001 (0.065)	Loss 2.2641 (2.2079)	Entropy 0.76259 (0.76304)	Top-1 acc 66.797 (71.400)	Top-5 acc 86.328 (88.443)	lr 0.00181
Train [100][660/3239]	Time 0.228 (0.691)	Data Time 0.001 (0.065)	Loss 2.2010 (2.2084)	Entropy 0.76258 (0.76304)	Top-1 acc 70.703 (71.383)	Top-5 acc 88.672 (88.446)	lr 0.00181
Train [100][670/3239]	Time 2.627 (0.688)	Data Time 0.001 (0.064)	Loss 2.2209 (2.2081)	Entropy 0.76258 (0.76303)	Top-1 acc 71.484 (71.398)	Top-5 acc 88.672 (88.449)	lr 0.00181
Train [100][680/3239]	Time 0.240 (0.681)	Data Time 0.001 (0.063)	Loss 2.2340 (2.2081)	Entropy 0.76249 (0.76302)	Top-1 acc 69.531 (71.395)	Top-5 acc 89.062 (88.457)	lr 0.00181
Train [100][690/3239]	Time 0.241 (0.678)	Data Time 0.001 (0.062)	Loss 2.0055 (2.2086)	Entropy 0.76252 (0.76301)	Top-1 acc 75.781 (71.381)	Top-5 acc 92.969 (88.451)	lr 0.00181
Train [100][700/3239]	Time 0.227 (0.676)	Data Time 0.001 (0.061)	Loss 2.2095 (2.2086)	Entropy 0.76257 (0.76301)	Top-1 acc 68.750 (71.380)	Top-5 acc 89.453 (88.452)	lr 0.00181
Train [100][710/3239]	Time 0.233 (0.673)	Data Time 0.001 (0.060)	Loss 2.1855 (2.2100)	Entropy 0.76246 (0.76300)	Top-1 acc 70.703 (71.335)	Top-5 acc 88.281 (88.433)	lr 0.00180
Train [100][720/3239]	Time 0.232 (0.670)	Data Time 0.001 (0.059)	Loss 2.4588 (2.2103)	Entropy 0.76240 (0.76299)	Top-1 acc 64.844 (71.323)	Top-5 acc 83.594 (88.428)	lr 0.00180
Train [100][730/3239]	Time 0.209 (0.668)	Data Time 0.001 (0.058)	Loss 2.3584 (2.2109)	Entropy 0.76236 (0.76298)	Top-1 acc 67.188 (71.331)	Top-5 acc 85.938 (88.417)	lr 0.00180
Train [100][740/3239]	Time 0.230 (0.665)	Data Time 0.001 (0.058)	Loss 2.2711 (2.2103)	Entropy 0.76237 (0.76297)	Top-1 acc 70.703 (71.344)	Top-5 acc 85.547 (88.427)	lr 0.00180
Train [100][750/3239]	Time 0.232 (0.663)	Data Time 0.001 (0.057)	Loss 2.2809 (2.2100)	Entropy 0.76239 (0.76297)	Top-1 acc 71.484 (71.366)	Top-5 acc 86.328 (88.429)	lr 0.00180
Train [100][760/3239]	Time 0.224 (0.660)	Data Time 0.001 (0.056)	Loss 2.1139 (2.2101)	Entropy 0.76234 (0.76296)	Top-1 acc 71.484 (71.353)	Top-5 acc 90.234 (88.433)	lr 0.00180
Train [100][770/3239]	Time 0.265 (0.724)	Data Time 0.004 (0.056)	Loss 2.1860 (2.2104)	Entropy 0.76215 (0.76295)	Top-1 acc 71.484 (71.365)	Top-5 acc 88.281 (88.430)	lr 0.00180
Train [100][780/3239]	Time 2.792 (0.722)	Data Time 0.004 (0.055)	Loss 2.0887 (2.2098)	Entropy 0.76215 (0.76294)	Top-1 acc 73.828 (71.380)	Top-5 acc 90.234 (88.448)	lr 0.00180
Train [100][790/3239]	Time 0.251 (0.716)	Data Time 0.002 (0.054)	Loss 2.0662 (2.2089)	Entropy 0.76219 (0.76293)	Top-1 acc 76.172 (71.405)	Top-5 acc 91.797 (88.471)	lr 0.00180
Train [100][800/3239]	Time 0.253 (0.713)	Data Time 0.002 (0.054)	Loss 2.2173 (2.2086)	Entropy 0.76207 (0.76292)	Top-1 acc 72.656 (71.409)	Top-5 acc 90.625 (88.479)	lr 0.00180
Train [100][810/3239]	Time 0.340 (0.710)	Data Time 0.001 (0.053)	Loss 2.2925 (2.2081)	Entropy 0.76205 (0.76291)	Top-1 acc 71.094 (71.414)	Top-5 acc 87.109 (88.492)	lr 0.00180
Train [100][820/3239]	Time 0.234 (0.707)	Data Time 0.001 (0.052)	Loss 2.1254 (2.2076)	Entropy 0.76201 (0.76290)	Top-1 acc 72.656 (71.429)	Top-5 acc 89.062 (88.498)	lr 0.00180
Train [100][830/3239]	Time 0.214 (0.704)	Data Time 0.001 (0.052)	Loss 2.1150 (2.2078)	Entropy 0.76205 (0.76289)	Top-1 acc 72.656 (71.419)	Top-5 acc 91.406 (88.495)	lr 0.00180
Train [100][840/3239]	Time 0.227 (0.702)	Data Time 0.001 (0.051)	Loss 2.2411 (2.2073)	Entropy 0.76184 (0.76288)	Top-1 acc 69.141 (71.418)	Top-5 acc 88.281 (88.509)	lr 0.00180
Train [100][850/3239]	Time 0.321 (0.699)	Data Time 0.001 (0.050)	Loss 2.2584 (2.2072)	Entropy 0.76181 (0.76286)	Top-1 acc 71.094 (71.427)	Top-5 acc 88.672 (88.511)	lr 0.00180
Train [100][860/3239]	Time 0.232 (0.697)	Data Time 0.001 (0.050)	Loss 2.2008 (2.2076)	Entropy 0.76176 (0.76285)	Top-1 acc 70.703 (71.412)	Top-5 acc 88.672 (88.501)	lr 0.00180
Train [100][870/3239]	Time 0.226 (0.694)	Data Time 0.001 (0.049)	Loss 2.2802 (2.2082)	Entropy 0.76166 (0.76284)	Top-1 acc 69.922 (71.392)	Top-5 acc 86.719 (88.491)	lr 0.00180
Train [100][880/3239]	Time 0.247 (0.692)	Data Time 0.001 (0.049)	Loss 2.1453 (2.2085)	Entropy 0.76163 (0.76283)	Top-1 acc 73.047 (71.381)	Top-5 acc 87.109 (88.480)	lr 0.00180
Train [100][890/3239]	Time 2.783 (0.690)	Data Time 0.002 (0.048)	Loss 2.1701 (2.2084)	Entropy 0.76163 (0.76281)	Top-1 acc 71.094 (71.385)	Top-5 acc 89.453 (88.479)	lr 0.00180
Train [100][900/3239]	Time 0.232 (0.685)	Data Time 0.001 (0.048)	Loss 2.1356 (2.2082)	Entropy 0.76160 (0.76280)	Top-1 acc 72.656 (71.380)	Top-5 acc 92.188 (88.486)	lr 0.00179
Train [100][910/3239]	Time 0.253 (0.682)	Data Time 0.001 (0.047)	Loss 2.2526 (2.2082)	Entropy 0.76162 (0.76279)	Top-1 acc 69.141 (71.390)	Top-5 acc 86.719 (88.491)	lr 0.00179
Train [100][920/3239]	Time 0.239 (0.680)	Data Time 0.001 (0.047)	Loss 2.2798 (2.2083)	Entropy 0.76149 (0.76277)	Top-1 acc 70.703 (71.397)	Top-5 acc 85.156 (88.484)	lr 0.00179
Train [100][930/3239]	Time 0.231 (0.678)	Data Time 0.001 (0.046)	Loss 2.3405 (2.2086)	Entropy 0.76147 (0.76276)	Top-1 acc 67.188 (71.387)	Top-5 acc 85.938 (88.483)	lr 0.00179
Train [100][940/3239]	Time 0.223 (0.676)	Data Time 0.001 (0.046)	Loss 2.0087 (2.2085)	Entropy 0.76144 (0.76274)	Top-1 acc 79.297 (71.392)	Top-5 acc 90.625 (88.487)	lr 0.00179
Train [100][950/3239]	Time 0.235 (0.674)	Data Time 0.001 (0.045)	Loss 2.1268 (2.2080)	Entropy 0.76143 (0.76273)	Top-1 acc 71.875 (71.399)	Top-5 acc 89.844 (88.501)	lr 0.00179
Train [100][960/3239]	Time 0.221 (0.672)	Data Time 0.001 (0.045)	Loss 2.2266 (2.2080)	Entropy 0.76147 (0.76272)	Top-1 acc 71.484 (71.389)	Top-5 acc 86.328 (88.503)	lr 0.00179
Train [100][970/3239]	Time 0.232 (0.670)	Data Time 0.001 (0.044)	Loss 2.3237 (2.2082)	Entropy 0.76133 (0.76270)	Top-1 acc 69.922 (71.382)	Top-5 acc 86.719 (88.505)	lr 0.00179
Train [100][980/3239]	Time 0.225 (0.668)	Data Time 0.001 (0.044)	Loss 2.2082 (2.2083)	Entropy 0.76132 (0.76269)	Top-1 acc 72.266 (71.381)	Top-5 acc 89.844 (88.503)	lr 0.00179
Train [100][990/3239]	Time 0.220 (0.666)	Data Time 0.001 (0.044)	Loss 2.2264 (2.2083)	Entropy 0.76131 (0.76268)	Top-1 acc 69.141 (71.373)	Top-5 acc 88.281 (88.501)	lr 0.00179
Train [100][1000/3239]	Time 2.660 (0.664)	Data Time 0.001 (0.043)	Loss 2.2936 (2.2082)	Entropy 0.76131 (0.76266)	Top-1 acc 66.797 (71.371)	Top-5 acc 86.719 (88.500)	lr 0.00179
Train [100][1010/3239]	Time 0.217 (0.660)	Data Time 0.001 (0.043)	Loss 2.1984 (2.2085)	Entropy 0.76130 (0.76265)	Top-1 acc 67.578 (71.362)	Top-5 acc 91.406 (88.499)	lr 0.00179
Train [100][1020/3239]	Time 0.337 (0.658)	Data Time 0.001 (0.042)	Loss 2.1131 (2.2083)	Entropy 0.76132 (0.76264)	Top-1 acc 73.047 (71.369)	Top-5 acc 91.016 (88.506)	lr 0.00179
Train [100][1030/3239]	Time 0.237 (0.657)	Data Time 0.001 (0.042)	Loss 2.2461 (2.2084)	Entropy 0.76124 (0.76262)	Top-1 acc 70.312 (71.364)	Top-5 acc 86.328 (88.503)	lr 0.00179
Train [100][1040/3239]	Time 0.230 (0.655)	Data Time 0.001 (0.042)	Loss 2.2398 (2.2088)	Entropy 0.76116 (0.76261)	Top-1 acc 69.141 (71.353)	Top-5 acc 87.891 (88.492)	lr 0.00179
Train [100][1050/3239]	Time 0.271 (0.653)	Data Time 0.001 (0.041)	Loss 2.3290 (2.2090)	Entropy 0.76115 (0.76260)	Top-1 acc 68.359 (71.351)	Top-5 acc 84.766 (88.487)	lr 0.00179
Train [100][1060/3239]	Time 0.352 (0.652)	Data Time 0.001 (0.041)	Loss 2.0726 (2.2088)	Entropy 0.76110 (0.76258)	Top-1 acc 75.000 (71.360)	Top-5 acc 90.234 (88.488)	lr 0.00179
Train [100][1070/3239]	Time 0.227 (0.650)	Data Time 0.001 (0.040)	Loss 2.1854 (2.2087)	Entropy 0.76092 (0.76257)	Top-1 acc 69.922 (71.363)	Top-5 acc 88.672 (88.489)	lr 0.00179
Train [100][1080/3239]	Time 0.239 (0.649)	Data Time 0.001 (0.040)	Loss 2.3336 (2.2088)	Entropy 0.76087 (0.76255)	Top-1 acc 66.797 (71.351)	Top-5 acc 86.328 (88.483)	lr 0.00179
Train [100][1090/3239]	Time 0.233 (0.647)	Data Time 0.001 (0.040)	Loss 2.2222 (2.2087)	Entropy 0.76084 (0.76254)	Top-1 acc 75.391 (71.357)	Top-5 acc 87.891 (88.480)	lr 0.00178
Train [100][1100/3239]	Time 0.245 (0.645)	Data Time 0.001 (0.039)	Loss 2.1545 (2.2086)	Entropy 0.76077 (0.76252)	Top-1 acc 74.609 (71.372)	Top-5 acc 89.453 (88.481)	lr 0.00178
Train [100][1110/3239]	Time 2.551 (0.644)	Data Time 0.002 (0.039)	Loss 2.1588 (2.2079)	Entropy 0.76077 (0.76250)	Top-1 acc 72.266 (71.388)	Top-5 acc 89.062 (88.489)	lr 0.00178
Train [100][1120/3239]	Time 0.232 (0.640)	Data Time 0.001 (0.039)	Loss 2.2730 (2.2079)	Entropy 0.76066 (0.76249)	Top-1 acc 68.750 (71.390)	Top-5 acc 86.719 (88.483)	lr 0.00178
Train [100][1130/3239]	Time 0.240 (0.639)	Data Time 0.001 (0.038)	Loss 2.1829 (2.2083)	Entropy 0.76064 (0.76247)	Top-1 acc 73.438 (71.376)	Top-5 acc 87.500 (88.473)	lr 0.00178
Train [100][1140/3239]	Time 0.314 (0.682)	Data Time 0.003 (0.038)	Loss 2.1351 (2.2083)	Entropy 0.76061 (0.76246)	Top-1 acc 72.266 (71.378)	Top-5 acc 89.844 (88.471)	lr 0.00178
Train [100][1150/3239]	Time 0.332 (0.681)	Data Time 0.002 (0.038)	Loss 2.1806 (2.2085)	Entropy 0.76049 (0.76244)	Top-1 acc 71.875 (71.380)	Top-5 acc 88.672 (88.465)	lr 0.00178
Train [100][1160/3239]	Time 0.232 (0.680)	Data Time 0.001 (0.037)	Loss 2.3678 (2.2090)	Entropy 0.76039 (0.76242)	Top-1 acc 65.234 (71.365)	Top-5 acc 85.547 (88.459)	lr 0.00178
Train [100][1170/3239]	Time 0.223 (0.678)	Data Time 0.001 (0.037)	Loss 2.1889 (2.2090)	Entropy 0.76041 (0.76240)	Top-1 acc 73.438 (71.366)	Top-5 acc 90.234 (88.457)	lr 0.00178
Train [100][1180/3239]	Time 0.236 (0.676)	Data Time 0.001 (0.037)	Loss 2.1556 (2.2089)	Entropy 0.76042 (0.76239)	Top-1 acc 75.000 (71.370)	Top-5 acc 89.062 (88.454)	lr 0.00178
Train [100][1190/3239]	Time 0.325 (0.675)	Data Time 0.001 (0.037)	Loss 2.2101 (2.2089)	Entropy 0.76037 (0.76237)	Top-1 acc 70.703 (71.371)	Top-5 acc 89.062 (88.452)	lr 0.00178
Train [100][1200/3239]	Time 0.242 (0.673)	Data Time 0.001 (0.036)	Loss 2.1365 (2.2092)	Entropy 0.76036 (0.76235)	Top-1 acc 75.391 (71.365)	Top-5 acc 88.281 (88.450)	lr 0.00178
Train [100][1210/3239]	Time 0.213 (0.671)	Data Time 0.001 (0.036)	Loss 2.2786 (2.2091)	Entropy 0.76033 (0.76234)	Top-1 acc 69.922 (71.369)	Top-5 acc 87.500 (88.451)	lr 0.00178
Train [100][1220/3239]	Time 2.570 (0.670)	Data Time 0.001 (0.036)	Loss 2.3023 (2.2089)	Entropy 0.76033 (0.76232)	Top-1 acc 69.531 (71.379)	Top-5 acc 86.719 (88.458)	lr 0.00178
Train [100][1230/3239]	Time 0.427 (0.667)	Data Time 0.001 (0.035)	Loss 2.1466 (2.2089)	Entropy 0.76026 (0.76230)	Top-1 acc 75.000 (71.382)	Top-5 acc 90.625 (88.454)	lr 0.00178
Train [100][1240/3239]	Time 0.228 (0.665)	Data Time 0.001 (0.035)	Loss 2.1591 (2.2089)	Entropy 0.76031 (0.76229)	Top-1 acc 71.094 (71.379)	Top-5 acc 89.453 (88.451)	lr 0.00178
Train [100][1250/3239]	Time 0.246 (0.663)	Data Time 0.001 (0.035)	Loss 2.2295 (2.2088)	Entropy 0.76023 (0.76227)	Top-1 acc 70.312 (71.381)	Top-5 acc 85.938 (88.453)	lr 0.00178
Train [100][1260/3239]	Time 0.218 (0.662)	Data Time 0.001 (0.035)	Loss 2.1491 (2.2084)	Entropy 0.76013 (0.76226)	Top-1 acc 71.094 (71.386)	Top-5 acc 89.844 (88.461)	lr 0.00178
Train [100][1270/3239]	Time 0.228 (0.660)	Data Time 0.001 (0.034)	Loss 2.2285 (2.2085)	Entropy 0.76014 (0.76224)	Top-1 acc 72.266 (71.384)	Top-5 acc 89.453 (88.457)	lr 0.00178
Train [100][1280/3239]	Time 0.233 (0.659)	Data Time 0.005 (0.034)	Loss 2.0079 (2.2083)	Entropy 0.76016 (0.76222)	Top-1 acc 78.125 (71.387)	Top-5 acc 89.844 (88.453)	lr 0.00178
Train [100][1290/3239]	Time 0.213 (0.658)	Data Time 0.001 (0.034)	Loss 2.1877 (2.2080)	Entropy 0.76010 (0.76221)	Top-1 acc 72.266 (71.409)	Top-5 acc 86.719 (88.455)	lr 0.00177
Train [100][1300/3239]	Time 0.255 (0.656)	Data Time 0.001 (0.034)	Loss 2.1533 (2.2079)	Entropy 0.76007 (0.76219)	Top-1 acc 71.484 (71.414)	Top-5 acc 87.891 (88.457)	lr 0.00177
Train [100][1310/3239]	Time 0.233 (0.655)	Data Time 0.001 (0.033)	Loss 2.2879 (2.2080)	Entropy 0.75998 (0.76217)	Top-1 acc 69.922 (71.410)	Top-5 acc 87.109 (88.456)	lr 0.00177
Train [100][1320/3239]	Time 0.251 (0.654)	Data Time 0.001 (0.033)	Loss 2.1241 (2.2083)	Entropy 0.75996 (0.76216)	Top-1 acc 71.094 (71.394)	Top-5 acc 89.453 (88.452)	lr 0.00177
Train [100][1330/3239]	Time 2.577 (0.652)	Data Time 0.001 (0.033)	Loss 2.2895 (2.2085)	Entropy 0.75996 (0.76214)	Top-1 acc 71.094 (71.386)	Top-5 acc 85.938 (88.449)	lr 0.00177
Train [100][1340/3239]	Time 0.243 (0.649)	Data Time 0.001 (0.033)	Loss 2.4361 (2.2087)	Entropy 0.75994 (0.76212)	Top-1 acc 66.406 (71.375)	Top-5 acc 85.938 (88.449)	lr 0.00177
Train [100][1350/3239]	Time 0.226 (0.648)	Data Time 0.001 (0.032)	Loss 2.1758 (2.2088)	Entropy 0.75997 (0.76211)	Top-1 acc 70.312 (71.372)	Top-5 acc 89.844 (88.442)	lr 0.00177
Train [100][1360/3239]	Time 0.319 (0.647)	Data Time 0.001 (0.032)	Loss 2.2952 (2.2087)	Entropy 0.75998 (0.76209)	Top-1 acc 69.531 (71.372)	Top-5 acc 88.672 (88.443)	lr 0.00177
Train [100][1370/3239]	Time 0.230 (0.646)	Data Time 0.001 (0.032)	Loss 2.1663 (2.2086)	Entropy 0.75992 (0.76208)	Top-1 acc 72.266 (71.366)	Top-5 acc 89.062 (88.451)	lr 0.00177
Train [100][1380/3239]	Time 0.227 (0.644)	Data Time 0.001 (0.032)	Loss 2.2671 (2.2083)	Entropy 0.75979 (0.76206)	Top-1 acc 71.875 (71.372)	Top-5 acc 85.938 (88.457)	lr 0.00177
Train [100][1390/3239]	Time 0.221 (0.643)	Data Time 0.001 (0.031)	Loss 2.0801 (2.2081)	Entropy 0.75976 (0.76204)	Top-1 acc 75.000 (71.378)	Top-5 acc 89.844 (88.454)	lr 0.00177
Train [100][1400/3239]	Time 0.306 (0.642)	Data Time 0.001 (0.031)	Loss 2.1096 (2.2080)	Entropy 0.75965 (0.76203)	Top-1 acc 73.438 (71.376)	Top-5 acc 91.016 (88.453)	lr 0.00177
Train [100][1410/3239]	Time 0.221 (0.641)	Data Time 0.001 (0.031)	Loss 2.1275 (2.2077)	Entropy 0.75962 (0.76201)	Top-1 acc 71.094 (71.379)	Top-5 acc 88.281 (88.459)	lr 0.00177
Train [100][1420/3239]	Time 0.233 (0.639)	Data Time 0.001 (0.031)	Loss 2.2371 (2.2082)	Entropy 0.75959 (0.76199)	Top-1 acc 76.562 (71.369)	Top-5 acc 85.938 (88.447)	lr 0.00177
Train [100][1430/3239]	Time 0.243 (0.638)	Data Time 0.001 (0.031)	Loss 2.1045 (2.2078)	Entropy 0.75960 (0.76198)	Top-1 acc 72.656 (71.375)	Top-5 acc 89.844 (88.452)	lr 0.00177
Train [100][1440/3239]	Time 2.578 (0.637)	Data Time 0.001 (0.030)	Loss 2.2540 (2.2080)	Entropy 0.75960 (0.76196)	Top-1 acc 74.219 (71.370)	Top-5 acc 88.281 (88.449)	lr 0.00177
Train [100][1450/3239]	Time 0.225 (0.634)	Data Time 0.001 (0.030)	Loss 2.1386 (2.2079)	Entropy 0.75966 (0.76194)	Top-1 acc 73.438 (71.376)	Top-5 acc 89.453 (88.454)	lr 0.00177
Train [100][1460/3239]	Time 0.242 (0.633)	Data Time 0.002 (0.030)	Loss 2.1800 (2.2081)	Entropy 0.75965 (0.76193)	Top-1 acc 73.438 (71.376)	Top-5 acc 88.281 (88.448)	lr 0.00177
Train [100][1470/3239]	Time 0.236 (0.632)	Data Time 0.003 (0.030)	Loss 2.3386 (2.2081)	Entropy 0.75952 (0.76191)	Top-1 acc 67.969 (71.371)	Top-5 acc 84.766 (88.449)	lr 0.00177
Train [100][1480/3239]	Time 0.225 (0.631)	Data Time 0.001 (0.030)	Loss 2.1318 (2.2080)	Entropy 0.75947 (0.76190)	Top-1 acc 74.219 (71.372)	Top-5 acc 91.406 (88.449)	lr 0.00176
Train [100][1490/3239]	Time 0.315 (0.630)	Data Time 0.001 (0.029)	Loss 2.3092 (2.2079)	Entropy 0.75942 (0.76188)	Top-1 acc 68.750 (71.375)	Top-5 acc 85.547 (88.452)	lr 0.00176
Train [100][1500/3239]	Time 0.276 (0.664)	Data Time 0.003 (0.029)	Loss 2.2870 (2.2078)	Entropy 0.75939 (0.76186)	Top-1 acc 67.578 (71.373)	Top-5 acc 86.328 (88.451)	lr 0.00176
Train [100][1510/3239]	Time 0.242 (0.663)	Data Time 0.002 (0.029)	Loss 2.1315 (2.2082)	Entropy 0.75929 (0.76185)	Top-1 acc 73.438 (71.362)	Top-5 acc 89.453 (88.445)	lr 0.00176
Train [100][1520/3239]	Time 0.246 (0.662)	Data Time 0.001 (0.029)	Loss 2.2618 (2.2080)	Entropy 0.75923 (0.76183)	Top-1 acc 70.312 (71.368)	Top-5 acc 87.500 (88.446)	lr 0.00176
Train [100][1530/3239]	Time 0.317 (0.661)	Data Time 0.001 (0.029)	Loss 2.2581 (2.2081)	Entropy 0.75913 (0.76181)	Top-1 acc 73.438 (71.368)	Top-5 acc 86.328 (88.442)	lr 0.00176
Train [100][1540/3239]	Time 0.233 (0.660)	Data Time 0.001 (0.029)	Loss 2.1966 (2.2085)	Entropy 0.75909 (0.76180)	Top-1 acc 71.875 (71.361)	Top-5 acc 89.453 (88.438)	lr 0.00176
Train [100][1550/3239]	Time 2.522 (0.659)	Data Time 0.001 (0.028)	Loss 2.1077 (2.2083)	Entropy 0.75909 (0.76178)	Top-1 acc 70.312 (71.356)	Top-5 acc 91.406 (88.443)	lr 0.00176
Train [100][1560/3239]	Time 0.243 (0.656)	Data Time 0.001 (0.028)	Loss 2.1557 (2.2081)	Entropy 0.75915 (0.76176)	Top-1 acc 73.828 (71.352)	Top-5 acc 89.844 (88.447)	lr 0.00176
Train [100][1570/3239]	Time 0.283 (0.655)	Data Time 0.001 (0.028)	Loss 2.0687 (2.2079)	Entropy 0.75912 (0.76174)	Top-1 acc 74.609 (71.356)	Top-5 acc 90.625 (88.449)	lr 0.00176
Train [100][1580/3239]	Time 0.223 (0.654)	Data Time 0.001 (0.028)	Loss 2.0826 (2.2079)	Entropy 0.75910 (0.76173)	Top-1 acc 75.391 (71.353)	Top-5 acc 91.797 (88.454)	lr 0.00176
Train [100][1590/3239]	Time 0.243 (0.653)	Data Time 0.003 (0.028)	Loss 2.2181 (2.2079)	Entropy 0.75907 (0.76171)	Top-1 acc 71.484 (71.355)	Top-5 acc 89.453 (88.452)	lr 0.00176
Train [100][1600/3239]	Time 0.229 (0.651)	Data Time 0.001 (0.028)	Loss 2.2818 (2.2078)	Entropy 0.75910 (0.76169)	Top-1 acc 69.531 (71.359)	Top-5 acc 83.984 (88.453)	lr 0.00176
Train [100][1610/3239]	Time 0.223 (0.650)	Data Time 0.001 (0.027)	Loss 1.9818 (2.2080)	Entropy 0.75902 (0.76168)	Top-1 acc 76.172 (71.354)	Top-5 acc 92.188 (88.444)	lr 0.00176
Train [100][1620/3239]	Time 0.250 (0.649)	Data Time 0.001 (0.027)	Loss 2.2143 (2.2081)	Entropy 0.75905 (0.76166)	Top-1 acc 71.875 (71.349)	Top-5 acc 88.281 (88.443)	lr 0.00176
Train [100][1630/3239]	Time 0.237 (0.648)	Data Time 0.001 (0.027)	Loss 2.2468 (2.2080)	Entropy 0.75900 (0.76165)	Top-1 acc 69.531 (71.347)	Top-5 acc 87.109 (88.443)	lr 0.00176
Train [100][1640/3239]	Time 0.237 (0.647)	Data Time 0.001 (0.027)	Loss 2.4129 (2.2081)	Entropy 0.75892 (0.76163)	Top-1 acc 64.453 (71.341)	Top-5 acc 86.328 (88.443)	lr 0.00176
Train [100][1650/3239]	Time 0.240 (0.646)	Data Time 0.001 (0.027)	Loss 2.1188 (2.2081)	Entropy 0.75882 (0.76161)	Top-1 acc 73.047 (71.344)	Top-5 acc 92.578 (88.443)	lr 0.00176
Train [100][1660/3239]	Time 2.689 (0.645)	Data Time 0.001 (0.027)	Loss 2.3316 (2.2082)	Entropy 0.75882 (0.76160)	Top-1 acc 70.312 (71.336)	Top-5 acc 86.719 (88.441)	lr 0.00176
Train [100][1670/3239]	Time 0.221 (0.643)	Data Time 0.001 (0.026)	Loss 2.1572 (2.2082)	Entropy 0.75876 (0.76158)	Top-1 acc 73.828 (71.334)	Top-5 acc 89.453 (88.441)	lr 0.00175
Train [100][1680/3239]	Time 0.234 (0.642)	Data Time 0.001 (0.026)	Loss 2.0546 (2.2082)	Entropy 0.75869 (0.76156)	Top-1 acc 76.562 (71.340)	Top-5 acc 92.188 (88.444)	lr 0.00175
Train [100][1690/3239]	Time 0.230 (0.641)	Data Time 0.001 (0.026)	Loss 2.2600 (2.2081)	Entropy 0.75862 (0.76154)	Top-1 acc 67.188 (71.341)	Top-5 acc 88.281 (88.446)	lr 0.00175
Train [100][1700/3239]	Time 0.332 (0.640)	Data Time 0.001 (0.026)	Loss 2.3051 (2.2083)	Entropy 0.75869 (0.76153)	Top-1 acc 69.141 (71.333)	Top-5 acc 88.281 (88.443)	lr 0.00175
Train [100][1710/3239]	Time 0.238 (0.639)	Data Time 0.001 (0.026)	Loss 2.2388 (2.2081)	Entropy 0.75870 (0.76151)	Top-1 acc 70.703 (71.338)	Top-5 acc 86.328 (88.450)	lr 0.00175
Train [100][1720/3239]	Time 0.235 (0.638)	Data Time 0.001 (0.026)	Loss 2.2659 (2.2083)	Entropy 0.75864 (0.76149)	Top-1 acc 70.703 (71.337)	Top-5 acc 88.672 (88.449)	lr 0.00175
Train [100][1730/3239]	Time 0.230 (0.637)	Data Time 0.001 (0.026)	Loss 2.2264 (2.2082)	Entropy 0.75849 (0.76148)	Top-1 acc 70.703 (71.340)	Top-5 acc 87.109 (88.453)	lr 0.00175
Train [100][1740/3239]	Time 0.365 (0.637)	Data Time 0.001 (0.025)	Loss 2.1927 (2.2082)	Entropy 0.75849 (0.76146)	Top-1 acc 73.438 (71.341)	Top-5 acc 88.281 (88.453)	lr 0.00175
Train [100][1750/3239]	Time 0.216 (0.636)	Data Time 0.001 (0.025)	Loss 2.2588 (2.2082)	Entropy 0.75837 (0.76144)	Top-1 acc 69.531 (71.343)	Top-5 acc 87.500 (88.450)	lr 0.00175
Train [100][1760/3239]	Time 0.232 (0.635)	Data Time 0.001 (0.025)	Loss 2.2937 (2.2081)	Entropy 0.75838 (0.76143)	Top-1 acc 69.922 (71.346)	Top-5 acc 87.500 (88.450)	lr 0.00175
Train [100][1770/3239]	Time 2.567 (0.634)	Data Time 0.001 (0.025)	Loss 2.1370 (2.2082)	Entropy 0.75838 (0.76141)	Top-1 acc 73.438 (71.343)	Top-5 acc 89.062 (88.449)	lr 0.00175
Train [100][1780/3239]	Time 0.249 (0.632)	Data Time 0.001 (0.025)	Loss 2.3152 (2.2082)	Entropy 0.75836 (0.76139)	Top-1 acc 68.750 (71.353)	Top-5 acc 84.766 (88.451)	lr 0.00175
Train [100][1790/3239]	Time 0.258 (0.631)	Data Time 0.001 (0.025)	Loss 2.1214 (2.2080)	Entropy 0.75830 (0.76137)	Top-1 acc 73.438 (71.358)	Top-5 acc 90.234 (88.452)	lr 0.00175
Train [100][1800/3239]	Time 0.233 (0.630)	Data Time 0.001 (0.025)	Loss 2.1127 (2.2082)	Entropy 0.75829 (0.76136)	Top-1 acc 75.391 (71.354)	Top-5 acc 88.281 (88.446)	lr 0.00175
Train [100][1810/3239]	Time 0.223 (0.629)	Data Time 0.001 (0.025)	Loss 2.1920 (2.2083)	Entropy 0.75826 (0.76134)	Top-1 acc 75.391 (71.348)	Top-5 acc 86.719 (88.442)	lr 0.00175
Train [100][1820/3239]	Time 0.215 (0.628)	Data Time 0.001 (0.024)	Loss 2.3007 (2.2083)	Entropy 0.75816 (0.76132)	Top-1 acc 69.141 (71.355)	Top-5 acc 87.500 (88.441)	lr 0.00175
Train [100][1830/3239]	Time 0.320 (0.627)	Data Time 0.001 (0.024)	Loss 2.1763 (2.2082)	Entropy 0.75809 (0.76131)	Top-1 acc 71.484 (71.356)	Top-5 acc 88.672 (88.447)	lr 0.00175
Train [100][1840/3239]	Time 0.231 (0.627)	Data Time 0.001 (0.024)	Loss 2.3332 (2.2081)	Entropy 0.75798 (0.76129)	Top-1 acc 64.844 (71.357)	Top-5 acc 87.109 (88.447)	lr 0.00175
Train [100][1850/3239]	Time 0.221 (0.626)	Data Time 0.001 (0.024)	Loss 2.2201 (2.2078)	Entropy 0.75781 (0.76127)	Top-1 acc 71.094 (71.366)	Top-5 acc 87.891 (88.452)	lr 0.00175
Train [100][1860/3239]	Time 0.233 (0.655)	Data Time 0.002 (0.024)	Loss 2.2943 (2.2078)	Entropy 0.75791 (0.76125)	Top-1 acc 67.969 (71.365)	Top-5 acc 88.281 (88.453)	lr 0.00175
Train [100][1870/3239]	Time 0.233 (0.654)	Data Time 0.002 (0.024)	Loss 2.2061 (2.2076)	Entropy 0.75788 (0.76123)	Top-1 acc 71.875 (71.367)	Top-5 acc 88.672 (88.460)	lr 0.00174
Train [100][1880/3239]	Time 2.679 (0.653)	Data Time 0.002 (0.024)	Loss 2.2146 (2.2072)	Entropy 0.75788 (0.76122)	Top-1 acc 70.312 (71.376)	Top-5 acc 87.891 (88.467)	lr 0.00174
Train [100][1890/3239]	Time 0.255 (0.651)	Data Time 0.001 (0.024)	Loss 2.3703 (2.2075)	Entropy 0.75782 (0.76120)	Top-1 acc 68.750 (71.371)	Top-5 acc 85.938 (88.461)	lr 0.00174
Train [100][1900/3239]	Time 0.264 (0.651)	Data Time 0.002 (0.023)	Loss 2.1901 (2.2073)	Entropy 0.75784 (0.76118)	Top-1 acc 74.609 (71.378)	Top-5 acc 86.328 (88.460)	lr 0.00174
Train [100][1910/3239]	Time 0.338 (0.650)	Data Time 0.001 (0.023)	Loss 2.2709 (2.2075)	Entropy 0.75779 (0.76116)	Top-1 acc 69.141 (71.375)	Top-5 acc 85.547 (88.456)	lr 0.00174
Train [100][1920/3239]	Time 0.252 (0.649)	Data Time 0.002 (0.023)	Loss 2.2502 (2.2075)	Entropy 0.75782 (0.76114)	Top-1 acc 70.312 (71.372)	Top-5 acc 87.500 (88.456)	lr 0.00174
Train [100][1930/3239]	Time 0.232 (0.648)	Data Time 0.001 (0.023)	Loss 2.1156 (2.2072)	Entropy 0.75772 (0.76113)	Top-1 acc 75.391 (71.382)	Top-5 acc 91.797 (88.463)	lr 0.00174
Train [100][1940/3239]	Time 0.230 (0.647)	Data Time 0.002 (0.023)	Loss 2.4272 (2.2079)	Entropy 0.75765 (0.76111)	Top-1 acc 66.406 (71.362)	Top-5 acc 85.156 (88.454)	lr 0.00174
Train [100][1950/3239]	Time 0.338 (0.646)	Data Time 0.001 (0.023)	Loss 2.1454 (2.2081)	Entropy 0.75765 (0.76109)	Top-1 acc 73.047 (71.354)	Top-5 acc 91.016 (88.452)	lr 0.00174
Train [100][1960/3239]	Time 0.220 (0.646)	Data Time 0.001 (0.023)	Loss 2.3906 (2.2084)	Entropy 0.75765 (0.76107)	Top-1 acc 68.359 (71.348)	Top-5 acc 84.766 (88.446)	lr 0.00174
Train [100][1970/3239]	Time 0.224 (0.645)	Data Time 0.001 (0.023)	Loss 2.3081 (2.2085)	Entropy 0.75764 (0.76106)	Top-1 acc 69.531 (71.339)	Top-5 acc 87.500 (88.444)	lr 0.00174
Train [100][1980/3239]	Time 0.255 (0.644)	Data Time 0.001 (0.023)	Loss 2.2892 (2.2086)	Entropy 0.75752 (0.76104)	Top-1 acc 69.531 (71.336)	Top-5 acc 86.719 (88.441)	lr 0.00174
Train [100][1990/3239]	Time 2.655 (0.643)	Data Time 0.001 (0.022)	Loss 2.2658 (2.2085)	Entropy 0.75752 (0.76102)	Top-1 acc 67.188 (71.342)	Top-5 acc 87.109 (88.441)	lr 0.00174
Train [100][2000/3239]	Time 0.239 (0.641)	Data Time 0.001 (0.022)	Loss 2.2994 (2.2086)	Entropy 0.75744 (0.76100)	Top-1 acc 67.578 (71.341)	Top-5 acc 86.719 (88.438)	lr 0.00174
Train [100][2010/3239]	Time 0.234 (0.641)	Data Time 0.001 (0.022)	Loss 2.2623 (2.2087)	Entropy 0.75751 (0.76099)	Top-1 acc 70.703 (71.341)	Top-5 acc 87.500 (88.435)	lr 0.00174
Train [100][2020/3239]	Time 0.229 (0.640)	Data Time 0.001 (0.022)	Loss 2.1667 (2.2088)	Entropy 0.75746 (0.76097)	Top-1 acc 72.266 (71.343)	Top-5 acc 89.453 (88.430)	lr 0.00174
Train [100][2030/3239]	Time 0.250 (0.639)	Data Time 0.001 (0.022)	Loss 2.2669 (2.2088)	Entropy 0.75744 (0.76095)	Top-1 acc 72.266 (71.342)	Top-5 acc 86.719 (88.432)	lr 0.00174
Train [100][2040/3239]	Time 0.240 (0.638)	Data Time 0.002 (0.022)	Loss 2.1715 (2.2087)	Entropy 0.75742 (0.76093)	Top-1 acc 73.438 (71.345)	Top-5 acc 88.672 (88.434)	lr 0.00174
Train [100][2050/3239]	Time 0.234 (0.638)	Data Time 0.001 (0.022)	Loss 2.1728 (2.2086)	Entropy 0.75743 (0.76092)	Top-1 acc 73.438 (71.342)	Top-5 acc 91.016 (88.437)	lr 0.00174
Train [100][2060/3239]	Time 0.231 (0.637)	Data Time 0.001 (0.022)	Loss 2.2611 (2.2086)	Entropy 0.75744 (0.76090)	Top-1 acc 69.922 (71.341)	Top-5 acc 86.719 (88.435)	lr 0.00173
Train [100][2070/3239]	Time 0.269 (0.636)	Data Time 0.001 (0.022)	Loss 2.0756 (2.2087)	Entropy 0.75751 (0.76088)	Top-1 acc 75.000 (71.339)	Top-5 acc 89.844 (88.435)	lr 0.00173
Train [100][2080/3239]	Time 0.223 (0.635)	Data Time 0.002 (0.022)	Loss 2.1584 (2.2087)	Entropy 0.75741 (0.76087)	Top-1 acc 75.000 (71.337)	Top-5 acc 88.672 (88.434)	lr 0.00173
Train [100][2090/3239]	Time 0.229 (0.635)	Data Time 0.001 (0.021)	Loss 2.2151 (2.2086)	Entropy 0.75741 (0.76085)	Top-1 acc 73.047 (71.341)	Top-5 acc 87.500 (88.435)	lr 0.00173
Train [100][2100/3239]	Time 2.604 (0.634)	Data Time 0.002 (0.021)	Loss 2.3075 (2.2087)	Entropy 0.75741 (0.76083)	Top-1 acc 69.922 (71.341)	Top-5 acc 85.156 (88.436)	lr 0.00173
Train [100][2110/3239]	Time 0.237 (0.632)	Data Time 0.001 (0.021)	Loss 2.0865 (2.2087)	Entropy 0.75740 (0.76082)	Top-1 acc 76.562 (71.341)	Top-5 acc 89.453 (88.436)	lr 0.00173
Train [100][2120/3239]	Time 0.338 (0.631)	Data Time 0.001 (0.021)	Loss 2.3461 (2.2087)	Entropy 0.75732 (0.76080)	Top-1 acc 66.797 (71.339)	Top-5 acc 84.766 (88.437)	lr 0.00173
Train [100][2130/3239]	Time 0.230 (0.631)	Data Time 0.001 (0.021)	Loss 2.2565 (2.2090)	Entropy 0.75713 (0.76079)	Top-1 acc 70.312 (71.334)	Top-5 acc 87.500 (88.431)	lr 0.00173
Train [100][2140/3239]	Time 0.239 (0.630)	Data Time 0.001 (0.021)	Loss 2.3533 (2.2093)	Entropy 0.75706 (0.76077)	Top-1 acc 68.359 (71.325)	Top-5 acc 84.375 (88.424)	lr 0.00173
Train [100][2150/3239]	Time 0.242 (0.629)	Data Time 0.001 (0.021)	Loss 2.1820 (2.2093)	Entropy 0.75698 (0.76075)	Top-1 acc 71.094 (71.322)	Top-5 acc 89.062 (88.422)	lr 0.00173
Train [100][2160/3239]	Time 0.228 (0.629)	Data Time 0.001 (0.021)	Loss 2.2212 (2.2094)	Entropy 0.75697 (0.76073)	Top-1 acc 71.875 (71.322)	Top-5 acc 90.234 (88.419)	lr 0.00173
Train [100][2170/3239]	Time 0.231 (0.628)	Data Time 0.001 (0.021)	Loss 2.1694 (2.2095)	Entropy 0.75692 (0.76072)	Top-1 acc 73.438 (71.319)	Top-5 acc 88.281 (88.419)	lr 0.00173
Train [100][2180/3239]	Time 0.249 (0.627)	Data Time 0.001 (0.021)	Loss 2.1500 (2.2095)	Entropy 0.75686 (0.76070)	Top-1 acc 75.781 (71.322)	Top-5 acc 89.062 (88.417)	lr 0.00173
Train [100][2190/3239]	Time 0.234 (0.627)	Data Time 0.002 (0.021)	Loss 2.1688 (2.2094)	Entropy 0.75679 (0.76068)	Top-1 acc 72.266 (71.326)	Top-5 acc 91.016 (88.417)	lr 0.00173
Train [100][2200/3239]	Time 0.282 (0.626)	Data Time 0.001 (0.020)	Loss 2.1792 (2.2094)	Entropy 0.75680 (0.76066)	Top-1 acc 71.875 (71.327)	Top-5 acc 89.062 (88.417)	lr 0.00173
Train [100][2210/3239]	Time 2.599 (0.625)	Data Time 0.031 (0.020)	Loss 2.1973 (2.2096)	Entropy 0.75680 (0.76065)	Top-1 acc 73.828 (71.324)	Top-5 acc 89.844 (88.413)	lr 0.00173
Train [100][2220/3239]	Time 0.284 (0.624)	Data Time 0.001 (0.020)	Loss 2.3586 (2.2097)	Entropy 0.75678 (0.76063)	Top-1 acc 65.625 (71.323)	Top-5 acc 83.984 (88.410)	lr 0.00173
Train [100][2230/3239]	Time 0.413 (0.646)	Data Time 0.002 (0.020)	Loss 2.3288 (2.2099)	Entropy 0.75675 (0.76061)	Top-1 acc 72.656 (71.322)	Top-5 acc 87.891 (88.406)	lr 0.00173
Train [100][2240/3239]	Time 0.273 (0.645)	Data Time 0.002 (0.020)	Loss 2.2687 (2.2098)	Entropy 0.75674 (0.76059)	Top-1 acc 71.484 (71.326)	Top-5 acc 85.938 (88.408)	lr 0.00173
Train [100][2250/3239]	Time 0.236 (0.645)	Data Time 0.001 (0.020)	Loss 2.3349 (2.2097)	Entropy 0.75650 (0.76058)	Top-1 acc 67.188 (71.330)	Top-5 acc 85.547 (88.412)	lr 0.00173
Train [100][2260/3239]	Time 0.234 (0.644)	Data Time 0.001 (0.020)	Loss 2.2772 (2.2096)	Entropy 0.75645 (0.76056)	Top-1 acc 69.922 (71.332)	Top-5 acc 88.281 (88.411)	lr 0.00172
Train [100][2270/3239]	Time 0.227 (0.643)	Data Time 0.001 (0.020)	Loss 2.2710 (2.2094)	Entropy 0.75633 (0.76054)	Top-1 acc 70.703 (71.335)	Top-5 acc 85.938 (88.414)	lr 0.00172
Train [100][2280/3239]	Time 0.241 (0.643)	Data Time 0.002 (0.020)	Loss 2.1029 (2.2094)	Entropy 0.75632 (0.76052)	Top-1 acc 74.609 (71.334)	Top-5 acc 91.016 (88.413)	lr 0.00172
Train [100][2290/3239]	Time 0.235 (0.642)	Data Time 0.001 (0.020)	Loss 2.2580 (2.2095)	Entropy 0.75631 (0.76050)	Top-1 acc 71.094 (71.331)	Top-5 acc 87.109 (88.410)	lr 0.00172
Train [100][2300/3239]	Time 0.240 (0.641)	Data Time 0.002 (0.020)	Loss 2.1190 (2.2095)	Entropy 0.75628 (0.76048)	Top-1 acc 73.047 (71.332)	Top-5 acc 87.109 (88.409)	lr 0.00172
Train [100][2310/3239]	Time 0.229 (0.641)	Data Time 0.001 (0.020)	Loss 2.0205 (2.2095)	Entropy 0.75622 (0.76047)	Top-1 acc 77.734 (71.333)	Top-5 acc 92.188 (88.411)	lr 0.00172
Train [100][2320/3239]	Time 2.588 (0.640)	Data Time 0.001 (0.020)	Loss 2.1136 (2.2096)	Entropy 0.75622 (0.76045)	Top-1 acc 73.438 (71.328)	Top-5 acc 91.016 (88.409)	lr 0.00172
Train [100][2330/3239]	Time 0.321 (0.638)	Data Time 0.001 (0.019)	Loss 2.1404 (2.2096)	Entropy 0.75625 (0.76043)	Top-1 acc 73.828 (71.326)	Top-5 acc 87.500 (88.410)	lr 0.00172
Train [100][2340/3239]	Time 0.228 (0.638)	Data Time 0.001 (0.019)	Loss 2.1576 (2.2096)	Entropy 0.75633 (0.76041)	Top-1 acc 67.578 (71.325)	Top-5 acc 90.234 (88.411)	lr 0.00172
Train [100][2350/3239]	Time 0.230 (0.637)	Data Time 0.001 (0.019)	Loss 2.2491 (2.2098)	Entropy 0.75632 (0.76039)	Top-1 acc 70.312 (71.321)	Top-5 acc 88.672 (88.411)	lr 0.00172
Train [100][2360/3239]	Time 0.222 (0.636)	Data Time 0.001 (0.019)	Loss 2.1224 (2.2098)	Entropy 0.75628 (0.76038)	Top-1 acc 73.828 (71.319)	Top-5 acc 92.578 (88.412)	lr 0.00172
Train [100][2370/3239]	Time 0.332 (0.636)	Data Time 0.001 (0.019)	Loss 1.9787 (2.2101)	Entropy 0.75623 (0.76036)	Top-1 acc 78.516 (71.314)	Top-5 acc 92.969 (88.408)	lr 0.00172
Train [100][2380/3239]	Time 0.234 (0.635)	Data Time 0.001 (0.019)	Loss 2.3214 (2.2102)	Entropy 0.75603 (0.76034)	Top-1 acc 63.281 (71.313)	Top-5 acc 88.281 (88.404)	lr 0.00172
Train [100][2390/3239]	Time 0.227 (0.634)	Data Time 0.001 (0.019)	Loss 2.0981 (2.2104)	Entropy 0.75595 (0.76032)	Top-1 acc 73.047 (71.304)	Top-5 acc 89.062 (88.404)	lr 0.00172
Train [100][2400/3239]	Time 0.228 (0.634)	Data Time 0.001 (0.019)	Loss 2.1560 (2.2104)	Entropy 0.75594 (0.76031)	Top-1 acc 73.828 (71.302)	Top-5 acc 89.062 (88.401)	lr 0.00172
Train [100][2410/3239]	Time 0.330 (0.633)	Data Time 0.001 (0.019)	Loss 2.2413 (2.2104)	Entropy 0.75595 (0.76029)	Top-1 acc 67.188 (71.302)	Top-5 acc 89.844 (88.401)	lr 0.00172
Train [100][2420/3239]	Time 0.222 (0.632)	Data Time 0.001 (0.019)	Loss 2.2887 (2.2105)	Entropy 0.75582 (0.76027)	Top-1 acc 71.094 (71.298)	Top-5 acc 87.891 (88.401)	lr 0.00172
Train [100][2430/3239]	Time 2.644 (0.632)	Data Time 0.001 (0.019)	Loss 2.0628 (2.2104)	Entropy 0.75582 (0.76025)	Top-1 acc 75.391 (71.303)	Top-5 acc 92.578 (88.405)	lr 0.00172
Train [100][2440/3239]	Time 0.242 (0.630)	Data Time 0.001 (0.019)	Loss 2.1353 (2.2103)	Entropy 0.75577 (0.76023)	Top-1 acc 72.656 (71.306)	Top-5 acc 89.844 (88.409)	lr 0.00172
Train [100][2450/3239]	Time 0.230 (0.629)	Data Time 0.001 (0.019)	Loss 2.4498 (2.2102)	Entropy 0.75572 (0.76021)	Top-1 acc 64.844 (71.308)	Top-5 acc 84.375 (88.411)	lr 0.00171
Train [100][2460/3239]	Time 0.229 (0.629)	Data Time 0.001 (0.019)	Loss 2.2251 (2.2103)	Entropy 0.75574 (0.76020)	Top-1 acc 72.266 (71.309)	Top-5 acc 87.500 (88.409)	lr 0.00171
Train [100][2470/3239]	Time 0.231 (0.628)	Data Time 0.001 (0.018)	Loss 2.0912 (2.2103)	Entropy 0.75566 (0.76018)	Top-1 acc 74.609 (71.312)	Top-5 acc 89.453 (88.410)	lr 0.00171
Train [100][2480/3239]	Time 0.231 (0.628)	Data Time 0.001 (0.018)	Loss 2.2410 (2.2102)	Entropy 0.75565 (0.76016)	Top-1 acc 72.266 (71.315)	Top-5 acc 87.109 (88.414)	lr 0.00171
Train [100][2490/3239]	Time 0.223 (0.627)	Data Time 0.001 (0.018)	Loss 2.1367 (2.2102)	Entropy 0.75559 (0.76014)	Top-1 acc 70.312 (71.311)	Top-5 acc 90.625 (88.413)	lr 0.00171
Train [100][2500/3239]	Time 0.227 (0.626)	Data Time 0.001 (0.018)	Loss 2.1862 (2.2103)	Entropy 0.75555 (0.76012)	Top-1 acc 68.750 (71.306)	Top-5 acc 91.016 (88.411)	lr 0.00171
Train [100][2510/3239]	Time 0.218 (0.626)	Data Time 0.001 (0.018)	Loss 2.3950 (2.2102)	Entropy 0.75553 (0.76010)	Top-1 acc 66.406 (71.306)	Top-5 acc 85.938 (88.413)	lr 0.00171
Train [100][2520/3239]	Time 0.234 (0.625)	Data Time 0.001 (0.018)	Loss 2.1545 (2.2102)	Entropy 0.75541 (0.76009)	Top-1 acc 71.484 (71.308)	Top-5 acc 89.062 (88.412)	lr 0.00171
Train [100][2530/3239]	Time 0.261 (0.625)	Data Time 0.001 (0.018)	Loss 2.2486 (2.2103)	Entropy 0.75545 (0.76007)	Top-1 acc 68.359 (71.303)	Top-5 acc 87.891 (88.412)	lr 0.00171
Train [100][2540/3239]	Time 2.735 (0.624)	Data Time 0.001 (0.018)	Loss 2.1115 (2.2102)	Entropy 0.75545 (0.76005)	Top-1 acc 73.828 (71.311)	Top-5 acc 90.234 (88.412)	lr 0.00171
Train [100][2550/3239]	Time 0.231 (0.623)	Data Time 0.001 (0.018)	Loss 2.0536 (2.2102)	Entropy 0.75540 (0.76003)	Top-1 acc 76.172 (71.310)	Top-5 acc 90.625 (88.411)	lr 0.00171
Train [100][2560/3239]	Time 0.235 (0.622)	Data Time 0.001 (0.018)	Loss 2.1610 (2.2100)	Entropy 0.75543 (0.76001)	Top-1 acc 69.922 (71.317)	Top-5 acc 90.234 (88.417)	lr 0.00171
Train [100][2570/3239]	Time 0.222 (0.622)	Data Time 0.001 (0.018)	Loss 2.0595 (2.2099)	Entropy 0.75548 (0.76000)	Top-1 acc 73.828 (71.320)	Top-5 acc 91.016 (88.418)	lr 0.00171
Train [100][2580/3239]	Time 0.325 (0.621)	Data Time 0.001 (0.018)	Loss 2.1450 (2.2099)	Entropy 0.75549 (0.75998)	Top-1 acc 74.609 (71.322)	Top-5 acc 89.844 (88.417)	lr 0.00171
Train [100][2590/3239]	Time 0.283 (0.639)	Data Time 0.003 (0.018)	Loss 2.3265 (2.2098)	Entropy 0.75549 (0.75996)	Top-1 acc 69.922 (71.326)	Top-5 acc 85.938 (88.417)	lr 0.00171
Train [100][2600/3239]	Time 0.234 (0.639)	Data Time 0.002 (0.018)	Loss 2.1822 (2.2097)	Entropy 0.75548 (0.75994)	Top-1 acc 73.047 (71.331)	Top-5 acc 88.672 (88.416)	lr 0.00171
Train [100][2610/3239]	Time 0.235 (0.639)	Data Time 0.001 (0.018)	Loss 2.2257 (2.2099)	Entropy 0.75552 (0.75993)	Top-1 acc 71.484 (71.329)	Top-5 acc 87.891 (88.412)	lr 0.00171
Train [100][2620/3239]	Time 0.333 (0.638)	Data Time 0.001 (0.017)	Loss 2.1467 (2.2099)	Entropy 0.75551 (0.75991)	Top-1 acc 69.922 (71.331)	Top-5 acc 89.844 (88.412)	lr 0.00171
Train [100][2630/3239]	Time 0.239 (0.638)	Data Time 0.001 (0.017)	Loss 2.2530 (2.2100)	Entropy 0.75552 (0.75989)	Top-1 acc 71.875 (71.332)	Top-5 acc 86.328 (88.410)	lr 0.00171
Train [100][2640/3239]	Time 0.240 (0.637)	Data Time 0.001 (0.017)	Loss 2.1864 (2.2100)	Entropy 0.75547 (0.75988)	Top-1 acc 71.094 (71.331)	Top-5 acc 90.234 (88.407)	lr 0.00171
Train [100][2650/3239]	Time 0.233 (0.636)	Data Time 0.001 (0.017)	Loss 2.3319 (2.2103)	Entropy 0.75542 (0.75986)	Top-1 acc 67.578 (71.325)	Top-5 acc 84.766 (88.403)	lr 0.00170
Train [100][2660/3239]	Time 0.285 (0.636)	Data Time 0.001 (0.017)	Loss 2.1363 (2.2105)	Entropy 0.75528 (0.75984)	Top-1 acc 71.875 (71.321)	Top-5 acc 85.938 (88.398)	lr 0.00170
Train [100][2670/3239]	Time 0.263 (0.635)	Data Time 0.002 (0.017)	Loss 2.2685 (2.2105)	Entropy 0.75523 (0.75983)	Top-1 acc 75.000 (71.322)	Top-5 acc 86.328 (88.398)	lr 0.00170
Train [100][2680/3239]	Time 0.226 (0.634)	Data Time 0.001 (0.017)	Loss 2.1900 (2.2105)	Entropy 0.75528 (0.75981)	Top-1 acc 72.656 (71.322)	Top-5 acc 88.281 (88.400)	lr 0.00170
Train [100][2690/3239]	Time 0.266 (0.634)	Data Time 0.001 (0.017)	Loss 2.2327 (2.2106)	Entropy 0.75524 (0.75979)	Top-1 acc 69.141 (71.321)	Top-5 acc 88.281 (88.395)	lr 0.00170
Train [100][2700/3239]	Time 0.229 (0.633)	Data Time 0.001 (0.017)	Loss 2.2945 (2.2106)	Entropy 0.75523 (0.75978)	Top-1 acc 69.141 (71.317)	Top-5 acc 86.328 (88.394)	lr 0.00170
Train [100][2710/3239]	Time 0.221 (0.633)	Data Time 0.001 (0.017)	Loss 2.3672 (2.2107)	Entropy 0.75497 (0.75976)	Top-1 acc 65.234 (71.319)	Top-5 acc 85.938 (88.395)	lr 0.00170
Train [100][2720/3239]	Time 0.264 (0.632)	Data Time 0.001 (0.017)	Loss 2.1045 (2.2106)	Entropy 0.75493 (0.75974)	Top-1 acc 75.391 (71.323)	Top-5 acc 91.406 (88.395)	lr 0.00170
Train [100][2730/3239]	Time 0.239 (0.632)	Data Time 0.001 (0.017)	Loss 2.2387 (2.2107)	Entropy 0.75502 (0.75972)	Top-1 acc 66.016 (71.321)	Top-5 acc 86.328 (88.391)	lr 0.00170
Train [100][2740/3239]	Time 0.262 (0.631)	Data Time 0.002 (0.017)	Loss 2.3125 (2.2107)	Entropy 0.75497 (0.75971)	Top-1 acc 70.703 (71.316)	Top-5 acc 87.109 (88.390)	lr 0.00170
Train [100][2750/3239]	Time 0.315 (0.631)	Data Time 0.001 (0.017)	Loss 2.1855 (2.2108)	Entropy 0.75486 (0.75969)	Top-1 acc 73.438 (71.319)	Top-5 acc 90.234 (88.388)	lr 0.00170
Train [100][2760/3239]	Time 0.226 (0.630)	Data Time 0.001 (0.017)	Loss 2.0723 (2.2107)	Entropy 0.75476 (0.75967)	Top-1 acc 73.047 (71.316)	Top-5 acc 92.969 (88.389)	lr 0.00170
Train [100][2770/3239]	Time 0.236 (0.630)	Data Time 0.001 (0.017)	Loss 2.2682 (2.2107)	Entropy 0.75476 (0.75965)	Top-1 acc 69.531 (71.321)	Top-5 acc 86.719 (88.391)	lr 0.00170
Train [100][2780/3239]	Time 0.253 (0.629)	Data Time 0.001 (0.017)	Loss 2.4081 (2.2107)	Entropy 0.75473 (0.75964)	Top-1 acc 64.062 (71.319)	Top-5 acc 83.594 (88.390)	lr 0.00170
Train [100][2790/3239]	Time 0.307 (0.628)	Data Time 0.001 (0.017)	Loss 2.1724 (2.2106)	Entropy 0.75466 (0.75962)	Top-1 acc 73.438 (71.324)	Top-5 acc 88.672 (88.393)	lr 0.00170
Train [100][2800/3239]	Time 0.269 (0.628)	Data Time 0.001 (0.016)	Loss 2.2462 (2.2106)	Entropy 0.75455 (0.75960)	Top-1 acc 68.359 (71.319)	Top-5 acc 89.062 (88.392)	lr 0.00170
Train [100][2810/3239]	Time 0.231 (0.627)	Data Time 0.001 (0.016)	Loss 2.1887 (2.2107)	Entropy 0.75451 (0.75958)	Top-1 acc 76.562 (71.321)	Top-5 acc 87.500 (88.392)	lr 0.00170
Train [100][2820/3239]	Time 0.248 (0.627)	Data Time 0.001 (0.016)	Loss 2.3046 (2.2108)	Entropy 0.75457 (0.75956)	Top-1 acc 69.922 (71.320)	Top-5 acc 87.891 (88.392)	lr 0.00170
Train [100][2830/3239]	Time 0.333 (0.626)	Data Time 0.002 (0.016)	Loss 2.2121 (2.2107)	Entropy 0.75456 (0.75955)	Top-1 acc 71.094 (71.323)	Top-5 acc 85.938 (88.395)	lr 0.00170
Train [100][2840/3239]	Time 0.256 (0.626)	Data Time 0.001 (0.016)	Loss 2.2158 (2.2109)	Entropy 0.75455 (0.75953)	Top-1 acc 69.531 (71.323)	Top-5 acc 87.500 (88.392)	lr 0.00169
Train [100][2850/3239]	Time 0.279 (0.625)	Data Time 0.001 (0.016)	Loss 2.2715 (2.2110)	Entropy 0.75459 (0.75951)	Top-1 acc 68.750 (71.319)	Top-5 acc 88.672 (88.390)	lr 0.00169
Train [100][2860/3239]	Time 0.224 (0.625)	Data Time 0.001 (0.016)	Loss 2.0215 (2.2109)	Entropy 0.75463 (0.75950)	Top-1 acc 73.828 (71.320)	Top-5 acc 92.578 (88.390)	lr 0.00169
Train [100][2870/3239]	Time 0.361 (0.624)	Data Time 0.001 (0.016)	Loss 2.1679 (2.2110)	Entropy 0.75456 (0.75948)	Top-1 acc 73.047 (71.322)	Top-5 acc 91.016 (88.390)	lr 0.00169
Train [100][2880/3239]	Time 0.214 (0.624)	Data Time 0.001 (0.016)	Loss 2.2238 (2.2109)	Entropy 0.75460 (0.75946)	Top-1 acc 72.656 (71.325)	Top-5 acc 87.500 (88.391)	lr 0.00169
Train [100][2890/3239]	Time 0.247 (0.623)	Data Time 0.001 (0.016)	Loss 2.1952 (2.2109)	Entropy 0.75420 (0.75944)	Top-1 acc 69.141 (71.322)	Top-5 acc 89.062 (88.389)	lr 0.00169
Train [100][2900/3239]	Time 0.218 (0.623)	Data Time 0.001 (0.016)	Loss 2.2404 (2.2109)	Entropy 0.75417 (0.75943)	Top-1 acc 69.531 (71.320)	Top-5 acc 88.672 (88.391)	lr 0.00169
Train [100][2910/3239]	Time 0.251 (0.622)	Data Time 0.001 (0.016)	Loss 2.1147 (2.2110)	Entropy 0.75409 (0.75941)	Top-1 acc 72.656 (71.317)	Top-5 acc 89.062 (88.388)	lr 0.00169
Train [100][2920/3239]	Time 0.233 (0.640)	Data Time 0.003 (0.016)	Loss 2.2742 (2.2111)	Entropy 0.75413 (0.75939)	Top-1 acc 70.312 (71.316)	Top-5 acc 87.109 (88.388)	lr 0.00169
Train [100][2930/3239]	Time 0.240 (0.640)	Data Time 0.002 (0.016)	Loss 2.2078 (2.2111)	Entropy 0.75415 (0.75937)	Top-1 acc 75.781 (71.316)	Top-5 acc 88.281 (88.387)	lr 0.00169
Train [100][2940/3239]	Time 0.269 (0.639)	Data Time 0.002 (0.016)	Loss 2.2209 (2.2110)	Entropy 0.75414 (0.75935)	Top-1 acc 71.875 (71.318)	Top-5 acc 87.500 (88.390)	lr 0.00169
Train [100][2950/3239]	Time 0.243 (0.639)	Data Time 0.001 (0.016)	Loss 2.0265 (2.2110)	Entropy 0.75416 (0.75934)	Top-1 acc 75.781 (71.318)	Top-5 acc 91.797 (88.390)	lr 0.00169
Train [100][2960/3239]	Time 0.248 (0.638)	Data Time 0.001 (0.016)	Loss 2.1684 (2.2110)	Entropy 0.75416 (0.75932)	Top-1 acc 72.266 (71.317)	Top-5 acc 88.672 (88.387)	lr 0.00169
Train [100][2970/3239]	Time 0.223 (0.638)	Data Time 0.001 (0.016)	Loss 2.2188 (2.2110)	Entropy 0.75415 (0.75930)	Top-1 acc 71.094 (71.319)	Top-5 acc 89.453 (88.385)	lr 0.00169
Train [100][2980/3239]	Time 0.227 (0.637)	Data Time 0.001 (0.016)	Loss 2.3423 (2.2110)	Entropy 0.75413 (0.75928)	Top-1 acc 69.531 (71.320)	Top-5 acc 82.812 (88.383)	lr 0.00169
Train [100][2990/3239]	Time 0.266 (0.636)	Data Time 0.002 (0.016)	Loss 2.0901 (2.2110)	Entropy 0.75408 (0.75927)	Top-1 acc 77.734 (71.323)	Top-5 acc 89.453 (88.383)	lr 0.00169
Train [100][3000/3239]	Time 0.242 (0.636)	Data Time 0.001 (0.015)	Loss 2.1510 (2.2109)	Entropy 0.75414 (0.75925)	Top-1 acc 69.141 (71.322)	Top-5 acc 90.234 (88.387)	lr 0.00169
Train [100][3010/3239]	Time 0.225 (0.635)	Data Time 0.002 (0.015)	Loss 2.2615 (2.2109)	Entropy 0.75407 (0.75923)	Top-1 acc 70.703 (71.327)	Top-5 acc 87.500 (88.385)	lr 0.00169
Train [100][3020/3239]	Time 0.228 (0.635)	Data Time 0.001 (0.015)	Loss 2.3466 (2.2109)	Entropy 0.75396 (0.75922)	Top-1 acc 68.359 (71.325)	Top-5 acc 86.328 (88.384)	lr 0.00169
Train [100][3030/3239]	Time 0.220 (0.634)	Data Time 0.001 (0.015)	Loss 2.3156 (2.2111)	Entropy 0.75403 (0.75920)	Top-1 acc 68.359 (71.323)	Top-5 acc 86.328 (88.381)	lr 0.00169
Train [100][3040/3239]	Time 0.232 (0.634)	Data Time 0.001 (0.015)	Loss 2.1975 (2.2111)	Entropy 0.75406 (0.75918)	Top-1 acc 71.875 (71.321)	Top-5 acc 87.109 (88.379)	lr 0.00168
Train [100][3050/3239]	Time 0.259 (0.633)	Data Time 0.001 (0.015)	Loss 2.1550 (2.2111)	Entropy 0.75414 (0.75916)	Top-1 acc 72.656 (71.321)	Top-5 acc 90.625 (88.379)	lr 0.00168
Train [100][3060/3239]	Time 0.229 (0.633)	Data Time 0.001 (0.015)	Loss 2.2738 (2.2109)	Entropy 0.75402 (0.75915)	Top-1 acc 70.312 (71.326)	Top-5 acc 86.719 (88.382)	lr 0.00168
Train [100][3070/3239]	Time 0.224 (0.632)	Data Time 0.001 (0.015)	Loss 1.9830 (2.2110)	Entropy 0.75396 (0.75913)	Top-1 acc 78.516 (71.326)	Top-5 acc 92.188 (88.381)	lr 0.00168
Train [100][3080/3239]	Time 0.233 (0.632)	Data Time 0.001 (0.015)	Loss 2.2940 (2.2110)	Entropy 0.75397 (0.75911)	Top-1 acc 69.922 (71.330)	Top-5 acc 85.938 (88.380)	lr 0.00168
Train [100][3090/3239]	Time 0.225 (0.631)	Data Time 0.001 (0.015)	Loss 2.2284 (2.2111)	Entropy 0.75397 (0.75910)	Top-1 acc 70.703 (71.326)	Top-5 acc 88.281 (88.378)	lr 0.00168
Train [100][3100/3239]	Time 0.224 (0.631)	Data Time 0.001 (0.015)	Loss 2.0826 (2.2111)	Entropy 0.75388 (0.75908)	Top-1 acc 77.734 (71.326)	Top-5 acc 92.578 (88.380)	lr 0.00168
Train [100][3110/3239]	Time 0.233 (0.630)	Data Time 0.001 (0.015)	Loss 2.2631 (2.2112)	Entropy 0.75392 (0.75906)	Top-1 acc 71.094 (71.324)	Top-5 acc 85.938 (88.377)	lr 0.00168
Train [100][3120/3239]	Time 0.320 (0.630)	Data Time 0.001 (0.015)	Loss 2.1046 (2.2111)	Entropy 0.75392 (0.75905)	Top-1 acc 75.391 (71.324)	Top-5 acc 92.969 (88.378)	lr 0.00168
Train [100][3130/3239]	Time 0.246 (0.629)	Data Time 0.001 (0.015)	Loss 2.3088 (2.2112)	Entropy 0.75386 (0.75903)	Top-1 acc 69.922 (71.322)	Top-5 acc 87.109 (88.375)	lr 0.00168
Train [100][3140/3239]	Time 0.224 (0.629)	Data Time 0.001 (0.015)	Loss 2.2173 (2.2112)	Entropy 0.75383 (0.75902)	Top-1 acc 69.922 (71.320)	Top-5 acc 87.891 (88.375)	lr 0.00168
Train [100][3150/3239]	Time 0.299 (0.629)	Data Time 0.001 (0.015)	Loss 2.1722 (2.2112)	Entropy 0.75389 (0.75900)	Top-1 acc 72.266 (71.321)	Top-5 acc 89.453 (88.376)	lr 0.00168
Train [100][3160/3239]	Time 0.332 (0.628)	Data Time 0.001 (0.015)	Loss 2.4519 (2.2112)	Entropy 0.75385 (0.75898)	Top-1 acc 63.281 (71.320)	Top-5 acc 83.984 (88.375)	lr 0.00168
Train [100][3170/3239]	Time 0.227 (0.628)	Data Time 0.001 (0.015)	Loss 2.2849 (2.2112)	Entropy 0.75390 (0.75897)	Top-1 acc 73.438 (71.322)	Top-5 acc 88.672 (88.376)	lr 0.00168
Train [100][3180/3239]	Time 0.220 (0.627)	Data Time 0.000 (0.015)	Loss 2.2212 (2.2113)	Entropy 0.75391 (0.75895)	Top-1 acc 70.312 (71.321)	Top-5 acc 87.891 (88.375)	lr 0.00168
Train [100][3190/3239]	Time 0.230 (0.627)	Data Time 0.000 (0.015)	Loss 2.2700 (2.2115)	Entropy 0.75391 (0.75893)	Top-1 acc 73.047 (71.320)	Top-5 acc 87.109 (88.371)	lr 0.00168
Train [100][3200/3239]	Time 0.223 (0.626)	Data Time 0.000 (0.015)	Loss 2.1654 (2.2115)	Entropy 0.75419 (0.75892)	Top-1 acc 71.094 (71.316)	Top-5 acc 91.797 (88.371)	lr 0.00168
Train [100][3210/3239]	Time 0.232 (0.625)	Data Time 0.000 (0.015)	Loss 2.2189 (2.2116)	Entropy 0.75422 (0.75890)	Top-1 acc 71.875 (71.315)	Top-5 acc 87.500 (88.370)	lr 0.00168
Train [100][3220/3239]	Time 0.239 (0.625)	Data Time 0.000 (0.014)	Loss 2.2205 (2.2115)	Entropy 0.75417 (0.75889)	Top-1 acc 68.359 (71.317)	Top-5 acc 89.062 (88.373)	lr 0.00168
Train [100][3230/3239]	Time 0.234 (0.624)	Data Time 0.000 (0.014)	Loss 2.2772 (2.2117)	Entropy 0.75411 (0.75888)	Top-1 acc 68.750 (71.313)	Top-5 acc 89.062 (88.370)	lr 0.00168
Train [100][3239/3239]	Time 2.315 (0.624)	Data Time 0.000 (0.014)	Loss 2.3483 (2.2117)	Entropy 0.75411 (0.75886)	Top-1 acc 71.605 (71.315)	Top-5 acc 87.654 (88.370)	lr 0.00167
==========Valid [100/120]	loss 1.219	top-1 acc 72.142 (72.142)	top-5 acc 89.766	Train top-1 71.315	top-5 88.370	Entropy 0.75411	Latency-None: 0.000ms	Flops: 546.53M
Train [101][0/3239]	Time 41.628 (41.628)	Data Time 39.098 (39.098)	Loss 2.2319 (2.2319)	Entropy 0.75414 (0.75414)	Top-1 acc 73.438 (73.438)	Top-5 acc 88.672 (88.672)	lr 0.00167
Train [101][10/3239]	Time 55.894 (9.137)	Data Time 0.002 (3.557)	Loss 2.2328 (2.1922)	Entropy 0.75414 (0.75414)	Top-1 acc 73.047 (72.230)	Top-5 acc 88.281 (89.098)	lr 0.00167
Train [101][20/3239]	Time 0.524 (4.949)	Data Time 0.003 (1.865)	Loss 2.0899 (2.1848)	Entropy 0.75416 (0.75415)	Top-1 acc 71.484 (72.154)	Top-5 acc 91.016 (88.932)	lr 0.00167
Train [101][30/3239]	Time 0.233 (3.511)	Data Time 0.002 (1.264)	Loss 2.1675 (2.1818)	Entropy 0.75418 (0.75416)	Top-1 acc 69.531 (72.392)	Top-5 acc 88.672 (88.798)	lr 0.00167
Train [101][40/3239]	Time 0.241 (2.785)	Data Time 0.001 (0.956)	Loss 2.2708 (2.1972)	Entropy 0.75412 (0.75415)	Top-1 acc 69.531 (72.066)	Top-5 acc 87.109 (88.558)	lr 0.00167
Train [101][50/3239]	Time 0.252 (2.332)	Data Time 0.002 (0.769)	Loss 2.0949 (2.2010)	Entropy 0.75409 (0.75414)	Top-1 acc 71.875 (71.952)	Top-5 acc 89.453 (88.358)	lr 0.00167
Train [101][60/3239]	Time 0.273 (2.031)	Data Time 0.002 (0.643)	Loss 2.2791 (2.2025)	Entropy 0.75404 (0.75413)	Top-1 acc 69.141 (71.664)	Top-5 acc 87.891 (88.499)	lr 0.00167
Train [101][70/3239]	Time 0.228 (1.810)	Data Time 0.001 (0.553)	Loss 2.0588 (2.1957)	Entropy 0.75400 (0.75411)	Top-1 acc 74.609 (71.803)	Top-5 acc 91.797 (88.589)	lr 0.00167
Train [101][80/3239]	Time 0.238 (1.645)	Data Time 0.001 (0.485)	Loss 2.2109 (2.1948)	Entropy 0.75377 (0.75409)	Top-1 acc 71.094 (71.817)	Top-5 acc 85.547 (88.619)	lr 0.00167
Train [101][90/3239]	Time 0.237 (1.520)	Data Time 0.002 (0.432)	Loss 2.0963 (2.1984)	Entropy 0.75368 (0.75405)	Top-1 acc 76.172 (71.647)	Top-5 acc 91.016 (88.556)	lr 0.00167
Train [101][100/3239]	Time 0.231 (1.416)	Data Time 0.001 (0.389)	Loss 2.1645 (2.1971)	Entropy 0.75372 (0.75401)	Top-1 acc 69.922 (71.635)	Top-5 acc 89.453 (88.521)	lr 0.00167
Train [101][110/3239]	Time 0.224 (1.331)	Data Time 0.002 (0.354)	Loss 2.3010 (2.1984)	Entropy 0.75371 (0.75399)	Top-1 acc 69.141 (71.660)	Top-5 acc 86.719 (88.471)	lr 0.00167
Train [101][120/3239]	Time 2.514 (1.260)	Data Time 0.002 (0.325)	Loss 2.1916 (2.1991)	Entropy 0.75371 (0.75396)	Top-1 acc 72.266 (71.688)	Top-5 acc 87.500 (88.498)	lr 0.00167
Train [101][130/3239]	Time 0.238 (1.183)	Data Time 0.001 (0.301)	Loss 2.0829 (2.2004)	Entropy 0.75378 (0.75395)	Top-1 acc 76.953 (71.711)	Top-5 acc 91.016 (88.517)	lr 0.00167
Train [101][140/3239]	Time 0.331 (1.133)	Data Time 0.001 (0.279)	Loss 2.0813 (2.2022)	Entropy 0.75361 (0.75393)	Top-1 acc 71.484 (71.731)	Top-5 acc 91.406 (88.511)	lr 0.00167
Train [101][150/3239]	Time 0.241 (1.090)	Data Time 0.001 (0.261)	Loss 2.1908 (2.2019)	Entropy 0.75359 (0.75391)	Top-1 acc 72.656 (71.681)	Top-5 acc 88.672 (88.514)	lr 0.00167
Train [101][160/3239]	Time 0.230 (1.051)	Data Time 0.001 (0.245)	Loss 2.1375 (2.1999)	Entropy 0.75359 (0.75389)	Top-1 acc 75.781 (71.737)	Top-5 acc 88.672 (88.551)	lr 0.00167
Train [101][170/3239]	Time 0.232 (1.017)	Data Time 0.001 (0.231)	Loss 2.1299 (2.1980)	Entropy 0.75354 (0.75387)	Top-1 acc 71.875 (71.804)	Top-5 acc 89.062 (88.608)	lr 0.00167
Train [101][180/3239]	Time 0.337 (0.988)	Data Time 0.001 (0.218)	Loss 2.1873 (2.1971)	Entropy 0.75353 (0.75385)	Top-1 acc 73.438 (71.812)	Top-5 acc 91.016 (88.650)	lr 0.00167
Train [101][190/3239]	Time 0.231 (0.962)	Data Time 0.001 (0.207)	Loss 2.0125 (2.1971)	Entropy 0.75356 (0.75383)	Top-1 acc 78.516 (71.818)	Top-5 acc 91.016 (88.625)	lr 0.00167
Train [101][200/3239]	Time 0.235 (0.938)	Data Time 0.001 (0.196)	Loss 2.3285 (2.1981)	Entropy 0.75357 (0.75382)	Top-1 acc 66.016 (71.766)	Top-5 acc 85.547 (88.577)	lr 0.00166
Train [101][210/3239]	Time 0.201 (0.916)	Data Time 0.001 (0.187)	Loss 1.9714 (2.1961)	Entropy 0.75346 (0.75381)	Top-1 acc 76.172 (71.825)	Top-5 acc 93.750 (88.614)	lr 0.00166
Train [101][220/3239]	Time 0.294 (0.897)	Data Time 0.001 (0.179)	Loss 2.2889 (2.1946)	Entropy 0.75336 (0.75379)	Top-1 acc 68.750 (71.848)	Top-5 acc 87.891 (88.654)	lr 0.00166
Train [101][230/3239]	Time 2.613 (0.880)	Data Time 0.001 (0.171)	Loss 2.2479 (2.1935)	Entropy 0.75336 (0.75377)	Top-1 acc 70.703 (71.850)	Top-5 acc 87.109 (88.677)	lr 0.00166
Train [101][240/3239]	Time 0.231 (0.853)	Data Time 0.001 (0.164)	Loss 2.2505 (2.1938)	Entropy 0.75335 (0.75375)	Top-1 acc 70.703 (71.822)	Top-5 acc 86.719 (88.665)	lr 0.00166
Train [101][250/3239]	Time 0.239 (0.838)	Data Time 0.002 (0.158)	Loss 2.1518 (2.1927)	Entropy 0.75330 (0.75374)	Top-1 acc 76.953 (71.880)	Top-5 acc 90.234 (88.666)	lr 0.00166
Train [101][260/3239]	Time 0.246 (0.825)	Data Time 0.007 (0.152)	Loss 1.9454 (2.1938)	Entropy 0.75329 (0.75372)	Top-1 acc 78.125 (71.874)	Top-5 acc 93.359 (88.637)	lr 0.00166
Train [101][270/3239]	Time 0.240 (0.814)	Data Time 0.001 (0.146)	Loss 2.1232 (2.1947)	Entropy 0.75330 (0.75370)	Top-1 acc 71.875 (71.866)	Top-5 acc 90.625 (88.646)	lr 0.00166
Train [101][280/3239]	Time 0.176 (0.801)	Data Time 0.001 (0.141)	Loss 2.2331 (2.1940)	Entropy 0.75316 (0.75369)	Top-1 acc 70.312 (71.847)	Top-5 acc 89.062 (88.662)	lr 0.00166
Train [101][290/3239]	Time 0.247 (0.790)	Data Time 0.001 (0.137)	Loss 2.1524 (2.1927)	Entropy 0.75311 (0.75367)	Top-1 acc 69.531 (71.825)	Top-5 acc 90.234 (88.704)	lr 0.00166
Train [101][300/3239]	Time 0.229 (0.780)	Data Time 0.001 (0.132)	Loss 2.2549 (2.1929)	Entropy 0.75308 (0.75365)	Top-1 acc 71.484 (71.804)	Top-5 acc 85.547 (88.702)	lr 0.00166
Train [101][310/3239]	Time 0.220 (0.770)	Data Time 0.001 (0.128)	Loss 2.2536 (2.1933)	Entropy 0.75306 (0.75363)	Top-1 acc 75.391 (71.845)	Top-5 acc 86.719 (88.692)	lr 0.00166
Train [101][320/3239]	Time 0.227 (0.761)	Data Time 0.001 (0.124)	Loss 2.0795 (2.1931)	Entropy 0.75311 (0.75361)	Top-1 acc 76.172 (71.882)	Top-5 acc 91.016 (88.697)	lr 0.00166
Train [101][330/3239]	Time 0.240 (0.753)	Data Time 0.001 (0.120)	Loss 2.0147 (2.1935)	Entropy 0.75305 (0.75360)	Top-1 acc 78.516 (71.884)	Top-5 acc 92.578 (88.685)	lr 0.00166
Train [101][340/3239]	Time 2.566 (0.745)	Data Time 0.002 (0.117)	Loss 2.2301 (2.1930)	Entropy 0.75305 (0.75358)	Top-1 acc 67.969 (71.888)	Top-5 acc 89.844 (88.703)	lr 0.00166
Train [101][350/3239]	Time 0.395 (0.731)	Data Time 0.001 (0.113)	Loss 2.3939 (2.1934)	Entropy 0.75299 (0.75356)	Top-1 acc 64.844 (71.879)	Top-5 acc 86.719 (88.689)	lr 0.00166
Train [101][360/3239]	Time 0.230 (0.725)	Data Time 0.001 (0.110)	Loss 2.1529 (2.1947)	Entropy 0.75286 (0.75354)	Top-1 acc 71.875 (71.844)	Top-5 acc 88.672 (88.668)	lr 0.00166
Train [101][370/3239]	Time 0.223 (0.718)	Data Time 0.001 (0.107)	Loss 2.1885 (2.1939)	Entropy 0.75285 (0.75353)	Top-1 acc 72.266 (71.864)	Top-5 acc 89.453 (88.688)	lr 0.00166
Train [101][380/3239]	Time 0.305 (0.854)	Data Time 0.005 (0.105)	Loss 2.2215 (2.1935)	Entropy 0.75285 (0.75351)	Top-1 acc 70.703 (71.869)	Top-5 acc 87.891 (88.691)	lr 0.00166
Train [101][390/3239]	Time 0.330 (0.845)	Data Time 0.002 (0.102)	Loss 2.2947 (2.1951)	Entropy 0.75277 (0.75349)	Top-1 acc 68.750 (71.794)	Top-5 acc 85.156 (88.660)	lr 0.00166
Train [101][400/3239]	Time 0.230 (0.836)	Data Time 0.001 (0.100)	Loss 2.0683 (2.1945)	Entropy 0.75270 (0.75347)	Top-1 acc 73.828 (71.809)	Top-5 acc 92.188 (88.677)	lr 0.00165
Train [101][410/3239]	Time 0.210 (0.827)	Data Time 0.001 (0.097)	Loss 2.1905 (2.1954)	Entropy 0.75265 (0.75345)	Top-1 acc 73.047 (71.811)	Top-5 acc 85.938 (88.654)	lr 0.00165
Train [101][420/3239]	Time 0.235 (0.819)	Data Time 0.001 (0.095)	Loss 2.0863 (2.1944)	Entropy 0.75265 (0.75343)	Top-1 acc 73.828 (71.833)	Top-5 acc 89.844 (88.671)	lr 0.00165
Train [101][430/3239]	Time 0.317 (0.811)	Data Time 0.001 (0.093)	Loss 2.1790 (2.1953)	Entropy 0.75269 (0.75342)	Top-1 acc 73.047 (71.778)	Top-5 acc 87.891 (88.632)	lr 0.00165
Train [101][440/3239]	Time 0.248 (0.804)	Data Time 0.001 (0.091)	Loss 2.1997 (2.1965)	Entropy 0.75268 (0.75340)	Top-1 acc 69.141 (71.741)	Top-5 acc 89.062 (88.613)	lr 0.00165
Train [101][450/3239]	Time 2.508 (0.796)	Data Time 0.001 (0.089)	Loss 2.2610 (2.1954)	Entropy 0.75268 (0.75338)	Top-1 acc 71.094 (71.768)	Top-5 acc 87.109 (88.629)	lr 0.00165
Train [101][460/3239]	Time 0.261 (0.784)	Data Time 0.002 (0.087)	Loss 2.1975 (2.1948)	Entropy 0.75270 (0.75337)	Top-1 acc 72.656 (71.803)	Top-5 acc 89.453 (88.634)	lr 0.00165
Train [101][470/3239]	Time 0.247 (0.778)	Data Time 0.002 (0.085)	Loss 2.2074 (2.1945)	Entropy 0.75266 (0.75335)	Top-1 acc 71.484 (71.804)	Top-5 acc 88.281 (88.646)	lr 0.00165
Train [101][480/3239]	Time 0.235 (0.772)	Data Time 0.001 (0.083)	Loss 2.1401 (2.1944)	Entropy 0.75261 (0.75334)	Top-1 acc 69.922 (71.800)	Top-5 acc 89.453 (88.648)	lr 0.00165
Train [101][490/3239]	Time 0.246 (0.766)	Data Time 0.001 (0.082)	Loss 2.1911 (2.1939)	Entropy 0.75262 (0.75332)	Top-1 acc 72.266 (71.825)	Top-5 acc 89.453 (88.655)	lr 0.00165
Train [101][500/3239]	Time 0.227 (0.761)	Data Time 0.001 (0.080)	Loss 2.1623 (2.1941)	Entropy 0.75258 (0.75331)	Top-1 acc 72.266 (71.819)	Top-5 acc 89.844 (88.645)	lr 0.00165
Train [101][510/3239]	Time 0.239 (0.755)	Data Time 0.001 (0.078)	Loss 2.0097 (2.1936)	Entropy 0.75253 (0.75329)	Top-1 acc 77.344 (71.834)	Top-5 acc 92.578 (88.644)	lr 0.00165
Train [101][520/3239]	Time 0.242 (0.750)	Data Time 0.001 (0.077)	Loss 2.1909 (2.1938)	Entropy 0.75253 (0.75328)	Top-1 acc 71.875 (71.828)	Top-5 acc 89.062 (88.649)	lr 0.00165
Train [101][530/3239]	Time 0.239 (0.745)	Data Time 0.001 (0.076)	Loss 2.1302 (2.1947)	Entropy 0.75239 (0.75326)	Top-1 acc 72.656 (71.798)	Top-5 acc 88.281 (88.626)	lr 0.00165
Train [101][540/3239]	Time 0.233 (0.740)	Data Time 0.001 (0.074)	Loss 2.1383 (2.1945)	Entropy 0.75221 (0.75325)	Top-1 acc 69.922 (71.804)	Top-5 acc 89.453 (88.626)	lr 0.00165
Train [101][550/3239]	Time 0.232 (0.736)	Data Time 0.001 (0.073)	Loss 2.3344 (2.1943)	Entropy 0.75215 (0.75323)	Top-1 acc 67.188 (71.815)	Top-5 acc 87.109 (88.622)	lr 0.00165
Train [101][560/3239]	Time 2.722 (0.731)	Data Time 0.002 (0.072)	Loss 2.2002 (2.1945)	Entropy 0.75215 (0.75321)	Top-1 acc 70.703 (71.815)	Top-5 acc 89.062 (88.622)	lr 0.00165
Train [101][570/3239]	Time 0.278 (0.723)	Data Time 0.001 (0.070)	Loss 2.1512 (2.1945)	Entropy 0.75218 (0.75319)	Top-1 acc 73.828 (71.833)	Top-5 acc 89.844 (88.612)	lr 0.00165
Train [101][580/3239]	Time 0.234 (0.719)	Data Time 0.001 (0.069)	Loss 2.3113 (2.1942)	Entropy 0.75213 (0.75317)	Top-1 acc 69.531 (71.840)	Top-5 acc 85.547 (88.626)	lr 0.00165
Train [101][590/3239]	Time 0.224 (0.714)	Data Time 0.001 (0.068)	Loss 2.1565 (2.1945)	Entropy 0.75212 (0.75316)	Top-1 acc 73.047 (71.823)	Top-5 acc 89.844 (88.620)	lr 0.00165
Train [101][600/3239]	Time 0.329 (0.711)	Data Time 0.001 (0.067)	Loss 2.0754 (2.1945)	Entropy 0.75211 (0.75314)	Top-1 acc 73.438 (71.815)	Top-5 acc 91.016 (88.634)	lr 0.00164
Train [101][610/3239]	Time 0.226 (0.707)	Data Time 0.001 (0.066)	Loss 2.1919 (2.1943)	Entropy 0.75206 (0.75312)	Top-1 acc 70.703 (71.810)	Top-5 acc 87.500 (88.642)	lr 0.00164
Train [101][620/3239]	Time 0.229 (0.703)	Data Time 0.001 (0.065)	Loss 2.0570 (2.1940)	Entropy 0.75206 (0.75310)	Top-1 acc 76.562 (71.811)	Top-5 acc 91.016 (88.654)	lr 0.00164
Train [101][630/3239]	Time 0.233 (0.699)	Data Time 0.001 (0.064)	Loss 2.2152 (2.1945)	Entropy 0.75214 (0.75309)	Top-1 acc 71.484 (71.796)	Top-5 acc 87.500 (88.646)	lr 0.00164
Train [101][640/3239]	Time 0.325 (0.696)	Data Time 0.001 (0.063)	Loss 1.9914 (2.1948)	Entropy 0.75207 (0.75307)	Top-1 acc 78.906 (71.797)	Top-5 acc 91.797 (88.641)	lr 0.00164
Train [101][650/3239]	Time 0.233 (0.693)	Data Time 0.001 (0.062)	Loss 2.1264 (2.1948)	Entropy 0.75202 (0.75306)	Top-1 acc 74.219 (71.798)	Top-5 acc 90.234 (88.651)	lr 0.00164
Train [101][660/3239]	Time 0.225 (0.690)	Data Time 0.001 (0.061)	Loss 2.0339 (2.1952)	Entropy 0.75204 (0.75304)	Top-1 acc 75.781 (71.784)	Top-5 acc 92.188 (88.645)	lr 0.00164
Train [101][670/3239]	Time 2.577 (0.686)	Data Time 0.001 (0.060)	Loss 2.1211 (2.1949)	Entropy 0.75204 (0.75303)	Top-1 acc 71.094 (71.779)	Top-5 acc 90.234 (88.664)	lr 0.00164
Train [101][680/3239]	Time 0.281 (0.680)	Data Time 0.001 (0.059)	Loss 2.2297 (2.1952)	Entropy 0.75195 (0.75301)	Top-1 acc 71.094 (71.776)	Top-5 acc 87.500 (88.658)	lr 0.00164
Train [101][690/3239]	Time 0.250 (0.677)	Data Time 0.001 (0.058)	Loss 2.1586 (2.1955)	Entropy 0.75161 (0.75299)	Top-1 acc 72.266 (71.767)	Top-5 acc 89.453 (88.656)	lr 0.00164
Train [101][700/3239]	Time 0.230 (0.674)	Data Time 0.001 (0.058)	Loss 2.1321 (2.1961)	Entropy 0.75132 (0.75297)	Top-1 acc 75.000 (71.761)	Top-5 acc 90.625 (88.643)	lr 0.00164
Train [101][710/3239]	Time 0.222 (0.671)	Data Time 0.001 (0.057)	Loss 2.2188 (2.1967)	Entropy 0.75125 (0.75294)	Top-1 acc 69.531 (71.737)	Top-5 acc 89.844 (88.636)	lr 0.00164
Train [101][720/3239]	Time 0.235 (0.669)	Data Time 0.001 (0.056)	Loss 2.4437 (2.1974)	Entropy 0.75126 (0.75292)	Top-1 acc 60.938 (71.703)	Top-5 acc 83.984 (88.625)	lr 0.00164
Train [101][730/3239]	Time 0.273 (0.666)	Data Time 0.002 (0.055)	Loss 2.2357 (2.1979)	Entropy 0.75127 (0.75290)	Top-1 acc 70.703 (71.682)	Top-5 acc 86.719 (88.619)	lr 0.00164
Train [101][740/3239]	Time 0.282 (0.733)	Data Time 0.003 (0.055)	Loss 2.1250 (2.1986)	Entropy 0.75128 (0.75288)	Top-1 acc 71.875 (71.671)	Top-5 acc 89.453 (88.606)	lr 0.00164
Train [101][750/3239]	Time 0.267 (0.731)	Data Time 0.002 (0.054)	Loss 2.3356 (2.1989)	Entropy 0.75129 (0.75285)	Top-1 acc 70.312 (71.658)	Top-5 acc 84.375 (88.592)	lr 0.00164
Train [101][760/3239]	Time 0.237 (0.729)	Data Time 0.001 (0.053)	Loss 2.1918 (2.1989)	Entropy 0.75126 (0.75283)	Top-1 acc 74.609 (71.656)	Top-5 acc 89.844 (88.596)	lr 0.00164
Train [101][770/3239]	Time 0.254 (0.725)	Data Time 0.001 (0.053)	Loss 2.1798 (2.1988)	Entropy 0.75129 (0.75281)	Top-1 acc 71.484 (71.665)	Top-5 acc 89.453 (88.603)	lr 0.00164
Train [101][780/3239]	Time 2.523 (0.722)	Data Time 0.002 (0.052)	Loss 2.1270 (2.1981)	Entropy 0.75129 (0.75279)	Top-1 acc 71.875 (71.681)	Top-5 acc 88.672 (88.618)	lr 0.00164
Train [101][790/3239]	Time 0.235 (0.716)	Data Time 0.001 (0.051)	Loss 2.1847 (2.1985)	Entropy 0.75114 (0.75277)	Top-1 acc 71.875 (71.676)	Top-5 acc 89.453 (88.613)	lr 0.00164
Train [101][800/3239]	Time 0.251 (0.713)	Data Time 0.001 (0.051)	Loss 2.2966 (2.1988)	Entropy 0.75110 (0.75275)	Top-1 acc 69.922 (71.673)	Top-5 acc 87.891 (88.613)	lr 0.00163
Train [101][810/3239]	Time 0.266 (0.710)	Data Time 0.002 (0.050)	Loss 2.2591 (2.1986)	Entropy 0.75107 (0.75273)	Top-1 acc 68.359 (71.686)	Top-5 acc 87.891 (88.616)	lr 0.00163
Train [101][820/3239]	Time 0.236 (0.707)	Data Time 0.001 (0.049)	Loss 2.2628 (2.1984)	Entropy 0.75089 (0.75271)	Top-1 acc 65.625 (71.682)	Top-5 acc 89.062 (88.625)	lr 0.00163
Train [101][830/3239]	Time 0.227 (0.704)	Data Time 0.001 (0.049)	Loss 2.3754 (2.2004)	Entropy 0.75083 (0.75269)	Top-1 acc 68.359 (71.642)	Top-5 acc 84.766 (88.589)	lr 0.00163
Train [101][840/3239]	Time 0.231 (0.702)	Data Time 0.002 (0.048)	Loss 2.3393 (2.2005)	Entropy 0.75082 (0.75267)	Top-1 acc 71.094 (71.655)	Top-5 acc 83.984 (88.579)	lr 0.00163
Train [101][850/3239]	Time 0.238 (0.699)	Data Time 0.002 (0.048)	Loss 2.1084 (2.2003)	Entropy 0.75084 (0.75264)	Top-1 acc 75.391 (71.666)	Top-5 acc 87.500 (88.575)	lr 0.00163
Train [101][860/3239]	Time 0.221 (0.697)	Data Time 0.001 (0.047)	Loss 2.2450 (2.2008)	Entropy 0.75075 (0.75262)	Top-1 acc 70.312 (71.654)	Top-5 acc 87.891 (88.566)	lr 0.00163
Train [101][870/3239]	Time 0.217 (0.694)	Data Time 0.001 (0.047)	Loss 2.2421 (2.2008)	Entropy 0.75073 (0.75260)	Top-1 acc 70.312 (71.656)	Top-5 acc 86.328 (88.561)	lr 0.00163
Train [101][880/3239]	Time 0.302 (0.692)	Data Time 0.001 (0.046)	Loss 2.3666 (2.2005)	Entropy 0.75075 (0.75258)	Top-1 acc 67.969 (71.648)	Top-5 acc 85.156 (88.569)	lr 0.00163
Train [101][890/3239]	Time 2.500 (0.689)	Data Time 0.001 (0.046)	Loss 2.0515 (2.2003)	Entropy 0.75075 (0.75256)	Top-1 acc 73.438 (71.662)	Top-5 acc 87.891 (88.571)	lr 0.00163
Train [101][900/3239]	Time 0.235 (0.684)	Data Time 0.001 (0.045)	Loss 2.2950 (2.2004)	Entropy 0.75058 (0.75254)	Top-1 acc 70.703 (71.647)	Top-5 acc 85.938 (88.572)	lr 0.00163
Train [101][910/3239]	Time 0.236 (0.682)	Data Time 0.001 (0.045)	Loss 2.2440 (2.2004)	Entropy 0.75053 (0.75252)	Top-1 acc 73.047 (71.646)	Top-5 acc 87.109 (88.576)	lr 0.00163
Train [101][920/3239]	Time 0.173 (0.680)	Data Time 0.001 (0.044)	Loss 2.0859 (2.2001)	Entropy 0.75049 (0.75249)	Top-1 acc 75.391 (71.649)	Top-5 acc 89.062 (88.574)	lr 0.00163
Train [101][930/3239]	Time 0.219 (0.678)	Data Time 0.001 (0.044)	Loss 2.2069 (2.2006)	Entropy 0.75036 (0.75247)	Top-1 acc 73.828 (71.637)	Top-5 acc 89.062 (88.559)	lr 0.00163
Train [101][940/3239]	Time 0.246 (0.676)	Data Time 0.001 (0.043)	Loss 2.0545 (2.2006)	Entropy 0.75038 (0.75245)	Top-1 acc 73.438 (71.623)	Top-5 acc 89.844 (88.559)	lr 0.00163
Train [101][950/3239]	Time 0.232 (0.674)	Data Time 0.001 (0.043)	Loss 2.2269 (2.2005)	Entropy 0.75027 (0.75243)	Top-1 acc 69.531 (71.628)	Top-5 acc 87.891 (88.565)	lr 0.00163
Train [101][960/3239]	Time 0.228 (0.672)	Data Time 0.001 (0.043)	Loss 2.1467 (2.2005)	Entropy 0.75022 (0.75240)	Top-1 acc 73.828 (71.627)	Top-5 acc 90.625 (88.560)	lr 0.00163
Train [101][970/3239]	Time 0.233 (0.670)	Data Time 0.001 (0.042)	Loss 2.3556 (2.2008)	Entropy 0.75026 (0.75238)	Top-1 acc 64.453 (71.615)	Top-5 acc 85.547 (88.560)	lr 0.00163
Train [101][980/3239]	Time 0.340 (0.668)	Data Time 0.002 (0.042)	Loss 2.1900 (2.2005)	Entropy 0.75019 (0.75236)	Top-1 acc 71.094 (71.623)	Top-5 acc 89.453 (88.563)	lr 0.00163
Train [101][990/3239]	Time 0.241 (0.666)	Data Time 0.001 (0.041)	Loss 2.2896 (2.2010)	Entropy 0.75029 (0.75234)	Top-1 acc 69.922 (71.612)	Top-5 acc 86.328 (88.556)	lr 0.00163
Train [101][1000/3239]	Time 2.651 (0.665)	Data Time 0.001 (0.041)	Loss 2.1372 (2.2010)	Entropy 0.75029 (0.75232)	Top-1 acc 74.609 (71.612)	Top-5 acc 89.453 (88.558)	lr 0.00162
Train [101][1010/3239]	Time 0.216 (0.660)	Data Time 0.001 (0.040)	Loss 2.3269 (2.2013)	Entropy 0.75026 (0.75230)	Top-1 acc 69.922 (71.608)	Top-5 acc 86.719 (88.555)	lr 0.00162
Train [101][1020/3239]	Time 0.356 (0.659)	Data Time 0.002 (0.040)	Loss 2.1742 (2.2013)	Entropy 0.75028 (0.75228)	Top-1 acc 71.094 (71.604)	Top-5 acc 88.281 (88.554)	lr 0.00162
Train [101][1030/3239]	Time 0.236 (0.657)	Data Time 0.001 (0.040)	Loss 2.3780 (2.2019)	Entropy 0.75027 (0.75226)	Top-1 acc 64.844 (71.590)	Top-5 acc 86.719 (88.546)	lr 0.00162
Train [101][1040/3239]	Time 0.236 (0.655)	Data Time 0.001 (0.039)	Loss 2.2049 (2.2015)	Entropy 0.75024 (0.75224)	Top-1 acc 69.531 (71.599)	Top-5 acc 89.062 (88.552)	lr 0.00162
Train [101][1050/3239]	Time 0.226 (0.654)	Data Time 0.001 (0.039)	Loss 2.2728 (2.2019)	Entropy 0.75012 (0.75222)	Top-1 acc 67.578 (71.581)	Top-5 acc 87.109 (88.543)	lr 0.00162
Train [101][1060/3239]	Time 0.271 (0.652)	Data Time 0.001 (0.039)	Loss 2.3018 (2.2019)	Entropy 0.75017 (0.75220)	Top-1 acc 68.359 (71.582)	Top-5 acc 84.375 (88.538)	lr 0.00162
Train [101][1070/3239]	Time 0.230 (0.650)	Data Time 0.001 (0.038)	Loss 2.2955 (2.2019)	Entropy 0.75012 (0.75218)	Top-1 acc 67.969 (71.584)	Top-5 acc 88.281 (88.547)	lr 0.00162
Train [101][1080/3239]	Time 0.237 (0.649)	Data Time 0.001 (0.038)	Loss 2.1189 (2.2021)	Entropy 0.75018 (0.75216)	Top-1 acc 74.219 (71.575)	Top-5 acc 88.672 (88.541)	lr 0.00162
Train [101][1090/3239]	Time 0.243 (0.647)	Data Time 0.001 (0.038)	Loss 2.2243 (2.2020)	Entropy 0.75009 (0.75214)	Top-1 acc 74.609 (71.585)	Top-5 acc 88.281 (88.548)	lr 0.00162
Train [101][1100/3239]	Time 0.271 (0.693)	Data Time 0.004 (0.037)	Loss 2.1801 (2.2023)	Entropy 0.75005 (0.75212)	Top-1 acc 70.312 (71.567)	Top-5 acc 89.062 (88.548)	lr 0.00162
Train [101][1110/3239]	Time 2.602 (0.692)	Data Time 0.003 (0.037)	Loss 2.1463 (2.2025)	Entropy 0.75005 (0.75211)	Top-1 acc 73.828 (71.563)	Top-5 acc 89.062 (88.541)	lr 0.00162
Train [101][1120/3239]	Time 0.305 (0.688)	Data Time 0.002 (0.037)	Loss 2.3182 (2.2023)	Entropy 0.75006 (0.75209)	Top-1 acc 69.531 (71.569)	Top-5 acc 85.938 (88.545)	lr 0.00162
Train [101][1130/3239]	Time 0.255 (0.686)	Data Time 0.002 (0.036)	Loss 2.1817 (2.2041)	Entropy 0.74999 (0.75207)	Top-1 acc 72.656 (71.529)	Top-5 acc 89.062 (88.525)	lr 0.00162
Train [101][1140/3239]	Time 0.235 (0.685)	Data Time 0.001 (0.036)	Loss 2.3005 (2.2040)	Entropy 0.74999 (0.75205)	Top-1 acc 71.094 (71.544)	Top-5 acc 86.328 (88.526)	lr 0.00162
Train [101][1150/3239]	Time 0.308 (0.683)	Data Time 0.001 (0.036)	Loss 2.2912 (2.2042)	Entropy 0.74995 (0.75203)	Top-1 acc 67.969 (71.536)	Top-5 acc 88.281 (88.524)	lr 0.00162
Train [101][1160/3239]	Time 0.228 (0.681)	Data Time 0.001 (0.035)	Loss 2.2185 (2.2045)	Entropy 0.74993 (0.75201)	Top-1 acc 70.703 (71.528)	Top-5 acc 87.891 (88.519)	lr 0.00162
Train [101][1170/3239]	Time 0.234 (0.679)	Data Time 0.001 (0.035)	Loss 2.2395 (2.2047)	Entropy 0.74982 (0.75200)	Top-1 acc 67.969 (71.516)	Top-5 acc 89.844 (88.517)	lr 0.00162
Train [101][1180/3239]	Time 0.236 (0.678)	Data Time 0.001 (0.035)	Loss 2.2467 (2.2046)	Entropy 0.74978 (0.75198)	Top-1 acc 71.094 (71.517)	Top-5 acc 87.109 (88.518)	lr 0.00162
Train [101][1190/3239]	Time 0.376 (0.676)	Data Time 0.001 (0.035)	Loss 2.2641 (2.2047)	Entropy 0.74975 (0.75196)	Top-1 acc 68.750 (71.512)	Top-5 acc 86.328 (88.514)	lr 0.00162
Train [101][1200/3239]	Time 0.221 (0.675)	Data Time 0.001 (0.034)	Loss 2.1698 (2.2052)	Entropy 0.74962 (0.75194)	Top-1 acc 69.141 (71.491)	Top-5 acc 89.062 (88.510)	lr 0.00161
Train [101][1210/3239]	Time 0.223 (0.673)	Data Time 0.001 (0.034)	Loss 2.3109 (2.2052)	Entropy 0.74962 (0.75192)	Top-1 acc 70.312 (71.494)	Top-5 acc 86.328 (88.507)	lr 0.00161
Train [101][1220/3239]	Time 2.633 (0.671)	Data Time 0.001 (0.034)	Loss 2.2503 (2.2050)	Entropy 0.74962 (0.75190)	Top-1 acc 70.703 (71.496)	Top-5 acc 87.500 (88.511)	lr 0.00161
Train [101][1230/3239]	Time 0.247 (0.668)	Data Time 0.001 (0.034)	Loss 2.1504 (2.2047)	Entropy 0.74962 (0.75188)	Top-1 acc 75.000 (71.504)	Top-5 acc 89.062 (88.514)	lr 0.00161
Train [101][1240/3239]	Time 0.228 (0.667)	Data Time 0.001 (0.033)	Loss 2.1425 (2.2048)	Entropy 0.74958 (0.75187)	Top-1 acc 69.922 (71.497)	Top-5 acc 89.844 (88.513)	lr 0.00161
Train [101][1250/3239]	Time 0.239 (0.665)	Data Time 0.001 (0.033)	Loss 2.2123 (2.2048)	Entropy 0.74954 (0.75185)	Top-1 acc 72.266 (71.493)	Top-5 acc 89.062 (88.513)	lr 0.00161
Train [101][1260/3239]	Time 0.237 (0.664)	Data Time 0.001 (0.033)	Loss 2.1680 (2.2049)	Entropy 0.74921 (0.75183)	Top-1 acc 72.656 (71.494)	Top-5 acc 89.844 (88.512)	lr 0.00161
Train [101][1270/3239]	Time 0.228 (0.662)	Data Time 0.001 (0.033)	Loss 2.0948 (2.2060)	Entropy 0.74920 (0.75181)	Top-1 acc 73.828 (71.463)	Top-5 acc 90.234 (88.489)	lr 0.00161
Train [101][1280/3239]	Time 0.218 (0.661)	Data Time 0.001 (0.032)	Loss 2.2001 (2.2063)	Entropy 0.74917 (0.75179)	Top-1 acc 70.312 (71.452)	Top-5 acc 87.891 (88.479)	lr 0.00161
Train [101][1290/3239]	Time 0.246 (0.660)	Data Time 0.001 (0.032)	Loss 2.0918 (2.2066)	Entropy 0.74916 (0.75177)	Top-1 acc 75.000 (71.445)	Top-5 acc 91.406 (88.475)	lr 0.00161
Train [101][1300/3239]	Time 0.222 (0.658)	Data Time 0.001 (0.032)	Loss 2.1672 (2.2066)	Entropy 0.74911 (0.75175)	Top-1 acc 71.094 (71.447)	Top-5 acc 90.625 (88.476)	lr 0.00161
Train [101][1310/3239]	Time 0.237 (0.657)	Data Time 0.001 (0.032)	Loss 2.2761 (2.2068)	Entropy 0.74910 (0.75172)	Top-1 acc 70.703 (71.446)	Top-5 acc 87.500 (88.473)	lr 0.00161
Train [101][1320/3239]	Time 0.225 (0.656)	Data Time 0.001 (0.031)	Loss 2.2088 (2.2068)	Entropy 0.74907 (0.75171)	Top-1 acc 69.922 (71.447)	Top-5 acc 88.281 (88.471)	lr 0.00161
Train [101][1330/3239]	Time 2.581 (0.654)	Data Time 0.001 (0.031)	Loss 2.2330 (2.2067)	Entropy 0.74907 (0.75169)	Top-1 acc 72.266 (71.459)	Top-5 acc 89.062 (88.475)	lr 0.00161
Train [101][1340/3239]	Time 0.235 (0.651)	Data Time 0.001 (0.031)	Loss 2.1839 (2.2066)	Entropy 0.74900 (0.75167)	Top-1 acc 72.656 (71.467)	Top-5 acc 88.281 (88.473)	lr 0.00161
Train [101][1350/3239]	Time 0.238 (0.650)	Data Time 0.001 (0.031)	Loss 2.0977 (2.2069)	Entropy 0.74902 (0.75165)	Top-1 acc 73.047 (71.458)	Top-5 acc 91.406 (88.467)	lr 0.00161
Train [101][1360/3239]	Time 0.332 (0.649)	Data Time 0.001 (0.030)	Loss 2.1866 (2.2069)	Entropy 0.74903 (0.75163)	Top-1 acc 72.656 (71.460)	Top-5 acc 87.891 (88.466)	lr 0.00161
Train [101][1370/3239]	Time 0.235 (0.647)	Data Time 0.001 (0.030)	Loss 2.1775 (2.2070)	Entropy 0.74897 (0.75161)	Top-1 acc 69.531 (71.456)	Top-5 acc 88.672 (88.463)	lr 0.00161
Train [101][1380/3239]	Time 0.228 (0.646)	Data Time 0.001 (0.030)	Loss 2.1942 (2.2070)	Entropy 0.74897 (0.75159)	Top-1 acc 74.219 (71.456)	Top-5 acc 90.625 (88.461)	lr 0.00161
Train [101][1390/3239]	Time 0.248 (0.645)	Data Time 0.001 (0.030)	Loss 2.2206 (2.2071)	Entropy 0.74898 (0.75157)	Top-1 acc 70.312 (71.452)	Top-5 acc 87.109 (88.458)	lr 0.00161
Train [101][1400/3239]	Time 0.320 (0.644)	Data Time 0.001 (0.030)	Loss 2.1870 (2.2069)	Entropy 0.74894 (0.75155)	Top-1 acc 71.484 (71.458)	Top-5 acc 87.109 (88.456)	lr 0.00160
Train [101][1410/3239]	Time 0.235 (0.643)	Data Time 0.001 (0.029)	Loss 2.2951 (2.2068)	Entropy 0.74888 (0.75153)	Top-1 acc 71.875 (71.462)	Top-5 acc 88.672 (88.457)	lr 0.00160
Train [101][1420/3239]	Time 0.220 (0.642)	Data Time 0.001 (0.029)	Loss 2.3006 (2.2067)	Entropy 0.74878 (0.75151)	Top-1 acc 66.797 (71.464)	Top-5 acc 86.719 (88.456)	lr 0.00160
Train [101][1430/3239]	Time 0.225 (0.641)	Data Time 0.001 (0.029)	Loss 2.2673 (2.2066)	Entropy 0.74883 (0.75149)	Top-1 acc 73.438 (71.468)	Top-5 acc 89.453 (88.460)	lr 0.00160
Train [101][1440/3239]	Time 2.591 (0.640)	Data Time 0.003 (0.029)	Loss 2.1693 (2.2066)	Entropy 0.74883 (0.75148)	Top-1 acc 73.047 (71.468)	Top-5 acc 89.062 (88.458)	lr 0.00160
Train [101][1450/3239]	Time 0.249 (0.637)	Data Time 0.004 (0.029)	Loss 2.1658 (2.2071)	Entropy 0.74881 (0.75146)	Top-1 acc 73.828 (71.456)	Top-5 acc 89.062 (88.447)	lr 0.00160
Train [101][1460/3239]	Time 0.228 (0.636)	Data Time 0.001 (0.029)	Loss 2.2663 (2.2072)	Entropy 0.74869 (0.75144)	Top-1 acc 71.484 (71.457)	Top-5 acc 87.891 (88.449)	lr 0.00160
Train [101][1470/3239]	Time 0.381 (0.670)	Data Time 0.003 (0.028)	Loss 2.1293 (2.2072)	Entropy 0.74866 (0.75142)	Top-1 acc 72.266 (71.457)	Top-5 acc 89.453 (88.448)	lr 0.00160
Train [101][1480/3239]	Time 0.248 (0.669)	Data Time 0.002 (0.028)	Loss 2.0690 (2.2068)	Entropy 0.74864 (0.75140)	Top-1 acc 77.344 (71.470)	Top-5 acc 88.672 (88.453)	lr 0.00160
Train [101][1490/3239]	Time 0.258 (0.667)	Data Time 0.001 (0.028)	Loss 2.2586 (2.2071)	Entropy 0.74873 (0.75138)	Top-1 acc 70.312 (71.460)	Top-5 acc 89.062 (88.450)	lr 0.00160
Train [101][1500/3239]	Time 0.226 (0.666)	Data Time 0.002 (0.028)	Loss 2.2687 (2.2069)	Entropy 0.74863 (0.75136)	Top-1 acc 68.359 (71.465)	Top-5 acc 88.281 (88.454)	lr 0.00160
Train [101][1510/3239]	Time 0.268 (0.665)	Data Time 0.001 (0.028)	Loss 2.3238 (2.2067)	Entropy 0.74865 (0.75135)	Top-1 acc 65.234 (71.463)	Top-5 acc 86.719 (88.461)	lr 0.00160
Train [101][1520/3239]	Time 0.220 (0.664)	Data Time 0.001 (0.027)	Loss 2.3828 (2.2068)	Entropy 0.74853 (0.75133)	Top-1 acc 67.969 (71.460)	Top-5 acc 85.938 (88.463)	lr 0.00160
Train [101][1530/3239]	Time 0.321 (0.663)	Data Time 0.001 (0.027)	Loss 2.1266 (2.2070)	Entropy 0.74850 (0.75131)	Top-1 acc 73.438 (71.449)	Top-5 acc 91.797 (88.462)	lr 0.00160
Train [101][1540/3239]	Time 0.221 (0.661)	Data Time 0.001 (0.027)	Loss 2.1847 (2.2076)	Entropy 0.74845 (0.75129)	Top-1 acc 73.047 (71.436)	Top-5 acc 86.328 (88.449)	lr 0.00160
Train [101][1550/3239]	Time 2.587 (0.660)	Data Time 0.001 (0.027)	Loss 2.2819 (2.2076)	Entropy 0.74845 (0.75127)	Top-1 acc 70.312 (71.441)	Top-5 acc 87.891 (88.447)	lr 0.00160
Train [101][1560/3239]	Time 0.237 (0.657)	Data Time 0.001 (0.027)	Loss 2.2658 (2.2075)	Entropy 0.74836 (0.75126)	Top-1 acc 69.531 (71.445)	Top-5 acc 85.938 (88.449)	lr 0.00160
Train [101][1570/3239]	Time 0.338 (0.656)	Data Time 0.001 (0.027)	Loss 2.1072 (2.2077)	Entropy 0.74832 (0.75124)	Top-1 acc 73.438 (71.440)	Top-5 acc 90.234 (88.449)	lr 0.00160
Train [101][1580/3239]	Time 0.236 (0.655)	Data Time 0.001 (0.026)	Loss 2.1927 (2.2077)	Entropy 0.74826 (0.75122)	Top-1 acc 68.750 (71.441)	Top-5 acc 90.625 (88.447)	lr 0.00160
Train [101][1590/3239]	Time 0.244 (0.654)	Data Time 0.001 (0.026)	Loss 2.1651 (2.2078)	Entropy 0.74835 (0.75120)	Top-1 acc 71.094 (71.438)	Top-5 acc 89.062 (88.442)	lr 0.00160
Train [101][1600/3239]	Time 0.226 (0.653)	Data Time 0.001 (0.026)	Loss 2.2650 (2.2079)	Entropy 0.74835 (0.75118)	Top-1 acc 72.656 (71.438)	Top-5 acc 85.938 (88.441)	lr 0.00159
Train [101][1610/3239]	Time 0.229 (0.652)	Data Time 0.001 (0.026)	Loss 2.3138 (2.2080)	Entropy 0.74830 (0.75116)	Top-1 acc 68.750 (71.426)	Top-5 acc 87.109 (88.439)	lr 0.00159
Train [101][1620/3239]	Time 0.228 (0.651)	Data Time 0.001 (0.026)	Loss 2.1538 (2.2077)	Entropy 0.74826 (0.75115)	Top-1 acc 72.266 (71.429)	Top-5 acc 87.891 (88.444)	lr 0.00159
Train [101][1630/3239]	Time 0.228 (0.650)	Data Time 0.001 (0.026)	Loss 2.1940 (2.2077)	Entropy 0.74824 (0.75113)	Top-1 acc 75.000 (71.432)	Top-5 acc 88.281 (88.443)	lr 0.00159
Train [101][1640/3239]	Time 0.229 (0.649)	Data Time 0.001 (0.026)	Loss 2.1876 (2.2076)	Entropy 0.74820 (0.75111)	Top-1 acc 71.094 (71.432)	Top-5 acc 89.453 (88.444)	lr 0.00159
Train [101][1650/3239]	Time 0.231 (0.648)	Data Time 0.001 (0.025)	Loss 2.1835 (2.2076)	Entropy 0.74827 (0.75109)	Top-1 acc 70.312 (71.432)	Top-5 acc 89.844 (88.449)	lr 0.00159
Train [101][1660/3239]	Time 2.570 (0.647)	Data Time 0.001 (0.025)	Loss 2.2129 (2.2078)	Entropy 0.74827 (0.75108)	Top-1 acc 75.391 (71.429)	Top-5 acc 86.719 (88.447)	lr 0.00159
Train [101][1670/3239]	Time 0.230 (0.644)	Data Time 0.001 (0.025)	Loss 2.1163 (2.2077)	Entropy 0.74811 (0.75106)	Top-1 acc 73.828 (71.430)	Top-5 acc 91.406 (88.448)	lr 0.00159
Train [101][1680/3239]	Time 0.230 (0.643)	Data Time 0.001 (0.025)	Loss 2.2219 (2.2076)	Entropy 0.74813 (0.75104)	Top-1 acc 71.484 (71.433)	Top-5 acc 89.453 (88.450)	lr 0.00159
Train [101][1690/3239]	Time 0.225 (0.642)	Data Time 0.001 (0.025)	Loss 2.2484 (2.2078)	Entropy 0.74808 (0.75102)	Top-1 acc 68.750 (71.427)	Top-5 acc 87.500 (88.445)	lr 0.00159
Train [101][1700/3239]	Time 0.313 (0.641)	Data Time 0.001 (0.025)	Loss 2.1602 (2.2076)	Entropy 0.74812 (0.75101)	Top-1 acc 70.312 (71.429)	Top-5 acc 87.891 (88.446)	lr 0.00159
Train [101][1710/3239]	Time 0.221 (0.640)	Data Time 0.001 (0.025)	Loss 2.1041 (2.2076)	Entropy 0.74811 (0.75099)	Top-1 acc 75.781 (71.428)	Top-5 acc 90.625 (88.447)	lr 0.00159
Train [101][1720/3239]	Time 0.223 (0.639)	Data Time 0.001 (0.024)	Loss 2.1774 (2.2075)	Entropy 0.74812 (0.75097)	Top-1 acc 72.656 (71.428)	Top-5 acc 89.844 (88.446)	lr 0.00159
Train [101][1730/3239]	Time 0.228 (0.638)	Data Time 0.001 (0.024)	Loss 2.2642 (2.2076)	Entropy 0.74810 (0.75096)	Top-1 acc 71.875 (71.427)	Top-5 acc 85.547 (88.446)	lr 0.00159
Train [101][1740/3239]	Time 0.315 (0.638)	Data Time 0.001 (0.024)	Loss 2.2615 (2.2074)	Entropy 0.74803 (0.75094)	Top-1 acc 67.969 (71.434)	Top-5 acc 88.281 (88.449)	lr 0.00159
Train [101][1750/3239]	Time 0.225 (0.637)	Data Time 0.001 (0.024)	Loss 2.2523 (2.2074)	Entropy 0.74790 (0.75092)	Top-1 acc 68.750 (71.433)	Top-5 acc 87.500 (88.448)	lr 0.00159
Train [101][1760/3239]	Time 0.272 (0.636)	Data Time 0.001 (0.024)	Loss 2.3663 (2.2076)	Entropy 0.74791 (0.75091)	Top-1 acc 66.016 (71.431)	Top-5 acc 85.156 (88.442)	lr 0.00159
Train [101][1770/3239]	Time 2.643 (0.635)	Data Time 0.001 (0.024)	Loss 2.3978 (2.2075)	Entropy 0.74791 (0.75089)	Top-1 acc 67.188 (71.432)	Top-5 acc 84.766 (88.440)	lr 0.00159
Train [101][1780/3239]	Time 0.391 (0.633)	Data Time 0.002 (0.024)	Loss 2.0955 (2.2075)	Entropy 0.74789 (0.75087)	Top-1 acc 73.047 (71.431)	Top-5 acc 90.234 (88.441)	lr 0.00159
Train [101][1790/3239]	Time 0.261 (0.632)	Data Time 0.001 (0.024)	Loss 2.2050 (2.2073)	Entropy 0.74788 (0.75086)	Top-1 acc 73.828 (71.436)	Top-5 acc 89.844 (88.446)	lr 0.00159
Train [101][1800/3239]	Time 0.230 (0.631)	Data Time 0.001 (0.023)	Loss 2.1360 (2.2071)	Entropy 0.74777 (0.75084)	Top-1 acc 73.047 (71.439)	Top-5 acc 88.281 (88.444)	lr 0.00158
Train [101][1810/3239]	Time 0.228 (0.630)	Data Time 0.001 (0.023)	Loss 2.2715 (2.2073)	Entropy 0.74767 (0.75082)	Top-1 acc 74.609 (71.438)	Top-5 acc 87.891 (88.441)	lr 0.00158
Train [101][1820/3239]	Time 0.242 (0.629)	Data Time 0.001 (0.023)	Loss 2.2411 (2.2071)	Entropy 0.74766 (0.75080)	Top-1 acc 71.875 (71.442)	Top-5 acc 87.500 (88.443)	lr 0.00158
Train [101][1830/3239]	Time 0.231 (0.659)	Data Time 0.002 (0.023)	Loss 2.2097 (2.2071)	Entropy 0.74768 (0.75079)	Top-1 acc 69.922 (71.437)	Top-5 acc 88.672 (88.442)	lr 0.00158
Train [101][1840/3239]	Time 0.239 (0.658)	Data Time 0.003 (0.023)	Loss 2.2898 (2.2072)	Entropy 0.74767 (0.75077)	Top-1 acc 69.531 (71.438)	Top-5 acc 85.547 (88.438)	lr 0.00158
Train [101][1850/3239]	Time 0.226 (0.657)	Data Time 0.002 (0.023)	Loss 2.3092 (2.2070)	Entropy 0.74771 (0.75075)	Top-1 acc 67.578 (71.437)	Top-5 acc 87.500 (88.443)	lr 0.00158
Train [101][1860/3239]	Time 0.295 (0.657)	Data Time 0.009 (0.023)	Loss 2.2290 (2.2069)	Entropy 0.74760 (0.75074)	Top-1 acc 71.875 (71.439)	Top-5 acc 84.766 (88.442)	lr 0.00158
Train [101][1870/3239]	Time 0.239 (0.656)	Data Time 0.002 (0.023)	Loss 2.0859 (2.2068)	Entropy 0.74749 (0.75072)	Top-1 acc 76.562 (71.438)	Top-5 acc 91.797 (88.442)	lr 0.00158
Train [101][1880/3239]	Time 2.673 (0.655)	Data Time 0.001 (0.023)	Loss 2.2041 (2.2070)	Entropy 0.74749 (0.75070)	Top-1 acc 71.875 (71.432)	Top-5 acc 90.625 (88.438)	lr 0.00158
Train [101][1890/3239]	Time 0.233 (0.653)	Data Time 0.001 (0.022)	Loss 2.2599 (2.2069)	Entropy 0.74748 (0.75069)	Top-1 acc 71.094 (71.432)	Top-5 acc 88.672 (88.439)	lr 0.00158
Train [101][1900/3239]	Time 0.246 (0.652)	Data Time 0.001 (0.022)	Loss 2.2567 (2.2068)	Entropy 0.74748 (0.75067)	Top-1 acc 69.141 (71.438)	Top-5 acc 89.844 (88.445)	lr 0.00158
Train [101][1910/3239]	Time 0.227 (0.651)	Data Time 0.001 (0.022)	Loss 2.2807 (2.2068)	Entropy 0.74744 (0.75065)	Top-1 acc 71.875 (71.441)	Top-5 acc 87.500 (88.446)	lr 0.00158
Train [101][1920/3239]	Time 0.233 (0.650)	Data Time 0.001 (0.022)	Loss 2.2004 (2.2067)	Entropy 0.74745 (0.75064)	Top-1 acc 69.141 (71.447)	Top-5 acc 89.453 (88.449)	lr 0.00158
Train [101][1930/3239]	Time 0.227 (0.649)	Data Time 0.001 (0.022)	Loss 2.3387 (2.2066)	Entropy 0.74745 (0.75062)	Top-1 acc 70.312 (71.447)	Top-5 acc 87.109 (88.451)	lr 0.00158
Train [101][1940/3239]	Time 0.227 (0.648)	Data Time 0.001 (0.022)	Loss 2.2314 (2.2066)	Entropy 0.74744 (0.75060)	Top-1 acc 71.875 (71.451)	Top-5 acc 87.891 (88.452)	lr 0.00158
Train [101][1950/3239]	Time 0.343 (0.647)	Data Time 0.002 (0.022)	Loss 2.2336 (2.2066)	Entropy 0.74738 (0.75059)	Top-1 acc 65.625 (71.449)	Top-5 acc 89.062 (88.452)	lr 0.00158
Train [101][1960/3239]	Time 0.237 (0.647)	Data Time 0.001 (0.022)	Loss 2.1724 (2.2066)	Entropy 0.74736 (0.75057)	Top-1 acc 70.703 (71.446)	Top-5 acc 90.234 (88.450)	lr 0.00158
Train [101][1970/3239]	Time 0.245 (0.646)	Data Time 0.001 (0.022)	Loss 2.0881 (2.2065)	Entropy 0.74738 (0.75055)	Top-1 acc 70.312 (71.451)	Top-5 acc 91.016 (88.452)	lr 0.00158
Train [101][1980/3239]	Time 0.249 (0.645)	Data Time 0.001 (0.021)	Loss 2.1233 (2.2066)	Entropy 0.74736 (0.75054)	Top-1 acc 73.828 (71.448)	Top-5 acc 89.844 (88.449)	lr 0.00158
Train [101][1990/3239]	Time 2.733 (0.644)	Data Time 0.001 (0.021)	Loss 2.2769 (2.2067)	Entropy 0.74736 (0.75052)	Top-1 acc 69.922 (71.445)	Top-5 acc 87.500 (88.445)	lr 0.00158
Train [101][2000/3239]	Time 0.270 (0.642)	Data Time 0.001 (0.021)	Loss 2.1629 (2.2068)	Entropy 0.74734 (0.75051)	Top-1 acc 71.875 (71.442)	Top-5 acc 90.625 (88.446)	lr 0.00158
Train [101][2010/3239]	Time 0.281 (0.642)	Data Time 0.002 (0.021)	Loss 2.1620 (2.2065)	Entropy 0.74732 (0.75049)	Top-1 acc 71.875 (71.453)	Top-5 acc 88.672 (88.451)	lr 0.00157
Train [101][2020/3239]	Time 0.219 (0.641)	Data Time 0.001 (0.021)	Loss 2.3839 (2.2066)	Entropy 0.74728 (0.75047)	Top-1 acc 68.750 (71.455)	Top-5 acc 85.547 (88.451)	lr 0.00157
Train [101][2030/3239]	Time 0.236 (0.640)	Data Time 0.001 (0.021)	Loss 2.1488 (2.2067)	Entropy 0.74725 (0.75046)	Top-1 acc 72.656 (71.455)	Top-5 acc 87.500 (88.445)	lr 0.00157
Train [101][2040/3239]	Time 0.238 (0.639)	Data Time 0.001 (0.021)	Loss 2.0595 (2.2067)	Entropy 0.74711 (0.75044)	Top-1 acc 73.828 (71.450)	Top-5 acc 90.625 (88.445)	lr 0.00157
Train [101][2050/3239]	Time 0.252 (0.638)	Data Time 0.001 (0.021)	Loss 2.0289 (2.2066)	Entropy 0.74711 (0.75043)	Top-1 acc 74.609 (71.451)	Top-5 acc 91.797 (88.450)	lr 0.00157
Train [101][2060/3239]	Time 0.226 (0.638)	Data Time 0.002 (0.021)	Loss 2.2587 (2.2069)	Entropy 0.74694 (0.75041)	Top-1 acc 71.094 (71.441)	Top-5 acc 89.062 (88.446)	lr 0.00157
Train [101][2070/3239]	Time 0.230 (0.637)	Data Time 0.001 (0.021)	Loss 2.1806 (2.2071)	Entropy 0.74694 (0.75039)	Top-1 acc 70.312 (71.435)	Top-5 acc 89.844 (88.441)	lr 0.00157
Train [101][2080/3239]	Time 0.223 (0.636)	Data Time 0.001 (0.021)	Loss 2.0556 (2.2070)	Entropy 0.74674 (0.75038)	Top-1 acc 75.391 (71.440)	Top-5 acc 92.188 (88.438)	lr 0.00157
Train [101][2090/3239]	Time 0.244 (0.636)	Data Time 0.001 (0.020)	Loss 1.9932 (2.2069)	Entropy 0.74665 (0.75036)	Top-1 acc 76.172 (71.448)	Top-5 acc 92.188 (88.442)	lr 0.00157
Train [101][2100/3239]	Time 2.546 (0.635)	Data Time 0.001 (0.020)	Loss 2.1238 (2.2068)	Entropy 0.74665 (0.75034)	Top-1 acc 73.828 (71.451)	Top-5 acc 89.844 (88.442)	lr 0.00157
Train [101][2110/3239]	Time 0.240 (0.633)	Data Time 0.001 (0.020)	Loss 2.1681 (2.2066)	Entropy 0.74661 (0.75032)	Top-1 acc 71.094 (71.453)	Top-5 acc 88.281 (88.448)	lr 0.00157
Train [101][2120/3239]	Time 0.227 (0.632)	Data Time 0.001 (0.020)	Loss 2.0690 (2.2066)	Entropy 0.74661 (0.75031)	Top-1 acc 76.172 (71.449)	Top-5 acc 90.234 (88.450)	lr 0.00157
Train [101][2130/3239]	Time 0.239 (0.632)	Data Time 0.001 (0.020)	Loss 2.2187 (2.2066)	Entropy 0.74652 (0.75029)	Top-1 acc 71.875 (71.452)	Top-5 acc 90.234 (88.453)	lr 0.00157
Train [101][2140/3239]	Time 0.229 (0.631)	Data Time 0.001 (0.020)	Loss 2.2106 (2.2066)	Entropy 0.74636 (0.75027)	Top-1 acc 70.703 (71.453)	Top-5 acc 89.453 (88.454)	lr 0.00157
Train [101][2150/3239]	Time 0.228 (0.630)	Data Time 0.001 (0.020)	Loss 2.3283 (2.2065)	Entropy 0.74628 (0.75025)	Top-1 acc 71.094 (71.458)	Top-5 acc 86.719 (88.454)	lr 0.00157
Train [101][2160/3239]	Time 0.227 (0.630)	Data Time 0.001 (0.020)	Loss 2.2567 (2.2064)	Entropy 0.74658 (0.75023)	Top-1 acc 68.750 (71.460)	Top-5 acc 87.891 (88.454)	lr 0.00157
Train [101][2170/3239]	Time 0.234 (0.629)	Data Time 0.001 (0.020)	Loss 2.2226 (2.2063)	Entropy 0.74661 (0.75022)	Top-1 acc 71.875 (71.463)	Top-5 acc 85.938 (88.454)	lr 0.00157
Train [101][2180/3239]	Time 0.244 (0.628)	Data Time 0.001 (0.020)	Loss 2.2560 (2.2064)	Entropy 0.74659 (0.75020)	Top-1 acc 69.922 (71.460)	Top-5 acc 87.500 (88.451)	lr 0.00157
Train [101][2190/3239]	Time 0.349 (0.652)	Data Time 0.002 (0.020)	Loss 2.1060 (2.2064)	Entropy 0.74653 (0.75018)	Top-1 acc 74.219 (71.459)	Top-5 acc 90.625 (88.455)	lr 0.00157
Train [101][2200/3239]	Time 0.217 (0.651)	Data Time 0.002 (0.019)	Loss 2.3134 (2.2065)	Entropy 0.74655 (0.75017)	Top-1 acc 68.750 (71.457)	Top-5 acc 87.500 (88.453)	lr 0.00157
Train [101][2210/3239]	Time 2.542 (0.650)	Data Time 0.002 (0.019)	Loss 2.2764 (2.2066)	Entropy 0.74655 (0.75015)	Top-1 acc 68.750 (71.456)	Top-5 acc 89.062 (88.454)	lr 0.00156
Train [101][2220/3239]	Time 0.268 (0.649)	Data Time 0.001 (0.019)	Loss 2.2682 (2.2065)	Entropy 0.74652 (0.75013)	Top-1 acc 69.531 (71.456)	Top-5 acc 85.938 (88.456)	lr 0.00156
Train [101][2230/3239]	Time 0.260 (0.648)	Data Time 0.002 (0.019)	Loss 2.1760 (2.2064)	Entropy 0.74647 (0.75012)	Top-1 acc 71.875 (71.458)	Top-5 acc 90.234 (88.459)	lr 0.00156
Train [101][2240/3239]	Time 0.340 (0.647)	Data Time 0.001 (0.019)	Loss 2.1397 (2.2062)	Entropy 0.74638 (0.75010)	Top-1 acc 73.047 (71.465)	Top-5 acc 88.281 (88.462)	lr 0.00156
Train [101][2250/3239]	Time 0.225 (0.646)	Data Time 0.001 (0.019)	Loss 2.1168 (2.2060)	Entropy 0.74636 (0.75008)	Top-1 acc 75.000 (71.472)	Top-5 acc 91.406 (88.467)	lr 0.00156
Train [101][2260/3239]	Time 0.234 (0.646)	Data Time 0.002 (0.019)	Loss 2.2571 (2.2060)	Entropy 0.74634 (0.75007)	Top-1 acc 71.094 (71.474)	Top-5 acc 87.500 (88.468)	lr 0.00156
Train [101][2270/3239]	Time 0.234 (0.645)	Data Time 0.001 (0.019)	Loss 2.2844 (2.2060)	Entropy 0.74636 (0.75005)	Top-1 acc 70.312 (71.476)	Top-5 acc 88.281 (88.468)	lr 0.00156
Train [101][2280/3239]	Time 0.226 (0.644)	Data Time 0.001 (0.019)	Loss 2.1600 (2.2062)	Entropy 0.74630 (0.75004)	Top-1 acc 75.000 (71.479)	Top-5 acc 90.234 (88.466)	lr 0.00156
Train [101][2290/3239]	Time 0.231 (0.644)	Data Time 0.001 (0.019)	Loss 2.2040 (2.2060)	Entropy 0.74630 (0.75002)	Top-1 acc 69.141 (71.484)	Top-5 acc 90.234 (88.471)	lr 0.00156
Train [101][2300/3239]	Time 0.229 (0.643)	Data Time 0.001 (0.019)	Loss 2.3082 (2.2060)	Entropy 0.74704 (0.75000)	Top-1 acc 69.141 (71.479)	Top-5 acc 88.672 (88.473)	lr 0.00156
Train [101][2310/3239]	Time 0.230 (0.642)	Data Time 0.001 (0.019)	Loss 2.1821 (2.2060)	Entropy 0.74703 (0.74999)	Top-1 acc 69.922 (71.477)	Top-5 acc 88.672 (88.475)	lr 0.00156
Train [101][2320/3239]	Time 2.618 (0.641)	Data Time 0.001 (0.019)	Loss 2.2284 (2.2062)	Entropy 0.74703 (0.74998)	Top-1 acc 71.094 (71.471)	Top-5 acc 89.062 (88.470)	lr 0.00156
Train [101][2330/3239]	Time 0.285 (0.640)	Data Time 0.001 (0.018)	Loss 2.1682 (2.2062)	Entropy 0.74704 (0.74997)	Top-1 acc 71.875 (71.475)	Top-5 acc 89.062 (88.470)	lr 0.00156
Train [101][2340/3239]	Time 0.225 (0.639)	Data Time 0.001 (0.018)	Loss 2.1412 (2.2061)	Entropy 0.74691 (0.74995)	Top-1 acc 72.656 (71.478)	Top-5 acc 87.891 (88.470)	lr 0.00156
Train [101][2350/3239]	Time 0.223 (0.638)	Data Time 0.001 (0.018)	Loss 2.1223 (2.2060)	Entropy 0.74687 (0.74994)	Top-1 acc 73.438 (71.476)	Top-5 acc 89.453 (88.471)	lr 0.00156
Train [101][2360/3239]	Time 0.268 (0.638)	Data Time 0.001 (0.018)	Loss 2.0593 (2.2061)	Entropy 0.74679 (0.74993)	Top-1 acc 76.953 (71.477)	Top-5 acc 90.234 (88.469)	lr 0.00156
Train [101][2370/3239]	Time 0.254 (0.637)	Data Time 0.001 (0.018)	Loss 2.1334 (2.2062)	Entropy 0.74686 (0.74991)	Top-1 acc 69.141 (71.474)	Top-5 acc 90.625 (88.463)	lr 0.00156
Train [101][2380/3239]	Time 0.229 (0.636)	Data Time 0.001 (0.018)	Loss 2.1547 (2.2062)	Entropy 0.74685 (0.74990)	Top-1 acc 74.219 (71.474)	Top-5 acc 89.844 (88.463)	lr 0.00156
Train [101][2390/3239]	Time 0.229 (0.636)	Data Time 0.001 (0.018)	Loss 2.2004 (2.2064)	Entropy 0.74681 (0.74989)	Top-1 acc 72.656 (71.467)	Top-5 acc 88.672 (88.458)	lr 0.00156
Train [101][2400/3239]	Time 0.217 (0.635)	Data Time 0.001 (0.018)	Loss 2.0874 (2.2063)	Entropy 0.74685 (0.74987)	Top-1 acc 74.219 (71.473)	Top-5 acc 90.234 (88.459)	lr 0.00156
Train [101][2410/3239]	Time 0.334 (0.635)	Data Time 0.001 (0.018)	Loss 2.2732 (2.2064)	Entropy 0.74684 (0.74986)	Top-1 acc 67.969 (71.468)	Top-5 acc 86.719 (88.458)	lr 0.00156
Train [101][2420/3239]	Time 0.260 (0.634)	Data Time 0.001 (0.018)	Loss 2.1339 (2.2065)	Entropy 0.74681 (0.74985)	Top-1 acc 70.312 (71.459)	Top-5 acc 89.453 (88.451)	lr 0.00155
Train [101][2430/3239]	Time 2.561 (0.633)	Data Time 0.001 (0.018)	Loss 2.1754 (2.2066)	Entropy 0.74681 (0.74984)	Top-1 acc 70.703 (71.451)	Top-5 acc 88.672 (88.450)	lr 0.00155
Train [101][2440/3239]	Time 0.241 (0.632)	Data Time 0.001 (0.018)	Loss 2.2608 (2.2066)	Entropy 0.74674 (0.74982)	Top-1 acc 71.484 (71.452)	Top-5 acc 86.328 (88.453)	lr 0.00155
Train [101][2450/3239]	Time 0.249 (0.631)	Data Time 0.001 (0.018)	Loss 2.1671 (2.2066)	Entropy 0.74673 (0.74981)	Top-1 acc 71.094 (71.449)	Top-5 acc 88.672 (88.451)	lr 0.00155
Train [101][2460/3239]	Time 0.230 (0.630)	Data Time 0.001 (0.018)	Loss 2.2560 (2.2066)	Entropy 0.74671 (0.74980)	Top-1 acc 70.312 (71.451)	Top-5 acc 87.891 (88.450)	lr 0.00155
Train [101][2470/3239]	Time 0.242 (0.630)	Data Time 0.001 (0.018)	Loss 2.2381 (2.2066)	Entropy 0.74676 (0.74979)	Top-1 acc 73.438 (71.453)	Top-5 acc 85.938 (88.451)	lr 0.00155
Train [101][2480/3239]	Time 0.225 (0.629)	Data Time 0.001 (0.017)	Loss 2.1412 (2.2065)	Entropy 0.74673 (0.74977)	Top-1 acc 73.828 (71.454)	Top-5 acc 89.062 (88.450)	lr 0.00155
Train [101][2490/3239]	Time 0.248 (0.629)	Data Time 0.001 (0.017)	Loss 2.1069 (2.2065)	Entropy 0.74671 (0.74976)	Top-1 acc 75.781 (71.459)	Top-5 acc 90.234 (88.452)	lr 0.00155
Train [101][2500/3239]	Time 0.232 (0.628)	Data Time 0.001 (0.017)	Loss 2.0089 (2.2063)	Entropy 0.74669 (0.74975)	Top-1 acc 76.172 (71.467)	Top-5 acc 90.234 (88.456)	lr 0.00155
Train [101][2510/3239]	Time 0.228 (0.628)	Data Time 0.001 (0.017)	Loss 2.2279 (2.2063)	Entropy 0.74667 (0.74974)	Top-1 acc 67.578 (71.464)	Top-5 acc 86.719 (88.455)	lr 0.00155
Train [101][2520/3239]	Time 0.236 (0.627)	Data Time 0.001 (0.017)	Loss 2.0141 (2.2062)	Entropy 0.74663 (0.74973)	Top-1 acc 76.562 (71.471)	Top-5 acc 91.797 (88.456)	lr 0.00155
Train [101][2530/3239]	Time 0.251 (0.626)	Data Time 0.001 (0.017)	Loss 2.1366 (2.2060)	Entropy 0.74669 (0.74971)	Top-1 acc 70.312 (71.477)	Top-5 acc 89.062 (88.462)	lr 0.00155
Train [101][2540/3239]	Time 2.740 (0.626)	Data Time 0.003 (0.017)	Loss 2.2492 (2.2060)	Entropy 0.74669 (0.74970)	Top-1 acc 71.094 (71.479)	Top-5 acc 87.109 (88.463)	lr 0.00155
Train [101][2550/3239]	Time 0.253 (0.624)	Data Time 0.001 (0.017)	Loss 2.0777 (2.2062)	Entropy 0.74673 (0.74969)	Top-1 acc 72.266 (71.476)	Top-5 acc 90.625 (88.459)	lr 0.00155
Train [101][2560/3239]	Time 0.434 (0.644)	Data Time 0.003 (0.017)	Loss 2.2957 (2.2062)	Entropy 0.74672 (0.74968)	Top-1 acc 70.312 (71.474)	Top-5 acc 87.109 (88.458)	lr 0.00155
Train [101][2570/3239]	Time 0.230 (0.643)	Data Time 0.002 (0.017)	Loss 2.1856 (2.2062)	Entropy 0.74665 (0.74967)	Top-1 acc 72.656 (71.472)	Top-5 acc 88.281 (88.455)	lr 0.00155
Train [101][2580/3239]	Time 0.344 (0.643)	Data Time 0.002 (0.017)	Loss 2.2493 (2.2062)	Entropy 0.74663 (0.74965)	Top-1 acc 69.922 (71.472)	Top-5 acc 85.547 (88.454)	lr 0.00155
Train [101][2590/3239]	Time 0.227 (0.642)	Data Time 0.001 (0.017)	Loss 2.2239 (2.2062)	Entropy 0.74667 (0.74964)	Top-1 acc 74.219 (71.471)	Top-5 acc 87.500 (88.454)	lr 0.00155
Train [101][2600/3239]	Time 0.231 (0.642)	Data Time 0.002 (0.017)	Loss 2.3328 (2.2063)	Entropy 0.74662 (0.74963)	Top-1 acc 71.484 (71.468)	Top-5 acc 86.719 (88.454)	lr 0.00155
Train [101][2610/3239]	Time 0.231 (0.641)	Data Time 0.001 (0.017)	Loss 2.2492 (2.2062)	Entropy 0.74660 (0.74962)	Top-1 acc 70.703 (71.469)	Top-5 acc 88.281 (88.455)	lr 0.00155
Train [101][2620/3239]	Time 0.231 (0.640)	Data Time 0.001 (0.017)	Loss 2.2143 (2.2062)	Entropy 0.74660 (0.74961)	Top-1 acc 70.312 (71.469)	Top-5 acc 89.453 (88.457)	lr 0.00154
Train [101][2630/3239]	Time 0.243 (0.640)	Data Time 0.001 (0.017)	Loss 2.1674 (2.2062)	Entropy 0.74658 (0.74960)	Top-1 acc 71.875 (71.468)	Top-5 acc 89.453 (88.457)	lr 0.00154
Train [101][2640/3239]	Time 0.229 (0.639)	Data Time 0.001 (0.016)	Loss 2.3171 (2.2063)	Entropy 0.74660 (0.74959)	Top-1 acc 68.359 (71.462)	Top-5 acc 85.938 (88.455)	lr 0.00154
Train [101][2650/3239]	Time 0.222 (0.639)	Data Time 0.001 (0.016)	Loss 2.2834 (2.2064)	Entropy 0.74657 (0.74957)	Top-1 acc 66.797 (71.460)	Top-5 acc 87.109 (88.453)	lr 0.00154
Train [101][2660/3239]	Time 0.267 (0.638)	Data Time 0.001 (0.016)	Loss 2.1445 (2.2064)	Entropy 0.74648 (0.74956)	Top-1 acc 74.609 (71.462)	Top-5 acc 89.062 (88.451)	lr 0.00154
Train [101][2670/3239]	Time 0.252 (0.637)	Data Time 0.001 (0.016)	Loss 2.0956 (2.2064)	Entropy 0.74643 (0.74955)	Top-1 acc 74.219 (71.464)	Top-5 acc 90.234 (88.453)	lr 0.00154
Train [101][2680/3239]	Time 0.229 (0.637)	Data Time 0.001 (0.016)	Loss 2.2633 (2.2065)	Entropy 0.74647 (0.74954)	Top-1 acc 67.969 (71.462)	Top-5 acc 87.891 (88.451)	lr 0.00154
Train [101][2690/3239]	Time 0.212 (0.636)	Data Time 0.001 (0.016)	Loss 2.2696 (2.2065)	Entropy 0.74647 (0.74953)	Top-1 acc 72.656 (71.464)	Top-5 acc 84.766 (88.450)	lr 0.00154
Train [101][2700/3239]	Time 0.254 (0.636)	Data Time 0.001 (0.016)	Loss 2.2304 (2.2064)	Entropy 0.74646 (0.74952)	Top-1 acc 68.750 (71.461)	Top-5 acc 86.328 (88.451)	lr 0.00154
Train [101][2710/3239]	Time 0.228 (0.635)	Data Time 0.001 (0.016)	Loss 2.0987 (2.2064)	Entropy 0.74639 (0.74951)	Top-1 acc 75.000 (71.461)	Top-5 acc 91.016 (88.449)	lr 0.00154
Train [101][2720/3239]	Time 0.273 (0.634)	Data Time 0.001 (0.016)	Loss 2.3123 (2.2066)	Entropy 0.74640 (0.74949)	Top-1 acc 69.141 (71.458)	Top-5 acc 84.375 (88.444)	lr 0.00154
Train [101][2730/3239]	Time 0.256 (0.634)	Data Time 0.001 (0.016)	Loss 2.4044 (2.2068)	Entropy 0.74636 (0.74948)	Top-1 acc 64.844 (71.449)	Top-5 acc 84.766 (88.441)	lr 0.00154
Train [101][2740/3239]	Time 0.279 (0.633)	Data Time 0.001 (0.016)	Loss 2.3723 (2.2067)	Entropy 0.74628 (0.74947)	Top-1 acc 66.406 (71.455)	Top-5 acc 86.328 (88.444)	lr 0.00154
Train [101][2750/3239]	Time 0.235 (0.633)	Data Time 0.001 (0.016)	Loss 2.1818 (2.2066)	Entropy 0.74631 (0.74946)	Top-1 acc 73.047 (71.460)	Top-5 acc 87.109 (88.445)	lr 0.00154
Train [101][2760/3239]	Time 0.226 (0.632)	Data Time 0.001 (0.016)	Loss 2.3305 (2.2066)	Entropy 0.74626 (0.74945)	Top-1 acc 69.141 (71.457)	Top-5 acc 86.328 (88.444)	lr 0.00154
Train [101][2770/3239]	Time 0.293 (0.632)	Data Time 0.001 (0.016)	Loss 2.2623 (2.2066)	Entropy 0.74623 (0.74944)	Top-1 acc 69.531 (71.456)	Top-5 acc 86.328 (88.444)	lr 0.00154
Train [101][2780/3239]	Time 0.223 (0.631)	Data Time 0.001 (0.016)	Loss 2.0808 (2.2064)	Entropy 0.74617 (0.74943)	Top-1 acc 74.609 (71.461)	Top-5 acc 90.625 (88.447)	lr 0.00154
Train [101][2790/3239]	Time 0.244 (0.631)	Data Time 0.002 (0.016)	Loss 2.2976 (2.2064)	Entropy 0.74617 (0.74941)	Top-1 acc 71.875 (71.459)	Top-5 acc 88.281 (88.448)	lr 0.00154
Train [101][2800/3239]	Time 0.233 (0.630)	Data Time 0.001 (0.016)	Loss 2.3662 (2.2064)	Entropy 0.74614 (0.74940)	Top-1 acc 64.453 (71.456)	Top-5 acc 86.719 (88.449)	lr 0.00154
Train [101][2810/3239]	Time 0.226 (0.629)	Data Time 0.001 (0.016)	Loss 2.1035 (2.2064)	Entropy 0.74611 (0.74939)	Top-1 acc 74.219 (71.457)	Top-5 acc 92.188 (88.446)	lr 0.00154
Train [101][2820/3239]	Time 0.255 (0.629)	Data Time 0.002 (0.016)	Loss 2.4029 (2.2066)	Entropy 0.74611 (0.74938)	Top-1 acc 64.062 (71.452)	Top-5 acc 83.984 (88.442)	lr 0.00154
Train [101][2830/3239]	Time 0.326 (0.628)	Data Time 0.001 (0.015)	Loss 2.2317 (2.2067)	Entropy 0.74606 (0.74937)	Top-1 acc 71.094 (71.449)	Top-5 acc 87.891 (88.439)	lr 0.00153
Train [101][2840/3239]	Time 0.266 (0.628)	Data Time 0.001 (0.015)	Loss 2.2074 (2.2068)	Entropy 0.74596 (0.74936)	Top-1 acc 71.875 (71.447)	Top-5 acc 89.062 (88.438)	lr 0.00153
Train [101][2850/3239]	Time 0.268 (0.627)	Data Time 0.001 (0.015)	Loss 2.2538 (2.2069)	Entropy 0.74540 (0.74934)	Top-1 acc 67.578 (71.445)	Top-5 acc 88.281 (88.439)	lr 0.00153
Train [101][2860/3239]	Time 0.247 (0.627)	Data Time 0.001 (0.015)	Loss 2.2757 (2.2069)	Entropy 0.74540 (0.74933)	Top-1 acc 68.359 (71.441)	Top-5 acc 87.891 (88.436)	lr 0.00153
Train [101][2870/3239]	Time 0.320 (0.626)	Data Time 0.001 (0.015)	Loss 2.1049 (2.2069)	Entropy 0.74533 (0.74932)	Top-1 acc 74.219 (71.441)	Top-5 acc 91.406 (88.439)	lr 0.00153
Train [101][2880/3239]	Time 0.254 (0.626)	Data Time 0.002 (0.015)	Loss 2.2570 (2.2067)	Entropy 0.74526 (0.74930)	Top-1 acc 70.703 (71.446)	Top-5 acc 88.281 (88.441)	lr 0.00153
Train [101][2890/3239]	Time 0.289 (0.643)	Data Time 0.004 (0.015)	Loss 2.0282 (2.2067)	Entropy 0.74527 (0.74929)	Top-1 acc 76.562 (71.450)	Top-5 acc 91.406 (88.442)	lr 0.00153
Train [101][2900/3239]	Time 0.253 (0.643)	Data Time 0.002 (0.015)	Loss 2.0955 (2.2066)	Entropy 0.74526 (0.74927)	Top-1 acc 73.438 (71.453)	Top-5 acc 91.016 (88.446)	lr 0.00153
Train [101][2910/3239]	Time 0.353 (0.643)	Data Time 0.003 (0.015)	Loss 2.1593 (2.2067)	Entropy 0.74516 (0.74926)	Top-1 acc 74.609 (71.449)	Top-5 acc 88.281 (88.443)	lr 0.00153
Train [101][2920/3239]	Time 0.223 (0.642)	Data Time 0.001 (0.015)	Loss 2.2810 (2.2067)	Entropy 0.74512 (0.74925)	Top-1 acc 69.922 (71.448)	Top-5 acc 86.719 (88.444)	lr 0.00153
Train [101][2930/3239]	Time 0.276 (0.641)	Data Time 0.001 (0.015)	Loss 2.1349 (2.2066)	Entropy 0.74507 (0.74923)	Top-1 acc 73.828 (71.452)	Top-5 acc 91.016 (88.446)	lr 0.00153
Train [101][2940/3239]	Time 0.240 (0.641)	Data Time 0.001 (0.015)	Loss 2.1275 (2.2064)	Entropy 0.74493 (0.74922)	Top-1 acc 74.219 (71.456)	Top-5 acc 89.453 (88.449)	lr 0.00153
Train [101][2950/3239]	Time 0.327 (0.640)	Data Time 0.001 (0.015)	Loss 2.3662 (2.2064)	Entropy 0.74491 (0.74920)	Top-1 acc 66.406 (71.457)	Top-5 acc 85.156 (88.448)	lr 0.00153
Train [101][2960/3239]	Time 0.303 (0.640)	Data Time 0.001 (0.015)	Loss 2.2511 (2.2063)	Entropy 0.74494 (0.74919)	Top-1 acc 71.094 (71.457)	Top-5 acc 86.719 (88.449)	lr 0.00153
Train [101][2970/3239]	Time 0.235 (0.639)	Data Time 0.001 (0.015)	Loss 2.2465 (2.2063)	Entropy 0.74494 (0.74918)	Top-1 acc 68.359 (71.455)	Top-5 acc 87.109 (88.448)	lr 0.00153
Train [101][2980/3239]	Time 0.267 (0.639)	Data Time 0.001 (0.015)	Loss 2.0561 (2.2065)	Entropy 0.74491 (0.74916)	Top-1 acc 75.391 (71.455)	Top-5 acc 89.453 (88.445)	lr 0.00153
Train [101][2990/3239]	Time 0.268 (0.638)	Data Time 0.002 (0.015)	Loss 2.1818 (2.2065)	Entropy 0.74490 (0.74915)	Top-1 acc 70.703 (71.453)	Top-5 acc 91.016 (88.448)	lr 0.00153
Train [101][3000/3239]	Time 0.236 (0.638)	Data Time 0.001 (0.015)	Loss 2.1942 (2.2064)	Entropy 0.74488 (0.74913)	Top-1 acc 71.094 (71.454)	Top-5 acc 90.625 (88.448)	lr 0.00153
Train [101][3010/3239]	Time 0.249 (0.637)	Data Time 0.001 (0.015)	Loss 2.2398 (2.2064)	Entropy 0.74487 (0.74912)	Top-1 acc 69.922 (71.453)	Top-5 acc 89.062 (88.449)	lr 0.00153
Train [101][3020/3239]	Time 0.242 (0.637)	Data Time 0.001 (0.015)	Loss 2.4470 (2.2066)	Entropy 0.74426 (0.74910)	Top-1 acc 65.625 (71.446)	Top-5 acc 83.203 (88.443)	lr 0.00153
Train [101][3030/3239]	Time 0.259 (0.636)	Data Time 0.001 (0.015)	Loss 2.3111 (2.2069)	Entropy 0.74427 (0.74909)	Top-1 acc 69.531 (71.437)	Top-5 acc 87.500 (88.438)	lr 0.00152
Train [101][3040/3239]	Time 0.250 (0.636)	Data Time 0.002 (0.015)	Loss 2.1737 (2.2071)	Entropy 0.74421 (0.74907)	Top-1 acc 70.312 (71.430)	Top-5 acc 91.797 (88.437)	lr 0.00152
Train [101][3050/3239]	Time 0.224 (0.635)	Data Time 0.001 (0.014)	Loss 2.1020 (2.2070)	Entropy 0.74418 (0.74906)	Top-1 acc 76.172 (71.435)	Top-5 acc 89.062 (88.437)	lr 0.00152
Train [101][3060/3239]	Time 0.251 (0.635)	Data Time 0.004 (0.014)	Loss 2.2740 (2.2071)	Entropy 0.74412 (0.74904)	Top-1 acc 69.922 (71.436)	Top-5 acc 85.156 (88.434)	lr 0.00152
Train [101][3070/3239]	Time 0.244 (0.634)	Data Time 0.002 (0.014)	Loss 2.2776 (2.2069)	Entropy 0.74412 (0.74902)	Top-1 acc 67.578 (71.438)	Top-5 acc 86.719 (88.437)	lr 0.00152
Train [101][3080/3239]	Time 0.237 (0.634)	Data Time 0.001 (0.014)	Loss 2.0947 (2.2068)	Entropy 0.74412 (0.74901)	Top-1 acc 74.219 (71.438)	Top-5 acc 92.578 (88.438)	lr 0.00152
Train [101][3090/3239]	Time 0.251 (0.633)	Data Time 0.001 (0.014)	Loss 2.1549 (2.2067)	Entropy 0.74409 (0.74899)	Top-1 acc 73.047 (71.441)	Top-5 acc 89.453 (88.440)	lr 0.00152
Train [101][3100/3239]	Time 0.242 (0.633)	Data Time 0.001 (0.014)	Loss 2.4003 (2.2069)	Entropy 0.74405 (0.74898)	Top-1 acc 64.062 (71.438)	Top-5 acc 86.328 (88.435)	lr 0.00152
Train [101][3110/3239]	Time 0.252 (0.632)	Data Time 0.001 (0.014)	Loss 2.1215 (2.2070)	Entropy 0.74397 (0.74896)	Top-1 acc 73.438 (71.434)	Top-5 acc 89.844 (88.435)	lr 0.00152
Train [101][3120/3239]	Time 0.319 (0.632)	Data Time 0.001 (0.014)	Loss 2.2853 (2.2070)	Entropy 0.74394 (0.74894)	Top-1 acc 70.312 (71.434)	Top-5 acc 86.719 (88.436)	lr 0.00152
Train [101][3130/3239]	Time 0.257 (0.631)	Data Time 0.001 (0.014)	Loss 2.2448 (2.2071)	Entropy 0.74341 (0.74893)	Top-1 acc 71.484 (71.432)	Top-5 acc 84.766 (88.434)	lr 0.00152
Train [101][3140/3239]	Time 0.225 (0.631)	Data Time 0.001 (0.014)	Loss 2.3140 (2.2073)	Entropy 0.74332 (0.74891)	Top-1 acc 69.531 (71.430)	Top-5 acc 85.938 (88.430)	lr 0.00152
Train [101][3150/3239]	Time 0.264 (0.630)	Data Time 0.001 (0.014)	Loss 2.1756 (2.2073)	Entropy 0.74331 (0.74889)	Top-1 acc 74.219 (71.430)	Top-5 acc 87.891 (88.428)	lr 0.00152
Train [101][3160/3239]	Time 0.356 (0.630)	Data Time 0.001 (0.014)	Loss 2.1817 (2.2075)	Entropy 0.74333 (0.74888)	Top-1 acc 71.875 (71.425)	Top-5 acc 89.844 (88.424)	lr 0.00152
Train [101][3170/3239]	Time 0.229 (0.629)	Data Time 0.002 (0.014)	Loss 2.2287 (2.2077)	Entropy 0.74311 (0.74886)	Top-1 acc 71.875 (71.424)	Top-5 acc 89.844 (88.420)	lr 0.00152
Train [101][3180/3239]	Time 0.234 (0.629)	Data Time 0.000 (0.014)	Loss 2.2567 (2.2077)	Entropy 0.74304 (0.74884)	Top-1 acc 68.359 (71.419)	Top-5 acc 86.719 (88.418)	lr 0.00152
Train [101][3190/3239]	Time 0.218 (0.628)	Data Time 0.000 (0.014)	Loss 2.2436 (2.2076)	Entropy 0.74300 (0.74882)	Top-1 acc 69.141 (71.423)	Top-5 acc 85.547 (88.418)	lr 0.00152
Train [101][3200/3239]	Time 0.266 (0.628)	Data Time 0.000 (0.014)	Loss 2.2602 (2.2076)	Entropy 0.74295 (0.74880)	Top-1 acc 69.531 (71.425)	Top-5 acc 88.281 (88.418)	lr 0.00152
Train [101][3210/3239]	Time 0.228 (0.627)	Data Time 0.000 (0.014)	Loss 2.2198 (2.2076)	Entropy 0.74292 (0.74879)	Top-1 acc 71.484 (71.422)	Top-5 acc 87.891 (88.418)	lr 0.00152
Train [101][3220/3239]	Time 0.293 (0.642)	Data Time 0.000 (0.014)	Loss 2.1875 (2.2077)	Entropy 0.74290 (0.74877)	Top-1 acc 67.969 (71.421)	Top-5 acc 89.844 (88.416)	lr 0.00152
Train [101][3230/3239]	Time 0.234 (0.642)	Data Time 0.000 (0.014)	Loss 2.1853 (2.2078)	Entropy 0.74278 (0.74875)	Top-1 acc 72.656 (71.422)	Top-5 acc 88.672 (88.415)	lr 0.00152
Train [101][3239/3239]	Time 2.358 (0.642)	Data Time 0.000 (0.014)	Loss 2.3157 (2.2078)	Entropy 0.74278 (0.74873)	Top-1 acc 74.074 (71.422)	Top-5 acc 90.123 (88.416)	lr 0.00151
==========Valid [101/120]	loss 1.220	top-1 acc 72.172 (72.172)	top-5 acc 89.555	Train top-1 71.422	top-5 88.416	Entropy 0.74278	Latency-None: 0.000ms	Flops: 546.53M
Train [102][0/3239]	Time 41.234 (41.234)	Data Time 40.093 (40.093)	Loss 2.2099 (2.2099)	Entropy 0.74271 (0.74271)	Top-1 acc 68.750 (68.750)	Top-5 acc 88.672 (88.672)	lr 0.00151
Train [102][10/3239]	Time 2.625 (4.295)	Data Time 0.001 (3.647)	Loss 2.1793 (2.1822)	Entropy 0.74271 (0.74271)	Top-1 acc 73.047 (72.017)	Top-5 acc 91.797 (89.205)	lr 0.00151
Train [102][20/3239]	Time 0.238 (2.363)	Data Time 0.001 (1.911)	Loss 2.1709 (2.2042)	Entropy 0.74265 (0.74268)	Top-1 acc 73.047 (72.005)	Top-5 acc 88.281 (88.318)	lr 0.00151
Train [102][30/3239]	Time 0.239 (1.759)	Data Time 0.002 (1.295)	Loss 2.2140 (2.2067)	Entropy 0.74266 (0.74267)	Top-1 acc 70.703 (71.888)	Top-5 acc 87.500 (88.344)	lr 0.00151
Train [102][40/3239]	Time 0.288 (1.455)	Data Time 0.001 (0.980)	Loss 2.1937 (2.1965)	Entropy 0.74258 (0.74265)	Top-1 acc 74.219 (71.999)	Top-5 acc 86.719 (88.643)	lr 0.00151
Train [102][50/3239]	Time 0.320 (1.265)	Data Time 0.001 (0.788)	Loss 2.0597 (2.1912)	Entropy 0.74260 (0.74264)	Top-1 acc 81.250 (72.143)	Top-5 acc 90.234 (88.810)	lr 0.00151
Train [102][60/3239]	Time 0.238 (1.135)	Data Time 0.001 (0.659)	Loss 2.2199 (2.1921)	Entropy 0.74250 (0.74263)	Top-1 acc 73.438 (71.971)	Top-5 acc 89.453 (88.723)	lr 0.00151
Train [102][70/3239]	Time 0.239 (1.044)	Data Time 0.001 (0.566)	Loss 2.0397 (2.1952)	Entropy 0.74242 (0.74260)	Top-1 acc 74.219 (71.820)	Top-5 acc 92.578 (88.628)	lr 0.00151
Train [102][80/3239]	Time 0.245 (0.975)	Data Time 0.006 (0.497)	Loss 2.2040 (2.1961)	Entropy 0.74238 (0.74258)	Top-1 acc 73.047 (71.783)	Top-5 acc 88.281 (88.643)	lr 0.00151
Train [102][90/3239]	Time 0.332 (0.923)	Data Time 0.002 (0.442)	Loss 2.2792 (2.1960)	Entropy 0.74242 (0.74256)	Top-1 acc 70.312 (71.699)	Top-5 acc 86.719 (88.646)	lr 0.00151
Train [102][100/3239]	Time 0.238 (0.880)	Data Time 0.001 (0.399)	Loss 2.0974 (2.1962)	Entropy 0.74237 (0.74254)	Top-1 acc 77.734 (71.732)	Top-5 acc 89.062 (88.649)	lr 0.00151
Train [102][110/3239]	Time 0.232 (0.844)	Data Time 0.001 (0.363)	Loss 2.1799 (2.1944)	Entropy 0.74229 (0.74253)	Top-1 acc 72.266 (71.843)	Top-5 acc 87.109 (88.626)	lr 0.00151
Train [102][120/3239]	Time 2.458 (0.812)	Data Time 0.001 (0.333)	Loss 2.2478 (2.1963)	Entropy 0.74229 (0.74251)	Top-1 acc 70.312 (71.833)	Top-5 acc 86.719 (88.588)	lr 0.00151
Train [102][130/3239]	Time 0.242 (0.770)	Data Time 0.001 (0.308)	Loss 2.2624 (2.1941)	Entropy 0.74221 (0.74248)	Top-1 acc 71.094 (71.911)	Top-5 acc 84.766 (88.639)	lr 0.00151
Train [102][140/3239]	Time 0.234 (0.750)	Data Time 0.001 (0.286)	Loss 2.1853 (2.1960)	Entropy 0.74219 (0.74246)	Top-1 acc 69.531 (71.903)	Top-5 acc 88.672 (88.542)	lr 0.00151
Train [102][150/3239]	Time 0.228 (0.733)	Data Time 0.001 (0.267)	Loss 2.2031 (2.1995)	Entropy 0.74222 (0.74245)	Top-1 acc 69.922 (71.839)	Top-5 acc 90.234 (88.511)	lr 0.00151
Train [102][160/3239]	Time 0.224 (0.717)	Data Time 0.001 (0.251)	Loss 2.1506 (2.2021)	Entropy 0.74220 (0.74243)	Top-1 acc 72.266 (71.749)	Top-5 acc 88.281 (88.451)	lr 0.00151
Train [102][170/3239]	Time 0.223 (0.703)	Data Time 0.001 (0.236)	Loss 2.3078 (2.1999)	Entropy 0.74219 (0.74242)	Top-1 acc 70.312 (71.772)	Top-5 acc 88.672 (88.551)	lr 0.00151
Train [102][180/3239]	Time 0.234 (0.692)	Data Time 0.002 (0.223)	Loss 2.1820 (2.1982)	Entropy 0.74216 (0.74240)	Top-1 acc 71.875 (71.853)	Top-5 acc 90.234 (88.560)	lr 0.00151
Train [102][190/3239]	Time 0.229 (0.681)	Data Time 0.001 (0.212)	Loss 2.2061 (2.1974)	Entropy 0.74214 (0.74239)	Top-1 acc 69.531 (71.877)	Top-5 acc 88.281 (88.553)	lr 0.00151
Train [102][200/3239]	Time 0.235 (0.671)	Data Time 0.001 (0.201)	Loss 2.1207 (2.1956)	Entropy 0.74221 (0.74238)	Top-1 acc 73.828 (71.920)	Top-5 acc 89.453 (88.577)	lr 0.00151
Train [102][210/3239]	Time 0.228 (0.662)	Data Time 0.001 (0.192)	Loss 2.1318 (2.1955)	Entropy 0.74208 (0.74237)	Top-1 acc 72.266 (71.916)	Top-5 acc 90.625 (88.620)	lr 0.00150
Train [102][220/3239]	Time 0.337 (0.655)	Data Time 0.002 (0.183)	Loss 2.2938 (2.1965)	Entropy 0.74206 (0.74236)	Top-1 acc 69.141 (71.870)	Top-5 acc 87.500 (88.605)	lr 0.00150
Train [102][230/3239]	Time 2.634 (0.647)	Data Time 0.001 (0.175)	Loss 2.1474 (2.1965)	Entropy 0.74206 (0.74234)	Top-1 acc 73.828 (71.860)	Top-5 acc 89.062 (88.599)	lr 0.00150
Train [102][240/3239]	Time 0.312 (0.632)	Data Time 0.002 (0.168)	Loss 2.1066 (2.1950)	Entropy 0.74201 (0.74233)	Top-1 acc 76.953 (71.904)	Top-5 acc 89.062 (88.618)	lr 0.00150
Train [102][250/3239]	Time 0.235 (0.626)	Data Time 0.001 (0.161)	Loss 2.2516 (2.1940)	Entropy 0.74201 (0.74232)	Top-1 acc 71.875 (71.947)	Top-5 acc 87.109 (88.619)	lr 0.00150
Train [102][260/3239]	Time 0.295 (0.621)	Data Time 0.002 (0.155)	Loss 2.0903 (2.1940)	Entropy 0.74199 (0.74231)	Top-1 acc 74.219 (71.917)	Top-5 acc 90.625 (88.636)	lr 0.00150
Train [102][270/3239]	Time 0.253 (0.616)	Data Time 0.001 (0.150)	Loss 2.1950 (2.1942)	Entropy 0.74199 (0.74229)	Top-1 acc 70.312 (71.897)	Top-5 acc 89.062 (88.629)	lr 0.00150
Train [102][280/3239]	Time 0.210 (0.611)	Data Time 0.001 (0.144)	Loss 2.2650 (2.1955)	Entropy 0.74198 (0.74228)	Top-1 acc 71.484 (71.886)	Top-5 acc 88.281 (88.626)	lr 0.00150
Train [102][290/3239]	Time 0.235 (0.607)	Data Time 0.001 (0.139)	Loss 2.1127 (2.1966)	Entropy 0.74206 (0.74227)	Top-1 acc 76.172 (71.858)	Top-5 acc 89.062 (88.613)	lr 0.00150
Train [102][300/3239]	Time 0.213 (0.602)	Data Time 0.001 (0.135)	Loss 2.3018 (2.1975)	Entropy 0.74201 (0.74227)	Top-1 acc 68.750 (71.839)	Top-5 acc 86.719 (88.576)	lr 0.00150
Train [102][310/3239]	Time 0.273 (0.599)	Data Time 0.001 (0.131)	Loss 2.3143 (2.1991)	Entropy 0.74190 (0.74226)	Top-1 acc 70.703 (71.787)	Top-5 acc 85.938 (88.532)	lr 0.00150
Train [102][320/3239]	Time 0.231 (0.595)	Data Time 0.001 (0.127)	Loss 2.1156 (2.1987)	Entropy 0.74182 (0.74224)	Top-1 acc 75.781 (71.800)	Top-5 acc 89.844 (88.544)	lr 0.00150
Train [102][330/3239]	Time 0.238 (0.592)	Data Time 0.001 (0.123)	Loss 2.1530 (2.1988)	Entropy 0.74179 (0.74223)	Top-1 acc 73.438 (71.775)	Top-5 acc 88.672 (88.531)	lr 0.00150
Train [102][340/3239]	Time 54.844 (0.742)	Data Time 0.001 (0.119)	Loss 2.1927 (2.1980)	Entropy 0.74179 (0.74222)	Top-1 acc 73.438 (71.791)	Top-5 acc 88.281 (88.562)	lr 0.00150
Train [102][350/3239]	Time 0.448 (0.731)	Data Time 0.003 (0.116)	Loss 2.2406 (2.1987)	Entropy 0.74170 (0.74220)	Top-1 acc 70.703 (71.779)	Top-5 acc 87.891 (88.549)	lr 0.00150
Train [102][360/3239]	Time 0.241 (0.726)	Data Time 0.002 (0.113)	Loss 2.1608 (2.1982)	Entropy 0.74160 (0.74219)	Top-1 acc 70.703 (71.784)	Top-5 acc 90.625 (88.554)	lr 0.00150
Train [102][370/3239]	Time 0.236 (0.719)	Data Time 0.001 (0.110)	Loss 2.1927 (2.1978)	Entropy 0.74157 (0.74217)	Top-1 acc 72.266 (71.814)	Top-5 acc 87.109 (88.555)	lr 0.00150
Train [102][380/3239]	Time 0.285 (0.714)	Data Time 0.002 (0.107)	Loss 2.1096 (2.1970)	Entropy 0.74159 (0.74216)	Top-1 acc 73.828 (71.852)	Top-5 acc 89.062 (88.554)	lr 0.00150
Train [102][390/3239]	Time 0.241 (0.708)	Data Time 0.001 (0.104)	Loss 2.2582 (2.1969)	Entropy 0.74158 (0.74214)	Top-1 acc 70.312 (71.843)	Top-5 acc 88.281 (88.558)	lr 0.00150
Train [102][400/3239]	Time 0.228 (0.702)	Data Time 0.001 (0.102)	Loss 2.1945 (2.1977)	Entropy 0.74157 (0.74213)	Top-1 acc 75.781 (71.846)	Top-5 acc 88.281 (88.529)	lr 0.00150
Train [102][410/3239]	Time 0.232 (0.697)	Data Time 0.001 (0.099)	Loss 2.1456 (2.1976)	Entropy 0.74157 (0.74211)	Top-1 acc 75.391 (71.846)	Top-5 acc 90.234 (88.539)	lr 0.00150
Train [102][420/3239]	Time 0.239 (0.691)	Data Time 0.001 (0.097)	Loss 2.2920 (2.1975)	Entropy 0.74152 (0.74210)	Top-1 acc 71.875 (71.847)	Top-5 acc 85.938 (88.536)	lr 0.00149
Train [102][430/3239]	Time 0.314 (0.687)	Data Time 0.001 (0.095)	Loss 2.2600 (2.1973)	Entropy 0.74150 (0.74209)	Top-1 acc 68.359 (71.832)	Top-5 acc 88.672 (88.543)	lr 0.00149
Train [102][440/3239]	Time 0.231 (0.682)	Data Time 0.001 (0.093)	Loss 2.1011 (2.1962)	Entropy 0.74154 (0.74207)	Top-1 acc 73.047 (71.855)	Top-5 acc 90.625 (88.569)	lr 0.00149
Train [102][450/3239]	Time 2.517 (0.677)	Data Time 0.001 (0.091)	Loss 2.1643 (2.1955)	Entropy 0.74154 (0.74206)	Top-1 acc 72.266 (71.873)	Top-5 acc 89.062 (88.587)	lr 0.00149
Train [102][460/3239]	Time 0.250 (0.668)	Data Time 0.001 (0.089)	Loss 2.1805 (2.1960)	Entropy 0.74150 (0.74205)	Top-1 acc 72.266 (71.844)	Top-5 acc 87.891 (88.592)	lr 0.00149
Train [102][470/3239]	Time 0.237 (0.663)	Data Time 0.001 (0.087)	Loss 2.1402 (2.1947)	Entropy 0.74125 (0.74203)	Top-1 acc 73.047 (71.873)	Top-5 acc 89.844 (88.614)	lr 0.00149
Train [102][480/3239]	Time 0.239 (0.660)	Data Time 0.001 (0.085)	Loss 2.2705 (2.1948)	Entropy 0.74124 (0.74202)	Top-1 acc 68.750 (71.857)	Top-5 acc 89.062 (88.622)	lr 0.00149
Train [102][490/3239]	Time 0.226 (0.656)	Data Time 0.001 (0.083)	Loss 2.2091 (2.1948)	Entropy 0.74126 (0.74200)	Top-1 acc 70.312 (71.842)	Top-5 acc 87.891 (88.623)	lr 0.00149
Train [102][500/3239]	Time 0.236 (0.652)	Data Time 0.002 (0.082)	Loss 2.1209 (2.1942)	Entropy 0.74124 (0.74199)	Top-1 acc 72.266 (71.838)	Top-5 acc 89.844 (88.631)	lr 0.00149
Train [102][510/3239]	Time 0.247 (0.649)	Data Time 0.001 (0.080)	Loss 2.0225 (2.1933)	Entropy 0.74115 (0.74197)	Top-1 acc 79.297 (71.871)	Top-5 acc 91.406 (88.644)	lr 0.00149
Train [102][520/3239]	Time 0.217 (0.646)	Data Time 0.001 (0.079)	Loss 2.1646 (2.1931)	Entropy 0.74115 (0.74196)	Top-1 acc 70.703 (71.863)	Top-5 acc 90.625 (88.653)	lr 0.00149
Train [102][530/3239]	Time 0.224 (0.643)	Data Time 0.001 (0.077)	Loss 2.1095 (2.1931)	Entropy 0.74117 (0.74194)	Top-1 acc 73.438 (71.874)	Top-5 acc 89.844 (88.648)	lr 0.00149
Train [102][540/3239]	Time 0.228 (0.640)	Data Time 0.001 (0.076)	Loss 2.0882 (2.1922)	Entropy 0.74096 (0.74192)	Top-1 acc 76.562 (71.888)	Top-5 acc 91.406 (88.665)	lr 0.00149
Train [102][550/3239]	Time 0.224 (0.637)	Data Time 0.001 (0.074)	Loss 2.0096 (2.1923)	Entropy 0.74096 (0.74191)	Top-1 acc 76.953 (71.898)	Top-5 acc 92.578 (88.663)	lr 0.00149
Train [102][560/3239]	Time 2.589 (0.634)	Data Time 0.001 (0.073)	Loss 2.3096 (2.1929)	Entropy 0.74096 (0.74189)	Top-1 acc 68.750 (71.899)	Top-5 acc 86.719 (88.653)	lr 0.00149
Train [102][570/3239]	Time 0.234 (0.628)	Data Time 0.001 (0.072)	Loss 2.2071 (2.1934)	Entropy 0.74094 (0.74187)	Top-1 acc 69.141 (71.879)	Top-5 acc 89.062 (88.649)	lr 0.00149
Train [102][580/3239]	Time 0.217 (0.625)	Data Time 0.001 (0.071)	Loss 2.1397 (2.1929)	Entropy 0.74089 (0.74186)	Top-1 acc 72.656 (71.887)	Top-5 acc 91.016 (88.669)	lr 0.00149
Train [102][590/3239]	Time 0.236 (0.623)	Data Time 0.001 (0.070)	Loss 2.3741 (2.1933)	Entropy 0.74085 (0.74184)	Top-1 acc 65.625 (71.878)	Top-5 acc 87.109 (88.661)	lr 0.00149
Train [102][600/3239]	Time 0.398 (0.621)	Data Time 0.001 (0.068)	Loss 2.0766 (2.1934)	Entropy 0.74083 (0.74182)	Top-1 acc 74.609 (71.852)	Top-5 acc 89.844 (88.659)	lr 0.00149
Train [102][610/3239]	Time 0.227 (0.618)	Data Time 0.001 (0.067)	Loss 2.1857 (2.1938)	Entropy 0.74079 (0.74181)	Top-1 acc 69.922 (71.846)	Top-5 acc 88.672 (88.648)	lr 0.00149
Train [102][620/3239]	Time 0.237 (0.616)	Data Time 0.001 (0.066)	Loss 2.2149 (2.1946)	Entropy 0.74076 (0.74179)	Top-1 acc 71.094 (71.822)	Top-5 acc 87.891 (88.642)	lr 0.00149
Train [102][630/3239]	Time 0.227 (0.614)	Data Time 0.002 (0.065)	Loss 2.1578 (2.1948)	Entropy 0.74073 (0.74177)	Top-1 acc 73.828 (71.816)	Top-5 acc 88.281 (88.639)	lr 0.00148
Train [102][640/3239]	Time 0.327 (0.612)	Data Time 0.001 (0.064)	Loss 2.2759 (2.1946)	Entropy 0.74071 (0.74176)	Top-1 acc 69.141 (71.815)	Top-5 acc 88.672 (88.646)	lr 0.00148
Train [102][650/3239]	Time 0.229 (0.610)	Data Time 0.001 (0.063)	Loss 2.1930 (2.1942)	Entropy 0.74076 (0.74174)	Top-1 acc 72.266 (71.823)	Top-5 acc 86.719 (88.658)	lr 0.00148
Train [102][660/3239]	Time 0.234 (0.608)	Data Time 0.001 (0.062)	Loss 2.2141 (2.1940)	Entropy 0.74068 (0.74173)	Top-1 acc 72.266 (71.832)	Top-5 acc 85.547 (88.654)	lr 0.00148
Train [102][670/3239]	Time 2.576 (0.606)	Data Time 0.001 (0.061)	Loss 2.1220 (2.1944)	Entropy 0.74068 (0.74171)	Top-1 acc 74.219 (71.829)	Top-5 acc 91.406 (88.648)	lr 0.00148
Train [102][680/3239]	Time 0.261 (0.601)	Data Time 0.002 (0.061)	Loss 2.3260 (2.1945)	Entropy 0.74061 (0.74169)	Top-1 acc 66.797 (71.826)	Top-5 acc 83.984 (88.655)	lr 0.00148
Train [102][690/3239]	Time 0.233 (0.599)	Data Time 0.001 (0.060)	Loss 2.2588 (2.1945)	Entropy 0.74052 (0.74168)	Top-1 acc 71.875 (71.833)	Top-5 acc 89.062 (88.657)	lr 0.00148
Train [102][700/3239]	Time 0.220 (0.597)	Data Time 0.001 (0.059)	Loss 2.3198 (2.1955)	Entropy 0.74060 (0.74166)	Top-1 acc 68.359 (71.809)	Top-5 acc 85.938 (88.643)	lr 0.00148
Train [102][710/3239]	Time 0.233 (0.671)	Data Time 0.002 (0.058)	Loss 2.2499 (2.1952)	Entropy 0.74054 (0.74165)	Top-1 acc 71.484 (71.795)	Top-5 acc 86.719 (88.655)	lr 0.00148
Train [102][720/3239]	Time 0.241 (0.669)	Data Time 0.002 (0.057)	Loss 2.0599 (2.1950)	Entropy 0.74053 (0.74163)	Top-1 acc 75.781 (71.813)	Top-5 acc 90.625 (88.667)	lr 0.00148
Train [102][730/3239]	Time 0.226 (0.666)	Data Time 0.001 (0.057)	Loss 2.0571 (2.1950)	Entropy 0.74056 (0.74162)	Top-1 acc 71.875 (71.798)	Top-5 acc 90.625 (88.672)	lr 0.00148
Train [102][740/3239]	Time 0.240 (0.664)	Data Time 0.001 (0.056)	Loss 2.1836 (2.1950)	Entropy 0.74055 (0.74160)	Top-1 acc 70.703 (71.796)	Top-5 acc 86.719 (88.670)	lr 0.00148
Train [102][750/3239]	Time 0.217 (0.661)	Data Time 0.001 (0.055)	Loss 2.1653 (2.1950)	Entropy 0.74051 (0.74159)	Top-1 acc 71.875 (71.801)	Top-5 acc 88.281 (88.667)	lr 0.00148
Train [102][760/3239]	Time 0.235 (0.659)	Data Time 0.001 (0.054)	Loss 2.1716 (2.1948)	Entropy 0.74050 (0.74157)	Top-1 acc 73.828 (71.807)	Top-5 acc 87.891 (88.665)	lr 0.00148
Train [102][770/3239]	Time 0.231 (0.657)	Data Time 0.001 (0.054)	Loss 2.2087 (2.1950)	Entropy 0.74037 (0.74156)	Top-1 acc 71.094 (71.802)	Top-5 acc 88.672 (88.656)	lr 0.00148
Train [102][780/3239]	Time 2.486 (0.654)	Data Time 0.001 (0.053)	Loss 2.3193 (2.1948)	Entropy 0.74037 (0.74154)	Top-1 acc 65.625 (71.794)	Top-5 acc 86.719 (88.659)	lr 0.00148
Train [102][790/3239]	Time 0.245 (0.649)	Data Time 0.001 (0.052)	Loss 1.9453 (2.1939)	Entropy 0.74033 (0.74153)	Top-1 acc 77.344 (71.820)	Top-5 acc 94.141 (88.682)	lr 0.00148
Train [102][800/3239]	Time 0.239 (0.647)	Data Time 0.001 (0.052)	Loss 2.0051 (2.1947)	Entropy 0.74024 (0.74151)	Top-1 acc 73.828 (71.801)	Top-5 acc 92.188 (88.669)	lr 0.00148
Train [102][810/3239]	Time 0.387 (0.645)	Data Time 0.002 (0.051)	Loss 2.0357 (2.1948)	Entropy 0.74014 (0.74150)	Top-1 acc 78.906 (71.791)	Top-5 acc 90.234 (88.670)	lr 0.00148
Train [102][820/3239]	Time 0.235 (0.643)	Data Time 0.001 (0.051)	Loss 2.2763 (2.1950)	Entropy 0.74010 (0.74148)	Top-1 acc 68.359 (71.788)	Top-5 acc 86.719 (88.662)	lr 0.00148
Train [102][830/3239]	Time 0.223 (0.641)	Data Time 0.001 (0.050)	Loss 2.1518 (2.1947)	Entropy 0.74007 (0.74146)	Top-1 acc 71.484 (71.795)	Top-5 acc 89.844 (88.667)	lr 0.00148
Train [102][840/3239]	Time 0.231 (0.639)	Data Time 0.001 (0.049)	Loss 2.2021 (2.1955)	Entropy 0.74008 (0.74145)	Top-1 acc 72.656 (71.779)	Top-5 acc 85.938 (88.653)	lr 0.00147
Train [102][850/3239]	Time 0.325 (0.638)	Data Time 0.001 (0.049)	Loss 2.2224 (2.1958)	Entropy 0.74004 (0.74143)	Top-1 acc 72.266 (71.774)	Top-5 acc 87.500 (88.648)	lr 0.00147
Train [102][860/3239]	Time 0.227 (0.636)	Data Time 0.001 (0.048)	Loss 2.2490 (2.1960)	Entropy 0.74004 (0.74141)	Top-1 acc 69.531 (71.773)	Top-5 acc 86.328 (88.642)	lr 0.00147
Train [102][870/3239]	Time 0.228 (0.634)	Data Time 0.001 (0.048)	Loss 2.1258 (2.1957)	Entropy 0.74004 (0.74140)	Top-1 acc 73.047 (71.775)	Top-5 acc 90.625 (88.655)	lr 0.00147
Train [102][880/3239]	Time 0.236 (0.632)	Data Time 0.001 (0.047)	Loss 2.1931 (2.1949)	Entropy 0.74004 (0.74138)	Top-1 acc 70.703 (71.789)	Top-5 acc 89.062 (88.665)	lr 0.00147
Train [102][890/3239]	Time 2.692 (0.631)	Data Time 0.002 (0.047)	Loss 2.1450 (2.1944)	Entropy 0.74004 (0.74137)	Top-1 acc 73.047 (71.801)	Top-5 acc 88.672 (88.680)	lr 0.00147
Train [102][900/3239]	Time 0.221 (0.626)	Data Time 0.001 (0.046)	Loss 2.1005 (2.1945)	Entropy 0.73994 (0.74135)	Top-1 acc 72.266 (71.791)	Top-5 acc 91.016 (88.677)	lr 0.00147
Train [102][910/3239]	Time 0.237 (0.624)	Data Time 0.001 (0.046)	Loss 2.0420 (2.1947)	Entropy 0.73996 (0.74134)	Top-1 acc 74.219 (71.802)	Top-5 acc 90.234 (88.672)	lr 0.00147
Train [102][920/3239]	Time 0.215 (0.623)	Data Time 0.001 (0.045)	Loss 2.2508 (2.1943)	Entropy 0.73990 (0.74132)	Top-1 acc 71.094 (71.802)	Top-5 acc 87.891 (88.680)	lr 0.00147
Train [102][930/3239]	Time 0.237 (0.621)	Data Time 0.001 (0.045)	Loss 2.2251 (2.1945)	Entropy 0.73983 (0.74130)	Top-1 acc 72.266 (71.797)	Top-5 acc 89.062 (88.678)	lr 0.00147
Train [102][940/3239]	Time 0.236 (0.620)	Data Time 0.001 (0.044)	Loss 2.1815 (2.1950)	Entropy 0.73985 (0.74129)	Top-1 acc 73.047 (71.771)	Top-5 acc 86.719 (88.666)	lr 0.00147
Train [102][950/3239]	Time 0.230 (0.618)	Data Time 0.001 (0.044)	Loss 2.1447 (2.1949)	Entropy 0.73979 (0.74127)	Top-1 acc 72.266 (71.769)	Top-5 acc 91.016 (88.678)	lr 0.00147
Train [102][960/3239]	Time 0.273 (0.617)	Data Time 0.001 (0.043)	Loss 2.1848 (2.1948)	Entropy 0.73972 (0.74126)	Top-1 acc 73.828 (71.768)	Top-5 acc 87.500 (88.678)	lr 0.00147
Train [102][970/3239]	Time 0.225 (0.616)	Data Time 0.001 (0.043)	Loss 2.1459 (2.1949)	Entropy 0.73971 (0.74124)	Top-1 acc 71.484 (71.761)	Top-5 acc 91.406 (88.675)	lr 0.00147
Train [102][980/3239]	Time 0.237 (0.614)	Data Time 0.001 (0.043)	Loss 1.9993 (2.1945)	Entropy 0.73969 (0.74123)	Top-1 acc 77.344 (71.775)	Top-5 acc 92.188 (88.683)	lr 0.00147
Train [102][990/3239]	Time 0.259 (0.613)	Data Time 0.001 (0.042)	Loss 2.1736 (2.1944)	Entropy 0.73968 (0.74121)	Top-1 acc 70.312 (71.778)	Top-5 acc 89.844 (88.681)	lr 0.00147
Train [102][1000/3239]	Time 2.505 (0.612)	Data Time 0.001 (0.042)	Loss 2.2748 (2.1943)	Entropy 0.73968 (0.74120)	Top-1 acc 71.875 (71.788)	Top-5 acc 85.156 (88.675)	lr 0.00147
Train [102][1010/3239]	Time 0.224 (0.608)	Data Time 0.001 (0.041)	Loss 2.0501 (2.1942)	Entropy 0.73969 (0.74118)	Top-1 acc 76.562 (71.787)	Top-5 acc 92.578 (88.682)	lr 0.00147
Train [102][1020/3239]	Time 0.335 (0.607)	Data Time 0.002 (0.041)	Loss 2.1092 (2.1940)	Entropy 0.73982 (0.74117)	Top-1 acc 75.781 (71.798)	Top-5 acc 91.016 (88.682)	lr 0.00147
Train [102][1030/3239]	Time 0.236 (0.605)	Data Time 0.001 (0.041)	Loss 2.3339 (2.1941)	Entropy 0.73971 (0.74115)	Top-1 acc 67.578 (71.788)	Top-5 acc 85.547 (88.679)	lr 0.00147
Train [102][1040/3239]	Time 0.234 (0.604)	Data Time 0.001 (0.040)	Loss 2.2598 (2.1941)	Entropy 0.73974 (0.74114)	Top-1 acc 72.266 (71.791)	Top-5 acc 88.672 (88.677)	lr 0.00147
Train [102][1050/3239]	Time 0.226 (0.603)	Data Time 0.001 (0.040)	Loss 2.0350 (2.1935)	Entropy 0.73961 (0.74113)	Top-1 acc 75.781 (71.812)	Top-5 acc 91.406 (88.686)	lr 0.00146
Train [102][1060/3239]	Time 0.332 (0.602)	Data Time 0.001 (0.039)	Loss 2.2634 (2.1936)	Entropy 0.73961 (0.74111)	Top-1 acc 71.094 (71.814)	Top-5 acc 88.281 (88.681)	lr 0.00146
Train [102][1070/3239]	Time 0.347 (0.650)	Data Time 0.003 (0.039)	Loss 2.1135 (2.1935)	Entropy 0.73955 (0.74110)	Top-1 acc 73.828 (71.817)	Top-5 acc 89.844 (88.683)	lr 0.00146
Train [102][1080/3239]	Time 0.249 (0.649)	Data Time 0.002 (0.039)	Loss 2.1913 (2.1933)	Entropy 0.73949 (0.74108)	Top-1 acc 71.094 (71.817)	Top-5 acc 88.281 (88.680)	lr 0.00146
Train [102][1090/3239]	Time 0.230 (0.647)	Data Time 0.001 (0.038)	Loss 2.2973 (2.1937)	Entropy 0.73949 (0.74107)	Top-1 acc 71.094 (71.812)	Top-5 acc 84.766 (88.668)	lr 0.00146
Train [102][1100/3239]	Time 0.316 (0.646)	Data Time 0.001 (0.038)	Loss 2.0710 (2.1936)	Entropy 0.73948 (0.74105)	Top-1 acc 75.781 (71.823)	Top-5 acc 90.625 (88.671)	lr 0.00146
Train [102][1110/3239]	Time 2.558 (0.644)	Data Time 0.001 (0.038)	Loss 2.2397 (2.1936)	Entropy 0.73948 (0.74104)	Top-1 acc 70.703 (71.826)	Top-5 acc 88.672 (88.669)	lr 0.00146
Train [102][1120/3239]	Time 0.236 (0.641)	Data Time 0.001 (0.037)	Loss 2.0769 (2.1936)	Entropy 0.73947 (0.74103)	Top-1 acc 76.562 (71.831)	Top-5 acc 91.406 (88.671)	lr 0.00146
Train [102][1130/3239]	Time 0.232 (0.639)	Data Time 0.001 (0.037)	Loss 2.3059 (2.1936)	Entropy 0.73947 (0.74101)	Top-1 acc 71.484 (71.829)	Top-5 acc 87.109 (88.674)	lr 0.00146
Train [102][1140/3239]	Time 0.243 (0.638)	Data Time 0.001 (0.037)	Loss 2.0915 (2.1934)	Entropy 0.73947 (0.74100)	Top-1 acc 76.953 (71.836)	Top-5 acc 91.016 (88.673)	lr 0.00146
Train [102][1150/3239]	Time 0.235 (0.636)	Data Time 0.001 (0.037)	Loss 2.1098 (2.1931)	Entropy 0.73942 (0.74098)	Top-1 acc 74.609 (71.840)	Top-5 acc 88.672 (88.679)	lr 0.00146
Train [102][1160/3239]	Time 0.235 (0.635)	Data Time 0.001 (0.036)	Loss 2.1735 (2.1934)	Entropy 0.73936 (0.74097)	Top-1 acc 73.047 (71.833)	Top-5 acc 89.453 (88.675)	lr 0.00146
Train [102][1170/3239]	Time 0.242 (0.634)	Data Time 0.002 (0.036)	Loss 2.2104 (2.1933)	Entropy 0.73934 (0.74096)	Top-1 acc 71.484 (71.835)	Top-5 acc 88.672 (88.679)	lr 0.00146
Train [102][1180/3239]	Time 0.241 (0.632)	Data Time 0.001 (0.036)	Loss 2.2135 (2.1939)	Entropy 0.73923 (0.74094)	Top-1 acc 69.922 (71.821)	Top-5 acc 89.844 (88.670)	lr 0.00146
Train [102][1190/3239]	Time 0.226 (0.631)	Data Time 0.001 (0.035)	Loss 2.4247 (2.1941)	Entropy 0.73923 (0.74093)	Top-1 acc 70.312 (71.825)	Top-5 acc 84.766 (88.670)	lr 0.00146
Train [102][1200/3239]	Time 0.235 (0.630)	Data Time 0.001 (0.035)	Loss 2.2014 (2.1941)	Entropy 0.73929 (0.74091)	Top-1 acc 70.703 (71.834)	Top-5 acc 88.281 (88.669)	lr 0.00146
Train [102][1210/3239]	Time 0.229 (0.629)	Data Time 0.001 (0.035)	Loss 2.1536 (2.1942)	Entropy 0.73930 (0.74090)	Top-1 acc 69.922 (71.830)	Top-5 acc 89.844 (88.668)	lr 0.00146
Train [102][1220/3239]	Time 2.584 (0.627)	Data Time 0.001 (0.035)	Loss 2.2233 (2.1939)	Entropy 0.73930 (0.74089)	Top-1 acc 72.656 (71.836)	Top-5 acc 86.719 (88.675)	lr 0.00146
Train [102][1230/3239]	Time 0.396 (0.624)	Data Time 0.001 (0.034)	Loss 2.3904 (2.1949)	Entropy 0.73918 (0.74087)	Top-1 acc 69.141 (71.813)	Top-5 acc 85.938 (88.659)	lr 0.00146
Train [102][1240/3239]	Time 0.235 (0.623)	Data Time 0.001 (0.034)	Loss 2.3415 (2.1952)	Entropy 0.73914 (0.74086)	Top-1 acc 64.844 (71.800)	Top-5 acc 87.891 (88.654)	lr 0.00146
Train [102][1250/3239]	Time 0.230 (0.622)	Data Time 0.001 (0.034)	Loss 2.3612 (2.1956)	Entropy 0.73905 (0.74085)	Top-1 acc 67.578 (71.785)	Top-5 acc 87.109 (88.649)	lr 0.00146
Train [102][1260/3239]	Time 0.238 (0.621)	Data Time 0.001 (0.033)	Loss 2.1584 (2.1955)	Entropy 0.73901 (0.74083)	Top-1 acc 72.656 (71.784)	Top-5 acc 89.453 (88.650)	lr 0.00145
Train [102][1270/3239]	Time 0.241 (0.620)	Data Time 0.001 (0.033)	Loss 2.1198 (2.1956)	Entropy 0.73899 (0.74082)	Top-1 acc 75.781 (71.787)	Top-5 acc 89.062 (88.650)	lr 0.00145
Train [102][1280/3239]	Time 0.241 (0.619)	Data Time 0.001 (0.033)	Loss 2.2131 (2.1954)	Entropy 0.73900 (0.74080)	Top-1 acc 69.531 (71.785)	Top-5 acc 86.719 (88.650)	lr 0.00145
Train [102][1290/3239]	Time 0.233 (0.618)	Data Time 0.001 (0.033)	Loss 2.2350 (2.1956)	Entropy 0.73952 (0.74079)	Top-1 acc 69.141 (71.780)	Top-5 acc 87.109 (88.650)	lr 0.00145
Train [102][1300/3239]	Time 0.239 (0.617)	Data Time 0.001 (0.032)	Loss 2.2132 (2.1955)	Entropy 0.73951 (0.74078)	Top-1 acc 72.656 (71.781)	Top-5 acc 88.281 (88.650)	lr 0.00145
Train [102][1310/3239]	Time 0.216 (0.615)	Data Time 0.001 (0.032)	Loss 2.2206 (2.1957)	Entropy 0.73957 (0.74077)	Top-1 acc 71.875 (71.778)	Top-5 acc 87.109 (88.647)	lr 0.00145
Train [102][1320/3239]	Time 0.223 (0.614)	Data Time 0.001 (0.032)	Loss 2.2251 (2.1955)	Entropy 0.73964 (0.74076)	Top-1 acc 69.531 (71.773)	Top-5 acc 90.234 (88.652)	lr 0.00145
Train [102][1330/3239]	Time 2.551 (0.613)	Data Time 0.001 (0.032)	Loss 2.1656 (2.1954)	Entropy 0.73964 (0.74075)	Top-1 acc 70.703 (71.773)	Top-5 acc 88.672 (88.656)	lr 0.00145
Train [102][1340/3239]	Time 0.238 (0.611)	Data Time 0.001 (0.032)	Loss 2.2030 (2.1952)	Entropy 0.73966 (0.74075)	Top-1 acc 70.703 (71.773)	Top-5 acc 89.844 (88.663)	lr 0.00145
Train [102][1350/3239]	Time 0.227 (0.610)	Data Time 0.001 (0.031)	Loss 2.2972 (2.1954)	Entropy 0.73964 (0.74074)	Top-1 acc 73.047 (71.769)	Top-5 acc 85.547 (88.656)	lr 0.00145
Train [102][1360/3239]	Time 0.353 (0.609)	Data Time 0.001 (0.031)	Loss 2.0930 (2.1953)	Entropy 0.73963 (0.74073)	Top-1 acc 77.344 (71.771)	Top-5 acc 89.062 (88.655)	lr 0.00145
Train [102][1370/3239]	Time 0.237 (0.608)	Data Time 0.001 (0.031)	Loss 2.2463 (2.1953)	Entropy 0.73962 (0.74072)	Top-1 acc 66.406 (71.768)	Top-5 acc 89.062 (88.653)	lr 0.00145
Train [102][1380/3239]	Time 0.238 (0.607)	Data Time 0.001 (0.031)	Loss 2.1595 (2.1951)	Entropy 0.73960 (0.74071)	Top-1 acc 72.266 (71.779)	Top-5 acc 89.453 (88.657)	lr 0.00145
Train [102][1390/3239]	Time 0.225 (0.606)	Data Time 0.001 (0.030)	Loss 2.4119 (2.1951)	Entropy 0.73962 (0.74071)	Top-1 acc 65.625 (71.774)	Top-5 acc 84.375 (88.655)	lr 0.00145
Train [102][1400/3239]	Time 0.307 (0.605)	Data Time 0.001 (0.030)	Loss 2.2375 (2.1956)	Entropy 0.73960 (0.74070)	Top-1 acc 71.094 (71.754)	Top-5 acc 86.719 (88.651)	lr 0.00145
Train [102][1410/3239]	Time 0.236 (0.604)	Data Time 0.001 (0.030)	Loss 2.2175 (2.1957)	Entropy 0.73941 (0.74069)	Top-1 acc 71.875 (71.755)	Top-5 acc 89.844 (88.654)	lr 0.00145
Train [102][1420/3239]	Time 0.230 (0.603)	Data Time 0.001 (0.030)	Loss 2.2522 (2.1954)	Entropy 0.73935 (0.74068)	Top-1 acc 69.531 (71.761)	Top-5 acc 87.500 (88.659)	lr 0.00145
Train [102][1430/3239]	Time 0.294 (0.638)	Data Time 0.004 (0.030)	Loss 2.1030 (2.1954)	Entropy 0.73938 (0.74067)	Top-1 acc 74.609 (71.764)	Top-5 acc 90.625 (88.660)	lr 0.00145
Train [102][1440/3239]	Time 2.966 (0.638)	Data Time 0.003 (0.029)	Loss 2.1815 (2.1953)	Entropy 0.73938 (0.74066)	Top-1 acc 70.703 (71.770)	Top-5 acc 88.281 (88.662)	lr 0.00145
Train [102][1450/3239]	Time 0.232 (0.635)	Data Time 0.002 (0.029)	Loss 2.0175 (2.1949)	Entropy 0.73935 (0.74065)	Top-1 acc 75.781 (71.779)	Top-5 acc 92.969 (88.668)	lr 0.00145
Train [102][1460/3239]	Time 0.233 (0.634)	Data Time 0.001 (0.029)	Loss 2.0864 (2.1950)	Entropy 0.73938 (0.74064)	Top-1 acc 72.656 (71.779)	Top-5 acc 90.234 (88.666)	lr 0.00145
Train [102][1470/3239]	Time 0.227 (0.633)	Data Time 0.001 (0.029)	Loss 2.1143 (2.1948)	Entropy 0.73936 (0.74064)	Top-1 acc 76.562 (71.782)	Top-5 acc 89.062 (88.672)	lr 0.00144
Train [102][1480/3239]	Time 0.231 (0.632)	Data Time 0.001 (0.029)	Loss 2.1260 (2.1949)	Entropy 0.73930 (0.74063)	Top-1 acc 72.656 (71.780)	Top-5 acc 92.578 (88.676)	lr 0.00144
Train [102][1490/3239]	Time 0.215 (0.631)	Data Time 0.001 (0.029)	Loss 2.2645 (2.1946)	Entropy 0.73918 (0.74062)	Top-1 acc 69.531 (71.790)	Top-5 acc 87.109 (88.681)	lr 0.00144
Train [102][1500/3239]	Time 0.235 (0.630)	Data Time 0.001 (0.028)	Loss 2.1537 (2.1947)	Entropy 0.73915 (0.74061)	Top-1 acc 73.438 (71.782)	Top-5 acc 90.625 (88.686)	lr 0.00144
Train [102][1510/3239]	Time 0.223 (0.629)	Data Time 0.002 (0.028)	Loss 2.1083 (2.1948)	Entropy 0.73910 (0.74060)	Top-1 acc 73.047 (71.772)	Top-5 acc 90.234 (88.686)	lr 0.00144
Train [102][1520/3239]	Time 0.253 (0.628)	Data Time 0.001 (0.028)	Loss 2.3809 (2.1949)	Entropy 0.73912 (0.74059)	Top-1 acc 69.531 (71.771)	Top-5 acc 86.328 (88.683)	lr 0.00144
Train [102][1530/3239]	Time 0.229 (0.627)	Data Time 0.001 (0.028)	Loss 2.1753 (2.1951)	Entropy 0.73908 (0.74058)	Top-1 acc 73.438 (71.768)	Top-5 acc 90.625 (88.683)	lr 0.00144
Train [102][1540/3239]	Time 0.245 (0.626)	Data Time 0.001 (0.028)	Loss 2.3630 (2.1951)	Entropy 0.73914 (0.74057)	Top-1 acc 66.797 (71.767)	Top-5 acc 86.328 (88.684)	lr 0.00144
Train [102][1550/3239]	Time 2.529 (0.625)	Data Time 0.001 (0.028)	Loss 2.1482 (2.1954)	Entropy 0.73914 (0.74056)	Top-1 acc 72.266 (71.764)	Top-5 acc 87.500 (88.674)	lr 0.00144
Train [102][1560/3239]	Time 0.250 (0.623)	Data Time 0.001 (0.027)	Loss 2.1257 (2.1955)	Entropy 0.73907 (0.74055)	Top-1 acc 71.094 (71.759)	Top-5 acc 91.016 (88.676)	lr 0.00144
Train [102][1570/3239]	Time 0.225 (0.622)	Data Time 0.001 (0.027)	Loss 2.0868 (2.1953)	Entropy 0.73897 (0.74054)	Top-1 acc 73.828 (71.762)	Top-5 acc 91.797 (88.679)	lr 0.00144
Train [102][1580/3239]	Time 0.254 (0.621)	Data Time 0.002 (0.027)	Loss 2.1891 (2.1956)	Entropy 0.73891 (0.74053)	Top-1 acc 71.484 (71.756)	Top-5 acc 90.625 (88.675)	lr 0.00144
Train [102][1590/3239]	Time 0.227 (0.620)	Data Time 0.001 (0.027)	Loss 2.2153 (2.1956)	Entropy 0.73886 (0.74052)	Top-1 acc 69.922 (71.754)	Top-5 acc 87.500 (88.675)	lr 0.00144
Train [102][1600/3239]	Time 0.241 (0.619)	Data Time 0.001 (0.027)	Loss 2.3094 (2.1958)	Entropy 0.73881 (0.74051)	Top-1 acc 67.969 (71.751)	Top-5 acc 89.453 (88.670)	lr 0.00144
Train [102][1610/3239]	Time 0.323 (0.618)	Data Time 0.001 (0.027)	Loss 2.1236 (2.1958)	Entropy 0.73877 (0.74050)	Top-1 acc 72.656 (71.749)	Top-5 acc 91.016 (88.669)	lr 0.00144
Train [102][1620/3239]	Time 0.254 (0.617)	Data Time 0.001 (0.026)	Loss 2.2267 (2.1956)	Entropy 0.73877 (0.74049)	Top-1 acc 69.531 (71.755)	Top-5 acc 88.281 (88.676)	lr 0.00144
Train [102][1630/3239]	Time 0.229 (0.617)	Data Time 0.001 (0.026)	Loss 2.1983 (2.1956)	Entropy 0.73874 (0.74048)	Top-1 acc 70.703 (71.746)	Top-5 acc 88.672 (88.675)	lr 0.00144
Train [102][1640/3239]	Time 0.234 (0.616)	Data Time 0.001 (0.026)	Loss 2.1664 (2.1959)	Entropy 0.73870 (0.74047)	Top-1 acc 71.484 (71.737)	Top-5 acc 89.453 (88.674)	lr 0.00144
Train [102][1650/3239]	Time 0.347 (0.615)	Data Time 0.001 (0.026)	Loss 2.0767 (2.1960)	Entropy 0.73871 (0.74046)	Top-1 acc 75.781 (71.739)	Top-5 acc 90.234 (88.673)	lr 0.00144
Train [102][1660/3239]	Time 2.582 (0.614)	Data Time 0.001 (0.026)	Loss 2.2354 (2.1962)	Entropy 0.73871 (0.74044)	Top-1 acc 71.484 (71.735)	Top-5 acc 89.062 (88.673)	lr 0.00144
Train [102][1670/3239]	Time 0.260 (0.612)	Data Time 0.002 (0.026)	Loss 2.3441 (2.1964)	Entropy 0.73864 (0.74043)	Top-1 acc 68.359 (71.736)	Top-5 acc 85.547 (88.666)	lr 0.00144
Train [102][1680/3239]	Time 0.222 (0.611)	Data Time 0.001 (0.025)	Loss 2.2134 (2.1963)	Entropy 0.73866 (0.74042)	Top-1 acc 70.312 (71.736)	Top-5 acc 88.672 (88.666)	lr 0.00143
Train [102][1690/3239]	Time 0.258 (0.610)	Data Time 0.001 (0.025)	Loss 2.3165 (2.1965)	Entropy 0.73866 (0.74041)	Top-1 acc 68.359 (71.731)	Top-5 acc 83.594 (88.666)	lr 0.00143
Train [102][1700/3239]	Time 0.235 (0.610)	Data Time 0.001 (0.025)	Loss 2.1596 (2.1964)	Entropy 0.73868 (0.74040)	Top-1 acc 73.047 (71.740)	Top-5 acc 88.672 (88.665)	lr 0.00143
Train [102][1710/3239]	Time 0.231 (0.609)	Data Time 0.001 (0.025)	Loss 2.1543 (2.1969)	Entropy 0.73868 (0.74039)	Top-1 acc 73.438 (71.729)	Top-5 acc 89.062 (88.659)	lr 0.00143
Train [102][1720/3239]	Time 0.214 (0.608)	Data Time 0.001 (0.025)	Loss 2.2201 (2.1969)	Entropy 0.73865 (0.74038)	Top-1 acc 73.047 (71.731)	Top-5 acc 88.281 (88.661)	lr 0.00143
Train [102][1730/3239]	Time 0.237 (0.607)	Data Time 0.001 (0.025)	Loss 2.4574 (2.1968)	Entropy 0.73869 (0.74037)	Top-1 acc 64.844 (71.731)	Top-5 acc 85.156 (88.664)	lr 0.00143
Train [102][1740/3239]	Time 0.259 (0.607)	Data Time 0.001 (0.025)	Loss 2.2007 (2.1968)	Entropy 0.73865 (0.74036)	Top-1 acc 67.969 (71.730)	Top-5 acc 87.891 (88.661)	lr 0.00143
Train [102][1750/3239]	Time 0.230 (0.606)	Data Time 0.001 (0.025)	Loss 2.0577 (2.1967)	Entropy 0.73863 (0.74035)	Top-1 acc 75.000 (71.735)	Top-5 acc 91.797 (88.661)	lr 0.00143
Train [102][1760/3239]	Time 0.231 (0.605)	Data Time 0.001 (0.024)	Loss 2.1734 (2.1963)	Entropy 0.73870 (0.74034)	Top-1 acc 69.141 (71.746)	Top-5 acc 88.281 (88.667)	lr 0.00143
Train [102][1770/3239]	Time 2.629 (0.605)	Data Time 0.001 (0.024)	Loss 2.0930 (2.1962)	Entropy 0.73870 (0.74033)	Top-1 acc 76.562 (71.749)	Top-5 acc 91.797 (88.668)	lr 0.00143
Train [102][1780/3239]	Time 0.304 (0.603)	Data Time 0.001 (0.024)	Loss 2.2107 (2.1964)	Entropy 0.73872 (0.74033)	Top-1 acc 71.484 (71.743)	Top-5 acc 89.062 (88.665)	lr 0.00143
Train [102][1790/3239]	Time 0.234 (0.602)	Data Time 0.002 (0.024)	Loss 1.9610 (2.1962)	Entropy 0.73868 (0.74032)	Top-1 acc 76.953 (71.754)	Top-5 acc 92.969 (88.671)	lr 0.00143
Train [102][1800/3239]	Time 0.242 (0.632)	Data Time 0.002 (0.024)	Loss 2.0823 (2.1961)	Entropy 0.73869 (0.74031)	Top-1 acc 74.609 (71.754)	Top-5 acc 91.797 (88.672)	lr 0.00143
Train [102][1810/3239]	Time 0.231 (0.631)	Data Time 0.002 (0.024)	Loss 2.2655 (2.1964)	Entropy 0.73869 (0.74030)	Top-1 acc 69.922 (71.747)	Top-5 acc 89.062 (88.668)	lr 0.00143
Train [102][1820/3239]	Time 0.231 (0.631)	Data Time 0.001 (0.024)	Loss 2.1815 (2.1968)	Entropy 0.73866 (0.74029)	Top-1 acc 73.047 (71.737)	Top-5 acc 88.281 (88.660)	lr 0.00143
Train [102][1830/3239]	Time 0.234 (0.630)	Data Time 0.001 (0.024)	Loss 2.3086 (2.1967)	Entropy 0.73865 (0.74028)	Top-1 acc 69.922 (71.743)	Top-5 acc 85.938 (88.663)	lr 0.00143
Train [102][1840/3239]	Time 0.233 (0.629)	Data Time 0.001 (0.023)	Loss 2.3012 (2.1969)	Entropy 0.73862 (0.74027)	Top-1 acc 71.094 (71.745)	Top-5 acc 85.938 (88.657)	lr 0.00143
Train [102][1850/3239]	Time 0.231 (0.628)	Data Time 0.001 (0.023)	Loss 2.1527 (2.1967)	Entropy 0.73860 (0.74026)	Top-1 acc 73.438 (71.748)	Top-5 acc 89.844 (88.657)	lr 0.00143
Train [102][1860/3239]	Time 0.346 (0.627)	Data Time 0.002 (0.023)	Loss 2.2999 (2.1967)	Entropy 0.73849 (0.74025)	Top-1 acc 68.359 (71.745)	Top-5 acc 87.500 (88.659)	lr 0.00143
Train [102][1870/3239]	Time 0.237 (0.626)	Data Time 0.001 (0.023)	Loss 2.1690 (2.1967)	Entropy 0.73836 (0.74024)	Top-1 acc 69.141 (71.749)	Top-5 acc 89.453 (88.661)	lr 0.00143
Train [102][1880/3239]	Time 2.568 (0.626)	Data Time 0.001 (0.023)	Loss 2.2369 (2.1967)	Entropy 0.73836 (0.74023)	Top-1 acc 70.703 (71.747)	Top-5 acc 88.672 (88.659)	lr 0.00143
Train [102][1890/3239]	Time 0.254 (0.624)	Data Time 0.001 (0.023)	Loss 2.1855 (2.1967)	Entropy 0.73833 (0.74022)	Top-1 acc 69.922 (71.749)	Top-5 acc 90.234 (88.659)	lr 0.00142
Train [102][1900/3239]	Time 0.239 (0.623)	Data Time 0.001 (0.023)	Loss 2.2680 (2.1969)	Entropy 0.73827 (0.74021)	Top-1 acc 69.531 (71.742)	Top-5 acc 87.500 (88.656)	lr 0.00142
Train [102][1910/3239]	Time 0.217 (0.622)	Data Time 0.001 (0.023)	Loss 2.1978 (2.1970)	Entropy 0.73795 (0.74020)	Top-1 acc 73.828 (71.738)	Top-5 acc 86.328 (88.649)	lr 0.00142
Train [102][1920/3239]	Time 0.230 (0.621)	Data Time 0.001 (0.023)	Loss 2.2822 (2.1969)	Entropy 0.73794 (0.74019)	Top-1 acc 71.484 (71.740)	Top-5 acc 87.500 (88.651)	lr 0.00142
Train [102][1930/3239]	Time 0.223 (0.621)	Data Time 0.001 (0.022)	Loss 2.2121 (2.1969)	Entropy 0.73793 (0.74018)	Top-1 acc 69.922 (71.739)	Top-5 acc 88.281 (88.650)	lr 0.00142
Train [102][1940/3239]	Time 0.232 (0.620)	Data Time 0.001 (0.022)	Loss 2.3555 (2.1971)	Entropy 0.73788 (0.74017)	Top-1 acc 66.797 (71.733)	Top-5 acc 87.500 (88.646)	lr 0.00142
Train [102][1950/3239]	Time 0.227 (0.619)	Data Time 0.001 (0.022)	Loss 2.2724 (2.1973)	Entropy 0.73779 (0.74015)	Top-1 acc 70.312 (71.724)	Top-5 acc 85.547 (88.641)	lr 0.00142
Train [102][1960/3239]	Time 0.225 (0.619)	Data Time 0.001 (0.022)	Loss 2.1349 (2.1975)	Entropy 0.73786 (0.74014)	Top-1 acc 76.172 (71.722)	Top-5 acc 90.234 (88.637)	lr 0.00142
Train [102][1970/3239]	Time 0.232 (0.618)	Data Time 0.001 (0.022)	Loss 2.1918 (2.1975)	Entropy 0.73782 (0.74013)	Top-1 acc 69.141 (71.724)	Top-5 acc 89.844 (88.637)	lr 0.00142
Train [102][1980/3239]	Time 0.220 (0.617)	Data Time 0.001 (0.022)	Loss 2.1374 (2.1974)	Entropy 0.73776 (0.74012)	Top-1 acc 77.734 (71.727)	Top-5 acc 89.062 (88.639)	lr 0.00142
Train [102][1990/3239]	Time 2.669 (0.616)	Data Time 0.001 (0.022)	Loss 2.0757 (2.1975)	Entropy 0.73776 (0.74011)	Top-1 acc 74.219 (71.725)	Top-5 acc 92.969 (88.640)	lr 0.00142
Train [102][2000/3239]	Time 0.224 (0.614)	Data Time 0.001 (0.022)	Loss 2.0194 (2.1973)	Entropy 0.73761 (0.74010)	Top-1 acc 73.438 (71.727)	Top-5 acc 91.797 (88.643)	lr 0.00142
Train [102][2010/3239]	Time 0.233 (0.614)	Data Time 0.001 (0.022)	Loss 2.2891 (2.1973)	Entropy 0.73760 (0.74008)	Top-1 acc 69.531 (71.728)	Top-5 acc 87.109 (88.641)	lr 0.00142
Train [102][2020/3239]	Time 0.252 (0.613)	Data Time 0.001 (0.021)	Loss 2.1852 (2.1974)	Entropy 0.73748 (0.74007)	Top-1 acc 72.266 (71.724)	Top-5 acc 87.891 (88.638)	lr 0.00142
Train [102][2030/3239]	Time 0.322 (0.612)	Data Time 0.001 (0.021)	Loss 2.1430 (2.1972)	Entropy 0.73744 (0.74006)	Top-1 acc 73.047 (71.729)	Top-5 acc 90.234 (88.642)	lr 0.00142
Train [102][2040/3239]	Time 0.230 (0.612)	Data Time 0.001 (0.021)	Loss 2.0918 (2.1971)	Entropy 0.73747 (0.74004)	Top-1 acc 73.047 (71.728)	Top-5 acc 91.016 (88.642)	lr 0.00142
Train [102][2050/3239]	Time 0.233 (0.611)	Data Time 0.001 (0.021)	Loss 2.3112 (2.1970)	Entropy 0.73750 (0.74003)	Top-1 acc 67.969 (71.730)	Top-5 acc 85.156 (88.643)	lr 0.00142
Train [102][2060/3239]	Time 0.245 (0.611)	Data Time 0.001 (0.021)	Loss 2.1349 (2.1971)	Entropy 0.73747 (0.74002)	Top-1 acc 75.000 (71.731)	Top-5 acc 90.625 (88.641)	lr 0.00142
Train [102][2070/3239]	Time 0.346 (0.610)	Data Time 0.001 (0.021)	Loss 1.9870 (2.1973)	Entropy 0.73736 (0.74001)	Top-1 acc 76.953 (71.723)	Top-5 acc 90.234 (88.636)	lr 0.00142
Train [102][2080/3239]	Time 0.234 (0.610)	Data Time 0.001 (0.021)	Loss 2.1538 (2.1974)	Entropy 0.73738 (0.73999)	Top-1 acc 73.047 (71.719)	Top-5 acc 89.453 (88.632)	lr 0.00142
Train [102][2090/3239]	Time 0.221 (0.609)	Data Time 0.001 (0.021)	Loss 2.2008 (2.1974)	Entropy 0.73737 (0.73998)	Top-1 acc 69.922 (71.717)	Top-5 acc 88.281 (88.628)	lr 0.00142
Train [102][2100/3239]	Time 2.590 (0.608)	Data Time 0.001 (0.021)	Loss 2.1853 (2.1975)	Entropy 0.73737 (0.73997)	Top-1 acc 71.094 (71.713)	Top-5 acc 89.453 (88.629)	lr 0.00142
Train [102][2110/3239]	Time 0.237 (0.606)	Data Time 0.001 (0.021)	Loss 2.0863 (2.1974)	Entropy 0.73737 (0.73996)	Top-1 acc 78.516 (71.715)	Top-5 acc 91.016 (88.632)	lr 0.00141
Train [102][2120/3239]	Time 0.242 (0.606)	Data Time 0.001 (0.021)	Loss 2.2760 (2.1976)	Entropy 0.73745 (0.73995)	Top-1 acc 65.625 (71.704)	Top-5 acc 84.766 (88.629)	lr 0.00141
Train [102][2130/3239]	Time 0.298 (0.606)	Data Time 0.003 (0.020)	Loss 1.9957 (2.1977)	Entropy 0.73741 (0.73993)	Top-1 acc 76.953 (71.701)	Top-5 acc 91.797 (88.627)	lr 0.00141
Train [102][2140/3239]	Time 0.274 (0.605)	Data Time 0.001 (0.020)	Loss 2.2354 (2.1977)	Entropy 0.73743 (0.73992)	Top-1 acc 73.047 (71.700)	Top-5 acc 86.328 (88.626)	lr 0.00141
Train [102][2150/3239]	Time 0.259 (0.605)	Data Time 0.002 (0.020)	Loss 2.1055 (2.1978)	Entropy 0.73737 (0.73991)	Top-1 acc 72.656 (71.696)	Top-5 acc 90.625 (88.624)	lr 0.00141
Train [102][2160/3239]	Time 0.243 (0.631)	Data Time 0.003 (0.020)	Loss 2.1717 (2.1981)	Entropy 0.73737 (0.73990)	Top-1 acc 71.484 (71.687)	Top-5 acc 89.453 (88.618)	lr 0.00141
Train [102][2170/3239]	Time 0.227 (0.630)	Data Time 0.002 (0.020)	Loss 2.1472 (2.1980)	Entropy 0.73732 (0.73989)	Top-1 acc 75.391 (71.690)	Top-5 acc 88.281 (88.618)	lr 0.00141
Train [102][2180/3239]	Time 0.238 (0.630)	Data Time 0.002 (0.020)	Loss 2.1051 (2.1977)	Entropy 0.73725 (0.73987)	Top-1 acc 76.172 (71.696)	Top-5 acc 88.672 (88.619)	lr 0.00141
Train [102][2190/3239]	Time 0.234 (0.629)	Data Time 0.001 (0.020)	Loss 2.2788 (2.1980)	Entropy 0.73724 (0.73986)	Top-1 acc 67.969 (71.689)	Top-5 acc 88.281 (88.614)	lr 0.00141
Train [102][2200/3239]	Time 0.223 (0.628)	Data Time 0.001 (0.020)	Loss 2.1401 (2.1980)	Entropy 0.73704 (0.73985)	Top-1 acc 72.656 (71.688)	Top-5 acc 90.234 (88.615)	lr 0.00141
Train [102][2210/3239]	Time 2.545 (0.628)	Data Time 0.001 (0.020)	Loss 2.2300 (2.1979)	Entropy 0.73704 (0.73984)	Top-1 acc 71.484 (71.690)	Top-5 acc 87.109 (88.614)	lr 0.00141
Train [102][2220/3239]	Time 0.225 (0.626)	Data Time 0.001 (0.020)	Loss 2.2069 (2.1979)	Entropy 0.73701 (0.73983)	Top-1 acc 72.266 (71.693)	Top-5 acc 86.719 (88.614)	lr 0.00141
Train [102][2230/3239]	Time 0.250 (0.625)	Data Time 0.002 (0.020)	Loss 2.0083 (2.1979)	Entropy 0.73717 (0.73981)	Top-1 acc 78.906 (71.698)	Top-5 acc 91.016 (88.613)	lr 0.00141
Train [102][2240/3239]	Time 0.341 (0.625)	Data Time 0.001 (0.020)	Loss 2.2227 (2.1978)	Entropy 0.73717 (0.73980)	Top-1 acc 73.438 (71.701)	Top-5 acc 87.500 (88.615)	lr 0.00141
Train [102][2250/3239]	Time 0.218 (0.624)	Data Time 0.001 (0.019)	Loss 2.3180 (2.1978)	Entropy 0.73707 (0.73979)	Top-1 acc 67.969 (71.703)	Top-5 acc 83.984 (88.613)	lr 0.00141
Train [102][2260/3239]	Time 0.272 (0.623)	Data Time 0.002 (0.019)	Loss 2.0906 (2.1979)	Entropy 0.73707 (0.73978)	Top-1 acc 73.438 (71.701)	Top-5 acc 91.016 (88.609)	lr 0.00141
Train [102][2270/3239]	Time 0.223 (0.623)	Data Time 0.001 (0.019)	Loss 2.1614 (2.1980)	Entropy 0.73706 (0.73977)	Top-1 acc 73.047 (71.697)	Top-5 acc 88.281 (88.605)	lr 0.00141
Train [102][2280/3239]	Time 0.343 (0.622)	Data Time 0.001 (0.019)	Loss 2.2803 (2.1982)	Entropy 0.73730 (0.73975)	Top-1 acc 67.188 (71.691)	Top-5 acc 89.844 (88.604)	lr 0.00141
Train [102][2290/3239]	Time 0.218 (0.622)	Data Time 0.001 (0.019)	Loss 2.2262 (2.1982)	Entropy 0.73729 (0.73974)	Top-1 acc 71.484 (71.695)	Top-5 acc 87.109 (88.605)	lr 0.00141
Train [102][2300/3239]	Time 0.240 (0.621)	Data Time 0.001 (0.019)	Loss 2.2353 (2.1983)	Entropy 0.73720 (0.73973)	Top-1 acc 74.609 (71.691)	Top-5 acc 88.672 (88.604)	lr 0.00141
Train [102][2310/3239]	Time 0.227 (0.620)	Data Time 0.001 (0.019)	Loss 2.4548 (2.1984)	Entropy 0.73719 (0.73972)	Top-1 acc 64.844 (71.685)	Top-5 acc 81.250 (88.600)	lr 0.00141
Train [102][2320/3239]	Time 2.773 (0.620)	Data Time 0.001 (0.019)	Loss 2.0760 (2.1984)	Entropy 0.73719 (0.73971)	Top-1 acc 73.828 (71.684)	Top-5 acc 91.797 (88.600)	lr 0.00140
Train [102][2330/3239]	Time 0.250 (0.618)	Data Time 0.001 (0.019)	Loss 2.0670 (2.1981)	Entropy 0.73715 (0.73970)	Top-1 acc 74.609 (71.693)	Top-5 acc 91.797 (88.606)	lr 0.00140
Train [102][2340/3239]	Time 0.245 (0.618)	Data Time 0.002 (0.019)	Loss 2.2900 (2.1982)	Entropy 0.73720 (0.73969)	Top-1 acc 72.266 (71.695)	Top-5 acc 85.547 (88.602)	lr 0.00140
Train [102][2350/3239]	Time 0.246 (0.617)	Data Time 0.001 (0.019)	Loss 2.1370 (2.1981)	Entropy 0.73724 (0.73968)	Top-1 acc 76.172 (71.696)	Top-5 acc 89.062 (88.603)	lr 0.00140
Train [102][2360/3239]	Time 0.229 (0.616)	Data Time 0.001 (0.019)	Loss 2.1899 (2.1982)	Entropy 0.73711 (0.73967)	Top-1 acc 68.750 (71.696)	Top-5 acc 88.672 (88.600)	lr 0.00140
Train [102][2370/3239]	Time 0.234 (0.616)	Data Time 0.001 (0.019)	Loss 2.1200 (2.1980)	Entropy 0.73715 (0.73966)	Top-1 acc 73.828 (71.698)	Top-5 acc 90.625 (88.601)	lr 0.00140
Train [102][2380/3239]	Time 0.241 (0.615)	Data Time 0.001 (0.018)	Loss 2.1724 (2.1980)	Entropy 0.73715 (0.73965)	Top-1 acc 71.875 (71.697)	Top-5 acc 90.625 (88.601)	lr 0.00140
Train [102][2390/3239]	Time 0.233 (0.615)	Data Time 0.001 (0.018)	Loss 2.1766 (2.1981)	Entropy 0.73704 (0.73964)	Top-1 acc 71.484 (71.696)	Top-5 acc 88.672 (88.598)	lr 0.00140
Train [102][2400/3239]	Time 0.231 (0.614)	Data Time 0.001 (0.018)	Loss 2.1592 (2.1980)	Entropy 0.73699 (0.73963)	Top-1 acc 75.391 (71.699)	Top-5 acc 89.062 (88.598)	lr 0.00140
Train [102][2410/3239]	Time 0.223 (0.613)	Data Time 0.001 (0.018)	Loss 2.0910 (2.1979)	Entropy 0.73687 (0.73961)	Top-1 acc 72.266 (71.701)	Top-5 acc 90.625 (88.602)	lr 0.00140
Train [102][2420/3239]	Time 0.248 (0.613)	Data Time 0.001 (0.018)	Loss 2.0453 (2.1977)	Entropy 0.73683 (0.73960)	Top-1 acc 73.047 (71.699)	Top-5 acc 94.531 (88.604)	lr 0.00140
Train [102][2430/3239]	Time 2.681 (0.612)	Data Time 0.001 (0.018)	Loss 2.0079 (2.1978)	Entropy 0.73683 (0.73959)	Top-1 acc 78.516 (71.698)	Top-5 acc 92.969 (88.603)	lr 0.00140
Train [102][2440/3239]	Time 0.264 (0.611)	Data Time 0.001 (0.018)	Loss 2.3742 (2.1978)	Entropy 0.73680 (0.73958)	Top-1 acc 68.750 (71.699)	Top-5 acc 85.156 (88.603)	lr 0.00140
Train [102][2450/3239]	Time 0.229 (0.610)	Data Time 0.001 (0.018)	Loss 2.1337 (2.1978)	Entropy 0.73703 (0.73957)	Top-1 acc 72.656 (71.700)	Top-5 acc 90.625 (88.602)	lr 0.00140
Train [102][2460/3239]	Time 0.222 (0.610)	Data Time 0.001 (0.018)	Loss 2.2365 (2.1979)	Entropy 0.73702 (0.73956)	Top-1 acc 68.359 (71.692)	Top-5 acc 89.453 (88.599)	lr 0.00140
Train [102][2470/3239]	Time 0.231 (0.609)	Data Time 0.001 (0.018)	Loss 2.0648 (2.1977)	Entropy 0.73702 (0.73955)	Top-1 acc 75.781 (71.698)	Top-5 acc 92.969 (88.602)	lr 0.00140
Train [102][2480/3239]	Time 0.263 (0.609)	Data Time 0.002 (0.018)	Loss 2.1384 (2.1976)	Entropy 0.73708 (0.73954)	Top-1 acc 74.219 (71.703)	Top-5 acc 86.719 (88.603)	lr 0.00140
Train [102][2490/3239]	Time 0.337 (0.608)	Data Time 0.001 (0.018)	Loss 2.2635 (2.1977)	Entropy 0.73707 (0.73953)	Top-1 acc 71.094 (71.702)	Top-5 acc 87.500 (88.602)	lr 0.00140
Train [102][2500/3239]	Time 0.233 (0.608)	Data Time 0.001 (0.018)	Loss 2.2069 (2.1977)	Entropy 0.73705 (0.73952)	Top-1 acc 69.922 (71.699)	Top-5 acc 90.234 (88.605)	lr 0.00140
Train [102][2510/3239]	Time 0.232 (0.607)	Data Time 0.001 (0.018)	Loss 2.2116 (2.1978)	Entropy 0.73703 (0.73951)	Top-1 acc 72.266 (71.693)	Top-5 acc 88.672 (88.601)	lr 0.00140
Train [102][2520/3239]	Time 0.329 (0.628)	Data Time 0.002 (0.018)	Loss 2.3500 (2.1980)	Entropy 0.73704 (0.73950)	Top-1 acc 68.750 (71.694)	Top-5 acc 85.156 (88.598)	lr 0.00140
Train [102][2530/3239]	Time 0.390 (0.628)	Data Time 0.002 (0.017)	Loss 2.2657 (2.1979)	Entropy 0.73706 (0.73949)	Top-1 acc 70.703 (71.695)	Top-5 acc 86.719 (88.600)	lr 0.00140
Train [102][2540/3239]	Time 2.642 (0.627)	Data Time 0.002 (0.017)	Loss 2.2218 (2.1977)	Entropy 0.73706 (0.73948)	Top-1 acc 70.312 (71.699)	Top-5 acc 87.891 (88.601)	lr 0.00139
Train [102][2550/3239]	Time 0.244 (0.626)	Data Time 0.002 (0.017)	Loss 2.0769 (2.1977)	Entropy 0.73691 (0.73947)	Top-1 acc 76.562 (71.701)	Top-5 acc 89.844 (88.600)	lr 0.00139
Train [102][2560/3239]	Time 0.234 (0.625)	Data Time 0.002 (0.017)	Loss 2.1026 (2.1974)	Entropy 0.73695 (0.73946)	Top-1 acc 74.219 (71.704)	Top-5 acc 91.406 (88.607)	lr 0.00139
Train [102][2570/3239]	Time 0.238 (0.625)	Data Time 0.001 (0.017)	Loss 2.3972 (2.1974)	Entropy 0.73690 (0.73945)	Top-1 acc 68.750 (71.708)	Top-5 acc 86.719 (88.610)	lr 0.00139
Train [102][2580/3239]	Time 0.242 (0.624)	Data Time 0.001 (0.017)	Loss 2.2035 (2.1975)	Entropy 0.73688 (0.73944)	Top-1 acc 72.656 (71.703)	Top-5 acc 86.719 (88.610)	lr 0.00139
Train [102][2590/3239]	Time 0.242 (0.624)	Data Time 0.001 (0.017)	Loss 2.0844 (2.1973)	Entropy 0.73688 (0.73943)	Top-1 acc 75.781 (71.708)	Top-5 acc 91.797 (88.614)	lr 0.00139
Train [102][2600/3239]	Time 0.236 (0.623)	Data Time 0.001 (0.017)	Loss 2.2059 (2.1974)	Entropy 0.73677 (0.73942)	Top-1 acc 74.219 (71.708)	Top-5 acc 86.719 (88.610)	lr 0.00139
Train [102][2610/3239]	Time 0.232 (0.622)	Data Time 0.001 (0.017)	Loss 2.2445 (2.1973)	Entropy 0.73682 (0.73941)	Top-1 acc 70.312 (71.711)	Top-5 acc 87.891 (88.610)	lr 0.00139
Train [102][2620/3239]	Time 0.229 (0.622)	Data Time 0.001 (0.017)	Loss 2.1416 (2.1974)	Entropy 0.73671 (0.73940)	Top-1 acc 72.656 (71.709)	Top-5 acc 90.234 (88.609)	lr 0.00139
Train [102][2630/3239]	Time 0.248 (0.621)	Data Time 0.001 (0.017)	Loss 2.2216 (2.1974)	Entropy 0.73665 (0.73939)	Top-1 acc 72.266 (71.711)	Top-5 acc 88.672 (88.609)	lr 0.00139
Train [102][2640/3239]	Time 0.222 (0.621)	Data Time 0.001 (0.017)	Loss 2.2682 (2.1973)	Entropy 0.73658 (0.73938)	Top-1 acc 68.750 (71.713)	Top-5 acc 86.328 (88.610)	lr 0.00139
Train [102][2650/3239]	Time 0.236 (0.620)	Data Time 0.001 (0.017)	Loss 2.1127 (2.1974)	Entropy 0.73657 (0.73937)	Top-1 acc 72.266 (71.709)	Top-5 acc 91.406 (88.610)	lr 0.00139
Train [102][2660/3239]	Time 0.401 (0.620)	Data Time 0.001 (0.017)	Loss 2.2260 (2.1975)	Entropy 0.73656 (0.73936)	Top-1 acc 69.141 (71.706)	Top-5 acc 87.109 (88.607)	lr 0.00139
Train [102][2670/3239]	Time 0.231 (0.619)	Data Time 0.001 (0.017)	Loss 2.1283 (2.1975)	Entropy 0.73648 (0.73935)	Top-1 acc 73.047 (71.707)	Top-5 acc 89.453 (88.607)	lr 0.00139
Train [102][2680/3239]	Time 0.240 (0.619)	Data Time 0.001 (0.017)	Loss 2.1605 (2.1976)	Entropy 0.73653 (0.73934)	Top-1 acc 76.562 (71.704)	Top-5 acc 89.062 (88.605)	lr 0.00139
Train [102][2690/3239]	Time 0.282 (0.618)	Data Time 0.001 (0.017)	Loss 2.1439 (2.1976)	Entropy 0.73653 (0.73933)	Top-1 acc 75.781 (71.705)	Top-5 acc 89.844 (88.604)	lr 0.00139
Train [102][2700/3239]	Time 0.310 (0.618)	Data Time 0.001 (0.016)	Loss 2.1978 (2.1978)	Entropy 0.73651 (0.73932)	Top-1 acc 73.047 (71.706)	Top-5 acc 87.500 (88.600)	lr 0.00139
Train [102][2710/3239]	Time 0.252 (0.617)	Data Time 0.001 (0.016)	Loss 2.0175 (2.1979)	Entropy 0.73657 (0.73931)	Top-1 acc 78.516 (71.709)	Top-5 acc 92.578 (88.599)	lr 0.00139
Train [102][2720/3239]	Time 0.294 (0.617)	Data Time 0.001 (0.016)	Loss 2.2060 (2.1978)	Entropy 0.73646 (0.73930)	Top-1 acc 71.484 (71.710)	Top-5 acc 89.453 (88.601)	lr 0.00139
Train [102][2730/3239]	Time 0.223 (0.616)	Data Time 0.001 (0.016)	Loss 2.1558 (2.1977)	Entropy 0.73644 (0.73929)	Top-1 acc 72.266 (71.709)	Top-5 acc 89.453 (88.601)	lr 0.00139
Train [102][2740/3239]	Time 0.247 (0.616)	Data Time 0.001 (0.016)	Loss 2.2208 (2.1979)	Entropy 0.73642 (0.73928)	Top-1 acc 68.359 (71.702)	Top-5 acc 87.500 (88.600)	lr 0.00139
Train [102][2750/3239]	Time 0.229 (0.615)	Data Time 0.001 (0.016)	Loss 2.3933 (2.1979)	Entropy 0.73631 (0.73927)	Top-1 acc 64.844 (71.700)	Top-5 acc 84.375 (88.600)	lr 0.00138
Train [102][2760/3239]	Time 0.236 (0.615)	Data Time 0.001 (0.016)	Loss 2.3459 (2.1981)	Entropy 0.73630 (0.73925)	Top-1 acc 68.359 (71.698)	Top-5 acc 85.547 (88.595)	lr 0.00138
Train [102][2770/3239]	Time 0.256 (0.614)	Data Time 0.001 (0.016)	Loss 2.1297 (2.1981)	Entropy 0.73627 (0.73924)	Top-1 acc 73.047 (71.699)	Top-5 acc 90.234 (88.596)	lr 0.00138
Train [102][2780/3239]	Time 0.267 (0.614)	Data Time 0.001 (0.016)	Loss 2.3049 (2.1983)	Entropy 0.73624 (0.73923)	Top-1 acc 71.094 (71.695)	Top-5 acc 87.500 (88.594)	lr 0.00138
Train [102][2790/3239]	Time 0.239 (0.613)	Data Time 0.001 (0.016)	Loss 2.3854 (2.1983)	Entropy 0.73618 (0.73922)	Top-1 acc 66.406 (71.698)	Top-5 acc 84.375 (88.595)	lr 0.00138
Train [102][2800/3239]	Time 0.249 (0.613)	Data Time 0.001 (0.016)	Loss 2.2477 (2.1982)	Entropy 0.73610 (0.73921)	Top-1 acc 72.656 (71.702)	Top-5 acc 86.719 (88.594)	lr 0.00138
Train [102][2810/3239]	Time 0.230 (0.613)	Data Time 0.001 (0.016)	Loss 2.2640 (2.1982)	Entropy 0.73609 (0.73920)	Top-1 acc 69.531 (71.700)	Top-5 acc 86.719 (88.594)	lr 0.00138
Train [102][2820/3239]	Time 0.219 (0.612)	Data Time 0.001 (0.016)	Loss 2.1920 (2.1980)	Entropy 0.73609 (0.73919)	Top-1 acc 68.750 (71.704)	Top-5 acc 89.453 (88.598)	lr 0.00138
Train [102][2830/3239]	Time 0.233 (0.612)	Data Time 0.001 (0.016)	Loss 2.2461 (2.1980)	Entropy 0.73607 (0.73918)	Top-1 acc 70.312 (71.706)	Top-5 acc 89.453 (88.601)	lr 0.00138
Train [102][2840/3239]	Time 0.278 (0.611)	Data Time 0.001 (0.016)	Loss 2.3240 (2.1981)	Entropy 0.73594 (0.73917)	Top-1 acc 65.234 (71.702)	Top-5 acc 86.328 (88.600)	lr 0.00138
Train [102][2850/3239]	Time 0.236 (0.611)	Data Time 0.001 (0.016)	Loss 2.2704 (2.1982)	Entropy 0.73600 (0.73916)	Top-1 acc 70.703 (71.698)	Top-5 acc 86.328 (88.600)	lr 0.00138
Train [102][2860/3239]	Time 0.259 (0.630)	Data Time 0.004 (0.016)	Loss 2.1772 (2.1981)	Entropy 0.73598 (0.73915)	Top-1 acc 71.094 (71.697)	Top-5 acc 88.281 (88.601)	lr 0.00138
Train [102][2870/3239]	Time 0.229 (0.630)	Data Time 0.002 (0.016)	Loss 2.0930 (2.1982)	Entropy 0.73590 (0.73913)	Top-1 acc 73.047 (71.693)	Top-5 acc 92.188 (88.601)	lr 0.00138
Train [102][2880/3239]	Time 0.234 (0.629)	Data Time 0.002 (0.016)	Loss 2.0720 (2.1980)	Entropy 0.73585 (0.73912)	Top-1 acc 73.047 (71.694)	Top-5 acc 91.016 (88.603)	lr 0.00138
Train [102][2890/3239]	Time 0.224 (0.629)	Data Time 0.002 (0.015)	Loss 2.0790 (2.1981)	Entropy 0.73580 (0.73911)	Top-1 acc 76.172 (71.695)	Top-5 acc 92.188 (88.600)	lr 0.00138
Train [102][2900/3239]	Time 0.265 (0.628)	Data Time 0.001 (0.015)	Loss 2.2161 (2.1981)	Entropy 0.73572 (0.73910)	Top-1 acc 70.703 (71.694)	Top-5 acc 86.719 (88.601)	lr 0.00138
Train [102][2910/3239]	Time 0.232 (0.628)	Data Time 0.001 (0.015)	Loss 2.2391 (2.1980)	Entropy 0.73572 (0.73909)	Top-1 acc 69.922 (71.694)	Top-5 acc 87.891 (88.600)	lr 0.00138
Train [102][2920/3239]	Time 0.253 (0.627)	Data Time 0.001 (0.015)	Loss 2.7332 (2.1981)	Entropy 0.73573 (0.73908)	Top-1 acc 57.812 (71.694)	Top-5 acc 80.078 (88.598)	lr 0.00138
Train [102][2930/3239]	Time 0.232 (0.627)	Data Time 0.001 (0.015)	Loss 2.0730 (2.1981)	Entropy 0.73572 (0.73907)	Top-1 acc 77.344 (71.697)	Top-5 acc 89.062 (88.600)	lr 0.00138
Train [102][2940/3239]	Time 0.260 (0.626)	Data Time 0.001 (0.015)	Loss 2.2574 (2.1980)	Entropy 0.73572 (0.73905)	Top-1 acc 69.531 (71.702)	Top-5 acc 89.453 (88.602)	lr 0.00138
Train [102][2950/3239]	Time 0.333 (0.626)	Data Time 0.001 (0.015)	Loss 2.0836 (2.1980)	Entropy 0.73572 (0.73904)	Top-1 acc 74.219 (71.699)	Top-5 acc 88.672 (88.601)	lr 0.00138
Train [102][2960/3239]	Time 0.234 (0.625)	Data Time 0.002 (0.015)	Loss 2.4005 (2.1980)	Entropy 0.73566 (0.73903)	Top-1 acc 67.188 (71.698)	Top-5 acc 85.938 (88.602)	lr 0.00138
Train [102][2970/3239]	Time 0.258 (0.625)	Data Time 0.001 (0.015)	Loss 2.2179 (2.1980)	Entropy 0.73565 (0.73902)	Top-1 acc 69.922 (71.700)	Top-5 acc 88.672 (88.603)	lr 0.00137
Train [102][2980/3239]	Time 0.265 (0.624)	Data Time 0.001 (0.015)	Loss 2.2345 (2.1981)	Entropy 0.73560 (0.73901)	Top-1 acc 73.438 (71.701)	Top-5 acc 87.891 (88.600)	lr 0.00137
Train [102][2990/3239]	Time 0.328 (0.624)	Data Time 0.002 (0.015)	Loss 2.1629 (2.1981)	Entropy 0.73560 (0.73900)	Top-1 acc 74.609 (71.702)	Top-5 acc 88.672 (88.601)	lr 0.00137
Train [102][3000/3239]	Time 0.224 (0.623)	Data Time 0.001 (0.015)	Loss 2.2039 (2.1982)	Entropy 0.73560 (0.73899)	Top-1 acc 71.484 (71.702)	Top-5 acc 87.500 (88.598)	lr 0.00137
Train [102][3010/3239]	Time 0.245 (0.623)	Data Time 0.001 (0.015)	Loss 2.2369 (2.1981)	Entropy 0.73557 (0.73897)	Top-1 acc 69.531 (71.703)	Top-5 acc 87.891 (88.601)	lr 0.00137
Train [102][3020/3239]	Time 0.236 (0.622)	Data Time 0.001 (0.015)	Loss 2.1977 (2.1982)	Entropy 0.73559 (0.73896)	Top-1 acc 70.703 (71.701)	Top-5 acc 89.062 (88.598)	lr 0.00137
Train [102][3030/3239]	Time 0.331 (0.622)	Data Time 0.001 (0.015)	Loss 2.1016 (2.1983)	Entropy 0.73557 (0.73895)	Top-1 acc 74.219 (71.701)	Top-5 acc 91.406 (88.598)	lr 0.00137
Train [102][3040/3239]	Time 0.237 (0.622)	Data Time 0.001 (0.015)	Loss 2.2305 (2.1983)	Entropy 0.73553 (0.73894)	Top-1 acc 70.312 (71.698)	Top-5 acc 87.500 (88.598)	lr 0.00137
Train [102][3050/3239]	Time 0.236 (0.621)	Data Time 0.001 (0.015)	Loss 2.2001 (2.1983)	Entropy 0.73539 (0.73893)	Top-1 acc 73.047 (71.694)	Top-5 acc 87.500 (88.598)	lr 0.00137
Train [102][3060/3239]	Time 0.246 (0.621)	Data Time 0.001 (0.015)	Loss 2.3844 (2.1984)	Entropy 0.73535 (0.73892)	Top-1 acc 65.234 (71.691)	Top-5 acc 87.109 (88.598)	lr 0.00137
Train [102][3070/3239]	Time 0.270 (0.620)	Data Time 0.001 (0.015)	Loss 2.1690 (2.1985)	Entropy 0.73531 (0.73891)	Top-1 acc 71.094 (71.688)	Top-5 acc 89.844 (88.597)	lr 0.00137
Train [102][3080/3239]	Time 0.235 (0.620)	Data Time 0.001 (0.015)	Loss 2.2215 (2.1985)	Entropy 0.73522 (0.73890)	Top-1 acc 71.094 (71.686)	Top-5 acc 88.281 (88.595)	lr 0.00137
Train [102][3090/3239]	Time 0.263 (0.619)	Data Time 0.002 (0.015)	Loss 2.0227 (2.1985)	Entropy 0.73523 (0.73888)	Top-1 acc 78.516 (71.690)	Top-5 acc 92.188 (88.596)	lr 0.00137
Train [102][3100/3239]	Time 0.255 (0.619)	Data Time 0.001 (0.015)	Loss 2.3235 (2.1985)	Entropy 0.73526 (0.73887)	Top-1 acc 67.188 (71.690)	Top-5 acc 88.281 (88.595)	lr 0.00137
Train [102][3110/3239]	Time 0.279 (0.619)	Data Time 0.002 (0.015)	Loss 2.3394 (2.1986)	Entropy 0.73518 (0.73886)	Top-1 acc 66.406 (71.688)	Top-5 acc 86.719 (88.594)	lr 0.00137
Train [102][3120/3239]	Time 0.229 (0.618)	Data Time 0.001 (0.014)	Loss 2.1958 (2.1985)	Entropy 0.73523 (0.73885)	Top-1 acc 71.094 (71.688)	Top-5 acc 87.109 (88.594)	lr 0.00137
Train [102][3130/3239]	Time 0.227 (0.618)	Data Time 0.001 (0.014)	Loss 2.2197 (2.1986)	Entropy 0.73524 (0.73884)	Top-1 acc 73.047 (71.686)	Top-5 acc 85.938 (88.592)	lr 0.00137
Train [102][3140/3239]	Time 0.228 (0.617)	Data Time 0.001 (0.014)	Loss 2.2875 (2.1987)	Entropy 0.73518 (0.73883)	Top-1 acc 68.750 (71.679)	Top-5 acc 88.281 (88.590)	lr 0.00137
Train [102][3150/3239]	Time 0.236 (0.617)	Data Time 0.001 (0.014)	Loss 2.2573 (2.1988)	Entropy 0.73517 (0.73881)	Top-1 acc 71.875 (71.679)	Top-5 acc 86.719 (88.589)	lr 0.00137
Train [102][3160/3239]	Time 0.261 (0.616)	Data Time 0.001 (0.014)	Loss 2.3366 (2.1988)	Entropy 0.73517 (0.73880)	Top-1 acc 68.359 (71.677)	Top-5 acc 83.203 (88.589)	lr 0.00137
Train [102][3170/3239]	Time 0.245 (0.616)	Data Time 0.001 (0.014)	Loss 2.0788 (2.1988)	Entropy 0.73512 (0.73879)	Top-1 acc 72.656 (71.675)	Top-5 acc 91.406 (88.588)	lr 0.00137
Train [102][3180/3239]	Time 0.236 (0.616)	Data Time 0.000 (0.014)	Loss 2.2305 (2.1988)	Entropy 0.73509 (0.73878)	Top-1 acc 67.969 (71.676)	Top-5 acc 89.453 (88.591)	lr 0.00137
Train [102][3190/3239]	Time 0.270 (0.630)	Data Time 0.000 (0.014)	Loss 2.0421 (2.1987)	Entropy 0.73511 (0.73877)	Top-1 acc 76.172 (71.677)	Top-5 acc 90.234 (88.593)	lr 0.00136
Train [102][3200/3239]	Time 0.237 (0.630)	Data Time 0.000 (0.014)	Loss 2.2243 (2.1988)	Entropy 0.73508 (0.73876)	Top-1 acc 71.484 (71.675)	Top-5 acc 85.938 (88.587)	lr 0.00136
Train [102][3210/3239]	Time 0.224 (0.630)	Data Time 0.000 (0.014)	Loss 2.0454 (2.1988)	Entropy 0.73507 (0.73874)	Top-1 acc 76.953 (71.675)	Top-5 acc 90.625 (88.586)	lr 0.00136
Train [102][3220/3239]	Time 0.239 (0.629)	Data Time 0.000 (0.014)	Loss 2.1228 (2.1987)	Entropy 0.73499 (0.73873)	Top-1 acc 72.266 (71.679)	Top-5 acc 89.844 (88.587)	lr 0.00136
Train [102][3230/3239]	Time 0.245 (0.629)	Data Time 0.000 (0.014)	Loss 2.2638 (2.1988)	Entropy 0.73489 (0.73872)	Top-1 acc 71.094 (71.679)	Top-5 acc 86.719 (88.586)	lr 0.00136
Train [102][3239/3239]	Time 2.346 (0.628)	Data Time 0.000 (0.014)	Loss 2.5884 (2.1989)	Entropy 0.73489 (0.73871)	Top-1 acc 60.494 (71.675)	Top-5 acc 86.420 (88.586)	lr 0.00136
==========Valid [102/120]	loss 1.215	top-1 acc 72.086 (72.172)	top-5 acc 89.712	Train top-1 71.675	top-5 88.586	Entropy 0.73489	Latency-None: 0.000ms	Flops: 546.53M
Train [103][0/3239]	Time 42.736 (42.736)	Data Time 41.013 (41.013)	Loss 2.1394 (2.1394)	Entropy 0.73487 (0.73487)	Top-1 acc 71.094 (71.094)	Top-5 acc 90.234 (90.234)	lr 0.00136
Train [103][10/3239]	Time 2.644 (4.428)	Data Time 0.002 (3.731)	Loss 2.1121 (2.1708)	Entropy 0.73487 (0.73487)	Top-1 acc 73.438 (72.408)	Top-5 acc 89.844 (88.672)	lr 0.00136
Train [103][20/3239]	Time 0.256 (2.443)	Data Time 0.002 (1.955)	Loss 2.2091 (2.1831)	Entropy 0.73460 (0.73474)	Top-1 acc 72.266 (72.303)	Top-5 acc 88.672 (88.653)	lr 0.00136
Train [103][30/3239]	Time 0.233 (1.814)	Data Time 0.001 (1.325)	Loss 2.1459 (2.1843)	Entropy 0.73447 (0.73466)	Top-1 acc 73.047 (72.366)	Top-5 acc 89.453 (88.609)	lr 0.00136
Train [103][40/3239]	Time 0.245 (1.491)	Data Time 0.001 (1.002)	Loss 2.2739 (2.1863)	Entropy 0.73446 (0.73461)	Top-1 acc 69.141 (72.170)	Top-5 acc 85.156 (88.529)	lr 0.00136
Train [103][50/3239]	Time 0.242 (1.294)	Data Time 0.001 (0.806)	Loss 2.1222 (2.1885)	Entropy 0.73446 (0.73458)	Top-1 acc 76.953 (72.258)	Top-5 acc 90.234 (88.557)	lr 0.00136
Train [103][60/3239]	Time 0.237 (1.163)	Data Time 0.001 (0.674)	Loss 2.0932 (2.1825)	Entropy 0.73442 (0.73456)	Top-1 acc 73.438 (72.291)	Top-5 acc 91.016 (88.697)	lr 0.00136
Train [103][70/3239]	Time 0.241 (1.067)	Data Time 0.001 (0.579)	Loss 2.2750 (2.1897)	Entropy 0.73441 (0.73454)	Top-1 acc 65.234 (72.046)	Top-5 acc 85.156 (88.490)	lr 0.00136
Train [103][80/3239]	Time 0.256 (0.995)	Data Time 0.001 (0.508)	Loss 2.1679 (2.1963)	Entropy 0.73439 (0.73452)	Top-1 acc 73.047 (71.885)	Top-5 acc 88.281 (88.373)	lr 0.00136
Train [103][90/3239]	Time 0.242 (0.940)	Data Time 0.001 (0.452)	Loss 2.2949 (2.1960)	Entropy 0.73432 (0.73450)	Top-1 acc 75.000 (71.957)	Top-5 acc 88.672 (88.462)	lr 0.00136
Train [103][100/3239]	Time 0.236 (0.896)	Data Time 0.001 (0.408)	Loss 2.2493 (2.1957)	Entropy 0.73431 (0.73448)	Top-1 acc 71.094 (71.948)	Top-5 acc 88.281 (88.428)	lr 0.00136
Train [103][110/3239]	Time 0.237 (0.860)	Data Time 0.001 (0.371)	Loss 2.1920 (2.1952)	Entropy 0.73434 (0.73447)	Top-1 acc 72.656 (71.819)	Top-5 acc 90.625 (88.471)	lr 0.00136
Train [103][120/3239]	Time 2.670 (0.830)	Data Time 0.001 (0.341)	Loss 2.1726 (2.1965)	Entropy 0.73434 (0.73446)	Top-1 acc 72.266 (71.710)	Top-5 acc 89.062 (88.462)	lr 0.00136
Train [103][130/3239]	Time 0.329 (0.786)	Data Time 0.001 (0.315)	Loss 2.2224 (2.1970)	Entropy 0.73439 (0.73445)	Top-1 acc 71.094 (71.687)	Top-5 acc 89.453 (88.484)	lr 0.00136
Train [103][140/3239]	Time 0.238 (0.764)	Data Time 0.001 (0.293)	Loss 2.1199 (2.1962)	Entropy 0.73453 (0.73446)	Top-1 acc 72.656 (71.698)	Top-5 acc 92.188 (88.481)	lr 0.00136
Train [103][150/3239]	Time 0.232 (0.744)	Data Time 0.001 (0.273)	Loss 2.1413 (2.1959)	Entropy 0.73449 (0.73446)	Top-1 acc 73.047 (71.707)	Top-5 acc 89.453 (88.465)	lr 0.00136
Train [103][160/3239]	Time 0.254 (0.728)	Data Time 0.002 (0.256)	Loss 2.2517 (2.1979)	Entropy 0.73443 (0.73446)	Top-1 acc 71.875 (71.632)	Top-5 acc 87.109 (88.400)	lr 0.00136
Train [103][170/3239]	Time 0.320 (0.714)	Data Time 0.001 (0.242)	Loss 2.1675 (2.1959)	Entropy 0.73444 (0.73446)	Top-1 acc 72.266 (71.640)	Top-5 acc 90.625 (88.459)	lr 0.00135
Train [103][180/3239]	Time 0.225 (0.702)	Data Time 0.001 (0.228)	Loss 2.1001 (2.1937)	Entropy 0.73445 (0.73446)	Top-1 acc 76.172 (71.724)	Top-5 acc 86.719 (88.488)	lr 0.00135
Train [103][190/3239]	Time 0.228 (0.690)	Data Time 0.001 (0.216)	Loss 2.1103 (2.1928)	Entropy 0.73440 (0.73446)	Top-1 acc 73.828 (71.720)	Top-5 acc 89.453 (88.480)	lr 0.00135
Train [103][200/3239]	Time 0.230 (0.680)	Data Time 0.001 (0.206)	Loss 2.1711 (2.1928)	Entropy 0.73434 (0.73445)	Top-1 acc 72.656 (71.673)	Top-5 acc 90.625 (88.503)	lr 0.00135
Train [103][210/3239]	Time 0.331 (0.670)	Data Time 0.001 (0.196)	Loss 2.0032 (2.1898)	Entropy 0.73434 (0.73445)	Top-1 acc 77.734 (71.805)	Top-5 acc 90.625 (88.533)	lr 0.00135
Train [103][220/3239]	Time 0.226 (0.662)	Data Time 0.001 (0.187)	Loss 2.1648 (2.1887)	Entropy 0.73432 (0.73444)	Top-1 acc 75.000 (71.871)	Top-5 acc 89.062 (88.562)	lr 0.00135
Train [103][230/3239]	Time 2.614 (0.654)	Data Time 0.002 (0.179)	Loss 2.1204 (2.1877)	Entropy 0.73432 (0.73444)	Top-1 acc 71.875 (71.887)	Top-5 acc 91.406 (88.586)	lr 0.00135
Train [103][240/3239]	Time 0.237 (0.637)	Data Time 0.001 (0.172)	Loss 2.2456 (2.1871)	Entropy 0.73432 (0.73443)	Top-1 acc 73.047 (71.925)	Top-5 acc 88.281 (88.591)	lr 0.00135
Train [103][250/3239]	Time 0.246 (0.630)	Data Time 0.001 (0.165)	Loss 2.2545 (2.1872)	Entropy 0.73436 (0.73443)	Top-1 acc 70.703 (71.914)	Top-5 acc 87.500 (88.633)	lr 0.00135
Train [103][260/3239]	Time 0.242 (0.625)	Data Time 0.002 (0.159)	Loss 2.2108 (2.1881)	Entropy 0.73437 (0.73443)	Top-1 acc 73.828 (71.927)	Top-5 acc 89.453 (88.621)	lr 0.00135
Train [103][270/3239]	Time 0.232 (0.620)	Data Time 0.001 (0.153)	Loss 2.2645 (2.1879)	Entropy 0.73439 (0.73443)	Top-1 acc 71.094 (71.921)	Top-5 acc 87.109 (88.643)	lr 0.00135
Train [103][280/3239]	Time 0.222 (0.614)	Data Time 0.001 (0.148)	Loss 2.2558 (2.1887)	Entropy 0.73431 (0.73442)	Top-1 acc 67.188 (71.888)	Top-5 acc 86.719 (88.644)	lr 0.00135
Train [103][290/3239]	Time 0.258 (0.611)	Data Time 0.001 (0.143)	Loss 2.1665 (2.1894)	Entropy 0.73426 (0.73442)	Top-1 acc 72.656 (71.890)	Top-5 acc 89.062 (88.626)	lr 0.00135
Train [103][300/3239]	Time 0.315 (0.607)	Data Time 0.001 (0.138)	Loss 2.2024 (2.1906)	Entropy 0.73429 (0.73441)	Top-1 acc 69.922 (71.866)	Top-5 acc 89.844 (88.611)	lr 0.00135
Train [103][310/3239]	Time 0.241 (0.791)	Data Time 0.002 (0.133)	Loss 2.3372 (2.1899)	Entropy 0.73428 (0.73441)	Top-1 acc 69.922 (71.875)	Top-5 acc 86.328 (88.635)	lr 0.00135
Train [103][320/3239]	Time 0.228 (0.781)	Data Time 0.002 (0.129)	Loss 2.2458 (2.1879)	Entropy 0.73428 (0.73440)	Top-1 acc 73.047 (71.918)	Top-5 acc 85.547 (88.660)	lr 0.00135
Train [103][330/3239]	Time 0.231 (0.772)	Data Time 0.002 (0.126)	Loss 2.2077 (2.1878)	Entropy 0.73427 (0.73440)	Top-1 acc 70.312 (71.926)	Top-5 acc 87.891 (88.666)	lr 0.00135
Train [103][340/3239]	Time 2.675 (0.764)	Data Time 0.001 (0.122)	Loss 2.1879 (2.1871)	Entropy 0.73427 (0.73440)	Top-1 acc 70.703 (71.941)	Top-5 acc 87.500 (88.675)	lr 0.00135
Train [103][350/3239]	Time 0.236 (0.749)	Data Time 0.002 (0.118)	Loss 2.0877 (2.1873)	Entropy 0.73428 (0.73439)	Top-1 acc 77.344 (71.947)	Top-5 acc 91.406 (88.666)	lr 0.00135
Train [103][360/3239]	Time 0.236 (0.741)	Data Time 0.001 (0.115)	Loss 2.4504 (2.1876)	Entropy 0.73430 (0.73439)	Top-1 acc 67.188 (71.932)	Top-5 acc 83.203 (88.643)	lr 0.00135
Train [103][370/3239]	Time 0.230 (0.734)	Data Time 0.001 (0.112)	Loss 2.1492 (2.1874)	Entropy 0.73426 (0.73439)	Top-1 acc 73.047 (71.943)	Top-5 acc 87.500 (88.629)	lr 0.00135
Train [103][380/3239]	Time 0.316 (0.727)	Data Time 0.001 (0.109)	Loss 2.2474 (2.1882)	Entropy 0.73429 (0.73438)	Top-1 acc 73.438 (71.933)	Top-5 acc 87.500 (88.610)	lr 0.00135
Train [103][390/3239]	Time 0.242 (0.721)	Data Time 0.001 (0.107)	Loss 2.0656 (2.1890)	Entropy 0.73428 (0.73438)	Top-1 acc 75.391 (71.912)	Top-5 acc 87.891 (88.583)	lr 0.00134
Train [103][400/3239]	Time 0.222 (0.715)	Data Time 0.001 (0.104)	Loss 2.1043 (2.1880)	Entropy 0.73422 (0.73438)	Top-1 acc 72.656 (71.934)	Top-5 acc 91.406 (88.608)	lr 0.00134
Train [103][410/3239]	Time 0.232 (0.710)	Data Time 0.001 (0.101)	Loss 2.1267 (2.1877)	Entropy 0.73421 (0.73437)	Top-1 acc 71.875 (71.943)	Top-5 acc 88.672 (88.599)	lr 0.00134
Train [103][420/3239]	Time 0.336 (0.705)	Data Time 0.001 (0.099)	Loss 2.1559 (2.1877)	Entropy 0.73423 (0.73437)	Top-1 acc 70.703 (71.934)	Top-5 acc 88.672 (88.614)	lr 0.00134
Train [103][430/3239]	Time 0.228 (0.700)	Data Time 0.001 (0.097)	Loss 2.1206 (2.1875)	Entropy 0.73421 (0.73437)	Top-1 acc 76.562 (71.965)	Top-5 acc 91.797 (88.612)	lr 0.00134
Train [103][440/3239]	Time 0.217 (0.695)	Data Time 0.001 (0.095)	Loss 2.2161 (2.1870)	Entropy 0.73418 (0.73436)	Top-1 acc 71.875 (71.974)	Top-5 acc 87.109 (88.623)	lr 0.00134
Train [103][450/3239]	Time 2.516 (0.690)	Data Time 0.001 (0.093)	Loss 2.0898 (2.1873)	Entropy 0.73418 (0.73436)	Top-1 acc 75.781 (71.976)	Top-5 acc 91.016 (88.625)	lr 0.00134
Train [103][460/3239]	Time 0.241 (0.680)	Data Time 0.001 (0.091)	Loss 2.2553 (2.1879)	Entropy 0.73421 (0.73436)	Top-1 acc 69.531 (71.941)	Top-5 acc 87.109 (88.629)	lr 0.00134
Train [103][470/3239]	Time 0.259 (0.676)	Data Time 0.001 (0.089)	Loss 2.1509 (2.1872)	Entropy 0.73420 (0.73435)	Top-1 acc 73.438 (71.970)	Top-5 acc 90.234 (88.646)	lr 0.00134
Train [103][480/3239]	Time 0.235 (0.672)	Data Time 0.001 (0.087)	Loss 2.0812 (2.1882)	Entropy 0.73417 (0.73435)	Top-1 acc 77.344 (71.935)	Top-5 acc 89.844 (88.637)	lr 0.00134
Train [103][490/3239]	Time 0.230 (0.668)	Data Time 0.001 (0.085)	Loss 2.2723 (2.1880)	Entropy 0.73408 (0.73434)	Top-1 acc 69.922 (71.933)	Top-5 acc 86.328 (88.634)	lr 0.00134
Train [103][500/3239]	Time 0.227 (0.664)	Data Time 0.001 (0.083)	Loss 2.1829 (2.1893)	Entropy 0.73409 (0.73434)	Top-1 acc 71.094 (71.898)	Top-5 acc 90.234 (88.610)	lr 0.00134
Train [103][510/3239]	Time 0.316 (0.661)	Data Time 0.001 (0.082)	Loss 2.1997 (2.1889)	Entropy 0.73392 (0.73433)	Top-1 acc 70.312 (71.900)	Top-5 acc 87.109 (88.623)	lr 0.00134
Train [103][520/3239]	Time 0.222 (0.657)	Data Time 0.001 (0.080)	Loss 2.1207 (2.1888)	Entropy 0.73391 (0.73432)	Top-1 acc 73.047 (71.897)	Top-5 acc 88.281 (88.624)	lr 0.00134
Train [103][530/3239]	Time 0.235 (0.654)	Data Time 0.001 (0.079)	Loss 2.1460 (2.1890)	Entropy 0.73390 (0.73432)	Top-1 acc 74.219 (71.886)	Top-5 acc 90.625 (88.627)	lr 0.00134
Train [103][540/3239]	Time 0.219 (0.651)	Data Time 0.001 (0.077)	Loss 2.2591 (2.1892)	Entropy 0.73393 (0.73431)	Top-1 acc 69.922 (71.878)	Top-5 acc 87.109 (88.629)	lr 0.00134
Train [103][550/3239]	Time 0.324 (0.648)	Data Time 0.001 (0.076)	Loss 2.2156 (2.1890)	Entropy 0.73390 (0.73430)	Top-1 acc 71.875 (71.877)	Top-5 acc 86.719 (88.625)	lr 0.00134
Train [103][560/3239]	Time 2.596 (0.645)	Data Time 0.001 (0.075)	Loss 2.2025 (2.1882)	Entropy 0.73390 (0.73429)	Top-1 acc 73.828 (71.892)	Top-5 acc 87.109 (88.644)	lr 0.00134
Train [103][570/3239]	Time 0.248 (0.638)	Data Time 0.001 (0.073)	Loss 2.1486 (2.1883)	Entropy 0.73392 (0.73429)	Top-1 acc 73.047 (71.875)	Top-5 acc 88.281 (88.640)	lr 0.00134
Train [103][580/3239]	Time 0.232 (0.636)	Data Time 0.001 (0.072)	Loss 2.1404 (2.1889)	Entropy 0.73391 (0.73428)	Top-1 acc 73.828 (71.853)	Top-5 acc 89.844 (88.634)	lr 0.00134
Train [103][590/3239]	Time 0.315 (0.633)	Data Time 0.001 (0.071)	Loss 2.0515 (2.1882)	Entropy 0.73386 (0.73428)	Top-1 acc 78.125 (71.874)	Top-5 acc 92.969 (88.648)	lr 0.00134
Train [103][600/3239]	Time 0.234 (0.630)	Data Time 0.001 (0.070)	Loss 2.1769 (2.1876)	Entropy 0.73383 (0.73427)	Top-1 acc 72.656 (71.882)	Top-5 acc 88.672 (88.663)	lr 0.00134
Train [103][610/3239]	Time 0.230 (0.628)	Data Time 0.001 (0.069)	Loss 2.3120 (2.1882)	Entropy 0.73383 (0.73426)	Top-1 acc 66.406 (71.866)	Top-5 acc 86.328 (88.658)	lr 0.00133
Train [103][620/3239]	Time 0.224 (0.625)	Data Time 0.001 (0.068)	Loss 2.2962 (2.1874)	Entropy 0.73383 (0.73425)	Top-1 acc 69.141 (71.903)	Top-5 acc 88.281 (88.667)	lr 0.00133
Train [103][630/3239]	Time 0.248 (0.623)	Data Time 0.001 (0.067)	Loss 2.3609 (2.1882)	Entropy 0.73389 (0.73425)	Top-1 acc 66.016 (71.879)	Top-5 acc 84.375 (88.652)	lr 0.00133
Train [103][640/3239]	Time 0.219 (0.621)	Data Time 0.001 (0.066)	Loss 2.0690 (2.1884)	Entropy 0.73389 (0.73424)	Top-1 acc 74.219 (71.872)	Top-5 acc 91.016 (88.651)	lr 0.00133
Train [103][650/3239]	Time 0.245 (0.618)	Data Time 0.002 (0.065)	Loss 2.1535 (2.1880)	Entropy 0.73389 (0.73424)	Top-1 acc 70.703 (71.877)	Top-5 acc 91.016 (88.665)	lr 0.00133
Train [103][660/3239]	Time 0.237 (0.616)	Data Time 0.001 (0.064)	Loss 2.3059 (2.1880)	Entropy 0.73388 (0.73423)	Top-1 acc 68.750 (71.874)	Top-5 acc 87.109 (88.671)	lr 0.00133
Train [103][670/3239]	Time 53.719 (0.690)	Data Time 0.001 (0.063)	Loss 2.1767 (2.1874)	Entropy 0.73388 (0.73423)	Top-1 acc 71.484 (71.888)	Top-5 acc 89.453 (88.694)	lr 0.00133
Train [103][680/3239]	Time 0.418 (0.685)	Data Time 0.129 (0.062)	Loss 2.0629 (2.1877)	Entropy 0.73382 (0.73422)	Top-1 acc 77.734 (71.895)	Top-5 acc 90.625 (88.687)	lr 0.00133
Train [103][690/3239]	Time 0.230 (0.682)	Data Time 0.002 (0.061)	Loss 2.1824 (2.1875)	Entropy 0.73377 (0.73421)	Top-1 acc 74.219 (71.899)	Top-5 acc 88.672 (88.684)	lr 0.00133
Train [103][700/3239]	Time 0.234 (0.680)	Data Time 0.001 (0.060)	Loss 2.1633 (2.1871)	Entropy 0.73374 (0.73421)	Top-1 acc 71.484 (71.912)	Top-5 acc 87.109 (88.689)	lr 0.00133
Train [103][710/3239]	Time 0.240 (0.677)	Data Time 0.001 (0.059)	Loss 2.0668 (2.1872)	Entropy 0.73368 (0.73420)	Top-1 acc 75.391 (71.913)	Top-5 acc 90.234 (88.694)	lr 0.00133
Train [103][720/3239]	Time 0.236 (0.674)	Data Time 0.001 (0.059)	Loss 2.4015 (2.1877)	Entropy 0.73364 (0.73419)	Top-1 acc 67.969 (71.902)	Top-5 acc 85.547 (88.695)	lr 0.00133
Train [103][730/3239]	Time 0.233 (0.671)	Data Time 0.001 (0.058)	Loss 2.2869 (2.1884)	Entropy 0.73356 (0.73418)	Top-1 acc 72.266 (71.891)	Top-5 acc 88.672 (88.687)	lr 0.00133
Train [103][740/3239]	Time 0.229 (0.669)	Data Time 0.001 (0.057)	Loss 2.0992 (2.1886)	Entropy 0.73350 (0.73418)	Top-1 acc 76.172 (71.898)	Top-5 acc 89.453 (88.675)	lr 0.00133
Train [103][750/3239]	Time 0.226 (0.666)	Data Time 0.001 (0.056)	Loss 2.1446 (2.1886)	Entropy 0.73343 (0.73417)	Top-1 acc 68.750 (71.893)	Top-5 acc 89.844 (88.678)	lr 0.00133
Train [103][760/3239]	Time 0.325 (0.664)	Data Time 0.001 (0.056)	Loss 2.2630 (2.1889)	Entropy 0.73341 (0.73416)	Top-1 acc 69.531 (71.893)	Top-5 acc 88.672 (88.676)	lr 0.00133
Train [103][770/3239]	Time 0.231 (0.662)	Data Time 0.001 (0.055)	Loss 2.0912 (2.1887)	Entropy 0.73334 (0.73415)	Top-1 acc 76.172 (71.901)	Top-5 acc 89.844 (88.680)	lr 0.00133
Train [103][780/3239]	Time 2.630 (0.659)	Data Time 0.001 (0.054)	Loss 2.2624 (2.1895)	Entropy 0.73334 (0.73414)	Top-1 acc 70.312 (71.880)	Top-5 acc 87.500 (88.667)	lr 0.00133
Train [103][790/3239]	Time 0.306 (0.654)	Data Time 0.002 (0.054)	Loss 2.2803 (2.1891)	Entropy 0.73326 (0.73413)	Top-1 acc 72.656 (71.882)	Top-5 acc 85.156 (88.675)	lr 0.00133
Train [103][800/3239]	Time 0.223 (0.652)	Data Time 0.001 (0.053)	Loss 2.3394 (2.1895)	Entropy 0.73325 (0.73411)	Top-1 acc 67.578 (71.880)	Top-5 acc 87.109 (88.674)	lr 0.00133
Train [103][810/3239]	Time 0.249 (0.650)	Data Time 0.001 (0.052)	Loss 2.1498 (2.1897)	Entropy 0.73314 (0.73410)	Top-1 acc 72.656 (71.886)	Top-5 acc 89.453 (88.665)	lr 0.00133
Train [103][820/3239]	Time 0.246 (0.648)	Data Time 0.001 (0.052)	Loss 2.1853 (2.1895)	Entropy 0.73321 (0.73409)	Top-1 acc 73.438 (71.896)	Top-5 acc 89.844 (88.668)	lr 0.00133
Train [103][830/3239]	Time 0.232 (0.646)	Data Time 0.001 (0.051)	Loss 2.1626 (2.1891)	Entropy 0.73320 (0.73408)	Top-1 acc 71.875 (71.902)	Top-5 acc 91.016 (88.682)	lr 0.00132
Train [103][840/3239]	Time 0.221 (0.644)	Data Time 0.001 (0.051)	Loss 2.2402 (2.1891)	Entropy 0.73313 (0.73407)	Top-1 acc 71.484 (71.910)	Top-5 acc 88.281 (88.682)	lr 0.00132
Train [103][850/3239]	Time 0.234 (0.642)	Data Time 0.001 (0.050)	Loss 2.0592 (2.1884)	Entropy 0.73310 (0.73406)	Top-1 acc 73.438 (71.927)	Top-5 acc 92.578 (88.703)	lr 0.00132
Train [103][860/3239]	Time 0.225 (0.640)	Data Time 0.001 (0.049)	Loss 2.1164 (2.1883)	Entropy 0.73312 (0.73405)	Top-1 acc 73.047 (71.927)	Top-5 acc 89.844 (88.711)	lr 0.00132
Train [103][870/3239]	Time 0.247 (0.638)	Data Time 0.001 (0.049)	Loss 2.2878 (2.1886)	Entropy 0.73303 (0.73404)	Top-1 acc 70.703 (71.917)	Top-5 acc 87.500 (88.710)	lr 0.00132
Train [103][880/3239]	Time 0.229 (0.636)	Data Time 0.001 (0.048)	Loss 2.2439 (2.1887)	Entropy 0.73299 (0.73403)	Top-1 acc 70.703 (71.907)	Top-5 acc 89.062 (88.718)	lr 0.00132
Train [103][890/3239]	Time 2.653 (0.635)	Data Time 0.002 (0.048)	Loss 2.0441 (2.1886)	Entropy 0.73299 (0.73401)	Top-1 acc 76.172 (71.913)	Top-5 acc 91.016 (88.721)	lr 0.00132
Train [103][900/3239]	Time 0.225 (0.630)	Data Time 0.001 (0.047)	Loss 2.2331 (2.1884)	Entropy 0.73301 (0.73400)	Top-1 acc 70.312 (71.922)	Top-5 acc 86.328 (88.714)	lr 0.00132
Train [103][910/3239]	Time 0.236 (0.629)	Data Time 0.001 (0.047)	Loss 2.2193 (2.1893)	Entropy 0.73299 (0.73399)	Top-1 acc 72.656 (71.891)	Top-5 acc 87.891 (88.709)	lr 0.00132
Train [103][920/3239]	Time 0.228 (0.627)	Data Time 0.001 (0.046)	Loss 2.2378 (2.1896)	Entropy 0.73288 (0.73398)	Top-1 acc 70.312 (71.879)	Top-5 acc 88.672 (88.702)	lr 0.00132
Train [103][930/3239]	Time 0.235 (0.626)	Data Time 0.001 (0.046)	Loss 2.1398 (2.1903)	Entropy 0.73285 (0.73397)	Top-1 acc 68.359 (71.858)	Top-5 acc 90.234 (88.691)	lr 0.00132
Train [103][940/3239]	Time 0.227 (0.624)	Data Time 0.001 (0.045)	Loss 2.3794 (2.1911)	Entropy 0.73289 (0.73396)	Top-1 acc 69.531 (71.838)	Top-5 acc 87.500 (88.673)	lr 0.00132
Train [103][950/3239]	Time 0.251 (0.623)	Data Time 0.002 (0.045)	Loss 2.0556 (2.1907)	Entropy 0.73288 (0.73394)	Top-1 acc 75.000 (71.849)	Top-5 acc 91.406 (88.677)	lr 0.00132
Train [103][960/3239]	Time 0.216 (0.621)	Data Time 0.001 (0.044)	Loss 2.3301 (2.1909)	Entropy 0.73288 (0.73393)	Top-1 acc 69.922 (71.847)	Top-5 acc 84.766 (88.669)	lr 0.00132
Train [103][970/3239]	Time 0.328 (0.620)	Data Time 0.002 (0.044)	Loss 2.1648 (2.1907)	Entropy 0.73287 (0.73392)	Top-1 acc 69.922 (71.854)	Top-5 acc 91.797 (88.674)	lr 0.00132
Train [103][980/3239]	Time 0.234 (0.618)	Data Time 0.001 (0.044)	Loss 2.2360 (2.1910)	Entropy 0.73289 (0.73391)	Top-1 acc 68.359 (71.848)	Top-5 acc 88.672 (88.668)	lr 0.00132
Train [103][990/3239]	Time 0.238 (0.617)	Data Time 0.001 (0.043)	Loss 2.1993 (2.1908)	Entropy 0.73287 (0.73390)	Top-1 acc 72.656 (71.858)	Top-5 acc 88.672 (88.671)	lr 0.00132
Train [103][1000/3239]	Time 2.630 (0.616)	Data Time 0.001 (0.043)	Loss 2.1390 (2.1910)	Entropy 0.73287 (0.73389)	Top-1 acc 75.000 (71.859)	Top-5 acc 88.281 (88.665)	lr 0.00132
Train [103][1010/3239]	Time 0.370 (0.612)	Data Time 0.001 (0.042)	Loss 2.2162 (2.1914)	Entropy 0.73280 (0.73388)	Top-1 acc 71.094 (71.844)	Top-5 acc 89.453 (88.657)	lr 0.00132
Train [103][1020/3239]	Time 0.230 (0.611)	Data Time 0.001 (0.042)	Loss 2.1127 (2.1911)	Entropy 0.73281 (0.73387)	Top-1 acc 72.656 (71.848)	Top-5 acc 87.891 (88.664)	lr 0.00132
Train [103][1030/3239]	Time 0.247 (0.609)	Data Time 0.001 (0.041)	Loss 2.2845 (2.1912)	Entropy 0.73279 (0.73386)	Top-1 acc 69.141 (71.852)	Top-5 acc 87.891 (88.665)	lr 0.00132
Train [103][1040/3239]	Time 0.244 (0.662)	Data Time 0.002 (0.041)	Loss 2.2818 (2.1912)	Entropy 0.73275 (0.73385)	Top-1 acc 72.266 (71.852)	Top-5 acc 85.156 (88.660)	lr 0.00132
Train [103][1050/3239]	Time 0.373 (0.661)	Data Time 0.002 (0.041)	Loss 2.1426 (2.1915)	Entropy 0.73276 (0.73384)	Top-1 acc 71.875 (71.850)	Top-5 acc 91.016 (88.653)	lr 0.00131
Train [103][1060/3239]	Time 0.241 (0.659)	Data Time 0.002 (0.040)	Loss 2.2863 (2.1918)	Entropy 0.73281 (0.73383)	Top-1 acc 67.578 (71.837)	Top-5 acc 88.281 (88.650)	lr 0.00131
Train [103][1070/3239]	Time 0.234 (0.658)	Data Time 0.001 (0.040)	Loss 2.1158 (2.1918)	Entropy 0.73277 (0.73382)	Top-1 acc 70.703 (71.838)	Top-5 acc 89.844 (88.650)	lr 0.00131
Train [103][1080/3239]	Time 0.228 (0.656)	Data Time 0.001 (0.040)	Loss 2.2294 (2.1917)	Entropy 0.73276 (0.73381)	Top-1 acc 70.312 (71.846)	Top-5 acc 87.891 (88.651)	lr 0.00131
Train [103][1090/3239]	Time 0.234 (0.655)	Data Time 0.001 (0.039)	Loss 2.1366 (2.1918)	Entropy 0.73271 (0.73380)	Top-1 acc 73.047 (71.840)	Top-5 acc 92.188 (88.653)	lr 0.00131
Train [103][1100/3239]	Time 0.231 (0.653)	Data Time 0.001 (0.039)	Loss 2.2575 (2.1918)	Entropy 0.73265 (0.73379)	Top-1 acc 69.922 (71.838)	Top-5 acc 88.281 (88.649)	lr 0.00131
Train [103][1110/3239]	Time 2.588 (0.652)	Data Time 0.001 (0.039)	Loss 2.1947 (2.1920)	Entropy 0.73265 (0.73378)	Top-1 acc 73.047 (71.834)	Top-5 acc 89.062 (88.648)	lr 0.00131
Train [103][1120/3239]	Time 0.241 (0.648)	Data Time 0.001 (0.038)	Loss 2.0846 (2.1921)	Entropy 0.73266 (0.73377)	Top-1 acc 75.391 (71.833)	Top-5 acc 91.406 (88.643)	lr 0.00131
Train [103][1130/3239]	Time 0.246 (0.647)	Data Time 0.001 (0.038)	Loss 2.1164 (2.1919)	Entropy 0.73266 (0.73376)	Top-1 acc 74.219 (71.839)	Top-5 acc 89.844 (88.648)	lr 0.00131
Train [103][1140/3239]	Time 0.232 (0.645)	Data Time 0.001 (0.038)	Loss 2.1707 (2.1921)	Entropy 0.73267 (0.73375)	Top-1 acc 71.875 (71.842)	Top-5 acc 89.844 (88.650)	lr 0.00131
Train [103][1150/3239]	Time 0.231 (0.644)	Data Time 0.001 (0.037)	Loss 2.2277 (2.1915)	Entropy 0.73270 (0.73374)	Top-1 acc 67.969 (71.859)	Top-5 acc 87.500 (88.654)	lr 0.00131
Train [103][1160/3239]	Time 0.235 (0.642)	Data Time 0.001 (0.037)	Loss 2.2362 (2.1916)	Entropy 0.73280 (0.73373)	Top-1 acc 73.438 (71.860)	Top-5 acc 86.719 (88.652)	lr 0.00131
Train [103][1170/3239]	Time 0.243 (0.641)	Data Time 0.001 (0.037)	Loss 2.0494 (2.1914)	Entropy 0.73284 (0.73372)	Top-1 acc 77.734 (71.866)	Top-5 acc 90.625 (88.655)	lr 0.00131
Train [103][1180/3239]	Time 0.229 (0.639)	Data Time 0.001 (0.036)	Loss 2.2717 (2.1912)	Entropy 0.73280 (0.73372)	Top-1 acc 70.312 (71.868)	Top-5 acc 87.891 (88.654)	lr 0.00131
Train [103][1190/3239]	Time 0.230 (0.638)	Data Time 0.001 (0.036)	Loss 2.2874 (2.1913)	Entropy 0.73281 (0.73371)	Top-1 acc 66.016 (71.865)	Top-5 acc 88.672 (88.654)	lr 0.00131
Train [103][1200/3239]	Time 0.252 (0.637)	Data Time 0.004 (0.036)	Loss 2.2072 (2.1917)	Entropy 0.73288 (0.73370)	Top-1 acc 69.531 (71.853)	Top-5 acc 87.109 (88.646)	lr 0.00131
Train [103][1210/3239]	Time 0.213 (0.636)	Data Time 0.001 (0.036)	Loss 2.1778 (2.1918)	Entropy 0.73286 (0.73370)	Top-1 acc 73.438 (71.852)	Top-5 acc 86.719 (88.642)	lr 0.00131
Train [103][1220/3239]	Time 2.611 (0.634)	Data Time 0.002 (0.035)	Loss 2.3439 (2.1918)	Entropy 0.73286 (0.73369)	Top-1 acc 71.484 (71.851)	Top-5 acc 85.938 (88.636)	lr 0.00131
Train [103][1230/3239]	Time 0.232 (0.631)	Data Time 0.001 (0.035)	Loss 2.3749 (2.1920)	Entropy 0.73287 (0.73368)	Top-1 acc 65.625 (71.848)	Top-5 acc 86.328 (88.633)	lr 0.00131
Train [103][1240/3239]	Time 0.266 (0.630)	Data Time 0.001 (0.035)	Loss 2.2586 (2.1923)	Entropy 0.73292 (0.73368)	Top-1 acc 68.359 (71.837)	Top-5 acc 89.062 (88.632)	lr 0.00131
Train [103][1250/3239]	Time 0.262 (0.629)	Data Time 0.001 (0.034)	Loss 2.1860 (2.1920)	Entropy 0.73288 (0.73367)	Top-1 acc 70.703 (71.843)	Top-5 acc 87.500 (88.635)	lr 0.00131
Train [103][1260/3239]	Time 0.335 (0.628)	Data Time 0.001 (0.034)	Loss 2.1874 (2.1922)	Entropy 0.73274 (0.73366)	Top-1 acc 73.047 (71.833)	Top-5 acc 86.719 (88.633)	lr 0.00131
Train [103][1270/3239]	Time 0.233 (0.626)	Data Time 0.001 (0.034)	Loss 2.3613 (2.1927)	Entropy 0.73272 (0.73365)	Top-1 acc 69.922 (71.822)	Top-5 acc 83.984 (88.623)	lr 0.00130
Train [103][1280/3239]	Time 0.239 (0.625)	Data Time 0.001 (0.034)	Loss 2.1270 (2.1926)	Entropy 0.73272 (0.73365)	Top-1 acc 72.656 (71.820)	Top-5 acc 90.625 (88.625)	lr 0.00130
Train [103][1290/3239]	Time 0.231 (0.624)	Data Time 0.001 (0.033)	Loss 2.1256 (2.1925)	Entropy 0.73265 (0.73364)	Top-1 acc 72.656 (71.814)	Top-5 acc 91.797 (88.627)	lr 0.00130
Train [103][1300/3239]	Time 0.255 (0.623)	Data Time 0.001 (0.033)	Loss 2.1931 (2.1928)	Entropy 0.73263 (0.73363)	Top-1 acc 73.047 (71.811)	Top-5 acc 89.844 (88.624)	lr 0.00130
Train [103][1310/3239]	Time 0.226 (0.622)	Data Time 0.001 (0.033)	Loss 2.3558 (2.1930)	Entropy 0.73264 (0.73362)	Top-1 acc 67.188 (71.800)	Top-5 acc 85.938 (88.621)	lr 0.00130
Train [103][1320/3239]	Time 0.231 (0.621)	Data Time 0.001 (0.033)	Loss 2.3500 (2.1932)	Entropy 0.73268 (0.73362)	Top-1 acc 65.234 (71.795)	Top-5 acc 85.547 (88.621)	lr 0.00130
Train [103][1330/3239]	Time 2.562 (0.620)	Data Time 0.001 (0.033)	Loss 2.1474 (2.1931)	Entropy 0.73268 (0.73361)	Top-1 acc 73.438 (71.798)	Top-5 acc 87.891 (88.618)	lr 0.00130
Train [103][1340/3239]	Time 0.247 (0.617)	Data Time 0.001 (0.032)	Loss 2.1910 (2.1929)	Entropy 0.73271 (0.73360)	Top-1 acc 72.266 (71.809)	Top-5 acc 91.016 (88.628)	lr 0.00130
Train [103][1350/3239]	Time 0.246 (0.616)	Data Time 0.001 (0.032)	Loss 2.0704 (2.1926)	Entropy 0.73268 (0.73360)	Top-1 acc 75.781 (71.814)	Top-5 acc 91.406 (88.633)	lr 0.00130
Train [103][1360/3239]	Time 0.271 (0.615)	Data Time 0.001 (0.032)	Loss 2.1568 (2.1927)	Entropy 0.73270 (0.73359)	Top-1 acc 72.656 (71.810)	Top-5 acc 87.109 (88.630)	lr 0.00130
Train [103][1370/3239]	Time 0.226 (0.614)	Data Time 0.001 (0.032)	Loss 2.1293 (2.1925)	Entropy 0.73275 (0.73358)	Top-1 acc 71.484 (71.813)	Top-5 acc 88.281 (88.636)	lr 0.00130
Train [103][1380/3239]	Time 0.232 (0.613)	Data Time 0.001 (0.031)	Loss 2.1761 (2.1925)	Entropy 0.73272 (0.73358)	Top-1 acc 72.266 (71.817)	Top-5 acc 89.453 (88.637)	lr 0.00130
Train [103][1390/3239]	Time 0.237 (0.612)	Data Time 0.001 (0.031)	Loss 2.1190 (2.1927)	Entropy 0.73274 (0.73357)	Top-1 acc 71.484 (71.814)	Top-5 acc 91.406 (88.637)	lr 0.00130
Train [103][1400/3239]	Time 0.238 (0.651)	Data Time 0.002 (0.031)	Loss 2.0531 (2.1928)	Entropy 0.73261 (0.73357)	Top-1 acc 78.516 (71.813)	Top-5 acc 90.234 (88.634)	lr 0.00130
Train [103][1410/3239]	Time 0.227 (0.650)	Data Time 0.002 (0.031)	Loss 2.2110 (2.1928)	Entropy 0.73262 (0.73356)	Top-1 acc 72.266 (71.813)	Top-5 acc 89.062 (88.639)	lr 0.00130
Train [103][1420/3239]	Time 0.235 (0.649)	Data Time 0.001 (0.031)	Loss 2.3082 (2.1931)	Entropy 0.73283 (0.73355)	Top-1 acc 66.797 (71.804)	Top-5 acc 87.891 (88.633)	lr 0.00130
Train [103][1430/3239]	Time 0.244 (0.648)	Data Time 0.002 (0.030)	Loss 2.1807 (2.1932)	Entropy 0.73278 (0.73355)	Top-1 acc 71.484 (71.800)	Top-5 acc 88.281 (88.636)	lr 0.00130
Train [103][1440/3239]	Time 2.635 (0.646)	Data Time 0.002 (0.030)	Loss 2.2573 (2.1931)	Entropy 0.73278 (0.73354)	Top-1 acc 71.094 (71.804)	Top-5 acc 89.062 (88.636)	lr 0.00130
Train [103][1450/3239]	Time 0.286 (0.644)	Data Time 0.005 (0.030)	Loss 2.1837 (2.1934)	Entropy 0.73271 (0.73354)	Top-1 acc 68.359 (71.804)	Top-5 acc 91.016 (88.629)	lr 0.00130
Train [103][1460/3239]	Time 0.230 (0.643)	Data Time 0.001 (0.030)	Loss 2.3364 (2.1934)	Entropy 0.73274 (0.73353)	Top-1 acc 69.922 (71.797)	Top-5 acc 86.719 (88.630)	lr 0.00130
Train [103][1470/3239]	Time 0.243 (0.642)	Data Time 0.001 (0.030)	Loss 2.1231 (2.1932)	Entropy 0.73275 (0.73353)	Top-1 acc 73.438 (71.800)	Top-5 acc 89.844 (88.629)	lr 0.00130
Train [103][1480/3239]	Time 0.231 (0.641)	Data Time 0.001 (0.029)	Loss 2.2110 (2.1932)	Entropy 0.73277 (0.73352)	Top-1 acc 71.484 (71.802)	Top-5 acc 88.281 (88.630)	lr 0.00130
Train [103][1490/3239]	Time 0.236 (0.640)	Data Time 0.001 (0.029)	Loss 2.1066 (2.1934)	Entropy 0.73274 (0.73352)	Top-1 acc 72.266 (71.793)	Top-5 acc 90.234 (88.624)	lr 0.00129
Train [103][1500/3239]	Time 0.237 (0.639)	Data Time 0.001 (0.029)	Loss 2.0699 (2.1932)	Entropy 0.73275 (0.73351)	Top-1 acc 74.609 (71.799)	Top-5 acc 89.453 (88.626)	lr 0.00129
Train [103][1510/3239]	Time 0.335 (0.638)	Data Time 0.001 (0.029)	Loss 2.1715 (2.1929)	Entropy 0.73271 (0.73350)	Top-1 acc 73.047 (71.808)	Top-5 acc 89.453 (88.638)	lr 0.00129
Train [103][1520/3239]	Time 0.237 (0.637)	Data Time 0.002 (0.029)	Loss 2.1827 (2.1926)	Entropy 0.73269 (0.73350)	Top-1 acc 70.703 (71.816)	Top-5 acc 87.109 (88.646)	lr 0.00129
Train [103][1530/3239]	Time 0.234 (0.636)	Data Time 0.002 (0.028)	Loss 2.1012 (2.1927)	Entropy 0.73270 (0.73349)	Top-1 acc 73.438 (71.810)	Top-5 acc 88.672 (88.643)	lr 0.00129
Train [103][1540/3239]	Time 0.269 (0.635)	Data Time 0.001 (0.028)	Loss 2.0289 (2.1925)	Entropy 0.73269 (0.73349)	Top-1 acc 78.125 (71.812)	Top-5 acc 90.625 (88.646)	lr 0.00129
Train [103][1550/3239]	Time 2.755 (0.634)	Data Time 0.001 (0.028)	Loss 2.3995 (2.1930)	Entropy 0.73269 (0.73348)	Top-1 acc 65.625 (71.797)	Top-5 acc 85.547 (88.640)	lr 0.00129
Train [103][1560/3239]	Time 0.229 (0.632)	Data Time 0.001 (0.028)	Loss 2.2257 (2.1928)	Entropy 0.73277 (0.73348)	Top-1 acc 71.094 (71.799)	Top-5 acc 86.719 (88.641)	lr 0.00129
Train [103][1570/3239]	Time 0.282 (0.631)	Data Time 0.001 (0.028)	Loss 2.2028 (2.1927)	Entropy 0.73265 (0.73347)	Top-1 acc 72.266 (71.806)	Top-5 acc 86.719 (88.642)	lr 0.00129
Train [103][1580/3239]	Time 0.233 (0.630)	Data Time 0.001 (0.028)	Loss 2.2453 (2.1930)	Entropy 0.73246 (0.73347)	Top-1 acc 72.266 (71.800)	Top-5 acc 86.328 (88.634)	lr 0.00129
Train [103][1590/3239]	Time 0.239 (0.629)	Data Time 0.001 (0.027)	Loss 2.1678 (2.1931)	Entropy 0.73242 (0.73346)	Top-1 acc 73.047 (71.796)	Top-5 acc 89.453 (88.631)	lr 0.00129
Train [103][1600/3239]	Time 0.241 (0.628)	Data Time 0.001 (0.027)	Loss 2.2155 (2.1931)	Entropy 0.73235 (0.73345)	Top-1 acc 69.531 (71.793)	Top-5 acc 87.891 (88.632)	lr 0.00129
Train [103][1610/3239]	Time 0.239 (0.627)	Data Time 0.001 (0.027)	Loss 2.1510 (2.1930)	Entropy 0.73232 (0.73345)	Top-1 acc 74.609 (71.795)	Top-5 acc 89.453 (88.635)	lr 0.00129
Train [103][1620/3239]	Time 0.252 (0.627)	Data Time 0.001 (0.027)	Loss 2.2372 (2.1930)	Entropy 0.73238 (0.73344)	Top-1 acc 69.531 (71.798)	Top-5 acc 86.328 (88.636)	lr 0.00129
Train [103][1630/3239]	Time 0.259 (0.626)	Data Time 0.001 (0.027)	Loss 2.1026 (2.1930)	Entropy 0.73237 (0.73343)	Top-1 acc 74.219 (71.799)	Top-5 acc 89.844 (88.637)	lr 0.00129
Train [103][1640/3239]	Time 0.263 (0.625)	Data Time 0.001 (0.027)	Loss 2.1622 (2.1932)	Entropy 0.73237 (0.73343)	Top-1 acc 70.312 (71.799)	Top-5 acc 90.234 (88.635)	lr 0.00129
Train [103][1650/3239]	Time 0.236 (0.624)	Data Time 0.001 (0.027)	Loss 2.2193 (2.1933)	Entropy 0.73235 (0.73342)	Top-1 acc 68.359 (71.795)	Top-5 acc 91.406 (88.634)	lr 0.00129
Train [103][1660/3239]	Time 2.691 (0.623)	Data Time 0.001 (0.026)	Loss 1.9741 (2.1933)	Entropy 0.73235 (0.73342)	Top-1 acc 78.125 (71.789)	Top-5 acc 91.797 (88.637)	lr 0.00129
Train [103][1670/3239]	Time 0.286 (0.621)	Data Time 0.001 (0.026)	Loss 2.1318 (2.1934)	Entropy 0.73229 (0.73341)	Top-1 acc 73.438 (71.786)	Top-5 acc 89.453 (88.639)	lr 0.00129
Train [103][1680/3239]	Time 0.257 (0.620)	Data Time 0.002 (0.026)	Loss 2.2318 (2.1931)	Entropy 0.73233 (0.73340)	Top-1 acc 71.094 (71.792)	Top-5 acc 90.234 (88.643)	lr 0.00129
Train [103][1690/3239]	Time 0.253 (0.620)	Data Time 0.001 (0.026)	Loss 2.1848 (2.1930)	Entropy 0.73232 (0.73340)	Top-1 acc 72.656 (71.798)	Top-5 acc 87.891 (88.644)	lr 0.00129
Train [103][1700/3239]	Time 0.255 (0.619)	Data Time 0.001 (0.026)	Loss 2.2465 (2.1932)	Entropy 0.73230 (0.73339)	Top-1 acc 67.969 (71.796)	Top-5 acc 88.672 (88.642)	lr 0.00129
Train [103][1710/3239]	Time 0.240 (0.618)	Data Time 0.002 (0.026)	Loss 2.2390 (2.1935)	Entropy 0.73232 (0.73338)	Top-1 acc 71.484 (71.792)	Top-5 acc 88.281 (88.639)	lr 0.00129
Train [103][1720/3239]	Time 0.348 (0.617)	Data Time 0.001 (0.026)	Loss 2.1216 (2.1936)	Entropy 0.73238 (0.73338)	Top-1 acc 73.047 (71.786)	Top-5 acc 89.844 (88.640)	lr 0.00128
Train [103][1730/3239]	Time 0.236 (0.617)	Data Time 0.001 (0.025)	Loss 2.1311 (2.1937)	Entropy 0.73237 (0.73337)	Top-1 acc 72.656 (71.783)	Top-5 acc 88.672 (88.637)	lr 0.00128
Train [103][1740/3239]	Time 0.219 (0.616)	Data Time 0.001 (0.025)	Loss 2.2104 (2.1936)	Entropy 0.73239 (0.73337)	Top-1 acc 68.750 (71.785)	Top-5 acc 89.062 (88.640)	lr 0.00128
Train [103][1750/3239]	Time 0.237 (0.615)	Data Time 0.002 (0.025)	Loss 2.0672 (2.1931)	Entropy 0.73239 (0.73336)	Top-1 acc 76.562 (71.795)	Top-5 acc 91.406 (88.649)	lr 0.00128
Train [103][1760/3239]	Time 0.473 (0.644)	Data Time 0.005 (0.025)	Loss 2.0224 (2.1931)	Entropy 0.73239 (0.73335)	Top-1 acc 73.438 (71.790)	Top-5 acc 91.406 (88.649)	lr 0.00128
Train [103][1770/3239]	Time 2.620 (0.644)	Data Time 0.002 (0.025)	Loss 2.0860 (2.1931)	Entropy 0.73239 (0.73335)	Top-1 acc 74.219 (71.793)	Top-5 acc 89.062 (88.650)	lr 0.00128
Train [103][1780/3239]	Time 0.287 (0.642)	Data Time 0.002 (0.025)	Loss 2.2995 (2.1929)	Entropy 0.73242 (0.73334)	Top-1 acc 70.312 (71.795)	Top-5 acc 86.719 (88.650)	lr 0.00128
Train [103][1790/3239]	Time 0.245 (0.641)	Data Time 0.001 (0.025)	Loss 2.2279 (2.1926)	Entropy 0.73243 (0.73334)	Top-1 acc 71.094 (71.800)	Top-5 acc 88.281 (88.656)	lr 0.00128
Train [103][1800/3239]	Time 0.341 (0.640)	Data Time 0.001 (0.025)	Loss 2.2379 (2.1926)	Entropy 0.73237 (0.73333)	Top-1 acc 69.922 (71.797)	Top-5 acc 88.281 (88.659)	lr 0.00128
Train [103][1810/3239]	Time 0.250 (0.639)	Data Time 0.001 (0.024)	Loss 2.2238 (2.1925)	Entropy 0.73244 (0.73333)	Top-1 acc 71.875 (71.805)	Top-5 acc 88.672 (88.663)	lr 0.00128
Train [103][1820/3239]	Time 0.240 (0.639)	Data Time 0.001 (0.024)	Loss 2.2522 (2.1924)	Entropy 0.73243 (0.73332)	Top-1 acc 69.141 (71.808)	Top-5 acc 87.109 (88.665)	lr 0.00128
Train [103][1830/3239]	Time 0.243 (0.638)	Data Time 0.001 (0.024)	Loss 2.2501 (2.1922)	Entropy 0.73249 (0.73332)	Top-1 acc 73.438 (71.813)	Top-5 acc 87.109 (88.667)	lr 0.00128
Train [103][1840/3239]	Time 0.326 (0.637)	Data Time 0.001 (0.024)	Loss 2.0124 (2.1922)	Entropy 0.73243 (0.73331)	Top-1 acc 76.953 (71.814)	Top-5 acc 91.406 (88.669)	lr 0.00128
Train [103][1850/3239]	Time 0.244 (0.636)	Data Time 0.001 (0.024)	Loss 2.2449 (2.1923)	Entropy 0.73240 (0.73331)	Top-1 acc 70.312 (71.819)	Top-5 acc 85.938 (88.666)	lr 0.00128
Train [103][1860/3239]	Time 0.261 (0.636)	Data Time 0.002 (0.024)	Loss 2.3467 (2.1923)	Entropy 0.73234 (0.73330)	Top-1 acc 66.406 (71.814)	Top-5 acc 85.547 (88.662)	lr 0.00128
Train [103][1870/3239]	Time 0.237 (0.635)	Data Time 0.001 (0.024)	Loss 2.0945 (2.1922)	Entropy 0.73228 (0.73330)	Top-1 acc 71.484 (71.813)	Top-5 acc 91.016 (88.662)	lr 0.00128
Train [103][1880/3239]	Time 2.800 (0.634)	Data Time 0.001 (0.024)	Loss 2.1990 (2.1923)	Entropy 0.73228 (0.73329)	Top-1 acc 71.094 (71.810)	Top-5 acc 87.891 (88.660)	lr 0.00128
Train [103][1890/3239]	Time 0.225 (0.632)	Data Time 0.001 (0.023)	Loss 2.2869 (2.1923)	Entropy 0.73228 (0.73329)	Top-1 acc 68.359 (71.813)	Top-5 acc 87.500 (88.662)	lr 0.00128
Train [103][1900/3239]	Time 0.248 (0.631)	Data Time 0.001 (0.023)	Loss 2.1363 (2.1922)	Entropy 0.73228 (0.73328)	Top-1 acc 70.312 (71.813)	Top-5 acc 88.672 (88.663)	lr 0.00128
Train [103][1910/3239]	Time 0.249 (0.631)	Data Time 0.002 (0.023)	Loss 2.1788 (2.1922)	Entropy 0.73215 (0.73328)	Top-1 acc 73.047 (71.816)	Top-5 acc 90.234 (88.662)	lr 0.00128
Train [103][1920/3239]	Time 0.247 (0.630)	Data Time 0.001 (0.023)	Loss 2.2136 (2.1921)	Entropy 0.73211 (0.73327)	Top-1 acc 73.828 (71.820)	Top-5 acc 88.281 (88.665)	lr 0.00128
Train [103][1930/3239]	Time 0.228 (0.629)	Data Time 0.001 (0.023)	Loss 2.3981 (2.1919)	Entropy 0.73207 (0.73327)	Top-1 acc 67.188 (71.826)	Top-5 acc 83.594 (88.668)	lr 0.00128
Train [103][1940/3239]	Time 0.244 (0.629)	Data Time 0.002 (0.023)	Loss 2.3008 (2.1920)	Entropy 0.73208 (0.73326)	Top-1 acc 68.750 (71.820)	Top-5 acc 86.328 (88.665)	lr 0.00127
Train [103][1950/3239]	Time 0.233 (0.628)	Data Time 0.001 (0.023)	Loss 2.1711 (2.1918)	Entropy 0.73207 (0.73325)	Top-1 acc 72.656 (71.825)	Top-5 acc 89.062 (88.668)	lr 0.00127
Train [103][1960/3239]	Time 0.239 (0.627)	Data Time 0.001 (0.023)	Loss 2.1022 (2.1918)	Entropy 0.73202 (0.73325)	Top-1 acc 76.953 (71.827)	Top-5 acc 90.625 (88.671)	lr 0.00127
Train [103][1970/3239]	Time 0.239 (0.627)	Data Time 0.001 (0.023)	Loss 2.0774 (2.1916)	Entropy 0.73198 (0.73324)	Top-1 acc 76.172 (71.828)	Top-5 acc 90.625 (88.675)	lr 0.00127
Train [103][1980/3239]	Time 0.231 (0.626)	Data Time 0.001 (0.022)	Loss 2.1425 (2.1917)	Entropy 0.73183 (0.73323)	Top-1 acc 70.312 (71.828)	Top-5 acc 88.672 (88.669)	lr 0.00127
Train [103][1990/3239]	Time 2.735 (0.625)	Data Time 0.001 (0.022)	Loss 2.1629 (2.1917)	Entropy 0.73183 (0.73323)	Top-1 acc 71.484 (71.827)	Top-5 acc 89.062 (88.669)	lr 0.00127
Train [103][2000/3239]	Time 0.225 (0.623)	Data Time 0.001 (0.022)	Loss 2.1817 (2.1914)	Entropy 0.73167 (0.73322)	Top-1 acc 73.828 (71.832)	Top-5 acc 89.453 (88.677)	lr 0.00127
Train [103][2010/3239]	Time 0.236 (0.623)	Data Time 0.001 (0.022)	Loss 2.0250 (2.1914)	Entropy 0.73161 (0.73321)	Top-1 acc 75.781 (71.829)	Top-5 acc 91.406 (88.676)	lr 0.00127
Train [103][2020/3239]	Time 0.242 (0.622)	Data Time 0.001 (0.022)	Loss 2.4011 (2.1916)	Entropy 0.73151 (0.73320)	Top-1 acc 67.969 (71.826)	Top-5 acc 83.984 (88.671)	lr 0.00127
Train [103][2030/3239]	Time 0.250 (0.621)	Data Time 0.001 (0.022)	Loss 2.1287 (2.1917)	Entropy 0.73146 (0.73319)	Top-1 acc 75.391 (71.826)	Top-5 acc 90.625 (88.671)	lr 0.00127
Train [103][2040/3239]	Time 0.226 (0.621)	Data Time 0.001 (0.022)	Loss 2.1957 (2.1917)	Entropy 0.73145 (0.73319)	Top-1 acc 70.703 (71.827)	Top-5 acc 90.234 (88.670)	lr 0.00127
Train [103][2050/3239]	Time 0.350 (0.620)	Data Time 0.002 (0.022)	Loss 2.0456 (2.1918)	Entropy 0.73150 (0.73318)	Top-1 acc 74.609 (71.830)	Top-5 acc 90.625 (88.672)	lr 0.00127
Train [103][2060/3239]	Time 0.237 (0.620)	Data Time 0.001 (0.022)	Loss 2.2699 (2.1917)	Entropy 0.73147 (0.73317)	Top-1 acc 72.656 (71.832)	Top-5 acc 86.719 (88.671)	lr 0.00127
Train [103][2070/3239]	Time 0.281 (0.619)	Data Time 0.002 (0.022)	Loss 2.1948 (2.1917)	Entropy 0.73148 (0.73316)	Top-1 acc 75.391 (71.835)	Top-5 acc 88.672 (88.672)	lr 0.00127
Train [103][2080/3239]	Time 0.234 (0.618)	Data Time 0.001 (0.021)	Loss 2.1856 (2.1917)	Entropy 0.73148 (0.73315)	Top-1 acc 71.484 (71.830)	Top-5 acc 89.844 (88.674)	lr 0.00127
Train [103][2090/3239]	Time 0.275 (0.618)	Data Time 0.002 (0.021)	Loss 2.0966 (2.1916)	Entropy 0.73143 (0.73315)	Top-1 acc 76.172 (71.833)	Top-5 acc 88.672 (88.678)	lr 0.00127
Train [103][2100/3239]	Time 2.653 (0.617)	Data Time 0.001 (0.021)	Loss 2.2629 (2.1915)	Entropy 0.73143 (0.73314)	Top-1 acc 73.047 (71.841)	Top-5 acc 86.328 (88.679)	lr 0.00127
Train [103][2110/3239]	Time 0.225 (0.616)	Data Time 0.001 (0.021)	Loss 2.1517 (2.1918)	Entropy 0.73148 (0.73313)	Top-1 acc 71.094 (71.836)	Top-5 acc 88.672 (88.675)	lr 0.00127
Train [103][2120/3239]	Time 0.243 (0.615)	Data Time 0.001 (0.021)	Loss 2.0937 (2.1917)	Entropy 0.73155 (0.73312)	Top-1 acc 74.609 (71.843)	Top-5 acc 92.969 (88.681)	lr 0.00127
Train [103][2130/3239]	Time 0.415 (0.639)	Data Time 0.004 (0.021)	Loss 2.1209 (2.1918)	Entropy 0.73153 (0.73311)	Top-1 acc 70.703 (71.838)	Top-5 acc 88.672 (88.679)	lr 0.00127
Train [103][2140/3239]	Time 0.248 (0.639)	Data Time 0.002 (0.021)	Loss 2.3384 (2.1919)	Entropy 0.73158 (0.73311)	Top-1 acc 67.969 (71.832)	Top-5 acc 83.594 (88.676)	lr 0.00127
Train [103][2150/3239]	Time 0.239 (0.638)	Data Time 0.002 (0.021)	Loss 2.1200 (2.1919)	Entropy 0.73163 (0.73310)	Top-1 acc 74.219 (71.835)	Top-5 acc 89.453 (88.675)	lr 0.00127
Train [103][2160/3239]	Time 0.230 (0.638)	Data Time 0.001 (0.021)	Loss 2.1550 (2.1916)	Entropy 0.73157 (0.73309)	Top-1 acc 73.828 (71.836)	Top-5 acc 90.625 (88.683)	lr 0.00127
Train [103][2170/3239]	Time 0.242 (0.637)	Data Time 0.001 (0.021)	Loss 2.1478 (2.1916)	Entropy 0.73151 (0.73309)	Top-1 acc 73.828 (71.836)	Top-5 acc 89.844 (88.682)	lr 0.00126
Train [103][2180/3239]	Time 0.233 (0.636)	Data Time 0.001 (0.021)	Loss 2.1458 (2.1921)	Entropy 0.73146 (0.73308)	Top-1 acc 72.266 (71.822)	Top-5 acc 87.891 (88.671)	lr 0.00126
Train [103][2190/3239]	Time 0.221 (0.635)	Data Time 0.001 (0.020)	Loss 2.3113 (2.1920)	Entropy 0.73148 (0.73307)	Top-1 acc 70.312 (71.826)	Top-5 acc 85.938 (88.673)	lr 0.00126
Train [103][2200/3239]	Time 0.238 (0.635)	Data Time 0.001 (0.020)	Loss 1.9822 (2.1919)	Entropy 0.73149 (0.73306)	Top-1 acc 77.344 (71.828)	Top-5 acc 94.531 (88.674)	lr 0.00126
Train [103][2210/3239]	Time 2.599 (0.634)	Data Time 0.001 (0.020)	Loss 2.0512 (2.1920)	Entropy 0.73149 (0.73306)	Top-1 acc 74.609 (71.825)	Top-5 acc 91.797 (88.675)	lr 0.00126
Train [103][2220/3239]	Time 0.381 (0.632)	Data Time 0.001 (0.020)	Loss 2.1155 (2.1920)	Entropy 0.73144 (0.73305)	Top-1 acc 71.094 (71.825)	Top-5 acc 89.453 (88.676)	lr 0.00126
Train [103][2230/3239]	Time 0.235 (0.632)	Data Time 0.001 (0.020)	Loss 2.2391 (2.1920)	Entropy 0.73145 (0.73304)	Top-1 acc 73.438 (71.823)	Top-5 acc 86.719 (88.675)	lr 0.00126
Train [103][2240/3239]	Time 0.215 (0.631)	Data Time 0.001 (0.020)	Loss 2.2242 (2.1921)	Entropy 0.73140 (0.73304)	Top-1 acc 69.531 (71.824)	Top-5 acc 87.891 (88.672)	lr 0.00126
Train [103][2250/3239]	Time 0.238 (0.630)	Data Time 0.001 (0.020)	Loss 2.2909 (2.1922)	Entropy 0.73135 (0.73303)	Top-1 acc 71.875 (71.822)	Top-5 acc 86.328 (88.671)	lr 0.00126
Train [103][2260/3239]	Time 0.349 (0.630)	Data Time 0.001 (0.020)	Loss 2.1187 (2.1922)	Entropy 0.73141 (0.73302)	Top-1 acc 74.609 (71.824)	Top-5 acc 89.844 (88.671)	lr 0.00126
Train [103][2270/3239]	Time 0.258 (0.629)	Data Time 0.001 (0.020)	Loss 2.2812 (2.1923)	Entropy 0.73140 (0.73301)	Top-1 acc 69.141 (71.822)	Top-5 acc 85.938 (88.668)	lr 0.00126
Train [103][2280/3239]	Time 0.227 (0.628)	Data Time 0.001 (0.020)	Loss 2.2390 (2.1923)	Entropy 0.73130 (0.73301)	Top-1 acc 69.531 (71.822)	Top-5 acc 87.500 (88.667)	lr 0.00126
Train [103][2290/3239]	Time 0.231 (0.628)	Data Time 0.001 (0.020)	Loss 2.1613 (2.1923)	Entropy 0.73124 (0.73300)	Top-1 acc 71.094 (71.824)	Top-5 acc 91.797 (88.668)	lr 0.00126
Train [103][2300/3239]	Time 0.340 (0.627)	Data Time 0.001 (0.020)	Loss 2.1732 (2.1923)	Entropy 0.73115 (0.73299)	Top-1 acc 73.047 (71.826)	Top-5 acc 88.672 (88.671)	lr 0.00126
Train [103][2310/3239]	Time 0.238 (0.627)	Data Time 0.001 (0.020)	Loss 2.1632 (2.1925)	Entropy 0.73117 (0.73298)	Top-1 acc 70.312 (71.821)	Top-5 acc 88.281 (88.668)	lr 0.00126
Train [103][2320/3239]	Time 2.659 (0.626)	Data Time 0.002 (0.019)	Loss 2.0860 (2.1923)	Entropy 0.73117 (0.73298)	Top-1 acc 72.266 (71.823)	Top-5 acc 92.188 (88.671)	lr 0.00126
Train [103][2330/3239]	Time 0.227 (0.624)	Data Time 0.001 (0.019)	Loss 2.0992 (2.1923)	Entropy 0.73116 (0.73297)	Top-1 acc 73.828 (71.823)	Top-5 acc 89.453 (88.669)	lr 0.00126
Train [103][2340/3239]	Time 0.237 (0.624)	Data Time 0.001 (0.019)	Loss 2.2425 (2.1924)	Entropy 0.73111 (0.73296)	Top-1 acc 71.094 (71.819)	Top-5 acc 87.891 (88.665)	lr 0.00126
Train [103][2350/3239]	Time 0.233 (0.623)	Data Time 0.001 (0.019)	Loss 2.1294 (2.1925)	Entropy 0.73109 (0.73295)	Top-1 acc 72.266 (71.815)	Top-5 acc 90.234 (88.666)	lr 0.00126
Train [103][2360/3239]	Time 0.238 (0.623)	Data Time 0.005 (0.019)	Loss 2.1508 (2.1925)	Entropy 0.73109 (0.73294)	Top-1 acc 73.438 (71.815)	Top-5 acc 89.062 (88.665)	lr 0.00126
Train [103][2370/3239]	Time 0.241 (0.622)	Data Time 0.002 (0.019)	Loss 2.2683 (2.1926)	Entropy 0.73102 (0.73294)	Top-1 acc 66.797 (71.812)	Top-5 acc 89.453 (88.665)	lr 0.00126
Train [103][2380/3239]	Time 0.249 (0.621)	Data Time 0.001 (0.019)	Loss 2.1028 (2.1928)	Entropy 0.73106 (0.73293)	Top-1 acc 73.047 (71.807)	Top-5 acc 89.062 (88.663)	lr 0.00126
Train [103][2390/3239]	Time 0.235 (0.621)	Data Time 0.001 (0.019)	Loss 2.2591 (2.1929)	Entropy 0.73101 (0.73292)	Top-1 acc 70.703 (71.804)	Top-5 acc 86.328 (88.656)	lr 0.00125
Train [103][2400/3239]	Time 0.229 (0.620)	Data Time 0.002 (0.019)	Loss 2.1751 (2.1929)	Entropy 0.73104 (0.73291)	Top-1 acc 67.969 (71.804)	Top-5 acc 90.234 (88.656)	lr 0.00125
Train [103][2410/3239]	Time 0.222 (0.620)	Data Time 0.001 (0.019)	Loss 2.2108 (2.1928)	Entropy 0.73103 (0.73290)	Top-1 acc 69.922 (71.809)	Top-5 acc 87.109 (88.658)	lr 0.00125
Train [103][2420/3239]	Time 0.228 (0.619)	Data Time 0.001 (0.019)	Loss 2.1862 (2.1929)	Entropy 0.73094 (0.73290)	Top-1 acc 72.266 (71.807)	Top-5 acc 89.844 (88.656)	lr 0.00125
Train [103][2430/3239]	Time 2.786 (0.619)	Data Time 0.001 (0.019)	Loss 2.1009 (2.1930)	Entropy 0.73094 (0.73289)	Top-1 acc 75.391 (71.802)	Top-5 acc 89.453 (88.655)	lr 0.00125
Train [103][2440/3239]	Time 0.225 (0.617)	Data Time 0.001 (0.019)	Loss 1.9883 (2.1930)	Entropy 0.73080 (0.73288)	Top-1 acc 76.953 (71.803)	Top-5 acc 90.625 (88.655)	lr 0.00125
Train [103][2450/3239]	Time 0.253 (0.617)	Data Time 0.001 (0.018)	Loss 2.2051 (2.1931)	Entropy 0.73078 (0.73287)	Top-1 acc 70.312 (71.801)	Top-5 acc 89.453 (88.653)	lr 0.00125
Train [103][2460/3239]	Time 0.231 (0.616)	Data Time 0.001 (0.018)	Loss 2.2571 (2.1930)	Entropy 0.73076 (0.73286)	Top-1 acc 70.312 (71.802)	Top-5 acc 87.109 (88.653)	lr 0.00125
Train [103][2470/3239]	Time 0.353 (0.616)	Data Time 0.001 (0.018)	Loss 2.1755 (2.1930)	Entropy 0.73071 (0.73285)	Top-1 acc 72.656 (71.803)	Top-5 acc 88.281 (88.653)	lr 0.00125
Train [103][2480/3239]	Time 0.227 (0.615)	Data Time 0.001 (0.018)	Loss 2.2348 (2.1930)	Entropy 0.73068 (0.73285)	Top-1 acc 72.656 (71.806)	Top-5 acc 86.719 (88.655)	lr 0.00125
Train [103][2490/3239]	Time 0.235 (0.637)	Data Time 0.002 (0.018)	Loss 2.0515 (2.1928)	Entropy 0.73055 (0.73284)	Top-1 acc 75.781 (71.808)	Top-5 acc 91.406 (88.658)	lr 0.00125
Train [103][2500/3239]	Time 0.246 (0.636)	Data Time 0.002 (0.018)	Loss 2.2306 (2.1927)	Entropy 0.73054 (0.73283)	Top-1 acc 72.656 (71.811)	Top-5 acc 87.109 (88.660)	lr 0.00125
Train [103][2510/3239]	Time 0.356 (0.636)	Data Time 0.002 (0.018)	Loss 2.0972 (2.1927)	Entropy 0.73053 (0.73282)	Top-1 acc 71.875 (71.808)	Top-5 acc 89.453 (88.657)	lr 0.00125
Train [103][2520/3239]	Time 0.257 (0.635)	Data Time 0.002 (0.018)	Loss 2.2812 (2.1927)	Entropy 0.73041 (0.73281)	Top-1 acc 66.406 (71.809)	Top-5 acc 87.891 (88.657)	lr 0.00125
Train [103][2530/3239]	Time 0.228 (0.634)	Data Time 0.001 (0.018)	Loss 2.2288 (2.1925)	Entropy 0.73033 (0.73280)	Top-1 acc 70.312 (71.810)	Top-5 acc 87.500 (88.661)	lr 0.00125
Train [103][2540/3239]	Time 2.662 (0.634)	Data Time 0.002 (0.018)	Loss 2.1374 (2.1931)	Entropy 0.73033 (0.73279)	Top-1 acc 78.125 (71.797)	Top-5 acc 86.719 (88.652)	lr 0.00125
Train [103][2550/3239]	Time 0.243 (0.632)	Data Time 0.001 (0.018)	Loss 2.0323 (2.1930)	Entropy 0.73046 (0.73278)	Top-1 acc 76.562 (71.799)	Top-5 acc 91.016 (88.653)	lr 0.00125
Train [103][2560/3239]	Time 0.244 (0.632)	Data Time 0.001 (0.018)	Loss 2.1183 (2.1930)	Entropy 0.73040 (0.73277)	Top-1 acc 74.609 (71.800)	Top-5 acc 88.672 (88.652)	lr 0.00125
Train [103][2570/3239]	Time 0.231 (0.631)	Data Time 0.001 (0.018)	Loss 2.3002 (2.1930)	Entropy 0.73042 (0.73276)	Top-1 acc 69.922 (71.801)	Top-5 acc 86.719 (88.652)	lr 0.00125
Train [103][2580/3239]	Time 0.238 (0.631)	Data Time 0.001 (0.018)	Loss 2.2013 (2.1932)	Entropy 0.73033 (0.73275)	Top-1 acc 69.531 (71.794)	Top-5 acc 89.453 (88.648)	lr 0.00125
Train [103][2590/3239]	Time 0.257 (0.630)	Data Time 0.001 (0.018)	Loss 2.1419 (2.1931)	Entropy 0.73033 (0.73274)	Top-1 acc 77.344 (71.796)	Top-5 acc 91.406 (88.650)	lr 0.00125
Train [103][2600/3239]	Time 0.243 (0.630)	Data Time 0.001 (0.018)	Loss 2.1566 (2.1930)	Entropy 0.73032 (0.73273)	Top-1 acc 73.828 (71.802)	Top-5 acc 89.453 (88.653)	lr 0.00125
Train [103][2610/3239]	Time 0.227 (0.629)	Data Time 0.001 (0.017)	Loss 2.2057 (2.1933)	Entropy 0.73035 (0.73272)	Top-1 acc 72.266 (71.797)	Top-5 acc 87.891 (88.647)	lr 0.00125
Train [103][2620/3239]	Time 0.243 (0.628)	Data Time 0.001 (0.017)	Loss 2.0700 (2.1932)	Entropy 0.73031 (0.73272)	Top-1 acc 73.828 (71.799)	Top-5 acc 90.625 (88.648)	lr 0.00124
Train [103][2630/3239]	Time 0.223 (0.628)	Data Time 0.001 (0.017)	Loss 2.2139 (2.1934)	Entropy 0.73021 (0.73271)	Top-1 acc 71.875 (71.796)	Top-5 acc 88.281 (88.645)	lr 0.00124
Train [103][2640/3239]	Time 0.329 (0.627)	Data Time 0.001 (0.017)	Loss 2.2965 (2.1935)	Entropy 0.73020 (0.73270)	Top-1 acc 68.359 (71.792)	Top-5 acc 87.500 (88.643)	lr 0.00124
Train [103][2650/3239]	Time 0.247 (0.627)	Data Time 0.001 (0.017)	Loss 2.1663 (2.1937)	Entropy 0.73021 (0.73269)	Top-1 acc 71.875 (71.791)	Top-5 acc 88.672 (88.638)	lr 0.00124
Train [103][2660/3239]	Time 0.230 (0.626)	Data Time 0.001 (0.017)	Loss 2.2489 (2.1936)	Entropy 0.73029 (0.73268)	Top-1 acc 67.969 (71.796)	Top-5 acc 86.719 (88.640)	lr 0.00124
Train [103][2670/3239]	Time 0.225 (0.626)	Data Time 0.001 (0.017)	Loss 2.4008 (2.1936)	Entropy 0.73030 (0.73267)	Top-1 acc 69.141 (71.794)	Top-5 acc 83.984 (88.639)	lr 0.00124
Train [103][2680/3239]	Time 0.349 (0.625)	Data Time 0.001 (0.017)	Loss 2.3966 (2.1938)	Entropy 0.73023 (0.73266)	Top-1 acc 67.969 (71.792)	Top-5 acc 85.547 (88.638)	lr 0.00124
Train [103][2690/3239]	Time 0.263 (0.625)	Data Time 0.001 (0.017)	Loss 2.2678 (2.1938)	Entropy 0.73024 (0.73265)	Top-1 acc 72.656 (71.789)	Top-5 acc 87.500 (88.639)	lr 0.00124
Train [103][2700/3239]	Time 0.232 (0.624)	Data Time 0.001 (0.017)	Loss 2.2429 (2.1938)	Entropy 0.73028 (0.73264)	Top-1 acc 71.875 (71.791)	Top-5 acc 88.281 (88.637)	lr 0.00124
Train [103][2710/3239]	Time 0.262 (0.624)	Data Time 0.001 (0.017)	Loss 2.1505 (2.1940)	Entropy 0.73027 (0.73263)	Top-1 acc 68.359 (71.786)	Top-5 acc 90.625 (88.634)	lr 0.00124
Train [103][2720/3239]	Time 0.330 (0.623)	Data Time 0.002 (0.017)	Loss 2.2355 (2.1940)	Entropy 0.73019 (0.73263)	Top-1 acc 70.312 (71.788)	Top-5 acc 86.328 (88.632)	lr 0.00124
Train [103][2730/3239]	Time 0.230 (0.623)	Data Time 0.001 (0.017)	Loss 2.1354 (2.1941)	Entropy 0.73019 (0.73262)	Top-1 acc 69.141 (71.782)	Top-5 acc 89.062 (88.628)	lr 0.00124
Train [103][2740/3239]	Time 0.237 (0.622)	Data Time 0.001 (0.017)	Loss 2.3057 (2.1941)	Entropy 0.73017 (0.73261)	Top-1 acc 68.750 (71.784)	Top-5 acc 87.891 (88.630)	lr 0.00124
Train [103][2750/3239]	Time 0.230 (0.622)	Data Time 0.001 (0.017)	Loss 2.0440 (2.1940)	Entropy 0.73027 (0.73260)	Top-1 acc 75.000 (71.786)	Top-5 acc 91.406 (88.630)	lr 0.00124
Train [103][2760/3239]	Time 0.234 (0.621)	Data Time 0.002 (0.017)	Loss 2.1603 (2.1940)	Entropy 0.73022 (0.73259)	Top-1 acc 71.484 (71.789)	Top-5 acc 88.281 (88.631)	lr 0.00124
Train [103][2770/3239]	Time 0.275 (0.621)	Data Time 0.001 (0.017)	Loss 2.3239 (2.1940)	Entropy 0.73024 (0.73258)	Top-1 acc 69.141 (71.788)	Top-5 acc 85.156 (88.630)	lr 0.00124
Train [103][2780/3239]	Time 0.222 (0.620)	Data Time 0.001 (0.016)	Loss 2.1646 (2.1939)	Entropy 0.73024 (0.73257)	Top-1 acc 73.047 (71.791)	Top-5 acc 89.062 (88.634)	lr 0.00124
Train [103][2790/3239]	Time 0.234 (0.620)	Data Time 0.001 (0.016)	Loss 2.1680 (2.1938)	Entropy 0.73007 (0.73256)	Top-1 acc 69.531 (71.792)	Top-5 acc 91.016 (88.637)	lr 0.00124
Train [103][2800/3239]	Time 0.227 (0.619)	Data Time 0.001 (0.016)	Loss 2.1896 (2.1937)	Entropy 0.73010 (0.73256)	Top-1 acc 74.219 (71.797)	Top-5 acc 88.672 (88.639)	lr 0.00124
Train [103][2810/3239]	Time 0.217 (0.618)	Data Time 0.001 (0.016)	Loss 2.3350 (2.1937)	Entropy 0.73017 (0.73255)	Top-1 acc 68.750 (71.800)	Top-5 acc 85.156 (88.640)	lr 0.00124
Train [103][2820/3239]	Time 0.237 (0.618)	Data Time 0.001 (0.016)	Loss 2.1556 (2.1939)	Entropy 0.73018 (0.73254)	Top-1 acc 73.047 (71.796)	Top-5 acc 88.672 (88.637)	lr 0.00124
Train [103][2830/3239]	Time 0.311 (0.637)	Data Time 0.004 (0.016)	Loss 2.1352 (2.1938)	Entropy 0.73012 (0.73253)	Top-1 acc 71.875 (71.796)	Top-5 acc 88.672 (88.636)	lr 0.00124
Train [103][2840/3239]	Time 0.240 (0.636)	Data Time 0.002 (0.016)	Loss 2.3372 (2.1937)	Entropy 0.73008 (0.73252)	Top-1 acc 67.188 (71.795)	Top-5 acc 87.891 (88.640)	lr 0.00124
Train [103][2850/3239]	Time 0.261 (0.636)	Data Time 0.002 (0.016)	Loss 2.1604 (2.1936)	Entropy 0.73035 (0.73251)	Top-1 acc 76.562 (71.801)	Top-5 acc 90.625 (88.642)	lr 0.00123
Train [103][2860/3239]	Time 0.230 (0.635)	Data Time 0.001 (0.016)	Loss 2.3064 (2.1934)	Entropy 0.73038 (0.73251)	Top-1 acc 66.406 (71.806)	Top-5 acc 85.156 (88.645)	lr 0.00123
Train [103][2870/3239]	Time 0.252 (0.635)	Data Time 0.001 (0.016)	Loss 2.2302 (2.1934)	Entropy 0.73035 (0.73250)	Top-1 acc 71.094 (71.806)	Top-5 acc 87.891 (88.644)	lr 0.00123
Train [103][2880/3239]	Time 0.236 (0.634)	Data Time 0.001 (0.016)	Loss 2.1930 (2.1935)	Entropy 0.73035 (0.73249)	Top-1 acc 70.703 (71.805)	Top-5 acc 87.891 (88.644)	lr 0.00123
Train [103][2890/3239]	Time 0.236 (0.634)	Data Time 0.001 (0.016)	Loss 2.2378 (2.1934)	Entropy 0.73038 (0.73248)	Top-1 acc 71.094 (71.807)	Top-5 acc 89.453 (88.644)	lr 0.00123
Train [103][2900/3239]	Time 0.234 (0.633)	Data Time 0.001 (0.016)	Loss 2.2275 (2.1934)	Entropy 0.73030 (0.73248)	Top-1 acc 71.094 (71.806)	Top-5 acc 86.719 (88.645)	lr 0.00123
Train [103][2910/3239]	Time 0.224 (0.633)	Data Time 0.001 (0.016)	Loss 2.2548 (2.1934)	Entropy 0.73022 (0.73247)	Top-1 acc 68.359 (71.808)	Top-5 acc 86.328 (88.646)	lr 0.00123
Train [103][2920/3239]	Time 0.268 (0.632)	Data Time 0.001 (0.016)	Loss 2.1479 (2.1933)	Entropy 0.73022 (0.73246)	Top-1 acc 74.609 (71.810)	Top-5 acc 89.844 (88.648)	lr 0.00123
Train [103][2930/3239]	Time 0.254 (0.632)	Data Time 0.001 (0.016)	Loss 2.1048 (2.1933)	Entropy 0.73021 (0.73245)	Top-1 acc 74.219 (71.813)	Top-5 acc 87.891 (88.648)	lr 0.00123
Train [103][2940/3239]	Time 0.228 (0.631)	Data Time 0.001 (0.016)	Loss 2.1049 (2.1934)	Entropy 0.73024 (0.73245)	Top-1 acc 74.219 (71.812)	Top-5 acc 89.453 (88.644)	lr 0.00123
Train [103][2950/3239]	Time 0.257 (0.631)	Data Time 0.001 (0.016)	Loss 2.3537 (2.1936)	Entropy 0.73024 (0.73244)	Top-1 acc 69.141 (71.806)	Top-5 acc 86.328 (88.641)	lr 0.00123
Train [103][2960/3239]	Time 0.236 (0.630)	Data Time 0.001 (0.016)	Loss 2.0325 (2.1936)	Entropy 0.73020 (0.73243)	Top-1 acc 76.562 (71.804)	Top-5 acc 91.406 (88.642)	lr 0.00123
Train [103][2970/3239]	Time 0.230 (0.630)	Data Time 0.001 (0.016)	Loss 2.2410 (2.1935)	Entropy 0.73016 (0.73242)	Top-1 acc 72.266 (71.804)	Top-5 acc 89.453 (88.643)	lr 0.00123
Train [103][2980/3239]	Time 0.213 (0.629)	Data Time 0.001 (0.015)	Loss 2.3890 (2.1936)	Entropy 0.73014 (0.73242)	Top-1 acc 64.062 (71.802)	Top-5 acc 87.109 (88.641)	lr 0.00123
Train [103][2990/3239]	Time 0.249 (0.629)	Data Time 0.001 (0.015)	Loss 2.2877 (2.1936)	Entropy 0.73008 (0.73241)	Top-1 acc 67.188 (71.802)	Top-5 acc 88.281 (88.641)	lr 0.00123
Train [103][3000/3239]	Time 0.237 (0.628)	Data Time 0.001 (0.015)	Loss 2.1089 (2.1935)	Entropy 0.73018 (0.73240)	Top-1 acc 73.438 (71.806)	Top-5 acc 88.672 (88.640)	lr 0.00123
Train [103][3010/3239]	Time 0.344 (0.628)	Data Time 0.001 (0.015)	Loss 2.3137 (2.1936)	Entropy 0.73018 (0.73239)	Top-1 acc 67.969 (71.803)	Top-5 acc 88.672 (88.640)	lr 0.00123
Train [103][3020/3239]	Time 0.240 (0.627)	Data Time 0.001 (0.015)	Loss 2.1989 (2.1937)	Entropy 0.73012 (0.73239)	Top-1 acc 68.750 (71.796)	Top-5 acc 90.234 (88.639)	lr 0.00123
Train [103][3030/3239]	Time 0.237 (0.627)	Data Time 0.001 (0.015)	Loss 2.1473 (2.1939)	Entropy 0.73015 (0.73238)	Top-1 acc 72.656 (71.794)	Top-5 acc 87.891 (88.634)	lr 0.00123
Train [103][3040/3239]	Time 0.248 (0.627)	Data Time 0.002 (0.015)	Loss 2.0205 (2.1939)	Entropy 0.73012 (0.73237)	Top-1 acc 79.297 (71.793)	Top-5 acc 92.969 (88.634)	lr 0.00123
Train [103][3050/3239]	Time 0.384 (0.626)	Data Time 0.001 (0.015)	Loss 2.2630 (2.1940)	Entropy 0.73007 (0.73236)	Top-1 acc 68.750 (71.789)	Top-5 acc 88.281 (88.630)	lr 0.00123
Train [103][3060/3239]	Time 0.260 (0.626)	Data Time 0.001 (0.015)	Loss 2.2817 (2.1941)	Entropy 0.73005 (0.73236)	Top-1 acc 67.969 (71.786)	Top-5 acc 88.281 (88.628)	lr 0.00123
Train [103][3070/3239]	Time 0.221 (0.625)	Data Time 0.001 (0.015)	Loss 2.0154 (2.1941)	Entropy 0.73002 (0.73235)	Top-1 acc 73.438 (71.788)	Top-5 acc 92.969 (88.628)	lr 0.00123
Train [103][3080/3239]	Time 0.257 (0.625)	Data Time 0.002 (0.015)	Loss 2.2548 (2.1941)	Entropy 0.73001 (0.73234)	Top-1 acc 71.484 (71.784)	Top-5 acc 85.156 (88.627)	lr 0.00122
Train [103][3090/3239]	Time 0.269 (0.624)	Data Time 0.001 (0.015)	Loss 2.2251 (2.1940)	Entropy 0.73030 (0.73233)	Top-1 acc 69.141 (71.788)	Top-5 acc 89.453 (88.631)	lr 0.00122
Train [103][3100/3239]	Time 0.265 (0.624)	Data Time 0.001 (0.015)	Loss 2.1480 (2.1939)	Entropy 0.73030 (0.73233)	Top-1 acc 71.484 (71.788)	Top-5 acc 89.844 (88.631)	lr 0.00122
Train [103][3110/3239]	Time 0.242 (0.623)	Data Time 0.002 (0.015)	Loss 2.4190 (2.1939)	Entropy 0.73026 (0.73232)	Top-1 acc 67.578 (71.792)	Top-5 acc 84.766 (88.631)	lr 0.00122
Train [103][3120/3239]	Time 0.236 (0.623)	Data Time 0.001 (0.015)	Loss 2.3067 (2.1940)	Entropy 0.73024 (0.73231)	Top-1 acc 67.188 (71.792)	Top-5 acc 86.328 (88.629)	lr 0.00122
Train [103][3130/3239]	Time 0.259 (0.622)	Data Time 0.001 (0.015)	Loss 2.1419 (2.1940)	Entropy 0.73030 (0.73231)	Top-1 acc 73.438 (71.793)	Top-5 acc 89.453 (88.628)	lr 0.00122
Train [103][3140/3239]	Time 0.228 (0.622)	Data Time 0.001 (0.015)	Loss 2.3351 (2.1941)	Entropy 0.73029 (0.73230)	Top-1 acc 70.703 (71.791)	Top-5 acc 85.547 (88.628)	lr 0.00122
Train [103][3150/3239]	Time 0.224 (0.621)	Data Time 0.001 (0.015)	Loss 2.2961 (2.1942)	Entropy 0.73031 (0.73229)	Top-1 acc 71.094 (71.786)	Top-5 acc 86.328 (88.624)	lr 0.00122
Train [103][3160/3239]	Time 0.395 (0.638)	Data Time 0.004 (0.015)	Loss 2.0461 (2.1941)	Entropy 0.73037 (0.73229)	Top-1 acc 78.125 (71.788)	Top-5 acc 92.578 (88.629)	lr 0.00122
Train [103][3170/3239]	Time 0.242 (0.638)	Data Time 0.002 (0.015)	Loss 2.2674 (2.1942)	Entropy 0.73035 (0.73228)	Top-1 acc 69.531 (71.786)	Top-5 acc 86.328 (88.626)	lr 0.00122
Train [103][3180/3239]	Time 0.237 (0.637)	Data Time 0.000 (0.015)	Loss 2.3590 (2.1944)	Entropy 0.73031 (0.73228)	Top-1 acc 66.406 (71.783)	Top-5 acc 85.156 (88.626)	lr 0.00122
Train [103][3190/3239]	Time 0.229 (0.637)	Data Time 0.000 (0.015)	Loss 2.2940 (2.1946)	Entropy 0.73022 (0.73227)	Top-1 acc 72.656 (71.774)	Top-5 acc 85.156 (88.622)	lr 0.00122
Train [103][3200/3239]	Time 0.226 (0.636)	Data Time 0.000 (0.015)	Loss 2.3458 (2.1948)	Entropy 0.73019 (0.73226)	Top-1 acc 69.922 (71.772)	Top-5 acc 86.719 (88.620)	lr 0.00122
Train [103][3210/3239]	Time 0.235 (0.635)	Data Time 0.000 (0.014)	Loss 2.2699 (2.1951)	Entropy 0.73016 (0.73226)	Top-1 acc 69.531 (71.764)	Top-5 acc 88.281 (88.616)	lr 0.00122
Train [103][3220/3239]	Time 0.222 (0.635)	Data Time 0.000 (0.014)	Loss 2.1646 (2.1950)	Entropy 0.73016 (0.73225)	Top-1 acc 73.047 (71.765)	Top-5 acc 89.062 (88.616)	lr 0.00122
Train [103][3230/3239]	Time 0.240 (0.634)	Data Time 0.000 (0.014)	Loss 2.0617 (2.1949)	Entropy 0.73016 (0.73224)	Top-1 acc 74.609 (71.770)	Top-5 acc 89.453 (88.616)	lr 0.00122
Train [103][3239/3239]	Time 2.321 (0.634)	Data Time 0.000 (0.014)	Loss 2.5152 (2.1949)	Entropy 0.73016 (0.73224)	Top-1 acc 65.432 (71.770)	Top-5 acc 80.247 (88.617)	lr 0.00122
==========Valid [103/120]	loss 1.211	top-1 acc 72.249 (72.249)	top-5 acc 89.632	Train top-1 71.770	top-5 88.617	Entropy 0.73016	Latency-None: 0.000ms	Flops: 546.53M
Train [104][0/3239]	Time 40.497 (40.497)	Data Time 38.666 (38.666)	Loss 2.1300 (2.1300)	Entropy 0.73015 (0.73015)	Top-1 acc 70.703 (70.703)	Top-5 acc 90.625 (90.625)	lr 0.00122
Train [104][10/3239]	Time 2.996 (4.279)	Data Time 0.001 (3.519)	Loss 2.1458 (2.1460)	Entropy 0.73015 (0.73015)	Top-1 acc 75.391 (73.438)	Top-5 acc 89.844 (89.489)	lr 0.00122
Train [104][20/3239]	Time 0.357 (2.366)	Data Time 0.001 (1.844)	Loss 2.3634 (2.2004)	Entropy 0.73011 (0.73013)	Top-1 acc 66.406 (72.321)	Top-5 acc 84.766 (88.802)	lr 0.00122
Train [104][30/3239]	Time 0.241 (1.763)	Data Time 0.001 (1.251)	Loss 2.1991 (2.1973)	Entropy 0.73009 (0.73012)	Top-1 acc 72.266 (72.354)	Top-5 acc 87.109 (88.596)	lr 0.00122
Train [104][40/3239]	Time 0.236 (1.453)	Data Time 0.002 (0.946)	Loss 2.2061 (2.1966)	Entropy 0.72973 (0.73004)	Top-1 acc 71.875 (72.361)	Top-5 acc 85.938 (88.548)	lr 0.00122
Train [104][50/3239]	Time 0.239 (1.264)	Data Time 0.002 (0.761)	Loss 2.1160 (2.1961)	Entropy 0.72969 (0.72997)	Top-1 acc 71.484 (72.273)	Top-5 acc 90.625 (88.442)	lr 0.00122
Train [104][60/3239]	Time 0.237 (1.136)	Data Time 0.001 (0.636)	Loss 2.2932 (2.1913)	Entropy 0.72975 (0.72993)	Top-1 acc 71.094 (72.246)	Top-5 acc 84.375 (88.569)	lr 0.00122
Train [104][70/3239]	Time 0.231 (1.047)	Data Time 0.001 (0.547)	Loss 2.2250 (2.1900)	Entropy 0.72971 (0.72991)	Top-1 acc 72.656 (72.321)	Top-5 acc 88.672 (88.639)	lr 0.00121
Train [104][80/3239]	Time 0.237 (0.979)	Data Time 0.001 (0.480)	Loss 2.1948 (2.1904)	Entropy 0.72971 (0.72988)	Top-1 acc 71.094 (72.299)	Top-5 acc 86.719 (88.585)	lr 0.00121
Train [104][90/3239]	Time 0.236 (0.923)	Data Time 0.001 (0.427)	Loss 2.1636 (2.1857)	Entropy 0.72957 (0.72986)	Top-1 acc 73.828 (72.364)	Top-5 acc 89.453 (88.685)	lr 0.00121
Train [104][100/3239]	Time 0.239 (0.880)	Data Time 0.001 (0.385)	Loss 2.1351 (2.1823)	Entropy 0.72955 (0.72983)	Top-1 acc 73.047 (72.424)	Top-5 acc 89.844 (88.780)	lr 0.00121
Train [104][110/3239]	Time 0.381 (0.847)	Data Time 0.001 (0.350)	Loss 2.1196 (2.1806)	Entropy 0.72942 (0.72980)	Top-1 acc 72.266 (72.428)	Top-5 acc 90.625 (88.806)	lr 0.00121
Train [104][120/3239]	Time 2.586 (0.817)	Data Time 0.001 (0.322)	Loss 2.2297 (2.1790)	Entropy 0.72942 (0.72977)	Top-1 acc 70.703 (72.430)	Top-5 acc 89.062 (88.859)	lr 0.00121
Train [104][130/3239]	Time 0.221 (0.773)	Data Time 0.001 (0.297)	Loss 2.2520 (2.1808)	Entropy 0.72944 (0.72975)	Top-1 acc 69.531 (72.352)	Top-5 acc 87.891 (88.806)	lr 0.00121
Train [104][140/3239]	Time 0.227 (0.753)	Data Time 0.001 (0.276)	Loss 2.1939 (2.1842)	Entropy 0.72938 (0.72972)	Top-1 acc 70.703 (72.329)	Top-5 acc 91.016 (88.769)	lr 0.00121
Train [104][150/3239]	Time 0.338 (0.736)	Data Time 0.002 (0.258)	Loss 2.1225 (2.1870)	Entropy 0.73053 (0.72976)	Top-1 acc 75.000 (72.255)	Top-5 acc 88.281 (88.752)	lr 0.00121
Train [104][160/3239]	Time 0.248 (0.721)	Data Time 0.001 (0.242)	Loss 2.0361 (2.1852)	Entropy 0.73055 (0.72981)	Top-1 acc 76.172 (72.317)	Top-5 acc 90.234 (88.779)	lr 0.00121
Train [104][170/3239]	Time 0.224 (0.708)	Data Time 0.001 (0.228)	Loss 2.1097 (2.1834)	Entropy 0.73049 (0.72985)	Top-1 acc 75.781 (72.366)	Top-5 acc 89.062 (88.804)	lr 0.00121
Train [104][180/3239]	Time 0.240 (0.695)	Data Time 0.001 (0.215)	Loss 2.1757 (2.1827)	Entropy 0.73044 (0.72988)	Top-1 acc 70.703 (72.358)	Top-5 acc 90.625 (88.816)	lr 0.00121
Train [104][190/3239]	Time 0.337 (0.686)	Data Time 0.001 (0.204)	Loss 2.1675 (2.1861)	Entropy 0.73041 (0.72991)	Top-1 acc 75.391 (72.300)	Top-5 acc 88.672 (88.743)	lr 0.00121
Train [104][200/3239]	Time 0.240 (0.675)	Data Time 0.001 (0.194)	Loss 2.0827 (2.1835)	Entropy 0.73040 (0.72994)	Top-1 acc 75.391 (72.374)	Top-5 acc 91.016 (88.788)	lr 0.00121
Train [104][210/3239]	Time 0.227 (0.666)	Data Time 0.001 (0.185)	Loss 2.2530 (2.1853)	Entropy 0.73040 (0.72996)	Top-1 acc 70.703 (72.323)	Top-5 acc 87.109 (88.755)	lr 0.00121
Train [104][220/3239]	Time 0.231 (0.657)	Data Time 0.001 (0.177)	Loss 2.1434 (2.1860)	Entropy 0.73047 (0.72998)	Top-1 acc 73.438 (72.305)	Top-5 acc 88.281 (88.721)	lr 0.00121
Train [104][230/3239]	Time 2.667 (0.650)	Data Time 0.001 (0.169)	Loss 2.3498 (2.1868)	Entropy 0.73047 (0.73000)	Top-1 acc 66.406 (72.274)	Top-5 acc 87.109 (88.706)	lr 0.00121
Train [104][240/3239]	Time 0.250 (0.634)	Data Time 0.001 (0.162)	Loss 2.2860 (2.1872)	Entropy 0.73045 (0.73002)	Top-1 acc 68.750 (72.219)	Top-5 acc 87.109 (88.712)	lr 0.00121
Train [104][250/3239]	Time 0.260 (0.628)	Data Time 0.001 (0.156)	Loss 2.1757 (2.1876)	Entropy 0.73036 (0.73003)	Top-1 acc 76.562 (72.233)	Top-5 acc 90.234 (88.714)	lr 0.00121
Train [104][260/3239]	Time 0.233 (0.623)	Data Time 0.002 (0.150)	Loss 2.1629 (2.1896)	Entropy 0.73032 (0.73004)	Top-1 acc 70.312 (72.119)	Top-5 acc 89.453 (88.699)	lr 0.00121
Train [104][270/3239]	Time 0.248 (0.618)	Data Time 0.001 (0.145)	Loss 2.2611 (2.1889)	Entropy 0.73029 (0.73005)	Top-1 acc 70.703 (72.094)	Top-5 acc 87.109 (88.722)	lr 0.00121
Train [104][280/3239]	Time 0.229 (0.820)	Data Time 0.002 (0.139)	Loss 1.8424 (2.1874)	Entropy 0.73035 (0.73006)	Top-1 acc 83.594 (72.149)	Top-5 acc 93.750 (88.754)	lr 0.00121
Train [104][290/3239]	Time 0.226 (0.809)	Data Time 0.002 (0.135)	Loss 2.1199 (2.1870)	Entropy 0.73038 (0.73007)	Top-1 acc 76.562 (72.172)	Top-5 acc 89.453 (88.744)	lr 0.00121
Train [104][300/3239]	Time 0.233 (0.798)	Data Time 0.001 (0.130)	Loss 2.1957 (2.1866)	Entropy 0.73034 (0.73008)	Top-1 acc 70.703 (72.186)	Top-5 acc 87.500 (88.754)	lr 0.00120
Train [104][310/3239]	Time 0.253 (0.788)	Data Time 0.002 (0.126)	Loss 2.2522 (2.1866)	Entropy 0.73033 (0.73009)	Top-1 acc 73.047 (72.213)	Top-5 acc 86.328 (88.737)	lr 0.00120
Train [104][320/3239]	Time 0.231 (0.779)	Data Time 0.001 (0.122)	Loss 2.0854 (2.1856)	Entropy 0.73034 (0.73010)	Top-1 acc 74.609 (72.260)	Top-5 acc 91.016 (88.756)	lr 0.00120
Train [104][330/3239]	Time 0.229 (0.771)	Data Time 0.001 (0.119)	Loss 2.2451 (2.1861)	Entropy 0.73030 (0.73011)	Top-1 acc 69.922 (72.234)	Top-5 acc 87.109 (88.736)	lr 0.00120
Train [104][340/3239]	Time 2.615 (0.762)	Data Time 0.001 (0.115)	Loss 2.2406 (2.1865)	Entropy 0.73030 (0.73011)	Top-1 acc 71.484 (72.207)	Top-5 acc 87.891 (88.738)	lr 0.00120
Train [104][350/3239]	Time 0.270 (0.747)	Data Time 0.001 (0.112)	Loss 2.1334 (2.1866)	Entropy 0.73029 (0.73012)	Top-1 acc 70.703 (72.198)	Top-5 acc 90.625 (88.733)	lr 0.00120
Train [104][360/3239]	Time 0.243 (0.740)	Data Time 0.001 (0.109)	Loss 2.3154 (2.1870)	Entropy 0.73030 (0.73012)	Top-1 acc 68.750 (72.165)	Top-5 acc 84.766 (88.732)	lr 0.00120
Train [104][370/3239]	Time 0.231 (0.733)	Data Time 0.001 (0.106)	Loss 2.2286 (2.1870)	Entropy 0.73033 (0.73013)	Top-1 acc 71.875 (72.167)	Top-5 acc 88.672 (88.729)	lr 0.00120
Train [104][380/3239]	Time 0.237 (0.727)	Data Time 0.001 (0.103)	Loss 2.0641 (2.1859)	Entropy 0.73027 (0.73013)	Top-1 acc 76.953 (72.210)	Top-5 acc 90.625 (88.757)	lr 0.00120
Train [104][390/3239]	Time 0.240 (0.721)	Data Time 0.001 (0.101)	Loss 2.1428 (2.1857)	Entropy 0.73030 (0.73014)	Top-1 acc 70.703 (72.195)	Top-5 acc 91.406 (88.762)	lr 0.00120
Train [104][400/3239]	Time 0.233 (0.715)	Data Time 0.001 (0.098)	Loss 2.1839 (2.1863)	Entropy 0.73028 (0.73014)	Top-1 acc 69.531 (72.168)	Top-5 acc 89.453 (88.759)	lr 0.00120
Train [104][410/3239]	Time 0.271 (0.709)	Data Time 0.002 (0.096)	Loss 2.1645 (2.1859)	Entropy 0.73027 (0.73014)	Top-1 acc 73.047 (72.173)	Top-5 acc 88.672 (88.779)	lr 0.00120
Train [104][420/3239]	Time 0.244 (0.704)	Data Time 0.001 (0.094)	Loss 2.1314 (2.1856)	Entropy 0.73028 (0.73015)	Top-1 acc 73.438 (72.177)	Top-5 acc 87.891 (88.782)	lr 0.00120
Train [104][430/3239]	Time 0.229 (0.699)	Data Time 0.001 (0.092)	Loss 2.1047 (2.1858)	Entropy 0.73016 (0.73015)	Top-1 acc 73.828 (72.176)	Top-5 acc 88.281 (88.773)	lr 0.00120
Train [104][440/3239]	Time 0.221 (0.694)	Data Time 0.001 (0.089)	Loss 2.1736 (2.1861)	Entropy 0.73022 (0.73015)	Top-1 acc 72.656 (72.175)	Top-5 acc 89.453 (88.760)	lr 0.00120
Train [104][450/3239]	Time 2.504 (0.689)	Data Time 0.001 (0.088)	Loss 2.2195 (2.1860)	Entropy 0.73022 (0.73015)	Top-1 acc 70.703 (72.156)	Top-5 acc 87.891 (88.770)	lr 0.00120
Train [104][460/3239]	Time 0.257 (0.680)	Data Time 0.001 (0.086)	Loss 2.0184 (2.1860)	Entropy 0.73016 (0.73015)	Top-1 acc 76.953 (72.151)	Top-5 acc 91.016 (88.755)	lr 0.00120
Train [104][470/3239]	Time 0.246 (0.676)	Data Time 0.001 (0.084)	Loss 2.2196 (2.1862)	Entropy 0.73021 (0.73015)	Top-1 acc 72.656 (72.151)	Top-5 acc 89.453 (88.764)	lr 0.00120
Train [104][480/3239]	Time 0.371 (0.672)	Data Time 0.001 (0.082)	Loss 2.2436 (2.1863)	Entropy 0.73022 (0.73015)	Top-1 acc 74.219 (72.162)	Top-5 acc 87.500 (88.774)	lr 0.00120
Train [104][490/3239]	Time 0.257 (0.668)	Data Time 0.001 (0.081)	Loss 2.2165 (2.1867)	Entropy 0.73021 (0.73016)	Top-1 acc 73.828 (72.155)	Top-5 acc 87.891 (88.765)	lr 0.00120
Train [104][500/3239]	Time 0.235 (0.665)	Data Time 0.001 (0.079)	Loss 2.2700 (2.1876)	Entropy 0.73014 (0.73016)	Top-1 acc 71.094 (72.142)	Top-5 acc 86.719 (88.741)	lr 0.00120
Train [104][510/3239]	Time 0.233 (0.661)	Data Time 0.001 (0.077)	Loss 2.0577 (2.1882)	Entropy 0.73014 (0.73016)	Top-1 acc 75.781 (72.113)	Top-5 acc 90.234 (88.738)	lr 0.00120
Train [104][520/3239]	Time 0.359 (0.658)	Data Time 0.001 (0.076)	Loss 2.1767 (2.1883)	Entropy 0.73012 (0.73015)	Top-1 acc 70.312 (72.101)	Top-5 acc 89.062 (88.745)	lr 0.00120
Train [104][530/3239]	Time 0.244 (0.655)	Data Time 0.001 (0.075)	Loss 2.2830 (2.1899)	Entropy 0.73005 (0.73015)	Top-1 acc 65.234 (72.056)	Top-5 acc 85.938 (88.701)	lr 0.00119
Train [104][540/3239]	Time 0.235 (0.652)	Data Time 0.001 (0.073)	Loss 2.1255 (2.1892)	Entropy 0.73002 (0.73015)	Top-1 acc 74.609 (72.075)	Top-5 acc 89.062 (88.705)	lr 0.00119
Train [104][550/3239]	Time 0.242 (0.649)	Data Time 0.001 (0.072)	Loss 2.1356 (2.1891)	Entropy 0.72995 (0.73015)	Top-1 acc 73.047 (72.073)	Top-5 acc 91.016 (88.716)	lr 0.00119
Train [104][560/3239]	Time 2.748 (0.646)	Data Time 0.002 (0.071)	Loss 2.1919 (2.1889)	Entropy 0.72995 (0.73015)	Top-1 acc 72.266 (72.078)	Top-5 acc 88.672 (88.732)	lr 0.00119
Train [104][570/3239]	Time 0.244 (0.639)	Data Time 0.001 (0.070)	Loss 2.3321 (2.1894)	Entropy 0.72990 (0.73014)	Top-1 acc 69.922 (72.067)	Top-5 acc 84.766 (88.727)	lr 0.00119
Train [104][580/3239]	Time 0.269 (0.636)	Data Time 0.001 (0.068)	Loss 2.0848 (2.1885)	Entropy 0.72975 (0.73013)	Top-1 acc 74.219 (72.083)	Top-5 acc 91.406 (88.736)	lr 0.00119
Train [104][590/3239]	Time 0.238 (0.633)	Data Time 0.001 (0.067)	Loss 2.4722 (2.1894)	Entropy 0.72973 (0.73013)	Top-1 acc 67.188 (72.044)	Top-5 acc 83.203 (88.719)	lr 0.00119
Train [104][600/3239]	Time 0.244 (0.631)	Data Time 0.001 (0.066)	Loss 2.2089 (2.1914)	Entropy 0.72976 (0.73012)	Top-1 acc 72.656 (72.017)	Top-5 acc 87.891 (88.708)	lr 0.00119
Train [104][610/3239]	Time 0.254 (0.629)	Data Time 0.001 (0.065)	Loss 2.1076 (2.1911)	Entropy 0.72972 (0.73012)	Top-1 acc 75.391 (72.035)	Top-5 acc 90.625 (88.713)	lr 0.00119
Train [104][620/3239]	Time 0.233 (0.627)	Data Time 0.002 (0.064)	Loss 2.1575 (2.1908)	Entropy 0.72970 (0.73011)	Top-1 acc 75.391 (72.052)	Top-5 acc 89.453 (88.715)	lr 0.00119
Train [104][630/3239]	Time 0.228 (0.624)	Data Time 0.002 (0.063)	Loss 2.3071 (2.1917)	Entropy 0.72974 (0.73010)	Top-1 acc 73.828 (72.047)	Top-5 acc 88.281 (88.700)	lr 0.00119
Train [104][640/3239]	Time 0.283 (0.705)	Data Time 0.002 (0.062)	Loss 2.4274 (2.1916)	Entropy 0.72962 (0.73010)	Top-1 acc 67.578 (72.063)	Top-5 acc 82.031 (88.702)	lr 0.00119
Train [104][650/3239]	Time 0.325 (0.702)	Data Time 0.002 (0.061)	Loss 2.2166 (2.1923)	Entropy 0.72961 (0.73009)	Top-1 acc 71.094 (72.027)	Top-5 acc 87.891 (88.690)	lr 0.00119
Train [104][660/3239]	Time 0.241 (0.699)	Data Time 0.002 (0.060)	Loss 2.2094 (2.1921)	Entropy 0.72943 (0.73008)	Top-1 acc 71.094 (72.027)	Top-5 acc 89.453 (88.696)	lr 0.00119
Train [104][670/3239]	Time 2.556 (0.696)	Data Time 0.003 (0.059)	Loss 2.3057 (2.1920)	Entropy 0.72943 (0.73007)	Top-1 acc 71.094 (72.021)	Top-5 acc 86.719 (88.697)	lr 0.00119
Train [104][680/3239]	Time 0.227 (0.689)	Data Time 0.002 (0.059)	Loss 2.2726 (2.1927)	Entropy 0.72937 (0.73006)	Top-1 acc 69.922 (72.013)	Top-5 acc 87.891 (88.692)	lr 0.00119
Train [104][690/3239]	Time 0.332 (0.686)	Data Time 0.001 (0.058)	Loss 2.2602 (2.1926)	Entropy 0.72945 (0.73005)	Top-1 acc 71.875 (72.034)	Top-5 acc 87.891 (88.693)	lr 0.00119
Train [104][700/3239]	Time 0.273 (0.683)	Data Time 0.002 (0.057)	Loss 2.2555 (2.1933)	Entropy 0.72940 (0.73004)	Top-1 acc 68.750 (72.014)	Top-5 acc 87.109 (88.681)	lr 0.00119
Train [104][710/3239]	Time 0.243 (0.681)	Data Time 0.001 (0.056)	Loss 2.2426 (2.1931)	Entropy 0.72936 (0.73003)	Top-1 acc 69.922 (72.017)	Top-5 acc 85.547 (88.687)	lr 0.00119
Train [104][720/3239]	Time 0.221 (0.678)	Data Time 0.001 (0.055)	Loss 2.2147 (2.1928)	Entropy 0.72927 (0.73002)	Top-1 acc 69.531 (72.027)	Top-5 acc 86.328 (88.689)	lr 0.00119
Train [104][730/3239]	Time 0.230 (0.676)	Data Time 0.001 (0.055)	Loss 2.1031 (2.1917)	Entropy 0.72925 (0.73001)	Top-1 acc 73.828 (72.052)	Top-5 acc 89.453 (88.707)	lr 0.00119
Train [104][740/3239]	Time 0.227 (0.673)	Data Time 0.001 (0.054)	Loss 2.2740 (2.1916)	Entropy 0.72905 (0.73000)	Top-1 acc 70.312 (72.056)	Top-5 acc 87.109 (88.698)	lr 0.00119
Train [104][750/3239]	Time 0.239 (0.671)	Data Time 0.001 (0.053)	Loss 2.1395 (2.1920)	Entropy 0.72904 (0.72999)	Top-1 acc 72.266 (72.035)	Top-5 acc 89.062 (88.698)	lr 0.00119
Train [104][760/3239]	Time 0.225 (0.668)	Data Time 0.001 (0.053)	Loss 2.2302 (2.1917)	Entropy 0.72908 (0.72998)	Top-1 acc 69.922 (72.021)	Top-5 acc 90.234 (88.715)	lr 0.00118
Train [104][770/3239]	Time 0.247 (0.666)	Data Time 0.001 (0.052)	Loss 2.2390 (2.1921)	Entropy 0.72906 (0.72997)	Top-1 acc 73.047 (72.022)	Top-5 acc 87.891 (88.709)	lr 0.00118
Train [104][780/3239]	Time 2.647 (0.664)	Data Time 0.001 (0.051)	Loss 2.3180 (2.1923)	Entropy 0.72906 (0.72995)	Top-1 acc 69.531 (72.025)	Top-5 acc 87.109 (88.696)	lr 0.00118
Train [104][790/3239]	Time 0.230 (0.659)	Data Time 0.001 (0.051)	Loss 2.2168 (2.1918)	Entropy 0.72903 (0.72994)	Top-1 acc 69.141 (72.029)	Top-5 acc 88.281 (88.701)	lr 0.00118
Train [104][800/3239]	Time 0.249 (0.657)	Data Time 0.001 (0.050)	Loss 2.1957 (2.1914)	Entropy 0.72899 (0.72993)	Top-1 acc 71.875 (72.030)	Top-5 acc 89.453 (88.706)	lr 0.00118
Train [104][810/3239]	Time 0.233 (0.654)	Data Time 0.001 (0.049)	Loss 2.1274 (2.1909)	Entropy 0.72902 (0.72992)	Top-1 acc 75.000 (72.044)	Top-5 acc 89.453 (88.711)	lr 0.00118
Train [104][820/3239]	Time 0.239 (0.652)	Data Time 0.001 (0.049)	Loss 2.0325 (2.1910)	Entropy 0.72890 (0.72991)	Top-1 acc 76.953 (72.048)	Top-5 acc 92.188 (88.704)	lr 0.00118
Train [104][830/3239]	Time 0.241 (0.650)	Data Time 0.001 (0.048)	Loss 2.1978 (2.1911)	Entropy 0.72882 (0.72989)	Top-1 acc 74.609 (72.056)	Top-5 acc 88.281 (88.703)	lr 0.00118
Train [104][840/3239]	Time 0.241 (0.648)	Data Time 0.002 (0.048)	Loss 2.2672 (2.1911)	Entropy 0.72886 (0.72988)	Top-1 acc 69.141 (72.053)	Top-5 acc 87.891 (88.702)	lr 0.00118
Train [104][850/3239]	Time 0.233 (0.646)	Data Time 0.001 (0.047)	Loss 2.1423 (2.1914)	Entropy 0.72877 (0.72987)	Top-1 acc 73.438 (72.040)	Top-5 acc 89.844 (88.692)	lr 0.00118
Train [104][860/3239]	Time 0.310 (0.644)	Data Time 0.001 (0.047)	Loss 2.0936 (2.1918)	Entropy 0.72868 (0.72986)	Top-1 acc 73.047 (72.027)	Top-5 acc 92.188 (88.687)	lr 0.00118
Train [104][870/3239]	Time 0.242 (0.643)	Data Time 0.001 (0.046)	Loss 2.0343 (2.1919)	Entropy 0.72868 (0.72984)	Top-1 acc 76.172 (72.024)	Top-5 acc 92.188 (88.684)	lr 0.00118
Train [104][880/3239]	Time 0.234 (0.641)	Data Time 0.001 (0.046)	Loss 2.0906 (2.1919)	Entropy 0.72868 (0.72983)	Top-1 acc 71.484 (72.018)	Top-5 acc 90.234 (88.679)	lr 0.00118
Train [104][890/3239]	Time 2.634 (0.639)	Data Time 0.001 (0.045)	Loss 2.3089 (2.1917)	Entropy 0.72868 (0.72982)	Top-1 acc 69.922 (72.012)	Top-5 acc 87.109 (88.687)	lr 0.00118
Train [104][900/3239]	Time 0.362 (0.635)	Data Time 0.002 (0.045)	Loss 2.1773 (2.1919)	Entropy 0.72862 (0.72980)	Top-1 acc 71.094 (72.003)	Top-5 acc 90.625 (88.689)	lr 0.00118
Train [104][910/3239]	Time 0.223 (0.633)	Data Time 0.001 (0.044)	Loss 2.1511 (2.1918)	Entropy 0.72855 (0.72979)	Top-1 acc 75.391 (72.004)	Top-5 acc 89.062 (88.695)	lr 0.00118
Train [104][920/3239]	Time 0.230 (0.631)	Data Time 0.001 (0.044)	Loss 2.1461 (2.1920)	Entropy 0.72844 (0.72978)	Top-1 acc 70.312 (71.999)	Top-5 acc 88.672 (88.695)	lr 0.00118
Train [104][930/3239]	Time 0.273 (0.630)	Data Time 0.002 (0.043)	Loss 2.0046 (2.1918)	Entropy 0.72844 (0.72976)	Top-1 acc 76.953 (72.007)	Top-5 acc 91.406 (88.695)	lr 0.00118
Train [104][940/3239]	Time 0.231 (0.628)	Data Time 0.001 (0.043)	Loss 2.1742 (2.1919)	Entropy 0.72825 (0.72975)	Top-1 acc 68.750 (71.999)	Top-5 acc 87.891 (88.691)	lr 0.00118
Train [104][950/3239]	Time 0.230 (0.627)	Data Time 0.001 (0.042)	Loss 2.2362 (2.1919)	Entropy 0.72820 (0.72973)	Top-1 acc 67.969 (71.989)	Top-5 acc 88.672 (88.702)	lr 0.00118
Train [104][960/3239]	Time 0.255 (0.625)	Data Time 0.001 (0.042)	Loss 2.2474 (2.1918)	Entropy 0.72819 (0.72971)	Top-1 acc 70.312 (71.993)	Top-5 acc 89.062 (88.704)	lr 0.00118
Train [104][970/3239]	Time 0.222 (0.624)	Data Time 0.001 (0.042)	Loss 2.2400 (2.1918)	Entropy 0.72817 (0.72970)	Top-1 acc 67.969 (71.994)	Top-5 acc 88.672 (88.700)	lr 0.00118
Train [104][980/3239]	Time 0.239 (0.622)	Data Time 0.001 (0.041)	Loss 2.1928 (2.1920)	Entropy 0.72813 (0.72968)	Top-1 acc 71.484 (71.981)	Top-5 acc 88.281 (88.697)	lr 0.00118
Train [104][990/3239]	Time 0.226 (0.621)	Data Time 0.001 (0.041)	Loss 2.2975 (2.1918)	Entropy 0.72810 (0.72967)	Top-1 acc 69.531 (71.989)	Top-5 acc 86.719 (88.701)	lr 0.00117
Train [104][1000/3239]	Time 55.806 (0.672)	Data Time 0.001 (0.040)	Loss 2.2027 (2.1917)	Entropy 0.72810 (0.72965)	Top-1 acc 71.484 (71.983)	Top-5 acc 87.109 (88.703)	lr 0.00117
Train [104][1010/3239]	Time 0.262 (0.669)	Data Time 0.003 (0.040)	Loss 2.1882 (2.1915)	Entropy 0.72818 (0.72964)	Top-1 acc 72.656 (71.989)	Top-5 acc 88.672 (88.699)	lr 0.00117
Train [104][1020/3239]	Time 0.231 (0.667)	Data Time 0.002 (0.040)	Loss 2.3179 (2.1918)	Entropy 0.72816 (0.72962)	Top-1 acc 67.188 (71.981)	Top-5 acc 84.375 (88.692)	lr 0.00117
Train [104][1030/3239]	Time 0.328 (0.666)	Data Time 0.001 (0.039)	Loss 2.3176 (2.1919)	Entropy 0.72822 (0.72961)	Top-1 acc 67.969 (71.980)	Top-5 acc 86.328 (88.687)	lr 0.00117
Train [104][1040/3239]	Time 0.231 (0.664)	Data Time 0.001 (0.039)	Loss 2.3044 (2.1922)	Entropy 0.72810 (0.72959)	Top-1 acc 67.188 (71.977)	Top-5 acc 87.891 (88.680)	lr 0.00117
Train [104][1050/3239]	Time 0.275 (0.662)	Data Time 0.001 (0.039)	Loss 2.2116 (2.1919)	Entropy 0.72806 (0.72958)	Top-1 acc 74.219 (71.981)	Top-5 acc 87.109 (88.679)	lr 0.00117
Train [104][1060/3239]	Time 0.223 (0.661)	Data Time 0.001 (0.038)	Loss 2.2014 (2.1919)	Entropy 0.72786 (0.72956)	Top-1 acc 73.047 (71.976)	Top-5 acc 90.234 (88.688)	lr 0.00117
Train [104][1070/3239]	Time 0.328 (0.659)	Data Time 0.001 (0.038)	Loss 2.1773 (2.1922)	Entropy 0.72774 (0.72955)	Top-1 acc 68.750 (71.965)	Top-5 acc 89.844 (88.685)	lr 0.00117
Train [104][1080/3239]	Time 0.238 (0.657)	Data Time 0.001 (0.038)	Loss 2.2605 (2.1923)	Entropy 0.72767 (0.72953)	Top-1 acc 69.922 (71.962)	Top-5 acc 86.328 (88.688)	lr 0.00117
Train [104][1090/3239]	Time 0.228 (0.656)	Data Time 0.001 (0.037)	Loss 2.0816 (2.1925)	Entropy 0.72768 (0.72951)	Top-1 acc 71.094 (71.957)	Top-5 acc 91.016 (88.687)	lr 0.00117
Train [104][1100/3239]	Time 0.239 (0.654)	Data Time 0.001 (0.037)	Loss 2.1196 (2.1919)	Entropy 0.72773 (0.72950)	Top-1 acc 75.391 (71.966)	Top-5 acc 91.016 (88.701)	lr 0.00117
Train [104][1110/3239]	Time 2.498 (0.652)	Data Time 0.025 (0.037)	Loss 2.0572 (2.1914)	Entropy 0.72773 (0.72948)	Top-1 acc 75.000 (71.986)	Top-5 acc 93.750 (88.715)	lr 0.00117
Train [104][1120/3239]	Time 0.272 (0.649)	Data Time 0.002 (0.036)	Loss 2.2502 (2.1915)	Entropy 0.72760 (0.72946)	Top-1 acc 67.969 (71.980)	Top-5 acc 88.281 (88.713)	lr 0.00117
Train [104][1130/3239]	Time 0.241 (0.647)	Data Time 0.001 (0.036)	Loss 2.1573 (2.1914)	Entropy 0.72752 (0.72945)	Top-1 acc 73.438 (71.979)	Top-5 acc 87.500 (88.708)	lr 0.00117
Train [104][1140/3239]	Time 0.230 (0.646)	Data Time 0.001 (0.036)	Loss 2.2010 (2.1912)	Entropy 0.72746 (0.72943)	Top-1 acc 70.312 (71.985)	Top-5 acc 89.844 (88.715)	lr 0.00117
Train [104][1150/3239]	Time 0.222 (0.644)	Data Time 0.001 (0.035)	Loss 2.0934 (2.1916)	Entropy 0.72745 (0.72941)	Top-1 acc 76.172 (71.982)	Top-5 acc 91.406 (88.707)	lr 0.00117
Train [104][1160/3239]	Time 0.228 (0.643)	Data Time 0.001 (0.035)	Loss 2.2543 (2.1921)	Entropy 0.72748 (0.72940)	Top-1 acc 71.484 (71.958)	Top-5 acc 85.547 (88.698)	lr 0.00117
Train [104][1170/3239]	Time 0.236 (0.642)	Data Time 0.001 (0.035)	Loss 2.0084 (2.1928)	Entropy 0.72745 (0.72938)	Top-1 acc 75.781 (71.943)	Top-5 acc 92.578 (88.685)	lr 0.00117
Train [104][1180/3239]	Time 0.227 (0.640)	Data Time 0.001 (0.034)	Loss 2.0383 (2.1925)	Entropy 0.72749 (0.72936)	Top-1 acc 77.344 (71.953)	Top-5 acc 91.016 (88.690)	lr 0.00117
Train [104][1190/3239]	Time 0.234 (0.639)	Data Time 0.001 (0.034)	Loss 2.1147 (2.1925)	Entropy 0.72745 (0.72935)	Top-1 acc 73.438 (71.950)	Top-5 acc 92.188 (88.693)	lr 0.00117
Train [104][1200/3239]	Time 0.332 (0.637)	Data Time 0.010 (0.034)	Loss 2.1615 (2.1922)	Entropy 0.72753 (0.72933)	Top-1 acc 71.484 (71.951)	Top-5 acc 88.672 (88.698)	lr 0.00117
Train [104][1210/3239]	Time 0.226 (0.636)	Data Time 0.001 (0.034)	Loss 2.2499 (2.1919)	Entropy 0.72753 (0.72932)	Top-1 acc 71.484 (71.953)	Top-5 acc 86.719 (88.703)	lr 0.00117
Train [104][1220/3239]	Time 2.553 (0.635)	Data Time 0.001 (0.033)	Loss 2.2215 (2.1919)	Entropy 0.72753 (0.72930)	Top-1 acc 69.531 (71.951)	Top-5 acc 90.234 (88.700)	lr 0.00117
Train [104][1230/3239]	Time 0.234 (0.632)	Data Time 0.001 (0.033)	Loss 2.2232 (2.1920)	Entropy 0.72757 (0.72929)	Top-1 acc 68.750 (71.945)	Top-5 acc 88.672 (88.698)	lr 0.00116
Train [104][1240/3239]	Time 0.343 (0.631)	Data Time 0.001 (0.033)	Loss 2.2294 (2.1921)	Entropy 0.72759 (0.72927)	Top-1 acc 70.312 (71.941)	Top-5 acc 87.109 (88.694)	lr 0.00116
Train [104][1250/3239]	Time 0.239 (0.629)	Data Time 0.001 (0.033)	Loss 1.9346 (2.1920)	Entropy 0.72756 (0.72926)	Top-1 acc 76.953 (71.936)	Top-5 acc 94.141 (88.703)	lr 0.00116
Train [104][1260/3239]	Time 0.230 (0.628)	Data Time 0.001 (0.032)	Loss 2.0175 (2.1920)	Entropy 0.72757 (0.72925)	Top-1 acc 78.125 (71.939)	Top-5 acc 89.844 (88.705)	lr 0.00116
Train [104][1270/3239]	Time 0.252 (0.627)	Data Time 0.001 (0.032)	Loss 2.0968 (2.1920)	Entropy 0.72748 (0.72923)	Top-1 acc 74.219 (71.936)	Top-5 acc 91.016 (88.704)	lr 0.00116
Train [104][1280/3239]	Time 0.265 (0.626)	Data Time 0.002 (0.032)	Loss 2.3283 (2.1924)	Entropy 0.72748 (0.72922)	Top-1 acc 67.578 (71.927)	Top-5 acc 89.062 (88.695)	lr 0.00116
Train [104][1290/3239]	Time 0.223 (0.625)	Data Time 0.001 (0.032)	Loss 2.1010 (2.1922)	Entropy 0.72741 (0.72921)	Top-1 acc 75.000 (71.936)	Top-5 acc 91.406 (88.695)	lr 0.00116
Train [104][1300/3239]	Time 0.243 (0.624)	Data Time 0.001 (0.031)	Loss 2.1690 (2.1921)	Entropy 0.72735 (0.72919)	Top-1 acc 74.219 (71.941)	Top-5 acc 89.062 (88.693)	lr 0.00116
Train [104][1310/3239]	Time 0.263 (0.623)	Data Time 0.002 (0.031)	Loss 2.1798 (2.1922)	Entropy 0.72720 (0.72918)	Top-1 acc 72.656 (71.947)	Top-5 acc 86.719 (88.686)	lr 0.00116
Train [104][1320/3239]	Time 0.249 (0.622)	Data Time 0.001 (0.031)	Loss 2.1274 (2.1921)	Entropy 0.72727 (0.72916)	Top-1 acc 73.047 (71.948)	Top-5 acc 90.234 (88.686)	lr 0.00116
Train [104][1330/3239]	Time 2.540 (0.621)	Data Time 0.001 (0.031)	Loss 2.1526 (2.1922)	Entropy 0.72727 (0.72915)	Top-1 acc 69.922 (71.940)	Top-5 acc 89.844 (88.687)	lr 0.00116
Train [104][1340/3239]	Time 0.244 (0.618)	Data Time 0.001 (0.031)	Loss 2.0333 (2.1921)	Entropy 0.72729 (0.72914)	Top-1 acc 76.953 (71.935)	Top-5 acc 92.188 (88.692)	lr 0.00116
Train [104][1350/3239]	Time 0.230 (0.617)	Data Time 0.001 (0.030)	Loss 2.2162 (2.1923)	Entropy 0.72705 (0.72912)	Top-1 acc 68.750 (71.927)	Top-5 acc 88.672 (88.694)	lr 0.00116
Train [104][1360/3239]	Time 0.247 (0.616)	Data Time 0.005 (0.030)	Loss 2.2008 (2.1920)	Entropy 0.72700 (0.72910)	Top-1 acc 72.266 (71.932)	Top-5 acc 87.109 (88.695)	lr 0.00116
Train [104][1370/3239]	Time 0.270 (0.656)	Data Time 0.002 (0.030)	Loss 2.1941 (2.1921)	Entropy 0.72702 (0.72909)	Top-1 acc 72.656 (71.931)	Top-5 acc 89.062 (88.694)	lr 0.00116
Train [104][1380/3239]	Time 0.266 (0.654)	Data Time 0.002 (0.030)	Loss 2.0841 (2.1922)	Entropy 0.72697 (0.72907)	Top-1 acc 74.609 (71.926)	Top-5 acc 91.016 (88.689)	lr 0.00116
Train [104][1390/3239]	Time 0.241 (0.653)	Data Time 0.002 (0.030)	Loss 2.2506 (2.1925)	Entropy 0.72696 (0.72906)	Top-1 acc 72.656 (71.916)	Top-5 acc 87.500 (88.683)	lr 0.00116
Train [104][1400/3239]	Time 0.236 (0.652)	Data Time 0.002 (0.029)	Loss 2.1650 (2.1928)	Entropy 0.72691 (0.72904)	Top-1 acc 71.484 (71.902)	Top-5 acc 89.062 (88.676)	lr 0.00116
Train [104][1410/3239]	Time 0.232 (0.651)	Data Time 0.001 (0.029)	Loss 2.3365 (2.1930)	Entropy 0.72693 (0.72903)	Top-1 acc 68.359 (71.892)	Top-5 acc 87.109 (88.675)	lr 0.00116
Train [104][1420/3239]	Time 0.235 (0.650)	Data Time 0.001 (0.029)	Loss 2.2227 (2.1930)	Entropy 0.72692 (0.72901)	Top-1 acc 70.703 (71.888)	Top-5 acc 87.500 (88.673)	lr 0.00116
Train [104][1430/3239]	Time 0.244 (0.649)	Data Time 0.001 (0.029)	Loss 2.1048 (2.1926)	Entropy 0.72691 (0.72900)	Top-1 acc 78.125 (71.898)	Top-5 acc 88.281 (88.677)	lr 0.00116
Train [104][1440/3239]	Time 2.722 (0.648)	Data Time 0.001 (0.029)	Loss 2.4374 (2.1930)	Entropy 0.72691 (0.72899)	Top-1 acc 67.188 (71.887)	Top-5 acc 83.203 (88.666)	lr 0.00116
Train [104][1450/3239]	Time 0.387 (0.645)	Data Time 0.001 (0.028)	Loss 2.1988 (2.1932)	Entropy 0.72685 (0.72897)	Top-1 acc 73.828 (71.874)	Top-5 acc 89.844 (88.666)	lr 0.00116
Train [104][1460/3239]	Time 0.245 (0.644)	Data Time 0.001 (0.028)	Loss 2.1284 (2.1932)	Entropy 0.72678 (0.72896)	Top-1 acc 73.438 (71.869)	Top-5 acc 88.672 (88.668)	lr 0.00115
Train [104][1470/3239]	Time 0.251 (0.643)	Data Time 0.001 (0.028)	Loss 2.2457 (2.1932)	Entropy 0.72671 (0.72894)	Top-1 acc 71.484 (71.869)	Top-5 acc 85.547 (88.671)	lr 0.00115
Train [104][1480/3239]	Time 0.296 (0.642)	Data Time 0.001 (0.028)	Loss 2.1499 (2.1933)	Entropy 0.72670 (0.72893)	Top-1 acc 71.094 (71.866)	Top-5 acc 90.234 (88.667)	lr 0.00115
Train [104][1490/3239]	Time 0.346 (0.641)	Data Time 0.001 (0.028)	Loss 2.2100 (2.1931)	Entropy 0.72664 (0.72891)	Top-1 acc 73.438 (71.879)	Top-5 acc 90.234 (88.671)	lr 0.00115
Train [104][1500/3239]	Time 0.251 (0.640)	Data Time 0.001 (0.027)	Loss 2.2036 (2.1925)	Entropy 0.72652 (0.72889)	Top-1 acc 67.969 (71.893)	Top-5 acc 91.406 (88.682)	lr 0.00115
Train [104][1510/3239]	Time 0.238 (0.639)	Data Time 0.001 (0.027)	Loss 2.3018 (2.1923)	Entropy 0.72647 (0.72888)	Top-1 acc 67.969 (71.897)	Top-5 acc 85.156 (88.686)	lr 0.00115
Train [104][1520/3239]	Time 0.239 (0.638)	Data Time 0.001 (0.027)	Loss 2.3179 (2.1924)	Entropy 0.72642 (0.72886)	Top-1 acc 70.703 (71.894)	Top-5 acc 85.156 (88.685)	lr 0.00115
Train [104][1530/3239]	Time 0.349 (0.637)	Data Time 0.001 (0.027)	Loss 2.2705 (2.1924)	Entropy 0.72643 (0.72885)	Top-1 acc 68.359 (71.892)	Top-5 acc 85.938 (88.684)	lr 0.00115
Train [104][1540/3239]	Time 0.242 (0.636)	Data Time 0.001 (0.027)	Loss 2.2196 (2.1927)	Entropy 0.72639 (0.72883)	Top-1 acc 69.531 (71.889)	Top-5 acc 88.281 (88.681)	lr 0.00115
Train [104][1550/3239]	Time 2.665 (0.636)	Data Time 0.002 (0.027)	Loss 2.1990 (2.1928)	Entropy 0.72639 (0.72882)	Top-1 acc 73.438 (71.888)	Top-5 acc 87.500 (88.676)	lr 0.00115
Train [104][1560/3239]	Time 0.235 (0.633)	Data Time 0.001 (0.026)	Loss 2.1429 (2.1928)	Entropy 0.72641 (0.72880)	Top-1 acc 70.703 (71.886)	Top-5 acc 89.062 (88.675)	lr 0.00115
Train [104][1570/3239]	Time 0.247 (0.632)	Data Time 0.001 (0.026)	Loss 2.1137 (2.1924)	Entropy 0.72640 (0.72878)	Top-1 acc 75.000 (71.900)	Top-5 acc 87.891 (88.681)	lr 0.00115
Train [104][1580/3239]	Time 0.232 (0.631)	Data Time 0.001 (0.026)	Loss 2.2409 (2.1926)	Entropy 0.72644 (0.72877)	Top-1 acc 72.656 (71.891)	Top-5 acc 87.500 (88.677)	lr 0.00115
Train [104][1590/3239]	Time 0.251 (0.630)	Data Time 0.002 (0.026)	Loss 2.3047 (2.1925)	Entropy 0.72635 (0.72875)	Top-1 acc 66.406 (71.893)	Top-5 acc 87.500 (88.680)	lr 0.00115
Train [104][1600/3239]	Time 0.253 (0.629)	Data Time 0.001 (0.026)	Loss 2.1362 (2.1925)	Entropy 0.72635 (0.72874)	Top-1 acc 75.391 (71.892)	Top-5 acc 89.844 (88.678)	lr 0.00115
Train [104][1610/3239]	Time 0.255 (0.629)	Data Time 0.001 (0.026)	Loss 2.1577 (2.1927)	Entropy 0.72630 (0.72872)	Top-1 acc 72.266 (71.884)	Top-5 acc 90.234 (88.675)	lr 0.00115
Train [104][1620/3239]	Time 0.235 (0.628)	Data Time 0.001 (0.026)	Loss 2.1989 (2.1928)	Entropy 0.72627 (0.72871)	Top-1 acc 69.141 (71.878)	Top-5 acc 89.062 (88.674)	lr 0.00115
Train [104][1630/3239]	Time 0.225 (0.627)	Data Time 0.001 (0.025)	Loss 2.1926 (2.1926)	Entropy 0.72626 (0.72869)	Top-1 acc 69.531 (71.883)	Top-5 acc 88.672 (88.675)	lr 0.00115
Train [104][1640/3239]	Time 0.220 (0.626)	Data Time 0.001 (0.025)	Loss 2.1464 (2.1927)	Entropy 0.72631 (0.72868)	Top-1 acc 74.219 (71.886)	Top-5 acc 89.062 (88.673)	lr 0.00115
Train [104][1650/3239]	Time 0.245 (0.625)	Data Time 0.001 (0.025)	Loss 2.1047 (2.1926)	Entropy 0.72626 (0.72867)	Top-1 acc 74.219 (71.891)	Top-5 acc 90.234 (88.672)	lr 0.00115
Train [104][1660/3239]	Time 2.577 (0.624)	Data Time 0.001 (0.025)	Loss 2.0995 (2.1927)	Entropy 0.72626 (0.72865)	Top-1 acc 74.219 (71.887)	Top-5 acc 90.234 (88.666)	lr 0.00115
Train [104][1670/3239]	Time 0.225 (0.622)	Data Time 0.001 (0.025)	Loss 2.3545 (2.1930)	Entropy 0.72628 (0.72864)	Top-1 acc 72.266 (71.882)	Top-5 acc 84.766 (88.658)	lr 0.00115
Train [104][1680/3239]	Time 0.243 (0.621)	Data Time 0.002 (0.025)	Loss 2.0452 (2.1928)	Entropy 0.72629 (0.72862)	Top-1 acc 75.391 (71.886)	Top-5 acc 90.625 (88.663)	lr 0.00115
Train [104][1690/3239]	Time 0.233 (0.621)	Data Time 0.001 (0.025)	Loss 2.2389 (2.1931)	Entropy 0.72633 (0.72861)	Top-1 acc 71.875 (71.883)	Top-5 acc 88.281 (88.656)	lr 0.00115
Train [104][1700/3239]	Time 0.344 (0.620)	Data Time 0.001 (0.024)	Loss 2.2681 (2.1932)	Entropy 0.72633 (0.72860)	Top-1 acc 67.578 (71.875)	Top-5 acc 87.109 (88.657)	lr 0.00114
Train [104][1710/3239]	Time 0.236 (0.619)	Data Time 0.001 (0.024)	Loss 2.2371 (2.1931)	Entropy 0.72624 (0.72858)	Top-1 acc 71.094 (71.879)	Top-5 acc 85.938 (88.660)	lr 0.00114
Train [104][1720/3239]	Time 0.231 (0.618)	Data Time 0.001 (0.024)	Loss 2.2559 (2.1930)	Entropy 0.72621 (0.72857)	Top-1 acc 67.188 (71.874)	Top-5 acc 88.281 (88.661)	lr 0.00114
Train [104][1730/3239]	Time 0.354 (0.647)	Data Time 0.016 (0.024)	Loss 2.1945 (2.1931)	Entropy 0.72623 (0.72856)	Top-1 acc 70.703 (71.870)	Top-5 acc 89.062 (88.661)	lr 0.00114
Train [104][1740/3239]	Time 0.326 (0.647)	Data Time 0.002 (0.024)	Loss 2.3183 (2.1930)	Entropy 0.72629 (0.72854)	Top-1 acc 67.969 (71.864)	Top-5 acc 85.547 (88.662)	lr 0.00114
Train [104][1750/3239]	Time 0.229 (0.646)	Data Time 0.002 (0.024)	Loss 2.1867 (2.1932)	Entropy 0.72627 (0.72853)	Top-1 acc 73.438 (71.862)	Top-5 acc 89.062 (88.658)	lr 0.00114
Train [104][1760/3239]	Time 0.237 (0.645)	Data Time 0.001 (0.024)	Loss 2.1211 (2.1929)	Entropy 0.72627 (0.72852)	Top-1 acc 73.828 (71.871)	Top-5 acc 91.406 (88.665)	lr 0.00114
Train [104][1770/3239]	Time 2.600 (0.645)	Data Time 0.002 (0.024)	Loss 2.2213 (2.1927)	Entropy 0.72627 (0.72850)	Top-1 acc 71.094 (71.871)	Top-5 acc 89.062 (88.669)	lr 0.00114
Train [104][1780/3239]	Time 0.348 (0.642)	Data Time 0.003 (0.023)	Loss 2.1423 (2.1928)	Entropy 0.72622 (0.72849)	Top-1 acc 71.875 (71.866)	Top-5 acc 89.453 (88.667)	lr 0.00114
Train [104][1790/3239]	Time 0.241 (0.642)	Data Time 0.001 (0.023)	Loss 2.0870 (2.1925)	Entropy 0.72607 (0.72848)	Top-1 acc 75.391 (71.877)	Top-5 acc 89.453 (88.669)	lr 0.00114
Train [104][1800/3239]	Time 0.242 (0.641)	Data Time 0.001 (0.023)	Loss 2.2585 (2.1924)	Entropy 0.72599 (0.72846)	Top-1 acc 71.094 (71.881)	Top-5 acc 87.109 (88.669)	lr 0.00114
Train [104][1810/3239]	Time 0.276 (0.640)	Data Time 0.001 (0.023)	Loss 2.3723 (2.1930)	Entropy 0.72596 (0.72845)	Top-1 acc 70.312 (71.868)	Top-5 acc 84.375 (88.664)	lr 0.00114
Train [104][1820/3239]	Time 0.243 (0.639)	Data Time 0.001 (0.023)	Loss 2.2574 (2.1932)	Entropy 0.72594 (0.72844)	Top-1 acc 71.484 (71.863)	Top-5 acc 86.719 (88.657)	lr 0.00114
Train [104][1830/3239]	Time 0.233 (0.638)	Data Time 0.001 (0.023)	Loss 2.1714 (2.1931)	Entropy 0.72590 (0.72842)	Top-1 acc 71.094 (71.859)	Top-5 acc 91.016 (88.660)	lr 0.00114
Train [104][1840/3239]	Time 0.236 (0.637)	Data Time 0.001 (0.023)	Loss 2.2115 (2.1929)	Entropy 0.72584 (0.72841)	Top-1 acc 69.531 (71.864)	Top-5 acc 88.672 (88.666)	lr 0.00114
Train [104][1850/3239]	Time 0.217 (0.636)	Data Time 0.001 (0.023)	Loss 2.3126 (2.1928)	Entropy 0.72589 (0.72839)	Top-1 acc 68.359 (71.864)	Top-5 acc 86.719 (88.666)	lr 0.00114
Train [104][1860/3239]	Time 0.221 (0.636)	Data Time 0.001 (0.023)	Loss 2.2716 (2.1929)	Entropy 0.72576 (0.72838)	Top-1 acc 72.656 (71.864)	Top-5 acc 86.328 (88.665)	lr 0.00114
Train [104][1870/3239]	Time 0.242 (0.635)	Data Time 0.002 (0.022)	Loss 2.0586 (2.1928)	Entropy 0.72574 (0.72837)	Top-1 acc 72.266 (71.864)	Top-5 acc 91.797 (88.667)	lr 0.00114
Train [104][1880/3239]	Time 2.571 (0.634)	Data Time 0.001 (0.022)	Loss 2.0715 (2.1927)	Entropy 0.72574 (0.72835)	Top-1 acc 76.562 (71.863)	Top-5 acc 90.625 (88.668)	lr 0.00114
Train [104][1890/3239]	Time 0.301 (0.632)	Data Time 0.002 (0.022)	Loss 2.1009 (2.1924)	Entropy 0.72572 (0.72834)	Top-1 acc 73.828 (71.877)	Top-5 acc 90.625 (88.673)	lr 0.00114
Train [104][1900/3239]	Time 0.245 (0.631)	Data Time 0.002 (0.022)	Loss 2.1599 (2.1927)	Entropy 0.72568 (0.72833)	Top-1 acc 73.047 (71.872)	Top-5 acc 87.891 (88.667)	lr 0.00114
Train [104][1910/3239]	Time 0.327 (0.630)	Data Time 0.001 (0.022)	Loss 2.1321 (2.1925)	Entropy 0.72566 (0.72831)	Top-1 acc 74.609 (71.874)	Top-5 acc 90.625 (88.670)	lr 0.00114
Train [104][1920/3239]	Time 0.232 (0.630)	Data Time 0.001 (0.022)	Loss 2.1525 (2.1925)	Entropy 0.72566 (0.72830)	Top-1 acc 72.266 (71.874)	Top-5 acc 85.938 (88.666)	lr 0.00114
Train [104][1930/3239]	Time 0.232 (0.629)	Data Time 0.001 (0.022)	Loss 2.1830 (2.1926)	Entropy 0.72570 (0.72828)	Top-1 acc 72.656 (71.874)	Top-5 acc 89.453 (88.666)	lr 0.00114
Train [104][1940/3239]	Time 0.232 (0.628)	Data Time 0.001 (0.022)	Loss 2.1891 (2.1928)	Entropy 0.72565 (0.72827)	Top-1 acc 69.141 (71.866)	Top-5 acc 88.281 (88.664)	lr 0.00113
Train [104][1950/3239]	Time 0.224 (0.627)	Data Time 0.001 (0.022)	Loss 2.0910 (2.1926)	Entropy 0.72556 (0.72826)	Top-1 acc 75.000 (71.869)	Top-5 acc 89.453 (88.665)	lr 0.00113
Train [104][1960/3239]	Time 0.234 (0.627)	Data Time 0.001 (0.021)	Loss 2.1345 (2.1925)	Entropy 0.72550 (0.72824)	Top-1 acc 74.609 (71.875)	Top-5 acc 89.453 (88.666)	lr 0.00113
Train [104][1970/3239]	Time 0.236 (0.626)	Data Time 0.001 (0.021)	Loss 2.1986 (2.1925)	Entropy 0.72536 (0.72823)	Top-1 acc 68.359 (71.873)	Top-5 acc 90.234 (88.667)	lr 0.00113
Train [104][1980/3239]	Time 0.240 (0.625)	Data Time 0.001 (0.021)	Loss 2.1188 (2.1924)	Entropy 0.72533 (0.72821)	Top-1 acc 73.828 (71.872)	Top-5 acc 90.625 (88.672)	lr 0.00113
Train [104][1990/3239]	Time 2.676 (0.625)	Data Time 0.001 (0.021)	Loss 2.2412 (2.1925)	Entropy 0.72533 (0.72820)	Top-1 acc 71.484 (71.871)	Top-5 acc 87.891 (88.668)	lr 0.00113
Train [104][2000/3239]	Time 0.266 (0.623)	Data Time 0.001 (0.021)	Loss 2.1737 (2.1923)	Entropy 0.72530 (0.72819)	Top-1 acc 76.172 (71.876)	Top-5 acc 89.844 (88.674)	lr 0.00113
Train [104][2010/3239]	Time 0.223 (0.622)	Data Time 0.001 (0.021)	Loss 2.2916 (2.1924)	Entropy 0.72534 (0.72817)	Top-1 acc 67.969 (71.872)	Top-5 acc 87.891 (88.672)	lr 0.00113
Train [104][2020/3239]	Time 0.240 (0.621)	Data Time 0.001 (0.021)	Loss 2.1472 (2.1922)	Entropy 0.72533 (0.72816)	Top-1 acc 72.266 (71.879)	Top-5 acc 89.453 (88.672)	lr 0.00113
Train [104][2030/3239]	Time 0.345 (0.621)	Data Time 0.001 (0.021)	Loss 2.1930 (2.1922)	Entropy 0.72527 (0.72814)	Top-1 acc 70.312 (71.875)	Top-5 acc 86.719 (88.670)	lr 0.00113
Train [104][2040/3239]	Time 0.256 (0.620)	Data Time 0.001 (0.021)	Loss 2.3416 (2.1922)	Entropy 0.72529 (0.72813)	Top-1 acc 68.359 (71.872)	Top-5 acc 87.500 (88.669)	lr 0.00113
Train [104][2050/3239]	Time 0.248 (0.620)	Data Time 0.002 (0.021)	Loss 2.2198 (2.1926)	Entropy 0.72522 (0.72811)	Top-1 acc 72.266 (71.864)	Top-5 acc 86.328 (88.662)	lr 0.00113
Train [104][2060/3239]	Time 0.248 (0.619)	Data Time 0.001 (0.021)	Loss 2.1018 (2.1924)	Entropy 0.72520 (0.72810)	Top-1 acc 79.297 (71.873)	Top-5 acc 91.016 (88.666)	lr 0.00113
Train [104][2070/3239]	Time 0.286 (0.618)	Data Time 0.001 (0.020)	Loss 2.1521 (2.1925)	Entropy 0.72517 (0.72809)	Top-1 acc 67.188 (71.866)	Top-5 acc 91.406 (88.665)	lr 0.00113
Train [104][2080/3239]	Time 0.236 (0.618)	Data Time 0.001 (0.020)	Loss 2.0159 (2.1922)	Entropy 0.72523 (0.72807)	Top-1 acc 76.953 (71.873)	Top-5 acc 91.797 (88.668)	lr 0.00113
Train [104][2090/3239]	Time 0.312 (0.642)	Data Time 0.005 (0.020)	Loss 2.1532 (2.1923)	Entropy 0.72514 (0.72806)	Top-1 acc 72.266 (71.867)	Top-5 acc 89.453 (88.668)	lr 0.00113
Train [104][2100/3239]	Time 2.665 (0.642)	Data Time 0.002 (0.020)	Loss 2.3880 (2.1926)	Entropy 0.72514 (0.72805)	Top-1 acc 66.016 (71.855)	Top-5 acc 86.328 (88.668)	lr 0.00113
Train [104][2110/3239]	Time 0.245 (0.640)	Data Time 0.002 (0.020)	Loss 2.0516 (2.1926)	Entropy 0.72515 (0.72803)	Top-1 acc 74.609 (71.858)	Top-5 acc 91.797 (88.670)	lr 0.00113
Train [104][2120/3239]	Time 0.245 (0.639)	Data Time 0.001 (0.020)	Loss 2.0851 (2.1922)	Entropy 0.72486 (0.72802)	Top-1 acc 75.000 (71.870)	Top-5 acc 89.844 (88.678)	lr 0.00113
Train [104][2130/3239]	Time 0.246 (0.639)	Data Time 0.002 (0.020)	Loss 2.3065 (2.1925)	Entropy 0.72473 (0.72800)	Top-1 acc 71.094 (71.866)	Top-5 acc 86.328 (88.672)	lr 0.00113
Train [104][2140/3239]	Time 0.232 (0.638)	Data Time 0.001 (0.020)	Loss 2.1035 (2.1923)	Entropy 0.72464 (0.72799)	Top-1 acc 75.000 (71.874)	Top-5 acc 91.797 (88.677)	lr 0.00113
Train [104][2150/3239]	Time 0.231 (0.637)	Data Time 0.001 (0.020)	Loss 2.2832 (2.1921)	Entropy 0.72473 (0.72797)	Top-1 acc 71.094 (71.878)	Top-5 acc 85.938 (88.683)	lr 0.00113
Train [104][2160/3239]	Time 0.221 (0.636)	Data Time 0.001 (0.020)	Loss 2.1489 (2.1920)	Entropy 0.72477 (0.72796)	Top-1 acc 73.438 (71.884)	Top-5 acc 89.844 (88.685)	lr 0.00113
Train [104][2170/3239]	Time 0.243 (0.636)	Data Time 0.001 (0.020)	Loss 2.1694 (2.1920)	Entropy 0.72479 (0.72794)	Top-1 acc 72.266 (71.889)	Top-5 acc 87.109 (88.682)	lr 0.00113
Train [104][2180/3239]	Time 0.232 (0.635)	Data Time 0.001 (0.019)	Loss 2.2392 (2.1919)	Entropy 0.72479 (0.72793)	Top-1 acc 71.484 (71.890)	Top-5 acc 88.672 (88.685)	lr 0.00112
Train [104][2190/3239]	Time 0.252 (0.634)	Data Time 0.001 (0.019)	Loss 2.2629 (2.1922)	Entropy 0.72479 (0.72791)	Top-1 acc 73.047 (71.885)	Top-5 acc 86.328 (88.678)	lr 0.00112
Train [104][2200/3239]	Time 0.223 (0.634)	Data Time 0.002 (0.019)	Loss 2.3449 (2.1921)	Entropy 0.72481 (0.72790)	Top-1 acc 67.969 (71.890)	Top-5 acc 83.984 (88.678)	lr 0.00112
Train [104][2210/3239]	Time 2.523 (0.633)	Data Time 0.001 (0.019)	Loss 2.1800 (2.1922)	Entropy 0.72481 (0.72788)	Top-1 acc 71.875 (71.891)	Top-5 acc 87.891 (88.677)	lr 0.00112
Train [104][2220/3239]	Time 0.247 (0.631)	Data Time 0.001 (0.019)	Loss 2.2351 (2.1923)	Entropy 0.72462 (0.72787)	Top-1 acc 69.141 (71.890)	Top-5 acc 88.281 (88.674)	lr 0.00112
Train [104][2230/3239]	Time 0.258 (0.631)	Data Time 0.002 (0.019)	Loss 2.1611 (2.1923)	Entropy 0.72454 (0.72785)	Top-1 acc 72.656 (71.890)	Top-5 acc 87.891 (88.670)	lr 0.00112
Train [104][2240/3239]	Time 0.363 (0.630)	Data Time 0.001 (0.019)	Loss 2.2535 (2.1922)	Entropy 0.72454 (0.72784)	Top-1 acc 69.922 (71.894)	Top-5 acc 88.672 (88.673)	lr 0.00112
Train [104][2250/3239]	Time 0.249 (0.629)	Data Time 0.001 (0.019)	Loss 2.0568 (2.1920)	Entropy 0.72445 (0.72782)	Top-1 acc 76.953 (71.899)	Top-5 acc 90.234 (88.676)	lr 0.00112
Train [104][2260/3239]	Time 0.277 (0.629)	Data Time 0.001 (0.019)	Loss 2.1294 (2.1920)	Entropy 0.72445 (0.72781)	Top-1 acc 73.438 (71.897)	Top-5 acc 89.453 (88.677)	lr 0.00112
Train [104][2270/3239]	Time 0.235 (0.628)	Data Time 0.001 (0.019)	Loss 2.4218 (2.1920)	Entropy 0.72441 (0.72779)	Top-1 acc 63.281 (71.896)	Top-5 acc 85.938 (88.677)	lr 0.00112
Train [104][2280/3239]	Time 0.343 (0.628)	Data Time 0.001 (0.019)	Loss 2.1783 (2.1921)	Entropy 0.72436 (0.72778)	Top-1 acc 74.219 (71.899)	Top-5 acc 88.672 (88.675)	lr 0.00112
Train [104][2290/3239]	Time 0.208 (0.627)	Data Time 0.001 (0.019)	Loss 2.1612 (2.1919)	Entropy 0.72434 (0.72777)	Top-1 acc 73.828 (71.904)	Top-5 acc 90.234 (88.679)	lr 0.00112
Train [104][2300/3239]	Time 0.223 (0.626)	Data Time 0.001 (0.019)	Loss 2.0288 (2.1917)	Entropy 0.72439 (0.72775)	Top-1 acc 74.219 (71.906)	Top-5 acc 91.016 (88.684)	lr 0.00112
Train [104][2310/3239]	Time 0.254 (0.626)	Data Time 0.001 (0.018)	Loss 2.2440 (2.1918)	Entropy 0.72437 (0.72774)	Top-1 acc 70.312 (71.905)	Top-5 acc 88.281 (88.685)	lr 0.00112
Train [104][2320/3239]	Time 2.691 (0.625)	Data Time 0.001 (0.018)	Loss 2.3200 (2.1917)	Entropy 0.72437 (0.72772)	Top-1 acc 68.359 (71.908)	Top-5 acc 85.156 (88.686)	lr 0.00112
Train [104][2330/3239]	Time 0.249 (0.623)	Data Time 0.001 (0.018)	Loss 2.1715 (2.1917)	Entropy 0.72436 (0.72771)	Top-1 acc 72.656 (71.910)	Top-5 acc 88.672 (88.686)	lr 0.00112
Train [104][2340/3239]	Time 0.234 (0.623)	Data Time 0.001 (0.018)	Loss 2.0636 (2.1916)	Entropy 0.72443 (0.72769)	Top-1 acc 74.219 (71.913)	Top-5 acc 91.406 (88.689)	lr 0.00112
Train [104][2350/3239]	Time 0.234 (0.622)	Data Time 0.001 (0.018)	Loss 2.2045 (2.1915)	Entropy 0.72437 (0.72768)	Top-1 acc 69.531 (71.913)	Top-5 acc 89.844 (88.689)	lr 0.00112
Train [104][2360/3239]	Time 0.234 (0.622)	Data Time 0.002 (0.018)	Loss 2.1149 (2.1914)	Entropy 0.72433 (0.72766)	Top-1 acc 74.219 (71.917)	Top-5 acc 90.234 (88.693)	lr 0.00112
Train [104][2370/3239]	Time 0.227 (0.621)	Data Time 0.001 (0.018)	Loss 2.2129 (2.1916)	Entropy 0.72421 (0.72765)	Top-1 acc 71.484 (71.911)	Top-5 acc 89.062 (88.689)	lr 0.00112
Train [104][2380/3239]	Time 0.227 (0.621)	Data Time 0.001 (0.018)	Loss 2.2270 (2.1917)	Entropy 0.72410 (0.72764)	Top-1 acc 70.703 (71.912)	Top-5 acc 89.062 (88.687)	lr 0.00112
Train [104][2390/3239]	Time 0.231 (0.620)	Data Time 0.001 (0.018)	Loss 2.2618 (2.1914)	Entropy 0.72404 (0.72762)	Top-1 acc 70.312 (71.918)	Top-5 acc 86.719 (88.691)	lr 0.00112
Train [104][2400/3239]	Time 0.233 (0.619)	Data Time 0.001 (0.018)	Loss 2.2661 (2.1916)	Entropy 0.72390 (0.72761)	Top-1 acc 68.750 (71.912)	Top-5 acc 89.453 (88.693)	lr 0.00112
Train [104][2410/3239]	Time 0.220 (0.619)	Data Time 0.001 (0.018)	Loss 2.0774 (2.1913)	Entropy 0.72388 (0.72759)	Top-1 acc 75.781 (71.920)	Top-5 acc 89.453 (88.696)	lr 0.00112
Train [104][2420/3239]	Time 0.231 (0.618)	Data Time 0.001 (0.018)	Loss 2.3154 (2.1913)	Entropy 0.72425 (0.72757)	Top-1 acc 67.188 (71.919)	Top-5 acc 84.375 (88.694)	lr 0.00111
Train [104][2430/3239]	Time 2.709 (0.618)	Data Time 0.001 (0.018)	Loss 2.1211 (2.1911)	Entropy 0.72425 (0.72756)	Top-1 acc 73.438 (71.924)	Top-5 acc 89.062 (88.695)	lr 0.00111
Train [104][2440/3239]	Time 0.235 (0.616)	Data Time 0.001 (0.018)	Loss 2.2021 (2.1911)	Entropy 0.72424 (0.72755)	Top-1 acc 70.312 (71.926)	Top-5 acc 86.719 (88.697)	lr 0.00111
Train [104][2450/3239]	Time 0.371 (0.616)	Data Time 0.002 (0.018)	Loss 2.3343 (2.1912)	Entropy 0.72421 (0.72753)	Top-1 acc 71.484 (71.921)	Top-5 acc 85.156 (88.694)	lr 0.00111
Train [104][2460/3239]	Time 0.328 (0.635)	Data Time 0.003 (0.017)	Loss 2.2149 (2.1913)	Entropy 0.72411 (0.72752)	Top-1 acc 72.266 (71.920)	Top-5 acc 90.234 (88.695)	lr 0.00111
Train [104][2470/3239]	Time 0.244 (0.635)	Data Time 0.002 (0.017)	Loss 2.1284 (2.1912)	Entropy 0.72403 (0.72751)	Top-1 acc 72.266 (71.927)	Top-5 acc 92.188 (88.693)	lr 0.00111
Train [104][2480/3239]	Time 0.231 (0.634)	Data Time 0.001 (0.017)	Loss 2.1724 (2.1912)	Entropy 0.72396 (0.72749)	Top-1 acc 70.312 (71.924)	Top-5 acc 88.672 (88.694)	lr 0.00111
Train [104][2490/3239]	Time 0.239 (0.634)	Data Time 0.001 (0.017)	Loss 2.1581 (2.1915)	Entropy 0.72391 (0.72748)	Top-1 acc 72.266 (71.916)	Top-5 acc 90.234 (88.688)	lr 0.00111
Train [104][2500/3239]	Time 0.225 (0.633)	Data Time 0.001 (0.017)	Loss 2.1349 (2.1914)	Entropy 0.72385 (0.72746)	Top-1 acc 74.219 (71.920)	Top-5 acc 89.844 (88.689)	lr 0.00111
Train [104][2510/3239]	Time 0.244 (0.633)	Data Time 0.001 (0.017)	Loss 2.1710 (2.1915)	Entropy 0.72377 (0.72745)	Top-1 acc 74.219 (71.915)	Top-5 acc 91.016 (88.689)	lr 0.00111
Train [104][2520/3239]	Time 0.227 (0.632)	Data Time 0.001 (0.017)	Loss 2.0783 (2.1914)	Entropy 0.72369 (0.72743)	Top-1 acc 74.219 (71.919)	Top-5 acc 90.625 (88.691)	lr 0.00111
Train [104][2530/3239]	Time 0.238 (0.631)	Data Time 0.001 (0.017)	Loss 2.3673 (2.1914)	Entropy 0.72370 (0.72742)	Top-1 acc 67.969 (71.916)	Top-5 acc 84.766 (88.691)	lr 0.00111
Train [104][2540/3239]	Time 2.543 (0.631)	Data Time 0.001 (0.017)	Loss 2.4224 (2.1916)	Entropy 0.72370 (0.72740)	Top-1 acc 69.531 (71.910)	Top-5 acc 85.156 (88.685)	lr 0.00111
Train [104][2550/3239]	Time 0.228 (0.629)	Data Time 0.001 (0.017)	Loss 2.3876 (2.1918)	Entropy 0.72356 (0.72739)	Top-1 acc 68.359 (71.906)	Top-5 acc 83.203 (88.686)	lr 0.00111
Train [104][2560/3239]	Time 0.217 (0.629)	Data Time 0.001 (0.017)	Loss 2.1969 (2.1918)	Entropy 0.72356 (0.72737)	Top-1 acc 73.047 (71.905)	Top-5 acc 86.719 (88.684)	lr 0.00111
Train [104][2570/3239]	Time 0.235 (0.628)	Data Time 0.001 (0.017)	Loss 2.1452 (2.1922)	Entropy 0.72358 (0.72736)	Top-1 acc 73.828 (71.895)	Top-5 acc 89.844 (88.677)	lr 0.00111
Train [104][2580/3239]	Time 0.215 (0.628)	Data Time 0.001 (0.017)	Loss 2.2513 (2.1921)	Entropy 0.72349 (0.72735)	Top-1 acc 68.359 (71.896)	Top-5 acc 87.891 (88.676)	lr 0.00111
Train [104][2590/3239]	Time 0.240 (0.627)	Data Time 0.001 (0.017)	Loss 1.9745 (2.1919)	Entropy 0.72350 (0.72733)	Top-1 acc 76.562 (71.899)	Top-5 acc 91.797 (88.679)	lr 0.00111
Train [104][2600/3239]	Time 0.233 (0.627)	Data Time 0.001 (0.017)	Loss 2.0832 (2.1919)	Entropy 0.72352 (0.72732)	Top-1 acc 72.266 (71.897)	Top-5 acc 91.016 (88.677)	lr 0.00111
Train [104][2610/3239]	Time 0.232 (0.626)	Data Time 0.001 (0.017)	Loss 2.3456 (2.1920)	Entropy 0.72344 (0.72730)	Top-1 acc 67.578 (71.894)	Top-5 acc 85.547 (88.674)	lr 0.00111
Train [104][2620/3239]	Time 0.322 (0.626)	Data Time 0.001 (0.016)	Loss 2.3803 (2.1923)	Entropy 0.72349 (0.72729)	Top-1 acc 65.234 (71.886)	Top-5 acc 87.500 (88.672)	lr 0.00111
Train [104][2630/3239]	Time 0.245 (0.625)	Data Time 0.001 (0.016)	Loss 2.3920 (2.1923)	Entropy 0.72350 (0.72727)	Top-1 acc 63.672 (71.884)	Top-5 acc 86.719 (88.673)	lr 0.00111
Train [104][2640/3239]	Time 0.288 (0.625)	Data Time 0.001 (0.016)	Loss 2.1205 (2.1922)	Entropy 0.72339 (0.72726)	Top-1 acc 74.609 (71.890)	Top-5 acc 89.453 (88.671)	lr 0.00111
Train [104][2650/3239]	Time 0.268 (0.624)	Data Time 0.001 (0.016)	Loss 2.2431 (2.1919)	Entropy 0.72341 (0.72724)	Top-1 acc 71.094 (71.893)	Top-5 acc 86.719 (88.676)	lr 0.00111
Train [104][2660/3239]	Time 0.340 (0.624)	Data Time 0.002 (0.016)	Loss 2.1120 (2.1919)	Entropy 0.72335 (0.72723)	Top-1 acc 73.438 (71.896)	Top-5 acc 90.625 (88.677)	lr 0.00110
Train [104][2670/3239]	Time 0.225 (0.623)	Data Time 0.001 (0.016)	Loss 2.2095 (2.1919)	Entropy 0.72333 (0.72721)	Top-1 acc 70.312 (71.894)	Top-5 acc 87.109 (88.676)	lr 0.00110
Train [104][2680/3239]	Time 0.223 (0.622)	Data Time 0.001 (0.016)	Loss 2.2650 (2.1918)	Entropy 0.72335 (0.72720)	Top-1 acc 68.359 (71.895)	Top-5 acc 87.109 (88.679)	lr 0.00110
Train [104][2690/3239]	Time 0.236 (0.622)	Data Time 0.001 (0.016)	Loss 2.0243 (2.1918)	Entropy 0.72338 (0.72719)	Top-1 acc 76.562 (71.898)	Top-5 acc 91.406 (88.678)	lr 0.00110
Train [104][2700/3239]	Time 0.349 (0.621)	Data Time 0.001 (0.016)	Loss 2.0828 (2.1918)	Entropy 0.72322 (0.72717)	Top-1 acc 75.391 (71.898)	Top-5 acc 91.406 (88.677)	lr 0.00110
Train [104][2710/3239]	Time 0.290 (0.621)	Data Time 0.001 (0.016)	Loss 1.9754 (2.1915)	Entropy 0.72311 (0.72716)	Top-1 acc 77.734 (71.906)	Top-5 acc 90.234 (88.683)	lr 0.00110
Train [104][2720/3239]	Time 0.228 (0.620)	Data Time 0.001 (0.016)	Loss 2.1375 (2.1914)	Entropy 0.72299 (0.72714)	Top-1 acc 71.484 (71.910)	Top-5 acc 89.062 (88.684)	lr 0.00110
Train [104][2730/3239]	Time 0.210 (0.620)	Data Time 0.001 (0.016)	Loss 2.2833 (2.1914)	Entropy 0.72283 (0.72713)	Top-1 acc 70.312 (71.906)	Top-5 acc 84.375 (88.685)	lr 0.00110
Train [104][2740/3239]	Time 0.336 (0.619)	Data Time 0.001 (0.016)	Loss 2.1758 (2.1915)	Entropy 0.72282 (0.72711)	Top-1 acc 71.875 (71.904)	Top-5 acc 87.891 (88.685)	lr 0.00110
Train [104][2750/3239]	Time 0.233 (0.619)	Data Time 0.001 (0.016)	Loss 2.2220 (2.1914)	Entropy 0.72280 (0.72709)	Top-1 acc 69.531 (71.904)	Top-5 acc 90.234 (88.684)	lr 0.00110
Train [104][2760/3239]	Time 0.221 (0.619)	Data Time 0.001 (0.016)	Loss 2.3154 (2.1915)	Entropy 0.72280 (0.72708)	Top-1 acc 69.531 (71.905)	Top-5 acc 85.156 (88.682)	lr 0.00110
Train [104][2770/3239]	Time 0.222 (0.618)	Data Time 0.002 (0.016)	Loss 2.1666 (2.1915)	Entropy 0.72281 (0.72706)	Top-1 acc 74.609 (71.910)	Top-5 acc 89.844 (88.680)	lr 0.00110
Train [104][2780/3239]	Time 0.226 (0.617)	Data Time 0.001 (0.016)	Loss 2.3639 (2.1915)	Entropy 0.72278 (0.72705)	Top-1 acc 67.188 (71.909)	Top-5 acc 85.156 (88.681)	lr 0.00110
Train [104][2790/3239]	Time 0.266 (0.617)	Data Time 0.001 (0.016)	Loss 2.0765 (2.1914)	Entropy 0.72278 (0.72703)	Top-1 acc 75.000 (71.909)	Top-5 acc 90.625 (88.684)	lr 0.00110
Train [104][2800/3239]	Time 0.385 (0.634)	Data Time 0.004 (0.016)	Loss 2.2560 (2.1914)	Entropy 0.72279 (0.72702)	Top-1 acc 69.922 (71.910)	Top-5 acc 86.328 (88.685)	lr 0.00110
Train [104][2810/3239]	Time 0.235 (0.634)	Data Time 0.002 (0.015)	Loss 2.1439 (2.1914)	Entropy 0.72281 (0.72700)	Top-1 acc 73.047 (71.907)	Top-5 acc 90.234 (88.686)	lr 0.00110
Train [104][2820/3239]	Time 0.236 (0.634)	Data Time 0.001 (0.015)	Loss 2.0832 (2.1916)	Entropy 0.72272 (0.72699)	Top-1 acc 74.219 (71.902)	Top-5 acc 89.062 (88.683)	lr 0.00110
Train [104][2830/3239]	Time 0.235 (0.633)	Data Time 0.001 (0.015)	Loss 2.2918 (2.1918)	Entropy 0.72269 (0.72697)	Top-1 acc 67.578 (71.894)	Top-5 acc 86.328 (88.680)	lr 0.00110
Train [104][2840/3239]	Time 0.240 (0.633)	Data Time 0.002 (0.015)	Loss 2.1599 (2.1917)	Entropy 0.72267 (0.72696)	Top-1 acc 73.438 (71.896)	Top-5 acc 90.625 (88.683)	lr 0.00110
Train [104][2850/3239]	Time 0.214 (0.632)	Data Time 0.001 (0.015)	Loss 2.1654 (2.1915)	Entropy 0.72274 (0.72694)	Top-1 acc 70.703 (71.902)	Top-5 acc 89.062 (88.685)	lr 0.00110
Train [104][2860/3239]	Time 0.271 (0.632)	Data Time 0.001 (0.015)	Loss 2.2265 (2.1917)	Entropy 0.72269 (0.72693)	Top-1 acc 68.359 (71.899)	Top-5 acc 90.625 (88.684)	lr 0.00110
Train [104][2870/3239]	Time 0.227 (0.631)	Data Time 0.001 (0.015)	Loss 2.1766 (2.1916)	Entropy 0.72267 (0.72691)	Top-1 acc 71.094 (71.900)	Top-5 acc 89.844 (88.687)	lr 0.00110
Train [104][2880/3239]	Time 0.262 (0.631)	Data Time 0.002 (0.015)	Loss 2.2538 (2.1915)	Entropy 0.72263 (0.72690)	Top-1 acc 67.969 (71.903)	Top-5 acc 85.547 (88.689)	lr 0.00110
Train [104][2890/3239]	Time 0.239 (0.630)	Data Time 0.001 (0.015)	Loss 2.1779 (2.1916)	Entropy 0.72252 (0.72688)	Top-1 acc 72.266 (71.902)	Top-5 acc 88.672 (88.687)	lr 0.00110
Train [104][2900/3239]	Time 0.267 (0.630)	Data Time 0.011 (0.015)	Loss 2.2629 (2.1915)	Entropy 0.72256 (0.72687)	Top-1 acc 69.922 (71.904)	Top-5 acc 87.109 (88.686)	lr 0.00109
Train [104][2910/3239]	Time 0.235 (0.629)	Data Time 0.001 (0.015)	Loss 2.2413 (2.1915)	Entropy 0.72252 (0.72685)	Top-1 acc 70.703 (71.908)	Top-5 acc 89.062 (88.684)	lr 0.00109
Train [104][2920/3239]	Time 0.228 (0.629)	Data Time 0.001 (0.015)	Loss 2.1834 (2.1914)	Entropy 0.72247 (0.72684)	Top-1 acc 70.312 (71.908)	Top-5 acc 88.672 (88.686)	lr 0.00109
Train [104][2930/3239]	Time 0.274 (0.628)	Data Time 0.001 (0.015)	Loss 1.9208 (2.1915)	Entropy 0.72262 (0.72682)	Top-1 acc 76.562 (71.905)	Top-5 acc 94.141 (88.684)	lr 0.00109
Train [104][2940/3239]	Time 0.293 (0.628)	Data Time 0.001 (0.015)	Loss 2.1681 (2.1915)	Entropy 0.72258 (0.72681)	Top-1 acc 69.531 (71.907)	Top-5 acc 89.844 (88.687)	lr 0.00109
Train [104][2950/3239]	Time 0.361 (0.627)	Data Time 0.001 (0.015)	Loss 2.2345 (2.1914)	Entropy 0.72260 (0.72680)	Top-1 acc 71.094 (71.910)	Top-5 acc 87.891 (88.689)	lr 0.00109
Train [104][2960/3239]	Time 0.259 (0.627)	Data Time 0.001 (0.015)	Loss 2.1292 (2.1912)	Entropy 0.72248 (0.72678)	Top-1 acc 71.094 (71.916)	Top-5 acc 91.016 (88.692)	lr 0.00109
Train [104][2970/3239]	Time 0.233 (0.626)	Data Time 0.001 (0.015)	Loss 2.3373 (2.1913)	Entropy 0.72250 (0.72677)	Top-1 acc 66.016 (71.912)	Top-5 acc 85.547 (88.689)	lr 0.00109
Train [104][2980/3239]	Time 0.247 (0.626)	Data Time 0.001 (0.015)	Loss 2.1749 (2.1913)	Entropy 0.72252 (0.72675)	Top-1 acc 72.656 (71.914)	Top-5 acc 89.844 (88.687)	lr 0.00109
Train [104][2990/3239]	Time 0.336 (0.625)	Data Time 0.001 (0.015)	Loss 2.1978 (2.1913)	Entropy 0.72251 (0.72674)	Top-1 acc 71.484 (71.916)	Top-5 acc 87.500 (88.687)	lr 0.00109
Train [104][3000/3239]	Time 0.241 (0.625)	Data Time 0.001 (0.015)	Loss 2.2164 (2.1913)	Entropy 0.72248 (0.72672)	Top-1 acc 69.141 (71.913)	Top-5 acc 88.281 (88.685)	lr 0.00109
Train [104][3010/3239]	Time 0.288 (0.625)	Data Time 0.001 (0.015)	Loss 2.3277 (2.1913)	Entropy 0.72232 (0.72671)	Top-1 acc 69.922 (71.915)	Top-5 acc 84.766 (88.687)	lr 0.00109
Train [104][3020/3239]	Time 0.241 (0.624)	Data Time 0.001 (0.015)	Loss 2.2842 (2.1915)	Entropy 0.72227 (0.72670)	Top-1 acc 68.359 (71.910)	Top-5 acc 88.672 (88.684)	lr 0.00109
Train [104][3030/3239]	Time 0.348 (0.624)	Data Time 0.001 (0.014)	Loss 2.1228 (2.1914)	Entropy 0.72214 (0.72668)	Top-1 acc 72.656 (71.911)	Top-5 acc 90.625 (88.686)	lr 0.00109
Train [104][3040/3239]	Time 0.269 (0.623)	Data Time 0.001 (0.014)	Loss 2.2138 (2.1914)	Entropy 0.72210 (0.72667)	Top-1 acc 70.312 (71.910)	Top-5 acc 87.891 (88.684)	lr 0.00109
Train [104][3050/3239]	Time 0.234 (0.623)	Data Time 0.001 (0.014)	Loss 2.2963 (2.1915)	Entropy 0.72211 (0.72665)	Top-1 acc 70.703 (71.907)	Top-5 acc 86.719 (88.685)	lr 0.00109
Train [104][3060/3239]	Time 0.226 (0.622)	Data Time 0.001 (0.014)	Loss 2.0444 (2.1912)	Entropy 0.72216 (0.72664)	Top-1 acc 77.734 (71.914)	Top-5 acc 91.016 (88.688)	lr 0.00109
Train [104][3070/3239]	Time 0.349 (0.622)	Data Time 0.001 (0.014)	Loss 2.2182 (2.1912)	Entropy 0.72210 (0.72662)	Top-1 acc 71.484 (71.914)	Top-5 acc 90.625 (88.687)	lr 0.00109
Train [104][3080/3239]	Time 0.232 (0.621)	Data Time 0.001 (0.014)	Loss 2.3102 (2.1913)	Entropy 0.72201 (0.72661)	Top-1 acc 70.312 (71.912)	Top-5 acc 85.938 (88.688)	lr 0.00109
Train [104][3090/3239]	Time 0.230 (0.621)	Data Time 0.001 (0.014)	Loss 2.2301 (2.1912)	Entropy 0.72196 (0.72659)	Top-1 acc 70.312 (71.911)	Top-5 acc 89.844 (88.691)	lr 0.00109
Train [104][3100/3239]	Time 0.241 (0.620)	Data Time 0.001 (0.014)	Loss 2.2945 (2.1911)	Entropy 0.72197 (0.72658)	Top-1 acc 67.969 (71.912)	Top-5 acc 87.500 (88.693)	lr 0.00109
Train [104][3110/3239]	Time 0.329 (0.620)	Data Time 0.001 (0.014)	Loss 2.2049 (2.1911)	Entropy 0.72196 (0.72656)	Top-1 acc 73.438 (71.911)	Top-5 acc 87.500 (88.693)	lr 0.00109
Train [104][3120/3239]	Time 0.273 (0.620)	Data Time 0.001 (0.014)	Loss 2.0504 (2.1911)	Entropy 0.72194 (0.72655)	Top-1 acc 73.828 (71.910)	Top-5 acc 92.188 (88.695)	lr 0.00109
Train [104][3130/3239]	Time 0.283 (0.636)	Data Time 0.004 (0.014)	Loss 2.2090 (2.1911)	Entropy 0.72191 (0.72653)	Top-1 acc 72.656 (71.912)	Top-5 acc 89.453 (88.694)	lr 0.00109
Train [104][3140/3239]	Time 0.229 (0.635)	Data Time 0.002 (0.014)	Loss 2.1783 (2.1911)	Entropy 0.72195 (0.72652)	Top-1 acc 74.219 (71.912)	Top-5 acc 88.281 (88.694)	lr 0.00108
Train [104][3150/3239]	Time 0.236 (0.635)	Data Time 0.002 (0.014)	Loss 2.3209 (2.1911)	Entropy 0.72189 (0.72650)	Top-1 acc 66.797 (71.912)	Top-5 acc 86.719 (88.692)	lr 0.00108
Train [104][3160/3239]	Time 0.229 (0.635)	Data Time 0.001 (0.014)	Loss 2.2886 (2.1911)	Entropy 0.72195 (0.72649)	Top-1 acc 67.969 (71.913)	Top-5 acc 87.891 (88.691)	lr 0.00108
Train [104][3170/3239]	Time 0.279 (0.634)	Data Time 0.001 (0.014)	Loss 2.2413 (2.1911)	Entropy 0.72198 (0.72647)	Top-1 acc 68.750 (71.913)	Top-5 acc 86.719 (88.693)	lr 0.00108
Train [104][3180/3239]	Time 0.232 (0.634)	Data Time 0.000 (0.014)	Loss 2.1663 (2.1911)	Entropy 0.72205 (0.72646)	Top-1 acc 71.094 (71.914)	Top-5 acc 91.016 (88.692)	lr 0.00108
Train [104][3190/3239]	Time 0.233 (0.633)	Data Time 0.000 (0.014)	Loss 2.2803 (2.1911)	Entropy 0.72204 (0.72645)	Top-1 acc 74.219 (71.915)	Top-5 acc 86.328 (88.692)	lr 0.00108
Train [104][3200/3239]	Time 0.226 (0.632)	Data Time 0.000 (0.014)	Loss 2.1822 (2.1910)	Entropy 0.72203 (0.72643)	Top-1 acc 72.266 (71.918)	Top-5 acc 87.891 (88.693)	lr 0.00108
Train [104][3210/3239]	Time 0.222 (0.632)	Data Time 0.000 (0.014)	Loss 2.0982 (2.1910)	Entropy 0.72205 (0.72642)	Top-1 acc 74.609 (71.916)	Top-5 acc 90.234 (88.694)	lr 0.00108
Train [104][3220/3239]	Time 0.227 (0.631)	Data Time 0.000 (0.014)	Loss 2.2073 (2.1910)	Entropy 0.72197 (0.72641)	Top-1 acc 72.266 (71.916)	Top-5 acc 89.453 (88.694)	lr 0.00108
Train [104][3230/3239]	Time 0.238 (0.631)	Data Time 0.000 (0.014)	Loss 2.0730 (2.1911)	Entropy 0.72203 (0.72639)	Top-1 acc 73.828 (71.913)	Top-5 acc 90.625 (88.694)	lr 0.00108
Train [104][3239/3239]	Time 2.517 (0.631)	Data Time 0.000 (0.014)	Loss 2.5482 (2.1911)	Entropy 0.72203 (0.72638)	Top-1 acc 65.432 (71.913)	Top-5 acc 81.481 (88.694)	lr 0.00108
==========Valid [104/120]	loss 1.212	top-1 acc 72.175 (72.249)	top-5 acc 89.887	Train top-1 71.913	top-5 88.694	Entropy 0.72203	Latency-None: 0.000ms	Flops: 546.53M
Train [105][0/3239]	Time 43.370 (43.370)	Data Time 41.708 (41.708)	Loss 2.1662 (2.1662)	Entropy 0.72205 (0.72205)	Top-1 acc 73.828 (73.828)	Top-5 acc 89.453 (89.453)	lr 0.00108
Train [105][10/3239]	Time 2.783 (4.493)	Data Time 0.002 (3.796)	Loss 2.2258 (2.1734)	Entropy 0.72205 (0.72205)	Top-1 acc 70.703 (71.982)	Top-5 acc 87.109 (88.849)	lr 0.00108
Train [105][20/3239]	Time 0.252 (2.475)	Data Time 0.001 (1.989)	Loss 2.1103 (2.1730)	Entropy 0.72197 (0.72201)	Top-1 acc 74.219 (72.321)	Top-5 acc 90.625 (88.858)	lr 0.00108
Train [105][30/3239]	Time 0.237 (1.836)	Data Time 0.001 (1.348)	Loss 2.2016 (2.1833)	Entropy 0.72198 (0.72200)	Top-1 acc 71.875 (72.127)	Top-5 acc 88.281 (88.684)	lr 0.00108
Train [105][40/3239]	Time 0.367 (1.520)	Data Time 0.002 (1.020)	Loss 1.9572 (2.1756)	Entropy 0.72195 (0.72199)	Top-1 acc 79.688 (72.351)	Top-5 acc 92.188 (88.862)	lr 0.00108
Train [105][50/3239]	Time 0.263 (1.320)	Data Time 0.001 (0.820)	Loss 2.1206 (2.1818)	Entropy 0.72190 (0.72197)	Top-1 acc 75.781 (72.120)	Top-5 acc 89.453 (88.848)	lr 0.00108
Train [105][60/3239]	Time 0.274 (1.183)	Data Time 0.001 (0.686)	Loss 2.2028 (2.1788)	Entropy 0.72190 (0.72196)	Top-1 acc 73.438 (72.195)	Top-5 acc 89.062 (88.915)	lr 0.00108
Train [105][70/3239]	Time 0.239 (1.086)	Data Time 0.001 (0.589)	Loss 2.1206 (2.1787)	Entropy 0.72188 (0.72195)	Top-1 acc 73.828 (72.321)	Top-5 acc 90.625 (88.974)	lr 0.00108
Train [105][80/3239]	Time 0.235 (1.014)	Data Time 0.001 (0.517)	Loss 2.1500 (2.1743)	Entropy 0.72189 (0.72194)	Top-1 acc 74.219 (72.362)	Top-5 acc 89.844 (89.043)	lr 0.00108
Train [105][90/3239]	Time 0.230 (0.958)	Data Time 0.001 (0.460)	Loss 2.0520 (2.1739)	Entropy 0.72177 (0.72193)	Top-1 acc 76.172 (72.321)	Top-5 acc 91.797 (89.041)	lr 0.00108
Train [105][100/3239]	Time 0.276 (0.912)	Data Time 0.002 (0.415)	Loss 2.2598 (2.1781)	Entropy 0.72178 (0.72192)	Top-1 acc 72.266 (72.165)	Top-5 acc 87.109 (88.954)	lr 0.00108
Train [105][110/3239]	Time 0.236 (0.874)	Data Time 0.002 (0.378)	Loss 2.2122 (2.1779)	Entropy 0.72173 (0.72191)	Top-1 acc 69.531 (72.051)	Top-5 acc 89.062 (89.024)	lr 0.00108
Train [105][120/3239]	Time 2.534 (0.841)	Data Time 0.001 (0.347)	Loss 2.2578 (2.1778)	Entropy 0.72173 (0.72189)	Top-1 acc 69.922 (72.053)	Top-5 acc 89.062 (89.030)	lr 0.00108
Train [105][130/3239]	Time 0.272 (0.796)	Data Time 0.003 (0.320)	Loss 1.9975 (2.1743)	Entropy 0.72167 (0.72187)	Top-1 acc 78.516 (72.161)	Top-5 acc 92.188 (89.077)	lr 0.00108
Train [105][140/3239]	Time 0.227 (0.774)	Data Time 0.001 (0.298)	Loss 2.1069 (2.1746)	Entropy 0.72156 (0.72185)	Top-1 acc 71.094 (72.166)	Top-5 acc 90.234 (89.035)	lr 0.00107
Train [105][150/3239]	Time 0.240 (0.756)	Data Time 0.001 (0.278)	Loss 2.3218 (2.1794)	Entropy 0.72166 (0.72184)	Top-1 acc 66.797 (72.069)	Top-5 acc 87.109 (88.941)	lr 0.00107
Train [105][160/3239]	Time 0.229 (0.739)	Data Time 0.001 (0.261)	Loss 2.1986 (2.1781)	Entropy 0.72169 (0.72183)	Top-1 acc 71.484 (72.093)	Top-5 acc 89.062 (88.965)	lr 0.00107
Train [105][170/3239]	Time 0.216 (0.724)	Data Time 0.001 (0.246)	Loss 2.1774 (2.1828)	Entropy 0.72178 (0.72182)	Top-1 acc 72.266 (71.973)	Top-5 acc 89.453 (88.875)	lr 0.00107
Train [105][180/3239]	Time 0.230 (0.711)	Data Time 0.001 (0.232)	Loss 2.0765 (2.1822)	Entropy 0.72173 (0.72182)	Top-1 acc 73.047 (71.972)	Top-5 acc 89.453 (88.890)	lr 0.00107
Train [105][190/3239]	Time 0.229 (0.699)	Data Time 0.001 (0.220)	Loss 2.2445 (2.1828)	Entropy 0.72168 (0.72181)	Top-1 acc 71.094 (71.990)	Top-5 acc 86.719 (88.829)	lr 0.00107
Train [105][200/3239]	Time 0.230 (0.688)	Data Time 0.001 (0.209)	Loss 2.1024 (2.1828)	Entropy 0.72172 (0.72181)	Top-1 acc 74.609 (72.011)	Top-5 acc 89.453 (88.800)	lr 0.00107
Train [105][210/3239]	Time 0.402 (0.679)	Data Time 0.001 (0.199)	Loss 2.1748 (2.1851)	Entropy 0.72170 (0.72180)	Top-1 acc 71.094 (71.938)	Top-5 acc 90.625 (88.790)	lr 0.00107
Train [105][220/3239]	Time 0.230 (0.670)	Data Time 0.001 (0.190)	Loss 2.2451 (2.1849)	Entropy 0.72162 (0.72180)	Top-1 acc 70.312 (71.946)	Top-5 acc 87.500 (88.820)	lr 0.00107
Train [105][230/3239]	Time 2.526 (0.661)	Data Time 0.001 (0.182)	Loss 2.2002 (2.1847)	Entropy 0.72162 (0.72179)	Top-1 acc 71.875 (71.951)	Top-5 acc 89.062 (88.799)	lr 0.00107
Train [105][240/3239]	Time 0.271 (0.644)	Data Time 0.001 (0.175)	Loss 2.2688 (2.1842)	Entropy 0.72158 (0.72178)	Top-1 acc 69.922 (71.963)	Top-5 acc 85.938 (88.800)	lr 0.00107
Train [105][250/3239]	Time 0.305 (0.879)	Data Time 0.002 (0.168)	Loss 2.3551 (2.1859)	Entropy 0.72162 (0.72178)	Top-1 acc 65.625 (71.950)	Top-5 acc 86.328 (88.751)	lr 0.00107
Train [105][260/3239]	Time 0.225 (0.864)	Data Time 0.002 (0.162)	Loss 2.2646 (2.1855)	Entropy 0.72160 (0.72177)	Top-1 acc 70.703 (71.944)	Top-5 acc 87.891 (88.777)	lr 0.00107
Train [105][270/3239]	Time 0.245 (0.850)	Data Time 0.002 (0.156)	Loss 2.2423 (2.1864)	Entropy 0.72167 (0.72176)	Top-1 acc 70.312 (71.905)	Top-5 acc 88.281 (88.793)	lr 0.00107
Train [105][280/3239]	Time 0.241 (0.838)	Data Time 0.001 (0.150)	Loss 2.2102 (2.1862)	Entropy 0.72162 (0.72176)	Top-1 acc 71.094 (71.868)	Top-5 acc 89.453 (88.807)	lr 0.00107
Train [105][290/3239]	Time 0.376 (0.826)	Data Time 0.001 (0.145)	Loss 2.2711 (2.1869)	Entropy 0.72144 (0.72175)	Top-1 acc 67.969 (71.843)	Top-5 acc 86.719 (88.786)	lr 0.00107
Train [105][300/3239]	Time 0.233 (0.815)	Data Time 0.001 (0.140)	Loss 2.1736 (2.1871)	Entropy 0.72142 (0.72174)	Top-1 acc 72.656 (71.843)	Top-5 acc 87.500 (88.790)	lr 0.00107
Train [105][310/3239]	Time 0.230 (0.804)	Data Time 0.001 (0.136)	Loss 2.0876 (2.1875)	Entropy 0.72143 (0.72173)	Top-1 acc 77.344 (71.854)	Top-5 acc 91.016 (88.784)	lr 0.00107
Train [105][320/3239]	Time 0.227 (0.794)	Data Time 0.001 (0.132)	Loss 2.0990 (2.1879)	Entropy 0.72138 (0.72172)	Top-1 acc 74.219 (71.881)	Top-5 acc 89.453 (88.751)	lr 0.00107
Train [105][330/3239]	Time 0.295 (0.784)	Data Time 0.001 (0.128)	Loss 2.3236 (2.1879)	Entropy 0.72128 (0.72171)	Top-1 acc 68.359 (71.893)	Top-5 acc 84.375 (88.762)	lr 0.00107
Train [105][340/3239]	Time 2.705 (0.776)	Data Time 0.002 (0.124)	Loss 2.0626 (2.1870)	Entropy 0.72128 (0.72170)	Top-1 acc 74.219 (71.954)	Top-5 acc 91.016 (88.792)	lr 0.00107
Train [105][350/3239]	Time 0.251 (0.761)	Data Time 0.002 (0.121)	Loss 2.1912 (2.1861)	Entropy 0.72122 (0.72168)	Top-1 acc 69.922 (71.961)	Top-5 acc 90.234 (88.820)	lr 0.00107
Train [105][360/3239]	Time 0.240 (0.754)	Data Time 0.001 (0.117)	Loss 2.2167 (2.1858)	Entropy 0.72122 (0.72167)	Top-1 acc 70.312 (71.957)	Top-5 acc 87.109 (88.830)	lr 0.00107
Train [105][370/3239]	Time 0.224 (0.746)	Data Time 0.001 (0.114)	Loss 2.2671 (2.1848)	Entropy 0.72121 (0.72166)	Top-1 acc 68.359 (71.990)	Top-5 acc 85.547 (88.852)	lr 0.00107
Train [105][380/3239]	Time 0.221 (0.740)	Data Time 0.001 (0.111)	Loss 2.4353 (2.1855)	Entropy 0.72120 (0.72165)	Top-1 acc 67.578 (71.984)	Top-5 acc 85.156 (88.840)	lr 0.00107
Train [105][390/3239]	Time 0.240 (0.733)	Data Time 0.002 (0.108)	Loss 2.1914 (2.1852)	Entropy 0.72121 (0.72164)	Top-1 acc 71.094 (71.984)	Top-5 acc 88.281 (88.834)	lr 0.00106
Train [105][400/3239]	Time 0.235 (0.727)	Data Time 0.001 (0.106)	Loss 2.1954 (2.1856)	Entropy 0.72116 (0.72162)	Top-1 acc 73.047 (71.977)	Top-5 acc 89.062 (88.823)	lr 0.00106
Train [105][410/3239]	Time 0.226 (0.721)	Data Time 0.001 (0.103)	Loss 2.2394 (2.1853)	Entropy 0.72115 (0.72161)	Top-1 acc 71.875 (71.981)	Top-5 acc 89.062 (88.843)	lr 0.00106
Train [105][420/3239]	Time 0.236 (0.715)	Data Time 0.001 (0.101)	Loss 2.3030 (2.1856)	Entropy 0.72118 (0.72160)	Top-1 acc 69.141 (71.955)	Top-5 acc 86.719 (88.839)	lr 0.00106
Train [105][430/3239]	Time 0.234 (0.710)	Data Time 0.001 (0.098)	Loss 2.1765 (2.1868)	Entropy 0.72118 (0.72159)	Top-1 acc 73.828 (71.932)	Top-5 acc 89.453 (88.824)	lr 0.00106
Train [105][440/3239]	Time 0.239 (0.704)	Data Time 0.002 (0.096)	Loss 2.1230 (2.1858)	Entropy 0.72117 (0.72158)	Top-1 acc 73.047 (71.985)	Top-5 acc 91.016 (88.850)	lr 0.00106
Train [105][450/3239]	Time 2.524 (0.699)	Data Time 0.001 (0.094)	Loss 2.1534 (2.1851)	Entropy 0.72117 (0.72157)	Top-1 acc 73.438 (72.010)	Top-5 acc 90.625 (88.864)	lr 0.00106
Train [105][460/3239]	Time 0.368 (0.690)	Data Time 0.001 (0.092)	Loss 2.1003 (2.1862)	Entropy 0.72125 (0.72157)	Top-1 acc 72.266 (71.987)	Top-5 acc 91.406 (88.846)	lr 0.00106
Train [105][470/3239]	Time 0.235 (0.685)	Data Time 0.001 (0.090)	Loss 2.1066 (2.1862)	Entropy 0.72122 (0.72156)	Top-1 acc 73.438 (71.988)	Top-5 acc 91.406 (88.840)	lr 0.00106
Train [105][480/3239]	Time 0.232 (0.681)	Data Time 0.001 (0.088)	Loss 1.9814 (2.1863)	Entropy 0.72121 (0.72155)	Top-1 acc 77.734 (71.980)	Top-5 acc 92.969 (88.851)	lr 0.00106
Train [105][490/3239]	Time 0.272 (0.677)	Data Time 0.001 (0.087)	Loss 2.1171 (2.1855)	Entropy 0.72121 (0.72155)	Top-1 acc 77.344 (72.001)	Top-5 acc 90.625 (88.872)	lr 0.00106
Train [105][500/3239]	Time 0.321 (0.673)	Data Time 0.001 (0.085)	Loss 2.0940 (2.1842)	Entropy 0.72115 (0.72154)	Top-1 acc 76.562 (72.022)	Top-5 acc 90.234 (88.893)	lr 0.00106
Train [105][510/3239]	Time 0.225 (0.669)	Data Time 0.001 (0.083)	Loss 1.9506 (2.1834)	Entropy 0.72108 (0.72153)	Top-1 acc 80.469 (72.046)	Top-5 acc 91.406 (88.901)	lr 0.00106
Train [105][520/3239]	Time 0.227 (0.666)	Data Time 0.001 (0.082)	Loss 2.2594 (2.1847)	Entropy 0.72110 (0.72152)	Top-1 acc 71.875 (72.010)	Top-5 acc 86.328 (88.881)	lr 0.00106
Train [105][530/3239]	Time 0.229 (0.662)	Data Time 0.001 (0.080)	Loss 2.0640 (2.1844)	Entropy 0.72110 (0.72151)	Top-1 acc 75.391 (72.027)	Top-5 acc 92.969 (88.885)	lr 0.00106
Train [105][540/3239]	Time 0.324 (0.659)	Data Time 0.003 (0.079)	Loss 2.1847 (2.1834)	Entropy 0.72102 (0.72151)	Top-1 acc 75.000 (72.067)	Top-5 acc 87.500 (88.901)	lr 0.00106
Train [105][550/3239]	Time 0.228 (0.656)	Data Time 0.001 (0.077)	Loss 2.1324 (2.1841)	Entropy 0.72100 (0.72150)	Top-1 acc 77.734 (72.071)	Top-5 acc 88.672 (88.884)	lr 0.00106
Train [105][560/3239]	Time 2.652 (0.653)	Data Time 0.001 (0.076)	Loss 2.2168 (2.1851)	Entropy 0.72100 (0.72149)	Top-1 acc 72.266 (72.045)	Top-5 acc 86.719 (88.859)	lr 0.00106
Train [105][570/3239]	Time 0.272 (0.646)	Data Time 0.002 (0.075)	Loss 2.0781 (2.1849)	Entropy 0.72097 (0.72148)	Top-1 acc 74.219 (72.041)	Top-5 acc 89.844 (88.855)	lr 0.00106
Train [105][580/3239]	Time 0.231 (0.643)	Data Time 0.001 (0.073)	Loss 2.0946 (2.1853)	Entropy 0.72096 (0.72147)	Top-1 acc 72.656 (72.038)	Top-5 acc 91.797 (88.853)	lr 0.00106
Train [105][590/3239]	Time 0.233 (0.640)	Data Time 0.001 (0.072)	Loss 2.2881 (2.1855)	Entropy 0.72092 (0.72146)	Top-1 acc 70.703 (72.038)	Top-5 acc 88.281 (88.856)	lr 0.00106
Train [105][600/3239]	Time 0.244 (0.637)	Data Time 0.002 (0.071)	Loss 2.1910 (2.1847)	Entropy 0.72087 (0.72145)	Top-1 acc 71.094 (72.057)	Top-5 acc 89.453 (88.869)	lr 0.00106
Train [105][610/3239]	Time 0.278 (0.728)	Data Time 0.002 (0.070)	Loss 2.1000 (2.1849)	Entropy 0.72077 (0.72144)	Top-1 acc 76.172 (72.048)	Top-5 acc 90.234 (88.866)	lr 0.00106
Train [105][620/3239]	Time 0.240 (0.724)	Data Time 0.002 (0.069)	Loss 2.1988 (2.1852)	Entropy 0.72077 (0.72143)	Top-1 acc 70.312 (72.034)	Top-5 acc 86.719 (88.854)	lr 0.00106
Train [105][630/3239]	Time 0.229 (0.720)	Data Time 0.001 (0.068)	Loss 2.3893 (2.1882)	Entropy 0.72069 (0.72142)	Top-1 acc 68.750 (71.976)	Top-5 acc 83.594 (88.817)	lr 0.00105
Train [105][640/3239]	Time 0.238 (0.716)	Data Time 0.002 (0.067)	Loss 2.1911 (2.1883)	Entropy 0.72068 (0.72141)	Top-1 acc 71.484 (71.984)	Top-5 acc 88.672 (88.815)	lr 0.00105
Train [105][650/3239]	Time 0.219 (0.713)	Data Time 0.001 (0.066)	Loss 2.1384 (2.1874)	Entropy 0.72080 (0.72140)	Top-1 acc 71.094 (71.993)	Top-5 acc 91.406 (88.827)	lr 0.00105
Train [105][660/3239]	Time 0.246 (0.709)	Data Time 0.001 (0.065)	Loss 2.1157 (2.1867)	Entropy 0.72074 (0.72139)	Top-1 acc 74.609 (72.017)	Top-5 acc 89.453 (88.831)	lr 0.00105
Train [105][670/3239]	Time 2.705 (0.706)	Data Time 0.001 (0.064)	Loss 2.3064 (2.1873)	Entropy 0.72074 (0.72138)	Top-1 acc 66.797 (72.004)	Top-5 acc 88.281 (88.816)	lr 0.00105
Train [105][680/3239]	Time 0.249 (0.699)	Data Time 0.001 (0.063)	Loss 2.2769 (2.1878)	Entropy 0.72069 (0.72137)	Top-1 acc 72.656 (72.003)	Top-5 acc 87.109 (88.804)	lr 0.00105
Train [105][690/3239]	Time 0.230 (0.696)	Data Time 0.001 (0.062)	Loss 2.2106 (2.1878)	Entropy 0.72067 (0.72136)	Top-1 acc 71.875 (72.005)	Top-5 acc 88.672 (88.808)	lr 0.00105
Train [105][700/3239]	Time 0.226 (0.693)	Data Time 0.001 (0.061)	Loss 2.1726 (2.1882)	Entropy 0.72065 (0.72135)	Top-1 acc 69.922 (71.985)	Top-5 acc 87.500 (88.795)	lr 0.00105
Train [105][710/3239]	Time 0.310 (0.690)	Data Time 0.001 (0.060)	Loss 2.1818 (2.1880)	Entropy 0.72060 (0.72134)	Top-1 acc 71.094 (71.995)	Top-5 acc 91.016 (88.794)	lr 0.00105
Train [105][720/3239]	Time 0.238 (0.687)	Data Time 0.002 (0.060)	Loss 2.3367 (2.1881)	Entropy 0.72060 (0.72133)	Top-1 acc 67.188 (71.973)	Top-5 acc 84.766 (88.796)	lr 0.00105
Train [105][730/3239]	Time 0.216 (0.684)	Data Time 0.001 (0.059)	Loss 2.4443 (2.1887)	Entropy 0.72060 (0.72132)	Top-1 acc 63.281 (71.951)	Top-5 acc 84.375 (88.784)	lr 0.00105
Train [105][740/3239]	Time 0.269 (0.681)	Data Time 0.001 (0.058)	Loss 2.2944 (2.1888)	Entropy 0.72060 (0.72131)	Top-1 acc 69.141 (71.942)	Top-5 acc 86.328 (88.777)	lr 0.00105
Train [105][750/3239]	Time 0.325 (0.679)	Data Time 0.002 (0.057)	Loss 2.3260 (2.1886)	Entropy 0.72051 (0.72130)	Top-1 acc 67.188 (71.950)	Top-5 acc 89.062 (88.777)	lr 0.00105
Train [105][760/3239]	Time 0.243 (0.676)	Data Time 0.001 (0.057)	Loss 2.1055 (2.1881)	Entropy 0.72043 (0.72129)	Top-1 acc 73.828 (71.956)	Top-5 acc 91.016 (88.788)	lr 0.00105
Train [105][770/3239]	Time 0.259 (0.674)	Data Time 0.001 (0.056)	Loss 2.2774 (2.1886)	Entropy 0.72038 (0.72128)	Top-1 acc 70.703 (71.954)	Top-5 acc 85.938 (88.781)	lr 0.00105
Train [105][780/3239]	Time 2.573 (0.671)	Data Time 0.001 (0.055)	Loss 2.0577 (2.1887)	Entropy 0.72038 (0.72127)	Top-1 acc 73.047 (71.948)	Top-5 acc 91.016 (88.777)	lr 0.00105
Train [105][790/3239]	Time 0.265 (0.666)	Data Time 0.001 (0.054)	Loss 2.0341 (2.1892)	Entropy 0.72061 (0.72126)	Top-1 acc 76.172 (71.931)	Top-5 acc 90.625 (88.766)	lr 0.00105
Train [105][800/3239]	Time 0.225 (0.664)	Data Time 0.001 (0.054)	Loss 2.1352 (2.1890)	Entropy 0.72062 (0.72125)	Top-1 acc 75.781 (71.939)	Top-5 acc 89.453 (88.764)	lr 0.00105
Train [105][810/3239]	Time 0.228 (0.661)	Data Time 0.001 (0.053)	Loss 2.1229 (2.1888)	Entropy 0.72061 (0.72124)	Top-1 acc 74.219 (71.945)	Top-5 acc 89.062 (88.768)	lr 0.00105
Train [105][820/3239]	Time 0.230 (0.659)	Data Time 0.001 (0.052)	Loss 2.1605 (2.1884)	Entropy 0.72059 (0.72123)	Top-1 acc 74.219 (71.966)	Top-5 acc 88.281 (88.771)	lr 0.00105
Train [105][830/3239]	Time 0.229 (0.657)	Data Time 0.001 (0.052)	Loss 2.2112 (2.1881)	Entropy 0.72050 (0.72122)	Top-1 acc 75.391 (71.978)	Top-5 acc 88.281 (88.781)	lr 0.00105
Train [105][840/3239]	Time 0.236 (0.655)	Data Time 0.001 (0.051)	Loss 2.2232 (2.1880)	Entropy 0.72047 (0.72122)	Top-1 acc 68.359 (71.981)	Top-5 acc 88.281 (88.787)	lr 0.00105
Train [105][850/3239]	Time 0.231 (0.653)	Data Time 0.001 (0.051)	Loss 2.0544 (2.1879)	Entropy 0.72049 (0.72121)	Top-1 acc 75.781 (71.976)	Top-5 acc 91.016 (88.788)	lr 0.00105
Train [105][860/3239]	Time 0.223 (0.651)	Data Time 0.002 (0.050)	Loss 2.1896 (2.1877)	Entropy 0.72055 (0.72120)	Top-1 acc 68.750 (71.968)	Top-5 acc 89.844 (88.795)	lr 0.00105
Train [105][870/3239]	Time 0.215 (0.649)	Data Time 0.001 (0.050)	Loss 2.2382 (2.1879)	Entropy 0.72058 (0.72119)	Top-1 acc 67.578 (71.959)	Top-5 acc 88.281 (88.796)	lr 0.00105
Train [105][880/3239]	Time 0.295 (0.647)	Data Time 0.001 (0.049)	Loss 2.3916 (2.1875)	Entropy 0.72054 (0.72118)	Top-1 acc 64.062 (71.966)	Top-5 acc 87.500 (88.810)	lr 0.00104
Train [105][890/3239]	Time 2.550 (0.645)	Data Time 0.001 (0.048)	Loss 2.2075 (2.1889)	Entropy 0.72054 (0.72118)	Top-1 acc 68.750 (71.943)	Top-5 acc 87.891 (88.793)	lr 0.00104
Train [105][900/3239]	Time 0.239 (0.640)	Data Time 0.001 (0.048)	Loss 2.2954 (2.1890)	Entropy 0.72046 (0.72117)	Top-1 acc 69.531 (71.933)	Top-5 acc 87.891 (88.790)	lr 0.00104
Train [105][910/3239]	Time 0.220 (0.639)	Data Time 0.001 (0.047)	Loss 2.0963 (2.1887)	Entropy 0.72034 (0.72116)	Top-1 acc 75.391 (71.940)	Top-5 acc 90.234 (88.794)	lr 0.00104
Train [105][920/3239]	Time 0.326 (0.637)	Data Time 0.001 (0.047)	Loss 2.1005 (2.1886)	Entropy 0.72028 (0.72115)	Top-1 acc 75.000 (71.948)	Top-5 acc 91.016 (88.796)	lr 0.00104
Train [105][930/3239]	Time 0.230 (0.635)	Data Time 0.001 (0.046)	Loss 2.1508 (2.1886)	Entropy 0.72029 (0.72114)	Top-1 acc 72.266 (71.947)	Top-5 acc 87.500 (88.801)	lr 0.00104
Train [105][940/3239]	Time 0.247 (0.633)	Data Time 0.002 (0.046)	Loss 2.2145 (2.1885)	Entropy 0.72024 (0.72113)	Top-1 acc 74.609 (71.949)	Top-5 acc 87.891 (88.802)	lr 0.00104
Train [105][950/3239]	Time 0.232 (0.632)	Data Time 0.001 (0.046)	Loss 2.2609 (2.1880)	Entropy 0.72017 (0.72112)	Top-1 acc 68.359 (71.949)	Top-5 acc 89.062 (88.811)	lr 0.00104
Train [105][960/3239]	Time 0.256 (0.630)	Data Time 0.001 (0.045)	Loss 2.3949 (2.1875)	Entropy 0.72021 (0.72111)	Top-1 acc 67.188 (71.962)	Top-5 acc 84.766 (88.816)	lr 0.00104
Train [105][970/3239]	Time 0.388 (0.682)	Data Time 0.003 (0.045)	Loss 2.4055 (2.1877)	Entropy 0.72011 (0.72110)	Top-1 acc 63.281 (71.959)	Top-5 acc 84.766 (88.811)	lr 0.00104
Train [105][980/3239]	Time 0.236 (0.682)	Data Time 0.002 (0.044)	Loss 2.3688 (2.1877)	Entropy 0.72002 (0.72109)	Top-1 acc 65.234 (71.959)	Top-5 acc 84.375 (88.809)	lr 0.00104
Train [105][990/3239]	Time 0.256 (0.681)	Data Time 0.002 (0.044)	Loss 2.2990 (2.1877)	Entropy 0.71985 (0.72108)	Top-1 acc 71.094 (71.958)	Top-5 acc 85.938 (88.803)	lr 0.00104
Train [105][1000/3239]	Time 2.716 (0.679)	Data Time 0.001 (0.043)	Loss 2.1157 (2.1871)	Entropy 0.71985 (0.72107)	Top-1 acc 73.438 (71.978)	Top-5 acc 87.891 (88.808)	lr 0.00104
Train [105][1010/3239]	Time 0.244 (0.674)	Data Time 0.002 (0.043)	Loss 2.1022 (2.1878)	Entropy 0.71968 (0.72106)	Top-1 acc 74.219 (71.963)	Top-5 acc 90.625 (88.792)	lr 0.00104
Train [105][1020/3239]	Time 0.215 (0.673)	Data Time 0.001 (0.043)	Loss 2.1016 (2.1879)	Entropy 0.71964 (0.72104)	Top-1 acc 72.266 (71.965)	Top-5 acc 91.016 (88.786)	lr 0.00104
Train [105][1030/3239]	Time 0.229 (0.671)	Data Time 0.001 (0.042)	Loss 2.2747 (2.1880)	Entropy 0.71956 (0.72103)	Top-1 acc 70.703 (71.964)	Top-5 acc 86.328 (88.786)	lr 0.00104
Train [105][1040/3239]	Time 0.237 (0.669)	Data Time 0.001 (0.042)	Loss 2.2630 (2.1881)	Entropy 0.71946 (0.72101)	Top-1 acc 72.266 (71.962)	Top-5 acc 87.891 (88.784)	lr 0.00104
Train [105][1050/3239]	Time 0.236 (0.667)	Data Time 0.001 (0.041)	Loss 2.0254 (2.1880)	Entropy 0.71944 (0.72100)	Top-1 acc 74.219 (71.963)	Top-5 acc 91.797 (88.788)	lr 0.00104
Train [105][1060/3239]	Time 0.235 (0.665)	Data Time 0.001 (0.041)	Loss 2.4302 (2.1882)	Entropy 0.71945 (0.72098)	Top-1 acc 65.625 (71.952)	Top-5 acc 84.766 (88.783)	lr 0.00104
Train [105][1070/3239]	Time 0.233 (0.664)	Data Time 0.001 (0.041)	Loss 2.2586 (2.1883)	Entropy 0.71943 (0.72097)	Top-1 acc 70.312 (71.956)	Top-5 acc 87.109 (88.779)	lr 0.00104
Train [105][1080/3239]	Time 0.236 (0.662)	Data Time 0.001 (0.040)	Loss 2.3382 (2.1884)	Entropy 0.71941 (0.72096)	Top-1 acc 67.969 (71.953)	Top-5 acc 87.500 (88.781)	lr 0.00104
Train [105][1090/3239]	Time 0.229 (0.660)	Data Time 0.001 (0.040)	Loss 2.0683 (2.1885)	Entropy 0.71937 (0.72094)	Top-1 acc 71.484 (71.959)	Top-5 acc 90.234 (88.779)	lr 0.00104
Train [105][1100/3239]	Time 0.239 (0.659)	Data Time 0.002 (0.040)	Loss 2.1970 (2.1885)	Entropy 0.71935 (0.72093)	Top-1 acc 69.922 (71.952)	Top-5 acc 89.844 (88.782)	lr 0.00104
Train [105][1110/3239]	Time 2.487 (0.657)	Data Time 0.001 (0.039)	Loss 2.1516 (2.1881)	Entropy 0.71935 (0.72091)	Top-1 acc 71.094 (71.956)	Top-5 acc 91.797 (88.786)	lr 0.00104
Train [105][1120/3239]	Time 0.234 (0.653)	Data Time 0.001 (0.039)	Loss 2.1175 (2.1887)	Entropy 0.71929 (0.72090)	Top-1 acc 76.562 (71.945)	Top-5 acc 90.625 (88.778)	lr 0.00104
Train [105][1130/3239]	Time 0.333 (0.652)	Data Time 0.001 (0.039)	Loss 2.2225 (2.1883)	Entropy 0.71921 (0.72088)	Top-1 acc 72.266 (71.944)	Top-5 acc 89.453 (88.782)	lr 0.00103
Train [105][1140/3239]	Time 0.247 (0.650)	Data Time 0.002 (0.038)	Loss 2.1972 (2.1886)	Entropy 0.71913 (0.72087)	Top-1 acc 71.094 (71.931)	Top-5 acc 90.234 (88.781)	lr 0.00103
Train [105][1150/3239]	Time 0.221 (0.649)	Data Time 0.001 (0.038)	Loss 2.3677 (2.1886)	Entropy 0.71909 (0.72085)	Top-1 acc 66.406 (71.927)	Top-5 acc 85.938 (88.782)	lr 0.00103
Train [105][1160/3239]	Time 0.216 (0.648)	Data Time 0.001 (0.038)	Loss 2.1810 (2.1888)	Entropy 0.71896 (0.72084)	Top-1 acc 73.438 (71.924)	Top-5 acc 88.281 (88.777)	lr 0.00103
Train [105][1170/3239]	Time 0.318 (0.646)	Data Time 0.001 (0.037)	Loss 2.1168 (2.1885)	Entropy 0.71897 (0.72082)	Top-1 acc 71.875 (71.933)	Top-5 acc 90.625 (88.788)	lr 0.00103
Train [105][1180/3239]	Time 0.245 (0.645)	Data Time 0.001 (0.037)	Loss 2.2693 (2.1884)	Entropy 0.71900 (0.72081)	Top-1 acc 69.922 (71.939)	Top-5 acc 87.109 (88.788)	lr 0.00103
Train [105][1190/3239]	Time 0.238 (0.643)	Data Time 0.001 (0.037)	Loss 2.1786 (2.1881)	Entropy 0.71918 (0.72079)	Top-1 acc 73.438 (71.949)	Top-5 acc 89.062 (88.797)	lr 0.00103
Train [105][1200/3239]	Time 0.234 (0.642)	Data Time 0.002 (0.036)	Loss 2.3261 (2.1881)	Entropy 0.71915 (0.72078)	Top-1 acc 67.578 (71.941)	Top-5 acc 85.547 (88.795)	lr 0.00103
Train [105][1210/3239]	Time 0.322 (0.641)	Data Time 0.001 (0.036)	Loss 2.1891 (2.1882)	Entropy 0.71907 (0.72076)	Top-1 acc 71.484 (71.938)	Top-5 acc 89.062 (88.794)	lr 0.00103
Train [105][1220/3239]	Time 2.615 (0.639)	Data Time 0.001 (0.036)	Loss 2.1383 (2.1881)	Entropy 0.71907 (0.72075)	Top-1 acc 73.438 (71.936)	Top-5 acc 90.234 (88.798)	lr 0.00103
Train [105][1230/3239]	Time 0.282 (0.636)	Data Time 0.001 (0.036)	Loss 2.1162 (2.1877)	Entropy 0.71896 (0.72074)	Top-1 acc 73.047 (71.946)	Top-5 acc 91.016 (88.810)	lr 0.00103
Train [105][1240/3239]	Time 0.247 (0.635)	Data Time 0.002 (0.035)	Loss 2.3734 (2.1880)	Entropy 0.71892 (0.72072)	Top-1 acc 66.406 (71.931)	Top-5 acc 85.547 (88.807)	lr 0.00103
Train [105][1250/3239]	Time 0.237 (0.634)	Data Time 0.001 (0.035)	Loss 2.0787 (2.1882)	Entropy 0.71901 (0.72071)	Top-1 acc 76.172 (71.929)	Top-5 acc 92.188 (88.801)	lr 0.00103
Train [105][1260/3239]	Time 0.235 (0.632)	Data Time 0.001 (0.035)	Loss 2.1808 (2.1881)	Entropy 0.71901 (0.72069)	Top-1 acc 76.562 (71.933)	Top-5 acc 90.234 (88.805)	lr 0.00103
Train [105][1270/3239]	Time 0.225 (0.631)	Data Time 0.001 (0.034)	Loss 2.1112 (2.1877)	Entropy 0.71896 (0.72068)	Top-1 acc 73.828 (71.943)	Top-5 acc 89.453 (88.813)	lr 0.00103
Train [105][1280/3239]	Time 0.231 (0.630)	Data Time 0.001 (0.034)	Loss 2.2032 (2.1876)	Entropy 0.71917 (0.72067)	Top-1 acc 70.312 (71.944)	Top-5 acc 88.281 (88.817)	lr 0.00103
Train [105][1290/3239]	Time 0.228 (0.629)	Data Time 0.001 (0.034)	Loss 2.1700 (2.1879)	Entropy 0.71915 (0.72066)	Top-1 acc 73.047 (71.930)	Top-5 acc 89.844 (88.813)	lr 0.00103
Train [105][1300/3239]	Time 0.236 (0.628)	Data Time 0.001 (0.034)	Loss 2.2141 (2.1877)	Entropy 0.71911 (0.72064)	Top-1 acc 73.047 (71.934)	Top-5 acc 88.281 (88.820)	lr 0.00103
Train [105][1310/3239]	Time 0.236 (0.627)	Data Time 0.001 (0.033)	Loss 2.1700 (2.1878)	Entropy 0.71902 (0.72063)	Top-1 acc 71.484 (71.936)	Top-5 acc 87.500 (88.810)	lr 0.00103
Train [105][1320/3239]	Time 0.264 (0.626)	Data Time 0.001 (0.033)	Loss 2.3229 (2.1880)	Entropy 0.71897 (0.72062)	Top-1 acc 66.406 (71.933)	Top-5 acc 86.328 (88.808)	lr 0.00103
Train [105][1330/3239]	Time 55.190 (0.664)	Data Time 0.001 (0.033)	Loss 2.0961 (2.1878)	Entropy 0.71897 (0.72061)	Top-1 acc 73.047 (71.935)	Top-5 acc 90.625 (88.810)	lr 0.00103
Train [105][1340/3239]	Time 0.377 (0.661)	Data Time 0.004 (0.033)	Loss 2.2536 (2.1877)	Entropy 0.71886 (0.72059)	Top-1 acc 67.578 (71.934)	Top-5 acc 87.500 (88.810)	lr 0.00103
Train [105][1350/3239]	Time 0.248 (0.660)	Data Time 0.002 (0.033)	Loss 2.1712 (2.1879)	Entropy 0.71882 (0.72058)	Top-1 acc 72.656 (71.933)	Top-5 acc 89.844 (88.807)	lr 0.00103
Train [105][1360/3239]	Time 0.223 (0.659)	Data Time 0.001 (0.032)	Loss 2.2905 (2.1881)	Entropy 0.71874 (0.72057)	Top-1 acc 66.016 (71.929)	Top-5 acc 85.938 (88.801)	lr 0.00103
Train [105][1370/3239]	Time 0.232 (0.657)	Data Time 0.001 (0.032)	Loss 2.2815 (2.1881)	Entropy 0.71875 (0.72055)	Top-1 acc 67.969 (71.917)	Top-5 acc 86.719 (88.801)	lr 0.00103
Train [105][1380/3239]	Time 0.346 (0.656)	Data Time 0.001 (0.032)	Loss 2.1572 (2.1881)	Entropy 0.71866 (0.72054)	Top-1 acc 74.609 (71.919)	Top-5 acc 89.453 (88.801)	lr 0.00102
Train [105][1390/3239]	Time 0.238 (0.655)	Data Time 0.001 (0.032)	Loss 2.1336 (2.1881)	Entropy 0.71862 (0.72053)	Top-1 acc 73.828 (71.923)	Top-5 acc 89.062 (88.804)	lr 0.00102
Train [105][1400/3239]	Time 0.233 (0.654)	Data Time 0.001 (0.031)	Loss 2.3236 (2.1879)	Entropy 0.71859 (0.72051)	Top-1 acc 66.016 (71.934)	Top-5 acc 85.938 (88.804)	lr 0.00102
Train [105][1410/3239]	Time 0.238 (0.652)	Data Time 0.001 (0.031)	Loss 2.0402 (2.1876)	Entropy 0.71859 (0.72050)	Top-1 acc 76.953 (71.941)	Top-5 acc 91.016 (88.809)	lr 0.00102
Train [105][1420/3239]	Time 0.310 (0.651)	Data Time 0.001 (0.031)	Loss 2.1955 (2.1874)	Entropy 0.71848 (0.72049)	Top-1 acc 72.266 (71.948)	Top-5 acc 89.062 (88.810)	lr 0.00102
Train [105][1430/3239]	Time 0.232 (0.650)	Data Time 0.001 (0.031)	Loss 2.1617 (2.1887)	Entropy 0.71839 (0.72047)	Top-1 acc 72.266 (71.924)	Top-5 acc 89.844 (88.796)	lr 0.00102
Train [105][1440/3239]	Time 2.517 (0.649)	Data Time 0.002 (0.031)	Loss 2.2925 (2.1889)	Entropy 0.71839 (0.72046)	Top-1 acc 70.312 (71.921)	Top-5 acc 86.328 (88.795)	lr 0.00102
Train [105][1450/3239]	Time 0.295 (0.646)	Data Time 0.001 (0.030)	Loss 2.2819 (2.1888)	Entropy 0.71826 (0.72044)	Top-1 acc 68.359 (71.925)	Top-5 acc 87.891 (88.792)	lr 0.00102
Train [105][1460/3239]	Time 0.247 (0.645)	Data Time 0.001 (0.030)	Loss 2.1885 (2.1886)	Entropy 0.71821 (0.72043)	Top-1 acc 73.047 (71.937)	Top-5 acc 88.672 (88.793)	lr 0.00102
Train [105][1470/3239]	Time 0.234 (0.644)	Data Time 0.001 (0.030)	Loss 2.1345 (2.1889)	Entropy 0.71814 (0.72041)	Top-1 acc 74.219 (71.932)	Top-5 acc 91.406 (88.788)	lr 0.00102
Train [105][1480/3239]	Time 0.237 (0.643)	Data Time 0.001 (0.030)	Loss 2.3038 (2.1889)	Entropy 0.71822 (0.72040)	Top-1 acc 70.703 (71.930)	Top-5 acc 85.938 (88.785)	lr 0.00102
Train [105][1490/3239]	Time 0.246 (0.642)	Data Time 0.001 (0.030)	Loss 2.2596 (2.1889)	Entropy 0.71826 (0.72038)	Top-1 acc 69.141 (71.934)	Top-5 acc 90.625 (88.787)	lr 0.00102
Train [105][1500/3239]	Time 0.240 (0.641)	Data Time 0.001 (0.029)	Loss 2.1122 (2.1886)	Entropy 0.71823 (0.72037)	Top-1 acc 74.219 (71.937)	Top-5 acc 90.625 (88.793)	lr 0.00102
Train [105][1510/3239]	Time 0.239 (0.640)	Data Time 0.001 (0.029)	Loss 2.1199 (2.1886)	Entropy 0.71819 (0.72035)	Top-1 acc 74.609 (71.938)	Top-5 acc 90.234 (88.792)	lr 0.00102
Train [105][1520/3239]	Time 0.224 (0.639)	Data Time 0.001 (0.029)	Loss 2.3770 (2.1888)	Entropy 0.71825 (0.72034)	Top-1 acc 68.359 (71.935)	Top-5 acc 82.031 (88.785)	lr 0.00102
Train [105][1530/3239]	Time 0.233 (0.637)	Data Time 0.001 (0.029)	Loss 2.2393 (2.1887)	Entropy 0.71818 (0.72033)	Top-1 acc 73.438 (71.941)	Top-5 acc 88.672 (88.789)	lr 0.00102
Train [105][1540/3239]	Time 0.231 (0.636)	Data Time 0.002 (0.029)	Loss 2.2507 (2.1887)	Entropy 0.71817 (0.72031)	Top-1 acc 73.438 (71.950)	Top-5 acc 90.234 (88.793)	lr 0.00102
Train [105][1550/3239]	Time 2.639 (0.636)	Data Time 0.001 (0.029)	Loss 2.0731 (2.1881)	Entropy 0.71817 (0.72030)	Top-1 acc 74.219 (71.962)	Top-5 acc 91.406 (88.804)	lr 0.00102
Train [105][1560/3239]	Time 0.242 (0.633)	Data Time 0.001 (0.028)	Loss 2.2572 (2.1881)	Entropy 0.71821 (0.72029)	Top-1 acc 67.969 (71.956)	Top-5 acc 87.891 (88.805)	lr 0.00102
Train [105][1570/3239]	Time 0.238 (0.632)	Data Time 0.001 (0.028)	Loss 2.2128 (2.1880)	Entropy 0.71823 (0.72027)	Top-1 acc 71.875 (71.958)	Top-5 acc 87.891 (88.805)	lr 0.00102
Train [105][1580/3239]	Time 0.232 (0.631)	Data Time 0.001 (0.028)	Loss 2.1579 (2.1878)	Entropy 0.71819 (0.72026)	Top-1 acc 71.484 (71.962)	Top-5 acc 89.844 (88.810)	lr 0.00102
Train [105][1590/3239]	Time 0.351 (0.630)	Data Time 0.002 (0.028)	Loss 2.2220 (2.1877)	Entropy 0.71814 (0.72025)	Top-1 acc 71.484 (71.962)	Top-5 acc 87.500 (88.813)	lr 0.00102
Train [105][1600/3239]	Time 0.233 (0.629)	Data Time 0.001 (0.028)	Loss 2.3060 (2.1878)	Entropy 0.71812 (0.72023)	Top-1 acc 69.141 (71.960)	Top-5 acc 86.328 (88.812)	lr 0.00102
Train [105][1610/3239]	Time 0.234 (0.628)	Data Time 0.001 (0.028)	Loss 2.1738 (2.1877)	Entropy 0.71815 (0.72022)	Top-1 acc 72.656 (71.966)	Top-5 acc 88.281 (88.811)	lr 0.00102
Train [105][1620/3239]	Time 0.245 (0.627)	Data Time 0.001 (0.027)	Loss 2.0081 (2.1876)	Entropy 0.71811 (0.72021)	Top-1 acc 75.781 (71.960)	Top-5 acc 92.188 (88.816)	lr 0.00102
Train [105][1630/3239]	Time 0.347 (0.627)	Data Time 0.002 (0.027)	Loss 2.2210 (2.1879)	Entropy 0.71810 (0.72019)	Top-1 acc 70.312 (71.956)	Top-5 acc 88.672 (88.811)	lr 0.00101
Train [105][1640/3239]	Time 0.224 (0.626)	Data Time 0.001 (0.027)	Loss 2.2700 (2.1879)	Entropy 0.71813 (0.72018)	Top-1 acc 69.141 (71.959)	Top-5 acc 86.328 (88.810)	lr 0.00101
Train [105][1650/3239]	Time 0.266 (0.625)	Data Time 0.001 (0.027)	Loss 2.2029 (2.1879)	Entropy 0.71804 (0.72017)	Top-1 acc 72.266 (71.960)	Top-5 acc 87.109 (88.809)	lr 0.00101
Train [105][1660/3239]	Time 2.577 (0.624)	Data Time 0.003 (0.027)	Loss 2.2916 (2.1880)	Entropy 0.71804 (0.72016)	Top-1 acc 68.750 (71.952)	Top-5 acc 85.938 (88.803)	lr 0.00101
Train [105][1670/3239]	Time 0.328 (0.622)	Data Time 0.001 (0.027)	Loss 2.3240 (2.1883)	Entropy 0.71800 (0.72014)	Top-1 acc 70.703 (71.949)	Top-5 acc 87.109 (88.800)	lr 0.00101
Train [105][1680/3239]	Time 0.246 (0.621)	Data Time 0.001 (0.026)	Loss 2.1273 (2.1883)	Entropy 0.71791 (0.72013)	Top-1 acc 71.094 (71.949)	Top-5 acc 89.453 (88.797)	lr 0.00101
Train [105][1690/3239]	Time 0.239 (0.620)	Data Time 0.001 (0.026)	Loss 2.0563 (2.1881)	Entropy 0.71784 (0.72012)	Top-1 acc 76.953 (71.949)	Top-5 acc 91.016 (88.804)	lr 0.00101
Train [105][1700/3239]	Time 0.322 (0.652)	Data Time 0.002 (0.026)	Loss 2.2306 (2.1880)	Entropy 0.71782 (0.72010)	Top-1 acc 70.703 (71.950)	Top-5 acc 87.891 (88.807)	lr 0.00101
Train [105][1710/3239]	Time 0.230 (0.651)	Data Time 0.002 (0.026)	Loss 2.2698 (2.1881)	Entropy 0.71776 (0.72009)	Top-1 acc 71.094 (71.955)	Top-5 acc 86.328 (88.803)	lr 0.00101
Train [105][1720/3239]	Time 0.237 (0.650)	Data Time 0.001 (0.026)	Loss 2.1135 (2.1881)	Entropy 0.71775 (0.72008)	Top-1 acc 73.828 (71.955)	Top-5 acc 90.234 (88.804)	lr 0.00101
Train [105][1730/3239]	Time 0.228 (0.649)	Data Time 0.001 (0.026)	Loss 2.2488 (2.1880)	Entropy 0.71775 (0.72006)	Top-1 acc 69.922 (71.961)	Top-5 acc 86.719 (88.805)	lr 0.00101
Train [105][1740/3239]	Time 0.233 (0.648)	Data Time 0.001 (0.026)	Loss 2.1494 (2.1878)	Entropy 0.71774 (0.72005)	Top-1 acc 72.656 (71.960)	Top-5 acc 88.672 (88.808)	lr 0.00101
Train [105][1750/3239]	Time 0.240 (0.648)	Data Time 0.001 (0.025)	Loss 2.1705 (2.1877)	Entropy 0.71771 (0.72004)	Top-1 acc 71.094 (71.960)	Top-5 acc 89.844 (88.811)	lr 0.00101
Train [105][1760/3239]	Time 0.234 (0.647)	Data Time 0.001 (0.025)	Loss 2.1876 (2.1878)	Entropy 0.71764 (0.72002)	Top-1 acc 70.703 (71.958)	Top-5 acc 89.062 (88.809)	lr 0.00101
Train [105][1770/3239]	Time 2.584 (0.646)	Data Time 0.002 (0.025)	Loss 2.2182 (2.1877)	Entropy 0.71764 (0.72001)	Top-1 acc 70.312 (71.964)	Top-5 acc 88.281 (88.812)	lr 0.00101
Train [105][1780/3239]	Time 0.226 (0.644)	Data Time 0.001 (0.025)	Loss 2.0892 (2.1876)	Entropy 0.71776 (0.72000)	Top-1 acc 74.219 (71.968)	Top-5 acc 88.672 (88.812)	lr 0.00101
Train [105][1790/3239]	Time 0.236 (0.643)	Data Time 0.001 (0.025)	Loss 2.2415 (2.1878)	Entropy 0.71771 (0.71998)	Top-1 acc 71.875 (71.969)	Top-5 acc 87.891 (88.810)	lr 0.00101
Train [105][1800/3239]	Time 0.325 (0.642)	Data Time 0.001 (0.025)	Loss 2.2102 (2.1877)	Entropy 0.71770 (0.71997)	Top-1 acc 69.531 (71.973)	Top-5 acc 87.109 (88.810)	lr 0.00101
Train [105][1810/3239]	Time 0.239 (0.641)	Data Time 0.001 (0.025)	Loss 2.1549 (2.1875)	Entropy 0.71763 (0.71996)	Top-1 acc 73.828 (71.979)	Top-5 acc 89.062 (88.814)	lr 0.00101
Train [105][1820/3239]	Time 0.240 (0.640)	Data Time 0.001 (0.025)	Loss 2.3704 (2.1878)	Entropy 0.71757 (0.71995)	Top-1 acc 66.406 (71.969)	Top-5 acc 84.766 (88.807)	lr 0.00101
Train [105][1830/3239]	Time 0.220 (0.639)	Data Time 0.001 (0.024)	Loss 2.1241 (2.1877)	Entropy 0.71754 (0.71993)	Top-1 acc 75.000 (71.971)	Top-5 acc 88.281 (88.808)	lr 0.00101
Train [105][1840/3239]	Time 0.326 (0.639)	Data Time 0.001 (0.024)	Loss 2.1092 (2.1875)	Entropy 0.71752 (0.71992)	Top-1 acc 75.000 (71.980)	Top-5 acc 89.844 (88.809)	lr 0.00101
Train [105][1850/3239]	Time 0.253 (0.638)	Data Time 0.002 (0.024)	Loss 1.9333 (2.1871)	Entropy 0.71751 (0.71991)	Top-1 acc 78.516 (71.989)	Top-5 acc 93.359 (88.817)	lr 0.00101
Train [105][1860/3239]	Time 0.233 (0.637)	Data Time 0.002 (0.024)	Loss 2.3088 (2.1871)	Entropy 0.71752 (0.71989)	Top-1 acc 70.312 (71.989)	Top-5 acc 85.156 (88.817)	lr 0.00101
Train [105][1870/3239]	Time 0.245 (0.636)	Data Time 0.002 (0.024)	Loss 2.1428 (2.1873)	Entropy 0.71751 (0.71988)	Top-1 acc 74.609 (71.980)	Top-5 acc 89.844 (88.817)	lr 0.00101
Train [105][1880/3239]	Time 2.765 (0.635)	Data Time 0.002 (0.024)	Loss 2.1859 (2.1872)	Entropy 0.71751 (0.71987)	Top-1 acc 72.656 (71.985)	Top-5 acc 87.500 (88.817)	lr 0.00100
Train [105][1890/3239]	Time 0.265 (0.633)	Data Time 0.001 (0.024)	Loss 2.1589 (2.1874)	Entropy 0.71755 (0.71986)	Top-1 acc 73.828 (71.980)	Top-5 acc 90.625 (88.816)	lr 0.00100
Train [105][1900/3239]	Time 0.231 (0.633)	Data Time 0.001 (0.024)	Loss 2.1660 (2.1875)	Entropy 0.71746 (0.71984)	Top-1 acc 73.047 (71.977)	Top-5 acc 89.844 (88.813)	lr 0.00100
Train [105][1910/3239]	Time 0.230 (0.632)	Data Time 0.001 (0.023)	Loss 1.9741 (2.1873)	Entropy 0.71743 (0.71983)	Top-1 acc 78.125 (71.982)	Top-5 acc 91.016 (88.813)	lr 0.00100
Train [105][1920/3239]	Time 0.296 (0.631)	Data Time 0.001 (0.023)	Loss 2.1556 (2.1873)	Entropy 0.71739 (0.71982)	Top-1 acc 74.609 (71.987)	Top-5 acc 91.406 (88.815)	lr 0.00100
Train [105][1930/3239]	Time 0.234 (0.630)	Data Time 0.002 (0.023)	Loss 2.2553 (2.1874)	Entropy 0.71716 (0.71980)	Top-1 acc 71.875 (71.985)	Top-5 acc 86.328 (88.809)	lr 0.00100
Train [105][1940/3239]	Time 0.257 (0.630)	Data Time 0.001 (0.023)	Loss 2.1630 (2.1873)	Entropy 0.71713 (0.71979)	Top-1 acc 71.875 (71.985)	Top-5 acc 88.672 (88.812)	lr 0.00100
Train [105][1950/3239]	Time 0.226 (0.629)	Data Time 0.001 (0.023)	Loss 2.7033 (2.1876)	Entropy 0.71716 (0.71978)	Top-1 acc 57.031 (71.975)	Top-5 acc 82.422 (88.807)	lr 0.00100
Train [105][1960/3239]	Time 0.233 (0.628)	Data Time 0.001 (0.023)	Loss 2.2722 (2.1875)	Entropy 0.71710 (0.71976)	Top-1 acc 69.141 (71.977)	Top-5 acc 89.453 (88.810)	lr 0.00100
Train [105][1970/3239]	Time 0.225 (0.627)	Data Time 0.001 (0.023)	Loss 2.3184 (2.1875)	Entropy 0.71707 (0.71975)	Top-1 acc 68.750 (71.976)	Top-5 acc 85.156 (88.810)	lr 0.00100
Train [105][1980/3239]	Time 0.236 (0.627)	Data Time 0.001 (0.023)	Loss 2.3155 (2.1874)	Entropy 0.71697 (0.71974)	Top-1 acc 67.188 (71.981)	Top-5 acc 87.109 (88.811)	lr 0.00100
Train [105][1990/3239]	Time 2.635 (0.626)	Data Time 0.002 (0.023)	Loss 2.2788 (2.1875)	Entropy 0.71697 (0.71972)	Top-1 acc 69.531 (71.974)	Top-5 acc 84.766 (88.808)	lr 0.00100
Train [105][2000/3239]	Time 0.231 (0.624)	Data Time 0.001 (0.022)	Loss 2.1611 (2.1875)	Entropy 0.71698 (0.71971)	Top-1 acc 73.438 (71.974)	Top-5 acc 90.234 (88.805)	lr 0.00100
Train [105][2010/3239]	Time 0.236 (0.623)	Data Time 0.001 (0.022)	Loss 2.1210 (2.1877)	Entropy 0.71697 (0.71970)	Top-1 acc 77.734 (71.971)	Top-5 acc 90.234 (88.802)	lr 0.00100
Train [105][2020/3239]	Time 0.239 (0.623)	Data Time 0.002 (0.022)	Loss 2.3278 (2.1878)	Entropy 0.71693 (0.71968)	Top-1 acc 70.312 (71.967)	Top-5 acc 86.719 (88.798)	lr 0.00100
Train [105][2030/3239]	Time 0.253 (0.622)	Data Time 0.001 (0.022)	Loss 2.3934 (2.1879)	Entropy 0.71682 (0.71967)	Top-1 acc 67.578 (71.965)	Top-5 acc 85.547 (88.797)	lr 0.00100
Train [105][2040/3239]	Time 0.230 (0.621)	Data Time 0.001 (0.022)	Loss 2.1528 (2.1880)	Entropy 0.71677 (0.71965)	Top-1 acc 71.484 (71.968)	Top-5 acc 88.672 (88.793)	lr 0.00100
Train [105][2050/3239]	Time 0.330 (0.621)	Data Time 0.001 (0.022)	Loss 2.2940 (2.1884)	Entropy 0.71673 (0.71964)	Top-1 acc 66.797 (71.955)	Top-5 acc 87.500 (88.784)	lr 0.00100
Train [105][2060/3239]	Time 0.342 (0.645)	Data Time 0.003 (0.022)	Loss 2.0523 (2.1882)	Entropy 0.71669 (0.71963)	Top-1 acc 74.219 (71.956)	Top-5 acc 91.016 (88.787)	lr 0.00100
Train [105][2070/3239]	Time 0.234 (0.645)	Data Time 0.002 (0.022)	Loss 2.2661 (2.1881)	Entropy 0.71661 (0.71961)	Top-1 acc 68.359 (71.958)	Top-5 acc 88.672 (88.792)	lr 0.00100
Train [105][2080/3239]	Time 0.236 (0.644)	Data Time 0.001 (0.022)	Loss 2.1378 (2.1880)	Entropy 0.71654 (0.71960)	Top-1 acc 73.828 (71.960)	Top-5 acc 90.625 (88.796)	lr 0.00100
Train [105][2090/3239]	Time 0.318 (0.643)	Data Time 0.001 (0.022)	Loss 2.2403 (2.1881)	Entropy 0.71649 (0.71958)	Top-1 acc 71.094 (71.959)	Top-5 acc 89.453 (88.793)	lr 0.00100
Train [105][2100/3239]	Time 2.517 (0.643)	Data Time 0.001 (0.022)	Loss 2.1479 (2.1879)	Entropy 0.71649 (0.71957)	Top-1 acc 69.922 (71.967)	Top-5 acc 90.234 (88.796)	lr 0.00100
Train [105][2110/3239]	Time 0.236 (0.641)	Data Time 0.001 (0.021)	Loss 2.1509 (2.1877)	Entropy 0.71650 (0.71955)	Top-1 acc 72.656 (71.969)	Top-5 acc 89.062 (88.800)	lr 0.00100
Train [105][2120/3239]	Time 0.244 (0.640)	Data Time 0.001 (0.021)	Loss 2.3341 (2.1875)	Entropy 0.71652 (0.71954)	Top-1 acc 67.969 (71.968)	Top-5 acc 86.719 (88.803)	lr 0.00100
Train [105][2130/3239]	Time 0.252 (0.639)	Data Time 0.001 (0.021)	Loss 2.1499 (2.1873)	Entropy 0.71646 (0.71952)	Top-1 acc 73.047 (71.977)	Top-5 acc 89.844 (88.807)	lr 0.00099
Train [105][2140/3239]	Time 0.222 (0.639)	Data Time 0.001 (0.021)	Loss 2.1354 (2.1871)	Entropy 0.71644 (0.71951)	Top-1 acc 74.609 (71.983)	Top-5 acc 88.672 (88.810)	lr 0.00099
Train [105][2150/3239]	Time 0.258 (0.638)	Data Time 0.001 (0.021)	Loss 2.1745 (2.1871)	Entropy 0.71641 (0.71950)	Top-1 acc 71.094 (71.985)	Top-5 acc 90.625 (88.811)	lr 0.00099
Train [105][2160/3239]	Time 0.236 (0.637)	Data Time 0.001 (0.021)	Loss 2.2774 (2.1870)	Entropy 0.71639 (0.71948)	Top-1 acc 67.969 (71.989)	Top-5 acc 87.500 (88.809)	lr 0.00099
Train [105][2170/3239]	Time 0.229 (0.636)	Data Time 0.005 (0.021)	Loss 2.0074 (2.1873)	Entropy 0.71629 (0.71947)	Top-1 acc 78.125 (71.982)	Top-5 acc 92.188 (88.805)	lr 0.00099
Train [105][2180/3239]	Time 0.209 (0.636)	Data Time 0.001 (0.021)	Loss 2.2507 (2.1873)	Entropy 0.71627 (0.71945)	Top-1 acc 70.312 (71.979)	Top-5 acc 86.719 (88.803)	lr 0.00099
Train [105][2190/3239]	Time 0.243 (0.635)	Data Time 0.001 (0.021)	Loss 2.2034 (2.1873)	Entropy 0.71633 (0.71944)	Top-1 acc 70.703 (71.980)	Top-5 acc 88.281 (88.805)	lr 0.00099
Train [105][2200/3239]	Time 0.213 (0.634)	Data Time 0.001 (0.021)	Loss 2.2291 (2.1873)	Entropy 0.71620 (0.71942)	Top-1 acc 68.359 (71.985)	Top-5 acc 89.453 (88.806)	lr 0.00099
Train [105][2210/3239]	Time 2.612 (0.634)	Data Time 0.001 (0.021)	Loss 2.2015 (2.1873)	Entropy 0.71620 (0.71941)	Top-1 acc 72.266 (71.988)	Top-5 acc 86.719 (88.803)	lr 0.00099
Train [105][2220/3239]	Time 0.372 (0.632)	Data Time 0.001 (0.020)	Loss 2.3149 (2.1874)	Entropy 0.71587 (0.71939)	Top-1 acc 69.922 (71.989)	Top-5 acc 86.328 (88.800)	lr 0.00099
Train [105][2230/3239]	Time 0.233 (0.631)	Data Time 0.001 (0.020)	Loss 2.1080 (2.1874)	Entropy 0.71574 (0.71938)	Top-1 acc 73.047 (71.985)	Top-5 acc 90.234 (88.799)	lr 0.00099
Train [105][2240/3239]	Time 0.241 (0.631)	Data Time 0.001 (0.020)	Loss 2.0800 (2.1871)	Entropy 0.71566 (0.71936)	Top-1 acc 70.703 (71.985)	Top-5 acc 91.797 (88.804)	lr 0.00099
Train [105][2250/3239]	Time 0.232 (0.630)	Data Time 0.001 (0.020)	Loss 2.1754 (2.1871)	Entropy 0.71564 (0.71934)	Top-1 acc 70.703 (71.984)	Top-5 acc 90.625 (88.805)	lr 0.00099
Train [105][2260/3239]	Time 0.335 (0.629)	Data Time 0.001 (0.020)	Loss 2.1128 (2.1871)	Entropy 0.71564 (0.71933)	Top-1 acc 74.609 (71.982)	Top-5 acc 90.234 (88.805)	lr 0.00099
Train [105][2270/3239]	Time 0.224 (0.629)	Data Time 0.001 (0.020)	Loss 2.3139 (2.1870)	Entropy 0.71564 (0.71931)	Top-1 acc 68.359 (71.984)	Top-5 acc 84.766 (88.809)	lr 0.00099
Train [105][2280/3239]	Time 0.229 (0.628)	Data Time 0.001 (0.020)	Loss 2.3250 (2.1871)	Entropy 0.71564 (0.71929)	Top-1 acc 70.703 (71.983)	Top-5 acc 86.719 (88.808)	lr 0.00099
Train [105][2290/3239]	Time 0.257 (0.628)	Data Time 0.001 (0.020)	Loss 2.1406 (2.1872)	Entropy 0.71562 (0.71928)	Top-1 acc 71.875 (71.979)	Top-5 acc 89.062 (88.806)	lr 0.00099
Train [105][2300/3239]	Time 0.326 (0.627)	Data Time 0.001 (0.020)	Loss 2.2889 (2.1872)	Entropy 0.71566 (0.71926)	Top-1 acc 69.922 (71.980)	Top-5 acc 85.938 (88.803)	lr 0.00099
Train [105][2310/3239]	Time 0.230 (0.626)	Data Time 0.001 (0.020)	Loss 2.1478 (2.1871)	Entropy 0.71572 (0.71925)	Top-1 acc 75.000 (71.984)	Top-5 acc 90.625 (88.803)	lr 0.00099
Train [105][2320/3239]	Time 2.624 (0.626)	Data Time 0.001 (0.020)	Loss 2.2105 (2.1872)	Entropy 0.71572 (0.71923)	Top-1 acc 69.531 (71.980)	Top-5 acc 87.500 (88.800)	lr 0.00099
Train [105][2330/3239]	Time 0.238 (0.624)	Data Time 0.001 (0.020)	Loss 2.1675 (2.1874)	Entropy 0.71570 (0.71922)	Top-1 acc 73.438 (71.976)	Top-5 acc 90.234 (88.795)	lr 0.00099
Train [105][2340/3239]	Time 0.224 (0.623)	Data Time 0.001 (0.019)	Loss 2.2331 (2.1873)	Entropy 0.71561 (0.71920)	Top-1 acc 71.484 (71.978)	Top-5 acc 87.500 (88.796)	lr 0.00099
Train [105][2350/3239]	Time 0.229 (0.623)	Data Time 0.001 (0.019)	Loss 2.1520 (2.1874)	Entropy 0.71556 (0.71919)	Top-1 acc 69.531 (71.973)	Top-5 acc 91.797 (88.795)	lr 0.00099
Train [105][2360/3239]	Time 0.229 (0.622)	Data Time 0.001 (0.019)	Loss 2.1434 (2.1873)	Entropy 0.71554 (0.71917)	Top-1 acc 73.047 (71.975)	Top-5 acc 88.672 (88.795)	lr 0.00099
Train [105][2370/3239]	Time 0.237 (0.622)	Data Time 0.001 (0.019)	Loss 2.2841 (2.1874)	Entropy 0.71523 (0.71915)	Top-1 acc 71.484 (71.974)	Top-5 acc 87.500 (88.793)	lr 0.00099
Train [105][2380/3239]	Time 0.238 (0.621)	Data Time 0.001 (0.019)	Loss 2.3462 (2.1876)	Entropy 0.71523 (0.71914)	Top-1 acc 70.703 (71.969)	Top-5 acc 83.594 (88.790)	lr 0.00099
Train [105][2390/3239]	Time 0.227 (0.620)	Data Time 0.001 (0.019)	Loss 2.1962 (2.1878)	Entropy 0.71526 (0.71912)	Top-1 acc 71.094 (71.964)	Top-5 acc 89.062 (88.785)	lr 0.00098
Train [105][2400/3239]	Time 0.232 (0.620)	Data Time 0.001 (0.019)	Loss 2.1160 (2.1876)	Entropy 0.71526 (0.71911)	Top-1 acc 72.266 (71.971)	Top-5 acc 91.016 (88.789)	lr 0.00098
Train [105][2410/3239]	Time 0.232 (0.619)	Data Time 0.001 (0.019)	Loss 2.1677 (2.1875)	Entropy 0.71524 (0.71909)	Top-1 acc 73.047 (71.971)	Top-5 acc 90.234 (88.788)	lr 0.00098
Train [105][2420/3239]	Time 0.252 (0.642)	Data Time 0.003 (0.019)	Loss 2.2334 (2.1875)	Entropy 0.71530 (0.71907)	Top-1 acc 70.312 (71.970)	Top-5 acc 87.891 (88.789)	lr 0.00098
Train [105][2430/3239]	Time 2.663 (0.641)	Data Time 0.002 (0.019)	Loss 2.2278 (2.1875)	Entropy 0.71530 (0.71906)	Top-1 acc 72.656 (71.975)	Top-5 acc 88.281 (88.789)	lr 0.00098
Train [105][2440/3239]	Time 0.292 (0.640)	Data Time 0.002 (0.019)	Loss 2.2521 (2.1876)	Entropy 0.71526 (0.71904)	Top-1 acc 68.359 (71.967)	Top-5 acc 89.453 (88.785)	lr 0.00098
Train [105][2450/3239]	Time 0.238 (0.639)	Data Time 0.001 (0.019)	Loss 2.0867 (2.1878)	Entropy 0.71524 (0.71903)	Top-1 acc 76.562 (71.963)	Top-5 acc 89.453 (88.782)	lr 0.00098
Train [105][2460/3239]	Time 0.238 (0.638)	Data Time 0.001 (0.019)	Loss 2.1565 (2.1877)	Entropy 0.71520 (0.71901)	Top-1 acc 70.703 (71.967)	Top-5 acc 88.281 (88.784)	lr 0.00098
Train [105][2470/3239]	Time 0.327 (0.638)	Data Time 0.001 (0.019)	Loss 2.1825 (2.1877)	Entropy 0.71522 (0.71900)	Top-1 acc 71.094 (71.968)	Top-5 acc 87.109 (88.785)	lr 0.00098
Train [105][2480/3239]	Time 0.256 (0.637)	Data Time 0.001 (0.018)	Loss 2.1493 (2.1877)	Entropy 0.71518 (0.71898)	Top-1 acc 70.703 (71.968)	Top-5 acc 91.016 (88.785)	lr 0.00098
Train [105][2490/3239]	Time 0.228 (0.636)	Data Time 0.001 (0.018)	Loss 2.1771 (2.1878)	Entropy 0.71511 (0.71897)	Top-1 acc 72.266 (71.964)	Top-5 acc 88.281 (88.784)	lr 0.00098
Train [105][2500/3239]	Time 0.225 (0.636)	Data Time 0.001 (0.018)	Loss 2.2014 (2.1880)	Entropy 0.71510 (0.71895)	Top-1 acc 71.875 (71.956)	Top-5 acc 87.109 (88.780)	lr 0.00098
Train [105][2510/3239]	Time 0.224 (0.635)	Data Time 0.001 (0.018)	Loss 2.1263 (2.1879)	Entropy 0.71511 (0.71893)	Top-1 acc 72.656 (71.958)	Top-5 acc 90.234 (88.782)	lr 0.00098
Train [105][2520/3239]	Time 0.222 (0.635)	Data Time 0.001 (0.018)	Loss 2.3763 (2.1880)	Entropy 0.71507 (0.71892)	Top-1 acc 69.922 (71.959)	Top-5 acc 85.547 (88.782)	lr 0.00098
Train [105][2530/3239]	Time 0.278 (0.634)	Data Time 0.004 (0.018)	Loss 2.1266 (2.1881)	Entropy 0.71514 (0.71890)	Top-1 acc 74.609 (71.958)	Top-5 acc 89.453 (88.779)	lr 0.00098
Train [105][2540/3239]	Time 2.578 (0.633)	Data Time 0.002 (0.018)	Loss 2.1707 (2.1884)	Entropy 0.71514 (0.71889)	Top-1 acc 75.000 (71.954)	Top-5 acc 90.234 (88.772)	lr 0.00098
Train [105][2550/3239]	Time 0.252 (0.632)	Data Time 0.001 (0.018)	Loss 2.3692 (2.1886)	Entropy 0.71505 (0.71887)	Top-1 acc 66.797 (71.952)	Top-5 acc 83.984 (88.769)	lr 0.00098
Train [105][2560/3239]	Time 0.222 (0.631)	Data Time 0.001 (0.018)	Loss 2.0960 (2.1886)	Entropy 0.71511 (0.71886)	Top-1 acc 74.219 (71.952)	Top-5 acc 91.016 (88.768)	lr 0.00098
Train [105][2570/3239]	Time 0.262 (0.631)	Data Time 0.002 (0.018)	Loss 2.2419 (2.1885)	Entropy 0.71505 (0.71885)	Top-1 acc 70.703 (71.953)	Top-5 acc 86.719 (88.769)	lr 0.00098
Train [105][2580/3239]	Time 0.234 (0.630)	Data Time 0.001 (0.018)	Loss 2.2736 (2.1888)	Entropy 0.71508 (0.71883)	Top-1 acc 68.750 (71.944)	Top-5 acc 86.719 (88.765)	lr 0.00098
Train [105][2590/3239]	Time 0.236 (0.630)	Data Time 0.001 (0.018)	Loss 2.4065 (2.1886)	Entropy 0.71505 (0.71882)	Top-1 acc 68.359 (71.949)	Top-5 acc 85.547 (88.767)	lr 0.00098
Train [105][2600/3239]	Time 0.235 (0.629)	Data Time 0.001 (0.018)	Loss 2.2076 (2.1887)	Entropy 0.71506 (0.71880)	Top-1 acc 75.391 (71.950)	Top-5 acc 87.891 (88.765)	lr 0.00098
Train [105][2610/3239]	Time 0.233 (0.629)	Data Time 0.001 (0.018)	Loss 2.1252 (2.1886)	Entropy 0.71501 (0.71879)	Top-1 acc 74.609 (71.954)	Top-5 acc 88.672 (88.766)	lr 0.00098
Train [105][2620/3239]	Time 0.240 (0.628)	Data Time 0.001 (0.018)	Loss 2.2469 (2.1887)	Entropy 0.71501 (0.71877)	Top-1 acc 71.875 (71.952)	Top-5 acc 87.109 (88.763)	lr 0.00098
Train [105][2630/3239]	Time 0.247 (0.628)	Data Time 0.001 (0.017)	Loss 2.1933 (2.1887)	Entropy 0.71512 (0.71876)	Top-1 acc 73.047 (71.953)	Top-5 acc 87.500 (88.764)	lr 0.00098
Train [105][2640/3239]	Time 0.228 (0.627)	Data Time 0.001 (0.017)	Loss 2.2445 (2.1887)	Entropy 0.71501 (0.71874)	Top-1 acc 72.656 (71.954)	Top-5 acc 88.281 (88.764)	lr 0.00097
Train [105][2650/3239]	Time 0.232 (0.626)	Data Time 0.001 (0.017)	Loss 2.2950 (2.1888)	Entropy 0.71492 (0.71873)	Top-1 acc 73.828 (71.949)	Top-5 acc 88.672 (88.763)	lr 0.00097
Train [105][2660/3239]	Time 0.259 (0.626)	Data Time 0.001 (0.017)	Loss 2.1990 (2.1888)	Entropy 0.71492 (0.71872)	Top-1 acc 70.312 (71.949)	Top-5 acc 85.547 (88.763)	lr 0.00097
Train [105][2670/3239]	Time 0.270 (0.625)	Data Time 0.001 (0.017)	Loss 2.1334 (2.1889)	Entropy 0.71484 (0.71870)	Top-1 acc 71.094 (71.948)	Top-5 acc 89.453 (88.761)	lr 0.00097
Train [105][2680/3239]	Time 0.318 (0.625)	Data Time 0.001 (0.017)	Loss 2.2393 (2.1888)	Entropy 0.71472 (0.71869)	Top-1 acc 70.703 (71.949)	Top-5 acc 87.891 (88.762)	lr 0.00097
Train [105][2690/3239]	Time 0.262 (0.624)	Data Time 0.001 (0.017)	Loss 2.1850 (2.1887)	Entropy 0.71459 (0.71867)	Top-1 acc 75.000 (71.953)	Top-5 acc 87.109 (88.764)	lr 0.00097
Train [105][2700/3239]	Time 0.268 (0.624)	Data Time 0.001 (0.017)	Loss 2.1353 (2.1886)	Entropy 0.71457 (0.71866)	Top-1 acc 75.391 (71.954)	Top-5 acc 88.672 (88.765)	lr 0.00097
Train [105][2710/3239]	Time 0.223 (0.623)	Data Time 0.001 (0.017)	Loss 2.1625 (2.1887)	Entropy 0.71450 (0.71864)	Top-1 acc 73.828 (71.953)	Top-5 acc 88.281 (88.763)	lr 0.00097
Train [105][2720/3239]	Time 0.326 (0.623)	Data Time 0.001 (0.017)	Loss 2.2307 (2.1886)	Entropy 0.71442 (0.71863)	Top-1 acc 71.484 (71.955)	Top-5 acc 89.062 (88.769)	lr 0.00097
Train [105][2730/3239]	Time 0.226 (0.622)	Data Time 0.001 (0.017)	Loss 2.0633 (2.1885)	Entropy 0.71444 (0.71861)	Top-1 acc 74.219 (71.957)	Top-5 acc 90.625 (88.769)	lr 0.00097
Train [105][2740/3239]	Time 0.222 (0.622)	Data Time 0.001 (0.017)	Loss 2.2629 (2.1884)	Entropy 0.71441 (0.71860)	Top-1 acc 67.578 (71.963)	Top-5 acc 88.672 (88.772)	lr 0.00097
Train [105][2750/3239]	Time 0.215 (0.621)	Data Time 0.001 (0.017)	Loss 2.2579 (2.1883)	Entropy 0.71433 (0.71858)	Top-1 acc 70.312 (71.967)	Top-5 acc 89.844 (88.773)	lr 0.00097
Train [105][2760/3239]	Time 0.364 (0.621)	Data Time 0.001 (0.017)	Loss 2.1741 (2.1882)	Entropy 0.71425 (0.71857)	Top-1 acc 71.094 (71.969)	Top-5 acc 90.234 (88.774)	lr 0.00097
Train [105][2770/3239]	Time 0.236 (0.640)	Data Time 0.003 (0.017)	Loss 2.1398 (2.1880)	Entropy 0.71419 (0.71855)	Top-1 acc 73.047 (71.973)	Top-5 acc 88.281 (88.777)	lr 0.00097
Train [105][2780/3239]	Time 0.273 (0.640)	Data Time 0.002 (0.017)	Loss 2.1602 (2.1882)	Entropy 0.71420 (0.71853)	Top-1 acc 69.531 (71.968)	Top-5 acc 91.797 (88.774)	lr 0.00097
Train [105][2790/3239]	Time 0.259 (0.639)	Data Time 0.001 (0.017)	Loss 2.2483 (2.1883)	Entropy 0.71407 (0.71852)	Top-1 acc 70.312 (71.965)	Top-5 acc 87.891 (88.772)	lr 0.00097
Train [105][2800/3239]	Time 0.334 (0.638)	Data Time 0.001 (0.017)	Loss 2.0857 (2.1881)	Entropy 0.71414 (0.71850)	Top-1 acc 72.656 (71.968)	Top-5 acc 89.453 (88.775)	lr 0.00097
Train [105][2810/3239]	Time 0.230 (0.638)	Data Time 0.001 (0.016)	Loss 2.0815 (2.1883)	Entropy 0.71408 (0.71849)	Top-1 acc 70.703 (71.965)	Top-5 acc 92.188 (88.774)	lr 0.00097
Train [105][2820/3239]	Time 0.263 (0.637)	Data Time 0.003 (0.016)	Loss 2.2179 (2.1882)	Entropy 0.71393 (0.71847)	Top-1 acc 74.219 (71.963)	Top-5 acc 88.281 (88.775)	lr 0.00097
Train [105][2830/3239]	Time 0.242 (0.637)	Data Time 0.001 (0.016)	Loss 2.0924 (2.1883)	Entropy 0.71394 (0.71846)	Top-1 acc 76.172 (71.965)	Top-5 acc 89.453 (88.773)	lr 0.00097
Train [105][2840/3239]	Time 0.270 (0.637)	Data Time 0.001 (0.016)	Loss 2.3895 (2.1884)	Entropy 0.71384 (0.71844)	Top-1 acc 69.141 (71.962)	Top-5 acc 85.156 (88.772)	lr 0.00097
Train [105][2850/3239]	Time 0.264 (0.636)	Data Time 0.001 (0.016)	Loss 2.2297 (2.1884)	Entropy 0.71384 (0.71842)	Top-1 acc 71.094 (71.963)	Top-5 acc 86.719 (88.770)	lr 0.00097
Train [105][2860/3239]	Time 0.227 (0.635)	Data Time 0.001 (0.016)	Loss 2.3589 (2.1884)	Entropy 0.71379 (0.71841)	Top-1 acc 64.844 (71.966)	Top-5 acc 87.109 (88.770)	lr 0.00097
Train [105][2870/3239]	Time 0.223 (0.635)	Data Time 0.001 (0.016)	Loss 2.3406 (2.1884)	Entropy 0.71378 (0.71839)	Top-1 acc 67.969 (71.966)	Top-5 acc 85.938 (88.769)	lr 0.00097
Train [105][2880/3239]	Time 0.213 (0.634)	Data Time 0.001 (0.016)	Loss 2.1617 (2.1886)	Entropy 0.71378 (0.71838)	Top-1 acc 71.875 (71.964)	Top-5 acc 89.844 (88.767)	lr 0.00097
Train [105][2890/3239]	Time 0.239 (0.634)	Data Time 0.001 (0.016)	Loss 2.1260 (2.1885)	Entropy 0.71369 (0.71836)	Top-1 acc 74.219 (71.962)	Top-5 acc 89.062 (88.768)	lr 0.00097
Train [105][2900/3239]	Time 0.249 (0.633)	Data Time 0.001 (0.016)	Loss 2.1484 (2.1886)	Entropy 0.71368 (0.71834)	Top-1 acc 73.828 (71.961)	Top-5 acc 91.016 (88.765)	lr 0.00096
Train [105][2910/3239]	Time 0.255 (0.633)	Data Time 0.001 (0.016)	Loss 2.1955 (2.1885)	Entropy 0.71361 (0.71833)	Top-1 acc 71.094 (71.965)	Top-5 acc 88.281 (88.768)	lr 0.00096
Train [105][2920/3239]	Time 0.244 (0.632)	Data Time 0.001 (0.016)	Loss 2.3045 (2.1883)	Entropy 0.71368 (0.71831)	Top-1 acc 70.703 (71.972)	Top-5 acc 86.328 (88.768)	lr 0.00096
Train [105][2930/3239]	Time 0.223 (0.632)	Data Time 0.001 (0.016)	Loss 2.0618 (2.1882)	Entropy 0.71369 (0.71830)	Top-1 acc 73.047 (71.972)	Top-5 acc 92.578 (88.769)	lr 0.00096
Train [105][2940/3239]	Time 0.249 (0.631)	Data Time 0.001 (0.016)	Loss 2.1465 (2.1882)	Entropy 0.71368 (0.71828)	Top-1 acc 73.047 (71.973)	Top-5 acc 89.453 (88.769)	lr 0.00096
Train [105][2950/3239]	Time 0.245 (0.631)	Data Time 0.001 (0.016)	Loss 2.2940 (2.1881)	Entropy 0.71360 (0.71826)	Top-1 acc 70.312 (71.974)	Top-5 acc 84.766 (88.768)	lr 0.00096
Train [105][2960/3239]	Time 0.224 (0.630)	Data Time 0.001 (0.016)	Loss 1.9656 (2.1879)	Entropy 0.71354 (0.71825)	Top-1 acc 78.516 (71.979)	Top-5 acc 91.797 (88.768)	lr 0.00096
Train [105][2970/3239]	Time 0.266 (0.630)	Data Time 0.001 (0.016)	Loss 1.9876 (2.1879)	Entropy 0.71331 (0.71823)	Top-1 acc 80.078 (71.980)	Top-5 acc 92.188 (88.768)	lr 0.00096
Train [105][2980/3239]	Time 0.242 (0.629)	Data Time 0.001 (0.016)	Loss 2.0713 (2.1879)	Entropy 0.71334 (0.71822)	Top-1 acc 72.266 (71.980)	Top-5 acc 91.406 (88.769)	lr 0.00096
Train [105][2990/3239]	Time 0.230 (0.629)	Data Time 0.001 (0.016)	Loss 2.2676 (2.1879)	Entropy 0.71322 (0.71820)	Top-1 acc 73.438 (71.978)	Top-5 acc 85.156 (88.768)	lr 0.00096
Train [105][3000/3239]	Time 0.222 (0.628)	Data Time 0.001 (0.016)	Loss 2.2410 (2.1878)	Entropy 0.71324 (0.71818)	Top-1 acc 70.312 (71.980)	Top-5 acc 87.891 (88.768)	lr 0.00096
Train [105][3010/3239]	Time 0.245 (0.628)	Data Time 0.001 (0.015)	Loss 1.9894 (2.1877)	Entropy 0.71319 (0.71817)	Top-1 acc 77.734 (71.984)	Top-5 acc 93.359 (88.772)	lr 0.00096
Train [105][3020/3239]	Time 0.221 (0.627)	Data Time 0.001 (0.015)	Loss 1.9248 (2.1877)	Entropy 0.71321 (0.71815)	Top-1 acc 78.125 (71.986)	Top-5 acc 92.969 (88.774)	lr 0.00096
Train [105][3030/3239]	Time 0.275 (0.627)	Data Time 0.001 (0.015)	Loss 2.2174 (2.1878)	Entropy 0.71315 (0.71813)	Top-1 acc 71.094 (71.979)	Top-5 acc 87.891 (88.770)	lr 0.00096
Train [105][3040/3239]	Time 0.271 (0.626)	Data Time 0.001 (0.015)	Loss 2.0773 (2.1878)	Entropy 0.71319 (0.71812)	Top-1 acc 75.391 (71.978)	Top-5 acc 91.797 (88.769)	lr 0.00096
Train [105][3050/3239]	Time 0.237 (0.626)	Data Time 0.001 (0.015)	Loss 2.2505 (2.1878)	Entropy 0.71308 (0.71810)	Top-1 acc 70.703 (71.979)	Top-5 acc 86.328 (88.768)	lr 0.00096
Train [105][3060/3239]	Time 0.241 (0.625)	Data Time 0.001 (0.015)	Loss 2.3253 (2.1878)	Entropy 0.71309 (0.71809)	Top-1 acc 71.875 (71.982)	Top-5 acc 85.938 (88.768)	lr 0.00096
Train [105][3070/3239]	Time 0.230 (0.625)	Data Time 0.001 (0.015)	Loss 2.1134 (2.1878)	Entropy 0.71302 (0.71807)	Top-1 acc 74.609 (71.979)	Top-5 acc 89.844 (88.770)	lr 0.00096
Train [105][3080/3239]	Time 0.256 (0.624)	Data Time 0.001 (0.015)	Loss 2.4227 (2.1884)	Entropy 0.71300 (0.71805)	Top-1 acc 65.625 (71.965)	Top-5 acc 82.812 (88.761)	lr 0.00096
Train [105][3090/3239]	Time 0.244 (0.624)	Data Time 0.001 (0.015)	Loss 2.1528 (2.1885)	Entropy 0.71296 (0.71804)	Top-1 acc 73.047 (71.962)	Top-5 acc 89.453 (88.761)	lr 0.00096
Train [105][3100/3239]	Time 0.294 (0.640)	Data Time 0.004 (0.015)	Loss 2.1405 (2.1887)	Entropy 0.71300 (0.71802)	Top-1 acc 75.391 (71.956)	Top-5 acc 86.328 (88.759)	lr 0.00096
Train [105][3110/3239]	Time 0.233 (0.640)	Data Time 0.002 (0.015)	Loss 2.1271 (2.1887)	Entropy 0.71301 (0.71800)	Top-1 acc 76.562 (71.955)	Top-5 acc 89.844 (88.760)	lr 0.00096
Train [105][3120/3239]	Time 0.234 (0.640)	Data Time 0.001 (0.015)	Loss 2.2059 (2.1888)	Entropy 0.71301 (0.71799)	Top-1 acc 70.312 (71.952)	Top-5 acc 88.672 (88.760)	lr 0.00096
Train [105][3130/3239]	Time 0.359 (0.639)	Data Time 0.001 (0.015)	Loss 2.1774 (2.1888)	Entropy 0.71311 (0.71797)	Top-1 acc 73.047 (71.951)	Top-5 acc 90.234 (88.760)	lr 0.00096
Train [105][3140/3239]	Time 0.234 (0.639)	Data Time 0.002 (0.015)	Loss 2.0602 (2.1887)	Entropy 0.71313 (0.71796)	Top-1 acc 72.656 (71.948)	Top-5 acc 91.016 (88.760)	lr 0.00096
Train [105][3150/3239]	Time 0.233 (0.638)	Data Time 0.001 (0.015)	Loss 2.1120 (2.1887)	Entropy 0.71310 (0.71794)	Top-1 acc 72.656 (71.948)	Top-5 acc 90.234 (88.762)	lr 0.00095
Train [105][3160/3239]	Time 0.283 (0.638)	Data Time 0.001 (0.015)	Loss 2.1607 (2.1886)	Entropy 0.71308 (0.71793)	Top-1 acc 72.266 (71.953)	Top-5 acc 88.672 (88.763)	lr 0.00095
Train [105][3170/3239]	Time 0.336 (0.637)	Data Time 0.002 (0.015)	Loss 2.3032 (2.1886)	Entropy 0.71306 (0.71791)	Top-1 acc 67.188 (71.952)	Top-5 acc 86.328 (88.760)	lr 0.00095
Train [105][3180/3239]	Time 0.232 (0.637)	Data Time 0.000 (0.015)	Loss 2.2658 (2.1886)	Entropy 0.71305 (0.71790)	Top-1 acc 69.922 (71.951)	Top-5 acc 85.547 (88.759)	lr 0.00095
Train [105][3190/3239]	Time 0.231 (0.636)	Data Time 0.000 (0.015)	Loss 2.1543 (2.1885)	Entropy 0.71287 (0.71788)	Top-1 acc 69.531 (71.952)	Top-5 acc 91.016 (88.762)	lr 0.00095
Train [105][3200/3239]	Time 0.230 (0.636)	Data Time 0.000 (0.015)	Loss 2.1950 (2.1885)	Entropy 0.71289 (0.71786)	Top-1 acc 71.875 (71.951)	Top-5 acc 87.500 (88.759)	lr 0.00095
Train [105][3210/3239]	Time 0.334 (0.635)	Data Time 0.000 (0.015)	Loss 2.2521 (2.1884)	Entropy 0.71291 (0.71785)	Top-1 acc 68.359 (71.952)	Top-5 acc 89.453 (88.760)	lr 0.00095
Train [105][3220/3239]	Time 0.251 (0.635)	Data Time 0.000 (0.015)	Loss 2.1511 (2.1884)	Entropy 0.71280 (0.71783)	Top-1 acc 74.609 (71.953)	Top-5 acc 90.234 (88.761)	lr 0.00095
Train [105][3230/3239]	Time 0.229 (0.634)	Data Time 0.000 (0.015)	Loss 2.1765 (2.1884)	Entropy 0.71279 (0.71782)	Top-1 acc 73.828 (71.952)	Top-5 acc 89.453 (88.761)	lr 0.00095
Train [105][3239/3239]	Time 2.386 (0.634)	Data Time 0.000 (0.014)	Loss 2.5125 (2.1885)	Entropy 0.71279 (0.71780)	Top-1 acc 62.963 (71.950)	Top-5 acc 81.481 (88.758)	lr 0.00095
==========Valid [105/120]	loss 1.210	top-1 acc 72.484 (72.484)	top-5 acc 89.810	Train top-1 71.950	top-5 88.758	Entropy 0.71279	Latency-None: 0.000ms	Flops: 546.53M
Train [106][0/3239]	Time 41.655 (41.655)	Data Time 39.244 (39.244)	Loss 2.1976 (2.1976)	Entropy 0.71272 (0.71272)	Top-1 acc 69.531 (69.531)	Top-5 acc 90.234 (90.234)	lr 0.00095
Train [106][10/3239]	Time 2.810 (4.404)	Data Time 0.001 (3.607)	Loss 2.2128 (2.1882)	Entropy 0.71272 (0.71272)	Top-1 acc 71.094 (71.342)	Top-5 acc 88.281 (89.205)	lr 0.00095
Train [106][20/3239]	Time 0.377 (2.433)	Data Time 0.001 (1.890)	Loss 2.2302 (2.2116)	Entropy 0.71268 (0.71270)	Top-1 acc 70.312 (71.354)	Top-5 acc 86.719 (88.560)	lr 0.00095
Train [106][30/3239]	Time 0.245 (1.807)	Data Time 0.001 (1.281)	Loss 2.1412 (2.1908)	Entropy 0.71264 (0.71269)	Top-1 acc 74.219 (71.661)	Top-5 acc 88.672 (88.949)	lr 0.00095
Train [106][40/3239]	Time 0.227 (1.484)	Data Time 0.001 (0.969)	Loss 2.0668 (2.1809)	Entropy 0.71262 (0.71267)	Top-1 acc 71.875 (71.665)	Top-5 acc 91.406 (89.110)	lr 0.00095
Train [106][50/3239]	Time 0.245 (1.286)	Data Time 0.001 (0.779)	Loss 2.1312 (2.1837)	Entropy 0.71255 (0.71265)	Top-1 acc 73.047 (71.775)	Top-5 acc 89.062 (88.971)	lr 0.00095
Train [106][60/3239]	Time 0.328 (1.158)	Data Time 0.001 (0.652)	Loss 2.1899 (2.1877)	Entropy 0.71254 (0.71263)	Top-1 acc 70.703 (71.619)	Top-5 acc 89.844 (88.826)	lr 0.00095
Train [106][70/3239]	Time 0.223 (1.065)	Data Time 0.001 (0.560)	Loss 2.1753 (2.1925)	Entropy 0.71254 (0.71262)	Top-1 acc 70.312 (71.446)	Top-5 acc 89.453 (88.683)	lr 0.00095
Train [106][80/3239]	Time 0.234 (0.994)	Data Time 0.001 (0.491)	Loss 2.1863 (2.1913)	Entropy 0.71351 (0.71266)	Top-1 acc 75.781 (71.494)	Top-5 acc 88.281 (88.686)	lr 0.00095
Train [106][90/3239]	Time 0.242 (0.940)	Data Time 0.003 (0.437)	Loss 2.3923 (2.1884)	Entropy 0.71343 (0.71275)	Top-1 acc 69.141 (71.583)	Top-5 acc 83.203 (88.753)	lr 0.00095
Train [106][100/3239]	Time 0.322 (0.896)	Data Time 0.001 (0.394)	Loss 2.1463 (2.1854)	Entropy 0.71340 (0.71282)	Top-1 acc 73.438 (71.597)	Top-5 acc 87.891 (88.815)	lr 0.00095
Train [106][110/3239]	Time 0.226 (0.858)	Data Time 0.001 (0.359)	Loss 2.0060 (2.1766)	Entropy 0.71343 (0.71287)	Top-1 acc 76.953 (71.875)	Top-5 acc 92.578 (88.953)	lr 0.00095
Train [106][120/3239]	Time 2.675 (0.828)	Data Time 0.001 (0.329)	Loss 1.9860 (2.1771)	Entropy 0.71343 (0.71292)	Top-1 acc 76.953 (71.936)	Top-5 acc 92.188 (88.930)	lr 0.00095
Train [106][130/3239]	Time 0.245 (0.783)	Data Time 0.001 (0.304)	Loss 2.2048 (2.1797)	Entropy 0.71326 (0.71294)	Top-1 acc 68.359 (71.926)	Top-5 acc 91.016 (88.896)	lr 0.00095
Train [106][140/3239]	Time 0.336 (0.762)	Data Time 0.007 (0.283)	Loss 2.3323 (2.1830)	Entropy 0.71323 (0.71296)	Top-1 acc 64.844 (71.767)	Top-5 acc 83.203 (88.833)	lr 0.00095
Train [106][150/3239]	Time 0.230 (0.743)	Data Time 0.001 (0.264)	Loss 2.1518 (2.1824)	Entropy 0.71315 (0.71298)	Top-1 acc 69.531 (71.759)	Top-5 acc 92.578 (88.884)	lr 0.00095
Train [106][160/3239]	Time 0.236 (0.726)	Data Time 0.001 (0.248)	Loss 2.0208 (2.1801)	Entropy 0.71314 (0.71299)	Top-1 acc 73.828 (71.870)	Top-5 acc 91.016 (88.895)	lr 0.00095
Train [106][170/3239]	Time 0.241 (0.712)	Data Time 0.001 (0.233)	Loss 2.2418 (2.1808)	Entropy 0.71314 (0.71300)	Top-1 acc 69.141 (71.886)	Top-5 acc 86.328 (88.877)	lr 0.00094
Train [106][180/3239]	Time 0.330 (0.700)	Data Time 0.001 (0.221)	Loss 2.2388 (2.1822)	Entropy 0.71307 (0.71300)	Top-1 acc 71.094 (71.886)	Top-5 acc 87.109 (88.849)	lr 0.00094
Train [106][190/3239]	Time 0.220 (0.688)	Data Time 0.001 (0.209)	Loss 2.1020 (2.1812)	Entropy 0.71302 (0.71300)	Top-1 acc 76.172 (71.959)	Top-5 acc 89.844 (88.864)	lr 0.00094
Train [106][200/3239]	Time 0.228 (0.677)	Data Time 0.001 (0.199)	Loss 2.1108 (2.1814)	Entropy 0.71326 (0.71301)	Top-1 acc 73.828 (71.918)	Top-5 acc 89.062 (88.849)	lr 0.00094
Train [106][210/3239]	Time 0.245 (0.942)	Data Time 0.002 (0.190)	Loss 2.1040 (2.1827)	Entropy 0.71307 (0.71302)	Top-1 acc 71.094 (71.921)	Top-5 acc 91.016 (88.790)	lr 0.00094
Train [106][220/3239]	Time 0.229 (0.921)	Data Time 0.002 (0.181)	Loss 2.3464 (2.1851)	Entropy 0.71304 (0.71302)	Top-1 acc 72.656 (71.891)	Top-5 acc 85.156 (88.748)	lr 0.00094
Train [106][230/3239]	Time 2.607 (0.902)	Data Time 0.002 (0.173)	Loss 2.2997 (2.1865)	Entropy 0.71304 (0.71302)	Top-1 acc 69.922 (71.834)	Top-5 acc 85.938 (88.733)	lr 0.00094
Train [106][240/3239]	Time 0.265 (0.875)	Data Time 0.002 (0.166)	Loss 2.1380 (2.1857)	Entropy 0.71305 (0.71302)	Top-1 acc 73.828 (71.828)	Top-5 acc 90.234 (88.750)	lr 0.00094
Train [106][250/3239]	Time 0.231 (0.859)	Data Time 0.001 (0.160)	Loss 2.2186 (2.1853)	Entropy 0.71306 (0.71302)	Top-1 acc 72.266 (71.869)	Top-5 acc 85.938 (88.757)	lr 0.00094
Train [106][260/3239]	Time 0.243 (0.845)	Data Time 0.002 (0.154)	Loss 2.1879 (2.1864)	Entropy 0.71300 (0.71302)	Top-1 acc 69.922 (71.856)	Top-5 acc 89.453 (88.729)	lr 0.00094
Train [106][270/3239]	Time 0.223 (0.832)	Data Time 0.001 (0.148)	Loss 2.1291 (2.1866)	Entropy 0.71291 (0.71302)	Top-1 acc 73.047 (71.833)	Top-5 acc 91.406 (88.724)	lr 0.00094
Train [106][280/3239]	Time 0.231 (0.820)	Data Time 0.001 (0.143)	Loss 2.1912 (2.1876)	Entropy 0.71285 (0.71302)	Top-1 acc 76.953 (71.824)	Top-5 acc 89.844 (88.708)	lr 0.00094
Train [106][290/3239]	Time 0.234 (0.808)	Data Time 0.001 (0.138)	Loss 2.2518 (2.1865)	Entropy 0.71285 (0.71301)	Top-1 acc 68.750 (71.833)	Top-5 acc 87.500 (88.734)	lr 0.00094
Train [106][300/3239]	Time 0.238 (0.797)	Data Time 0.001 (0.133)	Loss 2.3446 (2.1876)	Entropy 0.71283 (0.71300)	Top-1 acc 67.969 (71.827)	Top-5 acc 86.719 (88.691)	lr 0.00094
Train [106][310/3239]	Time 0.226 (0.787)	Data Time 0.001 (0.129)	Loss 2.2526 (2.1889)	Entropy 0.71273 (0.71300)	Top-1 acc 66.797 (71.773)	Top-5 acc 89.453 (88.679)	lr 0.00094
Train [106][320/3239]	Time 0.241 (0.778)	Data Time 0.002 (0.125)	Loss 2.1410 (2.1888)	Entropy 0.71276 (0.71299)	Top-1 acc 71.875 (71.791)	Top-5 acc 88.281 (88.682)	lr 0.00094
Train [106][330/3239]	Time 0.254 (0.770)	Data Time 0.001 (0.121)	Loss 2.2103 (2.1892)	Entropy 0.71271 (0.71298)	Top-1 acc 70.312 (71.772)	Top-5 acc 88.281 (88.693)	lr 0.00094
Train [106][340/3239]	Time 2.490 (0.761)	Data Time 0.002 (0.118)	Loss 2.1116 (2.1888)	Entropy 0.71271 (0.71297)	Top-1 acc 73.047 (71.787)	Top-5 acc 89.844 (88.709)	lr 0.00094
Train [106][350/3239]	Time 0.237 (0.746)	Data Time 0.001 (0.115)	Loss 2.0290 (2.1885)	Entropy 0.71277 (0.71297)	Top-1 acc 76.172 (71.837)	Top-5 acc 92.969 (88.705)	lr 0.00094
Train [106][360/3239]	Time 0.247 (0.739)	Data Time 0.002 (0.111)	Loss 2.2750 (2.1882)	Entropy 0.71274 (0.71296)	Top-1 acc 71.094 (71.852)	Top-5 acc 85.547 (88.713)	lr 0.00094
Train [106][370/3239]	Time 0.233 (0.732)	Data Time 0.001 (0.109)	Loss 2.3477 (2.1880)	Entropy 0.71270 (0.71296)	Top-1 acc 67.969 (71.873)	Top-5 acc 87.500 (88.738)	lr 0.00094
Train [106][380/3239]	Time 0.215 (0.725)	Data Time 0.001 (0.106)	Loss 2.2813 (2.1883)	Entropy 0.71270 (0.71295)	Top-1 acc 73.438 (71.883)	Top-5 acc 87.500 (88.740)	lr 0.00094
Train [106][390/3239]	Time 0.235 (0.719)	Data Time 0.001 (0.103)	Loss 2.1420 (2.1877)	Entropy 0.71264 (0.71294)	Top-1 acc 74.609 (71.897)	Top-5 acc 88.672 (88.752)	lr 0.00094
Train [106][400/3239]	Time 0.287 (0.713)	Data Time 0.001 (0.100)	Loss 2.1748 (2.1871)	Entropy 0.71262 (0.71293)	Top-1 acc 69.922 (71.877)	Top-5 acc 91.016 (88.778)	lr 0.00094
Train [106][410/3239]	Time 0.227 (0.707)	Data Time 0.001 (0.098)	Loss 2.0710 (2.1868)	Entropy 0.71260 (0.71293)	Top-1 acc 78.125 (71.914)	Top-5 acc 92.188 (88.781)	lr 0.00094
Train [106][420/3239]	Time 0.226 (0.702)	Data Time 0.001 (0.096)	Loss 2.0529 (2.1856)	Entropy 0.71267 (0.71292)	Top-1 acc 75.000 (71.942)	Top-5 acc 89.844 (88.805)	lr 0.00094
Train [106][430/3239]	Time 0.303 (0.697)	Data Time 0.001 (0.094)	Loss 2.1597 (2.1845)	Entropy 0.71263 (0.71291)	Top-1 acc 70.312 (71.957)	Top-5 acc 89.062 (88.827)	lr 0.00093
Train [106][440/3239]	Time 0.213 (0.692)	Data Time 0.001 (0.092)	Loss 2.1555 (2.1836)	Entropy 0.71257 (0.71291)	Top-1 acc 73.438 (72.007)	Top-5 acc 89.844 (88.855)	lr 0.00093
Train [106][450/3239]	Time 2.589 (0.687)	Data Time 0.001 (0.090)	Loss 2.1571 (2.1840)	Entropy 0.71257 (0.71290)	Top-1 acc 73.828 (72.004)	Top-5 acc 89.453 (88.852)	lr 0.00093
Train [106][460/3239]	Time 0.247 (0.678)	Data Time 0.001 (0.088)	Loss 2.1710 (2.1845)	Entropy 0.71254 (0.71289)	Top-1 acc 73.438 (71.991)	Top-5 acc 88.281 (88.830)	lr 0.00093
Train [106][470/3239]	Time 0.229 (0.673)	Data Time 0.001 (0.086)	Loss 2.2670 (2.1845)	Entropy 0.71258 (0.71288)	Top-1 acc 71.094 (72.011)	Top-5 acc 87.891 (88.838)	lr 0.00093
Train [106][480/3239]	Time 0.220 (0.670)	Data Time 0.002 (0.084)	Loss 2.1511 (2.1842)	Entropy 0.71255 (0.71288)	Top-1 acc 73.438 (72.020)	Top-5 acc 89.062 (88.842)	lr 0.00093
Train [106][490/3239]	Time 0.232 (0.666)	Data Time 0.001 (0.082)	Loss 2.0096 (2.1839)	Entropy 0.71256 (0.71287)	Top-1 acc 75.000 (72.033)	Top-5 acc 92.578 (88.841)	lr 0.00093
Train [106][500/3239]	Time 0.244 (0.662)	Data Time 0.001 (0.081)	Loss 2.0522 (2.1825)	Entropy 0.71251 (0.71286)	Top-1 acc 75.391 (72.078)	Top-5 acc 93.750 (88.876)	lr 0.00093
Train [106][510/3239]	Time 0.228 (0.659)	Data Time 0.001 (0.079)	Loss 2.2223 (2.1839)	Entropy 0.71250 (0.71286)	Top-1 acc 72.266 (72.050)	Top-5 acc 88.281 (88.855)	lr 0.00093
Train [106][520/3239]	Time 0.242 (0.655)	Data Time 0.001 (0.078)	Loss 2.0466 (2.1834)	Entropy 0.71245 (0.71285)	Top-1 acc 75.391 (72.061)	Top-5 acc 91.797 (88.858)	lr 0.00093
Train [106][530/3239]	Time 0.237 (0.652)	Data Time 0.001 (0.076)	Loss 2.1248 (2.1827)	Entropy 0.71242 (0.71284)	Top-1 acc 75.000 (72.088)	Top-5 acc 89.453 (88.856)	lr 0.00093
Train [106][540/3239]	Time 0.236 (0.649)	Data Time 0.001 (0.075)	Loss 2.0580 (2.1824)	Entropy 0.71239 (0.71283)	Top-1 acc 76.172 (72.099)	Top-5 acc 89.844 (88.863)	lr 0.00093
Train [106][550/3239]	Time 0.256 (0.646)	Data Time 0.001 (0.074)	Loss 2.1819 (2.1819)	Entropy 0.71235 (0.71283)	Top-1 acc 71.484 (72.111)	Top-5 acc 88.281 (88.873)	lr 0.00093
Train [106][560/3239]	Time 2.660 (0.643)	Data Time 0.001 (0.072)	Loss 2.1863 (2.1818)	Entropy 0.71235 (0.71282)	Top-1 acc 71.484 (72.105)	Top-5 acc 87.891 (88.862)	lr 0.00093
Train [106][570/3239]	Time 0.232 (0.636)	Data Time 0.001 (0.071)	Loss 2.0262 (2.1818)	Entropy 0.71238 (0.71281)	Top-1 acc 77.344 (72.114)	Top-5 acc 91.016 (88.861)	lr 0.00093
Train [106][580/3239]	Time 0.240 (0.732)	Data Time 0.002 (0.070)	Loss 2.1390 (2.1816)	Entropy 0.71234 (0.71280)	Top-1 acc 74.219 (72.120)	Top-5 acc 89.062 (88.857)	lr 0.00093
Train [106][590/3239]	Time 0.248 (0.728)	Data Time 0.002 (0.069)	Loss 2.1438 (2.1817)	Entropy 0.71236 (0.71279)	Top-1 acc 72.656 (72.127)	Top-5 acc 90.625 (88.860)	lr 0.00093
Train [106][600/3239]	Time 0.253 (0.724)	Data Time 0.002 (0.068)	Loss 2.2510 (2.1822)	Entropy 0.71235 (0.71279)	Top-1 acc 67.969 (72.123)	Top-5 acc 88.281 (88.847)	lr 0.00093
Train [106][610/3239]	Time 0.239 (0.720)	Data Time 0.001 (0.067)	Loss 2.0750 (2.1822)	Entropy 0.71220 (0.71278)	Top-1 acc 73.438 (72.120)	Top-5 acc 91.016 (88.849)	lr 0.00093
Train [106][620/3239]	Time 0.226 (0.716)	Data Time 0.001 (0.065)	Loss 2.1873 (2.1820)	Entropy 0.71216 (0.71277)	Top-1 acc 69.922 (72.115)	Top-5 acc 88.281 (88.858)	lr 0.00093
Train [106][630/3239]	Time 0.213 (0.713)	Data Time 0.001 (0.064)	Loss 2.0442 (2.1811)	Entropy 0.71190 (0.71276)	Top-1 acc 75.781 (72.123)	Top-5 acc 91.016 (88.874)	lr 0.00093
Train [106][640/3239]	Time 0.322 (0.709)	Data Time 0.001 (0.063)	Loss 2.2416 (2.1808)	Entropy 0.71183 (0.71274)	Top-1 acc 72.266 (72.141)	Top-5 acc 87.891 (88.880)	lr 0.00093
Train [106][650/3239]	Time 0.226 (0.705)	Data Time 0.001 (0.063)	Loss 2.2991 (2.1804)	Entropy 0.71170 (0.71273)	Top-1 acc 70.312 (72.150)	Top-5 acc 87.891 (88.887)	lr 0.00093
Train [106][660/3239]	Time 0.234 (0.702)	Data Time 0.001 (0.062)	Loss 2.1792 (2.1799)	Entropy 0.71158 (0.71271)	Top-1 acc 72.656 (72.164)	Top-5 acc 89.844 (88.900)	lr 0.00093
Train [106][670/3239]	Time 2.526 (0.699)	Data Time 0.001 (0.061)	Loss 2.2010 (2.1800)	Entropy 0.71158 (0.71270)	Top-1 acc 71.484 (72.151)	Top-5 acc 89.062 (88.894)	lr 0.00093
Train [106][680/3239]	Time 0.346 (0.692)	Data Time 0.001 (0.060)	Loss 2.1083 (2.1802)	Entropy 0.71158 (0.71268)	Top-1 acc 71.484 (72.151)	Top-5 acc 90.625 (88.898)	lr 0.00093
Train [106][690/3239]	Time 0.244 (0.689)	Data Time 0.001 (0.059)	Loss 2.0975 (2.1797)	Entropy 0.71152 (0.71266)	Top-1 acc 73.047 (72.151)	Top-5 acc 89.844 (88.899)	lr 0.00093
Train [106][700/3239]	Time 0.231 (0.686)	Data Time 0.001 (0.058)	Loss 2.2065 (2.1801)	Entropy 0.71154 (0.71265)	Top-1 acc 72.266 (72.134)	Top-5 acc 86.719 (88.891)	lr 0.00092
Train [106][710/3239]	Time 0.229 (0.683)	Data Time 0.001 (0.057)	Loss 2.1477 (2.1800)	Entropy 0.71154 (0.71263)	Top-1 acc 75.000 (72.144)	Top-5 acc 87.109 (88.891)	lr 0.00092
Train [106][720/3239]	Time 0.324 (0.680)	Data Time 0.001 (0.057)	Loss 2.0822 (2.1796)	Entropy 0.71152 (0.71262)	Top-1 acc 74.609 (72.142)	Top-5 acc 88.281 (88.894)	lr 0.00092
Train [106][730/3239]	Time 0.220 (0.678)	Data Time 0.001 (0.056)	Loss 2.3497 (2.1798)	Entropy 0.71142 (0.71260)	Top-1 acc 67.969 (72.131)	Top-5 acc 86.328 (88.890)	lr 0.00092
Train [106][740/3239]	Time 0.255 (0.675)	Data Time 0.001 (0.055)	Loss 2.0854 (2.1792)	Entropy 0.71135 (0.71258)	Top-1 acc 74.609 (72.150)	Top-5 acc 90.625 (88.902)	lr 0.00092
Train [106][750/3239]	Time 0.226 (0.672)	Data Time 0.001 (0.054)	Loss 2.1775 (2.1792)	Entropy 0.71130 (0.71257)	Top-1 acc 71.484 (72.148)	Top-5 acc 90.234 (88.907)	lr 0.00092
Train [106][760/3239]	Time 0.256 (0.670)	Data Time 0.001 (0.054)	Loss 2.1768 (2.1798)	Entropy 0.71129 (0.71255)	Top-1 acc 71.094 (72.120)	Top-5 acc 90.625 (88.899)	lr 0.00092
Train [106][770/3239]	Time 0.225 (0.667)	Data Time 0.001 (0.053)	Loss 2.1974 (2.1802)	Entropy 0.71115 (0.71253)	Top-1 acc 69.922 (72.114)	Top-5 acc 90.234 (88.894)	lr 0.00092
Train [106][780/3239]	Time 2.560 (0.665)	Data Time 0.001 (0.052)	Loss 2.1464 (2.1804)	Entropy 0.71115 (0.71252)	Top-1 acc 70.703 (72.106)	Top-5 acc 93.750 (88.892)	lr 0.00092
Train [106][790/3239]	Time 0.237 (0.659)	Data Time 0.001 (0.052)	Loss 2.1351 (2.1802)	Entropy 0.71111 (0.71250)	Top-1 acc 73.828 (72.123)	Top-5 acc 89.453 (88.884)	lr 0.00092
Train [106][800/3239]	Time 0.241 (0.657)	Data Time 0.001 (0.051)	Loss 2.3764 (2.1807)	Entropy 0.71111 (0.71248)	Top-1 acc 66.016 (72.115)	Top-5 acc 86.328 (88.873)	lr 0.00092
Train [106][810/3239]	Time 0.229 (0.655)	Data Time 0.001 (0.051)	Loss 2.1876 (2.1808)	Entropy 0.71092 (0.71246)	Top-1 acc 73.438 (72.115)	Top-5 acc 87.500 (88.866)	lr 0.00092
Train [106][820/3239]	Time 0.218 (0.653)	Data Time 0.001 (0.050)	Loss 2.1565 (2.1811)	Entropy 0.71086 (0.71244)	Top-1 acc 71.484 (72.108)	Top-5 acc 89.453 (88.863)	lr 0.00092
Train [106][830/3239]	Time 0.244 (0.651)	Data Time 0.002 (0.049)	Loss 2.1544 (2.1816)	Entropy 0.71081 (0.71242)	Top-1 acc 74.219 (72.091)	Top-5 acc 88.281 (88.852)	lr 0.00092
Train [106][840/3239]	Time 0.238 (0.649)	Data Time 0.001 (0.049)	Loss 2.1203 (2.1815)	Entropy 0.71081 (0.71241)	Top-1 acc 75.781 (72.089)	Top-5 acc 89.062 (88.855)	lr 0.00092
Train [106][850/3239]	Time 0.232 (0.647)	Data Time 0.001 (0.048)	Loss 2.1046 (2.1813)	Entropy 0.71073 (0.71239)	Top-1 acc 74.609 (72.087)	Top-5 acc 89.062 (88.859)	lr 0.00092
Train [106][860/3239]	Time 0.219 (0.645)	Data Time 0.001 (0.048)	Loss 2.0324 (2.1810)	Entropy 0.71070 (0.71237)	Top-1 acc 75.781 (72.095)	Top-5 acc 92.188 (88.862)	lr 0.00092
Train [106][870/3239]	Time 0.238 (0.643)	Data Time 0.001 (0.047)	Loss 2.1692 (2.1814)	Entropy 0.71064 (0.71235)	Top-1 acc 71.484 (72.090)	Top-5 acc 91.016 (88.857)	lr 0.00092
Train [106][880/3239]	Time 0.209 (0.641)	Data Time 0.001 (0.047)	Loss 2.2037 (2.1817)	Entropy 0.71059 (0.71233)	Top-1 acc 71.484 (72.087)	Top-5 acc 88.672 (88.850)	lr 0.00092
Train [106][890/3239]	Time 2.571 (0.639)	Data Time 0.001 (0.046)	Loss 2.1847 (2.1811)	Entropy 0.71059 (0.71231)	Top-1 acc 72.656 (72.097)	Top-5 acc 88.281 (88.859)	lr 0.00092
Train [106][900/3239]	Time 0.229 (0.635)	Data Time 0.001 (0.046)	Loss 2.1263 (2.1809)	Entropy 0.71068 (0.71229)	Top-1 acc 72.656 (72.113)	Top-5 acc 89.844 (88.860)	lr 0.00092
Train [106][910/3239]	Time 0.231 (0.633)	Data Time 0.001 (0.045)	Loss 2.1821 (2.1809)	Entropy 0.71056 (0.71227)	Top-1 acc 72.266 (72.112)	Top-5 acc 91.016 (88.861)	lr 0.00092
Train [106][920/3239]	Time 0.236 (0.632)	Data Time 0.001 (0.045)	Loss 2.2121 (2.1808)	Entropy 0.71054 (0.71225)	Top-1 acc 72.656 (72.120)	Top-5 acc 87.891 (88.860)	lr 0.00092
Train [106][930/3239]	Time 0.229 (0.630)	Data Time 0.001 (0.044)	Loss 2.2262 (2.1801)	Entropy 0.71060 (0.71223)	Top-1 acc 70.703 (72.136)	Top-5 acc 89.062 (88.869)	lr 0.00092
Train [106][940/3239]	Time 0.284 (0.685)	Data Time 0.005 (0.044)	Loss 2.1302 (2.1801)	Entropy 0.71065 (0.71222)	Top-1 acc 72.656 (72.143)	Top-5 acc 88.672 (88.869)	lr 0.00092
Train [106][950/3239]	Time 0.231 (0.683)	Data Time 0.002 (0.043)	Loss 2.1055 (2.1799)	Entropy 0.71060 (0.71220)	Top-1 acc 72.656 (72.144)	Top-5 acc 90.625 (88.873)	lr 0.00092
Train [106][960/3239]	Time 0.233 (0.681)	Data Time 0.001 (0.043)	Loss 2.0893 (2.1793)	Entropy 0.71063 (0.71218)	Top-1 acc 74.609 (72.167)	Top-5 acc 91.016 (88.878)	lr 0.00091
Train [106][970/3239]	Time 0.241 (0.678)	Data Time 0.001 (0.042)	Loss 2.2569 (2.1789)	Entropy 0.71056 (0.71217)	Top-1 acc 67.578 (72.176)	Top-5 acc 88.281 (88.888)	lr 0.00091
Train [106][980/3239]	Time 0.228 (0.677)	Data Time 0.001 (0.042)	Loss 2.2877 (2.1792)	Entropy 0.71052 (0.71215)	Top-1 acc 71.484 (72.168)	Top-5 acc 88.281 (88.878)	lr 0.00091
Train [106][990/3239]	Time 0.228 (0.675)	Data Time 0.001 (0.042)	Loss 2.1230 (2.1791)	Entropy 0.71046 (0.71214)	Top-1 acc 74.219 (72.177)	Top-5 acc 90.625 (88.882)	lr 0.00091
Train [106][1000/3239]	Time 2.592 (0.673)	Data Time 0.001 (0.041)	Loss 2.2327 (2.1786)	Entropy 0.71046 (0.71212)	Top-1 acc 70.312 (72.192)	Top-5 acc 86.719 (88.890)	lr 0.00091
Train [106][1010/3239]	Time 0.288 (0.668)	Data Time 0.001 (0.041)	Loss 2.1362 (2.1785)	Entropy 0.71041 (0.71210)	Top-1 acc 73.438 (72.198)	Top-5 acc 88.281 (88.889)	lr 0.00091
Train [106][1020/3239]	Time 0.322 (0.667)	Data Time 0.001 (0.040)	Loss 2.4724 (2.1789)	Entropy 0.71044 (0.71209)	Top-1 acc 64.453 (72.188)	Top-5 acc 81.250 (88.886)	lr 0.00091
Train [106][1030/3239]	Time 0.221 (0.665)	Data Time 0.001 (0.040)	Loss 2.2548 (2.1791)	Entropy 0.71047 (0.71207)	Top-1 acc 71.875 (72.182)	Top-5 acc 88.281 (88.877)	lr 0.00091
Train [106][1040/3239]	Time 0.263 (0.663)	Data Time 0.001 (0.040)	Loss 2.3035 (2.1795)	Entropy 0.71043 (0.71205)	Top-1 acc 70.703 (72.174)	Top-5 acc 85.938 (88.871)	lr 0.00091
Train [106][1050/3239]	Time 0.228 (0.661)	Data Time 0.001 (0.039)	Loss 2.1561 (2.1794)	Entropy 0.71030 (0.71204)	Top-1 acc 70.703 (72.179)	Top-5 acc 88.281 (88.871)	lr 0.00091
Train [106][1060/3239]	Time 0.315 (0.659)	Data Time 0.001 (0.039)	Loss 2.1063 (2.1795)	Entropy 0.71038 (0.71202)	Top-1 acc 73.828 (72.179)	Top-5 acc 89.844 (88.872)	lr 0.00091
Train [106][1070/3239]	Time 0.244 (0.658)	Data Time 0.001 (0.039)	Loss 2.3830 (2.1797)	Entropy 0.71044 (0.71201)	Top-1 acc 67.188 (72.174)	Top-5 acc 85.156 (88.868)	lr 0.00091
Train [106][1080/3239]	Time 0.221 (0.656)	Data Time 0.001 (0.038)	Loss 2.1147 (2.1799)	Entropy 0.71052 (0.71199)	Top-1 acc 71.875 (72.160)	Top-5 acc 90.625 (88.867)	lr 0.00091
Train [106][1090/3239]	Time 0.231 (0.654)	Data Time 0.001 (0.038)	Loss 2.1465 (2.1802)	Entropy 0.71044 (0.71198)	Top-1 acc 72.656 (72.152)	Top-5 acc 89.844 (88.862)	lr 0.00091
Train [106][1100/3239]	Time 0.222 (0.653)	Data Time 0.001 (0.038)	Loss 2.0988 (2.1801)	Entropy 0.71045 (0.71197)	Top-1 acc 75.000 (72.159)	Top-5 acc 90.234 (88.856)	lr 0.00091
Train [106][1110/3239]	Time 2.511 (0.651)	Data Time 0.001 (0.037)	Loss 2.1598 (2.1797)	Entropy 0.71045 (0.71195)	Top-1 acc 75.781 (72.168)	Top-5 acc 89.453 (88.864)	lr 0.00091
Train [106][1120/3239]	Time 0.283 (0.648)	Data Time 0.001 (0.037)	Loss 2.0675 (2.1800)	Entropy 0.71043 (0.71194)	Top-1 acc 74.219 (72.161)	Top-5 acc 90.625 (88.860)	lr 0.00091
Train [106][1130/3239]	Time 0.235 (0.646)	Data Time 0.001 (0.037)	Loss 2.2484 (2.1804)	Entropy 0.71039 (0.71192)	Top-1 acc 71.875 (72.155)	Top-5 acc 88.281 (88.853)	lr 0.00091
Train [106][1140/3239]	Time 0.244 (0.645)	Data Time 0.002 (0.036)	Loss 2.1146 (2.1807)	Entropy 0.71036 (0.71191)	Top-1 acc 74.609 (72.150)	Top-5 acc 87.891 (88.846)	lr 0.00091
Train [106][1150/3239]	Time 0.218 (0.643)	Data Time 0.001 (0.036)	Loss 2.2067 (2.1807)	Entropy 0.71032 (0.71190)	Top-1 acc 70.703 (72.147)	Top-5 acc 88.281 (88.849)	lr 0.00091
Train [106][1160/3239]	Time 0.242 (0.642)	Data Time 0.001 (0.036)	Loss 2.1160 (2.1808)	Entropy 0.71033 (0.71188)	Top-1 acc 73.438 (72.148)	Top-5 acc 90.234 (88.842)	lr 0.00091
Train [106][1170/3239]	Time 0.233 (0.640)	Data Time 0.001 (0.035)	Loss 2.1538 (2.1807)	Entropy 0.71032 (0.71187)	Top-1 acc 72.266 (72.146)	Top-5 acc 88.281 (88.843)	lr 0.00091
Train [106][1180/3239]	Time 0.215 (0.639)	Data Time 0.001 (0.035)	Loss 2.1603 (2.1806)	Entropy 0.71027 (0.71186)	Top-1 acc 75.000 (72.154)	Top-5 acc 88.672 (88.842)	lr 0.00091
Train [106][1190/3239]	Time 0.221 (0.637)	Data Time 0.001 (0.035)	Loss 2.1425 (2.1805)	Entropy 0.71018 (0.71184)	Top-1 acc 73.828 (72.159)	Top-5 acc 89.453 (88.850)	lr 0.00091
Train [106][1200/3239]	Time 0.213 (0.636)	Data Time 0.001 (0.035)	Loss 2.2579 (2.1802)	Entropy 0.71006 (0.71183)	Top-1 acc 69.922 (72.171)	Top-5 acc 85.547 (88.852)	lr 0.00091
Train [106][1210/3239]	Time 0.223 (0.635)	Data Time 0.001 (0.034)	Loss 2.4451 (2.1806)	Entropy 0.71006 (0.71181)	Top-1 acc 67.188 (72.157)	Top-5 acc 81.641 (88.848)	lr 0.00091
Train [106][1220/3239]	Time 2.516 (0.634)	Data Time 0.001 (0.034)	Loss 2.1956 (2.1806)	Entropy 0.71006 (0.71180)	Top-1 acc 73.828 (72.160)	Top-5 acc 89.062 (88.850)	lr 0.00090
Train [106][1230/3239]	Time 0.373 (0.631)	Data Time 0.001 (0.034)	Loss 2.2809 (2.1809)	Entropy 0.71004 (0.71179)	Top-1 acc 70.312 (72.148)	Top-5 acc 87.500 (88.847)	lr 0.00090
Train [106][1240/3239]	Time 0.269 (0.629)	Data Time 0.001 (0.034)	Loss 2.2439 (2.1808)	Entropy 0.71017 (0.71177)	Top-1 acc 71.094 (72.153)	Top-5 acc 87.500 (88.847)	lr 0.00090
Train [106][1250/3239]	Time 0.232 (0.628)	Data Time 0.001 (0.033)	Loss 2.1499 (2.1810)	Entropy 0.71017 (0.71176)	Top-1 acc 71.094 (72.142)	Top-5 acc 88.672 (88.843)	lr 0.00090
Train [106][1260/3239]	Time 0.237 (0.627)	Data Time 0.002 (0.033)	Loss 2.2936 (2.1812)	Entropy 0.71007 (0.71175)	Top-1 acc 69.141 (72.139)	Top-5 acc 87.109 (88.843)	lr 0.00090
Train [106][1270/3239]	Time 0.307 (0.626)	Data Time 0.001 (0.033)	Loss 2.0943 (2.1816)	Entropy 0.71009 (0.71173)	Top-1 acc 73.828 (72.120)	Top-5 acc 91.016 (88.841)	lr 0.00090
Train [106][1280/3239]	Time 0.229 (0.625)	Data Time 0.001 (0.033)	Loss 2.0436 (2.1815)	Entropy 0.71004 (0.71172)	Top-1 acc 76.172 (72.128)	Top-5 acc 91.406 (88.842)	lr 0.00090
Train [106][1290/3239]	Time 0.229 (0.624)	Data Time 0.001 (0.032)	Loss 2.0914 (2.1814)	Entropy 0.71008 (0.71171)	Top-1 acc 72.656 (72.132)	Top-5 acc 91.016 (88.845)	lr 0.00090
Train [106][1300/3239]	Time 0.347 (0.664)	Data Time 0.003 (0.032)	Loss 2.2333 (2.1815)	Entropy 0.71007 (0.71170)	Top-1 acc 71.875 (72.131)	Top-5 acc 87.500 (88.842)	lr 0.00090
Train [106][1310/3239]	Time 0.322 (0.663)	Data Time 0.002 (0.032)	Loss 2.2266 (2.1821)	Entropy 0.71012 (0.71168)	Top-1 acc 72.266 (72.119)	Top-5 acc 88.672 (88.835)	lr 0.00090
Train [106][1320/3239]	Time 0.302 (0.661)	Data Time 0.002 (0.032)	Loss 2.2779 (2.1821)	Entropy 0.71005 (0.71167)	Top-1 acc 68.750 (72.113)	Top-5 acc 88.281 (88.835)	lr 0.00090
Train [106][1330/3239]	Time 2.625 (0.660)	Data Time 0.002 (0.031)	Loss 2.1337 (2.1822)	Entropy 0.71005 (0.71166)	Top-1 acc 73.047 (72.110)	Top-5 acc 91.406 (88.831)	lr 0.00090
Train [106][1340/3239]	Time 0.234 (0.657)	Data Time 0.001 (0.031)	Loss 2.1238 (2.1822)	Entropy 0.70996 (0.71165)	Top-1 acc 73.438 (72.111)	Top-5 acc 88.672 (88.830)	lr 0.00090
Train [106][1350/3239]	Time 0.234 (0.656)	Data Time 0.001 (0.031)	Loss 2.2262 (2.1823)	Entropy 0.70997 (0.71163)	Top-1 acc 70.312 (72.112)	Top-5 acc 86.719 (88.824)	lr 0.00090
Train [106][1360/3239]	Time 0.268 (0.655)	Data Time 0.001 (0.031)	Loss 2.1269 (2.1823)	Entropy 0.70995 (0.71162)	Top-1 acc 72.266 (72.106)	Top-5 acc 89.453 (88.823)	lr 0.00090
Train [106][1370/3239]	Time 0.232 (0.653)	Data Time 0.001 (0.031)	Loss 2.2115 (2.1825)	Entropy 0.70991 (0.71161)	Top-1 acc 72.266 (72.102)	Top-5 acc 88.672 (88.826)	lr 0.00090
Train [106][1380/3239]	Time 0.225 (0.652)	Data Time 0.001 (0.030)	Loss 2.1221 (2.1824)	Entropy 0.70986 (0.71160)	Top-1 acc 72.656 (72.100)	Top-5 acc 92.188 (88.833)	lr 0.00090
Train [106][1390/3239]	Time 0.237 (0.651)	Data Time 0.001 (0.030)	Loss 2.2702 (2.1825)	Entropy 0.70992 (0.71158)	Top-1 acc 71.094 (72.098)	Top-5 acc 87.891 (88.834)	lr 0.00090
Train [106][1400/3239]	Time 0.235 (0.650)	Data Time 0.001 (0.030)	Loss 2.1654 (2.1832)	Entropy 0.70998 (0.71157)	Top-1 acc 73.047 (72.086)	Top-5 acc 89.844 (88.822)	lr 0.00090
Train [106][1410/3239]	Time 0.224 (0.649)	Data Time 0.001 (0.030)	Loss 2.0898 (2.1835)	Entropy 0.70986 (0.71156)	Top-1 acc 76.172 (72.086)	Top-5 acc 88.672 (88.813)	lr 0.00090
Train [106][1420/3239]	Time 0.226 (0.647)	Data Time 0.001 (0.030)	Loss 2.1595 (2.1835)	Entropy 0.70980 (0.71155)	Top-1 acc 74.609 (72.077)	Top-5 acc 88.672 (88.811)	lr 0.00090
Train [106][1430/3239]	Time 0.254 (0.646)	Data Time 0.001 (0.029)	Loss 2.1509 (2.1835)	Entropy 0.70971 (0.71154)	Top-1 acc 73.047 (72.075)	Top-5 acc 88.281 (88.814)	lr 0.00090
Train [106][1440/3239]	Time 2.740 (0.645)	Data Time 0.001 (0.029)	Loss 2.2716 (2.1836)	Entropy 0.70971 (0.71152)	Top-1 acc 69.922 (72.073)	Top-5 acc 88.672 (88.813)	lr 0.00090
Train [106][1450/3239]	Time 0.231 (0.642)	Data Time 0.001 (0.029)	Loss 2.2939 (2.1837)	Entropy 0.70967 (0.71151)	Top-1 acc 70.703 (72.076)	Top-5 acc 86.328 (88.815)	lr 0.00090
Train [106][1460/3239]	Time 0.224 (0.641)	Data Time 0.001 (0.029)	Loss 2.1209 (2.1838)	Entropy 0.70972 (0.71150)	Top-1 acc 71.875 (72.075)	Top-5 acc 90.234 (88.811)	lr 0.00090
Train [106][1470/3239]	Time 0.238 (0.640)	Data Time 0.001 (0.029)	Loss 2.2376 (2.1836)	Entropy 0.70968 (0.71149)	Top-1 acc 69.531 (72.080)	Top-5 acc 88.281 (88.814)	lr 0.00090
Train [106][1480/3239]	Time 0.327 (0.639)	Data Time 0.001 (0.028)	Loss 2.1663 (2.1836)	Entropy 0.70961 (0.71147)	Top-1 acc 72.266 (72.081)	Top-5 acc 88.281 (88.810)	lr 0.00090
Train [106][1490/3239]	Time 0.247 (0.638)	Data Time 0.002 (0.028)	Loss 2.1881 (2.1837)	Entropy 0.70948 (0.71146)	Top-1 acc 74.219 (72.078)	Top-5 acc 87.891 (88.808)	lr 0.00089
Train [106][1500/3239]	Time 0.243 (0.637)	Data Time 0.001 (0.028)	Loss 2.2445 (2.1839)	Entropy 0.70944 (0.71145)	Top-1 acc 68.750 (72.074)	Top-5 acc 87.500 (88.808)	lr 0.00089
Train [106][1510/3239]	Time 0.256 (0.636)	Data Time 0.001 (0.028)	Loss 2.2628 (2.1838)	Entropy 0.70942 (0.71143)	Top-1 acc 71.094 (72.082)	Top-5 acc 88.672 (88.813)	lr 0.00089
Train [106][1520/3239]	Time 0.256 (0.635)	Data Time 0.001 (0.028)	Loss 2.1712 (2.1838)	Entropy 0.70943 (0.71142)	Top-1 acc 71.094 (72.081)	Top-5 acc 89.062 (88.814)	lr 0.00089
Train [106][1530/3239]	Time 0.234 (0.634)	Data Time 0.001 (0.027)	Loss 2.2252 (2.1841)	Entropy 0.70941 (0.71141)	Top-1 acc 72.266 (72.074)	Top-5 acc 88.672 (88.809)	lr 0.00089
Train [106][1540/3239]	Time 0.230 (0.633)	Data Time 0.001 (0.027)	Loss 2.2295 (2.1841)	Entropy 0.70948 (0.71140)	Top-1 acc 72.266 (72.074)	Top-5 acc 88.672 (88.812)	lr 0.00089
Train [106][1550/3239]	Time 2.558 (0.632)	Data Time 0.001 (0.027)	Loss 2.0980 (2.1841)	Entropy 0.70948 (0.71138)	Top-1 acc 75.781 (72.080)	Top-5 acc 89.844 (88.812)	lr 0.00089
Train [106][1560/3239]	Time 0.297 (0.630)	Data Time 0.001 (0.027)	Loss 2.1428 (2.1840)	Entropy 0.70995 (0.71137)	Top-1 acc 71.875 (72.082)	Top-5 acc 88.672 (88.811)	lr 0.00089
Train [106][1570/3239]	Time 0.225 (0.629)	Data Time 0.001 (0.027)	Loss 2.1501 (2.1841)	Entropy 0.70995 (0.71136)	Top-1 acc 74.219 (72.086)	Top-5 acc 89.062 (88.806)	lr 0.00089
Train [106][1580/3239]	Time 0.243 (0.628)	Data Time 0.001 (0.027)	Loss 2.1421 (2.1841)	Entropy 0.70990 (0.71136)	Top-1 acc 71.094 (72.086)	Top-5 acc 89.844 (88.807)	lr 0.00089
Train [106][1590/3239]	Time 0.239 (0.627)	Data Time 0.001 (0.027)	Loss 2.2429 (2.1842)	Entropy 0.70987 (0.71135)	Top-1 acc 72.266 (72.082)	Top-5 acc 88.672 (88.806)	lr 0.00089
Train [106][1600/3239]	Time 0.227 (0.626)	Data Time 0.001 (0.026)	Loss 2.3064 (2.1846)	Entropy 0.70996 (0.71134)	Top-1 acc 64.453 (72.065)	Top-5 acc 87.109 (88.797)	lr 0.00089
Train [106][1610/3239]	Time 0.241 (0.625)	Data Time 0.001 (0.026)	Loss 2.1762 (2.1845)	Entropy 0.70995 (0.71133)	Top-1 acc 74.219 (72.063)	Top-5 acc 90.625 (88.803)	lr 0.00089
Train [106][1620/3239]	Time 0.259 (0.624)	Data Time 0.002 (0.026)	Loss 2.3552 (2.1845)	Entropy 0.70994 (0.71132)	Top-1 acc 71.094 (72.064)	Top-5 acc 86.719 (88.805)	lr 0.00089
Train [106][1630/3239]	Time 0.223 (0.624)	Data Time 0.001 (0.026)	Loss 2.1933 (2.1847)	Entropy 0.70995 (0.71131)	Top-1 acc 71.094 (72.065)	Top-5 acc 87.500 (88.801)	lr 0.00089
Train [106][1640/3239]	Time 0.250 (0.623)	Data Time 0.003 (0.026)	Loss 2.1107 (2.1847)	Entropy 0.70992 (0.71130)	Top-1 acc 73.438 (72.059)	Top-5 acc 90.234 (88.798)	lr 0.00089
Train [106][1650/3239]	Time 0.253 (0.622)	Data Time 0.001 (0.026)	Loss 2.1881 (2.1846)	Entropy 0.70992 (0.71130)	Top-1 acc 75.000 (72.059)	Top-5 acc 88.281 (88.802)	lr 0.00089
Train [106][1660/3239]	Time 55.469 (0.653)	Data Time 0.001 (0.025)	Loss 2.2521 (2.1847)	Entropy 0.70992 (0.71129)	Top-1 acc 73.047 (72.062)	Top-5 acc 89.062 (88.799)	lr 0.00089
Train [106][1670/3239]	Time 0.272 (0.651)	Data Time 0.002 (0.025)	Loss 2.0814 (2.1845)	Entropy 0.70982 (0.71128)	Top-1 acc 74.609 (72.065)	Top-5 acc 90.625 (88.801)	lr 0.00089
Train [106][1680/3239]	Time 0.242 (0.650)	Data Time 0.002 (0.025)	Loss 2.3057 (2.1843)	Entropy 0.70974 (0.71127)	Top-1 acc 67.969 (72.071)	Top-5 acc 85.938 (88.802)	lr 0.00089
Train [106][1690/3239]	Time 0.327 (0.649)	Data Time 0.001 (0.025)	Loss 2.2300 (2.1844)	Entropy 0.70974 (0.71126)	Top-1 acc 72.266 (72.068)	Top-5 acc 88.672 (88.804)	lr 0.00089
Train [106][1700/3239]	Time 0.237 (0.648)	Data Time 0.001 (0.025)	Loss 2.1972 (2.1842)	Entropy 0.70971 (0.71125)	Top-1 acc 72.266 (72.071)	Top-5 acc 88.281 (88.807)	lr 0.00089
Train [106][1710/3239]	Time 0.249 (0.647)	Data Time 0.002 (0.025)	Loss 2.2698 (2.1842)	Entropy 0.70966 (0.71124)	Top-1 acc 70.703 (72.076)	Top-5 acc 87.500 (88.804)	lr 0.00089
Train [106][1720/3239]	Time 0.236 (0.646)	Data Time 0.001 (0.025)	Loss 2.1606 (2.1842)	Entropy 0.70964 (0.71123)	Top-1 acc 76.172 (72.079)	Top-5 acc 89.062 (88.802)	lr 0.00089
Train [106][1730/3239]	Time 0.363 (0.645)	Data Time 0.001 (0.025)	Loss 2.2677 (2.1841)	Entropy 0.70958 (0.71122)	Top-1 acc 66.406 (72.079)	Top-5 acc 89.844 (88.804)	lr 0.00089
Train [106][1740/3239]	Time 0.231 (0.644)	Data Time 0.001 (0.024)	Loss 2.1715 (2.1841)	Entropy 0.70952 (0.71121)	Top-1 acc 75.000 (72.076)	Top-5 acc 87.500 (88.805)	lr 0.00089
Train [106][1750/3239]	Time 0.222 (0.643)	Data Time 0.001 (0.024)	Loss 2.0472 (2.1841)	Entropy 0.70949 (0.71120)	Top-1 acc 74.609 (72.078)	Top-5 acc 90.625 (88.807)	lr 0.00089
Train [106][1760/3239]	Time 0.237 (0.642)	Data Time 0.001 (0.024)	Loss 2.2513 (2.1841)	Entropy 0.70948 (0.71119)	Top-1 acc 69.922 (72.077)	Top-5 acc 88.672 (88.812)	lr 0.00088
Train [106][1770/3239]	Time 2.691 (0.641)	Data Time 0.001 (0.024)	Loss 2.0964 (2.1840)	Entropy 0.70948 (0.71118)	Top-1 acc 73.828 (72.082)	Top-5 acc 89.844 (88.812)	lr 0.00088
Train [106][1780/3239]	Time 0.300 (0.639)	Data Time 0.001 (0.024)	Loss 2.0209 (2.1837)	Entropy 0.70943 (0.71117)	Top-1 acc 76.172 (72.090)	Top-5 acc 92.188 (88.817)	lr 0.00088
Train [106][1790/3239]	Time 0.253 (0.638)	Data Time 0.001 (0.024)	Loss 2.1869 (2.1839)	Entropy 0.70939 (0.71116)	Top-1 acc 71.484 (72.083)	Top-5 acc 85.547 (88.810)	lr 0.00088
Train [106][1800/3239]	Time 0.229 (0.638)	Data Time 0.001 (0.024)	Loss 2.3394 (2.1838)	Entropy 0.70944 (0.71115)	Top-1 acc 67.578 (72.089)	Top-5 acc 87.109 (88.812)	lr 0.00088
Train [106][1810/3239]	Time 0.236 (0.637)	Data Time 0.001 (0.023)	Loss 2.2542 (2.1839)	Entropy 0.70933 (0.71114)	Top-1 acc 73.047 (72.087)	Top-5 acc 87.500 (88.810)	lr 0.00088
Train [106][1820/3239]	Time 0.240 (0.636)	Data Time 0.001 (0.023)	Loss 2.1410 (2.1838)	Entropy 0.70922 (0.71113)	Top-1 acc 73.047 (72.091)	Top-5 acc 90.625 (88.816)	lr 0.00088
Train [106][1830/3239]	Time 0.219 (0.635)	Data Time 0.001 (0.023)	Loss 2.1708 (2.1837)	Entropy 0.70917 (0.71112)	Top-1 acc 74.219 (72.094)	Top-5 acc 89.844 (88.818)	lr 0.00088
Train [106][1840/3239]	Time 0.242 (0.634)	Data Time 0.001 (0.023)	Loss 2.1890 (2.1836)	Entropy 0.70918 (0.71111)	Top-1 acc 71.094 (72.096)	Top-5 acc 88.672 (88.820)	lr 0.00088
Train [106][1850/3239]	Time 0.237 (0.634)	Data Time 0.001 (0.023)	Loss 2.1810 (2.1837)	Entropy 0.70912 (0.71110)	Top-1 acc 73.828 (72.089)	Top-5 acc 87.500 (88.815)	lr 0.00088
Train [106][1860/3239]	Time 0.228 (0.633)	Data Time 0.001 (0.023)	Loss 2.1102 (2.1835)	Entropy 0.70914 (0.71109)	Top-1 acc 73.047 (72.094)	Top-5 acc 91.016 (88.820)	lr 0.00088
Train [106][1870/3239]	Time 0.271 (0.632)	Data Time 0.001 (0.023)	Loss 2.2489 (2.1837)	Entropy 0.70910 (0.71108)	Top-1 acc 71.094 (72.089)	Top-5 acc 89.062 (88.816)	lr 0.00088
Train [106][1880/3239]	Time 2.511 (0.631)	Data Time 0.001 (0.023)	Loss 2.0168 (2.1836)	Entropy 0.70910 (0.71107)	Top-1 acc 77.344 (72.100)	Top-5 acc 91.016 (88.813)	lr 0.00088
Train [106][1890/3239]	Time 0.260 (0.629)	Data Time 0.002 (0.023)	Loss 1.8787 (2.1837)	Entropy 0.70914 (0.71106)	Top-1 acc 77.734 (72.093)	Top-5 acc 93.750 (88.812)	lr 0.00088
Train [106][1900/3239]	Time 0.335 (0.628)	Data Time 0.001 (0.022)	Loss 2.1708 (2.1836)	Entropy 0.70910 (0.71105)	Top-1 acc 67.969 (72.094)	Top-5 acc 90.625 (88.813)	lr 0.00088
Train [106][1910/3239]	Time 0.234 (0.628)	Data Time 0.001 (0.022)	Loss 2.1120 (2.1836)	Entropy 0.70907 (0.71104)	Top-1 acc 74.609 (72.096)	Top-5 acc 88.672 (88.813)	lr 0.00088
Train [106][1920/3239]	Time 0.252 (0.627)	Data Time 0.001 (0.022)	Loss 2.3164 (2.1837)	Entropy 0.70903 (0.71103)	Top-1 acc 71.875 (72.093)	Top-5 acc 84.766 (88.806)	lr 0.00088
Train [106][1930/3239]	Time 0.246 (0.626)	Data Time 0.002 (0.022)	Loss 2.3462 (2.1835)	Entropy 0.70899 (0.71102)	Top-1 acc 67.969 (72.101)	Top-5 acc 87.500 (88.809)	lr 0.00088
Train [106][1940/3239]	Time 0.350 (0.625)	Data Time 0.002 (0.022)	Loss 2.2757 (2.1835)	Entropy 0.70894 (0.71101)	Top-1 acc 70.312 (72.101)	Top-5 acc 87.109 (88.807)	lr 0.00088
Train [106][1950/3239]	Time 0.240 (0.625)	Data Time 0.001 (0.022)	Loss 2.2391 (2.1834)	Entropy 0.70885 (0.71100)	Top-1 acc 71.875 (72.102)	Top-5 acc 86.719 (88.809)	lr 0.00088
Train [106][1960/3239]	Time 0.225 (0.624)	Data Time 0.001 (0.022)	Loss 2.2744 (2.1834)	Entropy 0.70881 (0.71099)	Top-1 acc 70.312 (72.102)	Top-5 acc 87.500 (88.809)	lr 0.00088
Train [106][1970/3239]	Time 0.233 (0.623)	Data Time 0.001 (0.022)	Loss 2.1864 (2.1837)	Entropy 0.70871 (0.71098)	Top-1 acc 73.047 (72.097)	Top-5 acc 87.109 (88.800)	lr 0.00088
Train [106][1980/3239]	Time 0.271 (0.623)	Data Time 0.007 (0.022)	Loss 2.3118 (2.1836)	Entropy 0.70868 (0.71096)	Top-1 acc 70.703 (72.102)	Top-5 acc 85.938 (88.801)	lr 0.00088
Train [106][1990/3239]	Time 2.555 (0.622)	Data Time 0.001 (0.022)	Loss 2.0916 (2.1836)	Entropy 0.70868 (0.71095)	Top-1 acc 70.703 (72.099)	Top-5 acc 92.578 (88.802)	lr 0.00088
Train [106][2000/3239]	Time 0.233 (0.620)	Data Time 0.001 (0.021)	Loss 2.2403 (2.1837)	Entropy 0.70863 (0.71094)	Top-1 acc 69.531 (72.094)	Top-5 acc 86.328 (88.802)	lr 0.00088
Train [106][2010/3239]	Time 0.244 (0.619)	Data Time 0.001 (0.021)	Loss 2.1644 (2.1834)	Entropy 0.70866 (0.71093)	Top-1 acc 71.094 (72.099)	Top-5 acc 89.844 (88.807)	lr 0.00088
Train [106][2020/3239]	Time 0.236 (0.619)	Data Time 0.001 (0.021)	Loss 2.2167 (2.1837)	Entropy 0.70854 (0.71092)	Top-1 acc 71.875 (72.090)	Top-5 acc 87.500 (88.803)	lr 0.00087
Train [106][2030/3239]	Time 0.247 (0.646)	Data Time 0.002 (0.021)	Loss 2.3972 (2.1839)	Entropy 0.70849 (0.71091)	Top-1 acc 67.578 (72.085)	Top-5 acc 83.203 (88.795)	lr 0.00087
Train [106][2040/3239]	Time 0.234 (0.645)	Data Time 0.002 (0.021)	Loss 2.1999 (2.1839)	Entropy 0.70839 (0.71089)	Top-1 acc 70.312 (72.084)	Top-5 acc 89.062 (88.796)	lr 0.00087
Train [106][2050/3239]	Time 0.234 (0.644)	Data Time 0.002 (0.021)	Loss 2.2716 (2.1839)	Entropy 0.70840 (0.71088)	Top-1 acc 69.531 (72.083)	Top-5 acc 86.719 (88.795)	lr 0.00087
Train [106][2060/3239]	Time 0.228 (0.643)	Data Time 0.001 (0.021)	Loss 2.2264 (2.1838)	Entropy 0.70833 (0.71087)	Top-1 acc 71.875 (72.083)	Top-5 acc 88.281 (88.798)	lr 0.00087
Train [106][2070/3239]	Time 0.232 (0.643)	Data Time 0.005 (0.021)	Loss 2.0939 (2.1839)	Entropy 0.70820 (0.71086)	Top-1 acc 73.828 (72.079)	Top-5 acc 92.578 (88.801)	lr 0.00087
Train [106][2080/3239]	Time 0.231 (0.642)	Data Time 0.001 (0.021)	Loss 2.1960 (2.1840)	Entropy 0.70814 (0.71084)	Top-1 acc 71.484 (72.079)	Top-5 acc 87.109 (88.798)	lr 0.00087
Train [106][2090/3239]	Time 0.231 (0.641)	Data Time 0.001 (0.021)	Loss 2.1663 (2.1839)	Entropy 0.70816 (0.71083)	Top-1 acc 72.266 (72.076)	Top-5 acc 89.062 (88.799)	lr 0.00087
Train [106][2100/3239]	Time 2.585 (0.640)	Data Time 0.001 (0.020)	Loss 2.2217 (2.1840)	Entropy 0.70816 (0.71082)	Top-1 acc 71.875 (72.070)	Top-5 acc 87.891 (88.798)	lr 0.00087
Train [106][2110/3239]	Time 0.365 (0.639)	Data Time 0.002 (0.020)	Loss 2.2672 (2.1838)	Entropy 0.70809 (0.71081)	Top-1 acc 68.750 (72.073)	Top-5 acc 89.844 (88.800)	lr 0.00087
Train [106][2120/3239]	Time 0.248 (0.638)	Data Time 0.001 (0.020)	Loss 2.0542 (2.1837)	Entropy 0.70801 (0.71079)	Top-1 acc 76.562 (72.077)	Top-5 acc 91.016 (88.804)	lr 0.00087
Train [106][2130/3239]	Time 0.235 (0.637)	Data Time 0.001 (0.020)	Loss 2.4291 (2.1838)	Entropy 0.70798 (0.71078)	Top-1 acc 67.578 (72.078)	Top-5 acc 82.422 (88.802)	lr 0.00087
Train [106][2140/3239]	Time 0.237 (0.636)	Data Time 0.001 (0.020)	Loss 2.2645 (2.1839)	Entropy 0.70794 (0.71077)	Top-1 acc 68.359 (72.076)	Top-5 acc 86.719 (88.798)	lr 0.00087
Train [106][2150/3239]	Time 0.353 (0.636)	Data Time 0.002 (0.020)	Loss 2.2887 (2.1839)	Entropy 0.70789 (0.71075)	Top-1 acc 71.484 (72.076)	Top-5 acc 88.281 (88.797)	lr 0.00087
Train [106][2160/3239]	Time 0.239 (0.635)	Data Time 0.001 (0.020)	Loss 2.1656 (2.1839)	Entropy 0.70794 (0.71074)	Top-1 acc 75.000 (72.079)	Top-5 acc 86.719 (88.795)	lr 0.00087
Train [106][2170/3239]	Time 0.228 (0.634)	Data Time 0.001 (0.020)	Loss 2.2069 (2.1838)	Entropy 0.70795 (0.71073)	Top-1 acc 74.219 (72.080)	Top-5 acc 88.281 (88.794)	lr 0.00087
Train [106][2180/3239]	Time 0.233 (0.634)	Data Time 0.001 (0.020)	Loss 2.2768 (2.1839)	Entropy 0.70786 (0.71071)	Top-1 acc 71.094 (72.080)	Top-5 acc 87.500 (88.792)	lr 0.00087
Train [106][2190/3239]	Time 0.225 (0.633)	Data Time 0.001 (0.020)	Loss 2.1733 (2.1839)	Entropy 0.70785 (0.71070)	Top-1 acc 73.438 (72.079)	Top-5 acc 88.672 (88.791)	lr 0.00087
Train [106][2200/3239]	Time 0.254 (0.632)	Data Time 0.001 (0.020)	Loss 2.3727 (2.1840)	Entropy 0.70772 (0.71069)	Top-1 acc 67.969 (72.072)	Top-5 acc 85.938 (88.793)	lr 0.00087
Train [106][2210/3239]	Time 2.458 (0.632)	Data Time 0.001 (0.020)	Loss 2.2018 (2.1839)	Entropy 0.70772 (0.71068)	Top-1 acc 69.141 (72.070)	Top-5 acc 88.672 (88.797)	lr 0.00087
Train [106][2220/3239]	Time 0.235 (0.630)	Data Time 0.001 (0.019)	Loss 2.1051 (2.1837)	Entropy 0.70771 (0.71066)	Top-1 acc 70.312 (72.074)	Top-5 acc 93.359 (88.803)	lr 0.00087
Train [106][2230/3239]	Time 0.233 (0.629)	Data Time 0.001 (0.019)	Loss 2.1662 (2.1837)	Entropy 0.70762 (0.71065)	Top-1 acc 75.391 (72.077)	Top-5 acc 88.281 (88.805)	lr 0.00087
Train [106][2240/3239]	Time 0.239 (0.628)	Data Time 0.001 (0.019)	Loss 2.0806 (2.1839)	Entropy 0.70762 (0.71063)	Top-1 acc 72.266 (72.074)	Top-5 acc 91.797 (88.800)	lr 0.00087
Train [106][2250/3239]	Time 0.230 (0.628)	Data Time 0.001 (0.019)	Loss 2.0930 (2.1840)	Entropy 0.70746 (0.71062)	Top-1 acc 72.266 (72.075)	Top-5 acc 90.234 (88.803)	lr 0.00087
Train [106][2260/3239]	Time 0.232 (0.627)	Data Time 0.001 (0.019)	Loss 2.3209 (2.1841)	Entropy 0.70743 (0.71061)	Top-1 acc 67.578 (72.072)	Top-5 acc 85.547 (88.797)	lr 0.00087
Train [106][2270/3239]	Time 0.227 (0.627)	Data Time 0.001 (0.019)	Loss 2.3291 (2.1842)	Entropy 0.70741 (0.71059)	Top-1 acc 68.359 (72.076)	Top-5 acc 87.109 (88.798)	lr 0.00087
Train [106][2280/3239]	Time 0.330 (0.626)	Data Time 0.001 (0.019)	Loss 2.2221 (2.1840)	Entropy 0.70731 (0.71058)	Top-1 acc 70.312 (72.079)	Top-5 acc 87.500 (88.801)	lr 0.00087
Train [106][2290/3239]	Time 0.218 (0.625)	Data Time 0.001 (0.019)	Loss 2.0442 (2.1842)	Entropy 0.70721 (0.71056)	Top-1 acc 77.344 (72.078)	Top-5 acc 90.234 (88.797)	lr 0.00086
Train [106][2300/3239]	Time 0.264 (0.625)	Data Time 0.001 (0.019)	Loss 2.2362 (2.1839)	Entropy 0.70721 (0.71055)	Top-1 acc 70.312 (72.088)	Top-5 acc 89.062 (88.804)	lr 0.00086
Train [106][2310/3239]	Time 0.256 (0.624)	Data Time 0.001 (0.019)	Loss 2.1952 (2.1839)	Entropy 0.70730 (0.71054)	Top-1 acc 70.703 (72.086)	Top-5 acc 87.891 (88.805)	lr 0.00086
Train [106][2320/3239]	Time 2.706 (0.623)	Data Time 0.001 (0.019)	Loss 2.0305 (2.1840)	Entropy 0.70730 (0.71052)	Top-1 acc 71.094 (72.082)	Top-5 acc 91.406 (88.802)	lr 0.00086
Train [106][2330/3239]	Time 0.241 (0.622)	Data Time 0.001 (0.019)	Loss 2.2284 (2.1839)	Entropy 0.70727 (0.71051)	Top-1 acc 71.875 (72.083)	Top-5 acc 88.672 (88.801)	lr 0.00086
Train [106][2340/3239]	Time 0.213 (0.621)	Data Time 0.001 (0.019)	Loss 2.0940 (2.1837)	Entropy 0.70721 (0.71049)	Top-1 acc 76.172 (72.089)	Top-5 acc 89.844 (88.806)	lr 0.00086
Train [106][2350/3239]	Time 0.232 (0.621)	Data Time 0.001 (0.018)	Loss 2.1993 (2.1838)	Entropy 0.70716 (0.71048)	Top-1 acc 71.094 (72.084)	Top-5 acc 90.234 (88.804)	lr 0.00086
Train [106][2360/3239]	Time 0.285 (0.620)	Data Time 0.001 (0.018)	Loss 2.1529 (2.1839)	Entropy 0.70710 (0.71046)	Top-1 acc 71.094 (72.083)	Top-5 acc 91.016 (88.803)	lr 0.00086
Train [106][2370/3239]	Time 0.253 (0.619)	Data Time 0.002 (0.018)	Loss 2.0739 (2.1839)	Entropy 0.70709 (0.71045)	Top-1 acc 73.438 (72.083)	Top-5 acc 88.281 (88.803)	lr 0.00086
Train [106][2380/3239]	Time 0.237 (0.619)	Data Time 0.001 (0.018)	Loss 2.1580 (2.1840)	Entropy 0.70710 (0.71044)	Top-1 acc 75.000 (72.078)	Top-5 acc 88.672 (88.802)	lr 0.00086
Train [106][2390/3239]	Time 0.242 (0.641)	Data Time 0.002 (0.018)	Loss 2.1196 (2.1840)	Entropy 0.70703 (0.71042)	Top-1 acc 73.047 (72.078)	Top-5 acc 89.844 (88.799)	lr 0.00086
Train [106][2400/3239]	Time 0.240 (0.641)	Data Time 0.002 (0.018)	Loss 2.2718 (2.1839)	Entropy 0.70704 (0.71041)	Top-1 acc 70.703 (72.083)	Top-5 acc 89.062 (88.803)	lr 0.00086
Train [106][2410/3239]	Time 0.228 (0.640)	Data Time 0.001 (0.018)	Loss 2.1017 (2.1839)	Entropy 0.70693 (0.71039)	Top-1 acc 71.875 (72.078)	Top-5 acc 94.141 (88.802)	lr 0.00086
Train [106][2420/3239]	Time 0.250 (0.639)	Data Time 0.001 (0.018)	Loss 2.1786 (2.1839)	Entropy 0.70700 (0.71038)	Top-1 acc 70.312 (72.078)	Top-5 acc 87.891 (88.801)	lr 0.00086
Train [106][2430/3239]	Time 2.718 (0.639)	Data Time 0.002 (0.018)	Loss 2.1211 (2.1837)	Entropy 0.70700 (0.71037)	Top-1 acc 73.438 (72.081)	Top-5 acc 92.578 (88.804)	lr 0.00086
Train [106][2440/3239]	Time 0.243 (0.637)	Data Time 0.002 (0.018)	Loss 2.1521 (2.1836)	Entropy 0.70702 (0.71035)	Top-1 acc 71.484 (72.084)	Top-5 acc 87.891 (88.805)	lr 0.00086
Train [106][2450/3239]	Time 0.239 (0.637)	Data Time 0.001 (0.018)	Loss 2.0854 (2.1835)	Entropy 0.70712 (0.71034)	Top-1 acc 73.047 (72.086)	Top-5 acc 91.016 (88.807)	lr 0.00086
Train [106][2460/3239]	Time 0.238 (0.636)	Data Time 0.001 (0.018)	Loss 2.0992 (2.1834)	Entropy 0.70706 (0.71033)	Top-1 acc 73.438 (72.090)	Top-5 acc 91.406 (88.813)	lr 0.00086
Train [106][2470/3239]	Time 0.244 (0.636)	Data Time 0.001 (0.018)	Loss 2.2115 (2.1833)	Entropy 0.70701 (0.71031)	Top-1 acc 70.703 (72.091)	Top-5 acc 89.062 (88.814)	lr 0.00086
Train [106][2480/3239]	Time 0.227 (0.635)	Data Time 0.001 (0.018)	Loss 2.2376 (2.1832)	Entropy 0.70700 (0.71030)	Top-1 acc 70.312 (72.092)	Top-5 acc 87.500 (88.813)	lr 0.00086
Train [106][2490/3239]	Time 0.252 (0.634)	Data Time 0.001 (0.018)	Loss 2.1007 (2.1832)	Entropy 0.70696 (0.71029)	Top-1 acc 72.656 (72.092)	Top-5 acc 90.234 (88.813)	lr 0.00086
Train [106][2500/3239]	Time 0.240 (0.634)	Data Time 0.001 (0.017)	Loss 2.2181 (2.1833)	Entropy 0.70695 (0.71027)	Top-1 acc 69.922 (72.084)	Top-5 acc 88.281 (88.809)	lr 0.00086
Train [106][2510/3239]	Time 0.229 (0.633)	Data Time 0.001 (0.017)	Loss 2.0918 (2.1833)	Entropy 0.70686 (0.71026)	Top-1 acc 73.047 (72.084)	Top-5 acc 91.406 (88.809)	lr 0.00086
Train [106][2520/3239]	Time 0.244 (0.633)	Data Time 0.002 (0.017)	Loss 1.9787 (2.1833)	Entropy 0.70683 (0.71025)	Top-1 acc 75.781 (72.086)	Top-5 acc 92.188 (88.812)	lr 0.00086
Train [106][2530/3239]	Time 0.335 (0.632)	Data Time 0.001 (0.017)	Loss 2.0611 (2.1831)	Entropy 0.70708 (0.71023)	Top-1 acc 75.000 (72.092)	Top-5 acc 90.234 (88.815)	lr 0.00086
Train [106][2540/3239]	Time 2.674 (0.632)	Data Time 0.001 (0.017)	Loss 2.1732 (2.1832)	Entropy 0.70708 (0.71022)	Top-1 acc 72.656 (72.087)	Top-5 acc 88.281 (88.811)	lr 0.00086
Train [106][2550/3239]	Time 0.233 (0.630)	Data Time 0.001 (0.017)	Loss 2.0193 (2.1832)	Entropy 0.70703 (0.71021)	Top-1 acc 75.781 (72.089)	Top-5 acc 91.016 (88.810)	lr 0.00086
Train [106][2560/3239]	Time 0.238 (0.629)	Data Time 0.002 (0.017)	Loss 2.0860 (2.1832)	Entropy 0.70710 (0.71020)	Top-1 acc 76.953 (72.086)	Top-5 acc 87.500 (88.805)	lr 0.00086
Train [106][2570/3239]	Time 0.261 (0.629)	Data Time 0.001 (0.017)	Loss 2.1136 (2.1834)	Entropy 0.70709 (0.71018)	Top-1 acc 73.828 (72.078)	Top-5 acc 88.672 (88.802)	lr 0.00085
Train [106][2580/3239]	Time 0.230 (0.628)	Data Time 0.001 (0.017)	Loss 2.2263 (2.1833)	Entropy 0.70709 (0.71017)	Top-1 acc 69.922 (72.079)	Top-5 acc 87.891 (88.804)	lr 0.00085
Train [106][2590/3239]	Time 0.225 (0.628)	Data Time 0.001 (0.017)	Loss 2.0943 (2.1832)	Entropy 0.70705 (0.71016)	Top-1 acc 73.828 (72.082)	Top-5 acc 91.406 (88.806)	lr 0.00085
Train [106][2600/3239]	Time 0.231 (0.627)	Data Time 0.001 (0.017)	Loss 2.2212 (2.1831)	Entropy 0.70702 (0.71015)	Top-1 acc 70.312 (72.086)	Top-5 acc 86.328 (88.807)	lr 0.00085
Train [106][2610/3239]	Time 0.230 (0.627)	Data Time 0.001 (0.017)	Loss 2.2152 (2.1828)	Entropy 0.70706 (0.71014)	Top-1 acc 71.094 (72.096)	Top-5 acc 90.625 (88.814)	lr 0.00085
Train [106][2620/3239]	Time 0.240 (0.626)	Data Time 0.001 (0.017)	Loss 2.0341 (2.1826)	Entropy 0.70697 (0.71012)	Top-1 acc 77.344 (72.104)	Top-5 acc 90.625 (88.818)	lr 0.00085
Train [106][2630/3239]	Time 0.238 (0.625)	Data Time 0.001 (0.017)	Loss 2.1257 (2.1824)	Entropy 0.70690 (0.71011)	Top-1 acc 73.047 (72.109)	Top-5 acc 89.453 (88.820)	lr 0.00085
Train [106][2640/3239]	Time 0.216 (0.625)	Data Time 0.001 (0.017)	Loss 2.1610 (2.1823)	Entropy 0.70691 (0.71010)	Top-1 acc 74.609 (72.114)	Top-5 acc 89.453 (88.822)	lr 0.00085
Train [106][2650/3239]	Time 0.231 (0.624)	Data Time 0.001 (0.017)	Loss 2.1208 (2.1822)	Entropy 0.70691 (0.71009)	Top-1 acc 72.266 (72.115)	Top-5 acc 89.453 (88.822)	lr 0.00085
Train [106][2660/3239]	Time 0.237 (0.624)	Data Time 0.001 (0.017)	Loss 2.3071 (2.1824)	Entropy 0.70687 (0.71008)	Top-1 acc 71.484 (72.111)	Top-5 acc 85.938 (88.820)	lr 0.00085
Train [106][2670/3239]	Time 0.230 (0.623)	Data Time 0.001 (0.016)	Loss 2.1554 (2.1823)	Entropy 0.70686 (0.71006)	Top-1 acc 73.828 (72.113)	Top-5 acc 87.891 (88.822)	lr 0.00085
Train [106][2680/3239]	Time 0.269 (0.623)	Data Time 0.001 (0.016)	Loss 2.2005 (2.1823)	Entropy 0.70687 (0.71005)	Top-1 acc 71.094 (72.111)	Top-5 acc 89.453 (88.819)	lr 0.00085
Train [106][2690/3239]	Time 0.236 (0.622)	Data Time 0.001 (0.016)	Loss 2.0764 (2.1824)	Entropy 0.70681 (0.71004)	Top-1 acc 75.781 (72.111)	Top-5 acc 88.672 (88.816)	lr 0.00085
Train [106][2700/3239]	Time 0.227 (0.622)	Data Time 0.001 (0.016)	Loss 2.2184 (2.1825)	Entropy 0.70688 (0.71003)	Top-1 acc 72.266 (72.111)	Top-5 acc 87.500 (88.813)	lr 0.00085
Train [106][2710/3239]	Time 0.229 (0.621)	Data Time 0.001 (0.016)	Loss 2.2584 (2.1826)	Entropy 0.70690 (0.71002)	Top-1 acc 66.797 (72.107)	Top-5 acc 88.281 (88.811)	lr 0.00085
Train [106][2720/3239]	Time 0.227 (0.621)	Data Time 0.001 (0.016)	Loss 2.0908 (2.1826)	Entropy 0.70690 (0.71000)	Top-1 acc 76.953 (72.109)	Top-5 acc 90.625 (88.811)	lr 0.00085
Train [106][2730/3239]	Time 0.237 (0.620)	Data Time 0.002 (0.016)	Loss 2.1741 (2.1825)	Entropy 0.70691 (0.70999)	Top-1 acc 71.484 (72.113)	Top-5 acc 89.062 (88.812)	lr 0.00085
Train [106][2740/3239]	Time 0.262 (0.638)	Data Time 0.004 (0.016)	Loss 2.0229 (2.1825)	Entropy 0.70681 (0.70998)	Top-1 acc 73.828 (72.112)	Top-5 acc 89.453 (88.812)	lr 0.00085
Train [106][2750/3239]	Time 0.233 (0.638)	Data Time 0.002 (0.016)	Loss 1.9733 (2.1825)	Entropy 0.70679 (0.70997)	Top-1 acc 78.125 (72.113)	Top-5 acc 89.844 (88.811)	lr 0.00085
Train [106][2760/3239]	Time 0.238 (0.637)	Data Time 0.002 (0.016)	Loss 2.2750 (2.1826)	Entropy 0.70681 (0.70996)	Top-1 acc 73.047 (72.113)	Top-5 acc 88.672 (88.810)	lr 0.00085
Train [106][2770/3239]	Time 0.244 (0.637)	Data Time 0.001 (0.016)	Loss 2.2168 (2.1826)	Entropy 0.70675 (0.70995)	Top-1 acc 73.047 (72.110)	Top-5 acc 89.453 (88.810)	lr 0.00085
Train [106][2780/3239]	Time 0.315 (0.636)	Data Time 0.001 (0.016)	Loss 2.2765 (2.1826)	Entropy 0.70675 (0.70994)	Top-1 acc 67.969 (72.111)	Top-5 acc 86.328 (88.807)	lr 0.00085
Train [106][2790/3239]	Time 0.239 (0.636)	Data Time 0.001 (0.016)	Loss 2.2277 (2.1826)	Entropy 0.70673 (0.70992)	Top-1 acc 71.484 (72.108)	Top-5 acc 87.891 (88.809)	lr 0.00085
Train [106][2800/3239]	Time 0.261 (0.635)	Data Time 0.001 (0.016)	Loss 2.1435 (2.1827)	Entropy 0.70673 (0.70991)	Top-1 acc 73.047 (72.108)	Top-5 acc 90.234 (88.809)	lr 0.00085
Train [106][2810/3239]	Time 0.252 (0.635)	Data Time 0.002 (0.016)	Loss 2.1548 (2.1826)	Entropy 0.70678 (0.70990)	Top-1 acc 73.828 (72.106)	Top-5 acc 91.016 (88.809)	lr 0.00085
Train [106][2820/3239]	Time 0.295 (0.634)	Data Time 0.001 (0.016)	Loss 2.0738 (2.1824)	Entropy 0.70677 (0.70989)	Top-1 acc 75.781 (72.114)	Top-5 acc 90.234 (88.812)	lr 0.00085
Train [106][2830/3239]	Time 0.223 (0.634)	Data Time 0.001 (0.016)	Loss 2.3563 (2.1824)	Entropy 0.70683 (0.70988)	Top-1 acc 70.703 (72.117)	Top-5 acc 87.891 (88.813)	lr 0.00085
Train [106][2840/3239]	Time 0.233 (0.633)	Data Time 0.001 (0.016)	Loss 2.1781 (2.1825)	Entropy 0.70678 (0.70987)	Top-1 acc 74.609 (72.112)	Top-5 acc 87.500 (88.813)	lr 0.00084
Train [106][2850/3239]	Time 0.233 (0.633)	Data Time 0.001 (0.016)	Loss 2.2161 (2.1826)	Entropy 0.70676 (0.70986)	Top-1 acc 70.312 (72.111)	Top-5 acc 88.281 (88.811)	lr 0.00084
Train [106][2860/3239]	Time 0.221 (0.632)	Data Time 0.001 (0.015)	Loss 2.1092 (2.1826)	Entropy 0.70662 (0.70985)	Top-1 acc 75.000 (72.111)	Top-5 acc 89.453 (88.809)	lr 0.00084
Train [106][2870/3239]	Time 0.251 (0.632)	Data Time 0.002 (0.015)	Loss 2.2339 (2.1829)	Entropy 0.70666 (0.70984)	Top-1 acc 69.922 (72.104)	Top-5 acc 89.062 (88.804)	lr 0.00084
Train [106][2880/3239]	Time 0.286 (0.631)	Data Time 0.002 (0.015)	Loss 2.2322 (2.1828)	Entropy 0.70667 (0.70982)	Top-1 acc 70.703 (72.104)	Top-5 acc 88.281 (88.805)	lr 0.00084
Train [106][2890/3239]	Time 0.218 (0.631)	Data Time 0.001 (0.015)	Loss 2.1241 (2.1828)	Entropy 0.70666 (0.70981)	Top-1 acc 76.562 (72.108)	Top-5 acc 87.500 (88.807)	lr 0.00084
Train [106][2900/3239]	Time 0.236 (0.630)	Data Time 0.001 (0.015)	Loss 2.0970 (2.1826)	Entropy 0.70671 (0.70980)	Top-1 acc 73.438 (72.112)	Top-5 acc 89.844 (88.811)	lr 0.00084
Train [106][2910/3239]	Time 0.225 (0.630)	Data Time 0.001 (0.015)	Loss 2.2520 (2.1826)	Entropy 0.70675 (0.70979)	Top-1 acc 70.703 (72.115)	Top-5 acc 87.500 (88.811)	lr 0.00084
Train [106][2920/3239]	Time 0.261 (0.629)	Data Time 0.001 (0.015)	Loss 2.1248 (2.1826)	Entropy 0.70676 (0.70978)	Top-1 acc 75.391 (72.115)	Top-5 acc 87.500 (88.809)	lr 0.00084
Train [106][2930/3239]	Time 0.245 (0.629)	Data Time 0.001 (0.015)	Loss 2.0560 (2.1825)	Entropy 0.70674 (0.70977)	Top-1 acc 75.000 (72.116)	Top-5 acc 90.625 (88.810)	lr 0.00084
Train [106][2940/3239]	Time 0.240 (0.628)	Data Time 0.001 (0.015)	Loss 2.0588 (2.1826)	Entropy 0.70655 (0.70976)	Top-1 acc 75.000 (72.114)	Top-5 acc 91.797 (88.809)	lr 0.00084
Train [106][2950/3239]	Time 0.320 (0.628)	Data Time 0.001 (0.015)	Loss 2.2439 (2.1827)	Entropy 0.70653 (0.70975)	Top-1 acc 67.188 (72.109)	Top-5 acc 87.109 (88.808)	lr 0.00084
Train [106][2960/3239]	Time 0.260 (0.627)	Data Time 0.001 (0.015)	Loss 2.0787 (2.1825)	Entropy 0.70653 (0.70974)	Top-1 acc 75.391 (72.110)	Top-5 acc 88.672 (88.809)	lr 0.00084
Train [106][2970/3239]	Time 0.243 (0.627)	Data Time 0.001 (0.015)	Loss 2.0946 (2.1825)	Entropy 0.70649 (0.70973)	Top-1 acc 73.438 (72.110)	Top-5 acc 90.234 (88.811)	lr 0.00084
Train [106][2980/3239]	Time 0.260 (0.626)	Data Time 0.001 (0.015)	Loss 2.2772 (2.1826)	Entropy 0.70643 (0.70972)	Top-1 acc 67.578 (72.103)	Top-5 acc 88.281 (88.808)	lr 0.00084
Train [106][2990/3239]	Time 0.315 (0.626)	Data Time 0.001 (0.015)	Loss 2.3989 (2.1827)	Entropy 0.70642 (0.70971)	Top-1 acc 65.234 (72.102)	Top-5 acc 83.203 (88.804)	lr 0.00084
Train [106][3000/3239]	Time 0.220 (0.625)	Data Time 0.001 (0.015)	Loss 2.1040 (2.1827)	Entropy 0.70648 (0.70970)	Top-1 acc 73.828 (72.103)	Top-5 acc 89.844 (88.804)	lr 0.00084
Train [106][3010/3239]	Time 0.220 (0.625)	Data Time 0.001 (0.015)	Loss 2.3345 (2.1829)	Entropy 0.70632 (0.70969)	Top-1 acc 67.578 (72.100)	Top-5 acc 85.547 (88.799)	lr 0.00084
Train [106][3020/3239]	Time 0.249 (0.624)	Data Time 0.001 (0.015)	Loss 2.0867 (2.1829)	Entropy 0.70626 (0.70967)	Top-1 acc 76.172 (72.102)	Top-5 acc 90.234 (88.801)	lr 0.00084
Train [106][3030/3239]	Time 0.309 (0.624)	Data Time 0.001 (0.015)	Loss 2.2423 (2.1829)	Entropy 0.70624 (0.70966)	Top-1 acc 69.141 (72.098)	Top-5 acc 87.891 (88.800)	lr 0.00084
Train [106][3040/3239]	Time 0.216 (0.623)	Data Time 0.001 (0.015)	Loss 1.9912 (2.1828)	Entropy 0.70624 (0.70965)	Top-1 acc 78.516 (72.102)	Top-5 acc 91.406 (88.800)	lr 0.00084
Train [106][3050/3239]	Time 0.216 (0.623)	Data Time 0.001 (0.015)	Loss 2.2365 (2.1827)	Entropy 0.70625 (0.70964)	Top-1 acc 71.875 (72.104)	Top-5 acc 88.281 (88.801)	lr 0.00084
Train [106][3060/3239]	Time 0.215 (0.622)	Data Time 0.001 (0.015)	Loss 2.2601 (2.1828)	Entropy 0.70619 (0.70963)	Top-1 acc 71.875 (72.102)	Top-5 acc 86.719 (88.799)	lr 0.00084
Train [106][3070/3239]	Time 0.456 (0.638)	Data Time 0.004 (0.015)	Loss 2.0029 (2.1829)	Entropy 0.70617 (0.70962)	Top-1 acc 77.344 (72.103)	Top-5 acc 92.188 (88.800)	lr 0.00084
Train [106][3080/3239]	Time 0.246 (0.638)	Data Time 0.002 (0.014)	Loss 2.2161 (2.1828)	Entropy 0.70629 (0.70961)	Top-1 acc 70.312 (72.100)	Top-5 acc 87.891 (88.800)	lr 0.00084
Train [106][3090/3239]	Time 0.218 (0.638)	Data Time 0.002 (0.014)	Loss 2.1003 (2.1827)	Entropy 0.70624 (0.70960)	Top-1 acc 74.219 (72.106)	Top-5 acc 90.625 (88.800)	lr 0.00084
Train [106][3100/3239]	Time 0.241 (0.637)	Data Time 0.002 (0.014)	Loss 2.1792 (2.1826)	Entropy 0.70624 (0.70959)	Top-1 acc 72.266 (72.106)	Top-5 acc 91.016 (88.804)	lr 0.00084
Train [106][3110/3239]	Time 0.301 (0.637)	Data Time 0.001 (0.014)	Loss 2.1775 (2.1827)	Entropy 0.70627 (0.70957)	Top-1 acc 74.609 (72.104)	Top-5 acc 87.500 (88.801)	lr 0.00083
Train [106][3120/3239]	Time 0.262 (0.636)	Data Time 0.001 (0.014)	Loss 2.1487 (2.1827)	Entropy 0.70626 (0.70956)	Top-1 acc 76.562 (72.105)	Top-5 acc 88.672 (88.799)	lr 0.00083
Train [106][3130/3239]	Time 0.264 (0.636)	Data Time 0.001 (0.014)	Loss 2.1516 (2.1827)	Entropy 0.70623 (0.70955)	Top-1 acc 71.484 (72.103)	Top-5 acc 87.891 (88.797)	lr 0.00083
Train [106][3140/3239]	Time 0.223 (0.635)	Data Time 0.001 (0.014)	Loss 2.2659 (2.1827)	Entropy 0.70616 (0.70954)	Top-1 acc 68.750 (72.103)	Top-5 acc 87.500 (88.799)	lr 0.00083
Train [106][3150/3239]	Time 0.242 (0.635)	Data Time 0.001 (0.014)	Loss 2.1994 (2.1827)	Entropy 0.70608 (0.70953)	Top-1 acc 70.703 (72.102)	Top-5 acc 87.891 (88.799)	lr 0.00083
Train [106][3160/3239]	Time 0.230 (0.634)	Data Time 0.001 (0.014)	Loss 2.1840 (2.1825)	Entropy 0.70606 (0.70952)	Top-1 acc 70.703 (72.106)	Top-5 acc 87.500 (88.801)	lr 0.00083
Train [106][3170/3239]	Time 0.229 (0.634)	Data Time 0.001 (0.014)	Loss 2.1511 (2.1825)	Entropy 0.70602 (0.70951)	Top-1 acc 75.781 (72.106)	Top-5 acc 88.281 (88.800)	lr 0.00083
Train [106][3180/3239]	Time 0.235 (0.633)	Data Time 0.000 (0.014)	Loss 2.2255 (2.1825)	Entropy 0.70598 (0.70950)	Top-1 acc 72.656 (72.110)	Top-5 acc 87.500 (88.801)	lr 0.00083
Train [106][3190/3239]	Time 0.240 (0.633)	Data Time 0.000 (0.014)	Loss 2.1213 (2.1824)	Entropy 0.70623 (0.70949)	Top-1 acc 70.703 (72.110)	Top-5 acc 89.453 (88.799)	lr 0.00083
Train [106][3200/3239]	Time 0.227 (0.632)	Data Time 0.000 (0.014)	Loss 2.2266 (2.1824)	Entropy 0.70626 (0.70948)	Top-1 acc 69.922 (72.111)	Top-5 acc 85.938 (88.801)	lr 0.00083
Train [106][3210/3239]	Time 0.223 (0.632)	Data Time 0.000 (0.014)	Loss 2.3300 (2.1824)	Entropy 0.70621 (0.70947)	Top-1 acc 67.188 (72.109)	Top-5 acc 86.719 (88.801)	lr 0.00083
Train [106][3220/3239]	Time 0.222 (0.631)	Data Time 0.000 (0.014)	Loss 2.1602 (2.1824)	Entropy 0.70613 (0.70946)	Top-1 acc 70.703 (72.107)	Top-5 acc 90.234 (88.802)	lr 0.00083
Train [106][3230/3239]	Time 0.226 (0.631)	Data Time 0.000 (0.014)	Loss 2.2578 (2.1824)	Entropy 0.70612 (0.70945)	Top-1 acc 71.484 (72.107)	Top-5 acc 86.719 (88.802)	lr 0.00083
Train [106][3239/3239]	Time 2.482 (0.630)	Data Time 0.000 (0.014)	Loss 2.1881 (2.1823)	Entropy 0.70612 (0.70944)	Top-1 acc 70.370 (72.114)	Top-5 acc 91.358 (88.803)	lr 0.00083
==========Valid [106/120]	loss 1.210	top-1 acc 72.332 (72.484)	top-5 acc 89.745	Train top-1 72.114	top-5 88.803	Entropy 0.70612	Latency-None: 0.000ms	Flops: 546.53M
Train [107][0/3239]	Time 43.599 (43.599)	Data Time 38.732 (38.732)	Loss 2.1725 (2.1725)	Entropy 0.70601 (0.70601)	Top-1 acc 74.609 (74.609)	Top-5 acc 89.844 (89.844)	lr 0.00083
Train [107][10/3239]	Time 2.648 (4.580)	Data Time 0.002 (3.596)	Loss 2.0373 (2.1632)	Entropy 0.70601 (0.70601)	Top-1 acc 74.219 (72.408)	Top-5 acc 89.453 (88.601)	lr 0.00083
Train [107][20/3239]	Time 0.233 (2.523)	Data Time 0.002 (1.884)	Loss 2.0757 (2.1771)	Entropy 0.70590 (0.70596)	Top-1 acc 73.438 (72.228)	Top-5 acc 90.234 (88.783)	lr 0.00083
Train [107][30/3239]	Time 0.253 (1.872)	Data Time 0.001 (1.277)	Loss 2.1512 (2.1604)	Entropy 0.70591 (0.70594)	Top-1 acc 71.484 (72.732)	Top-5 acc 88.672 (89.138)	lr 0.00083
Train [107][40/3239]	Time 0.331 (1.541)	Data Time 0.001 (0.966)	Loss 2.1843 (2.1596)	Entropy 0.70579 (0.70591)	Top-1 acc 72.656 (72.837)	Top-5 acc 89.453 (89.139)	lr 0.00083
Train [107][50/3239]	Time 0.228 (1.331)	Data Time 0.001 (0.777)	Loss 2.1444 (2.1649)	Entropy 0.70573 (0.70588)	Top-1 acc 70.312 (72.587)	Top-5 acc 89.844 (88.963)	lr 0.00083
Train [107][60/3239]	Time 0.237 (1.197)	Data Time 0.003 (0.650)	Loss 2.0652 (2.1575)	Entropy 0.70570 (0.70585)	Top-1 acc 75.781 (72.733)	Top-5 acc 89.453 (89.146)	lr 0.00083
Train [107][70/3239]	Time 0.260 (1.098)	Data Time 0.001 (0.559)	Loss 2.0596 (2.1559)	Entropy 0.70569 (0.70583)	Top-1 acc 75.781 (72.843)	Top-5 acc 91.016 (89.244)	lr 0.00083
Train [107][80/3239]	Time 0.269 (1.023)	Data Time 0.001 (0.490)	Loss 2.3795 (2.1636)	Entropy 0.70565 (0.70581)	Top-1 acc 67.578 (72.618)	Top-5 acc 83.594 (89.106)	lr 0.00083
Train [107][90/3239]	Time 0.240 (0.963)	Data Time 0.001 (0.436)	Loss 2.1905 (2.1643)	Entropy 0.70562 (0.70579)	Top-1 acc 71.875 (72.691)	Top-5 acc 87.500 (89.105)	lr 0.00083
Train [107][100/3239]	Time 0.216 (0.915)	Data Time 0.001 (0.393)	Loss 2.5503 (2.1635)	Entropy 0.70564 (0.70577)	Top-1 acc 62.891 (72.645)	Top-5 acc 82.422 (89.128)	lr 0.00083
Train [107][110/3239]	Time 0.258 (0.875)	Data Time 0.001 (0.358)	Loss 2.0705 (2.1627)	Entropy 0.70551 (0.70576)	Top-1 acc 74.609 (72.744)	Top-5 acc 90.234 (89.101)	lr 0.00083
Train [107][120/3239]	Time 2.611 (0.843)	Data Time 0.001 (0.328)	Loss 2.0999 (2.1623)	Entropy 0.70551 (0.70574)	Top-1 acc 73.047 (72.785)	Top-5 acc 91.406 (89.130)	lr 0.00083
Train [107][130/3239]	Time 0.218 (0.797)	Data Time 0.001 (0.303)	Loss 2.2198 (2.1649)	Entropy 0.70550 (0.70572)	Top-1 acc 69.922 (72.710)	Top-5 acc 88.672 (89.048)	lr 0.00083
Train [107][140/3239]	Time 0.228 (0.774)	Data Time 0.001 (0.282)	Loss 2.2146 (2.1654)	Entropy 0.70545 (0.70570)	Top-1 acc 72.656 (72.703)	Top-5 acc 87.109 (89.051)	lr 0.00083
Train [107][150/3239]	Time 0.228 (0.754)	Data Time 0.001 (0.263)	Loss 2.1812 (2.1652)	Entropy 0.70539 (0.70568)	Top-1 acc 74.219 (72.685)	Top-5 acc 87.500 (89.070)	lr 0.00082
Train [107][160/3239]	Time 0.230 (0.737)	Data Time 0.001 (0.247)	Loss 2.2884 (2.1670)	Entropy 0.70524 (0.70566)	Top-1 acc 66.406 (72.598)	Top-5 acc 86.328 (89.038)	lr 0.00082
Train [107][170/3239]	Time 0.309 (0.722)	Data Time 0.002 (0.233)	Loss 2.1179 (2.1673)	Entropy 0.70522 (0.70563)	Top-1 acc 72.656 (72.574)	Top-5 acc 89.062 (89.033)	lr 0.00082
Train [107][180/3239]	Time 0.286 (1.016)	Data Time 0.003 (0.220)	Loss 2.2187 (2.1683)	Entropy 0.70520 (0.70561)	Top-1 acc 71.875 (72.589)	Top-5 acc 89.062 (89.013)	lr 0.00082
Train [107][190/3239]	Time 0.234 (0.989)	Data Time 0.002 (0.209)	Loss 2.2856 (2.1684)	Entropy 0.70521 (0.70559)	Top-1 acc 71.094 (72.581)	Top-5 acc 85.938 (89.022)	lr 0.00082
Train [107][200/3239]	Time 0.234 (0.963)	Data Time 0.001 (0.198)	Loss 2.1664 (2.1681)	Entropy 0.70518 (0.70557)	Top-1 acc 75.781 (72.608)	Top-5 acc 87.109 (89.016)	lr 0.00082
Train [107][210/3239]	Time 0.310 (0.941)	Data Time 0.001 (0.189)	Loss 2.2130 (2.1765)	Entropy 0.70514 (0.70555)	Top-1 acc 69.141 (72.445)	Top-5 acc 91.016 (88.951)	lr 0.00082
Train [107][220/3239]	Time 0.242 (0.921)	Data Time 0.001 (0.181)	Loss 2.1241 (2.1773)	Entropy 0.70516 (0.70553)	Top-1 acc 73.047 (72.377)	Top-5 acc 91.016 (88.962)	lr 0.00082
Train [107][230/3239]	Time 2.624 (0.902)	Data Time 0.001 (0.173)	Loss 2.2003 (2.1752)	Entropy 0.70516 (0.70552)	Top-1 acc 70.312 (72.428)	Top-5 acc 89.062 (88.991)	lr 0.00082
Train [107][240/3239]	Time 0.243 (0.875)	Data Time 0.002 (0.166)	Loss 2.0947 (2.1750)	Entropy 0.70517 (0.70550)	Top-1 acc 75.000 (72.459)	Top-5 acc 91.016 (89.020)	lr 0.00082
Train [107][250/3239]	Time 0.389 (0.861)	Data Time 0.002 (0.159)	Loss 2.3153 (2.1743)	Entropy 0.70516 (0.70549)	Top-1 acc 69.531 (72.440)	Top-5 acc 86.328 (89.048)	lr 0.00082
Train [107][260/3239]	Time 0.246 (0.847)	Data Time 0.001 (0.153)	Loss 2.2428 (2.1739)	Entropy 0.70518 (0.70548)	Top-1 acc 71.484 (72.489)	Top-5 acc 88.281 (89.052)	lr 0.00082
Train [107][270/3239]	Time 0.230 (0.834)	Data Time 0.001 (0.148)	Loss 2.1811 (2.1740)	Entropy 0.70515 (0.70546)	Top-1 acc 71.875 (72.473)	Top-5 acc 89.844 (89.074)	lr 0.00082
Train [107][280/3239]	Time 0.243 (0.821)	Data Time 0.001 (0.142)	Loss 2.1860 (2.1738)	Entropy 0.70513 (0.70545)	Top-1 acc 71.484 (72.480)	Top-5 acc 91.016 (89.067)	lr 0.00082
Train [107][290/3239]	Time 0.303 (0.810)	Data Time 0.002 (0.138)	Loss 2.2474 (2.1744)	Entropy 0.70528 (0.70544)	Top-1 acc 69.922 (72.467)	Top-5 acc 86.328 (89.044)	lr 0.00082
Train [107][300/3239]	Time 0.236 (0.799)	Data Time 0.001 (0.133)	Loss 2.3261 (2.1755)	Entropy 0.70527 (0.70544)	Top-1 acc 68.750 (72.440)	Top-5 acc 85.156 (89.009)	lr 0.00082
Train [107][310/3239]	Time 0.232 (0.790)	Data Time 0.001 (0.129)	Loss 2.0111 (2.1750)	Entropy 0.70530 (0.70543)	Top-1 acc 76.562 (72.446)	Top-5 acc 90.234 (88.996)	lr 0.00082
Train [107][320/3239]	Time 0.238 (0.780)	Data Time 0.001 (0.125)	Loss 2.1275 (2.1743)	Entropy 0.70527 (0.70543)	Top-1 acc 72.266 (72.437)	Top-5 acc 91.016 (89.016)	lr 0.00082
Train [107][330/3239]	Time 0.260 (0.772)	Data Time 0.001 (0.121)	Loss 2.1546 (2.1729)	Entropy 0.70524 (0.70542)	Top-1 acc 74.609 (72.493)	Top-5 acc 88.672 (89.027)	lr 0.00082
Train [107][340/3239]	Time 2.562 (0.763)	Data Time 0.001 (0.118)	Loss 2.2430 (2.1744)	Entropy 0.70524 (0.70542)	Top-1 acc 70.703 (72.460)	Top-5 acc 87.109 (89.002)	lr 0.00082
Train [107][350/3239]	Time 0.232 (0.748)	Data Time 0.001 (0.114)	Loss 2.1633 (2.1741)	Entropy 0.70524 (0.70541)	Top-1 acc 71.094 (72.464)	Top-5 acc 85.938 (89.009)	lr 0.00082
Train [107][360/3239]	Time 0.230 (0.742)	Data Time 0.001 (0.111)	Loss 2.1555 (2.1728)	Entropy 0.70520 (0.70541)	Top-1 acc 71.094 (72.484)	Top-5 acc 87.891 (89.041)	lr 0.00082
Train [107][370/3239]	Time 0.232 (0.734)	Data Time 0.001 (0.108)	Loss 2.0034 (2.1709)	Entropy 0.70524 (0.70540)	Top-1 acc 75.781 (72.497)	Top-5 acc 92.969 (89.098)	lr 0.00082
Train [107][380/3239]	Time 0.252 (0.728)	Data Time 0.001 (0.106)	Loss 1.9783 (2.1705)	Entropy 0.70525 (0.70540)	Top-1 acc 77.344 (72.513)	Top-5 acc 95.312 (89.132)	lr 0.00082
Train [107][390/3239]	Time 0.217 (0.722)	Data Time 0.001 (0.103)	Loss 2.0976 (2.1699)	Entropy 0.70519 (0.70539)	Top-1 acc 75.781 (72.520)	Top-5 acc 90.234 (89.140)	lr 0.00082
Train [107][400/3239]	Time 0.245 (0.716)	Data Time 0.001 (0.100)	Loss 2.1704 (2.1702)	Entropy 0.70519 (0.70539)	Top-1 acc 74.219 (72.523)	Top-5 acc 89.062 (89.120)	lr 0.00082
Train [107][410/3239]	Time 0.255 (0.710)	Data Time 0.002 (0.098)	Loss 2.1634 (2.1700)	Entropy 0.70511 (0.70538)	Top-1 acc 71.094 (72.515)	Top-5 acc 88.281 (89.129)	lr 0.00082
Train [107][420/3239]	Time 0.219 (0.706)	Data Time 0.001 (0.096)	Loss 2.1516 (2.1690)	Entropy 0.70515 (0.70538)	Top-1 acc 72.656 (72.550)	Top-5 acc 88.672 (89.130)	lr 0.00082
Train [107][430/3239]	Time 0.223 (0.701)	Data Time 0.001 (0.094)	Loss 2.1143 (2.1690)	Entropy 0.70513 (0.70537)	Top-1 acc 71.875 (72.533)	Top-5 acc 90.625 (89.145)	lr 0.00081
Train [107][440/3239]	Time 0.267 (0.696)	Data Time 0.001 (0.091)	Loss 2.0494 (2.1695)	Entropy 0.70500 (0.70537)	Top-1 acc 77.734 (72.518)	Top-5 acc 91.016 (89.123)	lr 0.00081
Train [107][450/3239]	Time 2.603 (0.691)	Data Time 0.001 (0.089)	Loss 2.0190 (2.1691)	Entropy 0.70500 (0.70536)	Top-1 acc 74.609 (72.533)	Top-5 acc 91.016 (89.139)	lr 0.00081
Train [107][460/3239]	Time 0.324 (0.682)	Data Time 0.001 (0.088)	Loss 2.1281 (2.1685)	Entropy 0.70504 (0.70535)	Top-1 acc 72.656 (72.553)	Top-5 acc 89.453 (89.149)	lr 0.00081
Train [107][470/3239]	Time 0.235 (0.678)	Data Time 0.001 (0.086)	Loss 2.2505 (2.1679)	Entropy 0.70502 (0.70534)	Top-1 acc 68.359 (72.548)	Top-5 acc 89.844 (89.151)	lr 0.00081
Train [107][480/3239]	Time 0.234 (0.673)	Data Time 0.001 (0.084)	Loss 2.2628 (2.1681)	Entropy 0.70503 (0.70534)	Top-1 acc 72.656 (72.533)	Top-5 acc 86.719 (89.149)	lr 0.00081
Train [107][490/3239]	Time 0.224 (0.670)	Data Time 0.001 (0.082)	Loss 2.2063 (2.1689)	Entropy 0.70498 (0.70533)	Top-1 acc 71.094 (72.499)	Top-5 acc 87.500 (89.134)	lr 0.00081
Train [107][500/3239]	Time 0.322 (0.666)	Data Time 0.001 (0.081)	Loss 2.1880 (2.1701)	Entropy 0.70500 (0.70532)	Top-1 acc 71.094 (72.480)	Top-5 acc 88.281 (89.105)	lr 0.00081
Train [107][510/3239]	Time 0.235 (0.662)	Data Time 0.001 (0.079)	Loss 2.2489 (2.1708)	Entropy 0.70485 (0.70532)	Top-1 acc 71.875 (72.483)	Top-5 acc 87.109 (89.092)	lr 0.00081
Train [107][520/3239]	Time 0.228 (0.659)	Data Time 0.001 (0.078)	Loss 2.2015 (2.1698)	Entropy 0.70486 (0.70531)	Top-1 acc 70.703 (72.509)	Top-5 acc 88.281 (89.110)	lr 0.00081
Train [107][530/3239]	Time 0.231 (0.655)	Data Time 0.001 (0.076)	Loss 2.0195 (2.1694)	Entropy 0.70481 (0.70530)	Top-1 acc 76.172 (72.518)	Top-5 acc 92.969 (89.118)	lr 0.00081
Train [107][540/3239]	Time 0.435 (0.748)	Data Time 0.003 (0.075)	Loss 2.3084 (2.1693)	Entropy 0.70475 (0.70529)	Top-1 acc 70.703 (72.515)	Top-5 acc 86.328 (89.114)	lr 0.00081
Train [107][550/3239]	Time 0.247 (0.746)	Data Time 0.002 (0.074)	Loss 2.1192 (2.1704)	Entropy 0.70469 (0.70528)	Top-1 acc 73.828 (72.488)	Top-5 acc 88.672 (89.084)	lr 0.00081
Train [107][560/3239]	Time 2.736 (0.741)	Data Time 0.002 (0.072)	Loss 2.1830 (2.1716)	Entropy 0.70469 (0.70527)	Top-1 acc 70.312 (72.452)	Top-5 acc 89.844 (89.065)	lr 0.00081
Train [107][570/3239]	Time 0.230 (0.733)	Data Time 0.001 (0.071)	Loss 2.3046 (2.1721)	Entropy 0.70463 (0.70526)	Top-1 acc 70.312 (72.467)	Top-5 acc 85.938 (89.047)	lr 0.00081
Train [107][580/3239]	Time 0.237 (0.728)	Data Time 0.001 (0.070)	Loss 2.1097 (2.1723)	Entropy 0.70464 (0.70525)	Top-1 acc 72.266 (72.453)	Top-5 acc 90.625 (89.045)	lr 0.00081
Train [107][590/3239]	Time 0.242 (0.725)	Data Time 0.001 (0.069)	Loss 2.0990 (2.1720)	Entropy 0.70436 (0.70523)	Top-1 acc 71.875 (72.439)	Top-5 acc 91.016 (89.055)	lr 0.00081
Train [107][600/3239]	Time 0.252 (0.721)	Data Time 0.002 (0.068)	Loss 2.1828 (2.1723)	Entropy 0.70423 (0.70522)	Top-1 acc 73.047 (72.419)	Top-5 acc 90.625 (89.055)	lr 0.00081
Train [107][610/3239]	Time 0.235 (0.717)	Data Time 0.001 (0.066)	Loss 2.1539 (2.1725)	Entropy 0.70419 (0.70520)	Top-1 acc 74.219 (72.420)	Top-5 acc 90.234 (89.052)	lr 0.00081
Train [107][620/3239]	Time 0.235 (0.713)	Data Time 0.001 (0.065)	Loss 2.1361 (2.1728)	Entropy 0.70416 (0.70518)	Top-1 acc 73.438 (72.432)	Top-5 acc 90.234 (89.044)	lr 0.00081
Train [107][630/3239]	Time 0.233 (0.710)	Data Time 0.001 (0.064)	Loss 2.2288 (2.1719)	Entropy 0.70416 (0.70517)	Top-1 acc 69.922 (72.443)	Top-5 acc 89.062 (89.055)	lr 0.00081
Train [107][640/3239]	Time 0.225 (0.706)	Data Time 0.001 (0.063)	Loss 2.1531 (2.1720)	Entropy 0.70416 (0.70515)	Top-1 acc 73.438 (72.441)	Top-5 acc 89.844 (89.055)	lr 0.00081
Train [107][650/3239]	Time 0.241 (0.702)	Data Time 0.001 (0.062)	Loss 2.0550 (2.1722)	Entropy 0.70410 (0.70514)	Top-1 acc 75.781 (72.425)	Top-5 acc 90.625 (89.053)	lr 0.00081
Train [107][660/3239]	Time 0.256 (0.699)	Data Time 0.001 (0.062)	Loss 2.1612 (2.1723)	Entropy 0.70406 (0.70512)	Top-1 acc 74.219 (72.422)	Top-5 acc 89.453 (89.058)	lr 0.00081
Train [107][670/3239]	Time 2.786 (0.696)	Data Time 0.001 (0.061)	Loss 2.1405 (2.1725)	Entropy 0.70406 (0.70510)	Top-1 acc 75.391 (72.412)	Top-5 acc 90.625 (89.054)	lr 0.00081
Train [107][680/3239]	Time 0.264 (0.690)	Data Time 0.001 (0.060)	Loss 2.0616 (2.1729)	Entropy 0.70406 (0.70509)	Top-1 acc 77.344 (72.409)	Top-5 acc 90.625 (89.038)	lr 0.00081
Train [107][690/3239]	Time 0.242 (0.687)	Data Time 0.001 (0.059)	Loss 2.0770 (2.1723)	Entropy 0.70383 (0.70507)	Top-1 acc 74.609 (72.418)	Top-5 acc 90.625 (89.048)	lr 0.00081
Train [107][700/3239]	Time 0.230 (0.684)	Data Time 0.001 (0.058)	Loss 2.0812 (2.1720)	Entropy 0.70383 (0.70505)	Top-1 acc 75.000 (72.427)	Top-5 acc 91.797 (89.051)	lr 0.00081
Train [107][710/3239]	Time 0.328 (0.681)	Data Time 0.001 (0.057)	Loss 2.2846 (2.1718)	Entropy 0.70379 (0.70504)	Top-1 acc 71.484 (72.436)	Top-5 acc 88.672 (89.058)	lr 0.00080
Train [107][720/3239]	Time 0.230 (0.678)	Data Time 0.001 (0.057)	Loss 2.1762 (2.1713)	Entropy 0.70373 (0.70502)	Top-1 acc 74.609 (72.453)	Top-5 acc 89.844 (89.068)	lr 0.00080
Train [107][730/3239]	Time 0.231 (0.675)	Data Time 0.001 (0.056)	Loss 2.1820 (2.1711)	Entropy 0.70365 (0.70500)	Top-1 acc 71.875 (72.449)	Top-5 acc 88.281 (89.078)	lr 0.00080
Train [107][740/3239]	Time 0.232 (0.673)	Data Time 0.001 (0.055)	Loss 2.2672 (2.1711)	Entropy 0.70367 (0.70498)	Top-1 acc 73.438 (72.453)	Top-5 acc 84.766 (89.076)	lr 0.00080
Train [107][750/3239]	Time 0.320 (0.670)	Data Time 0.001 (0.054)	Loss 2.4288 (2.1714)	Entropy 0.70361 (0.70496)	Top-1 acc 67.188 (72.441)	Top-5 acc 85.156 (89.076)	lr 0.00080
Train [107][760/3239]	Time 0.231 (0.668)	Data Time 0.001 (0.054)	Loss 2.1982 (2.1711)	Entropy 0.70357 (0.70495)	Top-1 acc 71.094 (72.446)	Top-5 acc 88.672 (89.084)	lr 0.00080
Train [107][770/3239]	Time 0.228 (0.666)	Data Time 0.001 (0.053)	Loss 2.1822 (2.1718)	Entropy 0.70355 (0.70493)	Top-1 acc 74.219 (72.428)	Top-5 acc 87.500 (89.065)	lr 0.00080
Train [107][780/3239]	Time 2.567 (0.663)	Data Time 0.001 (0.052)	Loss 2.1514 (2.1716)	Entropy 0.70355 (0.70491)	Top-1 acc 74.219 (72.433)	Top-5 acc 89.453 (89.063)	lr 0.00080
Train [107][790/3239]	Time 0.276 (0.658)	Data Time 0.001 (0.052)	Loss 2.1006 (2.1716)	Entropy 0.70350 (0.70489)	Top-1 acc 73.438 (72.435)	Top-5 acc 91.797 (89.061)	lr 0.00080
Train [107][800/3239]	Time 0.227 (0.656)	Data Time 0.001 (0.051)	Loss 2.1015 (2.1721)	Entropy 0.70356 (0.70488)	Top-1 acc 73.828 (72.412)	Top-5 acc 91.016 (89.054)	lr 0.00080
Train [107][810/3239]	Time 0.236 (0.654)	Data Time 0.001 (0.050)	Loss 2.2082 (2.1725)	Entropy 0.70345 (0.70486)	Top-1 acc 71.484 (72.400)	Top-5 acc 89.844 (89.043)	lr 0.00080
Train [107][820/3239]	Time 0.241 (0.652)	Data Time 0.002 (0.050)	Loss 2.2878 (2.1725)	Entropy 0.70345 (0.70484)	Top-1 acc 69.922 (72.397)	Top-5 acc 86.328 (89.044)	lr 0.00080
Train [107][830/3239]	Time 0.250 (0.650)	Data Time 0.001 (0.049)	Loss 2.1405 (2.1722)	Entropy 0.70339 (0.70482)	Top-1 acc 73.047 (72.413)	Top-5 acc 90.234 (89.049)	lr 0.00080
Train [107][840/3239]	Time 0.222 (0.648)	Data Time 0.001 (0.049)	Loss 2.1252 (2.1721)	Entropy 0.70332 (0.70481)	Top-1 acc 71.484 (72.409)	Top-5 acc 89.844 (89.054)	lr 0.00080
Train [107][850/3239]	Time 0.228 (0.646)	Data Time 0.001 (0.048)	Loss 2.2389 (2.1726)	Entropy 0.70330 (0.70479)	Top-1 acc 74.609 (72.413)	Top-5 acc 87.109 (89.040)	lr 0.00080
Train [107][860/3239]	Time 0.239 (0.644)	Data Time 0.002 (0.048)	Loss 2.1222 (2.1721)	Entropy 0.70328 (0.70477)	Top-1 acc 73.438 (72.422)	Top-5 acc 92.188 (89.054)	lr 0.00080
Train [107][870/3239]	Time 0.230 (0.642)	Data Time 0.001 (0.047)	Loss 2.1502 (2.1724)	Entropy 0.70327 (0.70476)	Top-1 acc 72.266 (72.416)	Top-5 acc 90.625 (89.050)	lr 0.00080
Train [107][880/3239]	Time 0.243 (0.641)	Data Time 0.001 (0.047)	Loss 2.2127 (2.1727)	Entropy 0.70322 (0.70474)	Top-1 acc 72.266 (72.404)	Top-5 acc 88.281 (89.045)	lr 0.00080
Train [107][890/3239]	Time 2.569 (0.639)	Data Time 0.002 (0.046)	Loss 2.1538 (2.1726)	Entropy 0.70322 (0.70472)	Top-1 acc 73.828 (72.411)	Top-5 acc 88.281 (89.044)	lr 0.00080
Train [107][900/3239]	Time 0.242 (0.634)	Data Time 0.001 (0.046)	Loss 2.1531 (2.1731)	Entropy 0.70314 (0.70470)	Top-1 acc 73.828 (72.403)	Top-5 acc 91.016 (89.037)	lr 0.00080
Train [107][910/3239]	Time 0.254 (0.695)	Data Time 0.002 (0.045)	Loss 2.3822 (2.1735)	Entropy 0.70314 (0.70469)	Top-1 acc 66.016 (72.384)	Top-5 acc 83.594 (89.032)	lr 0.00080
Train [107][920/3239]	Time 0.229 (0.693)	Data Time 0.002 (0.045)	Loss 2.2256 (2.1743)	Entropy 0.70313 (0.70467)	Top-1 acc 69.141 (72.361)	Top-5 acc 89.062 (89.010)	lr 0.00080
Train [107][930/3239]	Time 0.259 (0.691)	Data Time 0.005 (0.044)	Loss 2.3039 (2.1747)	Entropy 0.70310 (0.70465)	Top-1 acc 68.359 (72.350)	Top-5 acc 85.938 (89.003)	lr 0.00080
Train [107][940/3239]	Time 0.231 (0.689)	Data Time 0.001 (0.044)	Loss 2.2155 (2.1746)	Entropy 0.70316 (0.70464)	Top-1 acc 71.094 (72.347)	Top-5 acc 89.453 (89.006)	lr 0.00080
Train [107][950/3239]	Time 0.235 (0.687)	Data Time 0.001 (0.043)	Loss 2.3829 (2.1744)	Entropy 0.70318 (0.70462)	Top-1 acc 71.094 (72.357)	Top-5 acc 84.766 (89.011)	lr 0.00080
Train [107][960/3239]	Time 0.235 (0.685)	Data Time 0.001 (0.043)	Loss 2.0665 (2.1745)	Entropy 0.70324 (0.70461)	Top-1 acc 75.391 (72.352)	Top-5 acc 91.797 (89.015)	lr 0.00080
Train [107][970/3239]	Time 0.221 (0.682)	Data Time 0.001 (0.042)	Loss 2.0538 (2.1739)	Entropy 0.70324 (0.70459)	Top-1 acc 75.000 (72.359)	Top-5 acc 91.016 (89.026)	lr 0.00080
Train [107][980/3239]	Time 0.248 (0.681)	Data Time 0.001 (0.042)	Loss 2.0853 (2.1737)	Entropy 0.70312 (0.70458)	Top-1 acc 73.047 (72.360)	Top-5 acc 90.234 (89.028)	lr 0.00080
Train [107][990/3239]	Time 0.235 (0.679)	Data Time 0.001 (0.042)	Loss 2.0451 (2.1736)	Entropy 0.70313 (0.70456)	Top-1 acc 75.391 (72.359)	Top-5 acc 92.188 (89.035)	lr 0.00079
Train [107][1000/3239]	Time 2.675 (0.677)	Data Time 0.002 (0.041)	Loss 2.0584 (2.1732)	Entropy 0.70313 (0.70455)	Top-1 acc 76.172 (72.369)	Top-5 acc 90.234 (89.041)	lr 0.00079
Train [107][1010/3239]	Time 0.261 (0.673)	Data Time 0.001 (0.041)	Loss 2.2941 (2.1731)	Entropy 0.70317 (0.70454)	Top-1 acc 68.750 (72.365)	Top-5 acc 87.500 (89.040)	lr 0.00079
Train [107][1020/3239]	Time 0.240 (0.671)	Data Time 0.001 (0.040)	Loss 2.2025 (2.1732)	Entropy 0.70313 (0.70452)	Top-1 acc 73.828 (72.354)	Top-5 acc 87.109 (89.043)	lr 0.00079
Train [107][1030/3239]	Time 0.233 (0.669)	Data Time 0.001 (0.040)	Loss 2.2454 (2.1734)	Entropy 0.70301 (0.70451)	Top-1 acc 71.484 (72.338)	Top-5 acc 88.281 (89.039)	lr 0.00079
Train [107][1040/3239]	Time 0.234 (0.667)	Data Time 0.001 (0.040)	Loss 2.1960 (2.1735)	Entropy 0.70301 (0.70449)	Top-1 acc 71.484 (72.332)	Top-5 acc 89.062 (89.046)	lr 0.00079
Train [107][1050/3239]	Time 0.262 (0.666)	Data Time 0.001 (0.039)	Loss 2.3177 (2.1734)	Entropy 0.70294 (0.70448)	Top-1 acc 67.578 (72.343)	Top-5 acc 86.719 (89.041)	lr 0.00079
Train [107][1060/3239]	Time 0.240 (0.664)	Data Time 0.001 (0.039)	Loss 2.1450 (2.1733)	Entropy 0.70286 (0.70446)	Top-1 acc 71.484 (72.341)	Top-5 acc 90.625 (89.044)	lr 0.00079
Train [107][1070/3239]	Time 0.233 (0.662)	Data Time 0.001 (0.039)	Loss 2.0039 (2.1730)	Entropy 0.70388 (0.70445)	Top-1 acc 76.562 (72.345)	Top-5 acc 90.234 (89.044)	lr 0.00079
Train [107][1080/3239]	Time 0.235 (0.661)	Data Time 0.001 (0.038)	Loss 2.3564 (2.1729)	Entropy 0.70374 (0.70445)	Top-1 acc 69.141 (72.349)	Top-5 acc 86.328 (89.045)	lr 0.00079
Train [107][1090/3239]	Time 0.219 (0.659)	Data Time 0.001 (0.038)	Loss 2.1839 (2.1733)	Entropy 0.70364 (0.70444)	Top-1 acc 71.875 (72.344)	Top-5 acc 90.234 (89.040)	lr 0.00079
Train [107][1100/3239]	Time 0.220 (0.658)	Data Time 0.001 (0.038)	Loss 1.9991 (2.1734)	Entropy 0.70362 (0.70443)	Top-1 acc 78.906 (72.345)	Top-5 acc 92.188 (89.040)	lr 0.00079
Train [107][1110/3239]	Time 2.790 (0.656)	Data Time 0.001 (0.037)	Loss 2.2131 (2.1735)	Entropy 0.70362 (0.70443)	Top-1 acc 74.219 (72.351)	Top-5 acc 89.062 (89.038)	lr 0.00079
Train [107][1120/3239]	Time 0.246 (0.652)	Data Time 0.001 (0.037)	Loss 2.1001 (2.1739)	Entropy 0.70365 (0.70442)	Top-1 acc 75.391 (72.344)	Top-5 acc 89.453 (89.032)	lr 0.00079
Train [107][1130/3239]	Time 0.337 (0.651)	Data Time 0.001 (0.037)	Loss 2.1674 (2.1740)	Entropy 0.70367 (0.70441)	Top-1 acc 70.312 (72.337)	Top-5 acc 89.453 (89.035)	lr 0.00079
Train [107][1140/3239]	Time 0.244 (0.649)	Data Time 0.001 (0.036)	Loss 2.0452 (2.1736)	Entropy 0.70368 (0.70441)	Top-1 acc 76.172 (72.347)	Top-5 acc 92.578 (89.047)	lr 0.00079
Train [107][1150/3239]	Time 0.244 (0.648)	Data Time 0.002 (0.036)	Loss 2.0841 (2.1738)	Entropy 0.70361 (0.70440)	Top-1 acc 71.875 (72.339)	Top-5 acc 90.625 (89.038)	lr 0.00079
Train [107][1160/3239]	Time 0.238 (0.647)	Data Time 0.001 (0.036)	Loss 2.2401 (2.1745)	Entropy 0.70354 (0.70439)	Top-1 acc 71.094 (72.315)	Top-5 acc 86.719 (89.029)	lr 0.00079
Train [107][1170/3239]	Time 0.326 (0.645)	Data Time 0.001 (0.035)	Loss 2.3058 (2.1743)	Entropy 0.70347 (0.70438)	Top-1 acc 71.875 (72.327)	Top-5 acc 87.500 (89.027)	lr 0.00079
Train [107][1180/3239]	Time 0.235 (0.644)	Data Time 0.001 (0.035)	Loss 2.2540 (2.1751)	Entropy 0.70345 (0.70438)	Top-1 acc 72.656 (72.310)	Top-5 acc 86.719 (89.016)	lr 0.00079
Train [107][1190/3239]	Time 0.240 (0.642)	Data Time 0.001 (0.035)	Loss 2.1574 (2.1752)	Entropy 0.70357 (0.70437)	Top-1 acc 73.828 (72.307)	Top-5 acc 89.062 (89.014)	lr 0.00079
Train [107][1200/3239]	Time 0.231 (0.641)	Data Time 0.001 (0.035)	Loss 2.2278 (2.1753)	Entropy 0.70355 (0.70436)	Top-1 acc 71.875 (72.302)	Top-5 acc 87.500 (89.016)	lr 0.00079
Train [107][1210/3239]	Time 0.349 (0.640)	Data Time 0.002 (0.034)	Loss 2.1248 (2.1757)	Entropy 0.70354 (0.70436)	Top-1 acc 73.047 (72.287)	Top-5 acc 92.578 (89.008)	lr 0.00079
Train [107][1220/3239]	Time 2.522 (0.638)	Data Time 0.001 (0.034)	Loss 2.1916 (2.1756)	Entropy 0.70354 (0.70435)	Top-1 acc 72.266 (72.283)	Top-5 acc 88.672 (89.011)	lr 0.00079
Train [107][1230/3239]	Time 0.258 (0.635)	Data Time 0.001 (0.034)	Loss 2.2128 (2.1756)	Entropy 0.70347 (0.70434)	Top-1 acc 71.484 (72.287)	Top-5 acc 86.328 (89.009)	lr 0.00079
Train [107][1240/3239]	Time 0.259 (0.634)	Data Time 0.002 (0.034)	Loss 2.1363 (2.1754)	Entropy 0.70342 (0.70433)	Top-1 acc 71.484 (72.292)	Top-5 acc 88.281 (89.016)	lr 0.00079
Train [107][1250/3239]	Time 0.333 (0.633)	Data Time 0.001 (0.033)	Loss 2.2134 (2.1754)	Entropy 0.70346 (0.70433)	Top-1 acc 73.438 (72.296)	Top-5 acc 88.281 (89.013)	lr 0.00079
Train [107][1260/3239]	Time 0.230 (0.632)	Data Time 0.002 (0.033)	Loss 2.1815 (2.1754)	Entropy 0.70357 (0.70432)	Top-1 acc 71.094 (72.291)	Top-5 acc 89.453 (89.012)	lr 0.00079
Train [107][1270/3239]	Time 0.242 (0.675)	Data Time 0.002 (0.033)	Loss 2.1718 (2.1751)	Entropy 0.70338 (0.70431)	Top-1 acc 71.484 (72.299)	Top-5 acc 89.844 (89.012)	lr 0.00078
Train [107][1280/3239]	Time 0.252 (0.674)	Data Time 0.003 (0.033)	Loss 2.3819 (2.1754)	Entropy 0.70338 (0.70431)	Top-1 acc 66.797 (72.292)	Top-5 acc 83.594 (89.008)	lr 0.00078
Train [107][1290/3239]	Time 0.341 (0.672)	Data Time 0.001 (0.032)	Loss 2.1266 (2.1758)	Entropy 0.70344 (0.70430)	Top-1 acc 76.172 (72.279)	Top-5 acc 90.234 (89.001)	lr 0.00078
Train [107][1300/3239]	Time 0.220 (0.671)	Data Time 0.001 (0.032)	Loss 2.1310 (2.1754)	Entropy 0.70342 (0.70429)	Top-1 acc 74.219 (72.293)	Top-5 acc 89.062 (89.006)	lr 0.00078
Train [107][1310/3239]	Time 0.234 (0.669)	Data Time 0.001 (0.032)	Loss 2.2962 (2.1754)	Entropy 0.70336 (0.70429)	Top-1 acc 67.188 (72.292)	Top-5 acc 87.500 (89.006)	lr 0.00078
Train [107][1320/3239]	Time 0.222 (0.668)	Data Time 0.001 (0.032)	Loss 2.1870 (2.1758)	Entropy 0.70338 (0.70428)	Top-1 acc 72.266 (72.290)	Top-5 acc 91.016 (88.993)	lr 0.00078
Train [107][1330/3239]	Time 2.506 (0.666)	Data Time 0.001 (0.031)	Loss 2.1862 (2.1758)	Entropy 0.70338 (0.70427)	Top-1 acc 69.531 (72.293)	Top-5 acc 87.891 (88.990)	lr 0.00078
Train [107][1340/3239]	Time 0.242 (0.663)	Data Time 0.002 (0.031)	Loss 2.2907 (2.1758)	Entropy 0.70332 (0.70427)	Top-1 acc 71.484 (72.301)	Top-5 acc 84.766 (88.986)	lr 0.00078
Train [107][1350/3239]	Time 0.238 (0.662)	Data Time 0.001 (0.031)	Loss 2.1346 (2.1753)	Entropy 0.70329 (0.70426)	Top-1 acc 73.438 (72.322)	Top-5 acc 90.625 (88.998)	lr 0.00078
Train [107][1360/3239]	Time 0.245 (0.661)	Data Time 0.001 (0.031)	Loss 2.2533 (2.1752)	Entropy 0.70331 (0.70425)	Top-1 acc 69.531 (72.328)	Top-5 acc 89.062 (88.999)	lr 0.00078
Train [107][1370/3239]	Time 0.235 (0.660)	Data Time 0.001 (0.031)	Loss 2.0033 (2.1752)	Entropy 0.70331 (0.70424)	Top-1 acc 76.172 (72.324)	Top-5 acc 92.188 (88.999)	lr 0.00078
Train [107][1380/3239]	Time 0.232 (0.658)	Data Time 0.001 (0.030)	Loss 2.0416 (2.1754)	Entropy 0.70335 (0.70424)	Top-1 acc 73.438 (72.319)	Top-5 acc 92.578 (88.997)	lr 0.00078
Train [107][1390/3239]	Time 0.221 (0.657)	Data Time 0.001 (0.030)	Loss 2.3932 (2.1758)	Entropy 0.70331 (0.70423)	Top-1 acc 67.969 (72.315)	Top-5 acc 84.766 (88.996)	lr 0.00078
Train [107][1400/3239]	Time 0.232 (0.656)	Data Time 0.001 (0.030)	Loss 2.0859 (2.1762)	Entropy 0.70326 (0.70422)	Top-1 acc 76.172 (72.305)	Top-5 acc 90.625 (88.989)	lr 0.00078
Train [107][1410/3239]	Time 0.233 (0.654)	Data Time 0.001 (0.030)	Loss 2.2042 (2.1763)	Entropy 0.70316 (0.70422)	Top-1 acc 71.094 (72.299)	Top-5 acc 87.109 (88.985)	lr 0.00078
Train [107][1420/3239]	Time 0.241 (0.653)	Data Time 0.001 (0.030)	Loss 2.1026 (2.1760)	Entropy 0.70319 (0.70421)	Top-1 acc 73.047 (72.304)	Top-5 acc 89.844 (88.990)	lr 0.00078
Train [107][1430/3239]	Time 0.244 (0.652)	Data Time 0.001 (0.029)	Loss 2.0743 (2.1759)	Entropy 0.70312 (0.70420)	Top-1 acc 72.656 (72.306)	Top-5 acc 89.844 (88.987)	lr 0.00078
Train [107][1440/3239]	Time 2.604 (0.651)	Data Time 0.001 (0.029)	Loss 2.1058 (2.1758)	Entropy 0.70312 (0.70420)	Top-1 acc 74.219 (72.305)	Top-5 acc 89.062 (88.986)	lr 0.00078
Train [107][1450/3239]	Time 0.261 (0.648)	Data Time 0.003 (0.029)	Loss 2.2782 (2.1758)	Entropy 0.70307 (0.70419)	Top-1 acc 68.359 (72.300)	Top-5 acc 87.891 (88.982)	lr 0.00078
Train [107][1460/3239]	Time 0.317 (0.647)	Data Time 0.001 (0.029)	Loss 2.2553 (2.1760)	Entropy 0.70309 (0.70418)	Top-1 acc 71.094 (72.303)	Top-5 acc 87.891 (88.978)	lr 0.00078
Train [107][1470/3239]	Time 0.241 (0.646)	Data Time 0.001 (0.029)	Loss 2.1639 (2.1759)	Entropy 0.70311 (0.70417)	Top-1 acc 76.562 (72.307)	Top-5 acc 89.844 (88.981)	lr 0.00078
Train [107][1480/3239]	Time 0.231 (0.645)	Data Time 0.001 (0.028)	Loss 2.2537 (2.1759)	Entropy 0.70309 (0.70417)	Top-1 acc 70.312 (72.304)	Top-5 acc 87.500 (88.977)	lr 0.00078
Train [107][1490/3239]	Time 0.222 (0.644)	Data Time 0.001 (0.028)	Loss 2.2487 (2.1760)	Entropy 0.70307 (0.70416)	Top-1 acc 69.922 (72.302)	Top-5 acc 88.672 (88.973)	lr 0.00078
Train [107][1500/3239]	Time 0.315 (0.642)	Data Time 0.001 (0.028)	Loss 2.0894 (2.1758)	Entropy 0.70296 (0.70415)	Top-1 acc 73.438 (72.310)	Top-5 acc 91.797 (88.975)	lr 0.00078
Train [107][1510/3239]	Time 0.229 (0.641)	Data Time 0.001 (0.028)	Loss 2.3234 (2.1758)	Entropy 0.70288 (0.70414)	Top-1 acc 65.625 (72.311)	Top-5 acc 87.500 (88.974)	lr 0.00078
Train [107][1520/3239]	Time 0.234 (0.640)	Data Time 0.003 (0.028)	Loss 2.1656 (2.1762)	Entropy 0.70274 (0.70413)	Top-1 acc 71.094 (72.300)	Top-5 acc 87.891 (88.966)	lr 0.00078
Train [107][1530/3239]	Time 0.221 (0.639)	Data Time 0.001 (0.027)	Loss 2.2486 (2.1766)	Entropy 0.70258 (0.70412)	Top-1 acc 73.828 (72.290)	Top-5 acc 88.281 (88.959)	lr 0.00078
Train [107][1540/3239]	Time 0.336 (0.639)	Data Time 0.001 (0.027)	Loss 2.0666 (2.1768)	Entropy 0.70249 (0.70411)	Top-1 acc 76.562 (72.284)	Top-5 acc 89.844 (88.960)	lr 0.00078
Train [107][1550/3239]	Time 2.892 (0.638)	Data Time 0.001 (0.027)	Loss 2.0899 (2.1767)	Entropy 0.70249 (0.70410)	Top-1 acc 73.828 (72.287)	Top-5 acc 91.406 (88.961)	lr 0.00078
Train [107][1560/3239]	Time 0.288 (0.636)	Data Time 0.003 (0.027)	Loss 2.2490 (2.1765)	Entropy 0.70255 (0.70409)	Top-1 acc 69.922 (72.287)	Top-5 acc 85.156 (88.966)	lr 0.00077
Train [107][1570/3239]	Time 0.273 (0.635)	Data Time 0.002 (0.027)	Loss 2.2090 (2.1764)	Entropy 0.70262 (0.70408)	Top-1 acc 69.531 (72.292)	Top-5 acc 89.453 (88.972)	lr 0.00077
Train [107][1580/3239]	Time 0.254 (0.635)	Data Time 0.002 (0.027)	Loss 2.1830 (2.1761)	Entropy 0.70264 (0.70408)	Top-1 acc 75.000 (72.299)	Top-5 acc 90.234 (88.981)	lr 0.00077
Train [107][1590/3239]	Time 0.384 (0.634)	Data Time 0.001 (0.027)	Loss 2.0719 (2.1760)	Entropy 0.70256 (0.70407)	Top-1 acc 74.219 (72.301)	Top-5 acc 89.844 (88.981)	lr 0.00077
Train [107][1600/3239]	Time 0.236 (0.633)	Data Time 0.001 (0.026)	Loss 2.2498 (2.1760)	Entropy 0.70259 (0.70406)	Top-1 acc 70.312 (72.298)	Top-5 acc 88.281 (88.981)	lr 0.00077
Train [107][1610/3239]	Time 0.237 (0.632)	Data Time 0.001 (0.026)	Loss 2.1737 (2.1761)	Entropy 0.70251 (0.70405)	Top-1 acc 70.312 (72.294)	Top-5 acc 89.062 (88.978)	lr 0.00077
Train [107][1620/3239]	Time 0.234 (0.631)	Data Time 0.001 (0.026)	Loss 2.2015 (2.1764)	Entropy 0.70248 (0.70404)	Top-1 acc 72.266 (72.280)	Top-5 acc 89.844 (88.977)	lr 0.00077
Train [107][1630/3239]	Time 0.537 (0.664)	Data Time 0.003 (0.026)	Loss 2.1085 (2.1765)	Entropy 0.70243 (0.70403)	Top-1 acc 71.484 (72.281)	Top-5 acc 89.453 (88.974)	lr 0.00077
Train [107][1640/3239]	Time 0.234 (0.663)	Data Time 0.002 (0.026)	Loss 2.2637 (2.1766)	Entropy 0.70246 (0.70402)	Top-1 acc 74.219 (72.280)	Top-5 acc 85.156 (88.969)	lr 0.00077
Train [107][1650/3239]	Time 0.269 (0.662)	Data Time 0.001 (0.026)	Loss 2.0816 (2.1766)	Entropy 0.70244 (0.70401)	Top-1 acc 73.047 (72.284)	Top-5 acc 90.234 (88.967)	lr 0.00077
Train [107][1660/3239]	Time 2.551 (0.661)	Data Time 0.001 (0.025)	Loss 2.0813 (2.1764)	Entropy 0.70244 (0.70400)	Top-1 acc 73.047 (72.290)	Top-5 acc 90.234 (88.965)	lr 0.00077
Train [107][1670/3239]	Time 0.350 (0.658)	Data Time 0.001 (0.025)	Loss 2.0933 (2.1763)	Entropy 0.70241 (0.70399)	Top-1 acc 73.047 (72.294)	Top-5 acc 90.234 (88.967)	lr 0.00077
Train [107][1680/3239]	Time 0.226 (0.657)	Data Time 0.001 (0.025)	Loss 2.1860 (2.1765)	Entropy 0.70238 (0.70398)	Top-1 acc 71.875 (72.283)	Top-5 acc 87.109 (88.967)	lr 0.00077
Train [107][1690/3239]	Time 0.257 (0.656)	Data Time 0.001 (0.025)	Loss 2.0206 (2.1765)	Entropy 0.70236 (0.70397)	Top-1 acc 75.391 (72.283)	Top-5 acc 90.234 (88.969)	lr 0.00077
Train [107][1700/3239]	Time 0.260 (0.655)	Data Time 0.001 (0.025)	Loss 2.1808 (2.1766)	Entropy 0.70231 (0.70396)	Top-1 acc 72.656 (72.282)	Top-5 acc 89.062 (88.967)	lr 0.00077
Train [107][1710/3239]	Time 0.373 (0.654)	Data Time 0.001 (0.025)	Loss 2.2377 (2.1766)	Entropy 0.70227 (0.70395)	Top-1 acc 70.703 (72.283)	Top-5 acc 91.016 (88.967)	lr 0.00077
Train [107][1720/3239]	Time 0.284 (0.654)	Data Time 0.001 (0.025)	Loss 2.1123 (2.1766)	Entropy 0.70220 (0.70394)	Top-1 acc 76.172 (72.286)	Top-5 acc 90.234 (88.967)	lr 0.00077
Train [107][1730/3239]	Time 0.230 (0.653)	Data Time 0.001 (0.025)	Loss 2.2566 (2.1764)	Entropy 0.70211 (0.70393)	Top-1 acc 69.141 (72.286)	Top-5 acc 87.891 (88.970)	lr 0.00077
Train [107][1740/3239]	Time 0.232 (0.652)	Data Time 0.001 (0.024)	Loss 2.2783 (2.1766)	Entropy 0.70203 (0.70392)	Top-1 acc 70.703 (72.283)	Top-5 acc 85.938 (88.965)	lr 0.00077
Train [107][1750/3239]	Time 0.351 (0.651)	Data Time 0.001 (0.024)	Loss 2.1031 (2.1769)	Entropy 0.70200 (0.70391)	Top-1 acc 71.875 (72.278)	Top-5 acc 88.672 (88.959)	lr 0.00077
Train [107][1760/3239]	Time 0.228 (0.650)	Data Time 0.001 (0.024)	Loss 2.0523 (2.1768)	Entropy 0.70190 (0.70390)	Top-1 acc 74.609 (72.281)	Top-5 acc 91.797 (88.961)	lr 0.00077
Train [107][1770/3239]	Time 2.498 (0.649)	Data Time 0.001 (0.024)	Loss 2.1640 (2.1769)	Entropy 0.70190 (0.70389)	Top-1 acc 72.656 (72.279)	Top-5 acc 87.500 (88.956)	lr 0.00077
Train [107][1780/3239]	Time 0.241 (0.647)	Data Time 0.002 (0.024)	Loss 2.3072 (2.1772)	Entropy 0.70187 (0.70388)	Top-1 acc 71.094 (72.272)	Top-5 acc 86.719 (88.949)	lr 0.00077
Train [107][1790/3239]	Time 0.252 (0.646)	Data Time 0.002 (0.024)	Loss 2.1662 (2.1771)	Entropy 0.70179 (0.70386)	Top-1 acc 70.703 (72.278)	Top-5 acc 87.891 (88.949)	lr 0.00077
Train [107][1800/3239]	Time 0.247 (0.645)	Data Time 0.001 (0.024)	Loss 2.3009 (2.1772)	Entropy 0.70178 (0.70385)	Top-1 acc 67.969 (72.270)	Top-5 acc 85.938 (88.946)	lr 0.00077
Train [107][1810/3239]	Time 0.227 (0.644)	Data Time 0.001 (0.023)	Loss 2.2830 (2.1773)	Entropy 0.70180 (0.70384)	Top-1 acc 68.359 (72.260)	Top-5 acc 87.109 (88.943)	lr 0.00077
Train [107][1820/3239]	Time 0.255 (0.644)	Data Time 0.001 (0.023)	Loss 2.2615 (2.1775)	Entropy 0.70179 (0.70383)	Top-1 acc 69.531 (72.254)	Top-5 acc 86.328 (88.937)	lr 0.00077
Train [107][1830/3239]	Time 0.258 (0.643)	Data Time 0.001 (0.023)	Loss 2.2475 (2.1776)	Entropy 0.70178 (0.70382)	Top-1 acc 69.922 (72.251)	Top-5 acc 88.281 (88.935)	lr 0.00077
Train [107][1840/3239]	Time 0.234 (0.642)	Data Time 0.001 (0.023)	Loss 2.2152 (2.1777)	Entropy 0.70178 (0.70381)	Top-1 acc 69.922 (72.250)	Top-5 acc 87.891 (88.930)	lr 0.00076
Train [107][1850/3239]	Time 0.243 (0.641)	Data Time 0.001 (0.023)	Loss 2.2376 (2.1777)	Entropy 0.70191 (0.70380)	Top-1 acc 71.094 (72.253)	Top-5 acc 89.453 (88.933)	lr 0.00076
Train [107][1860/3239]	Time 0.238 (0.640)	Data Time 0.001 (0.023)	Loss 2.1091 (2.1776)	Entropy 0.70193 (0.70379)	Top-1 acc 74.219 (72.249)	Top-5 acc 89.453 (88.934)	lr 0.00076
Train [107][1870/3239]	Time 0.229 (0.639)	Data Time 0.001 (0.023)	Loss 2.2872 (2.1777)	Entropy 0.70194 (0.70378)	Top-1 acc 72.656 (72.249)	Top-5 acc 86.719 (88.933)	lr 0.00076
Train [107][1880/3239]	Time 2.633 (0.639)	Data Time 0.002 (0.023)	Loss 2.2245 (2.1776)	Entropy 0.70194 (0.70377)	Top-1 acc 68.359 (72.248)	Top-5 acc 85.938 (88.934)	lr 0.00076
Train [107][1890/3239]	Time 0.240 (0.637)	Data Time 0.001 (0.023)	Loss 2.2498 (2.1777)	Entropy 0.70192 (0.70376)	Top-1 acc 71.484 (72.244)	Top-5 acc 87.500 (88.933)	lr 0.00076
Train [107][1900/3239]	Time 0.240 (0.636)	Data Time 0.001 (0.022)	Loss 2.0456 (2.1777)	Entropy 0.70195 (0.70375)	Top-1 acc 76.562 (72.245)	Top-5 acc 91.016 (88.933)	lr 0.00076
Train [107][1910/3239]	Time 0.231 (0.635)	Data Time 0.001 (0.022)	Loss 2.2892 (2.1778)	Entropy 0.70185 (0.70374)	Top-1 acc 72.656 (72.243)	Top-5 acc 85.547 (88.935)	lr 0.00076
Train [107][1920/3239]	Time 0.352 (0.634)	Data Time 0.001 (0.022)	Loss 2.0951 (2.1779)	Entropy 0.70180 (0.70373)	Top-1 acc 76.172 (72.243)	Top-5 acc 88.672 (88.933)	lr 0.00076
Train [107][1930/3239]	Time 0.238 (0.634)	Data Time 0.002 (0.022)	Loss 2.2280 (2.1781)	Entropy 0.70185 (0.70372)	Top-1 acc 69.922 (72.235)	Top-5 acc 87.109 (88.928)	lr 0.00076
Train [107][1940/3239]	Time 0.231 (0.633)	Data Time 0.001 (0.022)	Loss 2.0251 (2.1781)	Entropy 0.70182 (0.70371)	Top-1 acc 75.391 (72.234)	Top-5 acc 92.578 (88.929)	lr 0.00076
Train [107][1950/3239]	Time 0.232 (0.632)	Data Time 0.001 (0.022)	Loss 2.2003 (2.1780)	Entropy 0.70183 (0.70370)	Top-1 acc 72.266 (72.240)	Top-5 acc 88.281 (88.931)	lr 0.00076
Train [107][1960/3239]	Time 0.330 (0.631)	Data Time 0.001 (0.022)	Loss 2.3111 (2.1781)	Entropy 0.70179 (0.70369)	Top-1 acc 69.141 (72.237)	Top-5 acc 87.891 (88.932)	lr 0.00076
Train [107][1970/3239]	Time 0.227 (0.631)	Data Time 0.001 (0.022)	Loss 2.0350 (2.1778)	Entropy 0.70179 (0.70368)	Top-1 acc 76.172 (72.242)	Top-5 acc 89.844 (88.938)	lr 0.00076
Train [107][1980/3239]	Time 0.270 (0.630)	Data Time 0.002 (0.022)	Loss 2.1508 (2.1779)	Entropy 0.70183 (0.70367)	Top-1 acc 69.922 (72.238)	Top-5 acc 90.625 (88.938)	lr 0.00076
Train [107][1990/3239]	Time 51.591 (0.654)	Data Time 0.001 (0.022)	Loss 2.3324 (2.1779)	Entropy 0.70183 (0.70366)	Top-1 acc 70.703 (72.241)	Top-5 acc 83.984 (88.936)	lr 0.00076
Train [107][2000/3239]	Time 0.417 (0.652)	Data Time 0.003 (0.021)	Loss 2.1336 (2.1780)	Entropy 0.70180 (0.70365)	Top-1 acc 71.484 (72.239)	Top-5 acc 88.281 (88.931)	lr 0.00076
Train [107][2010/3239]	Time 0.240 (0.652)	Data Time 0.002 (0.021)	Loss 2.1369 (2.1779)	Entropy 0.70172 (0.70364)	Top-1 acc 71.094 (72.244)	Top-5 acc 90.234 (88.936)	lr 0.00076
Train [107][2020/3239]	Time 0.245 (0.652)	Data Time 0.003 (0.021)	Loss 2.0685 (2.1777)	Entropy 0.70156 (0.70363)	Top-1 acc 74.219 (72.251)	Top-5 acc 90.625 (88.940)	lr 0.00076
Train [107][2030/3239]	Time 0.244 (0.651)	Data Time 0.001 (0.021)	Loss 2.1023 (2.1777)	Entropy 0.70156 (0.70362)	Top-1 acc 71.875 (72.254)	Top-5 acc 89.453 (88.940)	lr 0.00076
Train [107][2040/3239]	Time 0.247 (0.650)	Data Time 0.002 (0.021)	Loss 2.1857 (2.1776)	Entropy 0.70152 (0.70361)	Top-1 acc 70.703 (72.255)	Top-5 acc 89.453 (88.939)	lr 0.00076
Train [107][2050/3239]	Time 0.334 (0.649)	Data Time 0.001 (0.021)	Loss 2.2467 (2.1775)	Entropy 0.70159 (0.70360)	Top-1 acc 71.484 (72.255)	Top-5 acc 86.719 (88.945)	lr 0.00076
Train [107][2060/3239]	Time 0.240 (0.649)	Data Time 0.001 (0.021)	Loss 2.2981 (2.1774)	Entropy 0.70158 (0.70359)	Top-1 acc 69.531 (72.259)	Top-5 acc 86.719 (88.947)	lr 0.00076
Train [107][2070/3239]	Time 0.242 (0.648)	Data Time 0.001 (0.021)	Loss 2.1923 (2.1777)	Entropy 0.70162 (0.70358)	Top-1 acc 75.391 (72.252)	Top-5 acc 86.719 (88.940)	lr 0.00076
Train [107][2080/3239]	Time 0.250 (0.647)	Data Time 0.002 (0.021)	Loss 2.1291 (2.1778)	Entropy 0.70159 (0.70357)	Top-1 acc 75.000 (72.252)	Top-5 acc 91.016 (88.938)	lr 0.00076
Train [107][2090/3239]	Time 0.320 (0.646)	Data Time 0.001 (0.021)	Loss 2.1738 (2.1778)	Entropy 0.70159 (0.70356)	Top-1 acc 71.094 (72.250)	Top-5 acc 90.234 (88.939)	lr 0.00076
Train [107][2100/3239]	Time 2.635 (0.646)	Data Time 0.001 (0.020)	Loss 2.1296 (2.1780)	Entropy 0.70159 (0.70355)	Top-1 acc 76.172 (72.243)	Top-5 acc 89.844 (88.938)	lr 0.00076
Train [107][2110/3239]	Time 0.291 (0.644)	Data Time 0.001 (0.020)	Loss 2.1457 (2.1780)	Entropy 0.70146 (0.70354)	Top-1 acc 74.609 (72.244)	Top-5 acc 91.016 (88.936)	lr 0.00076
Train [107][2120/3239]	Time 0.247 (0.643)	Data Time 0.001 (0.020)	Loss 2.2654 (2.1780)	Entropy 0.70149 (0.70353)	Top-1 acc 72.656 (72.244)	Top-5 acc 86.719 (88.939)	lr 0.00076
Train [107][2130/3239]	Time 0.234 (0.642)	Data Time 0.001 (0.020)	Loss 2.1807 (2.1780)	Entropy 0.70136 (0.70352)	Top-1 acc 74.609 (72.244)	Top-5 acc 89.062 (88.936)	lr 0.00075
Train [107][2140/3239]	Time 0.226 (0.642)	Data Time 0.001 (0.020)	Loss 2.0730 (2.1782)	Entropy 0.70128 (0.70351)	Top-1 acc 75.391 (72.238)	Top-5 acc 89.844 (88.930)	lr 0.00075
Train [107][2150/3239]	Time 0.231 (0.641)	Data Time 0.001 (0.020)	Loss 2.0500 (2.1781)	Entropy 0.70116 (0.70350)	Top-1 acc 75.781 (72.245)	Top-5 acc 90.234 (88.932)	lr 0.00075
Train [107][2160/3239]	Time 0.224 (0.640)	Data Time 0.001 (0.020)	Loss 2.0771 (2.1779)	Entropy 0.70112 (0.70349)	Top-1 acc 75.000 (72.252)	Top-5 acc 90.625 (88.933)	lr 0.00075
Train [107][2170/3239]	Time 0.238 (0.639)	Data Time 0.001 (0.020)	Loss 2.0458 (2.1780)	Entropy 0.70106 (0.70348)	Top-1 acc 77.734 (72.251)	Top-5 acc 89.453 (88.933)	lr 0.00075
Train [107][2180/3239]	Time 0.231 (0.639)	Data Time 0.001 (0.020)	Loss 2.2897 (2.1781)	Entropy 0.70098 (0.70347)	Top-1 acc 69.922 (72.249)	Top-5 acc 86.719 (88.930)	lr 0.00075
Train [107][2190/3239]	Time 0.235 (0.638)	Data Time 0.001 (0.020)	Loss 2.0884 (2.1780)	Entropy 0.70091 (0.70346)	Top-1 acc 75.391 (72.255)	Top-5 acc 89.062 (88.932)	lr 0.00075
Train [107][2200/3239]	Time 0.253 (0.637)	Data Time 0.001 (0.020)	Loss 2.2535 (2.1781)	Entropy 0.70093 (0.70345)	Top-1 acc 70.312 (72.253)	Top-5 acc 85.938 (88.928)	lr 0.00075
Train [107][2210/3239]	Time 2.693 (0.637)	Data Time 0.001 (0.020)	Loss 2.2940 (2.1779)	Entropy 0.70093 (0.70344)	Top-1 acc 68.359 (72.254)	Top-5 acc 85.938 (88.933)	lr 0.00075
Train [107][2220/3239]	Time 0.229 (0.635)	Data Time 0.001 (0.019)	Loss 2.1853 (2.1780)	Entropy 0.70091 (0.70342)	Top-1 acc 73.828 (72.253)	Top-5 acc 87.500 (88.933)	lr 0.00075
Train [107][2230/3239]	Time 0.247 (0.634)	Data Time 0.001 (0.019)	Loss 2.0812 (2.1779)	Entropy 0.70092 (0.70341)	Top-1 acc 74.219 (72.252)	Top-5 acc 91.016 (88.931)	lr 0.00075
Train [107][2240/3239]	Time 0.232 (0.634)	Data Time 0.001 (0.019)	Loss 2.0323 (2.1778)	Entropy 0.70089 (0.70340)	Top-1 acc 74.609 (72.256)	Top-5 acc 92.969 (88.932)	lr 0.00075
Train [107][2250/3239]	Time 0.230 (0.633)	Data Time 0.001 (0.019)	Loss 2.3052 (2.1778)	Entropy 0.70078 (0.70339)	Top-1 acc 68.750 (72.256)	Top-5 acc 89.062 (88.934)	lr 0.00075
Train [107][2260/3239]	Time 0.233 (0.632)	Data Time 0.001 (0.019)	Loss 2.1575 (2.1779)	Entropy 0.70083 (0.70338)	Top-1 acc 75.000 (72.257)	Top-5 acc 90.625 (88.934)	lr 0.00075
Train [107][2270/3239]	Time 0.226 (0.632)	Data Time 0.001 (0.019)	Loss 2.1619 (2.1778)	Entropy 0.70078 (0.70337)	Top-1 acc 72.656 (72.260)	Top-5 acc 88.281 (88.935)	lr 0.00075
Train [107][2280/3239]	Time 0.234 (0.631)	Data Time 0.001 (0.019)	Loss 2.1850 (2.1777)	Entropy 0.70079 (0.70336)	Top-1 acc 73.828 (72.264)	Top-5 acc 88.672 (88.933)	lr 0.00075
Train [107][2290/3239]	Time 0.233 (0.630)	Data Time 0.001 (0.019)	Loss 2.1192 (2.1777)	Entropy 0.70072 (0.70335)	Top-1 acc 72.266 (72.264)	Top-5 acc 91.016 (88.935)	lr 0.00075
Train [107][2300/3239]	Time 0.350 (0.630)	Data Time 0.001 (0.019)	Loss 2.2089 (2.1778)	Entropy 0.70081 (0.70333)	Top-1 acc 72.266 (72.262)	Top-5 acc 89.453 (88.937)	lr 0.00075
Train [107][2310/3239]	Time 0.250 (0.629)	Data Time 0.001 (0.019)	Loss 2.6337 (2.1780)	Entropy 0.70079 (0.70332)	Top-1 acc 61.328 (72.261)	Top-5 acc 83.594 (88.934)	lr 0.00075
Train [107][2320/3239]	Time 2.665 (0.629)	Data Time 0.001 (0.019)	Loss 2.0661 (2.1782)	Entropy 0.70079 (0.70331)	Top-1 acc 74.609 (72.257)	Top-5 acc 91.797 (88.932)	lr 0.00075
Train [107][2330/3239]	Time 0.232 (0.627)	Data Time 0.001 (0.019)	Loss 2.1411 (2.1783)	Entropy 0.70066 (0.70330)	Top-1 acc 70.312 (72.253)	Top-5 acc 89.844 (88.930)	lr 0.00075
Train [107][2340/3239]	Time 0.324 (0.626)	Data Time 0.001 (0.019)	Loss 2.1701 (2.1782)	Entropy 0.70063 (0.70329)	Top-1 acc 71.484 (72.256)	Top-5 acc 88.672 (88.932)	lr 0.00075
Train [107][2350/3239]	Time 0.228 (0.626)	Data Time 0.001 (0.018)	Loss 2.2172 (2.1782)	Entropy 0.70062 (0.70328)	Top-1 acc 72.266 (72.254)	Top-5 acc 86.719 (88.933)	lr 0.00075
Train [107][2360/3239]	Time 0.246 (0.649)	Data Time 0.002 (0.018)	Loss 2.1655 (2.1783)	Entropy 0.70055 (0.70327)	Top-1 acc 71.484 (72.253)	Top-5 acc 89.062 (88.932)	lr 0.00075
Train [107][2370/3239]	Time 0.251 (0.648)	Data Time 0.002 (0.018)	Loss 2.9864 (2.1785)	Entropy 0.70054 (0.70326)	Top-1 acc 51.953 (72.247)	Top-5 acc 73.828 (88.926)	lr 0.00075
Train [107][2380/3239]	Time 0.228 (0.647)	Data Time 0.001 (0.018)	Loss 2.1561 (2.1786)	Entropy 0.70051 (0.70324)	Top-1 acc 76.562 (72.245)	Top-5 acc 89.062 (88.925)	lr 0.00075
Train [107][2390/3239]	Time 0.235 (0.647)	Data Time 0.001 (0.018)	Loss 2.2509 (2.1785)	Entropy 0.70052 (0.70323)	Top-1 acc 70.312 (72.248)	Top-5 acc 88.281 (88.924)	lr 0.00075
Train [107][2400/3239]	Time 0.228 (0.646)	Data Time 0.001 (0.018)	Loss 2.2354 (2.1785)	Entropy 0.70046 (0.70322)	Top-1 acc 69.531 (72.251)	Top-5 acc 87.109 (88.925)	lr 0.00075
Train [107][2410/3239]	Time 0.216 (0.645)	Data Time 0.001 (0.018)	Loss 2.3300 (2.1784)	Entropy 0.70040 (0.70321)	Top-1 acc 68.750 (72.255)	Top-5 acc 85.547 (88.926)	lr 0.00075
Train [107][2420/3239]	Time 0.259 (0.645)	Data Time 0.001 (0.018)	Loss 2.3290 (2.1784)	Entropy 0.70047 (0.70320)	Top-1 acc 70.312 (72.256)	Top-5 acc 86.719 (88.925)	lr 0.00074
Train [107][2430/3239]	Time 2.520 (0.644)	Data Time 0.001 (0.018)	Loss 2.1202 (2.1783)	Entropy 0.70047 (0.70319)	Top-1 acc 73.438 (72.256)	Top-5 acc 89.453 (88.924)	lr 0.00074
Train [107][2440/3239]	Time 0.242 (0.642)	Data Time 0.001 (0.018)	Loss 2.2577 (2.1784)	Entropy 0.70043 (0.70318)	Top-1 acc 67.188 (72.251)	Top-5 acc 85.938 (88.924)	lr 0.00074
Train [107][2450/3239]	Time 0.231 (0.642)	Data Time 0.001 (0.018)	Loss 2.1613 (2.1783)	Entropy 0.70036 (0.70316)	Top-1 acc 76.562 (72.257)	Top-5 acc 89.453 (88.926)	lr 0.00074
Train [107][2460/3239]	Time 0.231 (0.641)	Data Time 0.001 (0.018)	Loss 2.2539 (2.1785)	Entropy 0.70029 (0.70315)	Top-1 acc 69.922 (72.253)	Top-5 acc 85.938 (88.921)	lr 0.00074
Train [107][2470/3239]	Time 0.253 (0.641)	Data Time 0.001 (0.018)	Loss 2.1334 (2.1784)	Entropy 0.70034 (0.70314)	Top-1 acc 71.484 (72.258)	Top-5 acc 90.234 (88.922)	lr 0.00074
Train [107][2480/3239]	Time 0.227 (0.640)	Data Time 0.002 (0.018)	Loss 2.0399 (2.1784)	Entropy 0.70029 (0.70313)	Top-1 acc 76.953 (72.259)	Top-5 acc 88.281 (88.921)	lr 0.00074
Train [107][2490/3239]	Time 0.227 (0.639)	Data Time 0.001 (0.018)	Loss 2.1399 (2.1782)	Entropy 0.70025 (0.70312)	Top-1 acc 75.781 (72.265)	Top-5 acc 89.453 (88.926)	lr 0.00074
Train [107][2500/3239]	Time 0.282 (0.639)	Data Time 0.001 (0.017)	Loss 2.0799 (2.1780)	Entropy 0.70020 (0.70311)	Top-1 acc 75.391 (72.268)	Top-5 acc 91.016 (88.928)	lr 0.00074
Train [107][2510/3239]	Time 0.344 (0.638)	Data Time 0.002 (0.017)	Loss 2.1948 (2.1780)	Entropy 0.70017 (0.70309)	Top-1 acc 72.656 (72.266)	Top-5 acc 86.328 (88.927)	lr 0.00074
Train [107][2520/3239]	Time 0.234 (0.638)	Data Time 0.001 (0.017)	Loss 2.1980 (2.1780)	Entropy 0.70021 (0.70308)	Top-1 acc 75.781 (72.268)	Top-5 acc 87.891 (88.924)	lr 0.00074
Train [107][2530/3239]	Time 0.235 (0.637)	Data Time 0.001 (0.017)	Loss 2.2312 (2.1779)	Entropy 0.70013 (0.70307)	Top-1 acc 69.531 (72.267)	Top-5 acc 86.719 (88.924)	lr 0.00074
Train [107][2540/3239]	Time 2.498 (0.636)	Data Time 0.001 (0.017)	Loss 2.2329 (2.1778)	Entropy 0.70013 (0.70306)	Top-1 acc 73.828 (72.274)	Top-5 acc 89.062 (88.929)	lr 0.00074
Train [107][2550/3239]	Time 0.467 (0.635)	Data Time 0.001 (0.017)	Loss 2.0937 (2.1776)	Entropy 0.70008 (0.70305)	Top-1 acc 74.609 (72.280)	Top-5 acc 90.234 (88.934)	lr 0.00074
Train [107][2560/3239]	Time 0.240 (0.634)	Data Time 0.001 (0.017)	Loss 2.0306 (2.1776)	Entropy 0.70000 (0.70304)	Top-1 acc 73.047 (72.280)	Top-5 acc 92.188 (88.936)	lr 0.00074
Train [107][2570/3239]	Time 0.246 (0.634)	Data Time 0.001 (0.017)	Loss 2.1814 (2.1777)	Entropy 0.70011 (0.70303)	Top-1 acc 71.875 (72.276)	Top-5 acc 87.891 (88.933)	lr 0.00074
Train [107][2580/3239]	Time 0.238 (0.633)	Data Time 0.001 (0.017)	Loss 2.1157 (2.1776)	Entropy 0.70003 (0.70301)	Top-1 acc 73.828 (72.276)	Top-5 acc 87.891 (88.934)	lr 0.00074
Train [107][2590/3239]	Time 0.341 (0.632)	Data Time 0.001 (0.017)	Loss 2.2603 (2.1778)	Entropy 0.70014 (0.70300)	Top-1 acc 72.266 (72.271)	Top-5 acc 89.062 (88.934)	lr 0.00074
Train [107][2600/3239]	Time 0.234 (0.632)	Data Time 0.001 (0.017)	Loss 1.9804 (2.1777)	Entropy 0.70014 (0.70299)	Top-1 acc 77.734 (72.273)	Top-5 acc 91.406 (88.936)	lr 0.00074
Train [107][2610/3239]	Time 0.229 (0.631)	Data Time 0.001 (0.017)	Loss 2.1012 (2.1776)	Entropy 0.70009 (0.70298)	Top-1 acc 73.047 (72.276)	Top-5 acc 90.234 (88.938)	lr 0.00074
Train [107][2620/3239]	Time 0.220 (0.631)	Data Time 0.001 (0.017)	Loss 2.0621 (2.1777)	Entropy 0.70002 (0.70297)	Top-1 acc 75.781 (72.270)	Top-5 acc 89.844 (88.935)	lr 0.00074
Train [107][2630/3239]	Time 0.232 (0.630)	Data Time 0.001 (0.017)	Loss 2.2203 (2.1777)	Entropy 0.69995 (0.70296)	Top-1 acc 71.875 (72.271)	Top-5 acc 87.891 (88.935)	lr 0.00074
Train [107][2640/3239]	Time 0.263 (0.630)	Data Time 0.001 (0.017)	Loss 2.1546 (2.1778)	Entropy 0.69989 (0.70295)	Top-1 acc 71.875 (72.268)	Top-5 acc 89.453 (88.933)	lr 0.00074
Train [107][2650/3239]	Time 0.226 (0.629)	Data Time 0.001 (0.017)	Loss 2.2968 (2.1778)	Entropy 0.69991 (0.70293)	Top-1 acc 71.484 (72.267)	Top-5 acc 86.328 (88.935)	lr 0.00074
Train [107][2660/3239]	Time 0.220 (0.629)	Data Time 0.001 (0.017)	Loss 2.2008 (2.1778)	Entropy 0.69996 (0.70292)	Top-1 acc 73.828 (72.267)	Top-5 acc 87.109 (88.930)	lr 0.00074
Train [107][2670/3239]	Time 0.258 (0.628)	Data Time 0.001 (0.016)	Loss 2.1149 (2.1777)	Entropy 0.69933 (0.70291)	Top-1 acc 73.047 (72.267)	Top-5 acc 91.016 (88.932)	lr 0.00074
Train [107][2680/3239]	Time 0.229 (0.627)	Data Time 0.001 (0.016)	Loss 2.3314 (2.1779)	Entropy 0.69937 (0.70290)	Top-1 acc 71.484 (72.266)	Top-5 acc 85.938 (88.928)	lr 0.00074
Train [107][2690/3239]	Time 0.233 (0.627)	Data Time 0.001 (0.016)	Loss 2.1622 (2.1778)	Entropy 0.69930 (0.70289)	Top-1 acc 75.391 (72.271)	Top-5 acc 86.328 (88.929)	lr 0.00074
Train [107][2700/3239]	Time 0.221 (0.626)	Data Time 0.001 (0.016)	Loss 2.2660 (2.1778)	Entropy 0.69926 (0.70287)	Top-1 acc 73.047 (72.275)	Top-5 acc 86.719 (88.926)	lr 0.00074
Train [107][2710/3239]	Time 0.275 (0.645)	Data Time 0.004 (0.016)	Loss 2.5357 (2.1779)	Entropy 0.69922 (0.70286)	Top-1 acc 61.719 (72.274)	Top-5 acc 85.156 (88.927)	lr 0.00073
Train [107][2720/3239]	Time 0.226 (0.645)	Data Time 0.002 (0.016)	Loss 2.0386 (2.1779)	Entropy 0.69915 (0.70285)	Top-1 acc 76.172 (72.276)	Top-5 acc 91.797 (88.927)	lr 0.00073
Train [107][2730/3239]	Time 0.234 (0.644)	Data Time 0.002 (0.016)	Loss 2.2010 (2.1778)	Entropy 0.69912 (0.70283)	Top-1 acc 70.703 (72.277)	Top-5 acc 89.062 (88.927)	lr 0.00073
Train [107][2740/3239]	Time 0.228 (0.644)	Data Time 0.001 (0.016)	Loss 2.1489 (2.1778)	Entropy 0.69914 (0.70282)	Top-1 acc 73.828 (72.278)	Top-5 acc 89.844 (88.927)	lr 0.00073
Train [107][2750/3239]	Time 0.242 (0.643)	Data Time 0.001 (0.016)	Loss 2.0172 (2.1778)	Entropy 0.69916 (0.70281)	Top-1 acc 76.172 (72.282)	Top-5 acc 90.625 (88.926)	lr 0.00073
Train [107][2760/3239]	Time 0.234 (0.643)	Data Time 0.001 (0.016)	Loss 2.3413 (2.1777)	Entropy 0.69920 (0.70279)	Top-1 acc 72.266 (72.286)	Top-5 acc 85.156 (88.928)	lr 0.00073
Train [107][2770/3239]	Time 0.265 (0.642)	Data Time 0.001 (0.016)	Loss 2.1487 (2.1778)	Entropy 0.69917 (0.70278)	Top-1 acc 76.172 (72.284)	Top-5 acc 88.672 (88.927)	lr 0.00073
Train [107][2780/3239]	Time 0.233 (0.642)	Data Time 0.001 (0.016)	Loss 2.0687 (2.1777)	Entropy 0.69915 (0.70277)	Top-1 acc 76.562 (72.286)	Top-5 acc 92.969 (88.929)	lr 0.00073
Train [107][2790/3239]	Time 0.216 (0.641)	Data Time 0.001 (0.016)	Loss 2.1340 (2.1777)	Entropy 0.69901 (0.70275)	Top-1 acc 75.000 (72.287)	Top-5 acc 89.062 (88.930)	lr 0.00073
Train [107][2800/3239]	Time 0.228 (0.640)	Data Time 0.001 (0.016)	Loss 2.2530 (2.1777)	Entropy 0.69890 (0.70274)	Top-1 acc 71.094 (72.288)	Top-5 acc 89.453 (88.931)	lr 0.00073
Train [107][2810/3239]	Time 0.237 (0.640)	Data Time 0.001 (0.016)	Loss 2.1671 (2.1778)	Entropy 0.69885 (0.70273)	Top-1 acc 72.266 (72.287)	Top-5 acc 90.234 (88.931)	lr 0.00073
Train [107][2820/3239]	Time 0.256 (0.639)	Data Time 0.001 (0.016)	Loss 2.2473 (2.1778)	Entropy 0.69880 (0.70271)	Top-1 acc 71.484 (72.288)	Top-5 acc 86.719 (88.931)	lr 0.00073
Train [107][2830/3239]	Time 0.303 (0.639)	Data Time 0.002 (0.016)	Loss 2.0897 (2.1778)	Entropy 0.69873 (0.70270)	Top-1 acc 73.438 (72.288)	Top-5 acc 89.453 (88.930)	lr 0.00073
Train [107][2840/3239]	Time 0.268 (0.638)	Data Time 0.002 (0.016)	Loss 2.2763 (2.1777)	Entropy 0.69868 (0.70268)	Top-1 acc 70.312 (72.290)	Top-5 acc 85.156 (88.931)	lr 0.00073
Train [107][2850/3239]	Time 0.267 (0.638)	Data Time 0.002 (0.016)	Loss 2.1696 (2.1777)	Entropy 0.69867 (0.70267)	Top-1 acc 74.609 (72.292)	Top-5 acc 87.109 (88.930)	lr 0.00073
Train [107][2860/3239]	Time 0.236 (0.637)	Data Time 0.001 (0.015)	Loss 2.1561 (2.1777)	Entropy 0.69870 (0.70266)	Top-1 acc 71.875 (72.290)	Top-5 acc 89.844 (88.930)	lr 0.00073
Train [107][2870/3239]	Time 0.290 (0.637)	Data Time 0.001 (0.015)	Loss 2.2375 (2.1778)	Entropy 0.69883 (0.70264)	Top-1 acc 71.875 (72.289)	Top-5 acc 85.938 (88.926)	lr 0.00073
Train [107][2880/3239]	Time 0.336 (0.636)	Data Time 0.001 (0.015)	Loss 2.1720 (2.1778)	Entropy 0.69871 (0.70263)	Top-1 acc 70.703 (72.290)	Top-5 acc 88.672 (88.927)	lr 0.00073
Train [107][2890/3239]	Time 0.228 (0.636)	Data Time 0.001 (0.015)	Loss 2.1133 (2.1776)	Entropy 0.69867 (0.70262)	Top-1 acc 73.047 (72.293)	Top-5 acc 89.453 (88.929)	lr 0.00073
Train [107][2900/3239]	Time 0.220 (0.635)	Data Time 0.001 (0.015)	Loss 2.0656 (2.1776)	Entropy 0.69864 (0.70260)	Top-1 acc 75.781 (72.291)	Top-5 acc 93.359 (88.928)	lr 0.00073
Train [107][2910/3239]	Time 0.234 (0.635)	Data Time 0.001 (0.015)	Loss 2.0185 (2.1776)	Entropy 0.69857 (0.70259)	Top-1 acc 75.781 (72.290)	Top-5 acc 92.188 (88.928)	lr 0.00073
Train [107][2920/3239]	Time 0.327 (0.634)	Data Time 0.001 (0.015)	Loss 2.1073 (2.1776)	Entropy 0.69854 (0.70257)	Top-1 acc 74.609 (72.289)	Top-5 acc 89.844 (88.927)	lr 0.00073
Train [107][2930/3239]	Time 0.225 (0.634)	Data Time 0.001 (0.015)	Loss 2.1773 (2.1776)	Entropy 0.69847 (0.70256)	Top-1 acc 71.484 (72.288)	Top-5 acc 88.281 (88.927)	lr 0.00073
Train [107][2940/3239]	Time 0.263 (0.633)	Data Time 0.001 (0.015)	Loss 2.2475 (2.1776)	Entropy 0.69851 (0.70255)	Top-1 acc 72.656 (72.288)	Top-5 acc 87.891 (88.927)	lr 0.00073
Train [107][2950/3239]	Time 0.262 (0.633)	Data Time 0.001 (0.015)	Loss 2.1652 (2.1776)	Entropy 0.69846 (0.70253)	Top-1 acc 75.000 (72.292)	Top-5 acc 89.062 (88.925)	lr 0.00073
Train [107][2960/3239]	Time 0.328 (0.632)	Data Time 0.001 (0.015)	Loss 2.1820 (2.1776)	Entropy 0.69844 (0.70252)	Top-1 acc 75.391 (72.292)	Top-5 acc 88.672 (88.925)	lr 0.00073
Train [107][2970/3239]	Time 0.242 (0.632)	Data Time 0.001 (0.015)	Loss 2.1811 (2.1776)	Entropy 0.69835 (0.70251)	Top-1 acc 72.266 (72.290)	Top-5 acc 87.891 (88.926)	lr 0.00073
Train [107][2980/3239]	Time 0.219 (0.631)	Data Time 0.001 (0.015)	Loss 2.1013 (2.1775)	Entropy 0.69830 (0.70249)	Top-1 acc 71.875 (72.291)	Top-5 acc 91.406 (88.926)	lr 0.00073
Train [107][2990/3239]	Time 0.228 (0.631)	Data Time 0.001 (0.015)	Loss 2.1118 (2.1776)	Entropy 0.69826 (0.70248)	Top-1 acc 71.484 (72.290)	Top-5 acc 90.234 (88.927)	lr 0.00073
Train [107][3000/3239]	Time 0.356 (0.630)	Data Time 0.001 (0.015)	Loss 2.2021 (2.1775)	Entropy 0.69820 (0.70246)	Top-1 acc 71.484 (72.293)	Top-5 acc 89.844 (88.928)	lr 0.00073
Train [107][3010/3239]	Time 0.254 (0.630)	Data Time 0.001 (0.015)	Loss 2.2016 (2.1777)	Entropy 0.69821 (0.70245)	Top-1 acc 71.094 (72.292)	Top-5 acc 86.719 (88.924)	lr 0.00072
Train [107][3020/3239]	Time 0.228 (0.629)	Data Time 0.001 (0.015)	Loss 2.3023 (2.1777)	Entropy 0.69814 (0.70244)	Top-1 acc 65.625 (72.289)	Top-5 acc 85.156 (88.922)	lr 0.00072
Train [107][3030/3239]	Time 0.249 (0.629)	Data Time 0.001 (0.015)	Loss 2.1685 (2.1778)	Entropy 0.69813 (0.70242)	Top-1 acc 70.312 (72.286)	Top-5 acc 89.453 (88.922)	lr 0.00072
Train [107][3040/3239]	Time 0.584 (0.644)	Data Time 0.004 (0.015)	Loss 2.2478 (2.1779)	Entropy 0.69802 (0.70241)	Top-1 acc 70.703 (72.282)	Top-5 acc 87.109 (88.917)	lr 0.00072
Train [107][3050/3239]	Time 0.227 (0.644)	Data Time 0.002 (0.015)	Loss 2.2426 (2.1779)	Entropy 0.69803 (0.70239)	Top-1 acc 67.969 (72.278)	Top-5 acc 88.672 (88.919)	lr 0.00072
Train [107][3060/3239]	Time 0.230 (0.644)	Data Time 0.002 (0.015)	Loss 2.2518 (2.1779)	Entropy 0.69801 (0.70238)	Top-1 acc 73.828 (72.281)	Top-5 acc 89.062 (88.919)	lr 0.00072
Train [107][3070/3239]	Time 0.228 (0.643)	Data Time 0.001 (0.015)	Loss 2.1804 (2.1779)	Entropy 0.69801 (0.70236)	Top-1 acc 73.438 (72.279)	Top-5 acc 89.453 (88.919)	lr 0.00072
Train [107][3080/3239]	Time 0.313 (0.643)	Data Time 0.001 (0.014)	Loss 2.1762 (2.1781)	Entropy 0.69800 (0.70235)	Top-1 acc 72.656 (72.275)	Top-5 acc 89.844 (88.917)	lr 0.00072
Train [107][3090/3239]	Time 0.229 (0.642)	Data Time 0.001 (0.014)	Loss 2.2695 (2.1781)	Entropy 0.69801 (0.70234)	Top-1 acc 70.312 (72.271)	Top-5 acc 88.281 (88.918)	lr 0.00072
Train [107][3100/3239]	Time 0.262 (0.642)	Data Time 0.001 (0.014)	Loss 2.3643 (2.1783)	Entropy 0.69792 (0.70232)	Top-1 acc 66.797 (72.264)	Top-5 acc 86.328 (88.916)	lr 0.00072
Train [107][3110/3239]	Time 0.264 (0.641)	Data Time 0.001 (0.014)	Loss 2.1071 (2.1783)	Entropy 0.69792 (0.70231)	Top-1 acc 75.000 (72.267)	Top-5 acc 90.625 (88.915)	lr 0.00072
Train [107][3120/3239]	Time 0.229 (0.641)	Data Time 0.001 (0.014)	Loss 2.1677 (2.1782)	Entropy 0.69794 (0.70229)	Top-1 acc 73.438 (72.266)	Top-5 acc 88.281 (88.916)	lr 0.00072
Train [107][3130/3239]	Time 0.225 (0.640)	Data Time 0.001 (0.014)	Loss 2.2363 (2.1783)	Entropy 0.69797 (0.70228)	Top-1 acc 69.531 (72.265)	Top-5 acc 88.281 (88.913)	lr 0.00072
Train [107][3140/3239]	Time 0.228 (0.640)	Data Time 0.001 (0.014)	Loss 2.2756 (2.1786)	Entropy 0.69806 (0.70227)	Top-1 acc 70.312 (72.257)	Top-5 acc 87.109 (88.909)	lr 0.00072
Train [107][3150/3239]	Time 0.240 (0.639)	Data Time 0.001 (0.014)	Loss 2.2487 (2.1786)	Entropy 0.69801 (0.70225)	Top-1 acc 69.922 (72.252)	Top-5 acc 86.719 (88.906)	lr 0.00072
Train [107][3160/3239]	Time 0.235 (0.639)	Data Time 0.002 (0.014)	Loss 2.4213 (2.1787)	Entropy 0.69802 (0.70224)	Top-1 acc 64.844 (72.250)	Top-5 acc 83.594 (88.903)	lr 0.00072
Train [107][3170/3239]	Time 0.236 (0.638)	Data Time 0.001 (0.014)	Loss 2.1843 (2.1787)	Entropy 0.69800 (0.70223)	Top-1 acc 71.875 (72.250)	Top-5 acc 88.281 (88.899)	lr 0.00072
Train [107][3180/3239]	Time 0.234 (0.638)	Data Time 0.000 (0.014)	Loss 2.1943 (2.1787)	Entropy 0.69799 (0.70221)	Top-1 acc 72.266 (72.250)	Top-5 acc 90.625 (88.901)	lr 0.00072
Train [107][3190/3239]	Time 0.230 (0.637)	Data Time 0.000 (0.014)	Loss 2.2910 (2.1787)	Entropy 0.69811 (0.70220)	Top-1 acc 69.922 (72.249)	Top-5 acc 85.547 (88.900)	lr 0.00072
Train [107][3200/3239]	Time 0.236 (0.637)	Data Time 0.000 (0.014)	Loss 2.0172 (2.1786)	Entropy 0.69813 (0.70219)	Top-1 acc 75.000 (72.252)	Top-5 acc 90.625 (88.901)	lr 0.00072
Train [107][3210/3239]	Time 0.236 (0.636)	Data Time 0.000 (0.014)	Loss 2.1961 (2.1786)	Entropy 0.69812 (0.70217)	Top-1 acc 73.438 (72.251)	Top-5 acc 88.672 (88.903)	lr 0.00072
Train [107][3220/3239]	Time 0.228 (0.636)	Data Time 0.000 (0.014)	Loss 2.3051 (2.1785)	Entropy 0.69807 (0.70216)	Top-1 acc 67.578 (72.252)	Top-5 acc 84.766 (88.902)	lr 0.00072
Train [107][3230/3239]	Time 0.246 (0.635)	Data Time 0.000 (0.014)	Loss 2.1594 (2.1785)	Entropy 0.69807 (0.70215)	Top-1 acc 67.969 (72.249)	Top-5 acc 90.234 (88.905)	lr 0.00072
Train [107][3239/3239]	Time 2.326 (0.635)	Data Time 0.000 (0.014)	Loss 2.2165 (2.1785)	Entropy 0.69807 (0.70214)	Top-1 acc 66.667 (72.250)	Top-5 acc 90.123 (88.905)	lr 0.00072
==========Valid [107/120]	loss 1.206	top-1 acc 72.401 (72.484)	top-5 acc 89.896	Train top-1 72.250	top-5 88.905	Entropy 0.69807	Latency-None: 0.000ms	Flops: 544.27M
Train [108][0/3239]	Time 41.628 (41.628)	Data Time 39.143 (39.143)	Loss 2.2790 (2.2790)	Entropy 0.69804 (0.69804)	Top-1 acc 69.141 (69.141)	Top-5 acc 88.281 (88.281)	lr 0.00072
Train [108][10/3239]	Time 2.970 (4.362)	Data Time 0.002 (3.563)	Loss 2.1162 (2.1773)	Entropy 0.69804 (0.69804)	Top-1 acc 74.609 (72.408)	Top-5 acc 89.453 (88.530)	lr 0.00072
Train [108][20/3239]	Time 0.332 (2.410)	Data Time 0.001 (1.867)	Loss 2.2465 (2.1667)	Entropy 0.69794 (0.69799)	Top-1 acc 71.484 (73.251)	Top-5 acc 86.719 (88.951)	lr 0.00072
Train [108][30/3239]	Time 0.242 (1.790)	Data Time 0.001 (1.265)	Loss 2.2207 (2.1650)	Entropy 0.69797 (0.69798)	Top-1 acc 71.484 (73.148)	Top-5 acc 87.891 (89.088)	lr 0.00072
Train [108][40/3239]	Time 0.227 (1.472)	Data Time 0.001 (0.957)	Loss 2.2246 (2.1685)	Entropy 0.69794 (0.69797)	Top-1 acc 70.703 (72.761)	Top-5 acc 88.672 (89.053)	lr 0.00072
Train [108][50/3239]	Time 0.330 (1.279)	Data Time 0.001 (0.770)	Loss 2.1185 (2.1743)	Entropy 0.69792 (0.69797)	Top-1 acc 79.297 (72.664)	Top-5 acc 87.891 (89.078)	lr 0.00072
Train [108][60/3239]	Time 0.238 (1.151)	Data Time 0.001 (0.644)	Loss 2.1303 (2.1774)	Entropy 0.69797 (0.69796)	Top-1 acc 75.391 (72.586)	Top-5 acc 87.891 (88.960)	lr 0.00071
Train [108][70/3239]	Time 0.244 (1.057)	Data Time 0.002 (0.553)	Loss 2.3411 (2.1822)	Entropy 0.69801 (0.69797)	Top-1 acc 69.922 (72.524)	Top-5 acc 87.500 (88.875)	lr 0.00071
Train [108][80/3239]	Time 0.241 (0.988)	Data Time 0.001 (0.485)	Loss 2.0864 (2.1835)	Entropy 0.69797 (0.69797)	Top-1 acc 75.391 (72.401)	Top-5 acc 89.844 (88.976)	lr 0.00071
Train [108][90/3239]	Time 0.317 (0.933)	Data Time 0.001 (0.432)	Loss 2.0331 (2.1782)	Entropy 0.69777 (0.69796)	Top-1 acc 75.391 (72.506)	Top-5 acc 91.406 (89.032)	lr 0.00071
Train [108][100/3239]	Time 0.234 (0.888)	Data Time 0.001 (0.389)	Loss 2.2668 (2.1816)	Entropy 0.69779 (0.69795)	Top-1 acc 69.531 (72.401)	Top-5 acc 86.719 (88.966)	lr 0.00071
Train [108][110/3239]	Time 0.255 (0.851)	Data Time 0.001 (0.354)	Loss 2.1487 (2.1823)	Entropy 0.69776 (0.69793)	Top-1 acc 71.094 (72.273)	Top-5 acc 88.672 (88.911)	lr 0.00071
Train [108][120/3239]	Time 2.680 (0.821)	Data Time 0.002 (0.325)	Loss 2.1135 (2.1781)	Entropy 0.69776 (0.69792)	Top-1 acc 75.000 (72.469)	Top-5 acc 90.234 (88.946)	lr 0.00071
Train [108][130/3239]	Time 0.241 (0.776)	Data Time 0.001 (0.301)	Loss 2.0619 (2.1755)	Entropy 0.69779 (0.69791)	Top-1 acc 75.391 (72.507)	Top-5 acc 90.234 (88.967)	lr 0.00071
Train [108][140/3239]	Time 0.242 (0.756)	Data Time 0.001 (0.279)	Loss 2.1248 (2.1718)	Entropy 0.69773 (0.69789)	Top-1 acc 76.953 (72.565)	Top-5 acc 87.891 (89.057)	lr 0.00071
Train [108][150/3239]	Time 0.249 (1.125)	Data Time 0.002 (0.261)	Loss 2.2501 (2.1737)	Entropy 0.69769 (0.69788)	Top-1 acc 72.266 (72.519)	Top-5 acc 87.500 (89.029)	lr 0.00071
Train [108][160/3239]	Time 0.231 (1.085)	Data Time 0.002 (0.245)	Loss 2.1064 (2.1713)	Entropy 0.69769 (0.69787)	Top-1 acc 75.391 (72.508)	Top-5 acc 90.625 (89.053)	lr 0.00071
Train [108][170/3239]	Time 0.240 (1.050)	Data Time 0.001 (0.231)	Loss 2.1149 (2.1698)	Entropy 0.69760 (0.69786)	Top-1 acc 71.875 (72.515)	Top-5 acc 89.062 (89.051)	lr 0.00071
Train [108][180/3239]	Time 0.236 (1.019)	Data Time 0.001 (0.218)	Loss 2.3651 (2.1716)	Entropy 0.69761 (0.69784)	Top-1 acc 65.625 (72.423)	Top-5 acc 86.719 (89.006)	lr 0.00071
Train [108][190/3239]	Time 0.235 (0.991)	Data Time 0.001 (0.207)	Loss 2.2249 (2.1740)	Entropy 0.69755 (0.69783)	Top-1 acc 74.609 (72.392)	Top-5 acc 86.328 (88.975)	lr 0.00071
Train [108][200/3239]	Time 0.230 (0.965)	Data Time 0.001 (0.196)	Loss 2.2935 (2.1755)	Entropy 0.69747 (0.69781)	Top-1 acc 68.750 (72.339)	Top-5 acc 84.766 (88.938)	lr 0.00071
Train [108][210/3239]	Time 0.227 (0.943)	Data Time 0.002 (0.187)	Loss 2.0994 (2.1764)	Entropy 0.69746 (0.69780)	Top-1 acc 74.219 (72.325)	Top-5 acc 90.625 (88.907)	lr 0.00071
Train [108][220/3239]	Time 0.233 (0.922)	Data Time 0.001 (0.179)	Loss 2.1908 (2.1762)	Entropy 0.69748 (0.69778)	Top-1 acc 71.484 (72.296)	Top-5 acc 87.891 (88.928)	lr 0.00071
Train [108][230/3239]	Time 2.591 (0.903)	Data Time 0.002 (0.171)	Loss 2.2838 (2.1753)	Entropy 0.69748 (0.69777)	Top-1 acc 71.094 (72.325)	Top-5 acc 85.938 (88.931)	lr 0.00071
Train [108][240/3239]	Time 0.224 (0.875)	Data Time 0.001 (0.164)	Loss 2.1045 (2.1764)	Entropy 0.69746 (0.69776)	Top-1 acc 73.047 (72.343)	Top-5 acc 89.453 (88.912)	lr 0.00071
Train [108][250/3239]	Time 0.246 (0.859)	Data Time 0.001 (0.158)	Loss 2.1033 (2.1767)	Entropy 0.69744 (0.69774)	Top-1 acc 75.000 (72.351)	Top-5 acc 87.891 (88.890)	lr 0.00071
Train [108][260/3239]	Time 0.325 (0.845)	Data Time 0.001 (0.152)	Loss 1.9672 (2.1772)	Entropy 0.69740 (0.69773)	Top-1 acc 77.734 (72.320)	Top-5 acc 92.578 (88.875)	lr 0.00071
Train [108][270/3239]	Time 0.238 (0.832)	Data Time 0.001 (0.146)	Loss 1.9613 (2.1751)	Entropy 0.69744 (0.69772)	Top-1 acc 81.641 (72.364)	Top-5 acc 92.578 (88.910)	lr 0.00071
Train [108][280/3239]	Time 0.243 (0.820)	Data Time 0.002 (0.141)	Loss 2.1628 (2.1764)	Entropy 0.69743 (0.69771)	Top-1 acc 76.172 (72.362)	Top-5 acc 87.891 (88.876)	lr 0.00071
Train [108][290/3239]	Time 0.234 (0.808)	Data Time 0.002 (0.136)	Loss 2.1473 (2.1774)	Entropy 0.69743 (0.69770)	Top-1 acc 69.531 (72.295)	Top-5 acc 90.234 (88.867)	lr 0.00071
Train [108][300/3239]	Time 0.318 (0.798)	Data Time 0.001 (0.132)	Loss 2.2452 (2.1773)	Entropy 0.69740 (0.69769)	Top-1 acc 67.188 (72.266)	Top-5 acc 88.281 (88.881)	lr 0.00071
Train [108][310/3239]	Time 0.221 (0.788)	Data Time 0.001 (0.128)	Loss 2.1290 (2.1762)	Entropy 0.69741 (0.69768)	Top-1 acc 71.484 (72.276)	Top-5 acc 89.453 (88.911)	lr 0.00071
Train [108][320/3239]	Time 0.228 (0.778)	Data Time 0.001 (0.124)	Loss 2.1088 (2.1759)	Entropy 0.69738 (0.69767)	Top-1 acc 75.781 (72.283)	Top-5 acc 91.016 (88.906)	lr 0.00071
Train [108][330/3239]	Time 0.225 (0.769)	Data Time 0.001 (0.120)	Loss 2.0360 (2.1750)	Entropy 0.69738 (0.69766)	Top-1 acc 75.781 (72.305)	Top-5 acc 91.406 (88.909)	lr 0.00071
Train [108][340/3239]	Time 2.609 (0.760)	Data Time 0.001 (0.116)	Loss 2.1562 (2.1759)	Entropy 0.69738 (0.69766)	Top-1 acc 73.047 (72.287)	Top-5 acc 88.281 (88.893)	lr 0.00071
Train [108][350/3239]	Time 0.231 (0.746)	Data Time 0.001 (0.113)	Loss 2.2120 (2.1749)	Entropy 0.69726 (0.69764)	Top-1 acc 69.531 (72.301)	Top-5 acc 89.062 (88.923)	lr 0.00071
Train [108][360/3239]	Time 0.246 (0.738)	Data Time 0.001 (0.110)	Loss 2.0143 (2.1747)	Entropy 0.69728 (0.69763)	Top-1 acc 76.953 (72.283)	Top-5 acc 93.359 (88.926)	lr 0.00070
Train [108][370/3239]	Time 0.240 (0.731)	Data Time 0.001 (0.107)	Loss 2.2594 (2.1757)	Entropy 0.69727 (0.69762)	Top-1 acc 71.094 (72.251)	Top-5 acc 86.328 (88.897)	lr 0.00070
Train [108][380/3239]	Time 0.239 (0.725)	Data Time 0.001 (0.104)	Loss 2.2058 (2.1751)	Entropy 0.69718 (0.69761)	Top-1 acc 69.922 (72.277)	Top-5 acc 87.891 (88.911)	lr 0.00070
Train [108][390/3239]	Time 0.232 (0.719)	Data Time 0.001 (0.102)	Loss 2.3521 (2.1753)	Entropy 0.69719 (0.69760)	Top-1 acc 67.969 (72.265)	Top-5 acc 86.328 (88.906)	lr 0.00070
Train [108][400/3239]	Time 0.239 (0.713)	Data Time 0.002 (0.099)	Loss 2.0544 (2.1755)	Entropy 0.69722 (0.69759)	Top-1 acc 75.781 (72.261)	Top-5 acc 90.234 (88.898)	lr 0.00070
Train [108][410/3239]	Time 0.214 (0.707)	Data Time 0.001 (0.097)	Loss 2.0026 (2.1754)	Entropy 0.69712 (0.69758)	Top-1 acc 79.688 (72.275)	Top-5 acc 92.188 (88.898)	lr 0.00070
Train [108][420/3239]	Time 0.233 (0.702)	Data Time 0.001 (0.095)	Loss 2.2544 (2.1749)	Entropy 0.69710 (0.69757)	Top-1 acc 73.047 (72.301)	Top-5 acc 87.500 (88.894)	lr 0.00070
Train [108][430/3239]	Time 0.233 (0.697)	Data Time 0.001 (0.092)	Loss 2.1992 (2.1749)	Entropy 0.69704 (0.69756)	Top-1 acc 69.922 (72.318)	Top-5 acc 89.062 (88.888)	lr 0.00070
Train [108][440/3239]	Time 0.225 (0.692)	Data Time 0.001 (0.090)	Loss 2.4224 (2.1737)	Entropy 0.69708 (0.69755)	Top-1 acc 66.797 (72.339)	Top-5 acc 85.156 (88.922)	lr 0.00070
Train [108][450/3239]	Time 2.600 (0.687)	Data Time 0.001 (0.088)	Loss 2.1931 (2.1735)	Entropy 0.69708 (0.69754)	Top-1 acc 73.438 (72.336)	Top-5 acc 87.500 (88.915)	lr 0.00070
Train [108][460/3239]	Time 0.249 (0.678)	Data Time 0.001 (0.087)	Loss 2.1099 (2.1727)	Entropy 0.69703 (0.69753)	Top-1 acc 75.000 (72.354)	Top-5 acc 88.281 (88.938)	lr 0.00070
Train [108][470/3239]	Time 0.328 (0.674)	Data Time 0.001 (0.085)	Loss 2.1477 (2.1736)	Entropy 0.69696 (0.69751)	Top-1 acc 72.656 (72.312)	Top-5 acc 89.844 (88.914)	lr 0.00070
Train [108][480/3239]	Time 0.273 (0.670)	Data Time 0.001 (0.083)	Loss 2.2524 (2.1743)	Entropy 0.69695 (0.69750)	Top-1 acc 70.703 (72.285)	Top-5 acc 89.453 (88.894)	lr 0.00070
Train [108][490/3239]	Time 0.231 (0.666)	Data Time 0.001 (0.081)	Loss 2.2001 (2.1746)	Entropy 0.69694 (0.69749)	Top-1 acc 72.266 (72.270)	Top-5 acc 87.500 (88.887)	lr 0.00070
Train [108][500/3239]	Time 0.230 (0.662)	Data Time 0.001 (0.080)	Loss 2.0203 (2.1740)	Entropy 0.69698 (0.69748)	Top-1 acc 76.172 (72.291)	Top-5 acc 89.844 (88.903)	lr 0.00070
Train [108][510/3239]	Time 0.243 (0.769)	Data Time 0.002 (0.078)	Loss 2.0023 (2.1737)	Entropy 0.69699 (0.69747)	Top-1 acc 78.906 (72.295)	Top-5 acc 91.797 (88.912)	lr 0.00070
Train [108][520/3239]	Time 0.243 (0.764)	Data Time 0.002 (0.077)	Loss 2.0979 (2.1731)	Entropy 0.69696 (0.69746)	Top-1 acc 74.219 (72.299)	Top-5 acc 90.625 (88.923)	lr 0.00070
Train [108][530/3239]	Time 0.228 (0.759)	Data Time 0.002 (0.075)	Loss 2.1034 (2.1737)	Entropy 0.69695 (0.69745)	Top-1 acc 76.953 (72.295)	Top-5 acc 92.188 (88.918)	lr 0.00070
Train [108][540/3239]	Time 0.237 (0.754)	Data Time 0.002 (0.074)	Loss 2.1322 (2.1735)	Entropy 0.69690 (0.69744)	Top-1 acc 73.438 (72.298)	Top-5 acc 88.281 (88.914)	lr 0.00070
Train [108][550/3239]	Time 0.265 (0.749)	Data Time 0.001 (0.073)	Loss 2.2537 (2.1732)	Entropy 0.69681 (0.69743)	Top-1 acc 69.531 (72.315)	Top-5 acc 86.328 (88.913)	lr 0.00070
Train [108][560/3239]	Time 2.621 (0.744)	Data Time 0.003 (0.071)	Loss 2.3012 (2.1729)	Entropy 0.69681 (0.69742)	Top-1 acc 69.141 (72.327)	Top-5 acc 85.547 (88.916)	lr 0.00070
Train [108][570/3239]	Time 0.251 (0.735)	Data Time 0.002 (0.070)	Loss 2.1649 (2.1728)	Entropy 0.69683 (0.69741)	Top-1 acc 71.484 (72.318)	Top-5 acc 88.281 (88.908)	lr 0.00070
Train [108][580/3239]	Time 0.224 (0.731)	Data Time 0.001 (0.069)	Loss 2.1665 (2.1730)	Entropy 0.69687 (0.69740)	Top-1 acc 73.438 (72.311)	Top-5 acc 90.625 (88.911)	lr 0.00070
Train [108][590/3239]	Time 0.341 (0.727)	Data Time 0.001 (0.068)	Loss 2.2200 (2.1726)	Entropy 0.69689 (0.69739)	Top-1 acc 76.172 (72.324)	Top-5 acc 87.891 (88.904)	lr 0.00070
Train [108][600/3239]	Time 0.229 (0.723)	Data Time 0.001 (0.067)	Loss 2.0404 (2.1724)	Entropy 0.69688 (0.69739)	Top-1 acc 74.609 (72.320)	Top-5 acc 92.188 (88.916)	lr 0.00070
Train [108][610/3239]	Time 0.255 (0.719)	Data Time 0.001 (0.066)	Loss 2.1345 (2.1724)	Entropy 0.69695 (0.69738)	Top-1 acc 73.047 (72.319)	Top-5 acc 89.453 (88.917)	lr 0.00070
Train [108][620/3239]	Time 0.233 (0.715)	Data Time 0.001 (0.065)	Loss 2.1747 (2.1719)	Entropy 0.69687 (0.69737)	Top-1 acc 73.828 (72.340)	Top-5 acc 88.281 (88.922)	lr 0.00070
Train [108][630/3239]	Time 0.322 (0.712)	Data Time 0.001 (0.064)	Loss 2.1135 (2.1718)	Entropy 0.69689 (0.69736)	Top-1 acc 73.438 (72.340)	Top-5 acc 91.016 (88.920)	lr 0.00070
Train [108][640/3239]	Time 0.219 (0.708)	Data Time 0.001 (0.063)	Loss 2.2454 (2.1724)	Entropy 0.69682 (0.69735)	Top-1 acc 71.875 (72.331)	Top-5 acc 87.500 (88.905)	lr 0.00070
Train [108][650/3239]	Time 0.233 (0.705)	Data Time 0.001 (0.062)	Loss 2.0598 (2.1727)	Entropy 0.69675 (0.69735)	Top-1 acc 75.781 (72.323)	Top-5 acc 90.625 (88.896)	lr 0.00070
Train [108][660/3239]	Time 0.213 (0.702)	Data Time 0.001 (0.061)	Loss 2.0879 (2.1727)	Entropy 0.69651 (0.69734)	Top-1 acc 75.781 (72.327)	Top-5 acc 90.234 (88.904)	lr 0.00069
Train [108][670/3239]	Time 2.634 (0.698)	Data Time 0.001 (0.060)	Loss 2.2880 (2.1734)	Entropy 0.69651 (0.69732)	Top-1 acc 71.484 (72.315)	Top-5 acc 84.766 (88.887)	lr 0.00069
Train [108][680/3239]	Time 0.231 (0.692)	Data Time 0.002 (0.059)	Loss 2.1563 (2.1743)	Entropy 0.69650 (0.69731)	Top-1 acc 71.875 (72.287)	Top-5 acc 88.672 (88.874)	lr 0.00069
Train [108][690/3239]	Time 0.241 (0.689)	Data Time 0.001 (0.058)	Loss 2.2270 (2.1737)	Entropy 0.69649 (0.69730)	Top-1 acc 73.438 (72.307)	Top-5 acc 87.109 (88.884)	lr 0.00069
Train [108][700/3239]	Time 0.246 (0.686)	Data Time 0.001 (0.057)	Loss 2.0761 (2.1739)	Entropy 0.69646 (0.69729)	Top-1 acc 74.219 (72.301)	Top-5 acc 88.672 (88.874)	lr 0.00069
Train [108][710/3239]	Time 0.257 (0.683)	Data Time 0.001 (0.057)	Loss 2.2088 (2.1739)	Entropy 0.69646 (0.69728)	Top-1 acc 70.312 (72.300)	Top-5 acc 89.062 (88.882)	lr 0.00069
Train [108][720/3239]	Time 0.238 (0.680)	Data Time 0.001 (0.056)	Loss 2.1003 (2.1748)	Entropy 0.69646 (0.69727)	Top-1 acc 71.875 (72.282)	Top-5 acc 92.578 (88.872)	lr 0.00069
Train [108][730/3239]	Time 0.235 (0.677)	Data Time 0.002 (0.055)	Loss 2.6312 (2.1757)	Entropy 0.69649 (0.69725)	Top-1 acc 62.891 (72.258)	Top-5 acc 82.031 (88.864)	lr 0.00069
Train [108][740/3239]	Time 0.228 (0.674)	Data Time 0.001 (0.054)	Loss 2.0900 (2.1755)	Entropy 0.69630 (0.69724)	Top-1 acc 75.391 (72.265)	Top-5 acc 90.234 (88.863)	lr 0.00069
Train [108][750/3239]	Time 0.221 (0.672)	Data Time 0.001 (0.054)	Loss 2.2116 (2.1750)	Entropy 0.69604 (0.69723)	Top-1 acc 70.312 (72.286)	Top-5 acc 86.328 (88.874)	lr 0.00069
Train [108][760/3239]	Time 0.242 (0.669)	Data Time 0.001 (0.053)	Loss 2.1543 (2.1756)	Entropy 0.69603 (0.69721)	Top-1 acc 72.656 (72.276)	Top-5 acc 89.062 (88.863)	lr 0.00069
Train [108][770/3239]	Time 0.230 (0.667)	Data Time 0.001 (0.052)	Loss 2.2953 (2.1757)	Entropy 0.69600 (0.69720)	Top-1 acc 70.703 (72.272)	Top-5 acc 85.547 (88.865)	lr 0.00069
Train [108][780/3239]	Time 2.675 (0.665)	Data Time 0.002 (0.052)	Loss 2.1996 (2.1757)	Entropy 0.69600 (0.69718)	Top-1 acc 70.703 (72.268)	Top-5 acc 87.500 (88.868)	lr 0.00069
Train [108][790/3239]	Time 0.275 (0.659)	Data Time 0.001 (0.051)	Loss 2.0643 (2.1753)	Entropy 0.69592 (0.69717)	Top-1 acc 73.828 (72.290)	Top-5 acc 90.625 (88.877)	lr 0.00069
Train [108][800/3239]	Time 0.334 (0.657)	Data Time 0.001 (0.050)	Loss 2.0864 (2.1749)	Entropy 0.69587 (0.69715)	Top-1 acc 80.078 (72.295)	Top-5 acc 89.453 (88.888)	lr 0.00069
Train [108][810/3239]	Time 0.229 (0.655)	Data Time 0.001 (0.050)	Loss 2.2729 (2.1752)	Entropy 0.69586 (0.69714)	Top-1 acc 68.359 (72.293)	Top-5 acc 87.500 (88.886)	lr 0.00069
Train [108][820/3239]	Time 0.250 (0.653)	Data Time 0.002 (0.049)	Loss 2.2247 (2.1752)	Entropy 0.69584 (0.69712)	Top-1 acc 69.141 (72.291)	Top-5 acc 86.719 (88.888)	lr 0.00069
Train [108][830/3239]	Time 0.237 (0.651)	Data Time 0.001 (0.049)	Loss 2.0874 (2.1754)	Entropy 0.69580 (0.69710)	Top-1 acc 77.734 (72.298)	Top-5 acc 88.672 (88.880)	lr 0.00069
Train [108][840/3239]	Time 0.328 (0.649)	Data Time 0.001 (0.048)	Loss 2.2553 (2.1757)	Entropy 0.69576 (0.69709)	Top-1 acc 73.828 (72.295)	Top-5 acc 88.672 (88.874)	lr 0.00069
Train [108][850/3239]	Time 0.239 (0.647)	Data Time 0.001 (0.048)	Loss 2.2451 (2.1756)	Entropy 0.69576 (0.69707)	Top-1 acc 71.484 (72.290)	Top-5 acc 88.672 (88.879)	lr 0.00069
Train [108][860/3239]	Time 0.225 (0.645)	Data Time 0.001 (0.047)	Loss 2.1875 (2.1752)	Entropy 0.69573 (0.69706)	Top-1 acc 70.703 (72.303)	Top-5 acc 86.328 (88.879)	lr 0.00069
Train [108][870/3239]	Time 0.250 (0.705)	Data Time 0.002 (0.047)	Loss 2.2002 (2.1752)	Entropy 0.69573 (0.69704)	Top-1 acc 70.312 (72.297)	Top-5 acc 89.844 (88.884)	lr 0.00069
Train [108][880/3239]	Time 0.316 (0.703)	Data Time 0.002 (0.046)	Loss 2.1930 (2.1751)	Entropy 0.69577 (0.69703)	Top-1 acc 71.875 (72.298)	Top-5 acc 89.844 (88.889)	lr 0.00069
Train [108][890/3239]	Time 2.506 (0.700)	Data Time 0.002 (0.046)	Loss 2.1822 (2.1754)	Entropy 0.69577 (0.69701)	Top-1 acc 72.656 (72.286)	Top-5 acc 89.062 (88.878)	lr 0.00069
Train [108][900/3239]	Time 0.244 (0.695)	Data Time 0.001 (0.045)	Loss 2.2294 (2.1755)	Entropy 0.69563 (0.69700)	Top-1 acc 67.969 (72.273)	Top-5 acc 90.234 (88.880)	lr 0.00069
Train [108][910/3239]	Time 0.246 (0.693)	Data Time 0.002 (0.045)	Loss 2.0953 (2.1753)	Entropy 0.69546 (0.69698)	Top-1 acc 74.219 (72.277)	Top-5 acc 89.453 (88.879)	lr 0.00069
Train [108][920/3239]	Time 0.267 (0.690)	Data Time 0.001 (0.044)	Loss 2.1801 (2.1757)	Entropy 0.69539 (0.69696)	Top-1 acc 72.656 (72.266)	Top-5 acc 87.891 (88.872)	lr 0.00069
Train [108][930/3239]	Time 0.242 (0.688)	Data Time 0.002 (0.044)	Loss 2.2632 (2.1754)	Entropy 0.69545 (0.69695)	Top-1 acc 69.141 (72.275)	Top-5 acc 86.719 (88.874)	lr 0.00069
Train [108][940/3239]	Time 0.239 (0.686)	Data Time 0.001 (0.043)	Loss 2.2647 (2.1754)	Entropy 0.69537 (0.69693)	Top-1 acc 72.656 (72.280)	Top-5 acc 86.328 (88.885)	lr 0.00069
Train [108][950/3239]	Time 0.249 (0.684)	Data Time 0.001 (0.043)	Loss 2.2280 (2.1751)	Entropy 0.69530 (0.69691)	Top-1 acc 70.312 (72.284)	Top-5 acc 87.500 (88.897)	lr 0.00069
Train [108][960/3239]	Time 0.244 (0.682)	Data Time 0.001 (0.042)	Loss 2.1148 (2.1745)	Entropy 0.69526 (0.69690)	Top-1 acc 77.734 (72.310)	Top-5 acc 88.672 (88.905)	lr 0.00068
Train [108][970/3239]	Time 0.224 (0.680)	Data Time 0.001 (0.042)	Loss 1.9162 (2.1738)	Entropy 0.69530 (0.69688)	Top-1 acc 77.734 (72.328)	Top-5 acc 93.750 (88.916)	lr 0.00068
Train [108][980/3239]	Time 0.241 (0.678)	Data Time 0.001 (0.042)	Loss 2.1580 (2.1734)	Entropy 0.69521 (0.69686)	Top-1 acc 71.484 (72.333)	Top-5 acc 89.453 (88.923)	lr 0.00068
Train [108][990/3239]	Time 0.232 (0.676)	Data Time 0.001 (0.041)	Loss 2.0670 (2.1733)	Entropy 0.69526 (0.69685)	Top-1 acc 75.000 (72.330)	Top-5 acc 91.016 (88.924)	lr 0.00068
Train [108][1000/3239]	Time 2.727 (0.674)	Data Time 0.002 (0.041)	Loss 2.2265 (2.1733)	Entropy 0.69526 (0.69683)	Top-1 acc 70.312 (72.332)	Top-5 acc 87.891 (88.924)	lr 0.00068
Train [108][1010/3239]	Time 0.279 (0.670)	Data Time 0.001 (0.040)	Loss 2.2515 (2.1737)	Entropy 0.69529 (0.69682)	Top-1 acc 72.656 (72.332)	Top-5 acc 88.281 (88.917)	lr 0.00068
Train [108][1020/3239]	Time 0.238 (0.668)	Data Time 0.001 (0.040)	Loss 2.1579 (2.1740)	Entropy 0.69524 (0.69680)	Top-1 acc 71.484 (72.319)	Top-5 acc 89.453 (88.916)	lr 0.00068
Train [108][1030/3239]	Time 0.220 (0.666)	Data Time 0.001 (0.040)	Loss 2.1643 (2.1741)	Entropy 0.69518 (0.69679)	Top-1 acc 68.359 (72.317)	Top-5 acc 89.453 (88.914)	lr 0.00068
Train [108][1040/3239]	Time 0.240 (0.664)	Data Time 0.001 (0.039)	Loss 1.9867 (2.1739)	Entropy 0.69553 (0.69677)	Top-1 acc 78.906 (72.321)	Top-5 acc 91.406 (88.914)	lr 0.00068
Train [108][1050/3239]	Time 0.249 (0.663)	Data Time 0.001 (0.039)	Loss 2.1292 (2.1740)	Entropy 0.69563 (0.69676)	Top-1 acc 76.953 (72.310)	Top-5 acc 90.234 (88.920)	lr 0.00068
Train [108][1060/3239]	Time 0.232 (0.661)	Data Time 0.001 (0.038)	Loss 2.1623 (2.1737)	Entropy 0.69560 (0.69675)	Top-1 acc 73.047 (72.316)	Top-5 acc 90.625 (88.923)	lr 0.00068
Train [108][1070/3239]	Time 0.236 (0.659)	Data Time 0.001 (0.038)	Loss 2.1861 (2.1735)	Entropy 0.69544 (0.69674)	Top-1 acc 70.703 (72.327)	Top-5 acc 88.281 (88.923)	lr 0.00068
Train [108][1080/3239]	Time 0.271 (0.658)	Data Time 0.001 (0.038)	Loss 2.1663 (2.1732)	Entropy 0.69541 (0.69673)	Top-1 acc 70.312 (72.330)	Top-5 acc 90.625 (88.930)	lr 0.00068
Train [108][1090/3239]	Time 0.307 (0.656)	Data Time 0.001 (0.037)	Loss 2.2158 (2.1730)	Entropy 0.69539 (0.69671)	Top-1 acc 69.141 (72.330)	Top-5 acc 88.281 (88.933)	lr 0.00068
Train [108][1100/3239]	Time 0.250 (0.654)	Data Time 0.001 (0.037)	Loss 2.1207 (2.1729)	Entropy 0.69534 (0.69670)	Top-1 acc 75.781 (72.330)	Top-5 acc 89.453 (88.933)	lr 0.00068
Train [108][1110/3239]	Time 2.605 (0.653)	Data Time 0.001 (0.037)	Loss 2.3176 (2.1730)	Entropy 0.69534 (0.69669)	Top-1 acc 70.312 (72.324)	Top-5 acc 87.109 (88.930)	lr 0.00068
Train [108][1120/3239]	Time 0.229 (0.649)	Data Time 0.001 (0.037)	Loss 2.2546 (2.1730)	Entropy 0.69529 (0.69668)	Top-1 acc 67.969 (72.315)	Top-5 acc 88.281 (88.934)	lr 0.00068
Train [108][1130/3239]	Time 0.254 (0.648)	Data Time 0.001 (0.036)	Loss 2.1197 (2.1730)	Entropy 0.69532 (0.69667)	Top-1 acc 73.047 (72.314)	Top-5 acc 91.406 (88.942)	lr 0.00068
Train [108][1140/3239]	Time 0.230 (0.646)	Data Time 0.001 (0.036)	Loss 2.2560 (2.1727)	Entropy 0.69517 (0.69665)	Top-1 acc 69.531 (72.315)	Top-5 acc 87.109 (88.945)	lr 0.00068
Train [108][1150/3239]	Time 0.231 (0.645)	Data Time 0.001 (0.036)	Loss 2.1219 (2.1727)	Entropy 0.69520 (0.69664)	Top-1 acc 71.094 (72.312)	Top-5 acc 87.500 (88.943)	lr 0.00068
Train [108][1160/3239]	Time 0.246 (0.643)	Data Time 0.002 (0.035)	Loss 2.0696 (2.1724)	Entropy 0.69512 (0.69663)	Top-1 acc 76.172 (72.313)	Top-5 acc 92.969 (88.951)	lr 0.00068
Train [108][1170/3239]	Time 0.224 (0.642)	Data Time 0.001 (0.035)	Loss 2.0985 (2.1728)	Entropy 0.69504 (0.69661)	Top-1 acc 72.656 (72.299)	Top-5 acc 91.797 (88.948)	lr 0.00068
Train [108][1180/3239]	Time 0.227 (0.641)	Data Time 0.001 (0.035)	Loss 2.0610 (2.1725)	Entropy 0.69505 (0.69660)	Top-1 acc 75.391 (72.312)	Top-5 acc 90.625 (88.958)	lr 0.00068
Train [108][1190/3239]	Time 0.233 (0.639)	Data Time 0.001 (0.034)	Loss 2.0410 (2.1721)	Entropy 0.69501 (0.69659)	Top-1 acc 75.000 (72.328)	Top-5 acc 92.188 (88.965)	lr 0.00068
Train [108][1200/3239]	Time 0.243 (0.638)	Data Time 0.001 (0.034)	Loss 2.1511 (2.1723)	Entropy 0.69497 (0.69657)	Top-1 acc 72.656 (72.328)	Top-5 acc 90.625 (88.963)	lr 0.00068
Train [108][1210/3239]	Time 0.244 (0.637)	Data Time 0.002 (0.034)	Loss 2.1855 (2.1725)	Entropy 0.69487 (0.69656)	Top-1 acc 74.219 (72.315)	Top-5 acc 88.281 (88.961)	lr 0.00068
Train [108][1220/3239]	Time 2.644 (0.636)	Data Time 0.001 (0.034)	Loss 2.0662 (2.1720)	Entropy 0.69487 (0.69655)	Top-1 acc 74.219 (72.323)	Top-5 acc 88.672 (88.971)	lr 0.00068
Train [108][1230/3239]	Time 0.232 (0.633)	Data Time 0.001 (0.033)	Loss 2.2419 (2.1718)	Entropy 0.69495 (0.69653)	Top-1 acc 73.047 (72.334)	Top-5 acc 85.938 (88.975)	lr 0.00068
Train [108][1240/3239]	Time 0.240 (0.675)	Data Time 0.002 (0.033)	Loss 2.3019 (2.1715)	Entropy 0.69492 (0.69652)	Top-1 acc 67.578 (72.344)	Top-5 acc 88.281 (88.982)	lr 0.00068
Train [108][1250/3239]	Time 0.238 (0.674)	Data Time 0.002 (0.033)	Loss 2.2174 (2.1715)	Entropy 0.69481 (0.69651)	Top-1 acc 70.703 (72.347)	Top-5 acc 88.281 (88.983)	lr 0.00068
Train [108][1260/3239]	Time 0.235 (0.672)	Data Time 0.001 (0.033)	Loss 2.0748 (2.1716)	Entropy 0.69474 (0.69649)	Top-1 acc 77.344 (72.342)	Top-5 acc 92.188 (88.986)	lr 0.00068
Train [108][1270/3239]	Time 0.226 (0.671)	Data Time 0.001 (0.032)	Loss 2.3532 (2.1715)	Entropy 0.69469 (0.69648)	Top-1 acc 69.141 (72.341)	Top-5 acc 87.109 (88.986)	lr 0.00067
Train [108][1280/3239]	Time 0.228 (0.669)	Data Time 0.001 (0.032)	Loss 2.3065 (2.1718)	Entropy 0.69467 (0.69647)	Top-1 acc 69.922 (72.335)	Top-5 acc 89.453 (88.983)	lr 0.00067
Train [108][1290/3239]	Time 0.230 (0.668)	Data Time 0.001 (0.032)	Loss 2.1826 (2.1720)	Entropy 0.69463 (0.69645)	Top-1 acc 73.828 (72.333)	Top-5 acc 89.062 (88.976)	lr 0.00067
Train [108][1300/3239]	Time 0.240 (0.666)	Data Time 0.001 (0.032)	Loss 2.1503 (2.1721)	Entropy 0.69452 (0.69644)	Top-1 acc 75.391 (72.329)	Top-5 acc 89.453 (88.978)	lr 0.00067
Train [108][1310/3239]	Time 0.248 (0.665)	Data Time 0.001 (0.031)	Loss 2.1289 (2.1717)	Entropy 0.69456 (0.69642)	Top-1 acc 72.656 (72.334)	Top-5 acc 90.234 (88.982)	lr 0.00067
Train [108][1320/3239]	Time 0.219 (0.663)	Data Time 0.001 (0.031)	Loss 2.2129 (2.1715)	Entropy 0.69459 (0.69641)	Top-1 acc 71.094 (72.334)	Top-5 acc 88.672 (88.990)	lr 0.00067
Train [108][1330/3239]	Time 2.674 (0.662)	Data Time 0.001 (0.031)	Loss 2.1403 (2.1712)	Entropy 0.69459 (0.69640)	Top-1 acc 71.094 (72.338)	Top-5 acc 89.453 (88.997)	lr 0.00067
Train [108][1340/3239]	Time 0.339 (0.659)	Data Time 0.001 (0.031)	Loss 2.2423 (2.1711)	Entropy 0.69454 (0.69638)	Top-1 acc 69.141 (72.340)	Top-5 acc 88.281 (88.998)	lr 0.00067
Train [108][1350/3239]	Time 0.233 (0.658)	Data Time 0.001 (0.031)	Loss 2.1822 (2.1714)	Entropy 0.69457 (0.69637)	Top-1 acc 73.047 (72.333)	Top-5 acc 89.453 (88.993)	lr 0.00067
Train [108][1360/3239]	Time 0.238 (0.656)	Data Time 0.001 (0.030)	Loss 2.1981 (2.1715)	Entropy 0.69450 (0.69635)	Top-1 acc 71.484 (72.324)	Top-5 acc 87.891 (88.989)	lr 0.00067
Train [108][1370/3239]	Time 0.237 (0.655)	Data Time 0.001 (0.030)	Loss 2.3188 (2.1715)	Entropy 0.69446 (0.69634)	Top-1 acc 71.484 (72.326)	Top-5 acc 87.500 (88.988)	lr 0.00067
Train [108][1380/3239]	Time 0.239 (0.654)	Data Time 0.001 (0.030)	Loss 2.3110 (2.1717)	Entropy 0.69447 (0.69633)	Top-1 acc 64.844 (72.316)	Top-5 acc 87.109 (88.983)	lr 0.00067
Train [108][1390/3239]	Time 0.233 (0.652)	Data Time 0.001 (0.030)	Loss 2.3008 (2.1716)	Entropy 0.69454 (0.69631)	Top-1 acc 64.453 (72.311)	Top-5 acc 85.547 (88.986)	lr 0.00067
Train [108][1400/3239]	Time 0.234 (0.651)	Data Time 0.001 (0.030)	Loss 2.1963 (2.1716)	Entropy 0.69449 (0.69630)	Top-1 acc 71.484 (72.316)	Top-5 acc 89.062 (88.985)	lr 0.00067
Train [108][1410/3239]	Time 0.225 (0.650)	Data Time 0.001 (0.029)	Loss 2.1806 (2.1719)	Entropy 0.69453 (0.69629)	Top-1 acc 73.828 (72.304)	Top-5 acc 89.062 (88.977)	lr 0.00067
Train [108][1420/3239]	Time 0.223 (0.649)	Data Time 0.001 (0.029)	Loss 2.1411 (2.1722)	Entropy 0.69450 (0.69628)	Top-1 acc 72.266 (72.299)	Top-5 acc 91.016 (88.973)	lr 0.00067
Train [108][1430/3239]	Time 0.217 (0.648)	Data Time 0.001 (0.029)	Loss 2.2455 (2.1724)	Entropy 0.69493 (0.69626)	Top-1 acc 73.438 (72.300)	Top-5 acc 87.500 (88.966)	lr 0.00067
Train [108][1440/3239]	Time 2.588 (0.646)	Data Time 0.001 (0.029)	Loss 2.1041 (2.1722)	Entropy 0.69493 (0.69625)	Top-1 acc 74.609 (72.306)	Top-5 acc 89.844 (88.971)	lr 0.00067
Train [108][1450/3239]	Time 0.248 (0.644)	Data Time 0.001 (0.029)	Loss 2.3043 (2.1724)	Entropy 0.69495 (0.69625)	Top-1 acc 67.969 (72.303)	Top-5 acc 86.719 (88.972)	lr 0.00067
Train [108][1460/3239]	Time 0.233 (0.642)	Data Time 0.001 (0.028)	Loss 2.0843 (2.1721)	Entropy 0.69495 (0.69624)	Top-1 acc 76.562 (72.306)	Top-5 acc 91.016 (88.978)	lr 0.00067
Train [108][1470/3239]	Time 0.243 (0.641)	Data Time 0.002 (0.028)	Loss 2.0598 (2.1718)	Entropy 0.69494 (0.69623)	Top-1 acc 75.391 (72.310)	Top-5 acc 90.625 (88.983)	lr 0.00067
Train [108][1480/3239]	Time 0.241 (0.640)	Data Time 0.001 (0.028)	Loss 2.2003 (2.1717)	Entropy 0.69492 (0.69622)	Top-1 acc 70.703 (72.316)	Top-5 acc 88.672 (88.984)	lr 0.00067
Train [108][1490/3239]	Time 0.229 (0.639)	Data Time 0.001 (0.028)	Loss 2.1879 (2.1716)	Entropy 0.69491 (0.69621)	Top-1 acc 72.266 (72.328)	Top-5 acc 89.453 (88.987)	lr 0.00067
Train [108][1500/3239]	Time 0.252 (0.638)	Data Time 0.001 (0.028)	Loss 2.1380 (2.1713)	Entropy 0.69490 (0.69620)	Top-1 acc 74.219 (72.336)	Top-5 acc 90.625 (88.994)	lr 0.00067
Train [108][1510/3239]	Time 0.240 (0.637)	Data Time 0.001 (0.027)	Loss 2.3299 (2.1716)	Entropy 0.69479 (0.69619)	Top-1 acc 68.750 (72.331)	Top-5 acc 89.062 (88.992)	lr 0.00067
Train [108][1520/3239]	Time 0.233 (0.636)	Data Time 0.001 (0.027)	Loss 2.1515 (2.1716)	Entropy 0.69470 (0.69618)	Top-1 acc 73.828 (72.334)	Top-5 acc 86.328 (88.990)	lr 0.00067
Train [108][1530/3239]	Time 0.236 (0.635)	Data Time 0.001 (0.027)	Loss 1.9951 (2.1710)	Entropy 0.69459 (0.69617)	Top-1 acc 80.859 (72.349)	Top-5 acc 90.234 (89.001)	lr 0.00067
Train [108][1540/3239]	Time 0.234 (0.634)	Data Time 0.001 (0.027)	Loss 2.0720 (2.1711)	Entropy 0.69462 (0.69616)	Top-1 acc 77.734 (72.350)	Top-5 acc 90.625 (88.999)	lr 0.00067
Train [108][1550/3239]	Time 2.757 (0.633)	Data Time 0.002 (0.027)	Loss 2.1397 (2.1711)	Entropy 0.69462 (0.69615)	Top-1 acc 73.828 (72.347)	Top-5 acc 89.062 (89.001)	lr 0.00067
Train [108][1560/3239]	Time 0.268 (0.631)	Data Time 0.001 (0.027)	Loss 2.0610 (2.1714)	Entropy 0.69464 (0.69614)	Top-1 acc 76.953 (72.343)	Top-5 acc 91.406 (88.993)	lr 0.00067
Train [108][1570/3239]	Time 0.226 (0.630)	Data Time 0.001 (0.026)	Loss 2.2179 (2.1714)	Entropy 0.69461 (0.69613)	Top-1 acc 73.828 (72.348)	Top-5 acc 87.500 (88.992)	lr 0.00066
Train [108][1580/3239]	Time 0.235 (0.629)	Data Time 0.001 (0.026)	Loss 2.1752 (2.1715)	Entropy 0.69452 (0.69612)	Top-1 acc 67.578 (72.344)	Top-5 acc 89.844 (88.991)	lr 0.00066
Train [108][1590/3239]	Time 0.342 (0.628)	Data Time 0.002 (0.026)	Loss 2.0481 (2.1716)	Entropy 0.69442 (0.69611)	Top-1 acc 77.344 (72.348)	Top-5 acc 89.844 (88.992)	lr 0.00066
Train [108][1600/3239]	Time 0.231 (0.662)	Data Time 0.002 (0.026)	Loss 2.3186 (2.1719)	Entropy 0.69436 (0.69610)	Top-1 acc 70.312 (72.343)	Top-5 acc 85.547 (88.985)	lr 0.00066
Train [108][1610/3239]	Time 0.237 (0.661)	Data Time 0.002 (0.026)	Loss 2.2418 (2.1720)	Entropy 0.69431 (0.69609)	Top-1 acc 74.219 (72.341)	Top-5 acc 86.328 (88.988)	lr 0.00066
Train [108][1620/3239]	Time 0.238 (0.659)	Data Time 0.006 (0.026)	Loss 2.2052 (2.1720)	Entropy 0.69433 (0.69608)	Top-1 acc 73.047 (72.344)	Top-5 acc 89.453 (88.988)	lr 0.00066
Train [108][1630/3239]	Time 0.323 (0.658)	Data Time 0.001 (0.026)	Loss 2.1111 (2.1718)	Entropy 0.69435 (0.69607)	Top-1 acc 73.438 (72.355)	Top-5 acc 91.016 (88.995)	lr 0.00066
Train [108][1640/3239]	Time 0.231 (0.657)	Data Time 0.001 (0.025)	Loss 2.0307 (2.1714)	Entropy 0.69429 (0.69606)	Top-1 acc 80.078 (72.368)	Top-5 acc 90.234 (88.997)	lr 0.00066
Train [108][1650/3239]	Time 0.233 (0.656)	Data Time 0.001 (0.025)	Loss 2.2478 (2.1716)	Entropy 0.69433 (0.69605)	Top-1 acc 69.531 (72.367)	Top-5 acc 86.328 (88.994)	lr 0.00066
Train [108][1660/3239]	Time 2.549 (0.655)	Data Time 0.001 (0.025)	Loss 2.1584 (2.1717)	Entropy 0.69433 (0.69604)	Top-1 acc 71.094 (72.358)	Top-5 acc 88.281 (88.991)	lr 0.00066
Train [108][1670/3239]	Time 0.235 (0.652)	Data Time 0.002 (0.025)	Loss 2.3089 (2.1717)	Entropy 0.69425 (0.69603)	Top-1 acc 66.797 (72.354)	Top-5 acc 87.891 (88.995)	lr 0.00066
Train [108][1680/3239]	Time 0.236 (0.651)	Data Time 0.002 (0.025)	Loss 2.0588 (2.1717)	Entropy 0.69421 (0.69602)	Top-1 acc 74.219 (72.354)	Top-5 acc 90.625 (88.995)	lr 0.00066
Train [108][1690/3239]	Time 0.248 (0.651)	Data Time 0.001 (0.025)	Loss 2.1858 (2.1717)	Entropy 0.69420 (0.69601)	Top-1 acc 70.703 (72.351)	Top-5 acc 88.672 (88.996)	lr 0.00066
Train [108][1700/3239]	Time 0.270 (0.650)	Data Time 0.002 (0.025)	Loss 2.0151 (2.1717)	Entropy 0.69423 (0.69600)	Top-1 acc 73.438 (72.346)	Top-5 acc 90.234 (88.997)	lr 0.00066
Train [108][1710/3239]	Time 0.231 (0.649)	Data Time 0.001 (0.024)	Loss 2.2335 (2.1718)	Entropy 0.69423 (0.69599)	Top-1 acc 70.312 (72.346)	Top-5 acc 88.281 (88.996)	lr 0.00066
Train [108][1720/3239]	Time 0.250 (0.648)	Data Time 0.001 (0.024)	Loss 2.1233 (2.1721)	Entropy 0.69425 (0.69598)	Top-1 acc 70.703 (72.332)	Top-5 acc 90.234 (88.992)	lr 0.00066
Train [108][1730/3239]	Time 0.242 (0.647)	Data Time 0.001 (0.024)	Loss 2.2430 (2.1722)	Entropy 0.69425 (0.69597)	Top-1 acc 70.703 (72.335)	Top-5 acc 88.672 (88.991)	lr 0.00066
Train [108][1740/3239]	Time 0.244 (0.646)	Data Time 0.001 (0.024)	Loss 2.2370 (2.1721)	Entropy 0.69420 (0.69596)	Top-1 acc 72.266 (72.335)	Top-5 acc 86.328 (88.993)	lr 0.00066
Train [108][1750/3239]	Time 0.245 (0.645)	Data Time 0.001 (0.024)	Loss 2.1204 (2.1721)	Entropy 0.69414 (0.69595)	Top-1 acc 72.266 (72.335)	Top-5 acc 89.062 (88.992)	lr 0.00066
Train [108][1760/3239]	Time 0.316 (0.644)	Data Time 0.001 (0.024)	Loss 2.1887 (2.1723)	Entropy 0.69408 (0.69594)	Top-1 acc 73.828 (72.335)	Top-5 acc 87.891 (88.990)	lr 0.00066
Train [108][1770/3239]	Time 2.633 (0.643)	Data Time 0.001 (0.024)	Loss 2.1675 (2.1725)	Entropy 0.69408 (0.69592)	Top-1 acc 71.875 (72.327)	Top-5 acc 88.672 (88.984)	lr 0.00066
Train [108][1780/3239]	Time 0.260 (0.641)	Data Time 0.001 (0.024)	Loss 2.2468 (2.1722)	Entropy 0.69398 (0.69591)	Top-1 acc 71.875 (72.335)	Top-5 acc 85.938 (88.989)	lr 0.00066
Train [108][1790/3239]	Time 0.243 (0.641)	Data Time 0.001 (0.023)	Loss 2.1512 (2.1722)	Entropy 0.69388 (0.69590)	Top-1 acc 73.047 (72.332)	Top-5 acc 85.938 (88.991)	lr 0.00066
Train [108][1800/3239]	Time 0.362 (0.640)	Data Time 0.001 (0.023)	Loss 2.2222 (2.1724)	Entropy 0.69381 (0.69589)	Top-1 acc 69.922 (72.324)	Top-5 acc 86.719 (88.987)	lr 0.00066
Train [108][1810/3239]	Time 0.245 (0.639)	Data Time 0.001 (0.023)	Loss 2.4137 (2.1725)	Entropy 0.69382 (0.69588)	Top-1 acc 70.703 (72.328)	Top-5 acc 86.719 (88.985)	lr 0.00066
Train [108][1820/3239]	Time 0.236 (0.638)	Data Time 0.001 (0.023)	Loss 2.3104 (2.1726)	Entropy 0.69374 (0.69587)	Top-1 acc 67.969 (72.325)	Top-5 acc 86.719 (88.983)	lr 0.00066
Train [108][1830/3239]	Time 0.227 (0.637)	Data Time 0.001 (0.023)	Loss 2.2278 (2.1726)	Entropy 0.69375 (0.69586)	Top-1 acc 74.609 (72.329)	Top-5 acc 89.453 (88.982)	lr 0.00066
Train [108][1840/3239]	Time 0.343 (0.636)	Data Time 0.001 (0.023)	Loss 2.1523 (2.1726)	Entropy 0.69382 (0.69584)	Top-1 acc 71.094 (72.331)	Top-5 acc 89.844 (88.984)	lr 0.00066
Train [108][1850/3239]	Time 0.222 (0.636)	Data Time 0.001 (0.023)	Loss 2.3194 (2.1726)	Entropy 0.69382 (0.69583)	Top-1 acc 70.312 (72.332)	Top-5 acc 83.984 (88.980)	lr 0.00066
Train [108][1860/3239]	Time 0.231 (0.635)	Data Time 0.001 (0.023)	Loss 2.2213 (2.1726)	Entropy 0.69382 (0.69582)	Top-1 acc 71.484 (72.330)	Top-5 acc 87.500 (88.979)	lr 0.00066
Train [108][1870/3239]	Time 0.230 (0.634)	Data Time 0.001 (0.022)	Loss 2.0111 (2.1725)	Entropy 0.69379 (0.69581)	Top-1 acc 75.391 (72.331)	Top-5 acc 90.234 (88.981)	lr 0.00066
Train [108][1880/3239]	Time 2.768 (0.633)	Data Time 0.004 (0.022)	Loss 2.1752 (2.1725)	Entropy 0.69379 (0.69580)	Top-1 acc 73.438 (72.331)	Top-5 acc 88.281 (88.981)	lr 0.00065
Train [108][1890/3239]	Time 0.232 (0.631)	Data Time 0.001 (0.022)	Loss 2.1938 (2.1723)	Entropy 0.69376 (0.69579)	Top-1 acc 69.922 (72.332)	Top-5 acc 85.547 (88.982)	lr 0.00065
Train [108][1900/3239]	Time 0.234 (0.630)	Data Time 0.001 (0.022)	Loss 2.2218 (2.1723)	Entropy 0.69363 (0.69578)	Top-1 acc 72.656 (72.337)	Top-5 acc 88.281 (88.981)	lr 0.00065
Train [108][1910/3239]	Time 0.227 (0.630)	Data Time 0.001 (0.022)	Loss 2.1009 (2.1721)	Entropy 0.69356 (0.69577)	Top-1 acc 76.562 (72.344)	Top-5 acc 91.016 (88.984)	lr 0.00065
Train [108][1920/3239]	Time 0.242 (0.629)	Data Time 0.001 (0.022)	Loss 2.4060 (2.1722)	Entropy 0.69346 (0.69576)	Top-1 acc 67.188 (72.343)	Top-5 acc 83.203 (88.982)	lr 0.00065
Train [108][1930/3239]	Time 0.243 (0.628)	Data Time 0.001 (0.022)	Loss 2.1723 (2.1723)	Entropy 0.69342 (0.69574)	Top-1 acc 73.047 (72.338)	Top-5 acc 88.281 (88.982)	lr 0.00065
Train [108][1940/3239]	Time 0.236 (0.627)	Data Time 0.001 (0.022)	Loss 2.3753 (2.1726)	Entropy 0.69342 (0.69573)	Top-1 acc 67.188 (72.329)	Top-5 acc 86.719 (88.980)	lr 0.00065
Train [108][1950/3239]	Time 0.238 (0.627)	Data Time 0.001 (0.022)	Loss 2.0905 (2.1725)	Entropy 0.69341 (0.69572)	Top-1 acc 73.438 (72.335)	Top-5 acc 90.625 (88.981)	lr 0.00065
Train [108][1960/3239]	Time 0.230 (0.654)	Data Time 0.002 (0.022)	Loss 2.0674 (2.1724)	Entropy 0.69347 (0.69571)	Top-1 acc 71.484 (72.336)	Top-5 acc 89.844 (88.982)	lr 0.00065
Train [108][1970/3239]	Time 0.247 (0.654)	Data Time 0.002 (0.021)	Loss 2.1839 (2.1725)	Entropy 0.69345 (0.69570)	Top-1 acc 72.656 (72.335)	Top-5 acc 90.625 (88.981)	lr 0.00065
Train [108][1980/3239]	Time 0.242 (0.653)	Data Time 0.002 (0.021)	Loss 2.0604 (2.1723)	Entropy 0.69342 (0.69569)	Top-1 acc 75.781 (72.339)	Top-5 acc 90.234 (88.984)	lr 0.00065
Train [108][1990/3239]	Time 2.590 (0.652)	Data Time 0.002 (0.021)	Loss 2.0826 (2.1720)	Entropy 0.69342 (0.69567)	Top-1 acc 72.656 (72.344)	Top-5 acc 91.406 (88.986)	lr 0.00065
Train [108][2000/3239]	Time 0.247 (0.650)	Data Time 0.002 (0.021)	Loss 2.2123 (2.1718)	Entropy 0.69338 (0.69566)	Top-1 acc 73.828 (72.350)	Top-5 acc 87.500 (88.990)	lr 0.00065
Train [108][2010/3239]	Time 0.234 (0.649)	Data Time 0.001 (0.021)	Loss 2.1117 (2.1717)	Entropy 0.69336 (0.69565)	Top-1 acc 73.828 (72.350)	Top-5 acc 90.234 (88.989)	lr 0.00065
Train [108][2020/3239]	Time 0.252 (0.649)	Data Time 0.003 (0.021)	Loss 2.2737 (2.1718)	Entropy 0.69335 (0.69564)	Top-1 acc 68.359 (72.347)	Top-5 acc 89.453 (88.990)	lr 0.00065
Train [108][2030/3239]	Time 0.239 (0.648)	Data Time 0.002 (0.021)	Loss 2.1631 (2.1717)	Entropy 0.69336 (0.69563)	Top-1 acc 73.438 (72.353)	Top-5 acc 89.844 (88.990)	lr 0.00065
Train [108][2040/3239]	Time 0.232 (0.647)	Data Time 0.001 (0.021)	Loss 2.2557 (2.1717)	Entropy 0.69332 (0.69562)	Top-1 acc 71.094 (72.352)	Top-5 acc 86.719 (88.989)	lr 0.00065
Train [108][2050/3239]	Time 0.319 (0.646)	Data Time 0.001 (0.021)	Loss 2.0957 (2.1718)	Entropy 0.69333 (0.69561)	Top-1 acc 74.219 (72.347)	Top-5 acc 91.797 (88.990)	lr 0.00065
Train [108][2060/3239]	Time 0.236 (0.645)	Data Time 0.001 (0.021)	Loss 2.2122 (2.1717)	Entropy 0.69331 (0.69560)	Top-1 acc 70.312 (72.350)	Top-5 acc 87.109 (88.992)	lr 0.00065
Train [108][2070/3239]	Time 0.235 (0.645)	Data Time 0.003 (0.020)	Loss 2.2708 (2.1718)	Entropy 0.69326 (0.69558)	Top-1 acc 73.438 (72.353)	Top-5 acc 85.156 (88.990)	lr 0.00065
Train [108][2080/3239]	Time 0.234 (0.644)	Data Time 0.001 (0.020)	Loss 2.1731 (2.1717)	Entropy 0.69329 (0.69557)	Top-1 acc 69.922 (72.354)	Top-5 acc 88.672 (88.991)	lr 0.00065
Train [108][2090/3239]	Time 0.317 (0.643)	Data Time 0.001 (0.020)	Loss 2.0287 (2.1717)	Entropy 0.69325 (0.69556)	Top-1 acc 79.688 (72.358)	Top-5 acc 90.234 (88.992)	lr 0.00065
Train [108][2100/3239]	Time 2.498 (0.642)	Data Time 0.001 (0.020)	Loss 2.3850 (2.1719)	Entropy 0.69325 (0.69555)	Top-1 acc 63.281 (72.347)	Top-5 acc 86.328 (88.990)	lr 0.00065
Train [108][2110/3239]	Time 0.232 (0.641)	Data Time 0.001 (0.020)	Loss 2.2355 (2.1720)	Entropy 0.69310 (0.69554)	Top-1 acc 70.312 (72.341)	Top-5 acc 87.500 (88.990)	lr 0.00065
Train [108][2120/3239]	Time 0.237 (0.640)	Data Time 0.001 (0.020)	Loss 2.2624 (2.1719)	Entropy 0.69312 (0.69553)	Top-1 acc 74.609 (72.345)	Top-5 acc 85.938 (88.989)	lr 0.00065
Train [108][2130/3239]	Time 0.326 (0.639)	Data Time 0.001 (0.020)	Loss 2.1262 (2.1721)	Entropy 0.69311 (0.69552)	Top-1 acc 75.391 (72.341)	Top-5 acc 90.625 (88.986)	lr 0.00065
Train [108][2140/3239]	Time 0.230 (0.638)	Data Time 0.001 (0.020)	Loss 2.1499 (2.1720)	Entropy 0.69317 (0.69551)	Top-1 acc 72.266 (72.343)	Top-5 acc 89.453 (88.987)	lr 0.00065
Train [108][2150/3239]	Time 0.230 (0.638)	Data Time 0.001 (0.020)	Loss 2.1166 (2.1719)	Entropy 0.69303 (0.69549)	Top-1 acc 76.953 (72.345)	Top-5 acc 90.234 (88.990)	lr 0.00065
Train [108][2160/3239]	Time 0.220 (0.637)	Data Time 0.001 (0.020)	Loss 2.2458 (2.1717)	Entropy 0.69293 (0.69548)	Top-1 acc 71.875 (72.347)	Top-5 acc 85.938 (88.989)	lr 0.00065
Train [108][2170/3239]	Time 0.237 (0.636)	Data Time 0.001 (0.020)	Loss 2.1132 (2.1719)	Entropy 0.69292 (0.69547)	Top-1 acc 75.781 (72.344)	Top-5 acc 90.625 (88.988)	lr 0.00065
Train [108][2180/3239]	Time 0.203 (0.636)	Data Time 0.001 (0.020)	Loss 2.4488 (2.1723)	Entropy 0.69292 (0.69546)	Top-1 acc 61.719 (72.336)	Top-5 acc 84.766 (88.980)	lr 0.00065
Train [108][2190/3239]	Time 0.235 (0.635)	Data Time 0.002 (0.019)	Loss 2.1570 (2.1724)	Entropy 0.69294 (0.69545)	Top-1 acc 73.828 (72.341)	Top-5 acc 89.844 (88.979)	lr 0.00064
Train [108][2200/3239]	Time 0.243 (0.634)	Data Time 0.001 (0.019)	Loss 2.0697 (2.1723)	Entropy 0.69296 (0.69544)	Top-1 acc 75.000 (72.348)	Top-5 acc 90.625 (88.979)	lr 0.00064
Train [108][2210/3239]	Time 2.608 (0.634)	Data Time 0.001 (0.019)	Loss 2.1750 (2.1723)	Entropy 0.69296 (0.69543)	Top-1 acc 70.703 (72.349)	Top-5 acc 89.062 (88.980)	lr 0.00064
Train [108][2220/3239]	Time 0.328 (0.632)	Data Time 0.001 (0.019)	Loss 2.1136 (2.1723)	Entropy 0.69287 (0.69541)	Top-1 acc 74.219 (72.352)	Top-5 acc 91.016 (88.981)	lr 0.00064
Train [108][2230/3239]	Time 0.239 (0.631)	Data Time 0.001 (0.019)	Loss 1.9813 (2.1723)	Entropy 0.69291 (0.69540)	Top-1 acc 76.953 (72.349)	Top-5 acc 91.016 (88.981)	lr 0.00064
Train [108][2240/3239]	Time 0.244 (0.630)	Data Time 0.001 (0.019)	Loss 2.2194 (2.1729)	Entropy 0.69287 (0.69539)	Top-1 acc 71.484 (72.337)	Top-5 acc 87.891 (88.970)	lr 0.00064
Train [108][2250/3239]	Time 0.230 (0.630)	Data Time 0.001 (0.019)	Loss 2.2493 (2.1729)	Entropy 0.69285 (0.69538)	Top-1 acc 71.875 (72.341)	Top-5 acc 87.500 (88.971)	lr 0.00064
Train [108][2260/3239]	Time 0.331 (0.629)	Data Time 0.001 (0.019)	Loss 2.2073 (2.1732)	Entropy 0.69284 (0.69537)	Top-1 acc 73.047 (72.332)	Top-5 acc 87.500 (88.964)	lr 0.00064
Train [108][2270/3239]	Time 0.233 (0.629)	Data Time 0.001 (0.019)	Loss 1.9886 (2.1729)	Entropy 0.69286 (0.69536)	Top-1 acc 76.562 (72.339)	Top-5 acc 93.750 (88.968)	lr 0.00064
Train [108][2280/3239]	Time 0.227 (0.628)	Data Time 0.001 (0.019)	Loss 2.2268 (2.1731)	Entropy 0.69283 (0.69535)	Top-1 acc 70.312 (72.334)	Top-5 acc 87.109 (88.966)	lr 0.00064
Train [108][2290/3239]	Time 0.237 (0.627)	Data Time 0.001 (0.019)	Loss 1.9691 (2.1735)	Entropy 0.69271 (0.69534)	Top-1 acc 75.000 (72.324)	Top-5 acc 92.188 (88.961)	lr 0.00064
Train [108][2300/3239]	Time 0.334 (0.627)	Data Time 0.001 (0.019)	Loss 2.0937 (2.1735)	Entropy 0.69264 (0.69532)	Top-1 acc 76.172 (72.325)	Top-5 acc 90.625 (88.964)	lr 0.00064
Train [108][2310/3239]	Time 0.228 (0.626)	Data Time 0.001 (0.019)	Loss 2.0539 (2.1734)	Entropy 0.69258 (0.69531)	Top-1 acc 79.297 (72.330)	Top-5 acc 90.625 (88.965)	lr 0.00064
Train [108][2320/3239]	Time 53.516 (0.647)	Data Time 0.001 (0.018)	Loss 2.1885 (2.1735)	Entropy 0.69258 (0.69530)	Top-1 acc 72.266 (72.328)	Top-5 acc 88.281 (88.964)	lr 0.00064
Train [108][2330/3239]	Time 0.372 (0.646)	Data Time 0.002 (0.018)	Loss 2.1974 (2.1734)	Entropy 0.69258 (0.69529)	Top-1 acc 70.312 (72.331)	Top-5 acc 89.062 (88.964)	lr 0.00064
Train [108][2340/3239]	Time 0.243 (0.646)	Data Time 0.002 (0.018)	Loss 2.2168 (2.1733)	Entropy 0.69247 (0.69528)	Top-1 acc 72.656 (72.331)	Top-5 acc 88.672 (88.966)	lr 0.00064
Train [108][2350/3239]	Time 0.247 (0.645)	Data Time 0.002 (0.018)	Loss 2.1510 (2.1732)	Entropy 0.69249 (0.69527)	Top-1 acc 70.703 (72.334)	Top-5 acc 90.625 (88.966)	lr 0.00064
Train [108][2360/3239]	Time 0.233 (0.644)	Data Time 0.001 (0.018)	Loss 2.1899 (2.1732)	Entropy 0.69244 (0.69525)	Top-1 acc 75.781 (72.336)	Top-5 acc 88.672 (88.968)	lr 0.00064
Train [108][2370/3239]	Time 0.240 (0.644)	Data Time 0.001 (0.018)	Loss 2.1539 (2.1730)	Entropy 0.69247 (0.69524)	Top-1 acc 73.047 (72.342)	Top-5 acc 90.625 (88.968)	lr 0.00064
Train [108][2380/3239]	Time 0.233 (0.643)	Data Time 0.001 (0.018)	Loss 2.0777 (2.1731)	Entropy 0.69241 (0.69523)	Top-1 acc 76.562 (72.339)	Top-5 acc 89.844 (88.968)	lr 0.00064
Train [108][2390/3239]	Time 0.224 (0.642)	Data Time 0.001 (0.018)	Loss 2.0661 (2.1730)	Entropy 0.69239 (0.69522)	Top-1 acc 74.609 (72.341)	Top-5 acc 91.016 (88.969)	lr 0.00064
Train [108][2400/3239]	Time 0.228 (0.641)	Data Time 0.001 (0.018)	Loss 2.1800 (2.1730)	Entropy 0.69231 (0.69521)	Top-1 acc 74.609 (72.344)	Top-5 acc 88.672 (88.968)	lr 0.00064
Train [108][2410/3239]	Time 0.231 (0.641)	Data Time 0.001 (0.018)	Loss 2.1472 (2.1730)	Entropy 0.69234 (0.69519)	Top-1 acc 71.484 (72.344)	Top-5 acc 89.844 (88.969)	lr 0.00064
Train [108][2420/3239]	Time 0.233 (0.640)	Data Time 0.001 (0.018)	Loss 2.0790 (2.1732)	Entropy 0.69237 (0.69518)	Top-1 acc 73.438 (72.334)	Top-5 acc 90.234 (88.964)	lr 0.00064
Train [108][2430/3239]	Time 2.496 (0.640)	Data Time 0.002 (0.018)	Loss 2.0574 (2.1730)	Entropy 0.69237 (0.69517)	Top-1 acc 73.047 (72.341)	Top-5 acc 91.406 (88.968)	lr 0.00064
Train [108][2440/3239]	Time 0.231 (0.638)	Data Time 0.001 (0.018)	Loss 2.1486 (2.1728)	Entropy 0.69230 (0.69516)	Top-1 acc 75.000 (72.344)	Top-5 acc 89.453 (88.971)	lr 0.00064
Train [108][2450/3239]	Time 0.242 (0.637)	Data Time 0.001 (0.018)	Loss 2.1242 (2.1728)	Entropy 0.69224 (0.69515)	Top-1 acc 74.219 (72.345)	Top-5 acc 89.062 (88.973)	lr 0.00064
Train [108][2460/3239]	Time 0.266 (0.637)	Data Time 0.002 (0.018)	Loss 2.3025 (2.1726)	Entropy 0.69227 (0.69514)	Top-1 acc 70.312 (72.348)	Top-5 acc 87.109 (88.976)	lr 0.00064
Train [108][2470/3239]	Time 0.236 (0.636)	Data Time 0.001 (0.017)	Loss 2.0749 (2.1729)	Entropy 0.69224 (0.69512)	Top-1 acc 76.172 (72.338)	Top-5 acc 88.672 (88.973)	lr 0.00064
Train [108][2480/3239]	Time 0.240 (0.636)	Data Time 0.001 (0.017)	Loss 2.1733 (2.1728)	Entropy 0.69236 (0.69511)	Top-1 acc 72.266 (72.340)	Top-5 acc 91.016 (88.971)	lr 0.00064
Train [108][2490/3239]	Time 0.246 (0.635)	Data Time 0.001 (0.017)	Loss 2.2595 (2.1728)	Entropy 0.69238 (0.69510)	Top-1 acc 70.703 (72.342)	Top-5 acc 87.500 (88.969)	lr 0.00064
Train [108][2500/3239]	Time 0.241 (0.635)	Data Time 0.001 (0.017)	Loss 2.2604 (2.1727)	Entropy 0.69236 (0.69509)	Top-1 acc 67.969 (72.343)	Top-5 acc 87.109 (88.968)	lr 0.00064
Train [108][2510/3239]	Time 0.243 (0.634)	Data Time 0.001 (0.017)	Loss 2.3050 (2.1728)	Entropy 0.69232 (0.69508)	Top-1 acc 69.141 (72.344)	Top-5 acc 86.328 (88.967)	lr 0.00063
Train [108][2520/3239]	Time 0.234 (0.633)	Data Time 0.001 (0.017)	Loss 2.1704 (2.1728)	Entropy 0.69230 (0.69507)	Top-1 acc 72.266 (72.343)	Top-5 acc 89.062 (88.969)	lr 0.00063
Train [108][2530/3239]	Time 0.242 (0.633)	Data Time 0.001 (0.017)	Loss 2.0810 (2.1728)	Entropy 0.69225 (0.69506)	Top-1 acc 78.125 (72.347)	Top-5 acc 90.625 (88.971)	lr 0.00063
Train [108][2540/3239]	Time 2.620 (0.632)	Data Time 0.001 (0.017)	Loss 2.1877 (2.1727)	Entropy 0.69225 (0.69505)	Top-1 acc 71.484 (72.349)	Top-5 acc 90.625 (88.972)	lr 0.00063
Train [108][2550/3239]	Time 0.380 (0.631)	Data Time 0.002 (0.017)	Loss 2.2121 (2.1729)	Entropy 0.69222 (0.69504)	Top-1 acc 71.094 (72.346)	Top-5 acc 86.719 (88.968)	lr 0.00063
Train [108][2560/3239]	Time 0.264 (0.630)	Data Time 0.001 (0.017)	Loss 2.1359 (2.1729)	Entropy 0.69221 (0.69502)	Top-1 acc 73.438 (72.348)	Top-5 acc 89.062 (88.966)	lr 0.00063
Train [108][2570/3239]	Time 0.235 (0.630)	Data Time 0.001 (0.017)	Loss 2.1563 (2.1728)	Entropy 0.69221 (0.69501)	Top-1 acc 71.484 (72.348)	Top-5 acc 88.672 (88.969)	lr 0.00063
Train [108][2580/3239]	Time 0.221 (0.629)	Data Time 0.001 (0.017)	Loss 2.3224 (2.1730)	Entropy 0.69210 (0.69500)	Top-1 acc 71.094 (72.342)	Top-5 acc 87.109 (88.967)	lr 0.00063
Train [108][2590/3239]	Time 0.371 (0.629)	Data Time 0.002 (0.017)	Loss 2.1015 (2.1730)	Entropy 0.69211 (0.69499)	Top-1 acc 73.438 (72.338)	Top-5 acc 90.625 (88.968)	lr 0.00063
Train [108][2600/3239]	Time 0.282 (0.628)	Data Time 0.001 (0.017)	Loss 2.0060 (2.1730)	Entropy 0.69217 (0.69498)	Top-1 acc 77.734 (72.340)	Top-5 acc 91.406 (88.969)	lr 0.00063
Train [108][2610/3239]	Time 0.228 (0.628)	Data Time 0.001 (0.017)	Loss 2.1042 (2.1730)	Entropy 0.69209 (0.69497)	Top-1 acc 76.172 (72.340)	Top-5 acc 90.625 (88.969)	lr 0.00063
Train [108][2620/3239]	Time 0.237 (0.627)	Data Time 0.001 (0.017)	Loss 2.0696 (2.1729)	Entropy 0.69213 (0.69496)	Top-1 acc 76.953 (72.345)	Top-5 acc 89.844 (88.970)	lr 0.00063
Train [108][2630/3239]	Time 0.315 (0.627)	Data Time 0.001 (0.016)	Loss 2.1718 (2.1728)	Entropy 0.69203 (0.69495)	Top-1 acc 72.656 (72.347)	Top-5 acc 88.672 (88.969)	lr 0.00063
Train [108][2640/3239]	Time 0.234 (0.626)	Data Time 0.001 (0.016)	Loss 1.9666 (2.1726)	Entropy 0.69196 (0.69494)	Top-1 acc 76.562 (72.354)	Top-5 acc 92.578 (88.973)	lr 0.00063
Train [108][2650/3239]	Time 0.252 (0.626)	Data Time 0.001 (0.016)	Loss 2.2573 (2.1726)	Entropy 0.69188 (0.69493)	Top-1 acc 71.094 (72.356)	Top-5 acc 89.062 (88.975)	lr 0.00063
Train [108][2660/3239]	Time 0.260 (0.625)	Data Time 0.001 (0.016)	Loss 2.0750 (2.1725)	Entropy 0.69194 (0.69491)	Top-1 acc 76.172 (72.362)	Top-5 acc 91.797 (88.975)	lr 0.00063
Train [108][2670/3239]	Time 0.287 (0.625)	Data Time 0.001 (0.016)	Loss 2.1687 (2.1726)	Entropy 0.69186 (0.69490)	Top-1 acc 72.266 (72.359)	Top-5 acc 90.625 (88.973)	lr 0.00063
Train [108][2680/3239]	Time 0.271 (0.643)	Data Time 0.004 (0.016)	Loss 2.1651 (2.1726)	Entropy 0.69194 (0.69489)	Top-1 acc 73.047 (72.358)	Top-5 acc 88.672 (88.976)	lr 0.00063
Train [108][2690/3239]	Time 0.226 (0.643)	Data Time 0.002 (0.016)	Loss 2.0803 (2.1727)	Entropy 0.69199 (0.69488)	Top-1 acc 75.000 (72.358)	Top-5 acc 90.625 (88.975)	lr 0.00063
Train [108][2700/3239]	Time 0.212 (0.642)	Data Time 0.001 (0.016)	Loss 2.1623 (2.1727)	Entropy 0.69200 (0.69487)	Top-1 acc 69.531 (72.355)	Top-5 acc 88.672 (88.975)	lr 0.00063
Train [108][2710/3239]	Time 0.258 (0.641)	Data Time 0.002 (0.016)	Loss 2.3947 (2.1729)	Entropy 0.69195 (0.69486)	Top-1 acc 65.234 (72.352)	Top-5 acc 87.500 (88.971)	lr 0.00063
Train [108][2720/3239]	Time 0.236 (0.641)	Data Time 0.001 (0.016)	Loss 2.1229 (2.1727)	Entropy 0.69187 (0.69485)	Top-1 acc 75.000 (72.357)	Top-5 acc 89.062 (88.974)	lr 0.00063
Train [108][2730/3239]	Time 0.258 (0.640)	Data Time 0.001 (0.016)	Loss 2.0444 (2.1726)	Entropy 0.69173 (0.69484)	Top-1 acc 75.000 (72.361)	Top-5 acc 90.625 (88.978)	lr 0.00063
Train [108][2740/3239]	Time 0.247 (0.640)	Data Time 0.001 (0.016)	Loss 2.2032 (2.1727)	Entropy 0.69164 (0.69483)	Top-1 acc 73.047 (72.361)	Top-5 acc 86.719 (88.976)	lr 0.00063
Train [108][2750/3239]	Time 0.224 (0.639)	Data Time 0.002 (0.016)	Loss 2.1865 (2.1728)	Entropy 0.69154 (0.69481)	Top-1 acc 71.094 (72.356)	Top-5 acc 91.406 (88.977)	lr 0.00063
Train [108][2760/3239]	Time 0.223 (0.639)	Data Time 0.001 (0.016)	Loss 2.1203 (2.1729)	Entropy 0.69152 (0.69480)	Top-1 acc 74.609 (72.352)	Top-5 acc 87.891 (88.976)	lr 0.00063
Train [108][2770/3239]	Time 0.293 (0.638)	Data Time 0.001 (0.016)	Loss 2.1405 (2.1728)	Entropy 0.69152 (0.69479)	Top-1 acc 71.875 (72.356)	Top-5 acc 88.281 (88.978)	lr 0.00063
Train [108][2780/3239]	Time 0.268 (0.637)	Data Time 0.001 (0.016)	Loss 2.2188 (2.1728)	Entropy 0.69151 (0.69478)	Top-1 acc 68.359 (72.358)	Top-5 acc 89.844 (88.978)	lr 0.00063
Train [108][2790/3239]	Time 0.259 (0.637)	Data Time 0.001 (0.016)	Loss 2.3292 (2.1729)	Entropy 0.69146 (0.69477)	Top-1 acc 70.312 (72.353)	Top-5 acc 86.328 (88.974)	lr 0.00063
Train [108][2800/3239]	Time 0.232 (0.636)	Data Time 0.001 (0.016)	Loss 2.0986 (2.1731)	Entropy 0.69144 (0.69476)	Top-1 acc 73.047 (72.350)	Top-5 acc 91.797 (88.973)	lr 0.00063
Train [108][2810/3239]	Time 0.241 (0.636)	Data Time 0.001 (0.016)	Loss 2.0790 (2.1731)	Entropy 0.69138 (0.69474)	Top-1 acc 74.219 (72.348)	Top-5 acc 90.234 (88.974)	lr 0.00063
Train [108][2820/3239]	Time 0.263 (0.635)	Data Time 0.001 (0.015)	Loss 2.1314 (2.1733)	Entropy 0.69136 (0.69473)	Top-1 acc 73.828 (72.342)	Top-5 acc 91.406 (88.970)	lr 0.00062
Train [108][2830/3239]	Time 0.232 (0.635)	Data Time 0.001 (0.015)	Loss 2.0397 (2.1733)	Entropy 0.69132 (0.69472)	Top-1 acc 76.172 (72.342)	Top-5 acc 92.578 (88.971)	lr 0.00062
Train [108][2840/3239]	Time 0.236 (0.634)	Data Time 0.001 (0.015)	Loss 2.0634 (2.1732)	Entropy 0.69119 (0.69471)	Top-1 acc 76.172 (72.342)	Top-5 acc 91.016 (88.972)	lr 0.00062
Train [108][2850/3239]	Time 0.277 (0.634)	Data Time 0.001 (0.015)	Loss 2.2415 (2.1731)	Entropy 0.69120 (0.69470)	Top-1 acc 73.047 (72.345)	Top-5 acc 88.672 (88.972)	lr 0.00062
Train [108][2860/3239]	Time 0.241 (0.633)	Data Time 0.001 (0.015)	Loss 2.1602 (2.1730)	Entropy 0.69114 (0.69468)	Top-1 acc 73.047 (72.350)	Top-5 acc 88.672 (88.975)	lr 0.00062
Train [108][2870/3239]	Time 0.226 (0.633)	Data Time 0.001 (0.015)	Loss 2.1902 (2.1727)	Entropy 0.69123 (0.69467)	Top-1 acc 71.484 (72.356)	Top-5 acc 87.109 (88.981)	lr 0.00062
Train [108][2880/3239]	Time 0.324 (0.632)	Data Time 0.001 (0.015)	Loss 2.1123 (2.1727)	Entropy 0.69119 (0.69466)	Top-1 acc 74.219 (72.355)	Top-5 acc 89.844 (88.982)	lr 0.00062
Train [108][2890/3239]	Time 0.241 (0.632)	Data Time 0.001 (0.015)	Loss 2.3391 (2.1729)	Entropy 0.69120 (0.69465)	Top-1 acc 68.359 (72.350)	Top-5 acc 85.938 (88.979)	lr 0.00062
Train [108][2900/3239]	Time 0.241 (0.631)	Data Time 0.001 (0.015)	Loss 2.0614 (2.1729)	Entropy 0.69113 (0.69463)	Top-1 acc 76.562 (72.353)	Top-5 acc 91.797 (88.980)	lr 0.00062
Train [108][2910/3239]	Time 0.219 (0.631)	Data Time 0.001 (0.015)	Loss 2.2008 (2.1728)	Entropy 0.69115 (0.69462)	Top-1 acc 71.484 (72.354)	Top-5 acc 87.109 (88.983)	lr 0.00062
Train [108][2920/3239]	Time 0.352 (0.630)	Data Time 0.001 (0.015)	Loss 2.1583 (2.1729)	Entropy 0.69113 (0.69461)	Top-1 acc 72.266 (72.351)	Top-5 acc 89.062 (88.982)	lr 0.00062
Train [108][2930/3239]	Time 0.233 (0.630)	Data Time 0.001 (0.015)	Loss 2.0938 (2.1729)	Entropy 0.69114 (0.69460)	Top-1 acc 72.266 (72.349)	Top-5 acc 88.281 (88.981)	lr 0.00062
Train [108][2940/3239]	Time 0.258 (0.629)	Data Time 0.002 (0.015)	Loss 2.2500 (2.1730)	Entropy 0.69109 (0.69459)	Top-1 acc 69.141 (72.348)	Top-5 acc 87.891 (88.981)	lr 0.00062
Train [108][2950/3239]	Time 0.234 (0.629)	Data Time 0.002 (0.015)	Loss 2.1803 (2.1732)	Entropy 0.69089 (0.69458)	Top-1 acc 71.875 (72.345)	Top-5 acc 89.062 (88.978)	lr 0.00062
Train [108][2960/3239]	Time 0.361 (0.628)	Data Time 0.001 (0.015)	Loss 2.2576 (2.1733)	Entropy 0.69083 (0.69456)	Top-1 acc 70.703 (72.341)	Top-5 acc 87.109 (88.979)	lr 0.00062
Train [108][2970/3239]	Time 0.262 (0.628)	Data Time 0.001 (0.015)	Loss 2.2322 (2.1734)	Entropy 0.69085 (0.69455)	Top-1 acc 68.750 (72.338)	Top-5 acc 88.672 (88.978)	lr 0.00062
Train [108][2980/3239]	Time 0.220 (0.627)	Data Time 0.001 (0.015)	Loss 2.1859 (2.1735)	Entropy 0.69086 (0.69454)	Top-1 acc 74.219 (72.334)	Top-5 acc 87.500 (88.975)	lr 0.00062
Train [108][2990/3239]	Time 0.237 (0.627)	Data Time 0.001 (0.015)	Loss 2.2301 (2.1735)	Entropy 0.69077 (0.69453)	Top-1 acc 72.266 (72.334)	Top-5 acc 89.062 (88.974)	lr 0.00062
Train [108][3000/3239]	Time 0.335 (0.626)	Data Time 0.001 (0.015)	Loss 1.9776 (2.1735)	Entropy 0.69071 (0.69451)	Top-1 acc 76.562 (72.333)	Top-5 acc 91.016 (88.974)	lr 0.00062
Train [108][3010/3239]	Time 0.360 (0.642)	Data Time 0.091 (0.015)	Loss 2.1627 (2.1735)	Entropy 0.69065 (0.69450)	Top-1 acc 72.656 (72.330)	Top-5 acc 90.234 (88.973)	lr 0.00062
Train [108][3020/3239]	Time 0.241 (0.642)	Data Time 0.002 (0.015)	Loss 1.9944 (2.1734)	Entropy 0.69056 (0.69449)	Top-1 acc 74.609 (72.332)	Top-5 acc 91.797 (88.977)	lr 0.00062
Train [108][3030/3239]	Time 0.220 (0.642)	Data Time 0.001 (0.015)	Loss 2.2006 (2.1734)	Entropy 0.69046 (0.69447)	Top-1 acc 71.484 (72.332)	Top-5 acc 88.672 (88.979)	lr 0.00062
Train [108][3040/3239]	Time 0.245 (0.641)	Data Time 0.002 (0.015)	Loss 2.2336 (2.1734)	Entropy 0.69050 (0.69446)	Top-1 acc 68.359 (72.330)	Top-5 acc 87.109 (88.980)	lr 0.00062
Train [108][3050/3239]	Time 0.263 (0.641)	Data Time 0.001 (0.014)	Loss 2.1968 (2.1734)	Entropy 0.69050 (0.69445)	Top-1 acc 70.703 (72.331)	Top-5 acc 90.234 (88.980)	lr 0.00062
Train [108][3060/3239]	Time 0.274 (0.640)	Data Time 0.003 (0.014)	Loss 2.2061 (2.1733)	Entropy 0.69050 (0.69444)	Top-1 acc 69.141 (72.332)	Top-5 acc 88.672 (88.980)	lr 0.00062
Train [108][3070/3239]	Time 0.228 (0.640)	Data Time 0.001 (0.014)	Loss 2.1861 (2.1734)	Entropy 0.69049 (0.69442)	Top-1 acc 71.484 (72.331)	Top-5 acc 89.844 (88.980)	lr 0.00062
Train [108][3080/3239]	Time 0.225 (0.639)	Data Time 0.001 (0.014)	Loss 2.1445 (2.1734)	Entropy 0.69041 (0.69441)	Top-1 acc 71.094 (72.332)	Top-5 acc 89.844 (88.981)	lr 0.00062
Train [108][3090/3239]	Time 0.220 (0.639)	Data Time 0.001 (0.014)	Loss 2.2120 (2.1732)	Entropy 0.69040 (0.69440)	Top-1 acc 72.266 (72.336)	Top-5 acc 88.672 (88.983)	lr 0.00062
Train [108][3100/3239]	Time 0.273 (0.638)	Data Time 0.001 (0.014)	Loss 2.2014 (2.1733)	Entropy 0.69041 (0.69438)	Top-1 acc 71.875 (72.335)	Top-5 acc 89.062 (88.984)	lr 0.00062
Train [108][3110/3239]	Time 0.237 (0.637)	Data Time 0.001 (0.014)	Loss 2.1913 (2.1734)	Entropy 0.69039 (0.69437)	Top-1 acc 71.484 (72.331)	Top-5 acc 90.625 (88.981)	lr 0.00062
Train [108][3120/3239]	Time 0.230 (0.637)	Data Time 0.001 (0.014)	Loss 2.1796 (2.1734)	Entropy 0.69036 (0.69436)	Top-1 acc 68.750 (72.331)	Top-5 acc 88.672 (88.982)	lr 0.00062
Train [108][3130/3239]	Time 0.238 (0.637)	Data Time 0.001 (0.014)	Loss 2.0872 (2.1733)	Entropy 0.69037 (0.69435)	Top-1 acc 71.484 (72.329)	Top-5 acc 92.578 (88.983)	lr 0.00062
Train [108][3140/3239]	Time 0.258 (0.636)	Data Time 0.001 (0.014)	Loss 2.2733 (2.1734)	Entropy 0.69029 (0.69433)	Top-1 acc 67.578 (72.326)	Top-5 acc 86.719 (88.982)	lr 0.00061
Train [108][3150/3239]	Time 0.224 (0.636)	Data Time 0.001 (0.014)	Loss 2.1560 (2.1733)	Entropy 0.69028 (0.69432)	Top-1 acc 73.047 (72.329)	Top-5 acc 89.453 (88.981)	lr 0.00061
Train [108][3160/3239]	Time 0.225 (0.635)	Data Time 0.001 (0.014)	Loss 1.9388 (2.1733)	Entropy 0.69028 (0.69431)	Top-1 acc 80.859 (72.332)	Top-5 acc 93.359 (88.980)	lr 0.00061
Train [108][3170/3239]	Time 0.320 (0.635)	Data Time 0.001 (0.014)	Loss 2.1520 (2.1733)	Entropy 0.69028 (0.69429)	Top-1 acc 71.484 (72.330)	Top-5 acc 89.844 (88.981)	lr 0.00061
Train [108][3180/3239]	Time 0.227 (0.634)	Data Time 0.000 (0.014)	Loss 2.1097 (2.1734)	Entropy 0.69020 (0.69428)	Top-1 acc 73.047 (72.325)	Top-5 acc 92.578 (88.981)	lr 0.00061
Train [108][3190/3239]	Time 0.243 (0.634)	Data Time 0.000 (0.014)	Loss 2.1056 (2.1734)	Entropy 0.69023 (0.69427)	Top-1 acc 75.391 (72.326)	Top-5 acc 89.844 (88.982)	lr 0.00061
Train [108][3200/3239]	Time 0.232 (0.633)	Data Time 0.000 (0.014)	Loss 2.0059 (2.1735)	Entropy 0.69022 (0.69426)	Top-1 acc 76.953 (72.324)	Top-5 acc 92.578 (88.981)	lr 0.00061
Train [108][3210/3239]	Time 0.320 (0.633)	Data Time 0.000 (0.014)	Loss 2.0620 (2.1736)	Entropy 0.69024 (0.69424)	Top-1 acc 74.609 (72.321)	Top-5 acc 91.797 (88.982)	lr 0.00061
Train [108][3220/3239]	Time 0.234 (0.632)	Data Time 0.000 (0.014)	Loss 2.1873 (2.1734)	Entropy 0.69025 (0.69423)	Top-1 acc 71.484 (72.324)	Top-5 acc 89.453 (88.984)	lr 0.00061
Train [108][3230/3239]	Time 0.224 (0.632)	Data Time 0.000 (0.014)	Loss 2.2422 (2.1733)	Entropy 0.69029 (0.69422)	Top-1 acc 70.703 (72.325)	Top-5 acc 88.672 (88.986)	lr 0.00061
Train [108][3239/3239]	Time 2.432 (0.631)	Data Time 0.000 (0.014)	Loss 2.3697 (2.1732)	Entropy 0.69029 (0.69421)	Top-1 acc 71.605 (72.327)	Top-5 acc 87.654 (88.989)	lr 0.00061
==========Valid [108/120]	loss 1.204	top-1 acc 72.525 (72.525)	top-5 acc 89.899	Train top-1 72.327	top-5 88.989	Entropy 0.69029	Latency-None: 0.000ms	Flops: 546.53M
Train [109][0/3239]	Time 40.281 (40.281)	Data Time 38.716 (38.716)	Loss 2.3531 (2.3531)	Entropy 0.69029 (0.69029)	Top-1 acc 67.188 (67.188)	Top-5 acc 85.547 (85.547)	lr 0.00061
Train [109][10/3239]	Time 2.768 (4.292)	Data Time 0.002 (3.572)	Loss 2.0579 (2.2021)	Entropy 0.69029 (0.69029)	Top-1 acc 75.781 (71.378)	Top-5 acc 89.453 (88.423)	lr 0.00061
Train [109][20/3239]	Time 0.247 (2.373)	Data Time 0.001 (1.872)	Loss 2.0672 (2.1643)	Entropy 0.69024 (0.69027)	Top-1 acc 77.344 (72.526)	Top-5 acc 91.406 (89.156)	lr 0.00061
Train [109][30/3239]	Time 0.243 (1.763)	Data Time 0.001 (1.268)	Loss 2.3164 (2.1602)	Entropy 0.69023 (0.69025)	Top-1 acc 71.094 (72.770)	Top-5 acc 86.328 (89.189)	lr 0.00061
Train [109][40/3239]	Time 0.249 (1.451)	Data Time 0.001 (0.959)	Loss 2.0602 (2.1568)	Entropy 0.69025 (0.69025)	Top-1 acc 73.438 (73.066)	Top-5 acc 90.625 (89.167)	lr 0.00061
Train [109][50/3239]	Time 0.236 (1.261)	Data Time 0.001 (0.772)	Loss 2.3581 (2.1627)	Entropy 0.69025 (0.69025)	Top-1 acc 66.016 (72.886)	Top-5 acc 83.984 (89.047)	lr 0.00061
Train [109][60/3239]	Time 0.222 (1.135)	Data Time 0.001 (0.645)	Loss 2.1924 (2.1702)	Entropy 0.69022 (0.69025)	Top-1 acc 73.828 (72.701)	Top-5 acc 86.328 (88.826)	lr 0.00061
Train [109][70/3239]	Time 0.234 (1.043)	Data Time 0.001 (0.555)	Loss 2.0050 (2.1638)	Entropy 0.69014 (0.69024)	Top-1 acc 78.516 (72.893)	Top-5 acc 89.453 (88.914)	lr 0.00061
Train [109][80/3239]	Time 0.260 (0.974)	Data Time 0.001 (0.486)	Loss 2.1747 (2.1625)	Entropy 0.69017 (0.69023)	Top-1 acc 73.828 (72.854)	Top-5 acc 90.234 (88.985)	lr 0.00061
Train [109][90/3239]	Time 0.261 (0.920)	Data Time 0.029 (0.433)	Loss 2.1700 (2.1611)	Entropy 0.69014 (0.69022)	Top-1 acc 75.781 (72.918)	Top-5 acc 85.938 (89.032)	lr 0.00061
Train [109][100/3239]	Time 0.259 (0.877)	Data Time 0.001 (0.391)	Loss 1.9948 (2.1565)	Entropy 0.69002 (0.69021)	Top-1 acc 77.734 (72.935)	Top-5 acc 92.188 (89.194)	lr 0.00061
Train [109][110/3239]	Time 0.299 (1.333)	Data Time 0.004 (0.356)	Loss 2.0742 (2.1558)	Entropy 0.69009 (0.69019)	Top-1 acc 73.828 (72.952)	Top-5 acc 89.453 (89.245)	lr 0.00061
Train [109][120/3239]	Time 2.884 (1.273)	Data Time 0.124 (0.328)	Loss 2.0766 (2.1571)	Entropy 0.69009 (0.69018)	Top-1 acc 75.000 (72.908)	Top-5 acc 91.406 (89.198)	lr 0.00061
Train [109][130/3239]	Time 0.246 (1.194)	Data Time 0.002 (0.303)	Loss 2.2858 (2.1589)	Entropy 0.69006 (0.69018)	Top-1 acc 69.531 (72.814)	Top-5 acc 85.938 (89.167)	lr 0.00061
Train [109][140/3239]	Time 0.362 (1.145)	Data Time 0.002 (0.281)	Loss 2.1417 (2.1606)	Entropy 0.69007 (0.69017)	Top-1 acc 72.266 (72.731)	Top-5 acc 90.234 (89.165)	lr 0.00061
Train [109][150/3239]	Time 0.227 (1.101)	Data Time 0.001 (0.263)	Loss 2.0851 (2.1591)	Entropy 0.68996 (0.69016)	Top-1 acc 75.781 (72.765)	Top-5 acc 89.453 (89.205)	lr 0.00061
Train [109][160/3239]	Time 0.240 (1.063)	Data Time 0.001 (0.247)	Loss 2.0289 (2.1598)	Entropy 0.68995 (0.69014)	Top-1 acc 74.219 (72.775)	Top-5 acc 92.578 (89.174)	lr 0.00061
Train [109][170/3239]	Time 0.222 (1.029)	Data Time 0.001 (0.232)	Loss 2.1361 (2.1596)	Entropy 0.68995 (0.69013)	Top-1 acc 72.266 (72.761)	Top-5 acc 89.453 (89.174)	lr 0.00061
Train [109][180/3239]	Time 0.321 (1.000)	Data Time 0.001 (0.220)	Loss 2.2064 (2.1602)	Entropy 0.68989 (0.69012)	Top-1 acc 74.219 (72.775)	Top-5 acc 86.328 (89.149)	lr 0.00061
Train [109][190/3239]	Time 0.230 (0.974)	Data Time 0.001 (0.208)	Loss 2.2515 (2.1583)	Entropy 0.68986 (0.69011)	Top-1 acc 71.094 (72.859)	Top-5 acc 87.891 (89.173)	lr 0.00061
Train [109][200/3239]	Time 0.240 (0.951)	Data Time 0.002 (0.198)	Loss 2.2334 (2.1587)	Entropy 0.68985 (0.69010)	Top-1 acc 69.922 (72.862)	Top-5 acc 86.719 (89.158)	lr 0.00061
Train [109][210/3239]	Time 0.247 (0.929)	Data Time 0.001 (0.189)	Loss 2.0149 (2.1575)	Entropy 0.68985 (0.69008)	Top-1 acc 76.172 (72.884)	Top-5 acc 92.578 (89.146)	lr 0.00061
Train [109][220/3239]	Time 0.238 (0.910)	Data Time 0.001 (0.180)	Loss 2.1369 (2.1576)	Entropy 0.68987 (0.69007)	Top-1 acc 73.047 (72.868)	Top-5 acc 89.453 (89.133)	lr 0.00060
Train [109][230/3239]	Time 2.742 (0.893)	Data Time 0.002 (0.172)	Loss 2.2919 (2.1601)	Entropy 0.68987 (0.69006)	Top-1 acc 70.703 (72.781)	Top-5 acc 87.500 (89.088)	lr 0.00060
Train [109][240/3239]	Time 0.231 (0.867)	Data Time 0.002 (0.165)	Loss 2.1712 (2.1601)	Entropy 0.68987 (0.69006)	Top-1 acc 72.656 (72.763)	Top-5 acc 88.281 (89.106)	lr 0.00060
Train [109][250/3239]	Time 0.247 (0.852)	Data Time 0.001 (0.159)	Loss 2.3129 (2.1598)	Entropy 0.68985 (0.69005)	Top-1 acc 66.016 (72.720)	Top-5 acc 85.938 (89.109)	lr 0.00060
Train [109][260/3239]	Time 0.261 (0.837)	Data Time 0.001 (0.153)	Loss 2.2399 (2.1628)	Entropy 0.68990 (0.69004)	Top-1 acc 67.969 (72.628)	Top-5 acc 88.281 (89.058)	lr 0.00060
Train [109][270/3239]	Time 0.233 (0.825)	Data Time 0.001 (0.147)	Loss 2.0437 (2.1633)	Entropy 0.68987 (0.69004)	Top-1 acc 70.312 (72.587)	Top-5 acc 92.969 (89.081)	lr 0.00060
Train [109][280/3239]	Time 0.231 (0.813)	Data Time 0.001 (0.142)	Loss 2.2841 (2.1647)	Entropy 0.68976 (0.69003)	Top-1 acc 72.656 (72.555)	Top-5 acc 88.281 (89.058)	lr 0.00060
Train [109][290/3239]	Time 0.227 (0.801)	Data Time 0.001 (0.137)	Loss 2.2351 (2.1656)	Entropy 0.68980 (0.69002)	Top-1 acc 69.922 (72.525)	Top-5 acc 86.719 (89.024)	lr 0.00060
Train [109][300/3239]	Time 0.233 (0.791)	Data Time 0.001 (0.133)	Loss 2.1994 (2.1650)	Entropy 0.68972 (0.69001)	Top-1 acc 70.703 (72.534)	Top-5 acc 88.672 (89.017)	lr 0.00060
Train [109][310/3239]	Time 0.238 (0.781)	Data Time 0.001 (0.128)	Loss 2.2188 (2.1646)	Entropy 0.68968 (0.69000)	Top-1 acc 67.969 (72.529)	Top-5 acc 87.500 (89.051)	lr 0.00060
Train [109][320/3239]	Time 0.236 (0.772)	Data Time 0.001 (0.124)	Loss 2.1457 (2.1645)	Entropy 0.68956 (0.68999)	Top-1 acc 74.219 (72.553)	Top-5 acc 88.281 (89.053)	lr 0.00060
Train [109][330/3239]	Time 0.232 (0.763)	Data Time 0.001 (0.121)	Loss 2.0563 (2.1626)	Entropy 0.68952 (0.68998)	Top-1 acc 74.219 (72.590)	Top-5 acc 91.406 (89.077)	lr 0.00060
Train [109][340/3239]	Time 2.584 (0.754)	Data Time 0.002 (0.117)	Loss 2.1839 (2.1627)	Entropy 0.68952 (0.68996)	Top-1 acc 71.094 (72.610)	Top-5 acc 87.500 (89.061)	lr 0.00060
Train [109][350/3239]	Time 0.334 (0.740)	Data Time 0.001 (0.114)	Loss 2.0686 (2.1633)	Entropy 0.68951 (0.68995)	Top-1 acc 73.828 (72.588)	Top-5 acc 89.844 (89.040)	lr 0.00060
Train [109][360/3239]	Time 0.242 (0.734)	Data Time 0.001 (0.111)	Loss 2.2225 (2.1638)	Entropy 0.68947 (0.68994)	Top-1 acc 68.359 (72.578)	Top-5 acc 87.500 (89.026)	lr 0.00060
Train [109][370/3239]	Time 0.250 (0.727)	Data Time 0.001 (0.108)	Loss 2.1692 (2.1626)	Entropy 0.68939 (0.68992)	Top-1 acc 70.312 (72.623)	Top-5 acc 89.453 (89.052)	lr 0.00060
Train [109][380/3239]	Time 0.247 (0.721)	Data Time 0.001 (0.105)	Loss 2.0881 (2.1630)	Entropy 0.68934 (0.68991)	Top-1 acc 75.000 (72.603)	Top-5 acc 89.844 (89.053)	lr 0.00060
Train [109][390/3239]	Time 0.331 (0.715)	Data Time 0.001 (0.102)	Loss 2.0748 (2.1617)	Entropy 0.68934 (0.68989)	Top-1 acc 72.266 (72.643)	Top-5 acc 91.406 (89.079)	lr 0.00060
Train [109][400/3239]	Time 0.232 (0.709)	Data Time 0.001 (0.100)	Loss 2.0811 (2.1615)	Entropy 0.68926 (0.68988)	Top-1 acc 73.438 (72.668)	Top-5 acc 91.016 (89.077)	lr 0.00060
Train [109][410/3239]	Time 0.247 (0.704)	Data Time 0.001 (0.098)	Loss 2.1709 (2.1615)	Entropy 0.68920 (0.68986)	Top-1 acc 75.000 (72.672)	Top-5 acc 87.891 (89.082)	lr 0.00060
Train [109][420/3239]	Time 0.241 (0.698)	Data Time 0.001 (0.095)	Loss 2.2351 (2.1624)	Entropy 0.68917 (0.68985)	Top-1 acc 72.656 (72.677)	Top-5 acc 87.500 (89.073)	lr 0.00060
Train [109][430/3239]	Time 0.321 (0.693)	Data Time 0.001 (0.093)	Loss 2.2151 (2.1618)	Entropy 0.68917 (0.68983)	Top-1 acc 73.828 (72.696)	Top-5 acc 86.719 (89.091)	lr 0.00060
Train [109][440/3239]	Time 0.224 (0.688)	Data Time 0.001 (0.091)	Loss 2.1990 (2.1619)	Entropy 0.68917 (0.68982)	Top-1 acc 73.047 (72.697)	Top-5 acc 88.281 (89.096)	lr 0.00060
Train [109][450/3239]	Time 2.756 (0.684)	Data Time 0.001 (0.089)	Loss 2.2717 (2.1619)	Entropy 0.68917 (0.68980)	Top-1 acc 73.828 (72.706)	Top-5 acc 87.891 (89.104)	lr 0.00060
Train [109][460/3239]	Time 0.240 (0.674)	Data Time 0.001 (0.087)	Loss 2.0525 (2.1630)	Entropy 0.68916 (0.68979)	Top-1 acc 76.562 (72.673)	Top-5 acc 91.016 (89.087)	lr 0.00060
Train [109][470/3239]	Time 0.239 (0.670)	Data Time 0.001 (0.085)	Loss 2.2815 (2.1633)	Entropy 0.68912 (0.68977)	Top-1 acc 68.750 (72.670)	Top-5 acc 85.547 (89.085)	lr 0.00060
Train [109][480/3239]	Time 0.361 (0.787)	Data Time 0.003 (0.084)	Loss 2.2401 (2.1633)	Entropy 0.68913 (0.68976)	Top-1 acc 73.438 (72.672)	Top-5 acc 89.062 (89.089)	lr 0.00060
Train [109][490/3239]	Time 0.239 (0.780)	Data Time 0.002 (0.082)	Loss 2.1264 (2.1628)	Entropy 0.68915 (0.68975)	Top-1 acc 75.781 (72.683)	Top-5 acc 87.500 (89.091)	lr 0.00060
Train [109][500/3239]	Time 0.245 (0.775)	Data Time 0.002 (0.080)	Loss 2.3106 (2.1635)	Entropy 0.68917 (0.68974)	Top-1 acc 70.312 (72.658)	Top-5 acc 85.547 (89.084)	lr 0.00060
Train [109][510/3239]	Time 0.243 (0.769)	Data Time 0.001 (0.079)	Loss 2.1413 (2.1633)	Entropy 0.68915 (0.68973)	Top-1 acc 73.047 (72.669)	Top-5 acc 90.234 (89.082)	lr 0.00060
Train [109][520/3239]	Time 0.220 (0.763)	Data Time 0.001 (0.077)	Loss 2.1193 (2.1632)	Entropy 0.68917 (0.68971)	Top-1 acc 75.781 (72.681)	Top-5 acc 89.453 (89.080)	lr 0.00060
Train [109][530/3239]	Time 0.240 (0.758)	Data Time 0.002 (0.076)	Loss 2.1625 (2.1638)	Entropy 0.68912 (0.68970)	Top-1 acc 69.922 (72.662)	Top-5 acc 90.625 (89.071)	lr 0.00060
Train [109][540/3239]	Time 0.213 (0.753)	Data Time 0.001 (0.075)	Loss 2.1086 (2.1641)	Entropy 0.68910 (0.68969)	Top-1 acc 74.219 (72.669)	Top-5 acc 89.844 (89.064)	lr 0.00060
Train [109][550/3239]	Time 0.219 (0.748)	Data Time 0.001 (0.073)	Loss 2.2523 (2.1647)	Entropy 0.68900 (0.68968)	Top-1 acc 70.312 (72.656)	Top-5 acc 87.891 (89.052)	lr 0.00059
Train [109][560/3239]	Time 2.628 (0.743)	Data Time 0.001 (0.072)	Loss 2.0555 (2.1649)	Entropy 0.68900 (0.68967)	Top-1 acc 75.000 (72.631)	Top-5 acc 90.234 (89.052)	lr 0.00059
Train [109][570/3239]	Time 0.229 (0.735)	Data Time 0.001 (0.071)	Loss 2.0598 (2.1650)	Entropy 0.68897 (0.68966)	Top-1 acc 78.906 (72.644)	Top-5 acc 89.844 (89.037)	lr 0.00059
Train [109][580/3239]	Time 0.230 (0.730)	Data Time 0.001 (0.069)	Loss 2.1659 (2.1655)	Entropy 0.68903 (0.68965)	Top-1 acc 71.484 (72.627)	Top-5 acc 88.672 (89.039)	lr 0.00059
Train [109][590/3239]	Time 0.235 (0.726)	Data Time 0.001 (0.068)	Loss 2.1813 (2.1648)	Entropy 0.68892 (0.68963)	Top-1 acc 73.047 (72.648)	Top-5 acc 87.500 (89.054)	lr 0.00059
Train [109][600/3239]	Time 0.330 (0.722)	Data Time 0.001 (0.067)	Loss 1.9658 (2.1639)	Entropy 0.68895 (0.68962)	Top-1 acc 78.125 (72.668)	Top-5 acc 92.578 (89.066)	lr 0.00059
Train [109][610/3239]	Time 0.264 (0.718)	Data Time 0.002 (0.066)	Loss 2.1974 (2.1646)	Entropy 0.68894 (0.68961)	Top-1 acc 75.391 (72.654)	Top-5 acc 88.281 (89.050)	lr 0.00059
Train [109][620/3239]	Time 0.251 (0.715)	Data Time 0.001 (0.065)	Loss 2.1479 (2.1649)	Entropy 0.68890 (0.68960)	Top-1 acc 74.219 (72.647)	Top-5 acc 90.625 (89.044)	lr 0.00059
Train [109][630/3239]	Time 0.235 (0.711)	Data Time 0.001 (0.064)	Loss 2.1387 (2.1649)	Entropy 0.68890 (0.68959)	Top-1 acc 73.438 (72.647)	Top-5 acc 92.188 (89.050)	lr 0.00059
Train [109][640/3239]	Time 0.321 (0.708)	Data Time 0.001 (0.063)	Loss 2.1826 (2.1655)	Entropy 0.68892 (0.68958)	Top-1 acc 71.484 (72.630)	Top-5 acc 88.672 (89.038)	lr 0.00059
Train [109][650/3239]	Time 0.245 (0.704)	Data Time 0.001 (0.062)	Loss 2.0454 (2.1643)	Entropy 0.68890 (0.68957)	Top-1 acc 77.734 (72.668)	Top-5 acc 91.016 (89.067)	lr 0.00059
Train [109][660/3239]	Time 0.274 (0.701)	Data Time 0.001 (0.061)	Loss 2.2328 (2.1639)	Entropy 0.68893 (0.68956)	Top-1 acc 70.312 (72.670)	Top-5 acc 89.062 (89.085)	lr 0.00059
Train [109][670/3239]	Time 2.723 (0.698)	Data Time 0.001 (0.060)	Loss 2.2544 (2.1649)	Entropy 0.68893 (0.68955)	Top-1 acc 67.188 (72.643)	Top-5 acc 86.719 (89.070)	lr 0.00059
Train [109][680/3239]	Time 0.334 (0.692)	Data Time 0.001 (0.060)	Loss 1.9886 (2.1648)	Entropy 0.68896 (0.68954)	Top-1 acc 78.516 (72.650)	Top-5 acc 92.969 (89.078)	lr 0.00059
Train [109][690/3239]	Time 0.280 (0.688)	Data Time 0.001 (0.059)	Loss 2.0683 (2.1646)	Entropy 0.68899 (0.68953)	Top-1 acc 76.562 (72.651)	Top-5 acc 90.234 (89.071)	lr 0.00059
Train [109][700/3239]	Time 0.231 (0.686)	Data Time 0.001 (0.058)	Loss 2.2909 (2.1648)	Entropy 0.68895 (0.68952)	Top-1 acc 71.875 (72.649)	Top-5 acc 86.328 (89.061)	lr 0.00059
Train [109][710/3239]	Time 0.235 (0.683)	Data Time 0.001 (0.057)	Loss 2.1468 (2.1639)	Entropy 0.68890 (0.68952)	Top-1 acc 73.828 (72.659)	Top-5 acc 87.500 (89.074)	lr 0.00059
Train [109][720/3239]	Time 0.251 (0.680)	Data Time 0.001 (0.056)	Loss 2.0955 (2.1636)	Entropy 0.68882 (0.68951)	Top-1 acc 74.219 (72.665)	Top-5 acc 92.578 (89.085)	lr 0.00059
Train [109][730/3239]	Time 0.238 (0.677)	Data Time 0.001 (0.056)	Loss 2.2716 (2.1636)	Entropy 0.68875 (0.68950)	Top-1 acc 69.141 (72.674)	Top-5 acc 87.109 (89.081)	lr 0.00059
Train [109][740/3239]	Time 0.220 (0.675)	Data Time 0.001 (0.055)	Loss 2.1730 (2.1632)	Entropy 0.68868 (0.68949)	Top-1 acc 73.047 (72.695)	Top-5 acc 89.844 (89.085)	lr 0.00059
Train [109][750/3239]	Time 0.226 (0.672)	Data Time 0.001 (0.054)	Loss 2.0034 (2.1628)	Entropy 0.68868 (0.68948)	Top-1 acc 77.344 (72.714)	Top-5 acc 93.359 (89.093)	lr 0.00059
Train [109][760/3239]	Time 0.222 (0.669)	Data Time 0.001 (0.053)	Loss 2.3884 (2.1637)	Entropy 0.68860 (0.68947)	Top-1 acc 64.062 (72.678)	Top-5 acc 85.547 (89.078)	lr 0.00059
Train [109][770/3239]	Time 0.228 (0.667)	Data Time 0.001 (0.053)	Loss 2.1732 (2.1641)	Entropy 0.68864 (0.68945)	Top-1 acc 69.531 (72.663)	Top-5 acc 89.453 (89.074)	lr 0.00059
Train [109][780/3239]	Time 2.608 (0.665)	Data Time 0.001 (0.052)	Loss 2.1939 (2.1641)	Entropy 0.68864 (0.68944)	Top-1 acc 70.312 (72.663)	Top-5 acc 88.281 (89.077)	lr 0.00059
Train [109][790/3239]	Time 0.275 (0.660)	Data Time 0.001 (0.051)	Loss 2.1090 (2.1643)	Entropy 0.68864 (0.68943)	Top-1 acc 74.609 (72.656)	Top-5 acc 89.062 (89.078)	lr 0.00059
Train [109][800/3239]	Time 0.236 (0.657)	Data Time 0.001 (0.051)	Loss 2.2384 (2.1645)	Entropy 0.68856 (0.68942)	Top-1 acc 69.922 (72.651)	Top-5 acc 88.672 (89.079)	lr 0.00059
Train [109][810/3239]	Time 0.221 (0.655)	Data Time 0.001 (0.050)	Loss 2.1703 (2.1649)	Entropy 0.68858 (0.68941)	Top-1 acc 72.266 (72.635)	Top-5 acc 90.234 (89.072)	lr 0.00059
Train [109][820/3239]	Time 0.231 (0.653)	Data Time 0.001 (0.050)	Loss 2.1501 (2.1646)	Entropy 0.68856 (0.68940)	Top-1 acc 71.484 (72.630)	Top-5 acc 90.234 (89.077)	lr 0.00059
Train [109][830/3239]	Time 0.241 (0.651)	Data Time 0.001 (0.049)	Loss 2.2236 (2.1641)	Entropy 0.68848 (0.68939)	Top-1 acc 71.875 (72.637)	Top-5 acc 87.891 (89.093)	lr 0.00059
Train [109][840/3239]	Time 0.296 (0.709)	Data Time 0.003 (0.048)	Loss 2.2224 (2.1639)	Entropy 0.68839 (0.68938)	Top-1 acc 70.703 (72.634)	Top-5 acc 87.891 (89.093)	lr 0.00059
Train [109][850/3239]	Time 0.245 (0.709)	Data Time 0.002 (0.048)	Loss 2.2790 (2.1646)	Entropy 0.68840 (0.68937)	Top-1 acc 69.531 (72.609)	Top-5 acc 87.109 (89.081)	lr 0.00059
Train [109][860/3239]	Time 0.231 (0.706)	Data Time 0.002 (0.047)	Loss 2.0896 (2.1646)	Entropy 0.68846 (0.68936)	Top-1 acc 75.000 (72.616)	Top-5 acc 90.625 (89.081)	lr 0.00059
Train [109][870/3239]	Time 0.223 (0.704)	Data Time 0.001 (0.047)	Loss 2.1497 (2.1648)	Entropy 0.68843 (0.68935)	Top-1 acc 70.703 (72.606)	Top-5 acc 89.844 (89.076)	lr 0.00058
Train [109][880/3239]	Time 0.269 (0.701)	Data Time 0.002 (0.046)	Loss 2.0617 (2.1646)	Entropy 0.68846 (0.68934)	Top-1 acc 72.656 (72.600)	Top-5 acc 90.625 (89.080)	lr 0.00058
Train [109][890/3239]	Time 2.724 (0.699)	Data Time 0.002 (0.046)	Loss 2.1108 (2.1644)	Entropy 0.68846 (0.68933)	Top-1 acc 74.219 (72.605)	Top-5 acc 91.016 (89.082)	lr 0.00058
Train [109][900/3239]	Time 0.234 (0.694)	Data Time 0.001 (0.045)	Loss 2.2540 (2.1646)	Entropy 0.68847 (0.68932)	Top-1 acc 69.141 (72.611)	Top-5 acc 87.891 (89.075)	lr 0.00058
Train [109][910/3239]	Time 0.238 (0.691)	Data Time 0.001 (0.045)	Loss 2.0475 (2.1643)	Entropy 0.68852 (0.68931)	Top-1 acc 76.953 (72.625)	Top-5 acc 89.844 (89.084)	lr 0.00058
Train [109][920/3239]	Time 0.230 (0.689)	Data Time 0.001 (0.044)	Loss 2.1051 (2.1641)	Entropy 0.68847 (0.68930)	Top-1 acc 74.609 (72.631)	Top-5 acc 88.672 (89.084)	lr 0.00058
Train [109][930/3239]	Time 0.249 (0.687)	Data Time 0.001 (0.044)	Loss 2.2938 (2.1646)	Entropy 0.68848 (0.68929)	Top-1 acc 66.797 (72.605)	Top-5 acc 86.328 (89.077)	lr 0.00058
Train [109][940/3239]	Time 0.240 (0.685)	Data Time 0.001 (0.044)	Loss 2.2041 (2.1654)	Entropy 0.68844 (0.68928)	Top-1 acc 75.000 (72.587)	Top-5 acc 87.109 (89.065)	lr 0.00058
Train [109][950/3239]	Time 0.257 (0.683)	Data Time 0.002 (0.043)	Loss 2.1331 (2.1657)	Entropy 0.68845 (0.68927)	Top-1 acc 73.438 (72.588)	Top-5 acc 89.062 (89.057)	lr 0.00058
Train [109][960/3239]	Time 0.224 (0.681)	Data Time 0.001 (0.043)	Loss 2.1745 (2.1662)	Entropy 0.68844 (0.68926)	Top-1 acc 71.875 (72.576)	Top-5 acc 86.719 (89.051)	lr 0.00058
Train [109][970/3239]	Time 0.232 (0.679)	Data Time 0.001 (0.042)	Loss 2.1210 (2.1665)	Entropy 0.68834 (0.68926)	Top-1 acc 75.781 (72.571)	Top-5 acc 91.016 (89.047)	lr 0.00058
Train [109][980/3239]	Time 0.253 (0.677)	Data Time 0.001 (0.042)	Loss 2.2608 (2.1670)	Entropy 0.68832 (0.68925)	Top-1 acc 68.750 (72.570)	Top-5 acc 87.500 (89.039)	lr 0.00058
Train [109][990/3239]	Time 0.242 (0.675)	Data Time 0.001 (0.041)	Loss 2.0698 (2.1674)	Entropy 0.68832 (0.68924)	Top-1 acc 75.391 (72.557)	Top-5 acc 91.016 (89.033)	lr 0.00058
Train [109][1000/3239]	Time 2.662 (0.673)	Data Time 0.001 (0.041)	Loss 2.3985 (2.1678)	Entropy 0.68832 (0.68923)	Top-1 acc 69.141 (72.549)	Top-5 acc 85.547 (89.023)	lr 0.00058
Train [109][1010/3239]	Time 0.249 (0.669)	Data Time 0.005 (0.041)	Loss 2.1665 (2.1680)	Entropy 0.68834 (0.68922)	Top-1 acc 71.094 (72.537)	Top-5 acc 89.062 (89.021)	lr 0.00058
Train [109][1020/3239]	Time 0.236 (0.667)	Data Time 0.001 (0.040)	Loss 2.2335 (2.1679)	Entropy 0.68836 (0.68921)	Top-1 acc 70.312 (72.541)	Top-5 acc 87.891 (89.026)	lr 0.00058
Train [109][1030/3239]	Time 0.226 (0.666)	Data Time 0.001 (0.040)	Loss 2.1508 (2.1680)	Entropy 0.68832 (0.68920)	Top-1 acc 73.828 (72.542)	Top-5 acc 89.844 (89.025)	lr 0.00058
Train [109][1040/3239]	Time 0.239 (0.664)	Data Time 0.001 (0.039)	Loss 2.2164 (2.1681)	Entropy 0.68833 (0.68919)	Top-1 acc 70.312 (72.532)	Top-5 acc 89.062 (89.026)	lr 0.00058
Train [109][1050/3239]	Time 0.232 (0.662)	Data Time 0.002 (0.039)	Loss 2.2555 (2.1682)	Entropy 0.68826 (0.68918)	Top-1 acc 70.312 (72.526)	Top-5 acc 87.109 (89.025)	lr 0.00058
Train [109][1060/3239]	Time 0.343 (0.660)	Data Time 0.001 (0.039)	Loss 2.2828 (2.1685)	Entropy 0.68827 (0.68918)	Top-1 acc 71.484 (72.521)	Top-5 acc 88.672 (89.026)	lr 0.00058
Train [109][1070/3239]	Time 0.237 (0.659)	Data Time 0.001 (0.038)	Loss 2.2388 (2.1687)	Entropy 0.68824 (0.68917)	Top-1 acc 69.141 (72.510)	Top-5 acc 88.281 (89.030)	lr 0.00058
Train [109][1080/3239]	Time 0.224 (0.657)	Data Time 0.001 (0.038)	Loss 2.3350 (2.1685)	Entropy 0.68813 (0.68916)	Top-1 acc 68.359 (72.512)	Top-5 acc 85.938 (89.032)	lr 0.00058
Train [109][1090/3239]	Time 0.234 (0.655)	Data Time 0.001 (0.038)	Loss 2.1203 (2.1682)	Entropy 0.68810 (0.68915)	Top-1 acc 72.266 (72.516)	Top-5 acc 89.453 (89.039)	lr 0.00058
Train [109][1100/3239]	Time 0.311 (0.654)	Data Time 0.001 (0.037)	Loss 2.1937 (2.1680)	Entropy 0.68802 (0.68914)	Top-1 acc 71.094 (72.522)	Top-5 acc 89.062 (89.043)	lr 0.00058
Train [109][1110/3239]	Time 2.589 (0.652)	Data Time 0.001 (0.037)	Loss 2.2265 (2.1682)	Entropy 0.68802 (0.68913)	Top-1 acc 69.531 (72.513)	Top-5 acc 89.062 (89.041)	lr 0.00058
Train [109][1120/3239]	Time 0.238 (0.648)	Data Time 0.001 (0.037)	Loss 2.1322 (2.1685)	Entropy 0.68805 (0.68912)	Top-1 acc 74.219 (72.506)	Top-5 acc 88.672 (89.038)	lr 0.00058
Train [109][1130/3239]	Time 0.235 (0.647)	Data Time 0.001 (0.036)	Loss 2.1621 (2.1681)	Entropy 0.68811 (0.68911)	Top-1 acc 71.484 (72.516)	Top-5 acc 90.234 (89.043)	lr 0.00058
Train [109][1140/3239]	Time 0.339 (0.646)	Data Time 0.002 (0.036)	Loss 2.1835 (2.1683)	Entropy 0.68800 (0.68910)	Top-1 acc 71.094 (72.510)	Top-5 acc 89.844 (89.042)	lr 0.00058
Train [109][1150/3239]	Time 0.234 (0.644)	Data Time 0.001 (0.036)	Loss 2.1371 (2.1684)	Entropy 0.68795 (0.68909)	Top-1 acc 74.219 (72.507)	Top-5 acc 89.453 (89.039)	lr 0.00058
Train [109][1160/3239]	Time 0.262 (0.643)	Data Time 0.002 (0.036)	Loss 2.1810 (2.1681)	Entropy 0.68795 (0.68908)	Top-1 acc 75.000 (72.520)	Top-5 acc 86.719 (89.040)	lr 0.00058
Train [109][1170/3239]	Time 0.233 (0.642)	Data Time 0.002 (0.035)	Loss 2.1125 (2.1682)	Entropy 0.68789 (0.68907)	Top-1 acc 72.266 (72.514)	Top-5 acc 90.625 (89.036)	lr 0.00058
Train [109][1180/3239]	Time 0.391 (0.641)	Data Time 0.001 (0.035)	Loss 2.1944 (2.1681)	Entropy 0.68787 (0.68906)	Top-1 acc 73.047 (72.517)	Top-5 acc 89.844 (89.036)	lr 0.00058
Train [109][1190/3239]	Time 0.225 (0.639)	Data Time 0.001 (0.035)	Loss 2.2525 (2.1680)	Entropy 0.68775 (0.68905)	Top-1 acc 71.484 (72.528)	Top-5 acc 86.719 (89.038)	lr 0.00058
Train [109][1200/3239]	Time 0.275 (0.682)	Data Time 0.003 (0.034)	Loss 2.0660 (2.1681)	Entropy 0.68771 (0.68904)	Top-1 acc 73.828 (72.524)	Top-5 acc 89.844 (89.034)	lr 0.00057
Train [109][1210/3239]	Time 0.234 (0.681)	Data Time 0.002 (0.034)	Loss 2.3241 (2.1683)	Entropy 0.68766 (0.68903)	Top-1 acc 67.969 (72.511)	Top-5 acc 87.500 (89.032)	lr 0.00057
Train [109][1220/3239]	Time 2.743 (0.679)	Data Time 0.002 (0.034)	Loss 2.0916 (2.1682)	Entropy 0.68766 (0.68902)	Top-1 acc 73.828 (72.508)	Top-5 acc 91.406 (89.034)	lr 0.00057
Train [109][1230/3239]	Time 0.255 (0.676)	Data Time 0.003 (0.034)	Loss 2.3243 (2.1682)	Entropy 0.68765 (0.68901)	Top-1 acc 67.969 (72.508)	Top-5 acc 84.766 (89.029)	lr 0.00057
Train [109][1240/3239]	Time 0.232 (0.674)	Data Time 0.001 (0.033)	Loss 2.1055 (2.1680)	Entropy 0.68764 (0.68900)	Top-1 acc 74.609 (72.511)	Top-5 acc 92.188 (89.034)	lr 0.00057
Train [109][1250/3239]	Time 0.241 (0.673)	Data Time 0.001 (0.033)	Loss 2.3148 (2.1681)	Entropy 0.68760 (0.68899)	Top-1 acc 68.750 (72.513)	Top-5 acc 86.719 (89.030)	lr 0.00057
Train [109][1260/3239]	Time 0.229 (0.671)	Data Time 0.001 (0.033)	Loss 2.1673 (2.1681)	Entropy 0.68744 (0.68897)	Top-1 acc 74.219 (72.513)	Top-5 acc 88.672 (89.030)	lr 0.00057
Train [109][1270/3239]	Time 0.247 (0.670)	Data Time 0.002 (0.033)	Loss 2.2085 (2.1683)	Entropy 0.68736 (0.68896)	Top-1 acc 72.266 (72.502)	Top-5 acc 87.500 (89.030)	lr 0.00057
Train [109][1280/3239]	Time 0.255 (0.668)	Data Time 0.002 (0.032)	Loss 2.1939 (2.1686)	Entropy 0.68739 (0.68895)	Top-1 acc 69.922 (72.487)	Top-5 acc 87.500 (89.027)	lr 0.00057
Train [109][1290/3239]	Time 0.239 (0.667)	Data Time 0.002 (0.032)	Loss 2.2172 (2.1689)	Entropy 0.68735 (0.68894)	Top-1 acc 72.266 (72.474)	Top-5 acc 87.500 (89.022)	lr 0.00057
Train [109][1300/3239]	Time 0.230 (0.666)	Data Time 0.001 (0.032)	Loss 2.2751 (2.1692)	Entropy 0.68727 (0.68892)	Top-1 acc 71.484 (72.460)	Top-5 acc 86.328 (89.018)	lr 0.00057
Train [109][1310/3239]	Time 0.224 (0.664)	Data Time 0.001 (0.032)	Loss 2.1134 (2.1689)	Entropy 0.68730 (0.68891)	Top-1 acc 71.875 (72.464)	Top-5 acc 87.891 (89.017)	lr 0.00057
Train [109][1320/3239]	Time 0.257 (0.663)	Data Time 0.001 (0.031)	Loss 2.2604 (2.1690)	Entropy 0.68733 (0.68890)	Top-1 acc 70.703 (72.464)	Top-5 acc 86.328 (89.017)	lr 0.00057
Train [109][1330/3239]	Time 2.613 (0.662)	Data Time 0.001 (0.031)	Loss 2.3068 (2.1697)	Entropy 0.68733 (0.68889)	Top-1 acc 68.750 (72.450)	Top-5 acc 87.500 (89.006)	lr 0.00057
Train [109][1340/3239]	Time 0.219 (0.658)	Data Time 0.001 (0.031)	Loss 2.0505 (2.1699)	Entropy 0.68728 (0.68888)	Top-1 acc 77.734 (72.446)	Top-5 acc 91.016 (89.001)	lr 0.00057
Train [109][1350/3239]	Time 0.254 (0.657)	Data Time 0.001 (0.031)	Loss 2.0735 (2.1698)	Entropy 0.68725 (0.68886)	Top-1 acc 76.562 (72.453)	Top-5 acc 89.844 (89.005)	lr 0.00057
Train [109][1360/3239]	Time 0.230 (0.656)	Data Time 0.001 (0.031)	Loss 1.9035 (2.1695)	Entropy 0.68731 (0.68885)	Top-1 acc 77.344 (72.460)	Top-5 acc 94.141 (89.010)	lr 0.00057
Train [109][1370/3239]	Time 0.282 (0.655)	Data Time 0.001 (0.030)	Loss 2.1827 (2.1691)	Entropy 0.68729 (0.68884)	Top-1 acc 72.656 (72.474)	Top-5 acc 90.234 (89.016)	lr 0.00057
Train [109][1380/3239]	Time 0.231 (0.653)	Data Time 0.001 (0.030)	Loss 2.2298 (2.1691)	Entropy 0.68729 (0.68883)	Top-1 acc 68.750 (72.471)	Top-5 acc 88.672 (89.016)	lr 0.00057
Train [109][1390/3239]	Time 0.232 (0.652)	Data Time 0.001 (0.030)	Loss 2.1196 (2.1688)	Entropy 0.68730 (0.68882)	Top-1 acc 71.484 (72.469)	Top-5 acc 90.625 (89.025)	lr 0.00057
Train [109][1400/3239]	Time 0.241 (0.651)	Data Time 0.001 (0.030)	Loss 2.1999 (2.1687)	Entropy 0.68729 (0.68881)	Top-1 acc 71.875 (72.465)	Top-5 acc 87.500 (89.024)	lr 0.00057
Train [109][1410/3239]	Time 0.271 (0.650)	Data Time 0.001 (0.030)	Loss 2.1801 (2.1685)	Entropy 0.68717 (0.68880)	Top-1 acc 73.047 (72.470)	Top-5 acc 89.453 (89.030)	lr 0.00057
Train [109][1420/3239]	Time 0.223 (0.649)	Data Time 0.001 (0.029)	Loss 2.2848 (2.1685)	Entropy 0.68711 (0.68878)	Top-1 acc 67.969 (72.472)	Top-5 acc 87.500 (89.029)	lr 0.00057
Train [109][1430/3239]	Time 0.362 (0.647)	Data Time 0.001 (0.029)	Loss 2.1854 (2.1689)	Entropy 0.68696 (0.68877)	Top-1 acc 70.312 (72.465)	Top-5 acc 87.500 (89.022)	lr 0.00057
Train [109][1440/3239]	Time 2.584 (0.646)	Data Time 0.001 (0.029)	Loss 2.4695 (2.1694)	Entropy 0.68696 (0.68876)	Top-1 acc 66.016 (72.451)	Top-5 acc 84.766 (89.017)	lr 0.00057
Train [109][1450/3239]	Time 0.261 (0.643)	Data Time 0.001 (0.029)	Loss 2.1006 (2.1694)	Entropy 0.68697 (0.68875)	Top-1 acc 70.703 (72.456)	Top-5 acc 91.016 (89.019)	lr 0.00057
Train [109][1460/3239]	Time 0.231 (0.642)	Data Time 0.001 (0.029)	Loss 2.1326 (2.1694)	Entropy 0.68693 (0.68874)	Top-1 acc 73.047 (72.454)	Top-5 acc 88.672 (89.019)	lr 0.00057
Train [109][1470/3239]	Time 0.340 (0.641)	Data Time 0.001 (0.028)	Loss 2.3048 (2.1693)	Entropy 0.68697 (0.68872)	Top-1 acc 70.312 (72.455)	Top-5 acc 85.938 (89.024)	lr 0.00057
Train [109][1480/3239]	Time 0.253 (0.640)	Data Time 0.001 (0.028)	Loss 2.0424 (2.1689)	Entropy 0.68697 (0.68871)	Top-1 acc 75.781 (72.461)	Top-5 acc 90.234 (89.030)	lr 0.00057
Train [109][1490/3239]	Time 0.259 (0.639)	Data Time 0.001 (0.028)	Loss 2.2928 (2.1689)	Entropy 0.68691 (0.68870)	Top-1 acc 70.312 (72.460)	Top-5 acc 85.547 (89.029)	lr 0.00057
Train [109][1500/3239]	Time 0.212 (0.638)	Data Time 0.001 (0.028)	Loss 2.2630 (2.1688)	Entropy 0.68681 (0.68869)	Top-1 acc 71.094 (72.461)	Top-5 acc 85.938 (89.032)	lr 0.00057
Train [109][1510/3239]	Time 0.234 (0.637)	Data Time 0.001 (0.028)	Loss 2.1443 (2.1686)	Entropy 0.68679 (0.68868)	Top-1 acc 72.656 (72.463)	Top-5 acc 88.672 (89.039)	lr 0.00057
Train [109][1520/3239]	Time 0.224 (0.636)	Data Time 0.001 (0.028)	Loss 2.0806 (2.1686)	Entropy 0.68676 (0.68866)	Top-1 acc 75.000 (72.469)	Top-5 acc 89.062 (89.037)	lr 0.00057
Train [109][1530/3239]	Time 0.225 (0.635)	Data Time 0.001 (0.027)	Loss 2.0412 (2.1684)	Entropy 0.68679 (0.68865)	Top-1 acc 74.609 (72.473)	Top-5 acc 91.406 (89.041)	lr 0.00056
Train [109][1540/3239]	Time 0.289 (0.634)	Data Time 0.001 (0.027)	Loss 2.0540 (2.1682)	Entropy 0.68679 (0.68864)	Top-1 acc 78.125 (72.475)	Top-5 acc 93.359 (89.047)	lr 0.00056
Train [109][1550/3239]	Time 2.561 (0.633)	Data Time 0.001 (0.027)	Loss 1.9974 (2.1681)	Entropy 0.68679 (0.68863)	Top-1 acc 78.125 (72.475)	Top-5 acc 93.359 (89.053)	lr 0.00056
Train [109][1560/3239]	Time 0.258 (0.631)	Data Time 0.001 (0.027)	Loss 2.1243 (2.1679)	Entropy 0.68671 (0.68861)	Top-1 acc 75.000 (72.477)	Top-5 acc 90.234 (89.058)	lr 0.00056
Train [109][1570/3239]	Time 0.311 (0.664)	Data Time 0.002 (0.027)	Loss 2.0821 (2.1681)	Entropy 0.68671 (0.68860)	Top-1 acc 75.000 (72.479)	Top-5 acc 91.016 (89.053)	lr 0.00056
Train [109][1580/3239]	Time 0.261 (0.663)	Data Time 0.002 (0.027)	Loss 2.0311 (2.1680)	Entropy 0.68670 (0.68859)	Top-1 acc 73.828 (72.479)	Top-5 acc 91.797 (89.052)	lr 0.00056
Train [109][1590/3239]	Time 0.240 (0.662)	Data Time 0.002 (0.026)	Loss 2.1602 (2.1683)	Entropy 0.68670 (0.68858)	Top-1 acc 76.953 (72.474)	Top-5 acc 87.500 (89.048)	lr 0.00056
Train [109][1600/3239]	Time 0.235 (0.661)	Data Time 0.001 (0.026)	Loss 2.1664 (2.1684)	Entropy 0.68673 (0.68857)	Top-1 acc 73.047 (72.470)	Top-5 acc 88.281 (89.047)	lr 0.00056
Train [109][1610/3239]	Time 0.228 (0.660)	Data Time 0.001 (0.026)	Loss 2.2689 (2.1685)	Entropy 0.68672 (0.68855)	Top-1 acc 69.141 (72.469)	Top-5 acc 87.891 (89.045)	lr 0.00056
Train [109][1620/3239]	Time 0.243 (0.659)	Data Time 0.001 (0.026)	Loss 2.2465 (2.1686)	Entropy 0.68670 (0.68854)	Top-1 acc 69.531 (72.465)	Top-5 acc 87.500 (89.044)	lr 0.00056
Train [109][1630/3239]	Time 0.234 (0.658)	Data Time 0.002 (0.026)	Loss 2.0911 (2.1691)	Entropy 0.68664 (0.68853)	Top-1 acc 77.734 (72.455)	Top-5 acc 89.062 (89.034)	lr 0.00056
Train [109][1640/3239]	Time 0.235 (0.657)	Data Time 0.001 (0.026)	Loss 2.1812 (2.1692)	Entropy 0.68661 (0.68852)	Top-1 acc 71.484 (72.453)	Top-5 acc 88.281 (89.035)	lr 0.00056
Train [109][1650/3239]	Time 0.240 (0.656)	Data Time 0.001 (0.025)	Loss 2.1371 (2.1693)	Entropy 0.68661 (0.68851)	Top-1 acc 74.219 (72.454)	Top-5 acc 89.453 (89.029)	lr 0.00056
Train [109][1660/3239]	Time 2.698 (0.655)	Data Time 0.002 (0.025)	Loss 2.2221 (2.1690)	Entropy 0.68661 (0.68850)	Top-1 acc 70.312 (72.453)	Top-5 acc 89.062 (89.035)	lr 0.00056
Train [109][1670/3239]	Time 0.257 (0.653)	Data Time 0.002 (0.025)	Loss 2.1664 (2.1691)	Entropy 0.68658 (0.68849)	Top-1 acc 73.438 (72.449)	Top-5 acc 89.062 (89.034)	lr 0.00056
Train [109][1680/3239]	Time 0.333 (0.652)	Data Time 0.001 (0.025)	Loss 2.2057 (2.1693)	Entropy 0.68656 (0.68847)	Top-1 acc 71.484 (72.449)	Top-5 acc 89.844 (89.029)	lr 0.00056
Train [109][1690/3239]	Time 0.243 (0.651)	Data Time 0.001 (0.025)	Loss 2.1937 (2.1694)	Entropy 0.68659 (0.68846)	Top-1 acc 70.703 (72.448)	Top-5 acc 89.844 (89.029)	lr 0.00056
Train [109][1700/3239]	Time 0.252 (0.650)	Data Time 0.001 (0.025)	Loss 2.2033 (2.1692)	Entropy 0.68658 (0.68845)	Top-1 acc 70.703 (72.455)	Top-5 acc 88.281 (89.027)	lr 0.00056
Train [109][1710/3239]	Time 0.228 (0.649)	Data Time 0.001 (0.025)	Loss 2.1558 (2.1698)	Entropy 0.68653 (0.68844)	Top-1 acc 71.875 (72.442)	Top-5 acc 87.500 (89.020)	lr 0.00056
Train [109][1720/3239]	Time 0.340 (0.648)	Data Time 0.001 (0.025)	Loss 2.2101 (2.1697)	Entropy 0.68643 (0.68843)	Top-1 acc 71.094 (72.441)	Top-5 acc 88.281 (89.024)	lr 0.00056
Train [109][1730/3239]	Time 0.237 (0.647)	Data Time 0.001 (0.024)	Loss 2.1341 (2.1697)	Entropy 0.68643 (0.68842)	Top-1 acc 73.828 (72.443)	Top-5 acc 89.453 (89.026)	lr 0.00056
Train [109][1740/3239]	Time 0.246 (0.646)	Data Time 0.001 (0.024)	Loss 2.2038 (2.1699)	Entropy 0.68625 (0.68841)	Top-1 acc 71.484 (72.440)	Top-5 acc 87.891 (89.023)	lr 0.00056
Train [109][1750/3239]	Time 0.242 (0.645)	Data Time 0.001 (0.024)	Loss 2.1404 (2.1696)	Entropy 0.68626 (0.68839)	Top-1 acc 72.656 (72.445)	Top-5 acc 89.844 (89.026)	lr 0.00056
Train [109][1760/3239]	Time 0.335 (0.644)	Data Time 0.001 (0.024)	Loss 2.2756 (2.1698)	Entropy 0.68627 (0.68838)	Top-1 acc 67.969 (72.440)	Top-5 acc 86.719 (89.022)	lr 0.00056
Train [109][1770/3239]	Time 2.603 (0.643)	Data Time 0.001 (0.024)	Loss 2.1262 (2.1700)	Entropy 0.68627 (0.68837)	Top-1 acc 72.266 (72.430)	Top-5 acc 88.672 (89.016)	lr 0.00056
Train [109][1780/3239]	Time 0.259 (0.641)	Data Time 0.001 (0.024)	Loss 2.2229 (2.1700)	Entropy 0.68620 (0.68836)	Top-1 acc 71.875 (72.431)	Top-5 acc 89.453 (89.019)	lr 0.00056
Train [109][1790/3239]	Time 0.236 (0.640)	Data Time 0.001 (0.024)	Loss 2.1808 (2.1699)	Entropy 0.68624 (0.68835)	Top-1 acc 71.094 (72.428)	Top-5 acc 89.844 (89.022)	lr 0.00056
Train [109][1800/3239]	Time 0.243 (0.639)	Data Time 0.001 (0.024)	Loss 2.1678 (2.1699)	Entropy 0.68618 (0.68833)	Top-1 acc 71.484 (72.429)	Top-5 acc 86.719 (89.019)	lr 0.00056
Train [109][1810/3239]	Time 0.230 (0.639)	Data Time 0.001 (0.023)	Loss 2.1519 (2.1698)	Entropy 0.68625 (0.68832)	Top-1 acc 74.609 (72.437)	Top-5 acc 90.234 (89.024)	lr 0.00056
Train [109][1820/3239]	Time 0.224 (0.638)	Data Time 0.001 (0.023)	Loss 2.2415 (2.1701)	Entropy 0.68621 (0.68831)	Top-1 acc 68.750 (72.428)	Top-5 acc 89.062 (89.020)	lr 0.00056
Train [109][1830/3239]	Time 0.243 (0.637)	Data Time 0.002 (0.023)	Loss 2.2074 (2.1701)	Entropy 0.68620 (0.68830)	Top-1 acc 75.781 (72.432)	Top-5 acc 87.500 (89.021)	lr 0.00056
Train [109][1840/3239]	Time 0.235 (0.636)	Data Time 0.001 (0.023)	Loss 2.4041 (2.1704)	Entropy 0.68619 (0.68829)	Top-1 acc 69.531 (72.421)	Top-5 acc 83.203 (89.018)	lr 0.00056
Train [109][1850/3239]	Time 0.322 (0.635)	Data Time 0.001 (0.023)	Loss 2.0794 (2.1704)	Entropy 0.68627 (0.68828)	Top-1 acc 75.000 (72.422)	Top-5 acc 89.844 (89.015)	lr 0.00056
Train [109][1860/3239]	Time 0.236 (0.634)	Data Time 0.001 (0.023)	Loss 2.1632 (2.1704)	Entropy 0.68622 (0.68827)	Top-1 acc 72.266 (72.420)	Top-5 acc 89.453 (89.017)	lr 0.00056
Train [109][1870/3239]	Time 0.223 (0.634)	Data Time 0.001 (0.023)	Loss 2.2470 (2.1704)	Entropy 0.68610 (0.68825)	Top-1 acc 72.656 (72.424)	Top-5 acc 86.719 (89.013)	lr 0.00055
Train [109][1880/3239]	Time 2.615 (0.633)	Data Time 0.001 (0.023)	Loss 2.1636 (2.1703)	Entropy 0.68610 (0.68824)	Top-1 acc 74.219 (72.425)	Top-5 acc 89.453 (89.013)	lr 0.00055
Train [109][1890/3239]	Time 0.363 (0.631)	Data Time 0.001 (0.022)	Loss 2.2290 (2.1702)	Entropy 0.68608 (0.68823)	Top-1 acc 69.922 (72.429)	Top-5 acc 86.719 (89.018)	lr 0.00055
Train [109][1900/3239]	Time 0.237 (0.630)	Data Time 0.001 (0.022)	Loss 2.1460 (2.1703)	Entropy 0.68613 (0.68822)	Top-1 acc 71.094 (72.430)	Top-5 acc 91.797 (89.016)	lr 0.00055
Train [109][1910/3239]	Time 0.229 (0.629)	Data Time 0.001 (0.022)	Loss 2.1656 (2.1698)	Entropy 0.68611 (0.68821)	Top-1 acc 72.266 (72.443)	Top-5 acc 91.016 (89.029)	lr 0.00055
Train [109][1920/3239]	Time 0.255 (0.629)	Data Time 0.001 (0.022)	Loss 2.1895 (2.1698)	Entropy 0.68606 (0.68820)	Top-1 acc 70.703 (72.437)	Top-5 acc 88.281 (89.027)	lr 0.00055
Train [109][1930/3239]	Time 0.539 (0.654)	Data Time 0.003 (0.022)	Loss 2.3581 (2.1702)	Entropy 0.68621 (0.68819)	Top-1 acc 69.141 (72.428)	Top-5 acc 83.984 (89.019)	lr 0.00055
Train [109][1940/3239]	Time 0.237 (0.654)	Data Time 0.002 (0.022)	Loss 2.4065 (2.1704)	Entropy 0.68618 (0.68818)	Top-1 acc 66.016 (72.422)	Top-5 acc 83.984 (89.016)	lr 0.00055
Train [109][1950/3239]	Time 0.237 (0.653)	Data Time 0.002 (0.022)	Loss 2.1663 (2.1705)	Entropy 0.68615 (0.68817)	Top-1 acc 76.562 (72.424)	Top-5 acc 89.062 (89.013)	lr 0.00055
Train [109][1960/3239]	Time 0.242 (0.652)	Data Time 0.001 (0.022)	Loss 2.2142 (2.1706)	Entropy 0.68620 (0.68816)	Top-1 acc 70.312 (72.423)	Top-5 acc 86.719 (89.010)	lr 0.00055
Train [109][1970/3239]	Time 0.230 (0.651)	Data Time 0.001 (0.022)	Loss 2.2089 (2.1706)	Entropy 0.68619 (0.68815)	Top-1 acc 71.484 (72.426)	Top-5 acc 89.062 (89.011)	lr 0.00055
Train [109][1980/3239]	Time 0.247 (0.650)	Data Time 0.001 (0.022)	Loss 2.0142 (2.1707)	Entropy 0.68615 (0.68814)	Top-1 acc 75.000 (72.421)	Top-5 acc 93.359 (89.006)	lr 0.00055
Train [109][1990/3239]	Time 2.614 (0.649)	Data Time 0.001 (0.021)	Loss 2.0743 (2.1705)	Entropy 0.68615 (0.68813)	Top-1 acc 73.047 (72.424)	Top-5 acc 91.406 (89.011)	lr 0.00055
Train [109][2000/3239]	Time 0.224 (0.647)	Data Time 0.001 (0.021)	Loss 2.3048 (2.1706)	Entropy 0.68612 (0.68812)	Top-1 acc 65.625 (72.417)	Top-5 acc 85.938 (89.009)	lr 0.00055
Train [109][2010/3239]	Time 0.242 (0.647)	Data Time 0.001 (0.021)	Loss 2.2017 (2.1704)	Entropy 0.68614 (0.68811)	Top-1 acc 71.094 (72.427)	Top-5 acc 89.844 (89.011)	lr 0.00055
Train [109][2020/3239]	Time 0.233 (0.646)	Data Time 0.001 (0.021)	Loss 2.1871 (2.1704)	Entropy 0.68614 (0.68810)	Top-1 acc 70.312 (72.424)	Top-5 acc 89.453 (89.013)	lr 0.00055
Train [109][2030/3239]	Time 0.231 (0.645)	Data Time 0.001 (0.021)	Loss 2.0732 (2.1703)	Entropy 0.68610 (0.68809)	Top-1 acc 72.656 (72.423)	Top-5 acc 91.406 (89.011)	lr 0.00055
Train [109][2040/3239]	Time 0.248 (0.644)	Data Time 0.002 (0.021)	Loss 2.2302 (2.1702)	Entropy 0.68606 (0.68808)	Top-1 acc 73.047 (72.428)	Top-5 acc 88.672 (89.011)	lr 0.00055
Train [109][2050/3239]	Time 0.231 (0.644)	Data Time 0.001 (0.021)	Loss 2.1372 (2.1702)	Entropy 0.68601 (0.68807)	Top-1 acc 72.656 (72.429)	Top-5 acc 87.891 (89.007)	lr 0.00055
Train [109][2060/3239]	Time 0.258 (0.643)	Data Time 0.001 (0.021)	Loss 2.1995 (2.1702)	Entropy 0.68599 (0.68806)	Top-1 acc 69.922 (72.427)	Top-5 acc 88.281 (89.009)	lr 0.00055
Train [109][2070/3239]	Time 0.236 (0.642)	Data Time 0.002 (0.021)	Loss 2.2731 (2.1702)	Entropy 0.68603 (0.68805)	Top-1 acc 69.922 (72.427)	Top-5 acc 88.672 (89.006)	lr 0.00055
Train [109][2080/3239]	Time 0.240 (0.642)	Data Time 0.001 (0.021)	Loss 2.1161 (2.1703)	Entropy 0.68601 (0.68804)	Top-1 acc 75.000 (72.425)	Top-5 acc 90.625 (89.005)	lr 0.00055
Train [109][2090/3239]	Time 0.227 (0.641)	Data Time 0.001 (0.020)	Loss 2.2537 (2.1704)	Entropy 0.68601 (0.68803)	Top-1 acc 70.703 (72.423)	Top-5 acc 87.500 (89.005)	lr 0.00055
Train [109][2100/3239]	Time 2.596 (0.640)	Data Time 0.001 (0.020)	Loss 2.1503 (2.1703)	Entropy 0.68601 (0.68802)	Top-1 acc 74.609 (72.423)	Top-5 acc 90.625 (89.008)	lr 0.00055
Train [109][2110/3239]	Time 0.231 (0.638)	Data Time 0.001 (0.020)	Loss 2.1723 (2.1707)	Entropy 0.68597 (0.68801)	Top-1 acc 74.219 (72.416)	Top-5 acc 88.281 (89.005)	lr 0.00055
Train [109][2120/3239]	Time 0.239 (0.638)	Data Time 0.001 (0.020)	Loss 2.2284 (2.1705)	Entropy 0.68585 (0.68800)	Top-1 acc 70.312 (72.419)	Top-5 acc 89.453 (89.008)	lr 0.00055
Train [109][2130/3239]	Time 0.235 (0.637)	Data Time 0.002 (0.020)	Loss 2.1460 (2.1703)	Entropy 0.68605 (0.68799)	Top-1 acc 73.828 (72.423)	Top-5 acc 87.500 (89.010)	lr 0.00055
Train [109][2140/3239]	Time 0.241 (0.636)	Data Time 0.001 (0.020)	Loss 2.4010 (2.1704)	Entropy 0.68605 (0.68798)	Top-1 acc 67.188 (72.421)	Top-5 acc 85.938 (89.011)	lr 0.00055
Train [109][2150/3239]	Time 0.235 (0.636)	Data Time 0.001 (0.020)	Loss 2.1721 (2.1704)	Entropy 0.68601 (0.68797)	Top-1 acc 72.266 (72.422)	Top-5 acc 89.062 (89.010)	lr 0.00055
Train [109][2160/3239]	Time 0.231 (0.635)	Data Time 0.001 (0.020)	Loss 2.2569 (2.1705)	Entropy 0.68603 (0.68796)	Top-1 acc 72.266 (72.423)	Top-5 acc 86.328 (89.009)	lr 0.00055
Train [109][2170/3239]	Time 0.231 (0.634)	Data Time 0.001 (0.020)	Loss 2.0664 (2.1705)	Entropy 0.68601 (0.68795)	Top-1 acc 76.562 (72.420)	Top-5 acc 92.188 (89.008)	lr 0.00055
Train [109][2180/3239]	Time 0.234 (0.633)	Data Time 0.001 (0.020)	Loss 2.2280 (2.1705)	Entropy 0.68602 (0.68795)	Top-1 acc 70.703 (72.420)	Top-5 acc 88.281 (89.007)	lr 0.00055
Train [109][2190/3239]	Time 0.262 (0.633)	Data Time 0.002 (0.020)	Loss 2.1459 (2.1704)	Entropy 0.68600 (0.68794)	Top-1 acc 71.875 (72.423)	Top-5 acc 89.453 (89.011)	lr 0.00055
Train [109][2200/3239]	Time 0.250 (0.632)	Data Time 0.001 (0.020)	Loss 2.1676 (2.1705)	Entropy 0.68580 (0.68793)	Top-1 acc 71.875 (72.419)	Top-5 acc 87.500 (89.008)	lr 0.00055
Train [109][2210/3239]	Time 2.544 (0.632)	Data Time 0.001 (0.019)	Loss 2.0626 (2.1703)	Entropy 0.68580 (0.68792)	Top-1 acc 76.953 (72.427)	Top-5 acc 89.844 (89.009)	lr 0.00054
Train [109][2220/3239]	Time 0.267 (0.630)	Data Time 0.001 (0.019)	Loss 2.0743 (2.1701)	Entropy 0.68579 (0.68791)	Top-1 acc 76.562 (72.431)	Top-5 acc 90.234 (89.011)	lr 0.00054
Train [109][2230/3239]	Time 0.229 (0.629)	Data Time 0.001 (0.019)	Loss 2.2270 (2.1703)	Entropy 0.68577 (0.68790)	Top-1 acc 68.359 (72.427)	Top-5 acc 89.062 (89.008)	lr 0.00054
Train [109][2240/3239]	Time 0.243 (0.629)	Data Time 0.001 (0.019)	Loss 2.0758 (2.1700)	Entropy 0.68573 (0.68789)	Top-1 acc 71.875 (72.438)	Top-5 acc 91.797 (89.016)	lr 0.00054
Train [109][2250/3239]	Time 0.225 (0.628)	Data Time 0.001 (0.019)	Loss 2.1449 (2.1702)	Entropy 0.68580 (0.68788)	Top-1 acc 73.828 (72.433)	Top-5 acc 89.453 (89.010)	lr 0.00054
Train [109][2260/3239]	Time 0.228 (0.627)	Data Time 0.001 (0.019)	Loss 2.1019 (2.1704)	Entropy 0.68588 (0.68787)	Top-1 acc 73.047 (72.426)	Top-5 acc 90.625 (89.008)	lr 0.00054
Train [109][2270/3239]	Time 0.313 (0.627)	Data Time 0.001 (0.019)	Loss 2.1185 (2.1706)	Entropy 0.68584 (0.68786)	Top-1 acc 73.047 (72.423)	Top-5 acc 91.406 (89.005)	lr 0.00054
Train [109][2280/3239]	Time 0.248 (0.626)	Data Time 0.001 (0.019)	Loss 2.3134 (2.1705)	Entropy 0.68575 (0.68785)	Top-1 acc 69.531 (72.422)	Top-5 acc 87.500 (89.009)	lr 0.00054
Train [109][2290/3239]	Time 0.283 (0.648)	Data Time 0.004 (0.019)	Loss 2.0166 (2.1706)	Entropy 0.68572 (0.68784)	Top-1 acc 75.391 (72.422)	Top-5 acc 90.625 (89.006)	lr 0.00054
Train [109][2300/3239]	Time 0.232 (0.648)	Data Time 0.002 (0.019)	Loss 2.0352 (2.1705)	Entropy 0.68583 (0.68783)	Top-1 acc 75.391 (72.422)	Top-5 acc 89.844 (89.006)	lr 0.00054
Train [109][2310/3239]	Time 0.367 (0.647)	Data Time 0.002 (0.019)	Loss 2.2283 (2.1706)	Entropy 0.68579 (0.68783)	Top-1 acc 70.703 (72.418)	Top-5 acc 87.891 (89.005)	lr 0.00054
Train [109][2320/3239]	Time 2.624 (0.646)	Data Time 0.001 (0.019)	Loss 2.1291 (2.1707)	Entropy 0.68579 (0.68782)	Top-1 acc 71.094 (72.415)	Top-5 acc 91.797 (89.006)	lr 0.00054
Train [109][2330/3239]	Time 0.267 (0.645)	Data Time 0.001 (0.019)	Loss 2.0754 (2.1708)	Entropy 0.68580 (0.68781)	Top-1 acc 75.781 (72.418)	Top-5 acc 90.234 (89.003)	lr 0.00054
Train [109][2340/3239]	Time 0.244 (0.644)	Data Time 0.001 (0.018)	Loss 2.0511 (2.1705)	Entropy 0.68581 (0.68780)	Top-1 acc 77.344 (72.427)	Top-5 acc 91.016 (89.010)	lr 0.00054
Train [109][2350/3239]	Time 0.351 (0.643)	Data Time 0.001 (0.018)	Loss 2.1199 (2.1705)	Entropy 0.68578 (0.68779)	Top-1 acc 77.734 (72.432)	Top-5 acc 89.062 (89.007)	lr 0.00054
Train [109][2360/3239]	Time 0.282 (0.643)	Data Time 0.002 (0.018)	Loss 2.2036 (2.1706)	Entropy 0.68580 (0.68778)	Top-1 acc 71.094 (72.435)	Top-5 acc 89.453 (89.006)	lr 0.00054
Train [109][2370/3239]	Time 0.227 (0.642)	Data Time 0.001 (0.018)	Loss 2.1526 (2.1704)	Entropy 0.68576 (0.68777)	Top-1 acc 76.562 (72.435)	Top-5 acc 89.453 (89.009)	lr 0.00054
Train [109][2380/3239]	Time 0.244 (0.641)	Data Time 0.001 (0.018)	Loss 2.1168 (2.1705)	Entropy 0.68568 (0.68777)	Top-1 acc 74.219 (72.435)	Top-5 acc 90.234 (89.007)	lr 0.00054
Train [109][2390/3239]	Time 0.234 (0.641)	Data Time 0.001 (0.018)	Loss 2.1262 (2.1704)	Entropy 0.68562 (0.68776)	Top-1 acc 73.828 (72.438)	Top-5 acc 88.281 (89.005)	lr 0.00054
Train [109][2400/3239]	Time 0.229 (0.640)	Data Time 0.001 (0.018)	Loss 2.2301 (2.1703)	Entropy 0.68560 (0.68775)	Top-1 acc 71.484 (72.442)	Top-5 acc 89.062 (89.009)	lr 0.00054
Train [109][2410/3239]	Time 0.238 (0.640)	Data Time 0.001 (0.018)	Loss 2.2132 (2.1704)	Entropy 0.68562 (0.68774)	Top-1 acc 71.875 (72.438)	Top-5 acc 87.109 (89.010)	lr 0.00054
Train [109][2420/3239]	Time 0.256 (0.639)	Data Time 0.001 (0.018)	Loss 1.9790 (2.1703)	Entropy 0.68556 (0.68773)	Top-1 acc 79.297 (72.440)	Top-5 acc 92.969 (89.009)	lr 0.00054
Train [109][2430/3239]	Time 2.583 (0.638)	Data Time 0.002 (0.018)	Loss 2.1081 (2.1701)	Entropy 0.68556 (0.68772)	Top-1 acc 71.875 (72.444)	Top-5 acc 91.406 (89.013)	lr 0.00054
Train [109][2440/3239]	Time 0.234 (0.637)	Data Time 0.002 (0.018)	Loss 2.2411 (2.1702)	Entropy 0.68556 (0.68771)	Top-1 acc 68.359 (72.446)	Top-5 acc 88.281 (89.012)	lr 0.00054
Train [109][2450/3239]	Time 0.238 (0.636)	Data Time 0.001 (0.018)	Loss 2.2214 (2.1701)	Entropy 0.68547 (0.68770)	Top-1 acc 67.969 (72.448)	Top-5 acc 89.062 (89.016)	lr 0.00054
Train [109][2460/3239]	Time 0.233 (0.636)	Data Time 0.001 (0.018)	Loss 2.1985 (2.1702)	Entropy 0.68532 (0.68769)	Top-1 acc 69.531 (72.447)	Top-5 acc 87.109 (89.014)	lr 0.00054
Train [109][2470/3239]	Time 0.240 (0.635)	Data Time 0.001 (0.018)	Loss 2.2353 (2.1703)	Entropy 0.68540 (0.68768)	Top-1 acc 67.188 (72.446)	Top-5 acc 86.719 (89.011)	lr 0.00054
Train [109][2480/3239]	Time 0.231 (0.634)	Data Time 0.001 (0.018)	Loss 2.2189 (2.1704)	Entropy 0.68538 (0.68768)	Top-1 acc 70.703 (72.442)	Top-5 acc 87.109 (89.011)	lr 0.00054
Train [109][2490/3239]	Time 0.237 (0.634)	Data Time 0.002 (0.017)	Loss 2.3205 (2.1704)	Entropy 0.68538 (0.68767)	Top-1 acc 66.406 (72.443)	Top-5 acc 86.719 (89.011)	lr 0.00054
Train [109][2500/3239]	Time 0.233 (0.633)	Data Time 0.001 (0.017)	Loss 2.1644 (2.1704)	Entropy 0.68544 (0.68766)	Top-1 acc 73.438 (72.440)	Top-5 acc 89.844 (89.012)	lr 0.00054
Train [109][2510/3239]	Time 0.236 (0.633)	Data Time 0.001 (0.017)	Loss 2.0893 (2.1703)	Entropy 0.68550 (0.68765)	Top-1 acc 75.000 (72.444)	Top-5 acc 90.234 (89.014)	lr 0.00054
Train [109][2520/3239]	Time 0.228 (0.632)	Data Time 0.001 (0.017)	Loss 2.1273 (2.1702)	Entropy 0.68551 (0.68764)	Top-1 acc 72.266 (72.442)	Top-5 acc 87.891 (89.014)	lr 0.00054
Train [109][2530/3239]	Time 0.258 (0.632)	Data Time 0.001 (0.017)	Loss 2.0888 (2.1700)	Entropy 0.68552 (0.68763)	Top-1 acc 71.094 (72.445)	Top-5 acc 91.016 (89.016)	lr 0.00054
Train [109][2540/3239]	Time 2.509 (0.631)	Data Time 0.001 (0.017)	Loss 2.2624 (2.1701)	Entropy 0.68552 (0.68762)	Top-1 acc 71.094 (72.442)	Top-5 acc 85.156 (89.012)	lr 0.00054
Train [109][2550/3239]	Time 0.279 (0.629)	Data Time 0.002 (0.017)	Loss 2.1597 (2.1700)	Entropy 0.68551 (0.68761)	Top-1 acc 75.391 (72.446)	Top-5 acc 89.062 (89.014)	lr 0.00053
Train [109][2560/3239]	Time 0.348 (0.629)	Data Time 0.001 (0.017)	Loss 2.2009 (2.1700)	Entropy 0.68546 (0.68761)	Top-1 acc 73.047 (72.444)	Top-5 acc 87.109 (89.014)	lr 0.00053
Train [109][2570/3239]	Time 0.240 (0.628)	Data Time 0.001 (0.017)	Loss 2.2167 (2.1700)	Entropy 0.68549 (0.68760)	Top-1 acc 71.875 (72.448)	Top-5 acc 88.281 (89.013)	lr 0.00053
Train [109][2580/3239]	Time 0.230 (0.628)	Data Time 0.001 (0.017)	Loss 2.0557 (2.1698)	Entropy 0.68550 (0.68759)	Top-1 acc 74.609 (72.452)	Top-5 acc 88.281 (89.015)	lr 0.00053
Train [109][2590/3239]	Time 0.239 (0.627)	Data Time 0.001 (0.017)	Loss 2.1755 (2.1698)	Entropy 0.68545 (0.68758)	Top-1 acc 69.531 (72.456)	Top-5 acc 89.844 (89.018)	lr 0.00053
Train [109][2600/3239]	Time 0.334 (0.627)	Data Time 0.001 (0.017)	Loss 2.1525 (2.1697)	Entropy 0.68543 (0.68757)	Top-1 acc 76.172 (72.460)	Top-5 acc 89.062 (89.021)	lr 0.00053
Train [109][2610/3239]	Time 0.229 (0.626)	Data Time 0.001 (0.017)	Loss 2.1431 (2.1696)	Entropy 0.68544 (0.68757)	Top-1 acc 74.609 (72.463)	Top-5 acc 89.844 (89.020)	lr 0.00053
Train [109][2620/3239]	Time 0.220 (0.626)	Data Time 0.001 (0.017)	Loss 2.0542 (2.1696)	Entropy 0.68537 (0.68756)	Top-1 acc 77.344 (72.465)	Top-5 acc 89.062 (89.017)	lr 0.00053
Train [109][2630/3239]	Time 0.225 (0.625)	Data Time 0.001 (0.017)	Loss 2.0824 (2.1698)	Entropy 0.68538 (0.68755)	Top-1 acc 71.875 (72.462)	Top-5 acc 89.844 (89.014)	lr 0.00053
Train [109][2640/3239]	Time 0.353 (0.625)	Data Time 0.001 (0.017)	Loss 2.1816 (2.1697)	Entropy 0.68534 (0.68754)	Top-1 acc 73.828 (72.463)	Top-5 acc 88.672 (89.014)	lr 0.00053
Train [109][2650/3239]	Time 0.370 (0.643)	Data Time 0.004 (0.017)	Loss 2.1264 (2.1699)	Entropy 0.68530 (0.68753)	Top-1 acc 72.266 (72.458)	Top-5 acc 90.234 (89.011)	lr 0.00053
Train [109][2660/3239]	Time 0.237 (0.643)	Data Time 0.002 (0.016)	Loss 2.3193 (2.1699)	Entropy 0.68535 (0.68752)	Top-1 acc 67.578 (72.460)	Top-5 acc 85.938 (89.010)	lr 0.00053
Train [109][2670/3239]	Time 0.252 (0.642)	Data Time 0.002 (0.016)	Loss 2.1441 (2.1700)	Entropy 0.68532 (0.68752)	Top-1 acc 73.828 (72.460)	Top-5 acc 89.453 (89.007)	lr 0.00053
Train [109][2680/3239]	Time 0.229 (0.642)	Data Time 0.001 (0.016)	Loss 2.0419 (2.1698)	Entropy 0.68532 (0.68751)	Top-1 acc 76.562 (72.466)	Top-5 acc 89.453 (89.011)	lr 0.00053
Train [109][2690/3239]	Time 0.285 (0.641)	Data Time 0.002 (0.016)	Loss 2.0042 (2.1697)	Entropy 0.68528 (0.68750)	Top-1 acc 75.391 (72.467)	Top-5 acc 92.578 (89.014)	lr 0.00053
Train [109][2700/3239]	Time 0.221 (0.641)	Data Time 0.002 (0.016)	Loss 2.1034 (2.1696)	Entropy 0.68525 (0.68749)	Top-1 acc 75.000 (72.470)	Top-5 acc 88.672 (89.014)	lr 0.00053
Train [109][2710/3239]	Time 0.267 (0.640)	Data Time 0.001 (0.016)	Loss 2.0819 (2.1696)	Entropy 0.68519 (0.68748)	Top-1 acc 75.781 (72.473)	Top-5 acc 89.453 (89.016)	lr 0.00053
Train [109][2720/3239]	Time 0.256 (0.639)	Data Time 0.001 (0.016)	Loss 2.1937 (2.1697)	Entropy 0.68513 (0.68747)	Top-1 acc 72.656 (72.469)	Top-5 acc 87.109 (89.014)	lr 0.00053
Train [109][2730/3239]	Time 0.223 (0.639)	Data Time 0.001 (0.016)	Loss 2.1691 (2.1698)	Entropy 0.68504 (0.68747)	Top-1 acc 71.094 (72.467)	Top-5 acc 87.500 (89.013)	lr 0.00053
Train [109][2740/3239]	Time 0.237 (0.638)	Data Time 0.001 (0.016)	Loss 2.1930 (2.1699)	Entropy 0.68498 (0.68746)	Top-1 acc 71.484 (72.466)	Top-5 acc 88.672 (89.012)	lr 0.00053
Train [109][2750/3239]	Time 0.271 (0.638)	Data Time 0.001 (0.016)	Loss 2.1610 (2.1699)	Entropy 0.68502 (0.68745)	Top-1 acc 73.438 (72.467)	Top-5 acc 87.109 (89.008)	lr 0.00053
Train [109][2760/3239]	Time 0.228 (0.637)	Data Time 0.001 (0.016)	Loss 2.1408 (2.1699)	Entropy 0.68503 (0.68744)	Top-1 acc 72.656 (72.465)	Top-5 acc 89.453 (89.009)	lr 0.00053
Train [109][2770/3239]	Time 0.230 (0.637)	Data Time 0.001 (0.016)	Loss 2.1461 (2.1699)	Entropy 0.68506 (0.68743)	Top-1 acc 71.875 (72.466)	Top-5 acc 89.453 (89.009)	lr 0.00053
Train [109][2780/3239]	Time 0.219 (0.636)	Data Time 0.001 (0.016)	Loss 2.2634 (2.1700)	Entropy 0.68502 (0.68742)	Top-1 acc 71.094 (72.465)	Top-5 acc 86.328 (89.009)	lr 0.00053
Train [109][2790/3239]	Time 0.234 (0.636)	Data Time 0.001 (0.016)	Loss 2.0083 (2.1697)	Entropy 0.68503 (0.68741)	Top-1 acc 76.953 (72.473)	Top-5 acc 92.578 (89.014)	lr 0.00053
Train [109][2800/3239]	Time 0.231 (0.635)	Data Time 0.001 (0.016)	Loss 2.3257 (2.1699)	Entropy 0.68494 (0.68740)	Top-1 acc 67.188 (72.467)	Top-5 acc 85.938 (89.009)	lr 0.00053
Train [109][2810/3239]	Time 0.236 (0.635)	Data Time 0.001 (0.016)	Loss 2.3025 (2.1702)	Entropy 0.68463 (0.68740)	Top-1 acc 68.359 (72.458)	Top-5 acc 87.500 (89.007)	lr 0.00053
Train [109][2820/3239]	Time 0.273 (0.634)	Data Time 0.002 (0.016)	Loss 2.1691 (2.1702)	Entropy 0.68462 (0.68739)	Top-1 acc 72.266 (72.458)	Top-5 acc 87.891 (89.006)	lr 0.00053
Train [109][2830/3239]	Time 0.232 (0.633)	Data Time 0.002 (0.016)	Loss 2.0688 (2.1700)	Entropy 0.68457 (0.68738)	Top-1 acc 72.656 (72.463)	Top-5 acc 93.359 (89.012)	lr 0.00053
Train [109][2840/3239]	Time 0.226 (0.633)	Data Time 0.001 (0.016)	Loss 2.2147 (2.1701)	Entropy 0.68453 (0.68737)	Top-1 acc 73.047 (72.460)	Top-5 acc 87.109 (89.009)	lr 0.00053
Train [109][2850/3239]	Time 0.233 (0.632)	Data Time 0.001 (0.015)	Loss 2.2804 (2.1702)	Entropy 0.68453 (0.68736)	Top-1 acc 71.484 (72.457)	Top-5 acc 86.719 (89.008)	lr 0.00053
Train [109][2860/3239]	Time 0.264 (0.632)	Data Time 0.001 (0.015)	Loss 2.2561 (2.1703)	Entropy 0.68451 (0.68735)	Top-1 acc 68.359 (72.455)	Top-5 acc 86.719 (89.008)	lr 0.00053
Train [109][2870/3239]	Time 0.252 (0.631)	Data Time 0.002 (0.015)	Loss 2.2292 (2.1704)	Entropy 0.68449 (0.68734)	Top-1 acc 70.703 (72.451)	Top-5 acc 88.281 (89.006)	lr 0.00053
Train [109][2880/3239]	Time 0.234 (0.631)	Data Time 0.001 (0.015)	Loss 2.0352 (2.1703)	Entropy 0.68451 (0.68733)	Top-1 acc 74.219 (72.457)	Top-5 acc 91.016 (89.007)	lr 0.00053
Train [109][2890/3239]	Time 0.234 (0.630)	Data Time 0.001 (0.015)	Loss 2.1200 (2.1702)	Entropy 0.68449 (0.68732)	Top-1 acc 75.000 (72.456)	Top-5 acc 89.844 (89.009)	lr 0.00052
Train [109][2900/3239]	Time 0.254 (0.630)	Data Time 0.001 (0.015)	Loss 2.3021 (2.1703)	Entropy 0.68454 (0.68731)	Top-1 acc 68.359 (72.456)	Top-5 acc 87.891 (89.007)	lr 0.00052
Train [109][2910/3239]	Time 0.259 (0.629)	Data Time 0.001 (0.015)	Loss 2.0955 (2.1704)	Entropy 0.68451 (0.68730)	Top-1 acc 73.828 (72.452)	Top-5 acc 91.797 (89.008)	lr 0.00052
Train [109][2920/3239]	Time 0.238 (0.629)	Data Time 0.001 (0.015)	Loss 2.1512 (2.1705)	Entropy 0.68442 (0.68729)	Top-1 acc 75.781 (72.450)	Top-5 acc 88.672 (89.006)	lr 0.00052
Train [109][2930/3239]	Time 0.233 (0.628)	Data Time 0.002 (0.015)	Loss 2.2779 (2.1707)	Entropy 0.68445 (0.68728)	Top-1 acc 68.359 (72.443)	Top-5 acc 88.281 (89.003)	lr 0.00052
Train [109][2940/3239]	Time 0.228 (0.628)	Data Time 0.001 (0.015)	Loss 2.1900 (2.1708)	Entropy 0.68446 (0.68727)	Top-1 acc 72.266 (72.442)	Top-5 acc 88.672 (89.003)	lr 0.00052
Train [109][2950/3239]	Time 0.258 (0.627)	Data Time 0.006 (0.015)	Loss 2.0446 (2.1707)	Entropy 0.68446 (0.68726)	Top-1 acc 77.734 (72.445)	Top-5 acc 90.625 (89.006)	lr 0.00052
Train [109][2960/3239]	Time 0.235 (0.627)	Data Time 0.001 (0.015)	Loss 2.3705 (2.1708)	Entropy 0.68440 (0.68725)	Top-1 acc 65.234 (72.444)	Top-5 acc 85.547 (89.002)	lr 0.00052
Train [109][2970/3239]	Time 0.266 (0.626)	Data Time 0.001 (0.015)	Loss 2.2676 (2.1709)	Entropy 0.68438 (0.68724)	Top-1 acc 67.188 (72.441)	Top-5 acc 85.547 (88.998)	lr 0.00052
Train [109][2980/3239]	Time 0.416 (0.643)	Data Time 0.005 (0.015)	Loss 2.2565 (2.1709)	Entropy 0.68445 (0.68723)	Top-1 acc 69.922 (72.440)	Top-5 acc 87.109 (88.998)	lr 0.00052
Train [109][2990/3239]	Time 0.254 (0.643)	Data Time 0.002 (0.015)	Loss 2.0727 (2.1708)	Entropy 0.68444 (0.68722)	Top-1 acc 74.609 (72.441)	Top-5 acc 92.969 (88.999)	lr 0.00052
Train [109][3000/3239]	Time 0.251 (0.642)	Data Time 0.002 (0.015)	Loss 2.1686 (2.1708)	Entropy 0.68443 (0.68721)	Top-1 acc 75.781 (72.442)	Top-5 acc 88.281 (88.999)	lr 0.00052
Train [109][3010/3239]	Time 0.231 (0.642)	Data Time 0.001 (0.015)	Loss 2.1651 (2.1708)	Entropy 0.68449 (0.68720)	Top-1 acc 72.266 (72.442)	Top-5 acc 89.844 (88.999)	lr 0.00052
Train [109][3020/3239]	Time 0.232 (0.641)	Data Time 0.001 (0.015)	Loss 2.2375 (2.1708)	Entropy 0.68448 (0.68719)	Top-1 acc 70.312 (72.442)	Top-5 acc 89.062 (89.002)	lr 0.00052
Train [109][3030/3239]	Time 0.234 (0.641)	Data Time 0.001 (0.015)	Loss 2.2143 (2.1710)	Entropy 0.68443 (0.68719)	Top-1 acc 71.875 (72.439)	Top-5 acc 87.109 (88.999)	lr 0.00052
Train [109][3040/3239]	Time 0.234 (0.640)	Data Time 0.002 (0.015)	Loss 2.2009 (2.1709)	Entropy 0.68433 (0.68718)	Top-1 acc 67.188 (72.438)	Top-5 acc 89.844 (88.999)	lr 0.00052
Train [109][3050/3239]	Time 0.331 (0.640)	Data Time 0.002 (0.015)	Loss 2.3483 (2.1710)	Entropy 0.68428 (0.68717)	Top-1 acc 66.797 (72.439)	Top-5 acc 86.328 (89.000)	lr 0.00052
Train [109][3060/3239]	Time 0.233 (0.639)	Data Time 0.001 (0.015)	Loss 2.1989 (2.1710)	Entropy 0.68425 (0.68716)	Top-1 acc 71.094 (72.438)	Top-5 acc 90.234 (89.001)	lr 0.00052
Train [109][3070/3239]	Time 0.233 (0.639)	Data Time 0.001 (0.015)	Loss 2.2129 (2.1711)	Entropy 0.68406 (0.68715)	Top-1 acc 70.312 (72.438)	Top-5 acc 89.453 (89.000)	lr 0.00052
Train [109][3080/3239]	Time 0.269 (0.638)	Data Time 0.001 (0.014)	Loss 2.3148 (2.1711)	Entropy 0.68394 (0.68714)	Top-1 acc 71.094 (72.435)	Top-5 acc 85.938 (88.999)	lr 0.00052
Train [109][3090/3239]	Time 0.330 (0.638)	Data Time 0.001 (0.014)	Loss 2.1648 (2.1712)	Entropy 0.68387 (0.68713)	Top-1 acc 72.266 (72.431)	Top-5 acc 86.719 (88.994)	lr 0.00052
Train [109][3100/3239]	Time 0.239 (0.637)	Data Time 0.001 (0.014)	Loss 2.0985 (2.1713)	Entropy 0.68378 (0.68712)	Top-1 acc 75.391 (72.431)	Top-5 acc 89.844 (88.994)	lr 0.00052
Train [109][3110/3239]	Time 0.309 (0.637)	Data Time 0.002 (0.014)	Loss 2.1002 (2.1713)	Entropy 0.68385 (0.68711)	Top-1 acc 73.438 (72.431)	Top-5 acc 89.844 (88.995)	lr 0.00052
Train [109][3120/3239]	Time 0.256 (0.636)	Data Time 0.001 (0.014)	Loss 2.2147 (2.1714)	Entropy 0.68375 (0.68710)	Top-1 acc 72.266 (72.432)	Top-5 acc 87.500 (88.992)	lr 0.00052
Train [109][3130/3239]	Time 0.396 (0.636)	Data Time 0.001 (0.014)	Loss 2.0316 (2.1713)	Entropy 0.68362 (0.68708)	Top-1 acc 76.562 (72.435)	Top-5 acc 92.188 (88.993)	lr 0.00052
Train [109][3140/3239]	Time 0.233 (0.635)	Data Time 0.001 (0.014)	Loss 2.1422 (2.1713)	Entropy 0.68367 (0.68707)	Top-1 acc 67.969 (72.433)	Top-5 acc 88.672 (88.992)	lr 0.00052
Train [109][3150/3239]	Time 0.239 (0.635)	Data Time 0.001 (0.014)	Loss 2.1946 (2.1714)	Entropy 0.68366 (0.68706)	Top-1 acc 71.484 (72.431)	Top-5 acc 89.453 (88.991)	lr 0.00052
Train [109][3160/3239]	Time 0.242 (0.634)	Data Time 0.001 (0.014)	Loss 2.1152 (2.1715)	Entropy 0.68364 (0.68705)	Top-1 acc 73.047 (72.425)	Top-5 acc 88.672 (88.988)	lr 0.00052
Train [109][3170/3239]	Time 0.382 (0.634)	Data Time 0.001 (0.014)	Loss 2.2278 (2.1716)	Entropy 0.68364 (0.68704)	Top-1 acc 73.438 (72.423)	Top-5 acc 89.062 (88.985)	lr 0.00052
Train [109][3180/3239]	Time 0.246 (0.634)	Data Time 0.000 (0.014)	Loss 2.1442 (2.1716)	Entropy 0.68362 (0.68703)	Top-1 acc 71.484 (72.425)	Top-5 acc 88.672 (88.987)	lr 0.00052
Train [109][3190/3239]	Time 0.235 (0.633)	Data Time 0.000 (0.014)	Loss 2.1510 (2.1716)	Entropy 0.68366 (0.68702)	Top-1 acc 73.438 (72.426)	Top-5 acc 87.891 (88.987)	lr 0.00052
Train [109][3200/3239]	Time 0.225 (0.632)	Data Time 0.000 (0.014)	Loss 2.0516 (2.1715)	Entropy 0.68364 (0.68701)	Top-1 acc 75.391 (72.426)	Top-5 acc 91.797 (88.989)	lr 0.00052
Train [109][3210/3239]	Time 0.322 (0.632)	Data Time 0.000 (0.014)	Loss 2.2763 (2.1715)	Entropy 0.68360 (0.68700)	Top-1 acc 71.875 (72.425)	Top-5 acc 88.281 (88.989)	lr 0.00052
Train [109][3220/3239]	Time 0.232 (0.631)	Data Time 0.000 (0.014)	Loss 2.1814 (2.1714)	Entropy 0.68361 (0.68699)	Top-1 acc 72.656 (72.428)	Top-5 acc 87.500 (88.991)	lr 0.00052
Train [109][3230/3239]	Time 0.238 (0.631)	Data Time 0.000 (0.014)	Loss 2.1469 (2.1713)	Entropy 0.68360 (0.68698)	Top-1 acc 72.266 (72.431)	Top-5 acc 89.453 (88.993)	lr 0.00052
Train [109][3239/3239]	Time 2.366 (0.631)	Data Time 0.000 (0.014)	Loss 2.3493 (2.1711)	Entropy 0.68360 (0.68697)	Top-1 acc 66.667 (72.436)	Top-5 acc 82.716 (88.996)	lr 0.00051
==========Valid [109/120]	loss 1.204	top-1 acc 72.537 (72.537)	top-5 acc 89.846	Train top-1 72.436	top-5 88.996	Entropy 0.68360	Latency-None: 0.000ms	Flops: 546.53M
Train [110][0/3239]	Time 42.859 (42.859)	Data Time 38.641 (38.641)	Loss 2.1141 (2.1141)	Entropy 0.68358 (0.68358)	Top-1 acc 75.391 (75.391)	Top-5 acc 88.672 (88.672)	lr 0.00051
Train [110][10/3239]	Time 2.856 (4.446)	Data Time 0.002 (3.514)	Loss 2.2410 (2.1467)	Entropy 0.68358 (0.68358)	Top-1 acc 69.922 (73.295)	Top-5 acc 90.234 (89.418)	lr 0.00051
Train [110][20/3239]	Time 0.364 (2.455)	Data Time 0.001 (1.842)	Loss 1.9723 (2.1259)	Entropy 0.68363 (0.68361)	Top-1 acc 76.562 (74.014)	Top-5 acc 92.188 (89.807)	lr 0.00051
Train [110][30/3239]	Time 0.249 (1.816)	Data Time 0.001 (1.248)	Loss 2.1087 (2.1218)	Entropy 0.68359 (0.68360)	Top-1 acc 73.047 (73.828)	Top-5 acc 89.062 (89.894)	lr 0.00051
Train [110][40/3239]	Time 0.239 (1.492)	Data Time 0.001 (0.944)	Loss 2.1471 (2.1290)	Entropy 0.68358 (0.68360)	Top-1 acc 73.047 (73.638)	Top-5 acc 89.062 (89.787)	lr 0.00051
Train [110][50/3239]	Time 0.226 (1.294)	Data Time 0.001 (0.759)	Loss 2.0029 (2.1291)	Entropy 0.68339 (0.68357)	Top-1 acc 76.953 (73.575)	Top-5 acc 91.406 (89.683)	lr 0.00051
Train [110][60/3239]	Time 0.321 (1.162)	Data Time 0.001 (0.635)	Loss 2.0569 (2.1317)	Entropy 0.68336 (0.68354)	Top-1 acc 78.516 (73.476)	Top-5 acc 90.234 (89.632)	lr 0.00051
Train [110][70/3239]	Time 0.231 (1.067)	Data Time 0.001 (0.546)	Loss 2.1708 (2.1370)	Entropy 0.68332 (0.68351)	Top-1 acc 74.609 (73.366)	Top-5 acc 87.891 (89.475)	lr 0.00051
Train [110][80/3239]	Time 0.265 (1.672)	Data Time 0.005 (0.479)	Loss 2.1276 (2.1403)	Entropy 0.68338 (0.68349)	Top-1 acc 75.000 (73.356)	Top-5 acc 89.844 (89.410)	lr 0.00051
Train [110][90/3239]	Time 0.228 (1.550)	Data Time 0.002 (0.426)	Loss 2.1059 (2.1406)	Entropy 0.68339 (0.68348)	Top-1 acc 73.047 (73.322)	Top-5 acc 88.672 (89.363)	lr 0.00051
Train [110][100/3239]	Time 0.322 (1.445)	Data Time 0.001 (0.384)	Loss 2.1028 (2.1400)	Entropy 0.68329 (0.68347)	Top-1 acc 76.953 (73.387)	Top-5 acc 90.625 (89.353)	lr 0.00051
Train [110][110/3239]	Time 0.227 (1.358)	Data Time 0.001 (0.350)	Loss 2.0692 (2.1425)	Entropy 0.68335 (0.68345)	Top-1 acc 76.953 (73.276)	Top-5 acc 90.234 (89.323)	lr 0.00051
Train [110][120/3239]	Time 2.652 (1.287)	Data Time 0.001 (0.321)	Loss 2.1678 (2.1442)	Entropy 0.68335 (0.68344)	Top-1 acc 74.219 (73.195)	Top-5 acc 89.062 (89.285)	lr 0.00051
Train [110][130/3239]	Time 0.234 (1.207)	Data Time 0.001 (0.297)	Loss 2.0257 (2.1429)	Entropy 0.68327 (0.68343)	Top-1 acc 75.781 (73.160)	Top-5 acc 92.188 (89.295)	lr 0.00051
Train [110][140/3239]	Time 0.330 (1.157)	Data Time 0.001 (0.276)	Loss 2.1481 (2.1436)	Entropy 0.68326 (0.68342)	Top-1 acc 72.266 (73.180)	Top-5 acc 87.891 (89.304)	lr 0.00051
Train [110][150/3239]	Time 0.236 (1.112)	Data Time 0.001 (0.258)	Loss 2.0622 (2.1466)	Entropy 0.68324 (0.68341)	Top-1 acc 76.953 (73.078)	Top-5 acc 91.016 (89.264)	lr 0.00051
Train [110][160/3239]	Time 0.225 (1.073)	Data Time 0.001 (0.242)	Loss 2.1106 (2.1480)	Entropy 0.68320 (0.68339)	Top-1 acc 74.219 (73.047)	Top-5 acc 87.500 (89.230)	lr 0.00051
Train [110][170/3239]	Time 0.263 (1.038)	Data Time 0.001 (0.228)	Loss 2.0989 (2.1490)	Entropy 0.68316 (0.68338)	Top-1 acc 73.438 (73.067)	Top-5 acc 90.234 (89.195)	lr 0.00051
Train [110][180/3239]	Time 0.368 (1.008)	Data Time 0.001 (0.215)	Loss 2.2982 (2.1503)	Entropy 0.68315 (0.68337)	Top-1 acc 70.312 (73.012)	Top-5 acc 85.938 (89.157)	lr 0.00051
Train [110][190/3239]	Time 0.232 (0.981)	Data Time 0.001 (0.204)	Loss 2.0129 (2.1482)	Entropy 0.68324 (0.68336)	Top-1 acc 80.469 (73.082)	Top-5 acc 93.750 (89.181)	lr 0.00051
Train [110][200/3239]	Time 0.235 (0.956)	Data Time 0.001 (0.194)	Loss 2.1187 (2.1483)	Entropy 0.68325 (0.68335)	Top-1 acc 76.562 (73.148)	Top-5 acc 90.234 (89.202)	lr 0.00051
Train [110][210/3239]	Time 0.245 (0.934)	Data Time 0.001 (0.185)	Loss 2.1791 (2.1487)	Entropy 0.68322 (0.68335)	Top-1 acc 71.484 (73.106)	Top-5 acc 87.109 (89.175)	lr 0.00051
Train [110][220/3239]	Time 0.233 (0.914)	Data Time 0.001 (0.177)	Loss 2.3487 (2.1479)	Entropy 0.68324 (0.68334)	Top-1 acc 65.234 (73.133)	Top-5 acc 87.891 (89.237)	lr 0.00051
Train [110][230/3239]	Time 2.627 (0.896)	Data Time 0.002 (0.169)	Loss 2.1960 (2.1480)	Entropy 0.68324 (0.68334)	Top-1 acc 69.531 (73.120)	Top-5 acc 87.891 (89.264)	lr 0.00051
Train [110][240/3239]	Time 0.260 (0.869)	Data Time 0.001 (0.162)	Loss 2.1921 (2.1504)	Entropy 0.68322 (0.68333)	Top-1 acc 71.484 (73.089)	Top-5 acc 87.109 (89.208)	lr 0.00051
Train [110][250/3239]	Time 0.237 (0.854)	Data Time 0.001 (0.156)	Loss 2.1845 (2.1502)	Entropy 0.68367 (0.68335)	Top-1 acc 74.219 (73.089)	Top-5 acc 88.281 (89.215)	lr 0.00051
Train [110][260/3239]	Time 0.243 (0.840)	Data Time 0.001 (0.150)	Loss 2.0564 (2.1505)	Entropy 0.68361 (0.68336)	Top-1 acc 77.734 (73.086)	Top-5 acc 91.016 (89.215)	lr 0.00051
Train [110][270/3239]	Time 0.237 (0.827)	Data Time 0.001 (0.144)	Loss 1.8554 (2.1514)	Entropy 0.68360 (0.68336)	Top-1 acc 78.906 (73.030)	Top-5 acc 95.703 (89.220)	lr 0.00051
Train [110][280/3239]	Time 0.239 (0.815)	Data Time 0.001 (0.139)	Loss 2.1220 (2.1522)	Entropy 0.68355 (0.68337)	Top-1 acc 75.391 (73.039)	Top-5 acc 88.281 (89.178)	lr 0.00051
Train [110][290/3239]	Time 0.230 (0.803)	Data Time 0.001 (0.135)	Loss 2.1959 (2.1534)	Entropy 0.68344 (0.68338)	Top-1 acc 69.922 (72.978)	Top-5 acc 89.453 (89.175)	lr 0.00051
Train [110][300/3239]	Time 0.231 (0.792)	Data Time 0.001 (0.130)	Loss 2.3251 (2.1528)	Entropy 0.68334 (0.68338)	Top-1 acc 70.703 (73.047)	Top-5 acc 87.500 (89.199)	lr 0.00051
Train [110][310/3239]	Time 0.245 (0.782)	Data Time 0.001 (0.126)	Loss 2.1147 (2.1534)	Entropy 0.68328 (0.68338)	Top-1 acc 73.828 (73.024)	Top-5 acc 89.453 (89.182)	lr 0.00051
Train [110][320/3239]	Time 0.235 (0.773)	Data Time 0.001 (0.122)	Loss 2.2112 (2.1536)	Entropy 0.68324 (0.68337)	Top-1 acc 71.484 (73.040)	Top-5 acc 87.891 (89.174)	lr 0.00051
Train [110][330/3239]	Time 0.259 (0.764)	Data Time 0.002 (0.119)	Loss 2.3258 (2.1539)	Entropy 0.68324 (0.68337)	Top-1 acc 68.359 (73.036)	Top-5 acc 87.500 (89.171)	lr 0.00051
Train [110][340/3239]	Time 2.517 (0.756)	Data Time 0.001 (0.115)	Loss 2.2363 (2.1553)	Entropy 0.68324 (0.68336)	Top-1 acc 71.484 (72.984)	Top-5 acc 87.891 (89.146)	lr 0.00051
Train [110][350/3239]	Time 0.325 (0.742)	Data Time 0.001 (0.112)	Loss 2.2464 (2.1550)	Entropy 0.68317 (0.68336)	Top-1 acc 69.531 (72.982)	Top-5 acc 87.500 (89.165)	lr 0.00050
Train [110][360/3239]	Time 0.230 (0.734)	Data Time 0.001 (0.109)	Loss 2.2167 (2.1542)	Entropy 0.68323 (0.68336)	Top-1 acc 71.484 (72.988)	Top-5 acc 89.062 (89.171)	lr 0.00050
Train [110][370/3239]	Time 0.236 (0.728)	Data Time 0.001 (0.106)	Loss 2.1144 (2.1541)	Entropy 0.68324 (0.68335)	Top-1 acc 75.391 (72.998)	Top-5 acc 91.016 (89.164)	lr 0.00050
Train [110][380/3239]	Time 0.228 (0.721)	Data Time 0.001 (0.103)	Loss 2.3137 (2.1546)	Entropy 0.68323 (0.68335)	Top-1 acc 67.969 (72.980)	Top-5 acc 88.672 (89.149)	lr 0.00050
Train [110][390/3239]	Time 0.322 (0.715)	Data Time 0.002 (0.101)	Loss 2.4586 (2.1549)	Entropy 0.68324 (0.68335)	Top-1 acc 66.016 (72.975)	Top-5 acc 84.766 (89.149)	lr 0.00050
Train [110][400/3239]	Time 0.219 (0.709)	Data Time 0.001 (0.098)	Loss 2.0697 (2.1553)	Entropy 0.68324 (0.68334)	Top-1 acc 73.828 (72.955)	Top-5 acc 90.625 (89.143)	lr 0.00050
Train [110][410/3239]	Time 0.226 (0.703)	Data Time 0.001 (0.096)	Loss 2.2191 (2.1546)	Entropy 0.68329 (0.68334)	Top-1 acc 70.312 (72.956)	Top-5 acc 88.672 (89.164)	lr 0.00050
Train [110][420/3239]	Time 0.236 (0.698)	Data Time 0.002 (0.094)	Loss 2.0477 (2.1548)	Entropy 0.68331 (0.68334)	Top-1 acc 76.562 (72.961)	Top-5 acc 91.797 (89.164)	lr 0.00050
Train [110][430/3239]	Time 0.236 (0.693)	Data Time 0.002 (0.091)	Loss 2.0671 (2.1550)	Entropy 0.68334 (0.68334)	Top-1 acc 75.000 (72.961)	Top-5 acc 91.016 (89.177)	lr 0.00050
Train [110][440/3239]	Time 0.441 (0.806)	Data Time 0.005 (0.089)	Loss 2.1984 (2.1559)	Entropy 0.68324 (0.68334)	Top-1 acc 69.531 (72.926)	Top-5 acc 89.062 (89.160)	lr 0.00050
Train [110][450/3239]	Time 3.094 (0.803)	Data Time 0.003 (0.087)	Loss 2.2247 (2.1563)	Entropy 0.68324 (0.68334)	Top-1 acc 70.312 (72.928)	Top-5 acc 91.797 (89.166)	lr 0.00050
Train [110][460/3239]	Time 0.251 (0.791)	Data Time 0.002 (0.086)	Loss 2.2233 (2.1561)	Entropy 0.68328 (0.68334)	Top-1 acc 71.484 (72.925)	Top-5 acc 87.500 (89.174)	lr 0.00050
Train [110][470/3239]	Time 0.249 (0.785)	Data Time 0.002 (0.084)	Loss 2.1239 (2.1559)	Entropy 0.68327 (0.68333)	Top-1 acc 73.828 (72.913)	Top-5 acc 87.891 (89.173)	lr 0.00050
Train [110][480/3239]	Time 0.238 (0.779)	Data Time 0.001 (0.082)	Loss 2.0453 (2.1559)	Entropy 0.68320 (0.68333)	Top-1 acc 73.828 (72.902)	Top-5 acc 90.625 (89.183)	lr 0.00050
Train [110][490/3239]	Time 0.248 (0.773)	Data Time 0.001 (0.080)	Loss 2.1040 (2.1560)	Entropy 0.68321 (0.68333)	Top-1 acc 72.266 (72.880)	Top-5 acc 89.453 (89.183)	lr 0.00050
Train [110][500/3239]	Time 0.224 (0.767)	Data Time 0.001 (0.079)	Loss 2.1497 (2.1559)	Entropy 0.68317 (0.68333)	Top-1 acc 74.219 (72.887)	Top-5 acc 89.062 (89.176)	lr 0.00050
Train [110][510/3239]	Time 0.230 (0.762)	Data Time 0.001 (0.077)	Loss 2.2379 (2.1566)	Entropy 0.68317 (0.68332)	Top-1 acc 71.484 (72.854)	Top-5 acc 86.719 (89.166)	lr 0.00050
Train [110][520/3239]	Time 0.242 (0.756)	Data Time 0.001 (0.076)	Loss 2.1610 (2.1567)	Entropy 0.68304 (0.68332)	Top-1 acc 71.484 (72.870)	Top-5 acc 88.281 (89.160)	lr 0.00050
Train [110][530/3239]	Time 0.233 (0.751)	Data Time 0.001 (0.075)	Loss 2.2700 (2.1573)	Entropy 0.68300 (0.68331)	Top-1 acc 69.531 (72.867)	Top-5 acc 86.719 (89.141)	lr 0.00050
Train [110][540/3239]	Time 0.229 (0.746)	Data Time 0.001 (0.073)	Loss 2.1889 (2.1575)	Entropy 0.68297 (0.68331)	Top-1 acc 70.312 (72.858)	Top-5 acc 88.672 (89.146)	lr 0.00050
Train [110][550/3239]	Time 0.225 (0.742)	Data Time 0.001 (0.072)	Loss 2.0792 (2.1574)	Entropy 0.68326 (0.68330)	Top-1 acc 76.172 (72.855)	Top-5 acc 89.844 (89.148)	lr 0.00050
Train [110][560/3239]	Time 2.681 (0.737)	Data Time 0.001 (0.071)	Loss 2.1329 (2.1581)	Entropy 0.68326 (0.68330)	Top-1 acc 72.266 (72.837)	Top-5 acc 92.578 (89.145)	lr 0.00050
Train [110][570/3239]	Time 0.278 (0.729)	Data Time 0.001 (0.069)	Loss 2.2004 (2.1576)	Entropy 0.68325 (0.68330)	Top-1 acc 70.703 (72.855)	Top-5 acc 89.844 (89.157)	lr 0.00050
Train [110][580/3239]	Time 0.235 (0.724)	Data Time 0.001 (0.068)	Loss 2.1320 (2.1582)	Entropy 0.68311 (0.68330)	Top-1 acc 74.219 (72.835)	Top-5 acc 90.625 (89.157)	lr 0.00050
Train [110][590/3239]	Time 0.226 (0.720)	Data Time 0.001 (0.067)	Loss 2.1485 (2.1579)	Entropy 0.68309 (0.68329)	Top-1 acc 71.094 (72.837)	Top-5 acc 89.453 (89.168)	lr 0.00050
Train [110][600/3239]	Time 0.332 (0.716)	Data Time 0.001 (0.066)	Loss 2.0401 (2.1579)	Entropy 0.68299 (0.68329)	Top-1 acc 75.391 (72.820)	Top-5 acc 91.016 (89.168)	lr 0.00050
Train [110][610/3239]	Time 0.229 (0.712)	Data Time 0.002 (0.065)	Loss 2.2677 (2.1587)	Entropy 0.68296 (0.68328)	Top-1 acc 70.312 (72.788)	Top-5 acc 88.281 (89.166)	lr 0.00050
Train [110][620/3239]	Time 0.233 (0.709)	Data Time 0.001 (0.064)	Loss 2.1739 (2.1592)	Entropy 0.68293 (0.68328)	Top-1 acc 73.047 (72.768)	Top-5 acc 89.062 (89.163)	lr 0.00050
Train [110][630/3239]	Time 0.222 (0.705)	Data Time 0.001 (0.063)	Loss 2.2555 (2.1589)	Entropy 0.68286 (0.68327)	Top-1 acc 72.656 (72.771)	Top-5 acc 86.328 (89.171)	lr 0.00050
Train [110][640/3239]	Time 0.248 (0.702)	Data Time 0.001 (0.062)	Loss 2.3915 (2.1599)	Entropy 0.68283 (0.68327)	Top-1 acc 66.406 (72.741)	Top-5 acc 85.938 (89.158)	lr 0.00050
Train [110][650/3239]	Time 0.231 (0.698)	Data Time 0.001 (0.061)	Loss 2.1987 (2.1602)	Entropy 0.68279 (0.68326)	Top-1 acc 69.922 (72.737)	Top-5 acc 87.109 (89.146)	lr 0.00050
Train [110][660/3239]	Time 0.243 (0.695)	Data Time 0.001 (0.060)	Loss 1.9903 (2.1608)	Entropy 0.68282 (0.68325)	Top-1 acc 74.609 (72.696)	Top-5 acc 91.406 (89.127)	lr 0.00050
Train [110][670/3239]	Time 2.578 (0.692)	Data Time 0.001 (0.059)	Loss 2.1107 (2.1610)	Entropy 0.68282 (0.68325)	Top-1 acc 75.000 (72.700)	Top-5 acc 89.453 (89.120)	lr 0.00050
Train [110][680/3239]	Time 0.241 (0.685)	Data Time 0.001 (0.058)	Loss 2.0439 (2.1606)	Entropy 0.68276 (0.68324)	Top-1 acc 75.781 (72.708)	Top-5 acc 92.188 (89.134)	lr 0.00050
Train [110][690/3239]	Time 0.238 (0.682)	Data Time 0.001 (0.058)	Loss 2.1237 (2.1606)	Entropy 0.68272 (0.68323)	Top-1 acc 74.219 (72.714)	Top-5 acc 88.672 (89.130)	lr 0.00050
Train [110][700/3239]	Time 0.234 (0.679)	Data Time 0.001 (0.057)	Loss 2.0476 (2.1603)	Entropy 0.68272 (0.68322)	Top-1 acc 76.562 (72.721)	Top-5 acc 91.797 (89.129)	lr 0.00049
Train [110][710/3239]	Time 0.258 (0.676)	Data Time 0.002 (0.056)	Loss 2.0127 (2.1603)	Entropy 0.68272 (0.68322)	Top-1 acc 75.391 (72.713)	Top-5 acc 91.797 (89.122)	lr 0.00049
Train [110][720/3239]	Time 0.248 (0.674)	Data Time 0.001 (0.055)	Loss 2.1121 (2.1609)	Entropy 0.68260 (0.68321)	Top-1 acc 74.219 (72.698)	Top-5 acc 88.672 (89.117)	lr 0.00049
Train [110][730/3239]	Time 0.261 (0.671)	Data Time 0.001 (0.055)	Loss 2.1995 (2.1613)	Entropy 0.68258 (0.68320)	Top-1 acc 70.312 (72.686)	Top-5 acc 87.109 (89.112)	lr 0.00049
Train [110][740/3239]	Time 0.245 (0.669)	Data Time 0.001 (0.054)	Loss 2.2960 (2.1610)	Entropy 0.68255 (0.68319)	Top-1 acc 70.703 (72.679)	Top-5 acc 87.891 (89.123)	lr 0.00049
Train [110][750/3239]	Time 0.220 (0.667)	Data Time 0.001 (0.053)	Loss 2.1545 (2.1614)	Entropy 0.68247 (0.68318)	Top-1 acc 71.875 (72.661)	Top-5 acc 90.234 (89.127)	lr 0.00049
Train [110][760/3239]	Time 0.241 (0.665)	Data Time 0.001 (0.053)	Loss 2.2180 (2.1615)	Entropy 0.68241 (0.68317)	Top-1 acc 69.922 (72.657)	Top-5 acc 86.328 (89.125)	lr 0.00049
Train [110][770/3239]	Time 0.246 (0.662)	Data Time 0.001 (0.052)	Loss 2.1295 (2.1615)	Entropy 0.68235 (0.68316)	Top-1 acc 73.438 (72.669)	Top-5 acc 89.453 (89.124)	lr 0.00049
Train [110][780/3239]	Time 2.500 (0.660)	Data Time 0.001 (0.051)	Loss 2.1745 (2.1619)	Entropy 0.68235 (0.68315)	Top-1 acc 71.484 (72.662)	Top-5 acc 89.844 (89.123)	lr 0.00049
Train [110][790/3239]	Time 0.266 (0.655)	Data Time 0.001 (0.051)	Loss 2.2817 (2.1621)	Entropy 0.68231 (0.68314)	Top-1 acc 69.922 (72.642)	Top-5 acc 85.938 (89.120)	lr 0.00049
Train [110][800/3239]	Time 0.230 (0.653)	Data Time 0.001 (0.050)	Loss 2.1485 (2.1622)	Entropy 0.68217 (0.68313)	Top-1 acc 73.047 (72.641)	Top-5 acc 89.453 (89.112)	lr 0.00049
Train [110][810/3239]	Time 0.350 (0.719)	Data Time 0.002 (0.049)	Loss 1.9943 (2.1628)	Entropy 0.68216 (0.68312)	Top-1 acc 77.344 (72.629)	Top-5 acc 91.016 (89.100)	lr 0.00049
Train [110][820/3239]	Time 0.234 (0.716)	Data Time 0.002 (0.049)	Loss 2.1967 (2.1630)	Entropy 0.68210 (0.68311)	Top-1 acc 71.094 (72.631)	Top-5 acc 87.500 (89.088)	lr 0.00049
Train [110][830/3239]	Time 0.252 (0.714)	Data Time 0.002 (0.048)	Loss 2.0890 (2.1630)	Entropy 0.68209 (0.68309)	Top-1 acc 73.828 (72.634)	Top-5 acc 91.406 (89.093)	lr 0.00049
Train [110][840/3239]	Time 0.227 (0.711)	Data Time 0.001 (0.048)	Loss 2.0771 (2.1627)	Entropy 0.68204 (0.68308)	Top-1 acc 73.828 (72.631)	Top-5 acc 90.234 (89.100)	lr 0.00049
Train [110][850/3239]	Time 0.316 (0.708)	Data Time 0.001 (0.047)	Loss 2.2631 (2.1634)	Entropy 0.68200 (0.68307)	Top-1 acc 71.094 (72.620)	Top-5 acc 87.109 (89.089)	lr 0.00049
Train [110][860/3239]	Time 0.223 (0.706)	Data Time 0.002 (0.047)	Loss 2.1689 (2.1636)	Entropy 0.68196 (0.68306)	Top-1 acc 71.875 (72.615)	Top-5 acc 88.672 (89.087)	lr 0.00049
Train [110][870/3239]	Time 0.236 (0.703)	Data Time 0.001 (0.046)	Loss 2.1536 (2.1631)	Entropy 0.68193 (0.68304)	Top-1 acc 74.219 (72.634)	Top-5 acc 89.453 (89.095)	lr 0.00049
Train [110][880/3239]	Time 0.258 (0.701)	Data Time 0.004 (0.046)	Loss 2.1286 (2.1634)	Entropy 0.68185 (0.68303)	Top-1 acc 74.219 (72.629)	Top-5 acc 92.188 (89.096)	lr 0.00049
Train [110][890/3239]	Time 2.574 (0.698)	Data Time 0.001 (0.045)	Loss 2.1420 (2.1634)	Entropy 0.68185 (0.68302)	Top-1 acc 76.562 (72.626)	Top-5 acc 88.672 (89.092)	lr 0.00049
Train [110][900/3239]	Time 0.276 (0.693)	Data Time 0.001 (0.045)	Loss 2.2175 (2.1629)	Entropy 0.68184 (0.68301)	Top-1 acc 71.875 (72.646)	Top-5 acc 89.453 (89.103)	lr 0.00049
Train [110][910/3239]	Time 0.240 (0.691)	Data Time 0.001 (0.044)	Loss 2.1928 (2.1630)	Entropy 0.68182 (0.68299)	Top-1 acc 71.875 (72.644)	Top-5 acc 88.281 (89.099)	lr 0.00049
Train [110][920/3239]	Time 0.240 (0.689)	Data Time 0.001 (0.044)	Loss 2.0324 (2.1628)	Entropy 0.68183 (0.68298)	Top-1 acc 76.562 (72.655)	Top-5 acc 92.969 (89.094)	lr 0.00049
Train [110][930/3239]	Time 0.230 (0.687)	Data Time 0.002 (0.043)	Loss 1.9419 (2.1630)	Entropy 0.68181 (0.68297)	Top-1 acc 79.688 (72.654)	Top-5 acc 93.750 (89.091)	lr 0.00049
Train [110][940/3239]	Time 0.248 (0.685)	Data Time 0.001 (0.043)	Loss 2.2138 (2.1632)	Entropy 0.68181 (0.68295)	Top-1 acc 69.141 (72.641)	Top-5 acc 89.062 (89.082)	lr 0.00049
Train [110][950/3239]	Time 0.241 (0.683)	Data Time 0.001 (0.042)	Loss 2.2185 (2.1636)	Entropy 0.68178 (0.68294)	Top-1 acc 70.312 (72.632)	Top-5 acc 87.500 (89.072)	lr 0.00049
Train [110][960/3239]	Time 0.238 (0.681)	Data Time 0.002 (0.042)	Loss 2.3373 (2.1638)	Entropy 0.68182 (0.68293)	Top-1 acc 69.531 (72.631)	Top-5 acc 85.938 (89.066)	lr 0.00049
Train [110][970/3239]	Time 0.211 (0.679)	Data Time 0.001 (0.042)	Loss 2.2084 (2.1637)	Entropy 0.68177 (0.68292)	Top-1 acc 71.875 (72.637)	Top-5 acc 87.500 (89.066)	lr 0.00049
Train [110][980/3239]	Time 0.228 (0.677)	Data Time 0.001 (0.041)	Loss 2.1540 (2.1636)	Entropy 0.68168 (0.68291)	Top-1 acc 73.438 (72.627)	Top-5 acc 90.234 (89.076)	lr 0.00049
Train [110][990/3239]	Time 0.288 (0.675)	Data Time 0.001 (0.041)	Loss 2.0888 (2.1636)	Entropy 0.68159 (0.68289)	Top-1 acc 71.875 (72.619)	Top-5 acc 90.625 (89.075)	lr 0.00049
Train [110][1000/3239]	Time 2.528 (0.673)	Data Time 0.001 (0.040)	Loss 2.1938 (2.1636)	Entropy 0.68159 (0.68288)	Top-1 acc 71.875 (72.623)	Top-5 acc 86.328 (89.075)	lr 0.00049
Train [110][1010/3239]	Time 0.260 (0.669)	Data Time 0.001 (0.040)	Loss 2.1917 (2.1638)	Entropy 0.68158 (0.68287)	Top-1 acc 69.141 (72.615)	Top-5 acc 88.281 (89.066)	lr 0.00049
Train [110][1020/3239]	Time 0.324 (0.667)	Data Time 0.001 (0.040)	Loss 2.3254 (2.1636)	Entropy 0.68156 (0.68286)	Top-1 acc 65.234 (72.615)	Top-5 acc 87.500 (89.062)	lr 0.00049
Train [110][1030/3239]	Time 0.233 (0.665)	Data Time 0.001 (0.039)	Loss 2.3264 (2.1642)	Entropy 0.68153 (0.68284)	Top-1 acc 70.703 (72.601)	Top-5 acc 89.062 (89.055)	lr 0.00049
Train [110][1040/3239]	Time 0.235 (0.663)	Data Time 0.001 (0.039)	Loss 2.3284 (2.1647)	Entropy 0.68151 (0.68283)	Top-1 acc 67.188 (72.590)	Top-5 acc 85.547 (89.049)	lr 0.00049
Train [110][1050/3239]	Time 0.238 (0.662)	Data Time 0.002 (0.038)	Loss 2.3570 (2.1650)	Entropy 0.68143 (0.68282)	Top-1 acc 67.578 (72.579)	Top-5 acc 84.766 (89.039)	lr 0.00049
Train [110][1060/3239]	Time 0.326 (0.660)	Data Time 0.001 (0.038)	Loss 2.3110 (2.1657)	Entropy 0.68144 (0.68280)	Top-1 acc 67.969 (72.544)	Top-5 acc 87.891 (89.027)	lr 0.00048
Train [110][1070/3239]	Time 0.237 (0.659)	Data Time 0.001 (0.038)	Loss 2.1132 (2.1657)	Entropy 0.68147 (0.68279)	Top-1 acc 71.875 (72.527)	Top-5 acc 90.234 (89.032)	lr 0.00048
Train [110][1080/3239]	Time 0.235 (0.657)	Data Time 0.001 (0.037)	Loss 2.0660 (2.1654)	Entropy 0.68152 (0.68278)	Top-1 acc 75.391 (72.534)	Top-5 acc 91.406 (89.040)	lr 0.00048
Train [110][1090/3239]	Time 0.236 (0.656)	Data Time 0.001 (0.037)	Loss 2.2005 (2.1657)	Entropy 0.68152 (0.68277)	Top-1 acc 71.484 (72.530)	Top-5 acc 88.281 (89.041)	lr 0.00048
Train [110][1100/3239]	Time 0.390 (0.654)	Data Time 0.001 (0.037)	Loss 2.1035 (2.1654)	Entropy 0.68150 (0.68276)	Top-1 acc 75.781 (72.542)	Top-5 acc 90.625 (89.048)	lr 0.00048
Train [110][1110/3239]	Time 2.669 (0.653)	Data Time 0.001 (0.036)	Loss 2.1948 (2.1659)	Entropy 0.68150 (0.68275)	Top-1 acc 69.531 (72.526)	Top-5 acc 87.109 (89.045)	lr 0.00048
Train [110][1120/3239]	Time 0.255 (0.649)	Data Time 0.001 (0.036)	Loss 2.2909 (2.1660)	Entropy 0.68149 (0.68273)	Top-1 acc 67.188 (72.522)	Top-5 acc 88.281 (89.046)	lr 0.00048
Train [110][1130/3239]	Time 0.247 (0.647)	Data Time 0.001 (0.036)	Loss 2.1178 (2.1659)	Entropy 0.68153 (0.68272)	Top-1 acc 75.000 (72.527)	Top-5 acc 90.625 (89.049)	lr 0.00048
Train [110][1140/3239]	Time 0.230 (0.646)	Data Time 0.001 (0.036)	Loss 2.1669 (2.1663)	Entropy 0.68153 (0.68271)	Top-1 acc 69.922 (72.513)	Top-5 acc 89.453 (89.041)	lr 0.00048
Train [110][1150/3239]	Time 0.246 (0.645)	Data Time 0.001 (0.035)	Loss 2.1149 (2.1661)	Entropy 0.68153 (0.68270)	Top-1 acc 75.781 (72.524)	Top-5 acc 90.625 (89.048)	lr 0.00048
Train [110][1160/3239]	Time 0.241 (0.643)	Data Time 0.001 (0.035)	Loss 2.2585 (2.1660)	Entropy 0.68155 (0.68269)	Top-1 acc 71.094 (72.528)	Top-5 acc 86.719 (89.051)	lr 0.00048
Train [110][1170/3239]	Time 0.270 (0.686)	Data Time 0.003 (0.035)	Loss 1.9634 (2.1655)	Entropy 0.68153 (0.68268)	Top-1 acc 75.000 (72.540)	Top-5 acc 93.750 (89.061)	lr 0.00048
Train [110][1180/3239]	Time 0.250 (0.685)	Data Time 0.002 (0.034)	Loss 2.1460 (2.1652)	Entropy 0.68158 (0.68267)	Top-1 acc 70.312 (72.541)	Top-5 acc 91.016 (89.070)	lr 0.00048
Train [110][1190/3239]	Time 0.243 (0.684)	Data Time 0.001 (0.034)	Loss 2.0604 (2.1653)	Entropy 0.68158 (0.68266)	Top-1 acc 78.906 (72.543)	Top-5 acc 91.016 (89.066)	lr 0.00048
Train [110][1200/3239]	Time 0.222 (0.682)	Data Time 0.001 (0.034)	Loss 2.1035 (2.1654)	Entropy 0.68151 (0.68266)	Top-1 acc 74.219 (72.534)	Top-5 acc 90.625 (89.066)	lr 0.00048
Train [110][1210/3239]	Time 0.243 (0.680)	Data Time 0.001 (0.034)	Loss 2.1368 (2.1655)	Entropy 0.68139 (0.68265)	Top-1 acc 75.781 (72.535)	Top-5 acc 89.062 (89.065)	lr 0.00048
Train [110][1220/3239]	Time 2.559 (0.679)	Data Time 0.001 (0.033)	Loss 2.0109 (2.1651)	Entropy 0.68139 (0.68264)	Top-1 acc 77.344 (72.550)	Top-5 acc 91.016 (89.070)	lr 0.00048
Train [110][1230/3239]	Time 0.242 (0.675)	Data Time 0.002 (0.033)	Loss 2.0944 (2.1652)	Entropy 0.68134 (0.68262)	Top-1 acc 71.875 (72.545)	Top-5 acc 91.406 (89.068)	lr 0.00048
Train [110][1240/3239]	Time 0.243 (0.674)	Data Time 0.001 (0.033)	Loss 2.0283 (2.1651)	Entropy 0.68135 (0.68261)	Top-1 acc 76.172 (72.545)	Top-5 acc 92.969 (89.073)	lr 0.00048
Train [110][1250/3239]	Time 0.244 (0.672)	Data Time 0.001 (0.033)	Loss 2.2237 (2.1656)	Entropy 0.68133 (0.68260)	Top-1 acc 72.266 (72.533)	Top-5 acc 87.109 (89.071)	lr 0.00048
Train [110][1260/3239]	Time 0.234 (0.671)	Data Time 0.001 (0.032)	Loss 2.2230 (2.1655)	Entropy 0.68135 (0.68259)	Top-1 acc 71.484 (72.532)	Top-5 acc 85.547 (89.067)	lr 0.00048
Train [110][1270/3239]	Time 0.361 (0.670)	Data Time 0.001 (0.032)	Loss 2.3802 (2.1658)	Entropy 0.68129 (0.68258)	Top-1 acc 68.359 (72.531)	Top-5 acc 85.547 (89.063)	lr 0.00048
Train [110][1280/3239]	Time 0.249 (0.668)	Data Time 0.001 (0.032)	Loss 2.1576 (2.1659)	Entropy 0.68123 (0.68257)	Top-1 acc 73.438 (72.535)	Top-5 acc 86.719 (89.056)	lr 0.00048
Train [110][1290/3239]	Time 0.235 (0.667)	Data Time 0.001 (0.032)	Loss 2.3932 (2.1658)	Entropy 0.68122 (0.68256)	Top-1 acc 66.016 (72.539)	Top-5 acc 84.375 (89.059)	lr 0.00048
Train [110][1300/3239]	Time 0.224 (0.665)	Data Time 0.001 (0.031)	Loss 2.1450 (2.1661)	Entropy 0.68122 (0.68255)	Top-1 acc 75.000 (72.532)	Top-5 acc 87.500 (89.056)	lr 0.00048
Train [110][1310/3239]	Time 0.338 (0.664)	Data Time 0.001 (0.031)	Loss 2.2000 (2.1659)	Entropy 0.68123 (0.68254)	Top-1 acc 73.438 (72.540)	Top-5 acc 88.672 (89.062)	lr 0.00048
Train [110][1320/3239]	Time 0.237 (0.663)	Data Time 0.001 (0.031)	Loss 2.1757 (2.1660)	Entropy 0.68126 (0.68253)	Top-1 acc 71.875 (72.537)	Top-5 acc 91.016 (89.062)	lr 0.00048
Train [110][1330/3239]	Time 2.529 (0.661)	Data Time 0.001 (0.031)	Loss 2.2527 (2.1660)	Entropy 0.68126 (0.68252)	Top-1 acc 69.922 (72.542)	Top-5 acc 88.672 (89.062)	lr 0.00048
Train [110][1340/3239]	Time 0.243 (0.658)	Data Time 0.001 (0.031)	Loss 2.1845 (2.1660)	Entropy 0.68121 (0.68251)	Top-1 acc 72.266 (72.547)	Top-5 acc 87.891 (89.062)	lr 0.00048
Train [110][1350/3239]	Time 0.328 (0.657)	Data Time 0.001 (0.030)	Loss 2.1246 (2.1656)	Entropy 0.68110 (0.68250)	Top-1 acc 71.094 (72.555)	Top-5 acc 90.234 (89.071)	lr 0.00048
Train [110][1360/3239]	Time 0.242 (0.656)	Data Time 0.001 (0.030)	Loss 2.0707 (2.1655)	Entropy 0.68109 (0.68249)	Top-1 acc 74.219 (72.556)	Top-5 acc 91.016 (89.069)	lr 0.00048
Train [110][1370/3239]	Time 0.225 (0.654)	Data Time 0.001 (0.030)	Loss 2.3347 (2.1656)	Entropy 0.68109 (0.68248)	Top-1 acc 67.578 (72.549)	Top-5 acc 85.156 (89.065)	lr 0.00048
Train [110][1380/3239]	Time 0.232 (0.653)	Data Time 0.001 (0.030)	Loss 2.1964 (2.1656)	Entropy 0.68104 (0.68247)	Top-1 acc 71.484 (72.551)	Top-5 acc 88.672 (89.066)	lr 0.00048
Train [110][1390/3239]	Time 0.351 (0.652)	Data Time 0.002 (0.029)	Loss 2.0304 (2.1653)	Entropy 0.68108 (0.68246)	Top-1 acc 78.516 (72.560)	Top-5 acc 92.188 (89.071)	lr 0.00048
Train [110][1400/3239]	Time 0.243 (0.651)	Data Time 0.002 (0.029)	Loss 2.1483 (2.1655)	Entropy 0.68107 (0.68245)	Top-1 acc 70.703 (72.551)	Top-5 acc 88.672 (89.065)	lr 0.00048
Train [110][1410/3239]	Time 0.238 (0.650)	Data Time 0.001 (0.029)	Loss 2.3780 (2.1656)	Entropy 0.68124 (0.68244)	Top-1 acc 68.750 (72.550)	Top-5 acc 87.109 (89.064)	lr 0.00048
Train [110][1420/3239]	Time 0.228 (0.648)	Data Time 0.001 (0.029)	Loss 2.0882 (2.1655)	Entropy 0.68122 (0.68243)	Top-1 acc 75.391 (72.553)	Top-5 acc 89.062 (89.065)	lr 0.00047
Train [110][1430/3239]	Time 0.239 (0.647)	Data Time 0.001 (0.029)	Loss 2.1940 (2.1656)	Entropy 0.68120 (0.68243)	Top-1 acc 71.484 (72.556)	Top-5 acc 89.453 (89.064)	lr 0.00047
Train [110][1440/3239]	Time 2.531 (0.646)	Data Time 0.030 (0.029)	Loss 2.2083 (2.1659)	Entropy 0.68120 (0.68242)	Top-1 acc 71.094 (72.548)	Top-5 acc 89.844 (89.057)	lr 0.00047
Train [110][1450/3239]	Time 0.242 (0.643)	Data Time 0.001 (0.028)	Loss 2.1526 (2.1659)	Entropy 0.68111 (0.68241)	Top-1 acc 74.609 (72.552)	Top-5 acc 88.281 (89.054)	lr 0.00047
Train [110][1460/3239]	Time 0.238 (0.642)	Data Time 0.001 (0.028)	Loss 2.1433 (2.1660)	Entropy 0.68102 (0.68240)	Top-1 acc 71.875 (72.551)	Top-5 acc 90.625 (89.056)	lr 0.00047
Train [110][1470/3239]	Time 0.237 (0.641)	Data Time 0.001 (0.028)	Loss 2.0753 (2.1660)	Entropy 0.68098 (0.68239)	Top-1 acc 74.219 (72.548)	Top-5 acc 91.016 (89.058)	lr 0.00047
Train [110][1480/3239]	Time 0.228 (0.640)	Data Time 0.001 (0.028)	Loss 2.2488 (2.1661)	Entropy 0.68092 (0.68238)	Top-1 acc 68.750 (72.539)	Top-5 acc 85.547 (89.056)	lr 0.00047
Train [110][1490/3239]	Time 0.231 (0.639)	Data Time 0.001 (0.028)	Loss 2.2933 (2.1662)	Entropy 0.68089 (0.68237)	Top-1 acc 71.094 (72.537)	Top-5 acc 84.766 (89.054)	lr 0.00047
Train [110][1500/3239]	Time 0.242 (0.638)	Data Time 0.001 (0.027)	Loss 2.1220 (2.1663)	Entropy 0.68086 (0.68236)	Top-1 acc 73.828 (72.539)	Top-5 acc 87.109 (89.051)	lr 0.00047
Train [110][1510/3239]	Time 0.240 (0.637)	Data Time 0.001 (0.027)	Loss 2.1646 (2.1665)	Entropy 0.68086 (0.68235)	Top-1 acc 75.000 (72.534)	Top-5 acc 88.281 (89.045)	lr 0.00047
Train [110][1520/3239]	Time 0.337 (0.636)	Data Time 0.001 (0.027)	Loss 2.1869 (2.1664)	Entropy 0.68081 (0.68234)	Top-1 acc 70.703 (72.534)	Top-5 acc 89.453 (89.051)	lr 0.00047
Train [110][1530/3239]	Time 0.245 (0.671)	Data Time 0.002 (0.027)	Loss 2.2490 (2.1662)	Entropy 0.68080 (0.68233)	Top-1 acc 72.656 (72.532)	Top-5 acc 86.328 (89.057)	lr 0.00047
Train [110][1540/3239]	Time 0.260 (0.670)	Data Time 0.002 (0.027)	Loss 2.3102 (2.1663)	Entropy 0.68078 (0.68232)	Top-1 acc 67.188 (72.531)	Top-5 acc 86.328 (89.054)	lr 0.00047
Train [110][1550/3239]	Time 2.654 (0.669)	Data Time 0.002 (0.027)	Loss 2.2259 (2.1663)	Entropy 0.68078 (0.68231)	Top-1 acc 70.703 (72.532)	Top-5 acc 88.672 (89.055)	lr 0.00047
Train [110][1560/3239]	Time 0.378 (0.666)	Data Time 0.001 (0.026)	Loss 2.1541 (2.1663)	Entropy 0.68079 (0.68230)	Top-1 acc 75.000 (72.536)	Top-5 acc 90.234 (89.061)	lr 0.00047
Train [110][1570/3239]	Time 0.237 (0.665)	Data Time 0.001 (0.026)	Loss 2.1287 (2.1666)	Entropy 0.68084 (0.68229)	Top-1 acc 73.047 (72.522)	Top-5 acc 89.453 (89.053)	lr 0.00047
Train [110][1580/3239]	Time 0.233 (0.664)	Data Time 0.001 (0.026)	Loss 2.0446 (2.1664)	Entropy 0.68088 (0.68228)	Top-1 acc 73.828 (72.524)	Top-5 acc 89.844 (89.054)	lr 0.00047
Train [110][1590/3239]	Time 0.242 (0.663)	Data Time 0.002 (0.026)	Loss 2.0912 (2.1664)	Entropy 0.68071 (0.68227)	Top-1 acc 72.266 (72.526)	Top-5 acc 89.453 (89.053)	lr 0.00047
Train [110][1600/3239]	Time 0.253 (0.661)	Data Time 0.001 (0.026)	Loss 2.3303 (2.1663)	Entropy 0.68070 (0.68226)	Top-1 acc 66.797 (72.528)	Top-5 acc 86.328 (89.059)	lr 0.00047
Train [110][1610/3239]	Time 0.235 (0.660)	Data Time 0.001 (0.026)	Loss 2.0209 (2.1662)	Entropy 0.68076 (0.68225)	Top-1 acc 75.000 (72.531)	Top-5 acc 90.625 (89.061)	lr 0.00047
Train [110][1620/3239]	Time 0.233 (0.659)	Data Time 0.001 (0.026)	Loss 2.2739 (2.1661)	Entropy 0.68068 (0.68224)	Top-1 acc 67.578 (72.539)	Top-5 acc 87.109 (89.065)	lr 0.00047
Train [110][1630/3239]	Time 0.238 (0.658)	Data Time 0.001 (0.025)	Loss 2.2053 (2.1660)	Entropy 0.68061 (0.68223)	Top-1 acc 71.875 (72.540)	Top-5 acc 89.062 (89.069)	lr 0.00047
Train [110][1640/3239]	Time 0.219 (0.657)	Data Time 0.001 (0.025)	Loss 2.1072 (2.1657)	Entropy 0.68061 (0.68222)	Top-1 acc 79.297 (72.551)	Top-5 acc 89.844 (89.075)	lr 0.00047
Train [110][1650/3239]	Time 0.294 (0.656)	Data Time 0.001 (0.025)	Loss 2.2304 (2.1659)	Entropy 0.68061 (0.68221)	Top-1 acc 71.094 (72.543)	Top-5 acc 87.891 (89.075)	lr 0.00047
Train [110][1660/3239]	Time 2.537 (0.655)	Data Time 0.001 (0.025)	Loss 2.1503 (2.1659)	Entropy 0.68061 (0.68220)	Top-1 acc 73.047 (72.547)	Top-5 acc 91.016 (89.073)	lr 0.00047
Train [110][1670/3239]	Time 0.228 (0.652)	Data Time 0.001 (0.025)	Loss 2.2068 (2.1659)	Entropy 0.68059 (0.68220)	Top-1 acc 72.266 (72.549)	Top-5 acc 88.281 (89.073)	lr 0.00047
Train [110][1680/3239]	Time 0.236 (0.651)	Data Time 0.001 (0.025)	Loss 2.1754 (2.1659)	Entropy 0.68055 (0.68219)	Top-1 acc 73.047 (72.551)	Top-5 acc 88.672 (89.074)	lr 0.00047
Train [110][1690/3239]	Time 0.232 (0.650)	Data Time 0.001 (0.025)	Loss 2.1069 (2.1658)	Entropy 0.68053 (0.68218)	Top-1 acc 73.828 (72.556)	Top-5 acc 87.500 (89.075)	lr 0.00047
Train [110][1700/3239]	Time 0.238 (0.649)	Data Time 0.001 (0.024)	Loss 2.0557 (2.1660)	Entropy 0.68037 (0.68217)	Top-1 acc 73.828 (72.551)	Top-5 acc 91.016 (89.071)	lr 0.00047
Train [110][1710/3239]	Time 0.225 (0.648)	Data Time 0.001 (0.024)	Loss 2.1386 (2.1660)	Entropy 0.68032 (0.68215)	Top-1 acc 71.484 (72.547)	Top-5 acc 91.406 (89.068)	lr 0.00047
Train [110][1720/3239]	Time 0.230 (0.647)	Data Time 0.001 (0.024)	Loss 2.2448 (2.1661)	Entropy 0.68036 (0.68214)	Top-1 acc 68.750 (72.542)	Top-5 acc 88.672 (89.071)	lr 0.00047
Train [110][1730/3239]	Time 0.328 (0.647)	Data Time 0.001 (0.024)	Loss 2.1895 (2.1661)	Entropy 0.68033 (0.68213)	Top-1 acc 69.531 (72.540)	Top-5 acc 89.844 (89.072)	lr 0.00047
Train [110][1740/3239]	Time 0.231 (0.646)	Data Time 0.001 (0.024)	Loss 2.0859 (2.1662)	Entropy 0.68026 (0.68212)	Top-1 acc 76.172 (72.542)	Top-5 acc 87.109 (89.068)	lr 0.00047
Train [110][1750/3239]	Time 0.230 (0.645)	Data Time 0.002 (0.024)	Loss 2.2553 (2.1662)	Entropy 0.68025 (0.68211)	Top-1 acc 72.266 (72.540)	Top-5 acc 84.375 (89.070)	lr 0.00047
Train [110][1760/3239]	Time 0.233 (0.644)	Data Time 0.001 (0.024)	Loss 2.1822 (2.1663)	Entropy 0.68020 (0.68210)	Top-1 acc 74.609 (72.546)	Top-5 acc 89.062 (89.068)	lr 0.00047
Train [110][1770/3239]	Time 2.689 (0.643)	Data Time 0.001 (0.023)	Loss 2.2101 (2.1663)	Entropy 0.68020 (0.68209)	Top-1 acc 69.922 (72.546)	Top-5 acc 87.109 (89.066)	lr 0.00047
Train [110][1780/3239]	Time 0.276 (0.641)	Data Time 0.001 (0.023)	Loss 2.1862 (2.1666)	Entropy 0.68015 (0.68208)	Top-1 acc 74.219 (72.540)	Top-5 acc 89.453 (89.061)	lr 0.00046
Train [110][1790/3239]	Time 0.230 (0.640)	Data Time 0.001 (0.023)	Loss 2.1270 (2.1666)	Entropy 0.68015 (0.68207)	Top-1 acc 74.219 (72.542)	Top-5 acc 91.016 (89.059)	lr 0.00046
Train [110][1800/3239]	Time 0.261 (0.639)	Data Time 0.001 (0.023)	Loss 2.2349 (2.1665)	Entropy 0.68012 (0.68206)	Top-1 acc 70.312 (72.546)	Top-5 acc 90.234 (89.062)	lr 0.00046
Train [110][1810/3239]	Time 0.335 (0.638)	Data Time 0.001 (0.023)	Loss 2.2361 (2.1667)	Entropy 0.68013 (0.68205)	Top-1 acc 70.703 (72.538)	Top-5 acc 87.891 (89.063)	lr 0.00046
Train [110][1820/3239]	Time 0.246 (0.637)	Data Time 0.001 (0.023)	Loss 2.3026 (2.1667)	Entropy 0.68014 (0.68204)	Top-1 acc 71.875 (72.540)	Top-5 acc 85.938 (89.061)	lr 0.00046
Train [110][1830/3239]	Time 0.232 (0.636)	Data Time 0.001 (0.023)	Loss 2.1221 (2.1669)	Entropy 0.68016 (0.68203)	Top-1 acc 75.000 (72.536)	Top-5 acc 90.234 (89.060)	lr 0.00046
Train [110][1840/3239]	Time 0.226 (0.635)	Data Time 0.001 (0.023)	Loss 2.1761 (2.1667)	Entropy 0.68020 (0.68202)	Top-1 acc 72.266 (72.548)	Top-5 acc 89.844 (89.066)	lr 0.00046
Train [110][1850/3239]	Time 0.327 (0.635)	Data Time 0.001 (0.023)	Loss 2.1372 (2.1668)	Entropy 0.68013 (0.68201)	Top-1 acc 77.734 (72.552)	Top-5 acc 89.453 (89.063)	lr 0.00046
Train [110][1860/3239]	Time 0.237 (0.634)	Data Time 0.001 (0.022)	Loss 2.1312 (2.1669)	Entropy 0.67999 (0.68200)	Top-1 acc 74.219 (72.550)	Top-5 acc 89.453 (89.063)	lr 0.00046
Train [110][1870/3239]	Time 0.228 (0.633)	Data Time 0.001 (0.022)	Loss 2.2561 (2.1668)	Entropy 0.67997 (0.68199)	Top-1 acc 69.922 (72.549)	Top-5 acc 87.109 (89.063)	lr 0.00046
Train [110][1880/3239]	Time 2.663 (0.632)	Data Time 0.001 (0.022)	Loss 2.1561 (2.1668)	Entropy 0.67997 (0.68198)	Top-1 acc 73.438 (72.548)	Top-5 acc 89.062 (89.065)	lr 0.00046
Train [110][1890/3239]	Time 0.255 (0.630)	Data Time 0.001 (0.022)	Loss 2.0988 (2.1668)	Entropy 0.68000 (0.68197)	Top-1 acc 73.828 (72.547)	Top-5 acc 91.016 (89.065)	lr 0.00046
Train [110][1900/3239]	Time 0.307 (0.657)	Data Time 0.003 (0.022)	Loss 2.2548 (2.1671)	Entropy 0.67996 (0.68195)	Top-1 acc 69.141 (72.538)	Top-5 acc 89.062 (89.061)	lr 0.00046
Train [110][1910/3239]	Time 0.235 (0.656)	Data Time 0.002 (0.022)	Loss 2.0570 (2.1672)	Entropy 0.67991 (0.68194)	Top-1 acc 76.953 (72.535)	Top-5 acc 90.234 (89.060)	lr 0.00046
Train [110][1920/3239]	Time 0.236 (0.655)	Data Time 0.002 (0.022)	Loss 2.2682 (2.1670)	Entropy 0.67988 (0.68193)	Top-1 acc 69.141 (72.540)	Top-5 acc 87.109 (89.058)	lr 0.00046
Train [110][1930/3239]	Time 0.248 (0.654)	Data Time 0.002 (0.022)	Loss 2.1845 (2.1671)	Entropy 0.67981 (0.68192)	Top-1 acc 72.266 (72.535)	Top-5 acc 89.062 (89.056)	lr 0.00046
Train [110][1940/3239]	Time 0.238 (0.654)	Data Time 0.002 (0.022)	Loss 2.0162 (2.1670)	Entropy 0.67980 (0.68191)	Top-1 acc 75.781 (72.536)	Top-5 acc 92.578 (89.058)	lr 0.00046
Train [110][1950/3239]	Time 0.238 (0.653)	Data Time 0.002 (0.021)	Loss 2.1591 (2.1673)	Entropy 0.67973 (0.68190)	Top-1 acc 71.484 (72.535)	Top-5 acc 89.062 (89.055)	lr 0.00046
Train [110][1960/3239]	Time 0.229 (0.652)	Data Time 0.001 (0.021)	Loss 2.1348 (2.1676)	Entropy 0.67971 (0.68189)	Top-1 acc 72.656 (72.525)	Top-5 acc 90.625 (89.051)	lr 0.00046
Train [110][1970/3239]	Time 0.228 (0.651)	Data Time 0.001 (0.021)	Loss 2.3036 (2.1678)	Entropy 0.67968 (0.68188)	Top-1 acc 67.969 (72.522)	Top-5 acc 87.500 (89.045)	lr 0.00046
Train [110][1980/3239]	Time 0.264 (0.650)	Data Time 0.001 (0.021)	Loss 2.4091 (2.1679)	Entropy 0.67969 (0.68187)	Top-1 acc 66.797 (72.515)	Top-5 acc 82.031 (89.041)	lr 0.00046
Train [110][1990/3239]	Time 2.689 (0.649)	Data Time 0.001 (0.021)	Loss 2.1370 (2.1683)	Entropy 0.67969 (0.68186)	Top-1 acc 76.953 (72.508)	Top-5 acc 88.672 (89.034)	lr 0.00046
Train [110][2000/3239]	Time 0.245 (0.647)	Data Time 0.001 (0.021)	Loss 2.0480 (2.1683)	Entropy 0.67964 (0.68185)	Top-1 acc 75.391 (72.512)	Top-5 acc 92.578 (89.033)	lr 0.00046
Train [110][2010/3239]	Time 0.243 (0.647)	Data Time 0.002 (0.021)	Loss 2.0649 (2.1683)	Entropy 0.67959 (0.68183)	Top-1 acc 75.781 (72.511)	Top-5 acc 90.625 (89.032)	lr 0.00046
Train [110][2020/3239]	Time 0.234 (0.646)	Data Time 0.001 (0.021)	Loss 2.0905 (2.1683)	Entropy 0.67955 (0.68182)	Top-1 acc 72.656 (72.507)	Top-5 acc 91.406 (89.038)	lr 0.00046
Train [110][2030/3239]	Time 0.231 (0.645)	Data Time 0.001 (0.021)	Loss 2.1768 (2.1682)	Entropy 0.67951 (0.68181)	Top-1 acc 69.922 (72.507)	Top-5 acc 89.062 (89.039)	lr 0.00046
Train [110][2040/3239]	Time 0.239 (0.644)	Data Time 0.001 (0.021)	Loss 2.2247 (2.1681)	Entropy 0.67954 (0.68180)	Top-1 acc 71.484 (72.509)	Top-5 acc 89.844 (89.043)	lr 0.00046
Train [110][2050/3239]	Time 0.242 (0.643)	Data Time 0.002 (0.021)	Loss 2.2577 (2.1680)	Entropy 0.67951 (0.68179)	Top-1 acc 69.922 (72.512)	Top-5 acc 86.719 (89.043)	lr 0.00046
Train [110][2060/3239]	Time 0.236 (0.643)	Data Time 0.001 (0.020)	Loss 2.0951 (2.1680)	Entropy 0.67949 (0.68178)	Top-1 acc 75.391 (72.512)	Top-5 acc 88.672 (89.040)	lr 0.00046
Train [110][2070/3239]	Time 0.216 (0.642)	Data Time 0.001 (0.020)	Loss 2.0929 (2.1680)	Entropy 0.67947 (0.68177)	Top-1 acc 73.828 (72.510)	Top-5 acc 90.234 (89.040)	lr 0.00046
Train [110][2080/3239]	Time 0.239 (0.641)	Data Time 0.001 (0.020)	Loss 2.1546 (2.1681)	Entropy 0.67949 (0.68176)	Top-1 acc 72.656 (72.506)	Top-5 acc 90.625 (89.041)	lr 0.00046
Train [110][2090/3239]	Time 0.235 (0.640)	Data Time 0.001 (0.020)	Loss 2.1706 (2.1679)	Entropy 0.67952 (0.68175)	Top-1 acc 70.703 (72.511)	Top-5 acc 88.672 (89.042)	lr 0.00046
Train [110][2100/3239]	Time 2.658 (0.640)	Data Time 0.001 (0.020)	Loss 2.1699 (2.1677)	Entropy 0.67952 (0.68173)	Top-1 acc 72.266 (72.513)	Top-5 acc 87.891 (89.045)	lr 0.00046
Train [110][2110/3239]	Time 0.257 (0.638)	Data Time 0.002 (0.020)	Loss 2.1430 (2.1676)	Entropy 0.67947 (0.68172)	Top-1 acc 74.609 (72.515)	Top-5 acc 89.453 (89.050)	lr 0.00046
Train [110][2120/3239]	Time 0.250 (0.637)	Data Time 0.001 (0.020)	Loss 2.1635 (2.1674)	Entropy 0.67949 (0.68171)	Top-1 acc 71.875 (72.518)	Top-5 acc 88.281 (89.052)	lr 0.00046
Train [110][2130/3239]	Time 0.232 (0.636)	Data Time 0.001 (0.020)	Loss 1.9993 (2.1675)	Entropy 0.67949 (0.68170)	Top-1 acc 75.781 (72.514)	Top-5 acc 91.797 (89.055)	lr 0.00046
Train [110][2140/3239]	Time 0.245 (0.636)	Data Time 0.001 (0.020)	Loss 2.5255 (2.1677)	Entropy 0.67944 (0.68169)	Top-1 acc 64.062 (72.509)	Top-5 acc 83.203 (89.047)	lr 0.00046
Train [110][2150/3239]	Time 0.233 (0.635)	Data Time 0.001 (0.020)	Loss 2.3048 (2.1679)	Entropy 0.67943 (0.68168)	Top-1 acc 69.141 (72.507)	Top-5 acc 85.938 (89.043)	lr 0.00045
Train [110][2160/3239]	Time 0.241 (0.634)	Data Time 0.001 (0.020)	Loss 2.2316 (2.1678)	Entropy 0.67943 (0.68167)	Top-1 acc 68.750 (72.506)	Top-5 acc 88.672 (89.044)	lr 0.00045
Train [110][2170/3239]	Time 0.242 (0.633)	Data Time 0.002 (0.019)	Loss 2.2307 (2.1682)	Entropy 0.67928 (0.68166)	Top-1 acc 71.875 (72.497)	Top-5 acc 87.891 (89.036)	lr 0.00045
Train [110][2180/3239]	Time 0.232 (0.633)	Data Time 0.001 (0.019)	Loss 2.1828 (2.1681)	Entropy 0.67928 (0.68165)	Top-1 acc 71.875 (72.502)	Top-5 acc 89.062 (89.040)	lr 0.00045
Train [110][2190/3239]	Time 0.245 (0.632)	Data Time 0.001 (0.019)	Loss 2.1174 (2.1680)	Entropy 0.67926 (0.68164)	Top-1 acc 69.922 (72.501)	Top-5 acc 90.234 (89.045)	lr 0.00045
Train [110][2200/3239]	Time 0.243 (0.631)	Data Time 0.002 (0.019)	Loss 2.1141 (2.1681)	Entropy 0.67923 (0.68163)	Top-1 acc 72.266 (72.494)	Top-5 acc 91.797 (89.045)	lr 0.00045
Train [110][2210/3239]	Time 2.515 (0.631)	Data Time 0.001 (0.019)	Loss 2.2440 (2.1681)	Entropy 0.67923 (0.68162)	Top-1 acc 68.750 (72.491)	Top-5 acc 88.672 (89.046)	lr 0.00045
Train [110][2220/3239]	Time 0.271 (0.629)	Data Time 0.001 (0.019)	Loss 2.1527 (2.1681)	Entropy 0.67913 (0.68161)	Top-1 acc 71.484 (72.489)	Top-5 acc 87.891 (89.047)	lr 0.00045
Train [110][2230/3239]	Time 0.335 (0.628)	Data Time 0.001 (0.019)	Loss 2.2240 (2.1679)	Entropy 0.67914 (0.68160)	Top-1 acc 72.656 (72.491)	Top-5 acc 88.281 (89.047)	lr 0.00045
Train [110][2240/3239]	Time 0.225 (0.628)	Data Time 0.001 (0.019)	Loss 2.2777 (2.1679)	Entropy 0.67899 (0.68158)	Top-1 acc 69.531 (72.489)	Top-5 acc 87.891 (89.048)	lr 0.00045
Train [110][2250/3239]	Time 0.232 (0.627)	Data Time 0.001 (0.019)	Loss 2.2313 (2.1678)	Entropy 0.67897 (0.68157)	Top-1 acc 72.266 (72.491)	Top-5 acc 89.453 (89.053)	lr 0.00045
Train [110][2260/3239]	Time 0.258 (0.650)	Data Time 0.002 (0.019)	Loss 2.1583 (2.1678)	Entropy 0.67897 (0.68156)	Top-1 acc 71.484 (72.490)	Top-5 acc 87.891 (89.051)	lr 0.00045
Train [110][2270/3239]	Time 0.236 (0.649)	Data Time 0.002 (0.019)	Loss 1.9749 (2.1678)	Entropy 0.67897 (0.68155)	Top-1 acc 78.516 (72.490)	Top-5 acc 91.797 (89.051)	lr 0.00045
Train [110][2280/3239]	Time 0.230 (0.648)	Data Time 0.001 (0.019)	Loss 2.1575 (2.1677)	Entropy 0.67895 (0.68154)	Top-1 acc 70.703 (72.497)	Top-5 acc 91.797 (89.053)	lr 0.00045
Train [110][2290/3239]	Time 0.232 (0.647)	Data Time 0.001 (0.019)	Loss 2.1543 (2.1677)	Entropy 0.67892 (0.68153)	Top-1 acc 72.266 (72.494)	Top-5 acc 89.453 (89.054)	lr 0.00045
Train [110][2300/3239]	Time 0.234 (0.647)	Data Time 0.001 (0.018)	Loss 2.1790 (2.1679)	Entropy 0.67876 (0.68151)	Top-1 acc 71.875 (72.490)	Top-5 acc 91.797 (89.051)	lr 0.00045
Train [110][2310/3239]	Time 0.503 (0.646)	Data Time 0.002 (0.018)	Loss 2.3144 (2.1680)	Entropy 0.67869 (0.68150)	Top-1 acc 67.578 (72.489)	Top-5 acc 87.500 (89.048)	lr 0.00045
Train [110][2320/3239]	Time 2.839 (0.646)	Data Time 0.002 (0.018)	Loss 2.1881 (2.1682)	Entropy 0.67869 (0.68149)	Top-1 acc 71.094 (72.483)	Top-5 acc 89.453 (89.046)	lr 0.00045
Train [110][2330/3239]	Time 0.310 (0.644)	Data Time 0.002 (0.018)	Loss 2.1966 (2.1683)	Entropy 0.67866 (0.68148)	Top-1 acc 71.484 (72.479)	Top-5 acc 86.719 (89.044)	lr 0.00045
Train [110][2340/3239]	Time 0.281 (0.644)	Data Time 0.002 (0.018)	Loss 2.0490 (2.1683)	Entropy 0.67865 (0.68147)	Top-1 acc 73.828 (72.478)	Top-5 acc 91.797 (89.046)	lr 0.00045
Train [110][2350/3239]	Time 0.268 (0.643)	Data Time 0.002 (0.018)	Loss 2.3686 (2.1683)	Entropy 0.67872 (0.68145)	Top-1 acc 64.844 (72.477)	Top-5 acc 83.594 (89.044)	lr 0.00045
Train [110][2360/3239]	Time 0.246 (0.643)	Data Time 0.001 (0.018)	Loss 2.3021 (2.1683)	Entropy 0.67875 (0.68144)	Top-1 acc 69.531 (72.476)	Top-5 acc 88.281 (89.044)	lr 0.00045
Train [110][2370/3239]	Time 0.231 (0.642)	Data Time 0.001 (0.018)	Loss 2.1081 (2.1683)	Entropy 0.67872 (0.68143)	Top-1 acc 74.609 (72.480)	Top-5 acc 89.062 (89.044)	lr 0.00045
Train [110][2380/3239]	Time 0.249 (0.642)	Data Time 0.002 (0.018)	Loss 2.0844 (2.1683)	Entropy 0.67873 (0.68142)	Top-1 acc 75.391 (72.476)	Top-5 acc 91.406 (89.047)	lr 0.00045
Train [110][2390/3239]	Time 0.295 (0.641)	Data Time 0.002 (0.018)	Loss 2.2453 (2.1682)	Entropy 0.67884 (0.68141)	Top-1 acc 67.969 (72.476)	Top-5 acc 89.844 (89.050)	lr 0.00045
Train [110][2400/3239]	Time 0.235 (0.641)	Data Time 0.001 (0.018)	Loss 2.1504 (2.1684)	Entropy 0.67882 (0.68140)	Top-1 acc 71.094 (72.472)	Top-5 acc 89.062 (89.046)	lr 0.00045
Train [110][2410/3239]	Time 0.232 (0.640)	Data Time 0.001 (0.018)	Loss 2.1868 (2.1682)	Entropy 0.67882 (0.68139)	Top-1 acc 71.094 (72.476)	Top-5 acc 87.891 (89.050)	lr 0.00045
Train [110][2420/3239]	Time 0.262 (0.639)	Data Time 0.001 (0.018)	Loss 2.1648 (2.1681)	Entropy 0.67879 (0.68138)	Top-1 acc 75.000 (72.475)	Top-5 acc 89.062 (89.052)	lr 0.00045
Train [110][2430/3239]	Time 2.577 (0.639)	Data Time 0.001 (0.018)	Loss 1.9780 (2.1680)	Entropy 0.67879 (0.68137)	Top-1 acc 75.781 (72.479)	Top-5 acc 92.969 (89.053)	lr 0.00045
Train [110][2440/3239]	Time 0.362 (0.637)	Data Time 0.001 (0.018)	Loss 2.1121 (2.1679)	Entropy 0.67873 (0.68136)	Top-1 acc 73.047 (72.483)	Top-5 acc 92.578 (89.056)	lr 0.00045
Train [110][2450/3239]	Time 0.230 (0.637)	Data Time 0.001 (0.017)	Loss 2.1719 (2.1680)	Entropy 0.67864 (0.68134)	Top-1 acc 73.828 (72.482)	Top-5 acc 90.234 (89.055)	lr 0.00045
Train [110][2460/3239]	Time 0.244 (0.636)	Data Time 0.001 (0.017)	Loss 2.1128 (2.1679)	Entropy 0.67854 (0.68133)	Top-1 acc 75.781 (72.484)	Top-5 acc 89.453 (89.058)	lr 0.00045
Train [110][2470/3239]	Time 0.239 (0.635)	Data Time 0.001 (0.017)	Loss 2.1649 (2.1677)	Entropy 0.67857 (0.68132)	Top-1 acc 74.219 (72.491)	Top-5 acc 88.672 (89.061)	lr 0.00045
Train [110][2480/3239]	Time 0.333 (0.635)	Data Time 0.001 (0.017)	Loss 2.1043 (2.1676)	Entropy 0.67848 (0.68131)	Top-1 acc 74.609 (72.496)	Top-5 acc 91.797 (89.065)	lr 0.00045
Train [110][2490/3239]	Time 0.231 (0.634)	Data Time 0.001 (0.017)	Loss 2.1751 (2.1677)	Entropy 0.67839 (0.68130)	Top-1 acc 73.438 (72.494)	Top-5 acc 87.891 (89.064)	lr 0.00045
Train [110][2500/3239]	Time 0.220 (0.633)	Data Time 0.001 (0.017)	Loss 2.0636 (2.1676)	Entropy 0.67839 (0.68129)	Top-1 acc 74.609 (72.495)	Top-5 acc 91.797 (89.065)	lr 0.00045
Train [110][2510/3239]	Time 0.224 (0.633)	Data Time 0.001 (0.017)	Loss 2.1349 (2.1676)	Entropy 0.67842 (0.68128)	Top-1 acc 71.484 (72.492)	Top-5 acc 89.453 (89.063)	lr 0.00045
Train [110][2520/3239]	Time 0.244 (0.632)	Data Time 0.001 (0.017)	Loss 2.0818 (2.1675)	Entropy 0.67839 (0.68127)	Top-1 acc 76.953 (72.494)	Top-5 acc 89.062 (89.067)	lr 0.00044
Train [110][2530/3239]	Time 0.228 (0.632)	Data Time 0.001 (0.017)	Loss 2.1628 (2.1674)	Entropy 0.67836 (0.68125)	Top-1 acc 71.484 (72.492)	Top-5 acc 88.281 (89.070)	lr 0.00044
Train [110][2540/3239]	Time 2.589 (0.631)	Data Time 0.001 (0.017)	Loss 2.4950 (2.1675)	Entropy 0.67836 (0.68124)	Top-1 acc 66.016 (72.495)	Top-5 acc 83.984 (89.069)	lr 0.00044
Train [110][2550/3239]	Time 0.244 (0.630)	Data Time 0.001 (0.017)	Loss 2.2086 (2.1674)	Entropy 0.67833 (0.68123)	Top-1 acc 70.312 (72.498)	Top-5 acc 89.453 (89.071)	lr 0.00044
Train [110][2560/3239]	Time 0.254 (0.629)	Data Time 0.001 (0.017)	Loss 2.1721 (2.1673)	Entropy 0.67827 (0.68122)	Top-1 acc 73.438 (72.502)	Top-5 acc 88.672 (89.076)	lr 0.00044
Train [110][2570/3239]	Time 0.245 (0.629)	Data Time 0.001 (0.017)	Loss 2.2302 (2.1670)	Entropy 0.67821 (0.68121)	Top-1 acc 69.922 (72.506)	Top-5 acc 89.453 (89.083)	lr 0.00044
Train [110][2580/3239]	Time 0.233 (0.628)	Data Time 0.001 (0.017)	Loss 2.2218 (2.1671)	Entropy 0.67823 (0.68120)	Top-1 acc 70.312 (72.503)	Top-5 acc 89.062 (89.082)	lr 0.00044
Train [110][2590/3239]	Time 0.234 (0.627)	Data Time 0.001 (0.017)	Loss 2.2101 (2.1671)	Entropy 0.67814 (0.68118)	Top-1 acc 71.875 (72.504)	Top-5 acc 88.281 (89.081)	lr 0.00044
Train [110][2600/3239]	Time 0.233 (0.627)	Data Time 0.001 (0.017)	Loss 2.1504 (2.1672)	Entropy 0.67805 (0.68117)	Top-1 acc 73.828 (72.498)	Top-5 acc 89.844 (89.078)	lr 0.00044
Train [110][2610/3239]	Time 0.234 (0.626)	Data Time 0.001 (0.016)	Loss 2.0932 (2.1673)	Entropy 0.67795 (0.68116)	Top-1 acc 73.438 (72.498)	Top-5 acc 89.062 (89.078)	lr 0.00044
Train [110][2620/3239]	Time 0.239 (0.647)	Data Time 0.002 (0.016)	Loss 2.1713 (2.1672)	Entropy 0.67791 (0.68115)	Top-1 acc 73.047 (72.498)	Top-5 acc 88.281 (89.080)	lr 0.00044
Train [110][2630/3239]	Time 0.236 (0.646)	Data Time 0.002 (0.016)	Loss 2.1979 (2.1672)	Entropy 0.67786 (0.68114)	Top-1 acc 71.484 (72.499)	Top-5 acc 88.281 (89.079)	lr 0.00044
Train [110][2640/3239]	Time 0.239 (0.646)	Data Time 0.001 (0.016)	Loss 2.0494 (2.1672)	Entropy 0.67790 (0.68112)	Top-1 acc 72.656 (72.496)	Top-5 acc 91.016 (89.080)	lr 0.00044
Train [110][2650/3239]	Time 0.218 (0.645)	Data Time 0.001 (0.016)	Loss 2.2032 (2.1673)	Entropy 0.67794 (0.68111)	Top-1 acc 70.703 (72.491)	Top-5 acc 87.109 (89.078)	lr 0.00044
Train [110][2660/3239]	Time 0.234 (0.645)	Data Time 0.001 (0.016)	Loss 2.2557 (2.1673)	Entropy 0.67793 (0.68110)	Top-1 acc 71.094 (72.488)	Top-5 acc 86.719 (89.079)	lr 0.00044
Train [110][2670/3239]	Time 0.232 (0.644)	Data Time 0.001 (0.016)	Loss 2.0590 (2.1672)	Entropy 0.67791 (0.68109)	Top-1 acc 76.953 (72.490)	Top-5 acc 89.062 (89.080)	lr 0.00044
Train [110][2680/3239]	Time 0.244 (0.644)	Data Time 0.001 (0.016)	Loss 2.2414 (2.1673)	Entropy 0.67786 (0.68108)	Top-1 acc 68.750 (72.488)	Top-5 acc 86.719 (89.075)	lr 0.00044
Train [110][2690/3239]	Time 0.242 (0.643)	Data Time 0.001 (0.016)	Loss 2.0321 (2.1672)	Entropy 0.67785 (0.68106)	Top-1 acc 76.953 (72.491)	Top-5 acc 90.625 (89.078)	lr 0.00044
Train [110][2700/3239]	Time 0.234 (0.643)	Data Time 0.001 (0.016)	Loss 2.2911 (2.1673)	Entropy 0.67779 (0.68105)	Top-1 acc 69.531 (72.489)	Top-5 acc 85.156 (89.077)	lr 0.00044
Train [110][2710/3239]	Time 0.231 (0.642)	Data Time 0.001 (0.016)	Loss 2.2106 (2.1674)	Entropy 0.67775 (0.68104)	Top-1 acc 68.359 (72.481)	Top-5 acc 91.016 (89.076)	lr 0.00044
Train [110][2720/3239]	Time 0.266 (0.641)	Data Time 0.001 (0.016)	Loss 2.2316 (2.1675)	Entropy 0.67775 (0.68103)	Top-1 acc 71.484 (72.479)	Top-5 acc 88.672 (89.075)	lr 0.00044
Train [110][2730/3239]	Time 0.237 (0.641)	Data Time 0.001 (0.016)	Loss 2.2099 (2.1674)	Entropy 0.67773 (0.68102)	Top-1 acc 71.875 (72.482)	Top-5 acc 89.062 (89.075)	lr 0.00044
Train [110][2740/3239]	Time 0.228 (0.640)	Data Time 0.001 (0.016)	Loss 2.1508 (2.1672)	Entropy 0.67774 (0.68100)	Top-1 acc 72.656 (72.486)	Top-5 acc 89.062 (89.076)	lr 0.00044
Train [110][2750/3239]	Time 0.215 (0.640)	Data Time 0.001 (0.016)	Loss 2.1752 (2.1672)	Entropy 0.67773 (0.68099)	Top-1 acc 70.312 (72.487)	Top-5 acc 87.109 (89.073)	lr 0.00044
Train [110][2760/3239]	Time 0.264 (0.639)	Data Time 0.001 (0.016)	Loss 2.2155 (2.1672)	Entropy 0.67772 (0.68098)	Top-1 acc 70.703 (72.485)	Top-5 acc 87.109 (89.070)	lr 0.00044
Train [110][2770/3239]	Time 0.237 (0.639)	Data Time 0.001 (0.016)	Loss 2.1270 (2.1672)	Entropy 0.67760 (0.68097)	Top-1 acc 73.438 (72.489)	Top-5 acc 89.453 (89.071)	lr 0.00044
Train [110][2780/3239]	Time 0.288 (0.638)	Data Time 0.001 (0.016)	Loss 2.2248 (2.1671)	Entropy 0.67755 (0.68096)	Top-1 acc 72.266 (72.491)	Top-5 acc 87.891 (89.072)	lr 0.00044
Train [110][2790/3239]	Time 0.235 (0.637)	Data Time 0.001 (0.016)	Loss 2.1976 (2.1671)	Entropy 0.67750 (0.68094)	Top-1 acc 72.656 (72.490)	Top-5 acc 86.328 (89.070)	lr 0.00044
Train [110][2800/3239]	Time 0.234 (0.637)	Data Time 0.001 (0.015)	Loss 2.3148 (2.1672)	Entropy 0.67744 (0.68093)	Top-1 acc 69.141 (72.490)	Top-5 acc 87.109 (89.071)	lr 0.00044
Train [110][2810/3239]	Time 0.325 (0.636)	Data Time 0.001 (0.015)	Loss 2.3214 (2.1673)	Entropy 0.67738 (0.68092)	Top-1 acc 69.141 (72.487)	Top-5 acc 89.453 (89.070)	lr 0.00044
Train [110][2820/3239]	Time 0.233 (0.636)	Data Time 0.001 (0.015)	Loss 2.1125 (2.1673)	Entropy 0.67738 (0.68091)	Top-1 acc 75.391 (72.488)	Top-5 acc 90.234 (89.071)	lr 0.00044
Train [110][2830/3239]	Time 0.247 (0.635)	Data Time 0.001 (0.015)	Loss 2.1990 (2.1672)	Entropy 0.67715 (0.68089)	Top-1 acc 71.875 (72.492)	Top-5 acc 88.672 (89.074)	lr 0.00044
Train [110][2840/3239]	Time 0.223 (0.635)	Data Time 0.001 (0.015)	Loss 2.2002 (2.1674)	Entropy 0.67712 (0.68088)	Top-1 acc 70.312 (72.483)	Top-5 acc 89.844 (89.071)	lr 0.00044
Train [110][2850/3239]	Time 0.326 (0.634)	Data Time 0.002 (0.015)	Loss 1.9628 (2.1674)	Entropy 0.67710 (0.68087)	Top-1 acc 78.125 (72.485)	Top-5 acc 91.797 (89.071)	lr 0.00044
Train [110][2860/3239]	Time 0.229 (0.634)	Data Time 0.001 (0.015)	Loss 2.2108 (2.1675)	Entropy 0.67705 (0.68085)	Top-1 acc 69.141 (72.483)	Top-5 acc 90.625 (89.067)	lr 0.00044
Train [110][2870/3239]	Time 0.230 (0.633)	Data Time 0.001 (0.015)	Loss 2.1518 (2.1674)	Entropy 0.67701 (0.68084)	Top-1 acc 69.141 (72.485)	Top-5 acc 88.672 (89.070)	lr 0.00044
Train [110][2880/3239]	Time 0.232 (0.633)	Data Time 0.001 (0.015)	Loss 2.1636 (2.1674)	Entropy 0.67700 (0.68083)	Top-1 acc 73.438 (72.487)	Top-5 acc 88.281 (89.072)	lr 0.00044
Train [110][2890/3239]	Time 0.322 (0.632)	Data Time 0.001 (0.015)	Loss 2.1655 (2.1673)	Entropy 0.67696 (0.68081)	Top-1 acc 73.828 (72.490)	Top-5 acc 90.234 (89.072)	lr 0.00044
Train [110][2900/3239]	Time 0.240 (0.632)	Data Time 0.001 (0.015)	Loss 2.1903 (2.1673)	Entropy 0.67688 (0.68080)	Top-1 acc 71.484 (72.487)	Top-5 acc 88.281 (89.074)	lr 0.00043
Train [110][2910/3239]	Time 0.266 (0.631)	Data Time 0.001 (0.015)	Loss 2.2076 (2.1674)	Entropy 0.67685 (0.68079)	Top-1 acc 73.438 (72.487)	Top-5 acc 88.281 (89.071)	lr 0.00043
Train [110][2920/3239]	Time 0.240 (0.631)	Data Time 0.001 (0.015)	Loss 2.0913 (2.1674)	Entropy 0.67682 (0.68077)	Top-1 acc 72.656 (72.490)	Top-5 acc 90.625 (89.072)	lr 0.00043
Train [110][2930/3239]	Time 0.331 (0.630)	Data Time 0.001 (0.015)	Loss 2.1045 (2.1672)	Entropy 0.67673 (0.68076)	Top-1 acc 72.656 (72.492)	Top-5 acc 91.406 (89.077)	lr 0.00043
Train [110][2940/3239]	Time 0.218 (0.630)	Data Time 0.001 (0.015)	Loss 2.1879 (2.1672)	Entropy 0.67674 (0.68075)	Top-1 acc 67.188 (72.491)	Top-5 acc 89.453 (89.077)	lr 0.00043
Train [110][2950/3239]	Time 0.280 (0.647)	Data Time 0.004 (0.015)	Loss 2.1511 (2.1672)	Entropy 0.67671 (0.68073)	Top-1 acc 72.266 (72.493)	Top-5 acc 88.281 (89.078)	lr 0.00043
Train [110][2960/3239]	Time 0.240 (0.646)	Data Time 0.002 (0.015)	Loss 2.1026 (2.1672)	Entropy 0.67669 (0.68072)	Top-1 acc 74.609 (72.492)	Top-5 acc 89.844 (89.076)	lr 0.00043
Train [110][2970/3239]	Time 0.245 (0.646)	Data Time 0.001 (0.015)	Loss 2.3795 (2.1674)	Entropy 0.67669 (0.68071)	Top-1 acc 67.969 (72.489)	Top-5 acc 82.812 (89.072)	lr 0.00043
Train [110][2980/3239]	Time 0.234 (0.645)	Data Time 0.001 (0.015)	Loss 2.2141 (2.1673)	Entropy 0.67668 (0.68069)	Top-1 acc 72.266 (72.492)	Top-5 acc 89.453 (89.075)	lr 0.00043
Train [110][2990/3239]	Time 0.260 (0.645)	Data Time 0.001 (0.015)	Loss 2.1072 (2.1672)	Entropy 0.67675 (0.68068)	Top-1 acc 77.344 (72.494)	Top-5 acc 89.453 (89.076)	lr 0.00043
Train [110][3000/3239]	Time 0.242 (0.644)	Data Time 0.001 (0.015)	Loss 2.1587 (2.1672)	Entropy 0.67677 (0.68067)	Top-1 acc 71.484 (72.493)	Top-5 acc 89.062 (89.077)	lr 0.00043
Train [110][3010/3239]	Time 0.261 (0.644)	Data Time 0.001 (0.015)	Loss 2.2004 (2.1672)	Entropy 0.67679 (0.68065)	Top-1 acc 70.312 (72.492)	Top-5 acc 89.453 (89.076)	lr 0.00043
Train [110][3020/3239]	Time 0.233 (0.643)	Data Time 0.001 (0.014)	Loss 2.2397 (2.1672)	Entropy 0.67681 (0.68064)	Top-1 acc 71.484 (72.492)	Top-5 acc 87.109 (89.076)	lr 0.00043
Train [110][3030/3239]	Time 0.234 (0.643)	Data Time 0.001 (0.014)	Loss 2.0998 (2.1671)	Entropy 0.67683 (0.68063)	Top-1 acc 75.391 (72.493)	Top-5 acc 89.453 (89.078)	lr 0.00043
Train [110][3040/3239]	Time 0.225 (0.642)	Data Time 0.001 (0.014)	Loss 2.1474 (2.1671)	Entropy 0.67691 (0.68062)	Top-1 acc 73.438 (72.494)	Top-5 acc 89.062 (89.079)	lr 0.00043
Train [110][3050/3239]	Time 0.231 (0.641)	Data Time 0.002 (0.014)	Loss 2.1490 (2.1671)	Entropy 0.67706 (0.68060)	Top-1 acc 70.703 (72.493)	Top-5 acc 89.844 (89.080)	lr 0.00043
Train [110][3060/3239]	Time 0.232 (0.641)	Data Time 0.001 (0.014)	Loss 2.1118 (2.1672)	Entropy 0.67703 (0.68059)	Top-1 acc 75.000 (72.494)	Top-5 acc 88.672 (89.077)	lr 0.00043
Train [110][3070/3239]	Time 0.270 (0.641)	Data Time 0.002 (0.014)	Loss 2.2508 (2.1673)	Entropy 0.67705 (0.68058)	Top-1 acc 68.750 (72.486)	Top-5 acc 87.109 (89.076)	lr 0.00043
Train [110][3080/3239]	Time 0.276 (0.640)	Data Time 0.001 (0.014)	Loss 2.0760 (2.1673)	Entropy 0.67714 (0.68057)	Top-1 acc 72.266 (72.483)	Top-5 acc 90.234 (89.076)	lr 0.00043
Train [110][3090/3239]	Time 0.276 (0.640)	Data Time 0.001 (0.014)	Loss 2.3546 (2.1674)	Entropy 0.67713 (0.68056)	Top-1 acc 66.406 (72.483)	Top-5 acc 87.109 (89.075)	lr 0.00043
Train [110][3100/3239]	Time 0.214 (0.639)	Data Time 0.001 (0.014)	Loss 2.0484 (2.1674)	Entropy 0.67710 (0.68055)	Top-1 acc 79.688 (72.487)	Top-5 acc 89.844 (89.075)	lr 0.00043
Train [110][3110/3239]	Time 0.231 (0.639)	Data Time 0.001 (0.014)	Loss 2.2117 (2.1674)	Entropy 0.67707 (0.68054)	Top-1 acc 69.922 (72.485)	Top-5 acc 89.844 (89.076)	lr 0.00043
Train [110][3120/3239]	Time 0.219 (0.638)	Data Time 0.001 (0.014)	Loss 2.2548 (2.1674)	Entropy 0.67713 (0.68052)	Top-1 acc 73.047 (72.485)	Top-5 acc 87.109 (89.074)	lr 0.00043
Train [110][3130/3239]	Time 0.240 (0.638)	Data Time 0.018 (0.014)	Loss 2.2354 (2.1675)	Entropy 0.67716 (0.68051)	Top-1 acc 72.656 (72.485)	Top-5 acc 86.328 (89.073)	lr 0.00043
Train [110][3140/3239]	Time 0.325 (0.637)	Data Time 0.001 (0.014)	Loss 2.0279 (2.1675)	Entropy 0.67702 (0.68050)	Top-1 acc 76.562 (72.483)	Top-5 acc 91.406 (89.073)	lr 0.00043
Train [110][3150/3239]	Time 0.258 (0.637)	Data Time 0.001 (0.014)	Loss 1.9883 (2.1676)	Entropy 0.67698 (0.68049)	Top-1 acc 77.734 (72.479)	Top-5 acc 92.578 (89.072)	lr 0.00043
Train [110][3160/3239]	Time 0.227 (0.636)	Data Time 0.001 (0.014)	Loss 2.0897 (2.1675)	Entropy 0.67682 (0.68048)	Top-1 acc 77.344 (72.483)	Top-5 acc 89.844 (89.075)	lr 0.00043
Train [110][3170/3239]	Time 0.236 (0.636)	Data Time 0.001 (0.014)	Loss 2.1739 (2.1675)	Entropy 0.67675 (0.68047)	Top-1 acc 70.312 (72.479)	Top-5 acc 89.062 (89.073)	lr 0.00043
Train [110][3180/3239]	Time 0.342 (0.635)	Data Time 0.000 (0.014)	Loss 2.0996 (2.1675)	Entropy 0.67670 (0.68046)	Top-1 acc 76.562 (72.478)	Top-5 acc 87.891 (89.072)	lr 0.00043
Train [110][3190/3239]	Time 0.225 (0.635)	Data Time 0.000 (0.014)	Loss 2.0477 (2.1676)	Entropy 0.67670 (0.68045)	Top-1 acc 73.828 (72.476)	Top-5 acc 90.234 (89.068)	lr 0.00043
Train [110][3200/3239]	Time 0.232 (0.634)	Data Time 0.000 (0.014)	Loss 2.1450 (2.1675)	Entropy 0.67671 (0.68043)	Top-1 acc 71.875 (72.479)	Top-5 acc 90.625 (89.071)	lr 0.00043
Train [110][3210/3239]	Time 0.233 (0.634)	Data Time 0.000 (0.014)	Loss 2.0307 (2.1674)	Entropy 0.67663 (0.68042)	Top-1 acc 75.391 (72.481)	Top-5 acc 92.188 (89.071)	lr 0.00043
Train [110][3220/3239]	Time 0.328 (0.633)	Data Time 0.000 (0.014)	Loss 2.2017 (2.1675)	Entropy 0.67653 (0.68041)	Top-1 acc 73.828 (72.480)	Top-5 acc 86.719 (89.071)	lr 0.00043
Train [110][3230/3239]	Time 0.227 (0.633)	Data Time 0.000 (0.014)	Loss 2.1587 (2.1676)	Entropy 0.67649 (0.68040)	Top-1 acc 74.219 (72.480)	Top-5 acc 90.625 (89.071)	lr 0.00043
Train [110][3239/3239]	Time 2.296 (0.632)	Data Time 0.000 (0.014)	Loss 2.1265 (2.1676)	Entropy 0.67649 (0.68039)	Top-1 acc 75.309 (72.479)	Top-5 acc 90.123 (89.070)	lr 0.00043
==========Valid [110/120]	loss 1.203	top-1 acc 72.549 (72.549)	top-5 acc 89.893	Train top-1 72.479	top-5 89.070	Entropy 0.67649	Latency-None: 0.000ms	Flops: 546.53M
Train [111][0/3239]	Time 40.675 (40.675)	Data Time 39.038 (39.038)	Loss 2.0243 (2.0243)	Entropy 0.67637 (0.67637)	Top-1 acc 78.125 (78.125)	Top-5 acc 92.578 (92.578)	lr 0.00043
Train [111][10/3239]	Time 2.673 (4.362)	Data Time 0.002 (3.606)	Loss 2.3414 (2.1416)	Entropy 0.67637 (0.67637)	Top-1 acc 69.141 (73.402)	Top-5 acc 85.156 (89.524)	lr 0.00043
Train [111][20/3239]	Time 0.247 (2.404)	Data Time 0.001 (1.889)	Loss 2.1961 (2.1440)	Entropy 0.67651 (0.67644)	Top-1 acc 73.047 (73.400)	Top-5 acc 90.625 (89.472)	lr 0.00043
Train [111][30/3239]	Time 0.368 (1.792)	Data Time 0.002 (1.280)	Loss 2.0410 (2.1432)	Entropy 0.67623 (0.67638)	Top-1 acc 76.172 (73.425)	Top-5 acc 89.844 (89.365)	lr 0.00043
Train [111][40/3239]	Time 0.249 (1.477)	Data Time 0.001 (0.969)	Loss 2.2439 (2.1424)	Entropy 0.67628 (0.67635)	Top-1 acc 71.484 (73.457)	Top-5 acc 86.719 (89.396)	lr 0.00042
Train [111][50/3239]	Time 0.281 (2.319)	Data Time 0.004 (0.779)	Loss 2.2982 (2.1557)	Entropy 0.67653 (0.67637)	Top-1 acc 68.359 (73.185)	Top-5 acc 87.500 (89.208)	lr 0.00042
Train [111][60/3239]	Time 0.223 (2.041)	Data Time 0.002 (0.652)	Loss 2.1362 (2.1519)	Entropy 0.67648 (0.67639)	Top-1 acc 71.875 (73.207)	Top-5 acc 89.844 (89.306)	lr 0.00042
Train [111][70/3239]	Time 0.319 (1.823)	Data Time 0.001 (0.560)	Loss 2.1094 (2.1491)	Entropy 0.67648 (0.67640)	Top-1 acc 76.953 (73.212)	Top-5 acc 88.672 (89.448)	lr 0.00042
Train [111][80/3239]	Time 0.228 (1.657)	Data Time 0.001 (0.491)	Loss 2.2022 (2.1514)	Entropy 0.67647 (0.67641)	Top-1 acc 71.484 (73.172)	Top-5 acc 89.062 (89.434)	lr 0.00042
Train [111][90/3239]	Time 0.228 (1.528)	Data Time 0.001 (0.438)	Loss 2.0637 (2.1494)	Entropy 0.67635 (0.67642)	Top-1 acc 74.609 (73.158)	Top-5 acc 91.797 (89.445)	lr 0.00042
Train [111][100/3239]	Time 0.226 (1.424)	Data Time 0.001 (0.394)	Loss 2.0072 (2.1460)	Entropy 0.67633 (0.67641)	Top-1 acc 75.781 (73.205)	Top-5 acc 91.016 (89.488)	lr 0.00042
Train [111][110/3239]	Time 0.334 (1.340)	Data Time 0.002 (0.359)	Loss 2.1990 (2.1485)	Entropy 0.67634 (0.67640)	Top-1 acc 72.266 (73.156)	Top-5 acc 88.672 (89.471)	lr 0.00042
Train [111][120/3239]	Time 2.700 (1.271)	Data Time 0.002 (0.330)	Loss 2.1517 (2.1523)	Entropy 0.67634 (0.67640)	Top-1 acc 73.047 (73.076)	Top-5 acc 89.062 (89.405)	lr 0.00042
Train [111][130/3239]	Time 0.226 (1.192)	Data Time 0.005 (0.304)	Loss 2.3034 (2.1525)	Entropy 0.67632 (0.67639)	Top-1 acc 69.531 (73.139)	Top-5 acc 86.719 (89.367)	lr 0.00042
Train [111][140/3239]	Time 0.231 (1.142)	Data Time 0.001 (0.283)	Loss 2.1513 (2.1545)	Entropy 0.67632 (0.67639)	Top-1 acc 73.047 (73.063)	Top-5 acc 88.281 (89.392)	lr 0.00042
Train [111][150/3239]	Time 0.366 (1.100)	Data Time 0.001 (0.264)	Loss 2.3880 (2.1542)	Entropy 0.67632 (0.67638)	Top-1 acc 67.578 (73.034)	Top-5 acc 86.328 (89.432)	lr 0.00042
Train [111][160/3239]	Time 0.246 (1.062)	Data Time 0.002 (0.248)	Loss 1.9990 (2.1531)	Entropy 0.67629 (0.67638)	Top-1 acc 78.906 (73.103)	Top-5 acc 89.844 (89.397)	lr 0.00042
Train [111][170/3239]	Time 0.241 (1.029)	Data Time 0.001 (0.234)	Loss 2.1510 (2.1542)	Entropy 0.67626 (0.67637)	Top-1 acc 73.047 (73.049)	Top-5 acc 90.625 (89.421)	lr 0.00042
Train [111][180/3239]	Time 0.233 (0.998)	Data Time 0.001 (0.221)	Loss 2.1693 (2.1566)	Entropy 0.67622 (0.67636)	Top-1 acc 73.438 (73.015)	Top-5 acc 88.672 (89.401)	lr 0.00042
Train [111][190/3239]	Time 0.236 (0.971)	Data Time 0.001 (0.209)	Loss 2.0600 (2.1560)	Entropy 0.67622 (0.67636)	Top-1 acc 76.172 (73.004)	Top-5 acc 91.797 (89.373)	lr 0.00042
Train [111][200/3239]	Time 0.235 (0.946)	Data Time 0.001 (0.199)	Loss 2.1263 (2.1554)	Entropy 0.67614 (0.67635)	Top-1 acc 75.000 (73.016)	Top-5 acc 89.062 (89.368)	lr 0.00042
Train [111][210/3239]	Time 0.224 (0.924)	Data Time 0.001 (0.190)	Loss 2.1641 (2.1564)	Entropy 0.67618 (0.67634)	Top-1 acc 74.609 (72.967)	Top-5 acc 89.844 (89.338)	lr 0.00042
Train [111][220/3239]	Time 0.347 (0.904)	Data Time 0.001 (0.181)	Loss 2.2054 (2.1557)	Entropy 0.67622 (0.67633)	Top-1 acc 71.875 (72.943)	Top-5 acc 87.500 (89.347)	lr 0.00042
Train [111][230/3239]	Time 2.539 (0.886)	Data Time 0.001 (0.173)	Loss 2.0472 (2.1543)	Entropy 0.67622 (0.67633)	Top-1 acc 76.953 (72.962)	Top-5 acc 91.797 (89.379)	lr 0.00042
Train [111][240/3239]	Time 0.239 (0.860)	Data Time 0.001 (0.166)	Loss 2.0740 (2.1554)	Entropy 0.67611 (0.67632)	Top-1 acc 75.391 (72.886)	Top-5 acc 91.406 (89.364)	lr 0.00042
Train [111][250/3239]	Time 0.235 (0.846)	Data Time 0.001 (0.160)	Loss 1.9590 (2.1551)	Entropy 0.67613 (0.67631)	Top-1 acc 78.516 (72.921)	Top-5 acc 92.969 (89.358)	lr 0.00042
Train [111][260/3239]	Time 0.214 (0.831)	Data Time 0.001 (0.154)	Loss 2.0113 (2.1545)	Entropy 0.67616 (0.67630)	Top-1 acc 77.734 (72.918)	Top-5 acc 90.625 (89.363)	lr 0.00042
Train [111][270/3239]	Time 0.229 (0.818)	Data Time 0.001 (0.148)	Loss 2.3023 (2.1559)	Entropy 0.67604 (0.67629)	Top-1 acc 68.359 (72.894)	Top-5 acc 84.766 (89.344)	lr 0.00042
Train [111][280/3239]	Time 0.244 (0.806)	Data Time 0.001 (0.143)	Loss 2.1941 (2.1544)	Entropy 0.67600 (0.67628)	Top-1 acc 71.875 (72.931)	Top-5 acc 90.234 (89.359)	lr 0.00042
Train [111][290/3239]	Time 0.221 (0.795)	Data Time 0.001 (0.138)	Loss 2.3080 (2.1550)	Entropy 0.67611 (0.67628)	Top-1 acc 69.141 (72.907)	Top-5 acc 85.547 (89.340)	lr 0.00042
Train [111][300/3239]	Time 0.224 (0.784)	Data Time 0.001 (0.133)	Loss 2.0428 (2.1548)	Entropy 0.67612 (0.67627)	Top-1 acc 74.219 (72.894)	Top-5 acc 91.797 (89.342)	lr 0.00042
Train [111][310/3239]	Time 0.232 (0.775)	Data Time 0.002 (0.129)	Loss 2.2003 (2.1563)	Entropy 0.67611 (0.67627)	Top-1 acc 72.266 (72.848)	Top-5 acc 87.891 (89.292)	lr 0.00042
Train [111][320/3239]	Time 0.321 (0.766)	Data Time 0.001 (0.125)	Loss 2.2149 (2.1546)	Entropy 0.67610 (0.67626)	Top-1 acc 70.703 (72.869)	Top-5 acc 87.891 (89.317)	lr 0.00042
Train [111][330/3239]	Time 0.231 (0.758)	Data Time 0.001 (0.121)	Loss 2.2721 (2.1570)	Entropy 0.67615 (0.67626)	Top-1 acc 69.922 (72.788)	Top-5 acc 87.109 (89.284)	lr 0.00042
Train [111][340/3239]	Time 2.630 (0.750)	Data Time 0.001 (0.118)	Loss 2.1989 (2.1579)	Entropy 0.67615 (0.67625)	Top-1 acc 72.266 (72.770)	Top-5 acc 87.109 (89.258)	lr 0.00042
Train [111][350/3239]	Time 0.236 (0.735)	Data Time 0.001 (0.115)	Loss 2.2427 (2.1587)	Entropy 0.67603 (0.67625)	Top-1 acc 70.312 (72.745)	Top-5 acc 88.281 (89.247)	lr 0.00042
Train [111][360/3239]	Time 0.331 (0.729)	Data Time 0.001 (0.111)	Loss 2.0201 (2.1579)	Entropy 0.67590 (0.67624)	Top-1 acc 75.391 (72.767)	Top-5 acc 93.359 (89.256)	lr 0.00042
Train [111][370/3239]	Time 0.234 (0.722)	Data Time 0.001 (0.109)	Loss 2.0750 (2.1577)	Entropy 0.67583 (0.67623)	Top-1 acc 74.219 (72.757)	Top-5 acc 89.062 (89.254)	lr 0.00042
Train [111][380/3239]	Time 0.232 (0.716)	Data Time 0.001 (0.106)	Loss 2.2649 (2.1584)	Entropy 0.67591 (0.67622)	Top-1 acc 69.141 (72.743)	Top-5 acc 86.719 (89.244)	lr 0.00042
Train [111][390/3239]	Time 0.231 (0.710)	Data Time 0.001 (0.103)	Loss 2.1123 (2.1582)	Entropy 0.67576 (0.67621)	Top-1 acc 74.609 (72.757)	Top-5 acc 89.453 (89.239)	lr 0.00042
Train [111][400/3239]	Time 0.328 (0.704)	Data Time 0.001 (0.101)	Loss 2.4769 (2.1594)	Entropy 0.67576 (0.67620)	Top-1 acc 62.891 (72.729)	Top-5 acc 85.156 (89.217)	lr 0.00042
Train [111][410/3239]	Time 0.302 (0.824)	Data Time 0.003 (0.098)	Loss 2.1830 (2.1599)	Entropy 0.67581 (0.67619)	Top-1 acc 71.094 (72.716)	Top-5 acc 90.234 (89.216)	lr 0.00042
Train [111][420/3239]	Time 0.230 (0.819)	Data Time 0.002 (0.096)	Loss 1.9988 (2.1594)	Entropy 0.67577 (0.67618)	Top-1 acc 78.125 (72.723)	Top-5 acc 92.188 (89.231)	lr 0.00042
Train [111][430/3239]	Time 0.255 (0.811)	Data Time 0.002 (0.094)	Loss 2.0852 (2.1593)	Entropy 0.67572 (0.67617)	Top-1 acc 77.734 (72.736)	Top-5 acc 91.016 (89.230)	lr 0.00041
Train [111][440/3239]	Time 0.315 (0.804)	Data Time 0.002 (0.092)	Loss 2.1085 (2.1595)	Entropy 0.67574 (0.67616)	Top-1 acc 71.094 (72.709)	Top-5 acc 91.406 (89.223)	lr 0.00041
Train [111][450/3239]	Time 2.644 (0.797)	Data Time 0.001 (0.090)	Loss 2.1829 (2.1596)	Entropy 0.67574 (0.67615)	Top-1 acc 71.875 (72.710)	Top-5 acc 89.453 (89.211)	lr 0.00041
Train [111][460/3239]	Time 0.229 (0.785)	Data Time 0.001 (0.088)	Loss 2.1536 (2.1593)	Entropy 0.67574 (0.67614)	Top-1 acc 75.391 (72.708)	Top-5 acc 89.062 (89.218)	lr 0.00041
Train [111][470/3239]	Time 0.245 (0.779)	Data Time 0.001 (0.086)	Loss 2.1526 (2.1591)	Entropy 0.67583 (0.67613)	Top-1 acc 73.438 (72.699)	Top-5 acc 87.500 (89.219)	lr 0.00041
Train [111][480/3239]	Time 0.229 (0.773)	Data Time 0.001 (0.084)	Loss 2.0085 (2.1595)	Entropy 0.67580 (0.67613)	Top-1 acc 76.172 (72.693)	Top-5 acc 92.578 (89.213)	lr 0.00041
Train [111][490/3239]	Time 0.239 (0.767)	Data Time 0.001 (0.082)	Loss 2.2240 (2.1605)	Entropy 0.67577 (0.67612)	Top-1 acc 69.922 (72.687)	Top-5 acc 88.281 (89.199)	lr 0.00041
Train [111][500/3239]	Time 0.236 (0.761)	Data Time 0.001 (0.081)	Loss 1.9706 (2.1603)	Entropy 0.67579 (0.67611)	Top-1 acc 77.344 (72.713)	Top-5 acc 92.188 (89.190)	lr 0.00041
Train [111][510/3239]	Time 0.221 (0.756)	Data Time 0.001 (0.079)	Loss 2.2444 (2.1600)	Entropy 0.67583 (0.67611)	Top-1 acc 71.875 (72.724)	Top-5 acc 87.109 (89.202)	lr 0.00041
Train [111][520/3239]	Time 0.231 (0.751)	Data Time 0.001 (0.078)	Loss 2.1810 (2.1601)	Entropy 0.67581 (0.67610)	Top-1 acc 74.609 (72.720)	Top-5 acc 86.719 (89.183)	lr 0.00041
Train [111][530/3239]	Time 0.220 (0.746)	Data Time 0.001 (0.076)	Loss 2.1641 (2.1610)	Entropy 0.67582 (0.67610)	Top-1 acc 71.484 (72.695)	Top-5 acc 88.672 (89.184)	lr 0.00041
Train [111][540/3239]	Time 0.236 (0.741)	Data Time 0.001 (0.075)	Loss 2.2373 (2.1606)	Entropy 0.67580 (0.67609)	Top-1 acc 70.703 (72.702)	Top-5 acc 88.281 (89.193)	lr 0.00041
Train [111][550/3239]	Time 0.267 (0.736)	Data Time 0.001 (0.074)	Loss 2.4484 (2.1607)	Entropy 0.67586 (0.67609)	Top-1 acc 63.672 (72.697)	Top-5 acc 85.938 (89.196)	lr 0.00041
Train [111][560/3239]	Time 2.600 (0.732)	Data Time 0.001 (0.072)	Loss 2.0757 (2.1607)	Entropy 0.67586 (0.67608)	Top-1 acc 72.656 (72.697)	Top-5 acc 91.406 (89.197)	lr 0.00041
Train [111][570/3239]	Time 0.275 (0.723)	Data Time 0.002 (0.071)	Loss 2.1778 (2.1619)	Entropy 0.67583 (0.67608)	Top-1 acc 72.266 (72.673)	Top-5 acc 89.453 (89.177)	lr 0.00041
Train [111][580/3239]	Time 0.218 (0.719)	Data Time 0.001 (0.070)	Loss 2.1610 (2.1620)	Entropy 0.67576 (0.67607)	Top-1 acc 71.484 (72.678)	Top-5 acc 87.891 (89.171)	lr 0.00041
Train [111][590/3239]	Time 0.242 (0.715)	Data Time 0.001 (0.069)	Loss 2.1343 (2.1623)	Entropy 0.67577 (0.67607)	Top-1 acc 73.438 (72.656)	Top-5 acc 90.625 (89.172)	lr 0.00041
Train [111][600/3239]	Time 0.252 (0.712)	Data Time 0.002 (0.068)	Loss 2.2240 (2.1625)	Entropy 0.67573 (0.67606)	Top-1 acc 70.703 (72.647)	Top-5 acc 88.281 (89.167)	lr 0.00041
Train [111][610/3239]	Time 0.339 (0.708)	Data Time 0.001 (0.067)	Loss 2.0815 (2.1623)	Entropy 0.67566 (0.67605)	Top-1 acc 76.562 (72.658)	Top-5 acc 91.406 (89.170)	lr 0.00041
Train [111][620/3239]	Time 0.225 (0.705)	Data Time 0.001 (0.065)	Loss 2.1392 (2.1624)	Entropy 0.67556 (0.67605)	Top-1 acc 74.219 (72.651)	Top-5 acc 90.234 (89.162)	lr 0.00041
Train [111][630/3239]	Time 0.241 (0.701)	Data Time 0.001 (0.064)	Loss 2.3227 (2.1620)	Entropy 0.67552 (0.67604)	Top-1 acc 66.797 (72.655)	Top-5 acc 87.109 (89.171)	lr 0.00041
Train [111][640/3239]	Time 0.230 (0.697)	Data Time 0.001 (0.063)	Loss 2.0335 (2.1621)	Entropy 0.67549 (0.67603)	Top-1 acc 74.219 (72.657)	Top-5 acc 92.578 (89.163)	lr 0.00041
Train [111][650/3239]	Time 0.312 (0.694)	Data Time 0.001 (0.063)	Loss 2.1450 (2.1619)	Entropy 0.67551 (0.67602)	Top-1 acc 72.656 (72.647)	Top-5 acc 87.891 (89.166)	lr 0.00041
Train [111][660/3239]	Time 0.224 (0.691)	Data Time 0.001 (0.062)	Loss 2.2451 (2.1621)	Entropy 0.67550 (0.67601)	Top-1 acc 70.703 (72.640)	Top-5 acc 86.719 (89.177)	lr 0.00041
Train [111][670/3239]	Time 2.530 (0.687)	Data Time 0.001 (0.061)	Loss 2.0988 (2.1615)	Entropy 0.67550 (0.67601)	Top-1 acc 71.875 (72.643)	Top-5 acc 89.844 (89.181)	lr 0.00041
Train [111][680/3239]	Time 0.228 (0.681)	Data Time 0.001 (0.060)	Loss 2.1523 (2.1615)	Entropy 0.67550 (0.67600)	Top-1 acc 73.047 (72.628)	Top-5 acc 91.016 (89.190)	lr 0.00041
Train [111][690/3239]	Time 0.339 (0.678)	Data Time 0.001 (0.059)	Loss 2.2377 (2.1615)	Entropy 0.67547 (0.67599)	Top-1 acc 67.969 (72.619)	Top-5 acc 86.328 (89.184)	lr 0.00041
Train [111][700/3239]	Time 0.205 (0.675)	Data Time 0.001 (0.058)	Loss 2.0853 (2.1611)	Entropy 0.67546 (0.67598)	Top-1 acc 73.828 (72.638)	Top-5 acc 90.625 (89.195)	lr 0.00041
Train [111][710/3239]	Time 0.222 (0.672)	Data Time 0.001 (0.057)	Loss 2.3051 (2.1610)	Entropy 0.67542 (0.67598)	Top-1 acc 67.969 (72.648)	Top-5 acc 85.938 (89.189)	lr 0.00041
Train [111][720/3239]	Time 0.248 (0.669)	Data Time 0.001 (0.057)	Loss 2.1879 (2.1604)	Entropy 0.67536 (0.67597)	Top-1 acc 73.828 (72.673)	Top-5 acc 88.281 (89.197)	lr 0.00041
Train [111][730/3239]	Time 0.234 (0.667)	Data Time 0.001 (0.056)	Loss 2.2826 (2.1611)	Entropy 0.67536 (0.67596)	Top-1 acc 69.141 (72.648)	Top-5 acc 85.547 (89.178)	lr 0.00041
Train [111][740/3239]	Time 0.244 (0.664)	Data Time 0.001 (0.055)	Loss 2.1827 (2.1613)	Entropy 0.67533 (0.67595)	Top-1 acc 73.047 (72.647)	Top-5 acc 89.062 (89.177)	lr 0.00041
Train [111][750/3239]	Time 0.220 (0.662)	Data Time 0.001 (0.054)	Loss 2.2099 (2.1612)	Entropy 0.67527 (0.67594)	Top-1 acc 69.141 (72.638)	Top-5 acc 89.062 (89.176)	lr 0.00041
Train [111][760/3239]	Time 0.227 (0.660)	Data Time 0.002 (0.054)	Loss 2.2936 (2.1608)	Entropy 0.67526 (0.67593)	Top-1 acc 67.969 (72.638)	Top-5 acc 87.109 (89.187)	lr 0.00041
Train [111][770/3239]	Time 0.245 (0.728)	Data Time 0.003 (0.053)	Loss 2.1704 (2.1607)	Entropy 0.67519 (0.67593)	Top-1 acc 72.266 (72.636)	Top-5 acc 89.062 (89.191)	lr 0.00041
Train [111][780/3239]	Time 2.541 (0.725)	Data Time 0.002 (0.052)	Loss 2.1995 (2.1609)	Entropy 0.67519 (0.67592)	Top-1 acc 72.656 (72.637)	Top-5 acc 90.234 (89.196)	lr 0.00041
Train [111][790/3239]	Time 0.240 (0.719)	Data Time 0.002 (0.052)	Loss 1.9508 (2.1608)	Entropy 0.67514 (0.67591)	Top-1 acc 77.734 (72.648)	Top-5 acc 93.750 (89.193)	lr 0.00041
Train [111][800/3239]	Time 0.240 (0.716)	Data Time 0.001 (0.051)	Loss 2.2637 (2.1606)	Entropy 0.67501 (0.67590)	Top-1 acc 68.750 (72.650)	Top-5 acc 86.719 (89.198)	lr 0.00041
Train [111][810/3239]	Time 0.247 (0.713)	Data Time 0.001 (0.051)	Loss 2.1960 (2.1605)	Entropy 0.67497 (0.67588)	Top-1 acc 71.875 (72.650)	Top-5 acc 89.453 (89.204)	lr 0.00041
Train [111][820/3239]	Time 0.237 (0.710)	Data Time 0.001 (0.050)	Loss 2.2052 (2.1606)	Entropy 0.67499 (0.67587)	Top-1 acc 71.094 (72.651)	Top-5 acc 89.062 (89.201)	lr 0.00040
Train [111][830/3239]	Time 0.225 (0.708)	Data Time 0.001 (0.049)	Loss 2.2420 (2.1608)	Entropy 0.67494 (0.67586)	Top-1 acc 71.484 (72.640)	Top-5 acc 88.672 (89.196)	lr 0.00040
Train [111][840/3239]	Time 0.225 (0.705)	Data Time 0.001 (0.049)	Loss 2.1251 (2.1605)	Entropy 0.67492 (0.67585)	Top-1 acc 73.047 (72.652)	Top-5 acc 89.844 (89.200)	lr 0.00040
Train [111][850/3239]	Time 0.228 (0.702)	Data Time 0.001 (0.048)	Loss 2.1378 (2.1604)	Entropy 0.67486 (0.67584)	Top-1 acc 74.609 (72.671)	Top-5 acc 88.281 (89.195)	lr 0.00040
Train [111][860/3239]	Time 0.232 (0.700)	Data Time 0.001 (0.048)	Loss 2.0104 (2.1596)	Entropy 0.67477 (0.67583)	Top-1 acc 75.781 (72.691)	Top-5 acc 91.406 (89.206)	lr 0.00040
Train [111][870/3239]	Time 0.244 (0.697)	Data Time 0.001 (0.047)	Loss 2.0414 (2.1591)	Entropy 0.67463 (0.67582)	Top-1 acc 73.828 (72.694)	Top-5 acc 90.234 (89.212)	lr 0.00040
Train [111][880/3239]	Time 0.234 (0.695)	Data Time 0.001 (0.047)	Loss 2.1806 (2.1594)	Entropy 0.67441 (0.67580)	Top-1 acc 73.828 (72.685)	Top-5 acc 88.672 (89.214)	lr 0.00040
Train [111][890/3239]	Time 2.570 (0.692)	Data Time 0.001 (0.046)	Loss 2.1638 (2.1592)	Entropy 0.67441 (0.67579)	Top-1 acc 70.703 (72.694)	Top-5 acc 90.625 (89.216)	lr 0.00040
Train [111][900/3239]	Time 0.325 (0.688)	Data Time 0.001 (0.046)	Loss 2.2448 (2.1597)	Entropy 0.67433 (0.67577)	Top-1 acc 71.875 (72.680)	Top-5 acc 88.672 (89.205)	lr 0.00040
Train [111][910/3239]	Time 0.237 (0.685)	Data Time 0.001 (0.045)	Loss 2.1877 (2.1597)	Entropy 0.67432 (0.67575)	Top-1 acc 74.609 (72.685)	Top-5 acc 89.453 (89.201)	lr 0.00040
Train [111][920/3239]	Time 0.229 (0.683)	Data Time 0.001 (0.045)	Loss 2.0580 (2.1596)	Entropy 0.67425 (0.67574)	Top-1 acc 75.000 (72.690)	Top-5 acc 91.016 (89.204)	lr 0.00040
Train [111][930/3239]	Time 0.215 (0.681)	Data Time 0.001 (0.044)	Loss 2.1906 (2.1597)	Entropy 0.67425 (0.67572)	Top-1 acc 75.391 (72.694)	Top-5 acc 87.500 (89.199)	lr 0.00040
Train [111][940/3239]	Time 0.318 (0.679)	Data Time 0.001 (0.044)	Loss 2.1110 (2.1595)	Entropy 0.67422 (0.67571)	Top-1 acc 73.828 (72.692)	Top-5 acc 91.406 (89.204)	lr 0.00040
Train [111][950/3239]	Time 0.240 (0.677)	Data Time 0.001 (0.043)	Loss 2.0858 (2.1591)	Entropy 0.67423 (0.67569)	Top-1 acc 75.000 (72.703)	Top-5 acc 90.625 (89.204)	lr 0.00040
Train [111][960/3239]	Time 0.226 (0.675)	Data Time 0.001 (0.043)	Loss 2.4384 (2.1594)	Entropy 0.67424 (0.67568)	Top-1 acc 64.453 (72.701)	Top-5 acc 82.812 (89.194)	lr 0.00040
Train [111][970/3239]	Time 0.237 (0.673)	Data Time 0.001 (0.042)	Loss 2.0501 (2.1594)	Entropy 0.67424 (0.67566)	Top-1 acc 77.344 (72.700)	Top-5 acc 89.453 (89.188)	lr 0.00040
Train [111][980/3239]	Time 0.224 (0.671)	Data Time 0.001 (0.042)	Loss 2.0336 (2.1593)	Entropy 0.67419 (0.67565)	Top-1 acc 73.828 (72.704)	Top-5 acc 91.406 (89.182)	lr 0.00040
Train [111][990/3239]	Time 0.291 (0.669)	Data Time 0.001 (0.042)	Loss 2.1844 (2.1593)	Entropy 0.67415 (0.67563)	Top-1 acc 71.094 (72.706)	Top-5 acc 88.281 (89.181)	lr 0.00040
Train [111][1000/3239]	Time 2.519 (0.667)	Data Time 0.001 (0.041)	Loss 2.1935 (2.1596)	Entropy 0.67415 (0.67562)	Top-1 acc 71.484 (72.697)	Top-5 acc 87.500 (89.176)	lr 0.00040
Train [111][1010/3239]	Time 0.297 (0.663)	Data Time 0.001 (0.041)	Loss 2.2881 (2.1592)	Entropy 0.67410 (0.67560)	Top-1 acc 70.312 (72.706)	Top-5 acc 86.328 (89.184)	lr 0.00040
Train [111][1020/3239]	Time 0.243 (0.661)	Data Time 0.001 (0.040)	Loss 2.1758 (2.1595)	Entropy 0.67411 (0.67559)	Top-1 acc 73.828 (72.703)	Top-5 acc 88.672 (89.178)	lr 0.00040
Train [111][1030/3239]	Time 0.233 (0.660)	Data Time 0.001 (0.040)	Loss 2.1630 (2.1592)	Entropy 0.67413 (0.67557)	Top-1 acc 69.922 (72.704)	Top-5 acc 90.625 (89.186)	lr 0.00040
Train [111][1040/3239]	Time 0.225 (0.658)	Data Time 0.001 (0.040)	Loss 2.1167 (2.1594)	Entropy 0.67413 (0.67556)	Top-1 acc 73.828 (72.704)	Top-5 acc 92.578 (89.186)	lr 0.00040
Train [111][1050/3239]	Time 0.232 (0.656)	Data Time 0.001 (0.039)	Loss 2.1398 (2.1591)	Entropy 0.67407 (0.67555)	Top-1 acc 71.484 (72.706)	Top-5 acc 90.625 (89.195)	lr 0.00040
Train [111][1060/3239]	Time 0.246 (0.655)	Data Time 0.001 (0.039)	Loss 1.9797 (2.1587)	Entropy 0.67409 (0.67553)	Top-1 acc 75.391 (72.715)	Top-5 acc 94.141 (89.201)	lr 0.00040
Train [111][1070/3239]	Time 0.334 (0.653)	Data Time 0.001 (0.039)	Loss 2.0697 (2.1590)	Entropy 0.67403 (0.67552)	Top-1 acc 75.000 (72.708)	Top-5 acc 89.062 (89.197)	lr 0.00040
Train [111][1080/3239]	Time 0.227 (0.652)	Data Time 0.001 (0.038)	Loss 2.1293 (2.1591)	Entropy 0.67393 (0.67550)	Top-1 acc 73.047 (72.700)	Top-5 acc 88.672 (89.193)	lr 0.00040
Train [111][1090/3239]	Time 0.232 (0.650)	Data Time 0.001 (0.038)	Loss 2.0466 (2.1595)	Entropy 0.67390 (0.67549)	Top-1 acc 77.734 (72.695)	Top-5 acc 90.234 (89.187)	lr 0.00040
Train [111][1100/3239]	Time 0.226 (0.649)	Data Time 0.001 (0.038)	Loss 2.1403 (2.1595)	Entropy 0.67383 (0.67547)	Top-1 acc 75.391 (72.705)	Top-5 acc 90.625 (89.192)	lr 0.00040
Train [111][1110/3239]	Time 2.767 (0.647)	Data Time 0.001 (0.037)	Loss 2.1262 (2.1590)	Entropy 0.67383 (0.67546)	Top-1 acc 73.438 (72.718)	Top-5 acc 90.234 (89.197)	lr 0.00040
Train [111][1120/3239]	Time 0.230 (0.644)	Data Time 0.001 (0.037)	Loss 2.0524 (2.1586)	Entropy 0.67376 (0.67544)	Top-1 acc 76.172 (72.734)	Top-5 acc 90.234 (89.202)	lr 0.00040
Train [111][1130/3239]	Time 0.229 (0.642)	Data Time 0.001 (0.037)	Loss 2.1071 (2.1587)	Entropy 0.67378 (0.67543)	Top-1 acc 72.656 (72.722)	Top-5 acc 91.406 (89.206)	lr 0.00040
Train [111][1140/3239]	Time 0.266 (0.688)	Data Time 0.002 (0.036)	Loss 2.2086 (2.1589)	Entropy 0.67369 (0.67541)	Top-1 acc 70.312 (72.722)	Top-5 acc 90.234 (89.206)	lr 0.00040
Train [111][1150/3239]	Time 0.228 (0.687)	Data Time 0.002 (0.036)	Loss 2.1667 (2.1587)	Entropy 0.67366 (0.67540)	Top-1 acc 74.609 (72.723)	Top-5 acc 87.500 (89.207)	lr 0.00040
Train [111][1160/3239]	Time 0.247 (0.685)	Data Time 0.001 (0.036)	Loss 2.2028 (2.1588)	Entropy 0.67366 (0.67538)	Top-1 acc 73.828 (72.722)	Top-5 acc 88.672 (89.206)	lr 0.00040
Train [111][1170/3239]	Time 0.222 (0.683)	Data Time 0.001 (0.035)	Loss 2.1802 (2.1587)	Entropy 0.67359 (0.67537)	Top-1 acc 73.828 (72.727)	Top-5 acc 88.672 (89.204)	lr 0.00040
Train [111][1180/3239]	Time 0.231 (0.681)	Data Time 0.001 (0.035)	Loss 2.0334 (2.1588)	Entropy 0.67351 (0.67535)	Top-1 acc 72.656 (72.726)	Top-5 acc 92.188 (89.204)	lr 0.00040
Train [111][1190/3239]	Time 0.251 (0.680)	Data Time 0.001 (0.035)	Loss 2.1619 (2.1591)	Entropy 0.67352 (0.67534)	Top-1 acc 73.828 (72.720)	Top-5 acc 87.891 (89.199)	lr 0.00040
Train [111][1200/3239]	Time 0.248 (0.678)	Data Time 0.001 (0.035)	Loss 2.2550 (2.1590)	Entropy 0.67355 (0.67532)	Top-1 acc 71.094 (72.718)	Top-5 acc 89.062 (89.204)	lr 0.00040
Train [111][1210/3239]	Time 0.264 (0.676)	Data Time 0.001 (0.034)	Loss 2.1625 (2.1589)	Entropy 0.67358 (0.67531)	Top-1 acc 69.922 (72.716)	Top-5 acc 89.844 (89.205)	lr 0.00039
Train [111][1220/3239]	Time 2.497 (0.675)	Data Time 0.002 (0.034)	Loss 2.2734 (2.1586)	Entropy 0.67358 (0.67529)	Top-1 acc 66.406 (72.723)	Top-5 acc 86.328 (89.212)	lr 0.00039
Train [111][1230/3239]	Time 0.249 (0.671)	Data Time 0.001 (0.034)	Loss 2.2070 (2.1585)	Entropy 0.67360 (0.67528)	Top-1 acc 70.703 (72.731)	Top-5 acc 89.062 (89.212)	lr 0.00039
Train [111][1240/3239]	Time 0.228 (0.670)	Data Time 0.001 (0.034)	Loss 2.2309 (2.1586)	Entropy 0.67357 (0.67527)	Top-1 acc 71.875 (72.731)	Top-5 acc 87.891 (89.215)	lr 0.00039
Train [111][1250/3239]	Time 0.233 (0.668)	Data Time 0.001 (0.033)	Loss 2.2256 (2.1589)	Entropy 0.67350 (0.67525)	Top-1 acc 67.969 (72.724)	Top-5 acc 88.672 (89.206)	lr 0.00039
Train [111][1260/3239]	Time 0.239 (0.667)	Data Time 0.001 (0.033)	Loss 2.1626 (2.1590)	Entropy 0.67363 (0.67524)	Top-1 acc 72.266 (72.722)	Top-5 acc 88.672 (89.202)	lr 0.00039
Train [111][1270/3239]	Time 0.237 (0.665)	Data Time 0.001 (0.033)	Loss 2.2093 (2.1593)	Entropy 0.67360 (0.67523)	Top-1 acc 73.047 (72.723)	Top-5 acc 86.328 (89.194)	lr 0.00039
Train [111][1280/3239]	Time 0.235 (0.664)	Data Time 0.001 (0.033)	Loss 2.2256 (2.1592)	Entropy 0.67365 (0.67521)	Top-1 acc 71.094 (72.731)	Top-5 acc 87.500 (89.192)	lr 0.00039
Train [111][1290/3239]	Time 0.257 (0.663)	Data Time 0.001 (0.032)	Loss 2.2991 (2.1596)	Entropy 0.67368 (0.67520)	Top-1 acc 71.094 (72.724)	Top-5 acc 86.328 (89.186)	lr 0.00039
Train [111][1300/3239]	Time 0.235 (0.661)	Data Time 0.001 (0.032)	Loss 2.0415 (2.1593)	Entropy 0.67329 (0.67519)	Top-1 acc 76.172 (72.736)	Top-5 acc 89.062 (89.190)	lr 0.00039
Train [111][1310/3239]	Time 0.218 (0.660)	Data Time 0.001 (0.032)	Loss 2.2326 (2.1593)	Entropy 0.67328 (0.67518)	Top-1 acc 71.094 (72.736)	Top-5 acc 87.891 (89.184)	lr 0.00039
Train [111][1320/3239]	Time 0.350 (0.659)	Data Time 0.002 (0.032)	Loss 2.0696 (2.1590)	Entropy 0.67326 (0.67516)	Top-1 acc 69.922 (72.735)	Top-5 acc 89.844 (89.190)	lr 0.00039
Train [111][1330/3239]	Time 2.584 (0.657)	Data Time 0.001 (0.031)	Loss 2.2414 (2.1592)	Entropy 0.67326 (0.67515)	Top-1 acc 71.484 (72.734)	Top-5 acc 87.109 (89.186)	lr 0.00039
Train [111][1340/3239]	Time 0.221 (0.654)	Data Time 0.001 (0.031)	Loss 2.1790 (2.1594)	Entropy 0.67326 (0.67513)	Top-1 acc 73.438 (72.735)	Top-5 acc 89.844 (89.185)	lr 0.00039
Train [111][1350/3239]	Time 0.237 (0.653)	Data Time 0.001 (0.031)	Loss 2.2024 (2.1595)	Entropy 0.67326 (0.67512)	Top-1 acc 67.188 (72.724)	Top-5 acc 88.281 (89.187)	lr 0.00039
Train [111][1360/3239]	Time 0.334 (0.652)	Data Time 0.001 (0.031)	Loss 2.1695 (2.1597)	Entropy 0.67329 (0.67511)	Top-1 acc 70.312 (72.719)	Top-5 acc 91.016 (89.183)	lr 0.00039
Train [111][1370/3239]	Time 0.239 (0.650)	Data Time 0.001 (0.031)	Loss 2.2151 (2.1593)	Entropy 0.67329 (0.67509)	Top-1 acc 70.312 (72.728)	Top-5 acc 87.891 (89.187)	lr 0.00039
Train [111][1380/3239]	Time 0.228 (0.649)	Data Time 0.001 (0.030)	Loss 1.9015 (2.1594)	Entropy 0.67323 (0.67508)	Top-1 acc 80.078 (72.720)	Top-5 acc 92.578 (89.188)	lr 0.00039
Train [111][1390/3239]	Time 0.230 (0.648)	Data Time 0.001 (0.030)	Loss 2.0932 (2.1594)	Entropy 0.67324 (0.67507)	Top-1 acc 78.516 (72.722)	Top-5 acc 90.234 (89.184)	lr 0.00039
Train [111][1400/3239]	Time 0.266 (0.647)	Data Time 0.001 (0.030)	Loss 2.2284 (2.1596)	Entropy 0.67311 (0.67505)	Top-1 acc 68.359 (72.712)	Top-5 acc 88.672 (89.181)	lr 0.00039
Train [111][1410/3239]	Time 0.230 (0.645)	Data Time 0.001 (0.030)	Loss 2.0931 (2.1595)	Entropy 0.67311 (0.67504)	Top-1 acc 77.734 (72.718)	Top-5 acc 88.281 (89.179)	lr 0.00039
Train [111][1420/3239]	Time 0.239 (0.644)	Data Time 0.001 (0.029)	Loss 2.1555 (2.1595)	Entropy 0.67312 (0.67502)	Top-1 acc 73.828 (72.719)	Top-5 acc 89.062 (89.181)	lr 0.00039
Train [111][1430/3239]	Time 0.235 (0.643)	Data Time 0.001 (0.029)	Loss 2.1672 (2.1595)	Entropy 0.67310 (0.67501)	Top-1 acc 74.609 (72.721)	Top-5 acc 88.281 (89.179)	lr 0.00039
Train [111][1440/3239]	Time 2.681 (0.642)	Data Time 0.001 (0.029)	Loss 2.2235 (2.1596)	Entropy 0.67310 (0.67500)	Top-1 acc 70.312 (72.719)	Top-5 acc 87.500 (89.177)	lr 0.00039
Train [111][1450/3239]	Time 0.217 (0.639)	Data Time 0.001 (0.029)	Loss 2.1392 (2.1597)	Entropy 0.67307 (0.67499)	Top-1 acc 70.703 (72.722)	Top-5 acc 89.453 (89.179)	lr 0.00039
Train [111][1460/3239]	Time 0.231 (0.638)	Data Time 0.001 (0.029)	Loss 2.2610 (2.1596)	Entropy 0.67301 (0.67497)	Top-1 acc 68.750 (72.724)	Top-5 acc 88.281 (89.178)	lr 0.00039
Train [111][1470/3239]	Time 0.242 (0.637)	Data Time 0.002 (0.029)	Loss 2.0169 (2.1596)	Entropy 0.67303 (0.67496)	Top-1 acc 73.047 (72.724)	Top-5 acc 91.406 (89.177)	lr 0.00039
Train [111][1480/3239]	Time 0.232 (0.636)	Data Time 0.001 (0.028)	Loss 2.2554 (2.1601)	Entropy 0.67298 (0.67495)	Top-1 acc 72.266 (72.719)	Top-5 acc 85.938 (89.172)	lr 0.00039
Train [111][1490/3239]	Time 0.236 (0.635)	Data Time 0.001 (0.028)	Loss 2.0967 (2.1599)	Entropy 0.67298 (0.67493)	Top-1 acc 73.828 (72.729)	Top-5 acc 89.844 (89.174)	lr 0.00039
Train [111][1500/3239]	Time 0.254 (0.672)	Data Time 0.002 (0.028)	Loss 2.2785 (2.1598)	Entropy 0.67286 (0.67492)	Top-1 acc 69.922 (72.735)	Top-5 acc 86.328 (89.174)	lr 0.00039
Train [111][1510/3239]	Time 0.226 (0.671)	Data Time 0.002 (0.028)	Loss 2.0917 (2.1597)	Entropy 0.67285 (0.67490)	Top-1 acc 74.219 (72.735)	Top-5 acc 91.406 (89.174)	lr 0.00039
Train [111][1520/3239]	Time 0.238 (0.670)	Data Time 0.001 (0.028)	Loss 2.2447 (2.1596)	Entropy 0.67302 (0.67489)	Top-1 acc 71.094 (72.737)	Top-5 acc 87.109 (89.178)	lr 0.00039
Train [111][1530/3239]	Time 0.273 (0.669)	Data Time 0.001 (0.027)	Loss 2.0663 (2.1593)	Entropy 0.67296 (0.67488)	Top-1 acc 76.953 (72.749)	Top-5 acc 89.844 (89.183)	lr 0.00039
Train [111][1540/3239]	Time 0.234 (0.668)	Data Time 0.001 (0.027)	Loss 2.1132 (2.1592)	Entropy 0.67301 (0.67487)	Top-1 acc 70.703 (72.747)	Top-5 acc 90.234 (89.185)	lr 0.00039
Train [111][1550/3239]	Time 2.621 (0.666)	Data Time 0.001 (0.027)	Loss 2.2271 (2.1592)	Entropy 0.67301 (0.67485)	Top-1 acc 71.484 (72.747)	Top-5 acc 87.109 (89.188)	lr 0.00039
Train [111][1560/3239]	Time 0.240 (0.664)	Data Time 0.001 (0.027)	Loss 2.1274 (2.1590)	Entropy 0.67292 (0.67484)	Top-1 acc 75.391 (72.757)	Top-5 acc 89.844 (89.196)	lr 0.00039
Train [111][1570/3239]	Time 0.330 (0.662)	Data Time 0.001 (0.027)	Loss 2.1954 (2.1592)	Entropy 0.67287 (0.67483)	Top-1 acc 69.922 (72.748)	Top-5 acc 88.281 (89.192)	lr 0.00039
Train [111][1580/3239]	Time 0.236 (0.661)	Data Time 0.001 (0.027)	Loss 2.3056 (2.1593)	Entropy 0.67287 (0.67482)	Top-1 acc 66.797 (72.744)	Top-5 acc 87.109 (89.184)	lr 0.00039
Train [111][1590/3239]	Time 0.228 (0.660)	Data Time 0.001 (0.027)	Loss 2.2607 (2.1594)	Entropy 0.67269 (0.67480)	Top-1 acc 69.922 (72.744)	Top-5 acc 86.719 (89.182)	lr 0.00039
Train [111][1600/3239]	Time 0.223 (0.659)	Data Time 0.001 (0.026)	Loss 2.0856 (2.1591)	Entropy 0.67273 (0.67479)	Top-1 acc 73.047 (72.753)	Top-5 acc 91.406 (89.186)	lr 0.00039
Train [111][1610/3239]	Time 0.326 (0.658)	Data Time 0.001 (0.026)	Loss 2.0950 (2.1591)	Entropy 0.67269 (0.67478)	Top-1 acc 76.953 (72.751)	Top-5 acc 90.625 (89.190)	lr 0.00038
Train [111][1620/3239]	Time 0.240 (0.657)	Data Time 0.001 (0.026)	Loss 2.1058 (2.1593)	Entropy 0.67266 (0.67477)	Top-1 acc 71.484 (72.744)	Top-5 acc 90.625 (89.185)	lr 0.00038
Train [111][1630/3239]	Time 0.236 (0.656)	Data Time 0.001 (0.026)	Loss 2.1966 (2.1593)	Entropy 0.67265 (0.67475)	Top-1 acc 72.266 (72.748)	Top-5 acc 89.062 (89.186)	lr 0.00038
Train [111][1640/3239]	Time 0.230 (0.655)	Data Time 0.001 (0.026)	Loss 2.2903 (2.1592)	Entropy 0.67270 (0.67474)	Top-1 acc 71.094 (72.756)	Top-5 acc 86.719 (89.185)	lr 0.00038
Train [111][1650/3239]	Time 0.359 (0.654)	Data Time 0.001 (0.026)	Loss 2.1134 (2.1593)	Entropy 0.67270 (0.67473)	Top-1 acc 75.781 (72.758)	Top-5 acc 88.672 (89.182)	lr 0.00038
Train [111][1660/3239]	Time 2.544 (0.653)	Data Time 0.001 (0.025)	Loss 2.2401 (2.1596)	Entropy 0.67270 (0.67472)	Top-1 acc 73.438 (72.746)	Top-5 acc 85.547 (89.175)	lr 0.00038
Train [111][1670/3239]	Time 0.228 (0.650)	Data Time 0.001 (0.025)	Loss 2.0070 (2.1595)	Entropy 0.67256 (0.67470)	Top-1 acc 76.562 (72.753)	Top-5 acc 91.406 (89.181)	lr 0.00038
Train [111][1680/3239]	Time 0.297 (0.650)	Data Time 0.001 (0.025)	Loss 2.2733 (2.1598)	Entropy 0.67251 (0.67469)	Top-1 acc 69.531 (72.742)	Top-5 acc 87.891 (89.178)	lr 0.00038
Train [111][1690/3239]	Time 0.262 (0.648)	Data Time 0.001 (0.025)	Loss 2.1155 (2.1597)	Entropy 0.67252 (0.67468)	Top-1 acc 74.219 (72.746)	Top-5 acc 91.797 (89.176)	lr 0.00038
Train [111][1700/3239]	Time 0.217 (0.647)	Data Time 0.001 (0.025)	Loss 2.2269 (2.1597)	Entropy 0.67253 (0.67466)	Top-1 acc 69.922 (72.749)	Top-5 acc 87.891 (89.174)	lr 0.00038
Train [111][1710/3239]	Time 0.233 (0.646)	Data Time 0.001 (0.025)	Loss 2.2652 (2.1599)	Entropy 0.67250 (0.67465)	Top-1 acc 71.094 (72.741)	Top-5 acc 86.719 (89.168)	lr 0.00038
Train [111][1720/3239]	Time 0.236 (0.646)	Data Time 0.001 (0.025)	Loss 2.0663 (2.1598)	Entropy 0.67247 (0.67464)	Top-1 acc 76.172 (72.751)	Top-5 acc 91.016 (89.172)	lr 0.00038
Train [111][1730/3239]	Time 0.247 (0.645)	Data Time 0.001 (0.024)	Loss 2.2024 (2.1600)	Entropy 0.67237 (0.67463)	Top-1 acc 70.312 (72.745)	Top-5 acc 91.016 (89.172)	lr 0.00038
Train [111][1740/3239]	Time 0.232 (0.644)	Data Time 0.001 (0.024)	Loss 2.0435 (2.1598)	Entropy 0.67230 (0.67461)	Top-1 acc 76.562 (72.750)	Top-5 acc 88.672 (89.172)	lr 0.00038
Train [111][1750/3239]	Time 0.230 (0.643)	Data Time 0.001 (0.024)	Loss 2.1374 (2.1599)	Entropy 0.67231 (0.67460)	Top-1 acc 75.000 (72.747)	Top-5 acc 89.844 (89.170)	lr 0.00038
Train [111][1760/3239]	Time 0.229 (0.642)	Data Time 0.001 (0.024)	Loss 2.1321 (2.1598)	Entropy 0.67229 (0.67459)	Top-1 acc 73.438 (72.751)	Top-5 acc 89.453 (89.169)	lr 0.00038
Train [111][1770/3239]	Time 2.639 (0.641)	Data Time 0.001 (0.024)	Loss 2.3728 (2.1599)	Entropy 0.67229 (0.67457)	Top-1 acc 69.922 (72.748)	Top-5 acc 86.719 (89.169)	lr 0.00038
Train [111][1780/3239]	Time 0.254 (0.639)	Data Time 0.001 (0.024)	Loss 2.1184 (2.1598)	Entropy 0.67233 (0.67456)	Top-1 acc 70.703 (72.750)	Top-5 acc 91.797 (89.172)	lr 0.00038
Train [111][1790/3239]	Time 0.241 (0.638)	Data Time 0.001 (0.024)	Loss 2.1164 (2.1598)	Entropy 0.67227 (0.67455)	Top-1 acc 73.047 (72.747)	Top-5 acc 89.453 (89.175)	lr 0.00038
Train [111][1800/3239]	Time 0.230 (0.637)	Data Time 0.001 (0.024)	Loss 2.2441 (2.1601)	Entropy 0.67225 (0.67454)	Top-1 acc 69.531 (72.735)	Top-5 acc 87.109 (89.172)	lr 0.00038
Train [111][1810/3239]	Time 0.228 (0.636)	Data Time 0.001 (0.023)	Loss 2.0547 (2.1600)	Entropy 0.67220 (0.67452)	Top-1 acc 76.953 (72.738)	Top-5 acc 92.578 (89.176)	lr 0.00038
Train [111][1820/3239]	Time 0.244 (0.635)	Data Time 0.001 (0.023)	Loss 2.1037 (2.1599)	Entropy 0.67216 (0.67451)	Top-1 acc 71.875 (72.743)	Top-5 acc 91.797 (89.179)	lr 0.00038
Train [111][1830/3239]	Time 0.231 (0.635)	Data Time 0.001 (0.023)	Loss 2.6190 (2.1601)	Entropy 0.67204 (0.67450)	Top-1 acc 63.281 (72.734)	Top-5 acc 84.766 (89.174)	lr 0.00038
Train [111][1840/3239]	Time 0.229 (0.634)	Data Time 0.001 (0.023)	Loss 2.1948 (2.1601)	Entropy 0.67197 (0.67448)	Top-1 acc 73.438 (72.739)	Top-5 acc 87.891 (89.175)	lr 0.00038
Train [111][1850/3239]	Time 0.222 (0.633)	Data Time 0.001 (0.023)	Loss 2.2561 (2.1603)	Entropy 0.67198 (0.67447)	Top-1 acc 72.266 (72.736)	Top-5 acc 84.375 (89.171)	lr 0.00038
Train [111][1860/3239]	Time 0.434 (0.660)	Data Time 0.002 (0.023)	Loss 2.2667 (2.1602)	Entropy 0.67196 (0.67446)	Top-1 acc 68.750 (72.738)	Top-5 acc 86.719 (89.174)	lr 0.00038
Train [111][1870/3239]	Time 0.234 (0.660)	Data Time 0.002 (0.023)	Loss 2.1065 (2.1602)	Entropy 0.67194 (0.67444)	Top-1 acc 72.656 (72.736)	Top-5 acc 89.453 (89.172)	lr 0.00038
Train [111][1880/3239]	Time 2.552 (0.659)	Data Time 0.002 (0.023)	Loss 2.0755 (2.1601)	Entropy 0.67194 (0.67443)	Top-1 acc 74.219 (72.739)	Top-5 acc 89.453 (89.175)	lr 0.00038
Train [111][1890/3239]	Time 0.249 (0.657)	Data Time 0.001 (0.023)	Loss 1.9581 (2.1602)	Entropy 0.67191 (0.67442)	Top-1 acc 78.125 (72.734)	Top-5 acc 92.188 (89.174)	lr 0.00038
Train [111][1900/3239]	Time 0.235 (0.656)	Data Time 0.001 (0.022)	Loss 2.2583 (2.1600)	Entropy 0.67191 (0.67440)	Top-1 acc 67.188 (72.730)	Top-5 acc 87.500 (89.176)	lr 0.00038
Train [111][1910/3239]	Time 0.248 (0.655)	Data Time 0.001 (0.022)	Loss 2.2406 (2.1601)	Entropy 0.67187 (0.67439)	Top-1 acc 73.438 (72.732)	Top-5 acc 87.109 (89.176)	lr 0.00038
Train [111][1920/3239]	Time 0.255 (0.654)	Data Time 0.002 (0.022)	Loss 2.1706 (2.1600)	Entropy 0.67171 (0.67438)	Top-1 acc 71.875 (72.730)	Top-5 acc 91.016 (89.178)	lr 0.00038
Train [111][1930/3239]	Time 0.242 (0.653)	Data Time 0.001 (0.022)	Loss 2.1482 (2.1604)	Entropy 0.67170 (0.67436)	Top-1 acc 71.875 (72.717)	Top-5 acc 90.234 (89.170)	lr 0.00038
Train [111][1940/3239]	Time 0.323 (0.652)	Data Time 0.001 (0.022)	Loss 2.1342 (2.1603)	Entropy 0.67170 (0.67435)	Top-1 acc 73.438 (72.719)	Top-5 acc 87.500 (89.170)	lr 0.00038
Train [111][1950/3239]	Time 0.222 (0.651)	Data Time 0.001 (0.022)	Loss 2.0630 (2.1603)	Entropy 0.67174 (0.67434)	Top-1 acc 75.391 (72.716)	Top-5 acc 91.016 (89.172)	lr 0.00038
Train [111][1960/3239]	Time 0.248 (0.651)	Data Time 0.002 (0.022)	Loss 2.0388 (2.1603)	Entropy 0.67171 (0.67432)	Top-1 acc 73.828 (72.713)	Top-5 acc 92.578 (89.172)	lr 0.00038
Train [111][1970/3239]	Time 0.229 (0.650)	Data Time 0.001 (0.022)	Loss 1.9955 (2.1601)	Entropy 0.67170 (0.67431)	Top-1 acc 76.953 (72.716)	Top-5 acc 92.969 (89.176)	lr 0.00038
Train [111][1980/3239]	Time 0.347 (0.649)	Data Time 0.001 (0.022)	Loss 2.0926 (2.1600)	Entropy 0.67176 (0.67430)	Top-1 acc 75.391 (72.718)	Top-5 acc 89.453 (89.177)	lr 0.00038
Train [111][1990/3239]	Time 2.553 (0.648)	Data Time 0.001 (0.021)	Loss 2.1300 (2.1600)	Entropy 0.67176 (0.67428)	Top-1 acc 74.609 (72.719)	Top-5 acc 91.406 (89.181)	lr 0.00038
Train [111][2000/3239]	Time 0.303 (0.646)	Data Time 0.001 (0.021)	Loss 2.1077 (2.1600)	Entropy 0.67170 (0.67427)	Top-1 acc 73.438 (72.719)	Top-5 acc 91.016 (89.182)	lr 0.00038
Train [111][2010/3239]	Time 0.253 (0.645)	Data Time 0.001 (0.021)	Loss 2.0847 (2.1598)	Entropy 0.67160 (0.67426)	Top-1 acc 74.219 (72.723)	Top-5 acc 91.406 (89.184)	lr 0.00037
Train [111][2020/3239]	Time 0.236 (0.645)	Data Time 0.001 (0.021)	Loss 2.2914 (2.1597)	Entropy 0.67154 (0.67424)	Top-1 acc 69.922 (72.722)	Top-5 acc 88.281 (89.188)	lr 0.00037
Train [111][2030/3239]	Time 0.238 (0.644)	Data Time 0.001 (0.021)	Loss 2.1075 (2.1596)	Entropy 0.67157 (0.67423)	Top-1 acc 73.438 (72.728)	Top-5 acc 89.844 (89.188)	lr 0.00037
Train [111][2040/3239]	Time 0.231 (0.643)	Data Time 0.002 (0.021)	Loss 2.1906 (2.1596)	Entropy 0.67155 (0.67422)	Top-1 acc 73.047 (72.728)	Top-5 acc 88.672 (89.189)	lr 0.00037
Train [111][2050/3239]	Time 0.236 (0.642)	Data Time 0.001 (0.021)	Loss 2.1826 (2.1594)	Entropy 0.67150 (0.67420)	Top-1 acc 74.609 (72.729)	Top-5 acc 88.281 (89.195)	lr 0.00037
Train [111][2060/3239]	Time 0.243 (0.641)	Data Time 0.001 (0.021)	Loss 2.1705 (2.1596)	Entropy 0.67152 (0.67419)	Top-1 acc 71.875 (72.724)	Top-5 acc 88.281 (89.194)	lr 0.00037
Train [111][2070/3239]	Time 0.231 (0.641)	Data Time 0.001 (0.021)	Loss 2.1904 (2.1597)	Entropy 0.67149 (0.67418)	Top-1 acc 73.438 (72.721)	Top-5 acc 89.844 (89.190)	lr 0.00037
Train [111][2080/3239]	Time 0.213 (0.640)	Data Time 0.001 (0.021)	Loss 2.1247 (2.1597)	Entropy 0.67147 (0.67416)	Top-1 acc 73.047 (72.722)	Top-5 acc 89.453 (89.190)	lr 0.00037
Train [111][2090/3239]	Time 0.241 (0.639)	Data Time 0.001 (0.021)	Loss 2.1032 (2.1596)	Entropy 0.67145 (0.67415)	Top-1 acc 74.609 (72.723)	Top-5 acc 90.234 (89.191)	lr 0.00037
Train [111][2100/3239]	Time 2.558 (0.638)	Data Time 0.001 (0.020)	Loss 2.3084 (2.1597)	Entropy 0.67145 (0.67414)	Top-1 acc 70.703 (72.719)	Top-5 acc 86.719 (89.188)	lr 0.00037
Train [111][2110/3239]	Time 0.272 (0.637)	Data Time 0.001 (0.020)	Loss 2.0736 (2.1594)	Entropy 0.67153 (0.67413)	Top-1 acc 74.609 (72.725)	Top-5 acc 89.844 (89.188)	lr 0.00037
Train [111][2120/3239]	Time 0.236 (0.636)	Data Time 0.001 (0.020)	Loss 2.1162 (2.1597)	Entropy 0.67152 (0.67411)	Top-1 acc 71.484 (72.717)	Top-5 acc 91.016 (89.182)	lr 0.00037
Train [111][2130/3239]	Time 0.231 (0.635)	Data Time 0.001 (0.020)	Loss 2.0847 (2.1598)	Entropy 0.67150 (0.67410)	Top-1 acc 74.219 (72.717)	Top-5 acc 91.406 (89.182)	lr 0.00037
Train [111][2140/3239]	Time 0.258 (0.634)	Data Time 0.001 (0.020)	Loss 2.1459 (2.1597)	Entropy 0.67172 (0.67409)	Top-1 acc 74.219 (72.721)	Top-5 acc 89.062 (89.183)	lr 0.00037
Train [111][2150/3239]	Time 0.332 (0.634)	Data Time 0.001 (0.020)	Loss 2.2220 (2.1598)	Entropy 0.67169 (0.67408)	Top-1 acc 72.266 (72.723)	Top-5 acc 88.672 (89.182)	lr 0.00037
Train [111][2160/3239]	Time 0.231 (0.633)	Data Time 0.001 (0.020)	Loss 2.2058 (2.1600)	Entropy 0.67167 (0.67407)	Top-1 acc 71.484 (72.720)	Top-5 acc 88.672 (89.176)	lr 0.00037
Train [111][2170/3239]	Time 0.235 (0.632)	Data Time 0.001 (0.020)	Loss 2.0038 (2.1599)	Entropy 0.67166 (0.67406)	Top-1 acc 77.734 (72.723)	Top-5 acc 92.969 (89.179)	lr 0.00037
Train [111][2180/3239]	Time 0.246 (0.632)	Data Time 0.001 (0.020)	Loss 2.1305 (2.1600)	Entropy 0.67166 (0.67405)	Top-1 acc 73.438 (72.719)	Top-5 acc 91.406 (89.179)	lr 0.00037
Train [111][2190/3239]	Time 0.343 (0.631)	Data Time 0.001 (0.020)	Loss 2.1389 (2.1604)	Entropy 0.67162 (0.67404)	Top-1 acc 73.438 (72.713)	Top-5 acc 89.062 (89.172)	lr 0.00037
Train [111][2200/3239]	Time 0.267 (0.630)	Data Time 0.001 (0.020)	Loss 2.2869 (2.1604)	Entropy 0.67158 (0.67402)	Top-1 acc 66.797 (72.711)	Top-5 acc 89.062 (89.171)	lr 0.00037
Train [111][2210/3239]	Time 2.553 (0.630)	Data Time 0.001 (0.019)	Loss 2.1579 (2.1604)	Entropy 0.67158 (0.67401)	Top-1 acc 74.609 (72.710)	Top-5 acc 89.844 (89.170)	lr 0.00037
Train [111][2220/3239]	Time 0.232 (0.628)	Data Time 0.001 (0.019)	Loss 2.2023 (2.1604)	Entropy 0.67156 (0.67400)	Top-1 acc 70.703 (72.712)	Top-5 acc 88.672 (89.169)	lr 0.00037
Train [111][2230/3239]	Time 0.283 (0.650)	Data Time 0.003 (0.019)	Loss 2.1229 (2.1604)	Entropy 0.67151 (0.67399)	Top-1 acc 75.000 (72.710)	Top-5 acc 88.672 (89.170)	lr 0.00037
Train [111][2240/3239]	Time 0.231 (0.649)	Data Time 0.005 (0.019)	Loss 2.1408 (2.1605)	Entropy 0.67149 (0.67398)	Top-1 acc 71.094 (72.708)	Top-5 acc 91.016 (89.171)	lr 0.00037
Train [111][2250/3239]	Time 0.237 (0.649)	Data Time 0.001 (0.019)	Loss 2.1042 (2.1605)	Entropy 0.67147 (0.67397)	Top-1 acc 73.047 (72.705)	Top-5 acc 91.406 (89.171)	lr 0.00037
Train [111][2260/3239]	Time 0.239 (0.648)	Data Time 0.001 (0.019)	Loss 2.0391 (2.1604)	Entropy 0.67147 (0.67396)	Top-1 acc 75.391 (72.708)	Top-5 acc 91.016 (89.171)	lr 0.00037
Train [111][2270/3239]	Time 0.231 (0.647)	Data Time 0.001 (0.019)	Loss 2.1143 (2.1604)	Entropy 0.67146 (0.67395)	Top-1 acc 73.438 (72.708)	Top-5 acc 91.406 (89.171)	lr 0.00037
Train [111][2280/3239]	Time 0.231 (0.646)	Data Time 0.001 (0.019)	Loss 2.2133 (2.1604)	Entropy 0.67143 (0.67394)	Top-1 acc 72.656 (72.702)	Top-5 acc 87.500 (89.170)	lr 0.00037
Train [111][2290/3239]	Time 0.232 (0.646)	Data Time 0.001 (0.019)	Loss 2.1923 (2.1606)	Entropy 0.67147 (0.67393)	Top-1 acc 71.484 (72.696)	Top-5 acc 90.625 (89.166)	lr 0.00037
Train [111][2300/3239]	Time 0.235 (0.645)	Data Time 0.001 (0.019)	Loss 2.1413 (2.1607)	Entropy 0.67148 (0.67391)	Top-1 acc 70.703 (72.689)	Top-5 acc 88.672 (89.165)	lr 0.00037
Train [111][2310/3239]	Time 0.229 (0.644)	Data Time 0.001 (0.019)	Loss 1.9828 (2.1605)	Entropy 0.67146 (0.67390)	Top-1 acc 75.781 (72.694)	Top-5 acc 92.969 (89.167)	lr 0.00037
Train [111][2320/3239]	Time 2.503 (0.644)	Data Time 0.001 (0.019)	Loss 2.2231 (2.1606)	Entropy 0.67146 (0.67389)	Top-1 acc 68.359 (72.690)	Top-5 acc 88.672 (89.168)	lr 0.00037
Train [111][2330/3239]	Time 0.242 (0.642)	Data Time 0.001 (0.019)	Loss 2.1760 (2.1603)	Entropy 0.67142 (0.67388)	Top-1 acc 74.219 (72.701)	Top-5 acc 87.891 (89.169)	lr 0.00037
Train [111][2340/3239]	Time 0.235 (0.641)	Data Time 0.001 (0.019)	Loss 2.0876 (2.1603)	Entropy 0.67141 (0.67387)	Top-1 acc 74.609 (72.706)	Top-5 acc 88.672 (89.169)	lr 0.00037
Train [111][2350/3239]	Time 0.248 (0.641)	Data Time 0.001 (0.018)	Loss 2.2552 (2.1604)	Entropy 0.67135 (0.67386)	Top-1 acc 69.922 (72.700)	Top-5 acc 88.281 (89.168)	lr 0.00037
Train [111][2360/3239]	Time 0.338 (0.640)	Data Time 0.001 (0.018)	Loss 2.1824 (2.1607)	Entropy 0.67140 (0.67385)	Top-1 acc 72.266 (72.695)	Top-5 acc 89.453 (89.164)	lr 0.00037
Train [111][2370/3239]	Time 0.240 (0.639)	Data Time 0.001 (0.018)	Loss 2.2597 (2.1606)	Entropy 0.67140 (0.67384)	Top-1 acc 71.484 (72.694)	Top-5 acc 86.328 (89.164)	lr 0.00037
Train [111][2380/3239]	Time 0.281 (0.639)	Data Time 0.001 (0.018)	Loss 2.1595 (2.1608)	Entropy 0.67135 (0.67383)	Top-1 acc 73.438 (72.689)	Top-5 acc 90.625 (89.160)	lr 0.00037
Train [111][2390/3239]	Time 0.236 (0.638)	Data Time 0.001 (0.018)	Loss 2.2764 (2.1608)	Entropy 0.67138 (0.67382)	Top-1 acc 69.922 (72.691)	Top-5 acc 89.062 (89.162)	lr 0.00037
Train [111][2400/3239]	Time 0.347 (0.637)	Data Time 0.001 (0.018)	Loss 2.1322 (2.1609)	Entropy 0.67138 (0.67381)	Top-1 acc 71.875 (72.687)	Top-5 acc 89.453 (89.157)	lr 0.00037
Train [111][2410/3239]	Time 0.233 (0.637)	Data Time 0.001 (0.018)	Loss 2.2309 (2.1611)	Entropy 0.67137 (0.67380)	Top-1 acc 65.625 (72.684)	Top-5 acc 88.672 (89.154)	lr 0.00037
Train [111][2420/3239]	Time 0.232 (0.636)	Data Time 0.001 (0.018)	Loss 2.2594 (2.1611)	Entropy 0.67131 (0.67379)	Top-1 acc 70.703 (72.679)	Top-5 acc 87.891 (89.155)	lr 0.00036
Train [111][2430/3239]	Time 2.591 (0.636)	Data Time 0.002 (0.018)	Loss 2.2416 (2.1612)	Entropy 0.67131 (0.67378)	Top-1 acc 69.141 (72.674)	Top-5 acc 86.328 (89.153)	lr 0.00036
Train [111][2440/3239]	Time 0.232 (0.634)	Data Time 0.001 (0.018)	Loss 2.2936 (2.1615)	Entropy 0.67131 (0.67377)	Top-1 acc 67.578 (72.669)	Top-5 acc 83.203 (89.146)	lr 0.00036
Train [111][2450/3239]	Time 0.234 (0.633)	Data Time 0.001 (0.018)	Loss 2.2090 (2.1615)	Entropy 0.67108 (0.67376)	Top-1 acc 69.922 (72.668)	Top-5 acc 87.891 (89.145)	lr 0.00036
Train [111][2460/3239]	Time 0.234 (0.633)	Data Time 0.001 (0.018)	Loss 2.2051 (2.1616)	Entropy 0.67107 (0.67375)	Top-1 acc 72.266 (72.666)	Top-5 acc 87.891 (89.144)	lr 0.00036
Train [111][2470/3239]	Time 0.243 (0.632)	Data Time 0.001 (0.018)	Loss 2.1676 (2.1615)	Entropy 0.67103 (0.67374)	Top-1 acc 76.953 (72.671)	Top-5 acc 87.500 (89.146)	lr 0.00036
Train [111][2480/3239]	Time 0.242 (0.631)	Data Time 0.001 (0.018)	Loss 2.1863 (2.1614)	Entropy 0.67100 (0.67373)	Top-1 acc 75.000 (72.674)	Top-5 acc 89.453 (89.147)	lr 0.00036
Train [111][2490/3239]	Time 0.222 (0.631)	Data Time 0.001 (0.017)	Loss 2.1019 (2.1615)	Entropy 0.67088 (0.67371)	Top-1 acc 74.219 (72.671)	Top-5 acc 90.625 (89.144)	lr 0.00036
Train [111][2500/3239]	Time 0.238 (0.630)	Data Time 0.001 (0.017)	Loss 2.0536 (2.1615)	Entropy 0.67081 (0.67370)	Top-1 acc 73.438 (72.667)	Top-5 acc 91.406 (89.145)	lr 0.00036
Train [111][2510/3239]	Time 0.248 (0.630)	Data Time 0.002 (0.017)	Loss 2.1364 (2.1615)	Entropy 0.67077 (0.67369)	Top-1 acc 73.438 (72.666)	Top-5 acc 88.281 (89.145)	lr 0.00036
Train [111][2520/3239]	Time 0.241 (0.629)	Data Time 0.001 (0.017)	Loss 2.1533 (2.1614)	Entropy 0.67065 (0.67368)	Top-1 acc 71.484 (72.666)	Top-5 acc 92.188 (89.146)	lr 0.00036
Train [111][2530/3239]	Time 0.222 (0.629)	Data Time 0.001 (0.017)	Loss 2.1210 (2.1614)	Entropy 0.67065 (0.67367)	Top-1 acc 73.438 (72.667)	Top-5 acc 90.234 (89.148)	lr 0.00036
Train [111][2540/3239]	Time 2.592 (0.628)	Data Time 0.001 (0.017)	Loss 2.2020 (2.1615)	Entropy 0.67065 (0.67366)	Top-1 acc 70.312 (72.663)	Top-5 acc 87.891 (89.145)	lr 0.00036
Train [111][2550/3239]	Time 0.236 (0.626)	Data Time 0.001 (0.017)	Loss 2.1282 (2.1615)	Entropy 0.67060 (0.67364)	Top-1 acc 69.922 (72.662)	Top-5 acc 90.625 (89.147)	lr 0.00036
Train [111][2560/3239]	Time 0.232 (0.626)	Data Time 0.001 (0.017)	Loss 2.0569 (2.1616)	Entropy 0.67068 (0.67363)	Top-1 acc 76.172 (72.661)	Top-5 acc 91.797 (89.146)	lr 0.00036
Train [111][2570/3239]	Time 0.323 (0.625)	Data Time 0.001 (0.017)	Loss 2.0532 (2.1620)	Entropy 0.67067 (0.67362)	Top-1 acc 73.438 (72.651)	Top-5 acc 91.016 (89.136)	lr 0.00036
Train [111][2580/3239]	Time 0.231 (0.625)	Data Time 0.001 (0.017)	Loss 2.0590 (2.1618)	Entropy 0.67070 (0.67361)	Top-1 acc 74.609 (72.655)	Top-5 acc 89.062 (89.139)	lr 0.00036
Train [111][2590/3239]	Time 0.238 (0.645)	Data Time 0.002 (0.017)	Loss 2.1006 (2.1618)	Entropy 0.67071 (0.67360)	Top-1 acc 71.875 (72.653)	Top-5 acc 88.672 (89.140)	lr 0.00036
Train [111][2600/3239]	Time 0.233 (0.644)	Data Time 0.002 (0.017)	Loss 2.1833 (2.1618)	Entropy 0.67072 (0.67359)	Top-1 acc 72.266 (72.651)	Top-5 acc 88.672 (89.139)	lr 0.00036
Train [111][2610/3239]	Time 0.339 (0.643)	Data Time 0.001 (0.017)	Loss 2.2354 (2.1618)	Entropy 0.67072 (0.67358)	Top-1 acc 72.266 (72.650)	Top-5 acc 88.281 (89.137)	lr 0.00036
Train [111][2620/3239]	Time 0.223 (0.643)	Data Time 0.001 (0.017)	Loss 2.1920 (2.1618)	Entropy 0.67069 (0.67357)	Top-1 acc 70.312 (72.652)	Top-5 acc 87.891 (89.137)	lr 0.00036
Train [111][2630/3239]	Time 0.234 (0.642)	Data Time 0.001 (0.017)	Loss 1.9876 (2.1619)	Entropy 0.67069 (0.67355)	Top-1 acc 78.516 (72.648)	Top-5 acc 92.578 (89.133)	lr 0.00036
Train [111][2640/3239]	Time 0.268 (0.642)	Data Time 0.001 (0.017)	Loss 2.1932 (2.1620)	Entropy 0.67068 (0.67354)	Top-1 acc 73.828 (72.648)	Top-5 acc 88.281 (89.132)	lr 0.00036
Train [111][2650/3239]	Time 0.311 (0.641)	Data Time 0.001 (0.017)	Loss 2.0793 (2.1619)	Entropy 0.67067 (0.67353)	Top-1 acc 76.953 (72.653)	Top-5 acc 91.797 (89.135)	lr 0.00036
Train [111][2660/3239]	Time 0.248 (0.640)	Data Time 0.001 (0.016)	Loss 2.0222 (2.1620)	Entropy 0.67063 (0.67352)	Top-1 acc 75.781 (72.656)	Top-5 acc 92.578 (89.134)	lr 0.00036
Train [111][2670/3239]	Time 0.236 (0.640)	Data Time 0.001 (0.016)	Loss 2.1581 (2.1622)	Entropy 0.67054 (0.67351)	Top-1 acc 73.047 (72.652)	Top-5 acc 87.891 (89.129)	lr 0.00036
Train [111][2680/3239]	Time 0.235 (0.639)	Data Time 0.001 (0.016)	Loss 1.8945 (2.1620)	Entropy 0.67054 (0.67350)	Top-1 acc 78.125 (72.653)	Top-5 acc 93.359 (89.133)	lr 0.00036
Train [111][2690/3239]	Time 0.333 (0.639)	Data Time 0.001 (0.016)	Loss 2.1256 (2.1620)	Entropy 0.67044 (0.67349)	Top-1 acc 69.922 (72.653)	Top-5 acc 91.406 (89.136)	lr 0.00036
Train [111][2700/3239]	Time 0.245 (0.638)	Data Time 0.001 (0.016)	Loss 2.1689 (2.1621)	Entropy 0.67050 (0.67348)	Top-1 acc 71.875 (72.647)	Top-5 acc 91.406 (89.136)	lr 0.00036
Train [111][2710/3239]	Time 0.229 (0.638)	Data Time 0.001 (0.016)	Loss 2.1671 (2.1620)	Entropy 0.67050 (0.67347)	Top-1 acc 71.484 (72.652)	Top-5 acc 90.234 (89.135)	lr 0.00036
Train [111][2720/3239]	Time 0.229 (0.637)	Data Time 0.001 (0.016)	Loss 2.1405 (2.1621)	Entropy 0.67055 (0.67346)	Top-1 acc 73.438 (72.652)	Top-5 acc 91.016 (89.135)	lr 0.00036
Train [111][2730/3239]	Time 0.311 (0.636)	Data Time 0.002 (0.016)	Loss 2.0897 (2.1622)	Entropy 0.67056 (0.67345)	Top-1 acc 75.781 (72.648)	Top-5 acc 88.672 (89.130)	lr 0.00036
Train [111][2740/3239]	Time 0.228 (0.636)	Data Time 0.001 (0.016)	Loss 2.0709 (2.1623)	Entropy 0.67056 (0.67343)	Top-1 acc 76.562 (72.646)	Top-5 acc 89.062 (89.127)	lr 0.00036
Train [111][2750/3239]	Time 0.229 (0.635)	Data Time 0.001 (0.016)	Loss 2.2002 (2.1622)	Entropy 0.67060 (0.67342)	Top-1 acc 70.703 (72.650)	Top-5 acc 87.109 (89.130)	lr 0.00036
Train [111][2760/3239]	Time 0.265 (0.635)	Data Time 0.001 (0.016)	Loss 2.0515 (2.1623)	Entropy 0.67059 (0.67341)	Top-1 acc 78.125 (72.651)	Top-5 acc 91.797 (89.131)	lr 0.00036
Train [111][2770/3239]	Time 0.231 (0.634)	Data Time 0.001 (0.016)	Loss 2.0430 (2.1621)	Entropy 0.67055 (0.67340)	Top-1 acc 77.344 (72.655)	Top-5 acc 91.016 (89.134)	lr 0.00036
Train [111][2780/3239]	Time 0.234 (0.634)	Data Time 0.001 (0.016)	Loss 2.2245 (2.1622)	Entropy 0.67062 (0.67339)	Top-1 acc 71.875 (72.653)	Top-5 acc 88.281 (89.131)	lr 0.00036
Train [111][2790/3239]	Time 0.267 (0.633)	Data Time 0.001 (0.016)	Loss 2.0600 (2.1623)	Entropy 0.67052 (0.67338)	Top-1 acc 74.219 (72.651)	Top-5 acc 92.578 (89.131)	lr 0.00036
Train [111][2800/3239]	Time 0.265 (0.633)	Data Time 0.002 (0.016)	Loss 2.0280 (2.1622)	Entropy 0.67052 (0.67337)	Top-1 acc 78.906 (72.655)	Top-5 acc 91.797 (89.133)	lr 0.00036
Train [111][2810/3239]	Time 0.245 (0.632)	Data Time 0.001 (0.016)	Loss 2.1999 (2.1622)	Entropy 0.67047 (0.67336)	Top-1 acc 73.438 (72.655)	Top-5 acc 86.719 (89.132)	lr 0.00036
Train [111][2820/3239]	Time 0.203 (0.631)	Data Time 0.001 (0.016)	Loss 2.1053 (2.1621)	Entropy 0.67034 (0.67335)	Top-1 acc 75.391 (72.658)	Top-5 acc 91.797 (89.132)	lr 0.00036
Train [111][2830/3239]	Time 0.219 (0.631)	Data Time 0.001 (0.016)	Loss 2.2229 (2.1622)	Entropy 0.67030 (0.67334)	Top-1 acc 69.141 (72.654)	Top-5 acc 88.672 (89.127)	lr 0.00036
Train [111][2840/3239]	Time 0.260 (0.630)	Data Time 0.001 (0.016)	Loss 2.1213 (2.1622)	Entropy 0.67021 (0.67333)	Top-1 acc 73.438 (72.652)	Top-5 acc 89.062 (89.126)	lr 0.00035
Train [111][2850/3239]	Time 0.220 (0.630)	Data Time 0.001 (0.015)	Loss 2.0768 (2.1622)	Entropy 0.67009 (0.67332)	Top-1 acc 72.266 (72.650)	Top-5 acc 92.969 (89.127)	lr 0.00035
Train [111][2860/3239]	Time 0.220 (0.629)	Data Time 0.001 (0.015)	Loss 2.1009 (2.1622)	Entropy 0.67000 (0.67331)	Top-1 acc 73.438 (72.649)	Top-5 acc 88.672 (89.128)	lr 0.00035
Train [111][2870/3239]	Time 0.254 (0.629)	Data Time 0.001 (0.015)	Loss 2.1164 (2.1624)	Entropy 0.67009 (0.67330)	Top-1 acc 75.391 (72.644)	Top-5 acc 90.625 (89.125)	lr 0.00035
Train [111][2880/3239]	Time 0.226 (0.628)	Data Time 0.001 (0.015)	Loss 2.0964 (2.1623)	Entropy 0.67012 (0.67329)	Top-1 acc 75.781 (72.647)	Top-5 acc 87.891 (89.124)	lr 0.00035
Train [111][2890/3239]	Time 0.227 (0.628)	Data Time 0.002 (0.015)	Loss 2.0166 (2.1623)	Entropy 0.67013 (0.67328)	Top-1 acc 77.344 (72.649)	Top-5 acc 92.578 (89.127)	lr 0.00035
Train [111][2900/3239]	Time 0.220 (0.627)	Data Time 0.001 (0.015)	Loss 2.1145 (2.1622)	Entropy 0.67014 (0.67326)	Top-1 acc 73.438 (72.649)	Top-5 acc 89.844 (89.128)	lr 0.00035
Train [111][2910/3239]	Time 0.256 (0.627)	Data Time 0.001 (0.015)	Loss 2.2654 (2.1622)	Entropy 0.67013 (0.67325)	Top-1 acc 71.094 (72.652)	Top-5 acc 87.109 (89.129)	lr 0.00035
Train [111][2920/3239]	Time 0.302 (0.644)	Data Time 0.004 (0.015)	Loss 2.1821 (2.1622)	Entropy 0.67018 (0.67324)	Top-1 acc 70.312 (72.649)	Top-5 acc 90.234 (89.130)	lr 0.00035
Train [111][2930/3239]	Time 0.242 (0.644)	Data Time 0.002 (0.015)	Loss 2.1966 (2.1622)	Entropy 0.67005 (0.67323)	Top-1 acc 71.094 (72.649)	Top-5 acc 87.891 (89.132)	lr 0.00035
Train [111][2940/3239]	Time 0.264 (0.643)	Data Time 0.002 (0.015)	Loss 2.1949 (2.1622)	Entropy 0.67000 (0.67322)	Top-1 acc 73.047 (72.649)	Top-5 acc 89.453 (89.132)	lr 0.00035
Train [111][2950/3239]	Time 0.237 (0.643)	Data Time 0.001 (0.015)	Loss 2.0699 (2.1622)	Entropy 0.66992 (0.67321)	Top-1 acc 75.781 (72.649)	Top-5 acc 91.016 (89.134)	lr 0.00035
Train [111][2960/3239]	Time 0.225 (0.642)	Data Time 0.001 (0.015)	Loss 2.1986 (2.1622)	Entropy 0.66992 (0.67320)	Top-1 acc 72.266 (72.650)	Top-5 acc 85.547 (89.131)	lr 0.00035
Train [111][2970/3239]	Time 0.233 (0.641)	Data Time 0.001 (0.015)	Loss 2.0010 (2.1621)	Entropy 0.66991 (0.67319)	Top-1 acc 74.609 (72.651)	Top-5 acc 90.234 (89.131)	lr 0.00035
Train [111][2980/3239]	Time 0.232 (0.641)	Data Time 0.001 (0.015)	Loss 2.1646 (2.1621)	Entropy 0.66992 (0.67318)	Top-1 acc 71.094 (72.650)	Top-5 acc 89.844 (89.132)	lr 0.00035
Train [111][2990/3239]	Time 0.211 (0.640)	Data Time 0.001 (0.015)	Loss 2.3041 (2.1622)	Entropy 0.66988 (0.67317)	Top-1 acc 68.750 (72.647)	Top-5 acc 86.328 (89.127)	lr 0.00035
Train [111][3000/3239]	Time 0.229 (0.640)	Data Time 0.001 (0.015)	Loss 2.0542 (2.1624)	Entropy 0.66986 (0.67316)	Top-1 acc 75.000 (72.641)	Top-5 acc 92.188 (89.124)	lr 0.00035
Train [111][3010/3239]	Time 0.259 (0.639)	Data Time 0.001 (0.015)	Loss 2.1654 (2.1625)	Entropy 0.66980 (0.67315)	Top-1 acc 72.656 (72.640)	Top-5 acc 87.891 (89.123)	lr 0.00035
Train [111][3020/3239]	Time 0.244 (0.639)	Data Time 0.001 (0.015)	Loss 2.0297 (2.1624)	Entropy 0.66971 (0.67313)	Top-1 acc 78.906 (72.644)	Top-5 acc 90.234 (89.125)	lr 0.00035
Train [111][3030/3239]	Time 0.259 (0.638)	Data Time 0.002 (0.015)	Loss 2.2695 (2.1626)	Entropy 0.66979 (0.67312)	Top-1 acc 69.531 (72.640)	Top-5 acc 88.281 (89.122)	lr 0.00035
Train [111][3040/3239]	Time 0.228 (0.638)	Data Time 0.001 (0.015)	Loss 2.0668 (2.1627)	Entropy 0.66971 (0.67311)	Top-1 acc 72.656 (72.635)	Top-5 acc 91.406 (89.122)	lr 0.00035
Train [111][3050/3239]	Time 0.231 (0.637)	Data Time 0.001 (0.015)	Loss 2.1810 (2.1627)	Entropy 0.66970 (0.67310)	Top-1 acc 74.609 (72.638)	Top-5 acc 90.234 (89.122)	lr 0.00035
Train [111][3060/3239]	Time 0.228 (0.637)	Data Time 0.001 (0.015)	Loss 2.1444 (2.1625)	Entropy 0.66968 (0.67309)	Top-1 acc 73.828 (72.646)	Top-5 acc 90.234 (89.126)	lr 0.00035
Train [111][3070/3239]	Time 0.268 (0.636)	Data Time 0.001 (0.014)	Loss 2.2316 (2.1625)	Entropy 0.66968 (0.67308)	Top-1 acc 71.875 (72.645)	Top-5 acc 86.719 (89.126)	lr 0.00035
Train [111][3080/3239]	Time 0.237 (0.636)	Data Time 0.001 (0.014)	Loss 2.1131 (2.1626)	Entropy 0.66954 (0.67307)	Top-1 acc 73.438 (72.642)	Top-5 acc 91.016 (89.126)	lr 0.00035
Train [111][3090/3239]	Time 0.246 (0.635)	Data Time 0.001 (0.014)	Loss 2.0089 (2.1625)	Entropy 0.66947 (0.67306)	Top-1 acc 76.172 (72.639)	Top-5 acc 92.188 (89.126)	lr 0.00035
Train [111][3100/3239]	Time 0.384 (0.635)	Data Time 0.001 (0.014)	Loss 2.1646 (2.1625)	Entropy 0.66939 (0.67304)	Top-1 acc 72.656 (72.637)	Top-5 acc 90.234 (89.126)	lr 0.00035
Train [111][3110/3239]	Time 0.263 (0.634)	Data Time 0.001 (0.014)	Loss 2.0194 (2.1624)	Entropy 0.66928 (0.67303)	Top-1 acc 77.734 (72.644)	Top-5 acc 91.797 (89.131)	lr 0.00035
Train [111][3120/3239]	Time 0.228 (0.634)	Data Time 0.001 (0.014)	Loss 2.0883 (2.1624)	Entropy 0.66928 (0.67302)	Top-1 acc 74.219 (72.641)	Top-5 acc 90.625 (89.131)	lr 0.00035
Train [111][3130/3239]	Time 0.242 (0.634)	Data Time 0.001 (0.014)	Loss 2.2256 (2.1624)	Entropy 0.66929 (0.67301)	Top-1 acc 72.266 (72.642)	Top-5 acc 87.891 (89.130)	lr 0.00035
Train [111][3140/3239]	Time 0.403 (0.633)	Data Time 0.001 (0.014)	Loss 2.2848 (2.1627)	Entropy 0.66917 (0.67300)	Top-1 acc 70.312 (72.638)	Top-5 acc 87.891 (89.126)	lr 0.00035
Train [111][3150/3239]	Time 0.236 (0.633)	Data Time 0.001 (0.014)	Loss 2.0676 (2.1627)	Entropy 0.66919 (0.67298)	Top-1 acc 75.781 (72.640)	Top-5 acc 91.797 (89.128)	lr 0.00035
Train [111][3160/3239]	Time 0.275 (0.632)	Data Time 0.001 (0.014)	Loss 2.2610 (2.1627)	Entropy 0.66913 (0.67297)	Top-1 acc 73.828 (72.638)	Top-5 acc 85.547 (89.128)	lr 0.00035
Train [111][3170/3239]	Time 0.234 (0.632)	Data Time 0.001 (0.014)	Loss 2.2399 (2.1627)	Entropy 0.66911 (0.67296)	Top-1 acc 67.969 (72.639)	Top-5 acc 89.453 (89.127)	lr 0.00035
Train [111][3180/3239]	Time 0.324 (0.631)	Data Time 0.000 (0.014)	Loss 2.1939 (2.1626)	Entropy 0.66905 (0.67295)	Top-1 acc 73.047 (72.645)	Top-5 acc 90.234 (89.127)	lr 0.00035
Train [111][3190/3239]	Time 0.237 (0.631)	Data Time 0.000 (0.014)	Loss 2.2420 (2.1627)	Entropy 0.66903 (0.67294)	Top-1 acc 69.531 (72.645)	Top-5 acc 88.672 (89.125)	lr 0.00035
Train [111][3200/3239]	Time 0.224 (0.630)	Data Time 0.000 (0.014)	Loss 2.0462 (2.1627)	Entropy 0.66900 (0.67292)	Top-1 acc 75.000 (72.642)	Top-5 acc 90.625 (89.124)	lr 0.00035
Train [111][3210/3239]	Time 0.226 (0.630)	Data Time 0.000 (0.014)	Loss 2.2375 (2.1628)	Entropy 0.66891 (0.67291)	Top-1 acc 73.438 (72.642)	Top-5 acc 88.672 (89.125)	lr 0.00035
Train [111][3220/3239]	Time 0.313 (0.629)	Data Time 0.000 (0.014)	Loss 2.2071 (2.1630)	Entropy 0.66889 (0.67290)	Top-1 acc 71.875 (72.642)	Top-5 acc 90.234 (89.121)	lr 0.00035
Train [111][3230/3239]	Time 0.209 (0.629)	Data Time 0.000 (0.014)	Loss 2.2565 (2.1630)	Entropy 0.66886 (0.67289)	Top-1 acc 69.141 (72.639)	Top-5 acc 87.109 (89.120)	lr 0.00035
Train [111][3239/3239]	Time 2.328 (0.628)	Data Time 0.000 (0.014)	Loss 2.3544 (2.1631)	Entropy 0.66886 (0.67288)	Top-1 acc 69.136 (72.641)	Top-5 acc 86.420 (89.120)	lr 0.00035
==========Valid [111/120]	loss 1.201	top-1 acc 72.534 (72.549)	top-5 acc 89.982	Train top-1 72.641	top-5 89.120	Entropy 0.66886	Latency-None: 0.000ms	Flops: 544.27M
Train [112][0/3239]	Time 39.943 (39.943)	Data Time 38.603 (38.603)	Loss 2.2595 (2.2595)	Entropy 0.66883 (0.66883)	Top-1 acc 69.922 (69.922)	Top-5 acc 87.109 (87.109)	lr 0.00035
Train [112][10/3239]	Time 58.965 (9.350)	Data Time 0.003 (3.569)	Loss 2.1057 (2.1424)	Entropy 0.66883 (0.66883)	Top-1 acc 74.219 (73.153)	Top-5 acc 91.406 (89.418)	lr 0.00035
Train [112][20/3239]	Time 0.289 (5.026)	Data Time 0.002 (1.871)	Loss 2.1980 (2.1593)	Entropy 0.66884 (0.66883)	Top-1 acc 69.922 (72.396)	Top-5 acc 90.625 (89.156)	lr 0.00034
Train [112][30/3239]	Time 0.271 (3.567)	Data Time 0.001 (1.268)	Loss 2.2453 (2.1555)	Entropy 0.66882 (0.66883)	Top-1 acc 71.094 (72.505)	Top-5 acc 87.891 (89.352)	lr 0.00034
Train [112][40/3239]	Time 0.235 (2.814)	Data Time 0.001 (0.959)	Loss 2.1659 (2.1437)	Entropy 0.66871 (0.66880)	Top-1 acc 71.875 (72.694)	Top-5 acc 89.062 (89.510)	lr 0.00034
Train [112][50/3239]	Time 0.224 (2.358)	Data Time 0.001 (0.772)	Loss 2.1266 (2.1461)	Entropy 0.66867 (0.66878)	Top-1 acc 71.875 (72.786)	Top-5 acc 92.188 (89.453)	lr 0.00034
Train [112][60/3239]	Time 0.235 (2.051)	Data Time 0.001 (0.646)	Loss 2.0677 (2.1491)	Entropy 0.66870 (0.66876)	Top-1 acc 73.047 (72.784)	Top-5 acc 91.797 (89.351)	lr 0.00034
Train [112][70/3239]	Time 0.244 (1.828)	Data Time 0.001 (0.555)	Loss 2.1146 (2.1474)	Entropy 0.66860 (0.66875)	Top-1 acc 73.438 (72.904)	Top-5 acc 89.844 (89.299)	lr 0.00034
Train [112][80/3239]	Time 0.234 (1.661)	Data Time 0.001 (0.487)	Loss 2.2086 (2.1492)	Entropy 0.66853 (0.66873)	Top-1 acc 73.047 (72.931)	Top-5 acc 87.109 (89.222)	lr 0.00034
Train [112][90/3239]	Time 0.239 (1.532)	Data Time 0.001 (0.433)	Loss 2.2642 (2.1554)	Entropy 0.66853 (0.66870)	Top-1 acc 69.922 (72.755)	Top-5 acc 86.719 (89.097)	lr 0.00034
Train [112][100/3239]	Time 0.243 (1.429)	Data Time 0.001 (0.391)	Loss 2.1830 (2.1540)	Entropy 0.66855 (0.66869)	Top-1 acc 71.484 (72.792)	Top-5 acc 88.672 (89.179)	lr 0.00034
Train [112][110/3239]	Time 0.364 (1.343)	Data Time 0.001 (0.356)	Loss 2.2895 (2.1581)	Entropy 0.66845 (0.66867)	Top-1 acc 70.703 (72.762)	Top-5 acc 88.281 (89.147)	lr 0.00034
Train [112][120/3239]	Time 2.634 (1.273)	Data Time 0.002 (0.326)	Loss 2.0617 (2.1565)	Entropy 0.66845 (0.66866)	Top-1 acc 75.391 (72.895)	Top-5 acc 92.578 (89.211)	lr 0.00034
Train [112][130/3239]	Time 0.238 (1.195)	Data Time 0.001 (0.302)	Loss 2.1768 (2.1581)	Entropy 0.66839 (0.66864)	Top-1 acc 71.875 (72.862)	Top-5 acc 89.453 (89.170)	lr 0.00034
Train [112][140/3239]	Time 0.250 (1.145)	Data Time 0.001 (0.280)	Loss 2.2099 (2.1601)	Entropy 0.66841 (0.66862)	Top-1 acc 74.219 (72.831)	Top-5 acc 87.500 (89.112)	lr 0.00034
Train [112][150/3239]	Time 0.361 (1.103)	Data Time 0.001 (0.262)	Loss 2.1404 (2.1609)	Entropy 0.66818 (0.66859)	Top-1 acc 71.875 (72.778)	Top-5 acc 90.234 (89.106)	lr 0.00034
Train [112][160/3239]	Time 0.230 (1.063)	Data Time 0.001 (0.246)	Loss 2.1403 (2.1609)	Entropy 0.66819 (0.66857)	Top-1 acc 76.562 (72.765)	Top-5 acc 88.281 (89.075)	lr 0.00034
Train [112][170/3239]	Time 0.236 (1.029)	Data Time 0.001 (0.231)	Loss 2.2364 (2.1608)	Entropy 0.66817 (0.66855)	Top-1 acc 72.656 (72.770)	Top-5 acc 86.328 (89.101)	lr 0.00034
Train [112][180/3239]	Time 0.239 (0.999)	Data Time 0.001 (0.219)	Loss 2.1505 (2.1588)	Entropy 0.66815 (0.66852)	Top-1 acc 71.875 (72.805)	Top-5 acc 91.016 (89.129)	lr 0.00034
Train [112][190/3239]	Time 0.261 (0.971)	Data Time 0.001 (0.207)	Loss 2.1588 (2.1575)	Entropy 0.66811 (0.66850)	Top-1 acc 76.172 (72.838)	Top-5 acc 89.844 (89.165)	lr 0.00034
Train [112][200/3239]	Time 0.235 (0.947)	Data Time 0.001 (0.197)	Loss 2.3802 (2.1592)	Entropy 0.66807 (0.66848)	Top-1 acc 67.578 (72.773)	Top-5 acc 85.156 (89.158)	lr 0.00034
Train [112][210/3239]	Time 0.244 (0.925)	Data Time 0.001 (0.188)	Loss 2.0405 (2.1595)	Entropy 0.66801 (0.66846)	Top-1 acc 74.609 (72.754)	Top-5 acc 91.797 (89.138)	lr 0.00034
Train [112][220/3239]	Time 0.233 (0.905)	Data Time 0.001 (0.179)	Loss 2.1415 (2.1586)	Entropy 0.66802 (0.66844)	Top-1 acc 70.312 (72.773)	Top-5 acc 89.844 (89.142)	lr 0.00034
Train [112][230/3239]	Time 2.665 (0.887)	Data Time 0.001 (0.172)	Loss 2.1611 (2.1581)	Entropy 0.66802 (0.66842)	Top-1 acc 74.609 (72.803)	Top-5 acc 87.109 (89.132)	lr 0.00034
Train [112][240/3239]	Time 0.240 (0.861)	Data Time 0.001 (0.165)	Loss 2.3938 (2.1592)	Entropy 0.66803 (0.66841)	Top-1 acc 66.406 (72.758)	Top-5 acc 85.156 (89.088)	lr 0.00034
Train [112][250/3239]	Time 0.227 (0.845)	Data Time 0.001 (0.158)	Loss 2.0982 (2.1587)	Entropy 0.66791 (0.66839)	Top-1 acc 74.609 (72.711)	Top-5 acc 89.844 (89.115)	lr 0.00034
Train [112][260/3239]	Time 0.241 (0.831)	Data Time 0.001 (0.152)	Loss 2.1993 (2.1590)	Entropy 0.66787 (0.66837)	Top-1 acc 69.531 (72.679)	Top-5 acc 87.891 (89.106)	lr 0.00034
Train [112][270/3239]	Time 0.236 (0.818)	Data Time 0.002 (0.147)	Loss 2.0950 (2.1599)	Entropy 0.66785 (0.66835)	Top-1 acc 75.781 (72.662)	Top-5 acc 89.453 (89.087)	lr 0.00034
Train [112][280/3239]	Time 0.228 (0.807)	Data Time 0.001 (0.141)	Loss 2.3164 (2.1610)	Entropy 0.66771 (0.66833)	Top-1 acc 69.531 (72.652)	Top-5 acc 86.719 (89.060)	lr 0.00034
Train [112][290/3239]	Time 0.232 (0.796)	Data Time 0.001 (0.137)	Loss 2.0671 (2.1602)	Entropy 0.66768 (0.66831)	Top-1 acc 76.953 (72.698)	Top-5 acc 89.453 (89.060)	lr 0.00034
Train [112][300/3239]	Time 0.235 (0.785)	Data Time 0.001 (0.132)	Loss 2.2926 (2.1603)	Entropy 0.66764 (0.66829)	Top-1 acc 68.359 (72.677)	Top-5 acc 84.766 (89.046)	lr 0.00034
Train [112][310/3239]	Time 0.229 (0.776)	Data Time 0.001 (0.128)	Loss 2.1904 (2.1616)	Entropy 0.66755 (0.66826)	Top-1 acc 70.703 (72.680)	Top-5 acc 87.891 (89.037)	lr 0.00034
Train [112][320/3239]	Time 0.385 (0.768)	Data Time 0.001 (0.124)	Loss 2.0875 (2.1610)	Entropy 0.66749 (0.66824)	Top-1 acc 76.172 (72.717)	Top-5 acc 91.016 (89.054)	lr 0.00034
Train [112][330/3239]	Time 0.233 (0.759)	Data Time 0.001 (0.120)	Loss 2.1883 (2.1630)	Entropy 0.66747 (0.66822)	Top-1 acc 71.484 (72.673)	Top-5 acc 88.281 (89.018)	lr 0.00034
Train [112][340/3239]	Time 2.602 (0.751)	Data Time 0.001 (0.117)	Loss 2.2716 (2.1641)	Entropy 0.66747 (0.66820)	Top-1 acc 68.359 (72.656)	Top-5 acc 87.500 (88.997)	lr 0.00034
Train [112][350/3239]	Time 0.240 (0.737)	Data Time 0.001 (0.114)	Loss 2.0451 (2.1631)	Entropy 0.66740 (0.66817)	Top-1 acc 73.828 (72.692)	Top-5 acc 92.578 (89.025)	lr 0.00034
Train [112][360/3239]	Time 0.359 (0.730)	Data Time 0.001 (0.110)	Loss 2.1706 (2.1638)	Entropy 0.66744 (0.66815)	Top-1 acc 72.266 (72.682)	Top-5 acc 89.062 (89.020)	lr 0.00034
Train [112][370/3239]	Time 0.233 (0.724)	Data Time 0.001 (0.107)	Loss 2.1568 (2.1636)	Entropy 0.66741 (0.66813)	Top-1 acc 69.922 (72.677)	Top-5 acc 92.188 (89.027)	lr 0.00034
Train [112][380/3239]	Time 0.498 (0.858)	Data Time 0.003 (0.105)	Loss 2.0776 (2.1647)	Entropy 0.66738 (0.66811)	Top-1 acc 73.438 (72.637)	Top-5 acc 90.625 (89.017)	lr 0.00034
Train [112][390/3239]	Time 0.249 (0.849)	Data Time 0.002 (0.102)	Loss 2.1945 (2.1640)	Entropy 0.66738 (0.66810)	Top-1 acc 71.875 (72.654)	Top-5 acc 90.234 (89.046)	lr 0.00034
Train [112][400/3239]	Time 0.340 (0.840)	Data Time 0.002 (0.100)	Loss 2.3203 (2.1636)	Entropy 0.66725 (0.66808)	Top-1 acc 65.625 (72.671)	Top-5 acc 86.328 (89.038)	lr 0.00034
Train [112][410/3239]	Time 0.229 (0.832)	Data Time 0.001 (0.097)	Loss 2.1272 (2.1634)	Entropy 0.66718 (0.66806)	Top-1 acc 72.656 (72.676)	Top-5 acc 91.406 (89.045)	lr 0.00034
Train [112][420/3239]	Time 0.237 (0.823)	Data Time 0.001 (0.095)	Loss 2.1307 (2.1629)	Entropy 0.66717 (0.66803)	Top-1 acc 73.047 (72.707)	Top-5 acc 90.625 (89.067)	lr 0.00034
Train [112][430/3239]	Time 0.255 (0.815)	Data Time 0.001 (0.093)	Loss 2.2803 (2.1626)	Entropy 0.66714 (0.66801)	Top-1 acc 69.922 (72.730)	Top-5 acc 86.719 (89.062)	lr 0.00034
Train [112][440/3239]	Time 0.315 (0.808)	Data Time 0.001 (0.091)	Loss 2.1791 (2.1623)	Entropy 0.66712 (0.66799)	Top-1 acc 71.094 (72.740)	Top-5 acc 90.234 (89.079)	lr 0.00034
Train [112][450/3239]	Time 2.602 (0.801)	Data Time 0.001 (0.089)	Loss 2.1165 (2.1624)	Entropy 0.66712 (0.66798)	Top-1 acc 75.391 (72.730)	Top-5 acc 89.062 (89.079)	lr 0.00033
Train [112][460/3239]	Time 0.233 (0.789)	Data Time 0.001 (0.087)	Loss 2.2395 (2.1625)	Entropy 0.66711 (0.66796)	Top-1 acc 70.312 (72.724)	Top-5 acc 89.453 (89.083)	lr 0.00033
Train [112][470/3239]	Time 0.255 (0.782)	Data Time 0.001 (0.085)	Loss 2.1286 (2.1625)	Entropy 0.66704 (0.66794)	Top-1 acc 73.438 (72.727)	Top-5 acc 91.016 (89.079)	lr 0.00033
Train [112][480/3239]	Time 0.226 (0.776)	Data Time 0.001 (0.084)	Loss 2.1226 (2.1626)	Entropy 0.66714 (0.66792)	Top-1 acc 73.438 (72.738)	Top-5 acc 89.453 (89.080)	lr 0.00033
Train [112][490/3239]	Time 0.227 (0.770)	Data Time 0.001 (0.082)	Loss 2.0875 (2.1621)	Entropy 0.66712 (0.66790)	Top-1 acc 74.219 (72.737)	Top-5 acc 90.625 (89.094)	lr 0.00033
Train [112][500/3239]	Time 0.240 (0.764)	Data Time 0.001 (0.080)	Loss 2.1928 (2.1617)	Entropy 0.66702 (0.66789)	Top-1 acc 71.875 (72.740)	Top-5 acc 87.500 (89.103)	lr 0.00033
Train [112][510/3239]	Time 0.222 (0.759)	Data Time 0.001 (0.079)	Loss 2.3086 (2.1623)	Entropy 0.66699 (0.66787)	Top-1 acc 70.703 (72.723)	Top-5 acc 89.062 (89.085)	lr 0.00033
Train [112][520/3239]	Time 0.240 (0.754)	Data Time 0.001 (0.077)	Loss 2.2617 (2.1616)	Entropy 0.66702 (0.66785)	Top-1 acc 70.312 (72.740)	Top-5 acc 89.062 (89.073)	lr 0.00033
Train [112][530/3239]	Time 0.230 (0.749)	Data Time 0.001 (0.076)	Loss 2.1930 (2.1617)	Entropy 0.66700 (0.66784)	Top-1 acc 68.359 (72.720)	Top-5 acc 89.453 (89.081)	lr 0.00033
Train [112][540/3239]	Time 0.230 (0.744)	Data Time 0.001 (0.074)	Loss 2.0079 (2.1616)	Entropy 0.66685 (0.66782)	Top-1 acc 76.562 (72.713)	Top-5 acc 93.359 (89.098)	lr 0.00033
Train [112][550/3239]	Time 0.257 (0.739)	Data Time 0.001 (0.073)	Loss 2.1835 (2.1618)	Entropy 0.66687 (0.66780)	Top-1 acc 70.312 (72.720)	Top-5 acc 89.453 (89.083)	lr 0.00033
Train [112][560/3239]	Time 2.606 (0.735)	Data Time 0.001 (0.072)	Loss 2.2261 (2.1616)	Entropy 0.66687 (0.66779)	Top-1 acc 71.875 (72.718)	Top-5 acc 89.453 (89.088)	lr 0.00033
Train [112][570/3239]	Time 0.340 (0.726)	Data Time 0.001 (0.071)	Loss 2.1909 (2.1615)	Entropy 0.66680 (0.66777)	Top-1 acc 73.438 (72.734)	Top-5 acc 89.062 (89.086)	lr 0.00033
Train [112][580/3239]	Time 0.226 (0.722)	Data Time 0.001 (0.069)	Loss 2.1376 (2.1611)	Entropy 0.66674 (0.66775)	Top-1 acc 73.047 (72.746)	Top-5 acc 91.406 (89.098)	lr 0.00033
Train [112][590/3239]	Time 0.229 (0.718)	Data Time 0.001 (0.068)	Loss 2.2256 (2.1608)	Entropy 0.66665 (0.66773)	Top-1 acc 71.875 (72.750)	Top-5 acc 87.891 (89.102)	lr 0.00033
Train [112][600/3239]	Time 0.233 (0.714)	Data Time 0.001 (0.067)	Loss 2.2838 (2.1609)	Entropy 0.66660 (0.66772)	Top-1 acc 69.531 (72.758)	Top-5 acc 87.500 (89.105)	lr 0.00033
Train [112][610/3239]	Time 0.329 (0.710)	Data Time 0.001 (0.066)	Loss 2.3459 (2.1617)	Entropy 0.66649 (0.66770)	Top-1 acc 69.141 (72.749)	Top-5 acc 86.328 (89.092)	lr 0.00033
Train [112][620/3239]	Time 0.237 (0.707)	Data Time 0.001 (0.065)	Loss 2.2658 (2.1621)	Entropy 0.66651 (0.66768)	Top-1 acc 71.875 (72.746)	Top-5 acc 87.109 (89.081)	lr 0.00033
Train [112][630/3239]	Time 0.238 (0.703)	Data Time 0.001 (0.064)	Loss 2.2527 (2.1618)	Entropy 0.66645 (0.66766)	Top-1 acc 71.094 (72.757)	Top-5 acc 87.891 (89.097)	lr 0.00033
Train [112][640/3239]	Time 0.228 (0.700)	Data Time 0.001 (0.063)	Loss 2.2285 (2.1611)	Entropy 0.66634 (0.66764)	Top-1 acc 72.656 (72.773)	Top-5 acc 86.328 (89.103)	lr 0.00033
Train [112][650/3239]	Time 0.268 (0.696)	Data Time 0.001 (0.062)	Loss 2.1114 (2.1609)	Entropy 0.66638 (0.66762)	Top-1 acc 75.781 (72.767)	Top-5 acc 90.234 (89.111)	lr 0.00033
Train [112][660/3239]	Time 0.237 (0.693)	Data Time 0.001 (0.061)	Loss 2.1351 (2.1607)	Entropy 0.66627 (0.66760)	Top-1 acc 72.266 (72.776)	Top-5 acc 90.625 (89.117)	lr 0.00033
Train [112][670/3239]	Time 2.602 (0.690)	Data Time 0.001 (0.060)	Loss 2.1178 (2.1606)	Entropy 0.66627 (0.66758)	Top-1 acc 73.047 (72.774)	Top-5 acc 89.453 (89.113)	lr 0.00033
Train [112][680/3239]	Time 0.246 (0.684)	Data Time 0.001 (0.059)	Loss 2.0720 (2.1609)	Entropy 0.66632 (0.66756)	Top-1 acc 76.953 (72.764)	Top-5 acc 91.016 (89.106)	lr 0.00033
Train [112][690/3239]	Time 0.230 (0.681)	Data Time 0.001 (0.059)	Loss 2.0822 (2.1608)	Entropy 0.66621 (0.66754)	Top-1 acc 75.391 (72.767)	Top-5 acc 92.188 (89.109)	lr 0.00033
Train [112][700/3239]	Time 0.231 (0.678)	Data Time 0.001 (0.058)	Loss 2.1775 (2.1610)	Entropy 0.66621 (0.66752)	Top-1 acc 72.266 (72.762)	Top-5 acc 89.062 (89.108)	lr 0.00033
Train [112][710/3239]	Time 0.243 (0.675)	Data Time 0.001 (0.057)	Loss 2.0805 (2.1606)	Entropy 0.66615 (0.66750)	Top-1 acc 75.781 (72.760)	Top-5 acc 90.625 (89.109)	lr 0.00033
Train [112][720/3239]	Time 0.246 (0.673)	Data Time 0.002 (0.056)	Loss 2.1891 (2.1606)	Entropy 0.66614 (0.66749)	Top-1 acc 71.094 (72.764)	Top-5 acc 88.672 (89.113)	lr 0.00033
Train [112][730/3239]	Time 0.289 (0.670)	Data Time 0.001 (0.056)	Loss 2.1780 (2.1604)	Entropy 0.66618 (0.66747)	Top-1 acc 75.000 (72.762)	Top-5 acc 91.406 (89.124)	lr 0.00033
Train [112][740/3239]	Time 0.241 (0.743)	Data Time 0.002 (0.055)	Loss 2.2536 (2.1620)	Entropy 0.66618 (0.66745)	Top-1 acc 70.312 (72.706)	Top-5 acc 86.719 (89.099)	lr 0.00033
Train [112][750/3239]	Time 0.238 (0.739)	Data Time 0.002 (0.054)	Loss 2.1888 (2.1625)	Entropy 0.66617 (0.66743)	Top-1 acc 70.312 (72.692)	Top-5 acc 88.281 (89.087)	lr 0.00033
Train [112][760/3239]	Time 0.238 (0.736)	Data Time 0.001 (0.053)	Loss 2.0702 (2.1619)	Entropy 0.66609 (0.66742)	Top-1 acc 78.906 (72.712)	Top-5 acc 89.844 (89.086)	lr 0.00033
Train [112][770/3239]	Time 0.229 (0.733)	Data Time 0.001 (0.053)	Loss 2.1025 (2.1618)	Entropy 0.66613 (0.66740)	Top-1 acc 75.781 (72.713)	Top-5 acc 90.625 (89.098)	lr 0.00033
Train [112][780/3239]	Time 2.555 (0.730)	Data Time 0.001 (0.052)	Loss 2.0445 (2.1620)	Entropy 0.66613 (0.66738)	Top-1 acc 75.000 (72.715)	Top-5 acc 93.359 (89.097)	lr 0.00033
Train [112][790/3239]	Time 0.250 (0.724)	Data Time 0.001 (0.051)	Loss 2.0673 (2.1612)	Entropy 0.66611 (0.66737)	Top-1 acc 73.828 (72.741)	Top-5 acc 89.453 (89.112)	lr 0.00033
Train [112][800/3239]	Time 0.235 (0.721)	Data Time 0.001 (0.051)	Loss 2.0351 (2.1612)	Entropy 0.66610 (0.66735)	Top-1 acc 75.781 (72.737)	Top-5 acc 89.844 (89.109)	lr 0.00033
Train [112][810/3239]	Time 0.254 (0.718)	Data Time 0.001 (0.050)	Loss 2.2136 (2.1607)	Entropy 0.66605 (0.66733)	Top-1 acc 71.484 (72.732)	Top-5 acc 89.062 (89.126)	lr 0.00033
Train [112][820/3239]	Time 0.335 (0.715)	Data Time 0.001 (0.050)	Loss 2.2181 (2.1608)	Entropy 0.66607 (0.66732)	Top-1 acc 67.578 (72.727)	Top-5 acc 87.109 (89.120)	lr 0.00033
Train [112][830/3239]	Time 0.227 (0.712)	Data Time 0.001 (0.049)	Loss 2.1808 (2.1608)	Entropy 0.66606 (0.66730)	Top-1 acc 76.562 (72.739)	Top-5 acc 86.719 (89.119)	lr 0.00033
Train [112][840/3239]	Time 0.238 (0.709)	Data Time 0.001 (0.048)	Loss 2.0771 (2.1610)	Entropy 0.66603 (0.66729)	Top-1 acc 73.828 (72.738)	Top-5 acc 90.625 (89.118)	lr 0.00033
Train [112][850/3239]	Time 0.261 (0.707)	Data Time 0.001 (0.048)	Loss 2.0237 (2.1613)	Entropy 0.66594 (0.66727)	Top-1 acc 76.953 (72.733)	Top-5 acc 90.234 (89.106)	lr 0.00033
Train [112][860/3239]	Time 0.328 (0.704)	Data Time 0.001 (0.047)	Loss 2.1691 (2.1609)	Entropy 0.66599 (0.66726)	Top-1 acc 73.828 (72.741)	Top-5 acc 87.109 (89.109)	lr 0.00033
Train [112][870/3239]	Time 0.235 (0.702)	Data Time 0.001 (0.047)	Loss 2.1856 (2.1610)	Entropy 0.66600 (0.66724)	Top-1 acc 73.438 (72.748)	Top-5 acc 86.328 (89.104)	lr 0.00033
Train [112][880/3239]	Time 0.234 (0.699)	Data Time 0.001 (0.046)	Loss 2.1869 (2.1605)	Entropy 0.66603 (0.66723)	Top-1 acc 71.875 (72.765)	Top-5 acc 87.891 (89.110)	lr 0.00032
Train [112][890/3239]	Time 2.541 (0.697)	Data Time 0.001 (0.046)	Loss 2.1400 (2.1601)	Entropy 0.66603 (0.66722)	Top-1 acc 73.828 (72.779)	Top-5 acc 91.797 (89.121)	lr 0.00032
Train [112][900/3239]	Time 0.340 (0.692)	Data Time 0.001 (0.045)	Loss 2.1014 (2.1595)	Entropy 0.66601 (0.66720)	Top-1 acc 74.609 (72.795)	Top-5 acc 92.578 (89.127)	lr 0.00032
Train [112][910/3239]	Time 0.225 (0.689)	Data Time 0.001 (0.045)	Loss 2.3828 (2.1597)	Entropy 0.66596 (0.66719)	Top-1 acc 63.672 (72.787)	Top-5 acc 85.156 (89.127)	lr 0.00032
Train [112][920/3239]	Time 0.226 (0.687)	Data Time 0.001 (0.044)	Loss 2.0507 (2.1599)	Entropy 0.66582 (0.66717)	Top-1 acc 75.391 (72.782)	Top-5 acc 89.844 (89.126)	lr 0.00032
Train [112][930/3239]	Time 0.247 (0.685)	Data Time 0.002 (0.044)	Loss 2.1114 (2.1596)	Entropy 0.66578 (0.66716)	Top-1 acc 73.047 (72.788)	Top-5 acc 90.234 (89.133)	lr 0.00032
Train [112][940/3239]	Time 0.314 (0.683)	Data Time 0.001 (0.043)	Loss 2.3072 (2.1592)	Entropy 0.66577 (0.66715)	Top-1 acc 67.578 (72.792)	Top-5 acc 87.109 (89.138)	lr 0.00032
Train [112][950/3239]	Time 0.228 (0.681)	Data Time 0.001 (0.043)	Loss 2.1287 (2.1592)	Entropy 0.66571 (0.66713)	Top-1 acc 75.000 (72.781)	Top-5 acc 89.844 (89.145)	lr 0.00032
Train [112][960/3239]	Time 0.237 (0.678)	Data Time 0.001 (0.043)	Loss 2.0336 (2.1587)	Entropy 0.66570 (0.66712)	Top-1 acc 78.516 (72.796)	Top-5 acc 91.016 (89.155)	lr 0.00032
Train [112][970/3239]	Time 0.225 (0.676)	Data Time 0.001 (0.042)	Loss 2.1277 (2.1585)	Entropy 0.66567 (0.66710)	Top-1 acc 73.047 (72.804)	Top-5 acc 89.062 (89.155)	lr 0.00032
Train [112][980/3239]	Time 0.234 (0.674)	Data Time 0.001 (0.042)	Loss 2.1710 (2.1587)	Entropy 0.66556 (0.66709)	Top-1 acc 71.094 (72.785)	Top-5 acc 88.672 (89.156)	lr 0.00032
Train [112][990/3239]	Time 0.246 (0.672)	Data Time 0.001 (0.041)	Loss 2.1630 (2.1587)	Entropy 0.66553 (0.66707)	Top-1 acc 72.266 (72.784)	Top-5 acc 88.672 (89.154)	lr 0.00032
Train [112][1000/3239]	Time 2.619 (0.670)	Data Time 0.002 (0.041)	Loss 2.0773 (2.1590)	Entropy 0.66553 (0.66706)	Top-1 acc 74.219 (72.773)	Top-5 acc 89.453 (89.153)	lr 0.00032
Train [112][1010/3239]	Time 0.237 (0.666)	Data Time 0.001 (0.041)	Loss 2.1195 (2.1585)	Entropy 0.66549 (0.66704)	Top-1 acc 76.172 (72.784)	Top-5 acc 90.625 (89.171)	lr 0.00032
Train [112][1020/3239]	Time 0.284 (0.664)	Data Time 0.001 (0.040)	Loss 2.1881 (2.1585)	Entropy 0.66559 (0.66703)	Top-1 acc 70.703 (72.775)	Top-5 acc 88.281 (89.172)	lr 0.00032
Train [112][1030/3239]	Time 0.359 (0.663)	Data Time 0.001 (0.040)	Loss 2.1604 (2.1585)	Entropy 0.66550 (0.66701)	Top-1 acc 71.484 (72.777)	Top-5 acc 89.453 (89.167)	lr 0.00032
Train [112][1040/3239]	Time 0.226 (0.661)	Data Time 0.001 (0.039)	Loss 2.3145 (2.1591)	Entropy 0.66543 (0.66700)	Top-1 acc 70.312 (72.768)	Top-5 acc 83.984 (89.154)	lr 0.00032
Train [112][1050/3239]	Time 0.228 (0.659)	Data Time 0.001 (0.039)	Loss 2.3014 (2.1596)	Entropy 0.66538 (0.66698)	Top-1 acc 70.312 (72.757)	Top-5 acc 86.328 (89.143)	lr 0.00032
Train [112][1060/3239]	Time 0.234 (0.657)	Data Time 0.001 (0.039)	Loss 2.1810 (2.1595)	Entropy 0.66531 (0.66697)	Top-1 acc 73.047 (72.760)	Top-5 acc 87.109 (89.148)	lr 0.00032
Train [112][1070/3239]	Time 0.233 (0.656)	Data Time 0.001 (0.038)	Loss 2.1645 (2.1597)	Entropy 0.66528 (0.66695)	Top-1 acc 72.656 (72.755)	Top-5 acc 90.625 (89.149)	lr 0.00032
Train [112][1080/3239]	Time 0.227 (0.654)	Data Time 0.001 (0.038)	Loss 2.1662 (2.1597)	Entropy 0.66518 (0.66693)	Top-1 acc 73.828 (72.750)	Top-5 acc 88.672 (89.152)	lr 0.00032
Train [112][1090/3239]	Time 0.241 (0.653)	Data Time 0.002 (0.038)	Loss 2.1523 (2.1595)	Entropy 0.66519 (0.66692)	Top-1 acc 73.438 (72.755)	Top-5 acc 89.453 (89.157)	lr 0.00032
Train [112][1100/3239]	Time 0.268 (0.697)	Data Time 0.004 (0.037)	Loss 2.1034 (2.1594)	Entropy 0.66517 (0.66690)	Top-1 acc 76.562 (72.763)	Top-5 acc 89.453 (89.160)	lr 0.00032
Train [112][1110/3239]	Time 3.677 (0.697)	Data Time 0.003 (0.037)	Loss 2.3237 (2.1593)	Entropy 0.66517 (0.66689)	Top-1 acc 70.703 (72.769)	Top-5 acc 84.375 (89.158)	lr 0.00032
Train [112][1120/3239]	Time 0.283 (0.693)	Data Time 0.002 (0.037)	Loss 2.1861 (2.1592)	Entropy 0.66508 (0.66687)	Top-1 acc 71.484 (72.765)	Top-5 acc 89.453 (89.157)	lr 0.00032
Train [112][1130/3239]	Time 0.244 (0.691)	Data Time 0.002 (0.036)	Loss 2.1892 (2.1591)	Entropy 0.66503 (0.66685)	Top-1 acc 71.875 (72.771)	Top-5 acc 86.328 (89.159)	lr 0.00032
Train [112][1140/3239]	Time 0.244 (0.689)	Data Time 0.002 (0.036)	Loss 2.2548 (2.1598)	Entropy 0.66500 (0.66684)	Top-1 acc 67.969 (72.748)	Top-5 acc 88.672 (89.147)	lr 0.00032
Train [112][1150/3239]	Time 0.336 (0.688)	Data Time 0.002 (0.036)	Loss 2.2649 (2.1596)	Entropy 0.66490 (0.66682)	Top-1 acc 69.531 (72.759)	Top-5 acc 85.938 (89.146)	lr 0.00032
Train [112][1160/3239]	Time 0.241 (0.686)	Data Time 0.001 (0.036)	Loss 2.2103 (2.1597)	Entropy 0.66484 (0.66680)	Top-1 acc 67.969 (72.758)	Top-5 acc 88.281 (89.143)	lr 0.00032
Train [112][1170/3239]	Time 0.235 (0.684)	Data Time 0.001 (0.035)	Loss 2.1776 (2.1597)	Entropy 0.66478 (0.66679)	Top-1 acc 71.094 (72.761)	Top-5 acc 85.938 (89.139)	lr 0.00032
Train [112][1180/3239]	Time 0.237 (0.682)	Data Time 0.001 (0.035)	Loss 2.0874 (2.1594)	Entropy 0.66479 (0.66677)	Top-1 acc 73.438 (72.770)	Top-5 acc 92.578 (89.144)	lr 0.00032
Train [112][1190/3239]	Time 0.320 (0.681)	Data Time 0.001 (0.035)	Loss 2.1703 (2.1592)	Entropy 0.66477 (0.66675)	Top-1 acc 69.531 (72.768)	Top-5 acc 89.062 (89.148)	lr 0.00032
Train [112][1200/3239]	Time 0.231 (0.679)	Data Time 0.001 (0.034)	Loss 2.2231 (2.1594)	Entropy 0.66481 (0.66674)	Top-1 acc 70.703 (72.770)	Top-5 acc 88.672 (89.147)	lr 0.00032
Train [112][1210/3239]	Time 0.239 (0.678)	Data Time 0.001 (0.034)	Loss 2.2126 (2.1595)	Entropy 0.66479 (0.66672)	Top-1 acc 70.703 (72.766)	Top-5 acc 88.672 (89.143)	lr 0.00032
Train [112][1220/3239]	Time 2.585 (0.676)	Data Time 0.003 (0.034)	Loss 2.0899 (2.1593)	Entropy 0.66479 (0.66671)	Top-1 acc 75.391 (72.771)	Top-5 acc 89.062 (89.143)	lr 0.00032
Train [112][1230/3239]	Time 0.234 (0.672)	Data Time 0.002 (0.034)	Loss 1.9976 (2.1596)	Entropy 0.66477 (0.66669)	Top-1 acc 76.953 (72.753)	Top-5 acc 91.797 (89.142)	lr 0.00032
Train [112][1240/3239]	Time 0.233 (0.671)	Data Time 0.001 (0.033)	Loss 2.2644 (2.1597)	Entropy 0.66472 (0.66667)	Top-1 acc 69.531 (72.753)	Top-5 acc 88.672 (89.140)	lr 0.00032
Train [112][1250/3239]	Time 0.241 (0.669)	Data Time 0.001 (0.033)	Loss 2.1637 (2.1596)	Entropy 0.66475 (0.66666)	Top-1 acc 72.266 (72.754)	Top-5 acc 90.625 (89.142)	lr 0.00032
Train [112][1260/3239]	Time 0.226 (0.668)	Data Time 0.001 (0.033)	Loss 2.0818 (2.1598)	Entropy 0.66472 (0.66664)	Top-1 acc 76.172 (72.754)	Top-5 acc 91.406 (89.139)	lr 0.00032
Train [112][1270/3239]	Time 0.231 (0.666)	Data Time 0.001 (0.033)	Loss 2.0661 (2.1597)	Entropy 0.66475 (0.66663)	Top-1 acc 76.562 (72.763)	Top-5 acc 89.453 (89.140)	lr 0.00032
Train [112][1280/3239]	Time 0.223 (0.665)	Data Time 0.001 (0.032)	Loss 2.2337 (2.1600)	Entropy 0.66358 (0.66661)	Top-1 acc 69.531 (72.754)	Top-5 acc 87.500 (89.134)	lr 0.00032
Train [112][1290/3239]	Time 0.231 (0.664)	Data Time 0.001 (0.032)	Loss 2.2062 (2.1599)	Entropy 0.66354 (0.66659)	Top-1 acc 70.312 (72.755)	Top-5 acc 87.500 (89.134)	lr 0.00032
Train [112][1300/3239]	Time 0.237 (0.662)	Data Time 0.001 (0.032)	Loss 2.1989 (2.1603)	Entropy 0.66361 (0.66656)	Top-1 acc 71.875 (72.750)	Top-5 acc 86.719 (89.122)	lr 0.00032
Train [112][1310/3239]	Time 0.232 (0.661)	Data Time 0.001 (0.032)	Loss 2.0389 (2.1602)	Entropy 0.66354 (0.66654)	Top-1 acc 78.516 (72.750)	Top-5 acc 92.578 (89.126)	lr 0.00032
Train [112][1320/3239]	Time 0.226 (0.659)	Data Time 0.001 (0.031)	Loss 2.2915 (2.1605)	Entropy 0.66360 (0.66652)	Top-1 acc 69.531 (72.741)	Top-5 acc 87.891 (89.124)	lr 0.00031
Train [112][1330/3239]	Time 2.589 (0.658)	Data Time 0.001 (0.031)	Loss 2.1765 (2.1604)	Entropy 0.66360 (0.66650)	Top-1 acc 70.312 (72.741)	Top-5 acc 91.406 (89.126)	lr 0.00031
Train [112][1340/3239]	Time 0.266 (0.655)	Data Time 0.001 (0.031)	Loss 2.1085 (2.1606)	Entropy 0.66350 (0.66647)	Top-1 acc 72.656 (72.729)	Top-5 acc 91.016 (89.125)	lr 0.00031
Train [112][1350/3239]	Time 0.230 (0.654)	Data Time 0.001 (0.031)	Loss 2.1608 (2.1609)	Entropy 0.66353 (0.66645)	Top-1 acc 71.484 (72.720)	Top-5 acc 89.453 (89.119)	lr 0.00031
Train [112][1360/3239]	Time 0.331 (0.653)	Data Time 0.002 (0.031)	Loss 2.0736 (2.1608)	Entropy 0.66349 (0.66643)	Top-1 acc 75.000 (72.721)	Top-5 acc 91.016 (89.126)	lr 0.00031
Train [112][1370/3239]	Time 0.243 (0.652)	Data Time 0.001 (0.030)	Loss 2.1992 (2.1606)	Entropy 0.66346 (0.66641)	Top-1 acc 71.094 (72.724)	Top-5 acc 87.109 (89.127)	lr 0.00031
Train [112][1380/3239]	Time 0.241 (0.650)	Data Time 0.002 (0.030)	Loss 2.0220 (2.1606)	Entropy 0.66349 (0.66639)	Top-1 acc 74.219 (72.725)	Top-5 acc 92.188 (89.131)	lr 0.00031
Train [112][1390/3239]	Time 0.236 (0.649)	Data Time 0.001 (0.030)	Loss 2.1909 (2.1608)	Entropy 0.66336 (0.66637)	Top-1 acc 70.703 (72.716)	Top-5 acc 89.062 (89.131)	lr 0.00031
Train [112][1400/3239]	Time 0.327 (0.648)	Data Time 0.001 (0.030)	Loss 1.9781 (2.1609)	Entropy 0.66332 (0.66634)	Top-1 acc 76.953 (72.719)	Top-5 acc 91.797 (89.124)	lr 0.00031
Train [112][1410/3239]	Time 0.237 (0.647)	Data Time 0.001 (0.030)	Loss 2.1135 (2.1610)	Entropy 0.66336 (0.66632)	Top-1 acc 73.828 (72.712)	Top-5 acc 91.406 (89.119)	lr 0.00031
Train [112][1420/3239]	Time 0.222 (0.646)	Data Time 0.001 (0.029)	Loss 2.0461 (2.1607)	Entropy 0.66335 (0.66630)	Top-1 acc 74.609 (72.724)	Top-5 acc 89.844 (89.124)	lr 0.00031
Train [112][1430/3239]	Time 0.244 (0.645)	Data Time 0.001 (0.029)	Loss 2.1407 (2.1611)	Entropy 0.66338 (0.66628)	Top-1 acc 73.828 (72.714)	Top-5 acc 90.625 (89.118)	lr 0.00031
Train [112][1440/3239]	Time 2.625 (0.644)	Data Time 0.001 (0.029)	Loss 2.0366 (2.1608)	Entropy 0.66338 (0.66626)	Top-1 acc 75.781 (72.721)	Top-5 acc 91.016 (89.116)	lr 0.00031
Train [112][1450/3239]	Time 0.241 (0.641)	Data Time 0.001 (0.029)	Loss 2.0572 (2.1610)	Entropy 0.66337 (0.66624)	Top-1 acc 76.953 (72.717)	Top-5 acc 90.625 (89.112)	lr 0.00031
Train [112][1460/3239]	Time 0.231 (0.640)	Data Time 0.001 (0.029)	Loss 2.1252 (2.1611)	Entropy 0.66333 (0.66622)	Top-1 acc 73.047 (72.711)	Top-5 acc 89.453 (89.113)	lr 0.00031
Train [112][1470/3239]	Time 0.288 (0.676)	Data Time 0.002 (0.028)	Loss 2.1316 (2.1611)	Entropy 0.66331 (0.66620)	Top-1 acc 74.609 (72.711)	Top-5 acc 89.453 (89.113)	lr 0.00031
Train [112][1480/3239]	Time 0.384 (0.675)	Data Time 0.002 (0.028)	Loss 2.2471 (2.1610)	Entropy 0.66326 (0.66618)	Top-1 acc 71.094 (72.711)	Top-5 acc 87.891 (89.119)	lr 0.00031
Train [112][1490/3239]	Time 0.250 (0.673)	Data Time 0.002 (0.028)	Loss 2.2212 (2.1610)	Entropy 0.66321 (0.66616)	Top-1 acc 73.047 (72.717)	Top-5 acc 86.719 (89.116)	lr 0.00031
Train [112][1500/3239]	Time 0.236 (0.672)	Data Time 0.001 (0.028)	Loss 2.1279 (2.1608)	Entropy 0.66319 (0.66614)	Top-1 acc 75.391 (72.725)	Top-5 acc 90.625 (89.123)	lr 0.00031
Train [112][1510/3239]	Time 0.246 (0.671)	Data Time 0.001 (0.028)	Loss 2.0734 (2.1609)	Entropy 0.66317 (0.66612)	Top-1 acc 75.391 (72.721)	Top-5 acc 91.016 (89.122)	lr 0.00031
Train [112][1520/3239]	Time 0.339 (0.670)	Data Time 0.001 (0.028)	Loss 2.1980 (2.1610)	Entropy 0.66316 (0.66610)	Top-1 acc 68.359 (72.720)	Top-5 acc 87.891 (89.118)	lr 0.00031
Train [112][1530/3239]	Time 0.238 (0.669)	Data Time 0.001 (0.027)	Loss 2.1358 (2.1611)	Entropy 0.66316 (0.66608)	Top-1 acc 71.875 (72.719)	Top-5 acc 89.453 (89.118)	lr 0.00031
Train [112][1540/3239]	Time 0.236 (0.667)	Data Time 0.001 (0.027)	Loss 2.2391 (2.1612)	Entropy 0.66319 (0.66606)	Top-1 acc 68.359 (72.720)	Top-5 acc 87.109 (89.116)	lr 0.00031
Train [112][1550/3239]	Time 2.575 (0.666)	Data Time 0.001 (0.027)	Loss 2.1868 (2.1612)	Entropy 0.66319 (0.66605)	Top-1 acc 69.531 (72.712)	Top-5 acc 87.500 (89.111)	lr 0.00031
Train [112][1560/3239]	Time 0.240 (0.664)	Data Time 0.001 (0.027)	Loss 2.1614 (2.1610)	Entropy 0.66322 (0.66603)	Top-1 acc 73.438 (72.718)	Top-5 acc 87.891 (89.112)	lr 0.00031
Train [112][1570/3239]	Time 0.248 (0.662)	Data Time 0.001 (0.027)	Loss 2.1127 (2.1609)	Entropy 0.66318 (0.66601)	Top-1 acc 75.781 (72.719)	Top-5 acc 91.406 (89.115)	lr 0.00031
Train [112][1580/3239]	Time 0.227 (0.661)	Data Time 0.001 (0.027)	Loss 2.2420 (2.1609)	Entropy 0.66317 (0.66599)	Top-1 acc 73.047 (72.716)	Top-5 acc 86.719 (89.114)	lr 0.00031
Train [112][1590/3239]	Time 0.228 (0.660)	Data Time 0.001 (0.026)	Loss 2.1937 (2.1608)	Entropy 0.66321 (0.66597)	Top-1 acc 71.875 (72.711)	Top-5 acc 89.062 (89.115)	lr 0.00031
Train [112][1600/3239]	Time 0.236 (0.659)	Data Time 0.001 (0.026)	Loss 2.0299 (2.1608)	Entropy 0.66308 (0.66596)	Top-1 acc 74.219 (72.714)	Top-5 acc 91.406 (89.118)	lr 0.00031
Train [112][1610/3239]	Time 0.229 (0.658)	Data Time 0.001 (0.026)	Loss 2.0275 (2.1606)	Entropy 0.66308 (0.66594)	Top-1 acc 80.078 (72.721)	Top-5 acc 92.188 (89.125)	lr 0.00031
Train [112][1620/3239]	Time 0.244 (0.657)	Data Time 0.001 (0.026)	Loss 2.1866 (2.1607)	Entropy 0.66303 (0.66592)	Top-1 acc 71.094 (72.718)	Top-5 acc 89.062 (89.125)	lr 0.00031
Train [112][1630/3239]	Time 0.232 (0.656)	Data Time 0.001 (0.026)	Loss 2.2680 (2.1609)	Entropy 0.66292 (0.66590)	Top-1 acc 67.578 (72.713)	Top-5 acc 87.891 (89.121)	lr 0.00031
Train [112][1640/3239]	Time 0.225 (0.655)	Data Time 0.001 (0.026)	Loss 2.1663 (2.1613)	Entropy 0.66289 (0.66588)	Top-1 acc 71.875 (72.703)	Top-5 acc 89.062 (89.112)	lr 0.00031
Train [112][1650/3239]	Time 0.229 (0.654)	Data Time 0.001 (0.025)	Loss 2.1722 (2.1614)	Entropy 0.66282 (0.66587)	Top-1 acc 71.094 (72.699)	Top-5 acc 89.453 (89.109)	lr 0.00031
Train [112][1660/3239]	Time 2.581 (0.653)	Data Time 0.002 (0.025)	Loss 2.1978 (2.1614)	Entropy 0.66282 (0.66585)	Top-1 acc 71.484 (72.699)	Top-5 acc 89.844 (89.111)	lr 0.00031
Train [112][1670/3239]	Time 0.228 (0.650)	Data Time 0.001 (0.025)	Loss 2.1227 (2.1613)	Entropy 0.66276 (0.66583)	Top-1 acc 75.000 (72.703)	Top-5 acc 90.625 (89.113)	lr 0.00031
Train [112][1680/3239]	Time 0.240 (0.649)	Data Time 0.001 (0.025)	Loss 2.1606 (2.1614)	Entropy 0.66274 (0.66581)	Top-1 acc 74.609 (72.702)	Top-5 acc 87.500 (89.111)	lr 0.00031
Train [112][1690/3239]	Time 0.333 (0.649)	Data Time 0.001 (0.025)	Loss 2.0230 (2.1615)	Entropy 0.66275 (0.66579)	Top-1 acc 75.781 (72.695)	Top-5 acc 91.016 (89.111)	lr 0.00031
Train [112][1700/3239]	Time 0.229 (0.647)	Data Time 0.001 (0.025)	Loss 2.2431 (2.1615)	Entropy 0.66271 (0.66578)	Top-1 acc 71.094 (72.697)	Top-5 acc 89.062 (89.108)	lr 0.00031
Train [112][1710/3239]	Time 0.233 (0.647)	Data Time 0.001 (0.025)	Loss 2.1826 (2.1616)	Entropy 0.66268 (0.66576)	Top-1 acc 69.531 (72.694)	Top-5 acc 90.234 (89.105)	lr 0.00031
Train [112][1720/3239]	Time 0.258 (0.646)	Data Time 0.002 (0.024)	Loss 2.2269 (2.1615)	Entropy 0.66266 (0.66574)	Top-1 acc 73.828 (72.699)	Top-5 acc 87.891 (89.107)	lr 0.00031
Train [112][1730/3239]	Time 0.345 (0.645)	Data Time 0.002 (0.024)	Loss 2.0175 (2.1616)	Entropy 0.66262 (0.66572)	Top-1 acc 75.000 (72.698)	Top-5 acc 90.625 (89.107)	lr 0.00031
Train [112][1740/3239]	Time 0.231 (0.644)	Data Time 0.001 (0.024)	Loss 2.1071 (2.1617)	Entropy 0.66261 (0.66570)	Top-1 acc 72.266 (72.694)	Top-5 acc 91.406 (89.108)	lr 0.00031
Train [112][1750/3239]	Time 0.218 (0.643)	Data Time 0.001 (0.024)	Loss 2.2031 (2.1615)	Entropy 0.66258 (0.66569)	Top-1 acc 69.141 (72.695)	Top-5 acc 89.844 (89.112)	lr 0.00031
Train [112][1760/3239]	Time 0.291 (0.642)	Data Time 0.001 (0.024)	Loss 2.3131 (2.1616)	Entropy 0.66257 (0.66567)	Top-1 acc 70.703 (72.694)	Top-5 acc 86.719 (89.113)	lr 0.00031
Train [112][1770/3239]	Time 2.705 (0.642)	Data Time 0.001 (0.024)	Loss 2.2162 (2.1616)	Entropy 0.66257 (0.66565)	Top-1 acc 73.047 (72.693)	Top-5 acc 89.844 (89.108)	lr 0.00030
Train [112][1780/3239]	Time 0.240 (0.639)	Data Time 0.001 (0.024)	Loss 2.0752 (2.1616)	Entropy 0.66256 (0.66563)	Top-1 acc 75.781 (72.698)	Top-5 acc 91.016 (89.111)	lr 0.00030
Train [112][1790/3239]	Time 0.237 (0.638)	Data Time 0.001 (0.024)	Loss 2.0176 (2.1618)	Entropy 0.66256 (0.66562)	Top-1 acc 74.609 (72.690)	Top-5 acc 91.016 (89.107)	lr 0.00030
Train [112][1800/3239]	Time 0.238 (0.638)	Data Time 0.001 (0.023)	Loss 2.2332 (2.1619)	Entropy 0.66256 (0.66560)	Top-1 acc 71.094 (72.686)	Top-5 acc 87.891 (89.106)	lr 0.00030
Train [112][1810/3239]	Time 0.239 (0.637)	Data Time 0.001 (0.023)	Loss 2.3230 (2.1617)	Entropy 0.66260 (0.66558)	Top-1 acc 69.922 (72.686)	Top-5 acc 86.328 (89.109)	lr 0.00030
Train [112][1820/3239]	Time 0.240 (0.636)	Data Time 0.001 (0.023)	Loss 2.1208 (2.1616)	Entropy 0.66263 (0.66557)	Top-1 acc 72.266 (72.686)	Top-5 acc 90.625 (89.112)	lr 0.00030
Train [112][1830/3239]	Time 0.240 (0.665)	Data Time 0.002 (0.023)	Loss 2.2276 (2.1615)	Entropy 0.66264 (0.66555)	Top-1 acc 72.266 (72.685)	Top-5 acc 86.719 (89.115)	lr 0.00030
Train [112][1840/3239]	Time 0.243 (0.665)	Data Time 0.002 (0.023)	Loss 2.0888 (2.1614)	Entropy 0.66267 (0.66553)	Top-1 acc 74.219 (72.689)	Top-5 acc 91.797 (89.118)	lr 0.00030
Train [112][1850/3239]	Time 0.227 (0.664)	Data Time 0.001 (0.023)	Loss 2.0804 (2.1614)	Entropy 0.66239 (0.66552)	Top-1 acc 75.000 (72.687)	Top-5 acc 91.797 (89.121)	lr 0.00030
Train [112][1860/3239]	Time 0.232 (0.663)	Data Time 0.002 (0.023)	Loss 2.1690 (2.1615)	Entropy 0.66236 (0.66550)	Top-1 acc 74.219 (72.681)	Top-5 acc 89.453 (89.119)	lr 0.00030
Train [112][1870/3239]	Time 0.235 (0.662)	Data Time 0.001 (0.023)	Loss 2.2116 (2.1617)	Entropy 0.66234 (0.66549)	Top-1 acc 73.047 (72.678)	Top-5 acc 87.891 (89.114)	lr 0.00030
Train [112][1880/3239]	Time 2.569 (0.661)	Data Time 0.001 (0.023)	Loss 2.2647 (2.1619)	Entropy 0.66234 (0.66547)	Top-1 acc 68.750 (72.677)	Top-5 acc 87.109 (89.111)	lr 0.00030
Train [112][1890/3239]	Time 0.255 (0.659)	Data Time 0.005 (0.022)	Loss 2.1772 (2.1617)	Entropy 0.66224 (0.66545)	Top-1 acc 71.484 (72.679)	Top-5 acc 87.500 (89.113)	lr 0.00030
Train [112][1900/3239]	Time 0.231 (0.658)	Data Time 0.001 (0.022)	Loss 2.0977 (2.1614)	Entropy 0.66215 (0.66543)	Top-1 acc 74.609 (72.687)	Top-5 acc 90.625 (89.118)	lr 0.00030
Train [112][1910/3239]	Time 0.245 (0.657)	Data Time 0.001 (0.022)	Loss 2.1961 (2.1617)	Entropy 0.66219 (0.66542)	Top-1 acc 72.656 (72.683)	Top-5 acc 87.109 (89.113)	lr 0.00030
Train [112][1920/3239]	Time 0.238 (0.656)	Data Time 0.001 (0.022)	Loss 2.1837 (2.1617)	Entropy 0.66216 (0.66540)	Top-1 acc 69.141 (72.681)	Top-5 acc 89.062 (89.112)	lr 0.00030
Train [112][1930/3239]	Time 0.248 (0.655)	Data Time 0.001 (0.022)	Loss 2.2055 (2.1618)	Entropy 0.66216 (0.66538)	Top-1 acc 71.875 (72.682)	Top-5 acc 86.328 (89.107)	lr 0.00030
Train [112][1940/3239]	Time 0.233 (0.654)	Data Time 0.001 (0.022)	Loss 2.1347 (2.1619)	Entropy 0.66220 (0.66537)	Top-1 acc 71.484 (72.683)	Top-5 acc 89.844 (89.105)	lr 0.00030
Train [112][1950/3239]	Time 0.240 (0.653)	Data Time 0.001 (0.022)	Loss 2.1956 (2.1621)	Entropy 0.66222 (0.66535)	Top-1 acc 76.953 (72.684)	Top-5 acc 88.281 (89.100)	lr 0.00030
Train [112][1960/3239]	Time 0.218 (0.653)	Data Time 0.001 (0.022)	Loss 2.1597 (2.1623)	Entropy 0.66217 (0.66533)	Top-1 acc 70.703 (72.678)	Top-5 acc 91.016 (89.098)	lr 0.00030
Train [112][1970/3239]	Time 0.244 (0.652)	Data Time 0.001 (0.022)	Loss 2.0834 (2.1624)	Entropy 0.66214 (0.66532)	Top-1 acc 74.219 (72.676)	Top-5 acc 89.844 (89.094)	lr 0.00030
Train [112][1980/3239]	Time 0.329 (0.651)	Data Time 0.001 (0.021)	Loss 2.1601 (2.1626)	Entropy 0.66207 (0.66530)	Top-1 acc 73.828 (72.672)	Top-5 acc 85.938 (89.087)	lr 0.00030
Train [112][1990/3239]	Time 2.647 (0.650)	Data Time 0.001 (0.021)	Loss 2.2325 (2.1623)	Entropy 0.66207 (0.66529)	Top-1 acc 73.438 (72.677)	Top-5 acc 87.500 (89.094)	lr 0.00030
Train [112][2000/3239]	Time 0.219 (0.648)	Data Time 0.001 (0.021)	Loss 2.1115 (2.1624)	Entropy 0.66207 (0.66527)	Top-1 acc 69.922 (72.673)	Top-5 acc 89.844 (89.094)	lr 0.00030
Train [112][2010/3239]	Time 0.232 (0.647)	Data Time 0.001 (0.021)	Loss 2.3026 (2.1628)	Entropy 0.66200 (0.66525)	Top-1 acc 69.141 (72.662)	Top-5 acc 85.156 (89.089)	lr 0.00030
Train [112][2020/3239]	Time 0.336 (0.647)	Data Time 0.001 (0.021)	Loss 2.1235 (2.1627)	Entropy 0.66208 (0.66524)	Top-1 acc 73.047 (72.672)	Top-5 acc 87.500 (89.087)	lr 0.00030
Train [112][2030/3239]	Time 0.243 (0.646)	Data Time 0.001 (0.021)	Loss 2.2500 (2.1626)	Entropy 0.66203 (0.66522)	Top-1 acc 69.531 (72.674)	Top-5 acc 88.672 (89.089)	lr 0.00030
Train [112][2040/3239]	Time 0.247 (0.645)	Data Time 0.001 (0.021)	Loss 2.1735 (2.1626)	Entropy 0.66204 (0.66521)	Top-1 acc 73.438 (72.673)	Top-5 acc 87.500 (89.088)	lr 0.00030
Train [112][2050/3239]	Time 0.225 (0.644)	Data Time 0.001 (0.021)	Loss 2.1382 (2.1625)	Entropy 0.66205 (0.66519)	Top-1 acc 70.703 (72.677)	Top-5 acc 90.234 (89.092)	lr 0.00030
Train [112][2060/3239]	Time 0.330 (0.644)	Data Time 0.001 (0.021)	Loss 2.0923 (2.1623)	Entropy 0.66203 (0.66518)	Top-1 acc 76.562 (72.680)	Top-5 acc 89.844 (89.095)	lr 0.00030
Train [112][2070/3239]	Time 0.227 (0.643)	Data Time 0.001 (0.021)	Loss 1.9274 (2.1623)	Entropy 0.66206 (0.66516)	Top-1 acc 75.391 (72.679)	Top-5 acc 93.750 (89.096)	lr 0.00030
Train [112][2080/3239]	Time 0.233 (0.642)	Data Time 0.001 (0.021)	Loss 2.1180 (2.1623)	Entropy 0.66197 (0.66515)	Top-1 acc 75.781 (72.679)	Top-5 acc 89.453 (89.095)	lr 0.00030
Train [112][2090/3239]	Time 0.251 (0.641)	Data Time 0.001 (0.020)	Loss 2.1847 (2.1623)	Entropy 0.66201 (0.66513)	Top-1 acc 70.312 (72.680)	Top-5 acc 89.453 (89.095)	lr 0.00030
Train [112][2100/3239]	Time 2.729 (0.641)	Data Time 0.001 (0.020)	Loss 2.0834 (2.1622)	Entropy 0.66201 (0.66512)	Top-1 acc 76.172 (72.685)	Top-5 acc 90.625 (89.096)	lr 0.00030
Train [112][2110/3239]	Time 0.242 (0.639)	Data Time 0.001 (0.020)	Loss 2.1928 (2.1623)	Entropy 0.66204 (0.66510)	Top-1 acc 73.828 (72.677)	Top-5 acc 89.062 (89.094)	lr 0.00030
Train [112][2120/3239]	Time 0.243 (0.638)	Data Time 0.001 (0.020)	Loss 2.2278 (2.1624)	Entropy 0.66200 (0.66509)	Top-1 acc 71.484 (72.672)	Top-5 acc 87.891 (89.093)	lr 0.00030
Train [112][2130/3239]	Time 0.249 (0.637)	Data Time 0.002 (0.020)	Loss 2.0553 (2.1624)	Entropy 0.66193 (0.66507)	Top-1 acc 76.562 (72.675)	Top-5 acc 91.406 (89.093)	lr 0.00030
Train [112][2140/3239]	Time 0.232 (0.637)	Data Time 0.001 (0.020)	Loss 2.1737 (2.1624)	Entropy 0.66185 (0.66506)	Top-1 acc 75.000 (72.675)	Top-5 acc 88.281 (89.094)	lr 0.00030
Train [112][2150/3239]	Time 0.235 (0.636)	Data Time 0.001 (0.020)	Loss 2.2321 (2.1624)	Entropy 0.66182 (0.66504)	Top-1 acc 69.531 (72.672)	Top-5 acc 88.281 (89.095)	lr 0.00030
Train [112][2160/3239]	Time 0.239 (0.635)	Data Time 0.001 (0.020)	Loss 2.1991 (2.1623)	Entropy 0.66178 (0.66503)	Top-1 acc 70.312 (72.673)	Top-5 acc 89.844 (89.097)	lr 0.00030
Train [112][2170/3239]	Time 0.230 (0.635)	Data Time 0.001 (0.020)	Loss 2.1522 (2.1625)	Entropy 0.66177 (0.66501)	Top-1 acc 74.219 (72.665)	Top-5 acc 90.625 (89.096)	lr 0.00030
Train [112][2180/3239]	Time 0.231 (0.634)	Data Time 0.001 (0.020)	Loss 2.1603 (2.1624)	Entropy 0.66176 (0.66500)	Top-1 acc 73.047 (72.667)	Top-5 acc 90.625 (89.099)	lr 0.00030
Train [112][2190/3239]	Time 0.236 (0.659)	Data Time 0.002 (0.020)	Loss 2.1900 (2.1623)	Entropy 0.66174 (0.66498)	Top-1 acc 71.875 (72.670)	Top-5 acc 89.453 (89.102)	lr 0.00030
Train [112][2200/3239]	Time 0.240 (0.658)	Data Time 0.002 (0.020)	Loss 2.2715 (2.1625)	Entropy 0.66168 (0.66497)	Top-1 acc 69.531 (72.662)	Top-5 acc 87.500 (89.099)	lr 0.00030
Train [112][2210/3239]	Time 2.584 (0.657)	Data Time 0.002 (0.019)	Loss 2.1770 (2.1624)	Entropy 0.66168 (0.66495)	Top-1 acc 74.219 (72.671)	Top-5 acc 89.453 (89.101)	lr 0.00030
Train [112][2220/3239]	Time 0.248 (0.656)	Data Time 0.002 (0.019)	Loss 2.0721 (2.1623)	Entropy 0.66169 (0.66494)	Top-1 acc 74.609 (72.675)	Top-5 acc 91.016 (89.103)	lr 0.00029
Train [112][2230/3239]	Time 0.245 (0.655)	Data Time 0.002 (0.019)	Loss 2.0796 (2.1623)	Entropy 0.66168 (0.66492)	Top-1 acc 74.219 (72.676)	Top-5 acc 91.797 (89.106)	lr 0.00029
Train [112][2240/3239]	Time 0.249 (0.654)	Data Time 0.001 (0.019)	Loss 1.9852 (2.1622)	Entropy 0.66162 (0.66491)	Top-1 acc 78.516 (72.678)	Top-5 acc 92.578 (89.109)	lr 0.00029
Train [112][2250/3239]	Time 0.230 (0.653)	Data Time 0.001 (0.019)	Loss 2.0708 (2.1619)	Entropy 0.66161 (0.66489)	Top-1 acc 78.906 (72.682)	Top-5 acc 91.406 (89.112)	lr 0.00029
Train [112][2260/3239]	Time 0.231 (0.653)	Data Time 0.001 (0.019)	Loss 2.1840 (2.1622)	Entropy 0.66158 (0.66488)	Top-1 acc 74.609 (72.676)	Top-5 acc 88.281 (89.108)	lr 0.00029
Train [112][2270/3239]	Time 0.314 (0.652)	Data Time 0.001 (0.019)	Loss 1.9781 (2.1619)	Entropy 0.66158 (0.66487)	Top-1 acc 76.953 (72.685)	Top-5 acc 90.234 (89.112)	lr 0.00029
Train [112][2280/3239]	Time 0.236 (0.651)	Data Time 0.001 (0.019)	Loss 2.3219 (2.1617)	Entropy 0.66158 (0.66485)	Top-1 acc 66.016 (72.690)	Top-5 acc 86.719 (89.116)	lr 0.00029
Train [112][2290/3239]	Time 0.228 (0.651)	Data Time 0.001 (0.019)	Loss 2.1310 (2.1618)	Entropy 0.66162 (0.66484)	Top-1 acc 69.922 (72.685)	Top-5 acc 91.406 (89.115)	lr 0.00029
Train [112][2300/3239]	Time 0.235 (0.650)	Data Time 0.001 (0.019)	Loss 2.0861 (2.1618)	Entropy 0.66158 (0.66482)	Top-1 acc 74.609 (72.685)	Top-5 acc 89.062 (89.114)	lr 0.00029
Train [112][2310/3239]	Time 0.325 (0.649)	Data Time 0.001 (0.019)	Loss 2.1703 (2.1619)	Entropy 0.66152 (0.66481)	Top-1 acc 75.000 (72.683)	Top-5 acc 87.500 (89.112)	lr 0.00029
Train [112][2320/3239]	Time 2.884 (0.649)	Data Time 0.001 (0.019)	Loss 2.2995 (2.1619)	Entropy 0.66152 (0.66479)	Top-1 acc 66.406 (72.678)	Top-5 acc 86.719 (89.110)	lr 0.00029
Train [112][2330/3239]	Time 0.257 (0.647)	Data Time 0.001 (0.019)	Loss 2.1938 (2.1620)	Entropy 0.66145 (0.66478)	Top-1 acc 74.609 (72.677)	Top-5 acc 91.016 (89.110)	lr 0.00029
Train [112][2340/3239]	Time 0.236 (0.646)	Data Time 0.001 (0.018)	Loss 2.2101 (2.1620)	Entropy 0.66140 (0.66477)	Top-1 acc 71.094 (72.674)	Top-5 acc 88.672 (89.109)	lr 0.00029
Train [112][2350/3239]	Time 0.381 (0.646)	Data Time 0.002 (0.018)	Loss 2.0838 (2.1618)	Entropy 0.66138 (0.66475)	Top-1 acc 73.438 (72.676)	Top-5 acc 92.188 (89.112)	lr 0.00029
Train [112][2360/3239]	Time 0.235 (0.645)	Data Time 0.001 (0.018)	Loss 2.1341 (2.1619)	Entropy 0.66139 (0.66474)	Top-1 acc 73.438 (72.676)	Top-5 acc 89.062 (89.111)	lr 0.00029
Train [112][2370/3239]	Time 0.232 (0.644)	Data Time 0.001 (0.018)	Loss 2.0286 (2.1620)	Entropy 0.66140 (0.66472)	Top-1 acc 76.172 (72.675)	Top-5 acc 91.797 (89.110)	lr 0.00029
Train [112][2380/3239]	Time 0.237 (0.644)	Data Time 0.006 (0.018)	Loss 2.0471 (2.1620)	Entropy 0.66140 (0.66471)	Top-1 acc 73.828 (72.674)	Top-5 acc 92.188 (89.112)	lr 0.00029
Train [112][2390/3239]	Time 0.345 (0.643)	Data Time 0.001 (0.018)	Loss 2.0531 (2.1621)	Entropy 0.66146 (0.66470)	Top-1 acc 74.609 (72.671)	Top-5 acc 91.797 (89.108)	lr 0.00029
Train [112][2400/3239]	Time 0.238 (0.642)	Data Time 0.001 (0.018)	Loss 2.2307 (2.1623)	Entropy 0.66141 (0.66468)	Top-1 acc 68.750 (72.666)	Top-5 acc 87.891 (89.107)	lr 0.00029
Train [112][2410/3239]	Time 0.234 (0.642)	Data Time 0.001 (0.018)	Loss 2.1657 (2.1624)	Entropy 0.66141 (0.66467)	Top-1 acc 73.828 (72.664)	Top-5 acc 90.625 (89.106)	lr 0.00029
Train [112][2420/3239]	Time 0.259 (0.641)	Data Time 0.001 (0.018)	Loss 2.2385 (2.1624)	Entropy 0.66135 (0.66465)	Top-1 acc 69.922 (72.662)	Top-5 acc 86.719 (89.104)	lr 0.00029
Train [112][2430/3239]	Time 2.808 (0.641)	Data Time 0.001 (0.018)	Loss 2.1230 (2.1626)	Entropy 0.66135 (0.66464)	Top-1 acc 71.094 (72.660)	Top-5 acc 91.797 (89.101)	lr 0.00029
Train [112][2440/3239]	Time 0.252 (0.639)	Data Time 0.001 (0.018)	Loss 2.0936 (2.1626)	Entropy 0.66130 (0.66463)	Top-1 acc 73.438 (72.650)	Top-5 acc 92.188 (89.102)	lr 0.00029
Train [112][2450/3239]	Time 0.226 (0.638)	Data Time 0.001 (0.018)	Loss 2.0571 (2.1625)	Entropy 0.66135 (0.66461)	Top-1 acc 75.000 (72.655)	Top-5 acc 91.016 (89.103)	lr 0.00029
Train [112][2460/3239]	Time 0.252 (0.638)	Data Time 0.002 (0.018)	Loss 2.1143 (2.1624)	Entropy 0.66133 (0.66460)	Top-1 acc 71.484 (72.654)	Top-5 acc 90.625 (89.103)	lr 0.00029
Train [112][2470/3239]	Time 0.246 (0.637)	Data Time 0.001 (0.018)	Loss 1.9652 (2.1623)	Entropy 0.66138 (0.66459)	Top-1 acc 78.516 (72.658)	Top-5 acc 93.359 (89.105)	lr 0.00029
Train [112][2480/3239]	Time 0.230 (0.637)	Data Time 0.001 (0.017)	Loss 2.1437 (2.1622)	Entropy 0.66135 (0.66457)	Top-1 acc 78.516 (72.664)	Top-5 acc 87.500 (89.109)	lr 0.00029
Train [112][2490/3239]	Time 0.268 (0.636)	Data Time 0.003 (0.017)	Loss 2.1419 (2.1621)	Entropy 0.66121 (0.66456)	Top-1 acc 74.219 (72.663)	Top-5 acc 90.234 (89.113)	lr 0.00029
Train [112][2500/3239]	Time 0.222 (0.635)	Data Time 0.004 (0.017)	Loss 2.2629 (2.1620)	Entropy 0.66116 (0.66455)	Top-1 acc 69.141 (72.664)	Top-5 acc 89.844 (89.113)	lr 0.00029
Train [112][2510/3239]	Time 0.239 (0.635)	Data Time 0.001 (0.017)	Loss 2.2629 (2.1620)	Entropy 0.66105 (0.66453)	Top-1 acc 67.188 (72.661)	Top-5 acc 88.281 (89.113)	lr 0.00029
Train [112][2520/3239]	Time 0.236 (0.634)	Data Time 0.001 (0.017)	Loss 2.1082 (2.1620)	Entropy 0.66103 (0.66452)	Top-1 acc 71.484 (72.661)	Top-5 acc 90.625 (89.114)	lr 0.00029
Train [112][2530/3239]	Time 0.227 (0.634)	Data Time 0.001 (0.017)	Loss 2.2236 (2.1620)	Entropy 0.66096 (0.66451)	Top-1 acc 70.703 (72.659)	Top-5 acc 86.719 (89.111)	lr 0.00029
Train [112][2540/3239]	Time 2.636 (0.633)	Data Time 0.001 (0.017)	Loss 2.2768 (2.1621)	Entropy 0.66096 (0.66449)	Top-1 acc 67.969 (72.657)	Top-5 acc 86.719 (89.108)	lr 0.00029
Train [112][2550/3239]	Time 0.259 (0.632)	Data Time 0.001 (0.017)	Loss 2.1394 (2.1621)	Entropy 0.66098 (0.66448)	Top-1 acc 74.609 (72.655)	Top-5 acc 88.672 (89.107)	lr 0.00029
Train [112][2560/3239]	Time 0.275 (0.652)	Data Time 0.002 (0.017)	Loss 2.1313 (2.1621)	Entropy 0.66099 (0.66447)	Top-1 acc 74.219 (72.656)	Top-5 acc 89.062 (89.107)	lr 0.00029
Train [112][2570/3239]	Time 0.236 (0.651)	Data Time 0.002 (0.017)	Loss 2.0329 (2.1620)	Entropy 0.66094 (0.66445)	Top-1 acc 75.391 (72.658)	Top-5 acc 89.453 (89.107)	lr 0.00029
Train [112][2580/3239]	Time 0.240 (0.651)	Data Time 0.001 (0.017)	Loss 1.9815 (2.1621)	Entropy 0.66091 (0.66444)	Top-1 acc 76.953 (72.653)	Top-5 acc 90.625 (89.104)	lr 0.00029
Train [112][2590/3239]	Time 0.241 (0.650)	Data Time 0.001 (0.017)	Loss 2.2415 (2.1621)	Entropy 0.66084 (0.66442)	Top-1 acc 71.875 (72.652)	Top-5 acc 87.891 (89.101)	lr 0.00029
Train [112][2600/3239]	Time 0.334 (0.649)	Data Time 0.001 (0.017)	Loss 2.0901 (2.1621)	Entropy 0.66080 (0.66441)	Top-1 acc 74.219 (72.654)	Top-5 acc 91.016 (89.102)	lr 0.00029
Train [112][2610/3239]	Time 0.240 (0.649)	Data Time 0.001 (0.017)	Loss 2.2234 (2.1619)	Entropy 0.66069 (0.66440)	Top-1 acc 71.875 (72.656)	Top-5 acc 87.500 (89.106)	lr 0.00029
Train [112][2620/3239]	Time 0.244 (0.648)	Data Time 0.002 (0.017)	Loss 2.1479 (2.1619)	Entropy 0.66064 (0.66438)	Top-1 acc 73.828 (72.658)	Top-5 acc 88.281 (89.104)	lr 0.00029
Train [112][2630/3239]	Time 0.241 (0.648)	Data Time 0.001 (0.017)	Loss 2.1307 (2.1619)	Entropy 0.66057 (0.66437)	Top-1 acc 73.047 (72.658)	Top-5 acc 89.062 (89.102)	lr 0.00029
Train [112][2640/3239]	Time 0.322 (0.647)	Data Time 0.001 (0.017)	Loss 2.1508 (2.1618)	Entropy 0.66053 (0.66435)	Top-1 acc 71.484 (72.663)	Top-5 acc 89.844 (89.106)	lr 0.00029
Train [112][2650/3239]	Time 0.238 (0.646)	Data Time 0.001 (0.016)	Loss 2.1118 (2.1617)	Entropy 0.66046 (0.66434)	Top-1 acc 74.219 (72.668)	Top-5 acc 89.062 (89.110)	lr 0.00029
Train [112][2660/3239]	Time 0.229 (0.646)	Data Time 0.001 (0.016)	Loss 2.1642 (2.1617)	Entropy 0.66038 (0.66432)	Top-1 acc 71.875 (72.667)	Top-5 acc 90.625 (89.108)	lr 0.00029
Train [112][2670/3239]	Time 0.239 (0.645)	Data Time 0.001 (0.016)	Loss 2.2331 (2.1617)	Entropy 0.66037 (0.66431)	Top-1 acc 70.703 (72.672)	Top-5 acc 90.234 (89.109)	lr 0.00029
Train [112][2680/3239]	Time 0.227 (0.645)	Data Time 0.001 (0.016)	Loss 2.1346 (2.1617)	Entropy 0.66032 (0.66429)	Top-1 acc 71.875 (72.672)	Top-5 acc 91.406 (89.108)	lr 0.00029
Train [112][2690/3239]	Time 0.232 (0.644)	Data Time 0.001 (0.016)	Loss 2.1552 (2.1618)	Entropy 0.66028 (0.66428)	Top-1 acc 69.922 (72.668)	Top-5 acc 90.234 (89.110)	lr 0.00028
Train [112][2700/3239]	Time 0.252 (0.643)	Data Time 0.001 (0.016)	Loss 2.2393 (2.1617)	Entropy 0.66025 (0.66427)	Top-1 acc 69.141 (72.669)	Top-5 acc 83.594 (89.106)	lr 0.00028
Train [112][2710/3239]	Time 0.232 (0.643)	Data Time 0.001 (0.016)	Loss 2.2394 (2.1617)	Entropy 0.66004 (0.66425)	Top-1 acc 70.703 (72.672)	Top-5 acc 87.109 (89.105)	lr 0.00028
Train [112][2720/3239]	Time 0.248 (0.642)	Data Time 0.002 (0.016)	Loss 2.0744 (2.1616)	Entropy 0.65999 (0.66423)	Top-1 acc 75.391 (72.673)	Top-5 acc 91.406 (89.109)	lr 0.00028
Train [112][2730/3239]	Time 0.239 (0.642)	Data Time 0.002 (0.016)	Loss 2.3002 (2.1617)	Entropy 0.65998 (0.66422)	Top-1 acc 73.438 (72.670)	Top-5 acc 86.719 (89.106)	lr 0.00028
Train [112][2740/3239]	Time 0.233 (0.641)	Data Time 0.001 (0.016)	Loss 2.1840 (2.1616)	Entropy 0.65999 (0.66420)	Top-1 acc 73.828 (72.672)	Top-5 acc 87.500 (89.105)	lr 0.00028
Train [112][2750/3239]	Time 0.223 (0.641)	Data Time 0.001 (0.016)	Loss 2.3187 (2.1616)	Entropy 0.66009 (0.66419)	Top-1 acc 67.188 (72.677)	Top-5 acc 84.766 (89.107)	lr 0.00028
Train [112][2760/3239]	Time 0.268 (0.640)	Data Time 0.001 (0.016)	Loss 2.0687 (2.1616)	Entropy 0.66009 (0.66417)	Top-1 acc 73.438 (72.679)	Top-5 acc 93.750 (89.107)	lr 0.00028
Train [112][2770/3239]	Time 0.339 (0.640)	Data Time 0.001 (0.016)	Loss 2.2360 (2.1614)	Entropy 0.66012 (0.66416)	Top-1 acc 69.141 (72.681)	Top-5 acc 87.500 (89.111)	lr 0.00028
Train [112][2780/3239]	Time 0.225 (0.639)	Data Time 0.002 (0.016)	Loss 2.0774 (2.1614)	Entropy 0.66005 (0.66414)	Top-1 acc 76.172 (72.678)	Top-5 acc 91.406 (89.111)	lr 0.00028
Train [112][2790/3239]	Time 0.225 (0.638)	Data Time 0.001 (0.016)	Loss 2.1383 (2.1616)	Entropy 0.66004 (0.66413)	Top-1 acc 74.609 (72.671)	Top-5 acc 88.281 (89.110)	lr 0.00028
Train [112][2800/3239]	Time 0.247 (0.638)	Data Time 0.001 (0.016)	Loss 2.1829 (2.1614)	Entropy 0.65998 (0.66412)	Top-1 acc 72.266 (72.677)	Top-5 acc 89.062 (89.113)	lr 0.00028
Train [112][2810/3239]	Time 0.316 (0.637)	Data Time 0.001 (0.016)	Loss 2.2386 (2.1615)	Entropy 0.65993 (0.66410)	Top-1 acc 69.922 (72.674)	Top-5 acc 90.234 (89.113)	lr 0.00028
Train [112][2820/3239]	Time 0.255 (0.637)	Data Time 0.003 (0.016)	Loss 2.1355 (2.1617)	Entropy 0.65985 (0.66409)	Top-1 acc 72.266 (72.674)	Top-5 acc 91.016 (89.110)	lr 0.00028
Train [112][2830/3239]	Time 0.232 (0.636)	Data Time 0.001 (0.016)	Loss 2.2076 (2.1615)	Entropy 0.65987 (0.66407)	Top-1 acc 70.312 (72.679)	Top-5 acc 87.891 (89.113)	lr 0.00028
Train [112][2840/3239]	Time 0.226 (0.635)	Data Time 0.001 (0.015)	Loss 2.2165 (2.1615)	Entropy 0.65985 (0.66406)	Top-1 acc 70.312 (72.682)	Top-5 acc 88.672 (89.113)	lr 0.00028
Train [112][2850/3239]	Time 0.342 (0.635)	Data Time 0.001 (0.015)	Loss 2.1201 (2.1615)	Entropy 0.65982 (0.66404)	Top-1 acc 73.828 (72.682)	Top-5 acc 88.672 (89.113)	lr 0.00028
Train [112][2860/3239]	Time 0.245 (0.634)	Data Time 0.001 (0.015)	Loss 2.2825 (2.1615)	Entropy 0.65986 (0.66403)	Top-1 acc 70.703 (72.680)	Top-5 acc 87.500 (89.114)	lr 0.00028
Train [112][2870/3239]	Time 0.224 (0.634)	Data Time 0.001 (0.015)	Loss 2.2301 (2.1615)	Entropy 0.65982 (0.66401)	Top-1 acc 69.922 (72.680)	Top-5 acc 89.844 (89.112)	lr 0.00028
Train [112][2880/3239]	Time 0.236 (0.633)	Data Time 0.001 (0.015)	Loss 2.2213 (2.1615)	Entropy 0.65976 (0.66400)	Top-1 acc 69.531 (72.681)	Top-5 acc 88.672 (89.112)	lr 0.00028
Train [112][2890/3239]	Time 0.355 (0.652)	Data Time 0.003 (0.015)	Loss 2.2289 (2.1614)	Entropy 0.65972 (0.66398)	Top-1 acc 69.922 (72.684)	Top-5 acc 88.672 (89.114)	lr 0.00028
Train [112][2900/3239]	Time 0.258 (0.652)	Data Time 0.002 (0.015)	Loss 2.1112 (2.1613)	Entropy 0.65974 (0.66397)	Top-1 acc 76.562 (72.691)	Top-5 acc 91.016 (89.117)	lr 0.00028
Train [112][2910/3239]	Time 0.267 (0.651)	Data Time 0.002 (0.015)	Loss 2.1188 (2.1612)	Entropy 0.65970 (0.66395)	Top-1 acc 72.656 (72.689)	Top-5 acc 91.797 (89.119)	lr 0.00028
Train [112][2920/3239]	Time 0.239 (0.651)	Data Time 0.001 (0.015)	Loss 2.1617 (2.1611)	Entropy 0.65971 (0.66394)	Top-1 acc 72.266 (72.691)	Top-5 acc 88.672 (89.121)	lr 0.00028
Train [112][2930/3239]	Time 0.356 (0.650)	Data Time 0.001 (0.015)	Loss 2.2099 (2.1612)	Entropy 0.65960 (0.66392)	Top-1 acc 68.750 (72.689)	Top-5 acc 89.453 (89.121)	lr 0.00028
Train [112][2940/3239]	Time 0.237 (0.650)	Data Time 0.001 (0.015)	Loss 2.2508 (2.1613)	Entropy 0.65960 (0.66391)	Top-1 acc 70.703 (72.686)	Top-5 acc 86.328 (89.118)	lr 0.00028
Train [112][2950/3239]	Time 0.227 (0.649)	Data Time 0.001 (0.015)	Loss 2.0312 (2.1612)	Entropy 0.65960 (0.66390)	Top-1 acc 78.516 (72.690)	Top-5 acc 91.406 (89.117)	lr 0.00028
Train [112][2960/3239]	Time 0.268 (0.648)	Data Time 0.001 (0.015)	Loss 4.0806 (2.1618)	Entropy 0.65962 (0.66388)	Top-1 acc 37.500 (72.677)	Top-5 acc 67.188 (89.113)	lr 0.00028
Train [112][2970/3239]	Time 0.228 (0.648)	Data Time 0.001 (0.015)	Loss 2.1786 (2.1617)	Entropy 0.65959 (0.66387)	Top-1 acc 68.750 (72.677)	Top-5 acc 87.891 (89.115)	lr 0.00028
Train [112][2980/3239]	Time 0.234 (0.647)	Data Time 0.001 (0.015)	Loss 2.1861 (2.1616)	Entropy 0.65946 (0.66385)	Top-1 acc 72.266 (72.679)	Top-5 acc 87.500 (89.116)	lr 0.00028
Train [112][2990/3239]	Time 0.238 (0.647)	Data Time 0.001 (0.015)	Loss 2.2330 (2.1616)	Entropy 0.65939 (0.66384)	Top-1 acc 68.750 (72.675)	Top-5 acc 88.672 (89.115)	lr 0.00028
Train [112][3000/3239]	Time 0.264 (0.646)	Data Time 0.001 (0.015)	Loss 2.1350 (2.1616)	Entropy 0.65937 (0.66382)	Top-1 acc 73.438 (72.673)	Top-5 acc 89.844 (89.116)	lr 0.00028
Train [112][3010/3239]	Time 0.229 (0.646)	Data Time 0.001 (0.015)	Loss 2.2030 (2.1617)	Entropy 0.65939 (0.66381)	Top-1 acc 69.922 (72.671)	Top-5 acc 88.672 (89.114)	lr 0.00028
Train [112][3020/3239]	Time 0.252 (0.645)	Data Time 0.001 (0.015)	Loss 2.2271 (2.1617)	Entropy 0.65938 (0.66379)	Top-1 acc 72.656 (72.670)	Top-5 acc 88.281 (89.115)	lr 0.00028
Train [112][3030/3239]	Time 0.222 (0.645)	Data Time 0.001 (0.015)	Loss 2.1880 (2.1617)	Entropy 0.65931 (0.66378)	Top-1 acc 75.391 (72.668)	Top-5 acc 88.281 (89.114)	lr 0.00028
Train [112][3040/3239]	Time 0.237 (0.644)	Data Time 0.001 (0.015)	Loss 2.0082 (2.1618)	Entropy 0.65941 (0.66376)	Top-1 acc 75.391 (72.668)	Top-5 acc 90.625 (89.113)	lr 0.00028
Train [112][3050/3239]	Time 0.258 (0.644)	Data Time 0.001 (0.015)	Loss 2.0655 (2.1618)	Entropy 0.65943 (0.66375)	Top-1 acc 74.609 (72.667)	Top-5 acc 91.016 (89.113)	lr 0.00028
Train [112][3060/3239]	Time 0.215 (0.643)	Data Time 0.001 (0.014)	Loss 2.1555 (2.1619)	Entropy 0.65940 (0.66374)	Top-1 acc 74.609 (72.661)	Top-5 acc 91.016 (89.114)	lr 0.00028
Train [112][3070/3239]	Time 0.236 (0.643)	Data Time 0.001 (0.014)	Loss 2.2918 (2.1621)	Entropy 0.65942 (0.66372)	Top-1 acc 69.922 (72.654)	Top-5 acc 86.328 (89.110)	lr 0.00028
Train [112][3080/3239]	Time 0.220 (0.642)	Data Time 0.001 (0.014)	Loss 2.2173 (2.1621)	Entropy 0.65938 (0.66371)	Top-1 acc 73.047 (72.655)	Top-5 acc 85.938 (89.108)	lr 0.00028
Train [112][3090/3239]	Time 0.232 (0.642)	Data Time 0.001 (0.014)	Loss 2.2189 (2.1621)	Entropy 0.65941 (0.66369)	Top-1 acc 69.531 (72.653)	Top-5 acc 88.281 (89.110)	lr 0.00028
Train [112][3100/3239]	Time 0.273 (0.641)	Data Time 0.001 (0.014)	Loss 2.1124 (2.1620)	Entropy 0.65937 (0.66368)	Top-1 acc 73.828 (72.655)	Top-5 acc 87.891 (89.110)	lr 0.00028
Train [112][3110/3239]	Time 0.231 (0.641)	Data Time 0.001 (0.014)	Loss 2.1947 (2.1622)	Entropy 0.65930 (0.66367)	Top-1 acc 70.703 (72.653)	Top-5 acc 85.547 (89.106)	lr 0.00028
Train [112][3120/3239]	Time 0.231 (0.640)	Data Time 0.001 (0.014)	Loss 2.0403 (2.1622)	Entropy 0.65923 (0.66365)	Top-1 acc 72.656 (72.650)	Top-5 acc 92.188 (89.108)	lr 0.00028
Train [112][3130/3239]	Time 0.229 (0.640)	Data Time 0.001 (0.014)	Loss 2.1727 (2.1621)	Entropy 0.65921 (0.66364)	Top-1 acc 73.438 (72.651)	Top-5 acc 87.500 (89.109)	lr 0.00028
Train [112][3140/3239]	Time 0.233 (0.639)	Data Time 0.001 (0.014)	Loss 2.1354 (2.1621)	Entropy 0.65921 (0.66362)	Top-1 acc 76.172 (72.655)	Top-5 acc 87.891 (89.109)	lr 0.00028
Train [112][3150/3239]	Time 0.290 (0.639)	Data Time 0.001 (0.014)	Loss 2.1937 (2.1620)	Entropy 0.65914 (0.66361)	Top-1 acc 73.047 (72.660)	Top-5 acc 88.281 (89.108)	lr 0.00028
Train [112][3160/3239]	Time 0.270 (0.638)	Data Time 0.001 (0.014)	Loss 2.1762 (2.1621)	Entropy 0.65909 (0.66360)	Top-1 acc 73.828 (72.660)	Top-5 acc 87.891 (89.106)	lr 0.00027
Train [112][3170/3239]	Time 0.230 (0.638)	Data Time 0.001 (0.014)	Loss 2.1803 (2.1621)	Entropy 0.65906 (0.66358)	Top-1 acc 72.656 (72.658)	Top-5 acc 88.672 (89.104)	lr 0.00027
Train [112][3180/3239]	Time 0.226 (0.637)	Data Time 0.000 (0.014)	Loss 2.1612 (2.1620)	Entropy 0.65901 (0.66357)	Top-1 acc 73.828 (72.660)	Top-5 acc 89.062 (89.105)	lr 0.00027
Train [112][3190/3239]	Time 0.230 (0.636)	Data Time 0.000 (0.014)	Loss 2.2320 (2.1621)	Entropy 0.65897 (0.66355)	Top-1 acc 71.094 (72.657)	Top-5 acc 87.500 (89.104)	lr 0.00027
Train [112][3200/3239]	Time 0.221 (0.636)	Data Time 0.000 (0.014)	Loss 2.1785 (2.1620)	Entropy 0.65900 (0.66354)	Top-1 acc 68.359 (72.659)	Top-5 acc 87.891 (89.105)	lr 0.00027
Train [112][3210/3239]	Time 0.239 (0.635)	Data Time 0.000 (0.014)	Loss 2.2896 (2.1620)	Entropy 0.65903 (0.66352)	Top-1 acc 67.969 (72.658)	Top-5 acc 87.500 (89.105)	lr 0.00027
Train [112][3220/3239]	Time 0.348 (0.651)	Data Time 0.000 (0.014)	Loss 2.0924 (2.1621)	Entropy 0.65899 (0.66351)	Top-1 acc 71.875 (72.658)	Top-5 acc 92.969 (89.106)	lr 0.00027
Train [112][3230/3239]	Time 0.216 (0.651)	Data Time 0.000 (0.014)	Loss 2.3284 (2.1619)	Entropy 0.65892 (0.66350)	Top-1 acc 68.359 (72.663)	Top-5 acc 89.062 (89.109)	lr 0.00027
Train [112][3239/3239]	Time 2.384 (0.650)	Data Time 0.000 (0.014)	Loss 2.4309 (2.1619)	Entropy 0.65892 (0.66348)	Top-1 acc 67.901 (72.664)	Top-5 acc 82.716 (89.110)	lr 0.00027
==========Valid [112/120]	loss 1.198	top-1 acc 72.682 (72.682)	top-5 acc 89.950	Train top-1 72.664	top-5 89.110	Entropy 0.65892	Latency-None: 0.000ms	Flops: 544.27M
Train [113][0/3239]	Time 42.579 (42.579)	Data Time 41.010 (41.010)	Loss 2.1779 (2.1779)	Entropy 0.65894 (0.65894)	Top-1 acc 75.781 (75.781)	Top-5 acc 87.891 (87.891)	lr 0.00027
Train [113][10/3239]	Time 2.684 (4.439)	Data Time 0.002 (3.730)	Loss 2.1270 (2.1374)	Entropy 0.65894 (0.65894)	Top-1 acc 74.609 (73.224)	Top-5 acc 87.891 (89.773)	lr 0.00027
Train [113][20/3239]	Time 0.359 (2.450)	Data Time 0.001 (1.954)	Loss 2.0781 (2.1449)	Entropy 0.65896 (0.65895)	Top-1 acc 76.953 (73.177)	Top-5 acc 89.062 (89.267)	lr 0.00027
Train [113][30/3239]	Time 0.260 (1.817)	Data Time 0.001 (1.324)	Loss 2.2038 (2.1404)	Entropy 0.65890 (0.65894)	Top-1 acc 73.438 (73.311)	Top-5 acc 87.500 (89.138)	lr 0.00027
Train [113][40/3239]	Time 0.242 (1.497)	Data Time 0.001 (1.002)	Loss 2.1100 (2.1490)	Entropy 0.65975 (0.65909)	Top-1 acc 75.781 (72.990)	Top-5 acc 90.234 (89.158)	lr 0.00027
Train [113][50/3239]	Time 0.239 (1.297)	Data Time 0.001 (0.806)	Loss 2.1328 (2.1541)	Entropy 0.65976 (0.65922)	Top-1 acc 72.266 (72.786)	Top-5 acc 88.672 (89.093)	lr 0.00027
Train [113][60/3239]	Time 0.230 (1.162)	Data Time 0.001 (0.674)	Loss 1.9726 (2.1446)	Entropy 0.65969 (0.65930)	Top-1 acc 78.516 (73.105)	Top-5 acc 91.016 (89.159)	lr 0.00027
Train [113][70/3239]	Time 0.238 (1.067)	Data Time 0.001 (0.579)	Loss 2.0852 (2.1380)	Entropy 0.65970 (0.65936)	Top-1 acc 75.391 (73.404)	Top-5 acc 90.234 (89.294)	lr 0.00027
Train [113][80/3239]	Time 0.235 (0.995)	Data Time 0.001 (0.508)	Loss 2.2602 (2.1445)	Entropy 0.65970 (0.65940)	Top-1 acc 69.141 (73.201)	Top-5 acc 86.328 (89.275)	lr 0.00027
Train [113][90/3239]	Time 0.229 (0.938)	Data Time 0.001 (0.452)	Loss 2.1528 (2.1475)	Entropy 0.65965 (0.65943)	Top-1 acc 72.266 (73.077)	Top-5 acc 87.500 (89.226)	lr 0.00027
Train [113][100/3239]	Time 0.225 (0.892)	Data Time 0.001 (0.408)	Loss 2.2411 (2.1529)	Entropy 0.65963 (0.65945)	Top-1 acc 69.531 (72.970)	Top-5 acc 87.109 (89.171)	lr 0.00027
Train [113][110/3239]	Time 0.227 (0.857)	Data Time 0.001 (0.371)	Loss 2.0238 (2.1503)	Entropy 0.65962 (0.65947)	Top-1 acc 76.172 (73.005)	Top-5 acc 91.016 (89.263)	lr 0.00027
Train [113][120/3239]	Time 2.577 (0.825)	Data Time 0.001 (0.340)	Loss 2.0466 (2.1509)	Entropy 0.65962 (0.65948)	Top-1 acc 74.609 (73.028)	Top-5 acc 91.016 (89.247)	lr 0.00027
Train [113][130/3239]	Time 0.256 (0.781)	Data Time 0.001 (0.314)	Loss 2.0699 (2.1560)	Entropy 0.65954 (0.65949)	Top-1 acc 75.391 (72.895)	Top-5 acc 87.500 (89.140)	lr 0.00027
Train [113][140/3239]	Time 0.220 (0.761)	Data Time 0.001 (0.292)	Loss 2.2014 (2.1576)	Entropy 0.65956 (0.65949)	Top-1 acc 68.750 (72.792)	Top-5 acc 89.062 (89.151)	lr 0.00027
Train [113][150/3239]	Time 0.240 (0.743)	Data Time 0.001 (0.273)	Loss 2.1954 (2.1569)	Entropy 0.65948 (0.65949)	Top-1 acc 73.438 (72.796)	Top-5 acc 88.672 (89.200)	lr 0.00027
Train [113][160/3239]	Time 0.230 (0.727)	Data Time 0.001 (0.256)	Loss 2.1251 (2.1546)	Entropy 0.65945 (0.65949)	Top-1 acc 73.047 (72.797)	Top-5 acc 89.844 (89.230)	lr 0.00027
Train [113][170/3239]	Time 0.229 (0.712)	Data Time 0.001 (0.241)	Loss 2.3327 (2.1570)	Entropy 0.65941 (0.65949)	Top-1 acc 66.406 (72.759)	Top-5 acc 84.766 (89.163)	lr 0.00027
Train [113][180/3239]	Time 0.225 (0.699)	Data Time 0.001 (0.228)	Loss 2.0933 (2.1604)	Entropy 0.65940 (0.65948)	Top-1 acc 74.609 (72.650)	Top-5 acc 89.844 (89.132)	lr 0.00027
Train [113][190/3239]	Time 0.230 (0.688)	Data Time 0.001 (0.216)	Loss 2.1348 (2.1602)	Entropy 0.65937 (0.65948)	Top-1 acc 74.219 (72.654)	Top-5 acc 91.016 (89.124)	lr 0.00027
Train [113][200/3239]	Time 0.233 (0.677)	Data Time 0.001 (0.205)	Loss 2.1595 (2.1595)	Entropy 0.65930 (0.65947)	Top-1 acc 74.219 (72.654)	Top-5 acc 89.062 (89.162)	lr 0.00027
Train [113][210/3239]	Time 0.210 (0.668)	Data Time 0.001 (0.196)	Loss 2.0498 (2.1592)	Entropy 0.65933 (0.65946)	Top-1 acc 74.609 (72.664)	Top-5 acc 91.406 (89.148)	lr 0.00027
Train [113][220/3239]	Time 0.231 (0.659)	Data Time 0.001 (0.187)	Loss 2.1989 (2.1568)	Entropy 0.65933 (0.65946)	Top-1 acc 73.438 (72.723)	Top-5 acc 89.062 (89.207)	lr 0.00027
Train [113][230/3239]	Time 2.650 (0.652)	Data Time 0.002 (0.179)	Loss 2.1301 (2.1577)	Entropy 0.65933 (0.65945)	Top-1 acc 74.609 (72.734)	Top-5 acc 90.625 (89.188)	lr 0.00027
Train [113][240/3239]	Time 0.221 (0.634)	Data Time 0.001 (0.172)	Loss 2.0611 (2.1574)	Entropy 0.65929 (0.65944)	Top-1 acc 75.781 (72.739)	Top-5 acc 92.188 (89.191)	lr 0.00027
Train [113][250/3239]	Time 0.221 (0.628)	Data Time 0.001 (0.165)	Loss 2.1113 (2.1560)	Entropy 0.65921 (0.65944)	Top-1 acc 76.562 (72.759)	Top-5 acc 89.844 (89.207)	lr 0.00027
Train [113][260/3239]	Time 0.234 (0.622)	Data Time 0.001 (0.159)	Loss 2.2501 (2.1565)	Entropy 0.65922 (0.65943)	Top-1 acc 71.484 (72.766)	Top-5 acc 87.891 (89.182)	lr 0.00027
Train [113][270/3239]	Time 0.383 (0.617)	Data Time 0.001 (0.153)	Loss 2.2589 (2.1557)	Entropy 0.65939 (0.65942)	Top-1 acc 68.750 (72.790)	Top-5 acc 88.672 (89.228)	lr 0.00027
Train [113][280/3239]	Time 0.224 (0.612)	Data Time 0.001 (0.147)	Loss 2.0793 (2.1552)	Entropy 0.65942 (0.65942)	Top-1 acc 74.609 (72.794)	Top-5 acc 90.625 (89.222)	lr 0.00027
Train [113][290/3239]	Time 0.227 (0.607)	Data Time 0.001 (0.142)	Loss 2.0578 (2.1547)	Entropy 0.65943 (0.65942)	Top-1 acc 77.344 (72.800)	Top-5 acc 89.844 (89.218)	lr 0.00027
Train [113][300/3239]	Time 0.224 (0.603)	Data Time 0.001 (0.138)	Loss 2.2414 (2.1546)	Entropy 0.65946 (0.65942)	Top-1 acc 67.578 (72.767)	Top-5 acc 87.109 (89.204)	lr 0.00027
Train [113][310/3239]	Time 0.231 (0.599)	Data Time 0.001 (0.133)	Loss 2.0472 (2.1535)	Entropy 0.65947 (0.65943)	Top-1 acc 78.125 (72.801)	Top-5 acc 90.625 (89.236)	lr 0.00027
Train [113][320/3239]	Time 0.235 (0.596)	Data Time 0.001 (0.129)	Loss 2.2519 (2.1539)	Entropy 0.65947 (0.65943)	Top-1 acc 70.703 (72.785)	Top-5 acc 86.328 (89.238)	lr 0.00027
Train [113][330/3239]	Time 0.256 (0.593)	Data Time 0.001 (0.125)	Loss 2.2337 (2.1542)	Entropy 0.65951 (0.65943)	Top-1 acc 72.266 (72.779)	Top-5 acc 86.719 (89.236)	lr 0.00027
Train [113][340/3239]	Time 55.196 (0.744)	Data Time 0.001 (0.122)	Loss 2.0174 (2.1544)	Entropy 0.65951 (0.65943)	Top-1 acc 74.609 (72.770)	Top-5 acc 91.406 (89.211)	lr 0.00027
Train [113][350/3239]	Time 0.290 (0.733)	Data Time 0.003 (0.118)	Loss 2.0933 (2.1544)	Entropy 0.65946 (0.65943)	Top-1 acc 73.828 (72.751)	Top-5 acc 90.625 (89.225)	lr 0.00027
Train [113][360/3239]	Time 0.232 (0.727)	Data Time 0.001 (0.115)	Loss 2.2135 (2.1544)	Entropy 0.65943 (0.65943)	Top-1 acc 71.484 (72.764)	Top-5 acc 87.500 (89.230)	lr 0.00027
Train [113][370/3239]	Time 0.226 (0.720)	Data Time 0.001 (0.112)	Loss 1.9641 (2.1535)	Entropy 0.65943 (0.65943)	Top-1 acc 76.562 (72.795)	Top-5 acc 92.578 (89.255)	lr 0.00027
Train [113][380/3239]	Time 0.257 (0.714)	Data Time 0.002 (0.109)	Loss 2.1882 (2.1545)	Entropy 0.65945 (0.65943)	Top-1 acc 72.266 (72.780)	Top-5 acc 86.328 (89.209)	lr 0.00027
Train [113][390/3239]	Time 0.239 (0.708)	Data Time 0.001 (0.106)	Loss 2.2687 (2.1551)	Entropy 0.65945 (0.65943)	Top-1 acc 69.922 (72.751)	Top-5 acc 87.500 (89.201)	lr 0.00027
Train [113][400/3239]	Time 0.236 (0.703)	Data Time 0.001 (0.104)	Loss 2.1721 (2.1557)	Entropy 0.65942 (0.65943)	Top-1 acc 69.531 (72.730)	Top-5 acc 91.406 (89.195)	lr 0.00026
Train [113][410/3239]	Time 0.235 (0.697)	Data Time 0.001 (0.101)	Loss 2.1879 (2.1547)	Entropy 0.65940 (0.65943)	Top-1 acc 72.266 (72.767)	Top-5 acc 88.281 (89.217)	lr 0.00026
Train [113][420/3239]	Time 0.233 (0.692)	Data Time 0.001 (0.099)	Loss 2.2242 (2.1541)	Entropy 0.65939 (0.65943)	Top-1 acc 71.875 (72.786)	Top-5 acc 89.062 (89.227)	lr 0.00026
Train [113][430/3239]	Time 0.220 (0.687)	Data Time 0.001 (0.097)	Loss 2.2391 (2.1540)	Entropy 0.65936 (0.65943)	Top-1 acc 69.531 (72.789)	Top-5 acc 88.672 (89.237)	lr 0.00026
Train [113][440/3239]	Time 0.240 (0.683)	Data Time 0.001 (0.095)	Loss 2.1725 (2.1540)	Entropy 0.65926 (0.65943)	Top-1 acc 70.703 (72.793)	Top-5 acc 88.672 (89.235)	lr 0.00026
Train [113][450/3239]	Time 2.645 (0.679)	Data Time 0.001 (0.092)	Loss 2.1489 (2.1538)	Entropy 0.65926 (0.65942)	Top-1 acc 74.609 (72.791)	Top-5 acc 89.062 (89.242)	lr 0.00026
Train [113][460/3239]	Time 0.274 (0.669)	Data Time 0.002 (0.090)	Loss 2.2010 (2.1549)	Entropy 0.65909 (0.65942)	Top-1 acc 70.703 (72.745)	Top-5 acc 89.062 (89.222)	lr 0.00026
Train [113][470/3239]	Time 0.260 (0.665)	Data Time 0.001 (0.089)	Loss 2.1041 (2.1547)	Entropy 0.65911 (0.65941)	Top-1 acc 72.266 (72.747)	Top-5 acc 90.234 (89.229)	lr 0.00026
Train [113][480/3239]	Time 0.238 (0.661)	Data Time 0.001 (0.087)	Loss 2.3104 (2.1552)	Entropy 0.65908 (0.65940)	Top-1 acc 67.188 (72.729)	Top-5 acc 86.328 (89.219)	lr 0.00026
Train [113][490/3239]	Time 0.239 (0.658)	Data Time 0.001 (0.085)	Loss 2.0929 (2.1548)	Entropy 0.65904 (0.65940)	Top-1 acc 73.047 (72.753)	Top-5 acc 91.016 (89.222)	lr 0.00026
Train [113][500/3239]	Time 0.242 (0.654)	Data Time 0.001 (0.083)	Loss 2.0984 (2.1541)	Entropy 0.65899 (0.65939)	Top-1 acc 77.344 (72.785)	Top-5 acc 91.406 (89.239)	lr 0.00026
Train [113][510/3239]	Time 0.231 (0.651)	Data Time 0.001 (0.082)	Loss 2.1789 (2.1537)	Entropy 0.65889 (0.65938)	Top-1 acc 73.438 (72.800)	Top-5 acc 90.625 (89.243)	lr 0.00026
Train [113][520/3239]	Time 0.370 (0.648)	Data Time 0.002 (0.080)	Loss 2.0339 (2.1534)	Entropy 0.65887 (0.65937)	Top-1 acc 75.000 (72.819)	Top-5 acc 90.234 (89.251)	lr 0.00026
Train [113][530/3239]	Time 0.231 (0.645)	Data Time 0.001 (0.079)	Loss 2.0424 (2.1532)	Entropy 0.65882 (0.65936)	Top-1 acc 75.391 (72.820)	Top-5 acc 92.188 (89.259)	lr 0.00026
Train [113][540/3239]	Time 0.238 (0.642)	Data Time 0.001 (0.077)	Loss 2.1259 (2.1532)	Entropy 0.65888 (0.65935)	Top-1 acc 73.828 (72.820)	Top-5 acc 89.453 (89.263)	lr 0.00026
Train [113][550/3239]	Time 0.268 (0.639)	Data Time 0.001 (0.076)	Loss 2.0835 (2.1531)	Entropy 0.65899 (0.65934)	Top-1 acc 69.922 (72.832)	Top-5 acc 89.844 (89.265)	lr 0.00026
Train [113][560/3239]	Time 2.703 (0.636)	Data Time 0.002 (0.075)	Loss 2.1167 (2.1534)	Entropy 0.65899 (0.65934)	Top-1 acc 71.875 (72.822)	Top-5 acc 92.578 (89.263)	lr 0.00026
Train [113][570/3239]	Time 0.241 (0.629)	Data Time 0.001 (0.073)	Loss 2.0870 (2.1528)	Entropy 0.65891 (0.65933)	Top-1 acc 74.609 (72.840)	Top-5 acc 91.406 (89.273)	lr 0.00026
Train [113][580/3239]	Time 0.249 (0.627)	Data Time 0.001 (0.072)	Loss 2.2191 (2.1533)	Entropy 0.65887 (0.65932)	Top-1 acc 69.531 (72.830)	Top-5 acc 87.109 (89.260)	lr 0.00026
Train [113][590/3239]	Time 0.238 (0.625)	Data Time 0.001 (0.071)	Loss 2.1718 (2.1532)	Entropy 0.65880 (0.65931)	Top-1 acc 69.531 (72.829)	Top-5 acc 89.062 (89.265)	lr 0.00026
Train [113][600/3239]	Time 0.320 (0.623)	Data Time 0.001 (0.070)	Loss 2.0384 (2.1526)	Entropy 0.65880 (0.65930)	Top-1 acc 74.219 (72.849)	Top-5 acc 92.578 (89.277)	lr 0.00026
Train [113][610/3239]	Time 0.230 (0.620)	Data Time 0.001 (0.069)	Loss 2.0929 (2.1533)	Entropy 0.65873 (0.65930)	Top-1 acc 75.391 (72.832)	Top-5 acc 90.625 (89.259)	lr 0.00026
Train [113][620/3239]	Time 0.249 (0.618)	Data Time 0.001 (0.068)	Loss 2.1689 (2.1540)	Entropy 0.65883 (0.65929)	Top-1 acc 71.094 (72.802)	Top-5 acc 88.672 (89.244)	lr 0.00026
Train [113][630/3239]	Time 0.242 (0.616)	Data Time 0.002 (0.067)	Loss 2.1960 (2.1539)	Entropy 0.65882 (0.65928)	Top-1 acc 69.531 (72.802)	Top-5 acc 87.500 (89.242)	lr 0.00026
Train [113][640/3239]	Time 0.334 (0.614)	Data Time 0.001 (0.066)	Loss 2.1737 (2.1542)	Entropy 0.65888 (0.65927)	Top-1 acc 72.656 (72.781)	Top-5 acc 87.891 (89.235)	lr 0.00026
Train [113][650/3239]	Time 0.248 (0.612)	Data Time 0.001 (0.065)	Loss 2.0492 (2.1538)	Entropy 0.65881 (0.65927)	Top-1 acc 77.344 (72.792)	Top-5 acc 89.844 (89.231)	lr 0.00026
Train [113][660/3239]	Time 0.223 (0.610)	Data Time 0.002 (0.064)	Loss 2.0425 (2.1533)	Entropy 0.65882 (0.65926)	Top-1 acc 75.391 (72.807)	Top-5 acc 91.016 (89.240)	lr 0.00026
Train [113][670/3239]	Time 2.665 (0.608)	Data Time 0.001 (0.063)	Loss 2.1200 (2.1534)	Entropy 0.65882 (0.65925)	Top-1 acc 74.609 (72.801)	Top-5 acc 90.234 (89.235)	lr 0.00026
Train [113][680/3239]	Time 0.265 (0.603)	Data Time 0.001 (0.062)	Loss 2.2159 (2.1533)	Entropy 0.65881 (0.65925)	Top-1 acc 69.922 (72.808)	Top-5 acc 88.281 (89.239)	lr 0.00026
Train [113][690/3239]	Time 0.278 (0.602)	Data Time 0.001 (0.061)	Loss 2.1548 (2.1529)	Entropy 0.65890 (0.65924)	Top-1 acc 71.875 (72.802)	Top-5 acc 91.016 (89.252)	lr 0.00026
Train [113][700/3239]	Time 0.234 (0.600)	Data Time 0.001 (0.060)	Loss 2.0397 (2.1536)	Entropy 0.65892 (0.65924)	Top-1 acc 78.906 (72.785)	Top-5 acc 91.797 (89.240)	lr 0.00026
Train [113][710/3239]	Time 0.254 (0.676)	Data Time 0.002 (0.059)	Loss 2.0552 (2.1531)	Entropy 0.65891 (0.65923)	Top-1 acc 76.172 (72.793)	Top-5 acc 90.234 (89.247)	lr 0.00026
Train [113][720/3239]	Time 0.232 (0.674)	Data Time 0.002 (0.058)	Loss 2.2706 (2.1533)	Entropy 0.65885 (0.65923)	Top-1 acc 69.922 (72.792)	Top-5 acc 85.938 (89.241)	lr 0.00026
Train [113][730/3239]	Time 0.231 (0.671)	Data Time 0.001 (0.058)	Loss 2.3413 (2.1535)	Entropy 0.65889 (0.65922)	Top-1 acc 68.359 (72.780)	Top-5 acc 87.500 (89.244)	lr 0.00026
Train [113][740/3239]	Time 0.253 (0.669)	Data Time 0.002 (0.057)	Loss 2.1993 (2.1538)	Entropy 0.65884 (0.65922)	Top-1 acc 70.312 (72.775)	Top-5 acc 89.062 (89.233)	lr 0.00026
Train [113][750/3239]	Time 0.237 (0.666)	Data Time 0.001 (0.056)	Loss 2.1229 (2.1538)	Entropy 0.65883 (0.65921)	Top-1 acc 77.344 (72.774)	Top-5 acc 89.453 (89.232)	lr 0.00026
Train [113][760/3239]	Time 0.228 (0.664)	Data Time 0.001 (0.055)	Loss 1.9497 (2.1535)	Entropy 0.65879 (0.65921)	Top-1 acc 78.125 (72.785)	Top-5 acc 92.578 (89.244)	lr 0.00026
Train [113][770/3239]	Time 0.262 (0.662)	Data Time 0.001 (0.055)	Loss 2.2081 (2.1547)	Entropy 0.65879 (0.65920)	Top-1 acc 70.312 (72.752)	Top-5 acc 90.234 (89.224)	lr 0.00026
Train [113][780/3239]	Time 2.563 (0.659)	Data Time 0.001 (0.054)	Loss 2.0435 (2.1542)	Entropy 0.65879 (0.65920)	Top-1 acc 76.562 (72.768)	Top-5 acc 92.188 (89.229)	lr 0.00026
Train [113][790/3239]	Time 0.238 (0.654)	Data Time 0.001 (0.053)	Loss 2.2484 (2.1544)	Entropy 0.65874 (0.65919)	Top-1 acc 71.875 (72.762)	Top-5 acc 85.547 (89.226)	lr 0.00026
Train [113][800/3239]	Time 0.232 (0.652)	Data Time 0.001 (0.053)	Loss 2.0621 (2.1541)	Entropy 0.65875 (0.65919)	Top-1 acc 72.656 (72.761)	Top-5 acc 90.625 (89.231)	lr 0.00026
Train [113][810/3239]	Time 0.242 (0.650)	Data Time 0.001 (0.052)	Loss 2.1061 (2.1541)	Entropy 0.65867 (0.65918)	Top-1 acc 77.344 (72.760)	Top-5 acc 89.453 (89.230)	lr 0.00026
Train [113][820/3239]	Time 0.230 (0.648)	Data Time 0.001 (0.052)	Loss 2.1379 (2.1547)	Entropy 0.65864 (0.65917)	Top-1 acc 73.438 (72.751)	Top-5 acc 89.844 (89.220)	lr 0.00026
Train [113][830/3239]	Time 0.226 (0.646)	Data Time 0.001 (0.051)	Loss 2.1238 (2.1548)	Entropy 0.65852 (0.65917)	Top-1 acc 74.609 (72.756)	Top-5 acc 90.234 (89.225)	lr 0.00026
Train [113][840/3239]	Time 0.232 (0.644)	Data Time 0.001 (0.050)	Loss 2.1885 (2.1552)	Entropy 0.65858 (0.65916)	Top-1 acc 70.703 (72.756)	Top-5 acc 88.281 (89.214)	lr 0.00026
Train [113][850/3239]	Time 0.234 (0.642)	Data Time 0.001 (0.050)	Loss 1.9822 (2.1549)	Entropy 0.65856 (0.65915)	Top-1 acc 78.906 (72.769)	Top-5 acc 92.188 (89.222)	lr 0.00026
Train [113][860/3239]	Time 0.232 (0.640)	Data Time 0.002 (0.049)	Loss 2.1671 (2.1550)	Entropy 0.65850 (0.65914)	Top-1 acc 70.312 (72.761)	Top-5 acc 89.062 (89.224)	lr 0.00026
Train [113][870/3239]	Time 0.223 (0.638)	Data Time 0.001 (0.049)	Loss 2.2760 (2.1551)	Entropy 0.65836 (0.65914)	Top-1 acc 73.047 (72.774)	Top-5 acc 88.281 (89.215)	lr 0.00026
Train [113][880/3239]	Time 0.233 (0.637)	Data Time 0.001 (0.048)	Loss 2.1026 (2.1546)	Entropy 0.65841 (0.65913)	Top-1 acc 73.438 (72.780)	Top-5 acc 91.016 (89.223)	lr 0.00025
Train [113][890/3239]	Time 2.593 (0.635)	Data Time 0.001 (0.048)	Loss 2.1497 (2.1549)	Entropy 0.65841 (0.65912)	Top-1 acc 73.828 (72.771)	Top-5 acc 88.672 (89.217)	lr 0.00025
Train [113][900/3239]	Time 0.253 (0.631)	Data Time 0.001 (0.047)	Loss 2.1062 (2.1549)	Entropy 0.65832 (0.65911)	Top-1 acc 75.000 (72.777)	Top-5 acc 91.406 (89.220)	lr 0.00025
Train [113][910/3239]	Time 0.224 (0.629)	Data Time 0.001 (0.047)	Loss 2.1207 (2.1550)	Entropy 0.65833 (0.65910)	Top-1 acc 73.828 (72.763)	Top-5 acc 89.453 (89.215)	lr 0.00025
Train [113][920/3239]	Time 0.229 (0.627)	Data Time 0.001 (0.046)	Loss 2.2798 (2.1553)	Entropy 0.65832 (0.65909)	Top-1 acc 68.359 (72.755)	Top-5 acc 88.281 (89.210)	lr 0.00025
Train [113][930/3239]	Time 0.326 (0.626)	Data Time 0.001 (0.046)	Loss 2.0962 (2.1547)	Entropy 0.65830 (0.65908)	Top-1 acc 71.484 (72.762)	Top-5 acc 92.969 (89.227)	lr 0.00025
Train [113][940/3239]	Time 0.227 (0.624)	Data Time 0.001 (0.045)	Loss 2.1544 (2.1554)	Entropy 0.65815 (0.65908)	Top-1 acc 73.828 (72.753)	Top-5 acc 89.062 (89.212)	lr 0.00025
Train [113][950/3239]	Time 0.235 (0.623)	Data Time 0.001 (0.045)	Loss 2.1143 (2.1553)	Entropy 0.65816 (0.65907)	Top-1 acc 75.391 (72.752)	Top-5 acc 88.672 (89.217)	lr 0.00025
Train [113][960/3239]	Time 0.234 (0.621)	Data Time 0.001 (0.044)	Loss 2.1444 (2.1555)	Entropy 0.65811 (0.65906)	Top-1 acc 73.828 (72.755)	Top-5 acc 88.672 (89.210)	lr 0.00025
Train [113][970/3239]	Time 0.237 (0.620)	Data Time 0.002 (0.044)	Loss 2.1359 (2.1554)	Entropy 0.65816 (0.65905)	Top-1 acc 73.047 (72.754)	Top-5 acc 91.797 (89.216)	lr 0.00025
Train [113][980/3239]	Time 0.232 (0.619)	Data Time 0.001 (0.043)	Loss 2.1295 (2.1553)	Entropy 0.65823 (0.65904)	Top-1 acc 72.656 (72.748)	Top-5 acc 91.016 (89.221)	lr 0.00025
Train [113][990/3239]	Time 0.268 (0.617)	Data Time 0.001 (0.043)	Loss 2.2922 (2.1555)	Entropy 0.65825 (0.65903)	Top-1 acc 67.969 (72.741)	Top-5 acc 89.062 (89.221)	lr 0.00025
Train [113][1000/3239]	Time 2.593 (0.616)	Data Time 0.001 (0.043)	Loss 2.1820 (2.1557)	Entropy 0.65825 (0.65902)	Top-1 acc 69.922 (72.747)	Top-5 acc 88.281 (89.217)	lr 0.00025
Train [113][1010/3239]	Time 0.242 (0.612)	Data Time 0.001 (0.042)	Loss 2.0990 (2.1556)	Entropy 0.65817 (0.65901)	Top-1 acc 75.781 (72.764)	Top-5 acc 89.844 (89.219)	lr 0.00025
Train [113][1020/3239]	Time 0.260 (0.611)	Data Time 0.001 (0.042)	Loss 2.1852 (2.1556)	Entropy 0.65820 (0.65901)	Top-1 acc 71.094 (72.761)	Top-5 acc 87.891 (89.224)	lr 0.00025
Train [113][1030/3239]	Time 0.236 (0.610)	Data Time 0.001 (0.041)	Loss 2.0158 (2.1552)	Entropy 0.65817 (0.65900)	Top-1 acc 76.953 (72.773)	Top-5 acc 91.016 (89.229)	lr 0.00025
Train [113][1040/3239]	Time 0.232 (0.609)	Data Time 0.001 (0.041)	Loss 2.0807 (2.1551)	Entropy 0.65811 (0.65899)	Top-1 acc 75.391 (72.782)	Top-5 acc 90.234 (89.230)	lr 0.00025
Train [113][1050/3239]	Time 0.235 (0.608)	Data Time 0.002 (0.041)	Loss 2.2108 (2.1553)	Entropy 0.65806 (0.65898)	Top-1 acc 71.094 (72.789)	Top-5 acc 87.891 (89.223)	lr 0.00025
Train [113][1060/3239]	Time 0.230 (0.607)	Data Time 0.001 (0.040)	Loss 2.3990 (2.1564)	Entropy 0.65805 (0.65897)	Top-1 acc 64.844 (72.765)	Top-5 acc 83.203 (89.202)	lr 0.00025
Train [113][1070/3239]	Time 0.286 (0.654)	Data Time 0.003 (0.040)	Loss 2.1450 (2.1565)	Entropy 0.65801 (0.65896)	Top-1 acc 74.609 (72.762)	Top-5 acc 88.672 (89.199)	lr 0.00025
Train [113][1080/3239]	Time 0.228 (0.653)	Data Time 0.002 (0.040)	Loss 2.0990 (2.1562)	Entropy 0.65794 (0.65895)	Top-1 acc 75.000 (72.771)	Top-5 acc 91.016 (89.204)	lr 0.00025
Train [113][1090/3239]	Time 0.232 (0.651)	Data Time 0.002 (0.039)	Loss 2.2373 (2.1562)	Entropy 0.65793 (0.65894)	Top-1 acc 71.875 (72.768)	Top-5 acc 87.500 (89.204)	lr 0.00025
Train [113][1100/3239]	Time 0.246 (0.650)	Data Time 0.001 (0.039)	Loss 2.2162 (2.1562)	Entropy 0.65791 (0.65894)	Top-1 acc 71.094 (72.774)	Top-5 acc 86.719 (89.202)	lr 0.00025
Train [113][1110/3239]	Time 2.521 (0.649)	Data Time 0.001 (0.039)	Loss 2.1899 (2.1563)	Entropy 0.65791 (0.65893)	Top-1 acc 71.875 (72.771)	Top-5 acc 86.719 (89.198)	lr 0.00025
Train [113][1120/3239]	Time 0.245 (0.645)	Data Time 0.001 (0.038)	Loss 2.0537 (2.1565)	Entropy 0.65790 (0.65892)	Top-1 acc 75.781 (72.769)	Top-5 acc 92.578 (89.199)	lr 0.00025
Train [113][1130/3239]	Time 0.244 (0.644)	Data Time 0.001 (0.038)	Loss 2.1587 (2.1570)	Entropy 0.65790 (0.65891)	Top-1 acc 71.484 (72.761)	Top-5 acc 88.281 (89.186)	lr 0.00025
Train [113][1140/3239]	Time 0.343 (0.642)	Data Time 0.001 (0.038)	Loss 2.0504 (2.1563)	Entropy 0.65790 (0.65890)	Top-1 acc 73.047 (72.773)	Top-5 acc 91.797 (89.197)	lr 0.00025
Train [113][1150/3239]	Time 0.237 (0.641)	Data Time 0.001 (0.037)	Loss 2.2238 (2.1564)	Entropy 0.65795 (0.65889)	Top-1 acc 69.531 (72.769)	Top-5 acc 88.672 (89.202)	lr 0.00025
Train [113][1160/3239]	Time 0.235 (0.640)	Data Time 0.001 (0.037)	Loss 2.1711 (2.1565)	Entropy 0.65790 (0.65888)	Top-1 acc 71.484 (72.769)	Top-5 acc 88.672 (89.193)	lr 0.00025
Train [113][1170/3239]	Time 0.227 (0.638)	Data Time 0.001 (0.037)	Loss 2.1173 (2.1564)	Entropy 0.65784 (0.65887)	Top-1 acc 77.344 (72.784)	Top-5 acc 89.844 (89.197)	lr 0.00025
Train [113][1180/3239]	Time 0.320 (0.637)	Data Time 0.001 (0.036)	Loss 2.0638 (2.1567)	Entropy 0.65784 (0.65886)	Top-1 acc 77.344 (72.783)	Top-5 acc 91.016 (89.198)	lr 0.00025
Train [113][1190/3239]	Time 0.236 (0.635)	Data Time 0.001 (0.036)	Loss 2.3755 (2.1569)	Entropy 0.65784 (0.65886)	Top-1 acc 66.406 (72.775)	Top-5 acc 86.328 (89.197)	lr 0.00025
Train [113][1200/3239]	Time 0.221 (0.634)	Data Time 0.001 (0.036)	Loss 2.2321 (2.1571)	Entropy 0.65785 (0.65885)	Top-1 acc 71.875 (72.765)	Top-5 acc 89.453 (89.196)	lr 0.00025
Train [113][1210/3239]	Time 0.244 (0.633)	Data Time 0.001 (0.035)	Loss 2.3518 (2.1575)	Entropy 0.65781 (0.65884)	Top-1 acc 67.188 (72.758)	Top-5 acc 87.500 (89.197)	lr 0.00025
Train [113][1220/3239]	Time 2.667 (0.631)	Data Time 0.001 (0.035)	Loss 2.0706 (2.1576)	Entropy 0.65781 (0.65883)	Top-1 acc 75.391 (72.755)	Top-5 acc 91.016 (89.199)	lr 0.00025
Train [113][1230/3239]	Time 0.232 (0.628)	Data Time 0.001 (0.035)	Loss 2.1010 (2.1573)	Entropy 0.65784 (0.65882)	Top-1 acc 73.828 (72.758)	Top-5 acc 90.625 (89.207)	lr 0.00025
Train [113][1240/3239]	Time 0.236 (0.627)	Data Time 0.001 (0.035)	Loss 2.0926 (2.1572)	Entropy 0.65776 (0.65881)	Top-1 acc 73.438 (72.769)	Top-5 acc 89.844 (89.208)	lr 0.00025
Train [113][1250/3239]	Time 0.283 (0.626)	Data Time 0.002 (0.034)	Loss 2.1070 (2.1570)	Entropy 0.65778 (0.65881)	Top-1 acc 74.219 (72.766)	Top-5 acc 90.625 (89.216)	lr 0.00025
Train [113][1260/3239]	Time 0.281 (0.625)	Data Time 0.001 (0.034)	Loss 2.1568 (2.1569)	Entropy 0.65775 (0.65880)	Top-1 acc 72.656 (72.776)	Top-5 acc 89.062 (89.215)	lr 0.00025
Train [113][1270/3239]	Time 0.275 (0.625)	Data Time 0.001 (0.034)	Loss 2.1100 (2.1568)	Entropy 0.65773 (0.65879)	Top-1 acc 75.000 (72.775)	Top-5 acc 91.797 (89.220)	lr 0.00025
Train [113][1280/3239]	Time 0.238 (0.624)	Data Time 0.001 (0.034)	Loss 2.3812 (2.1567)	Entropy 0.65769 (0.65878)	Top-1 acc 68.359 (72.778)	Top-5 acc 85.547 (89.221)	lr 0.00025
Train [113][1290/3239]	Time 0.230 (0.623)	Data Time 0.001 (0.033)	Loss 2.1947 (2.1566)	Entropy 0.65764 (0.65877)	Top-1 acc 69.141 (72.773)	Top-5 acc 90.234 (89.225)	lr 0.00025
Train [113][1300/3239]	Time 0.233 (0.621)	Data Time 0.001 (0.033)	Loss 2.1431 (2.1565)	Entropy 0.65755 (0.65876)	Top-1 acc 72.656 (72.775)	Top-5 acc 89.062 (89.222)	lr 0.00025
Train [113][1310/3239]	Time 0.229 (0.620)	Data Time 0.002 (0.033)	Loss 2.2525 (2.1568)	Entropy 0.65757 (0.65875)	Top-1 acc 69.141 (72.764)	Top-5 acc 86.328 (89.213)	lr 0.00025
Train [113][1320/3239]	Time 0.244 (0.619)	Data Time 0.010 (0.033)	Loss 2.1831 (2.1580)	Entropy 0.65762 (0.65875)	Top-1 acc 68.750 (72.743)	Top-5 acc 89.453 (89.199)	lr 0.00025
Train [113][1330/3239]	Time 2.519 (0.618)	Data Time 0.001 (0.032)	Loss 2.1706 (2.1580)	Entropy 0.65762 (0.65874)	Top-1 acc 72.656 (72.741)	Top-5 acc 87.500 (89.193)	lr 0.00025
Train [113][1340/3239]	Time 0.272 (0.616)	Data Time 0.001 (0.032)	Loss 2.0940 (2.1580)	Entropy 0.65758 (0.65873)	Top-1 acc 73.438 (72.738)	Top-5 acc 91.406 (89.194)	lr 0.00025
Train [113][1350/3239]	Time 0.269 (0.615)	Data Time 0.002 (0.032)	Loss 2.1935 (2.1577)	Entropy 0.65760 (0.65872)	Top-1 acc 69.531 (72.747)	Top-5 acc 87.109 (89.203)	lr 0.00025
Train [113][1360/3239]	Time 0.224 (0.614)	Data Time 0.001 (0.032)	Loss 2.0843 (2.1577)	Entropy 0.65758 (0.65871)	Top-1 acc 73.828 (72.751)	Top-5 acc 91.016 (89.209)	lr 0.00025
Train [113][1370/3239]	Time 0.238 (0.613)	Data Time 0.001 (0.032)	Loss 2.0364 (2.1578)	Entropy 0.65754 (0.65870)	Top-1 acc 75.781 (72.749)	Top-5 acc 91.406 (89.204)	lr 0.00025
Train [113][1380/3239]	Time 0.247 (0.612)	Data Time 0.001 (0.031)	Loss 2.1349 (2.1577)	Entropy 0.65735 (0.65869)	Top-1 acc 73.047 (72.754)	Top-5 acc 90.234 (89.207)	lr 0.00024
Train [113][1390/3239]	Time 0.234 (0.611)	Data Time 0.001 (0.031)	Loss 2.2437 (2.1575)	Entropy 0.65731 (0.65868)	Top-1 acc 70.703 (72.758)	Top-5 acc 87.891 (89.211)	lr 0.00024
Train [113][1400/3239]	Time 0.239 (0.610)	Data Time 0.001 (0.031)	Loss 2.1907 (2.1572)	Entropy 0.65723 (0.65867)	Top-1 acc 70.703 (72.771)	Top-5 acc 88.672 (89.213)	lr 0.00024
Train [113][1410/3239]	Time 0.232 (0.609)	Data Time 0.001 (0.031)	Loss 2.1742 (2.1576)	Entropy 0.65728 (0.65866)	Top-1 acc 71.484 (72.759)	Top-5 acc 91.406 (89.208)	lr 0.00024
Train [113][1420/3239]	Time 0.227 (0.608)	Data Time 0.001 (0.030)	Loss 2.3660 (2.1575)	Entropy 0.65722 (0.65865)	Top-1 acc 67.969 (72.765)	Top-5 acc 84.766 (89.208)	lr 0.00024
Train [113][1430/3239]	Time 0.467 (0.645)	Data Time 0.004 (0.030)	Loss 2.0670 (2.1577)	Entropy 0.65722 (0.65864)	Top-1 acc 75.391 (72.760)	Top-5 acc 90.625 (89.206)	lr 0.00024
Train [113][1440/3239]	Time 2.649 (0.645)	Data Time 0.002 (0.030)	Loss 2.1784 (2.1578)	Entropy 0.65722 (0.65863)	Top-1 acc 74.219 (72.759)	Top-5 acc 89.844 (89.204)	lr 0.00024
Train [113][1450/3239]	Time 0.243 (0.642)	Data Time 0.002 (0.030)	Loss 2.2097 (2.1580)	Entropy 0.65729 (0.65863)	Top-1 acc 70.703 (72.752)	Top-5 acc 88.281 (89.197)	lr 0.00024
Train [113][1460/3239]	Time 0.235 (0.641)	Data Time 0.001 (0.030)	Loss 2.1524 (2.1581)	Entropy 0.65720 (0.65862)	Top-1 acc 73.438 (72.749)	Top-5 acc 89.844 (89.200)	lr 0.00024
Train [113][1470/3239]	Time 0.338 (0.640)	Data Time 0.001 (0.029)	Loss 2.1730 (2.1578)	Entropy 0.65716 (0.65861)	Top-1 acc 73.047 (72.748)	Top-5 acc 86.328 (89.200)	lr 0.00024
Train [113][1480/3239]	Time 0.262 (0.639)	Data Time 0.002 (0.029)	Loss 2.1766 (2.1575)	Entropy 0.65715 (0.65860)	Top-1 acc 67.578 (72.752)	Top-5 acc 90.625 (89.211)	lr 0.00024
Train [113][1490/3239]	Time 0.253 (0.638)	Data Time 0.001 (0.029)	Loss 2.2438 (2.1579)	Entropy 0.65715 (0.65859)	Top-1 acc 71.875 (72.740)	Top-5 acc 87.891 (89.206)	lr 0.00024
Train [113][1500/3239]	Time 0.234 (0.637)	Data Time 0.001 (0.029)	Loss 2.1799 (2.1575)	Entropy 0.65710 (0.65858)	Top-1 acc 71.094 (72.752)	Top-5 acc 89.453 (89.211)	lr 0.00024
Train [113][1510/3239]	Time 0.342 (0.636)	Data Time 0.001 (0.029)	Loss 2.0489 (2.1575)	Entropy 0.65698 (0.65857)	Top-1 acc 72.656 (72.759)	Top-5 acc 91.797 (89.210)	lr 0.00024
Train [113][1520/3239]	Time 0.230 (0.635)	Data Time 0.001 (0.029)	Loss 2.1287 (2.1575)	Entropy 0.65698 (0.65856)	Top-1 acc 75.000 (72.760)	Top-5 acc 89.062 (89.210)	lr 0.00024
Train [113][1530/3239]	Time 0.290 (0.634)	Data Time 0.001 (0.028)	Loss 2.0683 (2.1576)	Entropy 0.65691 (0.65855)	Top-1 acc 75.781 (72.752)	Top-5 acc 90.234 (89.209)	lr 0.00024
Train [113][1540/3239]	Time 0.238 (0.633)	Data Time 0.001 (0.028)	Loss 2.1604 (2.1578)	Entropy 0.65695 (0.65853)	Top-1 acc 71.094 (72.752)	Top-5 acc 90.625 (89.208)	lr 0.00024
Train [113][1550/3239]	Time 2.438 (0.632)	Data Time 0.001 (0.028)	Loss 1.9267 (2.1574)	Entropy 0.65695 (0.65852)	Top-1 acc 79.297 (72.760)	Top-5 acc 92.578 (89.215)	lr 0.00024
Train [113][1560/3239]	Time 0.245 (0.630)	Data Time 0.002 (0.028)	Loss 2.1239 (2.1571)	Entropy 0.65690 (0.65851)	Top-1 acc 73.438 (72.766)	Top-5 acc 92.188 (89.219)	lr 0.00024
Train [113][1570/3239]	Time 0.234 (0.629)	Data Time 0.001 (0.028)	Loss 2.1824 (2.1570)	Entropy 0.65685 (0.65850)	Top-1 acc 71.875 (72.772)	Top-5 acc 90.625 (89.223)	lr 0.00024
Train [113][1580/3239]	Time 0.272 (0.628)	Data Time 0.001 (0.028)	Loss 2.3226 (2.1570)	Entropy 0.65679 (0.65849)	Top-1 acc 69.141 (72.773)	Top-5 acc 85.547 (89.227)	lr 0.00024
Train [113][1590/3239]	Time 0.228 (0.627)	Data Time 0.001 (0.027)	Loss 2.1175 (2.1569)	Entropy 0.65678 (0.65848)	Top-1 acc 74.219 (72.772)	Top-5 acc 89.453 (89.224)	lr 0.00024
Train [113][1600/3239]	Time 0.238 (0.626)	Data Time 0.001 (0.027)	Loss 2.1159 (2.1566)	Entropy 0.65670 (0.65847)	Top-1 acc 73.047 (72.780)	Top-5 acc 91.016 (89.229)	lr 0.00024
Train [113][1610/3239]	Time 0.239 (0.625)	Data Time 0.001 (0.027)	Loss 2.1187 (2.1566)	Entropy 0.65666 (0.65846)	Top-1 acc 74.219 (72.776)	Top-5 acc 89.062 (89.231)	lr 0.00024
Train [113][1620/3239]	Time 0.235 (0.625)	Data Time 0.001 (0.027)	Loss 1.9447 (2.1566)	Entropy 0.65659 (0.65845)	Top-1 acc 77.734 (72.775)	Top-5 acc 92.969 (89.229)	lr 0.00024
Train [113][1630/3239]	Time 0.216 (0.624)	Data Time 0.001 (0.027)	Loss 2.2062 (2.1568)	Entropy 0.65659 (0.65844)	Top-1 acc 72.266 (72.772)	Top-5 acc 85.938 (89.222)	lr 0.00024
Train [113][1640/3239]	Time 0.273 (0.623)	Data Time 0.001 (0.027)	Loss 2.1783 (2.1570)	Entropy 0.65660 (0.65843)	Top-1 acc 75.000 (72.771)	Top-5 acc 88.281 (89.222)	lr 0.00024
Train [113][1650/3239]	Time 0.264 (0.622)	Data Time 0.001 (0.026)	Loss 2.1597 (2.1571)	Entropy 0.65650 (0.65841)	Top-1 acc 71.094 (72.769)	Top-5 acc 88.672 (89.217)	lr 0.00024
Train [113][1660/3239]	Time 2.485 (0.621)	Data Time 0.001 (0.026)	Loss 2.1296 (2.1571)	Entropy 0.65650 (0.65840)	Top-1 acc 73.047 (72.765)	Top-5 acc 88.281 (89.215)	lr 0.00024
Train [113][1670/3239]	Time 0.233 (0.619)	Data Time 0.001 (0.026)	Loss 2.1014 (2.1572)	Entropy 0.65649 (0.65839)	Top-1 acc 75.391 (72.767)	Top-5 acc 88.672 (89.212)	lr 0.00024
Train [113][1680/3239]	Time 0.380 (0.618)	Data Time 0.001 (0.026)	Loss 2.0944 (2.1572)	Entropy 0.65641 (0.65838)	Top-1 acc 76.953 (72.768)	Top-5 acc 91.797 (89.218)	lr 0.00024
Train [113][1690/3239]	Time 0.245 (0.617)	Data Time 0.001 (0.026)	Loss 2.1482 (2.1572)	Entropy 0.65646 (0.65837)	Top-1 acc 74.219 (72.771)	Top-5 acc 88.281 (89.219)	lr 0.00024
Train [113][1700/3239]	Time 0.225 (0.617)	Data Time 0.001 (0.026)	Loss 2.2460 (2.1572)	Entropy 0.65638 (0.65836)	Top-1 acc 69.141 (72.769)	Top-5 acc 88.672 (89.218)	lr 0.00024
Train [113][1710/3239]	Time 0.231 (0.616)	Data Time 0.002 (0.026)	Loss 2.0503 (2.1571)	Entropy 0.65635 (0.65835)	Top-1 acc 75.000 (72.770)	Top-5 acc 92.188 (89.219)	lr 0.00024
Train [113][1720/3239]	Time 0.353 (0.615)	Data Time 0.002 (0.025)	Loss 2.3004 (2.1570)	Entropy 0.65634 (0.65833)	Top-1 acc 71.875 (72.774)	Top-5 acc 85.938 (89.219)	lr 0.00024
Train [113][1730/3239]	Time 0.260 (0.614)	Data Time 0.002 (0.025)	Loss 2.0163 (2.1568)	Entropy 0.65624 (0.65832)	Top-1 acc 76.562 (72.780)	Top-5 acc 90.625 (89.223)	lr 0.00024
Train [113][1740/3239]	Time 0.240 (0.614)	Data Time 0.001 (0.025)	Loss 2.1242 (2.1568)	Entropy 0.65620 (0.65831)	Top-1 acc 73.047 (72.777)	Top-5 acc 91.797 (89.227)	lr 0.00024
Train [113][1750/3239]	Time 0.237 (0.613)	Data Time 0.001 (0.025)	Loss 2.1181 (2.1566)	Entropy 0.65618 (0.65830)	Top-1 acc 75.781 (72.781)	Top-5 acc 92.578 (89.237)	lr 0.00024
Train [113][1760/3239]	Time 0.336 (0.612)	Data Time 0.001 (0.025)	Loss 2.1182 (2.1566)	Entropy 0.65621 (0.65829)	Top-1 acc 74.219 (72.780)	Top-5 acc 88.672 (89.234)	lr 0.00024
Train [113][1770/3239]	Time 2.450 (0.611)	Data Time 0.002 (0.025)	Loss 2.1249 (2.1565)	Entropy 0.65621 (0.65827)	Top-1 acc 71.875 (72.784)	Top-5 acc 90.625 (89.233)	lr 0.00024
Train [113][1780/3239]	Time 0.240 (0.609)	Data Time 0.002 (0.025)	Loss 2.0078 (2.1565)	Entropy 0.65620 (0.65826)	Top-1 acc 76.562 (72.780)	Top-5 acc 92.188 (89.234)	lr 0.00024
Train [113][1790/3239]	Time 0.230 (0.608)	Data Time 0.001 (0.024)	Loss 2.0633 (2.1566)	Entropy 0.65608 (0.65825)	Top-1 acc 77.344 (72.778)	Top-5 acc 90.234 (89.230)	lr 0.00024
Train [113][1800/3239]	Time 0.354 (0.638)	Data Time 0.002 (0.024)	Loss 2.2774 (2.1564)	Entropy 0.65613 (0.65824)	Top-1 acc 69.531 (72.787)	Top-5 acc 88.281 (89.235)	lr 0.00024
Train [113][1810/3239]	Time 0.274 (0.638)	Data Time 0.002 (0.024)	Loss 2.2367 (2.1565)	Entropy 0.65613 (0.65823)	Top-1 acc 73.828 (72.785)	Top-5 acc 89.062 (89.235)	lr 0.00024
Train [113][1820/3239]	Time 0.239 (0.637)	Data Time 0.001 (0.024)	Loss 2.1017 (2.1563)	Entropy 0.65603 (0.65822)	Top-1 acc 74.609 (72.792)	Top-5 acc 88.672 (89.240)	lr 0.00024
Train [113][1830/3239]	Time 0.236 (0.636)	Data Time 0.001 (0.024)	Loss 2.1023 (2.1563)	Entropy 0.65603 (0.65820)	Top-1 acc 73.047 (72.794)	Top-5 acc 89.062 (89.240)	lr 0.00024
Train [113][1840/3239]	Time 0.247 (0.635)	Data Time 0.001 (0.024)	Loss 2.0540 (2.1564)	Entropy 0.65606 (0.65819)	Top-1 acc 78.516 (72.787)	Top-5 acc 91.016 (89.240)	lr 0.00024
Train [113][1850/3239]	Time 0.279 (0.634)	Data Time 0.001 (0.024)	Loss 2.2051 (2.1563)	Entropy 0.65600 (0.65818)	Top-1 acc 75.781 (72.787)	Top-5 acc 87.891 (89.240)	lr 0.00024
Train [113][1860/3239]	Time 0.232 (0.634)	Data Time 0.001 (0.024)	Loss 1.9739 (2.1563)	Entropy 0.65594 (0.65817)	Top-1 acc 77.344 (72.787)	Top-5 acc 92.188 (89.237)	lr 0.00024
Train [113][1870/3239]	Time 0.249 (0.633)	Data Time 0.001 (0.024)	Loss 2.1328 (2.1562)	Entropy 0.65595 (0.65816)	Top-1 acc 73.047 (72.786)	Top-5 acc 90.625 (89.239)	lr 0.00024
Train [113][1880/3239]	Time 2.484 (0.632)	Data Time 0.001 (0.023)	Loss 2.2535 (2.1560)	Entropy 0.65595 (0.65814)	Top-1 acc 72.656 (72.799)	Top-5 acc 87.500 (89.243)	lr 0.00024
Train [113][1890/3239]	Time 0.276 (0.630)	Data Time 0.001 (0.023)	Loss 2.2041 (2.1558)	Entropy 0.65602 (0.65813)	Top-1 acc 72.656 (72.805)	Top-5 acc 89.844 (89.247)	lr 0.00023
Train [113][1900/3239]	Time 0.230 (0.630)	Data Time 0.001 (0.023)	Loss 2.1803 (2.1560)	Entropy 0.65610 (0.65812)	Top-1 acc 70.703 (72.803)	Top-5 acc 88.672 (89.244)	lr 0.00023
Train [113][1910/3239]	Time 0.234 (0.629)	Data Time 0.001 (0.023)	Loss 2.1328 (2.1557)	Entropy 0.65606 (0.65811)	Top-1 acc 73.047 (72.810)	Top-5 acc 89.062 (89.249)	lr 0.00023
Train [113][1920/3239]	Time 0.283 (0.628)	Data Time 0.001 (0.023)	Loss 2.1460 (2.1557)	Entropy 0.65612 (0.65810)	Top-1 acc 71.875 (72.809)	Top-5 acc 87.500 (89.247)	lr 0.00023
Train [113][1930/3239]	Time 0.325 (0.627)	Data Time 0.001 (0.023)	Loss 2.0607 (2.1557)	Entropy 0.65612 (0.65809)	Top-1 acc 77.734 (72.814)	Top-5 acc 91.406 (89.247)	lr 0.00023
Train [113][1940/3239]	Time 0.266 (0.626)	Data Time 0.001 (0.023)	Loss 2.0351 (2.1558)	Entropy 0.65603 (0.65808)	Top-1 acc 74.219 (72.809)	Top-5 acc 92.578 (89.245)	lr 0.00023
Train [113][1950/3239]	Time 0.239 (0.626)	Data Time 0.002 (0.023)	Loss 2.1020 (2.1557)	Entropy 0.65597 (0.65807)	Top-1 acc 74.219 (72.815)	Top-5 acc 89.844 (89.246)	lr 0.00023
Train [113][1960/3239]	Time 0.241 (0.625)	Data Time 0.001 (0.023)	Loss 2.1530 (2.1557)	Entropy 0.65587 (0.65806)	Top-1 acc 71.094 (72.817)	Top-5 acc 92.188 (89.248)	lr 0.00023
Train [113][1970/3239]	Time 0.350 (0.624)	Data Time 0.001 (0.022)	Loss 2.3118 (2.1559)	Entropy 0.65582 (0.65805)	Top-1 acc 69.531 (72.813)	Top-5 acc 86.719 (89.246)	lr 0.00023
Train [113][1980/3239]	Time 0.252 (0.624)	Data Time 0.001 (0.022)	Loss 2.2759 (2.1560)	Entropy 0.65582 (0.65804)	Top-1 acc 70.703 (72.808)	Top-5 acc 85.156 (89.241)	lr 0.00023
Train [113][1990/3239]	Time 2.398 (0.623)	Data Time 0.002 (0.022)	Loss 1.9668 (2.1559)	Entropy 0.65582 (0.65803)	Top-1 acc 75.781 (72.808)	Top-5 acc 93.750 (89.243)	lr 0.00023
Train [113][2000/3239]	Time 0.257 (0.621)	Data Time 0.002 (0.022)	Loss 2.0392 (2.1558)	Entropy 0.65572 (0.65801)	Top-1 acc 78.516 (72.811)	Top-5 acc 89.062 (89.242)	lr 0.00023
Train [113][2010/3239]	Time 0.320 (0.621)	Data Time 0.001 (0.022)	Loss 2.1451 (2.1559)	Entropy 0.65570 (0.65800)	Top-1 acc 73.438 (72.809)	Top-5 acc 90.234 (89.240)	lr 0.00023
Train [113][2020/3239]	Time 0.285 (0.620)	Data Time 0.001 (0.022)	Loss 2.2339 (2.1558)	Entropy 0.65571 (0.65799)	Top-1 acc 71.875 (72.811)	Top-5 acc 86.719 (89.243)	lr 0.00023
Train [113][2030/3239]	Time 0.254 (0.619)	Data Time 0.001 (0.022)	Loss 2.2140 (2.1561)	Entropy 0.65570 (0.65798)	Top-1 acc 72.656 (72.800)	Top-5 acc 87.109 (89.238)	lr 0.00023
Train [113][2040/3239]	Time 0.255 (0.619)	Data Time 0.002 (0.022)	Loss 2.2199 (2.1561)	Entropy 0.65564 (0.65797)	Top-1 acc 71.484 (72.802)	Top-5 acc 88.281 (89.239)	lr 0.00023
Train [113][2050/3239]	Time 0.240 (0.618)	Data Time 0.002 (0.022)	Loss 2.0224 (2.1561)	Entropy 0.65565 (0.65796)	Top-1 acc 78.125 (72.802)	Top-5 acc 92.578 (89.241)	lr 0.00023
Train [113][2060/3239]	Time 0.236 (0.617)	Data Time 0.001 (0.021)	Loss 2.2288 (2.1560)	Entropy 0.65563 (0.65795)	Top-1 acc 67.969 (72.803)	Top-5 acc 88.281 (89.244)	lr 0.00023
Train [113][2070/3239]	Time 0.231 (0.617)	Data Time 0.001 (0.021)	Loss 2.0557 (2.1558)	Entropy 0.65567 (0.65793)	Top-1 acc 76.562 (72.810)	Top-5 acc 90.625 (89.249)	lr 0.00023
Train [113][2080/3239]	Time 0.297 (0.616)	Data Time 0.001 (0.021)	Loss 2.2101 (2.1558)	Entropy 0.65562 (0.65792)	Top-1 acc 70.703 (72.815)	Top-5 acc 89.062 (89.250)	lr 0.00023
Train [113][2090/3239]	Time 0.252 (0.616)	Data Time 0.001 (0.021)	Loss 2.2609 (2.1559)	Entropy 0.65559 (0.65791)	Top-1 acc 71.094 (72.814)	Top-5 acc 88.281 (89.251)	lr 0.00023
Train [113][2100/3239]	Time 2.592 (0.615)	Data Time 0.001 (0.021)	Loss 2.0517 (2.1559)	Entropy 0.65559 (0.65790)	Top-1 acc 74.219 (72.812)	Top-5 acc 91.797 (89.253)	lr 0.00023
Train [113][2110/3239]	Time 0.226 (0.613)	Data Time 0.001 (0.021)	Loss 2.0917 (2.1558)	Entropy 0.65559 (0.65789)	Top-1 acc 74.219 (72.817)	Top-5 acc 91.406 (89.255)	lr 0.00023
Train [113][2120/3239]	Time 0.283 (0.613)	Data Time 0.001 (0.021)	Loss 2.1224 (2.1556)	Entropy 0.65555 (0.65788)	Top-1 acc 72.656 (72.822)	Top-5 acc 91.016 (89.256)	lr 0.00023
Train [113][2130/3239]	Time 0.235 (0.612)	Data Time 0.002 (0.021)	Loss 2.2405 (2.1555)	Entropy 0.65549 (0.65787)	Top-1 acc 68.359 (72.822)	Top-5 acc 86.719 (89.259)	lr 0.00023
Train [113][2140/3239]	Time 0.250 (0.612)	Data Time 0.001 (0.021)	Loss 2.1086 (2.1555)	Entropy 0.65542 (0.65786)	Top-1 acc 75.391 (72.823)	Top-5 acc 89.062 (89.258)	lr 0.00023
Train [113][2150/3239]	Time 0.241 (0.611)	Data Time 0.001 (0.021)	Loss 2.2932 (2.1557)	Entropy 0.65534 (0.65785)	Top-1 acc 73.828 (72.817)	Top-5 acc 87.109 (89.254)	lr 0.00023
Train [113][2160/3239]	Time 0.291 (0.635)	Data Time 0.003 (0.021)	Loss 1.9144 (2.1558)	Entropy 0.65529 (0.65783)	Top-1 acc 76.953 (72.811)	Top-5 acc 94.922 (89.254)	lr 0.00023
Train [113][2170/3239]	Time 0.237 (0.635)	Data Time 0.002 (0.020)	Loss 2.0500 (2.1558)	Entropy 0.65531 (0.65782)	Top-1 acc 73.438 (72.810)	Top-5 acc 90.234 (89.253)	lr 0.00023
Train [113][2180/3239]	Time 0.237 (0.634)	Data Time 0.001 (0.020)	Loss 2.1347 (2.1557)	Entropy 0.65532 (0.65781)	Top-1 acc 75.391 (72.812)	Top-5 acc 89.062 (89.254)	lr 0.00023
Train [113][2190/3239]	Time 0.235 (0.634)	Data Time 0.001 (0.020)	Loss 2.0208 (2.1557)	Entropy 0.65533 (0.65780)	Top-1 acc 78.516 (72.810)	Top-5 acc 89.844 (89.251)	lr 0.00023
Train [113][2200/3239]	Time 0.248 (0.633)	Data Time 0.001 (0.020)	Loss 2.1030 (2.1556)	Entropy 0.65522 (0.65779)	Top-1 acc 75.000 (72.817)	Top-5 acc 90.625 (89.253)	lr 0.00023
Train [113][2210/3239]	Time 2.701 (0.632)	Data Time 0.001 (0.020)	Loss 2.2864 (2.1555)	Entropy 0.65522 (0.65778)	Top-1 acc 70.703 (72.820)	Top-5 acc 88.281 (89.253)	lr 0.00023
Train [113][2220/3239]	Time 0.331 (0.631)	Data Time 0.001 (0.020)	Loss 2.2236 (2.1554)	Entropy 0.65521 (0.65777)	Top-1 acc 67.578 (72.819)	Top-5 acc 88.672 (89.255)	lr 0.00023
Train [113][2230/3239]	Time 0.312 (0.630)	Data Time 0.002 (0.020)	Loss 2.0989 (2.1554)	Entropy 0.65517 (0.65775)	Top-1 acc 75.781 (72.822)	Top-5 acc 90.234 (89.256)	lr 0.00023
Train [113][2240/3239]	Time 0.229 (0.629)	Data Time 0.001 (0.020)	Loss 2.1246 (2.1552)	Entropy 0.65515 (0.65774)	Top-1 acc 74.219 (72.826)	Top-5 acc 89.453 (89.258)	lr 0.00023
Train [113][2250/3239]	Time 0.229 (0.629)	Data Time 0.001 (0.020)	Loss 2.0944 (2.1551)	Entropy 0.65509 (0.65773)	Top-1 acc 75.391 (72.828)	Top-5 acc 91.797 (89.262)	lr 0.00023
Train [113][2260/3239]	Time 0.234 (0.628)	Data Time 0.001 (0.020)	Loss 2.2480 (2.1552)	Entropy 0.65506 (0.65772)	Top-1 acc 68.359 (72.826)	Top-5 acc 87.109 (89.259)	lr 0.00023
Train [113][2270/3239]	Time 0.224 (0.628)	Data Time 0.001 (0.020)	Loss 2.0488 (2.1552)	Entropy 0.65484 (0.65771)	Top-1 acc 76.172 (72.823)	Top-5 acc 89.062 (89.255)	lr 0.00023
Train [113][2280/3239]	Time 0.246 (0.627)	Data Time 0.001 (0.020)	Loss 2.2581 (2.1553)	Entropy 0.65478 (0.65769)	Top-1 acc 67.578 (72.824)	Top-5 acc 85.547 (89.253)	lr 0.00023
Train [113][2290/3239]	Time 0.234 (0.626)	Data Time 0.001 (0.020)	Loss 2.3069 (2.1553)	Entropy 0.65473 (0.65768)	Top-1 acc 71.484 (72.826)	Top-5 acc 86.719 (89.253)	lr 0.00023
Train [113][2300/3239]	Time 0.332 (0.626)	Data Time 0.001 (0.019)	Loss 2.1055 (2.1553)	Entropy 0.65474 (0.65767)	Top-1 acc 75.000 (72.823)	Top-5 acc 89.453 (89.252)	lr 0.00023
Train [113][2310/3239]	Time 0.229 (0.625)	Data Time 0.001 (0.019)	Loss 1.9889 (2.1552)	Entropy 0.65452 (0.65766)	Top-1 acc 76.953 (72.827)	Top-5 acc 91.406 (89.253)	lr 0.00023
Train [113][2320/3239]	Time 2.593 (0.625)	Data Time 0.001 (0.019)	Loss 2.2139 (2.1551)	Entropy 0.65452 (0.65764)	Top-1 acc 70.703 (72.829)	Top-5 acc 88.281 (89.255)	lr 0.00023
Train [113][2330/3239]	Time 0.258 (0.623)	Data Time 0.002 (0.019)	Loss 2.1689 (2.1549)	Entropy 0.65450 (0.65763)	Top-1 acc 74.609 (72.834)	Top-5 acc 87.891 (89.257)	lr 0.00023
Train [113][2340/3239]	Time 0.325 (0.623)	Data Time 0.001 (0.019)	Loss 2.1123 (2.1551)	Entropy 0.65445 (0.65761)	Top-1 acc 75.000 (72.830)	Top-5 acc 88.672 (89.253)	lr 0.00023
Train [113][2350/3239]	Time 0.224 (0.622)	Data Time 0.001 (0.019)	Loss 2.1350 (2.1549)	Entropy 0.65452 (0.65760)	Top-1 acc 73.828 (72.836)	Top-5 acc 90.234 (89.261)	lr 0.00023
Train [113][2360/3239]	Time 0.250 (0.621)	Data Time 0.002 (0.019)	Loss 2.1897 (2.1549)	Entropy 0.65451 (0.65759)	Top-1 acc 71.094 (72.836)	Top-5 acc 86.328 (89.259)	lr 0.00023
Train [113][2370/3239]	Time 0.228 (0.621)	Data Time 0.001 (0.019)	Loss 2.1104 (2.1547)	Entropy 0.65446 (0.65758)	Top-1 acc 73.828 (72.839)	Top-5 acc 88.281 (89.262)	lr 0.00023
Train [113][2380/3239]	Time 0.339 (0.620)	Data Time 0.001 (0.019)	Loss 2.2072 (2.1548)	Entropy 0.65438 (0.65756)	Top-1 acc 70.703 (72.839)	Top-5 acc 89.062 (89.261)	lr 0.00023
Train [113][2390/3239]	Time 0.298 (0.620)	Data Time 0.002 (0.019)	Loss 2.0393 (2.1546)	Entropy 0.65431 (0.65755)	Top-1 acc 74.609 (72.846)	Top-5 acc 92.578 (89.266)	lr 0.00023
Train [113][2400/3239]	Time 0.242 (0.619)	Data Time 0.001 (0.019)	Loss 2.2983 (2.1547)	Entropy 0.65422 (0.65754)	Top-1 acc 66.016 (72.843)	Top-5 acc 85.156 (89.263)	lr 0.00023
Train [113][2410/3239]	Time 0.288 (0.619)	Data Time 0.038 (0.019)	Loss 2.0598 (2.1545)	Entropy 0.65415 (0.65752)	Top-1 acc 73.438 (72.849)	Top-5 acc 92.969 (89.266)	lr 0.00022
Train [113][2420/3239]	Time 0.258 (0.618)	Data Time 0.001 (0.019)	Loss 2.0866 (2.1544)	Entropy 0.65414 (0.65751)	Top-1 acc 75.781 (72.853)	Top-5 acc 90.625 (89.266)	lr 0.00022
Train [113][2430/3239]	Time 2.617 (0.618)	Data Time 0.002 (0.018)	Loss 2.0761 (2.1543)	Entropy 0.65414 (0.65749)	Top-1 acc 76.562 (72.851)	Top-5 acc 89.844 (89.268)	lr 0.00022
Train [113][2440/3239]	Time 0.231 (0.616)	Data Time 0.001 (0.018)	Loss 2.1479 (2.1545)	Entropy 0.65408 (0.65748)	Top-1 acc 72.656 (72.849)	Top-5 acc 89.453 (89.264)	lr 0.00022
Train [113][2450/3239]	Time 0.242 (0.616)	Data Time 0.001 (0.018)	Loss 2.3696 (2.1545)	Entropy 0.65408 (0.65747)	Top-1 acc 71.094 (72.849)	Top-5 acc 85.938 (89.268)	lr 0.00022
Train [113][2460/3239]	Time 0.241 (0.615)	Data Time 0.001 (0.018)	Loss 2.1124 (2.1544)	Entropy 0.65413 (0.65745)	Top-1 acc 73.438 (72.849)	Top-5 acc 91.016 (89.269)	lr 0.00022
Train [113][2470/3239]	Time 0.237 (0.615)	Data Time 0.002 (0.018)	Loss 2.2714 (2.1545)	Entropy 0.65412 (0.65744)	Top-1 acc 68.750 (72.846)	Top-5 acc 89.844 (89.269)	lr 0.00022
Train [113][2480/3239]	Time 0.239 (0.614)	Data Time 0.001 (0.018)	Loss 2.0422 (2.1544)	Entropy 0.65410 (0.65743)	Top-1 acc 76.953 (72.848)	Top-5 acc 91.016 (89.271)	lr 0.00022
Train [113][2490/3239]	Time 0.232 (0.614)	Data Time 0.002 (0.018)	Loss 2.3026 (2.1546)	Entropy 0.65409 (0.65741)	Top-1 acc 67.969 (72.845)	Top-5 acc 86.719 (89.268)	lr 0.00022
Train [113][2500/3239]	Time 0.227 (0.613)	Data Time 0.001 (0.018)	Loss 2.0439 (2.1546)	Entropy 0.65403 (0.65740)	Top-1 acc 78.125 (72.843)	Top-5 acc 90.625 (89.267)	lr 0.00022
Train [113][2510/3239]	Time 0.259 (0.612)	Data Time 0.001 (0.018)	Loss 2.1260 (2.1549)	Entropy 0.65407 (0.65739)	Top-1 acc 72.656 (72.835)	Top-5 acc 89.453 (89.265)	lr 0.00022
Train [113][2520/3239]	Time 0.285 (0.633)	Data Time 0.003 (0.018)	Loss 2.0722 (2.1549)	Entropy 0.65403 (0.65737)	Top-1 acc 72.266 (72.835)	Top-5 acc 90.625 (89.264)	lr 0.00022
Train [113][2530/3239]	Time 0.237 (0.633)	Data Time 0.002 (0.018)	Loss 2.2669 (2.1550)	Entropy 0.65402 (0.65736)	Top-1 acc 67.188 (72.829)	Top-5 acc 86.719 (89.263)	lr 0.00022
Train [113][2540/3239]	Time 2.675 (0.633)	Data Time 0.002 (0.018)	Loss 2.2854 (2.1551)	Entropy 0.65402 (0.65735)	Top-1 acc 70.703 (72.829)	Top-5 acc 89.844 (89.265)	lr 0.00022
Train [113][2550/3239]	Time 0.338 (0.631)	Data Time 0.002 (0.018)	Loss 2.1309 (2.1551)	Entropy 0.65400 (0.65733)	Top-1 acc 73.438 (72.829)	Top-5 acc 91.016 (89.265)	lr 0.00022
Train [113][2560/3239]	Time 0.253 (0.631)	Data Time 0.001 (0.018)	Loss 2.2485 (2.1551)	Entropy 0.65393 (0.65732)	Top-1 acc 70.703 (72.832)	Top-5 acc 87.891 (89.264)	lr 0.00022
Train [113][2570/3239]	Time 0.245 (0.630)	Data Time 0.001 (0.018)	Loss 2.2687 (2.1552)	Entropy 0.65395 (0.65731)	Top-1 acc 70.703 (72.830)	Top-5 acc 87.109 (89.263)	lr 0.00022
Train [113][2580/3239]	Time 0.257 (0.630)	Data Time 0.002 (0.018)	Loss 2.2447 (2.1551)	Entropy 0.65385 (0.65729)	Top-1 acc 71.875 (72.833)	Top-5 acc 85.938 (89.263)	lr 0.00022
Train [113][2590/3239]	Time 0.332 (0.629)	Data Time 0.001 (0.017)	Loss 2.2031 (2.1552)	Entropy 0.65384 (0.65728)	Top-1 acc 73.047 (72.834)	Top-5 acc 87.500 (89.260)	lr 0.00022
Train [113][2600/3239]	Time 0.262 (0.629)	Data Time 0.001 (0.017)	Loss 2.2227 (2.1553)	Entropy 0.65384 (0.65727)	Top-1 acc 71.094 (72.834)	Top-5 acc 87.109 (89.259)	lr 0.00022
Train [113][2610/3239]	Time 0.263 (0.628)	Data Time 0.001 (0.017)	Loss 2.2310 (2.1552)	Entropy 0.65382 (0.65725)	Top-1 acc 70.312 (72.833)	Top-5 acc 87.891 (89.258)	lr 0.00022
Train [113][2620/3239]	Time 0.235 (0.628)	Data Time 0.001 (0.017)	Loss 2.3750 (2.1552)	Entropy 0.65368 (0.65724)	Top-1 acc 69.531 (72.835)	Top-5 acc 87.500 (89.260)	lr 0.00022
Train [113][2630/3239]	Time 0.348 (0.627)	Data Time 0.001 (0.017)	Loss 2.1119 (2.1552)	Entropy 0.65352 (0.65723)	Top-1 acc 74.609 (72.837)	Top-5 acc 89.453 (89.260)	lr 0.00022
Train [113][2640/3239]	Time 0.232 (0.627)	Data Time 0.001 (0.017)	Loss 2.1068 (2.1551)	Entropy 0.65351 (0.65721)	Top-1 acc 74.219 (72.839)	Top-5 acc 90.234 (89.261)	lr 0.00022
Train [113][2650/3239]	Time 0.237 (0.626)	Data Time 0.001 (0.017)	Loss 2.1845 (2.1550)	Entropy 0.65336 (0.65720)	Top-1 acc 75.781 (72.846)	Top-5 acc 87.891 (89.263)	lr 0.00022
Train [113][2660/3239]	Time 0.238 (0.626)	Data Time 0.001 (0.017)	Loss 2.3399 (2.1552)	Entropy 0.65322 (0.65718)	Top-1 acc 68.359 (72.837)	Top-5 acc 86.328 (89.261)	lr 0.00022
Train [113][2670/3239]	Time 0.323 (0.625)	Data Time 0.001 (0.017)	Loss 2.1334 (2.1550)	Entropy 0.65313 (0.65717)	Top-1 acc 71.875 (72.841)	Top-5 acc 89.453 (89.265)	lr 0.00022
Train [113][2680/3239]	Time 0.277 (0.625)	Data Time 0.001 (0.017)	Loss 2.1605 (2.1551)	Entropy 0.65322 (0.65715)	Top-1 acc 76.172 (72.843)	Top-5 acc 88.672 (89.262)	lr 0.00022
Train [113][2690/3239]	Time 0.256 (0.624)	Data Time 0.001 (0.017)	Loss 2.1275 (2.1552)	Entropy 0.65320 (0.65714)	Top-1 acc 73.047 (72.838)	Top-5 acc 91.406 (89.261)	lr 0.00022
Train [113][2700/3239]	Time 0.240 (0.624)	Data Time 0.001 (0.017)	Loss 2.2442 (2.1551)	Entropy 0.65318 (0.65712)	Top-1 acc 71.094 (72.840)	Top-5 acc 86.328 (89.262)	lr 0.00022
Train [113][2710/3239]	Time 0.380 (0.623)	Data Time 0.001 (0.017)	Loss 2.2054 (2.1551)	Entropy 0.65314 (0.65711)	Top-1 acc 67.969 (72.839)	Top-5 acc 87.500 (89.264)	lr 0.00022
Train [113][2720/3239]	Time 0.239 (0.623)	Data Time 0.001 (0.017)	Loss 2.2313 (2.1551)	Entropy 0.65310 (0.65710)	Top-1 acc 71.094 (72.838)	Top-5 acc 87.891 (89.263)	lr 0.00022
Train [113][2730/3239]	Time 0.248 (0.622)	Data Time 0.001 (0.017)	Loss 2.1561 (2.1552)	Entropy 0.65311 (0.65708)	Top-1 acc 73.047 (72.838)	Top-5 acc 89.062 (89.261)	lr 0.00022
Train [113][2740/3239]	Time 0.239 (0.622)	Data Time 0.001 (0.017)	Loss 2.2374 (2.1552)	Entropy 0.65316 (0.65707)	Top-1 acc 69.922 (72.836)	Top-5 acc 87.500 (89.262)	lr 0.00022
Train [113][2750/3239]	Time 0.347 (0.621)	Data Time 0.002 (0.017)	Loss 2.1226 (2.1553)	Entropy 0.65320 (0.65705)	Top-1 acc 74.609 (72.839)	Top-5 acc 90.234 (89.259)	lr 0.00022
Train [113][2760/3239]	Time 0.259 (0.621)	Data Time 0.001 (0.016)	Loss 2.2029 (2.1553)	Entropy 0.65315 (0.65704)	Top-1 acc 73.438 (72.839)	Top-5 acc 89.453 (89.257)	lr 0.00022
Train [113][2770/3239]	Time 0.243 (0.620)	Data Time 0.001 (0.016)	Loss 2.3700 (2.1555)	Entropy 0.65319 (0.65702)	Top-1 acc 67.578 (72.832)	Top-5 acc 85.938 (89.254)	lr 0.00022
Train [113][2780/3239]	Time 0.253 (0.620)	Data Time 0.001 (0.016)	Loss 2.1633 (2.1557)	Entropy 0.65313 (0.65701)	Top-1 acc 76.953 (72.828)	Top-5 acc 88.672 (89.251)	lr 0.00022
Train [113][2790/3239]	Time 0.300 (0.619)	Data Time 0.001 (0.016)	Loss 2.3293 (2.1558)	Entropy 0.65316 (0.65700)	Top-1 acc 71.094 (72.826)	Top-5 acc 85.156 (89.251)	lr 0.00022
Train [113][2800/3239]	Time 0.230 (0.619)	Data Time 0.001 (0.016)	Loss 2.0907 (2.1556)	Entropy 0.65305 (0.65698)	Top-1 acc 75.000 (72.831)	Top-5 acc 90.234 (89.252)	lr 0.00022
Train [113][2810/3239]	Time 0.254 (0.618)	Data Time 0.001 (0.016)	Loss 2.1919 (2.1555)	Entropy 0.65309 (0.65697)	Top-1 acc 70.703 (72.830)	Top-5 acc 89.844 (89.257)	lr 0.00022
Train [113][2820/3239]	Time 0.238 (0.618)	Data Time 0.001 (0.016)	Loss 2.1665 (2.1555)	Entropy 0.65311 (0.65695)	Top-1 acc 73.828 (72.832)	Top-5 acc 90.625 (89.255)	lr 0.00022
Train [113][2830/3239]	Time 0.218 (0.618)	Data Time 0.001 (0.016)	Loss 2.1286 (2.1555)	Entropy 0.65312 (0.65694)	Top-1 acc 73.047 (72.831)	Top-5 acc 89.844 (89.256)	lr 0.00022
Train [113][2840/3239]	Time 0.259 (0.617)	Data Time 0.001 (0.016)	Loss 2.2170 (2.1557)	Entropy 0.65308 (0.65693)	Top-1 acc 71.094 (72.828)	Top-5 acc 89.453 (89.256)	lr 0.00022
Train [113][2850/3239]	Time 0.236 (0.617)	Data Time 0.001 (0.016)	Loss 1.9906 (2.1554)	Entropy 0.65311 (0.65691)	Top-1 acc 77.344 (72.836)	Top-5 acc 91.797 (89.261)	lr 0.00022
Train [113][2860/3239]	Time 0.246 (0.636)	Data Time 0.003 (0.016)	Loss 2.0115 (2.1553)	Entropy 0.65313 (0.65690)	Top-1 acc 78.906 (72.841)	Top-5 acc 90.625 (89.260)	lr 0.00022
Train [113][2870/3239]	Time 0.238 (0.635)	Data Time 0.002 (0.016)	Loss 2.1049 (2.1552)	Entropy 0.65310 (0.65689)	Top-1 acc 73.438 (72.844)	Top-5 acc 90.234 (89.262)	lr 0.00022
Train [113][2880/3239]	Time 0.245 (0.635)	Data Time 0.001 (0.016)	Loss 2.0476 (2.1552)	Entropy 0.65304 (0.65687)	Top-1 acc 78.125 (72.846)	Top-5 acc 90.625 (89.262)	lr 0.00022
Train [113][2890/3239]	Time 0.302 (0.635)	Data Time 0.002 (0.016)	Loss 2.1494 (2.1551)	Entropy 0.65299 (0.65686)	Top-1 acc 69.922 (72.846)	Top-5 acc 88.281 (89.262)	lr 0.00022
Train [113][2900/3239]	Time 0.254 (0.634)	Data Time 0.001 (0.016)	Loss 2.3181 (2.1551)	Entropy 0.65298 (0.65685)	Top-1 acc 67.969 (72.846)	Top-5 acc 86.328 (89.260)	lr 0.00022
Train [113][2910/3239]	Time 0.284 (0.634)	Data Time 0.001 (0.016)	Loss 2.3785 (2.1553)	Entropy 0.65297 (0.65683)	Top-1 acc 68.750 (72.845)	Top-5 acc 83.984 (89.258)	lr 0.00022
Train [113][2920/3239]	Time 0.328 (0.633)	Data Time 0.001 (0.016)	Loss 2.1144 (2.1552)	Entropy 0.65298 (0.65682)	Top-1 acc 74.219 (72.847)	Top-5 acc 88.281 (89.257)	lr 0.00022
Train [113][2930/3239]	Time 0.286 (0.633)	Data Time 0.002 (0.016)	Loss 2.1797 (2.1554)	Entropy 0.65298 (0.65681)	Top-1 acc 74.219 (72.849)	Top-5 acc 89.062 (89.255)	lr 0.00022
Train [113][2940/3239]	Time 0.254 (0.632)	Data Time 0.001 (0.016)	Loss 2.3755 (2.1555)	Entropy 0.65294 (0.65680)	Top-1 acc 68.359 (72.846)	Top-5 acc 85.938 (89.254)	lr 0.00021
Train [113][2950/3239]	Time 0.257 (0.632)	Data Time 0.001 (0.016)	Loss 2.2817 (2.1555)	Entropy 0.65292 (0.65678)	Top-1 acc 70.312 (72.845)	Top-5 acc 86.719 (89.252)	lr 0.00021
Train [113][2960/3239]	Time 0.318 (0.631)	Data Time 0.001 (0.015)	Loss 2.2244 (2.1556)	Entropy 0.65287 (0.65677)	Top-1 acc 71.875 (72.842)	Top-5 acc 88.672 (89.252)	lr 0.00021
Train [113][2970/3239]	Time 0.227 (0.631)	Data Time 0.001 (0.015)	Loss 2.1233 (2.1557)	Entropy 0.65292 (0.65676)	Top-1 acc 70.703 (72.841)	Top-5 acc 87.500 (89.252)	lr 0.00021
Train [113][2980/3239]	Time 0.237 (0.630)	Data Time 0.001 (0.015)	Loss 2.1231 (2.1556)	Entropy 0.65293 (0.65674)	Top-1 acc 74.219 (72.842)	Top-5 acc 91.797 (89.256)	lr 0.00021
Train [113][2990/3239]	Time 0.246 (0.630)	Data Time 0.001 (0.015)	Loss 2.2128 (2.1556)	Entropy 0.65288 (0.65673)	Top-1 acc 71.484 (72.844)	Top-5 acc 87.500 (89.256)	lr 0.00021
Train [113][3000/3239]	Time 0.384 (0.630)	Data Time 0.002 (0.015)	Loss 2.1868 (2.1554)	Entropy 0.65292 (0.65672)	Top-1 acc 73.047 (72.851)	Top-5 acc 89.453 (89.259)	lr 0.00021
Train [113][3010/3239]	Time 0.245 (0.629)	Data Time 0.002 (0.015)	Loss 2.1661 (2.1555)	Entropy 0.65293 (0.65671)	Top-1 acc 69.141 (72.843)	Top-5 acc 88.672 (89.257)	lr 0.00021
Train [113][3020/3239]	Time 0.235 (0.629)	Data Time 0.001 (0.015)	Loss 2.0183 (2.1556)	Entropy 0.65288 (0.65669)	Top-1 acc 77.734 (72.840)	Top-5 acc 91.406 (89.254)	lr 0.00021
Train [113][3030/3239]	Time 0.274 (0.628)	Data Time 0.001 (0.015)	Loss 2.3181 (2.1557)	Entropy 0.65284 (0.65668)	Top-1 acc 67.578 (72.836)	Top-5 acc 88.281 (89.254)	lr 0.00021
Train [113][3040/3239]	Time 0.361 (0.628)	Data Time 0.001 (0.015)	Loss 2.1288 (2.1556)	Entropy 0.65280 (0.65667)	Top-1 acc 74.219 (72.839)	Top-5 acc 90.625 (89.256)	lr 0.00021
Train [113][3050/3239]	Time 0.235 (0.628)	Data Time 0.001 (0.015)	Loss 2.0651 (2.1557)	Entropy 0.65283 (0.65665)	Top-1 acc 75.000 (72.839)	Top-5 acc 90.625 (89.255)	lr 0.00021
Train [113][3060/3239]	Time 0.245 (0.627)	Data Time 0.001 (0.015)	Loss 2.2504 (2.1558)	Entropy 0.65275 (0.65664)	Top-1 acc 72.656 (72.837)	Top-5 acc 85.156 (89.254)	lr 0.00021
Train [113][3070/3239]	Time 0.272 (0.627)	Data Time 0.001 (0.015)	Loss 2.2150 (2.1557)	Entropy 0.65270 (0.65663)	Top-1 acc 69.922 (72.840)	Top-5 acc 91.016 (89.255)	lr 0.00021
Train [113][3080/3239]	Time 0.393 (0.626)	Data Time 0.001 (0.015)	Loss 2.2534 (2.1558)	Entropy 0.65265 (0.65662)	Top-1 acc 68.750 (72.835)	Top-5 acc 88.672 (89.252)	lr 0.00021
Train [113][3090/3239]	Time 0.229 (0.626)	Data Time 0.001 (0.015)	Loss 2.1339 (2.1558)	Entropy 0.65270 (0.65660)	Top-1 acc 74.219 (72.837)	Top-5 acc 89.453 (89.253)	lr 0.00021
Train [113][3100/3239]	Time 0.231 (0.625)	Data Time 0.001 (0.015)	Loss 2.3090 (2.1559)	Entropy 0.65274 (0.65659)	Top-1 acc 69.141 (72.836)	Top-5 acc 87.500 (89.250)	lr 0.00021
Train [113][3110/3239]	Time 0.235 (0.625)	Data Time 0.001 (0.015)	Loss 2.1502 (2.1559)	Entropy 0.65269 (0.65658)	Top-1 acc 71.875 (72.835)	Top-5 acc 89.453 (89.250)	lr 0.00021
Train [113][3120/3239]	Time 0.344 (0.625)	Data Time 0.002 (0.015)	Loss 2.1883 (2.1563)	Entropy 0.65271 (0.65657)	Top-1 acc 72.266 (72.827)	Top-5 acc 87.500 (89.243)	lr 0.00021
Train [113][3130/3239]	Time 0.249 (0.624)	Data Time 0.001 (0.015)	Loss 2.1529 (2.1563)	Entropy 0.65266 (0.65655)	Top-1 acc 71.094 (72.824)	Top-5 acc 89.453 (89.241)	lr 0.00021
Train [113][3140/3239]	Time 0.239 (0.624)	Data Time 0.001 (0.015)	Loss 2.1484 (2.1564)	Entropy 0.65261 (0.65654)	Top-1 acc 70.703 (72.820)	Top-5 acc 88.672 (89.240)	lr 0.00021
Train [113][3150/3239]	Time 0.236 (0.623)	Data Time 0.001 (0.015)	Loss 2.1933 (2.1564)	Entropy 0.65244 (0.65653)	Top-1 acc 71.875 (72.819)	Top-5 acc 87.500 (89.239)	lr 0.00021
Train [113][3160/3239]	Time 0.310 (0.623)	Data Time 0.001 (0.015)	Loss 2.0078 (2.1563)	Entropy 0.65242 (0.65652)	Top-1 acc 75.781 (72.822)	Top-5 acc 91.406 (89.241)	lr 0.00021
Train [113][3170/3239]	Time 0.238 (0.623)	Data Time 0.001 (0.015)	Loss 2.1319 (2.1565)	Entropy 0.65230 (0.65650)	Top-1 acc 74.609 (72.817)	Top-5 acc 89.062 (89.239)	lr 0.00021
Train [113][3180/3239]	Time 0.255 (0.622)	Data Time 0.000 (0.015)	Loss 2.1121 (2.1564)	Entropy 0.65225 (0.65649)	Top-1 acc 73.047 (72.817)	Top-5 acc 91.016 (89.242)	lr 0.00021
Train [113][3190/3239]	Time 0.277 (0.638)	Data Time 0.000 (0.014)	Loss 2.1384 (2.1564)	Entropy 0.65219 (0.65648)	Top-1 acc 73.438 (72.820)	Top-5 acc 89.453 (89.243)	lr 0.00021
Train [113][3200/3239]	Time 0.219 (0.638)	Data Time 0.000 (0.014)	Loss 2.2103 (2.1563)	Entropy 0.65218 (0.65646)	Top-1 acc 70.703 (72.823)	Top-5 acc 88.281 (89.244)	lr 0.00021
Train [113][3210/3239]	Time 0.236 (0.637)	Data Time 0.000 (0.014)	Loss 2.1845 (2.1563)	Entropy 0.65217 (0.65645)	Top-1 acc 70.703 (72.820)	Top-5 acc 89.453 (89.244)	lr 0.00021
Train [113][3220/3239]	Time 0.232 (0.637)	Data Time 0.000 (0.014)	Loss 2.0600 (2.1562)	Entropy 0.65204 (0.65644)	Top-1 acc 78.125 (72.821)	Top-5 acc 91.797 (89.247)	lr 0.00021
Train [113][3230/3239]	Time 0.235 (0.636)	Data Time 0.000 (0.014)	Loss 2.0858 (2.1562)	Entropy 0.65205 (0.65642)	Top-1 acc 74.219 (72.820)	Top-5 acc 91.016 (89.245)	lr 0.00021
Train [113][3239/3239]	Time 2.326 (0.636)	Data Time 0.000 (0.014)	Loss 2.3493 (2.1563)	Entropy 0.65205 (0.65641)	Top-1 acc 66.667 (72.818)	Top-5 acc 90.123 (89.244)	lr 0.00021
==========Valid [113/120]	loss 1.198	top-1 acc 72.745 (72.745)	top-5 acc 89.985	Train top-1 72.818	top-5 89.244	Entropy 0.65205	Latency-None: 0.000ms	Flops: 544.27M
Train [114][0/3239]	Time 41.407 (41.407)	Data Time 40.118 (40.118)	Loss 2.0544 (2.0544)	Entropy 0.65212 (0.65212)	Top-1 acc 74.609 (74.609)	Top-5 acc 91.406 (91.406)	lr 0.00021
Train [114][10/3239]	Time 2.754 (4.285)	Data Time 0.001 (3.669)	Loss 2.3303 (2.1265)	Entropy 0.65212 (0.65212)	Top-1 acc 67.578 (73.047)	Top-5 acc 85.547 (89.524)	lr 0.00021
Train [114][20/3239]	Time 0.267 (2.403)	Data Time 0.002 (1.929)	Loss 1.9861 (2.1347)	Entropy 0.65220 (0.65216)	Top-1 acc 74.609 (72.786)	Top-5 acc 92.578 (89.490)	lr 0.00021
Train [114][30/3239]	Time 0.277 (1.793)	Data Time 0.002 (1.307)	Loss 2.1987 (2.1179)	Entropy 0.65221 (0.65217)	Top-1 acc 69.922 (73.400)	Top-5 acc 87.500 (89.730)	lr 0.00021
Train [114][40/3239]	Time 0.256 (1.477)	Data Time 0.001 (0.989)	Loss 2.0750 (2.1188)	Entropy 0.65222 (0.65219)	Top-1 acc 73.047 (73.580)	Top-5 acc 93.359 (89.768)	lr 0.00021
Train [114][50/3239]	Time 0.337 (1.287)	Data Time 0.001 (0.795)	Loss 2.2376 (2.1278)	Entropy 0.65238 (0.65221)	Top-1 acc 72.266 (73.361)	Top-5 acc 86.719 (89.706)	lr 0.00021
Train [114][60/3239]	Time 0.293 (1.159)	Data Time 0.002 (0.665)	Loss 2.2114 (2.1247)	Entropy 0.65238 (0.65224)	Top-1 acc 70.312 (73.482)	Top-5 acc 90.625 (89.728)	lr 0.00021
Train [114][70/3239]	Time 0.246 (1.067)	Data Time 0.001 (0.572)	Loss 1.9582 (2.1251)	Entropy 0.65233 (0.65226)	Top-1 acc 77.344 (73.399)	Top-5 acc 92.578 (89.734)	lr 0.00021
Train [114][80/3239]	Time 0.275 (0.995)	Data Time 0.001 (0.502)	Loss 2.1173 (2.1295)	Entropy 0.65234 (0.65227)	Top-1 acc 72.266 (73.293)	Top-5 acc 91.797 (89.632)	lr 0.00021
Train [114][90/3239]	Time 0.377 (0.941)	Data Time 0.001 (0.447)	Loss 2.0650 (2.1318)	Entropy 0.65228 (0.65227)	Top-1 acc 75.391 (73.313)	Top-5 acc 91.406 (89.659)	lr 0.00021
Train [114][100/3239]	Time 0.256 (0.897)	Data Time 0.001 (0.403)	Loss 2.1991 (2.1347)	Entropy 0.65249 (0.65228)	Top-1 acc 72.656 (73.205)	Top-5 acc 89.062 (89.650)	lr 0.00021
Train [114][110/3239]	Time 0.257 (0.862)	Data Time 0.001 (0.367)	Loss 2.1449 (2.1328)	Entropy 0.65241 (0.65230)	Top-1 acc 72.656 (73.297)	Top-5 acc 88.281 (89.650)	lr 0.00021
Train [114][120/3239]	Time 2.491 (0.832)	Data Time 0.001 (0.336)	Loss 2.1825 (2.1339)	Entropy 0.65241 (0.65231)	Top-1 acc 72.656 (73.283)	Top-5 acc 88.672 (89.650)	lr 0.00021
Train [114][130/3239]	Time 0.251 (0.790)	Data Time 0.001 (0.311)	Loss 2.0920 (2.1363)	Entropy 0.65243 (0.65232)	Top-1 acc 72.656 (73.193)	Top-5 acc 88.281 (89.626)	lr 0.00021
Train [114][140/3239]	Time 0.238 (0.769)	Data Time 0.002 (0.289)	Loss 2.2207 (2.1423)	Entropy 0.65247 (0.65233)	Top-1 acc 71.875 (73.102)	Top-5 acc 87.500 (89.539)	lr 0.00021
Train [114][150/3239]	Time 0.246 (0.752)	Data Time 0.001 (0.270)	Loss 2.1500 (2.1438)	Entropy 0.65245 (0.65233)	Top-1 acc 72.656 (73.096)	Top-5 acc 91.016 (89.495)	lr 0.00021
Train [114][160/3239]	Time 0.236 (0.736)	Data Time 0.001 (0.253)	Loss 2.3163 (2.1434)	Entropy 0.65244 (0.65234)	Top-1 acc 69.141 (73.105)	Top-5 acc 87.500 (89.521)	lr 0.00021
Train [114][170/3239]	Time 0.244 (0.722)	Data Time 0.002 (0.238)	Loss 2.1597 (2.1451)	Entropy 0.65243 (0.65235)	Top-1 acc 73.828 (73.056)	Top-5 acc 88.672 (89.467)	lr 0.00021
Train [114][180/3239]	Time 0.258 (0.711)	Data Time 0.001 (0.225)	Loss 2.2252 (2.1500)	Entropy 0.65245 (0.65235)	Top-1 acc 69.922 (72.920)	Top-5 acc 87.500 (89.399)	lr 0.00021
Train [114][190/3239]	Time 0.283 (0.700)	Data Time 0.006 (0.214)	Loss 2.0985 (2.1494)	Entropy 0.65250 (0.65236)	Top-1 acc 75.000 (72.971)	Top-5 acc 90.234 (89.398)	lr 0.00021
Train [114][200/3239]	Time 0.275 (0.690)	Data Time 0.002 (0.203)	Loss 2.1617 (2.1512)	Entropy 0.65252 (0.65237)	Top-1 acc 73.828 (72.921)	Top-5 acc 88.672 (89.342)	lr 0.00021
Train [114][210/3239]	Time 0.235 (0.680)	Data Time 0.001 (0.194)	Loss 2.1201 (2.1501)	Entropy 0.65233 (0.65237)	Top-1 acc 75.781 (72.947)	Top-5 acc 90.234 (89.368)	lr 0.00021
Train [114][220/3239]	Time 0.280 (0.673)	Data Time 0.001 (0.185)	Loss 2.1081 (2.1504)	Entropy 0.65238 (0.65237)	Top-1 acc 75.781 (72.939)	Top-5 acc 89.844 (89.349)	lr 0.00021
Train [114][230/3239]	Time 2.468 (0.665)	Data Time 0.001 (0.177)	Loss 2.2026 (2.1506)	Entropy 0.65238 (0.65237)	Top-1 acc 72.266 (72.922)	Top-5 acc 88.281 (89.331)	lr 0.00021
Train [114][240/3239]	Time 0.229 (0.648)	Data Time 0.001 (0.170)	Loss 2.1209 (2.1499)	Entropy 0.65241 (0.65237)	Top-1 acc 76.953 (72.950)	Top-5 acc 88.672 (89.336)	lr 0.00020
Train [114][250/3239]	Time 0.246 (0.641)	Data Time 0.002 (0.163)	Loss 2.1384 (2.1490)	Entropy 0.65241 (0.65237)	Top-1 acc 72.656 (72.992)	Top-5 acc 89.453 (89.357)	lr 0.00020
Train [114][260/3239]	Time 0.357 (0.636)	Data Time 0.002 (0.157)	Loss 2.1298 (2.1501)	Entropy 0.65242 (0.65237)	Top-1 acc 73.828 (72.956)	Top-5 acc 88.281 (89.357)	lr 0.00020
Train [114][270/3239]	Time 0.283 (0.631)	Data Time 0.001 (0.151)	Loss 2.0517 (2.1499)	Entropy 0.65241 (0.65238)	Top-1 acc 75.391 (72.930)	Top-5 acc 90.234 (89.348)	lr 0.00020
Train [114][280/3239]	Time 0.231 (0.626)	Data Time 0.001 (0.146)	Loss 2.1198 (2.1500)	Entropy 0.65242 (0.65238)	Top-1 acc 73.828 (72.916)	Top-5 acc 90.234 (89.354)	lr 0.00020
Train [114][290/3239]	Time 0.260 (0.622)	Data Time 0.003 (0.141)	Loss 2.2219 (2.1520)	Entropy 0.65223 (0.65238)	Top-1 acc 69.141 (72.880)	Top-5 acc 87.891 (89.297)	lr 0.00020
Train [114][300/3239]	Time 0.395 (0.618)	Data Time 0.002 (0.136)	Loss 2.0502 (2.1515)	Entropy 0.65222 (0.65237)	Top-1 acc 76.172 (72.912)	Top-5 acc 89.844 (89.303)	lr 0.00020
Train [114][310/3239]	Time 0.275 (0.780)	Data Time 0.003 (0.132)	Loss 2.2256 (2.1525)	Entropy 0.65222 (0.65237)	Top-1 acc 69.922 (72.901)	Top-5 acc 85.547 (89.284)	lr 0.00020
Train [114][320/3239]	Time 0.233 (0.776)	Data Time 0.002 (0.128)	Loss 2.1291 (2.1524)	Entropy 0.65214 (0.65236)	Top-1 acc 70.312 (72.906)	Top-5 acc 88.672 (89.290)	lr 0.00020
Train [114][330/3239]	Time 0.282 (0.767)	Data Time 0.002 (0.124)	Loss 2.1483 (2.1515)	Entropy 0.65201 (0.65235)	Top-1 acc 73.828 (72.926)	Top-5 acc 89.844 (89.287)	lr 0.00020
Train [114][340/3239]	Time 2.676 (0.759)	Data Time 0.002 (0.121)	Loss 2.0433 (2.1504)	Entropy 0.65201 (0.65234)	Top-1 acc 76.172 (72.958)	Top-5 acc 92.188 (89.290)	lr 0.00020
Train [114][350/3239]	Time 0.251 (0.745)	Data Time 0.002 (0.117)	Loss 2.2128 (2.1511)	Entropy 0.65193 (0.65233)	Top-1 acc 73.828 (72.963)	Top-5 acc 87.891 (89.267)	lr 0.00020
Train [114][360/3239]	Time 0.259 (0.739)	Data Time 0.002 (0.114)	Loss 2.2985 (2.1520)	Entropy 0.65170 (0.65231)	Top-1 acc 68.750 (72.931)	Top-5 acc 87.109 (89.250)	lr 0.00020
Train [114][370/3239]	Time 0.229 (0.733)	Data Time 0.001 (0.111)	Loss 2.1500 (2.1533)	Entropy 0.65168 (0.65230)	Top-1 acc 74.609 (72.922)	Top-5 acc 88.672 (89.207)	lr 0.00020
Train [114][380/3239]	Time 0.383 (0.726)	Data Time 0.001 (0.108)	Loss 2.2401 (2.1548)	Entropy 0.65170 (0.65228)	Top-1 acc 69.922 (72.879)	Top-5 acc 87.891 (89.190)	lr 0.00020
Train [114][390/3239]	Time 0.237 (0.720)	Data Time 0.002 (0.106)	Loss 2.1657 (2.1537)	Entropy 0.65169 (0.65227)	Top-1 acc 68.750 (72.908)	Top-5 acc 89.844 (89.206)	lr 0.00020
Train [114][400/3239]	Time 0.254 (0.715)	Data Time 0.001 (0.103)	Loss 2.1704 (2.1533)	Entropy 0.65160 (0.65225)	Top-1 acc 71.094 (72.909)	Top-5 acc 89.844 (89.220)	lr 0.00020
Train [114][410/3239]	Time 0.261 (0.709)	Data Time 0.001 (0.101)	Loss 2.1762 (2.1527)	Entropy 0.65157 (0.65224)	Top-1 acc 71.875 (72.935)	Top-5 acc 89.844 (89.227)	lr 0.00020
Train [114][420/3239]	Time 0.268 (0.704)	Data Time 0.001 (0.098)	Loss 2.1441 (2.1522)	Entropy 0.65155 (0.65222)	Top-1 acc 74.219 (72.969)	Top-5 acc 89.453 (89.229)	lr 0.00020
Train [114][430/3239]	Time 0.230 (0.699)	Data Time 0.001 (0.096)	Loss 2.1518 (2.1519)	Entropy 0.65152 (0.65220)	Top-1 acc 73.438 (72.989)	Top-5 acc 87.891 (89.232)	lr 0.00020
Train [114][440/3239]	Time 0.262 (0.694)	Data Time 0.001 (0.094)	Loss 2.1414 (2.1522)	Entropy 0.65150 (0.65219)	Top-1 acc 73.438 (72.966)	Top-5 acc 89.453 (89.224)	lr 0.00020
Train [114][450/3239]	Time 2.515 (0.690)	Data Time 0.001 (0.092)	Loss 2.1744 (2.1516)	Entropy 0.65150 (0.65217)	Top-1 acc 71.094 (72.984)	Top-5 acc 90.234 (89.231)	lr 0.00020
Train [114][460/3239]	Time 0.247 (0.681)	Data Time 0.001 (0.090)	Loss 2.2028 (2.1527)	Entropy 0.65144 (0.65216)	Top-1 acc 73.828 (72.950)	Top-5 acc 88.672 (89.213)	lr 0.00020
Train [114][470/3239]	Time 0.229 (0.677)	Data Time 0.001 (0.088)	Loss 2.2656 (2.1530)	Entropy 0.65140 (0.65214)	Top-1 acc 68.750 (72.940)	Top-5 acc 86.328 (89.202)	lr 0.00020
Train [114][480/3239]	Time 0.226 (0.673)	Data Time 0.001 (0.086)	Loss 2.1351 (2.1537)	Entropy 0.65139 (0.65213)	Top-1 acc 70.703 (72.930)	Top-5 acc 89.844 (89.187)	lr 0.00020
Train [114][490/3239]	Time 0.243 (0.670)	Data Time 0.001 (0.084)	Loss 2.2035 (2.1543)	Entropy 0.65137 (0.65211)	Top-1 acc 69.531 (72.904)	Top-5 acc 89.062 (89.163)	lr 0.00020
Train [114][500/3239]	Time 0.251 (0.666)	Data Time 0.001 (0.083)	Loss 2.2244 (2.1546)	Entropy 0.65132 (0.65209)	Top-1 acc 72.266 (72.919)	Top-5 acc 87.891 (89.148)	lr 0.00020
Train [114][510/3239]	Time 0.333 (0.663)	Data Time 0.001 (0.081)	Loss 2.2580 (2.1552)	Entropy 0.65114 (0.65208)	Top-1 acc 71.094 (72.910)	Top-5 acc 86.719 (89.141)	lr 0.00020
Train [114][520/3239]	Time 0.236 (0.659)	Data Time 0.001 (0.080)	Loss 2.2824 (2.1552)	Entropy 0.65111 (0.65206)	Top-1 acc 70.312 (72.918)	Top-5 acc 86.719 (89.143)	lr 0.00020
Train [114][530/3239]	Time 0.256 (0.656)	Data Time 0.001 (0.078)	Loss 2.0152 (2.1549)	Entropy 0.65108 (0.65204)	Top-1 acc 75.781 (72.928)	Top-5 acc 92.188 (89.147)	lr 0.00020
Train [114][540/3239]	Time 0.251 (0.653)	Data Time 0.001 (0.077)	Loss 2.1465 (2.1550)	Entropy 0.65102 (0.65202)	Top-1 acc 73.438 (72.924)	Top-5 acc 90.625 (89.156)	lr 0.00020
Train [114][550/3239]	Time 0.351 (0.651)	Data Time 0.001 (0.075)	Loss 2.0975 (2.1541)	Entropy 0.65100 (0.65201)	Top-1 acc 73.438 (72.933)	Top-5 acc 89.062 (89.180)	lr 0.00020
Train [114][560/3239]	Time 2.528 (0.648)	Data Time 0.001 (0.074)	Loss 2.1269 (2.1536)	Entropy 0.65100 (0.65199)	Top-1 acc 71.875 (72.948)	Top-5 acc 90.234 (89.205)	lr 0.00020
Train [114][570/3239]	Time 0.300 (0.641)	Data Time 0.001 (0.073)	Loss 2.1727 (2.1537)	Entropy 0.65092 (0.65197)	Top-1 acc 72.656 (72.956)	Top-5 acc 86.719 (89.192)	lr 0.00020
Train [114][580/3239]	Time 0.238 (0.639)	Data Time 0.001 (0.072)	Loss 2.2163 (2.1546)	Entropy 0.65084 (0.65195)	Top-1 acc 75.781 (72.929)	Top-5 acc 85.938 (89.165)	lr 0.00020
Train [114][590/3239]	Time 0.406 (0.636)	Data Time 0.001 (0.070)	Loss 2.1582 (2.1548)	Entropy 0.65076 (0.65193)	Top-1 acc 71.094 (72.917)	Top-5 acc 86.719 (89.167)	lr 0.00020
Train [114][600/3239]	Time 0.249 (0.634)	Data Time 0.001 (0.069)	Loss 2.1636 (2.1545)	Entropy 0.65063 (0.65191)	Top-1 acc 71.484 (72.935)	Top-5 acc 91.016 (89.172)	lr 0.00020
Train [114][610/3239]	Time 0.252 (0.632)	Data Time 0.001 (0.068)	Loss 2.2164 (2.1547)	Entropy 0.65060 (0.65189)	Top-1 acc 71.484 (72.927)	Top-5 acc 88.672 (89.185)	lr 0.00020
Train [114][620/3239]	Time 0.281 (0.630)	Data Time 0.001 (0.067)	Loss 2.2703 (2.1544)	Entropy 0.65056 (0.65187)	Top-1 acc 72.656 (72.934)	Top-5 acc 87.500 (89.194)	lr 0.00020
Train [114][630/3239]	Time 0.349 (0.628)	Data Time 0.001 (0.066)	Loss 2.0310 (2.1544)	Entropy 0.65056 (0.65185)	Top-1 acc 75.000 (72.924)	Top-5 acc 91.797 (89.187)	lr 0.00020
Train [114][640/3239]	Time 0.238 (0.626)	Data Time 0.002 (0.065)	Loss 2.1766 (2.1547)	Entropy 0.65053 (0.65183)	Top-1 acc 73.828 (72.903)	Top-5 acc 88.281 (89.190)	lr 0.00020
Train [114][650/3239]	Time 0.240 (0.624)	Data Time 0.001 (0.064)	Loss 2.2225 (2.1545)	Entropy 0.65048 (0.65181)	Top-1 acc 73.047 (72.909)	Top-5 acc 88.672 (89.205)	lr 0.00020
Train [114][660/3239]	Time 0.279 (0.622)	Data Time 0.001 (0.063)	Loss 2.0697 (2.1549)	Entropy 0.65049 (0.65179)	Top-1 acc 76.953 (72.912)	Top-5 acc 89.844 (89.195)	lr 0.00020
Train [114][670/3239]	Time 51.581 (0.693)	Data Time 0.001 (0.062)	Loss 2.2109 (2.1549)	Entropy 0.65049 (0.65177)	Top-1 acc 70.703 (72.916)	Top-5 acc 86.719 (89.188)	lr 0.00020
Train [114][680/3239]	Time 0.457 (0.688)	Data Time 0.004 (0.061)	Loss 2.2290 (2.1545)	Entropy 0.65035 (0.65175)	Top-1 acc 73.438 (72.927)	Top-5 acc 87.109 (89.196)	lr 0.00020
Train [114][690/3239]	Time 0.245 (0.686)	Data Time 0.002 (0.061)	Loss 2.1622 (2.1546)	Entropy 0.65032 (0.65172)	Top-1 acc 73.438 (72.920)	Top-5 acc 91.016 (89.199)	lr 0.00020
Train [114][700/3239]	Time 0.278 (0.683)	Data Time 0.002 (0.060)	Loss 2.1401 (2.1549)	Entropy 0.65031 (0.65170)	Top-1 acc 70.703 (72.914)	Top-5 acc 89.844 (89.188)	lr 0.00020
Train [114][710/3239]	Time 0.317 (0.681)	Data Time 0.002 (0.059)	Loss 2.1225 (2.1553)	Entropy 0.65022 (0.65168)	Top-1 acc 76.172 (72.902)	Top-5 acc 88.672 (89.189)	lr 0.00020
Train [114][720/3239]	Time 0.312 (0.678)	Data Time 0.001 (0.058)	Loss 2.1278 (2.1554)	Entropy 0.65035 (0.65166)	Top-1 acc 73.438 (72.894)	Top-5 acc 90.625 (89.185)	lr 0.00020
Train [114][730/3239]	Time 0.257 (0.676)	Data Time 0.001 (0.057)	Loss 2.2048 (2.1553)	Entropy 0.65042 (0.65165)	Top-1 acc 69.531 (72.895)	Top-5 acc 87.891 (89.192)	lr 0.00020
Train [114][740/3239]	Time 0.247 (0.674)	Data Time 0.001 (0.057)	Loss 2.1153 (2.1558)	Entropy 0.65042 (0.65163)	Top-1 acc 76.562 (72.897)	Top-5 acc 88.281 (89.174)	lr 0.00020
Train [114][750/3239]	Time 0.325 (0.672)	Data Time 0.001 (0.056)	Loss 2.1277 (2.1559)	Entropy 0.65034 (0.65161)	Top-1 acc 73.438 (72.906)	Top-5 acc 89.453 (89.173)	lr 0.00020
Train [114][760/3239]	Time 0.235 (0.670)	Data Time 0.001 (0.055)	Loss 2.0677 (2.1558)	Entropy 0.65038 (0.65160)	Top-1 acc 75.781 (72.907)	Top-5 acc 90.625 (89.172)	lr 0.00020
Train [114][770/3239]	Time 0.279 (0.667)	Data Time 0.001 (0.055)	Loss 2.3621 (2.1558)	Entropy 0.65038 (0.65158)	Top-1 acc 68.750 (72.907)	Top-5 acc 84.375 (89.173)	lr 0.00020
Train [114][780/3239]	Time 2.470 (0.665)	Data Time 0.001 (0.054)	Loss 2.0953 (2.1557)	Entropy 0.65038 (0.65157)	Top-1 acc 72.266 (72.904)	Top-5 acc 91.797 (89.179)	lr 0.00020
Train [114][790/3239]	Time 0.244 (0.660)	Data Time 0.001 (0.053)	Loss 2.2738 (2.1554)	Entropy 0.65036 (0.65155)	Top-1 acc 70.312 (72.897)	Top-5 acc 85.938 (89.184)	lr 0.00020
Train [114][800/3239]	Time 0.268 (0.658)	Data Time 0.002 (0.053)	Loss 2.2413 (2.1555)	Entropy 0.65034 (0.65154)	Top-1 acc 70.703 (72.894)	Top-5 acc 87.500 (89.182)	lr 0.00019
Train [114][810/3239]	Time 0.252 (0.657)	Data Time 0.001 (0.052)	Loss 2.0553 (2.1546)	Entropy 0.65036 (0.65152)	Top-1 acc 72.656 (72.898)	Top-5 acc 91.016 (89.196)	lr 0.00019
Train [114][820/3239]	Time 0.348 (0.655)	Data Time 0.002 (0.051)	Loss 2.0875 (2.1546)	Entropy 0.65033 (0.65151)	Top-1 acc 75.000 (72.903)	Top-5 acc 90.234 (89.200)	lr 0.00019
Train [114][830/3239]	Time 0.303 (0.653)	Data Time 0.001 (0.051)	Loss 2.1407 (2.1544)	Entropy 0.65035 (0.65149)	Top-1 acc 74.609 (72.911)	Top-5 acc 90.234 (89.203)	lr 0.00019
Train [114][840/3239]	Time 0.265 (0.651)	Data Time 0.001 (0.050)	Loss 2.0541 (2.1550)	Entropy 0.65033 (0.65148)	Top-1 acc 77.344 (72.901)	Top-5 acc 90.625 (89.189)	lr 0.00019
Train [114][850/3239]	Time 0.248 (0.649)	Data Time 0.001 (0.050)	Loss 2.0080 (2.1549)	Entropy 0.65025 (0.65147)	Top-1 acc 77.344 (72.902)	Top-5 acc 91.406 (89.180)	lr 0.00019
Train [114][860/3239]	Time 0.274 (0.647)	Data Time 0.001 (0.049)	Loss 2.0647 (2.1548)	Entropy 0.65020 (0.65145)	Top-1 acc 75.781 (72.901)	Top-5 acc 90.625 (89.185)	lr 0.00019
Train [114][870/3239]	Time 0.327 (0.646)	Data Time 0.001 (0.048)	Loss 2.0585 (2.1546)	Entropy 0.65019 (0.65144)	Top-1 acc 72.656 (72.902)	Top-5 acc 91.797 (89.199)	lr 0.00019
Train [114][880/3239]	Time 0.342 (0.644)	Data Time 0.001 (0.048)	Loss 2.1110 (2.1545)	Entropy 0.65009 (0.65142)	Top-1 acc 70.703 (72.916)	Top-5 acc 92.188 (89.200)	lr 0.00019
Train [114][890/3239]	Time 2.542 (0.642)	Data Time 0.001 (0.047)	Loss 2.1350 (2.1548)	Entropy 0.65009 (0.65141)	Top-1 acc 71.094 (72.900)	Top-5 acc 89.062 (89.188)	lr 0.00019
Train [114][900/3239]	Time 0.263 (0.638)	Data Time 0.002 (0.047)	Loss 2.0335 (2.1546)	Entropy 0.64993 (0.65139)	Top-1 acc 75.391 (72.904)	Top-5 acc 91.406 (89.194)	lr 0.00019
Train [114][910/3239]	Time 0.223 (0.637)	Data Time 0.001 (0.046)	Loss 2.0550 (2.1544)	Entropy 0.64987 (0.65137)	Top-1 acc 76.562 (72.908)	Top-5 acc 91.406 (89.203)	lr 0.00019
Train [114][920/3239]	Time 0.352 (0.635)	Data Time 0.001 (0.046)	Loss 2.2079 (2.1548)	Entropy 0.64976 (0.65136)	Top-1 acc 73.047 (72.893)	Top-5 acc 89.062 (89.197)	lr 0.00019
Train [114][930/3239]	Time 0.262 (0.634)	Data Time 0.001 (0.045)	Loss 2.1593 (2.1551)	Entropy 0.64977 (0.65134)	Top-1 acc 72.266 (72.891)	Top-5 acc 87.891 (89.189)	lr 0.00019
Train [114][940/3239]	Time 0.247 (0.632)	Data Time 0.002 (0.045)	Loss 2.1436 (2.1546)	Entropy 0.64978 (0.65132)	Top-1 acc 73.047 (72.908)	Top-5 acc 90.625 (89.202)	lr 0.00019
Train [114][950/3239]	Time 0.276 (0.631)	Data Time 0.001 (0.045)	Loss 2.0679 (2.1543)	Entropy 0.64978 (0.65131)	Top-1 acc 76.562 (72.908)	Top-5 acc 93.750 (89.213)	lr 0.00019
Train [114][960/3239]	Time 0.385 (0.629)	Data Time 0.001 (0.044)	Loss 2.1857 (2.1540)	Entropy 0.64978 (0.65129)	Top-1 acc 69.922 (72.915)	Top-5 acc 88.281 (89.225)	lr 0.00019
Train [114][970/3239]	Time 0.241 (0.628)	Data Time 0.001 (0.044)	Loss 2.2983 (2.1540)	Entropy 0.64970 (0.65128)	Top-1 acc 69.141 (72.917)	Top-5 acc 87.109 (89.226)	lr 0.00019
Train [114][980/3239]	Time 0.273 (0.627)	Data Time 0.001 (0.043)	Loss 2.0392 (2.1539)	Entropy 0.64974 (0.65126)	Top-1 acc 76.953 (72.912)	Top-5 acc 93.359 (89.230)	lr 0.00019
Train [114][990/3239]	Time 0.232 (0.625)	Data Time 0.001 (0.043)	Loss 2.1310 (2.1541)	Entropy 0.64972 (0.65124)	Top-1 acc 75.391 (72.914)	Top-5 acc 90.234 (89.220)	lr 0.00019
Train [114][1000/3239]	Time 2.603 (0.624)	Data Time 0.001 (0.042)	Loss 2.1913 (2.1541)	Entropy 0.64972 (0.65123)	Top-1 acc 73.047 (72.917)	Top-5 acc 88.281 (89.215)	lr 0.00019
Train [114][1010/3239]	Time 0.232 (0.621)	Data Time 0.001 (0.042)	Loss 1.9529 (2.1550)	Entropy 0.64966 (0.65121)	Top-1 acc 78.906 (72.900)	Top-5 acc 92.188 (89.201)	lr 0.00019
Train [114][1020/3239]	Time 0.243 (0.619)	Data Time 0.001 (0.042)	Loss 2.0531 (2.1548)	Entropy 0.64965 (0.65120)	Top-1 acc 73.828 (72.906)	Top-5 acc 92.969 (89.211)	lr 0.00019
Train [114][1030/3239]	Time 0.244 (0.618)	Data Time 0.001 (0.041)	Loss 2.1004 (2.1550)	Entropy 0.64961 (0.65118)	Top-1 acc 74.609 (72.907)	Top-5 acc 89.844 (89.208)	lr 0.00019
Train [114][1040/3239]	Time 0.261 (0.667)	Data Time 0.002 (0.041)	Loss 2.0328 (2.1552)	Entropy 0.64950 (0.65117)	Top-1 acc 77.344 (72.902)	Top-5 acc 90.234 (89.196)	lr 0.00019
Train [114][1050/3239]	Time 0.293 (0.666)	Data Time 0.002 (0.040)	Loss 2.2388 (2.1555)	Entropy 0.64958 (0.65115)	Top-1 acc 69.141 (72.891)	Top-5 acc 87.109 (89.198)	lr 0.00019
Train [114][1060/3239]	Time 0.250 (0.664)	Data Time 0.002 (0.040)	Loss 2.1761 (2.1555)	Entropy 0.64969 (0.65114)	Top-1 acc 75.391 (72.890)	Top-5 acc 87.891 (89.198)	lr 0.00019
Train [114][1070/3239]	Time 0.279 (0.663)	Data Time 0.002 (0.040)	Loss 2.0851 (2.1550)	Entropy 0.64965 (0.65112)	Top-1 acc 71.484 (72.899)	Top-5 acc 91.406 (89.212)	lr 0.00019
Train [114][1080/3239]	Time 0.241 (0.661)	Data Time 0.001 (0.039)	Loss 2.3607 (2.1551)	Entropy 0.64955 (0.65111)	Top-1 acc 67.578 (72.902)	Top-5 acc 83.594 (89.206)	lr 0.00019
Train [114][1090/3239]	Time 0.259 (0.660)	Data Time 0.002 (0.039)	Loss 2.1041 (2.1550)	Entropy 0.64954 (0.65110)	Top-1 acc 75.391 (72.908)	Top-5 acc 89.062 (89.206)	lr 0.00019
Train [114][1100/3239]	Time 0.243 (0.659)	Data Time 0.001 (0.039)	Loss 2.2067 (2.1551)	Entropy 0.64943 (0.65108)	Top-1 acc 69.141 (72.903)	Top-5 acc 89.844 (89.205)	lr 0.00019
Train [114][1110/3239]	Time 2.524 (0.657)	Data Time 0.001 (0.038)	Loss 2.1847 (2.1553)	Entropy 0.64943 (0.65107)	Top-1 acc 70.703 (72.891)	Top-5 acc 88.281 (89.208)	lr 0.00019
Train [114][1120/3239]	Time 0.234 (0.654)	Data Time 0.001 (0.038)	Loss 1.9584 (2.1553)	Entropy 0.64947 (0.65105)	Top-1 acc 76.953 (72.894)	Top-5 acc 91.406 (89.205)	lr 0.00019
Train [114][1130/3239]	Time 0.241 (0.652)	Data Time 0.001 (0.038)	Loss 2.1889 (2.1554)	Entropy 0.64948 (0.65104)	Top-1 acc 74.219 (72.897)	Top-5 acc 88.672 (89.203)	lr 0.00019
Train [114][1140/3239]	Time 0.331 (0.651)	Data Time 0.001 (0.037)	Loss 2.1789 (2.1554)	Entropy 0.64944 (0.65102)	Top-1 acc 71.484 (72.900)	Top-5 acc 89.844 (89.203)	lr 0.00019
Train [114][1150/3239]	Time 0.246 (0.650)	Data Time 0.001 (0.037)	Loss 2.0273 (2.1551)	Entropy 0.64935 (0.65101)	Top-1 acc 75.000 (72.900)	Top-5 acc 91.016 (89.206)	lr 0.00019
Train [114][1160/3239]	Time 0.295 (0.648)	Data Time 0.002 (0.037)	Loss 2.0476 (2.1552)	Entropy 0.64932 (0.65100)	Top-1 acc 77.734 (72.901)	Top-5 acc 91.797 (89.200)	lr 0.00019
Train [114][1170/3239]	Time 0.364 (0.647)	Data Time 0.001 (0.037)	Loss 2.1783 (2.1554)	Entropy 0.64925 (0.65098)	Top-1 acc 75.000 (72.895)	Top-5 acc 89.453 (89.200)	lr 0.00019
Train [114][1180/3239]	Time 0.256 (0.646)	Data Time 0.001 (0.036)	Loss 2.2062 (2.1556)	Entropy 0.64928 (0.65097)	Top-1 acc 71.094 (72.884)	Top-5 acc 90.625 (89.201)	lr 0.00019
Train [114][1190/3239]	Time 0.236 (0.644)	Data Time 0.001 (0.036)	Loss 2.1754 (2.1554)	Entropy 0.64927 (0.65095)	Top-1 acc 69.531 (72.889)	Top-5 acc 89.844 (89.207)	lr 0.00019
Train [114][1200/3239]	Time 0.229 (0.643)	Data Time 0.001 (0.036)	Loss 2.1896 (2.1556)	Entropy 0.64922 (0.65094)	Top-1 acc 72.266 (72.884)	Top-5 acc 87.891 (89.200)	lr 0.00019
Train [114][1210/3239]	Time 0.341 (0.642)	Data Time 0.001 (0.035)	Loss 2.3546 (2.1557)	Entropy 0.64924 (0.65092)	Top-1 acc 66.797 (72.873)	Top-5 acc 83.203 (89.195)	lr 0.00019
Train [114][1220/3239]	Time 2.536 (0.641)	Data Time 0.001 (0.035)	Loss 2.2208 (2.1561)	Entropy 0.64924 (0.65091)	Top-1 acc 71.094 (72.867)	Top-5 acc 87.891 (89.193)	lr 0.00019
Train [114][1230/3239]	Time 0.241 (0.638)	Data Time 0.002 (0.035)	Loss 2.1108 (2.1567)	Entropy 0.64920 (0.65090)	Top-1 acc 75.391 (72.849)	Top-5 acc 90.234 (89.184)	lr 0.00019
Train [114][1240/3239]	Time 0.238 (0.637)	Data Time 0.001 (0.035)	Loss 2.1693 (2.1568)	Entropy 0.64899 (0.65088)	Top-1 acc 74.219 (72.839)	Top-5 acc 88.281 (89.183)	lr 0.00019
Train [114][1250/3239]	Time 0.282 (0.636)	Data Time 0.001 (0.034)	Loss 2.1368 (2.1568)	Entropy 0.64892 (0.65087)	Top-1 acc 71.094 (72.840)	Top-5 acc 88.281 (89.180)	lr 0.00019
Train [114][1260/3239]	Time 0.272 (0.634)	Data Time 0.001 (0.034)	Loss 2.0683 (2.1568)	Entropy 0.64885 (0.65085)	Top-1 acc 75.000 (72.848)	Top-5 acc 89.844 (89.180)	lr 0.00019
Train [114][1270/3239]	Time 0.255 (0.633)	Data Time 0.001 (0.034)	Loss 2.2199 (2.1568)	Entropy 0.64881 (0.65083)	Top-1 acc 71.875 (72.840)	Top-5 acc 87.891 (89.177)	lr 0.00019
Train [114][1280/3239]	Time 0.245 (0.632)	Data Time 0.001 (0.034)	Loss 2.0943 (2.1567)	Entropy 0.64877 (0.65082)	Top-1 acc 76.562 (72.848)	Top-5 acc 89.844 (89.182)	lr 0.00019
Train [114][1290/3239]	Time 0.274 (0.631)	Data Time 0.001 (0.033)	Loss 2.1085 (2.1567)	Entropy 0.64877 (0.65080)	Top-1 acc 70.703 (72.841)	Top-5 acc 89.844 (89.176)	lr 0.00019
Train [114][1300/3239]	Time 0.246 (0.630)	Data Time 0.001 (0.033)	Loss 2.2230 (2.1566)	Entropy 0.64876 (0.65079)	Top-1 acc 71.875 (72.842)	Top-5 acc 88.281 (89.182)	lr 0.00019
Train [114][1310/3239]	Time 0.244 (0.629)	Data Time 0.001 (0.033)	Loss 2.0990 (2.1565)	Entropy 0.64872 (0.65077)	Top-1 acc 75.000 (72.851)	Top-5 acc 90.625 (89.185)	lr 0.00019
Train [114][1320/3239]	Time 0.280 (0.628)	Data Time 0.001 (0.033)	Loss 2.0222 (2.1562)	Entropy 0.64868 (0.65076)	Top-1 acc 75.781 (72.861)	Top-5 acc 92.969 (89.195)	lr 0.00019
Train [114][1330/3239]	Time 2.469 (0.627)	Data Time 0.001 (0.032)	Loss 2.1228 (2.1560)	Entropy 0.64868 (0.65074)	Top-1 acc 71.875 (72.858)	Top-5 acc 89.844 (89.202)	lr 0.00019
Train [114][1340/3239]	Time 0.228 (0.624)	Data Time 0.001 (0.032)	Loss 2.0591 (2.1561)	Entropy 0.64861 (0.65072)	Top-1 acc 72.656 (72.859)	Top-5 acc 89.062 (89.198)	lr 0.00019
Train [114][1350/3239]	Time 0.302 (0.623)	Data Time 0.001 (0.032)	Loss 2.1750 (2.1560)	Entropy 0.64857 (0.65071)	Top-1 acc 71.484 (72.862)	Top-5 acc 87.500 (89.203)	lr 0.00019
Train [114][1360/3239]	Time 0.244 (0.622)	Data Time 0.001 (0.032)	Loss 2.0532 (2.1559)	Entropy 0.64847 (0.65069)	Top-1 acc 75.000 (72.863)	Top-5 acc 89.844 (89.205)	lr 0.00019
Train [114][1370/3239]	Time 0.251 (0.621)	Data Time 0.001 (0.031)	Loss 2.1677 (2.1561)	Entropy 0.64833 (0.65067)	Top-1 acc 75.000 (72.864)	Top-5 acc 90.625 (89.204)	lr 0.00018
Train [114][1380/3239]	Time 0.266 (0.621)	Data Time 0.001 (0.031)	Loss 2.2086 (2.1558)	Entropy 0.64830 (0.65066)	Top-1 acc 71.094 (72.870)	Top-5 acc 88.281 (89.205)	lr 0.00018
Train [114][1390/3239]	Time 0.240 (0.620)	Data Time 0.001 (0.031)	Loss 2.2331 (2.1560)	Entropy 0.64839 (0.65064)	Top-1 acc 69.141 (72.860)	Top-5 acc 86.719 (89.201)	lr 0.00018
Train [114][1400/3239]	Time 0.245 (0.658)	Data Time 0.002 (0.031)	Loss 2.0066 (2.1557)	Entropy 0.64842 (0.65063)	Top-1 acc 77.344 (72.868)	Top-5 acc 91.406 (89.207)	lr 0.00018
Train [114][1410/3239]	Time 0.244 (0.657)	Data Time 0.002 (0.031)	Loss 2.4078 (2.1557)	Entropy 0.64843 (0.65061)	Top-1 acc 65.234 (72.872)	Top-5 acc 81.641 (89.205)	lr 0.00018
Train [114][1420/3239]	Time 0.284 (0.656)	Data Time 0.002 (0.030)	Loss 2.0078 (2.1554)	Entropy 0.64841 (0.65059)	Top-1 acc 78.906 (72.879)	Top-5 acc 91.797 (89.209)	lr 0.00018
Train [114][1430/3239]	Time 0.254 (0.655)	Data Time 0.001 (0.030)	Loss 2.0964 (2.1554)	Entropy 0.64837 (0.65058)	Top-1 acc 74.219 (72.876)	Top-5 acc 90.234 (89.211)	lr 0.00018
Train [114][1440/3239]	Time 2.621 (0.654)	Data Time 0.001 (0.030)	Loss 2.3046 (2.1558)	Entropy 0.64837 (0.65056)	Top-1 acc 68.750 (72.863)	Top-5 acc 87.500 (89.204)	lr 0.00018
Train [114][1450/3239]	Time 0.234 (0.651)	Data Time 0.004 (0.030)	Loss 2.2826 (2.1558)	Entropy 0.64833 (0.65055)	Top-1 acc 69.922 (72.865)	Top-5 acc 89.062 (89.208)	lr 0.00018
Train [114][1460/3239]	Time 0.344 (0.650)	Data Time 0.001 (0.030)	Loss 1.9998 (2.1555)	Entropy 0.64830 (0.65053)	Top-1 acc 76.172 (72.870)	Top-5 acc 92.969 (89.211)	lr 0.00018
Train [114][1470/3239]	Time 0.237 (0.649)	Data Time 0.001 (0.029)	Loss 2.1170 (2.1551)	Entropy 0.64831 (0.65052)	Top-1 acc 74.609 (72.883)	Top-5 acc 91.406 (89.221)	lr 0.00018
Train [114][1480/3239]	Time 0.268 (0.648)	Data Time 0.001 (0.029)	Loss 2.2032 (2.1550)	Entropy 0.64808 (0.65050)	Top-1 acc 75.000 (72.881)	Top-5 acc 89.062 (89.227)	lr 0.00018
Train [114][1490/3239]	Time 0.306 (0.647)	Data Time 0.001 (0.029)	Loss 2.1138 (2.1547)	Entropy 0.64814 (0.65049)	Top-1 acc 75.000 (72.886)	Top-5 acc 89.453 (89.234)	lr 0.00018
Train [114][1500/3239]	Time 0.260 (0.646)	Data Time 0.002 (0.029)	Loss 2.2918 (2.1549)	Entropy 0.64813 (0.65047)	Top-1 acc 71.094 (72.882)	Top-5 acc 85.547 (89.231)	lr 0.00018
Train [114][1510/3239]	Time 0.252 (0.646)	Data Time 0.001 (0.029)	Loss 2.0745 (2.1546)	Entropy 0.64810 (0.65045)	Top-1 acc 76.172 (72.890)	Top-5 acc 89.453 (89.239)	lr 0.00018
Train [114][1520/3239]	Time 0.261 (0.644)	Data Time 0.001 (0.029)	Loss 2.1209 (2.1542)	Entropy 0.64801 (0.65044)	Top-1 acc 73.438 (72.897)	Top-5 acc 88.281 (89.245)	lr 0.00018
Train [114][1530/3239]	Time 0.284 (0.644)	Data Time 0.001 (0.028)	Loss 2.2546 (2.1544)	Entropy 0.64803 (0.65042)	Top-1 acc 72.266 (72.891)	Top-5 acc 87.500 (89.242)	lr 0.00018
Train [114][1540/3239]	Time 0.252 (0.643)	Data Time 0.001 (0.028)	Loss 2.0683 (2.1542)	Entropy 0.64804 (0.65041)	Top-1 acc 75.000 (72.895)	Top-5 acc 90.234 (89.244)	lr 0.00018
Train [114][1550/3239]	Time 2.698 (0.642)	Data Time 0.001 (0.028)	Loss 2.1112 (2.1540)	Entropy 0.64804 (0.65039)	Top-1 acc 73.828 (72.903)	Top-5 acc 89.062 (89.244)	lr 0.00018
Train [114][1560/3239]	Time 0.250 (0.639)	Data Time 0.002 (0.028)	Loss 2.3084 (2.1540)	Entropy 0.64803 (0.65038)	Top-1 acc 73.438 (72.903)	Top-5 acc 87.891 (89.244)	lr 0.00018
Train [114][1570/3239]	Time 0.269 (0.638)	Data Time 0.001 (0.028)	Loss 2.1865 (2.1551)	Entropy 0.64803 (0.65036)	Top-1 acc 72.266 (72.877)	Top-5 acc 89.062 (89.228)	lr 0.00018
Train [114][1580/3239]	Time 0.245 (0.637)	Data Time 0.001 (0.028)	Loss 2.2205 (2.1552)	Entropy 0.64806 (0.65035)	Top-1 acc 74.219 (72.876)	Top-5 acc 88.672 (89.232)	lr 0.00018
Train [114][1590/3239]	Time 0.320 (0.637)	Data Time 0.001 (0.027)	Loss 2.2082 (2.1547)	Entropy 0.64807 (0.65033)	Top-1 acc 69.922 (72.888)	Top-5 acc 88.281 (89.242)	lr 0.00018
Train [114][1600/3239]	Time 0.235 (0.636)	Data Time 0.002 (0.027)	Loss 2.1463 (2.1548)	Entropy 0.64802 (0.65032)	Top-1 acc 75.391 (72.888)	Top-5 acc 89.453 (89.244)	lr 0.00018
Train [114][1610/3239]	Time 0.254 (0.635)	Data Time 0.002 (0.027)	Loss 2.1533 (2.1550)	Entropy 0.64803 (0.65030)	Top-1 acc 75.000 (72.884)	Top-5 acc 89.844 (89.237)	lr 0.00018
Train [114][1620/3239]	Time 0.267 (0.634)	Data Time 0.001 (0.027)	Loss 2.2245 (2.1550)	Entropy 0.64807 (0.65029)	Top-1 acc 74.219 (72.884)	Top-5 acc 89.062 (89.240)	lr 0.00018
Train [114][1630/3239]	Time 0.314 (0.633)	Data Time 0.002 (0.027)	Loss 2.1144 (2.1553)	Entropy 0.64806 (0.65028)	Top-1 acc 72.266 (72.875)	Top-5 acc 91.797 (89.233)	lr 0.00018
Train [114][1640/3239]	Time 0.229 (0.632)	Data Time 0.002 (0.027)	Loss 2.0540 (2.1551)	Entropy 0.64802 (0.65026)	Top-1 acc 76.562 (72.886)	Top-5 acc 91.406 (89.239)	lr 0.00018
Train [114][1650/3239]	Time 0.259 (0.631)	Data Time 0.001 (0.026)	Loss 2.0767 (2.1552)	Entropy 0.64797 (0.65025)	Top-1 acc 71.094 (72.879)	Top-5 acc 90.625 (89.238)	lr 0.00018
Train [114][1660/3239]	Time 2.487 (0.630)	Data Time 0.001 (0.026)	Loss 2.2390 (2.1552)	Entropy 0.64797 (0.65024)	Top-1 acc 73.828 (72.878)	Top-5 acc 87.500 (89.240)	lr 0.00018
Train [114][1670/3239]	Time 0.320 (0.628)	Data Time 0.001 (0.026)	Loss 2.0392 (2.1549)	Entropy 0.64789 (0.65022)	Top-1 acc 75.781 (72.886)	Top-5 acc 93.359 (89.249)	lr 0.00018
Train [114][1680/3239]	Time 0.236 (0.627)	Data Time 0.001 (0.026)	Loss 2.2960 (2.1549)	Entropy 0.64780 (0.65021)	Top-1 acc 69.141 (72.884)	Top-5 acc 87.891 (89.250)	lr 0.00018
Train [114][1690/3239]	Time 0.268 (0.626)	Data Time 0.001 (0.026)	Loss 2.2804 (2.1549)	Entropy 0.64773 (0.65019)	Top-1 acc 67.578 (72.881)	Top-5 acc 88.281 (89.248)	lr 0.00018
Train [114][1700/3239]	Time 0.285 (0.625)	Data Time 0.001 (0.026)	Loss 2.3104 (2.1548)	Entropy 0.64766 (0.65018)	Top-1 acc 70.703 (72.888)	Top-5 acc 84.766 (89.249)	lr 0.00018
Train [114][1710/3239]	Time 0.282 (0.624)	Data Time 0.001 (0.026)	Loss 2.2618 (2.1550)	Entropy 0.64766 (0.65016)	Top-1 acc 66.406 (72.884)	Top-5 acc 90.234 (89.247)	lr 0.00018
Train [114][1720/3239]	Time 0.292 (0.624)	Data Time 0.002 (0.025)	Loss 2.1151 (2.1551)	Entropy 0.64764 (0.65015)	Top-1 acc 71.875 (72.881)	Top-5 acc 89.844 (89.243)	lr 0.00018
Train [114][1730/3239]	Time 0.263 (0.623)	Data Time 0.001 (0.025)	Loss 2.0799 (2.1548)	Entropy 0.64765 (0.65013)	Top-1 acc 75.000 (72.883)	Top-5 acc 88.672 (89.246)	lr 0.00018
Train [114][1740/3239]	Time 0.261 (0.623)	Data Time 0.001 (0.025)	Loss 2.1754 (2.1549)	Entropy 0.64755 (0.65012)	Top-1 acc 75.000 (72.886)	Top-5 acc 88.672 (89.243)	lr 0.00018
Train [114][1750/3239]	Time 0.261 (0.622)	Data Time 0.001 (0.025)	Loss 2.0513 (2.1546)	Entropy 0.64753 (0.65011)	Top-1 acc 77.344 (72.897)	Top-5 acc 93.359 (89.248)	lr 0.00018
Train [114][1760/3239]	Time 0.244 (0.653)	Data Time 0.003 (0.025)	Loss 2.0703 (2.1545)	Entropy 0.64755 (0.65009)	Top-1 acc 69.531 (72.894)	Top-5 acc 93.359 (89.250)	lr 0.00018
Train [114][1770/3239]	Time 2.462 (0.652)	Data Time 0.002 (0.025)	Loss 2.0379 (2.1544)	Entropy 0.64755 (0.65008)	Top-1 acc 75.781 (72.895)	Top-5 acc 92.969 (89.255)	lr 0.00018
Train [114][1780/3239]	Time 0.240 (0.650)	Data Time 0.001 (0.025)	Loss 2.2794 (2.1547)	Entropy 0.64740 (0.65006)	Top-1 acc 69.922 (72.886)	Top-5 acc 85.938 (89.250)	lr 0.00018
Train [114][1790/3239]	Time 0.254 (0.649)	Data Time 0.002 (0.025)	Loss 2.1875 (2.1549)	Entropy 0.64735 (0.65005)	Top-1 acc 71.094 (72.879)	Top-5 acc 88.281 (89.242)	lr 0.00018
Train [114][1800/3239]	Time 0.232 (0.648)	Data Time 0.001 (0.024)	Loss 2.1385 (2.1547)	Entropy 0.64731 (0.65003)	Top-1 acc 73.047 (72.885)	Top-5 acc 91.016 (89.249)	lr 0.00018
Train [114][1810/3239]	Time 0.247 (0.647)	Data Time 0.001 (0.024)	Loss 2.1537 (2.1548)	Entropy 0.64728 (0.65002)	Top-1 acc 73.828 (72.883)	Top-5 acc 90.234 (89.248)	lr 0.00018
Train [114][1820/3239]	Time 0.271 (0.646)	Data Time 0.001 (0.024)	Loss 2.0591 (2.1547)	Entropy 0.64727 (0.65000)	Top-1 acc 74.609 (72.887)	Top-5 acc 90.234 (89.252)	lr 0.00018
Train [114][1830/3239]	Time 0.251 (0.645)	Data Time 0.002 (0.024)	Loss 2.1119 (2.1545)	Entropy 0.64720 (0.64999)	Top-1 acc 76.562 (72.894)	Top-5 acc 89.062 (89.258)	lr 0.00018
Train [114][1840/3239]	Time 0.272 (0.645)	Data Time 0.002 (0.024)	Loss 2.2435 (2.1545)	Entropy 0.64715 (0.64997)	Top-1 acc 71.094 (72.896)	Top-5 acc 85.938 (89.257)	lr 0.00018
Train [114][1850/3239]	Time 0.239 (0.644)	Data Time 0.001 (0.024)	Loss 2.1604 (2.1546)	Entropy 0.64703 (0.64996)	Top-1 acc 73.047 (72.896)	Top-5 acc 87.500 (89.253)	lr 0.00018
Train [114][1860/3239]	Time 0.272 (0.643)	Data Time 0.025 (0.024)	Loss 2.2698 (2.1547)	Entropy 0.64700 (0.64994)	Top-1 acc 68.750 (72.894)	Top-5 acc 85.938 (89.252)	lr 0.00018
Train [114][1870/3239]	Time 0.260 (0.643)	Data Time 0.001 (0.024)	Loss 2.1717 (2.1547)	Entropy 0.64699 (0.64992)	Top-1 acc 75.000 (72.898)	Top-5 acc 86.719 (89.251)	lr 0.00018
Train [114][1880/3239]	Time 3.007 (0.642)	Data Time 0.002 (0.023)	Loss 2.1068 (2.1546)	Entropy 0.64699 (0.64991)	Top-1 acc 75.781 (72.903)	Top-5 acc 86.719 (89.252)	lr 0.00018
Train [114][1890/3239]	Time 0.271 (0.640)	Data Time 0.002 (0.023)	Loss 1.9748 (2.1547)	Entropy 0.64706 (0.64989)	Top-1 acc 76.172 (72.897)	Top-5 acc 94.922 (89.248)	lr 0.00018
Train [114][1900/3239]	Time 0.253 (0.640)	Data Time 0.001 (0.023)	Loss 2.2364 (2.1550)	Entropy 0.64704 (0.64988)	Top-1 acc 70.703 (72.889)	Top-5 acc 88.672 (89.247)	lr 0.00018
Train [114][1910/3239]	Time 0.273 (0.639)	Data Time 0.002 (0.023)	Loss 2.1703 (2.1549)	Entropy 0.64706 (0.64986)	Top-1 acc 71.484 (72.890)	Top-5 acc 90.234 (89.249)	lr 0.00018
Train [114][1920/3239]	Time 0.401 (0.638)	Data Time 0.001 (0.023)	Loss 2.1509 (2.1551)	Entropy 0.64700 (0.64985)	Top-1 acc 74.219 (72.887)	Top-5 acc 90.234 (89.247)	lr 0.00018
Train [114][1930/3239]	Time 0.279 (0.638)	Data Time 0.001 (0.023)	Loss 2.0343 (2.1550)	Entropy 0.64698 (0.64983)	Top-1 acc 75.781 (72.890)	Top-5 acc 90.625 (89.249)	lr 0.00018
Train [114][1940/3239]	Time 0.274 (0.637)	Data Time 0.002 (0.023)	Loss 2.1825 (2.1549)	Entropy 0.64699 (0.64982)	Top-1 acc 71.484 (72.894)	Top-5 acc 88.672 (89.249)	lr 0.00018
Train [114][1950/3239]	Time 0.310 (0.637)	Data Time 0.001 (0.023)	Loss 2.2342 (2.1549)	Entropy 0.64706 (0.64980)	Top-1 acc 70.312 (72.894)	Top-5 acc 86.719 (89.249)	lr 0.00017
Train [114][1960/3239]	Time 0.360 (0.636)	Data Time 0.002 (0.023)	Loss 2.1666 (2.1551)	Entropy 0.64707 (0.64979)	Top-1 acc 73.438 (72.889)	Top-5 acc 88.281 (89.242)	lr 0.00017
Train [114][1970/3239]	Time 0.239 (0.636)	Data Time 0.001 (0.022)	Loss 2.2228 (2.1551)	Entropy 0.64708 (0.64978)	Top-1 acc 71.875 (72.889)	Top-5 acc 87.109 (89.243)	lr 0.00017
Train [114][1980/3239]	Time 0.238 (0.635)	Data Time 0.001 (0.022)	Loss 2.2544 (2.1551)	Entropy 0.64711 (0.64976)	Top-1 acc 72.656 (72.889)	Top-5 acc 87.109 (89.245)	lr 0.00017
Train [114][1990/3239]	Time 2.566 (0.634)	Data Time 0.001 (0.022)	Loss 2.0429 (2.1550)	Entropy 0.64711 (0.64975)	Top-1 acc 75.781 (72.891)	Top-5 acc 90.234 (89.245)	lr 0.00017
Train [114][2000/3239]	Time 0.227 (0.632)	Data Time 0.001 (0.022)	Loss 2.2618 (2.1550)	Entropy 0.64709 (0.64974)	Top-1 acc 68.750 (72.890)	Top-5 acc 88.672 (89.244)	lr 0.00017
Train [114][2010/3239]	Time 0.275 (0.632)	Data Time 0.001 (0.022)	Loss 2.2348 (2.1551)	Entropy 0.64707 (0.64972)	Top-1 acc 69.922 (72.887)	Top-5 acc 85.938 (89.241)	lr 0.00017
Train [114][2020/3239]	Time 0.286 (0.631)	Data Time 0.002 (0.022)	Loss 2.1954 (2.1552)	Entropy 0.64710 (0.64971)	Top-1 acc 68.359 (72.881)	Top-5 acc 90.234 (89.238)	lr 0.00017
Train [114][2030/3239]	Time 0.289 (0.631)	Data Time 0.001 (0.022)	Loss 2.0145 (2.1556)	Entropy 0.64706 (0.64970)	Top-1 acc 79.688 (72.879)	Top-5 acc 90.625 (89.234)	lr 0.00017
Train [114][2040/3239]	Time 0.264 (0.630)	Data Time 0.001 (0.022)	Loss 2.1255 (2.1556)	Entropy 0.64703 (0.64968)	Top-1 acc 74.219 (72.878)	Top-5 acc 91.406 (89.231)	lr 0.00017
Train [114][2050/3239]	Time 0.264 (0.630)	Data Time 0.001 (0.022)	Loss 2.0389 (2.1556)	Entropy 0.64708 (0.64967)	Top-1 acc 76.953 (72.878)	Top-5 acc 89.453 (89.229)	lr 0.00017
Train [114][2060/3239]	Time 0.250 (0.629)	Data Time 0.002 (0.022)	Loss 2.1146 (2.1558)	Entropy 0.64707 (0.64966)	Top-1 acc 71.484 (72.870)	Top-5 acc 90.625 (89.224)	lr 0.00017
Train [114][2070/3239]	Time 0.243 (0.628)	Data Time 0.001 (0.021)	Loss 2.2297 (2.1559)	Entropy 0.64701 (0.64965)	Top-1 acc 69.531 (72.868)	Top-5 acc 86.328 (89.224)	lr 0.00017
Train [114][2080/3239]	Time 0.271 (0.628)	Data Time 0.002 (0.021)	Loss 2.2157 (2.1559)	Entropy 0.64693 (0.64963)	Top-1 acc 71.094 (72.867)	Top-5 acc 87.891 (89.225)	lr 0.00017
Train [114][2090/3239]	Time 0.259 (0.627)	Data Time 0.001 (0.021)	Loss 2.1426 (2.1561)	Entropy 0.64682 (0.64962)	Top-1 acc 72.266 (72.862)	Top-5 acc 87.891 (89.220)	lr 0.00017
Train [114][2100/3239]	Time 2.566 (0.627)	Data Time 0.001 (0.021)	Loss 2.1731 (2.1562)	Entropy 0.64682 (0.64961)	Top-1 acc 72.266 (72.861)	Top-5 acc 89.844 (89.219)	lr 0.00017
Train [114][2110/3239]	Time 0.268 (0.625)	Data Time 0.001 (0.021)	Loss 1.9891 (2.1561)	Entropy 0.64674 (0.64959)	Top-1 acc 75.781 (72.869)	Top-5 acc 92.969 (89.221)	lr 0.00017
Train [114][2120/3239]	Time 0.229 (0.624)	Data Time 0.001 (0.021)	Loss 2.0867 (2.1560)	Entropy 0.64675 (0.64958)	Top-1 acc 74.219 (72.869)	Top-5 acc 88.672 (89.218)	lr 0.00017
Train [114][2130/3239]	Time 0.327 (0.652)	Data Time 0.002 (0.021)	Loss 1.9802 (2.1560)	Entropy 0.64671 (0.64957)	Top-1 acc 77.734 (72.869)	Top-5 acc 92.188 (89.217)	lr 0.00017
Train [114][2140/3239]	Time 0.252 (0.651)	Data Time 0.002 (0.021)	Loss 2.1228 (2.1561)	Entropy 0.64670 (0.64955)	Top-1 acc 75.000 (72.867)	Top-5 acc 91.406 (89.220)	lr 0.00017
Train [114][2150/3239]	Time 0.246 (0.650)	Data Time 0.002 (0.021)	Loss 2.1919 (2.1562)	Entropy 0.64674 (0.64954)	Top-1 acc 72.266 (72.866)	Top-5 acc 88.281 (89.217)	lr 0.00017
Train [114][2160/3239]	Time 0.312 (0.650)	Data Time 0.001 (0.021)	Loss 2.2904 (2.1563)	Entropy 0.64672 (0.64953)	Top-1 acc 70.703 (72.861)	Top-5 acc 88.281 (89.216)	lr 0.00017
Train [114][2170/3239]	Time 0.449 (0.649)	Data Time 0.002 (0.021)	Loss 2.0837 (2.1561)	Entropy 0.64668 (0.64951)	Top-1 acc 72.266 (72.863)	Top-5 acc 91.016 (89.219)	lr 0.00017
Train [114][2180/3239]	Time 0.291 (0.648)	Data Time 0.001 (0.020)	Loss 2.1998 (2.1561)	Entropy 0.64670 (0.64950)	Top-1 acc 68.359 (72.861)	Top-5 acc 88.281 (89.218)	lr 0.00017
Train [114][2190/3239]	Time 0.234 (0.648)	Data Time 0.001 (0.020)	Loss 2.3078 (2.1560)	Entropy 0.64671 (0.64949)	Top-1 acc 67.578 (72.861)	Top-5 acc 85.547 (89.215)	lr 0.00017
Train [114][2200/3239]	Time 0.231 (0.647)	Data Time 0.001 (0.020)	Loss 2.2057 (2.1560)	Entropy 0.64669 (0.64948)	Top-1 acc 70.703 (72.864)	Top-5 acc 89.453 (89.216)	lr 0.00017
Train [114][2210/3239]	Time 2.691 (0.646)	Data Time 0.001 (0.020)	Loss 2.3052 (2.1560)	Entropy 0.64669 (0.64946)	Top-1 acc 68.750 (72.862)	Top-5 acc 85.938 (89.214)	lr 0.00017
Train [114][2220/3239]	Time 0.274 (0.645)	Data Time 0.001 (0.020)	Loss 2.2028 (2.1561)	Entropy 0.64654 (0.64945)	Top-1 acc 69.922 (72.861)	Top-5 acc 86.328 (89.218)	lr 0.00017
Train [114][2230/3239]	Time 0.257 (0.644)	Data Time 0.001 (0.020)	Loss 2.2130 (2.1560)	Entropy 0.64654 (0.64944)	Top-1 acc 74.609 (72.861)	Top-5 acc 89.062 (89.219)	lr 0.00017
Train [114][2240/3239]	Time 0.242 (0.644)	Data Time 0.001 (0.020)	Loss 2.1548 (2.1560)	Entropy 0.64649 (0.64942)	Top-1 acc 71.875 (72.858)	Top-5 acc 89.844 (89.219)	lr 0.00017
Train [114][2250/3239]	Time 0.256 (0.643)	Data Time 0.001 (0.020)	Loss 2.2721 (2.1561)	Entropy 0.64648 (0.64941)	Top-1 acc 69.141 (72.853)	Top-5 acc 88.672 (89.217)	lr 0.00017
Train [114][2260/3239]	Time 0.256 (0.642)	Data Time 0.001 (0.020)	Loss 2.2419 (2.1562)	Entropy 0.64638 (0.64940)	Top-1 acc 70.312 (72.855)	Top-5 acc 88.672 (89.217)	lr 0.00017
Train [114][2270/3239]	Time 0.257 (0.642)	Data Time 0.001 (0.020)	Loss 2.1011 (2.1562)	Entropy 0.64639 (0.64938)	Top-1 acc 75.781 (72.856)	Top-5 acc 90.234 (89.217)	lr 0.00017
Train [114][2280/3239]	Time 0.230 (0.641)	Data Time 0.002 (0.020)	Loss 2.1101 (2.1561)	Entropy 0.64638 (0.64937)	Top-1 acc 73.438 (72.858)	Top-5 acc 89.062 (89.219)	lr 0.00017
Train [114][2290/3239]	Time 0.307 (0.641)	Data Time 0.001 (0.020)	Loss 2.3059 (2.1561)	Entropy 0.64632 (0.64936)	Top-1 acc 66.797 (72.858)	Top-5 acc 83.984 (89.219)	lr 0.00017
Train [114][2300/3239]	Time 0.314 (0.640)	Data Time 0.003 (0.020)	Loss 2.2170 (2.1562)	Entropy 0.64624 (0.64934)	Top-1 acc 70.312 (72.857)	Top-5 acc 89.062 (89.218)	lr 0.00017
Train [114][2310/3239]	Time 0.229 (0.639)	Data Time 0.001 (0.019)	Loss 2.1853 (2.1564)	Entropy 0.64612 (0.64933)	Top-1 acc 75.391 (72.853)	Top-5 acc 89.062 (89.215)	lr 0.00017
Train [114][2320/3239]	Time 2.514 (0.639)	Data Time 0.001 (0.019)	Loss 1.9477 (2.1561)	Entropy 0.64612 (0.64932)	Top-1 acc 78.906 (72.866)	Top-5 acc 92.578 (89.219)	lr 0.00017
Train [114][2330/3239]	Time 0.246 (0.637)	Data Time 0.002 (0.019)	Loss 2.0753 (2.1560)	Entropy 0.64612 (0.64930)	Top-1 acc 73.047 (72.871)	Top-5 acc 92.969 (89.222)	lr 0.00017
Train [114][2340/3239]	Time 0.244 (0.637)	Data Time 0.001 (0.019)	Loss 2.0136 (2.1559)	Entropy 0.64604 (0.64929)	Top-1 acc 76.953 (72.872)	Top-5 acc 91.797 (89.226)	lr 0.00017
Train [114][2350/3239]	Time 0.227 (0.636)	Data Time 0.001 (0.019)	Loss 2.2057 (2.1558)	Entropy 0.64598 (0.64928)	Top-1 acc 69.531 (72.868)	Top-5 acc 89.453 (89.226)	lr 0.00017
Train [114][2360/3239]	Time 0.301 (0.636)	Data Time 0.001 (0.019)	Loss 2.2335 (2.1559)	Entropy 0.64595 (0.64926)	Top-1 acc 74.219 (72.870)	Top-5 acc 87.500 (89.224)	lr 0.00017
Train [114][2370/3239]	Time 0.283 (0.635)	Data Time 0.001 (0.019)	Loss 2.1148 (2.1558)	Entropy 0.64587 (0.64925)	Top-1 acc 73.047 (72.873)	Top-5 acc 89.453 (89.226)	lr 0.00017
Train [114][2380/3239]	Time 0.277 (0.634)	Data Time 0.002 (0.019)	Loss 2.0913 (2.1557)	Entropy 0.64584 (0.64923)	Top-1 acc 74.609 (72.872)	Top-5 acc 90.234 (89.225)	lr 0.00017
Train [114][2390/3239]	Time 0.241 (0.634)	Data Time 0.001 (0.019)	Loss 2.1731 (2.1562)	Entropy 0.64580 (0.64922)	Top-1 acc 72.266 (72.862)	Top-5 acc 91.016 (89.217)	lr 0.00017
Train [114][2400/3239]	Time 0.242 (0.633)	Data Time 0.001 (0.019)	Loss 2.1342 (2.1561)	Entropy 0.64575 (0.64920)	Top-1 acc 72.656 (72.861)	Top-5 acc 87.891 (89.219)	lr 0.00017
Train [114][2410/3239]	Time 0.263 (0.633)	Data Time 0.001 (0.019)	Loss 2.1892 (2.1559)	Entropy 0.64579 (0.64919)	Top-1 acc 73.047 (72.869)	Top-5 acc 88.672 (89.222)	lr 0.00017
Train [114][2420/3239]	Time 0.287 (0.632)	Data Time 0.001 (0.019)	Loss 2.0555 (2.1559)	Entropy 0.64559 (0.64918)	Top-1 acc 75.000 (72.870)	Top-5 acc 89.844 (89.220)	lr 0.00017
Train [114][2430/3239]	Time 2.596 (0.632)	Data Time 0.001 (0.019)	Loss 2.1369 (2.1560)	Entropy 0.64559 (0.64916)	Top-1 acc 77.344 (72.871)	Top-5 acc 90.234 (89.218)	lr 0.00017
Train [114][2440/3239]	Time 0.236 (0.630)	Data Time 0.001 (0.018)	Loss 2.1516 (2.1560)	Entropy 0.64551 (0.64915)	Top-1 acc 73.047 (72.871)	Top-5 acc 88.281 (89.217)	lr 0.00017
Train [114][2450/3239]	Time 0.244 (0.630)	Data Time 0.002 (0.018)	Loss 2.2276 (2.1559)	Entropy 0.64542 (0.64913)	Top-1 acc 72.656 (72.868)	Top-5 acc 86.328 (89.217)	lr 0.00017
Train [114][2460/3239]	Time 0.343 (0.629)	Data Time 0.001 (0.018)	Loss 2.1248 (2.1560)	Entropy 0.64545 (0.64912)	Top-1 acc 73.438 (72.868)	Top-5 acc 91.016 (89.217)	lr 0.00017
Train [114][2470/3239]	Time 0.242 (0.629)	Data Time 0.002 (0.018)	Loss 2.2356 (2.1559)	Entropy 0.64530 (0.64910)	Top-1 acc 67.969 (72.870)	Top-5 acc 88.672 (89.219)	lr 0.00017
Train [114][2480/3239]	Time 0.270 (0.628)	Data Time 0.001 (0.018)	Loss 2.1315 (2.1562)	Entropy 0.64522 (0.64909)	Top-1 acc 75.000 (72.865)	Top-5 acc 90.234 (89.216)	lr 0.00017
Train [114][2490/3239]	Time 0.298 (0.649)	Data Time 0.003 (0.018)	Loss 2.3324 (2.1564)	Entropy 0.64523 (0.64907)	Top-1 acc 67.578 (72.860)	Top-5 acc 87.109 (89.216)	lr 0.00017
Train [114][2500/3239]	Time 0.366 (0.649)	Data Time 0.002 (0.018)	Loss 2.0254 (2.1563)	Entropy 0.64522 (0.64905)	Top-1 acc 76.172 (72.864)	Top-5 acc 90.234 (89.214)	lr 0.00017
Train [114][2510/3239]	Time 0.274 (0.648)	Data Time 0.002 (0.018)	Loss 2.2260 (2.1564)	Entropy 0.64516 (0.64904)	Top-1 acc 70.703 (72.860)	Top-5 acc 88.281 (89.213)	lr 0.00017
Train [114][2520/3239]	Time 0.329 (0.648)	Data Time 0.002 (0.018)	Loss 1.9902 (2.1564)	Entropy 0.64514 (0.64902)	Top-1 acc 79.297 (72.862)	Top-5 acc 93.359 (89.216)	lr 0.00017
Train [114][2530/3239]	Time 0.246 (0.647)	Data Time 0.001 (0.018)	Loss 2.1462 (2.1561)	Entropy 0.64508 (0.64901)	Top-1 acc 73.438 (72.869)	Top-5 acc 89.844 (89.221)	lr 0.00017
Train [114][2540/3239]	Time 2.597 (0.647)	Data Time 0.001 (0.018)	Loss 2.0079 (2.1560)	Entropy 0.64508 (0.64899)	Top-1 acc 75.391 (72.874)	Top-5 acc 93.359 (89.224)	lr 0.00017
Train [114][2550/3239]	Time 0.245 (0.645)	Data Time 0.001 (0.018)	Loss 2.2401 (2.1561)	Entropy 0.64507 (0.64898)	Top-1 acc 71.484 (72.872)	Top-5 acc 87.109 (89.221)	lr 0.00016
Train [114][2560/3239]	Time 0.257 (0.645)	Data Time 0.001 (0.018)	Loss 2.2494 (2.1561)	Entropy 0.64502 (0.64896)	Top-1 acc 69.922 (72.871)	Top-5 acc 89.844 (89.225)	lr 0.00016
Train [114][2570/3239]	Time 0.278 (0.644)	Data Time 0.001 (0.018)	Loss 2.0581 (2.1562)	Entropy 0.64494 (0.64895)	Top-1 acc 75.781 (72.864)	Top-5 acc 92.188 (89.223)	lr 0.00016
Train [114][2580/3239]	Time 0.245 (0.644)	Data Time 0.001 (0.018)	Loss 2.0561 (2.1562)	Entropy 0.64489 (0.64893)	Top-1 acc 71.484 (72.864)	Top-5 acc 91.016 (89.223)	lr 0.00016
Train [114][2590/3239]	Time 0.301 (0.643)	Data Time 0.001 (0.018)	Loss 2.1846 (2.1563)	Entropy 0.64496 (0.64892)	Top-1 acc 71.875 (72.860)	Top-5 acc 88.672 (89.220)	lr 0.00016
Train [114][2600/3239]	Time 0.233 (0.643)	Data Time 0.001 (0.017)	Loss 2.2281 (2.1565)	Entropy 0.64495 (0.64890)	Top-1 acc 70.703 (72.852)	Top-5 acc 87.891 (89.217)	lr 0.00016
Train [114][2610/3239]	Time 0.250 (0.642)	Data Time 0.001 (0.017)	Loss 2.2253 (2.1564)	Entropy 0.64458 (0.64888)	Top-1 acc 71.484 (72.855)	Top-5 acc 87.500 (89.218)	lr 0.00016
Train [114][2620/3239]	Time 0.240 (0.642)	Data Time 0.001 (0.017)	Loss 2.2206 (2.1564)	Entropy 0.64456 (0.64887)	Top-1 acc 71.875 (72.854)	Top-5 acc 87.109 (89.216)	lr 0.00016
Train [114][2630/3239]	Time 0.261 (0.641)	Data Time 0.002 (0.017)	Loss 2.0268 (2.1563)	Entropy 0.64444 (0.64885)	Top-1 acc 77.734 (72.858)	Top-5 acc 91.016 (89.217)	lr 0.00016
Train [114][2640/3239]	Time 0.247 (0.640)	Data Time 0.002 (0.017)	Loss 2.1608 (2.1564)	Entropy 0.64444 (0.64884)	Top-1 acc 76.562 (72.859)	Top-5 acc 88.281 (89.215)	lr 0.00016
Train [114][2650/3239]	Time 0.232 (0.640)	Data Time 0.001 (0.017)	Loss 2.1776 (2.1564)	Entropy 0.64440 (0.64882)	Top-1 acc 72.266 (72.857)	Top-5 acc 87.500 (89.214)	lr 0.00016
Train [114][2660/3239]	Time 0.233 (0.639)	Data Time 0.001 (0.017)	Loss 2.2705 (2.1565)	Entropy 0.64446 (0.64880)	Top-1 acc 72.656 (72.855)	Top-5 acc 86.719 (89.211)	lr 0.00016
Train [114][2670/3239]	Time 0.285 (0.639)	Data Time 0.002 (0.017)	Loss 2.1066 (2.1564)	Entropy 0.64445 (0.64879)	Top-1 acc 75.000 (72.859)	Top-5 acc 89.062 (89.212)	lr 0.00016
Train [114][2680/3239]	Time 0.237 (0.638)	Data Time 0.001 (0.017)	Loss 2.1679 (2.1565)	Entropy 0.64438 (0.64877)	Top-1 acc 68.750 (72.857)	Top-5 acc 87.109 (89.210)	lr 0.00016
Train [114][2690/3239]	Time 0.226 (0.638)	Data Time 0.001 (0.017)	Loss 2.1521 (2.1564)	Entropy 0.64433 (0.64875)	Top-1 acc 72.266 (72.859)	Top-5 acc 89.453 (89.209)	lr 0.00016
Train [114][2700/3239]	Time 0.235 (0.637)	Data Time 0.001 (0.017)	Loss 2.3088 (2.1564)	Entropy 0.64432 (0.64874)	Top-1 acc 70.312 (72.860)	Top-5 acc 85.547 (89.208)	lr 0.00016
Train [114][2710/3239]	Time 0.239 (0.637)	Data Time 0.001 (0.017)	Loss 2.0485 (2.1563)	Entropy 0.64435 (0.64872)	Top-1 acc 76.172 (72.860)	Top-5 acc 94.141 (89.209)	lr 0.00016
Train [114][2720/3239]	Time 0.222 (0.636)	Data Time 0.001 (0.017)	Loss 2.2371 (2.1563)	Entropy 0.64442 (0.64870)	Top-1 acc 71.484 (72.864)	Top-5 acc 88.672 (89.211)	lr 0.00016
Train [114][2730/3239]	Time 0.237 (0.636)	Data Time 0.001 (0.017)	Loss 2.4045 (2.1564)	Entropy 0.64441 (0.64869)	Top-1 acc 69.531 (72.858)	Top-5 acc 84.766 (89.209)	lr 0.00016
Train [114][2740/3239]	Time 0.230 (0.635)	Data Time 0.001 (0.017)	Loss 2.1789 (2.1564)	Entropy 0.64440 (0.64867)	Top-1 acc 75.000 (72.859)	Top-5 acc 87.891 (89.208)	lr 0.00016
Train [114][2750/3239]	Time 0.350 (0.635)	Data Time 0.002 (0.017)	Loss 2.1137 (2.1564)	Entropy 0.64437 (0.64866)	Top-1 acc 74.219 (72.858)	Top-5 acc 89.453 (89.208)	lr 0.00016
Train [114][2760/3239]	Time 0.245 (0.634)	Data Time 0.001 (0.017)	Loss 2.1135 (2.1564)	Entropy 0.64437 (0.64864)	Top-1 acc 75.781 (72.864)	Top-5 acc 89.453 (89.210)	lr 0.00016
Train [114][2770/3239]	Time 0.234 (0.634)	Data Time 0.001 (0.017)	Loss 2.0795 (2.1563)	Entropy 0.64433 (0.64863)	Top-1 acc 75.000 (72.865)	Top-5 acc 89.844 (89.212)	lr 0.00016
Train [114][2780/3239]	Time 0.255 (0.633)	Data Time 0.001 (0.016)	Loss 2.1504 (2.1563)	Entropy 0.64432 (0.64861)	Top-1 acc 70.703 (72.863)	Top-5 acc 89.062 (89.212)	lr 0.00016
Train [114][2790/3239]	Time 0.385 (0.633)	Data Time 0.001 (0.016)	Loss 2.1952 (2.1563)	Entropy 0.64430 (0.64860)	Top-1 acc 72.656 (72.865)	Top-5 acc 90.234 (89.212)	lr 0.00016
Train [114][2800/3239]	Time 0.263 (0.632)	Data Time 0.001 (0.016)	Loss 2.1969 (2.1563)	Entropy 0.64422 (0.64858)	Top-1 acc 72.266 (72.868)	Top-5 acc 87.891 (89.213)	lr 0.00016
Train [114][2810/3239]	Time 0.274 (0.632)	Data Time 0.001 (0.016)	Loss 2.2093 (2.1563)	Entropy 0.64409 (0.64857)	Top-1 acc 70.312 (72.867)	Top-5 acc 85.938 (89.209)	lr 0.00016
Train [114][2820/3239]	Time 0.243 (0.631)	Data Time 0.001 (0.016)	Loss 2.1833 (2.1565)	Entropy 0.64410 (0.64855)	Top-1 acc 73.047 (72.860)	Top-5 acc 90.234 (89.208)	lr 0.00016
Train [114][2830/3239]	Time 0.933 (0.651)	Data Time 0.003 (0.016)	Loss 2.1879 (2.1566)	Entropy 0.64404 (0.64853)	Top-1 acc 75.781 (72.862)	Top-5 acc 87.891 (89.209)	lr 0.00016
Train [114][2840/3239]	Time 0.235 (0.651)	Data Time 0.002 (0.016)	Loss 2.1423 (2.1564)	Entropy 0.64399 (0.64852)	Top-1 acc 75.781 (72.866)	Top-5 acc 89.062 (89.212)	lr 0.00016
Train [114][2850/3239]	Time 0.237 (0.650)	Data Time 0.001 (0.016)	Loss 2.3174 (2.1564)	Entropy 0.64399 (0.64850)	Top-1 acc 69.922 (72.866)	Top-5 acc 87.109 (89.212)	lr 0.00016
Train [114][2860/3239]	Time 0.261 (0.650)	Data Time 0.001 (0.016)	Loss 2.0678 (2.1563)	Entropy 0.64395 (0.64849)	Top-1 acc 72.266 (72.865)	Top-5 acc 92.188 (89.215)	lr 0.00016
Train [114][2870/3239]	Time 0.329 (0.649)	Data Time 0.001 (0.016)	Loss 2.1460 (2.1563)	Entropy 0.64388 (0.64847)	Top-1 acc 71.484 (72.862)	Top-5 acc 88.281 (89.216)	lr 0.00016
Train [114][2880/3239]	Time 0.236 (0.649)	Data Time 0.001 (0.016)	Loss 2.0783 (2.1563)	Entropy 0.64382 (0.64845)	Top-1 acc 76.172 (72.864)	Top-5 acc 91.016 (89.219)	lr 0.00016
Train [114][2890/3239]	Time 0.252 (0.648)	Data Time 0.002 (0.016)	Loss 2.1307 (2.1564)	Entropy 0.64367 (0.64844)	Top-1 acc 73.047 (72.859)	Top-5 acc 87.500 (89.212)	lr 0.00016
Train [114][2900/3239]	Time 0.243 (0.648)	Data Time 0.002 (0.016)	Loss 2.2319 (2.1566)	Entropy 0.64358 (0.64842)	Top-1 acc 70.312 (72.855)	Top-5 acc 88.672 (89.211)	lr 0.00016
Train [114][2910/3239]	Time 0.379 (0.647)	Data Time 0.002 (0.016)	Loss 2.2284 (2.1565)	Entropy 0.64197 (0.64840)	Top-1 acc 70.703 (72.855)	Top-5 acc 87.891 (89.213)	lr 0.00016
Train [114][2920/3239]	Time 0.247 (0.647)	Data Time 0.001 (0.016)	Loss 2.0812 (2.1565)	Entropy 0.64196 (0.64838)	Top-1 acc 74.219 (72.859)	Top-5 acc 91.406 (89.212)	lr 0.00016
Train [114][2930/3239]	Time 0.243 (0.646)	Data Time 0.001 (0.016)	Loss 2.1502 (2.1564)	Entropy 0.64198 (0.64836)	Top-1 acc 75.000 (72.860)	Top-5 acc 87.891 (89.213)	lr 0.00016
Train [114][2940/3239]	Time 0.240 (0.646)	Data Time 0.001 (0.016)	Loss 2.2078 (2.1563)	Entropy 0.64191 (0.64834)	Top-1 acc 71.094 (72.860)	Top-5 acc 87.891 (89.212)	lr 0.00016
Train [114][2950/3239]	Time 0.343 (0.645)	Data Time 0.001 (0.016)	Loss 2.1082 (2.1564)	Entropy 0.64187 (0.64832)	Top-1 acc 69.531 (72.858)	Top-5 acc 92.188 (89.213)	lr 0.00016
Train [114][2960/3239]	Time 0.234 (0.645)	Data Time 0.001 (0.016)	Loss 2.1507 (2.1564)	Entropy 0.64184 (0.64830)	Top-1 acc 71.875 (72.856)	Top-5 acc 90.625 (89.213)	lr 0.00016
Train [114][2970/3239]	Time 0.280 (0.644)	Data Time 0.001 (0.016)	Loss 2.1598 (2.1565)	Entropy 0.64180 (0.64827)	Top-1 acc 73.047 (72.853)	Top-5 acc 90.234 (89.213)	lr 0.00016
Train [114][2980/3239]	Time 0.243 (0.644)	Data Time 0.001 (0.016)	Loss 2.1120 (2.1563)	Entropy 0.64169 (0.64825)	Top-1 acc 71.875 (72.858)	Top-5 acc 88.672 (89.215)	lr 0.00016
Train [114][2990/3239]	Time 0.228 (0.643)	Data Time 0.001 (0.015)	Loss 2.2234 (2.1562)	Entropy 0.64143 (0.64823)	Top-1 acc 67.969 (72.858)	Top-5 acc 87.109 (89.215)	lr 0.00016
Train [114][3000/3239]	Time 0.241 (0.643)	Data Time 0.002 (0.015)	Loss 2.2003 (2.1563)	Entropy 0.64144 (0.64821)	Top-1 acc 71.875 (72.856)	Top-5 acc 88.281 (89.214)	lr 0.00016
Train [114][3010/3239]	Time 0.211 (0.643)	Data Time 0.001 (0.015)	Loss 2.1751 (2.1562)	Entropy 0.64133 (0.64818)	Top-1 acc 71.484 (72.856)	Top-5 acc 89.062 (89.216)	lr 0.00016
Train [114][3020/3239]	Time 0.275 (0.642)	Data Time 0.001 (0.015)	Loss 2.1603 (2.1562)	Entropy 0.64136 (0.64816)	Top-1 acc 74.219 (72.857)	Top-5 acc 87.891 (89.218)	lr 0.00016
Train [114][3030/3239]	Time 0.234 (0.642)	Data Time 0.001 (0.015)	Loss 2.1977 (2.1561)	Entropy 0.64133 (0.64814)	Top-1 acc 68.750 (72.858)	Top-5 acc 89.844 (89.220)	lr 0.00016
Train [114][3040/3239]	Time 0.245 (0.641)	Data Time 0.001 (0.015)	Loss 2.0819 (2.1562)	Entropy 0.64133 (0.64812)	Top-1 acc 75.391 (72.859)	Top-5 acc 91.797 (89.220)	lr 0.00016
Train [114][3050/3239]	Time 0.241 (0.641)	Data Time 0.001 (0.015)	Loss 2.1545 (2.1562)	Entropy 0.64135 (0.64809)	Top-1 acc 71.094 (72.858)	Top-5 acc 89.844 (89.219)	lr 0.00016
Train [114][3060/3239]	Time 0.231 (0.640)	Data Time 0.001 (0.015)	Loss 1.8789 (2.1562)	Entropy 0.64134 (0.64807)	Top-1 acc 80.859 (72.860)	Top-5 acc 94.141 (89.217)	lr 0.00016
Train [114][3070/3239]	Time 0.256 (0.640)	Data Time 0.002 (0.015)	Loss 2.2056 (2.1562)	Entropy 0.64136 (0.64805)	Top-1 acc 69.531 (72.860)	Top-5 acc 90.234 (89.216)	lr 0.00016
Train [114][3080/3239]	Time 0.240 (0.639)	Data Time 0.001 (0.015)	Loss 2.2080 (2.1562)	Entropy 0.64119 (0.64803)	Top-1 acc 71.094 (72.861)	Top-5 acc 90.234 (89.217)	lr 0.00016
Train [114][3090/3239]	Time 0.224 (0.639)	Data Time 0.001 (0.015)	Loss 2.2682 (2.1562)	Entropy 0.64116 (0.64801)	Top-1 acc 71.094 (72.859)	Top-5 acc 87.500 (89.216)	lr 0.00016
Train [114][3100/3239]	Time 0.240 (0.638)	Data Time 0.002 (0.015)	Loss 2.1043 (2.1561)	Entropy 0.64108 (0.64798)	Top-1 acc 75.781 (72.861)	Top-5 acc 89.453 (89.219)	lr 0.00016
Train [114][3110/3239]	Time 0.255 (0.638)	Data Time 0.002 (0.015)	Loss 2.0778 (2.1560)	Entropy 0.64106 (0.64796)	Top-1 acc 73.438 (72.864)	Top-5 acc 90.625 (89.220)	lr 0.00016
Train [114][3120/3239]	Time 0.242 (0.638)	Data Time 0.002 (0.015)	Loss 2.1028 (2.1561)	Entropy 0.64103 (0.64794)	Top-1 acc 76.562 (72.862)	Top-5 acc 89.062 (89.219)	lr 0.00016
Train [114][3130/3239]	Time 0.232 (0.637)	Data Time 0.001 (0.015)	Loss 2.1706 (2.1560)	Entropy 0.64097 (0.64792)	Top-1 acc 74.609 (72.865)	Top-5 acc 89.453 (89.220)	lr 0.00016
Train [114][3140/3239]	Time 0.287 (0.637)	Data Time 0.002 (0.015)	Loss 2.3173 (2.1560)	Entropy 0.64094 (0.64790)	Top-1 acc 69.922 (72.866)	Top-5 acc 85.547 (89.219)	lr 0.00016
Train [114][3150/3239]	Time 0.225 (0.636)	Data Time 0.001 (0.015)	Loss 2.0664 (2.1558)	Entropy 0.64094 (0.64787)	Top-1 acc 75.000 (72.870)	Top-5 acc 92.578 (89.222)	lr 0.00016
Train [114][3160/3239]	Time 0.486 (0.653)	Data Time 0.004 (0.015)	Loss 2.2007 (2.1558)	Entropy 0.64093 (0.64785)	Top-1 acc 72.266 (72.871)	Top-5 acc 90.625 (89.224)	lr 0.00016
Train [114][3170/3239]	Time 0.237 (0.653)	Data Time 0.002 (0.015)	Loss 2.0294 (2.1557)	Entropy 0.64095 (0.64783)	Top-1 acc 77.734 (72.872)	Top-5 acc 90.625 (89.226)	lr 0.00016
Train [114][3180/3239]	Time 0.239 (0.652)	Data Time 0.000 (0.015)	Loss 2.2578 (2.1557)	Entropy 0.64093 (0.64781)	Top-1 acc 71.484 (72.871)	Top-5 acc 87.500 (89.228)	lr 0.00015
Train [114][3190/3239]	Time 0.247 (0.652)	Data Time 0.000 (0.015)	Loss 2.2045 (2.1556)	Entropy 0.64096 (0.64779)	Top-1 acc 69.922 (72.874)	Top-5 acc 87.891 (89.228)	lr 0.00015
Train [114][3200/3239]	Time 0.358 (0.651)	Data Time 0.000 (0.015)	Loss 2.2417 (2.1556)	Entropy 0.64095 (0.64777)	Top-1 acc 71.875 (72.874)	Top-5 acc 87.891 (89.228)	lr 0.00015
Train [114][3210/3239]	Time 0.253 (0.651)	Data Time 0.000 (0.015)	Loss 2.1523 (2.1557)	Entropy 0.64098 (0.64774)	Top-1 acc 71.484 (72.872)	Top-5 acc 87.891 (89.228)	lr 0.00015
Train [114][3220/3239]	Time 0.259 (0.650)	Data Time 0.000 (0.014)	Loss 2.2575 (2.1559)	Entropy 0.64094 (0.64772)	Top-1 acc 68.750 (72.866)	Top-5 acc 90.234 (89.226)	lr 0.00015
Train [114][3230/3239]	Time 0.243 (0.650)	Data Time 0.000 (0.014)	Loss 2.2504 (2.1559)	Entropy 0.64078 (0.64770)	Top-1 acc 71.094 (72.869)	Top-5 acc 87.109 (89.227)	lr 0.00015
Train [114][3239/3239]	Time 2.426 (0.649)	Data Time 0.000 (0.014)	Loss 2.4120 (2.1560)	Entropy 0.64078 (0.64768)	Top-1 acc 69.136 (72.868)	Top-5 acc 86.420 (89.225)	lr 0.00015
==========Valid [114/120]	loss 1.197	top-1 acc 72.611 (72.745)	top-5 acc 89.935	Train top-1 72.868	top-5 89.225	Entropy 0.64078	Latency-None: 0.000ms	Flops: 544.27M
Train [115][0/3239]	Time 41.837 (41.837)	Data Time 40.267 (40.267)	Loss 2.0785 (2.0785)	Entropy 0.64071 (0.64071)	Top-1 acc 73.828 (73.828)	Top-5 acc 92.969 (92.969)	lr 0.00015
Train [115][10/3239]	Time 2.698 (4.350)	Data Time 0.002 (3.663)	Loss 1.9884 (2.1002)	Entropy 0.64071 (0.64071)	Top-1 acc 77.344 (73.153)	Top-5 acc 91.016 (90.554)	lr 0.00015
Train [115][20/3239]	Time 0.227 (2.402)	Data Time 0.001 (1.919)	Loss 2.0765 (2.1343)	Entropy 0.64070 (0.64070)	Top-1 acc 76.172 (73.307)	Top-5 acc 90.234 (89.807)	lr 0.00015
Train [115][30/3239]	Time 0.236 (1.791)	Data Time 0.001 (1.301)	Loss 2.0578 (2.1314)	Entropy 0.64061 (0.64067)	Top-1 acc 73.828 (73.261)	Top-5 acc 89.844 (89.743)	lr 0.00015
Train [115][40/3239]	Time 0.251 (1.474)	Data Time 0.001 (0.984)	Loss 2.1323 (2.1393)	Entropy 0.64050 (0.64064)	Top-1 acc 73.828 (72.952)	Top-5 acc 91.406 (89.625)	lr 0.00015
Train [115][50/3239]	Time 0.245 (1.286)	Data Time 0.001 (0.791)	Loss 2.2183 (2.1432)	Entropy 0.64049 (0.64061)	Top-1 acc 71.484 (72.871)	Top-5 acc 88.672 (89.530)	lr 0.00015
Train [115][60/3239]	Time 0.259 (1.156)	Data Time 0.001 (0.662)	Loss 2.1026 (2.1417)	Entropy 0.64046 (0.64059)	Top-1 acc 75.000 (72.925)	Top-5 acc 91.016 (89.504)	lr 0.00015
Train [115][70/3239]	Time 0.233 (1.064)	Data Time 0.001 (0.569)	Loss 2.1478 (2.1441)	Entropy 0.64035 (0.64056)	Top-1 acc 71.875 (72.887)	Top-5 acc 90.234 (89.536)	lr 0.00015
Train [115][80/3239]	Time 0.243 (0.995)	Data Time 0.001 (0.499)	Loss 2.0526 (2.1470)	Entropy 0.64031 (0.64053)	Top-1 acc 75.781 (72.854)	Top-5 acc 91.797 (89.477)	lr 0.00015
Train [115][90/3239]	Time 0.290 (0.942)	Data Time 0.001 (0.444)	Loss 2.1495 (2.1505)	Entropy 0.64029 (0.64051)	Top-1 acc 73.828 (72.699)	Top-5 acc 89.062 (89.449)	lr 0.00015
Train [115][100/3239]	Time 0.261 (0.897)	Data Time 0.001 (0.401)	Loss 2.2481 (2.1518)	Entropy 0.64038 (0.64049)	Top-1 acc 69.531 (72.730)	Top-5 acc 86.328 (89.302)	lr 0.00015
Train [115][110/3239]	Time 0.264 (0.860)	Data Time 0.001 (0.365)	Loss 2.0168 (2.1537)	Entropy 0.64036 (0.64048)	Top-1 acc 77.734 (72.674)	Top-5 acc 91.797 (89.231)	lr 0.00015
Train [115][120/3239]	Time 2.644 (0.831)	Data Time 0.001 (0.335)	Loss 2.2453 (2.1531)	Entropy 0.64036 (0.64047)	Top-1 acc 68.750 (72.705)	Top-5 acc 89.062 (89.282)	lr 0.00015
Train [115][130/3239]	Time 0.243 (0.788)	Data Time 0.003 (0.310)	Loss 2.1631 (2.1542)	Entropy 0.64033 (0.64046)	Top-1 acc 72.266 (72.686)	Top-5 acc 89.844 (89.262)	lr 0.00015
Train [115][140/3239]	Time 0.246 (0.766)	Data Time 0.001 (0.288)	Loss 2.0746 (2.1508)	Entropy 0.64030 (0.64045)	Top-1 acc 76.953 (72.822)	Top-5 acc 89.062 (89.304)	lr 0.00015
Train [115][150/3239]	Time 0.240 (0.749)	Data Time 0.001 (0.269)	Loss 2.1748 (2.1512)	Entropy 0.64031 (0.64044)	Top-1 acc 70.703 (72.837)	Top-5 acc 89.062 (89.300)	lr 0.00015
Train [115][160/3239]	Time 0.237 (0.734)	Data Time 0.002 (0.252)	Loss 2.0420 (2.1508)	Entropy 0.64030 (0.64043)	Top-1 acc 74.219 (72.855)	Top-5 acc 91.797 (89.305)	lr 0.00015
Train [115][170/3239]	Time 0.354 (0.720)	Data Time 0.001 (0.238)	Loss 2.2543 (2.1532)	Entropy 0.64022 (0.64042)	Top-1 acc 71.484 (72.782)	Top-5 acc 88.281 (89.238)	lr 0.00015
Train [115][180/3239]	Time 0.238 (0.708)	Data Time 0.001 (0.225)	Loss 2.0459 (2.1524)	Entropy 0.64021 (0.64041)	Top-1 acc 73.828 (72.797)	Top-5 acc 92.969 (89.242)	lr 0.00015
Train [115][190/3239]	Time 0.231 (0.697)	Data Time 0.001 (0.213)	Loss 2.0972 (2.1516)	Entropy 0.64014 (0.64039)	Top-1 acc 72.266 (72.806)	Top-5 acc 91.797 (89.267)	lr 0.00015
Train [115][200/3239]	Time 0.244 (0.686)	Data Time 0.001 (0.202)	Loss 2.3043 (2.1526)	Entropy 0.64007 (0.64038)	Top-1 acc 69.141 (72.755)	Top-5 acc 86.719 (89.269)	lr 0.00015
Train [115][210/3239]	Time 0.353 (0.678)	Data Time 0.001 (0.193)	Loss 2.1827 (2.1524)	Entropy 0.64006 (0.64037)	Top-1 acc 72.656 (72.795)	Top-5 acc 89.062 (89.266)	lr 0.00015
Train [115][220/3239]	Time 0.227 (0.669)	Data Time 0.001 (0.184)	Loss 2.1399 (2.1526)	Entropy 0.63997 (0.64035)	Top-1 acc 74.219 (72.815)	Top-5 acc 90.234 (89.282)	lr 0.00015
Train [115][230/3239]	Time 2.537 (0.660)	Data Time 0.001 (0.176)	Loss 2.2367 (2.1568)	Entropy 0.63997 (0.64034)	Top-1 acc 69.141 (72.675)	Top-5 acc 88.281 (89.210)	lr 0.00015
Train [115][240/3239]	Time 0.248 (0.644)	Data Time 0.001 (0.169)	Loss 2.1671 (2.1554)	Entropy 0.63995 (0.64032)	Top-1 acc 71.484 (72.669)	Top-5 acc 90.234 (89.270)	lr 0.00015
Train [115][250/3239]	Time 0.423 (0.638)	Data Time 0.001 (0.162)	Loss 2.2687 (2.1566)	Entropy 0.63992 (0.64030)	Top-1 acc 68.359 (72.683)	Top-5 acc 88.281 (89.238)	lr 0.00015
Train [115][260/3239]	Time 0.250 (0.633)	Data Time 0.001 (0.156)	Loss 2.1452 (2.1561)	Entropy 0.63984 (0.64029)	Top-1 acc 71.094 (72.679)	Top-5 acc 89.453 (89.251)	lr 0.00015
Train [115][270/3239]	Time 0.244 (0.627)	Data Time 0.001 (0.150)	Loss 2.1919 (2.1575)	Entropy 0.63967 (0.64027)	Top-1 acc 73.438 (72.671)	Top-5 acc 88.281 (89.222)	lr 0.00015
Train [115][280/3239]	Time 0.393 (0.811)	Data Time 0.003 (0.145)	Loss 2.2004 (2.1565)	Entropy 0.63965 (0.64024)	Top-1 acc 69.922 (72.698)	Top-5 acc 88.281 (89.249)	lr 0.00015
Train [115][290/3239]	Time 0.321 (0.801)	Data Time 0.002 (0.140)	Loss 2.1786 (2.1554)	Entropy 0.63966 (0.64022)	Top-1 acc 72.656 (72.727)	Top-5 acc 89.062 (89.293)	lr 0.00015
Train [115][300/3239]	Time 0.237 (0.791)	Data Time 0.001 (0.136)	Loss 2.0687 (2.1555)	Entropy 0.63964 (0.64020)	Top-1 acc 75.781 (72.744)	Top-5 acc 91.406 (89.273)	lr 0.00015
Train [115][310/3239]	Time 0.231 (0.781)	Data Time 0.002 (0.131)	Loss 2.1447 (2.1552)	Entropy 0.63957 (0.64019)	Top-1 acc 71.875 (72.738)	Top-5 acc 91.016 (89.301)	lr 0.00015
Train [115][320/3239]	Time 0.258 (0.771)	Data Time 0.001 (0.127)	Loss 2.1145 (2.1544)	Entropy 0.63965 (0.64017)	Top-1 acc 74.609 (72.754)	Top-5 acc 90.234 (89.319)	lr 0.00015
Train [115][330/3239]	Time 0.290 (0.763)	Data Time 0.002 (0.124)	Loss 2.1601 (2.1554)	Entropy 0.63961 (0.64015)	Top-1 acc 72.656 (72.733)	Top-5 acc 88.281 (89.282)	lr 0.00015
Train [115][340/3239]	Time 2.701 (0.755)	Data Time 0.002 (0.120)	Loss 2.1475 (2.1548)	Entropy 0.63961 (0.64014)	Top-1 acc 70.703 (72.738)	Top-5 acc 89.062 (89.297)	lr 0.00015
Train [115][350/3239]	Time 0.228 (0.741)	Data Time 0.001 (0.117)	Loss 2.0231 (2.1558)	Entropy 0.63948 (0.64012)	Top-1 acc 74.219 (72.721)	Top-5 acc 91.016 (89.271)	lr 0.00015
Train [115][360/3239]	Time 0.244 (0.734)	Data Time 0.001 (0.114)	Loss 2.1567 (2.1556)	Entropy 0.63968 (0.64010)	Top-1 acc 74.609 (72.728)	Top-5 acc 89.844 (89.291)	lr 0.00015
Train [115][370/3239]	Time 0.240 (0.728)	Data Time 0.001 (0.111)	Loss 2.1372 (2.1559)	Entropy 0.63972 (0.64009)	Top-1 acc 75.000 (72.719)	Top-5 acc 89.453 (89.292)	lr 0.00015
Train [115][380/3239]	Time 0.254 (0.722)	Data Time 0.002 (0.108)	Loss 2.1142 (2.1553)	Entropy 0.63962 (0.64008)	Top-1 acc 74.219 (72.776)	Top-5 acc 89.062 (89.285)	lr 0.00015
Train [115][390/3239]	Time 0.243 (0.717)	Data Time 0.003 (0.105)	Loss 2.1923 (2.1551)	Entropy 0.63963 (0.64007)	Top-1 acc 70.312 (72.784)	Top-5 acc 88.672 (89.284)	lr 0.00015
Train [115][400/3239]	Time 0.238 (0.711)	Data Time 0.002 (0.102)	Loss 2.0832 (2.1535)	Entropy 0.63960 (0.64006)	Top-1 acc 74.609 (72.823)	Top-5 acc 88.672 (89.306)	lr 0.00015
Train [115][410/3239]	Time 0.261 (0.706)	Data Time 0.002 (0.100)	Loss 2.2514 (2.1526)	Entropy 0.63955 (0.64005)	Top-1 acc 67.969 (72.843)	Top-5 acc 86.719 (89.309)	lr 0.00015
Train [115][420/3239]	Time 0.228 (0.701)	Data Time 0.002 (0.098)	Loss 2.1707 (2.1519)	Entropy 0.63951 (0.64004)	Top-1 acc 73.047 (72.871)	Top-5 acc 87.109 (89.314)	lr 0.00015
Train [115][430/3239]	Time 0.250 (0.697)	Data Time 0.001 (0.095)	Loss 2.1657 (2.1521)	Entropy 0.63944 (0.64002)	Top-1 acc 73.438 (72.889)	Top-5 acc 89.453 (89.304)	lr 0.00015
Train [115][440/3239]	Time 0.251 (0.692)	Data Time 0.001 (0.093)	Loss 2.3344 (2.1523)	Entropy 0.63945 (0.64001)	Top-1 acc 69.141 (72.879)	Top-5 acc 85.938 (89.303)	lr 0.00015
Train [115][450/3239]	Time 2.634 (0.687)	Data Time 0.001 (0.091)	Loss 2.0720 (2.1510)	Entropy 0.63945 (0.64000)	Top-1 acc 77.344 (72.930)	Top-5 acc 92.188 (89.329)	lr 0.00015
Train [115][460/3239]	Time 0.375 (0.678)	Data Time 0.001 (0.089)	Loss 2.2151 (2.1515)	Entropy 0.63940 (0.63998)	Top-1 acc 70.703 (72.930)	Top-5 acc 86.328 (89.303)	lr 0.00015
Train [115][470/3239]	Time 0.255 (0.674)	Data Time 0.001 (0.087)	Loss 2.2112 (2.1526)	Entropy 0.63943 (0.63997)	Top-1 acc 71.094 (72.906)	Top-5 acc 90.234 (89.291)	lr 0.00015
Train [115][480/3239]	Time 0.243 (0.671)	Data Time 0.001 (0.086)	Loss 2.1133 (2.1526)	Entropy 0.63947 (0.63996)	Top-1 acc 72.266 (72.894)	Top-5 acc 91.406 (89.287)	lr 0.00015
Train [115][490/3239]	Time 0.238 (0.667)	Data Time 0.001 (0.084)	Loss 2.1290 (2.1523)	Entropy 0.63940 (0.63995)	Top-1 acc 73.438 (72.887)	Top-5 acc 87.500 (89.299)	lr 0.00015
Train [115][500/3239]	Time 0.341 (0.664)	Data Time 0.001 (0.082)	Loss 2.1860 (2.1535)	Entropy 0.63937 (0.63994)	Top-1 acc 69.922 (72.865)	Top-5 acc 87.500 (89.281)	lr 0.00015
Train [115][510/3239]	Time 0.241 (0.660)	Data Time 0.002 (0.081)	Loss 2.1067 (2.1542)	Entropy 0.63935 (0.63993)	Top-1 acc 75.000 (72.840)	Top-5 acc 90.234 (89.264)	lr 0.00015
Train [115][520/3239]	Time 0.246 (0.657)	Data Time 0.001 (0.079)	Loss 2.0517 (2.1542)	Entropy 0.63931 (0.63992)	Top-1 acc 73.438 (72.862)	Top-5 acc 91.797 (89.263)	lr 0.00015
Train [115][530/3239]	Time 0.232 (0.654)	Data Time 0.002 (0.078)	Loss 2.0415 (2.1543)	Entropy 0.63930 (0.63990)	Top-1 acc 76.953 (72.864)	Top-5 acc 91.797 (89.252)	lr 0.00015
Train [115][540/3239]	Time 0.370 (0.651)	Data Time 0.001 (0.076)	Loss 2.0750 (2.1540)	Entropy 0.63932 (0.63989)	Top-1 acc 75.781 (72.882)	Top-5 acc 90.234 (89.247)	lr 0.00015
Train [115][550/3239]	Time 0.227 (0.648)	Data Time 0.001 (0.075)	Loss 2.1848 (2.1542)	Entropy 0.63931 (0.63988)	Top-1 acc 71.094 (72.895)	Top-5 acc 86.328 (89.232)	lr 0.00015
Train [115][560/3239]	Time 2.621 (0.645)	Data Time 0.001 (0.074)	Loss 2.2142 (2.1545)	Entropy 0.63931 (0.63987)	Top-1 acc 71.875 (72.904)	Top-5 acc 87.500 (89.223)	lr 0.00015
Train [115][570/3239]	Time 0.238 (0.638)	Data Time 0.001 (0.072)	Loss 2.2081 (2.1547)	Entropy 0.63924 (0.63986)	Top-1 acc 71.875 (72.897)	Top-5 acc 87.500 (89.218)	lr 0.00015
Train [115][580/3239]	Time 0.245 (0.635)	Data Time 0.001 (0.071)	Loss 2.1585 (2.1545)	Entropy 0.63918 (0.63985)	Top-1 acc 72.656 (72.889)	Top-5 acc 90.234 (89.228)	lr 0.00014
Train [115][590/3239]	Time 0.228 (0.633)	Data Time 0.001 (0.070)	Loss 2.0490 (2.1541)	Entropy 0.63916 (0.63984)	Top-1 acc 75.391 (72.906)	Top-5 acc 92.578 (89.238)	lr 0.00014
Train [115][600/3239]	Time 0.253 (0.631)	Data Time 0.006 (0.069)	Loss 2.0551 (2.1545)	Entropy 0.63911 (0.63983)	Top-1 acc 78.125 (72.903)	Top-5 acc 91.016 (89.236)	lr 0.00014
Train [115][610/3239]	Time 0.273 (0.629)	Data Time 0.001 (0.068)	Loss 2.2126 (2.1545)	Entropy 0.63904 (0.63981)	Top-1 acc 71.094 (72.902)	Top-5 acc 87.109 (89.233)	lr 0.00014
Train [115][620/3239]	Time 0.232 (0.626)	Data Time 0.001 (0.067)	Loss 2.0503 (2.1542)	Entropy 0.63901 (0.63980)	Top-1 acc 75.000 (72.902)	Top-5 acc 88.672 (89.242)	lr 0.00014
Train [115][630/3239]	Time 0.239 (0.625)	Data Time 0.001 (0.066)	Loss 2.1222 (2.1535)	Entropy 0.63906 (0.63979)	Top-1 acc 71.875 (72.921)	Top-5 acc 89.062 (89.262)	lr 0.00014
Train [115][640/3239]	Time 0.235 (0.710)	Data Time 0.002 (0.065)	Loss 2.0812 (2.1535)	Entropy 0.63904 (0.63978)	Top-1 acc 75.781 (72.918)	Top-5 acc 91.016 (89.267)	lr 0.00014
Train [115][650/3239]	Time 0.266 (0.707)	Data Time 0.002 (0.064)	Loss 2.0844 (2.1527)	Entropy 0.63904 (0.63977)	Top-1 acc 73.047 (72.928)	Top-5 acc 89.062 (89.282)	lr 0.00014
Train [115][660/3239]	Time 0.262 (0.703)	Data Time 0.002 (0.063)	Loss 2.1063 (2.1524)	Entropy 0.63900 (0.63976)	Top-1 acc 73.047 (72.938)	Top-5 acc 89.062 (89.285)	lr 0.00014
Train [115][670/3239]	Time 2.605 (0.701)	Data Time 0.002 (0.062)	Loss 2.0744 (2.1511)	Entropy 0.63900 (0.63974)	Top-1 acc 75.781 (72.968)	Top-5 acc 90.234 (89.305)	lr 0.00014
Train [115][680/3239]	Time 0.251 (0.694)	Data Time 0.002 (0.061)	Loss 2.1115 (2.1514)	Entropy 0.63905 (0.63973)	Top-1 acc 70.703 (72.948)	Top-5 acc 90.625 (89.302)	lr 0.00014
Train [115][690/3239]	Time 0.249 (0.691)	Data Time 0.002 (0.060)	Loss 2.1145 (2.1515)	Entropy 0.63906 (0.63972)	Top-1 acc 74.609 (72.950)	Top-5 acc 89.453 (89.295)	lr 0.00014
Train [115][700/3239]	Time 0.260 (0.688)	Data Time 0.001 (0.059)	Loss 2.1096 (2.1519)	Entropy 0.63911 (0.63972)	Top-1 acc 73.828 (72.930)	Top-5 acc 90.234 (89.285)	lr 0.00014
Train [115][710/3239]	Time 0.246 (0.686)	Data Time 0.002 (0.059)	Loss 2.2581 (2.1528)	Entropy 0.63907 (0.63971)	Top-1 acc 70.312 (72.903)	Top-5 acc 88.281 (89.265)	lr 0.00014
Train [115][720/3239]	Time 0.238 (0.683)	Data Time 0.002 (0.058)	Loss 2.2409 (2.1529)	Entropy 0.63901 (0.63970)	Top-1 acc 72.266 (72.898)	Top-5 acc 89.844 (89.264)	lr 0.00014
Train [115][730/3239]	Time 0.249 (0.681)	Data Time 0.001 (0.057)	Loss 2.2228 (2.1529)	Entropy 0.63891 (0.63969)	Top-1 acc 69.141 (72.884)	Top-5 acc 88.281 (89.265)	lr 0.00014
Train [115][740/3239]	Time 0.231 (0.678)	Data Time 0.001 (0.056)	Loss 1.9813 (2.1528)	Entropy 0.63888 (0.63968)	Top-1 acc 76.172 (72.882)	Top-5 acc 92.969 (89.274)	lr 0.00014
Train [115][750/3239]	Time 0.377 (0.676)	Data Time 0.001 (0.056)	Loss 2.2617 (2.1534)	Entropy 0.63890 (0.63967)	Top-1 acc 69.531 (72.876)	Top-5 acc 85.156 (89.258)	lr 0.00014
Train [115][760/3239]	Time 0.287 (0.673)	Data Time 0.002 (0.055)	Loss 2.1587 (2.1538)	Entropy 0.63885 (0.63966)	Top-1 acc 72.656 (72.859)	Top-5 acc 89.062 (89.254)	lr 0.00014
Train [115][770/3239]	Time 0.227 (0.671)	Data Time 0.001 (0.054)	Loss 2.1770 (2.1540)	Entropy 0.63873 (0.63965)	Top-1 acc 73.828 (72.857)	Top-5 acc 90.625 (89.256)	lr 0.00014
Train [115][780/3239]	Time 2.601 (0.669)	Data Time 0.001 (0.053)	Loss 2.0773 (2.1538)	Entropy 0.63873 (0.63963)	Top-1 acc 75.000 (72.863)	Top-5 acc 89.844 (89.262)	lr 0.00014
Train [115][790/3239]	Time 0.363 (0.664)	Data Time 0.001 (0.053)	Loss 2.1639 (2.1536)	Entropy 0.63863 (0.63962)	Top-1 acc 75.391 (72.878)	Top-5 acc 87.109 (89.259)	lr 0.00014
Train [115][800/3239]	Time 0.240 (0.662)	Data Time 0.001 (0.052)	Loss 2.0610 (2.1536)	Entropy 0.63854 (0.63961)	Top-1 acc 76.562 (72.881)	Top-5 acc 89.844 (89.265)	lr 0.00014
Train [115][810/3239]	Time 0.240 (0.660)	Data Time 0.001 (0.052)	Loss 2.1290 (2.1541)	Entropy 0.63853 (0.63959)	Top-1 acc 74.609 (72.866)	Top-5 acc 88.672 (89.257)	lr 0.00014
Train [115][820/3239]	Time 0.244 (0.657)	Data Time 0.001 (0.051)	Loss 2.0393 (2.1538)	Entropy 0.63858 (0.63958)	Top-1 acc 75.391 (72.879)	Top-5 acc 92.578 (89.263)	lr 0.00014
Train [115][830/3239]	Time 0.351 (0.656)	Data Time 0.001 (0.050)	Loss 2.1990 (2.1536)	Entropy 0.63860 (0.63957)	Top-1 acc 71.094 (72.879)	Top-5 acc 90.234 (89.268)	lr 0.00014
Train [115][840/3239]	Time 0.250 (0.654)	Data Time 0.002 (0.050)	Loss 2.0494 (2.1536)	Entropy 0.63863 (0.63956)	Top-1 acc 73.828 (72.869)	Top-5 acc 92.578 (89.275)	lr 0.00014
Train [115][850/3239]	Time 0.224 (0.652)	Data Time 0.001 (0.049)	Loss 2.1628 (2.1534)	Entropy 0.63856 (0.63955)	Top-1 acc 73.828 (72.880)	Top-5 acc 88.281 (89.270)	lr 0.00014
Train [115][860/3239]	Time 0.235 (0.650)	Data Time 0.001 (0.049)	Loss 2.1615 (2.1538)	Entropy 0.63843 (0.63954)	Top-1 acc 72.266 (72.866)	Top-5 acc 89.844 (89.260)	lr 0.00014
Train [115][870/3239]	Time 0.330 (0.648)	Data Time 0.001 (0.048)	Loss 2.1853 (2.1541)	Entropy 0.63838 (0.63952)	Top-1 acc 76.562 (72.860)	Top-5 acc 88.672 (89.258)	lr 0.00014
Train [115][880/3239]	Time 0.282 (0.646)	Data Time 0.001 (0.048)	Loss 2.1571 (2.1539)	Entropy 0.63837 (0.63951)	Top-1 acc 71.094 (72.859)	Top-5 acc 87.500 (89.263)	lr 0.00014
Train [115][890/3239]	Time 2.500 (0.644)	Data Time 0.001 (0.047)	Loss 2.1675 (2.1536)	Entropy 0.63837 (0.63950)	Top-1 acc 73.438 (72.869)	Top-5 acc 86.328 (89.267)	lr 0.00014
Train [115][900/3239]	Time 0.251 (0.640)	Data Time 0.001 (0.047)	Loss 2.1289 (2.1536)	Entropy 0.63824 (0.63948)	Top-1 acc 74.609 (72.871)	Top-5 acc 91.016 (89.275)	lr 0.00014
Train [115][910/3239]	Time 0.252 (0.638)	Data Time 0.001 (0.046)	Loss 2.0344 (2.1539)	Entropy 0.63803 (0.63947)	Top-1 acc 76.172 (72.865)	Top-5 acc 90.625 (89.269)	lr 0.00014
Train [115][920/3239]	Time 0.228 (0.636)	Data Time 0.001 (0.046)	Loss 2.1300 (2.1535)	Entropy 0.63805 (0.63945)	Top-1 acc 74.219 (72.867)	Top-5 acc 90.625 (89.280)	lr 0.00014
Train [115][930/3239]	Time 0.226 (0.635)	Data Time 0.001 (0.045)	Loss 2.1877 (2.1529)	Entropy 0.63804 (0.63944)	Top-1 acc 71.875 (72.883)	Top-5 acc 87.500 (89.289)	lr 0.00014
Train [115][940/3239]	Time 0.268 (0.633)	Data Time 0.002 (0.045)	Loss 2.1305 (2.1529)	Entropy 0.63802 (0.63942)	Top-1 acc 73.047 (72.884)	Top-5 acc 89.453 (89.290)	lr 0.00014
Train [115][950/3239]	Time 0.258 (0.632)	Data Time 0.001 (0.044)	Loss 2.1667 (2.1533)	Entropy 0.63808 (0.63941)	Top-1 acc 73.438 (72.869)	Top-5 acc 87.500 (89.289)	lr 0.00014
Train [115][960/3239]	Time 0.278 (0.630)	Data Time 0.001 (0.044)	Loss 2.1290 (2.1536)	Entropy 0.63807 (0.63939)	Top-1 acc 73.047 (72.862)	Top-5 acc 90.234 (89.286)	lr 0.00014
Train [115][970/3239]	Time 0.238 (0.629)	Data Time 0.001 (0.043)	Loss 2.2017 (2.1532)	Entropy 0.63798 (0.63938)	Top-1 acc 70.703 (72.868)	Top-5 acc 88.281 (89.293)	lr 0.00014
Train [115][980/3239]	Time 0.233 (0.627)	Data Time 0.001 (0.043)	Loss 2.2414 (2.1537)	Entropy 0.63790 (0.63936)	Top-1 acc 69.922 (72.857)	Top-5 acc 88.672 (89.279)	lr 0.00014
Train [115][990/3239]	Time 0.290 (0.626)	Data Time 0.001 (0.042)	Loss 2.2167 (2.1533)	Entropy 0.63787 (0.63935)	Top-1 acc 72.656 (72.873)	Top-5 acc 86.328 (89.283)	lr 0.00014
Train [115][1000/3239]	Time 52.950 (0.675)	Data Time 0.001 (0.042)	Loss 2.0471 (2.1532)	Entropy 0.63787 (0.63934)	Top-1 acc 72.266 (72.871)	Top-5 acc 90.625 (89.290)	lr 0.00014
Train [115][1010/3239]	Time 0.240 (0.672)	Data Time 0.002 (0.042)	Loss 2.2919 (2.1533)	Entropy 0.63788 (0.63932)	Top-1 acc 69.922 (72.861)	Top-5 acc 86.719 (89.286)	lr 0.00014
Train [115][1020/3239]	Time 0.224 (0.670)	Data Time 0.001 (0.041)	Loss 2.2489 (2.1538)	Entropy 0.63782 (0.63931)	Top-1 acc 69.922 (72.853)	Top-5 acc 85.938 (89.277)	lr 0.00014
Train [115][1030/3239]	Time 0.264 (0.668)	Data Time 0.001 (0.041)	Loss 2.1539 (2.1543)	Entropy 0.63777 (0.63929)	Top-1 acc 72.656 (72.838)	Top-5 acc 89.844 (89.265)	lr 0.00014
Train [115][1040/3239]	Time 0.325 (0.667)	Data Time 0.002 (0.041)	Loss 2.1203 (2.1542)	Entropy 0.63779 (0.63928)	Top-1 acc 75.000 (72.841)	Top-5 acc 89.453 (89.262)	lr 0.00014
Train [115][1050/3239]	Time 0.236 (0.665)	Data Time 0.001 (0.040)	Loss 2.1386 (2.1547)	Entropy 0.63767 (0.63926)	Top-1 acc 71.094 (72.835)	Top-5 acc 90.625 (89.253)	lr 0.00014
Train [115][1060/3239]	Time 0.217 (0.663)	Data Time 0.001 (0.040)	Loss 2.0570 (2.1547)	Entropy 0.63768 (0.63925)	Top-1 acc 77.344 (72.836)	Top-5 acc 89.844 (89.250)	lr 0.00014
Train [115][1070/3239]	Time 0.239 (0.662)	Data Time 0.001 (0.039)	Loss 2.0773 (2.1546)	Entropy 0.63751 (0.63923)	Top-1 acc 74.219 (72.839)	Top-5 acc 91.797 (89.257)	lr 0.00014
Train [115][1080/3239]	Time 0.368 (0.660)	Data Time 0.001 (0.039)	Loss 2.0944 (2.1547)	Entropy 0.63750 (0.63922)	Top-1 acc 73.047 (72.846)	Top-5 acc 91.406 (89.248)	lr 0.00014
Train [115][1090/3239]	Time 0.279 (0.659)	Data Time 0.001 (0.039)	Loss 2.1460 (2.1542)	Entropy 0.63756 (0.63920)	Top-1 acc 71.094 (72.853)	Top-5 acc 88.672 (89.256)	lr 0.00014
Train [115][1100/3239]	Time 0.231 (0.657)	Data Time 0.001 (0.038)	Loss 2.2455 (2.1545)	Entropy 0.63759 (0.63919)	Top-1 acc 71.094 (72.853)	Top-5 acc 87.109 (89.253)	lr 0.00014
Train [115][1110/3239]	Time 2.545 (0.656)	Data Time 0.001 (0.038)	Loss 2.1590 (2.1545)	Entropy 0.63759 (0.63917)	Top-1 acc 74.219 (72.856)	Top-5 acc 88.672 (89.250)	lr 0.00014
Train [115][1120/3239]	Time 0.325 (0.652)	Data Time 0.001 (0.038)	Loss 2.0896 (2.1542)	Entropy 0.63769 (0.63916)	Top-1 acc 76.562 (72.868)	Top-5 acc 91.797 (89.256)	lr 0.00014
Train [115][1130/3239]	Time 0.245 (0.651)	Data Time 0.001 (0.037)	Loss 2.1849 (2.1543)	Entropy 0.63760 (0.63914)	Top-1 acc 69.922 (72.870)	Top-5 acc 88.281 (89.259)	lr 0.00014
Train [115][1140/3239]	Time 0.210 (0.650)	Data Time 0.001 (0.037)	Loss 2.0229 (2.1539)	Entropy 0.63757 (0.63913)	Top-1 acc 75.000 (72.887)	Top-5 acc 92.188 (89.262)	lr 0.00014
Train [115][1150/3239]	Time 0.227 (0.648)	Data Time 0.001 (0.037)	Loss 2.0706 (2.1536)	Entropy 0.63761 (0.63912)	Top-1 acc 76.562 (72.888)	Top-5 acc 91.406 (89.266)	lr 0.00014
Train [115][1160/3239]	Time 0.234 (0.647)	Data Time 0.001 (0.037)	Loss 2.0417 (2.1537)	Entropy 0.63751 (0.63910)	Top-1 acc 76.953 (72.893)	Top-5 acc 89.062 (89.262)	lr 0.00014
Train [115][1170/3239]	Time 0.250 (0.645)	Data Time 0.002 (0.036)	Loss 2.1527 (2.1540)	Entropy 0.63747 (0.63909)	Top-1 acc 75.781 (72.882)	Top-5 acc 88.672 (89.257)	lr 0.00014
Train [115][1180/3239]	Time 0.293 (0.644)	Data Time 0.001 (0.036)	Loss 2.0323 (2.1536)	Entropy 0.63748 (0.63908)	Top-1 acc 76.172 (72.890)	Top-5 acc 91.016 (89.264)	lr 0.00014
Train [115][1190/3239]	Time 0.279 (0.643)	Data Time 0.001 (0.036)	Loss 2.3436 (2.1537)	Entropy 0.63744 (0.63906)	Top-1 acc 71.484 (72.889)	Top-5 acc 85.938 (89.265)	lr 0.00014
Train [115][1200/3239]	Time 0.232 (0.642)	Data Time 0.001 (0.035)	Loss 2.2075 (2.1538)	Entropy 0.63741 (0.63905)	Top-1 acc 72.266 (72.889)	Top-5 acc 87.500 (89.261)	lr 0.00014
Train [115][1210/3239]	Time 0.256 (0.640)	Data Time 0.001 (0.035)	Loss 2.1513 (2.1537)	Entropy 0.63739 (0.63904)	Top-1 acc 73.047 (72.895)	Top-5 acc 90.234 (89.262)	lr 0.00014
Train [115][1220/3239]	Time 2.552 (0.639)	Data Time 0.001 (0.035)	Loss 2.3302 (2.1540)	Entropy 0.63739 (0.63902)	Top-1 acc 68.750 (72.889)	Top-5 acc 85.938 (89.256)	lr 0.00014
Train [115][1230/3239]	Time 0.244 (0.636)	Data Time 0.001 (0.035)	Loss 2.1217 (2.1539)	Entropy 0.63741 (0.63901)	Top-1 acc 72.656 (72.887)	Top-5 acc 89.844 (89.262)	lr 0.00014
Train [115][1240/3239]	Time 0.250 (0.635)	Data Time 0.001 (0.034)	Loss 2.1374 (2.1536)	Entropy 0.63735 (0.63900)	Top-1 acc 75.781 (72.888)	Top-5 acc 90.234 (89.269)	lr 0.00013
Train [115][1250/3239]	Time 0.235 (0.634)	Data Time 0.001 (0.034)	Loss 2.0892 (2.1535)	Entropy 0.63729 (0.63898)	Top-1 acc 71.484 (72.892)	Top-5 acc 91.797 (89.272)	lr 0.00013
Train [115][1260/3239]	Time 0.244 (0.632)	Data Time 0.001 (0.034)	Loss 2.0386 (2.1549)	Entropy 0.63723 (0.63897)	Top-1 acc 76.172 (72.862)	Top-5 acc 92.578 (89.254)	lr 0.00013
Train [115][1270/3239]	Time 0.246 (0.631)	Data Time 0.001 (0.034)	Loss 2.1384 (2.1546)	Entropy 0.63728 (0.63896)	Top-1 acc 74.219 (72.872)	Top-5 acc 88.281 (89.257)	lr 0.00013
Train [115][1280/3239]	Time 0.243 (0.630)	Data Time 0.001 (0.033)	Loss 2.0309 (2.1548)	Entropy 0.63730 (0.63894)	Top-1 acc 75.781 (72.872)	Top-5 acc 91.016 (89.249)	lr 0.00013
Train [115][1290/3239]	Time 0.263 (0.629)	Data Time 0.001 (0.033)	Loss 2.1966 (2.1546)	Entropy 0.63734 (0.63893)	Top-1 acc 69.922 (72.862)	Top-5 acc 87.109 (89.254)	lr 0.00013
Train [115][1300/3239]	Time 0.259 (0.628)	Data Time 0.001 (0.033)	Loss 2.1395 (2.1547)	Entropy 0.63730 (0.63892)	Top-1 acc 74.219 (72.864)	Top-5 acc 89.062 (89.251)	lr 0.00013
Train [115][1310/3239]	Time 0.267 (0.627)	Data Time 0.001 (0.033)	Loss 2.2589 (2.1544)	Entropy 0.63717 (0.63890)	Top-1 acc 70.312 (72.879)	Top-5 acc 86.328 (89.253)	lr 0.00013
Train [115][1320/3239]	Time 0.229 (0.626)	Data Time 0.001 (0.032)	Loss 2.1625 (2.1544)	Entropy 0.63714 (0.63889)	Top-1 acc 75.000 (72.883)	Top-5 acc 88.281 (89.251)	lr 0.00013
Train [115][1330/3239]	Time 2.573 (0.625)	Data Time 0.001 (0.032)	Loss 2.0945 (2.1543)	Entropy 0.63714 (0.63888)	Top-1 acc 74.219 (72.884)	Top-5 acc 89.844 (89.253)	lr 0.00013
Train [115][1340/3239]	Time 0.218 (0.622)	Data Time 0.001 (0.032)	Loss 2.1439 (2.1542)	Entropy 0.63715 (0.63887)	Top-1 acc 73.828 (72.883)	Top-5 acc 87.891 (89.253)	lr 0.00013
Train [115][1350/3239]	Time 0.249 (0.621)	Data Time 0.001 (0.032)	Loss 1.9841 (2.1538)	Entropy 0.63712 (0.63885)	Top-1 acc 78.516 (72.898)	Top-5 acc 92.188 (89.261)	lr 0.00013
Train [115][1360/3239]	Time 0.238 (0.620)	Data Time 0.001 (0.031)	Loss 2.2514 (2.1540)	Entropy 0.63711 (0.63884)	Top-1 acc 69.922 (72.887)	Top-5 acc 88.672 (89.253)	lr 0.00013
Train [115][1370/3239]	Time 0.339 (0.659)	Data Time 0.002 (0.031)	Loss 2.2398 (2.1541)	Entropy 0.63716 (0.63883)	Top-1 acc 73.438 (72.882)	Top-5 acc 89.844 (89.257)	lr 0.00013
Train [115][1380/3239]	Time 0.288 (0.658)	Data Time 0.002 (0.031)	Loss 2.1026 (2.1542)	Entropy 0.63701 (0.63881)	Top-1 acc 75.391 (72.876)	Top-5 acc 90.234 (89.259)	lr 0.00013
Train [115][1390/3239]	Time 0.244 (0.657)	Data Time 0.001 (0.031)	Loss 2.2157 (2.1541)	Entropy 0.63698 (0.63880)	Top-1 acc 70.703 (72.878)	Top-5 acc 89.062 (89.259)	lr 0.00013
Train [115][1400/3239]	Time 0.290 (0.656)	Data Time 0.002 (0.031)	Loss 2.1430 (2.1543)	Entropy 0.63687 (0.63879)	Top-1 acc 73.438 (72.878)	Top-5 acc 89.453 (89.253)	lr 0.00013
Train [115][1410/3239]	Time 0.399 (0.655)	Data Time 0.003 (0.030)	Loss 2.1328 (2.1545)	Entropy 0.63681 (0.63877)	Top-1 acc 75.781 (72.870)	Top-5 acc 88.672 (89.254)	lr 0.00013
Train [115][1420/3239]	Time 0.256 (0.654)	Data Time 0.001 (0.030)	Loss 2.2038 (2.1546)	Entropy 0.63681 (0.63876)	Top-1 acc 75.391 (72.875)	Top-5 acc 88.281 (89.250)	lr 0.00013
Train [115][1430/3239]	Time 0.271 (0.653)	Data Time 0.001 (0.030)	Loss 2.2185 (2.1548)	Entropy 0.63677 (0.63875)	Top-1 acc 71.094 (72.869)	Top-5 acc 90.625 (89.249)	lr 0.00013
Train [115][1440/3239]	Time 2.536 (0.652)	Data Time 0.001 (0.030)	Loss 2.0197 (2.1553)	Entropy 0.63677 (0.63873)	Top-1 acc 74.609 (72.855)	Top-5 acc 91.406 (89.239)	lr 0.00013
Train [115][1450/3239]	Time 0.372 (0.649)	Data Time 0.004 (0.030)	Loss 2.1083 (2.1556)	Entropy 0.63669 (0.63872)	Top-1 acc 73.828 (72.846)	Top-5 acc 90.234 (89.237)	lr 0.00013
Train [115][1460/3239]	Time 0.254 (0.648)	Data Time 0.002 (0.029)	Loss 2.0332 (2.1557)	Entropy 0.63666 (0.63871)	Top-1 acc 76.953 (72.842)	Top-5 acc 91.016 (89.237)	lr 0.00013
Train [115][1470/3239]	Time 0.251 (0.647)	Data Time 0.001 (0.029)	Loss 2.1375 (2.1557)	Entropy 0.63663 (0.63869)	Top-1 acc 75.781 (72.846)	Top-5 acc 91.016 (89.237)	lr 0.00013
Train [115][1480/3239]	Time 0.277 (0.646)	Data Time 0.001 (0.029)	Loss 2.0225 (2.1556)	Entropy 0.63660 (0.63868)	Top-1 acc 76.562 (72.852)	Top-5 acc 92.188 (89.243)	lr 0.00013
Train [115][1490/3239]	Time 0.317 (0.645)	Data Time 0.001 (0.029)	Loss 2.1300 (2.1557)	Entropy 0.63659 (0.63866)	Top-1 acc 70.312 (72.847)	Top-5 acc 89.844 (89.242)	lr 0.00013
Train [115][1500/3239]	Time 0.240 (0.644)	Data Time 0.002 (0.029)	Loss 2.0770 (2.1554)	Entropy 0.63656 (0.63865)	Top-1 acc 76.953 (72.858)	Top-5 acc 89.062 (89.246)	lr 0.00013
Train [115][1510/3239]	Time 0.284 (0.643)	Data Time 0.001 (0.028)	Loss 2.1668 (2.1554)	Entropy 0.63657 (0.63864)	Top-1 acc 73.047 (72.857)	Top-5 acc 88.281 (89.246)	lr 0.00013
Train [115][1520/3239]	Time 0.238 (0.642)	Data Time 0.001 (0.028)	Loss 2.1451 (2.1549)	Entropy 0.63643 (0.63862)	Top-1 acc 73.047 (72.865)	Top-5 acc 91.406 (89.250)	lr 0.00013
Train [115][1530/3239]	Time 0.312 (0.641)	Data Time 0.001 (0.028)	Loss 2.2025 (2.1552)	Entropy 0.63642 (0.63861)	Top-1 acc 73.047 (72.866)	Top-5 acc 89.453 (89.252)	lr 0.00013
Train [115][1540/3239]	Time 0.328 (0.640)	Data Time 0.001 (0.028)	Loss 2.2693 (2.1554)	Entropy 0.63639 (0.63859)	Top-1 acc 71.484 (72.863)	Top-5 acc 87.500 (89.246)	lr 0.00013
Train [115][1550/3239]	Time 2.490 (0.639)	Data Time 0.001 (0.028)	Loss 2.1990 (2.1559)	Entropy 0.63639 (0.63858)	Top-1 acc 72.266 (72.853)	Top-5 acc 88.281 (89.238)	lr 0.00013
Train [115][1560/3239]	Time 0.256 (0.637)	Data Time 0.001 (0.028)	Loss 2.0960 (2.1558)	Entropy 0.63636 (0.63856)	Top-1 acc 73.828 (72.854)	Top-5 acc 91.016 (89.240)	lr 0.00013
Train [115][1570/3239]	Time 0.255 (0.636)	Data Time 0.002 (0.027)	Loss 1.9999 (2.1556)	Entropy 0.63639 (0.63855)	Top-1 acc 76.953 (72.857)	Top-5 acc 92.969 (89.247)	lr 0.00013
Train [115][1580/3239]	Time 0.235 (0.635)	Data Time 0.001 (0.027)	Loss 2.0855 (2.1556)	Entropy 0.63642 (0.63854)	Top-1 acc 73.438 (72.856)	Top-5 acc 90.234 (89.246)	lr 0.00013
Train [115][1590/3239]	Time 0.249 (0.634)	Data Time 0.001 (0.027)	Loss 2.2867 (2.1556)	Entropy 0.63637 (0.63852)	Top-1 acc 73.047 (72.854)	Top-5 acc 86.328 (89.244)	lr 0.00013
Train [115][1600/3239]	Time 0.228 (0.633)	Data Time 0.001 (0.027)	Loss 2.0100 (2.1556)	Entropy 0.63628 (0.63851)	Top-1 acc 76.172 (72.855)	Top-5 acc 91.797 (89.243)	lr 0.00013
Train [115][1610/3239]	Time 0.237 (0.632)	Data Time 0.002 (0.027)	Loss 2.3080 (2.1558)	Entropy 0.63625 (0.63850)	Top-1 acc 69.922 (72.849)	Top-5 acc 85.938 (89.241)	lr 0.00013
Train [115][1620/3239]	Time 0.343 (0.631)	Data Time 0.001 (0.027)	Loss 2.1538 (2.1557)	Entropy 0.63618 (0.63848)	Top-1 acc 71.875 (72.854)	Top-5 acc 89.453 (89.245)	lr 0.00013
Train [115][1630/3239]	Time 0.234 (0.631)	Data Time 0.001 (0.027)	Loss 2.1614 (2.1558)	Entropy 0.63619 (0.63847)	Top-1 acc 74.219 (72.851)	Top-5 acc 87.500 (89.238)	lr 0.00013
Train [115][1640/3239]	Time 0.238 (0.630)	Data Time 0.001 (0.026)	Loss 2.1503 (2.1562)	Entropy 0.63613 (0.63845)	Top-1 acc 71.875 (72.842)	Top-5 acc 87.891 (89.229)	lr 0.00013
Train [115][1650/3239]	Time 0.228 (0.629)	Data Time 0.001 (0.026)	Loss 2.1776 (2.1559)	Entropy 0.63608 (0.63844)	Top-1 acc 70.703 (72.843)	Top-5 acc 91.016 (89.236)	lr 0.00013
Train [115][1660/3239]	Time 2.546 (0.628)	Data Time 0.001 (0.026)	Loss 2.1628 (2.1560)	Entropy 0.63608 (0.63843)	Top-1 acc 71.875 (72.840)	Top-5 acc 90.234 (89.236)	lr 0.00013
Train [115][1670/3239]	Time 0.222 (0.625)	Data Time 0.001 (0.026)	Loss 2.1870 (2.1560)	Entropy 0.63607 (0.63841)	Top-1 acc 73.438 (72.840)	Top-5 acc 86.719 (89.234)	lr 0.00013
Train [115][1680/3239]	Time 0.224 (0.624)	Data Time 0.001 (0.026)	Loss 2.0759 (2.1558)	Entropy 0.63606 (0.63840)	Top-1 acc 80.078 (72.851)	Top-5 acc 87.891 (89.235)	lr 0.00013
Train [115][1690/3239]	Time 0.237 (0.624)	Data Time 0.001 (0.026)	Loss 2.2968 (2.1557)	Entropy 0.63607 (0.63838)	Top-1 acc 69.141 (72.854)	Top-5 acc 83.984 (89.235)	lr 0.00013
Train [115][1700/3239]	Time 0.360 (0.623)	Data Time 0.002 (0.025)	Loss 2.0896 (2.1559)	Entropy 0.63602 (0.63837)	Top-1 acc 76.172 (72.853)	Top-5 acc 91.797 (89.231)	lr 0.00013
Train [115][1710/3239]	Time 0.259 (0.622)	Data Time 0.001 (0.025)	Loss 2.2458 (2.1559)	Entropy 0.63595 (0.63836)	Top-1 acc 71.484 (72.852)	Top-5 acc 87.500 (89.228)	lr 0.00013
Train [115][1720/3239]	Time 0.234 (0.621)	Data Time 0.001 (0.025)	Loss 2.0308 (2.1557)	Entropy 0.63588 (0.63834)	Top-1 acc 76.562 (72.858)	Top-5 acc 91.016 (89.229)	lr 0.00013
Train [115][1730/3239]	Time 0.393 (0.653)	Data Time 0.003 (0.025)	Loss 1.9644 (2.1555)	Entropy 0.63575 (0.63833)	Top-1 acc 76.953 (72.861)	Top-5 acc 93.750 (89.236)	lr 0.00013
Train [115][1740/3239]	Time 0.422 (0.652)	Data Time 0.003 (0.025)	Loss 2.1215 (2.1552)	Entropy 0.63566 (0.63831)	Top-1 acc 75.781 (72.866)	Top-5 acc 88.672 (89.241)	lr 0.00013
Train [115][1750/3239]	Time 0.308 (0.652)	Data Time 0.002 (0.025)	Loss 2.2128 (2.1553)	Entropy 0.63560 (0.63830)	Top-1 acc 73.047 (72.873)	Top-5 acc 89.062 (89.237)	lr 0.00013
Train [115][1760/3239]	Time 0.241 (0.651)	Data Time 0.001 (0.025)	Loss 2.1511 (2.1555)	Entropy 0.63567 (0.63828)	Top-1 acc 72.656 (72.869)	Top-5 acc 89.062 (89.233)	lr 0.00013
Train [115][1770/3239]	Time 2.616 (0.650)	Data Time 0.001 (0.025)	Loss 2.0282 (2.1551)	Entropy 0.63567 (0.63827)	Top-1 acc 77.734 (72.881)	Top-5 acc 92.188 (89.237)	lr 0.00013
Train [115][1780/3239]	Time 0.262 (0.648)	Data Time 0.001 (0.024)	Loss 2.1075 (2.1550)	Entropy 0.63556 (0.63825)	Top-1 acc 81.250 (72.890)	Top-5 acc 87.500 (89.240)	lr 0.00013
Train [115][1790/3239]	Time 0.233 (0.647)	Data Time 0.001 (0.024)	Loss 2.1272 (2.1550)	Entropy 0.63549 (0.63824)	Top-1 acc 70.312 (72.893)	Top-5 acc 92.188 (89.239)	lr 0.00013
Train [115][1800/3239]	Time 0.248 (0.646)	Data Time 0.001 (0.024)	Loss 2.2204 (2.1553)	Entropy 0.63551 (0.63822)	Top-1 acc 69.922 (72.883)	Top-5 acc 88.672 (89.235)	lr 0.00013
Train [115][1810/3239]	Time 0.264 (0.645)	Data Time 0.002 (0.024)	Loss 2.1380 (2.1552)	Entropy 0.63547 (0.63821)	Top-1 acc 74.609 (72.883)	Top-5 acc 91.797 (89.240)	lr 0.00013
Train [115][1820/3239]	Time 0.255 (0.644)	Data Time 0.002 (0.024)	Loss 2.2472 (2.1552)	Entropy 0.63548 (0.63819)	Top-1 acc 70.312 (72.885)	Top-5 acc 85.547 (89.237)	lr 0.00013
Train [115][1830/3239]	Time 0.244 (0.644)	Data Time 0.001 (0.024)	Loss 2.0756 (2.1550)	Entropy 0.63550 (0.63818)	Top-1 acc 75.000 (72.890)	Top-5 acc 90.625 (89.241)	lr 0.00013
Train [115][1840/3239]	Time 0.248 (0.643)	Data Time 0.001 (0.024)	Loss 2.0735 (2.1551)	Entropy 0.63553 (0.63816)	Top-1 acc 70.703 (72.884)	Top-5 acc 89.062 (89.235)	lr 0.00013
Train [115][1850/3239]	Time 0.237 (0.642)	Data Time 0.001 (0.024)	Loss 2.1665 (2.1548)	Entropy 0.63550 (0.63815)	Top-1 acc 75.000 (72.893)	Top-5 acc 86.328 (89.237)	lr 0.00013
Train [115][1860/3239]	Time 0.246 (0.641)	Data Time 0.002 (0.023)	Loss 1.9896 (2.1546)	Entropy 0.63550 (0.63813)	Top-1 acc 76.562 (72.896)	Top-5 acc 92.188 (89.241)	lr 0.00013
Train [115][1870/3239]	Time 0.261 (0.640)	Data Time 0.001 (0.023)	Loss 2.1896 (2.1548)	Entropy 0.63547 (0.63812)	Top-1 acc 72.656 (72.894)	Top-5 acc 87.891 (89.239)	lr 0.00013
Train [115][1880/3239]	Time 2.529 (0.640)	Data Time 0.003 (0.023)	Loss 2.2601 (2.1548)	Entropy 0.63547 (0.63811)	Top-1 acc 73.047 (72.897)	Top-5 acc 87.500 (89.236)	lr 0.00013
Train [115][1890/3239]	Time 0.232 (0.637)	Data Time 0.002 (0.023)	Loss 2.2573 (2.1549)	Entropy 0.63544 (0.63809)	Top-1 acc 70.703 (72.897)	Top-5 acc 87.109 (89.237)	lr 0.00013
Train [115][1900/3239]	Time 0.221 (0.637)	Data Time 0.001 (0.023)	Loss 2.1385 (2.1550)	Entropy 0.63542 (0.63808)	Top-1 acc 73.828 (72.897)	Top-5 acc 90.625 (89.235)	lr 0.00013
Train [115][1910/3239]	Time 0.238 (0.636)	Data Time 0.002 (0.023)	Loss 2.2103 (2.1549)	Entropy 0.63547 (0.63806)	Top-1 acc 71.875 (72.903)	Top-5 acc 89.453 (89.237)	lr 0.00013
Train [115][1920/3239]	Time 0.239 (0.635)	Data Time 0.001 (0.023)	Loss 2.1206 (2.1547)	Entropy 0.63547 (0.63805)	Top-1 acc 73.438 (72.909)	Top-5 acc 87.500 (89.241)	lr 0.00013
Train [115][1930/3239]	Time 0.221 (0.634)	Data Time 0.001 (0.023)	Loss 2.0523 (2.1545)	Entropy 0.63551 (0.63804)	Top-1 acc 77.734 (72.914)	Top-5 acc 89.062 (89.241)	lr 0.00012
Train [115][1940/3239]	Time 0.230 (0.634)	Data Time 0.001 (0.023)	Loss 2.2066 (2.1549)	Entropy 0.63548 (0.63802)	Top-1 acc 71.875 (72.903)	Top-5 acc 87.500 (89.235)	lr 0.00012
Train [115][1950/3239]	Time 0.330 (0.633)	Data Time 0.001 (0.022)	Loss 2.1720 (2.1547)	Entropy 0.63551 (0.63801)	Top-1 acc 70.312 (72.904)	Top-5 acc 90.234 (89.239)	lr 0.00012
Train [115][1960/3239]	Time 0.254 (0.632)	Data Time 0.002 (0.022)	Loss 2.1966 (2.1549)	Entropy 0.63553 (0.63800)	Top-1 acc 73.438 (72.900)	Top-5 acc 91.016 (89.238)	lr 0.00012
Train [115][1970/3239]	Time 0.237 (0.631)	Data Time 0.001 (0.022)	Loss 2.1452 (2.1548)	Entropy 0.63553 (0.63799)	Top-1 acc 76.172 (72.900)	Top-5 acc 87.500 (89.237)	lr 0.00012
Train [115][1980/3239]	Time 0.252 (0.631)	Data Time 0.001 (0.022)	Loss 2.0023 (2.1548)	Entropy 0.63555 (0.63797)	Top-1 acc 76.172 (72.900)	Top-5 acc 91.016 (89.236)	lr 0.00012
Train [115][1990/3239]	Time 2.739 (0.630)	Data Time 0.002 (0.022)	Loss 2.1958 (2.1548)	Entropy 0.63555 (0.63796)	Top-1 acc 70.703 (72.898)	Top-5 acc 88.281 (89.237)	lr 0.00012
Train [115][2000/3239]	Time 0.237 (0.628)	Data Time 0.001 (0.022)	Loss 2.1517 (2.1547)	Entropy 0.63551 (0.63795)	Top-1 acc 71.094 (72.902)	Top-5 acc 90.625 (89.239)	lr 0.00012
Train [115][2010/3239]	Time 0.237 (0.627)	Data Time 0.001 (0.022)	Loss 2.2089 (2.1547)	Entropy 0.63539 (0.63794)	Top-1 acc 69.922 (72.898)	Top-5 acc 86.719 (89.239)	lr 0.00012
Train [115][2020/3239]	Time 0.234 (0.627)	Data Time 0.001 (0.022)	Loss 2.1997 (2.1548)	Entropy 0.63540 (0.63792)	Top-1 acc 72.656 (72.899)	Top-5 acc 89.844 (89.236)	lr 0.00012
Train [115][2030/3239]	Time 0.340 (0.626)	Data Time 0.002 (0.022)	Loss 2.3219 (2.1550)	Entropy 0.63523 (0.63791)	Top-1 acc 67.969 (72.892)	Top-5 acc 88.281 (89.233)	lr 0.00012
Train [115][2040/3239]	Time 0.237 (0.625)	Data Time 0.001 (0.022)	Loss 2.2630 (2.1548)	Entropy 0.63507 (0.63790)	Top-1 acc 72.656 (72.898)	Top-5 acc 85.938 (89.235)	lr 0.00012
Train [115][2050/3239]	Time 0.238 (0.625)	Data Time 0.001 (0.021)	Loss 2.1009 (2.1548)	Entropy 0.63505 (0.63788)	Top-1 acc 72.656 (72.901)	Top-5 acc 91.016 (89.236)	lr 0.00012
Train [115][2060/3239]	Time 0.239 (0.624)	Data Time 0.001 (0.021)	Loss 2.2249 (2.1548)	Entropy 0.63542 (0.63787)	Top-1 acc 69.922 (72.901)	Top-5 acc 87.500 (89.238)	lr 0.00012
Train [115][2070/3239]	Time 0.329 (0.623)	Data Time 0.001 (0.021)	Loss 2.1037 (2.1548)	Entropy 0.63534 (0.63786)	Top-1 acc 75.000 (72.901)	Top-5 acc 90.234 (89.240)	lr 0.00012
Train [115][2080/3239]	Time 0.269 (0.623)	Data Time 0.001 (0.021)	Loss 2.0426 (2.1548)	Entropy 0.63534 (0.63785)	Top-1 acc 79.297 (72.902)	Top-5 acc 92.188 (89.239)	lr 0.00012
Train [115][2090/3239]	Time 0.373 (0.647)	Data Time 0.004 (0.021)	Loss 2.0120 (2.1551)	Entropy 0.63495 (0.63783)	Top-1 acc 78.516 (72.894)	Top-5 acc 91.797 (89.234)	lr 0.00012
Train [115][2100/3239]	Time 2.585 (0.646)	Data Time 0.002 (0.021)	Loss 2.2553 (2.1553)	Entropy 0.63495 (0.63782)	Top-1 acc 69.531 (72.888)	Top-5 acc 87.500 (89.229)	lr 0.00012
Train [115][2110/3239]	Time 0.256 (0.644)	Data Time 0.002 (0.021)	Loss 2.0740 (2.1555)	Entropy 0.63497 (0.63781)	Top-1 acc 75.781 (72.883)	Top-5 acc 89.453 (89.229)	lr 0.00012
Train [115][2120/3239]	Time 0.233 (0.644)	Data Time 0.001 (0.021)	Loss 2.2571 (2.1556)	Entropy 0.63490 (0.63779)	Top-1 acc 71.094 (72.878)	Top-5 acc 89.062 (89.229)	lr 0.00012
Train [115][2130/3239]	Time 0.241 (0.643)	Data Time 0.001 (0.021)	Loss 2.1489 (2.1555)	Entropy 0.63484 (0.63778)	Top-1 acc 72.266 (72.880)	Top-5 acc 89.844 (89.228)	lr 0.00012
Train [115][2140/3239]	Time 0.220 (0.642)	Data Time 0.001 (0.021)	Loss 2.0709 (2.1557)	Entropy 0.63490 (0.63777)	Top-1 acc 74.219 (72.874)	Top-5 acc 89.453 (89.224)	lr 0.00012
Train [115][2150/3239]	Time 0.237 (0.642)	Data Time 0.002 (0.021)	Loss 1.9771 (2.1558)	Entropy 0.63487 (0.63775)	Top-1 acc 76.562 (72.871)	Top-5 acc 93.750 (89.221)	lr 0.00012
Train [115][2160/3239]	Time 0.284 (0.641)	Data Time 0.001 (0.020)	Loss 1.9471 (2.1558)	Entropy 0.63481 (0.63774)	Top-1 acc 75.781 (72.872)	Top-5 acc 93.359 (89.221)	lr 0.00012
Train [115][2170/3239]	Time 0.224 (0.640)	Data Time 0.001 (0.020)	Loss 2.1577 (2.1559)	Entropy 0.63482 (0.63773)	Top-1 acc 69.531 (72.871)	Top-5 acc 89.844 (89.220)	lr 0.00012
Train [115][2180/3239]	Time 0.240 (0.639)	Data Time 0.001 (0.020)	Loss 2.3249 (2.1558)	Entropy 0.63482 (0.63771)	Top-1 acc 68.750 (72.874)	Top-5 acc 85.547 (89.224)	lr 0.00012
Train [115][2190/3239]	Time 0.259 (0.639)	Data Time 0.001 (0.020)	Loss 2.2050 (2.1558)	Entropy 0.63488 (0.63770)	Top-1 acc 71.875 (72.871)	Top-5 acc 87.500 (89.224)	lr 0.00012
Train [115][2200/3239]	Time 0.327 (0.638)	Data Time 0.002 (0.020)	Loss 2.1272 (2.1555)	Entropy 0.63482 (0.63769)	Top-1 acc 73.438 (72.879)	Top-5 acc 90.234 (89.227)	lr 0.00012
Train [115][2210/3239]	Time 2.518 (0.637)	Data Time 0.001 (0.020)	Loss 2.0974 (2.1556)	Entropy 0.63482 (0.63767)	Top-1 acc 70.312 (72.873)	Top-5 acc 92.578 (89.227)	lr 0.00012
Train [115][2220/3239]	Time 0.230 (0.636)	Data Time 0.001 (0.020)	Loss 2.2654 (2.1555)	Entropy 0.63479 (0.63766)	Top-1 acc 69.531 (72.876)	Top-5 acc 86.328 (89.227)	lr 0.00012
Train [115][2230/3239]	Time 0.287 (0.635)	Data Time 0.001 (0.020)	Loss 2.0084 (2.1553)	Entropy 0.63479 (0.63765)	Top-1 acc 74.219 (72.879)	Top-5 acc 93.359 (89.233)	lr 0.00012
Train [115][2240/3239]	Time 0.328 (0.634)	Data Time 0.001 (0.020)	Loss 2.0318 (2.1553)	Entropy 0.63479 (0.63763)	Top-1 acc 74.219 (72.882)	Top-5 acc 92.188 (89.232)	lr 0.00012
Train [115][2250/3239]	Time 0.242 (0.634)	Data Time 0.001 (0.020)	Loss 2.2075 (2.1553)	Entropy 0.63476 (0.63762)	Top-1 acc 72.656 (72.877)	Top-5 acc 89.844 (89.231)	lr 0.00012
Train [115][2260/3239]	Time 0.257 (0.633)	Data Time 0.002 (0.020)	Loss 2.3708 (2.1557)	Entropy 0.63482 (0.63761)	Top-1 acc 67.578 (72.867)	Top-5 acc 84.375 (89.226)	lr 0.00012
Train [115][2270/3239]	Time 0.232 (0.632)	Data Time 0.001 (0.020)	Loss 2.1593 (2.1555)	Entropy 0.63478 (0.63760)	Top-1 acc 70.703 (72.873)	Top-5 acc 88.281 (89.230)	lr 0.00012
Train [115][2280/3239]	Time 0.347 (0.632)	Data Time 0.001 (0.019)	Loss 2.0946 (2.1554)	Entropy 0.63465 (0.63758)	Top-1 acc 72.656 (72.873)	Top-5 acc 90.625 (89.232)	lr 0.00012
Train [115][2290/3239]	Time 0.256 (0.631)	Data Time 0.002 (0.019)	Loss 2.1050 (2.1554)	Entropy 0.63470 (0.63757)	Top-1 acc 73.438 (72.874)	Top-5 acc 89.062 (89.234)	lr 0.00012
Train [115][2300/3239]	Time 0.231 (0.630)	Data Time 0.001 (0.019)	Loss 2.1709 (2.1553)	Entropy 0.63505 (0.63756)	Top-1 acc 72.656 (72.877)	Top-5 acc 88.672 (89.236)	lr 0.00012
Train [115][2310/3239]	Time 0.258 (0.630)	Data Time 0.001 (0.019)	Loss 2.2541 (2.1556)	Entropy 0.63502 (0.63755)	Top-1 acc 72.656 (72.876)	Top-5 acc 85.938 (89.228)	lr 0.00012
Train [115][2320/3239]	Time 2.672 (0.629)	Data Time 0.002 (0.019)	Loss 2.1185 (2.1557)	Entropy 0.63502 (0.63754)	Top-1 acc 73.047 (72.873)	Top-5 acc 89.844 (89.225)	lr 0.00012
Train [115][2330/3239]	Time 0.227 (0.628)	Data Time 0.001 (0.019)	Loss 2.0908 (2.1554)	Entropy 0.63502 (0.63753)	Top-1 acc 73.828 (72.881)	Top-5 acc 90.234 (89.228)	lr 0.00012
Train [115][2340/3239]	Time 0.246 (0.627)	Data Time 0.001 (0.019)	Loss 2.0148 (2.1553)	Entropy 0.63502 (0.63752)	Top-1 acc 76.172 (72.882)	Top-5 acc 91.797 (89.231)	lr 0.00012
Train [115][2350/3239]	Time 0.235 (0.626)	Data Time 0.001 (0.019)	Loss 2.1922 (2.1554)	Entropy 0.63498 (0.63751)	Top-1 acc 69.531 (72.878)	Top-5 acc 88.281 (89.233)	lr 0.00012
Train [115][2360/3239]	Time 0.263 (0.626)	Data Time 0.001 (0.019)	Loss 2.2419 (2.1554)	Entropy 0.63488 (0.63749)	Top-1 acc 67.969 (72.874)	Top-5 acc 87.891 (89.233)	lr 0.00012
Train [115][2370/3239]	Time 0.237 (0.625)	Data Time 0.001 (0.019)	Loss 1.9935 (2.1555)	Entropy 0.63494 (0.63748)	Top-1 acc 79.688 (72.872)	Top-5 acc 91.016 (89.228)	lr 0.00012
Train [115][2380/3239]	Time 0.218 (0.624)	Data Time 0.001 (0.019)	Loss 2.3176 (2.1558)	Entropy 0.63492 (0.63747)	Top-1 acc 69.141 (72.863)	Top-5 acc 86.328 (89.226)	lr 0.00012
Train [115][2390/3239]	Time 0.219 (0.624)	Data Time 0.001 (0.019)	Loss 2.1991 (2.1558)	Entropy 0.63490 (0.63746)	Top-1 acc 72.266 (72.864)	Top-5 acc 88.281 (89.225)	lr 0.00012
Train [115][2400/3239]	Time 0.226 (0.623)	Data Time 0.001 (0.019)	Loss 2.1731 (2.1557)	Entropy 0.63485 (0.63745)	Top-1 acc 73.047 (72.870)	Top-5 acc 87.891 (89.226)	lr 0.00012
Train [115][2410/3239]	Time 0.234 (0.623)	Data Time 0.001 (0.018)	Loss 1.9640 (2.1558)	Entropy 0.63487 (0.63744)	Top-1 acc 76.953 (72.866)	Top-5 acc 92.188 (89.223)	lr 0.00012
Train [115][2420/3239]	Time 0.321 (0.622)	Data Time 0.001 (0.018)	Loss 2.1391 (2.1558)	Entropy 0.63484 (0.63743)	Top-1 acc 76.562 (72.866)	Top-5 acc 88.281 (89.226)	lr 0.00012
Train [115][2430/3239]	Time 2.488 (0.622)	Data Time 0.001 (0.018)	Loss 2.2420 (2.1558)	Entropy 0.63484 (0.63742)	Top-1 acc 73.047 (72.867)	Top-5 acc 88.281 (89.226)	lr 0.00012
Train [115][2440/3239]	Time 0.229 (0.620)	Data Time 0.001 (0.018)	Loss 2.1996 (2.1558)	Entropy 0.63488 (0.63741)	Top-1 acc 71.484 (72.865)	Top-5 acc 89.062 (89.227)	lr 0.00012
Train [115][2450/3239]	Time 0.227 (0.619)	Data Time 0.001 (0.018)	Loss 2.0237 (2.1557)	Entropy 0.63467 (0.63740)	Top-1 acc 74.609 (72.864)	Top-5 acc 91.016 (89.230)	lr 0.00012
Train [115][2460/3239]	Time 0.250 (0.641)	Data Time 0.002 (0.018)	Loss 2.1760 (2.1557)	Entropy 0.63450 (0.63739)	Top-1 acc 71.094 (72.866)	Top-5 acc 89.062 (89.229)	lr 0.00012
Train [115][2470/3239]	Time 0.241 (0.640)	Data Time 0.002 (0.018)	Loss 2.2233 (2.1557)	Entropy 0.63443 (0.63737)	Top-1 acc 71.484 (72.868)	Top-5 acc 88.281 (89.230)	lr 0.00012
Train [115][2480/3239]	Time 0.254 (0.640)	Data Time 0.002 (0.018)	Loss 2.1000 (2.1555)	Entropy 0.63445 (0.63736)	Top-1 acc 76.172 (72.875)	Top-5 acc 90.234 (89.233)	lr 0.00012
Train [115][2490/3239]	Time 0.311 (0.639)	Data Time 0.001 (0.018)	Loss 2.1270 (2.1553)	Entropy 0.63437 (0.63735)	Top-1 acc 73.438 (72.881)	Top-5 acc 89.844 (89.233)	lr 0.00012
Train [115][2500/3239]	Time 0.232 (0.638)	Data Time 0.001 (0.018)	Loss 2.2442 (2.1554)	Entropy 0.63436 (0.63734)	Top-1 acc 72.656 (72.882)	Top-5 acc 85.938 (89.230)	lr 0.00012
Train [115][2510/3239]	Time 0.246 (0.638)	Data Time 0.002 (0.018)	Loss 2.1731 (2.1552)	Entropy 0.63431 (0.63733)	Top-1 acc 72.656 (72.885)	Top-5 acc 89.844 (89.236)	lr 0.00012
Train [115][2520/3239]	Time 0.239 (0.637)	Data Time 0.001 (0.018)	Loss 2.1245 (2.1552)	Entropy 0.63429 (0.63731)	Top-1 acc 71.484 (72.884)	Top-5 acc 89.062 (89.235)	lr 0.00012
Train [115][2530/3239]	Time 0.257 (0.637)	Data Time 0.001 (0.018)	Loss 2.0660 (2.1552)	Entropy 0.63428 (0.63730)	Top-1 acc 77.344 (72.882)	Top-5 acc 90.234 (89.238)	lr 0.00012
Train [115][2540/3239]	Time 2.486 (0.636)	Data Time 0.001 (0.018)	Loss 2.2470 (2.1553)	Entropy 0.63428 (0.63729)	Top-1 acc 70.312 (72.879)	Top-5 acc 87.500 (89.236)	lr 0.00012
Train [115][2550/3239]	Time 0.241 (0.635)	Data Time 0.002 (0.018)	Loss 2.2786 (2.1553)	Entropy 0.63423 (0.63728)	Top-1 acc 69.922 (72.873)	Top-5 acc 87.891 (89.238)	lr 0.00012
Train [115][2560/3239]	Time 0.237 (0.634)	Data Time 0.002 (0.018)	Loss 2.1642 (2.1552)	Entropy 0.63425 (0.63727)	Top-1 acc 72.656 (72.874)	Top-5 acc 89.453 (89.241)	lr 0.00012
Train [115][2570/3239]	Time 0.385 (0.634)	Data Time 0.001 (0.017)	Loss 2.1973 (2.1553)	Entropy 0.63423 (0.63726)	Top-1 acc 71.484 (72.872)	Top-5 acc 85.547 (89.238)	lr 0.00012
Train [115][2580/3239]	Time 0.254 (0.633)	Data Time 0.001 (0.017)	Loss 2.0795 (2.1553)	Entropy 0.63420 (0.63724)	Top-1 acc 75.391 (72.873)	Top-5 acc 89.062 (89.235)	lr 0.00012
Train [115][2590/3239]	Time 0.264 (0.633)	Data Time 0.001 (0.017)	Loss 2.1435 (2.1553)	Entropy 0.63414 (0.63723)	Top-1 acc 73.828 (72.877)	Top-5 acc 89.844 (89.236)	lr 0.00012
Train [115][2600/3239]	Time 0.233 (0.632)	Data Time 0.001 (0.017)	Loss 1.9906 (2.1552)	Entropy 0.63409 (0.63722)	Top-1 acc 75.000 (72.878)	Top-5 acc 93.359 (89.240)	lr 0.00012
Train [115][2610/3239]	Time 0.387 (0.632)	Data Time 0.001 (0.017)	Loss 2.2227 (2.1553)	Entropy 0.63404 (0.63721)	Top-1 acc 71.094 (72.877)	Top-5 acc 88.281 (89.240)	lr 0.00012
Train [115][2620/3239]	Time 0.294 (0.631)	Data Time 0.001 (0.017)	Loss 2.0179 (2.1552)	Entropy 0.63403 (0.63720)	Top-1 acc 75.000 (72.878)	Top-5 acc 91.797 (89.240)	lr 0.00012
Train [115][2630/3239]	Time 0.236 (0.630)	Data Time 0.001 (0.017)	Loss 2.3000 (2.1553)	Entropy 0.63411 (0.63718)	Top-1 acc 68.750 (72.877)	Top-5 acc 86.719 (89.240)	lr 0.00012
Train [115][2640/3239]	Time 0.239 (0.630)	Data Time 0.001 (0.017)	Loss 2.1818 (2.1552)	Entropy 0.63412 (0.63717)	Top-1 acc 72.266 (72.877)	Top-5 acc 88.281 (89.244)	lr 0.00011
Train [115][2650/3239]	Time 0.397 (0.630)	Data Time 0.001 (0.017)	Loss 2.2103 (2.1554)	Entropy 0.63420 (0.63716)	Top-1 acc 67.969 (72.867)	Top-5 acc 85.547 (89.239)	lr 0.00011
Train [115][2660/3239]	Time 0.276 (0.629)	Data Time 0.001 (0.017)	Loss 2.2738 (2.1555)	Entropy 0.63410 (0.63715)	Top-1 acc 71.484 (72.868)	Top-5 acc 89.062 (89.237)	lr 0.00011
Train [115][2670/3239]	Time 0.237 (0.628)	Data Time 0.001 (0.017)	Loss 2.1139 (2.1554)	Entropy 0.63401 (0.63714)	Top-1 acc 74.219 (72.872)	Top-5 acc 88.281 (89.240)	lr 0.00011
Train [115][2680/3239]	Time 0.243 (0.628)	Data Time 0.001 (0.017)	Loss 2.2742 (2.1555)	Entropy 0.63388 (0.63713)	Top-1 acc 70.703 (72.868)	Top-5 acc 86.328 (89.237)	lr 0.00011
Train [115][2690/3239]	Time 0.370 (0.628)	Data Time 0.001 (0.017)	Loss 2.0379 (2.1556)	Entropy 0.63383 (0.63711)	Top-1 acc 75.391 (72.865)	Top-5 acc 92.188 (89.237)	lr 0.00011
Train [115][2700/3239]	Time 0.229 (0.627)	Data Time 0.002 (0.017)	Loss 2.2373 (2.1558)	Entropy 0.63379 (0.63710)	Top-1 acc 70.703 (72.860)	Top-5 acc 86.719 (89.232)	lr 0.00011
Train [115][2710/3239]	Time 0.284 (0.627)	Data Time 0.001 (0.017)	Loss 2.1425 (2.1558)	Entropy 0.63381 (0.63709)	Top-1 acc 73.438 (72.858)	Top-5 acc 89.453 (89.232)	lr 0.00011
Train [115][2720/3239]	Time 0.236 (0.626)	Data Time 0.001 (0.017)	Loss 2.2678 (2.1558)	Entropy 0.63373 (0.63708)	Top-1 acc 69.141 (72.861)	Top-5 acc 88.672 (89.234)	lr 0.00011
Train [115][2730/3239]	Time 0.319 (0.626)	Data Time 0.002 (0.017)	Loss 2.1820 (2.1557)	Entropy 0.63372 (0.63707)	Top-1 acc 73.047 (72.863)	Top-5 acc 86.719 (89.234)	lr 0.00011
Train [115][2740/3239]	Time 0.254 (0.625)	Data Time 0.001 (0.017)	Loss 2.3108 (2.1557)	Entropy 0.63368 (0.63705)	Top-1 acc 70.703 (72.862)	Top-5 acc 86.719 (89.234)	lr 0.00011
Train [115][2750/3239]	Time 0.237 (0.625)	Data Time 0.001 (0.016)	Loss 2.0905 (2.1556)	Entropy 0.63376 (0.63704)	Top-1 acc 71.094 (72.868)	Top-5 acc 90.625 (89.236)	lr 0.00011
Train [115][2760/3239]	Time 0.294 (0.624)	Data Time 0.001 (0.016)	Loss 2.0429 (2.1556)	Entropy 0.63372 (0.63703)	Top-1 acc 77.734 (72.868)	Top-5 acc 90.625 (89.237)	lr 0.00011
Train [115][2770/3239]	Time 0.240 (0.624)	Data Time 0.001 (0.016)	Loss 2.1683 (2.1556)	Entropy 0.63367 (0.63702)	Top-1 acc 73.828 (72.868)	Top-5 acc 87.891 (89.238)	lr 0.00011
Train [115][2780/3239]	Time 0.245 (0.623)	Data Time 0.001 (0.016)	Loss 2.1630 (2.1554)	Entropy 0.63378 (0.63700)	Top-1 acc 71.875 (72.875)	Top-5 acc 86.328 (89.237)	lr 0.00011
Train [115][2790/3239]	Time 0.243 (0.623)	Data Time 0.002 (0.016)	Loss 2.2260 (2.1553)	Entropy 0.63382 (0.63699)	Top-1 acc 69.922 (72.878)	Top-5 acc 89.453 (89.239)	lr 0.00011
Train [115][2800/3239]	Time 0.332 (0.640)	Data Time 0.004 (0.016)	Loss 2.2063 (2.1553)	Entropy 0.63375 (0.63698)	Top-1 acc 73.438 (72.876)	Top-5 acc 85.938 (89.237)	lr 0.00011
Train [115][2810/3239]	Time 0.348 (0.640)	Data Time 0.003 (0.016)	Loss 2.1832 (2.1553)	Entropy 0.63371 (0.63697)	Top-1 acc 73.438 (72.874)	Top-5 acc 88.281 (89.238)	lr 0.00011
Train [115][2820/3239]	Time 0.240 (0.640)	Data Time 0.001 (0.016)	Loss 2.1015 (2.1552)	Entropy 0.63365 (0.63696)	Top-1 acc 74.609 (72.873)	Top-5 acc 91.016 (89.239)	lr 0.00011
Train [115][2830/3239]	Time 0.264 (0.639)	Data Time 0.001 (0.016)	Loss 2.0850 (2.1551)	Entropy 0.63368 (0.63695)	Top-1 acc 71.094 (72.875)	Top-5 acc 91.797 (89.242)	lr 0.00011
Train [115][2840/3239]	Time 0.254 (0.639)	Data Time 0.002 (0.016)	Loss 2.1297 (2.1550)	Entropy 0.63363 (0.63694)	Top-1 acc 73.438 (72.877)	Top-5 acc 89.844 (89.245)	lr 0.00011
Train [115][2850/3239]	Time 0.347 (0.638)	Data Time 0.002 (0.016)	Loss 2.0444 (2.1550)	Entropy 0.63357 (0.63692)	Top-1 acc 76.172 (72.878)	Top-5 acc 90.234 (89.246)	lr 0.00011
Train [115][2860/3239]	Time 0.229 (0.638)	Data Time 0.001 (0.016)	Loss 2.1800 (2.1551)	Entropy 0.63359 (0.63691)	Top-1 acc 73.828 (72.879)	Top-5 acc 89.062 (89.246)	lr 0.00011
Train [115][2870/3239]	Time 0.302 (0.637)	Data Time 0.001 (0.016)	Loss 2.1294 (2.1551)	Entropy 0.63364 (0.63690)	Top-1 acc 70.703 (72.877)	Top-5 acc 91.406 (89.245)	lr 0.00011
Train [115][2880/3239]	Time 0.238 (0.637)	Data Time 0.001 (0.016)	Loss 2.2766 (2.1551)	Entropy 0.63359 (0.63689)	Top-1 acc 70.703 (72.876)	Top-5 acc 85.938 (89.244)	lr 0.00011
Train [115][2890/3239]	Time 0.280 (0.636)	Data Time 0.002 (0.016)	Loss 2.0908 (2.1552)	Entropy 0.63346 (0.63688)	Top-1 acc 75.391 (72.872)	Top-5 acc 91.016 (89.243)	lr 0.00011
Train [115][2900/3239]	Time 0.243 (0.636)	Data Time 0.001 (0.016)	Loss 2.1451 (2.1553)	Entropy 0.63346 (0.63687)	Top-1 acc 73.828 (72.870)	Top-5 acc 89.844 (89.241)	lr 0.00011
Train [115][2910/3239]	Time 0.285 (0.636)	Data Time 0.001 (0.016)	Loss 2.0958 (2.1552)	Entropy 0.63343 (0.63685)	Top-1 acc 71.484 (72.874)	Top-5 acc 91.406 (89.243)	lr 0.00011
Train [115][2920/3239]	Time 0.218 (0.635)	Data Time 0.001 (0.016)	Loss 2.2028 (2.1553)	Entropy 0.63330 (0.63684)	Top-1 acc 73.828 (72.872)	Top-5 acc 87.891 (89.239)	lr 0.00011
Train [115][2930/3239]	Time 0.277 (0.635)	Data Time 0.001 (0.016)	Loss 1.9954 (2.1552)	Entropy 0.63330 (0.63683)	Top-1 acc 78.516 (72.876)	Top-5 acc 89.453 (89.243)	lr 0.00011
Train [115][2940/3239]	Time 0.233 (0.634)	Data Time 0.001 (0.016)	Loss 2.1036 (2.1551)	Entropy 0.63329 (0.63682)	Top-1 acc 73.438 (72.877)	Top-5 acc 89.062 (89.244)	lr 0.00011
Train [115][2950/3239]	Time 0.346 (0.634)	Data Time 0.001 (0.015)	Loss 2.1248 (2.1551)	Entropy 0.63334 (0.63681)	Top-1 acc 73.438 (72.876)	Top-5 acc 89.844 (89.244)	lr 0.00011
Train [115][2960/3239]	Time 0.242 (0.633)	Data Time 0.001 (0.015)	Loss 2.2094 (2.1552)	Entropy 0.63333 (0.63680)	Top-1 acc 69.141 (72.872)	Top-5 acc 87.109 (89.242)	lr 0.00011
Train [115][2970/3239]	Time 0.249 (0.633)	Data Time 0.001 (0.015)	Loss 2.1726 (2.1553)	Entropy 0.63332 (0.63678)	Top-1 acc 70.703 (72.867)	Top-5 acc 87.500 (89.241)	lr 0.00011
Train [115][2980/3239]	Time 0.231 (0.632)	Data Time 0.001 (0.015)	Loss 2.1838 (2.1553)	Entropy 0.63330 (0.63677)	Top-1 acc 74.219 (72.867)	Top-5 acc 89.844 (89.240)	lr 0.00011
Train [115][2990/3239]	Time 0.226 (0.632)	Data Time 0.001 (0.015)	Loss 2.0956 (2.1552)	Entropy 0.63332 (0.63676)	Top-1 acc 73.828 (72.868)	Top-5 acc 92.578 (89.244)	lr 0.00011
Train [115][3000/3239]	Time 0.229 (0.631)	Data Time 0.001 (0.015)	Loss 2.1878 (2.1550)	Entropy 0.63334 (0.63675)	Top-1 acc 72.656 (72.872)	Top-5 acc 88.672 (89.246)	lr 0.00011
Train [115][3010/3239]	Time 0.214 (0.631)	Data Time 0.001 (0.015)	Loss 2.1748 (2.1550)	Entropy 0.63334 (0.63674)	Top-1 acc 72.656 (72.873)	Top-5 acc 90.234 (89.248)	lr 0.00011
Train [115][3020/3239]	Time 0.235 (0.631)	Data Time 0.001 (0.015)	Loss 2.0444 (2.1549)	Entropy 0.63334 (0.63673)	Top-1 acc 71.875 (72.871)	Top-5 acc 92.969 (89.249)	lr 0.00011
Train [115][3030/3239]	Time 0.296 (0.630)	Data Time 0.001 (0.015)	Loss 2.0208 (2.1549)	Entropy 0.63325 (0.63671)	Top-1 acc 78.125 (72.872)	Top-5 acc 90.625 (89.251)	lr 0.00011
Train [115][3040/3239]	Time 0.236 (0.630)	Data Time 0.001 (0.015)	Loss 2.0669 (2.1548)	Entropy 0.63313 (0.63670)	Top-1 acc 75.391 (72.875)	Top-5 acc 91.016 (89.253)	lr 0.00011
Train [115][3050/3239]	Time 0.271 (0.629)	Data Time 0.001 (0.015)	Loss 2.1508 (2.1548)	Entropy 0.63313 (0.63669)	Top-1 acc 73.047 (72.878)	Top-5 acc 92.188 (89.254)	lr 0.00011
Train [115][3060/3239]	Time 0.369 (0.629)	Data Time 0.001 (0.015)	Loss 2.1740 (2.1548)	Entropy 0.63319 (0.63668)	Top-1 acc 70.703 (72.880)	Top-5 acc 89.844 (89.254)	lr 0.00011
Train [115][3070/3239]	Time 0.241 (0.628)	Data Time 0.001 (0.015)	Loss 2.0412 (2.1548)	Entropy 0.63313 (0.63667)	Top-1 acc 76.172 (72.880)	Top-5 acc 91.016 (89.254)	lr 0.00011
Train [115][3080/3239]	Time 0.314 (0.628)	Data Time 0.001 (0.015)	Loss 1.9808 (2.1547)	Entropy 0.63303 (0.63666)	Top-1 acc 75.000 (72.880)	Top-5 acc 93.359 (89.254)	lr 0.00011
Train [115][3090/3239]	Time 0.230 (0.628)	Data Time 0.001 (0.015)	Loss 2.1003 (2.1546)	Entropy 0.63313 (0.63665)	Top-1 acc 75.391 (72.882)	Top-5 acc 90.625 (89.256)	lr 0.00011
Train [115][3100/3239]	Time 0.424 (0.627)	Data Time 0.001 (0.015)	Loss 2.3145 (2.1546)	Entropy 0.63315 (0.63663)	Top-1 acc 66.797 (72.879)	Top-5 acc 88.281 (89.259)	lr 0.00011
Train [115][3110/3239]	Time 0.228 (0.627)	Data Time 0.001 (0.015)	Loss 2.0573 (2.1547)	Entropy 0.63312 (0.63662)	Top-1 acc 75.781 (72.875)	Top-5 acc 91.406 (89.257)	lr 0.00011
Train [115][3120/3239]	Time 0.259 (0.626)	Data Time 0.001 (0.015)	Loss 2.0485 (2.1547)	Entropy 0.63319 (0.63661)	Top-1 acc 73.438 (72.875)	Top-5 acc 92.969 (89.257)	lr 0.00011
Train [115][3130/3239]	Time 0.297 (0.642)	Data Time 0.004 (0.015)	Loss 2.2298 (2.1549)	Entropy 0.63312 (0.63660)	Top-1 acc 71.094 (72.869)	Top-5 acc 87.500 (89.253)	lr 0.00011
Train [115][3140/3239]	Time 0.338 (0.642)	Data Time 0.002 (0.015)	Loss 2.3032 (2.1549)	Entropy 0.63315 (0.63659)	Top-1 acc 69.922 (72.872)	Top-5 acc 84.766 (89.254)	lr 0.00011
Train [115][3150/3239]	Time 0.270 (0.642)	Data Time 0.001 (0.015)	Loss 2.2354 (2.1549)	Entropy 0.63314 (0.63658)	Top-1 acc 69.141 (72.871)	Top-5 acc 87.500 (89.252)	lr 0.00011
Train [115][3160/3239]	Time 0.270 (0.641)	Data Time 0.002 (0.015)	Loss 2.2838 (2.1549)	Entropy 0.63318 (0.63657)	Top-1 acc 69.922 (72.869)	Top-5 acc 87.109 (89.256)	lr 0.00011
Train [115][3170/3239]	Time 0.256 (0.641)	Data Time 0.001 (0.015)	Loss 2.2271 (2.1550)	Entropy 0.63311 (0.63656)	Top-1 acc 71.484 (72.864)	Top-5 acc 89.062 (89.254)	lr 0.00011
Train [115][3180/3239]	Time 0.374 (0.640)	Data Time 0.000 (0.015)	Loss 2.1991 (2.1552)	Entropy 0.63311 (0.63655)	Top-1 acc 71.875 (72.860)	Top-5 acc 87.500 (89.252)	lr 0.00011
Train [115][3190/3239]	Time 0.232 (0.640)	Data Time 0.000 (0.014)	Loss 2.2282 (2.1552)	Entropy 0.63309 (0.63654)	Top-1 acc 73.438 (72.860)	Top-5 acc 87.500 (89.253)	lr 0.00011
Train [115][3200/3239]	Time 0.232 (0.639)	Data Time 0.000 (0.014)	Loss 2.2303 (2.1553)	Entropy 0.63309 (0.63652)	Top-1 acc 70.312 (72.857)	Top-5 acc 89.062 (89.251)	lr 0.00011
Train [115][3210/3239]	Time 0.248 (0.639)	Data Time 0.000 (0.014)	Loss 2.1733 (2.1551)	Entropy 0.63300 (0.63651)	Top-1 acc 73.438 (72.865)	Top-5 acc 87.891 (89.254)	lr 0.00011
Train [115][3220/3239]	Time 0.327 (0.638)	Data Time 0.000 (0.014)	Loss 2.1069 (2.1550)	Entropy 0.63300 (0.63650)	Top-1 acc 71.484 (72.867)	Top-5 acc 90.234 (89.254)	lr 0.00011
Train [115][3230/3239]	Time 0.239 (0.638)	Data Time 0.000 (0.014)	Loss 2.1130 (2.1549)	Entropy 0.63301 (0.63649)	Top-1 acc 73.047 (72.873)	Top-5 acc 91.406 (89.256)	lr 0.00011
Train [115][3239/3239]	Time 2.398 (0.638)	Data Time 0.000 (0.014)	Loss 2.2879 (2.1549)	Entropy 0.63301 (0.63648)	Top-1 acc 70.370 (72.871)	Top-5 acc 86.420 (89.257)	lr 0.00011
==========Valid [115/120]	loss 1.196	top-1 acc 72.638 (72.745)	top-5 acc 89.982	Train top-1 72.871	top-5 89.257	Entropy 0.63301	Latency-None: 0.000ms	Flops: 544.27M
Train [116][0/3239]	Time 39.277 (39.277)	Data Time 38.533 (38.533)	Loss 2.1577 (2.1577)	Entropy 0.63307 (0.63307)	Top-1 acc 74.609 (74.609)	Top-5 acc 87.891 (87.891)	lr 0.00011
Train [116][10/3239]	Time 2.744 (4.153)	Data Time 0.002 (3.512)	Loss 2.2106 (2.1534)	Entropy 0.63307 (0.63307)	Top-1 acc 70.703 (72.372)	Top-5 acc 88.672 (89.737)	lr 0.00011
Train [116][20/3239]	Time 0.254 (2.295)	Data Time 0.001 (1.840)	Loss 2.2112 (2.1625)	Entropy 0.63315 (0.63311)	Top-1 acc 71.484 (72.154)	Top-5 acc 87.891 (89.490)	lr 0.00011
Train [116][30/3239]	Time 0.262 (1.714)	Data Time 0.002 (1.247)	Loss 2.1019 (2.1537)	Entropy 0.63312 (0.63311)	Top-1 acc 76.172 (72.807)	Top-5 acc 91.016 (89.226)	lr 0.00011
Train [116][40/3239]	Time 0.270 (1.420)	Data Time 0.001 (0.943)	Loss 2.1006 (2.1513)	Entropy 0.63303 (0.63310)	Top-1 acc 74.219 (72.752)	Top-5 acc 89.062 (89.263)	lr 0.00011
Train [116][50/3239]	Time 0.250 (1.241)	Data Time 0.001 (0.759)	Loss 2.0550 (2.1482)	Entropy 0.63310 (0.63309)	Top-1 acc 77.734 (73.093)	Top-5 acc 91.016 (89.223)	lr 0.00011
Train [116][60/3239]	Time 0.233 (1.120)	Data Time 0.001 (0.635)	Loss 2.0096 (2.1592)	Entropy 0.63308 (0.63309)	Top-1 acc 73.828 (72.906)	Top-5 acc 92.188 (89.043)	lr 0.00011
Train [116][70/3239]	Time 0.241 (1.035)	Data Time 0.002 (0.545)	Loss 2.0653 (2.1523)	Entropy 0.63311 (0.63309)	Top-1 acc 76.562 (72.953)	Top-5 acc 91.016 (89.327)	lr 0.00011
Train [116][80/3239]	Time 0.236 (0.969)	Data Time 0.001 (0.478)	Loss 2.0662 (2.1532)	Entropy 0.63312 (0.63309)	Top-1 acc 74.609 (72.960)	Top-5 acc 90.234 (89.275)	lr 0.00011
Train [116][90/3239]	Time 0.230 (0.914)	Data Time 0.002 (0.426)	Loss 2.0617 (2.1550)	Entropy 0.63307 (0.63310)	Top-1 acc 75.781 (72.854)	Top-5 acc 91.016 (89.260)	lr 0.00011
Train [116][100/3239]	Time 0.227 (0.872)	Data Time 0.002 (0.384)	Loss 2.1912 (2.1560)	Entropy 0.63313 (0.63310)	Top-1 acc 69.531 (72.784)	Top-5 acc 89.453 (89.244)	lr 0.00011
Train [116][110/3239]	Time 0.355 (0.838)	Data Time 0.002 (0.350)	Loss 2.1340 (2.1575)	Entropy 0.63314 (0.63310)	Top-1 acc 70.312 (72.737)	Top-5 acc 89.062 (89.256)	lr 0.00011
Train [116][120/3239]	Time 2.697 (0.809)	Data Time 0.002 (0.321)	Loss 2.1600 (2.1583)	Entropy 0.63314 (0.63310)	Top-1 acc 71.875 (72.798)	Top-5 acc 91.016 (89.175)	lr 0.00011
Train [116][130/3239]	Time 0.239 (0.766)	Data Time 0.001 (0.296)	Loss 2.1742 (2.1591)	Entropy 0.63313 (0.63310)	Top-1 acc 74.219 (72.752)	Top-5 acc 90.234 (89.179)	lr 0.00011
Train [116][140/3239]	Time 0.247 (0.747)	Data Time 0.001 (0.275)	Loss 2.2431 (2.1569)	Entropy 0.63315 (0.63311)	Top-1 acc 72.266 (72.839)	Top-5 acc 88.281 (89.209)	lr 0.00011
Train [116][150/3239]	Time 0.339 (0.731)	Data Time 0.001 (0.257)	Loss 2.2151 (2.1565)	Entropy 0.63292 (0.63310)	Top-1 acc 73.828 (72.868)	Top-5 acc 85.547 (89.179)	lr 0.00010
Train [116][160/3239]	Time 0.234 (0.715)	Data Time 0.001 (0.241)	Loss 2.0444 (2.1577)	Entropy 0.63290 (0.63309)	Top-1 acc 77.344 (72.843)	Top-5 acc 88.281 (89.128)	lr 0.00010
Train [116][170/3239]	Time 0.248 (0.702)	Data Time 0.001 (0.227)	Loss 2.2233 (2.1571)	Entropy 0.63287 (0.63307)	Top-1 acc 71.094 (72.878)	Top-5 acc 90.234 (89.131)	lr 0.00010
Train [116][180/3239]	Time 0.232 (0.691)	Data Time 0.001 (0.215)	Loss 2.1069 (2.1598)	Entropy 0.63284 (0.63306)	Top-1 acc 75.781 (72.818)	Top-5 acc 90.625 (89.078)	lr 0.00010
Train [116][190/3239]	Time 0.277 (0.680)	Data Time 0.001 (0.204)	Loss 2.2247 (2.1598)	Entropy 0.63279 (0.63305)	Top-1 acc 69.922 (72.814)	Top-5 acc 89.062 (89.052)	lr 0.00010
Train [116][200/3239]	Time 0.226 (0.671)	Data Time 0.001 (0.194)	Loss 2.2243 (2.1602)	Entropy 0.63262 (0.63303)	Top-1 acc 70.312 (72.804)	Top-5 acc 88.672 (89.045)	lr 0.00010
Train [116][210/3239]	Time 0.243 (0.662)	Data Time 0.001 (0.185)	Loss 2.0676 (2.1599)	Entropy 0.63254 (0.63301)	Top-1 acc 77.734 (72.810)	Top-5 acc 90.625 (89.085)	lr 0.00010
Train [116][220/3239]	Time 0.267 (0.653)	Data Time 0.001 (0.176)	Loss 2.2204 (2.1588)	Entropy 0.63245 (0.63299)	Top-1 acc 70.312 (72.773)	Top-5 acc 87.891 (89.142)	lr 0.00010
Train [116][230/3239]	Time 2.590 (0.646)	Data Time 0.001 (0.169)	Loss 2.0156 (2.1555)	Entropy 0.63245 (0.63297)	Top-1 acc 77.734 (72.859)	Top-5 acc 92.578 (89.221)	lr 0.00010
Train [116][240/3239]	Time 0.236 (0.629)	Data Time 0.001 (0.162)	Loss 2.1814 (2.1538)	Entropy 0.63241 (0.63295)	Top-1 acc 70.312 (72.898)	Top-5 acc 89.453 (89.259)	lr 0.00010
Train [116][250/3239]	Time 0.297 (0.838)	Data Time 0.003 (0.155)	Loss 2.1790 (2.1526)	Entropy 0.63242 (0.63292)	Top-1 acc 73.828 (72.952)	Top-5 acc 89.453 (89.294)	lr 0.00010
Train [116][260/3239]	Time 0.272 (0.826)	Data Time 0.002 (0.150)	Loss 2.1752 (2.1520)	Entropy 0.63241 (0.63290)	Top-1 acc 73.828 (72.915)	Top-5 acc 89.062 (89.321)	lr 0.00010
Train [116][270/3239]	Time 0.251 (0.814)	Data Time 0.001 (0.144)	Loss 2.0942 (2.1515)	Entropy 0.63240 (0.63289)	Top-1 acc 74.609 (72.924)	Top-5 acc 90.234 (89.345)	lr 0.00010
Train [116][280/3239]	Time 0.221 (0.803)	Data Time 0.001 (0.139)	Loss 2.4013 (2.1540)	Entropy 0.63238 (0.63287)	Top-1 acc 65.625 (72.838)	Top-5 acc 85.156 (89.313)	lr 0.00010
Train [116][290/3239]	Time 0.250 (0.792)	Data Time 0.002 (0.134)	Loss 2.3143 (2.1522)	Entropy 0.63238 (0.63285)	Top-1 acc 68.359 (72.897)	Top-5 acc 87.500 (89.338)	lr 0.00010
Train [116][300/3239]	Time 0.234 (0.781)	Data Time 0.001 (0.130)	Loss 2.1174 (2.1520)	Entropy 0.63236 (0.63284)	Top-1 acc 73.438 (72.905)	Top-5 acc 90.625 (89.348)	lr 0.00010
Train [116][310/3239]	Time 0.267 (0.772)	Data Time 0.002 (0.126)	Loss 2.1276 (2.1524)	Entropy 0.63231 (0.63282)	Top-1 acc 71.484 (72.902)	Top-5 acc 91.016 (89.343)	lr 0.00010
Train [116][320/3239]	Time 0.227 (0.764)	Data Time 0.001 (0.122)	Loss 2.2459 (2.1523)	Entropy 0.63231 (0.63280)	Top-1 acc 72.266 (72.906)	Top-5 acc 85.938 (89.336)	lr 0.00010
Train [116][330/3239]	Time 0.248 (0.755)	Data Time 0.001 (0.118)	Loss 2.1796 (2.1525)	Entropy 0.63233 (0.63279)	Top-1 acc 71.484 (72.908)	Top-5 acc 89.062 (89.315)	lr 0.00010
Train [116][340/3239]	Time 2.613 (0.747)	Data Time 0.006 (0.115)	Loss 2.2467 (2.1536)	Entropy 0.63233 (0.63278)	Top-1 acc 70.703 (72.861)	Top-5 acc 89.062 (89.289)	lr 0.00010
Train [116][350/3239]	Time 0.259 (0.733)	Data Time 0.001 (0.112)	Loss 2.1315 (2.1541)	Entropy 0.63225 (0.63276)	Top-1 acc 73.047 (72.868)	Top-5 acc 88.672 (89.257)	lr 0.00010
Train [116][360/3239]	Time 0.345 (0.727)	Data Time 0.001 (0.109)	Loss 2.3598 (2.1558)	Entropy 0.63224 (0.63275)	Top-1 acc 66.406 (72.799)	Top-5 acc 86.328 (89.228)	lr 0.00010
Train [116][370/3239]	Time 0.244 (0.720)	Data Time 0.001 (0.106)	Loss 2.0789 (2.1559)	Entropy 0.63212 (0.63273)	Top-1 acc 76.172 (72.786)	Top-5 acc 90.234 (89.232)	lr 0.00010
Train [116][380/3239]	Time 0.240 (0.714)	Data Time 0.001 (0.103)	Loss 2.2621 (2.1564)	Entropy 0.63210 (0.63271)	Top-1 acc 67.969 (72.789)	Top-5 acc 88.672 (89.215)	lr 0.00010
Train [116][390/3239]	Time 0.257 (0.709)	Data Time 0.001 (0.100)	Loss 2.1649 (2.1572)	Entropy 0.63204 (0.63270)	Top-1 acc 73.828 (72.751)	Top-5 acc 89.844 (89.202)	lr 0.00010
Train [116][400/3239]	Time 0.363 (0.704)	Data Time 0.001 (0.098)	Loss 2.1896 (2.1564)	Entropy 0.63214 (0.63268)	Top-1 acc 69.141 (72.755)	Top-5 acc 88.281 (89.211)	lr 0.00010
Train [116][410/3239]	Time 0.235 (0.699)	Data Time 0.001 (0.096)	Loss 2.2180 (2.1578)	Entropy 0.63209 (0.63267)	Top-1 acc 71.875 (72.723)	Top-5 acc 88.281 (89.174)	lr 0.00010
Train [116][420/3239]	Time 0.235 (0.694)	Data Time 0.001 (0.093)	Loss 2.1272 (2.1584)	Entropy 0.63204 (0.63265)	Top-1 acc 71.094 (72.693)	Top-5 acc 90.625 (89.181)	lr 0.00010
Train [116][430/3239]	Time 0.225 (0.689)	Data Time 0.001 (0.091)	Loss 2.0873 (2.1585)	Entropy 0.63204 (0.63264)	Top-1 acc 75.000 (72.702)	Top-5 acc 87.891 (89.181)	lr 0.00010
Train [116][440/3239]	Time 0.336 (0.685)	Data Time 0.002 (0.089)	Loss 2.0757 (2.1586)	Entropy 0.63202 (0.63263)	Top-1 acc 76.562 (72.704)	Top-5 acc 90.625 (89.181)	lr 0.00010
Train [116][450/3239]	Time 2.682 (0.680)	Data Time 0.003 (0.087)	Loss 2.1436 (2.1579)	Entropy 0.63202 (0.63261)	Top-1 acc 70.703 (72.715)	Top-5 acc 88.672 (89.196)	lr 0.00010
Train [116][460/3239]	Time 0.237 (0.671)	Data Time 0.001 (0.085)	Loss 2.5465 (2.1585)	Entropy 0.63208 (0.63260)	Top-1 acc 58.984 (72.687)	Top-5 acc 82.422 (89.188)	lr 0.00010
Train [116][470/3239]	Time 0.234 (0.667)	Data Time 0.001 (0.084)	Loss 2.0060 (2.1580)	Entropy 0.63205 (0.63259)	Top-1 acc 75.781 (72.705)	Top-5 acc 92.188 (89.198)	lr 0.00010
Train [116][480/3239]	Time 0.262 (0.664)	Data Time 0.001 (0.082)	Loss 2.2270 (2.1577)	Entropy 0.63204 (0.63258)	Top-1 acc 70.312 (72.690)	Top-5 acc 88.281 (89.197)	lr 0.00010
Train [116][490/3239]	Time 0.242 (0.661)	Data Time 0.001 (0.080)	Loss 2.0591 (2.1572)	Entropy 0.63202 (0.63257)	Top-1 acc 74.609 (72.710)	Top-5 acc 88.281 (89.203)	lr 0.00010
Train [116][500/3239]	Time 0.245 (0.658)	Data Time 0.001 (0.079)	Loss 2.1466 (2.1577)	Entropy 0.63203 (0.63256)	Top-1 acc 74.219 (72.698)	Top-5 acc 89.844 (89.198)	lr 0.00010
Train [116][510/3239]	Time 0.237 (0.655)	Data Time 0.001 (0.077)	Loss 2.1661 (2.1565)	Entropy 0.63203 (0.63255)	Top-1 acc 69.922 (72.727)	Top-5 acc 91.016 (89.215)	lr 0.00010
Train [116][520/3239]	Time 0.234 (0.652)	Data Time 0.001 (0.076)	Loss 2.1921 (2.1575)	Entropy 0.63199 (0.63254)	Top-1 acc 72.656 (72.700)	Top-5 acc 89.062 (89.190)	lr 0.00010
Train [116][530/3239]	Time 0.239 (0.649)	Data Time 0.001 (0.074)	Loss 2.2144 (2.1578)	Entropy 0.63195 (0.63253)	Top-1 acc 73.438 (72.713)	Top-5 acc 87.500 (89.186)	lr 0.00010
Train [116][540/3239]	Time 0.243 (0.646)	Data Time 0.001 (0.073)	Loss 2.1994 (2.1577)	Entropy 0.63193 (0.63251)	Top-1 acc 72.656 (72.726)	Top-5 acc 90.234 (89.193)	lr 0.00010
Train [116][550/3239]	Time 0.260 (0.644)	Data Time 0.001 (0.072)	Loss 2.0574 (2.1576)	Entropy 0.63192 (0.63250)	Top-1 acc 75.781 (72.731)	Top-5 acc 90.625 (89.189)	lr 0.00010
Train [116][560/3239]	Time 2.536 (0.641)	Data Time 0.001 (0.071)	Loss 2.0687 (2.1567)	Entropy 0.63192 (0.63249)	Top-1 acc 75.000 (72.761)	Top-5 acc 89.844 (89.195)	lr 0.00010
Train [116][570/3239]	Time 0.232 (0.634)	Data Time 0.001 (0.069)	Loss 2.2278 (2.1566)	Entropy 0.63184 (0.63248)	Top-1 acc 71.875 (72.753)	Top-5 acc 85.938 (89.196)	lr 0.00010
Train [116][580/3239]	Time 0.245 (0.631)	Data Time 0.001 (0.068)	Loss 2.1884 (2.1558)	Entropy 0.63179 (0.63247)	Top-1 acc 71.875 (72.789)	Top-5 acc 85.938 (89.212)	lr 0.00010
Train [116][590/3239]	Time 0.230 (0.629)	Data Time 0.001 (0.067)	Loss 2.1094 (2.1562)	Entropy 0.63185 (0.63246)	Top-1 acc 75.000 (72.797)	Top-5 acc 88.281 (89.191)	lr 0.00010
Train [116][600/3239]	Time 0.240 (0.627)	Data Time 0.001 (0.066)	Loss 2.1422 (2.1561)	Entropy 0.63185 (0.63245)	Top-1 acc 75.391 (72.810)	Top-5 acc 90.625 (89.193)	lr 0.00010
Train [116][610/3239]	Time 0.243 (0.713)	Data Time 0.002 (0.065)	Loss 2.1119 (2.1569)	Entropy 0.63186 (0.63244)	Top-1 acc 70.703 (72.787)	Top-5 acc 90.625 (89.183)	lr 0.00010
Train [116][620/3239]	Time 0.251 (0.710)	Data Time 0.002 (0.064)	Loss 1.9373 (2.1562)	Entropy 0.63185 (0.63243)	Top-1 acc 75.781 (72.805)	Top-5 acc 94.531 (89.189)	lr 0.00010
Train [116][630/3239]	Time 0.230 (0.707)	Data Time 0.001 (0.063)	Loss 2.0319 (2.1562)	Entropy 0.63179 (0.63242)	Top-1 acc 78.125 (72.803)	Top-5 acc 92.578 (89.188)	lr 0.00010
Train [116][640/3239]	Time 0.231 (0.703)	Data Time 0.001 (0.062)	Loss 2.1188 (2.1555)	Entropy 0.63184 (0.63241)	Top-1 acc 73.047 (72.814)	Top-5 acc 89.844 (89.196)	lr 0.00010
Train [116][650/3239]	Time 0.235 (0.700)	Data Time 0.001 (0.061)	Loss 2.0998 (2.1552)	Entropy 0.63185 (0.63240)	Top-1 acc 72.656 (72.837)	Top-5 acc 90.625 (89.206)	lr 0.00010
Train [116][660/3239]	Time 0.265 (0.697)	Data Time 0.001 (0.060)	Loss 2.0718 (2.1550)	Entropy 0.63185 (0.63239)	Top-1 acc 74.219 (72.847)	Top-5 acc 92.188 (89.209)	lr 0.00010
Train [116][670/3239]	Time 2.587 (0.693)	Data Time 0.001 (0.059)	Loss 2.1573 (2.1553)	Entropy 0.63185 (0.63239)	Top-1 acc 71.875 (72.854)	Top-5 acc 88.672 (89.202)	lr 0.00010
Train [116][680/3239]	Time 0.230 (0.687)	Data Time 0.001 (0.058)	Loss 2.2594 (2.1554)	Entropy 0.63181 (0.63238)	Top-1 acc 71.094 (72.853)	Top-5 acc 87.109 (89.193)	lr 0.00010
Train [116][690/3239]	Time 0.335 (0.684)	Data Time 0.002 (0.058)	Loss 2.2102 (2.1553)	Entropy 0.63165 (0.63237)	Top-1 acc 74.609 (72.863)	Top-5 acc 85.938 (89.196)	lr 0.00010
Train [116][700/3239]	Time 0.247 (0.681)	Data Time 0.002 (0.057)	Loss 2.1114 (2.1554)	Entropy 0.63154 (0.63236)	Top-1 acc 72.656 (72.849)	Top-5 acc 89.844 (89.200)	lr 0.00010
Train [116][710/3239]	Time 0.256 (0.678)	Data Time 0.001 (0.056)	Loss 2.3132 (2.1555)	Entropy 0.63149 (0.63234)	Top-1 acc 67.969 (72.852)	Top-5 acc 86.719 (89.196)	lr 0.00010
Train [116][720/3239]	Time 0.230 (0.676)	Data Time 0.001 (0.055)	Loss 2.1430 (2.1549)	Entropy 0.63143 (0.63233)	Top-1 acc 71.094 (72.858)	Top-5 acc 91.016 (89.209)	lr 0.00010
Train [116][730/3239]	Time 0.324 (0.673)	Data Time 0.001 (0.055)	Loss 2.2316 (2.1559)	Entropy 0.63141 (0.63232)	Top-1 acc 71.875 (72.827)	Top-5 acc 85.547 (89.186)	lr 0.00010
Train [116][740/3239]	Time 0.232 (0.671)	Data Time 0.002 (0.054)	Loss 2.1114 (2.1557)	Entropy 0.63134 (0.63231)	Top-1 acc 74.609 (72.832)	Top-5 acc 91.016 (89.190)	lr 0.00010
Train [116][750/3239]	Time 0.239 (0.668)	Data Time 0.001 (0.053)	Loss 2.3591 (2.1565)	Entropy 0.63134 (0.63229)	Top-1 acc 69.141 (72.810)	Top-5 acc 87.109 (89.176)	lr 0.00010
Train [116][760/3239]	Time 0.223 (0.665)	Data Time 0.001 (0.052)	Loss 2.1893 (2.1566)	Entropy 0.63131 (0.63228)	Top-1 acc 72.266 (72.813)	Top-5 acc 89.062 (89.178)	lr 0.00010
Train [116][770/3239]	Time 0.345 (0.663)	Data Time 0.001 (0.052)	Loss 2.1966 (2.1564)	Entropy 0.63135 (0.63227)	Top-1 acc 67.188 (72.803)	Top-5 acc 88.281 (89.182)	lr 0.00010
Train [116][780/3239]	Time 2.685 (0.661)	Data Time 0.001 (0.051)	Loss 2.0132 (2.1563)	Entropy 0.63135 (0.63226)	Top-1 acc 77.734 (72.800)	Top-5 acc 91.797 (89.187)	lr 0.00010
Train [116][790/3239]	Time 0.236 (0.655)	Data Time 0.001 (0.051)	Loss 2.1830 (2.1565)	Entropy 0.63132 (0.63224)	Top-1 acc 71.875 (72.789)	Top-5 acc 87.109 (89.177)	lr 0.00010
Train [116][800/3239]	Time 0.232 (0.653)	Data Time 0.001 (0.050)	Loss 2.2157 (2.1567)	Entropy 0.63135 (0.63223)	Top-1 acc 71.875 (72.780)	Top-5 acc 87.109 (89.172)	lr 0.00010
Train [116][810/3239]	Time 0.239 (0.651)	Data Time 0.001 (0.049)	Loss 2.2540 (2.1564)	Entropy 0.63135 (0.63222)	Top-1 acc 69.531 (72.791)	Top-5 acc 87.109 (89.175)	lr 0.00010
Train [116][820/3239]	Time 0.248 (0.649)	Data Time 0.001 (0.049)	Loss 2.1826 (2.1563)	Entropy 0.63144 (0.63221)	Top-1 acc 74.219 (72.794)	Top-5 acc 88.281 (89.184)	lr 0.00010
Train [116][830/3239]	Time 0.228 (0.647)	Data Time 0.001 (0.048)	Loss 2.2268 (2.1567)	Entropy 0.63146 (0.63220)	Top-1 acc 72.656 (72.784)	Top-5 acc 87.500 (89.179)	lr 0.00010
Train [116][840/3239]	Time 0.234 (0.645)	Data Time 0.002 (0.048)	Loss 2.1185 (2.1566)	Entropy 0.63136 (0.63219)	Top-1 acc 74.219 (72.796)	Top-5 acc 90.234 (89.182)	lr 0.00010
Train [116][850/3239]	Time 0.246 (0.644)	Data Time 0.001 (0.047)	Loss 2.2899 (2.1567)	Entropy 0.63131 (0.63218)	Top-1 acc 68.750 (72.787)	Top-5 acc 87.891 (89.187)	lr 0.00010
Train [116][860/3239]	Time 0.239 (0.642)	Data Time 0.001 (0.047)	Loss 2.3615 (2.1566)	Entropy 0.63134 (0.63217)	Top-1 acc 66.797 (72.797)	Top-5 acc 85.156 (89.181)	lr 0.00010
Train [116][870/3239]	Time 0.235 (0.640)	Data Time 0.001 (0.046)	Loss 2.0126 (2.1566)	Entropy 0.63133 (0.63216)	Top-1 acc 75.000 (72.789)	Top-5 acc 91.797 (89.185)	lr 0.00010
Train [116][880/3239]	Time 0.308 (0.638)	Data Time 0.001 (0.046)	Loss 2.1087 (2.1569)	Entropy 0.63130 (0.63216)	Top-1 acc 76.562 (72.785)	Top-5 acc 88.281 (89.180)	lr 0.00010
Train [116][890/3239]	Time 2.624 (0.636)	Data Time 0.001 (0.045)	Loss 2.0750 (2.1568)	Entropy 0.63130 (0.63215)	Top-1 acc 73.047 (72.787)	Top-5 acc 91.016 (89.181)	lr 0.00010
Train [116][900/3239]	Time 0.237 (0.632)	Data Time 0.001 (0.045)	Loss 2.0957 (2.1571)	Entropy 0.63132 (0.63214)	Top-1 acc 71.875 (72.772)	Top-5 acc 89.844 (89.180)	lr 0.00010
Train [116][910/3239]	Time 0.236 (0.630)	Data Time 0.001 (0.044)	Loss 2.1413 (2.1572)	Entropy 0.63136 (0.63213)	Top-1 acc 73.438 (72.776)	Top-5 acc 90.625 (89.179)	lr 0.00010
Train [116][920/3239]	Time 0.204 (0.629)	Data Time 0.001 (0.044)	Loss 2.1373 (2.1571)	Entropy 0.63134 (0.63212)	Top-1 acc 73.047 (72.777)	Top-5 acc 90.234 (89.180)	lr 0.00010
Train [116][930/3239]	Time 0.237 (0.627)	Data Time 0.001 (0.043)	Loss 2.1183 (2.1568)	Entropy 0.63132 (0.63211)	Top-1 acc 75.391 (72.788)	Top-5 acc 89.453 (89.194)	lr 0.00010
Train [116][940/3239]	Time 0.377 (0.626)	Data Time 0.001 (0.043)	Loss 2.1539 (2.1567)	Entropy 0.63123 (0.63210)	Top-1 acc 72.656 (72.797)	Top-5 acc 91.797 (89.200)	lr 0.00009
Train [116][950/3239]	Time 0.233 (0.624)	Data Time 0.001 (0.042)	Loss 2.0273 (2.1564)	Entropy 0.63128 (0.63209)	Top-1 acc 75.391 (72.807)	Top-5 acc 89.844 (89.205)	lr 0.00009
Train [116][960/3239]	Time 0.230 (0.623)	Data Time 0.001 (0.042)	Loss 2.1887 (2.1570)	Entropy 0.63128 (0.63208)	Top-1 acc 67.578 (72.792)	Top-5 acc 89.844 (89.191)	lr 0.00009
Train [116][970/3239]	Time 0.247 (0.676)	Data Time 0.002 (0.041)	Loss 2.2074 (2.1573)	Entropy 0.63141 (0.63208)	Top-1 acc 69.922 (72.779)	Top-5 acc 86.719 (89.188)	lr 0.00009
Train [116][980/3239]	Time 0.332 (0.675)	Data Time 0.002 (0.041)	Loss 2.1362 (2.1573)	Entropy 0.63139 (0.63207)	Top-1 acc 70.312 (72.777)	Top-5 acc 89.062 (89.189)	lr 0.00009
Train [116][990/3239]	Time 0.237 (0.673)	Data Time 0.001 (0.041)	Loss 2.2287 (2.1572)	Entropy 0.63140 (0.63206)	Top-1 acc 67.969 (72.776)	Top-5 acc 87.500 (89.185)	lr 0.00009
Train [116][1000/3239]	Time 2.549 (0.671)	Data Time 0.001 (0.040)	Loss 2.1577 (2.1569)	Entropy 0.63140 (0.63206)	Top-1 acc 73.047 (72.788)	Top-5 acc 89.453 (89.188)	lr 0.00009
Train [116][1010/3239]	Time 0.242 (0.667)	Data Time 0.001 (0.040)	Loss 2.1733 (2.1566)	Entropy 0.63124 (0.63205)	Top-1 acc 73.047 (72.792)	Top-5 acc 89.844 (89.202)	lr 0.00009
Train [116][1020/3239]	Time 0.252 (0.665)	Data Time 0.001 (0.040)	Loss 2.2543 (2.1566)	Entropy 0.63116 (0.63204)	Top-1 acc 69.141 (72.785)	Top-5 acc 89.062 (89.204)	lr 0.00009
Train [116][1030/3239]	Time 0.240 (0.663)	Data Time 0.001 (0.039)	Loss 2.2083 (2.1571)	Entropy 0.63119 (0.63203)	Top-1 acc 71.094 (72.772)	Top-5 acc 88.281 (89.189)	lr 0.00009
Train [116][1040/3239]	Time 0.231 (0.661)	Data Time 0.001 (0.039)	Loss 2.0462 (2.1567)	Entropy 0.63122 (0.63202)	Top-1 acc 76.172 (72.778)	Top-5 acc 90.625 (89.198)	lr 0.00009
Train [116][1050/3239]	Time 0.231 (0.660)	Data Time 0.001 (0.038)	Loss 2.3743 (2.1568)	Entropy 0.63122 (0.63202)	Top-1 acc 66.797 (72.778)	Top-5 acc 85.938 (89.201)	lr 0.00009
Train [116][1060/3239]	Time 0.238 (0.658)	Data Time 0.001 (0.038)	Loss 2.1940 (2.1569)	Entropy 0.63127 (0.63201)	Top-1 acc 73.828 (72.782)	Top-5 acc 86.719 (89.194)	lr 0.00009
Train [116][1070/3239]	Time 0.233 (0.656)	Data Time 0.002 (0.038)	Loss 2.1569 (2.1568)	Entropy 0.63127 (0.63200)	Top-1 acc 73.828 (72.789)	Top-5 acc 88.281 (89.197)	lr 0.00009
Train [116][1080/3239]	Time 0.239 (0.655)	Data Time 0.001 (0.037)	Loss 2.1939 (2.1565)	Entropy 0.63131 (0.63199)	Top-1 acc 75.000 (72.796)	Top-5 acc 88.672 (89.202)	lr 0.00009
Train [116][1090/3239]	Time 0.300 (0.653)	Data Time 0.001 (0.037)	Loss 2.2571 (2.1567)	Entropy 0.63135 (0.63199)	Top-1 acc 69.531 (72.794)	Top-5 acc 86.328 (89.198)	lr 0.00009
Train [116][1100/3239]	Time 0.256 (0.652)	Data Time 0.001 (0.037)	Loss 2.1947 (2.1565)	Entropy 0.63136 (0.63198)	Top-1 acc 69.922 (72.801)	Top-5 acc 89.062 (89.204)	lr 0.00009
Train [116][1110/3239]	Time 2.730 (0.650)	Data Time 0.002 (0.036)	Loss 1.9167 (2.1564)	Entropy 0.63136 (0.63198)	Top-1 acc 78.906 (72.802)	Top-5 acc 93.359 (89.206)	lr 0.00009
Train [116][1120/3239]	Time 0.235 (0.647)	Data Time 0.001 (0.036)	Loss 2.1229 (2.1559)	Entropy 0.63135 (0.63197)	Top-1 acc 74.219 (72.810)	Top-5 acc 90.625 (89.217)	lr 0.00009
Train [116][1130/3239]	Time 0.243 (0.645)	Data Time 0.001 (0.036)	Loss 2.1335 (2.1561)	Entropy 0.63145 (0.63197)	Top-1 acc 73.828 (72.806)	Top-5 acc 88.672 (89.213)	lr 0.00009
Train [116][1140/3239]	Time 0.290 (0.644)	Data Time 0.001 (0.036)	Loss 1.9245 (2.1560)	Entropy 0.63146 (0.63196)	Top-1 acc 80.469 (72.813)	Top-5 acc 92.578 (89.217)	lr 0.00009
Train [116][1150/3239]	Time 0.317 (0.642)	Data Time 0.001 (0.035)	Loss 2.1793 (2.1561)	Entropy 0.63137 (0.63196)	Top-1 acc 69.922 (72.808)	Top-5 acc 91.016 (89.222)	lr 0.00009
Train [116][1160/3239]	Time 0.228 (0.641)	Data Time 0.001 (0.035)	Loss 2.3542 (2.1562)	Entropy 0.63139 (0.63195)	Top-1 acc 68.359 (72.811)	Top-5 acc 86.328 (89.219)	lr 0.00009
Train [116][1170/3239]	Time 0.246 (0.640)	Data Time 0.002 (0.035)	Loss 2.0932 (2.1561)	Entropy 0.63134 (0.63195)	Top-1 acc 76.562 (72.818)	Top-5 acc 91.406 (89.221)	lr 0.00009
Train [116][1180/3239]	Time 0.280 (0.638)	Data Time 0.001 (0.034)	Loss 2.2225 (2.1567)	Entropy 0.63124 (0.63194)	Top-1 acc 71.484 (72.804)	Top-5 acc 86.328 (89.216)	lr 0.00009
Train [116][1190/3239]	Time 0.316 (0.637)	Data Time 0.001 (0.034)	Loss 2.1516 (2.1567)	Entropy 0.63128 (0.63194)	Top-1 acc 75.391 (72.804)	Top-5 acc 88.672 (89.218)	lr 0.00009
Train [116][1200/3239]	Time 0.226 (0.636)	Data Time 0.001 (0.034)	Loss 2.2272 (2.1564)	Entropy 0.63134 (0.63193)	Top-1 acc 69.531 (72.809)	Top-5 acc 87.109 (89.221)	lr 0.00009
Train [116][1210/3239]	Time 0.232 (0.634)	Data Time 0.001 (0.034)	Loss 2.2005 (2.1568)	Entropy 0.63139 (0.63193)	Top-1 acc 71.094 (72.795)	Top-5 acc 88.672 (89.213)	lr 0.00009
Train [116][1220/3239]	Time 2.572 (0.633)	Data Time 0.001 (0.033)	Loss 2.0625 (2.1568)	Entropy 0.63139 (0.63192)	Top-1 acc 76.172 (72.797)	Top-5 acc 91.797 (89.208)	lr 0.00009
Train [116][1230/3239]	Time 0.234 (0.630)	Data Time 0.001 (0.033)	Loss 2.0917 (2.1568)	Entropy 0.63130 (0.63192)	Top-1 acc 73.438 (72.792)	Top-5 acc 91.406 (89.210)	lr 0.00009
Train [116][1240/3239]	Time 0.234 (0.629)	Data Time 0.001 (0.033)	Loss 2.3024 (2.1571)	Entropy 0.63129 (0.63191)	Top-1 acc 69.922 (72.779)	Top-5 acc 88.281 (89.209)	lr 0.00009
Train [116][1250/3239]	Time 0.241 (0.627)	Data Time 0.001 (0.033)	Loss 2.0956 (2.1568)	Entropy 0.63133 (0.63191)	Top-1 acc 76.953 (72.790)	Top-5 acc 89.844 (89.211)	lr 0.00009
Train [116][1260/3239]	Time 0.252 (0.626)	Data Time 0.001 (0.032)	Loss 2.2033 (2.1566)	Entropy 0.63136 (0.63190)	Top-1 acc 69.531 (72.793)	Top-5 acc 90.625 (89.216)	lr 0.00009
Train [116][1270/3239]	Time 0.240 (0.625)	Data Time 0.001 (0.032)	Loss 2.1448 (2.1562)	Entropy 0.63132 (0.63190)	Top-1 acc 73.828 (72.800)	Top-5 acc 87.891 (89.223)	lr 0.00009
Train [116][1280/3239]	Time 0.224 (0.624)	Data Time 0.001 (0.032)	Loss 2.1610 (2.1561)	Entropy 0.63123 (0.63189)	Top-1 acc 74.609 (72.803)	Top-5 acc 90.625 (89.227)	lr 0.00009
Train [116][1290/3239]	Time 0.232 (0.623)	Data Time 0.001 (0.032)	Loss 2.1306 (2.1563)	Entropy 0.63124 (0.63189)	Top-1 acc 71.484 (72.795)	Top-5 acc 89.453 (89.226)	lr 0.00009
Train [116][1300/3239]	Time 0.229 (0.622)	Data Time 0.002 (0.031)	Loss 2.1944 (2.1565)	Entropy 0.63125 (0.63188)	Top-1 acc 71.094 (72.797)	Top-5 acc 89.453 (89.220)	lr 0.00009
Train [116][1310/3239]	Time 0.234 (0.621)	Data Time 0.001 (0.031)	Loss 2.1460 (2.1563)	Entropy 0.63110 (0.63188)	Top-1 acc 72.266 (72.801)	Top-5 acc 89.844 (89.225)	lr 0.00009
Train [116][1320/3239]	Time 0.330 (0.620)	Data Time 0.001 (0.031)	Loss 2.1845 (2.1561)	Entropy 0.63109 (0.63187)	Top-1 acc 73.047 (72.806)	Top-5 acc 89.453 (89.232)	lr 0.00009
Train [116][1330/3239]	Time 53.884 (0.657)	Data Time 0.001 (0.031)	Loss 2.1380 (2.1561)	Entropy 0.63109 (0.63187)	Top-1 acc 73.047 (72.808)	Top-5 acc 91.016 (89.233)	lr 0.00009
Train [116][1340/3239]	Time 0.395 (0.655)	Data Time 0.002 (0.030)	Loss 2.1985 (2.1563)	Entropy 0.63105 (0.63186)	Top-1 acc 73.438 (72.809)	Top-5 acc 89.844 (89.226)	lr 0.00009
Train [116][1350/3239]	Time 0.229 (0.653)	Data Time 0.002 (0.030)	Loss 2.2734 (2.1562)	Entropy 0.63095 (0.63185)	Top-1 acc 69.922 (72.808)	Top-5 acc 85.156 (89.226)	lr 0.00009
Train [116][1360/3239]	Time 0.327 (0.652)	Data Time 0.001 (0.030)	Loss 2.1137 (2.1564)	Entropy 0.63091 (0.63185)	Top-1 acc 73.828 (72.810)	Top-5 acc 90.625 (89.222)	lr 0.00009
Train [116][1370/3239]	Time 0.239 (0.651)	Data Time 0.002 (0.030)	Loss 2.1121 (2.1563)	Entropy 0.63091 (0.63184)	Top-1 acc 71.484 (72.808)	Top-5 acc 89.844 (89.227)	lr 0.00009
Train [116][1380/3239]	Time 0.232 (0.650)	Data Time 0.001 (0.030)	Loss 2.0950 (2.1566)	Entropy 0.63092 (0.63183)	Top-1 acc 75.000 (72.802)	Top-5 acc 92.578 (89.220)	lr 0.00009
Train [116][1390/3239]	Time 0.247 (0.649)	Data Time 0.001 (0.029)	Loss 2.1786 (2.1563)	Entropy 0.63092 (0.63183)	Top-1 acc 71.484 (72.806)	Top-5 acc 89.062 (89.224)	lr 0.00009
Train [116][1400/3239]	Time 0.326 (0.647)	Data Time 0.001 (0.029)	Loss 2.2092 (2.1563)	Entropy 0.63225 (0.63182)	Top-1 acc 72.266 (72.803)	Top-5 acc 88.281 (89.225)	lr 0.00009
Train [116][1410/3239]	Time 0.239 (0.646)	Data Time 0.001 (0.029)	Loss 2.1459 (2.1564)	Entropy 0.63225 (0.63183)	Top-1 acc 71.094 (72.802)	Top-5 acc 89.062 (89.222)	lr 0.00009
Train [116][1420/3239]	Time 0.261 (0.645)	Data Time 0.001 (0.029)	Loss 2.2743 (2.1565)	Entropy 0.63228 (0.63183)	Top-1 acc 70.312 (72.800)	Top-5 acc 88.672 (89.223)	lr 0.00009
Train [116][1430/3239]	Time 0.268 (0.644)	Data Time 0.001 (0.029)	Loss 2.2934 (2.1564)	Entropy 0.63224 (0.63183)	Top-1 acc 70.703 (72.803)	Top-5 acc 84.766 (89.224)	lr 0.00009
Train [116][1440/3239]	Time 2.676 (0.643)	Data Time 0.001 (0.028)	Loss 2.2526 (2.1560)	Entropy 0.63224 (0.63184)	Top-1 acc 71.094 (72.819)	Top-5 acc 84.375 (89.232)	lr 0.00009
Train [116][1450/3239]	Time 0.259 (0.640)	Data Time 0.001 (0.028)	Loss 2.0452 (2.1557)	Entropy 0.63223 (0.63184)	Top-1 acc 78.125 (72.831)	Top-5 acc 91.016 (89.232)	lr 0.00009
Train [116][1460/3239]	Time 0.231 (0.639)	Data Time 0.001 (0.028)	Loss 2.1799 (2.1558)	Entropy 0.63218 (0.63184)	Top-1 acc 72.656 (72.834)	Top-5 acc 87.500 (89.227)	lr 0.00009
Train [116][1470/3239]	Time 0.230 (0.638)	Data Time 0.001 (0.028)	Loss 2.1819 (2.1555)	Entropy 0.63214 (0.63184)	Top-1 acc 70.312 (72.839)	Top-5 acc 89.062 (89.230)	lr 0.00009
Train [116][1480/3239]	Time 0.225 (0.637)	Data Time 0.001 (0.028)	Loss 2.2085 (2.1554)	Entropy 0.63210 (0.63184)	Top-1 acc 72.266 (72.842)	Top-5 acc 89.844 (89.231)	lr 0.00009
Train [116][1490/3239]	Time 0.224 (0.636)	Data Time 0.001 (0.028)	Loss 2.2149 (2.1555)	Entropy 0.63214 (0.63185)	Top-1 acc 69.531 (72.835)	Top-5 acc 88.281 (89.233)	lr 0.00009
Train [116][1500/3239]	Time 0.228 (0.635)	Data Time 0.001 (0.027)	Loss 2.0796 (2.1556)	Entropy 0.63212 (0.63185)	Top-1 acc 71.875 (72.831)	Top-5 acc 89.844 (89.227)	lr 0.00009
Train [116][1510/3239]	Time 0.234 (0.634)	Data Time 0.001 (0.027)	Loss 2.2469 (2.1558)	Entropy 0.63306 (0.63185)	Top-1 acc 74.219 (72.821)	Top-5 acc 89.062 (89.226)	lr 0.00009
Train [116][1520/3239]	Time 0.227 (0.633)	Data Time 0.001 (0.027)	Loss 2.0629 (2.1555)	Entropy 0.63308 (0.63186)	Top-1 acc 75.000 (72.827)	Top-5 acc 90.625 (89.230)	lr 0.00009
Train [116][1530/3239]	Time 0.231 (0.632)	Data Time 0.001 (0.027)	Loss 2.2551 (2.1554)	Entropy 0.63307 (0.63187)	Top-1 acc 71.094 (72.833)	Top-5 acc 88.281 (89.232)	lr 0.00009
Train [116][1540/3239]	Time 0.221 (0.631)	Data Time 0.001 (0.027)	Loss 2.3686 (2.1559)	Entropy 0.63306 (0.63188)	Top-1 acc 67.969 (72.819)	Top-5 acc 85.547 (89.228)	lr 0.00009
Train [116][1550/3239]	Time 2.561 (0.630)	Data Time 0.001 (0.027)	Loss 2.1813 (2.1561)	Entropy 0.63306 (0.63188)	Top-1 acc 71.875 (72.813)	Top-5 acc 90.234 (89.228)	lr 0.00009
Train [116][1560/3239]	Time 0.232 (0.628)	Data Time 0.001 (0.026)	Loss 2.2478 (2.1567)	Entropy 0.63301 (0.63189)	Top-1 acc 69.141 (72.799)	Top-5 acc 87.500 (89.217)	lr 0.00009
Train [116][1570/3239]	Time 0.231 (0.627)	Data Time 0.001 (0.026)	Loss 2.1749 (2.1563)	Entropy 0.63299 (0.63190)	Top-1 acc 71.094 (72.808)	Top-5 acc 88.672 (89.226)	lr 0.00009
Train [116][1580/3239]	Time 0.237 (0.626)	Data Time 0.001 (0.026)	Loss 2.1630 (2.1558)	Entropy 0.63297 (0.63191)	Top-1 acc 73.438 (72.822)	Top-5 acc 89.062 (89.235)	lr 0.00009
Train [116][1590/3239]	Time 0.235 (0.625)	Data Time 0.001 (0.026)	Loss 2.2391 (2.1557)	Entropy 0.63287 (0.63191)	Top-1 acc 67.969 (72.826)	Top-5 acc 87.109 (89.233)	lr 0.00009
Train [116][1600/3239]	Time 0.235 (0.624)	Data Time 0.001 (0.026)	Loss 2.2568 (2.1559)	Entropy 0.63284 (0.63192)	Top-1 acc 67.969 (72.828)	Top-5 acc 87.109 (89.229)	lr 0.00009
Train [116][1610/3239]	Time 0.226 (0.623)	Data Time 0.001 (0.026)	Loss 2.2501 (2.1559)	Entropy 0.63277 (0.63192)	Top-1 acc 69.531 (72.826)	Top-5 acc 86.328 (89.226)	lr 0.00009
Train [116][1620/3239]	Time 0.242 (0.623)	Data Time 0.001 (0.025)	Loss 2.1675 (2.1558)	Entropy 0.63268 (0.63193)	Top-1 acc 74.609 (72.830)	Top-5 acc 90.234 (89.229)	lr 0.00009
Train [116][1630/3239]	Time 0.233 (0.622)	Data Time 0.001 (0.025)	Loss 1.9811 (2.1554)	Entropy 0.63272 (0.63193)	Top-1 acc 78.125 (72.840)	Top-5 acc 92.188 (89.233)	lr 0.00009
Train [116][1640/3239]	Time 0.226 (0.621)	Data Time 0.001 (0.025)	Loss 2.2041 (2.1554)	Entropy 0.63273 (0.63194)	Top-1 acc 70.703 (72.841)	Top-5 acc 88.672 (89.234)	lr 0.00009
Train [116][1650/3239]	Time 0.345 (0.620)	Data Time 0.001 (0.025)	Loss 2.1130 (2.1555)	Entropy 0.63265 (0.63194)	Top-1 acc 75.781 (72.844)	Top-5 acc 90.234 (89.230)	lr 0.00009
Train [116][1660/3239]	Time 2.590 (0.619)	Data Time 0.001 (0.025)	Loss 2.1834 (2.1556)	Entropy 0.63265 (0.63195)	Top-1 acc 74.609 (72.842)	Top-5 acc 87.891 (89.230)	lr 0.00009
Train [116][1670/3239]	Time 0.243 (0.617)	Data Time 0.001 (0.025)	Loss 2.1137 (2.1555)	Entropy 0.63262 (0.63195)	Top-1 acc 73.047 (72.849)	Top-5 acc 90.625 (89.236)	lr 0.00009
Train [116][1680/3239]	Time 0.231 (0.616)	Data Time 0.001 (0.025)	Loss 2.1693 (2.1554)	Entropy 0.63257 (0.63195)	Top-1 acc 71.484 (72.850)	Top-5 acc 88.672 (89.238)	lr 0.00009
Train [116][1690/3239]	Time 0.340 (0.615)	Data Time 0.001 (0.024)	Loss 2.0979 (2.1554)	Entropy 0.63252 (0.63196)	Top-1 acc 74.609 (72.845)	Top-5 acc 91.797 (89.239)	lr 0.00009
Train [116][1700/3239]	Time 0.333 (0.645)	Data Time 0.003 (0.024)	Loss 2.1733 (2.1556)	Entropy 0.63253 (0.63196)	Top-1 acc 70.703 (72.842)	Top-5 acc 87.109 (89.233)	lr 0.00009
Train [116][1710/3239]	Time 0.234 (0.645)	Data Time 0.002 (0.024)	Loss 2.1194 (2.1554)	Entropy 0.63247 (0.63196)	Top-1 acc 74.609 (72.843)	Top-5 acc 88.281 (89.235)	lr 0.00009
Train [116][1720/3239]	Time 0.230 (0.644)	Data Time 0.001 (0.024)	Loss 2.1380 (2.1552)	Entropy 0.63246 (0.63197)	Top-1 acc 74.219 (72.849)	Top-5 acc 88.281 (89.236)	lr 0.00009
Train [116][1730/3239]	Time 0.265 (0.643)	Data Time 0.001 (0.024)	Loss 2.1573 (2.1550)	Entropy 0.63244 (0.63197)	Top-1 acc 73.047 (72.856)	Top-5 acc 89.062 (89.239)	lr 0.00009
Train [116][1740/3239]	Time 0.256 (0.642)	Data Time 0.002 (0.024)	Loss 2.2185 (2.1550)	Entropy 0.63250 (0.63197)	Top-1 acc 71.094 (72.862)	Top-5 acc 85.938 (89.234)	lr 0.00009
Train [116][1750/3239]	Time 0.225 (0.641)	Data Time 0.001 (0.024)	Loss 2.2687 (2.1551)	Entropy 0.63246 (0.63198)	Top-1 acc 67.969 (72.856)	Top-5 acc 87.891 (89.234)	lr 0.00009
Train [116][1760/3239]	Time 0.293 (0.640)	Data Time 0.001 (0.024)	Loss 2.1751 (2.1552)	Entropy 0.63284 (0.63198)	Top-1 acc 73.047 (72.856)	Top-5 acc 89.844 (89.233)	lr 0.00008
Train [116][1770/3239]	Time 2.556 (0.639)	Data Time 0.001 (0.023)	Loss 2.2729 (2.1550)	Entropy 0.63284 (0.63198)	Top-1 acc 68.750 (72.859)	Top-5 acc 87.500 (89.235)	lr 0.00008
Train [116][1780/3239]	Time 0.233 (0.637)	Data Time 0.001 (0.023)	Loss 2.2359 (2.1552)	Entropy 0.63275 (0.63199)	Top-1 acc 69.531 (72.854)	Top-5 acc 90.625 (89.234)	lr 0.00008
Train [116][1790/3239]	Time 0.237 (0.636)	Data Time 0.001 (0.023)	Loss 2.1162 (2.1550)	Entropy 0.63272 (0.63199)	Top-1 acc 71.875 (72.857)	Top-5 acc 91.016 (89.241)	lr 0.00008
Train [116][1800/3239]	Time 0.230 (0.636)	Data Time 0.001 (0.023)	Loss 2.3131 (2.1552)	Entropy 0.63267 (0.63200)	Top-1 acc 69.531 (72.853)	Top-5 acc 88.672 (89.236)	lr 0.00008
Train [116][1810/3239]	Time 0.237 (0.635)	Data Time 0.001 (0.023)	Loss 2.1374 (2.1553)	Entropy 0.63264 (0.63200)	Top-1 acc 71.484 (72.850)	Top-5 acc 90.625 (89.233)	lr 0.00008
Train [116][1820/3239]	Time 0.248 (0.634)	Data Time 0.001 (0.023)	Loss 2.2076 (2.1554)	Entropy 0.63255 (0.63200)	Top-1 acc 71.094 (72.846)	Top-5 acc 87.500 (89.233)	lr 0.00008
Train [116][1830/3239]	Time 0.233 (0.633)	Data Time 0.001 (0.023)	Loss 2.3916 (2.1555)	Entropy 0.63237 (0.63200)	Top-1 acc 64.062 (72.837)	Top-5 acc 85.547 (89.233)	lr 0.00008
Train [116][1840/3239]	Time 0.261 (0.632)	Data Time 0.002 (0.023)	Loss 2.2335 (2.1554)	Entropy 0.63234 (0.63201)	Top-1 acc 71.875 (72.837)	Top-5 acc 87.500 (89.235)	lr 0.00008
Train [116][1850/3239]	Time 0.232 (0.631)	Data Time 0.001 (0.022)	Loss 2.1309 (2.1554)	Entropy 0.63237 (0.63201)	Top-1 acc 71.094 (72.835)	Top-5 acc 91.016 (89.234)	lr 0.00008
Train [116][1860/3239]	Time 0.226 (0.631)	Data Time 0.001 (0.022)	Loss 2.0689 (2.1551)	Entropy 0.63238 (0.63201)	Top-1 acc 76.172 (72.842)	Top-5 acc 90.234 (89.242)	lr 0.00008
Train [116][1870/3239]	Time 0.231 (0.630)	Data Time 0.001 (0.022)	Loss 2.2056 (2.1552)	Entropy 0.63236 (0.63201)	Top-1 acc 71.484 (72.837)	Top-5 acc 89.453 (89.239)	lr 0.00008
Train [116][1880/3239]	Time 2.599 (0.629)	Data Time 0.001 (0.022)	Loss 2.0922 (2.1551)	Entropy 0.63236 (0.63201)	Top-1 acc 74.609 (72.841)	Top-5 acc 89.062 (89.239)	lr 0.00008
Train [116][1890/3239]	Time 0.245 (0.627)	Data Time 0.001 (0.022)	Loss 2.2163 (2.1551)	Entropy 0.63225 (0.63202)	Top-1 acc 73.047 (72.843)	Top-5 acc 87.500 (89.241)	lr 0.00008
Train [116][1900/3239]	Time 0.337 (0.626)	Data Time 0.001 (0.022)	Loss 2.2053 (2.1550)	Entropy 0.63229 (0.63202)	Top-1 acc 73.047 (72.842)	Top-5 acc 89.062 (89.242)	lr 0.00008
Train [116][1910/3239]	Time 0.224 (0.626)	Data Time 0.001 (0.022)	Loss 2.0843 (2.1549)	Entropy 0.63229 (0.63202)	Top-1 acc 76.562 (72.848)	Top-5 acc 90.234 (89.246)	lr 0.00008
Train [116][1920/3239]	Time 0.227 (0.625)	Data Time 0.002 (0.022)	Loss 2.1089 (2.1549)	Entropy 0.63236 (0.63202)	Top-1 acc 74.219 (72.849)	Top-5 acc 91.406 (89.246)	lr 0.00008
Train [116][1930/3239]	Time 0.251 (0.624)	Data Time 0.001 (0.022)	Loss 2.4018 (2.1550)	Entropy 0.63227 (0.63202)	Top-1 acc 67.188 (72.848)	Top-5 acc 86.328 (89.246)	lr 0.00008
Train [116][1940/3239]	Time 0.356 (0.624)	Data Time 0.001 (0.022)	Loss 2.2692 (2.1550)	Entropy 0.63216 (0.63202)	Top-1 acc 70.312 (72.848)	Top-5 acc 86.328 (89.241)	lr 0.00008
Train [116][1950/3239]	Time 0.243 (0.623)	Data Time 0.001 (0.021)	Loss 2.2293 (2.1551)	Entropy 0.63218 (0.63202)	Top-1 acc 66.797 (72.837)	Top-5 acc 88.281 (89.239)	lr 0.00008
Train [116][1960/3239]	Time 0.220 (0.622)	Data Time 0.001 (0.021)	Loss 2.1850 (2.1550)	Entropy 0.63217 (0.63202)	Top-1 acc 73.047 (72.836)	Top-5 acc 89.844 (89.239)	lr 0.00008
Train [116][1970/3239]	Time 0.235 (0.622)	Data Time 0.001 (0.021)	Loss 2.0388 (2.1550)	Entropy 0.63220 (0.63202)	Top-1 acc 78.516 (72.839)	Top-5 acc 90.234 (89.240)	lr 0.00008
Train [116][1980/3239]	Time 0.313 (0.621)	Data Time 0.001 (0.021)	Loss 2.0556 (2.1547)	Entropy 0.63218 (0.63203)	Top-1 acc 73.047 (72.842)	Top-5 acc 89.844 (89.242)	lr 0.00008
Train [116][1990/3239]	Time 2.566 (0.620)	Data Time 0.001 (0.021)	Loss 2.1940 (2.1548)	Entropy 0.63218 (0.63203)	Top-1 acc 71.875 (72.840)	Top-5 acc 88.672 (89.240)	lr 0.00008
Train [116][2000/3239]	Time 0.253 (0.618)	Data Time 0.001 (0.021)	Loss 2.2936 (2.1547)	Entropy 0.63220 (0.63203)	Top-1 acc 69.141 (72.846)	Top-5 acc 86.328 (89.242)	lr 0.00008
Train [116][2010/3239]	Time 0.240 (0.617)	Data Time 0.001 (0.021)	Loss 2.0748 (2.1547)	Entropy 0.63221 (0.63203)	Top-1 acc 71.875 (72.847)	Top-5 acc 90.625 (89.244)	lr 0.00008
Train [116][2020/3239]	Time 0.249 (0.617)	Data Time 0.001 (0.021)	Loss 2.2636 (2.1547)	Entropy 0.63221 (0.63203)	Top-1 acc 69.531 (72.851)	Top-5 acc 88.672 (89.246)	lr 0.00008
Train [116][2030/3239]	Time 0.232 (0.616)	Data Time 0.001 (0.021)	Loss 2.0706 (2.1545)	Entropy 0.63216 (0.63203)	Top-1 acc 76.562 (72.855)	Top-5 acc 89.453 (89.249)	lr 0.00008
Train [116][2040/3239]	Time 0.221 (0.615)	Data Time 0.001 (0.021)	Loss 2.2122 (2.1545)	Entropy 0.63217 (0.63203)	Top-1 acc 70.312 (72.854)	Top-5 acc 89.844 (89.252)	lr 0.00008
Train [116][2050/3239]	Time 0.228 (0.615)	Data Time 0.001 (0.020)	Loss 2.2183 (2.1547)	Entropy 0.63220 (0.63203)	Top-1 acc 73.828 (72.846)	Top-5 acc 85.547 (89.246)	lr 0.00008
Train [116][2060/3239]	Time 0.240 (0.641)	Data Time 0.002 (0.020)	Loss 3.4585 (2.1552)	Entropy 0.63220 (0.63203)	Top-1 acc 44.141 (72.834)	Top-5 acc 73.047 (89.239)	lr 0.00008
Train [116][2070/3239]	Time 0.280 (0.640)	Data Time 0.002 (0.020)	Loss 2.1289 (2.1553)	Entropy 0.63219 (0.63203)	Top-1 acc 75.000 (72.834)	Top-5 acc 91.016 (89.238)	lr 0.00008
Train [116][2080/3239]	Time 0.236 (0.639)	Data Time 0.002 (0.020)	Loss 2.1925 (2.1554)	Entropy 0.63214 (0.63203)	Top-1 acc 69.922 (72.832)	Top-5 acc 89.844 (89.237)	lr 0.00008
Train [116][2090/3239]	Time 0.227 (0.639)	Data Time 0.001 (0.020)	Loss 2.0930 (2.1555)	Entropy 0.63209 (0.63203)	Top-1 acc 75.781 (72.827)	Top-5 acc 88.281 (89.233)	lr 0.00008
Train [116][2100/3239]	Time 2.589 (0.638)	Data Time 0.001 (0.020)	Loss 2.0912 (2.1555)	Entropy 0.63209 (0.63203)	Top-1 acc 74.219 (72.829)	Top-5 acc 91.016 (89.232)	lr 0.00008
Train [116][2110/3239]	Time 0.228 (0.636)	Data Time 0.001 (0.020)	Loss 2.0998 (2.1555)	Entropy 0.63204 (0.63203)	Top-1 acc 73.828 (72.829)	Top-5 acc 87.891 (89.231)	lr 0.00008
Train [116][2120/3239]	Time 0.247 (0.635)	Data Time 0.001 (0.020)	Loss 1.9949 (2.1554)	Entropy 0.63212 (0.63203)	Top-1 acc 78.125 (72.831)	Top-5 acc 93.359 (89.232)	lr 0.00008
Train [116][2130/3239]	Time 0.224 (0.635)	Data Time 0.002 (0.020)	Loss 2.2118 (2.1553)	Entropy 0.63204 (0.63203)	Top-1 acc 70.703 (72.828)	Top-5 acc 89.062 (89.236)	lr 0.00008
Train [116][2140/3239]	Time 0.268 (0.634)	Data Time 0.001 (0.020)	Loss 2.2148 (2.1553)	Entropy 0.63208 (0.63204)	Top-1 acc 68.750 (72.829)	Top-5 acc 87.891 (89.235)	lr 0.00008
Train [116][2150/3239]	Time 0.319 (0.633)	Data Time 0.001 (0.020)	Loss 2.1292 (2.1552)	Entropy 0.63208 (0.63204)	Top-1 acc 73.828 (72.833)	Top-5 acc 90.234 (89.239)	lr 0.00008
Train [116][2160/3239]	Time 0.243 (0.633)	Data Time 0.001 (0.019)	Loss 2.1204 (2.1554)	Entropy 0.63211 (0.63204)	Top-1 acc 75.781 (72.829)	Top-5 acc 89.062 (89.235)	lr 0.00008
Train [116][2170/3239]	Time 0.238 (0.632)	Data Time 0.001 (0.019)	Loss 2.3017 (2.1554)	Entropy 0.63218 (0.63204)	Top-1 acc 71.484 (72.835)	Top-5 acc 89.062 (89.238)	lr 0.00008
Train [116][2180/3239]	Time 0.225 (0.631)	Data Time 0.001 (0.019)	Loss 2.2860 (2.1555)	Entropy 0.63205 (0.63204)	Top-1 acc 68.359 (72.831)	Top-5 acc 87.109 (89.235)	lr 0.00008
Train [116][2190/3239]	Time 0.311 (0.631)	Data Time 0.001 (0.019)	Loss 2.1171 (2.1556)	Entropy 0.63194 (0.63204)	Top-1 acc 72.656 (72.828)	Top-5 acc 89.844 (89.231)	lr 0.00008
Train [116][2200/3239]	Time 0.258 (0.630)	Data Time 0.001 (0.019)	Loss 2.0186 (2.1556)	Entropy 0.63191 (0.63204)	Top-1 acc 78.125 (72.830)	Top-5 acc 90.234 (89.231)	lr 0.00008
Train [116][2210/3239]	Time 2.591 (0.629)	Data Time 0.002 (0.019)	Loss 2.3060 (2.1556)	Entropy 0.63191 (0.63204)	Top-1 acc 67.578 (72.832)	Top-5 acc 87.500 (89.229)	lr 0.00008
Train [116][2220/3239]	Time 0.242 (0.627)	Data Time 0.001 (0.019)	Loss 2.1986 (2.1555)	Entropy 0.63187 (0.63203)	Top-1 acc 73.438 (72.837)	Top-5 acc 90.625 (89.233)	lr 0.00008
Train [116][2230/3239]	Time 0.349 (0.627)	Data Time 0.001 (0.019)	Loss 2.1016 (2.1555)	Entropy 0.63186 (0.63203)	Top-1 acc 72.656 (72.836)	Top-5 acc 89.453 (89.235)	lr 0.00008
Train [116][2240/3239]	Time 0.239 (0.626)	Data Time 0.001 (0.019)	Loss 2.1551 (2.1557)	Entropy 0.63184 (0.63203)	Top-1 acc 73.047 (72.835)	Top-5 acc 90.625 (89.233)	lr 0.00008
Train [116][2250/3239]	Time 0.223 (0.625)	Data Time 0.001 (0.019)	Loss 2.2271 (2.1557)	Entropy 0.63174 (0.63203)	Top-1 acc 73.828 (72.839)	Top-5 acc 86.328 (89.233)	lr 0.00008
Train [116][2260/3239]	Time 0.232 (0.625)	Data Time 0.001 (0.019)	Loss 2.1682 (2.1557)	Entropy 0.63169 (0.63203)	Top-1 acc 71.094 (72.838)	Top-5 acc 87.500 (89.234)	lr 0.00008
Train [116][2270/3239]	Time 0.385 (0.624)	Data Time 0.001 (0.019)	Loss 2.1095 (2.1556)	Entropy 0.63174 (0.63203)	Top-1 acc 74.609 (72.841)	Top-5 acc 89.453 (89.236)	lr 0.00008
Train [116][2280/3239]	Time 0.240 (0.624)	Data Time 0.001 (0.019)	Loss 2.2036 (2.1556)	Entropy 0.63170 (0.63203)	Top-1 acc 71.484 (72.845)	Top-5 acc 88.281 (89.234)	lr 0.00008
Train [116][2290/3239]	Time 0.230 (0.623)	Data Time 0.001 (0.018)	Loss 2.2091 (2.1555)	Entropy 0.63168 (0.63203)	Top-1 acc 71.484 (72.847)	Top-5 acc 87.891 (89.237)	lr 0.00008
Train [116][2300/3239]	Time 0.251 (0.622)	Data Time 0.002 (0.018)	Loss 2.2338 (2.1555)	Entropy 0.63165 (0.63202)	Top-1 acc 70.312 (72.846)	Top-5 acc 85.156 (89.236)	lr 0.00008
Train [116][2310/3239]	Time 0.245 (0.622)	Data Time 0.001 (0.018)	Loss 2.2009 (2.1555)	Entropy 0.63158 (0.63202)	Top-1 acc 68.750 (72.844)	Top-5 acc 89.453 (89.239)	lr 0.00008
Train [116][2320/3239]	Time 2.510 (0.621)	Data Time 0.001 (0.018)	Loss 2.2444 (2.1554)	Entropy 0.63158 (0.63202)	Top-1 acc 69.141 (72.845)	Top-5 acc 87.500 (89.239)	lr 0.00008
Train [116][2330/3239]	Time 0.235 (0.619)	Data Time 0.001 (0.018)	Loss 2.1785 (2.1555)	Entropy 0.63162 (0.63202)	Top-1 acc 73.047 (72.841)	Top-5 acc 88.281 (89.239)	lr 0.00008
Train [116][2340/3239]	Time 0.228 (0.619)	Data Time 0.001 (0.018)	Loss 2.1805 (2.1555)	Entropy 0.63157 (0.63202)	Top-1 acc 72.266 (72.840)	Top-5 acc 89.062 (89.239)	lr 0.00008
Train [116][2350/3239]	Time 0.235 (0.618)	Data Time 0.001 (0.018)	Loss 2.1578 (2.1554)	Entropy 0.63153 (0.63202)	Top-1 acc 71.875 (72.843)	Top-5 acc 88.672 (89.240)	lr 0.00008
Train [116][2360/3239]	Time 0.239 (0.618)	Data Time 0.001 (0.018)	Loss 2.0611 (2.1553)	Entropy 0.63159 (0.63201)	Top-1 acc 73.047 (72.845)	Top-5 acc 92.578 (89.242)	lr 0.00008
Train [116][2370/3239]	Time 0.230 (0.617)	Data Time 0.001 (0.018)	Loss 2.0903 (2.1551)	Entropy 0.63145 (0.63201)	Top-1 acc 75.000 (72.851)	Top-5 acc 90.625 (89.245)	lr 0.00008
Train [116][2380/3239]	Time 0.239 (0.617)	Data Time 0.001 (0.018)	Loss 2.1341 (2.1550)	Entropy 0.63147 (0.63201)	Top-1 acc 72.656 (72.857)	Top-5 acc 89.844 (89.248)	lr 0.00008
Train [116][2390/3239]	Time 0.231 (0.616)	Data Time 0.001 (0.018)	Loss 2.1307 (2.1549)	Entropy 0.63143 (0.63201)	Top-1 acc 74.609 (72.859)	Top-5 acc 87.500 (89.248)	lr 0.00008
Train [116][2400/3239]	Time 0.251 (0.616)	Data Time 0.001 (0.018)	Loss 2.2175 (2.1550)	Entropy 0.63139 (0.63200)	Top-1 acc 69.922 (72.857)	Top-5 acc 87.500 (89.247)	lr 0.00008
Train [116][2410/3239]	Time 0.229 (0.615)	Data Time 0.001 (0.018)	Loss 2.3191 (2.1549)	Entropy 0.63130 (0.63200)	Top-1 acc 71.875 (72.860)	Top-5 acc 83.984 (89.247)	lr 0.00008
Train [116][2420/3239]	Time 0.275 (0.636)	Data Time 0.004 (0.018)	Loss 2.0638 (2.1550)	Entropy 0.63123 (0.63200)	Top-1 acc 76.953 (72.859)	Top-5 acc 90.625 (89.241)	lr 0.00008
Train [116][2430/3239]	Time 3.125 (0.636)	Data Time 0.003 (0.017)	Loss 2.2609 (2.1551)	Entropy 0.63123 (0.63200)	Top-1 acc 71.094 (72.857)	Top-5 acc 85.938 (89.239)	lr 0.00008
Train [116][2440/3239]	Time 0.240 (0.634)	Data Time 0.001 (0.017)	Loss 2.1246 (2.1550)	Entropy 0.63127 (0.63199)	Top-1 acc 75.781 (72.861)	Top-5 acc 91.016 (89.242)	lr 0.00008
Train [116][2450/3239]	Time 0.250 (0.633)	Data Time 0.002 (0.017)	Loss 2.0932 (2.1549)	Entropy 0.63125 (0.63199)	Top-1 acc 72.266 (72.862)	Top-5 acc 89.844 (89.243)	lr 0.00008
Train [116][2460/3239]	Time 0.240 (0.633)	Data Time 0.001 (0.017)	Loss 2.1371 (2.1548)	Entropy 0.63120 (0.63199)	Top-1 acc 74.609 (72.864)	Top-5 acc 90.234 (89.246)	lr 0.00008
Train [116][2470/3239]	Time 0.241 (0.632)	Data Time 0.001 (0.017)	Loss 2.2896 (2.1551)	Entropy 0.63116 (0.63198)	Top-1 acc 68.750 (72.860)	Top-5 acc 84.766 (89.241)	lr 0.00008
Train [116][2480/3239]	Time 0.318 (0.632)	Data Time 0.001 (0.017)	Loss 2.1948 (2.1551)	Entropy 0.63114 (0.63198)	Top-1 acc 69.531 (72.859)	Top-5 acc 89.453 (89.239)	lr 0.00008
Train [116][2490/3239]	Time 0.237 (0.631)	Data Time 0.001 (0.017)	Loss 2.1200 (2.1550)	Entropy 0.63110 (0.63198)	Top-1 acc 74.609 (72.863)	Top-5 acc 88.281 (89.243)	lr 0.00008
Train [116][2500/3239]	Time 0.244 (0.630)	Data Time 0.001 (0.017)	Loss 2.0321 (2.1548)	Entropy 0.63105 (0.63197)	Top-1 acc 76.953 (72.869)	Top-5 acc 90.234 (89.245)	lr 0.00008
Train [116][2510/3239]	Time 0.229 (0.630)	Data Time 0.001 (0.017)	Loss 2.2400 (2.1546)	Entropy 0.63107 (0.63197)	Top-1 acc 71.484 (72.875)	Top-5 acc 86.719 (89.247)	lr 0.00008
Train [116][2520/3239]	Time 0.336 (0.629)	Data Time 0.001 (0.017)	Loss 2.0637 (2.1548)	Entropy 0.63106 (0.63197)	Top-1 acc 75.781 (72.868)	Top-5 acc 91.406 (89.245)	lr 0.00008
Train [116][2530/3239]	Time 0.272 (0.629)	Data Time 0.002 (0.017)	Loss 2.1873 (2.1548)	Entropy 0.63106 (0.63196)	Top-1 acc 72.656 (72.867)	Top-5 acc 88.672 (89.243)	lr 0.00008
Train [116][2540/3239]	Time 2.606 (0.628)	Data Time 0.001 (0.017)	Loss 2.2464 (2.1550)	Entropy 0.63106 (0.63196)	Top-1 acc 70.312 (72.866)	Top-5 acc 85.938 (89.239)	lr 0.00008
Train [116][2550/3239]	Time 0.249 (0.627)	Data Time 0.001 (0.017)	Loss 2.2681 (2.1551)	Entropy 0.63102 (0.63195)	Top-1 acc 68.750 (72.864)	Top-5 acc 87.109 (89.238)	lr 0.00008
Train [116][2560/3239]	Time 0.284 (0.626)	Data Time 0.001 (0.017)	Loss 2.3061 (2.1552)	Entropy 0.63096 (0.63195)	Top-1 acc 69.141 (72.861)	Top-5 acc 85.938 (89.238)	lr 0.00008
Train [116][2570/3239]	Time 0.246 (0.626)	Data Time 0.001 (0.017)	Loss 2.1576 (2.1553)	Entropy 0.63095 (0.63195)	Top-1 acc 73.828 (72.858)	Top-5 acc 90.625 (89.233)	lr 0.00008
Train [116][2580/3239]	Time 0.243 (0.625)	Data Time 0.001 (0.017)	Loss 2.2490 (2.1554)	Entropy 0.63094 (0.63194)	Top-1 acc 72.656 (72.857)	Top-5 acc 87.109 (89.231)	lr 0.00008
Train [116][2590/3239]	Time 0.232 (0.624)	Data Time 0.001 (0.017)	Loss 2.0272 (2.1554)	Entropy 0.63086 (0.63194)	Top-1 acc 76.562 (72.859)	Top-5 acc 90.625 (89.231)	lr 0.00008
Train [116][2600/3239]	Time 0.275 (0.624)	Data Time 0.001 (0.016)	Loss 2.1991 (2.1555)	Entropy 0.63086 (0.63194)	Top-1 acc 71.484 (72.856)	Top-5 acc 89.062 (89.230)	lr 0.00008
Train [116][2610/3239]	Time 0.232 (0.623)	Data Time 0.001 (0.016)	Loss 2.0395 (2.1555)	Entropy 0.63084 (0.63193)	Top-1 acc 75.781 (72.857)	Top-5 acc 91.797 (89.232)	lr 0.00008
Train [116][2620/3239]	Time 0.223 (0.623)	Data Time 0.001 (0.016)	Loss 2.0740 (2.1555)	Entropy 0.63077 (0.63193)	Top-1 acc 74.609 (72.854)	Top-5 acc 91.406 (89.232)	lr 0.00008
Train [116][2630/3239]	Time 0.256 (0.622)	Data Time 0.001 (0.016)	Loss 2.1315 (2.1556)	Entropy 0.63081 (0.63192)	Top-1 acc 72.656 (72.850)	Top-5 acc 88.672 (89.231)	lr 0.00008
Train [116][2640/3239]	Time 0.229 (0.622)	Data Time 0.001 (0.016)	Loss 2.0881 (2.1558)	Entropy 0.63081 (0.63192)	Top-1 acc 77.344 (72.848)	Top-5 acc 89.844 (89.228)	lr 0.00007
Train [116][2650/3239]	Time 0.279 (0.621)	Data Time 0.001 (0.016)	Loss 2.0569 (2.1557)	Entropy 0.63078 (0.63191)	Top-1 acc 73.047 (72.853)	Top-5 acc 90.234 (89.230)	lr 0.00007
Train [116][2660/3239]	Time 0.296 (0.621)	Data Time 0.001 (0.016)	Loss 2.2785 (2.1558)	Entropy 0.63084 (0.63191)	Top-1 acc 68.359 (72.848)	Top-5 acc 86.328 (89.226)	lr 0.00007
Train [116][2670/3239]	Time 0.294 (0.620)	Data Time 0.001 (0.016)	Loss 2.2334 (2.1559)	Entropy 0.63083 (0.63191)	Top-1 acc 71.484 (72.844)	Top-5 acc 86.719 (89.225)	lr 0.00007
Train [116][2680/3239]	Time 0.249 (0.620)	Data Time 0.002 (0.016)	Loss 2.7114 (2.1563)	Entropy 0.63076 (0.63190)	Top-1 acc 59.766 (72.838)	Top-5 acc 83.594 (89.220)	lr 0.00007
Train [116][2690/3239]	Time 0.250 (0.619)	Data Time 0.002 (0.016)	Loss 2.1203 (2.1563)	Entropy 0.63074 (0.63190)	Top-1 acc 73.047 (72.841)	Top-5 acc 91.406 (89.221)	lr 0.00007
Train [116][2700/3239]	Time 0.262 (0.619)	Data Time 0.001 (0.016)	Loss 2.1421 (2.1563)	Entropy 0.63069 (0.63189)	Top-1 acc 74.609 (72.841)	Top-5 acc 89.844 (89.223)	lr 0.00007
Train [116][2710/3239]	Time 0.236 (0.618)	Data Time 0.001 (0.016)	Loss 2.1729 (2.1563)	Entropy 0.63063 (0.63189)	Top-1 acc 74.219 (72.844)	Top-5 acc 88.281 (89.223)	lr 0.00007
Train [116][2720/3239]	Time 0.306 (0.618)	Data Time 0.001 (0.016)	Loss 2.1129 (2.1564)	Entropy 0.63054 (0.63188)	Top-1 acc 72.266 (72.839)	Top-5 acc 87.500 (89.221)	lr 0.00007
Train [116][2730/3239]	Time 0.235 (0.617)	Data Time 0.001 (0.016)	Loss 2.2867 (2.1563)	Entropy 0.63058 (0.63188)	Top-1 acc 71.484 (72.842)	Top-5 acc 88.281 (89.224)	lr 0.00007
Train [116][2740/3239]	Time 0.268 (0.617)	Data Time 0.001 (0.016)	Loss 2.9515 (2.1567)	Entropy 0.63057 (0.63187)	Top-1 acc 51.953 (72.832)	Top-5 acc 77.344 (89.217)	lr 0.00007
Train [116][2750/3239]	Time 0.229 (0.616)	Data Time 0.001 (0.016)	Loss 2.0625 (2.1566)	Entropy 0.63050 (0.63187)	Top-1 acc 77.344 (72.835)	Top-5 acc 90.234 (89.216)	lr 0.00007
Train [116][2760/3239]	Time 0.298 (0.616)	Data Time 0.001 (0.016)	Loss 2.1977 (2.1566)	Entropy 0.63053 (0.63186)	Top-1 acc 70.703 (72.836)	Top-5 acc 87.891 (89.219)	lr 0.00007
Train [116][2770/3239]	Time 0.455 (0.634)	Data Time 0.004 (0.016)	Loss 2.1587 (2.1571)	Entropy 0.63053 (0.63186)	Top-1 acc 76.172 (72.829)	Top-5 acc 87.891 (89.211)	lr 0.00007
Train [116][2780/3239]	Time 0.242 (0.634)	Data Time 0.004 (0.015)	Loss 2.1865 (2.1571)	Entropy 0.63055 (0.63185)	Top-1 acc 71.875 (72.826)	Top-5 acc 89.062 (89.214)	lr 0.00007
Train [116][2790/3239]	Time 0.255 (0.633)	Data Time 0.002 (0.015)	Loss 2.0356 (2.1572)	Entropy 0.63058 (0.63185)	Top-1 acc 75.000 (72.824)	Top-5 acc 92.578 (89.214)	lr 0.00007
Train [116][2800/3239]	Time 0.239 (0.632)	Data Time 0.002 (0.015)	Loss 2.1525 (2.1571)	Entropy 0.63055 (0.63185)	Top-1 acc 75.391 (72.824)	Top-5 acc 89.062 (89.216)	lr 0.00007
Train [116][2810/3239]	Time 0.221 (0.632)	Data Time 0.001 (0.015)	Loss 2.1671 (2.1571)	Entropy 0.63048 (0.63184)	Top-1 acc 73.047 (72.823)	Top-5 acc 88.672 (89.215)	lr 0.00007
Train [116][2820/3239]	Time 0.270 (0.631)	Data Time 0.001 (0.015)	Loss 2.2109 (2.1573)	Entropy 0.63046 (0.63184)	Top-1 acc 69.531 (72.818)	Top-5 acc 87.891 (89.212)	lr 0.00007
Train [116][2830/3239]	Time 0.263 (0.631)	Data Time 0.001 (0.015)	Loss 2.2604 (2.1573)	Entropy 0.63043 (0.63183)	Top-1 acc 69.922 (72.816)	Top-5 acc 87.891 (89.211)	lr 0.00007
Train [116][2840/3239]	Time 0.234 (0.630)	Data Time 0.001 (0.015)	Loss 2.1723 (2.1573)	Entropy 0.63038 (0.63183)	Top-1 acc 70.312 (72.816)	Top-5 acc 88.672 (89.209)	lr 0.00007
Train [116][2850/3239]	Time 0.236 (0.630)	Data Time 0.001 (0.015)	Loss 2.1833 (2.1571)	Entropy 0.63149 (0.63182)	Top-1 acc 73.047 (72.819)	Top-5 acc 89.453 (89.212)	lr 0.00007
Train [116][2860/3239]	Time 0.228 (0.629)	Data Time 0.001 (0.015)	Loss 2.3546 (2.1570)	Entropy 0.63145 (0.63182)	Top-1 acc 72.266 (72.825)	Top-5 acc 83.984 (89.215)	lr 0.00007
Train [116][2870/3239]	Time 0.227 (0.629)	Data Time 0.001 (0.015)	Loss 2.0741 (2.1569)	Entropy 0.63140 (0.63182)	Top-1 acc 73.047 (72.824)	Top-5 acc 91.406 (89.216)	lr 0.00007
Train [116][2880/3239]	Time 0.238 (0.628)	Data Time 0.001 (0.015)	Loss 2.1074 (2.1569)	Entropy 0.63136 (0.63182)	Top-1 acc 73.047 (72.822)	Top-5 acc 91.406 (89.216)	lr 0.00007
Train [116][2890/3239]	Time 0.222 (0.628)	Data Time 0.001 (0.015)	Loss 2.0602 (2.1570)	Entropy 0.63105 (0.63182)	Top-1 acc 76.172 (72.820)	Top-5 acc 91.797 (89.216)	lr 0.00007
Train [116][2900/3239]	Time 0.263 (0.627)	Data Time 0.001 (0.015)	Loss 2.1071 (2.1569)	Entropy 0.63103 (0.63181)	Top-1 acc 75.391 (72.822)	Top-5 acc 91.016 (89.216)	lr 0.00007
Train [116][2910/3239]	Time 0.237 (0.627)	Data Time 0.001 (0.015)	Loss 2.1695 (2.1570)	Entropy 0.63104 (0.63181)	Top-1 acc 70.312 (72.819)	Top-5 acc 87.500 (89.215)	lr 0.00007
Train [116][2920/3239]	Time 0.269 (0.626)	Data Time 0.001 (0.015)	Loss 2.1973 (2.1571)	Entropy 0.63099 (0.63181)	Top-1 acc 72.656 (72.816)	Top-5 acc 88.672 (89.215)	lr 0.00007
Train [116][2930/3239]	Time 0.227 (0.626)	Data Time 0.001 (0.015)	Loss 2.1483 (2.1572)	Entropy 0.63090 (0.63181)	Top-1 acc 69.141 (72.810)	Top-5 acc 90.234 (89.214)	lr 0.00007
Train [116][2940/3239]	Time 0.230 (0.625)	Data Time 0.001 (0.015)	Loss 2.0578 (2.1570)	Entropy 0.63086 (0.63180)	Top-1 acc 76.172 (72.812)	Top-5 acc 92.578 (89.217)	lr 0.00007
Train [116][2950/3239]	Time 0.226 (0.625)	Data Time 0.001 (0.015)	Loss 2.2142 (2.1570)	Entropy 0.63081 (0.63180)	Top-1 acc 69.141 (72.813)	Top-5 acc 85.547 (89.215)	lr 0.00007
Train [116][2960/3239]	Time 0.244 (0.624)	Data Time 0.001 (0.015)	Loss 2.0565 (2.1569)	Entropy 0.63084 (0.63180)	Top-1 acc 73.047 (72.814)	Top-5 acc 91.797 (89.215)	lr 0.00007
Train [116][2970/3239]	Time 0.449 (0.624)	Data Time 0.001 (0.015)	Loss 2.1871 (2.1569)	Entropy 0.63082 (0.63179)	Top-1 acc 72.266 (72.815)	Top-5 acc 87.891 (89.216)	lr 0.00007
Train [116][2980/3239]	Time 0.236 (0.623)	Data Time 0.001 (0.015)	Loss 2.0604 (2.1568)	Entropy 0.63064 (0.63179)	Top-1 acc 73.828 (72.818)	Top-5 acc 91.406 (89.220)	lr 0.00007
Train [116][2990/3239]	Time 0.281 (0.623)	Data Time 0.001 (0.015)	Loss 2.1316 (2.1569)	Entropy 0.63064 (0.63179)	Top-1 acc 77.734 (72.819)	Top-5 acc 86.719 (89.219)	lr 0.00007
Train [116][3000/3239]	Time 0.230 (0.622)	Data Time 0.001 (0.014)	Loss 2.0012 (2.1566)	Entropy 0.63056 (0.63178)	Top-1 acc 78.516 (72.827)	Top-5 acc 93.359 (89.221)	lr 0.00007
Train [116][3010/3239]	Time 0.349 (0.622)	Data Time 0.001 (0.014)	Loss 2.2700 (2.1566)	Entropy 0.63056 (0.63178)	Top-1 acc 70.703 (72.828)	Top-5 acc 89.062 (89.222)	lr 0.00007
Train [116][3020/3239]	Time 0.238 (0.622)	Data Time 0.001 (0.014)	Loss 2.2605 (2.1566)	Entropy 0.63045 (0.63177)	Top-1 acc 73.438 (72.830)	Top-5 acc 86.719 (89.222)	lr 0.00007
Train [116][3030/3239]	Time 0.235 (0.621)	Data Time 0.001 (0.014)	Loss 1.9884 (2.1565)	Entropy 0.63045 (0.63177)	Top-1 acc 78.516 (72.832)	Top-5 acc 92.188 (89.224)	lr 0.00007
Train [116][3040/3239]	Time 0.231 (0.621)	Data Time 0.001 (0.014)	Loss 2.1685 (2.1566)	Entropy 0.63040 (0.63176)	Top-1 acc 73.828 (72.830)	Top-5 acc 89.453 (89.224)	lr 0.00007
Train [116][3050/3239]	Time 0.370 (0.620)	Data Time 0.001 (0.014)	Loss 2.1663 (2.1565)	Entropy 0.63037 (0.63176)	Top-1 acc 75.000 (72.833)	Top-5 acc 87.891 (89.224)	lr 0.00007
Train [116][3060/3239]	Time 0.283 (0.620)	Data Time 0.001 (0.014)	Loss 2.2185 (2.1564)	Entropy 0.63034 (0.63176)	Top-1 acc 72.266 (72.836)	Top-5 acc 88.672 (89.227)	lr 0.00007
Train [116][3070/3239]	Time 0.242 (0.619)	Data Time 0.001 (0.014)	Loss 2.1526 (2.1565)	Entropy 0.63030 (0.63175)	Top-1 acc 73.047 (72.830)	Top-5 acc 89.844 (89.224)	lr 0.00007
Train [116][3080/3239]	Time 0.266 (0.619)	Data Time 0.001 (0.014)	Loss 2.0862 (2.1564)	Entropy 0.63024 (0.63175)	Top-1 acc 75.391 (72.830)	Top-5 acc 89.844 (89.225)	lr 0.00007
Train [116][3090/3239]	Time 0.350 (0.619)	Data Time 0.002 (0.014)	Loss 2.1076 (2.1564)	Entropy 0.63012 (0.63174)	Top-1 acc 73.438 (72.829)	Top-5 acc 88.281 (89.225)	lr 0.00007
Train [116][3100/3239]	Time 0.430 (0.635)	Data Time 0.004 (0.014)	Loss 2.3255 (2.1566)	Entropy 0.63001 (0.63174)	Top-1 acc 69.141 (72.827)	Top-5 acc 85.156 (89.224)	lr 0.00007
Train [116][3110/3239]	Time 0.246 (0.635)	Data Time 0.002 (0.014)	Loss 2.2442 (2.1567)	Entropy 0.62994 (0.63173)	Top-1 acc 71.484 (72.822)	Top-5 acc 87.109 (89.222)	lr 0.00007
Train [116][3120/3239]	Time 0.227 (0.634)	Data Time 0.001 (0.014)	Loss 2.0596 (2.1566)	Entropy 0.62989 (0.63172)	Top-1 acc 74.609 (72.823)	Top-5 acc 91.797 (89.223)	lr 0.00007
Train [116][3130/3239]	Time 0.365 (0.634)	Data Time 0.002 (0.014)	Loss 2.0955 (2.1567)	Entropy 0.62992 (0.63172)	Top-1 acc 69.531 (72.819)	Top-5 acc 91.406 (89.225)	lr 0.00007
Train [116][3140/3239]	Time 0.268 (0.633)	Data Time 0.001 (0.014)	Loss 2.2205 (2.1566)	Entropy 0.62988 (0.63171)	Top-1 acc 68.750 (72.819)	Top-5 acc 87.109 (89.224)	lr 0.00007
Train [116][3150/3239]	Time 0.238 (0.633)	Data Time 0.001 (0.014)	Loss 2.0179 (2.1565)	Entropy 0.62989 (0.63171)	Top-1 acc 76.172 (72.822)	Top-5 acc 91.016 (89.226)	lr 0.00007
Train [116][3160/3239]	Time 0.259 (0.632)	Data Time 0.001 (0.014)	Loss 2.3685 (2.1564)	Entropy 0.62969 (0.63170)	Top-1 acc 65.234 (72.822)	Top-5 acc 89.062 (89.230)	lr 0.00007
Train [116][3170/3239]	Time 0.340 (0.632)	Data Time 0.001 (0.014)	Loss 2.2715 (2.1564)	Entropy 0.62970 (0.63170)	Top-1 acc 72.656 (72.821)	Top-5 acc 86.328 (89.230)	lr 0.00007
Train [116][3180/3239]	Time 0.231 (0.631)	Data Time 0.000 (0.014)	Loss 2.1861 (2.1564)	Entropy 0.62970 (0.63169)	Top-1 acc 71.094 (72.821)	Top-5 acc 88.672 (89.229)	lr 0.00007
Train [116][3190/3239]	Time 0.217 (0.631)	Data Time 0.000 (0.014)	Loss 2.1594 (2.1564)	Entropy 0.62969 (0.63168)	Top-1 acc 71.094 (72.819)	Top-5 acc 90.625 (89.233)	lr 0.00007
Train [116][3200/3239]	Time 0.231 (0.630)	Data Time 0.000 (0.014)	Loss 2.1921 (2.1563)	Entropy 0.62963 (0.63168)	Top-1 acc 70.312 (72.822)	Top-5 acc 88.281 (89.235)	lr 0.00007
Train [116][3210/3239]	Time 0.228 (0.630)	Data Time 0.000 (0.014)	Loss 2.2697 (2.1563)	Entropy 0.62957 (0.63167)	Top-1 acc 68.750 (72.821)	Top-5 acc 88.281 (89.235)	lr 0.00007
Train [116][3220/3239]	Time 0.239 (0.629)	Data Time 0.000 (0.014)	Loss 2.1133 (2.1563)	Entropy 0.62951 (0.63166)	Top-1 acc 75.391 (72.824)	Top-5 acc 90.625 (89.236)	lr 0.00007
Train [116][3230/3239]	Time 0.236 (0.629)	Data Time 0.000 (0.014)	Loss 2.1478 (2.1564)	Entropy 0.62951 (0.63166)	Top-1 acc 75.000 (72.822)	Top-5 acc 86.719 (89.235)	lr 0.00007
Train [116][3239/3239]	Time 2.354 (0.628)	Data Time 0.000 (0.014)	Loss 2.3036 (2.1563)	Entropy 0.62951 (0.63165)	Top-1 acc 70.370 (72.826)	Top-5 acc 83.951 (89.235)	lr 0.00007
==========Valid [116/120]	loss 1.196	top-1 acc 72.700 (72.745)	top-5 acc 90.024	Train top-1 72.826	top-5 89.235	Entropy 0.62951	Latency-None: 0.000ms	Flops: 544.27M
Train [117][0/3239]	Time 42.541 (42.541)	Data Time 39.388 (39.388)	Loss 2.1358 (2.1358)	Entropy 0.62925 (0.62925)	Top-1 acc 77.344 (77.344)	Top-5 acc 88.672 (88.672)	lr 0.00007
Train [117][10/3239]	Time 2.657 (4.565)	Data Time 0.002 (3.748)	Loss 2.1657 (2.1201)	Entropy 0.62925 (0.62925)	Top-1 acc 71.875 (74.467)	Top-5 acc 89.844 (89.311)	lr 0.00007
Train [117][20/3239]	Time 0.341 (2.512)	Data Time 0.001 (1.964)	Loss 2.1487 (2.1493)	Entropy 0.62919 (0.62922)	Top-1 acc 73.438 (73.289)	Top-5 acc 89.844 (89.230)	lr 0.00007
Train [117][30/3239]	Time 0.220 (1.858)	Data Time 0.001 (1.331)	Loss 2.2294 (2.1544)	Entropy 0.62910 (0.62918)	Top-1 acc 71.094 (73.059)	Top-5 acc 87.891 (89.138)	lr 0.00007
Train [117][40/3239]	Time 0.244 (1.526)	Data Time 0.001 (1.007)	Loss 2.0305 (2.1529)	Entropy 0.62904 (0.62915)	Top-1 acc 78.125 (73.161)	Top-5 acc 89.844 (89.015)	lr 0.00007
Train [117][50/3239]	Time 0.239 (1.323)	Data Time 0.001 (0.810)	Loss 2.1988 (2.1436)	Entropy 0.62900 (0.62912)	Top-1 acc 72.656 (73.568)	Top-5 acc 86.719 (89.239)	lr 0.00007
Train [117][60/3239]	Time 0.372 (1.188)	Data Time 0.001 (0.677)	Loss 2.0587 (2.1355)	Entropy 0.62886 (0.62909)	Top-1 acc 74.219 (73.809)	Top-5 acc 90.625 (89.389)	lr 0.00007
Train [117][70/3239]	Time 0.246 (1.091)	Data Time 0.001 (0.582)	Loss 2.0354 (2.1309)	Entropy 0.62878 (0.62905)	Top-1 acc 75.391 (73.773)	Top-5 acc 92.578 (89.508)	lr 0.00007
Train [117][80/3239]	Time 0.235 (1.016)	Data Time 0.001 (0.510)	Loss 2.1966 (2.1361)	Entropy 0.62878 (0.62902)	Top-1 acc 74.609 (73.703)	Top-5 acc 87.500 (89.419)	lr 0.00007
Train [117][90/3239]	Time 0.225 (0.957)	Data Time 0.001 (0.454)	Loss 2.0862 (2.1362)	Entropy 0.62874 (0.62899)	Top-1 acc 73.047 (73.635)	Top-5 acc 87.500 (89.479)	lr 0.00007
Train [117][100/3239]	Time 0.323 (0.912)	Data Time 0.001 (0.410)	Loss 2.1251 (2.1376)	Entropy 0.62876 (0.62897)	Top-1 acc 74.609 (73.523)	Top-5 acc 89.844 (89.469)	lr 0.00007
Train [117][110/3239]	Time 0.223 (0.873)	Data Time 0.001 (0.373)	Loss 2.0447 (2.1381)	Entropy 0.62878 (0.62895)	Top-1 acc 75.000 (73.459)	Top-5 acc 91.406 (89.432)	lr 0.00007
Train [117][120/3239]	Time 2.657 (0.841)	Data Time 0.001 (0.342)	Loss 1.9997 (2.1356)	Entropy 0.62878 (0.62893)	Top-1 acc 76.953 (73.586)	Top-5 acc 92.578 (89.514)	lr 0.00007
Train [117][130/3239]	Time 0.239 (0.795)	Data Time 0.002 (0.316)	Loss 2.0084 (2.1364)	Entropy 0.62877 (0.62892)	Top-1 acc 78.906 (73.509)	Top-5 acc 91.016 (89.510)	lr 0.00007
Train [117][140/3239]	Time 0.235 (0.773)	Data Time 0.001 (0.294)	Loss 2.1326 (2.1383)	Entropy 0.62871 (0.62891)	Top-1 acc 75.000 (73.438)	Top-5 acc 89.062 (89.431)	lr 0.00007
Train [117][150/3239]	Time 0.234 (0.755)	Data Time 0.001 (0.274)	Loss 2.0543 (2.1357)	Entropy 0.62868 (0.62889)	Top-1 acc 76.172 (73.479)	Top-5 acc 89.453 (89.471)	lr 0.00007
Train [117][160/3239]	Time 0.239 (0.738)	Data Time 0.001 (0.258)	Loss 2.1717 (2.1374)	Entropy 0.62875 (0.62888)	Top-1 acc 74.219 (73.438)	Top-5 acc 87.891 (89.458)	lr 0.00007
Train [117][170/3239]	Time 0.240 (0.723)	Data Time 0.001 (0.243)	Loss 2.3103 (2.1394)	Entropy 0.62871 (0.62887)	Top-1 acc 68.750 (73.412)	Top-5 acc 88.672 (89.433)	lr 0.00007
Train [117][180/3239]	Time 0.230 (0.710)	Data Time 0.001 (0.229)	Loss 2.2870 (2.1418)	Entropy 0.62871 (0.62886)	Top-1 acc 68.359 (73.371)	Top-5 acc 87.891 (89.378)	lr 0.00007
Train [117][190/3239]	Time 0.230 (0.698)	Data Time 0.001 (0.217)	Loss 2.3936 (2.1402)	Entropy 0.62876 (0.62886)	Top-1 acc 66.016 (73.366)	Top-5 acc 85.547 (89.420)	lr 0.00007
Train [117][200/3239]	Time 0.231 (0.688)	Data Time 0.002 (0.207)	Loss 2.2493 (2.1414)	Entropy 0.62873 (0.62885)	Top-1 acc 70.703 (73.317)	Top-5 acc 83.984 (89.391)	lr 0.00007
Train [117][210/3239]	Time 0.283 (0.943)	Data Time 0.002 (0.197)	Loss 2.0731 (2.1424)	Entropy 0.62879 (0.62885)	Top-1 acc 74.219 (73.284)	Top-5 acc 90.234 (89.386)	lr 0.00007
Train [117][220/3239]	Time 0.234 (0.923)	Data Time 0.002 (0.188)	Loss 2.1980 (2.1428)	Entropy 0.62881 (0.62884)	Top-1 acc 72.266 (73.238)	Top-5 acc 87.891 (89.352)	lr 0.00007
Train [117][230/3239]	Time 2.528 (0.904)	Data Time 0.002 (0.180)	Loss 2.0921 (2.1443)	Entropy 0.62881 (0.62884)	Top-1 acc 77.344 (73.201)	Top-5 acc 91.406 (89.352)	lr 0.00007
Train [117][240/3239]	Time 0.240 (0.877)	Data Time 0.001 (0.173)	Loss 2.2107 (2.1464)	Entropy 0.62879 (0.62884)	Top-1 acc 70.312 (73.121)	Top-5 acc 88.672 (89.333)	lr 0.00007
Train [117][250/3239]	Time 0.230 (0.861)	Data Time 0.001 (0.166)	Loss 2.1373 (2.1453)	Entropy 0.62878 (0.62884)	Top-1 acc 73.438 (73.132)	Top-5 acc 89.062 (89.357)	lr 0.00007
Train [117][260/3239]	Time 0.222 (0.847)	Data Time 0.001 (0.160)	Loss 2.2696 (2.1481)	Entropy 0.62878 (0.62884)	Top-1 acc 71.875 (73.092)	Top-5 acc 85.547 (89.302)	lr 0.00007
Train [117][270/3239]	Time 0.237 (0.833)	Data Time 0.001 (0.154)	Loss 2.2194 (2.1475)	Entropy 0.62879 (0.62883)	Top-1 acc 71.484 (73.123)	Top-5 acc 90.625 (89.309)	lr 0.00007
Train [117][280/3239]	Time 0.239 (0.821)	Data Time 0.001 (0.148)	Loss 2.0484 (2.1469)	Entropy 0.62876 (0.62883)	Top-1 acc 74.219 (73.121)	Top-5 acc 91.797 (89.302)	lr 0.00007
Train [117][290/3239]	Time 0.238 (0.809)	Data Time 0.001 (0.143)	Loss 2.1461 (2.1476)	Entropy 0.62874 (0.62883)	Top-1 acc 71.484 (73.075)	Top-5 acc 87.891 (89.283)	lr 0.00007
Train [117][300/3239]	Time 0.225 (0.797)	Data Time 0.001 (0.139)	Loss 2.2007 (2.1494)	Entropy 0.62872 (0.62883)	Top-1 acc 71.094 (73.047)	Top-5 acc 87.500 (89.255)	lr 0.00007
Train [117][310/3239]	Time 0.220 (0.787)	Data Time 0.001 (0.134)	Loss 2.2250 (2.1503)	Entropy 0.62863 (0.62882)	Top-1 acc 75.391 (73.051)	Top-5 acc 86.719 (89.246)	lr 0.00007
Train [117][320/3239]	Time 0.231 (0.777)	Data Time 0.001 (0.130)	Loss 2.2942 (2.1500)	Entropy 0.62864 (0.62882)	Top-1 acc 67.969 (73.058)	Top-5 acc 87.891 (89.274)	lr 0.00007
Train [117][330/3239]	Time 0.230 (0.768)	Data Time 0.001 (0.126)	Loss 2.1843 (2.1498)	Entropy 0.62859 (0.62881)	Top-1 acc 72.266 (73.057)	Top-5 acc 91.016 (89.267)	lr 0.00007
Train [117][340/3239]	Time 2.581 (0.759)	Data Time 0.001 (0.122)	Loss 2.0326 (2.1506)	Entropy 0.62859 (0.62880)	Top-1 acc 77.344 (73.045)	Top-5 acc 89.453 (89.258)	lr 0.00006
Train [117][350/3239]	Time 0.328 (0.744)	Data Time 0.001 (0.119)	Loss 2.2772 (2.1507)	Entropy 0.62861 (0.62880)	Top-1 acc 71.875 (73.042)	Top-5 acc 84.375 (89.254)	lr 0.00006
Train [117][360/3239]	Time 0.224 (0.737)	Data Time 0.001 (0.116)	Loss 2.0895 (2.1513)	Entropy 0.62853 (0.62879)	Top-1 acc 75.000 (73.031)	Top-5 acc 88.281 (89.236)	lr 0.00006
Train [117][370/3239]	Time 0.236 (0.731)	Data Time 0.001 (0.113)	Loss 2.1048 (2.1512)	Entropy 0.62849 (0.62878)	Top-1 acc 75.391 (73.043)	Top-5 acc 89.844 (89.238)	lr 0.00006
Train [117][380/3239]	Time 0.261 (0.724)	Data Time 0.001 (0.110)	Loss 2.1894 (2.1520)	Entropy 0.62846 (0.62877)	Top-1 acc 73.438 (73.045)	Top-5 acc 87.109 (89.207)	lr 0.00006
Train [117][390/3239]	Time 0.320 (0.718)	Data Time 0.001 (0.107)	Loss 2.0546 (2.1504)	Entropy 0.62845 (0.62877)	Top-1 acc 75.000 (73.091)	Top-5 acc 91.406 (89.242)	lr 0.00006
Train [117][400/3239]	Time 0.229 (0.712)	Data Time 0.001 (0.104)	Loss 2.0909 (2.1495)	Entropy 0.62846 (0.62876)	Top-1 acc 75.781 (73.112)	Top-5 acc 91.016 (89.255)	lr 0.00006
Train [117][410/3239]	Time 0.243 (0.707)	Data Time 0.007 (0.102)	Loss 2.0967 (2.1492)	Entropy 0.62849 (0.62875)	Top-1 acc 74.609 (73.118)	Top-5 acc 92.188 (89.275)	lr 0.00006
Train [117][420/3239]	Time 0.235 (0.701)	Data Time 0.002 (0.099)	Loss 2.0581 (2.1487)	Entropy 0.62855 (0.62875)	Top-1 acc 76.562 (73.131)	Top-5 acc 91.016 (89.289)	lr 0.00006
Train [117][430/3239]	Time 0.340 (0.696)	Data Time 0.001 (0.097)	Loss 2.1685 (2.1489)	Entropy 0.62852 (0.62874)	Top-1 acc 73.047 (73.129)	Top-5 acc 87.109 (89.287)	lr 0.00006
Train [117][440/3239]	Time 0.221 (0.692)	Data Time 0.001 (0.095)	Loss 2.1922 (2.1488)	Entropy 0.62852 (0.62874)	Top-1 acc 70.703 (73.135)	Top-5 acc 91.016 (89.289)	lr 0.00006
Train [117][450/3239]	Time 2.590 (0.687)	Data Time 0.001 (0.093)	Loss 2.1458 (2.1488)	Entropy 0.62852 (0.62873)	Top-1 acc 69.922 (73.138)	Top-5 acc 90.234 (89.298)	lr 0.00006
Train [117][460/3239]	Time 0.265 (0.677)	Data Time 0.001 (0.091)	Loss 2.3282 (2.1493)	Entropy 0.62846 (0.62873)	Top-1 acc 69.922 (73.143)	Top-5 acc 86.328 (89.282)	lr 0.00006
Train [117][470/3239]	Time 0.224 (0.673)	Data Time 0.001 (0.089)	Loss 2.1729 (2.1502)	Entropy 0.62836 (0.62872)	Top-1 acc 71.875 (73.123)	Top-5 acc 87.891 (89.274)	lr 0.00006
Train [117][480/3239]	Time 0.236 (0.669)	Data Time 0.001 (0.087)	Loss 2.1707 (2.1500)	Entropy 0.62819 (0.62871)	Top-1 acc 68.359 (73.114)	Top-5 acc 89.062 (89.285)	lr 0.00006
Train [117][490/3239]	Time 0.225 (0.665)	Data Time 0.001 (0.086)	Loss 2.1623 (2.1508)	Entropy 0.62816 (0.62870)	Top-1 acc 72.656 (73.090)	Top-5 acc 89.453 (89.284)	lr 0.00006
Train [117][500/3239]	Time 0.256 (0.662)	Data Time 0.001 (0.084)	Loss 2.1851 (2.1516)	Entropy 0.62812 (0.62869)	Top-1 acc 73.438 (73.064)	Top-5 acc 88.672 (89.274)	lr 0.00006
Train [117][510/3239]	Time 0.220 (0.658)	Data Time 0.001 (0.082)	Loss 2.2637 (2.1523)	Entropy 0.62811 (0.62867)	Top-1 acc 69.922 (73.034)	Top-5 acc 85.938 (89.254)	lr 0.00006
Train [117][520/3239]	Time 0.243 (0.655)	Data Time 0.001 (0.081)	Loss 2.1609 (2.1523)	Entropy 0.62806 (0.62866)	Top-1 acc 75.000 (73.032)	Top-5 acc 88.672 (89.248)	lr 0.00006
Train [117][530/3239]	Time 0.269 (0.652)	Data Time 0.001 (0.079)	Loss 2.0743 (2.1528)	Entropy 0.62799 (0.62865)	Top-1 acc 75.000 (73.019)	Top-5 acc 90.234 (89.243)	lr 0.00006
Train [117][540/3239]	Time 0.230 (0.649)	Data Time 0.001 (0.078)	Loss 2.1412 (2.1532)	Entropy 0.62801 (0.62864)	Top-1 acc 73.047 (73.011)	Top-5 acc 88.672 (89.242)	lr 0.00006
Train [117][550/3239]	Time 0.260 (0.645)	Data Time 0.001 (0.076)	Loss 2.1699 (2.1533)	Entropy 0.62803 (0.62863)	Top-1 acc 69.922 (73.005)	Top-5 acc 91.016 (89.243)	lr 0.00006
Train [117][560/3239]	Time 2.597 (0.643)	Data Time 0.001 (0.075)	Loss 2.1760 (2.1530)	Entropy 0.62803 (0.62862)	Top-1 acc 71.484 (73.009)	Top-5 acc 88.672 (89.248)	lr 0.00006
Train [117][570/3239]	Time 0.247 (0.636)	Data Time 0.001 (0.074)	Loss 2.0564 (2.1523)	Entropy 0.62811 (0.62861)	Top-1 acc 73.828 (73.023)	Top-5 acc 91.016 (89.260)	lr 0.00006
Train [117][580/3239]	Time 0.231 (0.727)	Data Time 0.002 (0.073)	Loss 2.1658 (2.1528)	Entropy 0.62806 (0.62860)	Top-1 acc 73.438 (73.008)	Top-5 acc 88.281 (89.241)	lr 0.00006
Train [117][590/3239]	Time 0.237 (0.723)	Data Time 0.002 (0.071)	Loss 2.1194 (2.1524)	Entropy 0.62802 (0.62859)	Top-1 acc 73.047 (73.016)	Top-5 acc 90.625 (89.240)	lr 0.00006
Train [117][600/3239]	Time 0.234 (0.719)	Data Time 0.001 (0.070)	Loss 2.1826 (2.1528)	Entropy 0.62799 (0.62858)	Top-1 acc 70.703 (73.007)	Top-5 acc 89.844 (89.240)	lr 0.00006
Train [117][610/3239]	Time 0.234 (0.715)	Data Time 0.001 (0.069)	Loss 2.2377 (2.1529)	Entropy 0.62789 (0.62857)	Top-1 acc 68.359 (73.005)	Top-5 acc 87.891 (89.238)	lr 0.00006
Train [117][620/3239]	Time 0.238 (0.711)	Data Time 0.001 (0.068)	Loss 2.1021 (2.1530)	Entropy 0.62798 (0.62856)	Top-1 acc 74.219 (72.992)	Top-5 acc 91.797 (89.238)	lr 0.00006
Train [117][630/3239]	Time 0.242 (0.707)	Data Time 0.001 (0.067)	Loss 2.1500 (2.1531)	Entropy 0.62792 (0.62855)	Top-1 acc 71.484 (72.986)	Top-5 acc 91.016 (89.246)	lr 0.00006
Train [117][640/3239]	Time 0.359 (0.704)	Data Time 0.001 (0.066)	Loss 2.2366 (2.1535)	Entropy 0.62807 (0.62854)	Top-1 acc 70.703 (72.970)	Top-5 acc 87.109 (89.237)	lr 0.00006
Train [117][650/3239]	Time 0.221 (0.701)	Data Time 0.001 (0.065)	Loss 2.0936 (2.1531)	Entropy 0.62806 (0.62853)	Top-1 acc 76.562 (72.981)	Top-5 acc 89.844 (89.242)	lr 0.00006
Train [117][660/3239]	Time 0.289 (0.698)	Data Time 0.001 (0.064)	Loss 2.2424 (2.1535)	Entropy 0.62803 (0.62853)	Top-1 acc 69.922 (72.968)	Top-5 acc 89.062 (89.239)	lr 0.00006
Train [117][670/3239]	Time 2.611 (0.695)	Data Time 0.001 (0.063)	Loss 2.0899 (2.1529)	Entropy 0.62803 (0.62852)	Top-1 acc 75.000 (73.000)	Top-5 acc 89.062 (89.253)	lr 0.00006
Train [117][680/3239]	Time 0.320 (0.688)	Data Time 0.001 (0.062)	Loss 2.2303 (2.1527)	Entropy 0.62801 (0.62851)	Top-1 acc 70.312 (73.010)	Top-5 acc 87.891 (89.259)	lr 0.00006
Train [117][690/3239]	Time 0.245 (0.685)	Data Time 0.001 (0.061)	Loss 2.1504 (2.1525)	Entropy 0.62805 (0.62850)	Top-1 acc 72.656 (73.024)	Top-5 acc 87.500 (89.264)	lr 0.00006
Train [117][700/3239]	Time 0.237 (0.682)	Data Time 0.001 (0.060)	Loss 2.1621 (2.1521)	Entropy 0.62809 (0.62850)	Top-1 acc 71.484 (73.024)	Top-5 acc 89.062 (89.270)	lr 0.00006
Train [117][710/3239]	Time 0.206 (0.679)	Data Time 0.001 (0.060)	Loss 2.1533 (2.1528)	Entropy 0.62809 (0.62849)	Top-1 acc 72.266 (73.006)	Top-5 acc 87.891 (89.255)	lr 0.00006
Train [117][720/3239]	Time 0.364 (0.677)	Data Time 0.002 (0.059)	Loss 2.2910 (2.1523)	Entropy 0.62809 (0.62849)	Top-1 acc 73.828 (73.034)	Top-5 acc 87.109 (89.264)	lr 0.00006
Train [117][730/3239]	Time 0.224 (0.674)	Data Time 0.001 (0.058)	Loss 2.2826 (2.1526)	Entropy 0.62801 (0.62848)	Top-1 acc 67.969 (73.027)	Top-5 acc 87.891 (89.259)	lr 0.00006
Train [117][740/3239]	Time 0.221 (0.671)	Data Time 0.001 (0.057)	Loss 2.0662 (2.1523)	Entropy 0.62796 (0.62847)	Top-1 acc 74.609 (73.043)	Top-5 acc 91.797 (89.263)	lr 0.00006
Train [117][750/3239]	Time 0.234 (0.669)	Data Time 0.001 (0.056)	Loss 2.0863 (2.1528)	Entropy 0.62794 (0.62847)	Top-1 acc 75.781 (73.037)	Top-5 acc 91.797 (89.263)	lr 0.00006
Train [117][760/3239]	Time 0.348 (0.667)	Data Time 0.002 (0.056)	Loss 2.2161 (2.1518)	Entropy 0.62793 (0.62846)	Top-1 acc 71.484 (73.057)	Top-5 acc 86.719 (89.279)	lr 0.00006
Train [117][770/3239]	Time 0.219 (0.664)	Data Time 0.001 (0.055)	Loss 2.1843 (2.1516)	Entropy 0.62794 (0.62845)	Top-1 acc 68.750 (73.058)	Top-5 acc 87.500 (89.282)	lr 0.00006
Train [117][780/3239]	Time 2.582 (0.662)	Data Time 0.002 (0.054)	Loss 2.1159 (2.1520)	Entropy 0.62794 (0.62845)	Top-1 acc 73.828 (73.047)	Top-5 acc 89.453 (89.279)	lr 0.00006
Train [117][790/3239]	Time 0.237 (0.657)	Data Time 0.001 (0.054)	Loss 2.0909 (2.1519)	Entropy 0.62786 (0.62844)	Top-1 acc 75.000 (73.036)	Top-5 acc 89.844 (89.278)	lr 0.00006
Train [117][800/3239]	Time 0.254 (0.654)	Data Time 0.001 (0.053)	Loss 2.1096 (2.1523)	Entropy 0.62778 (0.62843)	Top-1 acc 74.219 (73.012)	Top-5 acc 88.672 (89.270)	lr 0.00006
Train [117][810/3239]	Time 0.243 (0.652)	Data Time 0.001 (0.052)	Loss 2.0980 (2.1528)	Entropy 0.62780 (0.62842)	Top-1 acc 71.484 (72.985)	Top-5 acc 89.062 (89.256)	lr 0.00006
Train [117][820/3239]	Time 0.226 (0.650)	Data Time 0.001 (0.052)	Loss 2.2077 (2.1527)	Entropy 0.62778 (0.62842)	Top-1 acc 72.656 (72.987)	Top-5 acc 89.453 (89.257)	lr 0.00006
Train [117][830/3239]	Time 0.226 (0.648)	Data Time 0.001 (0.051)	Loss 2.0151 (2.1525)	Entropy 0.62777 (0.62841)	Top-1 acc 77.734 (72.993)	Top-5 acc 91.797 (89.257)	lr 0.00006
Train [117][840/3239]	Time 0.233 (0.646)	Data Time 0.001 (0.051)	Loss 2.1507 (2.1524)	Entropy 0.62768 (0.62840)	Top-1 acc 73.438 (72.999)	Top-5 acc 91.016 (89.262)	lr 0.00006
Train [117][850/3239]	Time 0.228 (0.645)	Data Time 0.001 (0.050)	Loss 2.2106 (2.1522)	Entropy 0.62766 (0.62839)	Top-1 acc 72.266 (73.012)	Top-5 acc 87.891 (89.265)	lr 0.00006
Train [117][860/3239]	Time 0.231 (0.643)	Data Time 0.001 (0.049)	Loss 2.2792 (2.1524)	Entropy 0.62759 (0.62838)	Top-1 acc 69.141 (73.005)	Top-5 acc 87.109 (89.261)	lr 0.00006
Train [117][870/3239]	Time 0.245 (0.641)	Data Time 0.001 (0.049)	Loss 2.2204 (2.1523)	Entropy 0.62751 (0.62837)	Top-1 acc 71.094 (73.020)	Top-5 acc 87.109 (89.260)	lr 0.00006
Train [117][880/3239]	Time 0.254 (0.639)	Data Time 0.001 (0.048)	Loss 2.0266 (2.1523)	Entropy 0.62744 (0.62836)	Top-1 acc 77.344 (73.026)	Top-5 acc 89.453 (89.262)	lr 0.00006
Train [117][890/3239]	Time 2.644 (0.638)	Data Time 0.001 (0.048)	Loss 2.1686 (2.1523)	Entropy 0.62744 (0.62835)	Top-1 acc 71.875 (73.034)	Top-5 acc 90.234 (89.261)	lr 0.00006
Train [117][900/3239]	Time 0.230 (0.633)	Data Time 0.001 (0.047)	Loss 2.1451 (2.1523)	Entropy 0.62736 (0.62834)	Top-1 acc 70.703 (73.018)	Top-5 acc 89.844 (89.258)	lr 0.00006
Train [117][910/3239]	Time 0.246 (0.632)	Data Time 0.001 (0.047)	Loss 2.0414 (2.1522)	Entropy 0.62729 (0.62833)	Top-1 acc 77.344 (73.018)	Top-5 acc 89.844 (89.259)	lr 0.00006
Train [117][920/3239]	Time 0.252 (0.630)	Data Time 0.001 (0.046)	Loss 2.0526 (2.1523)	Entropy 0.62729 (0.62832)	Top-1 acc 76.172 (73.019)	Top-5 acc 92.188 (89.253)	lr 0.00006
Train [117][930/3239]	Time 0.325 (0.628)	Data Time 0.001 (0.046)	Loss 2.2346 (2.1523)	Entropy 0.62727 (0.62831)	Top-1 acc 72.656 (73.020)	Top-5 acc 87.891 (89.255)	lr 0.00006
Train [117][940/3239]	Time 0.315 (0.681)	Data Time 0.006 (0.045)	Loss 2.2726 (2.1523)	Entropy 0.62723 (0.62830)	Top-1 acc 72.266 (73.026)	Top-5 acc 85.938 (89.255)	lr 0.00006
Train [117][950/3239]	Time 0.213 (0.680)	Data Time 0.002 (0.045)	Loss 2.2200 (2.1523)	Entropy 0.62717 (0.62829)	Top-1 acc 69.141 (73.012)	Top-5 acc 89.844 (89.257)	lr 0.00006
Train [117][960/3239]	Time 0.232 (0.678)	Data Time 0.002 (0.045)	Loss 2.1782 (2.1519)	Entropy 0.62717 (0.62827)	Top-1 acc 68.750 (73.018)	Top-5 acc 89.062 (89.267)	lr 0.00006
Train [117][970/3239]	Time 0.336 (0.676)	Data Time 0.001 (0.044)	Loss 2.0673 (2.1518)	Entropy 0.62717 (0.62826)	Top-1 acc 75.000 (73.017)	Top-5 acc 89.453 (89.272)	lr 0.00006
Train [117][980/3239]	Time 0.237 (0.674)	Data Time 0.001 (0.044)	Loss 2.0573 (2.1514)	Entropy 0.62717 (0.62825)	Top-1 acc 73.828 (73.018)	Top-5 acc 91.406 (89.279)	lr 0.00006
Train [117][990/3239]	Time 0.278 (0.672)	Data Time 0.001 (0.043)	Loss 1.9602 (2.1514)	Entropy 0.62687 (0.62824)	Top-1 acc 79.297 (73.022)	Top-5 acc 94.141 (89.280)	lr 0.00006
Train [117][1000/3239]	Time 2.545 (0.670)	Data Time 0.001 (0.043)	Loss 1.9945 (2.1513)	Entropy 0.62687 (0.62823)	Top-1 acc 78.516 (73.024)	Top-5 acc 92.578 (89.280)	lr 0.00006
Train [117][1010/3239]	Time 0.254 (0.666)	Data Time 0.003 (0.042)	Loss 2.2027 (2.1513)	Entropy 0.62689 (0.62821)	Top-1 acc 70.703 (73.012)	Top-5 acc 90.625 (89.285)	lr 0.00006
Train [117][1020/3239]	Time 0.234 (0.664)	Data Time 0.001 (0.042)	Loss 2.0966 (2.1517)	Entropy 0.62687 (0.62820)	Top-1 acc 74.219 (73.007)	Top-5 acc 89.062 (89.278)	lr 0.00006
Train [117][1030/3239]	Time 0.232 (0.662)	Data Time 0.002 (0.042)	Loss 2.2019 (2.1520)	Entropy 0.62689 (0.62819)	Top-1 acc 72.656 (72.999)	Top-5 acc 87.891 (89.271)	lr 0.00006
Train [117][1040/3239]	Time 0.232 (0.660)	Data Time 0.001 (0.041)	Loss 2.1533 (2.1521)	Entropy 0.62692 (0.62818)	Top-1 acc 72.656 (72.994)	Top-5 acc 89.062 (89.274)	lr 0.00006
Train [117][1050/3239]	Time 0.233 (0.659)	Data Time 0.001 (0.041)	Loss 2.0491 (2.1517)	Entropy 0.62681 (0.62816)	Top-1 acc 75.000 (73.002)	Top-5 acc 90.625 (89.280)	lr 0.00006
Train [117][1060/3239]	Time 0.231 (0.657)	Data Time 0.001 (0.040)	Loss 2.2703 (2.1518)	Entropy 0.62674 (0.62815)	Top-1 acc 69.922 (73.008)	Top-5 acc 85.547 (89.275)	lr 0.00006
Train [117][1070/3239]	Time 0.299 (0.656)	Data Time 0.001 (0.040)	Loss 2.0053 (2.1518)	Entropy 0.62671 (0.62814)	Top-1 acc 76.562 (73.004)	Top-5 acc 93.359 (89.273)	lr 0.00006
Train [117][1080/3239]	Time 0.236 (0.654)	Data Time 0.001 (0.040)	Loss 1.9643 (2.1515)	Entropy 0.62672 (0.62812)	Top-1 acc 78.906 (73.012)	Top-5 acc 92.188 (89.278)	lr 0.00006
Train [117][1090/3239]	Time 0.240 (0.652)	Data Time 0.001 (0.039)	Loss 2.1706 (2.1512)	Entropy 0.62669 (0.62811)	Top-1 acc 71.484 (73.022)	Top-5 acc 89.844 (89.279)	lr 0.00006
Train [117][1100/3239]	Time 0.230 (0.651)	Data Time 0.001 (0.039)	Loss 2.1368 (2.1512)	Entropy 0.62667 (0.62810)	Top-1 acc 73.438 (73.022)	Top-5 acc 88.281 (89.279)	lr 0.00006
Train [117][1110/3239]	Time 2.561 (0.649)	Data Time 0.001 (0.039)	Loss 2.1266 (2.1513)	Entropy 0.62667 (0.62808)	Top-1 acc 75.000 (73.022)	Top-5 acc 88.281 (89.275)	lr 0.00006
Train [117][1120/3239]	Time 0.239 (0.646)	Data Time 0.001 (0.038)	Loss 2.1677 (2.1515)	Entropy 0.62659 (0.62807)	Top-1 acc 69.922 (73.006)	Top-5 acc 90.625 (89.274)	lr 0.00006
Train [117][1130/3239]	Time 0.220 (0.644)	Data Time 0.001 (0.038)	Loss 2.1173 (2.1518)	Entropy 0.62658 (0.62806)	Top-1 acc 74.219 (73.003)	Top-5 acc 89.453 (89.271)	lr 0.00006
Train [117][1140/3239]	Time 0.244 (0.643)	Data Time 0.001 (0.038)	Loss 2.1243 (2.1517)	Entropy 0.62660 (0.62804)	Top-1 acc 75.391 (73.002)	Top-5 acc 87.109 (89.273)	lr 0.00006
Train [117][1150/3239]	Time 0.230 (0.642)	Data Time 0.001 (0.037)	Loss 2.3212 (2.1523)	Entropy 0.62654 (0.62803)	Top-1 acc 67.578 (72.985)	Top-5 acc 87.109 (89.266)	lr 0.00006
Train [117][1160/3239]	Time 0.240 (0.640)	Data Time 0.001 (0.037)	Loss 2.1300 (2.1521)	Entropy 0.62654 (0.62802)	Top-1 acc 70.312 (72.990)	Top-5 acc 90.234 (89.272)	lr 0.00006
Train [117][1170/3239]	Time 0.221 (0.639)	Data Time 0.001 (0.037)	Loss 2.2077 (2.1523)	Entropy 0.62654 (0.62801)	Top-1 acc 71.484 (72.980)	Top-5 acc 86.328 (89.261)	lr 0.00006
Train [117][1180/3239]	Time 0.313 (0.638)	Data Time 0.001 (0.036)	Loss 2.1866 (2.1519)	Entropy 0.62656 (0.62799)	Top-1 acc 68.750 (72.980)	Top-5 acc 89.844 (89.277)	lr 0.00006
Train [117][1190/3239]	Time 0.246 (0.636)	Data Time 0.001 (0.036)	Loss 2.2160 (2.1522)	Entropy 0.62658 (0.62798)	Top-1 acc 68.359 (72.973)	Top-5 acc 88.672 (89.270)	lr 0.00006
Train [117][1200/3239]	Time 0.226 (0.635)	Data Time 0.001 (0.036)	Loss 2.2651 (2.1523)	Entropy 0.62656 (0.62797)	Top-1 acc 70.312 (72.972)	Top-5 acc 88.281 (89.272)	lr 0.00006
Train [117][1210/3239]	Time 0.229 (0.634)	Data Time 0.001 (0.036)	Loss 2.1618 (2.1522)	Entropy 0.62658 (0.62796)	Top-1 acc 71.094 (72.974)	Top-5 acc 88.672 (89.278)	lr 0.00006
Train [117][1220/3239]	Time 2.653 (0.632)	Data Time 0.001 (0.035)	Loss 2.1000 (2.1522)	Entropy 0.62658 (0.62795)	Top-1 acc 74.609 (72.976)	Top-5 acc 90.234 (89.276)	lr 0.00006
Train [117][1230/3239]	Time 0.236 (0.629)	Data Time 0.001 (0.035)	Loss 2.1803 (2.1523)	Entropy 0.62660 (0.62794)	Top-1 acc 75.000 (72.977)	Top-5 acc 89.453 (89.277)	lr 0.00006
Train [117][1240/3239]	Time 0.219 (0.628)	Data Time 0.001 (0.035)	Loss 2.2446 (2.1519)	Entropy 0.62655 (0.62793)	Top-1 acc 72.266 (72.989)	Top-5 acc 87.891 (89.287)	lr 0.00006
Train [117][1250/3239]	Time 0.233 (0.627)	Data Time 0.001 (0.035)	Loss 2.2331 (2.1523)	Entropy 0.62654 (0.62791)	Top-1 acc 72.266 (72.976)	Top-5 acc 88.672 (89.280)	lr 0.00006
Train [117][1260/3239]	Time 0.267 (0.626)	Data Time 0.001 (0.034)	Loss 2.0583 (2.1522)	Entropy 0.62657 (0.62790)	Top-1 acc 74.219 (72.974)	Top-5 acc 91.406 (89.277)	lr 0.00006
Train [117][1270/3239]	Time 0.238 (0.625)	Data Time 0.001 (0.034)	Loss 2.1205 (2.1523)	Entropy 0.62653 (0.62789)	Top-1 acc 73.047 (72.976)	Top-5 acc 91.016 (89.269)	lr 0.00006
Train [117][1280/3239]	Time 0.227 (0.623)	Data Time 0.001 (0.034)	Loss 2.0910 (2.1523)	Entropy 0.62648 (0.62788)	Top-1 acc 73.828 (72.971)	Top-5 acc 90.234 (89.271)	lr 0.00006
Train [117][1290/3239]	Time 0.233 (0.622)	Data Time 0.001 (0.034)	Loss 2.0708 (2.1524)	Entropy 0.62641 (0.62787)	Top-1 acc 75.781 (72.968)	Top-5 acc 90.234 (89.273)	lr 0.00006
Train [117][1300/3239]	Time 0.341 (0.665)	Data Time 0.002 (0.033)	Loss 2.1069 (2.1527)	Entropy 0.62630 (0.62786)	Top-1 acc 72.266 (72.957)	Top-5 acc 89.062 (89.263)	lr 0.00006
Train [117][1310/3239]	Time 0.228 (0.663)	Data Time 0.002 (0.033)	Loss 2.1446 (2.1525)	Entropy 0.62624 (0.62785)	Top-1 acc 72.266 (72.966)	Top-5 acc 88.281 (89.266)	lr 0.00006
Train [117][1320/3239]	Time 0.258 (0.662)	Data Time 0.001 (0.033)	Loss 2.1486 (2.1525)	Entropy 0.62618 (0.62784)	Top-1 acc 73.828 (72.960)	Top-5 acc 88.672 (89.267)	lr 0.00006
Train [117][1330/3239]	Time 2.612 (0.661)	Data Time 0.001 (0.033)	Loss 2.1198 (2.1524)	Entropy 0.62618 (0.62782)	Top-1 acc 74.609 (72.965)	Top-5 acc 89.453 (89.268)	lr 0.00006
Train [117][1340/3239]	Time 0.236 (0.657)	Data Time 0.001 (0.032)	Loss 2.0636 (2.1524)	Entropy 0.62616 (0.62781)	Top-1 acc 76.562 (72.959)	Top-5 acc 92.188 (89.265)	lr 0.00006
Train [117][1350/3239]	Time 0.246 (0.656)	Data Time 0.001 (0.032)	Loss 2.1116 (2.1524)	Entropy 0.62614 (0.62780)	Top-1 acc 74.219 (72.954)	Top-5 acc 91.406 (89.268)	lr 0.00005
Train [117][1360/3239]	Time 0.242 (0.655)	Data Time 0.001 (0.032)	Loss 2.1630 (2.1525)	Entropy 0.62611 (0.62779)	Top-1 acc 72.266 (72.955)	Top-5 acc 89.062 (89.265)	lr 0.00005
Train [117][1370/3239]	Time 0.226 (0.654)	Data Time 0.001 (0.032)	Loss 2.1634 (2.1523)	Entropy 0.62603 (0.62777)	Top-1 acc 73.828 (72.958)	Top-5 acc 89.062 (89.267)	lr 0.00005
Train [117][1380/3239]	Time 0.242 (0.652)	Data Time 0.001 (0.031)	Loss 2.0597 (2.1525)	Entropy 0.62597 (0.62776)	Top-1 acc 75.000 (72.957)	Top-5 acc 90.625 (89.261)	lr 0.00005
Train [117][1390/3239]	Time 0.223 (0.651)	Data Time 0.001 (0.031)	Loss 2.2392 (2.1528)	Entropy 0.62590 (0.62775)	Top-1 acc 72.656 (72.956)	Top-5 acc 89.453 (89.250)	lr 0.00005
Train [117][1400/3239]	Time 0.226 (0.650)	Data Time 0.001 (0.031)	Loss 2.0284 (2.1529)	Entropy 0.62593 (0.62773)	Top-1 acc 75.000 (72.948)	Top-5 acc 91.406 (89.247)	lr 0.00005
Train [117][1410/3239]	Time 0.228 (0.649)	Data Time 0.001 (0.031)	Loss 2.0575 (2.1530)	Entropy 0.62593 (0.62772)	Top-1 acc 72.266 (72.945)	Top-5 acc 90.234 (89.242)	lr 0.00005
Train [117][1420/3239]	Time 0.231 (0.648)	Data Time 0.001 (0.031)	Loss 2.3439 (2.1531)	Entropy 0.62588 (0.62771)	Top-1 acc 67.188 (72.947)	Top-5 acc 86.328 (89.243)	lr 0.00005
Train [117][1430/3239]	Time 0.289 (0.647)	Data Time 0.001 (0.030)	Loss 2.1766 (2.1529)	Entropy 0.62581 (0.62770)	Top-1 acc 73.438 (72.949)	Top-5 acc 89.844 (89.246)	lr 0.00005
Train [117][1440/3239]	Time 2.675 (0.646)	Data Time 0.002 (0.030)	Loss 2.0636 (2.1525)	Entropy 0.62581 (0.62768)	Top-1 acc 76.562 (72.961)	Top-5 acc 90.234 (89.255)	lr 0.00005
Train [117][1450/3239]	Time 0.237 (0.643)	Data Time 0.001 (0.030)	Loss 2.1811 (2.1523)	Entropy 0.62589 (0.62767)	Top-1 acc 73.438 (72.970)	Top-5 acc 86.719 (89.255)	lr 0.00005
Train [117][1460/3239]	Time 0.238 (0.642)	Data Time 0.001 (0.030)	Loss 2.0499 (2.1523)	Entropy 0.62589 (0.62766)	Top-1 acc 74.609 (72.971)	Top-5 acc 89.453 (89.249)	lr 0.00005
Train [117][1470/3239]	Time 0.251 (0.641)	Data Time 0.001 (0.030)	Loss 2.0298 (2.1522)	Entropy 0.62584 (0.62765)	Top-1 acc 76.172 (72.972)	Top-5 acc 91.016 (89.250)	lr 0.00005
Train [117][1480/3239]	Time 0.220 (0.640)	Data Time 0.002 (0.029)	Loss 2.0074 (2.1520)	Entropy 0.62579 (0.62763)	Top-1 acc 75.000 (72.976)	Top-5 acc 91.016 (89.252)	lr 0.00005
Train [117][1490/3239]	Time 0.245 (0.639)	Data Time 0.001 (0.029)	Loss 2.0661 (2.1518)	Entropy 0.62578 (0.62762)	Top-1 acc 74.219 (72.979)	Top-5 acc 92.578 (89.256)	lr 0.00005
Train [117][1500/3239]	Time 0.242 (0.638)	Data Time 0.001 (0.029)	Loss 2.0213 (2.1518)	Entropy 0.62577 (0.62761)	Top-1 acc 73.047 (72.974)	Top-5 acc 91.797 (89.258)	lr 0.00005
Train [117][1510/3239]	Time 0.325 (0.637)	Data Time 0.001 (0.029)	Loss 2.0726 (2.1518)	Entropy 0.62587 (0.62760)	Top-1 acc 74.219 (72.974)	Top-5 acc 91.406 (89.261)	lr 0.00005
Train [117][1520/3239]	Time 0.226 (0.636)	Data Time 0.001 (0.029)	Loss 2.1407 (2.1517)	Entropy 0.62587 (0.62759)	Top-1 acc 71.875 (72.972)	Top-5 acc 91.016 (89.268)	lr 0.00005
Train [117][1530/3239]	Time 0.230 (0.635)	Data Time 0.002 (0.029)	Loss 2.1807 (2.1516)	Entropy 0.62579 (0.62757)	Top-1 acc 71.875 (72.981)	Top-5 acc 90.234 (89.272)	lr 0.00005
Train [117][1540/3239]	Time 0.283 (0.634)	Data Time 0.002 (0.028)	Loss 2.2317 (2.1517)	Entropy 0.62580 (0.62756)	Top-1 acc 71.875 (72.970)	Top-5 acc 87.109 (89.275)	lr 0.00005
Train [117][1550/3239]	Time 2.597 (0.633)	Data Time 0.001 (0.028)	Loss 2.1370 (2.1518)	Entropy 0.62580 (0.62755)	Top-1 acc 74.609 (72.969)	Top-5 acc 89.062 (89.271)	lr 0.00005
Train [117][1560/3239]	Time 0.232 (0.630)	Data Time 0.001 (0.028)	Loss 2.0995 (2.1521)	Entropy 0.62579 (0.62754)	Top-1 acc 74.609 (72.964)	Top-5 acc 89.062 (89.263)	lr 0.00005
Train [117][1570/3239]	Time 0.241 (0.630)	Data Time 0.001 (0.028)	Loss 2.1003 (2.1520)	Entropy 0.62583 (0.62753)	Top-1 acc 75.000 (72.966)	Top-5 acc 91.016 (89.264)	lr 0.00005
Train [117][1580/3239]	Time 0.244 (0.629)	Data Time 0.001 (0.028)	Loss 2.0814 (2.1518)	Entropy 0.62578 (0.62752)	Top-1 acc 72.656 (72.966)	Top-5 acc 91.406 (89.273)	lr 0.00005
Train [117][1590/3239]	Time 0.260 (0.628)	Data Time 0.001 (0.028)	Loss 2.1751 (2.1516)	Entropy 0.62574 (0.62751)	Top-1 acc 73.438 (72.972)	Top-5 acc 86.719 (89.272)	lr 0.00005
Train [117][1600/3239]	Time 0.234 (0.627)	Data Time 0.001 (0.027)	Loss 2.2725 (2.1519)	Entropy 0.62567 (0.62750)	Top-1 acc 70.312 (72.968)	Top-5 acc 85.938 (89.268)	lr 0.00005
Train [117][1610/3239]	Time 0.245 (0.626)	Data Time 0.001 (0.027)	Loss 2.1223 (2.1516)	Entropy 0.62562 (0.62748)	Top-1 acc 76.562 (72.977)	Top-5 acc 88.281 (89.272)	lr 0.00005
Train [117][1620/3239]	Time 0.225 (0.625)	Data Time 0.001 (0.027)	Loss 2.1409 (2.1517)	Entropy 0.62569 (0.62747)	Top-1 acc 74.219 (72.976)	Top-5 acc 91.016 (89.274)	lr 0.00005
Train [117][1630/3239]	Time 0.254 (0.624)	Data Time 0.001 (0.027)	Loss 2.2095 (2.1515)	Entropy 0.62567 (0.62746)	Top-1 acc 73.438 (72.983)	Top-5 acc 87.891 (89.279)	lr 0.00005
Train [117][1640/3239]	Time 0.236 (0.623)	Data Time 0.001 (0.027)	Loss 2.0750 (2.1514)	Entropy 0.62564 (0.62745)	Top-1 acc 75.000 (72.986)	Top-5 acc 90.625 (89.277)	lr 0.00005
Train [117][1650/3239]	Time 0.213 (0.623)	Data Time 0.001 (0.027)	Loss 2.1429 (2.1516)	Entropy 0.62558 (0.62744)	Top-1 acc 71.875 (72.980)	Top-5 acc 88.281 (89.273)	lr 0.00005
Train [117][1660/3239]	Time 58.880 (0.656)	Data Time 0.001 (0.026)	Loss 2.2061 (2.1515)	Entropy 0.62558 (0.62743)	Top-1 acc 73.047 (72.982)	Top-5 acc 86.719 (89.276)	lr 0.00005
Train [117][1670/3239]	Time 0.243 (0.653)	Data Time 0.002 (0.026)	Loss 2.1685 (2.1515)	Entropy 0.62559 (0.62742)	Top-1 acc 73.438 (72.986)	Top-5 acc 91.016 (89.273)	lr 0.00005
Train [117][1680/3239]	Time 0.243 (0.652)	Data Time 0.002 (0.026)	Loss 2.1248 (2.1515)	Entropy 0.62563 (0.62741)	Top-1 acc 73.047 (72.979)	Top-5 acc 89.453 (89.276)	lr 0.00005
Train [117][1690/3239]	Time 0.233 (0.651)	Data Time 0.001 (0.026)	Loss 2.0354 (2.1515)	Entropy 0.62562 (0.62740)	Top-1 acc 76.172 (72.982)	Top-5 acc 91.016 (89.275)	lr 0.00005
Train [117][1700/3239]	Time 0.241 (0.650)	Data Time 0.001 (0.026)	Loss 1.9879 (2.1512)	Entropy 0.62564 (0.62739)	Top-1 acc 75.781 (72.992)	Top-5 acc 92.969 (89.281)	lr 0.00005
Train [117][1710/3239]	Time 0.259 (0.649)	Data Time 0.006 (0.026)	Loss 2.1613 (2.1511)	Entropy 0.62563 (0.62738)	Top-1 acc 71.875 (72.995)	Top-5 acc 87.891 (89.280)	lr 0.00005
Train [117][1720/3239]	Time 0.258 (0.649)	Data Time 0.001 (0.026)	Loss 2.1779 (2.1509)	Entropy 0.62566 (0.62737)	Top-1 acc 73.047 (72.998)	Top-5 acc 87.891 (89.282)	lr 0.00005
Train [117][1730/3239]	Time 0.216 (0.648)	Data Time 0.001 (0.025)	Loss 2.2381 (2.1509)	Entropy 0.62562 (0.62736)	Top-1 acc 70.312 (73.002)	Top-5 acc 88.281 (89.283)	lr 0.00005
Train [117][1740/3239]	Time 0.256 (0.647)	Data Time 0.002 (0.025)	Loss 2.1548 (2.1511)	Entropy 0.62565 (0.62735)	Top-1 acc 71.875 (72.996)	Top-5 acc 91.797 (89.280)	lr 0.00005
Train [117][1750/3239]	Time 0.285 (0.646)	Data Time 0.002 (0.025)	Loss 2.1006 (2.1512)	Entropy 0.62554 (0.62734)	Top-1 acc 72.266 (72.993)	Top-5 acc 91.016 (89.275)	lr 0.00005
Train [117][1760/3239]	Time 0.236 (0.645)	Data Time 0.001 (0.025)	Loss 2.1568 (2.1510)	Entropy 0.62553 (0.62733)	Top-1 acc 72.266 (72.996)	Top-5 acc 86.328 (89.275)	lr 0.00005
Train [117][1770/3239]	Time 2.645 (0.644)	Data Time 0.001 (0.025)	Loss 2.0908 (2.1509)	Entropy 0.62553 (0.62732)	Top-1 acc 73.438 (73.001)	Top-5 acc 90.234 (89.277)	lr 0.00005
Train [117][1780/3239]	Time 0.250 (0.642)	Data Time 0.001 (0.025)	Loss 2.1603 (2.1510)	Entropy 0.62552 (0.62731)	Top-1 acc 73.828 (72.996)	Top-5 acc 87.500 (89.275)	lr 0.00005
Train [117][1790/3239]	Time 0.233 (0.641)	Data Time 0.001 (0.025)	Loss 2.2272 (2.1511)	Entropy 0.62548 (0.62730)	Top-1 acc 64.062 (72.991)	Top-5 acc 91.016 (89.277)	lr 0.00005
Train [117][1800/3239]	Time 0.262 (0.640)	Data Time 0.001 (0.025)	Loss 1.9212 (2.1509)	Entropy 0.62549 (0.62729)	Top-1 acc 76.562 (72.990)	Top-5 acc 92.578 (89.282)	lr 0.00005
Train [117][1810/3239]	Time 0.237 (0.639)	Data Time 0.001 (0.024)	Loss 2.1385 (2.1507)	Entropy 0.62548 (0.62728)	Top-1 acc 69.922 (72.994)	Top-5 acc 89.844 (89.285)	lr 0.00005
Train [117][1820/3239]	Time 0.273 (0.638)	Data Time 0.001 (0.024)	Loss 2.1217 (2.1507)	Entropy 0.62541 (0.62727)	Top-1 acc 76.172 (72.991)	Top-5 acc 90.234 (89.288)	lr 0.00005
Train [117][1830/3239]	Time 0.254 (0.638)	Data Time 0.001 (0.024)	Loss 2.2263 (2.1508)	Entropy 0.62552 (0.62726)	Top-1 acc 71.484 (72.986)	Top-5 acc 87.500 (89.286)	lr 0.00005
Train [117][1840/3239]	Time 0.312 (0.637)	Data Time 0.001 (0.024)	Loss 2.1338 (2.1507)	Entropy 0.62548 (0.62725)	Top-1 acc 75.391 (72.987)	Top-5 acc 88.281 (89.282)	lr 0.00005
Train [117][1850/3239]	Time 0.309 (0.636)	Data Time 0.004 (0.024)	Loss 2.1410 (2.1506)	Entropy 0.62547 (0.62724)	Top-1 acc 73.438 (72.991)	Top-5 acc 90.625 (89.283)	lr 0.00005
Train [117][1860/3239]	Time 0.291 (0.635)	Data Time 0.001 (0.024)	Loss 2.1123 (2.1506)	Entropy 0.62547 (0.62723)	Top-1 acc 73.828 (72.990)	Top-5 acc 90.625 (89.286)	lr 0.00005
Train [117][1870/3239]	Time 0.233 (0.635)	Data Time 0.001 (0.024)	Loss 2.1422 (2.1508)	Entropy 0.62536 (0.62722)	Top-1 acc 78.906 (72.987)	Top-5 acc 88.672 (89.283)	lr 0.00005
Train [117][1880/3239]	Time 2.608 (0.634)	Data Time 0.001 (0.024)	Loss 2.1850 (2.1507)	Entropy 0.62536 (0.62721)	Top-1 acc 69.531 (72.991)	Top-5 acc 87.891 (89.286)	lr 0.00005
Train [117][1890/3239]	Time 0.232 (0.632)	Data Time 0.001 (0.023)	Loss 2.2509 (2.1506)	Entropy 0.62539 (0.62720)	Top-1 acc 71.875 (72.994)	Top-5 acc 85.938 (89.288)	lr 0.00005
Train [117][1900/3239]	Time 0.235 (0.631)	Data Time 0.001 (0.023)	Loss 2.0226 (2.1502)	Entropy 0.62540 (0.62719)	Top-1 acc 74.219 (73.001)	Top-5 acc 94.141 (89.300)	lr 0.00005
Train [117][1910/3239]	Time 0.301 (0.630)	Data Time 0.001 (0.023)	Loss 2.1634 (2.1504)	Entropy 0.62540 (0.62718)	Top-1 acc 73.828 (72.995)	Top-5 acc 87.500 (89.295)	lr 0.00005
Train [117][1920/3239]	Time 0.333 (0.630)	Data Time 0.001 (0.023)	Loss 2.1641 (2.1504)	Entropy 0.62546 (0.62717)	Top-1 acc 69.531 (72.990)	Top-5 acc 89.844 (89.295)	lr 0.00005
Train [117][1930/3239]	Time 0.275 (0.629)	Data Time 0.001 (0.023)	Loss 2.1400 (2.1504)	Entropy 0.62546 (0.62716)	Top-1 acc 75.000 (72.990)	Top-5 acc 88.672 (89.294)	lr 0.00005
Train [117][1940/3239]	Time 0.235 (0.628)	Data Time 0.001 (0.023)	Loss 2.1740 (2.1506)	Entropy 0.62553 (0.62715)	Top-1 acc 71.875 (72.983)	Top-5 acc 89.453 (89.291)	lr 0.00005
Train [117][1950/3239]	Time 0.233 (0.627)	Data Time 0.001 (0.023)	Loss 2.2070 (2.1506)	Entropy 0.62538 (0.62714)	Top-1 acc 73.828 (72.981)	Top-5 acc 89.453 (89.295)	lr 0.00005
Train [117][1960/3239]	Time 0.336 (0.626)	Data Time 0.001 (0.023)	Loss 2.0059 (2.1508)	Entropy 0.62536 (0.62714)	Top-1 acc 77.344 (72.978)	Top-5 acc 90.625 (89.291)	lr 0.00005
Train [117][1970/3239]	Time 0.271 (0.626)	Data Time 0.001 (0.023)	Loss 2.3159 (2.1508)	Entropy 0.62532 (0.62713)	Top-1 acc 70.312 (72.979)	Top-5 acc 84.766 (89.287)	lr 0.00005
Train [117][1980/3239]	Time 0.312 (0.625)	Data Time 0.001 (0.022)	Loss 2.1119 (2.1508)	Entropy 0.62529 (0.62712)	Top-1 acc 72.266 (72.975)	Top-5 acc 90.625 (89.291)	lr 0.00005
Train [117][1990/3239]	Time 2.584 (0.624)	Data Time 0.001 (0.022)	Loss 2.1899 (2.1508)	Entropy 0.62529 (0.62711)	Top-1 acc 69.141 (72.972)	Top-5 acc 91.016 (89.290)	lr 0.00005
Train [117][2000/3239]	Time 0.237 (0.623)	Data Time 0.001 (0.022)	Loss 2.1369 (2.1506)	Entropy 0.62530 (0.62710)	Top-1 acc 71.094 (72.978)	Top-5 acc 90.234 (89.293)	lr 0.00005
Train [117][2010/3239]	Time 0.235 (0.622)	Data Time 0.001 (0.022)	Loss 1.9847 (2.1504)	Entropy 0.62536 (0.62709)	Top-1 acc 76.953 (72.984)	Top-5 acc 92.969 (89.298)	lr 0.00005
Train [117][2020/3239]	Time 0.241 (0.621)	Data Time 0.001 (0.022)	Loss 2.2211 (2.1505)	Entropy 0.62535 (0.62708)	Top-1 acc 74.609 (72.981)	Top-5 acc 87.109 (89.298)	lr 0.00005
Train [117][2030/3239]	Time 0.241 (0.647)	Data Time 0.002 (0.022)	Loss 2.3012 (2.1505)	Entropy 0.62540 (0.62707)	Top-1 acc 69.531 (72.978)	Top-5 acc 85.938 (89.298)	lr 0.00005
Train [117][2040/3239]	Time 0.259 (0.647)	Data Time 0.003 (0.022)	Loss 2.2995 (2.1503)	Entropy 0.62538 (0.62706)	Top-1 acc 70.312 (72.984)	Top-5 acc 87.500 (89.302)	lr 0.00005
Train [117][2050/3239]	Time 0.304 (0.646)	Data Time 0.002 (0.022)	Loss 2.0974 (2.1503)	Entropy 0.62526 (0.62706)	Top-1 acc 76.172 (72.986)	Top-5 acc 90.625 (89.303)	lr 0.00005
Train [117][2060/3239]	Time 0.260 (0.645)	Data Time 0.001 (0.022)	Loss 2.2181 (2.1504)	Entropy 0.62525 (0.62705)	Top-1 acc 70.703 (72.984)	Top-5 acc 89.062 (89.304)	lr 0.00005
Train [117][2070/3239]	Time 0.305 (0.645)	Data Time 0.004 (0.022)	Loss 2.0597 (2.1503)	Entropy 0.62525 (0.62704)	Top-1 acc 74.609 (72.989)	Top-5 acc 91.016 (89.306)	lr 0.00005
Train [117][2080/3239]	Time 0.326 (0.644)	Data Time 0.001 (0.021)	Loss 2.1499 (2.1503)	Entropy 0.62527 (0.62703)	Top-1 acc 71.484 (72.990)	Top-5 acc 87.500 (89.306)	lr 0.00005
Train [117][2090/3239]	Time 0.278 (0.644)	Data Time 0.002 (0.021)	Loss 2.1079 (2.1503)	Entropy 0.62532 (0.62702)	Top-1 acc 77.344 (72.991)	Top-5 acc 91.016 (89.307)	lr 0.00005
Train [117][2100/3239]	Time 2.505 (0.643)	Data Time 0.001 (0.021)	Loss 2.3369 (2.1503)	Entropy 0.62532 (0.62701)	Top-1 acc 68.750 (72.988)	Top-5 acc 84.766 (89.307)	lr 0.00005
Train [117][2110/3239]	Time 0.235 (0.641)	Data Time 0.001 (0.021)	Loss 2.1303 (2.1504)	Entropy 0.62531 (0.62701)	Top-1 acc 75.781 (72.987)	Top-5 acc 89.062 (89.307)	lr 0.00005
Train [117][2120/3239]	Time 0.385 (0.640)	Data Time 0.033 (0.021)	Loss 2.0699 (2.1504)	Entropy 0.62528 (0.62700)	Top-1 acc 76.172 (72.989)	Top-5 acc 91.016 (89.307)	lr 0.00005
Train [117][2130/3239]	Time 0.367 (0.640)	Data Time 0.003 (0.021)	Loss 2.0384 (2.1506)	Entropy 0.62533 (0.62699)	Top-1 acc 77.734 (72.983)	Top-5 acc 90.234 (89.300)	lr 0.00005
Train [117][2140/3239]	Time 0.285 (0.639)	Data Time 0.001 (0.021)	Loss 2.2261 (2.1508)	Entropy 0.62529 (0.62698)	Top-1 acc 68.750 (72.977)	Top-5 acc 87.109 (89.299)	lr 0.00005
Train [117][2150/3239]	Time 0.236 (0.638)	Data Time 0.001 (0.021)	Loss 2.2118 (2.1508)	Entropy 0.62534 (0.62697)	Top-1 acc 73.828 (72.978)	Top-5 acc 87.109 (89.296)	lr 0.00005
Train [117][2160/3239]	Time 0.270 (0.638)	Data Time 0.002 (0.021)	Loss 1.9112 (2.1506)	Entropy 0.62531 (0.62697)	Top-1 acc 78.906 (72.981)	Top-5 acc 92.578 (89.301)	lr 0.00005
Train [117][2170/3239]	Time 0.400 (0.637)	Data Time 0.002 (0.021)	Loss 2.1059 (2.1504)	Entropy 0.62529 (0.62696)	Top-1 acc 73.438 (72.985)	Top-5 acc 89.844 (89.305)	lr 0.00005
Train [117][2180/3239]	Time 0.280 (0.636)	Data Time 0.001 (0.021)	Loss 2.3610 (2.1507)	Entropy 0.62528 (0.62695)	Top-1 acc 68.750 (72.980)	Top-5 acc 83.203 (89.301)	lr 0.00005
Train [117][2190/3239]	Time 0.312 (0.636)	Data Time 0.002 (0.021)	Loss 2.1113 (2.1507)	Entropy 0.62520 (0.62694)	Top-1 acc 75.391 (72.979)	Top-5 acc 89.453 (89.302)	lr 0.00005
Train [117][2200/3239]	Time 0.233 (0.635)	Data Time 0.002 (0.020)	Loss 2.0824 (2.1505)	Entropy 0.62517 (0.62694)	Top-1 acc 75.781 (72.985)	Top-5 acc 89.844 (89.304)	lr 0.00005
Train [117][2210/3239]	Time 2.849 (0.635)	Data Time 0.001 (0.020)	Loss 2.1579 (2.1507)	Entropy 0.62517 (0.62693)	Top-1 acc 69.531 (72.979)	Top-5 acc 87.891 (89.298)	lr 0.00005
Train [117][2220/3239]	Time 0.259 (0.633)	Data Time 0.002 (0.020)	Loss 2.0169 (2.1505)	Entropy 0.62514 (0.62692)	Top-1 acc 77.344 (72.982)	Top-5 acc 92.578 (89.300)	lr 0.00005
Train [117][2230/3239]	Time 0.242 (0.632)	Data Time 0.001 (0.020)	Loss 2.2663 (2.1503)	Entropy 0.62524 (0.62691)	Top-1 acc 69.141 (72.985)	Top-5 acc 87.891 (89.303)	lr 0.00005
Train [117][2240/3239]	Time 0.247 (0.632)	Data Time 0.001 (0.020)	Loss 2.1854 (2.1503)	Entropy 0.62519 (0.62690)	Top-1 acc 69.531 (72.987)	Top-5 acc 91.016 (89.305)	lr 0.00005
Train [117][2250/3239]	Time 0.304 (0.631)	Data Time 0.001 (0.020)	Loss 2.1560 (2.1501)	Entropy 0.62515 (0.62690)	Top-1 acc 72.656 (72.986)	Top-5 acc 89.453 (89.308)	lr 0.00005
Train [117][2260/3239]	Time 0.250 (0.630)	Data Time 0.001 (0.020)	Loss 2.0741 (2.1501)	Entropy 0.62500 (0.62689)	Top-1 acc 75.000 (72.987)	Top-5 acc 90.625 (89.307)	lr 0.00005
Train [117][2270/3239]	Time 0.249 (0.630)	Data Time 0.001 (0.020)	Loss 2.1233 (2.1499)	Entropy 0.62494 (0.62688)	Top-1 acc 73.438 (72.990)	Top-5 acc 89.062 (89.309)	lr 0.00005
Train [117][2280/3239]	Time 0.250 (0.629)	Data Time 0.001 (0.020)	Loss 2.1546 (2.1500)	Entropy 0.62502 (0.62687)	Top-1 acc 75.391 (72.990)	Top-5 acc 87.891 (89.308)	lr 0.00005
Train [117][2290/3239]	Time 0.264 (0.629)	Data Time 0.001 (0.020)	Loss 1.9940 (2.1500)	Entropy 0.62501 (0.62686)	Top-1 acc 76.562 (72.991)	Top-5 acc 92.578 (89.307)	lr 0.00005
Train [117][2300/3239]	Time 0.231 (0.628)	Data Time 0.001 (0.020)	Loss 2.1173 (2.1500)	Entropy 0.62501 (0.62686)	Top-1 acc 72.656 (72.989)	Top-5 acc 89.453 (89.307)	lr 0.00005
Train [117][2310/3239]	Time 0.211 (0.628)	Data Time 0.001 (0.020)	Loss 2.2141 (2.1502)	Entropy 0.62501 (0.62685)	Top-1 acc 72.656 (72.985)	Top-5 acc 89.844 (89.306)	lr 0.00005
Train [117][2320/3239]	Time 2.576 (0.627)	Data Time 0.001 (0.019)	Loss 2.2983 (2.1503)	Entropy 0.62501 (0.62684)	Top-1 acc 71.094 (72.986)	Top-5 acc 87.109 (89.303)	lr 0.00005
Train [117][2330/3239]	Time 0.250 (0.626)	Data Time 0.002 (0.019)	Loss 2.1489 (2.1505)	Entropy 0.62496 (0.62683)	Top-1 acc 72.656 (72.980)	Top-5 acc 89.062 (89.299)	lr 0.00005
Train [117][2340/3239]	Time 0.260 (0.625)	Data Time 0.001 (0.019)	Loss 2.1619 (2.1502)	Entropy 0.62487 (0.62682)	Top-1 acc 71.875 (72.982)	Top-5 acc 89.844 (89.304)	lr 0.00005
Train [117][2350/3239]	Time 0.263 (0.624)	Data Time 0.001 (0.019)	Loss 2.2061 (2.1502)	Entropy 0.62484 (0.62681)	Top-1 acc 68.750 (72.982)	Top-5 acc 87.500 (89.306)	lr 0.00005
Train [117][2360/3239]	Time 0.313 (0.624)	Data Time 0.001 (0.019)	Loss 2.2082 (2.1502)	Entropy 0.62484 (0.62681)	Top-1 acc 70.703 (72.982)	Top-5 acc 86.719 (89.304)	lr 0.00005
Train [117][2370/3239]	Time 0.286 (0.623)	Data Time 0.001 (0.019)	Loss 2.1654 (2.1501)	Entropy 0.62474 (0.62680)	Top-1 acc 71.094 (72.985)	Top-5 acc 86.719 (89.303)	lr 0.00005
Train [117][2380/3239]	Time 0.232 (0.623)	Data Time 0.001 (0.019)	Loss 2.1755 (2.1501)	Entropy 0.62468 (0.62679)	Top-1 acc 71.875 (72.984)	Top-5 acc 90.234 (89.306)	lr 0.00005
Train [117][2390/3239]	Time 0.235 (0.645)	Data Time 0.002 (0.019)	Loss 2.1112 (2.1498)	Entropy 0.62469 (0.62678)	Top-1 acc 73.438 (72.987)	Top-5 acc 89.844 (89.310)	lr 0.00005
Train [117][2400/3239]	Time 0.248 (0.644)	Data Time 0.002 (0.019)	Loss 2.2721 (2.1498)	Entropy 0.62472 (0.62677)	Top-1 acc 72.656 (72.988)	Top-5 acc 85.938 (89.311)	lr 0.00005
Train [117][2410/3239]	Time 0.253 (0.644)	Data Time 0.002 (0.019)	Loss 2.1451 (2.1497)	Entropy 0.62467 (0.62676)	Top-1 acc 71.094 (72.988)	Top-5 acc 89.453 (89.312)	lr 0.00005
Train [117][2420/3239]	Time 0.332 (0.643)	Data Time 0.001 (0.019)	Loss 2.0082 (2.1498)	Entropy 0.62453 (0.62675)	Top-1 acc 78.125 (72.988)	Top-5 acc 92.188 (89.309)	lr 0.00005
Train [117][2430/3239]	Time 2.630 (0.642)	Data Time 0.001 (0.019)	Loss 2.1655 (2.1498)	Entropy 0.62453 (0.62675)	Top-1 acc 72.266 (72.987)	Top-5 acc 88.672 (89.308)	lr 0.00005
Train [117][2440/3239]	Time 0.252 (0.641)	Data Time 0.001 (0.019)	Loss 2.0541 (2.1498)	Entropy 0.62463 (0.62674)	Top-1 acc 73.047 (72.988)	Top-5 acc 91.406 (89.308)	lr 0.00005
Train [117][2450/3239]	Time 0.251 (0.640)	Data Time 0.001 (0.019)	Loss 2.2536 (2.1498)	Entropy 0.62455 (0.62673)	Top-1 acc 72.266 (72.989)	Top-5 acc 86.719 (89.309)	lr 0.00005
Train [117][2460/3239]	Time 0.406 (0.640)	Data Time 0.001 (0.018)	Loss 2.3419 (2.1497)	Entropy 0.62453 (0.62672)	Top-1 acc 65.625 (72.989)	Top-5 acc 85.156 (89.309)	lr 0.00004
Train [117][2470/3239]	Time 0.242 (0.639)	Data Time 0.002 (0.018)	Loss 2.1761 (2.1498)	Entropy 0.62463 (0.62671)	Top-1 acc 71.484 (72.990)	Top-5 acc 88.281 (89.308)	lr 0.00004
Train [117][2480/3239]	Time 0.257 (0.639)	Data Time 0.001 (0.018)	Loss 2.1407 (2.1498)	Entropy 0.62474 (0.62670)	Top-1 acc 73.828 (72.990)	Top-5 acc 89.062 (89.310)	lr 0.00004
Train [117][2490/3239]	Time 0.260 (0.638)	Data Time 0.001 (0.018)	Loss 2.1993 (2.1499)	Entropy 0.62482 (0.62669)	Top-1 acc 71.484 (72.990)	Top-5 acc 89.453 (89.307)	lr 0.00004
Train [117][2500/3239]	Time 0.293 (0.638)	Data Time 0.001 (0.018)	Loss 2.1408 (2.1500)	Entropy 0.62482 (0.62669)	Top-1 acc 73.047 (72.991)	Top-5 acc 88.672 (89.306)	lr 0.00004
Train [117][2510/3239]	Time 0.249 (0.637)	Data Time 0.002 (0.018)	Loss 2.1478 (2.1500)	Entropy 0.62474 (0.62668)	Top-1 acc 73.828 (72.992)	Top-5 acc 89.062 (89.306)	lr 0.00004
Train [117][2520/3239]	Time 0.225 (0.637)	Data Time 0.001 (0.018)	Loss 2.1688 (2.1500)	Entropy 0.62469 (0.62667)	Top-1 acc 70.312 (72.986)	Top-5 acc 88.672 (89.304)	lr 0.00004
Train [117][2530/3239]	Time 0.232 (0.636)	Data Time 0.001 (0.018)	Loss 2.0816 (2.1499)	Entropy 0.62461 (0.62666)	Top-1 acc 73.828 (72.991)	Top-5 acc 91.406 (89.305)	lr 0.00004
Train [117][2540/3239]	Time 2.482 (0.635)	Data Time 0.001 (0.018)	Loss 2.0128 (2.1497)	Entropy 0.62461 (0.62666)	Top-1 acc 76.953 (72.997)	Top-5 acc 91.406 (89.308)	lr 0.00004
Train [117][2550/3239]	Time 0.237 (0.634)	Data Time 0.001 (0.018)	Loss 2.1388 (2.1498)	Entropy 0.62461 (0.62665)	Top-1 acc 73.047 (72.994)	Top-5 acc 91.797 (89.306)	lr 0.00004
Train [117][2560/3239]	Time 0.234 (0.634)	Data Time 0.001 (0.018)	Loss 2.1683 (2.1496)	Entropy 0.62457 (0.62664)	Top-1 acc 72.266 (72.999)	Top-5 acc 91.406 (89.312)	lr 0.00004
Train [117][2570/3239]	Time 0.241 (0.633)	Data Time 0.001 (0.018)	Loss 2.2205 (2.1496)	Entropy 0.62453 (0.62663)	Top-1 acc 71.094 (72.999)	Top-5 acc 86.328 (89.313)	lr 0.00004
Train [117][2580/3239]	Time 0.253 (0.632)	Data Time 0.001 (0.018)	Loss 2.1762 (2.1497)	Entropy 0.62450 (0.62662)	Top-1 acc 69.141 (72.996)	Top-5 acc 89.844 (89.312)	lr 0.00004
Train [117][2590/3239]	Time 0.258 (0.632)	Data Time 0.001 (0.018)	Loss 2.1698 (2.1498)	Entropy 0.62451 (0.62661)	Top-1 acc 71.484 (72.993)	Top-5 acc 88.281 (89.307)	lr 0.00004
Train [117][2600/3239]	Time 0.246 (0.631)	Data Time 0.002 (0.018)	Loss 2.1535 (2.1499)	Entropy 0.62435 (0.62661)	Top-1 acc 73.828 (72.992)	Top-5 acc 89.062 (89.308)	lr 0.00004
Train [117][2610/3239]	Time 0.271 (0.631)	Data Time 0.001 (0.018)	Loss 2.2219 (2.1498)	Entropy 0.62430 (0.62660)	Top-1 acc 70.312 (72.995)	Top-5 acc 88.672 (89.311)	lr 0.00004
Train [117][2620/3239]	Time 0.242 (0.630)	Data Time 0.001 (0.017)	Loss 2.0669 (2.1498)	Entropy 0.62433 (0.62659)	Top-1 acc 75.391 (72.998)	Top-5 acc 92.188 (89.313)	lr 0.00004
Train [117][2630/3239]	Time 0.255 (0.630)	Data Time 0.002 (0.017)	Loss 2.0003 (2.1497)	Entropy 0.62428 (0.62658)	Top-1 acc 77.734 (73.001)	Top-5 acc 93.750 (89.318)	lr 0.00004
Train [117][2640/3239]	Time 0.231 (0.629)	Data Time 0.001 (0.017)	Loss 1.9980 (2.1496)	Entropy 0.62421 (0.62657)	Top-1 acc 76.172 (72.999)	Top-5 acc 92.188 (89.319)	lr 0.00004
Train [117][2650/3239]	Time 0.297 (0.629)	Data Time 0.001 (0.017)	Loss 2.0642 (2.1496)	Entropy 0.62415 (0.62656)	Top-1 acc 72.266 (72.999)	Top-5 acc 89.844 (89.320)	lr 0.00004
Train [117][2660/3239]	Time 0.317 (0.628)	Data Time 0.001 (0.017)	Loss 2.1172 (2.1494)	Entropy 0.62416 (0.62655)	Top-1 acc 74.609 (73.001)	Top-5 acc 87.891 (89.324)	lr 0.00004
Train [117][2670/3239]	Time 0.247 (0.628)	Data Time 0.001 (0.017)	Loss 2.0342 (2.1494)	Entropy 0.62413 (0.62654)	Top-1 acc 75.000 (73.000)	Top-5 acc 91.016 (89.325)	lr 0.00004
Train [117][2680/3239]	Time 0.274 (0.627)	Data Time 0.001 (0.017)	Loss 2.1463 (2.1494)	Entropy 0.62407 (0.62654)	Top-1 acc 73.438 (72.998)	Top-5 acc 90.234 (89.326)	lr 0.00004
Train [117][2690/3239]	Time 0.229 (0.627)	Data Time 0.001 (0.017)	Loss 2.0459 (2.1494)	Entropy 0.62401 (0.62653)	Top-1 acc 75.000 (72.999)	Top-5 acc 90.234 (89.326)	lr 0.00004
Train [117][2700/3239]	Time 0.230 (0.627)	Data Time 0.001 (0.017)	Loss 2.1319 (2.1495)	Entropy 0.62395 (0.62652)	Top-1 acc 71.875 (72.994)	Top-5 acc 90.234 (89.323)	lr 0.00004
Train [117][2710/3239]	Time 0.335 (0.626)	Data Time 0.001 (0.017)	Loss 2.0847 (2.1494)	Entropy 0.62386 (0.62651)	Top-1 acc 74.609 (72.994)	Top-5 acc 90.234 (89.322)	lr 0.00004
Train [117][2720/3239]	Time 0.273 (0.626)	Data Time 0.001 (0.017)	Loss 2.1446 (2.1495)	Entropy 0.62383 (0.62650)	Top-1 acc 73.828 (72.989)	Top-5 acc 90.625 (89.320)	lr 0.00004
Train [117][2730/3239]	Time 0.294 (0.625)	Data Time 0.001 (0.017)	Loss 2.1448 (2.1496)	Entropy 0.62383 (0.62649)	Top-1 acc 71.094 (72.986)	Top-5 acc 91.016 (89.317)	lr 0.00004
Train [117][2740/3239]	Time 0.307 (0.645)	Data Time 0.004 (0.017)	Loss 2.1017 (2.1498)	Entropy 0.62379 (0.62648)	Top-1 acc 76.562 (72.983)	Top-5 acc 90.625 (89.316)	lr 0.00004
Train [117][2750/3239]	Time 0.363 (0.644)	Data Time 0.002 (0.017)	Loss 1.9720 (2.1497)	Entropy 0.62356 (0.62647)	Top-1 acc 78.516 (72.982)	Top-5 acc 91.797 (89.317)	lr 0.00004
Train [117][2760/3239]	Time 0.309 (0.644)	Data Time 0.002 (0.017)	Loss 2.1280 (2.1498)	Entropy 0.62353 (0.62646)	Top-1 acc 74.219 (72.981)	Top-5 acc 91.406 (89.315)	lr 0.00004
Train [117][2770/3239]	Time 0.236 (0.643)	Data Time 0.001 (0.017)	Loss 2.2447 (2.1498)	Entropy 0.62350 (0.62645)	Top-1 acc 68.750 (72.981)	Top-5 acc 85.938 (89.315)	lr 0.00004
Train [117][2780/3239]	Time 0.237 (0.643)	Data Time 0.002 (0.017)	Loss 2.1828 (2.1498)	Entropy 0.62349 (0.62644)	Top-1 acc 72.266 (72.984)	Top-5 acc 89.453 (89.315)	lr 0.00004
Train [117][2790/3239]	Time 0.442 (0.642)	Data Time 0.001 (0.017)	Loss 2.1004 (2.1498)	Entropy 0.62347 (0.62643)	Top-1 acc 71.875 (72.981)	Top-5 acc 91.016 (89.316)	lr 0.00004
Train [117][2800/3239]	Time 0.272 (0.642)	Data Time 0.001 (0.016)	Loss 2.3024 (2.1498)	Entropy 0.62335 (0.62642)	Top-1 acc 69.531 (72.982)	Top-5 acc 87.109 (89.315)	lr 0.00004
Train [117][2810/3239]	Time 0.240 (0.641)	Data Time 0.001 (0.016)	Loss 2.2771 (2.1500)	Entropy 0.62336 (0.62640)	Top-1 acc 67.188 (72.976)	Top-5 acc 86.719 (89.311)	lr 0.00004
Train [117][2820/3239]	Time 0.258 (0.641)	Data Time 0.001 (0.016)	Loss 2.1062 (2.1498)	Entropy 0.62335 (0.62639)	Top-1 acc 73.828 (72.982)	Top-5 acc 88.672 (89.314)	lr 0.00004
Train [117][2830/3239]	Time 0.350 (0.640)	Data Time 0.001 (0.016)	Loss 2.1483 (2.1499)	Entropy 0.62339 (0.62638)	Top-1 acc 70.703 (72.978)	Top-5 acc 90.625 (89.313)	lr 0.00004
Train [117][2840/3239]	Time 0.243 (0.640)	Data Time 0.001 (0.016)	Loss 2.3335 (2.1500)	Entropy 0.62335 (0.62637)	Top-1 acc 66.016 (72.977)	Top-5 acc 85.156 (89.312)	lr 0.00004
Train [117][2850/3239]	Time 0.237 (0.639)	Data Time 0.001 (0.016)	Loss 2.1556 (2.1501)	Entropy 0.62332 (0.62636)	Top-1 acc 70.703 (72.971)	Top-5 acc 87.891 (89.308)	lr 0.00004
Train [117][2860/3239]	Time 0.238 (0.639)	Data Time 0.001 (0.016)	Loss 2.1264 (2.1502)	Entropy 0.62325 (0.62635)	Top-1 acc 75.391 (72.974)	Top-5 acc 88.672 (89.308)	lr 0.00004
Train [117][2870/3239]	Time 0.359 (0.638)	Data Time 0.001 (0.016)	Loss 2.0261 (2.1501)	Entropy 0.62322 (0.62634)	Top-1 acc 73.047 (72.973)	Top-5 acc 91.797 (89.310)	lr 0.00004
Train [117][2880/3239]	Time 0.258 (0.638)	Data Time 0.001 (0.016)	Loss 2.1719 (2.1503)	Entropy 0.62322 (0.62633)	Top-1 acc 73.047 (72.969)	Top-5 acc 87.500 (89.307)	lr 0.00004
Train [117][2890/3239]	Time 0.227 (0.637)	Data Time 0.001 (0.016)	Loss 2.2373 (2.1503)	Entropy 0.62320 (0.62632)	Top-1 acc 70.703 (72.966)	Top-5 acc 87.891 (89.308)	lr 0.00004
Train [117][2900/3239]	Time 0.285 (0.637)	Data Time 0.002 (0.016)	Loss 2.2006 (2.1503)	Entropy 0.62316 (0.62631)	Top-1 acc 73.828 (72.969)	Top-5 acc 87.500 (89.308)	lr 0.00004
Train [117][2910/3239]	Time 0.336 (0.636)	Data Time 0.001 (0.016)	Loss 2.1172 (2.1502)	Entropy 0.62317 (0.62630)	Top-1 acc 73.828 (72.972)	Top-5 acc 90.234 (89.310)	lr 0.00004
Train [117][2920/3239]	Time 0.235 (0.636)	Data Time 0.001 (0.016)	Loss 2.1842 (2.1503)	Entropy 0.62311 (0.62629)	Top-1 acc 70.703 (72.967)	Top-5 acc 90.625 (89.309)	lr 0.00004
Train [117][2930/3239]	Time 0.265 (0.635)	Data Time 0.001 (0.016)	Loss 2.0995 (2.1503)	Entropy 0.62311 (0.62628)	Top-1 acc 75.391 (72.965)	Top-5 acc 88.672 (89.308)	lr 0.00004
Train [117][2940/3239]	Time 0.293 (0.635)	Data Time 0.001 (0.016)	Loss 2.1769 (2.1502)	Entropy 0.62304 (0.62627)	Top-1 acc 73.828 (72.967)	Top-5 acc 87.109 (89.312)	lr 0.00004
Train [117][2950/3239]	Time 0.344 (0.634)	Data Time 0.001 (0.016)	Loss 2.2106 (2.1501)	Entropy 0.62301 (0.62625)	Top-1 acc 70.703 (72.965)	Top-5 acc 88.281 (89.314)	lr 0.00004
Train [117][2960/3239]	Time 0.281 (0.634)	Data Time 0.001 (0.016)	Loss 2.3165 (2.1502)	Entropy 0.62301 (0.62624)	Top-1 acc 71.875 (72.967)	Top-5 acc 85.547 (89.314)	lr 0.00004
Train [117][2970/3239]	Time 0.328 (0.634)	Data Time 0.001 (0.016)	Loss 2.1493 (2.1501)	Entropy 0.62298 (0.62623)	Top-1 acc 72.656 (72.973)	Top-5 acc 88.672 (89.315)	lr 0.00004
Train [117][2980/3239]	Time 0.275 (0.633)	Data Time 0.001 (0.016)	Loss 2.2061 (2.1503)	Entropy 0.62294 (0.62622)	Top-1 acc 72.656 (72.969)	Top-5 acc 89.062 (89.314)	lr 0.00004
Train [117][2990/3239]	Time 0.239 (0.633)	Data Time 0.002 (0.016)	Loss 2.1088 (2.1504)	Entropy 0.62282 (0.62621)	Top-1 acc 77.344 (72.968)	Top-5 acc 89.844 (89.311)	lr 0.00004
Train [117][3000/3239]	Time 0.241 (0.632)	Data Time 0.002 (0.016)	Loss 2.1797 (2.1503)	Entropy 0.62280 (0.62620)	Top-1 acc 75.000 (72.970)	Top-5 acc 87.891 (89.313)	lr 0.00004
Train [117][3010/3239]	Time 0.273 (0.632)	Data Time 0.001 (0.015)	Loss 2.0542 (2.1502)	Entropy 0.62276 (0.62619)	Top-1 acc 73.047 (72.970)	Top-5 acc 91.406 (89.316)	lr 0.00004
Train [117][3020/3239]	Time 0.305 (0.631)	Data Time 0.001 (0.015)	Loss 2.0826 (2.1500)	Entropy 0.62267 (0.62618)	Top-1 acc 71.875 (72.976)	Top-5 acc 90.234 (89.321)	lr 0.00004
Train [117][3030/3239]	Time 0.309 (0.631)	Data Time 0.001 (0.015)	Loss 2.2904 (2.1501)	Entropy 0.62267 (0.62616)	Top-1 acc 66.797 (72.972)	Top-5 acc 86.328 (89.318)	lr 0.00004
Train [117][3040/3239]	Time 0.293 (0.630)	Data Time 0.001 (0.015)	Loss 2.2654 (2.1503)	Entropy 0.62265 (0.62615)	Top-1 acc 70.312 (72.969)	Top-5 acc 87.891 (89.315)	lr 0.00004
Train [117][3050/3239]	Time 0.317 (0.630)	Data Time 0.002 (0.015)	Loss 2.0186 (2.1503)	Entropy 0.62264 (0.62614)	Top-1 acc 75.781 (72.969)	Top-5 acc 91.406 (89.316)	lr 0.00004
Train [117][3060/3239]	Time 0.240 (0.630)	Data Time 0.001 (0.015)	Loss 2.2426 (2.1503)	Entropy 0.62269 (0.62613)	Top-1 acc 70.312 (72.968)	Top-5 acc 88.281 (89.317)	lr 0.00004
Train [117][3070/3239]	Time 0.399 (0.647)	Data Time 0.003 (0.015)	Loss 2.1959 (2.1502)	Entropy 0.62272 (0.62612)	Top-1 acc 75.781 (72.976)	Top-5 acc 86.328 (89.318)	lr 0.00004
Train [117][3080/3239]	Time 0.272 (0.646)	Data Time 0.002 (0.015)	Loss 2.0430 (2.1502)	Entropy 0.62258 (0.62611)	Top-1 acc 76.953 (72.974)	Top-5 acc 92.188 (89.318)	lr 0.00004
Train [117][3090/3239]	Time 0.285 (0.646)	Data Time 0.002 (0.015)	Loss 2.2447 (2.1503)	Entropy 0.62255 (0.62610)	Top-1 acc 71.875 (72.973)	Top-5 acc 86.328 (89.316)	lr 0.00004
Train [117][3100/3239]	Time 0.278 (0.645)	Data Time 0.002 (0.015)	Loss 2.0834 (2.1502)	Entropy 0.62253 (0.62609)	Top-1 acc 73.828 (72.975)	Top-5 acc 89.844 (89.317)	lr 0.00004
Train [117][3110/3239]	Time 0.252 (0.645)	Data Time 0.001 (0.015)	Loss 2.0639 (2.1501)	Entropy 0.62242 (0.62607)	Top-1 acc 76.562 (72.978)	Top-5 acc 89.844 (89.322)	lr 0.00004
Train [117][3120/3239]	Time 0.242 (0.644)	Data Time 0.001 (0.015)	Loss 1.9981 (2.1501)	Entropy 0.62247 (0.62606)	Top-1 acc 79.297 (72.980)	Top-5 acc 92.578 (89.322)	lr 0.00004
Train [117][3130/3239]	Time 0.261 (0.644)	Data Time 0.001 (0.015)	Loss 2.0449 (2.1501)	Entropy 0.62243 (0.62605)	Top-1 acc 76.172 (72.978)	Top-5 acc 91.797 (89.322)	lr 0.00004
Train [117][3140/3239]	Time 0.245 (0.643)	Data Time 0.001 (0.015)	Loss 2.1124 (2.1501)	Entropy 0.62243 (0.62604)	Top-1 acc 74.219 (72.975)	Top-5 acc 90.234 (89.321)	lr 0.00004
Train [117][3150/3239]	Time 0.241 (0.643)	Data Time 0.001 (0.015)	Loss 2.2578 (2.1500)	Entropy 0.62246 (0.62603)	Top-1 acc 69.922 (72.977)	Top-5 acc 86.719 (89.321)	lr 0.00004
Train [117][3160/3239]	Time 0.237 (0.642)	Data Time 0.001 (0.015)	Loss 2.1753 (2.1501)	Entropy 0.62242 (0.62602)	Top-1 acc 73.047 (72.973)	Top-5 acc 89.062 (89.321)	lr 0.00004
Train [117][3170/3239]	Time 0.235 (0.642)	Data Time 0.001 (0.015)	Loss 2.1478 (2.1501)	Entropy 0.62239 (0.62601)	Top-1 acc 70.312 (72.970)	Top-5 acc 89.844 (89.321)	lr 0.00004
Train [117][3180/3239]	Time 0.229 (0.642)	Data Time 0.000 (0.015)	Loss 2.2420 (2.1501)	Entropy 0.62223 (0.62599)	Top-1 acc 69.531 (72.970)	Top-5 acc 86.719 (89.321)	lr 0.00004
Train [117][3190/3239]	Time 0.246 (0.641)	Data Time 0.000 (0.015)	Loss 2.3484 (2.1501)	Entropy 0.62220 (0.62598)	Top-1 acc 69.141 (72.972)	Top-5 acc 87.109 (89.321)	lr 0.00004
Train [117][3200/3239]	Time 0.227 (0.640)	Data Time 0.000 (0.015)	Loss 2.0856 (2.1502)	Entropy 0.62220 (0.62597)	Top-1 acc 76.172 (72.970)	Top-5 acc 91.797 (89.321)	lr 0.00004
Train [117][3210/3239]	Time 0.239 (0.640)	Data Time 0.000 (0.015)	Loss 2.1293 (2.1502)	Entropy 0.62214 (0.62596)	Top-1 acc 71.875 (72.968)	Top-5 acc 88.672 (89.318)	lr 0.00004
Train [117][3220/3239]	Time 0.227 (0.639)	Data Time 0.000 (0.015)	Loss 2.2292 (2.1503)	Entropy 0.62213 (0.62595)	Top-1 acc 70.703 (72.965)	Top-5 acc 89.062 (89.317)	lr 0.00004
Train [117][3230/3239]	Time 0.237 (0.639)	Data Time 0.000 (0.015)	Loss 2.1825 (2.1505)	Entropy 0.62197 (0.62593)	Top-1 acc 71.094 (72.962)	Top-5 acc 87.891 (89.314)	lr 0.00004
Train [117][3239/3239]	Time 2.489 (0.638)	Data Time 0.000 (0.014)	Loss 2.2106 (2.1504)	Entropy 0.62197 (0.62592)	Top-1 acc 69.136 (72.964)	Top-5 acc 88.889 (89.316)	lr 0.00004
==========Valid [117/120]	loss 1.196	top-1 acc 72.596 (72.745)	top-5 acc 90.027	Train top-1 72.964	top-5 89.316	Entropy 0.62197	Latency-None: 0.000ms	Flops: 544.27M
Train [118][0/3239]	Time 42.491 (42.491)	Data Time 40.956 (40.956)	Loss 2.0560 (2.0560)	Entropy 0.62191 (0.62191)	Top-1 acc 73.438 (73.438)	Top-5 acc 91.797 (91.797)	lr 0.00004
Train [118][10/3239]	Time 2.584 (4.395)	Data Time 0.001 (3.725)	Loss 2.1972 (2.1602)	Entropy 0.62191 (0.62191)	Top-1 acc 72.656 (73.118)	Top-5 acc 88.281 (89.134)	lr 0.00004
Train [118][20/3239]	Time 0.251 (2.421)	Data Time 0.001 (1.952)	Loss 2.2245 (2.1656)	Entropy 0.62192 (0.62191)	Top-1 acc 70.703 (72.582)	Top-5 acc 89.062 (89.118)	lr 0.00004
Train [118][30/3239]	Time 0.242 (1.799)	Data Time 0.001 (1.323)	Loss 2.1687 (2.1588)	Entropy 0.62190 (0.62191)	Top-1 acc 72.656 (72.379)	Top-5 acc 85.938 (89.201)	lr 0.00004
Train [118][40/3239]	Time 0.337 (1.484)	Data Time 0.001 (1.000)	Loss 2.0875 (2.1558)	Entropy 0.62189 (0.62191)	Top-1 acc 75.000 (72.361)	Top-5 acc 89.844 (89.282)	lr 0.00004
Train [118][50/3239]	Time 0.240 (1.291)	Data Time 0.001 (0.805)	Loss 2.0684 (2.1632)	Entropy 0.62188 (0.62190)	Top-1 acc 75.000 (72.197)	Top-5 acc 89.062 (89.055)	lr 0.00004
Train [118][60/3239]	Time 0.246 (1.161)	Data Time 0.001 (0.673)	Loss 1.9883 (2.1547)	Entropy 0.62187 (0.62190)	Top-1 acc 76.172 (72.298)	Top-5 acc 92.969 (89.152)	lr 0.00004
Train [118][70/3239]	Time 0.231 (1.064)	Data Time 0.001 (0.578)	Loss 2.2138 (2.1551)	Entropy 0.62185 (0.62189)	Top-1 acc 71.484 (72.326)	Top-5 acc 89.062 (89.173)	lr 0.00004
Train [118][80/3239]	Time 0.321 (0.992)	Data Time 0.001 (0.507)	Loss 2.0950 (2.1486)	Entropy 0.62184 (0.62189)	Top-1 acc 75.781 (72.589)	Top-5 acc 89.062 (89.289)	lr 0.00004
Train [118][90/3239]	Time 0.262 (0.937)	Data Time 0.001 (0.452)	Loss 2.2008 (2.1504)	Entropy 0.62181 (0.62188)	Top-1 acc 69.531 (72.467)	Top-5 acc 88.672 (89.311)	lr 0.00004
Train [118][100/3239]	Time 0.232 (0.892)	Data Time 0.001 (0.407)	Loss 2.1324 (2.1519)	Entropy 0.62175 (0.62187)	Top-1 acc 75.391 (72.540)	Top-5 acc 90.625 (89.271)	lr 0.00004
Train [118][110/3239]	Time 0.239 (0.854)	Data Time 0.001 (0.371)	Loss 2.0499 (2.1503)	Entropy 0.62182 (0.62186)	Top-1 acc 75.781 (72.695)	Top-5 acc 89.844 (89.263)	lr 0.00004
Train [118][120/3239]	Time 2.557 (0.823)	Data Time 0.001 (0.340)	Loss 2.2691 (2.1484)	Entropy 0.62182 (0.62186)	Top-1 acc 71.094 (72.779)	Top-5 acc 87.500 (89.318)	lr 0.00004
Train [118][130/3239]	Time 0.242 (0.780)	Data Time 0.001 (0.314)	Loss 2.0963 (2.1452)	Entropy 0.62178 (0.62185)	Top-1 acc 75.781 (72.883)	Top-5 acc 89.453 (89.367)	lr 0.00004
Train [118][140/3239]	Time 0.226 (0.759)	Data Time 0.001 (0.292)	Loss 2.0282 (2.1440)	Entropy 0.62172 (0.62184)	Top-1 acc 75.391 (72.931)	Top-5 acc 92.969 (89.351)	lr 0.00004
Train [118][150/3239]	Time 0.240 (0.741)	Data Time 0.001 (0.273)	Loss 1.9900 (2.1449)	Entropy 0.62163 (0.62183)	Top-1 acc 76.953 (72.892)	Top-5 acc 92.188 (89.370)	lr 0.00004
Train [118][160/3239]	Time 0.330 (0.724)	Data Time 0.001 (0.256)	Loss 2.0843 (2.1442)	Entropy 0.62165 (0.62182)	Top-1 acc 76.172 (72.935)	Top-5 acc 87.891 (89.334)	lr 0.00004
Train [118][170/3239]	Time 0.228 (0.710)	Data Time 0.001 (0.241)	Loss 2.0062 (2.1430)	Entropy 0.62168 (0.62181)	Top-1 acc 78.516 (73.003)	Top-5 acc 91.016 (89.385)	lr 0.00004
Train [118][180/3239]	Time 0.276 (0.989)	Data Time 0.003 (0.228)	Loss 2.0860 (2.1424)	Entropy 0.62172 (0.62180)	Top-1 acc 73.047 (72.997)	Top-5 acc 88.672 (89.384)	lr 0.00004
Train [118][190/3239]	Time 0.238 (0.967)	Data Time 0.002 (0.216)	Loss 2.1264 (2.1448)	Entropy 0.62172 (0.62180)	Top-1 acc 71.875 (72.947)	Top-5 acc 89.453 (89.318)	lr 0.00004
Train [118][200/3239]	Time 0.325 (0.944)	Data Time 0.001 (0.205)	Loss 2.1235 (2.1426)	Entropy 0.62170 (0.62180)	Top-1 acc 72.266 (73.006)	Top-5 acc 91.016 (89.356)	lr 0.00004
Train [118][210/3239]	Time 0.240 (0.922)	Data Time 0.001 (0.196)	Loss 2.1268 (2.1400)	Entropy 0.62169 (0.62179)	Top-1 acc 75.781 (73.126)	Top-5 acc 91.016 (89.427)	lr 0.00004
Train [118][220/3239]	Time 0.274 (0.902)	Data Time 0.001 (0.187)	Loss 2.2951 (2.1419)	Entropy 0.62164 (0.62179)	Top-1 acc 68.750 (73.086)	Top-5 acc 86.719 (89.405)	lr 0.00004
Train [118][230/3239]	Time 2.513 (0.884)	Data Time 0.002 (0.179)	Loss 2.3667 (2.1440)	Entropy 0.62164 (0.62178)	Top-1 acc 66.406 (72.998)	Top-5 acc 84.766 (89.372)	lr 0.00004
Train [118][240/3239]	Time 0.254 (0.858)	Data Time 0.001 (0.172)	Loss 2.1293 (2.1438)	Entropy 0.62165 (0.62177)	Top-1 acc 72.656 (72.993)	Top-5 acc 91.797 (89.380)	lr 0.00004
Train [118][250/3239]	Time 0.264 (0.843)	Data Time 0.002 (0.165)	Loss 2.1649 (2.1436)	Entropy 0.62181 (0.62177)	Top-1 acc 72.266 (73.005)	Top-5 acc 87.109 (89.377)	lr 0.00004
Train [118][260/3239]	Time 0.240 (0.830)	Data Time 0.002 (0.159)	Loss 2.1933 (2.1446)	Entropy 0.62173 (0.62177)	Top-1 acc 70.703 (72.987)	Top-5 acc 90.234 (89.387)	lr 0.00004
Train [118][270/3239]	Time 0.220 (0.818)	Data Time 0.001 (0.153)	Loss 2.0367 (2.1433)	Entropy 0.62181 (0.62177)	Top-1 acc 76.172 (73.018)	Top-5 acc 90.234 (89.413)	lr 0.00004
Train [118][280/3239]	Time 0.243 (0.806)	Data Time 0.001 (0.147)	Loss 2.1947 (2.1465)	Entropy 0.62179 (0.62178)	Top-1 acc 71.875 (72.963)	Top-5 acc 88.672 (89.359)	lr 0.00004
Train [118][290/3239]	Time 0.234 (0.795)	Data Time 0.001 (0.142)	Loss 2.1644 (2.1470)	Entropy 0.62175 (0.62178)	Top-1 acc 76.172 (72.981)	Top-5 acc 89.844 (89.356)	lr 0.00004
Train [118][300/3239]	Time 0.236 (0.785)	Data Time 0.002 (0.138)	Loss 2.1213 (2.1493)	Entropy 0.62174 (0.62177)	Top-1 acc 73.828 (72.926)	Top-5 acc 89.844 (89.306)	lr 0.00004
Train [118][310/3239]	Time 0.270 (0.775)	Data Time 0.001 (0.133)	Loss 2.0634 (2.1493)	Entropy 0.62167 (0.62177)	Top-1 acc 74.609 (72.916)	Top-5 acc 90.234 (89.305)	lr 0.00004
Train [118][320/3239]	Time 0.246 (0.766)	Data Time 0.002 (0.129)	Loss 2.1577 (2.1483)	Entropy 0.62167 (0.62177)	Top-1 acc 72.656 (72.943)	Top-5 acc 87.500 (89.320)	lr 0.00004
Train [118][330/3239]	Time 0.265 (0.758)	Data Time 0.001 (0.125)	Loss 2.0494 (2.1472)	Entropy 0.62160 (0.62177)	Top-1 acc 73.828 (72.955)	Top-5 acc 91.406 (89.341)	lr 0.00004
Train [118][340/3239]	Time 2.566 (0.750)	Data Time 0.001 (0.122)	Loss 2.1269 (2.1463)	Entropy 0.62160 (0.62176)	Top-1 acc 72.656 (72.990)	Top-5 acc 89.453 (89.349)	lr 0.00004
Train [118][350/3239]	Time 0.237 (0.735)	Data Time 0.001 (0.118)	Loss 2.0625 (2.1467)	Entropy 0.62162 (0.62176)	Top-1 acc 75.391 (72.987)	Top-5 acc 90.625 (89.341)	lr 0.00004
Train [118][360/3239]	Time 0.250 (0.729)	Data Time 0.002 (0.115)	Loss 2.0450 (2.1464)	Entropy 0.62154 (0.62175)	Top-1 acc 76.953 (72.992)	Top-5 acc 89.453 (89.348)	lr 0.00004
Train [118][370/3239]	Time 0.336 (0.723)	Data Time 0.001 (0.112)	Loss 2.0810 (2.1474)	Entropy 0.62157 (0.62175)	Top-1 acc 72.656 (72.957)	Top-5 acc 89.844 (89.333)	lr 0.00004
Train [118][380/3239]	Time 0.236 (0.716)	Data Time 0.001 (0.109)	Loss 2.2853 (2.1474)	Entropy 0.62163 (0.62174)	Top-1 acc 70.312 (72.979)	Top-5 acc 87.891 (89.340)	lr 0.00004
Train [118][390/3239]	Time 0.240 (0.710)	Data Time 0.001 (0.106)	Loss 2.2102 (2.1476)	Entropy 0.62139 (0.62174)	Top-1 acc 67.578 (72.981)	Top-5 acc 87.500 (89.333)	lr 0.00004
Train [118][400/3239]	Time 0.232 (0.705)	Data Time 0.001 (0.104)	Loss 2.1273 (2.1471)	Entropy 0.62137 (0.62173)	Top-1 acc 76.172 (73.006)	Top-5 acc 90.234 (89.352)	lr 0.00004
Train [118][410/3239]	Time 0.332 (0.700)	Data Time 0.001 (0.101)	Loss 2.2927 (2.1473)	Entropy 0.62133 (0.62172)	Top-1 acc 70.703 (73.016)	Top-5 acc 86.328 (89.338)	lr 0.00004
Train [118][420/3239]	Time 0.226 (0.694)	Data Time 0.001 (0.099)	Loss 2.0232 (2.1483)	Entropy 0.62136 (0.62171)	Top-1 acc 76.562 (72.971)	Top-5 acc 91.797 (89.332)	lr 0.00004
Train [118][430/3239]	Time 0.233 (0.689)	Data Time 0.001 (0.097)	Loss 2.2213 (2.1479)	Entropy 0.62133 (0.62170)	Top-1 acc 69.141 (72.998)	Top-5 acc 88.281 (89.339)	lr 0.00004
Train [118][440/3239]	Time 0.237 (0.684)	Data Time 0.001 (0.094)	Loss 2.1179 (2.1477)	Entropy 0.62132 (0.62169)	Top-1 acc 75.000 (73.012)	Top-5 acc 91.016 (89.345)	lr 0.00004
Train [118][450/3239]	Time 2.612 (0.680)	Data Time 0.001 (0.092)	Loss 2.1728 (2.1479)	Entropy 0.62132 (0.62168)	Top-1 acc 72.656 (73.012)	Top-5 acc 89.062 (89.336)	lr 0.00004
Train [118][460/3239]	Time 0.236 (0.671)	Data Time 0.001 (0.090)	Loss 2.2259 (2.1479)	Entropy 0.62126 (0.62168)	Top-1 acc 73.047 (73.009)	Top-5 acc 87.109 (89.340)	lr 0.00003
Train [118][470/3239]	Time 0.240 (0.666)	Data Time 0.001 (0.089)	Loss 2.0523 (2.1485)	Entropy 0.62118 (0.62167)	Top-1 acc 76.172 (73.005)	Top-5 acc 91.406 (89.320)	lr 0.00003
Train [118][480/3239]	Time 0.222 (0.662)	Data Time 0.001 (0.087)	Loss 2.1693 (2.1493)	Entropy 0.62107 (0.62165)	Top-1 acc 73.047 (72.979)	Top-5 acc 89.844 (89.314)	lr 0.00003
Train [118][490/3239]	Time 0.232 (0.659)	Data Time 0.001 (0.085)	Loss 2.1374 (2.1490)	Entropy 0.62103 (0.62164)	Top-1 acc 73.047 (72.969)	Top-5 acc 91.016 (89.326)	lr 0.00003
Train [118][500/3239]	Time 0.239 (0.655)	Data Time 0.001 (0.083)	Loss 2.1048 (2.1485)	Entropy 0.62103 (0.62163)	Top-1 acc 73.828 (72.982)	Top-5 acc 90.234 (89.331)	lr 0.00003
Train [118][510/3239]	Time 0.230 (0.652)	Data Time 0.001 (0.082)	Loss 2.0308 (2.1476)	Entropy 0.62099 (0.62162)	Top-1 acc 76.172 (73.003)	Top-5 acc 92.188 (89.356)	lr 0.00003
Train [118][520/3239]	Time 0.226 (0.648)	Data Time 0.001 (0.080)	Loss 2.0961 (2.1478)	Entropy 0.62093 (0.62160)	Top-1 acc 74.609 (73.007)	Top-5 acc 91.406 (89.354)	lr 0.00003
Train [118][530/3239]	Time 0.225 (0.645)	Data Time 0.002 (0.079)	Loss 2.0225 (2.1478)	Entropy 0.62083 (0.62159)	Top-1 acc 77.344 (73.020)	Top-5 acc 91.016 (89.347)	lr 0.00003
Train [118][540/3239]	Time 0.451 (0.738)	Data Time 0.002 (0.077)	Loss 2.0216 (2.1482)	Entropy 0.62075 (0.62158)	Top-1 acc 78.125 (73.027)	Top-5 acc 89.453 (89.337)	lr 0.00003
Train [118][550/3239]	Time 0.236 (0.736)	Data Time 0.002 (0.076)	Loss 2.2781 (2.1488)	Entropy 0.62074 (0.62156)	Top-1 acc 67.578 (72.998)	Top-5 acc 84.375 (89.316)	lr 0.00003
Train [118][560/3239]	Time 2.642 (0.732)	Data Time 0.002 (0.075)	Loss 2.0067 (2.1492)	Entropy 0.62074 (0.62155)	Top-1 acc 75.000 (72.969)	Top-5 acc 91.016 (89.306)	lr 0.00003
Train [118][570/3239]	Time 0.242 (0.723)	Data Time 0.001 (0.073)	Loss 2.1334 (2.1494)	Entropy 0.62063 (0.62153)	Top-1 acc 73.438 (72.958)	Top-5 acc 89.453 (89.309)	lr 0.00003
Train [118][580/3239]	Time 0.241 (0.720)	Data Time 0.001 (0.072)	Loss 2.1465 (2.1498)	Entropy 0.62050 (0.62151)	Top-1 acc 75.000 (72.951)	Top-5 acc 89.844 (89.303)	lr 0.00003
Train [118][590/3239]	Time 0.235 (0.716)	Data Time 0.001 (0.071)	Loss 2.1415 (2.1497)	Entropy 0.62048 (0.62150)	Top-1 acc 74.219 (72.952)	Top-5 acc 90.234 (89.306)	lr 0.00003
Train [118][600/3239]	Time 0.239 (0.712)	Data Time 0.001 (0.070)	Loss 2.2431 (2.1503)	Entropy 0.62041 (0.62148)	Top-1 acc 67.578 (72.925)	Top-5 acc 88.281 (89.299)	lr 0.00003
Train [118][610/3239]	Time 0.238 (0.708)	Data Time 0.001 (0.069)	Loss 2.0979 (2.1509)	Entropy 0.62043 (0.62146)	Top-1 acc 75.000 (72.902)	Top-5 acc 90.234 (89.293)	lr 0.00003
Train [118][620/3239]	Time 0.247 (0.704)	Data Time 0.001 (0.068)	Loss 2.0920 (2.1507)	Entropy 0.62041 (0.62144)	Top-1 acc 73.828 (72.905)	Top-5 acc 91.797 (89.295)	lr 0.00003
Train [118][630/3239]	Time 0.235 (0.701)	Data Time 0.001 (0.067)	Loss 2.1396 (2.1509)	Entropy 0.62035 (0.62143)	Top-1 acc 72.266 (72.899)	Top-5 acc 89.062 (89.307)	lr 0.00003
Train [118][640/3239]	Time 0.282 (0.698)	Data Time 0.001 (0.066)	Loss 2.1371 (2.1508)	Entropy 0.62033 (0.62141)	Top-1 acc 72.266 (72.899)	Top-5 acc 88.281 (89.303)	lr 0.00003
Train [118][650/3239]	Time 0.230 (0.694)	Data Time 0.001 (0.065)	Loss 2.0936 (2.1503)	Entropy 0.62023 (0.62139)	Top-1 acc 73.047 (72.918)	Top-5 acc 89.844 (89.312)	lr 0.00003
Train [118][660/3239]	Time 0.236 (0.691)	Data Time 0.001 (0.064)	Loss 2.2085 (2.1504)	Entropy 0.62022 (0.62138)	Top-1 acc 73.828 (72.919)	Top-5 acc 88.672 (89.313)	lr 0.00003
Train [118][670/3239]	Time 2.613 (0.688)	Data Time 0.001 (0.063)	Loss 2.1981 (2.1497)	Entropy 0.62022 (0.62136)	Top-1 acc 72.266 (72.944)	Top-5 acc 87.891 (89.329)	lr 0.00003
Train [118][680/3239]	Time 0.242 (0.681)	Data Time 0.001 (0.062)	Loss 2.0065 (2.1492)	Entropy 0.62019 (0.62134)	Top-1 acc 78.906 (72.976)	Top-5 acc 91.406 (89.333)	lr 0.00003
Train [118][690/3239]	Time 0.246 (0.679)	Data Time 0.001 (0.061)	Loss 2.0086 (2.1488)	Entropy 0.62021 (0.62132)	Top-1 acc 73.828 (72.982)	Top-5 acc 92.578 (89.334)	lr 0.00003
Train [118][700/3239]	Time 0.351 (0.676)	Data Time 0.001 (0.060)	Loss 2.1017 (2.1484)	Entropy 0.62020 (0.62131)	Top-1 acc 74.219 (72.987)	Top-5 acc 90.625 (89.341)	lr 0.00003
Train [118][710/3239]	Time 0.240 (0.674)	Data Time 0.001 (0.059)	Loss 2.2958 (2.1487)	Entropy 0.62017 (0.62129)	Top-1 acc 69.141 (72.977)	Top-5 acc 86.328 (89.333)	lr 0.00003
Train [118][720/3239]	Time 0.250 (0.671)	Data Time 0.002 (0.058)	Loss 2.2219 (2.1485)	Entropy 0.62014 (0.62128)	Top-1 acc 67.969 (72.967)	Top-5 acc 87.891 (89.340)	lr 0.00003
Train [118][730/3239]	Time 0.226 (0.669)	Data Time 0.001 (0.058)	Loss 2.2788 (2.1488)	Entropy 0.62027 (0.62126)	Top-1 acc 70.703 (72.970)	Top-5 acc 87.891 (89.345)	lr 0.00003
Train [118][740/3239]	Time 0.332 (0.666)	Data Time 0.001 (0.057)	Loss 2.2964 (2.1493)	Entropy 0.62027 (0.62125)	Top-1 acc 68.750 (72.954)	Top-5 acc 88.672 (89.338)	lr 0.00003
Train [118][750/3239]	Time 0.208 (0.664)	Data Time 0.001 (0.056)	Loss 2.1505 (2.1494)	Entropy 0.62019 (0.62124)	Top-1 acc 73.047 (72.956)	Top-5 acc 88.281 (89.338)	lr 0.00003
Train [118][760/3239]	Time 0.221 (0.661)	Data Time 0.001 (0.055)	Loss 2.2503 (2.1495)	Entropy 0.62017 (0.62122)	Top-1 acc 67.969 (72.956)	Top-5 acc 87.500 (89.331)	lr 0.00003
Train [118][770/3239]	Time 0.269 (0.659)	Data Time 0.001 (0.055)	Loss 2.1089 (2.1491)	Entropy 0.62014 (0.62121)	Top-1 acc 76.172 (72.965)	Top-5 acc 89.844 (89.334)	lr 0.00003
Train [118][780/3239]	Time 2.618 (0.657)	Data Time 0.001 (0.054)	Loss 2.1027 (2.1489)	Entropy 0.62014 (0.62119)	Top-1 acc 73.828 (72.977)	Top-5 acc 89.062 (89.328)	lr 0.00003
Train [118][790/3239]	Time 0.259 (0.652)	Data Time 0.001 (0.053)	Loss 2.0694 (2.1482)	Entropy 0.62009 (0.62118)	Top-1 acc 76.562 (72.993)	Top-5 acc 91.797 (89.336)	lr 0.00003
Train [118][800/3239]	Time 0.260 (0.650)	Data Time 0.002 (0.053)	Loss 2.0079 (2.1479)	Entropy 0.62011 (0.62117)	Top-1 acc 75.781 (72.993)	Top-5 acc 93.359 (89.344)	lr 0.00003
Train [118][810/3239]	Time 0.229 (0.648)	Data Time 0.001 (0.052)	Loss 2.2520 (2.1476)	Entropy 0.62008 (0.62115)	Top-1 acc 68.359 (73.003)	Top-5 acc 89.453 (89.351)	lr 0.00003
Train [118][820/3239]	Time 0.242 (0.646)	Data Time 0.001 (0.052)	Loss 2.1437 (2.1479)	Entropy 0.62000 (0.62114)	Top-1 acc 72.266 (72.993)	Top-5 acc 88.281 (89.342)	lr 0.00003
Train [118][830/3239]	Time 0.229 (0.644)	Data Time 0.001 (0.051)	Loss 2.1608 (2.1477)	Entropy 0.61996 (0.62113)	Top-1 acc 73.828 (73.001)	Top-5 acc 90.234 (89.355)	lr 0.00003
Train [118][840/3239]	Time 0.231 (0.642)	Data Time 0.001 (0.050)	Loss 2.1735 (2.1473)	Entropy 0.61989 (0.62111)	Top-1 acc 72.266 (73.007)	Top-5 acc 87.500 (89.356)	lr 0.00003
Train [118][850/3239]	Time 0.233 (0.640)	Data Time 0.001 (0.050)	Loss 2.1884 (2.1475)	Entropy 0.61994 (0.62110)	Top-1 acc 68.359 (72.986)	Top-5 acc 88.672 (89.352)	lr 0.00003
Train [118][860/3239]	Time 0.227 (0.638)	Data Time 0.001 (0.049)	Loss 2.2488 (2.1474)	Entropy 0.61992 (0.62108)	Top-1 acc 69.531 (72.990)	Top-5 acc 85.156 (89.354)	lr 0.00003
Train [118][870/3239]	Time 0.212 (0.637)	Data Time 0.001 (0.049)	Loss 2.1704 (2.1476)	Entropy 0.61987 (0.62107)	Top-1 acc 72.656 (72.985)	Top-5 acc 87.891 (89.351)	lr 0.00003
Train [118][880/3239]	Time 0.255 (0.635)	Data Time 0.001 (0.048)	Loss 2.1187 (2.1475)	Entropy 0.61984 (0.62106)	Top-1 acc 73.438 (72.990)	Top-5 acc 89.844 (89.350)	lr 0.00003
Train [118][890/3239]	Time 2.633 (0.633)	Data Time 0.001 (0.048)	Loss 2.1600 (2.1479)	Entropy 0.61984 (0.62104)	Top-1 acc 74.219 (72.979)	Top-5 acc 89.844 (89.342)	lr 0.00003
Train [118][900/3239]	Time 0.253 (0.629)	Data Time 0.001 (0.047)	Loss 2.2249 (2.1481)	Entropy 0.61982 (0.62103)	Top-1 acc 70.312 (72.976)	Top-5 acc 88.672 (89.334)	lr 0.00003
Train [118][910/3239]	Time 0.365 (0.682)	Data Time 0.002 (0.047)	Loss 2.1159 (2.1477)	Entropy 0.61981 (0.62102)	Top-1 acc 73.047 (72.993)	Top-5 acc 91.406 (89.342)	lr 0.00003
Train [118][920/3239]	Time 0.244 (0.681)	Data Time 0.002 (0.046)	Loss 2.2663 (2.1478)	Entropy 0.61980 (0.62100)	Top-1 acc 68.750 (72.989)	Top-5 acc 87.891 (89.341)	lr 0.00003
Train [118][930/3239]	Time 0.231 (0.679)	Data Time 0.001 (0.046)	Loss 2.1903 (2.1478)	Entropy 0.61980 (0.62099)	Top-1 acc 71.484 (72.991)	Top-5 acc 87.500 (89.335)	lr 0.00003
Train [118][940/3239]	Time 0.238 (0.676)	Data Time 0.001 (0.045)	Loss 2.2183 (2.1482)	Entropy 0.61968 (0.62098)	Top-1 acc 69.141 (72.979)	Top-5 acc 87.891 (89.332)	lr 0.00003
Train [118][950/3239]	Time 0.259 (0.674)	Data Time 0.002 (0.045)	Loss 2.0385 (2.1480)	Entropy 0.61959 (0.62096)	Top-1 acc 76.562 (72.988)	Top-5 acc 91.016 (89.339)	lr 0.00003
Train [118][960/3239]	Time 0.235 (0.673)	Data Time 0.001 (0.044)	Loss 2.2456 (2.1484)	Entropy 0.61948 (0.62095)	Top-1 acc 70.312 (72.980)	Top-5 acc 88.281 (89.336)	lr 0.00003
Train [118][970/3239]	Time 0.253 (0.671)	Data Time 0.002 (0.044)	Loss 2.2692 (2.1485)	Entropy 0.61945 (0.62093)	Top-1 acc 68.359 (72.973)	Top-5 acc 88.672 (89.336)	lr 0.00003
Train [118][980/3239]	Time 0.300 (0.669)	Data Time 0.001 (0.043)	Loss 2.1043 (2.1486)	Entropy 0.61936 (0.62092)	Top-1 acc 74.219 (72.968)	Top-5 acc 90.625 (89.337)	lr 0.00003
Train [118][990/3239]	Time 0.368 (0.668)	Data Time 0.002 (0.043)	Loss 2.0438 (2.1488)	Entropy 0.61933 (0.62090)	Top-1 acc 74.609 (72.963)	Top-5 acc 89.453 (89.329)	lr 0.00003
Train [118][1000/3239]	Time 2.509 (0.666)	Data Time 0.001 (0.043)	Loss 2.2722 (2.1488)	Entropy 0.61933 (0.62089)	Top-1 acc 68.750 (72.971)	Top-5 acc 89.062 (89.334)	lr 0.00003
Train [118][1010/3239]	Time 0.269 (0.662)	Data Time 0.002 (0.042)	Loss 2.2196 (2.1486)	Entropy 0.61931 (0.62087)	Top-1 acc 72.656 (72.983)	Top-5 acc 88.281 (89.326)	lr 0.00003
Train [118][1020/3239]	Time 0.246 (0.661)	Data Time 0.001 (0.042)	Loss 2.1419 (2.1486)	Entropy 0.61918 (0.62085)	Top-1 acc 71.875 (72.991)	Top-5 acc 85.547 (89.322)	lr 0.00003
Train [118][1030/3239]	Time 0.229 (0.659)	Data Time 0.001 (0.041)	Loss 2.1660 (2.1483)	Entropy 0.61918 (0.62084)	Top-1 acc 71.094 (73.006)	Top-5 acc 87.109 (89.321)	lr 0.00003
Train [118][1040/3239]	Time 0.286 (0.657)	Data Time 0.001 (0.041)	Loss 2.1757 (2.1486)	Entropy 0.61921 (0.62082)	Top-1 acc 71.484 (72.997)	Top-5 acc 89.453 (89.312)	lr 0.00003
Train [118][1050/3239]	Time 0.251 (0.656)	Data Time 0.001 (0.041)	Loss 2.0002 (2.1483)	Entropy 0.61916 (0.62081)	Top-1 acc 76.953 (73.003)	Top-5 acc 89.844 (89.319)	lr 0.00003
Train [118][1060/3239]	Time 0.236 (0.655)	Data Time 0.001 (0.040)	Loss 2.2030 (2.1487)	Entropy 0.61916 (0.62079)	Top-1 acc 69.922 (72.983)	Top-5 acc 88.672 (89.312)	lr 0.00003
Train [118][1070/3239]	Time 0.230 (0.653)	Data Time 0.001 (0.040)	Loss 1.9316 (2.1485)	Entropy 0.61911 (0.62078)	Top-1 acc 78.125 (72.983)	Top-5 acc 93.359 (89.316)	lr 0.00003
Train [118][1080/3239]	Time 0.211 (0.651)	Data Time 0.002 (0.040)	Loss 2.3242 (2.1486)	Entropy 0.61909 (0.62076)	Top-1 acc 67.188 (72.985)	Top-5 acc 86.328 (89.314)	lr 0.00003
Train [118][1090/3239]	Time 0.261 (0.650)	Data Time 0.001 (0.039)	Loss 2.1476 (2.1484)	Entropy 0.61904 (0.62074)	Top-1 acc 73.828 (72.990)	Top-5 acc 90.234 (89.313)	lr 0.00003
Train [118][1100/3239]	Time 0.320 (0.648)	Data Time 0.003 (0.039)	Loss 2.0791 (2.1487)	Entropy 0.61920 (0.62073)	Top-1 acc 76.172 (72.983)	Top-5 acc 90.234 (89.311)	lr 0.00003
Train [118][1110/3239]	Time 2.641 (0.647)	Data Time 0.001 (0.039)	Loss 2.1210 (2.1484)	Entropy 0.61920 (0.62072)	Top-1 acc 75.781 (72.992)	Top-5 acc 90.625 (89.317)	lr 0.00003
Train [118][1120/3239]	Time 0.383 (0.644)	Data Time 0.002 (0.038)	Loss 2.1721 (2.1485)	Entropy 0.61923 (0.62070)	Top-1 acc 73.828 (72.992)	Top-5 acc 89.062 (89.307)	lr 0.00003
Train [118][1130/3239]	Time 0.273 (0.642)	Data Time 0.002 (0.038)	Loss 2.1552 (2.1489)	Entropy 0.61918 (0.62069)	Top-1 acc 73.047 (72.985)	Top-5 acc 87.891 (89.303)	lr 0.00003
Train [118][1140/3239]	Time 0.241 (0.641)	Data Time 0.001 (0.038)	Loss 2.1908 (2.1487)	Entropy 0.61918 (0.62068)	Top-1 acc 75.000 (72.990)	Top-5 acc 87.109 (89.306)	lr 0.00003
Train [118][1150/3239]	Time 0.235 (0.640)	Data Time 0.001 (0.037)	Loss 2.1619 (2.1489)	Entropy 0.61918 (0.62066)	Top-1 acc 73.438 (72.991)	Top-5 acc 89.453 (89.305)	lr 0.00003
Train [118][1160/3239]	Time 0.326 (0.639)	Data Time 0.001 (0.037)	Loss 2.1110 (2.1490)	Entropy 0.61915 (0.62065)	Top-1 acc 71.875 (72.992)	Top-5 acc 89.453 (89.303)	lr 0.00003
Train [118][1170/3239]	Time 0.240 (0.638)	Data Time 0.001 (0.037)	Loss 2.1464 (2.1494)	Entropy 0.61909 (0.62064)	Top-1 acc 70.312 (72.987)	Top-5 acc 89.844 (89.293)	lr 0.00003
Train [118][1180/3239]	Time 0.237 (0.636)	Data Time 0.001 (0.036)	Loss 2.1705 (2.1505)	Entropy 0.61908 (0.62062)	Top-1 acc 69.531 (72.961)	Top-5 acc 89.062 (89.271)	lr 0.00003
Train [118][1190/3239]	Time 0.239 (0.635)	Data Time 0.001 (0.036)	Loss 2.1077 (2.1509)	Entropy 0.61903 (0.62061)	Top-1 acc 73.828 (72.947)	Top-5 acc 91.406 (89.266)	lr 0.00003
Train [118][1200/3239]	Time 0.363 (0.634)	Data Time 0.002 (0.036)	Loss 2.1811 (2.1510)	Entropy 0.61908 (0.62060)	Top-1 acc 71.875 (72.941)	Top-5 acc 87.109 (89.261)	lr 0.00003
Train [118][1210/3239]	Time 0.291 (0.633)	Data Time 0.001 (0.036)	Loss 2.1065 (2.1517)	Entropy 0.61895 (0.62058)	Top-1 acc 75.391 (72.925)	Top-5 acc 91.016 (89.252)	lr 0.00003
Train [118][1220/3239]	Time 2.569 (0.632)	Data Time 0.001 (0.035)	Loss 2.1637 (2.1516)	Entropy 0.61895 (0.62057)	Top-1 acc 72.656 (72.928)	Top-5 acc 88.281 (89.257)	lr 0.00003
Train [118][1230/3239]	Time 0.246 (0.629)	Data Time 0.001 (0.035)	Loss 2.2038 (2.1513)	Entropy 0.61882 (0.62056)	Top-1 acc 72.656 (72.935)	Top-5 acc 88.281 (89.262)	lr 0.00003
Train [118][1240/3239]	Time 0.239 (0.627)	Data Time 0.001 (0.035)	Loss 2.1760 (2.1517)	Entropy 0.61883 (0.62054)	Top-1 acc 69.922 (72.924)	Top-5 acc 89.453 (89.256)	lr 0.00003
Train [118][1250/3239]	Time 0.233 (0.626)	Data Time 0.001 (0.034)	Loss 2.1989 (2.1518)	Entropy 0.61876 (0.62053)	Top-1 acc 73.438 (72.914)	Top-5 acc 87.500 (89.257)	lr 0.00003
Train [118][1260/3239]	Time 0.230 (0.625)	Data Time 0.001 (0.034)	Loss 2.1297 (2.1521)	Entropy 0.61875 (0.62052)	Top-1 acc 73.828 (72.912)	Top-5 acc 90.234 (89.251)	lr 0.00003
Train [118][1270/3239]	Time 0.442 (0.666)	Data Time 0.002 (0.034)	Loss 2.0735 (2.1522)	Entropy 0.61867 (0.62050)	Top-1 acc 77.344 (72.919)	Top-5 acc 92.969 (89.252)	lr 0.00003
Train [118][1280/3239]	Time 0.255 (0.665)	Data Time 0.002 (0.034)	Loss 2.1090 (2.1522)	Entropy 0.61864 (0.62049)	Top-1 acc 73.047 (72.927)	Top-5 acc 91.406 (89.251)	lr 0.00003
Train [118][1290/3239]	Time 0.293 (0.663)	Data Time 0.002 (0.033)	Loss 2.1942 (2.1526)	Entropy 0.61851 (0.62047)	Top-1 acc 72.656 (72.921)	Top-5 acc 89.453 (89.240)	lr 0.00003
Train [118][1300/3239]	Time 0.229 (0.662)	Data Time 0.002 (0.033)	Loss 2.1567 (2.1527)	Entropy 0.61850 (0.62046)	Top-1 acc 74.219 (72.922)	Top-5 acc 88.672 (89.240)	lr 0.00003
Train [118][1310/3239]	Time 0.249 (0.661)	Data Time 0.001 (0.033)	Loss 1.9798 (2.1530)	Entropy 0.61850 (0.62044)	Top-1 acc 76.172 (72.921)	Top-5 acc 91.406 (89.234)	lr 0.00003
Train [118][1320/3239]	Time 0.240 (0.660)	Data Time 0.001 (0.033)	Loss 2.1447 (2.1528)	Entropy 0.61853 (0.62043)	Top-1 acc 76.172 (72.935)	Top-5 acc 88.281 (89.240)	lr 0.00003
Train [118][1330/3239]	Time 2.561 (0.658)	Data Time 0.001 (0.033)	Loss 2.2337 (2.1527)	Entropy 0.61853 (0.62041)	Top-1 acc 70.703 (72.937)	Top-5 acc 89.453 (89.238)	lr 0.00003
Train [118][1340/3239]	Time 0.239 (0.655)	Data Time 0.001 (0.032)	Loss 2.2110 (2.1528)	Entropy 0.61847 (0.62040)	Top-1 acc 70.703 (72.933)	Top-5 acc 89.453 (89.238)	lr 0.00003
Train [118][1350/3239]	Time 0.217 (0.654)	Data Time 0.001 (0.032)	Loss 2.0774 (2.1526)	Entropy 0.61834 (0.62038)	Top-1 acc 76.172 (72.940)	Top-5 acc 90.625 (89.241)	lr 0.00003
Train [118][1360/3239]	Time 0.234 (0.653)	Data Time 0.002 (0.032)	Loss 2.1734 (2.1527)	Entropy 0.61824 (0.62037)	Top-1 acc 69.922 (72.935)	Top-5 acc 90.234 (89.236)	lr 0.00003
Train [118][1370/3239]	Time 0.242 (0.652)	Data Time 0.002 (0.032)	Loss 2.3167 (2.1528)	Entropy 0.61833 (0.62035)	Top-1 acc 70.312 (72.933)	Top-5 acc 87.109 (89.238)	lr 0.00003
Train [118][1380/3239]	Time 0.249 (0.650)	Data Time 0.001 (0.031)	Loss 2.2015 (2.1528)	Entropy 0.61830 (0.62034)	Top-1 acc 72.656 (72.927)	Top-5 acc 88.281 (89.239)	lr 0.00003
Train [118][1390/3239]	Time 0.242 (0.649)	Data Time 0.001 (0.031)	Loss 1.9948 (2.1528)	Entropy 0.61827 (0.62032)	Top-1 acc 76.953 (72.926)	Top-5 acc 90.625 (89.243)	lr 0.00003
Train [118][1400/3239]	Time 0.269 (0.648)	Data Time 0.001 (0.031)	Loss 2.0118 (2.1527)	Entropy 0.61823 (0.62031)	Top-1 acc 73.438 (72.925)	Top-5 acc 90.625 (89.243)	lr 0.00003
Train [118][1410/3239]	Time 0.264 (0.647)	Data Time 0.001 (0.031)	Loss 2.1252 (2.1527)	Entropy 0.61806 (0.62029)	Top-1 acc 76.953 (72.927)	Top-5 acc 88.281 (89.241)	lr 0.00003
Train [118][1420/3239]	Time 0.230 (0.646)	Data Time 0.002 (0.031)	Loss 2.1782 (2.1527)	Entropy 0.61813 (0.62028)	Top-1 acc 73.828 (72.928)	Top-5 acc 88.672 (89.233)	lr 0.00003
Train [118][1430/3239]	Time 0.241 (0.645)	Data Time 0.001 (0.030)	Loss 2.2489 (2.1529)	Entropy 0.61814 (0.62026)	Top-1 acc 75.000 (72.927)	Top-5 acc 86.719 (89.226)	lr 0.00003
Train [118][1440/3239]	Time 2.545 (0.644)	Data Time 0.001 (0.030)	Loss 2.1029 (2.1529)	Entropy 0.61814 (0.62025)	Top-1 acc 74.219 (72.928)	Top-5 acc 89.453 (89.229)	lr 0.00003
Train [118][1450/3239]	Time 0.327 (0.641)	Data Time 0.002 (0.030)	Loss 2.3118 (2.1532)	Entropy 0.61797 (0.62023)	Top-1 acc 71.094 (72.923)	Top-5 acc 86.719 (89.222)	lr 0.00003
Train [118][1460/3239]	Time 0.236 (0.640)	Data Time 0.001 (0.030)	Loss 2.2772 (2.1532)	Entropy 0.61787 (0.62022)	Top-1 acc 70.312 (72.925)	Top-5 acc 87.500 (89.221)	lr 0.00003
Train [118][1470/3239]	Time 0.234 (0.639)	Data Time 0.001 (0.030)	Loss 2.2416 (2.1535)	Entropy 0.61769 (0.62020)	Top-1 acc 71.875 (72.917)	Top-5 acc 87.500 (89.215)	lr 0.00003
Train [118][1480/3239]	Time 0.222 (0.638)	Data Time 0.001 (0.029)	Loss 2.0783 (2.1532)	Entropy 0.61762 (0.62018)	Top-1 acc 75.000 (72.930)	Top-5 acc 90.625 (89.219)	lr 0.00003
Train [118][1490/3239]	Time 0.322 (0.637)	Data Time 0.001 (0.029)	Loss 2.0300 (2.1531)	Entropy 0.61765 (0.62016)	Top-1 acc 77.734 (72.937)	Top-5 acc 91.016 (89.217)	lr 0.00003
Train [118][1500/3239]	Time 0.226 (0.636)	Data Time 0.001 (0.029)	Loss 2.1859 (2.1532)	Entropy 0.61755 (0.62015)	Top-1 acc 70.703 (72.934)	Top-5 acc 88.672 (89.212)	lr 0.00003
Train [118][1510/3239]	Time 0.260 (0.635)	Data Time 0.001 (0.029)	Loss 2.1522 (2.1531)	Entropy 0.61749 (0.62013)	Top-1 acc 72.656 (72.938)	Top-5 acc 90.625 (89.212)	lr 0.00003
Train [118][1520/3239]	Time 0.249 (0.634)	Data Time 0.001 (0.029)	Loss 2.1578 (2.1533)	Entropy 0.61745 (0.62011)	Top-1 acc 74.219 (72.927)	Top-5 acc 87.891 (89.211)	lr 0.00003
Train [118][1530/3239]	Time 0.369 (0.633)	Data Time 0.001 (0.028)	Loss 2.0516 (2.1531)	Entropy 0.61731 (0.62010)	Top-1 acc 76.953 (72.932)	Top-5 acc 91.016 (89.214)	lr 0.00003
Train [118][1540/3239]	Time 0.239 (0.632)	Data Time 0.001 (0.028)	Loss 2.2040 (2.1531)	Entropy 0.61739 (0.62008)	Top-1 acc 71.875 (72.933)	Top-5 acc 88.672 (89.217)	lr 0.00003
Train [118][1550/3239]	Time 2.592 (0.631)	Data Time 0.001 (0.028)	Loss 2.0359 (2.1527)	Entropy 0.61739 (0.62006)	Top-1 acc 76.562 (72.941)	Top-5 acc 89.062 (89.220)	lr 0.00003
Train [118][1560/3239]	Time 0.245 (0.629)	Data Time 0.001 (0.028)	Loss 2.0418 (2.1526)	Entropy 0.61735 (0.62004)	Top-1 acc 73.047 (72.948)	Top-5 acc 92.188 (89.224)	lr 0.00003
Train [118][1570/3239]	Time 0.229 (0.628)	Data Time 0.001 (0.028)	Loss 2.2168 (2.1525)	Entropy 0.61736 (0.62003)	Top-1 acc 68.359 (72.951)	Top-5 acc 89.062 (89.224)	lr 0.00003
Train [118][1580/3239]	Time 0.284 (0.627)	Data Time 0.001 (0.028)	Loss 2.2365 (2.1526)	Entropy 0.61730 (0.62001)	Top-1 acc 71.484 (72.947)	Top-5 acc 87.891 (89.221)	lr 0.00003
Train [118][1590/3239]	Time 0.231 (0.626)	Data Time 0.001 (0.027)	Loss 2.1544 (2.1527)	Entropy 0.61726 (0.61999)	Top-1 acc 74.609 (72.944)	Top-5 acc 89.062 (89.221)	lr 0.00003
Train [118][1600/3239]	Time 0.234 (0.625)	Data Time 0.001 (0.027)	Loss 2.0655 (2.1527)	Entropy 0.61726 (0.61997)	Top-1 acc 73.047 (72.943)	Top-5 acc 91.016 (89.223)	lr 0.00003
Train [118][1610/3239]	Time 0.233 (0.624)	Data Time 0.001 (0.027)	Loss 2.1293 (2.1526)	Entropy 0.61726 (0.61996)	Top-1 acc 73.438 (72.946)	Top-5 acc 89.062 (89.228)	lr 0.00003
Train [118][1620/3239]	Time 0.236 (0.624)	Data Time 0.001 (0.027)	Loss 1.9815 (2.1527)	Entropy 0.61718 (0.61994)	Top-1 acc 76.562 (72.945)	Top-5 acc 93.359 (89.230)	lr 0.00003
Train [118][1630/3239]	Time 0.324 (0.654)	Data Time 0.003 (0.027)	Loss 2.1494 (2.1529)	Entropy 0.61710 (0.61992)	Top-1 acc 74.219 (72.940)	Top-5 acc 90.234 (89.225)	lr 0.00003
Train [118][1640/3239]	Time 0.232 (0.654)	Data Time 0.002 (0.027)	Loss 2.2887 (2.1530)	Entropy 0.61695 (0.61991)	Top-1 acc 66.406 (72.939)	Top-5 acc 89.062 (89.225)	lr 0.00003
Train [118][1650/3239]	Time 0.304 (0.653)	Data Time 0.002 (0.027)	Loss 2.0295 (2.1526)	Entropy 0.61698 (0.61989)	Top-1 acc 76.953 (72.950)	Top-5 acc 91.406 (89.233)	lr 0.00003
Train [118][1660/3239]	Time 2.587 (0.652)	Data Time 0.002 (0.026)	Loss 2.1501 (2.1526)	Entropy 0.61698 (0.61987)	Top-1 acc 74.219 (72.945)	Top-5 acc 90.234 (89.231)	lr 0.00003
Train [118][1670/3239]	Time 0.232 (0.649)	Data Time 0.002 (0.026)	Loss 2.0247 (2.1525)	Entropy 0.61705 (0.61985)	Top-1 acc 78.125 (72.946)	Top-5 acc 92.578 (89.232)	lr 0.00003
Train [118][1680/3239]	Time 0.233 (0.649)	Data Time 0.001 (0.026)	Loss 2.2724 (2.1526)	Entropy 0.61694 (0.61984)	Top-1 acc 68.359 (72.947)	Top-5 acc 88.281 (89.226)	lr 0.00003
Train [118][1690/3239]	Time 0.239 (0.648)	Data Time 0.001 (0.026)	Loss 2.3421 (2.1526)	Entropy 0.61696 (0.61982)	Top-1 acc 67.969 (72.943)	Top-5 acc 86.719 (89.232)	lr 0.00003
Train [118][1700/3239]	Time 0.232 (0.647)	Data Time 0.001 (0.026)	Loss 2.1822 (2.1527)	Entropy 0.61691 (0.61980)	Top-1 acc 73.047 (72.943)	Top-5 acc 89.062 (89.231)	lr 0.00003
Train [118][1710/3239]	Time 0.235 (0.646)	Data Time 0.001 (0.026)	Loss 2.1580 (2.1526)	Entropy 0.61690 (0.61979)	Top-1 acc 74.219 (72.945)	Top-5 acc 89.453 (89.235)	lr 0.00003
Train [118][1720/3239]	Time 0.236 (0.645)	Data Time 0.001 (0.026)	Loss 2.1698 (2.1524)	Entropy 0.61688 (0.61977)	Top-1 acc 76.172 (72.955)	Top-5 acc 88.672 (89.240)	lr 0.00003
Train [118][1730/3239]	Time 0.244 (0.644)	Data Time 0.001 (0.025)	Loss 2.1396 (2.1531)	Entropy 0.61681 (0.61975)	Top-1 acc 75.000 (72.943)	Top-5 acc 89.062 (89.227)	lr 0.00003
Train [118][1740/3239]	Time 0.325 (0.643)	Data Time 0.002 (0.025)	Loss 2.1521 (2.1531)	Entropy 0.61681 (0.61973)	Top-1 acc 74.609 (72.947)	Top-5 acc 89.062 (89.225)	lr 0.00003
Train [118][1750/3239]	Time 0.269 (0.642)	Data Time 0.025 (0.025)	Loss 2.0824 (2.1530)	Entropy 0.61679 (0.61972)	Top-1 acc 74.219 (72.948)	Top-5 acc 90.625 (89.230)	lr 0.00003
Train [118][1760/3239]	Time 0.235 (0.641)	Data Time 0.001 (0.025)	Loss 2.1119 (2.1531)	Entropy 0.61679 (0.61970)	Top-1 acc 75.391 (72.942)	Top-5 acc 89.453 (89.230)	lr 0.00003
Train [118][1770/3239]	Time 2.550 (0.640)	Data Time 0.001 (0.025)	Loss 2.3241 (2.1530)	Entropy 0.61679 (0.61968)	Top-1 acc 69.922 (72.944)	Top-5 acc 84.375 (89.234)	lr 0.00003
Train [118][1780/3239]	Time 0.354 (0.638)	Data Time 0.001 (0.025)	Loss 2.0574 (2.1526)	Entropy 0.61684 (0.61967)	Top-1 acc 78.125 (72.960)	Top-5 acc 91.406 (89.244)	lr 0.00003
Train [118][1790/3239]	Time 0.235 (0.637)	Data Time 0.001 (0.025)	Loss 2.0947 (2.1527)	Entropy 0.61678 (0.61965)	Top-1 acc 74.609 (72.961)	Top-5 acc 91.016 (89.245)	lr 0.00003
Train [118][1800/3239]	Time 0.235 (0.636)	Data Time 0.001 (0.024)	Loss 2.1439 (2.1526)	Entropy 0.61678 (0.61964)	Top-1 acc 75.391 (72.960)	Top-5 acc 88.672 (89.245)	lr 0.00003
Train [118][1810/3239]	Time 0.220 (0.636)	Data Time 0.001 (0.024)	Loss 2.1741 (2.1528)	Entropy 0.61689 (0.61962)	Top-1 acc 73.828 (72.959)	Top-5 acc 89.844 (89.241)	lr 0.00003
Train [118][1820/3239]	Time 0.335 (0.635)	Data Time 0.001 (0.024)	Loss 2.2078 (2.1528)	Entropy 0.61688 (0.61961)	Top-1 acc 71.875 (72.961)	Top-5 acc 86.719 (89.240)	lr 0.00003
Train [118][1830/3239]	Time 0.222 (0.634)	Data Time 0.001 (0.024)	Loss 2.1181 (2.1526)	Entropy 0.61687 (0.61959)	Top-1 acc 75.000 (72.967)	Top-5 acc 89.062 (89.242)	lr 0.00003
Train [118][1840/3239]	Time 0.232 (0.633)	Data Time 0.001 (0.024)	Loss 2.1543 (2.1526)	Entropy 0.61681 (0.61958)	Top-1 acc 72.656 (72.968)	Top-5 acc 89.062 (89.242)	lr 0.00003
Train [118][1850/3239]	Time 0.240 (0.632)	Data Time 0.001 (0.024)	Loss 1.9023 (2.1525)	Entropy 0.61683 (0.61956)	Top-1 acc 80.859 (72.976)	Top-5 acc 94.531 (89.242)	lr 0.00003
Train [118][1860/3239]	Time 0.430 (0.632)	Data Time 0.001 (0.024)	Loss 2.0725 (2.1525)	Entropy 0.61673 (0.61955)	Top-1 acc 77.344 (72.974)	Top-5 acc 90.234 (89.244)	lr 0.00003
Train [118][1870/3239]	Time 0.299 (0.631)	Data Time 0.001 (0.024)	Loss 2.0487 (2.1522)	Entropy 0.61660 (0.61953)	Top-1 acc 78.125 (72.982)	Top-5 acc 91.797 (89.249)	lr 0.00003
Train [118][1880/3239]	Time 2.577 (0.630)	Data Time 0.002 (0.024)	Loss 2.2972 (2.1518)	Entropy 0.61660 (0.61952)	Top-1 acc 70.703 (72.989)	Top-5 acc 88.672 (89.257)	lr 0.00003
Train [118][1890/3239]	Time 0.266 (0.628)	Data Time 0.001 (0.023)	Loss 2.1009 (2.1521)	Entropy 0.61656 (0.61950)	Top-1 acc 76.172 (72.983)	Top-5 acc 92.188 (89.253)	lr 0.00003
Train [118][1900/3239]	Time 0.242 (0.628)	Data Time 0.001 (0.023)	Loss 2.1781 (2.1521)	Entropy 0.61656 (0.61949)	Top-1 acc 72.656 (72.983)	Top-5 acc 87.500 (89.252)	lr 0.00002
Train [118][1910/3239]	Time 0.235 (0.627)	Data Time 0.001 (0.023)	Loss 2.0751 (2.1520)	Entropy 0.61653 (0.61947)	Top-1 acc 71.484 (72.989)	Top-5 acc 91.797 (89.252)	lr 0.00002
Train [118][1920/3239]	Time 0.312 (0.626)	Data Time 0.001 (0.023)	Loss 2.0941 (2.1521)	Entropy 0.61650 (0.61945)	Top-1 acc 73.828 (72.986)	Top-5 acc 89.062 (89.250)	lr 0.00002
Train [118][1930/3239]	Time 0.241 (0.625)	Data Time 0.001 (0.023)	Loss 2.2083 (2.1522)	Entropy 0.61648 (0.61944)	Top-1 acc 68.750 (72.983)	Top-5 acc 90.234 (89.249)	lr 0.00002
Train [118][1940/3239]	Time 0.314 (0.625)	Data Time 0.001 (0.023)	Loss 2.1312 (2.1521)	Entropy 0.61638 (0.61942)	Top-1 acc 72.656 (72.986)	Top-5 acc 90.625 (89.252)	lr 0.00002
Train [118][1950/3239]	Time 0.260 (0.624)	Data Time 0.001 (0.023)	Loss 2.3210 (2.1521)	Entropy 0.61624 (0.61941)	Top-1 acc 64.844 (72.987)	Top-5 acc 88.672 (89.250)	lr 0.00002
Train [118][1960/3239]	Time 0.229 (0.623)	Data Time 0.001 (0.023)	Loss 2.2343 (2.1523)	Entropy 0.61624 (0.61939)	Top-1 acc 66.406 (72.982)	Top-5 acc 88.672 (89.249)	lr 0.00002
Train [118][1970/3239]	Time 0.260 (0.623)	Data Time 0.001 (0.023)	Loss 2.1191 (2.1520)	Entropy 0.61598 (0.61938)	Top-1 acc 71.484 (72.989)	Top-5 acc 89.062 (89.253)	lr 0.00002
Train [118][1980/3239]	Time 0.225 (0.622)	Data Time 0.001 (0.022)	Loss 2.1368 (2.1520)	Entropy 0.61596 (0.61936)	Top-1 acc 72.266 (72.986)	Top-5 acc 89.844 (89.258)	lr 0.00002
Train [118][1990/3239]	Time 57.679 (0.649)	Data Time 0.001 (0.022)	Loss 2.2528 (2.1521)	Entropy 0.61596 (0.61934)	Top-1 acc 69.141 (72.985)	Top-5 acc 87.891 (89.260)	lr 0.00002
Train [118][2000/3239]	Time 0.230 (0.647)	Data Time 0.002 (0.022)	Loss 2.1443 (2.1522)	Entropy 0.61588 (0.61932)	Top-1 acc 71.875 (72.981)	Top-5 acc 87.891 (89.257)	lr 0.00002
Train [118][2010/3239]	Time 0.249 (0.646)	Data Time 0.002 (0.022)	Loss 2.0982 (2.1522)	Entropy 0.61594 (0.61931)	Top-1 acc 72.656 (72.980)	Top-5 acc 91.797 (89.258)	lr 0.00002
Train [118][2020/3239]	Time 0.245 (0.645)	Data Time 0.001 (0.022)	Loss 2.1327 (2.1524)	Entropy 0.61599 (0.61929)	Top-1 acc 73.047 (72.978)	Top-5 acc 89.844 (89.254)	lr 0.00002
Train [118][2030/3239]	Time 0.236 (0.645)	Data Time 0.001 (0.022)	Loss 2.1071 (2.1523)	Entropy 0.61597 (0.61927)	Top-1 acc 76.562 (72.979)	Top-5 acc 90.234 (89.257)	lr 0.00002
Train [118][2040/3239]	Time 0.242 (0.644)	Data Time 0.001 (0.022)	Loss 2.1434 (2.1523)	Entropy 0.61587 (0.61926)	Top-1 acc 72.656 (72.976)	Top-5 acc 88.672 (89.258)	lr 0.00002
Train [118][2050/3239]	Time 0.257 (0.643)	Data Time 0.002 (0.022)	Loss 2.0650 (2.1525)	Entropy 0.61579 (0.61924)	Top-1 acc 76.172 (72.969)	Top-5 acc 91.797 (89.253)	lr 0.00002
Train [118][2060/3239]	Time 0.293 (0.642)	Data Time 0.002 (0.022)	Loss 2.1156 (2.1522)	Entropy 0.61575 (0.61922)	Top-1 acc 76.562 (72.982)	Top-5 acc 88.672 (89.257)	lr 0.00002
Train [118][2070/3239]	Time 0.233 (0.642)	Data Time 0.001 (0.022)	Loss 2.0857 (2.1523)	Entropy 0.61575 (0.61921)	Top-1 acc 75.391 (72.981)	Top-5 acc 91.016 (89.257)	lr 0.00002
Train [118][2080/3239]	Time 0.235 (0.641)	Data Time 0.001 (0.021)	Loss 2.1955 (2.1522)	Entropy 0.61571 (0.61919)	Top-1 acc 69.922 (72.982)	Top-5 acc 85.938 (89.256)	lr 0.00002
Train [118][2090/3239]	Time 0.231 (0.640)	Data Time 0.001 (0.021)	Loss 2.1373 (2.1523)	Entropy 0.61570 (0.61917)	Top-1 acc 74.219 (72.983)	Top-5 acc 88.672 (89.255)	lr 0.00002
Train [118][2100/3239]	Time 2.559 (0.640)	Data Time 0.001 (0.021)	Loss 2.2030 (2.1524)	Entropy 0.61570 (0.61916)	Top-1 acc 71.875 (72.983)	Top-5 acc 85.547 (89.250)	lr 0.00002
Train [118][2110/3239]	Time 0.356 (0.638)	Data Time 0.001 (0.021)	Loss 2.2215 (2.1523)	Entropy 0.61569 (0.61914)	Top-1 acc 72.266 (72.987)	Top-5 acc 87.109 (89.249)	lr 0.00002
Train [118][2120/3239]	Time 0.234 (0.637)	Data Time 0.002 (0.021)	Loss 2.0497 (2.1524)	Entropy 0.61568 (0.61912)	Top-1 acc 75.000 (72.984)	Top-5 acc 92.578 (89.245)	lr 0.00002
Train [118][2130/3239]	Time 0.226 (0.636)	Data Time 0.001 (0.021)	Loss 2.1358 (2.1525)	Entropy 0.61556 (0.61911)	Top-1 acc 72.266 (72.988)	Top-5 acc 87.500 (89.243)	lr 0.00002
Train [118][2140/3239]	Time 0.268 (0.636)	Data Time 0.001 (0.021)	Loss 2.1295 (2.1525)	Entropy 0.61557 (0.61909)	Top-1 acc 72.656 (72.988)	Top-5 acc 89.453 (89.243)	lr 0.00002
Train [118][2150/3239]	Time 0.335 (0.635)	Data Time 0.001 (0.021)	Loss 2.0989 (2.1525)	Entropy 0.61552 (0.61907)	Top-1 acc 76.562 (72.983)	Top-5 acc 88.281 (89.244)	lr 0.00002
Train [118][2160/3239]	Time 0.233 (0.634)	Data Time 0.001 (0.021)	Loss 2.0701 (2.1525)	Entropy 0.61554 (0.61906)	Top-1 acc 73.828 (72.984)	Top-5 acc 90.234 (89.246)	lr 0.00002
Train [118][2170/3239]	Time 0.241 (0.634)	Data Time 0.001 (0.021)	Loss 2.0549 (2.1523)	Entropy 0.61550 (0.61904)	Top-1 acc 76.172 (72.992)	Top-5 acc 91.406 (89.249)	lr 0.00002
Train [118][2180/3239]	Time 0.272 (0.633)	Data Time 0.001 (0.021)	Loss 2.1592 (2.1523)	Entropy 0.61542 (0.61903)	Top-1 acc 69.141 (72.989)	Top-5 acc 87.891 (89.249)	lr 0.00002
Train [118][2190/3239]	Time 0.237 (0.632)	Data Time 0.001 (0.020)	Loss 2.1148 (2.1523)	Entropy 0.61535 (0.61901)	Top-1 acc 70.703 (72.989)	Top-5 acc 91.797 (89.251)	lr 0.00002
Train [118][2200/3239]	Time 0.245 (0.632)	Data Time 0.001 (0.020)	Loss 2.1755 (2.1523)	Entropy 0.61533 (0.61899)	Top-1 acc 72.266 (72.990)	Top-5 acc 88.281 (89.251)	lr 0.00002
Train [118][2210/3239]	Time 2.572 (0.631)	Data Time 0.001 (0.020)	Loss 2.3299 (2.1522)	Entropy 0.61533 (0.61898)	Top-1 acc 66.016 (72.991)	Top-5 acc 87.500 (89.251)	lr 0.00002
Train [118][2220/3239]	Time 0.223 (0.629)	Data Time 0.001 (0.020)	Loss 2.2201 (2.1521)	Entropy 0.61533 (0.61896)	Top-1 acc 75.000 (72.992)	Top-5 acc 89.453 (89.251)	lr 0.00002
Train [118][2230/3239]	Time 0.247 (0.629)	Data Time 0.001 (0.020)	Loss 2.1529 (2.1521)	Entropy 0.61529 (0.61894)	Top-1 acc 69.531 (72.993)	Top-5 acc 89.844 (89.250)	lr 0.00002
Train [118][2240/3239]	Time 0.244 (0.628)	Data Time 0.001 (0.020)	Loss 2.1686 (2.1521)	Entropy 0.61531 (0.61893)	Top-1 acc 72.266 (72.993)	Top-5 acc 89.062 (89.254)	lr 0.00002
Train [118][2250/3239]	Time 0.255 (0.627)	Data Time 0.008 (0.020)	Loss 2.1822 (2.1524)	Entropy 0.61525 (0.61891)	Top-1 acc 69.922 (72.985)	Top-5 acc 89.453 (89.250)	lr 0.00002
Train [118][2260/3239]	Time 0.248 (0.627)	Data Time 0.001 (0.020)	Loss 2.1990 (2.1524)	Entropy 0.61527 (0.61889)	Top-1 acc 71.484 (72.987)	Top-5 acc 90.625 (89.249)	lr 0.00002
Train [118][2270/3239]	Time 0.225 (0.626)	Data Time 0.001 (0.020)	Loss 1.9915 (2.1523)	Entropy 0.61518 (0.61888)	Top-1 acc 77.344 (72.990)	Top-5 acc 91.016 (89.252)	lr 0.00002
Train [118][2280/3239]	Time 0.250 (0.626)	Data Time 0.003 (0.020)	Loss 2.0444 (2.1520)	Entropy 0.61518 (0.61886)	Top-1 acc 77.734 (72.996)	Top-5 acc 90.234 (89.257)	lr 0.00002
Train [118][2290/3239]	Time 0.245 (0.625)	Data Time 0.001 (0.020)	Loss 2.0758 (2.1519)	Entropy 0.61520 (0.61885)	Top-1 acc 76.172 (72.992)	Top-5 acc 90.234 (89.256)	lr 0.00002
Train [118][2300/3239]	Time 0.231 (0.624)	Data Time 0.001 (0.020)	Loss 2.2797 (2.1521)	Entropy 0.61520 (0.61883)	Top-1 acc 66.797 (72.983)	Top-5 acc 87.891 (89.257)	lr 0.00002
Train [118][2310/3239]	Time 0.239 (0.624)	Data Time 0.001 (0.019)	Loss 2.2580 (2.1520)	Entropy 0.61522 (0.61881)	Top-1 acc 71.484 (72.988)	Top-5 acc 87.500 (89.256)	lr 0.00002
Train [118][2320/3239]	Time 2.603 (0.623)	Data Time 0.001 (0.019)	Loss 2.0425 (2.1519)	Entropy 0.61522 (0.61880)	Top-1 acc 76.953 (72.989)	Top-5 acc 92.578 (89.258)	lr 0.00002
Train [118][2330/3239]	Time 0.277 (0.622)	Data Time 0.001 (0.019)	Loss 2.0750 (2.1518)	Entropy 0.61520 (0.61878)	Top-1 acc 73.828 (72.992)	Top-5 acc 91.016 (89.258)	lr 0.00002
Train [118][2340/3239]	Time 0.240 (0.621)	Data Time 0.001 (0.019)	Loss 2.2277 (2.1519)	Entropy 0.61508 (0.61877)	Top-1 acc 68.359 (72.987)	Top-5 acc 90.625 (89.257)	lr 0.00002
Train [118][2350/3239]	Time 0.236 (0.621)	Data Time 0.001 (0.019)	Loss 2.2213 (2.1520)	Entropy 0.61509 (0.61875)	Top-1 acc 73.047 (72.986)	Top-5 acc 90.234 (89.256)	lr 0.00002
Train [118][2360/3239]	Time 0.371 (0.644)	Data Time 0.002 (0.019)	Loss 2.0916 (2.1519)	Entropy 0.61506 (0.61874)	Top-1 acc 71.484 (72.985)	Top-5 acc 90.625 (89.258)	lr 0.00002
Train [118][2370/3239]	Time 0.271 (0.643)	Data Time 0.002 (0.019)	Loss 2.0843 (2.1518)	Entropy 0.61498 (0.61872)	Top-1 acc 74.219 (72.988)	Top-5 acc 91.406 (89.262)	lr 0.00002
Train [118][2380/3239]	Time 0.242 (0.642)	Data Time 0.001 (0.019)	Loss 2.0066 (2.1518)	Entropy 0.61496 (0.61871)	Top-1 acc 79.297 (72.987)	Top-5 acc 91.797 (89.259)	lr 0.00002
Train [118][2390/3239]	Time 0.241 (0.642)	Data Time 0.002 (0.019)	Loss 2.0514 (2.1518)	Entropy 0.61499 (0.61869)	Top-1 acc 74.609 (72.985)	Top-5 acc 92.969 (89.259)	lr 0.00002
Train [118][2400/3239]	Time 0.386 (0.641)	Data Time 0.001 (0.019)	Loss 2.1278 (2.1519)	Entropy 0.61494 (0.61867)	Top-1 acc 71.875 (72.980)	Top-5 acc 91.797 (89.258)	lr 0.00002
Train [118][2410/3239]	Time 0.233 (0.641)	Data Time 0.002 (0.019)	Loss 2.1359 (2.1517)	Entropy 0.61484 (0.61866)	Top-1 acc 73.438 (72.987)	Top-5 acc 88.672 (89.263)	lr 0.00002
Train [118][2420/3239]	Time 0.264 (0.640)	Data Time 0.001 (0.019)	Loss 2.0637 (2.1515)	Entropy 0.61474 (0.61864)	Top-1 acc 74.609 (72.993)	Top-5 acc 91.406 (89.267)	lr 0.00002
Train [118][2430/3239]	Time 2.507 (0.639)	Data Time 0.001 (0.019)	Loss 2.1906 (2.1515)	Entropy 0.61474 (0.61863)	Top-1 acc 70.703 (72.992)	Top-5 acc 89.062 (89.267)	lr 0.00002
Train [118][2440/3239]	Time 0.261 (0.638)	Data Time 0.002 (0.019)	Loss 2.1373 (2.1515)	Entropy 0.61467 (0.61861)	Top-1 acc 72.656 (72.995)	Top-5 acc 89.062 (89.267)	lr 0.00002
Train [118][2450/3239]	Time 0.259 (0.637)	Data Time 0.002 (0.018)	Loss 2.1679 (2.1515)	Entropy 0.61461 (0.61859)	Top-1 acc 69.531 (72.991)	Top-5 acc 89.844 (89.268)	lr 0.00002
Train [118][2460/3239]	Time 0.243 (0.637)	Data Time 0.001 (0.018)	Loss 2.1273 (2.1515)	Entropy 0.61459 (0.61858)	Top-1 acc 75.391 (72.997)	Top-5 acc 89.062 (89.265)	lr 0.00002
Train [118][2470/3239]	Time 0.240 (0.636)	Data Time 0.001 (0.018)	Loss 2.1334 (2.1517)	Entropy 0.61465 (0.61856)	Top-1 acc 74.609 (72.992)	Top-5 acc 87.891 (89.264)	lr 0.00002
Train [118][2480/3239]	Time 0.278 (0.636)	Data Time 0.001 (0.018)	Loss 2.1546 (2.1519)	Entropy 0.61460 (0.61855)	Top-1 acc 69.141 (72.983)	Top-5 acc 89.844 (89.259)	lr 0.00002
Train [118][2490/3239]	Time 0.273 (0.635)	Data Time 0.001 (0.018)	Loss 2.0113 (2.1518)	Entropy 0.61458 (0.61853)	Top-1 acc 74.219 (72.986)	Top-5 acc 91.797 (89.261)	lr 0.00002
Train [118][2500/3239]	Time 0.273 (0.635)	Data Time 0.001 (0.018)	Loss 2.1969 (2.1518)	Entropy 0.61460 (0.61851)	Top-1 acc 67.578 (72.985)	Top-5 acc 88.672 (89.262)	lr 0.00002
Train [118][2510/3239]	Time 0.253 (0.634)	Data Time 0.001 (0.018)	Loss 2.1225 (2.1520)	Entropy 0.61444 (0.61850)	Top-1 acc 73.438 (72.980)	Top-5 acc 92.578 (89.257)	lr 0.00002
Train [118][2520/3239]	Time 0.240 (0.634)	Data Time 0.001 (0.018)	Loss 2.2031 (2.1519)	Entropy 0.61443 (0.61848)	Top-1 acc 72.266 (72.979)	Top-5 acc 88.281 (89.257)	lr 0.00002
Train [118][2530/3239]	Time 0.290 (0.633)	Data Time 0.001 (0.018)	Loss 2.1256 (2.1518)	Entropy 0.61432 (0.61847)	Top-1 acc 75.000 (72.984)	Top-5 acc 91.016 (89.263)	lr 0.00002
Train [118][2540/3239]	Time 2.520 (0.633)	Data Time 0.001 (0.018)	Loss 1.9333 (2.1516)	Entropy 0.61432 (0.61845)	Top-1 acc 78.516 (72.988)	Top-5 acc 93.359 (89.265)	lr 0.00002
Train [118][2550/3239]	Time 0.310 (0.631)	Data Time 0.002 (0.018)	Loss 1.9848 (2.1517)	Entropy 0.61435 (0.61843)	Top-1 acc 77.344 (72.988)	Top-5 acc 93.359 (89.265)	lr 0.00002
Train [118][2560/3239]	Time 0.242 (0.631)	Data Time 0.001 (0.018)	Loss 2.0113 (2.1517)	Entropy 0.61431 (0.61842)	Top-1 acc 74.219 (72.984)	Top-5 acc 92.578 (89.265)	lr 0.00002
Train [118][2570/3239]	Time 0.351 (0.630)	Data Time 0.001 (0.018)	Loss 2.1013 (2.1516)	Entropy 0.61430 (0.61840)	Top-1 acc 75.391 (72.989)	Top-5 acc 89.062 (89.267)	lr 0.00002
Train [118][2580/3239]	Time 0.291 (0.630)	Data Time 0.002 (0.018)	Loss 2.0803 (2.1515)	Entropy 0.61433 (0.61839)	Top-1 acc 75.000 (72.990)	Top-5 acc 89.453 (89.267)	lr 0.00002
Train [118][2590/3239]	Time 0.238 (0.629)	Data Time 0.001 (0.018)	Loss 2.3359 (2.1516)	Entropy 0.61433 (0.61837)	Top-1 acc 67.969 (72.984)	Top-5 acc 86.328 (89.267)	lr 0.00002
Train [118][2600/3239]	Time 0.233 (0.629)	Data Time 0.001 (0.017)	Loss 2.1980 (2.1516)	Entropy 0.61424 (0.61836)	Top-1 acc 72.266 (72.985)	Top-5 acc 88.672 (89.266)	lr 0.00002
Train [118][2610/3239]	Time 0.372 (0.628)	Data Time 0.001 (0.017)	Loss 2.1880 (2.1515)	Entropy 0.61417 (0.61834)	Top-1 acc 71.875 (72.983)	Top-5 acc 89.062 (89.268)	lr 0.00002
Train [118][2620/3239]	Time 0.248 (0.628)	Data Time 0.001 (0.017)	Loss 2.2023 (2.1517)	Entropy 0.61401 (0.61832)	Top-1 acc 72.656 (72.982)	Top-5 acc 88.672 (89.265)	lr 0.00002
Train [118][2630/3239]	Time 0.271 (0.627)	Data Time 0.002 (0.017)	Loss 2.2735 (2.1520)	Entropy 0.61399 (0.61831)	Top-1 acc 65.234 (72.972)	Top-5 acc 87.891 (89.261)	lr 0.00002
Train [118][2640/3239]	Time 0.240 (0.627)	Data Time 0.001 (0.017)	Loss 2.1960 (2.1519)	Entropy 0.61391 (0.61829)	Top-1 acc 69.531 (72.976)	Top-5 acc 92.188 (89.265)	lr 0.00002
Train [118][2650/3239]	Time 0.385 (0.626)	Data Time 0.001 (0.017)	Loss 2.0538 (2.1518)	Entropy 0.61390 (0.61827)	Top-1 acc 75.000 (72.976)	Top-5 acc 92.969 (89.267)	lr 0.00002
Train [118][2660/3239]	Time 0.233 (0.626)	Data Time 0.001 (0.017)	Loss 2.0398 (2.1519)	Entropy 0.61386 (0.61826)	Top-1 acc 75.781 (72.974)	Top-5 acc 91.016 (89.268)	lr 0.00002
Train [118][2670/3239]	Time 0.248 (0.625)	Data Time 0.002 (0.017)	Loss 2.1608 (2.1517)	Entropy 0.61381 (0.61824)	Top-1 acc 74.219 (72.984)	Top-5 acc 87.891 (89.270)	lr 0.00002
Train [118][2680/3239]	Time 0.315 (0.625)	Data Time 0.001 (0.017)	Loss 2.0107 (2.1516)	Entropy 0.61379 (0.61822)	Top-1 acc 78.125 (72.988)	Top-5 acc 92.188 (89.271)	lr 0.00002
Train [118][2690/3239]	Time 0.337 (0.624)	Data Time 0.001 (0.017)	Loss 2.3405 (2.1518)	Entropy 0.61373 (0.61821)	Top-1 acc 70.312 (72.980)	Top-5 acc 83.984 (89.266)	lr 0.00002
Train [118][2700/3239]	Time 0.255 (0.624)	Data Time 0.001 (0.017)	Loss 1.9829 (2.1516)	Entropy 0.61367 (0.61819)	Top-1 acc 76.172 (72.987)	Top-5 acc 91.406 (89.270)	lr 0.00002
Train [118][2710/3239]	Time 0.290 (0.642)	Data Time 0.004 (0.017)	Loss 2.0438 (2.1515)	Entropy 0.61357 (0.61817)	Top-1 acc 75.000 (72.990)	Top-5 acc 90.625 (89.271)	lr 0.00002
Train [118][2720/3239]	Time 0.291 (0.642)	Data Time 0.002 (0.017)	Loss 2.1947 (2.1516)	Entropy 0.61352 (0.61816)	Top-1 acc 73.438 (72.987)	Top-5 acc 87.500 (89.270)	lr 0.00002
Train [118][2730/3239]	Time 0.381 (0.641)	Data Time 0.002 (0.017)	Loss 2.2934 (2.1516)	Entropy 0.61344 (0.61814)	Top-1 acc 73.438 (72.987)	Top-5 acc 83.594 (89.269)	lr 0.00002
Train [118][2740/3239]	Time 0.223 (0.641)	Data Time 0.001 (0.017)	Loss 2.1026 (2.1516)	Entropy 0.61337 (0.61812)	Top-1 acc 76.562 (72.988)	Top-5 acc 91.016 (89.269)	lr 0.00002
Train [118][2750/3239]	Time 0.239 (0.640)	Data Time 0.002 (0.017)	Loss 2.1730 (2.1515)	Entropy 0.61333 (0.61811)	Top-1 acc 74.219 (72.989)	Top-5 acc 89.453 (89.273)	lr 0.00002
Train [118][2760/3239]	Time 0.239 (0.640)	Data Time 0.001 (0.017)	Loss 2.1269 (2.1514)	Entropy 0.61333 (0.61809)	Top-1 acc 71.094 (72.992)	Top-5 acc 90.234 (89.276)	lr 0.00002
Train [118][2770/3239]	Time 0.327 (0.639)	Data Time 0.001 (0.017)	Loss 2.0888 (2.1512)	Entropy 0.61320 (0.61807)	Top-1 acc 73.047 (72.998)	Top-5 acc 91.016 (89.280)	lr 0.00002
Train [118][2780/3239]	Time 0.261 (0.639)	Data Time 0.001 (0.016)	Loss 2.0732 (2.1513)	Entropy 0.61324 (0.61805)	Top-1 acc 75.391 (72.995)	Top-5 acc 89.844 (89.277)	lr 0.00002
Train [118][2790/3239]	Time 0.234 (0.638)	Data Time 0.001 (0.016)	Loss 2.3052 (2.1515)	Entropy 0.61323 (0.61804)	Top-1 acc 68.359 (72.988)	Top-5 acc 87.891 (89.274)	lr 0.00002
Train [118][2800/3239]	Time 0.277 (0.638)	Data Time 0.001 (0.016)	Loss 2.0102 (2.1514)	Entropy 0.61320 (0.61802)	Top-1 acc 77.734 (72.989)	Top-5 acc 93.359 (89.276)	lr 0.00002
Train [118][2810/3239]	Time 0.295 (0.637)	Data Time 0.001 (0.016)	Loss 2.2916 (2.1513)	Entropy 0.61319 (0.61800)	Top-1 acc 68.750 (72.993)	Top-5 acc 84.375 (89.277)	lr 0.00002
Train [118][2820/3239]	Time 0.232 (0.637)	Data Time 0.001 (0.016)	Loss 2.2036 (2.1515)	Entropy 0.61320 (0.61799)	Top-1 acc 71.875 (72.991)	Top-5 acc 87.500 (89.276)	lr 0.00002
Train [118][2830/3239]	Time 0.261 (0.636)	Data Time 0.001 (0.016)	Loss 2.1921 (2.1516)	Entropy 0.61320 (0.61797)	Top-1 acc 73.047 (72.990)	Top-5 acc 88.672 (89.273)	lr 0.00002
Train [118][2840/3239]	Time 0.238 (0.636)	Data Time 0.002 (0.016)	Loss 2.1682 (2.1516)	Entropy 0.61319 (0.61795)	Top-1 acc 70.703 (72.988)	Top-5 acc 89.062 (89.274)	lr 0.00002
Train [118][2850/3239]	Time 0.259 (0.635)	Data Time 0.001 (0.016)	Loss 2.1988 (2.1515)	Entropy 0.61317 (0.61794)	Top-1 acc 73.047 (72.989)	Top-5 acc 89.453 (89.275)	lr 0.00002
Train [118][2860/3239]	Time 0.275 (0.635)	Data Time 0.001 (0.016)	Loss 2.2193 (2.1515)	Entropy 0.61316 (0.61792)	Top-1 acc 70.703 (72.987)	Top-5 acc 88.281 (89.276)	lr 0.00002
Train [118][2870/3239]	Time 0.241 (0.634)	Data Time 0.001 (0.016)	Loss 2.0971 (2.1515)	Entropy 0.61316 (0.61790)	Top-1 acc 72.266 (72.985)	Top-5 acc 91.016 (89.275)	lr 0.00002
Train [118][2880/3239]	Time 0.230 (0.634)	Data Time 0.001 (0.016)	Loss 2.1373 (2.1515)	Entropy 0.61313 (0.61789)	Top-1 acc 73.828 (72.986)	Top-5 acc 89.453 (89.276)	lr 0.00002
Train [118][2890/3239]	Time 0.311 (0.633)	Data Time 0.001 (0.016)	Loss 2.1618 (2.1514)	Entropy 0.61303 (0.61787)	Top-1 acc 72.656 (72.987)	Top-5 acc 87.500 (89.276)	lr 0.00002
Train [118][2900/3239]	Time 0.241 (0.633)	Data Time 0.001 (0.016)	Loss 2.1167 (2.1514)	Entropy 0.61296 (0.61785)	Top-1 acc 72.656 (72.987)	Top-5 acc 90.625 (89.277)	lr 0.00002
Train [118][2910/3239]	Time 0.236 (0.632)	Data Time 0.001 (0.016)	Loss 2.0773 (2.1512)	Entropy 0.61289 (0.61784)	Top-1 acc 72.266 (72.992)	Top-5 acc 91.016 (89.277)	lr 0.00002
Train [118][2920/3239]	Time 0.347 (0.632)	Data Time 0.002 (0.016)	Loss 2.2125 (2.1514)	Entropy 0.61287 (0.61782)	Top-1 acc 69.922 (72.989)	Top-5 acc 89.062 (89.274)	lr 0.00002
Train [118][2930/3239]	Time 0.234 (0.631)	Data Time 0.001 (0.016)	Loss 2.0782 (2.1515)	Entropy 0.61276 (0.61780)	Top-1 acc 72.266 (72.986)	Top-5 acc 92.188 (89.271)	lr 0.00002
Train [118][2940/3239]	Time 0.249 (0.631)	Data Time 0.001 (0.016)	Loss 2.0935 (2.1513)	Entropy 0.61268 (0.61778)	Top-1 acc 74.609 (72.993)	Top-5 acc 90.625 (89.274)	lr 0.00002
Train [118][2950/3239]	Time 0.284 (0.631)	Data Time 0.001 (0.016)	Loss 2.3211 (2.1514)	Entropy 0.61267 (0.61777)	Top-1 acc 69.141 (72.991)	Top-5 acc 82.812 (89.268)	lr 0.00002
Train [118][2960/3239]	Time 0.227 (0.630)	Data Time 0.001 (0.016)	Loss 2.2741 (2.1514)	Entropy 0.61263 (0.61775)	Top-1 acc 66.797 (72.989)	Top-5 acc 87.109 (89.268)	lr 0.00002
Train [118][2970/3239]	Time 0.261 (0.630)	Data Time 0.001 (0.016)	Loss 2.3359 (2.1517)	Entropy 0.61265 (0.61773)	Top-1 acc 69.141 (72.986)	Top-5 acc 85.938 (89.264)	lr 0.00002
Train [118][2980/3239]	Time 0.281 (0.629)	Data Time 0.001 (0.015)	Loss 2.2379 (2.1516)	Entropy 0.61253 (0.61772)	Top-1 acc 73.047 (72.987)	Top-5 acc 85.156 (89.265)	lr 0.00002
Train [118][2990/3239]	Time 0.244 (0.629)	Data Time 0.002 (0.015)	Loss 2.1355 (2.1516)	Entropy 0.61253 (0.61770)	Top-1 acc 69.922 (72.987)	Top-5 acc 89.844 (89.266)	lr 0.00002
Train [118][3000/3239]	Time 0.239 (0.628)	Data Time 0.001 (0.015)	Loss 2.2104 (2.1517)	Entropy 0.61249 (0.61768)	Top-1 acc 71.484 (72.985)	Top-5 acc 89.062 (89.268)	lr 0.00002
Train [118][3010/3239]	Time 0.240 (0.628)	Data Time 0.001 (0.015)	Loss 1.9929 (2.1515)	Entropy 0.61249 (0.61766)	Top-1 acc 77.344 (72.989)	Top-5 acc 91.406 (89.270)	lr 0.00002
Train [118][3020/3239]	Time 0.253 (0.628)	Data Time 0.001 (0.015)	Loss 2.0796 (2.1514)	Entropy 0.61244 (0.61765)	Top-1 acc 75.391 (72.992)	Top-5 acc 92.188 (89.271)	lr 0.00002
Train [118][3030/3239]	Time 0.286 (0.627)	Data Time 0.001 (0.015)	Loss 2.2768 (2.1514)	Entropy 0.61242 (0.61763)	Top-1 acc 68.750 (72.993)	Top-5 acc 87.109 (89.270)	lr 0.00002
Train [118][3040/3239]	Time 0.295 (0.643)	Data Time 0.004 (0.015)	Loss 2.2611 (2.1514)	Entropy 0.61242 (0.61761)	Top-1 acc 74.609 (72.994)	Top-5 acc 88.672 (89.270)	lr 0.00002
Train [118][3050/3239]	Time 0.251 (0.643)	Data Time 0.002 (0.015)	Loss 2.2239 (2.1515)	Entropy 0.61239 (0.61760)	Top-1 acc 70.312 (72.990)	Top-5 acc 88.281 (89.269)	lr 0.00002
Train [118][3060/3239]	Time 0.237 (0.643)	Data Time 0.001 (0.015)	Loss 2.2234 (2.1516)	Entropy 0.61247 (0.61758)	Top-1 acc 71.484 (72.989)	Top-5 acc 88.281 (89.268)	lr 0.00002
Train [118][3070/3239]	Time 0.301 (0.642)	Data Time 0.001 (0.015)	Loss 2.1877 (2.1516)	Entropy 0.61242 (0.61756)	Top-1 acc 70.703 (72.986)	Top-5 acc 89.453 (89.267)	lr 0.00002
Train [118][3080/3239]	Time 0.225 (0.642)	Data Time 0.001 (0.015)	Loss 2.0758 (2.1517)	Entropy 0.61248 (0.61754)	Top-1 acc 74.219 (72.986)	Top-5 acc 91.406 (89.268)	lr 0.00002
Train [118][3090/3239]	Time 0.255 (0.642)	Data Time 0.001 (0.015)	Loss 1.9808 (2.1515)	Entropy 0.61246 (0.61753)	Top-1 acc 78.906 (72.990)	Top-5 acc 92.188 (89.269)	lr 0.00002
Train [118][3100/3239]	Time 0.253 (0.641)	Data Time 0.001 (0.015)	Loss 2.2482 (2.1515)	Entropy 0.61243 (0.61751)	Top-1 acc 71.484 (72.989)	Top-5 acc 85.938 (89.271)	lr 0.00002
Train [118][3110/3239]	Time 0.275 (0.641)	Data Time 0.001 (0.015)	Loss 2.1282 (2.1514)	Entropy 0.61241 (0.61750)	Top-1 acc 73.828 (72.992)	Top-5 acc 89.062 (89.271)	lr 0.00002
Train [118][3120/3239]	Time 0.327 (0.640)	Data Time 0.003 (0.015)	Loss 2.1255 (2.1514)	Entropy 0.61245 (0.61748)	Top-1 acc 73.047 (72.990)	Top-5 acc 90.625 (89.271)	lr 0.00002
Train [118][3130/3239]	Time 0.237 (0.640)	Data Time 0.001 (0.015)	Loss 2.0212 (2.1514)	Entropy 0.61234 (0.61746)	Top-1 acc 77.344 (72.989)	Top-5 acc 92.188 (89.272)	lr 0.00002
Train [118][3140/3239]	Time 0.320 (0.640)	Data Time 0.001 (0.015)	Loss 2.0909 (2.1515)	Entropy 0.61226 (0.61745)	Top-1 acc 72.656 (72.984)	Top-5 acc 92.188 (89.272)	lr 0.00002
Train [118][3150/3239]	Time 0.296 (0.639)	Data Time 0.001 (0.015)	Loss 2.2101 (2.1515)	Entropy 0.61224 (0.61743)	Top-1 acc 70.312 (72.986)	Top-5 acc 88.672 (89.272)	lr 0.00002
Train [118][3160/3239]	Time 0.280 (0.639)	Data Time 0.002 (0.015)	Loss 2.1589 (2.1515)	Entropy 0.61213 (0.61741)	Top-1 acc 75.781 (72.986)	Top-5 acc 87.500 (89.272)	lr 0.00002
Train [118][3170/3239]	Time 0.236 (0.638)	Data Time 0.001 (0.015)	Loss 2.0074 (2.1516)	Entropy 0.61215 (0.61740)	Top-1 acc 78.906 (72.988)	Top-5 acc 92.188 (89.270)	lr 0.00002
Train [118][3180/3239]	Time 0.233 (0.638)	Data Time 0.000 (0.015)	Loss 2.2922 (2.1515)	Entropy 0.61217 (0.61738)	Top-1 acc 70.312 (72.990)	Top-5 acc 86.719 (89.273)	lr 0.00002
Train [118][3190/3239]	Time 0.229 (0.637)	Data Time 0.000 (0.015)	Loss 2.1316 (2.1516)	Entropy 0.61206 (0.61736)	Top-1 acc 69.531 (72.986)	Top-5 acc 89.453 (89.273)	lr 0.00002
Train [118][3200/3239]	Time 0.226 (0.637)	Data Time 0.000 (0.015)	Loss 2.2119 (2.1516)	Entropy 0.61201 (0.61735)	Top-1 acc 69.531 (72.985)	Top-5 acc 88.281 (89.274)	lr 0.00002
Train [118][3210/3239]	Time 0.232 (0.636)	Data Time 0.000 (0.015)	Loss 2.1099 (2.1517)	Entropy 0.61233 (0.61733)	Top-1 acc 72.266 (72.983)	Top-5 acc 88.672 (89.272)	lr 0.00002
Train [118][3220/3239]	Time 0.236 (0.636)	Data Time 0.000 (0.014)	Loss 2.1747 (2.1517)	Entropy 0.61226 (0.61732)	Top-1 acc 71.094 (72.980)	Top-5 acc 88.672 (89.273)	lr 0.00002
Train [118][3230/3239]	Time 0.231 (0.635)	Data Time 0.000 (0.014)	Loss 2.2530 (2.1518)	Entropy 0.61221 (0.61730)	Top-1 acc 68.359 (72.976)	Top-5 acc 86.719 (89.273)	lr 0.00002
Train [118][3239/3239]	Time 2.357 (0.635)	Data Time 0.000 (0.014)	Loss 2.1606 (2.1519)	Entropy 0.61221 (0.61729)	Top-1 acc 66.667 (72.972)	Top-5 acc 88.889 (89.270)	lr 0.00002
==========Valid [118/120]	loss 1.195	top-1 acc 72.653 (72.745)	top-5 acc 89.979	Train top-1 72.972	top-5 89.270	Entropy 0.61221	Latency-None: 0.000ms	Flops: 544.27M
Train [119][0/3239]	Time 40.330 (40.330)	Data Time 39.033 (39.033)	Loss 2.0755 (2.0755)	Entropy 0.61222 (0.61222)	Top-1 acc 73.438 (73.438)	Top-5 acc 92.969 (92.969)	lr 0.00002
Train [119][10/3239]	Time 3.273 (4.340)	Data Time 0.002 (3.625)	Loss 2.0865 (2.1114)	Entropy 0.61222 (0.61222)	Top-1 acc 75.391 (73.615)	Top-5 acc 91.016 (89.879)	lr 0.00002
Train [119][20/3239]	Time 0.242 (2.395)	Data Time 0.001 (1.900)	Loss 2.0673 (2.1095)	Entropy 0.61220 (0.61221)	Top-1 acc 72.266 (73.438)	Top-5 acc 89.453 (89.900)	lr 0.00002
Train [119][30/3239]	Time 0.234 (1.789)	Data Time 0.001 (1.287)	Loss 2.0310 (2.1253)	Entropy 0.61216 (0.61220)	Top-1 acc 75.781 (73.286)	Top-5 acc 90.625 (89.491)	lr 0.00002
Train [119][40/3239]	Time 0.270 (1.472)	Data Time 0.002 (0.974)	Loss 2.1073 (2.1243)	Entropy 0.61218 (0.61219)	Top-1 acc 73.828 (73.399)	Top-5 acc 89.844 (89.634)	lr 0.00002
Train [119][50/3239]	Time 0.246 (1.277)	Data Time 0.001 (0.783)	Loss 2.2929 (2.1357)	Entropy 0.61212 (0.61218)	Top-1 acc 70.312 (73.277)	Top-5 acc 87.891 (89.438)	lr 0.00002
Train [119][60/3239]	Time 0.230 (1.150)	Data Time 0.001 (0.655)	Loss 2.1452 (2.1343)	Entropy 0.61209 (0.61217)	Top-1 acc 73.438 (73.297)	Top-5 acc 89.844 (89.428)	lr 0.00002
Train [119][70/3239]	Time 0.231 (1.058)	Data Time 0.001 (0.563)	Loss 2.2072 (2.1359)	Entropy 0.61204 (0.61215)	Top-1 acc 71.484 (73.228)	Top-5 acc 88.281 (89.431)	lr 0.00002
Train [119][80/3239]	Time 0.240 (0.989)	Data Time 0.001 (0.494)	Loss 2.1206 (2.1381)	Entropy 0.61207 (0.61214)	Top-1 acc 74.609 (73.259)	Top-5 acc 87.109 (89.357)	lr 0.00002
Train [119][90/3239]	Time 0.236 (0.936)	Data Time 0.001 (0.440)	Loss 1.9784 (2.1375)	Entropy 0.61199 (0.61213)	Top-1 acc 78.125 (73.176)	Top-5 acc 94.141 (89.406)	lr 0.00002
Train [119][100/3239]	Time 0.237 (0.893)	Data Time 0.001 (0.396)	Loss 2.1331 (2.1399)	Entropy 0.61203 (0.61212)	Top-1 acc 73.047 (73.194)	Top-5 acc 89.453 (89.356)	lr 0.00002
Train [119][110/3239]	Time 0.229 (0.856)	Data Time 0.001 (0.361)	Loss 2.2253 (2.1433)	Entropy 0.61201 (0.61211)	Top-1 acc 70.703 (73.093)	Top-5 acc 87.500 (89.284)	lr 0.00002
Train [119][120/3239]	Time 2.737 (0.826)	Data Time 0.001 (0.331)	Loss 2.2137 (2.1480)	Entropy 0.61201 (0.61210)	Top-1 acc 71.484 (73.002)	Top-5 acc 87.109 (89.172)	lr 0.00002
Train [119][130/3239]	Time 0.228 (0.781)	Data Time 0.001 (0.306)	Loss 2.6011 (2.1540)	Entropy 0.61204 (0.61210)	Top-1 acc 64.844 (72.880)	Top-5 acc 82.422 (89.113)	lr 0.00002
Train [119][140/3239]	Time 0.262 (0.761)	Data Time 0.003 (0.284)	Loss 2.1922 (2.1550)	Entropy 0.61204 (0.61209)	Top-1 acc 69.922 (72.767)	Top-5 acc 91.406 (89.146)	lr 0.00002
Train [119][150/3239]	Time 0.238 (1.105)	Data Time 0.002 (0.266)	Loss 2.1674 (2.1552)	Entropy 0.61201 (0.61209)	Top-1 acc 73.438 (72.801)	Top-5 acc 88.672 (89.138)	lr 0.00002
Train [119][160/3239]	Time 0.245 (1.069)	Data Time 0.002 (0.249)	Loss 2.1545 (2.1516)	Entropy 0.61205 (0.61208)	Top-1 acc 71.875 (72.904)	Top-5 acc 89.453 (89.181)	lr 0.00002
Train [119][170/3239]	Time 0.239 (1.035)	Data Time 0.002 (0.235)	Loss 2.2597 (2.1514)	Entropy 0.61203 (0.61208)	Top-1 acc 68.750 (72.930)	Top-5 acc 87.891 (89.211)	lr 0.00002
Train [119][180/3239]	Time 0.239 (1.005)	Data Time 0.002 (0.222)	Loss 2.1217 (2.1492)	Entropy 0.61195 (0.61208)	Top-1 acc 73.828 (72.976)	Top-5 acc 89.844 (89.289)	lr 0.00002
Train [119][190/3239]	Time 0.239 (0.979)	Data Time 0.001 (0.210)	Loss 2.0088 (2.1452)	Entropy 0.61198 (0.61207)	Top-1 acc 78.125 (73.076)	Top-5 acc 91.016 (89.337)	lr 0.00002
Train [119][200/3239]	Time 0.240 (0.954)	Data Time 0.005 (0.200)	Loss 2.3702 (2.1483)	Entropy 0.61205 (0.61207)	Top-1 acc 69.141 (73.002)	Top-5 acc 84.766 (89.294)	lr 0.00002
Train [119][210/3239]	Time 0.239 (0.931)	Data Time 0.001 (0.191)	Loss 2.1832 (2.1475)	Entropy 0.61209 (0.61207)	Top-1 acc 69.141 (73.019)	Top-5 acc 89.062 (89.325)	lr 0.00002
Train [119][220/3239]	Time 0.263 (0.911)	Data Time 0.001 (0.182)	Loss 2.0716 (2.1477)	Entropy 0.61208 (0.61207)	Top-1 acc 75.781 (73.031)	Top-5 acc 91.406 (89.331)	lr 0.00002
Train [119][230/3239]	Time 2.615 (0.893)	Data Time 0.001 (0.174)	Loss 2.2237 (2.1477)	Entropy 0.61208 (0.61207)	Top-1 acc 69.141 (73.047)	Top-5 acc 89.453 (89.335)	lr 0.00002
Train [119][240/3239]	Time 0.256 (0.866)	Data Time 0.002 (0.167)	Loss 2.2602 (2.1475)	Entropy 0.61212 (0.61207)	Top-1 acc 71.484 (73.047)	Top-5 acc 87.109 (89.341)	lr 0.00002
Train [119][250/3239]	Time 0.246 (0.852)	Data Time 0.002 (0.161)	Loss 2.2552 (2.1488)	Entropy 0.61212 (0.61207)	Top-1 acc 69.141 (73.028)	Top-5 acc 89.844 (89.338)	lr 0.00002
Train [119][260/3239]	Time 0.242 (0.838)	Data Time 0.001 (0.154)	Loss 2.0972 (2.1494)	Entropy 0.61208 (0.61207)	Top-1 acc 73.047 (73.003)	Top-5 acc 89.062 (89.323)	lr 0.00002
Train [119][270/3239]	Time 0.238 (0.825)	Data Time 0.001 (0.149)	Loss 2.2706 (2.1497)	Entropy 0.61197 (0.61207)	Top-1 acc 71.094 (73.019)	Top-5 acc 85.547 (89.296)	lr 0.00002
Train [119][280/3239]	Time 0.236 (0.813)	Data Time 0.001 (0.144)	Loss 2.2692 (2.1513)	Entropy 0.61191 (0.61207)	Top-1 acc 68.359 (73.001)	Top-5 acc 87.109 (89.261)	lr 0.00002
Train [119][290/3239]	Time 0.236 (0.802)	Data Time 0.001 (0.139)	Loss 2.0543 (2.1517)	Entropy 0.61188 (0.61206)	Top-1 acc 74.219 (72.966)	Top-5 acc 91.406 (89.263)	lr 0.00002
Train [119][300/3239]	Time 0.237 (0.791)	Data Time 0.001 (0.134)	Loss 2.1087 (2.1519)	Entropy 0.61182 (0.61205)	Top-1 acc 70.703 (73.024)	Top-5 acc 91.406 (89.261)	lr 0.00002
Train [119][310/3239]	Time 0.342 (0.781)	Data Time 0.001 (0.130)	Loss 2.2189 (2.1522)	Entropy 0.61180 (0.61205)	Top-1 acc 74.219 (73.039)	Top-5 acc 89.453 (89.263)	lr 0.00002
Train [119][320/3239]	Time 0.237 (0.772)	Data Time 0.001 (0.126)	Loss 2.0607 (2.1520)	Entropy 0.61163 (0.61204)	Top-1 acc 75.781 (73.057)	Top-5 acc 91.406 (89.285)	lr 0.00002
Train [119][330/3239]	Time 0.271 (0.763)	Data Time 0.001 (0.122)	Loss 2.1393 (2.1531)	Entropy 0.61164 (0.61202)	Top-1 acc 75.000 (73.029)	Top-5 acc 88.672 (89.269)	lr 0.00002
Train [119][340/3239]	Time 2.558 (0.755)	Data Time 0.001 (0.119)	Loss 2.2094 (2.1526)	Entropy 0.61164 (0.61201)	Top-1 acc 72.266 (73.037)	Top-5 acc 88.672 (89.304)	lr 0.00002
Train [119][350/3239]	Time 0.365 (0.741)	Data Time 0.001 (0.115)	Loss 2.1119 (2.1522)	Entropy 0.61165 (0.61200)	Top-1 acc 73.828 (73.050)	Top-5 acc 89.844 (89.295)	lr 0.00002
Train [119][360/3239]	Time 0.258 (0.734)	Data Time 0.001 (0.112)	Loss 2.0882 (2.1514)	Entropy 0.61156 (0.61199)	Top-1 acc 74.219 (73.052)	Top-5 acc 90.625 (89.307)	lr 0.00002
Train [119][370/3239]	Time 0.268 (0.727)	Data Time 0.001 (0.109)	Loss 2.1912 (2.1519)	Entropy 0.61151 (0.61198)	Top-1 acc 72.266 (73.024)	Top-5 acc 87.500 (89.312)	lr 0.00002
Train [119][380/3239]	Time 0.233 (0.721)	Data Time 0.001 (0.106)	Loss 2.1662 (2.1537)	Entropy 0.61158 (0.61197)	Top-1 acc 74.219 (72.980)	Top-5 acc 87.500 (89.268)	lr 0.00002
Train [119][390/3239]	Time 0.258 (0.716)	Data Time 0.002 (0.104)	Loss 1.9828 (2.1539)	Entropy 0.61164 (0.61196)	Top-1 acc 79.688 (72.962)	Top-5 acc 92.969 (89.266)	lr 0.00002
Train [119][400/3239]	Time 0.231 (0.710)	Data Time 0.001 (0.101)	Loss 2.0455 (2.1535)	Entropy 0.61144 (0.61195)	Top-1 acc 74.219 (72.981)	Top-5 acc 92.578 (89.268)	lr 0.00002
Train [119][410/3239]	Time 0.247 (0.705)	Data Time 0.001 (0.099)	Loss 2.0699 (2.1535)	Entropy 0.61144 (0.61194)	Top-1 acc 73.438 (72.989)	Top-5 acc 91.016 (89.268)	lr 0.00002
Train [119][420/3239]	Time 0.238 (0.699)	Data Time 0.001 (0.096)	Loss 2.1909 (2.1531)	Entropy 0.61136 (0.61192)	Top-1 acc 70.703 (72.990)	Top-5 acc 88.281 (89.276)	lr 0.00001
Train [119][430/3239]	Time 0.234 (0.694)	Data Time 0.001 (0.094)	Loss 2.0687 (2.1525)	Entropy 0.61129 (0.61191)	Top-1 acc 77.734 (73.031)	Top-5 acc 92.578 (89.278)	lr 0.00001
Train [119][440/3239]	Time 0.279 (0.690)	Data Time 0.001 (0.092)	Loss 2.0234 (2.1526)	Entropy 0.61130 (0.61190)	Top-1 acc 76.172 (73.050)	Top-5 acc 91.016 (89.272)	lr 0.00001
Train [119][450/3239]	Time 2.625 (0.685)	Data Time 0.001 (0.090)	Loss 2.1537 (2.1522)	Entropy 0.61130 (0.61188)	Top-1 acc 74.219 (73.058)	Top-5 acc 89.062 (89.278)	lr 0.00001
Train [119][460/3239]	Time 0.226 (0.676)	Data Time 0.001 (0.088)	Loss 2.0880 (2.1526)	Entropy 0.61122 (0.61187)	Top-1 acc 73.047 (73.024)	Top-5 acc 89.844 (89.276)	lr 0.00001
Train [119][470/3239]	Time 0.233 (0.672)	Data Time 0.001 (0.086)	Loss 2.1094 (2.1539)	Entropy 0.61120 (0.61185)	Top-1 acc 74.219 (72.983)	Top-5 acc 89.062 (89.244)	lr 0.00001
Train [119][480/3239]	Time 0.241 (0.668)	Data Time 0.001 (0.085)	Loss 2.2829 (2.1543)	Entropy 0.61117 (0.61184)	Top-1 acc 70.312 (72.972)	Top-5 acc 88.281 (89.241)	lr 0.00001
Train [119][490/3239]	Time 0.230 (0.664)	Data Time 0.001 (0.083)	Loss 2.0737 (2.1541)	Entropy 0.61111 (0.61183)	Top-1 acc 73.438 (72.974)	Top-5 acc 88.281 (89.242)	lr 0.00001
Train [119][500/3239]	Time 0.265 (0.661)	Data Time 0.001 (0.081)	Loss 2.0040 (2.1543)	Entropy 0.61114 (0.61181)	Top-1 acc 76.953 (72.960)	Top-5 acc 90.625 (89.233)	lr 0.00001
Train [119][510/3239]	Time 0.247 (0.763)	Data Time 0.002 (0.080)	Loss 2.1455 (2.1545)	Entropy 0.61111 (0.61180)	Top-1 acc 72.656 (72.951)	Top-5 acc 86.719 (89.208)	lr 0.00001
Train [119][520/3239]	Time 0.239 (0.758)	Data Time 0.002 (0.078)	Loss 2.2071 (2.1541)	Entropy 0.61105 (0.61178)	Top-1 acc 69.531 (72.970)	Top-5 acc 90.234 (89.220)	lr 0.00001
Train [119][530/3239]	Time 0.223 (0.753)	Data Time 0.001 (0.077)	Loss 2.2798 (2.1537)	Entropy 0.61096 (0.61177)	Top-1 acc 68.750 (72.972)	Top-5 acc 86.328 (89.227)	lr 0.00001
Train [119][540/3239]	Time 0.250 (0.747)	Data Time 0.002 (0.075)	Loss 2.2246 (2.1539)	Entropy 0.61090 (0.61175)	Top-1 acc 66.406 (72.960)	Top-5 acc 86.719 (89.224)	lr 0.00001
Train [119][550/3239]	Time 0.266 (0.743)	Data Time 0.001 (0.074)	Loss 2.3335 (2.1541)	Entropy 0.61089 (0.61174)	Top-1 acc 68.359 (72.950)	Top-5 acc 84.766 (89.230)	lr 0.00001
Train [119][560/3239]	Time 2.636 (0.738)	Data Time 0.001 (0.073)	Loss 2.0535 (2.1537)	Entropy 0.61089 (0.61172)	Top-1 acc 76.172 (72.965)	Top-5 acc 91.406 (89.237)	lr 0.00001
Train [119][570/3239]	Time 0.237 (0.729)	Data Time 0.001 (0.072)	Loss 2.2096 (2.1540)	Entropy 0.61081 (0.61171)	Top-1 acc 71.484 (72.956)	Top-5 acc 89.844 (89.236)	lr 0.00001
Train [119][580/3239]	Time 0.234 (0.725)	Data Time 0.001 (0.070)	Loss 2.1409 (2.1542)	Entropy 0.61075 (0.61169)	Top-1 acc 73.047 (72.953)	Top-5 acc 89.844 (89.229)	lr 0.00001
Train [119][590/3239]	Time 0.231 (0.721)	Data Time 0.001 (0.069)	Loss 2.1906 (2.1548)	Entropy 0.61067 (0.61167)	Top-1 acc 71.875 (72.932)	Top-5 acc 90.234 (89.221)	lr 0.00001
Train [119][600/3239]	Time 0.349 (0.717)	Data Time 0.001 (0.068)	Loss 2.2297 (2.1549)	Entropy 0.61059 (0.61166)	Top-1 acc 69.531 (72.931)	Top-5 acc 89.453 (89.230)	lr 0.00001
Train [119][610/3239]	Time 0.234 (0.713)	Data Time 0.001 (0.067)	Loss 2.2580 (2.1576)	Entropy 0.61061 (0.61164)	Top-1 acc 69.141 (72.851)	Top-5 acc 86.719 (89.183)	lr 0.00001
Train [119][620/3239]	Time 0.233 (0.710)	Data Time 0.001 (0.066)	Loss 2.1660 (2.1574)	Entropy 0.61066 (0.61162)	Top-1 acc 72.656 (72.868)	Top-5 acc 88.672 (89.180)	lr 0.00001
Train [119][630/3239]	Time 0.229 (0.706)	Data Time 0.001 (0.065)	Loss 2.2187 (2.1576)	Entropy 0.61065 (0.61161)	Top-1 acc 73.828 (72.856)	Top-5 acc 86.328 (89.176)	lr 0.00001
Train [119][640/3239]	Time 0.357 (0.703)	Data Time 0.001 (0.064)	Loss 2.0476 (2.1577)	Entropy 0.61064 (0.61159)	Top-1 acc 77.734 (72.862)	Top-5 acc 92.188 (89.172)	lr 0.00001
Train [119][650/3239]	Time 0.229 (0.699)	Data Time 0.001 (0.063)	Loss 2.1027 (2.1570)	Entropy 0.61066 (0.61158)	Top-1 acc 73.828 (72.886)	Top-5 acc 90.625 (89.183)	lr 0.00001
Train [119][660/3239]	Time 0.240 (0.696)	Data Time 0.001 (0.062)	Loss 2.1799 (2.1575)	Entropy 0.61063 (0.61156)	Top-1 acc 72.266 (72.872)	Top-5 acc 89.062 (89.163)	lr 0.00001
Train [119][670/3239]	Time 2.535 (0.693)	Data Time 0.001 (0.061)	Loss 2.0263 (2.1578)	Entropy 0.61063 (0.61155)	Top-1 acc 76.172 (72.863)	Top-5 acc 92.188 (89.160)	lr 0.00001
Train [119][680/3239]	Time 0.234 (0.686)	Data Time 0.001 (0.060)	Loss 2.0366 (2.1575)	Entropy 0.61069 (0.61154)	Top-1 acc 76.953 (72.875)	Top-5 acc 92.188 (89.171)	lr 0.00001
Train [119][690/3239]	Time 0.236 (0.683)	Data Time 0.001 (0.059)	Loss 2.1712 (2.1577)	Entropy 0.61067 (0.61153)	Top-1 acc 75.000 (72.889)	Top-5 acc 85.547 (89.165)	lr 0.00001
Train [119][700/3239]	Time 0.235 (0.681)	Data Time 0.001 (0.059)	Loss 2.2430 (2.1579)	Entropy 0.61067 (0.61151)	Top-1 acc 72.266 (72.885)	Top-5 acc 87.109 (89.155)	lr 0.00001
Train [119][710/3239]	Time 0.227 (0.678)	Data Time 0.001 (0.058)	Loss 2.2373 (2.1577)	Entropy 0.61065 (0.61150)	Top-1 acc 70.703 (72.883)	Top-5 acc 86.328 (89.153)	lr 0.00001
Train [119][720/3239]	Time 0.235 (0.675)	Data Time 0.001 (0.057)	Loss 2.2041 (2.1574)	Entropy 0.61058 (0.61149)	Top-1 acc 70.703 (72.895)	Top-5 acc 87.891 (89.160)	lr 0.00001
Train [119][730/3239]	Time 0.232 (0.673)	Data Time 0.001 (0.056)	Loss 2.2353 (2.1577)	Entropy 0.61063 (0.61148)	Top-1 acc 68.750 (72.888)	Top-5 acc 86.328 (89.150)	lr 0.00001
Train [119][740/3239]	Time 0.234 (0.670)	Data Time 0.001 (0.055)	Loss 1.9275 (2.1568)	Entropy 0.61064 (0.61147)	Top-1 acc 78.906 (72.899)	Top-5 acc 94.531 (89.164)	lr 0.00001
Train [119][750/3239]	Time 0.264 (0.668)	Data Time 0.002 (0.055)	Loss 2.1249 (2.1566)	Entropy 0.61063 (0.61145)	Top-1 acc 69.141 (72.889)	Top-5 acc 90.625 (89.166)	lr 0.00001
Train [119][760/3239]	Time 0.236 (0.665)	Data Time 0.001 (0.054)	Loss 2.3098 (2.1558)	Entropy 0.61062 (0.61144)	Top-1 acc 69.141 (72.908)	Top-5 acc 86.719 (89.182)	lr 0.00001
Train [119][770/3239]	Time 0.249 (0.663)	Data Time 0.001 (0.053)	Loss 2.2036 (2.1554)	Entropy 0.61059 (0.61143)	Top-1 acc 70.703 (72.917)	Top-5 acc 85.938 (89.191)	lr 0.00001
Train [119][780/3239]	Time 2.524 (0.661)	Data Time 0.001 (0.053)	Loss 2.0537 (2.1552)	Entropy 0.61059 (0.61142)	Top-1 acc 76.172 (72.923)	Top-5 acc 88.672 (89.190)	lr 0.00001
Train [119][790/3239]	Time 0.238 (0.656)	Data Time 0.001 (0.052)	Loss 2.1049 (2.1550)	Entropy 0.61054 (0.61141)	Top-1 acc 73.828 (72.927)	Top-5 acc 89.844 (89.196)	lr 0.00001
Train [119][800/3239]	Time 0.233 (0.654)	Data Time 0.001 (0.051)	Loss 2.1484 (2.1555)	Entropy 0.61054 (0.61140)	Top-1 acc 73.438 (72.921)	Top-5 acc 89.062 (89.193)	lr 0.00001
Train [119][810/3239]	Time 0.228 (0.652)	Data Time 0.001 (0.051)	Loss 2.2529 (2.1554)	Entropy 0.61043 (0.61139)	Top-1 acc 69.141 (72.917)	Top-5 acc 87.109 (89.195)	lr 0.00001
Train [119][820/3239]	Time 0.239 (0.649)	Data Time 0.001 (0.050)	Loss 2.1622 (2.1555)	Entropy 0.61038 (0.61138)	Top-1 acc 76.172 (72.914)	Top-5 acc 88.672 (89.199)	lr 0.00001
Train [119][830/3239]	Time 0.215 (0.647)	Data Time 0.001 (0.050)	Loss 2.3254 (2.1560)	Entropy 0.61036 (0.61136)	Top-1 acc 70.703 (72.906)	Top-5 acc 85.938 (89.189)	lr 0.00001
Train [119][840/3239]	Time 0.238 (0.645)	Data Time 0.001 (0.049)	Loss 2.1798 (2.1555)	Entropy 0.61033 (0.61135)	Top-1 acc 72.656 (72.913)	Top-5 acc 87.109 (89.189)	lr 0.00001
Train [119][850/3239]	Time 0.242 (0.643)	Data Time 0.001 (0.048)	Loss 2.1810 (2.1551)	Entropy 0.61035 (0.61134)	Top-1 acc 71.484 (72.921)	Top-5 acc 88.672 (89.192)	lr 0.00001
Train [119][860/3239]	Time 0.252 (0.642)	Data Time 0.001 (0.048)	Loss 2.2476 (2.1550)	Entropy 0.61031 (0.61133)	Top-1 acc 69.141 (72.922)	Top-5 acc 87.891 (89.187)	lr 0.00001
Train [119][870/3239]	Time 0.299 (0.699)	Data Time 0.003 (0.047)	Loss 2.0491 (2.1552)	Entropy 0.61027 (0.61132)	Top-1 acc 76.953 (72.919)	Top-5 acc 90.234 (89.180)	lr 0.00001
Train [119][880/3239]	Time 0.318 (0.697)	Data Time 0.003 (0.047)	Loss 2.0920 (2.1548)	Entropy 0.61013 (0.61130)	Top-1 acc 73.828 (72.925)	Top-5 acc 91.406 (89.185)	lr 0.00001
Train [119][890/3239]	Time 2.673 (0.695)	Data Time 0.002 (0.046)	Loss 2.0360 (2.1545)	Entropy 0.61013 (0.61129)	Top-1 acc 76.172 (72.936)	Top-5 acc 92.188 (89.190)	lr 0.00001
Train [119][900/3239]	Time 0.241 (0.690)	Data Time 0.001 (0.046)	Loss 2.3043 (2.1542)	Entropy 0.61004 (0.61128)	Top-1 acc 73.047 (72.945)	Top-5 acc 87.891 (89.197)	lr 0.00001
Train [119][910/3239]	Time 0.251 (0.688)	Data Time 0.002 (0.045)	Loss 2.2280 (2.1545)	Entropy 0.61003 (0.61126)	Top-1 acc 70.703 (72.937)	Top-5 acc 87.109 (89.198)	lr 0.00001
Train [119][920/3239]	Time 0.253 (0.686)	Data Time 0.001 (0.045)	Loss 2.0549 (2.1542)	Entropy 0.61007 (0.61125)	Top-1 acc 75.391 (72.943)	Top-5 acc 89.062 (89.207)	lr 0.00001
Train [119][930/3239]	Time 0.351 (0.684)	Data Time 0.002 (0.045)	Loss 2.1035 (2.1540)	Entropy 0.61011 (0.61124)	Top-1 acc 73.438 (72.939)	Top-5 acc 89.844 (89.211)	lr 0.00001
Train [119][940/3239]	Time 0.241 (0.682)	Data Time 0.001 (0.044)	Loss 2.1669 (2.1536)	Entropy 0.61007 (0.61123)	Top-1 acc 75.000 (72.953)	Top-5 acc 90.625 (89.219)	lr 0.00001
Train [119][950/3239]	Time 0.256 (0.680)	Data Time 0.001 (0.044)	Loss 2.2090 (2.1538)	Entropy 0.61005 (0.61121)	Top-1 acc 71.484 (72.943)	Top-5 acc 89.844 (89.223)	lr 0.00001
Train [119][960/3239]	Time 0.221 (0.678)	Data Time 0.001 (0.043)	Loss 2.0688 (2.1539)	Entropy 0.60998 (0.61120)	Top-1 acc 71.875 (72.931)	Top-5 acc 92.578 (89.225)	lr 0.00001
Train [119][970/3239]	Time 0.238 (0.676)	Data Time 0.001 (0.043)	Loss 2.0680 (2.1544)	Entropy 0.60997 (0.61119)	Top-1 acc 78.125 (72.931)	Top-5 acc 90.234 (89.214)	lr 0.00001
Train [119][980/3239]	Time 0.239 (0.674)	Data Time 0.001 (0.042)	Loss 2.1457 (2.1542)	Entropy 0.60997 (0.61118)	Top-1 acc 73.438 (72.927)	Top-5 acc 89.062 (89.221)	lr 0.00001
Train [119][990/3239]	Time 0.261 (0.673)	Data Time 0.001 (0.042)	Loss 2.2043 (2.1548)	Entropy 0.61000 (0.61116)	Top-1 acc 71.875 (72.916)	Top-5 acc 88.672 (89.211)	lr 0.00001
Train [119][1000/3239]	Time 2.608 (0.671)	Data Time 0.001 (0.041)	Loss 2.2017 (2.1547)	Entropy 0.61000 (0.61115)	Top-1 acc 73.828 (72.920)	Top-5 acc 87.500 (89.209)	lr 0.00001
Train [119][1010/3239]	Time 0.342 (0.667)	Data Time 0.001 (0.041)	Loss 1.9590 (2.1543)	Entropy 0.60991 (0.61114)	Top-1 acc 78.906 (72.927)	Top-5 acc 92.188 (89.214)	lr 0.00001
Train [119][1020/3239]	Time 0.240 (0.665)	Data Time 0.001 (0.041)	Loss 2.1682 (2.1542)	Entropy 0.60972 (0.61113)	Top-1 acc 75.781 (72.931)	Top-5 acc 87.500 (89.214)	lr 0.00001
Train [119][1030/3239]	Time 0.241 (0.663)	Data Time 0.002 (0.040)	Loss 2.1587 (2.1543)	Entropy 0.60966 (0.61111)	Top-1 acc 75.391 (72.928)	Top-5 acc 89.062 (89.211)	lr 0.00001
Train [119][1040/3239]	Time 0.236 (0.661)	Data Time 0.001 (0.040)	Loss 2.2229 (2.1541)	Entropy 0.60965 (0.61110)	Top-1 acc 71.094 (72.941)	Top-5 acc 86.328 (89.217)	lr 0.00001
Train [119][1050/3239]	Time 0.326 (0.660)	Data Time 0.001 (0.040)	Loss 2.1996 (2.1540)	Entropy 0.60969 (0.61108)	Top-1 acc 69.531 (72.945)	Top-5 acc 87.891 (89.217)	lr 0.00001
Train [119][1060/3239]	Time 0.232 (0.658)	Data Time 0.001 (0.039)	Loss 2.1802 (2.1542)	Entropy 0.60967 (0.61107)	Top-1 acc 69.922 (72.944)	Top-5 acc 87.891 (89.218)	lr 0.00001
Train [119][1070/3239]	Time 0.224 (0.656)	Data Time 0.002 (0.039)	Loss 2.1973 (2.1540)	Entropy 0.60963 (0.61106)	Top-1 acc 75.000 (72.952)	Top-5 acc 87.109 (89.221)	lr 0.00001
Train [119][1080/3239]	Time 0.238 (0.655)	Data Time 0.001 (0.039)	Loss 2.1883 (2.1539)	Entropy 0.60960 (0.61105)	Top-1 acc 69.922 (72.941)	Top-5 acc 87.891 (89.221)	lr 0.00001
Train [119][1090/3239]	Time 0.339 (0.653)	Data Time 0.001 (0.038)	Loss 2.2400 (2.1541)	Entropy 0.60959 (0.61103)	Top-1 acc 71.094 (72.936)	Top-5 acc 86.719 (89.218)	lr 0.00001
Train [119][1100/3239]	Time 0.252 (0.652)	Data Time 0.001 (0.038)	Loss 2.1712 (2.1541)	Entropy 0.60966 (0.61102)	Top-1 acc 70.703 (72.933)	Top-5 acc 88.672 (89.218)	lr 0.00001
Train [119][1110/3239]	Time 2.556 (0.650)	Data Time 0.001 (0.038)	Loss 2.1522 (2.1540)	Entropy 0.60966 (0.61101)	Top-1 acc 73.438 (72.936)	Top-5 acc 91.406 (89.222)	lr 0.00001
Train [119][1120/3239]	Time 0.238 (0.647)	Data Time 0.001 (0.037)	Loss 2.1705 (2.1537)	Entropy 0.60964 (0.61099)	Top-1 acc 73.438 (72.943)	Top-5 acc 87.891 (89.228)	lr 0.00001
Train [119][1130/3239]	Time 0.267 (0.645)	Data Time 0.001 (0.037)	Loss 2.1660 (2.1539)	Entropy 0.60965 (0.61098)	Top-1 acc 73.438 (72.931)	Top-5 acc 89.062 (89.223)	lr 0.00001
Train [119][1140/3239]	Time 0.233 (0.644)	Data Time 0.001 (0.037)	Loss 2.1983 (2.1540)	Entropy 0.60961 (0.61097)	Top-1 acc 71.484 (72.928)	Top-5 acc 89.062 (89.223)	lr 0.00001
Train [119][1150/3239]	Time 0.233 (0.642)	Data Time 0.001 (0.036)	Loss 2.1410 (2.1542)	Entropy 0.60959 (0.61096)	Top-1 acc 69.531 (72.912)	Top-5 acc 89.844 (89.222)	lr 0.00001
Train [119][1160/3239]	Time 0.227 (0.641)	Data Time 0.001 (0.036)	Loss 1.9830 (2.1542)	Entropy 0.60947 (0.61095)	Top-1 acc 78.516 (72.919)	Top-5 acc 92.969 (89.225)	lr 0.00001
Train [119][1170/3239]	Time 0.242 (0.640)	Data Time 0.001 (0.036)	Loss 2.1976 (2.1546)	Entropy 0.60948 (0.61093)	Top-1 acc 71.484 (72.912)	Top-5 acc 88.672 (89.219)	lr 0.00001
Train [119][1180/3239]	Time 0.226 (0.638)	Data Time 0.001 (0.035)	Loss 2.0006 (2.1546)	Entropy 0.60952 (0.61092)	Top-1 acc 76.562 (72.913)	Top-5 acc 91.016 (89.219)	lr 0.00001
Train [119][1190/3239]	Time 0.225 (0.637)	Data Time 0.001 (0.035)	Loss 2.1392 (2.1543)	Entropy 0.60949 (0.61091)	Top-1 acc 73.047 (72.925)	Top-5 acc 89.453 (89.218)	lr 0.00001
Train [119][1200/3239]	Time 0.262 (0.636)	Data Time 0.002 (0.035)	Loss 2.3381 (2.1543)	Entropy 0.60945 (0.61090)	Top-1 acc 66.797 (72.923)	Top-5 acc 85.156 (89.221)	lr 0.00001
Train [119][1210/3239]	Time 0.273 (0.634)	Data Time 0.001 (0.035)	Loss 2.1753 (2.1546)	Entropy 0.60940 (0.61089)	Top-1 acc 73.047 (72.907)	Top-5 acc 88.281 (89.217)	lr 0.00001
Train [119][1220/3239]	Time 2.623 (0.633)	Data Time 0.001 (0.034)	Loss 2.3326 (2.1547)	Entropy 0.60940 (0.61087)	Top-1 acc 66.406 (72.905)	Top-5 acc 87.109 (89.213)	lr 0.00001
Train [119][1230/3239]	Time 0.240 (0.630)	Data Time 0.001 (0.034)	Loss 2.2552 (2.1545)	Entropy 0.60938 (0.61086)	Top-1 acc 72.266 (72.907)	Top-5 acc 86.328 (89.218)	lr 0.00001
Train [119][1240/3239]	Time 0.303 (0.669)	Data Time 0.003 (0.034)	Loss 2.2682 (2.1548)	Entropy 0.60940 (0.61085)	Top-1 acc 70.703 (72.903)	Top-5 acc 88.281 (89.217)	lr 0.00001
Train [119][1250/3239]	Time 0.235 (0.668)	Data Time 0.001 (0.034)	Loss 1.9963 (2.1545)	Entropy 0.60939 (0.61084)	Top-1 acc 74.219 (72.900)	Top-5 acc 92.969 (89.224)	lr 0.00001
Train [119][1260/3239]	Time 0.223 (0.667)	Data Time 0.001 (0.033)	Loss 2.2451 (2.1546)	Entropy 0.60924 (0.61083)	Top-1 acc 71.094 (72.890)	Top-5 acc 88.281 (89.224)	lr 0.00001
Train [119][1270/3239]	Time 0.279 (0.665)	Data Time 0.001 (0.033)	Loss 2.0849 (2.1543)	Entropy 0.60926 (0.61081)	Top-1 acc 75.391 (72.901)	Top-5 acc 91.016 (89.226)	lr 0.00001
Train [119][1280/3239]	Time 0.247 (0.664)	Data Time 0.001 (0.033)	Loss 2.1917 (2.1541)	Entropy 0.60927 (0.61080)	Top-1 acc 69.531 (72.908)	Top-5 acc 87.891 (89.231)	lr 0.00001
Train [119][1290/3239]	Time 0.244 (0.663)	Data Time 0.001 (0.033)	Loss 2.1976 (2.1541)	Entropy 0.60914 (0.61079)	Top-1 acc 70.703 (72.904)	Top-5 acc 88.672 (89.232)	lr 0.00001
Train [119][1300/3239]	Time 0.233 (0.662)	Data Time 0.001 (0.032)	Loss 2.1599 (2.1539)	Entropy 0.60924 (0.61078)	Top-1 acc 71.875 (72.908)	Top-5 acc 90.234 (89.236)	lr 0.00001
Train [119][1310/3239]	Time 0.261 (0.660)	Data Time 0.001 (0.032)	Loss 1.9594 (2.1535)	Entropy 0.60920 (0.61076)	Top-1 acc 78.516 (72.922)	Top-5 acc 89.844 (89.239)	lr 0.00001
Train [119][1320/3239]	Time 0.293 (0.659)	Data Time 0.003 (0.032)	Loss 2.3670 (2.1536)	Entropy 0.60916 (0.61075)	Top-1 acc 62.891 (72.919)	Top-5 acc 84.375 (89.236)	lr 0.00001
Train [119][1330/3239]	Time 2.562 (0.658)	Data Time 0.001 (0.032)	Loss 2.1646 (2.1542)	Entropy 0.60916 (0.61074)	Top-1 acc 70.312 (72.904)	Top-5 acc 91.016 (89.227)	lr 0.00001
Train [119][1340/3239]	Time 0.378 (0.655)	Data Time 0.002 (0.031)	Loss 2.1870 (2.1543)	Entropy 0.60913 (0.61073)	Top-1 acc 70.312 (72.902)	Top-5 acc 89.062 (89.222)	lr 0.00001
Train [119][1350/3239]	Time 0.235 (0.653)	Data Time 0.001 (0.031)	Loss 2.1280 (2.1543)	Entropy 0.60913 (0.61072)	Top-1 acc 75.781 (72.903)	Top-5 acc 88.281 (89.219)	lr 0.00001
Train [119][1360/3239]	Time 0.245 (0.652)	Data Time 0.001 (0.031)	Loss 1.9859 (2.1540)	Entropy 0.60914 (0.61071)	Top-1 acc 78.906 (72.910)	Top-5 acc 90.625 (89.230)	lr 0.00001
Train [119][1370/3239]	Time 0.241 (0.651)	Data Time 0.001 (0.031)	Loss 2.0850 (2.1537)	Entropy 0.60910 (0.61069)	Top-1 acc 72.266 (72.918)	Top-5 acc 88.281 (89.237)	lr 0.00001
Train [119][1380/3239]	Time 0.354 (0.650)	Data Time 0.001 (0.031)	Loss 2.0878 (2.1534)	Entropy 0.60908 (0.61068)	Top-1 acc 77.734 (72.921)	Top-5 acc 88.672 (89.244)	lr 0.00001
Train [119][1390/3239]	Time 0.226 (0.649)	Data Time 0.001 (0.030)	Loss 2.1849 (2.1534)	Entropy 0.60907 (0.61067)	Top-1 acc 71.875 (72.926)	Top-5 acc 87.891 (89.245)	lr 0.00001
Train [119][1400/3239]	Time 0.222 (0.648)	Data Time 0.001 (0.030)	Loss 1.9256 (2.1531)	Entropy 0.60898 (0.61066)	Top-1 acc 80.859 (72.943)	Top-5 acc 94.141 (89.250)	lr 0.00001
Train [119][1410/3239]	Time 0.236 (0.647)	Data Time 0.001 (0.030)	Loss 2.3325 (2.1531)	Entropy 0.60889 (0.61065)	Top-1 acc 66.406 (72.945)	Top-5 acc 86.328 (89.247)	lr 0.00001
Train [119][1420/3239]	Time 0.376 (0.646)	Data Time 0.001 (0.030)	Loss 2.1914 (2.1534)	Entropy 0.60879 (0.61063)	Top-1 acc 70.703 (72.933)	Top-5 acc 88.281 (89.240)	lr 0.00001
Train [119][1430/3239]	Time 0.239 (0.644)	Data Time 0.001 (0.030)	Loss 2.0732 (2.1533)	Entropy 0.60872 (0.61062)	Top-1 acc 73.828 (72.932)	Top-5 acc 90.234 (89.242)	lr 0.00001
Train [119][1440/3239]	Time 2.506 (0.643)	Data Time 0.001 (0.029)	Loss 2.1126 (2.1534)	Entropy 0.60872 (0.61061)	Top-1 acc 72.656 (72.932)	Top-5 acc 90.625 (89.243)	lr 0.00001
Train [119][1450/3239]	Time 0.242 (0.641)	Data Time 0.001 (0.029)	Loss 2.1475 (2.1532)	Entropy 0.60865 (0.61059)	Top-1 acc 73.438 (72.936)	Top-5 acc 89.453 (89.249)	lr 0.00001
Train [119][1460/3239]	Time 0.281 (0.640)	Data Time 0.002 (0.029)	Loss 1.9812 (2.1530)	Entropy 0.60865 (0.61058)	Top-1 acc 75.000 (72.940)	Top-5 acc 92.188 (89.248)	lr 0.00001
Train [119][1470/3239]	Time 0.233 (0.639)	Data Time 0.001 (0.029)	Loss 2.1146 (2.1529)	Entropy 0.60856 (0.61057)	Top-1 acc 75.781 (72.937)	Top-5 acc 89.062 (89.248)	lr 0.00001
Train [119][1480/3239]	Time 0.227 (0.637)	Data Time 0.001 (0.029)	Loss 2.0828 (2.1529)	Entropy 0.60856 (0.61055)	Top-1 acc 73.047 (72.934)	Top-5 acc 91.016 (89.251)	lr 0.00001
Train [119][1490/3239]	Time 0.228 (0.636)	Data Time 0.001 (0.028)	Loss 2.1671 (2.1528)	Entropy 0.60852 (0.61054)	Top-1 acc 71.484 (72.937)	Top-5 acc 88.281 (89.252)	lr 0.00001
Train [119][1500/3239]	Time 0.241 (0.635)	Data Time 0.001 (0.028)	Loss 2.1336 (2.1529)	Entropy 0.60852 (0.61053)	Top-1 acc 76.953 (72.939)	Top-5 acc 88.672 (89.250)	lr 0.00001
Train [119][1510/3239]	Time 0.227 (0.634)	Data Time 0.001 (0.028)	Loss 2.0483 (2.1528)	Entropy 0.60847 (0.61051)	Top-1 acc 76.562 (72.939)	Top-5 acc 90.234 (89.253)	lr 0.00001
Train [119][1520/3239]	Time 0.237 (0.633)	Data Time 0.001 (0.028)	Loss 2.2086 (2.1528)	Entropy 0.60845 (0.61050)	Top-1 acc 75.391 (72.938)	Top-5 acc 89.453 (89.256)	lr 0.00001
Train [119][1530/3239]	Time 0.238 (0.633)	Data Time 0.001 (0.028)	Loss 2.0481 (2.1526)	Entropy 0.60839 (0.61049)	Top-1 acc 76.562 (72.943)	Top-5 acc 91.406 (89.260)	lr 0.00001
Train [119][1540/3239]	Time 0.262 (0.632)	Data Time 0.001 (0.028)	Loss 2.1708 (2.1529)	Entropy 0.60835 (0.61047)	Top-1 acc 74.219 (72.938)	Top-5 acc 89.062 (89.253)	lr 0.00001
Train [119][1550/3239]	Time 2.656 (0.631)	Data Time 0.001 (0.027)	Loss 2.1597 (2.1526)	Entropy 0.60835 (0.61046)	Top-1 acc 72.656 (72.941)	Top-5 acc 89.844 (89.263)	lr 0.00001
Train [119][1560/3239]	Time 0.244 (0.628)	Data Time 0.001 (0.027)	Loss 2.0415 (2.1522)	Entropy 0.60828 (0.61045)	Top-1 acc 74.609 (72.955)	Top-5 acc 91.797 (89.270)	lr 0.00001
Train [119][1570/3239]	Time 0.226 (0.627)	Data Time 0.001 (0.027)	Loss 2.1771 (2.1521)	Entropy 0.60824 (0.61043)	Top-1 acc 68.750 (72.956)	Top-5 acc 89.453 (89.273)	lr 0.00001
Train [119][1580/3239]	Time 0.234 (0.627)	Data Time 0.001 (0.027)	Loss 2.2557 (2.1526)	Entropy 0.60831 (0.61042)	Top-1 acc 68.750 (72.944)	Top-5 acc 90.234 (89.266)	lr 0.00001
Train [119][1590/3239]	Time 0.230 (0.626)	Data Time 0.001 (0.027)	Loss 2.2491 (2.1525)	Entropy 0.60837 (0.61041)	Top-1 acc 68.359 (72.946)	Top-5 acc 88.281 (89.266)	lr 0.00001
Train [119][1600/3239]	Time 0.278 (0.656)	Data Time 0.002 (0.027)	Loss 2.2694 (2.1528)	Entropy 0.60833 (0.61039)	Top-1 acc 70.703 (72.934)	Top-5 acc 88.672 (89.266)	lr 0.00001
Train [119][1610/3239]	Time 0.231 (0.656)	Data Time 0.002 (0.026)	Loss 2.2141 (2.1529)	Entropy 0.60829 (0.61038)	Top-1 acc 73.438 (72.931)	Top-5 acc 88.281 (89.262)	lr 0.00001
Train [119][1620/3239]	Time 0.274 (0.655)	Data Time 0.001 (0.026)	Loss 2.1351 (2.1532)	Entropy 0.60831 (0.61037)	Top-1 acc 73.828 (72.927)	Top-5 acc 88.672 (89.256)	lr 0.00001
Train [119][1630/3239]	Time 0.279 (0.654)	Data Time 0.001 (0.026)	Loss 1.9604 (2.1533)	Entropy 0.60806 (0.61035)	Top-1 acc 76.562 (72.920)	Top-5 acc 92.969 (89.258)	lr 0.00001
Train [119][1640/3239]	Time 0.234 (0.653)	Data Time 0.001 (0.026)	Loss 2.3314 (2.1534)	Entropy 0.60796 (0.61034)	Top-1 acc 70.703 (72.923)	Top-5 acc 86.328 (89.256)	lr 0.00001
Train [119][1650/3239]	Time 0.285 (0.651)	Data Time 0.001 (0.026)	Loss 2.1353 (2.1534)	Entropy 0.60793 (0.61032)	Top-1 acc 75.781 (72.922)	Top-5 acc 87.500 (89.256)	lr 0.00001
Train [119][1660/3239]	Time 2.539 (0.651)	Data Time 0.001 (0.026)	Loss 2.2143 (2.1533)	Entropy 0.60793 (0.61031)	Top-1 acc 71.094 (72.925)	Top-5 acc 87.109 (89.259)	lr 0.00001
Train [119][1670/3239]	Time 0.321 (0.648)	Data Time 0.001 (0.026)	Loss 2.2388 (2.1534)	Entropy 0.60791 (0.61030)	Top-1 acc 71.484 (72.915)	Top-5 acc 87.109 (89.256)	lr 0.00001
Train [119][1680/3239]	Time 0.234 (0.647)	Data Time 0.001 (0.025)	Loss 2.2413 (2.1533)	Entropy 0.60782 (0.61028)	Top-1 acc 68.750 (72.917)	Top-5 acc 88.281 (89.256)	lr 0.00001
Train [119][1690/3239]	Time 0.270 (0.646)	Data Time 0.001 (0.025)	Loss 2.2862 (2.1534)	Entropy 0.60776 (0.61027)	Top-1 acc 70.312 (72.912)	Top-5 acc 88.281 (89.253)	lr 0.00001
Train [119][1700/3239]	Time 0.246 (0.645)	Data Time 0.001 (0.025)	Loss 2.1233 (2.1535)	Entropy 0.60777 (0.61025)	Top-1 acc 73.828 (72.915)	Top-5 acc 89.844 (89.250)	lr 0.00001
Train [119][1710/3239]	Time 0.323 (0.644)	Data Time 0.001 (0.025)	Loss 2.0183 (2.1535)	Entropy 0.60773 (0.61024)	Top-1 acc 74.609 (72.913)	Top-5 acc 94.141 (89.254)	lr 0.00001
Train [119][1720/3239]	Time 0.267 (0.644)	Data Time 0.001 (0.025)	Loss 2.1579 (2.1534)	Entropy 0.60767 (0.61022)	Top-1 acc 74.219 (72.912)	Top-5 acc 89.844 (89.256)	lr 0.00001
Train [119][1730/3239]	Time 0.244 (0.643)	Data Time 0.001 (0.025)	Loss 2.1721 (2.1533)	Entropy 0.60763 (0.61021)	Top-1 acc 71.484 (72.915)	Top-5 acc 86.719 (89.255)	lr 0.00001
Train [119][1740/3239]	Time 0.271 (0.642)	Data Time 0.001 (0.025)	Loss 2.2534 (2.1533)	Entropy 0.60762 (0.61019)	Top-1 acc 71.484 (72.922)	Top-5 acc 88.672 (89.256)	lr 0.00001
Train [119][1750/3239]	Time 0.379 (0.641)	Data Time 0.001 (0.024)	Loss 2.1197 (2.1532)	Entropy 0.60754 (0.61018)	Top-1 acc 75.000 (72.925)	Top-5 acc 89.844 (89.259)	lr 0.00001
Train [119][1760/3239]	Time 0.277 (0.640)	Data Time 0.001 (0.024)	Loss 2.2149 (2.1533)	Entropy 0.60745 (0.61016)	Top-1 acc 72.266 (72.928)	Top-5 acc 88.281 (89.261)	lr 0.00001
Train [119][1770/3239]	Time 2.510 (0.639)	Data Time 0.001 (0.024)	Loss 2.1532 (2.1534)	Entropy 0.60745 (0.61015)	Top-1 acc 73.047 (72.924)	Top-5 acc 87.891 (89.258)	lr 0.00001
Train [119][1780/3239]	Time 0.236 (0.637)	Data Time 0.001 (0.024)	Loss 2.0547 (2.1533)	Entropy 0.60748 (0.61013)	Top-1 acc 72.266 (72.923)	Top-5 acc 89.844 (89.257)	lr 0.00001
Train [119][1790/3239]	Time 0.241 (0.636)	Data Time 0.001 (0.024)	Loss 2.0738 (2.1529)	Entropy 0.60745 (0.61012)	Top-1 acc 76.172 (72.936)	Top-5 acc 89.844 (89.263)	lr 0.00001
Train [119][1800/3239]	Time 0.229 (0.635)	Data Time 0.001 (0.024)	Loss 2.0979 (2.1530)	Entropy 0.60738 (0.61010)	Top-1 acc 75.000 (72.935)	Top-5 acc 90.234 (89.260)	lr 0.00001
Train [119][1810/3239]	Time 0.266 (0.634)	Data Time 0.001 (0.024)	Loss 2.0770 (2.1532)	Entropy 0.60747 (0.61009)	Top-1 acc 75.781 (72.929)	Top-5 acc 89.062 (89.254)	lr 0.00001
Train [119][1820/3239]	Time 0.224 (0.634)	Data Time 0.001 (0.024)	Loss 2.1154 (2.1533)	Entropy 0.60734 (0.61007)	Top-1 acc 76.172 (72.931)	Top-5 acc 87.500 (89.250)	lr 0.00001
Train [119][1830/3239]	Time 0.265 (0.633)	Data Time 0.001 (0.023)	Loss 2.2604 (2.1531)	Entropy 0.60730 (0.61006)	Top-1 acc 70.703 (72.938)	Top-5 acc 83.984 (89.250)	lr 0.00001
Train [119][1840/3239]	Time 0.263 (0.632)	Data Time 0.001 (0.023)	Loss 2.2687 (2.1528)	Entropy 0.60729 (0.61004)	Top-1 acc 70.312 (72.943)	Top-5 acc 87.109 (89.258)	lr 0.00001
Train [119][1850/3239]	Time 0.250 (0.631)	Data Time 0.001 (0.023)	Loss 2.1249 (2.1530)	Entropy 0.60740 (0.61003)	Top-1 acc 73.828 (72.939)	Top-5 acc 91.016 (89.259)	lr 0.00001
Train [119][1860/3239]	Time 0.233 (0.630)	Data Time 0.001 (0.023)	Loss 2.1362 (2.1529)	Entropy 0.60727 (0.61001)	Top-1 acc 73.438 (72.938)	Top-5 acc 89.453 (89.262)	lr 0.00001
Train [119][1870/3239]	Time 0.285 (0.630)	Data Time 0.001 (0.023)	Loss 2.1418 (2.1527)	Entropy 0.60724 (0.61000)	Top-1 acc 73.047 (72.941)	Top-5 acc 89.062 (89.271)	lr 0.00001
Train [119][1880/3239]	Time 2.593 (0.629)	Data Time 0.001 (0.023)	Loss 2.0980 (2.1528)	Entropy 0.60724 (0.60998)	Top-1 acc 74.219 (72.938)	Top-5 acc 91.016 (89.270)	lr 0.00001
Train [119][1890/3239]	Time 0.240 (0.627)	Data Time 0.001 (0.023)	Loss 2.1792 (2.1527)	Entropy 0.60711 (0.60997)	Top-1 acc 71.875 (72.938)	Top-5 acc 88.672 (89.272)	lr 0.00001
Train [119][1900/3239]	Time 0.243 (0.626)	Data Time 0.001 (0.023)	Loss 2.2127 (2.1528)	Entropy 0.60700 (0.60995)	Top-1 acc 71.875 (72.932)	Top-5 acc 87.109 (89.269)	lr 0.00001
Train [119][1910/3239]	Time 0.221 (0.626)	Data Time 0.001 (0.023)	Loss 2.0499 (2.1526)	Entropy 0.60695 (0.60994)	Top-1 acc 74.219 (72.937)	Top-5 acc 91.016 (89.270)	lr 0.00001
Train [119][1920/3239]	Time 0.220 (0.625)	Data Time 0.001 (0.022)	Loss 2.1841 (2.1523)	Entropy 0.60688 (0.60992)	Top-1 acc 70.312 (72.941)	Top-5 acc 88.672 (89.278)	lr 0.00001
Train [119][1930/3239]	Time 0.240 (0.624)	Data Time 0.001 (0.022)	Loss 2.0006 (2.1523)	Entropy 0.60689 (0.60991)	Top-1 acc 73.828 (72.940)	Top-5 acc 93.750 (89.280)	lr 0.00001
Train [119][1940/3239]	Time 0.274 (0.624)	Data Time 0.001 (0.022)	Loss 2.0891 (2.1523)	Entropy 0.60688 (0.60989)	Top-1 acc 73.828 (72.935)	Top-5 acc 92.188 (89.282)	lr 0.00001
Train [119][1950/3239]	Time 0.226 (0.623)	Data Time 0.001 (0.022)	Loss 2.0693 (2.1522)	Entropy 0.60674 (0.60988)	Top-1 acc 74.219 (72.940)	Top-5 acc 91.797 (89.284)	lr 0.00001
Train [119][1960/3239]	Time 0.253 (0.650)	Data Time 0.002 (0.022)	Loss 1.9949 (2.1524)	Entropy 0.60668 (0.60986)	Top-1 acc 76.562 (72.932)	Top-5 acc 92.188 (89.280)	lr 0.00001
Train [119][1970/3239]	Time 0.288 (0.649)	Data Time 0.003 (0.022)	Loss 2.0324 (2.1524)	Entropy 0.60668 (0.60984)	Top-1 acc 77.344 (72.934)	Top-5 acc 90.234 (89.281)	lr 0.00001
Train [119][1980/3239]	Time 0.313 (0.648)	Data Time 0.002 (0.022)	Loss 2.1627 (2.1523)	Entropy 0.60674 (0.60983)	Top-1 acc 71.875 (72.934)	Top-5 acc 89.844 (89.281)	lr 0.00001
Train [119][1990/3239]	Time 2.550 (0.648)	Data Time 0.001 (0.022)	Loss 2.2193 (2.1525)	Entropy 0.60674 (0.60981)	Top-1 acc 72.266 (72.936)	Top-5 acc 86.719 (89.278)	lr 0.00001
Train [119][2000/3239]	Time 0.340 (0.646)	Data Time 0.001 (0.022)	Loss 2.1795 (2.1526)	Entropy 0.60663 (0.60980)	Top-1 acc 75.000 (72.935)	Top-5 acc 89.453 (89.274)	lr 0.00001
Train [119][2010/3239]	Time 0.257 (0.645)	Data Time 0.002 (0.021)	Loss 2.2536 (2.1527)	Entropy 0.60657 (0.60978)	Top-1 acc 69.141 (72.935)	Top-5 acc 87.891 (89.272)	lr 0.00001
Train [119][2020/3239]	Time 0.276 (0.644)	Data Time 0.001 (0.021)	Loss 2.1099 (2.1529)	Entropy 0.60646 (0.60976)	Top-1 acc 74.219 (72.933)	Top-5 acc 91.016 (89.269)	lr 0.00001
Train [119][2030/3239]	Time 0.316 (0.644)	Data Time 0.002 (0.021)	Loss 2.0515 (2.1528)	Entropy 0.60641 (0.60975)	Top-1 acc 74.219 (72.935)	Top-5 acc 90.625 (89.271)	lr 0.00001
Train [119][2040/3239]	Time 0.387 (0.643)	Data Time 0.001 (0.021)	Loss 2.0207 (2.1528)	Entropy 0.60645 (0.60973)	Top-1 acc 74.609 (72.935)	Top-5 acc 91.797 (89.272)	lr 0.00001
Train [119][2050/3239]	Time 0.267 (0.642)	Data Time 0.002 (0.021)	Loss 2.1504 (2.1526)	Entropy 0.60649 (0.60972)	Top-1 acc 74.219 (72.943)	Top-5 acc 87.500 (89.269)	lr 0.00001
Train [119][2060/3239]	Time 0.260 (0.642)	Data Time 0.001 (0.021)	Loss 2.1232 (2.1525)	Entropy 0.60648 (0.60970)	Top-1 acc 74.219 (72.944)	Top-5 acc 90.234 (89.272)	lr 0.00001
Train [119][2070/3239]	Time 0.222 (0.641)	Data Time 0.001 (0.021)	Loss 2.0824 (2.1522)	Entropy 0.60639 (0.60968)	Top-1 acc 73.047 (72.948)	Top-5 acc 89.062 (89.276)	lr 0.00001
Train [119][2080/3239]	Time 0.412 (0.640)	Data Time 0.001 (0.021)	Loss 1.9391 (2.1520)	Entropy 0.60639 (0.60967)	Top-1 acc 78.125 (72.952)	Top-5 acc 93.359 (89.281)	lr 0.00001
Train [119][2090/3239]	Time 0.306 (0.640)	Data Time 0.001 (0.021)	Loss 2.0835 (2.1520)	Entropy 0.60638 (0.60965)	Top-1 acc 75.781 (72.952)	Top-5 acc 91.406 (89.282)	lr 0.00001
Train [119][2100/3239]	Time 2.617 (0.639)	Data Time 0.002 (0.021)	Loss 2.1256 (2.1523)	Entropy 0.60638 (0.60964)	Top-1 acc 73.438 (72.943)	Top-5 acc 89.453 (89.275)	lr 0.00001
Train [119][2110/3239]	Time 0.233 (0.637)	Data Time 0.001 (0.021)	Loss 2.1792 (2.1524)	Entropy 0.60640 (0.60962)	Top-1 acc 73.047 (72.939)	Top-5 acc 87.500 (89.275)	lr 0.00001
Train [119][2120/3239]	Time 0.269 (0.637)	Data Time 0.001 (0.020)	Loss 2.3548 (2.1525)	Entropy 0.60639 (0.60961)	Top-1 acc 67.188 (72.940)	Top-5 acc 84.766 (89.271)	lr 0.00001
Train [119][2130/3239]	Time 0.230 (0.636)	Data Time 0.001 (0.020)	Loss 2.0901 (2.1524)	Entropy 0.60641 (0.60959)	Top-1 acc 74.609 (72.939)	Top-5 acc 91.016 (89.272)	lr 0.00001
Train [119][2140/3239]	Time 0.240 (0.635)	Data Time 0.001 (0.020)	Loss 2.1833 (2.1524)	Entropy 0.60630 (0.60958)	Top-1 acc 69.922 (72.938)	Top-5 acc 89.453 (89.271)	lr 0.00001
Train [119][2150/3239]	Time 0.246 (0.635)	Data Time 0.001 (0.020)	Loss 2.1132 (2.1524)	Entropy 0.60630 (0.60956)	Top-1 acc 73.828 (72.945)	Top-5 acc 91.797 (89.272)	lr 0.00001
Train [119][2160/3239]	Time 0.255 (0.634)	Data Time 0.001 (0.020)	Loss 1.9349 (2.1523)	Entropy 0.60626 (0.60955)	Top-1 acc 78.125 (72.943)	Top-5 acc 93.359 (89.273)	lr 0.00001
Train [119][2170/3239]	Time 0.279 (0.633)	Data Time 0.001 (0.020)	Loss 2.3109 (2.1521)	Entropy 0.60619 (0.60953)	Top-1 acc 67.188 (72.950)	Top-5 acc 88.672 (89.279)	lr 0.00001
Train [119][2180/3239]	Time 0.237 (0.633)	Data Time 0.001 (0.020)	Loss 2.2024 (2.1520)	Entropy 0.60613 (0.60951)	Top-1 acc 72.656 (72.952)	Top-5 acc 89.844 (89.279)	lr 0.00001
Train [119][2190/3239]	Time 0.256 (0.632)	Data Time 0.001 (0.020)	Loss 2.1150 (2.1519)	Entropy 0.60613 (0.60950)	Top-1 acc 71.875 (72.958)	Top-5 acc 90.234 (89.282)	lr 0.00001
Train [119][2200/3239]	Time 0.293 (0.631)	Data Time 0.003 (0.020)	Loss 2.0348 (2.1516)	Entropy 0.60613 (0.60948)	Top-1 acc 74.609 (72.964)	Top-5 acc 91.797 (89.290)	lr 0.00001
Train [119][2210/3239]	Time 2.536 (0.631)	Data Time 0.001 (0.020)	Loss 2.1186 (2.1516)	Entropy 0.60613 (0.60947)	Top-1 acc 73.047 (72.964)	Top-5 acc 89.453 (89.290)	lr 0.00001
Train [119][2220/3239]	Time 0.241 (0.629)	Data Time 0.001 (0.020)	Loss 2.1791 (2.1514)	Entropy 0.60608 (0.60945)	Top-1 acc 69.922 (72.970)	Top-5 acc 87.500 (89.295)	lr 0.00001
Train [119][2230/3239]	Time 0.240 (0.629)	Data Time 0.001 (0.020)	Loss 2.1093 (2.1513)	Entropy 0.60594 (0.60944)	Top-1 acc 73.047 (72.972)	Top-5 acc 89.062 (89.296)	lr 0.00001
Train [119][2240/3239]	Time 0.270 (0.628)	Data Time 0.002 (0.020)	Loss 2.0948 (2.1513)	Entropy 0.60595 (0.60942)	Top-1 acc 73.828 (72.975)	Top-5 acc 89.062 (89.293)	lr 0.00001
Train [119][2250/3239]	Time 0.269 (0.628)	Data Time 0.002 (0.019)	Loss 2.1947 (2.1513)	Entropy 0.60601 (0.60941)	Top-1 acc 69.922 (72.974)	Top-5 acc 90.625 (89.293)	lr 0.00001
Train [119][2260/3239]	Time 0.267 (0.627)	Data Time 0.001 (0.019)	Loss 2.1728 (2.1512)	Entropy 0.60607 (0.60939)	Top-1 acc 71.484 (72.979)	Top-5 acc 89.062 (89.294)	lr 0.00001
Train [119][2270/3239]	Time 0.248 (0.626)	Data Time 0.002 (0.019)	Loss 2.1842 (2.1511)	Entropy 0.60608 (0.60938)	Top-1 acc 74.219 (72.986)	Top-5 acc 88.672 (89.297)	lr 0.00001
Train [119][2280/3239]	Time 0.273 (0.626)	Data Time 0.001 (0.019)	Loss 2.1065 (2.1511)	Entropy 0.60604 (0.60936)	Top-1 acc 74.219 (72.986)	Top-5 acc 90.625 (89.299)	lr 0.00001
Train [119][2290/3239]	Time 0.238 (0.625)	Data Time 0.001 (0.019)	Loss 2.1837 (2.1511)	Entropy 0.60601 (0.60935)	Top-1 acc 73.047 (72.985)	Top-5 acc 87.109 (89.298)	lr 0.00001
Train [119][2300/3239]	Time 0.261 (0.625)	Data Time 0.001 (0.019)	Loss 2.1639 (2.1512)	Entropy 0.60600 (0.60933)	Top-1 acc 73.047 (72.984)	Top-5 acc 89.453 (89.297)	lr 0.00001
Train [119][2310/3239]	Time 0.234 (0.624)	Data Time 0.001 (0.019)	Loss 2.2188 (2.1511)	Entropy 0.60597 (0.60932)	Top-1 acc 72.266 (72.989)	Top-5 acc 87.500 (89.298)	lr 0.00001
Train [119][2320/3239]	Time 57.076 (0.647)	Data Time 0.002 (0.019)	Loss 1.9521 (2.1508)	Entropy 0.60597 (0.60931)	Top-1 acc 78.906 (72.994)	Top-5 acc 93.750 (89.304)	lr 0.00001
Train [119][2330/3239]	Time 0.356 (0.645)	Data Time 0.002 (0.019)	Loss 2.0929 (2.1508)	Entropy 0.60589 (0.60929)	Top-1 acc 73.047 (72.994)	Top-5 acc 88.672 (89.303)	lr 0.00001
Train [119][2340/3239]	Time 0.265 (0.645)	Data Time 0.002 (0.019)	Loss 2.0967 (2.1507)	Entropy 0.60575 (0.60928)	Top-1 acc 74.609 (72.998)	Top-5 acc 91.797 (89.303)	lr 0.00001
Train [119][2350/3239]	Time 0.247 (0.644)	Data Time 0.002 (0.019)	Loss 2.2990 (2.1507)	Entropy 0.60571 (0.60926)	Top-1 acc 70.312 (73.001)	Top-5 acc 85.156 (89.302)	lr 0.00001
Train [119][2360/3239]	Time 0.222 (0.644)	Data Time 0.002 (0.019)	Loss 2.1764 (2.1507)	Entropy 0.60568 (0.60925)	Top-1 acc 73.047 (73.000)	Top-5 acc 88.672 (89.301)	lr 0.00001
Train [119][2370/3239]	Time 0.420 (0.643)	Data Time 0.001 (0.019)	Loss 2.1852 (2.1507)	Entropy 0.60567 (0.60923)	Top-1 acc 73.047 (73.005)	Top-5 acc 88.672 (89.304)	lr 0.00001
Train [119][2380/3239]	Time 0.307 (0.642)	Data Time 0.001 (0.018)	Loss 2.2015 (2.1507)	Entropy 0.60554 (0.60921)	Top-1 acc 73.438 (73.000)	Top-5 acc 89.062 (89.303)	lr 0.00001
Train [119][2390/3239]	Time 0.283 (0.642)	Data Time 0.001 (0.018)	Loss 2.2640 (2.1508)	Entropy 0.60559 (0.60920)	Top-1 acc 71.875 (72.998)	Top-5 acc 87.500 (89.300)	lr 0.00001
Train [119][2400/3239]	Time 0.294 (0.641)	Data Time 0.002 (0.018)	Loss 2.1487 (2.1509)	Entropy 0.60558 (0.60918)	Top-1 acc 74.219 (72.995)	Top-5 acc 90.234 (89.297)	lr 0.00001
Train [119][2410/3239]	Time 0.253 (0.641)	Data Time 0.001 (0.018)	Loss 2.2561 (2.1510)	Entropy 0.60552 (0.60917)	Top-1 acc 66.406 (72.991)	Top-5 acc 89.062 (89.294)	lr 0.00001
Train [119][2420/3239]	Time 0.248 (0.640)	Data Time 0.001 (0.018)	Loss 2.2490 (2.1512)	Entropy 0.60544 (0.60915)	Top-1 acc 71.094 (72.985)	Top-5 acc 85.547 (89.290)	lr 0.00001
Train [119][2430/3239]	Time 2.538 (0.640)	Data Time 0.001 (0.018)	Loss 2.2788 (2.1512)	Entropy 0.60544 (0.60914)	Top-1 acc 69.141 (72.982)	Top-5 acc 87.500 (89.292)	lr 0.00001
Train [119][2440/3239]	Time 0.263 (0.638)	Data Time 0.002 (0.018)	Loss 2.1774 (2.1511)	Entropy 0.60541 (0.60912)	Top-1 acc 73.438 (72.984)	Top-5 acc 89.453 (89.293)	lr 0.00001
Train [119][2450/3239]	Time 0.232 (0.637)	Data Time 0.001 (0.018)	Loss 2.0058 (2.1511)	Entropy 0.60535 (0.60911)	Top-1 acc 78.516 (72.982)	Top-5 acc 90.234 (89.294)	lr 0.00001
Train [119][2460/3239]	Time 0.308 (0.637)	Data Time 0.001 (0.018)	Loss 2.0743 (2.1509)	Entropy 0.60529 (0.60909)	Top-1 acc 73.438 (72.987)	Top-5 acc 91.797 (89.298)	lr 0.00001
Train [119][2470/3239]	Time 0.281 (0.636)	Data Time 0.001 (0.018)	Loss 2.0700 (2.1509)	Entropy 0.60527 (0.60908)	Top-1 acc 77.734 (72.987)	Top-5 acc 88.672 (89.296)	lr 0.00001
Train [119][2480/3239]	Time 0.244 (0.636)	Data Time 0.001 (0.018)	Loss 2.1857 (2.1509)	Entropy 0.60527 (0.60906)	Top-1 acc 71.875 (72.986)	Top-5 acc 88.281 (89.294)	lr 0.00001
Train [119][2490/3239]	Time 0.290 (0.635)	Data Time 0.001 (0.018)	Loss 2.2527 (2.1508)	Entropy 0.60529 (0.60905)	Top-1 acc 69.922 (72.989)	Top-5 acc 87.891 (89.299)	lr 0.00001
Train [119][2500/3239]	Time 0.364 (0.635)	Data Time 0.002 (0.018)	Loss 2.0433 (2.1506)	Entropy 0.60526 (0.60903)	Top-1 acc 72.656 (72.992)	Top-5 acc 91.406 (89.302)	lr 0.00001
Train [119][2510/3239]	Time 0.272 (0.634)	Data Time 0.002 (0.018)	Loss 2.1774 (2.1508)	Entropy 0.60534 (0.60902)	Top-1 acc 72.656 (72.988)	Top-5 acc 89.062 (89.297)	lr 0.00001
Train [119][2520/3239]	Time 0.271 (0.634)	Data Time 0.032 (0.018)	Loss 2.0611 (2.1508)	Entropy 0.60526 (0.60900)	Top-1 acc 75.391 (72.989)	Top-5 acc 89.453 (89.297)	lr 0.00001
Train [119][2530/3239]	Time 0.215 (0.633)	Data Time 0.001 (0.018)	Loss 2.1548 (2.1509)	Entropy 0.60533 (0.60899)	Top-1 acc 72.266 (72.983)	Top-5 acc 88.281 (89.295)	lr 0.00001
Train [119][2540/3239]	Time 2.733 (0.633)	Data Time 0.001 (0.017)	Loss 2.2022 (2.1509)	Entropy 0.60533 (0.60897)	Top-1 acc 72.266 (72.982)	Top-5 acc 88.281 (89.296)	lr 0.00001
Train [119][2550/3239]	Time 0.248 (0.631)	Data Time 0.001 (0.017)	Loss 2.0213 (2.1508)	Entropy 0.60527 (0.60896)	Top-1 acc 75.000 (72.984)	Top-5 acc 91.406 (89.294)	lr 0.00001
Train [119][2560/3239]	Time 0.260 (0.631)	Data Time 0.002 (0.017)	Loss 2.0819 (2.1510)	Entropy 0.60526 (0.60894)	Top-1 acc 76.953 (72.977)	Top-5 acc 90.625 (89.291)	lr 0.00001
Train [119][2570/3239]	Time 0.252 (0.630)	Data Time 0.001 (0.017)	Loss 2.2104 (2.1511)	Entropy 0.60522 (0.60893)	Top-1 acc 70.703 (72.976)	Top-5 acc 87.500 (89.289)	lr 0.00001
Train [119][2580/3239]	Time 0.345 (0.630)	Data Time 0.002 (0.017)	Loss 2.1171 (2.1511)	Entropy 0.60518 (0.60892)	Top-1 acc 73.438 (72.973)	Top-5 acc 92.188 (89.289)	lr 0.00001
Train [119][2590/3239]	Time 0.260 (0.629)	Data Time 0.001 (0.017)	Loss 2.1766 (2.1512)	Entropy 0.60511 (0.60890)	Top-1 acc 73.438 (72.970)	Top-5 acc 87.109 (89.285)	lr 0.00001
Train [119][2600/3239]	Time 0.235 (0.629)	Data Time 0.001 (0.017)	Loss 2.1072 (2.1512)	Entropy 0.60508 (0.60889)	Top-1 acc 75.391 (72.970)	Top-5 acc 89.453 (89.286)	lr 0.00001
Train [119][2610/3239]	Time 0.282 (0.628)	Data Time 0.003 (0.017)	Loss 1.9940 (2.1511)	Entropy 0.60507 (0.60887)	Top-1 acc 75.391 (72.973)	Top-5 acc 91.016 (89.286)	lr 0.00001
Train [119][2620/3239]	Time 0.274 (0.628)	Data Time 0.001 (0.017)	Loss 2.1767 (2.1511)	Entropy 0.60510 (0.60886)	Top-1 acc 71.875 (72.976)	Top-5 acc 89.453 (89.286)	lr 0.00001
Train [119][2630/3239]	Time 0.291 (0.627)	Data Time 0.001 (0.017)	Loss 2.1998 (2.1513)	Entropy 0.60505 (0.60884)	Top-1 acc 71.094 (72.972)	Top-5 acc 90.625 (89.283)	lr 0.00001
Train [119][2640/3239]	Time 0.230 (0.627)	Data Time 0.001 (0.017)	Loss 2.3156 (2.1514)	Entropy 0.60502 (0.60883)	Top-1 acc 66.797 (72.970)	Top-5 acc 88.281 (89.282)	lr 0.00001
Train [119][2650/3239]	Time 0.256 (0.626)	Data Time 0.001 (0.017)	Loss 2.1082 (2.1514)	Entropy 0.60500 (0.60881)	Top-1 acc 73.828 (72.969)	Top-5 acc 89.844 (89.283)	lr 0.00001
Train [119][2660/3239]	Time 0.238 (0.626)	Data Time 0.001 (0.017)	Loss 2.1829 (2.1515)	Entropy 0.60506 (0.60880)	Top-1 acc 71.484 (72.962)	Top-5 acc 88.281 (89.279)	lr 0.00001
Train [119][2670/3239]	Time 0.245 (0.625)	Data Time 0.001 (0.017)	Loss 2.1121 (2.1514)	Entropy 0.60500 (0.60879)	Top-1 acc 73.438 (72.963)	Top-5 acc 88.281 (89.282)	lr 0.00001
Train [119][2680/3239]	Time 0.244 (0.645)	Data Time 0.003 (0.017)	Loss 2.2389 (2.1513)	Entropy 0.60505 (0.60877)	Top-1 acc 71.875 (72.965)	Top-5 acc 88.281 (89.286)	lr 0.00001
Train [119][2690/3239]	Time 0.253 (0.644)	Data Time 0.002 (0.017)	Loss 2.2667 (2.1514)	Entropy 0.60503 (0.60876)	Top-1 acc 71.875 (72.959)	Top-5 acc 85.156 (89.283)	lr 0.00001
Train [119][2700/3239]	Time 0.271 (0.644)	Data Time 0.002 (0.017)	Loss 2.1803 (2.1515)	Entropy 0.60501 (0.60874)	Top-1 acc 73.438 (72.955)	Top-5 acc 86.719 (89.283)	lr 0.00001
Train [119][2710/3239]	Time 0.260 (0.643)	Data Time 0.001 (0.016)	Loss 2.1419 (2.1516)	Entropy 0.60501 (0.60873)	Top-1 acc 73.828 (72.952)	Top-5 acc 87.891 (89.282)	lr 0.00001
Train [119][2720/3239]	Time 0.250 (0.643)	Data Time 0.002 (0.016)	Loss 2.0281 (2.1515)	Entropy 0.60476 (0.60872)	Top-1 acc 75.391 (72.954)	Top-5 acc 91.797 (89.286)	lr 0.00001
Train [119][2730/3239]	Time 0.302 (0.642)	Data Time 0.001 (0.016)	Loss 2.1395 (2.1516)	Entropy 0.60474 (0.60870)	Top-1 acc 73.438 (72.948)	Top-5 acc 90.625 (89.288)	lr 0.00001
Train [119][2740/3239]	Time 0.234 (0.642)	Data Time 0.001 (0.016)	Loss 2.3390 (2.1517)	Entropy 0.60473 (0.60869)	Top-1 acc 66.406 (72.946)	Top-5 acc 86.719 (89.289)	lr 0.00001
Train [119][2750/3239]	Time 0.252 (0.641)	Data Time 0.001 (0.016)	Loss 2.1384 (2.1516)	Entropy 0.60476 (0.60867)	Top-1 acc 71.094 (72.951)	Top-5 acc 88.672 (89.290)	lr 0.00001
Train [119][2760/3239]	Time 0.248 (0.641)	Data Time 0.001 (0.016)	Loss 2.0771 (2.1516)	Entropy 0.60473 (0.60866)	Top-1 acc 77.344 (72.958)	Top-5 acc 90.234 (89.288)	lr 0.00001
Train [119][2770/3239]	Time 0.278 (0.640)	Data Time 0.001 (0.016)	Loss 2.1952 (2.1516)	Entropy 0.60458 (0.60864)	Top-1 acc 70.703 (72.959)	Top-5 acc 89.062 (89.287)	lr 0.00001
Train [119][2780/3239]	Time 0.276 (0.640)	Data Time 0.001 (0.016)	Loss 2.0490 (2.1516)	Entropy 0.60457 (0.60863)	Top-1 acc 76.953 (72.960)	Top-5 acc 89.844 (89.286)	lr 0.00001
Train [119][2790/3239]	Time 0.365 (0.639)	Data Time 0.001 (0.016)	Loss 2.1971 (2.1517)	Entropy 0.60456 (0.60862)	Top-1 acc 71.484 (72.956)	Top-5 acc 86.719 (89.285)	lr 0.00001
Train [119][2800/3239]	Time 0.325 (0.639)	Data Time 0.001 (0.016)	Loss 2.0456 (2.1517)	Entropy 0.60457 (0.60860)	Top-1 acc 73.828 (72.952)	Top-5 acc 90.625 (89.286)	lr 0.00001
Train [119][2810/3239]	Time 0.235 (0.638)	Data Time 0.001 (0.016)	Loss 2.1270 (2.1516)	Entropy 0.60464 (0.60859)	Top-1 acc 71.484 (72.954)	Top-5 acc 89.062 (89.288)	lr 0.00001
Train [119][2820/3239]	Time 0.241 (0.638)	Data Time 0.001 (0.016)	Loss 2.2057 (2.1514)	Entropy 0.60468 (0.60857)	Top-1 acc 73.438 (72.962)	Top-5 acc 87.500 (89.292)	lr 0.00001
Train [119][2830/3239]	Time 0.240 (0.637)	Data Time 0.001 (0.016)	Loss 2.0142 (2.1513)	Entropy 0.60457 (0.60856)	Top-1 acc 73.438 (72.964)	Top-5 acc 92.188 (89.294)	lr 0.00001
Train [119][2840/3239]	Time 0.302 (0.637)	Data Time 0.001 (0.016)	Loss 2.0337 (2.1513)	Entropy 0.60461 (0.60855)	Top-1 acc 75.781 (72.962)	Top-5 acc 91.797 (89.295)	lr 0.00001
Train [119][2850/3239]	Time 0.338 (0.637)	Data Time 0.001 (0.016)	Loss 2.1770 (2.1511)	Entropy 0.60454 (0.60853)	Top-1 acc 74.609 (72.964)	Top-5 acc 89.062 (89.299)	lr 0.00001
Train [119][2860/3239]	Time 0.225 (0.636)	Data Time 0.001 (0.016)	Loss 2.2240 (2.1512)	Entropy 0.60455 (0.60852)	Top-1 acc 72.266 (72.964)	Top-5 acc 85.938 (89.296)	lr 0.00001
Train [119][2870/3239]	Time 0.261 (0.636)	Data Time 0.001 (0.016)	Loss 2.0895 (2.1512)	Entropy 0.60453 (0.60850)	Top-1 acc 74.219 (72.965)	Top-5 acc 89.453 (89.296)	lr 0.00001
Train [119][2880/3239]	Time 0.252 (0.635)	Data Time 0.001 (0.016)	Loss 2.1804 (2.1512)	Entropy 0.60452 (0.60849)	Top-1 acc 73.438 (72.964)	Top-5 acc 89.062 (89.297)	lr 0.00001
Train [119][2890/3239]	Time 0.282 (0.635)	Data Time 0.001 (0.016)	Loss 2.1520 (2.1513)	Entropy 0.60434 (0.60848)	Top-1 acc 72.656 (72.963)	Top-5 acc 89.453 (89.292)	lr 0.00001
Train [119][2900/3239]	Time 0.240 (0.634)	Data Time 0.001 (0.016)	Loss 2.1398 (2.1512)	Entropy 0.60430 (0.60846)	Top-1 acc 74.219 (72.967)	Top-5 acc 90.625 (89.294)	lr 0.00001
Train [119][2910/3239]	Time 0.277 (0.634)	Data Time 0.002 (0.015)	Loss 2.2965 (2.1513)	Entropy 0.60427 (0.60845)	Top-1 acc 70.703 (72.967)	Top-5 acc 86.719 (89.294)	lr 0.00001
Train [119][2920/3239]	Time 0.279 (0.633)	Data Time 0.001 (0.015)	Loss 2.0743 (2.1513)	Entropy 0.60427 (0.60843)	Top-1 acc 76.562 (72.965)	Top-5 acc 89.453 (89.295)	lr 0.00001
Train [119][2930/3239]	Time 0.307 (0.633)	Data Time 0.001 (0.015)	Loss 2.2092 (2.1515)	Entropy 0.60413 (0.60842)	Top-1 acc 72.656 (72.961)	Top-5 acc 88.672 (89.294)	lr 0.00001
Train [119][2940/3239]	Time 0.243 (0.633)	Data Time 0.001 (0.015)	Loss 2.3952 (2.1515)	Entropy 0.60410 (0.60840)	Top-1 acc 65.625 (72.961)	Top-5 acc 83.594 (89.296)	lr 0.00001
Train [119][2950/3239]	Time 0.234 (0.632)	Data Time 0.001 (0.015)	Loss 2.3240 (2.1516)	Entropy 0.60410 (0.60839)	Top-1 acc 70.703 (72.960)	Top-5 acc 84.766 (89.293)	lr 0.00001
Train [119][2960/3239]	Time 0.233 (0.632)	Data Time 0.001 (0.015)	Loss 2.0100 (2.1515)	Entropy 0.60402 (0.60837)	Top-1 acc 77.344 (72.962)	Top-5 acc 92.188 (89.295)	lr 0.00001
Train [119][2970/3239]	Time 0.231 (0.631)	Data Time 0.001 (0.015)	Loss 2.0576 (2.1514)	Entropy 0.60394 (0.60836)	Top-1 acc 76.953 (72.963)	Top-5 acc 92.578 (89.296)	lr 0.00001
Train [119][2980/3239]	Time 0.248 (0.631)	Data Time 0.001 (0.015)	Loss 2.1554 (2.1515)	Entropy 0.60392 (0.60835)	Top-1 acc 73.438 (72.962)	Top-5 acc 89.453 (89.294)	lr 0.00000
Train [119][2990/3239]	Time 0.241 (0.630)	Data Time 0.001 (0.015)	Loss 2.1399 (2.1514)	Entropy 0.60390 (0.60833)	Top-1 acc 74.219 (72.963)	Top-5 acc 90.234 (89.296)	lr 0.00000
Train [119][3000/3239]	Time 0.280 (0.630)	Data Time 0.001 (0.015)	Loss 2.0922 (2.1517)	Entropy 0.60385 (0.60832)	Top-1 acc 74.609 (72.957)	Top-5 acc 91.406 (89.291)	lr 0.00000
Train [119][3010/3239]	Time 0.491 (0.648)	Data Time 0.005 (0.015)	Loss 2.1890 (2.1517)	Entropy 0.60381 (0.60830)	Top-1 acc 70.703 (72.959)	Top-5 acc 90.234 (89.291)	lr 0.00000
Train [119][3020/3239]	Time 0.255 (0.647)	Data Time 0.002 (0.015)	Loss 2.1663 (2.1516)	Entropy 0.60385 (0.60829)	Top-1 acc 74.219 (72.966)	Top-5 acc 87.500 (89.291)	lr 0.00000
Train [119][3030/3239]	Time 0.240 (0.647)	Data Time 0.001 (0.015)	Loss 2.1836 (2.1515)	Entropy 0.60382 (0.60827)	Top-1 acc 72.266 (72.966)	Top-5 acc 88.672 (89.288)	lr 0.00000
Train [119][3040/3239]	Time 0.236 (0.646)	Data Time 0.002 (0.015)	Loss 2.0842 (2.1514)	Entropy 0.60382 (0.60826)	Top-1 acc 73.438 (72.968)	Top-5 acc 90.234 (89.289)	lr 0.00000
Train [119][3050/3239]	Time 0.245 (0.646)	Data Time 0.001 (0.015)	Loss 2.2215 (2.1512)	Entropy 0.60380 (0.60824)	Top-1 acc 71.484 (72.971)	Top-5 acc 88.281 (89.293)	lr 0.00000
Train [119][3060/3239]	Time 0.239 (0.645)	Data Time 0.001 (0.015)	Loss 2.0828 (2.1511)	Entropy 0.60386 (0.60823)	Top-1 acc 72.266 (72.974)	Top-5 acc 89.062 (89.295)	lr 0.00000
Train [119][3070/3239]	Time 0.248 (0.645)	Data Time 0.001 (0.015)	Loss 2.1822 (2.1510)	Entropy 0.60378 (0.60821)	Top-1 acc 74.609 (72.978)	Top-5 acc 87.891 (89.295)	lr 0.00000
Train [119][3080/3239]	Time 0.242 (0.645)	Data Time 0.002 (0.015)	Loss 2.2720 (2.1510)	Entropy 0.60373 (0.60820)	Top-1 acc 67.188 (72.974)	Top-5 acc 89.453 (89.297)	lr 0.00000
Train [119][3090/3239]	Time 0.232 (0.644)	Data Time 0.001 (0.015)	Loss 2.1200 (2.1511)	Entropy 0.60374 (0.60818)	Top-1 acc 73.047 (72.976)	Top-5 acc 88.672 (89.294)	lr 0.00000
Train [119][3100/3239]	Time 0.239 (0.644)	Data Time 0.001 (0.015)	Loss 2.0078 (2.1511)	Entropy 0.60365 (0.60817)	Top-1 acc 75.000 (72.975)	Top-5 acc 91.016 (89.294)	lr 0.00000
Train [119][3110/3239]	Time 0.247 (0.643)	Data Time 0.001 (0.015)	Loss 2.0100 (2.1510)	Entropy 0.60363 (0.60816)	Top-1 acc 78.906 (72.977)	Top-5 acc 92.969 (89.295)	lr 0.00000
Train [119][3120/3239]	Time 0.287 (0.643)	Data Time 0.001 (0.015)	Loss 2.0652 (2.1510)	Entropy 0.60360 (0.60814)	Top-1 acc 73.047 (72.977)	Top-5 acc 91.016 (89.297)	lr 0.00000
Train [119][3130/3239]	Time 0.233 (0.642)	Data Time 0.001 (0.015)	Loss 2.1902 (2.1510)	Entropy 0.60352 (0.60813)	Top-1 acc 74.609 (72.978)	Top-5 acc 87.891 (89.300)	lr 0.00000
Train [119][3140/3239]	Time 0.238 (0.642)	Data Time 0.001 (0.015)	Loss 2.2095 (2.1510)	Entropy 0.60348 (0.60811)	Top-1 acc 73.438 (72.979)	Top-5 acc 88.281 (89.299)	lr 0.00000
Train [119][3150/3239]	Time 0.244 (0.642)	Data Time 0.001 (0.014)	Loss 2.2709 (2.1511)	Entropy 0.60346 (0.60810)	Top-1 acc 75.000 (72.978)	Top-5 acc 86.328 (89.295)	lr 0.00000
Train [119][3160/3239]	Time 0.243 (0.641)	Data Time 0.001 (0.014)	Loss 2.0393 (2.1511)	Entropy 0.60345 (0.60808)	Top-1 acc 72.656 (72.972)	Top-5 acc 90.625 (89.293)	lr 0.00000
Train [119][3170/3239]	Time 0.241 (0.641)	Data Time 0.001 (0.014)	Loss 2.0616 (2.1512)	Entropy 0.60340 (0.60807)	Top-1 acc 73.047 (72.969)	Top-5 acc 89.844 (89.292)	lr 0.00000
Train [119][3180/3239]	Time 0.273 (0.640)	Data Time 0.000 (0.014)	Loss 1.9823 (2.1513)	Entropy 0.60333 (0.60805)	Top-1 acc 77.734 (72.969)	Top-5 acc 92.578 (89.290)	lr 0.00000
Train [119][3190/3239]	Time 0.220 (0.640)	Data Time 0.000 (0.014)	Loss 2.1039 (2.1513)	Entropy 0.60322 (0.60804)	Top-1 acc 71.484 (72.968)	Top-5 acc 89.453 (89.287)	lr 0.00000
Train [119][3200/3239]	Time 0.231 (0.639)	Data Time 0.000 (0.014)	Loss 2.2609 (2.1514)	Entropy 0.60323 (0.60802)	Top-1 acc 72.656 (72.968)	Top-5 acc 89.453 (89.287)	lr 0.00000
Train [119][3210/3239]	Time 0.225 (0.639)	Data Time 0.000 (0.014)	Loss 2.1432 (2.1513)	Entropy 0.60328 (0.60801)	Top-1 acc 72.266 (72.969)	Top-5 acc 89.453 (89.290)	lr 0.00000
Train [119][3220/3239]	Time 0.233 (0.638)	Data Time 0.000 (0.014)	Loss 2.0903 (2.1515)	Entropy 0.60317 (0.60799)	Top-1 acc 75.000 (72.964)	Top-5 acc 89.844 (89.286)	lr 0.00000
Train [119][3230/3239]	Time 0.309 (0.638)	Data Time 0.000 (0.014)	Loss 2.0636 (2.1514)	Entropy 0.60320 (0.60798)	Top-1 acc 76.172 (72.964)	Top-5 acc 91.406 (89.288)	lr 0.00000
Train [119][3239/3239]	Time 2.380 (0.637)	Data Time 0.000 (0.014)	Loss 2.2973 (2.1513)	Entropy 0.60320 (0.60797)	Top-1 acc 69.136 (72.967)	Top-5 acc 86.420 (89.289)	lr 0.00000
==========Valid [119/120]	loss 1.198	top-1 acc 72.718 (72.745)	top-5 acc 89.976	Train top-1 72.967	top-5 89.289	Entropy 0.60320	Latency-None: 0.000ms	Flops: 544.27M
Train [120][0/3239]	Time 44.975 (44.975)	Data Time 41.790 (41.790)	Loss 2.0111 (2.0111)	Entropy 0.60305 (0.60305)	Top-1 acc 76.953 (76.953)	Top-5 acc 93.750 (93.750)	lr 0.00000
Train [120][10/3239]	Time 2.747 (4.655)	Data Time 0.002 (3.837)	Loss 2.1285 (2.1256)	Entropy 0.60305 (0.60305)	Top-1 acc 72.656 (73.686)	Top-5 acc 89.844 (90.447)	lr 0.00000
Train [120][20/3239]	Time 0.248 (2.559)	Data Time 0.001 (2.011)	Loss 1.9687 (2.1507)	Entropy 0.60295 (0.60300)	Top-1 acc 76.172 (73.214)	Top-5 acc 93.359 (89.807)	lr 0.00000
Train [120][30/3239]	Time 0.252 (1.894)	Data Time 0.001 (1.363)	Loss 2.1992 (2.1628)	Entropy 0.60288 (0.60297)	Top-1 acc 73.828 (72.694)	Top-5 acc 87.109 (89.504)	lr 0.00000
Train [120][40/3239]	Time 0.241 (1.555)	Data Time 0.002 (1.031)	Loss 2.0482 (2.1644)	Entropy 0.60288 (0.60294)	Top-1 acc 76.562 (72.713)	Top-5 acc 91.016 (89.310)	lr 0.00000
Train [120][50/3239]	Time 0.244 (1.347)	Data Time 0.001 (0.829)	Loss 2.0606 (2.1561)	Entropy 0.60294 (0.60294)	Top-1 acc 72.266 (72.748)	Top-5 acc 90.234 (89.476)	lr 0.00000
Train [120][60/3239]	Time 0.238 (1.206)	Data Time 0.001 (0.693)	Loss 2.1106 (2.1485)	Entropy 0.60275 (0.60292)	Top-1 acc 72.266 (72.976)	Top-5 acc 90.625 (89.498)	lr 0.00000
Train [120][70/3239]	Time 0.260 (1.105)	Data Time 0.001 (0.596)	Loss 2.2483 (2.1521)	Entropy 0.60285 (0.60290)	Top-1 acc 71.094 (72.893)	Top-5 acc 88.281 (89.349)	lr 0.00000
Train [120][80/3239]	Time 0.234 (1.033)	Data Time 0.001 (0.522)	Loss 2.1440 (2.1530)	Entropy 0.60282 (0.60290)	Top-1 acc 71.484 (72.844)	Top-5 acc 90.234 (89.352)	lr 0.00000
Train [120][90/3239]	Time 0.230 (0.975)	Data Time 0.001 (0.465)	Loss 2.0750 (2.1468)	Entropy 0.60283 (0.60289)	Top-1 acc 76.953 (73.120)	Top-5 acc 91.797 (89.496)	lr 0.00000
Train [120][100/3239]	Time 0.176 (0.927)	Data Time 0.001 (0.419)	Loss 2.1575 (2.1458)	Entropy 0.60282 (0.60288)	Top-1 acc 71.875 (73.182)	Top-5 acc 87.500 (89.534)	lr 0.00000
Train [120][110/3239]	Time 0.325 (1.403)	Data Time 0.005 (0.382)	Loss 2.2304 (2.1491)	Entropy 0.60278 (0.60288)	Top-1 acc 71.094 (73.029)	Top-5 acc 87.109 (89.478)	lr 0.00000
Train [120][120/3239]	Time 3.308 (1.343)	Data Time 0.003 (0.351)	Loss 2.0446 (2.1543)	Entropy 0.60278 (0.60287)	Top-1 acc 77.344 (72.950)	Top-5 acc 89.453 (89.340)	lr 0.00000
Train [120][130/3239]	Time 0.239 (1.260)	Data Time 0.002 (0.324)	Loss 2.0797 (2.1550)	Entropy 0.60280 (0.60286)	Top-1 acc 75.391 (72.960)	Top-5 acc 89.844 (89.304)	lr 0.00000
Train [120][140/3239]	Time 0.263 (1.206)	Data Time 0.002 (0.301)	Loss 2.3373 (2.1542)	Entropy 0.60279 (0.60286)	Top-1 acc 64.453 (72.944)	Top-5 acc 87.500 (89.312)	lr 0.00000
Train [120][150/3239]	Time 0.233 (1.159)	Data Time 0.002 (0.281)	Loss 2.1714 (2.1575)	Entropy 0.60277 (0.60285)	Top-1 acc 71.094 (72.822)	Top-5 acc 88.281 (89.218)	lr 0.00000
Train [120][160/3239]	Time 0.350 (1.118)	Data Time 0.001 (0.264)	Loss 2.2946 (2.1566)	Entropy 0.60258 (0.60284)	Top-1 acc 67.578 (72.797)	Top-5 acc 86.719 (89.220)	lr 0.00000
Train [120][170/3239]	Time 0.240 (1.082)	Data Time 0.001 (0.249)	Loss 2.0921 (2.1565)	Entropy 0.60252 (0.60282)	Top-1 acc 75.000 (72.816)	Top-5 acc 89.453 (89.188)	lr 0.00000
Train [120][180/3239]	Time 0.240 (1.050)	Data Time 0.002 (0.235)	Loss 2.0259 (2.1541)	Entropy 0.60253 (0.60280)	Top-1 acc 76.172 (72.909)	Top-5 acc 92.578 (89.216)	lr 0.00000
Train [120][190/3239]	Time 0.250 (1.021)	Data Time 0.001 (0.223)	Loss 2.3114 (2.1523)	Entropy 0.60251 (0.60279)	Top-1 acc 69.922 (72.951)	Top-5 acc 85.547 (89.257)	lr 0.00000
Train [120][200/3239]	Time 0.320 (0.995)	Data Time 0.001 (0.212)	Loss 2.2816 (2.1534)	Entropy 0.60253 (0.60278)	Top-1 acc 69.531 (72.921)	Top-5 acc 85.547 (89.220)	lr 0.00000
Train [120][210/3239]	Time 0.277 (0.971)	Data Time 0.001 (0.202)	Loss 2.0993 (2.1559)	Entropy 0.60252 (0.60276)	Top-1 acc 75.000 (72.838)	Top-5 acc 90.234 (89.168)	lr 0.00000
Train [120][220/3239]	Time 0.284 (0.948)	Data Time 0.001 (0.193)	Loss 2.0533 (2.1556)	Entropy 0.60253 (0.60275)	Top-1 acc 73.047 (72.856)	Top-5 acc 90.625 (89.167)	lr 0.00000
Train [120][230/3239]	Time 2.678 (0.929)	Data Time 0.001 (0.184)	Loss 2.0480 (2.1541)	Entropy 0.60253 (0.60274)	Top-1 acc 76.562 (72.920)	Top-5 acc 91.406 (89.194)	lr 0.00000
Train [120][240/3239]	Time 0.301 (0.902)	Data Time 0.002 (0.177)	Loss 2.1428 (2.1531)	Entropy 0.60250 (0.60273)	Top-1 acc 72.266 (72.977)	Top-5 acc 91.016 (89.191)	lr 0.00000
Train [120][250/3239]	Time 0.235 (0.885)	Data Time 0.001 (0.170)	Loss 2.1477 (2.1542)	Entropy 0.60253 (0.60273)	Top-1 acc 74.219 (72.961)	Top-5 acc 89.062 (89.170)	lr 0.00000
Train [120][260/3239]	Time 0.262 (0.870)	Data Time 0.001 (0.163)	Loss 2.1066 (2.1539)	Entropy 0.60259 (0.60272)	Top-1 acc 74.609 (72.975)	Top-5 acc 89.062 (89.178)	lr 0.00000
Train [120][270/3239]	Time 0.238 (0.856)	Data Time 0.001 (0.157)	Loss 2.1390 (2.1549)	Entropy 0.60249 (0.60271)	Top-1 acc 73.047 (72.914)	Top-5 acc 90.234 (89.207)	lr 0.00000
Train [120][280/3239]	Time 0.238 (0.844)	Data Time 0.002 (0.152)	Loss 2.1841 (2.1537)	Entropy 0.60252 (0.60271)	Top-1 acc 72.656 (72.973)	Top-5 acc 89.062 (89.207)	lr 0.00000
Train [120][290/3239]	Time 0.237 (0.832)	Data Time 0.001 (0.147)	Loss 2.1896 (2.1553)	Entropy 0.60260 (0.60270)	Top-1 acc 72.266 (72.909)	Top-5 acc 85.156 (89.169)	lr 0.00000
Train [120][300/3239]	Time 0.231 (0.821)	Data Time 0.001 (0.142)	Loss 2.2042 (2.1556)	Entropy 0.60262 (0.60270)	Top-1 acc 71.484 (72.889)	Top-5 acc 88.672 (89.165)	lr 0.00000
Train [120][310/3239]	Time 0.285 (0.810)	Data Time 0.001 (0.137)	Loss 2.3092 (2.1557)	Entropy 0.60254 (0.60269)	Top-1 acc 68.750 (72.912)	Top-5 acc 87.109 (89.174)	lr 0.00000
Train [120][320/3239]	Time 0.234 (0.800)	Data Time 0.001 (0.133)	Loss 2.1658 (2.1556)	Entropy 0.60259 (0.60269)	Top-1 acc 74.609 (72.914)	Top-5 acc 89.453 (89.177)	lr 0.00000
Train [120][330/3239]	Time 0.278 (0.790)	Data Time 0.001 (0.129)	Loss 2.3279 (2.1557)	Entropy 0.60258 (0.60269)	Top-1 acc 68.359 (72.918)	Top-5 acc 85.938 (89.175)	lr 0.00000
Train [120][340/3239]	Time 2.591 (0.782)	Data Time 0.001 (0.126)	Loss 2.0166 (2.1552)	Entropy 0.60258 (0.60268)	Top-1 acc 75.391 (72.940)	Top-5 acc 91.016 (89.189)	lr 0.00000
Train [120][350/3239]	Time 0.235 (0.767)	Data Time 0.001 (0.122)	Loss 2.1982 (2.1547)	Entropy 0.60257 (0.60268)	Top-1 acc 73.047 (72.952)	Top-5 acc 87.891 (89.197)	lr 0.00000
Train [120][360/3239]	Time 0.235 (0.759)	Data Time 0.002 (0.119)	Loss 2.1360 (2.1552)	Entropy 0.60260 (0.60268)	Top-1 acc 72.266 (72.914)	Top-5 acc 89.062 (89.196)	lr 0.00000
Train [120][370/3239]	Time 0.238 (0.752)	Data Time 0.001 (0.116)	Loss 2.1638 (2.1543)	Entropy 0.60255 (0.60267)	Top-1 acc 72.656 (72.932)	Top-5 acc 90.625 (89.220)	lr 0.00000
Train [120][380/3239]	Time 0.232 (0.745)	Data Time 0.001 (0.113)	Loss 2.1069 (2.1543)	Entropy 0.60243 (0.60267)	Top-1 acc 72.656 (72.937)	Top-5 acc 91.406 (89.220)	lr 0.00000
Train [120][390/3239]	Time 0.242 (0.738)	Data Time 0.001 (0.110)	Loss 1.9537 (2.1545)	Entropy 0.60240 (0.60266)	Top-1 acc 78.125 (72.933)	Top-5 acc 94.141 (89.249)	lr 0.00000
Train [120][400/3239]	Time 0.249 (0.732)	Data Time 0.001 (0.107)	Loss 2.3801 (2.1552)	Entropy 0.60239 (0.60266)	Top-1 acc 70.312 (72.931)	Top-5 acc 83.594 (89.236)	lr 0.00000
Train [120][410/3239]	Time 0.234 (0.726)	Data Time 0.001 (0.104)	Loss 2.0078 (2.1564)	Entropy 0.60239 (0.60265)	Top-1 acc 73.828 (72.871)	Top-5 acc 90.625 (89.219)	lr 0.00000
Train [120][420/3239]	Time 0.270 (0.721)	Data Time 0.001 (0.102)	Loss 2.1601 (2.1563)	Entropy 0.60237 (0.60264)	Top-1 acc 71.875 (72.876)	Top-5 acc 87.109 (89.203)	lr 0.00000
Train [120][430/3239]	Time 0.252 (0.715)	Data Time 0.008 (0.100)	Loss 2.2381 (2.1554)	Entropy 0.60235 (0.60264)	Top-1 acc 70.312 (72.913)	Top-5 acc 89.453 (89.218)	lr 0.00000
Train [120][440/3239]	Time 0.220 (0.710)	Data Time 0.001 (0.097)	Loss 2.2071 (2.1555)	Entropy 0.60229 (0.60263)	Top-1 acc 70.703 (72.901)	Top-5 acc 89.062 (89.219)	lr 0.00000
Train [120][450/3239]	Time 2.713 (0.705)	Data Time 0.001 (0.095)	Loss 2.1213 (2.1547)	Entropy 0.60229 (0.60262)	Top-1 acc 70.703 (72.923)	Top-5 acc 89.453 (89.243)	lr 0.00000
Train [120][460/3239]	Time 0.229 (0.695)	Data Time 0.001 (0.093)	Loss 2.2881 (2.1535)	Entropy 0.60225 (0.60261)	Top-1 acc 67.969 (72.948)	Top-5 acc 86.719 (89.263)	lr 0.00000
Train [120][470/3239]	Time 0.246 (0.690)	Data Time 0.001 (0.091)	Loss 2.1551 (2.1532)	Entropy 0.60224 (0.60261)	Top-1 acc 73.047 (72.954)	Top-5 acc 88.672 (89.275)	lr 0.00000
Train [120][480/3239]	Time 0.272 (0.803)	Data Time 0.002 (0.089)	Loss 1.9798 (2.1530)	Entropy 0.60221 (0.60260)	Top-1 acc 80.469 (72.966)	Top-5 acc 92.188 (89.279)	lr 0.00000
Train [120][490/3239]	Time 0.350 (0.797)	Data Time 0.003 (0.088)	Loss 2.2683 (2.1527)	Entropy 0.60215 (0.60259)	Top-1 acc 71.484 (72.974)	Top-5 acc 87.109 (89.285)	lr 0.00000
Train [120][500/3239]	Time 0.246 (0.791)	Data Time 0.002 (0.086)	Loss 2.2608 (2.1528)	Entropy 0.60215 (0.60258)	Top-1 acc 71.094 (72.961)	Top-5 acc 86.719 (89.304)	lr 0.00000
Train [120][510/3239]	Time 0.237 (0.785)	Data Time 0.001 (0.084)	Loss 2.2520 (2.1523)	Entropy 0.60208 (0.60257)	Top-1 acc 67.578 (72.944)	Top-5 acc 86.719 (89.317)	lr 0.00000
Train [120][520/3239]	Time 0.234 (0.779)	Data Time 0.001 (0.083)	Loss 2.2770 (2.1526)	Entropy 0.60201 (0.60256)	Top-1 acc 69.531 (72.940)	Top-5 acc 87.109 (89.316)	lr 0.00000
Train [120][530/3239]	Time 0.380 (0.774)	Data Time 0.001 (0.081)	Loss 2.2254 (2.1532)	Entropy 0.60199 (0.60255)	Top-1 acc 71.875 (72.925)	Top-5 acc 88.672 (89.316)	lr 0.00000
Train [120][540/3239]	Time 0.238 (0.768)	Data Time 0.001 (0.080)	Loss 2.2256 (2.1536)	Entropy 0.60192 (0.60254)	Top-1 acc 71.484 (72.905)	Top-5 acc 88.672 (89.306)	lr 0.00000
Train [120][550/3239]	Time 0.261 (0.763)	Data Time 0.001 (0.078)	Loss 2.0952 (2.1532)	Entropy 0.60198 (0.60253)	Top-1 acc 75.000 (72.904)	Top-5 acc 89.453 (89.311)	lr 0.00000
Train [120][560/3239]	Time 2.582 (0.758)	Data Time 0.001 (0.077)	Loss 2.2957 (2.1543)	Entropy 0.60198 (0.60252)	Top-1 acc 64.844 (72.859)	Top-5 acc 88.281 (89.293)	lr 0.00000
Train [120][570/3239]	Time 0.235 (0.749)	Data Time 0.001 (0.076)	Loss 2.2145 (2.1544)	Entropy 0.60198 (0.60251)	Top-1 acc 69.531 (72.852)	Top-5 acc 88.281 (89.288)	lr 0.00000
Train [120][580/3239]	Time 0.242 (0.744)	Data Time 0.001 (0.074)	Loss 2.1522 (2.1536)	Entropy 0.60197 (0.60250)	Top-1 acc 68.750 (72.873)	Top-5 acc 90.234 (89.304)	lr 0.00000
Train [120][590/3239]	Time 0.236 (0.740)	Data Time 0.001 (0.073)	Loss 2.1714 (2.1537)	Entropy 0.60194 (0.60249)	Top-1 acc 74.609 (72.871)	Top-5 acc 89.844 (89.301)	lr 0.00000
Train [120][600/3239]	Time 0.233 (0.736)	Data Time 0.001 (0.072)	Loss 2.0843 (2.1533)	Entropy 0.60194 (0.60248)	Top-1 acc 75.781 (72.880)	Top-5 acc 90.625 (89.310)	lr 0.00000
Train [120][610/3239]	Time 0.239 (0.731)	Data Time 0.001 (0.071)	Loss 2.0433 (2.1534)	Entropy 0.60179 (0.60247)	Top-1 acc 73.438 (72.873)	Top-5 acc 90.625 (89.298)	lr 0.00000
Train [120][620/3239]	Time 0.232 (0.728)	Data Time 0.002 (0.070)	Loss 2.2851 (2.1529)	Entropy 0.60182 (0.60246)	Top-1 acc 70.312 (72.877)	Top-5 acc 88.281 (89.316)	lr 0.00000
Train [120][630/3239]	Time 0.233 (0.724)	Data Time 0.001 (0.069)	Loss 2.0052 (2.1527)	Entropy 0.60181 (0.60245)	Top-1 acc 78.125 (72.886)	Top-5 acc 91.406 (89.321)	lr 0.00000
Train [120][640/3239]	Time 0.229 (0.720)	Data Time 0.001 (0.068)	Loss 2.1605 (2.1523)	Entropy 0.60180 (0.60244)	Top-1 acc 71.484 (72.885)	Top-5 acc 89.453 (89.331)	lr 0.00000
Train [120][650/3239]	Time 0.240 (0.717)	Data Time 0.001 (0.067)	Loss 2.0626 (2.1521)	Entropy 0.60167 (0.60243)	Top-1 acc 72.266 (72.891)	Top-5 acc 91.406 (89.333)	lr 0.00000
Train [120][660/3239]	Time 0.286 (0.713)	Data Time 0.002 (0.066)	Loss 2.1812 (2.1519)	Entropy 0.60170 (0.60242)	Top-1 acc 71.484 (72.891)	Top-5 acc 89.062 (89.331)	lr 0.00000
Train [120][670/3239]	Time 2.523 (0.710)	Data Time 0.001 (0.065)	Loss 2.0597 (2.1514)	Entropy 0.60170 (0.60241)	Top-1 acc 75.000 (72.908)	Top-5 acc 90.625 (89.342)	lr 0.00000
Train [120][680/3239]	Time 0.252 (0.703)	Data Time 0.001 (0.064)	Loss 2.0480 (2.1506)	Entropy 0.60164 (0.60240)	Top-1 acc 74.219 (72.924)	Top-5 acc 90.625 (89.360)	lr 0.00000
Train [120][690/3239]	Time 0.231 (0.700)	Data Time 0.001 (0.063)	Loss 2.0751 (2.1501)	Entropy 0.60159 (0.60239)	Top-1 acc 75.391 (72.939)	Top-5 acc 91.797 (89.375)	lr 0.00000
Train [120][700/3239]	Time 0.316 (0.697)	Data Time 0.001 (0.062)	Loss 2.2825 (2.1500)	Entropy 0.60164 (0.60238)	Top-1 acc 68.359 (72.951)	Top-5 acc 87.500 (89.376)	lr 0.00000
Train [120][710/3239]	Time 0.235 (0.694)	Data Time 0.001 (0.061)	Loss 2.2388 (2.1504)	Entropy 0.60160 (0.60236)	Top-1 acc 69.922 (72.944)	Top-5 acc 89.062 (89.371)	lr 0.00000
Train [120][720/3239]	Time 0.232 (0.690)	Data Time 0.001 (0.060)	Loss 2.2802 (2.1511)	Entropy 0.60155 (0.60235)	Top-1 acc 70.312 (72.933)	Top-5 acc 87.109 (89.358)	lr 0.00000
Train [120][730/3239]	Time 0.229 (0.688)	Data Time 0.001 (0.059)	Loss 2.2796 (2.1505)	Entropy 0.60152 (0.60234)	Top-1 acc 68.750 (72.957)	Top-5 acc 88.281 (89.367)	lr 0.00000
Train [120][740/3239]	Time 0.373 (0.685)	Data Time 0.001 (0.059)	Loss 2.2713 (2.1504)	Entropy 0.60144 (0.60233)	Top-1 acc 67.188 (72.951)	Top-5 acc 87.891 (89.370)	lr 0.00000
Train [120][750/3239]	Time 0.221 (0.683)	Data Time 0.001 (0.058)	Loss 2.1681 (2.1503)	Entropy 0.60145 (0.60232)	Top-1 acc 74.219 (72.959)	Top-5 acc 90.234 (89.376)	lr 0.00000
Train [120][760/3239]	Time 0.247 (0.680)	Data Time 0.001 (0.057)	Loss 2.1988 (2.1505)	Entropy 0.60135 (0.60231)	Top-1 acc 70.703 (72.965)	Top-5 acc 87.500 (89.376)	lr 0.00000
Train [120][770/3239]	Time 0.247 (0.677)	Data Time 0.001 (0.056)	Loss 2.2012 (2.1506)	Entropy 0.60126 (0.60229)	Top-1 acc 70.703 (72.951)	Top-5 acc 89.453 (89.385)	lr 0.00000
Train [120][780/3239]	Time 2.695 (0.675)	Data Time 0.001 (0.056)	Loss 2.1476 (2.1505)	Entropy 0.60126 (0.60228)	Top-1 acc 71.875 (72.942)	Top-5 acc 90.625 (89.388)	lr 0.00000
Train [120][790/3239]	Time 0.230 (0.669)	Data Time 0.001 (0.055)	Loss 2.3715 (2.1506)	Entropy 0.60125 (0.60227)	Top-1 acc 67.969 (72.945)	Top-5 acc 86.719 (89.380)	lr 0.00000
Train [120][800/3239]	Time 0.241 (0.667)	Data Time 0.001 (0.054)	Loss 2.1565 (2.1509)	Entropy 0.60132 (0.60226)	Top-1 acc 71.875 (72.930)	Top-5 acc 89.453 (89.379)	lr 0.00000
Train [120][810/3239]	Time 0.224 (0.665)	Data Time 0.001 (0.054)	Loss 2.0446 (2.1505)	Entropy 0.60123 (0.60224)	Top-1 acc 77.344 (72.940)	Top-5 acc 89.062 (89.388)	lr 0.00000
Train [120][820/3239]	Time 0.327 (0.663)	Data Time 0.001 (0.053)	Loss 2.1752 (2.1503)	Entropy 0.60114 (0.60223)	Top-1 acc 74.219 (72.940)	Top-5 acc 87.891 (89.389)	lr 0.00000
Train [120][830/3239]	Time 0.235 (0.661)	Data Time 0.001 (0.052)	Loss 2.0726 (2.1506)	Entropy 0.60099 (0.60222)	Top-1 acc 74.609 (72.924)	Top-5 acc 90.234 (89.379)	lr 0.00000
Train [120][840/3239]	Time 0.364 (0.727)	Data Time 0.003 (0.052)	Loss 2.0976 (2.1504)	Entropy 0.60089 (0.60220)	Top-1 acc 77.734 (72.923)	Top-5 acc 90.234 (89.386)	lr 0.00000
Train [120][850/3239]	Time 0.248 (0.725)	Data Time 0.002 (0.051)	Loss 2.1901 (2.1503)	Entropy 0.60082 (0.60219)	Top-1 acc 73.438 (72.925)	Top-5 acc 88.672 (89.387)	lr 0.00000
Train [120][860/3239]	Time 0.336 (0.722)	Data Time 0.002 (0.051)	Loss 2.2341 (2.1501)	Entropy 0.60084 (0.60217)	Top-1 acc 71.484 (72.931)	Top-5 acc 89.062 (89.390)	lr 0.00000
Train [120][870/3239]	Time 0.236 (0.720)	Data Time 0.001 (0.050)	Loss 2.1970 (2.1506)	Entropy 0.60081 (0.60215)	Top-1 acc 71.094 (72.918)	Top-5 acc 87.500 (89.384)	lr 0.00000
Train [120][880/3239]	Time 0.272 (0.717)	Data Time 0.001 (0.050)	Loss 2.1655 (2.1506)	Entropy 0.60077 (0.60214)	Top-1 acc 74.219 (72.915)	Top-5 acc 88.281 (89.384)	lr 0.00000
Train [120][890/3239]	Time 2.665 (0.715)	Data Time 0.001 (0.049)	Loss 2.1170 (2.1506)	Entropy 0.60077 (0.60212)	Top-1 acc 73.438 (72.918)	Top-5 acc 90.625 (89.382)	lr 0.00000
Train [120][900/3239]	Time 0.232 (0.709)	Data Time 0.001 (0.049)	Loss 2.1364 (2.1505)	Entropy 0.60075 (0.60211)	Top-1 acc 75.391 (72.923)	Top-5 acc 88.672 (89.385)	lr 0.00000
Train [120][910/3239]	Time 0.239 (0.707)	Data Time 0.001 (0.048)	Loss 2.2618 (2.1504)	Entropy 0.60078 (0.60209)	Top-1 acc 72.656 (72.924)	Top-5 acc 85.938 (89.386)	lr 0.00000
Train [120][920/3239]	Time 0.240 (0.705)	Data Time 0.001 (0.048)	Loss 2.1791 (2.1506)	Entropy 0.60082 (0.60208)	Top-1 acc 71.094 (72.915)	Top-5 acc 88.672 (89.379)	lr 0.00000
Train [120][930/3239]	Time 0.229 (0.702)	Data Time 0.001 (0.047)	Loss 2.1134 (2.1507)	Entropy 0.60082 (0.60207)	Top-1 acc 73.047 (72.903)	Top-5 acc 89.844 (89.378)	lr 0.00000
Train [120][940/3239]	Time 0.248 (0.700)	Data Time 0.002 (0.047)	Loss 2.1711 (2.1507)	Entropy 0.60084 (0.60205)	Top-1 acc 70.312 (72.895)	Top-5 acc 90.234 (89.375)	lr 0.00000
Train [120][950/3239]	Time 0.239 (0.698)	Data Time 0.001 (0.046)	Loss 2.1178 (2.1502)	Entropy 0.60081 (0.60204)	Top-1 acc 74.219 (72.914)	Top-5 acc 90.625 (89.383)	lr 0.00000
Train [120][960/3239]	Time 0.225 (0.695)	Data Time 0.001 (0.046)	Loss 2.2327 (2.1505)	Entropy 0.60079 (0.60203)	Top-1 acc 71.875 (72.907)	Top-5 acc 85.156 (89.373)	lr 0.00000
Train [120][970/3239]	Time 0.237 (0.693)	Data Time 0.002 (0.045)	Loss 2.1389 (2.1506)	Entropy 0.60073 (0.60201)	Top-1 acc 71.875 (72.908)	Top-5 acc 87.891 (89.370)	lr 0.00000
Train [120][980/3239]	Time 0.271 (0.691)	Data Time 0.002 (0.045)	Loss 2.2569 (2.1509)	Entropy 0.60063 (0.60200)	Top-1 acc 70.312 (72.905)	Top-5 acc 86.719 (89.368)	lr 0.00000
Train [120][990/3239]	Time 0.331 (0.689)	Data Time 0.001 (0.044)	Loss 2.0823 (2.1510)	Entropy 0.60063 (0.60199)	Top-1 acc 74.219 (72.898)	Top-5 acc 88.672 (89.361)	lr 0.00000
Train [120][1000/3239]	Time 2.487 (0.687)	Data Time 0.001 (0.044)	Loss 2.0228 (2.1506)	Entropy 0.60063 (0.60197)	Top-1 acc 76.562 (72.915)	Top-5 acc 91.406 (89.369)	lr 0.00000
Train [120][1010/3239]	Time 0.238 (0.683)	Data Time 0.001 (0.044)	Loss 2.3306 (2.1508)	Entropy 0.60056 (0.60196)	Top-1 acc 68.359 (72.914)	Top-5 acc 86.328 (89.365)	lr 0.00000
Train [120][1020/3239]	Time 0.238 (0.681)	Data Time 0.001 (0.043)	Loss 2.1841 (2.1508)	Entropy 0.60058 (0.60195)	Top-1 acc 69.531 (72.910)	Top-5 acc 90.625 (89.365)	lr 0.00000
Train [120][1030/3239]	Time 0.244 (0.679)	Data Time 0.001 (0.043)	Loss 2.0013 (2.1507)	Entropy 0.60059 (0.60193)	Top-1 acc 76.562 (72.912)	Top-5 acc 92.969 (89.364)	lr 0.00000
Train [120][1040/3239]	Time 0.239 (0.677)	Data Time 0.001 (0.042)	Loss 2.2515 (2.1505)	Entropy 0.60056 (0.60192)	Top-1 acc 71.875 (72.918)	Top-5 acc 88.672 (89.370)	lr 0.00000
Train [120][1050/3239]	Time 0.251 (0.675)	Data Time 0.001 (0.042)	Loss 2.0960 (2.1505)	Entropy 0.60057 (0.60191)	Top-1 acc 75.000 (72.918)	Top-5 acc 89.844 (89.367)	lr 0.00000
Train [120][1060/3239]	Time 0.228 (0.674)	Data Time 0.001 (0.042)	Loss 2.1905 (2.1503)	Entropy 0.60054 (0.60189)	Top-1 acc 72.656 (72.925)	Top-5 acc 87.891 (89.370)	lr 0.00000
Train [120][1070/3239]	Time 0.318 (0.672)	Data Time 0.001 (0.041)	Loss 1.9805 (2.1499)	Entropy 0.60051 (0.60188)	Top-1 acc 77.344 (72.935)	Top-5 acc 92.578 (89.378)	lr 0.00000
Train [120][1080/3239]	Time 0.231 (0.670)	Data Time 0.001 (0.041)	Loss 2.2468 (2.1498)	Entropy 0.60039 (0.60187)	Top-1 acc 67.188 (72.932)	Top-5 acc 89.062 (89.378)	lr 0.00000
Train [120][1090/3239]	Time 0.264 (0.669)	Data Time 0.001 (0.040)	Loss 2.1659 (2.1499)	Entropy 0.60026 (0.60186)	Top-1 acc 69.922 (72.923)	Top-5 acc 88.281 (89.371)	lr 0.00000
Train [120][1100/3239]	Time 0.226 (0.667)	Data Time 0.001 (0.040)	Loss 1.9045 (2.1501)	Entropy 0.60016 (0.60184)	Top-1 acc 80.469 (72.912)	Top-5 acc 93.750 (89.367)	lr 0.00000
Train [120][1110/3239]	Time 2.569 (0.665)	Data Time 0.001 (0.040)	Loss 2.0697 (2.1502)	Entropy 0.60016 (0.60183)	Top-1 acc 75.781 (72.902)	Top-5 acc 92.188 (89.366)	lr 0.00000
Train [120][1120/3239]	Time 0.262 (0.661)	Data Time 0.002 (0.039)	Loss 2.2713 (2.1503)	Entropy 0.60004 (0.60181)	Top-1 acc 68.750 (72.897)	Top-5 acc 85.938 (89.361)	lr 0.00000
Train [120][1130/3239]	Time 0.227 (0.660)	Data Time 0.001 (0.039)	Loss 2.2513 (2.1502)	Entropy 0.60005 (0.60179)	Top-1 acc 72.266 (72.897)	Top-5 acc 86.328 (89.363)	lr 0.00000
Train [120][1140/3239]	Time 0.242 (0.658)	Data Time 0.001 (0.039)	Loss 2.0513 (2.1502)	Entropy 0.60008 (0.60178)	Top-1 acc 73.438 (72.894)	Top-5 acc 92.578 (89.363)	lr 0.00000
Train [120][1150/3239]	Time 0.329 (0.657)	Data Time 0.001 (0.038)	Loss 2.0368 (2.1501)	Entropy 0.60013 (0.60176)	Top-1 acc 74.219 (72.899)	Top-5 acc 94.141 (89.365)	lr 0.00000
Train [120][1160/3239]	Time 0.224 (0.655)	Data Time 0.001 (0.038)	Loss 2.0273 (2.1503)	Entropy 0.60002 (0.60175)	Top-1 acc 73.828 (72.895)	Top-5 acc 93.359 (89.368)	lr 0.00000
Train [120][1170/3239]	Time 0.228 (0.654)	Data Time 0.001 (0.038)	Loss 2.0585 (2.1504)	Entropy 0.60002 (0.60173)	Top-1 acc 76.953 (72.909)	Top-5 acc 89.062 (89.358)	lr 0.00000
Train [120][1180/3239]	Time 0.223 (0.652)	Data Time 0.001 (0.037)	Loss 2.1770 (2.1505)	Entropy 0.60006 (0.60172)	Top-1 acc 72.266 (72.906)	Top-5 acc 89.062 (89.356)	lr 0.00000
Train [120][1190/3239]	Time 0.230 (0.651)	Data Time 0.001 (0.037)	Loss 2.4219 (2.1507)	Entropy 0.59999 (0.60171)	Top-1 acc 66.406 (72.901)	Top-5 acc 85.156 (89.356)	lr 0.00000
Train [120][1200/3239]	Time 0.241 (0.697)	Data Time 0.002 (0.037)	Loss 2.1315 (2.1505)	Entropy 0.59996 (0.60169)	Top-1 acc 73.828 (72.903)	Top-5 acc 89.062 (89.353)	lr 0.00000
Train [120][1210/3239]	Time 0.292 (0.695)	Data Time 0.003 (0.037)	Loss 2.2476 (2.1503)	Entropy 0.60000 (0.60168)	Top-1 acc 69.141 (72.908)	Top-5 acc 87.109 (89.354)	lr 0.00000
Train [120][1220/3239]	Time 2.581 (0.694)	Data Time 0.002 (0.036)	Loss 2.0521 (2.1500)	Entropy 0.60000 (0.60166)	Top-1 acc 76.172 (72.916)	Top-5 acc 91.797 (89.360)	lr 0.00000
Train [120][1230/3239]	Time 0.250 (0.690)	Data Time 0.002 (0.036)	Loss 2.2540 (2.1506)	Entropy 0.59994 (0.60165)	Top-1 acc 72.656 (72.912)	Top-5 acc 85.547 (89.348)	lr 0.00000
Train [120][1240/3239]	Time 0.238 (0.689)	Data Time 0.001 (0.036)	Loss 2.1599 (2.1510)	Entropy 0.59990 (0.60164)	Top-1 acc 71.875 (72.907)	Top-5 acc 88.281 (89.338)	lr 0.00000
Train [120][1250/3239]	Time 0.228 (0.687)	Data Time 0.001 (0.036)	Loss 2.2112 (2.1509)	Entropy 0.59982 (0.60162)	Top-1 acc 72.656 (72.913)	Top-5 acc 87.500 (89.335)	lr 0.00000
Train [120][1260/3239]	Time 0.242 (0.685)	Data Time 0.001 (0.035)	Loss 2.1004 (2.1507)	Entropy 0.59979 (0.60161)	Top-1 acc 76.562 (72.923)	Top-5 acc 88.281 (89.339)	lr 0.00000
Train [120][1270/3239]	Time 0.233 (0.684)	Data Time 0.001 (0.035)	Loss 2.2161 (2.1508)	Entropy 0.59978 (0.60159)	Top-1 acc 73.828 (72.926)	Top-5 acc 88.281 (89.339)	lr 0.00000
Train [120][1280/3239]	Time 0.237 (0.682)	Data Time 0.001 (0.035)	Loss 2.2387 (2.1509)	Entropy 0.59985 (0.60158)	Top-1 acc 70.703 (72.925)	Top-5 acc 87.500 (89.336)	lr 0.00000
Train [120][1290/3239]	Time 0.237 (0.680)	Data Time 0.002 (0.034)	Loss 2.1213 (2.1507)	Entropy 0.59992 (0.60157)	Top-1 acc 75.000 (72.928)	Top-5 acc 88.672 (89.339)	lr 0.00000
Train [120][1300/3239]	Time 0.232 (0.679)	Data Time 0.001 (0.034)	Loss 2.0133 (2.1506)	Entropy 0.59995 (0.60155)	Top-1 acc 77.344 (72.933)	Top-5 acc 92.188 (89.343)	lr 0.00000
Train [120][1310/3239]	Time 0.236 (0.677)	Data Time 0.001 (0.034)	Loss 2.2471 (2.1506)	Entropy 0.59999 (0.60154)	Top-1 acc 69.531 (72.938)	Top-5 acc 88.281 (89.342)	lr 0.00000
Train [120][1320/3239]	Time 0.260 (0.676)	Data Time 0.001 (0.034)	Loss 2.0335 (2.1505)	Entropy 0.59992 (0.60153)	Top-1 acc 76.953 (72.937)	Top-5 acc 91.406 (89.343)	lr 0.00000
Train [120][1330/3239]	Time 2.604 (0.675)	Data Time 0.001 (0.033)	Loss 2.2949 (2.1506)	Entropy 0.59992 (0.60152)	Top-1 acc 69.141 (72.933)	Top-5 acc 85.547 (89.342)	lr 0.00000
Train [120][1340/3239]	Time 0.231 (0.671)	Data Time 0.001 (0.033)	Loss 2.1181 (2.1502)	Entropy 0.59991 (0.60150)	Top-1 acc 72.266 (72.944)	Top-5 acc 91.016 (89.349)	lr 0.00000
Train [120][1350/3239]	Time 0.261 (0.670)	Data Time 0.001 (0.033)	Loss 2.0433 (2.1499)	Entropy 0.59981 (0.60149)	Top-1 acc 76.172 (72.954)	Top-5 acc 91.016 (89.355)	lr 0.00000
Train [120][1360/3239]	Time 0.231 (0.669)	Data Time 0.002 (0.033)	Loss 2.0717 (2.1500)	Entropy 0.59990 (0.60148)	Top-1 acc 75.391 (72.958)	Top-5 acc 90.625 (89.359)	lr 0.00000
Train [120][1370/3239]	Time 0.229 (0.667)	Data Time 0.001 (0.033)	Loss 2.1003 (2.1500)	Entropy 0.59991 (0.60147)	Top-1 acc 76.953 (72.963)	Top-5 acc 89.062 (89.357)	lr 0.00000
Train [120][1380/3239]	Time 0.227 (0.666)	Data Time 0.002 (0.032)	Loss 1.9731 (2.1500)	Entropy 0.59982 (0.60146)	Top-1 acc 78.906 (72.960)	Top-5 acc 92.969 (89.366)	lr 0.00000
Train [120][1390/3239]	Time 0.245 (0.665)	Data Time 0.001 (0.032)	Loss 2.0841 (2.1502)	Entropy 0.59979 (0.60145)	Top-1 acc 73.828 (72.951)	Top-5 acc 88.281 (89.359)	lr 0.00000
Train [120][1400/3239]	Time 0.236 (0.663)	Data Time 0.001 (0.032)	Loss 2.0344 (2.1497)	Entropy 0.59979 (0.60143)	Top-1 acc 75.391 (72.963)	Top-5 acc 90.234 (89.366)	lr 0.00000
Train [120][1410/3239]	Time 0.241 (0.662)	Data Time 0.001 (0.032)	Loss 2.1327 (2.1496)	Entropy 0.59970 (0.60142)	Top-1 acc 73.047 (72.971)	Top-5 acc 89.844 (89.366)	lr 0.00000
Train [120][1420/3239]	Time 0.223 (0.661)	Data Time 0.002 (0.031)	Loss 2.3537 (2.1500)	Entropy 0.59956 (0.60141)	Top-1 acc 66.797 (72.959)	Top-5 acc 86.719 (89.363)	lr 0.00000
Train [120][1430/3239]	Time 0.265 (0.660)	Data Time 0.001 (0.031)	Loss 2.1401 (2.1500)	Entropy 0.59964 (0.60140)	Top-1 acc 72.656 (72.955)	Top-5 acc 89.844 (89.362)	lr 0.00000
Train [120][1440/3239]	Time 2.686 (0.659)	Data Time 0.001 (0.031)	Loss 2.2102 (2.1500)	Entropy 0.59964 (0.60138)	Top-1 acc 69.531 (72.953)	Top-5 acc 88.672 (89.368)	lr 0.00000
Train [120][1450/3239]	Time 0.260 (0.656)	Data Time 0.002 (0.031)	Loss 2.1665 (2.1502)	Entropy 0.59957 (0.60137)	Top-1 acc 69.141 (72.948)	Top-5 acc 90.625 (89.366)	lr 0.00000
Train [120][1460/3239]	Time 0.248 (0.655)	Data Time 0.002 (0.031)	Loss 2.2100 (2.1504)	Entropy 0.59962 (0.60136)	Top-1 acc 69.922 (72.946)	Top-5 acc 88.281 (89.364)	lr 0.00000
Train [120][1470/3239]	Time 0.230 (0.653)	Data Time 0.001 (0.030)	Loss 2.3095 (2.1504)	Entropy 0.59972 (0.60135)	Top-1 acc 67.188 (72.943)	Top-5 acc 87.891 (89.367)	lr 0.00000
Train [120][1480/3239]	Time 0.325 (0.652)	Data Time 0.001 (0.030)	Loss 2.0381 (2.1503)	Entropy 0.59974 (0.60134)	Top-1 acc 76.562 (72.954)	Top-5 acc 92.578 (89.367)	lr 0.00000
Train [120][1490/3239]	Time 0.228 (0.651)	Data Time 0.001 (0.030)	Loss 2.2799 (2.1506)	Entropy 0.59969 (0.60133)	Top-1 acc 69.922 (72.948)	Top-5 acc 85.938 (89.361)	lr 0.00000
Train [120][1500/3239]	Time 0.225 (0.650)	Data Time 0.002 (0.030)	Loss 1.9252 (2.1504)	Entropy 0.59963 (0.60132)	Top-1 acc 77.344 (72.956)	Top-5 acc 92.578 (89.366)	lr 0.00000
Train [120][1510/3239]	Time 0.236 (0.649)	Data Time 0.001 (0.030)	Loss 2.0277 (2.1501)	Entropy 0.59948 (0.60130)	Top-1 acc 75.000 (72.964)	Top-5 acc 91.797 (89.372)	lr 0.00000
Train [120][1520/3239]	Time 0.239 (0.648)	Data Time 0.001 (0.030)	Loss 2.3377 (2.1503)	Entropy 0.59947 (0.60129)	Top-1 acc 70.703 (72.966)	Top-5 acc 85.938 (89.370)	lr 0.00000
Train [120][1530/3239]	Time 0.232 (0.647)	Data Time 0.001 (0.029)	Loss 2.0872 (2.1502)	Entropy 0.59946 (0.60128)	Top-1 acc 74.219 (72.972)	Top-5 acc 90.625 (89.370)	lr 0.00000
Train [120][1540/3239]	Time 0.302 (0.646)	Data Time 0.002 (0.029)	Loss 2.1032 (2.1499)	Entropy 0.59944 (0.60127)	Top-1 acc 73.438 (72.979)	Top-5 acc 89.844 (89.374)	lr 0.00000
Train [120][1550/3239]	Time 2.574 (0.645)	Data Time 0.001 (0.029)	Loss 2.3318 (2.1499)	Entropy 0.59944 (0.60126)	Top-1 acc 69.922 (72.983)	Top-5 acc 85.938 (89.374)	lr 0.00000
Train [120][1560/3239]	Time 0.235 (0.642)	Data Time 0.001 (0.029)	Loss 2.2249 (2.1501)	Entropy 0.59937 (0.60124)	Top-1 acc 71.484 (72.978)	Top-5 acc 87.891 (89.370)	lr 0.00000
Train [120][1570/3239]	Time 0.309 (0.674)	Data Time 0.004 (0.029)	Loss 2.3609 (2.1500)	Entropy 0.59920 (0.60123)	Top-1 acc 69.922 (72.987)	Top-5 acc 85.156 (89.374)	lr 0.00000
Train [120][1580/3239]	Time 0.213 (0.673)	Data Time 0.001 (0.028)	Loss 2.1394 (2.1501)	Entropy 0.59925 (0.60122)	Top-1 acc 73.047 (72.980)	Top-5 acc 89.062 (89.368)	lr 0.00000
Train [120][1590/3239]	Time 0.229 (0.671)	Data Time 0.001 (0.028)	Loss 2.3471 (2.1501)	Entropy 0.59924 (0.60121)	Top-1 acc 68.359 (72.984)	Top-5 acc 83.984 (89.367)	lr 0.00000
Train [120][1600/3239]	Time 0.225 (0.670)	Data Time 0.001 (0.028)	Loss 2.1231 (2.1499)	Entropy 0.59923 (0.60119)	Top-1 acc 71.484 (72.993)	Top-5 acc 90.234 (89.372)	lr 0.00000
Train [120][1610/3239]	Time 0.241 (0.669)	Data Time 0.002 (0.028)	Loss 2.0898 (2.1499)	Entropy 0.59922 (0.60118)	Top-1 acc 78.125 (72.999)	Top-5 acc 90.625 (89.372)	lr 0.00000
Train [120][1620/3239]	Time 0.226 (0.668)	Data Time 0.001 (0.028)	Loss 2.2924 (2.1499)	Entropy 0.59916 (0.60117)	Top-1 acc 68.359 (72.992)	Top-5 acc 86.719 (89.375)	lr 0.00000
Train [120][1630/3239]	Time 0.245 (0.667)	Data Time 0.001 (0.028)	Loss 2.1450 (2.1501)	Entropy 0.59930 (0.60116)	Top-1 acc 73.047 (72.989)	Top-5 acc 88.672 (89.375)	lr 0.00000
Train [120][1640/3239]	Time 0.238 (0.666)	Data Time 0.001 (0.027)	Loss 1.9812 (2.1502)	Entropy 0.59931 (0.60115)	Top-1 acc 78.516 (72.977)	Top-5 acc 91.406 (89.374)	lr 0.00000
Train [120][1650/3239]	Time 0.262 (0.665)	Data Time 0.001 (0.027)	Loss 2.3195 (2.1504)	Entropy 0.59927 (0.60114)	Top-1 acc 68.359 (72.973)	Top-5 acc 87.500 (89.372)	lr 0.00000
Train [120][1660/3239]	Time 2.538 (0.664)	Data Time 0.001 (0.027)	Loss 2.2623 (2.1505)	Entropy 0.59927 (0.60112)	Top-1 acc 66.406 (72.971)	Top-5 acc 87.891 (89.371)	lr 0.00000
Train [120][1670/3239]	Time 0.240 (0.661)	Data Time 0.001 (0.027)	Loss 2.2207 (2.1506)	Entropy 0.59929 (0.60111)	Top-1 acc 73.438 (72.966)	Top-5 acc 89.453 (89.370)	lr 0.00000
Train [120][1680/3239]	Time 0.225 (0.660)	Data Time 0.001 (0.027)	Loss 2.0547 (2.1505)	Entropy 0.59928 (0.60110)	Top-1 acc 76.172 (72.971)	Top-5 acc 89.844 (89.372)	lr 0.00000
Train [120][1690/3239]	Time 0.329 (0.659)	Data Time 0.001 (0.027)	Loss 2.1714 (2.1505)	Entropy 0.59926 (0.60109)	Top-1 acc 73.438 (72.971)	Top-5 acc 89.453 (89.377)	lr 0.00000
Train [120][1700/3239]	Time 0.239 (0.658)	Data Time 0.001 (0.027)	Loss 2.0894 (2.1506)	Entropy 0.59921 (0.60108)	Top-1 acc 73.828 (72.966)	Top-5 acc 89.453 (89.373)	lr 0.00000
Train [120][1710/3239]	Time 0.228 (0.657)	Data Time 0.001 (0.026)	Loss 2.0888 (2.1503)	Entropy 0.59923 (0.60107)	Top-1 acc 73.047 (72.970)	Top-5 acc 91.406 (89.380)	lr 0.00000
Train [120][1720/3239]	Time 0.233 (0.656)	Data Time 0.001 (0.026)	Loss 2.1036 (2.1503)	Entropy 0.59916 (0.60106)	Top-1 acc 72.266 (72.968)	Top-5 acc 92.188 (89.381)	lr 0.00000
Train [120][1730/3239]	Time 0.329 (0.655)	Data Time 0.001 (0.026)	Loss 2.1296 (2.1505)	Entropy 0.59915 (0.60105)	Top-1 acc 73.047 (72.965)	Top-5 acc 88.672 (89.379)	lr 0.00000
Train [120][1740/3239]	Time 0.237 (0.654)	Data Time 0.001 (0.026)	Loss 2.2074 (2.1504)	Entropy 0.59914 (0.60104)	Top-1 acc 73.828 (72.964)	Top-5 acc 87.891 (89.378)	lr 0.00000
Train [120][1750/3239]	Time 0.239 (0.653)	Data Time 0.001 (0.026)	Loss 2.1515 (2.1506)	Entropy 0.59904 (0.60103)	Top-1 acc 73.828 (72.962)	Top-5 acc 91.016 (89.379)	lr 0.00000
Train [120][1760/3239]	Time 0.243 (0.652)	Data Time 0.001 (0.026)	Loss 2.1893 (2.1506)	Entropy 0.59900 (0.60101)	Top-1 acc 71.875 (72.961)	Top-5 acc 89.062 (89.378)	lr 0.00000
Train [120][1770/3239]	Time 2.643 (0.651)	Data Time 0.001 (0.026)	Loss 2.2502 (2.1507)	Entropy 0.59900 (0.60100)	Top-1 acc 72.266 (72.959)	Top-5 acc 86.328 (89.375)	lr 0.00000
Train [120][1780/3239]	Time 0.221 (0.649)	Data Time 0.001 (0.025)	Loss 2.0919 (2.1505)	Entropy 0.59900 (0.60099)	Top-1 acc 75.781 (72.968)	Top-5 acc 90.234 (89.375)	lr 0.00000
Train [120][1790/3239]	Time 0.232 (0.648)	Data Time 0.001 (0.025)	Loss 2.1157 (2.1506)	Entropy 0.59902 (0.60098)	Top-1 acc 71.484 (72.962)	Top-5 acc 91.797 (89.377)	lr 0.00000
Train [120][1800/3239]	Time 0.234 (0.647)	Data Time 0.001 (0.025)	Loss 2.0696 (2.1507)	Entropy 0.59902 (0.60097)	Top-1 acc 71.094 (72.957)	Top-5 acc 89.453 (89.376)	lr 0.00000
Train [120][1810/3239]	Time 0.273 (0.646)	Data Time 0.002 (0.025)	Loss 2.0770 (2.1509)	Entropy 0.59890 (0.60096)	Top-1 acc 76.562 (72.954)	Top-5 acc 92.188 (89.376)	lr 0.00000
Train [120][1820/3239]	Time 0.235 (0.645)	Data Time 0.001 (0.025)	Loss 2.0510 (2.1510)	Entropy 0.59896 (0.60095)	Top-1 acc 76.562 (72.954)	Top-5 acc 94.141 (89.379)	lr 0.00000
Train [120][1830/3239]	Time 0.242 (0.644)	Data Time 0.001 (0.025)	Loss 1.9811 (2.1511)	Entropy 0.59892 (0.60094)	Top-1 acc 74.219 (72.952)	Top-5 acc 92.578 (89.378)	lr 0.00000
Train [120][1840/3239]	Time 0.236 (0.643)	Data Time 0.001 (0.025)	Loss 2.1013 (2.1512)	Entropy 0.59893 (0.60093)	Top-1 acc 73.438 (72.952)	Top-5 acc 90.625 (89.375)	lr 0.00000
Train [120][1850/3239]	Time 0.233 (0.642)	Data Time 0.001 (0.025)	Loss 2.2012 (2.1513)	Entropy 0.59886 (0.60091)	Top-1 acc 71.484 (72.946)	Top-5 acc 87.891 (89.375)	lr 0.00000
Train [120][1860/3239]	Time 0.245 (0.641)	Data Time 0.002 (0.024)	Loss 2.0785 (2.1511)	Entropy 0.59887 (0.60090)	Top-1 acc 73.828 (72.948)	Top-5 acc 91.406 (89.380)	lr 0.00000
Train [120][1870/3239]	Time 0.298 (0.641)	Data Time 0.002 (0.024)	Loss 2.1219 (2.1512)	Entropy 0.59882 (0.60089)	Top-1 acc 73.438 (72.948)	Top-5 acc 91.406 (89.374)	lr 0.00000
Train [120][1880/3239]	Time 2.619 (0.640)	Data Time 0.002 (0.024)	Loss 2.3989 (2.1512)	Entropy 0.59882 (0.60088)	Top-1 acc 67.969 (72.948)	Top-5 acc 82.812 (89.372)	lr 0.00000
Train [120][1890/3239]	Time 0.248 (0.638)	Data Time 0.001 (0.024)	Loss 2.0954 (2.1513)	Entropy 0.59876 (0.60087)	Top-1 acc 74.609 (72.948)	Top-5 acc 90.234 (89.369)	lr 0.00000
Train [120][1900/3239]	Time 0.239 (0.637)	Data Time 0.001 (0.024)	Loss 2.2450 (2.1514)	Entropy 0.59870 (0.60086)	Top-1 acc 72.656 (72.947)	Top-5 acc 89.844 (89.368)	lr 0.00000
Train [120][1910/3239]	Time 0.260 (0.636)	Data Time 0.001 (0.024)	Loss 2.1460 (2.1514)	Entropy 0.59866 (0.60085)	Top-1 acc 70.312 (72.949)	Top-5 acc 90.625 (89.367)	lr 0.00000
Train [120][1920/3239]	Time 0.249 (0.636)	Data Time 0.001 (0.024)	Loss 2.2502 (2.1512)	Entropy 0.59863 (0.60084)	Top-1 acc 70.312 (72.954)	Top-5 acc 86.719 (89.369)	lr 0.00000
Train [120][1930/3239]	Time 0.311 (0.665)	Data Time 0.002 (0.024)	Loss 2.1006 (2.1510)	Entropy 0.59862 (0.60082)	Top-1 acc 74.219 (72.963)	Top-5 acc 90.625 (89.369)	lr 0.00000
Train [120][1940/3239]	Time 0.253 (0.664)	Data Time 0.002 (0.023)	Loss 2.1322 (2.1510)	Entropy 0.59866 (0.60081)	Top-1 acc 75.391 (72.966)	Top-5 acc 88.281 (89.366)	lr 0.00000
Train [120][1950/3239]	Time 0.235 (0.663)	Data Time 0.001 (0.023)	Loss 2.0818 (2.1508)	Entropy 0.59858 (0.60080)	Top-1 acc 74.609 (72.970)	Top-5 acc 89.453 (89.368)	lr 0.00000
Train [120][1960/3239]	Time 0.237 (0.662)	Data Time 0.001 (0.023)	Loss 2.1578 (2.1507)	Entropy 0.59865 (0.60079)	Top-1 acc 69.922 (72.971)	Top-5 acc 89.844 (89.370)	lr 0.00000
Train [120][1970/3239]	Time 0.250 (0.662)	Data Time 0.001 (0.023)	Loss 2.0856 (2.1505)	Entropy 0.59853 (0.60078)	Top-1 acc 73.828 (72.977)	Top-5 acc 90.234 (89.372)	lr 0.00000
Train [120][1980/3239]	Time 0.236 (0.661)	Data Time 0.001 (0.023)	Loss 2.0416 (2.1506)	Entropy 0.59849 (0.60077)	Top-1 acc 75.391 (72.973)	Top-5 acc 90.625 (89.372)	lr 0.00000
Train [120][1990/3239]	Time 2.633 (0.660)	Data Time 0.001 (0.023)	Loss 2.1559 (2.1504)	Entropy 0.59849 (0.60076)	Top-1 acc 74.609 (72.981)	Top-5 acc 89.453 (89.376)	lr 0.00000
Train [120][2000/3239]	Time 0.232 (0.658)	Data Time 0.001 (0.023)	Loss 2.0312 (2.1504)	Entropy 0.59848 (0.60075)	Top-1 acc 75.781 (72.985)	Top-5 acc 91.016 (89.375)	lr 0.00000
Train [120][2010/3239]	Time 0.230 (0.657)	Data Time 0.001 (0.023)	Loss 2.1210 (2.1503)	Entropy 0.59841 (0.60073)	Top-1 acc 71.875 (72.985)	Top-5 acc 90.234 (89.379)	lr 0.00000
Train [120][2020/3239]	Time 0.267 (0.656)	Data Time 0.002 (0.023)	Loss 2.2347 (2.1504)	Entropy 0.59818 (0.60072)	Top-1 acc 69.141 (72.980)	Top-5 acc 86.719 (89.377)	lr 0.00000
Train [120][2030/3239]	Time 0.233 (0.655)	Data Time 0.001 (0.023)	Loss 2.1897 (2.1507)	Entropy 0.59814 (0.60071)	Top-1 acc 71.875 (72.974)	Top-5 acc 88.672 (89.374)	lr 0.00000
Train [120][2040/3239]	Time 0.241 (0.654)	Data Time 0.001 (0.022)	Loss 1.9363 (2.1504)	Entropy 0.59810 (0.60070)	Top-1 acc 78.516 (72.980)	Top-5 acc 92.578 (89.378)	lr 0.00000
Train [120][2050/3239]	Time 0.231 (0.653)	Data Time 0.001 (0.022)	Loss 2.2400 (2.1503)	Entropy 0.59808 (0.60068)	Top-1 acc 71.094 (72.982)	Top-5 acc 86.719 (89.380)	lr 0.00000
Train [120][2060/3239]	Time 0.318 (0.653)	Data Time 0.001 (0.022)	Loss 2.1723 (2.1503)	Entropy 0.59812 (0.60067)	Top-1 acc 73.047 (72.991)	Top-5 acc 90.234 (89.380)	lr 0.00000
Train [120][2070/3239]	Time 0.231 (0.652)	Data Time 0.001 (0.022)	Loss 2.1252 (2.1501)	Entropy 0.59808 (0.60066)	Top-1 acc 75.391 (72.996)	Top-5 acc 90.234 (89.384)	lr 0.00000
Train [120][2080/3239]	Time 0.269 (0.651)	Data Time 0.001 (0.022)	Loss 2.1091 (2.1501)	Entropy 0.59817 (0.60065)	Top-1 acc 76.562 (72.999)	Top-5 acc 89.844 (89.386)	lr 0.00000
Train [120][2090/3239]	Time 0.227 (0.650)	Data Time 0.001 (0.022)	Loss 2.1526 (2.1501)	Entropy 0.59822 (0.60063)	Top-1 acc 68.750 (72.997)	Top-5 acc 89.453 (89.388)	lr 0.00000
Train [120][2100/3239]	Time 2.535 (0.650)	Data Time 0.001 (0.022)	Loss 2.0919 (2.1502)	Entropy 0.59822 (0.60062)	Top-1 acc 74.219 (72.993)	Top-5 acc 90.234 (89.387)	lr 0.00000
Train [120][2110/3239]	Time 0.232 (0.648)	Data Time 0.001 (0.022)	Loss 2.1784 (2.1503)	Entropy 0.59816 (0.60061)	Top-1 acc 73.047 (72.992)	Top-5 acc 88.281 (89.385)	lr 0.00000
Train [120][2120/3239]	Time 0.236 (0.647)	Data Time 0.001 (0.022)	Loss 2.0266 (2.1501)	Entropy 0.59807 (0.60060)	Top-1 acc 76.172 (73.000)	Top-5 acc 93.750 (89.390)	lr 0.00000
Train [120][2130/3239]	Time 0.235 (0.646)	Data Time 0.001 (0.022)	Loss 2.1885 (2.1504)	Entropy 0.59804 (0.60059)	Top-1 acc 68.750 (72.987)	Top-5 acc 88.672 (89.386)	lr 0.00000
Train [120][2140/3239]	Time 0.228 (0.645)	Data Time 0.001 (0.021)	Loss 2.1055 (2.1503)	Entropy 0.59804 (0.60058)	Top-1 acc 74.609 (72.990)	Top-5 acc 92.188 (89.384)	lr 0.00000
Train [120][2150/3239]	Time 0.226 (0.645)	Data Time 0.001 (0.021)	Loss 2.2857 (2.1503)	Entropy 0.59808 (0.60056)	Top-1 acc 70.703 (72.989)	Top-5 acc 84.766 (89.383)	lr 0.00000
Train [120][2160/3239]	Time 0.237 (0.644)	Data Time 0.001 (0.021)	Loss 2.1855 (2.1503)	Entropy 0.59818 (0.60055)	Top-1 acc 73.047 (72.991)	Top-5 acc 87.109 (89.384)	lr 0.00000
Train [120][2170/3239]	Time 0.235 (0.643)	Data Time 0.001 (0.021)	Loss 2.0887 (2.1503)	Entropy 0.59811 (0.60054)	Top-1 acc 71.484 (72.997)	Top-5 acc 90.625 (89.383)	lr 0.00000
Train [120][2180/3239]	Time 0.265 (0.642)	Data Time 0.001 (0.021)	Loss 2.0577 (2.1502)	Entropy 0.59817 (0.60053)	Top-1 acc 75.781 (72.998)	Top-5 acc 91.406 (89.386)	lr 0.00000
Train [120][2190/3239]	Time 0.259 (0.642)	Data Time 0.001 (0.021)	Loss 2.1816 (2.1502)	Entropy 0.59812 (0.60052)	Top-1 acc 69.141 (73.000)	Top-5 acc 88.672 (89.382)	lr 0.00000
Train [120][2200/3239]	Time 0.257 (0.641)	Data Time 0.001 (0.021)	Loss 2.1697 (2.1499)	Entropy 0.59804 (0.60051)	Top-1 acc 72.656 (73.006)	Top-5 acc 91.016 (89.387)	lr 0.00000
Train [120][2210/3239]	Time 2.554 (0.640)	Data Time 0.001 (0.021)	Loss 2.1793 (2.1499)	Entropy 0.59804 (0.60050)	Top-1 acc 70.312 (73.004)	Top-5 acc 90.625 (89.388)	lr 0.00000
Train [120][2220/3239]	Time 0.233 (0.638)	Data Time 0.001 (0.021)	Loss 2.1849 (2.1497)	Entropy 0.59802 (0.60049)	Top-1 acc 71.094 (73.011)	Top-5 acc 88.281 (89.391)	lr 0.00000
Train [120][2230/3239]	Time 0.226 (0.638)	Data Time 0.001 (0.021)	Loss 2.0993 (2.1498)	Entropy 0.59799 (0.60048)	Top-1 acc 75.000 (73.012)	Top-5 acc 89.062 (89.390)	lr 0.00000
Train [120][2240/3239]	Time 0.232 (0.637)	Data Time 0.001 (0.021)	Loss 2.0327 (2.1499)	Entropy 0.59790 (0.60046)	Top-1 acc 78.125 (73.012)	Top-5 acc 91.016 (89.390)	lr 0.00000
Train [120][2250/3239]	Time 0.245 (0.636)	Data Time 0.001 (0.020)	Loss 2.2326 (2.1500)	Entropy 0.59785 (0.60045)	Top-1 acc 70.312 (73.008)	Top-5 acc 90.625 (89.391)	lr 0.00000
Train [120][2260/3239]	Time 0.236 (0.636)	Data Time 0.001 (0.020)	Loss 2.0790 (2.1502)	Entropy 0.59779 (0.60044)	Top-1 acc 75.000 (73.002)	Top-5 acc 89.844 (89.389)	lr 0.00000
Train [120][2270/3239]	Time 0.227 (0.635)	Data Time 0.001 (0.020)	Loss 2.2014 (2.1503)	Entropy 0.59776 (0.60043)	Top-1 acc 71.484 (72.998)	Top-5 acc 89.844 (89.387)	lr 0.00000
Train [120][2280/3239]	Time 0.246 (0.634)	Data Time 0.001 (0.020)	Loss 2.2252 (2.1502)	Entropy 0.59762 (0.60042)	Top-1 acc 71.484 (72.998)	Top-5 acc 88.281 (89.388)	lr 0.00000
Train [120][2290/3239]	Time 0.250 (0.658)	Data Time 0.002 (0.020)	Loss 2.0482 (2.1501)	Entropy 0.59761 (0.60040)	Top-1 acc 72.656 (73.005)	Top-5 acc 91.016 (89.391)	lr 0.00000
Train [120][2300/3239]	Time 0.256 (0.657)	Data Time 0.002 (0.020)	Loss 2.1025 (2.1501)	Entropy 0.59757 (0.60039)	Top-1 acc 75.000 (73.001)	Top-5 acc 89.844 (89.389)	lr 0.00000
Train [120][2310/3239]	Time 0.265 (0.657)	Data Time 0.002 (0.020)	Loss 2.2401 (2.1502)	Entropy 0.59749 (0.60038)	Top-1 acc 69.922 (73.002)	Top-5 acc 88.281 (89.390)	lr 0.00000
Train [120][2320/3239]	Time 2.572 (0.656)	Data Time 0.001 (0.020)	Loss 2.0941 (2.1505)	Entropy 0.59749 (0.60037)	Top-1 acc 71.875 (72.996)	Top-5 acc 91.406 (89.386)	lr 0.00000
Train [120][2330/3239]	Time 0.234 (0.654)	Data Time 0.001 (0.020)	Loss 2.1058 (2.1503)	Entropy 0.59746 (0.60036)	Top-1 acc 75.000 (72.999)	Top-5 acc 91.406 (89.387)	lr 0.00000
Train [120][2340/3239]	Time 0.236 (0.653)	Data Time 0.002 (0.020)	Loss 2.0574 (2.1504)	Entropy 0.59748 (0.60034)	Top-1 acc 75.391 (72.995)	Top-5 acc 91.797 (89.385)	lr 0.00000
Train [120][2350/3239]	Time 0.330 (0.653)	Data Time 0.002 (0.020)	Loss 2.1836 (2.1504)	Entropy 0.59743 (0.60033)	Top-1 acc 68.359 (72.996)	Top-5 acc 90.234 (89.386)	lr 0.00000
Train [120][2360/3239]	Time 0.243 (0.652)	Data Time 0.001 (0.020)	Loss 2.2729 (2.1504)	Entropy 0.59730 (0.60032)	Top-1 acc 68.750 (72.996)	Top-5 acc 87.891 (89.382)	lr 0.00000
Train [120][2370/3239]	Time 0.227 (0.651)	Data Time 0.001 (0.020)	Loss 1.9979 (2.1504)	Entropy 0.59728 (0.60031)	Top-1 acc 77.344 (72.995)	Top-5 acc 91.797 (89.387)	lr 0.00000
Train [120][2380/3239]	Time 0.257 (0.651)	Data Time 0.002 (0.019)	Loss 2.0471 (2.1503)	Entropy 0.59725 (0.60029)	Top-1 acc 74.609 (72.995)	Top-5 acc 92.188 (89.389)	lr 0.00000
Train [120][2390/3239]	Time 0.352 (0.650)	Data Time 0.001 (0.019)	Loss 1.9005 (2.1503)	Entropy 0.59726 (0.60028)	Top-1 acc 79.297 (72.995)	Top-5 acc 92.969 (89.391)	lr 0.00000
Train [120][2400/3239]	Time 0.244 (0.649)	Data Time 0.001 (0.019)	Loss 2.1742 (2.1502)	Entropy 0.59726 (0.60027)	Top-1 acc 73.047 (72.995)	Top-5 acc 89.453 (89.392)	lr 0.00000
Train [120][2410/3239]	Time 0.270 (0.648)	Data Time 0.001 (0.019)	Loss 2.0920 (2.1500)	Entropy 0.59721 (0.60025)	Top-1 acc 74.219 (73.000)	Top-5 acc 89.844 (89.394)	lr 0.00000
Train [120][2420/3239]	Time 0.233 (0.648)	Data Time 0.001 (0.019)	Loss 2.2905 (2.1498)	Entropy 0.59720 (0.60024)	Top-1 acc 71.094 (73.002)	Top-5 acc 84.766 (89.398)	lr 0.00000
Train [120][2430/3239]	Time 2.680 (0.647)	Data Time 0.001 (0.019)	Loss 2.2606 (2.1498)	Entropy 0.59720 (0.60023)	Top-1 acc 70.312 (73.003)	Top-5 acc 88.672 (89.397)	lr 0.00000
Train [120][2440/3239]	Time 0.240 (0.645)	Data Time 0.001 (0.019)	Loss 2.1549 (2.1500)	Entropy 0.59719 (0.60022)	Top-1 acc 72.656 (72.995)	Top-5 acc 89.844 (89.394)	lr 0.00000
Train [120][2450/3239]	Time 0.237 (0.645)	Data Time 0.001 (0.019)	Loss 2.2687 (2.1501)	Entropy 0.59701 (0.60020)	Top-1 acc 72.656 (72.996)	Top-5 acc 88.281 (89.394)	lr 0.00000
Train [120][2460/3239]	Time 0.231 (0.644)	Data Time 0.001 (0.019)	Loss 2.0938 (2.1502)	Entropy 0.59691 (0.60019)	Top-1 acc 73.438 (72.990)	Top-5 acc 91.016 (89.393)	lr 0.00000
Train [120][2470/3239]	Time 0.237 (0.644)	Data Time 0.001 (0.019)	Loss 2.2599 (2.1504)	Entropy 0.59691 (0.60018)	Top-1 acc 64.844 (72.984)	Top-5 acc 87.500 (89.390)	lr 0.00000
Train [120][2480/3239]	Time 0.245 (0.643)	Data Time 0.001 (0.019)	Loss 2.1525 (2.1505)	Entropy 0.59690 (0.60016)	Top-1 acc 73.438 (72.982)	Top-5 acc 86.719 (89.386)	lr 0.00000
Train [120][2490/3239]	Time 0.237 (0.642)	Data Time 0.001 (0.019)	Loss 2.1091 (2.1504)	Entropy 0.59695 (0.60015)	Top-1 acc 73.047 (72.986)	Top-5 acc 90.234 (89.386)	lr 0.00000
Train [120][2500/3239]	Time 0.232 (0.642)	Data Time 0.001 (0.019)	Loss 2.0903 (2.1504)	Entropy 0.59688 (0.60014)	Top-1 acc 72.656 (72.990)	Top-5 acc 91.797 (89.388)	lr 0.00000
Train [120][2510/3239]	Time 0.241 (0.641)	Data Time 0.001 (0.019)	Loss 2.1884 (2.1503)	Entropy 0.59683 (0.60013)	Top-1 acc 72.656 (72.990)	Top-5 acc 89.453 (89.390)	lr 0.00000
Train [120][2520/3239]	Time 0.233 (0.640)	Data Time 0.001 (0.018)	Loss 2.1366 (2.1504)	Entropy 0.59687 (0.60011)	Top-1 acc 72.656 (72.983)	Top-5 acc 88.672 (89.390)	lr 0.00000
Train [120][2530/3239]	Time 0.236 (0.640)	Data Time 0.001 (0.018)	Loss 2.2757 (2.1506)	Entropy 0.59685 (0.60010)	Top-1 acc 69.531 (72.974)	Top-5 acc 86.328 (89.385)	lr 0.00000
Train [120][2540/3239]	Time 2.544 (0.639)	Data Time 0.001 (0.018)	Loss 2.2080 (2.1506)	Entropy 0.59685 (0.60009)	Top-1 acc 71.484 (72.973)	Top-5 acc 88.281 (89.385)	lr 0.00000
Train [120][2550/3239]	Time 0.229 (0.638)	Data Time 0.001 (0.018)	Loss 2.2171 (2.1505)	Entropy 0.59684 (0.60007)	Top-1 acc 72.656 (72.979)	Top-5 acc 86.328 (89.387)	lr 0.00000
Train [120][2560/3239]	Time 0.328 (0.637)	Data Time 0.001 (0.018)	Loss 2.1752 (2.1506)	Entropy 0.59682 (0.60006)	Top-1 acc 71.094 (72.979)	Top-5 acc 89.062 (89.385)	lr 0.00000
Train [120][2570/3239]	Time 0.227 (0.636)	Data Time 0.001 (0.018)	Loss 2.1180 (2.1506)	Entropy 0.59681 (0.60005)	Top-1 acc 74.609 (72.981)	Top-5 acc 90.234 (89.386)	lr 0.00000
Train [120][2580/3239]	Time 0.223 (0.636)	Data Time 0.001 (0.018)	Loss 2.1427 (2.1507)	Entropy 0.59673 (0.60004)	Top-1 acc 73.047 (72.984)	Top-5 acc 90.625 (89.386)	lr 0.00000
Train [120][2590/3239]	Time 0.243 (0.635)	Data Time 0.001 (0.018)	Loss 2.0048 (2.1507)	Entropy 0.59670 (0.60002)	Top-1 acc 76.172 (72.986)	Top-5 acc 93.750 (89.384)	lr 0.00000
Train [120][2600/3239]	Time 0.328 (0.635)	Data Time 0.001 (0.018)	Loss 1.9561 (2.1507)	Entropy 0.59669 (0.60001)	Top-1 acc 76.562 (72.985)	Top-5 acc 93.359 (89.387)	lr 0.00000
Train [120][2610/3239]	Time 0.232 (0.634)	Data Time 0.001 (0.018)	Loss 2.0590 (2.1508)	Entropy 0.59668 (0.60000)	Top-1 acc 74.219 (72.982)	Top-5 acc 89.453 (89.385)	lr 0.00000
Train [120][2620/3239]	Time 0.274 (0.633)	Data Time 0.001 (0.018)	Loss 2.0862 (2.1508)	Entropy 0.59652 (0.59998)	Top-1 acc 76.562 (72.986)	Top-5 acc 90.234 (89.385)	lr 0.00000
Train [120][2630/3239]	Time 0.257 (0.633)	Data Time 0.002 (0.018)	Loss 2.2383 (2.1508)	Entropy 0.59650 (0.59997)	Top-1 acc 70.312 (72.986)	Top-5 acc 86.719 (89.385)	lr 0.00000
Train [120][2640/3239]	Time 0.353 (0.632)	Data Time 0.001 (0.018)	Loss 2.2119 (2.1508)	Entropy 0.59642 (0.59996)	Top-1 acc 71.875 (72.989)	Top-5 acc 88.672 (89.385)	lr 0.00000
Train [120][2650/3239]	Time 0.445 (0.653)	Data Time 0.005 (0.018)	Loss 2.0410 (2.1508)	Entropy 0.59641 (0.59995)	Top-1 acc 75.000 (72.991)	Top-5 acc 92.969 (89.384)	lr 0.00000
Train [120][2660/3239]	Time 0.310 (0.653)	Data Time 0.003 (0.018)	Loss 2.1197 (2.1509)	Entropy 0.59645 (0.59993)	Top-1 acc 74.609 (72.988)	Top-5 acc 89.844 (89.383)	lr 0.00000
Train [120][2670/3239]	Time 0.273 (0.652)	Data Time 0.002 (0.017)	Loss 2.1770 (2.1510)	Entropy 0.59641 (0.59992)	Top-1 acc 70.703 (72.986)	Top-5 acc 86.719 (89.377)	lr 0.00000
Train [120][2680/3239]	Time 0.329 (0.651)	Data Time 0.001 (0.017)	Loss 2.2013 (2.1510)	Entropy 0.59621 (0.59991)	Top-1 acc 73.438 (72.985)	Top-5 acc 89.453 (89.378)	lr 0.00000
Train [120][2690/3239]	Time 0.227 (0.651)	Data Time 0.001 (0.017)	Loss 2.0577 (2.1510)	Entropy 0.59588 (0.59989)	Top-1 acc 76.953 (72.980)	Top-5 acc 92.578 (89.378)	lr 0.00000
Train [120][2700/3239]	Time 0.235 (0.650)	Data Time 0.001 (0.017)	Loss 2.0644 (2.1511)	Entropy 0.59593 (0.59988)	Top-1 acc 75.391 (72.975)	Top-5 acc 92.188 (89.378)	lr 0.00000
Train [120][2710/3239]	Time 0.236 (0.649)	Data Time 0.001 (0.017)	Loss 2.2457 (2.1510)	Entropy 0.59588 (0.59986)	Top-1 acc 70.312 (72.978)	Top-5 acc 87.891 (89.378)	lr 0.00000
Train [120][2720/3239]	Time 0.377 (0.649)	Data Time 0.001 (0.017)	Loss 2.0874 (2.1511)	Entropy 0.59583 (0.59985)	Top-1 acc 73.438 (72.975)	Top-5 acc 88.672 (89.375)	lr 0.00000
Train [120][2730/3239]	Time 0.267 (0.648)	Data Time 0.001 (0.017)	Loss 2.0223 (2.1510)	Entropy 0.59577 (0.59983)	Top-1 acc 74.609 (72.978)	Top-5 acc 91.797 (89.376)	lr 0.00000
Train [120][2740/3239]	Time 0.271 (0.648)	Data Time 0.001 (0.017)	Loss 2.2572 (2.1512)	Entropy 0.59569 (0.59982)	Top-1 acc 70.703 (72.974)	Top-5 acc 86.719 (89.373)	lr 0.00000
Train [120][2750/3239]	Time 0.266 (0.647)	Data Time 0.001 (0.017)	Loss 2.1559 (2.1512)	Entropy 0.59572 (0.59980)	Top-1 acc 73.438 (72.975)	Top-5 acc 89.062 (89.373)	lr 0.00000
Train [120][2760/3239]	Time 0.362 (0.646)	Data Time 0.001 (0.017)	Loss 2.3017 (2.1512)	Entropy 0.59566 (0.59979)	Top-1 acc 71.094 (72.980)	Top-5 acc 88.281 (89.374)	lr 0.00000
Train [120][2770/3239]	Time 0.233 (0.646)	Data Time 0.001 (0.017)	Loss 2.1207 (2.1511)	Entropy 0.59571 (0.59977)	Top-1 acc 75.000 (72.984)	Top-5 acc 90.625 (89.374)	lr 0.00000
Train [120][2780/3239]	Time 0.255 (0.645)	Data Time 0.001 (0.017)	Loss 2.1503 (2.1511)	Entropy 0.59572 (0.59976)	Top-1 acc 72.266 (72.982)	Top-5 acc 88.672 (89.374)	lr 0.00000
Train [120][2790/3239]	Time 0.249 (0.645)	Data Time 0.001 (0.017)	Loss 2.1039 (2.1512)	Entropy 0.59573 (0.59974)	Top-1 acc 74.219 (72.983)	Top-5 acc 89.844 (89.372)	lr 0.00000
Train [120][2800/3239]	Time 0.383 (0.644)	Data Time 0.001 (0.017)	Loss 2.2732 (2.1513)	Entropy 0.59570 (0.59973)	Top-1 acc 71.484 (72.981)	Top-5 acc 88.281 (89.370)	lr 0.00000
Train [120][2810/3239]	Time 0.289 (0.644)	Data Time 0.001 (0.017)	Loss 1.9719 (2.1513)	Entropy 0.59569 (0.59972)	Top-1 acc 79.688 (72.982)	Top-5 acc 91.406 (89.368)	lr 0.00000
Train [120][2820/3239]	Time 0.223 (0.643)	Data Time 0.001 (0.017)	Loss 2.1510 (2.1513)	Entropy 0.59567 (0.59970)	Top-1 acc 72.266 (72.985)	Top-5 acc 87.891 (89.365)	lr 0.00000
Train [120][2830/3239]	Time 0.269 (0.642)	Data Time 0.001 (0.017)	Loss 2.0943 (2.1514)	Entropy 0.59557 (0.59969)	Top-1 acc 74.609 (72.981)	Top-5 acc 89.453 (89.363)	lr 0.00000
Train [120][2840/3239]	Time 0.332 (0.642)	Data Time 0.001 (0.017)	Loss 2.0857 (2.1513)	Entropy 0.59545 (0.59967)	Top-1 acc 74.219 (72.983)	Top-5 acc 91.406 (89.364)	lr 0.00000
Train [120][2850/3239]	Time 0.256 (0.641)	Data Time 0.001 (0.016)	Loss 2.1733 (2.1512)	Entropy 0.59528 (0.59966)	Top-1 acc 72.656 (72.985)	Top-5 acc 89.062 (89.368)	lr 0.00000
Train [120][2860/3239]	Time 0.277 (0.641)	Data Time 0.001 (0.016)	Loss 2.1534 (2.1511)	Entropy 0.59517 (0.59964)	Top-1 acc 75.000 (72.988)	Top-5 acc 89.062 (89.369)	lr 0.00000
Train [120][2870/3239]	Time 0.263 (0.640)	Data Time 0.001 (0.016)	Loss 2.0293 (2.1510)	Entropy 0.59520 (0.59963)	Top-1 acc 77.344 (72.991)	Top-5 acc 91.016 (89.369)	lr 0.00000
Train [120][2880/3239]	Time 0.259 (0.640)	Data Time 0.001 (0.016)	Loss 2.2678 (2.1511)	Entropy 0.59516 (0.59961)	Top-1 acc 68.750 (72.990)	Top-5 acc 88.672 (89.368)	lr 0.00000
Train [120][2890/3239]	Time 0.244 (0.639)	Data Time 0.001 (0.016)	Loss 1.9494 (2.1510)	Entropy 0.59512 (0.59960)	Top-1 acc 78.906 (72.993)	Top-5 acc 93.750 (89.373)	lr 0.00000
Train [120][2900/3239]	Time 0.236 (0.639)	Data Time 0.001 (0.016)	Loss 2.0854 (2.1509)	Entropy 0.59516 (0.59958)	Top-1 acc 73.438 (72.996)	Top-5 acc 90.625 (89.374)	lr 0.00000
Train [120][2910/3239]	Time 0.271 (0.638)	Data Time 0.001 (0.016)	Loss 2.1161 (2.1509)	Entropy 0.59521 (0.59957)	Top-1 acc 75.391 (73.000)	Top-5 acc 87.891 (89.372)	lr 0.00000
Train [120][2920/3239]	Time 0.267 (0.638)	Data Time 0.001 (0.016)	Loss 2.0944 (2.1508)	Entropy 0.59522 (0.59955)	Top-1 acc 75.000 (73.004)	Top-5 acc 90.234 (89.372)	lr 0.00000
Train [120][2930/3239]	Time 0.261 (0.637)	Data Time 0.001 (0.016)	Loss 2.2650 (2.1509)	Entropy 0.59509 (0.59954)	Top-1 acc 69.922 (73.002)	Top-5 acc 87.109 (89.371)	lr 0.00000
Train [120][2940/3239]	Time 0.242 (0.637)	Data Time 0.001 (0.016)	Loss 2.0647 (2.1509)	Entropy 0.59514 (0.59952)	Top-1 acc 78.516 (73.006)	Top-5 acc 91.016 (89.372)	lr 0.00000
Train [120][2950/3239]	Time 0.262 (0.636)	Data Time 0.001 (0.016)	Loss 2.1512 (2.1509)	Entropy 0.59511 (0.59951)	Top-1 acc 70.703 (73.003)	Top-5 acc 89.844 (89.372)	lr 0.00000
Train [120][2960/3239]	Time 0.257 (0.636)	Data Time 0.001 (0.016)	Loss 2.2518 (2.1510)	Entropy 0.59510 (0.59949)	Top-1 acc 71.484 (73.001)	Top-5 acc 87.500 (89.370)	lr 0.00000
Train [120][2970/3239]	Time 0.267 (0.635)	Data Time 0.001 (0.016)	Loss 2.2527 (2.1511)	Entropy 0.59497 (0.59948)	Top-1 acc 69.531 (73.000)	Top-5 acc 87.109 (89.369)	lr 0.00000
Train [120][2980/3239]	Time 0.479 (0.653)	Data Time 0.005 (0.016)	Loss 1.9243 (2.1510)	Entropy 0.59574 (0.59946)	Top-1 acc 78.516 (73.003)	Top-5 acc 94.531 (89.368)	lr 0.00000
Train [120][2990/3239]	Time 0.278 (0.652)	Data Time 0.003 (0.016)	Loss 2.1189 (2.1509)	Entropy 0.59589 (0.59945)	Top-1 acc 70.703 (73.006)	Top-5 acc 89.453 (89.370)	lr 0.00000
Train [120][3000/3239]	Time 0.283 (0.652)	Data Time 0.002 (0.016)	Loss 2.1173 (2.1509)	Entropy 0.59588 (0.59944)	Top-1 acc 73.047 (73.005)	Top-5 acc 88.672 (89.370)	lr 0.00000
Train [120][3010/3239]	Time 0.242 (0.651)	Data Time 0.001 (0.016)	Loss 2.1921 (2.1509)	Entropy 0.59592 (0.59943)	Top-1 acc 71.875 (73.009)	Top-5 acc 88.281 (89.370)	lr 0.00000
Train [120][3020/3239]	Time 0.278 (0.651)	Data Time 0.001 (0.016)	Loss 1.9824 (2.1509)	Entropy 0.59582 (0.59941)	Top-1 acc 76.953 (73.007)	Top-5 acc 91.016 (89.370)	lr 0.00000
Train [120][3030/3239]	Time 0.230 (0.650)	Data Time 0.001 (0.016)	Loss 1.9916 (2.1507)	Entropy 0.59581 (0.59940)	Top-1 acc 77.344 (73.010)	Top-5 acc 91.406 (89.371)	lr 0.00000
Train [120][3040/3239]	Time 0.233 (0.650)	Data Time 0.001 (0.016)	Loss 1.9624 (2.1507)	Entropy 0.59576 (0.59939)	Top-1 acc 78.516 (73.012)	Top-5 acc 92.578 (89.373)	lr 0.00000
Train [120][3050/3239]	Time 0.260 (0.649)	Data Time 0.001 (0.016)	Loss 2.1286 (2.1507)	Entropy 0.59577 (0.59938)	Top-1 acc 73.047 (73.013)	Top-5 acc 89.453 (89.374)	lr 0.00000
Train [120][3060/3239]	Time 0.287 (0.649)	Data Time 0.001 (0.015)	Loss 2.0774 (2.1507)	Entropy 0.59562 (0.59937)	Top-1 acc 77.344 (73.014)	Top-5 acc 89.062 (89.372)	lr 0.00000
Train [120][3070/3239]	Time 0.275 (0.648)	Data Time 0.001 (0.015)	Loss 2.0584 (2.1506)	Entropy 0.59558 (0.59935)	Top-1 acc 73.828 (73.015)	Top-5 acc 91.406 (89.374)	lr 0.00000
Train [120][3080/3239]	Time 0.263 (0.648)	Data Time 0.001 (0.015)	Loss 2.3157 (2.1507)	Entropy 0.59566 (0.59934)	Top-1 acc 68.359 (73.013)	Top-5 acc 85.547 (89.372)	lr 0.00000
Train [120][3090/3239]	Time 0.242 (0.647)	Data Time 0.001 (0.015)	Loss 2.0929 (2.1507)	Entropy 0.59560 (0.59933)	Top-1 acc 75.000 (73.013)	Top-5 acc 91.406 (89.372)	lr 0.00000
Train [120][3100/3239]	Time 0.260 (0.647)	Data Time 0.001 (0.015)	Loss 2.1307 (2.1507)	Entropy 0.59559 (0.59932)	Top-1 acc 72.656 (73.012)	Top-5 acc 89.844 (89.372)	lr 0.00000
Train [120][3110/3239]	Time 0.264 (0.646)	Data Time 0.001 (0.015)	Loss 2.3117 (2.1508)	Entropy 0.59560 (0.59931)	Top-1 acc 69.922 (73.010)	Top-5 acc 85.938 (89.369)	lr 0.00000
Train [120][3120/3239]	Time 0.262 (0.645)	Data Time 0.001 (0.015)	Loss 2.0520 (2.1509)	Entropy 0.59569 (0.59929)	Top-1 acc 76.562 (73.007)	Top-5 acc 89.844 (89.368)	lr 0.00000
Train [120][3130/3239]	Time 0.369 (0.645)	Data Time 0.001 (0.015)	Loss 2.0688 (2.1507)	Entropy 0.59570 (0.59928)	Top-1 acc 76.172 (73.012)	Top-5 acc 90.234 (89.370)	lr 0.00000
Train [120][3140/3239]	Time 0.306 (0.644)	Data Time 0.001 (0.015)	Loss 2.0927 (2.1507)	Entropy 0.59558 (0.59927)	Top-1 acc 76.562 (73.014)	Top-5 acc 92.188 (89.372)	lr 0.00000
Train [120][3150/3239]	Time 0.227 (0.644)	Data Time 0.001 (0.015)	Loss 2.0503 (2.1506)	Entropy 0.59559 (0.59926)	Top-1 acc 76.953 (73.018)	Top-5 acc 92.188 (89.372)	lr 0.00000
Train [120][3160/3239]	Time 0.286 (0.643)	Data Time 0.001 (0.015)	Loss 2.2289 (2.1509)	Entropy 0.59556 (0.59925)	Top-1 acc 73.047 (73.014)	Top-5 acc 89.453 (89.366)	lr 0.00000
Train [120][3170/3239]	Time 0.352 (0.643)	Data Time 0.001 (0.015)	Loss 2.1257 (2.1507)	Entropy 0.59563 (0.59924)	Top-1 acc 74.609 (73.017)	Top-5 acc 87.891 (89.370)	lr 0.00000
Train [120][3180/3239]	Time 0.255 (0.642)	Data Time 0.000 (0.015)	Loss 2.0797 (2.1508)	Entropy 0.59565 (0.59922)	Top-1 acc 73.828 (73.015)	Top-5 acc 91.797 (89.368)	lr 0.00000
Train [120][3190/3239]	Time 0.232 (0.642)	Data Time 0.000 (0.015)	Loss 2.1971 (2.1508)	Entropy 0.59578 (0.59921)	Top-1 acc 71.875 (73.016)	Top-5 acc 87.891 (89.369)	lr 0.00000
Train [120][3200/3239]	Time 0.230 (0.641)	Data Time 0.000 (0.015)	Loss 2.1181 (2.1507)	Entropy 0.59575 (0.59920)	Top-1 acc 74.609 (73.017)	Top-5 acc 89.844 (89.371)	lr 0.00000
Train [120][3210/3239]	Time 0.337 (0.641)	Data Time 0.000 (0.015)	Loss 2.2061 (2.1507)	Entropy 0.59569 (0.59919)	Top-1 acc 75.391 (73.018)	Top-5 acc 89.844 (89.370)	lr 0.00000
Train [120][3220/3239]	Time 0.234 (0.640)	Data Time 0.000 (0.015)	Loss 2.0798 (2.1507)	Entropy 0.59562 (0.59918)	Top-1 acc 73.438 (73.017)	Top-5 acc 89.844 (89.371)	lr 0.00000
Train [120][3230/3239]	Time 0.246 (0.640)	Data Time 0.000 (0.015)	Loss 2.0872 (2.1507)	Entropy 0.59552 (0.59917)	Top-1 acc 73.438 (73.017)	Top-5 acc 89.062 (89.371)	lr 0.00000
Train [120][3239/3239]	Time 2.392 (0.639)	Data Time 0.000 (0.015)	Loss 2.2204 (2.1507)	Entropy 0.59552 (0.59916)	Top-1 acc 74.074 (73.013)	Top-5 acc 87.654 (89.368)	lr 0.00000
==========Valid [120/120]	loss 1.195	top-1 acc 72.786 (72.786)	top-5 acc 90.042	Train top-1 73.013	top-5 89.368	Entropy 0.59552	Latency-None: 0.000ms	Flops: 544.27M
